VirtualBox

source: vbox/trunk/src/VBox/Runtime/common/string/base64.cpp@ 84293

Last change on this file since 84293 was 84293, checked in by vboxsync, 5 years ago

IPRT/base64: Put the UTF-16 code in separate file. Implemented decoding of UTF-16 strings. bugref:9224

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 16.8 KB
Line 
1/* $Id: base64.cpp 84293 2020-05-13 16:23:25Z vboxsync $ */
2/** @file
3 * IPRT - Base64, MIME content transfer encoding.
4 *
5 * @note The base64-utf16.cpp file must be diffable with this one.
6 * Fixes typically apply to both files.
7 */
8
9/*
10 * Copyright (C) 2009-2020 Oracle Corporation
11 *
12 * This file is part of VirtualBox Open Source Edition (OSE), as
13 * available from http://www.virtualbox.org. This file is free software;
14 * you can redistribute it and/or modify it under the terms of the GNU
15 * General Public License (GPL) as published by the Free Software
16 * Foundation, in version 2 as it comes in the "COPYING" file of the
17 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
18 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
19 *
20 * The contents of this file may alternatively be used under the terms
21 * of the Common Development and Distribution License Version 1.0
22 * (CDDL) only, as it comes in the "COPYING.CDDL" file of the
23 * VirtualBox OSE distribution, in which case the provisions of the
24 * CDDL are applicable instead of those of the GPL.
25 *
26 * You may elect to license modified versions of this file under the
27 * terms and conditions of either the GPL or the CDDL or both.
28 */
29
30
31/*********************************************************************************************************************************
32* Header Files *
33*********************************************************************************************************************************/
34#include <iprt/base64.h>
35#include "internal/iprt.h"
36
37#include <iprt/assert.h>
38#include <iprt/err.h>
39#include <iprt/ctype.h>
40#include <iprt/string.h>
41#ifdef RT_STRICT
42# include <iprt/asm.h>
43#endif
44
45#include "base64.h"
46
47
48/*********************************************************************************************************************************
49* Global Variables *
50*********************************************************************************************************************************/
51/** Base64 character to value. (RFC 2045)
52 * ASSUMES ASCII / UTF-8. */
53DECL_HIDDEN_CONST(const uint8_t) g_au8RTBase64CharToVal[256] =
54{
55 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xff, 0xff, /* 0x00..0x0f */
56 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, /* 0x10..0x1f */
57 0xc0, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 62, 0xff, 0xff, 0xff, 63, /* 0x20..0x2f */
58 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 0xff, 0xff, 0xff, 0xe0, 0xff, 0xff, /* 0x30..0x3f */
59 0xff, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, /* 0x40..0x4f */
60 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 0xff, 0xff, 0xff, 0xff, 0xff, /* 0x50..0x5f */
61 0xff, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, /* 0x60..0x6f */
62 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 0xff, 0xff, 0xff, 0xff, 0xff, /* 0x70..0x7f */
63 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, /* 0x80..0x8f */
64 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, /* 0x90..0x9f */
65 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, /* 0xa0..0xaf */
66 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, /* 0xb0..0xbf */
67 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, /* 0xc0..0xcf */
68 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, /* 0xd0..0xdf */
69 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, /* 0xe0..0xef */
70 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff /* 0xf0..0xff */
71};
72
73/** Value to Base64 character. (RFC 2045) */
74DECL_HIDDEN_CONST(const char) g_szRTBase64ValToChar[64+1] = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
75
76/** The end-of-line lengths (indexed by style flag value). */
77DECL_HIDDEN_CONST(const size_t) g_acchRTBase64EolStyles[RTBASE64_FLAGS_EOL_STYLE_MASK + 1] =
78{
79 /*[RTBASE64_FLAGS_EOL_NATIVE ]:*/ RTBASE64_EOL_SIZE,
80 /*[RTBASE64_FLAGS_NO_LINE_BREAKS]:*/ 0,
81 /*[RTBASE64_FLAGS_EOL_LF ]:*/ 1,
82 /*[RTBASE64_FLAGS_EOL_CRLF ]:*/ 2
83};
84
85/** The end-of-line characters (zero, one or two). */
86DECL_HIDDEN_CONST(const char) g_aachRTBase64EolStyles[RTBASE64_FLAGS_EOL_STYLE_MASK + 1][2] =
87{
88 /*[RTBASE64_FLAGS_EOL_NATIVE ]:*/ { RTBASE64_EOL_SIZE == 1 ? '\n' : '\r', RTBASE64_EOL_SIZE == 1 ? '\0' : '\n', },
89 /*[RTBASE64_FLAGS_NO_LINE_BREAKS]:*/ { '\0', '\0' },
90 /*[RTBASE64_FLAGS_EOL_LF ]:*/ { '\n', '\0' },
91 /*[RTBASE64_FLAGS_EOL_CRLF ]:*/ { '\r', '\n' },
92};
93
94
95
#ifdef RT_STRICT
/**
 * Cross-checks the encoding and decoding tables against each other.
 *
 * The checks are only carried out until they have completed once; a static
 * flag records that and turns later calls into no-ops.
 */
DECLHIDDEN(void) rtBase64Sanity(void)
{
    static bool s_fSane = false;
    if (s_fSane)
        return;

    /* Each 6-bit value must encode to a non-zero character that decodes back to the same value. */
    unsigned i = 0;
    while (i < 64)
    {
        unsigned ch = g_szRTBase64ValToChar[i];
        Assert(ch);
        Assert(g_au8RTBase64CharToVal[ch] == i);
        i++;
    }

    /*
     * Each decoder table entry must be one of: invalid (and then neither
     * alphanumeric nor white space), padding (only for '='), white space
     * (only for white space characters), or a 6-bit value that encodes back
     * to the very same character.
     */
    i = 0;
    while (i < 256)
    {
        uint8_t u8 = g_au8RTBase64CharToVal[i];
        Assert(   (   u8 == BASE64_INVALID
                   && !RT_C_IS_ALNUM(i)
                   && !RT_C_IS_SPACE(i))
               || (   u8 == BASE64_PAD
                   && i  == '=')
               || (   u8 == BASE64_SPACE
                   && RT_C_IS_SPACE(i))
               || (   u8 < 64
                   && (unsigned)g_szRTBase64ValToChar[u8] == i));
        i++;
    }

    ASMAtomicWriteBool(&s_fSane, true);
}
#endif /* RT_STRICT */
129
130
131/*
132 * Mostly the same as RTBase64DecodedUtf16SizeEx, except for the simpler
133 * character type. Fixes must be applied to both copies of the code.
134 */
135RTDECL(ssize_t) RTBase64DecodedSizeEx(const char *pszString, size_t cchStringMax, char **ppszEnd)
136{
137#ifdef RT_STRICT
138 rtBase64Sanity();
139#endif
140
141 /*
142 * Walk the string until a non-encoded or non-space character is encountered.
143 */
144 uint32_t c6Bits = 0;
145 uint8_t u8 = BASE64_INVALID;
146 unsigned ch = 0;
147 AssertCompile(sizeof(char) == sizeof(uint8_t));
148
149 while (cchStringMax > 0 && (ch = *pszString))
150 {
151 u8 = g_au8RTBase64CharToVal[ch];
152 if (u8 < 64)
153 c6Bits++;
154 else if (RT_UNLIKELY(u8 != BASE64_SPACE))
155 break;
156
157 /* advance */
158 pszString++;
159 cchStringMax--;
160 }
161
162 /*
163 * Padding can only be found at the end and there is
164 * only 1 or 2 padding chars. Deal with it first.
165 */
166 unsigned cbPad = 0;
167 if (u8 == BASE64_PAD)
168 {
169 cbPad = 1;
170 c6Bits++;
171 pszString++;
172 cchStringMax--;
173 while (cchStringMax > 0 && (ch = *pszString))
174 {
175 u8 = g_au8RTBase64CharToVal[ch];
176 if (u8 != BASE64_SPACE)
177 {
178 if (u8 != BASE64_PAD)
179 break;
180 c6Bits++;
181 cbPad++;
182 }
183 pszString++;
184 cchStringMax--;
185 }
186 if (cbPad >= 3)
187 return -1;
188 }
189
190 /*
191 * Invalid char and no where to indicate where the
192 * Base64 text ends? Return failure.
193 */
194 if ( u8 == BASE64_INVALID
195 && !ppszEnd
196 && ch)
197 return -1;
198
199 /*
200 * Recalc 6-bit to 8-bit and adjust for padding.
201 */
202 if (ppszEnd)
203 *ppszEnd = (char *)pszString;
204 return rtBase64DecodedSizeRecalc(c6Bits, cbPad);
205}
206RT_EXPORT_SYMBOL(RTBase64DecodedSizeEx);
207
208
209RTDECL(ssize_t) RTBase64DecodedSize(const char *pszString, char **ppszEnd)
210{
211 return RTBase64DecodedSizeEx(pszString, RTSTR_MAX, ppszEnd);
212}
213RT_EXPORT_SYMBOL(RTBase64DecodedSize);
214
215
216RTDECL(int) RTBase64DecodeEx(const char *pszString, size_t cchStringMax, void *pvData, size_t cbData,
217 size_t *pcbActual, char **ppszEnd)
218{
219#ifdef RT_STRICT
220 rtBase64Sanity();
221#endif
222
223 /*
224 * Process input in groups of 4 input / 3 output chars.
225 */
226 uint8_t u8Trio[3] = { 0, 0, 0 }; /* shuts up gcc */
227 uint8_t *pbData = (uint8_t *)pvData;
228 unsigned ch;
229 uint8_t u8;
230 unsigned c6Bits = 0;
231 AssertCompile(sizeof(char) == sizeof(uint8_t));
232
233 for (;;)
234 {
235 /* The first 6-bit group. */
236 while ((u8 = g_au8RTBase64CharToVal[ch = cchStringMax > 0 ? (uint8_t)*pszString : 0]) == BASE64_SPACE)
237 pszString++, cchStringMax--;
238 if (u8 >= 64)
239 {
240 c6Bits = 0;
241 break;
242 }
243 u8Trio[0] = u8 << 2;
244 pszString++;
245 cchStringMax--;
246
247 /* The second 6-bit group. */
248 while ((u8 = g_au8RTBase64CharToVal[ch = cchStringMax > 0 ? (uint8_t)*pszString : 0]) == BASE64_SPACE)
249 pszString++, cchStringMax--;
250 if (u8 >= 64)
251 {
252 c6Bits = 1;
253 break;
254 }
255 u8Trio[0] |= u8 >> 4;
256 u8Trio[1] = u8 << 4;
257 pszString++;
258 cchStringMax--;
259
260 /* The third 6-bit group. */
261 u8 = BASE64_INVALID;
262 while ((u8 = g_au8RTBase64CharToVal[ch = cchStringMax > 0 ? (uint8_t)*pszString : 0]) == BASE64_SPACE)
263 pszString++, cchStringMax--;
264 if (u8 >= 64)
265 {
266 c6Bits = 2;
267 break;
268 }
269 u8Trio[1] |= u8 >> 2;
270 u8Trio[2] = u8 << 6;
271 pszString++;
272 cchStringMax--;
273
274 /* The fourth 6-bit group. */
275 u8 = BASE64_INVALID;
276 while ((u8 = g_au8RTBase64CharToVal[ch = cchStringMax > 0 ? (uint8_t)*pszString : 0]) == BASE64_SPACE)
277 pszString++, cchStringMax--;
278 if (u8 >= 64)
279 {
280 c6Bits = 3;
281 break;
282 }
283 u8Trio[2] |= u8;
284 pszString++;
285 cchStringMax--;
286
287 /* flush the trio */
288 if (cbData < 3)
289 return VERR_BUFFER_OVERFLOW;
290 cbData -= 3;
291 pbData[0] = u8Trio[0];
292 pbData[1] = u8Trio[1];
293 pbData[2] = u8Trio[2];
294 pbData += 3;
295 }
296
297 /*
298 * Padding can only be found at the end and there is
299 * only 1 or 2 padding chars. Deal with it first.
300 */
301 unsigned cbPad = 0;
302 if (u8 == BASE64_PAD)
303 {
304 cbPad = 1;
305 pszString++;
306 cchStringMax--;
307 while (cchStringMax > 0 && (ch = (uint8_t)*pszString))
308 {
309 u8 = g_au8RTBase64CharToVal[ch];
310 if (u8 != BASE64_SPACE)
311 {
312 if (u8 != BASE64_PAD)
313 break;
314 cbPad++;
315 }
316 pszString++;
317 cchStringMax--;
318 }
319 if (cbPad >= 3)
320 return VERR_INVALID_BASE64_ENCODING;
321 }
322
323 /*
324 * Invalid char and no where to indicate where the
325 * Base64 text ends? Return failure.
326 */
327 if ( u8 == BASE64_INVALID
328 && !ppszEnd
329 && ch != '\0')
330 return VERR_INVALID_BASE64_ENCODING;
331
332 /*
333 * Check padding vs. pending sextets, if anything left to do finish it off.
334 */
335 if (c6Bits || cbPad)
336 {
337 if (c6Bits + cbPad != 4)
338 return VERR_INVALID_BASE64_ENCODING;
339
340 switch (c6Bits)
341 {
342 case 1:
343 u8Trio[1] = u8Trio[2] = 0;
344 break;
345 case 2:
346 u8Trio[2] = 0;
347 break;
348 case 3:
349 default:
350 break;
351 }
352 switch (3 - cbPad)
353 {
354 case 1:
355 if (cbData < 1)
356 return VERR_BUFFER_OVERFLOW;
357 cbData--;
358 pbData[0] = u8Trio[0];
359 pbData++;
360 break;
361
362 case 2:
363 if (cbData < 2)
364 return VERR_BUFFER_OVERFLOW;
365 cbData -= 2;
366 pbData[0] = u8Trio[0];
367 pbData[1] = u8Trio[1];
368 pbData += 2;
369 break;
370
371 default:
372 break;
373 }
374 }
375
376 /*
377 * Set optional return values and return successfully.
378 */
379 if (ppszEnd)
380 *ppszEnd = (char *)pszString;
381 if (pcbActual)
382 *pcbActual = pbData - (uint8_t *)pvData;
383 return VINF_SUCCESS;
384}
385RT_EXPORT_SYMBOL(RTBase64DecodeEx);
386
387
388RTDECL(int) RTBase64Decode(const char *pszString, void *pvData, size_t cbData, size_t *pcbActual, char **ppszEnd)
389{
390 return RTBase64DecodeEx(pszString, RTSTR_MAX, pvData, cbData, pcbActual, ppszEnd);
391}
392RT_EXPORT_SYMBOL(RTBase64Decode);
393
394
395RTDECL(size_t) RTBase64EncodedLength(size_t cbData)
396{
397 return RTBase64EncodedLengthEx(cbData, 0);
398}
399RT_EXPORT_SYMBOL(RTBase64EncodedLength);
400
401
402RTDECL(size_t) RTBase64EncodedLengthEx(size_t cbData, uint32_t fFlags)
403{
404 size_t const cchEol = g_acchRTBase64EolStyles[fFlags & RTBASE64_FLAGS_EOL_STYLE_MASK];
405
406 if (cbData * 8 / 8 != cbData)
407 {
408 AssertReturn(sizeof(size_t) == sizeof(uint64_t), ~(size_t)0);
409 uint64_t cch = cbData * (uint64_t)8;
410 while (cch % 24)
411 cch += 8;
412 cch /= 6;
413 cch += ((cch - 1) / RTBASE64_LINE_LEN) * cchEol;
414 return cch;
415 }
416
417 size_t cch = cbData * 8;
418 while (cch % 24)
419 cch += 8;
420 cch /= 6;
421 cch += ((cch - 1) / RTBASE64_LINE_LEN) * cchEol;
422 return cch;
423}
424RT_EXPORT_SYMBOL(RTBase64EncodedLengthEx);
425
426
427RTDECL(int) RTBase64Encode(const void *pvData, size_t cbData, char *pszBuf, size_t cbBuf, size_t *pcchActual)
428{
429 return RTBase64EncodeEx(pvData, cbData, 0, pszBuf, cbBuf, pcchActual);
430}
431RT_EXPORT_SYMBOL(RTBase64Encode);
432
433
434/*
435 * Please note that RTBase64EncodeUtf16Ex contains an almost exact copy of
436 * this code, just using different output character type and variable prefixes.
437 * So, all fixes must be applied to both versions of the code.
438 */
439RTDECL(int) RTBase64EncodeEx(const void *pvData, size_t cbData, uint32_t fFlags,
440 char *pszBuf, size_t cbBuf, size_t *pcchActual)
441{
442 /* Expand the EOL style flags: */
443 size_t const cchEol = g_acchRTBase64EolStyles[fFlags & RTBASE64_FLAGS_EOL_STYLE_MASK];
444 char const chEol0 = g_aachRTBase64EolStyles[fFlags & RTBASE64_FLAGS_EOL_STYLE_MASK][0];
445 char const chEol1 = g_aachRTBase64EolStyles[fFlags & RTBASE64_FLAGS_EOL_STYLE_MASK][1];
446 Assert(cchEol == (chEol0 != '\0' ? 1U : 0U) + (chEol1 != '\0' ? 1U : 0U));
447
448 /*
449 * Process whole "trios" of input data.
450 */
451 uint8_t u8A;
452 uint8_t u8B;
453 uint8_t u8C;
454 size_t cbLineFeed = cchEol ? cbBuf - RTBASE64_LINE_LEN : ~(size_t)0;
455 const uint8_t *pbSrc = (const uint8_t *)pvData;
456 char *pchDst = pszBuf;
457 while (cbData >= 3)
458 {
459 if (cbBuf < 4 + 1)
460 return VERR_BUFFER_OVERFLOW;
461
462 /* encode */
463 u8A = pbSrc[0];
464 pchDst[0] = g_szRTBase64ValToChar[u8A >> 2];
465 u8B = pbSrc[1];
466 pchDst[1] = g_szRTBase64ValToChar[((u8A << 4) & 0x3f) | (u8B >> 4)];
467 u8C = pbSrc[2];
468 pchDst[2] = g_szRTBase64ValToChar[((u8B << 2) & 0x3f) | (u8C >> 6)];
469 pchDst[3] = g_szRTBase64ValToChar[u8C & 0x3f];
470
471 /* advance */
472 cbBuf -= 4;
473 pchDst += 4;
474 cbData -= 3;
475 pbSrc += 3;
476
477 /* deal out end-of-line */
478 if (cbBuf == cbLineFeed && cbData && cchEol)
479 {
480 if (cbBuf < cchEol + 1)
481 return VERR_BUFFER_OVERFLOW;
482 cbBuf -= cchEol;
483 *pchDst++ = chEol0;
484 if (chEol1)
485 *pchDst++ = chEol1;
486 cbLineFeed = cbBuf - RTBASE64_LINE_LEN;
487 }
488 }
489
490 /*
491 * Deal with the odd bytes and string termination.
492 */
493 if (cbData)
494 {
495 if (cbBuf < 4 + 1)
496 return VERR_BUFFER_OVERFLOW;
497 switch (cbData)
498 {
499 case 1:
500 u8A = pbSrc[0];
501 pchDst[0] = g_szRTBase64ValToChar[u8A >> 2];
502 pchDst[1] = g_szRTBase64ValToChar[(u8A << 4) & 0x3f];
503 pchDst[2] = '=';
504 pchDst[3] = '=';
505 break;
506 case 2:
507 u8A = pbSrc[0];
508 pchDst[0] = g_szRTBase64ValToChar[u8A >> 2];
509 u8B = pbSrc[1];
510 pchDst[1] = g_szRTBase64ValToChar[((u8A << 4) & 0x3f) | (u8B >> 4)];
511 pchDst[2] = g_szRTBase64ValToChar[(u8B << 2) & 0x3f];
512 pchDst[3] = '=';
513 break;
514 }
515 pchDst += 4;
516 }
517
518 *pchDst = '\0';
519
520 if (pcchActual)
521 *pcchActual = pchDst - pszBuf;
522 return VINF_SUCCESS;
523}
524RT_EXPORT_SYMBOL(RTBase64EncodeEx);
525
Note: See TracBrowser for help on using the repository browser.

© 2025 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette