VirtualBox

source: vbox/trunk/src/VBox/Runtime/common/string/base64.cpp@ 86103

Last change on this file since 86103 was 84296, checked in by vboxsync, 5 years ago

IPRT/base64: Optimize '\0' handling a little and unify the two versions a little more. Try to address cranky linux build boxes wrt mangling. bugref:9224

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 16.9 KB
Line 
1/* $Id: base64.cpp 84296 2020-05-13 16:46:27Z vboxsync $ */
2/** @file
3 * IPRT - Base64, MIME content transfer encoding.
4 *
5 * @note The base64-utf16.cpp file must be diffable with this one.
6 * Fixes typically apply to both files.
7 */
8
9/*
10 * Copyright (C) 2009-2020 Oracle Corporation
11 *
12 * This file is part of VirtualBox Open Source Edition (OSE), as
13 * available from http://www.virtualbox.org. This file is free software;
14 * you can redistribute it and/or modify it under the terms of the GNU
15 * General Public License (GPL) as published by the Free Software
16 * Foundation, in version 2 as it comes in the "COPYING" file of the
17 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
18 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
19 *
20 * The contents of this file may alternatively be used under the terms
21 * of the Common Development and Distribution License Version 1.0
22 * (CDDL) only, as it comes in the "COPYING.CDDL" file of the
23 * VirtualBox OSE distribution, in which case the provisions of the
24 * CDDL are applicable instead of those of the GPL.
25 *
26 * You may elect to license modified versions of this file under the
27 * terms and conditions of either the GPL or the CDDL or both.
28 */
29
30
31/*********************************************************************************************************************************
32* Header Files *
33*********************************************************************************************************************************/
34#include <iprt/base64.h>
35#include "internal/iprt.h"
36
37#include <iprt/assert.h>
38#include <iprt/err.h>
39#include <iprt/ctype.h>
40#include <iprt/string.h>
41#ifdef RT_STRICT
42# include <iprt/asm.h>
43#endif
44
45#include "base64.h"
46
47
48/*********************************************************************************************************************************
49* Global Variables *
50*********************************************************************************************************************************/
51/** Base64 character to value. (RFC 2045)
52 * ASSUMES ASCII / UTF-8. */
53DECL_HIDDEN_CONST(const uint8_t) g_au8rtBase64CharToVal[256] =
54{
55 0xfe, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xff, 0xff, /* 0x00..0x0f */
56 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, /* 0x10..0x1f */
57 0xc0, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 62, 0xff, 0xff, 0xff, 63, /* 0x20..0x2f */
58 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 0xff, 0xff, 0xff, 0xe0, 0xff, 0xff, /* 0x30..0x3f */
59 0xff, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, /* 0x40..0x4f */
60 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 0xff, 0xff, 0xff, 0xff, 0xff, /* 0x50..0x5f */
61 0xff, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, /* 0x60..0x6f */
62 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 0xff, 0xff, 0xff, 0xff, 0xff, /* 0x70..0x7f */
63 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, /* 0x80..0x8f */
64 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, /* 0x90..0x9f */
65 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, /* 0xa0..0xaf */
66 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, /* 0xb0..0xbf */
67 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, /* 0xc0..0xcf */
68 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, /* 0xd0..0xdf */
69 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, /* 0xe0..0xef */
70 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff /* 0xf0..0xff */
71};
72
73/** Value to Base64 character. (RFC 2045) */
74DECL_HIDDEN_CONST(const char) g_szrtBase64ValToChar[64+1] = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
75
76/** The end-of-line lengths (indexed by style flag value). */
77DECL_HIDDEN_CONST(const size_t) g_acchrtBase64EolStyles[RTBASE64_FLAGS_EOL_STYLE_MASK + 1] =
78{
79 /*[RTBASE64_FLAGS_EOL_NATIVE ]:*/ RTBASE64_EOL_SIZE,
80 /*[RTBASE64_FLAGS_NO_LINE_BREAKS]:*/ 0,
81 /*[RTBASE64_FLAGS_EOL_LF ]:*/ 1,
82 /*[RTBASE64_FLAGS_EOL_CRLF ]:*/ 2
83};
84
85/** The end-of-line characters (zero, one or two). */
86DECL_HIDDEN_CONST(const char) g_aachrtBase64EolStyles[RTBASE64_FLAGS_EOL_STYLE_MASK + 1][2] =
87{
88 /*[RTBASE64_FLAGS_EOL_NATIVE ]:*/ { RTBASE64_EOL_SIZE == 1 ? '\n' : '\r', RTBASE64_EOL_SIZE == 1 ? '\0' : '\n', },
89 /*[RTBASE64_FLAGS_NO_LINE_BREAKS]:*/ { '\0', '\0' },
90 /*[RTBASE64_FLAGS_EOL_LF ]:*/ { '\n', '\0' },
91 /*[RTBASE64_FLAGS_EOL_CRLF ]:*/ { '\r', '\n' },
92};
93
94
95
#ifdef RT_STRICT
/**
 * Cross-checks the two translation tables (strict builds only).
 *
 * The checks run until they have completed once; after that the function
 * returns immediately.
 */
DECLHIDDEN(void) rtBase64Sanity(void)
{
    static bool s_fSane = false;
    if (s_fSane)
        return;

    /* Every 6-bit value must have a non-zero character that translates back to it. */
    for (unsigned i = 0; i < 64; i++)
    {
        unsigned ch = g_szrtBase64ValToChar[i];
        Assert(ch);
        Assert(g_au8rtBase64CharToVal[ch] == i);
    }

    /* Every character code must fall into exactly the class its table entry claims. */
    for (unsigned i = 0; i < 256; i++)
    {
        uint8_t u8 = g_au8rtBase64CharToVal[i];
        Assert(   (   u8 == BASE64_INVALID
                   && !RT_C_IS_ALNUM(i)
                   && !RT_C_IS_SPACE(i))
               || (   u8 == BASE64_PAD
                   && i  == '=')
               || (   u8 == BASE64_SPACE
                   && RT_C_IS_SPACE(i))
               || (   u8 < 64
                   && (unsigned)g_szrtBase64ValToChar[u8] == i)
               || (   u8 == BASE64_NULL
                   && i  == 0) );
    }

    ASMAtomicWriteBool(&s_fSane, true);
}
#endif /* RT_STRICT */
131
132
133
134/** Fetched the next character in the string and translates it. */
135DECL_FORCE_INLINE(uint8_t) rtBase64TranslateNext(const char *pszString, size_t cchStringMax)
136{
137 AssertCompile(sizeof(unsigned char) == sizeof(uint8_t));
138 if (cchStringMax > 0)
139 return g_au8rtBase64CharToVal[(unsigned char)*pszString];
140 return BASE64_NULL;
141}
142
143
144/*
145 * Mostly the same as RTBase64DecodedUtf16SizeEx, except for the simpler
146 * character type. Fixes must be applied to both copies of the code.
147 */
148RTDECL(ssize_t) RTBase64DecodedSizeEx(const char *pszString, size_t cchStringMax, char **ppszEnd)
149{
150#ifdef RT_STRICT
151 rtBase64Sanity();
152#endif
153
154 /*
155 * Walk the string until a non-encoded or non-space character is encountered.
156 */
157 uint32_t c6Bits = 0;
158 uint8_t u8;
159
160 while ((u8 = rtBase64TranslateNext(pszString, cchStringMax)) != BASE64_NULL)
161 {
162 if (u8 < 64)
163 c6Bits++;
164 else if (RT_UNLIKELY(u8 != BASE64_SPACE))
165 break;
166
167 /* advance */
168 pszString++;
169 cchStringMax--;
170 }
171
172 /*
173 * Padding can only be found at the end and there is
174 * only 1 or 2 padding chars. Deal with it first.
175 */
176 unsigned cbPad = 0;
177 if (u8 == BASE64_PAD)
178 {
179 cbPad = 1;
180 c6Bits++;
181 pszString++;
182 cchStringMax--;
183 while ((u8 = rtBase64TranslateNext(pszString, cchStringMax)) != BASE64_NULL)
184 {
185 if (u8 != BASE64_SPACE)
186 {
187 if (u8 != BASE64_PAD)
188 break;
189 c6Bits++;
190 cbPad++;
191 }
192 pszString++;
193 cchStringMax--;
194 }
195 if (cbPad >= 3)
196 return -1;
197 }
198
199 /*
200 * Invalid char and no where to indicate where the
201 * Base64 text ends? Return failure.
202 */
203 if ( u8 == BASE64_INVALID
204 && !ppszEnd)
205 return -1;
206
207 /*
208 * Recalc 6-bit to 8-bit and adjust for padding.
209 */
210 if (ppszEnd)
211 *ppszEnd = (char *)pszString;
212 return rtBase64DecodedSizeRecalc(c6Bits, cbPad);
213}
214RT_EXPORT_SYMBOL(RTBase64DecodedSizeEx);
215
216
217RTDECL(ssize_t) RTBase64DecodedSize(const char *pszString, char **ppszEnd)
218{
219 return RTBase64DecodedSizeEx(pszString, RTSTR_MAX, ppszEnd);
220}
221RT_EXPORT_SYMBOL(RTBase64DecodedSize);
222
223
224RTDECL(int) RTBase64DecodeEx(const char *pszString, size_t cchStringMax, void *pvData, size_t cbData,
225 size_t *pcbActual, char **ppszEnd)
226{
227#ifdef RT_STRICT
228 rtBase64Sanity();
229#endif
230
231 /*
232 * Process input in groups of 4 input / 3 output chars.
233 */
234 uint8_t u8Trio[3] = { 0, 0, 0 }; /* shuts up gcc */
235 uint8_t *pbData = (uint8_t *)pvData;
236 uint8_t u8;
237 unsigned c6Bits = 0;
238
239 for (;;)
240 {
241 /* The first 6-bit group. */
242 while ((u8 = rtBase64TranslateNext(pszString, cchStringMax)) == BASE64_SPACE)
243 pszString++, cchStringMax--;
244 if (u8 >= 64)
245 {
246 c6Bits = 0;
247 break;
248 }
249 u8Trio[0] = u8 << 2;
250 pszString++;
251 cchStringMax--;
252
253 /* The second 6-bit group. */
254 while ((u8 = rtBase64TranslateNext(pszString, cchStringMax)) == BASE64_SPACE)
255 pszString++, cchStringMax--;
256 if (u8 >= 64)
257 {
258 c6Bits = 1;
259 break;
260 }
261 u8Trio[0] |= u8 >> 4;
262 u8Trio[1] = u8 << 4;
263 pszString++;
264 cchStringMax--;
265
266 /* The third 6-bit group. */
267 u8 = BASE64_INVALID;
268 while ((u8 = rtBase64TranslateNext(pszString, cchStringMax)) == BASE64_SPACE)
269 pszString++, cchStringMax--;
270 if (u8 >= 64)
271 {
272 c6Bits = 2;
273 break;
274 }
275 u8Trio[1] |= u8 >> 2;
276 u8Trio[2] = u8 << 6;
277 pszString++;
278 cchStringMax--;
279
280 /* The fourth 6-bit group. */
281 u8 = BASE64_INVALID;
282 while ((u8 = rtBase64TranslateNext(pszString, cchStringMax)) == BASE64_SPACE)
283 pszString++, cchStringMax--;
284 if (u8 >= 64)
285 {
286 c6Bits = 3;
287 break;
288 }
289 u8Trio[2] |= u8;
290 pszString++;
291 cchStringMax--;
292
293 /* flush the trio */
294 if (cbData < 3)
295 return VERR_BUFFER_OVERFLOW;
296 cbData -= 3;
297 pbData[0] = u8Trio[0];
298 pbData[1] = u8Trio[1];
299 pbData[2] = u8Trio[2];
300 pbData += 3;
301 }
302
303 /*
304 * Padding can only be found at the end and there is
305 * only 1 or 2 padding chars. Deal with it first.
306 */
307 unsigned cbPad = 0;
308 if (u8 == BASE64_PAD)
309 {
310 cbPad = 1;
311 pszString++;
312 cchStringMax--;
313 while ((u8 = rtBase64TranslateNext(pszString, cchStringMax)) != BASE64_NULL)
314 {
315 if (u8 != BASE64_SPACE)
316 {
317 if (u8 != BASE64_PAD)
318 break;
319 cbPad++;
320 }
321 pszString++;
322 cchStringMax--;
323 }
324 if (cbPad >= 3)
325 return VERR_INVALID_BASE64_ENCODING;
326 }
327
328 /*
329 * Invalid char and no where to indicate where the
330 * Base64 text ends? Return failure.
331 */
332 if ( u8 == BASE64_INVALID
333 && !ppszEnd)
334 return VERR_INVALID_BASE64_ENCODING;
335
336 /*
337 * Check padding vs. pending sextets, if anything left to do finish it off.
338 */
339 if (c6Bits || cbPad)
340 {
341 if (c6Bits + cbPad != 4)
342 return VERR_INVALID_BASE64_ENCODING;
343
344 switch (c6Bits)
345 {
346 case 1:
347 u8Trio[1] = u8Trio[2] = 0;
348 break;
349 case 2:
350 u8Trio[2] = 0;
351 break;
352 case 3:
353 default:
354 break;
355 }
356 switch (3 - cbPad)
357 {
358 case 1:
359 if (cbData < 1)
360 return VERR_BUFFER_OVERFLOW;
361 cbData--;
362 pbData[0] = u8Trio[0];
363 pbData++;
364 break;
365
366 case 2:
367 if (cbData < 2)
368 return VERR_BUFFER_OVERFLOW;
369 cbData -= 2;
370 pbData[0] = u8Trio[0];
371 pbData[1] = u8Trio[1];
372 pbData += 2;
373 break;
374
375 default:
376 break;
377 }
378 }
379
380 /*
381 * Set optional return values and return successfully.
382 */
383 if (ppszEnd)
384 *ppszEnd = (char *)pszString;
385 if (pcbActual)
386 *pcbActual = pbData - (uint8_t *)pvData;
387 return VINF_SUCCESS;
388}
389RT_EXPORT_SYMBOL(RTBase64DecodeEx);
390
391
392RTDECL(int) RTBase64Decode(const char *pszString, void *pvData, size_t cbData, size_t *pcbActual, char **ppszEnd)
393{
394 return RTBase64DecodeEx(pszString, RTSTR_MAX, pvData, cbData, pcbActual, ppszEnd);
395}
396RT_EXPORT_SYMBOL(RTBase64Decode);
397
398
399RTDECL(size_t) RTBase64EncodedLength(size_t cbData)
400{
401 return RTBase64EncodedLengthEx(cbData, 0);
402}
403RT_EXPORT_SYMBOL(RTBase64EncodedLength);
404
405
406RTDECL(size_t) RTBase64EncodedLengthEx(size_t cbData, uint32_t fFlags)
407{
408 size_t const cchEol = g_acchrtBase64EolStyles[fFlags & RTBASE64_FLAGS_EOL_STYLE_MASK];
409
410 if (cbData * 8 / 8 != cbData)
411 {
412 AssertReturn(sizeof(size_t) == sizeof(uint64_t), ~(size_t)0);
413 uint64_t cch = cbData * (uint64_t)8;
414 while (cch % 24)
415 cch += 8;
416 cch /= 6;
417 cch += ((cch - 1) / RTBASE64_LINE_LEN) * cchEol;
418 return cch;
419 }
420
421 size_t cch = cbData * 8;
422 while (cch % 24)
423 cch += 8;
424 cch /= 6;
425 cch += ((cch - 1) / RTBASE64_LINE_LEN) * cchEol;
426 return cch;
427}
428RT_EXPORT_SYMBOL(RTBase64EncodedLengthEx);
429
430
431RTDECL(int) RTBase64Encode(const void *pvData, size_t cbData, char *pszBuf, size_t cbBuf, size_t *pcchActual)
432{
433 return RTBase64EncodeEx(pvData, cbData, 0, pszBuf, cbBuf, pcchActual);
434}
435RT_EXPORT_SYMBOL(RTBase64Encode);
436
437
438/*
439 * Please note that RTBase64EncodeUtf16Ex contains an almost exact copy of
440 * this code, just using different output character type and variable prefixes.
441 * So, all fixes must be applied to both versions of the code.
442 */
443RTDECL(int) RTBase64EncodeEx(const void *pvData, size_t cbData, uint32_t fFlags,
444 char *pszBuf, size_t cbBuf, size_t *pcchActual)
445{
446 /* Expand the EOL style flags: */
447 size_t const cchEol = g_acchrtBase64EolStyles[fFlags & RTBASE64_FLAGS_EOL_STYLE_MASK];
448 char const chEol0 = g_aachrtBase64EolStyles[fFlags & RTBASE64_FLAGS_EOL_STYLE_MASK][0];
449 char const chEol1 = g_aachrtBase64EolStyles[fFlags & RTBASE64_FLAGS_EOL_STYLE_MASK][1];
450 Assert(cchEol == (chEol0 != '\0' ? 1U : 0U) + (chEol1 != '\0' ? 1U : 0U));
451
452 /*
453 * Process whole "trios" of input data.
454 */
455 uint8_t u8A;
456 uint8_t u8B;
457 uint8_t u8C;
458 size_t cbLineFeed = cchEol ? cbBuf - RTBASE64_LINE_LEN : ~(size_t)0;
459 const uint8_t *pbSrc = (const uint8_t *)pvData;
460 char *pchDst = pszBuf;
461 while (cbData >= 3)
462 {
463 if (cbBuf < 4 + 1)
464 return VERR_BUFFER_OVERFLOW;
465
466 /* encode */
467 u8A = pbSrc[0];
468 pchDst[0] = g_szrtBase64ValToChar[u8A >> 2];
469 u8B = pbSrc[1];
470 pchDst[1] = g_szrtBase64ValToChar[((u8A << 4) & 0x3f) | (u8B >> 4)];
471 u8C = pbSrc[2];
472 pchDst[2] = g_szrtBase64ValToChar[((u8B << 2) & 0x3f) | (u8C >> 6)];
473 pchDst[3] = g_szrtBase64ValToChar[u8C & 0x3f];
474
475 /* advance */
476 cbBuf -= 4;
477 pchDst += 4;
478 cbData -= 3;
479 pbSrc += 3;
480
481 /* deal out end-of-line */
482 if (cbBuf == cbLineFeed && cbData && cchEol)
483 {
484 if (cbBuf < cchEol + 1)
485 return VERR_BUFFER_OVERFLOW;
486 cbBuf -= cchEol;
487 *pchDst++ = chEol0;
488 if (chEol1)
489 *pchDst++ = chEol1;
490 cbLineFeed = cbBuf - RTBASE64_LINE_LEN;
491 }
492 }
493
494 /*
495 * Deal with the odd bytes and string termination.
496 */
497 if (cbData)
498 {
499 if (cbBuf < 4 + 1)
500 return VERR_BUFFER_OVERFLOW;
501 switch (cbData)
502 {
503 case 1:
504 u8A = pbSrc[0];
505 pchDst[0] = g_szrtBase64ValToChar[u8A >> 2];
506 pchDst[1] = g_szrtBase64ValToChar[(u8A << 4) & 0x3f];
507 pchDst[2] = '=';
508 pchDst[3] = '=';
509 break;
510 case 2:
511 u8A = pbSrc[0];
512 pchDst[0] = g_szrtBase64ValToChar[u8A >> 2];
513 u8B = pbSrc[1];
514 pchDst[1] = g_szrtBase64ValToChar[((u8A << 4) & 0x3f) | (u8B >> 4)];
515 pchDst[2] = g_szrtBase64ValToChar[(u8B << 2) & 0x3f];
516 pchDst[3] = '=';
517 break;
518 }
519 pchDst += 4;
520 }
521
522 *pchDst = '\0';
523
524 if (pcchActual)
525 *pcchActual = pchDst - pszBuf;
526 return VINF_SUCCESS;
527}
528RT_EXPORT_SYMBOL(RTBase64EncodeEx);
529
Note: See TracBrowser for help on using the repository browser.

© 2024 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette