VirtualBox

source: vbox/trunk/src/VBox/Runtime/common/string/base64.cpp@ 84273

Last change on this file since 84273 was 84273, checked in by vboxsync, 5 years ago

IPRT/Base64: Allow the end-of-line style to be specified via the fFlags parameter when encoding. ?[typo] bugref:9699

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 18.6 KB
Line 
1/* $Id: base64.cpp 84273 2020-05-12 16:33:50Z vboxsync $ */
2/** @file
3 * IPRT - Base64, MIME content transfer encoding.
4 */
5
6/*
7 * Copyright (C) 2009-2020 Oracle Corporation
8 *
9 * This file is part of VirtualBox Open Source Edition (OSE), as
10 * available from http://www.virtualbox.org. This file is free software;
11 * you can redistribute it and/or modify it under the terms of the GNU
12 * General Public License (GPL) as published by the Free Software
13 * Foundation, in version 2 as it comes in the "COPYING" file of the
14 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
15 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
16 *
17 * The contents of this file may alternatively be used under the terms
18 * of the Common Development and Distribution License Version 1.0
19 * (CDDL) only, as it comes in the "COPYING.CDDL" file of the
20 * VirtualBox OSE distribution, in which case the provisions of the
21 * CDDL are applicable instead of those of the GPL.
22 *
23 * You may elect to license modified versions of this file under the
24 * terms and conditions of either the GPL or the CDDL or both.
25 */
26
27
28/*********************************************************************************************************************************
29* Header Files *
30*********************************************************************************************************************************/
31#include <iprt/base64.h>
32#include "internal/iprt.h"
33
34#include <iprt/assert.h>
35#include <iprt/err.h>
36#include <iprt/ctype.h>
37#include <iprt/string.h>
38#ifdef RT_STRICT
39# include <iprt/asm.h>
40#endif
41
42
43/*********************************************************************************************************************************
44* Defined Constants And Macros *
45*********************************************************************************************************************************/
/** Number of Base64 characters the encoder emits per line before it inserts
 *  an end-of-line sequence (when line breaks are enabled). */
#define RTBASE64_LINE_LEN   64

/** @name Special g_au8CharToVal values
 * Table entries of 64 and above do not represent a 6-bit value; they classify
 * the character instead.
 * @{ */
/** The character is whitespace and is skipped by the decoder. */
#define BASE64_SPACE        0xc0
/** The character is the padding character ('='). */
#define BASE64_PAD          0xe0
/** The character is not part of the Base64 alphabet (includes NUL). */
#define BASE64_INVALID      0xff
/** @} */
55
56
57/*********************************************************************************************************************************
58* Global Variables *
59*********************************************************************************************************************************/
60/** Base64 character to value. (RFC 2045)
61 * ASSUMES ASCII / UTF-8. */
62static const uint8_t g_au8CharToVal[256] =
63{
64 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xff, 0xff, /* 0x00..0x0f */
65 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, /* 0x10..0x1f */
66 0xc0, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 62, 0xff, 0xff, 0xff, 63, /* 0x20..0x2f */
67 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 0xff, 0xff, 0xff, 0xe0, 0xff, 0xff, /* 0x30..0x3f */
68 0xff, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, /* 0x40..0x4f */
69 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 0xff, 0xff, 0xff, 0xff, 0xff, /* 0x50..0x5f */
70 0xff, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, /* 0x60..0x6f */
71 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 0xff, 0xff, 0xff, 0xff, 0xff, /* 0x70..0x7f */
72 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, /* 0x80..0x8f */
73 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, /* 0x90..0x9f */
74 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, /* 0xa0..0xaf */
75 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, /* 0xb0..0xbf */
76 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, /* 0xc0..0xcf */
77 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, /* 0xd0..0xdf */
78 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, /* 0xe0..0xef */
79 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff /* 0xf0..0xff */
80};
81
/**
 * 6-bit value to Base64 character (RFC 2045).
 *
 * The alphabet in value order: upper case letters, lower case letters,
 * digits, then '+' and '/'.  The extra array element holds the terminator.
 */
static const char g_szValToChar[64+1] =
    "ABCDEFGHIJKLMNOPQRSTUVWXYZ"    /*  0..25 */
    "abcdefghijklmnopqrstuvwxyz"    /* 26..51 */
    "0123456789"                    /* 52..61 */
    "+/";                           /* 62..63 */
84
85/** The end-of-line lengths (indexed by style flag value). */
86static const size_t g_acchEolStyles[RTBASE64_FLAGS_EOL_STYLE_MASK + 1] =
87{
88 /*[RTBASE64_FLAGS_EOL_NATIVE ]:*/ RTBASE64_EOL_SIZE,
89 /*[RTBASE64_FLAGS_NO_LINE_BREAKS]:*/ 0,
90 /*[RTBASE64_FLAGS_EOL_LF ]:*/ 1,
91 /*[RTBASE64_FLAGS_EOL_CRLF ]:*/ 2
92};
93
94/** The end-of-line characters (zero, one or two). */
95static const char g_aachEolStyles[RTBASE64_FLAGS_EOL_STYLE_MASK + 1][2] =
96{
97 /*[RTBASE64_FLAGS_EOL_NATIVE ]:*/ { RTBASE64_EOL_SIZE == 1 ? '\n' : '\r', RTBASE64_EOL_SIZE == 1 ? '\0' : '\n', },
98 /*[RTBASE64_FLAGS_NO_LINE_BREAKS]:*/ { '\0', '\0' },
99 /*[RTBASE64_FLAGS_EOL_LF ]:*/ { '\n', '\0' },
100 /*[RTBASE64_FLAGS_EOL_CRLF ]:*/ { '\r', '\n' },
101};
102
103
104
105#ifdef RT_STRICT
106/**
107 * Perform table sanity checks on the first call.
108 */
109static void rtBase64Sanity(void)
110{
111 static bool s_fSane = false;
112 if (RT_UNLIKELY(!s_fSane))
113 {
114 for (unsigned i = 0; i < 64; i++)
115 {
116 unsigned ch = g_szValToChar[i];
117 Assert(ch);
118 Assert(g_au8CharToVal[ch] == i);
119 }
120
121 for (unsigned i = 0; i < 256; i++)
122 {
123 uint8_t u8 = g_au8CharToVal[i];
124 Assert( ( u8 == BASE64_INVALID
125 && !RT_C_IS_ALNUM(i)
126 && !RT_C_IS_SPACE(i))
127 || ( u8 == BASE64_PAD
128 && i == '=')
129 || ( u8 == BASE64_SPACE
130 && RT_C_IS_SPACE(i))
131 || ( u8 < 64
132 && (unsigned)g_szValToChar[u8] == i));
133 }
134 ASMAtomicWriteBool(&s_fSane, true);
135 }
136}
137#endif /* RT_STRICT */
138
139
140RTDECL(ssize_t) RTBase64DecodedSizeEx(const char *pszString, size_t cchStringMax, char **ppszEnd)
141{
142#ifdef RT_STRICT
143 rtBase64Sanity();
144#endif
145
146 /*
147 * Walk the string until a non-encoded or non-space character is encountered.
148 */
149 uint32_t c6Bits = 0;
150 uint8_t u8 = BASE64_INVALID;
151 unsigned ch = 0;
152 AssertCompile(sizeof(char) == sizeof(uint8_t));
153
154 while (cchStringMax > 0 && (ch = *pszString))
155 {
156 u8 = g_au8CharToVal[ch];
157 if (u8 < 64)
158 c6Bits++;
159 else if (RT_UNLIKELY(u8 != BASE64_SPACE))
160 break;
161
162 /* advance */
163 pszString++;
164 cchStringMax--;
165 }
166
167 /*
168 * Padding can only be found at the end and there is
169 * only 1 or 2 padding chars. Deal with it first.
170 */
171 unsigned cbPad = 0;
172 if (u8 == BASE64_PAD)
173 {
174 cbPad = 1;
175 c6Bits++;
176 pszString++;
177 cchStringMax--;
178 while (cchStringMax > 0 && (ch = *pszString))
179 {
180 u8 = g_au8CharToVal[ch];
181 if (u8 != BASE64_SPACE)
182 {
183 if (u8 != BASE64_PAD)
184 break;
185 c6Bits++;
186 cbPad++;
187 }
188 pszString++;
189 cchStringMax--;
190 }
191 if (cbPad >= 3)
192 return -1;
193 }
194
195 /*
196 * Invalid char and no where to indicate where the
197 * Base64 text ends? Return failure.
198 */
199 if ( u8 == BASE64_INVALID
200 && !ppszEnd
201 && ch)
202 return -1;
203
204 /*
205 * Recalc 6-bit to 8-bit and adjust for padding.
206 */
207 size_t cb;
208 if (c6Bits * 3 / 3 == c6Bits)
209 {
210 if ((c6Bits * 3 % 4) != 0)
211 return -1;
212 cb = c6Bits * 3 / 4;
213 }
214 else
215 {
216 if ((c6Bits * (uint64_t)3 % 4) != 0)
217 return -1;
218 cb = c6Bits * (uint64_t)3 / 4;
219 }
220
221 if (cb < cbPad)
222 return -1;
223 cb -= cbPad;
224
225 if (ppszEnd)
226 *ppszEnd = (char *)pszString;
227 return cb;
228}
229RT_EXPORT_SYMBOL(RTBase64DecodedSizeEx);
230
231
232RTDECL(ssize_t) RTBase64DecodedSize(const char *pszString, char **ppszEnd)
233{
234 return RTBase64DecodedSizeEx(pszString, RTSTR_MAX, ppszEnd);
235}
236RT_EXPORT_SYMBOL(RTBase64DecodedSize);
237
238
239RTDECL(int) RTBase64DecodeEx(const char *pszString, size_t cchStringMax, void *pvData, size_t cbData,
240 size_t *pcbActual, char **ppszEnd)
241{
242#ifdef RT_STRICT
243 rtBase64Sanity();
244#endif
245
246 /*
247 * Process input in groups of 4 input / 3 output chars.
248 */
249 uint8_t u8Trio[3] = { 0, 0, 0 }; /* shuts up gcc */
250 uint8_t *pbData = (uint8_t *)pvData;
251 unsigned ch;
252 uint8_t u8;
253 unsigned c6Bits = 0;
254 AssertCompile(sizeof(char) == sizeof(uint8_t));
255
256 for (;;)
257 {
258 /* The first 6-bit group. */
259 while ((u8 = g_au8CharToVal[ch = cchStringMax > 0 ? (uint8_t)*pszString : 0]) == BASE64_SPACE)
260 pszString++, cchStringMax--;
261 if (u8 >= 64)
262 {
263 c6Bits = 0;
264 break;
265 }
266 u8Trio[0] = u8 << 2;
267 pszString++;
268 cchStringMax--;
269
270 /* The second 6-bit group. */
271 while ((u8 = g_au8CharToVal[ch = cchStringMax > 0 ? (uint8_t)*pszString : 0]) == BASE64_SPACE)
272 pszString++, cchStringMax--;
273 if (u8 >= 64)
274 {
275 c6Bits = 1;
276 break;
277 }
278 u8Trio[0] |= u8 >> 4;
279 u8Trio[1] = u8 << 4;
280 pszString++;
281 cchStringMax--;
282
283 /* The third 6-bit group. */
284 u8 = BASE64_INVALID;
285 while ((u8 = g_au8CharToVal[ch = cchStringMax > 0 ? (uint8_t)*pszString : 0]) == BASE64_SPACE)
286 pszString++, cchStringMax--;
287 if (u8 >= 64)
288 {
289 c6Bits = 2;
290 break;
291 }
292 u8Trio[1] |= u8 >> 2;
293 u8Trio[2] = u8 << 6;
294 pszString++;
295 cchStringMax--;
296
297 /* The fourth 6-bit group. */
298 u8 = BASE64_INVALID;
299 while ((u8 = g_au8CharToVal[ch = cchStringMax > 0 ? (uint8_t)*pszString : 0]) == BASE64_SPACE)
300 pszString++, cchStringMax--;
301 if (u8 >= 64)
302 {
303 c6Bits = 3;
304 break;
305 }
306 u8Trio[2] |= u8;
307 pszString++;
308 cchStringMax--;
309
310 /* flush the trio */
311 if (cbData < 3)
312 return VERR_BUFFER_OVERFLOW;
313 cbData -= 3;
314 pbData[0] = u8Trio[0];
315 pbData[1] = u8Trio[1];
316 pbData[2] = u8Trio[2];
317 pbData += 3;
318 }
319
320 /*
321 * Padding can only be found at the end and there is
322 * only 1 or 2 padding chars. Deal with it first.
323 */
324 unsigned cbPad = 0;
325 if (u8 == BASE64_PAD)
326 {
327 cbPad = 1;
328 pszString++;
329 cchStringMax--;
330 while (cchStringMax > 0 && (ch = (uint8_t)*pszString))
331 {
332 u8 = g_au8CharToVal[ch];
333 if (u8 != BASE64_SPACE)
334 {
335 if (u8 != BASE64_PAD)
336 break;
337 cbPad++;
338 }
339 pszString++;
340 cchStringMax--;
341 }
342 if (cbPad >= 3)
343 return VERR_INVALID_BASE64_ENCODING;
344 }
345
346 /*
347 * Invalid char and no where to indicate where the
348 * Base64 text ends? Return failure.
349 */
350 if ( u8 == BASE64_INVALID
351 && !ppszEnd
352 && ch != '\0')
353 return VERR_INVALID_BASE64_ENCODING;
354
355 /*
356 * Check padding vs. pending sextets, if anything left to do finish it off.
357 */
358 if (c6Bits || cbPad)
359 {
360 if (c6Bits + cbPad != 4)
361 return VERR_INVALID_BASE64_ENCODING;
362
363 switch (c6Bits)
364 {
365 case 1:
366 u8Trio[1] = u8Trio[2] = 0;
367 break;
368 case 2:
369 u8Trio[2] = 0;
370 break;
371 case 3:
372 default:
373 break;
374 }
375 switch (3 - cbPad)
376 {
377 case 1:
378 if (cbData < 1)
379 return VERR_BUFFER_OVERFLOW;
380 cbData--;
381 pbData[0] = u8Trio[0];
382 pbData++;
383 break;
384
385 case 2:
386 if (cbData < 2)
387 return VERR_BUFFER_OVERFLOW;
388 cbData -= 2;
389 pbData[0] = u8Trio[0];
390 pbData[1] = u8Trio[1];
391 pbData += 2;
392 break;
393
394 default:
395 break;
396 }
397 }
398
399 /*
400 * Set optional return values and return successfully.
401 */
402 if (ppszEnd)
403 *ppszEnd = (char *)pszString;
404 if (pcbActual)
405 *pcbActual = pbData - (uint8_t *)pvData;
406 return VINF_SUCCESS;
407}
408RT_EXPORT_SYMBOL(RTBase64DecodeEx);
409
410
411RTDECL(int) RTBase64Decode(const char *pszString, void *pvData, size_t cbData, size_t *pcbActual, char **ppszEnd)
412{
413 return RTBase64DecodeEx(pszString, RTSTR_MAX, pvData, cbData, pcbActual, ppszEnd);
414}
415RT_EXPORT_SYMBOL(RTBase64Decode);
416
417
418/**
419 * Calculates the length of the Base64 encoding of a given number of bytes of
420 * data produced by RTBase64Encode().
421 *
422 * @returns The Base64 string length.
423 * @param cbData The number of bytes to encode.
424 */
425RTDECL(size_t) RTBase64EncodedLength(size_t cbData)
426{
427 return RTBase64EncodedLengthEx(cbData, 0);
428}
429RT_EXPORT_SYMBOL(RTBase64EncodedLength);
430
431
432/**
433 * Calculates the length of the Base64 encoding of a given number of bytes of
434 * data produced by RTBase64EncodeEx() with the same @a fFlags.
435 *
436 * @returns The Base64 string length.
437 * @param cbData The number of bytes to encode.
438 * @param fFlags Flags, any combination of the RTBASE64_FLAGS \#defines.
439 */
440RTDECL(size_t) RTBase64EncodedLengthEx(size_t cbData, uint32_t fFlags)
441{
442 size_t const cchEol = g_acchEolStyles[fFlags & RTBASE64_FLAGS_EOL_STYLE_MASK];
443
444 if (cbData * 8 / 8 != cbData)
445 {
446 AssertReturn(sizeof(size_t) == sizeof(uint64_t), ~(size_t)0);
447 uint64_t cch = cbData * (uint64_t)8;
448 while (cch % 24)
449 cch += 8;
450 cch /= 6;
451 cch += ((cch - 1) / RTBASE64_LINE_LEN) * cchEol;
452 return cch;
453 }
454
455 size_t cch = cbData * 8;
456 while (cch % 24)
457 cch += 8;
458 cch /= 6;
459 cch += ((cch - 1) / RTBASE64_LINE_LEN) * cchEol;
460 return cch;
461}
462RT_EXPORT_SYMBOL(RTBase64EncodedLengthEx);
463
464
465/**
466 * Encodes the specifed data into a Base64 string, the caller supplies the
467 * output buffer.
468 *
469 * This is equivalent to calling RTBase64EncodeEx() with no flags.
470 *
471 * @returns IRPT status code.
472 * @retval VERR_BUFFER_OVERFLOW if the output buffer is too small. The buffer
473 * may contain an invalid Base64 string.
474 *
475 * @param pvData The data to encode.
476 * @param cbData The number of bytes to encode.
477 * @param pszBuf Where to put the Base64 string.
478 * @param cbBuf The size of the output buffer, including the terminator.
479 * @param pcchActual The actual number of characters returned.
480 */
481RTDECL(int) RTBase64Encode(const void *pvData, size_t cbData, char *pszBuf, size_t cbBuf, size_t *pcchActual)
482{
483 return RTBase64EncodeEx(pvData, cbData, 0, pszBuf, cbBuf, pcchActual);
484}
485RT_EXPORT_SYMBOL(RTBase64Encode);
486
487
488/**
489 * Encodes the specifed data into a Base64 string, the caller supplies the
490 * output buffer.
491 *
492 * @returns IRPT status code.
493 * @retval VERR_BUFFER_OVERFLOW if the output buffer is too small. The buffer
494 * may contain an invalid Base64 string.
495 *
496 * @param pvData The data to encode.
497 * @param cbData The number of bytes to encode.
498 * @param pszBuf Where to put the Base64 string.
499 * @param cbBuf The size of the output buffer, including the terminator.
500 * @param pcchActual The actual number of characters returned.
501 */
502RTDECL(int) RTBase64EncodeEx(const void *pvData, size_t cbData, uint32_t fFlags,
503 char *pszBuf, size_t cbBuf, size_t *pcchActual)
504{
505 /* Expand the EOL style flags: */
506 size_t const cchEol = g_acchEolStyles[fFlags & RTBASE64_FLAGS_EOL_STYLE_MASK];
507 char const chEol0 = g_aachEolStyles[fFlags & RTBASE64_FLAGS_EOL_STYLE_MASK][0];
508 char const chEol1 = g_aachEolStyles[fFlags & RTBASE64_FLAGS_EOL_STYLE_MASK][1];
509
510 /*
511 * Process whole "trios" of input data.
512 */
513 uint8_t u8A;
514 uint8_t u8B;
515 uint8_t u8C;
516 size_t cbLineFeed = cbBuf - RTBASE64_LINE_LEN;
517 const uint8_t *pbSrc = (const uint8_t *)pvData;
518 char *pchDst = pszBuf;
519 while (cbData >= 3)
520 {
521 if (cbBuf < 4 + 1)
522 return VERR_BUFFER_OVERFLOW;
523
524 /* encode */
525 u8A = pbSrc[0];
526 pchDst[0] = g_szValToChar[u8A >> 2];
527 u8B = pbSrc[1];
528 pchDst[1] = g_szValToChar[((u8A << 4) & 0x3f) | (u8B >> 4)];
529 u8C = pbSrc[2];
530 pchDst[2] = g_szValToChar[((u8B << 2) & 0x3f) | (u8C >> 6)];
531 pchDst[3] = g_szValToChar[u8C & 0x3f];
532
533 /* advance */
534 cbBuf -= 4;
535 pchDst += 4;
536 cbData -= 3;
537 pbSrc += 3;
538
539 if (cchEol > 0)
540 {
541 /* deal out end-of-line */
542 if (cbBuf == cbLineFeed && cbData)
543 {
544 if (cbBuf < cchEol + 1)
545 return VERR_BUFFER_OVERFLOW;
546 cbBuf -= cchEol;
547 *pchDst++ = chEol0;
548 if (chEol1)
549 *pchDst++ = chEol1;
550 cbLineFeed = cbBuf - RTBASE64_LINE_LEN;
551 }
552 }
553 }
554
555 /*
556 * Deal with the odd bytes and string termination.
557 */
558 if (cbData)
559 {
560 if (cbBuf < 4 + 1)
561 return VERR_BUFFER_OVERFLOW;
562 switch (cbData)
563 {
564 case 1:
565 u8A = pbSrc[0];
566 pchDst[0] = g_szValToChar[u8A >> 2];
567 pchDst[1] = g_szValToChar[(u8A << 4) & 0x3f];
568 pchDst[2] = '=';
569 pchDst[3] = '=';
570 break;
571 case 2:
572 u8A = pbSrc[0];
573 pchDst[0] = g_szValToChar[u8A >> 2];
574 u8B = pbSrc[1];
575 pchDst[1] = g_szValToChar[((u8A << 4) & 0x3f) | (u8B >> 4)];
576 pchDst[2] = g_szValToChar[(u8B << 2) & 0x3f];
577 pchDst[3] = '=';
578 break;
579 }
580 pchDst += 4;
581 }
582
583 *pchDst = '\0';
584
585 if (pcchActual)
586 *pcchActual = pchDst - pszBuf;
587 return VINF_SUCCESS;
588}
589RT_EXPORT_SYMBOL(RTBase64EncodeEx);
Note: See TracBrowser for help on using the repository browser.

© 2024 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette