1 | /* $Id: base64.cpp 16762 2009-02-14 08:21:28Z vboxsync $ */
|
---|
2 | /** @file
|
---|
3 | * IPRT - Base64, MIME content transfer encoding.
|
---|
4 | */
|
---|
5 |
|
---|
6 | /*
|
---|
7 | * Copyright (C) 2009 Sun Microsystems, Inc.
|
---|
8 | *
|
---|
9 | * This file is part of VirtualBox Open Source Edition (OSE), as
|
---|
10 | * available from http://www.virtualbox.org. This file is free software;
|
---|
11 | * you can redistribute it and/or modify it under the terms of the GNU
|
---|
12 | * General Public License (GPL) as published by the Free Software
|
---|
13 | * Foundation, in version 2 as it comes in the "COPYING" file of the
|
---|
14 | * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
|
---|
15 | * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
|
---|
16 | *
|
---|
17 | * The contents of this file may alternatively be used under the terms
|
---|
18 | * of the Common Development and Distribution License Version 1.0
|
---|
19 | * (CDDL) only, as it comes in the "COPYING.CDDL" file of the
|
---|
20 | * VirtualBox OSE distribution, in which case the provisions of the
|
---|
21 | * CDDL are applicable instead of those of the GPL.
|
---|
22 | *
|
---|
23 | * You may elect to license modified versions of this file under the
|
---|
24 | * terms and conditions of either the GPL or the CDDL or both.
|
---|
25 | *
|
---|
26 | * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa
|
---|
27 | * Clara, CA 95054 USA or visit http://www.sun.com if you need
|
---|
28 | * additional information or have any questions.
|
---|
29 | */
|
---|
30 |
|
---|
31 | /*******************************************************************************
|
---|
32 | * Header Files *
|
---|
33 | *******************************************************************************/
|
---|
34 | #include <iprt/base64.h>
|
---|
35 | #include <iprt/assert.h>
|
---|
36 | #include <iprt/err.h>
|
---|
37 | #include <iprt/ctype.h>
|
---|
38 | #ifdef RT_STRICT
|
---|
39 | # include <iprt/asm.h>
|
---|
40 | #endif
|
---|
41 |
|
---|
42 |
|
---|
43 | /*******************************************************************************
|
---|
44 | * Defined Constants And Macros *
|
---|
45 | *******************************************************************************/
|
---|
/** @name Special g_au8CharToVal values
 * All of these are >= 64, so a table value below 64 is always a real 6-bit
 * Base64 value.
 * @{ */
/** Whitespace character (tab, LF, VT, FF, CR, space). */
#define BASE64_SPACE        0xc0
/** The '=' padding character. */
#define BASE64_PAD          0xe0
/** A character that is not part of the Base64 encoding. */
#define BASE64_INVALID      0xff
/** @} */


/*******************************************************************************
*   Global Variables                                                           *
*******************************************************************************/
/** Base64 character to value. (RFC 2045)
 * Indexed by the character code (0..255); yields the 6-bit value of the
 * character or one of the special BASE64_XXX values.
 * ASSUMES ASCII / UTF-8. */
static const uint8_t g_au8CharToVal[256] =
{
    /* 0x00..0x07 */ BASE64_INVALID, BASE64_INVALID, BASE64_INVALID, BASE64_INVALID, BASE64_INVALID, BASE64_INVALID, BASE64_INVALID, BASE64_INVALID,
    /* 0x08..0x0f */ BASE64_INVALID, BASE64_SPACE,   BASE64_SPACE,   BASE64_SPACE,   BASE64_SPACE,   BASE64_SPACE,   BASE64_INVALID, BASE64_INVALID,
    /* 0x10..0x17 */ BASE64_INVALID, BASE64_INVALID, BASE64_INVALID, BASE64_INVALID, BASE64_INVALID, BASE64_INVALID, BASE64_INVALID, BASE64_INVALID,
    /* 0x18..0x1f */ BASE64_INVALID, BASE64_INVALID, BASE64_INVALID, BASE64_INVALID, BASE64_INVALID, BASE64_INVALID, BASE64_INVALID, BASE64_INVALID,
    /* 0x20..0x27 */ BASE64_SPACE,   BASE64_INVALID, BASE64_INVALID, BASE64_INVALID, BASE64_INVALID, BASE64_INVALID, BASE64_INVALID, BASE64_INVALID,
    /* 0x28..0x2f */ BASE64_INVALID, BASE64_INVALID, BASE64_INVALID, 62,             BASE64_INVALID, BASE64_INVALID, BASE64_INVALID, 63,
    /* 0x30..0x37 */ 52,             53,             54,             55,             56,             57,             58,             59,
    /* 0x38..0x3f */ 60,             61,             BASE64_INVALID, BASE64_INVALID, BASE64_INVALID, BASE64_PAD,     BASE64_INVALID, BASE64_INVALID,
    /* 0x40..0x47 */ BASE64_INVALID, 0,              1,              2,              3,              4,              5,              6,
    /* 0x48..0x4f */ 7,              8,              9,              10,             11,             12,             13,             14,
    /* 0x50..0x57 */ 15,             16,             17,             18,             19,             20,             21,             22,
    /* 0x58..0x5f */ 23,             24,             25,             BASE64_INVALID, BASE64_INVALID, BASE64_INVALID, BASE64_INVALID, BASE64_INVALID,
    /* 0x60..0x67 */ BASE64_INVALID, 26,             27,             28,             29,             30,             31,             32,
    /* 0x68..0x6f */ 33,             34,             35,             36,             37,             38,             39,             40,
    /* 0x70..0x77 */ 41,             42,             43,             44,             45,             46,             47,             48,
    /* 0x78..0x7f */ 49,             50,             51,             BASE64_INVALID, BASE64_INVALID, BASE64_INVALID, BASE64_INVALID, BASE64_INVALID,
    /* 0x80..0x87 */ BASE64_INVALID, BASE64_INVALID, BASE64_INVALID, BASE64_INVALID, BASE64_INVALID, BASE64_INVALID, BASE64_INVALID, BASE64_INVALID,
    /* 0x88..0x8f */ BASE64_INVALID, BASE64_INVALID, BASE64_INVALID, BASE64_INVALID, BASE64_INVALID, BASE64_INVALID, BASE64_INVALID, BASE64_INVALID,
    /* 0x90..0x97 */ BASE64_INVALID, BASE64_INVALID, BASE64_INVALID, BASE64_INVALID, BASE64_INVALID, BASE64_INVALID, BASE64_INVALID, BASE64_INVALID,
    /* 0x98..0x9f */ BASE64_INVALID, BASE64_INVALID, BASE64_INVALID, BASE64_INVALID, BASE64_INVALID, BASE64_INVALID, BASE64_INVALID, BASE64_INVALID,
    /* 0xa0..0xa7 */ BASE64_INVALID, BASE64_INVALID, BASE64_INVALID, BASE64_INVALID, BASE64_INVALID, BASE64_INVALID, BASE64_INVALID, BASE64_INVALID,
    /* 0xa8..0xaf */ BASE64_INVALID, BASE64_INVALID, BASE64_INVALID, BASE64_INVALID, BASE64_INVALID, BASE64_INVALID, BASE64_INVALID, BASE64_INVALID,
    /* 0xb0..0xb7 */ BASE64_INVALID, BASE64_INVALID, BASE64_INVALID, BASE64_INVALID, BASE64_INVALID, BASE64_INVALID, BASE64_INVALID, BASE64_INVALID,
    /* 0xb8..0xbf */ BASE64_INVALID, BASE64_INVALID, BASE64_INVALID, BASE64_INVALID, BASE64_INVALID, BASE64_INVALID, BASE64_INVALID, BASE64_INVALID,
    /* 0xc0..0xc7 */ BASE64_INVALID, BASE64_INVALID, BASE64_INVALID, BASE64_INVALID, BASE64_INVALID, BASE64_INVALID, BASE64_INVALID, BASE64_INVALID,
    /* 0xc8..0xcf */ BASE64_INVALID, BASE64_INVALID, BASE64_INVALID, BASE64_INVALID, BASE64_INVALID, BASE64_INVALID, BASE64_INVALID, BASE64_INVALID,
    /* 0xd0..0xd7 */ BASE64_INVALID, BASE64_INVALID, BASE64_INVALID, BASE64_INVALID, BASE64_INVALID, BASE64_INVALID, BASE64_INVALID, BASE64_INVALID,
    /* 0xd8..0xdf */ BASE64_INVALID, BASE64_INVALID, BASE64_INVALID, BASE64_INVALID, BASE64_INVALID, BASE64_INVALID, BASE64_INVALID, BASE64_INVALID,
    /* 0xe0..0xe7 */ BASE64_INVALID, BASE64_INVALID, BASE64_INVALID, BASE64_INVALID, BASE64_INVALID, BASE64_INVALID, BASE64_INVALID, BASE64_INVALID,
    /* 0xe8..0xef */ BASE64_INVALID, BASE64_INVALID, BASE64_INVALID, BASE64_INVALID, BASE64_INVALID, BASE64_INVALID, BASE64_INVALID, BASE64_INVALID,
    /* 0xf0..0xf7 */ BASE64_INVALID, BASE64_INVALID, BASE64_INVALID, BASE64_INVALID, BASE64_INVALID, BASE64_INVALID, BASE64_INVALID, BASE64_INVALID,
    /* 0xf8..0xff */ BASE64_INVALID, BASE64_INVALID, BASE64_INVALID, BASE64_INVALID, BASE64_INVALID, BASE64_INVALID, BASE64_INVALID, BASE64_INVALID
};
|
---|
78 |
|
---|
/** Value to Base64 character. (RFC 2045)
 * Indexed by the 6-bit value (0..63); the extra element is the string
 * terminator. */
static const char g_szValToChar[64+1] =
    "ABCDEFGHIJKLMNOPQRSTUVWXYZ"    /*  0..25 */
    "abcdefghijklmnopqrstuvwxyz"    /* 26..51 */
    "0123456789"                    /* 52..61 */
    "+/";                           /* 62..63 */
|
---|
82 |
|
---|
83 |
|
---|
84 | #ifdef RT_STRICT
|
---|
85 | /**
|
---|
86 | * Perform table sanity checks on the first call.
|
---|
87 | */
|
---|
88 | static void rtBase64Sanity(void)
|
---|
89 | {
|
---|
90 | static bool s_fSane = false;
|
---|
91 | if (RT_UNLIKELY(!s_fSane))
|
---|
92 | {
|
---|
93 | for (unsigned i = 0; i < 64; i++)
|
---|
94 | {
|
---|
95 | unsigned ch = g_szValToChar[i];
|
---|
96 | Assert(ch);
|
---|
97 | Assert(g_au8CharToVal[ch] == i);
|
---|
98 | }
|
---|
99 |
|
---|
100 | for (unsigned i = 0; i < 256; i++)
|
---|
101 | {
|
---|
102 | uint8_t u8 = g_au8CharToVal[i];
|
---|
103 | Assert( ( u8 == BASE64_INVALID
|
---|
104 | && !RT_C_IS_ALNUM(i)
|
---|
105 | && !RT_C_IS_SPACE(i))
|
---|
106 | || ( u8 == BASE64_PAD
|
---|
107 | && i == '=')
|
---|
108 | || ( u8 == BASE64_SPACE
|
---|
109 | && RT_C_IS_SPACE(i))
|
---|
110 | || ( u8 < 64
|
---|
111 | && (unsigned)g_szValToChar[u8] == i));
|
---|
112 | }
|
---|
113 | ASMAtomicWriteBool(&s_fSane, true);
|
---|
114 | }
|
---|
115 | }
|
---|
116 | #endif /* RT_STRICT */
|
---|
117 |
|
---|
118 |
|
---|
119 | /**
|
---|
120 | * Calculates the decoded data size for a Base64 encoded string.
|
---|
121 | *
|
---|
122 | * @returns The length in bytes. -1 if the encoding is bad.
|
---|
123 | *
|
---|
124 | * @param pszString The Base64 encoded string.
|
---|
125 | * @param ppszEnd If not NULL, this will point to the first char
|
---|
126 | * following the Base64 encoded text block. If
|
---|
127 | * NULL the entire string is assumed to be Base64.
|
---|
128 | */
|
---|
129 | RTDECL(ssize_t) RTBase64DecodedSize(const char *pszString, char **ppszEnd)
|
---|
130 | {
|
---|
131 | #ifdef RT_STRICT
|
---|
132 | rtBase64Sanity();
|
---|
133 | #endif
|
---|
134 |
|
---|
135 | /*
|
---|
136 | * Walk the string until a non-encoded or non-space character is encountered.
|
---|
137 | */
|
---|
138 | uint32_t c6Bits = 0;
|
---|
139 | uint8_t u8 = BASE64_INVALID;
|
---|
140 | unsigned ch;
|
---|
141 | AssertCompile(sizeof(char) == sizeof(uint8_t));
|
---|
142 |
|
---|
143 | while ((ch = *pszString))
|
---|
144 | {
|
---|
145 | u8 = g_au8CharToVal[ch];
|
---|
146 | if (u8 < 64)
|
---|
147 | c6Bits++;
|
---|
148 | else if (RT_UNLIKELY(u8 != BASE64_SPACE))
|
---|
149 | break;
|
---|
150 |
|
---|
151 | /* advance */
|
---|
152 | pszString++;
|
---|
153 | }
|
---|
154 |
|
---|
155 | /*
|
---|
156 | * Padding can only be found at the end and there is
|
---|
157 | * only 1 or 2 padding chars. Deal with it first.
|
---|
158 | */
|
---|
159 | unsigned cbPad = 0;
|
---|
160 | if (u8 == BASE64_PAD)
|
---|
161 | {
|
---|
162 | cbPad = 1;
|
---|
163 | c6Bits++;
|
---|
164 | pszString++;
|
---|
165 | while ((ch = *pszString))
|
---|
166 | {
|
---|
167 | u8 = g_au8CharToVal[ch];
|
---|
168 | if (u8 != BASE64_SPACE)
|
---|
169 | {
|
---|
170 | if (u8 != BASE64_PAD)
|
---|
171 | break;
|
---|
172 | c6Bits++;
|
---|
173 | cbPad++;
|
---|
174 | }
|
---|
175 | pszString++;
|
---|
176 | }
|
---|
177 | if (cbPad >= 3)
|
---|
178 | return -1;
|
---|
179 | }
|
---|
180 |
|
---|
181 | /*
|
---|
182 | * Invalid char and no where to indicate where the
|
---|
183 | * Base64 text ends? Return failure.
|
---|
184 | */
|
---|
185 | if ( u8 == BASE64_INVALID
|
---|
186 | && !ppszEnd
|
---|
187 | && ch)
|
---|
188 | return -1;
|
---|
189 |
|
---|
190 | /*
|
---|
191 | * Recalc 6-bit to 8-bit and adjust for padding.
|
---|
192 | */
|
---|
193 | size_t cb;
|
---|
194 | if (c6Bits * 3 / 3 == c6Bits)
|
---|
195 | {
|
---|
196 | if ((c6Bits * 3 % 4) != 0)
|
---|
197 | return -1;
|
---|
198 | cb = c6Bits * 3 / 4;
|
---|
199 | }
|
---|
200 | else
|
---|
201 | {
|
---|
202 | if ((c6Bits * (uint64_t)3 % 4) != 0)
|
---|
203 | return -1;
|
---|
204 | cb = c6Bits * (uint64_t)3 / 4;
|
---|
205 | }
|
---|
206 |
|
---|
207 | if (cb < cbPad)
|
---|
208 | return -1;
|
---|
209 | cb -= cbPad;
|
---|
210 |
|
---|
211 | if (ppszEnd)
|
---|
212 | *ppszEnd = (char *)pszString;
|
---|
213 | return cb;
|
---|
214 | }
|
---|
215 |
|
---|
216 |
|
---|
217 | /**
|
---|
218 | * Decodes a Base64 encoded string into the buffer supplied by the caller.
|
---|
219 | *
|
---|
220 | * @returns IPRT status code.
|
---|
221 | * @retval VERR_BUFFER_OVERFLOW if the buffer is too small. pcbActual will not
|
---|
222 | * be set, nor will ppszEnd.
|
---|
223 | * @retval VERR_INVALID_BASE64_ENCODING if the encoding is wrong.
|
---|
224 | *
|
---|
225 | * @param pszString The Base64 string. Whether the entire string or
|
---|
226 | * just the start of the string is in Base64 depends
|
---|
227 | * on wther ppszEnd is specified or not.
|
---|
228 | * @param pvData Where to store the decoded data.
|
---|
229 | * @param cbData The size of the output buffer that pvData points to.
|
---|
230 | * @param pcbActual Where to store the actual number of bytes returned.
|
---|
231 | * Optional.
|
---|
232 | * @param ppszEnd Indicats that the string may contain other stuff
|
---|
233 | * after the Base64 encoded data when not NULL. Will
|
---|
234 | * be set to point to the first char that's not part of
|
---|
235 | * the encoding. If NULL the entire string must be part
|
---|
236 | * of the Base64 encoded data.
|
---|
237 | */
|
---|
238 | RTDECL(int) RTBase64Decode(const char *pszString, void *pvData, size_t cbData, size_t *pcbActual, char **ppszEnd)
|
---|
239 | {
|
---|
240 | #ifdef RT_STRICT
|
---|
241 | rtBase64Sanity();
|
---|
242 | #endif
|
---|
243 |
|
---|
244 | /*
|
---|
245 | * Process input in groups of 4 input / 3 output chars.
|
---|
246 | */
|
---|
247 | uint8_t u8Trio[3];
|
---|
248 | uint8_t *pbData = (uint8_t *)pvData;
|
---|
249 | uint8_t u8 = BASE64_INVALID;
|
---|
250 | unsigned c6Bits = 0;
|
---|
251 | unsigned ch;
|
---|
252 | AssertCompile(sizeof(char) == sizeof(uint8_t));
|
---|
253 |
|
---|
254 | const char *pszCurStart;
|
---|
255 | for (;;)
|
---|
256 | {
|
---|
257 | pszCurStart = pszString;
|
---|
258 | /* The first 6-bit group. */
|
---|
259 | while ((u8 = g_au8CharToVal[ch = *pszString]) == BASE64_SPACE)
|
---|
260 | pszString++;
|
---|
261 | if (u8 >= 64)
|
---|
262 | {
|
---|
263 | c6Bits = 0;
|
---|
264 | break;
|
---|
265 | }
|
---|
266 | u8Trio[0] = u8 << 2;
|
---|
267 | pszString++;
|
---|
268 |
|
---|
269 | /* The second 6-bit group. */
|
---|
270 | while ((u8 = g_au8CharToVal[ch = *pszString]) == BASE64_SPACE)
|
---|
271 | pszString++;
|
---|
272 | if (u8 >= 64)
|
---|
273 | {
|
---|
274 | c6Bits = 1;
|
---|
275 | break;
|
---|
276 | }
|
---|
277 | u8Trio[0] |= u8 >> 4;
|
---|
278 | u8Trio[1] = u8 << 4;
|
---|
279 | pszString++;
|
---|
280 |
|
---|
281 | /* The third 6-bit group. */
|
---|
282 | while ((u8 = g_au8CharToVal[ch = *pszString]) == BASE64_SPACE)
|
---|
283 | pszString++;
|
---|
284 | if (u8 >= 64)
|
---|
285 | {
|
---|
286 | c6Bits = 2;
|
---|
287 | break;
|
---|
288 | }
|
---|
289 | u8Trio[1] |= u8 >> 2;
|
---|
290 | u8Trio[2] = u8 << 6;
|
---|
291 | pszString++;
|
---|
292 |
|
---|
293 | /* The fourth 6-bit group. */
|
---|
294 | while ((u8 = g_au8CharToVal[ch = *pszString]) == BASE64_SPACE)
|
---|
295 | pszString++;
|
---|
296 | if (u8 >= 64)
|
---|
297 | {
|
---|
298 | c6Bits = 3;
|
---|
299 | break;
|
---|
300 | }
|
---|
301 | u8Trio[2] |= u8;
|
---|
302 | pszString++;
|
---|
303 |
|
---|
304 | /* flush the trio */
|
---|
305 | if (cbData < 3)
|
---|
306 | return VERR_BUFFER_OVERFLOW;
|
---|
307 | cbData -= 3;
|
---|
308 | pbData[0] = u8Trio[0];
|
---|
309 | pbData[1] = u8Trio[1];
|
---|
310 | pbData[2] = u8Trio[2];
|
---|
311 | pbData += 3;
|
---|
312 | }
|
---|
313 |
|
---|
314 | /*
|
---|
315 | * Padding can only be found at the end and there is
|
---|
316 | * only 1 or 2 padding chars. Deal with it first.
|
---|
317 | */
|
---|
318 | unsigned cbPad = 0;
|
---|
319 | if (u8 == BASE64_PAD)
|
---|
320 | {
|
---|
321 | cbPad = 1;
|
---|
322 | pszString++;
|
---|
323 | while ((ch = *pszString))
|
---|
324 | {
|
---|
325 | u8 = g_au8CharToVal[ch];
|
---|
326 | if (u8 != BASE64_SPACE)
|
---|
327 | {
|
---|
328 | if (u8 != BASE64_PAD)
|
---|
329 | break;
|
---|
330 | cbPad++;
|
---|
331 | }
|
---|
332 | pszString++;
|
---|
333 | }
|
---|
334 | if (cbPad >= 3)
|
---|
335 | return VERR_INVALID_BASE64_ENCODING;
|
---|
336 | }
|
---|
337 |
|
---|
338 | /*
|
---|
339 | * Invalid char and no where to indicate where the
|
---|
340 | * Base64 text ends? Return failure.
|
---|
341 | */
|
---|
342 | if ( u8 == BASE64_INVALID
|
---|
343 | && !ppszEnd
|
---|
344 | && ch)
|
---|
345 | return VERR_INVALID_BASE64_ENCODING;
|
---|
346 |
|
---|
347 | /*
|
---|
348 | * Check padding vs. pending sextets, if anything left to do finish it off.
|
---|
349 | */
|
---|
350 | if (c6Bits || cbPad)
|
---|
351 | {
|
---|
352 | if (c6Bits + cbPad != 4)
|
---|
353 | return VERR_INVALID_BASE64_ENCODING;
|
---|
354 |
|
---|
355 | switch (c6Bits)
|
---|
356 | {
|
---|
357 | case 1:
|
---|
358 | u8Trio[1] = u8Trio[2] = 0;
|
---|
359 | break;
|
---|
360 | case 2:
|
---|
361 | u8Trio[2] = 0;
|
---|
362 | break;
|
---|
363 | case 3:
|
---|
364 | default:
|
---|
365 | break;
|
---|
366 | }
|
---|
367 | switch (3 - cbPad)
|
---|
368 | {
|
---|
369 | case 1:
|
---|
370 | if (cbData < 1)
|
---|
371 | return VERR_BUFFER_OVERFLOW;
|
---|
372 | cbData--;
|
---|
373 | pbData[0] = u8Trio[0];
|
---|
374 | pbData++;
|
---|
375 | break;
|
---|
376 |
|
---|
377 | case 2:
|
---|
378 | if (cbData < 2)
|
---|
379 | return VERR_BUFFER_OVERFLOW;
|
---|
380 | cbData -= 2;
|
---|
381 | pbData[0] = u8Trio[0];
|
---|
382 | pbData[1] = u8Trio[1];
|
---|
383 | pbData += 2;
|
---|
384 | break;
|
---|
385 |
|
---|
386 | default:
|
---|
387 | break;
|
---|
388 | }
|
---|
389 | }
|
---|
390 |
|
---|
391 | /*
|
---|
392 | * Set optional return values and return successfully.
|
---|
393 | */
|
---|
394 | if (ppszEnd)
|
---|
395 | *ppszEnd = (char *)pszString;
|
---|
396 | if (pcbActual)
|
---|
397 | *pcbActual = pbData - (uint8_t *)pvData;
|
---|
398 | return VINF_SUCCESS;
|
---|
399 | }
|
---|
400 |
|
---|
401 |
|
---|
402 | /**
|
---|
403 | * Calculates the length of the Base64 encoding of a given number of bytes of
|
---|
404 | * data.
|
---|
405 | *
|
---|
406 | * This will assume line breaks every 64 chars. A RTBase64EncodedLengthEx
|
---|
407 | * function can be added if closer control over the output is found to be
|
---|
408 | * required.
|
---|
409 | *
|
---|
410 | * @returns The Base64 string length.
|
---|
411 | * @param cbData The number of bytes to encode.
|
---|
412 | */
|
---|
413 | RTDECL(size_t) RTBase64EncodedLength(size_t cbData)
|
---|
414 | {
|
---|
415 | if (cbData * 8 / 8 != cbData)
|
---|
416 | {
|
---|
417 | AssertReturn(sizeof(size_t) == sizeof(uint64_t), ~(size_t)0);
|
---|
418 | uint64_t cch = cbData * (uint64_t)8;
|
---|
419 | while (cch % 24)
|
---|
420 | cch += 8;
|
---|
421 | cch /= 6;
|
---|
422 |
|
---|
423 | cch += (cch / 64) * RTBASE64_EOL_SIZE;
|
---|
424 | return cch;
|
---|
425 | }
|
---|
426 |
|
---|
427 | size_t cch = cbData * 8;
|
---|
428 | while (cch % 24)
|
---|
429 | cch += 8;
|
---|
430 | cch /= 6;
|
---|
431 |
|
---|
432 | cch += (cch / 64) * RTBASE64_EOL_SIZE;
|
---|
433 | return cch;
|
---|
434 | }
|
---|
435 |
|
---|
436 |
|
---|
437 | /**
|
---|
438 | * Encodes the specifed data into a Base64 string, the caller supplies the
|
---|
439 | * output buffer.
|
---|
440 | *
|
---|
441 | * This will make the same assumptions about line breaks and EOL size as
|
---|
442 | * RTBase64EncodedLength() does. A RTBase64EncodeEx function can be added if
|
---|
443 | * more strict control over the output formatting is found necessary.
|
---|
444 | *
|
---|
445 | * @returns IRPT status code.
|
---|
446 | * @retval VERR_BUFFER_OVERFLOW if the output buffer is too small. The buffer
|
---|
447 | * may contain an invalid Base64 string.
|
---|
448 | *
|
---|
449 | * @param pszBuf Where to put the Base64 string.
|
---|
450 | * @param cbBuf The size of the output buffer, including the terminator.
|
---|
451 | * @param pvData The data to encode.
|
---|
452 | * @param cbData The number of bytes to encode.
|
---|
453 | */
|
---|
454 | RTDECL(int) RTBase64Encode(char *pszBuf, size_t cbBuf, const void *pvData, size_t cbData)
|
---|
455 | {
|
---|
456 | /** @todo implement RTBase64Encode. */
|
---|
457 | return VERR_NOT_IMPLEMENTED;
|
---|
458 | }
|
---|
459 |
|
---|