VirtualBox

source: vbox/trunk/src/VBox/Runtime/common/string/base64.cpp@ 16762

Last change on this file since 16762 was 16762, checked in by vboxsync, 16 years ago

RTBase64Decode testcase and bugfixes.

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 14.3 KB
Line 
1/* $Id: base64.cpp 16762 2009-02-14 08:21:28Z vboxsync $ */
2/** @file
3 * IPRT - Base64, MIME content transfer encoding.
4 */
5
6/*
7 * Copyright (C) 2009 Sun Microsystems, Inc.
8 *
9 * This file is part of VirtualBox Open Source Edition (OSE), as
10 * available from http://www.virtualbox.org. This file is free software;
11 * you can redistribute it and/or modify it under the terms of the GNU
12 * General Public License (GPL) as published by the Free Software
13 * Foundation, in version 2 as it comes in the "COPYING" file of the
14 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
15 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
16 *
17 * The contents of this file may alternatively be used under the terms
18 * of the Common Development and Distribution License Version 1.0
19 * (CDDL) only, as it comes in the "COPYING.CDDL" file of the
20 * VirtualBox OSE distribution, in which case the provisions of the
21 * CDDL are applicable instead of those of the GPL.
22 *
23 * You may elect to license modified versions of this file under the
24 * terms and conditions of either the GPL or the CDDL or both.
25 *
26 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa
27 * Clara, CA 95054 USA or visit http://www.sun.com if you need
28 * additional information or have any questions.
29 */
30
31/*******************************************************************************
32* Header Files *
33*******************************************************************************/
34#include <iprt/base64.h>
35#include <iprt/assert.h>
36#include <iprt/err.h>
37#include <iprt/ctype.h>
38#ifdef RT_STRICT
39# include <iprt/asm.h>
40#endif
41
42
43/*******************************************************************************
44* Defined Constants And Macros *
45*******************************************************************************/
46/** @name Special g_au8CharToVal values
47 * @{ */
48#define BASE64_SPACE 0xc0
49#define BASE64_PAD 0xe0
50#define BASE64_INVALID 0xff
51/** @} */
52
53
54/*******************************************************************************
55* Global Variables *
56*******************************************************************************/
57/** Base64 character to value. (RFC 2045)
58 * ASSUMES ASCII / UTF-8.
 *
 * Indexed by the raw input byte (0..255).  Entries below 64 are the 6-bit
 * value of a Base64 alphabet character ('A'..'Z', 'a'..'z', '0'..'9', '+'
 * and '/').  The remaining entries use the special markers defined above:
 *   - 0xc0 (BASE64_SPACE)   for the bytes 0x09..0x0d and 0x20,
 *   - 0xe0 (BASE64_PAD)     for '=' (0x3d),
 *   - 0xff (BASE64_INVALID) for every other byte, including 0x00 and all
 *     bytes in the range 0x80..0xff.
 *
 * Each row below covers 16 consecutive byte values; the trailing comment on
 * the row gives the range. */
59static const uint8_t g_au8CharToVal[256] =
60{
61 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xff, 0xff, /* 0x00..0x0f */
62 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, /* 0x10..0x1f */
63 0xc0, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 62, 0xff, 0xff, 0xff, 63, /* 0x20..0x2f */
64 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 0xff, 0xff, 0xff, 0xe0, 0xff, 0xff, /* 0x30..0x3f */
65 0xff, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, /* 0x40..0x4f */
66 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 0xff, 0xff, 0xff, 0xff, 0xff, /* 0x50..0x5f */
67 0xff, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, /* 0x60..0x6f */
68 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 0xff, 0xff, 0xff, 0xff, 0xff, /* 0x70..0x7f */
69 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, /* 0x80..0x8f */
70 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, /* 0x90..0x9f */
71 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, /* 0xa0..0xaf */
72 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, /* 0xb0..0xbf */
73 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, /* 0xc0..0xcf */
74 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, /* 0xd0..0xdf */
75 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, /* 0xe0..0xef */
76 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff /* 0xf0..0xff */
77};
78
/** Maps a 6-bit value (0..63) to its Base64 alphabet character. (RFC 2045)
 * The array is one element larger than the alphabet so that it is also a
 * zero terminated string. */
static const char g_szValToChar[64+1] =
    "ABCDEFGHIJKLMNOPQRSTUVWXYZ"    /*  0..25 */
    "abcdefghijklmnopqrstuvwxyz"    /* 26..51 */
    "0123456789"                    /* 52..61 */
    "+/";                           /* 62..63 */
82
83
84#ifdef RT_STRICT
85/**
86 * Perform table sanity checks on the first call.
87 */
88static void rtBase64Sanity(void)
89{
90 static bool s_fSane = false;
91 if (RT_UNLIKELY(!s_fSane))
92 {
93 for (unsigned i = 0; i < 64; i++)
94 {
95 unsigned ch = g_szValToChar[i];
96 Assert(ch);
97 Assert(g_au8CharToVal[ch] == i);
98 }
99
100 for (unsigned i = 0; i < 256; i++)
101 {
102 uint8_t u8 = g_au8CharToVal[i];
103 Assert( ( u8 == BASE64_INVALID
104 && !RT_C_IS_ALNUM(i)
105 && !RT_C_IS_SPACE(i))
106 || ( u8 == BASE64_PAD
107 && i == '=')
108 || ( u8 == BASE64_SPACE
109 && RT_C_IS_SPACE(i))
110 || ( u8 < 64
111 && (unsigned)g_szValToChar[u8] == i));
112 }
113 ASMAtomicWriteBool(&s_fSane, true);
114 }
115}
116#endif /* RT_STRICT */
117
118
119/**
120 * Calculates the decoded data size for a Base64 encoded string.
121 *
122 * @returns The length in bytes. -1 if the encoding is bad.
123 *
124 * @param pszString The Base64 encoded string.
125 * @param ppszEnd If not NULL, this will point to the first char
126 * following the Base64 encoded text block. If
127 * NULL the entire string is assumed to be Base64.
128 */
129RTDECL(ssize_t) RTBase64DecodedSize(const char *pszString, char **ppszEnd)
130{
131#ifdef RT_STRICT
132 rtBase64Sanity();
133#endif
134
135 /*
136 * Walk the string until a non-encoded or non-space character is encountered.
137 */
138 uint32_t c6Bits = 0;
139 uint8_t u8 = BASE64_INVALID;
140 unsigned ch;
141 AssertCompile(sizeof(char) == sizeof(uint8_t));
142
143 while ((ch = *pszString))
144 {
145 u8 = g_au8CharToVal[ch];
146 if (u8 < 64)
147 c6Bits++;
148 else if (RT_UNLIKELY(u8 != BASE64_SPACE))
149 break;
150
151 /* advance */
152 pszString++;
153 }
154
155 /*
156 * Padding can only be found at the end and there is
157 * only 1 or 2 padding chars. Deal with it first.
158 */
159 unsigned cbPad = 0;
160 if (u8 == BASE64_PAD)
161 {
162 cbPad = 1;
163 c6Bits++;
164 pszString++;
165 while ((ch = *pszString))
166 {
167 u8 = g_au8CharToVal[ch];
168 if (u8 != BASE64_SPACE)
169 {
170 if (u8 != BASE64_PAD)
171 break;
172 c6Bits++;
173 cbPad++;
174 }
175 pszString++;
176 }
177 if (cbPad >= 3)
178 return -1;
179 }
180
181 /*
182 * Invalid char and no where to indicate where the
183 * Base64 text ends? Return failure.
184 */
185 if ( u8 == BASE64_INVALID
186 && !ppszEnd
187 && ch)
188 return -1;
189
190 /*
191 * Recalc 6-bit to 8-bit and adjust for padding.
192 */
193 size_t cb;
194 if (c6Bits * 3 / 3 == c6Bits)
195 {
196 if ((c6Bits * 3 % 4) != 0)
197 return -1;
198 cb = c6Bits * 3 / 4;
199 }
200 else
201 {
202 if ((c6Bits * (uint64_t)3 % 4) != 0)
203 return -1;
204 cb = c6Bits * (uint64_t)3 / 4;
205 }
206
207 if (cb < cbPad)
208 return -1;
209 cb -= cbPad;
210
211 if (ppszEnd)
212 *ppszEnd = (char *)pszString;
213 return cb;
214}
215
216
217/**
218 * Decodes a Base64 encoded string into the buffer supplied by the caller.
219 *
220 * @returns IPRT status code.
221 * @retval VERR_BUFFER_OVERFLOW if the buffer is too small. pcbActual will not
222 * be set, nor will ppszEnd.
223 * @retval VERR_INVALID_BASE64_ENCODING if the encoding is wrong.
224 *
225 * @param pszString The Base64 string. Whether the entire string or
226 * just the start of the string is in Base64 depends
227 * on wther ppszEnd is specified or not.
228 * @param pvData Where to store the decoded data.
229 * @param cbData The size of the output buffer that pvData points to.
230 * @param pcbActual Where to store the actual number of bytes returned.
231 * Optional.
232 * @param ppszEnd Indicats that the string may contain other stuff
233 * after the Base64 encoded data when not NULL. Will
234 * be set to point to the first char that's not part of
235 * the encoding. If NULL the entire string must be part
236 * of the Base64 encoded data.
237 */
238RTDECL(int) RTBase64Decode(const char *pszString, void *pvData, size_t cbData, size_t *pcbActual, char **ppszEnd)
239{
240#ifdef RT_STRICT
241 rtBase64Sanity();
242#endif
243
244 /*
245 * Process input in groups of 4 input / 3 output chars.
246 */
247 uint8_t u8Trio[3];
248 uint8_t *pbData = (uint8_t *)pvData;
249 uint8_t u8 = BASE64_INVALID;
250 unsigned c6Bits = 0;
251 unsigned ch;
252 AssertCompile(sizeof(char) == sizeof(uint8_t));
253
254const char *pszCurStart;
255 for (;;)
256 {
257pszCurStart = pszString;
258 /* The first 6-bit group. */
259 while ((u8 = g_au8CharToVal[ch = *pszString]) == BASE64_SPACE)
260 pszString++;
261 if (u8 >= 64)
262 {
263 c6Bits = 0;
264 break;
265 }
266 u8Trio[0] = u8 << 2;
267 pszString++;
268
269 /* The second 6-bit group. */
270 while ((u8 = g_au8CharToVal[ch = *pszString]) == BASE64_SPACE)
271 pszString++;
272 if (u8 >= 64)
273 {
274 c6Bits = 1;
275 break;
276 }
277 u8Trio[0] |= u8 >> 4;
278 u8Trio[1] = u8 << 4;
279 pszString++;
280
281 /* The third 6-bit group. */
282 while ((u8 = g_au8CharToVal[ch = *pszString]) == BASE64_SPACE)
283 pszString++;
284 if (u8 >= 64)
285 {
286 c6Bits = 2;
287 break;
288 }
289 u8Trio[1] |= u8 >> 2;
290 u8Trio[2] = u8 << 6;
291 pszString++;
292
293 /* The fourth 6-bit group. */
294 while ((u8 = g_au8CharToVal[ch = *pszString]) == BASE64_SPACE)
295 pszString++;
296 if (u8 >= 64)
297 {
298 c6Bits = 3;
299 break;
300 }
301 u8Trio[2] |= u8;
302 pszString++;
303
304 /* flush the trio */
305 if (cbData < 3)
306 return VERR_BUFFER_OVERFLOW;
307 cbData -= 3;
308 pbData[0] = u8Trio[0];
309 pbData[1] = u8Trio[1];
310 pbData[2] = u8Trio[2];
311 pbData += 3;
312 }
313
314 /*
315 * Padding can only be found at the end and there is
316 * only 1 or 2 padding chars. Deal with it first.
317 */
318 unsigned cbPad = 0;
319 if (u8 == BASE64_PAD)
320 {
321 cbPad = 1;
322 pszString++;
323 while ((ch = *pszString))
324 {
325 u8 = g_au8CharToVal[ch];
326 if (u8 != BASE64_SPACE)
327 {
328 if (u8 != BASE64_PAD)
329 break;
330 cbPad++;
331 }
332 pszString++;
333 }
334 if (cbPad >= 3)
335 return VERR_INVALID_BASE64_ENCODING;
336 }
337
338 /*
339 * Invalid char and no where to indicate where the
340 * Base64 text ends? Return failure.
341 */
342 if ( u8 == BASE64_INVALID
343 && !ppszEnd
344 && ch)
345 return VERR_INVALID_BASE64_ENCODING;
346
347 /*
348 * Check padding vs. pending sextets, if anything left to do finish it off.
349 */
350 if (c6Bits || cbPad)
351 {
352 if (c6Bits + cbPad != 4)
353 return VERR_INVALID_BASE64_ENCODING;
354
355 switch (c6Bits)
356 {
357 case 1:
358 u8Trio[1] = u8Trio[2] = 0;
359 break;
360 case 2:
361 u8Trio[2] = 0;
362 break;
363 case 3:
364 default:
365 break;
366 }
367 switch (3 - cbPad)
368 {
369 case 1:
370 if (cbData < 1)
371 return VERR_BUFFER_OVERFLOW;
372 cbData--;
373 pbData[0] = u8Trio[0];
374 pbData++;
375 break;
376
377 case 2:
378 if (cbData < 2)
379 return VERR_BUFFER_OVERFLOW;
380 cbData -= 2;
381 pbData[0] = u8Trio[0];
382 pbData[1] = u8Trio[1];
383 pbData += 2;
384 break;
385
386 default:
387 break;
388 }
389 }
390
391 /*
392 * Set optional return values and return successfully.
393 */
394 if (ppszEnd)
395 *ppszEnd = (char *)pszString;
396 if (pcbActual)
397 *pcbActual = pbData - (uint8_t *)pvData;
398 return VINF_SUCCESS;
399}
400
401
402/**
403 * Calculates the length of the Base64 encoding of a given number of bytes of
404 * data.
405 *
406 * This will assume line breaks every 64 chars. A RTBase64EncodedLengthEx
407 * function can be added if closer control over the output is found to be
408 * required.
409 *
410 * @returns The Base64 string length.
411 * @param cbData The number of bytes to encode.
412 */
413RTDECL(size_t) RTBase64EncodedLength(size_t cbData)
414{
415 if (cbData * 8 / 8 != cbData)
416 {
417 AssertReturn(sizeof(size_t) == sizeof(uint64_t), ~(size_t)0);
418 uint64_t cch = cbData * (uint64_t)8;
419 while (cch % 24)
420 cch += 8;
421 cch /= 6;
422
423 cch += (cch / 64) * RTBASE64_EOL_SIZE;
424 return cch;
425 }
426
427 size_t cch = cbData * 8;
428 while (cch % 24)
429 cch += 8;
430 cch /= 6;
431
432 cch += (cch / 64) * RTBASE64_EOL_SIZE;
433 return cch;
434}
435
436
437/**
438 * Encodes the specifed data into a Base64 string, the caller supplies the
439 * output buffer.
440 *
441 * This will make the same assumptions about line breaks and EOL size as
442 * RTBase64EncodedLength() does. A RTBase64EncodeEx function can be added if
443 * more strict control over the output formatting is found necessary.
444 *
445 * @returns IRPT status code.
446 * @retval VERR_BUFFER_OVERFLOW if the output buffer is too small. The buffer
447 * may contain an invalid Base64 string.
448 *
449 * @param pszBuf Where to put the Base64 string.
450 * @param cbBuf The size of the output buffer, including the terminator.
451 * @param pvData The data to encode.
452 * @param cbData The number of bytes to encode.
453 */
454RTDECL(int) RTBase64Encode(char *pszBuf, size_t cbBuf, const void *pvData, size_t cbData)
455{
456 /** @todo implement RTBase64Encode. */
457 return VERR_NOT_IMPLEMENTED;
458}
459
Note: See TracBrowser for help on using the repository browser.

© 2024 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette