VirtualBox

source: vbox/trunk/src/VBox/Runtime/r3/posix/utf8-posix.cpp@ 30294

Last change on this file since 30294 was 30294, checked in by vboxsync, 14 years ago

build fix

  • Property svn:eol-style set to native
  • Property svn:keywords set to Id
File size: 16.2 KB
Line 
1/* $Id: utf8-posix.cpp 30294 2010-06-17 21:51:43Z vboxsync $ */
2/** @file
3 * IPRT - UTF-8 helpers, POSIX.
4 */
5
6/*
7 * Copyright (C) 2006-2010 Oracle Corporation
8 *
9 * This file is part of VirtualBox Open Source Edition (OSE), as
10 * available from http://www.virtualbox.org. This file is free software;
11 * you can redistribute it and/or modify it under the terms of the GNU
12 * General Public License (GPL) as published by the Free Software
13 * Foundation, in version 2 as it comes in the "COPYING" file of the
14 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
15 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
16 *
17 * The contents of this file may alternatively be used under the terms
18 * of the Common Development and Distribution License Version 1.0
19 * (CDDL) only, as it comes in the "COPYING.CDDL" file of the
20 * VirtualBox OSE distribution, in which case the provisions of the
21 * CDDL are applicable instead of those of the GPL.
22 *
23 * You may elect to license modified versions of this file under the
24 * terms and conditions of either the GPL or the CDDL or both.
25 */
26
27
28/*******************************************************************************
29* Header Files *
30*******************************************************************************/
31#include <iprt/string.h>
32#include "internal/iprt.h"
33
34#include <iprt/alloc.h>
35#include <iprt/assert.h>
36#include <iprt/err.h>
37#include <iprt/string.h>
38
39#include <errno.h>
40#include <locale.h>
41#include <iconv.h>
42#include <wctype.h>
43
44#include <langinfo.h>
45
46#include "internal/alignmentchecks.h"
47#include "internal/string.h"
48#ifdef RT_WITH_ICONV_CACHE
49# include "internal/thread.h"
50AssertCompile(sizeof(iconv_t) <= sizeof(void *));
51#endif
52
53
/**
 * Looks up the character encoding name of the current locale (LC_CTYPE).
 *
 * @returns Read-only codeset name string, exactly as handed back by
 *          nl_langinfo(CODESET).
 */
const char *rtStrGetLocaleCodeset(void)
{
    const char *pszCodeset = nl_langinfo(CODESET);
    return pszCodeset;
}
63
64
65#ifdef RT_WITH_ICONV_CACHE
66
67/**
68 * Initializes the iconv handle cache associated with a thread.
69 *
70 * @param pThread The thread in question.
71 */
72void rtStrIconvCacheInit(PRTTHREADINT pThread)
73{
74 for (size_t i = 0; i < RT_ELEMENTS(pThread->ahIconvs); i++)
75 pThread->ahIconvs[i] = (iconv_t)-1;
76}
77
78/**
79 * Destroys the iconv handle cache associated with a thread.
80 *
81 * @param pThread The thread in question.
82 */
83void rtStrIconvCacheDestroy(PRTTHREADINT pThread)
84{
85 for (size_t i = 0; i < RT_ELEMENTS(pThread->ahIconvs); i++)
86 {
87 iconv_t hIconv = (iconv_t)pThread->ahIconvs[i];
88 pThread->ahIconvs[i] = (iconv_t)-1;
89 if (hIconv != (iconv_t)-1)
90 iconv_close(hIconv);
91 }
92}
93
94
95/**
96 * Converts a string from one charset to another.
97 *
98 * @returns iprt status code.
99 * @param pvInput Pointer to intput string.
100 * @param cbInput Size (in bytes) of input string. Excludes any terminators.
101 * @param pszInputCS Codeset of the input string.
102 * @param ppvOutput Pointer to pointer to output buffer if cbOutput > 0.
103 * If cbOutput is 0 this is where the pointer to the allocated
104 * buffer is stored.
105 * @param cbOutput Size of the passed in buffer.
106 * @param pszOutputCS Codeset of the input string.
107 * @param cFactor Input vs. output size factor.
108 * @param phIconv Pointer to the cache entry.
109 */
110static int rtstrConvertCached(const void *pvInput, size_t cbInput, const char *pszInputCS,
111 void **ppvOutput, size_t cbOutput, const char *pszOutputCS,
112 unsigned cFactor, iconv_t *phIconv)
113{
114 /*
115 * Allocate buffer
116 */
117 bool fUcs2Term;
118 void *pvOutput;
119 size_t cbOutput2;
120 if (!cbOutput)
121 {
122 cbOutput2 = cbInput * cFactor;
123 pvOutput = RTMemTmpAlloc(cbOutput2 + sizeof(RTUTF16));
124 if (!pvOutput)
125 return VERR_NO_TMP_MEMORY;
126 fUcs2Term = true;
127 }
128 else
129 {
130 pvOutput = *ppvOutput;
131 fUcs2Term = !strcmp(pszOutputCS, "UCS-2")
132 || !strcmp(pszOutputCS, "UTF-16")
133 || !strcmp(pszOutputCS, "ucs-2")
134 || !strcmp(pszOutputCS, "utf-16");
135 cbOutput2 = cbOutput - (fUcs2Term ? sizeof(RTUTF16) : 1);
136 if (cbOutput2 > cbOutput)
137 return VERR_BUFFER_OVERFLOW;
138 }
139
140 /*
141 * Use a loop here to retry with bigger buffers.
142 */
143 for (unsigned cTries = 10; cTries > 0; cTries--)
144 {
145 /*
146 * Create conversion object if necessary.
147 */
148 iconv_t hIconv = (iconv_t)*phIconv;
149 if (hIconv == (iconv_t)-1)
150 {
151#ifdef RT_OS_SOLARIS
152 /* Solaris doesn't grok empty codeset strings, so help it find the current codeset. */
153 if (!*pszInputCS)
154 pszInputCS = rtStrGetLocaleCodeset();
155 if (!*pszOutputCS)
156 pszOutputCS = rtStrGetLocaleCodeset();
157#endif
158 IPRT_ALIGNMENT_CHECKS_DISABLE(); /* glibc causes trouble */
159 *phIconv = hIconv = iconv_open(pszOutputCS, pszInputCS);
160 IPRT_ALIGNMENT_CHECKS_ENABLE();
161 }
162 if (hIconv != (iconv_t)-1)
163 {
164 /*
165 * Do the conversion.
166 */
167 size_t cbInLeft = cbInput;
168 size_t cbOutLeft = cbOutput2;
169 const void *pvInputLeft = pvInput;
170 void *pvOutputLeft = pvOutput;
171#if defined(RT_OS_LINUX) || (defined(RT_OS_DARWIN) && defined(_DARWIN_FEATURE_UNIX_CONFORMANCE)) /* there are different opinions about the constness of the input buffer. */
172 if (iconv(hIconv, (char **)&pvInputLeft, &cbInLeft, (char **)&pvOutputLeft, &cbOutLeft) != (size_t)-1)
173#else
174 if (iconv(hIconv, (const char **)&pvInputLeft, &cbInLeft, (char **)&pvOutputLeft, &cbOutLeft) != (size_t)-1)
175#endif
176 {
177 if (!cbInLeft)
178 {
179 /*
180 * We're done, just add the terminator and return.
181 * (Two terminators to support UCS-2 output, too.)
182 */
183 ((char *)pvOutputLeft)[0] = '\0';
184 if (fUcs2Term)
185 ((char *)pvOutputLeft)[1] = '\0';
186 *ppvOutput = pvOutput;
187 return VINF_SUCCESS;
188 }
189 errno = E2BIG;
190 }
191
192 /*
193 * If we failed because of output buffer space we'll
194 * increase the output buffer size and retry.
195 */
196 if (errno == E2BIG)
197 {
198 if (!cbOutput)
199 {
200 RTMemTmpFree(pvOutput);
201 cbOutput2 *= 2;
202 pvOutput = RTMemTmpAlloc(cbOutput2 + sizeof(RTUTF16));
203 if (!pvOutput)
204 return VERR_NO_TMP_MEMORY;
205 continue;
206 }
207 return VERR_BUFFER_OVERFLOW;
208 }
209
210 /*
211 * Close the handle on all other errors to make sure we won't carry
212 * any bad state with us.
213 */
214 *phIconv = (iconv_t)-1;
215 iconv_close(hIconv);
216 }
217 break;
218 }
219
220 /* failure */
221 if (!cbOutput)
222 RTMemTmpFree(pvOutput);
223 return VERR_NO_TRANSLATION;
224}
225
226#endif /* RT_WITH_ICONV_CACHE */
227
228/**
229 * Converts a string from one charset to another without using the handle cache.
230 *
231 * @returns IPRT status code.
232 *
233 * @param pvInput Pointer to intput string.
234 * @param cbInput Size (in bytes) of input string. Excludes any terminators.
235 * @param pszInputCS Codeset of the input string.
236 * @param ppvOutput Pointer to pointer to output buffer if cbOutput > 0.
237 * If cbOutput is 0 this is where the pointer to the allocated
238 * buffer is stored.
239 * @param cbOutput Size of the passed in buffer.
240 * @param pszOutputCS Codeset of the input string.
241 * @param cFactor Input vs. output size factor.
242 */
243static int rtStrConvertUncached(const void *pvInput, size_t cbInput, const char *pszInputCS,
244 void **ppvOutput, size_t cbOutput, const char *pszOutputCS,
245 unsigned cFactor)
246{
247 /*
248 * Allocate buffer
249 */
250 bool fUcs2Term;
251 void *pvOutput;
252 size_t cbOutput2;
253 if (!cbOutput)
254 {
255 cbOutput2 = cbInput * cFactor;
256 pvOutput = RTMemTmpAlloc(cbOutput2 + sizeof(RTUTF16));
257 if (!pvOutput)
258 return VERR_NO_TMP_MEMORY;
259 fUcs2Term = true;
260 }
261 else
262 {
263 pvOutput = *ppvOutput;
264 fUcs2Term = !strcmp(pszOutputCS, "UCS-2");
265 cbOutput2 = cbOutput - (fUcs2Term ? sizeof(RTUTF16) : 1);
266 if (cbOutput2 > cbOutput)
267 return VERR_BUFFER_OVERFLOW;
268 }
269
270 /*
271 * Use a loop here to retry with bigger buffers.
272 */
273 for (unsigned cTries = 10; cTries > 0; cTries--)
274 {
275 /*
276 * Create conversion object.
277 */
278#ifdef RT_OS_SOLARIS
279 /* Solaris doesn't grok empty codeset strings, so help it find the current codeset. */
280 if (!*pszInputCS)
281 pszInputCS = rtStrGetLocaleCodeset();
282 if (!*pszOutputCS)
283 pszOutputCS = rtStrGetLocaleCodeset();
284#endif
285 IPRT_ALIGNMENT_CHECKS_DISABLE(); /* glibc causes trouble */
286 iconv_t icHandle = iconv_open(pszOutputCS, pszInputCS);
287 IPRT_ALIGNMENT_CHECKS_ENABLE();
288 if (icHandle != (iconv_t)-1)
289 {
290 /*
291 * Do the conversion.
292 */
293 size_t cbInLeft = cbInput;
294 size_t cbOutLeft = cbOutput2;
295 const void *pvInputLeft = pvInput;
296 void *pvOutputLeft = pvOutput;
297#if defined(RT_OS_LINUX) || (defined(RT_OS_DARWIN) && defined(_DARWIN_FEATURE_UNIX_CONFORMANCE)) /* there are different opinions about the constness of the input buffer. */
298 if (iconv(icHandle, (char **)&pvInputLeft, &cbInLeft, (char **)&pvOutputLeft, &cbOutLeft) != (size_t)-1)
299#else
300 if (iconv(icHandle, (const char **)&pvInputLeft, &cbInLeft, (char **)&pvOutputLeft, &cbOutLeft) != (size_t)-1)
301#endif
302 {
303 if (!cbInLeft)
304 {
305 /*
306 * We're done, just add the terminator and return.
307 * (Two terminators to support UCS-2 output, too.)
308 */
309 iconv_close(icHandle);
310 ((char *)pvOutputLeft)[0] = '\0';
311 if (fUcs2Term)
312 ((char *)pvOutputLeft)[1] = '\0';
313 *ppvOutput = pvOutput;
314 return VINF_SUCCESS;
315 }
316 errno = E2BIG;
317 }
318 iconv_close(icHandle);
319
320 /*
321 * If we failed because of output buffer space we'll
322 * increase the output buffer size and retry.
323 */
324 if (errno == E2BIG)
325 {
326 if (!cbOutput)
327 {
328 RTMemTmpFree(pvOutput);
329 cbOutput2 *= 2;
330 pvOutput = RTMemTmpAlloc(cbOutput2 + sizeof(RTUTF16));
331 if (!pvOutput)
332 return VERR_NO_TMP_MEMORY;
333 continue;
334 }
335 return VERR_BUFFER_OVERFLOW;
336 }
337 }
338 break;
339 }
340
341 /* failure */
342 if (!cbOutput)
343 RTMemTmpFree(pvOutput);
344 return VERR_NO_TRANSLATION;
345}
346
347
348/**
349 * Wrapper that selects rtStrConvertCached or rtStrConvertUncached.
350 *
351 * @returns IPRT status code.
352 *
353 * @param pszInput Pointer to intput string.
354 * @param cchInput Size (in bytes) of input string. Excludes any
355 * terminators.
356 * @param pszInputCS Codeset of the input string.
357 * @param ppszOutput Pointer to pointer to output buffer if cbOutput > 0.
358 * If cbOutput is 0 this is where the pointer to the
359 * allocated buffer is stored.
360 * @param cbOutput Size of the passed in buffer.
361 * @param pszOutputCS Codeset of the input string.
362 * @param cFactor Input vs. output size factor.
363 * @param enmCacheIdx The iconv cache index.
364 */
365DECLINLINE(int) rtStrConvertWrapper(const char *pchInput, size_t cchInput, const char *pszInputCS,
366 char **ppszOutput, size_t cbOutput, const char *pszOutputCS,
367 unsigned cFactor, RTSTRICONV enmCacheIdx)
368{
369#ifdef RT_WITH_ICONV_CACHE
370 RTTHREAD hSelf = RTThreadSelf();
371 if (hSelf != NIL_RTTHREAD)
372 {
373 PRTTHREADINT pThread = rtThreadGet(hSelf);
374 if ( pThread
375 && (pThread->fIntFlags & (RTTHREADINT_FLAGS_ALIEN | RTTHREADINT_FLAGS_MAIN)) != RTTHREADINT_FLAGS_ALIEN)
376 return rtstrConvertCached(pchInput, cchInput, pszInputCS,
377 (void **)ppszOutput, cbOutput, pszOutputCS,
378 cFactor, (iconv_t *)&pThread->ahIconvs[enmCacheIdx]);
379 }
380#endif
381 return rtStrConvertUncached(pchInput, cchInput, pszInputCS,
382 (void **)ppszOutput, cbOutput, pszOutputCS,
383 cFactor);
384}
385
386
387/**
388 * Internal API for use by the path conversion code.
389 *
390 * @returns IPRT status code.
391 *
392 * @param pszInput Pointer to intput string.
393 * @param cchInput Size (in bytes) of input string. Excludes any
394 * terminators.
395 * @param pszInputCS Codeset of the input string.
396 * @param ppszOutput Pointer to pointer to output buffer if cbOutput > 0.
397 * If cbOutput is 0 this is where the pointer to the
398 * allocated buffer is stored.
399 * @param cbOutput Size of the passed in buffer.
400 * @param pszOutputCS Codeset of the input string.
401 * @param cFactor Input vs. output size factor.
402 * @param enmCacheIdx The iconv cache index.
403 */
404int rtStrConvert(const char *pchInput, size_t cchInput, const char *pszInputCS,
405 char **ppszOutput, size_t cbOutput, const char *pszOutputCS,
406 unsigned cFactor, RTSTRICONV enmCacheIdx)
407{
408 Assert(enmCacheIdx >= 0 && enmCacheIdx < RTSTRICONV_END);
409 return rtStrConvertWrapper(pchInput, cchInput, pszInputCS,
410 ppszOutput, cbOutput, pszOutputCS,
411 cFactor, enmCacheIdx);
412}
413
414
415/**
416 * Allocates tmp buffer, translates pszString from UTF8 to current codepage.
417 *
418 * @returns iprt status code.
419 * @param ppszString Receives pointer of allocated native CP string.
420 * The returned pointer must be freed using RTStrFree().
421 * @param pszString UTF-8 string to convert.
422 */
423RTR3DECL(int) RTStrUtf8ToCurrentCP(char **ppszString, const char *pszString)
424{
425 Assert(ppszString);
426 Assert(pszString);
427 *ppszString = NULL;
428
429 /*
430 * Assume result string length is not longer than UTF-8 string.
431 */
432 size_t cch = strlen(pszString);
433 if (cch <= 0)
434 {
435 /* zero length string passed. */
436 *ppszString = (char *)RTMemTmpAllocZ(sizeof(char));
437 if (*ppszString)
438 return VINF_SUCCESS;
439 return VERR_NO_TMP_MEMORY;
440 }
441 return rtStrConvertWrapper(pszString, cch, "UTF-8", ppszString, 0, "", 1, RTSTRICONV_UTF8_TO_LOCALE);
442}
443
444
445/**
446 * Allocates tmp buffer, translates pszString from current codepage to UTF-8.
447 *
448 * @returns iprt status code.
449 * @param ppszString Receives pointer of allocated UTF-8 string.
450 * The returned pointer must be freed using RTStrFree().
451 * @param pszString Native string to convert.
452 */
453RTR3DECL(int) RTStrCurrentCPToUtf8(char **ppszString, const char *pszString)
454{
455 Assert(ppszString);
456 Assert(pszString);
457 *ppszString = NULL;
458
459 /*
460 * Attempt with UTF-8 length of 2x the native length.
461 */
462 size_t cch = strlen(pszString);
463 if (cch <= 0)
464 {
465 /* zero length string passed. */
466 *ppszString = (char *)RTMemTmpAllocZ(sizeof(char));
467 if (*ppszString)
468 return VINF_SUCCESS;
469 return VERR_NO_TMP_MEMORY;
470 }
471 return rtStrConvertWrapper(pszString, cch, "", ppszString, 0, "UTF-8", 2, RTSTRICONV_LOCALE_TO_UTF8);
472}
473
Note: See TracBrowser for help on using the repository browser.

© 2024 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette