VirtualBox

source: vbox/trunk/src/VBox/Runtime/r3/posix/utf8-posix.cpp@ 45733

Last change on this file since 45733 was 45260, checked in by vboxsync, 12 years ago

We should probably check the iconv return value a little more closely. Currently trying VWRN_NO_TRANSLATION instead of VERR_NO_TRANSLATION as tstUtf8 originally expected. Adjusted tstUtf8. Seen trouble in this area on testboxsh1 where neither LC_ALL, LANG nor LC_CTYPE were set.

  • Property svn:eol-style set to native
  • Property svn:keywords set to Id
File size: 16.8 KB
Line 
1/* $Id: utf8-posix.cpp 45260 2013-03-31 00:07:11Z vboxsync $ */
2/** @file
3 * IPRT - UTF-8 helpers, POSIX.
4 */
5
6/*
7 * Copyright (C) 2006-2012 Oracle Corporation
8 *
9 * This file is part of VirtualBox Open Source Edition (OSE), as
10 * available from http://www.virtualbox.org. This file is free software;
11 * you can redistribute it and/or modify it under the terms of the GNU
12 * General Public License (GPL) as published by the Free Software
13 * Foundation, in version 2 as it comes in the "COPYING" file of the
14 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
15 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
16 *
17 * The contents of this file may alternatively be used under the terms
18 * of the Common Development and Distribution License Version 1.0
19 * (CDDL) only, as it comes in the "COPYING.CDDL" file of the
20 * VirtualBox OSE distribution, in which case the provisions of the
21 * CDDL are applicable instead of those of the GPL.
22 *
23 * You may elect to license modified versions of this file under the
24 * terms and conditions of either the GPL or the CDDL or both.
25 */
26
27
28/*******************************************************************************
29* Header Files *
30*******************************************************************************/
31#include <iprt/string.h>
32#include "internal/iprt.h"
33
34#include <iprt/alloc.h>
35#include <iprt/assert.h>
36#include <iprt/err.h>
37#include <iprt/string.h>
38
39#include <errno.h>
40#include <locale.h>
41
42/* iconv prototype changed with 165+ (thanks to PSARC/2010/160 Bugster 7037400) */
43#if defined(RT_OS_SOLARIS)
44# if !defined(_XPG6)
45# define VBOX_XPG6_TMP_DEF
46# define _XPG6
47# endif
48# if defined(__USE_LEGACY_PROTOTYPES__)
49# define VBOX_LEGACY_PROTO_TMP_DEF
50# undef __USE_LEGACY_PROTOTYPES__
51# endif
52#endif /* RT_OS_SOLARIS */
53
54# include <iconv.h>
55
56#if defined(RT_OS_SOLARIS)
57# if defined(VBOX_XPG6_TMP_DEF)
58# undef _XPG6
59# undef VBOX_XPG6_TMP_DEF
60# endif
61# if defined(VBOX_LEGACY_PROTO_TMP_DEF)
62# define __USE_LEGACY_PROTOTYPES__
63# undef VBOX_LEGACY_PROTO_TMP_DEF
64# endif
65#endif /* RT_OS_SOLARIS */
66
67#include <wctype.h>
68
69#include <langinfo.h>
70
71#include "internal/alignmentchecks.h"
72#include "internal/string.h"
73#ifdef RT_WITH_ICONV_CACHE
74# include "internal/thread.h"
75AssertCompile(sizeof(iconv_t) <= sizeof(void *));
76#endif
77
78
79/**
80 * Gets the codeset of the current locale (LC_CTYPE).
81 *
82 * @returns Pointer to read-only string with the codeset name.
83 */
84DECLHIDDEN(const char *) rtStrGetLocaleCodeset(void)
85{
86 return nl_langinfo(CODESET);
87}
88
89
90#ifdef RT_WITH_ICONV_CACHE
91
92/**
93 * Initializes the iconv handle cache associated with a thread.
94 *
95 * @param pThread The thread in question.
96 */
97DECLHIDDEN(void) rtStrIconvCacheInit(PRTTHREADINT pThread)
98{
99 for (size_t i = 0; i < RT_ELEMENTS(pThread->ahIconvs); i++)
100 pThread->ahIconvs[i] = (iconv_t)-1;
101}
102
103/**
104 * Destroys the iconv handle cache associated with a thread.
105 *
106 * @param pThread The thread in question.
107 */
108DECLHIDDEN(void) rtStrIconvCacheDestroy(PRTTHREADINT pThread)
109{
110 for (size_t i = 0; i < RT_ELEMENTS(pThread->ahIconvs); i++)
111 {
112 iconv_t hIconv = (iconv_t)pThread->ahIconvs[i];
113 pThread->ahIconvs[i] = (iconv_t)-1;
114 if (hIconv != (iconv_t)-1)
115 iconv_close(hIconv);
116 }
117}
118
119
120/**
121 * Converts a string from one charset to another.
122 *
123 * @returns iprt status code.
124 * @param pvInput Pointer to intput string.
125 * @param cbInput Size (in bytes) of input string. Excludes any terminators.
126 * @param pszInputCS Codeset of the input string.
127 * @param ppvOutput Pointer to pointer to output buffer if cbOutput > 0.
128 * If cbOutput is 0 this is where the pointer to the allocated
129 * buffer is stored.
130 * @param cbOutput Size of the passed in buffer.
131 * @param pszOutputCS Codeset of the input string.
132 * @param cFactor Input vs. output size factor.
133 * @param phIconv Pointer to the cache entry.
134 */
135static int rtstrConvertCached(const void *pvInput, size_t cbInput, const char *pszInputCS,
136 void **ppvOutput, size_t cbOutput, const char *pszOutputCS,
137 unsigned cFactor, iconv_t *phIconv)
138{
139 /*
140 * Allocate buffer
141 */
142 bool fUcs2Term;
143 void *pvOutput;
144 size_t cbOutput2;
145 if (!cbOutput)
146 {
147 cbOutput2 = cbInput * cFactor;
148 pvOutput = RTMemTmpAlloc(cbOutput2 + sizeof(RTUTF16));
149 if (!pvOutput)
150 return VERR_NO_TMP_MEMORY;
151 fUcs2Term = true;
152 }
153 else
154 {
155 pvOutput = *ppvOutput;
156 fUcs2Term = !strcmp(pszOutputCS, "UCS-2")
157 || !strcmp(pszOutputCS, "UTF-16")
158 || !strcmp(pszOutputCS, "ucs-2")
159 || !strcmp(pszOutputCS, "utf-16");
160 cbOutput2 = cbOutput - (fUcs2Term ? sizeof(RTUTF16) : 1);
161 if (cbOutput2 > cbOutput)
162 return VERR_BUFFER_OVERFLOW;
163 }
164
165 /*
166 * Use a loop here to retry with bigger buffers.
167 */
168 for (unsigned cTries = 10; cTries > 0; cTries--)
169 {
170 /*
171 * Create conversion object if necessary.
172 */
173 iconv_t hIconv = (iconv_t)*phIconv;
174 if (hIconv == (iconv_t)-1)
175 {
176#ifdef RT_OS_SOLARIS
177 /* Solaris doesn't grok empty codeset strings, so help it find the current codeset. */
178 if (!*pszInputCS)
179 pszInputCS = rtStrGetLocaleCodeset();
180 if (!*pszOutputCS)
181 pszOutputCS = rtStrGetLocaleCodeset();
182#endif
183 IPRT_ALIGNMENT_CHECKS_DISABLE(); /* glibc causes trouble */
184 *phIconv = hIconv = iconv_open(pszOutputCS, pszInputCS);
185 IPRT_ALIGNMENT_CHECKS_ENABLE();
186 }
187 if (hIconv != (iconv_t)-1)
188 {
189 /*
190 * Do the conversion.
191 */
192 size_t cbInLeft = cbInput;
193 size_t cbOutLeft = cbOutput2;
194 const void *pvInputLeft = pvInput;
195 void *pvOutputLeft = pvOutput;
196 size_t cchNonRev;
197#if defined(RT_OS_LINUX) || defined(RT_OS_HAIKU) || defined(RT_OS_SOLARIS) || (defined(RT_OS_DARWIN) && defined(_DARWIN_FEATURE_UNIX_CONFORMANCE)) /* there are different opinions about the constness of the input buffer. */
198 cchNonRev = iconv(hIconv, (char **)&pvInputLeft, &cbInLeft, (char **)&pvOutputLeft, &cbOutLeft);
199#else
200 cchNonRev = iconv(hIconv, (const char **)&pvInputLeft, &cbInLeft, (char **)&pvOutputLeft, &cbOutLeft);
201#endif
202 if (cchNonRev != (size_t)-1)
203 {
204 if (!cbInLeft)
205 {
206 /*
207 * We're done, just add the terminator and return.
208 * (Two terminators to support UCS-2 output, too.)
209 */
210 ((char *)pvOutputLeft)[0] = '\0';
211 if (fUcs2Term)
212 ((char *)pvOutputLeft)[1] = '\0';
213 *ppvOutput = pvOutput;
214 if (cchNonRev == 0)
215 return VINF_SUCCESS;
216 return VWRN_NO_TRANSLATION;
217 }
218 errno = E2BIG;
219 }
220
221 /*
222 * If we failed because of output buffer space we'll
223 * increase the output buffer size and retry.
224 */
225 if (errno == E2BIG)
226 {
227 if (!cbOutput)
228 {
229 RTMemTmpFree(pvOutput);
230 cbOutput2 *= 2;
231 pvOutput = RTMemTmpAlloc(cbOutput2 + sizeof(RTUTF16));
232 if (!pvOutput)
233 return VERR_NO_TMP_MEMORY;
234 continue;
235 }
236 return VERR_BUFFER_OVERFLOW;
237 }
238
239 /*
240 * Close the handle on all other errors to make sure we won't carry
241 * any bad state with us.
242 */
243 *phIconv = (iconv_t)-1;
244 iconv_close(hIconv);
245 }
246 break;
247 }
248
249 /* failure */
250 if (!cbOutput)
251 RTMemTmpFree(pvOutput);
252 return VERR_NO_TRANSLATION;
253}
254
255#endif /* RT_WITH_ICONV_CACHE */
256
257/**
258 * Converts a string from one charset to another without using the handle cache.
259 *
260 * @returns IPRT status code.
261 *
262 * @param pvInput Pointer to intput string.
263 * @param cbInput Size (in bytes) of input string. Excludes any terminators.
264 * @param pszInputCS Codeset of the input string.
265 * @param ppvOutput Pointer to pointer to output buffer if cbOutput > 0.
266 * If cbOutput is 0 this is where the pointer to the allocated
267 * buffer is stored.
268 * @param cbOutput Size of the passed in buffer.
269 * @param pszOutputCS Codeset of the input string.
270 * @param cFactor Input vs. output size factor.
271 */
272static int rtStrConvertUncached(const void *pvInput, size_t cbInput, const char *pszInputCS,
273 void **ppvOutput, size_t cbOutput, const char *pszOutputCS,
274 unsigned cFactor)
275{
276 /*
277 * Allocate buffer
278 */
279 bool fUcs2Term;
280 void *pvOutput;
281 size_t cbOutput2;
282 if (!cbOutput)
283 {
284 cbOutput2 = cbInput * cFactor;
285 pvOutput = RTMemTmpAlloc(cbOutput2 + sizeof(RTUTF16));
286 if (!pvOutput)
287 return VERR_NO_TMP_MEMORY;
288 fUcs2Term = true;
289 }
290 else
291 {
292 pvOutput = *ppvOutput;
293 fUcs2Term = !strcmp(pszOutputCS, "UCS-2");
294 cbOutput2 = cbOutput - (fUcs2Term ? sizeof(RTUTF16) : 1);
295 if (cbOutput2 > cbOutput)
296 return VERR_BUFFER_OVERFLOW;
297 }
298
299 /*
300 * Use a loop here to retry with bigger buffers.
301 */
302 for (unsigned cTries = 10; cTries > 0; cTries--)
303 {
304 /*
305 * Create conversion object.
306 */
307#ifdef RT_OS_SOLARIS
308 /* Solaris doesn't grok empty codeset strings, so help it find the current codeset. */
309 if (!*pszInputCS)
310 pszInputCS = rtStrGetLocaleCodeset();
311 if (!*pszOutputCS)
312 pszOutputCS = rtStrGetLocaleCodeset();
313#endif
314 IPRT_ALIGNMENT_CHECKS_DISABLE(); /* glibc causes trouble */
315 iconv_t icHandle = iconv_open(pszOutputCS, pszInputCS);
316 IPRT_ALIGNMENT_CHECKS_ENABLE();
317 if (icHandle != (iconv_t)-1)
318 {
319 /*
320 * Do the conversion.
321 */
322 size_t cbInLeft = cbInput;
323 size_t cbOutLeft = cbOutput2;
324 const void *pvInputLeft = pvInput;
325 void *pvOutputLeft = pvOutput;
326 size_t cchNonRev;
327#if defined(RT_OS_LINUX) || defined(RT_OS_HAIKU) || defined(RT_OS_SOLARIS) || (defined(RT_OS_DARWIN) && defined(_DARWIN_FEATURE_UNIX_CONFORMANCE)) /* there are different opinions about the constness of the input buffer. */
328 cchNonRev = iconv(icHandle, (char **)&pvInputLeft, &cbInLeft, (char **)&pvOutputLeft, &cbOutLeft);
329#else
330 cchNonRev = iconv(icHandle, (const char **)&pvInputLeft, &cbInLeft, (char **)&pvOutputLeft, &cbOutLeft);
331#endif
332 if (cchNonRev != (size_t)-1)
333 {
334 if (!cbInLeft)
335 {
336 /*
337 * We're done, just add the terminator and return.
338 * (Two terminators to support UCS-2 output, too.)
339 */
340 iconv_close(icHandle);
341 ((char *)pvOutputLeft)[0] = '\0';
342 if (fUcs2Term)
343 ((char *)pvOutputLeft)[1] = '\0';
344 *ppvOutput = pvOutput;
345 if (cchNonRev == 0)
346 return VINF_SUCCESS;
347 return VWRN_NO_TRANSLATION;
348 }
349 errno = E2BIG;
350 }
351 iconv_close(icHandle);
352
353 /*
354 * If we failed because of output buffer space we'll
355 * increase the output buffer size and retry.
356 */
357 if (errno == E2BIG)
358 {
359 if (!cbOutput)
360 {
361 RTMemTmpFree(pvOutput);
362 cbOutput2 *= 2;
363 pvOutput = RTMemTmpAlloc(cbOutput2 + sizeof(RTUTF16));
364 if (!pvOutput)
365 return VERR_NO_TMP_MEMORY;
366 continue;
367 }
368 return VERR_BUFFER_OVERFLOW;
369 }
370 }
371 break;
372 }
373
374 /* failure */
375 if (!cbOutput)
376 RTMemTmpFree(pvOutput);
377 return VERR_NO_TRANSLATION;
378}
379
380
381/**
382 * Wrapper that selects rtStrConvertCached or rtStrConvertUncached.
383 *
384 * @returns IPRT status code.
385 *
386 * @param pszInput Pointer to intput string.
387 * @param cchInput Size (in bytes) of input string. Excludes any
388 * terminators.
389 * @param pszInputCS Codeset of the input string.
390 * @param ppszOutput Pointer to pointer to output buffer if cbOutput > 0.
391 * If cbOutput is 0 this is where the pointer to the
392 * allocated buffer is stored.
393 * @param cbOutput Size of the passed in buffer.
394 * @param pszOutputCS Codeset of the input string.
395 * @param cFactor Input vs. output size factor.
396 * @param enmCacheIdx The iconv cache index.
397 */
398DECLINLINE(int) rtStrConvertWrapper(const char *pchInput, size_t cchInput, const char *pszInputCS,
399 char **ppszOutput, size_t cbOutput, const char *pszOutputCS,
400 unsigned cFactor, RTSTRICONV enmCacheIdx)
401{
402#ifdef RT_WITH_ICONV_CACHE
403 RTTHREAD hSelf = RTThreadSelf();
404 if (hSelf != NIL_RTTHREAD)
405 {
406 PRTTHREADINT pThread = rtThreadGet(hSelf);
407 if (pThread)
408 {
409 if ((pThread->fIntFlags & (RTTHREADINT_FLAGS_ALIEN | RTTHREADINT_FLAGS_MAIN)) != RTTHREADINT_FLAGS_ALIEN)
410 {
411 int rc = rtstrConvertCached(pchInput, cchInput, pszInputCS,
412 (void **)ppszOutput, cbOutput, pszOutputCS,
413 cFactor, (iconv_t *)&pThread->ahIconvs[enmCacheIdx]);
414 rtThreadRelease(pThread);
415 return rc;
416 }
417 rtThreadRelease(pThread);
418 }
419 }
420#endif
421 return rtStrConvertUncached(pchInput, cchInput, pszInputCS,
422 (void **)ppszOutput, cbOutput, pszOutputCS,
423 cFactor);
424}
425
426
427/**
428 * Internal API for use by the path conversion code.
429 *
430 * @returns IPRT status code.
431 *
432 * @param pszInput Pointer to intput string.
433 * @param cchInput Size (in bytes) of input string. Excludes any
434 * terminators.
435 * @param pszInputCS Codeset of the input string.
436 * @param ppszOutput Pointer to pointer to output buffer if cbOutput > 0.
437 * If cbOutput is 0 this is where the pointer to the
438 * allocated buffer is stored.
439 * @param cbOutput Size of the passed in buffer.
440 * @param pszOutputCS Codeset of the input string.
441 * @param cFactor Input vs. output size factor.
442 * @param enmCacheIdx The iconv cache index.
443 */
444DECLHIDDEN(int) rtStrConvert(const char *pchInput, size_t cchInput, const char *pszInputCS,
445 char **ppszOutput, size_t cbOutput, const char *pszOutputCS,
446 unsigned cFactor, RTSTRICONV enmCacheIdx)
447{
448 Assert(enmCacheIdx >= 0 && enmCacheIdx < RTSTRICONV_END);
449 return rtStrConvertWrapper(pchInput, cchInput, pszInputCS,
450 ppszOutput, cbOutput, pszOutputCS,
451 cFactor, enmCacheIdx);
452}
453
454
455RTR3DECL(int) RTStrUtf8ToCurrentCPTag(char **ppszString, const char *pszString, const char *pszTag)
456{
457 Assert(ppszString);
458 Assert(pszString);
459 *ppszString = NULL;
460
461 /*
462 * Assume result string length is not longer than UTF-8 string.
463 */
464 size_t cch = strlen(pszString);
465 if (cch <= 0)
466 {
467 /* zero length string passed. */
468 *ppszString = (char *)RTMemTmpAllocZTag(sizeof(char), pszTag);
469 if (*ppszString)
470 return VINF_SUCCESS;
471 return VERR_NO_TMP_MEMORY;
472 }
473 return rtStrConvertWrapper(pszString, cch, "UTF-8", ppszString, 0, "", 1, RTSTRICONV_UTF8_TO_LOCALE);
474}
475
476
477RTR3DECL(int) RTStrCurrentCPToUtf8Tag(char **ppszString, const char *pszString, const char *pszTag)
478{
479 Assert(ppszString);
480 Assert(pszString);
481 *ppszString = NULL;
482
483 /*
484 * Attempt with UTF-8 length of 2x the native length.
485 */
486 size_t cch = strlen(pszString);
487 if (cch <= 0)
488 {
489 /* zero length string passed. */
490 *ppszString = (char *)RTMemTmpAllocZTag(sizeof(char), pszTag);
491 if (*ppszString)
492 return VINF_SUCCESS;
493 return VERR_NO_TMP_MEMORY;
494 }
495 return rtStrConvertWrapper(pszString, cch, "", ppszString, 0, "UTF-8", 2, RTSTRICONV_LOCALE_TO_UTF8);
496}
497
Note: See TracBrowser for help on using the repository browser.

© 2024 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette