VirtualBox

source: vbox/trunk/src/VBox/Runtime/r3/posix/utf8-posix.cpp@ 28904

Last change on this file since 28904 was 28904, checked in by vboxsync, 15 years ago

build fix

  • Property svn:eol-style set to native
  • Property svn:keywords set to Id
File size: 15.9 KB
Line 
1/* $Id: utf8-posix.cpp 28904 2010-04-29 15:13:22Z vboxsync $ */
2/** @file
3 * IPRT - UTF-8 helpers, POSIX.
4 */
5
6/*
7 * Copyright (C) 2006-2007 Oracle Corporation
8 *
9 * This file is part of VirtualBox Open Source Edition (OSE), as
10 * available from http://www.virtualbox.org. This file is free software;
11 * you can redistribute it and/or modify it under the terms of the GNU
12 * General Public License (GPL) as published by the Free Software
13 * Foundation, in version 2 as it comes in the "COPYING" file of the
14 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
15 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
16 *
17 * The contents of this file may alternatively be used under the terms
18 * of the Common Development and Distribution License Version 1.0
19 * (CDDL) only, as it comes in the "COPYING.CDDL" file of the
20 * VirtualBox OSE distribution, in which case the provisions of the
21 * CDDL are applicable instead of those of the GPL.
22 *
23 * You may elect to license modified versions of this file under the
24 * terms and conditions of either the GPL or the CDDL or both.
25 */
26
27
28/*******************************************************************************
29* Header Files *
30*******************************************************************************/
31#include <iprt/string.h>
32#include "internal/iprt.h"
33
34#include <iprt/alloc.h>
35#include <iprt/assert.h>
36#include <iprt/err.h>
37#include <iprt/string.h>
38
39#include <errno.h>
40#include <locale.h>
41#include <iconv.h>
42#include <wctype.h>
43
44#include <langinfo.h>
45
46#include "internal/alignmentchecks.h"
47#ifdef RT_WITH_ICONV_CACHE
48# include "internal/thread.h"
49# include "internal/string.h"
50AssertCompile(sizeof(iconv_t) <= sizeof(void *));
51#endif
52
53
/**
 * Queries the character set name of the current locale (LC_CTYPE).
 *
 * This is a thin wrapper that asks nl_langinfo() for the CODESET item.
 *
 * @returns Pointer to the read-only codeset name string handed back by
 *          nl_langinfo().
 */
const char *rtStrGetLocaleCodeset(void)
{
    const char *pszCodeset = nl_langinfo(CODESET);
    return pszCodeset;
}
63
64
65#ifdef RT_WITH_ICONV_CACHE
66
67/**
68 * Initializes the iconv handle cache associated with a thread.
69 *
70 * @param pThread The thread in question.
71 */
72void rtStrIconvCacheInit(PRTTHREADINT pThread)
73{
74 for (size_t i = 0; i < RT_ELEMENTS(pThread->ahIconvs); i++)
75 pThread->ahIconvs[i] = (iconv_t)-1;
76}
77
78/**
79 * Destroys the iconv handle cache associated with a thread.
80 *
81 * @param pThread The thread in question.
82 */
83void rtStrIconvCacheDestroy(PRTTHREADINT pThread)
84{
85 for (size_t i = 0; i < RT_ELEMENTS(pThread->ahIconvs); i++)
86 {
87 iconv_t hIconv = (iconv_t)pThread->ahIconvs[i];
88 pThread->ahIconvs[i] = (iconv_t)-1;
89 if (hIconv != (iconv_t)-1)
90 iconv_close(hIconv);
91 }
92}
93
94
95/**
96 * Converts a string from one charset to another.
97 *
98 * @returns iprt status code.
99 * @param pvInput Pointer to intput string.
100 * @param cbInput Size (in bytes) of input string. Excludes any terminators.
101 * @param pszInputCS Codeset of the input string.
102 * @param ppvOutput Pointer to pointer to output buffer if cbOutput > 0.
103 * If cbOutput is 0 this is where the pointer to the allocated
104 * buffer is stored.
105 * @param cbOutput Size of the passed in buffer.
106 * @param pszOutputCS Codeset of the input string.
107 * @param cFactor Input vs. output size factor.
108 * @param phIconv Pointer to the cache entry.
109 */
110static int rtstrConvertCached(const void *pvInput, size_t cbInput, const char *pszInputCS,
111 void **ppvOutput, size_t cbOutput, const char *pszOutputCS,
112 unsigned cFactor, iconv_t *phIconv)
113{
114 /*
115 * Allocate buffer
116 */
117 bool fUcs2Term;
118 void *pvOutput;
119 size_t cbOutput2;
120 if (!cbOutput)
121 {
122 cbOutput2 = cbInput * cFactor;
123 pvOutput = RTMemTmpAlloc(cbOutput2 + sizeof(RTUTF16));
124 if (!pvOutput)
125 return VERR_NO_TMP_MEMORY;
126 fUcs2Term = true;
127 }
128 else
129 {
130 pvOutput = *ppvOutput;
131 fUcs2Term = !strcmp(pszOutputCS, "UCS-2")
132 || !strcmp(pszOutputCS, "UTF-16")
133 || !strcmp(pszOutputCS, "ucs-2")
134 || !strcmp(pszOutputCS, "utf-16");
135 cbOutput2 = cbOutput - (fUcs2Term ? sizeof(RTUTF16) : 1);
136 if (cbOutput2 > cbOutput)
137 return VERR_BUFFER_OVERFLOW;
138 }
139
140 /*
141 * Use a loop here to retry with bigger buffers.
142 */
143 for (unsigned cTries = 10; cTries > 0; cTries--)
144 {
145 /*
146 * Create conversion object if necessary.
147 */
148 iconv_t hIconv = (iconv_t)*phIconv;
149 if (hIconv == (iconv_t)-1)
150 {
151 IPRT_ALIGNMENT_CHECKS_DISABLE(); /* glibc causes trouble */
152 *phIconv = hIconv = iconv_open(pszOutputCS, pszInputCS);
153 IPRT_ALIGNMENT_CHECKS_ENABLE();
154 }
155 if (hIconv != (iconv_t)-1)
156 {
157 /*
158 * Do the conversion.
159 */
160 size_t cbInLeft = cbInput;
161 size_t cbOutLeft = cbOutput2;
162 const void *pvInputLeft = pvInput;
163 void *pvOutputLeft = pvOutput;
164#if defined(RT_OS_LINUX) || (defined(RT_OS_DARWIN) && defined(_DARWIN_FEATURE_UNIX_CONFORMANCE)) /* there are different opinions about the constness of the input buffer. */
165 if (iconv(hIconv, (char **)&pvInputLeft, &cbInLeft, (char **)&pvOutputLeft, &cbOutLeft) != (size_t)-1)
166#else
167 if (iconv(hIconv, (const char **)&pvInputLeft, &cbInLeft, (char **)&pvOutputLeft, &cbOutLeft) != (size_t)-1)
168#endif
169 {
170 if (!cbInLeft)
171 {
172 /*
173 * We're done, just add the terminator and return.
174 * (Two terminators to support UCS-2 output, too.)
175 */
176 ((char *)pvOutputLeft)[0] = '\0';
177 if (fUcs2Term)
178 ((char *)pvOutputLeft)[1] = '\0';
179 *ppvOutput = pvOutput;
180 return VINF_SUCCESS;
181 }
182 errno = E2BIG;
183 }
184
185 /*
186 * If we failed because of output buffer space we'll
187 * increase the output buffer size and retry.
188 */
189 if (errno == E2BIG)
190 {
191 if (!cbOutput)
192 {
193 RTMemTmpFree(pvOutput);
194 cbOutput2 *= 2;
195 pvOutput = RTMemTmpAlloc(cbOutput2 + sizeof(RTUTF16));
196 if (!pvOutput)
197 return VERR_NO_TMP_MEMORY;
198 continue;
199 }
200 return VERR_BUFFER_OVERFLOW;
201 }
202
203 /*
204 * Close the handle on all other errors to make sure we won't carry
205 * any bad state with us.
206 */
207 *phIconv = (iconv_t)-1;
208 iconv_close(hIconv);
209 }
210 break;
211 }
212
213 /* failure */
214 if (!cbOutput)
215 RTMemTmpFree(pvOutput);
216 return VERR_NO_TRANSLATION;
217}
218
219#endif /* RT_WITH_ICONV_CACHE */
220
221/**
222 * Converts a string from one charset to another without using the handle cache.
223 *
224 * @returns IPRT status code.
225 *
226 * @param pvInput Pointer to intput string.
227 * @param cbInput Size (in bytes) of input string. Excludes any terminators.
228 * @param pszInputCS Codeset of the input string.
229 * @param ppvOutput Pointer to pointer to output buffer if cbOutput > 0.
230 * If cbOutput is 0 this is where the pointer to the allocated
231 * buffer is stored.
232 * @param cbOutput Size of the passed in buffer.
233 * @param pszOutputCS Codeset of the input string.
234 * @param cFactor Input vs. output size factor.
235 */
236static int rtStrConvertUncached(const void *pvInput, size_t cbInput, const char *pszInputCS,
237 void **ppvOutput, size_t cbOutput, const char *pszOutputCS,
238 unsigned cFactor)
239{
240 /*
241 * Allocate buffer
242 */
243 bool fUcs2Term;
244 void *pvOutput;
245 size_t cbOutput2;
246 if (!cbOutput)
247 {
248 cbOutput2 = cbInput * cFactor;
249 pvOutput = RTMemTmpAlloc(cbOutput2 + sizeof(RTUTF16));
250 if (!pvOutput)
251 return VERR_NO_TMP_MEMORY;
252 fUcs2Term = true;
253 }
254 else
255 {
256 pvOutput = *ppvOutput;
257 fUcs2Term = !strcmp(pszOutputCS, "UCS-2");
258 cbOutput2 = cbOutput - (fUcs2Term ? sizeof(RTUTF16) : 1);
259 if (cbOutput2 > cbOutput)
260 return VERR_BUFFER_OVERFLOW;
261 }
262
263 /*
264 * Use a loop here to retry with bigger buffers.
265 */
266 for (unsigned cTries = 10; cTries > 0; cTries--)
267 {
268 /*
269 * Create conversion object.
270 */
271#ifdef RT_OS_SOLARIS
272 /* Solaris doesn't grok empty codeset strings, so help it find the current codeset. */
273 if (!*pszInputCS)
274 pszInputCS = rtStrGetLocaleCodeset();
275 if (!*pszOutputCS)
276 pszOutputCS = rtStrGetLocaleCodeset();
277#endif
278 IPRT_ALIGNMENT_CHECKS_DISABLE(); /* glibc causes trouble */
279 iconv_t icHandle = iconv_open(pszOutputCS, pszInputCS);
280 IPRT_ALIGNMENT_CHECKS_ENABLE();
281 if (icHandle != (iconv_t)-1)
282 {
283 /*
284 * Do the conversion.
285 */
286 size_t cbInLeft = cbInput;
287 size_t cbOutLeft = cbOutput2;
288 const void *pvInputLeft = pvInput;
289 void *pvOutputLeft = pvOutput;
290#if defined(RT_OS_LINUX) || (defined(RT_OS_DARWIN) && defined(_DARWIN_FEATURE_UNIX_CONFORMANCE)) /* there are different opinions about the constness of the input buffer. */
291 if (iconv(icHandle, (char **)&pvInputLeft, &cbInLeft, (char **)&pvOutputLeft, &cbOutLeft) != (size_t)-1)
292#else
293 if (iconv(icHandle, (const char **)&pvInputLeft, &cbInLeft, (char **)&pvOutputLeft, &cbOutLeft) != (size_t)-1)
294#endif
295 {
296 if (!cbInLeft)
297 {
298 /*
299 * We're done, just add the terminator and return.
300 * (Two terminators to support UCS-2 output, too.)
301 */
302 iconv_close(icHandle);
303 ((char *)pvOutputLeft)[0] = '\0';
304 if (fUcs2Term)
305 ((char *)pvOutputLeft)[1] = '\0';
306 *ppvOutput = pvOutput;
307 return VINF_SUCCESS;
308 }
309 errno = E2BIG;
310 }
311 iconv_close(icHandle);
312
313 /*
314 * If we failed because of output buffer space we'll
315 * increase the output buffer size and retry.
316 */
317 if (errno == E2BIG)
318 {
319 if (!cbOutput)
320 {
321 RTMemTmpFree(pvOutput);
322 cbOutput2 *= 2;
323 pvOutput = RTMemTmpAlloc(cbOutput2 + sizeof(RTUTF16));
324 if (!pvOutput)
325 return VERR_NO_TMP_MEMORY;
326 continue;
327 }
328 return VERR_BUFFER_OVERFLOW;
329 }
330 }
331 break;
332 }
333
334 /* failure */
335 if (!cbOutput)
336 RTMemTmpFree(pvOutput);
337 return VERR_NO_TRANSLATION;
338}
339
340
341/**
342 * Wrapper that selects rtStrConvertCached or rtStrConvertUncached.
343 *
344 * @returns IPRT status code.
345 *
346 * @param pszInput Pointer to intput string.
347 * @param cchInput Size (in bytes) of input string. Excludes any
348 * terminators.
349 * @param pszInputCS Codeset of the input string.
350 * @param ppszOutput Pointer to pointer to output buffer if cbOutput > 0.
351 * If cbOutput is 0 this is where the pointer to the
352 * allocated buffer is stored.
353 * @param cbOutput Size of the passed in buffer.
354 * @param pszOutputCS Codeset of the input string.
355 * @param cFactor Input vs. output size factor.
356 * @param enmCacheIdx The iconv cache index.
357 */
358DECLINLINE(int) rtStrConvertWrapper(const char *pchInput, size_t cchInput, const char *pszInputCS,
359 char **ppszOutput, size_t cbOutput, const char *pszOutputCS,
360 unsigned cFactor, RTSTRICONV enmCacheIdx)
361{
362#ifdef RT_WITH_ICONV_CACHE
363 RTTHREAD hSelf = RTThreadSelf();
364 if (hSelf != NIL_RTTHREAD)
365 {
366 PRTTHREADINT pThread = rtThreadGet(hSelf);
367 if ( pThread
368 && (pThread->fIntFlags & (RTTHREADINT_FLAGS_ALIEN | RTTHREADINT_FLAGS_MAIN)) != RTTHREADINT_FLAGS_ALIEN)
369 return rtstrConvertCached(pchInput, cchInput, pszInputCS,
370 (void **)ppszOutput, cbOutput, pszOutputCS,
371 cFactor, (iconv_t *)&pThread->ahIconvs[enmCacheIdx]);
372 }
373#endif
374 return rtStrConvertUncached(pchInput, cchInput, pszInputCS,
375 (void **)ppszOutput, cbOutput, pszOutputCS,
376 cFactor);
377}
378
379
380/**
381 * Internal API for use by the path conversion code.
382 *
383 * @returns IPRT status code.
384 *
385 * @param pszInput Pointer to intput string.
386 * @param cchInput Size (in bytes) of input string. Excludes any
387 * terminators.
388 * @param pszInputCS Codeset of the input string.
389 * @param ppszOutput Pointer to pointer to output buffer if cbOutput > 0.
390 * If cbOutput is 0 this is where the pointer to the
391 * allocated buffer is stored.
392 * @param cbOutput Size of the passed in buffer.
393 * @param pszOutputCS Codeset of the input string.
394 * @param cFactor Input vs. output size factor.
395 * @param enmCacheIdx The iconv cache index.
396 */
397int rtStrConvert(const char *pchInput, size_t cchInput, const char *pszInputCS,
398 char **ppszOutput, size_t cbOutput, const char *pszOutputCS,
399 unsigned cFactor, RTSTRICONV enmCacheIdx)
400{
401 Assert(enmCacheIdx >= 0 && enmCacheIdx < RTSTRICONV_END);
402 return rtStrConvertWrapper(pchInput, cchInput, pszInputCS,
403 ppszOutput, cbOutput, pszOutputCS,
404 cFactor, enmCacheIdx);
405}
406
407
408/**
409 * Allocates tmp buffer, translates pszString from UTF8 to current codepage.
410 *
411 * @returns iprt status code.
412 * @param ppszString Receives pointer of allocated native CP string.
413 * The returned pointer must be freed using RTStrFree().
414 * @param pszString UTF-8 string to convert.
415 */
416RTR3DECL(int) RTStrUtf8ToCurrentCP(char **ppszString, const char *pszString)
417{
418 Assert(ppszString);
419 Assert(pszString);
420 *ppszString = NULL;
421
422 /*
423 * Assume result string length is not longer than UTF-8 string.
424 */
425 size_t cch = strlen(pszString);
426 if (cch <= 0)
427 {
428 /* zero length string passed. */
429 *ppszString = (char *)RTMemTmpAllocZ(sizeof(char));
430 if (*ppszString)
431 return VINF_SUCCESS;
432 return VERR_NO_TMP_MEMORY;
433 }
434 return rtStrConvertWrapper(pszString, cch, "UTF-8", ppszString, 0, "", 1, RTSTRICONV_UTF8_TO_LOCALE);
435}
436
437
438/**
439 * Allocates tmp buffer, translates pszString from current codepage to UTF-8.
440 *
441 * @returns iprt status code.
442 * @param ppszString Receives pointer of allocated UTF-8 string.
443 * The returned pointer must be freed using RTStrFree().
444 * @param pszString Native string to convert.
445 */
446RTR3DECL(int) RTStrCurrentCPToUtf8(char **ppszString, const char *pszString)
447{
448 Assert(ppszString);
449 Assert(pszString);
450 *ppszString = NULL;
451
452 /*
453 * Attempt with UTF-8 length of 2x the native length.
454 */
455 size_t cch = strlen(pszString);
456 if (cch <= 0)
457 {
458 /* zero length string passed. */
459 *ppszString = (char *)RTMemTmpAllocZ(sizeof(char));
460 if (*ppszString)
461 return VINF_SUCCESS;
462 return VERR_NO_TMP_MEMORY;
463 }
464 return rtStrConvertWrapper(pszString, cch, "", ppszString, 0, "UTF-8", 2, RTSTRICONV_LOCALE_TO_UTF8);
465}
466
Note: See TracBrowser for help on using the repository browser.

© 2025 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette