VirtualBox

source: vbox/trunk/src/VBox/Runtime/r3/posix/utf8-posix.cpp@ 28317

Last change on this file since 28317 was 26344, checked in by vboxsync, 15 years ago

Runtime: white space cleanup.

  • Property svn:eol-style set to native
  • Property svn:keywords set to Id
File size: 7.9 KB
Line 
1/* $Id: utf8-posix.cpp 26344 2010-02-09 03:39:45Z vboxsync $ */
2/** @file
3 * IPRT - UTF-8 helpers, POSIX.
4 */
5
6/*
7 * Copyright (C) 2006-2007 Sun Microsystems, Inc.
8 *
9 * This file is part of VirtualBox Open Source Edition (OSE), as
10 * available from http://www.virtualbox.org. This file is free software;
11 * you can redistribute it and/or modify it under the terms of the GNU
12 * General Public License (GPL) as published by the Free Software
13 * Foundation, in version 2 as it comes in the "COPYING" file of the
14 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
15 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
16 *
17 * The contents of this file may alternatively be used under the terms
18 * of the Common Development and Distribution License Version 1.0
19 * (CDDL) only, as it comes in the "COPYING.CDDL" file of the
20 * VirtualBox OSE distribution, in which case the provisions of the
21 * CDDL are applicable instead of those of the GPL.
22 *
23 * You may elect to license modified versions of this file under the
24 * terms and conditions of either the GPL or the CDDL or both.
25 *
26 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa
27 * Clara, CA 95054 USA or visit http://www.sun.com if you need
28 * additional information or have any questions.
29 */
30
31
32/*******************************************************************************
33* Header Files *
34*******************************************************************************/
35#include <iprt/string.h>
36#include <iprt/alloc.h>
37#include <iprt/assert.h>
38#include <iprt/err.h>
39#include <iprt/string.h>
40
41#include <errno.h>
42#include <locale.h>
43#include <iconv.h>
44#include <wctype.h>
45
46#ifdef RT_OS_SOLARIS
47# include <langinfo.h>
48#endif
49
50#include "internal/alignmentchecks.h"
51
52
53/*******************************************************************************
54* Internal Functions *
55*******************************************************************************/
56static int rtstrConvert(const void *pvInput, size_t cbInput, const char *pszInputCS, void **ppvOutput, size_t cbOutput, const char *pszOutputCS, unsigned cFactor);
57
58
59/**
60 * Converts a string from one charset to another.
61 *
62 * @returns iprt status code.
63 * @param pvInput Pointer to intput string.
64 * @param cbInput Size (in bytes) of input string. Excludes any terminators.
65 * @param pszInputCS Codeset of the input string.
66 * @param ppvOutput Pointer to pointer to output buffer if cbOutput > 0.
67 * If cbOutput is 0 this is where the pointer to the allocated
68 * buffer is stored.
69 * @param cbOutput Size of the passed in buffer.
70 * @param pszOutputCS Codeset of the input string.
71 * @param cFactor Input vs. output size factor.
72 */
73static int rtstrConvert(const void *pvInput, size_t cbInput, const char *pszInputCS, void **ppvOutput, size_t cbOutput, const char *pszOutputCS, unsigned cFactor)
74{
75 /*
76 * Allocate buffer
77 */
78 bool fUcs2Term;
79 void *pvOutput;
80 size_t cbOutput2;
81 if (!cbOutput)
82 {
83 cbOutput2 = cbInput * cFactor;
84 pvOutput = RTMemTmpAlloc(cbOutput2 + sizeof(RTUTF16));
85 if (!pvOutput)
86 return VERR_NO_TMP_MEMORY;
87 fUcs2Term = true;
88 }
89 else
90 {
91 pvOutput = *ppvOutput;
92 fUcs2Term = !strcmp(pszOutputCS, "UCS-2");
93 cbOutput2 = cbOutput - (fUcs2Term ? sizeof(RTUTF16) : 1);
94 if (cbOutput2 > cbOutput)
95 return VERR_BUFFER_OVERFLOW;
96 }
97
98 /*
99 * Use a loop here to retry with bigger buffers.
100 */
101 for (unsigned cTries = 10; cTries > 0; cTries--)
102 {
103 /*
104 * Create conversion object.
105 */
106#ifdef RT_OS_SOLARIS
107 /* Solaris doesn't grok empty codeset strings, so help it find the current codeset. */
108 if (!*pszInputCS)
109 pszInputCS = nl_langinfo(CODESET);
110 if (!*pszOutputCS)
111 pszOutputCS = nl_langinfo(CODESET);
112#endif
113 IPRT_ALIGNMENT_CHECKS_DISABLE(); /* glibc causes trouble */
114 iconv_t icHandle = iconv_open(pszOutputCS, pszInputCS);
115 IPRT_ALIGNMENT_CHECKS_ENABLE();
116 if (icHandle != (iconv_t)-1)
117 {
118 /*
119 * Do the conversion.
120 */
121 size_t cbInLeft = cbInput;
122 size_t cbOutLeft = cbOutput2;
123 const void *pvInputLeft = pvInput;
124 void *pvOutputLeft = pvOutput;
125#if defined(RT_OS_LINUX) || (defined(RT_OS_DARWIN) && defined(_DARWIN_FEATURE_UNIX_CONFORMANCE)) /* there are different opinions about the constness of the input buffer. */
126 if (iconv(icHandle, (char **)&pvInputLeft, &cbInLeft, (char **)&pvOutputLeft, &cbOutLeft) != (size_t)-1)
127#else
128 if (iconv(icHandle, (const char **)&pvInputLeft, &cbInLeft, (char **)&pvOutputLeft, &cbOutLeft) != (size_t)-1)
129#endif
130 {
131 if (!cbInLeft)
132 {
133 /*
134 * We're done, just add the terminator and return.
135 * (Two terminators to support UCS-2 output, too.)
136 */
137 iconv_close(icHandle);
138 ((char *)pvOutputLeft)[0] = '\0';
139 if (fUcs2Term)
140 ((char *)pvOutputLeft)[1] = '\0';
141 *ppvOutput = pvOutput;
142 return VINF_SUCCESS;
143 }
144 errno = E2BIG;
145 }
146 iconv_close(icHandle);
147
148 /*
149 * If we failed because of output buffer space we'll
150 * increase the output buffer size and retry.
151 */
152 if (errno == E2BIG)
153 {
154 if (!cbOutput)
155 {
156 RTMemTmpFree(pvOutput);
157 cbOutput2 *= 2;
158 pvOutput = RTMemTmpAlloc(cbOutput2 + sizeof(RTUTF16));
159 if (!pvOutput)
160 return VERR_NO_TMP_MEMORY;
161 continue;
162 }
163 return VERR_BUFFER_OVERFLOW;
164 }
165 }
166 break;
167 }
168
169 /* failure */
170 if (!cbOutput)
171 RTMemTmpFree(pvOutput);
172 return VERR_NO_TRANSLATION;
173}
174
175
176/**
177 * Allocates tmp buffer, translates pszString from UTF8 to current codepage.
178 *
179 * @returns iprt status code.
180 * @param ppszString Receives pointer of allocated native CP string.
181 * The returned pointer must be freed using RTStrFree().
182 * @param pszString UTF-8 string to convert.
183 */
184RTR3DECL(int) RTStrUtf8ToCurrentCP(char **ppszString, const char *pszString)
185{
186 Assert(ppszString);
187 Assert(pszString);
188 *ppszString = NULL;
189
190 /*
191 * Assume result string length is not longer than UTF-8 string.
192 */
193 size_t cch = strlen(pszString);
194 if (cch <= 0)
195 {
196 /* zero length string passed. */
197 *ppszString = (char *)RTMemTmpAllocZ(sizeof(char));
198 if (*ppszString)
199 return VINF_SUCCESS;
200 return VERR_NO_TMP_MEMORY;
201 }
202 return rtstrConvert(pszString, cch, "UTF-8", (void **)ppszString, 0, "", 1);
203}
204
205
206/**
207 * Allocates tmp buffer, translates pszString from current codepage to UTF-8.
208 *
209 * @returns iprt status code.
210 * @param ppszString Receives pointer of allocated UTF-8 string.
211 * The returned pointer must be freed using RTStrFree().
212 * @param pszString Native string to convert.
213 */
214RTR3DECL(int) RTStrCurrentCPToUtf8(char **ppszString, const char *pszString)
215{
216 Assert(ppszString);
217 Assert(pszString);
218 *ppszString = NULL;
219
220 /*
221 * Attempt with UTF-8 length of 2x the native lenght.
222 */
223 size_t cch = strlen(pszString);
224 if (cch <= 0)
225 {
226 /* zero length string passed. */
227 *ppszString = (char *)RTMemTmpAllocZ(sizeof(char));
228 if (*ppszString)
229 return VINF_SUCCESS;
230 return VERR_NO_TMP_MEMORY;
231 }
232 return rtstrConvert(pszString, cch, "", (void **)ppszString, 0, "UTF-8", 2);
233}
234
Note: See TracBrowser for help on using the repository browser.

© 2024 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette