VirtualBox

source: vbox/trunk/src/VBox/Runtime/r3/posix/utf8-posix.cpp@ 10971

Last change on this file since 10971 was 8245, checked in by vboxsync, 17 years ago

rebranding: IPRT files again.

  • Property svn:eol-style set to native
  • Property svn:keywords set to Id
File size: 7.8 KB
Line 
1/* $Id: utf8-posix.cpp 8245 2008-04-21 17:24:28Z vboxsync $ */
2/** @file
3 * IPRT - UTF-8 helpers, POSIX.
4 */
5
6/*
7 * Copyright (C) 2006-2007 Sun Microsystems, Inc.
8 *
9 * This file is part of VirtualBox Open Source Edition (OSE), as
10 * available from http://www.virtualbox.org. This file is free software;
11 * you can redistribute it and/or modify it under the terms of the GNU
12 * General Public License (GPL) as published by the Free Software
13 * Foundation, in version 2 as it comes in the "COPYING" file of the
14 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
15 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
16 *
17 * The contents of this file may alternatively be used under the terms
18 * of the Common Development and Distribution License Version 1.0
19 * (CDDL) only, as it comes in the "COPYING.CDDL" file of the
20 * VirtualBox OSE distribution, in which case the provisions of the
21 * CDDL are applicable instead of those of the GPL.
22 *
23 * You may elect to license modified versions of this file under the
24 * terms and conditions of either the GPL or the CDDL or both.
25 *
26 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa
27 * Clara, CA 95054 USA or visit http://www.sun.com if you need
28 * additional information or have any questions.
29 */
30
31
32/*******************************************************************************
33* Header Files *
34*******************************************************************************/
35#include <iprt/string.h>
36#include <iprt/alloc.h>
37#include <iprt/assert.h>
38#include <iprt/err.h>
39#include <iprt/string.h>
40
41#include <errno.h>
42#include <locale.h>
43#include <iconv.h>
44#include <wctype.h>
45
46#ifdef RT_OS_SOLARIS
47#include <langinfo.h>
48#endif
49
50/*******************************************************************************
51* Internal Functions *
52*******************************************************************************/
53static int rtstrConvert(const void *pvInput, size_t cbInput, const char *pszInputCS, void **ppvOutput, size_t cbOutput, const char *pszOutputCS, unsigned cFactor);
54
55
56/**
57 * Converts a string from one charset to another.
58 *
59 * @returns iprt status code.
60 * @param pvInput Pointer to intput string.
61 * @param cbInput Size (in bytes) of input string. Excludes any terminators.
62 * @param pszInputCS Codeset of the input string.
63 * @param ppvOutput Pointer to pointer to output buffer if cbOutput > 0.
64 * If cbOutput is 0 this is where the pointer to the allocated
65 * buffer is stored.
66 * @param cbOutput Size of the passed in buffer.
67 * @param pszOutputCS Codeset of the input string.
68 * @param cFactor Input vs. output size factor.
69 */
70static int rtstrConvert(const void *pvInput, size_t cbInput, const char *pszInputCS, void **ppvOutput, size_t cbOutput, const char *pszOutputCS, unsigned cFactor)
71{
72 /*
73 * Allocate buffer
74 */
75 void *pvOutput;
76 size_t cbOutput2;
77 if (!cbOutput)
78 {
79 cbOutput2 = cbInput * cFactor;
80 pvOutput = RTMemTmpAlloc(cbOutput2 + sizeof(RTUTF16));
81 if (!pvOutput)
82 return VERR_NO_TMP_MEMORY;
83 }
84 else
85 {
86 pvOutput = *ppvOutput;
87 cbOutput2 = cbOutput - (!strcmp(pszOutputCS, "UCS-2") ? sizeof(RTUTF16) : 1);
88 if (cbOutput2 > cbOutput)
89 return VERR_BUFFER_OVERFLOW;
90 }
91
92 /*
93 * Use a loop here to retry with bigger buffers.
94 */
95 for (unsigned cTries = 10; cTries > 0; cTries--)
96 {
97 /*
98 * Create conversion object.
99 */
100#ifdef RT_OS_SOLARIS
101 /* Solaris doesn't grok empty codeset strings, so help it find the current codeset. */
102 if (!*pszInputCS)
103 pszInputCS = nl_langinfo(CODESET);
104 if (!*pszOutputCS)
105 pszOutputCS = nl_langinfo(CODESET);
106#endif
107 iconv_t icHandle = iconv_open(pszOutputCS, pszInputCS);
108 if (icHandle != (iconv_t)-1)
109 {
110 /*
111 * Do the conversion.
112 */
113 size_t cbInLeft = cbInput;
114 size_t cbOutLeft = cbOutput2;
115 const void *pvInputLeft = pvInput;
116 void *pvOutputLeft = pvOutput;
117#if defined(RT_OS_LINUX) || (defined(RT_OS_DARWIN) && defined(_DARWIN_FEATURE_UNIX_CONFORMANCE)) /* there are different opinions about the constness of the input buffer. */
118 if (iconv(icHandle, (char **)&pvInputLeft, &cbInLeft, (char **)&pvOutputLeft, &cbOutLeft) != (size_t)-1)
119#else
120 if (iconv(icHandle, (const char **)&pvInputLeft, &cbInLeft, (char **)&pvOutputLeft, &cbOutLeft) != (size_t)-1)
121#endif
122 {
123 if (!cbInLeft)
124 {
125 /*
126 * We're done, just add the terminator and return.
127 * (Two terminators to support UCS-2 output, too.)
128 */
129 iconv_close(icHandle);
130 if (!cbOutput || !strcmp(pszOutputCS, "UCS-2"))
131 *(PRTUTF16)pvOutputLeft = '\0';
132 else
133 *(char *)pvOutputLeft = '\0';
134 *ppvOutput = pvOutput;
135 return VINF_SUCCESS;
136 }
137 else
138 errno = E2BIG;
139 }
140 iconv_close(icHandle);
141
142 /*
143 * If we failed because of output buffer space we'll
144 * increase the output buffer size and retry.
145 */
146 if (errno == E2BIG)
147 {
148 if (!cbOutput)
149 {
150 RTMemTmpFree(pvOutput);
151 cbOutput2 *= 2;
152 pvOutput = RTMemTmpAlloc(cbOutput2);
153 if (!pvOutput)
154 return VERR_NO_TMP_MEMORY;
155 continue;
156 }
157 return VERR_BUFFER_OVERFLOW;
158 }
159 }
160 break;
161 }
162
163 /* failure */
164 if (!cbOutput)
165 RTMemTmpFree(pvOutput);
166 return VERR_NO_TRANSLATION;
167}
168
169
170/**
171 * Allocates tmp buffer, translates pszString from UTF8 to current codepage.
172 *
173 * @returns iprt status code.
174 * @param ppszString Receives pointer of allocated native CP string.
175 * The returned pointer must be freed using RTStrFree().
176 * @param pszString UTF-8 string to convert.
177 */
178RTR3DECL(int) RTStrUtf8ToCurrentCP(char **ppszString, const char *pszString)
179{
180 Assert(ppszString);
181 Assert(pszString);
182 *ppszString = NULL;
183
184 /*
185 * Assume result string length is not longer than UTF-8 string.
186 */
187 size_t cch = strlen(pszString);
188 if (cch <= 0)
189 {
190 /* zero length string passed. */
191 *ppszString = (char *)RTMemTmpAllocZ(sizeof(char));
192 if (*ppszString)
193 return VINF_SUCCESS;
194 return VERR_NO_TMP_MEMORY;
195 }
196 return rtstrConvert(pszString, cch, "UTF-8", (void **)ppszString, 0, "", 1);
197}
198
199
200/**
201 * Allocates tmp buffer, translates pszString from current codepage to UTF-8.
202 *
203 * @returns iprt status code.
204 * @param ppszString Receives pointer of allocated UTF-8 string.
205 * The returned pointer must be freed using RTStrFree().
206 * @param pszString Native string to convert.
207 */
208RTR3DECL(int) RTStrCurrentCPToUtf8(char **ppszString, const char *pszString)
209{
210 Assert(ppszString);
211 Assert(pszString);
212 *ppszString = NULL;
213
214 /*
215 * Attempt with UTF-8 length of 2x the native lenght.
216 */
217 size_t cch = strlen(pszString);
218 if (cch <= 0)
219 {
220 /* zero length string passed. */
221 *ppszString = (char *)RTMemTmpAllocZ(sizeof(char));
222 if (*ppszString)
223 return VINF_SUCCESS;
224 return VERR_NO_TMP_MEMORY;
225 }
226 return rtstrConvert(pszString, cch, "", (void **)ppszString, 0, "UTF-8", 2);
227}
228
229
Note: See TracBrowser for help on using the repository browser.

© 2024 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette