VirtualBox

source: vbox/trunk/src/VBox/Runtime/r3/posix/utf8-posix.cpp@ 7689

Last change on this file since 7689 was 7426, checked in by vboxsync, 17 years ago

RTUCS2 -> RTUTF16.

  • Property svn:eol-style set to native
  • Property svn:keywords set to Id
File size: 7.6 KB
Line 
1/* $Id: utf8-posix.cpp 7426 2008-03-12 09:46:29Z vboxsync $ */
2/** @file
3 * innotek Portable Runtime - UTF-8 helpers, POSIX.
4 */
5
6/*
7 * Copyright (C) 2006-2007 innotek GmbH
8 *
9 * This file is part of VirtualBox Open Source Edition (OSE), as
10 * available from http://www.virtualbox.org. This file is free software;
11 * you can redistribute it and/or modify it under the terms of the GNU
12 * General Public License (GPL) as published by the Free Software
13 * Foundation, in version 2 as it comes in the "COPYING" file of the
14 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
15 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
16 *
17 * The contents of this file may alternatively be used under the terms
18 * of the Common Development and Distribution License Version 1.0
19 * (CDDL) only, as it comes in the "COPYING.CDDL" file of the
20 * VirtualBox OSE distribution, in which case the provisions of the
21 * CDDL are applicable instead of those of the GPL.
22 *
23 * You may elect to license modified versions of this file under the
24 * terms and conditions of either the GPL or the CDDL or both.
25 */
26
27
28/*******************************************************************************
29* Header Files *
30*******************************************************************************/
31#include <iprt/string.h>
32#include <iprt/alloc.h>
33#include <iprt/assert.h>
34#include <iprt/err.h>
35#include <iprt/string.h>
36
37#include <errno.h>
38#include <locale.h>
39#include <iconv.h>
40#include <wctype.h>
41
42#ifdef RT_OS_SOLARIS
43#include <langinfo.h>
44#endif
45
46/*******************************************************************************
47* Internal Functions *
48*******************************************************************************/
49static int rtstrConvert(const void *pvInput, size_t cbInput, const char *pszInputCS, void **ppvOutput, size_t cbOutput, const char *pszOutputCS, unsigned cFactor);
50
51
52/**
53 * Converts a string from one charset to another.
54 *
55 * @returns iprt status code.
56 * @param pvInput Pointer to intput string.
57 * @param cbInput Size (in bytes) of input string. Excludes any terminators.
58 * @param pszInputCS Codeset of the input string.
59 * @param ppvOutput Pointer to pointer to output buffer if cbOutput > 0.
60 * If cbOutput is 0 this is where the pointer to the allocated
61 * buffer is stored.
62 * @param cbOutput Size of the passed in buffer.
63 * @param pszOutputCS Codeset of the input string.
64 * @param cFactor Input vs. output size factor.
65 */
66static int rtstrConvert(const void *pvInput, size_t cbInput, const char *pszInputCS, void **ppvOutput, size_t cbOutput, const char *pszOutputCS, unsigned cFactor)
67{
68 /*
69 * Allocate buffer
70 */
71 void *pvOutput;
72 size_t cbOutput2;
73 if (!cbOutput)
74 {
75 cbOutput2 = cbInput * cFactor;
76 pvOutput = RTMemTmpAlloc(cbOutput2 + sizeof(RTUTF16));
77 if (!pvOutput)
78 return VERR_NO_TMP_MEMORY;
79 }
80 else
81 {
82 pvOutput = *ppvOutput;
83 cbOutput2 = cbOutput - (!strcmp(pszOutputCS, "UCS-2") ? sizeof(RTUTF16) : 1);
84 if (cbOutput2 > cbOutput)
85 return VERR_BUFFER_OVERFLOW;
86 }
87
88 /*
89 * Use a loop here to retry with bigger buffers.
90 */
91 for (unsigned cTries = 10; cTries > 0; cTries--)
92 {
93 /*
94 * Create conversion object.
95 */
96#ifdef RT_OS_SOLARIS
97 /* Solaris doesn't grok empty codeset strings, so help it find the current codeset. */
98 if (!*pszInputCS)
99 pszInputCS = nl_langinfo(CODESET);
100 if (!*pszOutputCS)
101 pszOutputCS = nl_langinfo(CODESET);
102#endif
103 iconv_t icHandle = iconv_open(pszOutputCS, pszInputCS);
104 if (icHandle != (iconv_t)-1)
105 {
106 /*
107 * Do the conversion.
108 */
109 size_t cbInLeft = cbInput;
110 size_t cbOutLeft = cbOutput2;
111 const void *pvInputLeft = pvInput;
112 void *pvOutputLeft = pvOutput;
113#if defined(RT_OS_LINUX) || (defined(RT_OS_DARWIN) && defined(_DARWIN_FEATURE_UNIX_CONFORMANCE)) /* there are different opinions about the constness of the input buffer. */
114 if (iconv(icHandle, (char **)&pvInputLeft, &cbInLeft, (char **)&pvOutputLeft, &cbOutLeft) != (size_t)-1)
115#else
116 if (iconv(icHandle, (const char **)&pvInputLeft, &cbInLeft, (char **)&pvOutputLeft, &cbOutLeft) != (size_t)-1)
117#endif
118 {
119 if (!cbInLeft)
120 {
121 /*
122 * We're done, just add the terminator and return.
123 * (Two terminators to support UCS-2 output, too.)
124 */
125 iconv_close(icHandle);
126 if (!cbOutput || !strcmp(pszOutputCS, "UCS-2"))
127 *(PRTUTF16)pvOutputLeft = '\0';
128 else
129 *(char *)pvOutputLeft = '\0';
130 *ppvOutput = pvOutput;
131 return VINF_SUCCESS;
132 }
133 else
134 errno = E2BIG;
135 }
136 iconv_close(icHandle);
137
138 /*
139 * If we failed because of output buffer space we'll
140 * increase the output buffer size and retry.
141 */
142 if (errno == E2BIG)
143 {
144 if (!cbOutput)
145 {
146 RTMemTmpFree(pvOutput);
147 cbOutput2 *= 2;
148 pvOutput = RTMemTmpAlloc(cbOutput2);
149 if (!pvOutput)
150 return VERR_NO_TMP_MEMORY;
151 continue;
152 }
153 return VERR_BUFFER_OVERFLOW;
154 }
155 }
156 break;
157 }
158
159 /* failure */
160 if (!cbOutput)
161 RTMemTmpFree(pvOutput);
162 return VERR_NO_TRANSLATION;
163}
164
165
166/**
167 * Allocates tmp buffer, translates pszString from UTF8 to current codepage.
168 *
169 * @returns iprt status code.
170 * @param ppszString Receives pointer of allocated native CP string.
171 * The returned pointer must be freed using RTStrFree().
172 * @param pszString UTF-8 string to convert.
173 */
174RTR3DECL(int) RTStrUtf8ToCurrentCP(char **ppszString, const char *pszString)
175{
176 Assert(ppszString);
177 Assert(pszString);
178 *ppszString = NULL;
179
180 /*
181 * Assume result string length is not longer than UTF-8 string.
182 */
183 size_t cch = strlen(pszString);
184 if (cch <= 0)
185 {
186 /* zero length string passed. */
187 *ppszString = (char *)RTMemTmpAllocZ(sizeof(char));
188 if (*ppszString)
189 return VINF_SUCCESS;
190 return VERR_NO_TMP_MEMORY;
191 }
192 return rtstrConvert(pszString, cch, "UTF-8", (void **)ppszString, 0, "", 1);
193}
194
195
196/**
197 * Allocates tmp buffer, translates pszString from current codepage to UTF-8.
198 *
199 * @returns iprt status code.
200 * @param ppszString Receives pointer of allocated UTF-8 string.
201 * The returned pointer must be freed using RTStrFree().
202 * @param pszString Native string to convert.
203 */
204RTR3DECL(int) RTStrCurrentCPToUtf8(char **ppszString, const char *pszString)
205{
206 Assert(ppszString);
207 Assert(pszString);
208 *ppszString = NULL;
209
210 /*
211 * Attempt with UTF-8 length of 2x the native lenght.
212 */
213 size_t cch = strlen(pszString);
214 if (cch <= 0)
215 {
216 /* zero length string passed. */
217 *ppszString = (char *)RTMemTmpAllocZ(sizeof(char));
218 if (*ppszString)
219 return VINF_SUCCESS;
220 return VERR_NO_TMP_MEMORY;
221 }
222 return rtstrConvert(pszString, cch, "", (void **)ppszString, 0, "UTF-8", 2);
223}
224
225
Note: See TracBrowser for help on using the repository browser.

© 2024 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette