VirtualBox

source: vbox/trunk/src/VBox/Runtime/r3/posix/utf8-posix.cpp@ 28800

Last change on this file since 28800 was 28800, checked in by vboxsync, 15 years ago

Automated rebranding to Oracle copyright/license strings via filemuncher

  • Property svn:eol-style set to native
  • Property svn:keywords set to Id
File size: 7.7 KB
Line 
1/* $Id: utf8-posix.cpp 28800 2010-04-27 08:22:32Z vboxsync $ */
2/** @file
3 * IPRT - UTF-8 helpers, POSIX.
4 */
5
6/*
7 * Copyright (C) 2006-2007 Oracle Corporation
8 *
9 * This file is part of VirtualBox Open Source Edition (OSE), as
10 * available from http://www.virtualbox.org. This file is free software;
11 * you can redistribute it and/or modify it under the terms of the GNU
12 * General Public License (GPL) as published by the Free Software
13 * Foundation, in version 2 as it comes in the "COPYING" file of the
14 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
15 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
16 *
17 * The contents of this file may alternatively be used under the terms
18 * of the Common Development and Distribution License Version 1.0
19 * (CDDL) only, as it comes in the "COPYING.CDDL" file of the
20 * VirtualBox OSE distribution, in which case the provisions of the
21 * CDDL are applicable instead of those of the GPL.
22 *
23 * You may elect to license modified versions of this file under the
24 * terms and conditions of either the GPL or the CDDL or both.
25 */
26
27
28/*******************************************************************************
29* Header Files *
30*******************************************************************************/
31#include <iprt/string.h>
32#include <iprt/alloc.h>
33#include <iprt/assert.h>
34#include <iprt/err.h>
35#include <iprt/string.h>
36
37#include <errno.h>
38#include <locale.h>
39#include <iconv.h>
40#include <wctype.h>
41
42#ifdef RT_OS_SOLARIS
43# include <langinfo.h>
44#endif
45
46#include "internal/alignmentchecks.h"
47
48
49/*******************************************************************************
50* Internal Functions *
51*******************************************************************************/
52static int rtstrConvert(const void *pvInput, size_t cbInput, const char *pszInputCS, void **ppvOutput, size_t cbOutput, const char *pszOutputCS, unsigned cFactor);
53
54
55/**
56 * Converts a string from one charset to another.
57 *
58 * @returns iprt status code.
59 * @param pvInput Pointer to intput string.
60 * @param cbInput Size (in bytes) of input string. Excludes any terminators.
61 * @param pszInputCS Codeset of the input string.
62 * @param ppvOutput Pointer to pointer to output buffer if cbOutput > 0.
63 * If cbOutput is 0 this is where the pointer to the allocated
64 * buffer is stored.
65 * @param cbOutput Size of the passed in buffer.
66 * @param pszOutputCS Codeset of the input string.
67 * @param cFactor Input vs. output size factor.
68 */
69static int rtstrConvert(const void *pvInput, size_t cbInput, const char *pszInputCS, void **ppvOutput, size_t cbOutput, const char *pszOutputCS, unsigned cFactor)
70{
71 /*
72 * Allocate buffer
73 */
74 bool fUcs2Term;
75 void *pvOutput;
76 size_t cbOutput2;
77 if (!cbOutput)
78 {
79 cbOutput2 = cbInput * cFactor;
80 pvOutput = RTMemTmpAlloc(cbOutput2 + sizeof(RTUTF16));
81 if (!pvOutput)
82 return VERR_NO_TMP_MEMORY;
83 fUcs2Term = true;
84 }
85 else
86 {
87 pvOutput = *ppvOutput;
88 fUcs2Term = !strcmp(pszOutputCS, "UCS-2");
89 cbOutput2 = cbOutput - (fUcs2Term ? sizeof(RTUTF16) : 1);
90 if (cbOutput2 > cbOutput)
91 return VERR_BUFFER_OVERFLOW;
92 }
93
94 /*
95 * Use a loop here to retry with bigger buffers.
96 */
97 for (unsigned cTries = 10; cTries > 0; cTries--)
98 {
99 /*
100 * Create conversion object.
101 */
102#ifdef RT_OS_SOLARIS
103 /* Solaris doesn't grok empty codeset strings, so help it find the current codeset. */
104 if (!*pszInputCS)
105 pszInputCS = nl_langinfo(CODESET);
106 if (!*pszOutputCS)
107 pszOutputCS = nl_langinfo(CODESET);
108#endif
109 IPRT_ALIGNMENT_CHECKS_DISABLE(); /* glibc causes trouble */
110 iconv_t icHandle = iconv_open(pszOutputCS, pszInputCS);
111 IPRT_ALIGNMENT_CHECKS_ENABLE();
112 if (icHandle != (iconv_t)-1)
113 {
114 /*
115 * Do the conversion.
116 */
117 size_t cbInLeft = cbInput;
118 size_t cbOutLeft = cbOutput2;
119 const void *pvInputLeft = pvInput;
120 void *pvOutputLeft = pvOutput;
121#if defined(RT_OS_LINUX) || (defined(RT_OS_DARWIN) && defined(_DARWIN_FEATURE_UNIX_CONFORMANCE)) /* there are different opinions about the constness of the input buffer. */
122 if (iconv(icHandle, (char **)&pvInputLeft, &cbInLeft, (char **)&pvOutputLeft, &cbOutLeft) != (size_t)-1)
123#else
124 if (iconv(icHandle, (const char **)&pvInputLeft, &cbInLeft, (char **)&pvOutputLeft, &cbOutLeft) != (size_t)-1)
125#endif
126 {
127 if (!cbInLeft)
128 {
129 /*
130 * We're done, just add the terminator and return.
131 * (Two terminators to support UCS-2 output, too.)
132 */
133 iconv_close(icHandle);
134 ((char *)pvOutputLeft)[0] = '\0';
135 if (fUcs2Term)
136 ((char *)pvOutputLeft)[1] = '\0';
137 *ppvOutput = pvOutput;
138 return VINF_SUCCESS;
139 }
140 errno = E2BIG;
141 }
142 iconv_close(icHandle);
143
144 /*
145 * If we failed because of output buffer space we'll
146 * increase the output buffer size and retry.
147 */
148 if (errno == E2BIG)
149 {
150 if (!cbOutput)
151 {
152 RTMemTmpFree(pvOutput);
153 cbOutput2 *= 2;
154 pvOutput = RTMemTmpAlloc(cbOutput2 + sizeof(RTUTF16));
155 if (!pvOutput)
156 return VERR_NO_TMP_MEMORY;
157 continue;
158 }
159 return VERR_BUFFER_OVERFLOW;
160 }
161 }
162 break;
163 }
164
165 /* failure */
166 if (!cbOutput)
167 RTMemTmpFree(pvOutput);
168 return VERR_NO_TRANSLATION;
169}
170
171
172/**
173 * Allocates tmp buffer, translates pszString from UTF8 to current codepage.
174 *
175 * @returns iprt status code.
176 * @param ppszString Receives pointer of allocated native CP string.
177 * The returned pointer must be freed using RTStrFree().
178 * @param pszString UTF-8 string to convert.
179 */
180RTR3DECL(int) RTStrUtf8ToCurrentCP(char **ppszString, const char *pszString)
181{
182 Assert(ppszString);
183 Assert(pszString);
184 *ppszString = NULL;
185
186 /*
187 * Assume result string length is not longer than UTF-8 string.
188 */
189 size_t cch = strlen(pszString);
190 if (cch <= 0)
191 {
192 /* zero length string passed. */
193 *ppszString = (char *)RTMemTmpAllocZ(sizeof(char));
194 if (*ppszString)
195 return VINF_SUCCESS;
196 return VERR_NO_TMP_MEMORY;
197 }
198 return rtstrConvert(pszString, cch, "UTF-8", (void **)ppszString, 0, "", 1);
199}
200
201
202/**
203 * Allocates tmp buffer, translates pszString from current codepage to UTF-8.
204 *
205 * @returns iprt status code.
206 * @param ppszString Receives pointer of allocated UTF-8 string.
207 * The returned pointer must be freed using RTStrFree().
208 * @param pszString Native string to convert.
209 */
210RTR3DECL(int) RTStrCurrentCPToUtf8(char **ppszString, const char *pszString)
211{
212 Assert(ppszString);
213 Assert(pszString);
214 *ppszString = NULL;
215
216 /*
217 * Attempt with UTF-8 length of 2x the native lenght.
218 */
219 size_t cch = strlen(pszString);
220 if (cch <= 0)
221 {
222 /* zero length string passed. */
223 *ppszString = (char *)RTMemTmpAllocZ(sizeof(char));
224 if (*ppszString)
225 return VINF_SUCCESS;
226 return VERR_NO_TMP_MEMORY;
227 }
228 return rtstrConvert(pszString, cch, "", (void **)ppszString, 0, "UTF-8", 2);
229}
230
Note: See TracBrowser for help on using the repository browser.

© 2024 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette