VirtualBox

source: vbox/trunk/src/VBox/Runtime/r3/posix/utf8-posix.cpp@ 4512

Last change on this file since 4512 was 4071, checked in by vboxsync, 17 years ago

Biggest check-in ever. New source code headers for all (C) innotek files.

  • Property svn:eol-style set to native
  • Property svn:keywords set to Id
File size: 7.1 KB
Line 
1/* $Id: utf8-posix.cpp 4071 2007-08-07 17:07:59Z vboxsync $ */
2/** @file
3 * innotek Portable Runtime - UTF-8 helpers, POSIX.
4 */
5
6/*
7 * Copyright (C) 2006-2007 innotek GmbH
8 *
9 * This file is part of VirtualBox Open Source Edition (OSE), as
10 * available from http://www.virtualbox.org. This file is free software;
11 * you can redistribute it and/or modify it under the terms of the GNU
12 * General Public License as published by the Free Software Foundation,
13 * in version 2 as it comes in the "COPYING" file of the VirtualBox OSE
14 * distribution. VirtualBox OSE is distributed in the hope that it will
15 * be useful, but WITHOUT ANY WARRANTY of any kind.
16 */
17
18
19/*******************************************************************************
20* Header Files *
21*******************************************************************************/
22#include <iprt/string.h>
23#include <iprt/alloc.h>
24#include <iprt/assert.h>
25#include <iprt/err.h>
26#include <iprt/string.h>
27
28#include <errno.h>
29#include <locale.h>
30#include <iconv.h>
31#include <wctype.h>
32
33#ifdef RT_OS_SOLARIS
34#include <langinfo.h>
35#endif
36
37/*******************************************************************************
38* Internal Functions *
39*******************************************************************************/
40static int rtstrConvert(const void *pvInput, size_t cbInput, const char *pszInputCS, void **ppvOutput, size_t cbOutput, const char *pszOutputCS, unsigned cFactor);
41
42
43/**
44 * Converts a string from one charset to another.
45 *
46 * @returns iprt status code.
47 * @param pvInput Pointer to intput string.
48 * @param cbInput Size (in bytes) of input string. Excludes any terminators.
49 * @param pszInputCS Codeset of the input string.
50 * @param ppvOutput Pointer to pointer to output buffer if cbOutput > 0.
51 * If cbOutput is 0 this is where the pointer to the allocated
52 * buffer is stored.
53 * @param cbOutput Size of the passed in buffer.
54 * @param pszOutputCS Codeset of the input string.
55 * @param cFactor Input vs. output size factor.
56 */
57static int rtstrConvert(const void *pvInput, size_t cbInput, const char *pszInputCS, void **ppvOutput, size_t cbOutput, const char *pszOutputCS, unsigned cFactor)
58{
59 /*
60 * Allocate buffer
61 */
62 void *pvOutput;
63 size_t cbOutput2;
64 if (!cbOutput)
65 {
66 cbOutput2 = cbInput * cFactor;
67 pvOutput = RTMemTmpAlloc(cbOutput2 + sizeof(RTUCS2));
68 if (!pvOutput)
69 return VERR_NO_TMP_MEMORY;
70 }
71 else
72 {
73 pvOutput = *ppvOutput;
74 cbOutput2 = cbOutput - (!strcmp(pszOutputCS, "UCS-2") ? sizeof(RTUCS2) : 1);
75 if (cbOutput2 > cbOutput)
76 return VERR_BUFFER_OVERFLOW;
77 }
78
79 /*
80 * Use a loop here to retry with bigger buffers.
81 */
82 for (unsigned cTries = 10; cTries > 0; cTries--)
83 {
84 /*
85 * Create conversion object.
86 */
87#ifdef RT_OS_SOLARIS
88 /* Solaris doesn't grok empty codeset strings, so help it find the current codeset. */
89 if (!*pszInputCS)
90 pszInputCS = nl_langinfo(CODESET);
91 if (!*pszOutputCS)
92 pszOutputCS = nl_langinfo(CODESET);
93#endif
94 iconv_t icHandle = iconv_open(pszOutputCS, pszInputCS);
95 if (icHandle != (iconv_t)-1)
96 {
97 /*
98 * Do the conversion.
99 */
100 size_t cbInLeft = cbInput;
101 size_t cbOutLeft = cbOutput2;
102 const void *pvInputLeft = pvInput;
103 void *pvOutputLeft = pvOutput;
104#ifdef RT_OS_LINUX /* glibc has an incorrect declaration of the api. */
105 if (iconv(icHandle, (char **)&pvInputLeft, &cbInLeft, (char **)&pvOutputLeft, &cbOutLeft) != (size_t)-1)
106#else
107 if (iconv(icHandle, (const char **)&pvInputLeft, &cbInLeft, (char **)&pvOutputLeft, &cbOutLeft) != (size_t)-1)
108#endif
109 {
110 if (!cbInLeft)
111 {
112 /*
113 * We're done, just add the terminator and return.
114 * (Two terminators to support UCS-2 output, too.)
115 */
116 iconv_close(icHandle);
117 if (!cbOutput || !strcmp(pszOutputCS, "UCS-2"))
118 *(PRTUCS2)pvOutputLeft = '\0';
119 else
120 *(char *)pvOutputLeft = '\0';
121 *ppvOutput = pvOutput;
122 return VINF_SUCCESS;
123 }
124 else
125 errno = E2BIG;
126 }
127 iconv_close(icHandle);
128
129 /*
130 * If we failed because of output buffer space we'll
131 * increase the output buffer size and retry.
132 */
133 if (errno == E2BIG)
134 {
135 if (!cbOutput)
136 {
137 RTMemTmpFree(pvOutput);
138 cbOutput2 *= 2;
139 pvOutput = RTMemTmpAlloc(cbOutput2);
140 if (!pvOutput)
141 return VERR_NO_TMP_MEMORY;
142 continue;
143 }
144 return VERR_BUFFER_OVERFLOW;
145 }
146 }
147 break;
148 }
149
150 /* failure */
151 if (!cbOutput)
152 RTMemTmpFree(pvOutput);
153 return VERR_NO_TRANSLATION;
154}
155
156
157/**
158 * Allocates tmp buffer, translates pszString from UTF8 to current codepage.
159 *
160 * @returns iprt status code.
161 * @param ppszString Receives pointer of allocated native CP string.
162 * The returned pointer must be freed using RTStrFree().
163 * @param pszString UTF-8 string to convert.
164 */
165RTR3DECL(int) RTStrUtf8ToCurrentCP(char **ppszString, const char *pszString)
166{
167 Assert(ppszString);
168 Assert(pszString);
169 *ppszString = NULL;
170
171 /*
172 * Assume result string length is not longer than UTF-8 string.
173 */
174 size_t cch = strlen(pszString);
175 if (cch <= 0)
176 {
177 /* zero length string passed. */
178 *ppszString = (char *)RTMemTmpAllocZ(sizeof(char));
179 if (*ppszString)
180 return VINF_SUCCESS;
181 return VERR_NO_TMP_MEMORY;
182 }
183 return rtstrConvert(pszString, cch, "UTF-8", (void **)ppszString, 0, "", 1);
184}
185
186
187/**
188 * Allocates tmp buffer, translates pszString from current codepage to UTF-8.
189 *
190 * @returns iprt status code.
191 * @param ppszString Receives pointer of allocated UTF-8 string.
192 * The returned pointer must be freed using RTStrFree().
193 * @param pszString Native string to convert.
194 */
195RTR3DECL(int) RTStrCurrentCPToUtf8(char **ppszString, const char *pszString)
196{
197 Assert(ppszString);
198 Assert(pszString);
199 *ppszString = NULL;
200
201 /*
202 * Attempt with UTF-8 length of 2x the native lenght.
203 */
204 size_t cch = strlen(pszString);
205 if (cch <= 0)
206 {
207 /* zero length string passed. */
208 *ppszString = (char *)RTMemTmpAllocZ(sizeof(char));
209 if (*ppszString)
210 return VINF_SUCCESS;
211 return VERR_NO_TMP_MEMORY;
212 }
213 return rtstrConvert(pszString, cch, "", (void **)ppszString, 0, "UTF-8", 2);
214}
215
216
Note: See TracBrowser for help on using the repository browser.

© 2024 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette