1 | /* ***** BEGIN LICENSE BLOCK *****
|
---|
2 | * Version: MPL 1.1/GPL 2.0/LGPL 2.1
|
---|
3 | *
|
---|
4 | * The contents of this file are subject to the Mozilla Public License Version
|
---|
5 | * 1.1 (the "License"); you may not use this file except in compliance with
|
---|
6 | * the License. You may obtain a copy of the License at
|
---|
7 | * http://www.mozilla.org/MPL/
|
---|
8 | *
|
---|
9 | * Software distributed under the License is distributed on an "AS IS" basis,
|
---|
10 | * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
|
---|
11 | * for the specific language governing rights and limitations under the
|
---|
12 | * License.
|
---|
13 | *
|
---|
14 | * The Original Code is Mozilla.
|
---|
15 | *
|
---|
16 | * The Initial Developer of the Original Code is
|
---|
17 | * Netscape Communications Corporation.
|
---|
18 | * Portions created by the Initial Developer are Copyright (C) 2002
|
---|
19 | * the Initial Developer. All Rights Reserved.
|
---|
20 | *
|
---|
21 | * Contributor(s):
|
---|
22 | * Darin Fisher <[email protected]>
|
---|
23 | * Brian Stell <[email protected]>
|
---|
24 | * Frank Tang <[email protected]>
|
---|
25 | * Brendan Eich <[email protected]>
|
---|
26 | * Sergei Dolgov <[email protected]>
|
---|
27 | *
|
---|
28 | * Alternatively, the contents of this file may be used under the terms of
|
---|
29 | * either the GNU General Public License Version 2 or later (the "GPL"), or
|
---|
30 | * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
|
---|
31 | * in which case the provisions of the GPL or the LGPL are applicable instead
|
---|
32 | * of those above. If you wish to allow use of your version of this file only
|
---|
33 | * under the terms of either the GPL or the LGPL, and not to allow others to
|
---|
34 | * use your version of this file under the terms of the MPL, indicate your
|
---|
35 | * decision by deleting the provisions above and replace them with the notice
|
---|
36 | * and other provisions required by the GPL or the LGPL. If you do not delete
|
---|
37 | * the provisions above, a recipient may use your version of this file under
|
---|
38 | * the terms of any one of the MPL, the GPL or the LGPL.
|
---|
39 | *
|
---|
40 | * ***** END LICENSE BLOCK ***** */
|
---|
41 |
|
---|
42 | #include "xpcom-private.h"
|
---|
43 |
|
---|
44 | //-----------------------------------------------------------------------------
|
---|
45 | // XP_UNIX
|
---|
46 | //-----------------------------------------------------------------------------
|
---|
47 | #if defined(XP_UNIX)
|
---|
48 |
|
---|
49 | #include <stdlib.h> // mbtowc, wctomb
|
---|
50 | #include <locale.h> // setlocale
|
---|
51 | #include "nscore.h"
|
---|
52 | #include "nsAString.h"
|
---|
53 | #include "nsReadableUtils.h"
|
---|
54 |
|
---|
55 | #include <iprt/assert.h>
|
---|
56 | #include <iprt/errcore.h>
|
---|
57 | #include <iprt/semaphore.h>
|
---|
58 |
|
---|
59 | //
|
---|
60 | // choose a conversion library. we used to use mbrtowc/wcrtomb under Linux,
|
---|
61 | // but that doesn't work for non-BMP characters whether we use '-fshort-wchar'
|
---|
62 | // or not (see bug 206811 and
|
---|
63 | // news://news.mozilla.org:119/[email protected]). we now use
|
---|
64 | // iconv for all platforms where nl_types.h and langinfo.h are present
|
---|
65 | // along with iconv.
|
---|
66 | //
|
---|
67 | #if defined(HAVE_ICONV) && defined(HAVE_NL_TYPES_H) && defined(HAVE_LANGINFO_CODESET)
|
---|
68 | #define USE_ICONV 1
|
---|
69 | #else
|
---|
70 | #define USE_STDCONV 1
|
---|
71 | #endif
|
---|
72 |
|
---|
73 | static void
|
---|
74 | isolatin1_to_utf16(const char **input, PRUint32 *inputLeft, PRUnichar **output, PRUint32 *outputLeft)
|
---|
75 | {
|
---|
76 | while (*inputLeft && *outputLeft) {
|
---|
77 | **output = (unsigned char) **input;
|
---|
78 | (*input)++;
|
---|
79 | (*inputLeft)--;
|
---|
80 | (*output)++;
|
---|
81 | (*outputLeft)--;
|
---|
82 | }
|
---|
83 | }
|
---|
84 |
|
---|
85 | static void
|
---|
86 | utf16_to_isolatin1(const PRUnichar **input, PRUint32 *inputLeft, char **output, PRUint32 *outputLeft)
|
---|
87 | {
|
---|
88 | while (*inputLeft && *outputLeft) {
|
---|
89 | **output = (unsigned char) **input;
|
---|
90 | (*input)++;
|
---|
91 | (*inputLeft)--;
|
---|
92 | (*output)++;
|
---|
93 | (*outputLeft)--;
|
---|
94 | }
|
---|
95 | }
|
---|
96 |
|
---|
97 | //-----------------------------------------------------------------------------
|
---|
98 | // conversion using iconv
|
---|
99 | //-----------------------------------------------------------------------------
|
---|
100 | #if defined(USE_ICONV)
|
---|
101 | #include <nl_types.h> // CODESET
|
---|
102 | #include <langinfo.h> // nl_langinfo
|
---|
103 | #include <iconv.h> // iconv_open, iconv, iconv_close
|
---|
104 | #include <errno.h>
|
---|
105 |
|
---|
106 | #if defined(HAVE_ICONV_WITH_CONST_INPUT)
|
---|
107 | #define ICONV_INPUT(x) (x)
|
---|
108 | #else
|
---|
109 | #define ICONV_INPUT(x) ((char **)x)
|
---|
110 | #endif
|
---|
111 |
|
---|
112 | // solaris definitely needs this, but we'll enable it by default
|
---|
113 | // just in case... but we know for sure that iconv(3) in glibc
|
---|
114 | // doesn't need this.
|
---|
115 | #if !defined(__GLIBC__)
|
---|
116 | #define ENABLE_UTF8_FALLBACK_SUPPORT
|
---|
117 | #endif
|
---|
118 |
|
---|
119 | #define INVALID_ICONV_T ((iconv_t) -1)
|
---|
120 |
|
---|
121 | static inline size_t
|
---|
122 | xp_iconv(iconv_t converter,
|
---|
123 | const char **input,
|
---|
124 | size_t *inputLeft,
|
---|
125 | char **output,
|
---|
126 | size_t *outputLeft)
|
---|
127 | {
|
---|
128 | size_t res, outputAvail = outputLeft ? *outputLeft : 0;
|
---|
129 | res = iconv(converter, ICONV_INPUT(input), inputLeft, output, outputLeft);
|
---|
130 | if (res == (size_t) -1) {
|
---|
131 | // on some platforms (e.g., linux) iconv will fail with
|
---|
132 | // E2BIG if it cannot convert _all_ of its input. it'll
|
---|
133 | // still adjust all of the in/out params correctly, so we
|
---|
134 | // can ignore this error. the assumption is that we will
|
---|
135 | // be called again to complete the conversion.
|
---|
136 | if ((errno == E2BIG) && (*outputLeft < outputAvail))
|
---|
137 | res = 0;
|
---|
138 | }
|
---|
139 | return res;
|
---|
140 | }
|
---|
141 |
|
---|
142 | static inline void
|
---|
143 | xp_iconv_reset(iconv_t converter)
|
---|
144 | {
|
---|
145 | // NOTE: the man pages on Solaris claim that you can pass NULL
|
---|
146 | // for all parameter to reset the converter, but beware the
|
---|
147 | // evil Solaris crash if you go down this route >:-)
|
---|
148 |
|
---|
149 | const char *zero_char_in_ptr = NULL;
|
---|
150 | char *zero_char_out_ptr = NULL;
|
---|
151 | size_t zero_size_in = 0,
|
---|
152 | zero_size_out = 0;
|
---|
153 |
|
---|
154 | xp_iconv(converter, &zero_char_in_ptr,
|
---|
155 | &zero_size_in,
|
---|
156 | &zero_char_out_ptr,
|
---|
157 | &zero_size_out);
|
---|
158 | }
|
---|
159 |
|
---|
160 | static inline iconv_t
|
---|
161 | xp_iconv_open(const char **to_list, const char **from_list)
|
---|
162 | {
|
---|
163 | iconv_t res;
|
---|
164 | const char **from_name;
|
---|
165 | const char **to_name;
|
---|
166 |
|
---|
167 | // try all possible combinations to locate a converter.
|
---|
168 | to_name = to_list;
|
---|
169 | while (*to_name) {
|
---|
170 | if (**to_name) {
|
---|
171 | from_name = from_list;
|
---|
172 | while (*from_name) {
|
---|
173 | if (**from_name) {
|
---|
174 | res = iconv_open(*to_name, *from_name);
|
---|
175 | if (res != INVALID_ICONV_T)
|
---|
176 | return res;
|
---|
177 | }
|
---|
178 | from_name++;
|
---|
179 | }
|
---|
180 | }
|
---|
181 | to_name++;
|
---|
182 | }
|
---|
183 |
|
---|
184 | return INVALID_ICONV_T;
|
---|
185 | }
|
---|
186 |
|
---|
187 | /*
|
---|
188 | * PRUnichar[] is NOT a UCS-2 array BUT a UTF-16 string. Therefore, we
|
---|
189 | * have to use UTF-16 with iconv(3) on platforms where it's supported.
|
---|
190 | * However, the way UTF-16 and UCS-2 are interpreted varies across platforms
|
---|
191 | * and implementations of iconv(3). On Tru64, it also depends on the environment
|
---|
192 | * variable. To avoid the trouble arising from byte-swapping
|
---|
193 | * (bug 208809), we have to try UTF-16LE/BE and UCS-2LE/BE before falling
|
---|
194 | * back to UTF-16 and UCS-2 and variants. We assume that UTF-16 and UCS-2
|
---|
195 | * on systems without UTF-16LE/BE and UCS-2LE/BE have the native endianness,
|
---|
196 | * which isn't the case of glibc 2.1.x, for which we use 'UNICODELITTLE'
|
---|
197 | * and 'UNICODEBIG'. It's also not true of Tru64 V4 when the environment
|
---|
198 | * variable ICONV_BYTEORDER is set to 'big-endian', about which not much
|
---|
199 | * can be done other than adding a note in the release notes. (bug 206811)
|
---|
200 | */
|
---|
// NULL-terminated list of iconv names to try for UTF-16, most preferred first.
static const char *UTF_16_NAMES[] = {
    // explicit-endianness names matching the build's byte order
#if defined(IS_LITTLE_ENDIAN)
    "UTF-16LE",
#  if defined(__GLIBC__)
    "UNICODELITTLE",
#  endif
    "UCS-2LE",
#else
    "UTF-16BE",
#  if defined(__GLIBC__)
    "UNICODEBIG",
#  endif
    "UCS-2BE",
#endif
    // names without an explicit byte order, and spelling variants
    "UTF-16",
    "UCS-2",
    "UCS2",
    "UCS_2",
    "ucs-2",
    "ucs2",
    "ucs_2",
    NULL
};
224 |
|
---|
225 | #if defined(ENABLE_UTF8_FALLBACK_SUPPORT)
|
---|
// NULL-terminated list of spellings to try when asking iconv for UTF-8.
static const char *UTF_8_NAMES[] = {
    "UTF-8", "UTF8", "UTF_8",
    "utf-8", "utf8", "utf_8",
    NULL
};
235 | #endif
|
---|
236 |
|
---|
// NULL-terminated list of spellings to try when asking iconv for
// ISO-8859-1.  With glibc only the canonical name is listed.
static const char *ISO_8859_1_NAMES[] = {
    "ISO-8859-1",
#if !defined(__GLIBC__)
    "ISO8859-1",  "ISO88591", "ISO_8859_1", "ISO8859_1",
    "iso-8859-1",
    "iso8859-1",  "iso88591", "iso_8859_1", "iso8859_1",
#endif
    NULL
};
252 |
|
---|
253 | class nsNativeCharsetConverter
|
---|
254 | {
|
---|
255 | public:
|
---|
256 | nsNativeCharsetConverter();
|
---|
257 | ~nsNativeCharsetConverter();
|
---|
258 |
|
---|
259 | nsresult NativeToUnicode(const char **input , PRUint32 *inputLeft,
|
---|
260 | PRUnichar **output, PRUint32 *outputLeft);
|
---|
261 | nsresult UnicodeToNative(const PRUnichar **input , PRUint32 *inputLeft,
|
---|
262 | char **output, PRUint32 *outputLeft);
|
---|
263 |
|
---|
264 | static void GlobalInit();
|
---|
265 | static void GlobalShutdown();
|
---|
266 |
|
---|
267 | private:
|
---|
268 | static iconv_t gNativeToUnicode;
|
---|
269 | static iconv_t gUnicodeToNative;
|
---|
270 | #if defined(ENABLE_UTF8_FALLBACK_SUPPORT)
|
---|
271 | static iconv_t gNativeToUTF8;
|
---|
272 | static iconv_t gUTF8ToNative;
|
---|
273 | static iconv_t gUnicodeToUTF8;
|
---|
274 | static iconv_t gUTF8ToUnicode;
|
---|
275 | #endif
|
---|
276 | static RTSEMFASTMUTEX gLock;
|
---|
277 | static PRBool gInitialized;
|
---|
278 |
|
---|
279 | static void LazyInit();
|
---|
280 |
|
---|
281 | static void Lock() { if (gLock != NILRTSEMFASTMUTEX) RTSemFastMutexRequest(gLock); }
|
---|
282 | static void Unlock() { if (gLock != NILRTSEMFASTMUTEX) RTSemFastMutexRelease(gLock); }
|
---|
283 | };
|
---|
284 |
|
---|
285 | iconv_t nsNativeCharsetConverter::gNativeToUnicode = INVALID_ICONV_T;
|
---|
286 | iconv_t nsNativeCharsetConverter::gUnicodeToNative = INVALID_ICONV_T;
|
---|
287 | #if defined(ENABLE_UTF8_FALLBACK_SUPPORT)
|
---|
288 | iconv_t nsNativeCharsetConverter::gNativeToUTF8 = INVALID_ICONV_T;
|
---|
289 | iconv_t nsNativeCharsetConverter::gUTF8ToNative = INVALID_ICONV_T;
|
---|
290 | iconv_t nsNativeCharsetConverter::gUnicodeToUTF8 = INVALID_ICONV_T;
|
---|
291 | iconv_t nsNativeCharsetConverter::gUTF8ToUnicode = INVALID_ICONV_T;
|
---|
292 | #endif
|
---|
293 | RTSEMFASTMUTEX nsNativeCharsetConverter::gLock = NIL_RTSEMFASTMUTEX;
|
---|
294 | PRBool nsNativeCharsetConverter::gInitialized = PR_FALSE;
|
---|
295 |
|
---|
296 | void
|
---|
297 | nsNativeCharsetConverter::LazyInit()
|
---|
298 | {
|
---|
299 | const char *blank_list[] = { "", NULL };
|
---|
300 | const char **native_charset_list = blank_list;
|
---|
301 | const char *native_charset = nl_langinfo(CODESET);
|
---|
302 | if (native_charset == nsnull) {
|
---|
303 | NS_ERROR("native charset is unknown");
|
---|
304 | // fallback to ISO-8859-1
|
---|
305 | native_charset_list = ISO_8859_1_NAMES;
|
---|
306 | }
|
---|
307 | else
|
---|
308 | native_charset_list[0] = native_charset;
|
---|
309 |
|
---|
310 | gNativeToUnicode = xp_iconv_open(UTF_16_NAMES, native_charset_list);
|
---|
311 | gUnicodeToNative = xp_iconv_open(native_charset_list, UTF_16_NAMES);
|
---|
312 |
|
---|
313 | #if defined(ENABLE_UTF8_FALLBACK_SUPPORT)
|
---|
314 | if (gNativeToUnicode == INVALID_ICONV_T) {
|
---|
315 | gNativeToUTF8 = xp_iconv_open(UTF_8_NAMES, native_charset_list);
|
---|
316 | gUTF8ToUnicode = xp_iconv_open(UTF_16_NAMES, UTF_8_NAMES);
|
---|
317 | NS_ASSERTION(gNativeToUTF8 != INVALID_ICONV_T, "no native to utf-8 converter");
|
---|
318 | NS_ASSERTION(gUTF8ToUnicode != INVALID_ICONV_T, "no utf-8 to utf-16 converter");
|
---|
319 | }
|
---|
320 | if (gUnicodeToNative == INVALID_ICONV_T) {
|
---|
321 | gUnicodeToUTF8 = xp_iconv_open(UTF_8_NAMES, UTF_16_NAMES);
|
---|
322 | gUTF8ToNative = xp_iconv_open(native_charset_list, UTF_8_NAMES);
|
---|
323 | NS_ASSERTION(gUnicodeToUTF8 != INVALID_ICONV_T, "no utf-16 to utf-8 converter");
|
---|
324 | NS_ASSERTION(gUTF8ToNative != INVALID_ICONV_T, "no utf-8 to native converter");
|
---|
325 | }
|
---|
326 | #else
|
---|
327 | NS_ASSERTION(gNativeToUnicode != INVALID_ICONV_T, "no native to utf-16 converter");
|
---|
328 | NS_ASSERTION(gUnicodeToNative != INVALID_ICONV_T, "no utf-16 to native converter");
|
---|
329 | #endif
|
---|
330 |
|
---|
331 | /*
|
---|
332 | * On Solaris 8 (and newer?), the iconv modules converting to UCS-2
|
---|
333 | * prepend a byte order mark unicode character (BOM, u+FEFF) during
|
---|
334 | * the first use of the iconv converter. The same is the case of
|
---|
335 | * glibc 2.2.9x and Tru64 V5 (see bug 208809) when 'UTF-16' is used.
|
---|
336 | * However, we use 'UTF-16LE/BE' in both cases, instead so that we
|
---|
337 | * should be safe. But just in case...
|
---|
338 | *
|
---|
339 | * This dummy conversion gets rid of the BOMs and fixes bug 153562.
|
---|
340 | */
|
---|
341 | char dummy_input[1] = { ' ' };
|
---|
342 | char dummy_output[4];
|
---|
343 |
|
---|
344 | if (gNativeToUnicode != INVALID_ICONV_T) {
|
---|
345 | const char *input = dummy_input;
|
---|
346 | size_t input_left = sizeof(dummy_input);
|
---|
347 | char *output = dummy_output;
|
---|
348 | size_t output_left = sizeof(dummy_output);
|
---|
349 |
|
---|
350 | xp_iconv(gNativeToUnicode, &input, &input_left, &output, &output_left);
|
---|
351 | }
|
---|
352 | #if defined(ENABLE_UTF8_FALLBACK_SUPPORT)
|
---|
353 | if (gUTF8ToUnicode != INVALID_ICONV_T) {
|
---|
354 | const char *input = dummy_input;
|
---|
355 | size_t input_left = sizeof(dummy_input);
|
---|
356 | char *output = dummy_output;
|
---|
357 | size_t output_left = sizeof(dummy_output);
|
---|
358 |
|
---|
359 | xp_iconv(gUTF8ToUnicode, &input, &input_left, &output, &output_left);
|
---|
360 | }
|
---|
361 | #endif
|
---|
362 |
|
---|
363 | gInitialized = PR_TRUE;
|
---|
364 | }
|
---|
365 |
|
---|
366 | void
|
---|
367 | nsNativeCharsetConverter::GlobalInit()
|
---|
368 | {
|
---|
369 | int vrc = RTSemFastMutexCreate(&gLock);
|
---|
370 | NS_ASSERTION(RT_SUCCESS(vrc), "lock creation failed");
|
---|
371 | }
|
---|
372 |
|
---|
373 | void
|
---|
374 | nsNativeCharsetConverter::GlobalShutdown()
|
---|
375 | {
|
---|
376 | if (gLock != NIL_RTSEMFASTMUTEX) {
|
---|
377 | RTSemFastMutexDestroy(gLock);
|
---|
378 | gLock = NIL_RTSEMFASTMUTEX;
|
---|
379 | }
|
---|
380 |
|
---|
381 | if (gNativeToUnicode != INVALID_ICONV_T) {
|
---|
382 | iconv_close(gNativeToUnicode);
|
---|
383 | gNativeToUnicode = INVALID_ICONV_T;
|
---|
384 | }
|
---|
385 |
|
---|
386 | if (gUnicodeToNative != INVALID_ICONV_T) {
|
---|
387 | iconv_close(gUnicodeToNative);
|
---|
388 | gUnicodeToNative = INVALID_ICONV_T;
|
---|
389 | }
|
---|
390 |
|
---|
391 | #if defined(ENABLE_UTF8_FALLBACK_SUPPORT)
|
---|
392 | if (gNativeToUTF8 != INVALID_ICONV_T) {
|
---|
393 | iconv_close(gNativeToUTF8);
|
---|
394 | gNativeToUTF8 = INVALID_ICONV_T;
|
---|
395 | }
|
---|
396 | if (gUTF8ToNative != INVALID_ICONV_T) {
|
---|
397 | iconv_close(gUTF8ToNative);
|
---|
398 | gUTF8ToNative = INVALID_ICONV_T;
|
---|
399 | }
|
---|
400 | if (gUnicodeToUTF8 != INVALID_ICONV_T) {
|
---|
401 | iconv_close(gUnicodeToUTF8);
|
---|
402 | gUnicodeToUTF8 = INVALID_ICONV_T;
|
---|
403 | }
|
---|
404 | if (gUTF8ToUnicode != INVALID_ICONV_T) {
|
---|
405 | iconv_close(gUTF8ToUnicode);
|
---|
406 | gUTF8ToUnicode = INVALID_ICONV_T;
|
---|
407 | }
|
---|
408 | #endif
|
---|
409 |
|
---|
410 | gInitialized = PR_FALSE;
|
---|
411 | }
|
---|
412 |
|
---|
413 | nsNativeCharsetConverter::nsNativeCharsetConverter()
|
---|
414 | {
|
---|
415 | Lock();
|
---|
416 | if (!gInitialized)
|
---|
417 | LazyInit();
|
---|
418 | }
|
---|
419 |
|
---|
420 | nsNativeCharsetConverter::~nsNativeCharsetConverter()
|
---|
421 | {
|
---|
422 | // reset converters for next time
|
---|
423 | if (gNativeToUnicode != INVALID_ICONV_T)
|
---|
424 | xp_iconv_reset(gNativeToUnicode);
|
---|
425 | if (gUnicodeToNative != INVALID_ICONV_T)
|
---|
426 | xp_iconv_reset(gUnicodeToNative);
|
---|
427 | #if defined(ENABLE_UTF8_FALLBACK_SUPPORT)
|
---|
428 | if (gNativeToUTF8 != INVALID_ICONV_T)
|
---|
429 | xp_iconv_reset(gNativeToUTF8);
|
---|
430 | if (gUTF8ToNative != INVALID_ICONV_T)
|
---|
431 | xp_iconv_reset(gUTF8ToNative);
|
---|
432 | if (gUnicodeToUTF8 != INVALID_ICONV_T)
|
---|
433 | xp_iconv_reset(gUnicodeToUTF8);
|
---|
434 | if (gUTF8ToUnicode != INVALID_ICONV_T)
|
---|
435 | xp_iconv_reset(gUTF8ToUnicode);
|
---|
436 | #endif
|
---|
437 | Unlock();
|
---|
438 | }
|
---|
439 |
|
---|
440 | nsresult
|
---|
441 | nsNativeCharsetConverter::NativeToUnicode(const char **input,
|
---|
442 | PRUint32 *inputLeft,
|
---|
443 | PRUnichar **output,
|
---|
444 | PRUint32 *outputLeft)
|
---|
445 | {
|
---|
446 | size_t res = 0;
|
---|
447 | size_t inLeft = (size_t) *inputLeft;
|
---|
448 | size_t outLeft = (size_t) *outputLeft * 2;
|
---|
449 |
|
---|
450 | if (gNativeToUnicode != INVALID_ICONV_T) {
|
---|
451 |
|
---|
452 | res = xp_iconv(gNativeToUnicode, input, &inLeft, (char **) output, &outLeft);
|
---|
453 |
|
---|
454 | *inputLeft = inLeft;
|
---|
455 | *outputLeft = outLeft / 2;
|
---|
456 | if (res != (size_t) -1)
|
---|
457 | return NS_OK;
|
---|
458 |
|
---|
459 | NS_WARNING("conversion from native to utf-16 failed");
|
---|
460 |
|
---|
461 | // reset converter
|
---|
462 | xp_iconv_reset(gNativeToUnicode);
|
---|
463 | }
|
---|
464 | #if defined(ENABLE_UTF8_FALLBACK_SUPPORT)
|
---|
465 | else if ((gNativeToUTF8 != INVALID_ICONV_T) &&
|
---|
466 | (gUTF8ToUnicode != INVALID_ICONV_T)) {
|
---|
467 | // convert first to UTF8, then from UTF8 to UCS2
|
---|
468 | const char *in = *input;
|
---|
469 |
|
---|
470 | char ubuf[1024];
|
---|
471 |
|
---|
472 | // we assume we're always called with enough space in |output|,
|
---|
473 | // so convert many chars at a time...
|
---|
474 | while (inLeft) {
|
---|
475 | char *p = ubuf;
|
---|
476 | size_t n = sizeof(ubuf);
|
---|
477 | res = xp_iconv(gNativeToUTF8, &in, &inLeft, &p, &n);
|
---|
478 | if (res == (size_t) -1) {
|
---|
479 | NS_ERROR("conversion from native to utf-8 failed");
|
---|
480 | break;
|
---|
481 | }
|
---|
482 | NS_ASSERTION(outLeft > 0, "bad assumption");
|
---|
483 | p = ubuf;
|
---|
484 | n = sizeof(ubuf) - n;
|
---|
485 | res = xp_iconv(gUTF8ToUnicode, (const char **) &p, &n, (char **) output, &outLeft);
|
---|
486 | if (res == (size_t) -1) {
|
---|
487 | NS_ERROR("conversion from utf-8 to utf-16 failed");
|
---|
488 | break;
|
---|
489 | }
|
---|
490 | }
|
---|
491 |
|
---|
492 | (*input) += (*inputLeft - inLeft);
|
---|
493 | *inputLeft = inLeft;
|
---|
494 | *outputLeft = outLeft / 2;
|
---|
495 |
|
---|
496 | if (res != (size_t) -1)
|
---|
497 | return NS_OK;
|
---|
498 |
|
---|
499 | // reset converters
|
---|
500 | xp_iconv_reset(gNativeToUTF8);
|
---|
501 | xp_iconv_reset(gUTF8ToUnicode);
|
---|
502 | }
|
---|
503 | #endif
|
---|
504 |
|
---|
505 | // fallback: zero-pad and hope for the best
|
---|
506 | // XXX This is lame and we have to do better.
|
---|
507 | isolatin1_to_utf16(input, inputLeft, output, outputLeft);
|
---|
508 |
|
---|
509 | return NS_OK;
|
---|
510 | }
|
---|
511 |
|
---|
512 | nsresult
|
---|
513 | nsNativeCharsetConverter::UnicodeToNative(const PRUnichar **input,
|
---|
514 | PRUint32 *inputLeft,
|
---|
515 | char **output,
|
---|
516 | PRUint32 *outputLeft)
|
---|
517 | {
|
---|
518 | size_t res = 0;
|
---|
519 | size_t inLeft = (size_t) *inputLeft * 2;
|
---|
520 | size_t outLeft = (size_t) *outputLeft;
|
---|
521 |
|
---|
522 | if (gUnicodeToNative != INVALID_ICONV_T) {
|
---|
523 | res = xp_iconv(gUnicodeToNative, (const char **) input, &inLeft, output, &outLeft);
|
---|
524 |
|
---|
525 | if (res != (size_t) -1) {
|
---|
526 | *inputLeft = inLeft / 2;
|
---|
527 | *outputLeft = outLeft;
|
---|
528 | return NS_OK;
|
---|
529 | }
|
---|
530 |
|
---|
531 | NS_ERROR("iconv failed");
|
---|
532 |
|
---|
533 | // reset converter
|
---|
534 | xp_iconv_reset(gUnicodeToNative);
|
---|
535 | }
|
---|
536 | #if defined(ENABLE_UTF8_FALLBACK_SUPPORT)
|
---|
537 | else if ((gUnicodeToUTF8 != INVALID_ICONV_T) &&
|
---|
538 | (gUTF8ToNative != INVALID_ICONV_T)) {
|
---|
539 | const char *in = (const char *) *input;
|
---|
540 |
|
---|
541 | char ubuf[6]; // max utf-8 char length (really only needs to be 4 bytes)
|
---|
542 |
|
---|
543 | // convert one uchar at a time...
|
---|
544 | while (inLeft && outLeft) {
|
---|
545 | char *p = ubuf;
|
---|
546 | size_t n = sizeof(ubuf), one_uchar = sizeof(PRUnichar);
|
---|
547 | res = xp_iconv(gUnicodeToUTF8, &in, &one_uchar, &p, &n);
|
---|
548 | if (res == (size_t) -1) {
|
---|
549 | NS_ERROR("conversion from utf-16 to utf-8 failed");
|
---|
550 | break;
|
---|
551 | }
|
---|
552 | p = ubuf;
|
---|
553 | n = sizeof(ubuf) - n;
|
---|
554 | res = xp_iconv(gUTF8ToNative, (const char **) &p, &n, output, &outLeft);
|
---|
555 | if (res == (size_t) -1) {
|
---|
556 | if (errno == E2BIG) {
|
---|
557 | // not enough room for last uchar... back up and return.
|
---|
558 | in -= sizeof(PRUnichar);
|
---|
559 | res = 0;
|
---|
560 | }
|
---|
561 | else
|
---|
562 | NS_ERROR("conversion from utf-8 to native failed");
|
---|
563 | break;
|
---|
564 | }
|
---|
565 | inLeft -= sizeof(PRUnichar);
|
---|
566 | }
|
---|
567 |
|
---|
568 | if (res != (size_t) -1) {
|
---|
569 | (*input) += (*inputLeft - inLeft/2);
|
---|
570 | *inputLeft = inLeft/2;
|
---|
571 | *outputLeft = outLeft;
|
---|
572 | return NS_OK;
|
---|
573 | }
|
---|
574 |
|
---|
575 | // reset converters
|
---|
576 | xp_iconv_reset(gUnicodeToUTF8);
|
---|
577 | xp_iconv_reset(gUTF8ToNative);
|
---|
578 | }
|
---|
579 | #endif
|
---|
580 |
|
---|
581 | // fallback: truncate and hope for the best
|
---|
582 | utf16_to_isolatin1(input, inputLeft, output, outputLeft);
|
---|
583 |
|
---|
584 | return NS_OK;
|
---|
585 | }
|
---|
586 |
|
---|
587 | #endif // USE_ICONV
|
---|
588 |
|
---|
589 | //-----------------------------------------------------------------------------
|
---|
590 | // conversion using mb[r]towc/wc[r]tomb
|
---|
591 | //-----------------------------------------------------------------------------
|
---|
592 | #if defined(USE_STDCONV)
|
---|
593 | #if defined(HAVE_WCRTOMB) || defined(HAVE_MBRTOWC)
|
---|
594 | #include <wchar.h> // mbrtowc, wcrtomb
|
---|
595 | #endif
|
---|
596 |
|
---|
597 | class nsNativeCharsetConverter
|
---|
598 | {
|
---|
599 | public:
|
---|
600 | nsNativeCharsetConverter();
|
---|
601 |
|
---|
602 | nsresult NativeToUnicode(const char **input , PRUint32 *inputLeft,
|
---|
603 | PRUnichar **output, PRUint32 *outputLeft);
|
---|
604 | nsresult UnicodeToNative(const PRUnichar **input , PRUint32 *inputLeft,
|
---|
605 | char **output, PRUint32 *outputLeft);
|
---|
606 |
|
---|
607 | static void GlobalInit();
|
---|
608 | static void GlobalShutdown() { }
|
---|
609 |
|
---|
610 | private:
|
---|
611 | static PRBool gWCharIsUnicode;
|
---|
612 |
|
---|
613 | #if defined(HAVE_WCRTOMB) || defined(HAVE_MBRTOWC)
|
---|
614 | mbstate_t ps;
|
---|
615 | #endif
|
---|
616 | };
|
---|
617 |
|
---|
618 | PRBool nsNativeCharsetConverter::gWCharIsUnicode = PR_FALSE;
|
---|
619 |
|
---|
620 | nsNativeCharsetConverter::nsNativeCharsetConverter()
|
---|
621 | {
|
---|
622 | #if defined(HAVE_WCRTOMB) || defined(HAVE_MBRTOWC)
|
---|
623 | memset(&ps, 0, sizeof(ps));
|
---|
624 | #endif
|
---|
625 | }
|
---|
626 |
|
---|
627 | void
|
---|
628 | nsNativeCharsetConverter::GlobalInit()
|
---|
629 | {
|
---|
630 | // verify that wchar_t for the current locale is actually unicode.
|
---|
631 | // if it is not, then we should avoid calling mbtowc/wctomb and
|
---|
632 | // just fallback on zero-pad/truncation conversion.
|
---|
633 | //
|
---|
634 | // this test cannot be done at build time because the encoding of
|
---|
635 | // wchar_t may depend on the runtime locale. sad, but true!!
|
---|
636 | //
|
---|
637 | // so, if wchar_t is unicode then converting an ASCII character
|
---|
638 | // to wchar_t should not change its numeric value. we'll just
|
---|
639 | // check what happens with the ASCII 'a' character.
|
---|
640 | //
|
---|
641 | // this test is not perfect... obviously, it could yield false
|
---|
642 | // positives, but then at least ASCII text would be converted
|
---|
643 | // properly (or maybe just the 'a' character) -- oh well :(
|
---|
644 |
|
---|
645 | char a = 'a';
|
---|
646 | unsigned int w = 0;
|
---|
647 |
|
---|
648 | int res = mbtowc((wchar_t *) &w, &a, 1);
|
---|
649 |
|
---|
650 | gWCharIsUnicode = (res != -1 && w == 'a');
|
---|
651 |
|
---|
652 | #ifdef DEBUG
|
---|
653 | if (!gWCharIsUnicode)
|
---|
654 | NS_WARNING("wchar_t is not unicode (unicode conversion will be lossy)");
|
---|
655 | #endif
|
---|
656 | }
|
---|
657 |
|
---|
658 | nsresult
|
---|
659 | nsNativeCharsetConverter::NativeToUnicode(const char **input,
|
---|
660 | PRUint32 *inputLeft,
|
---|
661 | PRUnichar **output,
|
---|
662 | PRUint32 *outputLeft)
|
---|
663 | {
|
---|
664 | if (gWCharIsUnicode) {
|
---|
665 | int incr;
|
---|
666 |
|
---|
667 | // cannot use wchar_t here since it may have been redefined (e.g.,
|
---|
668 | // via -fshort-wchar). hopefully, sizeof(tmp) is sufficient XP.
|
---|
669 | unsigned int tmp = 0;
|
---|
670 | while (*inputLeft && *outputLeft) {
|
---|
671 | #ifdef HAVE_MBRTOWC
|
---|
672 | incr = (int) mbrtowc((wchar_t *) &tmp, *input, *inputLeft, &ps);
|
---|
673 | #else
|
---|
674 | // XXX is this thread-safe?
|
---|
675 | incr = (int) mbtowc((wchar_t *) &tmp, *input, *inputLeft);
|
---|
676 | #endif
|
---|
677 | if (incr < 0) {
|
---|
678 | NS_WARNING("mbtowc failed: possible charset mismatch");
|
---|
679 | // zero-pad and hope for the best
|
---|
680 | tmp = (unsigned char) **input;
|
---|
681 | incr = 1;
|
---|
682 | }
|
---|
683 | **output = (PRUnichar) tmp;
|
---|
684 | (*input) += incr;
|
---|
685 | (*inputLeft) -= incr;
|
---|
686 | (*output)++;
|
---|
687 | (*outputLeft)--;
|
---|
688 | }
|
---|
689 | }
|
---|
690 | else {
|
---|
691 | // wchar_t isn't unicode, so the best we can do is treat the
|
---|
692 | // input as if it is isolatin1 :(
|
---|
693 | isolatin1_to_utf16(input, inputLeft, output, outputLeft);
|
---|
694 | }
|
---|
695 |
|
---|
696 | return NS_OK;
|
---|
697 | }
|
---|
698 |
|
---|
699 | nsresult
|
---|
700 | nsNativeCharsetConverter::UnicodeToNative(const PRUnichar **input,
|
---|
701 | PRUint32 *inputLeft,
|
---|
702 | char **output,
|
---|
703 | PRUint32 *outputLeft)
|
---|
704 | {
|
---|
705 | if (gWCharIsUnicode) {
|
---|
706 | int incr;
|
---|
707 |
|
---|
708 | while (*inputLeft && *outputLeft >= MB_CUR_MAX) {
|
---|
709 | #ifdef HAVE_WCRTOMB
|
---|
710 | incr = (int) wcrtomb(*output, (wchar_t) **input, &ps);
|
---|
711 | #else
|
---|
712 | // XXX is this thread-safe?
|
---|
713 | incr = (int) wctomb(*output, (wchar_t) **input);
|
---|
714 | #endif
|
---|
715 | if (incr < 0) {
|
---|
716 | NS_WARNING("mbtowc failed: possible charset mismatch");
|
---|
717 | **output = (unsigned char) **input; // truncate
|
---|
718 | incr = 1;
|
---|
719 | }
|
---|
720 | // most likely we're dead anyways if this assertion should fire
|
---|
721 | NS_ASSERTION(PRUint32(incr) <= *outputLeft, "wrote beyond end of string");
|
---|
722 | (*output) += incr;
|
---|
723 | (*outputLeft) -= incr;
|
---|
724 | (*input)++;
|
---|
725 | (*inputLeft)--;
|
---|
726 | }
|
---|
727 | }
|
---|
728 | else {
|
---|
729 | // wchar_t isn't unicode, so the best we can do is treat the
|
---|
730 | // input as if it is isolatin1 :(
|
---|
731 | utf16_to_isolatin1(input, inputLeft, output, outputLeft);
|
---|
732 | }
|
---|
733 |
|
---|
734 | return NS_OK;
|
---|
735 | }
|
---|
736 |
|
---|
737 | #endif // USE_STDCONV
|
---|
738 |
|
---|
739 | //-----------------------------------------------------------------------------
|
---|
740 | // API implementation
|
---|
741 | //-----------------------------------------------------------------------------
|
---|
742 |
|
---|
743 | NS_COM nsresult
|
---|
744 | NS_CopyNativeToUnicode(const nsACString &input, nsAString &output)
|
---|
745 | {
|
---|
746 | output.Truncate();
|
---|
747 |
|
---|
748 | PRUint32 inputLen = input.Length();
|
---|
749 |
|
---|
750 | nsACString::const_iterator iter;
|
---|
751 | input.BeginReading(iter);
|
---|
752 |
|
---|
753 | //
|
---|
754 | // OPTIMIZATION: preallocate space for largest possible result; convert
|
---|
755 | // directly into the result buffer to avoid intermediate buffer copy.
|
---|
756 | //
|
---|
757 | // this will generally result in a larger allocation, but that seems
|
---|
758 | // better than an extra buffer copy.
|
---|
759 | //
|
---|
760 | output.SetLength(inputLen);
|
---|
761 | nsAString::iterator out_iter;
|
---|
762 | output.BeginWriting(out_iter);
|
---|
763 |
|
---|
764 | PRUnichar *result = out_iter.get();
|
---|
765 | PRUint32 resultLeft = inputLen;
|
---|
766 |
|
---|
767 | const char *buf = iter.get();
|
---|
768 | PRUint32 bufLeft = inputLen;
|
---|
769 |
|
---|
770 | nsNativeCharsetConverter conv;
|
---|
771 | nsresult rv = conv.NativeToUnicode(&buf, &bufLeft, &result, &resultLeft);
|
---|
772 | if (NS_SUCCEEDED(rv)) {
|
---|
773 | NS_ASSERTION(bufLeft == 0, "did not consume entire input buffer");
|
---|
774 | output.SetLength(inputLen - resultLeft);
|
---|
775 | }
|
---|
776 | return rv;
|
---|
777 | }
|
---|
778 |
|
---|
779 | NS_COM nsresult
|
---|
780 | NS_CopyUnicodeToNative(const nsAString &input, nsACString &output)
|
---|
781 | {
|
---|
782 | output.Truncate();
|
---|
783 |
|
---|
784 | nsAString::const_iterator iter, end;
|
---|
785 | input.BeginReading(iter);
|
---|
786 | input.EndReading(end);
|
---|
787 |
|
---|
788 | // cannot easily avoid intermediate buffer copy.
|
---|
789 | char temp[4096];
|
---|
790 |
|
---|
791 | nsNativeCharsetConverter conv;
|
---|
792 |
|
---|
793 | const PRUnichar *buf = iter.get();
|
---|
794 | PRUint32 bufLeft = Distance(iter, end);
|
---|
795 | while (bufLeft) {
|
---|
796 | char *p = temp;
|
---|
797 | PRUint32 tempLeft = sizeof(temp);
|
---|
798 |
|
---|
799 | nsresult rv = conv.UnicodeToNative(&buf, &bufLeft, &p, &tempLeft);
|
---|
800 | if (NS_FAILED(rv)) return rv;
|
---|
801 |
|
---|
802 | if (tempLeft < sizeof(temp))
|
---|
803 | output.Append(temp, sizeof(temp) - tempLeft);
|
---|
804 | }
|
---|
805 | return NS_OK;
|
---|
806 | }
|
---|
807 |
|
---|
808 | void
|
---|
809 | NS_StartupNativeCharsetUtils()
|
---|
810 | {
|
---|
811 | //
|
---|
812 | // need to initialize the locale or else charset conversion will fail.
|
---|
813 | // better not delay this in case some other component alters the locale
|
---|
814 | // settings.
|
---|
815 | //
|
---|
816 | // XXX we assume that we are called early enough that we should
|
---|
817 | // always be the first to care about the locale's charset.
|
---|
818 | //
|
---|
819 | setlocale(LC_CTYPE, "");
|
---|
820 |
|
---|
821 | nsNativeCharsetConverter::GlobalInit();
|
---|
822 | }
|
---|
823 |
|
---|
824 | void
|
---|
825 | NS_ShutdownNativeCharsetUtils()
|
---|
826 | {
|
---|
827 | nsNativeCharsetConverter::GlobalShutdown();
|
---|
828 | }
|
---|
829 |
|
---|
830 | //-----------------------------------------------------------------------------
|
---|
831 | // XP_BEOS
|
---|
832 | //-----------------------------------------------------------------------------
|
---|
833 | #elif defined(XP_BEOS)
|
---|
834 |
|
---|
835 | #include "nsAString.h"
|
---|
836 | #include "nsReadableUtils.h"
|
---|
837 | #include "nsString.h"
|
---|
838 |
|
---|
839 | NS_COM nsresult
|
---|
840 | NS_CopyNativeToUnicode(const nsACString &input, nsAString &output)
|
---|
841 | {
|
---|
842 | CopyUTF8toUTF16(input, output);
|
---|
843 | return NS_OK;
|
---|
844 | }
|
---|
845 |
|
---|
846 | NS_COM nsresult
|
---|
847 | NS_CopyUnicodeToNative(const nsAString &input, nsACString &output)
|
---|
848 | {
|
---|
849 | CopyUTF16toUTF8(input, output);
|
---|
850 | return NS_OK;
|
---|
851 | }
|
---|
852 |
|
---|
// BeOS: no startup work is performed.
void
NS_StartupNativeCharsetUtils()
{
    // intentionally empty
}
|
---|
857 |
|
---|
// BeOS: no shutdown work is performed.
void
NS_ShutdownNativeCharsetUtils()
{
    // intentionally empty
}
|
---|
862 |
|
---|
863 | //-----------------------------------------------------------------------------
|
---|
864 | // XP_WIN
|
---|
865 | //-----------------------------------------------------------------------------
|
---|
866 | #elif defined(XP_WIN)
|
---|
867 |
|
---|
868 | #include <windows.h>
|
---|
869 | #include "nsAString.h"
|
---|
870 |
|
---|
871 | NS_COM nsresult
|
---|
872 | NS_CopyNativeToUnicode(const nsACString &input, nsAString &output)
|
---|
873 | {
|
---|
874 | PRUint32 inputLen = input.Length();
|
---|
875 |
|
---|
876 | nsACString::const_iterator iter;
|
---|
877 | input.BeginReading(iter);
|
---|
878 |
|
---|
879 | const char *buf = iter.get();
|
---|
880 |
|
---|
881 | // determine length of result
|
---|
882 | PRUint32 resultLen = 0;
|
---|
883 | int n = ::MultiByteToWideChar(CP_ACP, 0, buf, inputLen, NULL, 0);
|
---|
884 | if (n > 0)
|
---|
885 | resultLen += n;
|
---|
886 |
|
---|
887 | // allocate sufficient space
|
---|
888 | output.SetLength(resultLen);
|
---|
889 | if (resultLen > 0) {
|
---|
890 | nsAString::iterator out_iter;
|
---|
891 | output.BeginWriting(out_iter);
|
---|
892 |
|
---|
893 | PRUnichar *result = out_iter.get();
|
---|
894 |
|
---|
895 | ::MultiByteToWideChar(CP_ACP, 0, buf, inputLen, result, resultLen);
|
---|
896 | }
|
---|
897 | return NS_OK;
|
---|
898 | }
|
---|
899 |
|
---|
900 | NS_COM nsresult
|
---|
901 | NS_CopyUnicodeToNative(const nsAString &input, nsACString &output)
|
---|
902 | {
|
---|
903 | PRUint32 inputLen = input.Length();
|
---|
904 |
|
---|
905 | nsAString::const_iterator iter;
|
---|
906 | input.BeginReading(iter);
|
---|
907 |
|
---|
908 | const PRUnichar *buf = iter.get();
|
---|
909 |
|
---|
910 | // determine length of result
|
---|
911 | PRUint32 resultLen = 0;
|
---|
912 |
|
---|
913 | int n = ::WideCharToMultiByte(CP_ACP, 0, buf, inputLen, NULL, 0, NULL, NULL);
|
---|
914 | if (n > 0)
|
---|
915 | resultLen += n;
|
---|
916 |
|
---|
917 | // allocate sufficient space
|
---|
918 | output.SetLength(resultLen);
|
---|
919 | if (resultLen > 0) {
|
---|
920 | nsACString::iterator out_iter;
|
---|
921 | output.BeginWriting(out_iter);
|
---|
922 |
|
---|
923 | // default "defaultChar" is '?', which is an illegal character on windows
|
---|
924 | // file system. That will cause file uncreatable. Change it to '_'
|
---|
925 | const char defaultChar = '_';
|
---|
926 |
|
---|
927 | char *result = out_iter.get();
|
---|
928 |
|
---|
929 | ::WideCharToMultiByte(CP_ACP, 0, buf, inputLen, result, resultLen,
|
---|
930 | &defaultChar, NULL);
|
---|
931 | }
|
---|
932 | return NS_OK;
|
---|
933 | }
|
---|
934 |
|
---|
// Windows: no startup work is performed.
void
NS_StartupNativeCharsetUtils()
{
    // intentionally empty
}
|
---|
939 |
|
---|
// Windows: no shutdown work is performed.
void
NS_ShutdownNativeCharsetUtils()
{
    // intentionally empty
}
|
---|
944 |
|
---|
945 | //-----------------------------------------------------------------------------
|
---|
946 | // XP_OS2
|
---|
947 | //-----------------------------------------------------------------------------
|
---|
948 | #elif defined(XP_OS2)
|
---|
949 |
|
---|
950 | #define INCL_DOS
|
---|
951 | #include <os2.h>
|
---|
952 | #include <uconv.h>
|
---|
953 | #include "nsAString.h"
|
---|
954 | #include <ulserrno.h>
|
---|
955 | #include "nsNativeCharsetUtils.h"
|
---|
956 |
|
---|
957 | static UconvObject UnicodeConverter = NULL;
|
---|
958 |
|
---|
959 | NS_COM nsresult
|
---|
960 | NS_CopyNativeToUnicode(const nsACString &input, nsAString &output)
|
---|
961 | {
|
---|
962 | PRUint32 inputLen = input.Length();
|
---|
963 |
|
---|
964 | nsACString::const_iterator iter;
|
---|
965 | input.BeginReading(iter);
|
---|
966 | const char *inputStr = iter.get();
|
---|
967 |
|
---|
968 | // determine length of result
|
---|
969 | PRUint32 resultLen = inputLen;
|
---|
970 | output.SetLength(resultLen);
|
---|
971 |
|
---|
972 | nsAString::iterator out_iter;
|
---|
973 | output.BeginWriting(out_iter);
|
---|
974 | UniChar *result = (UniChar*)out_iter.get();
|
---|
975 |
|
---|
976 | size_t cSubs = 0;
|
---|
977 | size_t resultLeft = resultLen;
|
---|
978 |
|
---|
979 | if (!UnicodeConverter)
|
---|
980 | NS_StartupNativeCharsetUtils();
|
---|
981 |
|
---|
982 | int unirc = ::UniUconvToUcs(UnicodeConverter, (void**)&inputStr, &inputLen,
|
---|
983 | &result, &resultLeft, &cSubs);
|
---|
984 |
|
---|
985 | NS_ASSERTION(unirc != UCONV_E2BIG, "Path too big");
|
---|
986 |
|
---|
987 | if (unirc != ULS_SUCCESS) {
|
---|
988 | output.Truncate();
|
---|
989 | return NS_ERROR_FAILURE;
|
---|
990 | }
|
---|
991 |
|
---|
992 | // Need to update string length to reflect how many bytes were actually
|
---|
993 | // written.
|
---|
994 | output.Truncate(resultLen - resultLeft);
|
---|
995 | return NS_OK;
|
---|
996 | }
|
---|
997 |
|
---|
998 | NS_COM nsresult
|
---|
999 | NS_CopyUnicodeToNative(const nsAString &input, nsACString &output)
|
---|
1000 | {
|
---|
1001 | size_t inputLen = input.Length();
|
---|
1002 |
|
---|
1003 | nsAString::const_iterator iter;
|
---|
1004 | input.BeginReading(iter);
|
---|
1005 | UniChar* inputStr = (UniChar*) NS_CONST_CAST(PRUnichar*, iter.get());
|
---|
1006 |
|
---|
1007 | // maximum length of unicode string of length x converted to native
|
---|
1008 | // codepage is x*2
|
---|
1009 | size_t resultLen = inputLen * 2;
|
---|
1010 | output.SetLength(resultLen);
|
---|
1011 |
|
---|
1012 | nsACString::iterator out_iter;
|
---|
1013 | output.BeginWriting(out_iter);
|
---|
1014 | char *result = out_iter.get();
|
---|
1015 |
|
---|
1016 | size_t cSubs = 0;
|
---|
1017 | size_t resultLeft = resultLen;
|
---|
1018 |
|
---|
1019 | if (!UnicodeConverter)
|
---|
1020 | NS_StartupNativeCharsetUtils();
|
---|
1021 |
|
---|
1022 | int unirc = ::UniUconvFromUcs(UnicodeConverter, &inputStr, &inputLen,
|
---|
1023 | (void**)&result, &resultLeft, &cSubs);
|
---|
1024 |
|
---|
1025 | NS_ASSERTION(unirc != UCONV_E2BIG, "Path too big");
|
---|
1026 |
|
---|
1027 | if (unirc != ULS_SUCCESS) {
|
---|
1028 | output.Truncate();
|
---|
1029 | return NS_ERROR_FAILURE;
|
---|
1030 | }
|
---|
1031 |
|
---|
1032 | // Need to update string length to reflect how many bytes were actually
|
---|
1033 | // written.
|
---|
1034 | output.Truncate(resultLen - resultLeft);
|
---|
1035 | return NS_OK;
|
---|
1036 | }
|
---|
1037 |
|
---|
1038 | void
|
---|
1039 | NS_StartupNativeCharsetUtils()
|
---|
1040 | {
|
---|
1041 | ULONG ulLength;
|
---|
1042 | ULONG ulCodePage;
|
---|
1043 | DosQueryCp(sizeof(ULONG), &ulCodePage, &ulLength);
|
---|
1044 |
|
---|
1045 | UniChar codepage[20];
|
---|
1046 | int unirc = ::UniMapCpToUcsCp(ulCodePage, codepage, 20);
|
---|
1047 | if (unirc == ULS_SUCCESS) {
|
---|
1048 | unirc = ::UniCreateUconvObject(codepage, &UnicodeConverter);
|
---|
1049 | if (unirc == ULS_SUCCESS) {
|
---|
1050 | uconv_attribute_t attr;
|
---|
1051 | ::UniQueryUconvObject(UnicodeConverter, &attr, sizeof(uconv_attribute_t),
|
---|
1052 | NULL, NULL, NULL);
|
---|
1053 | attr.options = UCONV_OPTION_SUBSTITUTE_BOTH;
|
---|
1054 | attr.subchar_len=1;
|
---|
1055 | attr.subchar[0]='_';
|
---|
1056 | ::UniSetUconvObject(UnicodeConverter, &attr);
|
---|
1057 | }
|
---|
1058 | }
|
---|
1059 | }
|
---|
1060 |
|
---|
1061 | void
|
---|
1062 | NS_ShutdownNativeCharsetUtils()
|
---|
1063 | {
|
---|
1064 | ::UniFreeUconvObject(UnicodeConverter);
|
---|
1065 | }
|
---|
1066 |
|
---|
1067 | //-----------------------------------------------------------------------------
|
---|
1068 | // XP_MAC
|
---|
1069 | //-----------------------------------------------------------------------------
|
---|
1070 | #elif defined(XP_MAC)
|
---|
1071 |
|
---|
1072 | #include <UnicodeConverter.h>
|
---|
1073 | #include <TextCommon.h>
|
---|
1074 | #include <Script.h>
|
---|
1075 | #include <MacErrors.h>
|
---|
1076 | #include "nsAString.h"
|
---|
1077 |
|
---|
1078 | class nsFSStringConversionMac {
|
---|
1079 | public:
|
---|
1080 | static nsresult UCSToFS(const nsAString& aIn, nsACString& aOut);
|
---|
1081 | static nsresult FSToUCS(const nsACString& ain, nsAString& aOut);
|
---|
1082 |
|
---|
1083 | static void CleanUp();
|
---|
1084 |
|
---|
1085 | private:
|
---|
1086 | static TextEncoding GetSystemEncoding();
|
---|
1087 | static nsresult PrepareEncoder();
|
---|
1088 | static nsresult PrepareDecoder();
|
---|
1089 |
|
---|
1090 | static UnicodeToTextInfo sEncoderInfo;
|
---|
1091 | static TextToUnicodeInfo sDecoderInfo;
|
---|
1092 | };
|
---|
1093 |
|
---|
1094 | UnicodeToTextInfo nsFSStringConversionMac::sEncoderInfo = nsnull;
|
---|
1095 | TextToUnicodeInfo nsFSStringConversionMac::sDecoderInfo = nsnull;
|
---|
1096 |
|
---|
1097 | nsresult nsFSStringConversionMac::UCSToFS(const nsAString& aIn, nsACString& aOut)
|
---|
1098 | {
|
---|
1099 | nsresult rv = PrepareEncoder();
|
---|
1100 | if (NS_FAILED(rv)) return rv;
|
---|
1101 |
|
---|
1102 | OSStatus err = noErr;
|
---|
1103 | char stackBuffer[512];
|
---|
1104 |
|
---|
1105 | aOut.Truncate();
|
---|
1106 |
|
---|
1107 | // for each chunk of |aIn|...
|
---|
1108 | nsReadingIterator<PRUnichar> iter;
|
---|
1109 | aIn.BeginReading(iter);
|
---|
1110 |
|
---|
1111 | PRUint32 fragmentLength = PRUint32(iter.size_forward());
|
---|
1112 | UInt32 bytesLeft = fragmentLength * sizeof(UniChar);
|
---|
1113 |
|
---|
1114 | do {
|
---|
1115 | UInt32 bytesRead = 0, bytesWritten = 0;
|
---|
1116 | err = ::ConvertFromUnicodeToText(sEncoderInfo,
|
---|
1117 | bytesLeft,
|
---|
1118 | (const UniChar*)iter.get(),
|
---|
1119 | kUnicodeUseFallbacksMask | kUnicodeLooseMappingsMask,
|
---|
1120 | 0, nsnull, nsnull, nsnull,
|
---|
1121 | sizeof(stackBuffer),
|
---|
1122 | &bytesRead,
|
---|
1123 | &bytesWritten,
|
---|
1124 | stackBuffer);
|
---|
1125 | if (err == kTECUsedFallbacksStatus)
|
---|
1126 | err = noErr;
|
---|
1127 | else if (err == kTECOutputBufferFullStatus) {
|
---|
1128 | bytesLeft -= bytesRead;
|
---|
1129 | iter.advance(bytesRead / sizeof(UniChar));
|
---|
1130 | }
|
---|
1131 | aOut.Append(stackBuffer, bytesWritten);
|
---|
1132 | }
|
---|
1133 | while (err == kTECOutputBufferFullStatus);
|
---|
1134 |
|
---|
1135 | return (err == noErr) ? NS_OK : NS_ERROR_FAILURE;
|
---|
1136 | }
|
---|
1137 |
|
---|
1138 | nsresult nsFSStringConversionMac::FSToUCS(const nsACString& aIn, nsAString& aOut)
|
---|
1139 | {
|
---|
1140 | nsresult rv = PrepareDecoder();
|
---|
1141 | if (NS_FAILED(rv)) return rv;
|
---|
1142 |
|
---|
1143 | OSStatus err = noErr;
|
---|
1144 | UniChar stackBuffer[512];
|
---|
1145 |
|
---|
1146 | aOut.Truncate(0);
|
---|
1147 |
|
---|
1148 | // for each chunk of |aIn|...
|
---|
1149 | nsReadingIterator<char> iter;
|
---|
1150 | aIn.BeginReading(iter);
|
---|
1151 |
|
---|
1152 | PRUint32 fragmentLength = PRUint32(iter.size_forward());
|
---|
1153 | UInt32 bytesLeft = fragmentLength;
|
---|
1154 |
|
---|
1155 | do {
|
---|
1156 | UInt32 bytesRead = 0, bytesWritten = 0;
|
---|
1157 | err = ::ConvertFromTextToUnicode(sDecoderInfo,
|
---|
1158 | bytesLeft,
|
---|
1159 | iter.get(),
|
---|
1160 | kUnicodeUseFallbacksMask | kUnicodeLooseMappingsMask,
|
---|
1161 | 0, nsnull, nsnull, nsnull,
|
---|
1162 | sizeof(stackBuffer),
|
---|
1163 | &bytesRead,
|
---|
1164 | &bytesWritten,
|
---|
1165 | stackBuffer);
|
---|
1166 | if (err == kTECUsedFallbacksStatus)
|
---|
1167 | err = noErr;
|
---|
1168 | else if (err == kTECOutputBufferFullStatus) {
|
---|
1169 | bytesLeft -= bytesRead;
|
---|
1170 | iter.advance(bytesRead);
|
---|
1171 | }
|
---|
1172 | aOut.Append((PRUnichar *)stackBuffer, bytesWritten / sizeof(PRUnichar));
|
---|
1173 | }
|
---|
1174 | while (err == kTECOutputBufferFullStatus);
|
---|
1175 |
|
---|
1176 | return (err == noErr) ? NS_OK : NS_ERROR_FAILURE;
|
---|
1177 | }
|
---|
1178 |
|
---|
1179 | void nsFSStringConversionMac::CleanUp()
|
---|
1180 | {
|
---|
1181 | if (sDecoderInfo) {
|
---|
1182 | ::DisposeTextToUnicodeInfo(&sDecoderInfo);
|
---|
1183 | sDecoderInfo = nsnull;
|
---|
1184 | }
|
---|
1185 | if (sEncoderInfo) {
|
---|
1186 | ::DisposeUnicodeToTextInfo(&sEncoderInfo);
|
---|
1187 | sEncoderInfo = nsnull;
|
---|
1188 | }
|
---|
1189 | }
|
---|
1190 |
|
---|
1191 | TextEncoding nsFSStringConversionMac::GetSystemEncoding()
|
---|
1192 | {
|
---|
1193 | OSStatus err;
|
---|
1194 | TextEncoding theEncoding;
|
---|
1195 |
|
---|
1196 | err = ::UpgradeScriptInfoToTextEncoding(smSystemScript, kTextLanguageDontCare,
|
---|
1197 | kTextRegionDontCare, NULL, &theEncoding);
|
---|
1198 |
|
---|
1199 | if (err != noErr)
|
---|
1200 | theEncoding = kTextEncodingMacRoman;
|
---|
1201 |
|
---|
1202 | return theEncoding;
|
---|
1203 | }
|
---|
1204 |
|
---|
1205 | nsresult nsFSStringConversionMac::PrepareEncoder()
|
---|
1206 | {
|
---|
1207 | nsresult rv = NS_OK;
|
---|
1208 | if (!sEncoderInfo) {
|
---|
1209 | OSStatus err;
|
---|
1210 | err = ::CreateUnicodeToTextInfoByEncoding(GetSystemEncoding(), &sEncoderInfo);
|
---|
1211 | if (err)
|
---|
1212 | rv = NS_ERROR_FAILURE;
|
---|
1213 | }
|
---|
1214 | return rv;
|
---|
1215 | }
|
---|
1216 |
|
---|
1217 | nsresult nsFSStringConversionMac::PrepareDecoder()
|
---|
1218 | {
|
---|
1219 | nsresult rv = NS_OK;
|
---|
1220 | if (!sDecoderInfo) {
|
---|
1221 | OSStatus err;
|
---|
1222 | err = ::CreateTextToUnicodeInfoByEncoding(GetSystemEncoding(), &sDecoderInfo);
|
---|
1223 | if (err)
|
---|
1224 | rv = NS_ERROR_FAILURE;
|
---|
1225 | }
|
---|
1226 | return rv;
|
---|
1227 | }
|
---|
1228 |
|
---|
1229 | NS_COM nsresult
|
---|
1230 | NS_CopyNativeToUnicode(const nsACString &input, nsAString &output)
|
---|
1231 | {
|
---|
1232 | return nsFSStringConversionMac::FSToUCS(input, output);
|
---|
1233 | }
|
---|
1234 |
|
---|
1235 | NS_COM nsresult
|
---|
1236 | NS_CopyUnicodeToNative(const nsAString &input, nsACString &output)
|
---|
1237 | {
|
---|
1238 | return nsFSStringConversionMac::UCSToFS(input, output);
|
---|
1239 | }
|
---|
1240 |
|
---|
// Mac: no startup work is performed; the converters are created on demand.
void
NS_StartupNativeCharsetUtils()
{
    // intentionally empty
}
|
---|
1245 |
|
---|
1246 | void
|
---|
1247 | NS_ShutdownNativeCharsetUtils()
|
---|
1248 | {
|
---|
1249 | nsFSStringConversionMac::CleanUp();
|
---|
1250 | }
|
---|
1251 |
|
---|
1252 | //-----------------------------------------------------------------------------
|
---|
1253 | // default : truncate/zeropad
|
---|
1254 | //-----------------------------------------------------------------------------
|
---|
1255 | #else
|
---|
1256 |
|
---|
1257 | #include "nsReadableUtils.h"
|
---|
1258 |
|
---|
1259 | NS_COM nsresult
|
---|
1260 | NS_CopyNativeToUnicode(const nsACString &input, nsAString &output)
|
---|
1261 | {
|
---|
1262 | CopyASCIItoUCS2(input, output);
|
---|
1263 | return NS_OK;
|
---|
1264 | }
|
---|
1265 |
|
---|
1266 | NS_COM nsresult
|
---|
1267 | NS_CopyUnicodeToNative(const nsAString &input, nsACString &output)
|
---|
1268 | {
|
---|
1269 | CopyUCS2toASCII(input, output);
|
---|
1270 | return NS_OK;
|
---|
1271 | }
|
---|
1272 |
|
---|
// Fallback: no startup work is performed.
void
NS_StartupNativeCharsetUtils()
{
    // intentionally empty
}
|
---|
1277 |
|
---|
// Fallback: no shutdown work is performed.
void
NS_ShutdownNativeCharsetUtils()
{
    // intentionally empty
}
|
---|
1282 |
|
---|
1283 | #endif
|
---|
1284 |
|
---|