VirtualBox

source: vbox/trunk/src/VBox/Additions/WINNT/Graphics/Wine/libWine/wctomb.c@ 20227

Last change on this file since 20227 was 16477, checked in by vboxsync, 16 years ago

LGPL disclaimer by filemuncher

  • Property svn:eol-style set to native
File size: 17.7 KB
Line 
1/*
2 * WideCharToMultiByte implementation
3 *
4 * Copyright 2000 Alexandre Julliard
5 *
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2.1 of the License, or (at your option) any later version.
10 *
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
15 *
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
19 */
20
21/*
22 * Sun LGPL Disclaimer: For the avoidance of doubt, except that if any license choice
23 * other than GPL or LGPL is available it will apply instead, Sun elects to use only
24 * the Lesser General Public License version 2.1 (LGPLv2) at this time for any software where
25 * a choice of LGPL license versions is made available with the language indicating
26 * that LGPLv2 or any later version may be used, or where a choice of which version
27 * of the LGPL is applied is otherwise unspecified.
28 */
29
30#include <string.h>
31
32#include "wine/unicode.h"
33
34/* search for a character in the unicode_compose_table; helper for compose() */
35static inline int binary_search( WCHAR ch, int low, int high )
36{
37 extern const WCHAR unicode_compose_table[];
38 while (low <= high)
39 {
40 int pos = (low + high) / 2;
41 if (unicode_compose_table[2*pos] < ch)
42 {
43 low = pos + 1;
44 continue;
45 }
46 if (unicode_compose_table[2*pos] > ch)
47 {
48 high = pos - 1;
49 continue;
50 }
51 return pos;
52 }
53 return -1;
54}
55
56/* return the result of the composition of two Unicode chars, or 0 if none */
57WCHAR compose( const WCHAR *str )
58{
59 extern const WCHAR unicode_compose_table[];
60 extern const unsigned int unicode_compose_table_size;
61
62 int idx = 1, low = 0, high = unicode_compose_table_size - 1;
63 for (;;)
64 {
65 int pos = binary_search( str[idx], low, high );
66 if (pos == -1) return 0;
67 if (!idx--) return unicode_compose_table[2*pos+1];
68 low = unicode_compose_table[2*pos+1];
69 high = unicode_compose_table[2*pos+3] - 1;
70 }
71}
72
73
74/****************************************************************/
75/* sbcs support */
76
77/* check if 'ch' is an acceptable sbcs mapping for 'wch' */
78static inline int is_valid_sbcs_mapping( const struct sbcs_table *table, int flags,
79 WCHAR wch, unsigned char ch )
80{
81 if (flags & WC_NO_BEST_FIT_CHARS) return (table->cp2uni[ch] == wch);
82 if (ch != (unsigned char)table->info.def_char) return 1;
83 return (wch == table->info.def_unicode_char);
84}
85
86/* query necessary dst length for src string */
87static int get_length_sbcs( const struct sbcs_table *table, int flags,
88 const WCHAR *src, unsigned int srclen, int *used )
89{
90 const unsigned char * const uni2cp_low = table->uni2cp_low;
91 const unsigned short * const uni2cp_high = table->uni2cp_high;
92 int ret, tmp;
93 WCHAR composed;
94
95 if (!used) used = &tmp; /* avoid checking on every char */
96 *used = 0;
97
98 for (ret = 0; srclen; ret++, src++, srclen--)
99 {
100 WCHAR wch = *src;
101 unsigned char ch;
102
103 if ((flags & WC_COMPOSITECHECK) && (srclen > 1) && (composed = compose(src)))
104 {
105 /* now check if we can use the composed char */
106 ch = uni2cp_low[uni2cp_high[composed >> 8] + (composed & 0xff)];
107 if (is_valid_sbcs_mapping( table, flags, composed, ch ))
108 {
109 /* we have a good mapping, use it */
110 src++;
111 srclen--;
112 continue;
113 }
114 /* no mapping for the composed char, check the other flags */
115 if (flags & WC_DEFAULTCHAR) /* use the default char instead */
116 {
117 *used = 1;
118 src++; /* skip the non-spacing char */
119 srclen--;
120 continue;
121 }
122 if (flags & WC_DISCARDNS) /* skip the second char of the composition */
123 {
124 src++;
125 srclen--;
126 }
127 /* WC_SEPCHARS is the default */
128 }
129 if (!*used)
130 {
131 ch = uni2cp_low[uni2cp_high[wch >> 8] + (wch & 0xff)];
132 *used = !is_valid_sbcs_mapping( table, flags, wch, ch );
133 }
134 }
135 return ret;
136}
137
138/* wcstombs for single-byte code page */
139static inline int wcstombs_sbcs( const struct sbcs_table *table,
140 const WCHAR *src, unsigned int srclen,
141 char *dst, unsigned int dstlen )
142{
143 const unsigned char * const uni2cp_low = table->uni2cp_low;
144 const unsigned short * const uni2cp_high = table->uni2cp_high;
145 int ret = srclen;
146
147 if (dstlen < srclen)
148 {
149 /* buffer too small: fill it up to dstlen and return error */
150 srclen = dstlen;
151 ret = -1;
152 }
153
154 while (srclen >= 16)
155 {
156 dst[0] = uni2cp_low[uni2cp_high[src[0] >> 8] + (src[0] & 0xff)];
157 dst[1] = uni2cp_low[uni2cp_high[src[1] >> 8] + (src[1] & 0xff)];
158 dst[2] = uni2cp_low[uni2cp_high[src[2] >> 8] + (src[2] & 0xff)];
159 dst[3] = uni2cp_low[uni2cp_high[src[3] >> 8] + (src[3] & 0xff)];
160 dst[4] = uni2cp_low[uni2cp_high[src[4] >> 8] + (src[4] & 0xff)];
161 dst[5] = uni2cp_low[uni2cp_high[src[5] >> 8] + (src[5] & 0xff)];
162 dst[6] = uni2cp_low[uni2cp_high[src[6] >> 8] + (src[6] & 0xff)];
163 dst[7] = uni2cp_low[uni2cp_high[src[7] >> 8] + (src[7] & 0xff)];
164 dst[8] = uni2cp_low[uni2cp_high[src[8] >> 8] + (src[8] & 0xff)];
165 dst[9] = uni2cp_low[uni2cp_high[src[9] >> 8] + (src[9] & 0xff)];
166 dst[10] = uni2cp_low[uni2cp_high[src[10] >> 8] + (src[10] & 0xff)];
167 dst[11] = uni2cp_low[uni2cp_high[src[11] >> 8] + (src[11] & 0xff)];
168 dst[12] = uni2cp_low[uni2cp_high[src[12] >> 8] + (src[12] & 0xff)];
169 dst[13] = uni2cp_low[uni2cp_high[src[13] >> 8] + (src[13] & 0xff)];
170 dst[14] = uni2cp_low[uni2cp_high[src[14] >> 8] + (src[14] & 0xff)];
171 dst[15] = uni2cp_low[uni2cp_high[src[15] >> 8] + (src[15] & 0xff)];
172 src += 16;
173 dst += 16;
174 srclen -= 16;
175 }
176
177 /* now handle remaining characters */
178 src += srclen;
179 dst += srclen;
180 switch(srclen)
181 {
182 case 15: dst[-15] = uni2cp_low[uni2cp_high[src[-15] >> 8] + (src[-15] & 0xff)];
183 case 14: dst[-14] = uni2cp_low[uni2cp_high[src[-14] >> 8] + (src[-14] & 0xff)];
184 case 13: dst[-13] = uni2cp_low[uni2cp_high[src[-13] >> 8] + (src[-13] & 0xff)];
185 case 12: dst[-12] = uni2cp_low[uni2cp_high[src[-12] >> 8] + (src[-12] & 0xff)];
186 case 11: dst[-11] = uni2cp_low[uni2cp_high[src[-11] >> 8] + (src[-11] & 0xff)];
187 case 10: dst[-10] = uni2cp_low[uni2cp_high[src[-10] >> 8] + (src[-10] & 0xff)];
188 case 9: dst[-9] = uni2cp_low[uni2cp_high[src[-9] >> 8] + (src[-9] & 0xff)];
189 case 8: dst[-8] = uni2cp_low[uni2cp_high[src[-8] >> 8] + (src[-8] & 0xff)];
190 case 7: dst[-7] = uni2cp_low[uni2cp_high[src[-7] >> 8] + (src[-7] & 0xff)];
191 case 6: dst[-6] = uni2cp_low[uni2cp_high[src[-6] >> 8] + (src[-6] & 0xff)];
192 case 5: dst[-5] = uni2cp_low[uni2cp_high[src[-5] >> 8] + (src[-5] & 0xff)];
193 case 4: dst[-4] = uni2cp_low[uni2cp_high[src[-4] >> 8] + (src[-4] & 0xff)];
194 case 3: dst[-3] = uni2cp_low[uni2cp_high[src[-3] >> 8] + (src[-3] & 0xff)];
195 case 2: dst[-2] = uni2cp_low[uni2cp_high[src[-2] >> 8] + (src[-2] & 0xff)];
196 case 1: dst[-1] = uni2cp_low[uni2cp_high[src[-1] >> 8] + (src[-1] & 0xff)];
197 case 0: break;
198 }
199 return ret;
200}
201
202/* slow version of wcstombs_sbcs that handles the various flags */
203static int wcstombs_sbcs_slow( const struct sbcs_table *table, int flags,
204 const WCHAR *src, unsigned int srclen,
205 char *dst, unsigned int dstlen,
206 const char *defchar, int *used )
207{
208 const unsigned char * const uni2cp_low = table->uni2cp_low;
209 const unsigned short * const uni2cp_high = table->uni2cp_high;
210 unsigned char def;
211 unsigned int len;
212 int tmp;
213 WCHAR composed;
214
215 if (!defchar)
216 def = table->info.def_char & 0xff;
217 else
218 def = *defchar;
219
220 if (!used) used = &tmp; /* avoid checking on every char */
221 *used = 0;
222
223 for (len = dstlen; srclen && len; dst++, len--, src++, srclen--)
224 {
225 WCHAR wch = *src;
226
227 if ((flags & WC_COMPOSITECHECK) && (srclen > 1) && (composed = compose(src)))
228 {
229 /* now check if we can use the composed char */
230 *dst = uni2cp_low[uni2cp_high[composed >> 8] + (composed & 0xff)];
231 if (is_valid_sbcs_mapping( table, flags, composed, *dst ))
232 {
233 /* we have a good mapping, use it */
234 src++;
235 srclen--;
236 continue;
237 }
238 /* no mapping for the composed char, check the other flags */
239 if (flags & WC_DEFAULTCHAR) /* use the default char instead */
240 {
241 *dst = def;
242 *used = 1;
243 src++; /* skip the non-spacing char */
244 srclen--;
245 continue;
246 }
247 if (flags & WC_DISCARDNS) /* skip the second char of the composition */
248 {
249 src++;
250 srclen--;
251 }
252 /* WC_SEPCHARS is the default */
253 }
254
255 *dst = uni2cp_low[uni2cp_high[wch >> 8] + (wch & 0xff)];
256 if (!is_valid_sbcs_mapping( table, flags, wch, *dst ))
257 {
258 *dst = def;
259 *used = 1;
260 }
261 }
262 if (srclen) return -1; /* overflow */
263 return dstlen - len;
264}
265
266
267/****************************************************************/
268/* dbcs support */
269
270/* check if 'ch' is an acceptable dbcs mapping for 'wch' */
271static inline int is_valid_dbcs_mapping( const struct dbcs_table *table, int flags,
272 WCHAR wch, unsigned short ch )
273{
274 if (ch == table->info.def_char && wch != table->info.def_unicode_char) return 0;
275 if (flags & WC_NO_BEST_FIT_CHARS)
276 {
277 /* check if char maps back to the same Unicode value */
278 if (ch & 0xff00)
279 {
280 unsigned char off = table->cp2uni_leadbytes[ch >> 8];
281 return (table->cp2uni[(off << 8) + (ch & 0xff)] == wch);
282 }
283 return (table->cp2uni[ch & 0xff] == wch);
284 }
285 return 1;
286}
287
288/* query necessary dst length for src string */
289static int get_length_dbcs( const struct dbcs_table *table, int flags,
290 const WCHAR *src, unsigned int srclen,
291 const char *defchar, int *used )
292{
293 const unsigned short * const uni2cp_low = table->uni2cp_low;
294 const unsigned short * const uni2cp_high = table->uni2cp_high;
295 WCHAR defchar_value = table->info.def_char;
296 WCHAR composed;
297 int len, tmp;
298
299 if (!defchar && !used && !(flags & WC_COMPOSITECHECK))
300 {
301 for (len = 0; srclen; srclen--, src++, len++)
302 {
303 if (uni2cp_low[uni2cp_high[*src >> 8] + (*src & 0xff)] & 0xff00) len++;
304 }
305 return len;
306 }
307
308 if (defchar) defchar_value = defchar[1] ? ((defchar[0] << 8) | defchar[1]) : defchar[0];
309 if (!used) used = &tmp; /* avoid checking on every char */
310 *used = 0;
311 for (len = 0; srclen; len++, srclen--, src++)
312 {
313 unsigned short res;
314 WCHAR wch = *src;
315
316 if ((flags & WC_COMPOSITECHECK) && (srclen > 1) && (composed = compose(src)))
317 {
318 /* now check if we can use the composed char */
319 res = uni2cp_low[uni2cp_high[composed >> 8] + (composed & 0xff)];
320
321 if (is_valid_dbcs_mapping( table, flags, composed, res ))
322 {
323 /* we have a good mapping for the composed char, use it */
324 if (res & 0xff00) len++;
325 src++;
326 srclen--;
327 continue;
328 }
329 /* no mapping for the composed char, check the other flags */
330 if (flags & WC_DEFAULTCHAR) /* use the default char instead */
331 {
332 if (defchar_value & 0xff00) len++;
333 *used = 1;
334 src++; /* skip the non-spacing char */
335 srclen--;
336 continue;
337 }
338 if (flags & WC_DISCARDNS) /* skip the second char of the composition */
339 {
340 src++;
341 srclen--;
342 }
343 /* WC_SEPCHARS is the default */
344 }
345
346 res = uni2cp_low[uni2cp_high[wch >> 8] + (wch & 0xff)];
347 if (!is_valid_dbcs_mapping( table, flags, wch, res ))
348 {
349 res = defchar_value;
350 *used = 1;
351 }
352 if (res & 0xff00) len++;
353 }
354 return len;
355}
356
357/* wcstombs for double-byte code page */
358static inline int wcstombs_dbcs( const struct dbcs_table *table,
359 const WCHAR *src, unsigned int srclen,
360 char *dst, unsigned int dstlen )
361{
362 const unsigned short * const uni2cp_low = table->uni2cp_low;
363 const unsigned short * const uni2cp_high = table->uni2cp_high;
364 int len;
365
366 for (len = dstlen; srclen && len; len--, srclen--, src++)
367 {
368 unsigned short res = uni2cp_low[uni2cp_high[*src >> 8] + (*src & 0xff)];
369 if (res & 0xff00)
370 {
371 if (len == 1) break; /* do not output a partial char */
372 len--;
373 *dst++ = res >> 8;
374 }
375 *dst++ = (char)res;
376 }
377 if (srclen) return -1; /* overflow */
378 return dstlen - len;
379}
380
381/* slow version of wcstombs_dbcs that handles the various flags */
382static int wcstombs_dbcs_slow( const struct dbcs_table *table, int flags,
383 const WCHAR *src, unsigned int srclen,
384 char *dst, unsigned int dstlen,
385 const char *defchar, int *used )
386{
387 const unsigned short * const uni2cp_low = table->uni2cp_low;
388 const unsigned short * const uni2cp_high = table->uni2cp_high;
389 WCHAR defchar_value = table->info.def_char;
390 WCHAR composed;
391 int len, tmp;
392
393 if (defchar) defchar_value = defchar[1] ? ((defchar[0] << 8) | defchar[1]) : defchar[0];
394 if (!used) used = &tmp; /* avoid checking on every char */
395 *used = 0;
396
397 for (len = dstlen; srclen && len; len--, srclen--, src++)
398 {
399 unsigned short res;
400 WCHAR wch = *src;
401
402 if ((flags & WC_COMPOSITECHECK) && (srclen > 1) && (composed = compose(src)))
403 {
404 /* now check if we can use the composed char */
405 res = uni2cp_low[uni2cp_high[composed >> 8] + (composed & 0xff)];
406
407 if (is_valid_dbcs_mapping( table, flags, composed, res ))
408 {
409 /* we have a good mapping for the composed char, use it */
410 src++;
411 srclen--;
412 goto output_char;
413 }
414 /* no mapping for the composed char, check the other flags */
415 if (flags & WC_DEFAULTCHAR) /* use the default char instead */
416 {
417 res = defchar_value;
418 *used = 1;
419 src++; /* skip the non-spacing char */
420 srclen--;
421 goto output_char;
422 }
423 if (flags & WC_DISCARDNS) /* skip the second char of the composition */
424 {
425 src++;
426 srclen--;
427 }
428 /* WC_SEPCHARS is the default */
429 }
430
431 res = uni2cp_low[uni2cp_high[wch >> 8] + (wch & 0xff)];
432 if (!is_valid_dbcs_mapping( table, flags, wch, res ))
433 {
434 res = defchar_value;
435 *used = 1;
436 }
437
438 output_char:
439 if (res & 0xff00)
440 {
441 if (len == 1) break; /* do not output a partial char */
442 len--;
443 *dst++ = res >> 8;
444 }
445 *dst++ = (char)res;
446 }
447 if (srclen) return -1; /* overflow */
448 return dstlen - len;
449}
450
451/* wide char to multi byte string conversion */
452/* return -1 on dst buffer overflow */
453int wine_cp_wcstombs( const union cptable *table, int flags,
454 const WCHAR *src, int srclen,
455 char *dst, int dstlen, const char *defchar, int *used )
456{
457 if (table->info.char_size == 1)
458 {
459 if (flags || defchar || used)
460 {
461 if (!dstlen) return get_length_sbcs( &table->sbcs, flags, src, srclen, used );
462 return wcstombs_sbcs_slow( &table->sbcs, flags, src, srclen,
463 dst, dstlen, defchar, used );
464 }
465 if (!dstlen) return srclen;
466 return wcstombs_sbcs( &table->sbcs, src, srclen, dst, dstlen );
467 }
468 else /* mbcs */
469 {
470 if (!dstlen) return get_length_dbcs( &table->dbcs, flags, src, srclen, defchar, used );
471 if (flags || defchar || used)
472 return wcstombs_dbcs_slow( &table->dbcs, flags, src, srclen,
473 dst, dstlen, defchar, used );
474 return wcstombs_dbcs( &table->dbcs, src, srclen, dst, dstlen );
475 }
476}
477
478/* CP_SYMBOL implementation */
479/* return -1 on dst buffer overflow, -2 on invalid character */
480int wine_cpsymbol_wcstombs( const WCHAR *src, int srclen, char *dst, int dstlen)
481{
482 int len, i;
483 if( dstlen == 0) return srclen;
484 len = dstlen > srclen ? srclen : dstlen;
485 for( i = 0; i < len; i++)
486 {
487 WCHAR w = src [ i ];
488 if( w < 0x20 )
489 dst[i] = w;
490 else if( w >= 0xf020 && w < 0xf100)
491 dst[i] = w - 0xf000;
492 else
493 return -2;
494 }
495 if( srclen > len) return -1;
496 return len;
497}
Note: See TracBrowser for help on using the repository browser.

© 2024 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette