VirtualBox

source: vbox/trunk/src/VBox/Additions/WINNT/Graphics/Wine/libWine/wctomb.c@ 33876

Last change on this file since 33876 was 33656, checked in by vboxsync, 14 years ago

*: rebrand Sun (L)GPL disclaimers

  • Property svn:eol-style set to native
File size: 17.6 KB
Line 
1/*
2 * WideCharToMultiByte implementation
3 *
4 * Copyright 2000 Alexandre Julliard
5 *
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2.1 of the License, or (at your option) any later version.
10 *
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
15 *
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
19 */
20
21/*
22 * Oracle LGPL Disclaimer: For the avoidance of doubt, except that if any license choice
23 * other than GPL or LGPL is available it will apply instead, Oracle elects to use only
24 * the Lesser General Public License version 2.1 (LGPLv2) at this time for any software where
25 * a choice of LGPL license versions is made available with the language indicating
26 * that LGPLv2 or any later version may be used, or where a choice of which version
27 * of the LGPL is applied is otherwise unspecified.
28 */
29
30#include "config.h"
31#include "wine/port.h"
32
33#include <string.h>
34
35#include "wine/unicode.h"
36
37/* search for a character in the unicode_compose_table; helper for compose() */
38static inline int binary_search( WCHAR ch, int low, int high )
39{
40 extern const WCHAR unicode_compose_table[];
41 while (low <= high)
42 {
43 int pos = (low + high) / 2;
44 if (unicode_compose_table[2*pos] < ch)
45 {
46 low = pos + 1;
47 continue;
48 }
49 if (unicode_compose_table[2*pos] > ch)
50 {
51 high = pos - 1;
52 continue;
53 }
54 return pos;
55 }
56 return -1;
57}
58
59/* return the result of the composition of two Unicode chars, or 0 if none */
60WCHAR compose( const WCHAR *str )
61{
62 extern const WCHAR unicode_compose_table[];
63 extern const unsigned int unicode_compose_table_size;
64
65 int idx = 1, low = 0, high = unicode_compose_table_size - 1;
66 for (;;)
67 {
68 int pos = binary_search( str[idx], low, high );
69 if (pos == -1) return 0;
70 if (!idx--) return unicode_compose_table[2*pos+1];
71 low = unicode_compose_table[2*pos+1];
72 high = unicode_compose_table[2*pos+3] - 1;
73 }
74}
75
76
77/****************************************************************/
78/* sbcs support */
79
80/* check if 'ch' is an acceptable sbcs mapping for 'wch' */
81static inline int is_valid_sbcs_mapping( const struct sbcs_table *table, int flags,
82 WCHAR wch, unsigned char ch )
83{
84 if ((flags & WC_NO_BEST_FIT_CHARS) || ch == (unsigned char)table->info.def_char)
85 return (table->cp2uni[ch] == wch);
86 return 1;
87}
88
89/* query necessary dst length for src string */
90static int get_length_sbcs( const struct sbcs_table *table, int flags,
91 const WCHAR *src, unsigned int srclen, int *used )
92{
93 const unsigned char * const uni2cp_low = table->uni2cp_low;
94 const unsigned short * const uni2cp_high = table->uni2cp_high;
95 int ret, tmp;
96 WCHAR composed;
97
98 if (!used) used = &tmp; /* avoid checking on every char */
99 *used = 0;
100
101 for (ret = 0; srclen; ret++, src++, srclen--)
102 {
103 WCHAR wch = *src;
104 unsigned char ch;
105
106 if ((flags & WC_COMPOSITECHECK) && (srclen > 1) && (composed = compose(src)))
107 {
108 /* now check if we can use the composed char */
109 ch = uni2cp_low[uni2cp_high[composed >> 8] + (composed & 0xff)];
110 if (is_valid_sbcs_mapping( table, flags, composed, ch ))
111 {
112 /* we have a good mapping, use it */
113 src++;
114 srclen--;
115 continue;
116 }
117 /* no mapping for the composed char, check the other flags */
118 if (flags & WC_DEFAULTCHAR) /* use the default char instead */
119 {
120 *used = 1;
121 src++; /* skip the non-spacing char */
122 srclen--;
123 continue;
124 }
125 if (flags & WC_DISCARDNS) /* skip the second char of the composition */
126 {
127 src++;
128 srclen--;
129 }
130 /* WC_SEPCHARS is the default */
131 }
132 if (!*used)
133 {
134 ch = uni2cp_low[uni2cp_high[wch >> 8] + (wch & 0xff)];
135 *used = !is_valid_sbcs_mapping( table, flags, wch, ch );
136 }
137 }
138 return ret;
139}
140
141/* wcstombs for single-byte code page */
142static inline int wcstombs_sbcs( const struct sbcs_table *table,
143 const WCHAR *src, unsigned int srclen,
144 char *dst, unsigned int dstlen )
145{
146 const unsigned char * const uni2cp_low = table->uni2cp_low;
147 const unsigned short * const uni2cp_high = table->uni2cp_high;
148 int ret = srclen;
149
150 if (dstlen < srclen)
151 {
152 /* buffer too small: fill it up to dstlen and return error */
153 srclen = dstlen;
154 ret = -1;
155 }
156
157 while (srclen >= 16)
158 {
159 dst[0] = uni2cp_low[uni2cp_high[src[0] >> 8] + (src[0] & 0xff)];
160 dst[1] = uni2cp_low[uni2cp_high[src[1] >> 8] + (src[1] & 0xff)];
161 dst[2] = uni2cp_low[uni2cp_high[src[2] >> 8] + (src[2] & 0xff)];
162 dst[3] = uni2cp_low[uni2cp_high[src[3] >> 8] + (src[3] & 0xff)];
163 dst[4] = uni2cp_low[uni2cp_high[src[4] >> 8] + (src[4] & 0xff)];
164 dst[5] = uni2cp_low[uni2cp_high[src[5] >> 8] + (src[5] & 0xff)];
165 dst[6] = uni2cp_low[uni2cp_high[src[6] >> 8] + (src[6] & 0xff)];
166 dst[7] = uni2cp_low[uni2cp_high[src[7] >> 8] + (src[7] & 0xff)];
167 dst[8] = uni2cp_low[uni2cp_high[src[8] >> 8] + (src[8] & 0xff)];
168 dst[9] = uni2cp_low[uni2cp_high[src[9] >> 8] + (src[9] & 0xff)];
169 dst[10] = uni2cp_low[uni2cp_high[src[10] >> 8] + (src[10] & 0xff)];
170 dst[11] = uni2cp_low[uni2cp_high[src[11] >> 8] + (src[11] & 0xff)];
171 dst[12] = uni2cp_low[uni2cp_high[src[12] >> 8] + (src[12] & 0xff)];
172 dst[13] = uni2cp_low[uni2cp_high[src[13] >> 8] + (src[13] & 0xff)];
173 dst[14] = uni2cp_low[uni2cp_high[src[14] >> 8] + (src[14] & 0xff)];
174 dst[15] = uni2cp_low[uni2cp_high[src[15] >> 8] + (src[15] & 0xff)];
175 src += 16;
176 dst += 16;
177 srclen -= 16;
178 }
179
180 /* now handle remaining characters */
181 src += srclen;
182 dst += srclen;
183 switch(srclen)
184 {
185 case 15: dst[-15] = uni2cp_low[uni2cp_high[src[-15] >> 8] + (src[-15] & 0xff)];
186 case 14: dst[-14] = uni2cp_low[uni2cp_high[src[-14] >> 8] + (src[-14] & 0xff)];
187 case 13: dst[-13] = uni2cp_low[uni2cp_high[src[-13] >> 8] + (src[-13] & 0xff)];
188 case 12: dst[-12] = uni2cp_low[uni2cp_high[src[-12] >> 8] + (src[-12] & 0xff)];
189 case 11: dst[-11] = uni2cp_low[uni2cp_high[src[-11] >> 8] + (src[-11] & 0xff)];
190 case 10: dst[-10] = uni2cp_low[uni2cp_high[src[-10] >> 8] + (src[-10] & 0xff)];
191 case 9: dst[-9] = uni2cp_low[uni2cp_high[src[-9] >> 8] + (src[-9] & 0xff)];
192 case 8: dst[-8] = uni2cp_low[uni2cp_high[src[-8] >> 8] + (src[-8] & 0xff)];
193 case 7: dst[-7] = uni2cp_low[uni2cp_high[src[-7] >> 8] + (src[-7] & 0xff)];
194 case 6: dst[-6] = uni2cp_low[uni2cp_high[src[-6] >> 8] + (src[-6] & 0xff)];
195 case 5: dst[-5] = uni2cp_low[uni2cp_high[src[-5] >> 8] + (src[-5] & 0xff)];
196 case 4: dst[-4] = uni2cp_low[uni2cp_high[src[-4] >> 8] + (src[-4] & 0xff)];
197 case 3: dst[-3] = uni2cp_low[uni2cp_high[src[-3] >> 8] + (src[-3] & 0xff)];
198 case 2: dst[-2] = uni2cp_low[uni2cp_high[src[-2] >> 8] + (src[-2] & 0xff)];
199 case 1: dst[-1] = uni2cp_low[uni2cp_high[src[-1] >> 8] + (src[-1] & 0xff)];
200 case 0: break;
201 }
202 return ret;
203}
204
205/* slow version of wcstombs_sbcs that handles the various flags */
206static int wcstombs_sbcs_slow( const struct sbcs_table *table, int flags,
207 const WCHAR *src, unsigned int srclen,
208 char *dst, unsigned int dstlen,
209 const char *defchar, int *used )
210{
211 const unsigned char * const uni2cp_low = table->uni2cp_low;
212 const unsigned short * const uni2cp_high = table->uni2cp_high;
213 unsigned char def;
214 unsigned int len;
215 int tmp;
216 WCHAR composed;
217
218 if (!defchar)
219 def = table->info.def_char & 0xff;
220 else
221 def = *defchar;
222
223 if (!used) used = &tmp; /* avoid checking on every char */
224 *used = 0;
225
226 for (len = dstlen; srclen && len; dst++, len--, src++, srclen--)
227 {
228 WCHAR wch = *src;
229
230 if ((flags & WC_COMPOSITECHECK) && (srclen > 1) && (composed = compose(src)))
231 {
232 /* now check if we can use the composed char */
233 *dst = uni2cp_low[uni2cp_high[composed >> 8] + (composed & 0xff)];
234 if (is_valid_sbcs_mapping( table, flags, composed, *dst ))
235 {
236 /* we have a good mapping, use it */
237 src++;
238 srclen--;
239 continue;
240 }
241 /* no mapping for the composed char, check the other flags */
242 if (flags & WC_DEFAULTCHAR) /* use the default char instead */
243 {
244 *dst = def;
245 *used = 1;
246 src++; /* skip the non-spacing char */
247 srclen--;
248 continue;
249 }
250 if (flags & WC_DISCARDNS) /* skip the second char of the composition */
251 {
252 src++;
253 srclen--;
254 }
255 /* WC_SEPCHARS is the default */
256 }
257
258 *dst = uni2cp_low[uni2cp_high[wch >> 8] + (wch & 0xff)];
259 if (!is_valid_sbcs_mapping( table, flags, wch, *dst ))
260 {
261 *dst = def;
262 *used = 1;
263 }
264 }
265 if (srclen) return -1; /* overflow */
266 return dstlen - len;
267}
268
269
270/****************************************************************/
271/* dbcs support */
272
273/* check if 'ch' is an acceptable dbcs mapping for 'wch' */
274static inline int is_valid_dbcs_mapping( const struct dbcs_table *table, int flags,
275 WCHAR wch, unsigned short ch )
276{
277 if ((flags & WC_NO_BEST_FIT_CHARS) || ch == table->info.def_char)
278 {
279 /* check if char maps back to the same Unicode value */
280 if (ch & 0xff00)
281 {
282 unsigned char off = table->cp2uni_leadbytes[ch >> 8];
283 return (table->cp2uni[(off << 8) + (ch & 0xff)] == wch);
284 }
285 return (table->cp2uni[ch & 0xff] == wch);
286 }
287 return 1;
288}
289
290/* query necessary dst length for src string */
291static int get_length_dbcs( const struct dbcs_table *table, int flags,
292 const WCHAR *src, unsigned int srclen,
293 const char *defchar, int *used )
294{
295 const unsigned short * const uni2cp_low = table->uni2cp_low;
296 const unsigned short * const uni2cp_high = table->uni2cp_high;
297 WCHAR defchar_value = table->info.def_char;
298 WCHAR composed;
299 int len, tmp;
300
301 if (!defchar && !used && !(flags & WC_COMPOSITECHECK))
302 {
303 for (len = 0; srclen; srclen--, src++, len++)
304 {
305 if (uni2cp_low[uni2cp_high[*src >> 8] + (*src & 0xff)] & 0xff00) len++;
306 }
307 return len;
308 }
309
310 if (defchar) defchar_value = defchar[1] ? ((defchar[0] << 8) | defchar[1]) : defchar[0];
311 if (!used) used = &tmp; /* avoid checking on every char */
312 *used = 0;
313 for (len = 0; srclen; len++, srclen--, src++)
314 {
315 unsigned short res;
316 WCHAR wch = *src;
317
318 if ((flags & WC_COMPOSITECHECK) && (srclen > 1) && (composed = compose(src)))
319 {
320 /* now check if we can use the composed char */
321 res = uni2cp_low[uni2cp_high[composed >> 8] + (composed & 0xff)];
322
323 if (is_valid_dbcs_mapping( table, flags, composed, res ))
324 {
325 /* we have a good mapping for the composed char, use it */
326 if (res & 0xff00) len++;
327 src++;
328 srclen--;
329 continue;
330 }
331 /* no mapping for the composed char, check the other flags */
332 if (flags & WC_DEFAULTCHAR) /* use the default char instead */
333 {
334 if (defchar_value & 0xff00) len++;
335 *used = 1;
336 src++; /* skip the non-spacing char */
337 srclen--;
338 continue;
339 }
340 if (flags & WC_DISCARDNS) /* skip the second char of the composition */
341 {
342 src++;
343 srclen--;
344 }
345 /* WC_SEPCHARS is the default */
346 }
347
348 res = uni2cp_low[uni2cp_high[wch >> 8] + (wch & 0xff)];
349 if (!is_valid_dbcs_mapping( table, flags, wch, res ))
350 {
351 res = defchar_value;
352 *used = 1;
353 }
354 if (res & 0xff00) len++;
355 }
356 return len;
357}
358
359/* wcstombs for double-byte code page */
360static inline int wcstombs_dbcs( const struct dbcs_table *table,
361 const WCHAR *src, unsigned int srclen,
362 char *dst, unsigned int dstlen )
363{
364 const unsigned short * const uni2cp_low = table->uni2cp_low;
365 const unsigned short * const uni2cp_high = table->uni2cp_high;
366 int len;
367
368 for (len = dstlen; srclen && len; len--, srclen--, src++)
369 {
370 unsigned short res = uni2cp_low[uni2cp_high[*src >> 8] + (*src & 0xff)];
371 if (res & 0xff00)
372 {
373 if (len == 1) break; /* do not output a partial char */
374 len--;
375 *dst++ = res >> 8;
376 }
377 *dst++ = (char)res;
378 }
379 if (srclen) return -1; /* overflow */
380 return dstlen - len;
381}
382
383/* slow version of wcstombs_dbcs that handles the various flags */
384static int wcstombs_dbcs_slow( const struct dbcs_table *table, int flags,
385 const WCHAR *src, unsigned int srclen,
386 char *dst, unsigned int dstlen,
387 const char *defchar, int *used )
388{
389 const unsigned short * const uni2cp_low = table->uni2cp_low;
390 const unsigned short * const uni2cp_high = table->uni2cp_high;
391 WCHAR defchar_value = table->info.def_char;
392 WCHAR composed;
393 int len, tmp;
394
395 if (defchar) defchar_value = defchar[1] ? ((defchar[0] << 8) | defchar[1]) : defchar[0];
396 if (!used) used = &tmp; /* avoid checking on every char */
397 *used = 0;
398
399 for (len = dstlen; srclen && len; len--, srclen--, src++)
400 {
401 unsigned short res;
402 WCHAR wch = *src;
403
404 if ((flags & WC_COMPOSITECHECK) && (srclen > 1) && (composed = compose(src)))
405 {
406 /* now check if we can use the composed char */
407 res = uni2cp_low[uni2cp_high[composed >> 8] + (composed & 0xff)];
408
409 if (is_valid_dbcs_mapping( table, flags, composed, res ))
410 {
411 /* we have a good mapping for the composed char, use it */
412 src++;
413 srclen--;
414 goto output_char;
415 }
416 /* no mapping for the composed char, check the other flags */
417 if (flags & WC_DEFAULTCHAR) /* use the default char instead */
418 {
419 res = defchar_value;
420 *used = 1;
421 src++; /* skip the non-spacing char */
422 srclen--;
423 goto output_char;
424 }
425 if (flags & WC_DISCARDNS) /* skip the second char of the composition */
426 {
427 src++;
428 srclen--;
429 }
430 /* WC_SEPCHARS is the default */
431 }
432
433 res = uni2cp_low[uni2cp_high[wch >> 8] + (wch & 0xff)];
434 if (!is_valid_dbcs_mapping( table, flags, wch, res ))
435 {
436 res = defchar_value;
437 *used = 1;
438 }
439
440 output_char:
441 if (res & 0xff00)
442 {
443 if (len == 1) break; /* do not output a partial char */
444 len--;
445 *dst++ = res >> 8;
446 }
447 *dst++ = (char)res;
448 }
449 if (srclen) return -1; /* overflow */
450 return dstlen - len;
451}
452
453/* wide char to multi byte string conversion */
454/* return -1 on dst buffer overflow */
455int wine_cp_wcstombs( const union cptable *table, int flags,
456 const WCHAR *src, int srclen,
457 char *dst, int dstlen, const char *defchar, int *used )
458{
459 if (table->info.char_size == 1)
460 {
461 if (flags || defchar || used)
462 {
463 if (!dstlen) return get_length_sbcs( &table->sbcs, flags, src, srclen, used );
464 return wcstombs_sbcs_slow( &table->sbcs, flags, src, srclen,
465 dst, dstlen, defchar, used );
466 }
467 if (!dstlen) return srclen;
468 return wcstombs_sbcs( &table->sbcs, src, srclen, dst, dstlen );
469 }
470 else /* mbcs */
471 {
472 if (!dstlen) return get_length_dbcs( &table->dbcs, flags, src, srclen, defchar, used );
473 if (flags || defchar || used)
474 return wcstombs_dbcs_slow( &table->dbcs, flags, src, srclen,
475 dst, dstlen, defchar, used );
476 return wcstombs_dbcs( &table->dbcs, src, srclen, dst, dstlen );
477 }
478}
479
480/* CP_SYMBOL implementation */
481/* return -1 on dst buffer overflow, -2 on invalid character */
482int wine_cpsymbol_wcstombs( const WCHAR *src, int srclen, char *dst, int dstlen)
483{
484 int len, i;
485 if( dstlen == 0) return srclen;
486 len = dstlen > srclen ? srclen : dstlen;
487 for( i = 0; i < len; i++)
488 {
489 WCHAR w = src [ i ];
490 if( w < 0x20 )
491 dst[i] = w;
492 else if( w >= 0xf020 && w < 0xf100)
493 dst[i] = w - 0xf000;
494 else
495 return -2;
496 }
497 if( srclen > len) return -1;
498 return len;
499}
Note: See TracBrowser for help on using the repository browser.

© 2024 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette