VirtualBox

source: vbox/trunk/src/VBox/Additions/WINNT/Graphics/Wine/libWine/utf8.c@ 30585

Last change on this file since 30585 was 22496, checked in by vboxsync, 15 years ago

crOpenGL: update wine to 1.1.27 and better fix for depthstencil surface refcounting

  • Property svn:eol-style set to native
File size: 10.5 KB
Line 
1/*
2 * UTF-8 support routines
3 *
4 * Copyright 2000 Alexandre Julliard
5 *
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2.1 of the License, or (at your option) any later version.
10 *
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
15 *
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
19 */
20
21/*
22 * Sun LGPL Disclaimer: For the avoidance of doubt, except that if any license choice
23 * other than GPL or LGPL is available it will apply instead, Sun elects to use only
24 * the Lesser General Public License version 2.1 (LGPLv2) at this time for any software where
25 * a choice of LGPL license versions is made available with the language indicating
26 * that LGPLv2 or any later version may be used, or where a choice of which version
27 * of the LGPL is applied is otherwise unspecified.
28 */
29
30#include "config.h"
31#include "wine/port.h"
32
33#include <string.h>
34
35#include "wine/unicode.h"
36
37extern WCHAR compose( const WCHAR *str );
38
/*
 * Number of continuation bytes that follow a lead byte, indexed by
 * (lead byte - 0x80).  An entry of 0 means the byte cannot start a
 * multi-byte sequence.
 */
static const char utf8_length[128] =
{
    /* 0x80-0xbf */
    0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,
    0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,
    0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,
    0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,
    /* 0xc0-0xdf */
    0,0,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,
    1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,
    /* 0xe0-0xef */
    2,2,2,2,2,2,2,2, 2,2,2,2,2,2,2,2,
    /* 0xf0-0xff */
    3,3,3,3,3,0,0,0, 0,0,0,0,0,0,0,0
};

/* mask selecting the payload bits of the first byte, indexed by the
 * number of continuation bytes */
static const unsigned char utf8_mask[4] =
{
    0x7f,  /* no continuation byte */
    0x1f,  /* 1 continuation byte */
    0x0f,  /* 2 continuation bytes */
    0x07   /* 3 continuation bytes */
};

/* smallest Unicode value allowed for a sequence, indexed by the number
 * of continuation bytes */
static const unsigned int utf8_minval[4] =
{
    0x0,      /* no continuation byte */
    0x80,     /* 1 continuation byte */
    0x800,    /* 2 continuation bytes */
    0x10000   /* 3 continuation bytes */
};
57
58
59/* get the next char value taking surrogates into account */
60static inline unsigned int get_surrogate_value( const WCHAR *src, unsigned int srclen )
61{
62 if (src[0] >= 0xd800 && src[0] <= 0xdfff) /* surrogate pair */
63 {
64 if (src[0] > 0xdbff || /* invalid high surrogate */
65 srclen <= 1 || /* missing low surrogate */
66 src[1] < 0xdc00 || src[1] > 0xdfff) /* invalid low surrogate */
67 return 0;
68 return 0x10000 + ((src[0] & 0x3ff) << 10) + (src[1] & 0x3ff);
69 }
70 return src[0];
71}
72
73/* query necessary dst length for src string */
74static inline int get_length_wcs_utf8( int flags, const WCHAR *src, unsigned int srclen )
75{
76 int len;
77 unsigned int val;
78
79 for (len = 0; srclen; srclen--, src++)
80 {
81 if (*src < 0x80) /* 0x00-0x7f: 1 byte */
82 {
83 len++;
84 continue;
85 }
86 if (*src < 0x800) /* 0x80-0x7ff: 2 bytes */
87 {
88 len += 2;
89 continue;
90 }
91 if (!(val = get_surrogate_value( src, srclen )))
92 {
93 if (flags & WC_ERR_INVALID_CHARS) return -2;
94 continue;
95 }
96 if (val < 0x10000) /* 0x800-0xffff: 3 bytes */
97 len += 3;
98 else /* 0x10000-0x10ffff: 4 bytes */
99 {
100 len += 4;
101 src++;
102 srclen--;
103 }
104 }
105 return len;
106}
107
108/* wide char to UTF-8 string conversion */
109/* return -1 on dst buffer overflow, -2 on invalid input char */
110int wine_utf8_wcstombs( int flags, const WCHAR *src, int srclen, char *dst, int dstlen )
111{
112 int len;
113
114 if (!dstlen) return get_length_wcs_utf8( flags, src, srclen );
115
116 for (len = dstlen; srclen; srclen--, src++)
117 {
118 WCHAR ch = *src;
119 unsigned int val;
120
121 if (ch < 0x80) /* 0x00-0x7f: 1 byte */
122 {
123 if (!len--) return -1; /* overflow */
124 *dst++ = ch;
125 continue;
126 }
127
128 if (ch < 0x800) /* 0x80-0x7ff: 2 bytes */
129 {
130 if ((len -= 2) < 0) return -1; /* overflow */
131 dst[1] = 0x80 | (ch & 0x3f);
132 ch >>= 6;
133 dst[0] = 0xc0 | ch;
134 dst += 2;
135 continue;
136 }
137
138 if (!(val = get_surrogate_value( src, srclen )))
139 {
140 if (flags & WC_ERR_INVALID_CHARS) return -2;
141 continue;
142 }
143
144 if (val < 0x10000) /* 0x800-0xffff: 3 bytes */
145 {
146 if ((len -= 3) < 0) return -1; /* overflow */
147 dst[2] = 0x80 | (val & 0x3f);
148 val >>= 6;
149 dst[1] = 0x80 | (val & 0x3f);
150 val >>= 6;
151 dst[0] = 0xe0 | val;
152 dst += 3;
153 }
154 else /* 0x10000-0x10ffff: 4 bytes */
155 {
156 if ((len -= 4) < 0) return -1; /* overflow */
157 dst[3] = 0x80 | (val & 0x3f);
158 val >>= 6;
159 dst[2] = 0x80 | (val & 0x3f);
160 val >>= 6;
161 dst[1] = 0x80 | (val & 0x3f);
162 val >>= 6;
163 dst[0] = 0xf0 | val;
164 dst += 4;
165 src++;
166 srclen--;
167 }
168 }
169 return dstlen - len;
170}
171
172/* helper for the various utf8 mbstowcs functions */
173static inline unsigned int decode_utf8_char( unsigned char ch, const char **str, const char *strend )
174{
175 unsigned int len = utf8_length[ch-0x80];
176 unsigned int res = ch & utf8_mask[len];
177 const char *end = *str + len;
178
179 if (end > strend) return ~0;
180 switch(len)
181 {
182 case 3:
183 if ((ch = end[-3] ^ 0x80) >= 0x40) break;
184 res = (res << 6) | ch;
185 (*str)++;
186 case 2:
187 if ((ch = end[-2] ^ 0x80) >= 0x40) break;
188 res = (res << 6) | ch;
189 (*str)++;
190 case 1:
191 if ((ch = end[-1] ^ 0x80) >= 0x40) break;
192 res = (res << 6) | ch;
193 (*str)++;
194 if (res < utf8_minval[len]) break;
195 return res;
196 }
197 return ~0;
198}
199
200/* query necessary dst length for src string with composition */
201static inline int get_length_mbs_utf8_compose( int flags, const char *src, int srclen )
202{
203 int ret = 0;
204 unsigned int res;
205 WCHAR composed[2];
206 const char *srcend = src + srclen;
207
208 composed[0] = 0;
209 while (src < srcend)
210 {
211 unsigned char ch = *src++;
212 if (ch < 0x80) /* special fast case for 7-bit ASCII */
213 {
214 composed[0] = ch;
215 ret++;
216 continue;
217 }
218 if ((res = decode_utf8_char( ch, &src, srcend )) <= 0xffff)
219 {
220 if (composed[0])
221 {
222 composed[1] = res;
223 if ((composed[0] = compose( composed ))) continue;
224 }
225 composed[0] = res;
226 ret++;
227 }
228 else if (res <= 0x10ffff)
229 {
230 ret += 2;
231 composed[0] = 0; /* no composition for surrogates */
232 }
233 else if (flags & MB_ERR_INVALID_CHARS) return -2; /* bad char */
234 /* otherwise ignore it */
235 }
236 return ret;
237}
238
239/* UTF-8 to wide char string conversion with composition */
240/* return -1 on dst buffer overflow, -2 on invalid input char */
241static int utf8_mbstowcs_compose( int flags, const char *src, int srclen, WCHAR *dst, int dstlen )
242{
243 unsigned int res;
244 const char *srcend = src + srclen;
245 WCHAR composed[2];
246 WCHAR *dstend = dst + dstlen;
247
248 if (!dstlen) return get_length_mbs_utf8_compose( flags, src, srclen );
249
250 composed[0] = 0;
251 while (src < srcend)
252 {
253 unsigned char ch = *src++;
254 if (ch < 0x80) /* special fast case for 7-bit ASCII */
255 {
256 if (dst >= dstend) return -1; /* overflow */
257 *dst++ = composed[0] = ch;
258 continue;
259 }
260 if ((res = decode_utf8_char( ch, &src, srcend )) <= 0xffff)
261 {
262 if (composed[0])
263 {
264 composed[1] = res;
265 if ((composed[0] = compose( composed )))
266 {
267 dst[-1] = composed[0];
268 continue;
269 }
270 }
271 if (dst >= dstend) return -1; /* overflow */
272 *dst++ = composed[0] = res;
273 }
274 else if (res <= 0x10ffff) /* we need surrogates */
275 {
276 if (dst >= dstend - 1) return -1; /* overflow */
277 res -= 0x10000;
278 *dst++ = 0xd800 | (res >> 10);
279 *dst++ = 0xdc00 | (res & 0x3ff);
280 composed[0] = 0; /* no composition for surrogates */
281 }
282 else if (flags & MB_ERR_INVALID_CHARS) return -2; /* bad char */
283 /* otherwise ignore it */
284 }
285 return dstlen - (dstend - dst);
286}
287
288/* query necessary dst length for src string */
289static inline int get_length_mbs_utf8( int flags, const char *src, int srclen )
290{
291 int ret = 0;
292 unsigned int res;
293 const char *srcend = src + srclen;
294
295 while (src < srcend)
296 {
297 unsigned char ch = *src++;
298 if (ch < 0x80) /* special fast case for 7-bit ASCII */
299 {
300 ret++;
301 continue;
302 }
303 if ((res = decode_utf8_char( ch, &src, srcend )) <= 0x10ffff)
304 {
305 if (res > 0xffff) ret++;
306 ret++;
307 }
308 else if (flags & MB_ERR_INVALID_CHARS) return -2; /* bad char */
309 /* otherwise ignore it */
310 }
311 return ret;
312}
313
314/* UTF-8 to wide char string conversion */
315/* return -1 on dst buffer overflow, -2 on invalid input char */
316int wine_utf8_mbstowcs( int flags, const char *src, int srclen, WCHAR *dst, int dstlen )
317{
318 unsigned int res;
319 const char *srcend = src + srclen;
320 WCHAR *dstend = dst + dstlen;
321
322 if (flags & MB_COMPOSITE) return utf8_mbstowcs_compose( flags, src, srclen, dst, dstlen );
323
324 if (!dstlen) return get_length_mbs_utf8( flags, src, srclen );
325
326 while ((dst < dstend) && (src < srcend))
327 {
328 unsigned char ch = *src++;
329 if (ch < 0x80) /* special fast case for 7-bit ASCII */
330 {
331 *dst++ = ch;
332 continue;
333 }
334 if ((res = decode_utf8_char( ch, &src, srcend )) <= 0xffff)
335 {
336 *dst++ = res;
337 }
338 else if (res <= 0x10ffff) /* we need surrogates */
339 {
340 if (dst == dstend - 1) return -1; /* overflow */
341 res -= 0x10000;
342 *dst++ = 0xd800 | (res >> 10);
343 *dst++ = 0xdc00 | (res & 0x3ff);
344 }
345 else if (flags & MB_ERR_INVALID_CHARS) return -2; /* bad char */
346 /* otherwise ignore it */
347 }
348 if (src < srcend) return -1; /* overflow */
349 return dstlen - (dstend - dst);
350}
Note: See TracBrowser for help on using the repository browser.

© 2024 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette