VirtualBox

source: vbox/trunk/src/VBox/Runtime/testcase/tstUtf8.cpp@ 1507

Last change on this file since 1507 was 1, checked in by vboxsync, 55 years ago

import

  • Property svn:eol-style set to native
  • Property svn:keywords set to Id
File size: 26.9 KB
Line 
1/* $Id: tstUtf8.cpp 1 1970-01-01 00:00:00Z vboxsync $ */
2/** @file
3 * InnoTek Portable Runtime Testcase - UTF-8 and UTF-16 string conversions.
4 */
5
6/*
7 * Copyright (C) 2006 InnoTek Systemberatung GmbH
8 *
9 * This file is part of VirtualBox Open Source Edition (OSE), as
10 * available from http://www.virtualbox.org. This file is free software;
11 * you can redistribute it and/or modify it under the terms of the GNU
12 * General Public License as published by the Free Software Foundation,
13 * in version 2 as it comes in the "COPYING" file of the VirtualBox OSE
14 * distribution. VirtualBox OSE is distributed in the hope that it will
15 * be useful, but WITHOUT ANY WARRANTY of any kind.
16 *
17 * If you received this file as part of a commercial VirtualBox
18 * distribution, then only the terms of your commercial VirtualBox
19 * license agreement apply instead of the previous paragraph.
20 */
21
22/*******************************************************************************
23* Header Files *
24*******************************************************************************/
25#include <iprt/string.h>
26#include <iprt/uni.h>
27#include <iprt/runtime.h>
28#include <iprt/uuid.h>
29#include <iprt/time.h>
30#include <iprt/stream.h>
31#include <iprt/alloc.h>
32#include <iprt/assert.h>
33#include <iprt/err.h>
34
35#include <stdlib.h>
36
37
38/*******************************************************************************
39* Global Variables *
40*******************************************************************************/
41static int g_cErrors = 0;
42
43
44/**
45 * Generate a random codepoint for simple UTF-16 encoding.
46 */
47static RTUTF16 GetRandUcs2(void)
48{
49 RTUTF16 wc;
50 do
51 {
52 wc = (RTUTF16)((long long)rand() * 0xffff / RAND_MAX);
53 } while ((wc >= 0xd800 && wc <= 0xdfff) || wc == 0);
54 return wc;
55}
56
57
58/**
59 *
60 */
61static void test1(void)
62{
63 static const char s_szBadString1[] = "Bad \xe0\x13\x0";
64 static const char s_szBadString2[] = "Bad \xef\xbf\xc3";
65 int rc;
66 char *pszUtf8;
67 char *pszCurrent;
68 PRTUTF16 pwsz;
69 PRTUTF16 pwszRand;
70
71 RTPrintf("tstUtf8: TEST 1\n");
72
73 /*
74 * Invalid UTF-8 to UCS-2 test.
75 */
76 rc = RTStrToUtf16(s_szBadString1, &pwsz);
77 if (rc != VERR_NO_TRANSLATION && rc != VERR_INVALID_UTF8_ENCODING)
78 {
79 RTPrintf("tstUtf8: FAILURE - %d: Conversion of first bad UTF-8 string to UTF-16 apparantly succeeded. It shouldn't. rc=%Vrc\n",
80 __LINE__, rc);
81 g_cErrors++;
82 }
83 rc = RTStrToUtf16(s_szBadString2, &pwsz);
84 if (rc != VERR_NO_TRANSLATION && rc != VERR_INVALID_UTF8_ENCODING)
85 {
86 RTPrintf("tstUtf8: FAILURE - %d: Conversion of second bad UTF-8 strings to UTF-16 apparantly succeeded. It shouldn't. rc=%Vrc\n",
87 __LINE__, rc);
88 g_cErrors++;
89 }
90
91 /*
92 * Test current CP convertion.
93 */
94 pwszRand = (PRTUTF16)RTMemAlloc(31 * sizeof(*pwsz));
95 srand((unsigned)RTTimeNanoTS());
96 for (int i = 0; i < 30; i++)
97 pwszRand[i] = GetRandUcs2();
98 pwszRand[30] = 0;
99
100 rc = RTUtf16ToUtf8(pwszRand, &pszUtf8);
101 if (rc == VINF_SUCCESS)
102 {
103 rc = RTStrUtf8ToCurrentCP(&pszCurrent, pszUtf8);
104 if (rc == VINF_SUCCESS)
105 {
106 rc = RTStrCurrentCPToUtf8(&pszUtf8, pszCurrent);
107 if (rc == VINF_SUCCESS)
108 RTPrintf("tstUtf8: Random UTF-16 -> UTF-8 -> Current -> UTF-8 successful.\n");
109 else
110 {
111 RTPrintf("tstUtf8: FAILURE - %d: The third part of random UTF-16 -> UTF-8 -> Current -> UTF-8 failed with return value %Vrc.\n",
112 __LINE__, rc);
113 g_cErrors++;
114 }
115 }
116 else if (rc == VERR_NO_TRANSLATION)
117 RTPrintf("tstUtf8: The second part of random UTF-16 -> UTF-8 -> Current -> UTF-8 returned VERR_NO_TRANSLATION. This is probably as it should be.\n");
118 else
119 {
120 RTPrintf("tstUtf8: FAILURE - %d: The second part of random UTF-16 -> UTF-8 -> Current -> UTF-8 failed with return value %Vrc.\n",
121 __LINE__, rc);
122 g_cErrors++;
123 }
124 }
125 else
126 {
127 RTPrintf("tstUtf8: FAILURE - %d: The first part of random UTF-16 -> UTF-8 -> Current -> UTF-8 failed with return value %Vrc.\n",
128 __LINE__, rc);
129 g_cErrors++;
130 }
131
132 /*
133 * Generate a new random string.
134 */
135 pwszRand = (PRTUTF16)RTMemAlloc(31 * sizeof(*pwsz));
136 srand((unsigned)RTTimeNanoTS());
137 for (int i = 0; i < 30; i++)
138 pwszRand[i] = GetRandUcs2();
139 pwszRand[30] = 0;
140 rc = RTUtf16ToUtf8(pwszRand, &pszUtf8);
141 if (rc == VINF_SUCCESS)
142 {
143 rc = RTStrToUtf16(pszUtf8, &pwsz);
144 if (rc == VINF_SUCCESS)
145 {
146 int i;
147 for (i = 0; pwszRand[i] == pwsz[i] && pwsz[i] != 0; i++)
148 /* nothing */;
149 if (pwszRand[i] == pwsz[i] && pwsz[i] == 0)
150 RTPrintf("tstUtf8: Random UTF-16 -> UTF-8 -> UTF-16 successful.\n");
151 else
152 {
153 RTPrintf("tstUtf8: FAILURE - %d: The second part of random UTF-16 -> UTF-8 -> UTF-16 failed.\n", __LINE__);
154 RTPrintf("tstUtf8: First differing character is at position %d and has the value %x.\n", i, pwsz[i]);
155 g_cErrors++;
156 }
157 }
158 else
159 {
160 RTPrintf("tstUtf8: FAILURE - %d: The second part of random UTF-16 -> UTF-8 -> UTF-16 failed with return value %Vrc.\n",
161 __LINE__, rc);
162 g_cErrors++;
163 }
164 }
165 else
166 {
167 RTPrintf("tstUtf8: FAILURE - %d: The first part of random UTF-16 -> UTF-8 -> UTF-16 failed with return value %Vrc.\n",
168 __LINE__, rc);
169 g_cErrors++;
170 }
171
172 /*
173 * Generate yet another random string and convert it to a buffer.
174 */
175 pwszRand = (PRTUTF16)RTMemAlloc(31 * sizeof(*pwsz));
176 srand((unsigned)RTTimeNanoTS());
177 for (int i = 0; i < 30; i++)
178 pwszRand[i] = GetRandUcs2();
179 pwszRand[30] = 0;
180
181 char szUtf8Array[120];
182 char *pszUtf8Array = szUtf8Array;
183 rc = RTUtf16ToUtf8Ex(pwszRand, RTSTR_MAX, &pszUtf8Array, 120, NULL);
184 if (rc == 0)
185 {
186 rc = RTStrToUtf16(pszUtf8Array, &pwsz);
187 if (rc == 0)
188 {
189 int i;
190 for (i = 0; pwszRand[i] == pwsz[i] && pwsz[i] != 0; i++);
191 if (pwsz[i] == 0 && i >= 8)
192 RTPrintf("tstUtf8: Random UTF-16 -> fixed length UTF-8 -> UTF-16 successful.\n");
193 else
194 {
195 RTPrintf("tstUtf8: FAILURE - %d: Incorrect conversion of UTF-16 -> fixed length UTF-8 -> UTF-16.\n", __LINE__);
196 RTPrintf("tstUtf8: First differing character is at position %d and has the value %x.\n", i, pwsz[i]);
197 g_cErrors++;
198 }
199 }
200 else
201 {
202 RTPrintf("tstUtf8: FAILURE - %d: The second part of random UTF-16 -> fixed length UTF-8 -> UTF-16 failed with return value %Vrc.\n",
203 __LINE__, rc);
204 g_cErrors++;
205 }
206 }
207 else
208 {
209 RTPrintf("tstUtf8: FAILURE - %d: The first part of random UTF-16 -> fixed length UTF-8 -> UTF-16 failed with return value %Vrc.\n",
210 __LINE__, rc);
211 g_cErrors++;
212 }
213
214 /*
215 * And again.
216 */
217 pwszRand = (PRTUTF16)RTMemAlloc(31 * sizeof(*pwsz));
218 srand((unsigned)RTTimeNanoTS());
219 for (int i = 0; i < 30; i++)
220 pwszRand[i] = GetRandUcs2();
221 pwszRand[30] = 0;
222
223 RTUTF16 wszBuf[70];
224 PRTUTF16 pwsz2Buf = wszBuf;
225 rc = RTUtf16ToUtf8(pwszRand, &pszUtf8);
226 if (rc == 0)
227 {
228 rc = RTStrToUtf16Ex(pszUtf8, RTSTR_MAX, &pwsz2Buf, 70, NULL);
229 if (rc == 0)
230 {
231 int i;
232 for (i = 0; pwszRand[i] == pwsz2Buf[i] && pwsz2Buf[i] != 0; i++);
233 if (pwszRand[i] == 0 && pwsz2Buf[i] == 0)
234 RTPrintf("tstUtf8: Random UTF-16 -> UTF-8 -> fixed length UTF-16 successful.\n");
235 else
236 {
237 RTPrintf("tstUtf8: FAILURE - %d: Incorrect conversion of random UTF-16 -> UTF-8 -> fixed length UTF-16.\n", __LINE__);
238 RTPrintf("tstUtf8: First differing character is at position %d and has the value %x.\n", i, pwsz2Buf[i]);
239 g_cErrors++;
240 }
241 }
242 else
243 {
244 RTPrintf("tstUtf8: FAILURE - %d: The second part of random UTF-16 -> UTF-8 -> fixed length UTF-16 failed with return value %Vrc.\n",
245 __LINE__, rc);
246 g_cErrors++;
247 }
248 }
249 else
250 {
251 RTPrintf("tstUtf8: FAILURE - %d: The first part of random UTF-16 -> UTF-8 -> fixed length UTF-16 failed with return value %Vrc.\n",
252 __LINE__, rc);
253 g_cErrors++;
254 }
255 pwszRand = (PRTUTF16)RTMemAlloc(31 * sizeof(*pwsz));
256 srand((unsigned)RTTimeNanoTS());
257 for (int i = 0; i < 30; i++)
258 pwszRand[i] = GetRandUcs2();
259 pwszRand[30] = 0;
260
261 rc = RTUtf16ToUtf8Ex(pwszRand, RTSTR_MAX, &pszUtf8Array, 20, NULL);
262 if (rc == VERR_BUFFER_OVERFLOW)
263 RTPrintf("tstUtf8: Random UTF-16 -> fixed length UTF-8 with too short buffer successfully rejected.\n");
264 else
265 {
266 RTPrintf("tstUtf8: FAILURE - %d: Random UTF-16 -> fixed length UTF-8 with too small buffer returned value %d instead of VERR_BUFFER_OVERFLOW.\n",
267 __LINE__, rc);
268 g_cErrors++;
269 }
270
271 /*
272 * last time...
273 */
274 pwszRand = (PRTUTF16)RTMemAlloc(31 * sizeof(*pwsz));
275 srand((unsigned)RTTimeNanoTS());
276 for (int i = 0; i < 30; i++)
277 pwszRand[i] = GetRandUcs2();
278 pwszRand[30] = 0;
279
280 rc = RTUtf16ToUtf8(pwszRand, &pszUtf8);
281 if (rc == VINF_SUCCESS)
282 {
283 rc = RTStrToUtf16Ex(pszUtf8, RTSTR_MAX, &pwsz2Buf, 20, NULL);
284 if (rc == VERR_BUFFER_OVERFLOW)
285 RTPrintf("tstUtf8: Random UTF-16 -> UTF-8 -> fixed length UTF-16 with too short buffer successfully rejected.\n");
286 else
287 {
288 RTPrintf("tstUtf8: FAILURE - %d: The second part of random UTF-16 -> UTF-8 -> fixed length UTF-16 with too short buffer returned value %Vrc instead of VERR_BUFFER_OVERFLOW.\n",
289 __LINE__, rc);
290 g_cErrors++;
291 }
292 }
293 else
294 {
295 RTPrintf("tstUtf8: FAILURE - %d:The first part of random UTF-16 -> UTF-8 -> fixed length UTF-16 failed with return value %Vrc.\n",
296 __LINE__, rc);
297 g_cErrors++;
298 }
299
300}
301
302
303static RTUNICP g_uszAll[0x110000 - 1 - 0x800 - 2 + 1];
304static RTUTF16 g_wszAll[0xfffe - (0xe000 - 0xd800) + (0x110000 - 0x10000) * 2];
305static char g_szAll[0x7f + (0x800 - 0x80) * 2 + (0xfffe - 0x800 - (0xe000 - 0xd800))* 3 + (0x110000 - 0x10000) * 4 + 1];
306
307static void whereami(int cBits, size_t off)
308{
309 if (cBits == 8)
310 {
311 if (off < 0x7f)
312 RTPrintf("UTF-8 U+%#x\n", off + 1);
313 else if (off < 0xf7f)
314 RTPrintf("UTF-8 U+%#x\n", (off - 0x7f) / 2 + 0x80);
315 else if (off < 0x27f7f)
316 RTPrintf("UTF-8 U+%#x\n", (off - 0xf7f) / 3 + 0x800);
317 else if (off < 0x2df79)
318 RTPrintf("UTF-8 U+%#x\n", (off - 0x27f7f) / 3 + 0xe000);
319 else if (off < 0x42df79)
320 RTPrintf("UTF-8 U+%#x\n", (off - 0x2df79) / 4 + 0x10000);
321 else
322 RTPrintf("UTF-8 ???\n");
323 }
324 else if (cBits == 16)
325 {
326 if (off < 0xd7ff*2)
327 RTPrintf("UTF-16 U+%#x\n", off / 2 + 1);
328 else if (off < 0xf7fd*2)
329 RTPrintf("UTF-16 U+%#x\n", (off - 0xd7ff*2) / 2 + 0xe000);
330 else if (off < 0x20f7fd)
331 RTPrintf("UTF-16 U+%#x\n", (off - 0xf7fd*2) / 4 + 0x10000);
332 else
333 RTPrintf("UTF-16 ???\n");
334 }
335 else
336 {
337 if (off < (0xd800 - 1) * sizeof(RTUNICP))
338 RTPrintf("RTUNICP U+%#x\n", off / sizeof(RTUNICP) + 1);
339 else if (off < (0xfffe - 0x800 - 1) * sizeof(RTUNICP))
340 RTPrintf("RTUNICP U+%#x\n", off / sizeof(RTUNICP) + 0x800 + 1);
341 else
342 RTPrintf("RTUNICP U+%#x\n", off / sizeof(RTUNICP) + 0x800 + 1 + 2);
343 }
344}
345
346int mymemcmp(const void *pv1, const void *pv2, size_t cb, int cBits)
347{
348 const uint8_t *pb1 = (const uint8_t *)pv1;
349 const uint8_t *pb2 = (const uint8_t *)pv2;
350 for (size_t off = 0; off < cb; off++)
351 {
352 if (pb1[off] != pb2[off])
353 {
354 RTPrintf("mismatch at %#x: ", off);
355 whereami(cBits, off);
356 RTPrintf(" %#x: %02x != %02x!\n", off-1, pb1[off-1], pb2[off-1]);
357 RTPrintf("*%#x: %02x != %02x!\n", off, pb1[off], pb2[off]);
358 RTPrintf(" %#x: %02x != %02x!\n", off+1, pb1[off+1], pb2[off+1]);
359 RTPrintf(" %#x: %02x != %02x!\n", off+2, pb1[off+2], pb2[off+2]);
360 RTPrintf(" %#x: %02x != %02x!\n", off+3, pb1[off+3], pb2[off+3]);
361 RTPrintf(" %#x: %02x != %02x!\n", off+4, pb1[off+4], pb2[off+4]);
362 RTPrintf(" %#x: %02x != %02x!\n", off+5, pb1[off+5], pb2[off+5]);
363 RTPrintf(" %#x: %02x != %02x!\n", off+6, pb1[off+6], pb2[off+6]);
364 RTPrintf(" %#x: %02x != %02x!\n", off+7, pb1[off+7], pb2[off+7]);
365 RTPrintf(" %#x: %02x != %02x!\n", off+8, pb1[off+8], pb2[off+8]);
366 RTPrintf(" %#x: %02x != %02x!\n", off+9, pb1[off+9], pb2[off+9]);
367 return 1;
368 }
369 }
370 return 0;
371}
372
373
374void InitStrings(void)
375{
376 /*
377 * Generate unicode string containing all the legal UTF-16 codepoints, both UTF-16 and UTF-8 version.
378 */
379 /* the simple code point array first */
380 unsigned i = 0;
381 RTUNICP uc = 1;
382 while (uc < 0xd800)
383 g_uszAll[i++] = uc++;
384 uc = 0xe000;
385 while (uc < 0xfffe)
386 g_uszAll[i++] = uc++;
387 uc = 0x10000;
388 while (uc < 0x110000)
389 g_uszAll[i++] = uc++;
390 g_uszAll[i++] = 0;
391 Assert(ELEMENTS(g_uszAll) == i);
392
393 /* the utf-16 one */
394 i = 0;
395 uc = 1;
396 //RTPrintf("tstUtf8: %#x=%#x", i, uc);
397 while (uc < 0xd800)
398 g_wszAll[i++] = uc++;
399 uc = 0xe000;
400 //RTPrintf(" %#x=%#x", i, uc);
401 while (uc < 0xfffe)
402 g_wszAll[i++] = uc++;
403 uc = 0x10000;
404 //RTPrintf(" %#x=%#x", i, uc);
405 while (uc < 0x110000)
406 {
407 g_wszAll[i++] = 0xd800 | ((uc - 0x10000) >> 10);
408 g_wszAll[i++] = 0xdc00 | ((uc - 0x10000) & 0x3ff);
409 uc++;
410 }
411 //RTPrintf(" %#x=%#x\n", i, uc);
412 g_wszAll[i++] = '\0';
413 Assert(ELEMENTS(g_wszAll) == i);
414
415 /*
416 * The utf-8 one
417 */
418 i = 0;
419 uc = 1;
420 //RTPrintf("tstUtf8: %#x=%#x", i, uc);
421 while (uc < 0x80)
422 g_szAll[i++] = uc++;
423 //RTPrintf(" %#x=%#x", i, uc);
424 while (uc < 0x800)
425 {
426 g_szAll[i++] = 0xc0 | (uc >> 6);
427 g_szAll[i++] = 0x80 | (uc & 0x3f);
428 Assert(!((uc >> 6) & ~0x1f));
429 uc++;
430 }
431 //RTPrintf(" %#x=%#x", i, uc);
432 while (uc < 0xd800)
433 {
434 g_szAll[i++] = 0xe0 | (uc >> 12);
435 g_szAll[i++] = 0x80 | ((uc >> 6) & 0x3f);
436 g_szAll[i++] = 0x80 | (uc & 0x3f);
437 Assert(!((uc >> 12) & ~0xf));
438 uc++;
439 }
440 uc = 0xe000;
441 //RTPrintf(" %#x=%#x", i, uc);
442 while (uc < 0xfffe)
443 {
444 g_szAll[i++] = 0xe0 | (uc >> 12);
445 g_szAll[i++] = 0x80 | ((uc >> 6) & 0x3f);
446 g_szAll[i++] = 0x80 | (uc & 0x3f);
447 Assert(!((uc >> 12) & ~0xf));
448 uc++;
449 }
450 uc = 0x10000;
451 //RTPrintf(" %#x=%#x", i, uc);
452 while (uc < 0x110000)
453 {
454 g_szAll[i++] = 0xf0 | (uc >> 18);
455 g_szAll[i++] = 0x80 | ((uc >> 12) & 0x3f);
456 g_szAll[i++] = 0x80 | ((uc >> 6) & 0x3f);
457 g_szAll[i++] = 0x80 | (uc & 0x3f);
458 Assert(!((uc >> 18) & ~0x7));
459 uc++;
460 }
461 //RTPrintf(" %#x=%#x\n", i, uc);
462 g_szAll[i++] = '\0';
463 Assert(ELEMENTS(g_szAll) == i);
464}
465
466
467void test2(void)
468{
469 RTPrintf("tstUtf8: TEST 2\n");
470
471 /*
472 * Convert to UTF-8 and back.
473 */
474 RTPrintf("tstUtf8: #1: UTF-16 -> UTF-8 -> UTF-16...\n");
475 char *pszUtf8;
476 int rc = RTUtf16ToUtf8(&g_wszAll[0], &pszUtf8);
477 if (rc == VINF_SUCCESS)
478 {
479 if (mymemcmp(pszUtf8, g_szAll, sizeof(g_szAll), 8))
480 {
481 RTPrintf("tstUtf8: FAILURE - the full #1: UTF-16 -> UTF-8 mismatch!\n");
482 g_cErrors++;
483 }
484
485 PRTUTF16 puszUcs2;
486 rc = RTStrToUtf16(pszUtf8, &puszUcs2);
487 if (rc == VINF_SUCCESS)
488 {
489 if (mymemcmp(puszUcs2, g_wszAll, sizeof(g_wszAll), 16))
490 {
491 RTPrintf("tstUtf8: FAILURE - the full #1: UTF-8 -> UTF-16 failed compare!\n");
492 g_cErrors++;
493 }
494 RTUtf16Free(puszUcs2);
495 }
496 else
497 {
498 RTPrintf("tstUtf8: FAILURE - the full #1: UTF-8 -> UTF-16 failed, rc=%Rrc.\n", rc);
499 g_cErrors++;
500 }
501 RTStrFree(pszUtf8);
502 }
503 else
504 {
505 RTPrintf("tstUtf8: FAILURE - the full #1: UTF-16 -> UTF-8 failed, rc=%Rrc.\n", rc);
506 g_cErrors++;
507 }
508
509
510 /*
511 * Convert to UTF-16 and back. (just in case the above test fails)
512 */
513 RTPrintf("tstUtf8: #2: UTF-8 -> UTF-16 -> UTF-8...\n");
514 PRTUTF16 puszUcs2;
515 rc = RTStrToUtf16(&g_szAll[0], &puszUcs2);
516 if (rc == VINF_SUCCESS)
517 {
518 if (mymemcmp(puszUcs2, g_wszAll, sizeof(g_wszAll), 16))
519 {
520 RTPrintf("tstUtf8: FAILURE - the full #2: UTF-8 -> UTF-16 failed compare!\n");
521 g_cErrors++;
522 }
523
524 char *pszUtf8;
525 rc = RTUtf16ToUtf8(puszUcs2, &pszUtf8);
526 if (rc == VINF_SUCCESS)
527 {
528 if (mymemcmp(pszUtf8, g_szAll, sizeof(g_szAll), 8))
529 {
530 RTPrintf("tstUtf8: FAILURE - the full #2: UTF-16 -> UTF-8 failed compare!\n");
531 g_cErrors++;
532 }
533 RTStrFree(pszUtf8);
534 }
535 else
536 {
537 RTPrintf("tstUtf8: FAILURE - the full #2: UTF-16 -> UTF-8 failed, rc=%Rrc.\n", rc);
538 g_cErrors++;
539 }
540 RTStrUcs2Free(puszUcs2);
541 }
542 else
543 {
544 RTPrintf("tstUtf8: FAILURE - the full #2: UTF-8 -> UTF-16 failed, rc=%Rrc.\n", rc);
545 g_cErrors++;
546 }
547
548 /*
549 * Convert UTF-8 to CPs.
550 */
551 PRTUNICP paCps;
552 rc = RTStrToUni(g_szAll, &paCps);
553 if (rc == VINF_SUCCESS)
554 {
555 if (mymemcmp(paCps, g_uszAll, sizeof(g_uszAll), 32))
556 {
557 RTPrintf("tstUtf8: FAILURE - the full #2: UTF-8 -> UTF-16 failed, rc=%Rrc.\n", rc);
558 g_cErrors++;
559 }
560
561 size_t cCps;
562 rc = RTStrToUniEx(g_szAll, RTSTR_MAX, &paCps, ELEMENTS(g_uszAll), &cCps);
563 if (rc == VINF_SUCCESS)
564 {
565 if (cCps != ELEMENTS(g_uszAll) - 1)
566 {
567 RTPrintf("tstUtf8: FAILURE - the full #3+: wrong Code Point count %zu, expected %zu\n", cCps, ELEMENTS(g_uszAll) - 1);
568 g_cErrors++;
569 }
570 }
571 else
572 {
573 RTPrintf("tstUtf8: FAILURE - the full #3+: UTF-8 -> Code Points failed, rc=%Rrc.\n", rc);
574 g_cErrors++;
575 }
576
577 /** @todo RTCpsToUtf8 or something. */
578 }
579 else
580 {
581 RTPrintf("tstUtf8: FAILURE - the full #3a: UTF-8 -> Code Points failed, rc=%Rrc.\n", rc);
582 g_cErrors++;
583 }
584
585 /*
586 * Check the various string lengths.
587 */
588 size_t cuc1 = RTStrCalcUtf16Len(g_szAll);
589 size_t cuc2 = RTUtf16Len(g_wszAll);
590 if (cuc1 != cuc2)
591 {
592 RTPrintf("tstUtf8: FAILURE - cuc1=%zu != cuc2=%zu\n", cuc1, cuc2);
593 g_cErrors++;
594 }
595 //size_t cuc3 = RTUniLen(g_uszAll);
596
597
598 /*
599 * Enumerate the strings.
600 */
601 char *pszPut1Base = (char *)RTMemAlloc(sizeof(g_szAll));
602 AssertRelease(pszPut1Base);
603 char *pszPut1 = pszPut1Base;
604 PRTUTF16 pwszPut2Base = (PRTUTF16)RTMemAlloc(sizeof(g_wszAll));
605 AssertRelease(pwszPut2Base);
606 PRTUTF16 pwszPut2 = pwszPut2Base;
607 const char *psz1 = g_szAll;
608 const char *psz2 = g_szAll;
609 PCRTUTF16 pwsz3 = g_wszAll;
610 PCRTUTF16 pwsz4 = g_wszAll;
611 for (;;)
612 {
613 /*
614 * getters
615 */
616 RTUNICP uc1;
617 rc = RTStrGetCpEx(&psz1, &uc1);
618 if (RT_FAILURE(rc))
619 {
620 RTPrintf("tstUtf8: FAILURE - RTStrGetCpEx failed with rc=%Rrc at %.10Rhxs\n", rc, psz2);
621 whereami(8, psz2 - &g_szAll[0]);
622 g_cErrors++;
623 break;
624 }
625 char *pszPrev1 = RTStrPrevCp(g_szAll, psz1);
626 if (pszPrev1 != psz2)
627 {
628 RTPrintf("tstUtf8: FAILURE - RTStrPrevCp returned %p expected %p!\n", pszPrev1, psz2);
629 whereami(8, psz2 - &g_szAll[0]);
630 g_cErrors++;
631 break;
632 }
633 RTUNICP uc2 = RTStrGetCp(psz2);
634 if (uc2 != uc1)
635 {
636 RTPrintf("tstUtf8: FAILURE - RTStrGetCpEx and RTStrGetCp returned different CPs: %RTunicp != %RTunicp\n", uc2, uc1);
637 whereami(8, psz2 - &g_szAll[0]);
638 g_cErrors++;
639 break;
640 }
641 psz2 = RTStrNextCp(psz2);
642 if (psz2 != psz1)
643 {
644 RTPrintf("tstUtf8: FAILURE - RTStrGetCpEx and RTStrGetNext returned different next pointer!\n");
645 whereami(8, psz2 - &g_szAll[0]);
646 g_cErrors++;
647 break;
648 }
649
650 RTUNICP uc3;
651 rc = RTUtf16GetCpEx(&pwsz3, &uc3);
652 if (RT_FAILURE(rc))
653 {
654 RTPrintf("tstUtf8: FAILURE - RTUtf16GetCpEx failed with rc=%Rrc at %.10Rhxs\n", rc, pwsz4);
655 whereami(16, pwsz4 - &g_wszAll[0]);
656 g_cErrors++;
657 break;
658 }
659 if (uc3 != uc2)
660 {
661 RTPrintf("tstUtf8: FAILURE - RTUtf16GetCpEx and RTStrGetCp returned different CPs: %RTunicp != %RTunicp\n", uc3, uc2);
662 whereami(16, pwsz4 - &g_wszAll[0]);
663 g_cErrors++;
664 break;
665 }
666 RTUNICP uc4 = RTUtf16GetCp(pwsz4);
667 if (uc3 != uc4)
668 {
669 RTPrintf("tstUtf8: FAILURE - RTUtf16GetCpEx and RTUtf16GetCp returned different CPs: %RTunicp != %RTunicp\n", uc3, uc4);
670 whereami(16, pwsz4 - &g_wszAll[0]);
671 g_cErrors++;
672 break;
673 }
674 pwsz4 = RTUtf16NextCp(pwsz4);
675 if (pwsz4 != pwsz3)
676 {
677 RTPrintf("tstUtf8: FAILURE - RTUtf16GetCpEx and RTUtf16GetNext returned different next pointer!\n");
678 whereami(8, pwsz4 - &g_wszAll[0]);
679 g_cErrors++;
680 break;
681 }
682
683
684 /*
685 * putters
686 */
687 pszPut1 = RTStrPutCp(pszPut1, uc1);
688 if (pszPut1 - pszPut1Base != psz1 - &g_szAll[0])
689 {
690 RTPrintf("tstUtf8: FAILURE - RTStrPutCp is not at the same offset! %p != %p\n",
691 pszPut1 - pszPut1Base, psz1 - &g_szAll[0]);
692 whereami(8, psz2 - &g_szAll[0]);
693 g_cErrors++;
694 break;
695 }
696
697 pwszPut2 = RTUtf16PutCp(pwszPut2, uc3);
698 if (pwszPut2 - pwszPut2Base != pwsz3 - &g_wszAll[0])
699 {
700 RTPrintf("tstUtf8: FAILURE - RTStrPutCp is not at the same offset! %p != %p\n",
701 pwszPut2 - pwszPut2Base, pwsz3 - &g_wszAll[0]);
702 whereami(8, pwsz4 - &g_wszAll[0]);
703 g_cErrors++;
704 break;
705 }
706
707
708 /* the end? */
709 if (!uc1)
710 break;
711 }
712
713 /* check output if we seems to have made it thru it all. */
714 if (psz2 == &g_szAll[sizeof(g_szAll)])
715 {
716 if (mymemcmp(pszPut1Base, g_szAll, sizeof(g_szAll), 8))
717 {
718 RTPrintf("tstUtf8: FAILURE - RTStrPutCp encoded the string incorrectly.\n");
719 g_cErrors++;
720 }
721 if (mymemcmp(pwszPut2Base, g_wszAll, sizeof(g_wszAll), 16))
722 {
723 RTPrintf("tstUtf8: FAILURE - RTUtf16PutCp encoded the string incorrectly.\n");
724 g_cErrors++;
725 }
726 }
727
728 RTMemFree(pszPut1Base);
729 RTMemFree(pwszPut2Base);
730}
731
732
733/**
734 * Check case insensitivity.
735 */
736void test3(void)
737{
738 RTPrintf("tstUtf8: TEST 3\n");
739
740 if ( RTUniCpToLower('a') != 'a'
741 || RTUniCpToLower('A') != 'a'
742 || RTUniCpToLower('b') != 'b'
743 || RTUniCpToLower('B') != 'b'
744 || RTUniCpToLower('Z') != 'z'
745 || RTUniCpToLower('z') != 'z'
746 || RTUniCpToUpper('c') != 'C'
747 || RTUniCpToUpper('C') != 'C'
748 || RTUniCpToUpper('z') != 'Z'
749 || RTUniCpToUpper('Z') != 'Z')
750 {
751 RTPrintf("tstUtf8: FAILURE - RTUniToUpper/Lower failed basic tests.\n");
752 g_cErrors++;
753 }
754
755 if (RTUtf16ICmp(g_wszAll, g_wszAll))
756 {
757 RTPrintf("tstUtf8: FAILURE - RTUtf16ICmp failed the basic test.\n");
758 g_cErrors++;
759 }
760
761 if (RTUtf16Cmp(g_wszAll, g_wszAll))
762 {
763 RTPrintf("tstUtf8: FAILURE - RTUtf16Cmp failed the basic test.\n");
764 g_cErrors++;
765 }
766
767 static RTUTF16 s_wszTst1a[] = { 'a', 'B', 'c', 'D', 'E', 'f', 'g', 'h', 'i', 'j', 'K', 'L', 'm', 'N', 'o', 'P', 'q', 'r', 'S', 't', 'u', 'V', 'w', 'x', 'Y', 'Z', 0xc5, 0xc6, 0xf8, 0 };
768 static RTUTF16 s_wszTst1b[] = { 'A', 'B', 'c', 'd', 'e', 'F', 'G', 'h', 'i', 'J', 'k', 'l', 'M', 'n', 'O', 'p', 'Q', 'R', 's', 't', 'U', 'v', 'w', 'X', 'y', 'z', 0xe5, 0xe6, 0xd8, 0 };
769 if ( RTUtf16ICmp(s_wszTst1b, s_wszTst1b)
770 || RTUtf16ICmp(s_wszTst1a, s_wszTst1a)
771 || RTUtf16ICmp(s_wszTst1a, s_wszTst1b)
772 || RTUtf16ICmp(s_wszTst1b, s_wszTst1a)
773 )
774 {
775 RTPrintf("tstUtf8: FAILURE - RTUtf16ICmp failed the alphabet test.\n");
776 g_cErrors++;
777 }
778
779 if ( RTUtf16Cmp(s_wszTst1b, s_wszTst1b)
780 || RTUtf16Cmp(s_wszTst1a, s_wszTst1a)
781 || !RTUtf16Cmp(s_wszTst1a, s_wszTst1b)
782 || !RTUtf16Cmp(s_wszTst1b, s_wszTst1a)
783 )
784 {
785 RTPrintf("tstUtf8: FAILURE - RTUtf16Cmp failed the alphabet test.\n");
786 g_cErrors++;
787 }
788}
789
790
791/**
792 * Benchmark stuff.
793 */
794void Benchmarks(void)
795{
796 RTPrintf("tstUtf8: BENCHMARKS\n");
797 static union
798 {
799 RTUTF16 wszBuf[sizeof(g_wszAll)];
800 char szBuf[sizeof(g_szAll)];
801 } s_Buf;
802
803 PRTUTF16 pwsz = &s_Buf.wszBuf[0];
804 int rc = RTStrToUtf16Ex(&g_szAll[0], RTSTR_MAX, &pwsz, ELEMENTS(s_Buf.wszBuf), NULL);
805 if (RT_SUCCESS(rc))
806 {
807 int i;
808 uint64_t u64Start = RTTimeNanoTS();
809 for (i = 0; i < 100; i++)
810 {
811 rc = RTStrToUtf16Ex(&g_szAll[0], RTSTR_MAX, &pwsz, ELEMENTS(s_Buf.wszBuf), NULL);
812 if (RT_FAILURE(rc))
813 {
814 RTPrintf("tstUtf8: UTF-8 -> UTF-16 benchmark failed at i=%d, rc=%Rrc\n", i, rc);
815 break;
816 }
817 }
818 uint64_t u64Elapsed = RTTimeNanoTS() - u64Start;
819 RTPrintf("tstUtf8: UTF-8 -> UTF-16: %d in %RI64ns\n", i, u64Elapsed);
820 }
821
822 char *psz = &s_Buf.szBuf[0];
823 rc = RTUtf16ToUtf8Ex(&g_wszAll[0], RTSTR_MAX, &psz, ELEMENTS(s_Buf.szBuf), NULL);
824 if (RT_SUCCESS(rc))
825 {
826 int i;
827 uint64_t u64Start = RTTimeNanoTS();
828 for (i = 0; i < 100; i++)
829 {
830 rc = RTUtf16ToUtf8Ex(&g_wszAll[0], RTSTR_MAX, &psz, ELEMENTS(s_Buf.szBuf), NULL);
831 if (RT_FAILURE(rc))
832 {
833 RTPrintf("tstUtf8: UTF-16 -> UTF-8 benchmark failed at i=%d, rc=%Rrc\n", i, rc);
834 break;
835 }
836 }
837 uint64_t u64Elapsed = RTTimeNanoTS() - u64Start;
838 RTPrintf("tstUtf8: UTF-16 -> UTF-8: %d in %RI64ns\n", i, u64Elapsed);
839 }
840
841}
842
843
844int main()
845{
846 RTR3Init(false);
847
848 InitStrings();
849 test1();
850 test2();
851 test3();
852 Benchmarks();
853
854 /*
855 * Summary
856 */
857 if (!g_cErrors)
858 RTPrintf("tstUtf8: SUCCESS\n");
859 else
860 RTPrintf("tstUtf8: FAILURE - %d errors!\n", g_cErrors);
861
862 return !!g_cErrors;
863}
Note: See TracBrowser for help on using the repository browser.

© 2024 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette