tstUtf8.cpp@ 102335

Last change on this file since 102335 was 99775, checked in by vboxsync, 19 months ago
*: Mark functions as static if not used outside of a given compilation unit. Enables the compiler to optimize inlining, reduces the symbol tables, exposes unused functions and in some rare cases exposes mismatches between function declarations and definitions, but most importantly reduces the number of parfait reports for the extern-function-no-forward-declaration category. This should not result in any functional changes, bugref:3409
Property svn:eol-style set to `native` Property svn:keywords set to `Id Revision`
File size: 58.9 KB

Line
1	/* $Id: tstUtf8.cpp 99775 2023-05-12 12:21:58Z vboxsync $ */
2	/** @file
3	* IPRT Testcase - UTF-8 and UTF-16 string conversions.
4	*/
5
6	/*
7	* Copyright (C) 2006-2023 Oracle and/or its affiliates.
8	*
9	* This file is part of VirtualBox base platform packages, as
10	* available from https://www.virtualbox.org.
11	*
12	* This program is free software; you can redistribute it and/or
13	* modify it under the terms of the GNU General Public License
14	* as published by the Free Software Foundation, in version 3 of the
15	* License.
16	*
17	* This program is distributed in the hope that it will be useful, but
18	* WITHOUT ANY WARRANTY; without even the implied warranty of
19	* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20	* General Public License for more details.
21	*
22	* You should have received a copy of the GNU General Public License
23	* along with this program; if not, see <https://www.gnu.org/licenses>.
24	*
25	* The contents of this file may alternatively be used under the terms
26	* of the Common Development and Distribution License Version 1.0
27	* (CDDL), a copy of it is provided in the "COPYING.CDDL" file included
28	* in the VirtualBox distribution, in which case the provisions of the
29	* CDDL are applicable instead of those of the GPL.
30	*
31	* You may elect to license modified versions of this file under the
32	* terms and conditions of either the GPL or the CDDL or both.
33	*
34	* SPDX-License-Identifier: GPL-3.0-only OR CDDL-1.0
35	*/
36
37
38	/*********************************************************************************************************************************
39	* Header Files *
40	*********************************************************************************************************************************/
41	#include <iprt/string.h>
42	#include <iprt/latin1.h>
43	#include <iprt/utf16.h>
44
45	#include <iprt/alloc.h>
46	#include <iprt/assert.h>
47	#include <iprt/env.h>
48	#include <iprt/err.h>
49	#include <iprt/rand.h>
50	#include <iprt/stream.h>
51	#include <iprt/test.h>
52	#include <iprt/time.h>
53	#include <iprt/uni.h>
54	#include <iprt/uuid.h>
55
56	#ifdef RT_OS_WINDOWS
57	# include <iprt/win/windows.h> /* For GetACP(). */
58	#endif
59
60
61	/**
62	* Generate a random codepoint for simple UTF-16 encoding.
63	*/
64	static RTUTF16 GetRandUtf16(void)
65	{
66	RTUTF16 wc;
67	do
68	{
69	wc = (RTUTF16)RTRandU32Ex(1, 0xfffd);
70	} while (wc >= 0xd800 && wc <= 0xdfff);
71	return wc;
72	}
73
74
75	/**
76	*
77	*/
78	static void test1(RTTEST hTest)
79	{
80	static const char s_szBadString1[] = "Bad \xe0\x13\x0";
81	static const char s_szBadString2[] = "Bad \xef\xbf\xc3";
82	int rc;
83	char *pszUtf8;
84	char *pszCurrent;
85	PRTUTF16 pwsz;
86	PRTUTF16 pwszRand;
87
88	/*
89	* Invalid UTF-8 to UCS-2 test.
90	*/
91	RTTestSub(hTest, "Feeding bad UTF-8 to RTStrToUtf16");
92	rc = RTStrToUtf16(s_szBadString1, &pwsz);
93	RTTEST_CHECK_MSG(hTest, rc == VERR_NO_TRANSLATION \|\| rc == VERR_INVALID_UTF8_ENCODING,
94	(hTest, "Conversion of first bad UTF-8 string to UTF-16 apparently succeeded. It shouldn't. rc=%Rrc\n", rc));
95	rc = RTStrToUtf16(s_szBadString2, &pwsz);
96	RTTEST_CHECK_MSG(hTest, rc == VERR_NO_TRANSLATION \|\| rc == VERR_INVALID_UTF8_ENCODING,
97	(hTest, "Conversion of second bad UTF-8 strings to UTF-16 apparently succeeded. It shouldn't. rc=%Rrc\n", rc));
98
99	/*
100	* Test current CP conversion.
101	*/
102	RTTestSub(hTest, "Rand UTF-16 -> UTF-8 -> CP -> UTF-8");
103	pwszRand = (PRTUTF16)RTMemAlloc(31 * sizeof(*pwsz));
104	for (int i = 0; i < 30; i++)
105	pwszRand[i] = GetRandUtf16();
106	pwszRand[30] = 0;
107
108	rc = RTUtf16ToUtf8(pwszRand, &pszUtf8);
109	if (rc == VINF_SUCCESS)
110	{
111	rc = RTStrUtf8ToCurrentCP(&pszCurrent, pszUtf8);
112	if (rc == VINF_SUCCESS)
113	{
114	RTStrFree(pszUtf8);
115	rc = RTStrCurrentCPToUtf8(&pszUtf8, pszCurrent);
116	if (rc == VINF_SUCCESS)
117	RTTestPassed(hTest, "Random UTF-16 -> UTF-8 -> Current -> UTF-8 successful.\n");
118	else
119	RTTestFailed(hTest, "%d: The third part of random UTF-16 -> UTF-8 -> Current -> UTF-8 failed with return value %Rrc.",
120	__LINE__, rc);
121	if (RT_SUCCESS(rc))
122	RTStrFree(pszUtf8);
123	RTStrFree(pszCurrent);
124	}
125	else
126	{
127	if (rc == VERR_NO_TRANSLATION)
128	RTTestPassed(hTest, "The second part of random UTF-16 -> UTF-8 -> Current -> UTF-8 returned VERR_NO_TRANSLATION. This is probably as it should be.\n");
129	else if (rc == VWRN_NO_TRANSLATION)
130	RTTestPassed(hTest, "The second part of random UTF-16 -> UTF-8 -> Current -> UTF-8 returned VWRN_NO_TRANSLATION. This is probably as it should be.\n");
131	else
132	RTTestFailed(hTest, "%d: The second part of random UTF-16 -> UTF-8 -> Current -> UTF-8 failed with return value %Rrc.",
133	__LINE__, rc);
134	if (RT_SUCCESS(rc))
135	RTStrFree(pszCurrent);
136	RTStrFree(pszUtf8);
137	}
138	}
139	else
140	RTTestFailed(hTest, "%d: The first part of random UTF-16 -> UTF-8 -> Current -> UTF-8 failed with return value %Rrc.",
141	__LINE__, rc);
142	RTMemFree(pwszRand);
143
144	/*
145	* Generate a new random string.
146	*/
147	RTTestSub(hTest, "Random UTF-16 -> UTF-8 -> UTF-16");
148	pwszRand = (PRTUTF16)RTMemAlloc(31 * sizeof(*pwsz));
149	for (int i = 0; i < 30; i++)
150	pwszRand[i] = GetRandUtf16();
151	pwszRand[30] = 0;
152	rc = RTUtf16ToUtf8(pwszRand, &pszUtf8);
153	if (rc == VINF_SUCCESS)
154	{
155	rc = RTStrToUtf16(pszUtf8, &pwsz);
156	if (rc == VINF_SUCCESS)
157	{
158	int i;
159	for (i = 0; pwszRand[i] == pwsz[i] && pwsz[i] != 0; i++)
160	/* nothing */;
161	if (pwszRand[i] == pwsz[i] && pwsz[i] == 0)
162	RTTestPassed(hTest, "Random UTF-16 -> UTF-8 -> UTF-16 successful.\n");
163	else
164	{
165	RTTestFailed(hTest, "%d: The second part of random UTF-16 -> UTF-8 -> UTF-16 failed.", __LINE__);
166	RTTestPrintf(hTest, RTTESTLVL_FAILURE, "First differing character is at position %d and has the value %x.\n", i, pwsz[i]);
167	}
168	RTUtf16Free(pwsz);
169	}
170	else
171	RTTestFailed(hTest, "%d: The second part of random UTF-16 -> UTF-8 -> UTF-16 failed with return value %Rrc.",
172	__LINE__, rc);
173	RTStrFree(pszUtf8);
174	}
175	else
176	RTTestFailed(hTest, "%d: The first part of random UTF-16 -> UTF-8 -> UTF-16 failed with return value %Rrc.",
177	__LINE__, rc);
178	RTMemFree(pwszRand);
179
180	/*
181	* Generate yet another random string and convert it to a buffer.
182	*/
183	RTTestSub(hTest, "Random RTUtf16ToUtf8Ex + RTStrToUtf16");
184	pwszRand = (PRTUTF16)RTMemAlloc(31 * sizeof(*pwsz));
185	for (int i = 0; i < 30; i++)
186	pwszRand[i] = GetRandUtf16();
187	pwszRand[30] = 0;
188
189	char szUtf8Array[120];
190	char *pszUtf8Array = szUtf8Array;
191	rc = RTUtf16ToUtf8Ex(pwszRand, RTSTR_MAX, &pszUtf8Array, 120, NULL);
192	if (rc == 0)
193	{
194	rc = RTStrToUtf16(pszUtf8Array, &pwsz);
195	if (rc == 0)
196	{
197	int i;
198	for (i = 0; pwszRand[i] == pwsz[i] && pwsz[i] != 0; i++)
199	;
200	if (pwsz[i] == 0 && i >= 8)
201	RTTestPassed(hTest, "Random UTF-16 -> fixed length UTF-8 -> UTF-16 successful.\n");
202	else
203	{
204	RTTestFailed(hTest, "%d: Incorrect conversion of UTF-16 -> fixed length UTF-8 -> UTF-16.\n", __LINE__);
205	RTTestPrintf(hTest, RTTESTLVL_FAILURE, "First differing character is at position %d and has the value %x.\n", i, pwsz[i]);
206	}
207	RTUtf16Free(pwsz);
208	}
209	else
210	RTTestFailed(hTest, "%d: The second part of random UTF-16 -> fixed length UTF-8 -> UTF-16 failed with return value %Rrc.\n", __LINE__, rc);
211	}
212	else
213	RTTestFailed(hTest, "%d: The first part of random UTF-16 -> fixed length UTF-8 -> UTF-16 failed with return value %Rrc.\n", __LINE__, rc);
214	RTMemFree(pwszRand);
215
216	/*
217	* And again.
218	*/
219	RTTestSub(hTest, "Random RTUtf16ToUtf8 + RTStrToUtf16Ex");
220	pwszRand = (PRTUTF16)RTMemAlloc(31 * sizeof(*pwsz));
221	for (int i = 0; i < 30; i++)
222	pwszRand[i] = GetRandUtf16();
223	pwszRand[30] = 0;
224
225	RTUTF16 wszBuf[70];
226	PRTUTF16 pwsz2Buf = wszBuf;
227	rc = RTUtf16ToUtf8(pwszRand, &pszUtf8);
228	if (rc == 0)
229	{
230	rc = RTStrToUtf16Ex(pszUtf8, RTSTR_MAX, &pwsz2Buf, 70, NULL);
231	if (rc == 0)
232	{
233	int i;
234	for (i = 0; pwszRand[i] == pwsz2Buf[i] && pwsz2Buf[i] != 0; i++)
235	;
236	if (pwszRand[i] == 0 && pwsz2Buf[i] == 0)
237	RTTestPassed(hTest, "Random UTF-16 -> UTF-8 -> fixed length UTF-16 successful.\n");
238	else
239	{
240	RTTestFailed(hTest, "%d: Incorrect conversion of random UTF-16 -> UTF-8 -> fixed length UTF-16.\n", __LINE__);
241	RTTestPrintf(hTest, RTTESTLVL_FAILURE, "First differing character is at position %d and has the value %x.\n", i, pwsz2Buf[i]);
242	}
243	}
244	else
245	RTTestFailed(hTest, "%d: The second part of random UTF-16 -> UTF-8 -> fixed length UTF-16 failed with return value %Rrc.\n", __LINE__, rc);
246	RTStrFree(pszUtf8);
247	}
248	else
249	RTTestFailed(hTest, "%d: The first part of random UTF-16 -> UTF-8 -> fixed length UTF-16 failed with return value %Rrc.\n",
250	__LINE__, rc);
251	RTMemFree(pwszRand);
252
253	pwszRand = (PRTUTF16)RTMemAlloc(31 * sizeof(*pwsz));
254	for (int i = 0; i < 30; i++)
255	pwszRand[i] = GetRandUtf16();
256	pwszRand[30] = 0;
257
258	rc = RTUtf16ToUtf8Ex(pwszRand, RTSTR_MAX, &pszUtf8Array, 20, NULL);
259	if (rc == VERR_BUFFER_OVERFLOW)
260	RTTestPassed(hTest, "Random UTF-16 -> fixed length UTF-8 with too short buffer successfully rejected.\n");
261	else
262	RTTestFailed(hTest, "%d: Random UTF-16 -> fixed length UTF-8 with too small buffer returned value %d instead of VERR_BUFFER_OVERFLOW.\n",
263	__LINE__, rc);
264	RTMemFree(pwszRand);
265
266	/*
267	* last time...
268	*/
269	RTTestSub(hTest, "Random RTUtf16ToUtf8 + RTStrToUtf16Ex");
270	pwszRand = (PRTUTF16)RTMemAlloc(31 * sizeof(*pwsz));
271	for (int i = 0; i < 30; i++)
272	pwszRand[i] = GetRandUtf16();
273	pwszRand[30] = 0;
274
275	rc = RTUtf16ToUtf8(pwszRand, &pszUtf8);
276	if (rc == VINF_SUCCESS)
277	{
278	rc = RTStrToUtf16Ex(pszUtf8, RTSTR_MAX, &pwsz2Buf, 20, NULL);
279	if (rc == VERR_BUFFER_OVERFLOW)
280	RTTestPassed(hTest, "Random UTF-16 -> UTF-8 -> fixed length UTF-16 with too short buffer successfully rejected.\n");
281	else
282	RTTestFailed(hTest, "%d: The second part of random UTF-16 -> UTF-8 -> fixed length UTF-16 with too short buffer returned value %Rrc instead of VERR_BUFFER_OVERFLOW.\n",
283	__LINE__, rc);
284	RTStrFree(pszUtf8);
285	}
286	else
287	RTTestFailed(hTest, "%d:The first part of random UTF-16 -> UTF-8 -> fixed length UTF-16 failed with return value %Rrc.\n",
288	__LINE__, rc);
289	RTMemFree(pwszRand);
290
291	RTTestSubDone(hTest);
292	}
293
294
295	static RTUNICP g_uszAll[0x110000 - 1 - 0x800 - 2 + 1];
296	static RTUTF16 g_wszAll[0xfffe - (0xe000 - 0xd800) + (0x110000 - 0x10000) * 2];
297	static char g_szAll[0x7f + (0x800 - 0x80) * 2 + (0xfffe - 0x800 - (0xe000 - 0xd800))* 3 + (0x110000 - 0x10000) * 4 + 1];
298
299	static void whereami(int cBits, size_t off)
300	{
301	if (cBits == 8)
302	{
303	if (off < 0x7f)
304	RTTestPrintf(NIL_RTTEST, RTTESTLVL_FAILURE, "UTF-8 U+%#x\n", off + 1);
305	else if (off < 0xf7f)
306	RTTestPrintf(NIL_RTTEST, RTTESTLVL_FAILURE, "UTF-8 U+%#x\n", (off - 0x7f) / 2 + 0x80);
307	else if (off < 0x27f7f)
308	RTTestPrintf(NIL_RTTEST, RTTESTLVL_FAILURE, "UTF-8 U+%#x\n", (off - 0xf7f) / 3 + 0x800);
309	else if (off < 0x2df79)
310	RTTestPrintf(NIL_RTTEST, RTTESTLVL_FAILURE, "UTF-8 U+%#x\n", (off - 0x27f7f) / 3 + 0xe000);
311	else if (off < 0x42df79)
312	RTTestPrintf(NIL_RTTEST, RTTESTLVL_FAILURE, "UTF-8 U+%#x\n", (off - 0x2df79) / 4 + 0x10000);
313	else
314	RTTestPrintf(NIL_RTTEST, RTTESTLVL_FAILURE, "UTF-8 ???\n");
315	}
316	else if (cBits == 16)
317	{
318	if (off < 0xd7ff*2)
319	RTTestPrintf(NIL_RTTEST, RTTESTLVL_FAILURE, "UTF-16 U+%#x\n", off / 2 + 1);
320	else if (off < 0xf7fd*2)
321	RTTestPrintf(NIL_RTTEST, RTTESTLVL_FAILURE, "UTF-16 U+%#x\n", (off - 0xd7ff*2) / 2 + 0xe000);
322	else if (off < 0x20f7fd)
323	RTTestPrintf(NIL_RTTEST, RTTESTLVL_FAILURE, "UTF-16 U+%#x\n", (off - 0xf7fd*2) / 4 + 0x10000);
324	else
325	RTTestPrintf(NIL_RTTEST, RTTESTLVL_FAILURE, "UTF-16 ???\n");
326	}
327	else
328	{
329	if (off < (0xd800 - 1) * sizeof(RTUNICP))
330	RTTestPrintf(NIL_RTTEST, RTTESTLVL_FAILURE, "RTUNICP U+%#x\n", off / sizeof(RTUNICP) + 1);
331	else if (off < (0xfffe - 0x800 - 1) * sizeof(RTUNICP))
332	RTTestPrintf(NIL_RTTEST, RTTESTLVL_FAILURE, "RTUNICP U+%#x\n", off / sizeof(RTUNICP) + 0x800 + 1);
333	else
334	RTTestPrintf(NIL_RTTEST, RTTESTLVL_FAILURE, "RTUNICP U+%#x\n", off / sizeof(RTUNICP) + 0x800 + 1 + 2);
335	}
336	}
337
338	static int mymemcmp(const void pv1, const void pv2, size_t cb, int cBits)
339	{
340	const uint8_t pb1 = (const uint8_t )pv1;
341	const uint8_t pb2 = (const uint8_t )pv2;
342	for (size_t off = 0; off < cb; off++)
343	{
344	if (pb1[off] != pb2[off])
345	{
346	RTTestPrintf(NIL_RTTEST, RTTESTLVL_FAILURE, "mismatch at %#x: ", off);
347	whereami(cBits, off);
348	if (off > 0)
349	RTTestPrintf(NIL_RTTEST, RTTESTLVL_FAILURE, " %#x: %02x != %02x!\n", off-1, pb1[off-1], pb2[off-1]);
350	RTTestPrintf(NIL_RTTEST, RTTESTLVL_FAILURE, "*%#x: %02x != %02x!\n", off, pb1[off], pb2[off]);
351	for (size_t i = 1; i < 10; i++)
352	if (off + i < cb)
353	RTTestPrintf(NIL_RTTEST, RTTESTLVL_FAILURE, " %#x: %02x != %02x!\n", off+i, pb1[off+i], pb2[off+i]);
354	return 1;
355	}
356	}
357	return 0;
358	}
359
360
361	static void InitStrings()
362	{
363	/*
364	* Generate unicode string containing all the legal UTF-16 codepoints, both UTF-16 and UTF-8 version.
365	*/
366	/* the simple code point array first */
367	unsigned i = 0;
368	RTUNICP uc = 1;
369	while (uc < 0xd800)
370	g_uszAll[i++] = uc++;
371	uc = 0xe000;
372	while (uc < 0xfffe)
373	g_uszAll[i++] = uc++;
374	uc = 0x10000;
375	while (uc < 0x110000)
376	g_uszAll[i++] = uc++;
377	g_uszAll[i++] = 0;
378	Assert(RT_ELEMENTS(g_uszAll) == i);
379
380	/* the utf-16 one */
381	i = 0;
382	uc = 1;
383	//RTPrintf("tstUtf8: %#x=%#x", i, uc);
384	while (uc < 0xd800)
385	g_wszAll[i++] = uc++;
386	uc = 0xe000;
387	//RTPrintf(" %#x=%#x", i, uc);
388	while (uc < 0xfffe)
389	g_wszAll[i++] = uc++;
390	uc = 0x10000;
391	//RTPrintf(" %#x=%#x", i, uc);
392	while (uc < 0x110000)
393	{
394	g_wszAll[i++] = 0xd800 \| ((uc - 0x10000) >> 10);
395	g_wszAll[i++] = 0xdc00 \| ((uc - 0x10000) & 0x3ff);
396	uc++;
397	}
398	//RTPrintf(" %#x=%#x\n", i, uc);
399	g_wszAll[i++] = '\0';
400	Assert(RT_ELEMENTS(g_wszAll) == i);
401
402	/*
403	* The utf-8 one
404	*/
405	i = 0;
406	uc = 1;
407	//RTPrintf("tstUtf8: %#x=%#x", i, uc);
408	while (uc < 0x80)
409	g_szAll[i++] = uc++;
410	//RTPrintf(" %#x=%#x", i, uc);
411	while (uc < 0x800)
412	{
413	g_szAll[i++] = 0xc0 \| (uc >> 6);
414	g_szAll[i++] = 0x80 \| (uc & 0x3f);
415	Assert(!((uc >> 6) & ~0x1f));
416	uc++;
417	}
418	//RTPrintf(" %#x=%#x", i, uc);
419	while (uc < 0xd800)
420	{
421	g_szAll[i++] = 0xe0 \| (uc >> 12);
422	g_szAll[i++] = 0x80 \| ((uc >> 6) & 0x3f);
423	g_szAll[i++] = 0x80 \| (uc & 0x3f);
424	Assert(!((uc >> 12) & ~0xf));
425	uc++;
426	}
427	uc = 0xe000;
428	//RTPrintf(" %#x=%#x", i, uc);
429	while (uc < 0xfffe)
430	{
431	g_szAll[i++] = 0xe0 \| (uc >> 12);
432	g_szAll[i++] = 0x80 \| ((uc >> 6) & 0x3f);
433	g_szAll[i++] = 0x80 \| (uc & 0x3f);
434	Assert(!((uc >> 12) & ~0xf));
435	uc++;
436	}
437	uc = 0x10000;
438	//RTPrintf(" %#x=%#x", i, uc);
439	while (uc < 0x110000)
440	{
441	g_szAll[i++] = 0xf0 \| (uc >> 18);
442	g_szAll[i++] = 0x80 \| ((uc >> 12) & 0x3f);
443	g_szAll[i++] = 0x80 \| ((uc >> 6) & 0x3f);
444	g_szAll[i++] = 0x80 \| (uc & 0x3f);
445	Assert(!((uc >> 18) & ~0x7));
446	uc++;
447	}
448	//RTPrintf(" %#x=%#x\n", i, uc);
449	g_szAll[i++] = '\0';
450	Assert(RT_ELEMENTS(g_szAll) == i);
451	}
452
453
454	static void test2(RTTEST hTest)
455	{
456	/*
457	* Convert to UTF-8 and back.
458	*/
459	RTTestSub(hTest, "UTF-16 -> UTF-8 -> UTF-16");
460	char *pszUtf8;
461	int rc = RTUtf16ToUtf8(&g_wszAll[0], &pszUtf8);
462	if (rc == VINF_SUCCESS)
463	{
464	pszUtf8[0] = 1;
465	if (mymemcmp(pszUtf8, g_szAll, sizeof(g_szAll), 8))
466	RTTestFailed(hTest, "UTF-16 -> UTF-8 mismatch!");
467
468	PRTUTF16 pwszUtf16;
469	rc = RTStrToUtf16(pszUtf8, &pwszUtf16);
470	if (rc == VINF_SUCCESS)
471	{
472	if (mymemcmp(pwszUtf16, g_wszAll, sizeof(g_wszAll), 16))
473	RTTestFailed(hTest, "UTF-8 -> UTF-16 failed compare!");
474	RTUtf16Free(pwszUtf16);
475	}
476	else
477	RTTestFailed(hTest, "UTF-8 -> UTF-16 failed, rc=%Rrc.", rc);
478	RTStrFree(pszUtf8);
479	}
480	else
481	RTTestFailed(hTest, "UTF-16 -> UTF-8 failed, rc=%Rrc.", rc);
482
483
484	/*
485	* Convert to UTF-16 and back. (just in case the above test fails)
486	*/
487	RTTestSub(hTest, "UTF-8 -> UTF-16 -> UTF-8");
488	PRTUTF16 pwszUtf16;
489	rc = RTStrToUtf16(&g_szAll[0], &pwszUtf16);
490	if (rc == VINF_SUCCESS)
491	{
492	if (mymemcmp(pwszUtf16, g_wszAll, sizeof(g_wszAll), 16))
493	RTTestFailed(hTest, "UTF-8 -> UTF-16 failed compare!");
494
495	rc = RTUtf16ToUtf8(pwszUtf16, &pszUtf8);
496	if (rc == VINF_SUCCESS)
497	{
498	if (mymemcmp(pszUtf8, g_szAll, sizeof(g_szAll), 8))
499	RTTestFailed(hTest, "UTF-16 -> UTF-8 failed compare!");
500	RTStrFree(pszUtf8);
501	}
502	else
503	RTTestFailed(hTest, "UTF-16 -> UTF-8 failed, rc=%Rrc.", rc);
504	RTUtf16Free(pwszUtf16);
505	}
506	else
507	RTTestFailed(hTest, "UTF-8 -> UTF-16 failed, rc=%Rrc.", rc);
508
509	/*
510	* Convert UTF-8 to CPs.
511	*/
512	RTTestSub(hTest, "UTF-8 -> UNI -> UTF-8");
513	PRTUNICP paCps;
514	rc = RTStrToUni(g_szAll, &paCps);
515	if (rc == VINF_SUCCESS)
516	{
517	if (mymemcmp(paCps, g_uszAll, sizeof(g_uszAll), 32))
518	RTTestFailed(hTest, "UTF-8 -> UTF-16 failed, rc=%Rrc.", rc);
519
520	size_t cCps;
521	rc = RTStrToUniEx(g_szAll, RTSTR_MAX, &paCps, RT_ELEMENTS(g_uszAll), &cCps);
522	if (rc == VINF_SUCCESS)
523	{
524	if (cCps != RT_ELEMENTS(g_uszAll) - 1)
525	RTTestFailed(hTest, "wrong Code Point count %zu, expected %zu\n", cCps, RT_ELEMENTS(g_uszAll) - 1);
526	}
527	else
528	RTTestFailed(hTest, "UTF-8 -> Code Points failed, rc=%Rrc.\n", rc);
529
530	/** @todo RTCpsToUtf8 or something. */
531	RTUniFree(paCps);
532	}
533	else
534	RTTestFailed(hTest, "UTF-8 -> Code Points failed, rc=%Rrc.\n", rc);
535
536	/*
537	* Check the various string lengths.
538	*/
539	RTTestSub(hTest, "Lengths");
540	size_t cuc1 = RTStrCalcUtf16Len(g_szAll);
541	size_t cuc2 = RTUtf16Len(g_wszAll);
542	if (cuc1 != cuc2)
543	RTTestFailed(hTest, "cuc1=%zu != cuc2=%zu\n", cuc1, cuc2);
544	//size_t cuc3 = RTUniLen(g_uszAll);
545
546
547	/*
548	* Enumerate the strings.
549	*/
550	RTTestSub(hTest, "Code Point Getters and Putters");
551	char pszPut1Base = (char )RTMemAlloc(sizeof(g_szAll));
552	AssertRelease(pszPut1Base);
553	char *pszPut1 = pszPut1Base;
554	PRTUTF16 pwszPut2Base = (PRTUTF16)RTMemAlloc(sizeof(g_wszAll));
555	AssertRelease(pwszPut2Base);
556	PRTUTF16 pwszPut2 = pwszPut2Base;
557	const char *psz1 = g_szAll;
558	const char *psz2 = g_szAll;
559	PCRTUTF16 pwsz3 = g_wszAll;
560	PCRTUTF16 pwsz4 = g_wszAll;
561	for (;;)
562	{
563	/*
564	* getters
565	*/
566	RTUNICP uc1;
567	rc = RTStrGetCpEx(&psz1, &uc1);
568	if (RT_FAILURE(rc))
569	{
570	RTTestFailed(hTest, "RTStrGetCpEx failed with rc=%Rrc at %.10Rhxs", rc, psz2);
571	whereami(8, psz2 - &g_szAll[0]);
572	break;
573	}
574	char *pszPrev1 = RTStrPrevCp(g_szAll, psz1);
575	if (pszPrev1 != psz2)
576	{
577	RTTestFailed(hTest, "RTStrPrevCp returned %p expected %p!", pszPrev1, psz2);
578	whereami(8, psz2 - &g_szAll[0]);
579	break;
580	}
581	RTUNICP uc2 = RTStrGetCp(psz2);
582	if (uc2 != uc1)
583	{
584	RTTestFailed(hTest, "RTStrGetCpEx and RTStrGetCp returned different CPs: %RTunicp != %RTunicp", uc2, uc1);
585	whereami(8, psz2 - &g_szAll[0]);
586	break;
587	}
588	psz2 = RTStrNextCp(psz2);
589	if (psz2 != psz1)
590	{
591	RTTestFailed(hTest, "RTStrGetCpEx and RTStrGetNext returned different next pointer!");
592	whereami(8, psz2 - &g_szAll[0]);
593	break;
594	}
595
596	RTUNICP uc3;
597	rc = RTUtf16GetCpEx(&pwsz3, &uc3);
598	if (RT_FAILURE(rc))
599	{
600	RTTestFailed(hTest, "RTUtf16GetCpEx failed with rc=%Rrc at %.10Rhxs", rc, pwsz4);
601	whereami(16, pwsz4 - &g_wszAll[0]);
602	break;
603	}
604	if (uc3 != uc2)
605	{
606	RTTestFailed(hTest, "RTUtf16GetCpEx and RTStrGetCp returned different CPs: %RTunicp != %RTunicp", uc3, uc2);
607	whereami(16, pwsz4 - &g_wszAll[0]);
608	break;
609	}
610	RTUNICP uc4 = RTUtf16GetCp(pwsz4);
611	if (uc3 != uc4)
612	{
613	RTTestFailed(hTest, "RTUtf16GetCpEx and RTUtf16GetCp returned different CPs: %RTunicp != %RTunicp", uc3, uc4);
614	whereami(16, pwsz4 - &g_wszAll[0]);
615	break;
616	}
617	pwsz4 = RTUtf16NextCp(pwsz4);
618	if (pwsz4 != pwsz3)
619	{
620	RTTestFailed(hTest, "RTUtf16GetCpEx and RTUtf16GetNext returned different next pointer!");
621	whereami(8, pwsz4 - &g_wszAll[0]);
622	break;
623	}
624
625
626	/*
627	* putters
628	*/
629	pszPut1 = RTStrPutCp(pszPut1, uc1);
630	if (pszPut1 - pszPut1Base != psz1 - &g_szAll[0])
631	{
632	RTTestFailed(hTest, "RTStrPutCp is not at the same offset! %p != %p",
633	pszPut1 - pszPut1Base, psz1 - &g_szAll[0]);
634	whereami(8, psz2 - &g_szAll[0]);
635	break;
636	}
637
638	pwszPut2 = RTUtf16PutCp(pwszPut2, uc3);
639	if (pwszPut2 - pwszPut2Base != pwsz3 - &g_wszAll[0])
640	{
641	RTTestFailed(hTest, "RTStrPutCp is not at the same offset! %p != %p",
642	pwszPut2 - pwszPut2Base, pwsz3 - &g_wszAll[0]);
643	whereami(8, pwsz4 - &g_wszAll[0]);
644	break;
645	}
646
647
648	/* the end? */
649	if (!uc1)
650	break;
651	}
652
653	/* check output if we seems to have made it thru it all. */
654	if (psz2 == &g_szAll[sizeof(g_szAll)])
655	{
656	if (mymemcmp(pszPut1Base, g_szAll, sizeof(g_szAll), 8))
657	RTTestFailed(hTest, "RTStrPutCp encoded the string incorrectly.");
658	if (mymemcmp(pwszPut2Base, g_wszAll, sizeof(g_wszAll), 16))
659	RTTestFailed(hTest, "RTUtf16PutCp encoded the string incorrectly.");
660	}
661
662	RTMemFree(pszPut1Base);
663	RTMemFree(pwszPut2Base);
664
665	RTTestSubDone(hTest);
666	}
667
668
669	/**
670	* Check case insensitivity.
671	*/
672	static void test3(RTTEST hTest)
673	{
674	RTTestSub(hTest, "Case Sensitivity");
675
676	if ( RTUniCpToLower('a') != 'a'
677	\|\| RTUniCpToLower('A') != 'a'
678	\|\| RTUniCpToLower('b') != 'b'
679	\|\| RTUniCpToLower('B') != 'b'
680	\|\| RTUniCpToLower('Z') != 'z'
681	\|\| RTUniCpToLower('z') != 'z'
682	\|\| RTUniCpToUpper('c') != 'C'
683	\|\| RTUniCpToUpper('C') != 'C'
684	\|\| RTUniCpToUpper('z') != 'Z'
685	\|\| RTUniCpToUpper('Z') != 'Z')
686	RTTestFailed(hTest, "RTUniToUpper/Lower failed basic tests.\n");
687
688	if (RTUtf16ICmp(g_wszAll, g_wszAll))
689	RTTestFailed(hTest, "RTUtf16ICmp failed the basic test.\n");
690
691	if (RTUtf16Cmp(g_wszAll, g_wszAll))
692	RTTestFailed(hTest, "RTUtf16Cmp failed the basic test.\n");
693
694	static RTUTF16 s_wszTst1a[] = { 'a', 'B', 'c', 'D', 'E', 'f', 'g', 'h', 'i', 'j', 'K', 'L', 'm', 'N', 'o', 'P', 'q', 'r', 'S', 't', 'u', 'V', 'w', 'x', 'Y', 'Z', 0xc5, 0xc6, 0xf8, 0 };
695	static RTUTF16 s_wszTst1b[] = { 'A', 'B', 'c', 'd', 'e', 'F', 'G', 'h', 'i', 'J', 'k', 'l', 'M', 'n', 'O', 'p', 'Q', 'R', 's', 't', 'U', 'v', 'w', 'X', 'y', 'z', 0xe5, 0xe6, 0xd8, 0 };
696	if ( RTUtf16ICmp(s_wszTst1b, s_wszTst1b)
697	\|\| RTUtf16ICmp(s_wszTst1a, s_wszTst1a)
698	\|\| RTUtf16ICmp(s_wszTst1a, s_wszTst1b)
699	\|\| RTUtf16ICmp(s_wszTst1b, s_wszTst1a)
700	)
701	RTTestFailed(hTest, "RTUtf16ICmp failed the alphabet test.\n");
702
703	if ( RTUtf16Cmp(s_wszTst1b, s_wszTst1b)
704	\|\| RTUtf16Cmp(s_wszTst1a, s_wszTst1a)
705	\|\| !RTUtf16Cmp(s_wszTst1a, s_wszTst1b)
706	\|\| !RTUtf16Cmp(s_wszTst1b, s_wszTst1a)
707	)
708	RTTestFailed(hTest, "RTUtf16Cmp failed the alphabet test.\n");
709
710	RTTestSubDone(hTest);
711	}
712
713
714	/**
715	* Test the RTStr*Cmp functions.
716	*/
717	static void TstRTStrXCmp(RTTEST hTest)
718	{
719	#define CHECK_DIFF(expr, op) \
720	do \
721	{ \
722	int iDiff = expr; \
723	if (!(iDiff op 0)) \
724	RTTestFailed(hTest, "%d: %d " #op " 0: %s\n", __LINE__, iDiff, #expr); \
725	} while (0)
726
727	/** @todo test the non-ascii bits. */
728
729	RTTestSub(hTest, "RTStrCmp");
730	CHECK_DIFF(RTStrCmp(NULL, NULL), == );
731	CHECK_DIFF(RTStrCmp(NULL, ""), < );
732	CHECK_DIFF(RTStrCmp("", NULL), > );
733	CHECK_DIFF(RTStrCmp("", ""), == );
734	CHECK_DIFF(RTStrCmp("abcdef", "abcdef"), == );
735	CHECK_DIFF(RTStrCmp("abcdef", "abcde"), > );
736	CHECK_DIFF(RTStrCmp("abcde", "abcdef"), < );
737	CHECK_DIFF(RTStrCmp("abcdeg", "abcdef"), > );
738	CHECK_DIFF(RTStrCmp("abcdef", "abcdeg"), < );
739	CHECK_DIFF(RTStrCmp("abcdeF", "abcdef"), < );
740	CHECK_DIFF(RTStrCmp("abcdef", "abcdeF"), > );
741
742
743	RTTestSub(hTest, "RTStrNCmp");
744	CHECK_DIFF(RTStrNCmp(NULL, NULL, RTSTR_MAX), == );
745	CHECK_DIFF(RTStrNCmp(NULL, "", RTSTR_MAX), < );
746	CHECK_DIFF(RTStrNCmp("", NULL, RTSTR_MAX), > );
747	CHECK_DIFF(RTStrNCmp("", "", RTSTR_MAX), == );
748	CHECK_DIFF(RTStrNCmp("abcdef", "abcdef", RTSTR_MAX), == );
749	CHECK_DIFF(RTStrNCmp("abcdef", "abcde", RTSTR_MAX), > );
750	CHECK_DIFF(RTStrNCmp("abcde", "abcdef", RTSTR_MAX), < );
751	CHECK_DIFF(RTStrNCmp("abcdeg", "abcdef", RTSTR_MAX), > );
752	CHECK_DIFF(RTStrNCmp("abcdef", "abcdeg", RTSTR_MAX), < );
753	CHECK_DIFF(RTStrNCmp("abcdeF", "abcdef", RTSTR_MAX), < );
754	CHECK_DIFF(RTStrNCmp("abcdef", "abcdeF", RTSTR_MAX), > );
755
756	CHECK_DIFF(RTStrNCmp("abcdef", "fedcba", 0), ==);
757	CHECK_DIFF(RTStrNCmp("abcdef", "abcdeF", 5), ==);
758	CHECK_DIFF(RTStrNCmp("abcdef", "abcdeF", 6), > );
759
760
761	RTTestSub(hTest, "RTStrICmp");
762	CHECK_DIFF(RTStrICmp(NULL, NULL), == );
763	CHECK_DIFF(RTStrICmp(NULL, ""), < );
764	CHECK_DIFF(RTStrICmp("", NULL), > );
765	CHECK_DIFF(RTStrICmp("", ""), == );
766	CHECK_DIFF(RTStrICmp("abcdef", "abcdef"), == );
767	CHECK_DIFF(RTStrICmp("abcdef", "abcde"), > );
768	CHECK_DIFF(RTStrICmp("abcde", "abcdef"), < );
769	CHECK_DIFF(RTStrICmp("abcdeg", "abcdef"), > );
770	CHECK_DIFF(RTStrICmp("abcdef", "abcdeg"), < );
771
772	CHECK_DIFF(RTStrICmp("abcdeF", "abcdef"), ==);
773	CHECK_DIFF(RTStrICmp("abcdef", "abcdeF"), ==);
774	CHECK_DIFF(RTStrICmp("ABCDEF", "abcdef"), ==);
775	CHECK_DIFF(RTStrICmp("abcdef", "ABCDEF"), ==);
776	CHECK_DIFF(RTStrICmp("AbCdEf", "aBcDeF"), ==);
777	CHECK_DIFF(RTStrICmp("AbCdEg", "aBcDeF"), > );
778	CHECK_DIFF(RTStrICmp("AbCdEG", "aBcDef"), > ); /* diff performed on the lower case cp. */
779
780
781	RTTestSub(hTest, "RTStrICmpAscii");
782	CHECK_DIFF(RTStrICmpAscii(NULL, NULL), == );
783	CHECK_DIFF(RTStrICmpAscii(NULL, ""), < );
784	CHECK_DIFF(RTStrICmpAscii("", NULL), > );
785	CHECK_DIFF(RTStrICmpAscii("", ""), == );
786	CHECK_DIFF(RTStrICmpAscii("abcdef", "abcdef"), == );
787	CHECK_DIFF(RTStrICmpAscii("abcdef", "abcde"), > );
788	CHECK_DIFF(RTStrICmpAscii("abcde", "abcdef"), < );
789	CHECK_DIFF(RTStrICmpAscii("abcdeg", "abcdef"), > );
790	CHECK_DIFF(RTStrICmpAscii("abcdef", "abcdeg"), < );
791
792	CHECK_DIFF(RTStrICmpAscii("abcdeF", "abcdef"), ==);
793	CHECK_DIFF(RTStrICmpAscii("abcdef", "abcdeF"), ==);
794	CHECK_DIFF(RTStrICmpAscii("ABCDEF", "abcdef"), ==);
795	CHECK_DIFF(RTStrICmpAscii("abcdef", "ABCDEF"), ==);
796	CHECK_DIFF(RTStrICmpAscii("AbCdEf", "aBcDeF"), ==);
797	CHECK_DIFF(RTStrICmpAscii("AbCdEg", "aBcDeF"), > );
798	CHECK_DIFF(RTStrICmpAscii("AbCdEG", "aBcDef"), > ); /* diff performed on the lower case cp. */
799
800
801	RTTestSub(hTest, "RTStrNICmp");
802	CHECK_DIFF(RTStrNICmp(NULL, NULL, RTSTR_MAX), == );
803	CHECK_DIFF(RTStrNICmp(NULL, "", RTSTR_MAX), < );
804	CHECK_DIFF(RTStrNICmp("", NULL, RTSTR_MAX), > );
805	CHECK_DIFF(RTStrNICmp("", "", RTSTR_MAX), == );
806	CHECK_DIFF(RTStrNICmp(NULL, NULL, 0), == );
807	CHECK_DIFF(RTStrNICmp(NULL, "", 0), == );
808	CHECK_DIFF(RTStrNICmp("", NULL, 0), == );
809	CHECK_DIFF(RTStrNICmp("", "", 0), == );
810	CHECK_DIFF(RTStrNICmp("abcdef", "abcdef", RTSTR_MAX), == );
811	CHECK_DIFF(RTStrNICmp("abcdef", "abcde", RTSTR_MAX), > );
812	CHECK_DIFF(RTStrNICmp("abcde", "abcdef", RTSTR_MAX), < );
813	CHECK_DIFF(RTStrNICmp("abcdeg", "abcdef", RTSTR_MAX), > );
814	CHECK_DIFF(RTStrNICmp("abcdef", "abcdeg", RTSTR_MAX), < );
815
816	CHECK_DIFF(RTStrNICmp("abcdeF", "abcdef", RTSTR_MAX), ==);
817	CHECK_DIFF(RTStrNICmp("abcdef", "abcdeF", RTSTR_MAX), ==);
818	CHECK_DIFF(RTStrNICmp("ABCDEF", "abcdef", RTSTR_MAX), ==);
819	CHECK_DIFF(RTStrNICmp("abcdef", "ABCDEF", RTSTR_MAX), ==);
820	CHECK_DIFF(RTStrNICmp("AbCdEf", "aBcDeF", RTSTR_MAX), ==);
821	CHECK_DIFF(RTStrNICmp("AbCdEg", "aBcDeF", RTSTR_MAX), > );
822	CHECK_DIFF(RTStrNICmp("AbCdEG", "aBcDef", RTSTR_MAX), > ); /* diff performed on the lower case cp. */
823
824	CHECK_DIFF(RTStrNICmp("ABCDEF", "fedcba", 0), ==);
825	CHECK_DIFF(RTStrNICmp("AbCdEg", "aBcDeF", 5), ==);
826	CHECK_DIFF(RTStrNICmp("AbCdEf", "aBcDeF", 5), ==);
827	CHECK_DIFF(RTStrNICmp("AbCdE", "aBcDe", 5), ==);
828	CHECK_DIFF(RTStrNICmp("AbCdE", "aBcDeF", 5), ==);
829	CHECK_DIFF(RTStrNICmp("AbCdEf", "aBcDe", 5), ==);
830	CHECK_DIFF(RTStrNICmp("AbCdEg", "aBcDeF", 6), > );
831	CHECK_DIFF(RTStrNICmp("AbCdEG", "aBcDef", 6), > ); /* diff performed on the lower case cp. */
832	/* We should continue using byte comparison when we hit the invalid CP. Will assert in debug builds. */
833	// CHECK_DIFF(RTStrNICmp("AbCd\xff""eg", "aBcD\xff""eF", 6), ==);
834
835	RTTestSubDone(hTest);
836	}
837
838
839
840	/**
841	* Check UTF-8 encoding purging.
842	*/
843	static void TstRTStrPurgeEncoding(RTTEST hTest)
844	{
845	RTTestSub(hTest, "RTStrPurgeEncoding");
846
847	/*
848	* Test some good strings.
849	*/
850	char sz1[] = "1234567890wertyuiopsdfghjklzxcvbnm";
851	char sz1Copy[sizeof(sz1)];
852	memcpy(sz1Copy, sz1, sizeof(sz1));
853
854	RTTESTI_CHECK_RETV(RTStrPurgeEncoding(sz1) == 0);
855	RTTESTI_CHECK_RETV(!memcmp(sz1, sz1Copy, sizeof(sz1)));
856
857	char *pszAll = RTStrDup(g_szAll);
858	if (pszAll)
859	{
860	RTTESTI_CHECK(RTStrPurgeEncoding(pszAll) == 0);
861	RTTESTI_CHECK(!memcmp(pszAll, g_szAll, sizeof(g_szAll)));
862	RTStrFree(pszAll);
863	}
864
865	/*
866	* Test some bad stuff.
867	*/
868	struct
869	{
870	size_t cErrors;
871	unsigned char szIn[5];
872	const char *pszExpect;
873	} aTests[] =
874	{
875	{ 0, { '1', '2', '3', '4', '\0' }, "1234" },
876	{ 1, { 0x80, '2', '3', '4', '\0' }, "?234" },
877	{ 1, { '1', 0x80, '3', '4', '\0' }, "1?34" },
878	{ 1, { '1', '2', 0x80, '4', '\0' }, "12?4" },
879	{ 1, { '1', '2', '3', 0x80, '\0' }, "123?" },
880	{ 2, { 0x80, 0x81, '3', '4', '\0' }, "??34" },
881	{ 2, { '1', 0x80, 0x81, '4', '\0' }, "1??4" },
882	{ 2, { '1', '2', 0x80, 0x81, '\0' }, "12??" },
883	};
884	for (size_t i = 0; i < RT_ELEMENTS(aTests); i++)
885	{
886	size_t cErrors = RTStrPurgeEncoding((char *)aTests[i].szIn);
887	if (cErrors != aTests[i].cErrors)
888	RTTestFailed(hTest, "#%u: cErrors=%u expected %u\n", i, cErrors, aTests[i].cErrors);
889	else if (strcmp((char *)aTests[i].szIn, aTests[i].pszExpect))
890	RTTestFailed(hTest, "#%u: %.5Rhxs expected %.5Rhxs (%s)\n", i, aTests[i].szIn, aTests[i].pszExpect, aTests[i].pszExpect);
891	}
892
893	RTTestSubDone(hTest);
894	}
895
896
897	/**
898	* Check string sanitising.
899	*/
900	static void TstRTStrPurgeComplementSet(RTTEST hTest)
901	{
902	RTTestSub(hTest, "RTStrPurgeComplementSet");
903	RTUNICP aCpSet[] = { '1', '5', 'w', 'w', 'r', 'r', 'e', 'f', 't', 't',
904	'\0' };
905	RTUNICP aCpBadSet[] = { '1', '5', 'w', 'w', 'r', 'r', 'e', 'f', 't', 't',
906	'7', '\0' }; /* Contains an incomplete pair. */
907	struct
908	{
909	const char *pcszIn;
910	const char *pcszOut;
911	PCRTUNICP pcCpSet;
912	char chReplacement;
913	ssize_t cExpected;
914	}
915	aTests[] =
916	{
917	{ "1234werttrew4321", "1234werttrew4321", aCpSet, '_', 0 },
918	{ "123654wert\xc2\xa2trew\xe2\x82\xac""4321",
919	"123_54wert__trew___4321", aCpSet, '_', 3 },
920	{ "hjhj8766", "????????", aCpSet, '?', 8 },
921	{ "123\xf0\xa4\xad\xa2""4", "123____4", aCpSet, '_', 1 },
922	{ "\xff", "\xff", aCpSet, '_', -1 },
923	{ "____", "____", aCpBadSet, '_', -1 }
924	};
925	enum { MAX_IN_STRING = 256 };
926
927	for (unsigned i = 0; i < RT_ELEMENTS(aTests); ++i)
928	{
929	char szCopy[MAX_IN_STRING];
930	ssize_t cReplacements;
931	AssertRC(RTStrCopy(szCopy, RT_ELEMENTS(szCopy), aTests[i].pcszIn));
932	RTTestDisableAssertions(hTest);
933	cReplacements = RTStrPurgeComplementSet(szCopy, aTests[i].pcCpSet, aTests[i].chReplacement);
934	RTTestRestoreAssertions(hTest);
935	if (cReplacements != aTests[i].cExpected)
936	RTTestFailed(hTest, "#%u: expected %lld, actual %lld\n", i,
937	(long long) aTests[i].cExpected,
938	(long long) cReplacements);
939	if (strcmp(aTests[i].pcszOut, szCopy))
940	RTTestFailed(hTest, "#%u: expected %s, actual %s\n", i,
941	aTests[i].pcszOut, szCopy);
942	}
943	}
944
945
946	/**
947	* Check string sanitising.
948	*/
949	static void TstRTUtf16PurgeComplementSet(RTTEST hTest)
950	{
951	RTTestSub(hTest, "RTUtf16PurgeComplementSet");
952	RTUNICP aCpSet[] = { '1', '5', 'w', 'w', 'r', 'r', 'e', 'f', 't', 't',
953	'\0' };
954	RTUNICP aCpBadSet[] = { '1', '5', 'w', 'w', 'r', 'r', 'e', 'f', 't', 't',
955	'7', '\0' }; /* Contains an incomplete pair. */
956	struct
957	{
958	const char *pcszIn;
959	const char *pcszOut;
960	size_t cwc; /* Zero means the strings are Utf-8. */
961	PCRTUNICP pcCpSet;
962	char chReplacement;
963	ssize_t cExpected;
964	}
965	aTests[] =
966	{
967	{ "1234werttrew4321", "1234werttrew4321", 0, aCpSet, '_', 0 },
968	{ "123654wert\xc2\xa2trew\xe2\x82\xac""4321",
969	"123_54wert_trew_4321", 0, aCpSet, '_', 3 },
970	{ "hjhj8766", "????????", 0, aCpSet, '?', 8 },
971	{ "123\xf0\xa4\xad\xa2""4", "123__4", 0, aCpSet, '_', 1 },
972	{ "\xff\xff\0", "\xff\xff\0", 2, aCpSet, '_', -1 },
973	{ "\xff\xff\0", "\xff\xff\0", 2, aCpSet, '_', -1 },
974	{ "____", "____", 0, aCpBadSet, '_', -1 }
975	};
976	enum { MAX_IN_STRING = 256 };
977
978	for (unsigned i = 0; i < RT_ELEMENTS(aTests); ++i)
979	{
980	RTUTF16 wszInCopy[MAX_IN_STRING], *pwszInCopy = wszInCopy;
981	RTUTF16 wszOutCopy[MAX_IN_STRING], *pwszOutCopy = wszOutCopy;
982	ssize_t cReplacements;
983	if (!aTests[i].cwc)
984	{
985	AssertRC(RTStrToUtf16Ex(aTests[i].pcszIn, RTSTR_MAX, &pwszInCopy,
986	RT_ELEMENTS(wszInCopy), NULL));
987	AssertRC(RTStrToUtf16Ex(aTests[i].pcszOut, RTSTR_MAX, &pwszOutCopy,
988	RT_ELEMENTS(wszOutCopy), NULL));
989	}
990	else
991	{
992	Assert(aTests[i].cwc <= RT_ELEMENTS(wszInCopy));
993	memcpy(wszInCopy, aTests[i].pcszIn, aTests[i].cwc * 2);
994	memcpy(wszOutCopy, aTests[i].pcszOut, aTests[i].cwc * 2);
995	}
996
997	RTTestDisableAssertions(hTest);
998	cReplacements = RTUtf16PurgeComplementSet(wszInCopy, aTests[i].pcCpSet, aTests[i].chReplacement);
999	RTTestRestoreAssertions(hTest);
1000
1001	if (cReplacements != aTests[i].cExpected)
1002	RTTestFailed(hTest, "#%u: expected %lld, actual %lld\n", i,
1003	(long long) aTests[i].cExpected,
1004	(long long) cReplacements);
1005	if (RTUtf16Cmp(wszInCopy, wszOutCopy))
1006	RTTestFailed(hTest, "#%u: expected %ls, actual %ls\n", i,
1007	wszOutCopy, wszInCopy);
1008	}
1009	}
1010
1011
1012	/**
1013	* Benchmark stuff.
1014	*/
1015	static void Benchmarks(RTTEST hTest)
1016	{
1017	static union
1018	{
1019	RTUTF16 wszBuf[sizeof(g_wszAll)];
1020	char szBuf[sizeof(g_szAll)];
1021	} s_Buf;
1022
1023	RTTestSub(hTest, "Benchmarks");
1024	/** @todo add RTTest* methods for reporting benchmark results. */
1025	RTTestPrintf(hTest, RTTESTLVL_ALWAYS, "Benchmarking RTStrToUtf16Ex: "); /** @todo figure this stuff into the test framework. */
1026	PRTUTF16 pwsz = &s_Buf.wszBuf[0];
1027	int rc = RTStrToUtf16Ex(&g_szAll[0], RTSTR_MAX, &pwsz, RT_ELEMENTS(s_Buf.wszBuf), NULL);
1028	if (RT_SUCCESS(rc))
1029	{
1030	int i;
1031	uint64_t u64Start = RTTimeNanoTS();
1032	for (i = 0; i < 100; i++)
1033	{
1034	rc = RTStrToUtf16Ex(&g_szAll[0], RTSTR_MAX, &pwsz, RT_ELEMENTS(s_Buf.wszBuf), NULL);
1035	if (RT_FAILURE(rc))
1036	{
1037	RTTestFailed(hTest, "UTF-8 -> UTF-16 benchmark failed at i=%d, rc=%Rrc\n", i, rc);
1038	break;
1039	}
1040	}
1041	uint64_t u64Elapsed = RTTimeNanoTS() - u64Start;
1042	RTTestPrintf(hTest, RTTESTLVL_ALWAYS, "%d in %'RI64 ns\n", i, u64Elapsed);
1043	}
1044
1045	RTTestPrintf(hTest, RTTESTLVL_ALWAYS, "Benchmarking RTUtf16ToUtf8Ex: ");
1046	char *psz = &s_Buf.szBuf[0];
1047	rc = RTUtf16ToUtf8Ex(&g_wszAll[0], RTSTR_MAX, &psz, RT_ELEMENTS(s_Buf.szBuf), NULL);
1048	if (RT_SUCCESS(rc))
1049	{
1050	int i;
1051	uint64_t u64Start = RTTimeNanoTS();
1052	for (i = 0; i < 100; i++)
1053	{
1054	rc = RTUtf16ToUtf8Ex(&g_wszAll[0], RTSTR_MAX, &psz, RT_ELEMENTS(s_Buf.szBuf), NULL);
1055	if (RT_FAILURE(rc))
1056	{
1057	RTTestFailed(hTest, "UTF-16 -> UTF-8 benchmark failed at i=%d, rc=%Rrc\n", i, rc);
1058	break;
1059	}
1060	}
1061	uint64_t u64Elapsed = RTTimeNanoTS() - u64Start;
1062	RTTestPrintf(hTest, RTTESTLVL_ALWAYS, "%d in %'RI64 ns\n", i, u64Elapsed);
1063	}
1064
1065	RTTestSubDone(hTest);
1066	}
1067
1068
1069	/**
1070	* Tests RTStrEnd
1071	*/
1072	static void testStrEnd(RTTEST hTest)
1073	{
1074	RTTestSub(hTest, "RTStrEnd");
1075
1076	static char const s_szEmpty[1] = "";
1077	RTTESTI_CHECK(RTStrEnd(s_szEmpty, 0) == NULL);
1078	RTTESTI_CHECK(RTStrEnd(s_szEmpty, 1) == &s_szEmpty[0]);
1079	for (size_t i = 0; i < _1M; i++)
1080	RTTESTI_CHECK(RTStrEnd(s_szEmpty, ~i) == &s_szEmpty[0]);
1081
1082	/* Check the implementation won't ever overshoot the '\0' in the input in
1083	anyway that may lead to a SIGSEV. (VC++ 14.1 does this) */
1084	size_t const cchStr = 1023;
1085	char pszStr = (char )RTTestGuardedAllocTail(hTest, cchStr + 1);
1086	memset(pszStr, ' ', cchStr);
1087	char * const pszStrEnd = &pszStr[cchStr];
1088	*pszStrEnd = '\0';
1089	RTTEST_CHECK_RETV(hTest, strlen(pszStr) == cchStr);
1090
1091	for (size_t off = 0; off <= cchStr; off++)
1092	{
1093	RTTEST_CHECK(hTest, RTStrEnd(&pszStr[off], cchStr + 1 - off) == pszStrEnd);
1094	RTTEST_CHECK(hTest, RTStrEnd(&pszStr[off], RTSTR_MAX) == pszStrEnd);
1095
1096	RTTEST_CHECK(hTest, memchr(&pszStr[off], '\0', cchStr + 1 - off) == pszStrEnd);
1097	RTTEST_CHECK(hTest, strchr(&pszStr[off], '\0') == pszStrEnd);
1098	RTTEST_CHECK(hTest, strchr(&pszStr[off], '?') == NULL);
1099
1100	size_t cchMax = 0;
1101	for (; cchMax <= cchStr - off; cchMax++)
1102	{
1103	const char *pszRet = RTStrEnd(&pszStr[off], cchMax);
1104	if (pszRet != NULL)
1105	{
1106	RTTestFailed(hTest, "off=%zu cchMax=%zu: %p, expected NULL\n", off, cchMax, pszRet);
1107	break;
1108	}
1109	}
1110	for (; cchMax <= _8K; cchMax++)
1111	{
1112	const char *pszRet = RTStrEnd(&pszStr[off], cchMax);
1113	if (pszRet != pszStrEnd)
1114	{
1115	RTTestFailed(hTest, "off=%zu cchMax=%zu: off by %p\n", off, cchMax, pszRet);
1116	break;
1117	}
1118	}
1119	}
1120	RTTestGuardedFree(hTest, pszStr);
1121	}
1122
1123
1124	/**
1125	* Tests RTStrStr and RTStrIStr.
1126	*/
1127	static void testStrStr(RTTEST hTest)
1128	{
1129	#define CHECK_NULL(expr) \
1130	do { \
1131	const char *pszRet = expr; \
1132	if (pszRet != NULL) \
1133	RTTestFailed(hTest, "%d: %s -> %s expected NULL", __LINE__, #expr, pszRet); \
1134	} while (0)
1135
1136	#define CHECK(expr, expect) \
1137	do { \
1138	const char * const pszRet = expr; \
1139	const char * const pszExpect = (expect); \
1140	if ( (pszRet != NULL && pszExpect == NULL) \
1141	\|\| (pszRet == NULL && pszExpect != NULL) \
1142	\|\| strcmp(pszRet, pszExpect) \
1143	) \
1144	RTTestFailed(hTest, "%d: %s -> %s expected %s", __LINE__, #expr, pszRet, pszExpect); \
1145	} while (0)
1146
1147
1148	RTTestSub(hTest, "RTStrStr");
1149	CHECK(RTStrStr("abcdef", ""), "abcdef");
1150	CHECK_NULL(RTStrStr("abcdef", NULL));
1151	CHECK_NULL(RTStrStr(NULL, ""));
1152	CHECK_NULL(RTStrStr(NULL, NULL));
1153	CHECK(RTStrStr("abcdef", "abcdef"), "abcdef");
1154	CHECK(RTStrStr("abcdef", "b"), "bcdef");
1155	CHECK(RTStrStr("abcdef", "bcdef"), "bcdef");
1156	CHECK(RTStrStr("abcdef", "cdef"), "cdef");
1157	CHECK(RTStrStr("abcdef", "cde"), "cdef");
1158	CHECK(RTStrStr("abcdef", "cd"), "cdef");
1159	CHECK(RTStrStr("abcdef", "c"), "cdef");
1160	CHECK(RTStrStr("abcdef", "f"), "f");
1161	CHECK(RTStrStr("abcdef", "ef"), "ef");
1162	CHECK(RTStrStr("abcdef", "e"), "ef");
1163	CHECK_NULL(RTStrStr("abcdef", "z"));
1164	CHECK_NULL(RTStrStr("abcdef", "A"));
1165	CHECK_NULL(RTStrStr("abcdef", "F"));
1166
1167	RTTestSub(hTest, "RTStrIStr");
1168	CHECK(RTStrIStr("abcdef", ""), "abcdef");
1169	CHECK_NULL(RTStrIStr("abcdef", NULL));
1170	CHECK_NULL(RTStrIStr(NULL, ""));
1171	CHECK_NULL(RTStrIStr(NULL, NULL));
1172	CHECK(RTStrIStr("abcdef", "abcdef"), "abcdef");
1173	CHECK(RTStrIStr("abcdef", "Abcdef"), "abcdef");
1174	CHECK(RTStrIStr("abcdef", "ABcDeF"), "abcdef");
1175	CHECK(RTStrIStr("abcdef", "b"), "bcdef");
1176	CHECK(RTStrIStr("abcdef", "B"), "bcdef");
1177	CHECK(RTStrIStr("abcdef", "bcdef"), "bcdef");
1178	CHECK(RTStrIStr("abcdef", "BCdEf"), "bcdef");
1179	CHECK(RTStrIStr("abcdef", "bCdEf"), "bcdef");
1180	CHECK(RTStrIStr("abcdef", "bcdEf"), "bcdef");
1181	CHECK(RTStrIStr("abcdef", "BcdEf"), "bcdef");
1182	CHECK(RTStrIStr("abcdef", "cdef"), "cdef");
1183	CHECK(RTStrIStr("abcdef", "cde"), "cdef");
1184	CHECK(RTStrIStr("abcdef", "cd"), "cdef");
1185	CHECK(RTStrIStr("abcdef", "c"), "cdef");
1186	CHECK(RTStrIStr("abcdef", "f"), "f");
1187	CHECK(RTStrIStr("abcdeF", "F"), "F");
1188	CHECK(RTStrIStr("abcdef", "F"), "f");
1189	CHECK(RTStrIStr("abcdef", "ef"), "ef");
1190	CHECK(RTStrIStr("EeEef", "e"), "EeEef");
1191	CHECK(RTStrIStr("EeEef", "E"), "EeEef");
1192	CHECK(RTStrIStr("EeEef", "EE"), "EeEef");
1193	CHECK(RTStrIStr("EeEef", "EEE"), "EeEef");
1194	CHECK(RTStrIStr("EeEef", "EEEF"), "eEef");
1195	CHECK_NULL(RTStrIStr("EeEef", "z"));
1196
1197	#undef CHECK
1198	#undef CHECK_NULL
1199	RTTestSubDone(hTest);
1200	}
1201
1202
1203	static void testUtf8Latin1(RTTEST hTest)
1204	{
1205	RTTestSub(hTest, "Latin-1 <-> Utf-8 conversion functions");
1206
1207	/* Test Utf8 -> Latin1 */
1208	size_t cch_szAll = 0;
1209	size_t cbShort = RTStrCalcLatin1Len(g_szAll);
1210	RTTEST_CHECK(hTest, cbShort == 0);
1211	int rc = RTStrCalcLatin1LenEx(g_szAll, 383, &cch_szAll);
1212	RTTEST_CHECK(hTest, (cch_szAll == 255));
1213	rc = RTStrCalcLatin1LenEx(g_szAll, RTSTR_MAX, &cch_szAll);
1214	RTTEST_CHECK_RC(hTest, rc, VERR_NO_TRANSLATION);
1215	char *psz = NULL;
1216	char szShort[256] = { 0 };
1217	memcpy(szShort, g_szAll, 255);
1218	cbShort = RTStrCalcLatin1Len(szShort);
1219	RTTEST_CHECK(hTest, cbShort == 191);
1220	rc = RTStrToLatin1(szShort, &psz);
1221	RTTEST_CHECK_RC_OK(hTest, rc);
1222	if (RT_SUCCESS(rc))
1223	{
1224	RTTEST_CHECK(hTest, (strlen(psz) == 191));
1225	for (unsigned i = 0, j = 1; psz[i] != '\0'; ++i, ++j)
1226	if (psz[i] != (char) j)
1227	{
1228	RTTestFailed(hTest, "conversion of g_szAll to Latin1 failed at position %u\n", i);
1229	break;
1230	}
1231	}
1232	RTStrFree(psz);
1233	rc = RTStrToLatin1(g_szAll, &psz);
1234	RTTEST_CHECK_RC(hTest, rc, VERR_NO_TRANSLATION);
1235	char sz[512];
1236	char *psz2 = &sz[0];
1237	size_t cchActual = 0;
1238	rc = RTStrToLatin1Ex(g_szAll, sizeof(sz) - 1, &psz2, sizeof(sz),
1239	&cchActual);
1240	RTTEST_CHECK_RC(hTest, rc, VERR_NO_TRANSLATION);
1241	RTTEST_CHECK_MSG(hTest, cchActual == 0,
1242	(hTest, "cchActual=%lu\n", cchActual));
1243	rc = RTStrToLatin1Ex(g_szAll, 383, &psz2, sizeof(sz),
1244	&cchActual);
1245	RTTEST_CHECK_RC_OK(hTest, rc);
1246	if (RT_SUCCESS(rc))
1247	{
1248	RTTEST_CHECK(hTest, (cchActual == 255));
1249	RTTEST_CHECK(hTest, (cchActual == strlen(sz)));
1250	for (unsigned i = 0, j = 1; psz2[i] != '\0'; ++i, ++j)
1251	if (psz2[i] != (char) j)
1252	{
1253	RTTestFailed(hTest, "second conversion of g_szAll to Latin1 failed at position %u\n", i);
1254	break;
1255	}
1256	}
1257	rc = RTStrToLatin1Ex(g_szAll, 129, &psz2, 128, &cchActual);
1258	RTTEST_CHECK_RC(hTest, rc, VERR_BUFFER_OVERFLOW);
1259	RTTEST_CHECK_MSG(hTest, cchActual == 128,
1260	(hTest, "cchActual=%lu\n", cchActual));
1261	rc = RTStrToLatin1Ex(g_szAll, 383, &psz, 0, &cchActual);
1262	RTTEST_CHECK_RC_OK(hTest, rc);
1263	if (RT_SUCCESS(rc))
1264	{
1265	RTTEST_CHECK(hTest, (cchActual == 255));
1266	RTTEST_CHECK(hTest, (cchActual == strlen(psz)));
1267	for (unsigned i = 0, j = 1; psz[i] != '\0'; ++i, ++j)
1268	if ( ((j < 0x100) && (psz[i] != (char) j))
1269	\|\| ((j > 0xff) && psz[i] != '?'))
1270	{
1271	RTTestFailed(hTest, "third conversion of g_szAll to Latin1 failed at position %u\n", i);
1272	break;
1273	}
1274	}
1275	const char *pszBad = "Hello\xDC\xD8";
1276	rc = RTStrToLatin1Ex(pszBad, RTSTR_MAX, &psz2, sizeof(sz),
1277	&cchActual);
1278	RTTEST_CHECK_RC(hTest, rc, VERR_INVALID_UTF8_ENCODING);
1279	RTStrFree(psz);
1280
1281	/* Test Latin1 -> Utf8 */
1282	const char *pszLat1 = "\x01\x20\x40\x80\x81";
1283	RTTEST_CHECK(hTest, RTLatin1CalcUtf8Len(pszLat1) == 7);
1284	rc = RTLatin1CalcUtf8LenEx(pszLat1, 3, &cchActual);
1285	RTTEST_CHECK_RC_OK(hTest, rc);
1286	if (RT_SUCCESS(rc))
1287	RTTEST_CHECK(hTest, cchActual == 3);
1288	rc = RTLatin1CalcUtf8LenEx(pszLat1, RTSTR_MAX, &cchActual);
1289	RTTEST_CHECK_RC_OK(hTest, rc);
1290	if (RT_SUCCESS(rc))
1291	RTTEST_CHECK(hTest, cchActual == 7);
1292	char *pch = NULL;
1293	char ch[8];
1294	char *pch2 = &ch[0];
1295	cchActual = 0;
1296	rc = RTLatin1ToUtf8(pszLat1, &pch);
1297	RTTEST_CHECK_RC_OK(hTest, rc);
1298	if (RT_SUCCESS(rc))
1299	RTTEST_CHECK(hTest, !strcmp(pch, "\x01\x20\x40\xC2\x80\xC2\x81"));
1300	RTStrFree(pch);
1301	rc = RTLatin1ToUtf8Ex(pszLat1, RTSTR_MAX, &pch, 0, &cchActual);
1302	RTTEST_CHECK_RC_OK(hTest, rc);
1303	if (RT_SUCCESS(rc))
1304	{
1305	RTTEST_CHECK(hTest, (cchActual == 7));
1306	RTTEST_CHECK(hTest, !strcmp(pch, "\x01\x20\x40\xC2\x80\xC2\x81"));
1307	}
1308	RTStrFree(pch);
1309	rc = RTLatin1ToUtf8Ex(pszLat1, RTSTR_MAX, &pch, 0, NULL);
1310	RTTEST_CHECK_RC_OK(hTest, rc);
1311	if (RT_SUCCESS(rc))
1312	RTTEST_CHECK(hTest, !strcmp(pch, "\x01\x20\x40\xC2\x80\xC2\x81"));
1313	RTStrFree(pch);
1314	rc = RTLatin1ToUtf8Ex(pszLat1, RTSTR_MAX, &pch2, RT_ELEMENTS(ch),
1315	&cchActual);
1316	RTTEST_CHECK_RC_OK(hTest, rc);
1317	if (RT_SUCCESS(rc))
1318	{
1319	RTTEST_CHECK(hTest, (cchActual == 7));
1320	RTTEST_CHECK(hTest, !strcmp(pch2, "\x01\x20\x40\xC2\x80\xC2\x81"));
1321	}
1322	rc = RTLatin1ToUtf8Ex(pszLat1, 3, &pch2, RT_ELEMENTS(ch),
1323	&cchActual);
1324	RTTEST_CHECK_RC_OK(hTest, rc);
1325	if (RT_SUCCESS(rc))
1326	{
1327	RTTEST_CHECK(hTest, (cchActual == 3));
1328	RTTEST_CHECK(hTest, !strcmp(pch2, "\x01\x20\x40"));
1329	}
1330	rc = RTLatin1ToUtf8Ex(pszLat1, RTSTR_MAX, &pch2, RT_ELEMENTS(ch) - 1,
1331	&cchActual);
1332	RTTEST_CHECK_RC(hTest, rc, VERR_BUFFER_OVERFLOW);
1333	RTTEST_CHECK(hTest, (cchActual == 7));
1334	RTTestSubDone(hTest);
1335	}
1336
1337
1338	static void testUtf16Latin1(RTTEST hTest)
1339	{
1340	RTTestSub(hTest, "Latin-1 <-> Utf-16 conversion functions");
1341
1342	/* Test Utf16 -> Latin1 */
1343	size_t cch_szAll = 0;
1344	size_t cbShort = RTUtf16CalcLatin1Len(g_wszAll);
1345	RTTEST_CHECK(hTest, cbShort == 0);
1346	int rc = RTUtf16CalcLatin1LenEx(g_wszAll, 255, &cch_szAll);
1347	RTTEST_CHECK(hTest, (cch_szAll == 255));
1348	rc = RTUtf16CalcLatin1LenEx(g_wszAll, RTSTR_MAX, &cch_szAll);
1349	RTTEST_CHECK_RC(hTest, rc, VERR_NO_TRANSLATION);
1350	char *psz = NULL;
1351	RTUTF16 wszShort[256] = { 0 };
1352	for (unsigned i = 0; i < 255; ++i)
1353	wszShort[i] = i + 1;
1354	cbShort = RTUtf16CalcLatin1Len(wszShort);
1355	RTTEST_CHECK(hTest, cbShort == 255);
1356	rc = RTUtf16ToLatin1(wszShort, &psz);
1357	RTTEST_CHECK_RC_OK(hTest, rc);
1358	if (RT_SUCCESS(rc))
1359	{
1360	RTTEST_CHECK(hTest, (strlen(psz) == 255));
1361	for (unsigned i = 0, j = 1; psz[i] != '\0'; ++i, ++j)
1362	if (psz[i] != (char) j)
1363	{
1364	RTTestFailed(hTest, "conversion of g_wszAll to Latin1 failed at position %u\n", i);
1365	break;
1366	}
1367	}
1368	RTStrFree(psz);
1369	rc = RTUtf16ToLatin1(g_wszAll, &psz);
1370	RTTEST_CHECK_RC(hTest, rc, VERR_NO_TRANSLATION);
1371	char sz[512];
1372	char *psz2 = &sz[0];
1373	size_t cchActual = 0;
1374	rc = RTUtf16ToLatin1Ex(g_wszAll, sizeof(sz) - 1, &psz2, sizeof(sz),
1375	&cchActual);
1376	RTTEST_CHECK_RC(hTest, rc, VERR_NO_TRANSLATION);
1377	RTTEST_CHECK_MSG(hTest, cchActual == 0,
1378	(hTest, "cchActual=%lu\n", cchActual));
1379	rc = RTUtf16ToLatin1Ex(g_wszAll, 255, &psz2, sizeof(sz),
1380	&cchActual);
1381	RTTEST_CHECK_RC_OK(hTest, rc);
1382	if (RT_SUCCESS(rc))
1383	{
1384	RTTEST_CHECK(hTest, (cchActual == 255));
1385	RTTEST_CHECK(hTest, (cchActual == strlen(sz)));
1386	for (unsigned i = 0, j = 1; psz2[i] != '\0'; ++i, ++j)
1387	if (psz2[i] != (char) j)
1388	{
1389	RTTestFailed(hTest, "second conversion of g_wszAll to Latin1 failed at position %u\n", i);
1390	break;
1391	}
1392	}
1393	rc = RTUtf16ToLatin1Ex(g_wszAll, 128, &psz2, 128, &cchActual);
1394	RTTEST_CHECK_RC(hTest, rc, VERR_BUFFER_OVERFLOW);
1395	RTTEST_CHECK_MSG(hTest, cchActual == 128,
1396	(hTest, "cchActual=%lu\n", cchActual));
1397	rc = RTUtf16ToLatin1Ex(g_wszAll, 255, &psz, 0, &cchActual);
1398	RTTEST_CHECK_RC_OK(hTest, rc);
1399	if (RT_SUCCESS(rc))
1400	{
1401	RTTEST_CHECK(hTest, (cchActual == 255));
1402	RTTEST_CHECK(hTest, (cchActual == strlen(psz)));
1403	for (unsigned i = 0, j = 1; psz[i] != '\0'; ++i, ++j)
1404	if ( ((j < 0x100) && (psz[i] != (char) j))
1405	\|\| ((j > 0xff) && psz[i] != '?'))
1406	{
1407	RTTestFailed(hTest, "third conversion of g_wszAll to Latin1 failed at position %u\n", i);
1408	break;
1409	}
1410	}
1411	const char *pszBad = "H\0e\0l\0l\0o\0\0\xDC\0\xD8\0";
1412	rc = RTUtf16ToLatin1Ex((RTUTF16 *) pszBad, RTSTR_MAX, &psz2, sizeof(sz),
1413	&cchActual);
1414	RTTEST_CHECK_RC(hTest, rc, VERR_INVALID_UTF16_ENCODING);
1415	RTStrFree(psz);
1416
1417	/* Test Latin1 -> Utf16 */
1418	const char *pszLat1 = "\x01\x20\x40\x80\x81";
1419	RTTEST_CHECK(hTest, RTLatin1CalcUtf16Len(pszLat1) == 5);
1420	rc = RTLatin1CalcUtf16LenEx(pszLat1, 3, &cchActual);
1421	RTTEST_CHECK_RC_OK(hTest, rc);
1422	if (RT_SUCCESS(rc))
1423	RTTEST_CHECK(hTest, cchActual == 3);
1424	rc = RTLatin1CalcUtf16LenEx(pszLat1, RTSTR_MAX, &cchActual);
1425	RTTEST_CHECK_RC_OK(hTest, rc);
1426	if (RT_SUCCESS(rc))
1427	RTTEST_CHECK(hTest, cchActual == 5);
1428	RTUTF16 *pwc = NULL;
1429	RTUTF16 wc[6];
1430	RTUTF16 *pwc2 = &wc[0];
1431	size_t cwActual = 0;
1432	rc = RTLatin1ToUtf16(pszLat1, &pwc);
1433	RTTEST_CHECK_RC_OK(hTest, rc);
1434	if (RT_SUCCESS(rc))
1435	RTTEST_CHECK(hTest, (pwc[0] == 1) && (pwc[1] == 0x20)
1436	&& (pwc[2] == 0x40) && (pwc[3] == 0x80)
1437	&& (pwc[4] == 0x81) && (pwc[5] == '\0'));
1438	RTUtf16Free(pwc);
1439	rc = RTLatin1ToUtf16Ex(pszLat1, RTSTR_MAX, &pwc, 0, &cwActual);
1440	RTTEST_CHECK_RC_OK(hTest, rc);
1441	if (RT_SUCCESS(rc))
1442	{
1443	RTTEST_CHECK(hTest, (cwActual == 5));
1444	RTTEST_CHECK(hTest, (pwc[0] == 1) && (pwc[1] == 0x20)
1445	&& (pwc[2] == 0x40) && (pwc[3] == 0x80)
1446	&& (pwc[4] == 0x81) && (pwc[5] == '\0'));
1447	}
1448	RTUtf16Free(pwc);
1449	rc = RTLatin1ToUtf16Ex(pszLat1, RTSTR_MAX, &pwc, 0, NULL);
1450	RTTEST_CHECK_RC_OK(hTest, rc);
1451	if (RT_SUCCESS(rc))
1452	RTTEST_CHECK(hTest, (pwc[0] == 1) && (pwc[1] == 0x20)
1453	&& (pwc[2] == 0x40) && (pwc[3] == 0x80)
1454	&& (pwc[4] == 0x81) && (pwc[5] == '\0'));
1455	RTUtf16Free(pwc);
1456	rc = RTLatin1ToUtf16Ex(pszLat1, RTSTR_MAX, &pwc2, RT_ELEMENTS(wc),
1457	&cwActual);
1458	RTTEST_CHECK_RC_OK(hTest, rc);
1459	if (RT_SUCCESS(rc))
1460	{
1461	RTTEST_CHECK(hTest, (cwActual == 5));
1462	RTTEST_CHECK(hTest, (wc[0] == 1) && (wc[1] == 0x20)
1463	&& (wc[2] == 0x40) && (wc[3] == 0x80)
1464	&& (wc[4] == 0x81) && (wc[5] == '\0'));
1465	}
1466	rc = RTLatin1ToUtf16Ex(pszLat1, 3, &pwc2, RT_ELEMENTS(wc),
1467	&cwActual);
1468	RTTEST_CHECK_RC_OK(hTest, rc);
1469	if (RT_SUCCESS(rc))
1470	{
1471	RTTEST_CHECK(hTest, (cwActual == 3));
1472	RTTEST_CHECK(hTest, (wc[0] == 1) && (wc[1] == 0x20)
1473	&& (wc[2] == 0x40) && (wc[3] == '\0'));
1474	}
1475	rc = RTLatin1ToUtf16Ex(pszLat1, RTSTR_MAX, &pwc2, RT_ELEMENTS(wc) - 1,
1476	&cwActual);
1477	RTTEST_CHECK_RC(hTest, rc, VERR_BUFFER_OVERFLOW);
1478	RTTEST_CHECK(hTest, (cwActual == 5));
1479	RTTestSubDone(hTest);
1480	}
1481
1482
1483	static void testNoTranslation(RTTEST hTest)
1484	{
1485	/*
1486	* Try trigger a VERR_NO_TRANSLATION error in convert to
1487	* current CP to latin-1.
1488	*
1489	* On Windows / DOS OSes this is codepage 850.
1490	*
1491	* Note! On Windows-y systems there ALWAYS are two codepages active:
1492	* the OEM codepage for legacy (console) applications, and the ACP (ANSI CodePage).
1493	* 'chcp' only will tell you the OEM codepage, however.
1494	*/
1495
1496	/* Unicode code points (some of it on 2300-23FF -> misc. technical) to try. */
1497	const RTUTF16 s_swzTest1[] = { 0x2358, 0x2242, 0x2357, 0x2359, 0x22f9, 0x2c4e, 0x0030, 0x0060,
1498	0x0092, 0x00c1, 0x00f2, 0x1f80, 0x0088, 0x2c38, 0x2c30, 0x0000 };
1499	char *pszTest1;
1500	int rc = RTUtf16ToUtf8(s_swzTest1, &pszTest1);
1501	RTTESTI_CHECK_RC_RETV(rc, VINF_SUCCESS);
1502
1503	#ifdef RT_OS_WINDOWS
1504	UINT const uACP = GetACP();
1505	RTTestIPrintf(RTTESTLVL_ALWAYS, "Current Windows ANSI codepage is: %u%s\n",
1506	uACP, uACP == 65001 /* UTF-8 */ ? " (UTF-8)" : "");
1507	#endif
1508
1509	RTTestSub(hTest, "VERR_NO_TRANSLATION/RTStrUtf8ToCurrentCP");
1510	char *pszOut;
1511	rc = RTStrUtf8ToCurrentCP(&pszOut, pszTest1);
1512	if (rc == VINF_SUCCESS)
1513	{
1514	RTTestIPrintf(RTTESTLVL_ALWAYS, "CurrentCP is UTF-8 or similar (LC_ALL=%s LANG=%s LC_CTYPE=%s)\n",
1515	RTEnvGet("LC_ALL"), RTEnvGet("LANG"), RTEnvGet("LC_CTYPE"));
1516	#ifdef RT_OS_WINDOWS
1517	if (uACP == 65001 /* UTF-8 */)
1518	{
1519	/* The following string comparison will fail if the active ACP isn't UTF-8 (65001), so skip this then.
1520	* This applies to older Windows OSes like NT4. */
1521	#endif
1522	if (strcmp(pszOut, pszTest1))
1523	RTTestFailed(hTest, "mismatch\nutf8: %.Rhxs\n got: %.Rhxs\n", strlen(pszTest1), pszTest1, strlen(pszOut), pszOut);
1524	#ifdef RT_OS_WINDOWS
1525	}
1526	#endif
1527	RTStrFree(pszOut);
1528	}
1529	else
1530	RTTESTI_CHECK_MSG(rc == VWRN_NO_TRANSLATION \|\| rc == VERR_NO_TRANSLATION, ("rc=%Rrc\n", rc));
1531
1532	RTTestSub(hTest, "VERR_NO_TRANSLATION/RTUtf16ToLatin1");
1533	rc = RTUtf16ToLatin1(s_swzTest1, &pszOut);
1534	RTTESTI_CHECK_RC(rc, VERR_NO_TRANSLATION);
1535	if (RT_SUCCESS(rc))
1536	RTStrFree(pszOut);
1537
1538	RTStrFree(pszTest1);
1539	RTTestSubDone(hTest);
1540	}
1541
1542	static void testGetPut(RTTEST hTest)
1543	{
1544	/*
1545	* Test RTStrPutCp, RTStrGetCp and RTStrGetCpEx.
1546	*/
1547	RTTestSub(hTest, "RTStrPutCp, RTStrGetCp and RTStrGetCpEx");
1548
1549	RTUNICP uc = 0;
1550	while (uc <= 0x10fffd)
1551	{
1552	/* Figure the range - skip illegal ranges. */
1553	RTUNICP ucFirst = uc;
1554	if (ucFirst - UINT32_C(0xd800) <= 0x7ff)
1555	ucFirst = 0xe000;
1556	else if (ucFirst == UINT32_C(0xfffe) \|\| ucFirst == UINT32_C(0xffff))
1557	ucFirst = 0x10000;
1558
1559	RTUNICP ucLast = ucFirst + 1023;
1560	if (ucLast - UINT32_C(0xd800) <= 0x7ff)
1561	ucLast = 0xd7ff;
1562	else if (ucLast == UINT32_C(0xfffe) \|\| ucLast == UINT32_C(0xffff))
1563	ucLast = 0xfffd;
1564
1565	/* Encode the range into a string, decode each code point as we go along. */
1566	char sz1[8192];
1567	char *pszDst = sz1;
1568	for (uc = ucFirst; uc <= ucLast; uc++)
1569	{
1570	char *pszBefore = pszDst;
1571	pszDst = RTStrPutCp(pszDst, uc);
1572	RTTESTI_CHECK(pszBefore - pszDst < 6);
1573
1574	RTUNICP uc2 = RTStrGetCp(pszBefore);
1575	RTTESTI_CHECK_MSG(uc2 == uc, ("uc2=%#x uc=%#x\n", uc2, uc));
1576
1577	const char *pszSrc = pszBefore;
1578	RTUNICP uc3 = 42;
1579	RTTESTI_CHECK_RC(RTStrGetCpEx(&pszSrc, &uc3), VINF_SUCCESS);
1580	RTTESTI_CHECK_MSG(uc3 == uc, ("uc3=%#x uc=%#x\n", uc3, uc));
1581	RTTESTI_CHECK_MSG(pszSrc == pszDst, ("pszSrc=%p pszDst=%p\n", pszSrc, pszDst));
1582	}
1583
1584	/* Decode and re-encode it. */
1585	const char *pszSrc = pszDst = sz1;
1586	for (uc = ucFirst; uc <= ucLast; uc++)
1587	{
1588	RTUNICP uc2 = RTStrGetCp(pszSrc);
1589	RTTESTI_CHECK_MSG(uc2 == uc, ("uc2=%#x uc=%#x\n", uc2, uc));
1590
1591	RTUNICP uc3 = 42;
1592	RTTESTI_CHECK_RC(RTStrGetCpEx(&pszSrc, &uc3), VINF_SUCCESS);
1593	RTTESTI_CHECK_MSG(uc3 == uc, ("uc3=%#x uc=%#x\n", uc3, uc));
1594
1595	pszDst = RTStrPutCp(pszDst, uc);
1596	RTTESTI_CHECK_MSG(pszSrc == pszDst, ("pszSrc=%p pszDst=%p\n", pszSrc, pszDst));
1597	pszSrc = pszDst;
1598	}
1599
1600	/* Decode and wipe it (checking compiler optimizations). */
1601	pszSrc = pszDst = sz1;
1602	for (uc = ucFirst; uc <= ucLast; uc++)
1603	{
1604	RTUNICP uc2 = RTStrGetCp(pszSrc);
1605	RTTESTI_CHECK_MSG(uc2 == uc, ("uc2=%#x uc=%#x\n", uc2, uc));
1606
1607	RTUNICP uc3 = 42;
1608	RTTESTI_CHECK_RC(RTStrGetCpEx(&pszSrc, &uc3), VINF_SUCCESS);
1609	RTTESTI_CHECK_MSG(uc3 == uc, ("uc3=%#x uc=%#x\n", uc3, uc));
1610
1611	pszDst = RTStrPutCp(pszDst, 0);
1612	}
1613
1614	/* advance */
1615	uc = ucLast + 1;
1616	}
1617
1618	}
1619
1620
1621	int main()
1622	{
1623	/*
1624	* Init the runtime, test and say hello.
1625	*/
1626	RTTEST hTest;
1627	RTEXITCODE rcExit = RTTestInitAndCreate("tstUtf8", &hTest);
1628	if (rcExit != RTEXITCODE_SUCCESS)
1629	return rcExit;
1630	RTTestBanner(hTest);
1631
1632	/*
1633	* Run the tests.
1634	*/
1635	InitStrings();
1636	test1(hTest);
1637	test2(hTest);
1638	test3(hTest);
1639	TstRTStrXCmp(hTest);
1640	TstRTStrPurgeEncoding(hTest);
1641	/* TstRTPurgeComplementSet test conditions which assert. /
1642	TstRTStrPurgeComplementSet(hTest);
1643	TstRTUtf16PurgeComplementSet(hTest);
1644	testStrEnd(hTest);
1645	testStrStr(hTest);
1646	testUtf8Latin1(hTest);
1647	testUtf16Latin1(hTest);
1648	testNoTranslation(hTest);
1649	testGetPut(hTest);
1650
1651	Benchmarks(hTest);
1652
1653	/*
1654	* Summary
1655	*/
1656	return RTTestSummaryAndDestroy(hTest);
1657	}
1658

Note: See TracBrowser for help on using the repository browser.

source: vbox/trunk/src/VBox/Runtime/testcase/tstUtf8.cpp@ 102335

Download in other formats: