VirtualBox

source: vbox/trunk/src/VBox/Runtime/common/string/utf-16-case.cpp@ 78398

Last change on this file since 78398 was 77118, checked in by vboxsync, 6 years ago

Runtime: Get rid of the "register" keyword usage: none of the compilers we care about have honored it for a long time, it was deprecated in C++11 and removed entirely in C++17. This fixes compile-time warnings with recent clang.

  • Property svn:eol-style set to native
  • Property svn:keywords set to Id Revision
  • Property svn:mergeinfo set to (toggle deleted branches)
File size: 13.8 KB
Line 
1/* $Id: utf-16-case.cpp 77118 2019-02-01 14:47:32Z vboxsync $ */
2/** @file
3 * IPRT - UTF-16, Case Sensitivity.
4 */
5
6/*
7 * Copyright (C) 2006-2019 Oracle Corporation
8 *
9 * This file is part of VirtualBox Open Source Edition (OSE), as
10 * available from http://www.virtualbox.org. This file is free software;
11 * you can redistribute it and/or modify it under the terms of the GNU
12 * General Public License (GPL) as published by the Free Software
13 * Foundation, in version 2 as it comes in the "COPYING" file of the
14 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
15 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
16 *
17 * The contents of this file may alternatively be used under the terms
18 * of the Common Development and Distribution License Version 1.0
19 * (CDDL) only, as it comes in the "COPYING.CDDL" file of the
20 * VirtualBox OSE distribution, in which case the provisions of the
21 * CDDL are applicable instead of those of the GPL.
22 *
23 * You may elect to license modified versions of this file under the
24 * terms and conditions of either the GPL or the CDDL or both.
25 */
26
27
28/*********************************************************************************************************************************
29* Header Files *
30*********************************************************************************************************************************/
31#include <iprt/utf16.h>
32#include "internal/iprt.h"
33
34#include <iprt/uni.h>
35#include <iprt/alloc.h>
36#include <iprt/asm.h>
37#include <iprt/assert.h>
38#include <iprt/errcore.h>
39#include "internal/string.h"
40
41
42RTDECL(int) RTUtf16ICmp(PCRTUTF16 pwsz1, PCRTUTF16 pwsz2)
43{
44 if (pwsz1 == pwsz2)
45 return 0;
46 if (!pwsz1)
47 return -1;
48 if (!pwsz2)
49 return 1;
50
51 PCRTUTF16 pwsz1Start = pwsz1; /* keep it around in case we have to backtrack on a surrogate pair */
52 for (;;)
53 {
54 RTUTF16 wc1 = *pwsz1;
55 RTUTF16 wc2 = *pwsz2;
56 int iDiff = wc1 - wc2;
57 if (iDiff)
58 {
59 /* unless they are *both* surrogate pairs, there is no chance they'll be identical. */
60 if ( wc1 < 0xd800
61 || wc2 < 0xd800
62 || wc1 > 0xdfff
63 || wc2 > 0xdfff)
64 {
65 /* simple UCS-2 char */
66 iDiff = RTUniCpToUpper(wc1) - RTUniCpToUpper(wc2);
67 if (iDiff)
68 iDiff = RTUniCpToLower(wc1) - RTUniCpToLower(wc2);
69 }
70 else
71 {
72 /* a damned pair */
73 RTUNICP uc1;
74 RTUNICP uc2;
75 if (wc1 >= 0xdc00)
76 {
77 if (pwsz1Start == pwsz1)
78 return iDiff;
79 uc1 = pwsz1[-1];
80 if (uc1 < 0xd800 || uc1 >= 0xdc00)
81 return iDiff;
82 uc1 = 0x10000 + (((uc1 & 0x3ff) << 10) | (wc1 & 0x3ff));
83 uc2 = 0x10000 + (((pwsz2[-1] & 0x3ff) << 10) | (wc2 & 0x3ff));
84 }
85 else
86 {
87 uc1 = *++pwsz1;
88 if (uc1 < 0xdc00 || uc1 >= 0xe000)
89 return iDiff;
90 uc1 = 0x10000 + (((wc1 & 0x3ff) << 10) | (uc1 & 0x3ff));
91 uc2 = 0x10000 + (((wc2 & 0x3ff) << 10) | (*++pwsz2 & 0x3ff));
92 }
93 iDiff = RTUniCpToUpper(uc1) - RTUniCpToUpper(uc2);
94 if (iDiff)
95 iDiff = RTUniCpToLower(uc1) - RTUniCpToLower(uc2); /* serious paranoia! */
96 }
97 if (iDiff)
98 return iDiff;
99 }
100 if (!wc1)
101 return 0;
102 pwsz1++;
103 pwsz2++;
104 }
105}
106RT_EXPORT_SYMBOL(RTUtf16ICmp);
107
108
109RTDECL(int) RTUtf16BigICmp(PCRTUTF16 pwsz1, PCRTUTF16 pwsz2)
110{
111 if (pwsz1 == pwsz2)
112 return 0;
113 if (!pwsz1)
114 return -1;
115 if (!pwsz2)
116 return 1;
117
118 PCRTUTF16 pwsz1Start = pwsz1; /* keep it around in case we have to backtrack on a surrogate pair */
119 for (;;)
120 {
121 RTUTF16 wc1 = *pwsz1;
122 RTUTF16 wc2 = *pwsz2;
123 int iDiff = wc1 - wc2;
124 if (iDiff)
125 {
126 /* unless they are *both* surrogate pairs, there is no chance they'll be identical. */
127 wc1 = RT_BE2H_U16(wc1);
128 wc2 = RT_BE2H_U16(wc2);
129 if ( wc1 < 0xd800
130 || wc2 < 0xd800
131 || wc1 > 0xdfff
132 || wc2 > 0xdfff)
133 {
134 /* simple UCS-2 char */
135 iDiff = RTUniCpToUpper(wc1) - RTUniCpToUpper(wc2);
136 if (iDiff)
137 iDiff = RTUniCpToLower(wc1) - RTUniCpToLower(wc2);
138 }
139 else
140 {
141 /* a damned pair */
142 RTUNICP uc1;
143 RTUNICP uc2;
144 if (wc1 >= 0xdc00)
145 {
146 if (pwsz1Start == pwsz1)
147 return iDiff;
148 uc1 = RT_BE2H_U16(pwsz1[-1]);
149 if (uc1 < 0xd800 || uc1 >= 0xdc00)
150 return iDiff;
151 uc1 = 0x10000 + (((uc1 & 0x3ff) << 10) | (wc1 & 0x3ff));
152 uc2 = 0x10000 + (((RT_BE2H_U16(pwsz2[-1]) & 0x3ff) << 10) | (wc2 & 0x3ff));
153 }
154 else
155 {
156 RTUTF16 wcTmp = *++pwsz1;
157 uc1 = RT_BE2H_U16(wcTmp);
158 if (uc1 < 0xdc00 || uc1 >= 0xe000)
159 return iDiff;
160 uc1 = 0x10000 + (((wc1 & 0x3ff) << 10) | (uc1 & 0x3ff));
161 wcTmp = *++pwsz2;
162 uc2 = 0x10000 + (((wc2 & 0x3ff) << 10) | (RT_BE2H_U16(wcTmp) & 0x3ff));
163 }
164 iDiff = RTUniCpToUpper(uc1) - RTUniCpToUpper(uc2);
165 if (iDiff)
166 iDiff = RTUniCpToLower(uc1) - RTUniCpToLower(uc2); /* serious paranoia! */
167 }
168 if (iDiff)
169 return iDiff;
170 }
171 if (!wc1)
172 return 0;
173 pwsz1++;
174 pwsz2++;
175 }
176}
177RT_EXPORT_SYMBOL(RTUtf16BigICmp);
178
179
180RTDECL(int) RTUtf16ICmpUtf8(PCRTUTF16 pwsz1, const char *psz2)
181{
182 /*
183 * NULL and empty strings are all the same.
184 */
185 if (!pwsz1)
186 return !psz2 || !*psz2 ? 0 : -1;
187 if (!psz2)
188 return !*pwsz1 ? 0 : 1;
189
190 /*
191 * Compare with a UTF-8 string by enumerating them char by char.
192 */
193 for (;;)
194 {
195 RTUNICP uc1;
196 int rc = RTUtf16GetCpEx(&pwsz1, &uc1);
197 AssertRCReturn(rc, 1);
198
199 RTUNICP uc2;
200 rc = RTStrGetCpEx(&psz2, &uc2);
201 AssertRCReturn(rc, -1);
202 if (uc1 == uc2)
203 {
204 if (uc1)
205 continue;
206 return 0;
207 }
208
209 if (RTUniCpToUpper(uc1) == RTUniCpToUpper(uc2))
210 continue;
211 if (RTUniCpToLower(uc1) == RTUniCpToLower(uc2))
212 continue;
213 return uc1 < uc2 ? -1 : 1;
214 }
215}
216RT_EXPORT_SYMBOL(RTUtf16CmpIUtf8);
217
218
219RTDECL(int) RTUtf16NICmp(PCRTUTF16 pwsz1, PCRTUTF16 pwsz2, size_t cwcMax)
220{
221 if (pwsz1 == pwsz2)
222 return 0;
223 if (!pwsz1)
224 return -1;
225 if (!pwsz2)
226 return 1;
227
228 PCRTUTF16 pwsz1Start = pwsz1; /* keep it around in case we have to backtrack on a surrogate pair */
229 while (cwcMax-- > 0)
230 {
231 RTUTF16 wc1 = *pwsz1;
232 RTUTF16 wc2 = *pwsz2;
233 int iDiff = wc1 - wc2;
234 if (iDiff)
235 {
236 /* unless they are *both* surrogate pairs, there is no chance they'll be identical. */
237 if ( wc1 < 0xd800
238 || wc2 < 0xd800
239 || wc1 > 0xdfff
240 || wc2 > 0xdfff)
241 {
242 /* simple UCS-2 char */
243 iDiff = RTUniCpToUpper(wc1) - RTUniCpToUpper(wc2);
244 if (iDiff)
245 iDiff = RTUniCpToLower(wc1) - RTUniCpToLower(wc2);
246 }
247 else
248 {
249 /* a damned pair */
250 RTUNICP uc1;
251 RTUNICP uc2;
252 if (wc1 >= 0xdc00)
253 {
254 if (pwsz1Start == pwsz1)
255 return iDiff;
256 uc1 = pwsz1[-1];
257 if (uc1 < 0xd800 || uc1 >= 0xdc00)
258 return iDiff;
259 uc1 = 0x10000 + (((uc1 & 0x3ff) << 10) | (wc1 & 0x3ff));
260 uc2 = 0x10000 + (((pwsz2[-1] & 0x3ff) << 10) | (wc2 & 0x3ff));
261 }
262 else if (cwcMax-- > 0)
263 {
264 uc1 = *++pwsz1;
265 if (uc1 < 0xdc00 || uc1 >= 0xe000)
266 return iDiff;
267 uc1 = 0x10000 + (((wc1 & 0x3ff) << 10) | (uc1 & 0x3ff));
268 uc2 = 0x10000 + (((wc2 & 0x3ff) << 10) | (*++pwsz2 & 0x3ff));
269 }
270 else
271 {
272 iDiff = wc1 - wc2;
273 return iDiff;
274 }
275 iDiff = RTUniCpToUpper(uc1) - RTUniCpToUpper(uc2);
276 if (iDiff)
277 iDiff = RTUniCpToLower(uc1) - RTUniCpToLower(uc2); /* serious paranoia! */
278 }
279 if (iDiff)
280 return iDiff;
281 }
282 if (!wc1)
283 return 0;
284 pwsz1++;
285 pwsz2++;
286 }
287 return 0;
288}
289RT_EXPORT_SYMBOL(RTUtf16NICmp);
290
291
292RTDECL(int) RTUtf16BigNICmp(PCRTUTF16 pwsz1, PCRTUTF16 pwsz2, size_t cwcMax)
293{
294 if (pwsz1 == pwsz2)
295 return 0;
296 if (!pwsz1)
297 return -1;
298 if (!pwsz2)
299 return 1;
300
301 PCRTUTF16 pwsz1Start = pwsz1; /* keep it around in case we have to backtrack on a surrogate pair */
302 while (cwcMax-- > 0)
303 {
304 RTUTF16 wc1 = *pwsz1;
305 RTUTF16 wc2 = *pwsz2;
306 int iDiff = wc1 - wc2;
307 if (iDiff)
308 {
309 /* unless they are *both* surrogate pairs, there is no chance they'll be identical. */
310 wc1 = RT_BE2H_U16(wc1);
311 wc2 = RT_BE2H_U16(wc2);
312 if ( wc1 < 0xd800
313 || wc2 < 0xd800
314 || wc1 > 0xdfff
315 || wc2 > 0xdfff)
316 {
317 /* simple UCS-2 char */
318 iDiff = RTUniCpToUpper(wc1) - RTUniCpToUpper(wc2);
319 if (iDiff)
320 iDiff = RTUniCpToLower(wc1) - RTUniCpToLower(wc2);
321 }
322 else
323 {
324 /* a damned pair */
325 RTUNICP uc1;
326 RTUNICP uc2;
327 if (wc1 >= 0xdc00)
328 {
329 if (pwsz1Start == pwsz1)
330 return iDiff;
331 uc1 = RT_BE2H_U16(pwsz1[-1]);
332 if (uc1 < 0xd800 || uc1 >= 0xdc00)
333 return iDiff;
334 uc1 = 0x10000 + (((uc1 & 0x3ff) << 10) | (wc1 & 0x3ff));
335 uc2 = 0x10000 + (((RT_BE2H_U16(pwsz2[-1]) & 0x3ff) << 10) | (wc2 & 0x3ff));
336 }
337 else if (cwcMax > 0)
338 {
339 RTUTF16 wcTmp = *++pwsz1;
340 uc1 = RT_BE2H_U16(wcTmp);
341 if (uc1 < 0xdc00 || uc1 >= 0xe000)
342 return iDiff;
343 uc1 = 0x10000 + (((wc1 & 0x3ff) << 10) | (uc1 & 0x3ff));
344 wcTmp = *++pwsz2;
345 uc2 = 0x10000 + (((wc2 & 0x3ff) << 10) | (RT_BE2H_U16(wcTmp) & 0x3ff));
346 }
347 else
348 {
349 iDiff = wc1 - wc2;
350 return iDiff;
351 }
352 iDiff = RTUniCpToUpper(uc1) - RTUniCpToUpper(uc2);
353 if (iDiff)
354 iDiff = RTUniCpToLower(uc1) - RTUniCpToLower(uc2); /* serious paranoia! */
355 }
356 if (iDiff)
357 return iDiff;
358 }
359 if (!wc1)
360 return 0;
361 pwsz1++;
362 pwsz2++;
363 }
364 return 0;
365}
366RT_EXPORT_SYMBOL(RTUtf16BigNICmp);
367
368
369RTDECL(PRTUTF16) RTUtf16ToLower(PRTUTF16 pwsz)
370{
371 PRTUTF16 pwc = pwsz;
372 for (;;)
373 {
374 RTUTF16 wc = *pwc;
375 if (!wc)
376 break;
377 if (wc < 0xd800 || wc >= 0xdc00)
378 {
379 RTUNICP ucFolded = RTUniCpToLower(wc);
380 if (ucFolded < 0x10000)
381 *pwc++ = RTUniCpToLower(wc);
382 }
383 else
384 {
385 /* surrogate */
386 RTUTF16 wc2 = pwc[1];
387 if (wc2 >= 0xdc00 && wc2 <= 0xdfff)
388 {
389 RTUNICP uc = 0x10000 + (((wc & 0x3ff) << 10) | (wc2 & 0x3ff));
390 RTUNICP ucFolded = RTUniCpToLower(uc);
391 if (uc != ucFolded && ucFolded >= 0x10000) /* we don't support shrinking the string */
392 {
393 uc -= 0x10000;
394 *pwc++ = 0xd800 | (uc >> 10);
395 *pwc++ = 0xdc00 | (uc & 0x3ff);
396 }
397 }
398 else /* invalid encoding. */
399 pwc++;
400 }
401 }
402 return pwsz;
403}
404RT_EXPORT_SYMBOL(RTUtf16ToLower);
405
406
407RTDECL(PRTUTF16) RTUtf16ToUpper(PRTUTF16 pwsz)
408{
409 PRTUTF16 pwc = pwsz;
410 for (;;)
411 {
412 RTUTF16 wc = *pwc;
413 if (!wc)
414 break;
415 if (wc < 0xd800 || wc >= 0xdc00)
416 *pwc++ = RTUniCpToUpper(wc);
417 else
418 {
419 /* surrogate */
420 RTUTF16 wc2 = pwc[1];
421 if (wc2 >= 0xdc00 && wc2 <= 0xdfff)
422 {
423 RTUNICP uc = 0x10000 + (((wc & 0x3ff) << 10) | (wc2 & 0x3ff));
424 RTUNICP ucFolded = RTUniCpToUpper(uc);
425 if (uc != ucFolded && ucFolded >= 0x10000) /* we don't support shrinking the string */
426 {
427 uc -= 0x10000;
428 *pwc++ = 0xd800 | (uc >> 10);
429 *pwc++ = 0xdc00 | (uc & 0x3ff);
430 }
431 }
432 else /* invalid encoding. */
433 pwc++;
434 }
435 }
436 return pwsz;
437}
438RT_EXPORT_SYMBOL(RTUtf16ToUpper);
439
Note: See TracBrowser for help on using the repository browser.

© 2024 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette