VirtualBox

source: vbox/trunk/include/iprt/string.h@ 3935

Last change on this file since 3935 was 3888, checked in by vboxsync, 18 years ago

Solaris.

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 50.1 KB
Line 
1/** @file
2 * innotek Portable Runtime - String Manipulation.
3 */
4
5/*
6 * Copyright (C) 2006-2007 innotek GmbH
7 *
8 * This file is part of VirtualBox Open Source Edition (OSE), as
9 * available from http://www.virtualbox.org. This file is free software;
10 * you can redistribute it and/or modify it under the terms of the GNU
11 * General Public License as published by the Free Software Foundation,
12 * in version 2 as it comes in the "COPYING" file of the VirtualBox OSE
13 * distribution. VirtualBox OSE is distributed in the hope that it will
14 * be useful, but WITHOUT ANY WARRANTY of any kind.
15 *
16 * If you received this file as part of a commercial VirtualBox
17 * distribution, then only the terms of your commercial VirtualBox
18 * license agreement apply instead of the previous paragraph.
19 */
20
21#ifndef ___iprt_string_h
22#define ___iprt_string_h
23
24#include <iprt/cdefs.h>
25#include <iprt/types.h>
26#include <iprt/stdarg.h>
27#include <iprt/err.h> /* for VINF_SUCCESS */
28#if defined(RT_OS_LINUX) && defined(__KERNEL__)
29# include <linux/string.h>
30#elif defined(RT_OS_FREEBSD) && defined(_KERNEL)
31 /*
32 * Kludge for the FreeBSD kernel:
33 * Some of the string.h stuff clashes with sys/libkern.h, so just wrap
34 * it up while including string.h to keep things quiet. It's nothing
35 * important that's clashing, after all.
36 */
37# define strdup strdup_string_h
38# include <string.h>
39# undef strdup
40#elif defined(RT_OS_SOLARIS) && defined(_KERNEL)
41 /*
42 * Same case as with FreeBSD kernel:
43 * The string.h stuff clashes with sys/systm.h
44 * ffs = find first set bit.
45 */
46# define ffs ffs_string_h
47# include <string.h>
48# undef ffs
49#else
50# include <string.h>
51#endif
52
53/*
54 * Supply prototypes for standard string functions provided by
55 * IPRT instead of the operating environment.
56 */
57#if defined(RT_OS_DARWIN) && defined(KERNEL)
58__BEGIN_DECLS
59void *memchr(const void *pv, int ch, size_t cb);
60char *strpbrk(const char *pszStr, const char *pszChars);
61__END_DECLS
62#endif
63
64
65/** @defgroup grp_rt_str RTStr - String Manipulation
66 * Mostly UTF-8 related helpers where the standard string functions won't do.
67 * @ingroup grp_rt
68 * @{
69 */
70
71__BEGIN_DECLS
72
73
74/**
75 * The maximum string length.
76 */
77#define RTSTR_MAX (~(size_t)0)
78
79
80#ifdef IN_RING3
81
82/**
83 * Allocates tmp buffer, translates pszString from UTF8 to current codepage.
84 *
85 * @returns iprt status code.
86 * @param ppszString Receives pointer of allocated native CP string.
87 * The returned pointer must be freed using RTStrFree().
88 * @param pszString UTF-8 string to convert.
89 */
90RTR3DECL(int) RTStrUtf8ToCurrentCP(char **ppszString, const char *pszString);
91
92/**
93 * Allocates tmp buffer, translates pszString from current codepage to UTF-8.
94 *
95 * @returns iprt status code.
96 * @param ppszString Receives pointer of allocated UTF-8 string.
97 * The returned pointer must be freed using RTStrFree().
98 * @param pszString Native string to convert.
99 */
100RTR3DECL(int) RTStrCurrentCPToUtf8(char **ppszString, const char *pszString);
101
102#endif
103
104/**
105 * Free string allocated by any of the non-UCS-2 string functions.
106 *
107 * @returns Nothing (the function is declared void).
108 * @param pszString Pointer to buffer with string to free.
109 * NULL is accepted.
110 */
111RTDECL(void) RTStrFree(char *pszString);
112
113/**
114 * Allocates a new copy of the given UTF-8 string.
115 *
116 * @returns Pointer to the allocated UTF-8 string.
117 * @param pszString UTF-8 string to duplicate.
118 */
119RTDECL(char *) RTStrDup(const char *pszString);
120
121/**
122 * Allocates a new copy of the given UTF-8 string.
123 *
124 * @returns iprt status code.
125 * @param ppszString Receives pointer of the allocated UTF-8 string.
126 * The returned pointer must be freed using RTStrFree().
127 * @param pszString UTF-8 string to duplicate.
128 */
129RTDECL(int) RTStrDupEx(char **ppszString, const char *pszString);
130
131/**
132 * Gets the number of code points the string is made up of, excluding
133 * the terminator.
134 *
135 *
136 * @returns Number of code points (RTUNICP).
137 * @returns 0 if the string was incorrectly encoded.
138 * @param psz The string.
139 */
140RTDECL(size_t) RTStrUniLen(const char *psz);
141
142/**
143 * Gets the number of code points the string is made up of, excluding
144 * the terminator.
145 *
146 * This function will validate the string, and incorrectly encoded UTF-8
147 * strings will be rejected.
148 *
149 * @returns iprt status code.
150 * @param psz The string.
151 * @param cch The max string length. Use RTSTR_MAX to process the entire string.
152 * @param pcuc Where to store the code point count.
153 * This is undefined on failure.
154 */
155RTDECL(int) RTStrUniLenEx(const char *psz, size_t cch, size_t *pcuc);
156
157/**
158 * Translate a UTF-8 string into an unicode string (i.e. RTUNICPs), allocating the string buffer.
159 *
160 * @returns iprt status code.
161 * @param pszString UTF-8 string to convert.
162 * @param ppUniString Receives pointer to the allocated unicode string.
163 * The returned string must be freed using RTUniFree().
164 */
165RTDECL(int) RTStrToUni(const char *pszString, PRTUNICP *ppUniString);
166
167/**
168 * Translates pszString from UTF-8 to an array of code points, allocating the result
169 * array if requested.
170 *
171 * @returns iprt status code.
172 * @param pszString UTF-8 string to convert.
173 * @param cchString The maximum size in chars (the type) to convert. The conversion stop
174 * when it reaches cchString or the string terminator ('\\0').
175 * Use RTSTR_MAX to translate the entire string.
176 * @param ppaCps If cCps is non-zero, this must either be pointing to pointer to
177 * a buffer of the specified size, or pointer to a NULL pointer.
178 * If *ppaCps is NULL or cCps is zero a buffer of at least cCps items
179 * will be allocated to hold the translated string.
180 * If a buffer was requested it must be freed using RTUniFree().
181 * @param cCps The number of code points in the unicode string. This includes the terminator.
182 * @param pcCps Where to store the length of the translated string. (Optional)
183 * This field will be updated even on failure, however the value is only
184 * specified for the following two error codes. On VERR_BUFFER_OVERFLOW
185 * and VERR_NO_STR_MEMORY it contains the required buffer space.
186 */
187RTDECL(int) RTStrToUniEx(const char *pszString, size_t cchString, PRTUNICP *ppaCps, size_t cCps, size_t *pcCps);
188
189/**
190 * Calculates the length of the string in RTUTF16 items.
191 *
192 * This function will validate the string, and incorrectly encoded UTF-8
193 * strings will be rejected. The primary purpose of this function is to
194 * help allocate buffers for RTStrToUtf16Ex of the correct size. For most
195 * other purposes RTStrCalcUtf16LenEx() should be used.
196 *
197 * @returns Number of RTUTF16 items.
198 * @returns 0 if the string was incorrectly encoded.
199 * @param psz The string.
200 */
201RTDECL(size_t) RTStrCalcUtf16Len(const char *psz);
202
203/**
204 * Calculates the length of the string in RTUTF16 items.
205 *
206 * This function will validate the string, and incorrectly encoded UTF-8
207 * strings will be rejected.
208 *
209 * @returns iprt status code.
210 * @param psz The string.
211 * @param cch The max string length. Use RTSTR_MAX to process the entire string.
212 * @param pcwc Where to store the string length. Optional.
213 * This is undefined on failure.
214 */
215RTDECL(int) RTStrCalcUtf16LenEx(const char *psz, size_t cch, size_t *pcwc);
216
217/**
218 * Translate a UTF-8 string into a UTF-16 allocating the result buffer.
219 *
220 * @returns iprt status code.
221 * @param pszString UTF-8 string to convert.
222 * @param ppwszString Receives pointer to the allocated UTF-16 string.
223 * The returned string must be freed using RTUtf16Free().
224 */
225RTDECL(int) RTStrToUtf16(const char *pszString, PRTUTF16 *ppwszString);
226
227/**
228 * Translates pszString from UTF-8 to UTF-16, allocating the result buffer if requested.
229 *
230 * @returns iprt status code.
231 * @param pszString UTF-8 string to convert.
232 * @param cchString The maximum size in chars (the type) to convert. The conversion stop
233 * when it reaches cchString or the string terminator ('\\0').
234 * Use RTSTR_MAX to translate the entire string.
235 * @param ppwsz If cwc is non-zero, this must either be pointing to pointer to
236 * a buffer of the specified size, or pointer to a NULL pointer.
237 * If *ppwsz is NULL or cwc is zero a buffer of at least cwc items
238 * will be allocated to hold the translated string.
239 * If a buffer was requested it must be freed using RTUtf16Free().
240 * @param cwc The buffer size in RTUTF16s. This includes the terminator.
241 * @param pcwc Where to store the length of the translated string. (Optional)
242 * This field will be updated even on failure, however the value is only
243 * specified for the following two error codes. On VERR_BUFFER_OVERFLOW
244 * and VERR_NO_STR_MEMORY it contains the required buffer space.
245 */
246RTDECL(int) RTStrToUtf16Ex(const char *pszString, size_t cchString, PRTUTF16 *ppwsz, size_t cwc, size_t *pcwc);
247
248/**
249 * Allocates tmp buffer, translates pszString from UTF8 to UCS-2.
250 *
251 * @returns iprt status code.
252 * @param ppwszString Receives pointer of allocated UCS-2 string.
253 * The returned pointer must be freed using RTStrUcs2Free().
254 * @param pszString UTF-8 string to convert.
255 * @deprecated Use RTStrToUtf16().
256 */
257DECLINLINE(int) RTStrUtf8ToUcs2(PRTUCS2 *ppwszString, const char *pszString)
258{
259 return RTStrToUtf16(pszString, ppwszString);
260}
261
262/**
263 * Translates pszString from UTF8 to backwater UCS-2, can allocate a temp buffer.
264 *
265 * @returns iprt status code.
266 * @param ppwszString Receives pointer of allocated UCS-2 string.
267 * The returned pointer must be freed using RTStrUcs2Free().
268 * @param cwc Length of target buffer in RTUCS2s including the trailing '\\0'.
269 * If 0 a temporary buffer is allocated.
270 * @param pszString UTF-8 string to convert.
271 * @deprecated Use RTStrToUtf16Ex().
272 */
273DECLINLINE(int) RTStrUtf8ToUcs2Ex(PRTUCS2 *ppwszString, unsigned cwc, const char *pszString)
274{
275 return RTStrToUtf16Ex(pszString, RTSTR_MAX, ppwszString, cwc, NULL);
276}
277
278
279/**
280 * Get the unicode code point at the given string position.
281 *
282 * @returns unicode code point.
283 * @returns RTUNICP_INVALID if the encoding is invalid.
284 * @param psz The string.
285 */
286RTDECL(RTUNICP) RTStrGetCpInternal(const char *psz);
287
288/**
289 * Get the unicode code point at the given string position.
290 *
291 * @returns iprt status code.
292 * RTUNICP_INVALID is stored in *pCp if the encoding is invalid.
293 * @param ppsz The string.
294 * @param pCp Where to store the unicode code point.
295 */
296RTDECL(int) RTStrGetCpExInternal(const char **ppsz, PRTUNICP pCp);
297
298/**
299 * Put the unicode code point at the given string position
300 * and return the pointer to the char following it.
301 *
302 * This function will not consider anything at or following the
303 * buffer area pointed to by psz. It is therefore not suitable for
304 * inserting code points into a string, only appending/overwriting.
305 *
306 * @returns pointer to the char following the written code point.
307 * @param psz The string.
308 * @param CodePoint The code point to write.
309 * This should not be RTUNICP_INVALID or any other character
310 * out of the UTF-8 range.
311 *
312 * @remark This is a worker function for RTStrPutCp().
313 *
314 */
315RTDECL(char *) RTStrPutCpInternal(char *psz, RTUNICP CodePoint);
316
317/**
318 * Get the unicode code point at the given string position.
319 *
320 * @returns unicode code point.
321 * @returns RTUNICP_INVALID if the encoding is invalid.
322 * @param psz The string.
323 *
324 * @remark We optimize this operation by using an inline function for
325 * the most frequent and simplest sequence, the rest is
326 * handled by RTStrGetCpInternal().
327 */
328DECLINLINE(RTUNICP) RTStrGetCp(const char *psz)
329{
330 const unsigned char uch = *(const unsigned char *)psz;
331 if (!(uch & BIT(7)))
332 return uch;
333 return RTStrGetCpInternal(psz);
334}
335
336/**
337 * Get the unicode code point at the given string position.
338 *
339 * @returns iprt status code.
340 * @param ppsz Pointer to the string pointer. This will be updated to
341 * point to the char following the current code point.
342 * @param pCp Where to store the code point.
343 * RTUNICP_INVALID is stored here on failure.
344 *
345 * @remark We optimize this operation by using an inline function for
346 * the most frequent and simplest sequence, the rest is
347 * handled by RTStrGetCpExInternal().
348 */
349DECLINLINE(int) RTStrGetCpEx(const char **ppsz, PRTUNICP pCp)
350{
351 const unsigned char uch = **(const unsigned char **)ppsz;
352 if (!(uch & BIT(7)))
353 {
354 (*ppsz)++;
355 *pCp = uch;
356 return VINF_SUCCESS;
357 }
358 return RTStrGetCpExInternal(ppsz, pCp);
359}
360
361/**
362 * Put the unicode code point at the given string position
363 * and return the pointer to the char following it.
364 *
365 * This function will not consider anything at or following the the
366 * buffer area pointed to by psz. It is therefore not suitable for
367 * inserting code points into a string, only appending/overwriting.
368 *
369 * @returns pointer to the char following the written code point.
370 * @param psz The string.
371 * @param CodePoint The code point to write.
372 * This sould not be RTUNICP_INVALID or any other charater
373 * out of the UTF-8 range.
374 *
375 * @remark We optimize this operation by using an inline function for
376 * the most frequent and simplest sequence, the rest is
377 * handled by RTStrPutCpInternal().
378 */
379DECLINLINE(char *) RTStrPutCp(char *psz, RTUNICP CodePoint)
380{
381 if (CodePoint < 0x80)
382 {
383 *psz++ = (unsigned char)CodePoint;
384 return psz;
385 }
386 return RTStrPutCpInternal(psz, CodePoint);
387}
388
389/**
390 * Skips ahead, past the current code point.
391 *
392 * @returns Pointer to the char after the current code point.
393 * @param psz Pointer to the current code point.
394 * @remark This will not move the next valid code point, only past the current one.
395 */
396DECLINLINE(char *) RTStrNextCp(const char *psz)
397{
398 RTUNICP Cp;
399 RTStrGetCpEx(&psz, &Cp);
400 return (char *)psz;
401}
402
403/**
404 * Skips back to the previous code point.
405 *
406 * @returns Pointer to the char before the current code point.
407 * @returns pszStart on failure.
408 * @param pszStart Pointer to the start of the string.
409 * @param psz Pointer to the current code point.
410 */
411RTDECL(char *) RTStrPrevCp(const char *pszStart, const char *psz);
412
413
414
415#ifndef DECLARED_FNRTSTROUTPUT /* duplicated in iprt/log.h */
416#define DECLARED_FNRTSTROUTPUT
417/**
418 * Output callback.
 *
 * The RTStrFormatV()/RTStrFormat() documentation states that the callback is
 * normally invoked with a string and its length, and once with a NULL string
 * and a zero length to signal termination.
419 *
420 * @returns number of bytes written.
421 * @param pvArg User argument.
422 * @param pachChars Pointer to an array of utf-8 characters.
423 * @param cbChars Number of bytes in the character array pointed to by pachChars.
424 */
425typedef DECLCALLBACK(size_t) FNRTSTROUTPUT(void *pvArg, const char *pachChars, size_t cbChars);
426/** Pointer to an FNRTSTROUTPUT() output callback function. */
427typedef FNRTSTROUTPUT *PFNRTSTROUTPUT;
428#endif /* !DECLARED_FNRTSTROUTPUT */
429
/** Format flags.
 * These are used by RTStrFormat extensions and RTStrFormatNumber, mind
 * that not all flags make sense to both of the functions.
 * @{ */
#define RTSTR_F_CAPITAL         0x0001
#define RTSTR_F_LEFT            0x0002
#define RTSTR_F_ZEROPAD         0x0004
#define RTSTR_F_SPECIAL         0x0008
#define RTSTR_F_VALSIGNED       0x0010
#define RTSTR_F_PLUS            0x0020
#define RTSTR_F_BLANK           0x0040
#define RTSTR_F_WIDTH           0x0080
#define RTSTR_F_PRECISION       0x0100

/** Mask covering all the RTSTR_F_*BIT size flags below. */
#define RTSTR_F_BIT_MASK        0xf800
#define RTSTR_F_8BIT            0x0800
#define RTSTR_F_16BIT           0x1000
#define RTSTR_F_32BIT           0x2000
#define RTSTR_F_64BIT           0x4000
#define RTSTR_F_128BIT          0x8000
/** @} */

/** @def RTSTR_GET_BIT_FLAG
 * Gets the bit flag (one of the RTSTR_F_*BIT values) for the specified type.
 *
 * sizeof() reports the size in bytes, so it is scaled by 8 before being
 * compared against the bit widths the flags are named after (this assumes
 * 8-bit bytes).
 *
 * @returns The matching RTSTR_F_*BIT flag, 0 if the type is not 8, 16, 32,
 *          64 or 128 bits wide.
 * @param   type    The type to get the size flag for.
 */
#define RTSTR_GET_BIT_FLAG(type) \
    ( sizeof(type) * 8 == 32  ? RTSTR_F_32BIT \
    : sizeof(type) * 8 == 64  ? RTSTR_F_64BIT \
    : sizeof(type) * 8 == 16  ? RTSTR_F_16BIT \
    : sizeof(type) * 8 == 8   ? RTSTR_F_8BIT \
    : sizeof(type) * 8 == 128 ? RTSTR_F_128BIT \
    : 0)
462
463
464/**
465 * Callback to format non-standard format specifiers.
466 *
467 * @returns The number of bytes formatted.
468 * @param pvArg Formatter argument.
469 * @param pfnOutput Pointer to output function.
470 * @param pvArgOutput Argument for the output function.
471 * @param ppszFormat Pointer to the format string pointer. Advance this till the char
472 * after the format specifier.
473 * @param pArgs Pointer to the argument list. Use this to fetch the arguments.
474 * @param cchWidth Format Width. -1 if not specified.
475 * @param cchPrecision Format Precision. -1 if not specified.
476 * @param fFlags Flags (RTSTR_F_*).
477 * @param chArgSize The argument size specifier, 'l' or 'L'.
478 */
479typedef DECLCALLBACK(int) FNSTRFORMAT(void *pvArg, PFNRTSTROUTPUT pfnOutput, void *pvArgOutput,
480 const char **ppszFormat, va_list *pArgs, int cchWidth, int cchPrecision, unsigned fFlags, char chArgSize);
481/** Pointer to a FNSTRFORMAT() function. */
482typedef FNSTRFORMAT *PFNSTRFORMAT;
483
484
485/**
486 * Partial implementation of a printf like formatter.
487 * It doesn't do everything correct, and there is no floating point support.
488 * However, it supports custom formats by the means of a format callback.
489 *
490 * @returns number of bytes formatted.
491 * @param pfnOutput Output worker.
492 * Called in two ways. Normally with a string and its length.
493 * For termination, it's called with NULL for string, 0 for length.
494 * @param pvArgOutput Argument to the output worker.
495 * @param pfnFormat Custom format worker.
496 * @param pvArgFormat Argument to the format worker.
497 * @param pszFormat Format string pointer.
498 * @param args Argument list.
499 */
500RTDECL(size_t) RTStrFormatV(PFNRTSTROUTPUT pfnOutput, void *pvArgOutput, PFNSTRFORMAT pfnFormat, void *pvArgFormat, const char *pszFormat, va_list args);
501
502/**
503 * Partial implementation of a printf like formatter.
504 * It doesn't do everything correct, and there is no floating point support.
505 * However, it supports custom formats by the means of a format callback.
506 *
507 * @returns number of bytes formatted.
508 * @param pfnOutput Output worker.
509 * Called in two ways. Normally with a string and its length.
510 * For termination, it's called with NULL for string, 0 for length.
511 * @param pvArgOutput Argument to the output worker.
512 * @param pfnFormat Custom format worker.
513 * @param pvArgFormat Argument to the format worker.
514 * @param pszFormat Format string.
515 * @param ... Argument list.
516 */
517RTDECL(size_t) RTStrFormat(PFNRTSTROUTPUT pfnOutput, void *pvArgOutput, PFNSTRFORMAT pfnFormat, void *pvArgFormat, const char *pszFormat, ...);
518
519/**
520 * Formats an integer number according to the parameters.
521 *
522 * @returns Length of the formatted number.
523 * @param psz Pointer to output string buffer of sufficient size.
524 * @param u64Value Value to format.
525 * @param uiBase Number representation base.
526 * @param cchWidth Width.
527 * @param cchPrecision Precision.
528 * @param fFlags Flags (RTSTR_F_*).
529 */
530RTDECL(int) RTStrFormatNumber(char *psz, uint64_t u64Value, unsigned int uiBase, signed int cchWidth, signed int cchPrecision, unsigned int fFlags);
531
532/**
533 * String printf.
534 *
535 * @returns The length of the returned string (in pszBuffer).
536 * @param pszBuffer Output buffer.
537 * @param cchBuffer Size of the output buffer.
538 * @param pszFormat The format string.
539 * @param args The format argument.
540 */
541RTDECL(size_t) RTStrPrintfV(char *pszBuffer, size_t cchBuffer, const char *pszFormat, va_list args);
542
543/**
544 * String printf.
545 *
546 * @returns The length of the returned string (in pszBuffer).
547 * @param pszBuffer Output buffer.
548 * @param cchBuffer Size of the output buffer.
549 * @param pszFormat The format string.
550 * @param ... The format argument.
551 */
552RTDECL(size_t) RTStrPrintf(char *pszBuffer, size_t cchBuffer, const char *pszFormat, ...);
553
554
555/**
556 * String printf with custom formatting.
557 *
558 * @returns The length of the returned string (in pszBuffer).
559 * @param pfnFormat Pointer to handler function for the custom formats.
560 * @param pvArg Argument to the pfnFormat function.
561 * @param pszBuffer Output buffer.
562 * @param cchBuffer Size of the output buffer.
563 * @param pszFormat The format string.
564 * @param args The format argument.
565 */
566RTDECL(size_t) RTStrPrintfExV(PFNSTRFORMAT pfnFormat, void *pvArg, char *pszBuffer, size_t cchBuffer, const char *pszFormat, va_list args);
567
568/**
569 * String printf with custom formatting.
570 *
571 * @returns The length of the returned string (in pszBuffer).
572 * @param pfnFormat Pointer to handler function for the custom formats.
573 * @param pvArg Argument to the pfnFormat function.
574 * @param pszBuffer Output buffer.
575 * @param cchBuffer Size of the output buffer.
576 * @param pszFormat The format string.
577 * @param ... The format argument.
578 */
579RTDECL(size_t) RTStrPrintfEx(PFNSTRFORMAT pfnFormat, void *pvArg, char *pszBuffer, size_t cchBuffer, const char *pszFormat, ...);
580
581
582/**
583 * Allocating string printf.
584 *
585 * @returns The length of the string in the returned *ppszBuffer.
586 * @returns -1 on failure.
587 * @param ppszBuffer Where to store the pointer to the allocated output buffer.
588 * The buffer should be freed using RTStrFree().
589 * On failure *ppszBuffer will be set to NULL.
590 * @param pszFormat The format string.
591 * @param args The format argument.
592 */
593RTDECL(int) RTStrAPrintfV(char **ppszBuffer, const char *pszFormat, va_list args);
594
595/**
596 * Allocating string printf.
597 *
598 * @returns The length of the string in the returned *ppszBuffer.
599 * @returns -1 on failure.
600 * @param ppszBuffer Where to store the pointer to the allocated output buffer.
601 * The buffer should be freed using RTStrFree().
602 * On failure *ppszBuffer will be set to NULL.
603 * @param pszFormat The format string.
604 * @param ... The format argument.
605 */
606RTDECL(int) RTStrAPrintf(char **ppszBuffer, const char *pszFormat, ...);
607
608
609/**
610 * Strips blankspaces from both ends of the string.
611 *
612 * @returns Pointer to first non-blank char in the string.
613 * @param psz The string to strip.
614 */
615RTDECL(char *) RTStrStrip(char *psz);
616
617/**
618 * Strips blankspaces from the start of the string.
619 *
620 * @returns Pointer to first non-blank char in the string.
621 * @param psz The string to strip.
622 */
623RTDECL(char *) RTStrStripL(const char *psz);
624
625/**
626 * Strips blankspaces from the end of the string.
627 *
628 * @returns psz.
629 * @param psz The string to strip.
630 */
631RTDECL(char *) RTStrStripR(char *psz);
632
633
634/** @defgroup rt_str_conv String To/From Number Conversions
635 * @ingroup grp_rt_str
636 * @{ */
637
638/**
639 * Converts a string representation of a number to a 64-bit unsigned number.
640 *
641 * @returns iprt status code.
642 * Warnings are used to indicate conversion problems.
643 * @param pszValue Pointer to the string value.
644 * @param ppszNext Where to store the pointer to the first char following the number. (Optional)
645 * @param uBase The base of the representation used.
646 * If 0 the function will look for known prefixes before defaulting to 10.
647 * @param pu64 Where to store the converted number. (optional)
648 */
649RTDECL(int) RTStrToUInt64Ex(const char *pszValue, char **ppszNext, unsigned uBase, uint64_t *pu64);
650
651/**
652 * Converts a string representation of a number to a 64-bit unsigned number.
653 * The base is guessed.
654 *
655 * @returns 64-bit unsigned number on success.
656 * @returns 0 on failure.
657 * @param pszValue Pointer to the string value.
658 */
659RTDECL(uint64_t) RTStrToUInt64(const char *pszValue);
660
661/**
662 * Converts a string representation of a number to a 32-bit unsigned number.
663 *
664 * @returns iprt status code.
665 * Warnings are used to indicate conversion problems.
666 * @param pszValue Pointer to the string value.
667 * @param ppszNext Where to store the pointer to the first char following the number. (Optional)
668 * @param uBase The base of the representation used.
669 * If 0 the function will look for known prefixes before defaulting to 10.
670 * @param pu32 Where to store the converted number. (optional)
671 */
672RTDECL(int) RTStrToUInt32Ex(const char *pszValue, char **ppszNext, unsigned uBase, uint32_t *pu32);
673
674/**
675 * Converts a string representation of a number to a 32-bit unsigned number.
676 * The base is guessed.
677 *
678 * @returns 32-bit unsigned number on success.
679 * @returns 0 on failure.
680 * @param pszValue Pointer to the string value.
681 */
682RTDECL(uint32_t) RTStrToUInt32(const char *pszValue);
683
684/**
685 * Converts a string representation of a number to a 16-bit unsigned number.
686 *
687 * @returns iprt status code.
688 * Warnings are used to indicate conversion problems.
689 * @param pszValue Pointer to the string value.
690 * @param ppszNext Where to store the pointer to the first char following the number. (Optional)
691 * @param uBase The base of the representation used.
692 * If 0 the function will look for known prefixes before defaulting to 10.
693 * @param pu16 Where to store the converted number. (optional)
694 */
695RTDECL(int) RTStrToUInt16Ex(const char *pszValue, char **ppszNext, unsigned uBase, uint16_t *pu16);
696
697/**
698 * Converts a string representation of a number to a 16-bit unsigned number.
699 * The base is guessed.
700 *
701 * @returns 16-bit unsigned number on success.
702 * @returns 0 on failure.
703 * @param pszValue Pointer to the string value.
704 */
705RTDECL(uint16_t) RTStrToUInt16(const char *pszValue);
706
707/**
708 * Converts a string representation of a number to a 8-bit unsigned number.
709 *
710 * @returns iprt status code.
711 * Warnings are used to indicate conversion problems.
712 * @param pszValue Pointer to the string value.
713 * @param ppszNext Where to store the pointer to the first char following the number. (Optional)
714 * @param uBase The base of the representation used.
715 * If 0 the function will look for known prefixes before defaulting to 10.
716 * @param pu8 Where to store the converted number. (optional)
717 */
718RTDECL(int) RTStrToUInt8Ex(const char *pszValue, char **ppszNext, unsigned uBase, uint8_t *pu8);
719
720/**
721 * Converts a string representation of a number to a 8-bit unsigned number.
722 * The base is guessed.
723 *
724 * @returns 8-bit unsigned number on success.
725 * @returns 0 on failure.
726 * @param pszValue Pointer to the string value.
727 */
728RTDECL(uint8_t) RTStrToUInt8(const char *pszValue);
729
730/**
731 * Converts a string representation of a number to a 64-bit signed number.
732 *
733 * @returns iprt status code.
734 * Warnings are used to indicate conversion problems.
735 * @param pszValue Pointer to the string value.
736 * @param ppszNext Where to store the pointer to the first char following the number. (Optional)
737 * @param uBase The base of the representation used.
738 * If 0 the function will look for known prefixes before defaulting to 10.
739 * @param pi64 Where to store the converted number. (optional)
740 */
741RTDECL(int) RTStrToInt64Ex(const char *pszValue, char **ppszNext, unsigned uBase, int64_t *pi64);
742
743/**
744 * Converts a string representation of a number to a 64-bit signed number.
745 * The base is guessed.
746 *
747 * @returns 64-bit signed number on success.
748 * @returns 0 on failure.
749 * @param pszValue Pointer to the string value.
750 */
751RTDECL(int64_t) RTStrToInt64(const char *pszValue);
752
753/**
754 * Converts a string representation of a number to a 32-bit signed number.
755 *
756 * @returns iprt status code.
757 * Warnings are used to indicate conversion problems.
758 * @param pszValue Pointer to the string value.
759 * @param ppszNext Where to store the pointer to the first char following the number. (Optional)
760 * @param uBase The base of the representation used.
761 * If 0 the function will look for known prefixes before defaulting to 10.
762 * @param pi32 Where to store the converted number. (optional)
763 */
764RTDECL(int) RTStrToInt32Ex(const char *pszValue, char **ppszNext, unsigned uBase, int32_t *pi32);
765
766/**
767 * Converts a string representation of a number to a 32-bit signed number.
768 * The base is guessed.
769 *
770 * @returns 32-bit signed number on success.
771 * @returns 0 on failure.
772 * @param pszValue Pointer to the string value.
773 */
774RTDECL(int32_t) RTStrToInt32(const char *pszValue);
775
776/**
777 * Converts a string representation of a number to a 16-bit signed number.
778 *
779 * @returns iprt status code.
780 * Warnings are used to indicate conversion problems.
781 * @param pszValue Pointer to the string value.
782 * @param ppszNext Where to store the pointer to the first char following the number. (Optional)
783 * @param uBase The base of the representation used.
784 * If 0 the function will look for known prefixes before defaulting to 10.
785 * @param pi16 Where to store the converted number. (optional)
786 */
787RTDECL(int) RTStrToInt16Ex(const char *pszValue, char **ppszNext, unsigned uBase, int16_t *pi16);
788
789/**
790 * Converts a string representation of a number to a 16-bit signed number.
791 * The base is guessed.
792 *
793 * @returns 16-bit signed number on success.
794 * @returns 0 on failure.
795 * @param pszValue Pointer to the string value.
796 */
797RTDECL(int16_t) RTStrToInt16(const char *pszValue);
798
799/**
800 * Converts a string representation of a number to a 8-bit signed number.
801 *
802 * @returns iprt status code.
803 * Warnings are used to indicate conversion problems.
804 * @param pszValue Pointer to the string value.
805 * @param ppszNext Where to store the pointer to the first char following the number. (Optional)
806 * @param uBase The base of the representation used.
807 * If 0 the function will look for known prefixes before defaulting to 10.
808 * @param pi8 Where to store the converted number. (optional)
809 */
810RTDECL(int) RTStrToInt8Ex(const char *pszValue, char **ppszNext, unsigned uBase, int8_t *pi8);
811
812/**
813 * Converts a string representation of a number to a 8-bit signed number.
814 * The base is guessed.
815 *
816 * @returns 8-bit signed number on success.
817 * @returns 0 on failure.
818 * @param pszValue Pointer to the string value.
819 */
820RTDECL(int8_t) RTStrToInt8(const char *pszValue);
821
822/**
823 * Performs a case insensitive string compare between two UTF-8 strings.
824 *
825 * This is a simplified compare, as only the simplified lower/upper case folding
826 * specified by the unicode specs are used. It does not consider character pairs
827 * as they are used in some languages, just simple upper & lower case compares.
828 *
829 * @returns < 0 if the first string less than the second string.
830 * @returns 0 if the first string identical to the second string.
831 * @returns > 0 if the first string greater than the second string.
832 * @param psz1 First UTF-8 string.
833 * @param psz2 Second UTF-8 string.
834 */
835RTDECL(int) RTStrICmp(const char *psz1, const char *psz2);
836
837/** @} */
838
839
840/** @defgroup rt_str_space Unique String Space
841 * @ingroup grp_rt_str
842 * @{
843 */
844
845/** Pointer to a string name space container node core. */
846typedef struct RTSTRSPACECORE *PRTSTRSPACECORE;
847/** Pointer to a pointer to a string name space container node core. */
848typedef PRTSTRSPACECORE *PPRTSTRSPACECORE;
849
850/**
851 * String name space container node core (the node type taken by RTStrSpaceInsert()).
852 */
853typedef struct RTSTRSPACECORE
854{
855 /** Hash key. Don't touch. */
856 uint32_t Key;
857 /** Pointer to the left leaf node. Don't touch. */
858 PRTSTRSPACECORE pLeft;
859 /** Pointer to the right leaf node. Don't touch. */
860 PRTSTRSPACECORE pRight;
861 /** Pointer to the list of strings with the same key. Don't touch. */
862 PRTSTRSPACECORE pList;
863 /** Height of this tree: max(height(left), height(right)) + 1. Don't touch. */
864 unsigned char uchHeight;
865 /** The string length. Read only! */
866 size_t cchString;
867 /** Pointer to the string. Read only! */
868 const char * pszString;
869} RTSTRSPACECORE;
870
871/** String space. (Initialize with NULL.) */
872typedef PRTSTRSPACECORE RTSTRSPACE;
873/** Pointer to a string space. */
874typedef PPRTSTRSPACECORE PRTSTRSPACE;
875
876
877/**
878 * Inserts a string into a unique string space.
879 *
880 * @returns true on success.
881 * @returns false if the string collided with an existing string.
882 * @param pStrSpace The space to insert it into.
883 * @param pStr The string node.
884 */
885RTDECL(bool) RTStrSpaceInsert(PRTSTRSPACE pStrSpace, PRTSTRSPACECORE pStr);
886
887/**
888 * Removes a string from a unique string space.
889 *
890 * @returns Pointer to the removed string node.
891 * @returns NULL if the string was not found in the string space.
892 * @param pStrSpace The space to remove it from.
893 * @param pszString The string to remove.
894 */
895RTDECL(PRTSTRSPACECORE) RTStrSpaceRemove(PRTSTRSPACE pStrSpace, const char *pszString);
896
897/**
898 * Gets a string from a unique string space.
899 *
900 * @returns Pointer to the string node.
901 * @returns NULL if the string was not found in the string space.
902 * @param pStrSpace The space to get it from.
903 * @param pszString The string to get.
904 */
905RTDECL(PRTSTRSPACECORE) RTStrSpaceGet(PRTSTRSPACE pStrSpace, const char *pszString);
906
907/**
908 * Callback function for RTStrSpaceEnumerate() and RTStrSpaceDestroy().
909 *
910 * @returns 0 on continue.
911 * @returns Non-zero to abort the operation.
912 * @param pStr The string node
913 * @param pvUser The user specified argument.
914 */
915typedef DECLCALLBACK(int) FNRTSTRSPACECALLBACK(PRTSTRSPACECORE pStr, void *pvUser);
916/** Pointer to callback function for RTStrSpaceEnumerate() and RTStrSpaceDestroy(). */
917typedef FNRTSTRSPACECALLBACK *PFNRTSTRSPACECALLBACK;
918
919/**
920 * Destroys the string space.
921 * The caller supplies a callback which will be called for each of
922 * the string nodes for freeing their memory and other resources.
923 *
924 * @returns 0 or what ever non-zero return value pfnCallback returned
925 * when aborting the destruction.
926 * @param pStrSpace The space to destroy.
927 * @param pfnCallback The callback.
928 * @param pvUser The user argument.
929 */
930RTDECL(int) RTStrSpaceDestroy(PRTSTRSPACE pStrSpace, PFNRTSTRSPACECALLBACK pfnCallback, void *pvUser);
931
932/**
933 * Enumerates the string space.
934 * The caller supplies a callback which will be called for each of
935 * the string nodes.
936 *
937 * @returns 0 or what ever non-zero return value pfnCallback returned
938 * when aborting the enumeration.
939 * @param pStrSpace The space to enumerate.
940 * @param pfnCallback The callback.
941 * @param pvUser The user argument.
942 */
943RTDECL(int) RTStrSpaceEnumerate(PRTSTRSPACE pStrSpace, PFNRTSTRSPACECALLBACK pfnCallback, void *pvUser);
944
945/** @} */
946
947
948/** @defgroup rt_str_utf16 UTF-16 String Manipulation
949 * @ingroup grp_rt_str
950 * @{
951 */
952
953/**
954 * Free a UTF-16 string allocated by RTStrUtf8ToUtf16(), RTStrUtf8ToUtf16Ex(),
955 * RTUtf16Dup() or RTUtf16DupEx().
956 *
957 * This function does not return a value.
958 * @param pwszString The UTF-16 string to free. NULL is accepted.
959 */
960RTDECL(void) RTUtf16Free(PRTUTF16 pwszString);
961
962/**
963 * Allocates a new copy of the specified UTF-16 string.
964 *
965 * @returns Pointer to the allocated string copy. Use RTUtf16Free() to free it.
966 * @returns NULL when out of memory.
967 * @param pwszString UTF-16 string to duplicate.
968 * @remark This function will not make any attempt to validate the encoding.
969 */
970RTDECL(PRTUTF16) RTUtf16Dup(PCRTUTF16 pwszString);
971
972/**
973 * Allocates a new copy of the specified UTF-16 string.
974 *
975 * @returns iprt status code.
976 * @param ppwszString Receives pointer of the allocated UTF-16 string.
977 * The returned pointer must be freed using RTUtf16Free().
978 * @param pwszString UTF-16 string to duplicate.
979 * @param cwcExtra Number of extra RTUTF16 items to allocate.
980 * @remark This function will not make any attempt to validate the encoding.
981 */
982RTDECL(int) RTUtf16DupEx(PRTUTF16 *ppwszString, PCRTUTF16 pwszString, size_t cwcExtra);
983
984/**
985 * Returns the length of a UTF-16 string in UTF-16 characters
986 * without trailing '\\0'.
987 *
988 * Surrogate pairs count as two UTF-16 characters here. Use RTUtf16CpCnt()
989 * to get the exact number of code points in the string.
990 *
991 * @returns The number of RTUTF16 items in the string.
992 * @param pwszString Pointer to the UTF-16 string.
993 * @remark This function will not make any attempt to validate the encoding.
994 */
995RTDECL(size_t) RTUtf16Len(PCRTUTF16 pwszString);
996
997/**
998 * Performs a case sensitive string compare between two UTF-16 strings.
999 *
1000 * @returns < 0 if the first string less than the second string.
1001 * @returns 0 if the first string identical to the second string.
1002 * @returns > 0 if the first string greater than the second string.
1003 * @param pwsz1 First UTF-16 string.
1004 * @param pwsz2 Second UTF-16 string.
1005 * @remark This function will not make any attempt to validate the encoding.
1006 */
1007RTDECL(int) RTUtf16Cmp(register PCRTUTF16 pwsz1, register PCRTUTF16 pwsz2);
1008
1009/**
1010 * Performs a case insensitive string compare between two UTF-16 strings.
1011 *
1012 * This is a simplified compare, as only the simplified lower/upper case folding
1013 * specified by the unicode specs are used. It does not consider character pairs
1014 * as they are used in some languages, just simple upper & lower case compares.
1015 *
1016 * @returns < 0 if the first string less than the second string.
1017 * @returns 0 if the first string identical to the second string.
1018 * @returns > 0 if the first string greater than the second string.
1019 * @param pwsz1 First UTF-16 string.
1020 * @param pwsz2 Second UTF-16 string.
1021 */
1022RTDECL(int) RTUtf16ICmp(PCRTUTF16 pwsz1, PCRTUTF16 pwsz2);
1023
1024/**
1025 * Performs a case insensitive string compare between two UTF-16 strings
1026 * using the current locale of the process (if applicable).
1027 *
1028 * This differs from RTUtf16ICmp() in that it will try, if a locale with the
1029 * required data is available, to do a correct case-insensitive compare. It
1030 * follows that it is more complex and thereby likely to be more expensive.
1031 *
1032 * @returns < 0 if the first string less than the second string.
1033 * @returns 0 if the first string identical to the second string.
1034 * @returns > 0 if the first string greater than the second string.
1035 * @param pwsz1 First UTF-16 string.
1036 * @param pwsz2 Second UTF-16 string.
1037 */
1038RTDECL(int) RTUtf16LocaleICmp(PCRTUTF16 pwsz1, PCRTUTF16 pwsz2);
1039
1040/**
1041 * Folds a UTF-16 string to lowercase.
1042 *
1043 * This is a very simple folding; it uses the simple lowercase
1044 * code point, it is not related to any locale just the most common
1045 * lowercase codepoint setup by the unicode specs, and it will not
1046 * create new surrogate pairs or remove existing ones.
1047 *
1048 * @returns Pointer to the passed in string.
1049 * @param pwsz The string to fold.
1050 */
1051RTDECL(PRTUTF16) RTUtf16ToLower(PRTUTF16 pwsz);
1052
1053/**
1054 * Folds a UTF-16 string to uppercase.
1055 *
1056 * This is a very simple folding; it uses the simple uppercase
1057 * code point, it is not related to any locale just the most common
1058 * uppercase codepoint setup by the unicode specs, and it will not
1059 * create new surrogate pairs or remove existing ones.
1060 *
1061 * @returns Pointer to the passed in string.
1062 * @param pwsz The string to fold.
1063 */
1064RTDECL(PRTUTF16) RTUtf16ToUpper(PRTUTF16 pwsz);
1065
1066/**
1067 * Translate a UTF-16 string into a UTF-8 allocating the result buffer.
1068 *
1069 * @returns iprt status code.
1070 * @param pwszString UTF-16 string to convert.
1071 * @param ppszString Receives pointer of allocated UTF-8 string.
1072 * The returned pointer must be freed using RTStrFree().
1073 */
1074RTDECL(int) RTUtf16ToUtf8(PCRTUTF16 pwszString, char **ppszString);
1075
1076/**
1077 * Translates UTF-16 to UTF-8 using buffer provided by the caller or
1078 * a fittingly sized buffer allocated by the function.
1079 *
1080 * @returns iprt status code.
1081 * @param pwszString The UTF-16 string to convert.
1082 * @param cwcString The number of RTUTF16 items to translate from pwszString.
1083 * The translate will stop when reaching cwcString or the terminator ('\\0').
1084 * Use RTSTR_MAX to translate the entire string.
1085 * @param ppsz If cch is non-zero, this must either be pointing to pointer to
1086 * a buffer of the specified size, or pointer to a NULL pointer.
1087 * If *ppsz is NULL or cch is zero a buffer of at least cch chars
1088 * will be allocated to hold the translated string.
1089 * If a buffer was requested it must be freed using RTStrFree().
1090 * @param cch The buffer size in chars (the type). This includes the terminator.
1091 * @param pcch Where to store the length of the translated string. (Optional)
1092 * This field will be updated even on failure, however the value is only
1093 * specified for the following two error codes. On VERR_BUFFER_OVERFLOW
1094 * and VERR_NO_STR_MEMORY it contains the required buffer space.
1095 */
1096RTDECL(int) RTUtf16ToUtf8Ex(PCRTUTF16 pwszString, size_t cwcString, char **ppsz, size_t cch, size_t *pcch);
1097
1098
1099/**
1100 * Allocates tmp buffer, translates pwszString from UCS-2 to UTF8.
1101 *
1102 * @returns iprt status code.
1103 * @param ppszString Receives pointer of allocated UTF8 string.
1104 * The returned pointer must be freed using RTStrFree().
1105 * @param pwszString UCS-2 string to convert.
1106 * @deprecated Use RTUtf16ToUtf8().
1107 */
1108DECLINLINE(int) RTStrUcs2ToUtf8(char **ppszString, PCRTUCS2 pwszString)
1109{
1110 return RTUtf16ToUtf8(pwszString, ppszString);
1111}
1112
1113/**
1114 * Translates UCS-2 to UTF-8 using buffer provided by the caller or
1115 * a fittingly sized buffer allocated by the function.
1116 *
1117 * @returns iprt status code.
1118 * @param ppszString If cch is not zero, this points to the pointer to the
1119 * buffer where the converted string shall be resulted.
1120 * If cch is zero, this is where the pointer to the allocated
1121 * buffer with the converted string is stored. The allocated
1122 * buffer must be freed by using RTStrFree().
1123 * @param cch Size of the passed in buffer (*ppszString).
1124 * If 0 a fittingly sized buffer is allocated.
1125 * @param pwszString UCS-2 string to convert.
1126 * @deprecated
1127 */
1128DECLINLINE(int) RTStrUcs2ToUtf8Ex(char **ppszString, size_t cch, PCRTUCS2 pwszString)
1129{
1130 return RTUtf16ToUtf8Ex(pwszString, RTSTR_MAX, ppszString, cch, NULL);
1131}
1132
1133/**
1134 * Free a UCS-2 string allocated by RTStrUtf8ToUcs2().
1135 *
1136 * @returns iprt status code.
1137 * @param pwszString Pointer to buffer with unicode string to free.
1138 * NULL is accepted.
1139 * @deprecated
1140 */
1141DECLINLINE(void) RTStrUcs2Free(PRTUCS2 pwszString)
1142{
1143 RTUtf16Free(pwszString);
1144}
1145
1146/**
1147 * Allocates a new copy of the given UCS-2 string.
1148 *
1149 * @returns Pointer to the allocated string copy. Use RTStrUcs2Free() to free it.
1150 * @returns NULL when out of memory.
1151 * @param pwszString UCS-2 string to duplicate.
1152 * @deprecated
1153 */
1154DECLINLINE(PRTUCS2) RTStrUcs2Dup(PCRTUCS2 pwszString)
1155{
1156 return RTUtf16Dup(pwszString);
1157}
1158
1159/**
1160 * Allocates a new copy of the given UCS-2 string.
1161 *
1162 * @returns iprt status code.
1163 * @param ppwszString Receives pointer of the allocated UCS-2 string.
1164 * The returned pointer must be freed using RTStrUcs2Free().
1165 * @param pwszString UCS-2 string to duplicate.
1166 * @deprecated
1167 */
1168DECLINLINE(int) RTStrUcs2DupEx(PRTUCS2 *ppwszString, PCRTUCS2 pwszString)
1169{
1170 return RTUtf16DupEx(ppwszString, pwszString, 0);
1171}
1172
1173/**
1174 * Returns the length of a UCS-2 string in UCS-2 characters
1175 * without trailing '\\0'.
1176 *
1177 * @returns Length of input string in UCS-2 characters.
1178 * @param pwszString Pointer the UCS-2 string.
1179 * @deprecated
1180 */
1181DECLINLINE(size_t) RTStrUcs2Len(PCRTUCS2 pwszString)
1182{
1183 return RTUtf16Len(pwszString);
1184}
1185
1186/**
1187 * Performs a case sensitive string compare between two UCS-2 strings.
1188 *
1189 * @returns < 0 if the first string less than the second string.
1190 * @returns 0 if the first string identical to the second string.
1191 * @returns > 0 if the first string greater than the second string.
1192 * @param pwsz1 First UCS-2 string.
1193 * @param pwsz2 Second UCS-2 string.
1194 * @deprecated
1195 */
1196DECLINLINE(int) RTStrUcs2Cmp(register PCRTUCS2 pwsz1, register PCRTUCS2 pwsz2)
1197{
1198 return RTUtf16Cmp(pwsz1, pwsz2);
1199}
1200
1201
1202/**
1203 * Get the unicode code point at the given string position.
1204 *
1205 * @returns unicode code point.
1206 * @returns RTUNICP_INVALID if the encoding is invalid.
1207 * @param pwsz The string.
1208 *
1209 * @remark This is an internal worker for RTUtf16GetCp().
1210 */
1211RTDECL(RTUNICP) RTUtf16GetCpInternal(PCRTUTF16 pwsz);
1212
1213/**
1214 * Get the unicode code point at the given string position.
1215 *
1216 * @returns iprt status code.
1217 * @param ppwsz Pointer to the string pointer. This will be updated to
1218 * point to the char following the current code point.
1219 * @param pCp Where to store the code point.
1220 * RTUNICP_INVALID is stored here on failure.
1221 *
1222 * @remark This is an internal worker for RTUtf16GetCpEx().
1223 */
1224RTDECL(int) RTUtf16GetCpExInternal(PCRTUTF16 *ppwsz, PRTUNICP pCp);
1225
1226/**
1227 * Put the unicode code point at the given string position
1228 * and return the pointer to the char following it.
1229 *
1230 * This function will not consider anything at or following the
1231 * buffer area pointed to by pwsz. It is therefore not suitable for
1232 * inserting code points into a string, only appending/overwriting.
1233 *
1234 * @returns pointer to the char following the written code point.
1235 * @param pwsz The string.
1236 * @param CodePoint The code point to write.
1237 * This should not be RTUNICP_INVALID or any other character
1238 * out of the UTF-16 range.
1239 *
1240 * @remark This is an internal worker for RTUtf16PutCp().
1241 */
1242RTDECL(PRTUTF16) RTUtf16PutCpInternal(PRTUTF16 pwsz, RTUNICP CodePoint);
1243
1244/**
1245 * Get the unicode code point at the given string position.
1246 *
1247 * @returns unicode code point.
1248 * @returns RTUNICP_INVALID if the encoding is invalid.
1249 * @param pwsz The string.
1250 *
1251 * @remark We optimize this operation by using an inline function for
1252 * everything which isn't a surrogate pair or an endian indicator.
1253 */
1254DECLINLINE(RTUNICP) RTUtf16GetCp(PCRTUTF16 pwsz)
1255{
1256 const RTUTF16 wc = *pwsz;
1257 if (wc < 0xd800 || (wc > 0xdfff && wc < 0xfffe))
1258 return wc;
1259 return RTUtf16GetCpInternal(pwsz);
1260}
1261
1262/**
1263 * Get the unicode code point at the given string position.
1264 *
1265 * @returns iprt status code.
1266 * @param ppwsz Pointer to the string pointer. This will be updated to
1267 * point to the char following the current code point.
1268 * @param pCp Where to store the code point.
1269 * RTUNICP_INVALID is stored here on failure.
1270 *
1271 * @remark We optimize this operation by using an inline function for
1272 * everything which isn't a surrogate pair or and endian indicator.
1273 */
1274DECLINLINE(int) RTUtf16GetCpEx(PCRTUTF16 *ppwsz, PRTUNICP pCp)
1275{
1276 const RTUTF16 wc = **ppwsz;
1277 if (wc < 0xd800 || (wc > 0xdfff && wc < 0xfffe))
1278 {
1279 (*ppwsz)++;
1280 *pCp = wc;
1281 return VINF_SUCCESS;
1282 }
1283 return RTUtf16GetCpExInternal(ppwsz, pCp);
1284}
1285
1286/**
1287 * Put the unicode code point at the given string position
1288 * and return the pointer to the char following it.
1289 *
1290 * This function will not consider anything at or following the the
1291 * buffer area pointed to by pwsz. It is therefore not suitable for
1292 * inserting code points into a string, only appending/overwriting.
1293 *
1294 * @returns pointer to the char following the written code point.
1295 * @param pwsz The string.
1296 * @param CodePoint The code point to write.
1297 * This sould not be RTUNICP_INVALID or any other charater
1298 * out of the UTF-16 range.
1299 *
1300 * @remark We optimize this operation by using an inline function for
1301 * everything which isn't a surrogate pair or and endian indicator.
1302 */
1303DECLINLINE(PRTUTF16) RTUtf16PutCp(PRTUTF16 pwsz, RTUNICP CodePoint)
1304{
1305 if (CodePoint < 0xd800 || (CodePoint > 0xd800 && CodePoint < 0xfffe))
1306 {
1307 *pwsz++ = (RTUTF16)CodePoint;
1308 return pwsz;
1309 }
1310 return RTUtf16PutCpInternal(pwsz, CodePoint);
1311}
1312
1313/**
1314 * Skips ahead, past the current code point.
1315 *
1316 * @returns Pointer to the char after the current code point.
1317 * @param pwsz Pointer to the current code point.
1318 * @remark This will not move the next valid code point, only past the current one.
1319 */
1320DECLINLINE(PRTUTF16) RTUtf16NextCp(PCRTUTF16 pwsz)
1321{
1322 RTUNICP Cp;
1323 RTUtf16GetCpEx(&pwsz, &Cp);
1324 return (PRTUTF16)pwsz;
1325}
1326
1327/**
1328 * Skips backwards, to the previous code point.
1329 *
1330 * @returns Pointer to the previous code point.
1331 * @param pwszStart Pointer to the start of the string.
1332 * @param pwsz Pointer to the current code point.
1333 */
1334RTDECL(PRTUTF16) RTUtf16PrevCp(PCRTUTF16 pwszStart, PCRTUTF16 pwsz);
1335
1336
1337/**
1338 * Checks if the UTF-16 char is the high surrogate char (i.e.
1339 * the 1st char in the pair).
1340 *
1341 * @returns true if it is.
1342 * @returns false if it isn't.
1343 * @param wc The character to investigate.
1344 */
1345DECLINLINE(bool) RTUtf16IsHighSurrogate(RTUTF16 wc)
1346{
1347 return wc >= 0xd800 && wc <= 0xdbff;
1348}
1349
1350/**
1351 * Checks if the UTF-16 char is the low surrogate char (i.e.
1352 * the 2nd char in the pair).
1353 *
1354 * @returns true if it is.
1355 * @returns false if it isn't.
1356 * @param wc The character to investigate.
1357 */
1358DECLINLINE(bool) RTUtf16IsLowSurrogate(RTUTF16 wc)
1359{
1360 return wc >= 0xdc00 && wc <= 0xdfff;
1361}
1362
1363
1364/**
1365 * Checks if the two UTF-16 chars form a valid surrogate pair.
1366 *
1367 * @returns true if they do.
1368 * @returns false if they doesn't.
1369 * @param wcHigh The high (1st) character.
1370 * @param wcLow The low (2nd) character.
1371 */
1372DECLINLINE(bool) RTUtf16IsSurrogatePair(RTUTF16 wcHigh, RTUTF16 wcLow)
1373{
1374 return RTUtf16IsHighSurrogate(wcHigh)
1375 && RTUtf16IsLowSurrogate(wcLow);
1376}
1377
1378/** @} */
1379
1380__END_DECLS
1381
1382/** @} */
1383
1384#endif
1385
Note: See TracBrowser for help on using the repository browser.

© 2024 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette