VirtualBox

source: vbox/trunk/include/iprt/string.h@ 10949

Last change on this file since 10949 was 10949, checked in by vboxsync, 17 years ago

IPRT: Added RTStrNLen and RTStrNLenEx.

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 60.1 KB
Line 
1/** @file
2 * IPRT - String Manipulation.
3 */
4
5/*
6 * Copyright (C) 2006-2007 Sun Microsystems, Inc.
7 *
8 * This file is part of VirtualBox Open Source Edition (OSE), as
9 * available from http://www.virtualbox.org. This file is free software;
10 * you can redistribute it and/or modify it under the terms of the GNU
11 * General Public License (GPL) as published by the Free Software
12 * Foundation, in version 2 as it comes in the "COPYING" file of the
13 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
14 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
15 *
16 * The contents of this file may alternatively be used under the terms
17 * of the Common Development and Distribution License Version 1.0
18 * (CDDL) only, as it comes in the "COPYING.CDDL" file of the
19 * VirtualBox OSE distribution, in which case the provisions of the
20 * CDDL are applicable instead of those of the GPL.
21 *
22 * You may elect to license modified versions of this file under the
23 * terms and conditions of either the GPL or the CDDL or both.
24 *
25 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa
26 * Clara, CA 95054 USA or visit http://www.sun.com if you need
27 * additional information or have any questions.
28 */
29
30#ifndef ___iprt_string_h
31#define ___iprt_string_h
32
33#include <iprt/cdefs.h>
34#include <iprt/types.h>
35#include <iprt/stdarg.h>
36#include <iprt/err.h> /* for VINF_SUCCESS */
37#if defined(RT_OS_LINUX) && defined(__KERNEL__)
38# include <linux/string.h>
39#elif defined(RT_OS_FREEBSD) && defined(_KERNEL)
40 /*
41 * Kludge for the FreeBSD kernel:
42 * Some of the string.h stuff clashes with sys/libkern.h, so just wrap
43 * it up while including string.h to keep things quiet. It's nothing
44 * important that's clashing, after all.
45 */
46# define strdup strdup_string_h
47# include <string.h>
48# undef strdup
49#elif defined(RT_OS_SOLARIS) && defined(_KERNEL)
50 /*
51 * Same case as with FreeBSD kernel:
52 * The string.h stuff clashes with sys/systm.h
53 * ffs = find first set bit.
54 */
55# define ffs ffs_string_h
56# include <string.h>
57# undef ffs
58# undef strpbrk
59#else
60# include <string.h>
61#endif
62
63/*
64 * Supply prototypes for standard string functions provided by
65 * IPRT instead of the operating environment.
66 */
67#if defined(RT_OS_DARWIN) && defined(KERNEL)
68__BEGIN_DECLS
69void *memchr(const void *pv, int ch, size_t cb);
70char *strpbrk(const char *pszStr, const char *pszChars);
71__END_DECLS
72#endif
73
74
75/** @defgroup grp_rt_str RTStr - String Manipulation
76 * Mostly UTF-8 related helpers where the standard string functions won't do.
77 * @ingroup grp_rt
78 * @{
79 */
80
81__BEGIN_DECLS
82
83
84/**
85 * The maximum string length.
86 */
87#define RTSTR_MAX (~(size_t)0)
88
89
90#ifdef IN_RING3
91
92/**
93 * Allocates tmp buffer, translates pszString from UTF8 to current codepage.
94 *
95 * @returns iprt status code.
96 * @param ppszString Receives pointer of allocated native CP string.
97 * The returned pointer must be freed using RTStrFree().
98 * @param pszString UTF-8 string to convert.
99 */
100RTR3DECL(int) RTStrUtf8ToCurrentCP(char **ppszString, const char *pszString);
101
102/**
103 * Allocates tmp buffer, translates pszString from current codepage to UTF-8.
104 *
105 * @returns iprt status code.
106 * @param ppszString Receives pointer of allocated UTF-8 string.
107 * The returned pointer must be freed using RTStrFree().
108 * @param pszString Native string to convert.
109 */
110RTR3DECL(int) RTStrCurrentCPToUtf8(char **ppszString, const char *pszString);
111
112#endif
113
114/**
115 * Free string allocated by any of the non-UCS-2 string functions.
116 *
117 * Does not return a value.
118 * @param pszString Pointer to buffer with string to free.
119 * NULL is accepted.
120 */
121RTDECL(void) RTStrFree(char *pszString);
122
123/**
124 * Allocates a new copy of the given UTF-8 string.
125 *
126 * @returns Pointer to the allocated UTF-8 string.
127 * @param pszString UTF-8 string to duplicate.
128 */
129RTDECL(char *) RTStrDup(const char *pszString);
130
131/**
132 * Allocates a new copy of the given UTF-8 string.
133 *
134 * @returns iprt status code.
135 * @param ppszString Receives pointer of the allocated UTF-8 string.
136 * The returned pointer must be freed using RTStrFree().
137 * @param pszString UTF-8 string to duplicate.
138 */
139RTDECL(int) RTStrDupEx(char **ppszString, const char *pszString);
140
141/**
142 * Validates the UTF-8 encoding of the string.
143 *
144 * @returns iprt status code.
145 * @param psz The string.
146 */
147RTDECL(int) RTStrValidateEncoding(const char *psz);
148
149/**
150 * Validates the UTF-8 encoding of the string.
151 *
152 * @returns iprt status code.
153 * @param psz The string.
154 * @param cch The max string length. Use RTSTR_MAX to process the entire string.
155 * @param fFlags Reserved for future use. Pass 0.
156 */
157RTDECL(int) RTStrValidateEncodingEx(const char *psz, size_t cch, unsigned fFlags);
158
159/**
160 * Checks if the UTF-8 encoding is valid.
161 *
162 * @returns true / false.
163 * @param psz The string.
164 */
165RTDECL(bool) RTStrIsValidEncoding(const char *psz);
166
167/**
168 * Gets the number of code points the string is made up of, excluding
169 * the terminator.
170 *
171 *
172 * @returns Number of code points (RTUNICP).
173 * @returns 0 if the string was incorrectly encoded.
174 * @param psz The string.
175 */
176RTDECL(size_t) RTStrUniLen(const char *psz);
177
178/**
179 * Gets the number of code points the string is made up of, excluding
180 * the terminator.
181 *
182 * This function will validate the string, and incorrectly encoded UTF-8
183 * strings will be rejected.
184 *
185 * @returns iprt status code.
186 * @param psz The string.
187 * @param cch The max string length. Use RTSTR_MAX to process the entire string.
188 * @param pcuc Where to store the code point count.
189 * This is undefined on failure.
190 */
191RTDECL(int) RTStrUniLenEx(const char *psz, size_t cch, size_t *pcuc);
192
193/**
194 * Translate a UTF-8 string into an unicode string (i.e. RTUNICPs), allocating the string buffer.
195 *
196 * @returns iprt status code.
197 * @param pszString UTF-8 string to convert.
198 * @param ppUniString Receives pointer to the allocated unicode string.
199 * The returned string must be freed using RTUniFree().
200 */
201RTDECL(int) RTStrToUni(const char *pszString, PRTUNICP *ppUniString);
202
203/**
204 * Translates pszString from UTF-8 to an array of code points, allocating the result
205 * array if requested.
206 *
207 * @returns iprt status code.
208 * @param pszString UTF-8 string to convert.
209 * @param cchString The maximum size in chars (the type) to convert. The conversion stop
210 * when it reaches cchString or the string terminator ('\\0').
211 * Use RTSTR_MAX to translate the entire string.
212 * @param ppaCps If cCps is non-zero, this must either be pointing to pointer to
213 * a buffer of the specified size, or pointer to a NULL pointer.
214 * If *ppaCps is NULL or cCps is zero a buffer of at least cCps items
215 * will be allocated to hold the translated string.
216 * If a buffer was requested it must be freed using RTUniFree().
217 * @param cCps The number of code points in the unicode string. This includes the terminator.
218 * @param pcCps Where to store the length of the translated string. (Optional)
219 * This field will be updated even on failure, however the value is only
220 * specified for the following two error codes. On VERR_BUFFER_OVERFLOW
221 * and VERR_NO_STR_MEMORY it contains the required buffer space.
222 */
223RTDECL(int) RTStrToUniEx(const char *pszString, size_t cchString, PRTUNICP *ppaCps, size_t cCps, size_t *pcCps);
224
225/**
226 * Calculates the length of the string in RTUTF16 items.
227 *
228 * This function will validate the string, and incorrectly encoded UTF-8
229 * strings will be rejected. The primary purpose of this function is to
230 * help allocate buffers for RTStrToUtf16Ex of the correct size. For most
231 * other purposes RTStrCalcUtf16LenEx() should be used.
232 *
233 * @returns Number of RTUTF16 items.
234 * @returns 0 if the string was incorrectly encoded.
235 * @param psz The string.
236 */
237RTDECL(size_t) RTStrCalcUtf16Len(const char *psz);
238
239/**
240 * Calculates the length of the string in RTUTF16 items.
241 *
242 * This function will validate the string, and incorrectly encoded UTF-8
243 * strings will be rejected.
244 *
245 * @returns iprt status code.
246 * @param psz The string.
247 * @param cch The max string length. Use RTSTR_MAX to process the entire string.
248 * @param pcwc Where to store the string length. Optional.
249 * This is undefined on failure.
250 */
251RTDECL(int) RTStrCalcUtf16LenEx(const char *psz, size_t cch, size_t *pcwc);
252
253/**
254 * Translate a UTF-8 string into a UTF-16 allocating the result buffer.
255 *
256 * @returns iprt status code.
257 * @param pszString UTF-8 string to convert.
258 * @param ppwszString Receives pointer to the allocated UTF-16 string.
259 * The returned string must be freed using RTUtf16Free().
260 */
261RTDECL(int) RTStrToUtf16(const char *pszString, PRTUTF16 *ppwszString);
262
263/**
264 * Translates pszString from UTF-8 to UTF-16, allocating the result buffer if requested.
265 *
266 * @returns iprt status code.
267 * @param pszString UTF-8 string to convert.
268 * @param cchString The maximum size in chars (the type) to convert. The conversion stop
269 * when it reaches cchString or the string terminator ('\\0').
270 * Use RTSTR_MAX to translate the entire string.
271 * @param ppwsz If cwc is non-zero, this must either be pointing to pointer to
272 * a buffer of the specified size, or pointer to a NULL pointer.
273 * If *ppwsz is NULL or cwc is zero a buffer of at least cwc items
274 * will be allocated to hold the translated string.
275 * If a buffer was requested it must be freed using RTUtf16Free().
276 * @param cwc The buffer size in RTUTF16s. This includes the terminator.
277 * @param pcwc Where to store the length of the translated string. (Optional)
278 * This field will be updated even on failure, however the value is only
279 * specified for the following two error codes. On VERR_BUFFER_OVERFLOW
280 * and VERR_NO_STR_MEMORY it contains the required buffer space.
281 */
282RTDECL(int) RTStrToUtf16Ex(const char *pszString, size_t cchString, PRTUTF16 *ppwsz, size_t cwc, size_t *pcwc);
283
284
285/**
286 * Get the unicode code point at the given string position.
287 *
288 * @returns unicode code point.
289 * @returns RTUNICP_INVALID if the encoding is invalid.
290 * @param psz The string.
291 */
292RTDECL(RTUNICP) RTStrGetCpInternal(const char *psz);
293
294/**
295 * Get the unicode code point at the given string position.
296 *
297 * @returns iprt status code.
298 * @param ppsz Pointer to the string pointer.
299 * @param pCp Where to store the unicode code point.
300 * RTUNICP_INVALID is stored here on failure.
301 */
302RTDECL(int) RTStrGetCpExInternal(const char **ppsz, PRTUNICP pCp);
303
304/**
305 * Put the unicode code point at the given string position
306 * and return the pointer to the char following it.
307 *
308 * This function will not consider anything at or following the
309 * buffer area pointed to by psz. It is therefore not suitable for
310 * inserting code points into a string, only appending/overwriting.
311 *
312 * @returns pointer to the char following the written code point.
313 * @param psz The string.
314 * @param CodePoint The code point to write.
315 * This should not be RTUNICP_INVALID or any other character
316 * out of the UTF-8 range.
317 *
318 * @remark This is a worker function for RTStrPutCp().
319 *
320 */
321RTDECL(char *) RTStrPutCpInternal(char *psz, RTUNICP CodePoint);
322
323/**
324 * Get the unicode code point at the given string position.
325 *
326 * @returns unicode code point.
327 * @returns RTUNICP_INVALID if the encoding is invalid.
328 * @param psz The string.
329 *
330 * @remark We optimize this operation by using an inline function for
331 * the most frequent and simplest sequence, the rest is
332 * handled by RTStrGetCpInternal().
333 */
334DECLINLINE(RTUNICP) RTStrGetCp(const char *psz)
335{
336 const unsigned char uch = *(const unsigned char *)psz;
337 if (!(uch & RT_BIT(7)))
338 return uch;
339 return RTStrGetCpInternal(psz);
340}
341
342/**
343 * Get the unicode code point at the given string position.
344 *
345 * @returns iprt status code.
346 * @param ppsz Pointer to the string pointer. This will be updated to
347 * point to the char following the current code point.
348 * @param pCp Where to store the code point.
349 * RTUNICP_INVALID is stored here on failure.
350 *
351 * @remark We optimize this operation by using an inline function for
352 * the most frequent and simplest sequence, the rest is
353 * handled by RTStrGetCpExInternal().
354 */
355DECLINLINE(int) RTStrGetCpEx(const char **ppsz, PRTUNICP pCp)
356{
357 const unsigned char uch = **(const unsigned char **)ppsz;
358 if (!(uch & RT_BIT(7)))
359 {
360 (*ppsz)++;
361 *pCp = uch;
362 return VINF_SUCCESS;
363 }
364 return RTStrGetCpExInternal(ppsz, pCp);
365}
366
367/**
368 * Put the unicode code point at the given string position
369 * and return the pointer to the char following it.
370 *
371 * This function will not consider anything at or following the the
372 * buffer area pointed to by psz. It is therefore not suitable for
373 * inserting code points into a string, only appending/overwriting.
374 *
375 * @returns pointer to the char following the written code point.
376 * @param psz The string.
377 * @param CodePoint The code point to write.
378 * This sould not be RTUNICP_INVALID or any other charater
379 * out of the UTF-8 range.
380 *
381 * @remark We optimize this operation by using an inline function for
382 * the most frequent and simplest sequence, the rest is
383 * handled by RTStrPutCpInternal().
384 */
385DECLINLINE(char *) RTStrPutCp(char *psz, RTUNICP CodePoint)
386{
387 if (CodePoint < 0x80)
388 {
389 *psz++ = (unsigned char)CodePoint;
390 return psz;
391 }
392 return RTStrPutCpInternal(psz, CodePoint);
393}
394
395/**
396 * Skips ahead, past the current code point.
397 *
398 * @returns Pointer to the char after the current code point.
399 * @param psz Pointer to the current code point.
400 * @remark This will not move the next valid code point, only past the current one.
401 */
402DECLINLINE(char *) RTStrNextCp(const char *psz)
403{
404 RTUNICP Cp;
405 RTStrGetCpEx(&psz, &Cp);
406 return (char *)psz;
407}
408
409/**
410 * Skips back to the previous code point.
411 *
412 * @returns Pointer to the char before the current code point.
413 * @returns pszStart on failure.
414 * @param pszStart Pointer to the start of the string.
415 * @param psz Pointer to the current code point.
416 */
417RTDECL(char *) RTStrPrevCp(const char *pszStart, const char *psz);
418
419
420
421#ifndef DECLARED_FNRTSTROUTPUT /* duplicated in iprt/log.h */
422#define DECLARED_FNRTSTROUTPUT
423/**
424 * Output callback.
425 *
426 * @returns number of bytes written.
427 * @param pvArg User argument.
428 * @param pachChars Pointer to an array of utf-8 characters.
429 * @param cbChars Number of bytes in the character array pointed to by pachChars.
430 */
431typedef DECLCALLBACK(size_t) FNRTSTROUTPUT(void *pvArg, const char *pachChars, size_t cbChars);
432/** Pointer to callback function. */
433typedef FNRTSTROUTPUT *PFNRTSTROUTPUT;
434#endif
435
/** Format flag.
 * These are used by RTStrFormat extensions and RTStrFormatNumber, mind
 * that not all flags make sense to both of the functions.
 * @{ */
#define RTSTR_F_CAPITAL         0x0001
#define RTSTR_F_LEFT            0x0002
#define RTSTR_F_ZEROPAD         0x0004
#define RTSTR_F_SPECIAL         0x0008
#define RTSTR_F_VALSIGNED       0x0010
#define RTSTR_F_PLUS            0x0020
#define RTSTR_F_BLANK           0x0040
#define RTSTR_F_WIDTH           0x0080
#define RTSTR_F_PRECISION       0x0100

/* Mask covering all of the RTSTR_F_*BIT size flags below. */
#define RTSTR_F_BIT_MASK        0xf800
#define RTSTR_F_8BIT            0x0800
#define RTSTR_F_16BIT           0x1000
#define RTSTR_F_32BIT           0x2000
#define RTSTR_F_64BIT           0x4000
#define RTSTR_F_128BIT          0x8000
/** @} */

/** @def RTSTR_GET_BIT_FLAG
 * Gets the bit flag for the specified type.
 *
 * sizeof() yields the size in bytes, so it is multiplied by 8 to get the
 * width in bits before it is matched against the RTSTR_F_*BIT flags.
 * Evaluates to 0 when the width matches none of the flags.
 *
 * @param   type    The type (or expression) to get the size flag for.
 */
#define RTSTR_GET_BIT_FLAG(type) \
    ( sizeof(type) * 8 == 32  ? RTSTR_F_32BIT \
    : sizeof(type) * 8 == 64  ? RTSTR_F_64BIT \
    : sizeof(type) * 8 == 16  ? RTSTR_F_16BIT \
    : sizeof(type) * 8 == 8   ? RTSTR_F_8BIT \
    : sizeof(type) * 8 == 128 ? RTSTR_F_128BIT \
    : 0)
468
469
470/**
471 * Callback to format non-standard format specifiers.
472 *
473 * @returns The number of bytes formatted.
474 * @param pvArg Formatter argument.
475 * @param pfnOutput Pointer to output function.
476 * @param pvArgOutput Argument for the output function.
477 * @param ppszFormat Pointer to the format string pointer. Advance this till the char
478 * after the format specifier.
479 * @param pArgs Pointer to the argument list. Use this to fetch the arguments.
480 * @param cchWidth Format Width. -1 if not specified.
481 * @param cchPrecision Format Precision. -1 if not specified.
482 * @param fFlags Flags (RTSTR_F_*).
483 * @param chArgSize The argument size specifier, 'l' or 'L'.
484 */
485typedef DECLCALLBACK(size_t) FNSTRFORMAT(void *pvArg, PFNRTSTROUTPUT pfnOutput, void *pvArgOutput,
486 const char **ppszFormat, va_list *pArgs, int cchWidth,
487 int cchPrecision, unsigned fFlags, char chArgSize);
488/** Pointer to a FNSTRFORMAT() function. */
489typedef FNSTRFORMAT *PFNSTRFORMAT;
490
491
492/**
493 * Partial implementation of a printf like formatter.
494 * It doesn't do everything correct, and there is no floating point support.
495 * However, it supports custom formats by the means of a format callback.
496 *
497 * @returns number of bytes formatted.
498 * @param pfnOutput Output worker.
499 * Called in two ways. Normally with a string and its length.
500 * For termination, it's called with NULL for string, 0 for length.
501 * @param pvArgOutput Argument to the output worker.
502 * @param pfnFormat Custom format worker.
503 * @param pvArgFormat Argument to the format worker.
504 * @param pszFormat Format string pointer.
505 * @param InArgs Argument list.
506 */
507RTDECL(size_t) RTStrFormatV(PFNRTSTROUTPUT pfnOutput, void *pvArgOutput, PFNSTRFORMAT pfnFormat, void *pvArgFormat, const char *pszFormat, va_list InArgs);
508
509/**
510 * Partial implementation of a printf like formatter.
511 * It doesn't do everything correct, and there is no floating point support.
512 * However, it supports custom formats by the means of a format callback.
513 *
514 * @returns number of bytes formatted.
515 * @param pfnOutput Output worker.
516 * Called in two ways. Normally with a string and its length.
517 * For termination, it's called with NULL for string, 0 for length.
518 * @param pvArgOutput Argument to the output worker.
519 * @param pfnFormat Custom format worker.
520 * @param pvArgFormat Argument to the format worker.
521 * @param pszFormat Format string.
522 * @param ... Argument list.
523 */
524RTDECL(size_t) RTStrFormat(PFNRTSTROUTPUT pfnOutput, void *pvArgOutput, PFNSTRFORMAT pfnFormat, void *pvArgFormat, const char *pszFormat, ...);
525
526/**
527 * Formats an integer number according to the parameters.
528 *
529 * @returns Length of the formatted number.
530 * @param psz Pointer to output string buffer of sufficient size.
531 * @param u64Value Value to format.
532 * @param uiBase Number representation base.
533 * @param cchWidth Width.
534 * @param cchPrecision Precision.
535 * @param fFlags Flags (RTSTR_F_*).
536 */
537RTDECL(int) RTStrFormatNumber(char *psz, uint64_t u64Value, unsigned int uiBase, signed int cchWidth, signed int cchPrecision, unsigned int fFlags);
538
539
540/**
541 * Callback for formatting a type.
542 *
543 * This is registered using the RTStrFormatTypeRegister function and will
544 * be called during string formatting to handle the specified %R[type].
545 * The argument for this format type is assumed to be a pointer and it's
546 * passed in the @a pvValue argument.
547 *
548 * @returns Length of the formatted output.
549 * @param pfnOutput Output worker.
550 * @param pvArgOutput Argument to the output worker.
551 * @param pszType The type name.
552 * @param pvValue The argument value.
553 * @param cchWidth Width.
554 * @param cchPrecision Precision.
555 * @param fFlags Flags (RTSTR_F_*).
556 * @param pvUser The user argument.
557 */
558typedef DECLCALLBACK(size_t) FNRTSTRFORMATTYPE(PFNRTSTROUTPUT pfnOutput, void *pvArgOutput,
559 const char *pszType, void const *pvValue,
560 int cchWidth, int cchPrecision, unsigned fFlags,
561 void *pvUser);
562/** Pointer to a FNRTSTRFORMATTYPE. */
563typedef FNRTSTRFORMATTYPE *PFNRTSTRFORMATTYPE;
564
565
566/**
567 * Register a format handler for a type.
568 *
569 * The format handler is used to handle '%R[type]' format types, where the argument
570 * in the vector is a pointer value (a bit restrictive, but keeps it simple).
571 *
572 * The caller must ensure that no other thread will be making use of any of
573 * the dynamic formatting type facilities simultaneously with this call.
574 *
575 * @returns IPRT status code.
576 * @retval VINF_SUCCESS on success.
577 * @retval VERR_ALREADY_EXISTS if the type has already been registered.
578 * @retval VERR_TOO_MANY_OPEN_FILES if all the type slots has been allocated already.
579 *
580 * @param pszType The type name.
581 * @param pfnHandler The handler address. See FNRTSTRFORMATTYPE for details.
582 * @param pvUser The user argument to pass to the handler. See RTStrFormatTypeSetUser
583 * for how to update this later.
584 */
585RTDECL(int) RTStrFormatTypeRegister(const char *pszType, PFNRTSTRFORMATTYPE pfnHandler, void *pvUser);
586
587/**
588 * Deregisters a format type.
589 *
590 * The caller must ensure that no other thread will be making use of any of
591 * the dynamic formatting type facilities simultaneously with this call.
592 *
593 * @returns IPRT status code.
594 * @retval VINF_SUCCESS on success.
595 * @retval VERR_FILE_NOT_FOUND if not found.
596 *
597 * @param pszType The type to deregister.
598 */
599RTDECL(int) RTStrFormatTypeDeregister(const char *pszType);
600
601/**
602 * Sets the user argument for a type.
603 *
604 * This can be used if a user argument needs relocating in GC.
605 *
606 * @returns IPRT status code.
607 * @retval VINF_SUCCESS on success.
608 * @retval VERR_FILE_NOT_FOUND if not found.
609 *
610 * @param pszType The type to update.
611 * @param pvUser The new user argument value.
612 */
613RTDECL(int) RTStrFormatTypeSetUser(const char *pszType, void *pvUser);
614
615
616/**
617 * String printf.
618 *
619 * @returns The length of the returned string (in pszBuffer).
620 * @param pszBuffer Output buffer.
621 * @param cchBuffer Size of the output buffer.
622 * @param pszFormat The format string.
623 * @param args The format argument.
624 */
625RTDECL(size_t) RTStrPrintfV(char *pszBuffer, size_t cchBuffer, const char *pszFormat, va_list args);
626
627/**
628 * String printf.
629 *
630 * @returns The length of the returned string (in pszBuffer).
631 * @param pszBuffer Output buffer.
632 * @param cchBuffer Size of the output buffer.
633 * @param pszFormat The format string.
634 * @param ... The format argument.
635 */
636RTDECL(size_t) RTStrPrintf(char *pszBuffer, size_t cchBuffer, const char *pszFormat, ...);
637
638
639/**
640 * String printf with custom formatting.
641 *
642 * @returns The length of the returned string (in pszBuffer).
643 * @param pfnFormat Pointer to handler function for the custom formats.
644 * @param pvArg Argument to the pfnFormat function.
645 * @param pszBuffer Output buffer.
646 * @param cchBuffer Size of the output buffer.
647 * @param pszFormat The format string.
648 * @param args The format argument.
649 */
650RTDECL(size_t) RTStrPrintfExV(PFNSTRFORMAT pfnFormat, void *pvArg, char *pszBuffer, size_t cchBuffer, const char *pszFormat, va_list args);
651
652/**
653 * String printf with custom formatting.
654 *
655 * @returns The length of the returned string (in pszBuffer).
656 * @param pfnFormat Pointer to handler function for the custom formats.
657 * @param pvArg Argument to the pfnFormat function.
658 * @param pszBuffer Output buffer.
659 * @param cchBuffer Size of the output buffer.
660 * @param pszFormat The format string.
661 * @param ... The format argument.
662 */
663RTDECL(size_t) RTStrPrintfEx(PFNSTRFORMAT pfnFormat, void *pvArg, char *pszBuffer, size_t cchBuffer, const char *pszFormat, ...);
664
665
666/**
667 * Allocating string printf.
668 *
669 * @returns The length of the string in the returned *ppszBuffer.
670 * @returns -1 on failure.
671 * @param ppszBuffer Where to store the pointer to the allocated output buffer.
672 * The buffer should be freed using RTStrFree().
673 * On failure *ppszBuffer will be set to NULL.
674 * @param pszFormat The format string.
675 * @param args The format argument.
676 */
677RTDECL(int) RTStrAPrintfV(char **ppszBuffer, const char *pszFormat, va_list args);
678
679/**
680 * Allocating string printf.
681 *
682 * @returns The length of the string in the returned *ppszBuffer.
683 * @returns -1 on failure.
684 * @param ppszBuffer Where to store the pointer to the allocated output buffer.
685 * The buffer should be freed using RTStrFree().
686 * On failure *ppszBuffer will be set to NULL.
687 * @param pszFormat The format string.
688 * @param ... The format argument.
689 */
690RTDECL(int) RTStrAPrintf(char **ppszBuffer, const char *pszFormat, ...);
691
692
693/**
694 * Strips blankspaces from both ends of the string.
695 *
696 * @returns Pointer to first non-blank char in the string.
697 * @param psz The string to strip.
698 */
699RTDECL(char *) RTStrStrip(char *psz);
700
701/**
702 * Strips blankspaces from the start of the string.
703 *
704 * @returns Pointer to first non-blank char in the string.
705 * @param psz The string to strip.
706 */
707RTDECL(char *) RTStrStripL(const char *psz);
708
709/**
710 * Strips blankspaces from the end of the string.
711 *
712 * @returns psz.
713 * @param psz The string to strip.
714 */
715RTDECL(char *) RTStrStripR(char *psz);
716
717
718/** @defgroup rt_str_conv String To/From Number Conversions
719 * @ingroup grp_rt_str
720 * @{ */
721
722/**
723 * Converts a string representation of a number to a 64-bit unsigned number.
724 *
725 * @returns iprt status code.
726 * Warnings are used to indicate conversion problems.
727 * @retval VWRN_NUMBER_TOO_BIG
728 * @retval VWRN_NEGATIVE_UNSIGNED
729 * @retval VWRN_TRAILING_CHARS
730 * @retval VWRN_TRAILING_SPACES
731 * @retval VINF_SUCCESS
732 * @retval VERR_NO_DIGITS
733 *
734 * @param pszValue Pointer to the string value.
735 * @param ppszNext Where to store the pointer to the first char following the number. (Optional)
736 * @param uBase The base of the representation used.
737 * If 0 the function will look for known prefixes before defaulting to 10.
738 * @param pu64 Where to store the converted number. (optional)
739 */
740RTDECL(int) RTStrToUInt64Ex(const char *pszValue, char **ppszNext, unsigned uBase, uint64_t *pu64);
741
742/**
743 * Converts a string representation of a number to a 64-bit unsigned number,
744 * making sure the full string is converted.
745 *
746 * @returns iprt status code.
747 * Warnings are used to indicate conversion problems.
748 * @retval VWRN_NUMBER_TOO_BIG
749 * @retval VWRN_NEGATIVE_UNSIGNED
750 * @retval VINF_SUCCESS
751 * @retval VERR_NO_DIGITS
752 * @retval VERR_TRAILING_SPACES
753 * @retval VERR_TRAILING_CHARS
754 *
755 * @param pszValue Pointer to the string value.
756 * @param uBase The base of the representation used.
757 * If 0 the function will look for known prefixes before defaulting to 10.
758 * @param pu64 Where to store the converted number. (optional)
759 */
760RTDECL(int) RTStrToUInt64Full(const char *pszValue, unsigned uBase, uint64_t *pu64);
761
762/**
763 * Converts a string representation of a number to a 64-bit unsigned number.
764 * The base is guessed.
765 *
766 * @returns 64-bit unsigned number on success.
767 * @returns 0 on failure.
768 * @param pszValue Pointer to the string value.
769 */
770RTDECL(uint64_t) RTStrToUInt64(const char *pszValue);
771
772/**
773 * Converts a string representation of a number to a 32-bit unsigned number.
774 *
775 * @returns iprt status code.
776 * Warnings are used to indicate conversion problems.
777 * @retval VWRN_NUMBER_TOO_BIG
778 * @retval VWRN_NEGATIVE_UNSIGNED
779 * @retval VWRN_TRAILING_CHARS
780 * @retval VWRN_TRAILING_SPACES
781 * @retval VINF_SUCCESS
782 * @retval VERR_NO_DIGITS
783 *
784 * @param pszValue Pointer to the string value.
785 * @param ppszNext Where to store the pointer to the first char following the number. (Optional)
786 * @param uBase The base of the representation used.
787 * If 0 the function will look for known prefixes before defaulting to 10.
788 * @param pu32 Where to store the converted number. (optional)
789 */
790RTDECL(int) RTStrToUInt32Ex(const char *pszValue, char **ppszNext, unsigned uBase, uint32_t *pu32);
791
792/**
793 * Converts a string representation of a number to a 32-bit unsigned number,
794 * making sure the full string is converted.
795 *
796 * @returns iprt status code.
797 * Warnings are used to indicate conversion problems.
798 * @retval VWRN_NUMBER_TOO_BIG
799 * @retval VWRN_NEGATIVE_UNSIGNED
800 * @retval VINF_SUCCESS
801 * @retval VERR_NO_DIGITS
802 * @retval VERR_TRAILING_SPACES
803 * @retval VERR_TRAILING_CHARS
804 *
805 * @param pszValue Pointer to the string value.
806 * @param uBase The base of the representation used.
807 * If 0 the function will look for known prefixes before defaulting to 10.
808 * @param pu32 Where to store the converted number. (optional)
809 */
810RTDECL(int) RTStrToUInt32Full(const char *pszValue, unsigned uBase, uint32_t *pu32);
811
812/**
813 * Converts a string representation of a number to a 32-bit unsigned number.
814 * The base is guessed.
815 *
816 * @returns 32-bit unsigned number on success.
817 * @returns 0 on failure.
818 * @param pszValue Pointer to the string value.
819 */
820RTDECL(uint32_t) RTStrToUInt32(const char *pszValue);
821
822/**
823 * Converts a string representation of a number to a 16-bit unsigned number.
824 *
825 * @returns iprt status code.
826 * Warnings are used to indicate conversion problems.
827 * @retval VWRN_NUMBER_TOO_BIG
828 * @retval VWRN_NEGATIVE_UNSIGNED
829 * @retval VWRN_TRAILING_CHARS
830 * @retval VWRN_TRAILING_SPACES
831 * @retval VINF_SUCCESS
832 * @retval VERR_NO_DIGITS
833 *
834 * @param pszValue Pointer to the string value.
835 * @param ppszNext Where to store the pointer to the first char following the number. (Optional)
836 * @param uBase The base of the representation used.
837 * If 0 the function will look for known prefixes before defaulting to 10.
838 * @param pu16 Where to store the converted number. (optional)
839 */
840RTDECL(int) RTStrToUInt16Ex(const char *pszValue, char **ppszNext, unsigned uBase, uint16_t *pu16);
841
842/**
843 * Converts a string representation of a number to a 16-bit unsigned number,
844 * making sure the full string is converted.
845 *
846 * @returns iprt status code.
847 * Warnings are used to indicate conversion problems.
848 * @retval VWRN_NUMBER_TOO_BIG
849 * @retval VWRN_NEGATIVE_UNSIGNED
850 * @retval VINF_SUCCESS
851 * @retval VERR_NO_DIGITS
852 * @retval VERR_TRAILING_SPACES
853 * @retval VERR_TRAILING_CHARS
854 *
855 * @param pszValue Pointer to the string value.
856 * @param uBase The base of the representation used.
857 * If 0 the function will look for known prefixes before defaulting to 10.
858 * @param pu16 Where to store the converted number. (optional)
859 */
860RTDECL(int) RTStrToUInt16Full(const char *pszValue, unsigned uBase, uint16_t *pu16);
861
862/**
863 * Converts a string representation of a number to a 16-bit unsigned number.
864 * The base is guessed.
865 *
866 * @returns 16-bit unsigned number on success.
867 * @returns 0 on failure.
868 * @param pszValue Pointer to the string value.
869 */
870RTDECL(uint16_t) RTStrToUInt16(const char *pszValue);
871
872/**
873 * Converts a string representation of a number to a 8-bit unsigned number.
874 *
875 * @returns iprt status code.
876 * Warnings are used to indicate conversion problems.
877 * @retval VWRN_NUMBER_TOO_BIG
878 * @retval VWRN_NEGATIVE_UNSIGNED
879 * @retval VWRN_TRAILING_CHARS
880 * @retval VWRN_TRAILING_SPACES
881 * @retval VINF_SUCCESS
882 * @retval VERR_NO_DIGITS
883 *
884 * @param pszValue Pointer to the string value.
885 * @param ppszNext Where to store the pointer to the first char following the number. (Optional)
886 * @param uBase The base of the representation used.
887 * If 0 the function will look for known prefixes before defaulting to 10.
888 * @param pu8 Where to store the converted number. (optional)
889 */
890RTDECL(int) RTStrToUInt8Ex(const char *pszValue, char **ppszNext, unsigned uBase, uint8_t *pu8);
891
892/**
893 * Converts a string representation of a number to a 8-bit unsigned number,
894 * making sure the full string is converted.
895 *
896 * @returns iprt status code.
897 * Warnings are used to indicate conversion problems.
898 * @retval VWRN_NUMBER_TOO_BIG
899 * @retval VWRN_NEGATIVE_UNSIGNED
900 * @retval VINF_SUCCESS
901 * @retval VERR_NO_DIGITS
902 * @retval VERR_TRAILING_SPACES
903 * @retval VERR_TRAILING_CHARS
904 *
905 * @param pszValue Pointer to the string value.
906 * @param uBase The base of the representation used.
907 * If 0 the function will look for known prefixes before defaulting to 10.
908 * @param pu8 Where to store the converted number. (optional)
909 */
910RTDECL(int) RTStrToUInt8Full(const char *pszValue, unsigned uBase, uint8_t *pu8);
911
912/**
913 * Converts a string representation of a number to a 8-bit unsigned number.
914 * The base is guessed.
915 *
916 * @returns 8-bit unsigned number on success.
917 * @returns 0 on failure.
918 * @param pszValue Pointer to the string value.
919 */
920RTDECL(uint8_t) RTStrToUInt8(const char *pszValue);
921
922/**
923 * Converts a string representation of a number to a 64-bit signed number.
924 *
925 * @returns iprt status code.
926 * Warnings are used to indicate conversion problems.
927 * @retval VWRN_NUMBER_TOO_BIG
928 * @retval VWRN_TRAILING_CHARS
929 * @retval VWRN_TRAILING_SPACES
930 * @retval VINF_SUCCESS
931 * @retval VERR_NO_DIGITS
932 *
933 * @param pszValue Pointer to the string value.
934 * @param ppszNext Where to store the pointer to the first char following the number. (Optional)
935 * @param uBase The base of the representation used.
936 * If 0 the function will look for known prefixes before defaulting to 10.
937 * @param pi64 Where to store the converted number. (optional)
938 */
939RTDECL(int) RTStrToInt64Ex(const char *pszValue, char **ppszNext, unsigned uBase, int64_t *pi64);
940
941/**
942 * Converts a string representation of a number to a 64-bit signed number,
943 * making sure the full string is converted.
944 *
945 * @returns iprt status code.
946 * Warnings are used to indicate conversion problems.
947 * @retval VWRN_NUMBER_TOO_BIG
948 * @retval VINF_SUCCESS
949 * @retval VERR_TRAILING_CHARS
950 * @retval VERR_TRAILING_SPACES
951 * @retval VERR_NO_DIGITS
952 *
953 * @param pszValue Pointer to the string value.
954 * @param uBase The base of the representation used.
955 * If 0 the function will look for known prefixes before defaulting to 10.
956 * @param pi64 Where to store the converted number. (optional)
957 */
958RTDECL(int) RTStrToInt64Full(const char *pszValue, unsigned uBase, int64_t *pi64);
959
960/**
961 * Converts a string representation of a number to a 64-bit signed number.
962 * The base is guessed.
963 *
964 * @returns 64-bit signed number on success.
965 * @returns 0 on failure.
966 * @param pszValue Pointer to the string value.
967 */
968RTDECL(int64_t) RTStrToInt64(const char *pszValue);
969
970/**
971 * Converts a string representation of a number to a 32-bit signed number.
972 *
973 * @returns iprt status code.
974 * Warnings are used to indicate conversion problems.
975 * @retval VWRN_NUMBER_TOO_BIG
976 * @retval VWRN_TRAILING_CHARS
977 * @retval VWRN_TRAILING_SPACES
978 * @retval VINF_SUCCESS
979 * @retval VERR_NO_DIGITS
980 *
981 * @param pszValue Pointer to the string value.
982 * @param ppszNext Where to store the pointer to the first char following the number. (Optional)
983 * @param uBase The base of the representation used.
984 * If 0 the function will look for known prefixes before defaulting to 10.
985 * @param pi32 Where to store the converted number. (optional)
986 */
987RTDECL(int) RTStrToInt32Ex(const char *pszValue, char **ppszNext, unsigned uBase, int32_t *pi32);
988
989/**
990 * Converts a string representation of a number to a 32-bit signed number,
991 * making sure the full string is converted.
992 *
993 * @returns iprt status code.
994 * Warnings are used to indicate conversion problems.
995 * @retval VWRN_NUMBER_TOO_BIG
996 * @retval VINF_SUCCESS
997 * @retval VERR_TRAILING_CHARS
998 * @retval VERR_TRAILING_SPACES
999 * @retval VERR_NO_DIGITS
1000 *
1001 * @param pszValue Pointer to the string value.
1002 * @param uBase The base of the representation used.
1003 * If 0 the function will look for known prefixes before defaulting to 10.
1004 * @param pi32 Where to store the converted number. (optional)
1005 */
1006RTDECL(int) RTStrToInt32Full(const char *pszValue, unsigned uBase, int32_t *pi32);
1007
1008/**
1009 * Converts a string representation of a number to a 32-bit signed number.
1010 * The base is guessed.
1011 *
1012 * @returns 32-bit signed number on success.
1013 * @returns 0 on failure.
1014 * @param pszValue Pointer to the string value.
1015 */
1016RTDECL(int32_t) RTStrToInt32(const char *pszValue);
1017
1018/**
1019 * Converts a string representation of a number to a 16-bit signed number.
1020 *
1021 * @returns iprt status code.
1022 * Warnings are used to indicate conversion problems.
1023 * @retval VWRN_NUMBER_TOO_BIG
1024 * @retval VWRN_TRAILING_CHARS
1025 * @retval VWRN_TRAILING_SPACES
1026 * @retval VINF_SUCCESS
1027 * @retval VERR_NO_DIGITS
1028 *
1029 * @param pszValue Pointer to the string value.
1030 * @param ppszNext Where to store the pointer to the first char following the number. (Optional)
1031 * @param uBase The base of the representation used.
1032 * If 0 the function will look for known prefixes before defaulting to 10.
1033 * @param pi16 Where to store the converted number. (optional)
1034 */
1035RTDECL(int) RTStrToInt16Ex(const char *pszValue, char **ppszNext, unsigned uBase, int16_t *pi16);
1036
1037/**
1038 * Converts a string representation of a number to a 16-bit signed number,
1039 * making sure the full string is converted.
1040 *
1041 * @returns iprt status code.
1042 * Warnings are used to indicate conversion problems.
1043 * @retval VWRN_NUMBER_TOO_BIG
1044 * @retval VINF_SUCCESS
1045 * @retval VERR_TRAILING_CHARS
1046 * @retval VERR_TRAILING_SPACES
1047 * @retval VERR_NO_DIGITS
1048 *
1049 * @param pszValue Pointer to the string value.
1050 * @param uBase The base of the representation used.
1051 * If 0 the function will look for known prefixes before defaulting to 10.
1052 * @param pi16 Where to store the converted number. (optional)
1053 */
1054RTDECL(int) RTStrToInt16Full(const char *pszValue, unsigned uBase, int16_t *pi16);
1055
1056/**
1057 * Converts a string representation of a number to a 16-bit signed number.
1058 * The base is guessed.
1059 *
1060 * @returns 16-bit signed number on success.
1061 * @returns 0 on failure.
1062 * @param pszValue Pointer to the string value.
1063 */
1064RTDECL(int16_t) RTStrToInt16(const char *pszValue);
1065
1066/**
1067 * Converts a string representation of a number to a 8-bit signed number.
1068 *
1069 * @returns iprt status code.
1070 * Warnings are used to indicate conversion problems.
1071 * @retval VWRN_NUMBER_TOO_BIG
1072 * @retval VWRN_TRAILING_CHARS
1073 * @retval VWRN_TRAILING_SPACES
1074 * @retval VINF_SUCCESS
1075 * @retval VERR_NO_DIGITS
1076 *
1077 * @param pszValue Pointer to the string value.
1078 * @param ppszNext Where to store the pointer to the first char following the number. (Optional)
1079 * @param uBase The base of the representation used.
1080 * If 0 the function will look for known prefixes before defaulting to 10.
1081 * @param pi8 Where to store the converted number. (optional)
1082 */
1083RTDECL(int) RTStrToInt8Ex(const char *pszValue, char **ppszNext, unsigned uBase, int8_t *pi8);
1084
1085/**
1086 * Converts a string representation of a number to a 8-bit signed number,
1087 * making sure the full string is converted.
1088 *
1089 * @returns iprt status code.
1090 * Warnings are used to indicate conversion problems.
1091 * @retval VWRN_NUMBER_TOO_BIG
1092 * @retval VINF_SUCCESS
1093 * @retval VERR_TRAILING_CHARS
1094 * @retval VERR_TRAILING_SPACES
1095 * @retval VERR_NO_DIGITS
1096 *
1097 * @param pszValue Pointer to the string value.
1098 * @param uBase The base of the representation used.
1099 * If 0 the function will look for known prefixes before defaulting to 10.
1100 * @param pi8 Where to store the converted number. (optional)
1101 */
1102RTDECL(int) RTStrToInt8Full(const char *pszValue, unsigned uBase, int8_t *pi8);
1103
1104/**
1105 * Converts a string representation of a number to a 8-bit signed number.
1106 * The base is guessed.
1107 *
1108 * @returns 8-bit signed number on success.
1109 * @returns 0 on failure.
1110 * @param pszValue Pointer to the string value.
1111 */
1112RTDECL(int8_t) RTStrToInt8(const char *pszValue);
1113
1114/**
1115 * Performs a case sensitive string compare between two UTF-8 strings.
1116 *
1117 * Encoding errors are ignored by the current implementation. So, the only
1118 * difference between this and the CRT strcmp function is the handling of
1119 * NULL arguments.
1120 *
1121 * @returns < 0 if the first string less than the second string.
1122 * @returns 0 if the first string identical to the second string.
1123 * @returns > 0 if the first string greater than the second string.
1124 * @param psz1 First UTF-8 string. Null is allowed.
1125 * @param psz2 Second UTF-8 string. Null is allowed.
1126 */
1127RTDECL(int) RTStrCmp(const char *psz1, const char *psz2);
1128
1129/**
1130 * Performs a case insensitive string compare between two UTF-8 strings.
1131 *
1132 * This is a simplified compare, as only the simplified lower/upper case folding
1133 * specified by the unicode specs are used. It does not consider character pairs
1134 * as they are used in some languages, just simple upper & lower case compares.
1135 *
1136 * The result is the difference between the mismatching codepoints after they
1137 * both have been lower cased.
1138 *
1139 * If the string encoding is invalid the function will assert (strict builds)
1140 * and use RTStrCmp for the remainder of the string.
1141 *
1142 * @returns < 0 if the first string less than the second string.
1143 * @returns 0 if the first string identical to the second string.
1144 * @returns > 0 if the first string greater than the second string.
1145 * @param psz1 First UTF-8 string. Null is allowed.
1146 * @param psz2 Second UTF-8 string. Null is allowed.
1147 */
1148RTDECL(int) RTStrICmp(const char *psz1, const char *psz2);
1149
1150/**
1151 * Find the length of a zero-terminated byte string, given
1152 * a max string length.
1153 *
1154 * See also RTStrNLenEx.
1155 *
1156 * @returns The string length or cchMax. The returned length does not include
1157 * the zero terminator if it was found.
1158 *
1159 * @param pszString The string.
1160 * @param cchMax The max string length.
1161 */
1162RTDECL(size_t) RTStrNLen(const char *pszString, size_t cchMax);
1163
1164/**
1165 * Find the length of a zero-terminated byte string, given
1166 * a max string length.
1167 *
1168 * See also RTStrNLen.
1169 *
1170 * @returns IPRT status code.
1171 * @retval VINF_SUCCESS if the string has a length less than cchMax.
1172 * @retval VERR_BUFFER_OVERFLOW if the end of the string wasn't found
1173 * before cchMax was reached.
1174 *
1175 * @param pszString The string.
1176 * @param cchMax The max string length.
1177 * @param pcch Where to store the string length excluding the
1178 * terminator. This is set to cchMax if the terminator
1179 * isn't found.
1180 */
1181RTDECL(int) RTStrNLenEx(const char *pszString, size_t cchMax, size_t *pcch);
1182
1183/** @} */
1184
1185
1186/** @defgroup rt_str_space Unique String Space
1187 * @ingroup grp_rt_str
1188 * @{
1189 */
1190
1191/** Pointer to a string name space container node core. */
1192typedef struct RTSTRSPACECORE *PRTSTRSPACECORE;
1193/** Pointer to a pointer to a string name space container node core. */
1194typedef PRTSTRSPACECORE *PPRTSTRSPACECORE;
1195
1196/**
1197 * String name space container node core (the node type taken by RTStrSpaceInsert()).
1198 */
1199typedef struct RTSTRSPACECORE
1200{
1201 /** Hash key. Don't touch. */
1202 uint32_t Key;
1203 /** Pointer to the left leaf node. Don't touch. */
1204 PRTSTRSPACECORE pLeft;
1205 /** Pointer to the right leaf node. Don't touch. */
1206 PRTSTRSPACECORE pRight;
1207 /** Pointer to the list of strings with the same key. Don't touch. */
1208 PRTSTRSPACECORE pList;
1209 /** Height of this tree: max(height(left), height(right)) + 1. Don't touch. */
1210 unsigned char uchHeight;
1211 /** The string length. Read only! */
1212 size_t cchString;
1213 /** Pointer to the string. Read only! */
1214 const char * pszString;
1215} RTSTRSPACECORE;
1216
1217/** String space. (Initialize with NULL.) */
1218typedef PRTSTRSPACECORE RTSTRSPACE;
1219/** Pointer to a string space. */
1220typedef PPRTSTRSPACECORE PRTSTRSPACE;
1221
1222
1223/**
1224 * Inserts a string into a unique string space.
1225 *
1226 * @returns true on success.
1227 * @returns false if the string collided with an existing string.
1228 * @param pStrSpace The space to insert it into.
1229 * @param pStr The string node.
1230 */
1231RTDECL(bool) RTStrSpaceInsert(PRTSTRSPACE pStrSpace, PRTSTRSPACECORE pStr);
1232
1233/**
1234 * Removes a string from a unique string space.
1235 *
1236 * @returns Pointer to the removed string node.
1237 * @returns NULL if the string was not found in the string space.
1238 * @param pStrSpace The space to remove it from.
1239 * @param pszString The string to remove.
1240 */
1241RTDECL(PRTSTRSPACECORE) RTStrSpaceRemove(PRTSTRSPACE pStrSpace, const char *pszString);
1242
1243/**
1244 * Gets a string from a unique string space.
1245 *
1246 * @returns Pointer to the string node.
1247 * @returns NULL if the string was not found in the string space.
1248 * @param pStrSpace The space to search.
1249 * @param pszString The string to get.
1250 */
1251RTDECL(PRTSTRSPACECORE) RTStrSpaceGet(PRTSTRSPACE pStrSpace, const char *pszString);
1252
1253/**
1254 * Callback function for RTStrSpaceEnumerate() and RTStrSpaceDestroy().
1255 *
1256 * @returns 0 on continue.
1257 * @returns Non-zero to abort the operation.
1258 * @param pStr The string node
1259 * @param pvUser The user specified argument.
1260 */
1261typedef DECLCALLBACK(int) FNRTSTRSPACECALLBACK(PRTSTRSPACECORE pStr, void *pvUser);
1262/** Pointer to callback function for RTStrSpaceEnumerate() and RTStrSpaceDestroy(). */
1263typedef FNRTSTRSPACECALLBACK *PFNRTSTRSPACECALLBACK;
1264
1265/**
1266 * Destroys the string space.
1267 * The caller supplies a callback which will be called for each of
1268 * the string nodes for freeing their memory and other resources.
1269 *
1270 * @returns 0 or what ever non-zero return value pfnCallback returned
1271 * when aborting the destruction.
1272 * @param pStrSpace The space to destroy.
1273 * @param pfnCallback The callback.
1274 * @param pvUser The user argument.
1275 */
1276RTDECL(int) RTStrSpaceDestroy(PRTSTRSPACE pStrSpace, PFNRTSTRSPACECALLBACK pfnCallback, void *pvUser);
1277
1278/**
1279 * Enumerates the string space.
1280 * The caller supplies a callback which will be called for each of
1281 * the string nodes.
1282 *
1283 * @returns 0 or what ever non-zero return value pfnCallback returned
1284 * when aborting the enumeration.
1285 * @param pStrSpace The space to enumerate.
1286 * @param pfnCallback The callback.
1287 * @param pvUser The user argument.
1288 */
1289RTDECL(int) RTStrSpaceEnumerate(PRTSTRSPACE pStrSpace, PFNRTSTRSPACECALLBACK pfnCallback, void *pvUser);
1290
1291/** @} */
1292
1293
1294/** @defgroup rt_str_utf16 UTF-16 String Manipulation
1295 * @ingroup grp_rt_str
1296 * @{
1297 */
1298
1299/**
1300 * Free a UTF-16 string allocated by RTStrUtf8ToUtf16(), RTStrUtf8ToUtf16Ex(),
1301 * RTUtf16Dup() or RTUtf16DupEx().
1302 *
1303 * @returns Nothing; the function is declared void.
1304 * @param pwszString The UTF-16 string to free. NULL is accepted.
1305 */
1306RTDECL(void) RTUtf16Free(PRTUTF16 pwszString);
1307
1308/**
1309 * Allocates a new copy of the specified UTF-16 string.
1310 *
1311 * @returns Pointer to the allocated string copy. Use RTUtf16Free() to free it.
1312 * @returns NULL when out of memory.
1313 * @param pwszString UTF-16 string to duplicate.
1314 * @remark This function will not make any attempt to validate the encoding.
1315 */
1316RTDECL(PRTUTF16) RTUtf16Dup(PCRTUTF16 pwszString);
1317
1318/**
1319 * Allocates a new copy of the specified UTF-16 string.
1320 *
1321 * @returns iprt status code.
1322 * @param ppwszString Receives pointer of the allocated UTF-16 string.
1323 * The returned pointer must be freed using RTUtf16Free().
1324 * @param pwszString UTF-16 string to duplicate.
1325 * @param cwcExtra Number of extra RTUTF16 items to allocate.
1326 * @remark This function will not make any attempt to validate the encoding.
1327 */
1328RTDECL(int) RTUtf16DupEx(PRTUTF16 *ppwszString, PCRTUTF16 pwszString, size_t cwcExtra);
1329
1330/**
1331 * Returns the length of a UTF-16 string in UTF-16 characters
1332 * without trailing '\\0'.
1333 *
1334 * Surrogate pairs counts as two UTF-16 characters here. Use RTUtf16CpCnt()
1335 * to get the exact number of code points in the string.
1336 *
1337 * @returns The number of RTUTF16 items in the string.
1338 * @param pwszString Pointer to the UTF-16 string.
1339 * @remark This function will not make any attempt to validate the encoding.
1340 */
1341RTDECL(size_t) RTUtf16Len(PCRTUTF16 pwszString);
1342
1343/**
1344 * Performs a case sensitive string compare between two UTF-16 strings.
1345 *
1346 * @returns < 0 if the first string less than the second string.
1347 * @returns 0 if the first string identical to the second string.
1348 * @returns > 0 if the first string greater than the second string.
1349 * @param pwsz1 First UTF-16 string. Null is allowed.
1350 * @param pwsz2 Second UTF-16 string. Null is allowed.
1351 * @remark This function will not make any attempt to validate the encoding.
1352 */
1353RTDECL(int) RTUtf16Cmp(register PCRTUTF16 pwsz1, register PCRTUTF16 pwsz2);
1354
1355/**
1356 * Performs a case insensitive string compare between two UTF-16 strings.
1357 *
1358 * This is a simplified compare, as only the simplified lower/upper case folding
1359 * specified by the unicode specs are used. It does not consider character pairs
1360 * as they are used in some languages, just simple upper & lower case compares.
1361 *
1362 * @returns < 0 if the first string less than the second string.
1363 * @returns 0 if the first string identical to the second string.
1364 * @returns > 0 if the first string greater than the second string.
1365 * @param pwsz1 First UTF-16 string. Null is allowed.
1366 * @param pwsz2 Second UTF-16 string. Null is allowed.
1367 */
1368RTDECL(int) RTUtf16ICmp(PCRTUTF16 pwsz1, PCRTUTF16 pwsz2);
1369
1370/**
1371 * Performs a case insensitive string compare between two UTF-16 strings
1372 * using the current locale of the process (if applicable).
1373 *
1374 * This differs from RTUtf16ICmp() in that it will try, if a locale with the
1375 * required data is available, to do a correct case-insensitive compare. It
1376 * follows that it is more complex and thereby likely to be more expensive.
1377 *
1378 * @returns < 0 if the first string less than the second string.
1379 * @returns 0 if the first string identical to the second string.
1380 * @returns > 0 if the first string greater than the second string.
1381 * @param pwsz1 First UTF-16 string. Null is allowed.
1382 * @param pwsz2 Second UTF-16 string. Null is allowed.
1383 */
1384RTDECL(int) RTUtf16LocaleICmp(PCRTUTF16 pwsz1, PCRTUTF16 pwsz2);
1385
1386/**
1387 * Folds a UTF-16 string to lowercase.
1388 *
1389 * This is a very simple folding; it uses the simple lowercase
1390 * code point, it is not related to any locale just the most common
1391 * lowercase codepoint setup by the unicode specs, and it will not
1392 * create new surrogate pairs or remove existing ones.
1393 *
1394 * @returns Pointer to the passed in string.
1395 * @param pwsz The string to fold.
1396 */
1397RTDECL(PRTUTF16) RTUtf16ToLower(PRTUTF16 pwsz);
1398
1399/**
1400 * Folds a UTF-16 string to uppercase.
1401 *
1402 * This is a very simple folding; it uses the simple uppercase
1403 * code point, it is not related to any locale just the most common
1404 * uppercase codepoint setup by the unicode specs, and it will not
1405 * create new surrogate pairs or remove existing ones.
1406 *
1407 * @returns Pointer to the passed in string.
1408 * @param pwsz The string to fold.
1409 */
1410RTDECL(PRTUTF16) RTUtf16ToUpper(PRTUTF16 pwsz);
1411
1412/**
1413 * Translate a UTF-16 string into a UTF-8 allocating the result buffer.
1414 *
1415 * @returns iprt status code.
1416 * @param pwszString UTF-16 string to convert.
1417 * @param ppszString Receives pointer of allocated UTF-8 string.
1418 * The returned pointer must be freed using RTStrFree().
1419 */
1420RTDECL(int) RTUtf16ToUtf8(PCRTUTF16 pwszString, char **ppszString);
1421
1422/**
1423 * Translates UTF-16 to UTF-8 using buffer provided by the caller or
1424 * a fittingly sized buffer allocated by the function.
1425 *
1426 * @returns iprt status code.
1427 * @param pwszString The UTF-16 string to convert.
1428 * @param cwcString The number of RTUTF16 items to translate from pwszString.
1429 * The translation will stop when reaching cwcString or the terminator ('\\0').
1430 * Use RTSTR_MAX to translate the entire string.
1431 * @param ppsz If cch is non-zero, this must either be pointing to pointer to
1432 * a buffer of the specified size, or pointer to a NULL pointer.
1433 * If *ppsz is NULL or cch is zero a buffer of at least cch chars
1434 * will be allocated to hold the translated string.
1435 * If a buffer was allocated it must be freed using RTStrFree().
1436 * @param cch The buffer size in chars (the type). This includes the terminator.
1437 * @param pcch Where to store the length of the translated string. (Optional)
1438 * This field will be updated even on failure, however the value is only
1439 * specified for the following two error codes. On VERR_BUFFER_OVERFLOW
1440 * and VERR_NO_STR_MEMORY it contains the required buffer space.
1441 */
1442RTDECL(int) RTUtf16ToUtf8Ex(PCRTUTF16 pwszString, size_t cwcString, char **ppsz, size_t cch, size_t *pcch);
1443
1444/**
1445 * Calculates the length of the UTF-16 string in UTF-8 chars (bytes).
1446 *
1447 * This function will validate the string, and incorrectly encoded UTF-16
1448 * strings will be rejected. The primary purpose of this function is to
1449 * help allocate buffers for RTUtf16ToUtf8() of the correct size. For most
1450 * other purposes RTUtf16ToUtf8Ex() should be used.
1451 *
1452 * @returns Number of char (bytes).
1453 * @returns 0 if the string was incorrectly encoded.
1454 * @param pwsz The UTF-16 string.
1455 */
1456RTDECL(size_t) RTUtf16CalcUtf8Len(PCRTUTF16 pwsz);
1457
1458/**
1459 * Calculates the length of the UTF-16 string in UTF-8 chars (bytes).
1460 *
1461 * This function will validate the string, and incorrectly encoded UTF-16
1462 * strings will be rejected.
1463 *
1464 * @returns iprt status code.
1465 * @param pwsz The string.
1466 * @param cwc The max string length. Use RTSTR_MAX to process the entire string.
1467 * @param pcch Where to store the string length (in bytes). Optional.
1468 * This is undefined on failure.
1469 */
1470RTDECL(int) RTUtf16CalcUtf8LenEx(PCRTUTF16 pwsz, size_t cwc, size_t *pcch);
1471
1472/**
1473 * Get the unicode code point at the given string position.
1474 *
1475 * @returns unicode code point.
1476 * @returns RTUNICP_INVALID if the encoding is invalid.
1477 * @param pwsz The string.
1478 *
1479 * @remark This is an internal worker for RTUtf16GetCp().
1480 */
1481RTDECL(RTUNICP) RTUtf16GetCpInternal(PCRTUTF16 pwsz);
1482
1483/**
1484 * Get the unicode code point at the given string position.
1485 *
1486 * @returns iprt status code.
1487 * @param ppwsz Pointer to the string pointer. This will be updated to
1488 * point to the char following the current code point.
1489 * @param pCp Where to store the code point.
1490 * RTUNICP_INVALID is stored here on failure.
1491 *
1492 * @remark This is an internal worker for RTUtf16GetCpEx().
1493 */
1494RTDECL(int) RTUtf16GetCpExInternal(PCRTUTF16 *ppwsz, PRTUNICP pCp);
1495
1496/**
1497 * Put the unicode code point at the given string position
1498 * and return the pointer to the char following it.
1499 *
1500 * This function will not consider anything at or following the
1501 * buffer area pointed to by pwsz. It is therefore not suitable for
1502 * inserting code points into a string, only appending/overwriting.
1503 *
1504 * @returns pointer to the char following the written code point.
1505 * @param pwsz The string.
1506 * @param CodePoint The code point to write.
1507 * This should not be RTUNICP_INVALID or any other character
1508 * out of the UTF-16 range.
1509 *
1510 * @remark This is an internal worker for RTUtf16PutCp().
1511 */
1512RTDECL(PRTUTF16) RTUtf16PutCpInternal(PRTUTF16 pwsz, RTUNICP CodePoint);
1513
1514/**
1515 * Reads the Unicode code point that starts at the given string position.
1516 *
1517 * @returns The code point found at pwsz.
1518 * @returns RTUNICP_INVALID if the encoding is invalid.
1519 * @param   pwsz    The position in the UTF-16 string to read from.
1520 *
1521 * @remark  Units below 0xd800, or above 0xdfff and below 0xfffe, are returned
1522 *          inline; everything else is handed to RTUtf16GetCpInternal().
1523 */
1524DECLINLINE(RTUNICP) RTUtf16GetCp(PCRTUTF16 pwsz)
1525{
1526    const RTUTF16 wcFirst = *pwsz;
1527    if (wcFirst >= 0xfffe || (wcFirst >= 0xd800 && wcFirst <= 0xdfff))
1528        return RTUtf16GetCpInternal(pwsz);
1529    return wcFirst;
1530}
1531
1532/**
1533 * Reads the Unicode code point at the cursor and moves the cursor past it.
1534 *
1535 * @returns iprt status code.
1536 * @param   ppwsz   Pointer to the string pointer. This is updated to point
1537 *                  to the char following the current code point.
1538 * @param   pCp     Where to store the code point.
1539 *                  RTUNICP_INVALID is stored here on failure.
1540 *
1541 * @remark  Only units outside the surrogate range and below 0xfffe are
1542 *          handled inline; the rest goes to RTUtf16GetCpExInternal().
1543 */
1544DECLINLINE(int) RTUtf16GetCpEx(PCRTUTF16 *ppwsz, PRTUNICP pCp)
1545{
1546    PCRTUTF16 const pwszCur = *ppwsz;
1547    const RTUTF16   wcFirst = *pwszCur;
1548    if (wcFirst >= 0xfffe || (wcFirst >= 0xd800 && wcFirst <= 0xdfff))
1549        return RTUtf16GetCpExInternal(ppwsz, pCp);
1550
1551    *ppwsz = pwszCur + 1;
1552    *pCp   = wcFirst;
1553    return VINF_SUCCESS;
1554}
1555
1556/**
1557 * Put the unicode code point at the given string position
1558 * and return the pointer to the char following it.
1559 *
1560 * This function will not consider anything at or following the
1561 * buffer area pointed to by pwsz. It is therefore not suitable for
1562 * inserting code points into a string, only appending/overwriting.
1563 *
1564 * @returns pointer to the char following the written code point.
1565 * @param   pwsz        The string.
1566 * @param   CodePoint   The code point to write.
1567 *                      This should not be RTUNICP_INVALID or any other
1568 *                      character out of the UTF-16 range.
1569 *
1570 * @remark  Only code points below 0xfffe and outside the whole surrogate range
1571 *          (0xd800..0xdfff) are stored inline; the rest goes to the worker.
1572 */
1573DECLINLINE(PRTUTF16) RTUtf16PutCp(PRTUTF16 pwsz, RTUNICP CodePoint)
1574{
1575    if (CodePoint < 0xd800 || (CodePoint > 0xdfff && CodePoint < 0xfffe)) /* same range test as RTUtf16GetCp() */
1576    {
1577        *pwsz++ = (RTUTF16)CodePoint;
1578        return pwsz;
1579    }
1580    return RTUtf16PutCpInternal(pwsz, CodePoint);
1581}
1582
1583/**
1584 * Steps over the code point at the given position.
1585 *
1586 * @returns Pointer to the char after the current code point.
1587 * @param   pwsz    Pointer to the current code point.
1588 * @remark  This only moves past the current code point; it does not search for the next valid one.
1589 */
1590DECLINLINE(PRTUTF16) RTUtf16NextCp(PCRTUTF16 pwsz)
1591{
1592    RTUNICP CpIgnored;                  /* only the pointer advance is wanted */
1593    RTUtf16GetCpEx(&pwsz, &CpIgnored);  /* the status code is not checked */
1594    return (PRTUTF16)pwsz;
1595}
1596
1597/**
1598 * Skips backwards, to the previous code point.
1599 *
1600 * @returns Pointer to the start of the previous code point.
1601 * @param pwszStart Pointer to the start of the string.
1602 * @param pwsz Pointer to the current code point.
1603 */
1604RTDECL(PRTUTF16) RTUtf16PrevCp(PCRTUTF16 pwszStart, PCRTUTF16 pwsz);
1605
1606
1607/**
1608 * Tells whether a UTF-16 char is a high surrogate, i.e. the
1609 * 1st char of a surrogate pair (0xd800 thru 0xdbff).
1610 *
1611 * @returns true if it is.
1612 * @returns false if it isn't.
1613 * @param   wc      The character to investigate.
1614 */
1615DECLINLINE(bool) RTUtf16IsHighSurrogate(RTUTF16 wc)
1616{
1617    return !(wc < 0xd800 || wc > 0xdbff);
1618}
1619
1620/**
1621 * Tells whether a UTF-16 char is a low surrogate, i.e. the
1622 * 2nd char of a surrogate pair (0xdc00 thru 0xdfff).
1623 *
1624 * @returns true if it is.
1625 * @returns false if it isn't.
1626 * @param   wc      The character to investigate.
1627 */
1628DECLINLINE(bool) RTUtf16IsLowSurrogate(RTUTF16 wc)
1629{
1630    return 0xdc00 <= wc && wc <= 0xdfff;
1631}
1632
1633
1634/**
1635 * Tells whether two UTF-16 chars make up a valid surrogate pair.
1636 *
1637 * @returns true if they do.
1638 * @returns false if they don't.
1639 * @param   wcHigh  The high (1st) character.
1640 * @param   wcLow   The low (2nd) character.
1641 */
1642DECLINLINE(bool) RTUtf16IsSurrogatePair(RTUTF16 wcHigh, RTUTF16 wcLow)
1643{
1644    const bool fHighOk = RTUtf16IsHighSurrogate(wcHigh);
1645    return fHighOk ? RTUtf16IsLowSurrogate(wcLow) : false;
1646}
1647
1648/** @} */
1649
1650__END_DECLS
1651
1652/** @} */
1653
1654#endif
1655
Note: See TracBrowser for help on using the repository browser.

© 2025 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette