Changeset 57941 in vbox
- Timestamp:
- Sep 29, 2015 1:45:26 PM (9 years ago)
- Location:
- trunk/include/iprt
- Files:
-
- 1 edited
- 2 copied
Legend:
- Unmodified
- Added
- Removed
-
trunk/include/iprt/latin1.h
r57927 r57941 1 1 /** @file 2 * IPRT - String Manipulation .2 * IPRT - String Manipulation, Latin-1 (ISO-8859-1) encoding. 3 3 */ 4 4 … … 24 24 */ 25 25 26 #ifndef ___iprt_string_h 27 #define ___iprt_string_h 28 29 #include <iprt/cdefs.h> 30 #include <iprt/types.h> 31 #include <iprt/assert.h> 32 #include <iprt/stdarg.h> 33 #include <iprt/err.h> /* for VINF_SUCCESS */ 34 #if defined(RT_OS_LINUX) && defined(__KERNEL__) 35 RT_C_DECLS_BEGIN 36 # define new newhack /* string.h: strreplace */ 37 # include <linux/string.h> 38 # undef new 39 RT_C_DECLS_END 40 41 #elif defined(IN_XF86_MODULE) && !defined(NO_ANSIC) 42 RT_C_DECLS_BEGIN 43 # include "xf86_ansic.h" 44 RT_C_DECLS_END 45 46 #elif defined(RT_OS_FREEBSD) && defined(_KERNEL) 47 RT_C_DECLS_BEGIN 48 /** @todo 49 * XXX: Very ugly hack to get things build on recent FreeBSD builds. They have 50 * memchr now and we need to include param.h to get __FreeBSD_version and make 51 * memchr available based on the version below or we can't compile the kernel 52 * module on older versions anymore. 53 * 54 * But including param.h here opens Pandora's box because we clash with a few 55 * defines namely PVM and PAGE_SIZE. We can safely undefine PVM here but not 56 * PAGE_SIZE because this results in build errors sooner or later. Luckily this 57 * define is in a header included by param.h (machine/param.h). We define the 58 * guards here to prevent inclusion of it if PAGE_SIZE was defined already. 59 * 60 * @todo aeichner: Search for an elegant solution and cleanup this mess ASAP! 
61 */ 62 # ifdef PAGE_SIZE 63 # define _AMD64_INCLUDE_PARAM_H_ 64 # define _I386_INCLUDE_PARAM_H_ 65 # define _MACHINE_PARAM_H_ 66 # endif 67 # include <sys/param.h> /* __FreeBSD_version */ 68 # undef PVM 69 # include <sys/libkern.h> 70 /* 71 * No memmove on versions < 7.2 72 * Defining a macro using bcopy here 73 */ 74 # define memmove(dst, src, size) bcopy(src, dst, size) 75 RT_C_DECLS_END 76 77 #elif defined(RT_OS_SOLARIS) && defined(_KERNEL) 78 /* 79 * Same case as with FreeBSD kernel: 80 * The string.h stuff clashes with sys/system.h 81 * ffs = find first set bit. 82 */ 83 # define ffs ffs_string_h 84 # include <string.h> 85 # undef ffs 86 # undef strpbrk 87 88 #else 89 # include <string.h> 90 #endif 91 92 /* 93 * Supply prototypes for standard string functions provided by 94 * IPRT instead of the operating environment. 95 */ 96 #if defined(RT_OS_DARWIN) && defined(KERNEL) 26 #ifndef ___iprt_latin1_h 27 #define ___iprt_latin1_h 28 29 #include <iprt/string.h> 30 97 31 RT_C_DECLS_BEGIN 98 void *memchr(const void *pv, int ch, size_t cb); 99 char *strpbrk(const char *pszStr, const char *pszChars); 100 RT_C_DECLS_END 101 #endif 102 103 #if defined(RT_OS_FREEBSD) && defined(_KERNEL) 104 RT_C_DECLS_BEGIN 105 #if __FreeBSD_version < 900000 106 void *memchr(const void *pv, int ch, size_t cb); 107 #endif 108 char *strpbrk(const char *pszStr, const char *pszChars); 109 RT_C_DECLS_END 110 #endif 111 112 #if !defined(RT_OS_LINUX) || !defined(_GNU_SOURCE) 113 RT_C_DECLS_BEGIN 114 void *memrchr(const char *pv, int ch, size_t cb); 115 RT_C_DECLS_END 116 #endif 117 118 119 /** @def RT_USE_RTC_3629 120 * When defined the UTF-8 range will stop at 0x10ffff. If not defined, the 121 * range stops at 0x7fffffff. 122 * @remarks Must be defined both when building and using the IPRT. */ 123 #ifdef DOXYGEN_RUNNING 124 # define RT_USE_RTC_3629 125 #endif 126 127 128 /** 129 * Byte zero the specified object. 
130 * 131 * This will use sizeof(Obj) to figure the size and will call memset, bzero 132 * or some compiler intrinsic to perform the actual zeroing. 133 * 134 * @param Obj The object to zero. Make sure to dereference pointers. 135 * 136 * @remarks Because the macro may use memset it has been placed in string.h 137 * instead of cdefs.h to avoid build issues because someone forgot 138 * to include this header. 139 * 140 * @ingroup grp_rt_cdefs 141 */ 142 #define RT_ZERO(Obj) RT_BZERO(&(Obj), sizeof(Obj)) 143 144 /** 145 * Byte zero the specified memory area. 146 * 147 * This will call memset, bzero or some compiler intrinsic to clear the 148 * specified bytes of memory. 149 * 150 * @param pv Pointer to the memory. 151 * @param cb The number of bytes to clear. Please, don't pass 0. 152 * 153 * @remarks Because the macro may use memset it has been placed in string.h 154 * instead of cdefs.h to avoid build issues because someone forgot 155 * to include this header. 156 * 157 * @ingroup grp_rt_cdefs 158 */ 159 #define RT_BZERO(pv, cb) do { memset((pv), 0, cb); } while (0) 160 161 162 163 /** @defgroup grp_rt_str RTStr - String Manipulation 164 * Mostly UTF-8 related helpers where the standard string functions won't do. 165 * @ingroup grp_rt 32 33 34 /** @defgroup rt_str_latin1 Latin-1 (ISO-8859-1) String Manipulation 35 * @ingroup grp_rt_str 36 * 37 * Deals with Latin-1 encoded strings. 38 * 39 * @warning Make sure to name all variables dealing with Latin-1 strings 40 * suchthat there is no way to mistake them for normal UTF-8 strings. 41 * There may be severe security issues resulting from mistaking Latin-1 42 * for UTF-8! 43 * 166 44 * @{ 167 45 */ 168 46 169 RT_C_DECLS_BEGIN 170 171 172 /** 173 * The maximum string length. 174 */ 175 #define RTSTR_MAX (~(size_t)0) 176 177 178 /** @def RTSTR_TAG 179 * The default allocation tag used by the RTStr allocation APIs. 
180 * 181 * When not defined before the inclusion of iprt/string.h, this will default to 182 * the pointer to the current file name. The string API will make of use of 183 * this as pointer to a volatile but read-only string. 184 */ 185 #if !defined(RTSTR_TAG) || defined(DOXYGEN_RUNNING) 186 # define RTSTR_TAG (__FILE__) 187 #endif 188 189 190 #ifdef IN_RING3 191 192 /** 193 * Allocates tmp buffer with default tag, translates pszString from UTF8 to 194 * current codepage. 195 * 196 * @returns iprt status code. 197 * @param ppszString Receives pointer of allocated native CP string. 47 /** 48 * Get the unicode code point at the given string position. 49 * 50 * @returns unicode code point. 51 * @returns RTUNICP_INVALID if the encoding is invalid. 52 * @param pszLatin1 The Latin-1 string. 53 */ 54 DECLINLINE(RTUNICP) RTLatin1GetCp(const char *pszLatin1) 55 { 56 return *(const unsigned char *)pszLatin1; 57 } 58 59 /** 60 * Get the unicode code point at the given string position. 61 * 62 * @returns iprt status code. 63 * @param ppszLatin1 Pointer to the string pointer. This will be updated to 64 * point to the char following the current code point. This 65 * is advanced one character forward on failure. 66 * @param pCp Where to store the code point. RTUNICP_INVALID is stored 67 * here on failure. 68 */ 69 DECLINLINE(int) RTLatin1GetCpEx(const char **ppszLatin1, PRTUNICP pCp) 70 { 71 const unsigned char uch = **(const unsigned char **)ppszLatin1; 72 (*ppszLatin1)++; 73 *pCp = uch; 74 return VINF_SUCCESS; 75 } 76 77 /** 78 * Get the unicode code point at the given string position for a string of a 79 * given maximum length. 80 * 81 * @returns iprt status code. 82 * @retval VERR_END_OF_STRING if *pcch is 0. *pCp is set to RTUNICP_INVALID. 83 * 84 * @param ppszLatin1 Pointer to the string pointer. This will be updated to 85 * point to the char following the current code point. 86 * @param pcchLatin1 Pointer to the maximum string length. 
This will be 87 * decremented by the size of the code point found. 88 * @param pCp Where to store the code point. 89 * RTUNICP_INVALID is stored here on failure. 90 */ 91 DECLINLINE(int) RTLatin1GetCpNEx(const char **ppszLatin1, size_t *pcchLatin1, PRTUNICP pCp) 92 { 93 if (RT_LIKELY(*pcchLatin1 != 0)) 94 { 95 const unsigned char uch = **(const unsigned char **)ppszLatin1; 96 (*ppszLatin1)++; 97 (*pcchLatin1)--; 98 *pCp = uch; 99 return VINF_SUCCESS; 100 } 101 *pCp = RTUNICP_INVALID; 102 return VERR_END_OF_STRING; 103 } 104 105 /** 106 * Get the Latin-1 size in characters of a given Unicode code point. 107 * 108 * The code point is expected to be a valid Unicode one, but not necessarily in 109 * the range supported by Latin-1. 110 * 111 * @returns the size in characters, or zero if there is no Latin-1 encoding 112 */ 113 DECLINLINE(size_t) RTLatin1CpSize(RTUNICP CodePoint) 114 { 115 if (CodePoint < 0x100) 116 return 1; 117 return 0; 118 } 119 120 /** 121 * Put the unicode code point at the given string position 122 * and return the pointer to the char following it. 123 * 124 * This function will not consider anything at or following the 125 * buffer area pointed to by psz. It is therefore not suitable for 126 * inserting code points into a string, only appending/overwriting. 127 * 128 * @returns pointer to the char following the written code point. 129 * @param pszLatin1 The string. 130 * @param CodePoint The code point to write. 131 * This should not be RTUNICP_INVALID or any other 132 * character out of the Latin-1 range. 133 */ 134 DECLINLINE(char *) RTLatin1PutCp(char *pszLatin1, RTUNICP CodePoint) 135 { 136 AssertReturn(CodePoint < 0x100, NULL); 137 *pszLatin1++ = (unsigned char)CodePoint; 138 return pszLatin1; 139 } 140 141 /** 142 * Skips ahead, past the current code point. 143 * 144 * @returns Pointer to the char after the current code point. 145 * @param pszLatin1 Pointer to the current code point. 
146 * @remark This will not move the next valid code point, only past the current one. 147 */ 148 DECLINLINE(char *) RTLatin1NextCp(const char *pszLatin1) 149 { 150 pszLatin1++; 151 return (char *)pszLatin1; 152 } 153 154 /** 155 * Skips back to the previous code point. 156 * 157 * @returns Pointer to the char before the current code point. 158 * @returns pszLatin1Start on failure. 159 * @param pszLatin1Start Pointer to the start of the string. 160 * @param pszLatin1 Pointer to the current code point. 161 */ 162 DECLINLINE(char *) RTLatin1PrevCp(const char *pszLatin1Start, const char *pszLatin1) 163 { 164 if ((uintptr_t)pszLatin1 > (uintptr_t)pszLatin1Start) 165 { 166 pszLatin1--; 167 return (char *)pszLatin1; 168 } 169 return (char *)pszLatin1Start; 170 } 171 172 /** 173 * Translate a Latin1 string into a UTF-8 allocating the result buffer (default 174 * tag). 175 * 176 * @returns iprt status code. 177 * @param pszLatin1 Latin1 string to convert. 178 * @param ppszString Receives pointer of allocated UTF-8 string on 179 * success, and is always set to NULL on failure. 198 180 * The returned pointer must be freed using RTStrFree(). 199 * @param pszString UTF-8 string to convert. 200 */ 201 #define RTStrUtf8ToCurrentCP(ppszString, pszString) RTStrUtf8ToCurrentCPTag((ppszString), (pszString), RTSTR_TAG) 202 203 /** 204 * Allocates tmp buffer with custom tag, translates pszString from UTF8 to 205 * current codepage. 206 * 207 * @returns iprt status code. 208 * @param ppszString Receives pointer of allocated native CP string. 209 * The returned pointer must be freed using 210 * RTStrFree()., const char *pszTag 211 * @param pszString UTF-8 string to convert. 181 */ 182 #define RTLatin1ToUtf8(pszLatin1, ppszString) RTLatin1ToUtf8Tag((pszLatin1), (ppszString), RTSTR_TAG) 183 184 /** 185 * Translate a Latin-1 string into a UTF-8 allocating the result buffer. 186 * 187 * @returns iprt status code. 188 * @param pszLatin1 Latin-1 string to convert. 
189 * @param ppszString Receives pointer of allocated UTF-8 string on 190 * success, and is always set to NULL on failure. 191 * The returned pointer must be freed using RTStrFree(). 212 192 * @param pszTag Allocation tag used for statistics and such. 213 193 */ 214 RTR3DECL(int) RTStrUtf8ToCurrentCPTag(char **ppszString, const char *pszString, const char *pszTag); 215 216 /** 217 * Allocates tmp buffer, translates pszString from current codepage to UTF-8. 218 * 219 * @returns iprt status code. 220 * @param ppszString Receives pointer of allocated UTF-8 string. 221 * The returned pointer must be freed using RTStrFree(). 222 * @param pszString Native string to convert. 223 */ 224 #define RTStrCurrentCPToUtf8(ppszString, pszString) RTStrCurrentCPToUtf8Tag((ppszString), (pszString), RTSTR_TAG) 225 226 /** 227 * Allocates tmp buffer, translates pszString from current codepage to UTF-8. 228 * 229 * @returns iprt status code. 230 * @param ppszString Receives pointer of allocated UTF-8 string. 231 * The returned pointer must be freed using RTStrFree(). 232 * @param pszString Native string to convert. 233 * @param pszTag Allocation tag used for statistics and such. 234 */ 235 RTR3DECL(int) RTStrCurrentCPToUtf8Tag(char **ppszString, const char *pszString, const char *pszTag); 236 237 #endif /* IN_RING3 */ 238 239 /** 240 * Free string allocated by any of the non-UCS-2 string functions. 241 * 242 * @returns iprt status code. 243 * @param pszString Pointer to buffer with string to free. 244 * NULL is accepted. 245 */ 246 RTDECL(void) RTStrFree(char *pszString); 247 248 /** 249 * Allocates a new copy of the given UTF-8 string (default tag). 250 * 251 * @returns Pointer to the allocated UTF-8 string. 252 * @param pszString UTF-8 string to duplicate. 253 */ 254 #define RTStrDup(pszString) RTStrDupTag((pszString), RTSTR_TAG) 255 256 /** 257 * Allocates a new copy of the given UTF-8 string (custom tag). 258 * 259 * @returns Pointer to the allocated UTF-8 string. 
260 * @param pszString UTF-8 string to duplicate. 261 * @param pszTag Allocation tag used for statistics and such. 262 */ 263 RTDECL(char *) RTStrDupTag(const char *pszString, const char *pszTag); 264 265 /** 266 * Allocates a new copy of the given UTF-8 string (default tag). 267 * 268 * @returns iprt status code. 269 * @param ppszString Receives pointer of the allocated UTF-8 string. 270 * The returned pointer must be freed using RTStrFree(). 271 * @param pszString UTF-8 string to duplicate. 272 */ 273 #define RTStrDupEx(ppszString, pszString) RTStrDupExTag((ppszString), (pszString), RTSTR_TAG) 274 275 /** 276 * Allocates a new copy of the given UTF-8 string (custom tag). 277 * 278 * @returns iprt status code. 279 * @param ppszString Receives pointer of the allocated UTF-8 string. 280 * The returned pointer must be freed using RTStrFree(). 281 * @param pszString UTF-8 string to duplicate. 282 * @param pszTag Allocation tag used for statistics and such. 283 */ 284 RTDECL(int) RTStrDupExTag(char **ppszString, const char *pszString, const char *pszTag); 285 286 /** 287 * Allocates a new copy of the given UTF-8 substring (default tag). 288 * 289 * @returns Pointer to the allocated UTF-8 substring. 290 * @param pszString UTF-8 string to duplicate. 291 * @param cchMax The max number of chars to duplicate, not counting 292 * the terminator. 293 */ 294 #define RTStrDupN(pszString, cchMax) RTStrDupNTag((pszString), (cchMax), RTSTR_TAG) 295 296 /** 297 * Allocates a new copy of the given UTF-8 substring (custom tag). 298 * 299 * @returns Pointer to the allocated UTF-8 substring. 300 * @param pszString UTF-8 string to duplicate. 301 * @param cchMax The max number of chars to duplicate, not counting 302 * the terminator. 303 * @param pszTag Allocation tag used for statistics and such. 304 */ 305 RTDECL(char *) RTStrDupNTag(const char *pszString, size_t cchMax, const char *pszTag); 306 307 /** 308 * Appends a string onto an existing IPRT allocated string (default tag). 
309 * 310 * @retval VINF_SUCCESS 311 * @retval VERR_NO_STR_MEMORY if we failed to reallocate the string, @a *ppsz 312 * remains unchanged. 313 * 314 * @param ppsz Pointer to the string pointer. The string 315 * pointer must either be NULL or point to a string 316 * returned by an IPRT string API. (In/Out) 317 * @param pszAppend The string to append. NULL and empty strings 318 * are quietly ignored. 319 */ 320 #define RTStrAAppend(ppsz, pszAppend) RTStrAAppendTag((ppsz), (pszAppend), RTSTR_TAG) 321 322 /** 323 * Appends a string onto an existing IPRT allocated string (custom tag). 324 * 325 * @retval VINF_SUCCESS 326 * @retval VERR_NO_STR_MEMORY if we failed to reallocate the string, @a *ppsz 327 * remains unchanged. 328 * 329 * @param ppsz Pointer to the string pointer. The string 330 * pointer must either be NULL or point to a string 331 * returned by an IPRT string API. (In/Out) 332 * @param pszAppend The string to append. NULL and empty strings 333 * are quietly ignored. 334 * @param pszTag Allocation tag used for statistics and such. 335 */ 336 RTDECL(int) RTStrAAppendTag(char **ppsz, const char *pszAppend, const char *pszTag); 337 338 /** 339 * Appends N bytes from a strings onto an existing IPRT allocated string 340 * (default tag). 341 * 342 * @retval VINF_SUCCESS 343 * @retval VERR_NO_STR_MEMORY if we failed to reallocate the string, @a *ppsz 344 * remains unchanged. 345 * 346 * @param ppsz Pointer to the string pointer. The string 347 * pointer must either be NULL or point to a string 348 * returned by an IPRT string API. (In/Out) 349 * @param pszAppend The string to append. Can be NULL if cchAppend 350 * is NULL. 351 * @param cchAppend The number of chars (not code points) to append 352 * from pszAppend. Must not be more than 353 * @a pszAppend contains, except for the special 354 * value RTSTR_MAX that can be used to indicate all 355 * of @a pszAppend without having to strlen it. 
356 */ 357 #define RTStrAAppendN(ppsz, pszAppend, cchAppend) RTStrAAppendNTag((ppsz), (pszAppend), (cchAppend), RTSTR_TAG) 358 359 /** 360 * Appends N bytes from a strings onto an existing IPRT allocated string (custom 361 * tag). 362 * 363 * @retval VINF_SUCCESS 364 * @retval VERR_NO_STR_MEMORY if we failed to reallocate the string, @a *ppsz 365 * remains unchanged. 366 * 367 * @param ppsz Pointer to the string pointer. The string 368 * pointer must either be NULL or point to a string 369 * returned by an IPRT string API. (In/Out) 370 * @param pszAppend The string to append. Can be NULL if cchAppend 371 * is NULL. 372 * @param cchAppend The number of chars (not code points) to append 373 * from pszAppend. Must not be more than 374 * @a pszAppend contains, except for the special 375 * value RTSTR_MAX that can be used to indicate all 376 * of @a pszAppend without having to strlen it. 377 * @param pszTag Allocation tag used for statistics and such. 378 */ 379 RTDECL(int) RTStrAAppendNTag(char **ppsz, const char *pszAppend, size_t cchAppend, const char *pszTag); 380 381 /** 382 * Appends one or more strings onto an existing IPRT allocated string. 383 * 384 * This is a very flexible and efficient alternative to using RTStrAPrintf to 385 * combine several strings together. 386 * 387 * @retval VINF_SUCCESS 388 * @retval VERR_NO_STR_MEMORY if we failed to reallocate the string, @a *ppsz 389 * remains unchanged. 390 * 391 * @param ppsz Pointer to the string pointer. The string 392 * pointer must either be NULL or point to a string 393 * returned by an IPRT string API. (In/Out) 394 * @param cPairs The number of string / length pairs in the 395 * @a va. 396 * @param va List of string (const char *) and length 397 * (size_t) pairs. The strings will be appended to 398 * the string in the first argument. 
399 */ 400 #define RTStrAAppendExNV(ppsz, cPairs, va) RTStrAAppendExNVTag((ppsz), (cPairs), (va), RTSTR_TAG) 401 402 /** 403 * Appends one or more strings onto an existing IPRT allocated string. 404 * 405 * This is a very flexible and efficient alternative to using RTStrAPrintf to 406 * combine several strings together. 407 * 408 * @retval VINF_SUCCESS 409 * @retval VERR_NO_STR_MEMORY if we failed to reallocate the string, @a *ppsz 410 * remains unchanged. 411 * 412 * @param ppsz Pointer to the string pointer. The string 413 * pointer must either be NULL or point to a string 414 * returned by an IPRT string API. (In/Out) 415 * @param cPairs The number of string / length pairs in the 416 * @a va. 417 * @param va List of string (const char *) and length 418 * (size_t) pairs. The strings will be appended to 419 * the string in the first argument. 420 * @param pszTag Allocation tag used for statistics and such. 421 */ 422 RTDECL(int) RTStrAAppendExNVTag(char **ppsz, size_t cPairs, va_list va, const char *pszTag); 423 424 /** 425 * Appends one or more strings onto an existing IPRT allocated string 426 * (untagged). 427 * 428 * This is a very flexible and efficient alternative to using RTStrAPrintf to 429 * combine several strings together. 430 * 431 * @retval VINF_SUCCESS 432 * @retval VERR_NO_STR_MEMORY if we failed to reallocate the string, @a *ppsz 433 * remains unchanged. 434 * 435 * @param ppsz Pointer to the string pointer. The string 436 * pointer must either be NULL or point to a string 437 * returned by an IPRT string API. (In/Out) 438 * @param cPairs The number of string / length pairs in the 439 * ellipsis. 440 * @param ... List of string (const char *) and length 441 * (size_t) pairs. The strings will be appended to 442 * the string in the first argument. 443 */ 444 DECLINLINE(int) RTStrAAppendExN(char **ppsz, size_t cPairs, ...) 
445 { 446 int rc; 447 va_list va; 448 va_start(va, cPairs); 449 rc = RTStrAAppendExNVTag(ppsz, cPairs, va, RTSTR_TAG); 450 va_end(va); 451 return rc; 452 } 453 454 /** 455 * Appends one or more strings onto an existing IPRT allocated string (custom 456 * tag). 457 * 458 * This is a very flexible and efficient alternative to using RTStrAPrintf to 459 * combine several strings together. 460 * 461 * @retval VINF_SUCCESS 462 * @retval VERR_NO_STR_MEMORY if we failed to reallocate the string, @a *ppsz 463 * remains unchanged. 464 * 465 * @param ppsz Pointer to the string pointer. The string 466 * pointer must either be NULL or point to a string 467 * returned by an IPRT string API. (In/Out) 468 * @param pszTag Allocation tag used for statistics and such. 469 * @param cPairs The number of string / length pairs in the 470 * ellipsis. 471 * @param ... List of string (const char *) and length 472 * (size_t) pairs. The strings will be appended to 473 * the string in the first argument. 474 */ 475 DECLINLINE(int) RTStrAAppendExNTag(char **ppsz, const char *pszTag, size_t cPairs, ...) 476 { 477 int rc; 478 va_list va; 479 va_start(va, cPairs); 480 rc = RTStrAAppendExNVTag(ppsz, cPairs, va, pszTag); 481 va_end(va); 482 return rc; 483 } 484 485 /** 486 * Truncates an IPRT allocated string (default tag). 487 * 488 * @retval VINF_SUCCESS. 489 * @retval VERR_OUT_OF_RANGE if cchNew is too long. Nothing is done. 490 * 491 * @param ppsz Pointer to the string pointer. The string 492 * pointer can be NULL if @a cchNew is 0, no change 493 * is made then. If we actually reallocate the 494 * string, the string pointer might be changed by 495 * this call. (In/Out) 496 * @param cchNew The new string length (excluding the 497 * terminator). The string must be at least this 498 * long or we'll return VERR_OUT_OF_RANGE and 499 * assert on you. 500 */ 501 #define RTStrATruncate(ppsz, cchNew) RTStrATruncateTag((ppsz), (cchNew), RTSTR_TAG) 502 503 /** 504 * Truncates an IPRT allocated string. 
505 * 506 * @retval VINF_SUCCESS. 507 * @retval VERR_OUT_OF_RANGE if cchNew is too long. Nothing is done. 508 * 509 * @param ppsz Pointer to the string pointer. The string 510 * pointer can be NULL if @a cchNew is 0, no change 511 * is made then. If we actually reallocate the 512 * string, the string pointer might be changed by 513 * this call. (In/Out) 514 * @param cchNew The new string length (excluding the 515 * terminator). The string must be at least this 516 * long or we'll return VERR_OUT_OF_RANGE and 517 * assert on you. 518 * @param pszTag Allocation tag used for statistics and such. 519 */ 520 RTDECL(int) RTStrATruncateTag(char **ppsz, size_t cchNew, const char *pszTag); 521 522 /** 523 * Allocates memory for string storage (default tag). 524 * 525 * You should normally not use this function, except if there is some very 526 * custom string handling you need doing that isn't covered by any of the other 527 * APIs. 528 * 529 * @returns Pointer to the allocated string. The first byte is always set 530 * to the string terminator char, the contents of the remainder of the 531 * memory is undefined. The string must be freed by calling RTStrFree. 532 * 533 * NULL is returned if the allocation failed. Please translate this to 534 * VERR_NO_STR_MEMORY and not VERR_NO_MEMORY. Also consider 535 * RTStrAllocEx if an IPRT status code is required. 536 * 537 * @param cb How many bytes to allocate. If this is zero, we 538 * will allocate a terminator byte anyway. 539 */ 540 #define RTStrAlloc(cb) RTStrAllocTag((cb), RTSTR_TAG) 541 542 /** 543 * Allocates memory for string storage (custom tag). 544 * 545 * You should normally not use this function, except if there is some very 546 * custom string handling you need doing that isn't covered by any of the other 547 * APIs. 548 * 549 * @returns Pointer to the allocated string. The first byte is always set 550 * to the string terminator char, the contents of the remainder of the 551 * memory is undefined. 
The string must be freed by calling RTStrFree. 552 * 553 * NULL is returned if the allocation failed. Please translate this to 554 * VERR_NO_STR_MEMORY and not VERR_NO_MEMORY. Also consider 555 * RTStrAllocEx if an IPRT status code is required. 556 * 557 * @param cb How many bytes to allocate. If this is zero, we 558 * will allocate a terminator byte anyway. 559 * @param pszTag Allocation tag used for statistics and such. 560 */ 561 RTDECL(char *) RTStrAllocTag(size_t cb, const char *pszTag); 562 563 /** 564 * Allocates memory for string storage, with status code (default tag). 565 * 566 * You should normally not use this function, except if there is some very 567 * custom string handling you need doing that isn't covered by any of the other 568 * APIs. 569 * 570 * @retval VINF_SUCCESS 571 * @retval VERR_NO_STR_MEMORY 572 * 573 * @param ppsz Where to return the allocated string. This will 574 * be set to NULL on failure. On success, the 575 * returned memory will always start with a 576 * terminator char so that it is considered a valid 577 * C string, the contents of rest of the memory is 578 * undefined. 579 * @param cb How many bytes to allocate. If this is zero, we 580 * will allocate a terminator byte anyway. 581 */ 582 #define RTStrAllocEx(ppsz, cb) RTStrAllocExTag((ppsz), (cb), RTSTR_TAG) 583 584 /** 585 * Allocates memory for string storage, with status code (custom tag). 586 * 587 * You should normally not use this function, except if there is some very 588 * custom string handling you need doing that isn't covered by any of the other 589 * APIs. 590 * 591 * @retval VINF_SUCCESS 592 * @retval VERR_NO_STR_MEMORY 593 * 594 * @param ppsz Where to return the allocated string. This will 595 * be set to NULL on failure. On success, the 596 * returned memory will always start with a 597 * terminator char so that it is considered a valid 598 * C string, the contents of rest of the memory is 599 * undefined. 600 * @param cb How many bytes to allocate. 
If this is zero, we 601 * will allocate a terminator byte anyway. 602 * @param pszTag Allocation tag used for statistics and such. 603 */ 604 RTDECL(int) RTStrAllocExTag(char **ppsz, size_t cb, const char *pszTag); 605 606 /** 607 * Reallocates the specified string (default tag). 608 * 609 * You should normally not have use this function, except perhaps to truncate a 610 * really long string you've got from some IPRT string API, but then you should 611 * use RTStrATruncate. 612 * 613 * @returns VINF_SUCCESS. 614 * @retval VERR_NO_STR_MEMORY if we failed to reallocate the string, @a *ppsz 615 * remains unchanged. 616 * 617 * @param ppsz Pointer to the string variable containing the 618 * input and output string. 619 * 620 * When not freeing the string, the result will 621 * always have the last byte set to the terminator 622 * character so that when used for string 623 * truncation the result will be a valid C string 624 * (your job to keep it a valid UTF-8 string). 625 * 626 * When the input string is NULL and we're supposed 627 * to reallocate, the returned string will also 628 * have the first byte set to the terminator char 629 * so it will be a valid C string. 630 * 631 * @param cbNew When @a cbNew is zero, we'll behave like 632 * RTStrFree and @a *ppsz will be set to NULL. 633 * 634 * When not zero, this will be the new size of the 635 * memory backing the string, i.e. it includes the 636 * terminator char. 637 */ 638 #define RTStrRealloc(ppsz, cbNew) RTStrReallocTag((ppsz), (cbNew), RTSTR_TAG) 639 640 /** 641 * Reallocates the specified string (custom tag). 642 * 643 * You should normally not have use this function, except perhaps to truncate a 644 * really long string you've got from some IPRT string API, but then you should 645 * use RTStrATruncate. 646 * 647 * @returns VINF_SUCCESS. 648 * @retval VERR_NO_STR_MEMORY if we failed to reallocate the string, @a *ppsz 649 * remains unchanged. 
650 * 651 * @param ppsz Pointer to the string variable containing the 652 * input and output string. 653 * 654 * When not freeing the string, the result will 655 * always have the last byte set to the terminator 656 * character so that when used for string 657 * truncation the result will be a valid C string 658 * (your job to keep it a valid UTF-8 string). 659 * 660 * When the input string is NULL and we're supposed 661 * to reallocate, the returned string will also 662 * have the first byte set to the terminator char 663 * so it will be a valid C string. 664 * 665 * @param cbNew When @a cbNew is zero, we'll behave like 666 * RTStrFree and @a *ppsz will be set to NULL. 667 * 668 * When not zero, this will be the new size of the 669 * memory backing the string, i.e. it includes the 670 * terminator char. 671 * @param pszTag Allocation tag used for statistics and such. 672 */ 673 RTDECL(int) RTStrReallocTag(char **ppsz, size_t cbNew, const char *pszTag); 674 675 /** 676 * Validates the UTF-8 encoding of the string. 677 * 678 * @returns iprt status code. 679 * @param psz The string. 680 */ 681 RTDECL(int) RTStrValidateEncoding(const char *psz); 682 683 /** @name Flags for RTStrValidateEncodingEx and RTUtf16ValidateEncodingEx 684 */ 685 /** Check that the string is zero terminated within the given size. 686 * VERR_BUFFER_OVERFLOW will be returned if the check fails. */ 687 #define RTSTR_VALIDATE_ENCODING_ZERO_TERMINATED RT_BIT_32(0) 688 /** Check that the string is exactly the given length. 689 * If it terminates early, VERR_BUFFER_UNDERFLOW will be returned. When used 690 * together with RTSTR_VALIDATE_ENCODING_ZERO_TERMINATED, the given length must 691 * include the terminator or VERR_BUFFER_OVERFLOW will be returned. */ 692 #define RTSTR_VALIDATE_ENCODING_EXACT_LENGTH RT_BIT_32(1) 693 /** @} */ 694 695 /** 696 * Validates the UTF-8 encoding of the string. 697 * 698 * @returns iprt status code. 699 * @param psz The string. 
700 * @param cch The max string length (/ size). Use RTSTR_MAX to 701 * process the entire string. 702 * @param fFlags Combination of RTSTR_VALIDATE_ENCODING_XXX flags. 703 */ 704 RTDECL(int) RTStrValidateEncodingEx(const char *psz, size_t cch, uint32_t fFlags); 705 706 /** 707 * Checks if the UTF-8 encoding is valid. 708 * 709 * @returns true / false. 710 * @param psz The string. 711 */ 712 RTDECL(bool) RTStrIsValidEncoding(const char *psz); 713 714 /** 715 * Purge all bad UTF-8 encoding in the string, replacing it with '?'. 716 * 717 * @returns The number of bad characters (0 if nothing was done). 718 * @param psz The string to purge. 719 */ 720 RTDECL(size_t) RTStrPurgeEncoding(char *psz); 721 722 /** 723 * Sanitise a (valid) UTF-8 string by replacing all characters outside a white 724 * list in-place by an ASCII replacement character. Multi-byte characters will 725 * be replaced byte by byte. 726 * 727 * @returns The number of code points replaced, or a negative value if the 728 * string is not correctly encoded. In this last case the string 729 * may be partially processed. 730 * @param psz The string to sanitise. 731 * @param puszValidSet A zero-terminated array of pairs of Unicode points. 732 * Each pair is the start and end point of a range, 733 * and the union of these ranges forms the white list. 734 * @param chReplacement The ASCII replacement character. 735 */ 736 RTDECL(ssize_t) RTStrPurgeComplementSet(char *psz, PCRTUNICP puszValidSet, char chReplacement); 737 738 /** 739 * Gets the number of code points the string is made up of, excluding 740 * the terminator. 741 * 742 * 743 * @returns Number of code points (RTUNICP). 744 * @returns 0 if the string was incorrectly encoded. 745 * @param psz The string. 746 */ 747 RTDECL(size_t) RTStrUniLen(const char *psz); 748 749 /** 750 * Gets the number of code points the string is made up of, excluding 751 * the terminator. 
752 * 753 * This function will validate the string, and incorrectly encoded UTF-8 754 * strings will be rejected. 755 * 756 * @returns iprt status code. 757 * @param psz The string. 758 * @param cch The max string length. Use RTSTR_MAX to process the entire string. 759 * @param pcuc Where to store the code point count. 760 * This is undefined on failure. 761 */ 762 RTDECL(int) RTStrUniLenEx(const char *psz, size_t cch, size_t *pcuc); 763 764 /** 765 * Translate a UTF-8 string into an unicode string (i.e. RTUNICPs), allocating the string buffer. 766 * 767 * @returns iprt status code. 768 * @param pszString UTF-8 string to convert. 769 * @param ppUniString Receives pointer to the allocated unicode string. 770 * The returned string must be freed using RTUniFree(). 771 */ 772 RTDECL(int) RTStrToUni(const char *pszString, PRTUNICP *ppUniString); 773 774 /** 775 * Translates pszString from UTF-8 to an array of code points, allocating the result 776 * array if requested. 777 * 778 * @returns iprt status code. 779 * @param pszString UTF-8 string to convert. 780 * @param cchString The maximum size in chars (the type) to convert. The conversion stop 781 * when it reaches cchString or the string terminator ('\\0'). 782 * Use RTSTR_MAX to translate the entire string. 783 * @param ppaCps If cCps is non-zero, this must either be pointing to pointer to 784 * a buffer of the specified size, or pointer to a NULL pointer. 785 * If *ppusz is NULL or cCps is zero a buffer of at least cCps items 786 * will be allocated to hold the translated string. 787 * If a buffer was requested it must be freed using RTUtf16Free(). 788 * @param cCps The number of code points in the unicode string. This includes the terminator. 
789 * @param pcCps Where to store the length of the translated string, 194 RTDECL(int) RTLatin1ToUtf8Tag(const char *pszLatin1, char **ppszString, const char *pszTag); 195 196 /** 197 * Translates Latin-1 to UTF-8 using buffer provided by the caller or a fittingly 198 * sized buffer allocated by the function (default tag). 199 * 200 * @returns iprt status code. 201 * @param pszLatin1 The Latin-1 string to convert. 202 * @param cchLatin1 The number of Latin-1 characters to translate from 203 * pszLatin1. The translation will stop when reaching 204 * cchLatin1 or the terminator ('\\0'). Use RTSTR_MAX 205 * to translate the entire string. 206 * @param ppsz If @a cch is non-zero, this must either be pointing 207 * to a pointer to a buffer of the specified size, or 208 * pointer to a NULL pointer. If *ppsz is NULL or 209 * @a cch is zero a buffer of at least @a cch chars 210 * will be allocated to hold the translated string. If 211 * a buffer was requested it must be freed using 212 * RTStrFree(). 213 * @param cch The buffer size in chars (the type). This includes the terminator. 214 * @param pcch Where to store the length of the translated string, 790 215 * excluding the terminator. (Optional) 791 216 * … … 795 220 * length that can be used to resize the buffer. 796 221 */ 797 RTDECL(int) RTStrToUniEx(const char *pszString, size_t cchString, PRTUNICP *ppaCps, size_t cCps, size_t *pcCps); 798 799 /** 800 * Calculates the length of the string in RTUTF16 items. 801 * 802 * This function will validate the string, and incorrectly encoded UTF-8 803 * strings will be rejected. The primary purpose of this function is to 804 * help allocate buffers for RTStrToUtf16Ex of the correct size. For most 805 * other purposes RTStrCalcUtf16LenEx() should be used. 806 * 807 * @returns Number of RTUTF16 items. 808 * @returns 0 if the string was incorrectly encoded. 809 * @param psz The string. 
810 */ 811 RTDECL(size_t) RTStrCalcUtf16Len(const char *psz); 812 813 /** 814 * Calculates the length of the string in RTUTF16 items. 815 * 816 * This function will validate the string, and incorrectly encoded UTF-8 817 * strings will be rejected. 818 * 819 * @returns iprt status code. 820 * @param psz The string. 821 * @param cch The max string length. Use RTSTR_MAX to process the entire string. 822 * @param pcwc Where to store the string length. Optional. 823 * This is undefined on failure. 824 */ 825 RTDECL(int) RTStrCalcUtf16LenEx(const char *psz, size_t cch, size_t *pcwc); 826 827 /** 828 * Translate a UTF-8 string into a UTF-16 allocating the result buffer (default 829 * tag). 830 * 831 * @returns iprt status code. 832 * @param pszString UTF-8 string to convert. 833 * @param ppwszString Receives pointer to the allocated UTF-16 string. 834 * The returned string must be freed using RTUtf16Free(). 835 */ 836 #define RTStrToUtf16(pszString, ppwszString) RTStrToUtf16Tag((pszString), (ppwszString), RTSTR_TAG) 837 838 /** 839 * Translate a UTF-8 string into a UTF-16 allocating the result buffer (custom 840 * tag). 841 * 842 * @returns iprt status code. 843 * @param pszString UTF-8 string to convert. 844 * @param ppwszString Receives pointer to the allocated UTF-16 string. 845 * The returned string must be freed using RTUtf16Free(). 846 * @param pszTag Allocation tag used for statistics and such. 847 */ 848 RTDECL(int) RTStrToUtf16Tag(const char *pszString, PRTUTF16 *ppwszString, const char *pszTag); 849 850 /** 851 * Translates pszString from UTF-8 to UTF-16, allocating the result buffer if requested. 852 * 853 * @returns iprt status code. 854 * @param pszString UTF-8 string to convert. 855 * @param cchString The maximum size in chars (the type) to convert. The conversion stop 856 * when it reaches cchString or the string terminator ('\\0'). 857 * Use RTSTR_MAX to translate the entire string. 
858 * @param ppwsz If cwc is non-zero, this must either be pointing to pointer to 859 * a buffer of the specified size, or pointer to a NULL pointer. 860 * If *ppwsz is NULL or cwc is zero a buffer of at least cwc items 861 * will be allocated to hold the translated string. 862 * If a buffer was requested it must be freed using RTUtf16Free(). 863 * @param cwc The buffer size in RTUTF16s. This includes the terminator. 864 * @param pcwc Where to store the length of the translated string, 865 * excluding the terminator. (Optional) 866 * 867 * This may be set under some error conditions, 868 * however, only for VERR_BUFFER_OVERFLOW and 869 * VERR_NO_STR_MEMORY will it contain a valid string 870 * length that can be used to resize the buffer. 871 */ 872 #define RTStrToUtf16Ex(pszString, cchString, ppwsz, cwc, pcwc) \ 873 RTStrToUtf16ExTag((pszString), (cchString), (ppwsz), (cwc), (pcwc), RTSTR_TAG) 874 875 /** 876 * Translates pszString from UTF-8 to UTF-16, allocating the result buffer if 877 * requested (custom tag). 878 * 879 * @returns iprt status code. 880 * @param pszString UTF-8 string to convert. 881 * @param cchString The maximum size in chars (the type) to convert. The conversion stop 882 * when it reaches cchString or the string terminator ('\\0'). 883 * Use RTSTR_MAX to translate the entire string. 884 * @param ppwsz If cwc is non-zero, this must either be pointing to pointer to 885 * a buffer of the specified size, or pointer to a NULL pointer. 886 * If *ppwsz is NULL or cwc is zero a buffer of at least cwc items 887 * will be allocated to hold the translated string. 888 * If a buffer was requested it must be freed using RTUtf16Free(). 889 * @param cwc The buffer size in RTUTF16s. This includes the terminator. 890 * @param pcwc Where to store the length of the translated string, 891 * excluding the terminator. 
(Optional) 892 * 893 * This may be set under some error conditions, 894 * however, only for VERR_BUFFER_OVERFLOW and 895 * VERR_NO_STR_MEMORY will it contain a valid string 896 * length that can be used to resize the buffer. 897 * @param pszTag Allocation tag used for statistics and such. 898 */ 899 RTDECL(int) RTStrToUtf16ExTag(const char *pszString, size_t cchString, PRTUTF16 *ppwsz, size_t cwc, size_t *pcwc, const char *pszTag); 900 901 902 /** 903 * Calculates the length of the string in Latin-1 characters. 904 * 905 * This function will validate the string, and incorrectly encoded UTF-8 906 * strings as well as string with codepoints outside the latin-1 range will be 907 * rejected. The primary purpose of this function is to help allocate buffers 908 * for RTStrToLatin1Ex of the correct size. For most other purposes 909 * RTStrCalcLatin1LenEx() should be used. 910 * 911 * @returns Number of Latin-1 characters. 912 * @returns 0 if the string was incorrectly encoded. 913 * @param psz The string. 914 */ 915 RTDECL(size_t) RTStrCalcLatin1Len(const char *psz); 916 917 /** 918 * Calculates the length of the string in Latin-1 characters. 919 * 920 * This function will validate the string, and incorrectly encoded UTF-8 921 * strings as well as string with codepoints outside the latin-1 range will be 922 * rejected. 923 * 924 * @returns iprt status code. 925 * @param psz The string. 926 * @param cch The max string length. Use RTSTR_MAX to process the 927 * entire string. 928 * @param pcch Where to store the string length. Optional. 929 * This is undefined on failure. 930 */ 931 RTDECL(int) RTStrCalcLatin1LenEx(const char *psz, size_t cch, size_t *pcch); 932 933 /** 934 * Translate a UTF-8 string into a Latin-1 allocating the result buffer (default 935 * tag). 936 * 937 * @returns iprt status code. 938 * @param pszString UTF-8 string to convert. 939 * @param ppszString Receives pointer to the allocated Latin-1 string. 
940 * The returned string must be freed using RTStrFree(). 941 */ 942 #define RTStrToLatin1(pszString, ppszString) RTStrToLatin1Tag((pszString), (ppszString), RTSTR_TAG) 943 944 /** 945 * Translate a UTF-8 string into a Latin-1 allocating the result buffer (custom 946 * tag). 947 * 948 * @returns iprt status code. 949 * @param pszString UTF-8 string to convert. 950 * @param ppszString Receives pointer to the allocated Latin-1 string. 951 * The returned string must be freed using RTStrFree(). 952 * @param pszTag Allocation tag used for statistics and such. 953 */ 954 RTDECL(int) RTStrToLatin1Tag(const char *pszString, char **ppszString, const char *pszTag); 955 956 /** 957 * Translates pszString from UTF-8 to Latin-1, allocating the result buffer if requested. 958 * 959 * @returns iprt status code. 960 * @param pszString UTF-8 string to convert. 961 * @param cchString The maximum size in chars (the type) to convert. 962 * The conversion stop when it reaches cchString or 963 * the string terminator ('\\0'). Use RTSTR_MAX to 964 * translate the entire string. 965 * @param ppsz If cch is non-zero, this must either be pointing to 966 * pointer to a buffer of the specified size, or 967 * pointer to a NULL pointer. If *ppsz is NULL or cch 968 * is zero a buffer of at least cch items will be 969 * allocated to hold the translated string. If a 970 * buffer was requested it must be freed using 971 * RTStrFree(). 972 * @param cch The buffer size in bytes. This includes the 973 * terminator. 974 * @param pcch Where to store the length of the translated string, 975 * excluding the terminator. (Optional) 976 * 977 * This may be set under some error conditions, 978 * however, only for VERR_BUFFER_OVERFLOW and 979 * VERR_NO_STR_MEMORY will it contain a valid string 980 * length that can be used to resize the buffer. 
981 */ 982 #define RTStrToLatin1Ex(pszString, cchString, ppsz, cch, pcch) \ 983 RTStrToLatin1ExTag((pszString), (cchString), (ppsz), (cch), (pcch), RTSTR_TAG) 984 985 /** 986 * Translates pszString from UTF-8 to Latin1, allocating the result buffer if 987 * requested (custom tag). 988 * 989 * @returns iprt status code. 990 * @param pszString UTF-8 string to convert. 991 * @param cchString The maximum size in chars (the type) to convert. 992 * The conversion stop when it reaches cchString or 993 * the string terminator ('\\0'). Use RTSTR_MAX to 994 * translate the entire string. 995 * @param ppsz If cch is non-zero, this must either be pointing to 996 * pointer to a buffer of the specified size, or 997 * pointer to a NULL pointer. If *ppsz is NULL or cch 998 * is zero a buffer of at least cch items will be 999 * allocated to hold the translated string. If a 1000 * buffer was requested it must be freed using 1001 * RTStrFree(). 1002 * @param cch The buffer size in bytes. This includes the 1003 * terminator. 1004 * @param pcch Where to store the length of the translated string, 1005 * excluding the terminator. (Optional) 1006 * 1007 * This may be set under some error conditions, 1008 * however, only for VERR_BUFFER_OVERFLOW and 1009 * VERR_NO_STR_MEMORY will it contain a valid string 1010 * length that can be used to resize the buffer. 1011 * @param pszTag Allocation tag used for statistics and such. 1012 */ 1013 RTDECL(int) RTStrToLatin1ExTag(const char *pszString, size_t cchString, char **ppsz, size_t cch, size_t *pcch, const char *pszTag); 1014 1015 1016 /** 1017 * Translate a Latin1 string into a UTF-8 allocating the result buffer (default 1018 * tag). 1019 * 1020 * @returns iprt status code. 1021 * @param pszString Latin1 string to convert. 1022 * @param ppszString Receives pointer of allocated UTF-8 string on 1023 * success, and is always set to NULL on failure. 1024 * The returned pointer must be freed using RTStrFree(). 
1025 */ 1026 #define RTLatin1ToUtf8(pszString, ppszString) RTLatin1ToUtf8Tag((pszString), (ppszString), RTSTR_TAG) 1027 1028 /** 1029 * Translate a Latin-1 string into a UTF-8 allocating the result buffer. 1030 * 1031 * @returns iprt status code. 1032 * @param pszString Latin-1 string to convert. 1033 * @param ppszString Receives pointer of allocated UTF-8 string on 1034 * success, and is always set to NULL on failure. 1035 * The returned pointer must be freed using RTStrFree(). 1036 * @param pszTag Allocation tag used for statistics and such. 1037 */ 1038 RTDECL(int) RTLatin1ToUtf8Tag(const char *pszString, char **ppszString, const char *pszTag); 1039 1040 /** 1041 * Translates Latin-1 to UTF-8 using buffer provided by the caller or a fittingly 1042 * sized buffer allocated by the function (default tag). 1043 * 1044 * @returns iprt status code. 1045 * @param pszString The Latin-1 string to convert. 1046 * @param cchString The number of Latin-1 characters to translate from 1047 * pszString. The translation will stop when reaching 1048 * cchString or the terminator ('\\0'). Use RTSTR_MAX 1049 * to translate the entire string. 1050 * @param ppsz If cch is non-zero, this must either be pointing to 1051 * a pointer to a buffer of the specified size, or 1052 * pointer to a NULL pointer. If *ppsz is NULL or cch 1053 * is zero a buffer of at least cch chars will be 1054 * allocated to hold the translated string. If a 1055 * buffer was requested it must be freed using 1056 * RTStrFree(). 1057 * @param cch The buffer size in chars (the type). This includes the terminator. 1058 * @param pcch Where to store the length of the translated string, 1059 * excluding the terminator. (Optional) 1060 * 1061 * This may be set under some error conditions, 1062 * however, only for VERR_BUFFER_OVERFLOW and 1063 * VERR_NO_STR_MEMORY will it contain a valid string 1064 * length that can be used to resize the buffer. 
1065 */ 1066 #define RTLatin1ToUtf8Ex(pszString, cchString, ppsz, cch, pcch) \ 1067 RTLatin1ToUtf8ExTag((pszString), (cchString), (ppsz), (cch), (pcch), RTSTR_TAG) 222 #define RTLatin1ToUtf8Ex(pszLatin1, cchLatin1, ppsz, cch, pcch) \ 223 RTLatin1ToUtf8ExTag((pszLatin1), (cchLatin1), (ppsz), (cch), (pcch), RTSTR_TAG) 1068 224 1069 225 /** … … 1072 228 * 1073 229 * @returns iprt status code. 1074 * @param psz StringThe Latin1 string to convert.1075 * @param cch StringThe number of Latin1 characters to translate from230 * @param pszLatin1 The Latin1 string to convert. 231 * @param cchLatin1 The number of Latin1 characters to translate from 1076 232 * pwszString. The translation will stop when 1077 * reaching cch Stringor the terminator ('\\0'). Use233 * reaching cchLatin1 or the terminator ('\\0'). Use 1078 234 * RTSTR_MAX to translate the entire string. 1079 235 * @param ppsz If cch is non-zero, this must either be pointing to … … 1095 251 * @param pszTag Allocation tag used for statistics and such. 1096 252 */ 1097 RTDECL(int) RTLatin1ToUtf8ExTag(const char *pszString, size_t cchString, char **ppsz, size_t cch, size_t *pcch, const char *pszTag); 253 RTDECL(int) RTLatin1ToUtf8ExTag(const char *pszLatin1, size_t cchLatin1, char **ppsz, size_t cch, size_t *pcch, 254 const char *pszTag); 1098 255 1099 256 /** … … 1106 263 * @returns Number of chars (bytes). 1107 264 * @returns 0 if the string was incorrectly encoded. 1108 * @param psz 1109 */ 1110 RTDECL(size_t) RTLatin1CalcUtf8Len(const char *psz );265 * @param pszLatin1 The Latin-1 string. 266 */ 267 RTDECL(size_t) RTLatin1CalcUtf8Len(const char *pszLatin1); 1111 268 1112 269 /** … … 1114 271 * 1115 272 * @returns iprt status code. 1116 * @param psz The string. 1117 * @param cch The max string length. Use RTSTR_MAX to process the entire string. 273 * @param pszLatin1 The Latin-1 string. 274 * @param cchLatin1 The max string length. Use RTSTR_MAX to process the 275 * entire string. 
1118 276 * @param pcch Where to store the string length (in bytes). Optional. 1119 277 * This is undefined on failure. 1120 278 */ 1121 RTDECL(int) RTLatin1CalcUtf8LenEx(const char *psz, size_t cch, size_t *pcch); 1122 1123 /** 1124 * Get the unicode code point at the given string position. 1125 * 1126 * @returns unicode code point. 1127 * @returns RTUNICP_INVALID if the encoding is invalid. 1128 * @param psz The string. 1129 */ 1130 RTDECL(RTUNICP) RTStrGetCpInternal(const char *psz); 1131 1132 /** 1133 * Get the unicode code point at the given string position. 1134 * 1135 * @returns iprt status code 1136 * @returns VERR_INVALID_UTF8_ENCODING if the encoding is invalid. 1137 * @param ppsz The string cursor. 1138 * This is advanced one character forward on failure. 1139 * @param pCp Where to store the unicode code point. 1140 * Stores RTUNICP_INVALID if the encoding is invalid. 1141 */ 1142 RTDECL(int) RTStrGetCpExInternal(const char **ppsz, PRTUNICP pCp); 1143 1144 /** 1145 * Get the unicode code point at the given string position for a string of a 1146 * given length. 1147 * 1148 * @returns iprt status code 1149 * @retval VERR_INVALID_UTF8_ENCODING if the encoding is invalid. 1150 * @retval VERR_END_OF_STRING if *pcch is 0. *pCp is set to RTUNICP_INVALID. 1151 * 1152 * @param ppsz The string. 1153 * @param pcch Pointer to the length of the string. This will be 1154 * decremented by the size of the code point. 1155 * @param pCp Where to store the unicode code point. 1156 * Stores RTUNICP_INVALID if the encoding is invalid. 1157 */ 1158 RTDECL(int) RTStrGetCpNExInternal(const char **ppsz, size_t *pcch, PRTUNICP pCp); 1159 1160 /** 1161 * Put the unicode code point at the given string position 1162 * and return the pointer to the char following it. 1163 * 1164 * This function will not consider anything at or following the 1165 * buffer area pointed to by psz. It is therefore not suitable for 1166 * inserting code points into a string, only appending/overwriting. 
1167 * 1168 * @returns pointer to the char following the written code point. 1169 * @param psz The string. 1170 * @param CodePoint The code point to write. 1171 * This should not be RTUNICP_INVALID or any other 1172 * character out of the UTF-8 range. 1173 * 1174 * @remark This is a worker function for RTStrPutCp(). 1175 * 1176 */ 1177 RTDECL(char *) RTStrPutCpInternal(char *psz, RTUNICP CodePoint); 1178 1179 /** 1180 * Get the unicode code point at the given string position. 1181 * 1182 * @returns unicode code point. 1183 * @returns RTUNICP_INVALID if the encoding is invalid. 1184 * @param psz The string. 1185 * 1186 * @remark We optimize this operation by using an inline function for 1187 * the most frequent and simplest sequence, the rest is 1188 * handled by RTStrGetCpInternal(). 1189 */ 1190 DECLINLINE(RTUNICP) RTStrGetCp(const char *psz) 1191 { 1192 const unsigned char uch = *(const unsigned char *)psz; 1193 if (!(uch & RT_BIT(7))) 1194 return uch; 1195 return RTStrGetCpInternal(psz); 1196 } 1197 1198 /** 1199 * Get the unicode code point at the given string position. 1200 * 1201 * @returns iprt status code. 1202 * @param ppsz Pointer to the string pointer. This will be updated to 1203 * point to the char following the current code point. 1204 * This is advanced one character forward on failure. 1205 * @param pCp Where to store the code point. 1206 * RTUNICP_INVALID is stored here on failure. 1207 * 1208 * @remark We optimize this operation by using an inline function for 1209 * the most frequent and simplest sequence, the rest is 1210 * handled by RTStrGetCpExInternal(). 
1211 */ 1212 DECLINLINE(int) RTStrGetCpEx(const char **ppsz, PRTUNICP pCp) 1213 { 1214 const unsigned char uch = **(const unsigned char **)ppsz; 1215 if (!(uch & RT_BIT(7))) 1216 { 1217 (*ppsz)++; 1218 *pCp = uch; 1219 return VINF_SUCCESS; 1220 } 1221 return RTStrGetCpExInternal(ppsz, pCp); 1222 } 1223 1224 /** 1225 * Get the unicode code point at the given string position for a string of a 1226 * given maximum length. 1227 * 1228 * @returns iprt status code. 1229 * @retval VERR_INVALID_UTF8_ENCODING if the encoding is invalid. 1230 * @retval VERR_END_OF_STRING if *pcch is 0. *pCp is set to RTUNICP_INVALID. 1231 * 1232 * @param ppsz Pointer to the string pointer. This will be updated to 1233 * point to the char following the current code point. 1234 * @param pcch Pointer to the maximum string length. This will be 1235 * decremented by the size of the code point found. 1236 * @param pCp Where to store the code point. 1237 * RTUNICP_INVALID is stored here on failure. 1238 * 1239 * @remark We optimize this operation by using an inline function for 1240 * the most frequent and simplest sequence, the rest is 1241 * handled by RTStrGetCpNExInternal(). 1242 */ 1243 DECLINLINE(int) RTStrGetCpNEx(const char **ppsz, size_t *pcch, PRTUNICP pCp) 1244 { 1245 if (RT_LIKELY(*pcch != 0)) 1246 { 1247 const unsigned char uch = **(const unsigned char **)ppsz; 1248 if (!(uch & RT_BIT(7))) 1249 { 1250 (*ppsz)++; 1251 (*pcch)--; 1252 *pCp = uch; 1253 return VINF_SUCCESS; 1254 } 1255 } 1256 return RTStrGetCpNExInternal(ppsz, pcch, pCp); 1257 } 1258 1259 /** 1260 * Get the UTF-8 size in characters of a given Unicode code point. 1261 * 1262 * The code point is expected to be a valid Unicode one, but not necessarily in 1263 * the range supported by UTF-8. 1264 * 1265 * @returns The number of chars (bytes) required to encode the code point, or 1266 * zero if there is no UTF-8 encoding. 1267 * @param CodePoint The unicode code point. 
1268 */ 1269 DECLINLINE(size_t) RTStrCpSize(RTUNICP CodePoint) 1270 { 1271 if (CodePoint < 0x00000080) 1272 return 1; 1273 if (CodePoint < 0x00000800) 1274 return 2; 1275 if (CodePoint < 0x00010000) 1276 return 3; 1277 #ifdef RT_USE_RTC_3629 1278 if (CodePoint < 0x00011000) 1279 return 4; 1280 #else 1281 if (CodePoint < 0x00200000) 1282 return 4; 1283 if (CodePoint < 0x04000000) 1284 return 5; 1285 if (CodePoint < 0x7fffffff) 1286 return 6; 1287 #endif 1288 return 0; 1289 } 1290 1291 /** 1292 * Put the unicode code point at the given string position 1293 * and return the pointer to the char following it. 1294 * 1295 * This function will not consider anything at or following the 1296 * buffer area pointed to by psz. It is therefore not suitable for 1297 * inserting code points into a string, only appending/overwriting. 1298 * 1299 * @returns pointer to the char following the written code point. 1300 * @param psz The string. 1301 * @param CodePoint The code point to write. 1302 * This should not be RTUNICP_INVALID or any other 1303 * character out of the UTF-8 range. 1304 * 1305 * @remark We optimize this operation by using an inline function for 1306 * the most frequent and simplest sequence, the rest is 1307 * handled by RTStrPutCpInternal(). 1308 */ 1309 DECLINLINE(char *) RTStrPutCp(char *psz, RTUNICP CodePoint) 1310 { 1311 if (CodePoint < 0x80) 1312 { 1313 *psz++ = (unsigned char)CodePoint; 1314 return psz; 1315 } 1316 return RTStrPutCpInternal(psz, CodePoint); 1317 } 1318 1319 /** 1320 * Skips ahead, past the current code point. 1321 * 1322 * @returns Pointer to the char after the current code point. 1323 * @param psz Pointer to the current code point. 1324 * @remark This will not move the next valid code point, only past the current one. 1325 */ 1326 DECLINLINE(char *) RTStrNextCp(const char *psz) 1327 { 1328 RTUNICP Cp; 1329 RTStrGetCpEx(&psz, &Cp); 1330 return (char *)psz; 1331 } 1332 1333 /** 1334 * Skips back to the previous code point. 
1335 * 1336 * @returns Pointer to the char before the current code point. 1337 * @returns pszStart on failure. 1338 * @param pszStart Pointer to the start of the string. 1339 * @param psz Pointer to the current code point. 1340 */ 1341 RTDECL(char *) RTStrPrevCp(const char *pszStart, const char *psz); 1342 1343 /** 1344 * Get the unicode code point at the given string position. 1345 * 1346 * @returns unicode code point. 1347 * @returns RTUNICP_INVALID if the encoding is invalid. 1348 * @param psz The string. 1349 */ 1350 DECLINLINE(RTUNICP) RTLatin1GetCp(const char *psz) 1351 { 1352 return *(const unsigned char *)psz; 1353 } 1354 1355 /** 1356 * Get the unicode code point at the given string position. 1357 * 1358 * @returns iprt status code. 1359 * @param ppsz Pointer to the string pointer. This will be updated to 1360 * point to the char following the current code point. 1361 * This is advanced one character forward on failure. 1362 * @param pCp Where to store the code point. 1363 * RTUNICP_INVALID is stored here on failure. 1364 * 1365 * @remark We optimize this operation by using an inline function for 1366 * the most frequent and simplest sequence, the rest is 1367 * handled by RTStrGetCpExInternal(). 1368 */ 1369 DECLINLINE(int) RTLatin1GetCpEx(const char **ppsz, PRTUNICP pCp) 1370 { 1371 const unsigned char uch = **(const unsigned char **)ppsz; 1372 (*ppsz)++; 1373 *pCp = uch; 1374 return VINF_SUCCESS; 1375 } 1376 1377 /** 1378 * Get the unicode code point at the given string position for a string of a 1379 * given maximum length. 1380 * 1381 * @returns iprt status code. 1382 * @retval VERR_END_OF_STRING if *pcch is 0. *pCp is set to RTUNICP_INVALID. 1383 * 1384 * @param ppsz Pointer to the string pointer. This will be updated to 1385 * point to the char following the current code point. 1386 * @param pcch Pointer to the maximum string length. This will be 1387 * decremented by the size of the code point found. 1388 * @param pCp Where to store the code point. 
1389 * RTUNICP_INVALID is stored here on failure. 1390 */ 1391 DECLINLINE(int) RTLatin1GetCpNEx(const char **ppsz, size_t *pcch, PRTUNICP pCp) 1392 { 1393 if (RT_LIKELY(*pcch != 0)) 1394 { 1395 const unsigned char uch = **(const unsigned char **)ppsz; 1396 (*ppsz)++; 1397 (*pcch)--; 1398 *pCp = uch; 1399 return VINF_SUCCESS; 1400 } 1401 *pCp = RTUNICP_INVALID; 1402 return VERR_END_OF_STRING; 1403 } 1404 1405 /** 1406 * Get the Latin-1 size in characters of a given Unicode code point. 1407 * 1408 * The code point is expected to be a valid Unicode one, but not necessarily in 1409 * the range supported by Latin-1. 1410 * 1411 * @returns the size in characters, or zero if there is no Latin-1 encoding 1412 */ 1413 DECLINLINE(size_t) RTLatin1CpSize(RTUNICP CodePoint) 1414 { 1415 if (CodePoint < 0x100) 1416 return 1; 1417 return 0; 1418 } 1419 1420 /** 1421 * Put the unicode code point at the given string position 1422 * and return the pointer to the char following it. 1423 * 1424 * This function will not consider anything at or following the 1425 * buffer area pointed to by psz. It is therefore not suitable for 1426 * inserting code points into a string, only appending/overwriting. 1427 * 1428 * @returns pointer to the char following the written code point. 1429 * @param psz The string. 1430 * @param CodePoint The code point to write. 1431 * This should not be RTUNICP_INVALID or any other 1432 * character out of the Latin-1 range. 1433 */ 1434 DECLINLINE(char *) RTLatin1PutCp(char *psz, RTUNICP CodePoint) 1435 { 1436 AssertReturn(CodePoint < 0x100, NULL); 1437 *psz++ = (unsigned char)CodePoint; 1438 return psz; 1439 } 1440 1441 /** 1442 * Skips ahead, past the current code point. 1443 * 1444 * @returns Pointer to the char after the current code point. 1445 * @param psz Pointer to the current code point. 1446 * @remark This will not move the next valid code point, only past the current one. 
1447 */ 1448 DECLINLINE(char *) RTLatin1NextCp(const char *psz) 1449 { 1450 psz++; 1451 return (char *)psz; 1452 } 1453 1454 /** 1455 * Skips back to the previous code point. 1456 * 1457 * @returns Pointer to the char before the current code point. 1458 * @returns pszStart on failure. 1459 * @param pszStart Pointer to the start of the string. 1460 * @param psz Pointer to the current code point. 1461 */ 1462 DECLINLINE(char *) RTLatin1PrevCp(const char *pszStart, const char *psz) 1463 { 1464 if ((uintptr_t)psz > (uintptr_t)pszStart) 1465 { 1466 psz--; 1467 return (char *)psz; 1468 } 1469 return (char *)pszStart; 1470 } 1471 1472 1473 /** @page pg_rt_str_format The IPRT Format Strings 1474 * 1475 * IPRT implements most of the commonly used format types and flags with the 1476 * exception of floating point which is completely missing. In addition IPRT 1477 * provides a number of IPRT specific format types for the IPRT typedefs and 1478 * other useful things. Note that several of these extensions are similar to 1479 * \%p and doesn't care much if you try add formating flags/width/precision. 1480 * 1481 * 1482 * Group 0a, The commonly used format types: 1483 * - \%s - Takes a pointer to a zero terminated string (UTF-8) and 1484 * prints it with the optionally adjustment (width, -) and 1485 * length restriction (precision). 1486 * - \%ls - Same as \%s except that the input is UTF-16 (output UTF-8). 1487 * - \%Ls - Same as \%s except that the input is UCS-32 (output UTF-8). 1488 * - \%S - Same as \%s, used to convert to current codeset but this is 1489 * now done by the streams code. Deprecated, use \%s. 1490 * - \%lS - Ditto. Deprecated, use \%ls. 1491 * - \%LS - Ditto. Deprecated, use \%Ls. 1492 * - \%c - Takes a char and prints it. 1493 * - \%d - Takes a signed integer and prints it as decimal. Thousand 1494 * separator (\'), zero padding (0), adjustment (-+), width, 1495 * precision 1496 * - \%i - Same as \%d. 
1497 * - \%u - Takes an unsigned integer and prints it as decimal. Thousand 1498 * separator (\'), zero padding (0), adjustment (-+), width, 1499 * precision 1500 * - \%x - Takes an unsigned integer and prints it as lowercased 1501 * hexadecimal. The special hash (\#) flag causes a '0x' 1502 * prefixed to be printed. Zero padding (0), adjustment (-+), 1503 * width, precision. 1504 * - \%X - Same as \%x except that it is uppercased. 1505 * - \%o - Takes an unsigned (?) integer and prints it as octal. Zero 1506 * padding (0), adjustment (-+), width, precision. 1507 * - \%p - Takes a pointer (void technically) and prints it. Zero 1508 * padding (0), adjustment (-+), width, precision. 1509 * 1510 * The \%d, \%i, \%u, \%x, \%X and \%o format types support the following 1511 * argument type specifiers: 1512 * - \%ll - long long (uint64_t). 1513 * - \%L - long long (uint64_t). 1514 * - \%l - long (uint32_t, uint64_t) 1515 * - \%h - short (int16_t). 1516 * - \%hh - char (int8_t). 1517 * - \%H - char (int8_t). 1518 * - \%z - size_t. 1519 * - \%j - intmax_t (int64_t). 1520 * - \%t - ptrdiff_t. 1521 * The type in parentheses is typical sizes, however when printing those types 1522 * you are better off using the special group 2 format types below (\%RX32 and 1523 * such). 1524 * 1525 * 1526 * Group 0b, IPRT format tricks: 1527 * - %M - Replaces the format string, takes a string pointer. 1528 * - %N - Nested formatting, takes a pointer to a format string 1529 * followed by the pointer to a va_list variable. The va_list 1530 * variable will not be modified and the caller must do va_end() 1531 * on it. Make sure the va_list variable is NOT in a parameter 1532 * list or some gcc versions/targets may get it all wrong. 1533 * 1534 * 1535 * Group 1, the basic runtime typedefs (excluding those which obviously are 1536 * pointer): 1537 * - \%RTbool - Takes a bool value and prints 'true', 'false', or '!%d!'. 1538 * - \%RTfile - Takes a #RTFILE value. 
1539 * - \%RTfmode - Takes a #RTFMODE value. 1540 * - \%RTfoff - Takes a #RTFOFF value. 1541 * - \%RTfp16 - Takes a #RTFAR16 value. 1542 * - \%RTfp32 - Takes a #RTFAR32 value. 1543 * - \%RTfp64 - Takes a #RTFAR64 value. 1544 * - \%RTgid - Takes a #RTGID value. 1545 * - \%RTino - Takes a #RTINODE value. 1546 * - \%RTint - Takes a #RTINT value. 1547 * - \%RTiop - Takes a #RTIOPORT value. 1548 * - \%RTldrm - Takes a #RTLDRMOD value. 1549 * - \%RTmac - Takes a #PCRTMAC pointer. 1550 * - \%RTnaddr - Takes a #PCRTNETADDR value. 1551 * - \%RTnaipv4 - Takes a #RTNETADDRIPV4 value. 1552 * - \%RTnaipv6 - Takes a #PCRTNETADDRIPV6 value. 1553 * - \%RTnthrd - Takes a #RTNATIVETHREAD value. 1554 * - \%RTnthrd - Takes a #RTNATIVETHREAD value. 1555 * - \%RTproc - Takes a #RTPROCESS value. 1556 * - \%RTptr - Takes a #RTINTPTR or #RTUINTPTR value (but not void *). 1557 * - \%RTreg - Takes a #RTCCUINTREG value. 1558 * - \%RTsel - Takes a #RTSEL value. 1559 * - \%RTsem - Takes a #RTSEMEVENT, #RTSEMEVENTMULTI, #RTSEMMUTEX, #RTSEMFASTMUTEX, or #RTSEMRW value. 1560 * - \%RTsock - Takes a #RTSOCKET value. 1561 * - \%RTthrd - Takes a #RTTHREAD value. 1562 * - \%RTuid - Takes a #RTUID value. 1563 * - \%RTuint - Takes a #RTUINT value. 1564 * - \%RTunicp - Takes a #RTUNICP value. 1565 * - \%RTutf16 - Takes a #RTUTF16 value. 1566 * - \%RTuuid - Takes a #PCRTUUID and will print the UUID as a string. 1567 * - \%RTxuint - Takes a #RTUINT or #RTINT value, formatting it as hex. 1568 * - \%RGi - Takes a #RTGCINT value. 1569 * - \%RGp - Takes a #RTGCPHYS value. 1570 * - \%RGr - Takes a #RTGCUINTREG value. 1571 * - \%RGu - Takes a #RTGCUINT value. 1572 * - \%RGv - Takes a #RTGCPTR, #RTGCINTPTR or #RTGCUINTPTR value. 1573 * - \%RGx - Takes a #RTGCUINT or #RTGCINT value, formatting it as hex. 1574 * - \%RHi - Takes a #RTHCINT value. 1575 * - \%RHp - Takes a #RTHCPHYS value. 1576 * - \%RHr - Takes a #RTHCUINTREG value. 1577 * - \%RHu - Takes a #RTHCUINT value. 
1578 * - \%RHv - Takes a #RTHCPTR, #RTHCINTPTR or #RTHCUINTPTR value. 1579 * - \%RHx - Takes a #RTHCUINT or #RTHCINT value, formatting it as hex. 1580 * - \%RRv - Takes a #RTRCPTR, #RTRCINTPTR or #RTRCUINTPTR value. 1581 * - \%RCi - Takes a #RTINT value. 1582 * - \%RCp - Takes a #RTCCPHYS value. 1583 * - \%RCr - Takes a #RTCCUINTREG value. 1584 * - \%RCu - Takes a #RTUINT value. 1585 * - \%RCv - Takes a #uintptr_t, #intptr_t, void * value. 1586 * - \%RCx - Takes a #RTUINT or #RTINT value, formatting it as hex. 1587 * 1588 * 1589 * Group 2, the generic integer types which are preferred over relying on what 1590 * bit-count a 'long', 'short', or 'long long' has on a platform. These are 1591 * highly preferred for the [u]intXX_t kind of types: 1592 * - \%RI[8|16|32|64] - Signed integer value of the specified bit count. 1593 * - \%RU[8|16|32|64] - Unsigned integer value of the specified bit count. 1594 * - \%RX[8|16|32|64] - Hexadecimal integer value of the specified bit count. 1595 * 1596 * 1597 * Group 3, hex dumpers and other complex stuff which requires more than simple 1598 * formatting: 1599 * - \%Rhxd - Takes a pointer to the memory which is to be dumped in typical 1600 * hex format. Use the precision to specify the length, and the width to 1601 * set the number of bytes per line. Default width and precision is 16. 1602 * - \%Rhxs - Takes a pointer to the memory to be displayed as a hex string, 1603 * i.e. a series of space separated bytes formatted as two digit hex value. 1604 * Use the precision to specify the length. Default length is 16 bytes. 1605 * The width, if specified, is ignored. 1606 * - \%Rrc - Takes an integer iprt status code as argument. Will insert the 1607 * status code define corresponding to the iprt status code. 1608 * - \%Rrs - Takes an integer iprt status code as argument. Will insert the 1609 * short description of the specified status code. 1610 * - \%Rrf - Takes an integer iprt status code as argument.
Will insert the 1611 * full description of the specified status code. 1612 * - \%Rra - Takes an integer iprt status code as argument. Will insert the 1613 * status code define + full description. 1614 * - \%Rwc - Takes a long Windows error code as argument. Will insert the status 1615 * code define corresponding to the Windows error code. 1616 * - \%Rwf - Takes a long Windows error code as argument. Will insert the 1617 * full description of the specified status code. 1618 * - \%Rwa - Takes a long Windows error code as argument. Will insert the 1619 * error code define + full description. 1620 * 1621 * - \%Rhrc - Takes a COM/XPCOM status code as argument. Will insert the status 1622 * code define corresponding to the COM/XPCOM status code. 1623 * - \%Rhrf - Takes a COM/XPCOM status code as argument. Will insert the 1624 * full description of the specified status code. 1625 * - \%Rhra - Takes a COM/XPCOM error code as argument. Will insert the 1626 * error code define + full description. 1627 * 1628 * - \%Rfn - Pretty printing of a function or method. It drops the 1629 * return code and parameter list. 1630 * - \%Rbn - Prints the base name. For dropping the path in 1631 * order to save space when printing a path name. 1632 * 1633 * On other platforms, \%Rw? simply prints the argument in a form of 0xXXXXXXXX. 1634 * 1635 * 1636 * Group 4, structure dumpers: 1637 * - \%RDtimespec - Takes a PCRTTIMESPEC. 1638 * 1639 * 1640 * Group 5, XML / HTML escapers: 1641 * - \%RMas - Takes a string pointer (const char *) and outputs 1642 * it as an attribute value with the proper escaping. 1643 * This typically ends up in double quotes. 1644 * 1645 * - \%RMes - Takes a string pointer (const char *) and outputs 1646 * it as an element with the necessary escaping. 1647 * 1648 * Group 6, CPU Architecture Register dumpers: 1649 * - \%RAx86[reg] - Takes a 64-bit register value if the register is 1650 * 64-bit or smaller. Check the code wrt which 1651 * registers are implemented.
1652 * 1653 */ 1654 1655 #ifndef DECLARED_FNRTSTROUTPUT /* duplicated in iprt/log.h */ 1656 # define DECLARED_FNRTSTROUTPUT 1657 /** 1658 * Output callback. 1659 * 1660 * @returns number of bytes written. 1661 * @param pvArg User argument. 1662 * @param pachChars Pointer to an array of utf-8 characters. 1663 * @param cbChars Number of bytes in the character array pointed to by pachChars. 1664 */ 1665 typedef DECLCALLBACK(size_t) FNRTSTROUTPUT(void *pvArg, const char *pachChars, size_t cbChars); 1666 /** Pointer to callback function. */ 1667 typedef FNRTSTROUTPUT *PFNRTSTROUTPUT; 1668 #endif 1669 1670 /** Format flag. 1671 * These are used by RTStrFormat extensions and RTStrFormatNumber, mind 1672 * that not all flags makes sense to both of the functions. 1673 * @{ */ 1674 #define RTSTR_F_CAPITAL 0x0001 1675 #define RTSTR_F_LEFT 0x0002 1676 #define RTSTR_F_ZEROPAD 0x0004 1677 #define RTSTR_F_SPECIAL 0x0008 1678 #define RTSTR_F_VALSIGNED 0x0010 1679 #define RTSTR_F_PLUS 0x0020 1680 #define RTSTR_F_BLANK 0x0040 1681 #define RTSTR_F_WIDTH 0x0080 1682 #define RTSTR_F_PRECISION 0x0100 1683 #define RTSTR_F_THOUSAND_SEP 0x0200 1684 1685 #define RTSTR_F_BIT_MASK 0xf800 1686 #define RTSTR_F_8BIT 0x0800 1687 #define RTSTR_F_16BIT 0x1000 1688 #define RTSTR_F_32BIT 0x2000 1689 #define RTSTR_F_64BIT 0x4000 1690 #define RTSTR_F_128BIT 0x8000 1691 /** @} */ 1692 1693 /** @def RTSTR_GET_BIT_FLAG 1694 * Gets the bit flag for the specified type. 1695 */ 1696 #define RTSTR_GET_BIT_FLAG(type) \ 1697 ( sizeof(type) * 8 == 32 ? RTSTR_F_32BIT \ 1698 : sizeof(type) * 8 == 64 ? RTSTR_F_64BIT \ 1699 : sizeof(type) * 8 == 16 ? RTSTR_F_16BIT \ 1700 : sizeof(type) * 8 == 8 ? RTSTR_F_8BIT \ 1701 : sizeof(type) * 8 == 128 ? RTSTR_F_128BIT \ 1702 : 0) 1703 1704 1705 /** 1706 * Callback to format non-standard format specifiers. 1707 * 1708 * @returns The number of bytes formatted. 1709 * @param pvArg Formatter argument. 1710 * @param pfnOutput Pointer to output function. 
1711 * @param pvArgOutput Argument for the output function. 1712 * @param ppszFormat Pointer to the format string pointer. Advance this till the char 1713 * after the format specifier. 1714 * @param pArgs Pointer to the argument list. Use this to fetch the arguments. 1715 * @param cchWidth Format Width. -1 if not specified. 1716 * @param cchPrecision Format Precision. -1 if not specified. 1717 * @param fFlags Flags (RTSTR_F_XXX). 1718 * @param chArgSize The argument size specifier, 'l' or 'L'. 1719 */ 1720 typedef DECLCALLBACK(size_t) FNSTRFORMAT(void *pvArg, PFNRTSTROUTPUT pfnOutput, void *pvArgOutput, 1721 const char **ppszFormat, va_list *pArgs, int cchWidth, 1722 int cchPrecision, unsigned fFlags, char chArgSize); 1723 /** Pointer to a FNSTRFORMAT() function. */ 1724 typedef FNSTRFORMAT *PFNSTRFORMAT; 1725 1726 1727 /** 1728 * Partial implementation of a printf like formatter. 1729 * It doesn't do everything correctly, and there is no floating point support. 1730 * However, it supports custom formats by the means of a format callback. 1731 * 1732 * @returns number of bytes formatted. 1733 * @param pfnOutput Output worker. 1734 * Called in two ways. Normally with a string and its length. 1735 * For termination, it's called with NULL for string, 0 for length. 1736 * @param pvArgOutput Argument to the output worker. 1737 * @param pfnFormat Custom format worker. 1738 * @param pvArgFormat Argument to the format worker. 1739 * @param pszFormat Pointer to the format string, @see pg_rt_str_format. 1740 * @param InArgs Argument list. 1741 */ 1742 RTDECL(size_t) RTStrFormatV(PFNRTSTROUTPUT pfnOutput, void *pvArgOutput, PFNSTRFORMAT pfnFormat, void *pvArgFormat, 1743 const char *pszFormat, va_list InArgs) RT_IPRT_FORMAT_ATTR(5, 0); 1744 1745 /** 1746 * Partial implementation of a printf like formatter. 1747 * It doesn't do everything correctly, and there is no floating point support. 1748 * However, it supports custom formats by the means of a format callback.
1749 * 1750 * @returns number of bytes formatted. 1751 * @param pfnOutput Output worker. 1752 * Called in two ways. Normally with a string and its length. 1753 * For termination, it's called with NULL for string, 0 for length. 1754 * @param pvArgOutput Argument to the output worker. 1755 * @param pfnFormat Custom format worker. 1756 * @param pvArgFormat Argument to the format worker. 1757 * @param pszFormat Pointer to the format string, @see pg_rt_str_format. 1758 * @param ... Argument list. 1759 */ 1760 RTDECL(size_t) RTStrFormat(PFNRTSTROUTPUT pfnOutput, void *pvArgOutput, PFNSTRFORMAT pfnFormat, void *pvArgFormat, 1761 const char *pszFormat, ...) RT_IPRT_FORMAT_ATTR(5, 6); 1762 1763 /** 1764 * Formats an integer number according to the parameters. 1765 * 1766 * @returns Length of the formatted number. 1767 * @param psz Pointer to output string buffer of sufficient size. 1768 * @param u64Value Value to format. 1769 * @param uiBase Number representation base. 1770 * @param cchWidth Width. 1771 * @param cchPrecision Precision. 1772 * @param fFlags Flags, RTSTR_F_XXX. 1773 */ 1774 RTDECL(int) RTStrFormatNumber(char *psz, uint64_t u64Value, unsigned int uiBase, signed int cchWidth, signed int cchPrecision, 1775 unsigned int fFlags); 1776 1777 /** 1778 * Formats an unsigned 8-bit number. 1779 * 1780 * @returns The length of the formatted number or VERR_BUFFER_OVERFLOW. 1781 * @param pszBuf The output buffer. 1782 * @param cbBuf The size of the output buffer. 1783 * @param u8Value The value to format. 1784 * @param uiBase Number representation base. 1785 * @param cchWidth Width. 1786 * @param cchPrecision Precision. 1787 * @param fFlags Flags, RTSTR_F_XXX. 1788 */ 1789 RTDECL(ssize_t) RTStrFormatU8(char *pszBuf, size_t cbBuf, uint8_t u8Value, unsigned int uiBase, 1790 signed int cchWidth, signed int cchPrecision, uint32_t fFlags); 1791 1792 /** 1793 * Formats an unsigned 16-bit number. 1794 * 1795 * @returns The length of the formatted number or VERR_BUFFER_OVERFLOW. 
1796 * @param pszBuf The output buffer. 1797 * @param cbBuf The size of the output buffer. 1798 * @param u16Value The value to format. 1799 * @param uiBase Number representation base. 1800 * @param cchWidth Width. 1801 * @param cchPrecision Precision. 1802 * @param fFlags Flags, RTSTR_F_XXX. 1803 */ 1804 RTDECL(ssize_t) RTStrFormatU16(char *pszBuf, size_t cbBuf, uint16_t u16Value, unsigned int uiBase, 1805 signed int cchWidth, signed int cchPrecision, uint32_t fFlags); 1806 1807 /** 1808 * Formats an unsigned 32-bit number. 1809 * 1810 * @returns The length of the formatted number or VERR_BUFFER_OVERFLOW. 1811 * @param pszBuf The output buffer. 1812 * @param cbBuf The size of the output buffer. 1813 * @param u32Value The value to format. 1814 * @param uiBase Number representation base. 1815 * @param cchWidth Width. 1816 * @param cchPrecision Precision. 1817 * @param fFlags Flags, RTSTR_F_XXX. 1818 */ 1819 RTDECL(ssize_t) RTStrFormatU32(char *pszBuf, size_t cbBuf, uint32_t u32Value, unsigned int uiBase, 1820 signed int cchWidth, signed int cchPrecision, uint32_t fFlags); 1821 1822 /** 1823 * Formats an unsigned 64-bit number. 1824 * 1825 * @returns The length of the formatted number or VERR_BUFFER_OVERFLOW. 1826 * @param pszBuf The output buffer. 1827 * @param cbBuf The size of the output buffer. 1828 * @param u64Value The value to format. 1829 * @param uiBase Number representation base. 1830 * @param cchWidth Width. 1831 * @param cchPrecision Precision. 1832 * @param fFlags Flags, RTSTR_F_XXX. 1833 */ 1834 RTDECL(ssize_t) RTStrFormatU64(char *pszBuf, size_t cbBuf, uint64_t u64Value, unsigned int uiBase, 1835 signed int cchWidth, signed int cchPrecision, uint32_t fFlags); 1836 1837 /** 1838 * Formats an unsigned 128-bit number. 1839 * 1840 * @returns The length of the formatted number or VERR_BUFFER_OVERFLOW. 1841 * @param pszBuf The output buffer. 1842 * @param cbBuf The size of the output buffer. 1843 * @param pu128Value The value to format. 
1844 * @param uiBase Number representation base. 1845 * @param cchWidth Width. 1846 * @param cchPrecision Precision. 1847 * @param fFlags Flags, RTSTR_F_XXX. 1848 */ 1849 RTDECL(ssize_t) RTStrFormatU128(char *pszBuf, size_t cbBuf, PCRTUINT128U pu128Value, unsigned int uiBase, 1850 signed int cchWidth, signed int cchPrecision, uint32_t fFlags); 1851 1852 /** 1853 * Formats an 80-bit extended floating point number. 1854 * 1855 * @returns The length of the formatted number or VERR_BUFFER_OVERFLOW. 1856 * @param pszBuf The output buffer. 1857 * @param cbBuf The size of the output buffer. 1858 * @param pr80Value The value to format. 1859 * @param cchWidth Width. 1860 * @param cchPrecision Precision. 1861 * @param fFlags Flags, RTSTR_F_XXX. 1862 */ 1863 RTDECL(ssize_t) RTStrFormatR80(char *pszBuf, size_t cbBuf, PCRTFLOAT80U pr80Value, signed int cchWidth, 1864 signed int cchPrecision, uint32_t fFlags); 1865 1866 /** 1867 * Formats an 80-bit extended floating point number, version 2. 1868 * 1869 * @returns The length of the formatted number or VERR_BUFFER_OVERFLOW. 1870 * @param pszBuf The output buffer. 1871 * @param cbBuf The size of the output buffer. 1872 * @param pr80Value The value to format. 1873 * @param cchWidth Width. 1874 * @param cchPrecision Precision. 1875 * @param fFlags Flags, RTSTR_F_XXX. 1876 */ 1877 RTDECL(ssize_t) RTStrFormatR80u2(char *pszBuf, size_t cbBuf, PCRTFLOAT80U2 pr80Value, signed int cchWidth, 1878 signed int cchPrecision, uint32_t fFlags); 1879 1880 1881 1882 /** 1883 * Callback for formatting a type. 1884 * 1885 * This is registered using the RTStrFormatTypeRegister function and will 1886 * be called during string formatting to handle the specified %R[type]. 1887 * The argument for this format type is assumed to be a pointer and it's 1888 * passed in the @a pvValue argument. 1889 * 1890 * @returns Length of the formatted output. 1891 * @param pfnOutput Output worker. 1892 * @param pvArgOutput Argument to the output worker. 
1893 * @param pszType The type name. 1894 * @param pvValue The argument value. 1895 * @param cchWidth Width. 1896 * @param cchPrecision Precision. 1897 * @param fFlags Flags (RTSTR_F_XXX). 1898 * @param pvUser The user argument. 1899 */ 1900 typedef DECLCALLBACK(size_t) FNRTSTRFORMATTYPE(PFNRTSTROUTPUT pfnOutput, void *pvArgOutput, 1901 const char *pszType, void const *pvValue, 1902 int cchWidth, int cchPrecision, unsigned fFlags, 1903 void *pvUser); 1904 /** Pointer to a FNRTSTRFORMATTYPE. */ 1905 typedef FNRTSTRFORMATTYPE *PFNRTSTRFORMATTYPE; 1906 1907 1908 /** 1909 * Register a format handler for a type. 1910 * 1911 * The format handler is used to handle '%R[type]' format types, where the argument 1912 * in the vector is a pointer value (a bit restrictive, but keeps it simple). 1913 * 1914 * The caller must ensure that no other thread will be making use of any of 1915 * the dynamic formatting type facilities simultaneously with this call. 1916 * 1917 * @returns IPRT status code. 1918 * @retval VINF_SUCCESS on success. 1919 * @retval VERR_ALREADY_EXISTS if the type has already been registered. 1920 * @retval VERR_TOO_MANY_OPEN_FILES if all the type slots have been allocated already. 1921 * 1922 * @param pszType The type name. 1923 * @param pfnHandler The handler address. See FNRTSTRFORMATTYPE for details. 1924 * @param pvUser The user argument to pass to the handler. See RTStrFormatTypeSetUser 1925 * for how to update this later. 1926 */ 1927 RTDECL(int) RTStrFormatTypeRegister(const char *pszType, PFNRTSTRFORMATTYPE pfnHandler, void *pvUser); 1928 1929 /** 1930 * Deregisters a format type. 1931 * 1932 * The caller must ensure that no other thread will be making use of any of 1933 * the dynamic formatting type facilities simultaneously with this call. 1934 * 1935 * @returns IPRT status code. 1936 * @retval VINF_SUCCESS on success. 1937 * @retval VERR_FILE_NOT_FOUND if not found. 1938 * 1939 * @param pszType The type to deregister.
1940 */ 1941 RTDECL(int) RTStrFormatTypeDeregister(const char *pszType); 1942 1943 /** 1944 * Sets the user argument for a type. 1945 * 1946 * This can be used if a user argument needs relocating in GC. 1947 * 1948 * @returns IPRT status code. 1949 * @retval VINF_SUCCESS on success. 1950 * @retval VERR_FILE_NOT_FOUND if not found. 1951 * 1952 * @param pszType The type to update. 1953 * @param pvUser The new user argument value. 1954 */ 1955 RTDECL(int) RTStrFormatTypeSetUser(const char *pszType, void *pvUser); 1956 1957 1958 /** 1959 * String printf. 1960 * 1961 * @returns The length of the returned string (in pszBuffer) excluding the 1962 * terminator. 1963 * @param pszBuffer Output buffer. 1964 * @param cchBuffer Size of the output buffer. 1965 * @param pszFormat Pointer to the format string, @see pg_rt_str_format. 1966 * @param args The format argument. 1967 */ 1968 RTDECL(size_t) RTStrPrintfV(char *pszBuffer, size_t cchBuffer, const char *pszFormat, va_list args) RT_IPRT_FORMAT_ATTR(3, 0); 1969 1970 /** 1971 * String printf. 1972 * 1973 * @returns The length of the returned string (in pszBuffer) excluding the 1974 * terminator. 1975 * @param pszBuffer Output buffer. 1976 * @param cchBuffer Size of the output buffer. 1977 * @param pszFormat Pointer to the format string, @see pg_rt_str_format. 1978 * @param ... The format argument. 1979 */ 1980 RTDECL(size_t) RTStrPrintf(char *pszBuffer, size_t cchBuffer, const char *pszFormat, ...) RT_IPRT_FORMAT_ATTR(3, 4); 1981 1982 1983 /** 1984 * String printf with custom formatting. 1985 * 1986 * @returns The length of the returned string (in pszBuffer) excluding the 1987 * terminator. 1988 * @param pfnFormat Pointer to handler function for the custom formats. 1989 * @param pvArg Argument to the pfnFormat function. 1990 * @param pszBuffer Output buffer. 1991 * @param cchBuffer Size of the output buffer. 1992 * @param pszFormat Pointer to the format string, @see pg_rt_str_format. 1993 * @param args The format argument. 
1994 */ 1995 RTDECL(size_t) RTStrPrintfExV(PFNSTRFORMAT pfnFormat, void *pvArg, char *pszBuffer, size_t cchBuffer, 1996 const char *pszFormat, va_list args) RT_IPRT_FORMAT_ATTR(5, 0); 1997 1998 /** 1999 * String printf with custom formatting. 2000 * 2001 * @returns The length of the returned string (in pszBuffer) excluding the 2002 * terminator. 2003 * @param pfnFormat Pointer to handler function for the custom formats. 2004 * @param pvArg Argument to the pfnFormat function. 2005 * @param pszBuffer Output buffer. 2006 * @param cchBuffer Size of the output buffer. 2007 * @param pszFormat Pointer to the format string, @see pg_rt_str_format. 2008 * @param ... The format argument. 2009 */ 2010 RTDECL(size_t) RTStrPrintfEx(PFNSTRFORMAT pfnFormat, void *pvArg, char *pszBuffer, size_t cchBuffer, 2011 const char *pszFormat, ...) RT_IPRT_FORMAT_ATTR(5, 6); 2012 2013 2014 /** 2015 * Allocating string printf (default tag). 2016 * 2017 * @returns The length of the string in the returned *ppszBuffer excluding the 2018 * terminator. 2019 * @returns -1 on failure. 2020 * @param ppszBuffer Where to store the pointer to the allocated output buffer. 2021 * The buffer should be freed using RTStrFree(). 2022 * On failure *ppszBuffer will be set to NULL. 2023 * @param pszFormat Pointer to the format string, @see pg_rt_str_format. 2024 * @param args The format argument. 2025 */ 2026 #define RTStrAPrintfV(ppszBuffer, pszFormat, args) RTStrAPrintfVTag((ppszBuffer), (pszFormat), (args), RTSTR_TAG) 2027 2028 /** 2029 * Allocating string printf (custom tag). 2030 * 2031 * @returns The length of the string in the returned *ppszBuffer excluding the 2032 * terminator. 2033 * @returns -1 on failure. 2034 * @param ppszBuffer Where to store the pointer to the allocated output buffer. 2035 * The buffer should be freed using RTStrFree(). 2036 * On failure *ppszBuffer will be set to NULL. 2037 * @param pszFormat Pointer to the format string, @see pg_rt_str_format. 
2038 * @param args The format argument. 2039 * @param pszTag Allocation tag used for statistics and such. 2040 */ 2041 RTDECL(int) RTStrAPrintfVTag(char **ppszBuffer, const char *pszFormat, va_list args, const char *pszTag) RT_IPRT_FORMAT_ATTR(2, 0); 2042 2043 /** 2044 * Allocating string printf. 2045 * 2046 * @returns The length of the string in the returned *ppszBuffer excluding the 2047 * terminator. 2048 * @returns -1 on failure. 2049 * @param ppszBuffer Where to store the pointer to the allocated output buffer. 2050 * The buffer should be freed using RTStrFree(). 2051 * On failure *ppszBuffer will be set to NULL. 2052 * @param pszFormat Pointer to the format string, @see pg_rt_str_format. 2053 * @param ... The format argument. 2054 */ 2055 DECLINLINE(int) RT_IPRT_FORMAT_ATTR(2, 3) RTStrAPrintf(char **ppszBuffer, const char *pszFormat, ...) 2056 { 2057 int cbRet; 2058 va_list va; 2059 va_start(va, pszFormat); 2060 cbRet = RTStrAPrintfVTag(ppszBuffer, pszFormat, va, RTSTR_TAG); 2061 va_end(va); 2062 return cbRet; 2063 } 2064 2065 /** 2066 * Allocating string printf (custom tag). 2067 * 2068 * @returns The length of the string in the returned *ppszBuffer excluding the 2069 * terminator. 2070 * @returns -1 on failure. 2071 * @param ppszBuffer Where to store the pointer to the allocated output buffer. 2072 * The buffer should be freed using RTStrFree(). 2073 * On failure *ppszBuffer will be set to NULL. 2074 * @param pszTag Allocation tag used for statistics and such. 2075 * @param pszFormat Pointer to the format string, @see pg_rt_str_format. 2076 * @param ... The format argument. 2077 */ 2078 DECLINLINE(int) RT_IPRT_FORMAT_ATTR(3, 4) RTStrAPrintfTag(char **ppszBuffer, const char *pszTag, const char *pszFormat, ...) 2079 { 2080 int cbRet; 2081 va_list va; 2082 va_start(va, pszFormat); 2083 cbRet = RTStrAPrintfVTag(ppszBuffer, pszFormat, va, pszTag); 2084 va_end(va); 2085 return cbRet; 2086 } 2087 2088 /** 2089 * Allocating string printf, version 2. 
2090 * 2091 * @returns Formatted string. Use RTStrFree() to free it. NULL when out of 2092 * memory. 2093 * @param pszFormat Pointer to the format string, @see pg_rt_str_format. 2094 * @param args The format argument. 2095 */ 2096 #define RTStrAPrintf2V(pszFormat, args) RTStrAPrintf2VTag((pszFormat), (args), RTSTR_TAG) 2097 2098 /** 2099 * Allocating string printf, version 2. 2100 * 2101 * @returns Formatted string. Use RTStrFree() to free it. NULL when out of 2102 * memory. 2103 * @param pszFormat Pointer to the format string, @see pg_rt_str_format. 2104 * @param args The format argument. 2105 * @param pszTag Allocation tag used for statistics and such. 2106 */ 2107 RTDECL(char *) RTStrAPrintf2VTag(const char *pszFormat, va_list args, const char *pszTag) RT_IPRT_FORMAT_ATTR(1, 0); 2108 2109 /** 2110 * Allocating string printf, version 2 (default tag). 2111 * 2112 * @returns Formatted string. Use RTStrFree() to free it. NULL when out of 2113 * memory. 2114 * @param pszFormat Pointer to the format string, @see pg_rt_str_format. 2115 * @param ... The format argument. 2116 */ 2117 DECLINLINE(char *) RT_IPRT_FORMAT_ATTR(1, 2) RTStrAPrintf2(const char *pszFormat, ...) 2118 { 2119 char *pszRet; 2120 va_list va; 2121 va_start(va, pszFormat); 2122 pszRet = RTStrAPrintf2VTag(pszFormat, va, RTSTR_TAG); 2123 va_end(va); 2124 return pszRet; 2125 } 2126 2127 /** 2128 * Allocating string printf, version 2 (custom tag). 2129 * 2130 * @returns Formatted string. Use RTStrFree() to free it. NULL when out of 2131 * memory. 2132 * @param pszTag Allocation tag used for statistics and such. 2133 * @param pszFormat Pointer to the format string, @see pg_rt_str_format. 2134 * @param ... The format argument. 2135 */ 2136 DECLINLINE(char *) RT_IPRT_FORMAT_ATTR(2, 3) RTStrAPrintf2Tag(const char *pszTag, const char *pszFormat, ...) 
2137 { 2138 char *pszRet; 2139 va_list va; 2140 va_start(va, pszFormat); 2141 pszRet = RTStrAPrintf2VTag(pszFormat, va, pszTag); 2142 va_end(va); 2143 return pszRet; 2144 } 2145 2146 /** 2147 * Strips blankspaces from both ends of the string. 2148 * 2149 * @returns Pointer to first non-blank char in the string. 2150 * @param psz The string to strip. 2151 */ 2152 RTDECL(char *) RTStrStrip(char *psz); 2153 2154 /** 2155 * Strips blankspaces from the start of the string. 2156 * 2157 * @returns Pointer to first non-blank char in the string. 2158 * @param psz The string to strip. 2159 */ 2160 RTDECL(char *) RTStrStripL(const char *psz); 2161 2162 /** 2163 * Strips blankspaces from the end of the string. 2164 * 2165 * @returns psz. 2166 * @param psz The string to strip. 2167 */ 2168 RTDECL(char *) RTStrStripR(char *psz); 2169 2170 /** 2171 * String copy with overflow handling. 2172 * 2173 * @retval VINF_SUCCESS on success. 2174 * @retval VERR_BUFFER_OVERFLOW if the destination buffer is too small. The 2175 * buffer will contain as much of the string as it can hold, fully 2176 * terminated. 2177 * 2178 * @param pszDst The destination buffer. 2179 * @param cbDst The size of the destination buffer (in bytes). 2180 * @param pszSrc The source string. NULL is not OK. 2181 */ 2182 RTDECL(int) RTStrCopy(char *pszDst, size_t cbDst, const char *pszSrc); 2183 2184 /** 2185 * String copy with overflow handling. 2186 * 2187 * @retval VINF_SUCCESS on success. 2188 * @retval VERR_BUFFER_OVERFLOW if the destination buffer is too small. The 2189 * buffer will contain as much of the string as it can hold, fully 2190 * terminated. 2191 * 2192 * @param pszDst The destination buffer. 2193 * @param cbDst The size of the destination buffer (in bytes). 2194 * @param pszSrc The source string. NULL is not OK. 2195 * @param cchSrcMax The maximum number of chars (not code points) to 2196 * copy from the source string, not counting the 2197 * terminator as usual. 
2198 */ 2199 RTDECL(int) RTStrCopyEx(char *pszDst, size_t cbDst, const char *pszSrc, size_t cchSrcMax); 2200 2201 /** 2202 * String copy with overflow handling and buffer advancing. 2203 * 2204 * @retval VINF_SUCCESS on success. 2205 * @retval VERR_BUFFER_OVERFLOW if the destination buffer is too small. The 2206 * buffer will contain as much of the string as it can hold, fully 2207 * terminated. 2208 * 2209 * @param ppszDst Pointer to the destination buffer pointer. 2210 * This will be advanced to the end of the copied 2211 * bytes (points at the terminator). This is also 2212 * updated on overflow. 2213 * @param pcbDst Pointer to the destination buffer size 2214 * variable. This will be updated in accord with 2215 * the buffer pointer. 2216 * @param pszSrc The source string. NULL is not OK. 2217 */ 2218 RTDECL(int) RTStrCopyP(char **ppszDst, size_t *pcbDst, const char *pszSrc); 2219 2220 /** 2221 * String copy with overflow handling. 2222 * 2223 * @retval VINF_SUCCESS on success. 2224 * @retval VERR_BUFFER_OVERFLOW if the destination buffer is too small. The 2225 * buffer will contain as much of the string as it can hold, fully 2226 * terminated. 2227 * 2228 * @param ppszDst Pointer to the destination buffer pointer. 2229 * This will be advanced to the end of the copied 2230 * bytes (points at the terminator). This is also 2231 * updated on overflow. 2232 * @param pcbDst Pointer to the destination buffer size 2233 * variable. This will be updated in accord with 2234 * the buffer pointer. 2235 * @param pszSrc The source string. NULL is not OK. 2236 * @param cchSrcMax The maximum number of chars (not code points) to 2237 * copy from the source string, not counting the 2238 * terminator as usual. 2239 */ 2240 RTDECL(int) RTStrCopyPEx(char **ppszDst, size_t *pcbDst, const char *pszSrc, size_t cchSrcMax); 2241 2242 /** 2243 * String concatenation with overflow handling. 2244 * 2245 * @retval VINF_SUCCESS on success. 
2246 * @retval VERR_BUFFER_OVERFLOW if the destination buffer is too small. The 2247 * buffer will contain as much of the string as it can hold, fully 2248 * terminated. 2249 * 2250 * @param pszDst The destination buffer. 2251 * @param cbDst The size of the destination buffer (in bytes). 2252 * @param pszSrc The source string. NULL is not OK. 2253 */ 2254 RTDECL(int) RTStrCat(char *pszDst, size_t cbDst, const char *pszSrc); 2255 2256 /** 2257 * String concatenation with overflow handling. 2258 * 2259 * @retval VINF_SUCCESS on success. 2260 * @retval VERR_BUFFER_OVERFLOW if the destination buffer is too small. The 2261 * buffer will contain as much of the string as it can hold, fully 2262 * terminated. 2263 * 2264 * @param pszDst The destination buffer. 2265 * @param cbDst The size of the destination buffer (in bytes). 2266 * @param pszSrc The source string. NULL is not OK. 2267 * @param cchSrcMax The maximum number of chars (not code points) to 2268 * copy from the source string, not counting the 2269 * terminator as usual. 2270 */ 2271 RTDECL(int) RTStrCatEx(char *pszDst, size_t cbDst, const char *pszSrc, size_t cchSrcMax); 2272 2273 /** 2274 * String concatenation with overflow handling. 2275 * 2276 * @retval VINF_SUCCESS on success. 2277 * @retval VERR_BUFFER_OVERFLOW if the destination buffer is too small. The 2278 * buffer will contain as much of the string as it can hold, fully 2279 * terminated. 2280 * 2281 * @param ppszDst Pointer to the destination buffer pointer. 2282 * This will be advanced to the end of the copied 2283 * bytes (points at the terminator). This is also 2284 * updated on overflow. 2285 * @param pcbDst Pointer to the destination buffer size 2286 * variable. This will be updated in accord with 2287 * the buffer pointer. 2288 * @param pszSrc The source string. NULL is not OK. 
2289 */ 2290 RTDECL(int) RTStrCatP(char **ppszDst, size_t *pcbDst, const char *pszSrc); 2291 2292 /** 2293 * String concatenation with overflow handling and buffer advancing. 2294 * 2295 * @retval VINF_SUCCESS on success. 2296 * @retval VERR_BUFFER_OVERFLOW if the destination buffer is too small. The 2297 * buffer will contain as much of the string as it can hold, fully 2298 * terminated. 2299 * 2300 * @param ppszDst Pointer to the destination buffer pointer. 2301 * This will be advanced to the end of the copied 2302 * bytes (points at the terminator). This is also 2303 * updated on overflow. 2304 * @param pcbDst Pointer to the destination buffer size 2305 * variable. This will be updated in accord with 2306 * the buffer pointer. 2307 * @param pszSrc The source string. NULL is not OK. 2308 * @param cchSrcMax The maximum number of chars (not code points) to 2309 * copy from the source string, not counting the 2310 * terminator as usual. 2311 */ 2312 RTDECL(int) RTStrCatPEx(char **ppszDst, size_t *pcbDst, const char *pszSrc, size_t cchSrcMax); 2313 2314 /** 2315 * Performs a case sensitive string compare between two UTF-8 strings. 2316 * 2317 * Encoding errors are ignored by the current implementation. So, the only 2318 * difference between this and the CRT strcmp function is the handling of 2319 * NULL arguments. 2320 * 2321 * @returns < 0 if the first string less than the second string. 2322 * @returns 0 if the first string identical to the second string. 2323 * @returns > 0 if the first string greater than the second string. 2324 * @param psz1 First UTF-8 string. Null is allowed. 2325 * @param psz2 Second UTF-8 string. Null is allowed. 2326 */ 2327 RTDECL(int) RTStrCmp(const char *psz1, const char *psz2); 2328 2329 /** 2330 * Performs a case sensitive string compare between two UTF-8 strings, given 2331 * a maximum string length. 2332 * 2333 * Encoding errors are ignored by the current implementation. 
So, the only 2334 * difference between this and the CRT strncmp function is the handling of 2335 * NULL arguments. 2336 * 2337 * @returns < 0 if the first string less than the second string. 2338 * @returns 0 if the first string identical to the second string. 2339 * @returns > 0 if the first string greater than the second string. 2340 * @param psz1 First UTF-8 string. Null is allowed. 2341 * @param psz2 Second UTF-8 string. Null is allowed. 2342 * @param cchMax The maximum string length 2343 */ 2344 RTDECL(int) RTStrNCmp(const char *psz1, const char *psz2, size_t cchMax); 2345 2346 /** 2347 * Performs a case insensitive string compare between two UTF-8 strings. 2348 * 2349 * This is a simplified compare, as only the simplified lower/upper case folding 2350 * specified by the unicode specs are used. It does not consider character pairs 2351 * as they are used in some languages, just simple upper & lower case compares. 2352 * 2353 * The result is the difference between the mismatching codepoints after they 2354 * both have been lower cased. 2355 * 2356 * If the string encoding is invalid the function will assert (strict builds) 2357 * and use RTStrCmp for the remainder of the string. 2358 * 2359 * @returns < 0 if the first string less than the second string. 2360 * @returns 0 if the first string identical to the second string. 2361 * @returns > 0 if the first string greater than the second string. 2362 * @param psz1 First UTF-8 string. Null is allowed. 2363 * @param psz2 Second UTF-8 string. Null is allowed. 2364 */ 2365 RTDECL(int) RTStrICmp(const char *psz1, const char *psz2); 2366 2367 /** 2368 * Performs a case insensitive string compare between two UTF-8 strings, given a 2369 * maximum string length. 2370 * 2371 * This is a simplified compare, as only the simplified lower/upper case folding 2372 * specified by the unicode specs are used. It does not consider character pairs 2373 * as they are used in some languages, just simple upper & lower case compares. 
2374 * 2375 * The result is the difference between the mismatching codepoints after they 2376 * both have been lower cased. 2377 * 2378 * If the string encoding is invalid the function will assert (strict builds) 2379 * and use RTStrCmp for the remainder of the string. 2380 * 2381 * @returns < 0 if the first string less than the second string. 2382 * @returns 0 if the first string identical to the second string. 2383 * @returns > 0 if the first string greater than the second string. 2384 * @param psz1 First UTF-8 string. Null is allowed. 2385 * @param psz2 Second UTF-8 string. Null is allowed. 2386 * @param cchMax Maximum string length 2387 */ 2388 RTDECL(int) RTStrNICmp(const char *psz1, const char *psz2, size_t cchMax); 2389 2390 /** 2391 * Locates a case sensitive substring. 2392 * 2393 * If any of the two strings are NULL, then NULL is returned. If the needle is 2394 * an empty string, then the haystack is returned (i.e. matches anything). 2395 * 2396 * @returns Pointer to the first occurrence of the substring if found, NULL if 2397 * not. 2398 * 2399 * @param pszHaystack The string to search. 2400 * @param pszNeedle The substring to search for. 2401 * 2402 * @remarks The difference between this and strstr is the handling of NULL 2403 * pointers. 2404 */ 2405 RTDECL(char *) RTStrStr(const char *pszHaystack, const char *pszNeedle); 2406 2407 /** 2408 * Locates a case insensitive substring. 2409 * 2410 * If any of the two strings are NULL, then NULL is returned. If the needle is 2411 * an empty string, then the haystack is returned (i.e. matches anything). 2412 * 2413 * @returns Pointer to the first occurrence of the substring if found, NULL if 2414 * not. 2415 * 2416 * @param pszHaystack The string to search. 2417 * @param pszNeedle The substring to search for. 2418 * 2419 */ 2420 RTDECL(char *) RTStrIStr(const char *pszHaystack, const char *pszNeedle); 2421 2422 /** 2423 * Converts the string to lower case. 
2424 * 2425 * @returns Pointer to the converted string. 2426 * @param psz The string to convert. 2427 */ 2428 RTDECL(char *) RTStrToLower(char *psz); 2429 2430 /** 2431 * Converts the string to upper case. 2432 * 2433 * @returns Pointer to the converted string. 2434 * @param psz The string to convert. 2435 */ 2436 RTDECL(char *) RTStrToUpper(char *psz); 2437 2438 /** 2439 * Checks if the string is case foldable, i.e. whether it would change if 2440 * subject to RTStrToLower or RTStrToUpper. 2441 * 2442 * @returns true / false 2443 * @param psz The string in question. 2444 */ 2445 RTDECL(bool) RTStrIsCaseFoldable(const char *psz); 2446 2447 /** 2448 * Checks if the string is upper cased (no lower case chars in it). 2449 * 2450 * @returns true / false 2451 * @param psz The string in question. 2452 */ 2453 RTDECL(bool) RTStrIsUpperCased(const char *psz); 2454 2455 /** 2456 * Checks if the string is lower cased (no upper case chars in it). 2457 * 2458 * @returns true / false 2459 * @param psz The string in question. 2460 */ 2461 RTDECL(bool) RTStrIsLowerCased(const char *psz); 2462 2463 /** 2464 * Find the length of a zero-terminated byte string, given 2465 * a max string length. 2466 * 2467 * See also RTStrNLenEx. 2468 * 2469 * @returns The string length or cchMax. The returned length does not include 2470 * the zero terminator if it was found. 2471 * 2472 * @param pszString The string. 2473 * @param cchMax The max string length. 2474 */ 2475 RTDECL(size_t) RTStrNLen(const char *pszString, size_t cchMax); 2476 2477 /** 2478 * Find the length of a zero-terminated byte string, given 2479 * a max string length. 2480 * 2481 * See also RTStrNLen. 2482 * 2483 * @returns IPRT status code. 2484 * @retval VINF_SUCCESS if the string has a length less than cchMax. 2485 * @retval VERR_BUFFER_OVERFLOW if the end of the string wasn't found 2486 * before cchMax was reached. 2487 * 2488 * @param pszString The string. 2489 * @param cchMax The max string length.
2490 * @param pcch Where to store the string length excluding the 2491 * terminator. This is set to cchMax if the terminator 2492 * isn't found. 2493 */ 2494 RTDECL(int) RTStrNLenEx(const char *pszString, size_t cchMax, size_t *pcch); 2495 2496 RT_C_DECLS_END 2497 2498 /** The maximum size argument RTStrEnd passes to a single memchr call. */ 2499 #define RTSTR_MEMCHR_MAX ((~(size_t)0 >> 1) - 15) 2500 2501 /** 2502 * Find the zero terminator in a string with a limited length. 2503 * 2504 * @returns Pointer to the zero terminator. 2505 * @returns NULL if the zero terminator was not found within cchMax bytes. 2506 * 2507 * @param pszString The string. 2508 * @param cchMax The max string length. RTSTR_MAX is fine. 2509 * 
 * @remarks When compiled as C++ both a const and a non-const overload are provided. */ 2510 #if defined(__cplusplus) && !defined(DOXYGEN_RUNNING) 2511 DECLINLINE(char const *) RTStrEnd(char const *pszString, size_t cchMax) 2512 { 2513 /* Search in chunks of at most RTSTR_MEMCHR_MAX bytes to avoid potential issues with memchr seen in glibc. 2514 * See sysdeps/x86_64/memchr.S in glibc versions older than 2.11 */ 2515 while (cchMax > RTSTR_MEMCHR_MAX) 2516 { 2517 char const *pszRet = (char const *)memchr(pszString, '\0', RTSTR_MEMCHR_MAX); 2518 if (RT_LIKELY(pszRet)) 2519 return pszRet; 2520 pszString += RTSTR_MEMCHR_MAX; 2521 cchMax -= RTSTR_MEMCHR_MAX; 2522 } 2523 return (char const *)memchr(pszString, '\0', cchMax); 2524 } 2525 2526 DECLINLINE(char *) RTStrEnd(char *pszString, size_t cchMax) 2527 #else 2528 DECLINLINE(char *) RTStrEnd(const char *pszString, size_t cchMax) 2529 #endif 2530 { 2531 /* Search in chunks of at most RTSTR_MEMCHR_MAX bytes to avoid potential issues with memchr seen in glibc. 2532 * See sysdeps/x86_64/memchr.S in glibc versions older than 2.11 */ 2533 while (cchMax > RTSTR_MEMCHR_MAX) 2534 { 2535 char *pszRet = (char *)memchr(pszString, '\0', RTSTR_MEMCHR_MAX); 2536 if (RT_LIKELY(pszRet)) 2537 return pszRet; 2538 pszString += RTSTR_MEMCHR_MAX; 2539 cchMax -= RTSTR_MEMCHR_MAX; 2540 } 2541 return (char *)memchr(pszString, '\0', cchMax); 2542 } 2543 2544 RT_C_DECLS_BEGIN 2545 2546 /** 2547 * Finds the offset at which a simple character first occurs in a string.
2548 * 2549 * @returns The offset of the first occurrence or the terminator offset. 2550 * @param pszHaystack The string to search. 2551 * @param chNeedle The character to search for. 2552 */ 2553 DECLINLINE(size_t) RTStrOffCharOrTerm(const char *pszHaystack, char chNeedle) 2554 { 2555 const char *psz = pszHaystack; 2556 char ch; 2557 while ( (ch = *psz) != chNeedle 2558 && ch != '\0') 2559 psz++; 2560 return psz - pszHaystack; 2561 } 2562 2563 2564 /** 2565 * Matches a simple string pattern. 2566 * 2567 * @returns true if the string matches the pattern, otherwise false. 2568 * 2569 * @param pszPattern The pattern. Special chars are '*' and '?', where the 2570 * asterisk matches zero or more characters and question 2571 * mark matches exactly one character. 2572 * @param pszString The string to match against the pattern. 2573 */ 2574 RTDECL(bool) RTStrSimplePatternMatch(const char *pszPattern, const char *pszString); 2575 2576 /** 2577 * Matches a simple string pattern, neither of which needs to be zero terminated. 2578 * 2579 * This is identical to RTStrSimplePatternMatch except that you can optionally 2580 * specify the length of both the pattern and the string. The function will 2581 * stop when it hits a string terminator or either of the lengths. 2582 * 2583 * @returns true if the string matches the pattern, otherwise false. 2584 * 2585 * @param pszPattern The pattern. Special chars are '*' and '?', where the 2586 * asterisk matches zero or more characters and question 2587 * mark matches exactly one character. 2588 * @param cchPattern The pattern length. Pass RTSTR_MAX if you don't know the 2589 * length and wish to stop at the string terminator. 2590 * @param pszString The string to match against the pattern. 2591 * @param cchString The string length. Pass RTSTR_MAX if you don't know the 2592 * length and wish to match up to the string terminator.
2593 */ 2594 RTDECL(bool) RTStrSimplePatternNMatch(const char *pszPattern, size_t cchPattern, 2595 const char *pszString, size_t cchString); 2596 2597 /** 2598 * Matches multiple patterns against a string. 2599 * 2600 * The patterns are separated by the pipe character (|). 2601 * 2602 * @returns true if the string matches the pattern, otherwise false. 2603 * 2604 * @param pszPatterns The patterns. 2605 * @param cchPatterns The lengths of the patterns to use. Pass RTSTR_MAX to 2606 * stop at the terminator. 2607 * @param pszString The string to match against the pattern. 2608 * @param cchString The string length. Pass RTSTR_MAX to stop at the 2609 * terminator. 2610 * @param poffPattern Offset into the patterns string of the pattern that 2611 * matched. If no match, this will be set to RTSTR_MAX. 2612 * This is optional, NULL is fine. 2613 */ 2614 RTDECL(bool) RTStrSimplePatternMultiMatch(const char *pszPatterns, size_t cchPatterns, 2615 const char *pszString, size_t cchString, 2616 size_t *poffPattern); 2617 2618 /** 2619 * Compares two version strings RTStrICmp fashion. 2620 * 2621 * The version string is split up into sections at punctuation, spaces, 2622 * underscores, dashes and plus signs. The sections are then split up into 2623 * numeric and string sub-sections. Finally, the sub-sections are compared 2624 * in a numeric or case insensitive fashion depending on what they are. 2625 * 2626 * The following strings are considered to be equal: "1.0.0", "1.00.0", "1.0", 2627 * "1". These aren't: "1.0.0r993", "1.0", "1.0r993", "1.0_Beta3", "1.1" 2628 * 2629 * @returns < 0 if the first string less than the second string. 2630 * @returns 0 if the first string identical to the second string. 2631 * @returns > 0 if the first string greater than the second string. 2632 * 2633 * @param pszVer1 First version string to compare. 2634 * @param pszVer2 Second version string to compare first version with.
2635 */ 2636 RTDECL(int) RTStrVersionCompare(const char *pszVer1, const char *pszVer2); 2637 2638 2639 /** @defgroup rt_str_conv String To/From Number Conversions 2640 * @{ */ 2641 2642 /** 2643 * Converts a string representation of a number to a 64-bit unsigned number. 2644 * 2645 * @returns iprt status code. 2646 * Warnings are used to indicate conversion problems. 2647 * @retval VWRN_NUMBER_TOO_BIG 2648 * @retval VWRN_NEGATIVE_UNSIGNED 2649 * @retval VWRN_TRAILING_CHARS 2650 * @retval VWRN_TRAILING_SPACES 2651 * @retval VINF_SUCCESS 2652 * @retval VERR_NO_DIGITS 2653 * 2654 * @param pszValue Pointer to the string value. 2655 * @param ppszNext Where to store the pointer to the first char following the number. (Optional) 2656 * @param uBase The base of the representation used. 2657 * If 0 the function will look for known prefixes before defaulting to 10. 2658 * @param pu64 Where to store the converted number. (optional) 2659 */ 2660 RTDECL(int) RTStrToUInt64Ex(const char *pszValue, char **ppszNext, unsigned uBase, uint64_t *pu64); 2661 2662 /** 2663 * Converts a string representation of a number to a 64-bit unsigned number, 2664 * making sure the full string is converted. 2665 * 2666 * @returns iprt status code. 2667 * Warnings are used to indicate conversion problems. 2668 * @retval VWRN_NUMBER_TOO_BIG 2669 * @retval VWRN_NEGATIVE_UNSIGNED 2670 * @retval VINF_SUCCESS 2671 * @retval VERR_NO_DIGITS 2672 * @retval VERR_TRAILING_SPACES 2673 * @retval VERR_TRAILING_CHARS 2674 * 2675 * @param pszValue Pointer to the string value. 2676 * @param uBase The base of the representation used. 2677 * If 0 the function will look for known prefixes before defaulting to 10. 2678 * @param pu64 Where to store the converted number. (optional) 2679 */ 2680 RTDECL(int) RTStrToUInt64Full(const char *pszValue, unsigned uBase, uint64_t *pu64); 2681 2682 /** 2683 * Converts a string representation of a number to a 64-bit unsigned number. 2684 * The base is guessed. 
2685 * 2686 * @returns 64-bit unsigned number on success. 2687 * @returns 0 on failure. 2688 * @param pszValue Pointer to the string value. 2689 */ 2690 RTDECL(uint64_t) RTStrToUInt64(const char *pszValue); 2691 2692 /** 2693 * Converts a string representation of a number to a 32-bit unsigned number. 2694 * 2695 * @returns iprt status code. 2696 * Warnings are used to indicate conversion problems. 2697 * @retval VWRN_NUMBER_TOO_BIG 2698 * @retval VWRN_NEGATIVE_UNSIGNED 2699 * @retval VWRN_TRAILING_CHARS 2700 * @retval VWRN_TRAILING_SPACES 2701 * @retval VINF_SUCCESS 2702 * @retval VERR_NO_DIGITS 2703 * 2704 * @param pszValue Pointer to the string value. 2705 * @param ppszNext Where to store the pointer to the first char following the number. (Optional) 2706 * @param uBase The base of the representation used. 2707 * If 0 the function will look for known prefixes before defaulting to 10. 2708 * @param pu32 Where to store the converted number. (optional) 2709 */ 2710 RTDECL(int) RTStrToUInt32Ex(const char *pszValue, char **ppszNext, unsigned uBase, uint32_t *pu32); 2711 2712 /** 2713 * Converts a string representation of a number to a 32-bit unsigned number, 2714 * making sure the full string is converted. 2715 * 2716 * @returns iprt status code. 2717 * Warnings are used to indicate conversion problems. 2718 * @retval VWRN_NUMBER_TOO_BIG 2719 * @retval VWRN_NEGATIVE_UNSIGNED 2720 * @retval VINF_SUCCESS 2721 * @retval VERR_NO_DIGITS 2722 * @retval VERR_TRAILING_SPACES 2723 * @retval VERR_TRAILING_CHARS 2724 * 2725 * @param pszValue Pointer to the string value. 2726 * @param uBase The base of the representation used. 2727 * If 0 the function will look for known prefixes before defaulting to 10. 2728 * @param pu32 Where to store the converted number. (optional) 2729 */ 2730 RTDECL(int) RTStrToUInt32Full(const char *pszValue, unsigned uBase, uint32_t *pu32); 2731 2732 /** 2733 * Converts a string representation of a number to a 32-bit unsigned number.
2734 * The base is guessed. 2735 * 2736 * @returns 32-bit unsigned number on success. 2737 * @returns 0 on failure. 2738 * @param pszValue Pointer to the string value. 2739 */ 2740 RTDECL(uint32_t) RTStrToUInt32(const char *pszValue); 2741 2742 /** 2743 * Converts a string representation of a number to a 16-bit unsigned number. 2744 * 2745 * @returns iprt status code. 2746 * Warnings are used to indicate conversion problems. 2747 * @retval VWRN_NUMBER_TOO_BIG 2748 * @retval VWRN_NEGATIVE_UNSIGNED 2749 * @retval VWRN_TRAILING_CHARS 2750 * @retval VWRN_TRAILING_SPACES 2751 * @retval VINF_SUCCESS 2752 * @retval VERR_NO_DIGITS 2753 * 2754 * @param pszValue Pointer to the string value. 2755 * @param ppszNext Where to store the pointer to the first char following the number. (Optional) 2756 * @param uBase The base of the representation used. 2757 * If 0 the function will look for known prefixes before defaulting to 10. 2758 * @param pu16 Where to store the converted number. (optional) 2759 */ 2760 RTDECL(int) RTStrToUInt16Ex(const char *pszValue, char **ppszNext, unsigned uBase, uint16_t *pu16); 2761 2762 /** 2763 * Converts a string representation of a number to a 16-bit unsigned number, 2764 * making sure the full string is converted. 2765 * 2766 * @returns iprt status code. 2767 * Warnings are used to indicate conversion problems. 2768 * @retval VWRN_NUMBER_TOO_BIG 2769 * @retval VWRN_NEGATIVE_UNSIGNED 2770 * @retval VINF_SUCCESS 2771 * @retval VERR_NO_DIGITS 2772 * @retval VERR_TRAILING_SPACES 2773 * @retval VERR_TRAILING_CHARS 2774 * 2775 * @param pszValue Pointer to the string value. 2776 * @param uBase The base of the representation used. 2777 * If 0 the function will look for known prefixes before defaulting to 10. 2778 * @param pu16 Where to store the converted number. 
(optional) 2779 */ 2780 RTDECL(int) RTStrToUInt16Full(const char *pszValue, unsigned uBase, uint16_t *pu16); 2781 2782 /** 2783 * Converts a string representation of a number to a 16-bit unsigned number. 2784 * The base is guessed. 2785 * 2786 * @returns 16-bit unsigned number on success. 2787 * @returns 0 on failure. 2788 * @param pszValue Pointer to the string value. 2789 */ 2790 RTDECL(uint16_t) RTStrToUInt16(const char *pszValue); 2791 2792 /** 2793 * Converts a string representation of a number to a 8-bit unsigned number. 2794 * 2795 * @returns iprt status code. 2796 * Warnings are used to indicate conversion problems. 2797 * @retval VWRN_NUMBER_TOO_BIG 2798 * @retval VWRN_NEGATIVE_UNSIGNED 2799 * @retval VWRN_TRAILING_CHARS 2800 * @retval VWRN_TRAILING_SPACES 2801 * @retval VINF_SUCCESS 2802 * @retval VERR_NO_DIGITS 2803 * 2804 * @param pszValue Pointer to the string value. 2805 * @param ppszNext Where to store the pointer to the first char following the number. (Optional) 2806 * @param uBase The base of the representation used. 2807 * If 0 the function will look for known prefixes before defaulting to 10. 2808 * @param pu8 Where to store the converted number. (optional) 2809 */ 2810 RTDECL(int) RTStrToUInt8Ex(const char *pszValue, char **ppszNext, unsigned uBase, uint8_t *pu8); 2811 2812 /** 2813 * Converts a string representation of a number to a 8-bit unsigned number, 2814 * making sure the full string is converted. 2815 * 2816 * @returns iprt status code. 2817 * Warnings are used to indicate conversion problems. 2818 * @retval VWRN_NUMBER_TOO_BIG 2819 * @retval VWRN_NEGATIVE_UNSIGNED 2820 * @retval VINF_SUCCESS 2821 * @retval VERR_NO_DIGITS 2822 * @retval VERR_TRAILING_SPACES 2823 * @retval VERR_TRAILING_CHARS 2824 * 2825 * @param pszValue Pointer to the string value. 2826 * @param uBase The base of the representation used. 2827 * If 0 the function will look for known prefixes before defaulting to 10. 2828 * @param pu8 Where to store the converted number. 
(optional) 2829 */ 2830 RTDECL(int) RTStrToUInt8Full(const char *pszValue, unsigned uBase, uint8_t *pu8); 2831 2832 /** 2833 * Converts a string representation of a number to a 8-bit unsigned number. 2834 * The base is guessed. 2835 * 2836 * @returns 8-bit unsigned number on success. 2837 * @returns 0 on failure. 2838 * @param pszValue Pointer to the string value. 2839 */ 2840 RTDECL(uint8_t) RTStrToUInt8(const char *pszValue); 2841 2842 /** 2843 * Converts a string representation of a number to a 64-bit signed number. 2844 * 2845 * @returns iprt status code. 2846 * Warnings are used to indicate conversion problems. 2847 * @retval VWRN_NUMBER_TOO_BIG 2848 * @retval VWRN_TRAILING_CHARS 2849 * @retval VWRN_TRAILING_SPACES 2850 * @retval VINF_SUCCESS 2851 * @retval VERR_NO_DIGITS 2852 * 2853 * @param pszValue Pointer to the string value. 2854 * @param ppszNext Where to store the pointer to the first char following the number. (Optional) 2855 * @param uBase The base of the representation used. 2856 * If 0 the function will look for known prefixes before defaulting to 10. 2857 * @param pi64 Where to store the converted number. (optional) 2858 */ 2859 RTDECL(int) RTStrToInt64Ex(const char *pszValue, char **ppszNext, unsigned uBase, int64_t *pi64); 2860 2861 /** 2862 * Converts a string representation of a number to a 64-bit signed number, 2863 * making sure the full string is converted. 2864 * 2865 * @returns iprt status code. 2866 * Warnings are used to indicate conversion problems. 2867 * @retval VWRN_NUMBER_TOO_BIG 2868 * @retval VINF_SUCCESS 2869 * @retval VERR_TRAILING_CHARS 2870 * @retval VERR_TRAILING_SPACES 2871 * @retval VERR_NO_DIGITS 2872 * 2873 * @param pszValue Pointer to the string value. 2874 * @param uBase The base of the representation used. 2875 * If 0 the function will look for known prefixes before defaulting to 10. 2876 * @param pi64 Where to store the converted number. 
(optional) 2877 */ 2878 RTDECL(int) RTStrToInt64Full(const char *pszValue, unsigned uBase, int64_t *pi64); 2879 2880 /** 2881 * Converts a string representation of a number to a 64-bit signed number. 2882 * The base is guessed. 2883 * 2884 * @returns 64-bit signed number on success. 2885 * @returns 0 on failure. 2886 * @param pszValue Pointer to the string value. 2887 */ 2888 RTDECL(int64_t) RTStrToInt64(const char *pszValue); 2889 2890 /** 2891 * Converts a string representation of a number to a 32-bit signed number. 2892 * 2893 * @returns iprt status code. 2894 * Warnings are used to indicate conversion problems. 2895 * @retval VWRN_NUMBER_TOO_BIG 2896 * @retval VWRN_TRAILING_CHARS 2897 * @retval VWRN_TRAILING_SPACES 2898 * @retval VINF_SUCCESS 2899 * @retval VERR_NO_DIGITS 2900 * 2901 * @param pszValue Pointer to the string value. 2902 * @param ppszNext Where to store the pointer to the first char following the number. (Optional) 2903 * @param uBase The base of the representation used. 2904 * If 0 the function will look for known prefixes before defaulting to 10. 2905 * @param pi32 Where to store the converted number. (optional) 2906 */ 2907 RTDECL(int) RTStrToInt32Ex(const char *pszValue, char **ppszNext, unsigned uBase, int32_t *pi32); 2908 2909 /** 2910 * Converts a string representation of a number to a 32-bit signed number, 2911 * making sure the full string is converted. 2912 * 2913 * @returns iprt status code. 2914 * Warnings are used to indicate conversion problems. 2915 * @retval VWRN_NUMBER_TOO_BIG 2916 * @retval VINF_SUCCESS 2917 * @retval VERR_TRAILING_CHARS 2918 * @retval VERR_TRAILING_SPACES 2919 * @retval VERR_NO_DIGITS 2920 * 2921 * @param pszValue Pointer to the string value. 2922 * @param uBase The base of the representation used. 2923 * If 0 the function will look for known prefixes before defaulting to 10. 2924 * @param pi32 Where to store the converted number. 
(optional) 2925 */ 2926 RTDECL(int) RTStrToInt32Full(const char *pszValue, unsigned uBase, int32_t *pi32); 2927 2928 /** 2929 * Converts a string representation of a number to a 32-bit signed number. 2930 * The base is guessed. 2931 * 2932 * @returns 32-bit signed number on success. 2933 * @returns 0 on failure. 2934 * @param pszValue Pointer to the string value. 2935 */ 2936 RTDECL(int32_t) RTStrToInt32(const char *pszValue); 2937 2938 /** 2939 * Converts a string representation of a number to a 16-bit signed number. 2940 * 2941 * @returns iprt status code. 2942 * Warnings are used to indicate conversion problems. 2943 * @retval VWRN_NUMBER_TOO_BIG 2944 * @retval VWRN_TRAILING_CHARS 2945 * @retval VWRN_TRAILING_SPACES 2946 * @retval VINF_SUCCESS 2947 * @retval VERR_NO_DIGITS 2948 * 2949 * @param pszValue Pointer to the string value. 2950 * @param ppszNext Where to store the pointer to the first char following the number. (Optional) 2951 * @param uBase The base of the representation used. 2952 * If 0 the function will look for known prefixes before defaulting to 10. 2953 * @param pi16 Where to store the converted number. (optional) 2954 */ 2955 RTDECL(int) RTStrToInt16Ex(const char *pszValue, char **ppszNext, unsigned uBase, int16_t *pi16); 2956 2957 /** 2958 * Converts a string representation of a number to a 16-bit signed number, 2959 * making sure the full string is converted. 2960 * 2961 * @returns iprt status code. 2962 * Warnings are used to indicate conversion problems. 2963 * @retval VWRN_NUMBER_TOO_BIG 2964 * @retval VINF_SUCCESS 2965 * @retval VERR_TRAILING_CHARS 2966 * @retval VERR_TRAILING_SPACES 2967 * @retval VERR_NO_DIGITS 2968 * 2969 * @param pszValue Pointer to the string value. 2970 * @param uBase The base of the representation used. 2971 * If 0 the function will look for known prefixes before defaulting to 10. 2972 * @param pi16 Where to store the converted number. 
(optional) 2973 */ 2974 RTDECL(int) RTStrToInt16Full(const char *pszValue, unsigned uBase, int16_t *pi16); 2975 2976 /** 2977 * Converts a string representation of a number to a 16-bit signed number. 2978 * The base is guessed. 2979 * 2980 * @returns 16-bit signed number on success. 2981 * @returns 0 on failure. 2982 * @param pszValue Pointer to the string value. 2983 */ 2984 RTDECL(int16_t) RTStrToInt16(const char *pszValue); 2985 2986 /** 2987 * Converts a string representation of a number to a 8-bit signed number. 2988 * 2989 * @returns iprt status code. 2990 * Warnings are used to indicate conversion problems. 2991 * @retval VWRN_NUMBER_TOO_BIG 2992 * @retval VWRN_TRAILING_CHARS 2993 * @retval VWRN_TRAILING_SPACES 2994 * @retval VINF_SUCCESS 2995 * @retval VERR_NO_DIGITS 2996 * 2997 * @param pszValue Pointer to the string value. 2998 * @param ppszNext Where to store the pointer to the first char following the number. (Optional) 2999 * @param uBase The base of the representation used. 3000 * If 0 the function will look for known prefixes before defaulting to 10. 3001 * @param pi8 Where to store the converted number. (optional) 3002 */ 3003 RTDECL(int) RTStrToInt8Ex(const char *pszValue, char **ppszNext, unsigned uBase, int8_t *pi8); 3004 3005 /** 3006 * Converts a string representation of a number to a 8-bit signed number, 3007 * making sure the full string is converted. 3008 * 3009 * @returns iprt status code. 3010 * Warnings are used to indicate conversion problems. 3011 * @retval VWRN_NUMBER_TOO_BIG 3012 * @retval VINF_SUCCESS 3013 * @retval VERR_TRAILING_CHARS 3014 * @retval VERR_TRAILING_SPACES 3015 * @retval VERR_NO_DIGITS 3016 * 3017 * @param pszValue Pointer to the string value. 3018 * @param uBase The base of the representation used. 3019 * If 0 the function will look for known prefixes before defaulting to 10. 3020 * @param pi8 Where to store the converted number. 
(optional) 3021 */ 3022 RTDECL(int) RTStrToInt8Full(const char *pszValue, unsigned uBase, int8_t *pi8); 3023 3024 /** 3025 * Converts a string representation of a number to an 8-bit signed number. 3026 * The base is guessed. 3027 * 3028 * @returns 8-bit signed number on success. 3029 * @returns 0 on failure. 3030 * @param pszValue Pointer to the string value. 3031 */ 3032 RTDECL(int8_t) RTStrToInt8(const char *pszValue); 3033 3034 /** 3035 * Formats a buffer stream as hex bytes. 3036 * 3037 * The default is no separating spaces or line breaks or anything. 3038 * 3039 * @returns IPRT status code. 3040 * @retval VERR_INVALID_POINTER if any of the pointers are wrong. 3041 * @retval VERR_BUFFER_OVERFLOW if the buffer is insufficient to hold the bytes. 3042 * 3043 * @param pszBuf Output string buffer. 3044 * @param cchBuf The size of the output buffer. 3045 * @param pv Pointer to the bytes to stringify. 3046 * @param cb The number of bytes to stringify. 3047 * @param fFlags Combination of RTSTRPRINTHEXBYTES_F_XXX values. 3048 * @sa RTUtf16PrintHexBytes. 3049 */ 3050 RTDECL(int) RTStrPrintHexBytes(char *pszBuf, size_t cchBuf, void const *pv, size_t cb, uint32_t fFlags); 3051 /** @name RTSTRPRINTHEXBYTES_F_XXX - flags for RTStrPrintHexBytes and RTUtf16PrintHexBytes. 3052 * @{ */ 3053 /** Upper case hex digits, the default is lower case. */ 3054 #define RTSTRPRINTHEXBYTES_F_UPPER RT_BIT(0) 3055 /** @} */ 3056 3057 /** 3058 * Converts a string of hex bytes back into binary data. 3059 * 3060 * @returns IPRT status code. 3061 * @retval VERR_INVALID_POINTER if any of the pointers are wrong. 3062 * @retval VERR_BUFFER_OVERFLOW if the string contains too many hex bytes. 3063 * @retval VERR_BUFFER_UNDERFLOW if there aren't enough hex bytes to fill up 3064 * the output buffer. 3065 * @retval VERR_UNEVEN_INPUT if the input contains a half byte.
3066 * @retval VERR_NO_DIGITS 3067 * @retval VWRN_TRAILING_CHARS 3068 * @retval VWRN_TRAILING_SPACES 3069 * 3070 * @param pszHex The string containing the hex bytes. 3071 * @param pv Output buffer. 3072 * @param cb The size of the output buffer. 3073 * @param fFlags Must be zero, reserved for future use. 3074 */ 3075 RTDECL(int) RTStrConvertHexBytes(char const *pszHex, void *pv, size_t cb, uint32_t fFlags); 3076 3077 /** @} */ 3078 3079 3080 /** @defgroup rt_str_space Unique String Space 3081 * @{ 3082 */ 3083 3084 /** Pointer to a string name space container node core. */ 3085 typedef struct RTSTRSPACECORE *PRTSTRSPACECORE; 3086 /** Pointer to a pointer to a string name space container node core. */ 3087 typedef PRTSTRSPACECORE *PPRTSTRSPACECORE; 3088 3089 /** 3090 * String name space container node core. 3091 */ 3092 typedef struct RTSTRSPACECORE 3093 { 3094 /** Hash key. Don't touch. */ 3095 uint32_t Key; 3096 /** Pointer to the left leaf node. Don't touch. */ 3097 PRTSTRSPACECORE pLeft; 3098 /** Pointer to the right leaf node. Don't touch. */ 3099 PRTSTRSPACECORE pRight; 3100 /** Pointer to the list of strings with the same key. Don't touch. */ 3101 PRTSTRSPACECORE pList; 3102 /** Height of this tree: max(height(left), height(right)) + 1. Don't touch. */ 3103 unsigned char uchHeight; 3104 /** The string length. Read only! */ 3105 size_t cchString; 3106 /** Pointer to the string. Read only! */ 3107 const char *pszString; 3108 } RTSTRSPACECORE; 3109 3110 /** String space. (Initialize with NULL.) */ 3111 typedef PRTSTRSPACECORE RTSTRSPACE; 3112 /** Pointer to a string space. */ 3113 typedef PPRTSTRSPACECORE PRTSTRSPACE; 3114 3115 3116 /** 3117 * Inserts a string into a unique string space. 3118 * 3119 * @returns true on success. 3120 * @returns false if the string collided with an existing string. 3121 * @param pStrSpace The space to insert it into. 3122 * @param pStr The string node.
3123 */ 3124 RTDECL(bool) RTStrSpaceInsert(PRTSTRSPACE pStrSpace, PRTSTRSPACECORE pStr); 3125 3126 /** 3127 * Removes a string from a unique string space. 3128 * 3129 * @returns Pointer to the removed string node. 3130 * @returns NULL if the string was not found in the string space. 3131 * @param pStrSpace The space to remove it from. 3132 * @param pszString The string to remove. 3133 */ 3134 RTDECL(PRTSTRSPACECORE) RTStrSpaceRemove(PRTSTRSPACE pStrSpace, const char *pszString); 3135 3136 /** 3137 * Gets a string from a unique string space. 3138 * 3139 * @returns Pointer to the string node. 3140 * @returns NULL if the string was not found in the string space. 3141 * @param pStrSpace The space to get it from. 3142 * @param pszString The string to get. 3143 */ 3144 RTDECL(PRTSTRSPACECORE) RTStrSpaceGet(PRTSTRSPACE pStrSpace, const char *pszString); 3145 3146 /** 3147 * Gets a string from a unique string space. 3148 * 3149 * @returns Pointer to the string node. 3150 * @returns NULL if the string was not found in the string space. 3151 * @param pStrSpace The space to get it from. 3152 * @param pszString The string to get. 3153 * @param cchMax The max string length to evaluate. Passing 3154 * RTSTR_MAX is ok and makes it behave just like 3155 * RTStrSpaceGet. 3156 */ 3157 RTDECL(PRTSTRSPACECORE) RTStrSpaceGetN(PRTSTRSPACE pStrSpace, const char *pszString, size_t cchMax); 3158 3159 /** 3160 * Callback function for RTStrSpaceEnumerate() and RTStrSpaceDestroy(). 3161 * 3162 * @returns 0 on continue. 3163 * @returns Non-zero to abort the operation. 3164 * @param pStr The string node 3165 * @param pvUser The user specified argument. 3166 */ 3167 typedef DECLCALLBACK(int) FNRTSTRSPACECALLBACK(PRTSTRSPACECORE pStr, void *pvUser); 3168 /** Pointer to callback function for RTStrSpaceEnumerate() and RTStrSpaceDestroy(). */ 3169 typedef FNRTSTRSPACECALLBACK *PFNRTSTRSPACECALLBACK; 3170 3171 /** 3172 * Destroys the string space.
3173 * 3174 * The caller supplies a callback which will be called for each of the string 3175 * nodes in for freeing their memory and other resources. 3176 * 3177 * @returns 0 or what ever non-zero return value pfnCallback returned 3178 * when aborting the destruction. 3179 * @param pStrSpace The space to destroy. 3180 * @param pfnCallback The callback. 3181 * @param pvUser The user argument. 3182 */ 3183 RTDECL(int) RTStrSpaceDestroy(PRTSTRSPACE pStrSpace, PFNRTSTRSPACECALLBACK pfnCallback, void *pvUser); 3184 3185 /** 3186 * Enumerates the string space. 3187 * The caller supplies a callback which will be called for each of 3188 * the string nodes. 3189 * 3190 * @returns 0 or what ever non-zero return value pfnCallback returned 3191 * when aborting the enumeration. 3192 * @param pStrSpace The space to enumerate. 3193 * @param pfnCallback The callback. 3194 * @param pvUser The user argument. 3195 */ 3196 RTDECL(int) RTStrSpaceEnumerate(PRTSTRSPACE pStrSpace, PFNRTSTRSPACECALLBACK pfnCallback, void *pvUser); 3197 3198 /** @} */ 3199 3200 3201 /** @defgroup rt_str_hash String hashing 3202 * @{ */ 3203 3204 /** 3205 * Hashes the given string using algorithm \#1. 3206 * 3207 * @returns String hash. 3208 * @param pszString The string to hash. 3209 */ 3210 RTDECL(uint32_t) RTStrHash1(const char *pszString); 3211 3212 /** 3213 * Hashes the given string using algorithm \#1. 3214 * 3215 * @returns String hash. 3216 * @param pszString The string to hash. 3217 * @param cchString The max length to hash. Hashing will stop if the 3218 * terminator character is encountered first. Passing 3219 * RTSTR_MAX is fine. 3220 */ 3221 RTDECL(uint32_t) RTStrHash1N(const char *pszString, size_t cchString); 3222 3223 /** 3224 * Hashes the given strings as if they were concatenated using algorithm \#1. 3225 * 3226 * @returns String hash. 3227 * @param cPairs The number of string / length pairs in the 3228 * ellipsis. 3229 * @param ... 
List of string (const char *) and length 3230 * (size_t) pairs. Passing RTSTR_MAX as the size is 3231 * fine. 3232 */ 3233 RTDECL(uint32_t) RTStrHash1ExN(size_t cPairs, ...); 3234 3235 /** 3236 * Hashes the given strings as if they were concatenated using algorithm \#1. 3237 * 3238 * @returns String hash. 3239 * @param cPairs The number of string / length pairs in the @a va. 3240 * @param va List of string (const char *) and length 3241 * (size_t) pairs. Passing RTSTR_MAX as the size is 3242 * fine. 3243 */ 3244 RTDECL(uint32_t) RTStrHash1ExNV(size_t cPairs, va_list va); 3245 3246 /** @} */ 3247 3248 3249 /** @defgroup rt_str_utf16 UTF-16 String Manipulation 3250 * @{ 3251 */ 3252 3253 /** 3254 * Allocates memory for UTF-16 string storage (default tag). 3255 * 3256 * You should normally not use this function, except if there is some very 3257 * custom string handling you need doing that isn't covered by any of the other 3258 * APIs. 3259 * 3260 * @returns Pointer to the allocated UTF-16 string. The first wide char is 3261 * always set to the string terminator char, the contents of the 3262 * remainder of the memory is undefined. The string must be freed by 3263 * calling RTUtf16Free. 3264 * 3265 * NULL is returned if the allocation failed. Please translate this to 3266 * VERR_NO_UTF16_MEMORY and not VERR_NO_MEMORY. Also consider 3267 * RTUtf16AllocEx if an IPRT status code is required. 3268 * 3269 * @param cb How many bytes to allocate, will be rounded up 3270 * to a multiple of two. If this is zero, we will 3271 * allocate a terminator wide char anyway. 3272 */ 3273 #define RTUtf16Alloc(cb) RTUtf16AllocTag((cb), RTSTR_TAG) 3274 3275 /** 3276 * Allocates memory for UTF-16 string storage (custom tag). 3277 * 3278 * You should normally not use this function, except if there is some very 3279 * custom string handling you need doing that isn't covered by any of the other 3280 * APIs. 3281 * 3282 * @returns Pointer to the allocated UTF-16 string. 
The first wide char is 3283 * always set to the string terminator char, the contents of the 3284 * remainder of the memory is undefined. The string must be freed by 3285 * calling RTUtf16Free. 3286 * 3287 * NULL is returned if the allocation failed. Please translate this to 3288 * VERR_NO_UTF16_MEMORY and not VERR_NO_MEMORY. Also consider 3289 * RTUtf16AllocExTag if an IPRT status code is required. 3290 * 3291 * @param cb How many bytes to allocate, will be rounded up 3292 * to a multiple of two. If this is zero, we will 3293 * allocate a terminator wide char anyway. 3294 * @param pszTag Allocation tag used for statistics and such. 3295 */ 3296 RTDECL(PRTUTF16) RTUtf16AllocTag(size_t cb, const char *pszTag); 3297 3298 3299 /** 3300 * Free a UTF-16 string allocated by RTStrToUtf16(), RTStrToUtf16Ex(), 3301 * RTLatin1ToUtf16(), RTLatin1ToUtf16Ex(), RTUtf16Dup() or RTUtf16DupEx(). 3302 * 3303 * @returns iprt status code. 3304 * @param pwszString The UTF-16 string to free. NULL is accepted. 3305 */ 3306 RTDECL(void) RTUtf16Free(PRTUTF16 pwszString); 3307 3308 /** 3309 * Allocates a new copy of the specified UTF-16 string (default tag). 3310 * 3311 * @returns Pointer to the allocated string copy. Use RTUtf16Free() to free it. 3312 * @returns NULL when out of memory. 3313 * @param pwszString UTF-16 string to duplicate. 3314 * @remark This function will not make any attempt to validate the encoding. 3315 */ 3316 #define RTUtf16Dup(pwszString) RTUtf16DupTag((pwszString), RTSTR_TAG) 3317 3318 /** 3319 * Allocates a new copy of the specified UTF-16 string (custom tag). 3320 * 3321 * @returns Pointer to the allocated string copy. Use RTUtf16Free() to free it. 3322 * @returns NULL when out of memory. 3323 * @param pwszString UTF-16 string to duplicate. 3324 * @param pszTag Allocation tag used for statistics and such. 3325 * @remark This function will not make any attempt to validate the encoding. 
3326 */ 3327 RTDECL(PRTUTF16) RTUtf16DupTag(PCRTUTF16 pwszString, const char *pszTag); 3328 3329 /** 3330 * Allocates a new copy of the specified UTF-16 string (default tag). 3331 * 3332 * @returns iprt status code. 3333 * @param ppwszString Receives pointer of the allocated UTF-16 string. 3334 * The returned pointer must be freed using RTUtf16Free(). 3335 * @param pwszString UTF-16 string to duplicate. 3336 * @param cwcExtra Number of extra RTUTF16 items to allocate. 3337 * @remark This function will not make any attempt to validate the encoding. 3338 */ 3339 #define RTUtf16DupEx(ppwszString, pwszString, cwcExtra) \ 3340 RTUtf16DupExTag((ppwszString), (pwszString), (cwcExtra), RTSTR_TAG) 3341 3342 /** 3343 * Allocates a new copy of the specified UTF-16 string (custom tag). 3344 * 3345 * @returns iprt status code. 3346 * @param ppwszString Receives pointer of the allocated UTF-16 string. 3347 * The returned pointer must be freed using RTUtf16Free(). 3348 * @param pwszString UTF-16 string to duplicate. 3349 * @param cwcExtra Number of extra RTUTF16 items to allocate. 3350 * @param pszTag Allocation tag used for statistics and such. 3351 * @remark This function will not make any attempt to validate the encoding. 3352 */ 3353 RTDECL(int) RTUtf16DupExTag(PRTUTF16 *ppwszString, PCRTUTF16 pwszString, size_t cwcExtra, const char *pszTag); 3354 3355 /** 3356 * Returns the length of a UTF-16 string in UTF-16 characters 3357 * without trailing '\\0'. 3358 * 3359 * Surrogate pairs counts as two UTF-16 characters here. Use RTUtf16CpCnt() 3360 * to get the exact number of code points in the string. 3361 * 3362 * @returns The number of RTUTF16 items in the string. 3363 * @param pwszString Pointer the UTF-16 string. 3364 * @remark This function will not make any attempt to validate the encoding. 3365 */ 3366 RTDECL(size_t) RTUtf16Len(PCRTUTF16 pwszString); 3367 3368 /** 3369 * Find the length of a zero-terminated byte string, given a max string length. 
3370 * 3371 * @returns The string length or cbMax. The returned length does not include 3372 * the zero terminator if it was found. 3373 * 3374 * @param pwszString The string. 3375 * @param cwcMax The max string length in RTUTF16s. 3376 * @sa RTUtf16NLenEx, RTStrNLen. 3377 */ 3378 RTDECL(size_t) RTUtf16NLen(PCRTUTF16 pwszString, size_t cwcMax); 3379 3380 /** 3381 * Find the length of a zero-terminated UTF-16 string, given 3382 * a max string length. 3383 * 3384 * @returns IPRT status code. 3385 * @retval VINF_SUCCESS if the string has a length less than cwcMax. 3386 * @retval VERR_BUFFER_OVERFLOW if the end of the string wasn't found 3387 * before cwcMax was reached. 3388 * 3389 * @param pwszString The string. 3390 * @param cwcMax The max string length in RTUTF16s. 3391 * @param pcwc Where to store the string length excluding the 3392 * terminator. This is set to cwcMax if the terminator 3393 * isn't found. 3394 * @sa RTUtf16NLen, RTStrNLenEx. 3395 */ 3396 RTDECL(int) RTUtf16NLenEx(PCRTUTF16 pwszString, size_t cwcMax, size_t *pcwc); 3397 3398 /** 3399 * Find the zero terminator in a string with a limited length. 3400 * 3401 * @returns Pointer to the zero terminator. 3402 * @returns NULL if the zero terminator was not found. 3403 * 3404 * @param pwszString The string. 3405 * @param cwcMax The max string length. RTSTR_MAX is fine. 3406 */ 3407 RTDECL(PCRTUTF16) RTUtf16End(PCRTUTF16 pwszString, size_t cwcMax); 3408 3409 /** 3410 * Strips blankspaces from both ends of the string. 3411 * 3412 * @returns Pointer to first non-blank char in the string. 3413 * @param pwsz The string to strip. 3414 */ 3415 RTDECL(PRTUTF16) RTUtf16Strip(PRTUTF16 pwsz); 3416 3417 /** 3418 * Strips blankspaces from the start of the string. 3419 * 3420 * @returns Pointer to first non-blank char in the string. 3421 * @param pwsz The string to strip. 3422 */ 3423 RTDECL(PRTUTF16) RTUtf16StripL(PCRTUTF16 pwsz); 3424 3425 /** 3426 * Strips blankspaces from the end of the string. 
3427 * 3428 * @returns pwsz. 3429 * @param pwsz The string to strip. 3430 */ 3431 RTDECL(PRTUTF16) RTUtf16StripR(PRTUTF16 pwsz); 3432 3433 /** 3434 * String copy with overflow handling. 3435 * 3436 * @retval VINF_SUCCESS on success. 3437 * @retval VERR_BUFFER_OVERFLOW if the destination buffer is too small. The 3438 * buffer will contain as much of the string as it can hold, fully 3439 * terminated. 3440 * 3441 * @param pwszDst The destination buffer. 3442 * @param cwcDst The size of the destination buffer in RTUTF16s. 3443 * @param pwszSrc The source string. NULL is not OK. 3444 */ 3445 RTDECL(int) RTUtf16Copy(PRTUTF16 pwszDst, size_t cwcDst, PCRTUTF16 pwszSrc); 3446 3447 /** 3448 * String copy with overflow handling, ASCII source. 3449 * 3450 * @retval VINF_SUCCESS on success. 3451 * @retval VERR_BUFFER_OVERFLOW if the destination buffer is too small. The 3452 * buffer will contain as much of the string as it can hold, fully 3453 * terminated. 3454 * 3455 * @param pwszDst The destination buffer. 3456 * @param cwcDst The size of the destination buffer in RTUTF16s. 3457 * @param pszSrc The source string, pure ASCII. NULL is not OK. 3458 */ 3459 RTDECL(int) RTUtf16CopyAscii(PRTUTF16 pwszDst, size_t cwcDst, const char *pszSrc); 3460 3461 /** 3462 * String copy with overflow handling. 3463 * 3464 * @retval VINF_SUCCESS on success. 3465 * @retval VERR_BUFFER_OVERFLOW if the destination buffer is too small. The 3466 * buffer will contain as much of the string as it can hold, fully 3467 * terminated. 3468 * 3469 * @param pwszDst The destination buffer. 3470 * @param cwcDst The size of the destination buffer in RTUTF16s. 3471 * @param pwszSrc The source string. NULL is not OK. 3472 * @param cwcSrcMax The maximum number of chars (not code points) to 3473 * copy from the source string, not counting the 3474 * terminator as usual. 
3475 */ 3476 RTDECL(int) RTUtf16CopyEx(PRTUTF16 pwszDst, size_t cwcDst, PCRTUTF16 pwszSrc, size_t cwcSrcMax); 3477 3478 /** 3479 * String concatenation with overflow handling. 3480 * 3481 * @retval VINF_SUCCESS on success. 3482 * @retval VERR_BUFFER_OVERFLOW if the destination buffer is too small. The 3483 * buffer will contain as much of the string as it can hold, fully 3484 * terminated. 3485 * 3486 * @param pwszDst The destination buffer. 3487 * @param cwcDst The size of the destination buffer in RTUTF16s. 3488 * @param pwszSrc The source string. NULL is not OK. 3489 */ 3490 RTDECL(int) RTUtf16Cat(PRTUTF16 pwszDst, size_t cwcDst, PCRTUTF16 pwszSrc); 3491 3492 /** 3493 * String concatenation with overflow handling, ASCII source. 3494 * 3495 * @retval VINF_SUCCESS on success. 3496 * @retval VERR_BUFFER_OVERFLOW if the destination buffer is too small. The 3497 * buffer will contain as much of the string as it can hold, fully 3498 * terminated. 3499 * 3500 * @param pwszDst The destination buffer. 3501 * @param cwcDst The size of the destination buffer in RTUTF16s. 3502 * @param pszSrc The source string, pure ASCII. NULL is not OK. 3503 */ 3504 RTDECL(int) RTUtf16CatAscii(PRTUTF16 pwszDst, size_t cwcDst, const char *pszSrc); 3505 3506 /** 3507 * String concatenation with overflow handling. 3508 * 3509 * @retval VINF_SUCCESS on success. 3510 * @retval VERR_BUFFER_OVERFLOW if the destination buffer is too small. The 3511 * buffer will contain as much of the string as it can hold, fully 3512 * terminated. 3513 * 3514 * @param pwszDst The destination buffer. 3515 * @param cwcDst The size of the destination buffer in RTUTF16s. 3516 * @param pwszSrc The source string. NULL is not OK. 3517 * @param cwcSrcMax The maximum number of UTF-16 chars (not code 3518 * points) to copy from the source string, not 3519 * counting the terminator as usual. 
3520 */ 3521 RTDECL(int) RTUtf16CatEx(PRTUTF16 pwszDst, size_t cwcDst, PCRTUTF16 pwszSrc, size_t cwcSrcMax); 3522 3523 /** 3524 * Performs a case sensitive string compare between two UTF-16 strings. 3525 * 3526 * @returns < 0 if the first string less than the second string.s 3527 * @returns 0 if the first string identical to the second string. 3528 * @returns > 0 if the first string greater than the second string. 3529 * @param pwsz1 First UTF-16 string. Null is allowed. 3530 * @param pwsz2 Second UTF-16 string. Null is allowed. 3531 * @remark This function will not make any attempt to validate the encoding. 3532 */ 3533 RTDECL(int) RTUtf16Cmp(PCRTUTF16 pwsz1, PCRTUTF16 pwsz2); 3534 3535 /** 3536 * Performs a case sensitive string compare between an UTF-16 string and a pure 3537 * ASCII string. 3538 * 3539 * @returns < 0 if the first string less than the second string.s 3540 * @returns 0 if the first string identical to the second string. 3541 * @returns > 0 if the first string greater than the second string. 3542 * @param pwsz1 First UTF-16 string. Null is allowed. 3543 * @param psz2 Second string, pure ASCII. Null is allowed. 3544 * @remark This function will not make any attempt to validate the encoding. 3545 */ 3546 RTDECL(int) RTUtf16CmpAscii(PCRTUTF16 pwsz1, const char *psz2); 3547 3548 /** 3549 * Performs a case insensitive string compare between two UTF-16 strings. 3550 * 3551 * This is a simplified compare, as only the simplified lower/upper case folding 3552 * specified by the unicode specs are used. It does not consider character pairs 3553 * as they are used in some languages, just simple upper & lower case compares. 3554 * 3555 * @returns < 0 if the first string less than the second string. 3556 * @returns 0 if the first string identical to the second string. 3557 * @returns > 0 if the first string greater than the second string. 3558 * @param pwsz1 First UTF-16 string. Null is allowed. 3559 * @param pwsz2 Second UTF-16 string. Null is allowed. 
3560 */ 3561 RTDECL(int) RTUtf16ICmp(PCRTUTF16 pwsz1, PCRTUTF16 pwsz2); 3562 3563 /** 3564 * Performs a case insensitive string compare between an UTF-16 string and an 3565 * pure ASCII string. 3566 * 3567 * Since this compare only takes cares about the first 128 codepoints in 3568 * unicode, no tables are needed and there aren't any real complications. 3569 * 3570 * @returns < 0 if the first string less than the second string. 3571 * @returns 0 if the first string identical to the second string. 3572 * @returns > 0 if the first string greater than the second string. 3573 * @param pwsz1 First UTF-16 string. Null is allowed. 3574 * @param psz2 Second string, pure ASCII. Null is allowed. 3575 */ 3576 RTDECL(int) RTUtf16ICmpAscii(PCRTUTF16 pwsz1, const char *psz2); 3577 3578 /** 3579 * Performs a case insensitive string compare between two UTF-16 strings 3580 * using the current locale of the process (if applicable). 3581 * 3582 * This differs from RTUtf16ICmp() in that it will try, if a locale with the 3583 * required data is available, to do a correct case-insensitive compare. It 3584 * follows that it is more complex and thereby likely to be more expensive. 3585 * 3586 * @returns < 0 if the first string less than the second string. 3587 * @returns 0 if the first string identical to the second string. 3588 * @returns > 0 if the first string greater than the second string. 3589 * @param pwsz1 First UTF-16 string. Null is allowed. 3590 * @param pwsz2 Second UTF-16 string. Null is allowed. 3591 */ 3592 RTDECL(int) RTUtf16LocaleICmp(PCRTUTF16 pwsz1, PCRTUTF16 pwsz2); 3593 3594 /** 3595 * Folds a UTF-16 string to lowercase. 3596 * 3597 * This is a very simple folding; is uses the simple lowercase 3598 * code point, it is not related to any locale just the most common 3599 * lowercase codepoint setup by the unicode specs, and it will not 3600 * create new surrogate pairs or remove existing ones. 3601 * 3602 * @returns Pointer to the passed in string. 
3603 * @param pwsz The string to fold. 3604 */ 3605 RTDECL(PRTUTF16) RTUtf16ToLower(PRTUTF16 pwsz); 3606 3607 /** 3608 * Folds a UTF-16 string to uppercase. 3609 * 3610 * This is a very simple folding; is uses the simple uppercase 3611 * code point, it is not related to any locale just the most common 3612 * uppercase codepoint setup by the unicode specs, and it will not 3613 * create new surrogate pairs or remove existing ones. 3614 * 3615 * @returns Pointer to the passed in string. 3616 * @param pwsz The string to fold. 3617 */ 3618 RTDECL(PRTUTF16) RTUtf16ToUpper(PRTUTF16 pwsz); 3619 3620 /** 3621 * Validates the UTF-16 encoding of the string. 3622 * 3623 * @returns iprt status code. 3624 * @param pwsz The string. 3625 */ 3626 RTDECL(int) RTUtf16ValidateEncoding(PCRTUTF16 pwsz); 3627 3628 /** 3629 * Validates the UTF-16 encoding of the string. 3630 * 3631 * @returns iprt status code. 3632 * @param pwsz The string. 3633 * @param cwc The max string length (/ size) in UTF-16 units. Use 3634 * RTSTR_MAX to process the entire string. 3635 * @param fFlags Combination of RTSTR_VALIDATE_ENCODING_XXX flags. 3636 */ 3637 RTDECL(int) RTUtf16ValidateEncodingEx(PCRTUTF16 pwsz, size_t cwc, uint32_t fFlags); 3638 3639 /** 3640 * Checks if the UTF-16 encoding is valid. 3641 * 3642 * @returns true / false. 3643 * @param pwsz The string. 3644 */ 3645 RTDECL(bool) RTUtf16IsValidEncoding(PCRTUTF16 pwsz); 3646 3647 /** 3648 * Sanitise a (valid) UTF-16 string by replacing all characters outside a white 3649 * list in-place by an ASCII replacement character. Multi-byte characters will 3650 * be replaced byte by byte. 3651 * 3652 * @returns The number of code points replaced, or a negative value if the 3653 * string is not correctly encoded. In this last case the string 3654 * may be partially processed. 3655 * @param pwsz The string to sanitise. 3656 * @param puszValidSet A zero-terminated array of pairs of Unicode points. 
3657 * Each pair is the start and end point of a range, 3658 * and the union of these ranges forms the white list. 3659 * @param chReplacement The ASCII replacement character. 3660 */ 3661 RTDECL(ssize_t) RTUtf16PurgeComplementSet(PRTUTF16 pwsz, PCRTUNICP puszValidSet, char chReplacement); 3662 3663 /** 3664 * Translate a UTF-16 string into a UTF-8 allocating the result buffer (default 3665 * tag). 3666 * 3667 * @returns iprt status code. 3668 * @param pwszString UTF-16 string to convert. 3669 * @param ppszString Receives pointer of allocated UTF-8 string on 3670 * success, and is always set to NULL on failure. 3671 * The returned pointer must be freed using RTStrFree(). 3672 */ 3673 #define RTUtf16ToUtf8(pwszString, ppszString) RTUtf16ToUtf8Tag((pwszString), (ppszString), RTSTR_TAG) 3674 3675 /** 3676 * Translate a UTF-16 string into a UTF-8 allocating the result buffer. 3677 * 3678 * @returns iprt status code. 3679 * @param pwszString UTF-16 string to convert. 3680 * @param ppszString Receives pointer of allocated UTF-8 string on 3681 * success, and is always set to NULL on failure. 3682 * The returned pointer must be freed using RTStrFree(). 3683 * @param pszTag Allocation tag used for statistics and such. 3684 */ 3685 RTDECL(int) RTUtf16ToUtf8Tag(PCRTUTF16 pwszString, char **ppszString, const char *pszTag); 3686 3687 /** 3688 * Translates UTF-16 to UTF-8 using buffer provided by the caller or a fittingly 3689 * sized buffer allocated by the function (default tag). 3690 * 3691 * @returns iprt status code. 3692 * @param pwszString The UTF-16 string to convert. 3693 * @param cwcString The number of RTUTF16 items to translate from pwszString. 3694 * The translation will stop when reaching cwcString or the terminator ('\\0'). 3695 * Use RTSTR_MAX to translate the entire string. 3696 * @param ppsz If cch is non-zero, this must either be pointing to a pointer to 3697 * a buffer of the specified size, or pointer to a NULL pointer. 
3698 * If *ppsz is NULL or cch is zero a buffer of at least cch chars 3699 * will be allocated to hold the translated string. 3700 * If a buffer was requested it must be freed using RTStrFree(). 3701 * @param cch The buffer size in chars (the type). This includes the terminator. 3702 * @param pcch Where to store the length of the translated string, 3703 * excluding the terminator. (Optional) 3704 * 3705 * This may be set under some error conditions, 3706 * however, only for VERR_BUFFER_OVERFLOW and 3707 * VERR_NO_STR_MEMORY will it contain a valid string 3708 * length that can be used to resize the buffer. 3709 */ 3710 #define RTUtf16ToUtf8Ex(pwszString, cwcString, ppsz, cch, pcch) \ 3711 RTUtf16ToUtf8ExTag((pwszString), (cwcString), (ppsz), (cch), (pcch), RTSTR_TAG) 3712 3713 /** 3714 * Translates UTF-16 to UTF-8 using buffer provided by the caller or a fittingly 3715 * sized buffer allocated by the function (custom tag). 3716 * 3717 * @returns iprt status code. 3718 * @param pwszString The UTF-16 string to convert. 3719 * @param cwcString The number of RTUTF16 items to translate from pwszString. 3720 * The translation will stop when reaching cwcString or the terminator ('\\0'). 3721 * Use RTSTR_MAX to translate the entire string. 3722 * @param ppsz If cch is non-zero, this must either be pointing to a pointer to 3723 * a buffer of the specified size, or pointer to a NULL pointer. 3724 * If *ppsz is NULL or cch is zero a buffer of at least cch chars 3725 * will be allocated to hold the translated string. 3726 * If a buffer was requested it must be freed using RTStrFree(). 3727 * @param cch The buffer size in chars (the type). This includes the terminator. 3728 * @param pcch Where to store the length of the translated string, 3729 * excluding the terminator. 
(Optional) 3730 * 3731 * This may be set under some error conditions, 3732 * however, only for VERR_BUFFER_OVERFLOW and 3733 * VERR_NO_STR_MEMORY will it contain a valid string 3734 * length that can be used to resize the buffer. 3735 * @param pszTag Allocation tag used for statistics and such. 3736 */ 3737 RTDECL(int) RTUtf16ToUtf8ExTag(PCRTUTF16 pwszString, size_t cwcString, char **ppsz, size_t cch, size_t *pcch, const char *pszTag); 3738 3739 /** 3740 * Calculates the length of the UTF-16 string in UTF-8 chars (bytes). 3741 * 3742 * This function will validate the string, and incorrectly encoded UTF-16 3743 * strings will be rejected. The primary purpose of this function is to 3744 * help allocate buffers for RTUtf16ToUtf8() of the correct size. For most 3745 * other purposes RTUtf16ToUtf8Ex() should be used. 3746 * 3747 * @returns Number of char (bytes). 3748 * @returns 0 if the string was incorrectly encoded. 3749 * @param pwsz The UTF-16 string. 3750 */ 3751 RTDECL(size_t) RTUtf16CalcUtf8Len(PCRTUTF16 pwsz); 3752 3753 /** 3754 * Calculates the length of the UTF-16 string in UTF-8 chars (bytes). 3755 * 3756 * This function will validate the string, and incorrectly encoded UTF-16 3757 * strings will be rejected. 3758 * 3759 * @returns iprt status code. 3760 * @param pwsz The string. 3761 * @param cwc The max string length. Use RTSTR_MAX to process the entire string. 3762 * @param pcch Where to store the string length (in bytes). Optional. 3763 * This is undefined on failure. 3764 */ 3765 RTDECL(int) RTUtf16CalcUtf8LenEx(PCRTUTF16 pwsz, size_t cwc, size_t *pcch); 3766 3767 /** 3768 * Translate a UTF-16 string into a Latin-1 (ISO-8859-1) allocating the result 3769 * buffer (default tag). 3770 * 3771 * @returns iprt status code. 3772 * @param pwszString UTF-16 string to convert. 3773 * @param ppszString Receives pointer of allocated Latin1 string on 3774 * success, and is always set to NULL on failure. 3775 * The returned pointer must be freed using RTStrFree(). 
3776 */ 3777 #define RTUtf16ToLatin1(pwszString, ppszString) RTUtf16ToLatin1Tag((pwszString), (ppszString), RTSTR_TAG) 3778 3779 /** 3780 * Translate a UTF-16 string into a Latin-1 (ISO-8859-1) allocating the result 3781 * buffer (custom tag). 3782 * 3783 * @returns iprt status code. 3784 * @param pwszString UTF-16 string to convert. 3785 * @param ppszString Receives pointer of allocated Latin1 string on 3786 * success, and is always set to NULL on failure. 3787 * The returned pointer must be freed using RTStrFree(). 3788 * @param pszTag Allocation tag used for statistics and such. 3789 */ 3790 RTDECL(int) RTUtf16ToLatin1Tag(PCRTUTF16 pwszString, char **ppszString, const char *pszTag); 3791 3792 /** 3793 * Translates UTF-16 to Latin-1 (ISO-8859-1) using buffer provided by the caller 3794 * or a fittingly sized buffer allocated by the function (default tag). 3795 * 3796 * @returns iprt status code. 3797 * @param pwszString The UTF-16 string to convert. 3798 * @param cwcString The number of RTUTF16 items to translate from 3799 * pwszString. The translation will stop when reaching 3800 * cwcString or the terminator ('\\0'). Use RTSTR_MAX 3801 * to translate the entire string. 3802 * @param ppsz Pointer to the pointer to the Latin-1 string. The 3803 * buffer can optionally be preallocated by the caller. 3804 * 3805 * If cch is zero, *ppsz is undefined. 3806 * 3807 * If cch is non-zero and *ppsz is not NULL, then this 3808 * will be used as the output buffer. 3809 * VERR_BUFFER_OVERFLOW will be returned if this is 3810 * insufficient. 3811 * 3812 * If cch is zero or *ppsz is NULL, then a buffer of 3813 * sufficient size is allocated. cch can be used to 3814 * specify a minimum size of this buffer. Use 3815 * RTUtf16Free() to free the result. 3816 * 3817 * @param cch The buffer size in chars (the type). This includes 3818 * the terminator. 3819 * @param pcch Where to store the length of the translated string, 3820 * excluding the terminator. 
(Optional) 3821 * 3822 * This may be set under some error conditions, 3823 * however, only for VERR_BUFFER_OVERFLOW and 3824 * VERR_NO_STR_MEMORY will it contain a valid string 3825 * length that can be used to resize the buffer. 3826 */ 3827 #define RTUtf16ToLatin1Ex(pwszString, cwcString, ppsz, cch, pcch) \ 3828 RTUtf16ToLatin1ExTag((pwszString), (cwcString), (ppsz), (cch), (pcch), RTSTR_TAG) 3829 3830 /** 3831 * Translates UTF-16 to Latin-1 (ISO-8859-1) using buffer provided by the caller 3832 * or a fittingly sized buffer allocated by the function (custom tag). 3833 * 3834 * @returns iprt status code. 3835 * @param pwszString The UTF-16 string to convert. 3836 * @param cwcString The number of RTUTF16 items to translate from 3837 * pwszString. The translation will stop when reaching 3838 * cwcString or the terminator ('\\0'). Use RTSTR_MAX 3839 * to translate the entire string. 3840 * @param ppsz Pointer to the pointer to the Latin-1 string. The 3841 * buffer can optionally be preallocated by the caller. 3842 * 3843 * If cch is zero, *ppsz is undefined. 3844 * 3845 * If cch is non-zero and *ppsz is not NULL, then this 3846 * will be used as the output buffer. 3847 * VERR_BUFFER_OVERFLOW will be returned if this is 3848 * insufficient. 3849 * 3850 * If cch is zero or *ppsz is NULL, then a buffer of 3851 * sufficient size is allocated. cch can be used to 3852 * specify a minimum size of this buffer. Use 3853 * RTUtf16Free() to free the result. 3854 * 3855 * @param cch The buffer size in chars (the type). This includes 3856 * the terminator. 3857 * @param pcch Where to store the length of the translated string, 3858 * excluding the terminator. (Optional) 3859 * 3860 * This may be set under some error conditions, 3861 * however, only for VERR_BUFFER_OVERFLOW and 3862 * VERR_NO_STR_MEMORY will it contain a valid string 3863 * length that can be used to resize the buffer. 3864 * @param pszTag Allocation tag used for statistics and such. 
3865 */ 3866 RTDECL(int) RTUtf16ToLatin1ExTag(PCRTUTF16 pwszString, size_t cwcString, char **ppsz, size_t cch, size_t *pcch, const char *pszTag); 3867 3868 /** 3869 * Calculates the length of the UTF-16 string in Latin-1 (ISO-8859-1) chars. 3870 * 3871 * This function will validate the string, and incorrectly encoded UTF-16 3872 * strings will be rejected. The primary purpose of this function is to 3873 * help allocate buffers for RTUtf16ToLatin1() of the correct size. For most 3874 * other purposes RTUtf16ToLatin1Ex() should be used. 3875 * 3876 * @returns Number of char (bytes). 3877 * @returns 0 if the string was incorrectly encoded. 3878 * @param pwsz The UTF-16 string. 3879 */ 3880 RTDECL(size_t) RTUtf16CalcLatin1Len(PCRTUTF16 pwsz); 3881 3882 /** 3883 * Calculates the length of the UTF-16 string in Latin-1 (ISO-8859-1) chars. 3884 * 3885 * This function will validate the string, and incorrectly encoded UTF-16 3886 * strings will be rejected. 3887 * 3888 * @returns iprt status code. 3889 * @param pwsz The string. 3890 * @param cwc The max string length. Use RTSTR_MAX to process the 3891 * entire string. 3892 * @param pcch Where to store the string length (in bytes). Optional. 3893 * This is undefined on failure. 3894 */ 3895 RTDECL(int) RTUtf16CalcLatin1LenEx(PCRTUTF16 pwsz, size_t cwc, size_t *pcch); 3896 3897 /** 3898 * Get the unicode code point at the given string position. 3899 * 3900 * @returns unicode code point. 3901 * @returns RTUNICP_INVALID if the encoding is invalid. 3902 * @param pwsz The string. 3903 * 3904 * @remark This is an internal worker for RTUtf16GetCp(). 3905 */ 3906 RTDECL(RTUNICP) RTUtf16GetCpInternal(PCRTUTF16 pwsz); 3907 3908 /** 3909 * Get the unicode code point at the given string position. 3910 * 3911 * @returns iprt status code. 3912 * @param ppwsz Pointer to the string pointer. This will be updated to 3913 * point to the char following the current code point. 3914 * @param pCp Where to store the code point. 
3915 * RTUNICP_INVALID is stored here on failure. 3916 * 3917 * @remark This is an internal worker for RTUtf16GetCpEx(). 3918 */ 3919 RTDECL(int) RTUtf16GetCpExInternal(PCRTUTF16 *ppwsz, PRTUNICP pCp); 3920 3921 /** 3922 * Put the unicode code point at the given string position 3923 * and return the pointer to the char following it. 3924 * 3925 * This function will not consider anything at or following the 3926 * buffer area pointed to by pwsz. It is therefore not suitable for 3927 * inserting code points into a string, only appending/overwriting. 3928 * 3929 * @returns pointer to the char following the written code point. 3930 * @param pwsz The string. 3931 * @param CodePoint The code point to write. 3932 * This should not be RTUNICP_INVALID or any other 3933 * character out of the UTF-16 range. 3934 * 3935 * @remark This is an internal worker for RTUtf16GetCpEx(). 3936 */ 3937 RTDECL(PRTUTF16) RTUtf16PutCpInternal(PRTUTF16 pwsz, RTUNICP CodePoint); 3938 3939 /** 3940 * Get the unicode code point at the given string position. 3941 * 3942 * @returns unicode code point. 3943 * @returns RTUNICP_INVALID if the encoding is invalid. 3944 * @param pwsz The string. 3945 * 3946 * @remark We optimize this operation by using an inline function for 3947 * everything which isn't a surrogate pair or an endian indicator. 3948 */ 3949 DECLINLINE(RTUNICP) RTUtf16GetCp(PCRTUTF16 pwsz) 3950 { 3951 const RTUTF16 wc = *pwsz; 3952 if (wc < 0xd800 || (wc > 0xdfff && wc < 0xfffe)) 3953 return wc; 3954 return RTUtf16GetCpInternal(pwsz); 3955 } 3956 3957 /** 3958 * Get the unicode code point at the given string position. 3959 * 3960 * @returns iprt status code. 3961 * @param ppwsz Pointer to the string pointer. This will be updated to 3962 * point to the char following the current code point. 3963 * @param pCp Where to store the code point. 3964 * RTUNICP_INVALID is stored here on failure. 
3965 * 3966 * @remark We optimize this operation by using an inline function for 3967 * everything which isn't a surrogate pair or an endian indicator. 3968 */ 3969 DECLINLINE(int) RTUtf16GetCpEx(PCRTUTF16 *ppwsz, PRTUNICP pCp) 3970 { 3971 const RTUTF16 wc = **ppwsz; 3972 if (wc < 0xd800 || (wc > 0xdfff && wc < 0xfffe)) 3973 { 3974 (*ppwsz)++; 3975 *pCp = wc; 3976 return VINF_SUCCESS; 3977 } 3978 return RTUtf16GetCpExInternal(ppwsz, pCp); 3979 } 3980 3981 /** 3982 * Put the unicode code point at the given string position 3983 * and return the pointer to the char following it. 3984 * 3985 * This function will not consider anything at or following the 3986 * buffer area pointed to by pwsz. It is therefore not suitable for 3987 * inserting code points into a string, only appending/overwriting. 3988 * 3989 * @returns pointer to the char following the written code point. 3990 * @param pwsz The string. 3991 * @param CodePoint The code point to write. 3992 * This should not be RTUNICP_INVALID or any other 3993 * character out of the UTF-16 range. 3994 * 3995 * @remark We optimize this operation by using an inline function for 3996 * everything which isn't a surrogate pair or an endian indicator. 3997 */ 3998 DECLINLINE(PRTUTF16) RTUtf16PutCp(PRTUTF16 pwsz, RTUNICP CodePoint) 3999 { /* Fast path only for values outside the whole surrogate range 0xd800..0xdfff and below 0xfffe, matching the test in RTUtf16GetCp(); everything else goes to the internal worker. */ 4000 if (CodePoint < 0xd800 || (CodePoint > 0xdfff && CodePoint < 0xfffe)) 4001 { 4002 *pwsz++ = (RTUTF16)CodePoint; 4003 return pwsz; 4004 } 4005 return RTUtf16PutCpInternal(pwsz, CodePoint); 4006 } 4007 4008 /** 4009 * Skips ahead, past the current code point. 4010 * 4011 * @returns Pointer to the char after the current code point. 4012 * @param pwsz Pointer to the current code point. 4013 * @remark This will not move to the next valid code point, only past the current one. 4014 */ 4015 DECLINLINE(PRTUTF16) RTUtf16NextCp(PCRTUTF16 pwsz) 4016 { 4017 RTUNICP Cp; 4018 RTUtf16GetCpEx(&pwsz, &Cp); 4019 return (PRTUTF16)pwsz; 4020 } 4021 4022 /** 4023 * Skips backwards, to the previous code point.
4024 * 4025 * @returns Pointer to the start of the previous code point. 4026 * @param pwszStart Pointer to the start of the string. 4027 * @param pwsz Pointer to the current code point. 4028 */ 4029 RTDECL(PRTUTF16) RTUtf16PrevCp(PCRTUTF16 pwszStart, PCRTUTF16 pwsz); 4030 4031 4032 /** 4033 * Checks if the UTF-16 char is the high surrogate char (i.e. 4034 * the 1st char in the pair). 4035 * 4036 * @returns true if it is. 4037 * @returns false if it isn't. 4038 * @param wc The character to investigate. 4039 */ 4040 DECLINLINE(bool) RTUtf16IsHighSurrogate(RTUTF16 wc) 4041 { 4042 return wc >= 0xd800 && wc <= 0xdbff; 4043 } 4044 4045 /** 4046 * Checks if the UTF-16 char is the low surrogate char (i.e. 4047 * the 2nd char in the pair). 4048 * 4049 * @returns true if it is. 4050 * @returns false if it isn't. 4051 * @param wc The character to investigate. 4052 */ 4053 DECLINLINE(bool) RTUtf16IsLowSurrogate(RTUTF16 wc) 4054 { 4055 return wc >= 0xdc00 && wc <= 0xdfff; 4056 } 4057 4058 4059 /** 4060 * Checks if the two UTF-16 chars form a valid surrogate pair. 4061 * 4062 * @returns true if they do. 4063 * @returns false if they don't. 4064 * @param wcHigh The high (1st) character. 4065 * @param wcLow The low (2nd) character. 4066 */ 4067 DECLINLINE(bool) RTUtf16IsSurrogatePair(RTUTF16 wcHigh, RTUTF16 wcLow) 4068 { 4069 return RTUtf16IsHighSurrogate(wcHigh) 4070 && RTUtf16IsLowSurrogate(wcLow); 4071 } 4072 4073 /** 4074 * Formats a buffer stream as hex bytes. 4075 * 4076 * The default is no separating spaces or line breaks or anything. 4077 * 4078 * @returns IPRT status code. 4079 * @retval VERR_INVALID_POINTER if any of the pointers are wrong. 4080 * @retval VERR_BUFFER_OVERFLOW if the buffer is insufficient to hold the bytes. 4081 * 4082 * @param pwszBuf Output string buffer. 4083 * @param cwcBuf The size of the output buffer in RTUTF16 units. 4084 * @param pv Pointer to the bytes to stringify. 4085 * @param cb The number of bytes to stringify.
4086 * @param fFlags Combination of RTSTRPRINTHEXBYTES_F_XXX values. 4087 * @sa RTStrPrintHexBytes. 4088 */ 4089 RTDECL(int) RTUtf16PrintHexBytes(PRTUTF16 pwszBuf, size_t cwcBuf, void const *pv, size_t cb, uint32_t fFlags); 4090 4091 /** @} */ 4092 4093 4094 /** @defgroup rt_str_latin1 Latin-1 (ISO-8859-1) String Manipulation 4095 * @{ 4096 */ 279 RTDECL(int) RTLatin1CalcUtf8LenEx(const char *pszLatin1, size_t cchLatin1, size_t *pcch); 4097 280 4098 281 /** … … 4100 283 * 4101 284 * @returns Number of RTUTF16 items. 4102 * @param psz 4103 */ 4104 RTDECL(size_t) RTLatin1CalcUtf16Len(const char *psz );285 * @param pszLatin1 The Latin-1 string. 286 */ 287 RTDECL(size_t) RTLatin1CalcUtf16Len(const char *pszLatin1); 4105 288 4106 289 /** … … 4108 291 * 4109 292 * @returns iprt status code. 4110 * @param psz 4111 * @param cch 293 * @param pszLatin1 The Latin-1 string. 294 * @param cchLatin1 The max string length. Use RTSTR_MAX to process the 4112 295 * entire string. 4113 296 * @param pcwc Where to store the string length. Optional. 4114 297 * This is undefined on failure. 4115 298 */ 4116 RTDECL(int) RTLatin1CalcUtf16LenEx(const char *psz , size_t cch, size_t *pcwc);299 RTDECL(int) RTLatin1CalcUtf16LenEx(const char *pszLatin1, size_t cchLatin1, size_t *pcwc); 4117 300 4118 301 /** … … 4121 304 * 4122 305 * @returns iprt status code. 4123 * @param psz StringThe Latin-1 string to convert.306 * @param pszLatin1 The Latin-1 string to convert. 4124 307 * @param ppwszString Receives pointer to the allocated UTF-16 string. The 4125 308 * returned string must be freed using RTUtf16Free(). 4126 309 */ 4127 #define RTLatin1ToUtf16(psz String, ppwszString) RTLatin1ToUtf16Tag((pszString), (ppwszString), RTSTR_TAG)310 #define RTLatin1ToUtf16(pszLatin1, ppwszString) RTLatin1ToUtf16Tag((pszLatin1), (ppwszString), RTSTR_TAG) 4128 311 4129 312 /** … … 4132 315 * 4133 316 * @returns iprt status code. 
4134 * @param psz StringThe Latin-1 string to convert.317 * @param pszLatin1 The Latin-1 string to convert. 4135 318 * @param ppwszString Receives pointer to the allocated UTF-16 string. The 4136 319 * returned string must be freed using RTUtf16Free(). 4137 320 * @param pszTag Allocation tag used for statistics and such. 4138 321 */ 4139 RTDECL(int) RTLatin1ToUtf16Tag(const char *psz String, PRTUTF16 *ppwszString, const char *pszTag);4140 4141 /** 4142 * Translates psz Stringfrom Latin-1 (ISO-8859-1) to UTF-16, allocating the322 RTDECL(int) RTLatin1ToUtf16Tag(const char *pszLatin1, PRTUTF16 *ppwszString, const char *pszTag); 323 324 /** 325 * Translates pszLatin1 from Latin-1 (ISO-8859-1) to UTF-16, allocating the 4143 326 * result buffer if requested (default tag). 4144 327 * 4145 328 * @returns iprt status code. 4146 * @param psz StringThe Latin-1 string to convert.4147 * @param cch String The maximum size in chars (the type) to convert.4148 * The conversion stops when it reaches cchString or4149 * the string terminator ('\\0').4150 * Use RTSTR_MAX totranslate the entire string.329 * @param pszLatin1 The Latin-1 string to convert. 330 * @param cchLatin1 The maximum size in chars (the type) to convert. The 331 * conversion stops when it reaches cchLatin1 or the 332 * string terminator ('\\0'). Use RTSTR_MAX to 333 * translate the entire string. 4151 334 * @param ppwsz If cwc is non-zero, this must either be pointing 4152 335 * to pointer to a buffer of the specified size, or … … 4166 349 * length that can be used to resize the buffer. 
4167 350 */ 4168 #define RTLatin1ToUtf16Ex(psz String, cchString, ppwsz, cwc, pcwc) \4169 RTLatin1ToUtf16ExTag((psz String), (cchString), (ppwsz), (cwc), (pcwc), RTSTR_TAG)4170 4171 /** 4172 * Translates psz Stringfrom Latin-1 (ISO-8859-1) to UTF-16, allocating the351 #define RTLatin1ToUtf16Ex(pszLatin1, cchLatin1, ppwsz, cwc, pcwc) \ 352 RTLatin1ToUtf16ExTag((pszLatin1), (cchLatin1), (ppwsz), (cwc), (pcwc), RTSTR_TAG) 353 354 /** 355 * Translates pszLatin1 from Latin-1 (ISO-8859-1) to UTF-16, allocating the 4173 356 * result buffer if requested. 4174 357 * 4175 358 * @returns iprt status code. 4176 * @param psz StringThe Latin-1 string to convert.4177 * @param cch String The maximum size in chars (the type) to convert.4178 * The conversion stops when it reaches cchString or4179 * the string terminator ('\\0').4180 * Use RTSTR_MAX totranslate the entire string.359 * @param pszLatin1 The Latin-1 string to convert. 360 * @param cchLatin1 The maximum size in chars (the type) to convert. The 361 * conversion stops when it reaches cchLatin1 or the 362 * string terminator ('\\0'). Use RTSTR_MAX to 363 * translate the entire string. 4181 364 * @param ppwsz If cwc is non-zero, this must either be pointing 4182 365 * to pointer to a buffer of the specified size, or … … 4197 380 * @param pszTag Allocation tag used for statistics and such. 4198 381 */ 4199 RTDECL(int) RTLatin1ToUtf16ExTag(const char *psz String, size_t cchString,382 RTDECL(int) RTLatin1ToUtf16ExTag(const char *pszLatin1, size_t cchLatin1, 4200 383 PRTUTF16 *ppwsz, size_t cwc, size_t *pcwc, const char *pszTag); 4201 384 4202 385 /** @} */ 4203 386 4204 #ifndef ___iprt_nocrt_string_h 4205 # if defined(RT_OS_WINDOWS) 4206 RTDECL(void *) mempcpy(void *pvDst, const void *pvSrc, size_t cb); 4207 # endif 387 RT_C_DECLS_END 388 389 /** @} */ 390 4208 391 #endif 4209 392 4210 4211 RT_C_DECLS_END4212 4213 /** @} */4214 4215 #endif4216 -
trunk/include/iprt/string.h
r57927 r57941 90 90 #endif 91 91 92 /* For the time being: */ 93 #include <iprt/utf16.h> 94 #include <iprt/latin1.h> 95 92 96 /* 93 97 * Supply prototypes for standard string functions provided by … … 682 686 683 687 /** @name Flags for RTStrValidateEncodingEx and RTUtf16ValidateEncodingEx 688 * @{ 684 689 */ 685 690 /** Check that the string is zero terminated within the given size. … … 1012 1017 */ 1013 1018 RTDECL(int) RTStrToLatin1ExTag(const char *pszString, size_t cchString, char **ppsz, size_t cch, size_t *pcch, const char *pszTag); 1014 1015 1016 /**1017 * Translate a Latin1 string into a UTF-8 allocating the result buffer (default1018 * tag).1019 *1020 * @returns iprt status code.1021 * @param pszString Latin1 string to convert.1022 * @param ppszString Receives pointer of allocated UTF-8 string on1023 * success, and is always set to NULL on failure.1024 * The returned pointer must be freed using RTStrFree().1025 */1026 #define RTLatin1ToUtf8(pszString, ppszString) RTLatin1ToUtf8Tag((pszString), (ppszString), RTSTR_TAG)1027 1028 /**1029 * Translate a Latin-1 string into a UTF-8 allocating the result buffer.1030 *1031 * @returns iprt status code.1032 * @param pszString Latin-1 string to convert.1033 * @param ppszString Receives pointer of allocated UTF-8 string on1034 * success, and is always set to NULL on failure.1035 * The returned pointer must be freed using RTStrFree().1036 * @param pszTag Allocation tag used for statistics and such.1037 */1038 RTDECL(int) RTLatin1ToUtf8Tag(const char *pszString, char **ppszString, const char *pszTag);1039 1040 /**1041 * Translates Latin-1 to UTF-8 using buffer provided by the caller or a fittingly1042 * sized buffer allocated by the function (default tag).1043 *1044 * @returns iprt status code.1045 * @param pszString The Latin-1 string to convert.1046 * @param cchString The number of Latin-1 characters to translate from1047 * pszString. The translation will stop when reaching1048 * cchString or the terminator ('\\0'). 
Use RTSTR_MAX1049 * to translate the entire string.1050 * @param ppsz If cch is non-zero, this must either be pointing to1051 * a pointer to a buffer of the specified size, or1052 * pointer to a NULL pointer. If *ppsz is NULL or cch1053 * is zero a buffer of at least cch chars will be1054 * allocated to hold the translated string. If a1055 * buffer was requested it must be freed using1056 * RTStrFree().1057 * @param cch The buffer size in chars (the type). This includes the terminator.1058 * @param pcch Where to store the length of the translated string,1059 * excluding the terminator. (Optional)1060 *1061 * This may be set under some error conditions,1062 * however, only for VERR_BUFFER_OVERFLOW and1063 * VERR_NO_STR_MEMORY will it contain a valid string1064 * length that can be used to resize the buffer.1065 */1066 #define RTLatin1ToUtf8Ex(pszString, cchString, ppsz, cch, pcch) \1067 RTLatin1ToUtf8ExTag((pszString), (cchString), (ppsz), (cch), (pcch), RTSTR_TAG)1068 1069 /**1070 * Translates Latin1 to UTF-8 using buffer provided by the caller or a fittingly1071 * sized buffer allocated by the function (custom tag).1072 *1073 * @returns iprt status code.1074 * @param pszString The Latin1 string to convert.1075 * @param cchString The number of Latin1 characters to translate from1076 * pwszString. The translation will stop when1077 * reaching cchString or the terminator ('\\0'). Use1078 * RTSTR_MAX to translate the entire string.1079 * @param ppsz If cch is non-zero, this must either be pointing to1080 * a pointer to a buffer of the specified size, or1081 * pointer to a NULL pointer. If *ppsz is NULL or cch1082 * is zero a buffer of at least cch chars will be1083 * allocated to hold the translated string. If a1084 * buffer was requested it must be freed using1085 * RTStrFree().1086 * @param cch The buffer size in chars (the type). This includes1087 * the terminator.1088 * @param pcch Where to store the length of the translated string,1089 * excluding the terminator. 
(Optional)1090 *1091 * This may be set under some error conditions,1092 * however, only for VERR_BUFFER_OVERFLOW and1093 * VERR_NO_STR_MEMORY will it contain a valid string1094 * length that can be used to resize the buffer.1095 * @param pszTag Allocation tag used for statistics and such.1096 */1097 RTDECL(int) RTLatin1ToUtf8ExTag(const char *pszString, size_t cchString, char **ppsz, size_t cch, size_t *pcch, const char *pszTag);1098 1099 /**1100 * Calculates the length of the Latin-1 string in UTF-8 chars (bytes).1101 *1102 * The primary purpose of this function is to help allocate buffers for1103 * RTLatin1ToUtf8() of the correct size. For most other purposes1104 * RTLatin1ToUtf8Ex() should be used.1105 *1106 * @returns Number of chars (bytes).1107 * @returns 0 if the string was incorrectly encoded.1108 * @param psz The Latin-1 string.1109 */1110 RTDECL(size_t) RTLatin1CalcUtf8Len(const char *psz);1111 1112 /**1113 * Calculates the length of the Latin-1 string in UTF-8 chars (bytes).1114 *1115 * @returns iprt status code.1116 * @param psz The string.1117 * @param cch The max string length. Use RTSTR_MAX to process the entire string.1118 * @param pcch Where to store the string length (in bytes). Optional.1119 * This is undefined on failure.1120 */1121 RTDECL(int) RTLatin1CalcUtf8LenEx(const char *psz, size_t cch, size_t *pcch);1122 1019 1123 1020 /** … … 1340 1237 */ 1341 1238 RTDECL(char *) RTStrPrevCp(const char *pszStart, const char *psz); 1342 1343 /**1344 * Get the unicode code point at the given string position.1345 *1346 * @returns unicode code point.1347 * @returns RTUNICP_INVALID if the encoding is invalid.1348 * @param psz The string.1349 */1350 DECLINLINE(RTUNICP) RTLatin1GetCp(const char *psz)1351 {1352 return *(const unsigned char *)psz;1353 }1354 1355 /**1356 * Get the unicode code point at the given string position.1357 *1358 * @returns iprt status code.1359 * @param ppsz Pointer to the string pointer. 
This will be updated to1360 * point to the char following the current code point.1361 * This is advanced one character forward on failure.1362 * @param pCp Where to store the code point.1363 * RTUNICP_INVALID is stored here on failure.1364 *1365 * @remark We optimize this operation by using an inline function for1366 * the most frequent and simplest sequence, the rest is1367 * handled by RTStrGetCpExInternal().1368 */1369 DECLINLINE(int) RTLatin1GetCpEx(const char **ppsz, PRTUNICP pCp)1370 {1371 const unsigned char uch = **(const unsigned char **)ppsz;1372 (*ppsz)++;1373 *pCp = uch;1374 return VINF_SUCCESS;1375 }1376 1377 /**1378 * Get the unicode code point at the given string position for a string of a1379 * given maximum length.1380 *1381 * @returns iprt status code.1382 * @retval VERR_END_OF_STRING if *pcch is 0. *pCp is set to RTUNICP_INVALID.1383 *1384 * @param ppsz Pointer to the string pointer. This will be updated to1385 * point to the char following the current code point.1386 * @param pcch Pointer to the maximum string length. 
This will be1387 * decremented by the size of the code point found.1388 * @param pCp Where to store the code point.1389 * RTUNICP_INVALID is stored here on failure.1390 */1391 DECLINLINE(int) RTLatin1GetCpNEx(const char **ppsz, size_t *pcch, PRTUNICP pCp)1392 {1393 if (RT_LIKELY(*pcch != 0))1394 {1395 const unsigned char uch = **(const unsigned char **)ppsz;1396 (*ppsz)++;1397 (*pcch)--;1398 *pCp = uch;1399 return VINF_SUCCESS;1400 }1401 *pCp = RTUNICP_INVALID;1402 return VERR_END_OF_STRING;1403 }1404 1405 /**1406 * Get the Latin-1 size in characters of a given Unicode code point.1407 *1408 * The code point is expected to be a valid Unicode one, but not necessarily in1409 * the range supported by Latin-1.1410 *1411 * @returns the size in characters, or zero if there is no Latin-1 encoding1412 */1413 DECLINLINE(size_t) RTLatin1CpSize(RTUNICP CodePoint)1414 {1415 if (CodePoint < 0x100)1416 return 1;1417 return 0;1418 }1419 1420 /**1421 * Put the unicode code point at the given string position1422 * and return the pointer to the char following it.1423 *1424 * This function will not consider anything at or following the1425 * buffer area pointed to by psz. 
It is therefore not suitable for1426 * inserting code points into a string, only appending/overwriting.1427 *1428 * @returns pointer to the char following the written code point.1429 * @param psz The string.1430 * @param CodePoint The code point to write.1431 * This should not be RTUNICP_INVALID or any other1432 * character out of the Latin-1 range.1433 */1434 DECLINLINE(char *) RTLatin1PutCp(char *psz, RTUNICP CodePoint)1435 {1436 AssertReturn(CodePoint < 0x100, NULL);1437 *psz++ = (unsigned char)CodePoint;1438 return psz;1439 }1440 1441 /**1442 * Skips ahead, past the current code point.1443 *1444 * @returns Pointer to the char after the current code point.1445 * @param psz Pointer to the current code point.1446 * @remark This will not move the next valid code point, only past the current one.1447 */1448 DECLINLINE(char *) RTLatin1NextCp(const char *psz)1449 {1450 psz++;1451 return (char *)psz;1452 }1453 1454 /**1455 * Skips back to the previous code point.1456 *1457 * @returns Pointer to the char before the current code point.1458 * @returns pszStart on failure.1459 * @param pszStart Pointer to the start of the string.1460 * @param psz Pointer to the current code point.1461 */1462 DECLINLINE(char *) RTLatin1PrevCp(const char *pszStart, const char *psz)1463 {1464 if ((uintptr_t)psz > (uintptr_t)pszStart)1465 {1466 psz--;1467 return (char *)psz;1468 }1469 return (char *)pszStart;1470 }1471 1239 1472 1240 … … 1668 1436 #endif 1669 1437 1670 /** Format flag.1438 /** @name Format flag. 1671 1439 * These are used by RTStrFormat extensions and RTStrFormatNumber, mind 1672 1440 * that not all flags makes sense to both of the functions. 
… … 3246 3014 /** @} */ 3247 3015 3248 3249 /** @defgroup rt_str_utf16 UTF-16 String Manipulation3250 * @{3251 */3252 3253 /**3254 * Allocates memory for UTF-16 string storage (default tag).3255 *3256 * You should normally not use this function, except if there is some very3257 * custom string handling you need doing that isn't covered by any of the other3258 * APIs.3259 *3260 * @returns Pointer to the allocated UTF-16 string. The first wide char is3261 * always set to the string terminator char, the contents of the3262 * remainder of the memory is undefined. The string must be freed by3263 * calling RTUtf16Free.3264 *3265 * NULL is returned if the allocation failed. Please translate this to3266 * VERR_NO_UTF16_MEMORY and not VERR_NO_MEMORY. Also consider3267 * RTUtf16AllocEx if an IPRT status code is required.3268 *3269 * @param cb How many bytes to allocate, will be rounded up3270 * to a multiple of two. If this is zero, we will3271 * allocate a terminator wide char anyway.3272 */3273 #define RTUtf16Alloc(cb) RTUtf16AllocTag((cb), RTSTR_TAG)3274 3275 /**3276 * Allocates memory for UTF-16 string storage (custom tag).3277 *3278 * You should normally not use this function, except if there is some very3279 * custom string handling you need doing that isn't covered by any of the other3280 * APIs.3281 *3282 * @returns Pointer to the allocated UTF-16 string. The first wide char is3283 * always set to the string terminator char, the contents of the3284 * remainder of the memory is undefined. The string must be freed by3285 * calling RTUtf16Free.3286 *3287 * NULL is returned if the allocation failed. Please translate this to3288 * VERR_NO_UTF16_MEMORY and not VERR_NO_MEMORY. Also consider3289 * RTUtf16AllocExTag if an IPRT status code is required.3290 *3291 * @param cb How many bytes to allocate, will be rounded up3292 * to a multiple of two. 
If this is zero, we will3293 * allocate a terminator wide char anyway.3294 * @param pszTag Allocation tag used for statistics and such.3295 */3296 RTDECL(PRTUTF16) RTUtf16AllocTag(size_t cb, const char *pszTag);3297 3298 3299 /**3300 * Free a UTF-16 string allocated by RTStrToUtf16(), RTStrToUtf16Ex(),3301 * RTLatin1ToUtf16(), RTLatin1ToUtf16Ex(), RTUtf16Dup() or RTUtf16DupEx().3302 *3303 * @returns iprt status code.3304 * @param pwszString The UTF-16 string to free. NULL is accepted.3305 */3306 RTDECL(void) RTUtf16Free(PRTUTF16 pwszString);3307 3308 /**3309 * Allocates a new copy of the specified UTF-16 string (default tag).3310 *3311 * @returns Pointer to the allocated string copy. Use RTUtf16Free() to free it.3312 * @returns NULL when out of memory.3313 * @param pwszString UTF-16 string to duplicate.3314 * @remark This function will not make any attempt to validate the encoding.3315 */3316 #define RTUtf16Dup(pwszString) RTUtf16DupTag((pwszString), RTSTR_TAG)3317 3318 /**3319 * Allocates a new copy of the specified UTF-16 string (custom tag).3320 *3321 * @returns Pointer to the allocated string copy. 
Use RTUtf16Free() to free it.3322 * @returns NULL when out of memory.3323 * @param pwszString UTF-16 string to duplicate.3324 * @param pszTag Allocation tag used for statistics and such.3325 * @remark This function will not make any attempt to validate the encoding.3326 */3327 RTDECL(PRTUTF16) RTUtf16DupTag(PCRTUTF16 pwszString, const char *pszTag);3328 3329 /**3330 * Allocates a new copy of the specified UTF-16 string (default tag).3331 *3332 * @returns iprt status code.3333 * @param ppwszString Receives pointer of the allocated UTF-16 string.3334 * The returned pointer must be freed using RTUtf16Free().3335 * @param pwszString UTF-16 string to duplicate.3336 * @param cwcExtra Number of extra RTUTF16 items to allocate.3337 * @remark This function will not make any attempt to validate the encoding.3338 */3339 #define RTUtf16DupEx(ppwszString, pwszString, cwcExtra) \3340 RTUtf16DupExTag((ppwszString), (pwszString), (cwcExtra), RTSTR_TAG)3341 3342 /**3343 * Allocates a new copy of the specified UTF-16 string (custom tag).3344 *3345 * @returns iprt status code.3346 * @param ppwszString Receives pointer of the allocated UTF-16 string.3347 * The returned pointer must be freed using RTUtf16Free().3348 * @param pwszString UTF-16 string to duplicate.3349 * @param cwcExtra Number of extra RTUTF16 items to allocate.3350 * @param pszTag Allocation tag used for statistics and such.3351 * @remark This function will not make any attempt to validate the encoding.3352 */3353 RTDECL(int) RTUtf16DupExTag(PRTUTF16 *ppwszString, PCRTUTF16 pwszString, size_t cwcExtra, const char *pszTag);3354 3355 /**3356 * Returns the length of a UTF-16 string in UTF-16 characters3357 * without trailing '\\0'.3358 *3359 * Surrogate pairs counts as two UTF-16 characters here. 
Use RTUtf16CpCnt()3360 * to get the exact number of code points in the string.3361 *3362 * @returns The number of RTUTF16 items in the string.3363 * @param pwszString Pointer the UTF-16 string.3364 * @remark This function will not make any attempt to validate the encoding.3365 */3366 RTDECL(size_t) RTUtf16Len(PCRTUTF16 pwszString);3367 3368 /**3369 * Find the length of a zero-terminated byte string, given a max string length.3370 *3371 * @returns The string length or cbMax. The returned length does not include3372 * the zero terminator if it was found.3373 *3374 * @param pwszString The string.3375 * @param cwcMax The max string length in RTUTF16s.3376 * @sa RTUtf16NLenEx, RTStrNLen.3377 */3378 RTDECL(size_t) RTUtf16NLen(PCRTUTF16 pwszString, size_t cwcMax);3379 3380 /**3381 * Find the length of a zero-terminated byte string, given3382 * a max string length.3383 *3384 * @returns IPRT status code.3385 * @retval VINF_SUCCESS if the string has a length less than cchMax.3386 * @retval VERR_BUFFER_OVERFLOW if the end of the string wasn't found3387 * before cwcMax was reached.3388 *3389 * @param pwszString The string.3390 * @param cwcMax The max string length in RTUTF16s.3391 * @param pcwc Where to store the string length excluding the3392 * terminator. This is set to cwcMax if the terminator3393 * isn't found.3394 * @sa RTUtf16NLen, RTStrNLenEx.3395 */3396 RTDECL(int) RTUtf16NLenEx(PCRTUTF16 pwszString, size_t cwcMax, size_t *pcwc);3397 3398 /**3399 * Find the zero terminator in a string with a limited length.3400 *3401 * @returns Pointer to the zero terminator.3402 * @returns NULL if the zero terminator was not found.3403 *3404 * @param pwszString The string.3405 * @param cwcMax The max string length. 
RTSTR_MAX is fine.3406 */3407 RTDECL(PCRTUTF16) RTUtf16End(PCRTUTF16 pwszString, size_t cwcMax);3408 3409 /**3410 * Strips blankspaces from both ends of the string.3411 *3412 * @returns Pointer to first non-blank char in the string.3413 * @param pwsz The string to strip.3414 */3415 RTDECL(PRTUTF16) RTUtf16Strip(PRTUTF16 pwsz);3416 3417 /**3418 * Strips blankspaces from the start of the string.3419 *3420 * @returns Pointer to first non-blank char in the string.3421 * @param pwsz The string to strip.3422 */3423 RTDECL(PRTUTF16) RTUtf16StripL(PCRTUTF16 pwsz);3424 3425 /**3426 * Strips blankspaces from the end of the string.3427 *3428 * @returns pwsz.3429 * @param pwsz The string to strip.3430 */3431 RTDECL(PRTUTF16) RTUtf16StripR(PRTUTF16 pwsz);3432 3433 /**3434 * String copy with overflow handling.3435 *3436 * @retval VINF_SUCCESS on success.3437 * @retval VERR_BUFFER_OVERFLOW if the destination buffer is too small. The3438 * buffer will contain as much of the string as it can hold, fully3439 * terminated.3440 *3441 * @param pwszDst The destination buffer.3442 * @param cwcDst The size of the destination buffer in RTUTF16s.3443 * @param pwszSrc The source string. NULL is not OK.3444 */3445 RTDECL(int) RTUtf16Copy(PRTUTF16 pwszDst, size_t cwcDst, PCRTUTF16 pwszSrc);3446 3447 /**3448 * String copy with overflow handling, ASCII source.3449 *3450 * @retval VINF_SUCCESS on success.3451 * @retval VERR_BUFFER_OVERFLOW if the destination buffer is too small. The3452 * buffer will contain as much of the string as it can hold, fully3453 * terminated.3454 *3455 * @param pwszDst The destination buffer.3456 * @param cwcDst The size of the destination buffer in RTUTF16s.3457 * @param pszSrc The source string, pure ASCII. 
NULL is not OK.3458 */3459 RTDECL(int) RTUtf16CopyAscii(PRTUTF16 pwszDst, size_t cwcDst, const char *pszSrc);3460 3461 /**3462 * String copy with overflow handling.3463 *3464 * @retval VINF_SUCCESS on success.3465 * @retval VERR_BUFFER_OVERFLOW if the destination buffer is too small. The3466 * buffer will contain as much of the string as it can hold, fully3467 * terminated.3468 *3469 * @param pwszDst The destination buffer.3470 * @param cwcDst The size of the destination buffer in RTUTF16s.3471 * @param pwszSrc The source string. NULL is not OK.3472 * @param cwcSrcMax The maximum number of chars (not code points) to3473 * copy from the source string, not counting the3474 * terminator as usual.3475 */3476 RTDECL(int) RTUtf16CopyEx(PRTUTF16 pwszDst, size_t cwcDst, PCRTUTF16 pwszSrc, size_t cwcSrcMax);3477 3478 /**3479 * String concatenation with overflow handling.3480 *3481 * @retval VINF_SUCCESS on success.3482 * @retval VERR_BUFFER_OVERFLOW if the destination buffer is too small. The3483 * buffer will contain as much of the string as it can hold, fully3484 * terminated.3485 *3486 * @param pwszDst The destination buffer.3487 * @param cwcDst The size of the destination buffer in RTUTF16s.3488 * @param pwszSrc The source string. NULL is not OK.3489 */3490 RTDECL(int) RTUtf16Cat(PRTUTF16 pwszDst, size_t cwcDst, PCRTUTF16 pwszSrc);3491 3492 /**3493 * String concatenation with overflow handling, ASCII source.3494 *3495 * @retval VINF_SUCCESS on success.3496 * @retval VERR_BUFFER_OVERFLOW if the destination buffer is too small. The3497 * buffer will contain as much of the string as it can hold, fully3498 * terminated.3499 *3500 * @param pwszDst The destination buffer.3501 * @param cwcDst The size of the destination buffer in RTUTF16s.3502 * @param pszSrc The source string, pure ASCII. 
NULL is not OK.3503 */3504 RTDECL(int) RTUtf16CatAscii(PRTUTF16 pwszDst, size_t cwcDst, const char *pszSrc);3505 3506 /**3507 * String concatenation with overflow handling.3508 *3509 * @retval VINF_SUCCESS on success.3510 * @retval VERR_BUFFER_OVERFLOW if the destination buffer is too small. The3511 * buffer will contain as much of the string as it can hold, fully3512 * terminated.3513 *3514 * @param pwszDst The destination buffer.3515 * @param cwcDst The size of the destination buffer in RTUTF16s.3516 * @param pwszSrc The source string. NULL is not OK.3517 * @param cwcSrcMax The maximum number of UTF-16 chars (not code3518 * points) to copy from the source string, not3519 * counting the terminator as usual.3520 */3521 RTDECL(int) RTUtf16CatEx(PRTUTF16 pwszDst, size_t cwcDst, PCRTUTF16 pwszSrc, size_t cwcSrcMax);3522 3523 /**3524 * Performs a case sensitive string compare between two UTF-16 strings.3525 *3526 * @returns < 0 if the first string less than the second string.s3527 * @returns 0 if the first string identical to the second string.3528 * @returns > 0 if the first string greater than the second string.3529 * @param pwsz1 First UTF-16 string. Null is allowed.3530 * @param pwsz2 Second UTF-16 string. Null is allowed.3531 * @remark This function will not make any attempt to validate the encoding.3532 */3533 RTDECL(int) RTUtf16Cmp(PCRTUTF16 pwsz1, PCRTUTF16 pwsz2);3534 3535 /**3536 * Performs a case sensitive string compare between an UTF-16 string and a pure3537 * ASCII string.3538 *3539 * @returns < 0 if the first string less than the second string.s3540 * @returns 0 if the first string identical to the second string.3541 * @returns > 0 if the first string greater than the second string.3542 * @param pwsz1 First UTF-16 string. Null is allowed.3543 * @param psz2 Second string, pure ASCII. 
Null is allowed.3544 * @remark This function will not make any attempt to validate the encoding.3545 */3546 RTDECL(int) RTUtf16CmpAscii(PCRTUTF16 pwsz1, const char *psz2);3547 3548 /**3549 * Performs a case insensitive string compare between two UTF-16 strings.3550 *3551 * This is a simplified compare, as only the simplified lower/upper case folding3552 * specified by the unicode specs are used. It does not consider character pairs3553 * as they are used in some languages, just simple upper & lower case compares.3554 *3555 * @returns < 0 if the first string less than the second string.3556 * @returns 0 if the first string identical to the second string.3557 * @returns > 0 if the first string greater than the second string.3558 * @param pwsz1 First UTF-16 string. Null is allowed.3559 * @param pwsz2 Second UTF-16 string. Null is allowed.3560 */3561 RTDECL(int) RTUtf16ICmp(PCRTUTF16 pwsz1, PCRTUTF16 pwsz2);3562 3563 /**3564 * Performs a case insensitive string compare between an UTF-16 string and an3565 * pure ASCII string.3566 *3567 * Since this compare only takes cares about the first 128 codepoints in3568 * unicode, no tables are needed and there aren't any real complications.3569 *3570 * @returns < 0 if the first string less than the second string.3571 * @returns 0 if the first string identical to the second string.3572 * @returns > 0 if the first string greater than the second string.3573 * @param pwsz1 First UTF-16 string. Null is allowed.3574 * @param psz2 Second string, pure ASCII. Null is allowed.3575 */3576 RTDECL(int) RTUtf16ICmpAscii(PCRTUTF16 pwsz1, const char *psz2);3577 3578 /**3579 * Performs a case insensitive string compare between two UTF-16 strings3580 * using the current locale of the process (if applicable).3581 *3582 * This differs from RTUtf16ICmp() in that it will try, if a locale with the3583 * required data is available, to do a correct case-insensitive compare. 
It3584 * follows that it is more complex and thereby likely to be more expensive.3585 *3586 * @returns < 0 if the first string less than the second string.3587 * @returns 0 if the first string identical to the second string.3588 * @returns > 0 if the first string greater than the second string.3589 * @param pwsz1 First UTF-16 string. Null is allowed.3590 * @param pwsz2 Second UTF-16 string. Null is allowed.3591 */3592 RTDECL(int) RTUtf16LocaleICmp(PCRTUTF16 pwsz1, PCRTUTF16 pwsz2);3593 3594 /**3595 * Folds a UTF-16 string to lowercase.3596 *3597 * This is a very simple folding; is uses the simple lowercase3598 * code point, it is not related to any locale just the most common3599 * lowercase codepoint setup by the unicode specs, and it will not3600 * create new surrogate pairs or remove existing ones.3601 *3602 * @returns Pointer to the passed in string.3603 * @param pwsz The string to fold.3604 */3605 RTDECL(PRTUTF16) RTUtf16ToLower(PRTUTF16 pwsz);3606 3607 /**3608 * Folds a UTF-16 string to uppercase.3609 *3610 * This is a very simple folding; is uses the simple uppercase3611 * code point, it is not related to any locale just the most common3612 * uppercase codepoint setup by the unicode specs, and it will not3613 * create new surrogate pairs or remove existing ones.3614 *3615 * @returns Pointer to the passed in string.3616 * @param pwsz The string to fold.3617 */3618 RTDECL(PRTUTF16) RTUtf16ToUpper(PRTUTF16 pwsz);3619 3620 /**3621 * Validates the UTF-16 encoding of the string.3622 *3623 * @returns iprt status code.3624 * @param pwsz The string.3625 */3626 RTDECL(int) RTUtf16ValidateEncoding(PCRTUTF16 pwsz);3627 3628 /**3629 * Validates the UTF-16 encoding of the string.3630 *3631 * @returns iprt status code.3632 * @param pwsz The string.3633 * @param cwc The max string length (/ size) in UTF-16 units. 
Use3634 * RTSTR_MAX to process the entire string.3635 * @param fFlags Combination of RTSTR_VALIDATE_ENCODING_XXX flags.3636 */3637 RTDECL(int) RTUtf16ValidateEncodingEx(PCRTUTF16 pwsz, size_t cwc, uint32_t fFlags);3638 3639 /**3640 * Checks if the UTF-16 encoding is valid.3641 *3642 * @returns true / false.3643 * @param pwsz The string.3644 */3645 RTDECL(bool) RTUtf16IsValidEncoding(PCRTUTF16 pwsz);3646 3647 /**3648 * Sanitise a (valid) UTF-16 string by replacing all characters outside a white3649 * list in-place by an ASCII replacement character. Multi-byte characters will3650 * be replaced byte by byte.3651 *3652 * @returns The number of code points replaced, or a negative value if the3653 * string is not correctly encoded. In this last case the string3654 * may be partially processed.3655 * @param pwsz The string to sanitise.3656 * @param puszValidSet A zero-terminated array of pairs of Unicode points.3657 * Each pair is the start and end point of a range,3658 * and the union of these ranges forms the white list.3659 * @param chReplacement The ASCII replacement character.3660 */3661 RTDECL(ssize_t) RTUtf16PurgeComplementSet(PRTUTF16 pwsz, PCRTUNICP puszValidSet, char chReplacement);3662 3663 /**3664 * Translate a UTF-16 string into a UTF-8 allocating the result buffer (default3665 * tag).3666 *3667 * @returns iprt status code.3668 * @param pwszString UTF-16 string to convert.3669 * @param ppszString Receives pointer of allocated UTF-8 string on3670 * success, and is always set to NULL on failure.3671 * The returned pointer must be freed using RTStrFree().3672 */3673 #define RTUtf16ToUtf8(pwszString, ppszString) RTUtf16ToUtf8Tag((pwszString), (ppszString), RTSTR_TAG)3674 3675 /**3676 * Translate a UTF-16 string into a UTF-8 allocating the result buffer.3677 *3678 * @returns iprt status code.3679 * @param pwszString UTF-16 string to convert.3680 * @param ppszString Receives pointer of allocated UTF-8 string on3681 * success, and is always set to NULL on 
failure.3682 * The returned pointer must be freed using RTStrFree().3683 * @param pszTag Allocation tag used for statistics and such.3684 */3685 RTDECL(int) RTUtf16ToUtf8Tag(PCRTUTF16 pwszString, char **ppszString, const char *pszTag);3686 3687 /**3688 * Translates UTF-16 to UTF-8 using buffer provided by the caller or a fittingly3689 * sized buffer allocated by the function (default tag).3690 *3691 * @returns iprt status code.3692 * @param pwszString The UTF-16 string to convert.3693 * @param cwcString The number of RTUTF16 items to translate from pwszString.3694 * The translation will stop when reaching cwcString or the terminator ('\\0').3695 * Use RTSTR_MAX to translate the entire string.3696 * @param ppsz If cch is non-zero, this must either be pointing to a pointer to3697 * a buffer of the specified size, or pointer to a NULL pointer.3698 * If *ppsz is NULL or cch is zero a buffer of at least cch chars3699 * will be allocated to hold the translated string.3700 * If a buffer was requested it must be freed using RTStrFree().3701 * @param cch The buffer size in chars (the type). This includes the terminator.3702 * @param pcch Where to store the length of the translated string,3703 * excluding the terminator. 
(Optional)3704 *3705 * This may be set under some error conditions,3706 * however, only for VERR_BUFFER_OVERFLOW and3707 * VERR_NO_STR_MEMORY will it contain a valid string3708 * length that can be used to resize the buffer.3709 */3710 #define RTUtf16ToUtf8Ex(pwszString, cwcString, ppsz, cch, pcch) \3711 RTUtf16ToUtf8ExTag((pwszString), (cwcString), (ppsz), (cch), (pcch), RTSTR_TAG)3712 3713 /**3714 * Translates UTF-16 to UTF-8 using buffer provided by the caller or a fittingly3715 * sized buffer allocated by the function (custom tag).3716 *3717 * @returns iprt status code.3718 * @param pwszString The UTF-16 string to convert.3719 * @param cwcString The number of RTUTF16 items to translate from pwszString.3720 * The translation will stop when reaching cwcString or the terminator ('\\0').3721 * Use RTSTR_MAX to translate the entire string.3722 * @param ppsz If cch is non-zero, this must either be pointing to a pointer to3723 * a buffer of the specified size, or pointer to a NULL pointer.3724 * If *ppsz is NULL or cch is zero a buffer of at least cch chars3725 * will be allocated to hold the translated string.3726 * If a buffer was requested it must be freed using RTStrFree().3727 * @param cch The buffer size in chars (the type). This includes the terminator.3728 * @param pcch Where to store the length of the translated string,3729 * excluding the terminator. 
(Optional)3730 *3731 * This may be set under some error conditions,3732 * however, only for VERR_BUFFER_OVERFLOW and3733 * VERR_NO_STR_MEMORY will it contain a valid string3734 * length that can be used to resize the buffer.3735 * @param pszTag Allocation tag used for statistics and such.3736 */3737 RTDECL(int) RTUtf16ToUtf8ExTag(PCRTUTF16 pwszString, size_t cwcString, char **ppsz, size_t cch, size_t *pcch, const char *pszTag);3738 3739 /**3740 * Calculates the length of the UTF-16 string in UTF-8 chars (bytes).3741 *3742 * This function will validate the string, and incorrectly encoded UTF-163743 * strings will be rejected. The primary purpose of this function is to3744 * help allocate buffers for RTUtf16ToUtf8() of the correct size. For most3745 * other purposes RTUtf16ToUtf8Ex() should be used.3746 *3747 * @returns Number of char (bytes).3748 * @returns 0 if the string was incorrectly encoded.3749 * @param pwsz The UTF-16 string.3750 */3751 RTDECL(size_t) RTUtf16CalcUtf8Len(PCRTUTF16 pwsz);3752 3753 /**3754 * Calculates the length of the UTF-16 string in UTF-8 chars (bytes).3755 *3756 * This function will validate the string, and incorrectly encoded UTF-163757 * strings will be rejected.3758 *3759 * @returns iprt status code.3760 * @param pwsz The string.3761 * @param cwc The max string length. Use RTSTR_MAX to process the entire string.3762 * @param pcch Where to store the string length (in bytes). 
Optional.3763 * This is undefined on failure.3764 */3765 RTDECL(int) RTUtf16CalcUtf8LenEx(PCRTUTF16 pwsz, size_t cwc, size_t *pcch);3766 3767 /**3768 * Translate a UTF-16 string into a Latin-1 (ISO-8859-1) allocating the result3769 * buffer (default tag).3770 *3771 * @returns iprt status code.3772 * @param pwszString UTF-16 string to convert.3773 * @param ppszString Receives pointer of allocated Latin1 string on3774 * success, and is always set to NULL on failure.3775 * The returned pointer must be freed using RTStrFree().3776 */3777 #define RTUtf16ToLatin1(pwszString, ppszString) RTUtf16ToLatin1Tag((pwszString), (ppszString), RTSTR_TAG)3778 3779 /**3780 * Translate a UTF-16 string into a Latin-1 (ISO-8859-1) allocating the result3781 * buffer (custom tag).3782 *3783 * @returns iprt status code.3784 * @param pwszString UTF-16 string to convert.3785 * @param ppszString Receives pointer of allocated Latin1 string on3786 * success, and is always set to NULL on failure.3787 * The returned pointer must be freed using RTStrFree().3788 * @param pszTag Allocation tag used for statistics and such.3789 */3790 RTDECL(int) RTUtf16ToLatin1Tag(PCRTUTF16 pwszString, char **ppszString, const char *pszTag);3791 3792 /**3793 * Translates UTF-16 to Latin-1 (ISO-8859-1) using buffer provided by the caller3794 * or a fittingly sized buffer allocated by the function (default tag).3795 *3796 * @returns iprt status code.3797 * @param pwszString The UTF-16 string to convert.3798 * @param cwcString The number of RTUTF16 items to translate from3799 * pwszString. The translation will stop when reaching3800 * cwcString or the terminator ('\\0'). Use RTSTR_MAX3801 * to translate the entire string.3802 * @param ppsz Pointer to the pointer to the Latin-1 string. 
The3803 * buffer can optionally be preallocated by the caller.3804 *3805 * If cch is zero, *ppsz is undefined.3806 *3807 * If cch is non-zero and *ppsz is not NULL, then this3808 * will be used as the output buffer.3809 * VERR_BUFFER_OVERFLOW will be returned if this is3810 * insufficient.3811 *3812 * If cch is zero or *ppsz is NULL, then a buffer of3813 * sufficient size is allocated. cch can be used to3814 * specify a minimum size of this buffer. Use3815 * RTUtf16Free() to free the result.3816 *3817 * @param cch The buffer size in chars (the type). This includes3818 * the terminator.3819 * @param pcch Where to store the length of the translated string,3820 * excluding the terminator. (Optional)3821 *3822 * This may be set under some error conditions,3823 * however, only for VERR_BUFFER_OVERFLOW and3824 * VERR_NO_STR_MEMORY will it contain a valid string3825 * length that can be used to resize the buffer.3826 */3827 #define RTUtf16ToLatin1Ex(pwszString, cwcString, ppsz, cch, pcch) \3828 RTUtf16ToLatin1ExTag((pwszString), (cwcString), (ppsz), (cch), (pcch), RTSTR_TAG)3829 3830 /**3831 * Translates UTF-16 to Latin-1 (ISO-8859-1) using buffer provided by the caller3832 * or a fittingly sized buffer allocated by the function (custom tag).3833 *3834 * @returns iprt status code.3835 * @param pwszString The UTF-16 string to convert.3836 * @param cwcString The number of RTUTF16 items to translate from3837 * pwszString. The translation will stop when reaching3838 * cwcString or the terminator ('\\0'). Use RTSTR_MAX3839 * to translate the entire string.3840 * @param ppsz Pointer to the pointer to the Latin-1 string. 
The3841 * buffer can optionally be preallocated by the caller.3842 *3843 * If cch is zero, *ppsz is undefined.3844 *3845 * If cch is non-zero and *ppsz is not NULL, then this3846 * will be used as the output buffer.3847 * VERR_BUFFER_OVERFLOW will be returned if this is3848 * insufficient.3849 *3850 * If cch is zero or *ppsz is NULL, then a buffer of3851 * sufficient size is allocated. cch can be used to3852 * specify a minimum size of this buffer. Use3853 * RTUtf16Free() to free the result.3854 *3855 * @param cch The buffer size in chars (the type). This includes3856 * the terminator.3857 * @param pcch Where to store the length of the translated string,3858 * excluding the terminator. (Optional)3859 *3860 * This may be set under some error conditions,3861 * however, only for VERR_BUFFER_OVERFLOW and3862 * VERR_NO_STR_MEMORY will it contain a valid string3863 * length that can be used to resize the buffer.3864 * @param pszTag Allocation tag used for statistics and such.3865 */3866 RTDECL(int) RTUtf16ToLatin1ExTag(PCRTUTF16 pwszString, size_t cwcString, char **ppsz, size_t cch, size_t *pcch, const char *pszTag);3867 3868 /**3869 * Calculates the length of the UTF-16 string in Latin-1 (ISO-8859-1) chars.3870 *3871 * This function will validate the string, and incorrectly encoded UTF-163872 * strings will be rejected. The primary purpose of this function is to3873 * help allocate buffers for RTUtf16ToLatin1() of the correct size. 
For most3874 * other purposes RTUtf16ToLatin1Ex() should be used.3875 *3876 * @returns Number of char (bytes).3877 * @returns 0 if the string was incorrectly encoded.3878 * @param pwsz The UTF-16 string.3879 */3880 RTDECL(size_t) RTUtf16CalcLatin1Len(PCRTUTF16 pwsz);3881 3882 /**3883 * Calculates the length of the UTF-16 string in Latin-1 (ISO-8859-1) chars.3884 *3885 * This function will validate the string, and incorrectly encoded UTF-163886 * strings will be rejected.3887 *3888 * @returns iprt status code.3889 * @param pwsz The string.3890 * @param cwc The max string length. Use RTSTR_MAX to process the3891 * entire string.3892 * @param pcch Where to store the string length (in bytes). Optional.3893 * This is undefined on failure.3894 */3895 RTDECL(int) RTUtf16CalcLatin1LenEx(PCRTUTF16 pwsz, size_t cwc, size_t *pcch);3896 3897 /**3898 * Get the unicode code point at the given string position.3899 *3900 * @returns unicode code point.3901 * @returns RTUNICP_INVALID if the encoding is invalid.3902 * @param pwsz The string.3903 *3904 * @remark This is an internal worker for RTUtf16GetCp().3905 */3906 RTDECL(RTUNICP) RTUtf16GetCpInternal(PCRTUTF16 pwsz);3907 3908 /**3909 * Get the unicode code point at the given string position.3910 *3911 * @returns iprt status code.3912 * @param ppwsz Pointer to the string pointer. This will be updated to3913 * point to the char following the current code point.3914 * @param pCp Where to store the code point.3915 * RTUNICP_INVALID is stored here on failure.3916 *3917 * @remark This is an internal worker for RTUtf16GetCpEx().3918 */3919 RTDECL(int) RTUtf16GetCpExInternal(PCRTUTF16 *ppwsz, PRTUNICP pCp);3920 3921 /**3922 * Put the unicode code point at the given string position3923 * and return the pointer to the char following it.3924 *3925 * This function will not consider anything at or following the3926 * buffer area pointed to by pwsz. 
It is therefore not suitable for3927 * inserting code points into a string, only appending/overwriting.3928 *3929 * @returns pointer to the char following the written code point.3930 * @param pwsz The string.3931 * @param CodePoint The code point to write.3932 * This should not be RTUNICP_INVALID or any other3933 * character out of the UTF-16 range.3934 *3935 * @remark This is an internal worker for RTUtf16PutCp().3936 */3937 RTDECL(PRTUTF16) RTUtf16PutCpInternal(PRTUTF16 pwsz, RTUNICP CodePoint);3938 3939 /**3940 * Get the unicode code point at the given string position.3941 *3942 * @returns unicode code point.3943 * @returns RTUNICP_INVALID if the encoding is invalid.3944 * @param pwsz The string.3945 *3946 * @remark We optimize this operation by using an inline function for3947 * everything which isn't a surrogate pair or an endian indicator.3948 */3949 DECLINLINE(RTUNICP) RTUtf16GetCp(PCRTUTF16 pwsz)3950 {3951 const RTUTF16 wc = *pwsz;3952 if (wc < 0xd800 || (wc > 0xdfff && wc < 0xfffe))3953 return wc;3954 return RTUtf16GetCpInternal(pwsz);3955 }3956 3957 /**3958 * Get the unicode code point at the given string position.3959 *3960 * @returns iprt status code.3961 * @param ppwsz Pointer to the string pointer. 
This will be updated to3962 * point to the char following the current code point.3963 * @param pCp Where to store the code point.3964 * RTUNICP_INVALID is stored here on failure.3965 *3966 * @remark We optimize this operation by using an inline function for3967 * everything which isn't a surrogate pair or an endian indicator.3968 */3969 DECLINLINE(int) RTUtf16GetCpEx(PCRTUTF16 *ppwsz, PRTUNICP pCp)3970 {3971 const RTUTF16 wc = **ppwsz;3972 if (wc < 0xd800 || (wc > 0xdfff && wc < 0xfffe))3973 {3974 (*ppwsz)++;3975 *pCp = wc;3976 return VINF_SUCCESS;3977 }3978 return RTUtf16GetCpExInternal(ppwsz, pCp);3979 }3980 3981 /**3982 * Put the unicode code point at the given string position3983 * and return the pointer to the char following it.3984 *3985 * This function will not consider anything at or following the3986 * buffer area pointed to by pwsz. It is therefore not suitable for3987 * inserting code points into a string, only appending/overwriting.3988 *3989 * @returns pointer to the char following the written code point.3990 * @param pwsz The string.3991 * @param CodePoint The code point to write.3992 * This should not be RTUNICP_INVALID or any other3993 * character out of the UTF-16 range.3994 *3995 * @remark We optimize this operation by using an inline function for3996 * everything which isn't a surrogate pair or an endian indicator.3997 */3998 DECLINLINE(PRTUTF16) RTUtf16PutCp(PRTUTF16 pwsz, RTUNICP CodePoint)3999 {4000 if (CodePoint < 0xd800 || (CodePoint > 0xd800 && CodePoint < 0xfffe))4001 {4002 *pwsz++ = (RTUTF16)CodePoint;4003 return pwsz;4004 }4005 return RTUtf16PutCpInternal(pwsz, CodePoint);4006 }4007 4008 /**4009 * Skips ahead, past the current code point.4010 *4011 * @returns Pointer to the char after the current code point.4012 * @param pwsz Pointer to the current code point.4013 * @remark This will not move to the next valid code point, only past the current one.4014 */4015 DECLINLINE(PRTUTF16) RTUtf16NextCp(PCRTUTF16 pwsz)4016 {4017 RTUNICP Cp;4018 
RTUtf16GetCpEx(&pwsz, &Cp);4019 return (PRTUTF16)pwsz;4020 }4021 4022 /**4023 * Skips backwards, to the previous code point.4024 *4025 * @returns Pointer to the char after the current code point.4026 * @param pwszStart Pointer to the start of the string.4027 * @param pwsz Pointer to the current code point.4028 */4029 RTDECL(PRTUTF16) RTUtf16PrevCp(PCRTUTF16 pwszStart, PCRTUTF16 pwsz);4030 4031 4032 /**4033 * Checks if the UTF-16 char is the high surrogate char (i.e.4034 * the 1st char in the pair).4035 *4036 * @returns true if it is.4037 * @returns false if it isn't.4038 * @param wc The character to investigate.4039 */4040 DECLINLINE(bool) RTUtf16IsHighSurrogate(RTUTF16 wc)4041 {4042 return wc >= 0xd800 && wc <= 0xdbff;4043 }4044 4045 /**4046 * Checks if the UTF-16 char is the low surrogate char (i.e.4047 * the 2nd char in the pair).4048 *4049 * @returns true if it is.4050 * @returns false if it isn't.4051 * @param wc The character to investigate.4052 */4053 DECLINLINE(bool) RTUtf16IsLowSurrogate(RTUTF16 wc)4054 {4055 return wc >= 0xdc00 && wc <= 0xdfff;4056 }4057 4058 4059 /**4060 * Checks if the two UTF-16 chars form a valid surrogate pair.4061 *4062 * @returns true if they do.4063 * @returns false if they don't.4064 * @param wcHigh The high (1st) character.4065 * @param wcLow The low (2nd) character.4066 */4067 DECLINLINE(bool) RTUtf16IsSurrogatePair(RTUTF16 wcHigh, RTUTF16 wcLow)4068 {4069 return RTUtf16IsHighSurrogate(wcHigh)4070 && RTUtf16IsLowSurrogate(wcLow);4071 }4072 4073 /**4074 * Formats a buffer stream as hex bytes.4075 *4076 * The default is no separating spaces or line breaks or anything.4077 *4078 * @returns IPRT status code.4079 * @retval VERR_INVALID_POINTER if any of the pointers are wrong.4080 * @retval VERR_BUFFER_OVERFLOW if the buffer is insufficient to hold the bytes.4081 *4082 * @param pwszBuf Output string buffer.4083 * @param cwcBuf The size of the output buffer in RTUTF16 units.4084 * @param pv Pointer to the bytes to stringify.4085 * 
@param cb The number of bytes to stringify.4086 * @param fFlags Combination of RTSTRPRINTHEXBYTES_F_XXX values.4087 * @sa RTStrPrintHexBytes.4088 */4089 RTDECL(int) RTUtf16PrintHexBytes(PRTUTF16 pwszBuf, size_t cwcBuf, void const *pv, size_t cb, uint32_t fFlags);4090 4091 3016 /** @} */ 4092 3017 4093 4094 /** @defgroup rt_str_latin1 Latin-1 (ISO-8859-1) String Manipulation 4095 * @{ 4096 */ 4097 4098 /** 4099 * Calculates the length of the Latin-1 (ISO-8859-1) string in RTUTF16 items. 4100 * 4101 * @returns Number of RTUTF16 items. 4102 * @param psz The Latin-1 string. 4103 */ 4104 RTDECL(size_t) RTLatin1CalcUtf16Len(const char *psz); 4105 4106 /** 4107 * Calculates the length of the Latin-1 (ISO-8859-1) string in RTUTF16 items. 4108 * 4109 * @returns iprt status code. 4110 * @param psz The Latin-1 string. 4111 * @param cch The max string length. Use RTSTR_MAX to process the 4112 * entire string. 4113 * @param pcwc Where to store the string length. Optional. 4114 * This is undefined on failure. 4115 */ 4116 RTDECL(int) RTLatin1CalcUtf16LenEx(const char *psz, size_t cch, size_t *pcwc); 4117 4118 /** 4119 * Translate a Latin-1 (ISO-8859-1) string into a UTF-16 allocating the result 4120 * buffer (default tag). 4121 * 4122 * @returns iprt status code. 4123 * @param pszString The Latin-1 string to convert. 4124 * @param ppwszString Receives pointer to the allocated UTF-16 string. The 4125 * returned string must be freed using RTUtf16Free(). 4126 */ 4127 #define RTLatin1ToUtf16(pszString, ppwszString) RTLatin1ToUtf16Tag((pszString), (ppwszString), RTSTR_TAG) 4128 4129 /** 4130 * Translate a Latin-1 (ISO-8859-1) string into a UTF-16 allocating the result 4131 * buffer (custom tag). 4132 * 4133 * @returns iprt status code. 4134 * @param pszString The Latin-1 string to convert. 4135 * @param ppwszString Receives pointer to the allocated UTF-16 string. The 4136 * returned string must be freed using RTUtf16Free(). 
4137 * @param pszTag Allocation tag used for statistics and such. 4138 */ 4139 RTDECL(int) RTLatin1ToUtf16Tag(const char *pszString, PRTUTF16 *ppwszString, const char *pszTag); 4140 4141 /** 4142 * Translates pszString from Latin-1 (ISO-8859-1) to UTF-16, allocating the 4143 * result buffer if requested (default tag). 4144 * 4145 * @returns iprt status code. 4146 * @param pszString The Latin-1 string to convert. 4147 * @param cchString The maximum size in chars (the type) to convert. 4148 * The conversion stops when it reaches cchString or 4149 * the string terminator ('\\0'). 4150 * Use RTSTR_MAX to translate the entire string. 4151 * @param ppwsz If cwc is non-zero, this must either be pointing 4152 * to pointer to a buffer of the specified size, or 4153 * pointer to a NULL pointer. 4154 * If *ppwsz is NULL or cwc is zero a buffer of at 4155 * least cwc items will be allocated to hold the 4156 * translated string. If a buffer was requested it 4157 * must be freed using RTUtf16Free(). 4158 * @param cwc The buffer size in RTUTF16s. This includes the 4159 * terminator. 4160 * @param pcwc Where to store the length of the translated string, 4161 * excluding the terminator. (Optional) 4162 * 4163 * This may be set under some error conditions, 4164 * however, only for VERR_BUFFER_OVERFLOW and 4165 * VERR_NO_STR_MEMORY will it contain a valid string 4166 * length that can be used to resize the buffer. 4167 */ 4168 #define RTLatin1ToUtf16Ex(pszString, cchString, ppwsz, cwc, pcwc) \ 4169 RTLatin1ToUtf16ExTag((pszString), (cchString), (ppwsz), (cwc), (pcwc), RTSTR_TAG) 4170 4171 /** 4172 * Translates pszString from Latin-1 (ISO-8859-1) to UTF-16, allocating the 4173 * result buffer if requested. 4174 * 4175 * @returns iprt status code. 4176 * @param pszString The Latin-1 string to convert. 4177 * @param cchString The maximum size in chars (the type) to convert. 4178 * The conversion stops when it reaches cchString or 4179 * the string terminator ('\\0'). 
4180 * Use RTSTR_MAX to translate the entire string. 4181 * @param ppwsz If cwc is non-zero, this must either be pointing 4182 * to pointer to a buffer of the specified size, or 4183 * pointer to a NULL pointer. 4184 * If *ppwsz is NULL or cwc is zero a buffer of at 4185 * least cwc items will be allocated to hold the 4186 * translated string. If a buffer was requested it 4187 * must be freed using RTUtf16Free(). 4188 * @param cwc The buffer size in RTUTF16s. This includes the 4189 * terminator. 4190 * @param pcwc Where to store the length of the translated string, 4191 * excluding the terminator. (Optional) 4192 * 4193 * This may be set under some error conditions, 4194 * however, only for VERR_BUFFER_OVERFLOW and 4195 * VERR_NO_STR_MEMORY will it contain a valid string 4196 * length that can be used to resize the buffer. 4197 * @param pszTag Allocation tag used for statistics and such. 4198 */ 4199 RTDECL(int) RTLatin1ToUtf16ExTag(const char *pszString, size_t cchString, 4200 PRTUTF16 *ppwsz, size_t cwc, size_t *pcwc, const char *pszTag); 4201 4202 /** @} */ 4203 4204 #ifndef ___iprt_nocrt_string_h 4205 # if defined(RT_OS_WINDOWS) 4206 RTDECL(void *) mempcpy(void *pvDst, const void *pvSrc, size_t cb); 4207 # endif 3018 RT_C_DECLS_END 3019 4208 3020 #endif 4209 3021 4210 4211 RT_C_DECLS_END4212 4213 /** @} */4214 4215 #endif4216 -
trunk/include/iprt/utf16.h
r57927 r57941 1 1 /** @file 2 * IPRT - String Manipulation .2 * IPRT - String Manipulation, UTF-16 encoding. 3 3 */ 4 4 … … 24 24 */ 25 25 26 #ifndef ___iprt_string_h 27 #define ___iprt_string_h 28 29 #include <iprt/cdefs.h> 30 #include <iprt/types.h> 31 #include <iprt/assert.h> 32 #include <iprt/stdarg.h> 33 #include <iprt/err.h> /* for VINF_SUCCESS */ 34 #if defined(RT_OS_LINUX) && defined(__KERNEL__) 35 RT_C_DECLS_BEGIN 36 # define new newhack /* string.h: strreplace */ 37 # include <linux/string.h> 38 # undef new 39 RT_C_DECLS_END 40 41 #elif defined(IN_XF86_MODULE) && !defined(NO_ANSIC) 42 RT_C_DECLS_BEGIN 43 # include "xf86_ansic.h" 44 RT_C_DECLS_END 45 46 #elif defined(RT_OS_FREEBSD) && defined(_KERNEL) 47 RT_C_DECLS_BEGIN 48 /** @todo 49 * XXX: Very ugly hack to get things build on recent FreeBSD builds. They have 50 * memchr now and we need to include param.h to get __FreeBSD_version and make 51 * memchr available based on the version below or we can't compile the kernel 52 * module on older versions anymore. 53 * 54 * But including param.h here opens Pandora's box because we clash with a few 55 * defines namely PVM and PAGE_SIZE. We can safely undefine PVM here but not 56 * PAGE_SIZE because this results in build errors sooner or later. Luckily this 57 * define is in a header included by param.h (machine/param.h). We define the 58 * guards here to prevent inclusion of it if PAGE_SIZE was defined already. 59 * 60 * @todo aeichner: Search for an elegant solution and cleanup this mess ASAP! 
61 */ 62 # ifdef PAGE_SIZE 63 # define _AMD64_INCLUDE_PARAM_H_ 64 # define _I386_INCLUDE_PARAM_H_ 65 # define _MACHINE_PARAM_H_ 66 # endif 67 # include <sys/param.h> /* __FreeBSD_version */ 68 # undef PVM 69 # include <sys/libkern.h> 70 /* 71 * No memmove on versions < 7.2 72 * Defining a macro using bcopy here 73 */ 74 # define memmove(dst, src, size) bcopy(src, dst, size) 75 RT_C_DECLS_END 76 77 #elif defined(RT_OS_SOLARIS) && defined(_KERNEL) 78 /* 79 * Same case as with FreeBSD kernel: 80 * The string.h stuff clashes with sys/system.h 81 * ffs = find first set bit. 82 */ 83 # define ffs ffs_string_h 84 # include <string.h> 85 # undef ffs 86 # undef strpbrk 87 88 #else 89 # include <string.h> 90 #endif 91 92 /* 93 * Supply prototypes for standard string functions provided by 94 * IPRT instead of the operating environment. 95 */ 96 #if defined(RT_OS_DARWIN) && defined(KERNEL) 26 #ifndef ___iprt_utf16_h 27 #define ___iprt_utf16_h 28 29 #include <iprt/string.h> 30 97 31 RT_C_DECLS_BEGIN 98 void *memchr(const void *pv, int ch, size_t cb);99 char *strpbrk(const char *pszStr, const char *pszChars);100 RT_C_DECLS_END101 #endif102 103 #if defined(RT_OS_FREEBSD) && defined(_KERNEL)104 RT_C_DECLS_BEGIN105 #if __FreeBSD_version < 900000106 void *memchr(const void *pv, int ch, size_t cb);107 #endif108 char *strpbrk(const char *pszStr, const char *pszChars);109 RT_C_DECLS_END110 #endif111 112 #if !defined(RT_OS_LINUX) || !defined(_GNU_SOURCE)113 RT_C_DECLS_BEGIN114 void *memrchr(const char *pv, int ch, size_t cb);115 RT_C_DECLS_END116 #endif117 118 119 /** @def RT_USE_RTC_3629120 * When defined the UTF-8 range will stop at 0x10ffff. If not defined, the121 * range stops at 0x7fffffff.122 * @remarks Must be defined both when building and using the IPRT. 
*/123 #ifdef DOXYGEN_RUNNING124 # define RT_USE_RTC_3629125 #endif126 127 128 /**129 * Byte zero the specified object.130 *131 * This will use sizeof(Obj) to figure the size and will call memset, bzero132 * or some compiler intrinsic to perform the actual zeroing.133 *134 * @param Obj The object to zero. Make sure to dereference pointers.135 *136 * @remarks Because the macro may use memset it has been placed in string.h137 * instead of cdefs.h to avoid build issues because someone forgot138 * to include this header.139 *140 * @ingroup grp_rt_cdefs141 */142 #define RT_ZERO(Obj) RT_BZERO(&(Obj), sizeof(Obj))143 144 /**145 * Byte zero the specified memory area.146 *147 * This will call memset, bzero or some compiler intrinsic to clear the148 * specified bytes of memory.149 *150 * @param pv Pointer to the memory.151 * @param cb The number of bytes to clear. Please, don't pass 0.152 *153 * @remarks Because the macro may use memset it has been placed in string.h154 * instead of cdefs.h to avoid build issues because someone forgot155 * to include this header.156 *157 * @ingroup grp_rt_cdefs158 */159 #define RT_BZERO(pv, cb) do { memset((pv), 0, cb); } while (0)160 161 162 163 /** @defgroup grp_rt_str RTStr - String Manipulation164 * Mostly UTF-8 related helpers where the standard string functions won't do.165 * @ingroup grp_rt166 * @{167 */168 169 RT_C_DECLS_BEGIN170 171 172 /**173 * The maximum string length.174 */175 #define RTSTR_MAX (~(size_t)0)176 177 178 /** @def RTSTR_TAG179 * The default allocation tag used by the RTStr allocation APIs.180 *181 * When not defined before the inclusion of iprt/string.h, this will default to182 * the pointer to the current file name. 
The string API will make of use of183 * this as pointer to a volatile but read-only string.184 */185 #if !defined(RTSTR_TAG) || defined(DOXYGEN_RUNNING)186 # define RTSTR_TAG (__FILE__)187 #endif188 189 190 #ifdef IN_RING3191 192 /**193 * Allocates tmp buffer with default tag, translates pszString from UTF8 to194 * current codepage.195 *196 * @returns iprt status code.197 * @param ppszString Receives pointer of allocated native CP string.198 * The returned pointer must be freed using RTStrFree().199 * @param pszString UTF-8 string to convert.200 */201 #define RTStrUtf8ToCurrentCP(ppszString, pszString) RTStrUtf8ToCurrentCPTag((ppszString), (pszString), RTSTR_TAG)202 203 /**204 * Allocates tmp buffer with custom tag, translates pszString from UTF8 to205 * current codepage.206 *207 * @returns iprt status code.208 * @param ppszString Receives pointer of allocated native CP string.209 * The returned pointer must be freed using210 * RTStrFree()., const char *pszTag211 * @param pszString UTF-8 string to convert.212 * @param pszTag Allocation tag used for statistics and such.213 */214 RTR3DECL(int) RTStrUtf8ToCurrentCPTag(char **ppszString, const char *pszString, const char *pszTag);215 216 /**217 * Allocates tmp buffer, translates pszString from current codepage to UTF-8.218 *219 * @returns iprt status code.220 * @param ppszString Receives pointer of allocated UTF-8 string.221 * The returned pointer must be freed using RTStrFree().222 * @param pszString Native string to convert.223 */224 #define RTStrCurrentCPToUtf8(ppszString, pszString) RTStrCurrentCPToUtf8Tag((ppszString), (pszString), RTSTR_TAG)225 226 /**227 * Allocates tmp buffer, translates pszString from current codepage to UTF-8.228 *229 * @returns iprt status code.230 * @param ppszString Receives pointer of allocated UTF-8 string.231 * The returned pointer must be freed using RTStrFree().232 * @param pszString Native string to convert.233 * @param pszTag Allocation tag used for statistics and such.234 */235 
RTR3DECL(int) RTStrCurrentCPToUtf8Tag(char **ppszString, const char *pszString, const char *pszTag);236 237 #endif /* IN_RING3 */238 239 /**240 * Free string allocated by any of the non-UCS-2 string functions.241 *242 * @returns iprt status code.243 * @param pszString Pointer to buffer with string to free.244 * NULL is accepted.245 */246 RTDECL(void) RTStrFree(char *pszString);247 248 /**249 * Allocates a new copy of the given UTF-8 string (default tag).250 *251 * @returns Pointer to the allocated UTF-8 string.252 * @param pszString UTF-8 string to duplicate.253 */254 #define RTStrDup(pszString) RTStrDupTag((pszString), RTSTR_TAG)255 256 /**257 * Allocates a new copy of the given UTF-8 string (custom tag).258 *259 * @returns Pointer to the allocated UTF-8 string.260 * @param pszString UTF-8 string to duplicate.261 * @param pszTag Allocation tag used for statistics and such.262 */263 RTDECL(char *) RTStrDupTag(const char *pszString, const char *pszTag);264 265 /**266 * Allocates a new copy of the given UTF-8 string (default tag).267 *268 * @returns iprt status code.269 * @param ppszString Receives pointer of the allocated UTF-8 string.270 * The returned pointer must be freed using RTStrFree().271 * @param pszString UTF-8 string to duplicate.272 */273 #define RTStrDupEx(ppszString, pszString) RTStrDupExTag((ppszString), (pszString), RTSTR_TAG)274 275 /**276 * Allocates a new copy of the given UTF-8 string (custom tag).277 *278 * @returns iprt status code.279 * @param ppszString Receives pointer of the allocated UTF-8 string.280 * The returned pointer must be freed using RTStrFree().281 * @param pszString UTF-8 string to duplicate.282 * @param pszTag Allocation tag used for statistics and such.283 */284 RTDECL(int) RTStrDupExTag(char **ppszString, const char *pszString, const char *pszTag);285 286 /**287 * Allocates a new copy of the given UTF-8 substring (default tag).288 *289 * @returns Pointer to the allocated UTF-8 substring.290 * @param pszString UTF-8 string to 
duplicate.291 * @param cchMax The max number of chars to duplicate, not counting292 * the terminator.293 */294 #define RTStrDupN(pszString, cchMax) RTStrDupNTag((pszString), (cchMax), RTSTR_TAG)295 296 /**297 * Allocates a new copy of the given UTF-8 substring (custom tag).298 *299 * @returns Pointer to the allocated UTF-8 substring.300 * @param pszString UTF-8 string to duplicate.301 * @param cchMax The max number of chars to duplicate, not counting302 * the terminator.303 * @param pszTag Allocation tag used for statistics and such.304 */305 RTDECL(char *) RTStrDupNTag(const char *pszString, size_t cchMax, const char *pszTag);306 307 /**308 * Appends a string onto an existing IPRT allocated string (default tag).309 *310 * @retval VINF_SUCCESS311 * @retval VERR_NO_STR_MEMORY if we failed to reallocate the string, @a *ppsz312 * remains unchanged.313 *314 * @param ppsz Pointer to the string pointer. The string315 * pointer must either be NULL or point to a string316 * returned by an IPRT string API. (In/Out)317 * @param pszAppend The string to append. NULL and empty strings318 * are quietly ignored.319 */320 #define RTStrAAppend(ppsz, pszAppend) RTStrAAppendTag((ppsz), (pszAppend), RTSTR_TAG)321 322 /**323 * Appends a string onto an existing IPRT allocated string (custom tag).324 *325 * @retval VINF_SUCCESS326 * @retval VERR_NO_STR_MEMORY if we failed to reallocate the string, @a *ppsz327 * remains unchanged.328 *329 * @param ppsz Pointer to the string pointer. The string330 * pointer must either be NULL or point to a string331 * returned by an IPRT string API. (In/Out)332 * @param pszAppend The string to append. 
NULL and empty strings333 * are quietly ignored.334 * @param pszTag Allocation tag used for statistics and such.335 */336 RTDECL(int) RTStrAAppendTag(char **ppsz, const char *pszAppend, const char *pszTag);337 338 /**339 * Appends N bytes from a string onto an existing IPRT allocated string340 * (default tag).341 *342 * @retval VINF_SUCCESS343 * @retval VERR_NO_STR_MEMORY if we failed to reallocate the string, @a *ppsz344 * remains unchanged.345 *346 * @param ppsz Pointer to the string pointer. The string347 * pointer must either be NULL or point to a string348 * returned by an IPRT string API. (In/Out)349 * @param pszAppend The string to append. Can be NULL if cchAppend350 * is zero.351 * @param cchAppend The number of chars (not code points) to append352 * from pszAppend. Must not be more than353 * @a pszAppend contains, except for the special354 * value RTSTR_MAX that can be used to indicate all355 * of @a pszAppend without having to strlen it.356 */357 #define RTStrAAppendN(ppsz, pszAppend, cchAppend) RTStrAAppendNTag((ppsz), (pszAppend), (cchAppend), RTSTR_TAG)358 359 /**360 * Appends N bytes from a string onto an existing IPRT allocated string (custom361 * tag).362 *363 * @retval VINF_SUCCESS364 * @retval VERR_NO_STR_MEMORY if we failed to reallocate the string, @a *ppsz365 * remains unchanged.366 *367 * @param ppsz Pointer to the string pointer. The string368 * pointer must either be NULL or point to a string369 * returned by an IPRT string API. (In/Out)370 * @param pszAppend The string to append. Can be NULL if cchAppend371 * is zero.372 * @param cchAppend The number of chars (not code points) to append373 * from pszAppend. 
Must not be more than374 * @a pszAppend contains, except for the special375 * value RTSTR_MAX that can be used to indicate all376 * of @a pszAppend without having to strlen it.377 * @param pszTag Allocation tag used for statistics and such.378 */379 RTDECL(int) RTStrAAppendNTag(char **ppsz, const char *pszAppend, size_t cchAppend, const char *pszTag);380 381 /**382 * Appends one or more strings onto an existing IPRT allocated string.383 *384 * This is a very flexible and efficient alternative to using RTStrAPrintf to385 * combine several strings together.386 *387 * @retval VINF_SUCCESS388 * @retval VERR_NO_STR_MEMORY if we failed to reallocate the string, @a *ppsz389 * remains unchanged.390 *391 * @param ppsz Pointer to the string pointer. The string392 * pointer must either be NULL or point to a string393 * returned by an IPRT string API. (In/Out)394 * @param cPairs The number of string / length pairs in the395 * @a va.396 * @param va List of string (const char *) and length397 * (size_t) pairs. The strings will be appended to398 * the string in the first argument.399 */400 #define RTStrAAppendExNV(ppsz, cPairs, va) RTStrAAppendExNVTag((ppsz), (cPairs), (va), RTSTR_TAG)401 402 /**403 * Appends one or more strings onto an existing IPRT allocated string.404 *405 * This is a very flexible and efficient alternative to using RTStrAPrintf to406 * combine several strings together.407 *408 * @retval VINF_SUCCESS409 * @retval VERR_NO_STR_MEMORY if we failed to reallocate the string, @a *ppsz410 * remains unchanged.411 *412 * @param ppsz Pointer to the string pointer. The string413 * pointer must either be NULL or point to a string414 * returned by an IPRT string API. (In/Out)415 * @param cPairs The number of string / length pairs in the416 * @a va.417 * @param va List of string (const char *) and length418 * (size_t) pairs. 
The strings will be appended to419 * the string in the first argument.420 * @param pszTag Allocation tag used for statistics and such.421 */422 RTDECL(int) RTStrAAppendExNVTag(char **ppsz, size_t cPairs, va_list va, const char *pszTag);423 424 /**425 * Appends one or more strings onto an existing IPRT allocated string426 * (untagged).427 *428 * This is a very flexible and efficient alternative to using RTStrAPrintf to429 * combine several strings together.430 *431 * @retval VINF_SUCCESS432 * @retval VERR_NO_STR_MEMORY if we failed to reallocate the string, @a *ppsz433 * remains unchanged.434 *435 * @param ppsz Pointer to the string pointer. The string436 * pointer must either be NULL or point to a string437 * returned by an IPRT string API. (In/Out)438 * @param cPairs The number of string / length pairs in the439 * ellipsis.440 * @param ... List of string (const char *) and length441 * (size_t) pairs. The strings will be appended to442 * the string in the first argument.443 */444 DECLINLINE(int) RTStrAAppendExN(char **ppsz, size_t cPairs, ...)445 {446 int rc;447 va_list va;448 va_start(va, cPairs);449 rc = RTStrAAppendExNVTag(ppsz, cPairs, va, RTSTR_TAG);450 va_end(va);451 return rc;452 }453 454 /**455 * Appends one or more strings onto an existing IPRT allocated string (custom456 * tag).457 *458 * This is a very flexible and efficient alternative to using RTStrAPrintf to459 * combine several strings together.460 *461 * @retval VINF_SUCCESS462 * @retval VERR_NO_STR_MEMORY if we failed to reallocate the string, @a *ppsz463 * remains unchanged.464 *465 * @param ppsz Pointer to the string pointer. The string466 * pointer must either be NULL or point to a string467 * returned by an IPRT string API. (In/Out)468 * @param pszTag Allocation tag used for statistics and such.469 * @param cPairs The number of string / length pairs in the470 * ellipsis.471 * @param ... List of string (const char *) and length472 * (size_t) pairs. 
The strings will be appended to473 * the string in the first argument.474 */475 DECLINLINE(int) RTStrAAppendExNTag(char **ppsz, const char *pszTag, size_t cPairs, ...)476 {477 int rc;478 va_list va;479 va_start(va, cPairs);480 rc = RTStrAAppendExNVTag(ppsz, cPairs, va, pszTag);481 va_end(va);482 return rc;483 }484 485 /**486 * Truncates an IPRT allocated string (default tag).487 *488 * @retval VINF_SUCCESS.489 * @retval VERR_OUT_OF_RANGE if cchNew is too long. Nothing is done.490 *491 * @param ppsz Pointer to the string pointer. The string492 * pointer can be NULL if @a cchNew is 0, no change493 * is made then. If we actually reallocate the494 * string, the string pointer might be changed by495 * this call. (In/Out)496 * @param cchNew The new string length (excluding the497 * terminator). The string must be at least this498 * long or we'll return VERR_OUT_OF_RANGE and499 * assert on you.500 */501 #define RTStrATruncate(ppsz, cchNew) RTStrATruncateTag((ppsz), (cchNew), RTSTR_TAG)502 503 /**504 * Truncates an IPRT allocated string.505 *506 * @retval VINF_SUCCESS.507 * @retval VERR_OUT_OF_RANGE if cchNew is too long. Nothing is done.508 *509 * @param ppsz Pointer to the string pointer. The string510 * pointer can be NULL if @a cchNew is 0, no change511 * is made then. If we actually reallocate the512 * string, the string pointer might be changed by513 * this call. (In/Out)514 * @param cchNew The new string length (excluding the515 * terminator). 
The string must be at least this516 * long or we'll return VERR_OUT_OF_RANGE and517 * assert on you.518 * @param pszTag Allocation tag used for statistics and such.519 */520 RTDECL(int) RTStrATruncateTag(char **ppsz, size_t cchNew, const char *pszTag);521 522 /**523 * Allocates memory for string storage (default tag).524 *525 * You should normally not use this function, except if there is some very526 * custom string handling you need doing that isn't covered by any of the other527 * APIs.528 *529 * @returns Pointer to the allocated string. The first byte is always set530 * to the string terminator char, the contents of the remainder of the531 * memory is undefined. The string must be freed by calling RTStrFree.532 *533 * NULL is returned if the allocation failed. Please translate this to534 * VERR_NO_STR_MEMORY and not VERR_NO_MEMORY. Also consider535 * RTStrAllocEx if an IPRT status code is required.536 *537 * @param cb How many bytes to allocate. If this is zero, we538 * will allocate a terminator byte anyway.539 */540 #define RTStrAlloc(cb) RTStrAllocTag((cb), RTSTR_TAG)541 542 /**543 * Allocates memory for string storage (custom tag).544 *545 * You should normally not use this function, except if there is some very546 * custom string handling you need doing that isn't covered by any of the other547 * APIs.548 *549 * @returns Pointer to the allocated string. The first byte is always set550 * to the string terminator char, the contents of the remainder of the551 * memory is undefined. The string must be freed by calling RTStrFree.552 *553 * NULL is returned if the allocation failed. Please translate this to554 * VERR_NO_STR_MEMORY and not VERR_NO_MEMORY. Also consider555 * RTStrAllocEx if an IPRT status code is required.556 *557 * @param cb How many bytes to allocate. 
If this is zero, we558 * will allocate a terminator byte anyway.559 * @param pszTag Allocation tag used for statistics and such.560 */561 RTDECL(char *) RTStrAllocTag(size_t cb, const char *pszTag);562 563 /**564 * Allocates memory for string storage, with status code (default tag).565 *566 * You should normally not use this function, except if there is some very567 * custom string handling you need doing that isn't covered by any of the other568 * APIs.569 *570 * @retval VINF_SUCCESS571 * @retval VERR_NO_STR_MEMORY572 *573 * @param ppsz Where to return the allocated string. This will574 * be set to NULL on failure. On success, the575 * returned memory will always start with a576 * terminator char so that it is considered a valid577 * C string, the contents of rest of the memory is578 * undefined.579 * @param cb How many bytes to allocate. If this is zero, we580 * will allocate a terminator byte anyway.581 */582 #define RTStrAllocEx(ppsz, cb) RTStrAllocExTag((ppsz), (cb), RTSTR_TAG)583 584 /**585 * Allocates memory for string storage, with status code (custom tag).586 *587 * You should normally not use this function, except if there is some very588 * custom string handling you need doing that isn't covered by any of the other589 * APIs.590 *591 * @retval VINF_SUCCESS592 * @retval VERR_NO_STR_MEMORY593 *594 * @param ppsz Where to return the allocated string. This will595 * be set to NULL on failure. On success, the596 * returned memory will always start with a597 * terminator char so that it is considered a valid598 * C string, the contents of rest of the memory is599 * undefined.600 * @param cb How many bytes to allocate. 
If this is zero, we601 * will allocate a terminator byte anyway.602 * @param pszTag Allocation tag used for statistics and such.603 */604 RTDECL(int) RTStrAllocExTag(char **ppsz, size_t cb, const char *pszTag);605 606 /**607 * Reallocates the specified string (default tag).608 *609 * You should normally not have to use this function, except perhaps to truncate a610 * really long string you've got from some IPRT string API, but then you should611 * use RTStrATruncate.612 *613 * @returns VINF_SUCCESS.614 * @retval VERR_NO_STR_MEMORY if we failed to reallocate the string, @a *ppsz615 * remains unchanged.616 *617 * @param ppsz Pointer to the string variable containing the618 * input and output string.619 *620 * When not freeing the string, the result will621 * always have the last byte set to the terminator622 * character so that when used for string623 * truncation the result will be a valid C string624 * (your job to keep it a valid UTF-8 string).625 *626 * When the input string is NULL and we're supposed627 * to reallocate, the returned string will also628 * have the first byte set to the terminator char629 * so it will be a valid C string.630 *631 * @param cbNew When @a cbNew is zero, we'll behave like632 * RTStrFree and @a *ppsz will be set to NULL.633 *634 * When not zero, this will be the new size of the635 * memory backing the string, i.e. 
it includes the636 * terminator char.637 */638 #define RTStrRealloc(ppsz, cbNew) RTStrReallocTag((ppsz), (cbNew), RTSTR_TAG)639 640 /**641 * Reallocates the specified string (custom tag).642 *643 * You should normally not have use this function, except perhaps to truncate a644 * really long string you've got from some IPRT string API, but then you should645 * use RTStrATruncate.646 *647 * @returns VINF_SUCCESS.648 * @retval VERR_NO_STR_MEMORY if we failed to reallocate the string, @a *ppsz649 * remains unchanged.650 *651 * @param ppsz Pointer to the string variable containing the652 * input and output string.653 *654 * When not freeing the string, the result will655 * always have the last byte set to the terminator656 * character so that when used for string657 * truncation the result will be a valid C string658 * (your job to keep it a valid UTF-8 string).659 *660 * When the input string is NULL and we're supposed661 * to reallocate, the returned string will also662 * have the first byte set to the terminator char663 * so it will be a valid C string.664 *665 * @param cbNew When @a cbNew is zero, we'll behave like666 * RTStrFree and @a *ppsz will be set to NULL.667 *668 * When not zero, this will be the new size of the669 * memory backing the string, i.e. it includes the670 * terminator char.671 * @param pszTag Allocation tag used for statistics and such.672 */673 RTDECL(int) RTStrReallocTag(char **ppsz, size_t cbNew, const char *pszTag);674 675 /**676 * Validates the UTF-8 encoding of the string.677 *678 * @returns iprt status code.679 * @param psz The string.680 */681 RTDECL(int) RTStrValidateEncoding(const char *psz);682 683 /** @name Flags for RTStrValidateEncodingEx and RTUtf16ValidateEncodingEx684 */685 /** Check that the string is zero terminated within the given size.686 * VERR_BUFFER_OVERFLOW will be returned if the check fails. 
*/687 #define RTSTR_VALIDATE_ENCODING_ZERO_TERMINATED RT_BIT_32(0)688 /** Check that the string is exactly the given length.689 * If it terminates early, VERR_BUFFER_UNDERFLOW will be returned. When used690 * together with RTSTR_VALIDATE_ENCODING_ZERO_TERMINATED, the given length must691 * include the terminator or VERR_BUFFER_OVERFLOW will be returned. */692 #define RTSTR_VALIDATE_ENCODING_EXACT_LENGTH RT_BIT_32(1)693 /** @} */694 695 /**696 * Validates the UTF-8 encoding of the string.697 *698 * @returns iprt status code.699 * @param psz The string.700 * @param cch The max string length (/ size). Use RTSTR_MAX to701 * process the entire string.702 * @param fFlags Combination of RTSTR_VALIDATE_ENCODING_XXX flags.703 */704 RTDECL(int) RTStrValidateEncodingEx(const char *psz, size_t cch, uint32_t fFlags);705 706 /**707 * Checks if the UTF-8 encoding is valid.708 *709 * @returns true / false.710 * @param psz The string.711 */712 RTDECL(bool) RTStrIsValidEncoding(const char *psz);713 714 /**715 * Purge all bad UTF-8 encoding in the string, replacing it with '?'.716 *717 * @returns The number of bad characters (0 if nothing was done).718 * @param psz The string to purge.719 */720 RTDECL(size_t) RTStrPurgeEncoding(char *psz);721 722 /**723 * Sanitise a (valid) UTF-8 string by replacing all characters outside a white724 * list in-place by an ASCII replacement character. Multi-byte characters will725 * be replaced byte by byte.726 *727 * @returns The number of code points replaced, or a negative value if the728 * string is not correctly encoded. 
In this last case the string729 * may be partially processed.730 * @param psz The string to sanitise.731 * @param puszValidSet A zero-terminated array of pairs of Unicode points.732 * Each pair is the start and end point of a range,733 * and the union of these ranges forms the white list.734 * @param chReplacement The ASCII replacement character.735 */736 RTDECL(ssize_t) RTStrPurgeComplementSet(char *psz, PCRTUNICP puszValidSet, char chReplacement);737 738 /**739 * Gets the number of code points the string is made up of, excluding740 * the terminator.741 *742 *743 * @returns Number of code points (RTUNICP).744 * @returns 0 if the string was incorrectly encoded.745 * @param psz The string.746 */747 RTDECL(size_t) RTStrUniLen(const char *psz);748 749 /**750 * Gets the number of code points the string is made up of, excluding751 * the terminator.752 *753 * This function will validate the string, and incorrectly encoded UTF-8754 * strings will be rejected.755 *756 * @returns iprt status code.757 * @param psz The string.758 * @param cch The max string length. Use RTSTR_MAX to process the entire string.759 * @param pcuc Where to store the code point count.760 * This is undefined on failure.761 */762 RTDECL(int) RTStrUniLenEx(const char *psz, size_t cch, size_t *pcuc);763 764 /**765 * Translate a UTF-8 string into an unicode string (i.e. RTUNICPs), allocating the string buffer.766 *767 * @returns iprt status code.768 * @param pszString UTF-8 string to convert.769 * @param ppUniString Receives pointer to the allocated unicode string.770 * The returned string must be freed using RTUniFree().771 */772 RTDECL(int) RTStrToUni(const char *pszString, PRTUNICP *ppUniString);773 774 /**775 * Translates pszString from UTF-8 to an array of code points, allocating the result776 * array if requested.777 *778 * @returns iprt status code.779 * @param pszString UTF-8 string to convert.780 * @param cchString The maximum size in chars (the type) to convert. 
The conversion stop781 * when it reaches cchString or the string terminator ('\\0').782 * Use RTSTR_MAX to translate the entire string.783 * @param ppaCps If cCps is non-zero, this must either be pointing to pointer to784 * a buffer of the specified size, or pointer to a NULL pointer.785 * If *ppusz is NULL or cCps is zero a buffer of at least cCps items786 * will be allocated to hold the translated string.787 * If a buffer was requested it must be freed using RTUtf16Free().788 * @param cCps The number of code points in the unicode string. This includes the terminator.789 * @param pcCps Where to store the length of the translated string,790 * excluding the terminator. (Optional)791 *792 * This may be set under some error conditions,793 * however, only for VERR_BUFFER_OVERFLOW and794 * VERR_NO_STR_MEMORY will it contain a valid string795 * length that can be used to resize the buffer.796 */797 RTDECL(int) RTStrToUniEx(const char *pszString, size_t cchString, PRTUNICP *ppaCps, size_t cCps, size_t *pcCps);798 799 /**800 * Calculates the length of the string in RTUTF16 items.801 *802 * This function will validate the string, and incorrectly encoded UTF-8803 * strings will be rejected. The primary purpose of this function is to804 * help allocate buffers for RTStrToUtf16Ex of the correct size. For most805 * other purposes RTStrCalcUtf16LenEx() should be used.806 *807 * @returns Number of RTUTF16 items.808 * @returns 0 if the string was incorrectly encoded.809 * @param psz The string.810 */811 RTDECL(size_t) RTStrCalcUtf16Len(const char *psz);812 813 /**814 * Calculates the length of the string in RTUTF16 items.815 *816 * This function will validate the string, and incorrectly encoded UTF-8817 * strings will be rejected.818 *819 * @returns iprt status code.820 * @param psz The string.821 * @param cch The max string length. Use RTSTR_MAX to process the entire string.822 * @param pcwc Where to store the string length. 
Optional.823 * This is undefined on failure.824 */825 RTDECL(int) RTStrCalcUtf16LenEx(const char *psz, size_t cch, size_t *pcwc);826 827 /**828 * Translate a UTF-8 string into a UTF-16 allocating the result buffer (default829 * tag).830 *831 * @returns iprt status code.832 * @param pszString UTF-8 string to convert.833 * @param ppwszString Receives pointer to the allocated UTF-16 string.834 * The returned string must be freed using RTUtf16Free().835 */836 #define RTStrToUtf16(pszString, ppwszString) RTStrToUtf16Tag((pszString), (ppwszString), RTSTR_TAG)837 838 /**839 * Translate a UTF-8 string into a UTF-16 allocating the result buffer (custom840 * tag).841 *842 * @returns iprt status code.843 * @param pszString UTF-8 string to convert.844 * @param ppwszString Receives pointer to the allocated UTF-16 string.845 * The returned string must be freed using RTUtf16Free().846 * @param pszTag Allocation tag used for statistics and such.847 */848 RTDECL(int) RTStrToUtf16Tag(const char *pszString, PRTUTF16 *ppwszString, const char *pszTag);849 850 /**851 * Translates pszString from UTF-8 to UTF-16, allocating the result buffer if requested.852 *853 * @returns iprt status code.854 * @param pszString UTF-8 string to convert.855 * @param cchString The maximum size in chars (the type) to convert. The conversion stop856 * when it reaches cchString or the string terminator ('\\0').857 * Use RTSTR_MAX to translate the entire string.858 * @param ppwsz If cwc is non-zero, this must either be pointing to pointer to859 * a buffer of the specified size, or pointer to a NULL pointer.860 * If *ppwsz is NULL or cwc is zero a buffer of at least cwc items861 * will be allocated to hold the translated string.862 * If a buffer was requested it must be freed using RTUtf16Free().863 * @param cwc The buffer size in RTUTF16s. This includes the terminator.864 * @param pcwc Where to store the length of the translated string,865 * excluding the terminator. 
(Optional)866 *867 * This may be set under some error conditions,868 * however, only for VERR_BUFFER_OVERFLOW and869 * VERR_NO_STR_MEMORY will it contain a valid string870 * length that can be used to resize the buffer.871 */872 #define RTStrToUtf16Ex(pszString, cchString, ppwsz, cwc, pcwc) \873 RTStrToUtf16ExTag((pszString), (cchString), (ppwsz), (cwc), (pcwc), RTSTR_TAG)874 875 /**876 * Translates pszString from UTF-8 to UTF-16, allocating the result buffer if877 * requested (custom tag).878 *879 * @returns iprt status code.880 * @param pszString UTF-8 string to convert.881 * @param cchString The maximum size in chars (the type) to convert. The conversion stop882 * when it reaches cchString or the string terminator ('\\0').883 * Use RTSTR_MAX to translate the entire string.884 * @param ppwsz If cwc is non-zero, this must either be pointing to pointer to885 * a buffer of the specified size, or pointer to a NULL pointer.886 * If *ppwsz is NULL or cwc is zero a buffer of at least cwc items887 * will be allocated to hold the translated string.888 * If a buffer was requested it must be freed using RTUtf16Free().889 * @param cwc The buffer size in RTUTF16s. This includes the terminator.890 * @param pcwc Where to store the length of the translated string,891 * excluding the terminator. (Optional)892 *893 * This may be set under some error conditions,894 * however, only for VERR_BUFFER_OVERFLOW and895 * VERR_NO_STR_MEMORY will it contain a valid string896 * length that can be used to resize the buffer.897 * @param pszTag Allocation tag used for statistics and such.898 */899 RTDECL(int) RTStrToUtf16ExTag(const char *pszString, size_t cchString, PRTUTF16 *ppwsz, size_t cwc, size_t *pcwc, const char *pszTag);900 901 902 /**903 * Calculates the length of the string in Latin-1 characters.904 *905 * This function will validate the string, and incorrectly encoded UTF-8906 * strings as well as string with codepoints outside the latin-1 range will be907 * rejected. 
The primary purpose of this function is to help allocate buffers908 * for RTStrToLatin1Ex of the correct size. For most other purposes909 * RTStrCalcLatin1LenEx() should be used.910 *911 * @returns Number of Latin-1 characters.912 * @returns 0 if the string was incorrectly encoded.913 * @param psz The string.914 */915 RTDECL(size_t) RTStrCalcLatin1Len(const char *psz);916 917 /**918 * Calculates the length of the string in Latin-1 characters.919 *920 * This function will validate the string, and incorrectly encoded UTF-8921 * strings as well as string with codepoints outside the latin-1 range will be922 * rejected.923 *924 * @returns iprt status code.925 * @param psz The string.926 * @param cch The max string length. Use RTSTR_MAX to process the927 * entire string.928 * @param pcch Where to store the string length. Optional.929 * This is undefined on failure.930 */931 RTDECL(int) RTStrCalcLatin1LenEx(const char *psz, size_t cch, size_t *pcch);932 933 /**934 * Translate a UTF-8 string into a Latin-1 allocating the result buffer (default935 * tag).936 *937 * @returns iprt status code.938 * @param pszString UTF-8 string to convert.939 * @param ppszString Receives pointer to the allocated Latin-1 string.940 * The returned string must be freed using RTStrFree().941 */942 #define RTStrToLatin1(pszString, ppszString) RTStrToLatin1Tag((pszString), (ppszString), RTSTR_TAG)943 944 /**945 * Translate a UTF-8 string into a Latin-1 allocating the result buffer (custom946 * tag).947 *948 * @returns iprt status code.949 * @param pszString UTF-8 string to convert.950 * @param ppszString Receives pointer to the allocated Latin-1 string.951 * The returned string must be freed using RTStrFree().952 * @param pszTag Allocation tag used for statistics and such.953 */954 RTDECL(int) RTStrToLatin1Tag(const char *pszString, char **ppszString, const char *pszTag);955 956 /**957 * Translates pszString from UTF-8 to Latin-1, allocating the result buffer if requested.958 *959 * @returns iprt 
status code.960 * @param pszString UTF-8 string to convert.961 * @param cchString The maximum size in chars (the type) to convert.962 * The conversion stop when it reaches cchString or963 * the string terminator ('\\0'). Use RTSTR_MAX to964 * translate the entire string.965 * @param ppsz If cch is non-zero, this must either be pointing to966 * pointer to a buffer of the specified size, or967 * pointer to a NULL pointer. If *ppsz is NULL or cch968 * is zero a buffer of at least cch items will be969 * allocated to hold the translated string. If a970 * buffer was requested it must be freed using971 * RTStrFree().972 * @param cch The buffer size in bytes. This includes the973 * terminator.974 * @param pcch Where to store the length of the translated string,975 * excluding the terminator. (Optional)976 *977 * This may be set under some error conditions,978 * however, only for VERR_BUFFER_OVERFLOW and979 * VERR_NO_STR_MEMORY will it contain a valid string980 * length that can be used to resize the buffer.981 */982 #define RTStrToLatin1Ex(pszString, cchString, ppsz, cch, pcch) \983 RTStrToLatin1ExTag((pszString), (cchString), (ppsz), (cch), (pcch), RTSTR_TAG)984 985 /**986 * Translates pszString from UTF-8 to Latin1, allocating the result buffer if987 * requested (custom tag).988 *989 * @returns iprt status code.990 * @param pszString UTF-8 string to convert.991 * @param cchString The maximum size in chars (the type) to convert.992 * The conversion stop when it reaches cchString or993 * the string terminator ('\\0'). Use RTSTR_MAX to994 * translate the entire string.995 * @param ppsz If cch is non-zero, this must either be pointing to996 * pointer to a buffer of the specified size, or997 * pointer to a NULL pointer. If *ppsz is NULL or cch998 * is zero a buffer of at least cch items will be999 * allocated to hold the translated string. If a1000 * buffer was requested it must be freed using1001 * RTStrFree().1002 * @param cch The buffer size in bytes. 
This includes the1003 * terminator.1004 * @param pcch Where to store the length of the translated string,1005 * excluding the terminator. (Optional)1006 *1007 * This may be set under some error conditions,1008 * however, only for VERR_BUFFER_OVERFLOW and1009 * VERR_NO_STR_MEMORY will it contain a valid string1010 * length that can be used to resize the buffer.1011 * @param pszTag Allocation tag used for statistics and such.1012 */1013 RTDECL(int) RTStrToLatin1ExTag(const char *pszString, size_t cchString, char **ppsz, size_t cch, size_t *pcch, const char *pszTag);1014 1015 1016 /**1017 * Translate a Latin1 string into a UTF-8 allocating the result buffer (default1018 * tag).1019 *1020 * @returns iprt status code.1021 * @param pszString Latin1 string to convert.1022 * @param ppszString Receives pointer of allocated UTF-8 string on1023 * success, and is always set to NULL on failure.1024 * The returned pointer must be freed using RTStrFree().1025 */1026 #define RTLatin1ToUtf8(pszString, ppszString) RTLatin1ToUtf8Tag((pszString), (ppszString), RTSTR_TAG)1027 1028 /**1029 * Translate a Latin-1 string into a UTF-8 allocating the result buffer.1030 *1031 * @returns iprt status code.1032 * @param pszString Latin-1 string to convert.1033 * @param ppszString Receives pointer of allocated UTF-8 string on1034 * success, and is always set to NULL on failure.1035 * The returned pointer must be freed using RTStrFree().1036 * @param pszTag Allocation tag used for statistics and such.1037 */1038 RTDECL(int) RTLatin1ToUtf8Tag(const char *pszString, char **ppszString, const char *pszTag);1039 1040 /**1041 * Translates Latin-1 to UTF-8 using buffer provided by the caller or a fittingly1042 * sized buffer allocated by the function (default tag).1043 *1044 * @returns iprt status code.1045 * @param pszString The Latin-1 string to convert.1046 * @param cchString The number of Latin-1 characters to translate from1047 * pszString. 
The translation will stop when reaching1048 * cchString or the terminator ('\\0'). Use RTSTR_MAX1049 * to translate the entire string.1050 * @param ppsz If cch is non-zero, this must either be pointing to1051 * a pointer to a buffer of the specified size, or1052 * pointer to a NULL pointer. If *ppsz is NULL or cch1053 * is zero a buffer of at least cch chars will be1054 * allocated to hold the translated string. If a1055 * buffer was requested it must be freed using1056 * RTStrFree().1057 * @param cch The buffer size in chars (the type). This includes the terminator.1058 * @param pcch Where to store the length of the translated string,1059 * excluding the terminator. (Optional)1060 *1061 * This may be set under some error conditions,1062 * however, only for VERR_BUFFER_OVERFLOW and1063 * VERR_NO_STR_MEMORY will it contain a valid string1064 * length that can be used to resize the buffer.1065 */1066 #define RTLatin1ToUtf8Ex(pszString, cchString, ppsz, cch, pcch) \1067 RTLatin1ToUtf8ExTag((pszString), (cchString), (ppsz), (cch), (pcch), RTSTR_TAG)1068 1069 /**1070 * Translates Latin1 to UTF-8 using buffer provided by the caller or a fittingly1071 * sized buffer allocated by the function (custom tag).1072 *1073 * @returns iprt status code.1074 * @param pszString The Latin1 string to convert.1075 * @param cchString The number of Latin1 characters to translate from1076 * pwszString. The translation will stop when1077 * reaching cchString or the terminator ('\\0'). Use1078 * RTSTR_MAX to translate the entire string.1079 * @param ppsz If cch is non-zero, this must either be pointing to1080 * a pointer to a buffer of the specified size, or1081 * pointer to a NULL pointer. If *ppsz is NULL or cch1082 * is zero a buffer of at least cch chars will be1083 * allocated to hold the translated string. If a1084 * buffer was requested it must be freed using1085 * RTStrFree().1086 * @param cch The buffer size in chars (the type). 
This includes1087 * the terminator.1088 * @param pcch Where to store the length of the translated string,1089 * excluding the terminator. (Optional)1090 *1091 * This may be set under some error conditions,1092 * however, only for VERR_BUFFER_OVERFLOW and1093 * VERR_NO_STR_MEMORY will it contain a valid string1094 * length that can be used to resize the buffer.1095 * @param pszTag Allocation tag used for statistics and such.1096 */1097 RTDECL(int) RTLatin1ToUtf8ExTag(const char *pszString, size_t cchString, char **ppsz, size_t cch, size_t *pcch, const char *pszTag);1098 1099 /**1100 * Calculates the length of the Latin-1 string in UTF-8 chars (bytes).1101 *1102 * The primary purpose of this function is to help allocate buffers for1103 * RTLatin1ToUtf8() of the correct size. For most other purposes1104 * RTLatin1ToUtf8Ex() should be used.1105 *1106 * @returns Number of chars (bytes).1107 * @returns 0 if the string was incorrectly encoded.1108 * @param psz The Latin-1 string.1109 */1110 RTDECL(size_t) RTLatin1CalcUtf8Len(const char *psz);1111 1112 /**1113 * Calculates the length of the Latin-1 string in UTF-8 chars (bytes).1114 *1115 * @returns iprt status code.1116 * @param psz The string.1117 * @param cch The max string length. Use RTSTR_MAX to process the entire string.1118 * @param pcch Where to store the string length (in bytes). 
Optional.1119 * This is undefined on failure.1120 */1121 RTDECL(int) RTLatin1CalcUtf8LenEx(const char *psz, size_t cch, size_t *pcch);1122 1123 /**1124 * Get the unicode code point at the given string position.1125 *1126 * @returns unicode code point.1127 * @returns RTUNICP_INVALID if the encoding is invalid.1128 * @param psz The string.1129 */1130 RTDECL(RTUNICP) RTStrGetCpInternal(const char *psz);1131 1132 /**1133 * Get the unicode code point at the given string position.1134 *1135 * @returns iprt status code1136 * @returns VERR_INVALID_UTF8_ENCODING if the encoding is invalid.1137 * @param ppsz The string cursor.1138 * This is advanced one character forward on failure.1139 * @param pCp Where to store the unicode code point.1140 * Stores RTUNICP_INVALID if the encoding is invalid.1141 */1142 RTDECL(int) RTStrGetCpExInternal(const char **ppsz, PRTUNICP pCp);1143 1144 /**1145 * Get the unicode code point at the given string position for a string of a1146 * given length.1147 *1148 * @returns iprt status code1149 * @retval VERR_INVALID_UTF8_ENCODING if the encoding is invalid.1150 * @retval VERR_END_OF_STRING if *pcch is 0. *pCp is set to RTUNICP_INVALID.1151 *1152 * @param ppsz The string.1153 * @param pcch Pointer to the length of the string. This will be1154 * decremented by the size of the code point.1155 * @param pCp Where to store the unicode code point.1156 * Stores RTUNICP_INVALID if the encoding is invalid.1157 */1158 RTDECL(int) RTStrGetCpNExInternal(const char **ppsz, size_t *pcch, PRTUNICP pCp);1159 1160 /**1161 * Put the unicode code point at the given string position1162 * and return the pointer to the char following it.1163 *1164 * This function will not consider anything at or following the1165 * buffer area pointed to by psz. 
It is therefore not suitable for1166 * inserting code points into a string, only appending/overwriting.1167 *1168 * @returns pointer to the char following the written code point.1169 * @param psz The string.1170 * @param CodePoint The code point to write.1171 * This should not be RTUNICP_INVALID or any other1172 * character out of the UTF-8 range.1173 *1174 * @remark This is a worker function for RTStrPutCp().1175 *1176 */1177 RTDECL(char *) RTStrPutCpInternal(char *psz, RTUNICP CodePoint);1178 1179 /**1180 * Get the unicode code point at the given string position.1181 *1182 * @returns unicode code point.1183 * @returns RTUNICP_INVALID if the encoding is invalid.1184 * @param psz The string.1185 *1186 * @remark We optimize this operation by using an inline function for1187 * the most frequent and simplest sequence, the rest is1188 * handled by RTStrGetCpInternal().1189 */1190 DECLINLINE(RTUNICP) RTStrGetCp(const char *psz)1191 {1192 const unsigned char uch = *(const unsigned char *)psz;1193 if (!(uch & RT_BIT(7)))1194 return uch;1195 return RTStrGetCpInternal(psz);1196 }1197 1198 /**1199 * Get the unicode code point at the given string position.1200 *1201 * @returns iprt status code.1202 * @param ppsz Pointer to the string pointer. 
This will be updated to1203 * point to the char following the current code point.1204 * This is advanced one character forward on failure.1205 * @param pCp Where to store the code point.1206 * RTUNICP_INVALID is stored here on failure.1207 *1208 * @remark We optimize this operation by using an inline function for1209 * the most frequent and simplest sequence, the rest is1210 * handled by RTStrGetCpExInternal().1211 */1212 DECLINLINE(int) RTStrGetCpEx(const char **ppsz, PRTUNICP pCp)1213 {1214 const unsigned char uch = **(const unsigned char **)ppsz;1215 if (!(uch & RT_BIT(7)))1216 {1217 (*ppsz)++;1218 *pCp = uch;1219 return VINF_SUCCESS;1220 }1221 return RTStrGetCpExInternal(ppsz, pCp);1222 }1223 1224 /**1225 * Get the unicode code point at the given string position for a string of a1226 * given maximum length.1227 *1228 * @returns iprt status code.1229 * @retval VERR_INVALID_UTF8_ENCODING if the encoding is invalid.1230 * @retval VERR_END_OF_STRING if *pcch is 0. *pCp is set to RTUNICP_INVALID.1231 *1232 * @param ppsz Pointer to the string pointer. This will be updated to1233 * point to the char following the current code point.1234 * @param pcch Pointer to the maximum string length. 
This will be1235 * decremented by the size of the code point found.1236 * @param pCp Where to store the code point.1237 * RTUNICP_INVALID is stored here on failure.1238 *1239 * @remark We optimize this operation by using an inline function for1240 * the most frequent and simplest sequence, the rest is1241 * handled by RTStrGetCpNExInternal().1242 */1243 DECLINLINE(int) RTStrGetCpNEx(const char **ppsz, size_t *pcch, PRTUNICP pCp)1244 {1245 if (RT_LIKELY(*pcch != 0))1246 {1247 const unsigned char uch = **(const unsigned char **)ppsz;1248 if (!(uch & RT_BIT(7)))1249 {1250 (*ppsz)++;1251 (*pcch)--;1252 *pCp = uch;1253 return VINF_SUCCESS;1254 }1255 }1256 return RTStrGetCpNExInternal(ppsz, pcch, pCp);1257 }1258 1259 /**1260 * Get the UTF-8 size in characters of a given Unicode code point.1261 *1262 * The code point is expected to be a valid Unicode one, but not necessarily in1263 * the range supported by UTF-8.1264 *1265 * @returns The number of chars (bytes) required to encode the code point, or1266 * zero if there is no UTF-8 encoding.1267 * @param CodePoint The unicode code point.1268 */1269 DECLINLINE(size_t) RTStrCpSize(RTUNICP CodePoint)1270 {1271 if (CodePoint < 0x00000080)1272 return 1;1273 if (CodePoint < 0x00000800)1274 return 2;1275 if (CodePoint < 0x00010000)1276 return 3;1277 #ifdef RT_USE_RTC_36291278 if (CodePoint < 0x00011000)1279 return 4;1280 #else1281 if (CodePoint < 0x00200000)1282 return 4;1283 if (CodePoint < 0x04000000)1284 return 5;1285 if (CodePoint < 0x7fffffff)1286 return 6;1287 #endif1288 return 0;1289 }1290 1291 /**1292 * Put the unicode code point at the given string position1293 * and return the pointer to the char following it.1294 *1295 * This function will not consider anything at or following the1296 * buffer area pointed to by psz. 
It is therefore not suitable for1297 * inserting code points into a string, only appending/overwriting.1298 *1299 * @returns pointer to the char following the written code point.1300 * @param psz The string.1301 * @param CodePoint The code point to write.1302 * This should not be RTUNICP_INVALID or any other1303 * character out of the UTF-8 range.1304 *1305 * @remark We optimize this operation by using an inline function for1306 * the most frequent and simplest sequence, the rest is1307 * handled by RTStrPutCpInternal().1308 */1309 DECLINLINE(char *) RTStrPutCp(char *psz, RTUNICP CodePoint)1310 {1311 if (CodePoint < 0x80)1312 {1313 *psz++ = (unsigned char)CodePoint;1314 return psz;1315 }1316 return RTStrPutCpInternal(psz, CodePoint);1317 }1318 1319 /**1320 * Skips ahead, past the current code point.1321 *1322 * @returns Pointer to the char after the current code point.1323 * @param psz Pointer to the current code point.1324 * @remark This will not move the next valid code point, only past the current one.1325 */1326 DECLINLINE(char *) RTStrNextCp(const char *psz)1327 {1328 RTUNICP Cp;1329 RTStrGetCpEx(&psz, &Cp);1330 return (char *)psz;1331 }1332 1333 /**1334 * Skips back to the previous code point.1335 *1336 * @returns Pointer to the char before the current code point.1337 * @returns pszStart on failure.1338 * @param pszStart Pointer to the start of the string.1339 * @param psz Pointer to the current code point.1340 */1341 RTDECL(char *) RTStrPrevCp(const char *pszStart, const char *psz);1342 1343 /**1344 * Get the unicode code point at the given string position.1345 *1346 * @returns unicode code point.1347 * @returns RTUNICP_INVALID if the encoding is invalid.1348 * @param psz The string.1349 */1350 DECLINLINE(RTUNICP) RTLatin1GetCp(const char *psz)1351 {1352 return *(const unsigned char *)psz;1353 }1354 1355 /**1356 * Get the unicode code point at the given string position.1357 *1358 * @returns iprt status code.1359 * @param ppsz Pointer to the string pointer. 
This will be updated to
 *                      point to the char following the current code point,
 *                      i.e. it is always advanced by exactly one char.
 * @param   pCp         Where to store the code point.
 *
 * @remark  The byte is stored as the code point without any decoding, so
 *          this function always returns VINF_SUCCESS.
 */
DECLINLINE(int) RTLatin1GetCpEx(const char **ppsz, PRTUNICP pCp)
{
    unsigned char const uchCur = **(unsigned char const **)ppsz;
    *ppsz += 1;
    *pCp   = uchCur;
    return VINF_SUCCESS;
}

/**
 * Get the unicode code point at the given string position for a string of a
 * given maximum length.
 *
 * @returns iprt status code.
 * @retval  VERR_END_OF_STRING if *pcch is 0. *pCp is set to RTUNICP_INVALID.
 *
 * @param   ppsz        Pointer to the string pointer.  This will be updated to
 *                      point to the char following the current code point.
 * @param   pcch        Pointer to the maximum string length.
This will be1387 * decremented by the size of the code point found.1388 * @param pCp Where to store the code point.1389 * RTUNICP_INVALID is stored here on failure.1390 */1391 DECLINLINE(int) RTLatin1GetCpNEx(const char **ppsz, size_t *pcch, PRTUNICP pCp)1392 {1393 if (RT_LIKELY(*pcch != 0))1394 {1395 const unsigned char uch = **(const unsigned char **)ppsz;1396 (*ppsz)++;1397 (*pcch)--;1398 *pCp = uch;1399 return VINF_SUCCESS;1400 }1401 *pCp = RTUNICP_INVALID;1402 return VERR_END_OF_STRING;1403 }1404 1405 /**1406 * Get the Latin-1 size in characters of a given Unicode code point.1407 *1408 * The code point is expected to be a valid Unicode one, but not necessarily in1409 * the range supported by Latin-1.1410 *1411 * @returns the size in characters, or zero if there is no Latin-1 encoding1412 */1413 DECLINLINE(size_t) RTLatin1CpSize(RTUNICP CodePoint)1414 {1415 if (CodePoint < 0x100)1416 return 1;1417 return 0;1418 }1419 1420 /**1421 * Put the unicode code point at the given string position1422 * and return the pointer to the char following it.1423 *1424 * This function will not consider anything at or following the1425 * buffer area pointed to by psz. 
It is therefore not suitable for1426 * inserting code points into a string, only appending/overwriting.1427 *1428 * @returns pointer to the char following the written code point.1429 * @param psz The string.1430 * @param CodePoint The code point to write.1431 * This should not be RTUNICP_INVALID or any other1432 * character out of the Latin-1 range.1433 */1434 DECLINLINE(char *) RTLatin1PutCp(char *psz, RTUNICP CodePoint)1435 {1436 AssertReturn(CodePoint < 0x100, NULL);1437 *psz++ = (unsigned char)CodePoint;1438 return psz;1439 }1440 1441 /**1442 * Skips ahead, past the current code point.1443 *1444 * @returns Pointer to the char after the current code point.1445 * @param psz Pointer to the current code point.1446 * @remark This will not move the next valid code point, only past the current one.1447 */1448 DECLINLINE(char *) RTLatin1NextCp(const char *psz)1449 {1450 psz++;1451 return (char *)psz;1452 }1453 1454 /**1455 * Skips back to the previous code point.1456 *1457 * @returns Pointer to the char before the current code point.1458 * @returns pszStart on failure.1459 * @param pszStart Pointer to the start of the string.1460 * @param psz Pointer to the current code point.1461 */1462 DECLINLINE(char *) RTLatin1PrevCp(const char *pszStart, const char *psz)1463 {1464 if ((uintptr_t)psz > (uintptr_t)pszStart)1465 {1466 psz--;1467 return (char *)psz;1468 }1469 return (char *)pszStart;1470 }1471 1472 1473 /** @page pg_rt_str_format The IPRT Format Strings1474 *1475 * IPRT implements most of the commonly used format types and flags with the1476 * exception of floating point which is completely missing. In addition IPRT1477 * provides a number of IPRT specific format types for the IPRT typedefs and1478 * other useful things. 
Note that several of these extensions are similar to
 * \%p and don't care much if you try to add formatting flags/width/precision.
 *
 *
 * Group 0a, The commonly used format types:
 *      - \%s   - Takes a pointer to a zero terminated string (UTF-8) and
 *                prints it with the optional adjustment (width, -) and
 *                length restriction (precision).
 *      - \%ls  - Same as \%s except that the input is UTF-16 (output UTF-8).
 *      - \%Ls  - Same as \%s except that the input is UCS-32 (output UTF-8).
 *      - \%S   - Same as \%s, used to convert to current codeset but this is
 *                now done by the streams code.  Deprecated, use \%s.
 *      - \%lS  - Ditto. Deprecated, use \%ls.
 *      - \%LS  - Ditto. Deprecated, use \%Ls.
 *      - \%c   - Takes a char and prints it.
 *      - \%d   - Takes a signed integer and prints it as decimal. Thousand
 *                separator (\'), zero padding (0), adjustment (-+), width,
 *                precision.
 *      - \%i   - Same as \%d.
 *      - \%u   - Takes an unsigned integer and prints it as decimal. Thousand
 *                separator (\'), zero padding (0), adjustment (-+), width,
 *                precision.
 *      - \%x   - Takes an unsigned integer and prints it as lowercased
 *                hexadecimal.  The special hash (\#) flag causes a '0x'
 *                prefix to be printed. Zero padding (0), adjustment (-+),
 *                width, precision.
 *      - \%X   - Same as \%x except that it is uppercased.
 *      - \%o   - Takes an unsigned (?) integer and prints it as octal. Zero
 *                padding (0), adjustment (-+), width, precision.
 *      - \%p   - Takes a pointer (void technically) and prints it.
Zero
 *                padding (0), adjustment (-+), width, precision.
 *
 * The \%d, \%i, \%u, \%x, \%X and \%o format types support the following
 * argument type specifiers:
 *      - \%ll  - long long (uint64_t).
 *      - \%L   - long long (uint64_t).
 *      - \%l   - long (uint32_t, uint64_t)
 *      - \%h   - short (int16_t).
 *      - \%hh  - char (int8_t).
 *      - \%H   - char (int8_t).
 *      - \%z   - size_t.
 *      - \%j   - intmax_t (int64_t).
 *      - \%t   - ptrdiff_t.
 * The types in parentheses are the typical sizes, however when printing those
 * types you are better off using the special group 2 format types below
 * (\%RX32 and such).
 *
 *
 * Group 0b, IPRT format tricks:
 *      - \%M   - Replaces the format string, takes a string pointer.
 *      - \%N   - Nested formatting, takes a pointer to a format string
 *                followed by the pointer to a va_list variable.  The va_list
 *                variable will not be modified and the caller must do va_end()
 *                on it.  Make sure the va_list variable is NOT in a parameter
 *                list or some gcc versions/targets may get it all wrong.
 *
 *
 * Group 1, the basic runtime typedefs (excluding those which obviously are
 * pointers):
 *      - \%RTbool          - Takes a bool value and prints 'true', 'false', or '!%d!'.
 *      - \%RTfile          - Takes a #RTFILE value.
 *      - \%RTfmode         - Takes a #RTFMODE value.
 *      - \%RTfoff          - Takes a #RTFOFF value.
 *      - \%RTfp16          - Takes a #RTFAR16 value.
 *      - \%RTfp32          - Takes a #RTFAR32 value.
 *      - \%RTfp64          - Takes a #RTFAR64 value.
 *      - \%RTgid           - Takes a #RTGID value.
 *      - \%RTino           - Takes a #RTINODE value.
 *      - \%RTint           - Takes a #RTINT value.
 *      - \%RTiop           - Takes a #RTIOPORT value.
 *      - \%RTldrm          - Takes a #RTLDRMOD value.
 *      - \%RTmac           - Takes a #PCRTMAC pointer.
 *      - \%RTnaddr         - Takes a #PCRTNETADDR value.
 *      - \%RTnaipv4        - Takes a #RTNETADDRIPV4 value.
 *      - \%RTnaipv6        - Takes a #PCRTNETADDRIPV6 value.
 *      - \%RTnthrd         - Takes
a #RTNATIVETHREAD value.
 *      - \%RTproc          - Takes a #RTPROCESS value.
 *      - \%RTptr           - Takes a #RTINTPTR or #RTUINTPTR value (but not void *).
 *      - \%RTreg           - Takes a #RTCCUINTREG value.
 *      - \%RTsel           - Takes a #RTSEL value.
 *      - \%RTsem           - Takes a #RTSEMEVENT, #RTSEMEVENTMULTI, #RTSEMMUTEX, #RTSEMFASTMUTEX, or #RTSEMRW value.
 *      - \%RTsock          - Takes a #RTSOCKET value.
 *      - \%RTthrd          - Takes a #RTTHREAD value.
 *      - \%RTuid           - Takes a #RTUID value.
 *      - \%RTuint          - Takes a #RTUINT value.
 *      - \%RTunicp         - Takes a #RTUNICP value.
 *      - \%RTutf16         - Takes a #RTUTF16 value.
 *      - \%RTuuid          - Takes a #PCRTUUID and will print the UUID as a string.
 *      - \%RTxuint         - Takes a #RTUINT or #RTINT value, formatting it as hex.
 *      - \%RGi             - Takes a #RTGCINT value.
 *      - \%RGp             - Takes a #RTGCPHYS value.
 *      - \%RGr             - Takes a #RTGCUINTREG value.
 *      - \%RGu             - Takes a #RTGCUINT value.
 *      - \%RGv             - Takes a #RTGCPTR, #RTGCINTPTR or #RTGCUINTPTR value.
 *      - \%RGx             - Takes a #RTGCUINT or #RTGCINT value, formatting it as hex.
 *      - \%RHi             - Takes a #RTHCINT value.
 *      - \%RHp             - Takes a #RTHCPHYS value.
 *      - \%RHr             - Takes a #RTHCUINTREG value.
 *      - \%RHu             - Takes a #RTHCUINT value.
 *      - \%RHv             - Takes a #RTHCPTR, #RTHCINTPTR or #RTHCUINTPTR value.
 *      - \%RHx             - Takes a #RTHCUINT or #RTHCINT value, formatting it as hex.
 *      - \%RRv             - Takes a #RTRCPTR, #RTRCINTPTR or #RTRCUINTPTR value.
 *      - \%RCi             - Takes a #RTINT value.
 *      - \%RCp             - Takes a #RTCCPHYS value.
 *      - \%RCr             - Takes a #RTCCUINTREG value.
 *      - \%RCu             - Takes a #RTUINT value.
 *      - \%RCv             - Takes a #uintptr_t, #intptr_t, void * value.
 *      - \%RCx             - Takes a #RTUINT or #RTINT value, formatting it as hex.
 *
 *
 * Group 2, the generic integer types which are preferred over relying on what
 * bit-count a 'long', 'short', or 'long long' has on a platform.
These are
 * highly preferred for the [u]intXX_t kind of types:
 *      - \%RI[8|16|32|64]  - Signed integer value of the specified bit count.
 *      - \%RU[8|16|32|64]  - Unsigned integer value of the specified bit count.
 *      - \%RX[8|16|32|64]  - Hexadecimal integer value of the specified bit count.
 *
 *
 * Group 3, hex dumpers and other complex stuff which requires more than simple
 * formatting:
 *      - \%Rhxd            - Takes a pointer to the memory which is to be dumped in typical
 *                            hex format. Use the precision to specify the length, and the width to
 *                            set the number of bytes per line. Default width and precision is 16.
 *      - \%Rhxs            - Takes a pointer to the memory to be displayed as a hex string,
 *                            i.e. a series of space separated bytes formatted as two digit hex values.
 *                            Use the precision to specify the length. Default length is 16 bytes.
 *                            The width, if specified, is ignored.
 *      - \%Rrc             - Takes an integer iprt status code as argument. Will insert the
 *                            status code define corresponding to the iprt status code.
 *      - \%Rrs             - Takes an integer iprt status code as argument. Will insert the
 *                            short description of the specified status code.
 *      - \%Rrf             - Takes an integer iprt status code as argument. Will insert the
 *                            full description of the specified status code.
 *      - \%Rra             - Takes an integer iprt status code as argument. Will insert the
 *                            status code define + full description.
 *      - \%Rwc             - Takes a long Windows error code as argument. Will insert the status
 *                            code define corresponding to the Windows error code.
 *      - \%Rwf             - Takes a long Windows error code as argument. Will insert the
 *                            full description of the specified status code.
 *      - \%Rwa             - Takes a long Windows error code as argument. Will insert the
 *                            error code define + full description.
 *
 *      - \%Rhrc            - Takes a COM/XPCOM status code as argument.
Will insert the status1622 * code define corresponding to the Windows error code.1623 * - \%Rhrf - Takes a COM/XPCOM status code as argument. Will insert the1624 * full description of the specified status code.1625 * - \%Rhra - Takes a COM/XPCOM error code as argument. Will insert the1626 * error code define + full description.1627 *1628 * - \%Rfn - Pretty printing of a function or method. It drops the1629 * return code and parameter list.1630 * - \%Rbn - Prints the base name. For dropping the path in1631 * order to save space when printing a path name.1632 *1633 * On other platforms, \%Rw? simply prints the argument in a form of 0xXXXXXXXX.1634 *1635 *1636 * Group 4, structure dumpers:1637 * - \%RDtimespec - Takes a PCRTTIMESPEC.1638 *1639 *1640 * Group 5, XML / HTML escapers:1641 * - \%RMas - Takes a string pointer (const char *) and outputs1642 * it as an attribute value with the proper escaping.1643 * This typically ends up in double quotes.1644 *1645 * - \%RMes - Takes a string pointer (const char *) and outputs1646 * it as an element with the necessary escaping.1647 *1648 * Group 6, CPU Architecture Register dumpers:1649 * - \%RAx86[reg] - Takes a 64-bit register value if the register is1650 * 64-bit or smaller. Check the code wrt which1651 * registers are implemented.1652 *1653 */1654 1655 #ifndef DECLARED_FNRTSTROUTPUT /* duplicated in iprt/log.h */1656 # define DECLARED_FNRTSTROUTPUT1657 /**1658 * Output callback.1659 *1660 * @returns number of bytes written.1661 * @param pvArg User argument.1662 * @param pachChars Pointer to an array of utf-8 characters.1663 * @param cbChars Number of bytes in the character array pointed to by pachChars.1664 */1665 typedef DECLCALLBACK(size_t) FNRTSTROUTPUT(void *pvArg, const char *pachChars, size_t cbChars);1666 /** Pointer to callback function. 
*/1667 typedef FNRTSTROUTPUT *PFNRTSTROUTPUT;1668 #endif1669 1670 /** Format flag.1671 * These are used by RTStrFormat extensions and RTStrFormatNumber, mind1672 * that not all flags makes sense to both of the functions.1673 * @{ */1674 #define RTSTR_F_CAPITAL 0x00011675 #define RTSTR_F_LEFT 0x00021676 #define RTSTR_F_ZEROPAD 0x00041677 #define RTSTR_F_SPECIAL 0x00081678 #define RTSTR_F_VALSIGNED 0x00101679 #define RTSTR_F_PLUS 0x00201680 #define RTSTR_F_BLANK 0x00401681 #define RTSTR_F_WIDTH 0x00801682 #define RTSTR_F_PRECISION 0x01001683 #define RTSTR_F_THOUSAND_SEP 0x02001684 1685 #define RTSTR_F_BIT_MASK 0xf8001686 #define RTSTR_F_8BIT 0x08001687 #define RTSTR_F_16BIT 0x10001688 #define RTSTR_F_32BIT 0x20001689 #define RTSTR_F_64BIT 0x40001690 #define RTSTR_F_128BIT 0x80001691 /** @} */1692 1693 /** @def RTSTR_GET_BIT_FLAG1694 * Gets the bit flag for the specified type.1695 */1696 #define RTSTR_GET_BIT_FLAG(type) \1697 ( sizeof(type) * 8 == 32 ? RTSTR_F_32BIT \1698 : sizeof(type) * 8 == 64 ? RTSTR_F_64BIT \1699 : sizeof(type) * 8 == 16 ? RTSTR_F_16BIT \1700 : sizeof(type) * 8 == 8 ? RTSTR_F_8BIT \1701 : sizeof(type) * 8 == 128 ? RTSTR_F_128BIT \1702 : 0)1703 1704 1705 /**1706 * Callback to format non-standard format specifiers.1707 *1708 * @returns The number of bytes formatted.1709 * @param pvArg Formatter argument.1710 * @param pfnOutput Pointer to output function.1711 * @param pvArgOutput Argument for the output function.1712 * @param ppszFormat Pointer to the format string pointer. Advance this till the char1713 * after the format specifier.1714 * @param pArgs Pointer to the argument list. Use this to fetch the arguments.1715 * @param cchWidth Format Width. -1 if not specified.1716 * @param cchPrecision Format Precision. 
-1 if not specified.1717 * @param fFlags Flags (RTSTR_NTFS_*).1718 * @param chArgSize The argument size specifier, 'l' or 'L'.1719 */1720 typedef DECLCALLBACK(size_t) FNSTRFORMAT(void *pvArg, PFNRTSTROUTPUT pfnOutput, void *pvArgOutput,1721 const char **ppszFormat, va_list *pArgs, int cchWidth,1722 int cchPrecision, unsigned fFlags, char chArgSize);1723 /** Pointer to a FNSTRFORMAT() function. */1724 typedef FNSTRFORMAT *PFNSTRFORMAT;1725 1726 1727 /**1728 * Partial implementation of a printf like formatter.1729 * It doesn't do everything correct, and there is no floating point support.1730 * However, it supports custom formats by the means of a format callback.1731 *1732 * @returns number of bytes formatted.1733 * @param pfnOutput Output worker.1734 * Called in two ways. Normally with a string and its length.1735 * For termination, it's called with NULL for string, 0 for length.1736 * @param pvArgOutput Argument to the output worker.1737 * @param pfnFormat Custom format worker.1738 * @param pvArgFormat Argument to the format worker.1739 * @param pszFormat Pointer to the format string, @see pg_rt_str_format.1740 * @param InArgs Argument list.1741 */1742 RTDECL(size_t) RTStrFormatV(PFNRTSTROUTPUT pfnOutput, void *pvArgOutput, PFNSTRFORMAT pfnFormat, void *pvArgFormat,1743 const char *pszFormat, va_list InArgs) RT_IPRT_FORMAT_ATTR(5, 0);1744 1745 /**1746 * Partial implementation of a printf like formatter.1747 * It doesn't do everything correct, and there is no floating point support.1748 * However, it supports custom formats by the means of a format callback.1749 *1750 * @returns number of bytes formatted.1751 * @param pfnOutput Output worker.1752 * Called in two ways. 
Normally with a string and its length.1753 * For termination, it's called with NULL for string, 0 for length.1754 * @param pvArgOutput Argument to the output worker.1755 * @param pfnFormat Custom format worker.1756 * @param pvArgFormat Argument to the format worker.1757 * @param pszFormat Pointer to the format string, @see pg_rt_str_format.1758 * @param ... Argument list.1759 */1760 RTDECL(size_t) RTStrFormat(PFNRTSTROUTPUT pfnOutput, void *pvArgOutput, PFNSTRFORMAT pfnFormat, void *pvArgFormat,1761 const char *pszFormat, ...) RT_IPRT_FORMAT_ATTR(5, 6);1762 1763 /**1764 * Formats an integer number according to the parameters.1765 *1766 * @returns Length of the formatted number.1767 * @param psz Pointer to output string buffer of sufficient size.1768 * @param u64Value Value to format.1769 * @param uiBase Number representation base.1770 * @param cchWidth Width.1771 * @param cchPrecision Precision.1772 * @param fFlags Flags, RTSTR_F_XXX.1773 */1774 RTDECL(int) RTStrFormatNumber(char *psz, uint64_t u64Value, unsigned int uiBase, signed int cchWidth, signed int cchPrecision,1775 unsigned int fFlags);1776 1777 /**1778 * Formats an unsigned 8-bit number.1779 *1780 * @returns The length of the formatted number or VERR_BUFFER_OVERFLOW.1781 * @param pszBuf The output buffer.1782 * @param cbBuf The size of the output buffer.1783 * @param u8Value The value to format.1784 * @param uiBase Number representation base.1785 * @param cchWidth Width.1786 * @param cchPrecision Precision.1787 * @param fFlags Flags, RTSTR_F_XXX.1788 */1789 RTDECL(ssize_t) RTStrFormatU8(char *pszBuf, size_t cbBuf, uint8_t u8Value, unsigned int uiBase,1790 signed int cchWidth, signed int cchPrecision, uint32_t fFlags);1791 1792 /**1793 * Formats an unsigned 16-bit number.1794 *1795 * @returns The length of the formatted number or VERR_BUFFER_OVERFLOW.1796 * @param pszBuf The output buffer.1797 * @param cbBuf The size of the output buffer.1798 * @param u16Value The value to format.1799 * @param uiBase 
Number representation base.1800 * @param cchWidth Width.1801 * @param cchPrecision Precision.1802 * @param fFlags Flags, RTSTR_F_XXX.1803 */1804 RTDECL(ssize_t) RTStrFormatU16(char *pszBuf, size_t cbBuf, uint16_t u16Value, unsigned int uiBase,1805 signed int cchWidth, signed int cchPrecision, uint32_t fFlags);1806 1807 /**1808 * Formats an unsigned 32-bit number.1809 *1810 * @returns The length of the formatted number or VERR_BUFFER_OVERFLOW.1811 * @param pszBuf The output buffer.1812 * @param cbBuf The size of the output buffer.1813 * @param u32Value The value to format.1814 * @param uiBase Number representation base.1815 * @param cchWidth Width.1816 * @param cchPrecision Precision.1817 * @param fFlags Flags, RTSTR_F_XXX.1818 */1819 RTDECL(ssize_t) RTStrFormatU32(char *pszBuf, size_t cbBuf, uint32_t u32Value, unsigned int uiBase,1820 signed int cchWidth, signed int cchPrecision, uint32_t fFlags);1821 1822 /**1823 * Formats an unsigned 64-bit number.1824 *1825 * @returns The length of the formatted number or VERR_BUFFER_OVERFLOW.1826 * @param pszBuf The output buffer.1827 * @param cbBuf The size of the output buffer.1828 * @param u64Value The value to format.1829 * @param uiBase Number representation base.1830 * @param cchWidth Width.1831 * @param cchPrecision Precision.1832 * @param fFlags Flags, RTSTR_F_XXX.1833 */1834 RTDECL(ssize_t) RTStrFormatU64(char *pszBuf, size_t cbBuf, uint64_t u64Value, unsigned int uiBase,1835 signed int cchWidth, signed int cchPrecision, uint32_t fFlags);1836 1837 /**1838 * Formats an unsigned 128-bit number.1839 *1840 * @returns The length of the formatted number or VERR_BUFFER_OVERFLOW.1841 * @param pszBuf The output buffer.1842 * @param cbBuf The size of the output buffer.1843 * @param pu128Value The value to format.1844 * @param uiBase Number representation base.1845 * @param cchWidth Width.1846 * @param cchPrecision Precision.1847 * @param fFlags Flags, RTSTR_F_XXX.1848 */1849 RTDECL(ssize_t) RTStrFormatU128(char *pszBuf, size_t 
cbBuf, PCRTUINT128U pu128Value, unsigned int uiBase,1850 signed int cchWidth, signed int cchPrecision, uint32_t fFlags);1851 1852 /**1853 * Formats an 80-bit extended floating point number.1854 *1855 * @returns The length of the formatted number or VERR_BUFFER_OVERFLOW.1856 * @param pszBuf The output buffer.1857 * @param cbBuf The size of the output buffer.1858 * @param pr80Value The value to format.1859 * @param cchWidth Width.1860 * @param cchPrecision Precision.1861 * @param fFlags Flags, RTSTR_F_XXX.1862 */1863 RTDECL(ssize_t) RTStrFormatR80(char *pszBuf, size_t cbBuf, PCRTFLOAT80U pr80Value, signed int cchWidth,1864 signed int cchPrecision, uint32_t fFlags);1865 1866 /**1867 * Formats an 80-bit extended floating point number, version 2.1868 *1869 * @returns The length of the formatted number or VERR_BUFFER_OVERFLOW.1870 * @param pszBuf The output buffer.1871 * @param cbBuf The size of the output buffer.1872 * @param pr80Value The value to format.1873 * @param cchWidth Width.1874 * @param cchPrecision Precision.1875 * @param fFlags Flags, RTSTR_F_XXX.1876 */1877 RTDECL(ssize_t) RTStrFormatR80u2(char *pszBuf, size_t cbBuf, PCRTFLOAT80U2 pr80Value, signed int cchWidth,1878 signed int cchPrecision, uint32_t fFlags);1879 1880 1881 1882 /**1883 * Callback for formatting a type.1884 *1885 * This is registered using the RTStrFormatTypeRegister function and will1886 * be called during string formatting to handle the specified %R[type].1887 * The argument for this format type is assumed to be a pointer and it's1888 * passed in the @a pvValue argument.1889 *1890 * @returns Length of the formatted output.1891 * @param pfnOutput Output worker.1892 * @param pvArgOutput Argument to the output worker.1893 * @param pszType The type name.1894 * @param pvValue The argument value.1895 * @param cchWidth Width.1896 * @param cchPrecision Precision.1897 * @param fFlags Flags (NTFS_*).1898 * @param pvUser The user argument.1899 */1900 typedef DECLCALLBACK(size_t) 
FNRTSTRFORMATTYPE(PFNRTSTROUTPUT pfnOutput, void *pvArgOutput,1901 const char *pszType, void const *pvValue,1902 int cchWidth, int cchPrecision, unsigned fFlags,1903 void *pvUser);1904 /** Pointer to a FNRTSTRFORMATTYPE. */1905 typedef FNRTSTRFORMATTYPE *PFNRTSTRFORMATTYPE;1906 1907 1908 /**1909 * Register a format handler for a type.1910 *1911 * The format handler is used to handle '%R[type]' format types, where the argument1912 * in the vector is a pointer value (a bit restrictive, but keeps it simple).1913 *1914 * The caller must ensure that no other thread will be making use of any of1915 * the dynamic formatting type facilities simultaneously with this call.1916 *1917 * @returns IPRT status code.1918 * @retval VINF_SUCCESS on success.1919 * @retval VERR_ALREADY_EXISTS if the type has already been registered.1920 * @retval VERR_TOO_MANY_OPEN_FILES if all the type slots have been allocated already.1921 *1922 * @param pszType The type name.1923 * @param pfnHandler The handler address. See FNRTSTRFORMATTYPE for details.1924 * @param pvUser The user argument to pass to the handler. 
See RTStrFormatTypeSetUser1925 * for how to update this later.1926 */1927 RTDECL(int) RTStrFormatTypeRegister(const char *pszType, PFNRTSTRFORMATTYPE pfnHandler, void *pvUser);1928 1929 /**1930 * Deregisters a format type.1931 *1932 * The caller must ensure that no other thread will be making use of any of1933 * the dynamic formatting type facilities simultaneously with this call.1934 *1935 * @returns IPRT status code.1936 * @retval VINF_SUCCESS on success.1937 * @retval VERR_FILE_NOT_FOUND if not found.1938 *1939 * @param pszType The type to deregister.1940 */1941 RTDECL(int) RTStrFormatTypeDeregister(const char *pszType);1942 1943 /**1944 * Sets the user argument for a type.1945 *1946 * This can be used if a user argument needs relocating in GC.1947 *1948 * @returns IPRT status code.1949 * @retval VINF_SUCCESS on success.1950 * @retval VERR_FILE_NOT_FOUND if not found.1951 *1952 * @param pszType The type to update.1953 * @param pvUser The new user argument value.1954 */1955 RTDECL(int) RTStrFormatTypeSetUser(const char *pszType, void *pvUser);1956 1957 1958 /**1959 * String printf.1960 *1961 * @returns The length of the returned string (in pszBuffer) excluding the1962 * terminator.1963 * @param pszBuffer Output buffer.1964 * @param cchBuffer Size of the output buffer.1965 * @param pszFormat Pointer to the format string, @see pg_rt_str_format.1966 * @param args The format argument.1967 */1968 RTDECL(size_t) RTStrPrintfV(char *pszBuffer, size_t cchBuffer, const char *pszFormat, va_list args) RT_IPRT_FORMAT_ATTR(3, 0);1969 1970 /**1971 * String printf.1972 *1973 * @returns The length of the returned string (in pszBuffer) excluding the1974 * terminator.1975 * @param pszBuffer Output buffer.1976 * @param cchBuffer Size of the output buffer.1977 * @param pszFormat Pointer to the format string, @see pg_rt_str_format.1978 * @param ... The format argument.1979 */1980 RTDECL(size_t) RTStrPrintf(char *pszBuffer, size_t cchBuffer, const char *pszFormat, ...) 
RT_IPRT_FORMAT_ATTR(3, 4);1981 1982 1983 /**1984 * String printf with custom formatting.1985 *1986 * @returns The length of the returned string (in pszBuffer) excluding the1987 * terminator.1988 * @param pfnFormat Pointer to handler function for the custom formats.1989 * @param pvArg Argument to the pfnFormat function.1990 * @param pszBuffer Output buffer.1991 * @param cchBuffer Size of the output buffer.1992 * @param pszFormat Pointer to the format string, @see pg_rt_str_format.1993 * @param args The format argument.1994 */1995 RTDECL(size_t) RTStrPrintfExV(PFNSTRFORMAT pfnFormat, void *pvArg, char *pszBuffer, size_t cchBuffer,1996 const char *pszFormat, va_list args) RT_IPRT_FORMAT_ATTR(5, 0);1997 1998 /**1999 * String printf with custom formatting.2000 *2001 * @returns The length of the returned string (in pszBuffer) excluding the2002 * terminator.2003 * @param pfnFormat Pointer to handler function for the custom formats.2004 * @param pvArg Argument to the pfnFormat function.2005 * @param pszBuffer Output buffer.2006 * @param cchBuffer Size of the output buffer.2007 * @param pszFormat Pointer to the format string, @see pg_rt_str_format.2008 * @param ... The format argument.2009 */2010 RTDECL(size_t) RTStrPrintfEx(PFNSTRFORMAT pfnFormat, void *pvArg, char *pszBuffer, size_t cchBuffer,2011 const char *pszFormat, ...) 
RT_IPRT_FORMAT_ATTR(5, 6);2012 2013 2014 /**2015 * Allocating string printf (default tag).2016 *2017 * @returns The length of the string in the returned *ppszBuffer excluding the2018 * terminator.2019 * @returns -1 on failure.2020 * @param ppszBuffer Where to store the pointer to the allocated output buffer.2021 * The buffer should be freed using RTStrFree().2022 * On failure *ppszBuffer will be set to NULL.2023 * @param pszFormat Pointer to the format string, @see pg_rt_str_format.2024 * @param args The format argument.2025 */2026 #define RTStrAPrintfV(ppszBuffer, pszFormat, args) RTStrAPrintfVTag((ppszBuffer), (pszFormat), (args), RTSTR_TAG)2027 2028 /**2029 * Allocating string printf (custom tag).2030 *2031 * @returns The length of the string in the returned *ppszBuffer excluding the2032 * terminator.2033 * @returns -1 on failure.2034 * @param ppszBuffer Where to store the pointer to the allocated output buffer.2035 * The buffer should be freed using RTStrFree().2036 * On failure *ppszBuffer will be set to NULL.2037 * @param pszFormat Pointer to the format string, @see pg_rt_str_format.2038 * @param args The format argument.2039 * @param pszTag Allocation tag used for statistics and such.2040 */2041 RTDECL(int) RTStrAPrintfVTag(char **ppszBuffer, const char *pszFormat, va_list args, const char *pszTag) RT_IPRT_FORMAT_ATTR(2, 0);2042 2043 /**2044 * Allocating string printf.2045 *2046 * @returns The length of the string in the returned *ppszBuffer excluding the2047 * terminator.2048 * @returns -1 on failure.2049 * @param ppszBuffer Where to store the pointer to the allocated output buffer.2050 * The buffer should be freed using RTStrFree().2051 * On failure *ppszBuffer will be set to NULL.2052 * @param pszFormat Pointer to the format string, @see pg_rt_str_format.2053 * @param ... 
The format argument.2054 */2055 DECLINLINE(int) RT_IPRT_FORMAT_ATTR(2, 3) RTStrAPrintf(char **ppszBuffer, const char *pszFormat, ...)2056 {2057 int cbRet;2058 va_list va;2059 va_start(va, pszFormat);2060 cbRet = RTStrAPrintfVTag(ppszBuffer, pszFormat, va, RTSTR_TAG);2061 va_end(va);2062 return cbRet;2063 }2064 2065 /**2066 * Allocating string printf (custom tag).2067 *2068 * @returns The length of the string in the returned *ppszBuffer excluding the2069 * terminator.2070 * @returns -1 on failure.2071 * @param ppszBuffer Where to store the pointer to the allocated output buffer.2072 * The buffer should be freed using RTStrFree().2073 * On failure *ppszBuffer will be set to NULL.2074 * @param pszTag Allocation tag used for statistics and such.2075 * @param pszFormat Pointer to the format string, @see pg_rt_str_format.2076 * @param ... The format argument.2077 */2078 DECLINLINE(int) RT_IPRT_FORMAT_ATTR(3, 4) RTStrAPrintfTag(char **ppszBuffer, const char *pszTag, const char *pszFormat, ...)2079 {2080 int cbRet;2081 va_list va;2082 va_start(va, pszFormat);2083 cbRet = RTStrAPrintfVTag(ppszBuffer, pszFormat, va, pszTag);2084 va_end(va);2085 return cbRet;2086 }2087 2088 /**2089 * Allocating string printf, version 2.2090 *2091 * @returns Formatted string. Use RTStrFree() to free it. NULL when out of2092 * memory.2093 * @param pszFormat Pointer to the format string, @see pg_rt_str_format.2094 * @param args The format argument.2095 */2096 #define RTStrAPrintf2V(pszFormat, args) RTStrAPrintf2VTag((pszFormat), (args), RTSTR_TAG)2097 2098 /**2099 * Allocating string printf, version 2.2100 *2101 * @returns Formatted string. Use RTStrFree() to free it. 
NULL when out of2102 * memory.2103 * @param pszFormat Pointer to the format string, @see pg_rt_str_format.2104 * @param args The format argument.2105 * @param pszTag Allocation tag used for statistics and such.2106 */2107 RTDECL(char *) RTStrAPrintf2VTag(const char *pszFormat, va_list args, const char *pszTag) RT_IPRT_FORMAT_ATTR(1, 0);2108 2109 /**2110 * Allocating string printf, version 2 (default tag).2111 *2112 * @returns Formatted string. Use RTStrFree() to free it. NULL when out of2113 * memory.2114 * @param pszFormat Pointer to the format string, @see pg_rt_str_format.2115 * @param ... The format argument.2116 */2117 DECLINLINE(char *) RT_IPRT_FORMAT_ATTR(1, 2) RTStrAPrintf2(const char *pszFormat, ...)2118 {2119 char *pszRet;2120 va_list va;2121 va_start(va, pszFormat);2122 pszRet = RTStrAPrintf2VTag(pszFormat, va, RTSTR_TAG);2123 va_end(va);2124 return pszRet;2125 }2126 2127 /**2128 * Allocating string printf, version 2 (custom tag).2129 *2130 * @returns Formatted string. Use RTStrFree() to free it. NULL when out of2131 * memory.2132 * @param pszTag Allocation tag used for statistics and such.2133 * @param pszFormat Pointer to the format string, @see pg_rt_str_format.2134 * @param ... 
The format argument.2135 */2136 DECLINLINE(char *) RT_IPRT_FORMAT_ATTR(2, 3) RTStrAPrintf2Tag(const char *pszTag, const char *pszFormat, ...)2137 {2138 char *pszRet;2139 va_list va;2140 va_start(va, pszFormat);2141 pszRet = RTStrAPrintf2VTag(pszFormat, va, pszTag);2142 va_end(va);2143 return pszRet;2144 }2145 2146 /**2147 * Strips blankspaces from both ends of the string.2148 *2149 * @returns Pointer to first non-blank char in the string.2150 * @param psz The string to strip.2151 */2152 RTDECL(char *) RTStrStrip(char *psz);2153 2154 /**2155 * Strips blankspaces from the start of the string.2156 *2157 * @returns Pointer to first non-blank char in the string.2158 * @param psz The string to strip.2159 */2160 RTDECL(char *) RTStrStripL(const char *psz);2161 2162 /**2163 * Strips blankspaces from the end of the string.2164 *2165 * @returns psz.2166 * @param psz The string to strip.2167 */2168 RTDECL(char *) RTStrStripR(char *psz);2169 2170 /**2171 * String copy with overflow handling.2172 *2173 * @retval VINF_SUCCESS on success.2174 * @retval VERR_BUFFER_OVERFLOW if the destination buffer is too small. The2175 * buffer will contain as much of the string as it can hold, fully2176 * terminated.2177 *2178 * @param pszDst The destination buffer.2179 * @param cbDst The size of the destination buffer (in bytes).2180 * @param pszSrc The source string. NULL is not OK.2181 */2182 RTDECL(int) RTStrCopy(char *pszDst, size_t cbDst, const char *pszSrc);2183 2184 /**2185 * String copy with overflow handling.2186 *2187 * @retval VINF_SUCCESS on success.2188 * @retval VERR_BUFFER_OVERFLOW if the destination buffer is too small. The2189 * buffer will contain as much of the string as it can hold, fully2190 * terminated.2191 *2192 * @param pszDst The destination buffer.2193 * @param cbDst The size of the destination buffer (in bytes).2194 * @param pszSrc The source string. 
NULL is not OK.2195 * @param cchSrcMax The maximum number of chars (not code points) to2196 * copy from the source string, not counting the2197 * terminator as usual.2198 */2199 RTDECL(int) RTStrCopyEx(char *pszDst, size_t cbDst, const char *pszSrc, size_t cchSrcMax);2200 2201 /**2202 * String copy with overflow handling and buffer advancing.2203 *2204 * @retval VINF_SUCCESS on success.2205 * @retval VERR_BUFFER_OVERFLOW if the destination buffer is too small. The2206 * buffer will contain as much of the string as it can hold, fully2207 * terminated.2208 *2209 * @param ppszDst Pointer to the destination buffer pointer.2210 * This will be advanced to the end of the copied2211 * bytes (points at the terminator). This is also2212 * updated on overflow.2213 * @param pcbDst Pointer to the destination buffer size2214 * variable. This will be updated in accord with2215 * the buffer pointer.2216 * @param pszSrc The source string. NULL is not OK.2217 */2218 RTDECL(int) RTStrCopyP(char **ppszDst, size_t *pcbDst, const char *pszSrc);2219 2220 /**2221 * String copy with overflow handling.2222 *2223 * @retval VINF_SUCCESS on success.2224 * @retval VERR_BUFFER_OVERFLOW if the destination buffer is too small. The2225 * buffer will contain as much of the string as it can hold, fully2226 * terminated.2227 *2228 * @param ppszDst Pointer to the destination buffer pointer.2229 * This will be advanced to the end of the copied2230 * bytes (points at the terminator). This is also2231 * updated on overflow.2232 * @param pcbDst Pointer to the destination buffer size2233 * variable. This will be updated in accord with2234 * the buffer pointer.2235 * @param pszSrc The source string. 
NULL is not OK.2236 * @param cchSrcMax The maximum number of chars (not code points) to2237 * copy from the source string, not counting the2238 * terminator as usual.2239 */2240 RTDECL(int) RTStrCopyPEx(char **ppszDst, size_t *pcbDst, const char *pszSrc, size_t cchSrcMax);2241 2242 /**2243 * String concatenation with overflow handling.2244 *2245 * @retval VINF_SUCCESS on success.2246 * @retval VERR_BUFFER_OVERFLOW if the destination buffer is too small. The2247 * buffer will contain as much of the string as it can hold, fully2248 * terminated.2249 *2250 * @param pszDst The destination buffer.2251 * @param cbDst The size of the destination buffer (in bytes).2252 * @param pszSrc The source string. NULL is not OK.2253 */2254 RTDECL(int) RTStrCat(char *pszDst, size_t cbDst, const char *pszSrc);2255 2256 /**2257 * String concatenation with overflow handling.2258 *2259 * @retval VINF_SUCCESS on success.2260 * @retval VERR_BUFFER_OVERFLOW if the destination buffer is too small. The2261 * buffer will contain as much of the string as it can hold, fully2262 * terminated.2263 *2264 * @param pszDst The destination buffer.2265 * @param cbDst The size of the destination buffer (in bytes).2266 * @param pszSrc The source string. NULL is not OK.2267 * @param cchSrcMax The maximum number of chars (not code points) to2268 * copy from the source string, not counting the2269 * terminator as usual.2270 */2271 RTDECL(int) RTStrCatEx(char *pszDst, size_t cbDst, const char *pszSrc, size_t cchSrcMax);2272 2273 /**2274 * String concatenation with overflow handling.2275 *2276 * @retval VINF_SUCCESS on success.2277 * @retval VERR_BUFFER_OVERFLOW if the destination buffer is too small. The2278 * buffer will contain as much of the string as it can hold, fully2279 * terminated.2280 *2281 * @param ppszDst Pointer to the destination buffer pointer.2282 * This will be advanced to the end of the copied2283 * bytes (points at the terminator). 
This is also2284 * updated on overflow.2285 * @param pcbDst Pointer to the destination buffer size2286 * variable. This will be updated in accord with2287 * the buffer pointer.2288 * @param pszSrc The source string. NULL is not OK.2289 */2290 RTDECL(int) RTStrCatP(char **ppszDst, size_t *pcbDst, const char *pszSrc);2291 2292 /**2293 * String concatenation with overflow handling and buffer advancing.2294 *2295 * @retval VINF_SUCCESS on success.2296 * @retval VERR_BUFFER_OVERFLOW if the destination buffer is too small. The2297 * buffer will contain as much of the string as it can hold, fully2298 * terminated.2299 *2300 * @param ppszDst Pointer to the destination buffer pointer.2301 * This will be advanced to the end of the copied2302 * bytes (points at the terminator). This is also2303 * updated on overflow.2304 * @param pcbDst Pointer to the destination buffer size2305 * variable. This will be updated in accord with2306 * the buffer pointer.2307 * @param pszSrc The source string. NULL is not OK.2308 * @param cchSrcMax The maximum number of chars (not code points) to2309 * copy from the source string, not counting the2310 * terminator as usual.2311 */2312 RTDECL(int) RTStrCatPEx(char **ppszDst, size_t *pcbDst, const char *pszSrc, size_t cchSrcMax);2313 2314 /**2315 * Performs a case sensitive string compare between two UTF-8 strings.2316 *2317 * Encoding errors are ignored by the current implementation. So, the only2318 * difference between this and the CRT strcmp function is the handling of2319 * NULL arguments.2320 *2321 * @returns < 0 if the first string less than the second string.2322 * @returns 0 if the first string identical to the second string.2323 * @returns > 0 if the first string greater than the second string.2324 * @param psz1 First UTF-8 string. Null is allowed.2325 * @param psz2 Second UTF-8 string. 
Null is allowed.2326 */2327 RTDECL(int) RTStrCmp(const char *psz1, const char *psz2);2328 2329 /**2330 * Performs a case sensitive string compare between two UTF-8 strings, given2331 * a maximum string length.2332 *2333 * Encoding errors are ignored by the current implementation. So, the only2334 * difference between this and the CRT strncmp function is the handling of2335 * NULL arguments.2336 *2337 * @returns < 0 if the first string less than the second string.2338 * @returns 0 if the first string identical to the second string.2339 * @returns > 0 if the first string greater than the second string.2340 * @param psz1 First UTF-8 string. Null is allowed.2341 * @param psz2 Second UTF-8 string. Null is allowed.2342 * @param cchMax The maximum string length2343 */2344 RTDECL(int) RTStrNCmp(const char *psz1, const char *psz2, size_t cchMax);2345 2346 /**2347 * Performs a case insensitive string compare between two UTF-8 strings.2348 *2349 * This is a simplified compare, as only the simplified lower/upper case folding2350 * specified by the unicode specs are used. It does not consider character pairs2351 * as they are used in some languages, just simple upper & lower case compares.2352 *2353 * The result is the difference between the mismatching codepoints after they2354 * both have been lower cased.2355 *2356 * If the string encoding is invalid the function will assert (strict builds)2357 * and use RTStrCmp for the remainder of the string.2358 *2359 * @returns < 0 if the first string less than the second string.2360 * @returns 0 if the first string identical to the second string.2361 * @returns > 0 if the first string greater than the second string.2362 * @param psz1 First UTF-8 string. Null is allowed.2363 * @param psz2 Second UTF-8 string. 
Null is allowed.2364 */2365 RTDECL(int) RTStrICmp(const char *psz1, const char *psz2);2366 2367 /**2368 * Performs a case insensitive string compare between two UTF-8 strings, given a2369 * maximum string length.2370 *2371 * This is a simplified compare, as only the simplified lower/upper case folding2372 * specified by the unicode specs are used. It does not consider character pairs2373 * as they are used in some languages, just simple upper & lower case compares.2374 *2375 * The result is the difference between the mismatching codepoints after they2376 * both have been lower cased.2377 *2378 * If the string encoding is invalid the function will assert (strict builds)2379 * and use RTStrCmp for the remainder of the string.2380 *2381 * @returns < 0 if the first string less than the second string.2382 * @returns 0 if the first string identical to the second string.2383 * @returns > 0 if the first string greater than the second string.2384 * @param psz1 First UTF-8 string. Null is allowed.2385 * @param psz2 Second UTF-8 string. Null is allowed.2386 * @param cchMax Maximum string length2387 */2388 RTDECL(int) RTStrNICmp(const char *psz1, const char *psz2, size_t cchMax);2389 2390 /**2391 * Locates a case sensitive substring.2392 *2393 * If any of the two strings are NULL, then NULL is returned. If the needle is2394 * an empty string, then the haystack is returned (i.e. matches anything).2395 *2396 * @returns Pointer to the first occurrence of the substring if found, NULL if2397 * not.2398 *2399 * @param pszHaystack The string to search.2400 * @param pszNeedle The substring to search for.2401 *2402 * @remarks The difference between this and strstr is the handling of NULL2403 * pointers.2404 */2405 RTDECL(char *) RTStrStr(const char *pszHaystack, const char *pszNeedle);2406 2407 /**2408 * Locates a case insensitive substring.2409 *2410 * If any of the two strings are NULL, then NULL is returned. If the needle is2411 * an empty string, then the haystack is returned (i.e. 
matches anything).2412 *2413 * @returns Pointer to the first occurrence of the substring if found, NULL if2414 * not.2415 *2416 * @param pszHaystack The string to search.2417 * @param pszNeedle The substring to search for.2418 *2419 */2420 RTDECL(char *) RTStrIStr(const char *pszHaystack, const char *pszNeedle);2421 2422 /**2423 * Converts the string to lower case.2424 *2425 * @returns Pointer to the converted string.2426 * @param psz The string to convert.2427 */2428 RTDECL(char *) RTStrToLower(char *psz);2429 2430 /**2431 * Converts the string to upper case.2432 *2433 * @returns Pointer to the converted string.2434 * @param psz The string to convert.2435 */2436 RTDECL(char *) RTStrToUpper(char *psz);2437 2438 /**2439 * Checks if the string is case foldable, i.e. whether it would change if2440 * subject to RTStrToLower or RTStrToUpper.2441 *2442 * @returns true / false2443 * @param psz The string in question.2444 */2445 RTDECL(bool) RTStrIsCaseFoldable(const char *psz);2446 2447 /**2448 * Checks if the string is upper cased (no lower case chars in it).2449 *2450 * @returns true / false2451 * @param psz The string in question.2452 */2453 RTDECL(bool) RTStrIsUpperCased(const char *psz);2454 2455 /**2456 * Checks if the string is lower cased (no upper case chars in it).2457 *2458 * @returns true / false2459 * @param psz The string in question.2460 */2461 RTDECL(bool) RTStrIsLowerCased(const char *psz);2462 2463 /**2464 * Find the length of a zero-terminated byte string, given2465 * a max string length.2466 *2467 * See also RTStrNLenEx.2468 *2469 * @returns The string length or cchMax. 
The returned length does not include2470 * the zero terminator if it was found.2471 *2472 * @param pszString The string.2473 * @param cchMax The max string length.2474 */2475 RTDECL(size_t) RTStrNLen(const char *pszString, size_t cchMax);2476 2477 /**2478 * Find the length of a zero-terminated byte string, given2479 * a max string length.2480 *2481 * See also RTStrNLen.2482 *2483 * @returns IPRT status code.2484 * @retval VINF_SUCCESS if the string has a length less than cchMax.2485 * @retval VERR_BUFFER_OVERFLOW if the end of the string wasn't found2486 * before cchMax was reached.2487 *2488 * @param pszString The string.2489 * @param cchMax The max string length.2490 * @param pcch Where to store the string length excluding the2491 * terminator. This is set to cchMax if the terminator2492 * isn't found.2493 */2494 RTDECL(int) RTStrNLenEx(const char *pszString, size_t cchMax, size_t *pcch);2495 2496 RT_C_DECLS_END2497 2498 /** The maximum size argument of a memchr call. */2499 #define RTSTR_MEMCHR_MAX ((~(size_t)0 >> 1) - 15)2500 2501 /**2502 * Find the zero terminator in a string with a limited length.2503 *2504 * @returns Pointer to the zero terminator.2505 * @returns NULL if the zero terminator was not found.2506 *2507 * @param pszString The string.2508 * @param cchMax The max string length. 
RTSTR_MAX is fine.2509 */2510 #if defined(__cplusplus) && !defined(DOXYGEN_RUNNING)2511 DECLINLINE(char const *) RTStrEnd(char const *pszString, size_t cchMax)2512 {2513 /* Avoid potential issues with memchr seen in glibc.2514 * See sysdeps/x86_64/memchr.S in glibc versions older than 2.11 */2515 while (cchMax > RTSTR_MEMCHR_MAX)2516 {2517 char const *pszRet = (char const *)memchr(pszString, '\0', RTSTR_MEMCHR_MAX);2518 if (RT_LIKELY(pszRet))2519 return pszRet;2520 pszString += RTSTR_MEMCHR_MAX;2521 cchMax -= RTSTR_MEMCHR_MAX;2522 }2523 return (char const *)memchr(pszString, '\0', cchMax);2524 }2525 2526 DECLINLINE(char *) RTStrEnd(char *pszString, size_t cchMax)2527 #else2528 DECLINLINE(char *) RTStrEnd(const char *pszString, size_t cchMax)2529 #endif2530 {2531 /* Avoid potential issues with memchr seen in glibc.2532 * See sysdeps/x86_64/memchr.S in glibc versions older than 2.11 */2533 while (cchMax > RTSTR_MEMCHR_MAX)2534 {2535 char *pszRet = (char *)memchr(pszString, '\0', RTSTR_MEMCHR_MAX);2536 if (RT_LIKELY(pszRet))2537 return pszRet;2538 pszString += RTSTR_MEMCHR_MAX;2539 cchMax -= RTSTR_MEMCHR_MAX;2540 }2541 return (char *)memchr(pszString, '\0', cchMax);2542 }2543 2544 RT_C_DECLS_BEGIN2545 2546 /**2547 * Finds the offset at which a simple character first occurs in a string.2548 *2549 * @returns The offset of the first occurrence or the terminator offset.2550 * @param pszHaystack The string to search.2551 * @param chNeedle The character to search for.2552 */2553 DECLINLINE(size_t) RTStrOffCharOrTerm(const char *pszHaystack, char chNeedle)2554 {2555 const char *psz = pszHaystack;2556 char ch;2557 while ( (ch = *psz) != chNeedle2558 && ch != '\0')2559 psz++;2560 return psz - pszHaystack;2561 }2562 2563 2564 /**2565 * Matches a simple string pattern.2566 *2567 * @returns true if the string matches the pattern, otherwise false.2568 *2569 * @param pszPattern The pattern. 
Special chars are '*' and '?', where the2570 * asterisk matches zero or more characters and question2571 * mark matches exactly one character.2572 * @param pszString The string to match against the pattern.2573 */2574 RTDECL(bool) RTStrSimplePatternMatch(const char *pszPattern, const char *pszString);2575 2576 /**2577 * Matches a simple string pattern, neither of which needs to be zero terminated.2578 *2579 * This is identical to RTStrSimplePatternMatch except that you can optionally2580 * specify the length of both the pattern and the string. The function will2581 * stop when it hits a string terminator or either of the lengths.2582 *2583 * @returns true if the string matches the pattern, otherwise false.2584 *2585 * @param pszPattern The pattern. Special chars are '*' and '?', where the2586 * asterisk matches zero or more characters and question2587 * mark matches exactly one character.2588 * @param cchPattern The pattern length. Pass RTSTR_MAX if you don't know the2589 * length and wish to stop at the string terminator.2590 * @param pszString The string to match against the pattern.2591 * @param cchString The string length. Pass RTSTR_MAX if you don't know the2592 * length and wish to match up to the string terminator.2593 */2594 RTDECL(bool) RTStrSimplePatternNMatch(const char *pszPattern, size_t cchPattern,2595 const char *pszString, size_t cchString);2596 2597 /**2598 * Matches multiple patterns against a string.2599 *2600 * The patterns are separated by the pipe character (|).2601 *2602 * @returns true if the string matches the pattern, otherwise false.2603 *2604 * @param pszPatterns The patterns.2605 * @param cchPatterns The lengths of the patterns to use. Pass RTSTR_MAX to2606 * stop at the terminator.2607 * @param pszString The string to match against the pattern.2608 * @param cchString The string length. Pass RTSTR_MAX to stop at the2609 * terminator.2610 * @param poffPattern Offset into the patterns string of the pattern that2611 * matched. 
If no match, this will be set to RTSTR_MAX.2612 * This is optional, NULL is fine.2613 */2614 RTDECL(bool) RTStrSimplePatternMultiMatch(const char *pszPatterns, size_t cchPatterns,2615 const char *pszString, size_t cchString,2616 size_t *poffPattern);2617 2618 /**2619 * Compares two version strings RTStrICmp fashion.2620 *2621 * The version string is split up into sections at punctuation, spaces,2622 * underscores, dashes and plus signs. The sections are then split up into2623 * numeric and string sub-sections. Finally, the sub-sections are compared2624 * in a numeric or case insensitive fashion depending on what they are.2625 *2626 * The following strings are considered to be equal: "1.0.0", "1.00.0", "1.0",2627 * "1". These aren't: "1.0.0r993", "1.0", "1.0r993", "1.0_Beta3", "1.1"2628 *2629 * @returns < 0 if the first string less than the second string.2630 * @returns 0 if the first string identical to the second string.2631 * @returns > 0 if the first string greater than the second string.2632 *2633 * @param pszVer1 First version string to compare.2634 * @param pszVer2 Second version string to compare first version with.2635 */2636 RTDECL(int) RTStrVersionCompare(const char *pszVer1, const char *pszVer2);2637 2638 2639 /** @defgroup rt_str_conv String To/From Number Conversions2640 * @{ */2641 2642 /**2643 * Converts a string representation of a number to a 64-bit unsigned number.2644 *2645 * @returns iprt status code.2646 * Warnings are used to indicate conversion problems.2647 * @retval VWRN_NUMBER_TOO_BIG2648 * @retval VWRN_NEGATIVE_UNSIGNED2649 * @retval VWRN_TRAILING_CHARS2650 * @retval VWRN_TRAILING_SPACES2651 * @retval VINF_SUCCESS2652 * @retval VERR_NO_DIGITS2653 *2654 * @param pszValue Pointer to the string value.2655 * @param ppszNext Where to store the pointer to the first char following the number. 
(Optional)2656 * @param uBase The base of the representation used.2657 * If 0 the function will look for known prefixes before defaulting to 10.2658 * @param pu64 Where to store the converted number. (optional)2659 */2660 RTDECL(int) RTStrToUInt64Ex(const char *pszValue, char **ppszNext, unsigned uBase, uint64_t *pu64);2661 2662 /**2663 * Converts a string representation of a number to a 64-bit unsigned number,2664 * making sure the full string is converted.2665 *2666 * @returns iprt status code.2667 * Warnings are used to indicate conversion problems.2668 * @retval VWRN_NUMBER_TOO_BIG2669 * @retval VWRN_NEGATIVE_UNSIGNED2670 * @retval VINF_SUCCESS2671 * @retval VERR_NO_DIGITS2672 * @retval VERR_TRAILING_SPACES2673 * @retval VERR_TRAILING_CHARS2674 *2675 * @param pszValue Pointer to the string value.2676 * @param uBase The base of the representation used.2677 * If 0 the function will look for known prefixes before defaulting to 10.2678 * @param pu64 Where to store the converted number. (optional)2679 */2680 RTDECL(int) RTStrToUInt64Full(const char *pszValue, unsigned uBase, uint64_t *pu64);2681 2682 /**2683 * Converts a string representation of a number to a 64-bit unsigned number.2684 * The base is guessed.2685 *2686 * @returns 64-bit unsigned number on success.2687 * @returns 0 on failure.2688 * @param pszValue Pointer to the string value.2689 */2690 RTDECL(uint64_t) RTStrToUInt64(const char *pszValue);2691 2692 /**2693 * Converts a string representation of a number to a 32-bit unsigned number.2694 *2695 * @returns iprt status code.2696 * Warnings are used to indicate conversion problems.2697 * @retval VWRN_NUMBER_TOO_BIG2698 * @retval VWRN_NEGATIVE_UNSIGNED2699 * @retval VWRN_TRAILING_CHARS2700 * @retval VWRN_TRAILING_SPACES2701 * @retval VINF_SUCCESS2702 * @retval VERR_NO_DIGITS2703 *2704 * @param pszValue Pointer to the string value.2705 * @param ppszNext Where to store the pointer to the first char following the number. 
(Optional)2706 * @param uBase The base of the representation used.2707 * If 0 the function will look for known prefixes before defaulting to 10.2708 * @param pu32 Where to store the converted number. (optional)2709 */2710 RTDECL(int) RTStrToUInt32Ex(const char *pszValue, char **ppszNext, unsigned uBase, uint32_t *pu32);2711 2712 /**2713 * Converts a string representation of a number to a 32-bit unsigned number,2714 * making sure the full string is converted.2715 *2716 * @returns iprt status code.2717 * Warnings are used to indicate conversion problems.2718 * @retval VWRN_NUMBER_TOO_BIG2719 * @retval VWRN_NEGATIVE_UNSIGNED2720 * @retval VINF_SUCCESS2721 * @retval VERR_NO_DIGITS2722 * @retval VERR_TRAILING_SPACES2723 * @retval VERR_TRAILING_CHARS2724 *2725 * @param pszValue Pointer to the string value.2726 * @param uBase The base of the representation used.2727 * If 0 the function will look for known prefixes before defaulting to 10.2728 * @param pu32 Where to store the converted number. (optional)2729 */2730 RTDECL(int) RTStrToUInt32Full(const char *pszValue, unsigned uBase, uint32_t *pu32);2731 2732 /**2733 * Converts a string representation of a number to a 32-bit unsigned number.2734 * The base is guessed.2735 *2736 * @returns 32-bit unsigned number on success.2737 * @returns 0 on failure.2738 * @param pszValue Pointer to the string value.2739 */2740 RTDECL(uint32_t) RTStrToUInt32(const char *pszValue);2741 2742 /**2743 * Converts a string representation of a number to a 16-bit unsigned number.2744 *2745 * @returns iprt status code.2746 * Warnings are used to indicate conversion problems.2747 * @retval VWRN_NUMBER_TOO_BIG2748 * @retval VWRN_NEGATIVE_UNSIGNED2749 * @retval VWRN_TRAILING_CHARS2750 * @retval VWRN_TRAILING_SPACES2751 * @retval VINF_SUCCESS2752 * @retval VERR_NO_DIGITS2753 *2754 * @param pszValue Pointer to the string value.2755 * @param ppszNext Where to store the pointer to the first char following the number. 
(Optional)2756 * @param uBase The base of the representation used.2757 * If 0 the function will look for known prefixes before defaulting to 10.2758 * @param pu16 Where to store the converted number. (optional)2759 */2760 RTDECL(int) RTStrToUInt16Ex(const char *pszValue, char **ppszNext, unsigned uBase, uint16_t *pu16);2761 2762 /**2763 * Converts a string representation of a number to a 16-bit unsigned number,2764 * making sure the full string is converted.2765 *2766 * @returns iprt status code.2767 * Warnings are used to indicate conversion problems.2768 * @retval VWRN_NUMBER_TOO_BIG2769 * @retval VWRN_NEGATIVE_UNSIGNED2770 * @retval VINF_SUCCESS2771 * @retval VERR_NO_DIGITS2772 * @retval VERR_TRAILING_SPACES2773 * @retval VERR_TRAILING_CHARS2774 *2775 * @param pszValue Pointer to the string value.2776 * @param uBase The base of the representation used.2777 * If 0 the function will look for known prefixes before defaulting to 10.2778 * @param pu16 Where to store the converted number. (optional)2779 */2780 RTDECL(int) RTStrToUInt16Full(const char *pszValue, unsigned uBase, uint16_t *pu16);2781 2782 /**2783 * Converts a string representation of a number to a 16-bit unsigned number.2784 * The base is guessed.2785 *2786 * @returns 16-bit unsigned number on success.2787 * @returns 0 on failure.2788 * @param pszValue Pointer to the string value.2789 */2790 RTDECL(uint16_t) RTStrToUInt16(const char *pszValue);2791 2792 /**2793 * Converts a string representation of a number to a 8-bit unsigned number.2794 *2795 * @returns iprt status code.2796 * Warnings are used to indicate conversion problems.2797 * @retval VWRN_NUMBER_TOO_BIG2798 * @retval VWRN_NEGATIVE_UNSIGNED2799 * @retval VWRN_TRAILING_CHARS2800 * @retval VWRN_TRAILING_SPACES2801 * @retval VINF_SUCCESS2802 * @retval VERR_NO_DIGITS2803 *2804 * @param pszValue Pointer to the string value.2805 * @param ppszNext Where to store the pointer to the first char following the number. 
(Optional)2806 * @param uBase The base of the representation used.2807 * If 0 the function will look for known prefixes before defaulting to 10.2808 * @param pu8 Where to store the converted number. (optional)2809 */2810 RTDECL(int) RTStrToUInt8Ex(const char *pszValue, char **ppszNext, unsigned uBase, uint8_t *pu8);2811 2812 /**2813 * Converts a string representation of a number to a 8-bit unsigned number,2814 * making sure the full string is converted.2815 *2816 * @returns iprt status code.2817 * Warnings are used to indicate conversion problems.2818 * @retval VWRN_NUMBER_TOO_BIG2819 * @retval VWRN_NEGATIVE_UNSIGNED2820 * @retval VINF_SUCCESS2821 * @retval VERR_NO_DIGITS2822 * @retval VERR_TRAILING_SPACES2823 * @retval VERR_TRAILING_CHARS2824 *2825 * @param pszValue Pointer to the string value.2826 * @param uBase The base of the representation used.2827 * If 0 the function will look for known prefixes before defaulting to 10.2828 * @param pu8 Where to store the converted number. (optional)2829 */2830 RTDECL(int) RTStrToUInt8Full(const char *pszValue, unsigned uBase, uint8_t *pu8);2831 2832 /**2833 * Converts a string representation of a number to a 8-bit unsigned number.2834 * The base is guessed.2835 *2836 * @returns 8-bit unsigned number on success.2837 * @returns 0 on failure.2838 * @param pszValue Pointer to the string value.2839 */2840 RTDECL(uint8_t) RTStrToUInt8(const char *pszValue);2841 2842 /**2843 * Converts a string representation of a number to a 64-bit signed number.2844 *2845 * @returns iprt status code.2846 * Warnings are used to indicate conversion problems.2847 * @retval VWRN_NUMBER_TOO_BIG2848 * @retval VWRN_TRAILING_CHARS2849 * @retval VWRN_TRAILING_SPACES2850 * @retval VINF_SUCCESS2851 * @retval VERR_NO_DIGITS2852 *2853 * @param pszValue Pointer to the string value.2854 * @param ppszNext Where to store the pointer to the first char following the number. 
(Optional)2855 * @param uBase The base of the representation used.2856 * If 0 the function will look for known prefixes before defaulting to 10.2857 * @param pi64 Where to store the converted number. (optional)2858 */2859 RTDECL(int) RTStrToInt64Ex(const char *pszValue, char **ppszNext, unsigned uBase, int64_t *pi64);2860 2861 /**2862 * Converts a string representation of a number to a 64-bit signed number,2863 * making sure the full string is converted.2864 *2865 * @returns iprt status code.2866 * Warnings are used to indicate conversion problems.2867 * @retval VWRN_NUMBER_TOO_BIG2868 * @retval VINF_SUCCESS2869 * @retval VERR_TRAILING_CHARS2870 * @retval VERR_TRAILING_SPACES2871 * @retval VERR_NO_DIGITS2872 *2873 * @param pszValue Pointer to the string value.2874 * @param uBase The base of the representation used.2875 * If 0 the function will look for known prefixes before defaulting to 10.2876 * @param pi64 Where to store the converted number. (optional)2877 */2878 RTDECL(int) RTStrToInt64Full(const char *pszValue, unsigned uBase, int64_t *pi64);2879 2880 /**2881 * Converts a string representation of a number to a 64-bit signed number.2882 * The base is guessed.2883 *2884 * @returns 64-bit signed number on success.2885 * @returns 0 on failure.2886 * @param pszValue Pointer to the string value.2887 */2888 RTDECL(int64_t) RTStrToInt64(const char *pszValue);2889 2890 /**2891 * Converts a string representation of a number to a 32-bit signed number.2892 *2893 * @returns iprt status code.2894 * Warnings are used to indicate conversion problems.2895 * @retval VWRN_NUMBER_TOO_BIG2896 * @retval VWRN_TRAILING_CHARS2897 * @retval VWRN_TRAILING_SPACES2898 * @retval VINF_SUCCESS2899 * @retval VERR_NO_DIGITS2900 *2901 * @param pszValue Pointer to the string value.2902 * @param ppszNext Where to store the pointer to the first char following the number. 
(Optional)2903 * @param uBase The base of the representation used.2904 * If 0 the function will look for known prefixes before defaulting to 10.2905 * @param pi32 Where to store the converted number. (optional)2906 */2907 RTDECL(int) RTStrToInt32Ex(const char *pszValue, char **ppszNext, unsigned uBase, int32_t *pi32);2908 2909 /**2910 * Converts a string representation of a number to a 32-bit signed number,2911 * making sure the full string is converted.2912 *2913 * @returns iprt status code.2914 * Warnings are used to indicate conversion problems.2915 * @retval VWRN_NUMBER_TOO_BIG2916 * @retval VINF_SUCCESS2917 * @retval VERR_TRAILING_CHARS2918 * @retval VERR_TRAILING_SPACES2919 * @retval VERR_NO_DIGITS2920 *2921 * @param pszValue Pointer to the string value.2922 * @param uBase The base of the representation used.2923 * If 0 the function will look for known prefixes before defaulting to 10.2924 * @param pi32 Where to store the converted number. (optional)2925 */2926 RTDECL(int) RTStrToInt32Full(const char *pszValue, unsigned uBase, int32_t *pi32);2927 2928 /**2929 * Converts a string representation of a number to a 32-bit signed number.2930 * The base is guessed.2931 *2932 * @returns 32-bit signed number on success.2933 * @returns 0 on failure.2934 * @param pszValue Pointer to the string value.2935 */2936 RTDECL(int32_t) RTStrToInt32(const char *pszValue);2937 2938 /**2939 * Converts a string representation of a number to a 16-bit signed number.2940 *2941 * @returns iprt status code.2942 * Warnings are used to indicate conversion problems.2943 * @retval VWRN_NUMBER_TOO_BIG2944 * @retval VWRN_TRAILING_CHARS2945 * @retval VWRN_TRAILING_SPACES2946 * @retval VINF_SUCCESS2947 * @retval VERR_NO_DIGITS2948 *2949 * @param pszValue Pointer to the string value.2950 * @param ppszNext Where to store the pointer to the first char following the number. 
(Optional)2951 * @param uBase The base of the representation used.2952 * If 0 the function will look for known prefixes before defaulting to 10.2953 * @param pi16 Where to store the converted number. (optional)2954 */2955 RTDECL(int) RTStrToInt16Ex(const char *pszValue, char **ppszNext, unsigned uBase, int16_t *pi16);2956 2957 /**2958 * Converts a string representation of a number to a 16-bit signed number,2959 * making sure the full string is converted.2960 *2961 * @returns iprt status code.2962 * Warnings are used to indicate conversion problems.2963 * @retval VWRN_NUMBER_TOO_BIG2964 * @retval VINF_SUCCESS2965 * @retval VERR_TRAILING_CHARS2966 * @retval VERR_TRAILING_SPACES2967 * @retval VERR_NO_DIGITS2968 *2969 * @param pszValue Pointer to the string value.2970 * @param uBase The base of the representation used.2971 * If 0 the function will look for known prefixes before defaulting to 10.2972 * @param pi16 Where to store the converted number. (optional)2973 */2974 RTDECL(int) RTStrToInt16Full(const char *pszValue, unsigned uBase, int16_t *pi16);2975 2976 /**2977 * Converts a string representation of a number to a 16-bit signed number.2978 * The base is guessed.2979 *2980 * @returns 16-bit signed number on success.2981 * @returns 0 on failure.2982 * @param pszValue Pointer to the string value.2983 */2984 RTDECL(int16_t) RTStrToInt16(const char *pszValue);2985 2986 /**2987 * Converts a string representation of a number to a 8-bit signed number.2988 *2989 * @returns iprt status code.2990 * Warnings are used to indicate conversion problems.2991 * @retval VWRN_NUMBER_TOO_BIG2992 * @retval VWRN_TRAILING_CHARS2993 * @retval VWRN_TRAILING_SPACES2994 * @retval VINF_SUCCESS2995 * @retval VERR_NO_DIGITS2996 *2997 * @param pszValue Pointer to the string value.2998 * @param ppszNext Where to store the pointer to the first char following the number. 
(Optional)2999 * @param uBase The base of the representation used.3000 * If 0 the function will look for known prefixes before defaulting to 10.3001 * @param pi8 Where to store the converted number. (optional)3002 */3003 RTDECL(int) RTStrToInt8Ex(const char *pszValue, char **ppszNext, unsigned uBase, int8_t *pi8);3004 3005 /**3006 * Converts a string representation of a number to a 8-bit signed number,3007 * making sure the full string is converted.3008 *3009 * @returns iprt status code.3010 * Warnings are used to indicate conversion problems.3011 * @retval VWRN_NUMBER_TOO_BIG3012 * @retval VINF_SUCCESS3013 * @retval VERR_TRAILING_CHARS3014 * @retval VERR_TRAILING_SPACES3015 * @retval VERR_NO_DIGITS3016 *3017 * @param pszValue Pointer to the string value.3018 * @param uBase The base of the representation used.3019 * If 0 the function will look for known prefixes before defaulting to 10.3020 * @param pi8 Where to store the converted number. (optional)3021 */3022 RTDECL(int) RTStrToInt8Full(const char *pszValue, unsigned uBase, int8_t *pi8);3023 3024 /**3025 * Converts a string representation of a number to a 8-bit signed number.3026 * The base is guessed.3027 *3028 * @returns 8-bit signed number on success.3029 * @returns 0 on failure.3030 * @param pszValue Pointer to the string value.3031 */3032 RTDECL(int8_t) RTStrToInt8(const char *pszValue);3033 3034 /**3035 * Formats a buffer stream as hex bytes.3036 *3037 * The default is no separating spaces or line breaks or anything.3038 *3039 * @returns IPRT status code.3040 * @retval VERR_INVALID_POINTER if any of the pointers are wrong.3041 * @retval VERR_BUFFER_OVERFLOW if the buffer is insufficient to hold the bytes.3042 *3043 * @param pszBuf Output string buffer.3044 * @param cchBuf The size of the output buffer.3045 * @param pv Pointer to the bytes to stringify.3046 * @param cb The number of bytes to stringify.3047 * @param fFlags Combination of RTSTRPRINTHEXBYTES_F_XXX values.3048 * @sa RTUtf16PrintHexBytes.3049 
*/3050 RTDECL(int) RTStrPrintHexBytes(char *pszBuf, size_t cchBuf, void const *pv, size_t cb, uint32_t fFlags);3051 /** @name RTSTRPRINTHEXBYTES_F_XXX - flags for RTStrPrintHexBytes and RTUtf16PrintHexBytes.3052 * @{ */3053 /** Upper case hex digits, the default is lower case. */3054 #define RTSTRPRINTHEXBYTES_F_UPPER RT_BIT(0)3055 /** @} */3056 3057 /**3058 * Converts a string of hex bytes back into binary data.3059 *3060 * @returns IPRT status code.3061 * @retval VERR_INVALID_POINTER if any of the pointers are wrong.3062 * @retval VERR_BUFFER_OVERFLOW if the string contains too many hex bytes.3063 * @retval VERR_BUFFER_UNDERFLOW if there aren't enough hex bytes to fill up3064 * the output buffer.3065 * @retval VERR_UNEVEN_INPUT if the input contains a half byte.3066 * @retval VERR_NO_DIGITS3067 * @retval VWRN_TRAILING_CHARS3068 * @retval VWRN_TRAILING_SPACES3069 *3070 * @param pszHex The string containing the hex bytes.3071 * @param pv Output buffer.3072 * @param cb The size of the output buffer.3073 * @param fFlags Must be zero, reserved for future use.3074 */3075 RTDECL(int) RTStrConvertHexBytes(char const *pszHex, void *pv, size_t cb, uint32_t fFlags);3076 3077 /** @} */3078 3079 3080 /** @defgroup rt_str_space Unique String Space3081 * @{3082 */3083 3084 /** Pointer to a string name space container node core. */3085 typedef struct RTSTRSPACECORE *PRTSTRSPACECORE;3086 /** Pointer to a pointer to a string name space container node core. */3087 typedef PRTSTRSPACECORE *PPRTSTRSPACECORE;3088 3089 /**3090 * String name space container node core.3091 */3092 typedef struct RTSTRSPACECORE3093 {3094 /** Hash key. Don't touch. */3095 uint32_t Key;3096 /** Pointer to the left leaf node. Don't touch. */3097 PRTSTRSPACECORE pLeft;3098 /** Pointer to the right leaf node. Don't touch. */3099 PRTSTRSPACECORE pRight;3100 /** Pointer to the list of strings with the same key. Don't touch. 
*/3101 PRTSTRSPACECORE pList;3102 /** Height of this tree: max(height(left), height(right)) + 1. Don't touch. */3103 unsigned char uchHeight;3104 /** The string length. Read only! */3105 size_t cchString;3106 /** Pointer to the string. Read only! */3107 const char *pszString;3108 } RTSTRSPACECORE;3109 3110 /** String space. (Initialize with NULL.) */3111 typedef PRTSTRSPACECORE RTSTRSPACE;3112 /** Pointer to a string space. */3113 typedef PPRTSTRSPACECORE PRTSTRSPACE;3114 3115 3116 /**3117 * Inserts a string into a unique string space.3118 *3119 * @returns true on success.3120 * @returns false if the string collided with an existing string.3121 * @param pStrSpace The space to insert it into.3122 * @param pStr The string node.3123 */3124 RTDECL(bool) RTStrSpaceInsert(PRTSTRSPACE pStrSpace, PRTSTRSPACECORE pStr);3125 3126 /**3127 * Removes a string from a unique string space.3128 *3129 * @returns Pointer to the removed string node.3130 * @returns NULL if the string was not found in the string space.3131 * @param pStrSpace The space to remove it from.3132 * @param pszString The string to remove.3133 */3134 RTDECL(PRTSTRSPACECORE) RTStrSpaceRemove(PRTSTRSPACE pStrSpace, const char *pszString);3135 3136 /**3137 * Gets a string from a unique string space.3138 *3139 * @returns Pointer to the string node.3140 * @returns NULL if the string was not found in the string space.3141 * @param pStrSpace The space to get it from.3142 * @param pszString The string to get.3143 */3144 RTDECL(PRTSTRSPACECORE) RTStrSpaceGet(PRTSTRSPACE pStrSpace, const char *pszString);3145 3146 /**3147 * Gets a string from a unique string space.3148 *3149 * @returns Pointer to the string node.3150 * @returns NULL if the string was not found in the string space.3151 * @param pStrSpace The space to get it from.3152 * @param pszString The string to get.3153 * @param cchMax The max string length to evaluate. 
Passing3154 * RTSTR_MAX is ok and makes it behave just like3155 * RTStrSpaceGet.3156 */3157 RTDECL(PRTSTRSPACECORE) RTStrSpaceGetN(PRTSTRSPACE pStrSpace, const char *pszString, size_t cchMax);3158 3159 /**3160 * Callback function for RTStrSpaceEnumerate() and RTStrSpaceDestroy().3161 *3162 * @returns 0 on continue.3163 * @returns Non-zero to abort the operation.3164 * @param pStr The string node.3165 * @param pvUser The user specified argument.3166 */3167 typedef DECLCALLBACK(int) FNRTSTRSPACECALLBACK(PRTSTRSPACECORE pStr, void *pvUser);3168 /** Pointer to callback function for RTStrSpaceEnumerate() and RTStrSpaceDestroy(). */3169 typedef FNRTSTRSPACECALLBACK *PFNRTSTRSPACECALLBACK;3170 3171 /**3172 * Destroys the string space.3173 *3174 * The caller supplies a callback which will be called for each of the string3175 * nodes for freeing their memory and other resources.3176 *3177 * @returns 0 or whatever non-zero return value pfnCallback returned3178 * when aborting the destruction.3179 * @param pStrSpace The space to destroy.3180 * @param pfnCallback The callback.3181 * @param pvUser The user argument.3182 */3183 RTDECL(int) RTStrSpaceDestroy(PRTSTRSPACE pStrSpace, PFNRTSTRSPACECALLBACK pfnCallback, void *pvUser);3184 3185 /**3186 * Enumerates the string space.3187 * The caller supplies a callback which will be called for each of3188 * the string nodes.3189 *3190 * @returns 0 or whatever non-zero return value pfnCallback returned3191 * when aborting the enumeration.3192 * @param pStrSpace The space to enumerate.3193 * @param pfnCallback The callback.3194 * @param pvUser The user argument.3195 */3196 RTDECL(int) RTStrSpaceEnumerate(PRTSTRSPACE pStrSpace, PFNRTSTRSPACECALLBACK pfnCallback, void *pvUser);3197 3198 /** @} */3199 3200 3201 /** @defgroup rt_str_hash String hashing3202 * @{ */3203 3204 /**3205 * Hashes the given string using algorithm \#1.3206 *3207 * @returns String hash.3208 * @param pszString The string to hash.3209 */3210 RTDECL(uint32_t) 
RTStrHash1(const char *pszString);3211 3212 /**3213 * Hashes the given string using algorithm \#1.3214 *3215 * @returns String hash.3216 * @param pszString The string to hash.3217 * @param cchString The max length to hash. Hashing will stop if the3218 * terminator character is encountered first. Passing3219 * RTSTR_MAX is fine.3220 */3221 RTDECL(uint32_t) RTStrHash1N(const char *pszString, size_t cchString);3222 3223 /**3224 * Hashes the given strings as if they were concatenated using algorithm \#1.3225 *3226 * @returns String hash.3227 * @param cPairs The number of string / length pairs in the3228 * ellipsis.3229 * @param ... List of string (const char *) and length3230 * (size_t) pairs. Passing RTSTR_MAX as the size is3231 * fine.3232 */3233 RTDECL(uint32_t) RTStrHash1ExN(size_t cPairs, ...);3234 3235 /**3236 * Hashes the given strings as if they were concatenated using algorithm \#1.3237 *3238 * @returns String hash.3239 * @param cPairs The number of string / length pairs in the @a va.3240 * @param va List of string (const char *) and length3241 * (size_t) pairs. Passing RTSTR_MAX as the size is3242 * fine.3243 */3244 RTDECL(uint32_t) RTStrHash1ExNV(size_t cPairs, va_list va);3245 3246 /** @} */3247 32 3248 33 3249 34 /** @defgroup rt_str_utf16 UTF-16 String Manipulation 35 * @ingroup grp_rt_str 3250 36 * @{ 3251 37 */ … … 4092 878 4093 879 4094 /** @defgroup rt_str_latin1 Latin-1 (ISO-8859-1) String Manipulation 4095 * @{ 4096 */ 4097 4098 /** 4099 * Calculates the length of the Latin-1 (ISO-8859-1) string in RTUTF16 items. 4100 * 4101 * @returns Number of RTUTF16 items. 4102 * @param psz The Latin-1 string. 4103 */ 4104 RTDECL(size_t) RTLatin1CalcUtf16Len(const char *psz); 4105 4106 /** 4107 * Calculates the length of the Latin-1 (ISO-8859-1) string in RTUTF16 items. 4108 * 4109 * @returns iprt status code. 4110 * @param psz The Latin-1 string. 4111 * @param cch The max string length. Use RTSTR_MAX to process the 4112 * entire string. 
4113 * @param pcwc Where to store the string length. Optional. 4114 * This is undefined on failure. 4115 */ 4116 RTDECL(int) RTLatin1CalcUtf16LenEx(const char *psz, size_t cch, size_t *pcwc); 4117 4118 /** 4119 * Translate a Latin-1 (ISO-8859-1) string into a UTF-16 allocating the result 4120 * buffer (default tag). 4121 * 4122 * @returns iprt status code. 4123 * @param pszString The Latin-1 string to convert. 4124 * @param ppwszString Receives pointer to the allocated UTF-16 string. The 4125 * returned string must be freed using RTUtf16Free(). 4126 */ 4127 #define RTLatin1ToUtf16(pszString, ppwszString) RTLatin1ToUtf16Tag((pszString), (ppwszString), RTSTR_TAG) 4128 4129 /** 4130 * Translate a Latin-1 (ISO-8859-1) string into a UTF-16 allocating the result 4131 * buffer (custom tag). 4132 * 4133 * @returns iprt status code. 4134 * @param pszString The Latin-1 string to convert. 4135 * @param ppwszString Receives pointer to the allocated UTF-16 string. The 4136 * returned string must be freed using RTUtf16Free(). 4137 * @param pszTag Allocation tag used for statistics and such. 4138 */ 4139 RTDECL(int) RTLatin1ToUtf16Tag(const char *pszString, PRTUTF16 *ppwszString, const char *pszTag); 4140 4141 /** 4142 * Translates pszString from Latin-1 (ISO-8859-1) to UTF-16, allocating the 4143 * result buffer if requested (default tag). 4144 * 4145 * @returns iprt status code. 4146 * @param pszString The Latin-1 string to convert. 4147 * @param cchString The maximum size in chars (the type) to convert. 4148 * The conversion stops when it reaches cchString or 4149 * the string terminator ('\\0'). 4150 * Use RTSTR_MAX to translate the entire string. 4151 * @param ppwsz If cwc is non-zero, this must either be pointing 4152 * to pointer to a buffer of the specified size, or 4153 * pointer to a NULL pointer. 4154 * If *ppwsz is NULL or cwc is zero a buffer of at 4155 * least cwc items will be allocated to hold the 4156 * translated string. 
If a buffer was requested it 4157 * must be freed using RTUtf16Free(). 4158 * @param cwc The buffer size in RTUTF16s. This includes the 4159 * terminator. 4160 * @param pcwc Where to store the length of the translated string, 4161 * excluding the terminator. (Optional) 4162 * 4163 * This may be set under some error conditions, 4164 * however, only for VERR_BUFFER_OVERFLOW and 4165 * VERR_NO_STR_MEMORY will it contain a valid string 4166 * length that can be used to resize the buffer. 4167 */ 4168 #define RTLatin1ToUtf16Ex(pszString, cchString, ppwsz, cwc, pcwc) \ 4169 RTLatin1ToUtf16ExTag((pszString), (cchString), (ppwsz), (cwc), (pcwc), RTSTR_TAG) 4170 4171 /** 4172 * Translates pszString from Latin-1 (ISO-8859-1) to UTF-16, allocating the 4173 * result buffer if requested. 4174 * 4175 * @returns iprt status code. 4176 * @param pszString The Latin-1 string to convert. 4177 * @param cchString The maximum size in chars (the type) to convert. 4178 * The conversion stops when it reaches cchString or 4179 * the string terminator ('\\0'). 4180 * Use RTSTR_MAX to translate the entire string. 4181 * @param ppwsz If cwc is non-zero, this must either be pointing 4182 * to pointer to a buffer of the specified size, or 4183 * pointer to a NULL pointer. 4184 * If *ppwsz is NULL or cwc is zero a buffer of at 4185 * least cwc items will be allocated to hold the 4186 * translated string. If a buffer was requested it 4187 * must be freed using RTUtf16Free(). 4188 * @param cwc The buffer size in RTUTF16s. This includes the 4189 * terminator. 4190 * @param pcwc Where to store the length of the translated string, 4191 * excluding the terminator. (Optional) 4192 * 4193 * This may be set under some error conditions, 4194 * however, only for VERR_BUFFER_OVERFLOW and 4195 * VERR_NO_STR_MEMORY will it contain a valid string 4196 * length that can be used to resize the buffer. 4197 * @param pszTag Allocation tag used for statistics and such. 
4198 */ 4199 RTDECL(int) RTLatin1ToUtf16ExTag(const char *pszString, size_t cchString, 4200 PRTUTF16 *ppwsz, size_t cwc, size_t *pcwc, const char *pszTag); 880 RT_C_DECLS_END 4201 881 4202 882 /** @} */ 4203 883 4204 #ifndef ___iprt_nocrt_string_h4205 # if defined(RT_OS_WINDOWS)4206 RTDECL(void *) mempcpy(void *pvDst, const void *pvSrc, size_t cb);4207 # endif4208 884 #endif 4209 885 4210 4211 RT_C_DECLS_END4212 4213 /** @} */4214 4215 #endif4216
Note:
See TracChangeset
for help on using the changeset viewer.