Changeset 67391 in vbox for trunk/src/VBox/Runtime/common/string
- Timestamp:
- Jun 14, 2017 12:13:48 PM (8 years ago)
- svn:sync-xref-src-repo-rev:
- 116116
- File:
-
- 1 edited
Legend:
- Unmodified
- Added
- Removed
-
trunk/src/VBox/Runtime/common/string/utf-16.cpp
r66731 r67391 375 375 * @param cwc The max length of the UTF-16 string to consider. 376 376 * @param pcch Where to store the length (excluding '\\0') of the UTF-8 string. (cch == cb, btw) 377 * 378 * @note rtUtf16BigCalcUtf8Length is a copy of this. 377 379 */ 378 380 static int rtUtf16CalcUtf8Length(PCRTUTF16 pwsz, size_t cwc, size_t *pcch) … … 385 387 if (!wc) 386 388 break; 387 elseif (wc < 0xd800 || wc > 0xdfff)389 if (wc < 0xd800 || wc > 0xdfff) 388 390 { 389 391 if (wc < 0x80) … … 433 435 434 436 /** 437 * Validate the UTF-16BE encoding and calculates the length of an UTF-8 438 * encoding. 439 * 440 * @returns iprt status code. 441 * @param pwsz The UTF-16 string. 442 * @param cwc The max length of the UTF-16BE string to consider. 443 * @param pcch Where to store the length (excluding '\\0') of the UTF-8 string. (cch == cb, btw) 444 * 445 * @note Code is a copy of rtUtf16CalcUtf8Length, but with two RT_BE2H_U16 446 * invocations inserted. 447 */ 448 static int rtUtf16BigCalcUtf8Length(PCRTUTF16 pwsz, size_t cwc, size_t *pcch) 449 { 450 int rc = VINF_SUCCESS; 451 size_t cch = 0; 452 while (cwc > 0) 453 { 454 RTUTF16 wc = *pwsz++; cwc--; 455 if (!wc) 456 break; 457 wc = RT_BE2H_U16(wc); 458 if (wc < 0xd800 || wc > 0xdfff) 459 { 460 if (wc < 0x80) 461 cch++; 462 else if (wc < 0x800) 463 cch += 2; 464 else if (wc < 0xfffe) 465 cch += 3; 466 else 467 { 468 RTStrAssertMsgFailed(("endian indicator! wc=%#x\n", wc)); 469 rc = VERR_CODE_POINT_ENDIAN_INDICATOR; 470 break; 471 } 472 } 473 else 474 { 475 if (wc >= 0xdc00) 476 { 477 RTStrAssertMsgFailed(("Wrong 1st char in surrogate! wc=%#x\n", wc)); 478 rc = VERR_INVALID_UTF16_ENCODING; 479 break; 480 } 481 if (cwc <= 0) 482 { 483 RTStrAssertMsgFailed(("Invalid length! wc=%#x\n", wc)); 484 rc = VERR_INVALID_UTF16_ENCODING; 485 break; 486 } 487 wc = *pwsz++; cwc--; 488 wc = RT_BE2H_U16(wc); 489 if (wc < 0xdc00 || wc > 0xdfff) 490 { 491 RTStrAssertMsgFailed(("Wrong 2nd char in surrogate! wc=%#x\n", wc)); 492 rc = VERR_INVALID_UTF16_ENCODING; 493 break; 494 } 495 cch += 4; 496 } 497 } 498 499 500 /* done */ 501 *pcch = cch; 502 return rc; 503 } 504 505 506 /** 435 507 * Recodes an valid UTF-16 string as UTF-8. 436 508 * … … 442 514 * @param cch The size of the UTF-8 buffer, excluding the terminator. 443 515 * @param pcch Where to store the number of octets actually encoded. 516 * @note rtUtf16BigRecodeAsUtf8 is a copy of this. 444 517 */ 445 518 static int rtUtf16RecodeAsUtf8(PCRTUTF16 pwsz, size_t cwc, char *psz, size_t cch, size_t *pcch) … … 452 525 if (!wc) 453 526 break; 454 elseif (wc < 0xd800 || wc > 0xdfff)527 if (wc < 0xd800 || wc > 0xdfff) 455 528 { 456 529 if (wc < 0x80) … … 542 615 543 616 617 /** 618 * Recodes an valid UTF-16BE string as UTF-8. 619 * 620 * @returns iprt status code. 621 * @param pwsz The UTF-16BE string. 622 * @param cwc The number of RTUTF16 characters to process from pwsz. The recoding 623 * will stop when cwc or '\\0' is reached. 624 * @param psz Where to store the UTF-8 string. 625 * @param cch The size of the UTF-8 buffer, excluding the terminator. 626 * @param pcch Where to store the number of octets actually encoded. 627 * 628 * @note Copy of rtUtf16RecodeAsUtf8 with a few RT_BE2H_U16 invocations 629 * insterted. 630 */ 631 static int rtUtf16BigRecodeAsUtf8(PCRTUTF16 pwsz, size_t cwc, char *psz, size_t cch, size_t *pcch) 632 { 633 unsigned char *pwch = (unsigned char *)psz; 634 int rc = VINF_SUCCESS; 635 while (cwc > 0) 636 { 637 RTUTF16 wc = *pwsz++; cwc--; 638 if (!wc) 639 break; 640 wc = RT_BE2H_U16(wc); 641 if (wc < 0xd800 || wc > 0xdfff) 642 { 643 if (wc < 0x80) 644 { 645 if (RT_UNLIKELY(cch < 1)) 646 { 647 RTStrAssertMsgFailed(("Buffer overflow! 1\n")); 648 rc = VERR_BUFFER_OVERFLOW; 649 break; 650 } 651 cch--; 652 *pwch++ = (unsigned char)wc; 653 } 654 else if (wc < 0x800) 655 { 656 if (RT_UNLIKELY(cch < 2)) 657 { 658 RTStrAssertMsgFailed(("Buffer overflow! 2\n")); 659 rc = VERR_BUFFER_OVERFLOW; 660 break; 661 } 662 cch -= 2; 663 *pwch++ = 0xc0 | (wc >> 6); 664 *pwch++ = 0x80 | (wc & 0x3f); 665 } 666 else if (wc < 0xfffe) 667 { 668 if (RT_UNLIKELY(cch < 3)) 669 { 670 RTStrAssertMsgFailed(("Buffer overflow! 3\n")); 671 rc = VERR_BUFFER_OVERFLOW; 672 break; 673 } 674 cch -= 3; 675 *pwch++ = 0xe0 | (wc >> 12); 676 *pwch++ = 0x80 | ((wc >> 6) & 0x3f); 677 *pwch++ = 0x80 | (wc & 0x3f); 678 } 679 else 680 { 681 RTStrAssertMsgFailed(("endian indicator! wc=%#x\n", wc)); 682 rc = VERR_CODE_POINT_ENDIAN_INDICATOR; 683 break; 684 } 685 } 686 else 687 { 688 if (wc >= 0xdc00) 689 { 690 RTStrAssertMsgFailed(("Wrong 1st char in surrogate! wc=%#x\n", wc)); 691 rc = VERR_INVALID_UTF16_ENCODING; 692 break; 693 } 694 if (cwc <= 0) 695 { 696 RTStrAssertMsgFailed(("Invalid length! wc=%#x\n", wc)); 697 rc = VERR_INVALID_UTF16_ENCODING; 698 break; 699 } 700 RTUTF16 wc2 = *pwsz++; cwc--; 701 wc2 = RT_BE2H_U16(wc2); 702 if (wc2 < 0xdc00 || wc2 > 0xdfff) 703 { 704 RTStrAssertMsgFailed(("Wrong 2nd char in surrogate! wc=%#x\n", wc)); 705 rc = VERR_INVALID_UTF16_ENCODING; 706 break; 707 } 708 uint32_t CodePoint = 0x10000 709 + ( ((wc & 0x3ff) << 10) 710 | (wc2 & 0x3ff)); 711 if (RT_UNLIKELY(cch < 4)) 712 { 713 RTStrAssertMsgFailed(("Buffer overflow! 4\n")); 714 rc = VERR_BUFFER_OVERFLOW; 715 break; 716 } 717 cch -= 4; 718 *pwch++ = 0xf0 | (CodePoint >> 18); 719 *pwch++ = 0x80 | ((CodePoint >> 12) & 0x3f); 720 *pwch++ = 0x80 | ((CodePoint >> 6) & 0x3f); 721 *pwch++ = 0x80 | (CodePoint & 0x3f); 722 } 723 } 724 725 /* done */ 726 *pwch = '\0'; 727 *pcch = (char *)pwch - psz; 728 return rc; 729 } 730 731 544 732 545 733 RTDECL(int) RTUtf16ToUtf8Tag(PCRTUTF16 pwszString, char **ppszString, const char *pszTag) … … 582 770 583 771 772 RTDECL(int) RTUtf16BigToUtf8Tag(PCRTUTF16 pwszString, char **ppszString, const char *pszTag) 773 { 774 /* 775 * Validate input. 776 */ 777 Assert(VALID_PTR(ppszString)); 778 Assert(VALID_PTR(pwszString)); 779 *ppszString = NULL; 780 781 /* 782 * Validate the UTF-16 string and calculate the length of the UTF-8 encoding of it. 783 */ 784 size_t cch; 785 int rc = rtUtf16BigCalcUtf8Length(pwszString, RTSTR_MAX, &cch); 786 if (RT_SUCCESS(rc)) 787 { 788 /* 789 * Allocate buffer and recode it. 790 */ 791 char *pszResult = (char *)RTMemAllocTag(cch + 1, pszTag); 792 if (pszResult) 793 { 794 rc = rtUtf16BigRecodeAsUtf8(pwszString, RTSTR_MAX, pszResult, cch, &cch); 795 if (RT_SUCCESS(rc)) 796 { 797 *ppszString = pszResult; 798 return rc; 799 } 800 801 RTMemFree(pszResult); 802 } 803 else 804 rc = VERR_NO_STR_MEMORY; 805 } 806 return rc; 807 } 808 RT_EXPORT_SYMBOL(RTUtf16BigToUtf8Tag); 809 810 584 811 RTDECL(int) RTUtf16ToUtf8ExTag(PCRTUTF16 pwszString, size_t cwcString, char **ppsz, size_t cch, size_t *pcch, const char *pszTag) 585 812 { … … 587 814 * Validate input. 588 815 */ 589 Assert (VALID_PTR(pwszString));590 Assert (VALID_PTR(ppsz));591 Assert (!pcch || VALID_PTR(pcch));816 AssertPtr(pwszString); 817 AssertPtr(ppsz); 818 AssertPtrNull(pcch); 592 819 593 820 /* … … 640 867 641 868 869 RTDECL(int) RTUtf16BigToUtf8ExTag(PCRTUTF16 pwszString, size_t cwcString, char **ppsz, size_t cch, size_t *pcch, const char *pszTag) 870 { 871 /* 872 * Validate input. 873 */ 874 AssertPtr(pwszString); 875 AssertPtr(ppsz); 876 AssertPtrNull(pcch); 877 878 /* 879 * Validate the UTF-16BE string and calculate the length of the UTF-8 encoding of it. 880 */ 881 size_t cchResult; 882 int rc = rtUtf16BigCalcUtf8Length(pwszString, cwcString, &cchResult); 883 if (RT_SUCCESS(rc)) 884 { 885 if (pcch) 886 *pcch = cchResult; 887 888 /* 889 * Check buffer size / Allocate buffer and recode it. 890 */ 891 bool fShouldFree; 892 char *pszResult; 893 if (cch > 0 && *ppsz) 894 { 895 fShouldFree = false; 896 if (RT_UNLIKELY(cch <= cchResult)) 897 return VERR_BUFFER_OVERFLOW; 898 pszResult = *ppsz; 899 } 900 else 901 { 902 *ppsz = NULL; 903 fShouldFree = true; 904 cch = RT_MAX(cch, cchResult + 1); 905 pszResult = (char *)RTStrAllocTag(cch, pszTag); 906 } 907 if (pszResult) 908 { 909 rc = rtUtf16BigRecodeAsUtf8(pwszString, cwcString, pszResult, cch - 1, &cch); 910 if (RT_SUCCESS(rc)) 911 { 912 *ppsz = pszResult; 913 return rc; 914 } 915 916 if (fShouldFree) 917 RTStrFree(pszResult); 918 } 919 else 920 rc = VERR_NO_STR_MEMORY; 921 } 922 return rc; 923 } 924 RT_EXPORT_SYMBOL(RTUtf16BigToUtf8ExTag); 925 926 642 927 RTDECL(size_t) RTUtf16CalcUtf8Len(PCRTUTF16 pwsz) 643 928 {
Note:
See TracChangeset
for help on using the changeset viewer.