VirtualBox

Changeset 68316 in vbox for trunk


Ignore:
Timestamp:
Aug 7, 2017 2:19:34 PM (7 years ago)
Author:
vboxsync
Message:

iprt/utf16.h: Added some more RTUtf16Big functions, introducing a handful of RTUtf16Little functions to go along with them.

Location:
trunk
Files:
3 edited

Legend:

Unmodified
Added
Removed
  • trunk/include/iprt/mangling.h

    r68147 r68316  
    23182318# define RTUtf16CalcUtf8Len                             RT_MANGLER(RTUtf16CalcUtf8Len)
    23192319# define RTUtf16CalcUtf8LenEx                           RT_MANGLER(RTUtf16CalcUtf8LenEx)
     2320# define RTUtf16BigCalcUtf8Len                          RT_MANGLER(RTUtf16BigCalcUtf8Len)
     2321# define RTUtf16BigCalcUtf8LenEx                        RT_MANGLER(RTUtf16BigCalcUtf8LenEx)
     2322# define RTUtf16LittleCalcUtf8Len                       RT_MANGLER(RTUtf16LittleCalcUtf8Len)
     2323# define RTUtf16LittleCalcUtf8LenEx                     RT_MANGLER(RTUtf16LittleCalcUtf8LenEx)
    23202324# define RTUtf16Cmp                                     RT_MANGLER(RTUtf16Cmp)
    23212325# define RTUtf16CmpAscii                                RT_MANGLER(RTUtf16CmpAscii)
     
    23452349# define RTUtf16ToUtf8ExTag                             RT_MANGLER(RTUtf16ToUtf8ExTag)
    23462350# define RTUtf16BigToUtf8ExTag                          RT_MANGLER(RTUtf16BigToUtf8ExTag)
     2351# define RTUtf16LittleToUtf8ExTag                       RT_MANGLER(RTUtf16LittleToUtf8ExTag)
    23472352# define RTUtf16ToUtf8Tag                               RT_MANGLER(RTUtf16ToUtf8Tag)
    23482353# define RTUtf16BigToUtf8Tag                            RT_MANGLER(RTUtf16BigToUtf8Tag)
     2354# define RTUtf16LittleToUtf8Tag                         RT_MANGLER(RTUtf16LittleToUtf8Tag)
    23492355# define RTUtf16ValidateEncoding                        RT_MANGLER(RTUtf16ValidateEncoding)
    23502356# define RTUtf16ValidateEncodingEx                      RT_MANGLER(RTUtf16ValidateEncodingEx)
  • trunk/include/iprt/utf16.h

    r67979 r68316  
    781781
    782782/**
     783 * Translates UTF-16LE to UTF-8 using buffer provided by the caller or a
     784 * fittingly sized buffer allocated by the function (default tag).
     785 *
     786 * This differs from RTUtf16ToUtf8Ex in that the input is always a
     787 * little-endian string.
     788 *
     789 * @returns iprt status code.
     790 * @param   pwszString      The UTF-16LE string to convert.
     791 * @param   cwcString       The number of RTUTF16 items to translate from pwszString.
     792 *                          The translation will stop when reaching cwcString or the terminator ('\\0').
     793 *                          Use RTSTR_MAX to translate the entire string.
     794 * @param   ppsz            If cch is non-zero, this must either be pointing to a pointer to
     795 *                          a buffer of the specified size, or pointer to a NULL pointer.
     796 *                          If *ppsz is NULL or cch is zero a buffer of at least cch chars
     797 *                          will be allocated to hold the translated string.
     798 *                          If a buffer was requested it must be freed using RTStrFree().
     799 * @param   cch             The buffer size in chars (the type). This includes the terminator.
     800 * @param   pcch            Where to store the length of the translated string,
     801 *                          excluding the terminator. (Optional)
     802 *
     803 *                          This may be set under some error conditions,
     804 *                          however, only for VERR_BUFFER_OVERFLOW and
     805 *                          VERR_NO_STR_MEMORY will it contain a valid string
     806 *                          length that can be used to resize the buffer.
     807 */
     808#define RTUtf16LittleToUtf8Ex(pwszString, cwcString, ppsz, cch, pcch) \
     809    RTUtf16LittleToUtf8ExTag((pwszString), (cwcString), (ppsz), (cch), (pcch), RTSTR_TAG)
     810
     811/**
     812 * Translates UTF-16LE to UTF-8 using buffer provided by the caller or a
     813 * fittingly sized buffer allocated by the function (custom tag).
     814 *
     815 * This differs from RTUtf16ToUtf8ExTag in that the input is always a
     816 * little-endian string.
     817 *
     818 * @returns iprt status code.
     819 * @param   pwszString      The UTF-16LE string to convert.
     820 * @param   cwcString       The number of RTUTF16 items to translate from pwszString.
     821 *                          The translation will stop when reaching cwcString or the terminator ('\\0').
     822 *                          Use RTSTR_MAX to translate the entire string.
     823 * @param   ppsz            If cch is non-zero, this must either be pointing to a pointer to
     824 *                          a buffer of the specified size, or pointer to a NULL pointer.
     825 *                          If *ppsz is NULL or cch is zero a buffer of at least cch chars
     826 *                          will be allocated to hold the translated string.
     827 *                          If a buffer was requested it must be freed using RTStrFree().
     828 * @param   cch             The buffer size in chars (the type). This includes the terminator.
     829 * @param   pcch            Where to store the length of the translated string,
     830 *                          excluding the terminator. (Optional)
     831 *
     832 *                          This may be set under some error conditions,
     833 *                          however, only for VERR_BUFFER_OVERFLOW and
     834 *                          VERR_NO_STR_MEMORY will it contain a valid string
     835 *                          length that can be used to resize the buffer.
     836 * @param   pszTag          Allocation tag used for statistics and such.
     837 */
     838RTDECL(int) RTUtf16LittleToUtf8ExTag(PCRTUTF16 pwszString, size_t cwcString, char **ppsz, size_t cch, size_t *pcch,
     839                                     const char *pszTag);
     840
     841/**
    783842 * Calculates the length of the UTF-16 string in UTF-8 chars (bytes).
    784843 *
     
    795854
    796855/**
     856 * Calculates the length of the UTF-16BE string in UTF-8 chars (bytes).
     857 *
     858 * This function will validate the string, and incorrectly encoded UTF-16BE
     859 * strings will be rejected.  The primary purpose of this function is to
     860 * help allocate buffers for RTUtf16BigToUtf8() of the correct size.  For most
     861 * other purposes RTUtf16BigToUtf8Ex() should be used.
     862 *
     863 * @returns Number of char (bytes).
     864 * @returns 0 if the string was incorrectly encoded.
     865 * @param   pwsz        The UTF-16BE string.
     866 */
     867RTDECL(size_t) RTUtf16BigCalcUtf8Len(PCRTUTF16 pwsz);
     868
     869/**
     870 * Calculates the length of the UTF-16LE string in UTF-8 chars (bytes).
     871 *
     872 * This function will validate the string, and incorrectly encoded UTF-16LE
     873 * strings will be rejected.  The primary purpose of this function is to
     874 * help allocate buffers for RTUtf16LittleToUtf8() of the correct size.  For
     875 * most other purposes RTUtf16LittleToUtf8Ex() should be used.
     876 *
     877 * @returns Number of char (bytes).
     878 * @returns 0 if the string was incorrectly encoded.
     879 * @param   pwsz        The UTF-16LE string.
     880 */
     881RTDECL(size_t) RTUtf16LittleCalcUtf8Len(PCRTUTF16 pwsz);
     882
     883/**
    797884 * Calculates the length of the UTF-16 string in UTF-8 chars (bytes).
    798885 *
     
    807894 */
    808895RTDECL(int) RTUtf16CalcUtf8LenEx(PCRTUTF16 pwsz, size_t cwc, size_t *pcch);
     896
     897/**
     898 * Calculates the length of the UTF-16BE string in UTF-8 chars (bytes).
     899 *
     900 * This function will validate the string, and incorrectly encoded UTF-16BE
     901 * strings will be rejected.
     902 *
     903 * @returns iprt status code.
     904 * @param   pwsz        The UTF-16BE string.
     905 * @param   cwc         The max string length. Use RTSTR_MAX to process the entire string.
     906 * @param   pcch        Where to store the string length (in bytes). Optional.
     907 *                      This is undefined on failure.
     908 */
     909RTDECL(int) RTUtf16BigCalcUtf8LenEx(PCRTUTF16 pwsz, size_t cwc, size_t *pcch);
     910
     911/**
     912 * Calculates the length of the UTF-16LE string in UTF-8 chars (bytes).
     913 *
     914 * This function will validate the string, and incorrectly encoded UTF-16LE
     915 * strings will be rejected.
     916 *
     917 * @returns iprt status code.
     918 * @param   pwsz        The UTF-16LE string.
     919 * @param   cwc         The max string length. Use RTSTR_MAX to process the entire string.
     920 * @param   pcch        Where to store the string length (in bytes). Optional.
     921 *                      This is undefined on failure.
     922 */
     923RTDECL(int) RTUtf16LittleCalcUtf8LenEx(PCRTUTF16 pwsz, size_t cwc, size_t *pcch);
    809924
    810925/**
  • trunk/src/VBox/Runtime/common/string/utf-16.cpp

    r67391 r68316  
    369369
    370370/**
    371  * Validate the UTF-16 encoding and calculates the length of an UTF-8 encoding.
     371 * Validate the UTF-16BE encoding and calculates the length of an UTF-8
     372 * encoding.
    372373 *
    373374 * @returns iprt status code.
    374  * @param   pwsz        The UTF-16 string.
    375  * @param   cwc         The max length of the UTF-16 string to consider.
     375 * @param   pwsz        The UTF-16BE string.
     376 * @param   cwc         The max length of the UTF-16BE string to consider.
    376377 * @param   pcch        Where to store the length (excluding '\\0') of the UTF-8 string. (cch == cb, btw)
    377378 *
    378  * @note    rtUtf16BigCalcUtf8Length is a copy of this.
     379 * @note    rtUtf16LittleCalcUtf8Length | s/RT_LE2H_U16/RT_BE2H_U16/g
    379380 */
    380 static int rtUtf16CalcUtf8Length(PCRTUTF16 pwsz, size_t cwc, size_t *pcch)
     381static int rtUtf16BigCalcUtf8Length(PCRTUTF16 pwsz, size_t cwc, size_t *pcch)
    381382{
    382383    int     rc = VINF_SUCCESS;
     
    387388        if (!wc)
    388389            break;
     390        wc = RT_BE2H_U16(wc);
    389391        if (wc < 0xd800 || wc > 0xdfff)
    390392        {
     
    417419            }
    418420            wc = *pwsz++; cwc--;
     421            wc = RT_BE2H_U16(wc);
    419422            if (wc < 0xdc00 || wc > 0xdfff)
    420423            {
     
    435438
    436439/**
    437  * Validate the UTF-16BE encoding and calculates the length of an UTF-8
     440 * Validate the UTF-16LE encoding and calculates the length of an UTF-8
    438441 * encoding.
    439442 *
    440443 * @returns iprt status code.
    441  * @param   pwsz        The UTF-16 string.
    442  * @param   cwc         The max length of the UTF-16BE string to consider.
     444 * @param   pwsz        The UTF-16LE string.
     445 * @param   cwc         The max length of the UTF-16LE string to consider.
    443446 * @param   pcch        Where to store the length (excluding '\\0') of the UTF-8 string. (cch == cb, btw)
    444447 *
    445  * @note    Code is a copy of rtUtf16CalcUtf8Length, but with two RT_BE2H_U16
    446  *          invocations inserted.
     448 * @note    rtUtf16BigCalcUtf8Length | s/RT_BE2H_U16/RT_LE2H_U16/g
    447449 */
    448 static int rtUtf16BigCalcUtf8Length(PCRTUTF16 pwsz, size_t cwc, size_t *pcch)
     450static int rtUtf16LittleCalcUtf8Length(PCRTUTF16 pwsz, size_t cwc, size_t *pcch)
    449451{
    450452    int     rc = VINF_SUCCESS;
     
    455457        if (!wc)
    456458            break;
    457         wc = RT_BE2H_U16(wc);
     459        wc = RT_LE2H_U16(wc);
    458460        if (wc < 0xd800 || wc > 0xdfff)
    459461        {
     
    486488            }
    487489            wc = *pwsz++; cwc--;
    488             wc = RT_BE2H_U16(wc);
     490            wc = RT_LE2H_U16(wc);
    489491            if (wc < 0xdc00 || wc > 0xdfff)
    490492            {
     
    500502    /* done */
    501503    *pcch = cch;
    502     return rc;
    503 }
    504 
    505 
    506 /**
    507  * Recodes an valid UTF-16 string as UTF-8.
    508  *
    509  * @returns iprt status code.
    510  * @param   pwsz        The UTF-16 string.
    511  * @param   cwc         The number of RTUTF16 characters to process from pwsz. The recoding
    512  *                      will stop when cwc or '\\0' is reached.
    513  * @param   psz         Where to store the UTF-8 string.
    514  * @param   cch         The size of the UTF-8 buffer, excluding the terminator.
    515  * @param   pcch        Where to store the number of octets actually encoded.
    516  * @note    rtUtf16BigRecodeAsUtf8 is a copy of this.
    517  */
    518 static int rtUtf16RecodeAsUtf8(PCRTUTF16 pwsz, size_t cwc, char *psz, size_t cch, size_t *pcch)
    519 {
    520     unsigned char  *pwch = (unsigned char *)psz;
    521     int             rc = VINF_SUCCESS;
    522     while (cwc > 0)
    523     {
    524         RTUTF16 wc = *pwsz++; cwc--;
    525         if (!wc)
    526             break;
    527         if (wc < 0xd800 || wc > 0xdfff)
    528         {
    529             if (wc < 0x80)
    530             {
    531                 if (RT_UNLIKELY(cch < 1))
    532                 {
    533                     RTStrAssertMsgFailed(("Buffer overflow! 1\n"));
    534                     rc = VERR_BUFFER_OVERFLOW;
    535                     break;
    536                 }
    537                 cch--;
    538                 *pwch++ = (unsigned char)wc;
    539             }
    540             else if (wc < 0x800)
    541             {
    542                 if (RT_UNLIKELY(cch < 2))
    543                 {
    544                     RTStrAssertMsgFailed(("Buffer overflow! 2\n"));
    545                     rc = VERR_BUFFER_OVERFLOW;
    546                     break;
    547                 }
    548                 cch -= 2;
    549                 *pwch++ = 0xc0 | (wc >> 6);
    550                 *pwch++ = 0x80 | (wc & 0x3f);
    551             }
    552             else if (wc < 0xfffe)
    553             {
    554                 if (RT_UNLIKELY(cch < 3))
    555                 {
    556                     RTStrAssertMsgFailed(("Buffer overflow! 3\n"));
    557                     rc = VERR_BUFFER_OVERFLOW;
    558                     break;
    559                 }
    560                 cch -= 3;
    561                 *pwch++ = 0xe0 | (wc >> 12);
    562                 *pwch++ = 0x80 | ((wc >> 6) & 0x3f);
    563                 *pwch++ = 0x80 | (wc & 0x3f);
    564             }
    565             else
    566             {
    567                 RTStrAssertMsgFailed(("endian indicator! wc=%#x\n", wc));
    568                 rc = VERR_CODE_POINT_ENDIAN_INDICATOR;
    569                 break;
    570             }
    571         }
    572         else
    573         {
    574             if (wc >= 0xdc00)
    575             {
    576                 RTStrAssertMsgFailed(("Wrong 1st char in surrogate! wc=%#x\n", wc));
    577                 rc = VERR_INVALID_UTF16_ENCODING;
    578                 break;
    579             }
    580             if (cwc <= 0)
    581             {
    582                 RTStrAssertMsgFailed(("Invalid length! wc=%#x\n", wc));
    583                 rc = VERR_INVALID_UTF16_ENCODING;
    584                 break;
    585             }
    586             RTUTF16 wc2 = *pwsz++; cwc--;
    587             if (wc2 < 0xdc00 || wc2 > 0xdfff)
    588             {
    589                 RTStrAssertMsgFailed(("Wrong 2nd char in surrogate! wc=%#x\n", wc));
    590                 rc = VERR_INVALID_UTF16_ENCODING;
    591                 break;
    592             }
    593             uint32_t CodePoint = 0x10000
    594                                + (  ((wc & 0x3ff) << 10)
    595                                   | (wc2 & 0x3ff));
    596             if (RT_UNLIKELY(cch < 4))
    597             {
    598                 RTStrAssertMsgFailed(("Buffer overflow! 4\n"));
    599                 rc = VERR_BUFFER_OVERFLOW;
    600                 break;
    601             }
    602             cch -= 4;
    603             *pwch++ = 0xf0 | (CodePoint >> 18);
    604             *pwch++ = 0x80 | ((CodePoint >> 12) & 0x3f);
    605             *pwch++ = 0x80 | ((CodePoint >>  6) & 0x3f);
    606             *pwch++ = 0x80 | (CodePoint & 0x3f);
    607         }
    608     }
    609 
    610     /* done */
    611     *pwch = '\0';
    612     *pcch = (char *)pwch - psz;
    613504    return rc;
    614505}
     
    626517 * @param   pcch        Where to store the number of octets actually encoded.
    627518 *
    628  * @note    Copy of rtUtf16RecodeAsUtf8 with a few RT_BE2H_U16 invocations
    629  *          insterted.
     519 * @note    rtUtf16LittleRecodeAsUtf8 == s/RT_BE2H_U16/RT_LE2H_U16/g
    630520 */
    631521static int rtUtf16BigRecodeAsUtf8(PCRTUTF16 pwsz, size_t cwc, char *psz, size_t cch, size_t *pcch)
     
    730620
    731621
     622/**
     623 * Recodes an valid UTF-16LE string as UTF-8.
     624 *
     625 * @returns iprt status code.
     626 * @param   pwsz        The UTF-16LE string.
     627 * @param   cwc         The number of RTUTF16 characters to process from pwsz. The recoding
     628 *                      will stop when cwc or '\\0' is reached.
     629 * @param   psz         Where to store the UTF-8 string.
     630 * @param   cch         The size of the UTF-8 buffer, excluding the terminator.
     631 * @param   pcch        Where to store the number of octets actually encoded.
     632 *
     633 * @note    rtUtf16LittleRecodeAsUtf8 == s/RT_LE2H_U16/RT_GE2H_U16/g
     634 */
     635static int rtUtf16LittleRecodeAsUtf8(PCRTUTF16 pwsz, size_t cwc, char *psz, size_t cch, size_t *pcch)
     636{
     637    unsigned char  *pwch = (unsigned char *)psz;
     638    int             rc = VINF_SUCCESS;
     639    while (cwc > 0)
     640    {
     641        RTUTF16 wc = *pwsz++; cwc--;
     642        if (!wc)
     643            break;
     644        wc = RT_LE2H_U16(wc);
     645        if (wc < 0xd800 || wc > 0xdfff)
     646        {
     647            if (wc < 0x80)
     648            {
     649                if (RT_UNLIKELY(cch < 1))
     650                {
     651                    RTStrAssertMsgFailed(("Buffer overflow! 1\n"));
     652                    rc = VERR_BUFFER_OVERFLOW;
     653                    break;
     654                }
     655                cch--;
     656                *pwch++ = (unsigned char)wc;
     657            }
     658            else if (wc < 0x800)
     659            {
     660                if (RT_UNLIKELY(cch < 2))
     661                {
     662                    RTStrAssertMsgFailed(("Buffer overflow! 2\n"));
     663                    rc = VERR_BUFFER_OVERFLOW;
     664                    break;
     665                }
     666                cch -= 2;
     667                *pwch++ = 0xc0 | (wc >> 6);
     668                *pwch++ = 0x80 | (wc & 0x3f);
     669            }
     670            else if (wc < 0xfffe)
     671            {
     672                if (RT_UNLIKELY(cch < 3))
     673                {
     674                    RTStrAssertMsgFailed(("Buffer overflow! 3\n"));
     675                    rc = VERR_BUFFER_OVERFLOW;
     676                    break;
     677                }
     678                cch -= 3;
     679                *pwch++ = 0xe0 | (wc >> 12);
     680                *pwch++ = 0x80 | ((wc >> 6) & 0x3f);
     681                *pwch++ = 0x80 | (wc & 0x3f);
     682            }
     683            else
     684            {
     685                RTStrAssertMsgFailed(("endian indicator! wc=%#x\n", wc));
     686                rc = VERR_CODE_POINT_ENDIAN_INDICATOR;
     687                break;
     688            }
     689        }
     690        else
     691        {
     692            if (wc >= 0xdc00)
     693            {
     694                RTStrAssertMsgFailed(("Wrong 1st char in surrogate! wc=%#x\n", wc));
     695                rc = VERR_INVALID_UTF16_ENCODING;
     696                break;
     697            }
     698            if (cwc <= 0)
     699            {
     700                RTStrAssertMsgFailed(("Invalid length! wc=%#x\n", wc));
     701                rc = VERR_INVALID_UTF16_ENCODING;
     702                break;
     703            }
     704            RTUTF16 wc2 = *pwsz++; cwc--;
     705            wc2 = RT_LE2H_U16(wc2);
     706            if (wc2 < 0xdc00 || wc2 > 0xdfff)
     707            {
     708                RTStrAssertMsgFailed(("Wrong 2nd char in surrogate! wc=%#x\n", wc));
     709                rc = VERR_INVALID_UTF16_ENCODING;
     710                break;
     711            }
     712            uint32_t CodePoint = 0x10000
     713                               + (  ((wc & 0x3ff) << 10)
     714                                  | (wc2 & 0x3ff));
     715            if (RT_UNLIKELY(cch < 4))
     716            {
     717                RTStrAssertMsgFailed(("Buffer overflow! 4\n"));
     718                rc = VERR_BUFFER_OVERFLOW;
     719                break;
     720            }
     721            cch -= 4;
     722            *pwch++ = 0xf0 | (CodePoint >> 18);
     723            *pwch++ = 0x80 | ((CodePoint >> 12) & 0x3f);
     724            *pwch++ = 0x80 | ((CodePoint >>  6) & 0x3f);
     725            *pwch++ = 0x80 | (CodePoint & 0x3f);
     726        }
     727    }
     728
     729    /* done */
     730    *pwch = '\0';
     731    *pcch = (char *)pwch - psz;
     732    return rc;
     733}
     734
     735
    732736
    733737RTDECL(int)  RTUtf16ToUtf8Tag(PCRTUTF16 pwszString, char **ppszString, const char *pszTag)
     
    744748     */
    745749    size_t cch;
    746     int rc = rtUtf16CalcUtf8Length(pwszString, RTSTR_MAX, &cch);
     750#ifdef RT_BIG_ENDIAN
     751    int rc = rtUtf16BigCalcUtf8Length(pwszString, RTSTR_MAX, &cch);
     752#else
     753    int rc = rtUtf16LittleCalcUtf8Length(pwszString, RTSTR_MAX, &cch);
     754#endif
    747755    if (RT_SUCCESS(rc))
    748756    {
     
    753761        if (pszResult)
    754762        {
    755             rc = rtUtf16RecodeAsUtf8(pwszString, RTSTR_MAX, pszResult, cch, &cch);
     763#ifdef RT_BIG_ENDIAN
     764            rc = rtUtf16BigRecodeAsUtf8(pwszString, RTSTR_MAX, pszResult, cch, &cch);
     765#else
     766            rc = rtUtf16LittleRecodeAsUtf8(pwszString, RTSTR_MAX, pszResult, cch, &cch);
     767#endif
    756768            if (RT_SUCCESS(rc))
    757769            {
     
    809821
    810822
     823RTDECL(int)  RTUtf16LittleToUtf8Tag(PCRTUTF16 pwszString, char **ppszString, const char *pszTag)
     824{
     825    /*
     826     * Validate input.
     827     */
     828    Assert(VALID_PTR(ppszString));
     829    Assert(VALID_PTR(pwszString));
     830    *ppszString = NULL;
     831
     832    /*
     833     * Validate the UTF-16LE string and calculate the length of the UTF-8 encoding of it.
     834     */
     835    size_t cch;
     836    int rc = rtUtf16LittleCalcUtf8Length(pwszString, RTSTR_MAX, &cch);
     837    if (RT_SUCCESS(rc))
     838    {
     839        /*
     840         * Allocate buffer and recode it.
     841         */
     842        char *pszResult = (char *)RTMemAllocTag(cch + 1, pszTag);
     843        if (pszResult)
     844        {
     845            rc = rtUtf16LittleRecodeAsUtf8(pwszString, RTSTR_MAX, pszResult, cch, &cch);
     846            if (RT_SUCCESS(rc))
     847            {
     848                *ppszString = pszResult;
     849                return rc;
     850            }
     851
     852            RTMemFree(pszResult);
     853        }
     854        else
     855            rc = VERR_NO_STR_MEMORY;
     856    }
     857    return rc;
     858}
     859RT_EXPORT_SYMBOL(RTUtf16BigToUtf8Tag);
     860
     861
    811862RTDECL(int)  RTUtf16ToUtf8ExTag(PCRTUTF16 pwszString, size_t cwcString, char **ppsz, size_t cch, size_t *pcch, const char *pszTag)
    812863{
     
    822873     */
    823874    size_t cchResult;
    824     int rc = rtUtf16CalcUtf8Length(pwszString, cwcString, &cchResult);
     875#ifdef RT_BIG_ENDIAN
     876    int rc = rtUtf16BigCalcUtf8Length(pwszString, cwcString, &cchResult);
     877#else
     878    int rc = rtUtf16LittleCalcUtf8Length(pwszString, cwcString, &cchResult);
     879#endif
    825880    if (RT_SUCCESS(rc))
    826881    {
     
    849904        if (pszResult)
    850905        {
    851             rc = rtUtf16RecodeAsUtf8(pwszString, cwcString, pszResult, cch - 1, &cch);
     906#ifdef RT_BIG_ENDIAN
     907            rc = rtUtf16BigRecodeAsUtf8(pwszString, cwcString, pszResult, cch - 1, &cch);
     908#else
     909            rc = rtUtf16LittleRecodeAsUtf8(pwszString, cwcString, pszResult, cch - 1, &cch);
     910#endif
    852911            if (RT_SUCCESS(rc))
    853912            {
     
    925984
    926985
     986RTDECL(int)  RTUtf16LittleToUtf8ExTag(PCRTUTF16 pwszString, size_t cwcString, char **ppsz, size_t cch, size_t *pcch,
     987                                      const char *pszTag)
     988{
     989    /*
     990     * Validate input.
     991     */
     992    AssertPtr(pwszString);
     993    AssertPtr(ppsz);
     994    AssertPtrNull(pcch);
     995
     996    /*
     997     * Validate the UTF-16LE string and calculate the length of the UTF-8 encoding of it.
     998     */
     999    size_t cchResult;
     1000    int rc = rtUtf16LittleCalcUtf8Length(pwszString, cwcString, &cchResult);
     1001    if (RT_SUCCESS(rc))
     1002    {
     1003        if (pcch)
     1004            *pcch = cchResult;
     1005
     1006        /*
     1007         * Check buffer size / Allocate buffer and recode it.
     1008         */
     1009        bool fShouldFree;
     1010        char *pszResult;
     1011        if (cch > 0 && *ppsz)
     1012        {
     1013            fShouldFree = false;
     1014            if (RT_UNLIKELY(cch <= cchResult))
     1015                return VERR_BUFFER_OVERFLOW;
     1016            pszResult = *ppsz;
     1017        }
     1018        else
     1019        {
     1020            *ppsz = NULL;
     1021            fShouldFree = true;
     1022            cch = RT_MAX(cch, cchResult + 1);
     1023            pszResult = (char *)RTStrAllocTag(cch, pszTag);
     1024        }
     1025        if (pszResult)
     1026        {
     1027            rc = rtUtf16LittleRecodeAsUtf8(pwszString, cwcString, pszResult, cch - 1, &cch);
     1028            if (RT_SUCCESS(rc))
     1029            {
     1030                *ppsz = pszResult;
     1031                return rc;
     1032            }
     1033
     1034            if (fShouldFree)
     1035                RTStrFree(pszResult);
     1036        }
     1037        else
     1038            rc = VERR_NO_STR_MEMORY;
     1039    }
     1040    return rc;
     1041}
     1042RT_EXPORT_SYMBOL(RTUtf16BigToUtf8ExTag);
     1043
     1044
    9271045RTDECL(size_t) RTUtf16CalcUtf8Len(PCRTUTF16 pwsz)
    9281046{
    9291047    size_t cch;
    930     int rc = rtUtf16CalcUtf8Length(pwsz, RTSTR_MAX, &cch);
     1048#ifdef RT_BIG_ENDIAN
     1049    int rc = rtUtf16BigCalcUtf8Length(pwsz, RTSTR_MAX, &cch);
     1050#else
     1051    int rc = rtUtf16LittleCalcUtf8Length(pwsz, RTSTR_MAX, &cch);
     1052#endif
    9311053    return RT_SUCCESS(rc) ? cch : 0;
    9321054}
     
    9341056
    9351057
     1058RTDECL(size_t) RTUtf16BigCalcUtf8Len(PCRTUTF16 pwsz)
     1059{
     1060    size_t cch;
     1061    int rc = rtUtf16BigCalcUtf8Length(pwsz, RTSTR_MAX, &cch);
     1062    return RT_SUCCESS(rc) ? cch : 0;
     1063}
     1064RT_EXPORT_SYMBOL(RTUtf16BigCalcUtf8Len);
     1065
     1066
     1067RTDECL(size_t) RTUtf16LittleCalcUtf8Len(PCRTUTF16 pwsz)
     1068{
     1069    size_t cch;
     1070    int rc = rtUtf16LittleCalcUtf8Length(pwsz, RTSTR_MAX, &cch);
     1071    return RT_SUCCESS(rc) ? cch : 0;
     1072}
     1073RT_EXPORT_SYMBOL(RTUtf16LittleCalcUtf8Len);
     1074
     1075
    9361076RTDECL(int) RTUtf16CalcUtf8LenEx(PCRTUTF16 pwsz, size_t cwc, size_t *pcch)
    9371077{
    9381078    size_t cch;
    939     int rc = rtUtf16CalcUtf8Length(pwsz, cwc, &cch);
     1079#ifdef RT_BIG_ENDIAN
     1080    int rc = rtUtf16BigCalcUtf8Length(pwsz, cwc, &cch);
     1081#else
     1082    int rc = rtUtf16LittleCalcUtf8Length(pwsz, cwc, &cch);
     1083#endif
    9401084    if (pcch)
    9411085        *pcch = RT_SUCCESS(rc) ? cch : ~(size_t)0;
     
    9431087}
    9441088RT_EXPORT_SYMBOL(RTUtf16CalcUtf8LenEx);
     1089
     1090
     1091RTDECL(int) RTUtf16BigCalcUtf8LenEx(PCRTUTF16 pwsz, size_t cwc, size_t *pcch)
     1092{
     1093    size_t cch;
     1094    int rc = rtUtf16BigCalcUtf8Length(pwsz, cwc, &cch);
     1095    if (pcch)
     1096        *pcch = RT_SUCCESS(rc) ? cch : ~(size_t)0;
     1097    return rc;
     1098}
     1099RT_EXPORT_SYMBOL(RTUtf16BigCalcUtf8LenEx);
     1100
     1101
     1102RTDECL(int) RTUtf16LittleCalcUtf8LenEx(PCRTUTF16 pwsz, size_t cwc, size_t *pcch)
     1103{
     1104    size_t cch;
     1105    int rc = rtUtf16LittleCalcUtf8Length(pwsz, cwc, &cch);
     1106    if (pcch)
     1107        *pcch = RT_SUCCESS(rc) ? cch : ~(size_t)0;
     1108    return rc;
     1109}
     1110RT_EXPORT_SYMBOL(RTUtf16LittleCalcUtf8LenEx);
    9451111
    9461112
Note: See TracChangeset for help on using the changeset viewer.

© 2024 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette