string

Timestamp:

Jul 21, 2009 12:06:38 PM (16 years ago)

Author:

vboxsync

svn:sync-xref-src-repo-rev:

50317

Message:

IPRT: Some latin-1 cleanup and some predictions in the recoding loops.

File:

1 edited

trunk/src/VBox/Runtime/common/string/utf-16.cpp (modified) (17 diffs)

Legend:

Unmodified
Added
Removed

trunk/src/VBox/Runtime/common/string/utf-16.cpp

-              r21728
+              r21740
             if (wc < 0x80)
+            {
                 if (cch < 1)
+                if (RT_UNLIKELY(cch < 1))
+                {
                     RTStrAssertMsgFailed(("Buffer overflow! 1\n"));
 …
             else if (wc < 0x800)
+            {
                 if (cch < 2)
+                if (RT_UNLIKELY(cch < 2))
+                {
                     RTStrAssertMsgFailed(("Buffer overflow! 2\n"));
 …
             else if (wc < 0xfffe)
+            {
                 if (cch < 3)
+                if (RT_UNLIKELY(cch < 3))
+                {
                     RTStrAssertMsgFailed(("Buffer overflow! 3\n"));
 …
                                + (  ((wc & 0x3ff) << 10)
                                   | (wc2 & 0x3ff));
             if (cch < 4)
+            if (RT_UNLIKELY(cch < 4))
+            {
                 RTStrAssertMsgFailed(("Buffer overflow! 4\n"));
 …
+        {
             fShouldFree = false;
             if (cch <= cchResult)
+            if (RT_UNLIKELY(cch <= cchResult))
                 return VERR_BUFFER_OVERFLOW;
             pszResult = *ppsz;
 …
         if (!wc)
             break;
         else if (wc < 256)
+        else if (RT_LIKELY(wc < 0x100))
             ++cch;
+        else if (wc < 0xd800 || wc > 0xdfff)
+        {
+            if (wc < 0xfffe)
+            {
+                rc = VERR_NO_TRANSLATION;
+                break;
+        else
+        {
+            if (wc < 0xd800 || wc > 0xdfff)
+            {
+                if (wc >= 0xfffe)
+                {
+                    RTStrAssertMsgFailed(("endian indicator! wc=%#x\n", wc));
+                    rc = VERR_CODE_POINT_ENDIAN_INDICATOR;
+                    break;
+                }
+            }
             else
+            {
+                RTStrAssertMsgFailed(("endian indicator! wc=%#x\n", wc));
+                rc = VERR_CODE_POINT_ENDIAN_INDICATOR;
+                break;
+            }
+        }
+        else
+        {
+            if (wc >= 0xdc00)
+            {
+                RTStrAssertMsgFailed(("Wrong 1st char in surrogate! wc=%#x\n", wc));
+                rc = VERR_INVALID_UTF16_ENCODING;
+                break;
+            }
+            if (cwc <= 0)
+            {
+                RTStrAssertMsgFailed(("Invalid length! wc=%#x\n", wc));
+                rc = VERR_INVALID_UTF16_ENCODING;
+                break;
+            }
+            wc = *pwsz++; cwc--;
+            if (wc < 0xdc00 || wc > 0xdfff)
+            {
+                RTStrAssertMsgFailed(("Wrong 2nd char in surrogate! wc=%#x\n", wc));
+                rc = VERR_INVALID_UTF16_ENCODING;
+                break;
+            }
+                if (wc >= 0xdc00)
+                {
+                    RTStrAssertMsgFailed(("Wrong 1st char in surrogate! wc=%#x\n", wc));
+                    rc = VERR_INVALID_UTF16_ENCODING;
+                    break;
+                }
+                if (cwc <= 0)
+                {
+                    RTStrAssertMsgFailed(("Invalid length! wc=%#x\n", wc));
+                    rc = VERR_INVALID_UTF16_ENCODING;
+                    break;
+                }
+                wc = *pwsz++; cwc--;
+                if (wc < 0xdc00 || wc > 0xdfff)
+                {
+                    RTStrAssertMsgFailed(("Wrong 2nd char in surrogate! wc=%#x\n", wc));
+                    rc = VERR_INVALID_UTF16_ENCODING;
+                    break;
+                }
+            }
             rc = VERR_NO_TRANSLATION;
             break;
+        }
+    }
     /* done */
 …
  * @param   psz         Where to store the Latin1 string.
  * @param   cch         The size of the Latin1 buffer, excluding the terminator.
- * @param   pcch        Where to store the number of octets actually encoded.
  */
 static int rtUtf16RecodeAsLatin1(PCRTUTF16 pwsz, size_t cwc, char *psz, size_t cch, size_t *pcch)
+{
     unsigned char  *pwch = (unsigned char *)psz;
     int             rc = VINF_SUCCESS;
+static int rtUtf16RecodeAsLatin1(PCRTUTF16 pwsz, size_t cwc, char *psz, size_t cch)
+{
+    unsigned char  *pch = (unsigned char *)psz;
+    int             rc  = VINF_SUCCESS;
     while (cwc > 0)
+    {
 …
         if (!wc)
             break;
+        else if (wc < 0xd800 || wc > 0xdfff)
+        {
+            if (wc < 0x100)
+            {
+                if (cch < 1)
+                {
+                    RTStrAssertMsgFailed(("Buffer overflow! 1\n"));
+                    rc = VERR_BUFFER_OVERFLOW;
+        if (RT_LIKELY(wc < 0x100))
+        {
+            if (RT_UNLIKELY(cch < 1))
+            {
+                RTStrAssertMsgFailed(("Buffer overflow! 1\n"));
+                rc = VERR_BUFFER_OVERFLOW;
+                break;
+            }
+            cch--;
+            *pch++ = (unsigned char)wc;
+        }
+        else
+        {
+            if (wc < 0xd800 || wc > 0xdfff)
+            {
+                if (wc >= 0xfffe)
+                {
+                    RTStrAssertMsgFailed(("endian indicator! wc=%#x\n", wc));
+                    rc = VERR_CODE_POINT_ENDIAN_INDICATOR;
                     break;
+                }
-                cch--;
-                *pwch++ = (char)wc;
+            }
-            else if (wc < 0xfffe)
+            {
-                rc = VERR_NO_TRANSLATION;
-                break;
+            }
             else
+            {
+                RTStrAssertMsgFailed(("endian indicator! wc=%#x\n", wc));
+                rc = VERR_CODE_POINT_ENDIAN_INDICATOR;
+                break;
+            }
+        }
+        else
+        {
+            if (wc >= 0xdc00)
+            {
+                RTStrAssertMsgFailed(("Wrong 1st char in surrogate! wc=%#x\n", wc));
+                rc = VERR_INVALID_UTF16_ENCODING;
+                break;
+            }
+            if (cwc <= 0)
+            {
+                RTStrAssertMsgFailed(("Invalid length! wc=%#x\n", wc));
+                rc = VERR_INVALID_UTF16_ENCODING;
+                break;
+            }
+            RTUTF16 wc2 = *pwsz++; cwc--;
+            if (wc2 < 0xdc00 || wc2 > 0xdfff)
+            {
+                RTStrAssertMsgFailed(("Wrong 2nd char in surrogate! wc=%#x\n", wc));
+                rc = VERR_INVALID_UTF16_ENCODING;
+                break;
+            }
+                if (wc >= 0xdc00)
+                {
+                    RTStrAssertMsgFailed(("Wrong 1st char in surrogate! wc=%#x\n", wc));
+                    rc = VERR_INVALID_UTF16_ENCODING;
+                    break;
+                }
+                if (cwc <= 0)
+                {
+                    RTStrAssertMsgFailed(("Invalid length! wc=%#x\n", wc));
+                    rc = VERR_INVALID_UTF16_ENCODING;
+                    break;
+                }
+                RTUTF16 wc2 = *pwsz++; cwc--;
+                if (wc2 < 0xdc00 || wc2 > 0xdfff)
+                {
+                    RTStrAssertMsgFailed(("Wrong 2nd char in surrogate! wc=%#x\n", wc));
+                    rc = VERR_INVALID_UTF16_ENCODING;
+                    break;
+                }
+            }
             rc = VERR_NO_TRANSLATION;
             break;
 …
     /* done */
+    *pwch = '\0';
+    *pcch = (char *)pwch - psz;
+    *pch = '\0';
     return rc;
+}
 …
         if (pszResult)
+        {
             rc = rtUtf16RecodeAsLatin1(pwszString, RTSTR_MAX, pszResult, cch, &cch);
+            rc = rtUtf16RecodeAsLatin1(pwszString, RTSTR_MAX, pszResult, cch);
             if (RT_SUCCESS(rc))
+            {
 …
      * Validate input.
      */
     Assert(VALID_PTR(pwszString));
     Assert(VALID_PTR(ppsz));
     Assert(!pcch || VALID_PTR(pcch));
+    AssertPtr(pwszString);
+    AssertPtr(ppsz);
+    AssertPtrNull(pcch);
     /*
 …
         if (pszResult)
+        {
             rc = rtUtf16RecodeAsLatin1(pwszString, cwcString, pszResult, cch - 1, &cch);
+            rc = rtUtf16RecodeAsLatin1(pwszString, cwcString, pszResult, cch - 1);
             if (RT_SUCCESS(rc))
+            {
 …
  * @param   pwsz    Where to store the UTF-16 string.
  * @param   cwc     The number of RTUTF16 items the pwsz buffer can hold, excluding the terminator ('\\0').
- * @param   pcwc    Where to store the actual number of RTUTF16 items encoded into the UTF-16. This excludes the terminator.
  */
 static int rtLatin1RecodeAsUtf16(const char *psz, size_t cch, PRTUTF16 pwsz, size_t cwc, size_t *pcwc)
+{
     int                     rc = VINF_SUCCESS;
+static int rtLatin1RecodeAsUtf16(const char *psz, size_t cch, PRTUTF16 pwsz, size_t cwc)
+{
+    int                     rc   = VINF_SUCCESS;
     const unsigned char    *puch = (const unsigned char *)psz;
+    const PRTUTF16          pwszEnd = pwsz + cwc;
+    PRTUTF16                pwc = pwsz;
+    Assert(pwszEnd >= pwc);
+    while (cch > 0)
+    PRTUTF16                pwc  = pwsz;
+    while (cch-- > 0)
+    {
         /* read the next char and check for terminator. */
 …
         /* check for output overflow */
         if (pwc >= pwszEnd)
+        if (RT_UNLIKELY(cwc < 1))
+        {
             rc = VERR_BUFFER_OVERFLOW;
 …
         /* expand the code point */
         *pwc++ = uch;
+        cwc--;
         puch++;
-        cch--;
+    }
     /* done */
     *pwc = '\0';
-    *pcwc = pwc - pwsz;
     return rc;
+}
 …
              * Encode the UTF-16 string.
              */
             rc = rtLatin1RecodeAsUtf16(pszString, RTSTR_MAX, pwsz, cwc, &cwc);
+            rc = rtLatin1RecodeAsUtf16(pszString, RTSTR_MAX, pwsz, cwc);
             if (RT_SUCCESS(rc))
+            {
 …
              * Encode the UTF-16 string.
              */
             rc = rtLatin1RecodeAsUtf16(pszString, cchString, pwszResult, cwc - 1, &cwcResult);
+            rc = rtLatin1RecodeAsUtf16(pszString, cchString, pwszResult, cwc - 1);
             if (RT_SUCCESS(rc))
+            {

Note: See TracChangeset for help on using the changeset viewer.

Changeset 21740 in vbox for trunk/src/VBox/Runtime/common/string

Legend:

trunk/src/VBox/Runtime/common/string/utf-16.cpp

Download in other formats: