Changeset 14007 in vbox for trunk/src/VBox/Runtime/common/string
- Timestamp: Nov 10, 2008 1:06:14 PM (16 years ago)
- svn:sync-xref-src-repo-rev: 39060
- File: 1 edited
Legend:
- Unmodified
- Added
- Removed
trunk/src/VBox/Runtime/common/string/utf-8.cpp
r13927 r14007 920 920 * @returns rc 921 921 * @param ppsz The pointer to the the string position point. 922 * @param pcch Pointer to the string length. 922 923 * @param pCp Where to store RTUNICP_INVALID. 923 * @param pcch Pointer to the string length.924 924 * @param rc The iprt error code. 925 925 */ 926 static int rtStrGetCpNExFailure(const char **ppsz, PRTUNICP pCp, size_t *pcch, int rc)926 static int rtStrGetCpNExFailure(const char **ppsz, size_t *pcch, PRTUNICP pCp, int rc) 927 927 { 928 928 /* … … 936 936 937 937 938 RTDECL(int) RTStrGetCpNExInternal(const char **ppsz, PRTUNICP pCp, size_t *pcch)938 RTDECL(int) RTStrGetCpNExInternal(const char **ppsz, size_t *pcch, PRTUNICP pCp) 939 939 { 940 940 const unsigned char *puch = (const unsigned char *)*ppsz; 941 941 const unsigned char uch = *puch; 942 size_t cch = *pcch; 942 943 RTUNICP uc; 943 944 944 if ( *pcch == 0)945 if (cch == 0) 945 946 { 946 947 *pCp = RTUNICP_INVALID; 947 return VERR_INVALID_UTF8_ENCODING; 948 } 948 return VERR_END_OF_STRING; 949 } 950 949 951 /* ASCII ? 
*/ 950 952 if (!(uch & RT_BIT(7))) … … 952 954 uc = uch; 953 955 puch++; 956 cch--; 954 957 } 955 958 else if (uch & RT_BIT(6)) … … 970 973 { 971 974 RTStrAssertMsgFailed(("Invalid UTF-8 first byte: %.*Rhxs\n", RT_MIN(strlen((char *)puch), 10), puch)); 972 return rtStrGetCpNExFailure(ppsz, p Cp, pcch, VERR_INVALID_UTF8_ENCODING);973 } 974 975 if (cb > *pcch)976 return rtStrGetCpNExFailure(ppsz, p Cp, pcch, VERR_INVALID_UTF8_ENCODING);975 return rtStrGetCpNExFailure(ppsz, pcch, pCp, VERR_INVALID_UTF8_ENCODING); 976 } 977 978 if (cb > cch) 979 return rtStrGetCpNExFailure(ppsz, pcch, pCp, VERR_INVALID_UTF8_ENCODING); 977 980 978 981 /* validate the rest */ … … 981 984 case 6: 982 985 RTStrAssertMsgReturn((puch[5] & 0xc0) == 0x80, ("6/%u: %.*Rhxs\n", cb, RT_MIN(cb + 10, strlen((char *)puch)), puch), 983 rtStrGetCpNExFailure(ppsz, p Cp, pcch, VERR_INVALID_UTF8_ENCODING));986 rtStrGetCpNExFailure(ppsz, pcch, pCp, VERR_INVALID_UTF8_ENCODING)); 984 987 case 5: 985 988 RTStrAssertMsgReturn((puch[4] & 0xc0) == 0x80, ("5/%u: %.*Rhxs\n", cb, RT_MIN(cb + 10, strlen((char *)puch)), puch), 986 rtStrGetCpNExFailure(ppsz, p Cp, pcch, VERR_INVALID_UTF8_ENCODING));989 rtStrGetCpNExFailure(ppsz, pcch, pCp, VERR_INVALID_UTF8_ENCODING)); 987 990 case 4: 988 991 RTStrAssertMsgReturn((puch[3] & 0xc0) == 0x80, ("4/%u: %.*Rhxs\n", cb, RT_MIN(cb + 10, strlen((char *)puch)), puch), 989 rtStrGetCpNExFailure(ppsz, p Cp, pcch, VERR_INVALID_UTF8_ENCODING));992 rtStrGetCpNExFailure(ppsz, pcch, pCp, VERR_INVALID_UTF8_ENCODING)); 990 993 case 3: 991 994 RTStrAssertMsgReturn((puch[2] & 0xc0) == 0x80, ("3/%u: %.*Rhxs\n", cb, RT_MIN(cb + 10, strlen((char *)puch)), puch), 992 rtStrGetCpNExFailure(ppsz, p Cp, pcch, VERR_INVALID_UTF8_ENCODING));995 rtStrGetCpNExFailure(ppsz, pcch, pCp, VERR_INVALID_UTF8_ENCODING)); 993 996 case 2: 994 997 RTStrAssertMsgReturn((puch[1] & 0xc0) == 0x80, ("2/%u: %.*Rhxs\n", cb, RT_MIN(cb + 10, strlen((char *)puch)), puch), 995 rtStrGetCpNExFailure(ppsz, p Cp, pcch, 
VERR_INVALID_UTF8_ENCODING));998 rtStrGetCpNExFailure(ppsz, pcch, pCp, VERR_INVALID_UTF8_ENCODING)); 996 999 break; 997 1000 } … … 1009 1012 RTStrAssertMsgReturn(uc >= 0x04000000 && uc <= 0x7fffffff, 1010 1013 ("%u: cp=%#010RX32: %.*Rhxs\n", cb, uc, RT_MIN(cb + 10, strlen((char *)puch)), puch), 1011 rtStrGetCpNExFailure(ppsz, p Cp, pcch, VERR_INVALID_UTF8_ENCODING));1014 rtStrGetCpNExFailure(ppsz, pcch, pCp, VERR_INVALID_UTF8_ENCODING)); 1012 1015 break; 1013 1016 case 5: … … 1019 1022 RTStrAssertMsgReturn(uc >= 0x00200000 && uc <= 0x03ffffff, 1020 1023 ("%u: cp=%#010RX32: %.*Rhxs\n", cb, uc, RT_MIN(cb + 10, strlen((char *)puch)), puch), 1021 rtStrGetCpNExFailure(ppsz, p Cp, pcch, VERR_INVALID_UTF8_ENCODING));1024 rtStrGetCpNExFailure(ppsz, pcch, pCp, VERR_INVALID_UTF8_ENCODING)); 1022 1025 break; 1023 1026 case 4: … … 1028 1031 RTStrAssertMsgReturn(uc >= 0x00010000 && uc <= 0x001fffff, 1029 1032 ("%u: cp=%#010RX32: %.*Rhxs\n", cb, uc, RT_MIN(cb + 10, strlen((char *)puch)), puch), 1030 rtStrGetCpNExFailure(ppsz, p Cp, pcch, VERR_INVALID_UTF8_ENCODING));1033 rtStrGetCpNExFailure(ppsz, pcch, pCp, VERR_INVALID_UTF8_ENCODING)); 1031 1034 break; 1032 1035 case 3: … … 1036 1039 RTStrAssertMsgReturn(uc >= 0x00000800 && uc <= 0x0000fffd, 1037 1040 ("%u: cp=%#010RX32: %.*Rhxs\n", cb, uc, RT_MIN(cb + 10, strlen((char *)puch)), puch), 1038 rtStrGetCpNExFailure(ppsz, p Cp, pcch, uc == 0xffff || uc == 0xfffe ? VERR_CODE_POINT_ENDIAN_INDICATOR : VERR_INVALID_UTF8_ENCODING));1041 rtStrGetCpNExFailure(ppsz, pcch, pCp, uc == 0xffff || uc == 0xfffe ? 
VERR_CODE_POINT_ENDIAN_INDICATOR : VERR_INVALID_UTF8_ENCODING)); 1039 1042 RTStrAssertMsgReturn(uc < 0xd800 || uc > 0xdfff, 1040 1043 ("%u: cp=%#010RX32: %.*Rhxs\n", cb, uc, RT_MIN(cb + 10, strlen((char *)puch)), puch), 1041 rtStrGetCpNExFailure(ppsz, p Cp, pcch, VERR_CODE_POINT_SURROGATE));1044 rtStrGetCpNExFailure(ppsz, pcch, pCp, VERR_CODE_POINT_SURROGATE)); 1042 1045 break; 1043 1046 case 2: … … 1046 1049 RTStrAssertMsgReturn(uc >= 0x00000080 && uc <= 0x000007ff, 1047 1050 ("%u: cp=%#010RX32: %.*Rhxs\n", cb, uc, RT_MIN(cb + 10, strlen((char *)puch)), puch), 1048 rtStrGetCpNExFailure(ppsz, p Cp, pcch, VERR_INVALID_UTF8_ENCODING));1051 rtStrGetCpNExFailure(ppsz, pcch, pCp, VERR_INVALID_UTF8_ENCODING)); 1049 1052 break; 1050 1053 default: /* impossible, but GCC is bitching. */ … … 1053 1056 } 1054 1057 puch += cb; 1055 (*pcch)-= cb;1058 cch -= cb; 1056 1059 } 1057 1060 else … … 1059 1062 /* 6th bit is always set. */ 1060 1063 RTStrAssertMsgFailed(("Invalid UTF-8 first byte: %.*Rhxs\n", RT_MIN(strlen((char *)puch), 10), puch)); 1061 return rtStrGetCpNExFailure(ppsz, p Cp, pcch, VERR_INVALID_UTF8_ENCODING);1064 return rtStrGetCpNExFailure(ppsz, pcch, pCp, VERR_INVALID_UTF8_ENCODING); 1062 1065 } 1063 1066 *pCp = uc; 1064 1067 *ppsz = (const char *)puch; 1068 (*pcch) = cch; 1065 1069 return VINF_SUCCESS; 1066 1070 } … … 1241 1245 return 1; 1242 1246 1243 #if 1 /* new */1244 1247 const char *pszStart1 = psz1; 1245 1248 for (;;) … … 1285 1288 /* Hit some bad encoding, continue in case insensitive mode. 
*/ 1286 1289 return RTStrCmp(psz1, psz2); 1287 #else /* old */1288 #ifdef RT_OS_WINDOWS1289 return stricmp(psz1, psz2);1290 #else /* !RT_OS_WINDOWS */1291 return strcasecmp(psz1, psz2);1292 #endif /* !RT_OS_WINDOWS */1293 #endif1294 1290 } 1295 1291 … … 1318 1314 RTDECL(int) RTStrNICmp(const char *psz1, const char *psz2, size_t cchMax) 1319 1315 { 1316 if (cchMax == 0) 1317 return 0; 1320 1318 if (psz1 == psz2) 1321 1319 return 0; … … 1324 1322 if (!psz2) 1325 1323 return 1; 1326 if (cchMax == 0) 1327 return 0; 1328 1329 #if 1 /* new */ 1324 1330 1325 const char *pszStart1 = psz1; 1331 1326 for (;;) … … 1334 1329 RTUNICP cp1; 1335 1330 size_t cchMax2 = cchMax; 1336 int rc = RTStrGetCpNEx(&psz1, &c p1, &cchMax);1331 int rc = RTStrGetCpNEx(&psz1, &cchMax, &cp1); 1337 1332 if (RT_FAILURE(rc)) 1338 1333 { … … 1344 1339 1345 1340 RTUNICP cp2; 1346 rc = RTStrGetCpNEx(&psz2, &c p2, &cchMax2);1341 rc = RTStrGetCpNEx(&psz2, &cchMax2, &cp2); 1347 1342 if (RT_FAILURE(rc)) 1348 1343 { … … 1374 1369 /* Hit some bad encoding, continue in case insensitive mode. */ 1375 1370 return RTStrNCmp(psz1, psz2, cchMax); 1376 #else /* old */ 1377 #ifdef RT_OS_WINDOWS 1378 return strnicmp(psz1, psz2, cchMax); 1379 #else /* !RT_OS_WINDOWS */ 1380 return strncasecmp(psz1, psz2, cchMax); 1381 #endif /* !RT_OS_WINDOWS */ 1382 #endif 1383 } 1384 1371 } 1372
Note: See TracChangeset for help on using the changeset viewer.