Changeset 31246 in vbox for trunk/src/VBox/Runtime/common/string
- Timestamp:
- Jul 30, 2010 1:24:53 PM (15 years ago)
- svn:sync-xref-src-repo-rev:
- 64260
- File:
-
- 1 edited
Legend:
- Unmodified
- Added
- Removed
-
trunk/src/VBox/Runtime/common/string/utf-8.cpp
r31229 r31246 805 805 { 806 806 size_t cch = 0; 807 while (cchIn > 0) 808 { 809 char ch = *psz++; cchIn--; 810 if (!ch) 807 while (true) 808 { 809 RTUNICP Cp; 810 size_t cchCp; 811 int rc = RTLatin1GetCpNEx(&psz, &cchIn, &Cp); 812 if (Cp == 0 || rc == VERR_END_OF_STRING) 811 813 break; 812 if (!(ch & 0x80)) 813 cch++; 814 else 815 cch += 2; 816 } 817 814 if (RT_FAILURE(rc)) 815 return rc; 816 cchCp = RTStrCpSize(Cp); 817 if (cchCp == 0) 818 return VERR_NO_TRANSLATION; 819 cch += cchCp; 820 } 818 821 819 822 /* done */ … … 832 835 * @param psz Where to store the UTF-8 string. 833 836 * @param cch The size of the UTF-8 buffer, excluding the terminator. 834 * @param pcch Where to store the number of octets actually encoded.835 837 */ 836 static int rtLatin1RecodeAsUtf8(const char *pszIn, size_t cchIn, char *psz, size_t cch, size_t *pcch) 837 { 838 unsigned char *puch = (unsigned char *)psz; 839 int rc = VINF_SUCCESS; 840 while (cchIn > 0) 841 { 842 unsigned char ch = (unsigned char) *pszIn++; cchIn--; 843 if (!ch) 838 static int rtLatin1RecodeAsUtf8(const char *pszIn, size_t cchIn, char *psz, size_t cch) 839 { 840 int rc = VINF_SUCCESS; 841 842 while (true) 843 { 844 RTUNICP Cp; 845 size_t cchCp; 846 rc = RTLatin1GetCpNEx(&pszIn, &cchIn, &Cp); 847 if (Cp == 0 || RT_FAILURE(rc)) 844 848 break; 845 if (!(ch & 0x80)) 846 { 847 if (RT_UNLIKELY(cch < 1)) 848 { 849 RTStrAssertMsgFailed(("Buffer overflow! 1\n")); 850 rc = VERR_BUFFER_OVERFLOW; 851 break; 852 } 853 cch--; 854 *puch++ = (unsigned char)ch; 855 } 856 else 857 { 858 if (RT_UNLIKELY(cch < 2)) 859 { 860 RTStrAssertMsgFailed(("Buffer overflow! 2\n")); 861 rc = VERR_BUFFER_OVERFLOW; 862 break; 863 } 864 cch -= 2; 865 *puch++ = 0xc0 | (ch >> 6); 866 *puch++ = 0x80 | (ch & 0x3f); 867 } 849 cchCp = RTStrCpSize(Cp); 850 if (RT_UNLIKELY(cch < cchCp)) 851 { 852 RTStrAssertMsgFailed(("Buffer overflow! 1\n")); 853 rc = VERR_BUFFER_OVERFLOW; 854 break; 855 } 856 psz = RTStrPutCp(psz, Cp); 857 cch -= cchCp; 868 858 } 869 859 870 860 /* done */ 871 *puch = '\0'; 872 *pcch = (char *)puch - psz; 861 if (rc == VERR_END_OF_STRING) 862 rc = VINF_SUCCESS; 863 *psz = '\0'; 873 864 return rc; 874 865 } … … 898 889 if (pszResult) 899 890 { 900 rc = rtLatin1RecodeAsUtf8(pszString, RTSTR_MAX, pszResult, cch , &cch);891 rc = rtLatin1RecodeAsUtf8(pszString, RTSTR_MAX, pszResult, cch); 901 892 if (RT_SUCCESS(rc)) 902 893 { … … 955 946 if (pszResult) 956 947 { 957 rc = rtLatin1RecodeAsUtf8(pszString, cchString, pszResult, cch - 1 , &cch);948 rc = rtLatin1RecodeAsUtf8(pszString, cchString, pszResult, cch - 1); 958 949 if (RT_SUCCESS(rc)) 959 950 { … … 998 989 * @returns IPRT status code. 999 990 * @param psz Pointer to the UTF-8 string. 1000 * @param cch 991 * @param cchIn The max length of the string. (btw cch = cb) 1001 992 * Use RTSTR_MAX if all of the string is to be examined. 1002 993 * @param pcch Where to store the length of the Latin-1 string in bytes. 1003 994 */ 1004 static int rtUtf8CalcLatin1Length(const char *psz, size_t cch , size_t *pcch)1005 { 1006 size_t cchOut= 0;995 static int rtUtf8CalcLatin1Length(const char *psz, size_t cchIn, size_t *pcch) 996 { 997 size_t cch = 0; 1007 998 while (true) 1008 999 { 1009 1000 RTUNICP Cp; 1010 int rc = RTStrGetCpNEx(&psz, &cch, &Cp); 1001 size_t cchCp; 1002 int rc = RTStrGetCpNEx(&psz, &cchIn, &Cp); 1011 1003 if (Cp == 0 || rc == VERR_END_OF_STRING) 1012 1004 break; 1013 1005 if (RT_FAILURE(rc)) 1014 1006 return rc; 1015 if (Cp >= 0x100) 1007 cchCp = RTLatin1CpSize(Cp); 1008 if (cchCp == 0) 1016 1009 return VERR_NO_TRANSLATION; 1017 cch Out++;1010 cch += cchCp; 1018 1011 } 1019 1012 1020 1013 /* done */ 1021 *pcch = cch Out;1014 *pcch = cch; 1022 1015 return VINF_SUCCESS; 1023 1016 } … … 1030 1023 * 1031 1024 * @returns iprt status code. 1032 * @param psz 1033 * @param cch 1025 * @param pszIn The UTF-8 string to recode. This is a valid encoding. 1026 * @param cchIn The number of chars (the type char, so bytes if you like) to process of the UTF-8 string. 1034 1027 * The recoding will stop when cch or '\\0' is reached. Pass RTSTR_MAX to process up to '\\0'. 1035 * @param psz OutWhere to store the Latin-1 string.1036 * @param cch OutThe number of characters the pszOut buffer can hold, excluding the terminator ('\\0').1028 * @param psz Where to store the Latin-1 string. 1029 * @param cch The number of characters the pszOut buffer can hold, excluding the terminator ('\\0'). 1037 1030 */ 1038 static int rtUtf8RecodeAsLatin1(const char *psz , size_t cch, char *pszOut, size_t cchOut)1039 { 1040 int rc= VINF_SUCCESS;1041 const unsigned char *puch = (const unsigned char *)psz; 1042 unsigned char *puchOut = (unsigned char *)pszOut;1043 while (cch > 0)1044 {1045 /* read the next char and check for terminator. */1046 const unsigned char uch = *puch;1047 if ( !uch)1031 static int rtUtf8RecodeAsLatin1(const char *pszIn, size_t cchIn, char *psz, size_t cch) 1032 { 1033 int rc = VINF_SUCCESS; 1034 1035 while (true) 1036 { 1037 RTUNICP Cp; 1038 size_t cchCp; 1039 rc = RTStrGetCpNEx(&pszIn, &cchIn, &Cp); 1040 if (Cp == 0 || RT_FAILURE(rc)) 1048 1041 break; 1049 1050 /* check for output overflow */1051 if (RT_UNLIKELY(cchOut < 1))1052 {1042 cchCp = RTLatin1CpSize(Cp); 1043 if (RT_UNLIKELY(cch < cchCp)) 1044 { 1045 RTStrAssertMsgFailed(("Buffer overflow! 1\n")); 1053 1046 rc = VERR_BUFFER_OVERFLOW; 1054 1047 break; 1055 1048 } 1056 cchOut--; 1057 1058 /* decode and recode the code point */ 1059 if (!(uch & RT_BIT(7))) 1060 { 1061 *puchOut++ = uch; 1062 puch++; 1063 cch--; 1064 } 1065 else if ((uch & (RT_BIT(7) | RT_BIT(6) | RT_BIT(5))) == (RT_BIT(7) | RT_BIT(6))) 1066 { 1067 uint16_t uc = (puch[1] & 0x3f) 1068 | ((uint16_t)(uch & 0x1f) << 6); 1069 if (uc >= 0x100) 1070 { 1071 rc = VERR_NO_TRANSLATION; 1072 break; 1073 } 1074 *puchOut++ = uc; 1075 puch += 2; 1076 cch -= 2; 1077 } 1078 else 1079 { 1080 rc = VERR_NO_TRANSLATION; 1081 break; 1082 } 1049 psz = RTLatin1PutCp(psz, Cp); 1050 cch -= cchCp; 1083 1051 } 1084 1052 1085 1053 /* done */ 1086 *puchOut = '\0'; 1087 return rc; 1088 } 1054 if (rc == VERR_END_OF_STRING) 1055 rc = VINF_SUCCESS; 1056 *psz = '\0'; 1057 return rc; 1058 } 1059 1089 1060 1090 1061
Note:
See TracChangeset
for help on using the changeset viewer.