Changeset 67334 in vbox for trunk/src/VBox/Runtime/common/string
- Timestamp:
- Jun 12, 2017 9:48:11 AM (8 years ago)
- File:
-
- 1 edited
Legend:
- Unmodified
- Added
- Removed
-
trunk/src/VBox/Runtime/common/string/utf-8.cpp
r65642 r67334 33 33 34 34 #include <iprt/uni.h> 35 #include <iprt/asm.h> 35 36 #include <iprt/alloc.h> 36 37 #include <iprt/assert.h> … … 829 830 * @param pwsz Where to store the UTF-16 string. 830 831 * @param cwc The number of RTUTF16 items the pwsz buffer can hold, excluding the terminator ('\\0'). 832 * 833 * @note rtUtf8RecodeAsUtf16Big is a duplicate with RT_H2BE_U16 applied. 831 834 */ 832 835 static int rtUtf8RecodeAsUtf16(const char *psz, size_t cch, PRTUTF16 pwsz, size_t cwc) … … 907 910 908 911 912 /** 913 * Recodes a valid UTF-8 string as UTF-16BE. 914 * 915 * Since we know the input is valid, we do *not* perform encoding or length checks. 916 * 917 * @returns iprt status code. 918 * @param psz The UTF-8 string to recode. This is a valid encoding. 919 * @param cch The number of chars (the type char, so bytes if you like) to process of the UTF-8 string. 920 * The recoding will stop when cch or '\\0' is reached. Pass RTSTR_MAX to process up to '\\0'. 921 * @param pwsz Where to store the UTF-16BE string. 922 * @param cwc The number of RTUTF16 items the pwsz buffer can hold, excluding the terminator ('\\0'). 923 * 924 * @note This is a copy of rtUtf8RecodeAsUtf16 with RT_H2BE_U16 applied. 925 */ 926 static int rtUtf8RecodeAsUtf16Big(const char *psz, size_t cch, PRTUTF16 pwsz, size_t cwc) 927 { 928 int rc = VINF_SUCCESS; 929 const unsigned char *puch = (const unsigned char *)psz; 930 PRTUTF16 pwc = pwsz; 931 while (cch > 0) 932 { 933 /* read the next char and check for terminator. */ 934 const unsigned char uch = *puch; 935 if (uch) 936 { /* we only break once, so consider this the likely branch. */ } 937 else 938 break; 939 940 /* check for output overflow */ 941 if (RT_LIKELY(cwc >= 1)) 942 { /* likely */ } 943 else 944 { 945 rc = VERR_BUFFER_OVERFLOW; 946 break; 947 } 948 cwc--; 949 950 /* decode and recode the code point */ 951 if (!(uch & RT_BIT(7))) 952 { 953 *pwc++ = RT_H2BE_U16((RTUTF16)uch); 954 puch++; 955 cch--; 956 } 957 else if ((uch & (RT_BIT(7) | RT_BIT(6) | RT_BIT(5))) == (RT_BIT(7) | RT_BIT(6))) 958 { 959 uint16_t uc = (puch[1] & 0x3f) 960 | ((uint16_t)(uch & 0x1f) << 6); 961 *pwc++ = RT_H2BE_U16(uc); 962 puch += 2; 963 cch -= 2; 964 } 965 else if ((uch & (RT_BIT(7) | RT_BIT(6) | RT_BIT(5) | RT_BIT(4))) == (RT_BIT(7) | RT_BIT(6) | RT_BIT(5))) 966 { 967 uint16_t uc = (puch[2] & 0x3f) 968 | ((uint16_t)(puch[1] & 0x3f) << 6) 969 | ((uint16_t)(uch & 0x0f) << 12); 970 *pwc++ = RT_H2BE_U16(uc); 971 puch += 3; 972 cch -= 3; 973 } 974 else 975 { 976 /* generate surrogate pair */ 977 Assert((uch & (RT_BIT(7) | RT_BIT(6) | RT_BIT(5) | RT_BIT(4) | RT_BIT(3))) == (RT_BIT(7) | RT_BIT(6) | RT_BIT(5) | RT_BIT(4))); 978 RTUNICP uc = (puch[3] & 0x3f) 979 | ((RTUNICP)(puch[2] & 0x3f) << 6) 980 | ((RTUNICP)(puch[1] & 0x3f) << 12) 981 | ((RTUNICP)(uch & 0x07) << 18); 982 if (RT_UNLIKELY(cwc < 1)) 983 { 984 rc = VERR_BUFFER_OVERFLOW; 985 break; 986 } 987 cwc--; 988 989 uc -= 0x10000; 990 *pwc++ = RT_H2BE_U16(0xd800 | (uc >> 10)); 991 *pwc++ = RT_H2BE_U16(0xdc00 | (uc & 0x3ff)); 992 puch += 4; 993 cch -= 4; 994 } 995 } 996 997 /* done */ 998 *pwc = '\0'; 999 return rc; 1000 } 1001 1002 909 1003 RTDECL(int) RTStrToUtf16Tag(const char *pszString, PRTUTF16 *ppwszString, const char *pszTag) 910 1004 { … … 946 1040 } 947 1041 RT_EXPORT_SYMBOL(RTStrToUtf16Tag); 1042 1043 1044 RTDECL(int) RTStrToUtf16BigTag(const char *pszString, PRTUTF16 *ppwszString, const char *pszTag) 1045 { 1046 /* 1047 * Validate input. 1048 */ 1049 Assert(VALID_PTR(ppwszString)); 1050 Assert(VALID_PTR(pszString)); 1051 *ppwszString = NULL; 1052 1053 /* 1054 * Validate the UTF-8 input and calculate the length of the UTF-16 string. 1055 */ 1056 size_t cwc; 1057 int rc = rtUtf8CalcUtf16Length(pszString, &cwc); 1058 if (RT_SUCCESS(rc)) 1059 { 1060 /* 1061 * Allocate buffer. 1062 */ 1063 PRTUTF16 pwsz = (PRTUTF16)RTMemAllocTag((cwc + 1) * sizeof(RTUTF16), pszTag); 1064 if (pwsz) 1065 { 1066 /* 1067 * Encode the UTF-16 string. 1068 */ 1069 rc = rtUtf8RecodeAsUtf16Big(pszString, RTSTR_MAX, pwsz, cwc); 1070 if (RT_SUCCESS(rc)) 1071 { 1072 *ppwszString = pwsz; 1073 return rc; 1074 } 1075 RTMemFree(pwsz); 1076 } 1077 else 1078 rc = VERR_NO_UTF16_MEMORY; 1079 } 1080 return rc; 1081 } 1082 RT_EXPORT_SYMBOL(RTStrToUtf16TagBig); 948 1083 949 1084 … … 1011 1146 } 1012 1147 RT_EXPORT_SYMBOL(RTStrToUtf16ExTag); 1148 1149 1150 RTDECL(int) RTStrToUtf16BigExTag(const char *pszString, size_t cchString, 1151 PRTUTF16 *ppwsz, size_t cwc, size_t *pcwc, const char *pszTag) 1152 { 1153 /* 1154 * Validate input. 1155 */ 1156 Assert(VALID_PTR(pszString)); 1157 Assert(VALID_PTR(ppwsz)); 1158 Assert(!pcwc || VALID_PTR(pcwc)); 1159 1160 /* 1161 * Validate the UTF-8 input and calculate the length of the UTF-16 string. 1162 */ 1163 size_t cwcResult; 1164 int rc; 1165 if (cchString != RTSTR_MAX) 1166 rc = rtUtf8CalcUtf16LengthN(pszString, cchString, &cwcResult); 1167 else 1168 rc = rtUtf8CalcUtf16Length(pszString, &cwcResult); 1169 if (RT_SUCCESS(rc)) 1170 { 1171 if (pcwc) 1172 *pcwc = cwcResult; 1173 1174 /* 1175 * Check buffer size / Allocate buffer. 1176 */ 1177 bool fShouldFree; 1178 PRTUTF16 pwszResult; 1179 if (cwc > 0 && *ppwsz) 1180 { 1181 fShouldFree = false; 1182 if (cwc <= cwcResult) 1183 return VERR_BUFFER_OVERFLOW; 1184 pwszResult = *ppwsz; 1185 } 1186 else 1187 { 1188 *ppwsz = NULL; 1189 fShouldFree = true; 1190 cwc = RT_MAX(cwcResult + 1, cwc); 1191 pwszResult = (PRTUTF16)RTMemAllocTag(cwc * sizeof(RTUTF16), pszTag); 1192 } 1193 if (pwszResult) 1194 { 1195 /* 1196 * Encode the UTF-16BE string. 1197 */ 1198 rc = rtUtf8RecodeAsUtf16Big(pszString, cchString, pwszResult, cwc - 1); 1199 if (RT_SUCCESS(rc)) 1200 { 1201 *ppwsz = pwszResult; 1202 return rc; 1203 } 1204 if (fShouldFree) 1205 RTMemFree(pwszResult); 1206 } 1207 else 1208 rc = VERR_NO_UTF16_MEMORY; 1209 } 1210 return rc; 1211 } 1212 RT_EXPORT_SYMBOL(RTStrToUtf16BigExTag); 1013 1213 1014 1214
Note:
See TracChangeset
for help on using the changeset viewer.