VirtualBox

Changeset 31246 in vbox for trunk/include/iprt


Ignore:
Timestamp:
Jul 30, 2010 1:24:53 PM (15 years ago)
Author:
vboxsync
svn:sync-xref-src-repo-rev:
64260
Message:

Runtime/string: clean up the UTF-8 <-> Latin1 APIs a bit more

File:
1 edited

Legend:

Unmodified
Added
Removed
  • trunk/include/iprt/string.h

    r31221 r31246  
    2929#include <iprt/cdefs.h>
    3030#include <iprt/types.h>
     31#include <iprt/assert.h>
    3132#include <iprt/stdarg.h>
     33#include <iprt/uni.h> /* for RTUNICP_INVALID */
    3234#include <iprt/err.h> /* for VINF_SUCCESS */
    3335#if defined(RT_OS_LINUX) && defined(__KERNEL__)
     
    11571159
    11581160/**
     1161 * Get the UTF-8 size in characters of a given Unicode code point.  The code
     1162 * point is expected to be a valid Unicode one, but not necessarily in the
     1163 * range supported by UTF-8.
     1164 *
     1165 * @returns the size in characters, or zero if there is no UTF-8 encoding
     1166 */
     1167DECLINLINE(size_t) RTStrCpSize(RTUNICP CodePoint)
     1168{
     1169    if (CodePoint < 0x80)
     1170        return 1;
     1171    if (CodePoint < 0x800)
     1172        return 2;
     1173    if (CodePoint < 0x10000)
     1174        return 3;
     1175    if (CodePoint < 0x11000)
     1176        return 4;
     1177    return 0;
     1178}
     1179
     1180/**
    11591181 * Put the unicode code point at the given string position
    11601182 * and return the pointer to the char following it.
     
    12071229 */
    12081230RTDECL(char *) RTStrPrevCp(const char *pszStart, const char *psz);
     1231
     1232/**
     1233 * Get the unicode code point at the given string position.
     1234 *
     1235 * @returns unicode code point.
     1236 * @returns RTUNICP_INVALID if the encoding is invalid.
     1237 * @param   psz         The string.
     1238 */
     1239DECLINLINE(RTUNICP) RTLatin1GetCp(const char *psz)
     1240{
     1241    return *(const unsigned char *)psz;
     1242}
     1243
     1244/**
     1245 * Get the unicode code point at the given string position.
     1246 *
     1247 * @returns iprt status code.
     1248 * @param   ppsz        Pointer to the string pointer. This will be updated to
     1249 *                      point to the char following the current code point.
     1250 *                      This is advanced one character forward on failure.
     1251 * @param   pCp         Where to store the code point.
     1252 *                      RTUNICP_INVALID is stored here on failure.
     1253 *
     1254 * @remark  We optimize this operation by using an inline function for
     1255 *          the most frequent and simplest sequence, the rest is
     1256 *          handled by RTStrGetCpExInternal().
     1257 */
     1258DECLINLINE(int) RTLatin1GetCpEx(const char **ppsz, PRTUNICP pCp)
     1259{
     1260    const unsigned char uch = **(const unsigned char **)ppsz;
     1261    (*ppsz)++;
     1262    *pCp = uch;
     1263    return VINF_SUCCESS;
     1264}
     1265
     1266/**
     1267 * Get the unicode code point at the given string position for a string of a
     1268 * given maximum length.
     1269 *
     1270 * @returns iprt status code.
     1271 * @retval  VERR_END_OF_STRING if *pcch is 0. *pCp is set to RTUNICP_INVALID.
     1272 *
     1273 * @param   ppsz        Pointer to the string pointer. This will be updated to
     1274 *                      point to the char following the current code point.
     1275 * @param   pcch        Pointer to the maximum string length.  This will be
     1276 *                      decremented by the size of the code point found.
     1277 * @param   pCp         Where to store the code point.
     1278 *                      RTUNICP_INVALID is stored here on failure.
     1279 */
     1280DECLINLINE(int) RTLatin1GetCpNEx(const char **ppsz, size_t *pcch, PRTUNICP pCp)
     1281{
     1282    if (RT_LIKELY(*pcch != 0))
     1283    {
     1284        const unsigned char uch = **(const unsigned char **)ppsz;
     1285        (*ppsz)++;
     1286        (*pcch)--;
     1287        *pCp = uch;
     1288        return VINF_SUCCESS;
     1289    }
     1290    *pCp = RTUNICP_INVALID;
     1291    return VERR_END_OF_STRING;
     1292}
     1293
     1294/**
     1295 * Get the Latin-1 size in characters of a given Unicode code point.  The code
     1296 * point is expected to be a valid Unicode one, but not necessarily in the
     1297 * range supported by Latin-1.
     1298 *
     1299 * @returns the size in characters, or zero if there is no Latin-1 encoding
     1300 */
     1301DECLINLINE(size_t) RTLatin1CpSize(RTUNICP CodePoint)
     1302{
     1303    if (CodePoint < 0x100)
     1304        return 1;
     1305    return 0;
     1306}
     1307
     1308/**
     1309 * Put the unicode code point at the given string position
     1310 * and return the pointer to the char following it.
     1311 *
     1312 * This function will not consider anything at or following the
     1313 * buffer area pointed to by psz. It is therefore not suitable for
     1314 * inserting code points into a string, only appending/overwriting.
     1315 *
     1316 * @returns pointer to the char following the written code point.
     1317 * @param   psz         The string.
     1318 * @param   CodePoint   The code point to write.
     1319 *                      This should not be RTUNICP_INVALID or any other
     1320 *                      character out of the Latin-1 range.
     1321 */
     1322DECLINLINE(char *) RTLatin1PutCp(char *psz, RTUNICP CodePoint)
     1323{
     1324    AssertReturn(CodePoint < 0x100, NULL);
     1325    *psz++ = (unsigned char)CodePoint;
     1326    return psz;
     1327}
     1328
     1329/**
     1330 * Skips ahead, past the current code point.
     1331 *
     1332 * @returns Pointer to the char after the current code point.
     1333 * @param   psz     Pointer to the current code point.
     1334 * @remark  This will not move the next valid code point, only past the current one.
     1335 */
     1336DECLINLINE(char *) RTLatin1NextCp(const char *psz)
     1337{
     1338    psz++;
     1339    return (char *)psz;
     1340}
     1341
     1342/**
     1343 * Skips back to the previous code point.
     1344 *
     1345 * @returns Pointer to the char before the current code point.
     1346 * @returns pszStart on failure.
     1347 * @param   pszStart    Pointer to the start of the string.
     1348 * @param   psz         Pointer to the current code point.
     1349 */
     1350DECLINLINE(char *) RTLatin1PrevCp(const char *psz)
     1351{
     1352    psz--;
     1353    return (char *)psz;
     1354}
    12091355
    12101356
Note: See TracChangeset for help on using the changeset viewer.

© 2025 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette