VirtualBox

Changeset 33562 in vbox for trunk/src/VBox


Ignore:
Timestamp:
Oct 28, 2010 2:38:50 PM (14 years ago)
Author:
vboxsync
svn:sync-xref-src-repo-rev:
67168
Message:

RTStrToUpper, RTStrToLower: Fixed the bad assumption that lower-case and upper-case characters are encoded with the same length (this is only true for the upper<->lower round trip). Also implemented quiet handling of invalidly encoded sequences.

File:
1 edited

Legend:

Unmodified
Added
Removed
  • trunk/src/VBox/Runtime/common/string/utf-8-case.cpp

    r28903 r33562  
    55
    66/*
    7  * Copyright (C) 2006-2009 Oracle Corporation
     7 * Copyright (C) 2006-2010 Oracle Corporation
    88 *
    99 * This file is part of VirtualBox Open Source Edition (OSE), as
     
    7272    {
    7373        /* Get the codepoints */
    74         RTUNICP cp1;
    75         int rc = RTStrGetCpEx(&psz1, &cp1);
     74        RTUNICP uc1;
     75        int rc = RTStrGetCpEx(&psz1, &uc1);
    7676        if (RT_FAILURE(rc))
    7777        {
     
    8181        }
    8282
    83         RTUNICP cp2;
    84         rc = RTStrGetCpEx(&psz2, &cp2);
     83        RTUNICP uc2;
     84        rc = RTStrGetCpEx(&psz2, &uc2);
    8585        if (RT_FAILURE(rc))
    8686        {
     
    9292
    9393        /* compare */
    94         int iDiff = cp1 - cp2;
     94        int iDiff = uc1 - uc2;
    9595        if (iDiff)
    9696        {
    97             iDiff = RTUniCpToUpper(cp1) != RTUniCpToUpper(cp2);
     97            iDiff = RTUniCpToUpper(uc1) != RTUniCpToUpper(uc2);
    9898            if (iDiff)
    9999            {
    100                 iDiff = RTUniCpToLower(cp1) - RTUniCpToLower(cp2); /* lower case diff last! */
     100                iDiff = RTUniCpToLower(uc1) - RTUniCpToLower(uc2); /* lower case diff last! */
    101101                if (iDiff)
    102102                    return iDiff;
     
    105105
    106106        /* hit the terminator? */
    107         if (!cp1)
     107        if (!uc1)
    108108            return 0;
    109109    }
     
    150150    {
    151151        /* Get the codepoints */
    152         RTUNICP cp1;
     152        RTUNICP uc1;
    153153        size_t cchMax2 = cchMax;
    154         int rc = RTStrGetCpNEx(&psz1, &cchMax, &cp1);
     154        int rc = RTStrGetCpNEx(&psz1, &cchMax, &uc1);
    155155        if (RT_FAILURE(rc))
    156156        {
     
    161161        }
    162162
    163         RTUNICP cp2;
    164         rc = RTStrGetCpNEx(&psz2, &cchMax2, &cp2);
     163        RTUNICP uc2;
     164        rc = RTStrGetCpNEx(&psz2, &cchMax2, &uc2);
    165165        if (RT_FAILURE(rc))
    166166        {
     
    173173
    174174        /* compare */
    175         int iDiff = cp1 - cp2;
     175        int iDiff = uc1 - uc2;
    176176        if (iDiff)
    177177        {
    178             iDiff = RTUniCpToUpper(cp1) != RTUniCpToUpper(cp2);
     178            iDiff = RTUniCpToUpper(uc1) != RTUniCpToUpper(uc2);
    179179            if (iDiff)
    180180            {
    181                 iDiff = RTUniCpToLower(cp1) - RTUniCpToLower(cp2); /* lower case diff last! */
     181                iDiff = RTUniCpToLower(uc1) - RTUniCpToLower(uc2); /* lower case diff last! */
    182182                if (iDiff)
    183183                    return iDiff;
     
    186186
    187187        /* hit the terminator? */
    188         if (!cp1 || cchMax == 0)
     188        if (!uc1 || cchMax == 0)
    189189            return 0;
    190190    }
     
    278278    /*
    279279     * Loop the code points in the string, converting them one by one.
    280      * ASSUMES that the code points for upper and lower case are encoded
    281      *         with the exact same length.
     280     *
     281     * ASSUMES that the folded code points have an encoding that is equal or
     282     *         shorter than the original (this is presently correct).
    282283     */
    283     /** @todo Handled bad encodings correctly+quietly, remove assumption,
    284      *        optimize. */
    285     char *pszCur = psz;
    286     while (*pszCur)
    287     {
    288         RTUNICP cp = RTStrGetCp(pszCur);
    289         cp = RTUniCpToLower(cp);
    290         pszCur = RTStrPutCp(pszCur, cp);
    291     }
     284    const char *pszSrc = psz;
     285    char       *pszDst = psz;
     286    RTUNICP     uc;
     287    do
     288    {
     289        int rc = RTStrGetCpEx(&pszSrc, &uc);
     290        if (RT_SUCCESS(rc))
     291        {
     292            uc = RTUniCpToLower(uc);
     293            pszDst = RTStrPutCp(pszDst, uc);
     294        }
     295        else
     296        {
     297            /* bad encoding, just copy it quietly (uc == RTUNICP_INVALID (!= 0)). */
     298            AssertRC(rc);
     299            *pszDst++ = pszSrc[-1];
     300        }
     301        Assert((uintptr_t)pszDst <= (uintptr_t)pszSrc);
     302    } while (uc != 0);
     303
    292304    return psz;
    293305}
     
    299311    /*
    300312     * Loop the code points in the string, converting them one by one.
    301      * ASSUMES that the code points for upper and lower case are encoded
    302      *         with the exact same length.
     313     *
     314     * ASSUMES that the folded code points have an encoding that is equal or
     315     *         shorter than the original (this is presently correct).
    303316     */
    304     /** @todo Handled bad encodings correctly+quietly, remove assumption,
    305      *        optimize. */
    306     char *pszCur = psz;
    307     while(*pszCur)
    308     {
    309         RTUNICP cp = RTStrGetCp(pszCur);
    310         cp = RTUniCpToUpper(cp);
    311         pszCur = RTStrPutCp(pszCur, cp);
    312     }
     317    const char *pszSrc = psz;
     318    char       *pszDst = psz;
     319    RTUNICP     uc;
     320    do
     321    {
     322        int rc = RTStrGetCpEx(&pszSrc, &uc);
     323        if (RT_SUCCESS(rc))
     324        {
     325            uc = RTUniCpToUpper(uc);
     326            pszDst = RTStrPutCp(pszDst, uc);
     327        }
     328        else
     329        {
     330            /* bad encoding, just copy it quietly (uc == RTUNICP_INVALID (!= 0)). */
     331            AssertRC(rc);
     332            *pszDst++ = pszSrc[-1];
     333        }
     334        Assert((uintptr_t)pszDst <= (uintptr_t)pszSrc);
     335    } while (uc != 0);
     336
    313337    return psz;
    314338}
Note: See TracChangeset for help on using the changeset viewer.

© 2025 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette