VirtualBox

Changeset 50792 in vbox


Ignore:
Timestamp:
Mar 14, 2014 8:17:09 PM (11 years ago)
Author:
vboxsync
svn:sync-xref-src-repo-rev:
92817
Message:

IPRT: Added RTUtf16ValidateEncodingEx, RTUtf16ValidateEncoding and RTUtf16IsValidEncoding.

Location:
trunk
Files:
3 edited

Legend:

Unmodified
Added
Removed
  • trunk/include/iprt/mangling.h

    r50790 r50792  
    17651765# define RTUtf16GetCpInternal                           RT_MANGLER(RTUtf16GetCpInternal)
    17661766# define RTUtf16ICmp                                    RT_MANGLER(RTUtf16ICmp)
     1767# define RTUtf16IsValidEncoding                         RT_MANGLER(RTUtf16IsValidEncoding)
    17671768# define RTUtf16Len                                     RT_MANGLER(RTUtf16Len)
    17681769# define RTUtf16LocaleICmp                              RT_MANGLER(RTUtf16LocaleICmp)
     
    17751776# define RTUtf16ToUtf8ExTag                             RT_MANGLER(RTUtf16ToUtf8ExTag)
    17761777# define RTUtf16ToUtf8Tag                               RT_MANGLER(RTUtf16ToUtf8Tag)
     1778# define RTUtf16ValidateEncoding                        RT_MANGLER(RTUtf16ValidateEncoding)
     1779# define RTUtf16ValidateEncodingEx                      RT_MANGLER(RTUtf16ValidateEncodingEx)
    17771780# define RTUuidClear                                    RT_MANGLER(RTUuidClear)
    17781781# define RTUuidCompare                                  RT_MANGLER(RTUuidCompare)
  • trunk/include/iprt/string.h

    r46010 r50792  
    672672RTDECL(int) RTStrValidateEncoding(const char *psz);
    673673
    674 /** @name Flags for RTStrValidateEncodingEx
     674/** @name Flags for RTStrValidateEncodingEx and RTUtf16ValidateEncodingEx
    675675 */
    676676/** Check that the string is zero terminated within the given size.
     
    685685 * @param   psz         The string.
    686686 * @param   cch         The max string length. Use RTSTR_MAX to process the entire string.
    687  * @param   fFlags      Reserved for future. Pass 0.
     687 * @param   fFlags      Combination of RTSTR_VALIDATE_ENCODING_XXX flags.
    688688 */
    689689RTDECL(int) RTStrValidateEncodingEx(const char *psz, size_t cch, uint32_t fFlags);
     
    33463346
    33473347/**
     3348 * Validates the UTF-16 encoding of the string.
     3349 *
     3350 * @returns iprt status code.
     3351 * @param   pwsz        The string.
     3352 */
     3353RTDECL(int) RTUtf16ValidateEncoding(PCRTUTF16 pwsz);
     3354
     3355/**
     3356 * Validates the UTF-16 encoding of the string.
     3357 *
     3358 * @returns iprt status code.
     3359 * @param   pwsz        The string.
     3360 * @param   cwc         The max string length in RTUTF16 units. Use RTSTR_MAX to process the entire string.
     3361 * @param   fFlags      Combination of RTSTR_VALIDATE_ENCODING_XXX flags.
     3362 */
     3363RTDECL(int) RTUtf16ValidateEncodingEx(PCRTUTF16 pwsz, size_t cwc, uint32_t fFlags);
     3364
     3365/**
     3366 * Checks if the UTF-16 encoding is valid.
     3367 *
     3368 * @returns true / false.
     3369 * @param   pwsz        The string.
     3370 */
     3371RTDECL(bool) RTUtf16IsValidEncoding(PCRTUTF16 pwsz);
     3372
     3373/**
    33483374 * Sanitise a (valid) UTF-16 string by replacing all characters outside a white
    33493375 * list in-place by an ASCII replacement character.  Multi-byte characters will
  • trunk/src/VBox/Runtime/common/string/utf-16.cpp

    r48935 r50792  
    3939
    4040
     41/**
     42 * Get get length in code points of an UTF-16 encoded string, validating the
     43 * string while doing so.
     44 *
     45 * @returns IPRT status code.
     46 * @param   pwsz            Pointer to the UTF-16 string.
     47 * @param   cwc             The max length of the string in UTF-16 units.  Use
     48 *                          RTSTR_MAX if all of the string is to be examined.
     49 * @param   pcuc            Where to store the length in unicode code points.
     50 * @param   pcwcActual      Where to store the actual size of the UTF-16 string
     51 *                          on success. Optional.
     52 */
     53static int rtUtf16Length(PCRTUTF16 pwsz, size_t cwc, size_t *pcuc, size_t *pcwcActual)
     54{
     55    PCRTUTF16 pwszStart   = pwsz;
     56    size_t    cCodePoints = 0;
     57    while (cwc > 0)
     58    {
     59        RTUTF16 wc = *pwsz;
     60        if (!wc)
     61            break;
     62        if (wc < 0xd800 || wc > 0xdfff)
     63        {
     64            cCodePoints++;
     65            pwsz++;
     66            cwc--;
     67        }
     68        /* Surrogate pair: */
     69        else if (wc >= 0xdc00)
     70        {
     71            RTStrAssertMsgFailed(("Lone UTF-16 trail surrogate: %#x (%.*Rhxs)\n", wc, RT_MIN(cwc * 2, 10), pwsz));
     72            return VERR_INVALID_UTF16_ENCODING;
     73        }
     74        else if (cwc < 2)
     75        {
     76            RTStrAssertMsgFailed(("Lone UTF-16 lead surrogate: %#x\n", wc));
     77            return VERR_INVALID_UTF16_ENCODING;
     78        }
     79        else
     80        {
     81            RTUTF16 wcTrail = pwsz[1];
     82            if (wcTrail < 0xdc00 || wcTrail > 0xdfff)
     83            {
     84                RTStrAssertMsgFailed(("Invalid UTF-16 trail surrogate: %#x (lead %#x)\n", wcTrail, wc));
     85                return VERR_INVALID_UTF16_ENCODING;
     86            }
     87
     88            cCodePoints++;
     89            pwsz += 2;
     90            cwc -= 2;
     91        }
     92    }
     93
     94    /* done */
     95    *pcuc = cCodePoints;
     96    if (pcwcActual)
     97        *pcwcActual = pwsz - pwszStart;
     98    return VINF_SUCCESS;
     99}
     100
    41101
    42102RTDECL(void)  RTUtf16Free(PRTUTF16 pwszString)
     
    248308}
    249309RT_EXPORT_SYMBOL(RTUtf16ToUpper);
     310
     311
     312RTDECL(int) RTUtf16ValidateEncoding(PCRTUTF16 pwsz)
     313{
     314    return RTUtf16ValidateEncodingEx(pwsz, RTSTR_MAX, 0);
     315}
     316RT_EXPORT_SYMBOL(RTUtf16ValidateEncoding);
     317
     318
     319RTDECL(int) RTUtf16ValidateEncodingEx(PCRTUTF16 pwsz, size_t cwc, uint32_t fFlags)
     320{
     321    AssertReturn(!(fFlags & ~(RTSTR_VALIDATE_ENCODING_ZERO_TERMINATED)), VERR_INVALID_PARAMETER);
     322    AssertPtr(pwsz);
     323
     324    /*
     325     * Use rtUtf16Length for the job.
     326     */
     327    size_t cwcActual;
     328    size_t cCpsIgnored;
     329    int rc = rtUtf16Length(pwsz, cwc, &cCpsIgnored, &cwcActual);
     330    if (RT_SUCCESS(rc))
     331    {
     332        if (    (fFlags & RTSTR_VALIDATE_ENCODING_ZERO_TERMINATED)
     333            &&  cwcActual >= cwc)
     334            rc = VERR_BUFFER_OVERFLOW;
     335    }
     336    return rc;
     337}
     338RT_EXPORT_SYMBOL(RTUtf16ValidateEncodingEx);
     339
     340
     341RTDECL(bool) RTUtf16IsValidEncoding(PCRTUTF16 pwsz)
     342{
     343    int rc = RTUtf16ValidateEncodingEx(pwsz, RTSTR_MAX, 0);
     344    return RT_SUCCESS(rc);
     345}
     346RT_EXPORT_SYMBOL(RTUtf16IsValidEncoding);
    250347
    251348
Note: See TracChangeset for help on using the changeset viewer.

© 2025 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette