/* $Id: uri.cpp 62477 2016-07-22 18:27:37Z vboxsync $ */
/** @file
 * IPRT - Uniform Resource Identifier handling.
 */

/*
 * Copyright (C) 2011-2016 Oracle Corporation
 *
 * This file is part of VirtualBox Open Source Edition (OSE), as
 * available from http://www.virtualbox.org. This file is free software;
 * you can redistribute it and/or modify it under the terms of the GNU
 * General Public License (GPL) as published by the Free Software
 * Foundation, in version 2 as it comes in the "COPYING" file of the
 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
 *
 * The contents of this file may alternatively be used under the terms
 * of the Common Development and Distribution License Version 1.0
 * (CDDL) only, as it comes in the "COPYING.CDDL" file of the
 * VirtualBox OSE distribution, in which case the provisions of the
 * CDDL are applicable instead of those of the GPL.
 *
 * You may elect to license modified versions of this file under the
 * terms and conditions of either the GPL or the CDDL or both.
 */


/*********************************************************************************************************************************
*   Header Files                                                                                                                 *
*********************************************************************************************************************************/
#include <iprt/uri.h>

#include <iprt/assert.h>
#include <iprt/ctype.h>
#include <iprt/path.h>
#include <iprt/string.h>


/*********************************************************************************************************************************
*   Defined Constants And Macros                                                                                                 *
*********************************************************************************************************************************/
/** Internal magic value we use to check if a RTURIPARSED structure has made it thru RTUriParse. */
#define RTURIPARSED_MAGIC   UINT32_C(0x439e0745)


/* General URI format:

    foo://example.com:8042/over/there?name=ferret#nose
    \_/   \______________/\_________/ \_________/ \__/
     |           |             |           |        |
  scheme     authority       path        query   fragment
     |   _____________________|__
    / \ /                        \
    urn:example:animal:ferret:nose
*/


/**
 * The following defines characters which have to be % escaped:
 *  control = 00-1F
 *  space   = ' '
 *  delims  = '<' , '>' , '#' , '%' , '"'
 *  unwise  = '{' , '}' , '|' , '\' , '^' , '[' , ']' , '`'
 */
#define URI_EXCLUDED(a) \
  (   ((a) >= 0x0  && (a) <= 0x20) \
   || ((a) >= 0x5B && (a) <= 0x5E) \
   || ((a) >= 0x7B && (a) <= 0x7D) \
   || (a) == '<' || (a) == '>' || (a) == '#' \
   || (a) == '%' || (a) == '"' || (a) == '`' )

static char *rtUriPercentEncodeN(const char *pszString, size_t cchMax)
{
    if (!pszString)
        return NULL;

    int rc = VINF_SUCCESS;

    size_t cbLen = RT_MIN(strlen(pszString), cchMax);
    /* The new string can be max 3 times in size of the original string. */
    char *pszNew = RTStrAlloc(cbLen * 3 + 1);
    if (!pszNew)
        return NULL;

    char *pszRes = NULL;
    size_t iIn = 0;
    size_t iOut = 0;
    while (iIn < cbLen)
    {
        if (URI_EXCLUDED(pszString[iIn]))
        {
            char szNum[3] = { 0, 0, 0 };
            RTStrFormatU8(&szNum[0], 3, pszString[iIn++], 16, 2, 2, RTSTR_F_CAPITAL | RTSTR_F_ZEROPAD);
            pszNew[iOut++] = '%';
            pszNew[iOut++] = szNum[0];
            pszNew[iOut++] = szNum[1];
        }
        else
            pszNew[iOut++] = pszString[iIn++];
    }
    if (RT_SUCCESS(rc))
    {
        pszNew[iOut] = '\0';
        if (iOut != iIn)
        {
            /* If the source and target strings have different size, recreate
             * the target string with the correct size. */
            pszRes = RTStrDupN(pszNew, iOut);
            RTStrFree(pszNew);
        }
        else
            pszRes = pszNew;
    }
    else
        RTStrFree(pszNew);

    return pszRes;
}


/**
 * Calculates the encoded string length.
 *
 * @returns Number of chars (excluding the terminator).
 * @param   pszString       The string to encode.
 * @param   cchMax          The maximum string length (e.g. RTSTR_MAX).
 * @param   fEncodeDosSlash Whether to encode DOS slashes or not.
 */
static size_t rtUriCalcEncodedLength(const char *pszString, size_t cchMax, bool fEncodeDosSlash)
{
    size_t cchEncoded = 0;
    if (pszString)
    {
        size_t cchSrcLeft = RTStrNLen(pszString, cchMax);
        while (cchSrcLeft-- > 0)
        {
            char const ch = *pszString++;
            if (!URI_EXCLUDED(ch) || (ch == '\\' && !fEncodeDosSlash))
                cchEncoded += 1;
            else
                cchEncoded += 3;
        }
    }
    return cchEncoded;
}


/**
 * Encodes an URI into a caller allocated buffer.
 *
 * @returns IPRT status code.
 * @param   pszString       The string to encode.
 * @param   cchMax          The maximum string length (e.g. RTSTR_MAX).
 * @param   fEncodeDosSlash Whether to encode DOS slashes or not.
 * @param   pszDst          The destination buffer.
 * @param   cbDst           The size of the destination buffer.
 */
static int rtUriEncodeIntoBuffer(const char *pszString, size_t cchMax, bool fEncodeDosSlash, char *pszDst, size_t cbDst)
{
    AssertReturn(pszString, VERR_INVALID_POINTER);
    AssertPtrReturn(pszDst, VERR_INVALID_POINTER);

    /*
     * We do buffer size checking up front and every time we encode a special
     * character.  That's faster than checking for each char.
     */
    size_t cchSrcLeft = RTStrNLen(pszString, cchMax);
    AssertMsgReturn(cbDst > cchSrcLeft, ("cbDst=%zu cchSrcLeft=%zu\n", cbDst, cchSrcLeft), VERR_BUFFER_OVERFLOW);
    cbDst -= cchSrcLeft;

    while (cchSrcLeft-- > 0)
    {
        char const ch = *pszString++;
        if (!URI_EXCLUDED(ch) || (ch == '\\' && !fEncodeDosSlash))
            *pszDst++ = ch;
        else
        {
            AssertReturn(cbDst >= 3, VERR_BUFFER_OVERFLOW); /* 2 extra bytes + zero terminator. */
            cbDst -= 2;

            *pszDst++ = '%';
            ssize_t cchTmp = RTStrFormatU8(pszDst, 3, (unsigned char)ch, 16, 2, 2, RTSTR_F_CAPITAL | RTSTR_F_ZEROPAD);
            Assert(cchTmp == 2); NOREF(cchTmp);
            pszDst += 2;
        }
    }

    *pszDst = '\0';
    return VINF_SUCCESS;
}


static char *rtUriPercentDecodeN(const char *pszString, size_t cchString)
{
    AssertPtrReturn(pszString, NULL);
    AssertReturn(memchr(pszString, '\0', cchString) == NULL, NULL);

    /*
     * The new string can only get smaller, so use the input length as a
     * staring buffer size.
     */
    char *pszDecoded = RTStrAlloc(cchString + 1);
    if (pszDecoded)
    {
        /*
         * Knowing that the pszString itself is valid UTF-8, we only have to
         * validate the escape sequences.
         */
        size_t      cchLeft = cchString;
        char const *pchSrc  = pszString;
        char       *pchDst  = pszDecoded;
        while (cchLeft > 0)
        {
            const char *pchPct = (const char *)memchr(pchSrc, '%', cchLeft);
            if (pchPct)
            {
                size_t cchBefore = pchPct - pchSrc;
                if (cchBefore)
                {
                    memcpy(pchDst, pchSrc, cchBefore);
                    pchDst  += cchBefore;
                    pchSrc  += cchBefore;
                    cchLeft -= cchBefore;
                }

                char chHigh, chLow;
                if (   cchLeft >= 3
                    && RT_C_IS_XDIGIT(chHigh = pchSrc[1])
                    && RT_C_IS_XDIGIT(chLow  = pchSrc[2]))
                {
                    uint8_t b = RT_C_IS_DIGIT(chHigh) ? chHigh - '0' : (chHigh & ~0x20) - 'A' + 10;
                    b <<= 4;
                    b |= RT_C_IS_DIGIT(chLow) ? chLow - '0' : (chLow & ~0x20) - 'A' + 10;
                    *pchDst++ = (char)b;
                    pchSrc  += 3;
                    cchLeft -= 3;
                }
                else
                {
                    AssertFailed();
                    *pchDst++ = *pchSrc++;
                    cchLeft--;
                }
            }
            else
            {
                memcpy(pchDst, pchSrc, cchLeft);
                pchDst += cchLeft;
                pchSrc += cchLeft;
                cchLeft = 0;
                break;
            }
        }

        *pchDst = '\0';

        /*
         * If we've got lof space room in the result string, reallocate it.
         */
        size_t cchDecoded = pchDst - pszDecoded;
        Assert(cchDecoded <= cchString);
        if (cchString - cchDecoded > 64)
            RTStrRealloc(&pszDecoded, cchDecoded + 1);
    }
    return pszDecoded;
}


/**
 * Calculates the decoded string length.
 *
 * @returns Number of chars (excluding the terminator).
 * @param   pszString       The string to decode.
 * @param   cchMax          The maximum string length (e.g. RTSTR_MAX).
 */
static size_t rtUriCalcDecodedLength(const char *pszString, size_t cchMax)
{
    size_t cchDecoded;
    if (pszString)
    {
        size_t cchSrcLeft = cchDecoded = RTStrNLen(pszString, cchMax);
        while (cchSrcLeft-- > 0)
        {
            char const ch = *pszString++;
            if (ch != '%')
            { /* typical */}
            else if (   cchSrcLeft >= 2
                     && RT_C_IS_XDIGIT(pszString[0])
                     && RT_C_IS_XDIGIT(pszString[1]))
            {
                cchDecoded -= 2;
                pszString  += 2;
                cchSrcLeft -= 2;
            }
        }
    }
    else
        cchDecoded = 0;
    return cchDecoded;
}


/**
 * Decodes a string into a buffer.
 *
 * @returns IPRT status code.
 * @param   pchSrc      The source string.
 * @param   cchSrc      The max number of bytes to decode in the source string.
 * @param   pszDst      The destination buffer.
 * @param   cbDst       The size of the buffer (including terminator).
 */
static int rtUriDecodeIntoBuffer(const char *pchSrc, size_t cchSrc, char *pszDst, size_t cbDst)
{
    AssertPtrReturn(pchSrc, VERR_INVALID_POINTER);
    AssertPtrReturn(pszDst, VERR_INVALID_POINTER);

    /*
     * Knowing that the pszString itself is valid UTF-8, we only have to
     * validate the escape sequences.
     */
    cchSrc = RTStrNLen(pchSrc, cchSrc);
    while (cchSrc > 0)
    {
        const char *pchPct = (const char *)memchr(pchSrc, '%', cchSrc);
        if (pchPct)
        {
            size_t cchBefore = pchPct - pchSrc;
            AssertReturn(cchBefore + 1 < cbDst, VERR_BUFFER_OVERFLOW);
            if (cchBefore)
            {
                memcpy(pszDst, pchSrc, cchBefore);
                pszDst += cchBefore;
                cbDst  -= cchBefore;
                pchSrc += cchBefore;
                cchSrc -= cchBefore;
            }

            char chHigh, chLow;
            if (   cchSrc >= 3
                && RT_C_IS_XDIGIT(chHigh = pchSrc[1])
                && RT_C_IS_XDIGIT(chLow  = pchSrc[2]))
            {
                uint8_t b = RT_C_IS_DIGIT(chHigh) ? chHigh - '0' : (chHigh & ~0x20) - 'A' + 10;
                b <<= 4;
                b |= RT_C_IS_DIGIT(chLow) ? chLow - '0' : (chLow & ~0x20) - 'A' + 10;
                *pszDst++ = (char)b;
                pchSrc += 3;
                cchSrc -= 3;
            }
            else
            {
                AssertFailed();
                *pszDst++ = *pchSrc++;
                cchSrc--;
            }
            cbDst -= 1;
        }
        else
        {
            AssertReturn(cchSrc < cbDst, VERR_BUFFER_OVERFLOW);
            memcpy(pszDst, pchSrc, cchSrc);
            pszDst += cchSrc;
            cbDst  -= cchSrc;
            pchSrc += cchSrc;
            cchSrc  = 0;
            break;
        }
    }

    AssertReturn(cbDst > 0, VERR_BUFFER_OVERFLOW);
    *pszDst = '\0';
    return VINF_SUCCESS;
}


static int rtUriParse(const char *pszUri, PRTURIPARSED pParsed)
{
    /*
     * Validate the input and clear the output.
     */
    AssertPtrReturn(pParsed, VERR_INVALID_POINTER);
    RT_ZERO(*pParsed);
    pParsed->uAuthorityPort = UINT32_MAX;

    AssertPtrReturn(pszUri, VERR_INVALID_POINTER);

    size_t const cchUri = strlen(pszUri);
    if (RT_LIKELY(cchUri >= 3)) { /* likely */ }
    else return cchUri ? VERR_URI_TOO_SHORT : VERR_URI_EMPTY;

    /*
     * Validating escaped text sequences is much simpler if we know that
     * that the base URI string is valid.  Also, we don't necessarily trust
     * the developer calling us to remember to do this.
     */
    int rc = RTStrValidateEncoding(pszUri);
    AssertRCReturn(rc, rc);

    /*
     * RFC-3986, section 3.1:
     *      scheme = ALPHA *( ALPHA / DIGIT / "+" / "-" / "." )
     *
     * The scheme ends with a ':', which we also skip here.
     */
    size_t off = 0;
    char ch = pszUri[off++];
    if (RT_LIKELY(RT_C_IS_ALPHA(ch))) { /* likely */ }
    else return VERR_URI_INVALID_SCHEME;
    for (;;)
    {
        ch = pszUri[off];
        if (ch == ':')
            break;
        if (RT_LIKELY(RT_C_IS_ALNUM(ch) || ch == '.' || ch == '-' || ch == '+')) { /* likely */ }
        else return VERR_URI_INVALID_SCHEME;
        off++;
    }
    pParsed->cchScheme = off;

    /* Require the scheme length to be at least two chars so we won't confuse
       it with a path starting with a DOS drive letter specification. */
    if (RT_LIKELY(off >= 2)) { /* likely */ }
    else return VERR_URI_INVALID_SCHEME;

    off++;                              /* (skip colon) */

    /*
     * Find the end of the path, we'll need this several times.
     * Also, while we're potentially scanning the whole thing, check for '%'.
     */
    size_t const offHash         = RTStrOffCharOrTerm(&pszUri[off], '#') + off;
    size_t const offQuestionMark = RTStrOffCharOrTerm(&pszUri[off], '?') + off;

    if (memchr(pszUri, '%', cchUri) != NULL)
        pParsed->fFlags |= RTURIPARSED_F_CONTAINS_ESCAPED_CHARS;

    /*
     * RFC-3986, section 3.2:
     *      The authority component is preceeded by a double slash ("//")...
     */
    if (   pszUri[off] == '/'
        && pszUri[off + 1] == '/')
    {
        off += 2;
        pParsed->offAuthority = pParsed->offAuthorityUsername = pParsed->offAuthorityPassword = pParsed->offAuthorityHost = off;
        pParsed->fFlags |= RTURIPARSED_F_HAVE_AUTHORITY;

        /*
         * RFC-3986, section 3.2:
         *      ...and is terminated by the next slash ("/"), question mark ("?"),
         *       or number sign ("#") character, or by the end of the URI.
         */
        const char *pszAuthority = &pszUri[off];
        size_t      cchAuthority = RTStrOffCharOrTerm(pszAuthority, '/');
        cchAuthority = RT_MIN(cchAuthority, offHash - off);
        cchAuthority = RT_MIN(cchAuthority, offQuestionMark - off);
        pParsed->cchAuthority     = cchAuthority;

        /* The Authority can be empty, like for: file:///usr/bin/grep  */
        if (cchAuthority > 0)
        {
            pParsed->cchAuthorityHost = cchAuthority;

            /*
             * If there is a userinfo part, it is ended by a '@'.
             */
            const char *pszAt = (const char *)memchr(pszAuthority, '@', cchAuthority);
            if (pszAt)
            {
                size_t cchTmp = pszAt - pszAuthority;
                pParsed->offAuthorityHost += cchTmp + 1;
                pParsed->cchAuthorityHost -= cchTmp + 1;

                /* If there is a password part, it's separated from the username with a colon. */
                const char *pszColon = (const char *)memchr(pszAuthority, ':', cchTmp);
                if (pszColon)
                {
                    pParsed->cchAuthorityUsername = pszColon - pszAuthority;
                    pParsed->offAuthorityPassword = &pszColon[1] - pszUri;
                    pParsed->cchAuthorityPassword = pszAt - &pszColon[1];
                }
                else
                {
                    pParsed->cchAuthorityUsername = cchTmp;
                    pParsed->offAuthorityPassword = off + cchTmp;
                }
            }

            /*
             * If there is a port part, its after the last colon in the host part.
             */
            const char *pszColon = (const char *)memrchr(&pszUri[pParsed->offAuthorityHost], ':', pParsed->cchAuthorityHost);
            if (pszColon)
            {
                size_t cchTmp = &pszUri[pParsed->offAuthorityHost + pParsed->cchAuthorityHost] - &pszColon[1];
                pParsed->cchAuthorityHost -= cchTmp + 1;

                pParsed->uAuthorityPort = 0;
                while (cchTmp-- > 0)
                {
                    ch = *++pszColon;
                    if (   RT_C_IS_DIGIT(ch)
                        && pParsed->uAuthorityPort < UINT32_MAX / UINT32_C(10))
                    {
                        pParsed->uAuthorityPort *= 10;
                        pParsed->uAuthorityPort += ch - '0';
                    }
                    else
                        return VERR_URI_INVALID_PORT_NUMBER;
                }
            }
        }

        /* Skip past the authority. */
        off += cchAuthority;
    }
    else
        pParsed->offAuthority = pParsed->offAuthorityUsername = pParsed->offAuthorityPassword = pParsed->offAuthorityHost = off;

    /*
     * RFC-3986, section 3.3: Path
     *      The path is terminated by the first question mark ("?")
     *      or number sign ("#") character, or by the end of the URI.
     */
    pParsed->offPath = off;
    pParsed->cchPath = RT_MIN(offHash, offQuestionMark) - off;
    off += pParsed->cchPath;

    /*
     * RFC-3986, section 3.4: Query
     *      The query component is indicated by the first question mark ("?")
     *      character and terminated by a number sign ("#") character or by the
     *      end of the URI.
     */
    if (   off == offQuestionMark
        && off < cchUri)
    {
        Assert(pszUri[offQuestionMark] == '?');
        pParsed->offQuery = ++off;
        pParsed->cchQuery = offHash - off;
        off = offHash;
    }
    else
    {
        Assert(!pszUri[offQuestionMark]);
        pParsed->offQuery = off;
    }

    /*
     * RFC-3986, section 3.5: Fragment
     *      A fragment identifier component is indicated by the presence of a
     *      number sign ("#") character and terminated by the end of the URI.
     */
    if (   off == offHash
        && off < cchUri)
    {
        pParsed->offFragment = ++off;
        pParsed->cchFragment = cchUri - off;
    }
    else
    {
        Assert(!pszUri[offHash]);
        pParsed->offFragment = off;
    }

    /*
     * If there are any escape sequences, validate them.
     *
     * This is reasonably simple as we already know that the string is valid UTF-8
     * before they get decoded.  Thus we only have to validate the escaped sequences.
     */
    if (pParsed->fFlags & RTURIPARSED_F_CONTAINS_ESCAPED_CHARS)
    {
        const char *pchSrc  = (const char *)memchr(pszUri, '%', cchUri);
        AssertReturn(pchSrc, VERR_INTERNAL_ERROR);
        do
        {
            char        szUtf8Seq[8];
            unsigned    cchUtf8Seq = 0;
            unsigned    cchNeeded  = 0;
            size_t      cchLeft    = &pszUri[cchUri] - pchSrc;
            do
            {
                if (cchLeft >= 3)
                {
                    char chHigh = pchSrc[1];
                    char chLow  = pchSrc[2];
                    if (   RT_C_IS_XDIGIT(chHigh)
                        && RT_C_IS_XDIGIT(chLow))
                    {
                        uint8_t b = RT_C_IS_DIGIT(chHigh) ? chHigh - '0' : (chHigh & ~0x20) - 'A' + 10;
                        b <<= 4;
                        b |= RT_C_IS_DIGIT(chLow) ? chLow - '0' : (chLow & ~0x20) - 'A' + 10;

                        if (!(b & 0x80))
                        {
                            /* We don't want the string to be terminated prematurely. */
                            if (RT_LIKELY(b != 0)) { /* likely */ }
                            else return VERR_URI_ESCAPED_ZERO;

                            /* Check that we're not expecting more UTF-8 bytes. */
                            if (RT_LIKELY(cchNeeded == 0)) { /* likely */ }
                            else return VERR_URI_MISSING_UTF8_CONTINUATION_BYTE;
                        }
                        /* Are we waiting UTF-8 bytes? */
                        else if (cchNeeded > 0)
                        {
                            if (RT_LIKELY(!(b & 0x40))) { /* likely */ }
                            else return VERR_URI_INVALID_ESCAPED_UTF8_CONTINUATION_BYTE;

                            szUtf8Seq[cchUtf8Seq++] = (char)b;
                            if (--cchNeeded == 0)
                            {
                                szUtf8Seq[cchUtf8Seq] = '\0';
                                rc = RTStrValidateEncoding(szUtf8Seq);
                                if (RT_FAILURE(rc))
                                    return VERR_URI_ESCAPED_CHARS_NOT_VALID_UTF8;
                                cchUtf8Seq = 0;
                            }
                        }
                        /* Start a new UTF-8 sequence. */
                        else
                        {
                            if ((b & 0xf8) == 0xf0)
                                cchNeeded = 3;
                            else if ((b & 0xf0) == 0xe0)
                                cchNeeded = 2;
                            else if ((b & 0xe0) == 0xc0)
                                cchNeeded = 1;
                            else
                                return VERR_URI_INVALID_ESCAPED_UTF8_LEAD_BYTE;
                            szUtf8Seq[0] = (char)b;
                            cchUtf8Seq = 1;
                        }
                        pchSrc  += 3;
                        cchLeft -= 3;
                    }
                    else
                        return VERR_URI_INVALID_ESCAPE_SEQ;
                }
                else
                    return VERR_URI_INVALID_ESCAPE_SEQ;
            } while (cchLeft > 0 && pchSrc[0] == '%');

            /* Check that we're not expecting more UTF-8 bytes. */
            if (RT_LIKELY(cchNeeded == 0)) { /* likely */ }
            else return VERR_URI_MISSING_UTF8_CONTINUATION_BYTE;

            /* next */
            pchSrc = (const char *)memchr(pchSrc, '%', cchLeft);
        } while (pchSrc);
    }

    pParsed->u32Magic = RTURIPARSED_MAGIC;
    return VINF_SUCCESS;
}


RTDECL(int) RTUriParse(const char *pszUri, PRTURIPARSED pParsed)
{
    return rtUriParse(pszUri, pParsed);
}


RTDECL(char *) RTUriParsedScheme(const char *pszUri, PCRTURIPARSED pParsed)
{
    AssertPtrReturn(pszUri, NULL);
    AssertPtrReturn(pParsed, NULL);
    AssertReturn(pParsed->u32Magic == RTURIPARSED_MAGIC, NULL);
    return RTStrDupN(pszUri, pParsed->cchScheme);
}


RTDECL(char *) RTUriParsedAuthority(const char *pszUri, PCRTURIPARSED pParsed)
{
    AssertPtrReturn(pszUri, NULL);
    AssertPtrReturn(pParsed, NULL);
    AssertReturn(pParsed->u32Magic == RTURIPARSED_MAGIC, NULL);
    if (pParsed->cchAuthority || (pParsed->fFlags & RTURIPARSED_F_HAVE_AUTHORITY))
        return rtUriPercentDecodeN(&pszUri[pParsed->offAuthority], pParsed->cchAuthority);
    return NULL;
}


RTDECL(char *) RTUriParsedAuthorityUsername(const char *pszUri, PCRTURIPARSED pParsed)
{
    AssertPtrReturn(pszUri, NULL);
    AssertPtrReturn(pParsed, NULL);
    AssertReturn(pParsed->u32Magic == RTURIPARSED_MAGIC, NULL);
    if (pParsed->cchAuthorityUsername)
        return rtUriPercentDecodeN(&pszUri[pParsed->offAuthorityUsername], pParsed->cchAuthorityUsername);
    return NULL;
}


RTDECL(char *) RTUriParsedAuthorityPassword(const char *pszUri, PCRTURIPARSED pParsed)
{
    AssertPtrReturn(pszUri, NULL);
    AssertPtrReturn(pParsed, NULL);
    AssertReturn(pParsed->u32Magic == RTURIPARSED_MAGIC, NULL);
    if (pParsed->cchAuthorityPassword)
        return rtUriPercentDecodeN(&pszUri[pParsed->offAuthorityPassword], pParsed->cchAuthorityPassword);
    return NULL;
}


RTDECL(char *) RTUriParsedAuthorityHost(const char *pszUri, PCRTURIPARSED pParsed)
{
    AssertPtrReturn(pszUri, NULL);
    AssertPtrReturn(pParsed, NULL);
    AssertReturn(pParsed->u32Magic == RTURIPARSED_MAGIC, NULL);
    if (pParsed->cchAuthorityHost)
        return rtUriPercentDecodeN(&pszUri[pParsed->offAuthorityHost], pParsed->cchAuthorityHost);
    return NULL;
}


RTDECL(uint32_t) RTUriParsedAuthorityPort(const char *pszUri, PCRTURIPARSED pParsed)
{
    AssertPtrReturn(pszUri, UINT32_MAX);
    AssertPtrReturn(pParsed, UINT32_MAX);
    AssertReturn(pParsed->u32Magic == RTURIPARSED_MAGIC, UINT32_MAX);
    return pParsed->uAuthorityPort;
}


RTDECL(char *) RTUriParsedPath(const char *pszUri, PCRTURIPARSED pParsed)
{
    AssertPtrReturn(pszUri, NULL);
    AssertPtrReturn(pParsed, NULL);
    AssertReturn(pParsed->u32Magic == RTURIPARSED_MAGIC, NULL);
    if (pParsed->cchPath)
        return rtUriPercentDecodeN(&pszUri[pParsed->offPath], pParsed->cchPath);
    return NULL;
}


RTDECL(char *) RTUriParsedQuery(const char *pszUri, PCRTURIPARSED pParsed)
{
    AssertPtrReturn(pszUri, NULL);
    AssertPtrReturn(pParsed, NULL);
    AssertReturn(pParsed->u32Magic == RTURIPARSED_MAGIC, NULL);
    if (pParsed->cchQuery)
        return rtUriPercentDecodeN(&pszUri[pParsed->offQuery], pParsed->cchQuery);
    return NULL;
}


RTDECL(char *) RTUriParsedFragment(const char *pszUri, PCRTURIPARSED pParsed)
{
    AssertPtrReturn(pszUri, NULL);
    AssertPtrReturn(pParsed, NULL);
    AssertReturn(pParsed->u32Magic == RTURIPARSED_MAGIC, NULL);
    if (pParsed->cchFragment)
        return rtUriPercentDecodeN(&pszUri[pParsed->offFragment], pParsed->cchFragment);
    return NULL;
}


RTDECL(char *) RTUriCreate(const char *pszScheme, const char *pszAuthority, const char *pszPath, const char *pszQuery,
                           const char *pszFragment)
{
    if (!pszScheme) /* Scheme is minimum requirement */
        return NULL;

    char *pszResult = 0;
    char *pszAuthority1 = 0;
    char *pszPath1 = 0;
    char *pszQuery1 = 0;
    char *pszFragment1 = 0;

    do
    {
        /* Create the percent encoded strings and calculate the necessary uri
         * length. */
        size_t cbSize = strlen(pszScheme) + 1 + 1; /* plus zero byte */
        if (pszAuthority)
        {
            pszAuthority1 = rtUriPercentEncodeN(pszAuthority, RTSTR_MAX);
            if (!pszAuthority1)
                break;
            cbSize += strlen(pszAuthority1) + 2;
        }
        if (pszPath)
        {
            pszPath1 = rtUriPercentEncodeN(pszPath, RTSTR_MAX);
            if (!pszPath1)
                break;
            cbSize += strlen(pszPath1);
        }
        if (pszQuery)
        {
            pszQuery1 = rtUriPercentEncodeN(pszQuery, RTSTR_MAX);
            if (!pszQuery1)
                break;
            cbSize += strlen(pszQuery1) + 1;
        }
        if (pszFragment)
        {
            pszFragment1 = rtUriPercentEncodeN(pszFragment, RTSTR_MAX);
            if (!pszFragment1)
                break;
            cbSize += strlen(pszFragment1) + 1;
        }

        char *pszTmp = pszResult = (char *)RTStrAlloc(cbSize);
        if (!pszResult)
            break;
        RT_BZERO(pszTmp, cbSize);

        /* Compose the target uri string. */
        RTStrCatP(&pszTmp, &cbSize, pszScheme);
        RTStrCatP(&pszTmp, &cbSize, ":");
        if (pszAuthority1)
        {
            RTStrCatP(&pszTmp, &cbSize, "//");
            RTStrCatP(&pszTmp, &cbSize, pszAuthority1);
        }
        if (pszPath1)
        {
            RTStrCatP(&pszTmp, &cbSize, pszPath1);
        }
        if (pszQuery1)
        {
            RTStrCatP(&pszTmp, &cbSize, "?");
            RTStrCatP(&pszTmp, &cbSize, pszQuery1);
        }
        if (pszFragment1)
        {
            RTStrCatP(&pszTmp, &cbSize, "#");
            RTStrCatP(&pszTmp, &cbSize, pszFragment1);
        }
    } while (0);

    /* Cleanup */
    if (pszAuthority1)
        RTStrFree(pszAuthority1);
    if (pszPath1)
        RTStrFree(pszPath1);
    if (pszQuery1)
        RTStrFree(pszQuery1);
    if (pszFragment1)
        RTStrFree(pszFragment1);

    return pszResult;
}


RTDECL(bool)   RTUriIsSchemeMatch(const char *pszUri, const char *pszScheme)
{
    AssertPtrReturn(pszUri, false);
    size_t const cchScheme = strlen(pszScheme);
    return RTStrNICmp(pszUri, pszScheme, cchScheme) == 0
        && pszUri[cchScheme] == ':';
}


RTDECL(int) RTUriFileCreateEx(const char *pszPath, uint32_t fPathStyle, char **ppszUri, size_t cbUri, size_t *pcchUri)
{
    /*
     * Validate and adjust input. (RTPathParse check pszPath out for us)
     */
    if (pcchUri)
    {
        AssertPtrReturn(pcchUri, VERR_INVALID_POINTER);
        *pcchUri = ~(size_t)0;
    }
    AssertPtrReturn(ppszUri, VERR_INVALID_POINTER);
    AssertReturn(!(fPathStyle & ~RTPATH_STR_F_STYLE_MASK) && fPathStyle != RTPATH_STR_F_STYLE_RESERVED, VERR_INVALID_FLAGS);
    if (fPathStyle == RTPATH_STR_F_STYLE_HOST)
        fPathStyle = RTPATH_STYLE;

    /*
     * Let the RTPath code parse the stuff (no reason to duplicate path parsing
     * and get it slightly wrong here).
     */
    RTPATHPARSED ParsedPath;
    int rc = RTPathParse(pszPath, &ParsedPath, sizeof(ParsedPath), fPathStyle);
    if (RT_SUCCESS(rc) || rc == VERR_BUFFER_OVERFLOW)
    {
        /* Skip leading slashes. */
        if (ParsedPath.fProps & RTPATH_PROP_ROOT_SLASH)
        {
            if (fPathStyle == RTPATH_STR_F_STYLE_DOS)
                while (pszPath[0] == '/' || pszPath[0] == '\\')
                    pszPath++;
            else
                while (pszPath[0] == '/')
                    pszPath++;
        }
        const size_t cchPath = strlen(pszPath);

        /*
         * Calculate the encoded length and figure destination buffering.
         */
        static const char s_szPrefix[] = "file:///";
        size_t const      cchPrefix    = sizeof(s_szPrefix) - (ParsedPath.fProps & RTPATH_PROP_UNC ? 2 : 1);
        size_t cchEncoded = rtUriCalcEncodedLength(pszPath, cchPath, fPathStyle != RTPATH_STR_F_STYLE_DOS);

        if (pcchUri)
            *pcchUri = cchEncoded;

        char  *pszDst;
        char  *pszFreeMe = NULL;
        if (!cbUri || *ppszUri == NULL)
        {
            cbUri = RT_MAX(cbUri, cchPrefix + cchEncoded + 1);
            *ppszUri = pszFreeMe = pszDst = RTStrAlloc(cbUri);
            AssertReturn(pszDst, VERR_NO_STR_MEMORY);
        }
        else if (cchEncoded < cbUri)
            pszDst = *ppszUri;
        else
            return VERR_BUFFER_OVERFLOW;

        /*
         * Construct the URI.
         */
        memcpy(pszDst, s_szPrefix, cchPrefix);
        pszDst[cchPrefix] = '\0';
        rc = rtUriEncodeIntoBuffer(pszPath, cchPath, fPathStyle != RTPATH_STR_F_STYLE_DOS, &pszDst[cchPrefix], cbUri - cchPrefix);
        if (RT_SUCCESS(rc))
        {
            Assert(strlen(pszDst) == cbUri - 1);
            if (fPathStyle == RTPATH_STR_F_STYLE_DOS)
                RTPathChangeToUnixSlashes(pszDst, true /*fForce*/);
            return VINF_SUCCESS;
        }

        AssertRC(rc); /* Impossible! rtUriCalcEncodedLength or something above is busted! */
        if (pszFreeMe)
            RTStrFree(pszFreeMe);
    }
    return rc;
}


RTDECL(char *) RTUriFileCreate(const char *pszPath)
{
    char *pszUri = NULL;
    int rc = RTUriFileCreateEx(pszPath, RTPATH_STR_F_STYLE_HOST, &pszUri, 0 /*cbUri*/, NULL /*pcchUri*/);
    if (RT_SUCCESS(rc))
        return pszUri;
    return NULL;
}


RTDECL(int) RTUriFilePathEx(const char *pszUri, uint32_t fPathStyle, char **ppszPath, size_t cbPath, size_t *pcchPath)
{
    /*
     * Validate and adjust input.
     */
    if (pcchPath)
    {
        AssertPtrReturn(pcchPath, VERR_INVALID_POINTER);
        *pcchPath = ~(size_t)0;
    }
    AssertPtrReturn(ppszPath, VERR_INVALID_POINTER);
    AssertReturn(!(fPathStyle & ~RTPATH_STR_F_STYLE_MASK) && fPathStyle != RTPATH_STR_F_STYLE_RESERVED, VERR_INVALID_FLAGS);
    if (fPathStyle == RTPATH_STR_F_STYLE_HOST)
        fPathStyle = RTPATH_STYLE;
    AssertPtrReturn(pszUri, VERR_INVALID_POINTER);

    /*
     * Check that this is a file URI.
     */
    if (RTStrNICmp(pszUri, RT_STR_TUPLE("file:")) == 0)
    { /* likely */ }
    else
        return VERR_URI_NOT_FILE_SCHEME;

    /*
     * We may have a number of variations here, mostly thanks to
     * various windows software.  First the canonical variations:
     *     - file:///C:/Windows/System32/kernel32.dll
     *     - file:///C|/Windows/System32/kernel32.dll
     *     - file:///C:%5CWindows%5CSystem32%5Ckernel32.dll
     *     - file://localhost/C:%5CWindows%5CSystem32%5Ckernel32.dll
     *     - file://cifsserver.dev/systemshare%5CWindows%5CSystem32%5Ckernel32.dll
     *     - file://cifsserver.dev:139/systemshare%5CWindows%5CSystem32%5Ckernel32.dll  (not quite sure here, but whatever)
     *
     * Legacy variant without any slashes after the schema:
     *     - file:C:/Windows/System32/kernel32.dll
     *     - file:C|/Windows/System32%5Ckernel32.dll
     *     - file:~/.bashrc
     *            \--path-/
     *
     * Legacy variant with exactly one slashes after the schema:
     *     - file:/C:/Windows/System32%5Ckernel32.dll
     *     - file:/C|/Windows/System32/kernel32.dll
     *     - file:/usr/bin/env
     *            \---path---/
     *
     * Legacy variant with two slashes after the schema and an unescaped DOS path:
     *     - file://C:/Windows/System32\kernel32.dll (**)
     *     - file://C|/Windows/System32\kernel32.dll
     *                \---path---------------------/
     *              -- authority, with ':' as non-working port separator
     *
     * Legacy variant with exactly four slashes after the schema and an unescaped DOS path.
     *     - file:////C:/Windows\System32\user32.dll
     *
     * Legacy variant with four or more slashes after the schema and an unescaped UNC path:
     *     - file:////cifsserver.dev/systemshare/System32%\kernel32.dll
     *     - file://///cifsserver.dev/systemshare/System32\kernel32.dll
     *              \---path--------------------------------------------/
     *
     * The the two unescaped variants shouldn't be handed to rtUriParse, which
     * is good as we cannot actually handle the one marked by (**).  So, handle
     * those two special when parsing.
     */
    RTURIPARSED Parsed;
    int         rc;
    size_t      cSlashes = 0;
    while (pszUri[5 + cSlashes] == '/')
        cSlashes++;
    if (   (cSlashes == 2 || cSlashes == 4)
        && RT_C_IS_ALPHA(pszUri[5 + cSlashes])
        && (pszUri[5 + cSlashes + 1] == ':' || pszUri[5 + cSlashes + 1] == '|'))
    {
        RT_ZERO(Parsed); /* RTURIPARSED_F_CONTAINS_ESCAPED_CHARS is now clear. */
        Parsed.offPath = 5 + cSlashes;
        Parsed.cchPath = strlen(&pszUri[Parsed.offPath]);
        rc = RTStrValidateEncoding(&pszUri[Parsed.offPath]);
    }
    else if (cSlashes >= 4)
    {
        RT_ZERO(Parsed);
        Parsed.fFlags  = cSlashes > 4 ? RTURIPARSED_F_CONTAINS_ESCAPED_CHARS : 0;
        Parsed.offPath = 5 + cSlashes - 2;
        Parsed.cchPath = strlen(&pszUri[Parsed.offPath]);
        rc = RTStrValidateEncoding(&pszUri[Parsed.offPath]);
    }
    else
        rc = rtUriParse(pszUri, &Parsed);
    if (RT_SUCCESS(rc))
    {
        /*
         * Ignore localhost as hostname (it's implicit).
         */
        static char const s_szLocalhost[] = "localhost";
        if (    Parsed.cchAuthorityHost == sizeof(s_szLocalhost) - 1U
            &&  RTStrNICmp(&pszUri[Parsed.offAuthorityHost], RT_STR_TUPLE(s_szLocalhost)) == 0)
        {
            Parsed.cchAuthorityHost = 0;
            Parsed.cchAuthority     = 0;
        }

        /*
         * Ignore leading path slash/separator if we detect a DOS drive letter
         * and we don't have a host name.
         */
        if (   Parsed.cchPath >= 3
            && Parsed.cchAuthorityHost    == 0
            && pszUri[Parsed.offPath]     == '/'           /* Leading path slash/separator. */
            && (   pszUri[Parsed.offPath + 2] == ':'       /* Colon after drive letter. */
                || pszUri[Parsed.offPath + 2] == '|')      /* Colon alternative. */
            && RT_C_IS_ALPHA(pszUri[Parsed.offPath + 1]) ) /* Drive letter. */
        {
            Parsed.offPath++;
            Parsed.cchPath--;
        }

        /*
         * Calculate the size of the encoded result.
         *
         * Since we're happily returning "C:/Windows/System32/kernel.dll"
         * style paths when the caller requested UNIX style paths, we will
         * return straight UNC paths too ("//cifsserver/share/dir/file").
         */
        size_t cchDecodedHost = 0;
        size_t cbResult;
        if (Parsed.fFlags & RTURIPARSED_F_CONTAINS_ESCAPED_CHARS)
        {
            cchDecodedHost = rtUriCalcDecodedLength(&pszUri[Parsed.offAuthorityHost], Parsed.cchAuthorityHost);
            cbResult = cchDecodedHost + rtUriCalcDecodedLength(&pszUri[Parsed.offPath], Parsed.cchPath) + 1;
        }
        else
        {
            cchDecodedHost = 0;
            cbResult = Parsed.cchAuthorityHost + Parsed.cchPath + 1;
        }
        if (pcchPath)
            *pcchPath = cbResult - 1;
        if (cbResult > 1)
        {
            /*
             * Prepare the necessary buffer space for the result.
             */
            char  *pszDst;
            char  *pszFreeMe = NULL;
            if (!cbPath || *ppszPath == NULL)
            {
                cbPath = RT_MAX(cbPath, cbResult);
                *ppszPath = pszFreeMe = pszDst = RTStrAlloc(cbPath);
                AssertReturn(pszDst, VERR_NO_STR_MEMORY);
            }
            else if (cbResult <= cbPath)
                pszDst = *ppszPath;
            else
                return VERR_BUFFER_OVERFLOW;

            /*
             * Compose the result.
             */
            if (Parsed.fFlags & RTURIPARSED_F_CONTAINS_ESCAPED_CHARS)
            {
                rc = rtUriDecodeIntoBuffer(&pszUri[Parsed.offAuthorityHost],Parsed.cchAuthorityHost,
                                           pszDst, cchDecodedHost + 1);
                Assert(RT_SUCCESS(rc) && strlen(pszDst) == cchDecodedHost);
                if (RT_SUCCESS(rc))
                    rc = rtUriDecodeIntoBuffer(&pszUri[Parsed.offPath], Parsed.cchPath,
                                               &pszDst[cchDecodedHost], cbResult - cchDecodedHost);
                Assert(RT_SUCCESS(rc) && strlen(pszDst) == cbResult - 1);
            }
            else
            {
                memcpy(pszDst, &pszUri[Parsed.offAuthorityHost], Parsed.cchAuthorityHost);
                memcpy(&pszDst[Parsed.cchAuthorityHost], &pszUri[Parsed.offPath], Parsed.cchPath);
                pszDst[cbResult - 1] = '\0';
            }
            if (RT_SUCCESS(rc))
            {
                /*
                 * Convert colon DOS driver letter colon alternative.
                 * We do this regardless of the desired path style.
                 */
                if (   RT_C_IS_ALPHA(pszDst[0])
                    && pszDst[1] == '|')
                    pszDst[1] = ':';

                /*
                 * Fix slashes.
                 */
                if (fPathStyle == RTPATH_STR_F_STYLE_DOS)
                    RTPathChangeToDosSlashes(pszDst, true);
                else if (fPathStyle == RTPATH_STR_F_STYLE_UNIX)
                    RTPathChangeToUnixSlashes(pszDst, true); /** @todo not quite sure how this actually makes sense... */
                else
                    AssertFailed();
                return rc;
            }

            /* bail out */
            RTStrFree(pszFreeMe);
        }
        else
            rc = VERR_PATH_ZERO_LENGTH;
    }
    return rc;
}


RTDECL(char *) RTUriFilePath(const char *pszUri)
{
    char *pszPath = NULL;
    int rc = RTUriFilePathEx(pszUri, RTPATH_STR_F_STYLE_HOST, &pszPath, 0 /*cbPath*/, NULL /*pcchPath*/);
    if (RT_SUCCESS(rc))
        return pszPath;
    return NULL;
}