VirtualBox

source: vbox/trunk/src/VBox/Runtime/common/misc/uri.cpp@ 99248

Last change on this file since 99248 was 98103, checked in by vboxsync, 2 years ago

Copyright year updates by scm.

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 40.0 KB
Line 
1/* $Id: uri.cpp 98103 2023-01-17 14:15:46Z vboxsync $ */
2/** @file
3 * IPRT - Uniform Resource Identifier handling.
4 */
5
6/*
7 * Copyright (C) 2011-2023 Oracle and/or its affiliates.
8 *
9 * This file is part of VirtualBox base platform packages, as
10 * available from https://www.virtualbox.org.
11 *
12 * This program is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU General Public License
14 * as published by the Free Software Foundation, in version 3 of the
15 * License.
16 *
17 * This program is distributed in the hope that it will be useful, but
18 * WITHOUT ANY WARRANTY; without even the implied warranty of
19 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20 * General Public License for more details.
21 *
22 * You should have received a copy of the GNU General Public License
23 * along with this program; if not, see <https://www.gnu.org/licenses>.
24 *
25 * The contents of this file may alternatively be used under the terms
26 * of the Common Development and Distribution License Version 1.0
27 * (CDDL), a copy of it is provided in the "COPYING.CDDL" file included
28 * in the VirtualBox distribution, in which case the provisions of the
29 * CDDL are applicable instead of those of the GPL.
30 *
31 * You may elect to license modified versions of this file under the
32 * terms and conditions of either the GPL or the CDDL or both.
33 *
34 * SPDX-License-Identifier: GPL-3.0-only OR CDDL-1.0
35 */
36
37
38/*********************************************************************************************************************************
39* Header Files *
40*********************************************************************************************************************************/
41#include <iprt/uri.h>
42
43#include <iprt/assert.h>
44#include <iprt/ctype.h>
45#include <iprt/err.h>
46#include <iprt/path.h>
47#include <iprt/string.h>
48
49
50/*********************************************************************************************************************************
51* Defined Constants And Macros *
52*********************************************************************************************************************************/
53/** Internal magic value we use to check if a RTURIPARSED structure has made it thru RTUriParse. */
54#define RTURIPARSED_MAGIC UINT32_C(0x439e0745)
55
56
/* General URI format:

     foo://example.com:8042/over/there?name=ferret#nose
     \_/   \______________/\_________/ \_________/ \__/
      |           |            |            |        |
   scheme     authority       path        query   fragment
      |   _____________________|__
     / \ /                        \
     urn:example:animal:ferret:nose
*/
67
68
/**
 * Tests whether a character has to be % escaped when encoding an URI.
 *
 * The excluded set is:
 *      - control chars and space:  0x00 thru 0x20
 *      - delimiters:               '<' , '>' , '#' , '%' , '"'
 *      - unwise chars:             '{' , '}' , '|' , '\' , '^' , '[' , ']' , '`'
 *
 * @note    The argument is evaluated several times, so it must not have any
 *          side effects.
 */
#define URI_EXCLUDED(a) \
    (   ((a) >= 0x0 && (a) <= ' ') \
     || (a) == '"' || (a) == '#' || (a) == '%' \
     || (a) == '<' || (a) == '>' \
     || ((a) >= '[' && (a) <= '^') \
     || (a) == '`' \
     || ((a) >= '{' && (a) <= '}') )
82
83static char *rtUriPercentEncodeN(const char *pszString, size_t cchMax)
84{
85 if (!pszString)
86 return NULL;
87
88 int rc = VINF_SUCCESS;
89
90 size_t cbLen = RT_MIN(strlen(pszString), cchMax);
91 /* The new string can be max 3 times in size of the original string. */
92 char *pszNew = RTStrAlloc(cbLen * 3 + 1);
93 if (!pszNew)
94 return NULL;
95
96 char *pszRes = NULL;
97 size_t iIn = 0;
98 size_t iOut = 0;
99 while (iIn < cbLen)
100 {
101 if (URI_EXCLUDED(pszString[iIn]))
102 {
103 char szNum[3] = { 0, 0, 0 };
104 RTStrFormatU8(&szNum[0], 3, pszString[iIn++], 16, 2, 2, RTSTR_F_CAPITAL | RTSTR_F_ZEROPAD);
105 pszNew[iOut++] = '%';
106 pszNew[iOut++] = szNum[0];
107 pszNew[iOut++] = szNum[1];
108 }
109 else
110 pszNew[iOut++] = pszString[iIn++];
111 }
112 if (RT_SUCCESS(rc))
113 {
114 pszNew[iOut] = '\0';
115 if (iOut != iIn)
116 {
117 /* If the source and target strings have different size, recreate
118 * the target string with the correct size. */
119 pszRes = RTStrDupN(pszNew, iOut);
120 RTStrFree(pszNew);
121 }
122 else
123 pszRes = pszNew;
124 }
125 else
126 RTStrFree(pszNew);
127
128 return pszRes;
129}
130
131
/**
 * Works out how long a string gets once it has been percent-encoded.
 *
 * Characters copied verbatim count as one, escaped characters ("%XX") as three.
 *
 * @returns Number of chars (excluding the terminator), 0 if @a pszString is NULL.
 * @param   pszString           The string to encode.
 * @param   cchMax              The maximum string length (e.g. RTSTR_MAX).
 * @param   fEncodeDosSlash     Whether to encode DOS slashes or not.
 */
static size_t rtUriCalcEncodedLength(const char *pszString, size_t cchMax, bool fEncodeDosSlash)
{
    if (!pszString)
        return 0;

    size_t const cchSrc    = RTStrNLen(pszString, cchMax);
    size_t       cchResult = 0;
    for (size_t iSrc = 0; iSrc < cchSrc; iSrc++)
    {
        char const chCur     = pszString[iSrc];
        bool const fVerbatim = !URI_EXCLUDED(chCur) || (chCur == '\\' && !fEncodeDosSlash);
        cchResult += fVerbatim ? 1 : 3;
    }
    return cchResult;
}
157
158
159/**
160 * Encodes an URI into a caller allocated buffer.
161 *
162 * @returns IPRT status code.
163 * @param pszString The string to encode.
164 * @param cchMax The maximum string length (e.g. RTSTR_MAX).
165 * @param fEncodeDosSlash Whether to encode DOS slashes or not.
166 * @param pszDst The destination buffer.
167 * @param cbDst The size of the destination buffer.
168 */
169static int rtUriEncodeIntoBuffer(const char *pszString, size_t cchMax, bool fEncodeDosSlash, char *pszDst, size_t cbDst)
170{
171 AssertReturn(pszString, VERR_INVALID_POINTER);
172 AssertPtrReturn(pszDst, VERR_INVALID_POINTER);
173
174 /*
175 * We do buffer size checking up front and every time we encode a special
176 * character. That's faster than checking for each char.
177 */
178 size_t cchSrcLeft = RTStrNLen(pszString, cchMax);
179 AssertMsgReturn(cbDst > cchSrcLeft, ("cbDst=%zu cchSrcLeft=%zu\n", cbDst, cchSrcLeft), VERR_BUFFER_OVERFLOW);
180 cbDst -= cchSrcLeft;
181
182 while (cchSrcLeft-- > 0)
183 {
184 char const ch = *pszString++;
185 if (!URI_EXCLUDED(ch) || (ch == '\\' && !fEncodeDosSlash))
186 *pszDst++ = ch;
187 else
188 {
189 AssertReturn(cbDst >= 3, VERR_BUFFER_OVERFLOW); /* 2 extra bytes + zero terminator. */
190 cbDst -= 2;
191
192 *pszDst++ = '%';
193 ssize_t cchTmp = RTStrFormatU8(pszDst, 3, (unsigned char)ch, 16, 2, 2, RTSTR_F_CAPITAL | RTSTR_F_ZEROPAD);
194 Assert(cchTmp == 2); NOREF(cchTmp);
195 pszDst += 2;
196 }
197 }
198
199 *pszDst = '\0';
200 return VINF_SUCCESS;
201}
202
203
/**
 * Percent-decodes a string into a newly allocated buffer.
 *
 * @returns The decoded string, NULL on bad input or allocation failure.
 * @param   pszString   The string to decode.  Must not contain a zero
 *                      terminator within the first @a cchString bytes.
 * @param   cchString   The exact number of bytes to decode.
 */
static char *rtUriPercentDecodeN(const char *pszString, size_t cchString)
{
    AssertPtrReturn(pszString, NULL);
    AssertReturn(memchr(pszString, '\0', cchString) == NULL, NULL);

    /*
     * Decoding never makes the string longer, so the input length serves as
     * the initial buffer size.
     */
    char *pszResult = RTStrAlloc(cchString + 1);
    if (!pszResult)
        return NULL;

    /*
     * The input as a whole is expected to be valid UTF-8 already, so only the
     * escape sequences need looking at.
     */
    const char *pchIn        = pszString;
    char       *pchOut       = pszResult;
    size_t      cchRemaining = cchString;
    while (cchRemaining > 0)
    {
        const char *pchEscape = (const char *)memchr(pchIn, '%', cchRemaining);
        if (!pchEscape)
        {
            /* No more escapes, copy the tail and stop. */
            memcpy(pchOut, pchIn, cchRemaining);
            pchOut += cchRemaining;
            break;
        }

        /* Copy whatever precedes the escape sequence. */
        size_t const cchLiteral = pchEscape - pchIn;
        if (cchLiteral)
        {
            memcpy(pchOut, pchIn, cchLiteral);
            pchOut       += cchLiteral;
            pchIn        += cchLiteral;
            cchRemaining -= cchLiteral;
        }

        char chHi, chLo;
        if (   cchRemaining >= 3
            && RT_C_IS_XDIGIT(chHi = pchIn[1])
            && RT_C_IS_XDIGIT(chLo = pchIn[2]))
        {
            /* Clearing bit 5 folds lower case hex letters to upper case. */
            uint8_t bValue = RT_C_IS_DIGIT(chHi) ? chHi - '0' : (chHi & ~0x20) - 'A' + 10;
            bValue <<= 4;
            bValue |= RT_C_IS_DIGIT(chLo) ? chLo - '0' : (chLo & ~0x20) - 'A' + 10;
            *pchOut++ = (char)bValue;
            pchIn        += 3;
            cchRemaining -= 3;
        }
        else
        {
            /* Malformed escape sequence: complain and pass the '%' thru as-is. */
            AssertFailed();
            *pchOut++ = *pchIn++;
            cchRemaining--;
        }
    }

    *pchOut = '\0';

    /*
     * If there is a lot of unused space in the result buffer, shrink it.
     */
    size_t const cchResult = pchOut - pszResult;
    Assert(cchResult <= cchString);
    if (cchString - cchResult > 64)
        RTStrRealloc(&pszResult, cchResult + 1);
    return pszResult;
}
278
279
/**
 * Works out how long a string gets once it has been percent-decoded.
 *
 * Each well formed "%XX" sequence shrinks the result by two chars, anything
 * else (including a malformed escape) counts as is.
 *
 * @returns Number of chars (excluding the terminator), 0 if @a pszString is NULL.
 * @param   pszString   The string to decode.
 * @param   cchMax      The maximum string length (e.g. RTSTR_MAX).
 */
static size_t rtUriCalcDecodedLength(const char *pszString, size_t cchMax)
{
    if (!pszString)
        return 0;

    size_t const cchSrc    = RTStrNLen(pszString, cchMax);
    size_t       cchResult = cchSrc;
    size_t       iSrc      = 0;
    while (iSrc < cchSrc)
    {
        if (   pszString[iSrc] == '%'
            && cchSrc - iSrc >= 3
            && RT_C_IS_XDIGIT(pszString[iSrc + 1])
            && RT_C_IS_XDIGIT(pszString[iSrc + 2]))
        {
            cchResult -= 2;
            iSrc      += 3;
        }
        else
            iSrc++; /* typical */
    }
    return cchResult;
}
312
313
314/**
315 * Decodes a string into a buffer.
316 *
317 * @returns IPRT status code.
318 * @param pchSrc The source string.
319 * @param cchSrc The max number of bytes to decode in the source string.
320 * @param pszDst The destination buffer.
321 * @param cbDst The size of the buffer (including terminator).
322 */
323static int rtUriDecodeIntoBuffer(const char *pchSrc, size_t cchSrc, char *pszDst, size_t cbDst)
324{
325 AssertPtrReturn(pchSrc, VERR_INVALID_POINTER);
326 AssertPtrReturn(pszDst, VERR_INVALID_POINTER);
327
328 /*
329 * Knowing that the pszString itself is valid UTF-8, we only have to
330 * validate the escape sequences.
331 */
332 cchSrc = RTStrNLen(pchSrc, cchSrc);
333 while (cchSrc > 0)
334 {
335 const char *pchPct = (const char *)memchr(pchSrc, '%', cchSrc);
336 if (pchPct)
337 {
338 size_t cchBefore = pchPct - pchSrc;
339 AssertReturn(cchBefore + 1 < cbDst, VERR_BUFFER_OVERFLOW);
340 if (cchBefore)
341 {
342 memcpy(pszDst, pchSrc, cchBefore);
343 pszDst += cchBefore;
344 cbDst -= cchBefore;
345 pchSrc += cchBefore;
346 cchSrc -= cchBefore;
347 }
348
349 char chHigh, chLow;
350 if ( cchSrc >= 3
351 && RT_C_IS_XDIGIT(chHigh = pchSrc[1])
352 && RT_C_IS_XDIGIT(chLow = pchSrc[2]))
353 {
354 uint8_t b = RT_C_IS_DIGIT(chHigh) ? chHigh - '0' : (chHigh & ~0x20) - 'A' + 10;
355 b <<= 4;
356 b |= RT_C_IS_DIGIT(chLow) ? chLow - '0' : (chLow & ~0x20) - 'A' + 10;
357 *pszDst++ = (char)b;
358 pchSrc += 3;
359 cchSrc -= 3;
360 }
361 else
362 {
363 AssertFailed();
364 *pszDst++ = *pchSrc++;
365 cchSrc--;
366 }
367 cbDst -= 1;
368 }
369 else
370 {
371 AssertReturn(cchSrc < cbDst, VERR_BUFFER_OVERFLOW);
372 memcpy(pszDst, pchSrc, cchSrc);
373 pszDst += cchSrc;
374 cbDst -= cchSrc;
375 pchSrc += cchSrc;
376 cchSrc = 0;
377 break;
378 }
379 }
380
381 AssertReturn(cbDst > 0, VERR_BUFFER_OVERFLOW);
382 *pszDst = '\0';
383 return VINF_SUCCESS;
384}
385
386
387
388static int rtUriParse(const char *pszUri, PRTURIPARSED pParsed)
389{
390 /*
391 * Validate the input and clear the output.
392 */
393 AssertPtrReturn(pParsed, VERR_INVALID_POINTER);
394 RT_ZERO(*pParsed);
395 pParsed->uAuthorityPort = UINT32_MAX;
396
397 AssertPtrReturn(pszUri, VERR_INVALID_POINTER);
398
399 size_t const cchUri = strlen(pszUri);
400 if (RT_LIKELY(cchUri >= 3)) { /* likely */ }
401 else return cchUri ? VERR_URI_TOO_SHORT : VERR_URI_EMPTY;
402
403 /*
404 * Validating escaped text sequences is much simpler if we know that
405 * that the base URI string is valid. Also, we don't necessarily trust
406 * the developer calling us to remember to do this.
407 */
408 int rc = RTStrValidateEncoding(pszUri);
409 AssertRCReturn(rc, rc);
410
411 /*
412 * RFC-3986, section 3.1:
413 * scheme = ALPHA *( ALPHA / DIGIT / "+" / "-" / "." )
414 *
415 * The scheme ends with a ':', which we also skip here.
416 */
417 size_t off = 0;
418 char ch = pszUri[off++];
419 if (RT_LIKELY(RT_C_IS_ALPHA(ch))) { /* likely */ }
420 else return VERR_URI_INVALID_SCHEME;
421 for (;;)
422 {
423 ch = pszUri[off];
424 if (ch == ':')
425 break;
426 if (RT_LIKELY(RT_C_IS_ALNUM(ch) || ch == '.' || ch == '-' || ch == '+')) { /* likely */ }
427 else return VERR_URI_INVALID_SCHEME;
428 off++;
429 }
430 pParsed->cchScheme = off;
431
432 /* Require the scheme length to be at least two chars so we won't confuse
433 it with a path starting with a DOS drive letter specification. */
434 if (RT_LIKELY(off >= 2)) { /* likely */ }
435 else return VERR_URI_INVALID_SCHEME;
436
437 off++; /* (skip colon) */
438
439 /*
440 * Find the end of the path, we'll need this several times.
441 * Also, while we're potentially scanning the whole thing, check for '%'.
442 */
443 size_t const offHash = RTStrOffCharOrTerm(&pszUri[off], '#') + off;
444 size_t const offQuestionMark = RTStrOffCharOrTerm(&pszUri[off], '?') + off;
445
446 if (memchr(pszUri, '%', cchUri) != NULL)
447 pParsed->fFlags |= RTURIPARSED_F_CONTAINS_ESCAPED_CHARS;
448
449 /*
450 * RFC-3986, section 3.2:
451 * The authority component is preceeded by a double slash ("//")...
452 */
453 if ( pszUri[off] == '/'
454 && pszUri[off + 1] == '/')
455 {
456 off += 2;
457 pParsed->offAuthority = pParsed->offAuthorityUsername = pParsed->offAuthorityPassword = pParsed->offAuthorityHost = off;
458 pParsed->fFlags |= RTURIPARSED_F_HAS_AUTHORITY;
459
460 /*
461 * RFC-3986, section 3.2:
462 * ...and is terminated by the next slash ("/"), question mark ("?"),
463 * or number sign ("#") character, or by the end of the URI.
464 */
465 const char *pszAuthority = &pszUri[off];
466 size_t cchAuthority = RTStrOffCharOrTerm(pszAuthority, '/');
467 cchAuthority = RT_MIN(cchAuthority, offHash - off);
468 cchAuthority = RT_MIN(cchAuthority, offQuestionMark - off);
469 pParsed->cchAuthority = cchAuthority;
470
471 /* The Authority can be empty, like for: file:///usr/bin/grep */
472 if (cchAuthority > 0)
473 {
474 pParsed->cchAuthorityHost = cchAuthority;
475
476 /*
477 * If there is a userinfo part, it is ended by a '@'.
478 */
479 const char *pszAt = (const char *)memchr(pszAuthority, '@', cchAuthority);
480 if (pszAt)
481 {
482 size_t cchTmp = pszAt - pszAuthority;
483 pParsed->offAuthorityHost += cchTmp + 1;
484 pParsed->cchAuthorityHost -= cchTmp + 1;
485
486 /* If there is a password part, it's separated from the username with a colon. */
487 const char *pszColon = (const char *)memchr(pszAuthority, ':', cchTmp);
488 if (pszColon)
489 {
490 pParsed->cchAuthorityUsername = pszColon - pszAuthority;
491 pParsed->offAuthorityPassword = &pszColon[1] - pszUri;
492 pParsed->cchAuthorityPassword = pszAt - &pszColon[1];
493 }
494 else
495 {
496 pParsed->cchAuthorityUsername = cchTmp;
497 pParsed->offAuthorityPassword = off + cchTmp;
498 }
499 }
500
501 /*
502 * If there is a port part, its after the last colon in the host part.
503 */
504 const char *pszColon = (const char *)memrchr(&pszUri[pParsed->offAuthorityHost], ':', pParsed->cchAuthorityHost);
505 if (pszColon)
506 {
507 size_t cchTmp = &pszUri[pParsed->offAuthorityHost + pParsed->cchAuthorityHost] - &pszColon[1];
508 pParsed->cchAuthorityHost -= cchTmp + 1;
509 pParsed->fFlags |= RTURIPARSED_F_HAS_PORT;
510 if (cchTmp > 0)
511 {
512 pParsed->uAuthorityPort = 0;
513 while (cchTmp-- > 0)
514 {
515 ch = *++pszColon;
516 if ( RT_C_IS_DIGIT(ch)
517 && pParsed->uAuthorityPort < UINT32_MAX / UINT32_C(10))
518 {
519 pParsed->uAuthorityPort *= 10;
520 pParsed->uAuthorityPort += ch - '0';
521 }
522 else
523 return VERR_URI_INVALID_PORT_NUMBER;
524 }
525 }
526 }
527 }
528
529 /* Skip past the authority. */
530 off += cchAuthority;
531 }
532 else
533 pParsed->offAuthority = pParsed->offAuthorityUsername = pParsed->offAuthorityPassword = pParsed->offAuthorityHost = off;
534
535 /*
536 * RFC-3986, section 3.3: Path
537 * The path is terminated by the first question mark ("?")
538 * or number sign ("#") character, or by the end of the URI.
539 */
540 pParsed->offPath = off;
541 pParsed->cchPath = RT_MIN(offHash, offQuestionMark) - off;
542 off += pParsed->cchPath;
543
544 /*
545 * RFC-3986, section 3.4: Query
546 * The query component is indicated by the first question mark ("?")
547 * character and terminated by a number sign ("#") character or by the
548 * end of the URI.
549 */
550 if ( off == offQuestionMark
551 && off < cchUri)
552 {
553 Assert(pszUri[offQuestionMark] == '?');
554 pParsed->offQuery = ++off;
555 pParsed->cchQuery = offHash - off;
556 off = offHash;
557 }
558 else
559 {
560 Assert(!pszUri[offQuestionMark]);
561 pParsed->offQuery = off;
562 }
563
564 /*
565 * RFC-3986, section 3.5: Fragment
566 * A fragment identifier component is indicated by the presence of a
567 * number sign ("#") character and terminated by the end of the URI.
568 */
569 if ( off == offHash
570 && off < cchUri)
571 {
572 pParsed->offFragment = ++off;
573 pParsed->cchFragment = cchUri - off;
574 }
575 else
576 {
577 Assert(!pszUri[offHash]);
578 pParsed->offFragment = off;
579 }
580
581 /*
582 * If there are any escape sequences, validate them.
583 *
584 * This is reasonably simple as we already know that the string is valid UTF-8
585 * before they get decoded. Thus we only have to validate the escaped sequences.
586 */
587 if (pParsed->fFlags & RTURIPARSED_F_CONTAINS_ESCAPED_CHARS)
588 {
589 const char *pchSrc = (const char *)memchr(pszUri, '%', cchUri);
590 AssertReturn(pchSrc, VERR_INTERNAL_ERROR);
591 do
592 {
593 char szUtf8Seq[8];
594 unsigned cchUtf8Seq = 0;
595 unsigned cchNeeded = 0;
596 size_t cchLeft = &pszUri[cchUri] - pchSrc;
597 do
598 {
599 if (cchLeft >= 3)
600 {
601 char chHigh = pchSrc[1];
602 char chLow = pchSrc[2];
603 if ( RT_C_IS_XDIGIT(chHigh)
604 && RT_C_IS_XDIGIT(chLow))
605 {
606 uint8_t b = RT_C_IS_DIGIT(chHigh) ? chHigh - '0' : (chHigh & ~0x20) - 'A' + 10;
607 b <<= 4;
608 b |= RT_C_IS_DIGIT(chLow) ? chLow - '0' : (chLow & ~0x20) - 'A' + 10;
609
610 if (!(b & 0x80))
611 {
612 /* We don't want the string to be terminated prematurely. */
613 if (RT_LIKELY(b != 0)) { /* likely */ }
614 else return VERR_URI_ESCAPED_ZERO;
615
616 /* Check that we're not expecting more UTF-8 bytes. */
617 if (RT_LIKELY(cchNeeded == 0)) { /* likely */ }
618 else return VERR_URI_MISSING_UTF8_CONTINUATION_BYTE;
619 }
620 /* Are we waiting UTF-8 bytes? */
621 else if (cchNeeded > 0)
622 {
623 if (RT_LIKELY(!(b & 0x40))) { /* likely */ }
624 else return VERR_URI_INVALID_ESCAPED_UTF8_CONTINUATION_BYTE;
625
626 szUtf8Seq[cchUtf8Seq++] = (char)b;
627 if (--cchNeeded == 0)
628 {
629 szUtf8Seq[cchUtf8Seq] = '\0';
630 rc = RTStrValidateEncoding(szUtf8Seq);
631 if (RT_FAILURE(rc))
632 return VERR_URI_ESCAPED_CHARS_NOT_VALID_UTF8;
633 cchUtf8Seq = 0;
634 }
635 }
636 /* Start a new UTF-8 sequence. */
637 else
638 {
639 if ((b & 0xf8) == 0xf0)
640 cchNeeded = 3;
641 else if ((b & 0xf0) == 0xe0)
642 cchNeeded = 2;
643 else if ((b & 0xe0) == 0xc0)
644 cchNeeded = 1;
645 else
646 return VERR_URI_INVALID_ESCAPED_UTF8_LEAD_BYTE;
647 szUtf8Seq[0] = (char)b;
648 cchUtf8Seq = 1;
649 }
650 pchSrc += 3;
651 cchLeft -= 3;
652 }
653 else
654 return VERR_URI_INVALID_ESCAPE_SEQ;
655 }
656 else
657 return VERR_URI_INVALID_ESCAPE_SEQ;
658 } while (cchLeft > 0 && pchSrc[0] == '%');
659
660 /* Check that we're not expecting more UTF-8 bytes. */
661 if (RT_LIKELY(cchNeeded == 0)) { /* likely */ }
662 else return VERR_URI_MISSING_UTF8_CONTINUATION_BYTE;
663
664 /* next */
665 pchSrc = (const char *)memchr(pchSrc, '%', cchLeft);
666 } while (pchSrc);
667 }
668
669 pParsed->u32Magic = RTURIPARSED_MAGIC;
670 return VINF_SUCCESS;
671}
672
673
674RTDECL(int) RTUriParse(const char *pszUri, PRTURIPARSED pParsed)
675{
676 return rtUriParse(pszUri, pParsed);
677}
678
679
680RTDECL(char *) RTUriParsedScheme(const char *pszUri, PCRTURIPARSED pParsed)
681{
682 AssertPtrReturn(pszUri, NULL);
683 AssertPtrReturn(pParsed, NULL);
684 AssertReturn(pParsed->u32Magic == RTURIPARSED_MAGIC, NULL);
685 return RTStrDupN(pszUri, pParsed->cchScheme);
686}
687
688
689RTDECL(char *) RTUriParsedAuthority(const char *pszUri, PCRTURIPARSED pParsed)
690{
691 AssertPtrReturn(pszUri, NULL);
692 AssertPtrReturn(pParsed, NULL);
693 AssertReturn(pParsed->u32Magic == RTURIPARSED_MAGIC, NULL);
694 if (pParsed->cchAuthority || (pParsed->fFlags & RTURIPARSED_F_HAS_AUTHORITY))
695 return rtUriPercentDecodeN(&pszUri[pParsed->offAuthority], pParsed->cchAuthority);
696 return NULL;
697}
698
699
700RTDECL(char *) RTUriParsedAuthorityUsername(const char *pszUri, PCRTURIPARSED pParsed)
701{
702 AssertPtrReturn(pszUri, NULL);
703 AssertPtrReturn(pParsed, NULL);
704 AssertReturn(pParsed->u32Magic == RTURIPARSED_MAGIC, NULL);
705 if (pParsed->cchAuthorityUsername)
706 return rtUriPercentDecodeN(&pszUri[pParsed->offAuthorityUsername], pParsed->cchAuthorityUsername);
707 return NULL;
708}
709
710
711RTDECL(char *) RTUriParsedAuthorityPassword(const char *pszUri, PCRTURIPARSED pParsed)
712{
713 AssertPtrReturn(pszUri, NULL);
714 AssertPtrReturn(pParsed, NULL);
715 AssertReturn(pParsed->u32Magic == RTURIPARSED_MAGIC, NULL);
716 if (pParsed->cchAuthorityPassword)
717 return rtUriPercentDecodeN(&pszUri[pParsed->offAuthorityPassword], pParsed->cchAuthorityPassword);
718 return NULL;
719}
720
721
722RTDECL(char *) RTUriParsedAuthorityHost(const char *pszUri, PCRTURIPARSED pParsed)
723{
724 AssertPtrReturn(pszUri, NULL);
725 AssertPtrReturn(pParsed, NULL);
726 AssertReturn(pParsed->u32Magic == RTURIPARSED_MAGIC, NULL);
727 if (pParsed->cchAuthorityHost)
728 return rtUriPercentDecodeN(&pszUri[pParsed->offAuthorityHost], pParsed->cchAuthorityHost);
729 return NULL;
730}
731
732
733RTDECL(uint32_t) RTUriParsedAuthorityPort(const char *pszUri, PCRTURIPARSED pParsed)
734{
735 AssertPtrReturn(pszUri, UINT32_MAX);
736 AssertPtrReturn(pParsed, UINT32_MAX);
737 AssertReturn(pParsed->u32Magic == RTURIPARSED_MAGIC, UINT32_MAX);
738 return pParsed->uAuthorityPort;
739}
740
741
742RTDECL(char *) RTUriParsedPath(const char *pszUri, PCRTURIPARSED pParsed)
743{
744 AssertPtrReturn(pszUri, NULL);
745 AssertPtrReturn(pParsed, NULL);
746 AssertReturn(pParsed->u32Magic == RTURIPARSED_MAGIC, NULL);
747 if (pParsed->cchPath)
748 return rtUriPercentDecodeN(&pszUri[pParsed->offPath], pParsed->cchPath);
749 return NULL;
750}
751
752
753RTDECL(char *) RTUriParsedQuery(const char *pszUri, PCRTURIPARSED pParsed)
754{
755 AssertPtrReturn(pszUri, NULL);
756 AssertPtrReturn(pParsed, NULL);
757 AssertReturn(pParsed->u32Magic == RTURIPARSED_MAGIC, NULL);
758 if (pParsed->cchQuery)
759 return rtUriPercentDecodeN(&pszUri[pParsed->offQuery], pParsed->cchQuery);
760 return NULL;
761}
762
763
764RTDECL(char *) RTUriParsedFragment(const char *pszUri, PCRTURIPARSED pParsed)
765{
766 AssertPtrReturn(pszUri, NULL);
767 AssertPtrReturn(pParsed, NULL);
768 AssertReturn(pParsed->u32Magic == RTURIPARSED_MAGIC, NULL);
769 if (pParsed->cchFragment)
770 return rtUriPercentDecodeN(&pszUri[pParsed->offFragment], pParsed->cchFragment);
771 return NULL;
772}
773
774
775RTDECL(char *) RTUriCreate(const char *pszScheme, const char *pszAuthority, const char *pszPath, const char *pszQuery,
776 const char *pszFragment)
777{
778 if (!pszScheme) /* Scheme is minimum requirement */
779 return NULL;
780
781 char *pszResult = 0;
782 char *pszAuthority1 = 0;
783 char *pszPath1 = 0;
784 char *pszQuery1 = 0;
785 char *pszFragment1 = 0;
786
787 do
788 {
789 /* Create the percent encoded strings and calculate the necessary uri
790 * length. */
791 size_t cbSize = strlen(pszScheme) + 1 + 1; /* plus zero byte */
792 if (pszAuthority)
793 {
794 pszAuthority1 = rtUriPercentEncodeN(pszAuthority, RTSTR_MAX);
795 if (!pszAuthority1)
796 break;
797 cbSize += strlen(pszAuthority1) + 2;
798 }
799 if (pszPath)
800 {
801 pszPath1 = rtUriPercentEncodeN(pszPath, RTSTR_MAX);
802 if (!pszPath1)
803 break;
804 cbSize += strlen(pszPath1);
805 }
806 if (pszQuery)
807 {
808 pszQuery1 = rtUriPercentEncodeN(pszQuery, RTSTR_MAX);
809 if (!pszQuery1)
810 break;
811 cbSize += strlen(pszQuery1) + 1;
812 }
813 if (pszFragment)
814 {
815 pszFragment1 = rtUriPercentEncodeN(pszFragment, RTSTR_MAX);
816 if (!pszFragment1)
817 break;
818 cbSize += strlen(pszFragment1) + 1;
819 }
820
821 char *pszTmp = pszResult = (char *)RTStrAlloc(cbSize);
822 if (!pszResult)
823 break;
824 RT_BZERO(pszTmp, cbSize);
825
826 /* Compose the target uri string. */
827 RTStrCatP(&pszTmp, &cbSize, pszScheme);
828 RTStrCatP(&pszTmp, &cbSize, ":");
829 if (pszAuthority1)
830 {
831 RTStrCatP(&pszTmp, &cbSize, "//");
832 RTStrCatP(&pszTmp, &cbSize, pszAuthority1);
833 }
834 if (pszPath1)
835 {
836 RTStrCatP(&pszTmp, &cbSize, pszPath1);
837 }
838 if (pszQuery1)
839 {
840 RTStrCatP(&pszTmp, &cbSize, "?");
841 RTStrCatP(&pszTmp, &cbSize, pszQuery1);
842 }
843 if (pszFragment1)
844 {
845 RTStrCatP(&pszTmp, &cbSize, "#");
846 RTStrCatP(&pszTmp, &cbSize, pszFragment1);
847 }
848 } while (0);
849
850 /* Cleanup */
851 if (pszAuthority1)
852 RTStrFree(pszAuthority1);
853 if (pszPath1)
854 RTStrFree(pszPath1);
855 if (pszQuery1)
856 RTStrFree(pszQuery1);
857 if (pszFragment1)
858 RTStrFree(pszFragment1);
859
860 return pszResult;
861}
862
863
864RTDECL(bool) RTUriIsSchemeMatch(const char *pszUri, const char *pszScheme)
865{
866 AssertPtrReturn(pszUri, false);
867 size_t const cchScheme = strlen(pszScheme);
868 return RTStrNICmp(pszUri, pszScheme, cchScheme) == 0
869 && pszUri[cchScheme] == ':';
870}
871
872
873RTDECL(int) RTUriFileCreateEx(const char *pszPath, uint32_t fPathStyle, char **ppszUri, size_t cbUri, size_t *pcchUri)
874{
875 /*
876 * Validate and adjust input. (RTPathParse check pszPath out for us)
877 */
878 if (pcchUri)
879 {
880 AssertPtrReturn(pcchUri, VERR_INVALID_POINTER);
881 *pcchUri = ~(size_t)0;
882 }
883 AssertPtrReturn(ppszUri, VERR_INVALID_POINTER);
884 AssertReturn(!(fPathStyle & ~RTPATH_STR_F_STYLE_MASK) && fPathStyle != RTPATH_STR_F_STYLE_RESERVED, VERR_INVALID_FLAGS);
885 if (fPathStyle == RTPATH_STR_F_STYLE_HOST)
886 fPathStyle = RTPATH_STYLE;
887
888 /*
889 * Let the RTPath code parse the stuff (no reason to duplicate path parsing
890 * and get it slightly wrong here).
891 */
892 union
893 {
894 RTPATHPARSED ParsedPath;
895 uint8_t abPadding[sizeof(RTPATHPARSED)];
896 } u;
897 int rc = RTPathParse(pszPath, &u.ParsedPath, sizeof(u.ParsedPath), fPathStyle);
898 if (RT_SUCCESS(rc) || rc == VERR_BUFFER_OVERFLOW)
899 {
900 /* Skip leading slashes. */
901 if (u.ParsedPath.fProps & RTPATH_PROP_ROOT_SLASH)
902 {
903 if (fPathStyle == RTPATH_STR_F_STYLE_DOS)
904 while (pszPath[0] == '/' || pszPath[0] == '\\')
905 pszPath++;
906 else
907 while (pszPath[0] == '/')
908 pszPath++;
909 }
910 const size_t cchPath = strlen(pszPath);
911
912 /*
913 * Calculate the encoded length and figure destination buffering.
914 */
915 static const char s_szPrefix[] = "file:///";
916 size_t const cchPrefix = sizeof(s_szPrefix) - (u.ParsedPath.fProps & RTPATH_PROP_UNC ? 2 : 1);
917 size_t cchEncoded = rtUriCalcEncodedLength(pszPath, cchPath, fPathStyle != RTPATH_STR_F_STYLE_DOS);
918
919 if (pcchUri)
920 *pcchUri = cchEncoded;
921
922 char *pszDst;
923 char *pszFreeMe = NULL;
924 if (!cbUri || *ppszUri == NULL)
925 {
926 cbUri = RT_MAX(cbUri, cchPrefix + cchEncoded + 1);
927 *ppszUri = pszFreeMe = pszDst = RTStrAlloc(cbUri);
928 AssertReturn(pszDst, VERR_NO_STR_MEMORY);
929 }
930 else if (cchEncoded < cbUri)
931 pszDst = *ppszUri;
932 else
933 return VERR_BUFFER_OVERFLOW;
934
935 /*
936 * Construct the URI.
937 */
938 memcpy(pszDst, s_szPrefix, cchPrefix);
939 pszDst[cchPrefix] = '\0';
940 rc = rtUriEncodeIntoBuffer(pszPath, cchPath, fPathStyle != RTPATH_STR_F_STYLE_DOS, &pszDst[cchPrefix], cbUri - cchPrefix);
941 if (RT_SUCCESS(rc))
942 {
943 Assert(strlen(pszDst) == cbUri - 1);
944 if (fPathStyle == RTPATH_STR_F_STYLE_DOS)
945 RTPathChangeToUnixSlashes(pszDst, true /*fForce*/);
946 return VINF_SUCCESS;
947 }
948
949 AssertRC(rc); /* Impossible! rtUriCalcEncodedLength or something above is busted! */
950 if (pszFreeMe)
951 RTStrFree(pszFreeMe);
952 }
953 return rc;
954}
955
956
957RTDECL(char *) RTUriFileCreate(const char *pszPath)
958{
959 char *pszUri = NULL;
960 int rc = RTUriFileCreateEx(pszPath, RTPATH_STR_F_STYLE_HOST, &pszUri, 0 /*cbUri*/, NULL /*pcchUri*/);
961 if (RT_SUCCESS(rc))
962 return pszUri;
963 return NULL;
964}
965
966
967RTDECL(int) RTUriFilePathEx(const char *pszUri, uint32_t fPathStyle, char **ppszPath, size_t cbPath, size_t *pcchPath)
968{
969 /*
970 * Validate and adjust input.
971 */
972 if (pcchPath)
973 {
974 AssertPtrReturn(pcchPath, VERR_INVALID_POINTER);
975 *pcchPath = ~(size_t)0;
976 }
977 AssertPtrReturn(ppszPath, VERR_INVALID_POINTER);
978 AssertReturn(!(fPathStyle & ~RTPATH_STR_F_STYLE_MASK) && fPathStyle != RTPATH_STR_F_STYLE_RESERVED, VERR_INVALID_FLAGS);
979 if (fPathStyle == RTPATH_STR_F_STYLE_HOST)
980 fPathStyle = RTPATH_STYLE;
981 AssertPtrReturn(pszUri, VERR_INVALID_POINTER);
982
983 /*
984 * Check that this is a file URI.
985 */
986 if (RTStrNICmp(pszUri, RT_STR_TUPLE("file:")) == 0)
987 { /* likely */ }
988 else
989 return VERR_URI_NOT_FILE_SCHEME;
990
991 /*
992 * We may have a number of variations here, mostly thanks to
993 * various windows software. First the canonical variations:
994 * - file:///C:/Windows/System32/kernel32.dll
995 * - file:///C|/Windows/System32/kernel32.dll
996 * - file:///C:%5CWindows%5CSystem32%5Ckernel32.dll
997 * - file://localhost/C:%5CWindows%5CSystem32%5Ckernel32.dll
998 * - file://cifsserver.dev/systemshare%5CWindows%5CSystem32%5Ckernel32.dll
999 * - file://cifsserver.dev:139/systemshare%5CWindows%5CSystem32%5Ckernel32.dll (not quite sure here, but whatever)
1000 *
1001 * Legacy variant without any slashes after the schema:
1002 * - file:C:/Windows/System32/kernel32.dll
1003 * - file:C|/Windows/System32%5Ckernel32.dll
1004 * - file:~/.bashrc
1005 * \--path-/
1006 *
1007 * Legacy variant with exactly one slashes after the schema:
1008 * - file:/C:/Windows/System32%5Ckernel32.dll
1009 * - file:/C|/Windows/System32/kernel32.dll
1010 * - file:/usr/bin/env
1011 * \---path---/
1012 *
1013 * Legacy variant with two slashes after the schema and an unescaped DOS path:
1014 * - file://C:/Windows/System32\kernel32.dll (**)
1015 * - file://C|/Windows/System32\kernel32.dll
1016 * \---path---------------------/
1017 * -- authority, with ':' as non-working port separator
1018 *
1019 * Legacy variant with exactly four slashes after the schema and an unescaped DOS path.
1020 * - file:////C:/Windows\System32\user32.dll
1021 *
1022 * Legacy variant with four or more slashes after the schema and an unescaped UNC path:
1023 * - file:////cifsserver.dev/systemshare/System32%\kernel32.dll
1024 * - file://///cifsserver.dev/systemshare/System32\kernel32.dll
1025 * \---path--------------------------------------------/
1026 *
1027 * The two unescaped variants shouldn't be handed to rtUriParse, which
1028 * is good as we cannot actually handle the one marked by (**). So, handle
1029 * those two special when parsing.
1030 */
1031 RTURIPARSED Parsed;
1032 int rc;
1033 size_t cSlashes = 0;
1034 while (pszUri[5 + cSlashes] == '/')
1035 cSlashes++;
1036 if ( (cSlashes == 2 || cSlashes == 4)
1037 && RT_C_IS_ALPHA(pszUri[5 + cSlashes])
1038 && (pszUri[5 + cSlashes + 1] == ':' || pszUri[5 + cSlashes + 1] == '|'))
1039 {
1040 RT_ZERO(Parsed); /* RTURIPARSED_F_CONTAINS_ESCAPED_CHARS is now clear. */
1041 Parsed.offPath = 5 + cSlashes;
1042 Parsed.cchPath = strlen(&pszUri[Parsed.offPath]);
1043 rc = RTStrValidateEncoding(&pszUri[Parsed.offPath]);
1044 }
1045 else if (cSlashes >= 4)
1046 {
1047 RT_ZERO(Parsed);
1048 Parsed.fFlags = cSlashes > 4 ? RTURIPARSED_F_CONTAINS_ESCAPED_CHARS : 0;
1049 Parsed.offPath = 5 + cSlashes - 2;
1050 Parsed.cchPath = strlen(&pszUri[Parsed.offPath]);
1051 rc = RTStrValidateEncoding(&pszUri[Parsed.offPath]);
1052 }
1053 else
1054 rc = rtUriParse(pszUri, &Parsed);
1055 if (RT_SUCCESS(rc))
1056 {
1057 /*
1058 * Ignore localhost as hostname (it's implicit).
1059 */
1060 static char const s_szLocalhost[] = "localhost";
1061 if ( Parsed.cchAuthorityHost == sizeof(s_szLocalhost) - 1U
1062 && RTStrNICmp(&pszUri[Parsed.offAuthorityHost], RT_STR_TUPLE(s_szLocalhost)) == 0)
1063 {
1064 Parsed.cchAuthorityHost = 0;
1065 Parsed.cchAuthority = 0;
1066 }
1067
1068 /*
1069 * Ignore leading path slash/separator if we detect a DOS drive letter
1070 * and we don't have a host name.
1071 */
1072 if ( Parsed.cchPath >= 3
1073 && Parsed.cchAuthorityHost == 0
1074 && pszUri[Parsed.offPath] == '/' /* Leading path slash/separator. */
1075 && ( pszUri[Parsed.offPath + 2] == ':' /* Colon after drive letter. */
1076 || pszUri[Parsed.offPath + 2] == '|') /* Colon alternative. */
1077 && RT_C_IS_ALPHA(pszUri[Parsed.offPath + 1]) ) /* Drive letter. */
1078 {
1079 Parsed.offPath++;
1080 Parsed.cchPath--;
1081 }
1082
1083 /*
1084 * Calculate the size of the encoded result.
1085 *
1086 * Since we're happily returning "C:/Windows/System32/kernel.dll"
1087 * style paths when the caller requested UNIX style paths, we will
1088 * return straight UNC paths too ("//cifsserver/share/dir/file").
1089 */
1090 size_t cchDecodedHost = 0;
1091 size_t cbResult;
1092 if (Parsed.fFlags & RTURIPARSED_F_CONTAINS_ESCAPED_CHARS)
1093 {
1094 cchDecodedHost = rtUriCalcDecodedLength(&pszUri[Parsed.offAuthorityHost], Parsed.cchAuthorityHost);
1095 cbResult = cchDecodedHost + rtUriCalcDecodedLength(&pszUri[Parsed.offPath], Parsed.cchPath) + 1;
1096 }
1097 else
1098 {
1099 cchDecodedHost = 0;
1100 cbResult = Parsed.cchAuthorityHost + Parsed.cchPath + 1;
1101 }
1102 if (pcchPath)
1103 *pcchPath = cbResult - 1;
1104 if (cbResult > 1)
1105 {
1106 /*
1107 * Prepare the necessary buffer space for the result.
1108 */
1109 char *pszDst;
1110 char *pszFreeMe = NULL;
1111 if (!cbPath || *ppszPath == NULL)
1112 {
1113 cbPath = RT_MAX(cbPath, cbResult);
1114 *ppszPath = pszFreeMe = pszDst = RTStrAlloc(cbPath);
1115 AssertReturn(pszDst, VERR_NO_STR_MEMORY);
1116 }
1117 else if (cbResult <= cbPath)
1118 pszDst = *ppszPath;
1119 else
1120 return VERR_BUFFER_OVERFLOW;
1121
1122 /*
1123 * Compose the result.
1124 */
1125 if (Parsed.fFlags & RTURIPARSED_F_CONTAINS_ESCAPED_CHARS)
1126 {
1127 rc = rtUriDecodeIntoBuffer(&pszUri[Parsed.offAuthorityHost],Parsed.cchAuthorityHost,
1128 pszDst, cchDecodedHost + 1);
1129 Assert(RT_SUCCESS(rc) && strlen(pszDst) == cchDecodedHost);
1130 if (RT_SUCCESS(rc))
1131 rc = rtUriDecodeIntoBuffer(&pszUri[Parsed.offPath], Parsed.cchPath,
1132 &pszDst[cchDecodedHost], cbResult - cchDecodedHost);
1133 Assert(RT_SUCCESS(rc) && strlen(pszDst) == cbResult - 1);
1134 }
1135 else
1136 {
1137 memcpy(pszDst, &pszUri[Parsed.offAuthorityHost], Parsed.cchAuthorityHost);
1138 memcpy(&pszDst[Parsed.cchAuthorityHost], &pszUri[Parsed.offPath], Parsed.cchPath);
1139 pszDst[cbResult - 1] = '\0';
1140 }
1141 if (RT_SUCCESS(rc))
1142 {
1143 /*
1144 * Convert the DOS drive letter colon alternative ('|') into a colon.
1145 * We do this regardless of the desired path style.
1146 */
1147 if ( RT_C_IS_ALPHA(pszDst[0])
1148 && pszDst[1] == '|')
1149 pszDst[1] = ':';
1150
1151 /*
1152 * Fix slashes.
1153 */
1154 if (fPathStyle == RTPATH_STR_F_STYLE_DOS)
1155 RTPathChangeToDosSlashes(pszDst, true);
1156 else if (fPathStyle == RTPATH_STR_F_STYLE_UNIX)
1157 RTPathChangeToUnixSlashes(pszDst, true); /** @todo not quite sure how this actually makes sense... */
1158 else
1159 AssertFailed();
1160 return rc;
1161 }
1162
1163 /* bail out */
1164 RTStrFree(pszFreeMe);
1165 }
1166 else
1167 rc = VERR_PATH_ZERO_LENGTH;
1168 }
1169 return rc;
1170}
1171
1172
1173RTDECL(char *) RTUriFilePath(const char *pszUri)
1174{
1175 char *pszPath = NULL;
1176 int rc = RTUriFilePathEx(pszUri, RTPATH_STR_F_STYLE_HOST, &pszPath, 0 /*cbPath*/, NULL /*pcchPath*/);
1177 if (RT_SUCCESS(rc))
1178 return pszPath;
1179 return NULL;
1180}
1181
Note: See TracBrowser for help on using the repository browser.

© 2024 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette