VirtualBox

source: vbox/trunk/src/VBox/Runtime/common/misc/uri.cpp@ 107454

Last change on this file since 107454 was 107454, checked in by vboxsync, 5 weeks ago

Runtime/common/misc/uri.cpp: Don't call strlen() two times in the RT_MIN() expansion to save some time (maybe the compiler would optimize this but better not depend on it), bugref:3409

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 40.6 KB
Line 
1/* $Id: uri.cpp 107454 2025-01-07 10:16:43Z vboxsync $ */
2/** @file
3 * IPRT - Uniform Resource Identifier handling.
4 */
5
6/*
7 * Copyright (C) 2011-2024 Oracle and/or its affiliates.
8 *
9 * This file is part of VirtualBox base platform packages, as
10 * available from https://www.virtualbox.org.
11 *
12 * This program is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU General Public License
14 * as published by the Free Software Foundation, in version 3 of the
15 * License.
16 *
17 * This program is distributed in the hope that it will be useful, but
18 * WITHOUT ANY WARRANTY; without even the implied warranty of
19 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20 * General Public License for more details.
21 *
22 * You should have received a copy of the GNU General Public License
23 * along with this program; if not, see <https://www.gnu.org/licenses>.
24 *
25 * The contents of this file may alternatively be used under the terms
26 * of the Common Development and Distribution License Version 1.0
27 * (CDDL), a copy of it is provided in the "COPYING.CDDL" file included
28 * in the VirtualBox distribution, in which case the provisions of the
29 * CDDL are applicable instead of those of the GPL.
30 *
31 * You may elect to license modified versions of this file under the
32 * terms and conditions of either the GPL or the CDDL or both.
33 *
34 * SPDX-License-Identifier: GPL-3.0-only OR CDDL-1.0
35 */
36
37
38/*********************************************************************************************************************************
39* Header Files *
40*********************************************************************************************************************************/
41#include <iprt/uri.h>
42
43#include <iprt/assert.h>
44#include <iprt/ctype.h>
45#include <iprt/err.h>
46#include <iprt/path.h>
47#include <iprt/string.h>
48
49
50/*********************************************************************************************************************************
51* Defined Constants And Macros *
52*********************************************************************************************************************************/
/** Magic value stored in RTURIPARSED::u32Magic once rtUriParse has completed
 *  successfully; the RTUriParsed* getters check it before using the structure. */
#define RTURIPARSED_MAGIC   UINT32_C(0x439e0745)
55
56
/* General URI format:

    foo://example.com:8042/over/there?name=ferret#nose
    \_/   \______________/\_________/ \_________/ \__/
     |           |            |            |        |
  scheme     authority       path        query   fragment
     |   _____________________|__
    / \ /                        \
    urn:example:animal:ferret:nose
*/
67
68
/**
 * Checks whether a character has to be percent-escaped.
 *
 * The excluded set is:
 *      - control chars and space: 0x00-0x20
 *      - delimiters:              '<' , '>' , '#' , '%' , '"'
 *      - unwise chars:            '{' , '}' , '|' , '\' , '^' , '[' , ']' , '`'
 *
 * @note    The argument is evaluated several times, so it must not have side
 *          effects.
 * @note    ARM defines char as unsigned by default in the AAPCS(64), so testing
 *          for ">= 0" there would trigger a compiler warning/error.  Apple
 *          ignores that and declares char as signed like the other platforms,
 *          which is why only Linux on ARM gets the variant without the lower
 *          bound check.
 */
#if defined(RT_OS_LINUX) && (defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32))
# define URI_IS_CTRL_OR_SPACE(a)    ( (a) <= 0x20 )
#else
# define URI_IS_CTRL_OR_SPACE(a)    ( (a) >= 0x0 && (a) <= 0x20 )
#endif
#define URI_EXCLUDED(a) \
    (   URI_IS_CTRL_OR_SPACE(a) \
     || ((a) >= 0x5B && (a) <= 0x5E) \
     || ((a) >= 0x7B && (a) <= 0x7D) \
     || (a) == '<' || (a) == '>' || (a) == '#' \
     || (a) == '%' || (a) == '"' || (a) == '`' )
96
97static char *rtUriPercentEncodeN(const char *pszString, size_t cchMax)
98{
99 if (!pszString)
100 return NULL;
101
102 int rc = VINF_SUCCESS;
103
104 size_t const cchStr = strlen(pszString);
105 size_t cbLen = RT_MIN(cchStr, cchMax);
106 /* The new string can be max 3 times in size of the original string. */
107 char *pszNew = RTStrAlloc(cbLen * 3 + 1);
108 if (!pszNew)
109 return NULL;
110
111 char *pszRes = NULL;
112 size_t iIn = 0;
113 size_t iOut = 0;
114 while (iIn < cbLen)
115 {
116 if (URI_EXCLUDED(pszString[iIn]))
117 {
118 char szNum[3] = { 0, 0, 0 };
119 RTStrFormatU8(&szNum[0], 3, pszString[iIn++], 16, 2, 2, RTSTR_F_CAPITAL | RTSTR_F_ZEROPAD);
120 pszNew[iOut++] = '%';
121 pszNew[iOut++] = szNum[0];
122 pszNew[iOut++] = szNum[1];
123 }
124 else
125 pszNew[iOut++] = pszString[iIn++];
126 }
127 if (RT_SUCCESS(rc))
128 {
129 pszNew[iOut] = '\0';
130 if (iOut != iIn)
131 {
132 /* If the source and target strings have different size, recreate
133 * the target string with the correct size. */
134 pszRes = RTStrDupN(pszNew, iOut);
135 RTStrFree(pszNew);
136 }
137 else
138 pszRes = pszNew;
139 }
140 else
141 RTStrFree(pszNew);
142
143 return pszRes;
144}
145
146
/**
 * Works out how long a string gets once it is percent-encoded.
 *
 * @returns Number of chars (excluding the terminator), 0 if @a pszString is NULL.
 * @param   pszString       The string to encode.
 * @param   cchMax          The maximum string length (e.g. RTSTR_MAX).
 * @param   fEncodeDosSlash Whether to encode DOS slashes or not.
 */
static size_t rtUriCalcEncodedLength(const char *pszString, size_t cchMax, bool fEncodeDosSlash)
{
    if (!pszString)
        return 0;

    size_t             cchResult = 0;
    const char * const pchEnd    = pszString + RTStrNLen(pszString, cchMax);
    for (const char *pch = pszString; pch != pchEnd; pch++)
    {
        char const ch        = *pch;
        bool const fVerbatim = !URI_EXCLUDED(ch) || (ch == '\\' && !fEncodeDosSlash);
        cchResult += fVerbatim ? 1 : 3; /* An escaped char becomes '%' plus two hex digits. */
    }
    return cchResult;
}
172
173
174/**
175 * Encodes an URI into a caller allocated buffer.
176 *
177 * @returns IPRT status code.
178 * @param pszString The string to encode.
179 * @param cchMax The maximum string length (e.g. RTSTR_MAX).
180 * @param fEncodeDosSlash Whether to encode DOS slashes or not.
181 * @param pszDst The destination buffer.
182 * @param cbDst The size of the destination buffer.
183 */
184static int rtUriEncodeIntoBuffer(const char *pszString, size_t cchMax, bool fEncodeDosSlash, char *pszDst, size_t cbDst)
185{
186 AssertReturn(pszString, VERR_INVALID_POINTER);
187 AssertPtrReturn(pszDst, VERR_INVALID_POINTER);
188
189 /*
190 * We do buffer size checking up front and every time we encode a special
191 * character. That's faster than checking for each char.
192 */
193 size_t cchSrcLeft = RTStrNLen(pszString, cchMax);
194 AssertMsgReturn(cbDst > cchSrcLeft, ("cbDst=%zu cchSrcLeft=%zu\n", cbDst, cchSrcLeft), VERR_BUFFER_OVERFLOW);
195 cbDst -= cchSrcLeft;
196
197 while (cchSrcLeft-- > 0)
198 {
199 char const ch = *pszString++;
200 if (!URI_EXCLUDED(ch) || (ch == '\\' && !fEncodeDosSlash))
201 *pszDst++ = ch;
202 else
203 {
204 AssertReturn(cbDst >= 3, VERR_BUFFER_OVERFLOW); /* 2 extra bytes + zero terminator. */
205 cbDst -= 2;
206
207 *pszDst++ = '%';
208 ssize_t cchTmp = RTStrFormatU8(pszDst, 3, (unsigned char)ch, 16, 2, 2, RTSTR_F_CAPITAL | RTSTR_F_ZEROPAD);
209 Assert(cchTmp == 2); NOREF(cchTmp);
210 pszDst += 2;
211 }
212 }
213
214 *pszDst = '\0';
215 return VINF_SUCCESS;
216}
217
218
/**
 * Percent-decodes a piece of a string into a newly allocated buffer.
 *
 * Malformed escape sequences trigger an assertion and are copied verbatim.
 *
 * @returns The decoded string (free with RTStrFree), NULL on invalid input or
 *          allocation failure.
 * @param   pszString   Start of the piece to decode.  Must not contain a zero
 *                      terminator within the first @a cchString bytes.
 * @param   cchString   Number of bytes to decode.
 */
static char *rtUriPercentDecodeN(const char *pszString, size_t cchString)
{
    AssertPtrReturn(pszString, NULL);
    AssertReturn(memchr(pszString, '\0', cchString) == NULL, NULL);

    /*
     * Decoding never makes the string longer, so the input length is a
     * sufficient starting buffer size.
     */
    char *pszDecoded = RTStrAlloc(cchString + 1);
    if (!pszDecoded)
        return NULL;

    /*
     * The input is taken to be valid UTF-8 already, so only the escape
     * sequences need looking at.
     */
    const char *pchIn     = pszString;
    size_t      cchInLeft = cchString;
    char       *pchOut    = pszDecoded;
    while (cchInLeft > 0)
    {
        const char *pchPct = (const char *)memchr(pchIn, '%', cchInLeft);
        if (!pchPct)
        {
            /* No more escapes, copy the remainder and stop. */
            memcpy(pchOut, pchIn, cchInLeft);
            pchOut += cchInLeft;
            break;
        }

        /* Copy the plain text in front of the percent sign. */
        size_t const cchPlain = pchPct - pchIn;
        if (cchPlain)
        {
            memcpy(pchOut, pchIn, cchPlain);
            pchOut    += cchPlain;
            pchIn     += cchPlain;
            cchInLeft -= cchPlain;
        }

        char chHigh, chLow;
        if (   cchInLeft >= 3
            && RT_C_IS_XDIGIT(chHigh = pchIn[1])
            && RT_C_IS_XDIGIT(chLow  = pchIn[2]))
        {
            /* Clearing bit 5 folds lower case hex letters to upper case. */
            uint8_t b = RT_C_IS_DIGIT(chHigh) ? chHigh - '0' : (chHigh & ~0x20) - 'A' + 10;
            b <<= 4;
            b |= RT_C_IS_DIGIT(chLow) ? chLow - '0' : (chLow & ~0x20) - 'A' + 10;
            *pchOut++  = (char)b;
            pchIn     += 3;
            cchInLeft -= 3;
        }
        else
        {
            AssertFailed();
            *pchOut++ = *pchIn++;
            cchInLeft--;
        }
    }

    *pchOut = '\0';

    /*
     * Give back memory if the result is a lot shorter than the buffer.
     */
    size_t const cchDecoded = pchOut - pszDecoded;
    Assert(cchDecoded <= cchString);
    if (cchString - cchDecoded > 64)
        RTStrRealloc(&pszDecoded, cchDecoded + 1);
    return pszDecoded;
}
293
294
/**
 * Works out how long a string gets once it is percent-decoded.
 *
 * Only well formed escapes ('%' followed by two hex digits) shrink the result;
 * anything else counts as one char.
 *
 * @returns Number of chars (excluding the terminator), 0 if @a pszString is NULL.
 * @param   pszString   The string to decode.
 * @param   cchMax      The maximum string length (e.g. RTSTR_MAX).
 */
static size_t rtUriCalcDecodedLength(const char *pszString, size_t cchMax)
{
    if (!pszString)
        return 0;

    size_t const cchSrc     = RTStrNLen(pszString, cchMax);
    size_t       cchDecoded = cchSrc;
    size_t       offSrc     = 0;
    while (offSrc < cchSrc)
    {
        if (   pszString[offSrc] == '%'
            && cchSrc - offSrc >= 3
            && RT_C_IS_XDIGIT(pszString[offSrc + 1])
            && RT_C_IS_XDIGIT(pszString[offSrc + 2]))
        {
            /* Three source chars collapse into a single decoded one. */
            cchDecoded -= 2;
            offSrc     += 3;
        }
        else
            offSrc++;
    }
    return cchDecoded;
}
327
328
329/**
330 * Decodes a string into a buffer.
331 *
332 * @returns IPRT status code.
333 * @param pchSrc The source string.
334 * @param cchSrc The max number of bytes to decode in the source string.
335 * @param pszDst The destination buffer.
336 * @param cbDst The size of the buffer (including terminator).
337 */
338static int rtUriDecodeIntoBuffer(const char *pchSrc, size_t cchSrc, char *pszDst, size_t cbDst)
339{
340 AssertPtrReturn(pchSrc, VERR_INVALID_POINTER);
341 AssertPtrReturn(pszDst, VERR_INVALID_POINTER);
342
343 /*
344 * Knowing that the pszString itself is valid UTF-8, we only have to
345 * validate the escape sequences.
346 */
347 cchSrc = RTStrNLen(pchSrc, cchSrc);
348 while (cchSrc > 0)
349 {
350 const char *pchPct = (const char *)memchr(pchSrc, '%', cchSrc);
351 if (pchPct)
352 {
353 size_t cchBefore = pchPct - pchSrc;
354 AssertReturn(cchBefore + 1 < cbDst, VERR_BUFFER_OVERFLOW);
355 if (cchBefore)
356 {
357 memcpy(pszDst, pchSrc, cchBefore);
358 pszDst += cchBefore;
359 cbDst -= cchBefore;
360 pchSrc += cchBefore;
361 cchSrc -= cchBefore;
362 }
363
364 char chHigh, chLow;
365 if ( cchSrc >= 3
366 && RT_C_IS_XDIGIT(chHigh = pchSrc[1])
367 && RT_C_IS_XDIGIT(chLow = pchSrc[2]))
368 {
369 uint8_t b = RT_C_IS_DIGIT(chHigh) ? chHigh - '0' : (chHigh & ~0x20) - 'A' + 10;
370 b <<= 4;
371 b |= RT_C_IS_DIGIT(chLow) ? chLow - '0' : (chLow & ~0x20) - 'A' + 10;
372 *pszDst++ = (char)b;
373 pchSrc += 3;
374 cchSrc -= 3;
375 }
376 else
377 {
378 AssertFailed();
379 *pszDst++ = *pchSrc++;
380 cchSrc--;
381 }
382 cbDst -= 1;
383 }
384 else
385 {
386 AssertReturn(cchSrc < cbDst, VERR_BUFFER_OVERFLOW);
387 memcpy(pszDst, pchSrc, cchSrc);
388 pszDst += cchSrc;
389 cbDst -= cchSrc;
390 pchSrc += cchSrc;
391 cchSrc = 0;
392 break;
393 }
394 }
395
396 AssertReturn(cbDst > 0, VERR_BUFFER_OVERFLOW);
397 *pszDst = '\0';
398 return VINF_SUCCESS;
399}
400
401
402
403static int rtUriParse(const char *pszUri, PRTURIPARSED pParsed)
404{
405 /*
406 * Validate the input and clear the output.
407 */
408 AssertPtrReturn(pParsed, VERR_INVALID_POINTER);
409 RT_ZERO(*pParsed);
410 pParsed->uAuthorityPort = UINT32_MAX;
411
412 AssertPtrReturn(pszUri, VERR_INVALID_POINTER);
413
414 size_t const cchUri = strlen(pszUri);
415 if (RT_LIKELY(cchUri >= 3)) { /* likely */ }
416 else return cchUri ? VERR_URI_TOO_SHORT : VERR_URI_EMPTY;
417
418 /*
419 * Validating escaped text sequences is much simpler if we know that
420 * that the base URI string is valid. Also, we don't necessarily trust
421 * the developer calling us to remember to do this.
422 */
423 int rc = RTStrValidateEncoding(pszUri);
424 AssertRCReturn(rc, rc);
425
426 /*
427 * RFC-3986, section 3.1:
428 * scheme = ALPHA *( ALPHA / DIGIT / "+" / "-" / "." )
429 *
430 * The scheme ends with a ':', which we also skip here.
431 */
432 size_t off = 0;
433 char ch = pszUri[off++];
434 if (RT_LIKELY(RT_C_IS_ALPHA(ch))) { /* likely */ }
435 else return VERR_URI_INVALID_SCHEME;
436 for (;;)
437 {
438 ch = pszUri[off];
439 if (ch == ':')
440 break;
441 if (RT_LIKELY(RT_C_IS_ALNUM(ch) || ch == '.' || ch == '-' || ch == '+')) { /* likely */ }
442 else return VERR_URI_INVALID_SCHEME;
443 off++;
444 }
445 pParsed->cchScheme = off;
446
447 /* Require the scheme length to be at least two chars so we won't confuse
448 it with a path starting with a DOS drive letter specification. */
449 if (RT_LIKELY(off >= 2)) { /* likely */ }
450 else return VERR_URI_INVALID_SCHEME;
451
452 off++; /* (skip colon) */
453
454 /*
455 * Find the end of the path, we'll need this several times.
456 * Also, while we're potentially scanning the whole thing, check for '%'.
457 */
458 size_t const offHash = RTStrOffCharOrTerm(&pszUri[off], '#') + off;
459 size_t const offQuestionMark = RTStrOffCharOrTerm(&pszUri[off], '?') + off;
460
461 if (memchr(pszUri, '%', cchUri) != NULL)
462 pParsed->fFlags |= RTURIPARSED_F_CONTAINS_ESCAPED_CHARS;
463
464 /*
465 * RFC-3986, section 3.2:
466 * The authority component is preceeded by a double slash ("//")...
467 */
468 if ( pszUri[off] == '/'
469 && pszUri[off + 1] == '/')
470 {
471 off += 2;
472 pParsed->offAuthority = pParsed->offAuthorityUsername = pParsed->offAuthorityPassword = pParsed->offAuthorityHost = off;
473 pParsed->fFlags |= RTURIPARSED_F_HAS_AUTHORITY;
474
475 /*
476 * RFC-3986, section 3.2:
477 * ...and is terminated by the next slash ("/"), question mark ("?"),
478 * or number sign ("#") character, or by the end of the URI.
479 */
480 const char *pszAuthority = &pszUri[off];
481 size_t cchAuthority = RTStrOffCharOrTerm(pszAuthority, '/');
482 cchAuthority = RT_MIN(cchAuthority, offHash - off);
483 cchAuthority = RT_MIN(cchAuthority, offQuestionMark - off);
484 pParsed->cchAuthority = cchAuthority;
485
486 /* The Authority can be empty, like for: file:///usr/bin/grep */
487 if (cchAuthority > 0)
488 {
489 pParsed->cchAuthorityHost = cchAuthority;
490
491 /*
492 * If there is a userinfo part, it is ended by a '@'.
493 */
494 const char *pszAt = (const char *)memchr(pszAuthority, '@', cchAuthority);
495 if (pszAt)
496 {
497 size_t cchTmp = pszAt - pszAuthority;
498 pParsed->offAuthorityHost += cchTmp + 1;
499 pParsed->cchAuthorityHost -= cchTmp + 1;
500
501 /* If there is a password part, it's separated from the username with a colon. */
502 const char *pszColon = (const char *)memchr(pszAuthority, ':', cchTmp);
503 if (pszColon)
504 {
505 pParsed->cchAuthorityUsername = pszColon - pszAuthority;
506 pParsed->offAuthorityPassword = &pszColon[1] - pszUri;
507 pParsed->cchAuthorityPassword = pszAt - &pszColon[1];
508 }
509 else
510 {
511 pParsed->cchAuthorityUsername = cchTmp;
512 pParsed->offAuthorityPassword = off + cchTmp;
513 }
514 }
515
516 /*
517 * If there is a port part, its after the last colon in the host part.
518 */
519 const char *pszColon = (const char *)memrchr(&pszUri[pParsed->offAuthorityHost], ':', pParsed->cchAuthorityHost);
520 if (pszColon)
521 {
522 size_t cchTmp = &pszUri[pParsed->offAuthorityHost + pParsed->cchAuthorityHost] - &pszColon[1];
523 pParsed->cchAuthorityHost -= cchTmp + 1;
524 pParsed->fFlags |= RTURIPARSED_F_HAS_PORT;
525 if (cchTmp > 0)
526 {
527 pParsed->uAuthorityPort = 0;
528 while (cchTmp-- > 0)
529 {
530 ch = *++pszColon;
531 if ( RT_C_IS_DIGIT(ch)
532 && pParsed->uAuthorityPort < UINT32_MAX / UINT32_C(10))
533 {
534 pParsed->uAuthorityPort *= 10;
535 pParsed->uAuthorityPort += ch - '0';
536 }
537 else
538 return VERR_URI_INVALID_PORT_NUMBER;
539 }
540 }
541 }
542 }
543
544 /* Skip past the authority. */
545 off += cchAuthority;
546 }
547 else
548 pParsed->offAuthority = pParsed->offAuthorityUsername = pParsed->offAuthorityPassword = pParsed->offAuthorityHost = off;
549
550 /*
551 * RFC-3986, section 3.3: Path
552 * The path is terminated by the first question mark ("?")
553 * or number sign ("#") character, or by the end of the URI.
554 */
555 pParsed->offPath = off;
556 pParsed->cchPath = RT_MIN(offHash, offQuestionMark) - off;
557 off += pParsed->cchPath;
558
559 /*
560 * RFC-3986, section 3.4: Query
561 * The query component is indicated by the first question mark ("?")
562 * character and terminated by a number sign ("#") character or by the
563 * end of the URI.
564 */
565 if ( off == offQuestionMark
566 && off < cchUri)
567 {
568 Assert(pszUri[offQuestionMark] == '?');
569 pParsed->offQuery = ++off;
570 pParsed->cchQuery = offHash - off;
571 off = offHash;
572 }
573 else
574 {
575 Assert(!pszUri[offQuestionMark]);
576 pParsed->offQuery = off;
577 }
578
579 /*
580 * RFC-3986, section 3.5: Fragment
581 * A fragment identifier component is indicated by the presence of a
582 * number sign ("#") character and terminated by the end of the URI.
583 */
584 if ( off == offHash
585 && off < cchUri)
586 {
587 pParsed->offFragment = ++off;
588 pParsed->cchFragment = cchUri - off;
589 }
590 else
591 {
592 Assert(!pszUri[offHash]);
593 pParsed->offFragment = off;
594 }
595
596 /*
597 * If there are any escape sequences, validate them.
598 *
599 * This is reasonably simple as we already know that the string is valid UTF-8
600 * before they get decoded. Thus we only have to validate the escaped sequences.
601 */
602 if (pParsed->fFlags & RTURIPARSED_F_CONTAINS_ESCAPED_CHARS)
603 {
604 const char *pchSrc = (const char *)memchr(pszUri, '%', cchUri);
605 AssertReturn(pchSrc, VERR_INTERNAL_ERROR);
606 do
607 {
608 char szUtf8Seq[8];
609 unsigned cchUtf8Seq = 0;
610 unsigned cchNeeded = 0;
611 size_t cchLeft = &pszUri[cchUri] - pchSrc;
612 do
613 {
614 if (cchLeft >= 3)
615 {
616 char chHigh = pchSrc[1];
617 char chLow = pchSrc[2];
618 if ( RT_C_IS_XDIGIT(chHigh)
619 && RT_C_IS_XDIGIT(chLow))
620 {
621 uint8_t b = RT_C_IS_DIGIT(chHigh) ? chHigh - '0' : (chHigh & ~0x20) - 'A' + 10;
622 b <<= 4;
623 b |= RT_C_IS_DIGIT(chLow) ? chLow - '0' : (chLow & ~0x20) - 'A' + 10;
624
625 if (!(b & 0x80))
626 {
627 /* We don't want the string to be terminated prematurely. */
628 if (RT_LIKELY(b != 0)) { /* likely */ }
629 else return VERR_URI_ESCAPED_ZERO;
630
631 /* Check that we're not expecting more UTF-8 bytes. */
632 if (RT_LIKELY(cchNeeded == 0)) { /* likely */ }
633 else return VERR_URI_MISSING_UTF8_CONTINUATION_BYTE;
634 }
635 /* Are we waiting UTF-8 bytes? */
636 else if (cchNeeded > 0)
637 {
638 if (RT_LIKELY(!(b & 0x40))) { /* likely */ }
639 else return VERR_URI_INVALID_ESCAPED_UTF8_CONTINUATION_BYTE;
640
641 szUtf8Seq[cchUtf8Seq++] = (char)b;
642 if (--cchNeeded == 0)
643 {
644 szUtf8Seq[cchUtf8Seq] = '\0';
645 rc = RTStrValidateEncoding(szUtf8Seq);
646 if (RT_FAILURE(rc))
647 return VERR_URI_ESCAPED_CHARS_NOT_VALID_UTF8;
648 cchUtf8Seq = 0;
649 }
650 }
651 /* Start a new UTF-8 sequence. */
652 else
653 {
654 if ((b & 0xf8) == 0xf0)
655 cchNeeded = 3;
656 else if ((b & 0xf0) == 0xe0)
657 cchNeeded = 2;
658 else if ((b & 0xe0) == 0xc0)
659 cchNeeded = 1;
660 else
661 return VERR_URI_INVALID_ESCAPED_UTF8_LEAD_BYTE;
662 szUtf8Seq[0] = (char)b;
663 cchUtf8Seq = 1;
664 }
665 pchSrc += 3;
666 cchLeft -= 3;
667 }
668 else
669 return VERR_URI_INVALID_ESCAPE_SEQ;
670 }
671 else
672 return VERR_URI_INVALID_ESCAPE_SEQ;
673 } while (cchLeft > 0 && pchSrc[0] == '%');
674
675 /* Check that we're not expecting more UTF-8 bytes. */
676 if (RT_LIKELY(cchNeeded == 0)) { /* likely */ }
677 else return VERR_URI_MISSING_UTF8_CONTINUATION_BYTE;
678
679 /* next */
680 pchSrc = (const char *)memchr(pchSrc, '%', cchLeft);
681 } while (pchSrc);
682 }
683
684 pParsed->u32Magic = RTURIPARSED_MAGIC;
685 return VINF_SUCCESS;
686}
687
688
689RTDECL(int) RTUriParse(const char *pszUri, PRTURIPARSED pParsed)
690{
691 return rtUriParse(pszUri, pParsed);
692}
693
694
695RTDECL(char *) RTUriParsedScheme(const char *pszUri, PCRTURIPARSED pParsed)
696{
697 AssertPtrReturn(pszUri, NULL);
698 AssertPtrReturn(pParsed, NULL);
699 AssertReturn(pParsed->u32Magic == RTURIPARSED_MAGIC, NULL);
700 return RTStrDupN(pszUri, pParsed->cchScheme);
701}
702
703
704RTDECL(char *) RTUriParsedAuthority(const char *pszUri, PCRTURIPARSED pParsed)
705{
706 AssertPtrReturn(pszUri, NULL);
707 AssertPtrReturn(pParsed, NULL);
708 AssertReturn(pParsed->u32Magic == RTURIPARSED_MAGIC, NULL);
709 if (pParsed->cchAuthority || (pParsed->fFlags & RTURIPARSED_F_HAS_AUTHORITY))
710 return rtUriPercentDecodeN(&pszUri[pParsed->offAuthority], pParsed->cchAuthority);
711 return NULL;
712}
713
714
715RTDECL(char *) RTUriParsedAuthorityUsername(const char *pszUri, PCRTURIPARSED pParsed)
716{
717 AssertPtrReturn(pszUri, NULL);
718 AssertPtrReturn(pParsed, NULL);
719 AssertReturn(pParsed->u32Magic == RTURIPARSED_MAGIC, NULL);
720 if (pParsed->cchAuthorityUsername)
721 return rtUriPercentDecodeN(&pszUri[pParsed->offAuthorityUsername], pParsed->cchAuthorityUsername);
722 return NULL;
723}
724
725
726RTDECL(char *) RTUriParsedAuthorityPassword(const char *pszUri, PCRTURIPARSED pParsed)
727{
728 AssertPtrReturn(pszUri, NULL);
729 AssertPtrReturn(pParsed, NULL);
730 AssertReturn(pParsed->u32Magic == RTURIPARSED_MAGIC, NULL);
731 if (pParsed->cchAuthorityPassword)
732 return rtUriPercentDecodeN(&pszUri[pParsed->offAuthorityPassword], pParsed->cchAuthorityPassword);
733 return NULL;
734}
735
736
737RTDECL(char *) RTUriParsedAuthorityHost(const char *pszUri, PCRTURIPARSED pParsed)
738{
739 AssertPtrReturn(pszUri, NULL);
740 AssertPtrReturn(pParsed, NULL);
741 AssertReturn(pParsed->u32Magic == RTURIPARSED_MAGIC, NULL);
742 if (pParsed->cchAuthorityHost)
743 return rtUriPercentDecodeN(&pszUri[pParsed->offAuthorityHost], pParsed->cchAuthorityHost);
744 return NULL;
745}
746
747
748RTDECL(uint32_t) RTUriParsedAuthorityPort(const char *pszUri, PCRTURIPARSED pParsed)
749{
750 AssertPtrReturn(pszUri, UINT32_MAX);
751 AssertPtrReturn(pParsed, UINT32_MAX);
752 AssertReturn(pParsed->u32Magic == RTURIPARSED_MAGIC, UINT32_MAX);
753 return pParsed->uAuthorityPort;
754}
755
756
757RTDECL(char *) RTUriParsedPath(const char *pszUri, PCRTURIPARSED pParsed)
758{
759 AssertPtrReturn(pszUri, NULL);
760 AssertPtrReturn(pParsed, NULL);
761 AssertReturn(pParsed->u32Magic == RTURIPARSED_MAGIC, NULL);
762 if (pParsed->cchPath)
763 return rtUriPercentDecodeN(&pszUri[pParsed->offPath], pParsed->cchPath);
764 return NULL;
765}
766
767
768RTDECL(char *) RTUriParsedQuery(const char *pszUri, PCRTURIPARSED pParsed)
769{
770 AssertPtrReturn(pszUri, NULL);
771 AssertPtrReturn(pParsed, NULL);
772 AssertReturn(pParsed->u32Magic == RTURIPARSED_MAGIC, NULL);
773 if (pParsed->cchQuery)
774 return rtUriPercentDecodeN(&pszUri[pParsed->offQuery], pParsed->cchQuery);
775 return NULL;
776}
777
778
779RTDECL(char *) RTUriParsedFragment(const char *pszUri, PCRTURIPARSED pParsed)
780{
781 AssertPtrReturn(pszUri, NULL);
782 AssertPtrReturn(pParsed, NULL);
783 AssertReturn(pParsed->u32Magic == RTURIPARSED_MAGIC, NULL);
784 if (pParsed->cchFragment)
785 return rtUriPercentDecodeN(&pszUri[pParsed->offFragment], pParsed->cchFragment);
786 return NULL;
787}
788
789
790RTDECL(char *) RTUriCreate(const char *pszScheme, const char *pszAuthority, const char *pszPath, const char *pszQuery,
791 const char *pszFragment)
792{
793 if (!pszScheme) /* Scheme is minimum requirement */
794 return NULL;
795
796 char *pszResult = 0;
797 char *pszAuthority1 = 0;
798 char *pszPath1 = 0;
799 char *pszQuery1 = 0;
800 char *pszFragment1 = 0;
801
802 do
803 {
804 /* Create the percent encoded strings and calculate the necessary uri
805 * length. */
806 size_t cbSize = strlen(pszScheme) + 1 + 1; /* plus zero byte */
807 if (pszAuthority)
808 {
809 pszAuthority1 = rtUriPercentEncodeN(pszAuthority, RTSTR_MAX);
810 if (!pszAuthority1)
811 break;
812 cbSize += strlen(pszAuthority1) + 2;
813 }
814 if (pszPath)
815 {
816 pszPath1 = rtUriPercentEncodeN(pszPath, RTSTR_MAX);
817 if (!pszPath1)
818 break;
819 cbSize += strlen(pszPath1);
820 }
821 if (pszQuery)
822 {
823 pszQuery1 = rtUriPercentEncodeN(pszQuery, RTSTR_MAX);
824 if (!pszQuery1)
825 break;
826 cbSize += strlen(pszQuery1) + 1;
827 }
828 if (pszFragment)
829 {
830 pszFragment1 = rtUriPercentEncodeN(pszFragment, RTSTR_MAX);
831 if (!pszFragment1)
832 break;
833 cbSize += strlen(pszFragment1) + 1;
834 }
835
836 char *pszTmp = pszResult = (char *)RTStrAlloc(cbSize);
837 if (!pszResult)
838 break;
839 RT_BZERO(pszTmp, cbSize);
840
841 /* Compose the target uri string. */
842 RTStrCatP(&pszTmp, &cbSize, pszScheme);
843 RTStrCatP(&pszTmp, &cbSize, ":");
844 if (pszAuthority1)
845 {
846 RTStrCatP(&pszTmp, &cbSize, "//");
847 RTStrCatP(&pszTmp, &cbSize, pszAuthority1);
848 }
849 if (pszPath1)
850 {
851 RTStrCatP(&pszTmp, &cbSize, pszPath1);
852 }
853 if (pszQuery1)
854 {
855 RTStrCatP(&pszTmp, &cbSize, "?");
856 RTStrCatP(&pszTmp, &cbSize, pszQuery1);
857 }
858 if (pszFragment1)
859 {
860 RTStrCatP(&pszTmp, &cbSize, "#");
861 RTStrCatP(&pszTmp, &cbSize, pszFragment1);
862 }
863 } while (0);
864
865 /* Cleanup */
866 if (pszAuthority1)
867 RTStrFree(pszAuthority1);
868 if (pszPath1)
869 RTStrFree(pszPath1);
870 if (pszQuery1)
871 RTStrFree(pszQuery1);
872 if (pszFragment1)
873 RTStrFree(pszFragment1);
874
875 return pszResult;
876}
877
878
879RTDECL(bool) RTUriIsSchemeMatch(const char *pszUri, const char *pszScheme)
880{
881 AssertPtrReturn(pszUri, false);
882 size_t const cchScheme = strlen(pszScheme);
883 return RTStrNICmp(pszUri, pszScheme, cchScheme) == 0
884 && pszUri[cchScheme] == ':';
885}
886
887
888RTDECL(int) RTUriFileCreateEx(const char *pszPath, uint32_t fPathStyle, char **ppszUri, size_t cbUri, size_t *pcchUri)
889{
890 /*
891 * Validate and adjust input. (RTPathParse check pszPath out for us)
892 */
893 if (pcchUri)
894 {
895 AssertPtrReturn(pcchUri, VERR_INVALID_POINTER);
896 *pcchUri = ~(size_t)0;
897 }
898 AssertPtrReturn(ppszUri, VERR_INVALID_POINTER);
899 AssertReturn(!(fPathStyle & ~RTPATH_STR_F_STYLE_MASK) && fPathStyle != RTPATH_STR_F_STYLE_RESERVED, VERR_INVALID_FLAGS);
900 if (fPathStyle == RTPATH_STR_F_STYLE_HOST)
901 fPathStyle = RTPATH_STYLE;
902
903 /*
904 * Let the RTPath code parse the stuff (no reason to duplicate path parsing
905 * and get it slightly wrong here).
906 */
907 union
908 {
909 RTPATHPARSED ParsedPath;
910 uint8_t abPadding[sizeof(RTPATHPARSED)];
911 } u;
912 int rc = RTPathParse(pszPath, &u.ParsedPath, sizeof(u.ParsedPath), fPathStyle);
913 if (RT_SUCCESS(rc) || rc == VERR_BUFFER_OVERFLOW)
914 {
915 /* Skip leading slashes. */
916 if (u.ParsedPath.fProps & RTPATH_PROP_ROOT_SLASH)
917 {
918 if (fPathStyle == RTPATH_STR_F_STYLE_DOS)
919 while (pszPath[0] == '/' || pszPath[0] == '\\')
920 pszPath++;
921 else
922 while (pszPath[0] == '/')
923 pszPath++;
924 }
925 const size_t cchPath = strlen(pszPath);
926
927 /*
928 * Calculate the encoded length and figure destination buffering.
929 */
930 static const char s_szPrefix[] = "file:///";
931 size_t const cchPrefix = sizeof(s_szPrefix) - (u.ParsedPath.fProps & RTPATH_PROP_UNC ? 2 : 1);
932 size_t cchEncoded = rtUriCalcEncodedLength(pszPath, cchPath, fPathStyle != RTPATH_STR_F_STYLE_DOS);
933
934 if (pcchUri)
935 *pcchUri = cchEncoded;
936
937 char *pszDst;
938 char *pszFreeMe = NULL;
939 if (!cbUri || *ppszUri == NULL)
940 {
941 cbUri = RT_MAX(cbUri, cchPrefix + cchEncoded + 1);
942 *ppszUri = pszFreeMe = pszDst = RTStrAlloc(cbUri);
943 AssertReturn(pszDst, VERR_NO_STR_MEMORY);
944 }
945 else if (cchEncoded < cbUri)
946 pszDst = *ppszUri;
947 else
948 return VERR_BUFFER_OVERFLOW;
949
950 /*
951 * Construct the URI.
952 */
953 memcpy(pszDst, s_szPrefix, cchPrefix);
954 pszDst[cchPrefix] = '\0';
955 rc = rtUriEncodeIntoBuffer(pszPath, cchPath, fPathStyle != RTPATH_STR_F_STYLE_DOS, &pszDst[cchPrefix], cbUri - cchPrefix);
956 if (RT_SUCCESS(rc))
957 {
958 Assert(strlen(pszDst) == cbUri - 1);
959 if (fPathStyle == RTPATH_STR_F_STYLE_DOS)
960 RTPathChangeToUnixSlashes(pszDst, true /*fForce*/);
961 return VINF_SUCCESS;
962 }
963
964 AssertRC(rc); /* Impossible! rtUriCalcEncodedLength or something above is busted! */
965 if (pszFreeMe)
966 RTStrFree(pszFreeMe);
967 }
968 return rc;
969}
970
971
972RTDECL(char *) RTUriFileCreate(const char *pszPath)
973{
974 char *pszUri = NULL;
975 int rc = RTUriFileCreateEx(pszPath, RTPATH_STR_F_STYLE_HOST, &pszUri, 0 /*cbUri*/, NULL /*pcchUri*/);
976 if (RT_SUCCESS(rc))
977 return pszUri;
978 return NULL;
979}
980
981
982RTDECL(int) RTUriFilePathEx(const char *pszUri, uint32_t fPathStyle, char **ppszPath, size_t cbPath, size_t *pcchPath)
983{
984 /*
985 * Validate and adjust input.
986 */
987 if (pcchPath)
988 {
989 AssertPtrReturn(pcchPath, VERR_INVALID_POINTER);
990 *pcchPath = ~(size_t)0;
991 }
992 AssertPtrReturn(ppszPath, VERR_INVALID_POINTER);
993 AssertReturn(!(fPathStyle & ~RTPATH_STR_F_STYLE_MASK) && fPathStyle != RTPATH_STR_F_STYLE_RESERVED, VERR_INVALID_FLAGS);
994 if (fPathStyle == RTPATH_STR_F_STYLE_HOST)
995 fPathStyle = RTPATH_STYLE;
996 AssertPtrReturn(pszUri, VERR_INVALID_POINTER);
997
998 /*
999 * Check that this is a file URI.
1000 */
1001 if (RTStrNICmp(pszUri, RT_STR_TUPLE("file:")) == 0)
1002 { /* likely */ }
1003 else
1004 return VERR_URI_NOT_FILE_SCHEME;
1005
1006 /*
1007 * We may have a number of variations here, mostly thanks to
1008 * various windows software. First the canonical variations:
1009 * - file:///C:/Windows/System32/kernel32.dll
1010 * - file:///C|/Windows/System32/kernel32.dll
1011 * - file:///C:%5CWindows%5CSystem32%5Ckernel32.dll
1012 * - file://localhost/C:%5CWindows%5CSystem32%5Ckernel32.dll
1013 * - file://cifsserver.dev/systemshare%5CWindows%5CSystem32%5Ckernel32.dll
1014 * - file://cifsserver.dev:139/systemshare%5CWindows%5CSystem32%5Ckernel32.dll (not quite sure here, but whatever)
1015 *
1016 * Legacy variant without any slashes after the schema:
1017 * - file:C:/Windows/System32/kernel32.dll
1018 * - file:C|/Windows/System32%5Ckernel32.dll
1019 * - file:~/.bashrc
1020 * \--path-/
1021 *
1022 * Legacy variant with exactly one slashes after the schema:
1023 * - file:/C:/Windows/System32%5Ckernel32.dll
1024 * - file:/C|/Windows/System32/kernel32.dll
1025 * - file:/usr/bin/env
1026 * \---path---/
1027 *
1028 * Legacy variant with two slashes after the schema and an unescaped DOS path:
1029 * - file://C:/Windows/System32\kernel32.dll (**)
1030 * - file://C|/Windows/System32\kernel32.dll
1031 * \---path---------------------/
1032 * -- authority, with ':' as non-working port separator
1033 *
1034 * Legacy variant with exactly four slashes after the schema and an unescaped DOS path.
1035 * - file:////C:/Windows\System32\user32.dll
1036 *
1037 * Legacy variant with four or more slashes after the schema and an unescaped UNC path:
1038 * - file:////cifsserver.dev/systemshare/System32%\kernel32.dll
1039 * - file://///cifsserver.dev/systemshare/System32\kernel32.dll
1040 * \---path--------------------------------------------/
1041 *
1042 * The two unescaped variants shouldn't be handed to rtUriParse, which
1043 * is good as we cannot actually handle the one marked by (**). So, handle
1044 * those two specially when parsing.
1045 */
1046 RTURIPARSED Parsed;
1047 int rc;
1048 size_t cSlashes = 0;
1049 while (pszUri[5 + cSlashes] == '/')
1050 cSlashes++;
1051 if ( (cSlashes == 2 || cSlashes == 4)
1052 && RT_C_IS_ALPHA(pszUri[5 + cSlashes])
1053 && (pszUri[5 + cSlashes + 1] == ':' || pszUri[5 + cSlashes + 1] == '|'))
1054 {
1055 RT_ZERO(Parsed); /* RTURIPARSED_F_CONTAINS_ESCAPED_CHARS is now clear. */
1056 Parsed.offPath = 5 + cSlashes;
1057 Parsed.cchPath = strlen(&pszUri[Parsed.offPath]);
1058 rc = RTStrValidateEncoding(&pszUri[Parsed.offPath]);
1059 }
1060 else if (cSlashes >= 4)
1061 {
1062 RT_ZERO(Parsed);
1063 Parsed.fFlags = cSlashes > 4 ? RTURIPARSED_F_CONTAINS_ESCAPED_CHARS : 0;
1064 Parsed.offPath = 5 + cSlashes - 2;
1065 Parsed.cchPath = strlen(&pszUri[Parsed.offPath]);
1066 rc = RTStrValidateEncoding(&pszUri[Parsed.offPath]);
1067 }
1068 else
1069 rc = rtUriParse(pszUri, &Parsed);
1070 if (RT_SUCCESS(rc))
1071 {
1072 /*
1073 * Ignore localhost as hostname (it's implicit).
1074 */
1075 static char const s_szLocalhost[] = "localhost";
1076 if ( Parsed.cchAuthorityHost == sizeof(s_szLocalhost) - 1U
1077 && RTStrNICmp(&pszUri[Parsed.offAuthorityHost], RT_STR_TUPLE(s_szLocalhost)) == 0)
1078 {
1079 Parsed.cchAuthorityHost = 0;
1080 Parsed.cchAuthority = 0;
1081 }
1082
1083 /*
1084 * Ignore leading path slash/separator if we detect a DOS drive letter
1085 * and we don't have a host name.
1086 */
1087 if ( Parsed.cchPath >= 3
1088 && Parsed.cchAuthorityHost == 0
1089 && pszUri[Parsed.offPath] == '/' /* Leading path slash/separator. */
1090 && ( pszUri[Parsed.offPath + 2] == ':' /* Colon after drive letter. */
1091 || pszUri[Parsed.offPath + 2] == '|') /* Colon alternative. */
1092 && RT_C_IS_ALPHA(pszUri[Parsed.offPath + 1]) ) /* Drive letter. */
1093 {
1094 Parsed.offPath++;
1095 Parsed.cchPath--;
1096 }
1097
1098 /*
1099 * Calculate the size of the encoded result.
1100 *
1101 * Since we're happily returning "C:/Windows/System32/kernel.dll"
1102 * style paths when the caller requested UNIX style paths, we will
1103 * return straight UNC paths too ("//cifsserver/share/dir/file").
1104 */
1105 size_t cchDecodedHost = 0;
1106 size_t cbResult;
1107 if (Parsed.fFlags & RTURIPARSED_F_CONTAINS_ESCAPED_CHARS)
1108 {
1109 cchDecodedHost = rtUriCalcDecodedLength(&pszUri[Parsed.offAuthorityHost], Parsed.cchAuthorityHost);
1110 cbResult = cchDecodedHost + rtUriCalcDecodedLength(&pszUri[Parsed.offPath], Parsed.cchPath) + 1;
1111 }
1112 else
1113 {
1114 cchDecodedHost = 0;
1115 cbResult = Parsed.cchAuthorityHost + Parsed.cchPath + 1;
1116 }
1117 if (pcchPath)
1118 *pcchPath = cbResult - 1;
1119 if (cbResult > 1)
1120 {
1121 /*
1122 * Prepare the necessary buffer space for the result.
1123 */
1124 char *pszDst;
1125 char *pszFreeMe = NULL;
1126 if (!cbPath || *ppszPath == NULL)
1127 {
1128 cbPath = RT_MAX(cbPath, cbResult);
1129 *ppszPath = pszFreeMe = pszDst = RTStrAlloc(cbPath);
1130 AssertReturn(pszDst, VERR_NO_STR_MEMORY);
1131 }
1132 else if (cbResult <= cbPath)
1133 pszDst = *ppszPath;
1134 else
1135 return VERR_BUFFER_OVERFLOW;
1136
1137 /*
1138 * Compose the result.
1139 */
1140 if (Parsed.fFlags & RTURIPARSED_F_CONTAINS_ESCAPED_CHARS)
1141 {
1142 rc = rtUriDecodeIntoBuffer(&pszUri[Parsed.offAuthorityHost],Parsed.cchAuthorityHost,
1143 pszDst, cchDecodedHost + 1);
1144 Assert(RT_SUCCESS(rc) && strlen(pszDst) == cchDecodedHost);
1145 if (RT_SUCCESS(rc))
1146 rc = rtUriDecodeIntoBuffer(&pszUri[Parsed.offPath], Parsed.cchPath,
1147 &pszDst[cchDecodedHost], cbResult - cchDecodedHost);
1148 Assert(RT_SUCCESS(rc) && strlen(pszDst) == cbResult - 1);
1149 }
1150 else
1151 {
1152 memcpy(pszDst, &pszUri[Parsed.offAuthorityHost], Parsed.cchAuthorityHost);
1153 memcpy(&pszDst[Parsed.cchAuthorityHost], &pszUri[Parsed.offPath], Parsed.cchPath);
1154 pszDst[cbResult - 1] = '\0';
1155 }
1156 if (RT_SUCCESS(rc))
1157 {
1158 /*
1159 * Convert the DOS drive letter colon alternative ('|') into a colon.
1160 * We do this regardless of the desired path style.
1161 */
1162 if ( RT_C_IS_ALPHA(pszDst[0])
1163 && pszDst[1] == '|')
1164 pszDst[1] = ':';
1165
1166 /*
1167 * Fix slashes.
1168 */
1169 if (fPathStyle == RTPATH_STR_F_STYLE_DOS)
1170 RTPathChangeToDosSlashes(pszDst, true);
1171 else if (fPathStyle == RTPATH_STR_F_STYLE_UNIX)
1172 RTPathChangeToUnixSlashes(pszDst, true); /** @todo not quite sure how this actually makes sense... */
1173 else
1174 AssertFailed();
1175 return rc;
1176 }
1177
1178 /* bail out */
1179 RTStrFree(pszFreeMe);
1180 }
1181 else
1182 rc = VERR_PATH_ZERO_LENGTH;
1183 }
1184 return rc;
1185}
1186
1187
1188RTDECL(char *) RTUriFilePath(const char *pszUri)
1189{
1190 char *pszPath = NULL;
1191 int rc = RTUriFilePathEx(pszUri, RTPATH_STR_F_STYLE_HOST, &pszPath, 0 /*cbPath*/, NULL /*pcchPath*/);
1192 if (RT_SUCCESS(rc))
1193 return pszPath;
1194 return NULL;
1195}
1196
Note: See TracBrowser for help on using the repository browser.

© 2024 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette