VirtualBox

source: vbox/trunk/src/VBox/Runtime/common/misc/uri.cpp@ 74978

Last change on this file since 74978 was 74424, checked in by vboxsync, 6 years ago

IPRT/uri: Better handling of empty port specifiers. bugref:9249

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 39.6 KB
Line 
1/* $Id: uri.cpp 74424 2018-09-22 20:00:36Z vboxsync $ */
2/** @file
3 * IPRT - Uniform Resource Identifier handling.
4 */
5
6/*
7 * Copyright (C) 2011-2017 Oracle Corporation
8 *
9 * This file is part of VirtualBox Open Source Edition (OSE), as
10 * available from http://www.virtualbox.org. This file is free software;
11 * you can redistribute it and/or modify it under the terms of the GNU
12 * General Public License (GPL) as published by the Free Software
13 * Foundation, in version 2 as it comes in the "COPYING" file of the
14 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
15 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
16 *
17 * The contents of this file may alternatively be used under the terms
18 * of the Common Development and Distribution License Version 1.0
19 * (CDDL) only, as it comes in the "COPYING.CDDL" file of the
20 * VirtualBox OSE distribution, in which case the provisions of the
21 * CDDL are applicable instead of those of the GPL.
22 *
23 * You may elect to license modified versions of this file under the
24 * terms and conditions of either the GPL or the CDDL or both.
25 */
26
27
28/*********************************************************************************************************************************
29* Header Files *
30*********************************************************************************************************************************/
31#include <iprt/uri.h>
32
33#include <iprt/assert.h>
34#include <iprt/ctype.h>
35#include <iprt/path.h>
36#include <iprt/string.h>
37
38
39/*********************************************************************************************************************************
40* Defined Constants And Macros *
41*********************************************************************************************************************************/
42/** Internal magic value we use to check if a RTURIPARSED structure has made it thru RTUriParse. */
43#define RTURIPARSED_MAGIC UINT32_C(0x439e0745)
44
45
/* General URI format:

     foo://example.com:8042/over/there?name=ferret#nose
     \_/   \______________/\_________/ \_________/ \__/
      |           |            |            |        |
   scheme     authority       path        query   fragment
      |   _____________________|__
     / \ /                        \
     urn:example:animal:ferret:nose
*/
56
57
/**
 * Tests whether a character must be percent-escaped when encoding.
 *
 * The excluded set is:
 *   control + space = 0x00 thru 0x20
 *   delims          = '<' , '>' , '#' , '%' , '"'
 *   unwise          = '{' , '}' , '|' , '\' , '^' , '[' , ']' , '`'
 *
 * @note The argument is evaluated several times, so it must not have side
 *       effects.
 */
#define URI_EXCLUDED(a) \
    (   ((a) >= 0x00 && (a) <= ' ') \
     || (a) == '"' || (a) == '#' || (a) == '%' \
     || (a) == '<' || (a) == '>' \
     || ((a) >= '[' && (a) <= '^') \
     || (a) == '`' \
     || ((a) >= '{' && (a) <= '}') )
71
72static char *rtUriPercentEncodeN(const char *pszString, size_t cchMax)
73{
74 if (!pszString)
75 return NULL;
76
77 int rc = VINF_SUCCESS;
78
79 size_t cbLen = RT_MIN(strlen(pszString), cchMax);
80 /* The new string can be max 3 times in size of the original string. */
81 char *pszNew = RTStrAlloc(cbLen * 3 + 1);
82 if (!pszNew)
83 return NULL;
84
85 char *pszRes = NULL;
86 size_t iIn = 0;
87 size_t iOut = 0;
88 while (iIn < cbLen)
89 {
90 if (URI_EXCLUDED(pszString[iIn]))
91 {
92 char szNum[3] = { 0, 0, 0 };
93 RTStrFormatU8(&szNum[0], 3, pszString[iIn++], 16, 2, 2, RTSTR_F_CAPITAL | RTSTR_F_ZEROPAD);
94 pszNew[iOut++] = '%';
95 pszNew[iOut++] = szNum[0];
96 pszNew[iOut++] = szNum[1];
97 }
98 else
99 pszNew[iOut++] = pszString[iIn++];
100 }
101 if (RT_SUCCESS(rc))
102 {
103 pszNew[iOut] = '\0';
104 if (iOut != iIn)
105 {
106 /* If the source and target strings have different size, recreate
107 * the target string with the correct size. */
108 pszRes = RTStrDupN(pszNew, iOut);
109 RTStrFree(pszNew);
110 }
111 else
112 pszRes = pszNew;
113 }
114 else
115 RTStrFree(pszNew);
116
117 return pszRes;
118}
119
120
121/**
122 * Calculates the encoded string length.
123 *
124 * @returns Number of chars (excluding the terminator).
125 * @param pszString The string to encode.
126 * @param cchMax The maximum string length (e.g. RTSTR_MAX).
127 * @param fEncodeDosSlash Whether to encode DOS slashes or not.
128 */
129static size_t rtUriCalcEncodedLength(const char *pszString, size_t cchMax, bool fEncodeDosSlash)
130{
131 size_t cchEncoded = 0;
132 if (pszString)
133 {
134 size_t cchSrcLeft = RTStrNLen(pszString, cchMax);
135 while (cchSrcLeft-- > 0)
136 {
137 char const ch = *pszString++;
138 if (!URI_EXCLUDED(ch) || (ch == '\\' && !fEncodeDosSlash))
139 cchEncoded += 1;
140 else
141 cchEncoded += 3;
142 }
143 }
144 return cchEncoded;
145}
146
147
148/**
149 * Encodes an URI into a caller allocated buffer.
150 *
151 * @returns IPRT status code.
152 * @param pszString The string to encode.
153 * @param cchMax The maximum string length (e.g. RTSTR_MAX).
154 * @param fEncodeDosSlash Whether to encode DOS slashes or not.
155 * @param pszDst The destination buffer.
156 * @param cbDst The size of the destination buffer.
157 */
158static int rtUriEncodeIntoBuffer(const char *pszString, size_t cchMax, bool fEncodeDosSlash, char *pszDst, size_t cbDst)
159{
160 AssertReturn(pszString, VERR_INVALID_POINTER);
161 AssertPtrReturn(pszDst, VERR_INVALID_POINTER);
162
163 /*
164 * We do buffer size checking up front and every time we encode a special
165 * character. That's faster than checking for each char.
166 */
167 size_t cchSrcLeft = RTStrNLen(pszString, cchMax);
168 AssertMsgReturn(cbDst > cchSrcLeft, ("cbDst=%zu cchSrcLeft=%zu\n", cbDst, cchSrcLeft), VERR_BUFFER_OVERFLOW);
169 cbDst -= cchSrcLeft;
170
171 while (cchSrcLeft-- > 0)
172 {
173 char const ch = *pszString++;
174 if (!URI_EXCLUDED(ch) || (ch == '\\' && !fEncodeDosSlash))
175 *pszDst++ = ch;
176 else
177 {
178 AssertReturn(cbDst >= 3, VERR_BUFFER_OVERFLOW); /* 2 extra bytes + zero terminator. */
179 cbDst -= 2;
180
181 *pszDst++ = '%';
182 ssize_t cchTmp = RTStrFormatU8(pszDst, 3, (unsigned char)ch, 16, 2, 2, RTSTR_F_CAPITAL | RTSTR_F_ZEROPAD);
183 Assert(cchTmp == 2); NOREF(cchTmp);
184 pszDst += 2;
185 }
186 }
187
188 *pszDst = '\0';
189 return VINF_SUCCESS;
190}
191
192
193static char *rtUriPercentDecodeN(const char *pszString, size_t cchString)
194{
195 AssertPtrReturn(pszString, NULL);
196 AssertReturn(memchr(pszString, '\0', cchString) == NULL, NULL);
197
198 /*
199 * The new string can only get smaller, so use the input length as a
200 * staring buffer size.
201 */
202 char *pszDecoded = RTStrAlloc(cchString + 1);
203 if (pszDecoded)
204 {
205 /*
206 * Knowing that the pszString itself is valid UTF-8, we only have to
207 * validate the escape sequences.
208 */
209 size_t cchLeft = cchString;
210 char const *pchSrc = pszString;
211 char *pchDst = pszDecoded;
212 while (cchLeft > 0)
213 {
214 const char *pchPct = (const char *)memchr(pchSrc, '%', cchLeft);
215 if (pchPct)
216 {
217 size_t cchBefore = pchPct - pchSrc;
218 if (cchBefore)
219 {
220 memcpy(pchDst, pchSrc, cchBefore);
221 pchDst += cchBefore;
222 pchSrc += cchBefore;
223 cchLeft -= cchBefore;
224 }
225
226 char chHigh, chLow;
227 if ( cchLeft >= 3
228 && RT_C_IS_XDIGIT(chHigh = pchSrc[1])
229 && RT_C_IS_XDIGIT(chLow = pchSrc[2]))
230 {
231 uint8_t b = RT_C_IS_DIGIT(chHigh) ? chHigh - '0' : (chHigh & ~0x20) - 'A' + 10;
232 b <<= 4;
233 b |= RT_C_IS_DIGIT(chLow) ? chLow - '0' : (chLow & ~0x20) - 'A' + 10;
234 *pchDst++ = (char)b;
235 pchSrc += 3;
236 cchLeft -= 3;
237 }
238 else
239 {
240 AssertFailed();
241 *pchDst++ = *pchSrc++;
242 cchLeft--;
243 }
244 }
245 else
246 {
247 memcpy(pchDst, pchSrc, cchLeft);
248 pchDst += cchLeft;
249 pchSrc += cchLeft;
250 cchLeft = 0;
251 break;
252 }
253 }
254
255 *pchDst = '\0';
256
257 /*
258 * If we've got lof space room in the result string, reallocate it.
259 */
260 size_t cchDecoded = pchDst - pszDecoded;
261 Assert(cchDecoded <= cchString);
262 if (cchString - cchDecoded > 64)
263 RTStrRealloc(&pszDecoded, cchDecoded + 1);
264 }
265 return pszDecoded;
266}
267
268
269/**
270 * Calculates the decoded string length.
271 *
272 * @returns Number of chars (excluding the terminator).
273 * @param pszString The string to decode.
274 * @param cchMax The maximum string length (e.g. RTSTR_MAX).
275 */
276static size_t rtUriCalcDecodedLength(const char *pszString, size_t cchMax)
277{
278 size_t cchDecoded;
279 if (pszString)
280 {
281 size_t cchSrcLeft = cchDecoded = RTStrNLen(pszString, cchMax);
282 while (cchSrcLeft-- > 0)
283 {
284 char const ch = *pszString++;
285 if (ch != '%')
286 { /* typical */}
287 else if ( cchSrcLeft >= 2
288 && RT_C_IS_XDIGIT(pszString[0])
289 && RT_C_IS_XDIGIT(pszString[1]))
290 {
291 cchDecoded -= 2;
292 pszString += 2;
293 cchSrcLeft -= 2;
294 }
295 }
296 }
297 else
298 cchDecoded = 0;
299 return cchDecoded;
300}
301
302
303/**
304 * Decodes a string into a buffer.
305 *
306 * @returns IPRT status code.
307 * @param pchSrc The source string.
308 * @param cchSrc The max number of bytes to decode in the source string.
309 * @param pszDst The destination buffer.
310 * @param cbDst The size of the buffer (including terminator).
311 */
312static int rtUriDecodeIntoBuffer(const char *pchSrc, size_t cchSrc, char *pszDst, size_t cbDst)
313{
314 AssertPtrReturn(pchSrc, VERR_INVALID_POINTER);
315 AssertPtrReturn(pszDst, VERR_INVALID_POINTER);
316
317 /*
318 * Knowing that the pszString itself is valid UTF-8, we only have to
319 * validate the escape sequences.
320 */
321 cchSrc = RTStrNLen(pchSrc, cchSrc);
322 while (cchSrc > 0)
323 {
324 const char *pchPct = (const char *)memchr(pchSrc, '%', cchSrc);
325 if (pchPct)
326 {
327 size_t cchBefore = pchPct - pchSrc;
328 AssertReturn(cchBefore + 1 < cbDst, VERR_BUFFER_OVERFLOW);
329 if (cchBefore)
330 {
331 memcpy(pszDst, pchSrc, cchBefore);
332 pszDst += cchBefore;
333 cbDst -= cchBefore;
334 pchSrc += cchBefore;
335 cchSrc -= cchBefore;
336 }
337
338 char chHigh, chLow;
339 if ( cchSrc >= 3
340 && RT_C_IS_XDIGIT(chHigh = pchSrc[1])
341 && RT_C_IS_XDIGIT(chLow = pchSrc[2]))
342 {
343 uint8_t b = RT_C_IS_DIGIT(chHigh) ? chHigh - '0' : (chHigh & ~0x20) - 'A' + 10;
344 b <<= 4;
345 b |= RT_C_IS_DIGIT(chLow) ? chLow - '0' : (chLow & ~0x20) - 'A' + 10;
346 *pszDst++ = (char)b;
347 pchSrc += 3;
348 cchSrc -= 3;
349 }
350 else
351 {
352 AssertFailed();
353 *pszDst++ = *pchSrc++;
354 cchSrc--;
355 }
356 cbDst -= 1;
357 }
358 else
359 {
360 AssertReturn(cchSrc < cbDst, VERR_BUFFER_OVERFLOW);
361 memcpy(pszDst, pchSrc, cchSrc);
362 pszDst += cchSrc;
363 cbDst -= cchSrc;
364 pchSrc += cchSrc;
365 cchSrc = 0;
366 break;
367 }
368 }
369
370 AssertReturn(cbDst > 0, VERR_BUFFER_OVERFLOW);
371 *pszDst = '\0';
372 return VINF_SUCCESS;
373}
374
375
376
377static int rtUriParse(const char *pszUri, PRTURIPARSED pParsed)
378{
379 /*
380 * Validate the input and clear the output.
381 */
382 AssertPtrReturn(pParsed, VERR_INVALID_POINTER);
383 RT_ZERO(*pParsed);
384 pParsed->uAuthorityPort = UINT32_MAX;
385
386 AssertPtrReturn(pszUri, VERR_INVALID_POINTER);
387
388 size_t const cchUri = strlen(pszUri);
389 if (RT_LIKELY(cchUri >= 3)) { /* likely */ }
390 else return cchUri ? VERR_URI_TOO_SHORT : VERR_URI_EMPTY;
391
392 /*
393 * Validating escaped text sequences is much simpler if we know that
394 * that the base URI string is valid. Also, we don't necessarily trust
395 * the developer calling us to remember to do this.
396 */
397 int rc = RTStrValidateEncoding(pszUri);
398 AssertRCReturn(rc, rc);
399
400 /*
401 * RFC-3986, section 3.1:
402 * scheme = ALPHA *( ALPHA / DIGIT / "+" / "-" / "." )
403 *
404 * The scheme ends with a ':', which we also skip here.
405 */
406 size_t off = 0;
407 char ch = pszUri[off++];
408 if (RT_LIKELY(RT_C_IS_ALPHA(ch))) { /* likely */ }
409 else return VERR_URI_INVALID_SCHEME;
410 for (;;)
411 {
412 ch = pszUri[off];
413 if (ch == ':')
414 break;
415 if (RT_LIKELY(RT_C_IS_ALNUM(ch) || ch == '.' || ch == '-' || ch == '+')) { /* likely */ }
416 else return VERR_URI_INVALID_SCHEME;
417 off++;
418 }
419 pParsed->cchScheme = off;
420
421 /* Require the scheme length to be at least two chars so we won't confuse
422 it with a path starting with a DOS drive letter specification. */
423 if (RT_LIKELY(off >= 2)) { /* likely */ }
424 else return VERR_URI_INVALID_SCHEME;
425
426 off++; /* (skip colon) */
427
428 /*
429 * Find the end of the path, we'll need this several times.
430 * Also, while we're potentially scanning the whole thing, check for '%'.
431 */
432 size_t const offHash = RTStrOffCharOrTerm(&pszUri[off], '#') + off;
433 size_t const offQuestionMark = RTStrOffCharOrTerm(&pszUri[off], '?') + off;
434
435 if (memchr(pszUri, '%', cchUri) != NULL)
436 pParsed->fFlags |= RTURIPARSED_F_CONTAINS_ESCAPED_CHARS;
437
438 /*
439 * RFC-3986, section 3.2:
440 * The authority component is preceeded by a double slash ("//")...
441 */
442 if ( pszUri[off] == '/'
443 && pszUri[off + 1] == '/')
444 {
445 off += 2;
446 pParsed->offAuthority = pParsed->offAuthorityUsername = pParsed->offAuthorityPassword = pParsed->offAuthorityHost = off;
447 pParsed->fFlags |= RTURIPARSED_F_HAS_AUTHORITY;
448
449 /*
450 * RFC-3986, section 3.2:
451 * ...and is terminated by the next slash ("/"), question mark ("?"),
452 * or number sign ("#") character, or by the end of the URI.
453 */
454 const char *pszAuthority = &pszUri[off];
455 size_t cchAuthority = RTStrOffCharOrTerm(pszAuthority, '/');
456 cchAuthority = RT_MIN(cchAuthority, offHash - off);
457 cchAuthority = RT_MIN(cchAuthority, offQuestionMark - off);
458 pParsed->cchAuthority = cchAuthority;
459
460 /* The Authority can be empty, like for: file:///usr/bin/grep */
461 if (cchAuthority > 0)
462 {
463 pParsed->cchAuthorityHost = cchAuthority;
464
465 /*
466 * If there is a userinfo part, it is ended by a '@'.
467 */
468 const char *pszAt = (const char *)memchr(pszAuthority, '@', cchAuthority);
469 if (pszAt)
470 {
471 size_t cchTmp = pszAt - pszAuthority;
472 pParsed->offAuthorityHost += cchTmp + 1;
473 pParsed->cchAuthorityHost -= cchTmp + 1;
474
475 /* If there is a password part, it's separated from the username with a colon. */
476 const char *pszColon = (const char *)memchr(pszAuthority, ':', cchTmp);
477 if (pszColon)
478 {
479 pParsed->cchAuthorityUsername = pszColon - pszAuthority;
480 pParsed->offAuthorityPassword = &pszColon[1] - pszUri;
481 pParsed->cchAuthorityPassword = pszAt - &pszColon[1];
482 }
483 else
484 {
485 pParsed->cchAuthorityUsername = cchTmp;
486 pParsed->offAuthorityPassword = off + cchTmp;
487 }
488 }
489
490 /*
491 * If there is a port part, its after the last colon in the host part.
492 */
493 const char *pszColon = (const char *)memrchr(&pszUri[pParsed->offAuthorityHost], ':', pParsed->cchAuthorityHost);
494 if (pszColon)
495 {
496 size_t cchTmp = &pszUri[pParsed->offAuthorityHost + pParsed->cchAuthorityHost] - &pszColon[1];
497 pParsed->cchAuthorityHost -= cchTmp + 1;
498 pParsed->fFlags |= RTURIPARSED_F_HAS_PORT;
499 if (cchTmp > 0)
500 {
501 pParsed->uAuthorityPort = 0;
502 while (cchTmp-- > 0)
503 {
504 ch = *++pszColon;
505 if ( RT_C_IS_DIGIT(ch)
506 && pParsed->uAuthorityPort < UINT32_MAX / UINT32_C(10))
507 {
508 pParsed->uAuthorityPort *= 10;
509 pParsed->uAuthorityPort += ch - '0';
510 }
511 else
512 return VERR_URI_INVALID_PORT_NUMBER;
513 }
514 }
515 }
516 }
517
518 /* Skip past the authority. */
519 off += cchAuthority;
520 }
521 else
522 pParsed->offAuthority = pParsed->offAuthorityUsername = pParsed->offAuthorityPassword = pParsed->offAuthorityHost = off;
523
524 /*
525 * RFC-3986, section 3.3: Path
526 * The path is terminated by the first question mark ("?")
527 * or number sign ("#") character, or by the end of the URI.
528 */
529 pParsed->offPath = off;
530 pParsed->cchPath = RT_MIN(offHash, offQuestionMark) - off;
531 off += pParsed->cchPath;
532
533 /*
534 * RFC-3986, section 3.4: Query
535 * The query component is indicated by the first question mark ("?")
536 * character and terminated by a number sign ("#") character or by the
537 * end of the URI.
538 */
539 if ( off == offQuestionMark
540 && off < cchUri)
541 {
542 Assert(pszUri[offQuestionMark] == '?');
543 pParsed->offQuery = ++off;
544 pParsed->cchQuery = offHash - off;
545 off = offHash;
546 }
547 else
548 {
549 Assert(!pszUri[offQuestionMark]);
550 pParsed->offQuery = off;
551 }
552
553 /*
554 * RFC-3986, section 3.5: Fragment
555 * A fragment identifier component is indicated by the presence of a
556 * number sign ("#") character and terminated by the end of the URI.
557 */
558 if ( off == offHash
559 && off < cchUri)
560 {
561 pParsed->offFragment = ++off;
562 pParsed->cchFragment = cchUri - off;
563 }
564 else
565 {
566 Assert(!pszUri[offHash]);
567 pParsed->offFragment = off;
568 }
569
570 /*
571 * If there are any escape sequences, validate them.
572 *
573 * This is reasonably simple as we already know that the string is valid UTF-8
574 * before they get decoded. Thus we only have to validate the escaped sequences.
575 */
576 if (pParsed->fFlags & RTURIPARSED_F_CONTAINS_ESCAPED_CHARS)
577 {
578 const char *pchSrc = (const char *)memchr(pszUri, '%', cchUri);
579 AssertReturn(pchSrc, VERR_INTERNAL_ERROR);
580 do
581 {
582 char szUtf8Seq[8];
583 unsigned cchUtf8Seq = 0;
584 unsigned cchNeeded = 0;
585 size_t cchLeft = &pszUri[cchUri] - pchSrc;
586 do
587 {
588 if (cchLeft >= 3)
589 {
590 char chHigh = pchSrc[1];
591 char chLow = pchSrc[2];
592 if ( RT_C_IS_XDIGIT(chHigh)
593 && RT_C_IS_XDIGIT(chLow))
594 {
595 uint8_t b = RT_C_IS_DIGIT(chHigh) ? chHigh - '0' : (chHigh & ~0x20) - 'A' + 10;
596 b <<= 4;
597 b |= RT_C_IS_DIGIT(chLow) ? chLow - '0' : (chLow & ~0x20) - 'A' + 10;
598
599 if (!(b & 0x80))
600 {
601 /* We don't want the string to be terminated prematurely. */
602 if (RT_LIKELY(b != 0)) { /* likely */ }
603 else return VERR_URI_ESCAPED_ZERO;
604
605 /* Check that we're not expecting more UTF-8 bytes. */
606 if (RT_LIKELY(cchNeeded == 0)) { /* likely */ }
607 else return VERR_URI_MISSING_UTF8_CONTINUATION_BYTE;
608 }
609 /* Are we waiting UTF-8 bytes? */
610 else if (cchNeeded > 0)
611 {
612 if (RT_LIKELY(!(b & 0x40))) { /* likely */ }
613 else return VERR_URI_INVALID_ESCAPED_UTF8_CONTINUATION_BYTE;
614
615 szUtf8Seq[cchUtf8Seq++] = (char)b;
616 if (--cchNeeded == 0)
617 {
618 szUtf8Seq[cchUtf8Seq] = '\0';
619 rc = RTStrValidateEncoding(szUtf8Seq);
620 if (RT_FAILURE(rc))
621 return VERR_URI_ESCAPED_CHARS_NOT_VALID_UTF8;
622 cchUtf8Seq = 0;
623 }
624 }
625 /* Start a new UTF-8 sequence. */
626 else
627 {
628 if ((b & 0xf8) == 0xf0)
629 cchNeeded = 3;
630 else if ((b & 0xf0) == 0xe0)
631 cchNeeded = 2;
632 else if ((b & 0xe0) == 0xc0)
633 cchNeeded = 1;
634 else
635 return VERR_URI_INVALID_ESCAPED_UTF8_LEAD_BYTE;
636 szUtf8Seq[0] = (char)b;
637 cchUtf8Seq = 1;
638 }
639 pchSrc += 3;
640 cchLeft -= 3;
641 }
642 else
643 return VERR_URI_INVALID_ESCAPE_SEQ;
644 }
645 else
646 return VERR_URI_INVALID_ESCAPE_SEQ;
647 } while (cchLeft > 0 && pchSrc[0] == '%');
648
649 /* Check that we're not expecting more UTF-8 bytes. */
650 if (RT_LIKELY(cchNeeded == 0)) { /* likely */ }
651 else return VERR_URI_MISSING_UTF8_CONTINUATION_BYTE;
652
653 /* next */
654 pchSrc = (const char *)memchr(pchSrc, '%', cchLeft);
655 } while (pchSrc);
656 }
657
658 pParsed->u32Magic = RTURIPARSED_MAGIC;
659 return VINF_SUCCESS;
660}
661
662
663RTDECL(int) RTUriParse(const char *pszUri, PRTURIPARSED pParsed)
664{
665 return rtUriParse(pszUri, pParsed);
666}
667
668
669RTDECL(char *) RTUriParsedScheme(const char *pszUri, PCRTURIPARSED pParsed)
670{
671 AssertPtrReturn(pszUri, NULL);
672 AssertPtrReturn(pParsed, NULL);
673 AssertReturn(pParsed->u32Magic == RTURIPARSED_MAGIC, NULL);
674 return RTStrDupN(pszUri, pParsed->cchScheme);
675}
676
677
678RTDECL(char *) RTUriParsedAuthority(const char *pszUri, PCRTURIPARSED pParsed)
679{
680 AssertPtrReturn(pszUri, NULL);
681 AssertPtrReturn(pParsed, NULL);
682 AssertReturn(pParsed->u32Magic == RTURIPARSED_MAGIC, NULL);
683 if (pParsed->cchAuthority || (pParsed->fFlags & RTURIPARSED_F_HAS_AUTHORITY))
684 return rtUriPercentDecodeN(&pszUri[pParsed->offAuthority], pParsed->cchAuthority);
685 return NULL;
686}
687
688
689RTDECL(char *) RTUriParsedAuthorityUsername(const char *pszUri, PCRTURIPARSED pParsed)
690{
691 AssertPtrReturn(pszUri, NULL);
692 AssertPtrReturn(pParsed, NULL);
693 AssertReturn(pParsed->u32Magic == RTURIPARSED_MAGIC, NULL);
694 if (pParsed->cchAuthorityUsername)
695 return rtUriPercentDecodeN(&pszUri[pParsed->offAuthorityUsername], pParsed->cchAuthorityUsername);
696 return NULL;
697}
698
699
700RTDECL(char *) RTUriParsedAuthorityPassword(const char *pszUri, PCRTURIPARSED pParsed)
701{
702 AssertPtrReturn(pszUri, NULL);
703 AssertPtrReturn(pParsed, NULL);
704 AssertReturn(pParsed->u32Magic == RTURIPARSED_MAGIC, NULL);
705 if (pParsed->cchAuthorityPassword)
706 return rtUriPercentDecodeN(&pszUri[pParsed->offAuthorityPassword], pParsed->cchAuthorityPassword);
707 return NULL;
708}
709
710
711RTDECL(char *) RTUriParsedAuthorityHost(const char *pszUri, PCRTURIPARSED pParsed)
712{
713 AssertPtrReturn(pszUri, NULL);
714 AssertPtrReturn(pParsed, NULL);
715 AssertReturn(pParsed->u32Magic == RTURIPARSED_MAGIC, NULL);
716 if (pParsed->cchAuthorityHost)
717 return rtUriPercentDecodeN(&pszUri[pParsed->offAuthorityHost], pParsed->cchAuthorityHost);
718 return NULL;
719}
720
721
722RTDECL(uint32_t) RTUriParsedAuthorityPort(const char *pszUri, PCRTURIPARSED pParsed)
723{
724 AssertPtrReturn(pszUri, UINT32_MAX);
725 AssertPtrReturn(pParsed, UINT32_MAX);
726 AssertReturn(pParsed->u32Magic == RTURIPARSED_MAGIC, UINT32_MAX);
727 return pParsed->uAuthorityPort;
728}
729
730
731RTDECL(char *) RTUriParsedPath(const char *pszUri, PCRTURIPARSED pParsed)
732{
733 AssertPtrReturn(pszUri, NULL);
734 AssertPtrReturn(pParsed, NULL);
735 AssertReturn(pParsed->u32Magic == RTURIPARSED_MAGIC, NULL);
736 if (pParsed->cchPath)
737 return rtUriPercentDecodeN(&pszUri[pParsed->offPath], pParsed->cchPath);
738 return NULL;
739}
740
741
742RTDECL(char *) RTUriParsedQuery(const char *pszUri, PCRTURIPARSED pParsed)
743{
744 AssertPtrReturn(pszUri, NULL);
745 AssertPtrReturn(pParsed, NULL);
746 AssertReturn(pParsed->u32Magic == RTURIPARSED_MAGIC, NULL);
747 if (pParsed->cchQuery)
748 return rtUriPercentDecodeN(&pszUri[pParsed->offQuery], pParsed->cchQuery);
749 return NULL;
750}
751
752
753RTDECL(char *) RTUriParsedFragment(const char *pszUri, PCRTURIPARSED pParsed)
754{
755 AssertPtrReturn(pszUri, NULL);
756 AssertPtrReturn(pParsed, NULL);
757 AssertReturn(pParsed->u32Magic == RTURIPARSED_MAGIC, NULL);
758 if (pParsed->cchFragment)
759 return rtUriPercentDecodeN(&pszUri[pParsed->offFragment], pParsed->cchFragment);
760 return NULL;
761}
762
763
764RTDECL(char *) RTUriCreate(const char *pszScheme, const char *pszAuthority, const char *pszPath, const char *pszQuery,
765 const char *pszFragment)
766{
767 if (!pszScheme) /* Scheme is minimum requirement */
768 return NULL;
769
770 char *pszResult = 0;
771 char *pszAuthority1 = 0;
772 char *pszPath1 = 0;
773 char *pszQuery1 = 0;
774 char *pszFragment1 = 0;
775
776 do
777 {
778 /* Create the percent encoded strings and calculate the necessary uri
779 * length. */
780 size_t cbSize = strlen(pszScheme) + 1 + 1; /* plus zero byte */
781 if (pszAuthority)
782 {
783 pszAuthority1 = rtUriPercentEncodeN(pszAuthority, RTSTR_MAX);
784 if (!pszAuthority1)
785 break;
786 cbSize += strlen(pszAuthority1) + 2;
787 }
788 if (pszPath)
789 {
790 pszPath1 = rtUriPercentEncodeN(pszPath, RTSTR_MAX);
791 if (!pszPath1)
792 break;
793 cbSize += strlen(pszPath1);
794 }
795 if (pszQuery)
796 {
797 pszQuery1 = rtUriPercentEncodeN(pszQuery, RTSTR_MAX);
798 if (!pszQuery1)
799 break;
800 cbSize += strlen(pszQuery1) + 1;
801 }
802 if (pszFragment)
803 {
804 pszFragment1 = rtUriPercentEncodeN(pszFragment, RTSTR_MAX);
805 if (!pszFragment1)
806 break;
807 cbSize += strlen(pszFragment1) + 1;
808 }
809
810 char *pszTmp = pszResult = (char *)RTStrAlloc(cbSize);
811 if (!pszResult)
812 break;
813 RT_BZERO(pszTmp, cbSize);
814
815 /* Compose the target uri string. */
816 RTStrCatP(&pszTmp, &cbSize, pszScheme);
817 RTStrCatP(&pszTmp, &cbSize, ":");
818 if (pszAuthority1)
819 {
820 RTStrCatP(&pszTmp, &cbSize, "//");
821 RTStrCatP(&pszTmp, &cbSize, pszAuthority1);
822 }
823 if (pszPath1)
824 {
825 RTStrCatP(&pszTmp, &cbSize, pszPath1);
826 }
827 if (pszQuery1)
828 {
829 RTStrCatP(&pszTmp, &cbSize, "?");
830 RTStrCatP(&pszTmp, &cbSize, pszQuery1);
831 }
832 if (pszFragment1)
833 {
834 RTStrCatP(&pszTmp, &cbSize, "#");
835 RTStrCatP(&pszTmp, &cbSize, pszFragment1);
836 }
837 } while (0);
838
839 /* Cleanup */
840 if (pszAuthority1)
841 RTStrFree(pszAuthority1);
842 if (pszPath1)
843 RTStrFree(pszPath1);
844 if (pszQuery1)
845 RTStrFree(pszQuery1);
846 if (pszFragment1)
847 RTStrFree(pszFragment1);
848
849 return pszResult;
850}
851
852
853RTDECL(bool) RTUriIsSchemeMatch(const char *pszUri, const char *pszScheme)
854{
855 AssertPtrReturn(pszUri, false);
856 size_t const cchScheme = strlen(pszScheme);
857 return RTStrNICmp(pszUri, pszScheme, cchScheme) == 0
858 && pszUri[cchScheme] == ':';
859}
860
861
862RTDECL(int) RTUriFileCreateEx(const char *pszPath, uint32_t fPathStyle, char **ppszUri, size_t cbUri, size_t *pcchUri)
863{
864 /*
865 * Validate and adjust input. (RTPathParse check pszPath out for us)
866 */
867 if (pcchUri)
868 {
869 AssertPtrReturn(pcchUri, VERR_INVALID_POINTER);
870 *pcchUri = ~(size_t)0;
871 }
872 AssertPtrReturn(ppszUri, VERR_INVALID_POINTER);
873 AssertReturn(!(fPathStyle & ~RTPATH_STR_F_STYLE_MASK) && fPathStyle != RTPATH_STR_F_STYLE_RESERVED, VERR_INVALID_FLAGS);
874 if (fPathStyle == RTPATH_STR_F_STYLE_HOST)
875 fPathStyle = RTPATH_STYLE;
876
877 /*
878 * Let the RTPath code parse the stuff (no reason to duplicate path parsing
879 * and get it slightly wrong here).
880 */
881 RTPATHPARSED ParsedPath;
882 int rc = RTPathParse(pszPath, &ParsedPath, sizeof(ParsedPath), fPathStyle);
883 if (RT_SUCCESS(rc) || rc == VERR_BUFFER_OVERFLOW)
884 {
885 /* Skip leading slashes. */
886 if (ParsedPath.fProps & RTPATH_PROP_ROOT_SLASH)
887 {
888 if (fPathStyle == RTPATH_STR_F_STYLE_DOS)
889 while (pszPath[0] == '/' || pszPath[0] == '\\')
890 pszPath++;
891 else
892 while (pszPath[0] == '/')
893 pszPath++;
894 }
895 const size_t cchPath = strlen(pszPath);
896
897 /*
898 * Calculate the encoded length and figure destination buffering.
899 */
900 static const char s_szPrefix[] = "file:///";
901 size_t const cchPrefix = sizeof(s_szPrefix) - (ParsedPath.fProps & RTPATH_PROP_UNC ? 2 : 1);
902 size_t cchEncoded = rtUriCalcEncodedLength(pszPath, cchPath, fPathStyle != RTPATH_STR_F_STYLE_DOS);
903
904 if (pcchUri)
905 *pcchUri = cchEncoded;
906
907 char *pszDst;
908 char *pszFreeMe = NULL;
909 if (!cbUri || *ppszUri == NULL)
910 {
911 cbUri = RT_MAX(cbUri, cchPrefix + cchEncoded + 1);
912 *ppszUri = pszFreeMe = pszDst = RTStrAlloc(cbUri);
913 AssertReturn(pszDst, VERR_NO_STR_MEMORY);
914 }
915 else if (cchEncoded < cbUri)
916 pszDst = *ppszUri;
917 else
918 return VERR_BUFFER_OVERFLOW;
919
920 /*
921 * Construct the URI.
922 */
923 memcpy(pszDst, s_szPrefix, cchPrefix);
924 pszDst[cchPrefix] = '\0';
925 rc = rtUriEncodeIntoBuffer(pszPath, cchPath, fPathStyle != RTPATH_STR_F_STYLE_DOS, &pszDst[cchPrefix], cbUri - cchPrefix);
926 if (RT_SUCCESS(rc))
927 {
928 Assert(strlen(pszDst) == cbUri - 1);
929 if (fPathStyle == RTPATH_STR_F_STYLE_DOS)
930 RTPathChangeToUnixSlashes(pszDst, true /*fForce*/);
931 return VINF_SUCCESS;
932 }
933
934 AssertRC(rc); /* Impossible! rtUriCalcEncodedLength or something above is busted! */
935 if (pszFreeMe)
936 RTStrFree(pszFreeMe);
937 }
938 return rc;
939}
940
941
942RTDECL(char *) RTUriFileCreate(const char *pszPath)
943{
944 char *pszUri = NULL;
945 int rc = RTUriFileCreateEx(pszPath, RTPATH_STR_F_STYLE_HOST, &pszUri, 0 /*cbUri*/, NULL /*pcchUri*/);
946 if (RT_SUCCESS(rc))
947 return pszUri;
948 return NULL;
949}
950
951
952RTDECL(int) RTUriFilePathEx(const char *pszUri, uint32_t fPathStyle, char **ppszPath, size_t cbPath, size_t *pcchPath)
953{
954 /*
955 * Validate and adjust input.
956 */
957 if (pcchPath)
958 {
959 AssertPtrReturn(pcchPath, VERR_INVALID_POINTER);
960 *pcchPath = ~(size_t)0;
961 }
962 AssertPtrReturn(ppszPath, VERR_INVALID_POINTER);
963 AssertReturn(!(fPathStyle & ~RTPATH_STR_F_STYLE_MASK) && fPathStyle != RTPATH_STR_F_STYLE_RESERVED, VERR_INVALID_FLAGS);
964 if (fPathStyle == RTPATH_STR_F_STYLE_HOST)
965 fPathStyle = RTPATH_STYLE;
966 AssertPtrReturn(pszUri, VERR_INVALID_POINTER);
967
968 /*
969 * Check that this is a file URI.
970 */
971 if (RTStrNICmp(pszUri, RT_STR_TUPLE("file:")) == 0)
972 { /* likely */ }
973 else
974 return VERR_URI_NOT_FILE_SCHEME;
975
976 /*
977 * We may have a number of variations here, mostly thanks to
978 * various windows software. First the canonical variations:
979 * - file:///C:/Windows/System32/kernel32.dll
980 * - file:///C|/Windows/System32/kernel32.dll
981 * - file:///C:%5CWindows%5CSystem32%5Ckernel32.dll
982 * - file://localhost/C:%5CWindows%5CSystem32%5Ckernel32.dll
983 * - file://cifsserver.dev/systemshare%5CWindows%5CSystem32%5Ckernel32.dll
984 * - file://cifsserver.dev:139/systemshare%5CWindows%5CSystem32%5Ckernel32.dll (not quite sure here, but whatever)
985 *
986 * Legacy variant without any slashes after the schema:
987 * - file:C:/Windows/System32/kernel32.dll
988 * - file:C|/Windows/System32%5Ckernel32.dll
989 * - file:~/.bashrc
990 * \--path-/
991 *
992 * Legacy variant with exactly one slashes after the schema:
993 * - file:/C:/Windows/System32%5Ckernel32.dll
994 * - file:/C|/Windows/System32/kernel32.dll
995 * - file:/usr/bin/env
996 * \---path---/
997 *
998 * Legacy variant with two slashes after the schema and an unescaped DOS path:
999 * - file://C:/Windows/System32\kernel32.dll (**)
1000 * - file://C|/Windows/System32\kernel32.dll
1001 * \---path---------------------/
1002 * -- authority, with ':' as non-working port separator
1003 *
1004 * Legacy variant with exactly four slashes after the schema and an unescaped DOS path.
1005 * - file:////C:/Windows\System32\user32.dll
1006 *
1007 * Legacy variant with four or more slashes after the schema and an unescaped UNC path:
1008 * - file:////cifsserver.dev/systemshare/System32%\kernel32.dll
1009 * - file://///cifsserver.dev/systemshare/System32\kernel32.dll
1010 * \---path--------------------------------------------/
1011 *
1012 * The two unescaped variants shouldn't be handed to rtUriParse, which
1013 * is good as we cannot actually handle the one marked by (**). So, handle
1014 * those two special when parsing.
1015 */
1016 RTURIPARSED Parsed;
1017 int rc;
1018 size_t cSlashes = 0;
1019 while (pszUri[5 + cSlashes] == '/')
1020 cSlashes++;
1021 if ( (cSlashes == 2 || cSlashes == 4)
1022 && RT_C_IS_ALPHA(pszUri[5 + cSlashes])
1023 && (pszUri[5 + cSlashes + 1] == ':' || pszUri[5 + cSlashes + 1] == '|'))
1024 {
1025 RT_ZERO(Parsed); /* RTURIPARSED_F_CONTAINS_ESCAPED_CHARS is now clear. */
1026 Parsed.offPath = 5 + cSlashes;
1027 Parsed.cchPath = strlen(&pszUri[Parsed.offPath]);
1028 rc = RTStrValidateEncoding(&pszUri[Parsed.offPath]);
1029 }
1030 else if (cSlashes >= 4)
1031 {
1032 RT_ZERO(Parsed);
1033 Parsed.fFlags = cSlashes > 4 ? RTURIPARSED_F_CONTAINS_ESCAPED_CHARS : 0;
1034 Parsed.offPath = 5 + cSlashes - 2;
1035 Parsed.cchPath = strlen(&pszUri[Parsed.offPath]);
1036 rc = RTStrValidateEncoding(&pszUri[Parsed.offPath]);
1037 }
1038 else
1039 rc = rtUriParse(pszUri, &Parsed);
1040 if (RT_SUCCESS(rc))
1041 {
1042 /*
1043 * Ignore localhost as hostname (it's implicit).
1044 */
1045 static char const s_szLocalhost[] = "localhost";
1046 if ( Parsed.cchAuthorityHost == sizeof(s_szLocalhost) - 1U
1047 && RTStrNICmp(&pszUri[Parsed.offAuthorityHost], RT_STR_TUPLE(s_szLocalhost)) == 0)
1048 {
1049 Parsed.cchAuthorityHost = 0;
1050 Parsed.cchAuthority = 0;
1051 }
1052
1053 /*
1054 * Ignore leading path slash/separator if we detect a DOS drive letter
1055 * and we don't have a host name.
1056 */
1057 if ( Parsed.cchPath >= 3
1058 && Parsed.cchAuthorityHost == 0
1059 && pszUri[Parsed.offPath] == '/' /* Leading path slash/separator. */
1060 && ( pszUri[Parsed.offPath + 2] == ':' /* Colon after drive letter. */
1061 || pszUri[Parsed.offPath + 2] == '|') /* Colon alternative. */
1062 && RT_C_IS_ALPHA(pszUri[Parsed.offPath + 1]) ) /* Drive letter. */
1063 {
1064 Parsed.offPath++;
1065 Parsed.cchPath--;
1066 }
1067
1068 /*
1069 * Calculate the size of the encoded result.
1070 *
1071 * Since we're happily returning "C:/Windows/System32/kernel.dll"
1072 * style paths when the caller requested UNIX style paths, we will
1073 * return straight UNC paths too ("//cifsserver/share/dir/file").
1074 */
1075 size_t cchDecodedHost = 0;
1076 size_t cbResult;
1077 if (Parsed.fFlags & RTURIPARSED_F_CONTAINS_ESCAPED_CHARS)
1078 {
1079 cchDecodedHost = rtUriCalcDecodedLength(&pszUri[Parsed.offAuthorityHost], Parsed.cchAuthorityHost);
1080 cbResult = cchDecodedHost + rtUriCalcDecodedLength(&pszUri[Parsed.offPath], Parsed.cchPath) + 1;
1081 }
1082 else
1083 {
1084 cchDecodedHost = 0;
1085 cbResult = Parsed.cchAuthorityHost + Parsed.cchPath + 1;
1086 }
1087 if (pcchPath)
1088 *pcchPath = cbResult - 1;
1089 if (cbResult > 1)
1090 {
1091 /*
1092 * Prepare the necessary buffer space for the result.
1093 */
1094 char *pszDst;
1095 char *pszFreeMe = NULL;
1096 if (!cbPath || *ppszPath == NULL)
1097 {
1098 cbPath = RT_MAX(cbPath, cbResult);
1099 *ppszPath = pszFreeMe = pszDst = RTStrAlloc(cbPath);
1100 AssertReturn(pszDst, VERR_NO_STR_MEMORY);
1101 }
1102 else if (cbResult <= cbPath)
1103 pszDst = *ppszPath;
1104 else
1105 return VERR_BUFFER_OVERFLOW;
1106
1107 /*
1108 * Compose the result.
1109 */
1110 if (Parsed.fFlags & RTURIPARSED_F_CONTAINS_ESCAPED_CHARS)
1111 {
1112 rc = rtUriDecodeIntoBuffer(&pszUri[Parsed.offAuthorityHost],Parsed.cchAuthorityHost,
1113 pszDst, cchDecodedHost + 1);
1114 Assert(RT_SUCCESS(rc) && strlen(pszDst) == cchDecodedHost);
1115 if (RT_SUCCESS(rc))
1116 rc = rtUriDecodeIntoBuffer(&pszUri[Parsed.offPath], Parsed.cchPath,
1117 &pszDst[cchDecodedHost], cbResult - cchDecodedHost);
1118 Assert(RT_SUCCESS(rc) && strlen(pszDst) == cbResult - 1);
1119 }
1120 else
1121 {
1122 memcpy(pszDst, &pszUri[Parsed.offAuthorityHost], Parsed.cchAuthorityHost);
1123 memcpy(&pszDst[Parsed.cchAuthorityHost], &pszUri[Parsed.offPath], Parsed.cchPath);
1124 pszDst[cbResult - 1] = '\0';
1125 }
1126 if (RT_SUCCESS(rc))
1127 {
1128 /*
1129 * Convert colon DOS driver letter colon alternative.
1130 * We do this regardless of the desired path style.
1131 */
1132 if ( RT_C_IS_ALPHA(pszDst[0])
1133 && pszDst[1] == '|')
1134 pszDst[1] = ':';
1135
1136 /*
1137 * Fix slashes.
1138 */
1139 if (fPathStyle == RTPATH_STR_F_STYLE_DOS)
1140 RTPathChangeToDosSlashes(pszDst, true);
1141 else if (fPathStyle == RTPATH_STR_F_STYLE_UNIX)
1142 RTPathChangeToUnixSlashes(pszDst, true); /** @todo not quite sure how this actually makes sense... */
1143 else
1144 AssertFailed();
1145 return rc;
1146 }
1147
1148 /* bail out */
1149 RTStrFree(pszFreeMe);
1150 }
1151 else
1152 rc = VERR_PATH_ZERO_LENGTH;
1153 }
1154 return rc;
1155}
1156
1157
1158RTDECL(char *) RTUriFilePath(const char *pszUri)
1159{
1160 char *pszPath = NULL;
1161 int rc = RTUriFilePathEx(pszUri, RTPATH_STR_F_STYLE_HOST, &pszPath, 0 /*cbPath*/, NULL /*pcchPath*/);
1162 if (RT_SUCCESS(rc))
1163 return pszPath;
1164 return NULL;
1165}
1166
Note: See TracBrowser for help on using the repository browser.

© 2024 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette