VirtualBox

source: vbox/trunk/src/VBox/Runtime/common/misc/uri.cpp@ 46072

Last change on this file since 46072 was 39447, checked in by vboxsync, 13 years ago

Runtime: add Uniform Resource Identifier handling support

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 20.3 KB
Line 
1/* $Id: uri.cpp 39447 2011-11-29 10:02:42Z vboxsync $ */
2/** @file
3 * IPRT - Uniform Resource Identifier handling.
4 */
5
6/*
7 * Copyright (C) 2011 Oracle Corporation
8 *
9 * This file is part of VirtualBox Open Source Edition (OSE), as
10 * available from http://www.virtualbox.org. This file is free software;
11 * you can redistribute it and/or modify it under the terms of the GNU
12 * General Public License (GPL) as published by the Free Software
13 * Foundation, in version 2 as it comes in the "COPYING" file of the
14 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
15 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
16 *
17 * The contents of this file may alternatively be used under the terms
18 * of the Common Development and Distribution License Version 1.0
19 * (CDDL) only, as it comes in the "COPYING.CDDL" file of the
20 * VirtualBox OSE distribution, in which case the provisions of the
21 * CDDL are applicable instead of those of the GPL.
22 *
23 * You may elect to license modified versions of this file under the
24 * terms and conditions of either the GPL or the CDDL or both.
25 */
26
27
28/*******************************************************************************
29* Header Files *
30*******************************************************************************/
31#include <iprt/uri.h>
32
33#include <iprt/string.h>
34#include <iprt/mem.h>
35#include <iprt/path.h>
36#include <iprt/stream.h>
37
/* General URI format:

     foo://example.com:8042/over/there?name=ferret#nose
     \_/   \______________/\_________/ \_________/ \__/
      |           |            |            |        |
   scheme     authority       path        query   fragment
      |   _____________________|__
     / \ /                        \
     urn:example:animal:ferret:nose
*/
48
49
50/*******************************************************************************
51* Private RTUri helper *
52*******************************************************************************/
53
/**
 * Checks whether a character has to be percent-escaped.
 *
 * The following characters are matched:
 *   control = 00-1F
 *   space   = ' '
 *   delims  = '<' , '>' , '#' , '%' , '"'
 *   unwise  = '{' , '}' , '|' , '\' , '^' , '[' , ']' , '`'
 *
 * The complete expansion is wrapped in parentheses so that the macro behaves
 * like a single expression when combined with other operators, e.g.
 * !URI_EXCLUDED(ch).  The argument is evaluated several times and therefore
 * must not have side effects.
 */
#define URI_EXCLUDED(a) \
    (   ((a) >= 0x0  && (a) <= 0x20) \
     || ((a) >= 0x5B && (a) <= 0x5E) \
     || ((a) >= 0x7B && (a) <= 0x7D) \
     || (a) == '<' || (a) == '>' || (a) == '#' \
     || (a) == '%' || (a) == '"' || (a) == '`')
66
67static char *rtUriPercentEncodeN(const char *pszString, size_t cchMax)
68{
69 if (!pszString)
70 return NULL;
71
72 int rc = VINF_SUCCESS;
73
74 size_t cbLen = RT_MIN(strlen(pszString), cchMax);
75 /* The new string can be max 3 times in size of the original string. */
76 char *pszNew = (char*)RTMemAlloc(cbLen * 3 + 1);
77 if (!pszNew)
78 return NULL;
79 char *pszRes = NULL;
80 size_t iIn = 0;
81 size_t iOut = 0;
82 while(iIn < cbLen)
83 {
84 if (URI_EXCLUDED(pszString[iIn]))
85 {
86 char szNum[3] = { 0, 0, 0 };
87 RTStrFormatU8(&szNum[0], 3, pszString[iIn++], 16, 2, 2, RTSTR_F_CAPITAL | RTSTR_F_ZEROPAD);
88 pszNew[iOut++] = '%';
89 pszNew[iOut++] = szNum[0];
90 pszNew[iOut++] = szNum[1];
91 }
92 else
93 pszNew[iOut++] = pszString[iIn++];
94 }
95 if (RT_SUCCESS(rc))
96 {
97 pszNew[iOut] = '\0';
98 if (iOut != iIn)
99 {
100 /* If the source and target strings have different size, recreate
101 * the target string with the correct size. */
102 pszRes = RTStrDupN(pszNew, iOut);
103 RTStrFree(pszNew);
104 }
105 else
106 pszRes = pszNew;
107 }
108 else
109 RTStrFree(pszNew);
110
111 return pszRes;
112}
113
114static char *rtUriPercentDecodeN(const char *pszString, size_t cchMax)
115{
116 if (!pszString)
117 return NULL;
118
119 int rc = VINF_SUCCESS;
120 size_t cbLen = RT_MIN(strlen(pszString), cchMax);
121 /* The new string can only get smaller. */
122 char *pszNew = (char*)RTMemAlloc(cbLen + 1);
123 if (!pszNew)
124 return NULL;
125 char *pszRes = NULL;
126 size_t iIn = 0;
127 size_t iOut = 0;
128 while(iIn < cbLen)
129 {
130 if (pszString[iIn] == '%')
131 {
132 /* % encoding means the percent sign and exactly 2 hexadecimal
133 * digits describing the ASCII number of the character. */
134 ++iIn;
135 char szNum[] = { pszString[iIn++], pszString[iIn++], '\0' };
136 uint8_t u8;
137 rc = RTStrToUInt8Ex(szNum, NULL, 16, &u8);
138 if (RT_FAILURE(rc))
139 break;
140 pszNew[iOut] = u8;
141 }
142 else
143 pszNew[iOut] = pszString[iIn++];
144 ++iOut;
145 }
146 if (RT_SUCCESS(rc))
147 {
148 pszNew[iOut] = '\0';
149 if (iOut != iIn)
150 {
151 /* If the source and target strings have different size, recreate
152 * the target string with the correct size. */
153 pszRes = RTStrDupN(pszNew, iOut);
154 RTStrFree(pszNew);
155 }
156 else
157 pszRes = pszNew;
158 }
159 else
160 RTStrFree(pszNew);
161
162 return pszRes;
163}
164
/**
 * Locates the ':' which terminates the scheme component.
 *
 * @returns true and the offset of the ':' in @a *piEnd when one is found in
 *          the scanned range, false otherwise (@a *piEnd is not touched).
 * @param   pszUri  The URI string.
 * @param   iStart  The offset at which scanning starts.
 * @param   cbLen   The number of characters to scan, counted from @a iStart.
 * @param   piEnd   Where to return the offset of the ':'.
 */
static bool rtUriFindSchemeEnd(const char *pszUri, size_t iStart, size_t cbLen, size_t *piEnd)
{
    size_t const offLimit = iStart + cbLen;
    for (size_t off = iStart; off < offLimit; off++)
    {
        if (pszUri[off] != ':')
            continue;
        *piEnd = off;
        return true;
    }
    return false;
}
180
/**
 * Checks whether an authority component begins at the given offset, i.e.
 * whether the URI continues with "//" there.
 *
 * @returns true and the offset following the "//" in @a *piStart if so,
 *          false otherwise (@a *piStart is not touched).
 * @param   pszUri  The URI string.
 * @param   iStart  The offset to inspect.
 * @param   cbLen   The number of characters available from @a iStart on.
 * @param   piStart Where to return the offset of the authority itself.
 */
static bool rtUriCheckAuthorityStart(const char *pszUri, size_t iStart, size_t cbLen, size_t *piStart)
{
    /* Both slashes have to fit into the remaining range. */
    if (cbLen < 2)
        return false;
    if (pszUri[iStart] != '/' || pszUri[iStart + 1] != '/')
        return false;

    *piStart = iStart + 2;
    return true;
}
194
/**
 * Locates the character which terminates the authority component, that is
 * the first '/', '?' or '#'.
 *
 * @returns true and the offset of the terminator in @a *piEnd when one is
 *          found in the scanned range, false otherwise (@a *piEnd is not
 *          touched).
 * @param   pszUri  The URI string.
 * @param   iStart  The offset at which scanning starts.
 * @param   cbLen   The number of characters to scan, counted from @a iStart.
 * @param   piEnd   Where to return the offset of the terminator.
 */
static bool rtUriFindAuthorityEnd(const char *pszUri, size_t iStart, size_t cbLen, size_t *piEnd)
{
    for (size_t off = iStart; off < iStart + cbLen; off++)
    {
        switch (pszUri[off])
        {
            case '/':
            case '?':
            case '#':
                *piEnd = off;
                return true;

            default:
                break;
        }
    }
    return false;
}
212
/**
 * Checks whether a path component begins at the given offset.
 *
 * Only a '?' or '#' at that offset means there is no path; anything else,
 * including an empty remainder, is treated as the start of the path.
 *
 * @returns true and @a iStart in @a *piStart (a leading '/' belongs to the
 *          path) if a path begins here, false if the character at
 *          @a iStart is '?' or '#' (@a *piStart is not touched).
 * @param   pszUri  The URI string.
 * @param   iStart  The offset to inspect.
 * @param   cbLen   The number of characters available from @a iStart on.
 * @param   piStart Where to return the offset of the path.
 */
static bool rtUriCheckPathStart(const char *pszUri, size_t iStart, size_t cbLen, size_t *piStart)
{
    if (cbLen >= 1)
    {
        char const chFirst = pszUri[iStart];
        if (chFirst == '?' || chFirst == '#')
            return false;
    }

    /* '/' as well as every other value starts the path right here. */
    *piStart = iStart;
    return true;
}
231
/**
 * Locates the character which terminates the path component, that is the
 * first '?' or '#'.
 *
 * @returns true and the offset of the terminator in @a *piEnd when one is
 *          found in the scanned range, false otherwise (@a *piEnd is not
 *          touched).
 * @param   pszUri  The URI string.
 * @param   iStart  The offset at which scanning starts.
 * @param   cbLen   The number of characters to scan, counted from @a iStart.
 * @param   piEnd   Where to return the offset of the terminator.
 */
static bool rtUriFindPathEnd(const char *pszUri, size_t iStart, size_t cbLen, size_t *piEnd)
{
    size_t const offLimit = iStart + cbLen;
    size_t       off      = iStart;
    for (;;)
    {
        if (off >= offLimit)
            return false;

        char const ch = pszUri[off];
        if (ch == '?' || ch == '#')
            break;
        off++;
    }

    *piEnd = off;
    return true;
}
248
/**
 * Checks whether a query component begins at the given offset, i.e. whether
 * there is a '?' at that position.
 *
 * @returns true and the offset following the '?' in @a *piStart if so,
 *          false otherwise (@a *piStart is not touched).
 * @param   pszUri  The URI string.
 * @param   iStart  The offset to inspect.
 * @param   cbLen   The number of characters available from @a iStart on.
 * @param   piStart Where to return the offset of the query itself.
 */
static bool rtUriCheckQueryStart(const char *pszUri, size_t iStart, size_t cbLen, size_t *piStart)
{
    if (cbLen < 1)
        return false;
    if (pszUri[iStart] != '?')
        return false;

    /* The '?' itself isn't part of the query. */
    *piStart = iStart + 1;
    return true;
}
260
/**
 * Locates the character which terminates the query component, that is the
 * first '#'.
 *
 * @returns true and the offset of the '#' in @a *piEnd when one is found in
 *          the scanned range, false otherwise (@a *piEnd is not touched).
 * @param   pszUri  The URI string.
 * @param   iStart  The offset at which scanning starts.
 * @param   cbLen   The number of characters to scan, counted from @a iStart.
 * @param   piEnd   Where to return the offset of the '#'.
 */
static bool rtUriFindQueryEnd(const char *pszUri, size_t iStart, size_t cbLen, size_t *piEnd)
{
    size_t off;
    for (off = iStart; off < iStart + cbLen; off++)
        if (pszUri[off] == '#')
            break;

    /* Ran off the end of the range without seeing a '#'? */
    if (off >= iStart + cbLen)
        return false;

    *piEnd = off;
    return true;
}
276
/**
 * Checks whether a fragment component begins at the given offset, i.e.
 * whether there is a '#' at that position.
 *
 * @returns true and the offset following the '#' in @a *piStart if so,
 *          false otherwise (@a *piStart is not touched).
 * @param   pszUri  The URI string.
 * @param   iStart  The offset to inspect.
 * @param   cbLen   The number of characters available from @a iStart on.
 * @param   piStart Where to return the offset of the fragment itself.
 */
static bool rtUriCheckFragmentStart(const char *pszUri, size_t iStart, size_t cbLen, size_t *piStart)
{
    bool const fIsFragment = cbLen >= 1 && pszUri[iStart] == '#';
    if (fIsFragment)
        *piStart = iStart + 1; /* The '#' itself isn't part of the fragment. */
    return fIsFragment;
}
288
289/*******************************************************************************
290* Public RTUri interface *
291*******************************************************************************/
292
293/*******************************************************************************
294* Generic Uri methods *
295*******************************************************************************/
296
297RTR3DECL(char *) RTUriCreate(const char *pszScheme, const char *pszAuthority, const char *pszPath, const char *pszQuery, const char *pszFragment)
298{
299 if (!pszScheme) /* Scheme is minimum requirement */
300 return NULL;
301
302 char *pszResult = 0;
303 char *pszAuthority1 = 0;
304 char *pszPath1 = 0;
305 char *pszQuery1 = 0;
306 char *pszFragment1 = 0;
307
308 do
309 {
310 /* Create the percent encoded strings and calculate the necessary uri
311 * length. */
312 size_t cbSize = strlen(pszScheme) + 1 + 1; /* plus zero byte */
313 if (pszAuthority)
314 {
315 pszAuthority1 = rtUriPercentEncodeN(pszAuthority, RTSTR_MAX);
316 if (!pszAuthority1)
317 break;
318 cbSize += strlen(pszAuthority1) + 2;
319 }
320 if (pszPath)
321 {
322 pszPath1 = rtUriPercentEncodeN(pszPath, RTSTR_MAX);
323 if (!pszPath1)
324 break;
325 cbSize += strlen(pszPath1);
326 }
327 if (pszQuery)
328 {
329 pszQuery1 = rtUriPercentEncodeN(pszQuery, RTSTR_MAX);
330 if (!pszQuery1)
331 break;
332 cbSize += strlen(pszQuery1) + 1;
333 }
334 if (pszFragment)
335 {
336 pszFragment1 = rtUriPercentEncodeN(pszFragment, RTSTR_MAX);
337 if (!pszFragment1)
338 break;
339 cbSize += strlen(pszFragment1) + 1;
340 }
341
342 char *pszTmp = pszResult = (char*)RTMemAllocZ(cbSize);
343 if (!pszResult)
344 break;
345 /* Compose the target uri string. */
346 RTStrCatP(&pszTmp, &cbSize, pszScheme);
347 RTStrCatP(&pszTmp, &cbSize, ":");
348 if (pszAuthority1)
349 {
350 RTStrCatP(&pszTmp, &cbSize, "//");
351 RTStrCatP(&pszTmp, &cbSize, pszAuthority1);
352 }
353 if (pszPath1)
354 {
355 RTStrCatP(&pszTmp, &cbSize, pszPath1);
356 }
357 if (pszQuery1)
358 {
359 RTStrCatP(&pszTmp, &cbSize, "?");
360 RTStrCatP(&pszTmp, &cbSize, pszQuery1);
361 }
362 if (pszFragment1)
363 {
364 RTStrCatP(&pszTmp, &cbSize, "#");
365 RTStrCatP(&pszTmp, &cbSize, pszFragment1);
366 }
367 }while (0);
368
369 /* Cleanup */
370 if (pszAuthority1)
371 RTStrFree(pszAuthority1);
372 if (pszPath1)
373 RTStrFree(pszPath1);
374 if (pszQuery1)
375 RTStrFree(pszQuery1);
376 if (pszFragment1)
377 RTStrFree(pszFragment1);
378
379 return pszResult;
380}
381
382RTR3DECL(bool) RTUriHasScheme(const char *pszUri, const char *pszScheme)
383{
384 bool fRes = false;
385 char *pszTmp = RTUriScheme(pszUri);
386 if (pszTmp)
387 {
388 fRes = RTStrNICmp(pszScheme, pszTmp, strlen(pszTmp)) == 0;
389 RTStrFree(pszTmp);
390 }
391 return fRes;
392}
393
394RTR3DECL(char *) RTUriScheme(const char *pszUri)
395{
396 AssertPtrReturn(pszUri, NULL);
397
398 size_t iPos1;
399 size_t cbLen = strlen(pszUri);
400 if (rtUriFindSchemeEnd(pszUri, 0, cbLen, &iPos1))
401 return rtUriPercentDecodeN(pszUri, iPos1);
402 return NULL;
403}
404
405RTR3DECL(char *) RTUriAuthority(const char *pszUri)
406{
407 AssertPtrReturn(pszUri, NULL);
408
409 size_t iPos1;
410 size_t cbLen = strlen(pszUri);
411 /* Find the end of the scheme. */
412 if (!rtUriFindSchemeEnd(pszUri, 0, cbLen, &iPos1))
413 return NULL; /* no URI */
414 else
415 ++iPos1; /* Skip ':' */
416
417 size_t iPos2;
418 /* Find the start of the authority. */
419 if (rtUriCheckAuthorityStart(pszUri, iPos1, cbLen - iPos1, &iPos2))
420 {
421 size_t iPos3 = cbLen;
422 /* Find the end of the authority. If not found, the rest of the string
423 * is used. */
424 rtUriFindAuthorityEnd(pszUri, iPos2, cbLen - iPos2, &iPos3);
425 if (iPos3 > iPos2) /* Length check */
426 return rtUriPercentDecodeN(&pszUri[iPos2], iPos3 - iPos2);
427 else
428 return NULL;
429 }
430 return NULL;
431}
432
433RTR3DECL(char *) RTUriPath(const char *pszUri)
434{
435 AssertPtrReturn(pszUri, NULL);
436
437 size_t iPos1;
438 size_t cbLen = strlen(pszUri);
439 /* Find the end of the scheme. */
440 if (!rtUriFindSchemeEnd(pszUri, 0, cbLen, &iPos1))
441 return NULL; /* no URI */
442 else
443 ++iPos1; /* Skip ':' */
444
445 size_t iPos2;
446 size_t iPos3 = iPos1; /* Skip if no authority is found */
447 /* Find the start of the authority. */
448 if (rtUriCheckAuthorityStart(pszUri, iPos1, cbLen - iPos1, &iPos2))
449 {
450 /* Find the end of the authority. If not found, then there is no path
451 * component, cause the authority is the rest of the string. */
452 if (!rtUriFindAuthorityEnd(pszUri, iPos2, cbLen - iPos2, &iPos3))
453 return NULL; /* no path! */
454 }
455
456 size_t iPos4;
457 /* Find the start of the path */
458 if (rtUriCheckPathStart(pszUri, iPos3, cbLen - iPos3, &iPos4))
459 {
460 /* Search for the end of the scheme. */
461 size_t iPos5 = cbLen;
462 rtUriFindPathEnd(pszUri, iPos4, cbLen - iPos4, &iPos5);
463 if (iPos5 > iPos4) /* Length check */
464 return rtUriPercentDecodeN(&pszUri[iPos4], iPos5 - iPos4);
465 }
466
467 return NULL;
468}
469
470RTR3DECL(char *) RTUriQuery(const char *pszUri)
471{
472 AssertPtrReturn(pszUri, NULL);
473
474 size_t iPos1;
475 size_t cbLen = strlen(pszUri);
476 /* Find the end of the scheme. */
477 if (!rtUriFindSchemeEnd(pszUri, 0, cbLen, &iPos1))
478 return NULL; /* no URI */
479 else
480 ++iPos1; /* Skip ':' */
481
482 size_t iPos2;
483 size_t iPos3 = iPos1; /* Skip if no authority is found */
484 /* Find the start of the authority. */
485 if (rtUriCheckAuthorityStart(pszUri, iPos1, cbLen - iPos1, &iPos2))
486 {
487 /* Find the end of the authority. If not found, then there is no path
488 * component, cause the authority is the rest of the string. */
489 if (!rtUriFindAuthorityEnd(pszUri, iPos2, cbLen - iPos2, &iPos3))
490 return NULL; /* no path! */
491 }
492
493 size_t iPos4;
494 size_t iPos5 = iPos3; /* Skip if no path is found */
495 /* Find the start of the path */
496 if (rtUriCheckPathStart(pszUri, iPos3, cbLen - iPos3, &iPos4))
497 {
498 /* Find the end of the path. If not found, then there is no query
499 * component, cause the path is the rest of the string. */
500 if (!rtUriFindPathEnd(pszUri, iPos4, cbLen - iPos4, &iPos5))
501 return NULL; /* no query! */
502 }
503
504 size_t iPos6;
505 /* Find the start of the query */
506 if (rtUriCheckQueryStart(pszUri, iPos5, cbLen - iPos5, &iPos6))
507 {
508 /* Search for the end of the query. */
509 size_t iPos7 = cbLen;
510 rtUriFindQueryEnd(pszUri, iPos6, cbLen - iPos6, &iPos7);
511 if (iPos7 > iPos6) /* Length check */
512 return rtUriPercentDecodeN(&pszUri[iPos6], iPos7 - iPos6);
513 }
514
515 return NULL;
516}
517
518RTR3DECL(char *) RTUriFragment(const char *pszUri)
519{
520 AssertPtrReturn(pszUri, NULL);
521
522 size_t iPos1;
523 size_t cbLen = strlen(pszUri);
524 /* Find the end of the scheme. */
525 if (!rtUriFindSchemeEnd(pszUri, 0, cbLen, &iPos1))
526 return NULL; /* no URI */
527 else
528 ++iPos1; /* Skip ':' */
529
530 size_t iPos2;
531 size_t iPos3 = iPos1; /* Skip if no authority is found */
532 /* Find the start of the authority. */
533 if (rtUriCheckAuthorityStart(pszUri, iPos1, cbLen - iPos1, &iPos2))
534 {
535 /* Find the end of the authority. If not found, then there is no path
536 * component, cause the authority is the rest of the string. */
537 if (!rtUriFindAuthorityEnd(pszUri, iPos2, cbLen - iPos2, &iPos3))
538 return NULL; /* no path! */
539 }
540
541 size_t iPos4;
542 size_t iPos5 = iPos3; /* Skip if no path is found */
543 /* Find the start of the path */
544 if (rtUriCheckPathStart(pszUri, iPos3, cbLen - iPos3, &iPos4))
545 {
546 /* Find the end of the path. If not found, then there is no query
547 * component, cause the path is the rest of the string. */
548 if (!rtUriFindPathEnd(pszUri, iPos4, cbLen - iPos4, &iPos5))
549 return NULL; /* no query! */
550 }
551
552 size_t iPos6;
553 size_t iPos7 = iPos5; /* Skip if no query is found */
554 /* Find the start of the query */
555 if (rtUriCheckQueryStart(pszUri, iPos5, cbLen - iPos5, &iPos6))
556 {
557 /* Find the end of the query If not found, then there is no fragment
558 * component, cause the query is the rest of the string. */
559 if (!rtUriFindQueryEnd(pszUri, iPos6, cbLen - iPos6, &iPos7))
560 return NULL; /* no query! */
561 }
562
563
564 size_t iPos8;
565 /* Find the start of the fragment */
566 if (rtUriCheckFragmentStart(pszUri, iPos7, cbLen - iPos7, &iPos8))
567 {
568 /* There could be nothing behind a fragment. So use the rest of the
569 * string. */
570 if (cbLen > iPos8) /* Length check */
571 return rtUriPercentDecodeN(&pszUri[iPos8], cbLen - iPos8);
572 }
573 return NULL;
574}
575
576/*******************************************************************************
577* File Uri methods *
578*******************************************************************************/
579
580RTR3DECL(char *) RTUriFileCreate(const char *pszPath)
581{
582 if (!pszPath)
583 return NULL;
584
585 char *pszResult = 0;
586 char *pszPath1 = 0;
587
588 do
589 {
590 /* Create the percent encoded strings and calculate the necessary uri
591 * length. */
592 pszPath1 = rtUriPercentEncodeN(pszPath, RTSTR_MAX);
593 if (!pszPath1)
594 break;
595 size_t cbSize = 7 /* file:// */ + strlen(pszPath1) + 1; /* plus zero byte */
596 if (pszPath1[0] != '/')
597 ++cbSize;
598 char *pszTmp = pszResult = (char*)RTMemAllocZ(cbSize);
599 if (!pszResult)
600 break;
601 /* Compose the target uri string. */
602 RTStrCatP(&pszTmp, &cbSize, "file://");
603 if (pszPath1[0] != '/')
604 RTStrCatP(&pszTmp, &cbSize, "/");
605 RTStrCatP(&pszTmp, &cbSize, pszPath1);
606 }while (0);
607
608 /* Cleanup */
609 if (pszPath1)
610 RTStrFree(pszPath1);
611
612 return pszResult;
613}
614
615RTR3DECL(char *) RTUriFilePath(const char *pszUri, uint32_t uFormat)
616{
617 return RTUriFileNPath(pszUri, uFormat, RTSTR_MAX);
618}
619
620RTR3DECL(char *) RTUriFileNPath(const char *pszUri, uint32_t uFormat, size_t cchMax)
621{
622 AssertPtrReturn(pszUri, NULL);
623
624 size_t iPos1;
625 size_t cbLen = RT_MIN(strlen(pszUri), cchMax);
626 /* Find the end of the scheme. */
627 if (!rtUriFindSchemeEnd(pszUri, 0, cbLen, &iPos1))
628 return NULL; /* no URI */
629 else
630 ++iPos1; /* Skip ':' */
631
632 /* Check that this is a file Uri */
633 if (RTStrNICmp(pszUri, "file:", iPos1) != 0)
634 return NULL;
635
636 size_t iPos2;
637 size_t iPos3 = iPos1; /* Skip if no authority is found */
638 /* Find the start of the authority. */
639 if (rtUriCheckAuthorityStart(pszUri, iPos1, cbLen - iPos1, &iPos2))
640 {
641 /* Find the end of the authority. If not found, then there is no path
642 * component, cause the authority is the rest of the string. */
643 if (!rtUriFindAuthorityEnd(pszUri, iPos2, cbLen - iPos2, &iPos3))
644 return NULL; /* no path! */
645 }
646
647 size_t iPos4;
648 /* Find the start of the path */
649 if (rtUriCheckPathStart(pszUri, iPos3, cbLen - iPos3, &iPos4))
650 {
651 uint32_t uFIntern = uFormat;
652 /* Auto is based on the current host OS. */
653 if (uFormat == URI_FILE_FORMAT_AUTO)
654#ifdef RT_OS_WINDOWS
655 uFIntern = URI_FILE_FORMAT_WIN;
656#else /* RT_OS_WINDOWS */
657 uFIntern = URI_FILE_FORMAT_UNIX;
658#endif /* !RT_OS_WINDOWS */
659
660 if ( uFIntern != URI_FILE_FORMAT_UNIX
661 && pszUri[iPos4] == '/')
662 ++iPos4;
663 /* Search for the end of the scheme. */
664 size_t iPos5 = cbLen;
665 rtUriFindPathEnd(pszUri, iPos4, cbLen - iPos4, &iPos5);
666 if (iPos5 > iPos4) /* Length check */
667 {
668 char *pszPath = rtUriPercentDecodeN(&pszUri[iPos4], iPos5 - iPos4);
669 if (uFIntern == URI_FILE_FORMAT_UNIX)
670 return RTPathChangeToUnixSlashes(pszPath, true);
671 else if (uFIntern == URI_FILE_FORMAT_WIN)
672 return RTPathChangeToDosSlashes(pszPath, true);
673 else
674 {
675 RTStrFree(pszPath);
676 AssertMsgFailed(("Unknown uri file format %u", uFIntern));
677 return NULL;
678 }
679 }
680 }
681
682 return NULL;
683}
684
Note: See TracBrowser for help on using the repository browser.

© 2024 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette