VirtualBox

source: vbox/trunk/src/libs/curl-7.64.0/lib/urlapi.c@ 94601

Last change on this file since 94601 was 85671, checked in by vboxsync, 4 years ago

Export out internal curl copy to make it a lot simpler to build VBox (OSE) on Windows. bugref:9814

  • Property svn:eol-style set to native
File size: 35.2 KB
Line 
1/***************************************************************************
2 * _ _ ____ _
3 * Project ___| | | | _ \| |
4 * / __| | | | |_) | |
5 * | (__| |_| | _ <| |___
6 * \___|\___/|_| \_\_____|
7 *
8 * Copyright (C) 1998 - 2018, Daniel Stenberg, <[email protected]>, et al.
9 *
10 * This software is licensed as described in the file COPYING, which
11 * you should have received as part of this distribution. The terms
12 * are also available at https://curl.haxx.se/docs/copyright.html.
13 *
14 * You may opt to use, copy, modify, merge, publish, distribute and/or sell
15 * copies of the Software, and permit persons to whom the Software is
16 * furnished to do so, under the terms of the COPYING file.
17 *
18 * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
19 * KIND, either express or implied.
20 *
21 ***************************************************************************/
22
23#include "curl_setup.h"
24
25#include "urldata.h"
26#include "urlapi-int.h"
27#include "strcase.h"
28#include "dotdot.h"
29#include "url.h"
30#include "escape.h"
31#include "curl_ctype.h"
32
33/* The last 3 #include files should be in this order */
34#include "curl_printf.h"
35#include "curl_memory.h"
36#include "memdebug.h"
37
/* MSDOS/Windows style drive prefix, eg c: in c:foo
 *
 * Evaluates to non-zero when 'str' starts with an ASCII letter that is
 * immediately followed by a colon. The argument is fully parenthesized so
 * that pointer expressions (such as ptr + 1) expand correctly. It is
 * evaluated several times, so it must not have side effects. */
#define STARTS_WITH_DRIVE_PREFIX(str) \
  ((('a' <= (str)[0] && (str)[0] <= 'z') || \
    ('A' <= (str)[0] && (str)[0] <= 'Z')) && \
   ((str)[1] == ':'))
43
/* Drive prefix as it may appear inside a URL: an ASCII letter, then either
 * ':' or '|', then a forward slash, a backslash or the end of the string.
 * The argument is evaluated several times. */
#define STARTS_WITH_URL_DRIVE_PREFIX(str) \
  (((((str)[0] >= 'a') && ((str)[0] <= 'z')) || \
    (((str)[0] >= 'A') && ((str)[0] <= 'Z'))) && \
   (((str)[1] == ':') || ((str)[1] == '|')) && \
   (((str)[2] == '/') || ((str)[2] == '\\') || ((str)[2] == 0)))
51
/*
 * Internal representation of a CURLU handle. Each string member is either
 * NULL or an individually allocated string that is released with free()
 * by free_urlhandle(). The components are kept in URL-encoded form.
 */
struct Curl_URL {
  char *scheme;    /* URL scheme */
  char *user;      /* user name from the login part */
  char *password;  /* password from the login part */
  char *options;   /* login options, IMAP only? */
  char *host;      /* host name */
  char *port;      /* port number as a string */
  char *path;      /* path */
  char *query;     /* query, without the leading '?' */
  char *fragment;  /* fragment, without the leading '#' */

  char *scratch;   /* temporary work buffer used while parsing */
  long portnum;    /* the port number in numerical form */
};
67
/* Scheme applied to scheme-less URLs when CURLU_DEFAULT_SCHEME is set */
#define DEFAULT_SCHEME "https"

/* UNITTEST prefixes functions that are file-local in regular builds but
   keep external linkage when DEBUGBUILD is defined */
#if defined(DEBUGBUILD)
#define UNITTEST
#else
#define UNITTEST static
#endif
75
76static void free_urlhandle(struct Curl_URL *u)
77{
78 free(u->scheme);
79 free(u->user);
80 free(u->password);
81 free(u->options);
82 free(u->host);
83 free(u->port);
84 free(u->path);
85 free(u->query);
86 free(u->fragment);
87 free(u->scratch);
88}
89
90/* move the full contents of one handle onto another and
91 free the original */
92static void mv_urlhandle(struct Curl_URL *from,
93 struct Curl_URL *to)
94{
95 free_urlhandle(to);
96 *to = *from;
97 free(from);
98}
99
/*
 * Return a pointer to where the host name part of 'url' ends: the first
 * '/' or '?' found after the "//" that introduces the host (or after the
 * start of the string when there is no "//"). This handles URLs such as
 * http://www.url.com?id=2380 that have a query but no path. If neither
 * character is present, the terminating zero of 'url' is returned.
 */
static const char *find_host_sep(const char *url)
{
  const char *end = url + strlen(url);
  const char *host = strstr(url, "//");
  const char *slash;
  const char *qmark;

  /* step over the double slash, if there was one */
  host = host ? host + 2 : url;

  slash = strchr(host, '/');
  if(!slash)
    slash = end;

  qmark = strchr(host, '?');
  if(!qmark)
    qmark = end;

  /* whichever comes first terminates the host name */
  return (qmark <= slash) ? qmark : slash;
}
127
128/*
129 * Decide in an encoding-independent manner whether a character in an
130 * URL must be escaped. The same criterion must be used in strlen_url()
131 * and strcpy_url().
132 */
133static bool urlchar_needs_escaping(int c)
134{
135 return !(ISCNTRL(c) || ISSPACE(c) || ISGRAPH(c));
136}
137
138/*
139 * strlen_url() returns the length of the given URL if the spaces within the
140 * URL were properly URL encoded.
141 * URL encoding should be skipped for host names, otherwise IDN resolution
142 * will fail.
143 */
144size_t Curl_strlen_url(const char *url, bool relative)
145{
146 const unsigned char *ptr;
147 size_t newlen = 0;
148 bool left = TRUE; /* left side of the ? */
149 const unsigned char *host_sep = (const unsigned char *) url;
150
151 if(!relative)
152 host_sep = (const unsigned char *) find_host_sep(url);
153
154 for(ptr = (unsigned char *)url; *ptr; ptr++) {
155
156 if(ptr < host_sep) {
157 ++newlen;
158 continue;
159 }
160
161 switch(*ptr) {
162 case '?':
163 left = FALSE;
164 /* FALLTHROUGH */
165 default:
166 if(urlchar_needs_escaping(*ptr))
167 newlen += 2;
168 newlen++;
169 break;
170 case ' ':
171 if(left)
172 newlen += 3;
173 else
174 newlen++;
175 break;
176 }
177 }
178 return newlen;
179}
180
181/* strcpy_url() copies a url to a output buffer and URL-encodes the spaces in
182 * the source URL accordingly.
183 * URL encoding should be skipped for host names, otherwise IDN resolution
184 * will fail.
185 */
186void Curl_strcpy_url(char *output, const char *url, bool relative)
187{
188 /* we must add this with whitespace-replacing */
189 bool left = TRUE;
190 const unsigned char *iptr;
191 char *optr = output;
192 const unsigned char *host_sep = (const unsigned char *) url;
193
194 if(!relative)
195 host_sep = (const unsigned char *) find_host_sep(url);
196
197 for(iptr = (unsigned char *)url; /* read from here */
198 *iptr; /* until zero byte */
199 iptr++) {
200
201 if(iptr < host_sep) {
202 *optr++ = *iptr;
203 continue;
204 }
205
206 switch(*iptr) {
207 case '?':
208 left = FALSE;
209 /* FALLTHROUGH */
210 default:
211 if(urlchar_needs_escaping(*iptr)) {
212 msnprintf(optr, 4, "%%%02x", *iptr);
213 optr += 3;
214 }
215 else
216 *optr++=*iptr;
217 break;
218 case ' ':
219 if(left) {
220 *optr++='%'; /* add a '%' */
221 *optr++='2'; /* add a '2' */
222 *optr++='0'; /* add a '0' */
223 }
224 else
225 *optr++='+'; /* add a '+' here */
226 break;
227 }
228 }
229 *optr = 0; /* zero terminate output buffer */
230
231}
232
233/*
234 * Returns true if the given URL is absolute (as opposed to relative) within
235 * the buffer size. Returns the scheme in the buffer if TRUE and 'buf' is
236 * non-NULL.
237 */
238bool Curl_is_absolute_url(const char *url, char *buf, size_t buflen)
239{
240 size_t i;
241#ifdef WIN32
242 if(STARTS_WITH_DRIVE_PREFIX(url))
243 return FALSE;
244#endif
245 for(i = 0; i < buflen && url[i]; ++i) {
246 char s = url[i];
247 if((s == ':') && (url[i + 1] == '/')) {
248 if(buf)
249 buf[i] = 0;
250 return TRUE;
251 }
252 /* RFC 3986 3.1 explains:
253 scheme = ALPHA *( ALPHA / DIGIT / "+" / "-" / "." )
254 */
255 else if(ISALNUM(s) || (s == '+') || (s == '-') || (s == '.') ) {
256 if(buf)
257 buf[i] = (char)TOLOWER(s);
258 }
259 else
260 break;
261 }
262 return FALSE;
263}
264
265/*
266 * Concatenate a relative URL to a base URL making it absolute.
267 * URL-encodes any spaces.
268 * The returned pointer must be freed by the caller unless NULL
269 * (returns NULL on out of memory).
270 */
271char *Curl_concat_url(const char *base, const char *relurl)
272{
273 /***
274 TRY to append this new path to the old URL
275 to the right of the host part. Oh crap, this is doomed to cause
276 problems in the future...
277 */
278 char *newest;
279 char *protsep;
280 char *pathsep;
281 size_t newlen;
282 bool host_changed = FALSE;
283
284 const char *useurl = relurl;
285 size_t urllen;
286
287 /* we must make our own copy of the URL to play with, as it may
288 point to read-only data */
289 char *url_clone = strdup(base);
290
291 if(!url_clone)
292 return NULL; /* skip out of this NOW */
293
294 /* protsep points to the start of the host name */
295 protsep = strstr(url_clone, "//");
296 if(!protsep)
297 protsep = url_clone;
298 else
299 protsep += 2; /* pass the slashes */
300
301 if('/' != relurl[0]) {
302 int level = 0;
303
304 /* First we need to find out if there's a ?-letter in the URL,
305 and cut it and the right-side of that off */
306 pathsep = strchr(protsep, '?');
307 if(pathsep)
308 *pathsep = 0;
309
310 /* we have a relative path to append to the last slash if there's one
311 available, or if the new URL is just a query string (starts with a
312 '?') we append the new one at the end of the entire currently worked
313 out URL */
314 if(useurl[0] != '?') {
315 pathsep = strrchr(protsep, '/');
316 if(pathsep)
317 *pathsep = 0;
318 }
319
320 /* Check if there's any slash after the host name, and if so, remember
321 that position instead */
322 pathsep = strchr(protsep, '/');
323 if(pathsep)
324 protsep = pathsep + 1;
325 else
326 protsep = NULL;
327
328 /* now deal with one "./" or any amount of "../" in the newurl
329 and act accordingly */
330
331 if((useurl[0] == '.') && (useurl[1] == '/'))
332 useurl += 2; /* just skip the "./" */
333
334 while((useurl[0] == '.') &&
335 (useurl[1] == '.') &&
336 (useurl[2] == '/')) {
337 level++;
338 useurl += 3; /* pass the "../" */
339 }
340
341 if(protsep) {
342 while(level--) {
343 /* cut off one more level from the right of the original URL */
344 pathsep = strrchr(protsep, '/');
345 if(pathsep)
346 *pathsep = 0;
347 else {
348 *protsep = 0;
349 break;
350 }
351 }
352 }
353 }
354 else {
355 /* We got a new absolute path for this server */
356
357 if((relurl[0] == '/') && (relurl[1] == '/')) {
358 /* the new URL starts with //, just keep the protocol part from the
359 original one */
360 *protsep = 0;
361 useurl = &relurl[2]; /* we keep the slashes from the original, so we
362 skip the new ones */
363 host_changed = TRUE;
364 }
365 else {
366 /* cut off the original URL from the first slash, or deal with URLs
367 without slash */
368 pathsep = strchr(protsep, '/');
369 if(pathsep) {
370 /* When people use badly formatted URLs, such as
371 "http://www.url.com?dir=/home/daniel" we must not use the first
372 slash, if there's a ?-letter before it! */
373 char *sep = strchr(protsep, '?');
374 if(sep && (sep < pathsep))
375 pathsep = sep;
376 *pathsep = 0;
377 }
378 else {
379 /* There was no slash. Now, since we might be operating on a badly
380 formatted URL, such as "http://www.url.com?id=2380" which doesn't
381 use a slash separator as it is supposed to, we need to check for a
382 ?-letter as well! */
383 pathsep = strchr(protsep, '?');
384 if(pathsep)
385 *pathsep = 0;
386 }
387 }
388 }
389
390 /* If the new part contains a space, this is a mighty stupid redirect
391 but we still make an effort to do "right". To the left of a '?'
392 letter we replace each space with %20 while it is replaced with '+'
393 on the right side of the '?' letter.
394 */
395 newlen = Curl_strlen_url(useurl, !host_changed);
396
397 urllen = strlen(url_clone);
398
399 newest = malloc(urllen + 1 + /* possible slash */
400 newlen + 1 /* zero byte */);
401
402 if(!newest) {
403 free(url_clone); /* don't leak this */
404 return NULL;
405 }
406
407 /* copy over the root url part */
408 memcpy(newest, url_clone, urllen);
409
410 /* check if we need to append a slash */
411 if(('/' == useurl[0]) || (protsep && !*protsep) || ('?' == useurl[0]))
412 ;
413 else
414 newest[urllen++]='/';
415
416 /* then append the new piece on the right side */
417 Curl_strcpy_url(&newest[urllen], useurl, !host_changed);
418
419 free(url_clone);
420
421 return newest;
422}
423
424/*
425 * parse_hostname_login()
426 *
427 * Parse the login details (user name, password and options) from the URL and
428 * strip them out of the host name
429 *
430 */
431static CURLUcode parse_hostname_login(struct Curl_URL *u,
432 const struct Curl_handler *h,
433 char **hostname,
434 unsigned int flags)
435{
436 CURLUcode result = CURLUE_OK;
437 CURLcode ccode;
438 char *userp = NULL;
439 char *passwdp = NULL;
440 char *optionsp = NULL;
441
442 /* At this point, we're hoping all the other special cases have
443 * been taken care of, so conn->host.name is at most
444 * [user[:password][;options]]@]hostname
445 *
446 * We need somewhere to put the embedded details, so do that first.
447 */
448
449 char *ptr = strchr(*hostname, '@');
450 char *login = *hostname;
451
452 if(!ptr)
453 goto out;
454
455 /* We will now try to extract the
456 * possible login information in a string like:
457 * ftp://user:[email protected]:8021/README */
458 *hostname = ++ptr;
459
460 /* We could use the login information in the URL so extract it. Only parse
461 options if the handler says we should. Note that 'h' might be NULL! */
462 ccode = Curl_parse_login_details(login, ptr - login - 1,
463 &userp, &passwdp,
464 (h && (h->flags & PROTOPT_URLOPTIONS)) ?
465 &optionsp:NULL);
466 if(ccode) {
467 result = CURLUE_MALFORMED_INPUT;
468 goto out;
469 }
470
471 if(userp) {
472 if(flags & CURLU_DISALLOW_USER) {
473 /* Option DISALLOW_USER is set and url contains username. */
474 result = CURLUE_USER_NOT_ALLOWED;
475 goto out;
476 }
477
478 u->user = userp;
479 }
480
481 if(passwdp)
482 u->password = passwdp;
483
484 if(optionsp)
485 u->options = optionsp;
486
487 return CURLUE_OK;
488 out:
489
490 free(userp);
491 free(passwdp);
492 free(optionsp);
493
494 return result;
495}
496
497UNITTEST CURLUcode Curl_parse_port(struct Curl_URL *u, char *hostname)
498{
499 char *portptr = NULL;
500 char endbracket;
501 int len;
502
503 /*
504 * Find the end of an IPv6 address, either on the ']' ending bracket or
505 * a percent-encoded zone index.
506 */
507 if(1 == sscanf(hostname, "[%*45[0123456789abcdefABCDEF:.]%c%n",
508 &endbracket, &len)) {
509 if(']' == endbracket)
510 portptr = &hostname[len];
511 else if('%' == endbracket) {
512 int zonelen = len;
513 if(1 == sscanf(hostname + zonelen, "25%*[^]]%c%n", &endbracket, &len)) {
514 if(']' != endbracket)
515 return CURLUE_MALFORMED_INPUT;
516 portptr = &hostname[--zonelen + len + 1];
517 }
518 else
519 return CURLUE_MALFORMED_INPUT;
520 }
521 else
522 return CURLUE_MALFORMED_INPUT;
523
524 /* this is a RFC2732-style specified IP-address */
525 if(portptr && *portptr) {
526 if(*portptr != ':')
527 return CURLUE_MALFORMED_INPUT;
528 }
529 else
530 portptr = NULL;
531 }
532 else
533 portptr = strchr(hostname, ':');
534
535 if(portptr) {
536 char *rest;
537 long port;
538 char portbuf[7];
539
540 /* Browser behavior adaptation. If there's a colon with no digits after,
541 just cut off the name there which makes us ignore the colon and just
542 use the default port. Firefox, Chrome and Safari all do that. */
543 if(!portptr[1]) {
544 *portptr = '\0';
545 return CURLUE_OK;
546 }
547
548 if(!ISDIGIT(portptr[1]))
549 return CURLUE_BAD_PORT_NUMBER;
550
551 port = strtol(portptr + 1, &rest, 10); /* Port number must be decimal */
552
553 if((port <= 0) || (port > 0xffff))
554 /* Single unix standard says port numbers are 16 bits long, but we don't
555 treat port zero as OK. */
556 return CURLUE_BAD_PORT_NUMBER;
557
558 if(rest[0])
559 return CURLUE_BAD_PORT_NUMBER;
560
561 *portptr++ = '\0'; /* cut off the name there */
562 *rest = 0;
563 /* generate a new port number string to get rid of leading zeroes etc */
564 msnprintf(portbuf, sizeof(portbuf), "%ld", port);
565 u->portnum = port;
566 u->port = strdup(portbuf);
567 if(!u->port)
568 return CURLUE_OUT_OF_MEMORY;
569 }
570
571 return CURLUE_OK;
572}
573
574/* scan for byte values < 31 or 127 */
575static CURLUcode junkscan(char *part)
576{
577 char badbytes[]={
578 /* */ 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
579 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
580 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17,
581 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f,
582 0x7f,
583 0x00 /* zero terminate */
584 };
585 if(part) {
586 size_t n = strlen(part);
587 size_t nfine = strcspn(part, badbytes);
588 if(nfine != n)
589 /* since we don't know which part is scanned, return a generic error
590 code */
591 return CURLUE_MALFORMED_INPUT;
592 }
593 return CURLUE_OK;
594}
595
596static CURLUcode hostname_check(char *hostname, unsigned int flags)
597{
598 const char *l = NULL; /* accepted characters */
599 size_t len;
600 size_t hlen = strlen(hostname);
601 (void)flags;
602
603 if(hostname[0] == '[') {
604 hostname++;
605 l = "0123456789abcdefABCDEF::.%";
606 hlen -= 2;
607 }
608
609 if(l) {
610 /* only valid letters are ok */
611 len = strspn(hostname, l);
612 if(hlen != len)
613 /* hostname with bad content */
614 return CURLUE_MALFORMED_INPUT;
615 }
616 else {
617 /* letters from the second string is not ok */
618 len = strcspn(hostname, " ");
619 if(hlen != len)
620 /* hostname with bad content */
621 return CURLUE_MALFORMED_INPUT;
622 }
623 return CURLUE_OK;
624}
625
/* Non-zero for the characters that end the host name in a URL: the start
   of the path ('/'), of the query ('?') or of the fragment ('#') */
#define HOSTNAME_END(x) \
  (((x) == '/') || ((x) == '?') || ((x) == '#'))
627
628static CURLUcode seturl(const char *url, CURLU *u, unsigned int flags)
629{
630 char *path;
631 bool path_alloced = FALSE;
632 char *hostname;
633 char *query = NULL;
634 char *fragment = NULL;
635 CURLUcode result;
636 bool url_has_scheme = FALSE;
637 char schemebuf[MAX_SCHEME_LEN];
638 char *schemep = NULL;
639 size_t schemelen = 0;
640 size_t urllen;
641 const struct Curl_handler *h = NULL;
642
643 if(!url)
644 return CURLUE_MALFORMED_INPUT;
645
646 /*************************************************************
647 * Parse the URL.
648 ************************************************************/
649 /* allocate scratch area */
650 urllen = strlen(url);
651 path = u->scratch = malloc(urllen * 2 + 2);
652 if(!path)
653 return CURLUE_OUT_OF_MEMORY;
654
655 hostname = &path[urllen + 1];
656 hostname[0] = 0;
657
658 if(Curl_is_absolute_url(url, schemebuf, sizeof(schemebuf))) {
659 url_has_scheme = TRUE;
660 schemelen = strlen(schemebuf);
661 }
662
663 /* handle the file: scheme */
664 if(url_has_scheme && strcasecompare(schemebuf, "file")) {
665 /* path has been allocated large enough to hold this */
666 strcpy(path, &url[5]);
667
668 hostname = NULL; /* no host for file: URLs */
669 u->scheme = strdup("file");
670 if(!u->scheme)
671 return CURLUE_OUT_OF_MEMORY;
672
673 /* Extra handling URLs with an authority component (i.e. that start with
674 * "file://")
675 *
676 * We allow omitted hostname (e.g. file:/<path>) -- valid according to
677 * RFC 8089, but not the (current) WHAT-WG URL spec.
678 */
679 if(path[0] == '/' && path[1] == '/') {
680 /* swallow the two slashes */
681 char *ptr = &path[2];
682
683 /*
684 * According to RFC 8089, a file: URL can be reliably dereferenced if:
685 *
686 * o it has no/blank hostname, or
687 *
688 * o the hostname matches "localhost" (case-insensitively), or
689 *
690 * o the hostname is a FQDN that resolves to this machine.
691 *
692 * For brevity, we only consider URLs with empty, "localhost", or
693 * "127.0.0.1" hostnames as local.
694 *
695 * Additionally, there is an exception for URLs with a Windows drive
696 * letter in the authority (which was accidentally omitted from RFC 8089
697 * Appendix E, but believe me, it was meant to be there. --MK)
698 */
699 if(ptr[0] != '/' && !STARTS_WITH_URL_DRIVE_PREFIX(ptr)) {
700 /* the URL includes a host name, it must match "localhost" or
701 "127.0.0.1" to be valid */
702 if(!checkprefix("localhost/", ptr) &&
703 !checkprefix("127.0.0.1/", ptr)) {
704 /* Invalid file://hostname/, expected localhost or 127.0.0.1 or
705 none */
706 return CURLUE_MALFORMED_INPUT;
707 }
708 ptr += 9; /* now points to the slash after the host */
709 }
710
711 path = ptr;
712 }
713
714#if !defined(MSDOS) && !defined(WIN32) && !defined(__CYGWIN__)
715 /* Don't allow Windows drive letters when not in Windows.
716 * This catches both "file:/c:" and "file:c:" */
717 if(('/' == path[0] && STARTS_WITH_URL_DRIVE_PREFIX(&path[1])) ||
718 STARTS_WITH_URL_DRIVE_PREFIX(path)) {
719 /* File drive letters are only accepted in MSDOS/Windows */
720 return CURLUE_MALFORMED_INPUT;
721 }
722#else
723 /* If the path starts with a slash and a drive letter, ditch the slash */
724 if('/' == path[0] && STARTS_WITH_URL_DRIVE_PREFIX(&path[1])) {
725 /* This cannot be done with strcpy, as the memory chunks overlap! */
726 memmove(path, &path[1], strlen(&path[1]) + 1);
727 }
728#endif
729
730 }
731 else {
732 /* clear path */
733 const char *p;
734 const char *hostp;
735 size_t len;
736 path[0] = 0;
737
738 if(url_has_scheme) {
739 int i = 0;
740 p = &url[schemelen + 1];
741 while(p && (*p == '/') && (i < 4)) {
742 p++;
743 i++;
744 }
745 if((i < 1) || (i>3))
746 /* less than one or more than three slashes */
747 return CURLUE_MALFORMED_INPUT;
748
749 schemep = schemebuf;
750 if(!Curl_builtin_scheme(schemep) &&
751 !(flags & CURLU_NON_SUPPORT_SCHEME))
752 return CURLUE_UNSUPPORTED_SCHEME;
753
754 if(junkscan(schemep))
755 return CURLUE_MALFORMED_INPUT;
756 }
757 else {
758 /* no scheme! */
759
760 if(!(flags & (CURLU_DEFAULT_SCHEME|CURLU_GUESS_SCHEME)))
761 return CURLUE_MALFORMED_INPUT;
762 if(flags & CURLU_DEFAULT_SCHEME)
763 schemep = (char *) DEFAULT_SCHEME;
764
765 /*
766 * The URL was badly formatted, let's try without scheme specified.
767 */
768 p = url;
769 }
770 hostp = p; /* host name starts here */
771
772 while(*p && !HOSTNAME_END(*p)) /* find end of host name */
773 p++;
774
775 len = p - hostp;
776 if(!len)
777 return CURLUE_MALFORMED_INPUT;
778
779 memcpy(hostname, hostp, len);
780 hostname[len] = 0;
781
782 if((flags & CURLU_GUESS_SCHEME) && !schemep) {
783 /* legacy curl-style guess based on host name */
784 if(checkprefix("ftp.", hostname))
785 schemep = (char *)"ftp";
786 else if(checkprefix("dict.", hostname))
787 schemep = (char *)"dict";
788 else if(checkprefix("ldap.", hostname))
789 schemep = (char *)"ldap";
790 else if(checkprefix("imap.", hostname))
791 schemep = (char *)"imap";
792 else if(checkprefix("smtp.", hostname))
793 schemep = (char *)"smtp";
794 else if(checkprefix("pop3.", hostname))
795 schemep = (char *)"pop3";
796 else
797 schemep = (char *)"http";
798 }
799
800 len = strlen(p);
801 memcpy(path, p, len);
802 path[len] = 0;
803
804 u->scheme = strdup(schemep);
805 if(!u->scheme)
806 return CURLUE_OUT_OF_MEMORY;
807 }
808
809 /* if this is a known scheme, get some details */
810 h = Curl_builtin_scheme(u->scheme);
811
812 if(junkscan(path))
813 return CURLUE_MALFORMED_INPUT;
814
815 query = strchr(path, '?');
816 if(query)
817 *query++ = 0;
818
819 fragment = strchr(query?query:path, '#');
820 if(fragment)
821 *fragment++ = 0;
822
823 if(!path[0])
824 /* if there's no path set, unset */
825 path = NULL;
826 else if(!(flags & CURLU_PATH_AS_IS)) {
827 /* sanitise paths and remove ../ and ./ sequences according to RFC3986 */
828 char *newp = Curl_dedotdotify(path);
829 if(!newp)
830 return CURLUE_OUT_OF_MEMORY;
831
832 if(strcmp(newp, path)) {
833 /* if we got a new version */
834 path = newp;
835 path_alloced = TRUE;
836 }
837 else
838 free(newp);
839 }
840 if(path) {
841 u->path = path_alloced?path:strdup(path);
842 if(!u->path)
843 return CURLUE_OUT_OF_MEMORY;
844 }
845
846 if(hostname) {
847 /*
848 * Parse the login details and strip them out of the host name.
849 */
850 if(junkscan(hostname))
851 return CURLUE_MALFORMED_INPUT;
852
853 result = parse_hostname_login(u, h, &hostname, flags);
854 if(result)
855 return result;
856
857 result = Curl_parse_port(u, hostname);
858 if(result)
859 return result;
860
861 result = hostname_check(hostname, flags);
862 if(result)
863 return result;
864
865 u->host = strdup(hostname);
866 if(!u->host)
867 return CURLUE_OUT_OF_MEMORY;
868 }
869
870 if(query) {
871 u->query = strdup(query);
872 if(!u->query)
873 return CURLUE_OUT_OF_MEMORY;
874 }
875 if(fragment && fragment[0]) {
876 u->fragment = strdup(fragment);
877 if(!u->fragment)
878 return CURLUE_OUT_OF_MEMORY;
879 }
880
881 free(u->scratch);
882 u->scratch = NULL;
883
884 return CURLUE_OK;
885}
886
887/*
888 * Parse the URL and set the relevant members of the Curl_URL struct.
889 */
890static CURLUcode parseurl(const char *url, CURLU *u, unsigned int flags)
891{
892 CURLUcode result = seturl(url, u, flags);
893 if(result) {
894 free_urlhandle(u);
895 memset(u, 0, sizeof(struct Curl_URL));
896 }
897 return result;
898}
899
900/*
901 */
902CURLU *curl_url(void)
903{
904 return calloc(sizeof(struct Curl_URL), 1);
905}
906
907void curl_url_cleanup(CURLU *u)
908{
909 if(u) {
910 free_urlhandle(u);
911 free(u);
912 }
913}
914
/*
 * DUP() duplicates the string member 'name' of the struct 'src' points to
 * into the same member of the struct 'dest' points to. A NULL member is
 * skipped. If the duplication fails, control jumps to a label called
 * 'fail' that the using function must provide.
 *
 * The body is wrapped in do { } while(0) so that the macro acts as one
 * single statement, also in an unbraced if/else.
 */
#define DUP(dest, src, name)                  \
  do {                                        \
    if((src)->name) {                         \
      (dest)->name = strdup((src)->name);     \
      if(!(dest)->name)                       \
        goto fail;                            \
    }                                         \
  } while(0)
921
922CURLU *curl_url_dup(CURLU *in)
923{
924 struct Curl_URL *u = calloc(sizeof(struct Curl_URL), 1);
925 if(u) {
926 DUP(u, in, scheme);
927 DUP(u, in, user);
928 DUP(u, in, password);
929 DUP(u, in, options);
930 DUP(u, in, host);
931 DUP(u, in, port);
932 DUP(u, in, path);
933 DUP(u, in, query);
934 DUP(u, in, fragment);
935 u->portnum = in->portnum;
936 }
937 return u;
938 fail:
939 curl_url_cleanup(u);
940 return NULL;
941}
942
943CURLUcode curl_url_get(CURLU *u, CURLUPart what,
944 char **part, unsigned int flags)
945{
946 char *ptr;
947 CURLUcode ifmissing = CURLUE_UNKNOWN_PART;
948 char portbuf[7];
949 bool urldecode = (flags & CURLU_URLDECODE)?1:0;
950 bool plusdecode = FALSE;
951 (void)flags;
952 if(!u)
953 return CURLUE_BAD_HANDLE;
954 if(!part)
955 return CURLUE_BAD_PARTPOINTER;
956 *part = NULL;
957
958 switch(what) {
959 case CURLUPART_SCHEME:
960 ptr = u->scheme;
961 ifmissing = CURLUE_NO_SCHEME;
962 urldecode = FALSE; /* never for schemes */
963 break;
964 case CURLUPART_USER:
965 ptr = u->user;
966 ifmissing = CURLUE_NO_USER;
967 break;
968 case CURLUPART_PASSWORD:
969 ptr = u->password;
970 ifmissing = CURLUE_NO_PASSWORD;
971 break;
972 case CURLUPART_OPTIONS:
973 ptr = u->options;
974 ifmissing = CURLUE_NO_OPTIONS;
975 break;
976 case CURLUPART_HOST:
977 ptr = u->host;
978 ifmissing = CURLUE_NO_HOST;
979 break;
980 case CURLUPART_PORT:
981 ptr = u->port;
982 ifmissing = CURLUE_NO_PORT;
983 urldecode = FALSE; /* never for port */
984 if(!ptr && (flags & CURLU_DEFAULT_PORT) && u->scheme) {
985 /* there's no stored port number, but asked to deliver
986 a default one for the scheme */
987 const struct Curl_handler *h =
988 Curl_builtin_scheme(u->scheme);
989 if(h) {
990 msnprintf(portbuf, sizeof(portbuf), "%ld", h->defport);
991 ptr = portbuf;
992 }
993 }
994 else if(ptr && u->scheme) {
995 /* there is a stored port number, but ask to inhibit if
996 it matches the default one for the scheme */
997 const struct Curl_handler *h =
998 Curl_builtin_scheme(u->scheme);
999 if(h && (h->defport == u->portnum) &&
1000 (flags & CURLU_NO_DEFAULT_PORT))
1001 ptr = NULL;
1002 }
1003 break;
1004 case CURLUPART_PATH:
1005 ptr = u->path;
1006 if(!ptr) {
1007 ptr = u->path = strdup("/");
1008 if(!u->path)
1009 return CURLUE_OUT_OF_MEMORY;
1010 }
1011 break;
1012 case CURLUPART_QUERY:
1013 ptr = u->query;
1014 ifmissing = CURLUE_NO_QUERY;
1015 plusdecode = urldecode;
1016 break;
1017 case CURLUPART_FRAGMENT:
1018 ptr = u->fragment;
1019 ifmissing = CURLUE_NO_FRAGMENT;
1020 break;
1021 case CURLUPART_URL: {
1022 char *url;
1023 char *scheme;
1024 char *options = u->options;
1025 char *port = u->port;
1026 if(u->scheme && strcasecompare("file", u->scheme)) {
1027 url = aprintf("file://%s%s%s",
1028 u->path,
1029 u->fragment? "#": "",
1030 u->fragment? u->fragment : "");
1031 }
1032 else if(!u->host)
1033 return CURLUE_NO_HOST;
1034 else {
1035 const struct Curl_handler *h = NULL;
1036 if(u->scheme)
1037 scheme = u->scheme;
1038 else if(flags & CURLU_DEFAULT_SCHEME)
1039 scheme = (char *) DEFAULT_SCHEME;
1040 else
1041 return CURLUE_NO_SCHEME;
1042
1043 if(scheme) {
1044 h = Curl_builtin_scheme(scheme);
1045 if(!port && (flags & CURLU_DEFAULT_PORT)) {
1046 /* there's no stored port number, but asked to deliver
1047 a default one for the scheme */
1048 if(h) {
1049 msnprintf(portbuf, sizeof(portbuf), "%ld", h->defport);
1050 port = portbuf;
1051 }
1052 }
1053 else if(port) {
1054 /* there is a stored port number, but asked to inhibit if it matches
1055 the default one for the scheme */
1056 if(h && (h->defport == u->portnum) &&
1057 (flags & CURLU_NO_DEFAULT_PORT))
1058 port = NULL;
1059 }
1060 }
1061 if(h && !(h->flags & PROTOPT_URLOPTIONS))
1062 options = NULL;
1063
1064 url = aprintf("%s://%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s",
1065 scheme,
1066 u->user ? u->user : "",
1067 u->password ? ":": "",
1068 u->password ? u->password : "",
1069 options ? ";" : "",
1070 options ? options : "",
1071 (u->user || u->password || options) ? "@": "",
1072 u->host,
1073 port ? ":": "",
1074 port ? port : "",
1075 (u->path && (u->path[0] != '/')) ? "/": "",
1076 u->path ? u->path : "/",
1077 (u->query && u->query[0]) ? "?": "",
1078 (u->query && u->query[0]) ? u->query : "",
1079 u->fragment? "#": "",
1080 u->fragment? u->fragment : "");
1081 }
1082 if(!url)
1083 return CURLUE_OUT_OF_MEMORY;
1084 *part = url;
1085 return CURLUE_OK;
1086 break;
1087 }
1088 default:
1089 ptr = NULL;
1090 }
1091 if(ptr) {
1092 *part = strdup(ptr);
1093 if(!*part)
1094 return CURLUE_OUT_OF_MEMORY;
1095 if(plusdecode) {
1096 /* convert + to space */
1097 char *plus;
1098 for(plus = *part; *plus; ++plus) {
1099 if(*plus == '+')
1100 *plus = ' ';
1101 }
1102 }
1103 if(urldecode) {
1104 char *decoded;
1105 size_t dlen;
1106 CURLcode res = Curl_urldecode(NULL, *part, 0, &decoded, &dlen, TRUE);
1107 free(*part);
1108 if(res) {
1109 *part = NULL;
1110 return CURLUE_URLDECODE;
1111 }
1112 *part = decoded;
1113 }
1114 return CURLUE_OK;
1115 }
1116 else
1117 return ifmissing;
1118}
1119
1120CURLUcode curl_url_set(CURLU *u, CURLUPart what,
1121 const char *part, unsigned int flags)
1122{
1123 char **storep = NULL;
1124 long port = 0;
1125 bool urlencode = (flags & CURLU_URLENCODE)? 1 : 0;
1126 bool plusencode = FALSE;
1127 bool urlskipslash = FALSE;
1128 bool appendquery = FALSE;
1129 bool equalsencode = FALSE;
1130
1131 if(!u)
1132 return CURLUE_BAD_HANDLE;
1133 if(!part) {
1134 /* setting a part to NULL clears it */
1135 switch(what) {
1136 case CURLUPART_URL:
1137 break;
1138 case CURLUPART_SCHEME:
1139 storep = &u->scheme;
1140 break;
1141 case CURLUPART_USER:
1142 storep = &u->user;
1143 break;
1144 case CURLUPART_PASSWORD:
1145 storep = &u->password;
1146 break;
1147 case CURLUPART_OPTIONS:
1148 storep = &u->options;
1149 break;
1150 case CURLUPART_HOST:
1151 storep = &u->host;
1152 break;
1153 case CURLUPART_PORT:
1154 storep = &u->port;
1155 break;
1156 case CURLUPART_PATH:
1157 storep = &u->path;
1158 break;
1159 case CURLUPART_QUERY:
1160 storep = &u->query;
1161 break;
1162 case CURLUPART_FRAGMENT:
1163 storep = &u->fragment;
1164 break;
1165 default:
1166 return CURLUE_UNKNOWN_PART;
1167 }
1168 if(storep && *storep) {
1169 free(*storep);
1170 *storep = NULL;
1171 }
1172 return CURLUE_OK;
1173 }
1174
1175 switch(what) {
1176 case CURLUPART_SCHEME:
1177 if(!(flags & CURLU_NON_SUPPORT_SCHEME) &&
1178 /* verify that it is a fine scheme */
1179 !Curl_builtin_scheme(part))
1180 return CURLUE_UNSUPPORTED_SCHEME;
1181 storep = &u->scheme;
1182 urlencode = FALSE; /* never */
1183 break;
1184 case CURLUPART_USER:
1185 storep = &u->user;
1186 break;
1187 case CURLUPART_PASSWORD:
1188 storep = &u->password;
1189 break;
1190 case CURLUPART_OPTIONS:
1191 storep = &u->options;
1192 break;
1193 case CURLUPART_HOST:
1194 storep = &u->host;
1195 break;
1196 case CURLUPART_PORT:
1197 urlencode = FALSE; /* never */
1198 port = strtol(part, NULL, 10); /* Port number must be decimal */
1199 if((port <= 0) || (port > 0xffff))
1200 return CURLUE_BAD_PORT_NUMBER;
1201 storep = &u->port;
1202 break;
1203 case CURLUPART_PATH:
1204 urlskipslash = TRUE;
1205 storep = &u->path;
1206 break;
1207 case CURLUPART_QUERY:
1208 plusencode = urlencode;
1209 appendquery = (flags & CURLU_APPENDQUERY)?1:0;
1210 equalsencode = appendquery;
1211 storep = &u->query;
1212 break;
1213 case CURLUPART_FRAGMENT:
1214 storep = &u->fragment;
1215 break;
1216 case CURLUPART_URL: {
1217 /*
1218 * Allow a new URL to replace the existing (if any) contents.
1219 *
1220 * If the existing contents is enough for a URL, allow a relative URL to
1221 * replace it.
1222 */
1223 CURLUcode result;
1224 char *oldurl;
1225 char *redired_url;
1226 CURLU *handle2;
1227
1228 if(Curl_is_absolute_url(part, NULL, MAX_SCHEME_LEN)) {
1229 handle2 = curl_url();
1230 if(!handle2)
1231 return CURLUE_OUT_OF_MEMORY;
1232 result = parseurl(part, handle2, flags);
1233 if(!result)
1234 mv_urlhandle(handle2, u);
1235 else
1236 curl_url_cleanup(handle2);
1237 return result;
1238 }
1239 /* extract the full "old" URL to do the redirect on */
1240 result = curl_url_get(u, CURLUPART_URL, &oldurl, flags);
1241 if(result) {
1242 /* couldn't get the old URL, just use the new! */
1243 handle2 = curl_url();
1244 if(!handle2)
1245 return CURLUE_OUT_OF_MEMORY;
1246 result = parseurl(part, handle2, flags);
1247 if(!result)
1248 mv_urlhandle(handle2, u);
1249 else
1250 curl_url_cleanup(handle2);
1251 return result;
1252 }
1253
1254 /* apply the relative part to create a new URL */
1255 redired_url = Curl_concat_url(oldurl, part);
1256 free(oldurl);
1257 if(!redired_url)
1258 return CURLUE_OUT_OF_MEMORY;
1259
1260 /* now parse the new URL */
1261 handle2 = curl_url();
1262 if(!handle2) {
1263 free(redired_url);
1264 return CURLUE_OUT_OF_MEMORY;
1265 }
1266 result = parseurl(redired_url, handle2, flags);
1267 free(redired_url);
1268 if(!result)
1269 mv_urlhandle(handle2, u);
1270 else
1271 curl_url_cleanup(handle2);
1272 return result;
1273 }
1274 default:
1275 return CURLUE_UNKNOWN_PART;
1276 }
1277 if(storep) {
1278 const char *newp = part;
1279 size_t nalloc = strlen(part);
1280
1281 if(urlencode) {
1282 const char *i;
1283 char *o;
1284 bool free_part = FALSE;
1285 char *enc = malloc(nalloc * 3 + 1); /* for worst case! */
1286 if(!enc)
1287 return CURLUE_OUT_OF_MEMORY;
1288 if(plusencode) {
1289 /* space to plus */
1290 i = part;
1291 for(o = enc; *i; ++o, ++i)
1292 *o = (*i == ' ') ? '+' : *i;
1293 *o = 0; /* zero terminate */
1294 part = strdup(enc);
1295 if(!part) {
1296 free(enc);
1297 return CURLUE_OUT_OF_MEMORY;
1298 }
1299 free_part = TRUE;
1300 }
1301 for(i = part, o = enc; *i; i++) {
1302 if(Curl_isunreserved(*i) ||
1303 ((*i == '/') && urlskipslash) ||
1304 ((*i == '=') && equalsencode) ||
1305 ((*i == '+') && plusencode)) {
1306 if((*i == '=') && equalsencode)
1307 /* only skip the first equals sign */
1308 equalsencode = FALSE;
1309 *o = *i;
1310 o++;
1311 }
1312 else {
1313 msnprintf(o, 4, "%%%02x", *i);
1314 o += 3;
1315 }
1316 }
1317 *o = 0; /* zero terminate */
1318 newp = enc;
1319 if(free_part)
1320 free((char *)part);
1321 }
1322 else {
1323 char *p;
1324 newp = strdup(part);
1325 if(!newp)
1326 return CURLUE_OUT_OF_MEMORY;
1327 p = (char *)newp;
1328 while(*p) {
1329 /* make sure percent encoded are lower case */
1330 if((*p == '%') && ISXDIGIT(p[1]) && ISXDIGIT(p[2]) &&
1331 (ISUPPER(p[1]) || ISUPPER(p[2]))) {
1332 p[1] = (char)TOLOWER(p[1]);
1333 p[2] = (char)TOLOWER(p[2]);
1334 p += 3;
1335 }
1336 else
1337 p++;
1338 }
1339 }
1340
1341 if(appendquery) {
1342 /* Append the string onto the old query. Add a '&' separator if none is
1343 present at the end of the existing query already */
1344 size_t querylen = u->query ? strlen(u->query) : 0;
1345 bool addamperand = querylen && (u->query[querylen -1] != '&');
1346 if(querylen) {
1347 size_t newplen = strlen(newp);
1348 char *p = malloc(querylen + addamperand + newplen + 1);
1349 if(!p) {
1350 free((char *)newp);
1351 return CURLUE_OUT_OF_MEMORY;
1352 }
1353 strcpy(p, u->query); /* original query */
1354 if(addamperand)
1355 p[querylen] = '&'; /* ampersand */
1356 strcpy(&p[querylen + addamperand], newp); /* new suffix */
1357 free((char *)newp);
1358 free(*storep);
1359 *storep = p;
1360 return CURLUE_OK;
1361 }
1362 }
1363
1364 free(*storep);
1365 *storep = (char *)newp;
1366 }
1367 /* set after the string, to make it not assigned if the allocation above
1368 fails */
1369 if(port)
1370 u->portnum = port;
1371 return CURLUE_OK;
1372}
Note: See TracBrowser for help on using the repository browser.

© 2024 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette