VirtualBox

source: vbox/trunk/src/libs/curl-7.87.0/lib/urlapi.c@ 98326

Last change on this file since 98326 was 98326, checked in by vboxsync, 2 years ago

curl-7.87.0: Applied and adjusted our curl changes to 7.83.1. bugref:10356

  • Property svn:eol-style set to native
File size: 50.3 KB
Line 
1/***************************************************************************
2 * _ _ ____ _
3 * Project ___| | | | _ \| |
4 * / __| | | | |_) | |
5 * | (__| |_| | _ <| |___
6 * \___|\___/|_| \_\_____|
7 *
8 * Copyright (C) 1998 - 2022, Daniel Stenberg, <[email protected]>, et al.
9 *
10 * This software is licensed as described in the file COPYING, which
11 * you should have received as part of this distribution. The terms
12 * are also available at https://curl.se/docs/copyright.html.
13 *
14 * You may opt to use, copy, modify, merge, publish, distribute and/or sell
15 * copies of the Software, and permit persons to whom the Software is
16 * furnished to do so, under the terms of the COPYING file.
17 *
18 * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
19 * KIND, either express or implied.
20 *
21 * SPDX-License-Identifier: curl
22 *
23 ***************************************************************************/
24
25#include "curl_setup.h"
26
27#include "urldata.h"
28#include "urlapi-int.h"
29#include "strcase.h"
30#include "url.h"
31#include "escape.h"
32#include "curl_ctype.h"
33#include "inet_pton.h"
34#include "inet_ntop.h"
35#include "strdup.h"
36
37/* The last 3 #include files should be in this order */
38#include "curl_printf.h"
39#include "curl_memory.h"
40#include "memdebug.h"
41
/* MSDOS/Windows style drive prefix, e.g. "c:" in "c:foo".
 *
 * Evaluates to non-zero when 'str' starts with an ASCII letter immediately
 * followed by a colon. The argument is parenthesized at every use so that
 * expressions such as 'p + 1' can be passed. 'str' is evaluated several
 * times, so it must not have side effects. */
#define STARTS_WITH_DRIVE_PREFIX(str) \
  ((('a' <= (str)[0] && (str)[0] <= 'z') || \
    ('A' <= (str)[0] && (str)[0] <= 'Z')) && \
   ((str)[1] == ':'))
47
/* URL flavour of the MSDOS/Windows drive prefix: an ASCII letter, then
 * either ':' or '|', and after that a slash, a backslash or the end of the
 * string. 'str' is evaluated several times. */
#define STARTS_WITH_URL_DRIVE_PREFIX(str) \
  (((((str)[0] >= 'a') && ((str)[0] <= 'z')) || \
    (((str)[0] >= 'A') && ((str)[0] <= 'Z'))) && \
   (((str)[1] == '|') || ((str)[1] == ':')) && \
   (((str)[2] == 0) || ((str)[2] == '\\') || ((str)[2] == '/')))
55
/* Upper bound on the length of a URL scheme name; the scheme is not URL
   encoded. Curl_is_absolute_url() inspects at most this many bytes when
   looking for a scheme, and parseurl() sizes its scheme buffer as
   MAX_SCHEME_LEN + 1. */
#define MAX_SCHEME_LEN 40
58
/* Internal representation of CURLU. Point to URL-encoded strings.
   Every char pointer member is handed to free() by free_urlhandle(). */
struct Curl_URL {
  char *scheme;   /* scheme name, e.g. "file" or a copy of the parsed scheme */
  char *user;     /* user name taken from the login part of the authority */
  char *password; /* password taken from the login part of the authority */
  char *options; /* IMAP only? */
  char *host;
  char *zoneid; /* for numerical IPv6 addresses */
  char *port;     /* decimal port string, regenerated by Curl_parse_port() */
  char *path;
  char *query;    /* stored without the leading '?' */
  char *fragment; /* stored without the leading '#' */
  long portnum; /* the numerical version */
};
73
/* Scheme that parseurl() assumes when the URL carries no scheme and the
   CURLU_DEFAULT_SCHEME flag is set */
#define DEFAULT_SCHEME "https"
75
76static void free_urlhandle(struct Curl_URL *u)
77{
78 free(u->scheme);
79 free(u->user);
80 free(u->password);
81 free(u->options);
82 free(u->host);
83 free(u->zoneid);
84 free(u->port);
85 free(u->path);
86 free(u->query);
87 free(u->fragment);
88}
89
/*
 * Return a pointer to the byte that ends the host name part of 'url': the
 * first '/' or '?' found after the leading "//" (or from the start of the
 * string when there is no "//"), whichever comes first. This copes with
 * URLs like http://www.url.com?id=2380. When neither separator exists, the
 * returned pointer is the terminating null byte.
 */
static const char *find_host_sep(const char *url)
{
  const char *end = url + strlen(url);
  const char *host = strstr(url, "//");
  const char *slash;
  const char *qmark;

  /* the host name begins right after the double slash, if there is one */
  host = host ? host + 2 : url;

  slash = strchr(host, '/');
  qmark = strchr(host, '?');

  if(!slash)
    slash = end;
  if(!qmark)
    qmark = end;

  return (qmark <= slash) ? qmark : slash;
}
117
118/*
119 * Decide in an encoding-independent manner whether a character in a URL must
120 * be escaped. This is used in urlencode_str().
121 */
122static bool urlchar_needs_escaping(int c)
123{
124 return !(ISCNTRL(c) || ISSPACE(c) || ISGRAPH(c));
125}
126
127/* urlencode_str() writes data into an output dynbuf and URL-encodes the
128 * spaces in the source URL accordingly.
129 *
130 * URL encoding should be skipped for host names, otherwise IDN resolution
131 * will fail.
132 */
133static CURLUcode urlencode_str(struct dynbuf *o, const char *url,
134 size_t len, bool relative,
135 bool query)
136{
137 /* we must add this with whitespace-replacing */
138 bool left = !query;
139 const unsigned char *iptr;
140 const unsigned char *host_sep = (const unsigned char *) url;
141
142 if(!relative)
143 host_sep = (const unsigned char *) find_host_sep(url);
144
145 for(iptr = (unsigned char *)url; /* read from here */
146 len; iptr++, len--) {
147
148 if(iptr < host_sep) {
149 if(Curl_dyn_addn(o, iptr, 1))
150 return CURLUE_OUT_OF_MEMORY;
151 continue;
152 }
153
154 if(*iptr == ' ') {
155 if(left) {
156 if(Curl_dyn_addn(o, "%20", 3))
157 return CURLUE_OUT_OF_MEMORY;
158 }
159 else {
160 if(Curl_dyn_addn(o, "+", 1))
161 return CURLUE_OUT_OF_MEMORY;
162 }
163 continue;
164 }
165
166 if(*iptr == '?')
167 left = FALSE;
168
169 if(urlchar_needs_escaping(*iptr)) {
170 if(Curl_dyn_addf(o, "%%%02x", *iptr))
171 return CURLUE_OUT_OF_MEMORY;
172 }
173 else {
174 if(Curl_dyn_addn(o, iptr, 1))
175 return CURLUE_OUT_OF_MEMORY;
176 }
177 }
178
179 return CURLUE_OK;
180}
181
182/*
183 * Returns the length of the scheme if the given URL is absolute (as opposed
184 * to relative). Stores the scheme in the buffer if TRUE and 'buf' is
185 * non-NULL. The buflen must be larger than MAX_SCHEME_LEN if buf is set.
186 *
187 * If 'guess_scheme' is TRUE, it means the URL might be provided without
188 * scheme.
189 */
190size_t Curl_is_absolute_url(const char *url, char *buf, size_t buflen,
191 bool guess_scheme)
192{
193 int i;
194 DEBUGASSERT(!buf || (buflen > MAX_SCHEME_LEN));
195 (void)buflen; /* only used in debug-builds */
196 if(buf)
197 buf[0] = 0; /* always leave a defined value in buf */
198#ifdef WIN32
199 if(guess_scheme && STARTS_WITH_DRIVE_PREFIX(url))
200 return 0;
201#endif
202 for(i = 0; i < MAX_SCHEME_LEN; ++i) {
203 char s = url[i];
204 if(s && (ISALNUM(s) || (s == '+') || (s == '-') || (s == '.') )) {
205 /* RFC 3986 3.1 explains:
206 scheme = ALPHA *( ALPHA / DIGIT / "+" / "-" / "." )
207 */
208 }
209 else {
210 break;
211 }
212 }
213 if(i && (url[i] == ':') && ((url[i + 1] == '/') || !guess_scheme)) {
214 /* If this does not guess scheme, the scheme always ends with the colon so
215 that this also detects data: URLs etc. In guessing mode, data: could
216 be the host name "data" with a specified port number. */
217
218 /* the length of the scheme is the name part only */
219 size_t len = i;
220 if(buf) {
221 buf[i] = 0;
222 while(i--) {
223 buf[i] = Curl_raw_tolower(url[i]);
224 }
225 }
226 return len;
227 }
228 return 0;
229}
230
231/*
232 * Concatenate a relative URL to a base URL making it absolute.
233 * URL-encodes any spaces.
234 * The returned pointer must be freed by the caller unless NULL
235 * (returns NULL on out of memory).
236 *
237 * Note that this function destroys the 'base' string.
238 */
239static char *concat_url(char *base, const char *relurl)
240{
241 /***
242 TRY to append this new path to the old URL
243 to the right of the host part. Oh crap, this is doomed to cause
244 problems in the future...
245 */
246 struct dynbuf newest;
247 char *protsep;
248 char *pathsep;
249 bool host_changed = FALSE;
250 const char *useurl = relurl;
251
252 /* protsep points to the start of the host name */
253 protsep = strstr(base, "//");
254 if(!protsep)
255 protsep = base;
256 else
257 protsep += 2; /* pass the slashes */
258
259 if('/' != relurl[0]) {
260 int level = 0;
261
262 /* First we need to find out if there's a ?-letter in the URL,
263 and cut it and the right-side of that off */
264 pathsep = strchr(protsep, '?');
265 if(pathsep)
266 *pathsep = 0;
267
268 /* we have a relative path to append to the last slash if there's one
269 available, or if the new URL is just a query string (starts with a
270 '?') we append the new one at the end of the entire currently worked
271 out URL */
272 if(useurl[0] != '?') {
273 pathsep = strrchr(protsep, '/');
274 if(pathsep)
275 *pathsep = 0;
276 }
277
278 /* Check if there's any slash after the host name, and if so, remember
279 that position instead */
280 pathsep = strchr(protsep, '/');
281 if(pathsep)
282 protsep = pathsep + 1;
283 else
284 protsep = NULL;
285
286 /* now deal with one "./" or any amount of "../" in the newurl
287 and act accordingly */
288
289 if((useurl[0] == '.') && (useurl[1] == '/'))
290 useurl += 2; /* just skip the "./" */
291
292 while((useurl[0] == '.') &&
293 (useurl[1] == '.') &&
294 (useurl[2] == '/')) {
295 level++;
296 useurl += 3; /* pass the "../" */
297 }
298
299 if(protsep) {
300 while(level--) {
301 /* cut off one more level from the right of the original URL */
302 pathsep = strrchr(protsep, '/');
303 if(pathsep)
304 *pathsep = 0;
305 else {
306 *protsep = 0;
307 break;
308 }
309 }
310 }
311 }
312 else {
313 /* We got a new absolute path for this server */
314
315 if(relurl[1] == '/') {
316 /* the new URL starts with //, just keep the protocol part from the
317 original one */
318 *protsep = 0;
319 useurl = &relurl[2]; /* we keep the slashes from the original, so we
320 skip the new ones */
321 host_changed = TRUE;
322 }
323 else {
324 /* cut off the original URL from the first slash, or deal with URLs
325 without slash */
326 pathsep = strchr(protsep, '/');
327 if(pathsep) {
328 /* When people use badly formatted URLs, such as
329 "http://www.url.com?dir=/home/daniel" we must not use the first
330 slash, if there's a ?-letter before it! */
331 char *sep = strchr(protsep, '?');
332 if(sep && (sep < pathsep))
333 pathsep = sep;
334 *pathsep = 0;
335 }
336 else {
337 /* There was no slash. Now, since we might be operating on a badly
338 formatted URL, such as "http://www.url.com?id=2380" which doesn't
339 use a slash separator as it is supposed to, we need to check for a
340 ?-letter as well! */
341 pathsep = strchr(protsep, '?');
342 if(pathsep)
343 *pathsep = 0;
344 }
345 }
346 }
347
348 Curl_dyn_init(&newest, CURL_MAX_INPUT_LENGTH);
349
350 /* copy over the root url part */
351 if(Curl_dyn_add(&newest, base))
352 return NULL;
353
354 /* check if we need to append a slash */
355 if(('/' == useurl[0]) || (protsep && !*protsep) || ('?' == useurl[0]))
356 ;
357 else {
358 if(Curl_dyn_addn(&newest, "/", 1))
359 return NULL;
360 }
361
362 /* then append the new piece on the right side */
363 urlencode_str(&newest, useurl, strlen(useurl), !host_changed, FALSE);
364
365 return Curl_dyn_ptr(&newest);
366}
367
368/* scan for byte values < 31 or 127 */
369static bool junkscan(const char *part, unsigned int flags)
370{
371 if(part) {
372 static const char badbytes[]={
373 /* */ 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
374 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
375 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17,
376 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f,
377 0x7f, 0x00 /* null-terminate */
378 };
379 size_t n = strlen(part);
380 size_t nfine = strcspn(part, badbytes);
381 if(nfine != n)
382 /* since we don't know which part is scanned, return a generic error
383 code */
384 return TRUE;
385 if(!(flags & CURLU_ALLOW_SPACE) && strchr(part, ' '))
386 return TRUE;
387 }
388 return FALSE;
389}
390
391/*
392 * parse_hostname_login()
393 *
394 * Parse the login details (user name, password and options) from the URL and
395 * strip them out of the host name
396 *
397 */
398static CURLUcode parse_hostname_login(struct Curl_URL *u,
399 struct dynbuf *host,
400 unsigned int flags)
401{
402 CURLUcode result = CURLUE_OK;
403 CURLcode ccode;
404 char *userp = NULL;
405 char *passwdp = NULL;
406 char *optionsp = NULL;
407 const struct Curl_handler *h = NULL;
408
409 /* At this point, we assume all the other special cases have been taken
410 * care of, so the host is at most
411 *
412 * [user[:password][;options]]@]hostname
413 *
414 * We need somewhere to put the embedded details, so do that first.
415 */
416
417 char *login = Curl_dyn_ptr(host);
418 char *ptr;
419
420 DEBUGASSERT(login);
421
422 ptr = strchr(login, '@');
423 if(!ptr)
424 goto out;
425
426 /* We will now try to extract the
427 * possible login information in a string like:
428 * ftp://user:[email protected]:8021/README */
429 ptr++;
430
431 /* if this is a known scheme, get some details */
432 if(u->scheme)
433 h = Curl_builtin_scheme(u->scheme, CURL_ZERO_TERMINATED);
434
435 /* We could use the login information in the URL so extract it. Only parse
436 options if the handler says we should. Note that 'h' might be NULL! */
437 ccode = Curl_parse_login_details(login, ptr - login - 1,
438 &userp, &passwdp,
439 (h && (h->flags & PROTOPT_URLOPTIONS)) ?
440 &optionsp:NULL);
441 if(ccode) {
442 result = CURLUE_BAD_LOGIN;
443 goto out;
444 }
445
446 if(userp) {
447 if(flags & CURLU_DISALLOW_USER) {
448 /* Option DISALLOW_USER is set and url contains username. */
449 result = CURLUE_USER_NOT_ALLOWED;
450 goto out;
451 }
452 if(junkscan(userp, flags)) {
453 result = CURLUE_BAD_USER;
454 goto out;
455 }
456 u->user = userp;
457 }
458
459 if(passwdp) {
460 if(junkscan(passwdp, flags)) {
461 result = CURLUE_BAD_PASSWORD;
462 goto out;
463 }
464 u->password = passwdp;
465 }
466
467 if(optionsp) {
468 if(junkscan(optionsp, flags)) {
469 result = CURLUE_BAD_LOGIN;
470 goto out;
471 }
472 u->options = optionsp;
473 }
474
475 /* move the name to the start of the host buffer */
476 if(Curl_dyn_tail(host, strlen(ptr)))
477 return CURLUE_OUT_OF_MEMORY;
478
479 return CURLUE_OK;
480 out:
481
482 free(userp);
483 free(passwdp);
484 free(optionsp);
485 u->user = NULL;
486 u->password = NULL;
487 u->options = NULL;
488
489 return result;
490}
491
/*
 * Curl_parse_port()
 *
 * Look for a ":port" suffix in the 'host' buffer, which holds either a plain
 * host name or a bracketed numerical IPv6 address, optionally with a zone
 * id. If a colon is found, the buffer is cut at the colon and, when digits
 * follow, the port is stored in 'u' both as a number (u->portnum) and as a
 * newly allocated decimal string (u->port).
 *
 * 'has_scheme' tells if the URL had a scheme; only then is a trailing colon
 * without digits accepted.
 *
 * Returns CURLUE_OK, CURLUE_BAD_IPV6, CURLUE_BAD_PORT_NUMBER or
 * CURLUE_OUT_OF_MEMORY. Note that the host buffer is shortened before the
 * port is validated, so it stays cut off also when a port error is returned.
 */
UNITTEST CURLUcode Curl_parse_port(struct Curl_URL *u, struct dynbuf *host,
                                   bool has_scheme)
{
  char *portptr = NULL;
  char endbracket;
  int len;
  char *hostname = Curl_dyn_ptr(host);
  /*
   * Find the end of an IPv6 address, either on the ']' ending bracket or
   * a percent-encoded zone index.
   */
  if(1 == sscanf(hostname, "[%*45[0123456789abcdefABCDEF:.]%c%n",
                 &endbracket, &len)) {
    /* 'len' is the number of bytes consumed so far, including the byte
       stored in 'endbracket' */
    if(']' == endbracket)
      portptr = &hostname[len];
    else if('%' == endbracket) {
      int zonelen = len;
      /* skip over the zone id up to and including the closing bracket; this
         second scan restarts 'len' counting from hostname + zonelen */
      if(1 == sscanf(hostname + zonelen, "%*[^]]%c%n", &endbracket, &len)) {
        if(']' != endbracket)
          return CURLUE_BAD_IPV6;
        portptr = &hostname[--zonelen + len + 1];
      }
      else
        return CURLUE_BAD_IPV6;
    }
    else
      return CURLUE_BAD_IPV6;

    /* this is a RFC2732-style specified IP-address */
    if(portptr && *portptr) {
      /* only a colon may follow the closing bracket */
      if(*portptr != ':')
        return CURLUE_BAD_IPV6;
    }
    else
      portptr = NULL;
  }
  else
    /* not a bracketed address: the first colon starts the port */
    portptr = strchr(hostname, ':');

  if(portptr) {
    char *rest;
    long port;
    char portbuf[7];
    size_t keep = portptr - hostname; /* host length without colon and port */

    /* Browser behavior adaptation. If there's a colon with no digits after,
       just cut off the name there which makes us ignore the colon and just
       use the default port. Firefox, Chrome and Safari all do that.

       Don't do it if the URL has no scheme, to make something that looks like
       a scheme not work!
    */
    Curl_dyn_setlen(host, keep);
    portptr++;
    if(!*portptr)
      return has_scheme ? CURLUE_OK : CURLUE_BAD_PORT_NUMBER;

    if(!ISDIGIT(*portptr))
      return CURLUE_BAD_PORT_NUMBER;

    port = strtol(portptr, &rest, 10);  /* Port number must be decimal */

    if(port > 0xffff)
      return CURLUE_BAD_PORT_NUMBER;

    /* trailing non-digits after the number are not accepted */
    if(rest[0])
      return CURLUE_BAD_PORT_NUMBER;

    /* NOTE(review): rest[0] was just verified to be zero, so this store
       looks redundant */
    *rest = 0;
    /* generate a new port number string to get rid of leading zeroes etc */
    msnprintf(portbuf, sizeof(portbuf), "%ld", port);
    u->portnum = port;
    u->port = strdup(portbuf);
    if(!u->port)
      return CURLUE_OUT_OF_MEMORY;
  }

  return CURLUE_OK;
}
571
/*
 * hostname_check()
 *
 * Validate 'hostname', which is 'hlen' bytes long, and normalize it in
 * place when it is a bracketed numerical IPv6 address:
 *
 * - a zone id ("%zone" or "%25zone") is cut out of the host name and a copy
 *   of it is stored in u->zoneid
 * - in ENABLE_IPV6 builds, the address is verified with Curl_inet_pton() and
 *   replaced by the Curl_inet_ntop() form if that is shorter
 *
 * Any other host name is only checked for forbidden characters.
 *
 * Returns CURLUE_OK, CURLUE_NO_HOST, CURLUE_BAD_IPV6, CURLUE_BAD_HOSTNAME,
 * CURLUE_MALFORMED_INPUT or CURLUE_OUT_OF_MEMORY.
 */
static CURLUcode hostname_check(struct Curl_URL *u, char *hostname,
                                size_t hlen) /* length of hostname */
{
  size_t len;
  DEBUGASSERT(hostname);

  if(!hostname[0])
    return CURLUE_NO_HOST;
  else if(hostname[0] == '[') {
    const char *l = "0123456789abcdefABCDEF:.";
    if(hlen < 4) /* '[::]' is the shortest possible valid string */
      return CURLUE_BAD_IPV6;
    /* step past the '[' and make hlen the length between the brackets, so
       that hostname[hlen] is the last byte of the input */
    hostname++;
    hlen -= 2;

    if(hostname[hlen] != ']')
      return CURLUE_BAD_IPV6;

    /* only valid letters are ok */
    len = strspn(hostname, l);
    if(hlen != len) {
      /* something other than address letters before the ']' */
      hlen = len;
      if(hostname[len] == '%') {
        /* this could now be '%[zone id]' */
        char zoneid[16];
        int i = 0;
        char *h = &hostname[len + 1];
        /* pass '25' if present and is a url encoded percent sign */
        if(!strncmp(h, "25", 2) && h[2] && (h[2] != ']'))
          h += 2;
        /* copy at most 15 bytes of zone id, leaving room for the zero */
        while(*h && (*h != ']') && (i < 15))
          zoneid[i++] = *h++;
        if(!i || (']' != *h))
          /* impossible to reach? */
          /* NOTE(review): looks reachable when the zone id is empty or
             longer than 15 bytes */
          return CURLUE_MALFORMED_INPUT;
        zoneid[i] = 0;
        u->zoneid = strdup(zoneid);
        if(!u->zoneid)
          return CURLUE_OUT_OF_MEMORY;
        hostname[len] = ']'; /* insert end bracket */
        hostname[len + 1] = 0; /* terminate the hostname */
      }
      else
        return CURLUE_BAD_IPV6;
      /* hostname is fine */
    }
#ifdef ENABLE_IPV6
    {
      char dest[16]; /* fits a binary IPv6 address */
      char norm[MAX_IPADR_LEN];
      /* temporarily overwrite the ending bracket with a terminator */
      hostname[hlen] = 0; /* end the address there */
      if(1 != Curl_inet_pton(AF_INET6, hostname, dest))
        return CURLUE_BAD_IPV6;

      /* check if it can be done shorter */
      if(Curl_inet_ntop(AF_INET6, dest, norm, sizeof(norm)) &&
         (strlen(norm) < hlen)) {
        /* the replacement is strictly shorter, so it fits */
        strcpy(hostname, norm);
        hlen = strlen(norm);
        /* terminate after the position where the bracket goes back in */
        hostname[hlen + 1] = 0;
      }
      hostname[hlen] = ']'; /* restore ending bracket */
    }
#endif
  }
  else {
    /* letters from the second string are not ok */
    len = strcspn(hostname, " \r\n\t/:#?!@{}[]\\$\'\"^`*<>=;,+&()");
    if(hlen != len)
      /* hostname with bad content */
      return CURLUE_BAD_HOSTNAME;
  }
  return CURLUE_OK;
}
646
/* TRUE for the bytes that end the host name (and port) part of a URL: the
   start of the path, the query or the fragment. 'x' is evaluated up to
   three times. */
#define HOSTNAME_END(x) \
  ((x) == '/' || (x) == '?' || (x) == '#')
648
649/*
650 * Handle partial IPv4 numerical addresses and different bases, like
651 * '16843009', '0x7f', '0x7f.1' '0177.1.1.1' etc.
652 *
653 * If the given input string is syntactically wrong or any part for example is
654 * too big, this function returns FALSE and doesn't create any output.
655 *
656 * Output the "normalized" version of that input string in plain quad decimal
657 * integers and return TRUE.
658 */
659static bool ipv4_normalize(const char *hostname, char *outp, size_t olen)
660{
661 bool done = FALSE;
662 int n = 0;
663 const char *c = hostname;
664 unsigned long parts[4] = {0, 0, 0, 0};
665
666 while(!done) {
667 char *endp;
668 unsigned long l;
669 if((*c < '0') || (*c > '9'))
670 /* most importantly this doesn't allow a leading plus or minus */
671 return FALSE;
672 l = strtoul(c, &endp, 0);
673
674 /* overflow or nothing parsed at all */
675 if(((l == ULONG_MAX) && (errno == ERANGE)) || (endp == c))
676 return FALSE;
677
678#if SIZEOF_LONG > 4
679 /* a value larger than 32 bits */
680 if(l > UINT_MAX)
681 return FALSE;
682#endif
683
684 parts[n] = l;
685 c = endp;
686
687 switch (*c) {
688 case '.' :
689 if(n == 3)
690 return FALSE;
691 n++;
692 c++;
693 break;
694
695 case '\0':
696 done = TRUE;
697 break;
698
699 default:
700 return FALSE;
701 }
702 }
703
704 /* this is deemed a valid IPv4 numerical address */
705
706 switch(n) {
707 case 0: /* a -- 32 bits */
708 msnprintf(outp, olen, "%u.%u.%u.%u",
709 parts[0] >> 24, (parts[0] >> 16) & 0xff,
710 (parts[0] >> 8) & 0xff, parts[0] & 0xff);
711 break;
712 case 1: /* a.b -- 8.24 bits */
713 if((parts[0] > 0xff) || (parts[1] > 0xffffff))
714 return FALSE;
715 msnprintf(outp, olen, "%u.%u.%u.%u",
716 parts[0], (parts[1] >> 16) & 0xff,
717 (parts[1] >> 8) & 0xff, parts[1] & 0xff);
718 break;
719 case 2: /* a.b.c -- 8.8.16 bits */
720 if((parts[0] > 0xff) || (parts[1] > 0xff) || (parts[2] > 0xffff))
721 return FALSE;
722 msnprintf(outp, olen, "%u.%u.%u.%u",
723 parts[0], parts[1], (parts[2] >> 8) & 0xff,
724 parts[2] & 0xff);
725 break;
726 case 3: /* a.b.c.d -- 8.8.8.8 bits */
727 if((parts[0] > 0xff) || (parts[1] > 0xff) || (parts[2] > 0xff) ||
728 (parts[3] > 0xff))
729 return FALSE;
730 msnprintf(outp, olen, "%u.%u.%u.%u",
731 parts[0], parts[1], parts[2], parts[3]);
732 break;
733 }
734 return TRUE;
735}
736
737/* if necessary, replace the host content with a URL decoded version */
738static CURLUcode decode_host(struct dynbuf *host)
739{
740 char *per = NULL;
741 const char *hostname = Curl_dyn_ptr(host);
742 if(hostname[0] == '[')
743 /* only decode if not an ipv6 numerical */
744 return CURLUE_OK;
745 per = strchr(hostname, '%');
746 if(!per)
747 /* nothing to decode */
748 return CURLUE_OK;
749 else {
750 /* encoded */
751 size_t dlen;
752 char *decoded;
753 CURLcode result = Curl_urldecode(hostname, 0, &decoded, &dlen,
754 REJECT_CTRL);
755 if(result)
756 return CURLUE_BAD_HOSTNAME;
757 Curl_dyn_reset(host);
758 result = Curl_dyn_addn(host, decoded, dlen);
759 free(decoded);
760 if(result)
761 return CURLUE_OUT_OF_MEMORY;
762 }
763
764 return CURLUE_OK;
765}
766
767/*
768 * "Remove Dot Segments"
769 * https://datatracker.ietf.org/doc/html/rfc3986#section-5.2.4
770 */
771
772/*
773 * dedotdotify()
774 * @unittest: 1395
775 *
776 * This function gets a null-terminated path with dot and dotdot sequences
777 * passed in and strips them off according to the rules in RFC 3986 section
778 * 5.2.4.
779 *
780 * The function handles a query part ('?' + stuff) appended but it expects
781 * that fragments ('#' + stuff) have already been cut off.
782 *
783 * RETURNS
784 *
785 * an allocated dedotdotified output string
786 */
/* 'input' is the null-terminated path, optionally followed by a query part.
   'clen' is used to size the output buffer (clen + 1 bytes).
   NOTE(review): the copy loops stop at the null terminator and not at
   'clen', so 'clen' presumably has to be the full string length of 'input'
   -- confirm against the callers. Returns NULL on out of memory. */
UNITTEST char *dedotdotify(const char *input, size_t clen);
UNITTEST char *dedotdotify(const char *input, size_t clen)
{
  char *out = malloc(clen + 1);
  char *outptr; /* where the next output byte goes */
  const char *orginput = input;
  char *queryp;
  if(!out)
    return NULL; /* out of memory */

  *out = 0; /* null-terminates, for inputs like "./" */
  outptr = out;

  if(!*input)
    /* zero length input string, return that */
    return out;

  /*
   * To handle query-parts properly, we must find it and remove it during the
   * dotdot-operation and then append it again at the end to the output
   * string.
   */
  queryp = strchr(input, '?');

  /* NOTE(review): 'clen' is decremented below as input is consumed but it is
     never read again after the malloc() above */
  do {
    bool dotdot = TRUE; /* cleared when no dot-segment rule matched */
    if(*input == '.') {
      /*  A.  If the input buffer begins with a prefix of "../" or "./", then
          remove that prefix from the input buffer; otherwise, */

      if(!strncmp("./", input, 2)) {
        input += 2;
        clen -= 2;
      }
      else if(!strncmp("../", input, 3)) {
        input += 3;
        clen -= 3;
      }
      /*  D.  if the input buffer consists only of "." or "..", then remove
          that from the input buffer; otherwise, */

      else if(!strcmp(".", input) || !strcmp("..", input) ||
              !strncmp(".?", input, 2) || !strncmp("..?", input, 3)) {
        *out = 0;
        break;
      }
      else
        dotdot = FALSE;
    }
    else if(*input == '/') {
      /*  B.  if the input buffer begins with a prefix of "/./" or "/.", where
          "."  is a complete path segment, then replace that prefix with "/" in
          the input buffer; otherwise, */
      if(!strncmp("/./", input, 3)) {
        /* skip "/." and continue from the second slash */
        input += 2;
        clen -= 2;
      }
      else if(!strcmp("/.", input) || !strncmp("/.?", input, 3)) {
        *outptr++ = '/';
        *outptr = 0;
        break;
      }

      /*  C.  if the input buffer begins with a prefix of "/../" or "/..",
          where ".." is a complete path segment, then replace that prefix with
          "/" in the input buffer and remove the last segment and its
          preceding "/" (if any) from the output buffer; otherwise, */

      else if(!strncmp("/../", input, 4)) {
        /* skip "/.." and continue from the second slash */
        input += 3;
        clen -= 3;
        /* remove the last segment from the output buffer */
        while(outptr > out) {
          outptr--;
          if(*outptr == '/')
            break;
        }
        *outptr = 0; /* null-terminate where it stops */
      }
      else if(!strcmp("/..", input) || !strncmp("/..?", input, 4)) {
        /* remove the last segment from the output buffer */
        while(outptr > out) {
          outptr--;
          if(*outptr == '/')
            break;
        }
        *outptr++ = '/';
        *outptr = 0; /* null-terminate where it stops */
        break;
      }
      else
        dotdot = FALSE;
    }
    else
      dotdot = FALSE;

    if(!dotdot) {
      /*  E.  move the first path segment in the input buffer to the end of
          the output buffer, including the initial "/" character (if any) and
          any subsequent characters up to, but not including, the next "/"
          character or the end of the input buffer. */

      do {
        *outptr++ = *input++;
        clen--;
      } while(*input && (*input != '/') && (*input != '?'));
      *outptr = 0;
    }

    /* continue until end of input string OR, if there is a terminating
       query part, stop there */
  } while(*input && (!queryp || (input < queryp)));

  if(queryp) {
    size_t qlen;
    /* There was a query part, append that to the output. */
    size_t oindex = queryp - orginput;
    qlen = strlen(&orginput[oindex]);
    memcpy(outptr, &orginput[oindex], qlen + 1); /* include zero byte */
  }

  return out;
}
910
911static CURLUcode parseurl(const char *url, CURLU *u, unsigned int flags)
912{
913 const char *path;
914 size_t pathlen;
915 bool uncpath = FALSE;
916 char *query = NULL;
917 char *fragment = NULL;
918 char schemebuf[MAX_SCHEME_LEN + 1];
919 const char *schemep = NULL;
920 size_t schemelen = 0;
921 size_t urllen;
922 CURLUcode result = CURLUE_OK;
923 size_t fraglen = 0;
924 struct dynbuf host;
925
926 DEBUGASSERT(url);
927
928 Curl_dyn_init(&host, CURL_MAX_INPUT_LENGTH);
929
930 /*************************************************************
931 * Parse the URL.
932 ************************************************************/
933 /* allocate scratch area */
934 urllen = strlen(url);
935 if(urllen > CURL_MAX_INPUT_LENGTH) {
936 /* excessive input length */
937 result = CURLUE_MALFORMED_INPUT;
938 goto fail;
939 }
940
941 schemelen = Curl_is_absolute_url(url, schemebuf, sizeof(schemebuf),
942 flags & (CURLU_GUESS_SCHEME|
943 CURLU_DEFAULT_SCHEME));
944
945 /* handle the file: scheme */
946 if(schemelen && !strcmp(schemebuf, "file")) {
947 if(urllen <= 6) {
948 /* file:/ is not enough to actually be a complete file: URL */
949 result = CURLUE_BAD_FILE_URL;
950 goto fail;
951 }
952
953 /* path has been allocated large enough to hold this */
954 path = (char *)&url[5];
955
956 schemep = u->scheme = strdup("file");
957 if(!u->scheme) {
958 result = CURLUE_OUT_OF_MEMORY;
959 goto fail;
960 }
961
962 /* Extra handling URLs with an authority component (i.e. that start with
963 * "file://")
964 *
965 * We allow omitted hostname (e.g. file:/<path>) -- valid according to
966 * RFC 8089, but not the (current) WHAT-WG URL spec.
967 */
968 if(path[0] == '/' && path[1] == '/') {
969 /* swallow the two slashes */
970 const char *ptr = &path[2];
971
972 /*
973 * According to RFC 8089, a file: URL can be reliably dereferenced if:
974 *
975 * o it has no/blank hostname, or
976 *
977 * o the hostname matches "localhost" (case-insensitively), or
978 *
979 * o the hostname is a FQDN that resolves to this machine, or
980 *
981 * o it is an UNC String transformed to an URI (Windows only, RFC 8089
982 * Appendix E.3).
983 *
984 * For brevity, we only consider URLs with empty, "localhost", or
985 * "127.0.0.1" hostnames as local, otherwise as an UNC String.
986 *
987 * Additionally, there is an exception for URLs with a Windows drive
988 * letter in the authority (which was accidentally omitted from RFC 8089
989 * Appendix E, but believe me, it was meant to be there. --MK)
990 */
991 if(ptr[0] != '/' && !STARTS_WITH_URL_DRIVE_PREFIX(ptr)) {
992 /* the URL includes a host name, it must match "localhost" or
993 "127.0.0.1" to be valid */
994 if(checkprefix("localhost/", ptr) ||
995 checkprefix("127.0.0.1/", ptr)) {
996 ptr += 9; /* now points to the slash after the host */
997 }
998 else {
999#if defined(WIN32)
1000 size_t len;
1001
1002 /* the host name, NetBIOS computer name, can not contain disallowed
1003 chars, and the delimiting slash character must be appended to the
1004 host name */
1005 path = strpbrk(ptr, "/\\:*?\"<>|");
1006 if(!path || *path != '/') {
1007 result = CURLUE_BAD_FILE_URL;
1008 goto fail;
1009 }
1010
1011 len = path - ptr;
1012 if(len) {
1013 if(Curl_dyn_addn(&host, ptr, len)) {
1014 result = CURLUE_OUT_OF_MEMORY;
1015 goto fail;
1016 }
1017 uncpath = TRUE;
1018 }
1019
1020 ptr -= 2; /* now points to the // before the host in UNC */
1021#else
1022 /* Invalid file://hostname/, expected localhost or 127.0.0.1 or
1023 none */
1024 result = CURLUE_BAD_FILE_URL;
1025 goto fail;
1026#endif
1027 }
1028 }
1029
1030 path = ptr;
1031 }
1032
1033 if(!uncpath)
1034 /* no host for file: URLs by default */
1035 Curl_dyn_reset(&host);
1036
1037#if !defined(MSDOS) && !defined(WIN32) && !defined(__CYGWIN__)
1038 /* Don't allow Windows drive letters when not in Windows.
1039 * This catches both "file:/c:" and "file:c:" */
1040 if(('/' == path[0] && STARTS_WITH_URL_DRIVE_PREFIX(&path[1])) ||
1041 STARTS_WITH_URL_DRIVE_PREFIX(path)) {
1042 /* File drive letters are only accepted in MSDOS/Windows */
1043 result = CURLUE_BAD_FILE_URL;
1044 goto fail;
1045 }
1046#else
1047 /* If the path starts with a slash and a drive letter, ditch the slash */
1048 if('/' == path[0] && STARTS_WITH_URL_DRIVE_PREFIX(&path[1])) {
1049 /* This cannot be done with strcpy, as the memory chunks overlap! */
1050 path++;
1051 }
1052#endif
1053
1054 }
1055 else {
1056 /* clear path */
1057 const char *p;
1058 const char *hostp;
1059 size_t len;
1060
1061 if(schemelen) {
1062 int i = 0;
1063 p = &url[schemelen + 1];
1064 while(p && (*p == '/') && (i < 4)) {
1065 p++;
1066 i++;
1067 }
1068
1069 schemep = schemebuf;
1070 if(!Curl_builtin_scheme(schemep, CURL_ZERO_TERMINATED) &&
1071 !(flags & CURLU_NON_SUPPORT_SCHEME)) {
1072 result = CURLUE_UNSUPPORTED_SCHEME;
1073 goto fail;
1074 }
1075
1076 if((i < 1) || (i>3)) {
1077 /* less than one or more than three slashes */
1078 result = CURLUE_BAD_SLASHES;
1079 goto fail;
1080 }
1081 if(junkscan(schemep, flags)) {
1082 result = CURLUE_BAD_SCHEME;
1083 goto fail;
1084 }
1085 }
1086 else {
1087 /* no scheme! */
1088
1089 if(!(flags & (CURLU_DEFAULT_SCHEME|CURLU_GUESS_SCHEME))) {
1090 result = CURLUE_BAD_SCHEME;
1091 goto fail;
1092 }
1093 if(flags & CURLU_DEFAULT_SCHEME)
1094 schemep = DEFAULT_SCHEME;
1095
1096 /*
1097 * The URL was badly formatted, let's try without scheme specified.
1098 */
1099 p = url;
1100 }
1101 hostp = p; /* host name starts here */
1102
1103 /* find the end of the host name + port number */
1104 while(*p && !HOSTNAME_END(*p))
1105 p++;
1106
1107 len = p - hostp;
1108 if(len) {
1109 if(Curl_dyn_addn(&host, hostp, len)) {
1110 result = CURLUE_OUT_OF_MEMORY;
1111 goto fail;
1112 }
1113 }
1114 else {
1115 if(!(flags & CURLU_NO_AUTHORITY)) {
1116 result = CURLUE_NO_HOST;
1117 goto fail;
1118 }
1119 }
1120
1121 path = (char *)p;
1122
1123 if(schemep) {
1124 u->scheme = strdup(schemep);
1125 if(!u->scheme) {
1126 result = CURLUE_OUT_OF_MEMORY;
1127 goto fail;
1128 }
1129 }
1130 }
1131
1132 fragment = strchr(path, '#');
1133 if(fragment) {
1134 fraglen = strlen(fragment);
1135 if(fraglen > 1) {
1136 /* skip the leading '#' in the copy but include the terminating null */
1137 u->fragment = Curl_memdup(fragment + 1, fraglen);
1138 if(!u->fragment) {
1139 result = CURLUE_OUT_OF_MEMORY;
1140 goto fail;
1141 }
1142
1143 if(junkscan(u->fragment, flags)) {
1144 result = CURLUE_BAD_FRAGMENT;
1145 goto fail;
1146 }
1147 }
1148 }
1149
1150 query = strchr(path, '?');
1151 if(query && (!fragment || (query < fragment))) {
1152 size_t qlen = strlen(query) - fraglen; /* includes '?' */
1153 pathlen = strlen(path) - qlen - fraglen;
1154 if(qlen > 1) {
1155 if(qlen && (flags & CURLU_URLENCODE)) {
1156 struct dynbuf enc;
1157 Curl_dyn_init(&enc, CURL_MAX_INPUT_LENGTH);
1158 /* skip the leading question mark */
1159 if(urlencode_str(&enc, query + 1, qlen - 1, TRUE, TRUE)) {
1160 result = CURLUE_OUT_OF_MEMORY;
1161 goto fail;
1162 }
1163 u->query = Curl_dyn_ptr(&enc);
1164 }
1165 else {
1166 u->query = Curl_memdup(query + 1, qlen);
1167 if(!u->query) {
1168 result = CURLUE_OUT_OF_MEMORY;
1169 goto fail;
1170 }
1171 u->query[qlen - 1] = 0;
1172 }
1173
1174 if(junkscan(u->query, flags)) {
1175 result = CURLUE_BAD_QUERY;
1176 goto fail;
1177 }
1178 }
1179 else {
1180 /* single byte query */
1181 u->query = strdup("");
1182 if(!u->query) {
1183 result = CURLUE_OUT_OF_MEMORY;
1184 goto fail;
1185 }
1186 }
1187 }
1188 else
1189 pathlen = strlen(path) - fraglen;
1190
1191 if(pathlen && (flags & CURLU_URLENCODE)) {
1192 struct dynbuf enc;
1193 Curl_dyn_init(&enc, CURL_MAX_INPUT_LENGTH);
1194 if(urlencode_str(&enc, path, pathlen, TRUE, FALSE)) {
1195 result = CURLUE_OUT_OF_MEMORY;
1196 goto fail;
1197 }
1198 pathlen = Curl_dyn_len(&enc);
1199 path = u->path = Curl_dyn_ptr(&enc);
1200 }
1201
1202 if(!pathlen) {
1203 /* there is no path left, unset */
1204 path = NULL;
1205 }
1206 else {
1207 if(!u->path) {
1208 u->path = Curl_memdup(path, pathlen + 1);
1209 if(!u->path) {
1210 result = CURLUE_OUT_OF_MEMORY;
1211 goto fail;
1212 }
1213 u->path[pathlen] = 0;
1214 path = u->path;
1215 }
1216 else if(flags & CURLU_URLENCODE)
1217 /* it might have encoded more than just the path so cut it */
1218 u->path[pathlen] = 0;
1219
1220 if(junkscan(u->path, flags)) {
1221 result = CURLUE_BAD_PATH;
1222 goto fail;
1223 }
1224
1225 if(!(flags & CURLU_PATH_AS_IS)) {
1226 /* remove ../ and ./ sequences according to RFC3986 */
1227 char *newp = dedotdotify((char *)path, pathlen);
1228 if(!newp) {
1229 result = CURLUE_OUT_OF_MEMORY;
1230 goto fail;
1231 }
1232 free(u->path);
1233 u->path = newp;
1234 }
1235 }
1236
1237 if(Curl_dyn_len(&host)) {
1238 char normalized_ipv4[sizeof("255.255.255.255") + 1];
1239
1240 /*
1241 * Parse the login details and strip them out of the host name.
1242 */
1243 result = parse_hostname_login(u, &host, flags);
1244 if(!result)
1245 result = Curl_parse_port(u, &host, schemelen);
1246 if(result)
1247 goto fail;
1248
1249 if(junkscan(Curl_dyn_ptr(&host), flags)) {
1250 result = CURLUE_BAD_HOSTNAME;
1251 goto fail;
1252 }
1253
1254 if(ipv4_normalize(Curl_dyn_ptr(&host),
1255 normalized_ipv4, sizeof(normalized_ipv4))) {
1256 Curl_dyn_reset(&host);
1257 if(Curl_dyn_add(&host, normalized_ipv4)) {
1258 result = CURLUE_OUT_OF_MEMORY;
1259 goto fail;
1260 }
1261 }
1262 else {
1263 result = decode_host(&host);
1264 if(!result)
1265 result = hostname_check(u, Curl_dyn_ptr(&host), Curl_dyn_len(&host));
1266 if(result)
1267 goto fail;
1268 }
1269
1270 if((flags & CURLU_GUESS_SCHEME) && !schemep) {
1271 const char *hostname = Curl_dyn_ptr(&host);
1272 /* legacy curl-style guess based on host name */
1273 if(checkprefix("ftp.", hostname))
1274 schemep = "ftp";
1275 else if(checkprefix("dict.", hostname))
1276 schemep = "dict";
1277 else if(checkprefix("ldap.", hostname))
1278 schemep = "ldap";
1279 else if(checkprefix("imap.", hostname))
1280 schemep = "imap";
1281 else if(checkprefix("smtp.", hostname))
1282 schemep = "smtp";
1283 else if(checkprefix("pop3.", hostname))
1284 schemep = "pop3";
1285 else
1286 schemep = "http";
1287
1288 u->scheme = strdup(schemep);
1289 if(!u->scheme) {
1290 result = CURLUE_OUT_OF_MEMORY;
1291 goto fail;
1292 }
1293 }
1294 }
1295 else if(flags & CURLU_NO_AUTHORITY) {
1296 /* allowed to be empty. */
1297 if(Curl_dyn_add(&host, "")) {
1298 result = CURLUE_OUT_OF_MEMORY;
1299 goto fail;
1300 }
1301 }
1302
1303 u->host = Curl_dyn_ptr(&host);
1304
1305 return result;
1306 fail:
1307 Curl_dyn_free(&host);
1308 free_urlhandle(u);
1309 return result;
1310}
1311
1312/*
1313 * Parse the URL and, if successful, replace everything in the Curl_URL struct.
1314 */
1315static CURLUcode parseurl_and_replace(const char *url, CURLU *u,
1316 unsigned int flags)
1317{
1318 CURLUcode result;
1319 CURLU tmpurl;
1320 memset(&tmpurl, 0, sizeof(tmpurl));
1321 result = parseurl(url, &tmpurl, flags);
1322 if(!result) {
1323 free_urlhandle(u);
1324 *u = tmpurl;
1325 }
1326 return result;
1327}
1328
1329/*
1330 */
1331CURLU *curl_url(void)
1332{
1333 return calloc(sizeof(struct Curl_URL), 1);
1334}
1335
1336void curl_url_cleanup(CURLU *u)
1337{
1338 if(u) {
1339 free_urlhandle(u);
1340 free(u);
1341 }
1342}
1343
/*
 * DUP() copies the string member 'name' from handle 'src' into handle 'dest'
 * when the source member is set. A failed strdup() jumps to a 'fail' label
 * that the function using the macro has to provide.
 *
 * The pointer arguments are parenthesized so that expressions such as
 * '&handle' can be passed safely.
 */
#define DUP(dest, src, name)                    \
  do {                                          \
    if((src)->name) {                           \
      (dest)->name = strdup((src)->name);       \
      if(!(dest)->name)                         \
        goto fail;                              \
    }                                           \
  } while(0)
1352
1353CURLU *curl_url_dup(CURLU *in)
1354{
1355 struct Curl_URL *u = calloc(sizeof(struct Curl_URL), 1);
1356 if(u) {
1357 DUP(u, in, scheme);
1358 DUP(u, in, user);
1359 DUP(u, in, password);
1360 DUP(u, in, options);
1361 DUP(u, in, host);
1362 DUP(u, in, port);
1363 DUP(u, in, path);
1364 DUP(u, in, query);
1365 DUP(u, in, fragment);
1366 u->portnum = in->portnum;
1367 }
1368 return u;
1369 fail:
1370 curl_url_cleanup(u);
1371 return NULL;
1372}
1373
1374CURLUcode curl_url_get(CURLU *u, CURLUPart what,
1375 char **part, unsigned int flags)
1376{
1377 char *ptr;
1378 CURLUcode ifmissing = CURLUE_UNKNOWN_PART;
1379 char portbuf[7];
1380 bool urldecode = (flags & CURLU_URLDECODE)?1:0;
1381 bool urlencode = (flags & CURLU_URLENCODE)?1:0;
1382 bool plusdecode = FALSE;
1383 (void)flags;
1384 if(!u)
1385 return CURLUE_BAD_HANDLE;
1386 if(!part)
1387 return CURLUE_BAD_PARTPOINTER;
1388 *part = NULL;
1389
1390 switch(what) {
1391 case CURLUPART_SCHEME:
1392 ptr = u->scheme;
1393 ifmissing = CURLUE_NO_SCHEME;
1394 urldecode = FALSE; /* never for schemes */
1395 break;
1396 case CURLUPART_USER:
1397 ptr = u->user;
1398 ifmissing = CURLUE_NO_USER;
1399 break;
1400 case CURLUPART_PASSWORD:
1401 ptr = u->password;
1402 ifmissing = CURLUE_NO_PASSWORD;
1403 break;
1404 case CURLUPART_OPTIONS:
1405 ptr = u->options;
1406 ifmissing = CURLUE_NO_OPTIONS;
1407 break;
1408 case CURLUPART_HOST:
1409 ptr = u->host;
1410 ifmissing = CURLUE_NO_HOST;
1411 break;
1412 case CURLUPART_ZONEID:
1413 ptr = u->zoneid;
1414 ifmissing = CURLUE_NO_ZONEID;
1415 break;
1416 case CURLUPART_PORT:
1417 ptr = u->port;
1418 ifmissing = CURLUE_NO_PORT;
1419 urldecode = FALSE; /* never for port */
1420 if(!ptr && (flags & CURLU_DEFAULT_PORT) && u->scheme) {
1421 /* there's no stored port number, but asked to deliver
1422 a default one for the scheme */
1423 const struct Curl_handler *h =
1424 Curl_builtin_scheme(u->scheme, CURL_ZERO_TERMINATED);
1425 if(h) {
1426 msnprintf(portbuf, sizeof(portbuf), "%u", h->defport);
1427 ptr = portbuf;
1428 }
1429 }
1430 else if(ptr && u->scheme) {
1431 /* there is a stored port number, but ask to inhibit if
1432 it matches the default one for the scheme */
1433 const struct Curl_handler *h =
1434 Curl_builtin_scheme(u->scheme, CURL_ZERO_TERMINATED);
1435 if(h && (h->defport == u->portnum) &&
1436 (flags & CURLU_NO_DEFAULT_PORT))
1437 ptr = NULL;
1438 }
1439 break;
1440 case CURLUPART_PATH:
1441 ptr = u->path;
1442 if(!ptr) {
1443 ptr = u->path = strdup("/");
1444 if(!u->path)
1445 return CURLUE_OUT_OF_MEMORY;
1446 }
1447 break;
1448 case CURLUPART_QUERY:
1449 ptr = u->query;
1450 ifmissing = CURLUE_NO_QUERY;
1451 plusdecode = urldecode;
1452 break;
1453 case CURLUPART_FRAGMENT:
1454 ptr = u->fragment;
1455 ifmissing = CURLUE_NO_FRAGMENT;
1456 break;
1457 case CURLUPART_URL: {
1458 char *url;
1459 char *scheme;
1460 char *options = u->options;
1461 char *port = u->port;
1462 char *allochost = NULL;
1463 if(u->scheme && strcasecompare("file", u->scheme)) {
1464 url = aprintf("file://%s%s%s",
1465 u->path,
1466 u->fragment? "#": "",
1467 u->fragment? u->fragment : "");
1468 }
1469 else if(!u->host)
1470 return CURLUE_NO_HOST;
1471 else {
1472 const struct Curl_handler *h = NULL;
1473 if(u->scheme)
1474 scheme = u->scheme;
1475 else if(flags & CURLU_DEFAULT_SCHEME)
1476 scheme = (char *) DEFAULT_SCHEME;
1477 else
1478 return CURLUE_NO_SCHEME;
1479
1480 h = Curl_builtin_scheme(scheme, CURL_ZERO_TERMINATED);
1481 if(!port && (flags & CURLU_DEFAULT_PORT)) {
1482 /* there's no stored port number, but asked to deliver
1483 a default one for the scheme */
1484 if(h) {
1485 msnprintf(portbuf, sizeof(portbuf), "%u", h->defport);
1486 port = portbuf;
1487 }
1488 }
1489 else if(port) {
1490 /* there is a stored port number, but asked to inhibit if it matches
1491 the default one for the scheme */
1492 if(h && (h->defport == u->portnum) &&
1493 (flags & CURLU_NO_DEFAULT_PORT))
1494 port = NULL;
1495 }
1496
1497 if(h && !(h->flags & PROTOPT_URLOPTIONS))
1498 options = NULL;
1499
1500 if(u->host[0] == '[') {
1501 if(u->zoneid) {
1502 /* make it '[ host %25 zoneid ]' */
1503 struct dynbuf enc;
1504 size_t hostlen = strlen(u->host);
1505 Curl_dyn_init(&enc, CURL_MAX_INPUT_LENGTH);
1506 if(Curl_dyn_addf(&enc, "%.*s%%25%s]", (int)hostlen - 1, u->host,
1507 u->zoneid))
1508 return CURLUE_OUT_OF_MEMORY;
1509 allochost = Curl_dyn_ptr(&enc);
1510 }
1511 }
1512 else if(urlencode) {
1513 allochost = curl_easy_escape(NULL, u->host, 0);
1514 if(!allochost)
1515 return CURLUE_OUT_OF_MEMORY;
1516 }
1517 else {
1518 /* only encode '%' in output host name */
1519 char *host = u->host;
1520 bool percent = FALSE;
1521 /* first, count number of percents present in the name */
1522 while(*host) {
1523 if(*host == '%') {
1524 percent = TRUE;
1525 break;
1526 }
1527 host++;
1528 }
1529 /* if there were percent(s), encode the host name */
1530 if(percent) {
1531 struct dynbuf enc;
1532 CURLcode result;
1533 Curl_dyn_init(&enc, CURL_MAX_INPUT_LENGTH);
1534 host = u->host;
1535 while(*host) {
1536 if(*host == '%')
1537 result = Curl_dyn_addn(&enc, "%25", 3);
1538 else
1539 result = Curl_dyn_addn(&enc, host, 1);
1540 if(result)
1541 return CURLUE_OUT_OF_MEMORY;
1542 host++;
1543 }
1544 free(u->host);
1545 u->host = Curl_dyn_ptr(&enc);
1546 }
1547 }
1548
1549 url = aprintf("%s://%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s",
1550 scheme,
1551 u->user ? u->user : "",
1552 u->password ? ":": "",
1553 u->password ? u->password : "",
1554 options ? ";" : "",
1555 options ? options : "",
1556 (u->user || u->password || options) ? "@": "",
1557 allochost ? allochost : u->host,
1558 port ? ":": "",
1559 port ? port : "",
1560 (u->path && (u->path[0] != '/')) ? "/": "",
1561 u->path ? u->path : "/",
1562 (u->query && u->query[0]) ? "?": "",
1563 (u->query && u->query[0]) ? u->query : "",
1564 u->fragment? "#": "",
1565 u->fragment? u->fragment : "");
1566 free(allochost);
1567 }
1568 if(!url)
1569 return CURLUE_OUT_OF_MEMORY;
1570 *part = url;
1571 return CURLUE_OK;
1572 }
1573 default:
1574 ptr = NULL;
1575 break;
1576 }
1577 if(ptr) {
1578 size_t partlen = strlen(ptr);
1579 size_t i = 0;
1580 *part = Curl_memdup(ptr, partlen + 1);
1581 if(!*part)
1582 return CURLUE_OUT_OF_MEMORY;
1583 if(plusdecode) {
1584 /* convert + to space */
1585 char *plus = *part;
1586 for(i = 0; i < partlen; ++plus, i++) {
1587 if(*plus == '+')
1588 *plus = ' ';
1589 }
1590 }
1591 if(urldecode) {
1592 char *decoded;
1593 size_t dlen;
1594 /* this unconditional rejection of control bytes is documented
1595 API behavior */
1596 CURLcode res = Curl_urldecode(*part, 0, &decoded, &dlen, REJECT_CTRL);
1597 free(*part);
1598 if(res) {
1599 *part = NULL;
1600 return CURLUE_URLDECODE;
1601 }
1602 *part = decoded;
1603 partlen = dlen;
1604 }
1605 if(urlencode) {
1606 struct dynbuf enc;
1607 Curl_dyn_init(&enc, CURL_MAX_INPUT_LENGTH);
1608 if(urlencode_str(&enc, *part, partlen, TRUE,
1609 what == CURLUPART_QUERY))
1610 return CURLUE_OUT_OF_MEMORY;
1611 free(*part);
1612 *part = Curl_dyn_ptr(&enc);
1613 }
1614
1615 return CURLUE_OK;
1616 }
1617 else
1618 return ifmissing;
1619}
1620
1621CURLUcode curl_url_set(CURLU *u, CURLUPart what,
1622 const char *part, unsigned int flags)
1623{
1624 char **storep = NULL;
1625 long port = 0;
1626 bool urlencode = (flags & CURLU_URLENCODE)? 1 : 0;
1627 bool plusencode = FALSE;
1628 bool urlskipslash = FALSE;
1629 bool appendquery = FALSE;
1630 bool equalsencode = FALSE;
1631
1632 if(!u)
1633 return CURLUE_BAD_HANDLE;
1634 if(!part) {
1635 /* setting a part to NULL clears it */
1636 switch(what) {
1637 case CURLUPART_URL:
1638 break;
1639 case CURLUPART_SCHEME:
1640 storep = &u->scheme;
1641 break;
1642 case CURLUPART_USER:
1643 storep = &u->user;
1644 break;
1645 case CURLUPART_PASSWORD:
1646 storep = &u->password;
1647 break;
1648 case CURLUPART_OPTIONS:
1649 storep = &u->options;
1650 break;
1651 case CURLUPART_HOST:
1652 storep = &u->host;
1653 break;
1654 case CURLUPART_ZONEID:
1655 storep = &u->zoneid;
1656 break;
1657 case CURLUPART_PORT:
1658 u->portnum = 0;
1659 storep = &u->port;
1660 break;
1661 case CURLUPART_PATH:
1662 storep = &u->path;
1663 break;
1664 case CURLUPART_QUERY:
1665 storep = &u->query;
1666 break;
1667 case CURLUPART_FRAGMENT:
1668 storep = &u->fragment;
1669 break;
1670 default:
1671 return CURLUE_UNKNOWN_PART;
1672 }
1673 if(storep && *storep) {
1674 Curl_safefree(*storep);
1675 }
1676 else if(!storep) {
1677 free_urlhandle(u);
1678 memset(u, 0, sizeof(struct Curl_URL));
1679 }
1680 return CURLUE_OK;
1681 }
1682
1683 switch(what) {
1684 case CURLUPART_SCHEME:
1685 if(strlen(part) > MAX_SCHEME_LEN)
1686 /* too long */
1687 return CURLUE_BAD_SCHEME;
1688 if(!(flags & CURLU_NON_SUPPORT_SCHEME) &&
1689 /* verify that it is a fine scheme */
1690 !Curl_builtin_scheme(part, CURL_ZERO_TERMINATED))
1691 return CURLUE_UNSUPPORTED_SCHEME;
1692 storep = &u->scheme;
1693 urlencode = FALSE; /* never */
1694 break;
1695 case CURLUPART_USER:
1696 storep = &u->user;
1697 break;
1698 case CURLUPART_PASSWORD:
1699 storep = &u->password;
1700 break;
1701 case CURLUPART_OPTIONS:
1702 storep = &u->options;
1703 break;
1704 case CURLUPART_HOST: {
1705 size_t len = strcspn(part, " \r\n");
1706 if(strlen(part) != len)
1707 /* hostname with bad content */
1708 return CURLUE_BAD_HOSTNAME;
1709 storep = &u->host;
1710 Curl_safefree(u->zoneid);
1711 break;
1712 }
1713 case CURLUPART_ZONEID:
1714 storep = &u->zoneid;
1715 break;
1716 case CURLUPART_PORT:
1717 {
1718 char *endp;
1719 urlencode = FALSE; /* never */
1720 port = strtol(part, &endp, 10); /* Port number must be decimal */
1721 if((port <= 0) || (port > 0xffff))
1722 return CURLUE_BAD_PORT_NUMBER;
1723 if(*endp)
1724 /* weirdly provided number, not good! */
1725 return CURLUE_BAD_PORT_NUMBER;
1726 storep = &u->port;
1727 }
1728 break;
1729 case CURLUPART_PATH:
1730 urlskipslash = TRUE;
1731 storep = &u->path;
1732 break;
1733 case CURLUPART_QUERY:
1734 plusencode = urlencode;
1735 appendquery = (flags & CURLU_APPENDQUERY)?1:0;
1736 equalsencode = appendquery;
1737 storep = &u->query;
1738 break;
1739 case CURLUPART_FRAGMENT:
1740 storep = &u->fragment;
1741 break;
1742 case CURLUPART_URL: {
1743 /*
1744 * Allow a new URL to replace the existing (if any) contents.
1745 *
1746 * If the existing contents is enough for a URL, allow a relative URL to
1747 * replace it.
1748 */
1749 CURLUcode result;
1750 char *oldurl;
1751 char *redired_url;
1752
1753 /* if the new thing is absolute or the old one is not
1754 * (we could not get an absolute url in 'oldurl'),
1755 * then replace the existing with the new. */
1756 if(Curl_is_absolute_url(part, NULL, 0,
1757 flags & (CURLU_GUESS_SCHEME|
1758 CURLU_DEFAULT_SCHEME))
1759 || curl_url_get(u, CURLUPART_URL, &oldurl, flags)) {
1760 return parseurl_and_replace(part, u, flags);
1761 }
1762
1763 /* apply the relative part to create a new URL
1764 * and replace the existing one with it. */
1765 redired_url = concat_url(oldurl, part);
1766 free(oldurl);
1767 if(!redired_url)
1768 return CURLUE_OUT_OF_MEMORY;
1769
1770 result = parseurl_and_replace(redired_url, u, flags);
1771 free(redired_url);
1772 return result;
1773 }
1774 default:
1775 return CURLUE_UNKNOWN_PART;
1776 }
1777 DEBUGASSERT(storep);
1778 {
1779 const char *newp = part;
1780 size_t nalloc = strlen(part);
1781
1782 if(nalloc > CURL_MAX_INPUT_LENGTH)
1783 /* excessive input length */
1784 return CURLUE_MALFORMED_INPUT;
1785
1786 if(urlencode) {
1787 const unsigned char *i;
1788 struct dynbuf enc;
1789
1790 Curl_dyn_init(&enc, nalloc * 3 + 1);
1791
1792 for(i = (const unsigned char *)part; *i; i++) {
1793 CURLcode result;
1794 if((*i == ' ') && plusencode) {
1795 result = Curl_dyn_addn(&enc, "+", 1);
1796 if(result)
1797 return CURLUE_OUT_OF_MEMORY;
1798 }
1799 else if(Curl_isunreserved(*i) ||
1800 ((*i == '/') && urlskipslash) ||
1801 ((*i == '=') && equalsencode)) {
1802 if((*i == '=') && equalsencode)
1803 /* only skip the first equals sign */
1804 equalsencode = FALSE;
1805 result = Curl_dyn_addn(&enc, i, 1);
1806 if(result)
1807 return CURLUE_OUT_OF_MEMORY;
1808 }
1809 else {
1810 result = Curl_dyn_addf(&enc, "%%%02x", *i);
1811 if(result)
1812 return CURLUE_OUT_OF_MEMORY;
1813 }
1814 }
1815 newp = Curl_dyn_ptr(&enc);
1816 }
1817 else {
1818 char *p;
1819 newp = strdup(part);
1820 if(!newp)
1821 return CURLUE_OUT_OF_MEMORY;
1822 p = (char *)newp;
1823 while(*p) {
1824 /* make sure percent encoded are lower case */
1825 if((*p == '%') && ISXDIGIT(p[1]) && ISXDIGIT(p[2]) &&
1826 (ISUPPER(p[1]) || ISUPPER(p[2]))) {
1827 p[1] = Curl_raw_tolower(p[1]);
1828 p[2] = Curl_raw_tolower(p[2]);
1829 p += 3;
1830 }
1831 else
1832 p++;
1833 }
1834 }
1835
1836 if(appendquery) {
1837 /* Append the 'newp' string onto the old query. Add a '&' separator if
1838 none is present at the end of the existing query already */
1839
1840 size_t querylen = u->query ? strlen(u->query) : 0;
1841 bool addamperand = querylen && (u->query[querylen -1] != '&');
1842 if(querylen) {
1843 struct dynbuf enc;
1844 Curl_dyn_init(&enc, CURL_MAX_INPUT_LENGTH);
1845
1846 if(Curl_dyn_addn(&enc, u->query, querylen)) /* add original query */
1847 goto nomem;
1848
1849 if(addamperand) {
1850 if(Curl_dyn_addn(&enc, "&", 1))
1851 goto nomem;
1852 }
1853 if(Curl_dyn_add(&enc, newp))
1854 goto nomem;
1855 free((char *)newp);
1856 free(*storep);
1857 *storep = Curl_dyn_ptr(&enc);
1858 return CURLUE_OK;
1859 nomem:
1860 free((char *)newp);
1861 return CURLUE_OUT_OF_MEMORY;
1862 }
1863 }
1864
1865 if(what == CURLUPART_HOST) {
1866 size_t n = strlen(newp);
1867 if(!n && (flags & CURLU_NO_AUTHORITY)) {
1868 /* Skip hostname check, it's allowed to be empty. */
1869 }
1870 else {
1871 if(hostname_check(u, (char *)newp, n)) {
1872 free((char *)newp);
1873 return CURLUE_BAD_HOSTNAME;
1874 }
1875 }
1876 }
1877
1878 free(*storep);
1879 *storep = (char *)newp;
1880 }
1881 /* set after the string, to make it not assigned if the allocation above
1882 fails */
1883 if(port)
1884 u->portnum = port;
1885 return CURLUE_OK;
1886}
Note: See TracBrowser for help on using the repository browser.

© 2024 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette