VirtualBox

source: vbox/trunk/src/libs/curl-8.0.1/lib/urlapi.c@ 99874

Last change on this file since 99874 was 99344, checked in by vboxsync, 2 years ago

curl-8.0.1: Applied and adjusted our curl changes to 7.87.0 bugref:10417

  • Property svn:eol-style set to native
File size: 51.0 KB
Line 
1/***************************************************************************
2 * _ _ ____ _
3 * Project ___| | | | _ \| |
4 * / __| | | | |_) | |
5 * | (__| |_| | _ <| |___
6 * \___|\___/|_| \_\_____|
7 *
8 * Copyright (C) Daniel Stenberg, <daniel@haxx.se>, et al.
9 *
10 * This software is licensed as described in the file COPYING, which
11 * you should have received as part of this distribution. The terms
12 * are also available at https://curl.se/docs/copyright.html.
13 *
14 * You may opt to use, copy, modify, merge, publish, distribute and/or sell
15 * copies of the Software, and permit persons to whom the Software is
16 * furnished to do so, under the terms of the COPYING file.
17 *
18 * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
19 * KIND, either express or implied.
20 *
21 * SPDX-License-Identifier: curl
22 *
23 ***************************************************************************/
24
25#include "curl_setup.h"
26
27#include "urldata.h"
28#include "urlapi-int.h"
29#include "strcase.h"
30#include "url.h"
31#include "escape.h"
32#include "curl_ctype.h"
33#include "inet_pton.h"
34#include "inet_ntop.h"
35#include "strdup.h"
36#include "idn.h"
37
38/* The last 3 #include files should be in this order */
39#include "curl_printf.h"
40#include "curl_memory.h"
41#include "memdebug.h"
42
/* MSDOS/Windows style drive prefix, eg c: in c:foo
 *
 * Evaluates to non-zero when 'str' starts with an ASCII letter followed by a
 * colon. The argument is parenthesized so that pointer expressions such as
 * 'p + 2' index correctly. It is evaluated more than once, so it must not
 * have side effects.
 */
#define STARTS_WITH_DRIVE_PREFIX(str) \
  ((('a' <= (str)[0] && (str)[0] <= 'z') || \
    ('A' <= (str)[0] && (str)[0] <= 'Z')) && \
   ((str)[1] == ':'))
48
/* Drive prefix as it may appear inside a URL: one ASCII letter, then ':' or
 * '|', then a slash, a backslash or the end of the string. The three
 * positions are tested left to right, so no byte past a terminating NUL is
 * read. The argument is evaluated several times. */
#define STARTS_WITH_URL_DRIVE_PREFIX(str) \
  (((((str)[0] >= 'A') && ((str)[0] <= 'Z')) || \
    (((str)[0] >= 'a') && ((str)[0] <= 'z'))) && \
   (((str)[1] == '|') || ((str)[1] == ':')) && \
   (((str)[2] == '\0') || ((str)[2] == '/') || ((str)[2] == '\\')))
56
/* Schemes are never URL encoded. This is the largest number of scheme
   characters Curl_is_absolute_url() will scan, and a buffer that receives a
   scheme must be larger than this. */
#define MAX_SCHEME_LEN 40
59
/*
 * IPv6 addresses are parsed even when ENABLE_IPV6 is not set. If no AF_INET6
 * is available in that configuration, define a stand-in here so the fake
 * value stays private to this file.
 */
#ifndef ENABLE_IPV6
#ifndef AF_INET6
#define AF_INET6 (AF_INET + 1)
#endif
#endif
68
/*
 * Internal representation of CURLU. The string members point to URL-encoded
 * strings and are all released by free_urlhandle().
 */
struct Curl_URL {
  char *scheme;   /* scheme name */
  char *user;     /* user name from the login part */
  char *password; /* password from the login part */
  char *options;  /* login options (IMAP only?) */
  char *host;     /* host name */
  char *zoneid;   /* zone id, for numerical IPv6 addresses */
  char *port;     /* port number as a string */
  char *path;     /* path part */
  char *query;    /* query part */
  char *fragment; /* fragment part */
  long portnum;   /* the numerical version of 'port' */
};
83
/* scheme used for a scheme-less URL when CURLU_DEFAULT_SCHEME is set */
#define DEFAULT_SCHEME "https"
85
86static void free_urlhandle(struct Curl_URL *u)
87{
88 free(u->scheme);
89 free(u->user);
90 free(u->password);
91 free(u->options);
92 free(u->host);
93 free(u->zoneid);
94 free(u->port);
95 free(u->path);
96 free(u->query);
97 free(u->fragment);
98}
99
/*
 * Return a pointer to the character that ends the host name: the first '/'
 * or '?' found after the "//" that introduces the authority (or after the
 * start of the string when there is no "//"). The '?' case covers URLs like
 * http://www.url.com?id=2380. When neither character is present, the
 * returned pointer addresses the terminating null byte.
 */
static const char *find_host_sep(const char *url)
{
  const char *end = url + strlen(url);
  const char *host = strstr(url, "//");
  const char *slash;
  const char *qmark;

  /* the host name starts right after the double slash, if there is one */
  host = host ? host + 2 : url;

  slash = strchr(host, '/');
  if(!slash)
    slash = end;

  qmark = strchr(host, '?');
  if(!qmark)
    qmark = end;

  /* whichever separator comes first */
  return (qmark <= slash) ? qmark : slash;
}
127
/*
 * Non-zero when 'c' has to be percent-encoded in a URL: it is neither a
 * control character, nor whitespace, nor a graphic character.
 */
#define urlchar_needs_escaping(c) \
  (!ISCNTRL(c) && !ISSPACE(c) && !ISGRAPH(c))
132
133static const char hexdigits[] = "0123456789abcdef";
134/* urlencode_str() writes data into an output dynbuf and URL-encodes the
135 * spaces in the source URL accordingly.
136 *
137 * URL encoding should be skipped for host names, otherwise IDN resolution
138 * will fail.
139 */
140static CURLUcode urlencode_str(struct dynbuf *o, const char *url,
141 size_t len, bool relative,
142 bool query)
143{
144 /* we must add this with whitespace-replacing */
145 bool left = !query;
146 const unsigned char *iptr;
147 const unsigned char *host_sep = (const unsigned char *) url;
148
149 if(!relative)
150 host_sep = (const unsigned char *) find_host_sep(url);
151
152 for(iptr = (unsigned char *)url; /* read from here */
153 len; iptr++, len--) {
154
155 if(iptr < host_sep) {
156 if(Curl_dyn_addn(o, iptr, 1))
157 return CURLUE_OUT_OF_MEMORY;
158 continue;
159 }
160
161 if(*iptr == ' ') {
162 if(left) {
163 if(Curl_dyn_addn(o, "%20", 3))
164 return CURLUE_OUT_OF_MEMORY;
165 }
166 else {
167 if(Curl_dyn_addn(o, "+", 1))
168 return CURLUE_OUT_OF_MEMORY;
169 }
170 continue;
171 }
172
173 if(*iptr == '?')
174 left = FALSE;
175
176 if(urlchar_needs_escaping(*iptr)) {
177 char out[3]={'%'};
178 out[1] = hexdigits[*iptr>>4];
179 out[2] = hexdigits[*iptr & 0xf];
180 if(Curl_dyn_addn(o, out, 3))
181 return CURLUE_OUT_OF_MEMORY;
182 }
183 else {
184 if(Curl_dyn_addn(o, iptr, 1))
185 return CURLUE_OUT_OF_MEMORY;
186 }
187 }
188
189 return CURLUE_OK;
190}
191
192/*
193 * Returns the length of the scheme if the given URL is absolute (as opposed
194 * to relative). Stores the scheme in the buffer if TRUE and 'buf' is
195 * non-NULL. The buflen must be larger than MAX_SCHEME_LEN if buf is set.
196 *
197 * If 'guess_scheme' is TRUE, it means the URL might be provided without
198 * scheme.
199 */
200size_t Curl_is_absolute_url(const char *url, char *buf, size_t buflen,
201 bool guess_scheme)
202{
203 int i;
204 DEBUGASSERT(!buf || (buflen > MAX_SCHEME_LEN));
205 (void)buflen; /* only used in debug-builds */
206 if(buf)
207 buf[0] = 0; /* always leave a defined value in buf */
208#ifdef WIN32
209 if(guess_scheme && STARTS_WITH_DRIVE_PREFIX(url))
210 return 0;
211#endif
212 for(i = 0; i < MAX_SCHEME_LEN; ++i) {
213 char s = url[i];
214 if(s && (ISALNUM(s) || (s == '+') || (s == '-') || (s == '.') )) {
215 /* RFC 3986 3.1 explains:
216 scheme = ALPHA *( ALPHA / DIGIT / "+" / "-" / "." )
217 */
218 }
219 else {
220 break;
221 }
222 }
223 if(i && (url[i] == ':') && ((url[i + 1] == '/') || !guess_scheme)) {
224 /* If this does not guess scheme, the scheme always ends with the colon so
225 that this also detects data: URLs etc. In guessing mode, data: could
226 be the host name "data" with a specified port number. */
227
228 /* the length of the scheme is the name part only */
229 size_t len = i;
230 if(buf) {
231 buf[i] = 0;
232 while(i--) {
233 buf[i] = Curl_raw_tolower(url[i]);
234 }
235 }
236 return len;
237 }
238 return 0;
239}
240
241/*
242 * Concatenate a relative URL to a base URL making it absolute.
243 * URL-encodes any spaces.
244 * The returned pointer must be freed by the caller unless NULL
245 * (returns NULL on out of memory).
246 *
247 * Note that this function destroys the 'base' string.
248 */
249static char *concat_url(char *base, const char *relurl)
250{
251 /***
252 TRY to append this new path to the old URL
253 to the right of the host part. Oh crap, this is doomed to cause
254 problems in the future...
255 */
256 struct dynbuf newest;
257 char *protsep;
258 char *pathsep;
259 bool host_changed = FALSE;
260 const char *useurl = relurl;
261
262 /* protsep points to the start of the host name */
263 protsep = strstr(base, "//");
264 if(!protsep)
265 protsep = base;
266 else
267 protsep += 2; /* pass the slashes */
268
269 if('/' != relurl[0]) {
270 int level = 0;
271
272 /* First we need to find out if there's a ?-letter in the URL,
273 and cut it and the right-side of that off */
274 pathsep = strchr(protsep, '?');
275 if(pathsep)
276 *pathsep = 0;
277
278 /* we have a relative path to append to the last slash if there's one
279 available, or if the new URL is just a query string (starts with a
280 '?') we append the new one at the end of the entire currently worked
281 out URL */
282 if(useurl[0] != '?') {
283 pathsep = strrchr(protsep, '/');
284 if(pathsep)
285 *pathsep = 0;
286 }
287
288 /* Check if there's any slash after the host name, and if so, remember
289 that position instead */
290 pathsep = strchr(protsep, '/');
291 if(pathsep)
292 protsep = pathsep + 1;
293 else
294 protsep = NULL;
295
296 /* now deal with one "./" or any amount of "../" in the newurl
297 and act accordingly */
298
299 if((useurl[0] == '.') && (useurl[1] == '/'))
300 useurl += 2; /* just skip the "./" */
301
302 while((useurl[0] == '.') &&
303 (useurl[1] == '.') &&
304 (useurl[2] == '/')) {
305 level++;
306 useurl += 3; /* pass the "../" */
307 }
308
309 if(protsep) {
310 while(level--) {
311 /* cut off one more level from the right of the original URL */
312 pathsep = strrchr(protsep, '/');
313 if(pathsep)
314 *pathsep = 0;
315 else {
316 *protsep = 0;
317 break;
318 }
319 }
320 }
321 }
322 else {
323 /* We got a new absolute path for this server */
324
325 if(relurl[1] == '/') {
326 /* the new URL starts with //, just keep the protocol part from the
327 original one */
328 *protsep = 0;
329 useurl = &relurl[2]; /* we keep the slashes from the original, so we
330 skip the new ones */
331 host_changed = TRUE;
332 }
333 else {
334 /* cut off the original URL from the first slash, or deal with URLs
335 without slash */
336 pathsep = strchr(protsep, '/');
337 if(pathsep) {
338 /* When people use badly formatted URLs, such as
339 "http://www.url.com?dir=/home/daniel" we must not use the first
340 slash, if there's a ?-letter before it! */
341 char *sep = strchr(protsep, '?');
342 if(sep && (sep < pathsep))
343 pathsep = sep;
344 *pathsep = 0;
345 }
346 else {
347 /* There was no slash. Now, since we might be operating on a badly
348 formatted URL, such as "http://www.url.com?id=2380" which doesn't
349 use a slash separator as it is supposed to, we need to check for a
350 ?-letter as well! */
351 pathsep = strchr(protsep, '?');
352 if(pathsep)
353 *pathsep = 0;
354 }
355 }
356 }
357
358 Curl_dyn_init(&newest, CURL_MAX_INPUT_LENGTH);
359
360 /* copy over the root url part */
361 if(Curl_dyn_add(&newest, base))
362 return NULL;
363
364 /* check if we need to append a slash */
365 if(('/' == useurl[0]) || (protsep && !*protsep) || ('?' == useurl[0]))
366 ;
367 else {
368 if(Curl_dyn_addn(&newest, "/", 1))
369 return NULL;
370 }
371
372 /* then append the new piece on the right side */
373 urlencode_str(&newest, useurl, strlen(useurl), !host_changed, FALSE);
374
375 return Curl_dyn_ptr(&newest);
376}
377
378/* scan for byte values < 31 or 127 */
379static bool junkscan(const char *part, unsigned int flags)
380{
381 if(part) {
382 static const char badbytes[]={
383 /* */ 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
384 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
385 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17,
386 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f,
387 0x7f, 0x00 /* null-terminate */
388 };
389 size_t n = strlen(part);
390 size_t nfine = strcspn(part, badbytes);
391 if(nfine != n)
392 /* since we don't know which part is scanned, return a generic error
393 code */
394 return TRUE;
395 if(!(flags & CURLU_ALLOW_SPACE) && strchr(part, ' '))
396 return TRUE;
397 }
398 return FALSE;
399}
400
401/*
402 * parse_hostname_login()
403 *
404 * Parse the login details (user name, password and options) from the URL and
405 * strip them out of the host name
406 *
407 */
408static CURLUcode parse_hostname_login(struct Curl_URL *u,
409 struct dynbuf *host,
410 unsigned int flags)
411{
412 CURLUcode result = CURLUE_OK;
413 CURLcode ccode;
414 char *userp = NULL;
415 char *passwdp = NULL;
416 char *optionsp = NULL;
417 const struct Curl_handler *h = NULL;
418
419 /* At this point, we assume all the other special cases have been taken
420 * care of, so the host is at most
421 *
422 * [user[:password][;options]]@]hostname
423 *
424 * We need somewhere to put the embedded details, so do that first.
425 */
426
427 char *login = Curl_dyn_ptr(host);
428 char *ptr;
429
430 DEBUGASSERT(login);
431
432 ptr = strchr(login, '@');
433 if(!ptr)
434 goto out;
435
436 /* We will now try to extract the
437 * possible login information in a string like:
438 * ftp://user:[email protected]:8021/README */
439 ptr++;
440
441 /* if this is a known scheme, get some details */
442 if(u->scheme)
443 h = Curl_builtin_scheme(u->scheme, CURL_ZERO_TERMINATED);
444
445 /* We could use the login information in the URL so extract it. Only parse
446 options if the handler says we should. Note that 'h' might be NULL! */
447 ccode = Curl_parse_login_details(login, ptr - login - 1,
448 &userp, &passwdp,
449 (h && (h->flags & PROTOPT_URLOPTIONS)) ?
450 &optionsp:NULL);
451 if(ccode) {
452 result = CURLUE_BAD_LOGIN;
453 goto out;
454 }
455
456 if(userp) {
457 if(flags & CURLU_DISALLOW_USER) {
458 /* Option DISALLOW_USER is set and url contains username. */
459 result = CURLUE_USER_NOT_ALLOWED;
460 goto out;
461 }
462 if(junkscan(userp, flags)) {
463 result = CURLUE_BAD_USER;
464 goto out;
465 }
466 u->user = userp;
467 }
468
469 if(passwdp) {
470 if(junkscan(passwdp, flags)) {
471 result = CURLUE_BAD_PASSWORD;
472 goto out;
473 }
474 u->password = passwdp;
475 }
476
477 if(optionsp) {
478 if(junkscan(optionsp, flags)) {
479 result = CURLUE_BAD_LOGIN;
480 goto out;
481 }
482 u->options = optionsp;
483 }
484
485 /* move the name to the start of the host buffer */
486 if(Curl_dyn_tail(host, strlen(ptr)))
487 return CURLUE_OUT_OF_MEMORY;
488
489 return CURLUE_OK;
490 out:
491
492 free(userp);
493 free(passwdp);
494 free(optionsp);
495 u->user = NULL;
496 u->password = NULL;
497 u->options = NULL;
498
499 return result;
500}
501
502UNITTEST CURLUcode Curl_parse_port(struct Curl_URL *u, struct dynbuf *host,
503 bool has_scheme)
504{
505 char *portptr;
506 char *hostname = Curl_dyn_ptr(host);
507 /*
508 * Find the end of an IPv6 address, either on the ']' ending bracket or
509 * a percent-encoded zone index.
510 */
511 if(hostname[0] == '[') {
512 portptr = strchr(hostname, ']');
513 if(!portptr)
514 return CURLUE_BAD_IPV6;
515 portptr++;
516 /* this is a RFC2732-style specified IP-address */
517 if(*portptr) {
518 if(*portptr != ':')
519 return CURLUE_BAD_PORT_NUMBER;
520 }
521 else
522 portptr = NULL;
523 }
524 else
525 portptr = strchr(hostname, ':');
526
527 if(portptr) {
528 char *rest;
529 long port;
530 char portbuf[7];
531 size_t keep = portptr - hostname;
532
533 /* Browser behavior adaptation. If there's a colon with no digits after,
534 just cut off the name there which makes us ignore the colon and just
535 use the default port. Firefox, Chrome and Safari all do that.
536
537 Don't do it if the URL has no scheme, to make something that looks like
538 a scheme not work!
539 */
540 Curl_dyn_setlen(host, keep);
541 portptr++;
542 if(!*portptr)
543 return has_scheme ? CURLUE_OK : CURLUE_BAD_PORT_NUMBER;
544
545 if(!ISDIGIT(*portptr))
546 return CURLUE_BAD_PORT_NUMBER;
547
548 port = strtol(portptr, &rest, 10); /* Port number must be decimal */
549
550 if(port > 0xffff)
551 return CURLUE_BAD_PORT_NUMBER;
552
553 if(rest[0])
554 return CURLUE_BAD_PORT_NUMBER;
555
556 *rest = 0;
557 /* generate a new port number string to get rid of leading zeroes etc */
558 msnprintf(portbuf, sizeof(portbuf), "%ld", port);
559 u->portnum = port;
560 u->port = strdup(portbuf);
561 if(!u->port)
562 return CURLUE_OUT_OF_MEMORY;
563 }
564
565 return CURLUE_OK;
566}
567
568static CURLUcode hostname_check(struct Curl_URL *u, char *hostname,
569 size_t hlen) /* length of hostname */
570{
571 size_t len;
572 DEBUGASSERT(hostname);
573
574 if(!hostname[0])
575 return CURLUE_NO_HOST;
576 else if(hostname[0] == '[') {
577 const char *l = "0123456789abcdefABCDEF:.";
578 if(hlen < 4) /* '[::]' is the shortest possible valid string */
579 return CURLUE_BAD_IPV6;
580 hostname++;
581 hlen -= 2;
582
583 /* only valid IPv6 letters are ok */
584 len = strspn(hostname, l);
585
586 if(hlen != len) {
587 hlen = len;
588 if(hostname[len] == '%') {
589 /* this could now be '%[zone id]' */
590 char zoneid[16];
591 int i = 0;
592 char *h = &hostname[len + 1];
593 /* pass '25' if present and is a url encoded percent sign */
594 if(!strncmp(h, "25", 2) && h[2] && (h[2] != ']'))
595 h += 2;
596 while(*h && (*h != ']') && (i < 15))
597 zoneid[i++] = *h++;
598 if(!i || (']' != *h))
599 return CURLUE_BAD_IPV6;
600 zoneid[i] = 0;
601 u->zoneid = strdup(zoneid);
602 if(!u->zoneid)
603 return CURLUE_OUT_OF_MEMORY;
604 hostname[len] = ']'; /* insert end bracket */
605 hostname[len + 1] = 0; /* terminate the hostname */
606 }
607 else
608 return CURLUE_BAD_IPV6;
609 /* hostname is fine */
610 }
611
612 /* Check the IPv6 address. */
613 {
614 char dest[16]; /* fits a binary IPv6 address */
615 char norm[MAX_IPADR_LEN];
616 hostname[hlen] = 0; /* end the address there */
617 if(1 != Curl_inet_pton(AF_INET6, hostname, dest))
618 return CURLUE_BAD_IPV6;
619
620 /* check if it can be done shorter */
621 if(Curl_inet_ntop(AF_INET6, dest, norm, sizeof(norm)) &&
622 (strlen(norm) < hlen)) {
623 strcpy(hostname, norm);
624 hlen = strlen(norm);
625 hostname[hlen + 1] = 0;
626 }
627 hostname[hlen] = ']'; /* restore ending bracket */
628 }
629 }
630 else {
631 /* letters from the second string are not ok */
632 len = strcspn(hostname, " \r\n\t/:#?!@{}[]\\$\'\"^`*<>=;,+&()%");
633 if(hlen != len)
634 /* hostname with bad content */
635 return CURLUE_BAD_HOSTNAME;
636 }
637 return CURLUE_OK;
638}
639
/* non-zero for a character that ends the host name part: '/', '?' or '#' */
#define HOSTNAME_END(x) ((x) == '/' || (x) == '?' || (x) == '#')
641
642/*
643 * Handle partial IPv4 numerical addresses and different bases, like
644 * '16843009', '0x7f', '0x7f.1' '0177.1.1.1' etc.
645 *
646 * If the given input string is syntactically wrong or any part for example is
647 * too big, this function returns FALSE and doesn't create any output.
648 *
649 * Output the "normalized" version of that input string in plain quad decimal
650 * integers and return TRUE.
651 */
652static bool ipv4_normalize(const char *hostname, char *outp, size_t olen)
653{
654 bool done = FALSE;
655 int n = 0;
656 const char *c = hostname;
657 unsigned long parts[4] = {0, 0, 0, 0};
658
659 while(!done) {
660 char *endp;
661 unsigned long l;
662 if((*c < '0') || (*c > '9'))
663 /* most importantly this doesn't allow a leading plus or minus */
664 return FALSE;
665 l = strtoul(c, &endp, 0);
666
667 /* overflow or nothing parsed at all */
668 if(((l == ULONG_MAX) && (errno == ERANGE)) || (endp == c))
669 return FALSE;
670
671#if SIZEOF_LONG > 4
672 /* a value larger than 32 bits */
673 if(l > UINT_MAX)
674 return FALSE;
675#endif
676
677 parts[n] = l;
678 c = endp;
679
680 switch (*c) {
681 case '.' :
682 if(n == 3)
683 return FALSE;
684 n++;
685 c++;
686 break;
687
688 case '\0':
689 done = TRUE;
690 break;
691
692 default:
693 return FALSE;
694 }
695 }
696
697 /* this is deemed a valid IPv4 numerical address */
698
699 switch(n) {
700 case 0: /* a -- 32 bits */
701 msnprintf(outp, olen, "%u.%u.%u.%u",
702 parts[0] >> 24, (parts[0] >> 16) & 0xff,
703 (parts[0] >> 8) & 0xff, parts[0] & 0xff);
704 break;
705 case 1: /* a.b -- 8.24 bits */
706 if((parts[0] > 0xff) || (parts[1] > 0xffffff))
707 return FALSE;
708 msnprintf(outp, olen, "%u.%u.%u.%u",
709 parts[0], (parts[1] >> 16) & 0xff,
710 (parts[1] >> 8) & 0xff, parts[1] & 0xff);
711 break;
712 case 2: /* a.b.c -- 8.8.16 bits */
713 if((parts[0] > 0xff) || (parts[1] > 0xff) || (parts[2] > 0xffff))
714 return FALSE;
715 msnprintf(outp, olen, "%u.%u.%u.%u",
716 parts[0], parts[1], (parts[2] >> 8) & 0xff,
717 parts[2] & 0xff);
718 break;
719 case 3: /* a.b.c.d -- 8.8.8.8 bits */
720 if((parts[0] > 0xff) || (parts[1] > 0xff) || (parts[2] > 0xff) ||
721 (parts[3] > 0xff))
722 return FALSE;
723 msnprintf(outp, olen, "%u.%u.%u.%u",
724 parts[0], parts[1], parts[2], parts[3]);
725 break;
726 }
727 return TRUE;
728}
729
730/* if necessary, replace the host content with a URL decoded version */
731static CURLUcode decode_host(struct dynbuf *host)
732{
733 char *per = NULL;
734 const char *hostname = Curl_dyn_ptr(host);
735 if(hostname[0] == '[')
736 /* only decode if not an ipv6 numerical */
737 return CURLUE_OK;
738 per = strchr(hostname, '%');
739 if(!per)
740 /* nothing to decode */
741 return CURLUE_OK;
742 else {
743 /* encoded */
744 size_t dlen;
745 char *decoded;
746 CURLcode result = Curl_urldecode(hostname, 0, &decoded, &dlen,
747 REJECT_CTRL);
748 if(result)
749 return CURLUE_BAD_HOSTNAME;
750 Curl_dyn_reset(host);
751 result = Curl_dyn_addn(host, decoded, dlen);
752 free(decoded);
753 if(result)
754 return CURLUE_OUT_OF_MEMORY;
755 }
756
757 return CURLUE_OK;
758}
759
760/*
761 * "Remove Dot Segments"
762 * https://datatracker.ietf.org/doc/html/rfc3986#section-5.2.4
763 */
764
765/*
766 * dedotdotify()
767 * @unittest: 1395
768 *
769 * This function gets a null-terminated path with dot and dotdot sequences
770 * passed in and strips them off according to the rules in RFC 3986 section
771 * 5.2.4.
772 *
773 * The function handles a query part ('?' + stuff) appended but it expects
774 * that fragments ('#' + stuff) have already been cut off.
775 *
776 * RETURNS
777 *
778 * Zero for success and 'out' set to an allocated dedotdotified string.
779 */
780UNITTEST int dedotdotify(const char *input, size_t clen, char **outp);
781UNITTEST int dedotdotify(const char *input, size_t clen, char **outp)
782{
783 char *outptr;
784 const char *orginput = input;
785 char *queryp;
786 char *out;
787
788 *outp = NULL;
789 /* the path always starts with a slash, and a slash has not dot */
790 if((clen < 2) || !memchr(input, '.', clen))
791 return 0;
792
793 out = malloc(clen + 1);
794 if(!out)
795 return 1; /* out of memory */
796
797 *out = 0; /* null-terminates, for inputs like "./" */
798 outptr = out;
799
800 /*
801 * To handle query-parts properly, we must find it and remove it during the
802 * dotdot-operation and then append it again at the end to the output
803 * string.
804 */
805 queryp = strchr(input, '?');
806
807 do {
808 bool dotdot = TRUE;
809 if(*input == '.') {
810 /* A. If the input buffer begins with a prefix of "../" or "./", then
811 remove that prefix from the input buffer; otherwise, */
812
813 if(!strncmp("./", input, 2)) {
814 input += 2;
815 clen -= 2;
816 }
817 else if(!strncmp("../", input, 3)) {
818 input += 3;
819 clen -= 3;
820 }
821 /* D. if the input buffer consists only of "." or "..", then remove
822 that from the input buffer; otherwise, */
823
824 else if(!strcmp(".", input) || !strcmp("..", input) ||
825 !strncmp(".?", input, 2) || !strncmp("..?", input, 3)) {
826 *out = 0;
827 break;
828 }
829 else
830 dotdot = FALSE;
831 }
832 else if(*input == '/') {
833 /* B. if the input buffer begins with a prefix of "/./" or "/.", where
834 "." is a complete path segment, then replace that prefix with "/" in
835 the input buffer; otherwise, */
836 if(!strncmp("/./", input, 3)) {
837 input += 2;
838 clen -= 2;
839 }
840 else if(!strcmp("/.", input) || !strncmp("/.?", input, 3)) {
841 *outptr++ = '/';
842 *outptr = 0;
843 break;
844 }
845
846 /* C. if the input buffer begins with a prefix of "/../" or "/..",
847 where ".." is a complete path segment, then replace that prefix with
848 "/" in the input buffer and remove the last segment and its
849 preceding "/" (if any) from the output buffer; otherwise, */
850
851 else if(!strncmp("/../", input, 4)) {
852 input += 3;
853 clen -= 3;
854 /* remove the last segment from the output buffer */
855 while(outptr > out) {
856 outptr--;
857 if(*outptr == '/')
858 break;
859 }
860 *outptr = 0; /* null-terminate where it stops */
861 }
862 else if(!strcmp("/..", input) || !strncmp("/..?", input, 4)) {
863 /* remove the last segment from the output buffer */
864 while(outptr > out) {
865 outptr--;
866 if(*outptr == '/')
867 break;
868 }
869 *outptr++ = '/';
870 *outptr = 0; /* null-terminate where it stops */
871 break;
872 }
873 else
874 dotdot = FALSE;
875 }
876 else
877 dotdot = FALSE;
878
879 if(!dotdot) {
880 /* E. move the first path segment in the input buffer to the end of
881 the output buffer, including the initial "/" character (if any) and
882 any subsequent characters up to, but not including, the next "/"
883 character or the end of the input buffer. */
884
885 do {
886 *outptr++ = *input++;
887 clen--;
888 } while(*input && (*input != '/') && (*input != '?'));
889 *outptr = 0;
890 }
891
892 /* continue until end of input string OR, if there is a terminating
893 query part, stop there */
894 } while(*input && (!queryp || (input < queryp)));
895
896 if(queryp) {
897 size_t qlen;
898 /* There was a query part, append that to the output. */
899 size_t oindex = queryp - orginput;
900 qlen = strlen(&orginput[oindex]);
901 memcpy(outptr, &orginput[oindex], qlen + 1); /* include zero byte */
902 }
903
904 *outp = out;
905 return 0; /* success */
906}
907
908static CURLUcode parseurl(const char *url, CURLU *u, unsigned int flags)
909{
910 const char *path;
911 size_t pathlen;
912 bool uncpath = FALSE;
913 char *query = NULL;
914 char *fragment = NULL;
915 char schemebuf[MAX_SCHEME_LEN + 1];
916 const char *schemep = NULL;
917 size_t schemelen = 0;
918 size_t urllen;
919 CURLUcode result = CURLUE_OK;
920 size_t fraglen = 0;
921 struct dynbuf host;
922
923 DEBUGASSERT(url);
924
925 Curl_dyn_init(&host, CURL_MAX_INPUT_LENGTH);
926
927 /*************************************************************
928 * Parse the URL.
929 ************************************************************/
930 /* allocate scratch area */
931 urllen = strlen(url);
932 if(urllen > CURL_MAX_INPUT_LENGTH) {
933 /* excessive input length */
934 result = CURLUE_MALFORMED_INPUT;
935 goto fail;
936 }
937
938 schemelen = Curl_is_absolute_url(url, schemebuf, sizeof(schemebuf),
939 flags & (CURLU_GUESS_SCHEME|
940 CURLU_DEFAULT_SCHEME));
941
942 /* handle the file: scheme */
943 if(schemelen && !strcmp(schemebuf, "file")) {
944 if(urllen <= 6) {
945 /* file:/ is not enough to actually be a complete file: URL */
946 result = CURLUE_BAD_FILE_URL;
947 goto fail;
948 }
949
950 /* path has been allocated large enough to hold this */
951 path = (char *)&url[5];
952
953 schemep = u->scheme = strdup("file");
954 if(!u->scheme) {
955 result = CURLUE_OUT_OF_MEMORY;
956 goto fail;
957 }
958
959 /* Extra handling URLs with an authority component (i.e. that start with
960 * "file://")
961 *
962 * We allow omitted hostname (e.g. file:/<path>) -- valid according to
963 * RFC 8089, but not the (current) WHAT-WG URL spec.
964 */
965 if(path[0] == '/' && path[1] == '/') {
966 /* swallow the two slashes */
967 const char *ptr = &path[2];
968
969 /*
970 * According to RFC 8089, a file: URL can be reliably dereferenced if:
971 *
972 * o it has no/blank hostname, or
973 *
974 * o the hostname matches "localhost" (case-insensitively), or
975 *
976 * o the hostname is a FQDN that resolves to this machine, or
977 *
978 * o it is an UNC String transformed to an URI (Windows only, RFC 8089
979 * Appendix E.3).
980 *
981 * For brevity, we only consider URLs with empty, "localhost", or
982 * "127.0.0.1" hostnames as local, otherwise as an UNC String.
983 *
984 * Additionally, there is an exception for URLs with a Windows drive
985 * letter in the authority (which was accidentally omitted from RFC 8089
986 * Appendix E, but believe me, it was meant to be there. --MK)
987 */
988 if(ptr[0] != '/' && !STARTS_WITH_URL_DRIVE_PREFIX(ptr)) {
989 /* the URL includes a host name, it must match "localhost" or
990 "127.0.0.1" to be valid */
991 if(checkprefix("localhost/", ptr) ||
992 checkprefix("127.0.0.1/", ptr)) {
993 ptr += 9; /* now points to the slash after the host */
994 }
995 else {
996#if defined(WIN32)
997 size_t len;
998
999 /* the host name, NetBIOS computer name, can not contain disallowed
1000 chars, and the delimiting slash character must be appended to the
1001 host name */
1002 path = strpbrk(ptr, "/\\:*?\"<>|");
1003 if(!path || *path != '/') {
1004 result = CURLUE_BAD_FILE_URL;
1005 goto fail;
1006 }
1007
1008 len = path - ptr;
1009 if(len) {
1010 if(Curl_dyn_addn(&host, ptr, len)) {
1011 result = CURLUE_OUT_OF_MEMORY;
1012 goto fail;
1013 }
1014 uncpath = TRUE;
1015 }
1016
1017 ptr -= 2; /* now points to the // before the host in UNC */
1018#else
1019 /* Invalid file://hostname/, expected localhost or 127.0.0.1 or
1020 none */
1021 result = CURLUE_BAD_FILE_URL;
1022 goto fail;
1023#endif
1024 }
1025 }
1026
1027 path = ptr;
1028 }
1029
1030 if(!uncpath)
1031 /* no host for file: URLs by default */
1032 Curl_dyn_reset(&host);
1033
1034#if !defined(MSDOS) && !defined(WIN32) && !defined(__CYGWIN__)
1035 /* Don't allow Windows drive letters when not in Windows.
1036 * This catches both "file:/c:" and "file:c:" */
1037 if(('/' == path[0] && STARTS_WITH_URL_DRIVE_PREFIX(&path[1])) ||
1038 STARTS_WITH_URL_DRIVE_PREFIX(path)) {
1039 /* File drive letters are only accepted in MSDOS/Windows */
1040 result = CURLUE_BAD_FILE_URL;
1041 goto fail;
1042 }
1043#else
1044 /* If the path starts with a slash and a drive letter, ditch the slash */
1045 if('/' == path[0] && STARTS_WITH_URL_DRIVE_PREFIX(&path[1])) {
1046 /* This cannot be done with strcpy, as the memory chunks overlap! */
1047 path++;
1048 }
1049#endif
1050
1051 }
1052 else {
1053 /* clear path */
1054 const char *p;
1055 const char *hostp;
1056 size_t len;
1057
1058 if(schemelen) {
1059 int i = 0;
1060 p = &url[schemelen + 1];
1061 while(p && (*p == '/') && (i < 4)) {
1062 p++;
1063 i++;
1064 }
1065
1066 schemep = schemebuf;
1067 if(!Curl_builtin_scheme(schemep, CURL_ZERO_TERMINATED) &&
1068 !(flags & CURLU_NON_SUPPORT_SCHEME)) {
1069 result = CURLUE_UNSUPPORTED_SCHEME;
1070 goto fail;
1071 }
1072
1073 if((i < 1) || (i>3)) {
1074 /* less than one or more than three slashes */
1075 result = CURLUE_BAD_SLASHES;
1076 goto fail;
1077 }
1078 if(junkscan(schemep, flags)) {
1079 result = CURLUE_BAD_SCHEME;
1080 goto fail;
1081 }
1082 }
1083 else {
1084 /* no scheme! */
1085
1086 if(!(flags & (CURLU_DEFAULT_SCHEME|CURLU_GUESS_SCHEME))) {
1087 result = CURLUE_BAD_SCHEME;
1088 goto fail;
1089 }
1090 if(flags & CURLU_DEFAULT_SCHEME)
1091 schemep = DEFAULT_SCHEME;
1092
1093 /*
1094 * The URL was badly formatted, let's try without scheme specified.
1095 */
1096 p = url;
1097 }
1098 hostp = p; /* host name starts here */
1099
1100 /* find the end of the host name + port number */
1101 while(*p && !HOSTNAME_END(*p))
1102 p++;
1103
1104 len = p - hostp;
1105 if(len) {
1106 if(Curl_dyn_addn(&host, hostp, len)) {
1107 result = CURLUE_OUT_OF_MEMORY;
1108 goto fail;
1109 }
1110 }
1111 else {
1112 if(!(flags & CURLU_NO_AUTHORITY)) {
1113 result = CURLUE_NO_HOST;
1114 goto fail;
1115 }
1116 }
1117
1118 path = (char *)p;
1119
1120 if(schemep) {
1121 u->scheme = strdup(schemep);
1122 if(!u->scheme) {
1123 result = CURLUE_OUT_OF_MEMORY;
1124 goto fail;
1125 }
1126 }
1127 }
1128
1129 fragment = strchr(path, '#');
1130 if(fragment) {
1131 fraglen = strlen(fragment);
1132 if(fraglen > 1) {
1133 /* skip the leading '#' in the copy but include the terminating null */
1134 u->fragment = Curl_memdup(fragment + 1, fraglen);
1135 if(!u->fragment) {
1136 result = CURLUE_OUT_OF_MEMORY;
1137 goto fail;
1138 }
1139
1140 if(junkscan(u->fragment, flags)) {
1141 result = CURLUE_BAD_FRAGMENT;
1142 goto fail;
1143 }
1144 }
1145 }
1146
1147 query = strchr(path, '?');
1148 if(query && (!fragment || (query < fragment))) {
1149 size_t qlen = strlen(query) - fraglen; /* includes '?' */
1150 pathlen = strlen(path) - qlen - fraglen;
1151 if(qlen > 1) {
1152 if(flags & CURLU_URLENCODE) {
1153 struct dynbuf enc;
1154 Curl_dyn_init(&enc, CURL_MAX_INPUT_LENGTH);
1155 /* skip the leading question mark */
1156 if(urlencode_str(&enc, query + 1, qlen - 1, TRUE, TRUE)) {
1157 result = CURLUE_OUT_OF_MEMORY;
1158 goto fail;
1159 }
1160 u->query = Curl_dyn_ptr(&enc);
1161 }
1162 else {
1163 u->query = Curl_memdup(query + 1, qlen);
1164 if(!u->query) {
1165 result = CURLUE_OUT_OF_MEMORY;
1166 goto fail;
1167 }
1168 u->query[qlen - 1] = 0;
1169 }
1170
1171 if(junkscan(u->query, flags)) {
1172 result = CURLUE_BAD_QUERY;
1173 goto fail;
1174 }
1175 }
1176 else {
1177 /* single byte query */
1178 u->query = strdup("");
1179 if(!u->query) {
1180 result = CURLUE_OUT_OF_MEMORY;
1181 goto fail;
1182 }
1183 }
1184 }
1185 else
1186 pathlen = strlen(path) - fraglen;
1187
1188 if(pathlen && (flags & CURLU_URLENCODE)) {
1189 struct dynbuf enc;
1190 Curl_dyn_init(&enc, CURL_MAX_INPUT_LENGTH);
1191 if(urlencode_str(&enc, path, pathlen, TRUE, FALSE)) {
1192 result = CURLUE_OUT_OF_MEMORY;
1193 goto fail;
1194 }
1195 pathlen = Curl_dyn_len(&enc);
1196 path = u->path = Curl_dyn_ptr(&enc);
1197 }
1198
1199 if(pathlen <= 1) {
1200 /* there is no path left or just the slash, unset */
1201 path = NULL;
1202 }
1203 else {
1204 if(!u->path) {
1205 u->path = Curl_memdup(path, pathlen + 1);
1206 if(!u->path) {
1207 result = CURLUE_OUT_OF_MEMORY;
1208 goto fail;
1209 }
1210 u->path[pathlen] = 0;
1211 path = u->path;
1212 }
1213 else if(flags & CURLU_URLENCODE)
1214 /* it might have encoded more than just the path so cut it */
1215 u->path[pathlen] = 0;
1216
1217 if(junkscan(u->path, flags)) {
1218 result = CURLUE_BAD_PATH;
1219 goto fail;
1220 }
1221
1222 if(!(flags & CURLU_PATH_AS_IS)) {
1223 /* remove ../ and ./ sequences according to RFC3986 */
1224 char *dedot;
1225 int err = dedotdotify((char *)path, pathlen, &dedot);
1226 if(err) {
1227 result = CURLUE_OUT_OF_MEMORY;
1228 goto fail;
1229 }
1230 if(dedot) {
1231 free(u->path);
1232 u->path = dedot;
1233 }
1234 }
1235 }
1236
1237 if(Curl_dyn_len(&host)) {
1238 char normalized_ipv4[sizeof("255.255.255.255") + 1];
1239
1240 /*
1241 * Parse the login details and strip them out of the host name.
1242 */
1243 result = parse_hostname_login(u, &host, flags);
1244 if(!result)
1245 result = Curl_parse_port(u, &host, schemelen);
1246 if(result)
1247 goto fail;
1248
1249 if(junkscan(Curl_dyn_ptr(&host), flags)) {
1250 result = CURLUE_BAD_HOSTNAME;
1251 goto fail;
1252 }
1253
1254 if(ipv4_normalize(Curl_dyn_ptr(&host),
1255 normalized_ipv4, sizeof(normalized_ipv4))) {
1256 Curl_dyn_reset(&host);
1257 if(Curl_dyn_add(&host, normalized_ipv4)) {
1258 result = CURLUE_OUT_OF_MEMORY;
1259 goto fail;
1260 }
1261 }
1262 else {
1263 result = decode_host(&host);
1264 if(!result)
1265 result = hostname_check(u, Curl_dyn_ptr(&host), Curl_dyn_len(&host));
1266 if(result)
1267 goto fail;
1268 }
1269
1270 if((flags & CURLU_GUESS_SCHEME) && !schemep) {
1271 const char *hostname = Curl_dyn_ptr(&host);
1272 /* legacy curl-style guess based on host name */
1273 if(checkprefix("ftp.", hostname))
1274 schemep = "ftp";
1275 else if(checkprefix("dict.", hostname))
1276 schemep = "dict";
1277 else if(checkprefix("ldap.", hostname))
1278 schemep = "ldap";
1279 else if(checkprefix("imap.", hostname))
1280 schemep = "imap";
1281 else if(checkprefix("smtp.", hostname))
1282 schemep = "smtp";
1283 else if(checkprefix("pop3.", hostname))
1284 schemep = "pop3";
1285 else
1286 schemep = "http";
1287
1288 u->scheme = strdup(schemep);
1289 if(!u->scheme) {
1290 result = CURLUE_OUT_OF_MEMORY;
1291 goto fail;
1292 }
1293 }
1294 }
1295 else if(flags & CURLU_NO_AUTHORITY) {
1296 /* allowed to be empty. */
1297 if(Curl_dyn_add(&host, "")) {
1298 result = CURLUE_OUT_OF_MEMORY;
1299 goto fail;
1300 }
1301 }
1302
1303 u->host = Curl_dyn_ptr(&host);
1304
1305 return result;
1306 fail:
1307 Curl_dyn_free(&host);
1308 free_urlhandle(u);
1309 return result;
1310}
1311
1312/*
1313 * Parse the URL and, if successful, replace everything in the Curl_URL struct.
1314 */
1315static CURLUcode parseurl_and_replace(const char *url, CURLU *u,
1316 unsigned int flags)
1317{
1318 CURLUcode result;
1319 CURLU tmpurl;
1320 memset(&tmpurl, 0, sizeof(tmpurl));
1321 result = parseurl(url, &tmpurl, flags);
1322 if(!result) {
1323 free_urlhandle(u);
1324 *u = tmpurl;
1325 }
1326 return result;
1327}
1328
1329/*
1330 */
1331CURLU *curl_url(void)
1332{
1333 return calloc(sizeof(struct Curl_URL), 1);
1334}
1335
1336void curl_url_cleanup(CURLU *u)
1337{
1338 if(u) {
1339 free_urlhandle(u);
1340 free(u);
1341 }
1342}
1343
/*
 * DUP() copies the string member 'name' from the struct 'src' points to into
 * the struct 'dest' points to. A NULL member is skipped and leaves the
 * destination member untouched. When strdup() fails the macro jumps to a
 * label called 'fail', which the enclosing function must provide.
 *
 * 'dest' and 'src' are parenthesized so that any pointer expression (for
 * example '&obj') can be passed, not only a plain identifier.
 */
#define DUP(dest, src, name)                    \
  do {                                          \
    if((src)->name) {                           \
      (dest)->name = strdup((src)->name);       \
      if(!(dest)->name)                         \
        goto fail;                              \
    }                                           \
  } while(0)
1352
1353CURLU *curl_url_dup(const CURLU *in)
1354{
1355 struct Curl_URL *u = calloc(sizeof(struct Curl_URL), 1);
1356 if(u) {
1357 DUP(u, in, scheme);
1358 DUP(u, in, user);
1359 DUP(u, in, password);
1360 DUP(u, in, options);
1361 DUP(u, in, host);
1362 DUP(u, in, port);
1363 DUP(u, in, path);
1364 DUP(u, in, query);
1365 DUP(u, in, fragment);
1366 u->portnum = in->portnum;
1367 }
1368 return u;
1369 fail:
1370 curl_url_cleanup(u);
1371 return NULL;
1372}
1373
1374CURLUcode curl_url_get(const CURLU *u, CURLUPart what,
1375 char **part, unsigned int flags)
1376{
1377 const char *ptr;
1378 CURLUcode ifmissing = CURLUE_UNKNOWN_PART;
1379 char portbuf[7];
1380 bool urldecode = (flags & CURLU_URLDECODE)?1:0;
1381 bool urlencode = (flags & CURLU_URLENCODE)?1:0;
1382 bool punycode = FALSE;
1383 bool plusdecode = FALSE;
1384 (void)flags;
1385 if(!u)
1386 return CURLUE_BAD_HANDLE;
1387 if(!part)
1388 return CURLUE_BAD_PARTPOINTER;
1389 *part = NULL;
1390
1391 switch(what) {
1392 case CURLUPART_SCHEME:
1393 ptr = u->scheme;
1394 ifmissing = CURLUE_NO_SCHEME;
1395 urldecode = FALSE; /* never for schemes */
1396 break;
1397 case CURLUPART_USER:
1398 ptr = u->user;
1399 ifmissing = CURLUE_NO_USER;
1400 break;
1401 case CURLUPART_PASSWORD:
1402 ptr = u->password;
1403 ifmissing = CURLUE_NO_PASSWORD;
1404 break;
1405 case CURLUPART_OPTIONS:
1406 ptr = u->options;
1407 ifmissing = CURLUE_NO_OPTIONS;
1408 break;
1409 case CURLUPART_HOST:
1410 ptr = u->host;
1411 ifmissing = CURLUE_NO_HOST;
1412 punycode = (flags & CURLU_PUNYCODE)?1:0;
1413 break;
1414 case CURLUPART_ZONEID:
1415 ptr = u->zoneid;
1416 ifmissing = CURLUE_NO_ZONEID;
1417 break;
1418 case CURLUPART_PORT:
1419 ptr = u->port;
1420 ifmissing = CURLUE_NO_PORT;
1421 urldecode = FALSE; /* never for port */
1422 if(!ptr && (flags & CURLU_DEFAULT_PORT) && u->scheme) {
1423 /* there's no stored port number, but asked to deliver
1424 a default one for the scheme */
1425 const struct Curl_handler *h =
1426 Curl_builtin_scheme(u->scheme, CURL_ZERO_TERMINATED);
1427 if(h) {
1428 msnprintf(portbuf, sizeof(portbuf), "%u", h->defport);
1429 ptr = portbuf;
1430 }
1431 }
1432 else if(ptr && u->scheme) {
1433 /* there is a stored port number, but ask to inhibit if
1434 it matches the default one for the scheme */
1435 const struct Curl_handler *h =
1436 Curl_builtin_scheme(u->scheme, CURL_ZERO_TERMINATED);
1437 if(h && (h->defport == u->portnum) &&
1438 (flags & CURLU_NO_DEFAULT_PORT))
1439 ptr = NULL;
1440 }
1441 break;
1442 case CURLUPART_PATH:
1443 ptr = u->path;
1444 if(!ptr)
1445 ptr = "/";
1446 break;
1447 case CURLUPART_QUERY:
1448 ptr = u->query;
1449 ifmissing = CURLUE_NO_QUERY;
1450 plusdecode = urldecode;
1451 break;
1452 case CURLUPART_FRAGMENT:
1453 ptr = u->fragment;
1454 ifmissing = CURLUE_NO_FRAGMENT;
1455 break;
1456 case CURLUPART_URL: {
1457 char *url;
1458 char *scheme;
1459 char *options = u->options;
1460 char *port = u->port;
1461 char *allochost = NULL;
1462 punycode = (flags & CURLU_PUNYCODE)?1:0;
1463 if(u->scheme && strcasecompare("file", u->scheme)) {
1464 url = aprintf("file://%s%s%s",
1465 u->path,
1466 u->fragment? "#": "",
1467 u->fragment? u->fragment : "");
1468 }
1469 else if(!u->host)
1470 return CURLUE_NO_HOST;
1471 else {
1472 const struct Curl_handler *h = NULL;
1473 if(u->scheme)
1474 scheme = u->scheme;
1475 else if(flags & CURLU_DEFAULT_SCHEME)
1476 scheme = (char *) DEFAULT_SCHEME;
1477 else
1478 return CURLUE_NO_SCHEME;
1479
1480 h = Curl_builtin_scheme(scheme, CURL_ZERO_TERMINATED);
1481 if(!port && (flags & CURLU_DEFAULT_PORT)) {
1482 /* there's no stored port number, but asked to deliver
1483 a default one for the scheme */
1484 if(h) {
1485 msnprintf(portbuf, sizeof(portbuf), "%u", h->defport);
1486 port = portbuf;
1487 }
1488 }
1489 else if(port) {
1490 /* there is a stored port number, but asked to inhibit if it matches
1491 the default one for the scheme */
1492 if(h && (h->defport == u->portnum) &&
1493 (flags & CURLU_NO_DEFAULT_PORT))
1494 port = NULL;
1495 }
1496
1497 if(h && !(h->flags & PROTOPT_URLOPTIONS))
1498 options = NULL;
1499
1500 if(u->host[0] == '[') {
1501 if(u->zoneid) {
1502 /* make it '[ host %25 zoneid ]' */
1503 struct dynbuf enc;
1504 size_t hostlen = strlen(u->host);
1505 Curl_dyn_init(&enc, CURL_MAX_INPUT_LENGTH);
1506 if(Curl_dyn_addf(&enc, "%.*s%%25%s]", (int)hostlen - 1, u->host,
1507 u->zoneid))
1508 return CURLUE_OUT_OF_MEMORY;
1509 allochost = Curl_dyn_ptr(&enc);
1510 }
1511 }
1512 else if(urlencode) {
1513 allochost = curl_easy_escape(NULL, u->host, 0);
1514 if(!allochost)
1515 return CURLUE_OUT_OF_MEMORY;
1516 }
1517 else if(punycode) {
1518 if(!Curl_is_ASCII_name(u->host)) {
1519#ifndef USE_IDN
1520 return CURLUE_LACKS_IDN;
1521#else
1522 allochost = Curl_idn_decode(u->host);
1523 if(!allochost)
1524 return CURLUE_OUT_OF_MEMORY;
1525#endif
1526 }
1527 }
1528 else {
1529 /* only encode '%' in output host name */
1530 char *host = u->host;
1531 bool percent = FALSE;
1532 /* first, count number of percents present in the name */
1533 while(*host) {
1534 if(*host == '%') {
1535 percent = TRUE;
1536 break;
1537 }
1538 host++;
1539 }
1540 /* if there were percent(s), encode the host name */
1541 if(percent) {
1542 struct dynbuf enc;
1543 CURLcode result;
1544 Curl_dyn_init(&enc, CURL_MAX_INPUT_LENGTH);
1545 host = u->host;
1546 while(*host) {
1547 if(*host == '%')
1548 result = Curl_dyn_addn(&enc, "%25", 3);
1549 else
1550 result = Curl_dyn_addn(&enc, host, 1);
1551 if(result)
1552 return CURLUE_OUT_OF_MEMORY;
1553 host++;
1554 }
1555 allochost = Curl_dyn_ptr(&enc);
1556 }
1557 }
1558
1559 url = aprintf("%s://%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s",
1560 scheme,
1561 u->user ? u->user : "",
1562 u->password ? ":": "",
1563 u->password ? u->password : "",
1564 options ? ";" : "",
1565 options ? options : "",
1566 (u->user || u->password || options) ? "@": "",
1567 allochost ? allochost : u->host,
1568 port ? ":": "",
1569 port ? port : "",
1570 (u->path && (u->path[0] != '/')) ? "/": "",
1571 u->path ? u->path : "/",
1572 (u->query && u->query[0]) ? "?": "",
1573 (u->query && u->query[0]) ? u->query : "",
1574 u->fragment? "#": "",
1575 u->fragment? u->fragment : "");
1576 free(allochost);
1577 }
1578 if(!url)
1579 return CURLUE_OUT_OF_MEMORY;
1580 *part = url;
1581 return CURLUE_OK;
1582 }
1583 default:
1584 ptr = NULL;
1585 break;
1586 }
1587 if(ptr) {
1588 size_t partlen = strlen(ptr);
1589 size_t i = 0;
1590 *part = Curl_memdup(ptr, partlen + 1);
1591 if(!*part)
1592 return CURLUE_OUT_OF_MEMORY;
1593 if(plusdecode) {
1594 /* convert + to space */
1595 char *plus = *part;
1596 for(i = 0; i < partlen; ++plus, i++) {
1597 if(*plus == '+')
1598 *plus = ' ';
1599 }
1600 }
1601 if(urldecode) {
1602 char *decoded;
1603 size_t dlen;
1604 /* this unconditional rejection of control bytes is documented
1605 API behavior */
1606 CURLcode res = Curl_urldecode(*part, 0, &decoded, &dlen, REJECT_CTRL);
1607 free(*part);
1608 if(res) {
1609 *part = NULL;
1610 return CURLUE_URLDECODE;
1611 }
1612 *part = decoded;
1613 partlen = dlen;
1614 }
1615 if(urlencode) {
1616 struct dynbuf enc;
1617 Curl_dyn_init(&enc, CURL_MAX_INPUT_LENGTH);
1618 if(urlencode_str(&enc, *part, partlen, TRUE,
1619 what == CURLUPART_QUERY))
1620 return CURLUE_OUT_OF_MEMORY;
1621 free(*part);
1622 *part = Curl_dyn_ptr(&enc);
1623 }
1624 else if(punycode) {
1625 if(!Curl_is_ASCII_name(u->host)) {
1626#ifndef USE_IDN
1627 return CURLUE_LACKS_IDN;
1628#else
1629 char *allochost = Curl_idn_decode(*part);
1630 if(!allochost)
1631 return CURLUE_OUT_OF_MEMORY;
1632 free(*part);
1633 *part = allochost;
1634#endif
1635 }
1636 }
1637
1638 return CURLUE_OK;
1639 }
1640 else
1641 return ifmissing;
1642}
1643
1644CURLUcode curl_url_set(CURLU *u, CURLUPart what,
1645 const char *part, unsigned int flags)
1646{
1647 char **storep = NULL;
1648 long port = 0;
1649 bool urlencode = (flags & CURLU_URLENCODE)? 1 : 0;
1650 bool plusencode = FALSE;
1651 bool urlskipslash = FALSE;
1652 bool appendquery = FALSE;
1653 bool equalsencode = FALSE;
1654
1655 if(!u)
1656 return CURLUE_BAD_HANDLE;
1657 if(!part) {
1658 /* setting a part to NULL clears it */
1659 switch(what) {
1660 case CURLUPART_URL:
1661 break;
1662 case CURLUPART_SCHEME:
1663 storep = &u->scheme;
1664 break;
1665 case CURLUPART_USER:
1666 storep = &u->user;
1667 break;
1668 case CURLUPART_PASSWORD:
1669 storep = &u->password;
1670 break;
1671 case CURLUPART_OPTIONS:
1672 storep = &u->options;
1673 break;
1674 case CURLUPART_HOST:
1675 storep = &u->host;
1676 break;
1677 case CURLUPART_ZONEID:
1678 storep = &u->zoneid;
1679 break;
1680 case CURLUPART_PORT:
1681 u->portnum = 0;
1682 storep = &u->port;
1683 break;
1684 case CURLUPART_PATH:
1685 storep = &u->path;
1686 break;
1687 case CURLUPART_QUERY:
1688 storep = &u->query;
1689 break;
1690 case CURLUPART_FRAGMENT:
1691 storep = &u->fragment;
1692 break;
1693 default:
1694 return CURLUE_UNKNOWN_PART;
1695 }
1696 if(storep && *storep) {
1697 Curl_safefree(*storep);
1698 }
1699 else if(!storep) {
1700 free_urlhandle(u);
1701 memset(u, 0, sizeof(struct Curl_URL));
1702 }
1703 return CURLUE_OK;
1704 }
1705
1706 switch(what) {
1707 case CURLUPART_SCHEME:
1708 if(strlen(part) > MAX_SCHEME_LEN)
1709 /* too long */
1710 return CURLUE_BAD_SCHEME;
1711 if(!(flags & CURLU_NON_SUPPORT_SCHEME) &&
1712 /* verify that it is a fine scheme */
1713 !Curl_builtin_scheme(part, CURL_ZERO_TERMINATED))
1714 return CURLUE_UNSUPPORTED_SCHEME;
1715 storep = &u->scheme;
1716 urlencode = FALSE; /* never */
1717 break;
1718 case CURLUPART_USER:
1719 storep = &u->user;
1720 break;
1721 case CURLUPART_PASSWORD:
1722 storep = &u->password;
1723 break;
1724 case CURLUPART_OPTIONS:
1725 storep = &u->options;
1726 break;
1727 case CURLUPART_HOST: {
1728 size_t len = strcspn(part, " \r\n");
1729 if(strlen(part) != len)
1730 /* hostname with bad content */
1731 return CURLUE_BAD_HOSTNAME;
1732 storep = &u->host;
1733 Curl_safefree(u->zoneid);
1734 break;
1735 }
1736 case CURLUPART_ZONEID:
1737 storep = &u->zoneid;
1738 break;
1739 case CURLUPART_PORT:
1740 {
1741 char *endp;
1742 urlencode = FALSE; /* never */
1743 port = strtol(part, &endp, 10); /* Port number must be decimal */
1744 if((port <= 0) || (port > 0xffff))
1745 return CURLUE_BAD_PORT_NUMBER;
1746 if(*endp)
1747 /* weirdly provided number, not good! */
1748 return CURLUE_BAD_PORT_NUMBER;
1749 storep = &u->port;
1750 }
1751 break;
1752 case CURLUPART_PATH:
1753 urlskipslash = TRUE;
1754 storep = &u->path;
1755 break;
1756 case CURLUPART_QUERY:
1757 plusencode = urlencode;
1758 appendquery = (flags & CURLU_APPENDQUERY)?1:0;
1759 equalsencode = appendquery;
1760 storep = &u->query;
1761 break;
1762 case CURLUPART_FRAGMENT:
1763 storep = &u->fragment;
1764 break;
1765 case CURLUPART_URL: {
1766 /*
1767 * Allow a new URL to replace the existing (if any) contents.
1768 *
1769 * If the existing contents is enough for a URL, allow a relative URL to
1770 * replace it.
1771 */
1772 CURLUcode result;
1773 char *oldurl;
1774 char *redired_url;
1775
1776 /* if the new thing is absolute or the old one is not
1777 * (we could not get an absolute url in 'oldurl'),
1778 * then replace the existing with the new. */
1779 if(Curl_is_absolute_url(part, NULL, 0,
1780 flags & (CURLU_GUESS_SCHEME|
1781 CURLU_DEFAULT_SCHEME))
1782 || curl_url_get(u, CURLUPART_URL, &oldurl, flags)) {
1783 return parseurl_and_replace(part, u, flags);
1784 }
1785
1786 /* apply the relative part to create a new URL
1787 * and replace the existing one with it. */
1788 redired_url = concat_url(oldurl, part);
1789 free(oldurl);
1790 if(!redired_url)
1791 return CURLUE_OUT_OF_MEMORY;
1792
1793 result = parseurl_and_replace(redired_url, u, flags);
1794 free(redired_url);
1795 return result;
1796 }
1797 default:
1798 return CURLUE_UNKNOWN_PART;
1799 }
1800 DEBUGASSERT(storep);
1801 {
1802 const char *newp = part;
1803 size_t nalloc = strlen(part);
1804
1805 if(nalloc > CURL_MAX_INPUT_LENGTH)
1806 /* excessive input length */
1807 return CURLUE_MALFORMED_INPUT;
1808
1809 if(urlencode) {
1810 const unsigned char *i;
1811 struct dynbuf enc;
1812
1813 Curl_dyn_init(&enc, nalloc * 3 + 1);
1814
1815 for(i = (const unsigned char *)part; *i; i++) {
1816 CURLcode result;
1817 if((*i == ' ') && plusencode) {
1818 result = Curl_dyn_addn(&enc, "+", 1);
1819 if(result)
1820 return CURLUE_OUT_OF_MEMORY;
1821 }
1822 else if(Curl_isunreserved(*i) ||
1823 ((*i == '/') && urlskipslash) ||
1824 ((*i == '=') && equalsencode)) {
1825 if((*i == '=') && equalsencode)
1826 /* only skip the first equals sign */
1827 equalsencode = FALSE;
1828 result = Curl_dyn_addn(&enc, i, 1);
1829 if(result)
1830 return CURLUE_OUT_OF_MEMORY;
1831 }
1832 else {
1833 char out[3]={'%'};
1834 out[1] = hexdigits[*i>>4];
1835 out[2] = hexdigits[*i & 0xf];
1836 result = Curl_dyn_addn(&enc, out, 3);
1837 if(result)
1838 return CURLUE_OUT_OF_MEMORY;
1839 }
1840 }
1841 newp = Curl_dyn_ptr(&enc);
1842 }
1843 else {
1844 char *p;
1845 newp = strdup(part);
1846 if(!newp)
1847 return CURLUE_OUT_OF_MEMORY;
1848 p = (char *)newp;
1849 while(*p) {
1850 /* make sure percent encoded are lower case */
1851 if((*p == '%') && ISXDIGIT(p[1]) && ISXDIGIT(p[2]) &&
1852 (ISUPPER(p[1]) || ISUPPER(p[2]))) {
1853 p[1] = Curl_raw_tolower(p[1]);
1854 p[2] = Curl_raw_tolower(p[2]);
1855 p += 3;
1856 }
1857 else
1858 p++;
1859 }
1860 }
1861
1862 if(appendquery) {
1863 /* Append the 'newp' string onto the old query. Add a '&' separator if
1864 none is present at the end of the existing query already */
1865
1866 size_t querylen = u->query ? strlen(u->query) : 0;
1867 bool addamperand = querylen && (u->query[querylen -1] != '&');
1868 if(querylen) {
1869 struct dynbuf enc;
1870 Curl_dyn_init(&enc, CURL_MAX_INPUT_LENGTH);
1871
1872 if(Curl_dyn_addn(&enc, u->query, querylen)) /* add original query */
1873 goto nomem;
1874
1875 if(addamperand) {
1876 if(Curl_dyn_addn(&enc, "&", 1))
1877 goto nomem;
1878 }
1879 if(Curl_dyn_add(&enc, newp))
1880 goto nomem;
1881 free((char *)newp);
1882 free(*storep);
1883 *storep = Curl_dyn_ptr(&enc);
1884 return CURLUE_OK;
1885 nomem:
1886 free((char *)newp);
1887 return CURLUE_OUT_OF_MEMORY;
1888 }
1889 }
1890
1891 if(what == CURLUPART_HOST) {
1892 size_t n = strlen(newp);
1893 if(!n && (flags & CURLU_NO_AUTHORITY)) {
1894 /* Skip hostname check, it's allowed to be empty. */
1895 }
1896 else {
1897 if(hostname_check(u, (char *)newp, n)) {
1898 free((char *)newp);
1899 return CURLUE_BAD_HOSTNAME;
1900 }
1901 }
1902 }
1903
1904 free(*storep);
1905 *storep = (char *)newp;
1906 }
1907 /* set after the string, to make it not assigned if the allocation above
1908 fails */
1909 if(port)
1910 u->portnum = port;
1911 return CURLUE_OK;
1912}
Note: See TracBrowser for help on using the repository browser.

© 2025 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette