urlapi.c@ 98326

Last change on this file since 98326 was 98326, checked in by vboxsync, 2 years ago
curl-7.87.0: Applied and adjusted our curl changes to 7.83.1. bugref:10356
Property svn:eol-style set to `native`
File size: 50.3 KB

Line
/***************************************************************************
 *                                  _   _ ____  _
 *  Project                     ___| | | |  _ \| |
 *                             / __| | | | |_) | |
 *                            | (__| |_| |  _ <| |___
 *                             \___|\___/|_| \_\_____|
 *
 * Copyright (C) 1998 - 2022, Daniel Stenberg, <daniel@haxx.se>, et al.
 *
 * This software is licensed as described in the file COPYING, which
 * you should have received as part of this distribution. The terms
 * are also available at https://curl.se/docs/copyright.html.
 *
 * You may opt to use, copy, modify, merge, publish, distribute and/or sell
 * copies of the Software, and permit persons to whom the Software is
 * furnished to do so, under the terms of the COPYING file.
 *
 * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
 * KIND, either express or implied.
 *
 * SPDX-License-Identifier: curl
 *
 ***************************************************************************/
24
25	#include "curl_setup.h"
26
27	#include "urldata.h"
28	#include "urlapi-int.h"
29	#include "strcase.h"
30	#include "url.h"
31	#include "escape.h"
32	#include "curl_ctype.h"
33	#include "inet_pton.h"
34	#include "inet_ntop.h"
35	#include "strdup.h"
36
37	/* The last 3 #include files should be in this order */
38	#include "curl_printf.h"
39	#include "curl_memory.h"
40	#include "memdebug.h"
41
/* MSDOS/Windows style drive prefix, eg c: in c:foo.
 *
 * Evaluates to non-zero when 'str' starts with an ASCII letter immediately
 * followed by a colon. The argument is parenthesized so that expressions
 * such as 'p + 1' can be passed safely. */
#define STARTS_WITH_DRIVE_PREFIX(str) \
  (((('a' <= (str)[0]) && ((str)[0] <= 'z')) || \
    (('A' <= (str)[0]) && ((str)[0] <= 'Z'))) && \
   ((str)[1] == ':'))
47
/* MSDOS/Windows style drive prefix as it may appear in a URL: an ASCII
 * letter, then ':' or '|', then a slash, a backslash or the terminating
 * NUL. */
#define STARTS_WITH_URL_DRIVE_PREFIX(str) \
  (((('a' <= (str)[0]) && ((str)[0] <= 'z')) || \
    (('A' <= (str)[0]) && ((str)[0] <= 'Z'))) && \
   (((str)[1] == ':') || ((str)[1] == '|')) && \
   (((str)[2] == '/') || ((str)[2] == '\\') || ((str)[2] == 0)))
55
/* The scheme is not URL encoded. This is the upper bound on the number of
   scheme characters scanned and stored (excluding the terminating NUL). */
#define MAX_SCHEME_LEN 40
58
/* Internal representation of CURLU. Every string member points to a
   separately allocated, URL-encoded string (or is NULL when unset);
   free_urlhandle() releases each of them with free(). */
struct Curl_URL {
  char *scheme;
  char *user;
  char *password;
  char *options; /* IMAP only? */
  char *host;
  char *zoneid; /* for numerical IPv6 addresses */
  char *port;
  char *path;
  char *query;
  char *fragment;
  long portnum; /* the numerical version */
};
73
/* scheme used by the parser when CURLU_DEFAULT_SCHEME is set and the URL
   carries no scheme of its own */
#define DEFAULT_SCHEME "https"
75
76	static void free_urlhandle(struct Curl_URL *u)
77	{
78	free(u->scheme);
79	free(u->user);
80	free(u->password);
81	free(u->options);
82	free(u->host);
83	free(u->zoneid);
84	free(u->port);
85	free(u->path);
86	free(u->query);
87	free(u->fragment);
88	}
89
/*
 * Find the separator at the end of the host name, or the '?' in cases like
 * http://www.url.com?id=2380
 *
 * Returns a pointer into 'url': the first '/' or '?' found after the leading
 * "//" (or from the start of the string when there is no "//"), whichever
 * comes first. If neither is present, the returned pointer addresses the
 * terminating NUL.
 */
static const char *find_host_sep(const char *url)
{
  const char *sep;
  const char *query;

  /* Find the start of the hostname */
  sep = strstr(url, "//");
  if(!sep)
    sep = url;
  else
    sep += 2;

  query = strchr(sep, '?');
  sep = strchr(sep, '/');

  if(!sep)
    sep = url + strlen(url);

  if(!query)
    query = url + strlen(url);

  return sep < query ? sep : query;
}
117
118	/*
119	* Decide in an encoding-independent manner whether a character in a URL must
120	* be escaped. This is used in urlencode_str().
121	*/
122	static bool urlchar_needs_escaping(int c)
123	{
124	return !(ISCNTRL(c) \|\| ISSPACE(c) \|\| ISGRAPH(c));
125	}
126
127	/* urlencode_str() writes data into an output dynbuf and URL-encodes the
128	* spaces in the source URL accordingly.
129	*
130	* URL encoding should be skipped for host names, otherwise IDN resolution
131	* will fail.
132	*/
133	static CURLUcode urlencode_str(struct dynbuf o, const char url,
134	size_t len, bool relative,
135	bool query)
136	{
137	/* we must add this with whitespace-replacing */
138	bool left = !query;
139	const unsigned char *iptr;
140	const unsigned char host_sep = (const unsigned char ) url;
141
142	if(!relative)
143	host_sep = (const unsigned char *) find_host_sep(url);
144
145	for(iptr = (unsigned char )url; / read from here */
146	len; iptr++, len--) {
147
148	if(iptr < host_sep) {
149	if(Curl_dyn_addn(o, iptr, 1))
150	return CURLUE_OUT_OF_MEMORY;
151	continue;
152	}
153
154	if(*iptr == ' ') {
155	if(left) {
156	if(Curl_dyn_addn(o, "%20", 3))
157	return CURLUE_OUT_OF_MEMORY;
158	}
159	else {
160	if(Curl_dyn_addn(o, "+", 1))
161	return CURLUE_OUT_OF_MEMORY;
162	}
163	continue;
164	}
165
166	if(*iptr == '?')
167	left = FALSE;
168
169	if(urlchar_needs_escaping(*iptr)) {
170	if(Curl_dyn_addf(o, "%%%02x", *iptr))
171	return CURLUE_OUT_OF_MEMORY;
172	}
173	else {
174	if(Curl_dyn_addn(o, iptr, 1))
175	return CURLUE_OUT_OF_MEMORY;
176	}
177	}
178
179	return CURLUE_OK;
180	}
181
182	/*
183	* Returns the length of the scheme if the given URL is absolute (as opposed
184	* to relative). Stores the scheme in the buffer if TRUE and 'buf' is
185	* non-NULL. The buflen must be larger than MAX_SCHEME_LEN if buf is set.
186	*
187	* If 'guess_scheme' is TRUE, it means the URL might be provided without
188	* scheme.
189	*/
190	size_t Curl_is_absolute_url(const char url, char buf, size_t buflen,
191	bool guess_scheme)
192	{
193	int i;
194	DEBUGASSERT(!buf \|\| (buflen > MAX_SCHEME_LEN));
195	(void)buflen; /* only used in debug-builds */
196	if(buf)
197	buf[0] = 0; /* always leave a defined value in buf */
198	#ifdef WIN32
199	if(guess_scheme && STARTS_WITH_DRIVE_PREFIX(url))
200	return 0;
201	#endif
202	for(i = 0; i < MAX_SCHEME_LEN; ++i) {
203	char s = url[i];
204	if(s && (ISALNUM(s) \|\| (s == '+') \|\| (s == '-') \|\| (s == '.') )) {
205	/* RFC 3986 3.1 explains:
206	scheme = ALPHA *( ALPHA / DIGIT / "+" / "-" / "." )
207	*/
208	}
209	else {
210	break;
211	}
212	}
213	if(i && (url[i] == ':') && ((url[i + 1] == '/') \|\| !guess_scheme)) {
214	/* If this does not guess scheme, the scheme always ends with the colon so
215	that this also detects data: URLs etc. In guessing mode, data: could
216	be the host name "data" with a specified port number. */
217
218	/* the length of the scheme is the name part only */
219	size_t len = i;
220	if(buf) {
221	buf[i] = 0;
222	while(i--) {
223	buf[i] = Curl_raw_tolower(url[i]);
224	}
225	}
226	return len;
227	}
228	return 0;
229	}
230
231	/*
232	* Concatenate a relative URL to a base URL making it absolute.
233	* URL-encodes any spaces.
234	* The returned pointer must be freed by the caller unless NULL
235	* (returns NULL on out of memory).
236	*
237	* Note that this function destroys the 'base' string.
238	*/
239	static char concat_url(char base, const char *relurl)
240	{
241	/***
242	TRY to append this new path to the old URL
243	to the right of the host part. Oh crap, this is doomed to cause
244	problems in the future...
245	*/
246	struct dynbuf newest;
247	char *protsep;
248	char *pathsep;
249	bool host_changed = FALSE;
250	const char *useurl = relurl;
251
252	/* protsep points to the start of the host name */
253	protsep = strstr(base, "//");
254	if(!protsep)
255	protsep = base;
256	else
257	protsep += 2; /* pass the slashes */
258
259	if('/' != relurl[0]) {
260	int level = 0;
261
262	/* First we need to find out if there's a ?-letter in the URL,
263	and cut it and the right-side of that off */
264	pathsep = strchr(protsep, '?');
265	if(pathsep)
266	*pathsep = 0;
267
268	/* we have a relative path to append to the last slash if there's one
269	available, or if the new URL is just a query string (starts with a
270	'?') we append the new one at the end of the entire currently worked
271	out URL */
272	if(useurl[0] != '?') {
273	pathsep = strrchr(protsep, '/');
274	if(pathsep)
275	*pathsep = 0;
276	}
277
278	/* Check if there's any slash after the host name, and if so, remember
279	that position instead */
280	pathsep = strchr(protsep, '/');
281	if(pathsep)
282	protsep = pathsep + 1;
283	else
284	protsep = NULL;
285
286	/* now deal with one "./" or any amount of "../" in the newurl
287	and act accordingly */
288
289	if((useurl[0] == '.') && (useurl[1] == '/'))
290	useurl += 2; /* just skip the "./" */
291
292	while((useurl[0] == '.') &&
293	(useurl[1] == '.') &&
294	(useurl[2] == '/')) {
295	level++;
296	useurl += 3; /* pass the "../" */
297	}
298
299	if(protsep) {
300	while(level--) {
301	/* cut off one more level from the right of the original URL */
302	pathsep = strrchr(protsep, '/');
303	if(pathsep)
304	*pathsep = 0;
305	else {
306	*protsep = 0;
307	break;
308	}
309	}
310	}
311	}
312	else {
313	/* We got a new absolute path for this server */
314
315	if(relurl[1] == '/') {
316	/* the new URL starts with //, just keep the protocol part from the
317	original one */
318	*protsep = 0;
319	useurl = &relurl[2]; /* we keep the slashes from the original, so we
320	skip the new ones */
321	host_changed = TRUE;
322	}
323	else {
324	/* cut off the original URL from the first slash, or deal with URLs
325	without slash */
326	pathsep = strchr(protsep, '/');
327	if(pathsep) {
328	/* When people use badly formatted URLs, such as
329	"http://www.url.com?dir=/home/daniel" we must not use the first
330	slash, if there's a ?-letter before it! */
331	char *sep = strchr(protsep, '?');
332	if(sep && (sep < pathsep))
333	pathsep = sep;
334	*pathsep = 0;
335	}
336	else {
337	/* There was no slash. Now, since we might be operating on a badly
338	formatted URL, such as "http://www.url.com?id=2380" which doesn't
339	use a slash separator as it is supposed to, we need to check for a
340	?-letter as well! */
341	pathsep = strchr(protsep, '?');
342	if(pathsep)
343	*pathsep = 0;
344	}
345	}
346	}
347
348	Curl_dyn_init(&newest, CURL_MAX_INPUT_LENGTH);
349
350	/* copy over the root url part */
351	if(Curl_dyn_add(&newest, base))
352	return NULL;
353
354	/* check if we need to append a slash */
355	if(('/' == useurl[0]) \|\| (protsep && !*protsep) \|\| ('?' == useurl[0]))
356	;
357	else {
358	if(Curl_dyn_addn(&newest, "/", 1))
359	return NULL;
360	}
361
362	/* then append the new piece on the right side */
363	urlencode_str(&newest, useurl, strlen(useurl), !host_changed, FALSE);
364
365	return Curl_dyn_ptr(&newest);
366	}
367
368	/* scan for byte values < 31 or 127 */
369	static bool junkscan(const char *part, unsigned int flags)
370	{
371	if(part) {
372	static const char badbytes[]={
373	/* */ 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
374	0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
375	0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17,
376	0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f,
377	0x7f, 0x00 /* null-terminate */
378	};
379	size_t n = strlen(part);
380	size_t nfine = strcspn(part, badbytes);
381	if(nfine != n)
382	/* since we don't know which part is scanned, return a generic error
383	code */
384	return TRUE;
385	if(!(flags & CURLU_ALLOW_SPACE) && strchr(part, ' '))
386	return TRUE;
387	}
388	return FALSE;
389	}
390
391	/*
392	* parse_hostname_login()
393	*
394	* Parse the login details (user name, password and options) from the URL and
395	* strip them out of the host name
396	*
397	*/
398	static CURLUcode parse_hostname_login(struct Curl_URL *u,
399	struct dynbuf *host,
400	unsigned int flags)
401	{
402	CURLUcode result = CURLUE_OK;
403	CURLcode ccode;
404	char *userp = NULL;
405	char *passwdp = NULL;
406	char *optionsp = NULL;
407	const struct Curl_handler *h = NULL;
408
409	/* At this point, we assume all the other special cases have been taken
410	* care of, so the host is at most
411	*
412	* [user[:password][;options]]@]hostname
413	*
414	* We need somewhere to put the embedded details, so do that first.
415	*/
416
417	char *login = Curl_dyn_ptr(host);
418	char *ptr;
419
420	DEBUGASSERT(login);
421
422	ptr = strchr(login, '@');
423	if(!ptr)
424	goto out;
425
426	/* We will now try to extract the
427	* possible login information in a string like:
428	* ftp://user:[email protected]:8021/README */
429	ptr++;
430
431	/* if this is a known scheme, get some details */
432	if(u->scheme)
433	h = Curl_builtin_scheme(u->scheme, CURL_ZERO_TERMINATED);
434
435	/* We could use the login information in the URL so extract it. Only parse
436	options if the handler says we should. Note that 'h' might be NULL! */
437	ccode = Curl_parse_login_details(login, ptr - login - 1,
438	&userp, &passwdp,
439	(h && (h->flags & PROTOPT_URLOPTIONS)) ?
440	&optionsp:NULL);
441	if(ccode) {
442	result = CURLUE_BAD_LOGIN;
443	goto out;
444	}
445
446	if(userp) {
447	if(flags & CURLU_DISALLOW_USER) {
448	/* Option DISALLOW_USER is set and url contains username. */
449	result = CURLUE_USER_NOT_ALLOWED;
450	goto out;
451	}
452	if(junkscan(userp, flags)) {
453	result = CURLUE_BAD_USER;
454	goto out;
455	}
456	u->user = userp;
457	}
458
459	if(passwdp) {
460	if(junkscan(passwdp, flags)) {
461	result = CURLUE_BAD_PASSWORD;
462	goto out;
463	}
464	u->password = passwdp;
465	}
466
467	if(optionsp) {
468	if(junkscan(optionsp, flags)) {
469	result = CURLUE_BAD_LOGIN;
470	goto out;
471	}
472	u->options = optionsp;
473	}
474
475	/* move the name to the start of the host buffer */
476	if(Curl_dyn_tail(host, strlen(ptr)))
477	return CURLUE_OUT_OF_MEMORY;
478
479	return CURLUE_OK;
480	out:
481
482	free(userp);
483	free(passwdp);
484	free(optionsp);
485	u->user = NULL;
486	u->password = NULL;
487	u->options = NULL;
488
489	return result;
490	}
491
492	UNITTEST CURLUcode Curl_parse_port(struct Curl_URL u, struct dynbuf host,
493	bool has_scheme)
494	{
495	char *portptr = NULL;
496	char endbracket;
497	int len;
498	char *hostname = Curl_dyn_ptr(host);
499	/*
500	* Find the end of an IPv6 address, either on the ']' ending bracket or
501	* a percent-encoded zone index.
502	*/
503	if(1 == sscanf(hostname, "[%*45[0123456789abcdefABCDEF:.]%c%n",
504	&endbracket, &len)) {
505	if(']' == endbracket)
506	portptr = &hostname[len];
507	else if('%' == endbracket) {
508	int zonelen = len;
509	if(1 == sscanf(hostname + zonelen, "%*[^]]%c%n", &endbracket, &len)) {
510	if(']' != endbracket)
511	return CURLUE_BAD_IPV6;
512	portptr = &hostname[--zonelen + len + 1];
513	}
514	else
515	return CURLUE_BAD_IPV6;
516	}
517	else
518	return CURLUE_BAD_IPV6;
519
520	/* this is a RFC2732-style specified IP-address */
521	if(portptr && *portptr) {
522	if(*portptr != ':')
523	return CURLUE_BAD_IPV6;
524	}
525	else
526	portptr = NULL;
527	}
528	else
529	portptr = strchr(hostname, ':');
530
531	if(portptr) {
532	char *rest;
533	long port;
534	char portbuf[7];
535	size_t keep = portptr - hostname;
536
537	/* Browser behavior adaptation. If there's a colon with no digits after,
538	just cut off the name there which makes us ignore the colon and just
539	use the default port. Firefox, Chrome and Safari all do that.
540
541	Don't do it if the URL has no scheme, to make something that looks like
542	a scheme not work!
543	*/
544	Curl_dyn_setlen(host, keep);
545	portptr++;
546	if(!*portptr)
547	return has_scheme ? CURLUE_OK : CURLUE_BAD_PORT_NUMBER;
548
549	if(!ISDIGIT(*portptr))
550	return CURLUE_BAD_PORT_NUMBER;
551
552	port = strtol(portptr, &rest, 10); /* Port number must be decimal */
553
554	if(port > 0xffff)
555	return CURLUE_BAD_PORT_NUMBER;
556
557	if(rest[0])
558	return CURLUE_BAD_PORT_NUMBER;
559
560	*rest = 0;
561	/* generate a new port number string to get rid of leading zeroes etc */
562	msnprintf(portbuf, sizeof(portbuf), "%ld", port);
563	u->portnum = port;
564	u->port = strdup(portbuf);
565	if(!u->port)
566	return CURLUE_OUT_OF_MEMORY;
567	}
568
569	return CURLUE_OK;
570	}
571
572	static CURLUcode hostname_check(struct Curl_URL u, char hostname,
573	size_t hlen) /* length of hostname */
574	{
575	size_t len;
576	DEBUGASSERT(hostname);
577
578	if(!hostname[0])
579	return CURLUE_NO_HOST;
580	else if(hostname[0] == '[') {
581	const char *l = "0123456789abcdefABCDEF:.";
582	if(hlen < 4) /* '[::]' is the shortest possible valid string */
583	return CURLUE_BAD_IPV6;
584	hostname++;
585	hlen -= 2;
586
587	if(hostname[hlen] != ']')
588	return CURLUE_BAD_IPV6;
589
590	/* only valid letters are ok */
591	len = strspn(hostname, l);
592	if(hlen != len) {
593	hlen = len;
594	if(hostname[len] == '%') {
595	/* this could now be '%[zone id]' */
596	char zoneid[16];
597	int i = 0;
598	char *h = &hostname[len + 1];
599	/* pass '25' if present and is a url encoded percent sign */
600	if(!strncmp(h, "25", 2) && h[2] && (h[2] != ']'))
601	h += 2;
602	while(h && (h != ']') && (i < 15))
603	zoneid[i++] = *h++;
604	if(!i \|\| (']' != *h))
605	/* impossible to reach? */
606	return CURLUE_MALFORMED_INPUT;
607	zoneid[i] = 0;
608	u->zoneid = strdup(zoneid);
609	if(!u->zoneid)
610	return CURLUE_OUT_OF_MEMORY;
611	hostname[len] = ']'; /* insert end bracket */
612	hostname[len + 1] = 0; /* terminate the hostname */
613	}
614	else
615	return CURLUE_BAD_IPV6;
616	/* hostname is fine */
617	}
618	#ifdef ENABLE_IPV6
619	{
620	char dest[16]; /* fits a binary IPv6 address */
621	char norm[MAX_IPADR_LEN];
622	hostname[hlen] = 0; /* end the address there */
623	if(1 != Curl_inet_pton(AF_INET6, hostname, dest))
624	return CURLUE_BAD_IPV6;
625
626	/* check if it can be done shorter */
627	if(Curl_inet_ntop(AF_INET6, dest, norm, sizeof(norm)) &&
628	(strlen(norm) < hlen)) {
629	strcpy(hostname, norm);
630	hlen = strlen(norm);
631	hostname[hlen + 1] = 0;
632	}
633	hostname[hlen] = ']'; /* restore ending bracket */
634	}
635	#endif
636	}
637	else {
638	/* letters from the second string are not ok */
639	len = strcspn(hostname, " \r\n\t/:#?!@{}[]\\$\'\"^`*<>=;,+&()");
640	if(hlen != len)
641	/* hostname with bad content */
642	return CURLUE_BAD_HOSTNAME;
643	}
644	return CURLUE_OK;
645	}
646
/* TRUE for the characters that end the host name part of a URL */
#define HOSTNAME_END(x) (((x) == '/') || ((x) == '?') || ((x) == '#'))
648
649	/*
650	* Handle partial IPv4 numerical addresses and different bases, like
651	* '16843009', '0x7f', '0x7f.1' '0177.1.1.1' etc.
652	*
653	* If the given input string is syntactically wrong or any part for example is
654	* too big, this function returns FALSE and doesn't create any output.
655	*
656	* Output the "normalized" version of that input string in plain quad decimal
657	* integers and return TRUE.
658	*/
659	static bool ipv4_normalize(const char hostname, char outp, size_t olen)
660	{
661	bool done = FALSE;
662	int n = 0;
663	const char *c = hostname;
664	unsigned long parts[4] = {0, 0, 0, 0};
665
666	while(!done) {
667	char *endp;
668	unsigned long l;
669	if((c < '0') \|\| (c > '9'))
670	/* most importantly this doesn't allow a leading plus or minus */
671	return FALSE;
672	l = strtoul(c, &endp, 0);
673
674	/* overflow or nothing parsed at all */
675	if(((l == ULONG_MAX) && (errno == ERANGE)) \|\| (endp == c))
676	return FALSE;
677
678	#if SIZEOF_LONG > 4
679	/* a value larger than 32 bits */
680	if(l > UINT_MAX)
681	return FALSE;
682	#endif
683
684	parts[n] = l;
685	c = endp;
686
687	switch (*c) {
688	case '.' :
689	if(n == 3)
690	return FALSE;
691	n++;
692	c++;
693	break;
694
695	case '\0':
696	done = TRUE;
697	break;
698
699	default:
700	return FALSE;
701	}
702	}
703
704	/* this is deemed a valid IPv4 numerical address */
705
706	switch(n) {
707	case 0: /* a -- 32 bits */
708	msnprintf(outp, olen, "%u.%u.%u.%u",
709	parts[0] >> 24, (parts[0] >> 16) & 0xff,
710	(parts[0] >> 8) & 0xff, parts[0] & 0xff);
711	break;
712	case 1: /* a.b -- 8.24 bits */
713	if((parts[0] > 0xff) \|\| (parts[1] > 0xffffff))
714	return FALSE;
715	msnprintf(outp, olen, "%u.%u.%u.%u",
716	parts[0], (parts[1] >> 16) & 0xff,
717	(parts[1] >> 8) & 0xff, parts[1] & 0xff);
718	break;
719	case 2: /* a.b.c -- 8.8.16 bits */
720	if((parts[0] > 0xff) \|\| (parts[1] > 0xff) \|\| (parts[2] > 0xffff))
721	return FALSE;
722	msnprintf(outp, olen, "%u.%u.%u.%u",
723	parts[0], parts[1], (parts[2] >> 8) & 0xff,
724	parts[2] & 0xff);
725	break;
726	case 3: /* a.b.c.d -- 8.8.8.8 bits */
727	if((parts[0] > 0xff) \|\| (parts[1] > 0xff) \|\| (parts[2] > 0xff) \|\|
728	(parts[3] > 0xff))
729	return FALSE;
730	msnprintf(outp, olen, "%u.%u.%u.%u",
731	parts[0], parts[1], parts[2], parts[3]);
732	break;
733	}
734	return TRUE;
735	}
736
737	/* if necessary, replace the host content with a URL decoded version */
738	static CURLUcode decode_host(struct dynbuf *host)
739	{
740	char *per = NULL;
741	const char *hostname = Curl_dyn_ptr(host);
742	if(hostname[0] == '[')
743	/* only decode if not an ipv6 numerical */
744	return CURLUE_OK;
745	per = strchr(hostname, '%');
746	if(!per)
747	/* nothing to decode */
748	return CURLUE_OK;
749	else {
750	/* encoded */
751	size_t dlen;
752	char *decoded;
753	CURLcode result = Curl_urldecode(hostname, 0, &decoded, &dlen,
754	REJECT_CTRL);
755	if(result)
756	return CURLUE_BAD_HOSTNAME;
757	Curl_dyn_reset(host);
758	result = Curl_dyn_addn(host, decoded, dlen);
759	free(decoded);
760	if(result)
761	return CURLUE_OUT_OF_MEMORY;
762	}
763
764	return CURLUE_OK;
765	}
766
767	/*
768	* "Remove Dot Segments"
769	* https://datatracker.ietf.org/doc/html/rfc3986#section-5.2.4
770	*/
771
772	/*
773	* dedotdotify()
774	* @unittest: 1395
775	*
776	* This function gets a null-terminated path with dot and dotdot sequences
777	* passed in and strips them off according to the rules in RFC 3986 section
778	* 5.2.4.
779	*
780	* The function handles a query part ('?' + stuff) appended but it expects
781	* that fragments ('#' + stuff) have already been cut off.
782	*
783	* RETURNS
784	*
785	* an allocated dedotdotified output string
786	*/
787	UNITTEST char dedotdotify(const char input, size_t clen);
788	UNITTEST char dedotdotify(const char input, size_t clen)
789	{
790	char *out = malloc(clen + 1);
791	char *outptr;
792	const char *orginput = input;
793	char *queryp;
794	if(!out)
795	return NULL; /* out of memory */
796
797	out = 0; / null-terminates, for inputs like "./" */
798	outptr = out;
799
800	if(!*input)
801	/* zero length input string, return that */
802	return out;
803
804	/*
805	* To handle query-parts properly, we must find it and remove it during the
806	* dotdot-operation and then append it again at the end to the output
807	* string.
808	*/
809	queryp = strchr(input, '?');
810
811	do {
812	bool dotdot = TRUE;
813	if(*input == '.') {
814	/* A. If the input buffer begins with a prefix of "../" or "./", then
815	remove that prefix from the input buffer; otherwise, */
816
817	if(!strncmp("./", input, 2)) {
818	input += 2;
819	clen -= 2;
820	}
821	else if(!strncmp("../", input, 3)) {
822	input += 3;
823	clen -= 3;
824	}
825	/* D. if the input buffer consists only of "." or "..", then remove
826	that from the input buffer; otherwise, */
827
828	else if(!strcmp(".", input) \|\| !strcmp("..", input) \|\|
829	!strncmp(".?", input, 2) \|\| !strncmp("..?", input, 3)) {
830	*out = 0;
831	break;
832	}
833	else
834	dotdot = FALSE;
835	}
836	else if(*input == '/') {
837	/* B. if the input buffer begins with a prefix of "/./" or "/.", where
838	"." is a complete path segment, then replace that prefix with "/" in
839	the input buffer; otherwise, */
840	if(!strncmp("/./", input, 3)) {
841	input += 2;
842	clen -= 2;
843	}
844	else if(!strcmp("/.", input) \|\| !strncmp("/.?", input, 3)) {
845	*outptr++ = '/';
846	*outptr = 0;
847	break;
848	}
849
850	/* C. if the input buffer begins with a prefix of "/../" or "/..",
851	where ".." is a complete path segment, then replace that prefix with
852	"/" in the input buffer and remove the last segment and its
853	preceding "/" (if any) from the output buffer; otherwise, */
854
855	else if(!strncmp("/../", input, 4)) {
856	input += 3;
857	clen -= 3;
858	/* remove the last segment from the output buffer */
859	while(outptr > out) {
860	outptr--;
861	if(*outptr == '/')
862	break;
863	}
864	outptr = 0; / null-terminate where it stops */
865	}
866	else if(!strcmp("/..", input) \|\| !strncmp("/..?", input, 4)) {
867	/* remove the last segment from the output buffer */
868	while(outptr > out) {
869	outptr--;
870	if(*outptr == '/')
871	break;
872	}
873	*outptr++ = '/';
874	outptr = 0; / null-terminate where it stops */
875	break;
876	}
877	else
878	dotdot = FALSE;
879	}
880	else
881	dotdot = FALSE;
882
883	if(!dotdot) {
884	/* E. move the first path segment in the input buffer to the end of
885	the output buffer, including the initial "/" character (if any) and
886	any subsequent characters up to, but not including, the next "/"
887	character or the end of the input buffer. */
888
889	do {
890	outptr++ = input++;
891	clen--;
892	} while(input && (input != '/') && (*input != '?'));
893	*outptr = 0;
894	}
895
896	/* continue until end of input string OR, if there is a terminating
897	query part, stop there */
898	} while(*input && (!queryp \|\| (input < queryp)));
899
900	if(queryp) {
901	size_t qlen;
902	/* There was a query part, append that to the output. */
903	size_t oindex = queryp - orginput;
904	qlen = strlen(&orginput[oindex]);
905	memcpy(outptr, &orginput[oindex], qlen + 1); /* include zero byte */
906	}
907
908	return out;
909	}
910
911	static CURLUcode parseurl(const char url, CURLU u, unsigned int flags)
912	{
913	const char *path;
914	size_t pathlen;
915	bool uncpath = FALSE;
916	char *query = NULL;
917	char *fragment = NULL;
918	char schemebuf[MAX_SCHEME_LEN + 1];
919	const char *schemep = NULL;
920	size_t schemelen = 0;
921	size_t urllen;
922	CURLUcode result = CURLUE_OK;
923	size_t fraglen = 0;
924	struct dynbuf host;
925
926	DEBUGASSERT(url);
927
928	Curl_dyn_init(&host, CURL_MAX_INPUT_LENGTH);
929
930	/*************************************************************
931	* Parse the URL.
932	************************************************************/
933	/* allocate scratch area */
934	urllen = strlen(url);
935	if(urllen > CURL_MAX_INPUT_LENGTH) {
936	/* excessive input length */
937	result = CURLUE_MALFORMED_INPUT;
938	goto fail;
939	}
940
941	schemelen = Curl_is_absolute_url(url, schemebuf, sizeof(schemebuf),
942	flags & (CURLU_GUESS_SCHEME\|
943	CURLU_DEFAULT_SCHEME));
944
945	/* handle the file: scheme */
946	if(schemelen && !strcmp(schemebuf, "file")) {
947	if(urllen <= 6) {
948	/* file:/ is not enough to actually be a complete file: URL */
949	result = CURLUE_BAD_FILE_URL;
950	goto fail;
951	}
952
953	/* path has been allocated large enough to hold this */
954	path = (char *)&url[5];
955
956	schemep = u->scheme = strdup("file");
957	if(!u->scheme) {
958	result = CURLUE_OUT_OF_MEMORY;
959	goto fail;
960	}
961
962	/* Extra handling URLs with an authority component (i.e. that start with
963	* "file://")
964	*
965	* We allow omitted hostname (e.g. file:/<path>) -- valid according to
966	* RFC 8089, but not the (current) WHAT-WG URL spec.
967	*/
968	if(path[0] == '/' && path[1] == '/') {
969	/* swallow the two slashes */
970	const char *ptr = &path[2];
971
972	/*
973	* According to RFC 8089, a file: URL can be reliably dereferenced if:
974	*
975	* o it has no/blank hostname, or
976	*
977	* o the hostname matches "localhost" (case-insensitively), or
978	*
979	* o the hostname is a FQDN that resolves to this machine, or
980	*
981	* o it is an UNC String transformed to an URI (Windows only, RFC 8089
982	* Appendix E.3).
983	*
984	* For brevity, we only consider URLs with empty, "localhost", or
985	* "127.0.0.1" hostnames as local, otherwise as an UNC String.
986	*
987	* Additionally, there is an exception for URLs with a Windows drive
988	* letter in the authority (which was accidentally omitted from RFC 8089
989	* Appendix E, but believe me, it was meant to be there. --MK)
990	*/
991	if(ptr[0] != '/' && !STARTS_WITH_URL_DRIVE_PREFIX(ptr)) {
992	/* the URL includes a host name, it must match "localhost" or
993	"127.0.0.1" to be valid */
994	if(checkprefix("localhost/", ptr) \|\|
995	checkprefix("127.0.0.1/", ptr)) {
996	ptr += 9; /* now points to the slash after the host */
997	}
998	else {
999	#if defined(WIN32)
1000	size_t len;
1001
1002	/* the host name, NetBIOS computer name, can not contain disallowed
1003	chars, and the delimiting slash character must be appended to the
1004	host name */
1005	path = strpbrk(ptr, "/\\:*?\"<>\|");
1006	if(!path \|\| *path != '/') {
1007	result = CURLUE_BAD_FILE_URL;
1008	goto fail;
1009	}
1010
1011	len = path - ptr;
1012	if(len) {
1013	if(Curl_dyn_addn(&host, ptr, len)) {
1014	result = CURLUE_OUT_OF_MEMORY;
1015	goto fail;
1016	}
1017	uncpath = TRUE;
1018	}
1019
1020	ptr -= 2; /* now points to the // before the host in UNC */
1021	#else
1022	/* Invalid file://hostname/, expected localhost or 127.0.0.1 or
1023	none */
1024	result = CURLUE_BAD_FILE_URL;
1025	goto fail;
1026	#endif
1027	}
1028	}
1029
1030	path = ptr;
1031	}
1032
1033	if(!uncpath)
1034	/* no host for file: URLs by default */
1035	Curl_dyn_reset(&host);
1036
1037	#if !defined(MSDOS) && !defined(WIN32) && !defined(__CYGWIN__)
1038	/* Don't allow Windows drive letters when not in Windows.
1039	* This catches both "file:/c:" and "file:c:" */
1040	if(('/' == path[0] && STARTS_WITH_URL_DRIVE_PREFIX(&path[1])) \|\|
1041	STARTS_WITH_URL_DRIVE_PREFIX(path)) {
1042	/* File drive letters are only accepted in MSDOS/Windows */
1043	result = CURLUE_BAD_FILE_URL;
1044	goto fail;
1045	}
1046	#else
1047	/* If the path starts with a slash and a drive letter, ditch the slash */
1048	if('/' == path[0] && STARTS_WITH_URL_DRIVE_PREFIX(&path[1])) {
1049	/* This cannot be done with strcpy, as the memory chunks overlap! */
1050	path++;
1051	}
1052	#endif
1053
1054	}
1055	else {
1056	/* clear path */
1057	const char *p;
1058	const char *hostp;
1059	size_t len;
1060
1061	if(schemelen) {
1062	int i = 0;
1063	p = &url[schemelen + 1];
1064	while(p && (*p == '/') && (i < 4)) {
1065	p++;
1066	i++;
1067	}
1068
1069	schemep = schemebuf;
1070	if(!Curl_builtin_scheme(schemep, CURL_ZERO_TERMINATED) &&
1071	!(flags & CURLU_NON_SUPPORT_SCHEME)) {
1072	result = CURLUE_UNSUPPORTED_SCHEME;
1073	goto fail;
1074	}
1075
1076	if((i < 1) \|\| (i>3)) {
1077	/* less than one or more than three slashes */
1078	result = CURLUE_BAD_SLASHES;
1079	goto fail;
1080	}
1081	if(junkscan(schemep, flags)) {
1082	result = CURLUE_BAD_SCHEME;
1083	goto fail;
1084	}
1085	}
1086	else {
1087	/* no scheme! */
1088
1089	if(!(flags & (CURLU_DEFAULT_SCHEME\|CURLU_GUESS_SCHEME))) {
1090	result = CURLUE_BAD_SCHEME;
1091	goto fail;
1092	}
1093	if(flags & CURLU_DEFAULT_SCHEME)
1094	schemep = DEFAULT_SCHEME;
1095
1096	/*
1097	* The URL was badly formatted, let's try without scheme specified.
1098	*/
1099	p = url;
1100	}
1101	hostp = p; /* host name starts here */
1102
1103	/* find the end of the host name + port number */
1104	while(p && !HOSTNAME_END(p))
1105	p++;
1106
1107	len = p - hostp;
1108	if(len) {
1109	if(Curl_dyn_addn(&host, hostp, len)) {
1110	result = CURLUE_OUT_OF_MEMORY;
1111	goto fail;
1112	}
1113	}
1114	else {
1115	if(!(flags & CURLU_NO_AUTHORITY)) {
1116	result = CURLUE_NO_HOST;
1117	goto fail;
1118	}
1119	}
1120
1121	path = (char *)p;
1122
1123	if(schemep) {
1124	u->scheme = strdup(schemep);
1125	if(!u->scheme) {
1126	result = CURLUE_OUT_OF_MEMORY;
1127	goto fail;
1128	}
1129	}
1130	}
1131
1132	fragment = strchr(path, '#');
1133	if(fragment) {
1134	fraglen = strlen(fragment);
1135	if(fraglen > 1) {
1136	/* skip the leading '#' in the copy but include the terminating null */
1137	u->fragment = Curl_memdup(fragment + 1, fraglen);
1138	if(!u->fragment) {
1139	result = CURLUE_OUT_OF_MEMORY;
1140	goto fail;
1141	}
1142
1143	if(junkscan(u->fragment, flags)) {
1144	result = CURLUE_BAD_FRAGMENT;
1145	goto fail;
1146	}
1147	}
1148	}
1149
1150	query = strchr(path, '?');
1151	if(query && (!fragment \|\| (query < fragment))) {
1152	size_t qlen = strlen(query) - fraglen; /* includes '?' */
1153	pathlen = strlen(path) - qlen - fraglen;
1154	if(qlen > 1) {
1155	if(qlen && (flags & CURLU_URLENCODE)) {
1156	struct dynbuf enc;
1157	Curl_dyn_init(&enc, CURL_MAX_INPUT_LENGTH);
1158	/* skip the leading question mark */
1159	if(urlencode_str(&enc, query + 1, qlen - 1, TRUE, TRUE)) {
1160	result = CURLUE_OUT_OF_MEMORY;
1161	goto fail;
1162	}
1163	u->query = Curl_dyn_ptr(&enc);
1164	}
1165	else {
1166	u->query = Curl_memdup(query + 1, qlen);
1167	if(!u->query) {
1168	result = CURLUE_OUT_OF_MEMORY;
1169	goto fail;
1170	}
1171	u->query[qlen - 1] = 0;
1172	}
1173
1174	if(junkscan(u->query, flags)) {
1175	result = CURLUE_BAD_QUERY;
1176	goto fail;
1177	}
1178	}
1179	else {
1180	/* single byte query */
1181	u->query = strdup("");
1182	if(!u->query) {
1183	result = CURLUE_OUT_OF_MEMORY;
1184	goto fail;
1185	}
1186	}
1187	}
1188	else
1189	pathlen = strlen(path) - fraglen;
1190
1191	if(pathlen && (flags & CURLU_URLENCODE)) {
1192	struct dynbuf enc;
1193	Curl_dyn_init(&enc, CURL_MAX_INPUT_LENGTH);
1194	if(urlencode_str(&enc, path, pathlen, TRUE, FALSE)) {
1195	result = CURLUE_OUT_OF_MEMORY;
1196	goto fail;
1197	}
1198	pathlen = Curl_dyn_len(&enc);
1199	path = u->path = Curl_dyn_ptr(&enc);
1200	}
1201
1202	if(!pathlen) {
1203	/* there is no path left, unset */
1204	path = NULL;
1205	}
1206	else {
1207	if(!u->path) {
1208	u->path = Curl_memdup(path, pathlen + 1);
1209	if(!u->path) {
1210	result = CURLUE_OUT_OF_MEMORY;
1211	goto fail;
1212	}
1213	u->path[pathlen] = 0;
1214	path = u->path;
1215	}
1216	else if(flags & CURLU_URLENCODE)
1217	/* it might have encoded more than just the path so cut it */
1218	u->path[pathlen] = 0;
1219
1220	if(junkscan(u->path, flags)) {
1221	result = CURLUE_BAD_PATH;
1222	goto fail;
1223	}
1224
1225	if(!(flags & CURLU_PATH_AS_IS)) {
1226	/* remove ../ and ./ sequences according to RFC3986 */
1227	char newp = dedotdotify((char )path, pathlen);
1228	if(!newp) {
1229	result = CURLUE_OUT_OF_MEMORY;
1230	goto fail;
1231	}
1232	free(u->path);
1233	u->path = newp;
1234	}
1235	}
1236
1237	if(Curl_dyn_len(&host)) {
1238	char normalized_ipv4[sizeof("255.255.255.255") + 1];
1239
1240	/*
1241	* Parse the login details and strip them out of the host name.
1242	*/
1243	result = parse_hostname_login(u, &host, flags);
1244	if(!result)
1245	result = Curl_parse_port(u, &host, schemelen);
1246	if(result)
1247	goto fail;
1248
1249	if(junkscan(Curl_dyn_ptr(&host), flags)) {
1250	result = CURLUE_BAD_HOSTNAME;
1251	goto fail;
1252	}
1253
1254	if(ipv4_normalize(Curl_dyn_ptr(&host),
1255	normalized_ipv4, sizeof(normalized_ipv4))) {
1256	Curl_dyn_reset(&host);
1257	if(Curl_dyn_add(&host, normalized_ipv4)) {
1258	result = CURLUE_OUT_OF_MEMORY;
1259	goto fail;
1260	}
1261	}
1262	else {
1263	result = decode_host(&host);
1264	if(!result)
1265	result = hostname_check(u, Curl_dyn_ptr(&host), Curl_dyn_len(&host));
1266	if(result)
1267	goto fail;
1268	}
1269
1270	if((flags & CURLU_GUESS_SCHEME) && !schemep) {
1271	const char *hostname = Curl_dyn_ptr(&host);
1272	/* legacy curl-style guess based on host name */
1273	if(checkprefix("ftp.", hostname))
1274	schemep = "ftp";
1275	else if(checkprefix("dict.", hostname))
1276	schemep = "dict";
1277	else if(checkprefix("ldap.", hostname))
1278	schemep = "ldap";
1279	else if(checkprefix("imap.", hostname))
1280	schemep = "imap";
1281	else if(checkprefix("smtp.", hostname))
1282	schemep = "smtp";
1283	else if(checkprefix("pop3.", hostname))
1284	schemep = "pop3";
1285	else
1286	schemep = "http";
1287
1288	u->scheme = strdup(schemep);
1289	if(!u->scheme) {
1290	result = CURLUE_OUT_OF_MEMORY;
1291	goto fail;
1292	}
1293	}
1294	}
1295	else if(flags & CURLU_NO_AUTHORITY) {
1296	/* allowed to be empty. */
1297	if(Curl_dyn_add(&host, "")) {
1298	result = CURLUE_OUT_OF_MEMORY;
1299	goto fail;
1300	}
1301	}
1302
1303	u->host = Curl_dyn_ptr(&host);
1304
1305	return result;
1306	fail:
1307	Curl_dyn_free(&host);
1308	free_urlhandle(u);
1309	return result;
1310	}
1311
1312	/*
1313	* Parse the URL and, if successful, replace everything in the Curl_URL struct.
1314	*/
1315	static CURLUcode parseurl_and_replace(const char url, CURLU u,
1316	unsigned int flags)
1317	{
1318	CURLUcode result;
1319	CURLU tmpurl;
1320	memset(&tmpurl, 0, sizeof(tmpurl));
1321	result = parseurl(url, &tmpurl, flags);
1322	if(!result) {
1323	free_urlhandle(u);
1324	*u = tmpurl;
1325	}
1326	return result;
1327	}
1328
1329	/*
1330	*/
1331	CURLU *curl_url(void)
1332	{
1333	return calloc(sizeof(struct Curl_URL), 1);
1334	}
1335
1336	void curl_url_cleanup(CURLU *u)
1337	{
1338	if(u) {
1339	free_urlhandle(u);
1340	free(u);
1341	}
1342	}
1343
/*
 * Duplicate the string member 'name' of 'src' into 'dest' when it is set.
 * Jumps to a label called 'fail' in the enclosing function when strdup()
 * fails, so every user of this macro must provide that label.
 *
 * The pointer arguments are parenthesized so that arbitrary pointer
 * expressions (for example '&object') can be passed safely.
 */
#define DUP(dest, src, name)                    \
  do {                                          \
    if((src)->name) {                           \
      (dest)->name = strdup((src)->name);       \
      if(!(dest)->name)                         \
        goto fail;                              \
    }                                           \
  } while(0)
1352
1353	CURLU curl_url_dup(CURLU in)
1354	{
1355	struct Curl_URL *u = calloc(sizeof(struct Curl_URL), 1);
1356	if(u) {
1357	DUP(u, in, scheme);
1358	DUP(u, in, user);
1359	DUP(u, in, password);
1360	DUP(u, in, options);
1361	DUP(u, in, host);
1362	DUP(u, in, port);
1363	DUP(u, in, path);
1364	DUP(u, in, query);
1365	DUP(u, in, fragment);
1366	u->portnum = in->portnum;
1367	}
1368	return u;
1369	fail:
1370	curl_url_cleanup(u);
1371	return NULL;
1372	}
1373
1374	CURLUcode curl_url_get(CURLU *u, CURLUPart what,
1375	char **part, unsigned int flags)
1376	{
1377	char *ptr;
1378	CURLUcode ifmissing = CURLUE_UNKNOWN_PART;
1379	char portbuf[7];
1380	bool urldecode = (flags & CURLU_URLDECODE)?1:0;
1381	bool urlencode = (flags & CURLU_URLENCODE)?1:0;
1382	bool plusdecode = FALSE;
1383	(void)flags;
1384	if(!u)
1385	return CURLUE_BAD_HANDLE;
1386	if(!part)
1387	return CURLUE_BAD_PARTPOINTER;
1388	*part = NULL;
1389
1390	switch(what) {
1391	case CURLUPART_SCHEME:
1392	ptr = u->scheme;
1393	ifmissing = CURLUE_NO_SCHEME;
1394	urldecode = FALSE; /* never for schemes */
1395	break;
1396	case CURLUPART_USER:
1397	ptr = u->user;
1398	ifmissing = CURLUE_NO_USER;
1399	break;
1400	case CURLUPART_PASSWORD:
1401	ptr = u->password;
1402	ifmissing = CURLUE_NO_PASSWORD;
1403	break;
1404	case CURLUPART_OPTIONS:
1405	ptr = u->options;
1406	ifmissing = CURLUE_NO_OPTIONS;
1407	break;
1408	case CURLUPART_HOST:
1409	ptr = u->host;
1410	ifmissing = CURLUE_NO_HOST;
1411	break;
1412	case CURLUPART_ZONEID:
1413	ptr = u->zoneid;
1414	ifmissing = CURLUE_NO_ZONEID;
1415	break;
1416	case CURLUPART_PORT:
1417	ptr = u->port;
1418	ifmissing = CURLUE_NO_PORT;
1419	urldecode = FALSE; /* never for port */
1420	if(!ptr && (flags & CURLU_DEFAULT_PORT) && u->scheme) {
1421	/* there's no stored port number, but asked to deliver
1422	a default one for the scheme */
1423	const struct Curl_handler *h =
1424	Curl_builtin_scheme(u->scheme, CURL_ZERO_TERMINATED);
1425	if(h) {
1426	msnprintf(portbuf, sizeof(portbuf), "%u", h->defport);
1427	ptr = portbuf;
1428	}
1429	}
1430	else if(ptr && u->scheme) {
1431	/* there is a stored port number, but ask to inhibit if
1432	it matches the default one for the scheme */
1433	const struct Curl_handler *h =
1434	Curl_builtin_scheme(u->scheme, CURL_ZERO_TERMINATED);
1435	if(h && (h->defport == u->portnum) &&
1436	(flags & CURLU_NO_DEFAULT_PORT))
1437	ptr = NULL;
1438	}
1439	break;
1440	case CURLUPART_PATH:
1441	ptr = u->path;
1442	if(!ptr) {
1443	ptr = u->path = strdup("/");
1444	if(!u->path)
1445	return CURLUE_OUT_OF_MEMORY;
1446	}
1447	break;
1448	case CURLUPART_QUERY:
1449	ptr = u->query;
1450	ifmissing = CURLUE_NO_QUERY;
1451	plusdecode = urldecode;
1452	break;
1453	case CURLUPART_FRAGMENT:
1454	ptr = u->fragment;
1455	ifmissing = CURLUE_NO_FRAGMENT;
1456	break;
1457	case CURLUPART_URL: {
1458	char *url;
1459	char *scheme;
1460	char *options = u->options;
1461	char *port = u->port;
1462	char *allochost = NULL;
1463	if(u->scheme && strcasecompare("file", u->scheme)) {
1464	url = aprintf("file://%s%s%s",
1465	u->path,
1466	u->fragment? "#": "",
1467	u->fragment? u->fragment : "");
1468	}
1469	else if(!u->host)
1470	return CURLUE_NO_HOST;
1471	else {
1472	const struct Curl_handler *h = NULL;
1473	if(u->scheme)
1474	scheme = u->scheme;
1475	else if(flags & CURLU_DEFAULT_SCHEME)
1476	scheme = (char *) DEFAULT_SCHEME;
1477	else
1478	return CURLUE_NO_SCHEME;
1479
1480	h = Curl_builtin_scheme(scheme, CURL_ZERO_TERMINATED);
1481	if(!port && (flags & CURLU_DEFAULT_PORT)) {
1482	/* there's no stored port number, but asked to deliver
1483	a default one for the scheme */
1484	if(h) {
1485	msnprintf(portbuf, sizeof(portbuf), "%u", h->defport);
1486	port = portbuf;
1487	}
1488	}
1489	else if(port) {
1490	/* there is a stored port number, but asked to inhibit if it matches
1491	the default one for the scheme */
1492	if(h && (h->defport == u->portnum) &&
1493	(flags & CURLU_NO_DEFAULT_PORT))
1494	port = NULL;
1495	}
1496
1497	if(h && !(h->flags & PROTOPT_URLOPTIONS))
1498	options = NULL;
1499
1500	if(u->host[0] == '[') {
1501	if(u->zoneid) {
1502	/* make it '[ host %25 zoneid ]' */
1503	struct dynbuf enc;
1504	size_t hostlen = strlen(u->host);
1505	Curl_dyn_init(&enc, CURL_MAX_INPUT_LENGTH);
1506	if(Curl_dyn_addf(&enc, "%.*s%%25%s]", (int)hostlen - 1, u->host,
1507	u->zoneid))
1508	return CURLUE_OUT_OF_MEMORY;
1509	allochost = Curl_dyn_ptr(&enc);
1510	}
1511	}
1512	else if(urlencode) {
1513	allochost = curl_easy_escape(NULL, u->host, 0);
1514	if(!allochost)
1515	return CURLUE_OUT_OF_MEMORY;
1516	}
1517	else {
1518	/* only encode '%' in output host name */
1519	char *host = u->host;
1520	bool percent = FALSE;
1521	/* first, count number of percents present in the name */
1522	while(*host) {
1523	if(*host == '%') {
1524	percent = TRUE;
1525	break;
1526	}
1527	host++;
1528	}
1529	/* if there were percent(s), encode the host name */
1530	if(percent) {
1531	struct dynbuf enc;
1532	CURLcode result;
1533	Curl_dyn_init(&enc, CURL_MAX_INPUT_LENGTH);
1534	host = u->host;
1535	while(*host) {
1536	if(*host == '%')
1537	result = Curl_dyn_addn(&enc, "%25", 3);
1538	else
1539	result = Curl_dyn_addn(&enc, host, 1);
1540	if(result)
1541	return CURLUE_OUT_OF_MEMORY;
1542	host++;
1543	}
1544	free(u->host);
1545	u->host = Curl_dyn_ptr(&enc);
1546	}
1547	}
1548
1549	url = aprintf("%s://%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s",
1550	scheme,
1551	u->user ? u->user : "",
1552	u->password ? ":": "",
1553	u->password ? u->password : "",
1554	options ? ";" : "",
1555	options ? options : "",
1556	(u->user \|\| u->password \|\| options) ? "@": "",
1557	allochost ? allochost : u->host,
1558	port ? ":": "",
1559	port ? port : "",
1560	(u->path && (u->path[0] != '/')) ? "/": "",
1561	u->path ? u->path : "/",
1562	(u->query && u->query[0]) ? "?": "",
1563	(u->query && u->query[0]) ? u->query : "",
1564	u->fragment? "#": "",
1565	u->fragment? u->fragment : "");
1566	free(allochost);
1567	}
1568	if(!url)
1569	return CURLUE_OUT_OF_MEMORY;
1570	*part = url;
1571	return CURLUE_OK;
1572	}
1573	default:
1574	ptr = NULL;
1575	break;
1576	}
1577	if(ptr) {
1578	size_t partlen = strlen(ptr);
1579	size_t i = 0;
1580	*part = Curl_memdup(ptr, partlen + 1);
1581	if(!*part)
1582	return CURLUE_OUT_OF_MEMORY;
1583	if(plusdecode) {
1584	/* convert + to space */
1585	char plus = part;
1586	for(i = 0; i < partlen; ++plus, i++) {
1587	if(*plus == '+')
1588	*plus = ' ';
1589	}
1590	}
1591	if(urldecode) {
1592	char *decoded;
1593	size_t dlen;
1594	/* this unconditional rejection of control bytes is documented
1595	API behavior */
1596	CURLcode res = Curl_urldecode(*part, 0, &decoded, &dlen, REJECT_CTRL);
1597	free(*part);
1598	if(res) {
1599	*part = NULL;
1600	return CURLUE_URLDECODE;
1601	}
1602	*part = decoded;
1603	partlen = dlen;
1604	}
1605	if(urlencode) {
1606	struct dynbuf enc;
1607	Curl_dyn_init(&enc, CURL_MAX_INPUT_LENGTH);
1608	if(urlencode_str(&enc, *part, partlen, TRUE,
1609	what == CURLUPART_QUERY))
1610	return CURLUE_OUT_OF_MEMORY;
1611	free(*part);
1612	*part = Curl_dyn_ptr(&enc);
1613	}
1614
1615	return CURLUE_OK;
1616	}
1617	else
1618	return ifmissing;
1619	}
1620
1621	CURLUcode curl_url_set(CURLU *u, CURLUPart what,
1622	const char *part, unsigned int flags)
1623	{
1624	char **storep = NULL;
1625	long port = 0;
1626	bool urlencode = (flags & CURLU_URLENCODE)? 1 : 0;
1627	bool plusencode = FALSE;
1628	bool urlskipslash = FALSE;
1629	bool appendquery = FALSE;
1630	bool equalsencode = FALSE;
1631
1632	if(!u)
1633	return CURLUE_BAD_HANDLE;
1634	if(!part) {
1635	/* setting a part to NULL clears it */
1636	switch(what) {
1637	case CURLUPART_URL:
1638	break;
1639	case CURLUPART_SCHEME:
1640	storep = &u->scheme;
1641	break;
1642	case CURLUPART_USER:
1643	storep = &u->user;
1644	break;
1645	case CURLUPART_PASSWORD:
1646	storep = &u->password;
1647	break;
1648	case CURLUPART_OPTIONS:
1649	storep = &u->options;
1650	break;
1651	case CURLUPART_HOST:
1652	storep = &u->host;
1653	break;
1654	case CURLUPART_ZONEID:
1655	storep = &u->zoneid;
1656	break;
1657	case CURLUPART_PORT:
1658	u->portnum = 0;
1659	storep = &u->port;
1660	break;
1661	case CURLUPART_PATH:
1662	storep = &u->path;
1663	break;
1664	case CURLUPART_QUERY:
1665	storep = &u->query;
1666	break;
1667	case CURLUPART_FRAGMENT:
1668	storep = &u->fragment;
1669	break;
1670	default:
1671	return CURLUE_UNKNOWN_PART;
1672	}
1673	if(storep && *storep) {
1674	Curl_safefree(*storep);
1675	}
1676	else if(!storep) {
1677	free_urlhandle(u);
1678	memset(u, 0, sizeof(struct Curl_URL));
1679	}
1680	return CURLUE_OK;
1681	}
1682
1683	switch(what) {
1684	case CURLUPART_SCHEME:
1685	if(strlen(part) > MAX_SCHEME_LEN)
1686	/* too long */
1687	return CURLUE_BAD_SCHEME;
1688	if(!(flags & CURLU_NON_SUPPORT_SCHEME) &&
1689	/* verify that it is a fine scheme */
1690	!Curl_builtin_scheme(part, CURL_ZERO_TERMINATED))
1691	return CURLUE_UNSUPPORTED_SCHEME;
1692	storep = &u->scheme;
1693	urlencode = FALSE; /* never */
1694	break;
1695	case CURLUPART_USER:
1696	storep = &u->user;
1697	break;
1698	case CURLUPART_PASSWORD:
1699	storep = &u->password;
1700	break;
1701	case CURLUPART_OPTIONS:
1702	storep = &u->options;
1703	break;
1704	case CURLUPART_HOST: {
1705	size_t len = strcspn(part, " \r\n");
1706	if(strlen(part) != len)
1707	/* hostname with bad content */
1708	return CURLUE_BAD_HOSTNAME;
1709	storep = &u->host;
1710	Curl_safefree(u->zoneid);
1711	break;
1712	}
1713	case CURLUPART_ZONEID:
1714	storep = &u->zoneid;
1715	break;
1716	case CURLUPART_PORT:
1717	{
1718	char *endp;
1719	urlencode = FALSE; /* never */
1720	port = strtol(part, &endp, 10); /* Port number must be decimal */
1721	if((port <= 0) \|\| (port > 0xffff))
1722	return CURLUE_BAD_PORT_NUMBER;
1723	if(*endp)
1724	/* weirdly provided number, not good! */
1725	return CURLUE_BAD_PORT_NUMBER;
1726	storep = &u->port;
1727	}
1728	break;
1729	case CURLUPART_PATH:
1730	urlskipslash = TRUE;
1731	storep = &u->path;
1732	break;
1733	case CURLUPART_QUERY:
1734	plusencode = urlencode;
1735	appendquery = (flags & CURLU_APPENDQUERY)?1:0;
1736	equalsencode = appendquery;
1737	storep = &u->query;
1738	break;
1739	case CURLUPART_FRAGMENT:
1740	storep = &u->fragment;
1741	break;
1742	case CURLUPART_URL: {
1743	/*
1744	* Allow a new URL to replace the existing (if any) contents.
1745	*
1746	* If the existing contents is enough for a URL, allow a relative URL to
1747	* replace it.
1748	*/
1749	CURLUcode result;
1750	char *oldurl;
1751	char *redired_url;
1752
1753	/* if the new thing is absolute or the old one is not
1754	* (we could not get an absolute url in 'oldurl'),
1755	* then replace the existing with the new. */
1756	if(Curl_is_absolute_url(part, NULL, 0,
1757	flags & (CURLU_GUESS_SCHEME\|
1758	CURLU_DEFAULT_SCHEME))
1759	\|\| curl_url_get(u, CURLUPART_URL, &oldurl, flags)) {
1760	return parseurl_and_replace(part, u, flags);
1761	}
1762
1763	/* apply the relative part to create a new URL
1764	* and replace the existing one with it. */
1765	redired_url = concat_url(oldurl, part);
1766	free(oldurl);
1767	if(!redired_url)
1768	return CURLUE_OUT_OF_MEMORY;
1769
1770	result = parseurl_and_replace(redired_url, u, flags);
1771	free(redired_url);
1772	return result;
1773	}
1774	default:
1775	return CURLUE_UNKNOWN_PART;
1776	}
1777	DEBUGASSERT(storep);
1778	{
1779	const char *newp = part;
1780	size_t nalloc = strlen(part);
1781
1782	if(nalloc > CURL_MAX_INPUT_LENGTH)
1783	/* excessive input length */
1784	return CURLUE_MALFORMED_INPUT;
1785
1786	if(urlencode) {
1787	const unsigned char *i;
1788	struct dynbuf enc;
1789
1790	Curl_dyn_init(&enc, nalloc * 3 + 1);
1791
1792	for(i = (const unsigned char )part; i; i++) {
1793	CURLcode result;
1794	if((*i == ' ') && plusencode) {
1795	result = Curl_dyn_addn(&enc, "+", 1);
1796	if(result)
1797	return CURLUE_OUT_OF_MEMORY;
1798	}
1799	else if(Curl_isunreserved(*i) \|\|
1800	((*i == '/') && urlskipslash) \|\|
1801	((*i == '=') && equalsencode)) {
1802	if((*i == '=') && equalsencode)
1803	/* only skip the first equals sign */
1804	equalsencode = FALSE;
1805	result = Curl_dyn_addn(&enc, i, 1);
1806	if(result)
1807	return CURLUE_OUT_OF_MEMORY;
1808	}
1809	else {
1810	result = Curl_dyn_addf(&enc, "%%%02x", *i);
1811	if(result)
1812	return CURLUE_OUT_OF_MEMORY;
1813	}
1814	}
1815	newp = Curl_dyn_ptr(&enc);
1816	}
1817	else {
1818	char *p;
1819	newp = strdup(part);
1820	if(!newp)
1821	return CURLUE_OUT_OF_MEMORY;
1822	p = (char *)newp;
1823	while(*p) {
1824	/* make sure percent encoded are lower case */
1825	if((*p == '%') && ISXDIGIT(p[1]) && ISXDIGIT(p[2]) &&
1826	(ISUPPER(p[1]) \|\| ISUPPER(p[2]))) {
1827	p[1] = Curl_raw_tolower(p[1]);
1828	p[2] = Curl_raw_tolower(p[2]);
1829	p += 3;
1830	}
1831	else
1832	p++;
1833	}
1834	}
1835
1836	if(appendquery) {
1837	/* Append the 'newp' string onto the old query. Add a '&' separator if
1838	none is present at the end of the existing query already */
1839
1840	size_t querylen = u->query ? strlen(u->query) : 0;
1841	bool addamperand = querylen && (u->query[querylen -1] != '&');
1842	if(querylen) {
1843	struct dynbuf enc;
1844	Curl_dyn_init(&enc, CURL_MAX_INPUT_LENGTH);
1845
1846	if(Curl_dyn_addn(&enc, u->query, querylen)) /* add original query */
1847	goto nomem;
1848
1849	if(addamperand) {
1850	if(Curl_dyn_addn(&enc, "&", 1))
1851	goto nomem;
1852	}
1853	if(Curl_dyn_add(&enc, newp))
1854	goto nomem;
1855	free((char *)newp);
1856	free(*storep);
1857	*storep = Curl_dyn_ptr(&enc);
1858	return CURLUE_OK;
1859	nomem:
1860	free((char *)newp);
1861	return CURLUE_OUT_OF_MEMORY;
1862	}
1863	}
1864
1865	if(what == CURLUPART_HOST) {
1866	size_t n = strlen(newp);
1867	if(!n && (flags & CURLU_NO_AUTHORITY)) {
1868	/* Skip hostname check, it's allowed to be empty. */
1869	}
1870	else {
1871	if(hostname_check(u, (char *)newp, n)) {
1872	free((char *)newp);
1873	return CURLUE_BAD_HOSTNAME;
1874	}
1875	}
1876	}
1877
1878	free(*storep);
1879	storep = (char )newp;
1880	}
1881	/* set after the string, to make it not assigned if the allocation above
1882	fails */
1883	if(port)
1884	u->portnum = port;
1885	return CURLUE_OK;
1886	}

Note: See TracBrowser for help on using the repository browser.

source: vbox/trunk/src/libs/curl-7.87.0/lib/urlapi.c@ 98326

Download in other formats: