urlapi.c@ 99874

Last change on this file since 99874 was 99344, checked in by vboxsync, 2 years ago
curl-8.0.1: Applied and adjusted our curl changes to 7.87.0 bugref:10417
Property svn:eol-style set to `native`
File size: 51.0 KB

Line
1	/***************************************************************************
2	* _ _ ____ _
3	* Project ___\| \| \| \| _ \\| \|
4	* / __\| \| \| \| \|_) \| \|
5	* \| (__\| \|_\| \| _ <\| \|___
6	* \___\|\___/\|_\| \_\_____\|
7	*
8	* Copyright (C) Daniel Stenberg, <daniel@haxx.se>, et al.
9	*
10	* This software is licensed as described in the file COPYING, which
11	* you should have received as part of this distribution. The terms
12	* are also available at https://curl.se/docs/copyright.html.
13	*
14	* You may opt to use, copy, modify, merge, publish, distribute and/or sell
15	* copies of the Software, and permit persons to whom the Software is
16	* furnished to do so, under the terms of the COPYING file.
17	*
18	* This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
19	* KIND, either express or implied.
20	*
21	* SPDX-License-Identifier: curl
22	*
23	***************************************************************************/
24
25	#include "curl_setup.h"
26
27	#include "urldata.h"
28	#include "urlapi-int.h"
29	#include "strcase.h"
30	#include "url.h"
31	#include "escape.h"
32	#include "curl_ctype.h"
33	#include "inet_pton.h"
34	#include "inet_ntop.h"
35	#include "strdup.h"
36	#include "idn.h"
37
38	/* The last 3 #include files should be in this order */
39	#include "curl_printf.h"
40	#include "curl_memory.h"
41	#include "memdebug.h"
42
/* MSDOS/Windows style drive prefix, eg c: in c:foo
 *
 * Evaluates to non-zero when 'str' begins with an ASCII letter that is
 * immediately followed by a colon. The argument is evaluated several times,
 * so it must be free of side effects. */
#define STARTS_WITH_DRIVE_PREFIX(str) \
  (((('a' <= (str)[0]) && ((str)[0] <= 'z')) || \
    (('A' <= (str)[0]) && ((str)[0] <= 'Z'))) && \
   ((str)[1] == ':'))
48
/* MSDOS/Windows style drive prefix, optionally with
 * a '|' instead of ':', followed by a slash or NUL
 *
 * Evaluates to non-zero when 'str' is an ASCII letter, then ':' or '|', then
 * '/', '\\' or the terminating zero. The argument is evaluated several
 * times, so it must be free of side effects. */
#define STARTS_WITH_URL_DRIVE_PREFIX(str) \
  (((('a' <= (str)[0]) && ((str)[0] <= 'z')) || \
    (('A' <= (str)[0]) && ((str)[0] <= 'Z'))) && \
   (((str)[1] == ':') || ((str)[1] == '|')) && \
   (((str)[2] == '/') || ((str)[2] == '\\') || ((str)[2] == 0)))
56
57	/* scheme is not URL encoded, the longest libcurl supported ones are... */
58	#define MAX_SCHEME_LEN 40
59
60	/*
61	* If ENABLE_IPV6 is disabled, we still want to parse IPv6 addresses, so make
62	* sure we have _some_ value for AF_INET6 without polluting our fake value
63	* everywhere.
64	*/
65	#if !defined(ENABLE_IPV6) && !defined(AF_INET6)
66	#define AF_INET6 (AF_INET + 1)
67	#endif
68
/* Internal representation of CURLU. Point to URL-encoded strings.
 *
 * Every 'char *' member is either NULL or a heap-allocated string owned by
 * the handle; free_urlhandle() releases all of them. */
struct Curl_URL {
  char *scheme;
  char *user;
  char *password;
  char *options; /* IMAP only? */
  char *host;
  char *zoneid; /* for numerical IPv6 addresses */
  char *port;
  char *path;
  char *query;
  char *fragment;
  long portnum; /* the numerical version */
};
83
84	#define DEFAULT_SCHEME "https"
85
86	static void free_urlhandle(struct Curl_URL *u)
87	{
88	free(u->scheme);
89	free(u->user);
90	free(u->password);
91	free(u->options);
92	free(u->host);
93	free(u->zoneid);
94	free(u->port);
95	free(u->path);
96	free(u->query);
97	free(u->fragment);
98	}
99
/*
 * Find the separator at the end of the host name, or the '?' in cases like
 * http://www.url.com?id=2380
 *
 * The search starts right after the first "//" in 'url', or at the start of
 * 'url' when there is none. Returns a pointer to whichever of the first '/'
 * and the first '?' comes first; a missing separator counts as the
 * terminating zero of 'url'. Never returns NULL.
 */
static const char *find_host_sep(const char *url)
{
  const char *end = url + strlen(url);
  const char *slash;
  const char *query;
  const char *host;

  /* Find the start of the hostname */
  host = strstr(url, "//");
  if(host)
    host += 2;
  else
    host = url;

  query = strchr(host, '?');
  slash = strchr(host, '/');

  if(!slash)
    slash = end;

  if(!query)
    query = end;

  return slash < query ? slash : query;
}
127
/*
 * Decide whether a character in a URL must be escaped: it must when it is
 * none of control, space or graph according to the ISCNTRL/ISSPACE/ISGRAPH
 * classification macros.
 */
#define urlchar_needs_escaping(c) (!(ISCNTRL(c) || ISSPACE(c) || ISGRAPH(c)))
132
133	static const char hexdigits[] = "0123456789abcdef";
134	/* urlencode_str() writes data into an output dynbuf and URL-encodes the
135	* spaces in the source URL accordingly.
136	*
137	* URL encoding should be skipped for host names, otherwise IDN resolution
138	* will fail.
139	*/
140	static CURLUcode urlencode_str(struct dynbuf o, const char url,
141	size_t len, bool relative,
142	bool query)
143	{
144	/* we must add this with whitespace-replacing */
145	bool left = !query;
146	const unsigned char *iptr;
147	const unsigned char host_sep = (const unsigned char ) url;
148
149	if(!relative)
150	host_sep = (const unsigned char *) find_host_sep(url);
151
152	for(iptr = (unsigned char )url; / read from here */
153	len; iptr++, len--) {
154
155	if(iptr < host_sep) {
156	if(Curl_dyn_addn(o, iptr, 1))
157	return CURLUE_OUT_OF_MEMORY;
158	continue;
159	}
160
161	if(*iptr == ' ') {
162	if(left) {
163	if(Curl_dyn_addn(o, "%20", 3))
164	return CURLUE_OUT_OF_MEMORY;
165	}
166	else {
167	if(Curl_dyn_addn(o, "+", 1))
168	return CURLUE_OUT_OF_MEMORY;
169	}
170	continue;
171	}
172
173	if(*iptr == '?')
174	left = FALSE;
175
176	if(urlchar_needs_escaping(*iptr)) {
177	char out[3]={'%'};
178	out[1] = hexdigits[*iptr>>4];
179	out[2] = hexdigits[*iptr & 0xf];
180	if(Curl_dyn_addn(o, out, 3))
181	return CURLUE_OUT_OF_MEMORY;
182	}
183	else {
184	if(Curl_dyn_addn(o, iptr, 1))
185	return CURLUE_OUT_OF_MEMORY;
186	}
187	}
188
189	return CURLUE_OK;
190	}
191
192	/*
193	* Returns the length of the scheme if the given URL is absolute (as opposed
194	* to relative). Stores the scheme in the buffer if TRUE and 'buf' is
195	* non-NULL. The buflen must be larger than MAX_SCHEME_LEN if buf is set.
196	*
197	* If 'guess_scheme' is TRUE, it means the URL might be provided without
198	* scheme.
199	*/
200	size_t Curl_is_absolute_url(const char url, char buf, size_t buflen,
201	bool guess_scheme)
202	{
203	int i;
204	DEBUGASSERT(!buf \|\| (buflen > MAX_SCHEME_LEN));
205	(void)buflen; /* only used in debug-builds */
206	if(buf)
207	buf[0] = 0; /* always leave a defined value in buf */
208	#ifdef WIN32
209	if(guess_scheme && STARTS_WITH_DRIVE_PREFIX(url))
210	return 0;
211	#endif
212	for(i = 0; i < MAX_SCHEME_LEN; ++i) {
213	char s = url[i];
214	if(s && (ISALNUM(s) \|\| (s == '+') \|\| (s == '-') \|\| (s == '.') )) {
215	/* RFC 3986 3.1 explains:
216	scheme = ALPHA *( ALPHA / DIGIT / "+" / "-" / "." )
217	*/
218	}
219	else {
220	break;
221	}
222	}
223	if(i && (url[i] == ':') && ((url[i + 1] == '/') \|\| !guess_scheme)) {
224	/* If this does not guess scheme, the scheme always ends with the colon so
225	that this also detects data: URLs etc. In guessing mode, data: could
226	be the host name "data" with a specified port number. */
227
228	/* the length of the scheme is the name part only */
229	size_t len = i;
230	if(buf) {
231	buf[i] = 0;
232	while(i--) {
233	buf[i] = Curl_raw_tolower(url[i]);
234	}
235	}
236	return len;
237	}
238	return 0;
239	}
240
241	/*
242	* Concatenate a relative URL to a base URL making it absolute.
243	* URL-encodes any spaces.
244	* The returned pointer must be freed by the caller unless NULL
245	* (returns NULL on out of memory).
246	*
247	* Note that this function destroys the 'base' string.
248	*/
249	static char concat_url(char base, const char *relurl)
250	{
251	/***
252	TRY to append this new path to the old URL
253	to the right of the host part. Oh crap, this is doomed to cause
254	problems in the future...
255	*/
256	struct dynbuf newest;
257	char *protsep;
258	char *pathsep;
259	bool host_changed = FALSE;
260	const char *useurl = relurl;
261
262	/* protsep points to the start of the host name */
263	protsep = strstr(base, "//");
264	if(!protsep)
265	protsep = base;
266	else
267	protsep += 2; /* pass the slashes */
268
269	if('/' != relurl[0]) {
270	int level = 0;
271
272	/* First we need to find out if there's a ?-letter in the URL,
273	and cut it and the right-side of that off */
274	pathsep = strchr(protsep, '?');
275	if(pathsep)
276	*pathsep = 0;
277
278	/* we have a relative path to append to the last slash if there's one
279	available, or if the new URL is just a query string (starts with a
280	'?') we append the new one at the end of the entire currently worked
281	out URL */
282	if(useurl[0] != '?') {
283	pathsep = strrchr(protsep, '/');
284	if(pathsep)
285	*pathsep = 0;
286	}
287
288	/* Check if there's any slash after the host name, and if so, remember
289	that position instead */
290	pathsep = strchr(protsep, '/');
291	if(pathsep)
292	protsep = pathsep + 1;
293	else
294	protsep = NULL;
295
296	/* now deal with one "./" or any amount of "../" in the newurl
297	and act accordingly */
298
299	if((useurl[0] == '.') && (useurl[1] == '/'))
300	useurl += 2; /* just skip the "./" */
301
302	while((useurl[0] == '.') &&
303	(useurl[1] == '.') &&
304	(useurl[2] == '/')) {
305	level++;
306	useurl += 3; /* pass the "../" */
307	}
308
309	if(protsep) {
310	while(level--) {
311	/* cut off one more level from the right of the original URL */
312	pathsep = strrchr(protsep, '/');
313	if(pathsep)
314	*pathsep = 0;
315	else {
316	*protsep = 0;
317	break;
318	}
319	}
320	}
321	}
322	else {
323	/* We got a new absolute path for this server */
324
325	if(relurl[1] == '/') {
326	/* the new URL starts with //, just keep the protocol part from the
327	original one */
328	*protsep = 0;
329	useurl = &relurl[2]; /* we keep the slashes from the original, so we
330	skip the new ones */
331	host_changed = TRUE;
332	}
333	else {
334	/* cut off the original URL from the first slash, or deal with URLs
335	without slash */
336	pathsep = strchr(protsep, '/');
337	if(pathsep) {
338	/* When people use badly formatted URLs, such as
339	"http://www.url.com?dir=/home/daniel" we must not use the first
340	slash, if there's a ?-letter before it! */
341	char *sep = strchr(protsep, '?');
342	if(sep && (sep < pathsep))
343	pathsep = sep;
344	*pathsep = 0;
345	}
346	else {
347	/* There was no slash. Now, since we might be operating on a badly
348	formatted URL, such as "http://www.url.com?id=2380" which doesn't
349	use a slash separator as it is supposed to, we need to check for a
350	?-letter as well! */
351	pathsep = strchr(protsep, '?');
352	if(pathsep)
353	*pathsep = 0;
354	}
355	}
356	}
357
358	Curl_dyn_init(&newest, CURL_MAX_INPUT_LENGTH);
359
360	/* copy over the root url part */
361	if(Curl_dyn_add(&newest, base))
362	return NULL;
363
364	/* check if we need to append a slash */
365	if(('/' == useurl[0]) \|\| (protsep && !*protsep) \|\| ('?' == useurl[0]))
366	;
367	else {
368	if(Curl_dyn_addn(&newest, "/", 1))
369	return NULL;
370	}
371
372	/* then append the new piece on the right side */
373	urlencode_str(&newest, useurl, strlen(useurl), !host_changed, FALSE);
374
375	return Curl_dyn_ptr(&newest);
376	}
377
378	/* scan for byte values < 31 or 127 */
379	static bool junkscan(const char *part, unsigned int flags)
380	{
381	if(part) {
382	static const char badbytes[]={
383	/* */ 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
384	0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
385	0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17,
386	0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f,
387	0x7f, 0x00 /* null-terminate */
388	};
389	size_t n = strlen(part);
390	size_t nfine = strcspn(part, badbytes);
391	if(nfine != n)
392	/* since we don't know which part is scanned, return a generic error
393	code */
394	return TRUE;
395	if(!(flags & CURLU_ALLOW_SPACE) && strchr(part, ' '))
396	return TRUE;
397	}
398	return FALSE;
399	}
400
401	/*
402	* parse_hostname_login()
403	*
404	* Parse the login details (user name, password and options) from the URL and
405	* strip them out of the host name
406	*
407	*/
408	static CURLUcode parse_hostname_login(struct Curl_URL *u,
409	struct dynbuf *host,
410	unsigned int flags)
411	{
412	CURLUcode result = CURLUE_OK;
413	CURLcode ccode;
414	char *userp = NULL;
415	char *passwdp = NULL;
416	char *optionsp = NULL;
417	const struct Curl_handler *h = NULL;
418
419	/* At this point, we assume all the other special cases have been taken
420	* care of, so the host is at most
421	*
422	* [user[:password][;options]]@]hostname
423	*
424	* We need somewhere to put the embedded details, so do that first.
425	*/
426
427	char *login = Curl_dyn_ptr(host);
428	char *ptr;
429
430	DEBUGASSERT(login);
431
432	ptr = strchr(login, '@');
433	if(!ptr)
434	goto out;
435
436	/* We will now try to extract the
437	* possible login information in a string like:
438	* ftp://user:[email protected]:8021/README */
439	ptr++;
440
441	/* if this is a known scheme, get some details */
442	if(u->scheme)
443	h = Curl_builtin_scheme(u->scheme, CURL_ZERO_TERMINATED);
444
445	/* We could use the login information in the URL so extract it. Only parse
446	options if the handler says we should. Note that 'h' might be NULL! */
447	ccode = Curl_parse_login_details(login, ptr - login - 1,
448	&userp, &passwdp,
449	(h && (h->flags & PROTOPT_URLOPTIONS)) ?
450	&optionsp:NULL);
451	if(ccode) {
452	result = CURLUE_BAD_LOGIN;
453	goto out;
454	}
455
456	if(userp) {
457	if(flags & CURLU_DISALLOW_USER) {
458	/* Option DISALLOW_USER is set and url contains username. */
459	result = CURLUE_USER_NOT_ALLOWED;
460	goto out;
461	}
462	if(junkscan(userp, flags)) {
463	result = CURLUE_BAD_USER;
464	goto out;
465	}
466	u->user = userp;
467	}
468
469	if(passwdp) {
470	if(junkscan(passwdp, flags)) {
471	result = CURLUE_BAD_PASSWORD;
472	goto out;
473	}
474	u->password = passwdp;
475	}
476
477	if(optionsp) {
478	if(junkscan(optionsp, flags)) {
479	result = CURLUE_BAD_LOGIN;
480	goto out;
481	}
482	u->options = optionsp;
483	}
484
485	/* move the name to the start of the host buffer */
486	if(Curl_dyn_tail(host, strlen(ptr)))
487	return CURLUE_OUT_OF_MEMORY;
488
489	return CURLUE_OK;
490	out:
491
492	free(userp);
493	free(passwdp);
494	free(optionsp);
495	u->user = NULL;
496	u->password = NULL;
497	u->options = NULL;
498
499	return result;
500	}
501
502	UNITTEST CURLUcode Curl_parse_port(struct Curl_URL u, struct dynbuf host,
503	bool has_scheme)
504	{
505	char *portptr;
506	char *hostname = Curl_dyn_ptr(host);
507	/*
508	* Find the end of an IPv6 address, either on the ']' ending bracket or
509	* a percent-encoded zone index.
510	*/
511	if(hostname[0] == '[') {
512	portptr = strchr(hostname, ']');
513	if(!portptr)
514	return CURLUE_BAD_IPV6;
515	portptr++;
516	/* this is a RFC2732-style specified IP-address */
517	if(*portptr) {
518	if(*portptr != ':')
519	return CURLUE_BAD_PORT_NUMBER;
520	}
521	else
522	portptr = NULL;
523	}
524	else
525	portptr = strchr(hostname, ':');
526
527	if(portptr) {
528	char *rest;
529	long port;
530	char portbuf[7];
531	size_t keep = portptr - hostname;
532
533	/* Browser behavior adaptation. If there's a colon with no digits after,
534	just cut off the name there which makes us ignore the colon and just
535	use the default port. Firefox, Chrome and Safari all do that.
536
537	Don't do it if the URL has no scheme, to make something that looks like
538	a scheme not work!
539	*/
540	Curl_dyn_setlen(host, keep);
541	portptr++;
542	if(!*portptr)
543	return has_scheme ? CURLUE_OK : CURLUE_BAD_PORT_NUMBER;
544
545	if(!ISDIGIT(*portptr))
546	return CURLUE_BAD_PORT_NUMBER;
547
548	port = strtol(portptr, &rest, 10); /* Port number must be decimal */
549
550	if(port > 0xffff)
551	return CURLUE_BAD_PORT_NUMBER;
552
553	if(rest[0])
554	return CURLUE_BAD_PORT_NUMBER;
555
556	*rest = 0;
557	/* generate a new port number string to get rid of leading zeroes etc */
558	msnprintf(portbuf, sizeof(portbuf), "%ld", port);
559	u->portnum = port;
560	u->port = strdup(portbuf);
561	if(!u->port)
562	return CURLUE_OUT_OF_MEMORY;
563	}
564
565	return CURLUE_OK;
566	}
567
568	static CURLUcode hostname_check(struct Curl_URL u, char hostname,
569	size_t hlen) /* length of hostname */
570	{
571	size_t len;
572	DEBUGASSERT(hostname);
573
574	if(!hostname[0])
575	return CURLUE_NO_HOST;
576	else if(hostname[0] == '[') {
577	const char *l = "0123456789abcdefABCDEF:.";
578	if(hlen < 4) /* '[::]' is the shortest possible valid string */
579	return CURLUE_BAD_IPV6;
580	hostname++;
581	hlen -= 2;
582
583	/* only valid IPv6 letters are ok */
584	len = strspn(hostname, l);
585
586	if(hlen != len) {
587	hlen = len;
588	if(hostname[len] == '%') {
589	/* this could now be '%[zone id]' */
590	char zoneid[16];
591	int i = 0;
592	char *h = &hostname[len + 1];
593	/* pass '25' if present and is a url encoded percent sign */
594	if(!strncmp(h, "25", 2) && h[2] && (h[2] != ']'))
595	h += 2;
596	while(h && (h != ']') && (i < 15))
597	zoneid[i++] = *h++;
598	if(!i \|\| (']' != *h))
599	return CURLUE_BAD_IPV6;
600	zoneid[i] = 0;
601	u->zoneid = strdup(zoneid);
602	if(!u->zoneid)
603	return CURLUE_OUT_OF_MEMORY;
604	hostname[len] = ']'; /* insert end bracket */
605	hostname[len + 1] = 0; /* terminate the hostname */
606	}
607	else
608	return CURLUE_BAD_IPV6;
609	/* hostname is fine */
610	}
611
612	/* Check the IPv6 address. */
613	{
614	char dest[16]; /* fits a binary IPv6 address */
615	char norm[MAX_IPADR_LEN];
616	hostname[hlen] = 0; /* end the address there */
617	if(1 != Curl_inet_pton(AF_INET6, hostname, dest))
618	return CURLUE_BAD_IPV6;
619
620	/* check if it can be done shorter */
621	if(Curl_inet_ntop(AF_INET6, dest, norm, sizeof(norm)) &&
622	(strlen(norm) < hlen)) {
623	strcpy(hostname, norm);
624	hlen = strlen(norm);
625	hostname[hlen + 1] = 0;
626	}
627	hostname[hlen] = ']'; /* restore ending bracket */
628	}
629	}
630	else {
631	/* letters from the second string are not ok */
632	len = strcspn(hostname, " \r\n\t/:#?!@{}[]\\$\'\"^`*<>=;,+&()%");
633	if(hlen != len)
634	/* hostname with bad content */
635	return CURLUE_BAD_HOSTNAME;
636	}
637	return CURLUE_OK;
638	}
639
/* TRUE for the characters that end the host part of a URL: the start of
   the path ('/'), the query ('?') or the fragment ('#') */
#define HOSTNAME_END(x) (((x) == '/') || ((x) == '?') || ((x) == '#'))
641
642	/*
643	* Handle partial IPv4 numerical addresses and different bases, like
644	* '16843009', '0x7f', '0x7f.1' '0177.1.1.1' etc.
645	*
646	* If the given input string is syntactically wrong or any part for example is
647	* too big, this function returns FALSE and doesn't create any output.
648	*
649	* Output the "normalized" version of that input string in plain quad decimal
650	* integers and return TRUE.
651	*/
652	static bool ipv4_normalize(const char hostname, char outp, size_t olen)
653	{
654	bool done = FALSE;
655	int n = 0;
656	const char *c = hostname;
657	unsigned long parts[4] = {0, 0, 0, 0};
658
659	while(!done) {
660	char *endp;
661	unsigned long l;
662	if((c < '0') \|\| (c > '9'))
663	/* most importantly this doesn't allow a leading plus or minus */
664	return FALSE;
665	l = strtoul(c, &endp, 0);
666
667	/* overflow or nothing parsed at all */
668	if(((l == ULONG_MAX) && (errno == ERANGE)) \|\| (endp == c))
669	return FALSE;
670
671	#if SIZEOF_LONG > 4
672	/* a value larger than 32 bits */
673	if(l > UINT_MAX)
674	return FALSE;
675	#endif
676
677	parts[n] = l;
678	c = endp;
679
680	switch (*c) {
681	case '.' :
682	if(n == 3)
683	return FALSE;
684	n++;
685	c++;
686	break;
687
688	case '\0':
689	done = TRUE;
690	break;
691
692	default:
693	return FALSE;
694	}
695	}
696
697	/* this is deemed a valid IPv4 numerical address */
698
699	switch(n) {
700	case 0: /* a -- 32 bits */
701	msnprintf(outp, olen, "%u.%u.%u.%u",
702	parts[0] >> 24, (parts[0] >> 16) & 0xff,
703	(parts[0] >> 8) & 0xff, parts[0] & 0xff);
704	break;
705	case 1: /* a.b -- 8.24 bits */
706	if((parts[0] > 0xff) \|\| (parts[1] > 0xffffff))
707	return FALSE;
708	msnprintf(outp, olen, "%u.%u.%u.%u",
709	parts[0], (parts[1] >> 16) & 0xff,
710	(parts[1] >> 8) & 0xff, parts[1] & 0xff);
711	break;
712	case 2: /* a.b.c -- 8.8.16 bits */
713	if((parts[0] > 0xff) \|\| (parts[1] > 0xff) \|\| (parts[2] > 0xffff))
714	return FALSE;
715	msnprintf(outp, olen, "%u.%u.%u.%u",
716	parts[0], parts[1], (parts[2] >> 8) & 0xff,
717	parts[2] & 0xff);
718	break;
719	case 3: /* a.b.c.d -- 8.8.8.8 bits */
720	if((parts[0] > 0xff) \|\| (parts[1] > 0xff) \|\| (parts[2] > 0xff) \|\|
721	(parts[3] > 0xff))
722	return FALSE;
723	msnprintf(outp, olen, "%u.%u.%u.%u",
724	parts[0], parts[1], parts[2], parts[3]);
725	break;
726	}
727	return TRUE;
728	}
729
730	/* if necessary, replace the host content with a URL decoded version */
731	static CURLUcode decode_host(struct dynbuf *host)
732	{
733	char *per = NULL;
734	const char *hostname = Curl_dyn_ptr(host);
735	if(hostname[0] == '[')
736	/* only decode if not an ipv6 numerical */
737	return CURLUE_OK;
738	per = strchr(hostname, '%');
739	if(!per)
740	/* nothing to decode */
741	return CURLUE_OK;
742	else {
743	/* encoded */
744	size_t dlen;
745	char *decoded;
746	CURLcode result = Curl_urldecode(hostname, 0, &decoded, &dlen,
747	REJECT_CTRL);
748	if(result)
749	return CURLUE_BAD_HOSTNAME;
750	Curl_dyn_reset(host);
751	result = Curl_dyn_addn(host, decoded, dlen);
752	free(decoded);
753	if(result)
754	return CURLUE_OUT_OF_MEMORY;
755	}
756
757	return CURLUE_OK;
758	}
759
760	/*
761	* "Remove Dot Segments"
762	* https://datatracker.ietf.org/doc/html/rfc3986#section-5.2.4
763	*/
764
765	/*
766	* dedotdotify()
767	* @unittest: 1395
768	*
769	* This function gets a null-terminated path with dot and dotdot sequences
770	* passed in and strips them off according to the rules in RFC 3986 section
771	* 5.2.4.
772	*
773	* The function handles a query part ('?' + stuff) appended but it expects
774	* that fragments ('#' + stuff) have already been cut off.
775	*
776	* RETURNS
777	*
778	* Zero for success and 'out' set to an allocated dedotdotified string.
779	*/
780	UNITTEST int dedotdotify(const char input, size_t clen, char *outp);
781	UNITTEST int dedotdotify(const char input, size_t clen, char *outp)
782	{
783	char *outptr;
784	const char *orginput = input;
785	char *queryp;
786	char *out;
787
788	*outp = NULL;
789	/* the path always starts with a slash, and a slash has not dot */
790	if((clen < 2) \|\| !memchr(input, '.', clen))
791	return 0;
792
793	out = malloc(clen + 1);
794	if(!out)
795	return 1; /* out of memory */
796
797	out = 0; / null-terminates, for inputs like "./" */
798	outptr = out;
799
800	/*
801	* To handle query-parts properly, we must find it and remove it during the
802	* dotdot-operation and then append it again at the end to the output
803	* string.
804	*/
805	queryp = strchr(input, '?');
806
807	do {
808	bool dotdot = TRUE;
809	if(*input == '.') {
810	/* A. If the input buffer begins with a prefix of "../" or "./", then
811	remove that prefix from the input buffer; otherwise, */
812
813	if(!strncmp("./", input, 2)) {
814	input += 2;
815	clen -= 2;
816	}
817	else if(!strncmp("../", input, 3)) {
818	input += 3;
819	clen -= 3;
820	}
821	/* D. if the input buffer consists only of "." or "..", then remove
822	that from the input buffer; otherwise, */
823
824	else if(!strcmp(".", input) \|\| !strcmp("..", input) \|\|
825	!strncmp(".?", input, 2) \|\| !strncmp("..?", input, 3)) {
826	*out = 0;
827	break;
828	}
829	else
830	dotdot = FALSE;
831	}
832	else if(*input == '/') {
833	/* B. if the input buffer begins with a prefix of "/./" or "/.", where
834	"." is a complete path segment, then replace that prefix with "/" in
835	the input buffer; otherwise, */
836	if(!strncmp("/./", input, 3)) {
837	input += 2;
838	clen -= 2;
839	}
840	else if(!strcmp("/.", input) \|\| !strncmp("/.?", input, 3)) {
841	*outptr++ = '/';
842	*outptr = 0;
843	break;
844	}
845
846	/* C. if the input buffer begins with a prefix of "/../" or "/..",
847	where ".." is a complete path segment, then replace that prefix with
848	"/" in the input buffer and remove the last segment and its
849	preceding "/" (if any) from the output buffer; otherwise, */
850
851	else if(!strncmp("/../", input, 4)) {
852	input += 3;
853	clen -= 3;
854	/* remove the last segment from the output buffer */
855	while(outptr > out) {
856	outptr--;
857	if(*outptr == '/')
858	break;
859	}
860	outptr = 0; / null-terminate where it stops */
861	}
862	else if(!strcmp("/..", input) \|\| !strncmp("/..?", input, 4)) {
863	/* remove the last segment from the output buffer */
864	while(outptr > out) {
865	outptr--;
866	if(*outptr == '/')
867	break;
868	}
869	*outptr++ = '/';
870	outptr = 0; / null-terminate where it stops */
871	break;
872	}
873	else
874	dotdot = FALSE;
875	}
876	else
877	dotdot = FALSE;
878
879	if(!dotdot) {
880	/* E. move the first path segment in the input buffer to the end of
881	the output buffer, including the initial "/" character (if any) and
882	any subsequent characters up to, but not including, the next "/"
883	character or the end of the input buffer. */
884
885	do {
886	outptr++ = input++;
887	clen--;
888	} while(input && (input != '/') && (*input != '?'));
889	*outptr = 0;
890	}
891
892	/* continue until end of input string OR, if there is a terminating
893	query part, stop there */
894	} while(*input && (!queryp \|\| (input < queryp)));
895
896	if(queryp) {
897	size_t qlen;
898	/* There was a query part, append that to the output. */
899	size_t oindex = queryp - orginput;
900	qlen = strlen(&orginput[oindex]);
901	memcpy(outptr, &orginput[oindex], qlen + 1); /* include zero byte */
902	}
903
904	*outp = out;
905	return 0; /* success */
906	}
907
908	static CURLUcode parseurl(const char url, CURLU u, unsigned int flags)
909	{
910	const char *path;
911	size_t pathlen;
912	bool uncpath = FALSE;
913	char *query = NULL;
914	char *fragment = NULL;
915	char schemebuf[MAX_SCHEME_LEN + 1];
916	const char *schemep = NULL;
917	size_t schemelen = 0;
918	size_t urllen;
919	CURLUcode result = CURLUE_OK;
920	size_t fraglen = 0;
921	struct dynbuf host;
922
923	DEBUGASSERT(url);
924
925	Curl_dyn_init(&host, CURL_MAX_INPUT_LENGTH);
926
927	/*************************************************************
928	* Parse the URL.
929	************************************************************/
930	/* allocate scratch area */
931	urllen = strlen(url);
932	if(urllen > CURL_MAX_INPUT_LENGTH) {
933	/* excessive input length */
934	result = CURLUE_MALFORMED_INPUT;
935	goto fail;
936	}
937
938	schemelen = Curl_is_absolute_url(url, schemebuf, sizeof(schemebuf),
939	flags & (CURLU_GUESS_SCHEME\|
940	CURLU_DEFAULT_SCHEME));
941
942	/* handle the file: scheme */
943	if(schemelen && !strcmp(schemebuf, "file")) {
944	if(urllen <= 6) {
945	/* file:/ is not enough to actually be a complete file: URL */
946	result = CURLUE_BAD_FILE_URL;
947	goto fail;
948	}
949
950	/* path has been allocated large enough to hold this */
951	path = (char *)&url[5];
952
953	schemep = u->scheme = strdup("file");
954	if(!u->scheme) {
955	result = CURLUE_OUT_OF_MEMORY;
956	goto fail;
957	}
958
959	/* Extra handling URLs with an authority component (i.e. that start with
960	* "file://")
961	*
962	* We allow omitted hostname (e.g. file:/<path>) -- valid according to
963	* RFC 8089, but not the (current) WHAT-WG URL spec.
964	*/
965	if(path[0] == '/' && path[1] == '/') {
966	/* swallow the two slashes */
967	const char *ptr = &path[2];
968
969	/*
970	* According to RFC 8089, a file: URL can be reliably dereferenced if:
971	*
972	* o it has no/blank hostname, or
973	*
974	* o the hostname matches "localhost" (case-insensitively), or
975	*
976	* o the hostname is a FQDN that resolves to this machine, or
977	*
978	* o it is an UNC String transformed to an URI (Windows only, RFC 8089
979	* Appendix E.3).
980	*
981	* For brevity, we only consider URLs with empty, "localhost", or
982	* "127.0.0.1" hostnames as local, otherwise as an UNC String.
983	*
984	* Additionally, there is an exception for URLs with a Windows drive
985	* letter in the authority (which was accidentally omitted from RFC 8089
986	* Appendix E, but believe me, it was meant to be there. --MK)
987	*/
988	if(ptr[0] != '/' && !STARTS_WITH_URL_DRIVE_PREFIX(ptr)) {
989	/* the URL includes a host name, it must match "localhost" or
990	"127.0.0.1" to be valid */
991	if(checkprefix("localhost/", ptr) \|\|
992	checkprefix("127.0.0.1/", ptr)) {
993	ptr += 9; /* now points to the slash after the host */
994	}
995	else {
996	#if defined(WIN32)
997	size_t len;
998
999	/* the host name, NetBIOS computer name, can not contain disallowed
1000	chars, and the delimiting slash character must be appended to the
1001	host name */
1002	path = strpbrk(ptr, "/\\:*?\"<>\|");
1003	if(!path \|\| *path != '/') {
1004	result = CURLUE_BAD_FILE_URL;
1005	goto fail;
1006	}
1007
1008	len = path - ptr;
1009	if(len) {
1010	if(Curl_dyn_addn(&host, ptr, len)) {
1011	result = CURLUE_OUT_OF_MEMORY;
1012	goto fail;
1013	}
1014	uncpath = TRUE;
1015	}
1016
1017	ptr -= 2; /* now points to the // before the host in UNC */
1018	#else
1019	/* Invalid file://hostname/, expected localhost or 127.0.0.1 or
1020	none */
1021	result = CURLUE_BAD_FILE_URL;
1022	goto fail;
1023	#endif
1024	}
1025	}
1026
1027	path = ptr;
1028	}
1029
1030	if(!uncpath)
1031	/* no host for file: URLs by default */
1032	Curl_dyn_reset(&host);
1033
1034	#if !defined(MSDOS) && !defined(WIN32) && !defined(__CYGWIN__)
1035	/* Don't allow Windows drive letters when not in Windows.
1036	* This catches both "file:/c:" and "file:c:" */
1037	if(('/' == path[0] && STARTS_WITH_URL_DRIVE_PREFIX(&path[1])) \|\|
1038	STARTS_WITH_URL_DRIVE_PREFIX(path)) {
1039	/* File drive letters are only accepted in MSDOS/Windows */
1040	result = CURLUE_BAD_FILE_URL;
1041	goto fail;
1042	}
1043	#else
1044	/* If the path starts with a slash and a drive letter, ditch the slash */
1045	if('/' == path[0] && STARTS_WITH_URL_DRIVE_PREFIX(&path[1])) {
1046	/* This cannot be done with strcpy, as the memory chunks overlap! */
1047	path++;
1048	}
1049	#endif
1050
1051	}
1052	else {
1053	/* clear path */
1054	const char *p;
1055	const char *hostp;
1056	size_t len;
1057
1058	if(schemelen) {
1059	int i = 0;
1060	p = &url[schemelen + 1];
1061	while(p && (*p == '/') && (i < 4)) {
1062	p++;
1063	i++;
1064	}
1065
1066	schemep = schemebuf;
1067	if(!Curl_builtin_scheme(schemep, CURL_ZERO_TERMINATED) &&
1068	!(flags & CURLU_NON_SUPPORT_SCHEME)) {
1069	result = CURLUE_UNSUPPORTED_SCHEME;
1070	goto fail;
1071	}
1072
1073	if((i < 1) \|\| (i>3)) {
1074	/* less than one or more than three slashes */
1075	result = CURLUE_BAD_SLASHES;
1076	goto fail;
1077	}
1078	if(junkscan(schemep, flags)) {
1079	result = CURLUE_BAD_SCHEME;
1080	goto fail;
1081	}
1082	}
1083	else {
1084	/* no scheme! */
1085
1086	if(!(flags & (CURLU_DEFAULT_SCHEME\|CURLU_GUESS_SCHEME))) {
1087	result = CURLUE_BAD_SCHEME;
1088	goto fail;
1089	}
1090	if(flags & CURLU_DEFAULT_SCHEME)
1091	schemep = DEFAULT_SCHEME;
1092
1093	/*
1094	* The URL was badly formatted, let's try without scheme specified.
1095	*/
1096	p = url;
1097	}
1098	hostp = p; /* host name starts here */
1099
1100	/* find the end of the host name + port number */
1101	while(p && !HOSTNAME_END(p))
1102	p++;
1103
1104	len = p - hostp;
1105	if(len) {
1106	if(Curl_dyn_addn(&host, hostp, len)) {
1107	result = CURLUE_OUT_OF_MEMORY;
1108	goto fail;
1109	}
1110	}
1111	else {
1112	if(!(flags & CURLU_NO_AUTHORITY)) {
1113	result = CURLUE_NO_HOST;
1114	goto fail;
1115	}
1116	}
1117
1118	path = (char *)p;
1119
1120	if(schemep) {
1121	u->scheme = strdup(schemep);
1122	if(!u->scheme) {
1123	result = CURLUE_OUT_OF_MEMORY;
1124	goto fail;
1125	}
1126	}
1127	}
1128
1129	fragment = strchr(path, '#');
1130	if(fragment) {
1131	fraglen = strlen(fragment);
1132	if(fraglen > 1) {
1133	/* skip the leading '#' in the copy but include the terminating null */
1134	u->fragment = Curl_memdup(fragment + 1, fraglen);
1135	if(!u->fragment) {
1136	result = CURLUE_OUT_OF_MEMORY;
1137	goto fail;
1138	}
1139
1140	if(junkscan(u->fragment, flags)) {
1141	result = CURLUE_BAD_FRAGMENT;
1142	goto fail;
1143	}
1144	}
1145	}
1146
1147	query = strchr(path, '?');
1148	if(query && (!fragment \|\| (query < fragment))) {
1149	size_t qlen = strlen(query) - fraglen; /* includes '?' */
1150	pathlen = strlen(path) - qlen - fraglen;
1151	if(qlen > 1) {
1152	if(flags & CURLU_URLENCODE) {
1153	struct dynbuf enc;
1154	Curl_dyn_init(&enc, CURL_MAX_INPUT_LENGTH);
1155	/* skip the leading question mark */
1156	if(urlencode_str(&enc, query + 1, qlen - 1, TRUE, TRUE)) {
1157	result = CURLUE_OUT_OF_MEMORY;
1158	goto fail;
1159	}
1160	u->query = Curl_dyn_ptr(&enc);
1161	}
1162	else {
1163	u->query = Curl_memdup(query + 1, qlen);
1164	if(!u->query) {
1165	result = CURLUE_OUT_OF_MEMORY;
1166	goto fail;
1167	}
1168	u->query[qlen - 1] = 0;
1169	}
1170
1171	if(junkscan(u->query, flags)) {
1172	result = CURLUE_BAD_QUERY;
1173	goto fail;
1174	}
1175	}
1176	else {
1177	/* single byte query */
1178	u->query = strdup("");
1179	if(!u->query) {
1180	result = CURLUE_OUT_OF_MEMORY;
1181	goto fail;
1182	}
1183	}
1184	}
1185	else
1186	pathlen = strlen(path) - fraglen;
1187
1188	if(pathlen && (flags & CURLU_URLENCODE)) {
1189	struct dynbuf enc;
1190	Curl_dyn_init(&enc, CURL_MAX_INPUT_LENGTH);
1191	if(urlencode_str(&enc, path, pathlen, TRUE, FALSE)) {
1192	result = CURLUE_OUT_OF_MEMORY;
1193	goto fail;
1194	}
1195	pathlen = Curl_dyn_len(&enc);
1196	path = u->path = Curl_dyn_ptr(&enc);
1197	}
1198
1199	if(pathlen <= 1) {
1200	/* there is no path left or just the slash, unset */
1201	path = NULL;
1202	}
1203	else {
1204	if(!u->path) {
1205	u->path = Curl_memdup(path, pathlen + 1);
1206	if(!u->path) {
1207	result = CURLUE_OUT_OF_MEMORY;
1208	goto fail;
1209	}
1210	u->path[pathlen] = 0;
1211	path = u->path;
1212	}
1213	else if(flags & CURLU_URLENCODE)
1214	/* it might have encoded more than just the path so cut it */
1215	u->path[pathlen] = 0;
1216
1217	if(junkscan(u->path, flags)) {
1218	result = CURLUE_BAD_PATH;
1219	goto fail;
1220	}
1221
1222	if(!(flags & CURLU_PATH_AS_IS)) {
1223	/* remove ../ and ./ sequences according to RFC3986 */
1224	char *dedot;
1225	int err = dedotdotify((char *)path, pathlen, &dedot);
1226	if(err) {
1227	result = CURLUE_OUT_OF_MEMORY;
1228	goto fail;
1229	}
1230	if(dedot) {
1231	free(u->path);
1232	u->path = dedot;
1233	}
1234	}
1235	}
1236
1237	if(Curl_dyn_len(&host)) {
1238	char normalized_ipv4[sizeof("255.255.255.255") + 1];
1239
1240	/*
1241	* Parse the login details and strip them out of the host name.
1242	*/
1243	result = parse_hostname_login(u, &host, flags);
1244	if(!result)
1245	result = Curl_parse_port(u, &host, schemelen);
1246	if(result)
1247	goto fail;
1248
1249	if(junkscan(Curl_dyn_ptr(&host), flags)) {
1250	result = CURLUE_BAD_HOSTNAME;
1251	goto fail;
1252	}
1253
1254	if(ipv4_normalize(Curl_dyn_ptr(&host),
1255	normalized_ipv4, sizeof(normalized_ipv4))) {
1256	Curl_dyn_reset(&host);
1257	if(Curl_dyn_add(&host, normalized_ipv4)) {
1258	result = CURLUE_OUT_OF_MEMORY;
1259	goto fail;
1260	}
1261	}
1262	else {
1263	result = decode_host(&host);
1264	if(!result)
1265	result = hostname_check(u, Curl_dyn_ptr(&host), Curl_dyn_len(&host));
1266	if(result)
1267	goto fail;
1268	}
1269
1270	if((flags & CURLU_GUESS_SCHEME) && !schemep) {
1271	const char *hostname = Curl_dyn_ptr(&host);
1272	/* legacy curl-style guess based on host name */
1273	if(checkprefix("ftp.", hostname))
1274	schemep = "ftp";
1275	else if(checkprefix("dict.", hostname))
1276	schemep = "dict";
1277	else if(checkprefix("ldap.", hostname))
1278	schemep = "ldap";
1279	else if(checkprefix("imap.", hostname))
1280	schemep = "imap";
1281	else if(checkprefix("smtp.", hostname))
1282	schemep = "smtp";
1283	else if(checkprefix("pop3.", hostname))
1284	schemep = "pop3";
1285	else
1286	schemep = "http";
1287
1288	u->scheme = strdup(schemep);
1289	if(!u->scheme) {
1290	result = CURLUE_OUT_OF_MEMORY;
1291	goto fail;
1292	}
1293	}
1294	}
1295	else if(flags & CURLU_NO_AUTHORITY) {
1296	/* allowed to be empty. */
1297	if(Curl_dyn_add(&host, "")) {
1298	result = CURLUE_OUT_OF_MEMORY;
1299	goto fail;
1300	}
1301	}
1302
1303	u->host = Curl_dyn_ptr(&host);
1304
1305	return result;
1306	fail:
1307	Curl_dyn_free(&host);
1308	free_urlhandle(u);
1309	return result;
1310	}
1311
1312	/*
1313	* Parse the URL and, if successful, replace everything in the Curl_URL struct.
1314	*/
1315	static CURLUcode parseurl_and_replace(const char url, CURLU u,
1316	unsigned int flags)
1317	{
1318	CURLUcode result;
1319	CURLU tmpurl;
1320	memset(&tmpurl, 0, sizeof(tmpurl));
1321	result = parseurl(url, &tmpurl, flags);
1322	if(!result) {
1323	free_urlhandle(u);
1324	*u = tmpurl;
1325	}
1326	return result;
1327	}
1328
1329	/*
1330	*/
1331	CURLU *curl_url(void)
1332	{
1333	return calloc(sizeof(struct Curl_URL), 1);
1334	}
1335
1336	void curl_url_cleanup(CURLU *u)
1337	{
1338	if(u) {
1339	free_urlhandle(u);
1340	free(u);
1341	}
1342	}
1343
/*
 * DUP() copies the string member 'name' from the struct pointed to by 'src'
 * into the one pointed to by 'dest' when the source member is set. If the
 * strdup() fails, control jumps to a label called 'fail' that the enclosing
 * function must provide.
 */
#define DUP(dest, src, name)                    \
  do {                                          \
    if((src)->name) {                           \
      (dest)->name = strdup((src)->name);       \
      if(!(dest)->name)                         \
        goto fail;                              \
    }                                           \
  } while(0)
1352
1353	CURLU curl_url_dup(const CURLU in)
1354	{
1355	struct Curl_URL *u = calloc(sizeof(struct Curl_URL), 1);
1356	if(u) {
1357	DUP(u, in, scheme);
1358	DUP(u, in, user);
1359	DUP(u, in, password);
1360	DUP(u, in, options);
1361	DUP(u, in, host);
1362	DUP(u, in, port);
1363	DUP(u, in, path);
1364	DUP(u, in, query);
1365	DUP(u, in, fragment);
1366	u->portnum = in->portnum;
1367	}
1368	return u;
1369	fail:
1370	curl_url_cleanup(u);
1371	return NULL;
1372	}
1373
1374	CURLUcode curl_url_get(const CURLU *u, CURLUPart what,
1375	char **part, unsigned int flags)
1376	{
1377	const char *ptr;
1378	CURLUcode ifmissing = CURLUE_UNKNOWN_PART;
1379	char portbuf[7];
1380	bool urldecode = (flags & CURLU_URLDECODE)?1:0;
1381	bool urlencode = (flags & CURLU_URLENCODE)?1:0;
1382	bool punycode = FALSE;
1383	bool plusdecode = FALSE;
1384	(void)flags;
1385	if(!u)
1386	return CURLUE_BAD_HANDLE;
1387	if(!part)
1388	return CURLUE_BAD_PARTPOINTER;
1389	*part = NULL;
1390
1391	switch(what) {
1392	case CURLUPART_SCHEME:
1393	ptr = u->scheme;
1394	ifmissing = CURLUE_NO_SCHEME;
1395	urldecode = FALSE; /* never for schemes */
1396	break;
1397	case CURLUPART_USER:
1398	ptr = u->user;
1399	ifmissing = CURLUE_NO_USER;
1400	break;
1401	case CURLUPART_PASSWORD:
1402	ptr = u->password;
1403	ifmissing = CURLUE_NO_PASSWORD;
1404	break;
1405	case CURLUPART_OPTIONS:
1406	ptr = u->options;
1407	ifmissing = CURLUE_NO_OPTIONS;
1408	break;
1409	case CURLUPART_HOST:
1410	ptr = u->host;
1411	ifmissing = CURLUE_NO_HOST;
1412	punycode = (flags & CURLU_PUNYCODE)?1:0;
1413	break;
1414	case CURLUPART_ZONEID:
1415	ptr = u->zoneid;
1416	ifmissing = CURLUE_NO_ZONEID;
1417	break;
1418	case CURLUPART_PORT:
1419	ptr = u->port;
1420	ifmissing = CURLUE_NO_PORT;
1421	urldecode = FALSE; /* never for port */
1422	if(!ptr && (flags & CURLU_DEFAULT_PORT) && u->scheme) {
1423	/* there's no stored port number, but asked to deliver
1424	a default one for the scheme */
1425	const struct Curl_handler *h =
1426	Curl_builtin_scheme(u->scheme, CURL_ZERO_TERMINATED);
1427	if(h) {
1428	msnprintf(portbuf, sizeof(portbuf), "%u", h->defport);
1429	ptr = portbuf;
1430	}
1431	}
1432	else if(ptr && u->scheme) {
1433	/* there is a stored port number, but ask to inhibit if
1434	it matches the default one for the scheme */
1435	const struct Curl_handler *h =
1436	Curl_builtin_scheme(u->scheme, CURL_ZERO_TERMINATED);
1437	if(h && (h->defport == u->portnum) &&
1438	(flags & CURLU_NO_DEFAULT_PORT))
1439	ptr = NULL;
1440	}
1441	break;
1442	case CURLUPART_PATH:
1443	ptr = u->path;
1444	if(!ptr)
1445	ptr = "/";
1446	break;
1447	case CURLUPART_QUERY:
1448	ptr = u->query;
1449	ifmissing = CURLUE_NO_QUERY;
1450	plusdecode = urldecode;
1451	break;
1452	case CURLUPART_FRAGMENT:
1453	ptr = u->fragment;
1454	ifmissing = CURLUE_NO_FRAGMENT;
1455	break;
1456	case CURLUPART_URL: {
1457	char *url;
1458	char *scheme;
1459	char *options = u->options;
1460	char *port = u->port;
1461	char *allochost = NULL;
1462	punycode = (flags & CURLU_PUNYCODE)?1:0;
1463	if(u->scheme && strcasecompare("file", u->scheme)) {
1464	url = aprintf("file://%s%s%s",
1465	u->path,
1466	u->fragment? "#": "",
1467	u->fragment? u->fragment : "");
1468	}
1469	else if(!u->host)
1470	return CURLUE_NO_HOST;
1471	else {
1472	const struct Curl_handler *h = NULL;
1473	if(u->scheme)
1474	scheme = u->scheme;
1475	else if(flags & CURLU_DEFAULT_SCHEME)
1476	scheme = (char *) DEFAULT_SCHEME;
1477	else
1478	return CURLUE_NO_SCHEME;
1479
1480	h = Curl_builtin_scheme(scheme, CURL_ZERO_TERMINATED);
1481	if(!port && (flags & CURLU_DEFAULT_PORT)) {
1482	/* there's no stored port number, but asked to deliver
1483	a default one for the scheme */
1484	if(h) {
1485	msnprintf(portbuf, sizeof(portbuf), "%u", h->defport);
1486	port = portbuf;
1487	}
1488	}
1489	else if(port) {
1490	/* there is a stored port number, but asked to inhibit if it matches
1491	the default one for the scheme */
1492	if(h && (h->defport == u->portnum) &&
1493	(flags & CURLU_NO_DEFAULT_PORT))
1494	port = NULL;
1495	}
1496
1497	if(h && !(h->flags & PROTOPT_URLOPTIONS))
1498	options = NULL;
1499
1500	if(u->host[0] == '[') {
1501	if(u->zoneid) {
1502	/* make it '[ host %25 zoneid ]' */
1503	struct dynbuf enc;
1504	size_t hostlen = strlen(u->host);
1505	Curl_dyn_init(&enc, CURL_MAX_INPUT_LENGTH);
1506	if(Curl_dyn_addf(&enc, "%.*s%%25%s]", (int)hostlen - 1, u->host,
1507	u->zoneid))
1508	return CURLUE_OUT_OF_MEMORY;
1509	allochost = Curl_dyn_ptr(&enc);
1510	}
1511	}
1512	else if(urlencode) {
1513	allochost = curl_easy_escape(NULL, u->host, 0);
1514	if(!allochost)
1515	return CURLUE_OUT_OF_MEMORY;
1516	}
1517	else if(punycode) {
1518	if(!Curl_is_ASCII_name(u->host)) {
1519	#ifndef USE_IDN
1520	return CURLUE_LACKS_IDN;
1521	#else
1522	allochost = Curl_idn_decode(u->host);
1523	if(!allochost)
1524	return CURLUE_OUT_OF_MEMORY;
1525	#endif
1526	}
1527	}
1528	else {
1529	/* only encode '%' in output host name */
1530	char *host = u->host;
1531	bool percent = FALSE;
1532	/* first, count number of percents present in the name */
1533	while(*host) {
1534	if(*host == '%') {
1535	percent = TRUE;
1536	break;
1537	}
1538	host++;
1539	}
1540	/* if there were percent(s), encode the host name */
1541	if(percent) {
1542	struct dynbuf enc;
1543	CURLcode result;
1544	Curl_dyn_init(&enc, CURL_MAX_INPUT_LENGTH);
1545	host = u->host;
1546	while(*host) {
1547	if(*host == '%')
1548	result = Curl_dyn_addn(&enc, "%25", 3);
1549	else
1550	result = Curl_dyn_addn(&enc, host, 1);
1551	if(result)
1552	return CURLUE_OUT_OF_MEMORY;
1553	host++;
1554	}
1555	allochost = Curl_dyn_ptr(&enc);
1556	}
1557	}
1558
1559	url = aprintf("%s://%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s",
1560	scheme,
1561	u->user ? u->user : "",
1562	u->password ? ":": "",
1563	u->password ? u->password : "",
1564	options ? ";" : "",
1565	options ? options : "",
1566	(u->user \|\| u->password \|\| options) ? "@": "",
1567	allochost ? allochost : u->host,
1568	port ? ":": "",
1569	port ? port : "",
1570	(u->path && (u->path[0] != '/')) ? "/": "",
1571	u->path ? u->path : "/",
1572	(u->query && u->query[0]) ? "?": "",
1573	(u->query && u->query[0]) ? u->query : "",
1574	u->fragment? "#": "",
1575	u->fragment? u->fragment : "");
1576	free(allochost);
1577	}
1578	if(!url)
1579	return CURLUE_OUT_OF_MEMORY;
1580	*part = url;
1581	return CURLUE_OK;
1582	}
1583	default:
1584	ptr = NULL;
1585	break;
1586	}
1587	if(ptr) {
1588	size_t partlen = strlen(ptr);
1589	size_t i = 0;
1590	*part = Curl_memdup(ptr, partlen + 1);
1591	if(!*part)
1592	return CURLUE_OUT_OF_MEMORY;
1593	if(plusdecode) {
1594	/* convert + to space */
1595	char plus = part;
1596	for(i = 0; i < partlen; ++plus, i++) {
1597	if(*plus == '+')
1598	*plus = ' ';
1599	}
1600	}
1601	if(urldecode) {
1602	char *decoded;
1603	size_t dlen;
1604	/* this unconditional rejection of control bytes is documented
1605	API behavior */
1606	CURLcode res = Curl_urldecode(*part, 0, &decoded, &dlen, REJECT_CTRL);
1607	free(*part);
1608	if(res) {
1609	*part = NULL;
1610	return CURLUE_URLDECODE;
1611	}
1612	*part = decoded;
1613	partlen = dlen;
1614	}
1615	if(urlencode) {
1616	struct dynbuf enc;
1617	Curl_dyn_init(&enc, CURL_MAX_INPUT_LENGTH);
1618	if(urlencode_str(&enc, *part, partlen, TRUE,
1619	what == CURLUPART_QUERY))
1620	return CURLUE_OUT_OF_MEMORY;
1621	free(*part);
1622	*part = Curl_dyn_ptr(&enc);
1623	}
1624	else if(punycode) {
1625	if(!Curl_is_ASCII_name(u->host)) {
1626	#ifndef USE_IDN
1627	return CURLUE_LACKS_IDN;
1628	#else
1629	char allochost = Curl_idn_decode(part);
1630	if(!allochost)
1631	return CURLUE_OUT_OF_MEMORY;
1632	free(*part);
1633	*part = allochost;
1634	#endif
1635	}
1636	}
1637
1638	return CURLUE_OK;
1639	}
1640	else
1641	return ifmissing;
1642	}
1643
1644	CURLUcode curl_url_set(CURLU *u, CURLUPart what,
1645	const char *part, unsigned int flags)
1646	{
1647	char **storep = NULL;
1648	long port = 0;
1649	bool urlencode = (flags & CURLU_URLENCODE)? 1 : 0;
1650	bool plusencode = FALSE;
1651	bool urlskipslash = FALSE;
1652	bool appendquery = FALSE;
1653	bool equalsencode = FALSE;
1654
1655	if(!u)
1656	return CURLUE_BAD_HANDLE;
1657	if(!part) {
1658	/* setting a part to NULL clears it */
1659	switch(what) {
1660	case CURLUPART_URL:
1661	break;
1662	case CURLUPART_SCHEME:
1663	storep = &u->scheme;
1664	break;
1665	case CURLUPART_USER:
1666	storep = &u->user;
1667	break;
1668	case CURLUPART_PASSWORD:
1669	storep = &u->password;
1670	break;
1671	case CURLUPART_OPTIONS:
1672	storep = &u->options;
1673	break;
1674	case CURLUPART_HOST:
1675	storep = &u->host;
1676	break;
1677	case CURLUPART_ZONEID:
1678	storep = &u->zoneid;
1679	break;
1680	case CURLUPART_PORT:
1681	u->portnum = 0;
1682	storep = &u->port;
1683	break;
1684	case CURLUPART_PATH:
1685	storep = &u->path;
1686	break;
1687	case CURLUPART_QUERY:
1688	storep = &u->query;
1689	break;
1690	case CURLUPART_FRAGMENT:
1691	storep = &u->fragment;
1692	break;
1693	default:
1694	return CURLUE_UNKNOWN_PART;
1695	}
1696	if(storep && *storep) {
1697	Curl_safefree(*storep);
1698	}
1699	else if(!storep) {
1700	free_urlhandle(u);
1701	memset(u, 0, sizeof(struct Curl_URL));
1702	}
1703	return CURLUE_OK;
1704	}
1705
1706	switch(what) {
1707	case CURLUPART_SCHEME:
1708	if(strlen(part) > MAX_SCHEME_LEN)
1709	/* too long */
1710	return CURLUE_BAD_SCHEME;
1711	if(!(flags & CURLU_NON_SUPPORT_SCHEME) &&
1712	/* verify that it is a fine scheme */
1713	!Curl_builtin_scheme(part, CURL_ZERO_TERMINATED))
1714	return CURLUE_UNSUPPORTED_SCHEME;
1715	storep = &u->scheme;
1716	urlencode = FALSE; /* never */
1717	break;
1718	case CURLUPART_USER:
1719	storep = &u->user;
1720	break;
1721	case CURLUPART_PASSWORD:
1722	storep = &u->password;
1723	break;
1724	case CURLUPART_OPTIONS:
1725	storep = &u->options;
1726	break;
1727	case CURLUPART_HOST: {
1728	size_t len = strcspn(part, " \r\n");
1729	if(strlen(part) != len)
1730	/* hostname with bad content */
1731	return CURLUE_BAD_HOSTNAME;
1732	storep = &u->host;
1733	Curl_safefree(u->zoneid);
1734	break;
1735	}
1736	case CURLUPART_ZONEID:
1737	storep = &u->zoneid;
1738	break;
1739	case CURLUPART_PORT:
1740	{
1741	char *endp;
1742	urlencode = FALSE; /* never */
1743	port = strtol(part, &endp, 10); /* Port number must be decimal */
1744	if((port <= 0) \|\| (port > 0xffff))
1745	return CURLUE_BAD_PORT_NUMBER;
1746	if(*endp)
1747	/* weirdly provided number, not good! */
1748	return CURLUE_BAD_PORT_NUMBER;
1749	storep = &u->port;
1750	}
1751	break;
1752	case CURLUPART_PATH:
1753	urlskipslash = TRUE;
1754	storep = &u->path;
1755	break;
1756	case CURLUPART_QUERY:
1757	plusencode = urlencode;
1758	appendquery = (flags & CURLU_APPENDQUERY)?1:0;
1759	equalsencode = appendquery;
1760	storep = &u->query;
1761	break;
1762	case CURLUPART_FRAGMENT:
1763	storep = &u->fragment;
1764	break;
1765	case CURLUPART_URL: {
1766	/*
1767	* Allow a new URL to replace the existing (if any) contents.
1768	*
1769	* If the existing contents is enough for a URL, allow a relative URL to
1770	* replace it.
1771	*/
1772	CURLUcode result;
1773	char *oldurl;
1774	char *redired_url;
1775
1776	/* if the new thing is absolute or the old one is not
1777	* (we could not get an absolute url in 'oldurl'),
1778	* then replace the existing with the new. */
1779	if(Curl_is_absolute_url(part, NULL, 0,
1780	flags & (CURLU_GUESS_SCHEME\|
1781	CURLU_DEFAULT_SCHEME))
1782	\|\| curl_url_get(u, CURLUPART_URL, &oldurl, flags)) {
1783	return parseurl_and_replace(part, u, flags);
1784	}
1785
1786	/* apply the relative part to create a new URL
1787	* and replace the existing one with it. */
1788	redired_url = concat_url(oldurl, part);
1789	free(oldurl);
1790	if(!redired_url)
1791	return CURLUE_OUT_OF_MEMORY;
1792
1793	result = parseurl_and_replace(redired_url, u, flags);
1794	free(redired_url);
1795	return result;
1796	}
1797	default:
1798	return CURLUE_UNKNOWN_PART;
1799	}
1800	DEBUGASSERT(storep);
1801	{
1802	const char *newp = part;
1803	size_t nalloc = strlen(part);
1804
1805	if(nalloc > CURL_MAX_INPUT_LENGTH)
1806	/* excessive input length */
1807	return CURLUE_MALFORMED_INPUT;
1808
1809	if(urlencode) {
1810	const unsigned char *i;
1811	struct dynbuf enc;
1812
1813	Curl_dyn_init(&enc, nalloc * 3 + 1);
1814
1815	for(i = (const unsigned char )part; i; i++) {
1816	CURLcode result;
1817	if((*i == ' ') && plusencode) {
1818	result = Curl_dyn_addn(&enc, "+", 1);
1819	if(result)
1820	return CURLUE_OUT_OF_MEMORY;
1821	}
1822	else if(Curl_isunreserved(*i) \|\|
1823	((*i == '/') && urlskipslash) \|\|
1824	((*i == '=') && equalsencode)) {
1825	if((*i == '=') && equalsencode)
1826	/* only skip the first equals sign */
1827	equalsencode = FALSE;
1828	result = Curl_dyn_addn(&enc, i, 1);
1829	if(result)
1830	return CURLUE_OUT_OF_MEMORY;
1831	}
1832	else {
1833	char out[3]={'%'};
1834	out[1] = hexdigits[*i>>4];
1835	out[2] = hexdigits[*i & 0xf];
1836	result = Curl_dyn_addn(&enc, out, 3);
1837	if(result)
1838	return CURLUE_OUT_OF_MEMORY;
1839	}
1840	}
1841	newp = Curl_dyn_ptr(&enc);
1842	}
1843	else {
1844	char *p;
1845	newp = strdup(part);
1846	if(!newp)
1847	return CURLUE_OUT_OF_MEMORY;
1848	p = (char *)newp;
1849	while(*p) {
1850	/* make sure percent encoded are lower case */
1851	if((*p == '%') && ISXDIGIT(p[1]) && ISXDIGIT(p[2]) &&
1852	(ISUPPER(p[1]) \|\| ISUPPER(p[2]))) {
1853	p[1] = Curl_raw_tolower(p[1]);
1854	p[2] = Curl_raw_tolower(p[2]);
1855	p += 3;
1856	}
1857	else
1858	p++;
1859	}
1860	}
1861
1862	if(appendquery) {
1863	/* Append the 'newp' string onto the old query. Add a '&' separator if
1864	none is present at the end of the existing query already */
1865
1866	size_t querylen = u->query ? strlen(u->query) : 0;
1867	bool addamperand = querylen && (u->query[querylen -1] != '&');
1868	if(querylen) {
1869	struct dynbuf enc;
1870	Curl_dyn_init(&enc, CURL_MAX_INPUT_LENGTH);
1871
1872	if(Curl_dyn_addn(&enc, u->query, querylen)) /* add original query */
1873	goto nomem;
1874
1875	if(addamperand) {
1876	if(Curl_dyn_addn(&enc, "&", 1))
1877	goto nomem;
1878	}
1879	if(Curl_dyn_add(&enc, newp))
1880	goto nomem;
1881	free((char *)newp);
1882	free(*storep);
1883	*storep = Curl_dyn_ptr(&enc);
1884	return CURLUE_OK;
1885	nomem:
1886	free((char *)newp);
1887	return CURLUE_OUT_OF_MEMORY;
1888	}
1889	}
1890
1891	if(what == CURLUPART_HOST) {
1892	size_t n = strlen(newp);
1893	if(!n && (flags & CURLU_NO_AUTHORITY)) {
1894	/* Skip hostname check, it's allowed to be empty. */
1895	}
1896	else {
1897	if(hostname_check(u, (char *)newp, n)) {
1898	free((char *)newp);
1899	return CURLUE_BAD_HOSTNAME;
1900	}
1901	}
1902	}
1903
1904	free(*storep);
1905	storep = (char )newp;
1906	}
1907	/* set after the string, to make it not assigned if the allocation above
1908	fails */
1909	if(port)
1910	u->portnum = port;
1911	return CURLUE_OK;
1912	}

Note: See TracBrowser for help on using the repository browser.

source: vbox/trunk/src/libs/curl-8.0.1/lib/urlapi.c@ 99874

Download in other formats: