urlapi.c@ 94601

Last change on this file since 94601 was 85671, checked in by vboxsync, 4 years ago
Export out internal curl copy to make it a lot simpler to build VBox (OSE) on Windows. bugref:9814
Property svn:eol-style set to `native`
File size: 35.2 KB

Line
/***************************************************************************
 *                                  _   _ ____  _
 *  Project                     ___| | | |  _ \| |
 *                             / __| | | | |_) | |
 *                            | (__| |_| |  _ <| |___
 *                             \___|\___/|_| \_\_____|
 *
 * Copyright (C) 1998 - 2018, Daniel Stenberg, <daniel@haxx.se>, et al.
 *
 * This software is licensed as described in the file COPYING, which
 * you should have received as part of this distribution. The terms
 * are also available at https://curl.haxx.se/docs/copyright.html.
 *
 * You may opt to use, copy, modify, merge, publish, distribute and/or sell
 * copies of the Software, and permit persons to whom the Software is
 * furnished to do so, under the terms of the COPYING file.
 *
 * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
 * KIND, either express or implied.
 *
 ***************************************************************************/
22
23	#include "curl_setup.h"
24
25	#include "urldata.h"
26	#include "urlapi-int.h"
27	#include "strcase.h"
28	#include "dotdot.h"
29	#include "url.h"
30	#include "escape.h"
31	#include "curl_ctype.h"
32
33	/* The last 3 #include files should be in this order */
34	#include "curl_printf.h"
35	#include "curl_memory.h"
36	#include "memdebug.h"
37
/* MSDOS/Windows style drive prefix, eg c: in c:foo.
 * Evaluates to non-zero when the first character of 'str' is an ASCII letter
 * and the second is ':'. The argument is evaluated more than once, so it
 * must not have side effects. */
#define STARTS_WITH_DRIVE_PREFIX(str) \
  ((('a' <= (str)[0] && (str)[0] <= 'z') || \
    ('A' <= (str)[0] && (str)[0] <= 'Z')) && \
   ((str)[1] == ':'))
43
/* MSDOS/Windows style drive prefix, optionally with
 * a '|' instead of ':', followed by a slash or NUL.
 * Both '/' and '\\' are accepted as the slash. The argument is evaluated
 * more than once, so it must not have side effects. */
#define STARTS_WITH_URL_DRIVE_PREFIX(str) \
  ((('a' <= (str)[0] && (str)[0] <= 'z') || \
    ('A' <= (str)[0] && (str)[0] <= 'Z')) && \
   ((str)[1] == ':' || (str)[1] == '|') && \
   ((str)[2] == '/' || (str)[2] == '\\' || (str)[2] == 0))
51
/* Internal representation of CURLU. Point to URL-encoded strings.
 * Every char pointer member is an individually heap-allocated string (or
 * NULL) that is released by free_urlhandle(). */
struct Curl_URL {
  char *scheme;
  char *user;
  char *password;
  char *options; /* IMAP only? */
  char *host;
  char *port;
  char *path;
  char *query;
  char *fragment;

  char *scratch; /* temporary scratch area */
  long portnum; /* the numerical version */
};
67
/* scheme used when a URL has none and CURLU_DEFAULT_SCHEME is set */
#define DEFAULT_SCHEME "https"

/* UNITTEST is the storage class for functions that should be reachable from
   outside this file in debug builds only: empty when DEBUGBUILD is defined,
   'static' otherwise */
#if defined(DEBUGBUILD)
#  define UNITTEST
#else
#  define UNITTEST static
#endif
75
76	static void free_urlhandle(struct Curl_URL *u)
77	{
78	free(u->scheme);
79	free(u->user);
80	free(u->password);
81	free(u->options);
82	free(u->host);
83	free(u->port);
84	free(u->path);
85	free(u->query);
86	free(u->fragment);
87	free(u->scratch);
88	}
89
90	/* move the full contents of one handle onto another and
91	free the original */
92	static void mv_urlhandle(struct Curl_URL *from,
93	struct Curl_URL *to)
94	{
95	free_urlhandle(to);
96	to = from;
97	free(from);
98	}
99
/*
 * Find the separator at the end of the host name, or the '?' in cases like
 * http://www.url.com?id=2380
 *
 * The search starts after the first "//" if there is one, otherwise at the
 * start of the string. Returns a pointer to whichever of the first '/' and
 * the first '?' comes first, or to the terminating zero byte when the
 * string has neither.
 */
static const char *find_host_sep(const char *url)
{
  const char *sep;
  const char *query;

  /* Find the start of the hostname */
  sep = strstr(url, "//");
  if(!sep)
    sep = url;
  else
    sep += 2;

  query = strchr(sep, '?');
  sep = strchr(sep, '/');

  /* a missing separator is treated as sitting at the end of the string */
  if(!sep)
    sep = url + strlen(url);

  if(!query)
    query = url + strlen(url);

  return sep < query ? sep : query;
}
127
128	/*
129	* Decide in an encoding-independent manner whether a character in an
130	* URL must be escaped. The same criterion must be used in strlen_url()
131	* and strcpy_url().
132	*/
133	static bool urlchar_needs_escaping(int c)
134	{
135	return !(ISCNTRL(c) \|\| ISSPACE(c) \|\| ISGRAPH(c));
136	}
137
138	/*
139	* strlen_url() returns the length of the given URL if the spaces within the
140	* URL were properly URL encoded.
141	* URL encoding should be skipped for host names, otherwise IDN resolution
142	* will fail.
143	*/
144	size_t Curl_strlen_url(const char *url, bool relative)
145	{
146	const unsigned char *ptr;
147	size_t newlen = 0;
148	bool left = TRUE; /* left side of the ? */
149	const unsigned char host_sep = (const unsigned char ) url;
150
151	if(!relative)
152	host_sep = (const unsigned char *) find_host_sep(url);
153
154	for(ptr = (unsigned char )url; ptr; ptr++) {
155
156	if(ptr < host_sep) {
157	++newlen;
158	continue;
159	}
160
161	switch(*ptr) {
162	case '?':
163	left = FALSE;
164	/* FALLTHROUGH */
165	default:
166	if(urlchar_needs_escaping(*ptr))
167	newlen += 2;
168	newlen++;
169	break;
170	case ' ':
171	if(left)
172	newlen += 3;
173	else
174	newlen++;
175	break;
176	}
177	}
178	return newlen;
179	}
180
181	/* strcpy_url() copies a url to a output buffer and URL-encodes the spaces in
182	* the source URL accordingly.
183	* URL encoding should be skipped for host names, otherwise IDN resolution
184	* will fail.
185	*/
186	void Curl_strcpy_url(char output, const char url, bool relative)
187	{
188	/* we must add this with whitespace-replacing */
189	bool left = TRUE;
190	const unsigned char *iptr;
191	char *optr = output;
192	const unsigned char host_sep = (const unsigned char ) url;
193
194	if(!relative)
195	host_sep = (const unsigned char *) find_host_sep(url);
196
197	for(iptr = (unsigned char )url; / read from here */
198	iptr; / until zero byte */
199	iptr++) {
200
201	if(iptr < host_sep) {
202	optr++ = iptr;
203	continue;
204	}
205
206	switch(*iptr) {
207	case '?':
208	left = FALSE;
209	/* FALLTHROUGH */
210	default:
211	if(urlchar_needs_escaping(*iptr)) {
212	msnprintf(optr, 4, "%%%02x", *iptr);
213	optr += 3;
214	}
215	else
216	optr++=iptr;
217	break;
218	case ' ':
219	if(left) {
220	optr++='%'; / add a '%' */
221	optr++='2'; / add a '2' */
222	optr++='0'; / add a '0' */
223	}
224	else
225	optr++='+'; / add a '+' here */
226	break;
227	}
228	}
229	optr = 0; / zero terminate output buffer */
230
231	}
232
233	/*
234	* Returns true if the given URL is absolute (as opposed to relative) within
235	* the buffer size. Returns the scheme in the buffer if TRUE and 'buf' is
236	* non-NULL.
237	*/
238	bool Curl_is_absolute_url(const char url, char buf, size_t buflen)
239	{
240	size_t i;
241	#ifdef WIN32
242	if(STARTS_WITH_DRIVE_PREFIX(url))
243	return FALSE;
244	#endif
245	for(i = 0; i < buflen && url[i]; ++i) {
246	char s = url[i];
247	if((s == ':') && (url[i + 1] == '/')) {
248	if(buf)
249	buf[i] = 0;
250	return TRUE;
251	}
252	/* RFC 3986 3.1 explains:
253	scheme = ALPHA *( ALPHA / DIGIT / "+" / "-" / "." )
254	*/
255	else if(ISALNUM(s) \|\| (s == '+') \|\| (s == '-') \|\| (s == '.') ) {
256	if(buf)
257	buf[i] = (char)TOLOWER(s);
258	}
259	else
260	break;
261	}
262	return FALSE;
263	}
264
265	/*
266	* Concatenate a relative URL to a base URL making it absolute.
267	* URL-encodes any spaces.
268	* The returned pointer must be freed by the caller unless NULL
269	* (returns NULL on out of memory).
270	*/
271	char Curl_concat_url(const char base, const char *relurl)
272	{
273	/***
274	TRY to append this new path to the old URL
275	to the right of the host part. Oh crap, this is doomed to cause
276	problems in the future...
277	*/
278	char *newest;
279	char *protsep;
280	char *pathsep;
281	size_t newlen;
282	bool host_changed = FALSE;
283
284	const char *useurl = relurl;
285	size_t urllen;
286
287	/* we must make our own copy of the URL to play with, as it may
288	point to read-only data */
289	char *url_clone = strdup(base);
290
291	if(!url_clone)
292	return NULL; /* skip out of this NOW */
293
294	/* protsep points to the start of the host name */
295	protsep = strstr(url_clone, "//");
296	if(!protsep)
297	protsep = url_clone;
298	else
299	protsep += 2; /* pass the slashes */
300
301	if('/' != relurl[0]) {
302	int level = 0;
303
304	/* First we need to find out if there's a ?-letter in the URL,
305	and cut it and the right-side of that off */
306	pathsep = strchr(protsep, '?');
307	if(pathsep)
308	*pathsep = 0;
309
310	/* we have a relative path to append to the last slash if there's one
311	available, or if the new URL is just a query string (starts with a
312	'?') we append the new one at the end of the entire currently worked
313	out URL */
314	if(useurl[0] != '?') {
315	pathsep = strrchr(protsep, '/');
316	if(pathsep)
317	*pathsep = 0;
318	}
319
320	/* Check if there's any slash after the host name, and if so, remember
321	that position instead */
322	pathsep = strchr(protsep, '/');
323	if(pathsep)
324	protsep = pathsep + 1;
325	else
326	protsep = NULL;
327
328	/* now deal with one "./" or any amount of "../" in the newurl
329	and act accordingly */
330
331	if((useurl[0] == '.') && (useurl[1] == '/'))
332	useurl += 2; /* just skip the "./" */
333
334	while((useurl[0] == '.') &&
335	(useurl[1] == '.') &&
336	(useurl[2] == '/')) {
337	level++;
338	useurl += 3; /* pass the "../" */
339	}
340
341	if(protsep) {
342	while(level--) {
343	/* cut off one more level from the right of the original URL */
344	pathsep = strrchr(protsep, '/');
345	if(pathsep)
346	*pathsep = 0;
347	else {
348	*protsep = 0;
349	break;
350	}
351	}
352	}
353	}
354	else {
355	/* We got a new absolute path for this server */
356
357	if((relurl[0] == '/') && (relurl[1] == '/')) {
358	/* the new URL starts with //, just keep the protocol part from the
359	original one */
360	*protsep = 0;
361	useurl = &relurl[2]; /* we keep the slashes from the original, so we
362	skip the new ones */
363	host_changed = TRUE;
364	}
365	else {
366	/* cut off the original URL from the first slash, or deal with URLs
367	without slash */
368	pathsep = strchr(protsep, '/');
369	if(pathsep) {
370	/* When people use badly formatted URLs, such as
371	"http://www.url.com?dir=/home/daniel" we must not use the first
372	slash, if there's a ?-letter before it! */
373	char *sep = strchr(protsep, '?');
374	if(sep && (sep < pathsep))
375	pathsep = sep;
376	*pathsep = 0;
377	}
378	else {
379	/* There was no slash. Now, since we might be operating on a badly
380	formatted URL, such as "http://www.url.com?id=2380" which doesn't
381	use a slash separator as it is supposed to, we need to check for a
382	?-letter as well! */
383	pathsep = strchr(protsep, '?');
384	if(pathsep)
385	*pathsep = 0;
386	}
387	}
388	}
389
390	/* If the new part contains a space, this is a mighty stupid redirect
391	but we still make an effort to do "right". To the left of a '?'
392	letter we replace each space with %20 while it is replaced with '+'
393	on the right side of the '?' letter.
394	*/
395	newlen = Curl_strlen_url(useurl, !host_changed);
396
397	urllen = strlen(url_clone);
398
399	newest = malloc(urllen + 1 + /* possible slash */
400	newlen + 1 /* zero byte */);
401
402	if(!newest) {
403	free(url_clone); /* don't leak this */
404	return NULL;
405	}
406
407	/* copy over the root url part */
408	memcpy(newest, url_clone, urllen);
409
410	/* check if we need to append a slash */
411	if(('/' == useurl[0]) \|\| (protsep && !*protsep) \|\| ('?' == useurl[0]))
412	;
413	else
414	newest[urllen++]='/';
415
416	/* then append the new piece on the right side */
417	Curl_strcpy_url(&newest[urllen], useurl, !host_changed);
418
419	free(url_clone);
420
421	return newest;
422	}
423
424	/*
425	* parse_hostname_login()
426	*
427	* Parse the login details (user name, password and options) from the URL and
428	* strip them out of the host name
429	*
430	*/
431	static CURLUcode parse_hostname_login(struct Curl_URL *u,
432	const struct Curl_handler *h,
433	char **hostname,
434	unsigned int flags)
435	{
436	CURLUcode result = CURLUE_OK;
437	CURLcode ccode;
438	char *userp = NULL;
439	char *passwdp = NULL;
440	char *optionsp = NULL;
441
442	/* At this point, we're hoping all the other special cases have
443	* been taken care of, so conn->host.name is at most
444	* [user[:password][;options]]@]hostname
445	*
446	* We need somewhere to put the embedded details, so do that first.
447	*/
448
449	char ptr = strchr(hostname, '@');
450	char login = hostname;
451
452	if(!ptr)
453	goto out;
454
455	/* We will now try to extract the
456	* possible login information in a string like:
457	* ftp://user:[email protected]:8021/README */
458	*hostname = ++ptr;
459
460	/* We could use the login information in the URL so extract it. Only parse
461	options if the handler says we should. Note that 'h' might be NULL! */
462	ccode = Curl_parse_login_details(login, ptr - login - 1,
463	&userp, &passwdp,
464	(h && (h->flags & PROTOPT_URLOPTIONS)) ?
465	&optionsp:NULL);
466	if(ccode) {
467	result = CURLUE_MALFORMED_INPUT;
468	goto out;
469	}
470
471	if(userp) {
472	if(flags & CURLU_DISALLOW_USER) {
473	/* Option DISALLOW_USER is set and url contains username. */
474	result = CURLUE_USER_NOT_ALLOWED;
475	goto out;
476	}
477
478	u->user = userp;
479	}
480
481	if(passwdp)
482	u->password = passwdp;
483
484	if(optionsp)
485	u->options = optionsp;
486
487	return CURLUE_OK;
488	out:
489
490	free(userp);
491	free(passwdp);
492	free(optionsp);
493
494	return result;
495	}
496
497	UNITTEST CURLUcode Curl_parse_port(struct Curl_URL u, char hostname)
498	{
499	char *portptr = NULL;
500	char endbracket;
501	int len;
502
503	/*
504	* Find the end of an IPv6 address, either on the ']' ending bracket or
505	* a percent-encoded zone index.
506	*/
507	if(1 == sscanf(hostname, "[%*45[0123456789abcdefABCDEF:.]%c%n",
508	&endbracket, &len)) {
509	if(']' == endbracket)
510	portptr = &hostname[len];
511	else if('%' == endbracket) {
512	int zonelen = len;
513	if(1 == sscanf(hostname + zonelen, "25%*[^]]%c%n", &endbracket, &len)) {
514	if(']' != endbracket)
515	return CURLUE_MALFORMED_INPUT;
516	portptr = &hostname[--zonelen + len + 1];
517	}
518	else
519	return CURLUE_MALFORMED_INPUT;
520	}
521	else
522	return CURLUE_MALFORMED_INPUT;
523
524	/* this is a RFC2732-style specified IP-address */
525	if(portptr && *portptr) {
526	if(*portptr != ':')
527	return CURLUE_MALFORMED_INPUT;
528	}
529	else
530	portptr = NULL;
531	}
532	else
533	portptr = strchr(hostname, ':');
534
535	if(portptr) {
536	char *rest;
537	long port;
538	char portbuf[7];
539
540	/* Browser behavior adaptation. If there's a colon with no digits after,
541	just cut off the name there which makes us ignore the colon and just
542	use the default port. Firefox, Chrome and Safari all do that. */
543	if(!portptr[1]) {
544	*portptr = '\0';
545	return CURLUE_OK;
546	}
547
548	if(!ISDIGIT(portptr[1]))
549	return CURLUE_BAD_PORT_NUMBER;
550
551	port = strtol(portptr + 1, &rest, 10); /* Port number must be decimal */
552
553	if((port <= 0) \|\| (port > 0xffff))
554	/* Single unix standard says port numbers are 16 bits long, but we don't
555	treat port zero as OK. */
556	return CURLUE_BAD_PORT_NUMBER;
557
558	if(rest[0])
559	return CURLUE_BAD_PORT_NUMBER;
560
561	portptr++ = '\0'; / cut off the name there */
562	*rest = 0;
563	/* generate a new port number string to get rid of leading zeroes etc */
564	msnprintf(portbuf, sizeof(portbuf), "%ld", port);
565	u->portnum = port;
566	u->port = strdup(portbuf);
567	if(!u->port)
568	return CURLUE_OUT_OF_MEMORY;
569	}
570
571	return CURLUE_OK;
572	}
573
574	/* scan for byte values < 31 or 127 */
575	static CURLUcode junkscan(char *part)
576	{
577	char badbytes[]={
578	/* */ 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
579	0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
580	0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17,
581	0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f,
582	0x7f,
583	0x00 /* zero terminate */
584	};
585	if(part) {
586	size_t n = strlen(part);
587	size_t nfine = strcspn(part, badbytes);
588	if(nfine != n)
589	/* since we don't know which part is scanned, return a generic error
590	code */
591	return CURLUE_MALFORMED_INPUT;
592	}
593	return CURLUE_OK;
594	}
595
596	static CURLUcode hostname_check(char *hostname, unsigned int flags)
597	{
598	const char l = NULL; / accepted characters */
599	size_t len;
600	size_t hlen = strlen(hostname);
601	(void)flags;
602
603	if(hostname[0] == '[') {
604	hostname++;
605	l = "0123456789abcdefABCDEF::.%";
606	hlen -= 2;
607	}
608
609	if(l) {
610	/* only valid letters are ok */
611	len = strspn(hostname, l);
612	if(hlen != len)
613	/* hostname with bad content */
614	return CURLUE_MALFORMED_INPUT;
615	}
616	else {
617	/* letters from the second string is not ok */
618	len = strcspn(hostname, " ");
619	if(hlen != len)
620	/* hostname with bad content */
621	return CURLUE_MALFORMED_INPUT;
622	}
623	return CURLUE_OK;
624	}
625
/* true for the characters that terminate the host name part of a URL */
#define HOSTNAME_END(x) (((x) == '/') || ((x) == '?') || ((x) == '#'))
627
628	static CURLUcode seturl(const char url, CURLU u, unsigned int flags)
629	{
630	char *path;
631	bool path_alloced = FALSE;
632	char *hostname;
633	char *query = NULL;
634	char *fragment = NULL;
635	CURLUcode result;
636	bool url_has_scheme = FALSE;
637	char schemebuf[MAX_SCHEME_LEN];
638	char *schemep = NULL;
639	size_t schemelen = 0;
640	size_t urllen;
641	const struct Curl_handler *h = NULL;
642
643	if(!url)
644	return CURLUE_MALFORMED_INPUT;
645
646	/*************************************************************
647	* Parse the URL.
648	************************************************************/
649	/* allocate scratch area */
650	urllen = strlen(url);
651	path = u->scratch = malloc(urllen * 2 + 2);
652	if(!path)
653	return CURLUE_OUT_OF_MEMORY;
654
655	hostname = &path[urllen + 1];
656	hostname[0] = 0;
657
658	if(Curl_is_absolute_url(url, schemebuf, sizeof(schemebuf))) {
659	url_has_scheme = TRUE;
660	schemelen = strlen(schemebuf);
661	}
662
663	/* handle the file: scheme */
664	if(url_has_scheme && strcasecompare(schemebuf, "file")) {
665	/* path has been allocated large enough to hold this */
666	strcpy(path, &url[5]);
667
668	hostname = NULL; /* no host for file: URLs */
669	u->scheme = strdup("file");
670	if(!u->scheme)
671	return CURLUE_OUT_OF_MEMORY;
672
673	/* Extra handling URLs with an authority component (i.e. that start with
674	* "file://")
675	*
676	* We allow omitted hostname (e.g. file:/<path>) -- valid according to
677	* RFC 8089, but not the (current) WHAT-WG URL spec.
678	*/
679	if(path[0] == '/' && path[1] == '/') {
680	/* swallow the two slashes */
681	char *ptr = &path[2];
682
683	/*
684	* According to RFC 8089, a file: URL can be reliably dereferenced if:
685	*
686	* o it has no/blank hostname, or
687	*
688	* o the hostname matches "localhost" (case-insensitively), or
689	*
690	* o the hostname is a FQDN that resolves to this machine.
691	*
692	* For brevity, we only consider URLs with empty, "localhost", or
693	* "127.0.0.1" hostnames as local.
694	*
695	* Additionally, there is an exception for URLs with a Windows drive
696	* letter in the authority (which was accidentally omitted from RFC 8089
697	* Appendix E, but believe me, it was meant to be there. --MK)
698	*/
699	if(ptr[0] != '/' && !STARTS_WITH_URL_DRIVE_PREFIX(ptr)) {
700	/* the URL includes a host name, it must match "localhost" or
701	"127.0.0.1" to be valid */
702	if(!checkprefix("localhost/", ptr) &&
703	!checkprefix("127.0.0.1/", ptr)) {
704	/* Invalid file://hostname/, expected localhost or 127.0.0.1 or
705	none */
706	return CURLUE_MALFORMED_INPUT;
707	}
708	ptr += 9; /* now points to the slash after the host */
709	}
710
711	path = ptr;
712	}
713
714	#if !defined(MSDOS) && !defined(WIN32) && !defined(__CYGWIN__)
715	/* Don't allow Windows drive letters when not in Windows.
716	* This catches both "file:/c:" and "file:c:" */
717	if(('/' == path[0] && STARTS_WITH_URL_DRIVE_PREFIX(&path[1])) \|\|
718	STARTS_WITH_URL_DRIVE_PREFIX(path)) {
719	/* File drive letters are only accepted in MSDOS/Windows */
720	return CURLUE_MALFORMED_INPUT;
721	}
722	#else
723	/* If the path starts with a slash and a drive letter, ditch the slash */
724	if('/' == path[0] && STARTS_WITH_URL_DRIVE_PREFIX(&path[1])) {
725	/* This cannot be done with strcpy, as the memory chunks overlap! */
726	memmove(path, &path[1], strlen(&path[1]) + 1);
727	}
728	#endif
729
730	}
731	else {
732	/* clear path */
733	const char *p;
734	const char *hostp;
735	size_t len;
736	path[0] = 0;
737
738	if(url_has_scheme) {
739	int i = 0;
740	p = &url[schemelen + 1];
741	while(p && (*p == '/') && (i < 4)) {
742	p++;
743	i++;
744	}
745	if((i < 1) \|\| (i>3))
746	/* less than one or more than three slashes */
747	return CURLUE_MALFORMED_INPUT;
748
749	schemep = schemebuf;
750	if(!Curl_builtin_scheme(schemep) &&
751	!(flags & CURLU_NON_SUPPORT_SCHEME))
752	return CURLUE_UNSUPPORTED_SCHEME;
753
754	if(junkscan(schemep))
755	return CURLUE_MALFORMED_INPUT;
756	}
757	else {
758	/* no scheme! */
759
760	if(!(flags & (CURLU_DEFAULT_SCHEME\|CURLU_GUESS_SCHEME)))
761	return CURLUE_MALFORMED_INPUT;
762	if(flags & CURLU_DEFAULT_SCHEME)
763	schemep = (char *) DEFAULT_SCHEME;
764
765	/*
766	* The URL was badly formatted, let's try without scheme specified.
767	*/
768	p = url;
769	}
770	hostp = p; /* host name starts here */
771
772	while(p && !HOSTNAME_END(p)) /* find end of host name */
773	p++;
774
775	len = p - hostp;
776	if(!len)
777	return CURLUE_MALFORMED_INPUT;
778
779	memcpy(hostname, hostp, len);
780	hostname[len] = 0;
781
782	if((flags & CURLU_GUESS_SCHEME) && !schemep) {
783	/* legacy curl-style guess based on host name */
784	if(checkprefix("ftp.", hostname))
785	schemep = (char *)"ftp";
786	else if(checkprefix("dict.", hostname))
787	schemep = (char *)"dict";
788	else if(checkprefix("ldap.", hostname))
789	schemep = (char *)"ldap";
790	else if(checkprefix("imap.", hostname))
791	schemep = (char *)"imap";
792	else if(checkprefix("smtp.", hostname))
793	schemep = (char *)"smtp";
794	else if(checkprefix("pop3.", hostname))
795	schemep = (char *)"pop3";
796	else
797	schemep = (char *)"http";
798	}
799
800	len = strlen(p);
801	memcpy(path, p, len);
802	path[len] = 0;
803
804	u->scheme = strdup(schemep);
805	if(!u->scheme)
806	return CURLUE_OUT_OF_MEMORY;
807	}
808
809	/* if this is a known scheme, get some details */
810	h = Curl_builtin_scheme(u->scheme);
811
812	if(junkscan(path))
813	return CURLUE_MALFORMED_INPUT;
814
815	query = strchr(path, '?');
816	if(query)
817	*query++ = 0;
818
819	fragment = strchr(query?query:path, '#');
820	if(fragment)
821	*fragment++ = 0;
822
823	if(!path[0])
824	/* if there's no path set, unset */
825	path = NULL;
826	else if(!(flags & CURLU_PATH_AS_IS)) {
827	/* sanitise paths and remove ../ and ./ sequences according to RFC3986 */
828	char *newp = Curl_dedotdotify(path);
829	if(!newp)
830	return CURLUE_OUT_OF_MEMORY;
831
832	if(strcmp(newp, path)) {
833	/* if we got a new version */
834	path = newp;
835	path_alloced = TRUE;
836	}
837	else
838	free(newp);
839	}
840	if(path) {
841	u->path = path_alloced?path:strdup(path);
842	if(!u->path)
843	return CURLUE_OUT_OF_MEMORY;
844	}
845
846	if(hostname) {
847	/*
848	* Parse the login details and strip them out of the host name.
849	*/
850	if(junkscan(hostname))
851	return CURLUE_MALFORMED_INPUT;
852
853	result = parse_hostname_login(u, h, &hostname, flags);
854	if(result)
855	return result;
856
857	result = Curl_parse_port(u, hostname);
858	if(result)
859	return result;
860
861	result = hostname_check(hostname, flags);
862	if(result)
863	return result;
864
865	u->host = strdup(hostname);
866	if(!u->host)
867	return CURLUE_OUT_OF_MEMORY;
868	}
869
870	if(query) {
871	u->query = strdup(query);
872	if(!u->query)
873	return CURLUE_OUT_OF_MEMORY;
874	}
875	if(fragment && fragment[0]) {
876	u->fragment = strdup(fragment);
877	if(!u->fragment)
878	return CURLUE_OUT_OF_MEMORY;
879	}
880
881	free(u->scratch);
882	u->scratch = NULL;
883
884	return CURLUE_OK;
885	}
886
887	/*
888	* Parse the URL and set the relevant members of the Curl_URL struct.
889	*/
890	static CURLUcode parseurl(const char url, CURLU u, unsigned int flags)
891	{
892	CURLUcode result = seturl(url, u, flags);
893	if(result) {
894	free_urlhandle(u);
895	memset(u, 0, sizeof(struct Curl_URL));
896	}
897	return result;
898	}
899
900	/*
901	*/
902	CURLU *curl_url(void)
903	{
904	return calloc(sizeof(struct Curl_URL), 1);
905	}
906
907	void curl_url_cleanup(CURLU *u)
908	{
909	if(u) {
910	free_urlhandle(u);
911	free(u);
912	}
913	}
914
/*
 * Duplicate the string member 'name' from 'src' into 'dest' when it is set.
 * Jumps to a label called 'fail', which the using function must provide, if
 * the duplication fails. Wrapped in do/while(0) so that it acts as a single
 * statement, also in an unbraced if/else.
 */
#define DUP(dest, src, name)                 \
  do {                                       \
    if((src)->name) {                        \
      (dest)->name = strdup((src)->name);    \
      if(!(dest)->name)                      \
        goto fail;                           \
    }                                        \
  } while(0)
921
922	CURLU curl_url_dup(CURLU in)
923	{
924	struct Curl_URL *u = calloc(sizeof(struct Curl_URL), 1);
925	if(u) {
926	DUP(u, in, scheme);
927	DUP(u, in, user);
928	DUP(u, in, password);
929	DUP(u, in, options);
930	DUP(u, in, host);
931	DUP(u, in, port);
932	DUP(u, in, path);
933	DUP(u, in, query);
934	DUP(u, in, fragment);
935	u->portnum = in->portnum;
936	}
937	return u;
938	fail:
939	curl_url_cleanup(u);
940	return NULL;
941	}
942
943	CURLUcode curl_url_get(CURLU *u, CURLUPart what,
944	char **part, unsigned int flags)
945	{
946	char *ptr;
947	CURLUcode ifmissing = CURLUE_UNKNOWN_PART;
948	char portbuf[7];
949	bool urldecode = (flags & CURLU_URLDECODE)?1:0;
950	bool plusdecode = FALSE;
951	(void)flags;
952	if(!u)
953	return CURLUE_BAD_HANDLE;
954	if(!part)
955	return CURLUE_BAD_PARTPOINTER;
956	*part = NULL;
957
958	switch(what) {
959	case CURLUPART_SCHEME:
960	ptr = u->scheme;
961	ifmissing = CURLUE_NO_SCHEME;
962	urldecode = FALSE; /* never for schemes */
963	break;
964	case CURLUPART_USER:
965	ptr = u->user;
966	ifmissing = CURLUE_NO_USER;
967	break;
968	case CURLUPART_PASSWORD:
969	ptr = u->password;
970	ifmissing = CURLUE_NO_PASSWORD;
971	break;
972	case CURLUPART_OPTIONS:
973	ptr = u->options;
974	ifmissing = CURLUE_NO_OPTIONS;
975	break;
976	case CURLUPART_HOST:
977	ptr = u->host;
978	ifmissing = CURLUE_NO_HOST;
979	break;
980	case CURLUPART_PORT:
981	ptr = u->port;
982	ifmissing = CURLUE_NO_PORT;
983	urldecode = FALSE; /* never for port */
984	if(!ptr && (flags & CURLU_DEFAULT_PORT) && u->scheme) {
985	/* there's no stored port number, but asked to deliver
986	a default one for the scheme */
987	const struct Curl_handler *h =
988	Curl_builtin_scheme(u->scheme);
989	if(h) {
990	msnprintf(portbuf, sizeof(portbuf), "%ld", h->defport);
991	ptr = portbuf;
992	}
993	}
994	else if(ptr && u->scheme) {
995	/* there is a stored port number, but ask to inhibit if
996	it matches the default one for the scheme */
997	const struct Curl_handler *h =
998	Curl_builtin_scheme(u->scheme);
999	if(h && (h->defport == u->portnum) &&
1000	(flags & CURLU_NO_DEFAULT_PORT))
1001	ptr = NULL;
1002	}
1003	break;
1004	case CURLUPART_PATH:
1005	ptr = u->path;
1006	if(!ptr) {
1007	ptr = u->path = strdup("/");
1008	if(!u->path)
1009	return CURLUE_OUT_OF_MEMORY;
1010	}
1011	break;
1012	case CURLUPART_QUERY:
1013	ptr = u->query;
1014	ifmissing = CURLUE_NO_QUERY;
1015	plusdecode = urldecode;
1016	break;
1017	case CURLUPART_FRAGMENT:
1018	ptr = u->fragment;
1019	ifmissing = CURLUE_NO_FRAGMENT;
1020	break;
1021	case CURLUPART_URL: {
1022	char *url;
1023	char *scheme;
1024	char *options = u->options;
1025	char *port = u->port;
1026	if(u->scheme && strcasecompare("file", u->scheme)) {
1027	url = aprintf("file://%s%s%s",
1028	u->path,
1029	u->fragment? "#": "",
1030	u->fragment? u->fragment : "");
1031	}
1032	else if(!u->host)
1033	return CURLUE_NO_HOST;
1034	else {
1035	const struct Curl_handler *h = NULL;
1036	if(u->scheme)
1037	scheme = u->scheme;
1038	else if(flags & CURLU_DEFAULT_SCHEME)
1039	scheme = (char *) DEFAULT_SCHEME;
1040	else
1041	return CURLUE_NO_SCHEME;
1042
1043	if(scheme) {
1044	h = Curl_builtin_scheme(scheme);
1045	if(!port && (flags & CURLU_DEFAULT_PORT)) {
1046	/* there's no stored port number, but asked to deliver
1047	a default one for the scheme */
1048	if(h) {
1049	msnprintf(portbuf, sizeof(portbuf), "%ld", h->defport);
1050	port = portbuf;
1051	}
1052	}
1053	else if(port) {
1054	/* there is a stored port number, but asked to inhibit if it matches
1055	the default one for the scheme */
1056	if(h && (h->defport == u->portnum) &&
1057	(flags & CURLU_NO_DEFAULT_PORT))
1058	port = NULL;
1059	}
1060	}
1061	if(h && !(h->flags & PROTOPT_URLOPTIONS))
1062	options = NULL;
1063
1064	url = aprintf("%s://%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s",
1065	scheme,
1066	u->user ? u->user : "",
1067	u->password ? ":": "",
1068	u->password ? u->password : "",
1069	options ? ";" : "",
1070	options ? options : "",
1071	(u->user \|\| u->password \|\| options) ? "@": "",
1072	u->host,
1073	port ? ":": "",
1074	port ? port : "",
1075	(u->path && (u->path[0] != '/')) ? "/": "",
1076	u->path ? u->path : "/",
1077	(u->query && u->query[0]) ? "?": "",
1078	(u->query && u->query[0]) ? u->query : "",
1079	u->fragment? "#": "",
1080	u->fragment? u->fragment : "");
1081	}
1082	if(!url)
1083	return CURLUE_OUT_OF_MEMORY;
1084	*part = url;
1085	return CURLUE_OK;
1086	break;
1087	}
1088	default:
1089	ptr = NULL;
1090	}
1091	if(ptr) {
1092	*part = strdup(ptr);
1093	if(!*part)
1094	return CURLUE_OUT_OF_MEMORY;
1095	if(plusdecode) {
1096	/* convert + to space */
1097	char *plus;
1098	for(plus = part; plus; ++plus) {
1099	if(*plus == '+')
1100	*plus = ' ';
1101	}
1102	}
1103	if(urldecode) {
1104	char *decoded;
1105	size_t dlen;
1106	CURLcode res = Curl_urldecode(NULL, *part, 0, &decoded, &dlen, TRUE);
1107	free(*part);
1108	if(res) {
1109	*part = NULL;
1110	return CURLUE_URLDECODE;
1111	}
1112	*part = decoded;
1113	}
1114	return CURLUE_OK;
1115	}
1116	else
1117	return ifmissing;
1118	}
1119
1120	CURLUcode curl_url_set(CURLU *u, CURLUPart what,
1121	const char *part, unsigned int flags)
1122	{
1123	char **storep = NULL;
1124	long port = 0;
1125	bool urlencode = (flags & CURLU_URLENCODE)? 1 : 0;
1126	bool plusencode = FALSE;
1127	bool urlskipslash = FALSE;
1128	bool appendquery = FALSE;
1129	bool equalsencode = FALSE;
1130
1131	if(!u)
1132	return CURLUE_BAD_HANDLE;
1133	if(!part) {
1134	/* setting a part to NULL clears it */
1135	switch(what) {
1136	case CURLUPART_URL:
1137	break;
1138	case CURLUPART_SCHEME:
1139	storep = &u->scheme;
1140	break;
1141	case CURLUPART_USER:
1142	storep = &u->user;
1143	break;
1144	case CURLUPART_PASSWORD:
1145	storep = &u->password;
1146	break;
1147	case CURLUPART_OPTIONS:
1148	storep = &u->options;
1149	break;
1150	case CURLUPART_HOST:
1151	storep = &u->host;
1152	break;
1153	case CURLUPART_PORT:
1154	storep = &u->port;
1155	break;
1156	case CURLUPART_PATH:
1157	storep = &u->path;
1158	break;
1159	case CURLUPART_QUERY:
1160	storep = &u->query;
1161	break;
1162	case CURLUPART_FRAGMENT:
1163	storep = &u->fragment;
1164	break;
1165	default:
1166	return CURLUE_UNKNOWN_PART;
1167	}
1168	if(storep && *storep) {
1169	free(*storep);
1170	*storep = NULL;
1171	}
1172	return CURLUE_OK;
1173	}
1174
1175	switch(what) {
1176	case CURLUPART_SCHEME:
1177	if(!(flags & CURLU_NON_SUPPORT_SCHEME) &&
1178	/* verify that it is a fine scheme */
1179	!Curl_builtin_scheme(part))
1180	return CURLUE_UNSUPPORTED_SCHEME;
1181	storep = &u->scheme;
1182	urlencode = FALSE; /* never */
1183	break;
1184	case CURLUPART_USER:
1185	storep = &u->user;
1186	break;
1187	case CURLUPART_PASSWORD:
1188	storep = &u->password;
1189	break;
1190	case CURLUPART_OPTIONS:
1191	storep = &u->options;
1192	break;
1193	case CURLUPART_HOST:
1194	storep = &u->host;
1195	break;
1196	case CURLUPART_PORT:
1197	urlencode = FALSE; /* never */
1198	port = strtol(part, NULL, 10); /* Port number must be decimal */
1199	if((port <= 0) \|\| (port > 0xffff))
1200	return CURLUE_BAD_PORT_NUMBER;
1201	storep = &u->port;
1202	break;
1203	case CURLUPART_PATH:
1204	urlskipslash = TRUE;
1205	storep = &u->path;
1206	break;
1207	case CURLUPART_QUERY:
1208	plusencode = urlencode;
1209	appendquery = (flags & CURLU_APPENDQUERY)?1:0;
1210	equalsencode = appendquery;
1211	storep = &u->query;
1212	break;
1213	case CURLUPART_FRAGMENT:
1214	storep = &u->fragment;
1215	break;
1216	case CURLUPART_URL: {
1217	/*
1218	* Allow a new URL to replace the existing (if any) contents.
1219	*
1220	* If the existing contents is enough for a URL, allow a relative URL to
1221	* replace it.
1222	*/
1223	CURLUcode result;
1224	char *oldurl;
1225	char *redired_url;
1226	CURLU *handle2;
1227
1228	if(Curl_is_absolute_url(part, NULL, MAX_SCHEME_LEN)) {
1229	handle2 = curl_url();
1230	if(!handle2)
1231	return CURLUE_OUT_OF_MEMORY;
1232	result = parseurl(part, handle2, flags);
1233	if(!result)
1234	mv_urlhandle(handle2, u);
1235	else
1236	curl_url_cleanup(handle2);
1237	return result;
1238	}
1239	/* extract the full "old" URL to do the redirect on */
1240	result = curl_url_get(u, CURLUPART_URL, &oldurl, flags);
1241	if(result) {
1242	/* couldn't get the old URL, just use the new! */
1243	handle2 = curl_url();
1244	if(!handle2)
1245	return CURLUE_OUT_OF_MEMORY;
1246	result = parseurl(part, handle2, flags);
1247	if(!result)
1248	mv_urlhandle(handle2, u);
1249	else
1250	curl_url_cleanup(handle2);
1251	return result;
1252	}
1253
1254	/* apply the relative part to create a new URL */
1255	redired_url = Curl_concat_url(oldurl, part);
1256	free(oldurl);
1257	if(!redired_url)
1258	return CURLUE_OUT_OF_MEMORY;
1259
1260	/* now parse the new URL */
1261	handle2 = curl_url();
1262	if(!handle2) {
1263	free(redired_url);
1264	return CURLUE_OUT_OF_MEMORY;
1265	}
1266	result = parseurl(redired_url, handle2, flags);
1267	free(redired_url);
1268	if(!result)
1269	mv_urlhandle(handle2, u);
1270	else
1271	curl_url_cleanup(handle2);
1272	return result;
1273	}
1274	default:
1275	return CURLUE_UNKNOWN_PART;
1276	}
1277	if(storep) {
1278	const char *newp = part;
1279	size_t nalloc = strlen(part);
1280
1281	if(urlencode) {
1282	const char *i;
1283	char *o;
1284	bool free_part = FALSE;
1285	char enc = malloc(nalloc 3 + 1); /* for worst case! */
1286	if(!enc)
1287	return CURLUE_OUT_OF_MEMORY;
1288	if(plusencode) {
1289	/* space to plus */
1290	i = part;
1291	for(o = enc; *i; ++o, ++i)
1292	o = (i == ' ') ? '+' : *i;
1293	o = 0; / zero terminate */
1294	part = strdup(enc);
1295	if(!part) {
1296	free(enc);
1297	return CURLUE_OUT_OF_MEMORY;
1298	}
1299	free_part = TRUE;
1300	}
1301	for(i = part, o = enc; *i; i++) {
1302	if(Curl_isunreserved(*i) \|\|
1303	((*i == '/') && urlskipslash) \|\|
1304	((*i == '=') && equalsencode) \|\|
1305	((*i == '+') && plusencode)) {
1306	if((*i == '=') && equalsencode)
1307	/* only skip the first equals sign */
1308	equalsencode = FALSE;
1309	o = i;
1310	o++;
1311	}
1312	else {
1313	msnprintf(o, 4, "%%%02x", *i);
1314	o += 3;
1315	}
1316	}
1317	o = 0; / zero terminate */
1318	newp = enc;
1319	if(free_part)
1320	free((char *)part);
1321	}
1322	else {
1323	char *p;
1324	newp = strdup(part);
1325	if(!newp)
1326	return CURLUE_OUT_OF_MEMORY;
1327	p = (char *)newp;
1328	while(*p) {
1329	/* make sure percent encoded are lower case */
1330	if((*p == '%') && ISXDIGIT(p[1]) && ISXDIGIT(p[2]) &&
1331	(ISUPPER(p[1]) \|\| ISUPPER(p[2]))) {
1332	p[1] = (char)TOLOWER(p[1]);
1333	p[2] = (char)TOLOWER(p[2]);
1334	p += 3;
1335	}
1336	else
1337	p++;
1338	}
1339	}
1340
1341	if(appendquery) {
1342	/* Append the string onto the old query. Add a '&' separator if none is
1343	present at the end of the existing query already */
1344	size_t querylen = u->query ? strlen(u->query) : 0;
1345	bool addamperand = querylen && (u->query[querylen -1] != '&');
1346	if(querylen) {
1347	size_t newplen = strlen(newp);
1348	char *p = malloc(querylen + addamperand + newplen + 1);
1349	if(!p) {
1350	free((char *)newp);
1351	return CURLUE_OUT_OF_MEMORY;
1352	}
1353	strcpy(p, u->query); /* original query */
1354	if(addamperand)
1355	p[querylen] = '&'; /* ampersand */
1356	strcpy(&p[querylen + addamperand], newp); /* new suffix */
1357	free((char *)newp);
1358	free(*storep);
1359	*storep = p;
1360	return CURLUE_OK;
1361	}
1362	}
1363
1364	free(*storep);
1365	storep = (char )newp;
1366	}
1367	/* set after the string, to make it not assigned if the allocation above
1368	fails */
1369	if(port)
1370	u->portnum = port;
1371	return CURLUE_OK;
1372	}

Note: See TracBrowser for help on using the repository browser.

source: vbox/trunk/src/libs/curl-7.64.0/lib/urlapi.c@ 94601

Download in other formats: