parserInternals.c@ 63001

Last change on this file since 63001 was 58072, checked in by vboxsync, 9 years ago
libxml 2.9.2 unmodified
Property svn:eol-style set to `native`
File size: 61.1 KB

Line
1	/*
2	* parserInternals.c : Internal routines (and obsolete ones) needed for the
3	* XML and HTML parsers.
4	*
5	* See Copyright for the status of this software.
6	*
7	* daniel@veillard.com
8	*/
9
10	#define IN_LIBXML
11	#include "libxml.h"
12
13	#if defined(WIN32) && !defined (__CYGWIN__)
14	#define XML_DIR_SEP '\\'
15	#else
16	#define XML_DIR_SEP '/'
17	#endif
18
19	#include <string.h>
20	#ifdef HAVE_CTYPE_H
21	#include <ctype.h>
22	#endif
23	#ifdef HAVE_STDLIB_H
24	#include <stdlib.h>
25	#endif
26	#ifdef HAVE_SYS_STAT_H
27	#include <sys/stat.h>
28	#endif
29	#ifdef HAVE_FCNTL_H
30	#include <fcntl.h>
31	#endif
32	#ifdef HAVE_UNISTD_H
33	#include <unistd.h>
34	#endif
35	#ifdef HAVE_ZLIB_H
36	#include <zlib.h>
37	#endif
38
39	#include <libxml/xmlmemory.h>
40	#include <libxml/tree.h>
41	#include <libxml/parser.h>
42	#include <libxml/parserInternals.h>
43	#include <libxml/valid.h>
44	#include <libxml/entities.h>
45	#include <libxml/xmlerror.h>
46	#include <libxml/encoding.h>
47	#include <libxml/valid.h>
48	#include <libxml/xmlIO.h>
49	#include <libxml/uri.h>
50	#include <libxml/dict.h>
51	#include <libxml/SAX.h>
52	#ifdef LIBXML_CATALOG_ENABLED
53	#include <libxml/catalog.h>
54	#endif
55	#include <libxml/globals.h>
56	#include <libxml/chvalid.h>
57
58	#include "buf.h"
59	#include "enc.h"
60
61	/*
62	* Various global defaults for parsing
63	*/
64
65	/**
66	* xmlCheckVersion:
67	* @version: the include version number
68	*
69	* check the compiled lib version against the include one.
70	* This can warn or immediately kill the application
71	*/
72	void
73	xmlCheckVersion(int version) {
74	int myversion = (int) LIBXML_VERSION;
75
76	xmlInitParser();
77
78	if ((myversion / 10000) != (version / 10000)) {
79	xmlGenericError(xmlGenericErrorContext,
80	"Fatal: program compiled against libxml %d using libxml %d\n",
81	(version / 10000), (myversion / 10000));
82	fprintf(stderr,
83	"Fatal: program compiled against libxml %d using libxml %d\n",
84	(version / 10000), (myversion / 10000));
85	}
86	if ((myversion / 100) < (version / 100)) {
87	xmlGenericError(xmlGenericErrorContext,
88	"Warning: program compiled against libxml %d using older %d\n",
89	(version / 100), (myversion / 100));
90	}
91	}
92
93
94	/************************************************************************
95	* *
96	* Some factorized error routines *
97	* *
98	************************************************************************/
99
100
101	/**
102	* xmlErrMemory:
103	* @ctxt: an XML parser context
104	* @extra: extra informations
105	*
106	* Handle a redefinition of attribute error
107	*/
108	void
109	xmlErrMemory(xmlParserCtxtPtr ctxt, const char *extra)
110	{
111	if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
112	(ctxt->instate == XML_PARSER_EOF))
113	return;
114	if (ctxt != NULL) {
115	ctxt->errNo = XML_ERR_NO_MEMORY;
116	ctxt->instate = XML_PARSER_EOF;
117	ctxt->disableSAX = 1;
118	}
119	if (extra)
120	__xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
121	XML_ERR_NO_MEMORY, XML_ERR_FATAL, NULL, 0, extra,
122	NULL, NULL, 0, 0,
123	"Memory allocation failed : %s\n", extra);
124	else
125	__xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
126	XML_ERR_NO_MEMORY, XML_ERR_FATAL, NULL, 0, NULL,
127	NULL, NULL, 0, 0, "Memory allocation failed\n");
128	}
129
130	/**
131	* __xmlErrEncoding:
132	* @ctxt: an XML parser context
133	* @xmlerr: the error number
134	* @msg: the error message
135	* @str1: an string info
136	* @str2: an string info
137	*
138	* Handle an encoding error
139	*/
140	void
141	__xmlErrEncoding(xmlParserCtxtPtr ctxt, xmlParserErrors xmlerr,
142	const char msg, const xmlChar str1, const xmlChar * str2)
143	{
144	if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
145	(ctxt->instate == XML_PARSER_EOF))
146	return;
147	if (ctxt != NULL)
148	ctxt->errNo = xmlerr;
149	__xmlRaiseError(NULL, NULL, NULL,
150	ctxt, NULL, XML_FROM_PARSER, xmlerr, XML_ERR_FATAL,
151	NULL, 0, (const char ) str1, (const char ) str2,
152	NULL, 0, 0, msg, str1, str2);
153	if (ctxt != NULL) {
154	ctxt->wellFormed = 0;
155	if (ctxt->recovery == 0)
156	ctxt->disableSAX = 1;
157	}
158	}
159
160	/**
161	* xmlErrInternal:
162	* @ctxt: an XML parser context
163	* @msg: the error message
164	* @str: error informations
165	*
166	* Handle an internal error
167	*/
168	static void
169	xmlErrInternal(xmlParserCtxtPtr ctxt, const char msg, const xmlChar str)
170	{
171	if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
172	(ctxt->instate == XML_PARSER_EOF))
173	return;
174	if (ctxt != NULL)
175	ctxt->errNo = XML_ERR_INTERNAL_ERROR;
176	__xmlRaiseError(NULL, NULL, NULL,
177	ctxt, NULL, XML_FROM_PARSER, XML_ERR_INTERNAL_ERROR,
178	XML_ERR_FATAL, NULL, 0, (const char *) str, NULL, NULL,
179	0, 0, msg, str);
180	if (ctxt != NULL) {
181	ctxt->wellFormed = 0;
182	if (ctxt->recovery == 0)
183	ctxt->disableSAX = 1;
184	}
185	}
186
187	/**
188	* xmlErrEncodingInt:
189	* @ctxt: an XML parser context
190	* @error: the error number
191	* @msg: the error message
192	* @val: an integer value
193	*
194	* n encoding error
195	*/
196	static void
197	xmlErrEncodingInt(xmlParserCtxtPtr ctxt, xmlParserErrors error,
198	const char *msg, int val)
199	{
200	if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
201	(ctxt->instate == XML_PARSER_EOF))
202	return;
203	if (ctxt != NULL)
204	ctxt->errNo = error;
205	__xmlRaiseError(NULL, NULL, NULL,
206	ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
207	NULL, 0, NULL, NULL, NULL, val, 0, msg, val);
208	if (ctxt != NULL) {
209	ctxt->wellFormed = 0;
210	if (ctxt->recovery == 0)
211	ctxt->disableSAX = 1;
212	}
213	}
214
/**
 * xmlIsLetter:
 * @c:  an unicode character (int)
 *
 * Check whether the character is allowed by the production
 * [84] Letter ::= BaseChar | Ideographic
 *
 * Returns 0 if not, non-zero otherwise
 */
int
xmlIsLetter(int c) {
    return(IS_BASECHAR(c) || IS_IDEOGRAPHIC(c));
}
228
229	/************************************************************************
230	* *
231	* Input handling functions for progressive parsing *
232	* *
233	************************************************************************/
234
235	/* #define DEBUG_INPUT */
236	/* #define DEBUG_STACK */
237	/* #define DEBUG_PUSH */
238
239
240	/* we need to keep enough input to show errors in context */
241	#define LINE_LEN 80
242
#ifdef DEBUG_INPUT
#define CHECK_BUFFER(in) check_buffer(in)

/*
 * check_buffer:
 * @in:  the parser input to inspect
 *
 * Debug-only consistency check (compiled when DEBUG_INPUT is defined).
 * Reports through the generic error handler when in->base does not match
 * the buffer content pointer or when in->cur lies outside
 * [base, base + use], then dumps the pointers and offsets.
 *
 * Pointers are printed with %p and the offsets with long-sized conversions:
 * casting a pointer to int truncates it on LP64 platforms and passing
 * ptrdiff_t/size_t values to %d is a format mismatch.
 */
static
void check_buffer(xmlParserInputPtr in) {
    if (in->base != xmlBufContent(in->buf->buffer)) {
        xmlGenericError(xmlGenericErrorContext,
                "xmlParserInput: base mismatch problem\n");
    }
    if (in->cur < in->base) {
        xmlGenericError(xmlGenericErrorContext,
                "xmlParserInput: cur < base problem\n");
    }
    if (in->cur > in->base + xmlBufUse(in->buf->buffer)) {
        xmlGenericError(xmlGenericErrorContext,
                "xmlParserInput: cur > base + use problem\n");
    }
    xmlGenericError(xmlGenericErrorContext,
            "buffer %p : content %p, cur %ld, use %lu\n",
            (const void *) in,
            (const void *) xmlBufContent(in->buf->buffer),
            (long) (in->cur - in->base),
            (unsigned long) xmlBufUse(in->buf->buffer));
}

#else
#define CHECK_BUFFER(in)
#endif
268
269
270	/**
271	* xmlParserInputRead:
272	* @in: an XML parser input
273	* @len: an indicative size for the lookahead
274	*
275	* This function was internal and is deprecated.
276	*
277	* Returns -1 as this is an error to use it.
278	*/
279	int
280	xmlParserInputRead(xmlParserInputPtr in ATTRIBUTE_UNUSED, int len ATTRIBUTE_UNUSED) {
281	return(-1);
282	}
283
284	/**
285	* xmlParserInputGrow:
286	* @in: an XML parser input
287	* @len: an indicative size for the lookahead
288	*
289	* This function increase the input for the parser. It tries to
290	* preserve pointers to the input buffer, and keep already read data
291	*
292	* Returns the amount of char read, or -1 in case of error, 0 indicate the
293	* end of this entity
294	*/
295	int
296	xmlParserInputGrow(xmlParserInputPtr in, int len) {
297	size_t ret;
298	size_t indx;
299	const xmlChar *content;
300
301	if ((in == NULL) \|\| (len < 0)) return(-1);
302	#ifdef DEBUG_INPUT
303	xmlGenericError(xmlGenericErrorContext, "Grow\n");
304	#endif
305	if (in->buf == NULL) return(-1);
306	if (in->base == NULL) return(-1);
307	if (in->cur == NULL) return(-1);
308	if (in->buf->buffer == NULL) return(-1);
309
310	CHECK_BUFFER(in);
311
312	indx = in->cur - in->base;
313	if (xmlBufUse(in->buf->buffer) > (unsigned int) indx + INPUT_CHUNK) {
314
315	CHECK_BUFFER(in);
316
317	return(0);
318	}
319	if (in->buf->readcallback != NULL) {
320	ret = xmlParserInputBufferGrow(in->buf, len);
321	} else
322	return(0);
323
324	/*
325	* NOTE : in->base may be a "dangling" i.e. freed pointer in this
326	* block, but we use it really as an integer to do some
327	* pointer arithmetic. Insure will raise it as a bug but in
328	* that specific case, that's not !
329	*/
330
331	content = xmlBufContent(in->buf->buffer);
332	if (in->base != content) {
333	/*
334	* the buffer has been reallocated
335	*/
336	indx = in->cur - in->base;
337	in->base = content;
338	in->cur = &content[indx];
339	}
340	in->end = xmlBufEnd(in->buf->buffer);
341
342	CHECK_BUFFER(in);
343
344	return(ret);
345	}
346
347	/**
348	* xmlParserInputShrink:
349	* @in: an XML parser input
350	*
351	* This function removes used input for the parser.
352	*/
353	void
354	xmlParserInputShrink(xmlParserInputPtr in) {
355	size_t used;
356	size_t ret;
357	size_t indx;
358	const xmlChar *content;
359
360	#ifdef DEBUG_INPUT
361	xmlGenericError(xmlGenericErrorContext, "Shrink\n");
362	#endif
363	if (in == NULL) return;
364	if (in->buf == NULL) return;
365	if (in->base == NULL) return;
366	if (in->cur == NULL) return;
367	if (in->buf->buffer == NULL) return;
368
369	CHECK_BUFFER(in);
370
371	used = in->cur - xmlBufContent(in->buf->buffer);
372	/*
373	* Do not shrink on large buffers whose only a tiny fraction
374	* was consumed
375	*/
376	if (used > INPUT_CHUNK) {
377	ret = xmlBufShrink(in->buf->buffer, used - LINE_LEN);
378	if (ret > 0) {
379	in->cur -= ret;
380	in->consumed += ret;
381	}
382	in->end = xmlBufEnd(in->buf->buffer);
383	}
384
385	CHECK_BUFFER(in);
386
387	if (xmlBufUse(in->buf->buffer) > INPUT_CHUNK) {
388	return;
389	}
390	xmlParserInputBufferRead(in->buf, 2 * INPUT_CHUNK);
391	content = xmlBufContent(in->buf->buffer);
392	if (in->base != content) {
393	/*
394	* the buffer has been reallocated
395	*/
396	indx = in->cur - in->base;
397	in->base = content;
398	in->cur = &content[indx];
399	}
400	in->end = xmlBufEnd(in->buf->buffer);
401
402	CHECK_BUFFER(in);
403	}
404
405	/************************************************************************
406	* *
407	* UTF8 character input and related functions *
408	* *
409	************************************************************************/
410
411	/**
412	* xmlNextChar:
413	* @ctxt: the XML parser context
414	*
415	* Skip to the next char input char.
416	*/
417
418	void
419	xmlNextChar(xmlParserCtxtPtr ctxt)
420	{
421	if ((ctxt == NULL) \|\| (ctxt->instate == XML_PARSER_EOF) \|\|
422	(ctxt->input == NULL))
423	return;
424
425	if (ctxt->charset == XML_CHAR_ENCODING_UTF8) {
426	if ((*ctxt->input->cur == 0) &&
427	(xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0) &&
428	(ctxt->instate != XML_PARSER_COMMENT)) {
429	/*
430	* If we are at the end of the current entity and
431	* the context allows it, we pop consumed entities
432	* automatically.
433	* the auto closing should be blocked in other cases
434	*/
435	xmlPopInput(ctxt);
436	} else {
437	const unsigned char *cur;
438	unsigned char c;
439
440	/*
441	* 2.11 End-of-Line Handling
442	* the literal two-character sequence "#xD#xA" or a standalone
443	* literal #xD, an XML processor must pass to the application
444	* the single character #xA.
445	*/
446	if (*(ctxt->input->cur) == '\n') {
447	ctxt->input->line++; ctxt->input->col = 1;
448	} else
449	ctxt->input->col++;
450
451	/*
452	* We are supposed to handle UTF8, check it's valid
453	* From rfc2044: encoding of the Unicode values on UTF-8:
454	*
455	* UCS-4 range (hex.) UTF-8 octet sequence (binary)
456	* 0000 0000-0000 007F 0xxxxxxx
457	* 0000 0080-0000 07FF 110xxxxx 10xxxxxx
458	* 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx
459	*
460	* Check for the 0x110000 limit too
461	*/
462	cur = ctxt->input->cur;
463
464	c = *cur;
465	if (c & 0x80) {
466	if (c == 0xC0)
467	goto encoding_error;
468	if (cur[1] == 0) {
469	xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
470	cur = ctxt->input->cur;
471	}
472	if ((cur[1] & 0xc0) != 0x80)
473	goto encoding_error;
474	if ((c & 0xe0) == 0xe0) {
475	unsigned int val;
476
477	if (cur[2] == 0) {
478	xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
479	cur = ctxt->input->cur;
480	}
481	if ((cur[2] & 0xc0) != 0x80)
482	goto encoding_error;
483	if ((c & 0xf0) == 0xf0) {
484	if (cur[3] == 0) {
485	xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
486	cur = ctxt->input->cur;
487	}
488	if (((c & 0xf8) != 0xf0) \|\|
489	((cur[3] & 0xc0) != 0x80))
490	goto encoding_error;
491	/* 4-byte code */
492	ctxt->input->cur += 4;
493	val = (cur[0] & 0x7) << 18;
494	val \|= (cur[1] & 0x3f) << 12;
495	val \|= (cur[2] & 0x3f) << 6;
496	val \|= cur[3] & 0x3f;
497	} else {
498	/* 3-byte code */
499	ctxt->input->cur += 3;
500	val = (cur[0] & 0xf) << 12;
501	val \|= (cur[1] & 0x3f) << 6;
502	val \|= cur[2] & 0x3f;
503	}
504	if (((val > 0xd7ff) && (val < 0xe000)) \|\|
505	((val > 0xfffd) && (val < 0x10000)) \|\|
506	(val >= 0x110000)) {
507	xmlErrEncodingInt(ctxt, XML_ERR_INVALID_CHAR,
508	"Char 0x%X out of allowed range\n",
509	val);
510	}
511	} else
512	/* 2-byte code */
513	ctxt->input->cur += 2;
514	} else
515	/* 1-byte code */
516	ctxt->input->cur++;
517
518	ctxt->nbChars++;
519	if (*ctxt->input->cur == 0)
520	xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
521	}
522	} else {
523	/*
524	* Assume it's a fixed length encoding (1) with
525	* a compatible encoding for the ASCII set, since
526	* XML constructs only use < 128 chars
527	*/
528
529	if (*(ctxt->input->cur) == '\n') {
530	ctxt->input->line++; ctxt->input->col = 1;
531	} else
532	ctxt->input->col++;
533	ctxt->input->cur++;
534	ctxt->nbChars++;
535	if (*ctxt->input->cur == 0)
536	xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
537	}
538	if ((*ctxt->input->cur == '%') && (!ctxt->html))
539	xmlParserHandlePEReference(ctxt);
540	if ((*ctxt->input->cur == 0) &&
541	(xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
542	xmlPopInput(ctxt);
543	return;
544	encoding_error:
545	/*
546	* If we detect an UTF8 error that probably mean that the
547	* input encoding didn't get properly advertised in the
548	* declaration header. Report the error and switch the encoding
549	* to ISO-Latin-1 (if you don't like this policy, just declare the
550	* encoding !)
551	*/
552	if ((ctxt == NULL) \|\| (ctxt->input == NULL) \|\|
553	(ctxt->input->end - ctxt->input->cur < 4)) {
554	__xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR,
555	"Input is not proper UTF-8, indicate encoding !\n",
556	NULL, NULL);
557	} else {
558	char buffer[150];
559
560	snprintf(buffer, 149, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
561	ctxt->input->cur[0], ctxt->input->cur[1],
562	ctxt->input->cur[2], ctxt->input->cur[3]);
563	__xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR,
564	"Input is not proper UTF-8, indicate encoding !\n%s",
565	BAD_CAST buffer, NULL);
566	}
567	ctxt->charset = XML_CHAR_ENCODING_8859_1;
568	ctxt->input->cur++;
569	return;
570	}
571
572	/**
573	* xmlCurrentChar:
574	* @ctxt: the XML parser context
575	* @len: pointer to the length of the char read
576	*
577	* The current char value, if using UTF-8 this may actually span multiple
578	* bytes in the input buffer. Implement the end of line normalization:
579	* 2.11 End-of-Line Handling
580	* Wherever an external parsed entity or the literal entity value
581	* of an internal parsed entity contains either the literal two-character
582	* sequence "#xD#xA" or a standalone literal #xD, an XML processor
583	* must pass to the application the single character #xA.
584	* This behavior can conveniently be produced by normalizing all
585	* line breaks to #xA on input, before parsing.)
586	*
587	* Returns the current char value and its length
588	*/
589
590	int
591	xmlCurrentChar(xmlParserCtxtPtr ctxt, int *len) {
592	if ((ctxt == NULL) \|\| (len == NULL) \|\| (ctxt->input == NULL)) return(0);
593	if (ctxt->instate == XML_PARSER_EOF)
594	return(0);
595
596	if ((ctxt->input->cur >= 0x20) && (ctxt->input->cur <= 0x7F)) {
597	*len = 1;
598	return((int) *ctxt->input->cur);
599	}
600	if (ctxt->charset == XML_CHAR_ENCODING_UTF8) {
601	/*
602	* We are supposed to handle UTF8, check it's valid
603	* From rfc2044: encoding of the Unicode values on UTF-8:
604	*
605	* UCS-4 range (hex.) UTF-8 octet sequence (binary)
606	* 0000 0000-0000 007F 0xxxxxxx
607	* 0000 0080-0000 07FF 110xxxxx 10xxxxxx
608	* 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx
609	*
610	* Check for the 0x110000 limit too
611	*/
612	const unsigned char *cur = ctxt->input->cur;
613	unsigned char c;
614	unsigned int val;
615
616	c = *cur;
617	if (c & 0x80) {
618	if (((c & 0x40) == 0) \|\| (c == 0xC0))
619	goto encoding_error;
620	if (cur[1] == 0) {
621	xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
622	cur = ctxt->input->cur;
623	}
624	if ((cur[1] & 0xc0) != 0x80)
625	goto encoding_error;
626	if ((c & 0xe0) == 0xe0) {
627	if (cur[2] == 0) {
628	xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
629	cur = ctxt->input->cur;
630	}
631	if ((cur[2] & 0xc0) != 0x80)
632	goto encoding_error;
633	if ((c & 0xf0) == 0xf0) {
634	if (cur[3] == 0) {
635	xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
636	cur = ctxt->input->cur;
637	}
638	if (((c & 0xf8) != 0xf0) \|\|
639	((cur[3] & 0xc0) != 0x80))
640	goto encoding_error;
641	/* 4-byte code */
642	*len = 4;
643	val = (cur[0] & 0x7) << 18;
644	val \|= (cur[1] & 0x3f) << 12;
645	val \|= (cur[2] & 0x3f) << 6;
646	val \|= cur[3] & 0x3f;
647	if (val < 0x10000)
648	goto encoding_error;
649	} else {
650	/* 3-byte code */
651	*len = 3;
652	val = (cur[0] & 0xf) << 12;
653	val \|= (cur[1] & 0x3f) << 6;
654	val \|= cur[2] & 0x3f;
655	if (val < 0x800)
656	goto encoding_error;
657	}
658	} else {
659	/* 2-byte code */
660	*len = 2;
661	val = (cur[0] & 0x1f) << 6;
662	val \|= cur[1] & 0x3f;
663	if (val < 0x80)
664	goto encoding_error;
665	}
666	if (!IS_CHAR(val)) {
667	xmlErrEncodingInt(ctxt, XML_ERR_INVALID_CHAR,
668	"Char 0x%X out of allowed range\n", val);
669	}
670	return(val);
671	} else {
672	/* 1-byte code */
673	*len = 1;
674	if (*ctxt->input->cur == 0)
675	xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
676	if ((*ctxt->input->cur == 0) &&
677	(ctxt->input->end > ctxt->input->cur)) {
678	xmlErrEncodingInt(ctxt, XML_ERR_INVALID_CHAR,
679	"Char 0x0 out of allowed range\n", 0);
680	}
681	if (*ctxt->input->cur == 0xD) {
682	if (ctxt->input->cur[1] == 0xA) {
683	ctxt->nbChars++;
684	ctxt->input->cur++;
685	}
686	return(0xA);
687	}
688	return((int) *ctxt->input->cur);
689	}
690	}
691	/*
692	* Assume it's a fixed length encoding (1) with
693	* a compatible encoding for the ASCII set, since
694	* XML constructs only use < 128 chars
695	*/
696	*len = 1;
697	if (*ctxt->input->cur == 0xD) {
698	if (ctxt->input->cur[1] == 0xA) {
699	ctxt->nbChars++;
700	ctxt->input->cur++;
701	}
702	return(0xA);
703	}
704	return((int) *ctxt->input->cur);
705	encoding_error:
706	/*
707	* An encoding problem may arise from a truncated input buffer
708	* splitting a character in the middle. In that case do not raise
709	* an error but return 0 to endicate an end of stream problem
710	*/
711	if (ctxt->input->end - ctxt->input->cur < 4) {
712	*len = 0;
713	return(0);
714	}
715
716	/*
717	* If we detect an UTF8 error that probably mean that the
718	* input encoding didn't get properly advertised in the
719	* declaration header. Report the error and switch the encoding
720	* to ISO-Latin-1 (if you don't like this policy, just declare the
721	* encoding !)
722	*/
723	{
724	char buffer[150];
725
726	snprintf(&buffer[0], 149, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
727	ctxt->input->cur[0], ctxt->input->cur[1],
728	ctxt->input->cur[2], ctxt->input->cur[3]);
729	__xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR,
730	"Input is not proper UTF-8, indicate encoding !\n%s",
731	BAD_CAST buffer, NULL);
732	}
733	ctxt->charset = XML_CHAR_ENCODING_8859_1;
734	*len = 1;
735	return((int) *ctxt->input->cur);
736	}
737
738	/**
739	* xmlStringCurrentChar:
740	* @ctxt: the XML parser context
741	* @cur: pointer to the beginning of the char
742	* @len: pointer to the length of the char read
743	*
744	* The current char value, if using UTF-8 this may actually span multiple
745	* bytes in the input buffer.
746	*
747	* Returns the current char value and its length
748	*/
749
750	int
751	xmlStringCurrentChar(xmlParserCtxtPtr ctxt, const xmlChar * cur, int *len)
752	{
753	if ((len == NULL) \|\| (cur == NULL)) return(0);
754	if ((ctxt == NULL) \|\| (ctxt->charset == XML_CHAR_ENCODING_UTF8)) {
755	/*
756	* We are supposed to handle UTF8, check it's valid
757	* From rfc2044: encoding of the Unicode values on UTF-8:
758	*
759	* UCS-4 range (hex.) UTF-8 octet sequence (binary)
760	* 0000 0000-0000 007F 0xxxxxxx
761	* 0000 0080-0000 07FF 110xxxxx 10xxxxxx
762	* 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx
763	*
764	* Check for the 0x110000 limit too
765	*/
766	unsigned char c;
767	unsigned int val;
768
769	c = *cur;
770	if (c & 0x80) {
771	if ((cur[1] & 0xc0) != 0x80)
772	goto encoding_error;
773	if ((c & 0xe0) == 0xe0) {
774
775	if ((cur[2] & 0xc0) != 0x80)
776	goto encoding_error;
777	if ((c & 0xf0) == 0xf0) {
778	if (((c & 0xf8) != 0xf0) \|\| ((cur[3] & 0xc0) != 0x80))
779	goto encoding_error;
780	/* 4-byte code */
781	*len = 4;
782	val = (cur[0] & 0x7) << 18;
783	val \|= (cur[1] & 0x3f) << 12;
784	val \|= (cur[2] & 0x3f) << 6;
785	val \|= cur[3] & 0x3f;
786	} else {
787	/* 3-byte code */
788	*len = 3;
789	val = (cur[0] & 0xf) << 12;
790	val \|= (cur[1] & 0x3f) << 6;
791	val \|= cur[2] & 0x3f;
792	}
793	} else {
794	/* 2-byte code */
795	*len = 2;
796	val = (cur[0] & 0x1f) << 6;
797	val \|= cur[1] & 0x3f;
798	}
799	if (!IS_CHAR(val)) {
800	xmlErrEncodingInt(ctxt, XML_ERR_INVALID_CHAR,
801	"Char 0x%X out of allowed range\n", val);
802	}
803	return (val);
804	} else {
805	/* 1-byte code */
806	*len = 1;
807	return ((int) *cur);
808	}
809	}
810	/*
811	* Assume it's a fixed length encoding (1) with
812	* a compatible encoding for the ASCII set, since
813	* XML constructs only use < 128 chars
814	*/
815	*len = 1;
816	return ((int) *cur);
817	encoding_error:
818
819	/*
820	* An encoding problem may arise from a truncated input buffer
821	* splitting a character in the middle. In that case do not raise
822	* an error but return 0 to endicate an end of stream problem
823	*/
824	if ((ctxt == NULL) \|\| (ctxt->input == NULL) \|\|
825	(ctxt->input->end - ctxt->input->cur < 4)) {
826	*len = 0;
827	return(0);
828	}
829	/*
830	* If we detect an UTF8 error that probably mean that the
831	* input encoding didn't get properly advertised in the
832	* declaration header. Report the error and switch the encoding
833	* to ISO-Latin-1 (if you don't like this policy, just declare the
834	* encoding !)
835	*/
836	{
837	char buffer[150];
838
839	snprintf(buffer, 149, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
840	ctxt->input->cur[0], ctxt->input->cur[1],
841	ctxt->input->cur[2], ctxt->input->cur[3]);
842	__xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR,
843	"Input is not proper UTF-8, indicate encoding !\n%s",
844	BAD_CAST buffer, NULL);
845	}
846	*len = 1;
847	return ((int) *cur);
848	}
849
850	/**
851	* xmlCopyCharMultiByte:
852	* @out: pointer to an array of xmlChar
853	* @val: the char value
854	*
855	* append the char value in the array
856	*
857	* Returns the number of xmlChar written
858	*/
859	int
860	xmlCopyCharMultiByte(xmlChar *out, int val) {
861	if (out == NULL) return(0);
862	/*
863	* We are supposed to handle UTF8, check it's valid
864	* From rfc2044: encoding of the Unicode values on UTF-8:
865	*
866	* UCS-4 range (hex.) UTF-8 octet sequence (binary)
867	* 0000 0000-0000 007F 0xxxxxxx
868	* 0000 0080-0000 07FF 110xxxxx 10xxxxxx
869	* 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx
870	*/
871	if (val >= 0x80) {
872	xmlChar *savedout = out;
873	int bits;
874	if (val < 0x800) { *out++= (val >> 6) \| 0xC0; bits= 0; }
875	else if (val < 0x10000) { *out++= (val >> 12) \| 0xE0; bits= 6;}
876	else if (val < 0x110000) { *out++= (val >> 18) \| 0xF0; bits= 12; }
877	else {
878	xmlErrEncodingInt(NULL, XML_ERR_INVALID_CHAR,
879	"Internal error, xmlCopyCharMultiByte 0x%X out of bound\n",
880	val);
881	return(0);
882	}
883	for ( ; bits >= 0; bits-= 6)
884	*out++= ((val >> bits) & 0x3F) \| 0x80 ;
885	return (out - savedout);
886	}
887	*out = (xmlChar) val;
888	return 1;
889	}
890
891	/**
892	* xmlCopyChar:
893	* @len: Ignored, compatibility
894	* @out: pointer to an array of xmlChar
895	* @val: the char value
896	*
897	* append the char value in the array
898	*
899	* Returns the number of xmlChar written
900	*/
901
902	int
903	xmlCopyChar(int len ATTRIBUTE_UNUSED, xmlChar *out, int val) {
904	if (out == NULL) return(0);
905	/* the len parameter is ignored */
906	if (val >= 0x80) {
907	return(xmlCopyCharMultiByte (out, val));
908	}
909	*out = (xmlChar) val;
910	return 1;
911	}
912
913	/************************************************************************
914	* *
915	* Commodity functions to switch encodings *
916	* *
917	************************************************************************/
918
919	static int
920	xmlSwitchToEncodingInt(xmlParserCtxtPtr ctxt,
921	xmlCharEncodingHandlerPtr handler, int len);
922	static int
923	xmlSwitchInputEncodingInt(xmlParserCtxtPtr ctxt, xmlParserInputPtr input,
924	xmlCharEncodingHandlerPtr handler, int len);
925	/**
926	* xmlSwitchEncoding:
927	* @ctxt: the parser context
928	* @enc: the encoding value (number)
929	*
930	* change the input functions when discovering the character encoding
931	* of a given entity.
932	*
933	* Returns 0 in case of success, -1 otherwise
934	*/
935	int
936	xmlSwitchEncoding(xmlParserCtxtPtr ctxt, xmlCharEncoding enc)
937	{
938	xmlCharEncodingHandlerPtr handler;
939	int len = -1;
940
941	if (ctxt == NULL) return(-1);
942	switch (enc) {
943	case XML_CHAR_ENCODING_ERROR:
944	__xmlErrEncoding(ctxt, XML_ERR_UNKNOWN_ENCODING,
945	"encoding unknown\n", NULL, NULL);
946	return(-1);
947	case XML_CHAR_ENCODING_NONE:
948	/* let's assume it's UTF-8 without the XML decl */
949	ctxt->charset = XML_CHAR_ENCODING_UTF8;
950	return(0);
951	case XML_CHAR_ENCODING_UTF8:
952	/* default encoding, no conversion should be needed */
953	ctxt->charset = XML_CHAR_ENCODING_UTF8;
954
955	/*
956	* Errata on XML-1.0 June 20 2001
957	* Specific handling of the Byte Order Mark for
958	* UTF-8
959	*/
960	if ((ctxt->input != NULL) &&
961	(ctxt->input->cur[0] == 0xEF) &&
962	(ctxt->input->cur[1] == 0xBB) &&
963	(ctxt->input->cur[2] == 0xBF)) {
964	ctxt->input->cur += 3;
965	}
966	return(0);
967	case XML_CHAR_ENCODING_UTF16LE:
968	case XML_CHAR_ENCODING_UTF16BE:
969	/*The raw input characters are encoded
970	*in UTF-16. As we expect this function
971	*to be called after xmlCharEncInFunc, we expect
972	*ctxt->input->cur to contain UTF-8 encoded characters.
973	*So the raw UTF16 Byte Order Mark
974	*has also been converted into
975	*an UTF-8 BOM. Let's skip that BOM.
976	*/
977	if ((ctxt->input != NULL) && (ctxt->input->cur != NULL) &&
978	(ctxt->input->cur[0] == 0xEF) &&
979	(ctxt->input->cur[1] == 0xBB) &&
980	(ctxt->input->cur[2] == 0xBF)) {
981	ctxt->input->cur += 3;
982	}
983	len = 90;
984	break;
985	case XML_CHAR_ENCODING_UCS2:
986	len = 90;
987	break;
988	case XML_CHAR_ENCODING_UCS4BE:
989	case XML_CHAR_ENCODING_UCS4LE:
990	case XML_CHAR_ENCODING_UCS4_2143:
991	case XML_CHAR_ENCODING_UCS4_3412:
992	len = 180;
993	break;
994	case XML_CHAR_ENCODING_EBCDIC:
995	case XML_CHAR_ENCODING_8859_1:
996	case XML_CHAR_ENCODING_8859_2:
997	case XML_CHAR_ENCODING_8859_3:
998	case XML_CHAR_ENCODING_8859_4:
999	case XML_CHAR_ENCODING_8859_5:
1000	case XML_CHAR_ENCODING_8859_6:
1001	case XML_CHAR_ENCODING_8859_7:
1002	case XML_CHAR_ENCODING_8859_8:
1003	case XML_CHAR_ENCODING_8859_9:
1004	case XML_CHAR_ENCODING_ASCII:
1005	case XML_CHAR_ENCODING_2022_JP:
1006	case XML_CHAR_ENCODING_SHIFT_JIS:
1007	case XML_CHAR_ENCODING_EUC_JP:
1008	len = 45;
1009	break;
1010	}
1011	handler = xmlGetCharEncodingHandler(enc);
1012	if (handler == NULL) {
1013	/*
1014	* Default handlers.
1015	*/
1016	switch (enc) {
1017	case XML_CHAR_ENCODING_ASCII:
1018	/* default encoding, no conversion should be needed */
1019	ctxt->charset = XML_CHAR_ENCODING_UTF8;
1020	return(0);
1021	case XML_CHAR_ENCODING_UTF16LE:
1022	break;
1023	case XML_CHAR_ENCODING_UTF16BE:
1024	break;
1025	case XML_CHAR_ENCODING_UCS4LE:
1026	__xmlErrEncoding(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
1027	"encoding not supported %s\n",
1028	BAD_CAST "USC4 little endian", NULL);
1029	break;
1030	case XML_CHAR_ENCODING_UCS4BE:
1031	__xmlErrEncoding(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
1032	"encoding not supported %s\n",
1033	BAD_CAST "USC4 big endian", NULL);
1034	break;
1035	case XML_CHAR_ENCODING_EBCDIC:
1036	__xmlErrEncoding(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
1037	"encoding not supported %s\n",
1038	BAD_CAST "EBCDIC", NULL);
1039	break;
1040	case XML_CHAR_ENCODING_UCS4_2143:
1041	__xmlErrEncoding(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
1042	"encoding not supported %s\n",
1043	BAD_CAST "UCS4 2143", NULL);
1044	break;
1045	case XML_CHAR_ENCODING_UCS4_3412:
1046	__xmlErrEncoding(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
1047	"encoding not supported %s\n",
1048	BAD_CAST "UCS4 3412", NULL);
1049	break;
1050	case XML_CHAR_ENCODING_UCS2:
1051	__xmlErrEncoding(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
1052	"encoding not supported %s\n",
1053	BAD_CAST "UCS2", NULL);
1054	break;
1055	case XML_CHAR_ENCODING_8859_1:
1056	case XML_CHAR_ENCODING_8859_2:
1057	case XML_CHAR_ENCODING_8859_3:
1058	case XML_CHAR_ENCODING_8859_4:
1059	case XML_CHAR_ENCODING_8859_5:
1060	case XML_CHAR_ENCODING_8859_6:
1061	case XML_CHAR_ENCODING_8859_7:
1062	case XML_CHAR_ENCODING_8859_8:
1063	case XML_CHAR_ENCODING_8859_9:
1064	/*
1065	* We used to keep the internal content in the
1066	* document encoding however this turns being unmaintainable
1067	* So xmlGetCharEncodingHandler() will return non-null
1068	* values for this now.
1069	*/
1070	if ((ctxt->inputNr == 1) &&
1071	(ctxt->encoding == NULL) &&
1072	(ctxt->input != NULL) &&
1073	(ctxt->input->encoding != NULL)) {
1074	ctxt->encoding = xmlStrdup(ctxt->input->encoding);
1075	}
1076	ctxt->charset = enc;
1077	return(0);
1078	case XML_CHAR_ENCODING_2022_JP:
1079	__xmlErrEncoding(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
1080	"encoding not supported %s\n",
1081	BAD_CAST "ISO-2022-JP", NULL);
1082	break;
1083	case XML_CHAR_ENCODING_SHIFT_JIS:
1084	__xmlErrEncoding(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
1085	"encoding not supported %s\n",
1086	BAD_CAST "Shift_JIS", NULL);
1087	break;
1088	case XML_CHAR_ENCODING_EUC_JP:
1089	__xmlErrEncoding(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
1090	"encoding not supported %s\n",
1091	BAD_CAST "EUC-JP", NULL);
1092	break;
1093	default:
1094	break;
1095	}
1096	}
1097	if (handler == NULL)
1098	return(-1);
1099	ctxt->charset = XML_CHAR_ENCODING_UTF8;
1100	return(xmlSwitchToEncodingInt(ctxt, handler, len));
1101	}
1102
1103	/**
1104	* xmlSwitchInputEncoding:
1105	* @ctxt: the parser context
1106	* @input: the input stream
1107	* @handler: the encoding handler
1108	* @len: the number of bytes to convert for the first line or -1
1109	*
1110	* change the input functions when discovering the character encoding
1111	* of a given entity.
1112	*
1113	* Returns 0 in case of success, -1 otherwise
1114	*/
1115	static int
1116	xmlSwitchInputEncodingInt(xmlParserCtxtPtr ctxt, xmlParserInputPtr input,
1117	xmlCharEncodingHandlerPtr handler, int len)
1118	{
1119	int nbchars;
1120
1121	if (handler == NULL)
1122	return (-1);
1123	if (input == NULL)
1124	return (-1);
1125	if (input->buf != NULL) {
1126	if (input->buf->encoder != NULL) {
1127	/*
1128	* Check in case the auto encoding detetection triggered
1129	* in already.
1130	*/
1131	if (input->buf->encoder == handler)
1132	return (0);
1133
1134	/*
1135	* "UTF-16" can be used for both LE and BE
1136	if ((!xmlStrncmp(BAD_CAST input->buf->encoder->name,
1137	BAD_CAST "UTF-16", 6)) &&
1138	(!xmlStrncmp(BAD_CAST handler->name,
1139	BAD_CAST "UTF-16", 6))) {
1140	return(0);
1141	}
1142	*/
1143
1144	/*
1145	* Note: this is a bit dangerous, but that's what it
1146	* takes to use nearly compatible signature for different
1147	* encodings.
1148	*/
1149	xmlCharEncCloseFunc(input->buf->encoder);
1150	input->buf->encoder = handler;
1151	return (0);
1152	}
1153	input->buf->encoder = handler;
1154
1155	/*
1156	* Is there already some content down the pipe to convert ?
1157	*/
1158	if (xmlBufIsEmpty(input->buf->buffer) == 0) {
1159	int processed;
1160	unsigned int use;
1161
1162	/*
1163	* Specific handling of the Byte Order Mark for
1164	* UTF-16
1165	*/
1166	if ((handler->name != NULL) &&
1167	(!strcmp(handler->name, "UTF-16LE") \|\|
1168	!strcmp(handler->name, "UTF-16")) &&
1169	(input->cur[0] == 0xFF) && (input->cur[1] == 0xFE)) {
1170	input->cur += 2;
1171	}
1172	if ((handler->name != NULL) &&
1173	(!strcmp(handler->name, "UTF-16BE")) &&
1174	(input->cur[0] == 0xFE) && (input->cur[1] == 0xFF)) {
1175	input->cur += 2;
1176	}
1177	/*
1178	* Errata on XML-1.0 June 20 2001
1179	* Specific handling of the Byte Order Mark for
1180	* UTF-8
1181	*/
1182	if ((handler->name != NULL) &&
1183	(!strcmp(handler->name, "UTF-8")) &&
1184	(input->cur[0] == 0xEF) &&
1185	(input->cur[1] == 0xBB) && (input->cur[2] == 0xBF)) {
1186	input->cur += 3;
1187	}
1188
1189	/*
1190	* Shrink the current input buffer.
1191	* Move it as the raw buffer and create a new input buffer
1192	*/
1193	processed = input->cur - input->base;
1194	xmlBufShrink(input->buf->buffer, processed);
1195	input->buf->raw = input->buf->buffer;
1196	input->buf->buffer = xmlBufCreate();
1197	input->buf->rawconsumed = processed;
1198	use = xmlBufUse(input->buf->raw);
1199
1200	if (ctxt->html) {
1201	/*
1202	* convert as much as possible of the buffer
1203	*/
1204	nbchars = xmlCharEncInput(input->buf, 1);
1205	} else {
1206	/*
1207	* convert just enough to get
1208	* '<?xml version="1.0" encoding="xxx"?>'
1209	* parsed with the autodetected encoding
1210	* into the parser reading buffer.
1211	*/
1212	nbchars = xmlCharEncFirstLineInput(input->buf, len);
1213	}
1214	if (nbchars < 0) {
1215	xmlErrInternal(ctxt,
1216	"switching encoding: encoder error\n",
1217	NULL);
1218	return (-1);
1219	}
1220	input->buf->rawconsumed += use - xmlBufUse(input->buf->raw);
1221	xmlBufResetInput(input->buf->buffer, input);
1222	}
1223	return (0);
1224	} else if (input->length == 0) {
1225	/*
1226	* When parsing a static memory array one must know the
1227	* size to be able to convert the buffer.
1228	*/
1229	xmlErrInternal(ctxt, "switching encoding : no input\n", NULL);
1230	return (-1);
1231	}
1232	return (0);
1233	}
1234
1235	/**
1236	* xmlSwitchInputEncoding:
1237	* @ctxt: the parser context
1238	* @input: the input stream
1239	* @handler: the encoding handler
1240	*
1241	* change the input functions when discovering the character encoding
1242	* of a given entity.
1243	*
1244	* Returns 0 in case of success, -1 otherwise
1245	*/
1246	int
1247	xmlSwitchInputEncoding(xmlParserCtxtPtr ctxt, xmlParserInputPtr input,
1248	xmlCharEncodingHandlerPtr handler) {
1249	return(xmlSwitchInputEncodingInt(ctxt, input, handler, -1));
1250	}
1251
1252	/**
1253	* xmlSwitchToEncodingInt:
1254	* @ctxt: the parser context
1255	* @handler: the encoding handler
1256	* @len: the length to convert or -1
1257	*
1258	* change the input functions when discovering the character encoding
1259	* of a given entity, and convert only @len bytes of the output, this
1260	* is needed on auto detect to allows any declared encoding later to
1261	* convert the actual content after the xmlDecl
1262	*
1263	* Returns 0 in case of success, -1 otherwise
1264	*/
1265	static int
1266	xmlSwitchToEncodingInt(xmlParserCtxtPtr ctxt,
1267	xmlCharEncodingHandlerPtr handler, int len) {
1268	int ret = 0;
1269
1270	if (handler != NULL) {
1271	if (ctxt->input != NULL) {
1272	ret = xmlSwitchInputEncodingInt(ctxt, ctxt->input, handler, len);
1273	} else {
1274	xmlErrInternal(ctxt, "xmlSwitchToEncoding : no input\n",
1275	NULL);
1276	return(-1);
1277	}
1278	/*
1279	* The parsing is now done in UTF8 natively
1280	*/
1281	ctxt->charset = XML_CHAR_ENCODING_UTF8;
1282	} else
1283	return(-1);
1284	return(ret);
1285	}
1286
1287	/**
1288	* xmlSwitchToEncoding:
1289	* @ctxt: the parser context
1290	* @handler: the encoding handler
1291	*
1292	* change the input functions when discovering the character encoding
1293	* of a given entity.
1294	*
1295	* Returns 0 in case of success, -1 otherwise
1296	*/
1297	int
1298	xmlSwitchToEncoding(xmlParserCtxtPtr ctxt, xmlCharEncodingHandlerPtr handler)
1299	{
1300	return (xmlSwitchToEncodingInt(ctxt, handler, -1));
1301	}
1302
1303	/************************************************************************
1304	* *
1305	* Commodity functions to handle entities processing *
1306	* *
1307	************************************************************************/
1308
1309	/**
1310	* xmlFreeInputStream:
1311	* @input: an xmlParserInputPtr
1312	*
1313	* Free up an input stream.
1314	*/
1315	void
1316	xmlFreeInputStream(xmlParserInputPtr input) {
1317	if (input == NULL) return;
1318
1319	if (input->filename != NULL) xmlFree((char *) input->filename);
1320	if (input->directory != NULL) xmlFree((char *) input->directory);
1321	if (input->encoding != NULL) xmlFree((char *) input->encoding);
1322	if (input->version != NULL) xmlFree((char *) input->version);
1323	if ((input->free != NULL) && (input->base != NULL))
1324	input->free((xmlChar *) input->base);
1325	if (input->buf != NULL)
1326	xmlFreeParserInputBuffer(input->buf);
1327	xmlFree(input);
1328	}
1329
1330	/**
1331	* xmlNewInputStream:
1332	* @ctxt: an XML parser context
1333	*
1334	* Create a new input stream structure.
1335	*
1336	* Returns the new input stream or NULL
1337	*/
1338	xmlParserInputPtr
1339	xmlNewInputStream(xmlParserCtxtPtr ctxt) {
1340	xmlParserInputPtr input;
1341
1342	input = (xmlParserInputPtr) xmlMalloc(sizeof(xmlParserInput));
1343	if (input == NULL) {
1344	xmlErrMemory(ctxt, "couldn't allocate a new input stream\n");
1345	return(NULL);
1346	}
1347	memset(input, 0, sizeof(xmlParserInput));
1348	input->line = 1;
1349	input->col = 1;
1350	input->standalone = -1;
1351
1352	/*
1353	* If the context is NULL the id cannot be initialized, but that
1354	* should not happen while parsing which is the situation where
1355	* the id is actually needed.
1356	*/
1357	if (ctxt != NULL)
1358	input->id = ctxt->input_id++;
1359
1360	return(input);
1361	}
1362
1363	/**
1364	* xmlNewIOInputStream:
1365	* @ctxt: an XML parser context
1366	* @input: an I/O Input
1367	* @enc: the charset encoding if known
1368	*
1369	* Create a new input stream structure encapsulating the @input into
1370	* a stream suitable for the parser.
1371	*
1372	* Returns the new input stream or NULL
1373	*/
1374	xmlParserInputPtr
1375	xmlNewIOInputStream(xmlParserCtxtPtr ctxt, xmlParserInputBufferPtr input,
1376	xmlCharEncoding enc) {
1377	xmlParserInputPtr inputStream;
1378
1379	if (input == NULL) return(NULL);
1380	if (xmlParserDebugEntities)
1381	xmlGenericError(xmlGenericErrorContext, "new input from I/O\n");
1382	inputStream = xmlNewInputStream(ctxt);
1383	if (inputStream == NULL) {
1384	return(NULL);
1385	}
1386	inputStream->filename = NULL;
1387	inputStream->buf = input;
1388	xmlBufResetInput(inputStream->buf->buffer, inputStream);
1389
1390	if (enc != XML_CHAR_ENCODING_NONE) {
1391	xmlSwitchEncoding(ctxt, enc);
1392	}
1393
1394	return(inputStream);
1395	}
1396
1397	/**
1398	* xmlNewEntityInputStream:
1399	* @ctxt: an XML parser context
1400	* @entity: an Entity pointer
1401	*
1402	* Create a new input stream based on an xmlEntityPtr
1403	*
1404	* Returns the new input stream or NULL
1405	*/
1406	xmlParserInputPtr
1407	xmlNewEntityInputStream(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
1408	xmlParserInputPtr input;
1409
1410	if (entity == NULL) {
1411	xmlErrInternal(ctxt, "xmlNewEntityInputStream entity = NULL\n",
1412	NULL);
1413	return(NULL);
1414	}
1415	if (xmlParserDebugEntities)
1416	xmlGenericError(xmlGenericErrorContext,
1417	"new input from entity: %s\n", entity->name);
1418	if (entity->content == NULL) {
1419	switch (entity->etype) {
1420	case XML_EXTERNAL_GENERAL_UNPARSED_ENTITY:
1421	xmlErrInternal(ctxt, "Cannot parse entity %s\n",
1422	entity->name);
1423	break;
1424	case XML_EXTERNAL_GENERAL_PARSED_ENTITY:
1425	case XML_EXTERNAL_PARAMETER_ENTITY:
1426	return(xmlLoadExternalEntity((char *) entity->URI,
1427	(char *) entity->ExternalID, ctxt));
1428	case XML_INTERNAL_GENERAL_ENTITY:
1429	xmlErrInternal(ctxt,
1430	"Internal entity %s without content !\n",
1431	entity->name);
1432	break;
1433	case XML_INTERNAL_PARAMETER_ENTITY:
1434	xmlErrInternal(ctxt,
1435	"Internal parameter entity %s without content !\n",
1436	entity->name);
1437	break;
1438	case XML_INTERNAL_PREDEFINED_ENTITY:
1439	xmlErrInternal(ctxt,
1440	"Predefined entity %s without content !\n",
1441	entity->name);
1442	break;
1443	}
1444	return(NULL);
1445	}
1446	input = xmlNewInputStream(ctxt);
1447	if (input == NULL) {
1448	return(NULL);
1449	}
1450	if (entity->URI != NULL)
1451	input->filename = (char ) xmlStrdup((xmlChar ) entity->URI);
1452	input->base = entity->content;
1453	input->cur = entity->content;
1454	input->length = entity->length;
1455	input->end = &entity->content[input->length];
1456	return(input);
1457	}
1458
1459	/**
1460	* xmlNewStringInputStream:
1461	* @ctxt: an XML parser context
1462	* @buffer: an memory buffer
1463	*
1464	* Create a new input stream based on a memory buffer.
1465	* Returns the new input stream
1466	*/
1467	xmlParserInputPtr
1468	xmlNewStringInputStream(xmlParserCtxtPtr ctxt, const xmlChar *buffer) {
1469	xmlParserInputPtr input;
1470
1471	if (buffer == NULL) {
1472	xmlErrInternal(ctxt, "xmlNewStringInputStream string = NULL\n",
1473	NULL);
1474	return(NULL);
1475	}
1476	if (xmlParserDebugEntities)
1477	xmlGenericError(xmlGenericErrorContext,
1478	"new fixed input: %.30s\n", buffer);
1479	input = xmlNewInputStream(ctxt);
1480	if (input == NULL) {
1481	xmlErrMemory(ctxt, "couldn't allocate a new input stream\n");
1482	return(NULL);
1483	}
1484	input->base = buffer;
1485	input->cur = buffer;
1486	input->length = xmlStrlen(buffer);
1487	input->end = &buffer[input->length];
1488	return(input);
1489	}
1490
1491	/**
1492	* xmlNewInputFromFile:
1493	* @ctxt: an XML parser context
1494	* @filename: the filename to use as entity
1495	*
1496	* Create a new input stream based on a file or an URL.
1497	*
1498	* Returns the new input stream or NULL in case of error
1499	*/
1500	xmlParserInputPtr
1501	xmlNewInputFromFile(xmlParserCtxtPtr ctxt, const char *filename) {
1502	xmlParserInputBufferPtr buf;
1503	xmlParserInputPtr inputStream;
1504	char *directory = NULL;
1505	xmlChar *URI = NULL;
1506
1507	if (xmlParserDebugEntities)
1508	xmlGenericError(xmlGenericErrorContext,
1509	"new input from file: %s\n", filename);
1510	if (ctxt == NULL) return(NULL);
1511	buf = xmlParserInputBufferCreateFilename(filename, XML_CHAR_ENCODING_NONE);
1512	if (buf == NULL) {
1513	if (filename == NULL)
1514	__xmlLoaderErr(ctxt,
1515	"failed to load external entity: NULL filename \n",
1516	NULL);
1517	else
1518	__xmlLoaderErr(ctxt, "failed to load external entity \"%s\"\n",
1519	(const char *) filename);
1520	return(NULL);
1521	}
1522
1523	inputStream = xmlNewInputStream(ctxt);
1524	if (inputStream == NULL)
1525	return(NULL);
1526
1527	inputStream->buf = buf;
1528	inputStream = xmlCheckHTTPInput(ctxt, inputStream);
1529	if (inputStream == NULL)
1530	return(NULL);
1531
1532	if (inputStream->filename == NULL)
1533	URI = xmlStrdup((xmlChar *) filename);
1534	else
1535	URI = xmlStrdup((xmlChar *) inputStream->filename);
1536	directory = xmlParserGetDirectory((const char *) URI);
1537	if (inputStream->filename != NULL) xmlFree((char *)inputStream->filename);
1538	inputStream->filename = (char ) xmlCanonicPath((const xmlChar ) URI);
1539	if (URI != NULL) xmlFree((char *) URI);
1540	inputStream->directory = directory;
1541
1542	xmlBufResetInput(inputStream->buf->buffer, inputStream);
1543	if ((ctxt->directory == NULL) && (directory != NULL))
1544	ctxt->directory = (char ) xmlStrdup((const xmlChar ) directory);
1545	return(inputStream);
1546	}
1547
1548	/************************************************************************
1549	* *
1550	* Commodity functions to handle parser contexts *
1551	* *
1552	************************************************************************/
1553
1554	/**
1555	* xmlInitParserCtxt:
1556	* @ctxt: an XML parser context
1557	*
1558	* Initialize a parser context
1559	*
1560	* Returns 0 in case of success and -1 in case of error
1561	*/
1562
1563	int
1564	xmlInitParserCtxt(xmlParserCtxtPtr ctxt)
1565	{
1566	xmlParserInputPtr input;
1567
1568	if(ctxt==NULL) {
1569	xmlErrInternal(NULL, "Got NULL parser context\n", NULL);
1570	return(-1);
1571	}
1572
1573	xmlDefaultSAXHandlerInit();
1574
1575	if (ctxt->dict == NULL)
1576	ctxt->dict = xmlDictCreate();
1577	if (ctxt->dict == NULL) {
1578	xmlErrMemory(NULL, "cannot initialize parser context\n");
1579	return(-1);
1580	}
1581	xmlDictSetLimit(ctxt->dict, XML_MAX_DICTIONARY_LIMIT);
1582
1583	if (ctxt->sax == NULL)
1584	ctxt->sax = (xmlSAXHandler *) xmlMalloc(sizeof(xmlSAXHandler));
1585	if (ctxt->sax == NULL) {
1586	xmlErrMemory(NULL, "cannot initialize parser context\n");
1587	return(-1);
1588	}
1589	else
1590	xmlSAXVersion(ctxt->sax, 2);
1591
1592	ctxt->maxatts = 0;
1593	ctxt->atts = NULL;
1594	/* Allocate the Input stack */
1595	if (ctxt->inputTab == NULL) {
1596	ctxt->inputTab = (xmlParserInputPtr *)
1597	xmlMalloc(5 * sizeof(xmlParserInputPtr));
1598	ctxt->inputMax = 5;
1599	}
1600	if (ctxt->inputTab == NULL) {
1601	xmlErrMemory(NULL, "cannot initialize parser context\n");
1602	ctxt->inputNr = 0;
1603	ctxt->inputMax = 0;
1604	ctxt->input = NULL;
1605	return(-1);
1606	}
1607	while ((input = inputPop(ctxt)) != NULL) { /* Non consuming */
1608	xmlFreeInputStream(input);
1609	}
1610	ctxt->inputNr = 0;
1611	ctxt->input = NULL;
1612
1613	ctxt->version = NULL;
1614	ctxt->encoding = NULL;
1615	ctxt->standalone = -1;
1616	ctxt->hasExternalSubset = 0;
1617	ctxt->hasPErefs = 0;
1618	ctxt->html = 0;
1619	ctxt->external = 0;
1620	ctxt->instate = XML_PARSER_START;
1621	ctxt->token = 0;
1622	ctxt->directory = NULL;
1623
1624	/* Allocate the Node stack */
1625	if (ctxt->nodeTab == NULL) {
1626	ctxt->nodeTab = (xmlNodePtr ) xmlMalloc(10 sizeof(xmlNodePtr));
1627	ctxt->nodeMax = 10;
1628	}
1629	if (ctxt->nodeTab == NULL) {
1630	xmlErrMemory(NULL, "cannot initialize parser context\n");
1631	ctxt->nodeNr = 0;
1632	ctxt->nodeMax = 0;
1633	ctxt->node = NULL;
1634	ctxt->inputNr = 0;
1635	ctxt->inputMax = 0;
1636	ctxt->input = NULL;
1637	return(-1);
1638	}
1639	ctxt->nodeNr = 0;
1640	ctxt->node = NULL;
1641
1642	/* Allocate the Name stack */
1643	if (ctxt->nameTab == NULL) {
1644	ctxt->nameTab = (const xmlChar *) xmlMalloc(10 sizeof(xmlChar *));
1645	ctxt->nameMax = 10;
1646	}
1647	if (ctxt->nameTab == NULL) {
1648	xmlErrMemory(NULL, "cannot initialize parser context\n");
1649	ctxt->nodeNr = 0;
1650	ctxt->nodeMax = 0;
1651	ctxt->node = NULL;
1652	ctxt->inputNr = 0;
1653	ctxt->inputMax = 0;
1654	ctxt->input = NULL;
1655	ctxt->nameNr = 0;
1656	ctxt->nameMax = 0;
1657	ctxt->name = NULL;
1658	return(-1);
1659	}
1660	ctxt->nameNr = 0;
1661	ctxt->name = NULL;
1662
1663	/* Allocate the space stack */
1664	if (ctxt->spaceTab == NULL) {
1665	ctxt->spaceTab = (int ) xmlMalloc(10 sizeof(int));
1666	ctxt->spaceMax = 10;
1667	}
1668	if (ctxt->spaceTab == NULL) {
1669	xmlErrMemory(NULL, "cannot initialize parser context\n");
1670	ctxt->nodeNr = 0;
1671	ctxt->nodeMax = 0;
1672	ctxt->node = NULL;
1673	ctxt->inputNr = 0;
1674	ctxt->inputMax = 0;
1675	ctxt->input = NULL;
1676	ctxt->nameNr = 0;
1677	ctxt->nameMax = 0;
1678	ctxt->name = NULL;
1679	ctxt->spaceNr = 0;
1680	ctxt->spaceMax = 0;
1681	ctxt->space = NULL;
1682	return(-1);
1683	}
1684	ctxt->spaceNr = 1;
1685	ctxt->spaceMax = 10;
1686	ctxt->spaceTab[0] = -1;
1687	ctxt->space = &ctxt->spaceTab[0];
1688	ctxt->userData = ctxt;
1689	ctxt->myDoc = NULL;
1690	ctxt->wellFormed = 1;
1691	ctxt->nsWellFormed = 1;
1692	ctxt->valid = 1;
1693	ctxt->loadsubset = xmlLoadExtDtdDefaultValue;
1694	if (ctxt->loadsubset) {
1695	ctxt->options \|= XML_PARSE_DTDLOAD;
1696	}
1697	ctxt->validate = xmlDoValidityCheckingDefaultValue;
1698	ctxt->pedantic = xmlPedanticParserDefaultValue;
1699	if (ctxt->pedantic) {
1700	ctxt->options \|= XML_PARSE_PEDANTIC;
1701	}
1702	ctxt->linenumbers = xmlLineNumbersDefaultValue;
1703	ctxt->keepBlanks = xmlKeepBlanksDefaultValue;
1704	if (ctxt->keepBlanks == 0) {
1705	ctxt->sax->ignorableWhitespace = xmlSAX2IgnorableWhitespace;
1706	ctxt->options \|= XML_PARSE_NOBLANKS;
1707	}
1708
1709	ctxt->vctxt.finishDtd = XML_CTXT_FINISH_DTD_0;
1710	ctxt->vctxt.userData = ctxt;
1711	ctxt->vctxt.error = xmlParserValidityError;
1712	ctxt->vctxt.warning = xmlParserValidityWarning;
1713	if (ctxt->validate) {
1714	if (xmlGetWarningsDefaultValue == 0)
1715	ctxt->vctxt.warning = NULL;
1716	else
1717	ctxt->vctxt.warning = xmlParserValidityWarning;
1718	ctxt->vctxt.nodeMax = 0;
1719	ctxt->options \|= XML_PARSE_DTDVALID;
1720	}
1721	ctxt->replaceEntities = xmlSubstituteEntitiesDefaultValue;
1722	if (ctxt->replaceEntities) {
1723	ctxt->options \|= XML_PARSE_NOENT;
1724	}
1725	ctxt->record_info = 0;
1726	ctxt->nbChars = 0;
1727	ctxt->checkIndex = 0;
1728	ctxt->inSubset = 0;
1729	ctxt->errNo = XML_ERR_OK;
1730	ctxt->depth = 0;
1731	ctxt->charset = XML_CHAR_ENCODING_UTF8;
1732	ctxt->catalogs = NULL;
1733	ctxt->nbentities = 0;
1734	ctxt->sizeentities = 0;
1735	ctxt->sizeentcopy = 0;
1736	ctxt->input_id = 1;
1737	xmlInitNodeInfoSeq(&ctxt->node_seq);
1738	return(0);
1739	}
1740
1741	/**
1742	* xmlFreeParserCtxt:
1743	* @ctxt: an XML parser context
1744	*
1745	* Free all the memory used by a parser context. However the parsed
1746	* document in ctxt->myDoc is not freed.
1747	*/
1748
1749	void
1750	xmlFreeParserCtxt(xmlParserCtxtPtr ctxt)
1751	{
1752	xmlParserInputPtr input;
1753
1754	if (ctxt == NULL) return;
1755
1756	while ((input = inputPop(ctxt)) != NULL) { /* Non consuming */
1757	xmlFreeInputStream(input);
1758	}
1759	if (ctxt->spaceTab != NULL) xmlFree(ctxt->spaceTab);
1760	if (ctxt->nameTab != NULL) xmlFree((xmlChar * *)ctxt->nameTab);
1761	if (ctxt->nodeTab != NULL) xmlFree(ctxt->nodeTab);
1762	if (ctxt->nodeInfoTab != NULL) xmlFree(ctxt->nodeInfoTab);
1763	if (ctxt->inputTab != NULL) xmlFree(ctxt->inputTab);
1764	if (ctxt->version != NULL) xmlFree((char *) ctxt->version);
1765	if (ctxt->encoding != NULL) xmlFree((char *) ctxt->encoding);
1766	if (ctxt->extSubURI != NULL) xmlFree((char *) ctxt->extSubURI);
1767	if (ctxt->extSubSystem != NULL) xmlFree((char *) ctxt->extSubSystem);
1768	#ifdef LIBXML_SAX1_ENABLED
1769	if ((ctxt->sax != NULL) &&
1770	(ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler))
1771	#else
1772	if (ctxt->sax != NULL)
1773	#endif /* LIBXML_SAX1_ENABLED */
1774	xmlFree(ctxt->sax);
1775	if (ctxt->directory != NULL) xmlFree((char *) ctxt->directory);
1776	if (ctxt->vctxt.nodeTab != NULL) xmlFree(ctxt->vctxt.nodeTab);
1777	if (ctxt->atts != NULL) xmlFree((xmlChar * *)ctxt->atts);
1778	if (ctxt->dict != NULL) xmlDictFree(ctxt->dict);
1779	if (ctxt->nsTab != NULL) xmlFree((char *) ctxt->nsTab);
1780	if (ctxt->pushTab != NULL) xmlFree(ctxt->pushTab);
1781	if (ctxt->attallocs != NULL) xmlFree(ctxt->attallocs);
1782	if (ctxt->attsDefault != NULL)
1783	xmlHashFree(ctxt->attsDefault, (xmlHashDeallocator) xmlFree);
1784	if (ctxt->attsSpecial != NULL)
1785	xmlHashFree(ctxt->attsSpecial, NULL);
1786	if (ctxt->freeElems != NULL) {
1787	xmlNodePtr cur, next;
1788
1789	cur = ctxt->freeElems;
1790	while (cur != NULL) {
1791	next = cur->next;
1792	xmlFree(cur);
1793	cur = next;
1794	}
1795	}
1796	if (ctxt->freeAttrs != NULL) {
1797	xmlAttrPtr cur, next;
1798
1799	cur = ctxt->freeAttrs;
1800	while (cur != NULL) {
1801	next = cur->next;
1802	xmlFree(cur);
1803	cur = next;
1804	}
1805	}
1806	/*
1807	* cleanup the error strings
1808	*/
1809	if (ctxt->lastError.message != NULL)
1810	xmlFree(ctxt->lastError.message);
1811	if (ctxt->lastError.file != NULL)
1812	xmlFree(ctxt->lastError.file);
1813	if (ctxt->lastError.str1 != NULL)
1814	xmlFree(ctxt->lastError.str1);
1815	if (ctxt->lastError.str2 != NULL)
1816	xmlFree(ctxt->lastError.str2);
1817	if (ctxt->lastError.str3 != NULL)
1818	xmlFree(ctxt->lastError.str3);
1819
1820	#ifdef LIBXML_CATALOG_ENABLED
1821	if (ctxt->catalogs != NULL)
1822	xmlCatalogFreeLocal(ctxt->catalogs);
1823	#endif
1824	xmlFree(ctxt);
1825	}
1826
1827	/**
1828	* xmlNewParserCtxt:
1829	*
1830	* Allocate and initialize a new parser context.
1831	*
1832	* Returns the xmlParserCtxtPtr or NULL
1833	*/
1834
1835	xmlParserCtxtPtr
1836	xmlNewParserCtxt(void)
1837	{
1838	xmlParserCtxtPtr ctxt;
1839
1840	ctxt = (xmlParserCtxtPtr) xmlMalloc(sizeof(xmlParserCtxt));
1841	if (ctxt == NULL) {
1842	xmlErrMemory(NULL, "cannot allocate parser context\n");
1843	return(NULL);
1844	}
1845	memset(ctxt, 0, sizeof(xmlParserCtxt));
1846	if (xmlInitParserCtxt(ctxt) < 0) {
1847	xmlFreeParserCtxt(ctxt);
1848	return(NULL);
1849	}
1850	return(ctxt);
1851	}
1852
1853	/************************************************************************
1854	* *
1855	* Handling of node informations *
1856	* *
1857	************************************************************************/
1858
1859	/**
1860	* xmlClearParserCtxt:
1861	* @ctxt: an XML parser context
1862	*
1863	* Clear (release owned resources) and reinitialize a parser context
1864	*/
1865
1866	void
1867	xmlClearParserCtxt(xmlParserCtxtPtr ctxt)
1868	{
1869	if (ctxt==NULL)
1870	return;
1871	xmlClearNodeInfoSeq(&ctxt->node_seq);
1872	xmlCtxtReset(ctxt);
1873	}
1874
1875
1876	/**
1877	* xmlParserFindNodeInfo:
1878	* @ctx: an XML parser context
1879	* @node: an XML node within the tree
1880	*
1881	* Find the parser node info struct for a given node
1882	*
1883	* Returns an xmlParserNodeInfo block pointer or NULL
1884	*/
1885	const xmlParserNodeInfo *
1886	xmlParserFindNodeInfo(const xmlParserCtxtPtr ctx, const xmlNodePtr node)
1887	{
1888	unsigned long pos;
1889
1890	if ((ctx == NULL) \|\| (node == NULL))
1891	return (NULL);
1892	/* Find position where node should be at */
1893	pos = xmlParserFindNodeInfoIndex(&ctx->node_seq, node);
1894	if (pos < ctx->node_seq.length
1895	&& ctx->node_seq.buffer[pos].node == node)
1896	return &ctx->node_seq.buffer[pos];
1897	else
1898	return NULL;
1899	}
1900
1901
1902	/**
1903	* xmlInitNodeInfoSeq:
1904	* @seq: a node info sequence pointer
1905	*
1906	* -- Initialize (set to initial state) node info sequence
1907	*/
1908	void
1909	xmlInitNodeInfoSeq(xmlParserNodeInfoSeqPtr seq)
1910	{
1911	if (seq == NULL)
1912	return;
1913	seq->length = 0;
1914	seq->maximum = 0;
1915	seq->buffer = NULL;
1916	}
1917
1918	/**
1919	* xmlClearNodeInfoSeq:
1920	* @seq: a node info sequence pointer
1921	*
1922	* -- Clear (release memory and reinitialize) node
1923	* info sequence
1924	*/
1925	void
1926	xmlClearNodeInfoSeq(xmlParserNodeInfoSeqPtr seq)
1927	{
1928	if (seq == NULL)
1929	return;
1930	if (seq->buffer != NULL)
1931	xmlFree(seq->buffer);
1932	xmlInitNodeInfoSeq(seq);
1933	}
1934
1935	/**
1936	* xmlParserFindNodeInfoIndex:
1937	* @seq: a node info sequence pointer
1938	* @node: an XML node pointer
1939	*
1940	*
1941	* xmlParserFindNodeInfoIndex : Find the index that the info record for
1942	* the given node is or should be at in a sorted sequence
1943	*
1944	* Returns a long indicating the position of the record
1945	*/
1946	unsigned long
1947	xmlParserFindNodeInfoIndex(const xmlParserNodeInfoSeqPtr seq,
1948	const xmlNodePtr node)
1949	{
1950	unsigned long upper, lower, middle;
1951	int found = 0;
1952
1953	if ((seq == NULL) \|\| (node == NULL))
1954	return ((unsigned long) -1);
1955
1956	/* Do a binary search for the key */
1957	lower = 1;
1958	upper = seq->length;
1959	middle = 0;
1960	while (lower <= upper && !found) {
1961	middle = lower + (upper - lower) / 2;
1962	if (node == seq->buffer[middle - 1].node)
1963	found = 1;
1964	else if (node < seq->buffer[middle - 1].node)
1965	upper = middle - 1;
1966	else
1967	lower = middle + 1;
1968	}
1969
1970	/* Return position */
1971	if (middle == 0 \|\| seq->buffer[middle - 1].node < node)
1972	return middle;
1973	else
1974	return middle - 1;
1975	}
1976
1977
1978	/**
1979	* xmlParserAddNodeInfo:
1980	* @ctxt: an XML parser context
1981	* @info: a node info sequence pointer
1982	*
1983	* Insert node info record into the sorted sequence
1984	*/
1985	void
1986	xmlParserAddNodeInfo(xmlParserCtxtPtr ctxt,
1987	const xmlParserNodeInfoPtr info)
1988	{
1989	unsigned long pos;
1990
1991	if ((ctxt == NULL) \|\| (info == NULL)) return;
1992
1993	/* Find pos and check to see if node is already in the sequence */
1994	pos = xmlParserFindNodeInfoIndex(&ctxt->node_seq, (xmlNodePtr)
1995	info->node);
1996
1997	if ((pos < ctxt->node_seq.length) &&
1998	(ctxt->node_seq.buffer != NULL) &&
1999	(ctxt->node_seq.buffer[pos].node == info->node)) {
2000	ctxt->node_seq.buffer[pos] = *info;
2001	}
2002
2003	/* Otherwise, we need to add new node to buffer */
2004	else {
2005	if ((ctxt->node_seq.length + 1 > ctxt->node_seq.maximum) \|\|
2006	(ctxt->node_seq.buffer == NULL)) {
2007	xmlParserNodeInfo *tmp_buffer;
2008	unsigned int byte_size;
2009
2010	if (ctxt->node_seq.maximum == 0)
2011	ctxt->node_seq.maximum = 2;
2012	byte_size = (sizeof(ctxt->node_seq.buffer)
2013	(2 * ctxt->node_seq.maximum));
2014
2015	if (ctxt->node_seq.buffer == NULL)
2016	tmp_buffer = (xmlParserNodeInfo *) xmlMalloc(byte_size);
2017	else
2018	tmp_buffer =
2019	(xmlParserNodeInfo *) xmlRealloc(ctxt->node_seq.buffer,
2020	byte_size);
2021
2022	if (tmp_buffer == NULL) {
2023	xmlErrMemory(ctxt, "failed to allocate buffer\n");
2024	return;
2025	}
2026	ctxt->node_seq.buffer = tmp_buffer;
2027	ctxt->node_seq.maximum *= 2;
2028	}
2029
2030	/* If position is not at end, move elements out of the way */
2031	if (pos != ctxt->node_seq.length) {
2032	unsigned long i;
2033
2034	for (i = ctxt->node_seq.length; i > pos; i--)
2035	ctxt->node_seq.buffer[i] = ctxt->node_seq.buffer[i - 1];
2036	}
2037
2038	/* Copy element and increase length */
2039	ctxt->node_seq.buffer[pos] = *info;
2040	ctxt->node_seq.length++;
2041	}
2042	}
2043
2044	/************************************************************************
2045	* *
2046	* Defaults settings *
2047	* *
2048	************************************************************************/
2049	/**
2050	* xmlPedanticParserDefault:
2051	* @val: int 0 or 1
2052	*
2053	* Set and return the previous value for enabling pedantic warnings.
2054	*
2055	* Returns the last value for 0 for no substitution, 1 for substitution.
2056	*/
2057
2058	int
2059	xmlPedanticParserDefault(int val) {
2060	int old = xmlPedanticParserDefaultValue;
2061
2062	xmlPedanticParserDefaultValue = val;
2063	return(old);
2064	}
2065
2066	/**
2067	* xmlLineNumbersDefault:
2068	* @val: int 0 or 1
2069	*
2070	* Set and return the previous value for enabling line numbers in elements
2071	* contents. This may break on old application and is turned off by default.
2072	*
2073	* Returns the last value for 0 for no substitution, 1 for substitution.
2074	*/
2075
2076	int
2077	xmlLineNumbersDefault(int val) {
2078	int old = xmlLineNumbersDefaultValue;
2079
2080	xmlLineNumbersDefaultValue = val;
2081	return(old);
2082	}
2083
2084	/**
2085	* xmlSubstituteEntitiesDefault:
2086	* @val: int 0 or 1
2087	*
2088	* Set and return the previous value for default entity support.
2089	* Initially the parser always keep entity references instead of substituting
2090	* entity values in the output. This function has to be used to change the
2091	* default parser behavior
2092	* SAX::substituteEntities() has to be used for changing that on a file by
2093	* file basis.
2094	*
2095	* Returns the last value for 0 for no substitution, 1 for substitution.
2096	*/
2097
2098	int
2099	xmlSubstituteEntitiesDefault(int val) {
2100	int old = xmlSubstituteEntitiesDefaultValue;
2101
2102	xmlSubstituteEntitiesDefaultValue = val;
2103	return(old);
2104	}
2105
2106	/**
2107	* xmlKeepBlanksDefault:
2108	* @val: int 0 or 1
2109	*
2110	* Set and return the previous value for default blanks text nodes support.
2111	* The 1.x version of the parser used an heuristic to try to detect
2112	* ignorable white spaces. As a result the SAX callback was generating
2113	* xmlSAX2IgnorableWhitespace() callbacks instead of characters() one, and when
2114	* using the DOM output text nodes containing those blanks were not generated.
2115	* The 2.x and later version will switch to the XML standard way and
2116	* ignorableWhitespace() are only generated when running the parser in
2117	* validating mode and when the current element doesn't allow CDATA or
2118	* mixed content.
2119	* This function is provided as a way to force the standard behavior
2120	* on 1.X libs and to switch back to the old mode for compatibility when
2121	* running 1.X client code on 2.X . Upgrade of 1.X code should be done
2122	* by using xmlIsBlankNode() commodity function to detect the "empty"
2123	* nodes generated.
2124	* This value also affect autogeneration of indentation when saving code
2125	* if blanks sections are kept, indentation is not generated.
2126	*
2127	* Returns the last value for 0 for no substitution, 1 for substitution.
2128	*/
2129
2130	int
2131	xmlKeepBlanksDefault(int val) {
2132	int old = xmlKeepBlanksDefaultValue;
2133
2134	xmlKeepBlanksDefaultValue = val;
2135	if (!val) xmlIndentTreeOutput = 1;
2136	return(old);
2137	}
2138
2139	#define bottom_parserInternals
2140	#include "elfgcchack.h"

Note: See TracBrowser for help on using the repository browser.

source: vbox/trunk/src/libs/libxml2-2.9.2/parserInternals.c@ 63001

Download in other formats: