VirtualBox

source: vbox/trunk/src/libs/libxml2-2.13.2/parser.c@ 107713

Last change on this file since 107713 was 105420, checked in by vboxsync, 7 months ago

libxml2-2.12.6: Applied and adjusted our libxml2 changes to 2.12.6. bugref:10730

  • Property svn:eol-style set to native
File size: 382.3 KB
Line 
1/*
2 * parser.c : an XML 1.0 parser, namespaces and validity support are mostly
3 * implemented on top of the SAX interfaces
4 *
5 * References:
6 * The XML specification:
7 * http://www.w3.org/TR/REC-xml
8 * Original 1.0 version:
9 * http://www.w3.org/TR/1998/REC-xml-19980210
10 * XML second edition working draft
11 * http://www.w3.org/TR/2000/WD-xml-2e-20000814
12 *
13 * Okay this is a big file, the parser core is around 7000 lines, then it
14 * is followed by the progressive parser top routines, then the various
15 * high level APIs to call the parser and a few miscellaneous functions.
16 * A number of helper functions and deprecated ones have been moved to
17 * parserInternals.c to reduce this file size.
18 * As much as possible the functions are associated with their relative
19 * production in the XML specification. A few productions defining the
 * different ranges of characters are actually implemented either in
 * parserInternals.h or parserInternals.c.
22 * The DOM tree build is realized from the default SAX callbacks in
23 * the module SAX.c.
24 * The routines doing the validation checks are in valid.c and called either
25 * from the SAX callbacks or as standalone functions using a preparsed
26 * document.
27 *
28 * See Copyright for the status of this software.
29 *
30 * [email protected]
31 */
32
33/* To avoid EBCDIC trouble when parsing on zOS */
34#if defined(__MVS__)
35#pragma convert("ISO8859-1")
36#endif
37
38#define IN_LIBXML
39#include "libxml.h"
40
41#if defined(_WIN32)
42#define XML_DIR_SEP '\\'
43#else
44#define XML_DIR_SEP '/'
45#endif
46
47#include <stdlib.h>
48#include <limits.h>
49#include <string.h>
50#include <stdarg.h>
51#include <stddef.h>
52#include <ctype.h>
53#include <stdlib.h>
54#include <libxml/parser.h>
55#include <libxml/xmlmemory.h>
56#include <libxml/tree.h>
57#include <libxml/parserInternals.h>
58#include <libxml/valid.h>
59#include <libxml/entities.h>
60#include <libxml/xmlerror.h>
61#include <libxml/encoding.h>
62#include <libxml/xmlIO.h>
63#include <libxml/uri.h>
64#include <libxml/SAX2.h>
65#ifdef LIBXML_CATALOG_ENABLED
66#include <libxml/catalog.h>
67#endif
68
69#include "private/buf.h"
70#include "private/dict.h"
71#include "private/entities.h"
72#include "private/error.h"
73#include "private/html.h"
74#include "private/io.h"
75#include "private/parser.h"
76
77#define NS_INDEX_EMPTY INT_MAX
78#define NS_INDEX_XML (INT_MAX - 1)
79#define URI_HASH_EMPTY 0xD943A04E
80#define URI_HASH_XML 0xF0451F02
81
82#ifndef STDIN_FILENO
83 #define STDIN_FILENO 0
84#endif
85
86struct _xmlStartTag {
87 const xmlChar *prefix;
88 const xmlChar *URI;
89 int line;
90 int nsNr;
91};
92
/*
 * Extra per-namespace bookkeeping.
 * NOTE(review): field meanings are inferred from their names; the code
 * using them is outside the visible part of the file.
 */
typedef struct {
    void *saxData;              /* opaque pointer, presumably SAX user data */
    unsigned prefixHashValue;   /* presumably the hash of the prefix */
    unsigned uriHashValue;      /* presumably the hash of the URI */
    unsigned elementId;         /* presumably the id of the owning element */
    int oldIndex;               /* presumably the index of a shadowed entry */
} xmlParserNsExtra;
100
/*
 * One slot of a namespace hash table: a hash value paired with an index.
 * NOTE(review): what the index refers to is not visible here; presumably
 * a position in the namespace table -- confirm against the lookup code.
 */
typedef struct {
    unsigned hashValue;     /* hash stored in this slot */
    int index;              /* index associated with the hash */
} xmlParserNsBucket;
105
106struct _xmlParserNsData {
107 xmlParserNsExtra *extra;
108
109 unsigned hashSize;
110 unsigned hashElems;
111 xmlParserNsBucket *hash;
112
113 unsigned elementId;
114 int defaultNsIndex;
115 int minNsIndex;
116};
117
/*
 * One slot of an attribute hash table; it stores a single index.
 * NOTE(review): presumably an index into the attribute array -- confirm
 * against the code that uses the table.
 */
struct _xmlAttrHashBucket {
    int index;      /* index stored in this slot */
};
121
122static int
123xmlParseElementStart(xmlParserCtxtPtr ctxt);
124
125static void
126xmlParseElementEnd(xmlParserCtxtPtr ctxt);
127
128static xmlEntityPtr
129xmlLookupGeneralEntity(xmlParserCtxtPtr ctxt, const xmlChar *name, int inAttr);
130
131static const xmlChar *
132xmlParseEntityRefInternal(xmlParserCtxtPtr ctxt);
133
134/************************************************************************
135 * *
136 * Arbitrary limits set in the parser. See XML_PARSE_HUGE *
137 * *
138 ************************************************************************/
139
140#define XML_PARSER_BIG_ENTITY 1000
141#define XML_PARSER_LOT_ENTITY 5000
142
143/*
144 * Constants for protection against abusive entity expansion
145 * ("billion laughs").
146 */
147
148/*
149 * A certain amount of entity expansion which is always allowed.
150 */
151#define XML_PARSER_ALLOWED_EXPANSION 1000000
152
153/*
154 * Fixed cost for each entity reference. This crudely models processing time
155 * as well to protect, for example, against exponential expansion of empty
156 * or very short entities.
157 */
158#define XML_ENT_FIXED_COST 20
159
160/**
161 * xmlParserMaxDepth:
162 *
163 * arbitrary depth limit for the XML documents that we allow to
164 * process. This is not a limitation of the parser but a safety
165 * boundary feature. It can be disabled with the XML_PARSE_HUGE
166 * parser option.
167 */
168const unsigned int xmlParserMaxDepth = 256;
169
170
171
172#define XML_PARSER_BIG_BUFFER_SIZE 300
173#define XML_PARSER_BUFFER_SIZE 100
174#define SAX_COMPAT_MODE BAD_CAST "SAX compatibility mode document"
175
176/**
177 * XML_PARSER_CHUNK_SIZE
178 *
 * When calling GROW, this is the minimal amount of data the parser
 * expects to have received. It is not a hard limit but an optimization
 * when reading strings like Names. It is not strictly needed as long as
 * the available input characters are followed by a 0 byte, which should
 * be provided by the I/O level.
184 */
185#define XML_PARSER_CHUNK_SIZE 100
186
187/**
188 * xmlParserVersion:
189 *
190 * Constant string describing the internal version of the library
191 */
192const char *const
193xmlParserVersion = LIBXML_VERSION_STRING LIBXML_VERSION_EXTRA;
194
195/*
196 * List of XML prefixed PI allowed by W3C specs
197 */
198
199static const char* const xmlW3CPIs[] = {
200 "xml-stylesheet",
201 "xml-model",
202 NULL
203};
204
205
206/* DEPR void xmlParserHandleReference(xmlParserCtxtPtr ctxt); */
207static xmlEntityPtr xmlParseStringPEReference(xmlParserCtxtPtr ctxt,
208 const xmlChar **str);
209
210static void
211xmlCtxtParseEntity(xmlParserCtxtPtr ctxt, xmlEntityPtr ent);
212
213static int
214xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity);
215
216/************************************************************************
217 * *
218 * Some factorized error routines *
219 * *
220 ************************************************************************/
221
222static void
223xmlErrMemory(xmlParserCtxtPtr ctxt) {
224 xmlCtxtErrMemory(ctxt);
225}
226
227/**
228 * xmlErrAttributeDup:
229 * @ctxt: an XML parser context
230 * @prefix: the attribute prefix
231 * @localname: the attribute localname
232 *
233 * Handle a redefinition of attribute error
234 */
235static void
236xmlErrAttributeDup(xmlParserCtxtPtr ctxt, const xmlChar * prefix,
237 const xmlChar * localname)
238{
239 if (prefix == NULL)
240 xmlCtxtErr(ctxt, NULL, XML_FROM_PARSER, XML_ERR_ATTRIBUTE_REDEFINED,
241 XML_ERR_FATAL, localname, NULL, NULL, 0,
242 "Attribute %s redefined\n", localname);
243 else
244 xmlCtxtErr(ctxt, NULL, XML_FROM_PARSER, XML_ERR_ATTRIBUTE_REDEFINED,
245 XML_ERR_FATAL, prefix, localname, NULL, 0,
246 "Attribute %s:%s redefined\n", prefix, localname);
247}
248
249/**
250 * xmlFatalErrMsg:
251 * @ctxt: an XML parser context
252 * @error: the error number
253 * @msg: the error message
254 *
255 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
256 */
257static void LIBXML_ATTR_FORMAT(3,0)
258xmlFatalErrMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
259 const char *msg)
260{
261 xmlCtxtErr(ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
262 NULL, NULL, NULL, 0, "%s", msg);
263}
264
265/**
266 * xmlWarningMsg:
267 * @ctxt: an XML parser context
268 * @error: the error number
269 * @msg: the error message
270 * @str1: extra data
271 * @str2: extra data
272 *
273 * Handle a warning.
274 */
275void LIBXML_ATTR_FORMAT(3,0)
276xmlWarningMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
277 const char *msg, const xmlChar *str1, const xmlChar *str2)
278{
279 xmlCtxtErr(ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_WARNING,
280 str1, str2, NULL, 0, msg, str1, str2);
281}
282
283/**
284 * xmlValidityError:
285 * @ctxt: an XML parser context
286 * @error: the error number
287 * @msg: the error message
288 * @str1: extra data
289 *
290 * Handle a validity error.
291 */
292static void LIBXML_ATTR_FORMAT(3,0)
293xmlValidityError(xmlParserCtxtPtr ctxt, xmlParserErrors error,
294 const char *msg, const xmlChar *str1, const xmlChar *str2)
295{
296 ctxt->valid = 0;
297
298 xmlCtxtErr(ctxt, NULL, XML_FROM_DTD, error, XML_ERR_ERROR,
299 str1, str2, NULL, 0, msg, str1, str2);
300}
301
302/**
303 * xmlFatalErrMsgInt:
304 * @ctxt: an XML parser context
305 * @error: the error number
306 * @msg: the error message
307 * @val: an integer value
308 *
309 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
310 */
311static void LIBXML_ATTR_FORMAT(3,0)
312xmlFatalErrMsgInt(xmlParserCtxtPtr ctxt, xmlParserErrors error,
313 const char *msg, int val)
314{
315 xmlCtxtErr(ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
316 NULL, NULL, NULL, val, msg, val);
317}
318
319/**
320 * xmlFatalErrMsgStrIntStr:
321 * @ctxt: an XML parser context
322 * @error: the error number
323 * @msg: the error message
324 * @str1: an string info
325 * @val: an integer value
326 * @str2: an string info
327 *
328 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
329 */
330static void LIBXML_ATTR_FORMAT(3,0)
331xmlFatalErrMsgStrIntStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
332 const char *msg, const xmlChar *str1, int val,
333 const xmlChar *str2)
334{
335 xmlCtxtErr(ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
336 str1, str2, NULL, val, msg, str1, val, str2);
337}
338
339/**
340 * xmlFatalErrMsgStr:
341 * @ctxt: an XML parser context
342 * @error: the error number
343 * @msg: the error message
344 * @val: a string value
345 *
346 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
347 */
348static void LIBXML_ATTR_FORMAT(3,0)
349xmlFatalErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
350 const char *msg, const xmlChar * val)
351{
352 xmlCtxtErr(ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
353 val, NULL, NULL, 0, msg, val);
354}
355
356/**
357 * xmlErrMsgStr:
358 * @ctxt: an XML parser context
359 * @error: the error number
360 * @msg: the error message
361 * @val: a string value
362 *
363 * Handle a non fatal parser error
364 */
365static void LIBXML_ATTR_FORMAT(3,0)
366xmlErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
367 const char *msg, const xmlChar * val)
368{
369 xmlCtxtErr(ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_ERROR,
370 val, NULL, NULL, 0, msg, val);
371}
372
373/**
374 * xmlNsErr:
375 * @ctxt: an XML parser context
376 * @error: the error number
377 * @msg: the message
378 * @info1: extra information string
379 * @info2: extra information string
380 *
381 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
382 */
383static void LIBXML_ATTR_FORMAT(3,0)
384xmlNsErr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
385 const char *msg,
386 const xmlChar * info1, const xmlChar * info2,
387 const xmlChar * info3)
388{
389 ctxt->nsWellFormed = 0;
390
391 xmlCtxtErr(ctxt, NULL, XML_FROM_NAMESPACE, error, XML_ERR_ERROR,
392 info1, info2, info3, 0, msg, info1, info2, info3);
393}
394
395/**
396 * xmlNsWarn
397 * @ctxt: an XML parser context
398 * @error: the error number
399 * @msg: the message
400 * @info1: extra information string
401 * @info2: extra information string
402 *
403 * Handle a namespace warning error
404 */
405static void LIBXML_ATTR_FORMAT(3,0)
406xmlNsWarn(xmlParserCtxtPtr ctxt, xmlParserErrors error,
407 const char *msg,
408 const xmlChar * info1, const xmlChar * info2,
409 const xmlChar * info3)
410{
411 xmlCtxtErr(ctxt, NULL, XML_FROM_NAMESPACE, error, XML_ERR_WARNING,
412 info1, info2, info3, 0, msg, info1, info2, info3);
413}
414
/*
 * Add @val to *@dst without wrapping around: when the sum would exceed
 * ULONG_MAX, *@dst is clamped to ULONG_MAX instead.
 */
static void
xmlSaturatedAdd(unsigned long *dst, unsigned long val) {
    /* Largest amount that can still be added without overflow. */
    unsigned long room = ULONG_MAX - *dst;

    *dst = (val > room) ? ULONG_MAX : *dst + val;
}
422
/*
 * Add the size_t value @val to *@dst without wrapping around: when the
 * sum would exceed ULONG_MAX, *@dst is clamped to ULONG_MAX instead.
 *
 * @val is taken as size_t so that on platforms where size_t is wider
 * than unsigned long (e.g. 64-bit Windows) a large value saturates the
 * counter instead of being truncated before the check.
 */
static void
xmlSaturatedAddSizeT(unsigned long *dst, size_t val) {
    /* The comparison is carried out in the wider of the two types. */
    if (val > ULONG_MAX - *dst)
        *dst = ULONG_MAX;
    else
        *dst += (unsigned long) val;
}
430
431/**
432 * xmlParserEntityCheck:
433 * @ctxt: parser context
434 * @extra: sum of unexpanded entity sizes
435 *
436 * Check for non-linear entity expansion behaviour.
437 *
438 * In some cases like xmlExpandEntityInAttValue, this function is called
439 * for each, possibly nested entity and its unexpanded content length.
440 *
441 * In other cases like xmlParseReference, it's only called for each
442 * top-level entity with its unexpanded content length plus the sum of
443 * the unexpanded content lengths (plus fixed cost) of all nested
444 * entities.
445 *
446 * Summing the unexpanded lengths also adds the length of the reference.
447 * This is by design. Taking the length of the entity name into account
448 * discourages attacks that try to waste CPU time with abusively long
449 * entity names. See test/recurse/lol6.xml for example. Each call also
450 * adds some fixed cost XML_ENT_FIXED_COST to discourage attacks with
451 * short entities.
452 *
453 * Returns 1 on error, 0 on success.
454 */
455static int
456xmlParserEntityCheck(xmlParserCtxtPtr ctxt, unsigned long extra)
457{
458 unsigned long consumed;
459 unsigned long *expandedSize;
460 xmlParserInputPtr input = ctxt->input;
461 xmlEntityPtr entity = input->entity;
462
463 if ((entity) && (entity->flags & XML_ENT_CHECKED))
464 return(0);
465
466 /*
467 * Compute total consumed bytes so far, including input streams of
468 * external entities.
469 */
470 consumed = input->consumed;
471 xmlSaturatedAddSizeT(&consumed, input->cur - input->base);
472 xmlSaturatedAdd(&consumed, ctxt->sizeentities);
473
474 if (entity)
475 expandedSize = &entity->expandedSize;
476 else
477 expandedSize = &ctxt->sizeentcopy;
478
479 /*
480 * Add extra cost and some fixed cost.
481 */
482 xmlSaturatedAdd(expandedSize, extra);
483 xmlSaturatedAdd(expandedSize, XML_ENT_FIXED_COST);
484
485 /*
486 * It's important to always use saturation arithmetic when tracking
487 * entity sizes to make the size checks reliable. If "sizeentcopy"
488 * overflows, we have to abort.
489 */
490 if ((*expandedSize > XML_PARSER_ALLOWED_EXPANSION) &&
491 ((*expandedSize >= ULONG_MAX) ||
492 (*expandedSize / ctxt->maxAmpl > consumed))) {
493 xmlFatalErrMsg(ctxt, XML_ERR_RESOURCE_LIMIT,
494 "Maximum entity amplification factor exceeded, see "
495 "xmlCtxtSetMaxAmplification.\n");
496 xmlHaltParser(ctxt);
497 return(1);
498 }
499
500 return(0);
501}
502
503/************************************************************************
504 * *
505 * Library wide options *
506 * *
507 ************************************************************************/
508
509/**
510 * xmlHasFeature:
511 * @feature: the feature to be examined
512 *
513 * Examines if the library has been compiled with a given feature.
514 *
515 * Returns a non-zero value if the feature exist, otherwise zero.
516 * Returns zero (0) if the feature does not exist or an unknown
517 * unknown feature is requested, non-zero otherwise.
518 */
519int
520xmlHasFeature(xmlFeature feature)
521{
522 switch (feature) {
523 case XML_WITH_THREAD:
524#ifdef LIBXML_THREAD_ENABLED
525 return(1);
526#else
527 return(0);
528#endif
529 case XML_WITH_TREE:
530#ifdef LIBXML_TREE_ENABLED
531 return(1);
532#else
533 return(0);
534#endif
535 case XML_WITH_OUTPUT:
536#ifdef LIBXML_OUTPUT_ENABLED
537 return(1);
538#else
539 return(0);
540#endif
541 case XML_WITH_PUSH:
542#ifdef LIBXML_PUSH_ENABLED
543 return(1);
544#else
545 return(0);
546#endif
547 case XML_WITH_READER:
548#ifdef LIBXML_READER_ENABLED
549 return(1);
550#else
551 return(0);
552#endif
553 case XML_WITH_PATTERN:
554#ifdef LIBXML_PATTERN_ENABLED
555 return(1);
556#else
557 return(0);
558#endif
559 case XML_WITH_WRITER:
560#ifdef LIBXML_WRITER_ENABLED
561 return(1);
562#else
563 return(0);
564#endif
565 case XML_WITH_SAX1:
566#ifdef LIBXML_SAX1_ENABLED
567 return(1);
568#else
569 return(0);
570#endif
571 case XML_WITH_FTP:
572#ifdef LIBXML_FTP_ENABLED
573 return(1);
574#else
575 return(0);
576#endif
577 case XML_WITH_HTTP:
578#ifdef LIBXML_HTTP_ENABLED
579 return(1);
580#else
581 return(0);
582#endif
583 case XML_WITH_VALID:
584#ifdef LIBXML_VALID_ENABLED
585 return(1);
586#else
587 return(0);
588#endif
589 case XML_WITH_HTML:
590#ifdef LIBXML_HTML_ENABLED
591 return(1);
592#else
593 return(0);
594#endif
595 case XML_WITH_LEGACY:
596#ifdef LIBXML_LEGACY_ENABLED
597 return(1);
598#else
599 return(0);
600#endif
601 case XML_WITH_C14N:
602#ifdef LIBXML_C14N_ENABLED
603 return(1);
604#else
605 return(0);
606#endif
607 case XML_WITH_CATALOG:
608#ifdef LIBXML_CATALOG_ENABLED
609 return(1);
610#else
611 return(0);
612#endif
613 case XML_WITH_XPATH:
614#ifdef LIBXML_XPATH_ENABLED
615 return(1);
616#else
617 return(0);
618#endif
619 case XML_WITH_XPTR:
620#ifdef LIBXML_XPTR_ENABLED
621 return(1);
622#else
623 return(0);
624#endif
625 case XML_WITH_XINCLUDE:
626#ifdef LIBXML_XINCLUDE_ENABLED
627 return(1);
628#else
629 return(0);
630#endif
631 case XML_WITH_ICONV:
632#ifdef LIBXML_ICONV_ENABLED
633 return(1);
634#else
635 return(0);
636#endif
637 case XML_WITH_ISO8859X:
638#ifdef LIBXML_ISO8859X_ENABLED
639 return(1);
640#else
641 return(0);
642#endif
643 case XML_WITH_UNICODE:
644#ifdef LIBXML_UNICODE_ENABLED
645 return(1);
646#else
647 return(0);
648#endif
649 case XML_WITH_REGEXP:
650#ifdef LIBXML_REGEXP_ENABLED
651 return(1);
652#else
653 return(0);
654#endif
655 case XML_WITH_AUTOMATA:
656#ifdef LIBXML_AUTOMATA_ENABLED
657 return(1);
658#else
659 return(0);
660#endif
661 case XML_WITH_EXPR:
662#ifdef LIBXML_EXPR_ENABLED
663 return(1);
664#else
665 return(0);
666#endif
667 case XML_WITH_SCHEMAS:
668#ifdef LIBXML_SCHEMAS_ENABLED
669 return(1);
670#else
671 return(0);
672#endif
673 case XML_WITH_SCHEMATRON:
674#ifdef LIBXML_SCHEMATRON_ENABLED
675 return(1);
676#else
677 return(0);
678#endif
679 case XML_WITH_MODULES:
680#ifdef LIBXML_MODULES_ENABLED
681 return(1);
682#else
683 return(0);
684#endif
685 case XML_WITH_DEBUG:
686#ifdef LIBXML_DEBUG_ENABLED
687 return(1);
688#else
689 return(0);
690#endif
691 case XML_WITH_DEBUG_MEM:
692 return(0);
693 case XML_WITH_ZLIB:
694#ifdef LIBXML_ZLIB_ENABLED
695 return(1);
696#else
697 return(0);
698#endif
699 case XML_WITH_LZMA:
700#ifdef LIBXML_LZMA_ENABLED
701 return(1);
702#else
703 return(0);
704#endif
705 case XML_WITH_ICU:
706#ifdef LIBXML_ICU_ENABLED
707 return(1);
708#else
709 return(0);
710#endif
711 default:
712 break;
713 }
714 return(0);
715}
716
717/************************************************************************
718 * *
719 * Simple string buffer *
720 * *
721 ************************************************************************/
722
723typedef struct {
724 xmlChar *mem;
725 unsigned size;
726 unsigned cap; /* size < cap */
727 unsigned max; /* size <= max */
728 xmlParserErrors code;
729} xmlSBuf;
730
731static void
732xmlSBufInit(xmlSBuf *buf, unsigned max) {
733 buf->mem = NULL;
734 buf->size = 0;
735 buf->cap = 0;
736 buf->max = max;
737 buf->code = XML_ERR_OK;
738}
739
740static int
741xmlSBufGrow(xmlSBuf *buf, unsigned len) {
742 xmlChar *mem;
743 unsigned cap;
744
745 if (len >= UINT_MAX / 2 - buf->size) {
746 if (buf->code == XML_ERR_OK)
747 buf->code = XML_ERR_RESOURCE_LIMIT;
748 return(-1);
749 }
750
751 cap = (buf->size + len) * 2;
752 if (cap < 240)
753 cap = 240;
754
755 mem = xmlRealloc(buf->mem, cap);
756 if (mem == NULL) {
757 buf->code = XML_ERR_NO_MEMORY;
758 return(-1);
759 }
760
761 buf->mem = mem;
762 buf->cap = cap;
763
764 return(0);
765}
766
767static void
768xmlSBufAddString(xmlSBuf *buf, const xmlChar *str, unsigned len) {
769 if (buf->max - buf->size < len) {
770 if (buf->code == XML_ERR_OK)
771 buf->code = XML_ERR_RESOURCE_LIMIT;
772 return;
773 }
774
775 if (buf->cap - buf->size <= len) {
776 if (xmlSBufGrow(buf, len) < 0)
777 return;
778 }
779
780 if (len > 0)
781 memcpy(buf->mem + buf->size, str, len);
782 buf->size += len;
783}
784
785static void
786xmlSBufAddCString(xmlSBuf *buf, const char *str, unsigned len) {
787 xmlSBufAddString(buf, (const xmlChar *) str, len);
788}
789
790static void
791xmlSBufAddChar(xmlSBuf *buf, int c) {
792 xmlChar *end;
793
794 if (buf->max - buf->size < 4) {
795 if (buf->code == XML_ERR_OK)
796 buf->code = XML_ERR_RESOURCE_LIMIT;
797 return;
798 }
799
800 if (buf->cap - buf->size <= 4) {
801 if (xmlSBufGrow(buf, 4) < 0)
802 return;
803 }
804
805 end = buf->mem + buf->size;
806
807 if (c < 0x80) {
808 *end = (xmlChar) c;
809 buf->size += 1;
810 } else {
811 buf->size += xmlCopyCharMultiByte(end, c);
812 }
813}
814
815static void
816xmlSBufAddReplChar(xmlSBuf *buf) {
817 xmlSBufAddCString(buf, "\xEF\xBF\xBD", 3);
818}
819
820static void
821xmlSBufReportError(xmlSBuf *buf, xmlParserCtxtPtr ctxt, const char *errMsg) {
822 if (buf->code == XML_ERR_NO_MEMORY)
823 xmlCtxtErrMemory(ctxt);
824 else
825 xmlFatalErr(ctxt, buf->code, errMsg);
826}
827
828static xmlChar *
829xmlSBufFinish(xmlSBuf *buf, int *sizeOut, xmlParserCtxtPtr ctxt,
830 const char *errMsg) {
831 if (buf->mem == NULL) {
832 buf->mem = xmlMalloc(1);
833 if (buf->mem == NULL) {
834 buf->code = XML_ERR_NO_MEMORY;
835 } else {
836 buf->mem[0] = 0;
837 }
838 } else {
839 buf->mem[buf->size] = 0;
840 }
841
842 if (buf->code == XML_ERR_OK) {
843 if (sizeOut != NULL)
844 *sizeOut = buf->size;
845 return(buf->mem);
846 }
847
848 xmlSBufReportError(buf, ctxt, errMsg);
849
850 xmlFree(buf->mem);
851
852 if (sizeOut != NULL)
853 *sizeOut = 0;
854 return(NULL);
855}
856
857static void
858xmlSBufCleanup(xmlSBuf *buf, xmlParserCtxtPtr ctxt, const char *errMsg) {
859 if (buf->code != XML_ERR_OK)
860 xmlSBufReportError(buf, ctxt, errMsg);
861
862 xmlFree(buf->mem);
863}
864
865static int
866xmlUTF8MultibyteLen(xmlParserCtxtPtr ctxt, const xmlChar *str,
867 const char *errMsg) {
868 int c = str[0];
869 int c1 = str[1];
870
871 if ((c1 & 0xC0) != 0x80)
872 goto encoding_error;
873
874 if (c < 0xE0) {
875 /* 2-byte sequence */
876 if (c < 0xC2)
877 goto encoding_error;
878
879 return(2);
880 } else {
881 int c2 = str[2];
882
883 if ((c2 & 0xC0) != 0x80)
884 goto encoding_error;
885
886 if (c < 0xF0) {
887 /* 3-byte sequence */
888 if (c == 0xE0) {
889 /* overlong */
890 if (c1 < 0xA0)
891 goto encoding_error;
892 } else if (c == 0xED) {
893 /* surrogate */
894 if (c1 >= 0xA0)
895 goto encoding_error;
896 } else if (c == 0xEF) {
897 /* U+FFFE and U+FFFF are invalid Chars */
898 if ((c1 == 0xBF) && (c2 >= 0xBE))
899 xmlFatalErrMsg(ctxt, XML_ERR_INVALID_CHAR, errMsg);
900 }
901
902 return(3);
903 } else {
904 /* 4-byte sequence */
905 if ((str[3] & 0xC0) != 0x80)
906 goto encoding_error;
907 if (c == 0xF0) {
908 /* overlong */
909 if (c1 < 0x90)
910 goto encoding_error;
911 } else if (c >= 0xF4) {
912 /* greater than 0x10FFFF */
913 if ((c > 0xF4) || (c1 >= 0x90))
914 goto encoding_error;
915 }
916
917 return(4);
918 }
919 }
920
921encoding_error:
922 /* Only report the first error */
923 if ((ctxt->input->flags & XML_INPUT_ENCODING_ERROR) == 0) {
924 xmlCtxtErrIO(ctxt, XML_ERR_INVALID_ENCODING, NULL);
925 ctxt->input->flags |= XML_INPUT_ENCODING_ERROR;
926 }
927
928 return(0);
929}
930
931/************************************************************************
932 * *
933 * SAX2 defaulted attributes handling *
934 * *
935 ************************************************************************/
936
937/**
938 * xmlCtxtInitializeLate:
939 * @ctxt: an XML parser context
940 *
941 * Final initialization of the parser context before starting to parse.
942 *
943 * This accounts for users modifying struct members of parser context
944 * directly.
945 */
946static void
947xmlCtxtInitializeLate(xmlParserCtxtPtr ctxt) {
948 xmlSAXHandlerPtr sax;
949
950 /* Avoid unused variable warning if features are disabled. */
951 (void) sax;
952
953 /*
954 * Changing the SAX struct directly is still widespread practice
955 * in internal and external code.
956 */
957 if (ctxt == NULL) return;
958 sax = ctxt->sax;
959#ifdef LIBXML_SAX1_ENABLED
960 /*
961 * Only enable SAX2 if there SAX2 element handlers, except when there
962 * are no element handlers at all.
963 */
964 if (((ctxt->options & XML_PARSE_SAX1) == 0) &&
965 (sax) &&
966 (sax->initialized == XML_SAX2_MAGIC) &&
967 ((sax->startElementNs != NULL) ||
968 (sax->endElementNs != NULL) ||
969 ((sax->startElement == NULL) && (sax->endElement == NULL))))
970 ctxt->sax2 = 1;
971#else
972 ctxt->sax2 = 1;
973#endif /* LIBXML_SAX1_ENABLED */
974
975 /*
976 * Some users replace the dictionary directly in the context struct.
977 * We really need an API function to do that cleanly.
978 */
979 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
980 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
981 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
982 if ((ctxt->str_xml==NULL) || (ctxt->str_xmlns==NULL) ||
983 (ctxt->str_xml_ns == NULL)) {
984 xmlErrMemory(ctxt);
985 }
986}
987
988typedef struct {
989 xmlHashedString prefix;
990 xmlHashedString name;
991 xmlHashedString value;
992 const xmlChar *valueEnd;
993 int external;
994 int expandedSize;
995} xmlDefAttr;
996
997typedef struct _xmlDefAttrs xmlDefAttrs;
998typedef xmlDefAttrs *xmlDefAttrsPtr;
999struct _xmlDefAttrs {
1000 int nbAttrs; /* number of defaulted attributes on that element */
1001 int maxAttrs; /* the size of the array */
1002#if __STDC_VERSION__ >= 199901L
1003 /* Using a C99 flexible array member avoids UBSan errors. */
1004 xmlDefAttr attrs[]; /* array of localname/prefix/values/external */
1005#else
1006 xmlDefAttr attrs[1];
1007#endif
1008};
1009
1010/**
1011 * xmlAttrNormalizeSpace:
1012 * @src: the source string
1013 * @dst: the target string
1014 *
1015 * Normalize the space in non CDATA attribute values:
1016 * If the attribute type is not CDATA, then the XML processor MUST further
1017 * process the normalized attribute value by discarding any leading and
1018 * trailing space (#x20) characters, and by replacing sequences of space
1019 * (#x20) characters by a single space (#x20) character.
1020 * Note that the size of dst need to be at least src, and if one doesn't need
1021 * to preserve dst (and it doesn't come from a dictionary or read-only) then
1022 * passing src as dst is just fine.
1023 *
1024 * Returns a pointer to the normalized value (dst) or NULL if no conversion
1025 * is needed.
1026 */
1027static xmlChar *
1028xmlAttrNormalizeSpace(const xmlChar *src, xmlChar *dst)
1029{
1030 if ((src == NULL) || (dst == NULL))
1031 return(NULL);
1032
1033 while (*src == 0x20) src++;
1034 while (*src != 0) {
1035 if (*src == 0x20) {
1036 while (*src == 0x20) src++;
1037 if (*src != 0)
1038 *dst++ = 0x20;
1039 } else {
1040 *dst++ = *src++;
1041 }
1042 }
1043 *dst = 0;
1044 if (dst == src)
1045 return(NULL);
1046 return(dst);
1047}
1048
1049/**
1050 * xmlAddDefAttrs:
1051 * @ctxt: an XML parser context
1052 * @fullname: the element fullname
1053 * @fullattr: the attribute fullname
1054 * @value: the attribute value
1055 *
1056 * Add a defaulted attribute for an element
1057 */
1058static void
1059xmlAddDefAttrs(xmlParserCtxtPtr ctxt,
1060 const xmlChar *fullname,
1061 const xmlChar *fullattr,
1062 const xmlChar *value) {
1063 xmlDefAttrsPtr defaults;
1064 xmlDefAttr *attr;
1065 int len, expandedSize;
1066 xmlHashedString name;
1067 xmlHashedString prefix;
1068 xmlHashedString hvalue;
1069 const xmlChar *localname;
1070
1071 /*
1072 * Allows to detect attribute redefinitions
1073 */
1074 if (ctxt->attsSpecial != NULL) {
1075 if (xmlHashLookup2(ctxt->attsSpecial, fullname, fullattr) != NULL)
1076 return;
1077 }
1078
1079 if (ctxt->attsDefault == NULL) {
1080 ctxt->attsDefault = xmlHashCreateDict(10, ctxt->dict);
1081 if (ctxt->attsDefault == NULL)
1082 goto mem_error;
1083 }
1084
1085 /*
1086 * split the element name into prefix:localname , the string found
1087 * are within the DTD and then not associated to namespace names.
1088 */
1089 localname = xmlSplitQName3(fullname, &len);
1090 if (localname == NULL) {
1091 name = xmlDictLookupHashed(ctxt->dict, fullname, -1);
1092 prefix.name = NULL;
1093 } else {
1094 name = xmlDictLookupHashed(ctxt->dict, localname, -1);
1095 prefix = xmlDictLookupHashed(ctxt->dict, fullname, len);
1096 if (prefix.name == NULL)
1097 goto mem_error;
1098 }
1099 if (name.name == NULL)
1100 goto mem_error;
1101
1102 /*
1103 * make sure there is some storage
1104 */
1105 defaults = xmlHashLookup2(ctxt->attsDefault, name.name, prefix.name);
1106 if ((defaults == NULL) ||
1107 (defaults->nbAttrs >= defaults->maxAttrs)) {
1108 xmlDefAttrsPtr temp;
1109 int newSize;
1110
1111 newSize = (defaults != NULL) ? 2 * defaults->maxAttrs : 4;
1112 temp = xmlRealloc(defaults,
1113 sizeof(*defaults) + newSize * sizeof(xmlDefAttr));
1114 if (temp == NULL)
1115 goto mem_error;
1116 if (defaults == NULL)
1117 temp->nbAttrs = 0;
1118 temp->maxAttrs = newSize;
1119 defaults = temp;
1120 if (xmlHashUpdateEntry2(ctxt->attsDefault, name.name, prefix.name,
1121 defaults, NULL) < 0) {
1122 xmlFree(defaults);
1123 goto mem_error;
1124 }
1125 }
1126
1127 /*
1128 * Split the attribute name into prefix:localname , the string found
1129 * are within the DTD and hen not associated to namespace names.
1130 */
1131 localname = xmlSplitQName3(fullattr, &len);
1132 if (localname == NULL) {
1133 name = xmlDictLookupHashed(ctxt->dict, fullattr, -1);
1134 prefix.name = NULL;
1135 } else {
1136 name = xmlDictLookupHashed(ctxt->dict, localname, -1);
1137 prefix = xmlDictLookupHashed(ctxt->dict, fullattr, len);
1138 if (prefix.name == NULL)
1139 goto mem_error;
1140 }
1141 if (name.name == NULL)
1142 goto mem_error;
1143
1144 /* intern the string and precompute the end */
1145 len = strlen((const char *) value);
1146 hvalue = xmlDictLookupHashed(ctxt->dict, value, len);
1147 if (hvalue.name == NULL)
1148 goto mem_error;
1149
1150 expandedSize = strlen((const char *) name.name);
1151 if (prefix.name != NULL)
1152 expandedSize += strlen((const char *) prefix.name);
1153 expandedSize += len;
1154
1155 attr = &defaults->attrs[defaults->nbAttrs++];
1156 attr->name = name;
1157 attr->prefix = prefix;
1158 attr->value = hvalue;
1159 attr->valueEnd = hvalue.name + len;
1160 attr->external = PARSER_EXTERNAL(ctxt);
1161 attr->expandedSize = expandedSize;
1162
1163 return;
1164
1165mem_error:
1166 xmlErrMemory(ctxt);
1167 return;
1168}
1169
1170/**
1171 * xmlAddSpecialAttr:
1172 * @ctxt: an XML parser context
1173 * @fullname: the element fullname
1174 * @fullattr: the attribute fullname
1175 * @type: the attribute type
1176 *
1177 * Register this attribute type
1178 */
1179static void
1180xmlAddSpecialAttr(xmlParserCtxtPtr ctxt,
1181 const xmlChar *fullname,
1182 const xmlChar *fullattr,
1183 int type)
1184{
1185 if (ctxt->attsSpecial == NULL) {
1186 ctxt->attsSpecial = xmlHashCreateDict(10, ctxt->dict);
1187 if (ctxt->attsSpecial == NULL)
1188 goto mem_error;
1189 }
1190
1191 if (xmlHashAdd2(ctxt->attsSpecial, fullname, fullattr,
1192 (void *) (ptrdiff_t) type) < 0)
1193 goto mem_error;
1194 return;
1195
1196mem_error:
1197 xmlErrMemory(ctxt);
1198 return;
1199}
1200
1201/**
1202 * xmlCleanSpecialAttrCallback:
1203 *
1204 * Removes CDATA attributes from the special attribute table
1205 */
1206static void
1207xmlCleanSpecialAttrCallback(void *payload, void *data,
1208 const xmlChar *fullname, const xmlChar *fullattr,
1209 const xmlChar *unused ATTRIBUTE_UNUSED) {
1210 xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) data;
1211
1212 if (((ptrdiff_t) payload) == XML_ATTRIBUTE_CDATA) {
1213 xmlHashRemoveEntry2(ctxt->attsSpecial, fullname, fullattr, NULL);
1214 }
1215}
1216
1217/**
1218 * xmlCleanSpecialAttr:
1219 * @ctxt: an XML parser context
1220 *
1221 * Trim the list of attributes defined to remove all those of type
1222 * CDATA as they are not special. This call should be done when finishing
1223 * to parse the DTD and before starting to parse the document root.
1224 */
1225static void
1226xmlCleanSpecialAttr(xmlParserCtxtPtr ctxt)
1227{
1228 if (ctxt->attsSpecial == NULL)
1229 return;
1230
1231 xmlHashScanFull(ctxt->attsSpecial, xmlCleanSpecialAttrCallback, ctxt);
1232
1233 if (xmlHashSize(ctxt->attsSpecial) == 0) {
1234 xmlHashFree(ctxt->attsSpecial, NULL);
1235 ctxt->attsSpecial = NULL;
1236 }
1237 return;
1238}
1239
1240/**
1241 * xmlCheckLanguageID:
1242 * @lang: pointer to the string value
1243 *
1244 * DEPRECATED: Internal function, do not use.
1245 *
1246 * Checks that the value conforms to the LanguageID production:
1247 *
1248 * NOTE: this is somewhat deprecated, those productions were removed from
1249 * the XML Second edition.
1250 *
1251 * [33] LanguageID ::= Langcode ('-' Subcode)*
1252 * [34] Langcode ::= ISO639Code | IanaCode | UserCode
1253 * [35] ISO639Code ::= ([a-z] | [A-Z]) ([a-z] | [A-Z])
1254 * [36] IanaCode ::= ('i' | 'I') '-' ([a-z] | [A-Z])+
1255 * [37] UserCode ::= ('x' | 'X') '-' ([a-z] | [A-Z])+
1256 * [38] Subcode ::= ([a-z] | [A-Z])+
1257 *
1258 * The current REC reference the successors of RFC 1766, currently 5646
1259 *
1260 * http://www.rfc-editor.org/rfc/rfc5646.txt
1261 * langtag = language
1262 * ["-" script]
1263 * ["-" region]
1264 * *("-" variant)
1265 * *("-" extension)
1266 * ["-" privateuse]
1267 * language = 2*3ALPHA ; shortest ISO 639 code
1268 * ["-" extlang] ; sometimes followed by
1269 * ; extended language subtags
1270 * / 4ALPHA ; or reserved for future use
1271 * / 5*8ALPHA ; or registered language subtag
1272 *
1273 * extlang = 3ALPHA ; selected ISO 639 codes
1274 * *2("-" 3ALPHA) ; permanently reserved
1275 *
1276 * script = 4ALPHA ; ISO 15924 code
1277 *
1278 * region = 2ALPHA ; ISO 3166-1 code
1279 * / 3DIGIT ; UN M.49 code
1280 *
1281 * variant = 5*8alphanum ; registered variants
1282 * / (DIGIT 3alphanum)
1283 *
1284 * extension = singleton 1*("-" (2*8alphanum))
1285 *
1286 * ; Single alphanumerics
1287 * ; "x" reserved for private use
1288 * singleton = DIGIT ; 0 - 9
1289 * / %x41-57 ; A - W
1290 * / %x59-5A ; Y - Z
1291 * / %x61-77 ; a - w
1292 * / %x79-7A ; y - z
1293 *
1294 * it sounds right to still allow Irregular i-xxx IANA and user codes too
1295 * The parser below doesn't try to cope with extension or privateuse
1296 * that could be added but that's not interoperable anyway
1297 *
1298 * Returns 1 if correct 0 otherwise
1299 **/
1300int
1301xmlCheckLanguageID(const xmlChar * lang)
1302{
1303 const xmlChar *cur = lang, *nxt;
1304
1305 if (cur == NULL)
1306 return (0);
1307 if (((cur[0] == 'i') && (cur[1] == '-')) ||
1308 ((cur[0] == 'I') && (cur[1] == '-')) ||
1309 ((cur[0] == 'x') && (cur[1] == '-')) ||
1310 ((cur[0] == 'X') && (cur[1] == '-'))) {
1311 /*
1312 * Still allow IANA code and user code which were coming
1313 * from the previous version of the XML-1.0 specification
1314 * it's deprecated but we should not fail
1315 */
1316 cur += 2;
1317 while (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
1318 ((cur[0] >= 'a') && (cur[0] <= 'z')))
1319 cur++;
1320 return(cur[0] == 0);
1321 }
1322 nxt = cur;
1323 while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1324 ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1325 nxt++;
1326 if (nxt - cur >= 4) {
1327 /*
1328 * Reserved
1329 */
1330 if ((nxt - cur > 8) || (nxt[0] != 0))
1331 return(0);
1332 return(1);
1333 }
1334 if (nxt - cur < 2)
1335 return(0);
1336 /* we got an ISO 639 code */
1337 if (nxt[0] == 0)
1338 return(1);
1339 if (nxt[0] != '-')
1340 return(0);
1341
1342 nxt++;
1343 cur = nxt;
1344 /* now we can have extlang or script or region or variant */
1345 if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1346 goto region_m49;
1347
1348 while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1349 ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1350 nxt++;
1351 if (nxt - cur == 4)
1352 goto script;
1353 if (nxt - cur == 2)
1354 goto region;
1355 if ((nxt - cur >= 5) && (nxt - cur <= 8))
1356 goto variant;
1357 if (nxt - cur != 3)
1358 return(0);
1359 /* we parsed an extlang */
1360 if (nxt[0] == 0)
1361 return(1);
1362 if (nxt[0] != '-')
1363 return(0);
1364
1365 nxt++;
1366 cur = nxt;
1367 /* now we can have script or region or variant */
1368 if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1369 goto region_m49;
1370
1371 while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1372 ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1373 nxt++;
1374 if (nxt - cur == 2)
1375 goto region;
1376 if ((nxt - cur >= 5) && (nxt - cur <= 8))
1377 goto variant;
1378 if (nxt - cur != 4)
1379 return(0);
1380 /* we parsed a script */
1381script:
1382 if (nxt[0] == 0)
1383 return(1);
1384 if (nxt[0] != '-')
1385 return(0);
1386
1387 nxt++;
1388 cur = nxt;
1389 /* now we can have region or variant */
1390 if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1391 goto region_m49;
1392
1393 while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1394 ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1395 nxt++;
1396
1397 if ((nxt - cur >= 5) && (nxt - cur <= 8))
1398 goto variant;
1399 if (nxt - cur != 2)
1400 return(0);
1401 /* we parsed a region */
1402region:
1403 if (nxt[0] == 0)
1404 return(1);
1405 if (nxt[0] != '-')
1406 return(0);
1407
1408 nxt++;
1409 cur = nxt;
1410 /* now we can just have a variant */
1411 while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1412 ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1413 nxt++;
1414
1415 if ((nxt - cur < 5) || (nxt - cur > 8))
1416 return(0);
1417
1418 /* we parsed a variant */
1419variant:
1420 if (nxt[0] == 0)
1421 return(1);
1422 if (nxt[0] != '-')
1423 return(0);
1424 /* extensions and private use subtags not checked */
1425 return (1);
1426
1427region_m49:
1428 if (((nxt[1] >= '0') && (nxt[1] <= '9')) &&
1429 ((nxt[2] >= '0') && (nxt[2] <= '9'))) {
1430 nxt += 3;
1431 goto region;
1432 }
1433 return(0);
1434}
1435
1436/************************************************************************
1437 * *
1438 * Parser stacks related functions and macros *
1439 * *
1440 ************************************************************************/
1441
1442static xmlChar *
1443xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar **str);
1444
1445/**
1446 * xmlParserNsCreate:
1447 *
1448 * Create a new namespace database.
1449 *
1450 * Returns the new obejct.
1451 */
1452xmlParserNsData *
1453xmlParserNsCreate(void) {
1454 xmlParserNsData *nsdb = xmlMalloc(sizeof(*nsdb));
1455
1456 if (nsdb == NULL)
1457 return(NULL);
1458 memset(nsdb, 0, sizeof(*nsdb));
1459 nsdb->defaultNsIndex = INT_MAX;
1460
1461 return(nsdb);
1462}
1463
1464/**
1465 * xmlParserNsFree:
1466 * @nsdb: namespace database
1467 *
1468 * Free a namespace database.
1469 */
1470void
1471xmlParserNsFree(xmlParserNsData *nsdb) {
1472 if (nsdb == NULL)
1473 return;
1474
1475 xmlFree(nsdb->extra);
1476 xmlFree(nsdb->hash);
1477 xmlFree(nsdb);
1478}
1479
1480/**
1481 * xmlParserNsReset:
1482 * @nsdb: namespace database
1483 *
1484 * Reset a namespace database.
1485 */
1486static void
1487xmlParserNsReset(xmlParserNsData *nsdb) {
1488 if (nsdb == NULL)
1489 return;
1490
1491 nsdb->hashElems = 0;
1492 nsdb->elementId = 0;
1493 nsdb->defaultNsIndex = INT_MAX;
1494
1495 if (nsdb->hash)
1496 memset(nsdb->hash, 0, nsdb->hashSize * sizeof(nsdb->hash[0]));
1497}
1498
1499/**
1500 * xmlParserStartElement:
1501 * @nsdb: namespace database
1502 *
1503 * Signal that a new element has started.
1504 *
1505 * Returns 0 on success, -1 if the element counter overflowed.
1506 */
1507static int
1508xmlParserNsStartElement(xmlParserNsData *nsdb) {
1509 if (nsdb->elementId == UINT_MAX)
1510 return(-1);
1511 nsdb->elementId++;
1512
1513 return(0);
1514}
1515
1516/**
1517 * xmlParserNsLookup:
1518 * @ctxt: parser context
1519 * @prefix: namespace prefix
1520 * @bucketPtr: optional bucket (return value)
1521 *
1522 * Lookup namespace with given prefix. If @bucketPtr is non-NULL, it will
1523 * be set to the matching bucket, or the first empty bucket if no match
1524 * was found.
1525 *
1526 * Returns the namespace index on success, INT_MAX if no namespace was
1527 * found.
1528 */
1529static int
1530xmlParserNsLookup(xmlParserCtxtPtr ctxt, const xmlHashedString *prefix,
1531 xmlParserNsBucket **bucketPtr) {
1532 xmlParserNsBucket *bucket, *tombstone;
1533 unsigned index, hashValue;
1534
1535 if (prefix->name == NULL)
1536 return(ctxt->nsdb->defaultNsIndex);
1537
1538 if (ctxt->nsdb->hashSize == 0)
1539 return(INT_MAX);
1540
1541 hashValue = prefix->hashValue;
1542 index = hashValue & (ctxt->nsdb->hashSize - 1);
1543 bucket = &ctxt->nsdb->hash[index];
1544 tombstone = NULL;
1545
1546 while (bucket->hashValue) {
1547 if (bucket->index == INT_MAX) {
1548 if (tombstone == NULL)
1549 tombstone = bucket;
1550 } else if (bucket->hashValue == hashValue) {
1551 if (ctxt->nsTab[bucket->index * 2] == prefix->name) {
1552 if (bucketPtr != NULL)
1553 *bucketPtr = bucket;
1554 return(bucket->index);
1555 }
1556 }
1557
1558 index++;
1559 bucket++;
1560 if (index == ctxt->nsdb->hashSize) {
1561 index = 0;
1562 bucket = ctxt->nsdb->hash;
1563 }
1564 }
1565
1566 if (bucketPtr != NULL)
1567 *bucketPtr = tombstone ? tombstone : bucket;
1568 return(INT_MAX);
1569}
1570
1571/**
1572 * xmlParserNsLookupUri:
1573 * @ctxt: parser context
1574 * @prefix: namespace prefix
1575 *
1576 * Lookup namespace URI with given prefix.
1577 *
1578 * Returns the namespace URI on success, NULL if no namespace was found.
1579 */
1580static const xmlChar *
1581xmlParserNsLookupUri(xmlParserCtxtPtr ctxt, const xmlHashedString *prefix) {
1582 const xmlChar *ret;
1583 int nsIndex;
1584
1585 if (prefix->name == ctxt->str_xml)
1586 return(ctxt->str_xml_ns);
1587
1588 /*
1589 * minNsIndex is used when building an entity tree. We must
1590 * ignore namespaces declared outside the entity.
1591 */
1592 nsIndex = xmlParserNsLookup(ctxt, prefix, NULL);
1593 if ((nsIndex == INT_MAX) || (nsIndex < ctxt->nsdb->minNsIndex))
1594 return(NULL);
1595
1596 ret = ctxt->nsTab[nsIndex * 2 + 1];
1597 if (ret[0] == 0)
1598 ret = NULL;
1599 return(ret);
1600}
1601
1602/**
1603 * xmlParserNsLookupSax:
1604 * @ctxt: parser context
1605 * @prefix: namespace prefix
1606 *
1607 * Lookup extra data for the given prefix. This returns data stored
1608 * with xmlParserNsUdpateSax.
1609 *
1610 * Returns the data on success, NULL if no namespace was found.
1611 */
1612void *
1613xmlParserNsLookupSax(xmlParserCtxtPtr ctxt, const xmlChar *prefix) {
1614 xmlHashedString hprefix;
1615 int nsIndex;
1616
1617 if (prefix == ctxt->str_xml)
1618 return(NULL);
1619
1620 hprefix.name = prefix;
1621 if (prefix != NULL)
1622 hprefix.hashValue = xmlDictComputeHash(ctxt->dict, prefix);
1623 else
1624 hprefix.hashValue = 0;
1625 nsIndex = xmlParserNsLookup(ctxt, &hprefix, NULL);
1626 if ((nsIndex == INT_MAX) || (nsIndex < ctxt->nsdb->minNsIndex))
1627 return(NULL);
1628
1629 return(ctxt->nsdb->extra[nsIndex].saxData);
1630}
1631
1632/**
1633 * xmlParserNsUpdateSax:
1634 * @ctxt: parser context
1635 * @prefix: namespace prefix
1636 * @saxData: extra data for SAX handler
1637 *
1638 * Sets or updates extra data for the given prefix. This value will be
1639 * returned by xmlParserNsLookupSax as long as the namespace with the
1640 * given prefix is in scope.
1641 *
1642 * Returns the data on success, NULL if no namespace was found.
1643 */
1644int
1645xmlParserNsUpdateSax(xmlParserCtxtPtr ctxt, const xmlChar *prefix,
1646 void *saxData) {
1647 xmlHashedString hprefix;
1648 int nsIndex;
1649
1650 if (prefix == ctxt->str_xml)
1651 return(-1);
1652
1653 hprefix.name = prefix;
1654 if (prefix != NULL)
1655 hprefix.hashValue = xmlDictComputeHash(ctxt->dict, prefix);
1656 else
1657 hprefix.hashValue = 0;
1658 nsIndex = xmlParserNsLookup(ctxt, &hprefix, NULL);
1659 if ((nsIndex == INT_MAX) || (nsIndex < ctxt->nsdb->minNsIndex))
1660 return(-1);
1661
1662 ctxt->nsdb->extra[nsIndex].saxData = saxData;
1663 return(0);
1664}
1665
1666/**
1667 * xmlParserNsGrow:
1668 * @ctxt: parser context
1669 *
1670 * Grows the namespace tables.
1671 *
1672 * Returns 0 on success, -1 if a memory allocation failed.
1673 */
1674static int
1675xmlParserNsGrow(xmlParserCtxtPtr ctxt) {
1676 const xmlChar **table;
1677 xmlParserNsExtra *extra;
1678 int newSize;
1679
1680 if (ctxt->nsMax > INT_MAX / 2)
1681 goto error;
1682 newSize = ctxt->nsMax ? ctxt->nsMax * 2 : 16;
1683
1684 table = xmlRealloc(ctxt->nsTab, 2 * newSize * sizeof(table[0]));
1685 if (table == NULL)
1686 goto error;
1687 ctxt->nsTab = table;
1688
1689 extra = xmlRealloc(ctxt->nsdb->extra, newSize * sizeof(extra[0]));
1690 if (extra == NULL)
1691 goto error;
1692 ctxt->nsdb->extra = extra;
1693
1694 ctxt->nsMax = newSize;
1695 return(0);
1696
1697error:
1698 xmlErrMemory(ctxt);
1699 return(-1);
1700}
1701
1702/**
1703 * xmlParserNsPush:
1704 * @ctxt: parser context
1705 * @prefix: prefix with hash value
1706 * @uri: uri with hash value
1707 * @saxData: extra data for SAX handler
1708 * @defAttr: whether the namespace comes from a default attribute
1709 *
1710 * Push a new namespace on the table.
1711 *
1712 * Returns 1 if the namespace was pushed, 0 if the namespace was ignored,
1713 * -1 if a memory allocation failed.
1714 */
1715static int
1716xmlParserNsPush(xmlParserCtxtPtr ctxt, const xmlHashedString *prefix,
1717 const xmlHashedString *uri, void *saxData, int defAttr) {
1718 xmlParserNsBucket *bucket = NULL;
1719 xmlParserNsExtra *extra;
1720 const xmlChar **ns;
1721 unsigned hashValue, nsIndex, oldIndex;
1722
1723 if ((prefix != NULL) && (prefix->name == ctxt->str_xml))
1724 return(0);
1725
1726 if ((ctxt->nsNr >= ctxt->nsMax) && (xmlParserNsGrow(ctxt) < 0)) {
1727 xmlErrMemory(ctxt);
1728 return(-1);
1729 }
1730
1731 /*
1732 * Default namespace and 'xml' namespace
1733 */
1734 if ((prefix == NULL) || (prefix->name == NULL)) {
1735 oldIndex = ctxt->nsdb->defaultNsIndex;
1736
1737 if (oldIndex != INT_MAX) {
1738 extra = &ctxt->nsdb->extra[oldIndex];
1739
1740 if (extra->elementId == ctxt->nsdb->elementId) {
1741 if (defAttr == 0)
1742 xmlErrAttributeDup(ctxt, NULL, BAD_CAST "xmlns");
1743 return(0);
1744 }
1745
1746 if ((ctxt->options & XML_PARSE_NSCLEAN) &&
1747 (uri->name == ctxt->nsTab[oldIndex * 2 + 1]))
1748 return(0);
1749 }
1750
1751 ctxt->nsdb->defaultNsIndex = ctxt->nsNr;
1752 goto populate_entry;
1753 }
1754
1755 /*
1756 * Hash table lookup
1757 */
1758 oldIndex = xmlParserNsLookup(ctxt, prefix, &bucket);
1759 if (oldIndex != INT_MAX) {
1760 extra = &ctxt->nsdb->extra[oldIndex];
1761
1762 /*
1763 * Check for duplicate definitions on the same element.
1764 */
1765 if (extra->elementId == ctxt->nsdb->elementId) {
1766 if (defAttr == 0)
1767 xmlErrAttributeDup(ctxt, BAD_CAST "xmlns", prefix->name);
1768 return(0);
1769 }
1770
1771 if ((ctxt->options & XML_PARSE_NSCLEAN) &&
1772 (uri->name == ctxt->nsTab[bucket->index * 2 + 1]))
1773 return(0);
1774
1775 bucket->index = ctxt->nsNr;
1776 goto populate_entry;
1777 }
1778
1779 /*
1780 * Insert new bucket
1781 */
1782
1783 hashValue = prefix->hashValue;
1784
1785 /*
1786 * Grow hash table, 50% fill factor
1787 */
1788 if (ctxt->nsdb->hashElems + 1 > ctxt->nsdb->hashSize / 2) {
1789 xmlParserNsBucket *newHash;
1790 unsigned newSize, i, index;
1791
1792 if (ctxt->nsdb->hashSize > UINT_MAX / 2) {
1793 xmlErrMemory(ctxt);
1794 return(-1);
1795 }
1796 newSize = ctxt->nsdb->hashSize ? ctxt->nsdb->hashSize * 2 : 16;
1797 newHash = xmlMalloc(newSize * sizeof(newHash[0]));
1798 if (newHash == NULL) {
1799 xmlErrMemory(ctxt);
1800 return(-1);
1801 }
1802 memset(newHash, 0, newSize * sizeof(newHash[0]));
1803
1804 for (i = 0; i < ctxt->nsdb->hashSize; i++) {
1805 unsigned hv = ctxt->nsdb->hash[i].hashValue;
1806 unsigned newIndex;
1807
1808 if ((hv == 0) || (ctxt->nsdb->hash[i].index == INT_MAX))
1809 continue;
1810 newIndex = hv & (newSize - 1);
1811
1812 while (newHash[newIndex].hashValue != 0) {
1813 newIndex++;
1814 if (newIndex == newSize)
1815 newIndex = 0;
1816 }
1817
1818 newHash[newIndex] = ctxt->nsdb->hash[i];
1819 }
1820
1821 xmlFree(ctxt->nsdb->hash);
1822 ctxt->nsdb->hash = newHash;
1823 ctxt->nsdb->hashSize = newSize;
1824
1825 /*
1826 * Relookup
1827 */
1828 index = hashValue & (newSize - 1);
1829
1830 while (newHash[index].hashValue != 0) {
1831 index++;
1832 if (index == newSize)
1833 index = 0;
1834 }
1835
1836 bucket = &newHash[index];
1837 }
1838
1839 bucket->hashValue = hashValue;
1840 bucket->index = ctxt->nsNr;
1841 ctxt->nsdb->hashElems++;
1842 oldIndex = INT_MAX;
1843
1844populate_entry:
1845 nsIndex = ctxt->nsNr;
1846
1847 ns = &ctxt->nsTab[nsIndex * 2];
1848 ns[0] = prefix ? prefix->name : NULL;
1849 ns[1] = uri->name;
1850
1851 extra = &ctxt->nsdb->extra[nsIndex];
1852 extra->saxData = saxData;
1853 extra->prefixHashValue = prefix ? prefix->hashValue : 0;
1854 extra->uriHashValue = uri->hashValue;
1855 extra->elementId = ctxt->nsdb->elementId;
1856 extra->oldIndex = oldIndex;
1857
1858 ctxt->nsNr++;
1859
1860 return(1);
1861}
1862
1863/**
1864 * xmlParserNsPop:
1865 * @ctxt: an XML parser context
1866 * @nr: the number to pop
1867 *
1868 * Pops the top @nr namespaces and restores the hash table.
1869 *
1870 * Returns the number of namespaces popped.
1871 */
1872static int
1873xmlParserNsPop(xmlParserCtxtPtr ctxt, int nr)
1874{
1875 int i;
1876
1877 /* assert(nr <= ctxt->nsNr); */
1878
1879 for (i = ctxt->nsNr - 1; i >= ctxt->nsNr - nr; i--) {
1880 const xmlChar *prefix = ctxt->nsTab[i * 2];
1881 xmlParserNsExtra *extra = &ctxt->nsdb->extra[i];
1882
1883 if (prefix == NULL) {
1884 ctxt->nsdb->defaultNsIndex = extra->oldIndex;
1885 } else {
1886 xmlHashedString hprefix;
1887 xmlParserNsBucket *bucket = NULL;
1888
1889 hprefix.name = prefix;
1890 hprefix.hashValue = extra->prefixHashValue;
1891 xmlParserNsLookup(ctxt, &hprefix, &bucket);
1892 /* assert(bucket && bucket->hashValue); */
1893 bucket->index = extra->oldIndex;
1894 }
1895 }
1896
1897 ctxt->nsNr -= nr;
1898 return(nr);
1899}
1900
1901static int
1902xmlCtxtGrowAttrs(xmlParserCtxtPtr ctxt, int nr) {
1903 const xmlChar **atts;
1904 unsigned *attallocs;
1905 int maxatts;
1906
1907 if (nr + 5 > ctxt->maxatts) {
1908 maxatts = ctxt->maxatts == 0 ? 55 : (nr + 5) * 2;
1909 atts = (const xmlChar **) xmlMalloc(
1910 maxatts * sizeof(const xmlChar *));
1911 if (atts == NULL) goto mem_error;
1912 attallocs = xmlRealloc(ctxt->attallocs,
1913 (maxatts / 5) * sizeof(attallocs[0]));
1914 if (attallocs == NULL) {
1915 xmlFree(atts);
1916 goto mem_error;
1917 }
1918 if (ctxt->maxatts > 0)
1919 memcpy(atts, ctxt->atts, ctxt->maxatts * sizeof(const xmlChar *));
1920 xmlFree(ctxt->atts);
1921 ctxt->atts = atts;
1922 ctxt->attallocs = attallocs;
1923 ctxt->maxatts = maxatts;
1924 }
1925 return(ctxt->maxatts);
1926mem_error:
1927 xmlErrMemory(ctxt);
1928 return(-1);
1929}
1930
1931/**
1932 * inputPush:
1933 * @ctxt: an XML parser context
1934 * @value: the parser input
1935 *
1936 * Pushes a new parser input on top of the input stack
1937 *
1938 * Returns -1 in case of error, the index in the stack otherwise
1939 */
1940int
1941inputPush(xmlParserCtxtPtr ctxt, xmlParserInputPtr value)
1942{
1943 char *directory = NULL;
1944
1945 if ((ctxt == NULL) || (value == NULL))
1946 return(-1);
1947
1948 if (ctxt->inputNr >= ctxt->inputMax) {
1949 size_t newSize = ctxt->inputMax * 2;
1950 xmlParserInputPtr *tmp;
1951
1952 tmp = (xmlParserInputPtr *) xmlRealloc(ctxt->inputTab,
1953 newSize * sizeof(*tmp));
1954 if (tmp == NULL) {
1955 xmlErrMemory(ctxt);
1956 return (-1);
1957 }
1958 ctxt->inputTab = tmp;
1959 ctxt->inputMax = newSize;
1960 }
1961
1962 if ((ctxt->inputNr == 0) && (value->filename != NULL)) {
1963 directory = xmlParserGetDirectory(value->filename);
1964 if (directory == NULL) {
1965 xmlErrMemory(ctxt);
1966 return(-1);
1967 }
1968 }
1969
1970 ctxt->inputTab[ctxt->inputNr] = value;
1971 ctxt->input = value;
1972
1973 if (ctxt->inputNr == 0) {
1974 xmlFree(ctxt->directory);
1975 ctxt->directory = directory;
1976 }
1977
1978 return(ctxt->inputNr++);
1979}
1980/**
1981 * inputPop:
1982 * @ctxt: an XML parser context
1983 *
1984 * Pops the top parser input from the input stack
1985 *
1986 * Returns the input just removed
1987 */
1988xmlParserInputPtr
1989inputPop(xmlParserCtxtPtr ctxt)
1990{
1991 xmlParserInputPtr ret;
1992
1993 if (ctxt == NULL)
1994 return(NULL);
1995 if (ctxt->inputNr <= 0)
1996 return (NULL);
1997 ctxt->inputNr--;
1998 if (ctxt->inputNr > 0)
1999 ctxt->input = ctxt->inputTab[ctxt->inputNr - 1];
2000 else
2001 ctxt->input = NULL;
2002 ret = ctxt->inputTab[ctxt->inputNr];
2003 ctxt->inputTab[ctxt->inputNr] = NULL;
2004 return (ret);
2005}
2006/**
2007 * nodePush:
2008 * @ctxt: an XML parser context
2009 * @value: the element node
2010 *
2011 * DEPRECATED: Internal function, do not use.
2012 *
2013 * Pushes a new element node on top of the node stack
2014 *
2015 * Returns -1 in case of error, the index in the stack otherwise
2016 */
2017int
2018nodePush(xmlParserCtxtPtr ctxt, xmlNodePtr value)
2019{
2020 int maxDepth;
2021
2022 if (ctxt == NULL)
2023 return(0);
2024
2025 maxDepth = (ctxt->options & XML_PARSE_HUGE) ? 2048 : 256;
2026 if (ctxt->nodeNr > maxDepth) {
2027 xmlFatalErrMsgInt(ctxt, XML_ERR_RESOURCE_LIMIT,
2028 "Excessive depth in document: %d use XML_PARSE_HUGE option\n",
2029 ctxt->nodeNr);
2030 xmlHaltParser(ctxt);
2031 return(-1);
2032 }
2033 if (ctxt->nodeNr >= ctxt->nodeMax) {
2034 xmlNodePtr *tmp;
2035
2036 tmp = (xmlNodePtr *) xmlRealloc(ctxt->nodeTab,
2037 ctxt->nodeMax * 2 *
2038 sizeof(ctxt->nodeTab[0]));
2039 if (tmp == NULL) {
2040 xmlErrMemory(ctxt);
2041 return (-1);
2042 }
2043 ctxt->nodeTab = tmp;
2044 ctxt->nodeMax *= 2;
2045 }
2046 ctxt->nodeTab[ctxt->nodeNr] = value;
2047 ctxt->node = value;
2048 return (ctxt->nodeNr++);
2049}
2050
2051/**
2052 * nodePop:
2053 * @ctxt: an XML parser context
2054 *
2055 * DEPRECATED: Internal function, do not use.
2056 *
2057 * Pops the top element node from the node stack
2058 *
2059 * Returns the node just removed
2060 */
2061xmlNodePtr
2062nodePop(xmlParserCtxtPtr ctxt)
2063{
2064 xmlNodePtr ret;
2065
2066 if (ctxt == NULL) return(NULL);
2067 if (ctxt->nodeNr <= 0)
2068 return (NULL);
2069 ctxt->nodeNr--;
2070 if (ctxt->nodeNr > 0)
2071 ctxt->node = ctxt->nodeTab[ctxt->nodeNr - 1];
2072 else
2073 ctxt->node = NULL;
2074 ret = ctxt->nodeTab[ctxt->nodeNr];
2075 ctxt->nodeTab[ctxt->nodeNr] = NULL;
2076 return (ret);
2077}
2078
2079/**
2080 * nameNsPush:
2081 * @ctxt: an XML parser context
2082 * @value: the element name
2083 * @prefix: the element prefix
2084 * @URI: the element namespace name
2085 * @line: the current line number for error messages
2086 * @nsNr: the number of namespaces pushed on the namespace table
2087 *
2088 * Pushes a new element name/prefix/URL on top of the name stack
2089 *
2090 * Returns -1 in case of error, the index in the stack otherwise
2091 */
2092static int
2093nameNsPush(xmlParserCtxtPtr ctxt, const xmlChar * value,
2094 const xmlChar *prefix, const xmlChar *URI, int line, int nsNr)
2095{
2096 xmlStartTag *tag;
2097
2098 if (ctxt->nameNr >= ctxt->nameMax) {
2099 const xmlChar * *tmp;
2100 xmlStartTag *tmp2;
2101 ctxt->nameMax *= 2;
2102 tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
2103 ctxt->nameMax *
2104 sizeof(ctxt->nameTab[0]));
2105 if (tmp == NULL) {
2106 ctxt->nameMax /= 2;
2107 goto mem_error;
2108 }
2109 ctxt->nameTab = tmp;
2110 tmp2 = (xmlStartTag *) xmlRealloc((void * *)ctxt->pushTab,
2111 ctxt->nameMax *
2112 sizeof(ctxt->pushTab[0]));
2113 if (tmp2 == NULL) {
2114 ctxt->nameMax /= 2;
2115 goto mem_error;
2116 }
2117 ctxt->pushTab = tmp2;
2118 } else if (ctxt->pushTab == NULL) {
2119 ctxt->pushTab = (xmlStartTag *) xmlMalloc(ctxt->nameMax *
2120 sizeof(ctxt->pushTab[0]));
2121 if (ctxt->pushTab == NULL)
2122 goto mem_error;
2123 }
2124 ctxt->nameTab[ctxt->nameNr] = value;
2125 ctxt->name = value;
2126 tag = &ctxt->pushTab[ctxt->nameNr];
2127 tag->prefix = prefix;
2128 tag->URI = URI;
2129 tag->line = line;
2130 tag->nsNr = nsNr;
2131 return (ctxt->nameNr++);
2132mem_error:
2133 xmlErrMemory(ctxt);
2134 return (-1);
2135}
#ifdef LIBXML_PUSH_ENABLED
/**
 * nameNsPop:
 * @ctxt: an XML parser context
 *
 * Pops the top element/prefix/URI name from the name stack. ctxt->name
 * is set to the new top of the stack, or to NULL if the stack became
 * empty.
 *
 * Returns the name just removed, or NULL if the stack is empty
 */
static const xmlChar *
nameNsPop(xmlParserCtxtPtr ctxt)
{
    const xmlChar *popped;
    int top;

    if (ctxt->nameNr <= 0)
        return (NULL);

    top = --ctxt->nameNr;
    ctxt->name = (top > 0) ? ctxt->nameTab[top - 1] : NULL;

    popped = ctxt->nameTab[top];
    ctxt->nameTab[top] = NULL;
    return (popped);
}
#endif /* LIBXML_PUSH_ENABLED */
2162
2163/**
2164 * namePush:
2165 * @ctxt: an XML parser context
2166 * @value: the element name
2167 *
2168 * DEPRECATED: Internal function, do not use.
2169 *
2170 * Pushes a new element name on top of the name stack
2171 *
2172 * Returns -1 in case of error, the index in the stack otherwise
2173 */
2174int
2175namePush(xmlParserCtxtPtr ctxt, const xmlChar * value)
2176{
2177 if (ctxt == NULL) return (-1);
2178
2179 if (ctxt->nameNr >= ctxt->nameMax) {
2180 const xmlChar * *tmp;
2181 tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
2182 ctxt->nameMax * 2 *
2183 sizeof(ctxt->nameTab[0]));
2184 if (tmp == NULL) {
2185 goto mem_error;
2186 }
2187 ctxt->nameTab = tmp;
2188 ctxt->nameMax *= 2;
2189 }
2190 ctxt->nameTab[ctxt->nameNr] = value;
2191 ctxt->name = value;
2192 return (ctxt->nameNr++);
2193mem_error:
2194 xmlErrMemory(ctxt);
2195 return (-1);
2196}
2197
2198/**
2199 * namePop:
2200 * @ctxt: an XML parser context
2201 *
2202 * DEPRECATED: Internal function, do not use.
2203 *
2204 * Pops the top element name from the name stack
2205 *
2206 * Returns the name just removed
2207 */
2208const xmlChar *
2209namePop(xmlParserCtxtPtr ctxt)
2210{
2211 const xmlChar *ret;
2212
2213 if ((ctxt == NULL) || (ctxt->nameNr <= 0))
2214 return (NULL);
2215 ctxt->nameNr--;
2216 if (ctxt->nameNr > 0)
2217 ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
2218 else
2219 ctxt->name = NULL;
2220 ret = ctxt->nameTab[ctxt->nameNr];
2221 ctxt->nameTab[ctxt->nameNr] = NULL;
2222 return (ret);
2223}
2224
2225static int spacePush(xmlParserCtxtPtr ctxt, int val) {
2226 if (ctxt->spaceNr >= ctxt->spaceMax) {
2227 int *tmp;
2228
2229 ctxt->spaceMax *= 2;
2230 tmp = (int *) xmlRealloc(ctxt->spaceTab,
2231 ctxt->spaceMax * sizeof(ctxt->spaceTab[0]));
2232 if (tmp == NULL) {
2233 xmlErrMemory(ctxt);
2234 ctxt->spaceMax /=2;
2235 return(-1);
2236 }
2237 ctxt->spaceTab = tmp;
2238 }
2239 ctxt->spaceTab[ctxt->spaceNr] = val;
2240 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr];
2241 return(ctxt->spaceNr++);
2242}
2243
2244static int spacePop(xmlParserCtxtPtr ctxt) {
2245 int ret;
2246 if (ctxt->spaceNr <= 0) return(0);
2247 ctxt->spaceNr--;
2248 if (ctxt->spaceNr > 0)
2249 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr - 1];
2250 else
2251 ctxt->space = &ctxt->spaceTab[0];
2252 ret = ctxt->spaceTab[ctxt->spaceNr];
2253 ctxt->spaceTab[ctxt->spaceNr] = -1;
2254 return(ret);
2255}
2256
/*
 * Macros for accessing the content. Those should be used only by the parser,
 * and not exported.
 *
 * All of them expect a parser context variable named ctxt to be in scope.
 *
 * Dirty macros, i.e. one often needs to make assumptions on the context to
 * use them
 *
 *   CUR_PTR return the current pointer to the xmlChar to be parsed.
 *           To be used with extreme caution since operations consuming
 *           characters may move the input buffer to a different location !
 *   CUR     returns the current xmlChar value, i.e. an 8 bit value.
 *           This should be used internally by the parser
 *           only to compare to ASCII values otherwise it would break when
 *           running with UTF-8 encoding.
 *   RAW     same as CUR but in the input buffer, bypass any token
 *           extraction that may have been done
 *   NXT(n)  returns the n'th next xmlChar. Same as CUR, it should be used
 *           only to compare on ASCII based substring.
 *   SKIP(n) Skip n xmlChar, and must also be used only to skip ASCII defined
 *           strings without newlines within the parser.
 *   NEXT1   Skip 1 xmlChar, and must also be used only to skip 1 non-newline
 *           ASCII defined char within the parser.
 * Clean macros, not dependent of an ASCII context, expect UTF-8 encoding
 *
 *   NEXT    Skip to the next character, this does the proper decoding
 *           in UTF-8 mode. It also pop-up unfinished entities on the fly.
 *   NEXTL(l) Skip the current unicode character of l xmlChars long.
 *   CUR_CHAR(l) returns the current unicode character (int), set l
 *           to the number of xmlChars used for the encoding [0-5].
 *   CUR_SCHAR  same but operate on a string instead of the context
 *   COPY_BUF  copy the current unicode char to the target buffer, increment
 *            the index
 *   GROW, SHRINK  handling of input buffers
 *
 * SHRINK, GROW, NEXT1 and COPY_BUF expand to plain statements rather than
 * to do { } while (0) blocks: take care when using them as the body of an
 * unbraced if/else.
 */

#define RAW (*ctxt->input->cur)
#define CUR (*ctxt->input->cur)
#define NXT(val) ctxt->input->cur[(val)]
#define CUR_PTR ctxt->input->cur
#define BASE_PTR ctxt->input->base

/*
 * CMPn(s, c1, ..., cn) is true if the first n bytes at s are c1 ... cn.
 * The comparison stops at the first mismatch.
 */
#define CMP4( s, c1, c2, c3, c4 ) \
  ( ((unsigned char *) (s))[ 0 ] == (c1) && \
    ((unsigned char *) (s))[ 1 ] == (c2) && \
    ((unsigned char *) (s))[ 2 ] == (c3) && \
    ((unsigned char *) (s))[ 3 ] == (c4) )
#define CMP5( s, c1, c2, c3, c4, c5 ) \
  ( CMP4( s, c1, c2, c3, c4 ) && ((unsigned char *) (s))[ 4 ] == (c5) )
#define CMP6( s, c1, c2, c3, c4, c5, c6 ) \
  ( CMP5( s, c1, c2, c3, c4, c5 ) && ((unsigned char *) (s))[ 5 ] == (c6) )
#define CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) \
  ( CMP6( s, c1, c2, c3, c4, c5, c6 ) && ((unsigned char *) (s))[ 6 ] == (c7) )
#define CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) \
  ( CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) && ((unsigned char *) (s))[ 7 ] == (c8) )
#define CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) \
  ( CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) && \
    ((unsigned char *) (s))[ 8 ] == (c9) )
#define CMP10( s, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10 ) \
  ( CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) && \
    ((unsigned char *) (s))[ 9 ] == (c10) )

#define SKIP(val) do {							\
    ctxt->input->cur += (val),ctxt->input->col+=(val);			\
    if (*ctxt->input->cur == 0)						\
        xmlParserGrow(ctxt);						\
  } while (0)

#define SKIPL(val) do {							\
    int skipl;								\
    for(skipl=0; skipl<(val); skipl++) {				\
	if (*(ctxt->input->cur) == '\n') {				\
	ctxt->input->line++; ctxt->input->col = 1;			\
	} else ctxt->input->col++;					\
	ctxt->input->cur++;						\
    }									\
    if (*ctxt->input->cur == 0)						\
        xmlParserGrow(ctxt);						\
  } while (0)

#define SHRINK \
    if ((!PARSER_PROGRESSIVE(ctxt)) && \
        (ctxt->input->cur - ctxt->input->base > 2 * INPUT_CHUNK) && \
	(ctxt->input->end - ctxt->input->cur < 2 * INPUT_CHUNK)) \
	xmlParserShrink(ctxt);

#define GROW \
    if ((!PARSER_PROGRESSIVE(ctxt)) && \
        (ctxt->input->end - ctxt->input->cur < INPUT_CHUNK)) \
	xmlParserGrow(ctxt);

#define SKIP_BLANKS xmlSkipBlankChars(ctxt)

#define SKIP_BLANKS_PE xmlSkipBlankCharsPE(ctxt)

#define NEXT xmlNextChar(ctxt)

#define NEXT1 {								\
	ctxt->input->col++;						\
	ctxt->input->cur++;						\
	if (*ctxt->input->cur == 0)					\
	    xmlParserGrow(ctxt);						\
    }

#define NEXTL(l) do {							\
    if (*(ctxt->input->cur) == '\n') {					\
	ctxt->input->line++; ctxt->input->col = 1;			\
    } else ctxt->input->col++;						\
    ctxt->input->cur += (l);						\
  } while (0)

#define CUR_CHAR(l) xmlCurrentChar(ctxt, &(l))
#define CUR_SCHAR(s, l) xmlStringCurrentChar(ctxt, (s), &(l))

#define COPY_BUF(b, i, v)						\
    if ((v) < 0x80) (b)[(i)++] = (v);					\
    else (i) += xmlCopyCharMultiByte(&(b)[(i)],(v))
2371
2372/**
2373 * xmlSkipBlankChars:
2374 * @ctxt: the XML parser context
2375 *
2376 * DEPRECATED: Internal function, do not use.
2377 *
2378 * Skip whitespace in the input stream.
2379 *
2380 * Returns the number of space chars skipped
2381 */
2382int
2383xmlSkipBlankChars(xmlParserCtxtPtr ctxt) {
2384 const xmlChar *cur;
2385 int res = 0;
2386
2387 /*
2388 * It's Okay to use CUR/NEXT here since all the blanks are on
2389 * the ASCII range.
2390 */
2391 cur = ctxt->input->cur;
2392 while (IS_BLANK_CH(*cur)) {
2393 if (*cur == '\n') {
2394 ctxt->input->line++; ctxt->input->col = 1;
2395 } else {
2396 ctxt->input->col++;
2397 }
2398 cur++;
2399 if (res < INT_MAX)
2400 res++;
2401 if (*cur == 0) {
2402 ctxt->input->cur = cur;
2403 xmlParserGrow(ctxt);
2404 cur = ctxt->input->cur;
2405 }
2406 }
2407 ctxt->input->cur = cur;
2408
2409 return(res);
2410}
2411
2412static void
2413xmlPopPE(xmlParserCtxtPtr ctxt) {
2414 unsigned long consumed;
2415 xmlEntityPtr ent;
2416
2417 ent = ctxt->input->entity;
2418
2419 ent->flags &= ~XML_ENT_EXPANDING;
2420
2421 if ((ent->flags & XML_ENT_CHECKED) == 0) {
2422 int result;
2423
2424 /*
2425 * Read the rest of the stream in case of errors. We want
2426 * to account for the whole entity size.
2427 */
2428 do {
2429 ctxt->input->cur = ctxt->input->end;
2430 xmlParserShrink(ctxt);
2431 result = xmlParserGrow(ctxt);
2432 } while (result > 0);
2433
2434 consumed = ctxt->input->consumed;
2435 xmlSaturatedAddSizeT(&consumed,
2436 ctxt->input->end - ctxt->input->base);
2437
2438 xmlSaturatedAdd(&ent->expandedSize, consumed);
2439
2440 /*
2441 * Add to sizeentities when parsing an external entity
2442 * for the first time.
2443 */
2444 if (ent->etype == XML_EXTERNAL_PARAMETER_ENTITY) {
2445 xmlSaturatedAdd(&ctxt->sizeentities, consumed);
2446 }
2447
2448 ent->flags |= XML_ENT_CHECKED;
2449 }
2450
2451 xmlPopInput(ctxt);
2452
2453 xmlParserEntityCheck(ctxt, ent->expandedSize);
2454}
2455
2456/**
2457 * xmlSkipBlankCharsPE:
2458 * @ctxt: the XML parser context
2459 *
2460 * Skip whitespace in the input stream, also handling parameter
2461 * entities.
2462 *
2463 * Returns the number of space chars skipped
2464 */
2465static int
2466xmlSkipBlankCharsPE(xmlParserCtxtPtr ctxt) {
2467 int res = 0;
2468 int inParam;
2469 int expandParam;
2470
2471 inParam = PARSER_IN_PE(ctxt);
2472 expandParam = PARSER_EXTERNAL(ctxt);
2473
2474 if (!inParam && !expandParam)
2475 return(xmlSkipBlankChars(ctxt));
2476
2477 while (PARSER_STOPPED(ctxt) == 0) {
2478 if (IS_BLANK_CH(CUR)) { /* CHECKED tstblanks.xml */
2479 NEXT;
2480 } else if (CUR == '%') {
2481 if ((expandParam == 0) ||
2482 (IS_BLANK_CH(NXT(1))) || (NXT(1) == 0))
2483 break;
2484
2485 /*
2486 * Expand parameter entity. We continue to consume
2487 * whitespace at the start of the entity and possible
2488 * even consume the whole entity and pop it. We might
2489 * even pop multiple PEs in this loop.
2490 */
2491 xmlParsePEReference(ctxt);
2492
2493 inParam = PARSER_IN_PE(ctxt);
2494 expandParam = PARSER_EXTERNAL(ctxt);
2495 } else if (CUR == 0) {
2496 if (inParam == 0)
2497 break;
2498
2499 xmlPopPE(ctxt);
2500
2501 inParam = PARSER_IN_PE(ctxt);
2502 expandParam = PARSER_EXTERNAL(ctxt);
2503 } else {
2504 break;
2505 }
2506
2507 /*
2508 * Also increase the counter when entering or exiting a PERef.
2509 * The spec says: "When a parameter-entity reference is recognized
2510 * in the DTD and included, its replacement text MUST be enlarged
2511 * by the attachment of one leading and one following space (#x20)
2512 * character."
2513 */
2514 if (res < INT_MAX)
2515 res++;
2516 }
2517
2518 return(res);
2519}
2520
2521/************************************************************************
2522 * *
2523 * Commodity functions to handle entities *
2524 * *
2525 ************************************************************************/
2526
2527/**
2528 * xmlPopInput:
2529 * @ctxt: an XML parser context
2530 *
2531 * xmlPopInput: the current input pointed by ctxt->input came to an end
2532 * pop it and return the next char.
2533 *
2534 * Returns the current xmlChar in the parser context
2535 */
2536xmlChar
2537xmlPopInput(xmlParserCtxtPtr ctxt) {
2538 xmlParserInputPtr input;
2539
2540 if ((ctxt == NULL) || (ctxt->inputNr <= 1)) return(0);
2541 input = inputPop(ctxt);
2542 xmlFreeInputStream(input);
2543 if (*ctxt->input->cur == 0)
2544 xmlParserGrow(ctxt);
2545 return(CUR);
2546}
2547
2548/**
2549 * xmlPushInput:
2550 * @ctxt: an XML parser context
2551 * @input: an XML parser input fragment (entity, XML fragment ...).
2552 *
2553 * Push an input stream onto the stack.
2554 *
2555 * Returns -1 in case of error or the index in the input stack
2556 */
2557int
2558xmlPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr input) {
2559 int maxDepth;
2560 int ret;
2561
2562 if ((ctxt == NULL) || (input == NULL))
2563 return(-1);
2564
2565 maxDepth = (ctxt->options & XML_PARSE_HUGE) ? 40 : 20;
2566 if (ctxt->inputNr > maxDepth) {
2567 xmlFatalErrMsg(ctxt, XML_ERR_RESOURCE_LIMIT,
2568 "Maximum entity nesting depth exceeded");
2569 xmlHaltParser(ctxt);
2570 return(-1);
2571 }
2572 ret = inputPush(ctxt, input);
2573 GROW;
2574 return(ret);
2575}
2576
2577/**
2578 * xmlParseCharRef:
2579 * @ctxt: an XML parser context
2580 *
2581 * DEPRECATED: Internal function, don't use.
2582 *
2583 * Parse a numeric character reference. Always consumes '&'.
2584 *
2585 * [66] CharRef ::= '&#' [0-9]+ ';' |
2586 * '&#x' [0-9a-fA-F]+ ';'
2587 *
2588 * [ WFC: Legal Character ]
2589 * Characters referred to using character references must match the
2590 * production for Char.
2591 *
2592 * Returns the value parsed (as an int), 0 in case of error
2593 */
2594int
2595xmlParseCharRef(xmlParserCtxtPtr ctxt) {
2596 int val = 0;
2597 int count = 0;
2598
2599 /*
2600 * Using RAW/CUR/NEXT is okay since we are working on ASCII range here
2601 */
2602 if ((RAW == '&') && (NXT(1) == '#') &&
2603 (NXT(2) == 'x')) {
2604 SKIP(3);
2605 GROW;
2606 while ((RAW != ';') && (PARSER_STOPPED(ctxt) == 0)) {
2607 if (count++ > 20) {
2608 count = 0;
2609 GROW;
2610 }
2611 if ((RAW >= '0') && (RAW <= '9'))
2612 val = val * 16 + (CUR - '0');
2613 else if ((RAW >= 'a') && (RAW <= 'f') && (count < 20))
2614 val = val * 16 + (CUR - 'a') + 10;
2615 else if ((RAW >= 'A') && (RAW <= 'F') && (count < 20))
2616 val = val * 16 + (CUR - 'A') + 10;
2617 else {
2618 xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
2619 val = 0;
2620 break;
2621 }
2622 if (val > 0x110000)
2623 val = 0x110000;
2624
2625 NEXT;
2626 count++;
2627 }
2628 if (RAW == ';') {
2629 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
2630 ctxt->input->col++;
2631 ctxt->input->cur++;
2632 }
2633 } else if ((RAW == '&') && (NXT(1) == '#')) {
2634 SKIP(2);
2635 GROW;
2636 while (RAW != ';') { /* loop blocked by count */
2637 if (count++ > 20) {
2638 count = 0;
2639 GROW;
2640 }
2641 if ((RAW >= '0') && (RAW <= '9'))
2642 val = val * 10 + (CUR - '0');
2643 else {
2644 xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
2645 val = 0;
2646 break;
2647 }
2648 if (val > 0x110000)
2649 val = 0x110000;
2650
2651 NEXT;
2652 count++;
2653 }
2654 if (RAW == ';') {
2655 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
2656 ctxt->input->col++;
2657 ctxt->input->cur++;
2658 }
2659 } else {
2660 if (RAW == '&')
2661 SKIP(1);
2662 xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
2663 }
2664
2665 /*
2666 * [ WFC: Legal Character ]
2667 * Characters referred to using character references must match the
2668 * production for Char.
2669 */
2670 if (val >= 0x110000) {
2671 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2672 "xmlParseCharRef: character reference out of bounds\n",
2673 val);
2674 } else if (IS_CHAR(val)) {
2675 return(val);
2676 } else {
2677 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2678 "xmlParseCharRef: invalid xmlChar value %d\n",
2679 val);
2680 }
2681 return(0);
2682}
2683
2684/**
2685 * xmlParseStringCharRef:
2686 * @ctxt: an XML parser context
2687 * @str: a pointer to an index in the string
2688 *
2689 * parse Reference declarations, variant parsing from a string rather
2690 * than an an input flow.
2691 *
2692 * [66] CharRef ::= '&#' [0-9]+ ';' |
2693 * '&#x' [0-9a-fA-F]+ ';'
2694 *
2695 * [ WFC: Legal Character ]
2696 * Characters referred to using character references must match the
2697 * production for Char.
2698 *
2699 * Returns the value parsed (as an int), 0 in case of error, str will be
2700 * updated to the current value of the index
2701 */
2702static int
2703xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xmlChar **str) {
2704 const xmlChar *ptr;
2705 xmlChar cur;
2706 int val = 0;
2707
2708 if ((str == NULL) || (*str == NULL)) return(0);
2709 ptr = *str;
2710 cur = *ptr;
2711 if ((cur == '&') && (ptr[1] == '#') && (ptr[2] == 'x')) {
2712 ptr += 3;
2713 cur = *ptr;
2714 while (cur != ';') { /* Non input consuming loop */
2715 if ((cur >= '0') && (cur <= '9'))
2716 val = val * 16 + (cur - '0');
2717 else if ((cur >= 'a') && (cur <= 'f'))
2718 val = val * 16 + (cur - 'a') + 10;
2719 else if ((cur >= 'A') && (cur <= 'F'))
2720 val = val * 16 + (cur - 'A') + 10;
2721 else {
2722 xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
2723 val = 0;
2724 break;
2725 }
2726 if (val > 0x110000)
2727 val = 0x110000;
2728
2729 ptr++;
2730 cur = *ptr;
2731 }
2732 if (cur == ';')
2733 ptr++;
2734 } else if ((cur == '&') && (ptr[1] == '#')){
2735 ptr += 2;
2736 cur = *ptr;
2737 while (cur != ';') { /* Non input consuming loops */
2738 if ((cur >= '0') && (cur <= '9'))
2739 val = val * 10 + (cur - '0');
2740 else {
2741 xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
2742 val = 0;
2743 break;
2744 }
2745 if (val > 0x110000)
2746 val = 0x110000;
2747
2748 ptr++;
2749 cur = *ptr;
2750 }
2751 if (cur == ';')
2752 ptr++;
2753 } else {
2754 xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
2755 return(0);
2756 }
2757 *str = ptr;
2758
2759 /*
2760 * [ WFC: Legal Character ]
2761 * Characters referred to using character references must match the
2762 * production for Char.
2763 */
2764 if (val >= 0x110000) {
2765 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2766 "xmlParseStringCharRef: character reference out of bounds\n",
2767 val);
2768 } else if (IS_CHAR(val)) {
2769 return(val);
2770 } else {
2771 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2772 "xmlParseStringCharRef: invalid xmlChar value %d\n",
2773 val);
2774 }
2775 return(0);
2776}
2777
2778/**
2779 * xmlParserHandlePEReference:
2780 * @ctxt: the parser context
2781 *
2782 * DEPRECATED: Internal function, do not use.
2783 *
2784 * [69] PEReference ::= '%' Name ';'
2785 *
2786 * [ WFC: No Recursion ]
2787 * A parsed entity must not contain a recursive
2788 * reference to itself, either directly or indirectly.
2789 *
2790 * [ WFC: Entity Declared ]
2791 * In a document without any DTD, a document with only an internal DTD
2792 * subset which contains no parameter entity references, or a document
2793 * with "standalone='yes'", ... ... The declaration of a parameter
2794 * entity must precede any reference to it...
2795 *
2796 * [ VC: Entity Declared ]
2797 * In a document with an external subset or external parameter entities
2798 * with "standalone='no'", ... ... The declaration of a parameter entity
2799 * must precede any reference to it...
2800 *
2801 * [ WFC: In DTD ]
2802 * Parameter-entity references may only appear in the DTD.
2803 * NOTE: misleading but this is handled.
2804 *
2805 * A PEReference may have been detected in the current input stream
2806 * the handling is done accordingly to
2807 * http://www.w3.org/TR/REC-xml#entproc
2808 * i.e.
2809 * - Included in literal in entity values
2810 * - Included as Parameter Entity reference within DTDs
2811 */
2812void
2813xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) {
2814 xmlParsePEReference(ctxt);
2815}
2816
2817/**
2818 * xmlStringLenDecodeEntities:
2819 * @ctxt: the parser context
2820 * @str: the input string
2821 * @len: the string length
2822 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2823 * @end: an end marker xmlChar, 0 if none
2824 * @end2: an end marker xmlChar, 0 if none
2825 * @end3: an end marker xmlChar, 0 if none
2826 *
2827 * DEPRECATED: Internal function, don't use.
2828 *
2829 * Returns A newly allocated string with the substitution done. The caller
2830 * must deallocate it !
2831 */
2832xmlChar *
2833xmlStringLenDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2834 int what ATTRIBUTE_UNUSED,
2835 xmlChar end, xmlChar end2, xmlChar end3) {
2836 if ((ctxt == NULL) || (str == NULL) || (len < 0))
2837 return(NULL);
2838
2839 if ((str[len] != 0) ||
2840 (end != 0) || (end2 != 0) || (end3 != 0))
2841 return(NULL);
2842
2843 return(xmlExpandEntitiesInAttValue(ctxt, str, 0));
2844}
2845
2846/**
2847 * xmlStringDecodeEntities:
2848 * @ctxt: the parser context
2849 * @str: the input string
2850 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2851 * @end: an end marker xmlChar, 0 if none
2852 * @end2: an end marker xmlChar, 0 if none
2853 * @end3: an end marker xmlChar, 0 if none
2854 *
2855 * DEPRECATED: Internal function, don't use.
2856 *
2857 * Returns A newly allocated string with the substitution done. The caller
2858 * must deallocate it !
2859 */
2860xmlChar *
2861xmlStringDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str,
2862 int what ATTRIBUTE_UNUSED,
2863 xmlChar end, xmlChar end2, xmlChar end3) {
2864 if ((ctxt == NULL) || (str == NULL))
2865 return(NULL);
2866
2867 if ((end != 0) || (end2 != 0) || (end3 != 0))
2868 return(NULL);
2869
2870 return(xmlExpandEntitiesInAttValue(ctxt, str, 0));
2871}
2872
2873/************************************************************************
2874 * *
2875 * Commodity functions, cleanup needed ? *
2876 * *
2877 ************************************************************************/
2878
2879/**
2880 * areBlanks:
2881 * @ctxt: an XML parser context
2882 * @str: a xmlChar *
2883 * @len: the size of @str
2884 * @blank_chars: we know the chars are blanks
2885 *
2886 * Is this a sequence of blank chars that one can ignore ?
2887 *
2888 * Returns 1 if ignorable 0 otherwise.
2889 */
2890
2891static int areBlanks(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2892 int blank_chars) {
2893 int i;
2894 xmlNodePtr lastChild;
2895
2896 /*
2897 * Don't spend time trying to differentiate them, the same callback is
2898 * used !
2899 */
2900 if (ctxt->sax->ignorableWhitespace == ctxt->sax->characters)
2901 return(0);
2902
2903 /*
2904 * Check for xml:space value.
2905 */
2906 if ((ctxt->space == NULL) || (*(ctxt->space) == 1) ||
2907 (*(ctxt->space) == -2))
2908 return(0);
2909
2910 /*
2911 * Check that the string is made of blanks
2912 */
2913 if (blank_chars == 0) {
2914 for (i = 0;i < len;i++)
2915 if (!(IS_BLANK_CH(str[i]))) return(0);
2916 }
2917
2918 /*
2919 * Look if the element is mixed content in the DTD if available
2920 */
2921 if (ctxt->node == NULL) return(0);
2922 if (ctxt->myDoc != NULL) {
2923 xmlElementPtr elemDecl = NULL;
2924 xmlDocPtr doc = ctxt->myDoc;
2925 const xmlChar *prefix = NULL;
2926
2927 if (ctxt->node->ns)
2928 prefix = ctxt->node->ns->prefix;
2929 if (doc->intSubset != NULL)
2930 elemDecl = xmlHashLookup2(doc->intSubset->elements, ctxt->node->name,
2931 prefix);
2932 if ((elemDecl == NULL) && (doc->extSubset != NULL))
2933 elemDecl = xmlHashLookup2(doc->extSubset->elements, ctxt->node->name,
2934 prefix);
2935 if (elemDecl != NULL) {
2936 if (elemDecl->etype == XML_ELEMENT_TYPE_ELEMENT)
2937 return(1);
2938 if ((elemDecl->etype == XML_ELEMENT_TYPE_ANY) ||
2939 (elemDecl->etype == XML_ELEMENT_TYPE_MIXED))
2940 return(0);
2941 }
2942 }
2943
2944 /*
2945 * Otherwise, heuristic :-\
2946 */
2947 if ((RAW != '<') && (RAW != 0xD)) return(0);
2948 if ((ctxt->node->children == NULL) &&
2949 (RAW == '<') && (NXT(1) == '/')) return(0);
2950
2951 lastChild = xmlGetLastChild(ctxt->node);
2952 if (lastChild == NULL) {
2953 if ((ctxt->node->type != XML_ELEMENT_NODE) &&
2954 (ctxt->node->content != NULL)) return(0);
2955 } else if (xmlNodeIsText(lastChild))
2956 return(0);
2957 else if ((ctxt->node->children != NULL) &&
2958 (xmlNodeIsText(ctxt->node->children)))
2959 return(0);
2960 return(1);
2961}
2962
2963/************************************************************************
2964 * *
2965 * Extra stuff for namespace support *
2966 * Relates to http://www.w3.org/TR/WD-xml-names *
2967 * *
2968 ************************************************************************/
2969
2970/**
2971 * xmlSplitQName:
2972 * @ctxt: an XML parser context
2973 * @name: an XML parser context
2974 * @prefixOut: a xmlChar **
2975 *
2976 * parse an UTF8 encoded XML qualified name string
2977 *
2978 * [NS 5] QName ::= (Prefix ':')? LocalPart
2979 *
2980 * [NS 6] Prefix ::= NCName
2981 *
2982 * [NS 7] LocalPart ::= NCName
2983 *
2984 * Returns the local part, and prefix is updated
2985 * to get the Prefix if any.
2986 */
2987
2988xmlChar *
2989xmlSplitQName(xmlParserCtxtPtr ctxt, const xmlChar *name, xmlChar **prefixOut) {
2990 xmlChar buf[XML_MAX_NAMELEN + 5];
2991 xmlChar *buffer = NULL;
2992 int len = 0;
2993 int max = XML_MAX_NAMELEN;
2994 xmlChar *ret = NULL;
2995 xmlChar *prefix;
2996 const xmlChar *cur = name;
2997 int c;
2998
2999 if (prefixOut == NULL) return(NULL);
3000 *prefixOut = NULL;
3001
3002 if (cur == NULL) return(NULL);
3003
3004 /* nasty but well=formed */
3005 if (cur[0] == ':')
3006 return(xmlStrdup(name));
3007
3008 c = *cur++;
3009 while ((c != 0) && (c != ':') && (len < max)) { /* tested bigname.xml */
3010 buf[len++] = c;
3011 c = *cur++;
3012 }
3013 if (len >= max) {
3014 /*
3015 * Okay someone managed to make a huge name, so he's ready to pay
3016 * for the processing speed.
3017 */
3018 max = len * 2;
3019
3020 buffer = (xmlChar *) xmlMallocAtomic(max);
3021 if (buffer == NULL) {
3022 xmlErrMemory(ctxt);
3023 return(NULL);
3024 }
3025 memcpy(buffer, buf, len);
3026 while ((c != 0) && (c != ':')) { /* tested bigname.xml */
3027 if (len + 10 > max) {
3028 xmlChar *tmp;
3029
3030 max *= 2;
3031 tmp = (xmlChar *) xmlRealloc(buffer, max);
3032 if (tmp == NULL) {
3033 xmlFree(buffer);
3034 xmlErrMemory(ctxt);
3035 return(NULL);
3036 }
3037 buffer = tmp;
3038 }
3039 buffer[len++] = c;
3040 c = *cur++;
3041 }
3042 buffer[len] = 0;
3043 }
3044
3045 if ((c == ':') && (*cur == 0)) {
3046 if (buffer != NULL)
3047 xmlFree(buffer);
3048 return(xmlStrdup(name));
3049 }
3050
3051 if (buffer == NULL) {
3052 ret = xmlStrndup(buf, len);
3053 if (ret == NULL) {
3054 xmlErrMemory(ctxt);
3055 return(NULL);
3056 }
3057 } else {
3058 ret = buffer;
3059 buffer = NULL;
3060 max = XML_MAX_NAMELEN;
3061 }
3062
3063
3064 if (c == ':') {
3065 c = *cur;
3066 prefix = ret;
3067 if (c == 0) {
3068 ret = xmlStrndup(BAD_CAST "", 0);
3069 if (ret == NULL) {
3070 xmlFree(prefix);
3071 return(NULL);
3072 }
3073 *prefixOut = prefix;
3074 return(ret);
3075 }
3076 len = 0;
3077
3078 /*
3079 * Check that the first character is proper to start
3080 * a new name
3081 */
3082 if (!(((c >= 0x61) && (c <= 0x7A)) ||
3083 ((c >= 0x41) && (c <= 0x5A)) ||
3084 (c == '_') || (c == ':'))) {
3085 int l;
3086 int first = CUR_SCHAR(cur, l);
3087
3088 if (!IS_LETTER(first) && (first != '_')) {
3089 xmlFatalErrMsgStr(ctxt, XML_NS_ERR_QNAME,
3090 "Name %s is not XML Namespace compliant\n",
3091 name);
3092 }
3093 }
3094 cur++;
3095
3096 while ((c != 0) && (len < max)) { /* tested bigname2.xml */
3097 buf[len++] = c;
3098 c = *cur++;
3099 }
3100 if (len >= max) {
3101 /*
3102 * Okay someone managed to make a huge name, so he's ready to pay
3103 * for the processing speed.
3104 */
3105 max = len * 2;
3106
3107 buffer = (xmlChar *) xmlMallocAtomic(max);
3108 if (buffer == NULL) {
3109 xmlErrMemory(ctxt);
3110 xmlFree(prefix);
3111 return(NULL);
3112 }
3113 memcpy(buffer, buf, len);
3114 while (c != 0) { /* tested bigname2.xml */
3115 if (len + 10 > max) {
3116 xmlChar *tmp;
3117
3118 max *= 2;
3119 tmp = (xmlChar *) xmlRealloc(buffer, max);
3120 if (tmp == NULL) {
3121 xmlErrMemory(ctxt);
3122 xmlFree(prefix);
3123 xmlFree(buffer);
3124 return(NULL);
3125 }
3126 buffer = tmp;
3127 }
3128 buffer[len++] = c;
3129 c = *cur++;
3130 }
3131 buffer[len] = 0;
3132 }
3133
3134 if (buffer == NULL) {
3135 ret = xmlStrndup(buf, len);
3136 if (ret == NULL) {
3137 xmlFree(prefix);
3138 return(NULL);
3139 }
3140 } else {
3141 ret = buffer;
3142 }
3143
3144 *prefixOut = prefix;
3145 }
3146
3147 return(ret);
3148}
3149
3150/************************************************************************
3151 * *
3152 * The parser itself *
3153 * Relates to http://www.w3.org/TR/REC-xml *
3154 * *
3155 ************************************************************************/
3156
3157/************************************************************************
3158 * *
3159 * Routines to parse Name, NCName and NmToken *
3160 * *
3161 ************************************************************************/
3162
3163/*
3164 * The two following functions are related to the change of accepted
3165 * characters for Name and NmToken in the Revision 5 of XML-1.0
3166 * They correspond to the modified production [4] and the new production [4a]
3167 * changes in that revision. Also note that the macros used for the
3168 * productions Letter, Digit, CombiningChar and Extender are not needed
3169 * anymore.
3170 * We still keep compatibility to pre-revision5 parsing semantic if the
3171 * new XML_PARSE_OLD10 option is given to the parser.
3172 */
3173static int
3174xmlIsNameStartChar(xmlParserCtxtPtr ctxt, int c) {
3175 if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3176 /*
3177 * Use the new checks of production [4] [4a] amd [5] of the
3178 * Update 5 of XML-1.0
3179 */
3180 if ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3181 (((c >= 'a') && (c <= 'z')) ||
3182 ((c >= 'A') && (c <= 'Z')) ||
3183 (c == '_') || (c == ':') ||
3184 ((c >= 0xC0) && (c <= 0xD6)) ||
3185 ((c >= 0xD8) && (c <= 0xF6)) ||
3186 ((c >= 0xF8) && (c <= 0x2FF)) ||
3187 ((c >= 0x370) && (c <= 0x37D)) ||
3188 ((c >= 0x37F) && (c <= 0x1FFF)) ||
3189 ((c >= 0x200C) && (c <= 0x200D)) ||
3190 ((c >= 0x2070) && (c <= 0x218F)) ||
3191 ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3192 ((c >= 0x3001) && (c <= 0xD7FF)) ||
3193 ((c >= 0xF900) && (c <= 0xFDCF)) ||
3194 ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3195 ((c >= 0x10000) && (c <= 0xEFFFF))))
3196 return(1);
3197 } else {
3198 if (IS_LETTER(c) || (c == '_') || (c == ':'))
3199 return(1);
3200 }
3201 return(0);
3202}
3203
3204static int
3205xmlIsNameChar(xmlParserCtxtPtr ctxt, int c) {
3206 if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3207 /*
3208 * Use the new checks of production [4] [4a] amd [5] of the
3209 * Update 5 of XML-1.0
3210 */
3211 if ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3212 (((c >= 'a') && (c <= 'z')) ||
3213 ((c >= 'A') && (c <= 'Z')) ||
3214 ((c >= '0') && (c <= '9')) || /* !start */
3215 (c == '_') || (c == ':') ||
3216 (c == '-') || (c == '.') || (c == 0xB7) || /* !start */
3217 ((c >= 0xC0) && (c <= 0xD6)) ||
3218 ((c >= 0xD8) && (c <= 0xF6)) ||
3219 ((c >= 0xF8) && (c <= 0x2FF)) ||
3220 ((c >= 0x300) && (c <= 0x36F)) || /* !start */
3221 ((c >= 0x370) && (c <= 0x37D)) ||
3222 ((c >= 0x37F) && (c <= 0x1FFF)) ||
3223 ((c >= 0x200C) && (c <= 0x200D)) ||
3224 ((c >= 0x203F) && (c <= 0x2040)) || /* !start */
3225 ((c >= 0x2070) && (c <= 0x218F)) ||
3226 ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3227 ((c >= 0x3001) && (c <= 0xD7FF)) ||
3228 ((c >= 0xF900) && (c <= 0xFDCF)) ||
3229 ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3230 ((c >= 0x10000) && (c <= 0xEFFFF))))
3231 return(1);
3232 } else {
3233 if ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3234 (c == '.') || (c == '-') ||
3235 (c == '_') || (c == ':') ||
3236 (IS_COMBINING(c)) ||
3237 (IS_EXTENDER(c)))
3238 return(1);
3239 }
3240 return(0);
3241}
3242
3243static const xmlChar *
3244xmlParseNameComplex(xmlParserCtxtPtr ctxt) {
3245 const xmlChar *ret;
3246 int len = 0, l;
3247 int c;
3248 int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3249 XML_MAX_TEXT_LENGTH :
3250 XML_MAX_NAME_LENGTH;
3251
3252 /*
3253 * Handler for more complex cases
3254 */
3255 c = CUR_CHAR(l);
3256 if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3257 /*
3258 * Use the new checks of production [4] [4a] amd [5] of the
3259 * Update 5 of XML-1.0
3260 */
3261 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3262 (!(((c >= 'a') && (c <= 'z')) ||
3263 ((c >= 'A') && (c <= 'Z')) ||
3264 (c == '_') || (c == ':') ||
3265 ((c >= 0xC0) && (c <= 0xD6)) ||
3266 ((c >= 0xD8) && (c <= 0xF6)) ||
3267 ((c >= 0xF8) && (c <= 0x2FF)) ||
3268 ((c >= 0x370) && (c <= 0x37D)) ||
3269 ((c >= 0x37F) && (c <= 0x1FFF)) ||
3270 ((c >= 0x200C) && (c <= 0x200D)) ||
3271 ((c >= 0x2070) && (c <= 0x218F)) ||
3272 ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3273 ((c >= 0x3001) && (c <= 0xD7FF)) ||
3274 ((c >= 0xF900) && (c <= 0xFDCF)) ||
3275 ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3276 ((c >= 0x10000) && (c <= 0xEFFFF))))) {
3277 return(NULL);
3278 }
3279 len += l;
3280 NEXTL(l);
3281 c = CUR_CHAR(l);
3282 while ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3283 (((c >= 'a') && (c <= 'z')) ||
3284 ((c >= 'A') && (c <= 'Z')) ||
3285 ((c >= '0') && (c <= '9')) || /* !start */
3286 (c == '_') || (c == ':') ||
3287 (c == '-') || (c == '.') || (c == 0xB7) || /* !start */
3288 ((c >= 0xC0) && (c <= 0xD6)) ||
3289 ((c >= 0xD8) && (c <= 0xF6)) ||
3290 ((c >= 0xF8) && (c <= 0x2FF)) ||
3291 ((c >= 0x300) && (c <= 0x36F)) || /* !start */
3292 ((c >= 0x370) && (c <= 0x37D)) ||
3293 ((c >= 0x37F) && (c <= 0x1FFF)) ||
3294 ((c >= 0x200C) && (c <= 0x200D)) ||
3295 ((c >= 0x203F) && (c <= 0x2040)) || /* !start */
3296 ((c >= 0x2070) && (c <= 0x218F)) ||
3297 ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3298 ((c >= 0x3001) && (c <= 0xD7FF)) ||
3299 ((c >= 0xF900) && (c <= 0xFDCF)) ||
3300 ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3301 ((c >= 0x10000) && (c <= 0xEFFFF))
3302 )) {
3303 if (len <= INT_MAX - l)
3304 len += l;
3305 NEXTL(l);
3306 c = CUR_CHAR(l);
3307 }
3308 } else {
3309 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3310 (!IS_LETTER(c) && (c != '_') &&
3311 (c != ':'))) {
3312 return(NULL);
3313 }
3314 len += l;
3315 NEXTL(l);
3316 c = CUR_CHAR(l);
3317
3318 while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
3319 ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3320 (c == '.') || (c == '-') ||
3321 (c == '_') || (c == ':') ||
3322 (IS_COMBINING(c)) ||
3323 (IS_EXTENDER(c)))) {
3324 if (len <= INT_MAX - l)
3325 len += l;
3326 NEXTL(l);
3327 c = CUR_CHAR(l);
3328 }
3329 }
3330 if (len > maxLength) {
3331 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Name");
3332 return(NULL);
3333 }
3334 if (ctxt->input->cur - ctxt->input->base < len) {
3335 /*
3336 * There were a couple of bugs where PERefs lead to to a change
3337 * of the buffer. Check the buffer size to avoid passing an invalid
3338 * pointer to xmlDictLookup.
3339 */
3340 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
3341 "unexpected change of input buffer");
3342 return (NULL);
3343 }
3344 if ((*ctxt->input->cur == '\n') && (ctxt->input->cur[-1] == '\r'))
3345 ret = xmlDictLookup(ctxt->dict, ctxt->input->cur - (len + 1), len);
3346 else
3347 ret = xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len);
3348 if (ret == NULL)
3349 xmlErrMemory(ctxt);
3350 return(ret);
3351}
3352
3353/**
3354 * xmlParseName:
3355 * @ctxt: an XML parser context
3356 *
3357 * DEPRECATED: Internal function, don't use.
3358 *
3359 * parse an XML name.
3360 *
3361 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3362 * CombiningChar | Extender
3363 *
3364 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
3365 *
3366 * [6] Names ::= Name (#x20 Name)*
3367 *
3368 * Returns the Name parsed or NULL
3369 */
3370
3371const xmlChar *
3372xmlParseName(xmlParserCtxtPtr ctxt) {
3373 const xmlChar *in;
3374 const xmlChar *ret;
3375 size_t count = 0;
3376 size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3377 XML_MAX_TEXT_LENGTH :
3378 XML_MAX_NAME_LENGTH;
3379
3380 GROW;
3381
3382 /*
3383 * Accelerator for simple ASCII names
3384 */
3385 in = ctxt->input->cur;
3386 if (((*in >= 0x61) && (*in <= 0x7A)) ||
3387 ((*in >= 0x41) && (*in <= 0x5A)) ||
3388 (*in == '_') || (*in == ':')) {
3389 in++;
3390 while (((*in >= 0x61) && (*in <= 0x7A)) ||
3391 ((*in >= 0x41) && (*in <= 0x5A)) ||
3392 ((*in >= 0x30) && (*in <= 0x39)) ||
3393 (*in == '_') || (*in == '-') ||
3394 (*in == ':') || (*in == '.'))
3395 in++;
3396 if ((*in > 0) && (*in < 0x80)) {
3397 count = in - ctxt->input->cur;
3398 if (count > maxLength) {
3399 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Name");
3400 return(NULL);
3401 }
3402 ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
3403 ctxt->input->cur = in;
3404 ctxt->input->col += count;
3405 if (ret == NULL)
3406 xmlErrMemory(ctxt);
3407 return(ret);
3408 }
3409 }
3410 /* accelerator for special cases */
3411 return(xmlParseNameComplex(ctxt));
3412}
3413
3414static xmlHashedString
3415xmlParseNCNameComplex(xmlParserCtxtPtr ctxt) {
3416 xmlHashedString ret;
3417 int len = 0, l;
3418 int c;
3419 int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3420 XML_MAX_TEXT_LENGTH :
3421 XML_MAX_NAME_LENGTH;
3422 size_t startPosition = 0;
3423
3424 ret.name = NULL;
3425 ret.hashValue = 0;
3426
3427 /*
3428 * Handler for more complex cases
3429 */
3430 startPosition = CUR_PTR - BASE_PTR;
3431 c = CUR_CHAR(l);
3432 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3433 (!xmlIsNameStartChar(ctxt, c) || (c == ':'))) {
3434 return(ret);
3435 }
3436
3437 while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
3438 (xmlIsNameChar(ctxt, c) && (c != ':'))) {
3439 if (len <= INT_MAX - l)
3440 len += l;
3441 NEXTL(l);
3442 c = CUR_CHAR(l);
3443 }
3444 if (len > maxLength) {
3445 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3446 return(ret);
3447 }
3448 ret = xmlDictLookupHashed(ctxt->dict, (BASE_PTR + startPosition), len);
3449 if (ret.name == NULL)
3450 xmlErrMemory(ctxt);
3451 return(ret);
3452}
3453
3454/**
3455 * xmlParseNCName:
3456 * @ctxt: an XML parser context
3457 * @len: length of the string parsed
3458 *
3459 * parse an XML name.
3460 *
3461 * [4NS] NCNameChar ::= Letter | Digit | '.' | '-' | '_' |
3462 * CombiningChar | Extender
3463 *
3464 * [5NS] NCName ::= (Letter | '_') (NCNameChar)*
3465 *
3466 * Returns the Name parsed or NULL
3467 */
3468
3469static xmlHashedString
3470xmlParseNCName(xmlParserCtxtPtr ctxt) {
3471 const xmlChar *in, *e;
3472 xmlHashedString ret;
3473 size_t count = 0;
3474 size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3475 XML_MAX_TEXT_LENGTH :
3476 XML_MAX_NAME_LENGTH;
3477
3478 ret.name = NULL;
3479
3480 /*
3481 * Accelerator for simple ASCII names
3482 */
3483 in = ctxt->input->cur;
3484 e = ctxt->input->end;
3485 if ((((*in >= 0x61) && (*in <= 0x7A)) ||
3486 ((*in >= 0x41) && (*in <= 0x5A)) ||
3487 (*in == '_')) && (in < e)) {
3488 in++;
3489 while ((((*in >= 0x61) && (*in <= 0x7A)) ||
3490 ((*in >= 0x41) && (*in <= 0x5A)) ||
3491 ((*in >= 0x30) && (*in <= 0x39)) ||
3492 (*in == '_') || (*in == '-') ||
3493 (*in == '.')) && (in < e))
3494 in++;
3495 if (in >= e)
3496 goto complex;
3497 if ((*in > 0) && (*in < 0x80)) {
3498 count = in - ctxt->input->cur;
3499 if (count > maxLength) {
3500 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3501 return(ret);
3502 }
3503 ret = xmlDictLookupHashed(ctxt->dict, ctxt->input->cur, count);
3504 ctxt->input->cur = in;
3505 ctxt->input->col += count;
3506 if (ret.name == NULL) {
3507 xmlErrMemory(ctxt);
3508 }
3509 return(ret);
3510 }
3511 }
3512complex:
3513 return(xmlParseNCNameComplex(ctxt));
3514}
3515
3516/**
3517 * xmlParseNameAndCompare:
3518 * @ctxt: an XML parser context
3519 *
3520 * parse an XML name and compares for match
3521 * (specialized for endtag parsing)
3522 *
3523 * Returns NULL for an illegal name, (xmlChar*) 1 for success
3524 * and the name for mismatch
3525 */
3526
3527static const xmlChar *
3528xmlParseNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *other) {
3529 register const xmlChar *cmp = other;
3530 register const xmlChar *in;
3531 const xmlChar *ret;
3532
3533 GROW;
3534
3535 in = ctxt->input->cur;
3536 while (*in != 0 && *in == *cmp) {
3537 ++in;
3538 ++cmp;
3539 }
3540 if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
3541 /* success */
3542 ctxt->input->col += in - ctxt->input->cur;
3543 ctxt->input->cur = in;
3544 return (const xmlChar*) 1;
3545 }
3546 /* failure (or end of input buffer), check with full function */
3547 ret = xmlParseName (ctxt);
3548 /* strings coming from the dictionary direct compare possible */
3549 if (ret == other) {
3550 return (const xmlChar*) 1;
3551 }
3552 return ret;
3553}
3554
3555/**
3556 * xmlParseStringName:
3557 * @ctxt: an XML parser context
3558 * @str: a pointer to the string pointer (IN/OUT)
3559 *
3560 * parse an XML name.
3561 *
3562 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3563 * CombiningChar | Extender
3564 *
3565 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
3566 *
3567 * [6] Names ::= Name (#x20 Name)*
3568 *
3569 * Returns the Name parsed or NULL. The @str pointer
3570 * is updated to the current location in the string.
3571 */
3572
3573static xmlChar *
3574xmlParseStringName(xmlParserCtxtPtr ctxt, const xmlChar** str) {
3575 xmlChar buf[XML_MAX_NAMELEN + 5];
3576 xmlChar *ret;
3577 const xmlChar *cur = *str;
3578 int len = 0, l;
3579 int c;
3580 int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3581 XML_MAX_TEXT_LENGTH :
3582 XML_MAX_NAME_LENGTH;
3583
3584 c = CUR_SCHAR(cur, l);
3585 if (!xmlIsNameStartChar(ctxt, c)) {
3586 return(NULL);
3587 }
3588
3589 COPY_BUF(buf, len, c);
3590 cur += l;
3591 c = CUR_SCHAR(cur, l);
3592 while (xmlIsNameChar(ctxt, c)) {
3593 COPY_BUF(buf, len, c);
3594 cur += l;
3595 c = CUR_SCHAR(cur, l);
3596 if (len >= XML_MAX_NAMELEN) { /* test bigentname.xml */
3597 /*
3598 * Okay someone managed to make a huge name, so he's ready to pay
3599 * for the processing speed.
3600 */
3601 xmlChar *buffer;
3602 int max = len * 2;
3603
3604 buffer = (xmlChar *) xmlMallocAtomic(max);
3605 if (buffer == NULL) {
3606 xmlErrMemory(ctxt);
3607 return(NULL);
3608 }
3609 memcpy(buffer, buf, len);
3610 while (xmlIsNameChar(ctxt, c)) {
3611 if (len + 10 > max) {
3612 xmlChar *tmp;
3613
3614 max *= 2;
3615 tmp = (xmlChar *) xmlRealloc(buffer, max);
3616 if (tmp == NULL) {
3617 xmlErrMemory(ctxt);
3618 xmlFree(buffer);
3619 return(NULL);
3620 }
3621 buffer = tmp;
3622 }
3623 COPY_BUF(buffer, len, c);
3624 cur += l;
3625 c = CUR_SCHAR(cur, l);
3626 if (len > maxLength) {
3627 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3628 xmlFree(buffer);
3629 return(NULL);
3630 }
3631 }
3632 buffer[len] = 0;
3633 *str = cur;
3634 return(buffer);
3635 }
3636 }
3637 if (len > maxLength) {
3638 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3639 return(NULL);
3640 }
3641 *str = cur;
3642 ret = xmlStrndup(buf, len);
3643 if (ret == NULL)
3644 xmlErrMemory(ctxt);
3645 return(ret);
3646}
3647
3648/**
3649 * xmlParseNmtoken:
3650 * @ctxt: an XML parser context
3651 *
3652 * DEPRECATED: Internal function, don't use.
3653 *
3654 * parse an XML Nmtoken.
3655 *
3656 * [7] Nmtoken ::= (NameChar)+
3657 *
3658 * [8] Nmtokens ::= Nmtoken (#x20 Nmtoken)*
3659 *
3660 * Returns the Nmtoken parsed or NULL
3661 */
3662
3663xmlChar *
3664xmlParseNmtoken(xmlParserCtxtPtr ctxt) {
3665 xmlChar buf[XML_MAX_NAMELEN + 5];
3666 xmlChar *ret;
3667 int len = 0, l;
3668 int c;
3669 int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3670 XML_MAX_TEXT_LENGTH :
3671 XML_MAX_NAME_LENGTH;
3672
3673 c = CUR_CHAR(l);
3674
3675 while (xmlIsNameChar(ctxt, c)) {
3676 COPY_BUF(buf, len, c);
3677 NEXTL(l);
3678 c = CUR_CHAR(l);
3679 if (len >= XML_MAX_NAMELEN) {
3680 /*
3681 * Okay someone managed to make a huge token, so he's ready to pay
3682 * for the processing speed.
3683 */
3684 xmlChar *buffer;
3685 int max = len * 2;
3686
3687 buffer = (xmlChar *) xmlMallocAtomic(max);
3688 if (buffer == NULL) {
3689 xmlErrMemory(ctxt);
3690 return(NULL);
3691 }
3692 memcpy(buffer, buf, len);
3693 while (xmlIsNameChar(ctxt, c)) {
3694 if (len + 10 > max) {
3695 xmlChar *tmp;
3696
3697 max *= 2;
3698 tmp = (xmlChar *) xmlRealloc(buffer, max);
3699 if (tmp == NULL) {
3700 xmlErrMemory(ctxt);
3701 xmlFree(buffer);
3702 return(NULL);
3703 }
3704 buffer = tmp;
3705 }
3706 COPY_BUF(buffer, len, c);
3707 if (len > maxLength) {
3708 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NmToken");
3709 xmlFree(buffer);
3710 return(NULL);
3711 }
3712 NEXTL(l);
3713 c = CUR_CHAR(l);
3714 }
3715 buffer[len] = 0;
3716 return(buffer);
3717 }
3718 }
3719 if (len == 0)
3720 return(NULL);
3721 if (len > maxLength) {
3722 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NmToken");
3723 return(NULL);
3724 }
3725 ret = xmlStrndup(buf, len);
3726 if (ret == NULL)
3727 xmlErrMemory(ctxt);
3728 return(ret);
3729}
3730
3731/**
3732 * xmlExpandPEsInEntityValue:
3733 * @ctxt: parser context
3734 * @buf: string buffer
3735 * @str: entity value
3736 * @length: size of entity value
3737 * @depth: nesting depth
3738 *
3739 * Validate an entity value and expand parameter entities.
3740 */
3741static void
3742xmlExpandPEsInEntityValue(xmlParserCtxtPtr ctxt, xmlSBuf *buf,
3743 const xmlChar *str, int length, int depth) {
3744 int maxDepth = (ctxt->options & XML_PARSE_HUGE) ? 40 : 20;
3745 const xmlChar *end, *chunk;
3746 int c, l;
3747
3748 if (str == NULL)
3749 return;
3750
3751 depth += 1;
3752 if (depth > maxDepth) {
3753 xmlFatalErrMsg(ctxt, XML_ERR_RESOURCE_LIMIT,
3754 "Maximum entity nesting depth exceeded");
3755 return;
3756 }
3757
3758 end = str + length;
3759 chunk = str;
3760
3761 while ((str < end) && (!PARSER_STOPPED(ctxt))) {
3762 c = *str;
3763
3764 if (c >= 0x80) {
3765 l = xmlUTF8MultibyteLen(ctxt, str,
3766 "invalid character in entity value\n");
3767 if (l == 0) {
3768 if (chunk < str)
3769 xmlSBufAddString(buf, chunk, str - chunk);
3770 xmlSBufAddReplChar(buf);
3771 str += 1;
3772 chunk = str;
3773 } else {
3774 str += l;
3775 }
3776 } else if (c == '&') {
3777 if (str[1] == '#') {
3778 if (chunk < str)
3779 xmlSBufAddString(buf, chunk, str - chunk);
3780
3781 c = xmlParseStringCharRef(ctxt, &str);
3782 if (c == 0)
3783 return;
3784
3785 xmlSBufAddChar(buf, c);
3786
3787 chunk = str;
3788 } else {
3789 xmlChar *name;
3790
3791 /*
3792 * General entity references are checked for
3793 * syntactic validity.
3794 */
3795 str++;
3796 name = xmlParseStringName(ctxt, &str);
3797
3798 if ((name == NULL) || (*str++ != ';')) {
3799 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_CHAR_ERROR,
3800 "EntityValue: '&' forbidden except for entities "
3801 "references\n");
3802 xmlFree(name);
3803 return;
3804 }
3805
3806 xmlFree(name);
3807 }
3808 } else if (c == '%') {
3809 xmlEntityPtr ent;
3810
3811 if (chunk < str)
3812 xmlSBufAddString(buf, chunk, str - chunk);
3813
3814 ent = xmlParseStringPEReference(ctxt, &str);
3815 if (ent == NULL)
3816 return;
3817
3818 if (!PARSER_EXTERNAL(ctxt)) {
3819 xmlFatalErr(ctxt, XML_ERR_ENTITY_PE_INTERNAL, NULL);
3820 return;
3821 }
3822
3823 if (ent->content == NULL) {
3824 /*
3825 * Note: external parsed entities will not be loaded,
3826 * it is not required for a non-validating parser to
3827 * complete external PEReferences coming from the
3828 * internal subset
3829 */
3830 if (((ctxt->options & XML_PARSE_NO_XXE) == 0) &&
3831 ((ctxt->replaceEntities) ||
3832 (ctxt->validate))) {
3833 xmlLoadEntityContent(ctxt, ent);
3834 } else {
3835 xmlWarningMsg(ctxt, XML_ERR_ENTITY_PROCESSING,
3836 "not validating will not read content for "
3837 "PE entity %s\n", ent->name, NULL);
3838 }
3839 }
3840
3841 /*
3842 * TODO: Skip if ent->content is still NULL.
3843 */
3844
3845 if (xmlParserEntityCheck(ctxt, ent->length))
3846 return;
3847
3848 if (ent->flags & XML_ENT_EXPANDING) {
3849 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
3850 xmlHaltParser(ctxt);
3851 return;
3852 }
3853
3854 ent->flags |= XML_ENT_EXPANDING;
3855 xmlExpandPEsInEntityValue(ctxt, buf, ent->content, ent->length,
3856 depth);
3857 ent->flags &= ~XML_ENT_EXPANDING;
3858
3859 chunk = str;
3860 } else {
3861 /* Normal ASCII char */
3862 if (!IS_BYTE_CHAR(c)) {
3863 xmlFatalErrMsg(ctxt, XML_ERR_INVALID_CHAR,
3864 "invalid character in entity value\n");
3865 if (chunk < str)
3866 xmlSBufAddString(buf, chunk, str - chunk);
3867 xmlSBufAddReplChar(buf);
3868 str += 1;
3869 chunk = str;
3870 } else {
3871 str += 1;
3872 }
3873 }
3874 }
3875
3876 if (chunk < str)
3877 xmlSBufAddString(buf, chunk, str - chunk);
3878
3879 return;
3880}
3881
3882/**
3883 * xmlParseEntityValue:
3884 * @ctxt: an XML parser context
3885 * @orig: if non-NULL store a copy of the original entity value
3886 *
3887 * DEPRECATED: Internal function, don't use.
3888 *
3889 * parse a value for ENTITY declarations
3890 *
3891 * [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' |
3892 * "'" ([^%&'] | PEReference | Reference)* "'"
3893 *
3894 * Returns the EntityValue parsed with reference substituted or NULL
3895 */
3896xmlChar *
3897xmlParseEntityValue(xmlParserCtxtPtr ctxt, xmlChar **orig) {
3898 unsigned maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3899 XML_MAX_HUGE_LENGTH :
3900 XML_MAX_TEXT_LENGTH;
3901 xmlSBuf buf;
3902 const xmlChar *start;
3903 int quote, length;
3904
3905 xmlSBufInit(&buf, maxLength);
3906
3907 GROW;
3908
3909 quote = CUR;
3910 if ((quote != '"') && (quote != '\'')) {
3911 xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
3912 return(NULL);
3913 }
3914 CUR_PTR++;
3915
3916 length = 0;
3917
3918 /*
3919 * Copy raw content of the entity into a buffer
3920 */
3921 while (1) {
3922 int c;
3923
3924 if (PARSER_STOPPED(ctxt))
3925 goto error;
3926
3927 if (CUR_PTR >= ctxt->input->end) {
3928 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_NOT_FINISHED, NULL);
3929 goto error;
3930 }
3931
3932 c = CUR;
3933
3934 if (c == 0) {
3935 xmlFatalErrMsg(ctxt, XML_ERR_INVALID_CHAR,
3936 "invalid character in entity value\n");
3937 goto error;
3938 }
3939 if (c == quote)
3940 break;
3941 NEXTL(1);
3942 length += 1;
3943
3944 /*
3945 * TODO: Check growth threshold
3946 */
3947 if (ctxt->input->end - CUR_PTR < 10)
3948 GROW;
3949 }
3950
3951 start = CUR_PTR - length;
3952
3953 if (orig != NULL) {
3954 *orig = xmlStrndup(start, length);
3955 if (*orig == NULL)
3956 xmlErrMemory(ctxt);
3957 }
3958
3959 xmlExpandPEsInEntityValue(ctxt, &buf, start, length, ctxt->inputNr);
3960
3961 NEXTL(1);
3962
3963 return(xmlSBufFinish(&buf, NULL, ctxt, "entity length too long"));
3964
3965error:
3966 xmlSBufCleanup(&buf, ctxt, "entity length too long");
3967 return(NULL);
3968}
3969
3970/**
3971 * xmlCheckEntityInAttValue:
3972 * @ctxt: parser context
3973 * @pent: entity
3974 * @depth: nesting depth
3975 *
3976 * Check an entity reference in an attribute value for validity
3977 * without expanding it.
3978 */
3979static void
3980xmlCheckEntityInAttValue(xmlParserCtxtPtr ctxt, xmlEntityPtr pent, int depth) {
3981 int maxDepth = (ctxt->options & XML_PARSE_HUGE) ? 40 : 20;
3982 const xmlChar *str;
3983 unsigned long expandedSize = pent->length;
3984 int c, flags;
3985
3986 depth += 1;
3987 if (depth > maxDepth) {
3988 xmlFatalErrMsg(ctxt, XML_ERR_RESOURCE_LIMIT,
3989 "Maximum entity nesting depth exceeded");
3990 return;
3991 }
3992
3993 if (pent->flags & XML_ENT_EXPANDING) {
3994 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
3995 xmlHaltParser(ctxt);
3996 return;
3997 }
3998
3999 /*
4000 * If we're parsing a default attribute value in DTD content,
4001 * the entity might reference other entities which weren't
4002 * defined yet, so the check isn't reliable.
4003 */
4004 if (ctxt->inSubset == 0)
4005 flags = XML_ENT_CHECKED | XML_ENT_VALIDATED;
4006 else
4007 flags = XML_ENT_VALIDATED;
4008
4009 str = pent->content;
4010 if (str == NULL)
4011 goto done;
4012
4013 /*
4014 * Note that entity values are already validated. We only check
4015 * for illegal less-than signs and compute the expanded size
4016 * of the entity. No special handling for multi-byte characters
4017 * is needed.
4018 */
4019 while (!PARSER_STOPPED(ctxt)) {
4020 c = *str;
4021
4022 if (c != '&') {
4023 if (c == 0)
4024 break;
4025
4026 if (c == '<')
4027 xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
4028 "'<' in entity '%s' is not allowed in attributes "
4029 "values\n", pent->name);
4030
4031 str += 1;
4032 } else if (str[1] == '#') {
4033 int val;
4034
4035 val = xmlParseStringCharRef(ctxt, &str);
4036 if (val == 0) {
4037 pent->content[0] = 0;
4038 break;
4039 }
4040 } else {
4041 xmlChar *name;
4042 xmlEntityPtr ent;
4043
4044 name = xmlParseStringEntityRef(ctxt, &str);
4045 if (name == NULL) {
4046 pent->content[0] = 0;
4047 break;
4048 }
4049
4050 ent = xmlLookupGeneralEntity(ctxt, name, /* inAttr */ 1);
4051 xmlFree(name);
4052
4053 if ((ent != NULL) &&
4054 (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY)) {
4055 if ((ent->flags & flags) != flags) {
4056 pent->flags |= XML_ENT_EXPANDING;
4057 xmlCheckEntityInAttValue(ctxt, ent, depth);
4058 pent->flags &= ~XML_ENT_EXPANDING;
4059 }
4060
4061 xmlSaturatedAdd(&expandedSize, ent->expandedSize);
4062 xmlSaturatedAdd(&expandedSize, XML_ENT_FIXED_COST);
4063 }
4064 }
4065 }
4066
4067done:
4068 if (ctxt->inSubset == 0)
4069 pent->expandedSize = expandedSize;
4070
4071 pent->flags |= flags;
4072}
4073
4074/**
4075 * xmlExpandEntityInAttValue:
4076 * @ctxt: parser context
4077 * @buf: string buffer
4078 * @str: entity or attribute value
4079 * @pent: entity for entity value, NULL for attribute values
4080 * @normalize: whether to collapse whitespace
4081 * @inSpace: whitespace state
4082 * @depth: nesting depth
4083 * @check: whether to check for amplification
4084 *
4085 * Expand general entity references in an entity or attribute value.
4086 * Perform attribute value normalization.
4087 */
4088static void
4089xmlExpandEntityInAttValue(xmlParserCtxtPtr ctxt, xmlSBuf *buf,
4090 const xmlChar *str, xmlEntityPtr pent, int normalize,
4091 int *inSpace, int depth, int check) {
4092 int maxDepth = (ctxt->options & XML_PARSE_HUGE) ? 40 : 20;
4093 int c, chunkSize;
4094
4095 if (str == NULL)
4096 return;
4097
4098 depth += 1;
4099 if (depth > maxDepth) {
4100 xmlFatalErrMsg(ctxt, XML_ERR_RESOURCE_LIMIT,
4101 "Maximum entity nesting depth exceeded");
4102 return;
4103 }
4104
4105 if (pent != NULL) {
4106 if (pent->flags & XML_ENT_EXPANDING) {
4107 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
4108 xmlHaltParser(ctxt);
4109 return;
4110 }
4111
4112 if (check) {
4113 if (xmlParserEntityCheck(ctxt, pent->length))
4114 return;
4115 }
4116 }
4117
4118 chunkSize = 0;
4119
4120 /*
4121 * Note that entity values are already validated. No special
4122 * handling for multi-byte characters is needed.
4123 */
4124 while (!PARSER_STOPPED(ctxt)) {
4125 c = *str;
4126
4127 if (c != '&') {
4128 if (c == 0)
4129 break;
4130
4131 /*
4132 * If this function is called without an entity, it is used to
4133 * expand entities in an attribute content where less-than was
4134 * already unscaped and is allowed.
4135 */
4136 if ((pent != NULL) && (c == '<')) {
4137 xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
4138 "'<' in entity '%s' is not allowed in attributes "
4139 "values\n", pent->name);
4140 break;
4141 }
4142
4143 if (c <= 0x20) {
4144 if ((normalize) && (*inSpace)) {
4145 /* Skip char */
4146 if (chunkSize > 0) {
4147 xmlSBufAddString(buf, str - chunkSize, chunkSize);
4148 chunkSize = 0;
4149 }
4150 } else if (c < 0x20) {
4151 if (chunkSize > 0) {
4152 xmlSBufAddString(buf, str - chunkSize, chunkSize);
4153 chunkSize = 0;
4154 }
4155
4156 xmlSBufAddCString(buf, " ", 1);
4157 } else {
4158 chunkSize += 1;
4159 }
4160
4161 *inSpace = 1;
4162 } else {
4163 chunkSize += 1;
4164 *inSpace = 0;
4165 }
4166
4167 str += 1;
4168 } else if (str[1] == '#') {
4169 int val;
4170
4171 if (chunkSize > 0) {
4172 xmlSBufAddString(buf, str - chunkSize, chunkSize);
4173 chunkSize = 0;
4174 }
4175
4176 val = xmlParseStringCharRef(ctxt, &str);
4177 if (val == 0) {
4178 if (pent != NULL)
4179 pent->content[0] = 0;
4180 break;
4181 }
4182
4183 if (val == ' ') {
4184 if ((!normalize) || (!*inSpace))
4185 xmlSBufAddCString(buf, " ", 1);
4186 *inSpace = 1;
4187 } else {
4188 xmlSBufAddChar(buf, val);
4189 *inSpace = 0;
4190 }
4191 } else {
4192 xmlChar *name;
4193 xmlEntityPtr ent;
4194
4195 if (chunkSize > 0) {
4196 xmlSBufAddString(buf, str - chunkSize, chunkSize);
4197 chunkSize = 0;
4198 }
4199
4200 name = xmlParseStringEntityRef(ctxt, &str);
4201 if (name == NULL) {
4202 if (pent != NULL)
4203 pent->content[0] = 0;
4204 break;
4205 }
4206
4207 ent = xmlLookupGeneralEntity(ctxt, name, /* inAttr */ 1);
4208 xmlFree(name);
4209
4210 if ((ent != NULL) &&
4211 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
4212 if (ent->content == NULL) {
4213 xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
4214 "predefined entity has no content\n");
4215 break;
4216 }
4217
4218 xmlSBufAddString(buf, ent->content, ent->length);
4219
4220 *inSpace = 0;
4221 } else if ((ent != NULL) && (ent->content != NULL)) {
4222 if (pent != NULL)
4223 pent->flags |= XML_ENT_EXPANDING;
4224 xmlExpandEntityInAttValue(ctxt, buf, ent->content, ent,
4225 normalize, inSpace, depth, check);
4226 if (pent != NULL)
4227 pent->flags &= ~XML_ENT_EXPANDING;
4228 }
4229 }
4230 }
4231
4232 if (chunkSize > 0)
4233 xmlSBufAddString(buf, str - chunkSize, chunkSize);
4234
4235 return;
4236}
4237
4238/**
4239 * xmlExpandEntitiesInAttValue:
4240 * @ctxt: parser context
4241 * @str: entity or attribute value
4242 * @normalize: whether to collapse whitespace
4243 *
4244 * Expand general entity references in an entity or attribute value.
4245 * Perform attribute value normalization.
4246 *
4247 * Returns the expanded attribtue value.
4248 */
4249xmlChar *
4250xmlExpandEntitiesInAttValue(xmlParserCtxtPtr ctxt, const xmlChar *str,
4251 int normalize) {
4252 unsigned maxLength = (ctxt->options & XML_PARSE_HUGE) ?
4253 XML_MAX_HUGE_LENGTH :
4254 XML_MAX_TEXT_LENGTH;
4255 xmlSBuf buf;
4256 int inSpace = 1;
4257
4258 xmlSBufInit(&buf, maxLength);
4259
4260 xmlExpandEntityInAttValue(ctxt, &buf, str, NULL, normalize, &inSpace,
4261 ctxt->inputNr, /* check */ 0);
4262
4263 if ((normalize) && (inSpace) && (buf.size > 0))
4264 buf.size--;
4265
4266 return(xmlSBufFinish(&buf, NULL, ctxt, "AttValue length too long"));
4267}
4268
4269/**
4270 * xmlParseAttValueInternal:
4271 * @ctxt: an XML parser context
4272 * @len: attribute len result
4273 * @alloc: whether the attribute was reallocated as a new string
4274 * @normalize: if 1 then further non-CDATA normalization must be done
4275 *
4276 * parse a value for an attribute.
4277 * NOTE: if no normalization is needed, the routine will return pointers
4278 * directly from the data buffer.
4279 *
4280 * 3.3.3 Attribute-Value Normalization:
4281 * Before the value of an attribute is passed to the application or
4282 * checked for validity, the XML processor must normalize it as follows:
4283 * - a character reference is processed by appending the referenced
4284 * character to the attribute value
4285 * - an entity reference is processed by recursively processing the
4286 * replacement text of the entity
4287 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
4288 * appending #x20 to the normalized value, except that only a single
4289 * #x20 is appended for a "#xD#xA" sequence that is part of an external
4290 * parsed entity or the literal entity value of an internal parsed entity
4291 * - other characters are processed by appending them to the normalized value
4292 * If the declared value is not CDATA, then the XML processor must further
4293 * process the normalized attribute value by discarding any leading and
4294 * trailing space (#x20) characters, and by replacing sequences of space
4295 * (#x20) characters by a single space (#x20) character.
4296 * All attributes for which no declaration has been read should be treated
4297 * by a non-validating parser as if declared CDATA.
4298 *
4299 * Returns the AttValue parsed or NULL. The value has to be freed by the
4300 * caller if it was copied, this can be detected by val[*len] == 0.
4301 */
4302static xmlChar *
4303xmlParseAttValueInternal(xmlParserCtxtPtr ctxt, int *attlen, int *alloc,
4304 int normalize, int isNamespace) {
4305 unsigned maxLength = (ctxt->options & XML_PARSE_HUGE) ?
4306 XML_MAX_HUGE_LENGTH :
4307 XML_MAX_TEXT_LENGTH;
4308 xmlSBuf buf;
4309 xmlChar *ret;
4310 int c, l, quote, flags, chunkSize;
4311 int inSpace = 1;
4312 int replaceEntities;
4313
4314 /* Always expand namespace URIs */
4315 replaceEntities = (ctxt->replaceEntities) || (isNamespace);
4316
4317 xmlSBufInit(&buf, maxLength);
4318
4319 GROW;
4320
4321 quote = CUR;
4322 if ((quote != '"') && (quote != '\'')) {
4323 xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
4324 return(NULL);
4325 }
4326 NEXTL(1);
4327
4328 if (ctxt->inSubset == 0)
4329 flags = XML_ENT_CHECKED | XML_ENT_VALIDATED;
4330 else
4331 flags = XML_ENT_VALIDATED;
4332
4333 inSpace = 1;
4334 chunkSize = 0;
4335
4336 while (1) {
4337 if (PARSER_STOPPED(ctxt))
4338 goto error;
4339
4340 if (CUR_PTR >= ctxt->input->end) {
4341 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
4342 "AttValue: ' expected\n");
4343 goto error;
4344 }
4345
4346 /*
4347 * TODO: Check growth threshold
4348 */
4349 if (ctxt->input->end - CUR_PTR < 10)
4350 GROW;
4351
4352 c = CUR;
4353
4354 if (c >= 0x80) {
4355 l = xmlUTF8MultibyteLen(ctxt, CUR_PTR,
4356 "invalid character in attribute value\n");
4357 if (l == 0) {
4358 if (chunkSize > 0) {
4359 xmlSBufAddString(&buf, CUR_PTR - chunkSize, chunkSize);
4360 chunkSize = 0;
4361 }
4362 xmlSBufAddReplChar(&buf);
4363 NEXTL(1);
4364 } else {
4365 chunkSize += l;
4366 NEXTL(l);
4367 }
4368
4369 inSpace = 0;
4370 } else if (c != '&') {
4371 if (c > 0x20) {
4372 if (c == quote)
4373 break;
4374
4375 if (c == '<')
4376 xmlFatalErr(ctxt, XML_ERR_LT_IN_ATTRIBUTE, NULL);
4377
4378 chunkSize += 1;
4379 inSpace = 0;
4380 } else if (!IS_BYTE_CHAR(c)) {
4381 xmlFatalErrMsg(ctxt, XML_ERR_INVALID_CHAR,
4382 "invalid character in attribute value\n");
4383 if (chunkSize > 0) {
4384 xmlSBufAddString(&buf, CUR_PTR - chunkSize, chunkSize);
4385 chunkSize = 0;
4386 }
4387 xmlSBufAddReplChar(&buf);
4388 inSpace = 0;
4389 } else {
4390 /* Whitespace */
4391 if ((normalize) && (inSpace)) {
4392 /* Skip char */
4393 if (chunkSize > 0) {
4394 xmlSBufAddString(&buf, CUR_PTR - chunkSize, chunkSize);
4395 chunkSize = 0;
4396 }
4397 } else if (c < 0x20) {
4398 /* Convert to space */
4399 if (chunkSize > 0) {
4400 xmlSBufAddString(&buf, CUR_PTR - chunkSize, chunkSize);
4401 chunkSize = 0;
4402 }
4403
4404 xmlSBufAddCString(&buf, " ", 1);
4405 } else {
4406 chunkSize += 1;
4407 }
4408
4409 inSpace = 1;
4410
4411 if ((c == 0xD) && (NXT(1) == 0xA))
4412 CUR_PTR++;
4413 }
4414
4415 NEXTL(1);
4416 } else if (NXT(1) == '#') {
4417 int val;
4418
4419 if (chunkSize > 0) {
4420 xmlSBufAddString(&buf, CUR_PTR - chunkSize, chunkSize);
4421 chunkSize = 0;
4422 }
4423
4424 val = xmlParseCharRef(ctxt);
4425 if (val == 0)
4426 goto error;
4427
4428 if ((val == '&') && (!replaceEntities)) {
4429 /*
4430 * The reparsing will be done in xmlStringGetNodeList()
4431 * called by the attribute() function in SAX.c
4432 */
4433 xmlSBufAddCString(&buf, "&#38;", 5);
4434 inSpace = 0;
4435 } else if (val == ' ') {
4436 if ((!normalize) || (!inSpace))
4437 xmlSBufAddCString(&buf, " ", 1);
4438 inSpace = 1;
4439 } else {
4440 xmlSBufAddChar(&buf, val);
4441 inSpace = 0;
4442 }
4443 } else {
4444 const xmlChar *name;
4445 xmlEntityPtr ent;
4446
4447 if (chunkSize > 0) {
4448 xmlSBufAddString(&buf, CUR_PTR - chunkSize, chunkSize);
4449 chunkSize = 0;
4450 }
4451
4452 name = xmlParseEntityRefInternal(ctxt);
4453 if (name == NULL) {
4454 /*
4455 * Probably a literal '&' which wasn't escaped.
4456 * TODO: Handle gracefully in recovery mode.
4457 */
4458 continue;
4459 }
4460
4461 ent = xmlLookupGeneralEntity(ctxt, name, /* isAttr */ 1);
4462 if (ent == NULL)
4463 continue;
4464
4465 if (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY) {
4466 if ((ent->content[0] == '&') && (!replaceEntities))
4467 xmlSBufAddCString(&buf, "&#38;", 5);
4468 else
4469 xmlSBufAddString(&buf, ent->content, ent->length);
4470 inSpace = 0;
4471 } else if (replaceEntities) {
4472 xmlExpandEntityInAttValue(ctxt, &buf, ent->content, ent,
4473 normalize, &inSpace, ctxt->inputNr,
4474 /* check */ 1);
4475 } else {
4476 if ((ent->flags & flags) != flags)
4477 xmlCheckEntityInAttValue(ctxt, ent, ctxt->inputNr);
4478
4479 if (xmlParserEntityCheck(ctxt, ent->expandedSize)) {
4480 ent->content[0] = 0;
4481 goto error;
4482 }
4483
4484 /*
4485 * Just output the reference
4486 */
4487 xmlSBufAddCString(&buf, "&", 1);
4488 xmlSBufAddString(&buf, ent->name, xmlStrlen(ent->name));
4489 xmlSBufAddCString(&buf, ";", 1);
4490
4491 inSpace = 0;
4492 }
4493 }
4494 }
4495
4496 if ((buf.mem == NULL) && (alloc != NULL)) {
4497 ret = (xmlChar *) CUR_PTR - chunkSize;
4498
4499 if (attlen != NULL)
4500 *attlen = chunkSize;
4501 if ((normalize) && (inSpace) && (chunkSize > 0))
4502 *attlen -= 1;
4503 *alloc = 0;
4504
4505 /* Report potential error */
4506 xmlSBufCleanup(&buf, ctxt, "AttValue length too long");
4507 } else {
4508 if (chunkSize > 0)
4509 xmlSBufAddString(&buf, CUR_PTR - chunkSize, chunkSize);
4510
4511 if ((normalize) && (inSpace) && (buf.size > 0))
4512 buf.size--;
4513
4514 ret = xmlSBufFinish(&buf, attlen, ctxt, "AttValue length too long");
4515
4516 if (ret != NULL) {
4517 if (attlen != NULL)
4518 *attlen = buf.size;
4519 if (alloc != NULL)
4520 *alloc = 1;
4521 }
4522 }
4523
4524 NEXTL(1);
4525
4526 return(ret);
4527
4528error:
4529 xmlSBufCleanup(&buf, ctxt, "AttValue length too long");
4530 return(NULL);
4531}
4532
4533/**
4534 * xmlParseAttValue:
4535 * @ctxt: an XML parser context
4536 *
4537 * DEPRECATED: Internal function, don't use.
4538 *
4539 * parse a value for an attribute
4540 * Note: the parser won't do substitution of entities here, this
4541 * will be handled later in xmlStringGetNodeList
4542 *
4543 * [10] AttValue ::= '"' ([^<&"] | Reference)* '"' |
4544 * "'" ([^<&'] | Reference)* "'"
4545 *
4546 * 3.3.3 Attribute-Value Normalization:
4547 * Before the value of an attribute is passed to the application or
4548 * checked for validity, the XML processor must normalize it as follows:
4549 * - a character reference is processed by appending the referenced
4550 * character to the attribute value
4551 * - an entity reference is processed by recursively processing the
4552 * replacement text of the entity
4553 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
4554 * appending #x20 to the normalized value, except that only a single
4555 * #x20 is appended for a "#xD#xA" sequence that is part of an external
4556 * parsed entity or the literal entity value of an internal parsed entity
4557 * - other characters are processed by appending them to the normalized value
4558 * If the declared value is not CDATA, then the XML processor must further
4559 * process the normalized attribute value by discarding any leading and
4560 * trailing space (#x20) characters, and by replacing sequences of space
4561 * (#x20) characters by a single space (#x20) character.
4562 * All attributes for which no declaration has been read should be treated
4563 * by a non-validating parser as if declared CDATA.
4564 *
4565 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
4566 */
4567
4568
4569xmlChar *
4570xmlParseAttValue(xmlParserCtxtPtr ctxt) {
4571 if ((ctxt == NULL) || (ctxt->input == NULL)) return(NULL);
4572 return(xmlParseAttValueInternal(ctxt, NULL, NULL, 0, 0));
4573}
4574
4575/**
4576 * xmlParseSystemLiteral:
4577 * @ctxt: an XML parser context
4578 *
4579 * DEPRECATED: Internal function, don't use.
4580 *
4581 * parse an XML Literal
4582 *
4583 * [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
4584 *
4585 * Returns the SystemLiteral parsed or NULL
4586 */
4587
4588xmlChar *
4589xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) {
4590 xmlChar *buf = NULL;
4591 int len = 0;
4592 int size = XML_PARSER_BUFFER_SIZE;
4593 int cur, l;
4594 int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
4595 XML_MAX_TEXT_LENGTH :
4596 XML_MAX_NAME_LENGTH;
4597 xmlChar stop;
4598
4599 if (RAW == '"') {
4600 NEXT;
4601 stop = '"';
4602 } else if (RAW == '\'') {
4603 NEXT;
4604 stop = '\'';
4605 } else {
4606 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
4607 return(NULL);
4608 }
4609
4610 buf = (xmlChar *) xmlMallocAtomic(size);
4611 if (buf == NULL) {
4612 xmlErrMemory(ctxt);
4613 return(NULL);
4614 }
4615 cur = CUR_CHAR(l);
4616 while ((IS_CHAR(cur)) && (cur != stop)) { /* checked */
4617 if (len + 5 >= size) {
4618 xmlChar *tmp;
4619
4620 size *= 2;
4621 tmp = (xmlChar *) xmlRealloc(buf, size);
4622 if (tmp == NULL) {
4623 xmlFree(buf);
4624 xmlErrMemory(ctxt);
4625 return(NULL);
4626 }
4627 buf = tmp;
4628 }
4629 COPY_BUF(buf, len, cur);
4630 if (len > maxLength) {
4631 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "SystemLiteral");
4632 xmlFree(buf);
4633 return(NULL);
4634 }
4635 NEXTL(l);
4636 cur = CUR_CHAR(l);
4637 }
4638 buf[len] = 0;
4639 if (!IS_CHAR(cur)) {
4640 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
4641 } else {
4642 NEXT;
4643 }
4644 return(buf);
4645}
4646
4647/**
4648 * xmlParsePubidLiteral:
4649 * @ctxt: an XML parser context
4650 *
4651 * DEPRECATED: Internal function, don't use.
4652 *
4653 * parse an XML public literal
4654 *
4655 * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
4656 *
4657 * Returns the PubidLiteral parsed or NULL.
4658 */
4659
4660xmlChar *
4661xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) {
4662 xmlChar *buf = NULL;
4663 int len = 0;
4664 int size = XML_PARSER_BUFFER_SIZE;
4665 int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
4666 XML_MAX_TEXT_LENGTH :
4667 XML_MAX_NAME_LENGTH;
4668 xmlChar cur;
4669 xmlChar stop;
4670
4671 if (RAW == '"') {
4672 NEXT;
4673 stop = '"';
4674 } else if (RAW == '\'') {
4675 NEXT;
4676 stop = '\'';
4677 } else {
4678 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
4679 return(NULL);
4680 }
4681 buf = (xmlChar *) xmlMallocAtomic(size);
4682 if (buf == NULL) {
4683 xmlErrMemory(ctxt);
4684 return(NULL);
4685 }
4686 cur = CUR;
4687 while ((IS_PUBIDCHAR_CH(cur)) && (cur != stop) &&
4688 (PARSER_STOPPED(ctxt) == 0)) { /* checked */
4689 if (len + 1 >= size) {
4690 xmlChar *tmp;
4691
4692 size *= 2;
4693 tmp = (xmlChar *) xmlRealloc(buf, size);
4694 if (tmp == NULL) {
4695 xmlErrMemory(ctxt);
4696 xmlFree(buf);
4697 return(NULL);
4698 }
4699 buf = tmp;
4700 }
4701 buf[len++] = cur;
4702 if (len > maxLength) {
4703 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Public ID");
4704 xmlFree(buf);
4705 return(NULL);
4706 }
4707 NEXT;
4708 cur = CUR;
4709 }
4710 buf[len] = 0;
4711 if (cur != stop) {
4712 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
4713 } else {
4714 NEXTL(1);
4715 }
4716 return(buf);
4717}
4718
4719static void xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int partial);
4720
/*
 * used for the test in the inner loop of the char data testing
 *
 * Each entry is either the byte value itself or 0x00. The zero entries
 * are all bytes below 0x20 except 0x09, the characters '&', '<' and ']',
 * and every byte from 0x80 upwards.
 */
static const unsigned char test_char_data[256] = {
    /* 0x00 - 0x1F: only 0x9 is kept, CR/LF separated */
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x09, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0x20 - 0x3F: everything but '&' (0x26) and '<' (0x3C) */
    0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x00, 0x27,
    0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x2D, 0x2E, 0x2F,
    0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,
    0x38, 0x39, 0x3A, 0x3B, 0x00, 0x3D, 0x3E, 0x3F,
    /* 0x40 - 0x5F: everything but ']' (0x5D) */
    0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47,
    0x48, 0x49, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F,
    0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57,
    0x58, 0x59, 0x5A, 0x5B, 0x5C, 0x00, 0x5E, 0x5F,
    /* 0x60 - 0x7F: all kept */
    0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
    0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
    0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
    0x78, 0x79, 0x7A, 0x7B, 0x7C, 0x7D, 0x7E, 0x7F
    /*
     * 0x80 - 0xFF (non-ascii): no initializers given, the remaining
     * 128 entries of the array are implicitly initialized to zero.
     */
};
4758
4759/**
4760 * xmlParseCharDataInternal:
4761 * @ctxt: an XML parser context
4762 * @partial: buffer may contain partial UTF-8 sequences
4763 *
4764 * Parse character data. Always makes progress if the first char isn't
4765 * '<' or '&'.
4766 *
4767 * The right angle bracket (>) may be represented using the string "&gt;",
4768 * and must, for compatibility, be escaped using "&gt;" or a character
4769 * reference when it appears in the string "]]>" in content, when that
4770 * string is not marking the end of a CDATA section.
4771 *
4772 * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
4773 */
4774static void
4775xmlParseCharDataInternal(xmlParserCtxtPtr ctxt, int partial) {
4776 const xmlChar *in;
4777 int nbchar = 0;
4778 int line = ctxt->input->line;
4779 int col = ctxt->input->col;
4780 int ccol;
4781
4782 GROW;
4783 /*
4784 * Accelerated common case where input don't need to be
4785 * modified before passing it to the handler.
4786 */
4787 in = ctxt->input->cur;
4788 do {
4789get_more_space:
4790 while (*in == 0x20) { in++; ctxt->input->col++; }
4791 if (*in == 0xA) {
4792 do {
4793 ctxt->input->line++; ctxt->input->col = 1;
4794 in++;
4795 } while (*in == 0xA);
4796 goto get_more_space;
4797 }
4798 if (*in == '<') {
4799 nbchar = in - ctxt->input->cur;
4800 if (nbchar > 0) {
4801 const xmlChar *tmp = ctxt->input->cur;
4802 ctxt->input->cur = in;
4803
4804 if ((ctxt->sax != NULL) &&
4805 (ctxt->disableSAX == 0) &&
4806 (ctxt->sax->ignorableWhitespace !=
4807 ctxt->sax->characters)) {
4808 if (areBlanks(ctxt, tmp, nbchar, 1)) {
4809 if (ctxt->sax->ignorableWhitespace != NULL)
4810 ctxt->sax->ignorableWhitespace(ctxt->userData,
4811 tmp, nbchar);
4812 } else {
4813 if (ctxt->sax->characters != NULL)
4814 ctxt->sax->characters(ctxt->userData,
4815 tmp, nbchar);
4816 if (*ctxt->space == -1)
4817 *ctxt->space = -2;
4818 }
4819 } else if ((ctxt->sax != NULL) &&
4820 (ctxt->disableSAX == 0) &&
4821 (ctxt->sax->characters != NULL)) {
4822 ctxt->sax->characters(ctxt->userData,
4823 tmp, nbchar);
4824 }
4825 }
4826 return;
4827 }
4828
4829get_more:
4830 ccol = ctxt->input->col;
4831 while (test_char_data[*in]) {
4832 in++;
4833 ccol++;
4834 }
4835 ctxt->input->col = ccol;
4836 if (*in == 0xA) {
4837 do {
4838 ctxt->input->line++; ctxt->input->col = 1;
4839 in++;
4840 } while (*in == 0xA);
4841 goto get_more;
4842 }
4843 if (*in == ']') {
4844 if ((in[1] == ']') && (in[2] == '>')) {
4845 xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
4846 ctxt->input->cur = in + 1;
4847 return;
4848 }
4849 in++;
4850 ctxt->input->col++;
4851 goto get_more;
4852 }
4853 nbchar = in - ctxt->input->cur;
4854 if (nbchar > 0) {
4855 if ((ctxt->sax != NULL) &&
4856 (ctxt->disableSAX == 0) &&
4857 (ctxt->sax->ignorableWhitespace !=
4858 ctxt->sax->characters) &&
4859 (IS_BLANK_CH(*ctxt->input->cur))) {
4860 const xmlChar *tmp = ctxt->input->cur;
4861 ctxt->input->cur = in;
4862
4863 if (areBlanks(ctxt, tmp, nbchar, 0)) {
4864 if (ctxt->sax->ignorableWhitespace != NULL)
4865 ctxt->sax->ignorableWhitespace(ctxt->userData,
4866 tmp, nbchar);
4867 } else {
4868 if (ctxt->sax->characters != NULL)
4869 ctxt->sax->characters(ctxt->userData,
4870 tmp, nbchar);
4871 if (*ctxt->space == -1)
4872 *ctxt->space = -2;
4873 }
4874 line = ctxt->input->line;
4875 col = ctxt->input->col;
4876 } else if ((ctxt->sax != NULL) &&
4877 (ctxt->disableSAX == 0)) {
4878 if (ctxt->sax->characters != NULL)
4879 ctxt->sax->characters(ctxt->userData,
4880 ctxt->input->cur, nbchar);
4881 line = ctxt->input->line;
4882 col = ctxt->input->col;
4883 }
4884 }
4885 ctxt->input->cur = in;
4886 if (*in == 0xD) {
4887 in++;
4888 if (*in == 0xA) {
4889 ctxt->input->cur = in;
4890 in++;
4891 ctxt->input->line++; ctxt->input->col = 1;
4892 continue; /* while */
4893 }
4894 in--;
4895 }
4896 if (*in == '<') {
4897 return;
4898 }
4899 if (*in == '&') {
4900 return;
4901 }
4902 SHRINK;
4903 GROW;
4904 in = ctxt->input->cur;
4905 } while (((*in >= 0x20) && (*in <= 0x7F)) ||
4906 (*in == 0x09) || (*in == 0x0a));
4907 ctxt->input->line = line;
4908 ctxt->input->col = col;
4909 xmlParseCharDataComplex(ctxt, partial);
4910}
4911
4912/**
4913 * xmlParseCharDataComplex:
4914 * @ctxt: an XML parser context
4915 * @cdata: int indicating whether we are within a CDATA section
4916 *
4917 * Always makes progress if the first char isn't '<' or '&'.
4918 *
4919 * parse a CharData section.this is the fallback function
4920 * of xmlParseCharData() when the parsing requires handling
4921 * of non-ASCII characters.
4922 */
4923static void
4924xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int partial) {
4925 xmlChar buf[XML_PARSER_BIG_BUFFER_SIZE + 5];
4926 int nbchar = 0;
4927 int cur, l;
4928
4929 cur = CUR_CHAR(l);
4930 while ((cur != '<') && /* checked */
4931 (cur != '&') &&
4932 (IS_CHAR(cur))) {
4933 if ((cur == ']') && (NXT(1) == ']') && (NXT(2) == '>')) {
4934 xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
4935 }
4936 COPY_BUF(buf, nbchar, cur);
4937 /* move current position before possible calling of ctxt->sax->characters */
4938 NEXTL(l);
4939 if (nbchar >= XML_PARSER_BIG_BUFFER_SIZE) {
4940 buf[nbchar] = 0;
4941
4942 /*
4943 * OK the segment is to be consumed as chars.
4944 */
4945 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
4946 if (areBlanks(ctxt, buf, nbchar, 0)) {
4947 if (ctxt->sax->ignorableWhitespace != NULL)
4948 ctxt->sax->ignorableWhitespace(ctxt->userData,
4949 buf, nbchar);
4950 } else {
4951 if (ctxt->sax->characters != NULL)
4952 ctxt->sax->characters(ctxt->userData, buf, nbchar);
4953 if ((ctxt->sax->characters !=
4954 ctxt->sax->ignorableWhitespace) &&
4955 (*ctxt->space == -1))
4956 *ctxt->space = -2;
4957 }
4958 }
4959 nbchar = 0;
4960 SHRINK;
4961 }
4962 cur = CUR_CHAR(l);
4963 }
4964 if (nbchar != 0) {
4965 buf[nbchar] = 0;
4966 /*
4967 * OK the segment is to be consumed as chars.
4968 */
4969 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
4970 if (areBlanks(ctxt, buf, nbchar, 0)) {
4971 if (ctxt->sax->ignorableWhitespace != NULL)
4972 ctxt->sax->ignorableWhitespace(ctxt->userData, buf, nbchar);
4973 } else {
4974 if (ctxt->sax->characters != NULL)
4975 ctxt->sax->characters(ctxt->userData, buf, nbchar);
4976 if ((ctxt->sax->characters != ctxt->sax->ignorableWhitespace) &&
4977 (*ctxt->space == -1))
4978 *ctxt->space = -2;
4979 }
4980 }
4981 }
4982 /*
4983 * cur == 0 can mean
4984 *
4985 * - End of buffer.
4986 * - An actual 0 character.
4987 * - An incomplete UTF-8 sequence. This is allowed if partial is set.
4988 */
4989 if (ctxt->input->cur < ctxt->input->end) {
4990 if ((cur == 0) && (CUR != 0)) {
4991 if (partial == 0) {
4992 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4993 "Incomplete UTF-8 sequence starting with %02X\n", CUR);
4994 NEXTL(1);
4995 }
4996 } else if ((cur != '<') && (cur != '&')) {
4997 /* Generate the error and skip the offending character */
4998 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4999 "PCDATA invalid Char value %d\n", cur);
5000 NEXTL(l);
5001 }
5002 }
5003}
5004
5005/**
5006 * xmlParseCharData:
5007 * @ctxt: an XML parser context
5008 * @cdata: unused
5009 *
5010 * DEPRECATED: Internal function, don't use.
5011 */
5012void
5013xmlParseCharData(xmlParserCtxtPtr ctxt, ATTRIBUTE_UNUSED int cdata) {
5014 xmlParseCharDataInternal(ctxt, 0);
5015}
5016
5017/**
5018 * xmlParseExternalID:
5019 * @ctxt: an XML parser context
5020 * @publicID: a xmlChar** receiving PubidLiteral
5021 * @strict: indicate whether we should restrict parsing to only
5022 * production [75], see NOTE below
5023 *
5024 * DEPRECATED: Internal function, don't use.
5025 *
5026 * Parse an External ID or a Public ID
5027 *
5028 * NOTE: Productions [75] and [83] interact badly since [75] can generate
5029 * 'PUBLIC' S PubidLiteral S SystemLiteral
5030 *
5031 * [75] ExternalID ::= 'SYSTEM' S SystemLiteral
5032 * | 'PUBLIC' S PubidLiteral S SystemLiteral
5033 *
5034 * [83] PublicID ::= 'PUBLIC' S PubidLiteral
5035 *
5036 * Returns the function returns SystemLiteral and in the second
5037 * case publicID receives PubidLiteral, is strict is off
5038 * it is possible to return NULL and have publicID set.
5039 */
5040
5041xmlChar *
5042xmlParseExternalID(xmlParserCtxtPtr ctxt, xmlChar **publicID, int strict) {
5043 xmlChar *URI = NULL;
5044
5045 *publicID = NULL;
5046 if (CMP6(CUR_PTR, 'S', 'Y', 'S', 'T', 'E', 'M')) {
5047 SKIP(6);
5048 if (SKIP_BLANKS == 0) {
5049 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5050 "Space required after 'SYSTEM'\n");
5051 }
5052 URI = xmlParseSystemLiteral(ctxt);
5053 if (URI == NULL) {
5054 xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
5055 }
5056 } else if (CMP6(CUR_PTR, 'P', 'U', 'B', 'L', 'I', 'C')) {
5057 SKIP(6);
5058 if (SKIP_BLANKS == 0) {
5059 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5060 "Space required after 'PUBLIC'\n");
5061 }
5062 *publicID = xmlParsePubidLiteral(ctxt);
5063 if (*publicID == NULL) {
5064 xmlFatalErr(ctxt, XML_ERR_PUBID_REQUIRED, NULL);
5065 }
5066 if (strict) {
5067 /*
5068 * We don't handle [83] so "S SystemLiteral" is required.
5069 */
5070 if (SKIP_BLANKS == 0) {
5071 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5072 "Space required after the Public Identifier\n");
5073 }
5074 } else {
5075 /*
5076 * We handle [83] so we return immediately, if
5077 * "S SystemLiteral" is not detected. We skip blanks if no
5078 * system literal was found, but this is harmless since we must
5079 * be at the end of a NotationDecl.
5080 */
5081 if (SKIP_BLANKS == 0) return(NULL);
5082 if ((CUR != '\'') && (CUR != '"')) return(NULL);
5083 }
5084 URI = xmlParseSystemLiteral(ctxt);
5085 if (URI == NULL) {
5086 xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
5087 }
5088 }
5089 return(URI);
5090}
5091
5092/**
5093 * xmlParseCommentComplex:
5094 * @ctxt: an XML parser context
5095 * @buf: the already parsed part of the buffer
5096 * @len: number of bytes in the buffer
5097 * @size: allocated size of the buffer
5098 *
5099 * Skip an XML (SGML) comment <!-- .... -->
5100 * The spec says that "For compatibility, the string "--" (double-hyphen)
5101 * must not occur within comments. "
5102 * This is the slow routine in case the accelerator for ascii didn't work
5103 *
5104 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
5105 */
5106static void
5107xmlParseCommentComplex(xmlParserCtxtPtr ctxt, xmlChar *buf,
5108 size_t len, size_t size) {
5109 int q, ql;
5110 int r, rl;
5111 int cur, l;
5112 size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
5113 XML_MAX_HUGE_LENGTH :
5114 XML_MAX_TEXT_LENGTH;
5115
5116 if (buf == NULL) {
5117 len = 0;
5118 size = XML_PARSER_BUFFER_SIZE;
5119 buf = (xmlChar *) xmlMallocAtomic(size);
5120 if (buf == NULL) {
5121 xmlErrMemory(ctxt);
5122 return;
5123 }
5124 }
5125 q = CUR_CHAR(ql);
5126 if (q == 0)
5127 goto not_terminated;
5128 if (!IS_CHAR(q)) {
5129 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
5130 "xmlParseComment: invalid xmlChar value %d\n",
5131 q);
5132 xmlFree (buf);
5133 return;
5134 }
5135 NEXTL(ql);
5136 r = CUR_CHAR(rl);
5137 if (r == 0)
5138 goto not_terminated;
5139 if (!IS_CHAR(r)) {
5140 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
5141 "xmlParseComment: invalid xmlChar value %d\n",
5142 r);
5143 xmlFree (buf);
5144 return;
5145 }
5146 NEXTL(rl);
5147 cur = CUR_CHAR(l);
5148 if (cur == 0)
5149 goto not_terminated;
5150 while (IS_CHAR(cur) && /* checked */
5151 ((cur != '>') ||
5152 (r != '-') || (q != '-'))) {
5153 if ((r == '-') && (q == '-')) {
5154 xmlFatalErr(ctxt, XML_ERR_HYPHEN_IN_COMMENT, NULL);
5155 }
5156 if (len + 5 >= size) {
5157 xmlChar *new_buf;
5158 size_t new_size;
5159
5160 new_size = size * 2;
5161 new_buf = (xmlChar *) xmlRealloc(buf, new_size);
5162 if (new_buf == NULL) {
5163 xmlFree (buf);
5164 xmlErrMemory(ctxt);
5165 return;
5166 }
5167 buf = new_buf;
5168 size = new_size;
5169 }
5170 COPY_BUF(buf, len, q);
5171 if (len > maxLength) {
5172 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
5173 "Comment too big found", NULL);
5174 xmlFree (buf);
5175 return;
5176 }
5177
5178 q = r;
5179 ql = rl;
5180 r = cur;
5181 rl = l;
5182
5183 NEXTL(l);
5184 cur = CUR_CHAR(l);
5185
5186 }
5187 buf[len] = 0;
5188 if (cur == 0) {
5189 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
5190 "Comment not terminated \n<!--%.50s\n", buf);
5191 } else if (!IS_CHAR(cur)) {
5192 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
5193 "xmlParseComment: invalid xmlChar value %d\n",
5194 cur);
5195 } else {
5196 NEXT;
5197 if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
5198 (!ctxt->disableSAX))
5199 ctxt->sax->comment(ctxt->userData, buf);
5200 }
5201 xmlFree(buf);
5202 return;
5203not_terminated:
5204 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
5205 "Comment not terminated\n", NULL);
5206 xmlFree(buf);
5207 return;
5208}
5209
5210/**
5211 * xmlParseComment:
5212 * @ctxt: an XML parser context
5213 *
5214 * DEPRECATED: Internal function, don't use.
5215 *
5216 * Parse an XML (SGML) comment. Always consumes '<!'.
5217 *
5218 * The spec says that "For compatibility, the string "--" (double-hyphen)
5219 * must not occur within comments. "
5220 *
5221 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
5222 */
5223void
5224xmlParseComment(xmlParserCtxtPtr ctxt) {
5225 xmlChar *buf = NULL;
5226 size_t size = XML_PARSER_BUFFER_SIZE;
5227 size_t len = 0;
5228 size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
5229 XML_MAX_HUGE_LENGTH :
5230 XML_MAX_TEXT_LENGTH;
5231 const xmlChar *in;
5232 size_t nbchar = 0;
5233 int ccol;
5234
5235 /*
5236 * Check that there is a comment right here.
5237 */
5238 if ((RAW != '<') || (NXT(1) != '!'))
5239 return;
5240 SKIP(2);
5241 if ((RAW != '-') || (NXT(1) != '-'))
5242 return;
5243 SKIP(2);
5244 GROW;
5245
5246 /*
5247 * Accelerated common case where input don't need to be
5248 * modified before passing it to the handler.
5249 */
5250 in = ctxt->input->cur;
5251 do {
5252 if (*in == 0xA) {
5253 do {
5254 ctxt->input->line++; ctxt->input->col = 1;
5255 in++;
5256 } while (*in == 0xA);
5257 }
5258get_more:
5259 ccol = ctxt->input->col;
5260 while (((*in > '-') && (*in <= 0x7F)) ||
5261 ((*in >= 0x20) && (*in < '-')) ||
5262 (*in == 0x09)) {
5263 in++;
5264 ccol++;
5265 }
5266 ctxt->input->col = ccol;
5267 if (*in == 0xA) {
5268 do {
5269 ctxt->input->line++; ctxt->input->col = 1;
5270 in++;
5271 } while (*in == 0xA);
5272 goto get_more;
5273 }
5274 nbchar = in - ctxt->input->cur;
5275 /*
5276 * save current set of data
5277 */
5278 if (nbchar > 0) {
5279 if (buf == NULL) {
5280 if ((*in == '-') && (in[1] == '-'))
5281 size = nbchar + 1;
5282 else
5283 size = XML_PARSER_BUFFER_SIZE + nbchar;
5284 buf = (xmlChar *) xmlMallocAtomic(size);
5285 if (buf == NULL) {
5286 xmlErrMemory(ctxt);
5287 return;
5288 }
5289 len = 0;
5290 } else if (len + nbchar + 1 >= size) {
5291 xmlChar *new_buf;
5292 size += len + nbchar + XML_PARSER_BUFFER_SIZE;
5293 new_buf = (xmlChar *) xmlRealloc(buf, size);
5294 if (new_buf == NULL) {
5295 xmlFree (buf);
5296 xmlErrMemory(ctxt);
5297 return;
5298 }
5299 buf = new_buf;
5300 }
5301 memcpy(&buf[len], ctxt->input->cur, nbchar);
5302 len += nbchar;
5303 buf[len] = 0;
5304 }
5305 if (len > maxLength) {
5306 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
5307 "Comment too big found", NULL);
5308 xmlFree (buf);
5309 return;
5310 }
5311 ctxt->input->cur = in;
5312 if (*in == 0xA) {
5313 in++;
5314 ctxt->input->line++; ctxt->input->col = 1;
5315 }
5316 if (*in == 0xD) {
5317 in++;
5318 if (*in == 0xA) {
5319 ctxt->input->cur = in;
5320 in++;
5321 ctxt->input->line++; ctxt->input->col = 1;
5322 goto get_more;
5323 }
5324 in--;
5325 }
5326 SHRINK;
5327 GROW;
5328 in = ctxt->input->cur;
5329 if (*in == '-') {
5330 if (in[1] == '-') {
5331 if (in[2] == '>') {
5332 SKIP(3);
5333 if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
5334 (!ctxt->disableSAX)) {
5335 if (buf != NULL)
5336 ctxt->sax->comment(ctxt->userData, buf);
5337 else
5338 ctxt->sax->comment(ctxt->userData, BAD_CAST "");
5339 }
5340 if (buf != NULL)
5341 xmlFree(buf);
5342 return;
5343 }
5344 if (buf != NULL) {
5345 xmlFatalErrMsgStr(ctxt, XML_ERR_HYPHEN_IN_COMMENT,
5346 "Double hyphen within comment: "
5347 "<!--%.50s\n",
5348 buf);
5349 } else
5350 xmlFatalErrMsgStr(ctxt, XML_ERR_HYPHEN_IN_COMMENT,
5351 "Double hyphen within comment\n", NULL);
5352 in++;
5353 ctxt->input->col++;
5354 }
5355 in++;
5356 ctxt->input->col++;
5357 goto get_more;
5358 }
5359 } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09) || (*in == 0x0a));
5360 xmlParseCommentComplex(ctxt, buf, len, size);
5361 return;
5362}
5363
5364
5365/**
5366 * xmlParsePITarget:
5367 * @ctxt: an XML parser context
5368 *
5369 * DEPRECATED: Internal function, don't use.
5370 *
5371 * parse the name of a PI
5372 *
5373 * [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
5374 *
5375 * Returns the PITarget name or NULL
5376 */
5377
5378const xmlChar *
5379xmlParsePITarget(xmlParserCtxtPtr ctxt) {
5380 const xmlChar *name;
5381
5382 name = xmlParseName(ctxt);
5383 if ((name != NULL) &&
5384 ((name[0] == 'x') || (name[0] == 'X')) &&
5385 ((name[1] == 'm') || (name[1] == 'M')) &&
5386 ((name[2] == 'l') || (name[2] == 'L'))) {
5387 int i;
5388 if ((name[0] == 'x') && (name[1] == 'm') &&
5389 (name[2] == 'l') && (name[3] == 0)) {
5390 xmlFatalErrMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
5391 "XML declaration allowed only at the start of the document\n");
5392 return(name);
5393 } else if (name[3] == 0) {
5394 xmlFatalErr(ctxt, XML_ERR_RESERVED_XML_NAME, NULL);
5395 return(name);
5396 }
5397 for (i = 0;;i++) {
5398 if (xmlW3CPIs[i] == NULL) break;
5399 if (xmlStrEqual(name, (const xmlChar *)xmlW3CPIs[i]))
5400 return(name);
5401 }
5402 xmlWarningMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
5403 "xmlParsePITarget: invalid name prefix 'xml'\n",
5404 NULL, NULL);
5405 }
5406 if ((name != NULL) && (xmlStrchr(name, ':') != NULL)) {
5407 xmlNsErr(ctxt, XML_NS_ERR_COLON,
5408 "colons are forbidden from PI names '%s'\n", name, NULL, NULL);
5409 }
5410 return(name);
5411}
5412
#ifdef LIBXML_CATALOG_ENABLED
/**
 * xmlParseCatalogPI:
 * @ctxt:  an XML parser context
 * @catalog:  the PI value string
 *
 * parse an XML Catalog Processing Instruction.
 *
 * <?oasis-xml-catalog catalog="http://example.com/catalog.xml"?>
 *
 * Occurs only if allowed by the user and if happening in the Misc
 * part of the document before any doctype information
 * This will add the given catalog to the parsing context in order
 * to be used if there is a resolution need further down in the document
 *
 * Expected value syntax: optional blanks, the word "catalog", optional
 * blanks, '=', optional blanks, a quoted URL (single or double quotes),
 * optional trailing blanks. Syntax problems yield an XML_WAR_CATALOG_PI
 * warning; a failure to duplicate the URL is reported with
 * xmlErrMemory().
 */

static void
xmlParseCatalogPI(xmlParserCtxtPtr ctxt, const xmlChar *catalog) {
    xmlChar *URL = NULL;
    const xmlChar *tmp, *base;
    xmlChar marker;

    tmp = catalog;
    while (IS_BLANK_CH(*tmp)) tmp++;
    if (xmlStrncmp(tmp, BAD_CAST"catalog", 7))
        goto error;
    tmp += 7;
    while (IS_BLANK_CH(*tmp)) tmp++;
    if (*tmp != '=') {
        /*
         * NOTE(review): unlike every other syntax problem in this
         * function, a missing '=' is ignored without a warning. Kept
         * as-is since this looks deliberate -- confirm.
         */
        return;
    }
    tmp++;
    while (IS_BLANK_CH(*tmp)) tmp++;
    marker = *tmp;
    if ((marker != '\'') && (marker != '"'))
        goto error;
    tmp++;
    base = tmp;
    while ((*tmp != 0) && (*tmp != marker)) tmp++;
    if (*tmp == 0)
        goto error;
    URL = xmlStrndup(base, tmp - base);
    if (URL == NULL) {
        /*
         * base is non-NULL and the length is non-negative here, so a
         * NULL result is taken to mean the copy could not be allocated.
         * Report it instead of silently dropping the catalog.
         */
        xmlErrMemory(ctxt);
        return;
    }
    tmp++;
    while (IS_BLANK_CH(*tmp)) tmp++;
    if (*tmp != 0)
        goto error;

    /*
     * Unfortunately, the catalog API doesn't report OOM errors.
     * xmlGetLastError isn't very helpful since we don't know
     * where the last error came from. We'd have to reset it
     * before this call and restore it afterwards.
     */
    ctxt->catalogs = xmlCatalogAddLocal(ctxt->catalogs, URL);
    xmlFree(URL);
    return;

error:
    xmlWarningMsg(ctxt, XML_WAR_CATALOG_PI,
                  "Catalog PI syntax error: %s\n",
                  catalog, NULL);
    if (URL != NULL)
        xmlFree(URL);
}
#endif
5480
5481/**
5482 * xmlParsePI:
5483 * @ctxt: an XML parser context
5484 *
5485 * DEPRECATED: Internal function, don't use.
5486 *
5487 * parse an XML Processing Instruction.
5488 *
5489 * [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
5490 *
5491 * The processing is transferred to SAX once parsed.
5492 */
5493
5494void
5495xmlParsePI(xmlParserCtxtPtr ctxt) {
5496 xmlChar *buf = NULL;
5497 size_t len = 0;
5498 size_t size = XML_PARSER_BUFFER_SIZE;
5499 size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
5500 XML_MAX_HUGE_LENGTH :
5501 XML_MAX_TEXT_LENGTH;
5502 int cur, l;
5503 const xmlChar *target;
5504
5505 if ((RAW == '<') && (NXT(1) == '?')) {
5506 /*
5507 * this is a Processing Instruction.
5508 */
5509 SKIP(2);
5510
5511 /*
5512 * Parse the target name and check for special support like
5513 * namespace.
5514 */
5515 target = xmlParsePITarget(ctxt);
5516 if (target != NULL) {
5517 if ((RAW == '?') && (NXT(1) == '>')) {
5518 SKIP(2);
5519
5520 /*
5521 * SAX: PI detected.
5522 */
5523 if ((ctxt->sax) && (!ctxt->disableSAX) &&
5524 (ctxt->sax->processingInstruction != NULL))
5525 ctxt->sax->processingInstruction(ctxt->userData,
5526 target, NULL);
5527 return;
5528 }
5529 buf = (xmlChar *) xmlMallocAtomic(size);
5530 if (buf == NULL) {
5531 xmlErrMemory(ctxt);
5532 return;
5533 }
5534 if (SKIP_BLANKS == 0) {
5535 xmlFatalErrMsgStr(ctxt, XML_ERR_SPACE_REQUIRED,
5536 "ParsePI: PI %s space expected\n", target);
5537 }
5538 cur = CUR_CHAR(l);
5539 while (IS_CHAR(cur) && /* checked */
5540 ((cur != '?') || (NXT(1) != '>'))) {
5541 if (len + 5 >= size) {
5542 xmlChar *tmp;
5543 size_t new_size = size * 2;
5544 tmp = (xmlChar *) xmlRealloc(buf, new_size);
5545 if (tmp == NULL) {
5546 xmlErrMemory(ctxt);
5547 xmlFree(buf);
5548 return;
5549 }
5550 buf = tmp;
5551 size = new_size;
5552 }
5553 COPY_BUF(buf, len, cur);
5554 if (len > maxLength) {
5555 xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
5556 "PI %s too big found", target);
5557 xmlFree(buf);
5558 return;
5559 }
5560 NEXTL(l);
5561 cur = CUR_CHAR(l);
5562 }
5563 buf[len] = 0;
5564 if (cur != '?') {
5565 xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
5566 "ParsePI: PI %s never end ...\n", target);
5567 } else {
5568 SKIP(2);
5569
5570#ifdef LIBXML_CATALOG_ENABLED
5571 if ((ctxt->inSubset == 0) &&
5572 (xmlStrEqual(target, XML_CATALOG_PI))) {
5573 xmlCatalogAllow allow = xmlCatalogGetDefaults();
5574 if ((allow == XML_CATA_ALLOW_DOCUMENT) ||
5575 (allow == XML_CATA_ALLOW_ALL))
5576 xmlParseCatalogPI(ctxt, buf);
5577 }
5578#endif
5579
5580
5581 /*
5582 * SAX: PI detected.
5583 */
5584 if ((ctxt->sax) && (!ctxt->disableSAX) &&
5585 (ctxt->sax->processingInstruction != NULL))
5586 ctxt->sax->processingInstruction(ctxt->userData,
5587 target, buf);
5588 }
5589 xmlFree(buf);
5590 } else {
5591 xmlFatalErr(ctxt, XML_ERR_PI_NOT_STARTED, NULL);
5592 }
5593 }
5594}
5595
5596/**
5597 * xmlParseNotationDecl:
5598 * @ctxt: an XML parser context
5599 *
5600 * DEPRECATED: Internal function, don't use.
5601 *
5602 * Parse a notation declaration. Always consumes '<!'.
5603 *
5604 * [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID | PublicID) S? '>'
5605 *
5606 * Hence there is actually 3 choices:
5607 * 'PUBLIC' S PubidLiteral
5608 * 'PUBLIC' S PubidLiteral S SystemLiteral
5609 * and 'SYSTEM' S SystemLiteral
5610 *
5611 * See the NOTE on xmlParseExternalID().
5612 */
5613
5614void
5615xmlParseNotationDecl(xmlParserCtxtPtr ctxt) {
5616 const xmlChar *name;
5617 xmlChar *Pubid;
5618 xmlChar *Systemid;
5619
5620 if ((CUR != '<') || (NXT(1) != '!'))
5621 return;
5622 SKIP(2);
5623
5624 if (CMP8(CUR_PTR, 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
5625 int inputid = ctxt->input->id;
5626 SKIP(8);
5627 if (SKIP_BLANKS_PE == 0) {
5628 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5629 "Space required after '<!NOTATION'\n");
5630 return;
5631 }
5632
5633 name = xmlParseName(ctxt);
5634 if (name == NULL) {
5635 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
5636 return;
5637 }
5638 if (xmlStrchr(name, ':') != NULL) {
5639 xmlNsErr(ctxt, XML_NS_ERR_COLON,
5640 "colons are forbidden from notation names '%s'\n",
5641 name, NULL, NULL);
5642 }
5643 if (SKIP_BLANKS_PE == 0) {
5644 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5645 "Space required after the NOTATION name'\n");
5646 return;
5647 }
5648
5649 /*
5650 * Parse the IDs.
5651 */
5652 Systemid = xmlParseExternalID(ctxt, &Pubid, 0);
5653 SKIP_BLANKS_PE;
5654
5655 if (RAW == '>') {
5656 if (inputid != ctxt->input->id) {
5657 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5658 "Notation declaration doesn't start and stop"
5659 " in the same entity\n");
5660 }
5661 NEXT;
5662 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5663 (ctxt->sax->notationDecl != NULL))
5664 ctxt->sax->notationDecl(ctxt->userData, name, Pubid, Systemid);
5665 } else {
5666 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
5667 }
5668 if (Systemid != NULL) xmlFree(Systemid);
5669 if (Pubid != NULL) xmlFree(Pubid);
5670 }
5671}
5672
5673/**
5674 * xmlParseEntityDecl:
5675 * @ctxt: an XML parser context
5676 *
5677 * DEPRECATED: Internal function, don't use.
5678 *
5679 * Parse an entity declaration. Always consumes '<!'.
5680 *
5681 * [70] EntityDecl ::= GEDecl | PEDecl
5682 *
5683 * [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'
5684 *
5685 * [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'
5686 *
5687 * [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?)
5688 *
5689 * [74] PEDef ::= EntityValue | ExternalID
5690 *
5691 * [76] NDataDecl ::= S 'NDATA' S Name
5692 *
5693 * [ VC: Notation Declared ]
5694 * The Name must match the declared name of a notation.
5695 */
5696
5697void
5698xmlParseEntityDecl(xmlParserCtxtPtr ctxt) {
5699 const xmlChar *name = NULL;
5700 xmlChar *value = NULL;
5701 xmlChar *URI = NULL, *literal = NULL;
5702 const xmlChar *ndata = NULL;
5703 int isParameter = 0;
5704 xmlChar *orig = NULL;
5705
5706 if ((CUR != '<') || (NXT(1) != '!'))
5707 return;
5708 SKIP(2);
5709
5710 /* GROW; done in the caller */
5711 if (CMP6(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'Y')) {
5712 int inputid = ctxt->input->id;
5713 SKIP(6);
5714 if (SKIP_BLANKS_PE == 0) {
5715 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5716 "Space required after '<!ENTITY'\n");
5717 }
5718
5719 if (RAW == '%') {
5720 NEXT;
5721 if (SKIP_BLANKS_PE == 0) {
5722 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5723 "Space required after '%%'\n");
5724 }
5725 isParameter = 1;
5726 }
5727
5728 name = xmlParseName(ctxt);
5729 if (name == NULL) {
5730 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5731 "xmlParseEntityDecl: no name\n");
5732 return;
5733 }
5734 if (xmlStrchr(name, ':') != NULL) {
5735 xmlNsErr(ctxt, XML_NS_ERR_COLON,
5736 "colons are forbidden from entities names '%s'\n",
5737 name, NULL, NULL);
5738 }
5739 if (SKIP_BLANKS_PE == 0) {
5740 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5741 "Space required after the entity name\n");
5742 }
5743
5744 /*
5745 * handle the various case of definitions...
5746 */
5747 if (isParameter) {
5748 if ((RAW == '"') || (RAW == '\'')) {
5749 value = xmlParseEntityValue(ctxt, &orig);
5750 if (value) {
5751 if ((ctxt->sax != NULL) &&
5752 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5753 ctxt->sax->entityDecl(ctxt->userData, name,
5754 XML_INTERNAL_PARAMETER_ENTITY,
5755 NULL, NULL, value);
5756 }
5757 } else {
5758 URI = xmlParseExternalID(ctxt, &literal, 1);
5759 if ((URI == NULL) && (literal == NULL)) {
5760 xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
5761 }
5762 if (URI) {
5763 if (xmlStrchr(URI, '#')) {
5764 xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
5765 } else {
5766 if ((ctxt->sax != NULL) &&
5767 (!ctxt->disableSAX) &&
5768 (ctxt->sax->entityDecl != NULL))
5769 ctxt->sax->entityDecl(ctxt->userData, name,
5770 XML_EXTERNAL_PARAMETER_ENTITY,
5771 literal, URI, NULL);
5772 }
5773 }
5774 }
5775 } else {
5776 if ((RAW == '"') || (RAW == '\'')) {
5777 value = xmlParseEntityValue(ctxt, &orig);
5778 if ((ctxt->sax != NULL) &&
5779 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5780 ctxt->sax->entityDecl(ctxt->userData, name,
5781 XML_INTERNAL_GENERAL_ENTITY,
5782 NULL, NULL, value);
5783 /*
5784 * For expat compatibility in SAX mode.
5785 */
5786 if ((ctxt->myDoc == NULL) ||
5787 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
5788 if (ctxt->myDoc == NULL) {
5789 ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
5790 if (ctxt->myDoc == NULL) {
5791 xmlErrMemory(ctxt);
5792 goto done;
5793 }
5794 ctxt->myDoc->properties = XML_DOC_INTERNAL;
5795 }
5796 if (ctxt->myDoc->intSubset == NULL) {
5797 ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
5798 BAD_CAST "fake", NULL, NULL);
5799 if (ctxt->myDoc->intSubset == NULL) {
5800 xmlErrMemory(ctxt);
5801 goto done;
5802 }
5803 }
5804
5805 xmlSAX2EntityDecl(ctxt, name, XML_INTERNAL_GENERAL_ENTITY,
5806 NULL, NULL, value);
5807 }
5808 } else {
5809 URI = xmlParseExternalID(ctxt, &literal, 1);
5810 if ((URI == NULL) && (literal == NULL)) {
5811 xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
5812 }
5813 if (URI) {
5814 if (xmlStrchr(URI, '#')) {
5815 xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
5816 }
5817 }
5818 if ((RAW != '>') && (SKIP_BLANKS_PE == 0)) {
5819 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5820 "Space required before 'NDATA'\n");
5821 }
5822 if (CMP5(CUR_PTR, 'N', 'D', 'A', 'T', 'A')) {
5823 SKIP(5);
5824 if (SKIP_BLANKS_PE == 0) {
5825 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5826 "Space required after 'NDATA'\n");
5827 }
5828 ndata = xmlParseName(ctxt);
5829 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5830 (ctxt->sax->unparsedEntityDecl != NULL))
5831 ctxt->sax->unparsedEntityDecl(ctxt->userData, name,
5832 literal, URI, ndata);
5833 } else {
5834 if ((ctxt->sax != NULL) &&
5835 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5836 ctxt->sax->entityDecl(ctxt->userData, name,
5837 XML_EXTERNAL_GENERAL_PARSED_ENTITY,
5838 literal, URI, NULL);
5839 /*
5840 * For expat compatibility in SAX mode.
5841 * assuming the entity replacement was asked for
5842 */
5843 if ((ctxt->replaceEntities != 0) &&
5844 ((ctxt->myDoc == NULL) ||
5845 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE)))) {
5846 if (ctxt->myDoc == NULL) {
5847 ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
5848 if (ctxt->myDoc == NULL) {
5849 xmlErrMemory(ctxt);
5850 goto done;
5851 }
5852 ctxt->myDoc->properties = XML_DOC_INTERNAL;
5853 }
5854
5855 if (ctxt->myDoc->intSubset == NULL) {
5856 ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
5857 BAD_CAST "fake", NULL, NULL);
5858 if (ctxt->myDoc->intSubset == NULL) {
5859 xmlErrMemory(ctxt);
5860 goto done;
5861 }
5862 }
5863 xmlSAX2EntityDecl(ctxt, name,
5864 XML_EXTERNAL_GENERAL_PARSED_ENTITY,
5865 literal, URI, NULL);
5866 }
5867 }
5868 }
5869 }
5870 SKIP_BLANKS_PE;
5871 if (RAW != '>') {
5872 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_NOT_FINISHED,
5873 "xmlParseEntityDecl: entity %s not terminated\n", name);
5874 xmlHaltParser(ctxt);
5875 } else {
5876 if (inputid != ctxt->input->id) {
5877 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5878 "Entity declaration doesn't start and stop in"
5879 " the same entity\n");
5880 }
5881 NEXT;
5882 }
5883 if (orig != NULL) {
5884 /*
5885 * Ugly mechanism to save the raw entity value.
5886 */
5887 xmlEntityPtr cur = NULL;
5888
5889 if (isParameter) {
5890 if ((ctxt->sax != NULL) &&
5891 (ctxt->sax->getParameterEntity != NULL))
5892 cur = ctxt->sax->getParameterEntity(ctxt->userData, name);
5893 } else {
5894 if ((ctxt->sax != NULL) &&
5895 (ctxt->sax->getEntity != NULL))
5896 cur = ctxt->sax->getEntity(ctxt->userData, name);
5897 if ((cur == NULL) && (ctxt->userData==ctxt)) {
5898 cur = xmlSAX2GetEntity(ctxt, name);
5899 }
5900 }
5901 if ((cur != NULL) && (cur->orig == NULL)) {
5902 cur->orig = orig;
5903 orig = NULL;
5904 }
5905 }
5906
5907done:
5908 if (value != NULL) xmlFree(value);
5909 if (URI != NULL) xmlFree(URI);
5910 if (literal != NULL) xmlFree(literal);
5911 if (orig != NULL) xmlFree(orig);
5912 }
5913}
5914
5915/**
5916 * xmlParseDefaultDecl:
5917 * @ctxt: an XML parser context
5918 * @value: Receive a possible fixed default value for the attribute
5919 *
5920 * DEPRECATED: Internal function, don't use.
5921 *
5922 * Parse an attribute default declaration
5923 *
5924 * [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue)
5925 *
5926 * [ VC: Required Attribute ]
5927 * if the default declaration is the keyword #REQUIRED, then the
5928 * attribute must be specified for all elements of the type in the
5929 * attribute-list declaration.
5930 *
5931 * [ VC: Attribute Default Legal ]
5932 * The declared default value must meet the lexical constraints of
5933 * the declared attribute type c.f. xmlValidateAttributeDecl()
5934 *
5935 * [ VC: Fixed Attribute Default ]
5936 * if an attribute has a default value declared with the #FIXED
5937 * keyword, instances of that attribute must match the default value.
5938 *
5939 * [ WFC: No < in Attribute Values ]
5940 * handled in xmlParseAttValue()
5941 *
5942 * returns: XML_ATTRIBUTE_NONE, XML_ATTRIBUTE_REQUIRED, XML_ATTRIBUTE_IMPLIED
5943 * or XML_ATTRIBUTE_FIXED.
5944 */
5945
5946int
5947xmlParseDefaultDecl(xmlParserCtxtPtr ctxt, xmlChar **value) {
5948 int val;
5949 xmlChar *ret;
5950
5951 *value = NULL;
5952 if (CMP9(CUR_PTR, '#', 'R', 'E', 'Q', 'U', 'I', 'R', 'E', 'D')) {
5953 SKIP(9);
5954 return(XML_ATTRIBUTE_REQUIRED);
5955 }
5956 if (CMP8(CUR_PTR, '#', 'I', 'M', 'P', 'L', 'I', 'E', 'D')) {
5957 SKIP(8);
5958 return(XML_ATTRIBUTE_IMPLIED);
5959 }
5960 val = XML_ATTRIBUTE_NONE;
5961 if (CMP6(CUR_PTR, '#', 'F', 'I', 'X', 'E', 'D')) {
5962 SKIP(6);
5963 val = XML_ATTRIBUTE_FIXED;
5964 if (SKIP_BLANKS_PE == 0) {
5965 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5966 "Space required after '#FIXED'\n");
5967 }
5968 }
5969 ret = xmlParseAttValue(ctxt);
5970 if (ret == NULL) {
5971 xmlFatalErrMsg(ctxt, (xmlParserErrors)ctxt->errNo,
5972 "Attribute default value declaration error\n");
5973 } else
5974 *value = ret;
5975 return(val);
5976}
5977
5978/**
5979 * xmlParseNotationType:
5980 * @ctxt: an XML parser context
5981 *
5982 * DEPRECATED: Internal function, don't use.
5983 *
5984 * parse an Notation attribute type.
5985 *
5986 * Note: the leading 'NOTATION' S part has already being parsed...
5987 *
5988 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
5989 *
5990 * [ VC: Notation Attributes ]
5991 * Values of this type must match one of the notation names included
5992 * in the declaration; all notation names in the declaration must be declared.
5993 *
5994 * Returns: the notation attribute tree built while parsing
5995 */
5996
5997xmlEnumerationPtr
5998xmlParseNotationType(xmlParserCtxtPtr ctxt) {
5999 const xmlChar *name;
6000 xmlEnumerationPtr ret = NULL, last = NULL, cur, tmp;
6001
6002 if (RAW != '(') {
6003 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
6004 return(NULL);
6005 }
6006 do {
6007 NEXT;
6008 SKIP_BLANKS_PE;
6009 name = xmlParseName(ctxt);
6010 if (name == NULL) {
6011 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6012 "Name expected in NOTATION declaration\n");
6013 xmlFreeEnumeration(ret);
6014 return(NULL);
6015 }
6016 tmp = ret;
6017 while (tmp != NULL) {
6018 if (xmlStrEqual(name, tmp->name)) {
6019 xmlValidityError(ctxt, XML_DTD_DUP_TOKEN,
6020 "standalone: attribute notation value token %s duplicated\n",
6021 name, NULL);
6022 if (!xmlDictOwns(ctxt->dict, name))
6023 xmlFree((xmlChar *) name);
6024 break;
6025 }
6026 tmp = tmp->next;
6027 }
6028 if (tmp == NULL) {
6029 cur = xmlCreateEnumeration(name);
6030 if (cur == NULL) {
6031 xmlErrMemory(ctxt);
6032 xmlFreeEnumeration(ret);
6033 return(NULL);
6034 }
6035 if (last == NULL) ret = last = cur;
6036 else {
6037 last->next = cur;
6038 last = cur;
6039 }
6040 }
6041 SKIP_BLANKS_PE;
6042 } while (RAW == '|');
6043 if (RAW != ')') {
6044 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
6045 xmlFreeEnumeration(ret);
6046 return(NULL);
6047 }
6048 NEXT;
6049 return(ret);
6050}
6051
6052/**
6053 * xmlParseEnumerationType:
6054 * @ctxt: an XML parser context
6055 *
6056 * DEPRECATED: Internal function, don't use.
6057 *
6058 * parse an Enumeration attribute type.
6059 *
6060 * [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'
6061 *
6062 * [ VC: Enumeration ]
6063 * Values of this type must match one of the Nmtoken tokens in
6064 * the declaration
6065 *
6066 * Returns: the enumeration attribute tree built while parsing
6067 */
6068
6069xmlEnumerationPtr
6070xmlParseEnumerationType(xmlParserCtxtPtr ctxt) {
6071 xmlChar *name;
6072 xmlEnumerationPtr ret = NULL, last = NULL, cur, tmp;
6073
6074 if (RAW != '(') {
6075 xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_STARTED, NULL);
6076 return(NULL);
6077 }
6078 do {
6079 NEXT;
6080 SKIP_BLANKS_PE;
6081 name = xmlParseNmtoken(ctxt);
6082 if (name == NULL) {
6083 xmlFatalErr(ctxt, XML_ERR_NMTOKEN_REQUIRED, NULL);
6084 return(ret);
6085 }
6086 tmp = ret;
6087 while (tmp != NULL) {
6088 if (xmlStrEqual(name, tmp->name)) {
6089 xmlValidityError(ctxt, XML_DTD_DUP_TOKEN,
6090 "standalone: attribute enumeration value token %s duplicated\n",
6091 name, NULL);
6092 if (!xmlDictOwns(ctxt->dict, name))
6093 xmlFree(name);
6094 break;
6095 }
6096 tmp = tmp->next;
6097 }
6098 if (tmp == NULL) {
6099 cur = xmlCreateEnumeration(name);
6100 if (!xmlDictOwns(ctxt->dict, name))
6101 xmlFree(name);
6102 if (cur == NULL) {
6103 xmlErrMemory(ctxt);
6104 xmlFreeEnumeration(ret);
6105 return(NULL);
6106 }
6107 if (last == NULL) ret = last = cur;
6108 else {
6109 last->next = cur;
6110 last = cur;
6111 }
6112 }
6113 SKIP_BLANKS_PE;
6114 } while (RAW == '|');
6115 if (RAW != ')') {
6116 xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_FINISHED, NULL);
6117 return(ret);
6118 }
6119 NEXT;
6120 return(ret);
6121}
6122
6123/**
6124 * xmlParseEnumeratedType:
6125 * @ctxt: an XML parser context
6126 * @tree: the enumeration tree built while parsing
6127 *
6128 * DEPRECATED: Internal function, don't use.
6129 *
6130 * parse an Enumerated attribute type.
6131 *
6132 * [57] EnumeratedType ::= NotationType | Enumeration
6133 *
6134 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
6135 *
6136 *
6137 * Returns: XML_ATTRIBUTE_ENUMERATION or XML_ATTRIBUTE_NOTATION
6138 */
6139
6140int
6141xmlParseEnumeratedType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
6142 if (CMP8(CUR_PTR, 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
6143 SKIP(8);
6144 if (SKIP_BLANKS_PE == 0) {
6145 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6146 "Space required after 'NOTATION'\n");
6147 return(0);
6148 }
6149 *tree = xmlParseNotationType(ctxt);
6150 if (*tree == NULL) return(0);
6151 return(XML_ATTRIBUTE_NOTATION);
6152 }
6153 *tree = xmlParseEnumerationType(ctxt);
6154 if (*tree == NULL) return(0);
6155 return(XML_ATTRIBUTE_ENUMERATION);
6156}
6157
6158/**
6159 * xmlParseAttributeType:
6160 * @ctxt: an XML parser context
6161 * @tree: the enumeration tree built while parsing
6162 *
6163 * DEPRECATED: Internal function, don't use.
6164 *
6165 * parse the Attribute list def for an element
6166 *
6167 * [54] AttType ::= StringType | TokenizedType | EnumeratedType
6168 *
6169 * [55] StringType ::= 'CDATA'
6170 *
6171 * [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' |
6172 * 'ENTITIES' | 'NMTOKEN' | 'NMTOKENS'
6173 *
6174 * Validity constraints for attribute values syntax are checked in
6175 * xmlValidateAttributeValue()
6176 *
6177 * [ VC: ID ]
6178 * Values of type ID must match the Name production. A name must not
6179 * appear more than once in an XML document as a value of this type;
6180 * i.e., ID values must uniquely identify the elements which bear them.
6181 *
6182 * [ VC: One ID per Element Type ]
6183 * No element type may have more than one ID attribute specified.
6184 *
6185 * [ VC: ID Attribute Default ]
6186 * An ID attribute must have a declared default of #IMPLIED or #REQUIRED.
6187 *
6188 * [ VC: IDREF ]
6189 * Values of type IDREF must match the Name production, and values
6190 * of type IDREFS must match Names; each IDREF Name must match the value
6191 * of an ID attribute on some element in the XML document; i.e. IDREF
6192 * values must match the value of some ID attribute.
6193 *
6194 * [ VC: Entity Name ]
6195 * Values of type ENTITY must match the Name production, values
6196 * of type ENTITIES must match Names; each Entity Name must match the
6197 * name of an unparsed entity declared in the DTD.
6198 *
6199 * [ VC: Name Token ]
6200 * Values of type NMTOKEN must match the Nmtoken production; values
6201 * of type NMTOKENS must match Nmtokens.
6202 *
6203 * Returns the attribute type
6204 */
6205int
6206xmlParseAttributeType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
6207 if (CMP5(CUR_PTR, 'C', 'D', 'A', 'T', 'A')) {
6208 SKIP(5);
6209 return(XML_ATTRIBUTE_CDATA);
6210 } else if (CMP6(CUR_PTR, 'I', 'D', 'R', 'E', 'F', 'S')) {
6211 SKIP(6);
6212 return(XML_ATTRIBUTE_IDREFS);
6213 } else if (CMP5(CUR_PTR, 'I', 'D', 'R', 'E', 'F')) {
6214 SKIP(5);
6215 return(XML_ATTRIBUTE_IDREF);
6216 } else if ((RAW == 'I') && (NXT(1) == 'D')) {
6217 SKIP(2);
6218 return(XML_ATTRIBUTE_ID);
6219 } else if (CMP6(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'Y')) {
6220 SKIP(6);
6221 return(XML_ATTRIBUTE_ENTITY);
6222 } else if (CMP8(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'I', 'E', 'S')) {
6223 SKIP(8);
6224 return(XML_ATTRIBUTE_ENTITIES);
6225 } else if (CMP8(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N', 'S')) {
6226 SKIP(8);
6227 return(XML_ATTRIBUTE_NMTOKENS);
6228 } else if (CMP7(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N')) {
6229 SKIP(7);
6230 return(XML_ATTRIBUTE_NMTOKEN);
6231 }
6232 return(xmlParseEnumeratedType(ctxt, tree));
6233}
6234
6235/**
6236 * xmlParseAttributeListDecl:
6237 * @ctxt: an XML parser context
6238 *
6239 * DEPRECATED: Internal function, don't use.
6240 *
6241 * Parse an attribute list declaration for an element. Always consumes '<!'.
6242 *
6243 * [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
6244 *
6245 * [53] AttDef ::= S Name S AttType S DefaultDecl
6246 *
6247 */
6248void
6249xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt) {
6250 const xmlChar *elemName;
6251 const xmlChar *attrName;
6252 xmlEnumerationPtr tree;
6253
6254 if ((CUR != '<') || (NXT(1) != '!'))
6255 return;
6256 SKIP(2);
6257
6258 if (CMP7(CUR_PTR, 'A', 'T', 'T', 'L', 'I', 'S', 'T')) {
6259 int inputid = ctxt->input->id;
6260
6261 SKIP(7);
6262 if (SKIP_BLANKS_PE == 0) {
6263 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6264 "Space required after '<!ATTLIST'\n");
6265 }
6266 elemName = xmlParseName(ctxt);
6267 if (elemName == NULL) {
6268 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6269 "ATTLIST: no name for Element\n");
6270 return;
6271 }
6272 SKIP_BLANKS_PE;
6273 GROW;
6274 while ((RAW != '>') && (PARSER_STOPPED(ctxt) == 0)) {
6275 int type;
6276 int def;
6277 xmlChar *defaultValue = NULL;
6278
6279 GROW;
6280 tree = NULL;
6281 attrName = xmlParseName(ctxt);
6282 if (attrName == NULL) {
6283 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6284 "ATTLIST: no name for Attribute\n");
6285 break;
6286 }
6287 GROW;
6288 if (SKIP_BLANKS_PE == 0) {
6289 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6290 "Space required after the attribute name\n");
6291 break;
6292 }
6293
6294 type = xmlParseAttributeType(ctxt, &tree);
6295 if (type <= 0) {
6296 break;
6297 }
6298
6299 GROW;
6300 if (SKIP_BLANKS_PE == 0) {
6301 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6302 "Space required after the attribute type\n");
6303 if (tree != NULL)
6304 xmlFreeEnumeration(tree);
6305 break;
6306 }
6307
6308 def = xmlParseDefaultDecl(ctxt, &defaultValue);
6309 if (def <= 0) {
6310 if (defaultValue != NULL)
6311 xmlFree(defaultValue);
6312 if (tree != NULL)
6313 xmlFreeEnumeration(tree);
6314 break;
6315 }
6316 if ((type != XML_ATTRIBUTE_CDATA) && (defaultValue != NULL))
6317 xmlAttrNormalizeSpace(defaultValue, defaultValue);
6318
6319 GROW;
6320 if (RAW != '>') {
6321 if (SKIP_BLANKS_PE == 0) {
6322 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6323 "Space required after the attribute default value\n");
6324 if (defaultValue != NULL)
6325 xmlFree(defaultValue);
6326 if (tree != NULL)
6327 xmlFreeEnumeration(tree);
6328 break;
6329 }
6330 }
6331 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
6332 (ctxt->sax->attributeDecl != NULL))
6333 ctxt->sax->attributeDecl(ctxt->userData, elemName, attrName,
6334 type, def, defaultValue, tree);
6335 else if (tree != NULL)
6336 xmlFreeEnumeration(tree);
6337
6338 if ((ctxt->sax2) && (defaultValue != NULL) &&
6339 (def != XML_ATTRIBUTE_IMPLIED) &&
6340 (def != XML_ATTRIBUTE_REQUIRED)) {
6341 xmlAddDefAttrs(ctxt, elemName, attrName, defaultValue);
6342 }
6343 if (ctxt->sax2) {
6344 xmlAddSpecialAttr(ctxt, elemName, attrName, type);
6345 }
6346 if (defaultValue != NULL)
6347 xmlFree(defaultValue);
6348 GROW;
6349 }
6350 if (RAW == '>') {
6351 if (inputid != ctxt->input->id) {
6352 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6353 "Attribute list declaration doesn't start and"
6354 " stop in the same entity\n");
6355 }
6356 NEXT;
6357 }
6358 }
6359}
6360
6361/**
6362 * xmlParseElementMixedContentDecl:
6363 * @ctxt: an XML parser context
6364 * @inputchk: the input used for the current entity, needed for boundary checks
6365 *
6366 * DEPRECATED: Internal function, don't use.
6367 *
6368 * parse the declaration for a Mixed Element content
6369 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
6370 *
6371 * [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' |
6372 * '(' S? '#PCDATA' S? ')'
6373 *
6374 * [ VC: Proper Group/PE Nesting ] applies to [51] too (see [49])
6375 *
6376 * [ VC: No Duplicate Types ]
6377 * The same name must not appear more than once in a single
6378 * mixed-content declaration.
6379 *
6380 * returns: the list of the xmlElementContentPtr describing the element choices
6381 */
6382xmlElementContentPtr
6383xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt, int inputchk) {
6384 xmlElementContentPtr ret = NULL, cur = NULL, n;
6385 const xmlChar *elem = NULL;
6386
6387 GROW;
6388 if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
6389 SKIP(7);
6390 SKIP_BLANKS_PE;
6391 if (RAW == ')') {
6392 if (ctxt->input->id != inputchk) {
6393 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6394 "Element content declaration doesn't start and"
6395 " stop in the same entity\n");
6396 }
6397 NEXT;
6398 ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA);
6399 if (ret == NULL)
6400 goto mem_error;
6401 if (RAW == '*') {
6402 ret->ocur = XML_ELEMENT_CONTENT_MULT;
6403 NEXT;
6404 }
6405 return(ret);
6406 }
6407 if ((RAW == '(') || (RAW == '|')) {
6408 ret = cur = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA);
6409 if (ret == NULL)
6410 goto mem_error;
6411 }
6412 while ((RAW == '|') && (PARSER_STOPPED(ctxt) == 0)) {
6413 NEXT;
6414 n = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
6415 if (n == NULL)
6416 goto mem_error;
6417 if (elem == NULL) {
6418 n->c1 = cur;
6419 if (cur != NULL)
6420 cur->parent = n;
6421 ret = cur = n;
6422 } else {
6423 cur->c2 = n;
6424 n->parent = cur;
6425 n->c1 = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
6426 if (n->c1 == NULL)
6427 goto mem_error;
6428 n->c1->parent = n;
6429 cur = n;
6430 }
6431 SKIP_BLANKS_PE;
6432 elem = xmlParseName(ctxt);
6433 if (elem == NULL) {
6434 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6435 "xmlParseElementMixedContentDecl : Name expected\n");
6436 xmlFreeDocElementContent(ctxt->myDoc, ret);
6437 return(NULL);
6438 }
6439 SKIP_BLANKS_PE;
6440 GROW;
6441 }
6442 if ((RAW == ')') && (NXT(1) == '*')) {
6443 if (elem != NULL) {
6444 cur->c2 = xmlNewDocElementContent(ctxt->myDoc, elem,
6445 XML_ELEMENT_CONTENT_ELEMENT);
6446 if (cur->c2 == NULL)
6447 goto mem_error;
6448 cur->c2->parent = cur;
6449 }
6450 if (ret != NULL)
6451 ret->ocur = XML_ELEMENT_CONTENT_MULT;
6452 if (ctxt->input->id != inputchk) {
6453 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6454 "Element content declaration doesn't start and"
6455 " stop in the same entity\n");
6456 }
6457 SKIP(2);
6458 } else {
6459 xmlFreeDocElementContent(ctxt->myDoc, ret);
6460 xmlFatalErr(ctxt, XML_ERR_MIXED_NOT_STARTED, NULL);
6461 return(NULL);
6462 }
6463
6464 } else {
6465 xmlFatalErr(ctxt, XML_ERR_PCDATA_REQUIRED, NULL);
6466 }
6467 return(ret);
6468
6469mem_error:
6470 xmlErrMemory(ctxt);
6471 xmlFreeDocElementContent(ctxt->myDoc, ret);
6472 return(NULL);
6473}
6474
6475/**
6476 * xmlParseElementChildrenContentDeclPriv:
6477 * @ctxt: an XML parser context
6478 * @inputchk: the input used for the current entity, needed for boundary checks
6479 * @depth: the level of recursion
6480 *
6481 * parse the declaration for a Mixed Element content
6482 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
6483 *
6484 *
6485 * [47] children ::= (choice | seq) ('?' | '*' | '+')?
6486 *
6487 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
6488 *
6489 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
6490 *
6491 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
6492 *
6493 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
6494 * TODO Parameter-entity replacement text must be properly nested
6495 * with parenthesized groups. That is to say, if either of the
6496 * opening or closing parentheses in a choice, seq, or Mixed
6497 * construct is contained in the replacement text for a parameter
6498 * entity, both must be contained in the same replacement text. For
6499 * interoperability, if a parameter-entity reference appears in a
6500 * choice, seq, or Mixed construct, its replacement text should not
6501 * be empty, and neither the first nor last non-blank character of
6502 * the replacement text should be a connector (| or ,).
6503 *
6504 * Returns the tree of xmlElementContentPtr describing the element
6505 * hierarchy.
6506 */
6507static xmlElementContentPtr
6508xmlParseElementChildrenContentDeclPriv(xmlParserCtxtPtr ctxt, int inputchk,
6509 int depth) {
6510 int maxDepth = (ctxt->options & XML_PARSE_HUGE) ? 2048 : 256;
6511 xmlElementContentPtr ret = NULL, cur = NULL, last = NULL, op = NULL;
6512 const xmlChar *elem;
6513 xmlChar type = 0;
6514
6515 if (depth > maxDepth) {
6516 xmlFatalErrMsgInt(ctxt, XML_ERR_RESOURCE_LIMIT,
6517 "xmlParseElementChildrenContentDecl : depth %d too deep, "
6518 "use XML_PARSE_HUGE\n", depth);
6519 return(NULL);
6520 }
6521 SKIP_BLANKS_PE;
6522 GROW;
6523 if (RAW == '(') {
6524 int inputid = ctxt->input->id;
6525
6526 /* Recurse on first child */
6527 NEXT;
6528 SKIP_BLANKS_PE;
6529 cur = ret = xmlParseElementChildrenContentDeclPriv(ctxt, inputid,
6530 depth + 1);
6531 if (cur == NULL)
6532 return(NULL);
6533 SKIP_BLANKS_PE;
6534 GROW;
6535 } else {
6536 elem = xmlParseName(ctxt);
6537 if (elem == NULL) {
6538 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
6539 return(NULL);
6540 }
6541 cur = ret = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
6542 if (cur == NULL) {
6543 xmlErrMemory(ctxt);
6544 return(NULL);
6545 }
6546 GROW;
6547 if (RAW == '?') {
6548 cur->ocur = XML_ELEMENT_CONTENT_OPT;
6549 NEXT;
6550 } else if (RAW == '*') {
6551 cur->ocur = XML_ELEMENT_CONTENT_MULT;
6552 NEXT;
6553 } else if (RAW == '+') {
6554 cur->ocur = XML_ELEMENT_CONTENT_PLUS;
6555 NEXT;
6556 } else {
6557 cur->ocur = XML_ELEMENT_CONTENT_ONCE;
6558 }
6559 GROW;
6560 }
6561 SKIP_BLANKS_PE;
6562 while ((RAW != ')') && (PARSER_STOPPED(ctxt) == 0)) {
6563 /*
6564 * Each loop we parse one separator and one element.
6565 */
6566 if (RAW == ',') {
6567 if (type == 0) type = CUR;
6568
6569 /*
6570 * Detect "Name | Name , Name" error
6571 */
6572 else if (type != CUR) {
6573 xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
6574 "xmlParseElementChildrenContentDecl : '%c' expected\n",
6575 type);
6576 if ((last != NULL) && (last != ret))
6577 xmlFreeDocElementContent(ctxt->myDoc, last);
6578 if (ret != NULL)
6579 xmlFreeDocElementContent(ctxt->myDoc, ret);
6580 return(NULL);
6581 }
6582 NEXT;
6583
6584 op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_SEQ);
6585 if (op == NULL) {
6586 xmlErrMemory(ctxt);
6587 if ((last != NULL) && (last != ret))
6588 xmlFreeDocElementContent(ctxt->myDoc, last);
6589 xmlFreeDocElementContent(ctxt->myDoc, ret);
6590 return(NULL);
6591 }
6592 if (last == NULL) {
6593 op->c1 = ret;
6594 if (ret != NULL)
6595 ret->parent = op;
6596 ret = cur = op;
6597 } else {
6598 cur->c2 = op;
6599 if (op != NULL)
6600 op->parent = cur;
6601 op->c1 = last;
6602 if (last != NULL)
6603 last->parent = op;
6604 cur =op;
6605 last = NULL;
6606 }
6607 } else if (RAW == '|') {
6608 if (type == 0) type = CUR;
6609
6610 /*
6611 * Detect "Name , Name | Name" error
6612 */
6613 else if (type != CUR) {
6614 xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
6615 "xmlParseElementChildrenContentDecl : '%c' expected\n",
6616 type);
6617 if ((last != NULL) && (last != ret))
6618 xmlFreeDocElementContent(ctxt->myDoc, last);
6619 if (ret != NULL)
6620 xmlFreeDocElementContent(ctxt->myDoc, ret);
6621 return(NULL);
6622 }
6623 NEXT;
6624
6625 op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
6626 if (op == NULL) {
6627 xmlErrMemory(ctxt);
6628 if ((last != NULL) && (last != ret))
6629 xmlFreeDocElementContent(ctxt->myDoc, last);
6630 if (ret != NULL)
6631 xmlFreeDocElementContent(ctxt->myDoc, ret);
6632 return(NULL);
6633 }
6634 if (last == NULL) {
6635 op->c1 = ret;
6636 if (ret != NULL)
6637 ret->parent = op;
6638 ret = cur = op;
6639 } else {
6640 cur->c2 = op;
6641 if (op != NULL)
6642 op->parent = cur;
6643 op->c1 = last;
6644 if (last != NULL)
6645 last->parent = op;
6646 cur =op;
6647 last = NULL;
6648 }
6649 } else {
6650 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_FINISHED, NULL);
6651 if ((last != NULL) && (last != ret))
6652 xmlFreeDocElementContent(ctxt->myDoc, last);
6653 if (ret != NULL)
6654 xmlFreeDocElementContent(ctxt->myDoc, ret);
6655 return(NULL);
6656 }
6657 GROW;
6658 SKIP_BLANKS_PE;
6659 GROW;
6660 if (RAW == '(') {
6661 int inputid = ctxt->input->id;
6662 /* Recurse on second child */
6663 NEXT;
6664 SKIP_BLANKS_PE;
6665 last = xmlParseElementChildrenContentDeclPriv(ctxt, inputid,
6666 depth + 1);
6667 if (last == NULL) {
6668 if (ret != NULL)
6669 xmlFreeDocElementContent(ctxt->myDoc, ret);
6670 return(NULL);
6671 }
6672 SKIP_BLANKS_PE;
6673 } else {
6674 elem = xmlParseName(ctxt);
6675 if (elem == NULL) {
6676 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
6677 if (ret != NULL)
6678 xmlFreeDocElementContent(ctxt->myDoc, ret);
6679 return(NULL);
6680 }
6681 last = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
6682 if (last == NULL) {
6683 xmlErrMemory(ctxt);
6684 if (ret != NULL)
6685 xmlFreeDocElementContent(ctxt->myDoc, ret);
6686 return(NULL);
6687 }
6688 if (RAW == '?') {
6689 last->ocur = XML_ELEMENT_CONTENT_OPT;
6690 NEXT;
6691 } else if (RAW == '*') {
6692 last->ocur = XML_ELEMENT_CONTENT_MULT;
6693 NEXT;
6694 } else if (RAW == '+') {
6695 last->ocur = XML_ELEMENT_CONTENT_PLUS;
6696 NEXT;
6697 } else {
6698 last->ocur = XML_ELEMENT_CONTENT_ONCE;
6699 }
6700 }
6701 SKIP_BLANKS_PE;
6702 GROW;
6703 }
6704 if ((cur != NULL) && (last != NULL)) {
6705 cur->c2 = last;
6706 if (last != NULL)
6707 last->parent = cur;
6708 }
6709 if (ctxt->input->id != inputchk) {
6710 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6711 "Element content declaration doesn't start and stop in"
6712 " the same entity\n");
6713 }
6714 NEXT;
6715 if (RAW == '?') {
6716 if (ret != NULL) {
6717 if ((ret->ocur == XML_ELEMENT_CONTENT_PLUS) ||
6718 (ret->ocur == XML_ELEMENT_CONTENT_MULT))
6719 ret->ocur = XML_ELEMENT_CONTENT_MULT;
6720 else
6721 ret->ocur = XML_ELEMENT_CONTENT_OPT;
6722 }
6723 NEXT;
6724 } else if (RAW == '*') {
6725 if (ret != NULL) {
6726 ret->ocur = XML_ELEMENT_CONTENT_MULT;
6727 cur = ret;
6728 /*
6729 * Some normalization:
6730 * (a | b* | c?)* == (a | b | c)*
6731 */
6732 while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) {
6733 if ((cur->c1 != NULL) &&
6734 ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
6735 (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT)))
6736 cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
6737 if ((cur->c2 != NULL) &&
6738 ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
6739 (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT)))
6740 cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
6741 cur = cur->c2;
6742 }
6743 }
6744 NEXT;
6745 } else if (RAW == '+') {
6746 if (ret != NULL) {
6747 int found = 0;
6748
6749 if ((ret->ocur == XML_ELEMENT_CONTENT_OPT) ||
6750 (ret->ocur == XML_ELEMENT_CONTENT_MULT))
6751 ret->ocur = XML_ELEMENT_CONTENT_MULT;
6752 else
6753 ret->ocur = XML_ELEMENT_CONTENT_PLUS;
6754 /*
6755 * Some normalization:
6756 * (a | b*)+ == (a | b)*
6757 * (a | b?)+ == (a | b)*
6758 */
6759 while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) {
6760 if ((cur->c1 != NULL) &&
6761 ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
6762 (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT))) {
6763 cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
6764 found = 1;
6765 }
6766 if ((cur->c2 != NULL) &&
6767 ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
6768 (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT))) {
6769 cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
6770 found = 1;
6771 }
6772 cur = cur->c2;
6773 }
6774 if (found)
6775 ret->ocur = XML_ELEMENT_CONTENT_MULT;
6776 }
6777 NEXT;
6778 }
6779 return(ret);
6780}
6781
6782/**
6783 * xmlParseElementChildrenContentDecl:
6784 * @ctxt: an XML parser context
6785 * @inputchk: the input used for the current entity, needed for boundary checks
6786 *
6787 * DEPRECATED: Internal function, don't use.
6788 *
6789 * parse the declaration for a Mixed Element content
6790 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
6791 *
6792 * [47] children ::= (choice | seq) ('?' | '*' | '+')?
6793 *
6794 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
6795 *
6796 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
6797 *
6798 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
6799 *
6800 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
6801 * TODO Parameter-entity replacement text must be properly nested
6802 * with parenthesized groups. That is to say, if either of the
6803 * opening or closing parentheses in a choice, seq, or Mixed
6804 * construct is contained in the replacement text for a parameter
6805 * entity, both must be contained in the same replacement text. For
6806 * interoperability, if a parameter-entity reference appears in a
6807 * choice, seq, or Mixed construct, its replacement text should not
6808 * be empty, and neither the first nor last non-blank character of
6809 * the replacement text should be a connector (| or ,).
6810 *
6811 * Returns the tree of xmlElementContentPtr describing the element
6812 * hierarchy.
6813 */
6814xmlElementContentPtr
6815xmlParseElementChildrenContentDecl(xmlParserCtxtPtr ctxt, int inputchk) {
6816 /* stub left for API/ABI compat */
6817 return(xmlParseElementChildrenContentDeclPriv(ctxt, inputchk, 1));
6818}
6819
6820/**
6821 * xmlParseElementContentDecl:
6822 * @ctxt: an XML parser context
6823 * @name: the name of the element being defined.
6824 * @result: the Element Content pointer will be stored here if any
6825 *
6826 * DEPRECATED: Internal function, don't use.
6827 *
6828 * parse the declaration for an Element content either Mixed or Children,
6829 * the cases EMPTY and ANY are handled directly in xmlParseElementDecl
6830 *
6831 * [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
6832 *
6833 * returns: the type of element content XML_ELEMENT_TYPE_xxx
6834 */
6835
6836int
6837xmlParseElementContentDecl(xmlParserCtxtPtr ctxt, const xmlChar *name,
6838 xmlElementContentPtr *result) {
6839
6840 xmlElementContentPtr tree = NULL;
6841 int inputid = ctxt->input->id;
6842 int res;
6843
6844 *result = NULL;
6845
6846 if (RAW != '(') {
6847 xmlFatalErrMsgStr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
6848 "xmlParseElementContentDecl : %s '(' expected\n", name);
6849 return(-1);
6850 }
6851 NEXT;
6852 GROW;
6853 SKIP_BLANKS_PE;
6854 if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
6855 tree = xmlParseElementMixedContentDecl(ctxt, inputid);
6856 res = XML_ELEMENT_TYPE_MIXED;
6857 } else {
6858 tree = xmlParseElementChildrenContentDeclPriv(ctxt, inputid, 1);
6859 res = XML_ELEMENT_TYPE_ELEMENT;
6860 }
6861 SKIP_BLANKS_PE;
6862 *result = tree;
6863 return(res);
6864}
6865
6866/**
6867 * xmlParseElementDecl:
6868 * @ctxt: an XML parser context
6869 *
6870 * DEPRECATED: Internal function, don't use.
6871 *
6872 * Parse an element declaration. Always consumes '<!'.
6873 *
6874 * [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'
6875 *
6876 * [ VC: Unique Element Type Declaration ]
6877 * No element type may be declared more than once
6878 *
6879 * Returns the type of the element, or -1 in case of error
6880 */
6881int
6882xmlParseElementDecl(xmlParserCtxtPtr ctxt) {
6883 const xmlChar *name;
6884 int ret = -1;
6885 xmlElementContentPtr content = NULL;
6886
6887 if ((CUR != '<') || (NXT(1) != '!'))
6888 return(ret);
6889 SKIP(2);
6890
6891 /* GROW; done in the caller */
6892 if (CMP7(CUR_PTR, 'E', 'L', 'E', 'M', 'E', 'N', 'T')) {
6893 int inputid = ctxt->input->id;
6894
6895 SKIP(7);
6896 if (SKIP_BLANKS_PE == 0) {
6897 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6898 "Space required after 'ELEMENT'\n");
6899 return(-1);
6900 }
6901 name = xmlParseName(ctxt);
6902 if (name == NULL) {
6903 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6904 "xmlParseElementDecl: no name for Element\n");
6905 return(-1);
6906 }
6907 if (SKIP_BLANKS_PE == 0) {
6908 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6909 "Space required after the element name\n");
6910 }
6911 if (CMP5(CUR_PTR, 'E', 'M', 'P', 'T', 'Y')) {
6912 SKIP(5);
6913 /*
6914 * Element must always be empty.
6915 */
6916 ret = XML_ELEMENT_TYPE_EMPTY;
6917 } else if ((RAW == 'A') && (NXT(1) == 'N') &&
6918 (NXT(2) == 'Y')) {
6919 SKIP(3);
6920 /*
6921 * Element is a generic container.
6922 */
6923 ret = XML_ELEMENT_TYPE_ANY;
6924 } else if (RAW == '(') {
6925 ret = xmlParseElementContentDecl(ctxt, name, &content);
6926 } else {
6927 /*
6928 * [ WFC: PEs in Internal Subset ] error handling.
6929 */
6930 xmlFatalErrMsg(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
6931 "xmlParseElementDecl: 'EMPTY', 'ANY' or '(' expected\n");
6932 return(-1);
6933 }
6934
6935 SKIP_BLANKS_PE;
6936
6937 if (RAW != '>') {
6938 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
6939 if (content != NULL) {
6940 xmlFreeDocElementContent(ctxt->myDoc, content);
6941 }
6942 } else {
6943 if (inputid != ctxt->input->id) {
6944 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6945 "Element declaration doesn't start and stop in"
6946 " the same entity\n");
6947 }
6948
6949 NEXT;
6950 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
6951 (ctxt->sax->elementDecl != NULL)) {
6952 if (content != NULL)
6953 content->parent = NULL;
6954 ctxt->sax->elementDecl(ctxt->userData, name, ret,
6955 content);
6956 if ((content != NULL) && (content->parent == NULL)) {
6957 /*
6958 * this is a trick: if xmlAddElementDecl is called,
6959 * instead of copying the full tree it is plugged directly
6960 * if called from the parser. Avoid duplicating the
6961 * interfaces or change the API/ABI
6962 */
6963 xmlFreeDocElementContent(ctxt->myDoc, content);
6964 }
6965 } else if (content != NULL) {
6966 xmlFreeDocElementContent(ctxt->myDoc, content);
6967 }
6968 }
6969 }
6970 return(ret);
6971}
6972
6973/**
6974 * xmlParseConditionalSections
6975 * @ctxt: an XML parser context
6976 *
6977 * Parse a conditional section. Always consumes '<!['.
6978 *
6979 * [61] conditionalSect ::= includeSect | ignoreSect
6980 * [62] includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>'
6981 * [63] ignoreSect ::= '<![' S? 'IGNORE' S? '[' ignoreSectContents* ']]>'
6982 * [64] ignoreSectContents ::= Ignore ('<![' ignoreSectContents ']]>' Ignore)*
6983 * [65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*)
6984 */
6985
6986static void
6987xmlParseConditionalSections(xmlParserCtxtPtr ctxt) {
6988 int *inputIds = NULL;
6989 size_t inputIdsSize = 0;
6990 size_t depth = 0;
6991
6992 while (PARSER_STOPPED(ctxt) == 0) {
6993 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6994 int id = ctxt->input->id;
6995
6996 SKIP(3);
6997 SKIP_BLANKS_PE;
6998
6999 if (CMP7(CUR_PTR, 'I', 'N', 'C', 'L', 'U', 'D', 'E')) {
7000 SKIP(7);
7001 SKIP_BLANKS_PE;
7002 if (RAW != '[') {
7003 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
7004 xmlHaltParser(ctxt);
7005 goto error;
7006 }
7007 if (ctxt->input->id != id) {
7008 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
7009 "All markup of the conditional section is"
7010 " not in the same entity\n");
7011 }
7012 NEXT;
7013
7014 if (inputIdsSize <= depth) {
7015 int *tmp;
7016
7017 inputIdsSize = (inputIdsSize == 0 ? 4 : inputIdsSize * 2);
7018 tmp = (int *) xmlRealloc(inputIds,
7019 inputIdsSize * sizeof(int));
7020 if (tmp == NULL) {
7021 xmlErrMemory(ctxt);
7022 goto error;
7023 }
7024 inputIds = tmp;
7025 }
7026 inputIds[depth] = id;
7027 depth++;
7028 } else if (CMP6(CUR_PTR, 'I', 'G', 'N', 'O', 'R', 'E')) {
7029 size_t ignoreDepth = 0;
7030
7031 SKIP(6);
7032 SKIP_BLANKS_PE;
7033 if (RAW != '[') {
7034 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
7035 xmlHaltParser(ctxt);
7036 goto error;
7037 }
7038 if (ctxt->input->id != id) {
7039 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
7040 "All markup of the conditional section is"
7041 " not in the same entity\n");
7042 }
7043 NEXT;
7044
7045 while (PARSER_STOPPED(ctxt) == 0) {
7046 if (RAW == 0) {
7047 xmlFatalErr(ctxt, XML_ERR_CONDSEC_NOT_FINISHED, NULL);
7048 goto error;
7049 }
7050 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
7051 SKIP(3);
7052 ignoreDepth++;
7053 /* Check for integer overflow */
7054 if (ignoreDepth == 0) {
7055 xmlErrMemory(ctxt);
7056 goto error;
7057 }
7058 } else if ((RAW == ']') && (NXT(1) == ']') &&
7059 (NXT(2) == '>')) {
7060 SKIP(3);
7061 if (ignoreDepth == 0)
7062 break;
7063 ignoreDepth--;
7064 } else {
7065 NEXT;
7066 }
7067 }
7068
7069 if (ctxt->input->id != id) {
7070 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
7071 "All markup of the conditional section is"
7072 " not in the same entity\n");
7073 }
7074 } else {
7075 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID_KEYWORD, NULL);
7076 xmlHaltParser(ctxt);
7077 goto error;
7078 }
7079 } else if ((depth > 0) &&
7080 (RAW == ']') && (NXT(1) == ']') && (NXT(2) == '>')) {
7081 depth--;
7082 if (ctxt->input->id != inputIds[depth]) {
7083 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
7084 "All markup of the conditional section is not"
7085 " in the same entity\n");
7086 }
7087 SKIP(3);
7088 } else if ((RAW == '<') && ((NXT(1) == '!') || (NXT(1) == '?'))) {
7089 xmlParseMarkupDecl(ctxt);
7090 } else {
7091 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
7092 xmlHaltParser(ctxt);
7093 goto error;
7094 }
7095
7096 if (depth == 0)
7097 break;
7098
7099 SKIP_BLANKS_PE;
7100 SHRINK;
7101 GROW;
7102 }
7103
7104error:
7105 xmlFree(inputIds);
7106}
7107
7108/**
7109 * xmlParseMarkupDecl:
7110 * @ctxt: an XML parser context
7111 *
7112 * DEPRECATED: Internal function, don't use.
7113 *
7114 * Parse markup declarations. Always consumes '<!' or '<?'.
7115 *
7116 * [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl |
7117 * NotationDecl | PI | Comment
7118 *
7119 * [ VC: Proper Declaration/PE Nesting ]
7120 * Parameter-entity replacement text must be properly nested with
7121 * markup declarations. That is to say, if either the first character
7122 * or the last character of a markup declaration (markupdecl above) is
7123 * contained in the replacement text for a parameter-entity reference,
7124 * both must be contained in the same replacement text.
7125 *
7126 * [ WFC: PEs in Internal Subset ]
7127 * In the internal DTD subset, parameter-entity references can occur
7128 * only where markup declarations can occur, not within markup declarations.
7129 * (This does not apply to references that occur in external parameter
7130 * entities or to the external subset.)
7131 */
7132void
7133xmlParseMarkupDecl(xmlParserCtxtPtr ctxt) {
7134 GROW;
7135 if (CUR == '<') {
7136 if (NXT(1) == '!') {
7137 switch (NXT(2)) {
7138 case 'E':
7139 if (NXT(3) == 'L')
7140 xmlParseElementDecl(ctxt);
7141 else if (NXT(3) == 'N')
7142 xmlParseEntityDecl(ctxt);
7143 else
7144 SKIP(2);
7145 break;
7146 case 'A':
7147 xmlParseAttributeListDecl(ctxt);
7148 break;
7149 case 'N':
7150 xmlParseNotationDecl(ctxt);
7151 break;
7152 case '-':
7153 xmlParseComment(ctxt);
7154 break;
7155 default:
7156 /* there is an error but it will be detected later */
7157 SKIP(2);
7158 break;
7159 }
7160 } else if (NXT(1) == '?') {
7161 xmlParsePI(ctxt);
7162 }
7163 }
7164}
7165
7166/**
7167 * xmlParseTextDecl:
7168 * @ctxt: an XML parser context
7169 *
7170 * DEPRECATED: Internal function, don't use.
7171 *
7172 * parse an XML declaration header for external entities
7173 *
7174 * [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>'
7175 */
7176
7177void
7178xmlParseTextDecl(xmlParserCtxtPtr ctxt) {
7179 xmlChar *version;
7180
7181 /*
7182 * We know that '<?xml' is here.
7183 */
7184 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
7185 SKIP(5);
7186 } else {
7187 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_STARTED, NULL);
7188 return;
7189 }
7190
7191 if (SKIP_BLANKS == 0) {
7192 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
7193 "Space needed after '<?xml'\n");
7194 }
7195
7196 /*
7197 * We may have the VersionInfo here.
7198 */
7199 version = xmlParseVersionInfo(ctxt);
7200 if (version == NULL) {
7201 version = xmlCharStrdup(XML_DEFAULT_VERSION);
7202 if (version == NULL) {
7203 xmlErrMemory(ctxt);
7204 return;
7205 }
7206 } else {
7207 if (SKIP_BLANKS == 0) {
7208 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
7209 "Space needed here\n");
7210 }
7211 }
7212 ctxt->input->version = version;
7213
7214 /*
7215 * We must have the encoding declaration
7216 */
7217 xmlParseEncodingDecl(ctxt);
7218
7219 SKIP_BLANKS;
7220 if ((RAW == '?') && (NXT(1) == '>')) {
7221 SKIP(2);
7222 } else if (RAW == '>') {
7223 /* Deprecated old WD ... */
7224 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
7225 NEXT;
7226 } else {
7227 int c;
7228
7229 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
7230 while ((PARSER_STOPPED(ctxt) == 0) && ((c = CUR) != 0)) {
7231 NEXT;
7232 if (c == '>')
7233 break;
7234 }
7235 }
7236}
7237
7238/**
7239 * xmlParseExternalSubset:
7240 * @ctxt: an XML parser context
7241 * @ExternalID: the external identifier
7242 * @SystemID: the system identifier (or URL)
7243 *
7244 * parse Markup declarations from an external subset
7245 *
7246 * [30] extSubset ::= textDecl? extSubsetDecl
7247 *
7248 * [31] extSubsetDecl ::= (markupdecl | conditionalSect | PEReference | S) *
7249 */
7250void
7251xmlParseExternalSubset(xmlParserCtxtPtr ctxt, const xmlChar *ExternalID,
7252 const xmlChar *SystemID) {
7253 int oldInputNr;
7254
7255 xmlCtxtInitializeLate(ctxt);
7256
7257 xmlDetectEncoding(ctxt);
7258
7259 if (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) {
7260 xmlParseTextDecl(ctxt);
7261 }
7262 if (ctxt->myDoc == NULL) {
7263 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
7264 if (ctxt->myDoc == NULL) {
7265 xmlErrMemory(ctxt);
7266 return;
7267 }
7268 ctxt->myDoc->properties = XML_DOC_INTERNAL;
7269 }
7270 if ((ctxt->myDoc != NULL) && (ctxt->myDoc->intSubset == NULL) &&
7271 (xmlCreateIntSubset(ctxt->myDoc, NULL, ExternalID, SystemID) == NULL)) {
7272 xmlErrMemory(ctxt);
7273 }
7274
7275 ctxt->inSubset = 2;
7276 oldInputNr = ctxt->inputNr;
7277
7278 SKIP_BLANKS_PE;
7279 while (((RAW != 0) || (ctxt->inputNr > oldInputNr)) &&
7280 (!PARSER_STOPPED(ctxt))) {
7281 GROW;
7282 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
7283 xmlParseConditionalSections(ctxt);
7284 } else if ((RAW == '<') && ((NXT(1) == '!') || (NXT(1) == '?'))) {
7285 xmlParseMarkupDecl(ctxt);
7286 } else {
7287 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
7288 xmlHaltParser(ctxt);
7289 return;
7290 }
7291 SKIP_BLANKS_PE;
7292 SHRINK;
7293 }
7294
7295 while (ctxt->inputNr > oldInputNr)
7296 xmlPopPE(ctxt);
7297
7298 if (RAW != 0) {
7299 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
7300 }
7301}
7302
7303/**
7304 * xmlParseReference:
7305 * @ctxt: an XML parser context
7306 *
7307 * DEPRECATED: Internal function, don't use.
7308 *
7309 * parse and handle entity references in content, depending on the SAX
7310 * interface, this may end-up in a call to character() if this is a
7311 * CharRef, a predefined entity, if there is no reference() callback.
7312 * or if the parser was asked to switch to that mode.
7313 *
7314 * Always consumes '&'.
7315 *
7316 * [67] Reference ::= EntityRef | CharRef
7317 */
7318void
7319xmlParseReference(xmlParserCtxtPtr ctxt) {
7320 xmlEntityPtr ent = NULL;
7321 const xmlChar *name;
7322 xmlChar *val;
7323
7324 if (RAW != '&')
7325 return;
7326
7327 /*
7328 * Simple case of a CharRef
7329 */
7330 if (NXT(1) == '#') {
7331 int i = 0;
7332 xmlChar out[16];
7333 int value = xmlParseCharRef(ctxt);
7334
7335 if (value == 0)
7336 return;
7337
7338 /*
7339 * Just encode the value in UTF-8
7340 */
7341 COPY_BUF(out, i, value);
7342 out[i] = 0;
7343 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
7344 (!ctxt->disableSAX))
7345 ctxt->sax->characters(ctxt->userData, out, i);
7346 return;
7347 }
7348
7349 /*
7350 * We are seeing an entity reference
7351 */
7352 name = xmlParseEntityRefInternal(ctxt);
7353 if (name == NULL)
7354 return;
7355 ent = xmlLookupGeneralEntity(ctxt, name, /* isAttr */ 0);
7356 if (ent == NULL) {
7357 /*
7358 * Create a reference for undeclared entities.
7359 */
7360 if ((ctxt->replaceEntities == 0) &&
7361 (ctxt->sax != NULL) &&
7362 (ctxt->disableSAX == 0) &&
7363 (ctxt->sax->reference != NULL)) {
7364 ctxt->sax->reference(ctxt->userData, name);
7365 }
7366 return;
7367 }
7368 if (!ctxt->wellFormed)
7369 return;
7370
7371 /* special case of predefined entities */
7372 if ((ent->name == NULL) ||
7373 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
7374 val = ent->content;
7375 if (val == NULL) return;
7376 /*
7377 * inline the entity.
7378 */
7379 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
7380 (!ctxt->disableSAX))
7381 ctxt->sax->characters(ctxt->userData, val, xmlStrlen(val));
7382 return;
7383 }
7384
7385 /*
7386 * The first reference to the entity trigger a parsing phase
7387 * where the ent->children is filled with the result from
7388 * the parsing.
7389 * Note: external parsed entities will not be loaded, it is not
7390 * required for a non-validating parser, unless the parsing option
7391 * of validating, or substituting entities were given. Doing so is
7392 * far more secure as the parser will only process data coming from
7393 * the document entity by default.
7394 *
7395 * FIXME: This doesn't work correctly since entities can be
7396 * expanded with different namespace declarations in scope.
7397 * For example:
7398 *
7399 * <!DOCTYPE doc [
7400 * <!ENTITY ent "<ns:elem/>">
7401 * ]>
7402 * <doc>
7403 * <decl1 xmlns:ns="urn:ns1">
7404 * &ent;
7405 * </decl1>
7406 * <decl2 xmlns:ns="urn:ns2">
7407 * &ent;
7408 * </decl2>
7409 * </doc>
7410 *
7411 * Proposed fix:
7412 *
7413 * - Ignore current namespace declarations when parsing the
7414 * entity. If a prefix can't be resolved, don't report an error
7415 * but mark it as unresolved.
7416 * - Try to resolve these prefixes when expanding the entity.
7417 * This will require a specialized version of xmlStaticCopyNode
7418 * which can also make use of the namespace hash table to avoid
7419 * quadratic behavior.
7420 *
7421 * Alternatively, we could simply reparse the entity on each
7422 * expansion like we already do with custom SAX callbacks.
7423 * External entity content should be cached in this case.
7424 */
7425 if ((ent->etype == XML_INTERNAL_GENERAL_ENTITY) ||
7426 (((ctxt->options & XML_PARSE_NO_XXE) == 0) &&
7427 ((ctxt->replaceEntities) ||
7428 (ctxt->validate)))) {
7429 if ((ent->flags & XML_ENT_PARSED) == 0) {
7430 xmlCtxtParseEntity(ctxt, ent);
7431 } else if (ent->children == NULL) {
7432 /*
7433 * Probably running in SAX mode and the callbacks don't
7434 * build the entity content. Parse the entity again.
7435 *
7436 * This will also be triggered in normal tree builder mode
7437 * if an entity happens to be empty, causing unnecessary
7438 * reloads. It's hard to come up with a reliable check in
7439 * which mode we're running.
7440 */
7441 xmlCtxtParseEntity(ctxt, ent);
7442 }
7443 }
7444
7445 /*
7446 * We also check for amplification if entities aren't substituted.
7447 * They might be expanded later.
7448 */
7449 if (xmlParserEntityCheck(ctxt, ent->expandedSize))
7450 return;
7451
7452 if ((ctxt->sax == NULL) || (ctxt->disableSAX))
7453 return;
7454
7455 if (ctxt->replaceEntities == 0) {
7456 /*
7457 * Create a reference
7458 */
7459 if (ctxt->sax->reference != NULL)
7460 ctxt->sax->reference(ctxt->userData, ent->name);
7461 } else if ((ent->children != NULL) && (ctxt->node != NULL)) {
7462 xmlNodePtr copy, cur;
7463
7464 /*
7465 * Seems we are generating the DOM content, copy the tree
7466 */
7467 cur = ent->children;
7468
7469 /*
7470 * Handle first text node with SAX to coalesce text efficiently
7471 */
7472 if ((cur->type == XML_TEXT_NODE) ||
7473 (cur->type == XML_CDATA_SECTION_NODE)) {
7474 int len = xmlStrlen(cur->content);
7475
7476 if ((cur->type == XML_TEXT_NODE) ||
7477 (ctxt->sax->cdataBlock == NULL)) {
7478 if (ctxt->sax->characters != NULL)
7479 ctxt->sax->characters(ctxt, cur->content, len);
7480 } else {
7481 if (ctxt->sax->cdataBlock != NULL)
7482 ctxt->sax->cdataBlock(ctxt, cur->content, len);
7483 }
7484
7485 cur = cur->next;
7486 }
7487
7488 while (cur != NULL) {
7489 xmlNodePtr last;
7490
7491 /*
7492 * Handle last text node with SAX to coalesce text efficiently
7493 */
7494 if ((cur->next == NULL) &&
7495 ((cur->type == XML_TEXT_NODE) ||
7496 (cur->type == XML_CDATA_SECTION_NODE))) {
7497 int len = xmlStrlen(cur->content);
7498
7499 if ((cur->type == XML_TEXT_NODE) ||
7500 (ctxt->sax->cdataBlock == NULL)) {
7501 if (ctxt->sax->characters != NULL)
7502 ctxt->sax->characters(ctxt, cur->content, len);
7503 } else {
7504 if (ctxt->sax->cdataBlock != NULL)
7505 ctxt->sax->cdataBlock(ctxt, cur->content, len);
7506 }
7507
7508 break;
7509 }
7510
7511 /*
7512 * Reset coalesce buffer stats only for non-text nodes.
7513 */
7514 ctxt->nodemem = 0;
7515 ctxt->nodelen = 0;
7516
7517 copy = xmlDocCopyNode(cur, ctxt->myDoc, 1);
7518
7519 if (copy == NULL) {
7520 xmlErrMemory(ctxt);
7521 break;
7522 }
7523
7524 if (ctxt->parseMode == XML_PARSE_READER) {
7525 /* Needed for reader */
7526 copy->extra = cur->extra;
7527 /* Maybe needed for reader */
7528 copy->_private = cur->_private;
7529 }
7530
7531 copy->parent = ctxt->node;
7532 last = ctxt->node->last;
7533 if (last == NULL) {
7534 ctxt->node->children = copy;
7535 } else {
7536 last->next = copy;
7537 copy->prev = last;
7538 }
7539 ctxt->node->last = copy;
7540
7541 cur = cur->next;
7542 }
7543 }
7544}
7545
7546static void
7547xmlHandleUndeclaredEntity(xmlParserCtxtPtr ctxt, const xmlChar *name) {
7548 /*
7549 * [ WFC: Entity Declared ]
7550 * In a document without any DTD, a document with only an
7551 * internal DTD subset which contains no parameter entity
7552 * references, or a document with "standalone='yes'", the
7553 * Name given in the entity reference must match that in an
7554 * entity declaration, except that well-formed documents
7555 * need not declare any of the following entities: amp, lt,
7556 * gt, apos, quot.
7557 * The declaration of a parameter entity must precede any
7558 * reference to it.
7559 * Similarly, the declaration of a general entity must
7560 * precede any reference to it which appears in a default
7561 * value in an attribute-list declaration. Note that if
7562 * entities are declared in the external subset or in
7563 * external parameter entities, a non-validating processor
7564 * is not obligated to read and process their declarations;
7565 * for such documents, the rule that an entity must be
7566 * declared is a well-formedness constraint only if
7567 * standalone='yes'.
7568 */
7569 if ((ctxt->standalone == 1) ||
7570 ((ctxt->hasExternalSubset == 0) &&
7571 (ctxt->hasPErefs == 0))) {
7572 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7573 "Entity '%s' not defined\n", name);
7574 } else if (ctxt->validate) {
7575 /*
7576 * [ VC: Entity Declared ]
7577 * In a document with an external subset or external
7578 * parameter entities with "standalone='no'", ...
7579 * ... The declaration of a parameter entity must
7580 * precede any reference to it...
7581 */
7582 xmlValidityError(ctxt, XML_ERR_UNDECLARED_ENTITY,
7583 "Entity '%s' not defined\n", name, NULL);
7584 } else if ((ctxt->loadsubset) ||
7585 ((ctxt->replaceEntities) &&
7586 ((ctxt->options & XML_PARSE_NO_XXE) == 0))) {
7587 /*
7588 * Also raise a non-fatal error
7589 *
7590 * - if the external subset is loaded and all entity declarations
7591 * should be available, or
7592 * - entity substition was requested without restricting
7593 * external entity access.
7594 */
7595 xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
7596 "Entity '%s' not defined\n", name);
7597 } else {
7598 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
7599 "Entity '%s' not defined\n", name, NULL);
7600 }
7601
7602 ctxt->valid = 0;
7603}
7604
7605static xmlEntityPtr
7606xmlLookupGeneralEntity(xmlParserCtxtPtr ctxt, const xmlChar *name, int inAttr) {
7607 xmlEntityPtr ent;
7608
7609 /*
7610 * Predefined entities override any extra definition
7611 */
7612 if ((ctxt->options & XML_PARSE_OLDSAX) == 0) {
7613 ent = xmlGetPredefinedEntity(name);
7614 if (ent != NULL)
7615 return(ent);
7616 }
7617
7618 /*
7619 * Ask first SAX for entity resolution, otherwise try the
7620 * entities which may have stored in the parser context.
7621 */
7622 if (ctxt->sax != NULL) {
7623 if (ctxt->sax->getEntity != NULL)
7624 ent = ctxt->sax->getEntity(ctxt->userData, name);
7625 if ((ctxt->wellFormed == 1 ) && (ent == NULL) &&
7626 (ctxt->options & XML_PARSE_OLDSAX))
7627 ent = xmlGetPredefinedEntity(name);
7628 if ((ctxt->wellFormed == 1 ) && (ent == NULL) &&
7629 (ctxt->userData==ctxt)) {
7630 ent = xmlSAX2GetEntity(ctxt, name);
7631 }
7632 }
7633
7634 if (ent == NULL) {
7635 xmlHandleUndeclaredEntity(ctxt, name);
7636 }
7637
7638 /*
7639 * [ WFC: Parsed Entity ]
7640 * An entity reference must not contain the name of an
7641 * unparsed entity
7642 */
7643 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
7644 xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
7645 "Entity reference to unparsed entity %s\n", name);
7646 ent = NULL;
7647 }
7648
7649 /*
7650 * [ WFC: No External Entity References ]
7651 * Attribute values cannot contain direct or indirect
7652 * entity references to external entities.
7653 */
7654 else if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
7655 if (inAttr) {
7656 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
7657 "Attribute references external entity '%s'\n", name);
7658 ent = NULL;
7659 }
7660 }
7661
7662 return(ent);
7663}
7664
7665/**
7666 * xmlParseEntityRefInternal:
7667 * @ctxt: an XML parser context
7668 * @inAttr: whether we are in an attribute value
7669 *
7670 * Parse an entity reference. Always consumes '&'.
7671 *
7672 * [68] EntityRef ::= '&' Name ';'
7673 *
7674 * Returns the name, or NULL in case of error.
7675 */
7676static const xmlChar *
7677xmlParseEntityRefInternal(xmlParserCtxtPtr ctxt) {
7678 const xmlChar *name;
7679
7680 GROW;
7681
7682 if (RAW != '&')
7683 return(NULL);
7684 NEXT;
7685 name = xmlParseName(ctxt);
7686 if (name == NULL) {
7687 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7688 "xmlParseEntityRef: no name\n");
7689 return(NULL);
7690 }
7691 if (RAW != ';') {
7692 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
7693 return(NULL);
7694 }
7695 NEXT;
7696
7697 return(name);
7698}
7699
7700/**
7701 * xmlParseEntityRef:
7702 * @ctxt: an XML parser context
7703 *
7704 * DEPRECATED: Internal function, don't use.
7705 *
7706 * Returns the xmlEntityPtr if found, or NULL otherwise.
7707 */
7708xmlEntityPtr
7709xmlParseEntityRef(xmlParserCtxtPtr ctxt) {
7710 const xmlChar *name;
7711
7712 if (ctxt == NULL)
7713 return(NULL);
7714
7715 name = xmlParseEntityRefInternal(ctxt);
7716 if (name == NULL)
7717 return(NULL);
7718
7719 return(xmlLookupGeneralEntity(ctxt, name, /* inAttr */ 0));
7720}
7721
7722/**
7723 * xmlParseStringEntityRef:
7724 * @ctxt: an XML parser context
7725 * @str: a pointer to an index in the string
7726 *
7727 * parse ENTITY references declarations, but this version parses it from
7728 * a string value.
7729 *
7730 * [68] EntityRef ::= '&' Name ';'
7731 *
7732 * [ WFC: Entity Declared ]
7733 * In a document without any DTD, a document with only an internal DTD
7734 * subset which contains no parameter entity references, or a document
7735 * with "standalone='yes'", the Name given in the entity reference
7736 * must match that in an entity declaration, except that well-formed
7737 * documents need not declare any of the following entities: amp, lt,
7738 * gt, apos, quot. The declaration of a parameter entity must precede
7739 * any reference to it. Similarly, the declaration of a general entity
7740 * must precede any reference to it which appears in a default value in an
7741 * attribute-list declaration. Note that if entities are declared in the
7742 * external subset or in external parameter entities, a non-validating
7743 * processor is not obligated to read and process their declarations;
7744 * for such documents, the rule that an entity must be declared is a
7745 * well-formedness constraint only if standalone='yes'.
7746 *
7747 * [ WFC: Parsed Entity ]
7748 * An entity reference must not contain the name of an unparsed entity
7749 *
7750 * Returns the xmlEntityPtr if found, or NULL otherwise. The str pointer
7751 * is updated to the current location in the string.
7752 */
7753static xmlChar *
7754xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar ** str) {
7755 xmlChar *name;
7756 const xmlChar *ptr;
7757 xmlChar cur;
7758
7759 if ((str == NULL) || (*str == NULL))
7760 return(NULL);
7761 ptr = *str;
7762 cur = *ptr;
7763 if (cur != '&')
7764 return(NULL);
7765
7766 ptr++;
7767 name = xmlParseStringName(ctxt, &ptr);
7768 if (name == NULL) {
7769 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7770 "xmlParseStringEntityRef: no name\n");
7771 *str = ptr;
7772 return(NULL);
7773 }
7774 if (*ptr != ';') {
7775 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
7776 xmlFree(name);
7777 *str = ptr;
7778 return(NULL);
7779 }
7780 ptr++;
7781
7782 *str = ptr;
7783 return(name);
7784}
7785
7786/**
7787 * xmlParsePEReference:
7788 * @ctxt: an XML parser context
7789 *
7790 * DEPRECATED: Internal function, don't use.
7791 *
7792 * Parse a parameter entity reference. Always consumes '%'.
7793 *
7794 * The entity content is handled directly by pushing it's content as
7795 * a new input stream.
7796 *
7797 * [69] PEReference ::= '%' Name ';'
7798 *
7799 * [ WFC: No Recursion ]
7800 * A parsed entity must not contain a recursive
7801 * reference to itself, either directly or indirectly.
7802 *
7803 * [ WFC: Entity Declared ]
7804 * In a document without any DTD, a document with only an internal DTD
7805 * subset which contains no parameter entity references, or a document
7806 * with "standalone='yes'", ... ... The declaration of a parameter
7807 * entity must precede any reference to it...
7808 *
7809 * [ VC: Entity Declared ]
7810 * In a document with an external subset or external parameter entities
7811 * with "standalone='no'", ... ... The declaration of a parameter entity
7812 * must precede any reference to it...
7813 *
7814 * [ WFC: In DTD ]
7815 * Parameter-entity references may only appear in the DTD.
7816 * NOTE: misleading but this is handled.
7817 */
7818void
7819xmlParsePEReference(xmlParserCtxtPtr ctxt)
7820{
7821 const xmlChar *name;
7822 xmlEntityPtr entity = NULL;
7823 xmlParserInputPtr input;
7824
7825 if (RAW != '%')
7826 return;
7827 NEXT;
7828 name = xmlParseName(ctxt);
7829 if (name == NULL) {
7830 xmlFatalErrMsg(ctxt, XML_ERR_PEREF_NO_NAME, "PEReference: no name\n");
7831 return;
7832 }
7833 if (RAW != ';') {
7834 xmlFatalErr(ctxt, XML_ERR_PEREF_SEMICOL_MISSING, NULL);
7835 return;
7836 }
7837
7838 NEXT;
7839
7840 /* Must be set before xmlHandleUndeclaredEntity */
7841 ctxt->hasPErefs = 1;
7842
7843 /*
7844 * Request the entity from SAX
7845 */
7846 if ((ctxt->sax != NULL) &&
7847 (ctxt->sax->getParameterEntity != NULL))
7848 entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
7849
7850 if (entity == NULL) {
7851 xmlHandleUndeclaredEntity(ctxt, name);
7852 } else {
7853 /*
7854 * Internal checking in case the entity quest barfed
7855 */
7856 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
7857 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
7858 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
7859 "Internal: %%%s; is not a parameter entity\n",
7860 name, NULL);
7861 } else {
7862 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
7863 ((ctxt->options & XML_PARSE_NO_XXE) ||
7864 ((ctxt->loadsubset == 0) &&
7865 (ctxt->replaceEntities == 0) &&
7866 (ctxt->validate == 0))))
7867 return;
7868
7869 if (entity->flags & XML_ENT_EXPANDING) {
7870 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
7871 xmlHaltParser(ctxt);
7872 return;
7873 }
7874
7875 input = xmlNewEntityInputStream(ctxt, entity);
7876 if (xmlPushInput(ctxt, input) < 0) {
7877 xmlFreeInputStream(input);
7878 return;
7879 }
7880
7881 entity->flags |= XML_ENT_EXPANDING;
7882
7883 if (entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) {
7884 xmlDetectEncoding(ctxt);
7885
7886 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) &&
7887 (IS_BLANK_CH(NXT(5)))) {
7888 xmlParseTextDecl(ctxt);
7889 }
7890 }
7891 }
7892 }
7893}
7894
7895/**
7896 * xmlLoadEntityContent:
7897 * @ctxt: an XML parser context
7898 * @entity: an unloaded system entity
7899 *
7900 * Load the original content of the given system entity from the
7901 * ExternalID/SystemID given. This is to be used for Included in Literal
7902 * http://www.w3.org/TR/REC-xml/#inliteral processing of entities references
7903 *
7904 * Returns 0 in case of success and -1 in case of failure
7905 */
7906static int
7907xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
7908 xmlParserInputPtr oldinput, input = NULL;
7909 xmlParserInputPtr *oldinputTab;
7910 const xmlChar *oldencoding;
7911 xmlChar *content = NULL;
7912 size_t length, i;
7913 int oldinputNr, oldinputMax;
7914 int ret = -1;
7915 int res;
7916
7917 if ((ctxt == NULL) || (entity == NULL) ||
7918 ((entity->etype != XML_EXTERNAL_PARAMETER_ENTITY) &&
7919 (entity->etype != XML_EXTERNAL_GENERAL_PARSED_ENTITY)) ||
7920 (entity->content != NULL)) {
7921 xmlFatalErr(ctxt, XML_ERR_ARGUMENT,
7922 "xmlLoadEntityContent parameter error");
7923 return(-1);
7924 }
7925
7926 input = xmlLoadExternalEntity((char *) entity->URI,
7927 (char *) entity->ExternalID, ctxt);
7928 if (input == NULL)
7929 return(-1);
7930
7931 oldinput = ctxt->input;
7932 oldinputNr = ctxt->inputNr;
7933 oldinputMax = ctxt->inputMax;
7934 oldinputTab = ctxt->inputTab;
7935 oldencoding = ctxt->encoding;
7936
7937 ctxt->input = NULL;
7938 ctxt->inputNr = 0;
7939 ctxt->inputMax = 1;
7940 ctxt->encoding = NULL;
7941 ctxt->inputTab = xmlMalloc(sizeof(xmlParserInputPtr));
7942 if (ctxt->inputTab == NULL) {
7943 xmlErrMemory(ctxt);
7944 xmlFreeInputStream(input);
7945 goto error;
7946 }
7947
7948 xmlBufResetInput(input->buf->buffer, input);
7949
7950 inputPush(ctxt, input);
7951
7952 xmlDetectEncoding(ctxt);
7953
7954 /*
7955 * Parse a possible text declaration first
7956 */
7957 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
7958 xmlParseTextDecl(ctxt);
7959 /*
7960 * An XML-1.0 document can't reference an entity not XML-1.0
7961 */
7962 if ((xmlStrEqual(ctxt->version, BAD_CAST "1.0")) &&
7963 (!xmlStrEqual(ctxt->input->version, BAD_CAST "1.0"))) {
7964 xmlFatalErrMsg(ctxt, XML_ERR_VERSION_MISMATCH,
7965 "Version mismatch between document and entity\n");
7966 }
7967 }
7968
7969 length = input->cur - input->base;
7970 xmlBufShrink(input->buf->buffer, length);
7971 xmlSaturatedAdd(&ctxt->sizeentities, length);
7972
7973 while ((res = xmlParserInputBufferGrow(input->buf, 4096)) > 0)
7974 ;
7975
7976 xmlBufResetInput(input->buf->buffer, input);
7977
7978 if (res < 0) {
7979 xmlCtxtErrIO(ctxt, input->buf->error, NULL);
7980 goto error;
7981 }
7982
7983 length = xmlBufUse(input->buf->buffer);
7984 content = xmlBufDetach(input->buf->buffer);
7985
7986 if (length > INT_MAX) {
7987 xmlErrMemory(ctxt);
7988 goto error;
7989 }
7990
7991 for (i = 0; i < length; ) {
7992 int clen = length - i;
7993 int c = xmlGetUTF8Char(content + i, &clen);
7994
7995 if ((c < 0) || (!IS_CHAR(c))) {
7996 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
7997 "xmlLoadEntityContent: invalid char value %d\n",
7998 content[i]);
7999 goto error;
8000 }
8001 i += clen;
8002 }
8003
8004 xmlSaturatedAdd(&ctxt->sizeentities, length);
8005 entity->content = content;
8006 entity->length = length;
8007 content = NULL;
8008 ret = 0;
8009
8010error:
8011 while (ctxt->inputNr > 0)
8012 xmlFreeInputStream(inputPop(ctxt));
8013 xmlFree(ctxt->inputTab);
8014 xmlFree((xmlChar *) ctxt->encoding);
8015
8016 ctxt->input = oldinput;
8017 ctxt->inputNr = oldinputNr;
8018 ctxt->inputMax = oldinputMax;
8019 ctxt->inputTab = oldinputTab;
8020 ctxt->encoding = oldencoding;
8021
8022 xmlFree(content);
8023
8024 return(ret);
8025}
8026
8027/**
8028 * xmlParseStringPEReference:
8029 * @ctxt: an XML parser context
8030 * @str: a pointer to an index in the string
8031 *
8032 * parse PEReference declarations
8033 *
8034 * [69] PEReference ::= '%' Name ';'
8035 *
8036 * [ WFC: No Recursion ]
8037 * A parsed entity must not contain a recursive
8038 * reference to itself, either directly or indirectly.
8039 *
8040 * [ WFC: Entity Declared ]
8041 * In a document without any DTD, a document with only an internal DTD
8042 * subset which contains no parameter entity references, or a document
8043 * with "standalone='yes'", ... ... The declaration of a parameter
8044 * entity must precede any reference to it...
8045 *
8046 * [ VC: Entity Declared ]
8047 * In a document with an external subset or external parameter entities
8048 * with "standalone='no'", ... ... The declaration of a parameter entity
8049 * must precede any reference to it...
8050 *
8051 * [ WFC: In DTD ]
8052 * Parameter-entity references may only appear in the DTD.
8053 * NOTE: misleading but this is handled.
8054 *
8055 * Returns the string of the entity content.
8056 * str is updated to the current value of the index
8057 */
8058static xmlEntityPtr
8059xmlParseStringPEReference(xmlParserCtxtPtr ctxt, const xmlChar **str) {
8060 const xmlChar *ptr;
8061 xmlChar cur;
8062 xmlChar *name;
8063 xmlEntityPtr entity = NULL;
8064
8065 if ((str == NULL) || (*str == NULL)) return(NULL);
8066 ptr = *str;
8067 cur = *ptr;
8068 if (cur != '%')
8069 return(NULL);
8070 ptr++;
8071 name = xmlParseStringName(ctxt, &ptr);
8072 if (name == NULL) {
8073 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8074 "xmlParseStringPEReference: no name\n");
8075 *str = ptr;
8076 return(NULL);
8077 }
8078 cur = *ptr;
8079 if (cur != ';') {
8080 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
8081 xmlFree(name);
8082 *str = ptr;
8083 return(NULL);
8084 }
8085 ptr++;
8086
8087 /* Must be set before xmlHandleUndeclaredEntity */
8088 ctxt->hasPErefs = 1;
8089
8090 /*
8091 * Request the entity from SAX
8092 */
8093 if ((ctxt->sax != NULL) &&
8094 (ctxt->sax->getParameterEntity != NULL))
8095 entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
8096
8097 if (entity == NULL) {
8098 xmlHandleUndeclaredEntity(ctxt, name);
8099 } else {
8100 /*
8101 * Internal checking in case the entity quest barfed
8102 */
8103 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
8104 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
8105 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
8106 "%%%s; is not a parameter entity\n",
8107 name, NULL);
8108 }
8109 }
8110
8111 xmlFree(name);
8112 *str = ptr;
8113 return(entity);
8114}
8115
8116/**
8117 * xmlParseDocTypeDecl:
8118 * @ctxt: an XML parser context
8119 *
8120 * DEPRECATED: Internal function, don't use.
8121 *
8122 * parse a DOCTYPE declaration
8123 *
8124 * [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S?
8125 * ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
8126 *
8127 * [ VC: Root Element Type ]
8128 * The Name in the document type declaration must match the element
8129 * type of the root element.
8130 */
8131
8132void
8133xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt) {
8134 const xmlChar *name = NULL;
8135 xmlChar *ExternalID = NULL;
8136 xmlChar *URI = NULL;
8137
8138 /*
8139 * We know that '<!DOCTYPE' has been detected.
8140 */
8141 SKIP(9);
8142
8143 SKIP_BLANKS;
8144
8145 /*
8146 * Parse the DOCTYPE name.
8147 */
8148 name = xmlParseName(ctxt);
8149 if (name == NULL) {
8150 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8151 "xmlParseDocTypeDecl : no DOCTYPE name !\n");
8152 }
8153 ctxt->intSubName = name;
8154
8155 SKIP_BLANKS;
8156
8157 /*
8158 * Check for SystemID and ExternalID
8159 */
8160 URI = xmlParseExternalID(ctxt, &ExternalID, 1);
8161
8162 if ((URI != NULL) || (ExternalID != NULL)) {
8163 ctxt->hasExternalSubset = 1;
8164 }
8165 ctxt->extSubURI = URI;
8166 ctxt->extSubSystem = ExternalID;
8167
8168 SKIP_BLANKS;
8169
8170 /*
8171 * Create and update the internal subset.
8172 */
8173 if ((ctxt->sax != NULL) && (ctxt->sax->internalSubset != NULL) &&
8174 (!ctxt->disableSAX))
8175 ctxt->sax->internalSubset(ctxt->userData, name, ExternalID, URI);
8176
8177 /*
8178 * Is there any internal subset declarations ?
8179 * they are handled separately in xmlParseInternalSubset()
8180 */
8181 if (RAW == '[')
8182 return;
8183
8184 /*
8185 * We should be at the end of the DOCTYPE declaration.
8186 */
8187 if (RAW != '>') {
8188 xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
8189 }
8190 NEXT;
8191}
8192
8193/**
8194 * xmlParseInternalSubset:
8195 * @ctxt: an XML parser context
8196 *
8197 * parse the internal subset declaration
8198 *
8199 * [28 end] ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
8200 */
8201
8202static void
8203xmlParseInternalSubset(xmlParserCtxtPtr ctxt) {
8204 /*
8205 * Is there any DTD definition ?
8206 */
8207 if (RAW == '[') {
8208 int oldInputNr = ctxt->inputNr;
8209
8210 NEXT;
8211 /*
8212 * Parse the succession of Markup declarations and
8213 * PEReferences.
8214 * Subsequence (markupdecl | PEReference | S)*
8215 */
8216 SKIP_BLANKS;
8217 while (((RAW != ']') || (ctxt->inputNr > oldInputNr)) &&
8218 (PARSER_STOPPED(ctxt) == 0)) {
8219
8220 /*
8221 * Conditional sections are allowed from external entities included
8222 * by PE References in the internal subset.
8223 */
8224 if ((PARSER_EXTERNAL(ctxt)) &&
8225 (RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
8226 xmlParseConditionalSections(ctxt);
8227 } else if ((RAW == '<') && ((NXT(1) == '!') || (NXT(1) == '?'))) {
8228 xmlParseMarkupDecl(ctxt);
8229 } else if (RAW == '%') {
8230 xmlParsePEReference(ctxt);
8231 } else {
8232 xmlFatalErr(ctxt, XML_ERR_INT_SUBSET_NOT_FINISHED, NULL);
8233 break;
8234 }
8235 SKIP_BLANKS_PE;
8236 SHRINK;
8237 GROW;
8238 }
8239
8240 while (ctxt->inputNr > oldInputNr)
8241 xmlPopPE(ctxt);
8242
8243 if (RAW == ']') {
8244 NEXT;
8245 SKIP_BLANKS;
8246 }
8247 }
8248
8249 /*
8250 * We should be at the end of the DOCTYPE declaration.
8251 */
8252 if ((ctxt->wellFormed) && (RAW != '>')) {
8253 xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
8254 return;
8255 }
8256 NEXT;
8257}
8258
8259#ifdef LIBXML_SAX1_ENABLED
8260/**
8261 * xmlParseAttribute:
8262 * @ctxt: an XML parser context
8263 * @value: a xmlChar ** used to store the value of the attribute
8264 *
8265 * DEPRECATED: Internal function, don't use.
8266 *
8267 * parse an attribute
8268 *
8269 * [41] Attribute ::= Name Eq AttValue
8270 *
8271 * [ WFC: No External Entity References ]
8272 * Attribute values cannot contain direct or indirect entity references
8273 * to external entities.
8274 *
8275 * [ WFC: No < in Attribute Values ]
8276 * The replacement text of any entity referred to directly or indirectly in
8277 * an attribute value (other than "&lt;") must not contain a <.
8278 *
8279 * [ VC: Attribute Value Type ]
8280 * The attribute must have been declared; the value must be of the type
8281 * declared for it.
8282 *
8283 * [25] Eq ::= S? '=' S?
8284 *
8285 * With namespace:
8286 *
8287 * [NS 11] Attribute ::= QName Eq AttValue
8288 *
8289 * Also the case QName == xmlns:??? is handled independently as a namespace
8290 * definition.
8291 *
8292 * Returns the attribute name, and the value in *value.
8293 */
8294
8295const xmlChar *
8296xmlParseAttribute(xmlParserCtxtPtr ctxt, xmlChar **value) {
8297 const xmlChar *name;
8298 xmlChar *val;
8299
8300 *value = NULL;
8301 GROW;
8302 name = xmlParseName(ctxt);
8303 if (name == NULL) {
8304 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8305 "error parsing attribute name\n");
8306 return(NULL);
8307 }
8308
8309 /*
8310 * read the value
8311 */
8312 SKIP_BLANKS;
8313 if (RAW == '=') {
8314 NEXT;
8315 SKIP_BLANKS;
8316 val = xmlParseAttValue(ctxt);
8317 } else {
8318 xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
8319 "Specification mandates value for attribute %s\n", name);
8320 return(name);
8321 }
8322
8323 /*
8324 * Check that xml:lang conforms to the specification
8325 * No more registered as an error, just generate a warning now
8326 * since this was deprecated in XML second edition
8327 */
8328 if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "xml:lang"))) {
8329 if (!xmlCheckLanguageID(val)) {
8330 xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
8331 "Malformed value for xml:lang : %s\n",
8332 val, NULL);
8333 }
8334 }
8335
8336 /*
8337 * Check that xml:space conforms to the specification
8338 */
8339 if (xmlStrEqual(name, BAD_CAST "xml:space")) {
8340 if (xmlStrEqual(val, BAD_CAST "default"))
8341 *(ctxt->space) = 0;
8342 else if (xmlStrEqual(val, BAD_CAST "preserve"))
8343 *(ctxt->space) = 1;
8344 else {
8345 xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE,
8346"Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
8347 val, NULL);
8348 }
8349 }
8350
8351 *value = val;
8352 return(name);
8353}
8354
8355/**
8356 * xmlParseStartTag:
8357 * @ctxt: an XML parser context
8358 *
8359 * DEPRECATED: Internal function, don't use.
8360 *
8361 * Parse a start tag. Always consumes '<'.
8362 *
8363 * [40] STag ::= '<' Name (S Attribute)* S? '>'
8364 *
8365 * [ WFC: Unique Att Spec ]
8366 * No attribute name may appear more than once in the same start-tag or
8367 * empty-element tag.
8368 *
8369 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
8370 *
8371 * [ WFC: Unique Att Spec ]
8372 * No attribute name may appear more than once in the same start-tag or
8373 * empty-element tag.
8374 *
8375 * With namespace:
8376 *
8377 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
8378 *
8379 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
8380 *
8381 * Returns the element name parsed
8382 */
8383
8384const xmlChar *
8385xmlParseStartTag(xmlParserCtxtPtr ctxt) {
8386 const xmlChar *name;
8387 const xmlChar *attname;
8388 xmlChar *attvalue;
8389 const xmlChar **atts = ctxt->atts;
8390 int nbatts = 0;
8391 int maxatts = ctxt->maxatts;
8392 int i;
8393
8394 if (RAW != '<') return(NULL);
8395 NEXT1;
8396
8397 name = xmlParseName(ctxt);
8398 if (name == NULL) {
8399 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8400 "xmlParseStartTag: invalid element name\n");
8401 return(NULL);
8402 }
8403
8404 /*
8405 * Now parse the attributes, it ends up with the ending
8406 *
8407 * (S Attribute)* S?
8408 */
8409 SKIP_BLANKS;
8410 GROW;
8411
8412 while (((RAW != '>') &&
8413 ((RAW != '/') || (NXT(1) != '>')) &&
8414 (IS_BYTE_CHAR(RAW))) && (PARSER_STOPPED(ctxt) == 0)) {
8415 attname = xmlParseAttribute(ctxt, &attvalue);
8416 if (attname == NULL)
8417 break;
8418 if (attvalue != NULL) {
8419 /*
8420 * [ WFC: Unique Att Spec ]
8421 * No attribute name may appear more than once in the same
8422 * start-tag or empty-element tag.
8423 */
8424 for (i = 0; i < nbatts;i += 2) {
8425 if (xmlStrEqual(atts[i], attname)) {
8426 xmlErrAttributeDup(ctxt, NULL, attname);
8427 xmlFree(attvalue);
8428 goto failed;
8429 }
8430 }
8431 /*
8432 * Add the pair to atts
8433 */
8434 if (atts == NULL) {
8435 maxatts = 22; /* allow for 10 attrs by default */
8436 atts = (const xmlChar **)
8437 xmlMalloc(maxatts * sizeof(xmlChar *));
8438 if (atts == NULL) {
8439 xmlErrMemory(ctxt);
8440 if (attvalue != NULL)
8441 xmlFree(attvalue);
8442 goto failed;
8443 }
8444 ctxt->atts = atts;
8445 ctxt->maxatts = maxatts;
8446 } else if (nbatts + 4 > maxatts) {
8447 const xmlChar **n;
8448
8449 maxatts *= 2;
8450 n = (const xmlChar **) xmlRealloc((void *) atts,
8451 maxatts * sizeof(const xmlChar *));
8452 if (n == NULL) {
8453 xmlErrMemory(ctxt);
8454 if (attvalue != NULL)
8455 xmlFree(attvalue);
8456 goto failed;
8457 }
8458 atts = n;
8459 ctxt->atts = atts;
8460 ctxt->maxatts = maxatts;
8461 }
8462 atts[nbatts++] = attname;
8463 atts[nbatts++] = attvalue;
8464 atts[nbatts] = NULL;
8465 atts[nbatts + 1] = NULL;
8466 } else {
8467 if (attvalue != NULL)
8468 xmlFree(attvalue);
8469 }
8470
8471failed:
8472
8473 GROW
8474 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
8475 break;
8476 if (SKIP_BLANKS == 0) {
8477 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
8478 "attributes construct error\n");
8479 }
8480 SHRINK;
8481 GROW;
8482 }
8483
8484 /*
8485 * SAX: Start of Element !
8486 */
8487 if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL) &&
8488 (!ctxt->disableSAX)) {
8489 if (nbatts > 0)
8490 ctxt->sax->startElement(ctxt->userData, name, atts);
8491 else
8492 ctxt->sax->startElement(ctxt->userData, name, NULL);
8493 }
8494
8495 if (atts != NULL) {
8496 /* Free only the content strings */
8497 for (i = 1;i < nbatts;i+=2)
8498 if (atts[i] != NULL)
8499 xmlFree((xmlChar *) atts[i]);
8500 }
8501 return(name);
8502}
8503
8504/**
8505 * xmlParseEndTag1:
8506 * @ctxt: an XML parser context
8507 * @line: line of the start tag
8508 * @nsNr: number of namespaces on the start tag
8509 *
8510 * Parse an end tag. Always consumes '</'.
8511 *
8512 * [42] ETag ::= '</' Name S? '>'
8513 *
8514 * With namespace
8515 *
8516 * [NS 9] ETag ::= '</' QName S? '>'
8517 */
8518
8519static void
8520xmlParseEndTag1(xmlParserCtxtPtr ctxt, int line) {
8521 const xmlChar *name;
8522
8523 GROW;
8524 if ((RAW != '<') || (NXT(1) != '/')) {
8525 xmlFatalErrMsg(ctxt, XML_ERR_LTSLASH_REQUIRED,
8526 "xmlParseEndTag: '</' not found\n");
8527 return;
8528 }
8529 SKIP(2);
8530
8531 name = xmlParseNameAndCompare(ctxt,ctxt->name);
8532
8533 /*
8534 * We should definitely be at the ending "S? '>'" part
8535 */
8536 GROW;
8537 SKIP_BLANKS;
8538 if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
8539 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
8540 } else
8541 NEXT1;
8542
8543 /*
8544 * [ WFC: Element Type Match ]
8545 * The Name in an element's end-tag must match the element type in the
8546 * start-tag.
8547 *
8548 */
8549 if (name != (xmlChar*)1) {
8550 if (name == NULL) name = BAD_CAST "unparsable";
8551 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
8552 "Opening and ending tag mismatch: %s line %d and %s\n",
8553 ctxt->name, line, name);
8554 }
8555
8556 /*
8557 * SAX: End of Tag
8558 */
8559 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
8560 (!ctxt->disableSAX))
8561 ctxt->sax->endElement(ctxt->userData, ctxt->name);
8562
8563 namePop(ctxt);
8564 spacePop(ctxt);
8565 return;
8566}
8567
8568/**
8569 * xmlParseEndTag:
8570 * @ctxt: an XML parser context
8571 *
8572 * DEPRECATED: Internal function, don't use.
8573 *
8574 * parse an end of tag
8575 *
8576 * [42] ETag ::= '</' Name S? '>'
8577 *
8578 * With namespace
8579 *
8580 * [NS 9] ETag ::= '</' QName S? '>'
8581 */
8582
8583void
8584xmlParseEndTag(xmlParserCtxtPtr ctxt) {
8585 xmlParseEndTag1(ctxt, 0);
8586}
8587#endif /* LIBXML_SAX1_ENABLED */
8588
8589/************************************************************************
8590 * *
8591 * SAX 2 specific operations *
8592 * *
8593 ************************************************************************/
8594
8595/**
8596 * xmlParseQNameHashed:
8597 * @ctxt: an XML parser context
8598 * @prefix: pointer to store the prefix part
8599 *
8600 * parse an XML Namespace QName
8601 *
8602 * [6] QName ::= (Prefix ':')? LocalPart
8603 * [7] Prefix ::= NCName
8604 * [8] LocalPart ::= NCName
8605 *
8606 * Returns the Name parsed or NULL
8607 */
8608
8609static xmlHashedString
8610xmlParseQNameHashed(xmlParserCtxtPtr ctxt, xmlHashedString *prefix) {
8611 xmlHashedString l, p;
8612 int start, isNCName = 0;
8613
8614 l.name = NULL;
8615 p.name = NULL;
8616
8617 GROW;
8618 start = CUR_PTR - BASE_PTR;
8619
8620 l = xmlParseNCName(ctxt);
8621 if (l.name != NULL) {
8622 isNCName = 1;
8623 if (CUR == ':') {
8624 NEXT;
8625 p = l;
8626 l = xmlParseNCName(ctxt);
8627 }
8628 }
8629 if ((l.name == NULL) || (CUR == ':')) {
8630 xmlChar *tmp;
8631
8632 l.name = NULL;
8633 p.name = NULL;
8634 if ((isNCName == 0) && (CUR != ':'))
8635 return(l);
8636 tmp = xmlParseNmtoken(ctxt);
8637 if (tmp != NULL)
8638 xmlFree(tmp);
8639 l = xmlDictLookupHashed(ctxt->dict, BASE_PTR + start,
8640 CUR_PTR - (BASE_PTR + start));
8641 if (l.name == NULL) {
8642 xmlErrMemory(ctxt);
8643 return(l);
8644 }
8645 xmlNsErr(ctxt, XML_NS_ERR_QNAME,
8646 "Failed to parse QName '%s'\n", l.name, NULL, NULL);
8647 }
8648
8649 *prefix = p;
8650 return(l);
8651}
8652
8653/**
8654 * xmlParseQName:
8655 * @ctxt: an XML parser context
8656 * @prefix: pointer to store the prefix part
8657 *
8658 * parse an XML Namespace QName
8659 *
8660 * [6] QName ::= (Prefix ':')? LocalPart
8661 * [7] Prefix ::= NCName
8662 * [8] LocalPart ::= NCName
8663 *
8664 * Returns the Name parsed or NULL
8665 */
8666
8667static const xmlChar *
8668xmlParseQName(xmlParserCtxtPtr ctxt, const xmlChar **prefix) {
8669 xmlHashedString n, p;
8670
8671 n = xmlParseQNameHashed(ctxt, &p);
8672 if (n.name == NULL)
8673 return(NULL);
8674 *prefix = p.name;
8675 return(n.name);
8676}
8677
8678/**
8679 * xmlParseQNameAndCompare:
8680 * @ctxt: an XML parser context
8681 * @name: the localname
8682 * @prefix: the prefix, if any.
8683 *
8684 * parse an XML name and compares for match
8685 * (specialized for endtag parsing)
8686 *
8687 * Returns NULL for an illegal name, (xmlChar*) 1 for success
8688 * and the name for mismatch
8689 */
8690
8691static const xmlChar *
8692xmlParseQNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *name,
8693 xmlChar const *prefix) {
8694 const xmlChar *cmp;
8695 const xmlChar *in;
8696 const xmlChar *ret;
8697 const xmlChar *prefix2;
8698
8699 if (prefix == NULL) return(xmlParseNameAndCompare(ctxt, name));
8700
8701 GROW;
8702 in = ctxt->input->cur;
8703
8704 cmp = prefix;
8705 while (*in != 0 && *in == *cmp) {
8706 ++in;
8707 ++cmp;
8708 }
8709 if ((*cmp == 0) && (*in == ':')) {
8710 in++;
8711 cmp = name;
8712 while (*in != 0 && *in == *cmp) {
8713 ++in;
8714 ++cmp;
8715 }
8716 if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
8717 /* success */
8718 ctxt->input->col += in - ctxt->input->cur;
8719 ctxt->input->cur = in;
8720 return((const xmlChar*) 1);
8721 }
8722 }
8723 /*
8724 * all strings coms from the dictionary, equality can be done directly
8725 */
8726 ret = xmlParseQName (ctxt, &prefix2);
8727 if (ret == NULL)
8728 return(NULL);
8729 if ((ret == name) && (prefix == prefix2))
8730 return((const xmlChar*) 1);
8731 return ret;
8732}
8733
8734/**
8735 * xmlParseAttribute2:
8736 * @ctxt: an XML parser context
8737 * @pref: the element prefix
8738 * @elem: the element name
8739 * @prefix: a xmlChar ** used to store the value of the attribute prefix
8740 * @value: a xmlChar ** used to store the value of the attribute
8741 * @len: an int * to save the length of the attribute
8742 * @alloc: an int * to indicate if the attribute was allocated
8743 *
8744 * parse an attribute in the new SAX2 framework.
8745 *
8746 * Returns the attribute name, and the value in *value, .
8747 */
8748
8749static xmlHashedString
8750xmlParseAttribute2(xmlParserCtxtPtr ctxt,
8751 const xmlChar * pref, const xmlChar * elem,
8752 xmlHashedString * hprefix, xmlChar ** value,
8753 int *len, int *alloc)
8754{
8755 xmlHashedString hname;
8756 const xmlChar *prefix, *name;
8757 xmlChar *val = NULL, *internal_val = NULL;
8758 int normalize = 0;
8759 int isNamespace;
8760
8761 *value = NULL;
8762 GROW;
8763 hname = xmlParseQNameHashed(ctxt, hprefix);
8764 if (hname.name == NULL) {
8765 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8766 "error parsing attribute name\n");
8767 return(hname);
8768 }
8769 name = hname.name;
8770 if (hprefix->name != NULL)
8771 prefix = hprefix->name;
8772 else
8773 prefix = NULL;
8774
8775 /*
8776 * get the type if needed
8777 */
8778 if (ctxt->attsSpecial != NULL) {
8779 int type;
8780
8781 type = (int) (ptrdiff_t) xmlHashQLookup2(ctxt->attsSpecial,
8782 pref, elem,
8783 prefix, name);
8784 if (type != 0)
8785 normalize = 1;
8786 }
8787
8788 /*
8789 * read the value
8790 */
8791 SKIP_BLANKS;
8792 if (RAW == '=') {
8793 NEXT;
8794 SKIP_BLANKS;
8795 isNamespace = (((prefix == NULL) && (name == ctxt->str_xmlns)) ||
8796 (prefix == ctxt->str_xmlns));
8797 val = xmlParseAttValueInternal(ctxt, len, alloc, normalize,
8798 isNamespace);
8799 if (val == NULL)
8800 goto error;
8801 } else {
8802 xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
8803 "Specification mandates value for attribute %s\n",
8804 name);
8805 goto error;
8806 }
8807
8808 if (prefix == ctxt->str_xml) {
8809 /*
8810 * Check that xml:lang conforms to the specification
8811 * No more registered as an error, just generate a warning now
8812 * since this was deprecated in XML second edition
8813 */
8814 if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "lang"))) {
8815 internal_val = xmlStrndup(val, *len);
8816 if (internal_val == NULL)
8817 goto mem_error;
8818 if (!xmlCheckLanguageID(internal_val)) {
8819 xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
8820 "Malformed value for xml:lang : %s\n",
8821 internal_val, NULL);
8822 }
8823 }
8824
8825 /*
8826 * Check that xml:space conforms to the specification
8827 */
8828 if (xmlStrEqual(name, BAD_CAST "space")) {
8829 internal_val = xmlStrndup(val, *len);
8830 if (internal_val == NULL)
8831 goto mem_error;
8832 if (xmlStrEqual(internal_val, BAD_CAST "default"))
8833 *(ctxt->space) = 0;
8834 else if (xmlStrEqual(internal_val, BAD_CAST "preserve"))
8835 *(ctxt->space) = 1;
8836 else {
8837 xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE,
8838 "Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
8839 internal_val, NULL);
8840 }
8841 }
8842 if (internal_val) {
8843 xmlFree(internal_val);
8844 }
8845 }
8846
8847 *value = val;
8848 return (hname);
8849
8850mem_error:
8851 xmlErrMemory(ctxt);
8852error:
8853 if ((val != NULL) && (*alloc != 0))
8854 xmlFree(val);
8855 return(hname);
8856}
8857
8858/**
8859 * xmlAttrHashInsert:
8860 * @ctxt: parser context
8861 * @size: size of the hash table
8862 * @name: attribute name
8863 * @uri: namespace uri
8864 * @hashValue: combined hash value of name and uri
8865 * @aindex: attribute index (this is a multiple of 5)
8866 *
8867 * Inserts a new attribute into the hash table.
8868 *
8869 * Returns INT_MAX if no existing attribute was found, the attribute
8870 * index if an attribute was found, -1 if a memory allocation failed.
8871 */
8872static int
8873xmlAttrHashInsert(xmlParserCtxtPtr ctxt, unsigned size, const xmlChar *name,
8874 const xmlChar *uri, unsigned hashValue, int aindex) {
8875 xmlAttrHashBucket *table = ctxt->attrHash;
8876 xmlAttrHashBucket *bucket;
8877 unsigned hindex;
8878
8879 hindex = hashValue & (size - 1);
8880 bucket = &table[hindex];
8881
8882 while (bucket->index >= 0) {
8883 const xmlChar **atts = &ctxt->atts[bucket->index];
8884
8885 if (name == atts[0]) {
8886 int nsIndex = (int) (ptrdiff_t) atts[2];
8887
8888 if ((nsIndex == NS_INDEX_EMPTY) ? (uri == NULL) :
8889 (nsIndex == NS_INDEX_XML) ? (uri == ctxt->str_xml_ns) :
8890 (uri == ctxt->nsTab[nsIndex * 2 + 1]))
8891 return(bucket->index);
8892 }
8893
8894 hindex++;
8895 bucket++;
8896 if (hindex >= size) {
8897 hindex = 0;
8898 bucket = table;
8899 }
8900 }
8901
8902 bucket->index = aindex;
8903
8904 return(INT_MAX);
8905}
8906
8907/**
8908 * xmlParseStartTag2:
8909 * @ctxt: an XML parser context
8910 *
8911 * Parse a start tag. Always consumes '<'.
8912 *
8913 * This routine is called when running SAX2 parsing
8914 *
8915 * [40] STag ::= '<' Name (S Attribute)* S? '>'
8916 *
8917 * [ WFC: Unique Att Spec ]
8918 * No attribute name may appear more than once in the same start-tag or
8919 * empty-element tag.
8920 *
8921 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
8922 *
8923 * [ WFC: Unique Att Spec ]
8924 * No attribute name may appear more than once in the same start-tag or
8925 * empty-element tag.
8926 *
8927 * With namespace:
8928 *
8929 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
8930 *
8931 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
8932 *
8933 * Returns the element name parsed
8934 */
8935
8936static const xmlChar *
8937xmlParseStartTag2(xmlParserCtxtPtr ctxt, const xmlChar **pref,
8938 const xmlChar **URI, int *nbNsPtr) {
8939 xmlHashedString hlocalname;
8940 xmlHashedString hprefix;
8941 xmlHashedString hattname;
8942 xmlHashedString haprefix;
8943 const xmlChar *localname;
8944 const xmlChar *prefix;
8945 const xmlChar *attname;
8946 const xmlChar *aprefix;
8947 const xmlChar *uri;
8948 xmlChar *attvalue = NULL;
8949 const xmlChar **atts = ctxt->atts;
8950 unsigned attrHashSize = 0;
8951 int maxatts = ctxt->maxatts;
8952 int nratts, nbatts, nbdef;
8953 int i, j, nbNs, nbTotalDef, attval, nsIndex, maxAtts;
8954 int alloc = 0;
8955
8956 if (RAW != '<') return(NULL);
8957 NEXT1;
8958
8959 nbatts = 0;
8960 nratts = 0;
8961 nbdef = 0;
8962 nbNs = 0;
8963 nbTotalDef = 0;
8964 attval = 0;
8965
8966 if (xmlParserNsStartElement(ctxt->nsdb) < 0) {
8967 xmlErrMemory(ctxt);
8968 return(NULL);
8969 }
8970
8971 hlocalname = xmlParseQNameHashed(ctxt, &hprefix);
8972 if (hlocalname.name == NULL) {
8973 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8974 "StartTag: invalid element name\n");
8975 return(NULL);
8976 }
8977 localname = hlocalname.name;
8978 prefix = hprefix.name;
8979
8980 /*
8981 * Now parse the attributes, it ends up with the ending
8982 *
8983 * (S Attribute)* S?
8984 */
8985 SKIP_BLANKS;
8986 GROW;
8987
8988 /*
8989 * The ctxt->atts array will be ultimately passed to the SAX callback
8990 * containing five xmlChar pointers for each attribute:
8991 *
8992 * [0] attribute name
8993 * [1] attribute prefix
8994 * [2] namespace URI
8995 * [3] attribute value
8996 * [4] end of attribute value
8997 *
8998 * To save memory, we reuse this array temporarily and store integers
8999 * in these pointer variables.
9000 *
9001 * [0] attribute name
9002 * [1] attribute prefix
9003 * [2] hash value of attribute prefix, and later namespace index
9004 * [3] for non-allocated values: ptrdiff_t offset into input buffer
9005 * [4] for non-allocated values: ptrdiff_t offset into input buffer
9006 *
9007 * The ctxt->attallocs array contains an additional unsigned int for
9008 * each attribute, containing the hash value of the attribute name
9009 * and the alloc flag in bit 31.
9010 */
9011
9012 while (((RAW != '>') &&
9013 ((RAW != '/') || (NXT(1) != '>')) &&
9014 (IS_BYTE_CHAR(RAW))) && (PARSER_STOPPED(ctxt) == 0)) {
9015 int len = -1;
9016
9017 hattname = xmlParseAttribute2(ctxt, prefix, localname,
9018 &haprefix, &attvalue, &len,
9019 &alloc);
9020 if (hattname.name == NULL)
9021 break;
9022 if (attvalue == NULL)
9023 goto next_attr;
9024 attname = hattname.name;
9025 aprefix = haprefix.name;
9026 if (len < 0) len = xmlStrlen(attvalue);
9027
9028 if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
9029 xmlHashedString huri;
9030 xmlURIPtr parsedUri;
9031
9032 huri = xmlDictLookupHashed(ctxt->dict, attvalue, len);
9033 uri = huri.name;
9034 if (uri == NULL) {
9035 xmlErrMemory(ctxt);
9036 goto next_attr;
9037 }
9038 if (*uri != 0) {
9039 if (xmlParseURISafe((const char *) uri, &parsedUri) < 0) {
9040 xmlErrMemory(ctxt);
9041 goto next_attr;
9042 }
9043 if (parsedUri == NULL) {
9044 xmlNsErr(ctxt, XML_WAR_NS_URI,
9045 "xmlns: '%s' is not a valid URI\n",
9046 uri, NULL, NULL);
9047 } else {
9048 if (parsedUri->scheme == NULL) {
9049 xmlNsWarn(ctxt, XML_WAR_NS_URI_RELATIVE,
9050 "xmlns: URI %s is not absolute\n",
9051 uri, NULL, NULL);
9052 }
9053 xmlFreeURI(parsedUri);
9054 }
9055 if (uri == ctxt->str_xml_ns) {
9056 if (attname != ctxt->str_xml) {
9057 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9058 "xml namespace URI cannot be the default namespace\n",
9059 NULL, NULL, NULL);
9060 }
9061 goto next_attr;
9062 }
9063 if ((len == 29) &&
9064 (xmlStrEqual(uri,
9065 BAD_CAST "http://www.w3.org/2000/xmlns/"))) {
9066 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9067 "reuse of the xmlns namespace name is forbidden\n",
9068 NULL, NULL, NULL);
9069 goto next_attr;
9070 }
9071 }
9072
9073 if (xmlParserNsPush(ctxt, NULL, &huri, NULL, 0) > 0)
9074 nbNs++;
9075 } else if (aprefix == ctxt->str_xmlns) {
9076 xmlHashedString huri;
9077 xmlURIPtr parsedUri;
9078
9079 huri = xmlDictLookupHashed(ctxt->dict, attvalue, len);
9080 uri = huri.name;
9081 if (uri == NULL) {
9082 xmlErrMemory(ctxt);
9083 goto next_attr;
9084 }
9085
9086 if (attname == ctxt->str_xml) {
9087 if (uri != ctxt->str_xml_ns) {
9088 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9089 "xml namespace prefix mapped to wrong URI\n",
9090 NULL, NULL, NULL);
9091 }
9092 /*
9093 * Do not keep a namespace definition node
9094 */
9095 goto next_attr;
9096 }
9097 if (uri == ctxt->str_xml_ns) {
9098 if (attname != ctxt->str_xml) {
9099 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9100 "xml namespace URI mapped to wrong prefix\n",
9101 NULL, NULL, NULL);
9102 }
9103 goto next_attr;
9104 }
9105 if (attname == ctxt->str_xmlns) {
9106 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9107 "redefinition of the xmlns prefix is forbidden\n",
9108 NULL, NULL, NULL);
9109 goto next_attr;
9110 }
9111 if ((len == 29) &&
9112 (xmlStrEqual(uri,
9113 BAD_CAST "http://www.w3.org/2000/xmlns/"))) {
9114 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9115 "reuse of the xmlns namespace name is forbidden\n",
9116 NULL, NULL, NULL);
9117 goto next_attr;
9118 }
9119 if ((uri == NULL) || (uri[0] == 0)) {
9120 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9121 "xmlns:%s: Empty XML namespace is not allowed\n",
9122 attname, NULL, NULL);
9123 goto next_attr;
9124 } else {
9125 if (xmlParseURISafe((const char *) uri, &parsedUri) < 0) {
9126 xmlErrMemory(ctxt);
9127 goto next_attr;
9128 }
9129 if (parsedUri == NULL) {
9130 xmlNsErr(ctxt, XML_WAR_NS_URI,
9131 "xmlns:%s: '%s' is not a valid URI\n",
9132 attname, uri, NULL);
9133 } else {
9134 if ((ctxt->pedantic) && (parsedUri->scheme == NULL)) {
9135 xmlNsWarn(ctxt, XML_WAR_NS_URI_RELATIVE,
9136 "xmlns:%s: URI %s is not absolute\n",
9137 attname, uri, NULL);
9138 }
9139 xmlFreeURI(parsedUri);
9140 }
9141 }
9142
9143 if (xmlParserNsPush(ctxt, &hattname, &huri, NULL, 0) > 0)
9144 nbNs++;
9145 } else {
9146 /*
9147 * Populate attributes array, see above for repurposing
9148 * of xmlChar pointers.
9149 */
9150 if ((atts == NULL) || (nbatts + 5 > maxatts)) {
9151 if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
9152 goto next_attr;
9153 }
9154 maxatts = ctxt->maxatts;
9155 atts = ctxt->atts;
9156 }
9157 ctxt->attallocs[nratts++] = (hattname.hashValue & 0x7FFFFFFF) |
9158 ((unsigned) alloc << 31);
9159 atts[nbatts++] = attname;
9160 atts[nbatts++] = aprefix;
9161 atts[nbatts++] = (const xmlChar *) (size_t) haprefix.hashValue;
9162 if (alloc) {
9163 atts[nbatts++] = attvalue;
9164 attvalue += len;
9165 atts[nbatts++] = attvalue;
9166 } else {
9167 /*
9168 * attvalue points into the input buffer which can be
9169 * reallocated. Store differences to input->base instead.
9170 * The pointers will be reconstructed later.
9171 */
9172 atts[nbatts++] = (void *) (attvalue - BASE_PTR);
9173 attvalue += len;
9174 atts[nbatts++] = (void *) (attvalue - BASE_PTR);
9175 }
9176 /*
9177 * tag if some deallocation is needed
9178 */
9179 if (alloc != 0) attval = 1;
9180 attvalue = NULL; /* moved into atts */
9181 }
9182
9183next_attr:
9184 if ((attvalue != NULL) && (alloc != 0)) {
9185 xmlFree(attvalue);
9186 attvalue = NULL;
9187 }
9188
9189 GROW
9190 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
9191 break;
9192 if (SKIP_BLANKS == 0) {
9193 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
9194 "attributes construct error\n");
9195 break;
9196 }
9197 GROW;
9198 }
9199
9200 /*
9201 * Namespaces from default attributes
9202 */
9203 if (ctxt->attsDefault != NULL) {
9204 xmlDefAttrsPtr defaults;
9205
9206 defaults = xmlHashLookup2(ctxt->attsDefault, localname, prefix);
9207 if (defaults != NULL) {
9208 for (i = 0; i < defaults->nbAttrs; i++) {
9209 xmlDefAttr *attr = &defaults->attrs[i];
9210
9211 attname = attr->name.name;
9212 aprefix = attr->prefix.name;
9213
9214 if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
9215 xmlParserEntityCheck(ctxt, attr->expandedSize);
9216
9217 if (xmlParserNsPush(ctxt, NULL, &attr->value, NULL, 1) > 0)
9218 nbNs++;
9219 } else if (aprefix == ctxt->str_xmlns) {
9220 xmlParserEntityCheck(ctxt, attr->expandedSize);
9221
9222 if (xmlParserNsPush(ctxt, &attr->name, &attr->value,
9223 NULL, 1) > 0)
9224 nbNs++;
9225 } else {
9226 nbTotalDef += 1;
9227 }
9228 }
9229 }
9230 }
9231
9232 /*
9233 * Resolve attribute namespaces
9234 */
9235 for (i = 0; i < nbatts; i += 5) {
9236 attname = atts[i];
9237 aprefix = atts[i+1];
9238
9239 /*
9240 * The default namespace does not apply to attribute names.
9241 */
9242 if (aprefix == NULL) {
9243 nsIndex = NS_INDEX_EMPTY;
9244 } else if (aprefix == ctxt->str_xml) {
9245 nsIndex = NS_INDEX_XML;
9246 } else {
9247 haprefix.name = aprefix;
9248 haprefix.hashValue = (size_t) atts[i+2];
9249 nsIndex = xmlParserNsLookup(ctxt, &haprefix, NULL);
9250
9251 if ((nsIndex == INT_MAX) || (nsIndex < ctxt->nsdb->minNsIndex)) {
9252 xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
9253 "Namespace prefix %s for %s on %s is not defined\n",
9254 aprefix, attname, localname);
9255 nsIndex = NS_INDEX_EMPTY;
9256 }
9257 }
9258
9259 atts[i+2] = (const xmlChar *) (ptrdiff_t) nsIndex;
9260 }
9261
9262 /*
9263 * Maximum number of attributes including default attributes.
9264 */
9265 maxAtts = nratts + nbTotalDef;
9266
9267 /*
9268 * Verify that attribute names are unique.
9269 */
9270 if (maxAtts > 1) {
9271 attrHashSize = 4;
9272 while (attrHashSize / 2 < (unsigned) maxAtts)
9273 attrHashSize *= 2;
9274
9275 if (attrHashSize > ctxt->attrHashMax) {
9276 xmlAttrHashBucket *tmp;
9277
9278 tmp = xmlRealloc(ctxt->attrHash, attrHashSize * sizeof(tmp[0]));
9279 if (tmp == NULL) {
9280 xmlErrMemory(ctxt);
9281 goto done;
9282 }
9283
9284 ctxt->attrHash = tmp;
9285 ctxt->attrHashMax = attrHashSize;
9286 }
9287
9288 memset(ctxt->attrHash, -1, attrHashSize * sizeof(ctxt->attrHash[0]));
9289
9290 for (i = 0, j = 0; j < nratts; i += 5, j++) {
9291 const xmlChar *nsuri;
9292 unsigned hashValue, nameHashValue, uriHashValue;
9293 int res;
9294
9295 attname = atts[i];
9296 aprefix = atts[i+1];
9297 nsIndex = (ptrdiff_t) atts[i+2];
9298 /* Hash values always have bit 31 set, see dict.c */
9299 nameHashValue = ctxt->attallocs[j] | 0x80000000;
9300
9301 if (nsIndex == NS_INDEX_EMPTY) {
9302 /*
9303 * Prefix with empty namespace means an undeclared
9304 * prefix which was already reported above.
9305 */
9306 if (aprefix != NULL)
9307 continue;
9308 nsuri = NULL;
9309 uriHashValue = URI_HASH_EMPTY;
9310 } else if (nsIndex == NS_INDEX_XML) {
9311 nsuri = ctxt->str_xml_ns;
9312 uriHashValue = URI_HASH_XML;
9313 } else {
9314 nsuri = ctxt->nsTab[nsIndex * 2 + 1];
9315 uriHashValue = ctxt->nsdb->extra[nsIndex].uriHashValue;
9316 }
9317
9318 hashValue = xmlDictCombineHash(nameHashValue, uriHashValue);
9319 res = xmlAttrHashInsert(ctxt, attrHashSize, attname, nsuri,
9320 hashValue, i);
9321 if (res < 0)
9322 continue;
9323
9324 /*
9325 * [ WFC: Unique Att Spec ]
9326 * No attribute name may appear more than once in the same
9327 * start-tag or empty-element tag.
9328 * As extended by the Namespace in XML REC.
9329 */
9330 if (res < INT_MAX) {
9331 if (aprefix == atts[res+1]) {
9332 xmlErrAttributeDup(ctxt, aprefix, attname);
9333 } else {
9334 xmlNsErr(ctxt, XML_NS_ERR_ATTRIBUTE_REDEFINED,
9335 "Namespaced Attribute %s in '%s' redefined\n",
9336 attname, nsuri, NULL);
9337 }
9338 }
9339 }
9340 }
9341
9342 /*
9343 * Default attributes
9344 */
9345 if (ctxt->attsDefault != NULL) {
9346 xmlDefAttrsPtr defaults;
9347
9348 defaults = xmlHashLookup2(ctxt->attsDefault, localname, prefix);
9349 if (defaults != NULL) {
9350 for (i = 0; i < defaults->nbAttrs; i++) {
9351 xmlDefAttr *attr = &defaults->attrs[i];
9352 const xmlChar *nsuri;
9353 unsigned hashValue, uriHashValue;
9354 int res;
9355
9356 attname = attr->name.name;
9357 aprefix = attr->prefix.name;
9358
9359 if ((attname == ctxt->str_xmlns) && (aprefix == NULL))
9360 continue;
9361 if (aprefix == ctxt->str_xmlns)
9362 continue;
9363
9364 if (aprefix == NULL) {
9365 nsIndex = NS_INDEX_EMPTY;
9366 nsuri = NULL;
9367 uriHashValue = URI_HASH_EMPTY;
9368 } if (aprefix == ctxt->str_xml) {
9369 nsIndex = NS_INDEX_XML;
9370 nsuri = ctxt->str_xml_ns;
9371 uriHashValue = URI_HASH_XML;
9372 } else if (aprefix != NULL) {
9373 nsIndex = xmlParserNsLookup(ctxt, &attr->prefix, NULL);
9374 if ((nsIndex == INT_MAX) ||
9375 (nsIndex < ctxt->nsdb->minNsIndex)) {
9376 xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
9377 "Namespace prefix %s for %s on %s is not "
9378 "defined\n",
9379 aprefix, attname, localname);
9380 nsIndex = NS_INDEX_EMPTY;
9381 nsuri = NULL;
9382 uriHashValue = URI_HASH_EMPTY;
9383 } else {
9384 nsuri = ctxt->nsTab[nsIndex * 2 + 1];
9385 uriHashValue = ctxt->nsdb->extra[nsIndex].uriHashValue;
9386 }
9387 }
9388
9389 /*
9390 * Check whether the attribute exists
9391 */
9392 if (maxAtts > 1) {
9393 hashValue = xmlDictCombineHash(attr->name.hashValue,
9394 uriHashValue);
9395 res = xmlAttrHashInsert(ctxt, attrHashSize, attname, nsuri,
9396 hashValue, nbatts);
9397 if (res < 0)
9398 continue;
9399 if (res < INT_MAX) {
9400 if (aprefix == atts[res+1])
9401 continue;
9402 xmlNsErr(ctxt, XML_NS_ERR_ATTRIBUTE_REDEFINED,
9403 "Namespaced Attribute %s in '%s' redefined\n",
9404 attname, nsuri, NULL);
9405 }
9406 }
9407
9408 xmlParserEntityCheck(ctxt, attr->expandedSize);
9409
9410 if ((atts == NULL) || (nbatts + 5 > maxatts)) {
9411 if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
9412 localname = NULL;
9413 goto done;
9414 }
9415 maxatts = ctxt->maxatts;
9416 atts = ctxt->atts;
9417 }
9418
9419 atts[nbatts++] = attname;
9420 atts[nbatts++] = aprefix;
9421 atts[nbatts++] = (const xmlChar *) (ptrdiff_t) nsIndex;
9422 atts[nbatts++] = attr->value.name;
9423 atts[nbatts++] = attr->valueEnd;
9424 if ((ctxt->standalone == 1) && (attr->external != 0)) {
9425 xmlValidityError(ctxt, XML_DTD_STANDALONE_DEFAULTED,
9426 "standalone: attribute %s on %s defaulted "
9427 "from external subset\n",
9428 attname, localname);
9429 }
9430 nbdef++;
9431 }
9432 }
9433 }
9434
9435 /*
9436 * Reconstruct attribute pointers
9437 */
9438 for (i = 0, j = 0; i < nbatts; i += 5, j++) {
9439 /* namespace URI */
9440 nsIndex = (ptrdiff_t) atts[i+2];
9441 if (nsIndex == INT_MAX)
9442 atts[i+2] = NULL;
9443 else if (nsIndex == INT_MAX - 1)
9444 atts[i+2] = ctxt->str_xml_ns;
9445 else
9446 atts[i+2] = ctxt->nsTab[nsIndex * 2 + 1];
9447
9448 if ((j < nratts) && (ctxt->attallocs[j] & 0x80000000) == 0) {
9449 atts[i+3] = BASE_PTR + (ptrdiff_t) atts[i+3]; /* value */
9450 atts[i+4] = BASE_PTR + (ptrdiff_t) atts[i+4]; /* valuend */
9451 }
9452 }
9453
9454 uri = xmlParserNsLookupUri(ctxt, &hprefix);
9455 if ((prefix != NULL) && (uri == NULL)) {
9456 xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
9457 "Namespace prefix %s on %s is not defined\n",
9458 prefix, localname, NULL);
9459 }
9460 *pref = prefix;
9461 *URI = uri;
9462
9463 /*
9464 * SAX callback
9465 */
9466 if ((ctxt->sax != NULL) && (ctxt->sax->startElementNs != NULL) &&
9467 (!ctxt->disableSAX)) {
9468 if (nbNs > 0)
9469 ctxt->sax->startElementNs(ctxt->userData, localname, prefix, uri,
9470 nbNs, ctxt->nsTab + 2 * (ctxt->nsNr - nbNs),
9471 nbatts / 5, nbdef, atts);
9472 else
9473 ctxt->sax->startElementNs(ctxt->userData, localname, prefix, uri,
9474 0, NULL, nbatts / 5, nbdef, atts);
9475 }
9476
9477done:
9478 /*
9479 * Free allocated attribute values
9480 */
9481 if (attval != 0) {
9482 for (i = 0, j = 0; j < nratts; i += 5, j++)
9483 if (ctxt->attallocs[j] & 0x80000000)
9484 xmlFree((xmlChar *) atts[i+3]);
9485 }
9486
9487 *nbNsPtr = nbNs;
9488 return(localname);
9489}
9490
9491/**
9492 * xmlParseEndTag2:
9493 * @ctxt: an XML parser context
9494 * @line: line of the start tag
9495 * @nsNr: number of namespaces on the start tag
9496 *
9497 * Parse an end tag. Always consumes '</'.
9498 *
9499 * [42] ETag ::= '</' Name S? '>'
9500 *
9501 * With namespace
9502 *
9503 * [NS 9] ETag ::= '</' QName S? '>'
9504 */
9505
9506static void
9507xmlParseEndTag2(xmlParserCtxtPtr ctxt, const xmlStartTag *tag) {
9508 const xmlChar *name;
9509
9510 GROW;
9511 if ((RAW != '<') || (NXT(1) != '/')) {
9512 xmlFatalErr(ctxt, XML_ERR_LTSLASH_REQUIRED, NULL);
9513 return;
9514 }
9515 SKIP(2);
9516
9517 if (tag->prefix == NULL)
9518 name = xmlParseNameAndCompare(ctxt, ctxt->name);
9519 else
9520 name = xmlParseQNameAndCompare(ctxt, ctxt->name, tag->prefix);
9521
9522 /*
9523 * We should definitely be at the ending "S? '>'" part
9524 */
9525 GROW;
9526 SKIP_BLANKS;
9527 if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
9528 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
9529 } else
9530 NEXT1;
9531
9532 /*
9533 * [ WFC: Element Type Match ]
9534 * The Name in an element's end-tag must match the element type in the
9535 * start-tag.
9536 *
9537 */
9538 if (name != (xmlChar*)1) {
9539 if (name == NULL) name = BAD_CAST "unparsable";
9540 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
9541 "Opening and ending tag mismatch: %s line %d and %s\n",
9542 ctxt->name, tag->line, name);
9543 }
9544
9545 /*
9546 * SAX: End of Tag
9547 */
9548 if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
9549 (!ctxt->disableSAX))
9550 ctxt->sax->endElementNs(ctxt->userData, ctxt->name, tag->prefix,
9551 tag->URI);
9552
9553 spacePop(ctxt);
9554 if (tag->nsNr != 0)
9555 xmlParserNsPop(ctxt, tag->nsNr);
9556}
9557
9558/**
9559 * xmlParseCDSect:
9560 * @ctxt: an XML parser context
9561 *
9562 * DEPRECATED: Internal function, don't use.
9563 *
9564 * Parse escaped pure raw content. Always consumes '<!['.
9565 *
9566 * [18] CDSect ::= CDStart CData CDEnd
9567 *
9568 * [19] CDStart ::= '<![CDATA['
9569 *
9570 * [20] Data ::= (Char* - (Char* ']]>' Char*))
9571 *
9572 * [21] CDEnd ::= ']]>'
9573 */
9574void
9575xmlParseCDSect(xmlParserCtxtPtr ctxt) {
9576 xmlChar *buf = NULL;
9577 int len = 0;
9578 int size = XML_PARSER_BUFFER_SIZE;
9579 int r, rl;
9580 int s, sl;
9581 int cur, l;
9582 int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
9583 XML_MAX_HUGE_LENGTH :
9584 XML_MAX_TEXT_LENGTH;
9585
9586 if ((CUR != '<') || (NXT(1) != '!') || (NXT(2) != '['))
9587 return;
9588 SKIP(3);
9589
9590 if (!CMP6(CUR_PTR, 'C', 'D', 'A', 'T', 'A', '['))
9591 return;
9592 SKIP(6);
9593
9594 r = CUR_CHAR(rl);
9595 if (!IS_CHAR(r)) {
9596 xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
9597 goto out;
9598 }
9599 NEXTL(rl);
9600 s = CUR_CHAR(sl);
9601 if (!IS_CHAR(s)) {
9602 xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
9603 goto out;
9604 }
9605 NEXTL(sl);
9606 cur = CUR_CHAR(l);
9607 buf = (xmlChar *) xmlMallocAtomic(size);
9608 if (buf == NULL) {
9609 xmlErrMemory(ctxt);
9610 goto out;
9611 }
9612 while (IS_CHAR(cur) &&
9613 ((r != ']') || (s != ']') || (cur != '>'))) {
9614 if (len + 5 >= size) {
9615 xmlChar *tmp;
9616
9617 tmp = (xmlChar *) xmlRealloc(buf, size * 2);
9618 if (tmp == NULL) {
9619 xmlErrMemory(ctxt);
9620 goto out;
9621 }
9622 buf = tmp;
9623 size *= 2;
9624 }
9625 COPY_BUF(buf, len, r);
9626 if (len > maxLength) {
9627 xmlFatalErrMsg(ctxt, XML_ERR_CDATA_NOT_FINISHED,
9628 "CData section too big found\n");
9629 goto out;
9630 }
9631 r = s;
9632 rl = sl;
9633 s = cur;
9634 sl = l;
9635 NEXTL(l);
9636 cur = CUR_CHAR(l);
9637 }
9638 buf[len] = 0;
9639 if (cur != '>') {
9640 xmlFatalErrMsgStr(ctxt, XML_ERR_CDATA_NOT_FINISHED,
9641 "CData section not finished\n%.50s\n", buf);
9642 goto out;
9643 }
9644 NEXTL(l);
9645
9646 /*
9647 * OK the buffer is to be consumed as cdata.
9648 */
9649 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
9650 if (ctxt->sax->cdataBlock != NULL)
9651 ctxt->sax->cdataBlock(ctxt->userData, buf, len);
9652 else if (ctxt->sax->characters != NULL)
9653 ctxt->sax->characters(ctxt->userData, buf, len);
9654 }
9655
9656out:
9657 xmlFree(buf);
9658}
9659
9660/**
9661 * xmlParseContentInternal:
9662 * @ctxt: an XML parser context
9663 *
9664 * Parse a content sequence. Stops at EOF or '</'. Leaves checking of
9665 * unexpected EOF to the caller.
9666 */
9667
9668static void
9669xmlParseContentInternal(xmlParserCtxtPtr ctxt) {
9670 int oldNameNr = ctxt->nameNr;
9671 int oldSpaceNr = ctxt->spaceNr;
9672 int oldNodeNr = ctxt->nodeNr;
9673
9674 GROW;
9675 while ((ctxt->input->cur < ctxt->input->end) &&
9676 (PARSER_STOPPED(ctxt) == 0)) {
9677 const xmlChar *cur = ctxt->input->cur;
9678
9679 /*
9680 * First case : a Processing Instruction.
9681 */
9682 if ((*cur == '<') && (cur[1] == '?')) {
9683 xmlParsePI(ctxt);
9684 }
9685
9686 /*
9687 * Second case : a CDSection
9688 */
9689 /* 2.6.0 test was *cur not RAW */
9690 else if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) {
9691 xmlParseCDSect(ctxt);
9692 }
9693
9694 /*
9695 * Third case : a comment
9696 */
9697 else if ((*cur == '<') && (NXT(1) == '!') &&
9698 (NXT(2) == '-') && (NXT(3) == '-')) {
9699 xmlParseComment(ctxt);
9700 }
9701
9702 /*
9703 * Fourth case : a sub-element.
9704 */
9705 else if (*cur == '<') {
9706 if (NXT(1) == '/') {
9707 if (ctxt->nameNr <= oldNameNr)
9708 break;
9709 xmlParseElementEnd(ctxt);
9710 } else {
9711 xmlParseElementStart(ctxt);
9712 }
9713 }
9714
9715 /*
9716 * Fifth case : a reference. If if has not been resolved,
9717 * parsing returns it's Name, create the node
9718 */
9719
9720 else if (*cur == '&') {
9721 xmlParseReference(ctxt);
9722 }
9723
9724 /*
9725 * Last case, text. Note that References are handled directly.
9726 */
9727 else {
9728 xmlParseCharDataInternal(ctxt, 0);
9729 }
9730
9731 SHRINK;
9732 GROW;
9733 }
9734
9735 if ((ctxt->nameNr > oldNameNr) &&
9736 (ctxt->input->cur >= ctxt->input->end) &&
9737 (ctxt->wellFormed)) {
9738 const xmlChar *name = ctxt->nameTab[ctxt->nameNr - 1];
9739 int line = ctxt->pushTab[ctxt->nameNr - 1].line;
9740 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NOT_FINISHED,
9741 "Premature end of data in tag %s line %d\n",
9742 name, line, NULL);
9743 }
9744
9745 /*
9746 * Clean up in error case
9747 */
9748
9749 while (ctxt->nodeNr > oldNodeNr)
9750 nodePop(ctxt);
9751
9752 while (ctxt->nameNr > oldNameNr) {
9753 xmlStartTag *tag = &ctxt->pushTab[ctxt->nameNr - 1];
9754
9755 if (tag->nsNr != 0)
9756 xmlParserNsPop(ctxt, tag->nsNr);
9757
9758 namePop(ctxt);
9759 }
9760
9761 while (ctxt->spaceNr > oldSpaceNr)
9762 spacePop(ctxt);
9763}
9764
9765/**
9766 * xmlParseContent:
9767 * @ctxt: an XML parser context
9768 *
9769 * Parse XML element content. This is useful if you're only interested
9770 * in custom SAX callbacks. If you want a node list, use
9771 * xmlParseInNodeContext.
9772 */
9773void
9774xmlParseContent(xmlParserCtxtPtr ctxt) {
9775 if ((ctxt == NULL) || (ctxt->input == NULL))
9776 return;
9777
9778 xmlCtxtInitializeLate(ctxt);
9779
9780 xmlParseContentInternal(ctxt);
9781
9782 if (ctxt->input->cur < ctxt->input->end)
9783 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
9784}
9785
9786/**
9787 * xmlParseElement:
9788 * @ctxt: an XML parser context
9789 *
9790 * DEPRECATED: Internal function, don't use.
9791 *
9792 * parse an XML element
9793 *
9794 * [39] element ::= EmptyElemTag | STag content ETag
9795 *
9796 * [ WFC: Element Type Match ]
9797 * The Name in an element's end-tag must match the element type in the
9798 * start-tag.
9799 *
9800 */
9801
9802void
9803xmlParseElement(xmlParserCtxtPtr ctxt) {
9804 if (xmlParseElementStart(ctxt) != 0)
9805 return;
9806
9807 xmlParseContentInternal(ctxt);
9808
9809 if (ctxt->input->cur >= ctxt->input->end) {
9810 if (ctxt->wellFormed) {
9811 const xmlChar *name = ctxt->nameTab[ctxt->nameNr - 1];
9812 int line = ctxt->pushTab[ctxt->nameNr - 1].line;
9813 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NOT_FINISHED,
9814 "Premature end of data in tag %s line %d\n",
9815 name, line, NULL);
9816 }
9817 return;
9818 }
9819
9820 xmlParseElementEnd(ctxt);
9821}
9822
9823/**
9824 * xmlParseElementStart:
9825 * @ctxt: an XML parser context
9826 *
9827 * Parse the start of an XML element. Returns -1 in case of error, 0 if an
9828 * opening tag was parsed, 1 if an empty element was parsed.
9829 *
9830 * Always consumes '<'.
9831 */
9832static int
9833xmlParseElementStart(xmlParserCtxtPtr ctxt) {
9834 int maxDepth = (ctxt->options & XML_PARSE_HUGE) ? 2048 : 256;
9835 const xmlChar *name;
9836 const xmlChar *prefix = NULL;
9837 const xmlChar *URI = NULL;
9838 xmlParserNodeInfo node_info;
9839 int line;
9840 xmlNodePtr cur;
9841 int nbNs = 0;
9842
9843 if (ctxt->nameNr > maxDepth) {
9844 xmlFatalErrMsgInt(ctxt, XML_ERR_RESOURCE_LIMIT,
9845 "Excessive depth in document: %d use XML_PARSE_HUGE option\n",
9846 ctxt->nameNr);
9847 xmlHaltParser(ctxt);
9848 return(-1);
9849 }
9850
9851 /* Capture start position */
9852 if (ctxt->record_info) {
9853 node_info.begin_pos = ctxt->input->consumed +
9854 (CUR_PTR - ctxt->input->base);
9855 node_info.begin_line = ctxt->input->line;
9856 }
9857
9858 if (ctxt->spaceNr == 0)
9859 spacePush(ctxt, -1);
9860 else if (*ctxt->space == -2)
9861 spacePush(ctxt, -1);
9862 else
9863 spacePush(ctxt, *ctxt->space);
9864
9865 line = ctxt->input->line;
9866#ifdef LIBXML_SAX1_ENABLED
9867 if (ctxt->sax2)
9868#endif /* LIBXML_SAX1_ENABLED */
9869 name = xmlParseStartTag2(ctxt, &prefix, &URI, &nbNs);
9870#ifdef LIBXML_SAX1_ENABLED
9871 else
9872 name = xmlParseStartTag(ctxt);
9873#endif /* LIBXML_SAX1_ENABLED */
9874 if (name == NULL) {
9875 spacePop(ctxt);
9876 return(-1);
9877 }
9878 nameNsPush(ctxt, name, prefix, URI, line, nbNs);
9879 cur = ctxt->node;
9880
9881#ifdef LIBXML_VALID_ENABLED
9882 /*
9883 * [ VC: Root Element Type ]
9884 * The Name in the document type declaration must match the element
9885 * type of the root element.
9886 */
9887 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
9888 ctxt->node && (ctxt->node == ctxt->myDoc->children))
9889 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
9890#endif /* LIBXML_VALID_ENABLED */
9891
9892 /*
9893 * Check for an Empty Element.
9894 */
9895 if ((RAW == '/') && (NXT(1) == '>')) {
9896 SKIP(2);
9897 if (ctxt->sax2) {
9898 if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
9899 (!ctxt->disableSAX))
9900 ctxt->sax->endElementNs(ctxt->userData, name, prefix, URI);
9901#ifdef LIBXML_SAX1_ENABLED
9902 } else {
9903 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
9904 (!ctxt->disableSAX))
9905 ctxt->sax->endElement(ctxt->userData, name);
9906#endif /* LIBXML_SAX1_ENABLED */
9907 }
9908 namePop(ctxt);
9909 spacePop(ctxt);
9910 if (nbNs > 0)
9911 xmlParserNsPop(ctxt, nbNs);
9912 if (cur != NULL && ctxt->record_info) {
9913 node_info.node = cur;
9914 node_info.end_pos = ctxt->input->consumed +
9915 (CUR_PTR - ctxt->input->base);
9916 node_info.end_line = ctxt->input->line;
9917 xmlParserAddNodeInfo(ctxt, &node_info);
9918 }
9919 return(1);
9920 }
9921 if (RAW == '>') {
9922 NEXT1;
9923 if (cur != NULL && ctxt->record_info) {
9924 node_info.node = cur;
9925 node_info.end_pos = 0;
9926 node_info.end_line = 0;
9927 xmlParserAddNodeInfo(ctxt, &node_info);
9928 }
9929 } else {
9930 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_GT_REQUIRED,
9931 "Couldn't find end of Start Tag %s line %d\n",
9932 name, line, NULL);
9933
9934 /*
9935 * end of parsing of this node.
9936 */
9937 nodePop(ctxt);
9938 namePop(ctxt);
9939 spacePop(ctxt);
9940 if (nbNs > 0)
9941 xmlParserNsPop(ctxt, nbNs);
9942 return(-1);
9943 }
9944
9945 return(0);
9946}
9947
9948/**
9949 * xmlParseElementEnd:
9950 * @ctxt: an XML parser context
9951 *
9952 * Parse the end of an XML element. Always consumes '</'.
9953 */
9954static void
9955xmlParseElementEnd(xmlParserCtxtPtr ctxt) {
9956 xmlNodePtr cur = ctxt->node;
9957
9958 if (ctxt->nameNr <= 0) {
9959 if ((RAW == '<') && (NXT(1) == '/'))
9960 SKIP(2);
9961 return;
9962 }
9963
9964 /*
9965 * parse the end of tag: '</' should be here.
9966 */
9967 if (ctxt->sax2) {
9968 xmlParseEndTag2(ctxt, &ctxt->pushTab[ctxt->nameNr - 1]);
9969 namePop(ctxt);
9970 }
9971#ifdef LIBXML_SAX1_ENABLED
9972 else
9973 xmlParseEndTag1(ctxt, 0);
9974#endif /* LIBXML_SAX1_ENABLED */
9975
9976 /*
9977 * Capture end position
9978 */
9979 if (cur != NULL && ctxt->record_info) {
9980 xmlParserNodeInfoPtr node_info;
9981
9982 node_info = (xmlParserNodeInfoPtr) xmlParserFindNodeInfo(ctxt, cur);
9983 if (node_info != NULL) {
9984 node_info->end_pos = ctxt->input->consumed +
9985 (CUR_PTR - ctxt->input->base);
9986 node_info->end_line = ctxt->input->line;
9987 }
9988 }
9989}
9990
9991/**
9992 * xmlParseVersionNum:
9993 * @ctxt: an XML parser context
9994 *
9995 * DEPRECATED: Internal function, don't use.
9996 *
9997 * parse the XML version value.
9998 *
9999 * [26] VersionNum ::= '1.' [0-9]+
10000 *
10001 * In practice allow [0-9].[0-9]+ at that level
10002 *
10003 * Returns the string giving the XML version number, or NULL
10004 */
10005xmlChar *
10006xmlParseVersionNum(xmlParserCtxtPtr ctxt) {
10007 xmlChar *buf = NULL;
10008 int len = 0;
10009 int size = 10;
10010 xmlChar cur;
10011
10012 buf = (xmlChar *) xmlMallocAtomic(size);
10013 if (buf == NULL) {
10014 xmlErrMemory(ctxt);
10015 return(NULL);
10016 }
10017 cur = CUR;
10018 if (!((cur >= '0') && (cur <= '9'))) {
10019 xmlFree(buf);
10020 return(NULL);
10021 }
10022 buf[len++] = cur;
10023 NEXT;
10024 cur=CUR;
10025 if (cur != '.') {
10026 xmlFree(buf);
10027 return(NULL);
10028 }
10029 buf[len++] = cur;
10030 NEXT;
10031 cur=CUR;
10032 while ((cur >= '0') && (cur <= '9')) {
10033 if (len + 1 >= size) {
10034 xmlChar *tmp;
10035
10036 size *= 2;
10037 tmp = (xmlChar *) xmlRealloc(buf, size);
10038 if (tmp == NULL) {
10039 xmlFree(buf);
10040 xmlErrMemory(ctxt);
10041 return(NULL);
10042 }
10043 buf = tmp;
10044 }
10045 buf[len++] = cur;
10046 NEXT;
10047 cur=CUR;
10048 }
10049 buf[len] = 0;
10050 return(buf);
10051}
10052
10053/**
10054 * xmlParseVersionInfo:
10055 * @ctxt: an XML parser context
10056 *
10057 * DEPRECATED: Internal function, don't use.
10058 *
10059 * parse the XML version.
10060 *
10061 * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
10062 *
10063 * [25] Eq ::= S? '=' S?
10064 *
10065 * Returns the version string, e.g. "1.0"
10066 */
10067
10068xmlChar *
10069xmlParseVersionInfo(xmlParserCtxtPtr ctxt) {
10070 xmlChar *version = NULL;
10071
10072 if (CMP7(CUR_PTR, 'v', 'e', 'r', 's', 'i', 'o', 'n')) {
10073 SKIP(7);
10074 SKIP_BLANKS;
10075 if (RAW != '=') {
10076 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
10077 return(NULL);
10078 }
10079 NEXT;
10080 SKIP_BLANKS;
10081 if (RAW == '"') {
10082 NEXT;
10083 version = xmlParseVersionNum(ctxt);
10084 if (RAW != '"') {
10085 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10086 } else
10087 NEXT;
10088 } else if (RAW == '\''){
10089 NEXT;
10090 version = xmlParseVersionNum(ctxt);
10091 if (RAW != '\'') {
10092 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10093 } else
10094 NEXT;
10095 } else {
10096 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
10097 }
10098 }
10099 return(version);
10100}
10101
10102/**
10103 * xmlParseEncName:
10104 * @ctxt: an XML parser context
10105 *
10106 * DEPRECATED: Internal function, don't use.
10107 *
10108 * parse the XML encoding name
10109 *
10110 * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
10111 *
10112 * Returns the encoding name value or NULL
10113 */
10114xmlChar *
10115xmlParseEncName(xmlParserCtxtPtr ctxt) {
10116 xmlChar *buf = NULL;
10117 int len = 0;
10118 int size = 10;
10119 int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
10120 XML_MAX_TEXT_LENGTH :
10121 XML_MAX_NAME_LENGTH;
10122 xmlChar cur;
10123
10124 cur = CUR;
10125 if (((cur >= 'a') && (cur <= 'z')) ||
10126 ((cur >= 'A') && (cur <= 'Z'))) {
10127 buf = (xmlChar *) xmlMallocAtomic(size);
10128 if (buf == NULL) {
10129 xmlErrMemory(ctxt);
10130 return(NULL);
10131 }
10132
10133 buf[len++] = cur;
10134 NEXT;
10135 cur = CUR;
10136 while (((cur >= 'a') && (cur <= 'z')) ||
10137 ((cur >= 'A') && (cur <= 'Z')) ||
10138 ((cur >= '0') && (cur <= '9')) ||
10139 (cur == '.') || (cur == '_') ||
10140 (cur == '-')) {
10141 if (len + 1 >= size) {
10142 xmlChar *tmp;
10143
10144 size *= 2;
10145 tmp = (xmlChar *) xmlRealloc(buf, size);
10146 if (tmp == NULL) {
10147 xmlErrMemory(ctxt);
10148 xmlFree(buf);
10149 return(NULL);
10150 }
10151 buf = tmp;
10152 }
10153 buf[len++] = cur;
10154 if (len > maxLength) {
10155 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "EncName");
10156 xmlFree(buf);
10157 return(NULL);
10158 }
10159 NEXT;
10160 cur = CUR;
10161 }
10162 buf[len] = 0;
10163 } else {
10164 xmlFatalErr(ctxt, XML_ERR_ENCODING_NAME, NULL);
10165 }
10166 return(buf);
10167}
10168
10169/**
10170 * xmlParseEncodingDecl:
10171 * @ctxt: an XML parser context
10172 *
10173 * DEPRECATED: Internal function, don't use.
10174 *
10175 * parse the XML encoding declaration
10176 *
10177 * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'")
10178 *
10179 * this setups the conversion filters.
10180 *
10181 * Returns the encoding value or NULL
10182 */
10183
10184const xmlChar *
10185xmlParseEncodingDecl(xmlParserCtxtPtr ctxt) {
10186 xmlChar *encoding = NULL;
10187
10188 SKIP_BLANKS;
10189 if (CMP8(CUR_PTR, 'e', 'n', 'c', 'o', 'd', 'i', 'n', 'g') == 0)
10190 return(NULL);
10191
10192 SKIP(8);
10193 SKIP_BLANKS;
10194 if (RAW != '=') {
10195 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
10196 return(NULL);
10197 }
10198 NEXT;
10199 SKIP_BLANKS;
10200 if (RAW == '"') {
10201 NEXT;
10202 encoding = xmlParseEncName(ctxt);
10203 if (RAW != '"') {
10204 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10205 xmlFree((xmlChar *) encoding);
10206 return(NULL);
10207 } else
10208 NEXT;
10209 } else if (RAW == '\''){
10210 NEXT;
10211 encoding = xmlParseEncName(ctxt);
10212 if (RAW != '\'') {
10213 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10214 xmlFree((xmlChar *) encoding);
10215 return(NULL);
10216 } else
10217 NEXT;
10218 } else {
10219 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
10220 }
10221
10222 if (encoding == NULL)
10223 return(NULL);
10224
10225 xmlSetDeclaredEncoding(ctxt, encoding);
10226
10227 return(ctxt->encoding);
10228}
10229
10230/**
10231 * xmlParseSDDecl:
10232 * @ctxt: an XML parser context
10233 *
10234 * DEPRECATED: Internal function, don't use.
10235 *
10236 * parse the XML standalone declaration
10237 *
10238 * [32] SDDecl ::= S 'standalone' Eq
10239 * (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no')'"'))
10240 *
10241 * [ VC: Standalone Document Declaration ]
10242 * TODO The standalone document declaration must have the value "no"
10243 * if any external markup declarations contain declarations of:
10244 * - attributes with default values, if elements to which these
10245 * attributes apply appear in the document without specifications
10246 * of values for these attributes, or
10247 * - entities (other than amp, lt, gt, apos, quot), if references
10248 * to those entities appear in the document, or
10249 * - attributes with values subject to normalization, where the
10250 * attribute appears in the document with a value which will change
10251 * as a result of normalization, or
10252 * - element types with element content, if white space occurs directly
10253 * within any instance of those types.
10254 *
10255 * Returns:
10256 * 1 if standalone="yes"
10257 * 0 if standalone="no"
10258 * -2 if standalone attribute is missing or invalid
10259 * (A standalone value of -2 means that the XML declaration was found,
10260 * but no value was specified for the standalone attribute).
10261 */
10262
10263int
10264xmlParseSDDecl(xmlParserCtxtPtr ctxt) {
10265 int standalone = -2;
10266
10267 SKIP_BLANKS;
10268 if (CMP10(CUR_PTR, 's', 't', 'a', 'n', 'd', 'a', 'l', 'o', 'n', 'e')) {
10269 SKIP(10);
10270 SKIP_BLANKS;
10271 if (RAW != '=') {
10272 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
10273 return(standalone);
10274 }
10275 NEXT;
10276 SKIP_BLANKS;
10277 if (RAW == '\''){
10278 NEXT;
10279 if ((RAW == 'n') && (NXT(1) == 'o')) {
10280 standalone = 0;
10281 SKIP(2);
10282 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
10283 (NXT(2) == 's')) {
10284 standalone = 1;
10285 SKIP(3);
10286 } else {
10287 xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
10288 }
10289 if (RAW != '\'') {
10290 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10291 } else
10292 NEXT;
10293 } else if (RAW == '"'){
10294 NEXT;
10295 if ((RAW == 'n') && (NXT(1) == 'o')) {
10296 standalone = 0;
10297 SKIP(2);
10298 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
10299 (NXT(2) == 's')) {
10300 standalone = 1;
10301 SKIP(3);
10302 } else {
10303 xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
10304 }
10305 if (RAW != '"') {
10306 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10307 } else
10308 NEXT;
10309 } else {
10310 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
10311 }
10312 }
10313 return(standalone);
10314}
10315
10316/**
10317 * xmlParseXMLDecl:
10318 * @ctxt: an XML parser context
10319 *
10320 * DEPRECATED: Internal function, don't use.
10321 *
10322 * parse an XML declaration header
10323 *
10324 * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
10325 */
10326
10327void
10328xmlParseXMLDecl(xmlParserCtxtPtr ctxt) {
10329 xmlChar *version;
10330
10331 /*
10332 * This value for standalone indicates that the document has an
10333 * XML declaration but it does not have a standalone attribute.
10334 * It will be overwritten later if a standalone attribute is found.
10335 */
10336
10337 ctxt->standalone = -2;
10338
10339 /*
10340 * We know that '<?xml' is here.
10341 */
10342 SKIP(5);
10343
10344 if (!IS_BLANK_CH(RAW)) {
10345 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
10346 "Blank needed after '<?xml'\n");
10347 }
10348 SKIP_BLANKS;
10349
10350 /*
10351 * We must have the VersionInfo here.
10352 */
10353 version = xmlParseVersionInfo(ctxt);
10354 if (version == NULL) {
10355 xmlFatalErr(ctxt, XML_ERR_VERSION_MISSING, NULL);
10356 } else {
10357 if (!xmlStrEqual(version, (const xmlChar *) XML_DEFAULT_VERSION)) {
10358 /*
10359 * Changed here for XML-1.0 5th edition
10360 */
10361 if (ctxt->options & XML_PARSE_OLD10) {
10362 xmlFatalErrMsgStr(ctxt, XML_ERR_UNKNOWN_VERSION,
10363 "Unsupported version '%s'\n",
10364 version);
10365 } else {
10366 if ((version[0] == '1') && ((version[1] == '.'))) {
10367 xmlWarningMsg(ctxt, XML_WAR_UNKNOWN_VERSION,
10368 "Unsupported version '%s'\n",
10369 version, NULL);
10370 } else {
10371 xmlFatalErrMsgStr(ctxt, XML_ERR_UNKNOWN_VERSION,
10372 "Unsupported version '%s'\n",
10373 version);
10374 }
10375 }
10376 }
10377 if (ctxt->version != NULL)
10378 xmlFree((void *) ctxt->version);
10379 ctxt->version = version;
10380 }
10381
10382 /*
10383 * We may have the encoding declaration
10384 */
10385 if (!IS_BLANK_CH(RAW)) {
10386 if ((RAW == '?') && (NXT(1) == '>')) {
10387 SKIP(2);
10388 return;
10389 }
10390 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
10391 }
10392 xmlParseEncodingDecl(ctxt);
10393
10394 /*
10395 * We may have the standalone status.
10396 */
10397 if ((ctxt->encoding != NULL) && (!IS_BLANK_CH(RAW))) {
10398 if ((RAW == '?') && (NXT(1) == '>')) {
10399 SKIP(2);
10400 return;
10401 }
10402 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
10403 }
10404
10405 /*
10406 * We can grow the input buffer freely at that point
10407 */
10408 GROW;
10409
10410 SKIP_BLANKS;
10411 ctxt->standalone = xmlParseSDDecl(ctxt);
10412
10413 SKIP_BLANKS;
10414 if ((RAW == '?') && (NXT(1) == '>')) {
10415 SKIP(2);
10416 } else if (RAW == '>') {
10417 /* Deprecated old WD ... */
10418 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
10419 NEXT;
10420 } else {
10421 int c;
10422
10423 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
10424 while ((PARSER_STOPPED(ctxt) == 0) &&
10425 ((c = CUR) != 0)) {
10426 NEXT;
10427 if (c == '>')
10428 break;
10429 }
10430 }
10431}
10432
10433/**
10434 * xmlParseMisc:
10435 * @ctxt: an XML parser context
10436 *
10437 * DEPRECATED: Internal function, don't use.
10438 *
10439 * parse an XML Misc* optional field.
10440 *
10441 * [27] Misc ::= Comment | PI | S
10442 */
10443
10444void
10445xmlParseMisc(xmlParserCtxtPtr ctxt) {
10446 while (PARSER_STOPPED(ctxt) == 0) {
10447 SKIP_BLANKS;
10448 GROW;
10449 if ((RAW == '<') && (NXT(1) == '?')) {
10450 xmlParsePI(ctxt);
10451 } else if (CMP4(CUR_PTR, '<', '!', '-', '-')) {
10452 xmlParseComment(ctxt);
10453 } else {
10454 break;
10455 }
10456 }
10457}
10458
10459static void
10460xmlFinishDocument(xmlParserCtxtPtr ctxt) {
10461 xmlDocPtr doc;
10462
10463 /*
10464 * SAX: end of the document processing.
10465 */
10466 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
10467 ctxt->sax->endDocument(ctxt->userData);
10468
10469 doc = ctxt->myDoc;
10470 if (doc != NULL) {
10471 if (ctxt->wellFormed) {
10472 doc->properties |= XML_DOC_WELLFORMED;
10473 if (ctxt->valid)
10474 doc->properties |= XML_DOC_DTDVALID;
10475 if (ctxt->nsWellFormed)
10476 doc->properties |= XML_DOC_NSVALID;
10477 }
10478
10479 if (ctxt->options & XML_PARSE_OLD10)
10480 doc->properties |= XML_DOC_OLD10;
10481
10482 /*
10483 * Remove locally kept entity definitions if the tree was not built
10484 */
10485 if (xmlStrEqual(doc->version, SAX_COMPAT_MODE)) {
10486 xmlFreeDoc(doc);
10487 ctxt->myDoc = NULL;
10488 }
10489 }
10490}
10491
10492/**
10493 * xmlParseDocument:
10494 * @ctxt: an XML parser context
10495 *
10496 * Parse an XML document and invoke the SAX handlers. This is useful
10497 * if you're only interested in custom SAX callbacks. If you want a
10498 * document tree, use xmlCtxtParseDocument.
10499 *
10500 * Returns 0, -1 in case of error.
10501 */
10502
10503int
10504xmlParseDocument(xmlParserCtxtPtr ctxt) {
10505 if ((ctxt == NULL) || (ctxt->input == NULL))
10506 return(-1);
10507
10508 GROW;
10509
10510 /*
10511 * SAX: detecting the level.
10512 */
10513 xmlCtxtInitializeLate(ctxt);
10514
10515 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator)) {
10516 ctxt->sax->setDocumentLocator(ctxt->userData,
10517 (xmlSAXLocator *) &xmlDefaultSAXLocator);
10518 }
10519
10520 xmlDetectEncoding(ctxt);
10521
10522 if (CUR == 0) {
10523 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
10524 return(-1);
10525 }
10526
10527 GROW;
10528 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
10529
10530 /*
10531 * Note that we will switch encoding on the fly.
10532 */
10533 xmlParseXMLDecl(ctxt);
10534 SKIP_BLANKS;
10535 } else {
10536 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
10537 if (ctxt->version == NULL) {
10538 xmlErrMemory(ctxt);
10539 return(-1);
10540 }
10541 }
10542 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
10543 ctxt->sax->startDocument(ctxt->userData);
10544 if ((ctxt->myDoc != NULL) && (ctxt->input != NULL) &&
10545 (ctxt->input->buf != NULL) && (ctxt->input->buf->compressed >= 0)) {
10546 ctxt->myDoc->compression = ctxt->input->buf->compressed;
10547 }
10548
10549 /*
10550 * The Misc part of the Prolog
10551 */
10552 xmlParseMisc(ctxt);
10553
10554 /*
10555 * Then possibly doc type declaration(s) and more Misc
10556 * (doctypedecl Misc*)?
10557 */
10558 GROW;
10559 if (CMP9(CUR_PTR, '<', '!', 'D', 'O', 'C', 'T', 'Y', 'P', 'E')) {
10560
10561 ctxt->inSubset = 1;
10562 xmlParseDocTypeDecl(ctxt);
10563 if (RAW == '[') {
10564 xmlParseInternalSubset(ctxt);
10565 }
10566
10567 /*
10568 * Create and update the external subset.
10569 */
10570 ctxt->inSubset = 2;
10571 if ((ctxt->sax != NULL) && (ctxt->sax->externalSubset != NULL) &&
10572 (!ctxt->disableSAX))
10573 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
10574 ctxt->extSubSystem, ctxt->extSubURI);
10575 ctxt->inSubset = 0;
10576
10577 xmlCleanSpecialAttr(ctxt);
10578
10579 xmlParseMisc(ctxt);
10580 }
10581
10582 /*
10583 * Time to start parsing the tree itself
10584 */
10585 GROW;
10586 if (RAW != '<') {
10587 if (ctxt->wellFormed)
10588 xmlFatalErrMsg(ctxt, XML_ERR_DOCUMENT_EMPTY,
10589 "Start tag expected, '<' not found\n");
10590 } else {
10591 xmlParseElement(ctxt);
10592
10593 /*
10594 * The Misc part at the end
10595 */
10596 xmlParseMisc(ctxt);
10597
10598 if (ctxt->input->cur < ctxt->input->end) {
10599 if (ctxt->wellFormed)
10600 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
10601 } else if ((ctxt->input->buf != NULL) &&
10602 (ctxt->input->buf->encoder != NULL) &&
10603 (ctxt->input->buf->error == 0) &&
10604 (!xmlBufIsEmpty(ctxt->input->buf->raw))) {
10605 xmlFatalErrMsg(ctxt, XML_ERR_INVALID_CHAR,
10606 "Truncated multi-byte sequence at EOF\n");
10607 }
10608 }
10609
10610 ctxt->instate = XML_PARSER_EOF;
10611 xmlFinishDocument(ctxt);
10612
10613 if (! ctxt->wellFormed) {
10614 ctxt->valid = 0;
10615 return(-1);
10616 }
10617
10618 return(0);
10619}
10620
10621/**
10622 * xmlParseExtParsedEnt:
10623 * @ctxt: an XML parser context
10624 *
10625 * parse a general parsed entity
10626 * An external general parsed entity is well-formed if it matches the
10627 * production labeled extParsedEnt.
10628 *
10629 * [78] extParsedEnt ::= TextDecl? content
10630 *
10631 * Returns 0, -1 in case of error. the parser context is augmented
10632 * as a result of the parsing.
10633 */
10634
10635int
10636xmlParseExtParsedEnt(xmlParserCtxtPtr ctxt) {
10637 if ((ctxt == NULL) || (ctxt->input == NULL))
10638 return(-1);
10639
10640 xmlCtxtInitializeLate(ctxt);
10641
10642 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator)) {
10643 ctxt->sax->setDocumentLocator(ctxt->userData,
10644 (xmlSAXLocator *) &xmlDefaultSAXLocator);
10645 }
10646
10647 xmlDetectEncoding(ctxt);
10648
10649 if (CUR == 0) {
10650 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
10651 }
10652
10653 /*
10654 * Check for the XMLDecl in the Prolog.
10655 */
10656 GROW;
10657 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
10658
10659 /*
10660 * Note that we will switch encoding on the fly.
10661 */
10662 xmlParseXMLDecl(ctxt);
10663 SKIP_BLANKS;
10664 } else {
10665 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
10666 }
10667 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
10668 ctxt->sax->startDocument(ctxt->userData);
10669
10670 /*
10671 * Doing validity checking on chunk doesn't make sense
10672 */
10673 ctxt->options &= ~XML_PARSE_DTDVALID;
10674 ctxt->validate = 0;
10675 ctxt->depth = 0;
10676
10677 xmlParseContentInternal(ctxt);
10678
10679 if (ctxt->input->cur < ctxt->input->end)
10680 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
10681
10682 /*
10683 * SAX: end of the document processing.
10684 */
10685 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
10686 ctxt->sax->endDocument(ctxt->userData);
10687
10688 if (! ctxt->wellFormed) return(-1);
10689 return(0);
10690}
10691
10692#ifdef LIBXML_PUSH_ENABLED
10693/************************************************************************
10694 * *
10695 * Progressive parsing interfaces *
10696 * *
10697 ************************************************************************/
10698
10699/**
10700 * xmlParseLookupChar:
10701 * @ctxt: an XML parser context
10702 * @c: character
10703 *
10704 * Check whether the input buffer contains a character.
10705 */
10706static int
10707xmlParseLookupChar(xmlParserCtxtPtr ctxt, int c) {
10708 const xmlChar *cur;
10709
10710 if (ctxt->checkIndex == 0) {
10711 cur = ctxt->input->cur + 1;
10712 } else {
10713 cur = ctxt->input->cur + ctxt->checkIndex;
10714 }
10715
10716 if (memchr(cur, c, ctxt->input->end - cur) == NULL) {
10717 size_t index = ctxt->input->end - ctxt->input->cur;
10718
10719 if (index > LONG_MAX) {
10720 ctxt->checkIndex = 0;
10721 return(1);
10722 }
10723 ctxt->checkIndex = index;
10724 return(0);
10725 } else {
10726 ctxt->checkIndex = 0;
10727 return(1);
10728 }
10729}
10730
10731/**
10732 * xmlParseLookupString:
10733 * @ctxt: an XML parser context
10734 * @startDelta: delta to apply at the start
10735 * @str: string
10736 * @strLen: length of string
10737 *
10738 * Check whether the input buffer contains a string.
10739 */
10740static const xmlChar *
10741xmlParseLookupString(xmlParserCtxtPtr ctxt, size_t startDelta,
10742 const char *str, size_t strLen) {
10743 const xmlChar *cur, *term;
10744
10745 if (ctxt->checkIndex == 0) {
10746 cur = ctxt->input->cur + startDelta;
10747 } else {
10748 cur = ctxt->input->cur + ctxt->checkIndex;
10749 }
10750
10751 term = BAD_CAST strstr((const char *) cur, str);
10752 if (term == NULL) {
10753 const xmlChar *end = ctxt->input->end;
10754 size_t index;
10755
10756 /* Rescan (strLen - 1) characters. */
10757 if ((size_t) (end - cur) < strLen)
10758 end = cur;
10759 else
10760 end -= strLen - 1;
10761 index = end - ctxt->input->cur;
10762 if (index > LONG_MAX) {
10763 ctxt->checkIndex = 0;
10764 return(ctxt->input->end - strLen);
10765 }
10766 ctxt->checkIndex = index;
10767 } else {
10768 ctxt->checkIndex = 0;
10769 }
10770
10771 return(term);
10772}
10773
10774/**
10775 * xmlParseLookupCharData:
10776 * @ctxt: an XML parser context
10777 *
10778 * Check whether the input buffer contains terminated char data.
10779 */
10780static int
10781xmlParseLookupCharData(xmlParserCtxtPtr ctxt) {
10782 const xmlChar *cur = ctxt->input->cur + ctxt->checkIndex;
10783 const xmlChar *end = ctxt->input->end;
10784 size_t index;
10785
10786 while (cur < end) {
10787 if ((*cur == '<') || (*cur == '&')) {
10788 ctxt->checkIndex = 0;
10789 return(1);
10790 }
10791 cur++;
10792 }
10793
10794 index = cur - ctxt->input->cur;
10795 if (index > LONG_MAX) {
10796 ctxt->checkIndex = 0;
10797 return(1);
10798 }
10799 ctxt->checkIndex = index;
10800 return(0);
10801}
10802
10803/**
10804 * xmlParseLookupGt:
10805 * @ctxt: an XML parser context
10806 *
10807 * Check whether there's enough data in the input buffer to finish parsing
10808 * a start tag. This has to take quotes into account.
10809 */
10810static int
10811xmlParseLookupGt(xmlParserCtxtPtr ctxt) {
10812 const xmlChar *cur;
10813 const xmlChar *end = ctxt->input->end;
10814 int state = ctxt->endCheckState;
10815 size_t index;
10816
10817 if (ctxt->checkIndex == 0)
10818 cur = ctxt->input->cur + 1;
10819 else
10820 cur = ctxt->input->cur + ctxt->checkIndex;
10821
10822 while (cur < end) {
10823 if (state) {
10824 if (*cur == state)
10825 state = 0;
10826 } else if (*cur == '\'' || *cur == '"') {
10827 state = *cur;
10828 } else if (*cur == '>') {
10829 ctxt->checkIndex = 0;
10830 ctxt->endCheckState = 0;
10831 return(1);
10832 }
10833 cur++;
10834 }
10835
10836 index = cur - ctxt->input->cur;
10837 if (index > LONG_MAX) {
10838 ctxt->checkIndex = 0;
10839 ctxt->endCheckState = 0;
10840 return(1);
10841 }
10842 ctxt->checkIndex = index;
10843 ctxt->endCheckState = state;
10844 return(0);
10845}
10846
10847/**
10848 * xmlParseLookupInternalSubset:
10849 * @ctxt: an XML parser context
10850 *
10851 * Check whether there's enough data in the input buffer to finish parsing
10852 * the internal subset.
10853 */
10854static int
10855xmlParseLookupInternalSubset(xmlParserCtxtPtr ctxt) {
10856 /*
10857 * Sorry, but progressive parsing of the internal subset is not
10858 * supported. We first check that the full content of the internal
10859 * subset is available and parsing is launched only at that point.
10860 * Internal subset ends with "']' S? '>'" in an unescaped section and
10861 * not in a ']]>' sequence which are conditional sections.
10862 */
10863 const xmlChar *cur, *start;
10864 const xmlChar *end = ctxt->input->end;
10865 int state = ctxt->endCheckState;
10866 size_t index;
10867
10868 if (ctxt->checkIndex == 0) {
10869 cur = ctxt->input->cur + 1;
10870 } else {
10871 cur = ctxt->input->cur + ctxt->checkIndex;
10872 }
10873 start = cur;
10874
10875 while (cur < end) {
10876 if (state == '-') {
10877 if ((*cur == '-') &&
10878 (cur[1] == '-') &&
10879 (cur[2] == '>')) {
10880 state = 0;
10881 cur += 3;
10882 start = cur;
10883 continue;
10884 }
10885 }
10886 else if (state == ']') {
10887 if (*cur == '>') {
10888 ctxt->checkIndex = 0;
10889 ctxt->endCheckState = 0;
10890 return(1);
10891 }
10892 if (IS_BLANK_CH(*cur)) {
10893 state = ' ';
10894 } else if (*cur != ']') {
10895 state = 0;
10896 start = cur;
10897 continue;
10898 }
10899 }
10900 else if (state == ' ') {
10901 if (*cur == '>') {
10902 ctxt->checkIndex = 0;
10903 ctxt->endCheckState = 0;
10904 return(1);
10905 }
10906 if (!IS_BLANK_CH(*cur)) {
10907 state = 0;
10908 start = cur;
10909 continue;
10910 }
10911 }
10912 else if (state != 0) {
10913 if (*cur == state) {
10914 state = 0;
10915 start = cur + 1;
10916 }
10917 }
10918 else if (*cur == '<') {
10919 if ((cur[1] == '!') &&
10920 (cur[2] == '-') &&
10921 (cur[3] == '-')) {
10922 state = '-';
10923 cur += 4;
10924 /* Don't treat <!--> as comment */
10925 start = cur;
10926 continue;
10927 }
10928 }
10929 else if ((*cur == '"') || (*cur == '\'') || (*cur == ']')) {
10930 state = *cur;
10931 }
10932
10933 cur++;
10934 }
10935
10936 /*
10937 * Rescan the three last characters to detect "<!--" and "-->"
10938 * split across chunks.
10939 */
10940 if ((state == 0) || (state == '-')) {
10941 if (cur - start < 3)
10942 cur = start;
10943 else
10944 cur -= 3;
10945 }
10946 index = cur - ctxt->input->cur;
10947 if (index > LONG_MAX) {
10948 ctxt->checkIndex = 0;
10949 ctxt->endCheckState = 0;
10950 return(1);
10951 }
10952 ctxt->checkIndex = index;
10953 ctxt->endCheckState = state;
10954 return(0);
10955}
10956
10957/**
10958 * xmlCheckCdataPush:
10959 * @cur: pointer to the block of characters
10960 * @len: length of the block in bytes
10961 * @complete: 1 if complete CDATA block is passed in, 0 if partial block
10962 *
10963 * Check that the block of characters is okay as SCdata content [20]
10964 *
10965 * Returns the number of bytes to pass if okay, a negative index where an
10966 * UTF-8 error occurred otherwise
10967 */
10968static int
10969xmlCheckCdataPush(const xmlChar *utf, int len, int complete) {
10970 int ix;
10971 unsigned char c;
10972 int codepoint;
10973
10974 if ((utf == NULL) || (len <= 0))
10975 return(0);
10976
10977 for (ix = 0; ix < len;) { /* string is 0-terminated */
10978 c = utf[ix];
10979 if ((c & 0x80) == 0x00) { /* 1-byte code, starts with 10 */
10980 if (c >= 0x20)
10981 ix++;
10982 else if ((c == 0xA) || (c == 0xD) || (c == 0x9))
10983 ix++;
10984 else
10985 return(-ix);
10986 } else if ((c & 0xe0) == 0xc0) {/* 2-byte code, starts with 110 */
10987 if (ix + 2 > len) return(complete ? -ix : ix);
10988 if ((utf[ix+1] & 0xc0 ) != 0x80)
10989 return(-ix);
10990 codepoint = (utf[ix] & 0x1f) << 6;
10991 codepoint |= utf[ix+1] & 0x3f;
10992 if (!xmlIsCharQ(codepoint))
10993 return(-ix);
10994 ix += 2;
10995 } else if ((c & 0xf0) == 0xe0) {/* 3-byte code, starts with 1110 */
10996 if (ix + 3 > len) return(complete ? -ix : ix);
10997 if (((utf[ix+1] & 0xc0) != 0x80) ||
10998 ((utf[ix+2] & 0xc0) != 0x80))
10999 return(-ix);
11000 codepoint = (utf[ix] & 0xf) << 12;
11001 codepoint |= (utf[ix+1] & 0x3f) << 6;
11002 codepoint |= utf[ix+2] & 0x3f;
11003 if (!xmlIsCharQ(codepoint))
11004 return(-ix);
11005 ix += 3;
11006 } else if ((c & 0xf8) == 0xf0) {/* 4-byte code, starts with 11110 */
11007 if (ix + 4 > len) return(complete ? -ix : ix);
11008 if (((utf[ix+1] & 0xc0) != 0x80) ||
11009 ((utf[ix+2] & 0xc0) != 0x80) ||
11010 ((utf[ix+3] & 0xc0) != 0x80))
11011 return(-ix);
11012 codepoint = (utf[ix] & 0x7) << 18;
11013 codepoint |= (utf[ix+1] & 0x3f) << 12;
11014 codepoint |= (utf[ix+2] & 0x3f) << 6;
11015 codepoint |= utf[ix+3] & 0x3f;
11016 if (!xmlIsCharQ(codepoint))
11017 return(-ix);
11018 ix += 4;
11019 } else /* unknown encoding */
11020 return(-ix);
11021 }
11022 return(ix);
11023}
11024
/**
 * xmlParseTryOrFinish:
 * @ctxt:  an XML parser context
 * @terminate:  last chunk indicator
 *
 * Try to progress on parsing.
 *
 * This is the state machine of the push parser. It loops, dispatching on
 * ctxt->instate, for as long as SAX processing is enabled and the input
 * buffer is not empty. Unless @terminate is set, each state first checks
 * that the construct it is about to parse is fully buffered (using the
 * xmlParseLookup* helpers or a minimum byte count) and leaves the
 * function otherwise, so that parsing can resume when more data arrives.
 *
 * Returns the local counter 'ret', which is only ever increased (by 5)
 * when an XML declaration is parsed, and 0 on the encoding error path.
 */
static int
xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
    int ret = 0;
    size_t avail;
    xmlChar cur, next;

    if (ctxt->input == NULL)
        return(0);

    /* Shrink the buffer once more than 4096 bytes precede the cursor. */
    if ((ctxt->input != NULL) &&
        (ctxt->input->cur - ctxt->input->base > 4096)) {
        xmlParserShrink(ctxt);
    }

    while (ctxt->disableSAX == 0) {
        avail = ctxt->input->end - ctxt->input->cur;
        if (avail < 1)
            goto done;
        switch (ctxt->instate) {
            case XML_PARSER_EOF:
                /*
                 * Document parsing is done !
                 */
                goto done;
            case XML_PARSER_START:
                /*
                 * Very first chars read from the document flow.
                 */
                if ((!terminate) && (avail < 4))
                    goto done;

                /*
                 * We need more bytes to detect EBCDIC code pages.
                 * See xmlDetectEBCDIC.
                 */
                if ((CMP4(CUR_PTR, 0x4C, 0x6F, 0xA7, 0x94)) &&
                    (!terminate) && (avail < 200))
                    goto done;

                xmlDetectEncoding(ctxt);
                ctxt->instate = XML_PARSER_XML_DECL;
                break;

            case XML_PARSER_XML_DECL:
                if ((!terminate) && (avail < 2))
                    goto done;
                cur = ctxt->input->cur[0];
                next = ctxt->input->cur[1];
                if ((cur == '<') && (next == '?')) {
                    /* PI or XML decl */
                    if ((!terminate) &&
                        (!xmlParseLookupString(ctxt, 2, "?>", 2)))
                        goto done;
                    if ((ctxt->input->cur[2] == 'x') &&
                        (ctxt->input->cur[3] == 'm') &&
                        (ctxt->input->cur[4] == 'l') &&
                        (IS_BLANK_CH(ctxt->input->cur[5]))) {
                        /* The only place where the return value changes. */
                        ret += 5;
                        xmlParseXMLDecl(ctxt);
                    } else {
                        /*
                         * A plain PI: use the default version and leave
                         * the PI itself to the XML_PARSER_MISC state.
                         */
                        ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
                        if (ctxt->version == NULL) {
                            xmlErrMemory(ctxt);
                            break;
                        }
                    }
                } else {
                    /* No XML declaration: use the default version. */
                    ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
                    if (ctxt->version == NULL) {
                        xmlErrMemory(ctxt);
                        break;
                    }
                }
                if ((ctxt->sax) && (ctxt->sax->setDocumentLocator)) {
                    ctxt->sax->setDocumentLocator(ctxt->userData,
                            (xmlSAXLocator *) &xmlDefaultSAXLocator);
                }
                if ((ctxt->sax) && (ctxt->sax->startDocument) &&
                    (!ctxt->disableSAX))
                    ctxt->sax->startDocument(ctxt->userData);
                ctxt->instate = XML_PARSER_MISC;
                break;
            case XML_PARSER_START_TAG: {
                const xmlChar *name;
                const xmlChar *prefix = NULL;
                const xmlChar *URI = NULL;
                /* Line of the start tag, stored with the pushed element. */
                int line = ctxt->input->line;
                int nbNs = 0;

                if ((!terminate) && (avail < 2))
                    goto done;
                cur = ctxt->input->cur[0];
                if (cur != '<') {
                    xmlFatalErrMsg(ctxt, XML_ERR_DOCUMENT_EMPTY,
                                   "Start tag expected, '<' not found");
                    ctxt->instate = XML_PARSER_EOF;
                    xmlFinishDocument(ctxt);
                    goto done;
                }
                /* Wait until the whole start tag is buffered. */
                if ((!terminate) && (!xmlParseLookupGt(ctxt)))
                    goto done;
                /*
                 * Push a space entry for the new element: -1 when the
                 * stack is empty or its top is -2, a copy of the top
                 * entry otherwise.
                 */
                if (ctxt->spaceNr == 0)
                    spacePush(ctxt, -1);
                else if (*ctxt->space == -2)
                    spacePush(ctxt, -1);
                else
                    spacePush(ctxt, *ctxt->space);
#ifdef LIBXML_SAX1_ENABLED
                if (ctxt->sax2)
#endif /* LIBXML_SAX1_ENABLED */
                    name = xmlParseStartTag2(ctxt, &prefix, &URI, &nbNs);
#ifdef LIBXML_SAX1_ENABLED
                else
                    name = xmlParseStartTag(ctxt);
#endif /* LIBXML_SAX1_ENABLED */
                if (name == NULL) {
                    /* The start tag could not be parsed: stop here. */
                    spacePop(ctxt);
                    ctxt->instate = XML_PARSER_EOF;
                    xmlFinishDocument(ctxt);
                    goto done;
                }
#ifdef LIBXML_VALID_ENABLED
                /*
                 * [ VC: Root Element Type ]
                 * The Name in the document type declaration must match
                 * the element type of the root element.
                 */
                if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
                    ctxt->node && (ctxt->node == ctxt->myDoc->children))
                    ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
#endif /* LIBXML_VALID_ENABLED */

                /*
                 * Check for an Empty Element.
                 */
                if ((RAW == '/') && (NXT(1) == '>')) {
                    SKIP(2);

                    /* Empty element: deliver the end event right away. */
                    if (ctxt->sax2) {
                        if ((ctxt->sax != NULL) &&
                            (ctxt->sax->endElementNs != NULL) &&
                            (!ctxt->disableSAX))
                            ctxt->sax->endElementNs(ctxt->userData, name,
                                                    prefix, URI);
                        if (nbNs > 0)
                            xmlParserNsPop(ctxt, nbNs);
#ifdef LIBXML_SAX1_ENABLED
                    } else {
                        if ((ctxt->sax != NULL) &&
                            (ctxt->sax->endElement != NULL) &&
                            (!ctxt->disableSAX))
                            ctxt->sax->endElement(ctxt->userData, name);
#endif /* LIBXML_SAX1_ENABLED */
                    }
                    spacePop(ctxt);
                } else if (RAW == '>') {
                    NEXT;
                    /* Element stays open until its end tag is parsed. */
                    nameNsPush(ctxt, name, prefix, URI, line, nbNs);
                } else {
                    xmlFatalErrMsgStr(ctxt, XML_ERR_GT_REQUIRED,
                                      "Couldn't find end of Start Tag %s\n",
                                      name);
                    nodePop(ctxt);
                    spacePop(ctxt);
                    if (nbNs > 0)
                        xmlParserNsPop(ctxt, nbNs);
                }

                /* No open element left: the root element is finished. */
                if (ctxt->nameNr == 0)
                    ctxt->instate = XML_PARSER_EPILOG;
                else
                    ctxt->instate = XML_PARSER_CONTENT;
                break;
            }
            case XML_PARSER_CONTENT: {
                cur = ctxt->input->cur[0];

                if (cur == '<') {
                    if ((!terminate) && (avail < 2))
                        goto done;
                    next = ctxt->input->cur[1];

                    if (next == '/') {
                        ctxt->instate = XML_PARSER_END_TAG;
                        break;
                    } else if (next == '?') {
                        if ((!terminate) &&
                            (!xmlParseLookupString(ctxt, 2, "?>", 2)))
                            goto done;
                        xmlParsePI(ctxt);
                        ctxt->instate = XML_PARSER_CONTENT;
                        break;
                    } else if (next == '!') {
                        if ((!terminate) && (avail < 3))
                            goto done;
                        next = ctxt->input->cur[2];

                        if (next == '-') {
                            if ((!terminate) && (avail < 4))
                                goto done;
                            if (ctxt->input->cur[3] == '-') {
                                if ((!terminate) &&
                                    (!xmlParseLookupString(ctxt, 4, "-->", 3)))
                                    goto done;
                                xmlParseComment(ctxt);
                                ctxt->instate = XML_PARSER_CONTENT;
                                break;
                            }
                        } else if (next == '[') {
                            /* "<![CDATA[" is 9 bytes long. */
                            if ((!terminate) && (avail < 9))
                                goto done;
                            if ((ctxt->input->cur[2] == '[') &&
                                (ctxt->input->cur[3] == 'C') &&
                                (ctxt->input->cur[4] == 'D') &&
                                (ctxt->input->cur[5] == 'A') &&
                                (ctxt->input->cur[6] == 'T') &&
                                (ctxt->input->cur[7] == 'A') &&
                                (ctxt->input->cur[8] == '[')) {
                                SKIP(9);
                                ctxt->instate = XML_PARSER_CDATA_SECTION;
                                break;
                            }
                        }
                    }
                } else if (cur == '&') {
                    if ((!terminate) && (!xmlParseLookupChar(ctxt, ';')))
                        goto done;
                    xmlParseReference(ctxt);
                    break;
                } else {
                    /* TODO Avoid the extra copy, handle directly !!! */
                    /*
                     * Goal of the following test is:
                     *  - minimize calls to the SAX 'character' callback
                     *    when they are mergeable
                     *  - handle a problem for isBlank when we only parse
                     *    a sequence of blank chars and the next one is
                     *    not available to check against '<' presence.
                     *  - tries to homogenize the differences in SAX
                     *    callbacks between the push and pull versions
                     *    of the parser.
                     */
                    if (avail < XML_PARSER_BIG_BUFFER_SIZE) {
                        if ((!terminate) && (!xmlParseLookupCharData(ctxt)))
                            goto done;
                    }
                    ctxt->checkIndex = 0;
                    xmlParseCharDataInternal(ctxt, !terminate);
                    break;
                }

                /*
                 * Reached for any '<' that did not start an end tag, a
                 * PI, a comment or a CDATA section.
                 */
                ctxt->instate = XML_PARSER_START_TAG;
                break;
            }
            case XML_PARSER_END_TAG:
                if ((!terminate) && (!xmlParseLookupChar(ctxt, '>')))
                    goto done;
                if (ctxt->sax2) {
                    xmlParseEndTag2(ctxt, &ctxt->pushTab[ctxt->nameNr - 1]);
                    nameNsPop(ctxt);
                }
#ifdef LIBXML_SAX1_ENABLED
                else
                    xmlParseEndTag1(ctxt, 0);
#endif /* LIBXML_SAX1_ENABLED */
                if (ctxt->nameNr == 0) {
                    ctxt->instate = XML_PARSER_EPILOG;
                } else {
                    ctxt->instate = XML_PARSER_CONTENT;
                }
                break;
            case XML_PARSER_CDATA_SECTION: {
                /*
                 * The Push mode need to have the SAX callback for
                 * cdataBlock merge back contiguous callbacks.
                 */
                const xmlChar *term;

                if (terminate) {
                    /*
                     * Don't call xmlParseLookupString. If 'terminate'
                     * is set, checkIndex is invalid.
                     */
                    term = BAD_CAST strstr((const char *) ctxt->input->cur,
                                           "]]>");
                } else {
                    term = xmlParseLookupString(ctxt, 0, "]]>", 3);
                }

                if (term == NULL) {
                    /*
                     * No "]]>" in the buffer yet: deliver a partial
                     * block of CDATA content.
                     */
                    int tmp, size;

                    if (terminate) {
                        /* Unfinished CDATA section */
                        size = ctxt->input->end - ctxt->input->cur;
                    } else {
                        if (avail < XML_PARSER_BIG_BUFFER_SIZE + 2)
                            goto done;
                        ctxt->checkIndex = 0;
                        /* XXX: Why don't we pass the full buffer? */
                        size = XML_PARSER_BIG_BUFFER_SIZE;
                    }
                    /*
                     * tmp is the number of bytes that can be passed on;
                     * zero or a negated index means an encoding error.
                     */
                    tmp = xmlCheckCdataPush(ctxt->input->cur, size, 0);
                    if (tmp <= 0) {
                        tmp = -tmp;
                        ctxt->input->cur += tmp;
                        goto encoding_error;
                    }
                    if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
                        if (ctxt->sax->cdataBlock != NULL)
                            ctxt->sax->cdataBlock(ctxt->userData,
                                                  ctxt->input->cur, tmp);
                        else if (ctxt->sax->characters != NULL)
                            ctxt->sax->characters(ctxt->userData,
                                                  ctxt->input->cur, tmp);
                    }
                    SKIPL(tmp);
                } else {
                    /* base: number of content bytes before "]]>" */
                    int base = term - CUR_PTR;
                    int tmp;

                    tmp = xmlCheckCdataPush(ctxt->input->cur, base, 1);
                    if ((tmp < 0) || (tmp != base)) {
                        tmp = -tmp;
                        ctxt->input->cur += tmp;
                        goto encoding_error;
                    }
                    if ((ctxt->sax != NULL) && (base == 0) &&
                        (ctxt->sax->cdataBlock != NULL) &&
                        (!ctxt->disableSAX)) {
                        /*
                         * Special case to provide identical behaviour
                         * between pull and push parsers on empty CDATA
                         * sections
                         */
                        if ((ctxt->input->cur - ctxt->input->base >= 9) &&
                            (!strncmp((const char *)&ctxt->input->cur[-9],
                                      "<![CDATA[", 9)))
                            ctxt->sax->cdataBlock(ctxt->userData,
                                                  BAD_CAST "", 0);
                    } else if ((ctxt->sax != NULL) && (base > 0) &&
                               (!ctxt->disableSAX)) {
                        if (ctxt->sax->cdataBlock != NULL)
                            ctxt->sax->cdataBlock(ctxt->userData,
                                                  ctxt->input->cur, base);
                        else if (ctxt->sax->characters != NULL)
                            ctxt->sax->characters(ctxt->userData,
                                                  ctxt->input->cur, base);
                    }
                    /* Skip the content and the 3 bytes of "]]>". */
                    SKIPL(base + 3);
                    ctxt->instate = XML_PARSER_CONTENT;
                }
                break;
            }
            case XML_PARSER_MISC:
            case XML_PARSER_PROLOG:
            case XML_PARSER_EPILOG:
                /*
                 * The three states share this code. A DOCTYPE is only
                 * looked for in XML_PARSER_MISC.
                 */
                SKIP_BLANKS;
                avail = ctxt->input->end - ctxt->input->cur;
                if (avail < 1)
                    goto done;
                if (ctxt->input->cur[0] == '<') {
                    if ((!terminate) && (avail < 2))
                        goto done;
                    next = ctxt->input->cur[1];
                    if (next == '?') {
                        if ((!terminate) &&
                            (!xmlParseLookupString(ctxt, 2, "?>", 2)))
                            goto done;
                        xmlParsePI(ctxt);
                        break;
                    } else if (next == '!') {
                        if ((!terminate) && (avail < 3))
                            goto done;

                        if (ctxt->input->cur[2] == '-') {
                            if ((!terminate) && (avail < 4))
                                goto done;
                            if (ctxt->input->cur[3] == '-') {
                                if ((!terminate) &&
                                    (!xmlParseLookupString(ctxt, 4, "-->", 3)))
                                    goto done;
                                xmlParseComment(ctxt);
                                break;
                            }
                        } else if (ctxt->instate == XML_PARSER_MISC) {
                            /* "<!DOCTYPE" is 9 bytes long. */
                            if ((!terminate) && (avail < 9))
                                goto done;
                            if ((ctxt->input->cur[2] == 'D') &&
                                (ctxt->input->cur[3] == 'O') &&
                                (ctxt->input->cur[4] == 'C') &&
                                (ctxt->input->cur[5] == 'T') &&
                                (ctxt->input->cur[6] == 'Y') &&
                                (ctxt->input->cur[7] == 'P') &&
                                (ctxt->input->cur[8] == 'E')) {
                                if ((!terminate) && (!xmlParseLookupGt(ctxt)))
                                    goto done;
                                ctxt->inSubset = 1;
                                xmlParseDocTypeDecl(ctxt);
                                if (RAW == '[') {
                                    /* Internal subset follows. */
                                    ctxt->instate = XML_PARSER_DTD;
                                } else {
                                    /*
                                     * Create and update the external subset.
                                     */
                                    ctxt->inSubset = 2;
                                    if ((ctxt->sax != NULL) &&
                                        (!ctxt->disableSAX) &&
                                        (ctxt->sax->externalSubset != NULL))
                                        ctxt->sax->externalSubset(
                                                ctxt->userData,
                                                ctxt->intSubName,
                                                ctxt->extSubSystem,
                                                ctxt->extSubURI);
                                    ctxt->inSubset = 0;
                                    xmlCleanSpecialAttr(ctxt);
                                    ctxt->instate = XML_PARSER_PROLOG;
                                }
                                break;
                            }
                        }
                    }
                }

                /*
                 * Anything else: an error in the epilog (reported only
                 * if no error was recorded before), otherwise it is
                 * handed over to the start tag state.
                 */
                if (ctxt->instate == XML_PARSER_EPILOG) {
                    if (ctxt->errNo == XML_ERR_OK)
                        xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
                    ctxt->instate = XML_PARSER_EOF;
                    xmlFinishDocument(ctxt);
                } else {
                    ctxt->instate = XML_PARSER_START_TAG;
                }
                break;
            case XML_PARSER_DTD: {
                /* The internal subset is only parsed once fully buffered. */
                if ((!terminate) && (!xmlParseLookupInternalSubset(ctxt)))
                    goto done;
                xmlParseInternalSubset(ctxt);
                ctxt->inSubset = 2;
                if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
                    (ctxt->sax->externalSubset != NULL))
                    ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
                            ctxt->extSubSystem, ctxt->extSubURI);
                ctxt->inSubset = 0;
                xmlCleanSpecialAttr(ctxt);
                ctxt->instate = XML_PARSER_PROLOG;
                break;
            }
            default:
                xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
                               "PP: internal error\n");
                ctxt->instate = XML_PARSER_EOF;
                break;
        }
    }
done:
    return(ret);
encoding_error:
    /* Only report the first error */
    if ((ctxt->input->flags & XML_INPUT_ENCODING_ERROR) == 0) {
        xmlCtxtErrIO(ctxt, XML_ERR_INVALID_ENCODING, NULL);
        ctxt->input->flags |= XML_INPUT_ENCODING_ERROR;
    }
    return(0);
}
11498
11499/**
11500 * xmlParseChunk:
11501 * @ctxt: an XML parser context
11502 * @chunk: chunk of memory
11503 * @size: size of chunk in bytes
11504 * @terminate: last chunk indicator
11505 *
11506 * Parse a chunk of memory in push parser mode.
11507 *
11508 * Assumes that the parser context was initialized with
11509 * xmlCreatePushParserCtxt.
11510 *
11511 * The last chunk, which will often be empty, must be marked with
11512 * the @terminate flag. With the default SAX callbacks, the resulting
11513 * document will be available in ctxt->myDoc. This pointer will not
11514 * be freed by the library.
11515 *
11516 * If the document isn't well-formed, ctxt->myDoc is set to NULL.
11517 * The push parser doesn't support recovery mode.
11518 *
11519 * Returns an xmlParserErrors code (0 on success).
11520 */
11521int
11522xmlParseChunk(xmlParserCtxtPtr ctxt, const char *chunk, int size,
11523 int terminate) {
11524 size_t curBase;
11525 size_t maxLength;
11526 int end_in_lf = 0;
11527
11528 if ((ctxt == NULL) || (size < 0))
11529 return(XML_ERR_ARGUMENT);
11530 if (ctxt->disableSAX != 0)
11531 return(ctxt->errNo);
11532 if (ctxt->input == NULL)
11533 return(XML_ERR_INTERNAL_ERROR);
11534
11535 ctxt->input->flags |= XML_INPUT_PROGRESSIVE;
11536 if (ctxt->instate == XML_PARSER_START)
11537 xmlCtxtInitializeLate(ctxt);
11538 if ((size > 0) && (chunk != NULL) && (!terminate) &&
11539 (chunk[size - 1] == '\r')) {
11540 end_in_lf = 1;
11541 size--;
11542 }
11543
11544 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
11545 (ctxt->input->buf != NULL)) {
11546 size_t pos = ctxt->input->cur - ctxt->input->base;
11547 int res;
11548
11549 res = xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
11550 xmlBufUpdateInput(ctxt->input->buf->buffer, ctxt->input, pos);
11551 if (res < 0) {
11552 xmlCtxtErrIO(ctxt, ctxt->input->buf->error, NULL);
11553 xmlHaltParser(ctxt);
11554 return(ctxt->errNo);
11555 }
11556 }
11557
11558 xmlParseTryOrFinish(ctxt, terminate);
11559
11560 curBase = ctxt->input->cur - ctxt->input->base;
11561 maxLength = (ctxt->options & XML_PARSE_HUGE) ?
11562 XML_MAX_HUGE_LENGTH :
11563 XML_MAX_LOOKUP_LIMIT;
11564 if (curBase > maxLength) {
11565 xmlFatalErr(ctxt, XML_ERR_RESOURCE_LIMIT,
11566 "Buffer size limit exceeded, try XML_PARSE_HUGE\n");
11567 xmlHaltParser(ctxt);
11568 }
11569
11570 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
11571 return(ctxt->errNo);
11572
11573 if ((end_in_lf == 1) && (ctxt->input != NULL) &&
11574 (ctxt->input->buf != NULL)) {
11575 size_t pos = ctxt->input->cur - ctxt->input->base;
11576 int res;
11577
11578 res = xmlParserInputBufferPush(ctxt->input->buf, 1, "\r");
11579 xmlBufUpdateInput(ctxt->input->buf->buffer, ctxt->input, pos);
11580 if (res < 0) {
11581 xmlCtxtErrIO(ctxt, ctxt->input->buf->error, NULL);
11582 xmlHaltParser(ctxt);
11583 return(ctxt->errNo);
11584 }
11585 }
11586 if (terminate) {
11587 /*
11588 * Check for termination
11589 */
11590 if ((ctxt->instate != XML_PARSER_EOF) &&
11591 (ctxt->instate != XML_PARSER_EPILOG)) {
11592 if (ctxt->nameNr > 0) {
11593 const xmlChar *name = ctxt->nameTab[ctxt->nameNr - 1];
11594 int line = ctxt->pushTab[ctxt->nameNr - 1].line;
11595 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NOT_FINISHED,
11596 "Premature end of data in tag %s line %d\n",
11597 name, line, NULL);
11598 } else if (ctxt->instate == XML_PARSER_START) {
11599 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
11600 } else {
11601 xmlFatalErrMsg(ctxt, XML_ERR_DOCUMENT_EMPTY,
11602 "Start tag expected, '<' not found\n");
11603 }
11604 } else if ((ctxt->input->buf != NULL) &&
11605 (ctxt->input->buf->encoder != NULL) &&
11606 (ctxt->input->buf->error == 0) &&
11607 (!xmlBufIsEmpty(ctxt->input->buf->raw))) {
11608 xmlFatalErrMsg(ctxt, XML_ERR_INVALID_CHAR,
11609 "Truncated multi-byte sequence at EOF\n");
11610 }
11611 if (ctxt->instate != XML_PARSER_EOF) {
11612 ctxt->instate = XML_PARSER_EOF;
11613 xmlFinishDocument(ctxt);
11614 }
11615 }
11616 if (ctxt->wellFormed == 0)
11617 return((xmlParserErrors) ctxt->errNo);
11618 else
11619 return(0);
11620}
11621
11622/************************************************************************
11623 * *
11624 * I/O front end functions to the parser *
11625 * *
11626 ************************************************************************/
11627
11628/**
11629 * xmlCreatePushParserCtxt:
11630 * @sax: a SAX handler (optional)
11631 * @user_data: user data for SAX callbacks (optional)
11632 * @chunk: initial chunk (optional, deprecated)
11633 * @size: size of initial chunk in bytes
11634 * @filename: file name or URI (optional)
11635 *
11636 * Create a parser context for using the XML parser in push mode.
11637 * See xmlParseChunk.
11638 *
11639 * Passing an initial chunk is useless and deprecated.
11640 *
11641 * @filename is used as base URI to fetch external entities and for
11642 * error reports.
11643 *
11644 * Returns the new parser context or NULL in case of error.
11645 */
11646
11647xmlParserCtxtPtr
11648xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
11649 const char *chunk, int size, const char *filename) {
11650 xmlParserCtxtPtr ctxt;
11651 xmlParserInputPtr input;
11652
11653 ctxt = xmlNewSAXParserCtxt(sax, user_data);
11654 if (ctxt == NULL)
11655 return(NULL);
11656
11657 ctxt->options &= ~XML_PARSE_NODICT;
11658 ctxt->dictNames = 1;
11659
11660 input = xmlNewInputPush(ctxt, filename, chunk, size, NULL);
11661 if (input == NULL) {
11662 xmlFreeParserCtxt(ctxt);
11663 return(NULL);
11664 }
11665 inputPush(ctxt, input);
11666
11667 return(ctxt);
11668}
11669#endif /* LIBXML_PUSH_ENABLED */
11670
11671/**
11672 * xmlStopParser:
11673 * @ctxt: an XML parser context
11674 *
11675 * Blocks further parser processing
11676 */
11677void
11678xmlStopParser(xmlParserCtxtPtr ctxt) {
11679 if (ctxt == NULL)
11680 return;
11681 xmlHaltParser(ctxt);
11682 if (ctxt->errNo != XML_ERR_NO_MEMORY)
11683 ctxt->errNo = XML_ERR_USER_STOP;
11684}
11685
11686/**
11687 * xmlCreateIOParserCtxt:
11688 * @sax: a SAX handler (optional)
11689 * @user_data: user data for SAX callbacks (optional)
11690 * @ioread: an I/O read function
11691 * @ioclose: an I/O close function (optional)
11692 * @ioctx: an I/O handler
11693 * @enc: the charset encoding if known (deprecated)
11694 *
11695 * Create a parser context for using the XML parser with an existing
11696 * I/O stream
11697 *
11698 * Returns the new parser context or NULL
11699 */
11700xmlParserCtxtPtr
11701xmlCreateIOParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
11702 xmlInputReadCallback ioread,
11703 xmlInputCloseCallback ioclose,
11704 void *ioctx, xmlCharEncoding enc) {
11705 xmlParserCtxtPtr ctxt;
11706 xmlParserInputPtr input;
11707 const char *encoding;
11708
11709 ctxt = xmlNewSAXParserCtxt(sax, user_data);
11710 if (ctxt == NULL)
11711 return(NULL);
11712
11713 encoding = xmlGetCharEncodingName(enc);
11714 input = xmlNewInputIO(ctxt, NULL, ioread, ioclose, ioctx, encoding, 0);
11715 if (input == NULL) {
11716 xmlFreeParserCtxt(ctxt);
11717 return (NULL);
11718 }
11719 inputPush(ctxt, input);
11720
11721 return(ctxt);
11722}
11723
11724#ifdef LIBXML_VALID_ENABLED
11725/************************************************************************
11726 * *
11727 * Front ends when parsing a DTD *
11728 * *
11729 ************************************************************************/
11730
11731/**
11732 * xmlIOParseDTD:
11733 * @sax: the SAX handler block or NULL
11734 * @input: an Input Buffer
11735 * @enc: the charset encoding if known
11736 *
11737 * Load and parse a DTD
11738 *
11739 * Returns the resulting xmlDtdPtr or NULL in case of error.
11740 * @input will be freed by the function in any case.
11741 */
11742
11743xmlDtdPtr
11744xmlIOParseDTD(xmlSAXHandlerPtr sax, xmlParserInputBufferPtr input,
11745 xmlCharEncoding enc) {
11746 xmlDtdPtr ret = NULL;
11747 xmlParserCtxtPtr ctxt;
11748 xmlParserInputPtr pinput = NULL;
11749
11750 if (input == NULL)
11751 return(NULL);
11752
11753 ctxt = xmlNewSAXParserCtxt(sax, NULL);
11754 if (ctxt == NULL) {
11755 xmlFreeParserInputBuffer(input);
11756 return(NULL);
11757 }
11758
11759 /*
11760 * generate a parser input from the I/O handler
11761 */
11762
11763 pinput = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
11764 if (pinput == NULL) {
11765 xmlFreeParserInputBuffer(input);
11766 xmlFreeParserCtxt(ctxt);
11767 return(NULL);
11768 }
11769
11770 /*
11771 * plug some encoding conversion routines here.
11772 */
11773 if (xmlPushInput(ctxt, pinput) < 0) {
11774 xmlFreeParserCtxt(ctxt);
11775 return(NULL);
11776 }
11777 if (enc != XML_CHAR_ENCODING_NONE) {
11778 xmlSwitchEncoding(ctxt, enc);
11779 }
11780
11781 /*
11782 * let's parse that entity knowing it's an external subset.
11783 */
11784 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
11785 if (ctxt->myDoc == NULL) {
11786 xmlErrMemory(ctxt);
11787 return(NULL);
11788 }
11789 ctxt->myDoc->properties = XML_DOC_INTERNAL;
11790 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
11791 BAD_CAST "none", BAD_CAST "none");
11792
11793 xmlParseExternalSubset(ctxt, BAD_CAST "none", BAD_CAST "none");
11794
11795 if (ctxt->myDoc != NULL) {
11796 if (ctxt->wellFormed) {
11797 ret = ctxt->myDoc->extSubset;
11798 ctxt->myDoc->extSubset = NULL;
11799 if (ret != NULL) {
11800 xmlNodePtr tmp;
11801
11802 ret->doc = NULL;
11803 tmp = ret->children;
11804 while (tmp != NULL) {
11805 tmp->doc = NULL;
11806 tmp = tmp->next;
11807 }
11808 }
11809 } else {
11810 ret = NULL;
11811 }
11812 xmlFreeDoc(ctxt->myDoc);
11813 ctxt->myDoc = NULL;
11814 }
11815 xmlFreeParserCtxt(ctxt);
11816
11817 return(ret);
11818}
11819
11820/**
11821 * xmlSAXParseDTD:
11822 * @sax: the SAX handler block
11823 * @ExternalID: a NAME* containing the External ID of the DTD
11824 * @SystemID: a NAME* containing the URL to the DTD
11825 *
11826 * DEPRECATED: Don't use.
11827 *
11828 * Load and parse an external subset.
11829 *
11830 * Returns the resulting xmlDtdPtr or NULL in case of error.
11831 */
11832
11833xmlDtdPtr
11834xmlSAXParseDTD(xmlSAXHandlerPtr sax, const xmlChar *ExternalID,
11835 const xmlChar *SystemID) {
11836 xmlDtdPtr ret = NULL;
11837 xmlParserCtxtPtr ctxt;
11838 xmlParserInputPtr input = NULL;
11839 xmlChar* systemIdCanonic;
11840
11841 if ((ExternalID == NULL) && (SystemID == NULL)) return(NULL);
11842
11843 ctxt = xmlNewSAXParserCtxt(sax, NULL);
11844 if (ctxt == NULL) {
11845 return(NULL);
11846 }
11847
11848 /*
11849 * Canonicalise the system ID
11850 */
11851 systemIdCanonic = xmlCanonicPath(SystemID);
11852 if ((SystemID != NULL) && (systemIdCanonic == NULL)) {
11853 xmlFreeParserCtxt(ctxt);
11854 return(NULL);
11855 }
11856
11857 /*
11858 * Ask the Entity resolver to load the damn thing
11859 */
11860
11861 if ((ctxt->sax != NULL) && (ctxt->sax->resolveEntity != NULL))
11862 input = ctxt->sax->resolveEntity(ctxt->userData, ExternalID,
11863 systemIdCanonic);
11864 if (input == NULL) {
11865 xmlFreeParserCtxt(ctxt);
11866 if (systemIdCanonic != NULL)
11867 xmlFree(systemIdCanonic);
11868 return(NULL);
11869 }
11870
11871 /*
11872 * plug some encoding conversion routines here.
11873 */
11874 if (xmlPushInput(ctxt, input) < 0) {
11875 xmlFreeParserCtxt(ctxt);
11876 if (systemIdCanonic != NULL)
11877 xmlFree(systemIdCanonic);
11878 return(NULL);
11879 }
11880
11881 xmlDetectEncoding(ctxt);
11882
11883 if (input->filename == NULL)
11884 input->filename = (char *) systemIdCanonic;
11885 else
11886 xmlFree(systemIdCanonic);
11887
11888 /*
11889 * let's parse that entity knowing it's an external subset.
11890 */
11891 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
11892 if (ctxt->myDoc == NULL) {
11893 xmlErrMemory(ctxt);
11894 xmlFreeParserCtxt(ctxt);
11895 return(NULL);
11896 }
11897 ctxt->myDoc->properties = XML_DOC_INTERNAL;
11898 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
11899 ExternalID, SystemID);
11900 if (ctxt->myDoc->extSubset == NULL) {
11901 xmlFreeDoc(ctxt->myDoc);
11902 xmlFreeParserCtxt(ctxt);
11903 return(NULL);
11904 }
11905 xmlParseExternalSubset(ctxt, ExternalID, SystemID);
11906
11907 if (ctxt->myDoc != NULL) {
11908 if (ctxt->wellFormed) {
11909 ret = ctxt->myDoc->extSubset;
11910 ctxt->myDoc->extSubset = NULL;
11911 if (ret != NULL) {
11912 xmlNodePtr tmp;
11913
11914 ret->doc = NULL;
11915 tmp = ret->children;
11916 while (tmp != NULL) {
11917 tmp->doc = NULL;
11918 tmp = tmp->next;
11919 }
11920 }
11921 } else {
11922 ret = NULL;
11923 }
11924 xmlFreeDoc(ctxt->myDoc);
11925 ctxt->myDoc = NULL;
11926 }
11927 xmlFreeParserCtxt(ctxt);
11928
11929 return(ret);
11930}
11931
11932
11933/**
11934 * xmlParseDTD:
11935 * @ExternalID: a NAME* containing the External ID of the DTD
11936 * @SystemID: a NAME* containing the URL to the DTD
11937 *
11938 * Load and parse an external subset.
11939 *
11940 * Returns the resulting xmlDtdPtr or NULL in case of error.
11941 */
11942
11943xmlDtdPtr
11944xmlParseDTD(const xmlChar *ExternalID, const xmlChar *SystemID) {
11945 return(xmlSAXParseDTD(NULL, ExternalID, SystemID));
11946}
11947#endif /* LIBXML_VALID_ENABLED */
11948
11949/************************************************************************
11950 * *
11951 * Front ends when parsing an Entity *
11952 * *
11953 ************************************************************************/
11954
11955static xmlNodePtr
11956xmlCtxtParseContent(xmlParserCtxtPtr ctxt, xmlParserInputPtr input,
11957 int hasTextDecl, int buildTree) {
11958 xmlNodePtr root = NULL;
11959 xmlNodePtr list = NULL;
11960 xmlChar *rootName = BAD_CAST "#root";
11961 int result;
11962
11963 if (buildTree) {
11964 root = xmlNewDocNode(ctxt->myDoc, NULL, rootName, NULL);
11965 if (root == NULL) {
11966 xmlErrMemory(ctxt);
11967 goto error;
11968 }
11969 }
11970
11971 if (xmlPushInput(ctxt, input) < 0)
11972 goto error;
11973
11974 nameNsPush(ctxt, rootName, NULL, NULL, 0, 0);
11975 spacePush(ctxt, -1);
11976
11977 if (buildTree)
11978 nodePush(ctxt, root);
11979
11980 if (hasTextDecl) {
11981 xmlDetectEncoding(ctxt);
11982
11983 /*
11984 * Parse a possible text declaration first
11985 */
11986 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) &&
11987 (IS_BLANK_CH(NXT(5)))) {
11988 xmlParseTextDecl(ctxt);
11989 /*
11990 * An XML-1.0 document can't reference an entity not XML-1.0
11991 */
11992 if ((xmlStrEqual(ctxt->version, BAD_CAST "1.0")) &&
11993 (!xmlStrEqual(ctxt->input->version, BAD_CAST "1.0"))) {
11994 xmlFatalErrMsg(ctxt, XML_ERR_VERSION_MISMATCH,
11995 "Version mismatch between document and "
11996 "entity\n");
11997 }
11998 }
11999 }
12000
12001 xmlParseContentInternal(ctxt);
12002
12003 if (ctxt->input->cur < ctxt->input->end)
12004 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
12005
12006 if ((ctxt->wellFormed) ||
12007 ((ctxt->recovery) && (ctxt->errNo != XML_ERR_NO_MEMORY))) {
12008 if (root != NULL) {
12009 xmlNodePtr cur;
12010
12011 /*
12012 * Return the newly created nodeset after unlinking it from
12013 * its pseudo parent.
12014 */
12015 cur = root->children;
12016 list = cur;
12017 while (cur != NULL) {
12018 cur->parent = NULL;
12019 cur = cur->next;
12020 }
12021 root->children = NULL;
12022 root->last = NULL;
12023 }
12024 }
12025
12026 /*
12027 * Read the rest of the stream in case of errors. We want
12028 * to account for the whole entity size.
12029 */
12030 do {
12031 ctxt->input->cur = ctxt->input->end;
12032 xmlParserShrink(ctxt);
12033 result = xmlParserGrow(ctxt);
12034 } while (result > 0);
12035
12036 if (buildTree)
12037 nodePop(ctxt);
12038
12039 namePop(ctxt);
12040 spacePop(ctxt);
12041
12042 /* xmlPopInput would free the stream */
12043 inputPop(ctxt);
12044
12045error:
12046 xmlFreeNode(root);
12047
12048 return(list);
12049}
12050
12051static void
12052xmlCtxtParseEntity(xmlParserCtxtPtr ctxt, xmlEntityPtr ent) {
12053 xmlParserInputPtr input;
12054 xmlNodePtr list;
12055 unsigned long consumed;
12056 int isExternal;
12057 int buildTree;
12058 int oldMinNsIndex;
12059 int oldNodelen, oldNodemem;
12060
12061 isExternal = (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY);
12062 buildTree = (ctxt->node != NULL);
12063
12064 /*
12065 * Recursion check
12066 */
12067 if (ent->flags & XML_ENT_EXPANDING) {
12068 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
12069 xmlHaltParser(ctxt);
12070 goto error;
12071 }
12072
12073 /*
12074 * Load entity
12075 */
12076 input = xmlNewEntityInputStream(ctxt, ent);
12077 if (input == NULL)
12078 goto error;
12079
12080 /*
12081 * When building a tree, we need to limit the scope of namespace
12082 * declarations, so that entities don't reference xmlNs structs
12083 * from the parent of a reference.
12084 */
12085 oldMinNsIndex = ctxt->nsdb->minNsIndex;
12086 if (buildTree)
12087 ctxt->nsdb->minNsIndex = ctxt->nsNr;
12088
12089 oldNodelen = ctxt->nodelen;
12090 oldNodemem = ctxt->nodemem;
12091 ctxt->nodelen = 0;
12092 ctxt->nodemem = 0;
12093
12094 /*
12095 * Parse content
12096 *
12097 * This initiates a recursive call chain:
12098 *
12099 * - xmlCtxtParseContent
12100 * - xmlParseContentInternal
12101 * - xmlParseReference
12102 * - xmlCtxtParseEntity
12103 *
12104 * The nesting depth is limited by the maximum number of inputs,
12105 * see xmlPushInput.
12106 *
12107 * It's possible to make this non-recursive (minNsIndex must be
12108 * stored in the input struct) at the expense of code readability.
12109 */
12110
12111 ent->flags |= XML_ENT_EXPANDING;
12112
12113 list = xmlCtxtParseContent(ctxt, input, isExternal, buildTree);
12114
12115 ent->flags &= ~XML_ENT_EXPANDING;
12116
12117 ctxt->nsdb->minNsIndex = oldMinNsIndex;
12118 ctxt->nodelen = oldNodelen;
12119 ctxt->nodemem = oldNodemem;
12120
12121 /*
12122 * Entity size accounting
12123 */
12124 consumed = input->consumed;
12125 xmlSaturatedAddSizeT(&consumed, input->end - input->base);
12126
12127 if ((ent->flags & XML_ENT_CHECKED) == 0)
12128 xmlSaturatedAdd(&ent->expandedSize, consumed);
12129
12130 if ((ent->flags & XML_ENT_PARSED) == 0) {
12131 if (isExternal)
12132 xmlSaturatedAdd(&ctxt->sizeentities, consumed);
12133
12134 ent->children = list;
12135
12136 while (list != NULL) {
12137 list->parent = (xmlNodePtr) ent;
12138 if (list->next == NULL)
12139 ent->last = list;
12140 list = list->next;
12141 }
12142 } else {
12143 xmlFreeNodeList(list);
12144 }
12145
12146 xmlFreeInputStream(input);
12147
12148error:
12149 ent->flags |= XML_ENT_PARSED | XML_ENT_CHECKED;
12150}
12151
12152/**
12153 * xmlParseCtxtExternalEntity:
12154 * @ctxt: the existing parsing context
12155 * @URL: the URL for the entity to load
12156 * @ID: the System ID for the entity to load
12157 * @listOut: the return value for the set of parsed nodes
12158 *
12159 * Parse an external general entity within an existing parsing context
12160 * An external general parsed entity is well-formed if it matches the
12161 * production labeled extParsedEnt.
12162 *
12163 * [78] extParsedEnt ::= TextDecl? content
12164 *
12165 * Returns 0 if the entity is well formed, -1 in case of args problem and
12166 * the parser error code otherwise
12167 */
12168
12169int
12170xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctxt, const xmlChar *URL,
12171 const xmlChar *ID, xmlNodePtr *listOut) {
12172 xmlParserInputPtr input;
12173 xmlNodePtr list;
12174
12175 if (listOut != NULL)
12176 *listOut = NULL;
12177
12178 if (ctxt == NULL)
12179 return(XML_ERR_ARGUMENT);
12180
12181 input = xmlLoadExternalEntity((char *)URL, (char *)ID, ctxt);
12182 if (input == NULL)
12183 return(ctxt->errNo);
12184
12185 xmlCtxtInitializeLate(ctxt);
12186
12187 list = xmlCtxtParseContent(ctxt, input, /* hasTextDecl */ 1, 1);
12188 if (listOut != NULL)
12189 *listOut = list;
12190 else
12191 xmlFreeNodeList(list);
12192
12193 xmlFreeInputStream(input);
12194 return(ctxt->errNo);
12195}
12196
12197#ifdef LIBXML_SAX1_ENABLED
12198/**
12199 * xmlParseExternalEntity:
12200 * @doc: the document the chunk pertains to
12201 * @sax: the SAX handler block (possibly NULL)
12202 * @user_data: The user data returned on SAX callbacks (possibly NULL)
12203 * @depth: Used for loop detection, use 0
12204 * @URL: the URL for the entity to load
12205 * @ID: the System ID for the entity to load
12206 * @list: the return value for the set of parsed nodes
12207 *
12208 * DEPRECATED: Use xmlParseCtxtExternalEntity.
12209 *
12210 * Parse an external general entity
12211 * An external general parsed entity is well-formed if it matches the
12212 * production labeled extParsedEnt.
12213 *
12214 * [78] extParsedEnt ::= TextDecl? content
12215 *
12216 * Returns 0 if the entity is well formed, -1 in case of args problem and
12217 * the parser error code otherwise
12218 */
12219
12220int
12221xmlParseExternalEntity(xmlDocPtr doc, xmlSAXHandlerPtr sax, void *user_data,
12222 int depth, const xmlChar *URL, const xmlChar *ID, xmlNodePtr *list) {
12223 xmlParserCtxtPtr ctxt;
12224 int ret;
12225
12226 if (list != NULL)
12227 *list = NULL;
12228
12229 if (doc == NULL)
12230 return(XML_ERR_ARGUMENT);
12231
12232 ctxt = xmlNewSAXParserCtxt(sax, user_data);
12233 if (ctxt == NULL)
12234 return(XML_ERR_NO_MEMORY);
12235
12236 ctxt->depth = depth;
12237 ctxt->myDoc = doc;
12238 ret = xmlParseCtxtExternalEntity(ctxt, URL, ID, list);
12239
12240 xmlFreeParserCtxt(ctxt);
12241 return(ret);
12242}
12243
12244/**
12245 * xmlParseBalancedChunkMemory:
12246 * @doc: the document the chunk pertains to (must not be NULL)
12247 * @sax: the SAX handler block (possibly NULL)
12248 * @user_data: The user data returned on SAX callbacks (possibly NULL)
12249 * @depth: Used for loop detection, use 0
12250 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
12251 * @lst: the return value for the set of parsed nodes
12252 *
12253 * Parse a well-balanced chunk of an XML document
12254 * called by the parser
12255 * The allowed sequence for the Well Balanced Chunk is the one defined by
12256 * the content production in the XML grammar:
12257 *
12258 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
12259 *
12260 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
12261 * the parser error code otherwise
12262 */
12263
12264int
12265xmlParseBalancedChunkMemory(xmlDocPtr doc, xmlSAXHandlerPtr sax,
12266 void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst) {
12267 return xmlParseBalancedChunkMemoryRecover( doc, sax, user_data,
12268 depth, string, lst, 0 );
12269}
12270#endif /* LIBXML_SAX1_ENABLED */
12271
12272/**
12273 * xmlParseInNodeContext:
12274 * @node: the context node
12275 * @data: the input string
12276 * @datalen: the input string length in bytes
12277 * @options: a combination of xmlParserOption
12278 * @lst: the return value for the set of parsed nodes
12279 *
12280 * Parse a well-balanced chunk of an XML document
12281 * within the context (DTD, namespaces, etc ...) of the given node.
12282 *
12283 * The allowed sequence for the data is a Well Balanced Chunk defined by
12284 * the content production in the XML grammar:
12285 *
12286 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
12287 *
12288 * Returns XML_ERR_OK if the chunk is well balanced, and the parser
12289 * error code otherwise
12290 */
12291xmlParserErrors
12292xmlParseInNodeContext(xmlNodePtr node, const char *data, int datalen,
12293 int options, xmlNodePtr *lst) {
12294 xmlParserCtxtPtr ctxt;
12295 xmlDocPtr doc = NULL;
12296 xmlNodePtr fake, cur;
12297 int nsnr = 0;
12298
12299 xmlParserErrors ret = XML_ERR_OK;
12300
12301 /*
12302 * check all input parameters, grab the document
12303 */
12304 if ((lst == NULL) || (node == NULL) || (data == NULL) || (datalen < 0))
12305 return(XML_ERR_ARGUMENT);
12306 switch (node->type) {
12307 case XML_ELEMENT_NODE:
12308 case XML_ATTRIBUTE_NODE:
12309 case XML_TEXT_NODE:
12310 case XML_CDATA_SECTION_NODE:
12311 case XML_ENTITY_REF_NODE:
12312 case XML_PI_NODE:
12313 case XML_COMMENT_NODE:
12314 case XML_DOCUMENT_NODE:
12315 case XML_HTML_DOCUMENT_NODE:
12316 break;
12317 default:
12318 return(XML_ERR_INTERNAL_ERROR);
12319
12320 }
12321 while ((node != NULL) && (node->type != XML_ELEMENT_NODE) &&
12322 (node->type != XML_DOCUMENT_NODE) &&
12323 (node->type != XML_HTML_DOCUMENT_NODE))
12324 node = node->parent;
12325 if (node == NULL)
12326 return(XML_ERR_INTERNAL_ERROR);
12327 if (node->type == XML_ELEMENT_NODE)
12328 doc = node->doc;
12329 else
12330 doc = (xmlDocPtr) node;
12331 if (doc == NULL)
12332 return(XML_ERR_INTERNAL_ERROR);
12333
12334 /*
12335 * allocate a context and set-up everything not related to the
12336 * node position in the tree
12337 */
12338 if (doc->type == XML_DOCUMENT_NODE)
12339 ctxt = xmlCreateMemoryParserCtxt((char *) data, datalen);
12340#ifdef LIBXML_HTML_ENABLED
12341 else if (doc->type == XML_HTML_DOCUMENT_NODE) {
12342 ctxt = htmlCreateMemoryParserCtxt((char *) data, datalen);
12343 /*
12344 * When parsing in context, it makes no sense to add implied
12345 * elements like html/body/etc...
12346 */
12347 options |= HTML_PARSE_NOIMPLIED;
12348 }
12349#endif
12350 else
12351 return(XML_ERR_INTERNAL_ERROR);
12352
12353 if (ctxt == NULL)
12354 return(XML_ERR_NO_MEMORY);
12355
12356 /*
12357 * Use input doc's dict if present, else assure XML_PARSE_NODICT is set.
12358 * We need a dictionary for xmlCtxtInitializeLate, so if there's no doc dict
12359 * we must wait until the last moment to free the original one.
12360 */
12361 if (doc->dict != NULL) {
12362 if (ctxt->dict != NULL)
12363 xmlDictFree(ctxt->dict);
12364 ctxt->dict = doc->dict;
12365 } else {
12366 options |= XML_PARSE_NODICT;
12367 ctxt->dictNames = 0;
12368 }
12369
12370 if (doc->encoding != NULL)
12371 xmlSwitchEncodingName(ctxt, (const char *) doc->encoding);
12372
12373 xmlCtxtUseOptions(ctxt, options);
12374 xmlCtxtInitializeLate(ctxt);
12375 ctxt->myDoc = doc;
12376 /* parsing in context, i.e. as within existing content */
12377 ctxt->input_id = 2;
12378
12379 /*
12380 * TODO: Use xmlCtxtParseContent
12381 */
12382
12383 fake = xmlNewDocComment(node->doc, NULL);
12384 if (fake == NULL) {
12385 xmlFreeParserCtxt(ctxt);
12386 return(XML_ERR_NO_MEMORY);
12387 }
12388 xmlAddChild(node, fake);
12389
12390 if (node->type == XML_ELEMENT_NODE)
12391 nodePush(ctxt, node);
12392
12393 if ((ctxt->html == 0) && (node->type == XML_ELEMENT_NODE)) {
12394 /*
12395 * initialize the SAX2 namespaces stack
12396 */
12397 cur = node;
12398 while ((cur != NULL) && (cur->type == XML_ELEMENT_NODE)) {
12399 xmlNsPtr ns = cur->nsDef;
12400 xmlHashedString hprefix, huri;
12401
12402 while (ns != NULL) {
12403 hprefix = xmlDictLookupHashed(ctxt->dict, ns->prefix, -1);
12404 huri = xmlDictLookupHashed(ctxt->dict, ns->href, -1);
12405 if (xmlParserNsPush(ctxt, &hprefix, &huri, ns, 1) > 0)
12406 nsnr++;
12407 ns = ns->next;
12408 }
12409 cur = cur->parent;
12410 }
12411 }
12412
12413 if ((ctxt->validate) || (ctxt->replaceEntities != 0)) {
12414 /*
12415 * ID/IDREF registration will be done in xmlValidateElement below
12416 */
12417 ctxt->loadsubset |= XML_SKIP_IDS;
12418 }
12419
12420#ifdef LIBXML_HTML_ENABLED
12421 if (doc->type == XML_HTML_DOCUMENT_NODE)
12422 __htmlParseContent(ctxt);
12423 else
12424#endif
12425 xmlParseContentInternal(ctxt);
12426
12427 if (ctxt->input->cur < ctxt->input->end)
12428 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
12429
12430 xmlParserNsPop(ctxt, nsnr);
12431
12432 if ((ctxt->wellFormed) ||
12433 ((ctxt->recovery) && (ctxt->errNo != XML_ERR_NO_MEMORY))) {
12434 ret = XML_ERR_OK;
12435 } else {
12436 ret = (xmlParserErrors) ctxt->errNo;
12437 }
12438
12439 /*
12440 * Return the newly created nodeset after unlinking it from
12441 * the pseudo sibling.
12442 */
12443
12444 cur = fake->next;
12445 fake->next = NULL;
12446 node->last = fake;
12447
12448 if (cur != NULL) {
12449 cur->prev = NULL;
12450 }
12451
12452 *lst = cur;
12453
12454 while (cur != NULL) {
12455 cur->parent = NULL;
12456 cur = cur->next;
12457 }
12458
12459 xmlUnlinkNode(fake);
12460 xmlFreeNode(fake);
12461
12462
12463 if (ret != XML_ERR_OK) {
12464 xmlFreeNodeList(*lst);
12465 *lst = NULL;
12466 }
12467
12468 if (doc->dict != NULL)
12469 ctxt->dict = NULL;
12470 xmlFreeParserCtxt(ctxt);
12471
12472 return(ret);
12473}
12474
12475#ifdef LIBXML_SAX1_ENABLED
12476/**
12477 * xmlParseBalancedChunkMemoryRecover:
12478 * @doc: the document the chunk pertains to (must not be NULL)
12479 * @sax: the SAX handler block (possibly NULL)
12480 * @user_data: The user data returned on SAX callbacks (possibly NULL)
12481 * @depth: Used for loop detection, use 0
12482 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
12483 * @listOut: the return value for the set of parsed nodes
12484 * @recover: return nodes even if the data is broken (use 0)
12485 *
12486 * Parse a well-balanced chunk of an XML document
12487 *
12488 * The allowed sequence for the Well Balanced Chunk is the one defined by
12489 * the content production in the XML grammar:
12490 *
12491 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
12492 *
12493 * Returns 0 if the chunk is well balanced, or thehe parser error code
12494 * otherwise.
12495 *
12496 * In case recover is set to 1, the nodelist will not be empty even if
12497 * the parsed chunk is not well balanced, assuming the parsing succeeded to
12498 * some extent.
12499 */
12500int
12501xmlParseBalancedChunkMemoryRecover(xmlDocPtr doc, xmlSAXHandlerPtr sax,
12502 void *user_data, int depth, const xmlChar *string, xmlNodePtr *listOut,
12503 int recover) {
12504 xmlParserCtxtPtr ctxt;
12505 xmlParserInputPtr input;
12506 xmlNodePtr list;
12507 int ret;
12508
12509 if (listOut != NULL)
12510 *listOut = NULL;
12511
12512 if (string == NULL)
12513 return(XML_ERR_ARGUMENT);
12514
12515 ctxt = xmlNewSAXParserCtxt(sax, user_data);
12516 if (ctxt == NULL)
12517 return(XML_ERR_NO_MEMORY);
12518
12519 xmlCtxtInitializeLate(ctxt);
12520
12521 ctxt->depth = depth;
12522 ctxt->myDoc = doc;
12523 if (recover) {
12524 ctxt->options |= XML_PARSE_RECOVER;
12525 ctxt->recovery = 1;
12526 }
12527
12528 input = xmlNewStringInputStream(ctxt, string);
12529 if (input == NULL)
12530 return(ctxt->errNo);
12531
12532 list = xmlCtxtParseContent(ctxt, input, /* hasTextDecl */ 0, 1);
12533 if (listOut != NULL)
12534 *listOut = list;
12535 else
12536 xmlFreeNodeList(list);
12537
12538 ret = ctxt->errNo;
12539
12540 xmlFreeInputStream(input);
12541 xmlFreeParserCtxt(ctxt);
12542 return(ret);
12543}
12544
12545/**
12546 * xmlSAXParseEntity:
12547 * @sax: the SAX handler block
12548 * @filename: the filename
12549 *
12550 * DEPRECATED: Don't use.
12551 *
12552 * parse an XML external entity out of context and build a tree.
12553 * It use the given SAX function block to handle the parsing callback.
12554 * If sax is NULL, fallback to the default DOM tree building routines.
12555 *
12556 * [78] extParsedEnt ::= TextDecl? content
12557 *
12558 * This correspond to a "Well Balanced" chunk
12559 *
12560 * Returns the resulting document tree
12561 */
12562
12563xmlDocPtr
12564xmlSAXParseEntity(xmlSAXHandlerPtr sax, const char *filename) {
12565 xmlDocPtr ret;
12566 xmlParserCtxtPtr ctxt;
12567
12568 ctxt = xmlCreateFileParserCtxt(filename);
12569 if (ctxt == NULL) {
12570 return(NULL);
12571 }
12572 if (sax != NULL) {
12573 if (sax->initialized == XML_SAX2_MAGIC) {
12574 *ctxt->sax = *sax;
12575 } else {
12576 memset(ctxt->sax, 0, sizeof(*ctxt->sax));
12577 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1));
12578 }
12579 ctxt->userData = NULL;
12580 }
12581
12582 xmlParseExtParsedEnt(ctxt);
12583
12584 if (ctxt->wellFormed) {
12585 ret = ctxt->myDoc;
12586 } else {
12587 ret = NULL;
12588 xmlFreeDoc(ctxt->myDoc);
12589 }
12590
12591 xmlFreeParserCtxt(ctxt);
12592
12593 return(ret);
12594}
12595
12596/**
12597 * xmlParseEntity:
12598 * @filename: the filename
12599 *
12600 * parse an XML external entity out of context and build a tree.
12601 *
12602 * [78] extParsedEnt ::= TextDecl? content
12603 *
12604 * This correspond to a "Well Balanced" chunk
12605 *
12606 * Returns the resulting document tree
12607 */
12608
12609xmlDocPtr
12610xmlParseEntity(const char *filename) {
12611 return(xmlSAXParseEntity(NULL, filename));
12612}
12613#endif /* LIBXML_SAX1_ENABLED */
12614
12615/**
12616 * xmlCreateEntityParserCtxt:
12617 * @URL: the entity URL
12618 * @ID: the entity PUBLIC ID
12619 * @base: a possible base for the target URI
12620 *
12621 * DEPRECATED: Don't use.
12622 *
12623 * Create a parser context for an external entity
12624 * Automatic support for ZLIB/Compress compressed document is provided
12625 * by default if found at compile-time.
12626 *
12627 * Returns the new parser context or NULL
12628 */
12629xmlParserCtxtPtr
12630xmlCreateEntityParserCtxt(const xmlChar *URL, const xmlChar *ID,
12631 const xmlChar *base) {
12632 xmlParserCtxtPtr ctxt;
12633 xmlParserInputPtr input;
12634 xmlChar *uri = NULL;
12635
12636 ctxt = xmlNewParserCtxt();
12637 if (ctxt == NULL)
12638 return(NULL);
12639
12640 if (base != NULL) {
12641 if (xmlBuildURISafe(URL, base, &uri) < 0)
12642 goto error;
12643 if (uri != NULL)
12644 URL = uri;
12645 }
12646
12647 input = xmlLoadExternalEntity((char *)URL, (char *)ID, ctxt);
12648 if (input == NULL)
12649 goto error;
12650
12651 if (inputPush(ctxt, input) < 0)
12652 goto error;
12653
12654 xmlFree(uri);
12655 return(ctxt);
12656
12657error:
12658 xmlFree(uri);
12659 xmlFreeParserCtxt(ctxt);
12660 return(NULL);
12661}
12662
12663/************************************************************************
12664 * *
12665 * Front ends when parsing from a file *
12666 * *
12667 ************************************************************************/
12668
12669/**
12670 * xmlCreateURLParserCtxt:
12671 * @filename: the filename or URL
12672 * @options: a combination of xmlParserOption
12673 *
12674 * DEPRECATED: Use xmlNewParserCtxt and xmlCtxtReadFile.
12675 *
12676 * Create a parser context for a file or URL content.
12677 * Automatic support for ZLIB/Compress compressed document is provided
12678 * by default if found at compile-time and for file accesses
12679 *
12680 * Returns the new parser context or NULL
12681 */
12682xmlParserCtxtPtr
12683xmlCreateURLParserCtxt(const char *filename, int options)
12684{
12685 xmlParserCtxtPtr ctxt;
12686 xmlParserInputPtr input;
12687
12688 ctxt = xmlNewParserCtxt();
12689 if (ctxt == NULL)
12690 return(NULL);
12691
12692 xmlCtxtUseOptions(ctxt, options);
12693 ctxt->linenumbers = 1;
12694
12695 input = xmlLoadExternalEntity(filename, NULL, ctxt);
12696 if (input == NULL) {
12697 xmlFreeParserCtxt(ctxt);
12698 return(NULL);
12699 }
12700 inputPush(ctxt, input);
12701
12702 return(ctxt);
12703}
12704
12705/**
12706 * xmlCreateFileParserCtxt:
12707 * @filename: the filename
12708 *
12709 * DEPRECATED: Use xmlNewParserCtxt and xmlCtxtReadFile.
12710 *
12711 * Create a parser context for a file content.
12712 * Automatic support for ZLIB/Compress compressed document is provided
12713 * by default if found at compile-time.
12714 *
12715 * Returns the new parser context or NULL
12716 */
12717xmlParserCtxtPtr
12718xmlCreateFileParserCtxt(const char *filename)
12719{
12720 return(xmlCreateURLParserCtxt(filename, 0));
12721}
12722
12723#ifdef LIBXML_SAX1_ENABLED
12724/**
12725 * xmlSAXParseFileWithData:
12726 * @sax: the SAX handler block
12727 * @filename: the filename
12728 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
12729 * documents
12730 * @data: the userdata
12731 *
12732 * DEPRECATED: Use xmlNewSAXParserCtxt and xmlCtxtReadFile.
12733 *
12734 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
12735 * compressed document is provided by default if found at compile-time.
12736 * It use the given SAX function block to handle the parsing callback.
12737 * If sax is NULL, fallback to the default DOM tree building routines.
12738 *
12739 * User data (void *) is stored within the parser context in the
12740 * context's _private member, so it is available nearly everywhere in libxml
12741 *
12742 * Returns the resulting document tree
12743 */
12744
12745xmlDocPtr
12746xmlSAXParseFileWithData(xmlSAXHandlerPtr sax, const char *filename,
12747 int recovery, void *data) {
12748 xmlDocPtr ret;
12749 xmlParserCtxtPtr ctxt;
12750 xmlParserInputPtr input;
12751
12752 ctxt = xmlNewSAXParserCtxt(sax, NULL);
12753 if (ctxt == NULL)
12754 return(NULL);
12755
12756 if (data != NULL)
12757 ctxt->_private = data;
12758
12759 if (recovery) {
12760 ctxt->options |= XML_PARSE_RECOVER;
12761 ctxt->recovery = 1;
12762 }
12763
12764 if ((filename != NULL) && (filename[0] == '-') && (filename[1] == 0))
12765 input = xmlNewInputFd(ctxt, filename, STDIN_FILENO, NULL, 0);
12766 else
12767 input = xmlNewInputURL(ctxt, filename, NULL, NULL, 0);
12768
12769 ret = xmlCtxtParseDocument(ctxt, input);
12770
12771 xmlFreeParserCtxt(ctxt);
12772 return(ret);
12773}
12774
12775/**
12776 * xmlSAXParseFile:
12777 * @sax: the SAX handler block
12778 * @filename: the filename
12779 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
12780 * documents
12781 *
12782 * DEPRECATED: Use xmlNewSAXParserCtxt and xmlCtxtReadFile.
12783 *
12784 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
12785 * compressed document is provided by default if found at compile-time.
12786 * It use the given SAX function block to handle the parsing callback.
12787 * If sax is NULL, fallback to the default DOM tree building routines.
12788 *
12789 * Returns the resulting document tree
12790 */
12791
12792xmlDocPtr
12793xmlSAXParseFile(xmlSAXHandlerPtr sax, const char *filename,
12794 int recovery) {
12795 return(xmlSAXParseFileWithData(sax,filename,recovery,NULL));
12796}
12797
12798/**
12799 * xmlRecoverDoc:
12800 * @cur: a pointer to an array of xmlChar
12801 *
12802 * DEPRECATED: Use xmlReadDoc with XML_PARSE_RECOVER.
12803 *
12804 * parse an XML in-memory document and build a tree.
12805 * In the case the document is not Well Formed, a attempt to build a
12806 * tree is tried anyway
12807 *
12808 * Returns the resulting document tree or NULL in case of failure
12809 */
12810
12811xmlDocPtr
12812xmlRecoverDoc(const xmlChar *cur) {
12813 return(xmlSAXParseDoc(NULL, cur, 1));
12814}
12815
12816/**
12817 * xmlParseFile:
12818 * @filename: the filename
12819 *
12820 * DEPRECATED: Use xmlReadFile.
12821 *
12822 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
12823 * compressed document is provided by default if found at compile-time.
12824 *
12825 * Returns the resulting document tree if the file was wellformed,
12826 * NULL otherwise.
12827 */
12828
12829xmlDocPtr
12830xmlParseFile(const char *filename) {
12831 return(xmlSAXParseFile(NULL, filename, 0));
12832}
12833
12834/**
12835 * xmlRecoverFile:
12836 * @filename: the filename
12837 *
12838 * DEPRECATED: Use xmlReadFile with XML_PARSE_RECOVER.
12839 *
12840 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
12841 * compressed document is provided by default if found at compile-time.
12842 * In the case the document is not Well Formed, it attempts to build
12843 * a tree anyway
12844 *
12845 * Returns the resulting document tree or NULL in case of failure
12846 */
12847
12848xmlDocPtr
12849xmlRecoverFile(const char *filename) {
12850 return(xmlSAXParseFile(NULL, filename, 1));
12851}
12852
12853
12854/**
12855 * xmlSetupParserForBuffer:
12856 * @ctxt: an XML parser context
12857 * @buffer: a xmlChar * buffer
12858 * @filename: a file name
12859 *
12860 * DEPRECATED: Don't use.
12861 *
12862 * Setup the parser context to parse a new buffer; Clears any prior
12863 * contents from the parser context. The buffer parameter must not be
12864 * NULL, but the filename parameter can be
12865 */
12866void
12867xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt, const xmlChar* buffer,
12868 const char* filename)
12869{
12870 xmlParserInputPtr input;
12871
12872 if ((ctxt == NULL) || (buffer == NULL))
12873 return;
12874
12875 xmlClearParserCtxt(ctxt);
12876
12877 input = xmlNewInputString(ctxt, filename, (const char *) buffer, NULL, 0);
12878 if (input == NULL)
12879 return;
12880 inputPush(ctxt, input);
12881}
12882
12883/**
12884 * xmlSAXUserParseFile:
12885 * @sax: a SAX handler
12886 * @user_data: The user data returned on SAX callbacks
12887 * @filename: a file name
12888 *
12889 * DEPRECATED: Use xmlNewSAXParserCtxt and xmlCtxtReadFile.
12890 *
12891 * parse an XML file and call the given SAX handler routines.
12892 * Automatic support for ZLIB/Compress compressed document is provided
12893 *
12894 * Returns 0 in case of success or a error number otherwise
12895 */
12896int
12897xmlSAXUserParseFile(xmlSAXHandlerPtr sax, void *user_data,
12898 const char *filename) {
12899 int ret = 0;
12900 xmlParserCtxtPtr ctxt;
12901
12902 ctxt = xmlCreateFileParserCtxt(filename);
12903 if (ctxt == NULL) return -1;
12904 if (sax != NULL) {
12905 if (sax->initialized == XML_SAX2_MAGIC) {
12906 *ctxt->sax = *sax;
12907 } else {
12908 memset(ctxt->sax, 0, sizeof(*ctxt->sax));
12909 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1));
12910 }
12911 ctxt->userData = user_data;
12912 }
12913
12914 xmlParseDocument(ctxt);
12915
12916 if (ctxt->wellFormed)
12917 ret = 0;
12918 else {
12919 if (ctxt->errNo != 0)
12920 ret = ctxt->errNo;
12921 else
12922 ret = -1;
12923 }
12924 if (ctxt->myDoc != NULL) {
12925 xmlFreeDoc(ctxt->myDoc);
12926 ctxt->myDoc = NULL;
12927 }
12928 xmlFreeParserCtxt(ctxt);
12929
12930 return ret;
12931}
12932#endif /* LIBXML_SAX1_ENABLED */
12933
12934/************************************************************************
12935 * *
12936 * Front ends when parsing from memory *
12937 * *
12938 ************************************************************************/
12939
12940/**
12941 * xmlCreateMemoryParserCtxt:
12942 * @buffer: a pointer to a char array
12943 * @size: the size of the array
12944 *
12945 * Create a parser context for an XML in-memory document. The input buffer
12946 * must not contain a terminating null byte.
12947 *
12948 * Returns the new parser context or NULL
12949 */
12950xmlParserCtxtPtr
12951xmlCreateMemoryParserCtxt(const char *buffer, int size) {
12952 xmlParserCtxtPtr ctxt;
12953 xmlParserInputPtr input;
12954
12955 if (size < 0)
12956 return(NULL);
12957
12958 ctxt = xmlNewParserCtxt();
12959 if (ctxt == NULL)
12960 return(NULL);
12961
12962 input = xmlNewInputMemory(ctxt, NULL, buffer, size, NULL, 0);
12963 if (input == NULL) {
12964 xmlFreeParserCtxt(ctxt);
12965 return(NULL);
12966 }
12967 inputPush(ctxt, input);
12968
12969 return(ctxt);
12970}
12971
12972#ifdef LIBXML_SAX1_ENABLED
12973/**
12974 * xmlSAXParseMemoryWithData:
12975 * @sax: the SAX handler block
12976 * @buffer: an pointer to a char array
12977 * @size: the size of the array
12978 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
12979 * documents
12980 * @data: the userdata
12981 *
12982 * DEPRECATED: Use xmlNewSAXParserCtxt and xmlCtxtReadMemory.
12983 *
12984 * parse an XML in-memory block and use the given SAX function block
12985 * to handle the parsing callback. If sax is NULL, fallback to the default
12986 * DOM tree building routines.
12987 *
12988 * User data (void *) is stored within the parser context in the
12989 * context's _private member, so it is available nearly everywhere in libxml
12990 *
12991 * Returns the resulting document tree
12992 */
12993
12994xmlDocPtr
12995xmlSAXParseMemoryWithData(xmlSAXHandlerPtr sax, const char *buffer,
12996 int size, int recovery, void *data) {
12997 xmlDocPtr ret;
12998 xmlParserCtxtPtr ctxt;
12999 xmlParserInputPtr input;
13000
13001 if (size < 0)
13002 return(NULL);
13003
13004 ctxt = xmlNewSAXParserCtxt(sax, NULL);
13005 if (ctxt == NULL)
13006 return(NULL);
13007
13008 if (data != NULL)
13009 ctxt->_private=data;
13010
13011 if (recovery) {
13012 ctxt->options |= XML_PARSE_RECOVER;
13013 ctxt->recovery = 1;
13014 }
13015
13016 input = xmlNewInputMemory(ctxt, NULL, buffer, size, NULL,
13017 XML_INPUT_BUF_STATIC);
13018
13019 ret = xmlCtxtParseDocument(ctxt, input);
13020
13021 xmlFreeParserCtxt(ctxt);
13022 return(ret);
13023}
13024
13025/**
13026 * xmlSAXParseMemory:
13027 * @sax: the SAX handler block
13028 * @buffer: an pointer to a char array
13029 * @size: the size of the array
13030 * @recovery: work in recovery mode, i.e. tries to read not Well Formed
13031 * documents
13032 *
13033 * DEPRECATED: Use xmlNewSAXParserCtxt and xmlCtxtReadMemory.
13034 *
13035 * parse an XML in-memory block and use the given SAX function block
13036 * to handle the parsing callback. If sax is NULL, fallback to the default
13037 * DOM tree building routines.
13038 *
13039 * Returns the resulting document tree
13040 */
13041xmlDocPtr
13042xmlSAXParseMemory(xmlSAXHandlerPtr sax, const char *buffer,
13043 int size, int recovery) {
13044 return xmlSAXParseMemoryWithData(sax, buffer, size, recovery, NULL);
13045}
13046
13047/**
13048 * xmlParseMemory:
13049 * @buffer: an pointer to a char array
13050 * @size: the size of the array
13051 *
13052 * DEPRECATED: Use xmlReadMemory.
13053 *
13054 * parse an XML in-memory block and build a tree.
13055 *
13056 * Returns the resulting document tree
13057 */
13058
13059xmlDocPtr xmlParseMemory(const char *buffer, int size) {
13060 return(xmlSAXParseMemory(NULL, buffer, size, 0));
13061}
13062
13063/**
13064 * xmlRecoverMemory:
13065 * @buffer: an pointer to a char array
13066 * @size: the size of the array
13067 *
13068 * DEPRECATED: Use xmlReadMemory with XML_PARSE_RECOVER.
13069 *
13070 * parse an XML in-memory block and build a tree.
13071 * In the case the document is not Well Formed, an attempt to
13072 * build a tree is tried anyway
13073 *
13074 * Returns the resulting document tree or NULL in case of error
13075 */
13076
13077xmlDocPtr xmlRecoverMemory(const char *buffer, int size) {
13078 return(xmlSAXParseMemory(NULL, buffer, size, 1));
13079}
13080
13081/**
13082 * xmlSAXUserParseMemory:
13083 * @sax: a SAX handler
13084 * @user_data: The user data returned on SAX callbacks
13085 * @buffer: an in-memory XML document input
13086 * @size: the length of the XML document in bytes
13087 *
13088 * DEPRECATED: Use xmlNewSAXParserCtxt and xmlCtxtReadMemory.
13089 *
13090 * parse an XML in-memory buffer and call the given SAX handler routines.
13091 *
13092 * Returns 0 in case of success or a error number otherwise
13093 */
13094int xmlSAXUserParseMemory(xmlSAXHandlerPtr sax, void *user_data,
13095 const char *buffer, int size) {
13096 int ret = 0;
13097 xmlParserCtxtPtr ctxt;
13098
13099 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
13100 if (ctxt == NULL) return -1;
13101 if (sax != NULL) {
13102 if (sax->initialized == XML_SAX2_MAGIC) {
13103 *ctxt->sax = *sax;
13104 } else {
13105 memset(ctxt->sax, 0, sizeof(*ctxt->sax));
13106 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1));
13107 }
13108 ctxt->userData = user_data;
13109 }
13110
13111 xmlParseDocument(ctxt);
13112
13113 if (ctxt->wellFormed)
13114 ret = 0;
13115 else {
13116 if (ctxt->errNo != 0)
13117 ret = ctxt->errNo;
13118 else
13119 ret = -1;
13120 }
13121 if (ctxt->myDoc != NULL) {
13122 xmlFreeDoc(ctxt->myDoc);
13123 ctxt->myDoc = NULL;
13124 }
13125 xmlFreeParserCtxt(ctxt);
13126
13127 return ret;
13128}
13129#endif /* LIBXML_SAX1_ENABLED */
13130
13131/**
13132 * xmlCreateDocParserCtxt:
13133 * @str: a pointer to an array of xmlChar
13134 *
13135 * Creates a parser context for an XML in-memory document.
13136 *
13137 * Returns the new parser context or NULL
13138 */
13139xmlParserCtxtPtr
13140xmlCreateDocParserCtxt(const xmlChar *str) {
13141 xmlParserCtxtPtr ctxt;
13142 xmlParserInputPtr input;
13143
13144 ctxt = xmlNewParserCtxt();
13145 if (ctxt == NULL)
13146 return(NULL);
13147
13148 input = xmlNewInputString(ctxt, NULL, (const char *) str, NULL, 0);
13149 if (input == NULL) {
13150 xmlFreeParserCtxt(ctxt);
13151 return(NULL);
13152 }
13153 inputPush(ctxt, input);
13154
13155 return(ctxt);
13156}
13157
13158#ifdef LIBXML_SAX1_ENABLED
13159/**
13160 * xmlSAXParseDoc:
13161 * @sax: the SAX handler block
13162 * @cur: a pointer to an array of xmlChar
13163 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
13164 * documents
13165 *
13166 * DEPRECATED: Use xmlNewSAXParserCtxt and xmlCtxtReadDoc.
13167 *
13168 * parse an XML in-memory document and build a tree.
13169 * It use the given SAX function block to handle the parsing callback.
13170 * If sax is NULL, fallback to the default DOM tree building routines.
13171 *
13172 * Returns the resulting document tree
13173 */
13174
13175xmlDocPtr
13176xmlSAXParseDoc(xmlSAXHandlerPtr sax, const xmlChar *cur, int recovery) {
13177 xmlDocPtr ret;
13178 xmlParserCtxtPtr ctxt;
13179 xmlSAXHandlerPtr oldsax = NULL;
13180
13181 if (cur == NULL) return(NULL);
13182
13183
13184 ctxt = xmlCreateDocParserCtxt(cur);
13185 if (ctxt == NULL) return(NULL);
13186 if (sax != NULL) {
13187 oldsax = ctxt->sax;
13188 ctxt->sax = sax;
13189 ctxt->userData = NULL;
13190 }
13191
13192 xmlParseDocument(ctxt);
13193 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
13194 else {
13195 ret = NULL;
13196 xmlFreeDoc(ctxt->myDoc);
13197 ctxt->myDoc = NULL;
13198 }
13199 if (sax != NULL)
13200 ctxt->sax = oldsax;
13201 xmlFreeParserCtxt(ctxt);
13202
13203 return(ret);
13204}
13205
13206/**
13207 * xmlParseDoc:
13208 * @cur: a pointer to an array of xmlChar
13209 *
13210 * DEPRECATED: Use xmlReadDoc.
13211 *
13212 * parse an XML in-memory document and build a tree.
13213 *
13214 * Returns the resulting document tree
13215 */
13216
13217xmlDocPtr
13218xmlParseDoc(const xmlChar *cur) {
13219 return(xmlSAXParseDoc(NULL, cur, 0));
13220}
13221#endif /* LIBXML_SAX1_ENABLED */
13222
13223/************************************************************************
13224 * *
13225 * New set (2.6.0) of simpler and more flexible APIs *
13226 * *
13227 ************************************************************************/
13228
/**
 * DICT_FREE:
 * @str:  a string
 *
 * Free a string if it is not owned by the "dict" dictionary in the
 * current scope. A NULL string is ignored.
 *
 * The body is wrapped in do { } while (0) so the expansion is a single
 * statement that is safe inside unbraced if/else bodies. The invocation
 * must be terminated with a semicolon.
 */
#define DICT_FREE(str)                                              \
    do {                                                            \
        if ((str) && ((!dict) ||                                    \
            (xmlDictOwns(dict, (const xmlChar *)(str)) == 0)))      \
            xmlFree((char *)(str));                                 \
    } while (0)
13240
13241/**
13242 * xmlCtxtReset:
13243 * @ctxt: an XML parser context
13244 *
13245 * Reset a parser context
13246 */
13247void
13248xmlCtxtReset(xmlParserCtxtPtr ctxt)
13249{
13250 xmlParserInputPtr input;
13251 xmlDictPtr dict;
13252
13253 if (ctxt == NULL)
13254 return;
13255
13256 dict = ctxt->dict;
13257
13258 while ((input = inputPop(ctxt)) != NULL) { /* Non consuming */
13259 xmlFreeInputStream(input);
13260 }
13261 ctxt->inputNr = 0;
13262 ctxt->input = NULL;
13263
13264 ctxt->spaceNr = 0;
13265 if (ctxt->spaceTab != NULL) {
13266 ctxt->spaceTab[0] = -1;
13267 ctxt->space = &ctxt->spaceTab[0];
13268 } else {
13269 ctxt->space = NULL;
13270 }
13271
13272
13273 ctxt->nodeNr = 0;
13274 ctxt->node = NULL;
13275
13276 ctxt->nameNr = 0;
13277 ctxt->name = NULL;
13278
13279 ctxt->nsNr = 0;
13280 xmlParserNsReset(ctxt->nsdb);
13281
13282 DICT_FREE(ctxt->version);
13283 ctxt->version = NULL;
13284 DICT_FREE(ctxt->encoding);
13285 ctxt->encoding = NULL;
13286 DICT_FREE(ctxt->extSubURI);
13287 ctxt->extSubURI = NULL;
13288 DICT_FREE(ctxt->extSubSystem);
13289 ctxt->extSubSystem = NULL;
13290
13291 if (ctxt->directory != NULL) {
13292 xmlFree(ctxt->directory);
13293 ctxt->directory = NULL;
13294 }
13295
13296 if (ctxt->myDoc != NULL)
13297 xmlFreeDoc(ctxt->myDoc);
13298 ctxt->myDoc = NULL;
13299
13300 ctxt->standalone = -1;
13301 ctxt->hasExternalSubset = 0;
13302 ctxt->hasPErefs = 0;
13303 ctxt->html = 0;
13304 ctxt->instate = XML_PARSER_START;
13305
13306 ctxt->wellFormed = 1;
13307 ctxt->nsWellFormed = 1;
13308 ctxt->disableSAX = 0;
13309 ctxt->valid = 1;
13310#if 0
13311 ctxt->vctxt.userData = ctxt;
13312 ctxt->vctxt.error = xmlParserValidityError;
13313 ctxt->vctxt.warning = xmlParserValidityWarning;
13314#endif
13315 ctxt->record_info = 0;
13316 ctxt->checkIndex = 0;
13317 ctxt->endCheckState = 0;
13318 ctxt->inSubset = 0;
13319 ctxt->errNo = XML_ERR_OK;
13320 ctxt->depth = 0;
13321 ctxt->catalogs = NULL;
13322 ctxt->sizeentities = 0;
13323 ctxt->sizeentcopy = 0;
13324 xmlInitNodeInfoSeq(&ctxt->node_seq);
13325
13326 if (ctxt->attsDefault != NULL) {
13327 xmlHashFree(ctxt->attsDefault, xmlHashDefaultDeallocator);
13328 ctxt->attsDefault = NULL;
13329 }
13330 if (ctxt->attsSpecial != NULL) {
13331 xmlHashFree(ctxt->attsSpecial, NULL);
13332 ctxt->attsSpecial = NULL;
13333 }
13334
13335#ifdef LIBXML_CATALOG_ENABLED
13336 if (ctxt->catalogs != NULL)
13337 xmlCatalogFreeLocal(ctxt->catalogs);
13338#endif
13339 ctxt->nbErrors = 0;
13340 ctxt->nbWarnings = 0;
13341 if (ctxt->lastError.code != XML_ERR_OK)
13342 xmlResetError(&ctxt->lastError);
13343}
13344
13345/**
13346 * xmlCtxtResetPush:
13347 * @ctxt: an XML parser context
13348 * @chunk: a pointer to an array of chars
13349 * @size: number of chars in the array
13350 * @filename: an optional file name or URI
13351 * @encoding: the document encoding, or NULL
13352 *
13353 * Reset a push parser context
13354 *
13355 * Returns 0 in case of success and 1 in case of error
13356 */
13357int
13358xmlCtxtResetPush(xmlParserCtxtPtr ctxt, const char *chunk,
13359 int size, const char *filename, const char *encoding)
13360{
13361 xmlParserInputPtr input;
13362
13363 if (ctxt == NULL)
13364 return(1);
13365
13366 xmlCtxtReset(ctxt);
13367
13368 input = xmlNewInputPush(ctxt, filename, chunk, size, encoding);
13369 if (input == NULL)
13370 return(1);
13371 inputPush(ctxt, input);
13372
13373 return(0);
13374}
13375
13376static int
13377xmlCtxtSetOptionsInternal(xmlParserCtxtPtr ctxt, int options, int keepMask)
13378{
13379 int allMask;
13380
13381 if (ctxt == NULL)
13382 return(-1);
13383
13384 /*
13385 * XInclude options aren't handled by the parser.
13386 *
13387 * XML_PARSE_XINCLUDE
13388 * XML_PARSE_NOXINCNODE
13389 * XML_PARSE_NOBASEFIX
13390 */
13391 allMask = XML_PARSE_RECOVER |
13392 XML_PARSE_NOENT |
13393 XML_PARSE_DTDLOAD |
13394 XML_PARSE_DTDATTR |
13395 XML_PARSE_DTDVALID |
13396 XML_PARSE_NOERROR |
13397 XML_PARSE_NOWARNING |
13398 XML_PARSE_PEDANTIC |
13399 XML_PARSE_NOBLANKS |
13400#ifdef LIBXML_SAX1_ENABLED
13401 XML_PARSE_SAX1 |
13402#endif
13403 XML_PARSE_NONET |
13404 XML_PARSE_NODICT |
13405 XML_PARSE_NSCLEAN |
13406 XML_PARSE_NOCDATA |
13407 XML_PARSE_COMPACT |
13408 XML_PARSE_OLD10 |
13409 XML_PARSE_HUGE |
13410 XML_PARSE_OLDSAX |
13411 XML_PARSE_IGNORE_ENC |
13412 XML_PARSE_BIG_LINES |
13413 XML_PARSE_NO_XXE;
13414
13415 ctxt->options = (ctxt->options & keepMask) | (options & allMask);
13416
13417 /*
13418 * For some options, struct members are historically the source
13419 * of truth. The values are initalized from global variables and
13420 * old code could also modify them directly. Several older API
13421 * functions that don't take an options argument rely on these
13422 * deprecated mechanisms.
13423 *
13424 * Once public access to struct members and the globals are
13425 * disabled, we can use the options bitmask as source of
13426 * truth, making all these struct members obsolete.
13427 *
13428 * The XML_DETECT_IDS flags is misnamed. It simply enables
13429 * loading of the external subset.
13430 */
13431 ctxt->recovery = (options & XML_PARSE_RECOVER) ? 1 : 0;
13432 ctxt->replaceEntities = (options & XML_PARSE_NOENT) ? 1 : 0;
13433 ctxt->loadsubset = (options & XML_PARSE_DTDLOAD) ? XML_DETECT_IDS : 0;
13434 ctxt->loadsubset |= (options & XML_PARSE_DTDATTR) ? XML_COMPLETE_ATTRS : 0;
13435 ctxt->validate = (options & XML_PARSE_DTDVALID) ? 1 : 0;
13436 ctxt->pedantic = (options & XML_PARSE_PEDANTIC) ? 1 : 0;
13437 ctxt->keepBlanks = (options & XML_PARSE_NOBLANKS) ? 0 : 1;
13438 ctxt->dictNames = (options & XML_PARSE_NODICT) ? 0 : 1;
13439
13440 /*
13441 * Changing SAX callbacks is a bad idea. This should be fixed.
13442 */
13443 if (options & XML_PARSE_NOBLANKS) {
13444 ctxt->sax->ignorableWhitespace = xmlSAX2IgnorableWhitespace;
13445 }
13446 if (options & XML_PARSE_NOCDATA) {
13447 ctxt->sax->cdataBlock = NULL;
13448 }
13449 if (options & XML_PARSE_HUGE) {
13450 if (ctxt->dict != NULL)
13451 xmlDictSetLimit(ctxt->dict, 0);
13452 }
13453
13454 ctxt->linenumbers = 1;
13455
13456 return(options & ~allMask);
13457}
13458
13459/**
13460 * xmlCtxtSetOptions:
13461 * @ctxt: an XML parser context
13462 * @options: a bitmask of xmlParserOption values
13463 *
13464 * Applies the options to the parser context. Unset options are
13465 * cleared.
13466 *
13467 * Available since 2.13.0. With older versions, you can use
13468 * xmlCtxtUseOptions.
13469 *
13470 * XML_PARSE_RECOVER
13471 *
13472 * Enable "recovery" mode which allows non-wellformed documents.
13473 * How this mode behaves exactly is unspecified and may change
13474 * without further notice. Use of this feature is DISCOURAGED.
13475 *
13476 * XML_PARSE_NOENT
13477 *
13478 * Despite the confusing name, this option enables substitution
13479 * of entities. The resulting tree won't contain any entity
13480 * reference nodes.
13481 *
13482 * This option also enables loading of external entities (both
13483 * general and parameter entities) which is dangerous. If you
13484 * process untrusted data, it's recommended to set the
13485 * XML_PARSE_NO_XXE option to disable loading of external
13486 * entities.
13487 *
13488 * XML_PARSE_DTDLOAD
13489 *
13490 * Enables loading of an external DTD and the loading and
13491 * substitution of external parameter entities. Has no effect
13492 * if XML_PARSE_NO_XXE is set.
13493 *
13494 * XML_PARSE_DTDATTR
13495 *
13496 * Adds default attributes from the DTD to the result document.
13497 *
13498 * Implies XML_PARSE_DTDLOAD, but loading of external content
13499 * can be disabled with XML_PARSE_NO_XXE.
13500 *
13501 * XML_PARSE_DTDVALID
13502 *
13503 * This option enables DTD validation which requires to load
13504 * external DTDs and external entities (both general and
13505 * parameter entities) unless XML_PARSE_NO_XXE was set.
13506 *
13507 * XML_PARSE_NO_XXE
13508 *
13509 * Disables loading of external DTDs or entities.
13510 *
13511 * XML_PARSE_NOERROR
13512 *
13513 * Disable error and warning reports to the error handlers.
13514 * Errors are still accessible with xmlCtxtGetLastError.
13515 *
13516 * XML_PARSE_NOWARNING
13517 *
13518 * Disable warning reports.
13519 *
13520 * XML_PARSE_PEDANTIC
13521 *
13522 * Enable some pedantic warnings.
13523 *
13524 * XML_PARSE_NOBLANKS
13525 *
13526 * Remove some text nodes containing only whitespace from the
13527 * result document. Which nodes are removed depends on DTD
13528 * element declarations or a conservative heuristic. The
13529 * reindenting feature of the serialization code relies on this
13530 * option to be set when parsing. Use of this option is
13531 * DISCOURAGED.
13532 *
13533 * XML_PARSE_SAX1
13534 *
13535 * Always invoke the deprecated SAX1 startElement and endElement
13536 * handlers. This option is DEPRECATED.
13537 *
13538 * XML_PARSE_NONET
13539 *
13540 * Disable network access with the builtin HTTP and FTP clients.
13541 *
13542 * XML_PARSE_NODICT
13543 *
13544 * Create a document without interned strings, making all
13545 * strings separate memory allocations.
13546 *
13547 * XML_PARSE_NSCLEAN
13548 *
13549 * Remove redundant namespace declarations from the result
13550 * document.
13551 *
13552 * XML_PARSE_NOCDATA
13553 *
13554 * Output normal text nodes instead of CDATA nodes.
13555 *
13556 * XML_PARSE_COMPACT
13557 *
13558 * Store small strings directly in the node struct to save
13559 * memory.
13560 *
13561 * XML_PARSE_OLD10
13562 *
13563 * Use old Name productions from before XML 1.0 Fifth Edition.
13564 * This options is DEPRECATED.
13565 *
13566 * XML_PARSE_HUGE
13567 *
13568 * Relax some internal limits.
13569 *
13570 * Maximum size of text nodes, tags, comments, processing instructions,
13571 * CDATA sections, entity values
13572 *
13573 * normal: 10M
13574 * huge: 1B
13575 *
13576 * Maximum size of names, system literals, pubid literals
13577 *
13578 * normal: 50K
13579 * huge: 10M
13580 *
13581 * Maximum nesting depth of elements
13582 *
13583 * normal: 256
13584 * huge: 2048
13585 *
13586 * Maximum nesting depth of entities
13587 *
13588 * normal: 20
13589 * huge: 40
13590 *
13591 * XML_PARSE_OLDSAX
13592 *
13593 * Enable an unspecified legacy mode for SAX parsers. This
13594 * option is DEPRECATED.
13595 *
13596 * XML_PARSE_IGNORE_ENC
13597 *
13598 * Ignore the encoding in the XML declaration. This option is
13599 * mostly unneeded these days. The only effect is to enforce
13600 * UTF-8 decoding of ASCII-like data.
13601 *
13602 * XML_PARSE_BIG_LINES
13603 *
13604 * Enable reporting of line numbers larger than 65535.
13605 *
13606 * Returns 0 in case of success, the set of unknown or unimplemented options
13607 * in case of error.
13608 */
13609int
13610xmlCtxtSetOptions(xmlParserCtxtPtr ctxt, int options)
13611{
13612 return(xmlCtxtSetOptionsInternal(ctxt, options, 0));
13613}
13614
13615/**
13616 * xmlCtxtUseOptions:
13617 * @ctxt: an XML parser context
13618 * @options: a combination of xmlParserOption
13619 *
13620 * DEPRECATED: Use xmlCtxtSetOptions.
13621 *
13622 * Applies the options to the parser context. The following options
13623 * are never cleared and can only be enabled:
13624 *
13625 * XML_PARSE_NOERROR
13626 * XML_PARSE_NOWARNING
13627 * XML_PARSE_NONET
13628 * XML_PARSE_NSCLEAN
13629 * XML_PARSE_NOCDATA
13630 * XML_PARSE_COMPACT
13631 * XML_PARSE_OLD10
13632 * XML_PARSE_HUGE
13633 * XML_PARSE_OLDSAX
13634 * XML_PARSE_IGNORE_ENC
13635 * XML_PARSE_BIG_LINES
13636 *
13637 * Returns 0 in case of success, the set of unknown or unimplemented options
13638 * in case of error.
13639 */
13640int
13641xmlCtxtUseOptions(xmlParserCtxtPtr ctxt, int options)
13642{
13643 int keepMask;
13644
13645 /*
13646 * For historic reasons, some options can only be enabled.
13647 */
13648 keepMask = XML_PARSE_NOERROR |
13649 XML_PARSE_NOWARNING |
13650 XML_PARSE_NONET |
13651 XML_PARSE_NSCLEAN |
13652 XML_PARSE_NOCDATA |
13653 XML_PARSE_COMPACT |
13654 XML_PARSE_OLD10 |
13655 XML_PARSE_HUGE |
13656 XML_PARSE_OLDSAX |
13657 XML_PARSE_IGNORE_ENC |
13658 XML_PARSE_BIG_LINES;
13659
13660 return(xmlCtxtSetOptionsInternal(ctxt, options, keepMask));
13661}
13662
13663/**
13664 * xmlCtxtSetMaxAmplification:
13665 * @ctxt: an XML parser context
13666 * @maxAmpl: maximum amplification factor
13667 *
13668 * To protect against exponential entity expansion ("billion laughs"), the
13669 * size of serialized output is (roughly) limited to the input size
13670 * multiplied by this factor. The default value is 5.
13671 *
13672 * When working with documents making heavy use of entity expansion, it can
13673 * be necessary to increase the value. For security reasons, this should only
13674 * be considered when processing trusted input.
13675 */
13676void
13677xmlCtxtSetMaxAmplification(xmlParserCtxtPtr ctxt, unsigned maxAmpl)
13678{
13679 ctxt->maxAmpl = maxAmpl;
13680}
13681
13682/**
13683 * xmlCtxtParseDocument:
13684 * @ctxt: an XML parser context
13685 * @input: parser input
13686 *
13687 * Parse an XML document and return the resulting document tree.
13688 * Takes ownership of the input object.
13689 *
13690 * Available since 2.13.0.
13691 *
13692 * Returns the resulting document tree or NULL
13693 */
13694xmlDocPtr
13695xmlCtxtParseDocument(xmlParserCtxtPtr ctxt, xmlParserInputPtr input)
13696{
13697 xmlDocPtr ret = NULL;
13698
13699 if ((ctxt == NULL) || (input == NULL))
13700 return(NULL);
13701
13702 /* assert(ctxt->inputNr == 0); */
13703 while (ctxt->inputNr > 0)
13704 xmlFreeInputStream(inputPop(ctxt));
13705
13706 if (inputPush(ctxt, input) < 0) {
13707 xmlFreeInputStream(input);
13708 return(NULL);
13709 }
13710
13711 xmlParseDocument(ctxt);
13712
13713 if ((ctxt->wellFormed) ||
13714 ((ctxt->recovery) && (ctxt->errNo != XML_ERR_NO_MEMORY))) {
13715 ret = ctxt->myDoc;
13716 } else {
13717 if (ctxt->errNo == XML_ERR_OK)
13718 xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR, "unknown error\n");
13719
13720 ret = NULL;
13721 xmlFreeDoc(ctxt->myDoc);
13722 }
13723 ctxt->myDoc = NULL;
13724
13725 /* assert(ctxt->inputNr == 1); */
13726 while (ctxt->inputNr > 0)
13727 xmlFreeInputStream(inputPop(ctxt));
13728
13729 return(ret);
13730}
13731
13732/**
13733 * xmlReadDoc:
13734 * @cur: a pointer to a zero terminated string
13735 * @URL: base URL (optional)
13736 * @encoding: the document encoding (optional)
13737 * @options: a combination of xmlParserOption
13738 *
13739 * Convenience function to parse an XML document from a
13740 * zero-terminated string.
13741 *
13742 * See xmlCtxtReadDoc for details.
13743 *
13744 * Returns the resulting document tree
13745 */
13746xmlDocPtr
13747xmlReadDoc(const xmlChar *cur, const char *URL, const char *encoding,
13748 int options)
13749{
13750 xmlParserCtxtPtr ctxt;
13751 xmlParserInputPtr input;
13752 xmlDocPtr doc;
13753
13754 ctxt = xmlNewParserCtxt();
13755 if (ctxt == NULL)
13756 return(NULL);
13757
13758 xmlCtxtUseOptions(ctxt, options);
13759
13760 input = xmlNewInputString(ctxt, URL, (const char *) cur, encoding,
13761 XML_INPUT_BUF_STATIC);
13762
13763 doc = xmlCtxtParseDocument(ctxt, input);
13764
13765 xmlFreeParserCtxt(ctxt);
13766 return(doc);
13767}
13768
13769/**
13770 * xmlReadFile:
13771 * @filename: a file or URL
13772 * @encoding: the document encoding (optional)
13773 * @options: a combination of xmlParserOption
13774 *
13775 * Convenience function to parse an XML file from the filesystem,
13776 * the network or a global user-define resource loader.
13777 *
13778 * See xmlCtxtReadFile for details.
13779 *
13780 * Returns the resulting document tree
13781 */
13782xmlDocPtr
13783xmlReadFile(const char *filename, const char *encoding, int options)
13784{
13785 xmlParserCtxtPtr ctxt;
13786 xmlParserInputPtr input;
13787 xmlDocPtr doc;
13788
13789 ctxt = xmlNewParserCtxt();
13790 if (ctxt == NULL)
13791 return(NULL);
13792
13793 xmlCtxtUseOptions(ctxt, options);
13794
13795 /*
13796 * Backward compatibility for users of command line utilities like
13797 * xmlstarlet expecting "-" to mean stdin. This is dangerous and
13798 * should be removed at some point.
13799 */
13800 if ((filename != NULL) && (filename[0] == '-') && (filename[1] == 0))
13801 input = xmlNewInputFd(ctxt, filename, STDIN_FILENO, encoding, 0);
13802 else
13803 input = xmlNewInputURL(ctxt, filename, NULL, encoding, 0);
13804
13805 doc = xmlCtxtParseDocument(ctxt, input);
13806
13807 xmlFreeParserCtxt(ctxt);
13808 return(doc);
13809}
13810
13811/**
13812 * xmlReadMemory:
13813 * @buffer: a pointer to a char array
13814 * @size: the size of the array
13815 * @url: base URL (optional)
13816 * @encoding: the document encoding (optional)
13817 * @options: a combination of xmlParserOption
13818 *
13819 * Parse an XML in-memory document and build a tree. The input buffer must
13820 * not contain a terminating null byte.
13821 *
13822 * See xmlCtxtReadMemory for details.
13823 *
13824 * Returns the resulting document tree
13825 */
13826xmlDocPtr
13827xmlReadMemory(const char *buffer, int size, const char *url,
13828 const char *encoding, int options)
13829{
13830 xmlParserCtxtPtr ctxt;
13831 xmlParserInputPtr input;
13832 xmlDocPtr doc;
13833
13834 if (size < 0)
13835 return(NULL);
13836
13837 ctxt = xmlNewParserCtxt();
13838 if (ctxt == NULL)
13839 return(NULL);
13840
13841 xmlCtxtUseOptions(ctxt, options);
13842
13843 input = xmlNewInputMemory(ctxt, url, buffer, size, encoding,
13844 XML_INPUT_BUF_STATIC);
13845
13846 doc = xmlCtxtParseDocument(ctxt, input);
13847
13848 xmlFreeParserCtxt(ctxt);
13849 return(doc);
13850}
13851
13852/**
13853 * xmlReadFd:
13854 * @fd: an open file descriptor
13855 * @URL: base URL (optional)
13856 * @encoding: the document encoding (optional)
13857 * @options: a combination of xmlParserOption
13858 *
13859 * Parse an XML from a file descriptor and build a tree.
13860 *
13861 * See xmlCtxtReadFd for details.
13862 *
13863 * NOTE that the file descriptor will not be closed when the
13864 * context is freed or reset.
13865 *
13866 * Returns the resulting document tree
13867 */
13868xmlDocPtr
13869xmlReadFd(int fd, const char *URL, const char *encoding, int options)
13870{
13871 xmlParserCtxtPtr ctxt;
13872 xmlParserInputPtr input;
13873 xmlDocPtr doc;
13874
13875 ctxt = xmlNewParserCtxt();
13876 if (ctxt == NULL)
13877 return(NULL);
13878
13879 xmlCtxtUseOptions(ctxt, options);
13880
13881 input = xmlNewInputFd(ctxt, URL, fd, encoding, 0);
13882
13883 doc = xmlCtxtParseDocument(ctxt, input);
13884
13885 xmlFreeParserCtxt(ctxt);
13886 return(doc);
13887}
13888
13889/**
13890 * xmlReadIO:
13891 * @ioread: an I/O read function
13892 * @ioclose: an I/O close function (optional)
13893 * @ioctx: an I/O handler
13894 * @URL: base URL (optional)
13895 * @encoding: the document encoding (optional)
13896 * @options: a combination of xmlParserOption
13897 *
13898 * Parse an XML document from I/O functions and context and build a tree.
13899 *
13900 * See xmlCtxtReadIO for details.
13901 *
13902 * Returns the resulting document tree
13903 */
13904xmlDocPtr
13905xmlReadIO(xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
13906 void *ioctx, const char *URL, const char *encoding, int options)
13907{
13908 xmlParserCtxtPtr ctxt;
13909 xmlParserInputPtr input;
13910 xmlDocPtr doc;
13911
13912 ctxt = xmlNewParserCtxt();
13913 if (ctxt == NULL)
13914 return(NULL);
13915
13916 xmlCtxtUseOptions(ctxt, options);
13917
13918 input = xmlNewInputIO(ctxt, URL, ioread, ioclose, ioctx, encoding, 0);
13919
13920 doc = xmlCtxtParseDocument(ctxt, input);
13921
13922 xmlFreeParserCtxt(ctxt);
13923 return(doc);
13924}
13925
13926/**
13927 * xmlCtxtReadDoc:
13928 * @ctxt: an XML parser context
13929 * @str: a pointer to a zero terminated string
13930 * @URL: base URL (optional)
13931 * @encoding: the document encoding (optional)
13932 * @options: a combination of xmlParserOption
13933 *
13934 * Parse an XML in-memory document and build a tree.
13935 *
13936 * @URL is used as base to resolve external entities and for error
13937 * reporting.
13938 *
13939 * See xmlCtxtUseOptions for details.
13940 *
13941 * Returns the resulting document tree
13942 */
13943xmlDocPtr
13944xmlCtxtReadDoc(xmlParserCtxtPtr ctxt, const xmlChar *str,
13945 const char *URL, const char *encoding, int options)
13946{
13947 xmlParserInputPtr input;
13948
13949 if (ctxt == NULL)
13950 return(NULL);
13951
13952 xmlCtxtReset(ctxt);
13953 xmlCtxtUseOptions(ctxt, options);
13954
13955 input = xmlNewInputString(ctxt, URL, (const char *) str, encoding,
13956 XML_INPUT_BUF_STATIC);
13957
13958 return(xmlCtxtParseDocument(ctxt, input));
13959}
13960
13961/**
13962 * xmlCtxtReadFile:
13963 * @ctxt: an XML parser context
13964 * @filename: a file or URL
13965 * @encoding: the document encoding (optional)
13966 * @options: a combination of xmlParserOption
13967 *
13968 * Parse an XML file from the filesystem, the network or a user-defined
13969 * resource loader.
13970 *
13971 * Returns the resulting document tree
13972 */
13973xmlDocPtr
13974xmlCtxtReadFile(xmlParserCtxtPtr ctxt, const char *filename,
13975 const char *encoding, int options)
13976{
13977 xmlParserInputPtr input;
13978
13979 if (ctxt == NULL)
13980 return(NULL);
13981
13982 xmlCtxtReset(ctxt);
13983 xmlCtxtUseOptions(ctxt, options);
13984
13985 input = xmlNewInputURL(ctxt, filename, NULL, encoding, 0);
13986
13987 return(xmlCtxtParseDocument(ctxt, input));
13988}
13989
13990/**
13991 * xmlCtxtReadMemory:
13992 * @ctxt: an XML parser context
13993 * @buffer: a pointer to a char array
13994 * @size: the size of the array
13995 * @URL: base URL (optional)
13996 * @encoding: the document encoding (optional)
13997 * @options: a combination of xmlParserOption
13998 *
13999 * Parse an XML in-memory document and build a tree. The input buffer must
14000 * not contain a terminating null byte.
14001 *
14002 * @URL is used as base to resolve external entities and for error
14003 * reporting.
14004 *
14005 * See xmlCtxtUseOptions for details.
14006 *
14007 * Returns the resulting document tree
14008 */
14009xmlDocPtr
14010xmlCtxtReadMemory(xmlParserCtxtPtr ctxt, const char *buffer, int size,
14011 const char *URL, const char *encoding, int options)
14012{
14013 xmlParserInputPtr input;
14014
14015 if ((ctxt == NULL) || (size < 0))
14016 return(NULL);
14017
14018 xmlCtxtReset(ctxt);
14019 xmlCtxtUseOptions(ctxt, options);
14020
14021 input = xmlNewInputMemory(ctxt, URL, buffer, size, encoding,
14022 XML_INPUT_BUF_STATIC);
14023
14024 return(xmlCtxtParseDocument(ctxt, input));
14025}
14026
14027/**
14028 * xmlCtxtReadFd:
14029 * @ctxt: an XML parser context
14030 * @fd: an open file descriptor
14031 * @URL: base URL (optional)
14032 * @encoding: the document encoding (optional)
14033 * @options: a combination of xmlParserOption
14034 *
14035 * Parse an XML document from a file descriptor and build a tree.
14036 *
14037 * NOTE that the file descriptor will not be closed when the
14038 * context is freed or reset.
14039 *
14040 * @URL is used as base to resolve external entities and for error
14041 * reporting.
14042 *
14043 * See xmlCtxtUseOptions for details.
14044 *
14045 * Returns the resulting document tree
14046 */
14047xmlDocPtr
14048xmlCtxtReadFd(xmlParserCtxtPtr ctxt, int fd,
14049 const char *URL, const char *encoding, int options)
14050{
14051 xmlParserInputPtr input;
14052
14053 if (ctxt == NULL)
14054 return(NULL);
14055
14056 xmlCtxtReset(ctxt);
14057 xmlCtxtUseOptions(ctxt, options);
14058
14059 input = xmlNewInputFd(ctxt, URL, fd, encoding, 0);
14060
14061 return(xmlCtxtParseDocument(ctxt, input));
14062}
14063
14064/**
14065 * xmlCtxtReadIO:
14066 * @ctxt: an XML parser context
14067 * @ioread: an I/O read function
14068 * @ioclose: an I/O close function
14069 * @ioctx: an I/O handler
14070 * @URL: the base URL to use for the document
14071 * @encoding: the document encoding, or NULL
14072 * @options: a combination of xmlParserOption
14073 *
14074 * parse an XML document from I/O functions and source and build a tree.
14075 * This reuses the existing @ctxt parser context
14076 *
14077 * @URL is used as base to resolve external entities and for error
14078 * reporting.
14079 *
14080 * See xmlCtxtUseOptions for details.
14081 *
14082 * Returns the resulting document tree
14083 */
14084xmlDocPtr
14085xmlCtxtReadIO(xmlParserCtxtPtr ctxt, xmlInputReadCallback ioread,
14086 xmlInputCloseCallback ioclose, void *ioctx,
14087 const char *URL,
14088 const char *encoding, int options)
14089{
14090 xmlParserInputPtr input;
14091
14092 if (ctxt == NULL)
14093 return(NULL);
14094
14095 xmlCtxtReset(ctxt);
14096 xmlCtxtUseOptions(ctxt, options);
14097
14098 input = xmlNewInputIO(ctxt, URL, ioread, ioclose, ioctx, encoding, 0);
14099
14100 return(xmlCtxtParseDocument(ctxt, input));
14101}
14102
Note: See TracBrowser for help on using the repository browser.

© 2024 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette