VirtualBox

source: vbox/trunk/src/libs/libxml2-2.9.14/parser.c@ 98843

Last change on this file since 98843 was 95312, checked in by vboxsync, 3 years ago

libs/{curl,libxml2}: OSE export fixes, bugref:8515

  • Property svn:eol-style set to native
File size: 430.0 KB
Line 
/*
 * parser.c : an XML 1.0 parser, namespaces and validity support are mostly
 *            implemented on top of the SAX interfaces
 *
 * References:
 *   The XML specification:
 *     http://www.w3.org/TR/REC-xml
 *   Original 1.0 version:
 *     http://www.w3.org/TR/1998/REC-xml-19980210
 *   XML second edition working draft
 *     http://www.w3.org/TR/2000/WD-xml-2e-20000814
 *
 * Okay this is a big file, the parser core is around 7000 lines, then it
 * is followed by the progressive parser top routines, then the various
 * high level APIs to call the parser and a few miscellaneous functions.
 * A number of helper functions and deprecated ones have been moved to
 * parserInternals.c to reduce this file size.
 * As much as possible the functions are associated with their relative
 * production in the XML specification. A few productions defining the
 * different ranges of character are actually implemented either in
 * parserInternals.h or parserInternals.c
 * The DOM tree build is realized from the default SAX callbacks in
 * the module SAX.c.
 * The routines doing the validation checks are in valid.c and called either
 * from the SAX callbacks or as standalone functions using a preparsed
 * document.
 *
 * See Copyright for the status of this software.
 *
 * daniel@veillard.com
 */
32
33/* To avoid EBCDIC trouble when parsing on zOS */
34#if defined(__MVS__)
35#pragma convert("ISO8859-1")
36#endif
37
38#define IN_LIBXML
39#include "libxml.h"
40
41#if defined(_WIN32) && !defined (__CYGWIN__)
42#define XML_DIR_SEP '\\'
43#else
44#define XML_DIR_SEP '/'
45#endif
46
47#include <stdlib.h>
48#include <limits.h>
49#include <string.h>
50#include <stdarg.h>
51#include <stddef.h>
52#include <libxml/xmlmemory.h>
53#include <libxml/threads.h>
54#include <libxml/globals.h>
55#include <libxml/tree.h>
56#include <libxml/parser.h>
57#include <libxml/parserInternals.h>
58#include <libxml/valid.h>
59#include <libxml/entities.h>
60#include <libxml/xmlerror.h>
61#include <libxml/encoding.h>
62#include <libxml/xmlIO.h>
63#include <libxml/uri.h>
64#ifdef LIBXML_CATALOG_ENABLED
65#include <libxml/catalog.h>
66#endif
67#ifdef LIBXML_SCHEMAS_ENABLED
68#include <libxml/xmlschemastypes.h>
69#include <libxml/relaxng.h>
70#endif
71#ifdef HAVE_CTYPE_H
72#include <ctype.h>
73#endif
74#ifdef HAVE_STDLIB_H
75#include <stdlib.h>
76#endif
77#ifdef HAVE_SYS_STAT_H
78#include <sys/stat.h>
79#endif
80#ifdef HAVE_FCNTL_H
81#include <fcntl.h>
82#endif
83#ifdef HAVE_UNISTD_H
84#include <unistd.h>
85#endif
86
87#include "buf.h"
88#include "enc.h"
89
90struct _xmlStartTag {
91 const xmlChar *prefix;
92 const xmlChar *URI;
93 int line;
94 int nsNr;
95};
96
97static void
98xmlFatalErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, const char *info);
99
100static xmlParserCtxtPtr
101xmlCreateEntityParserCtxtInternal(const xmlChar *URL, const xmlChar *ID,
102 const xmlChar *base, xmlParserCtxtPtr pctx);
103
104static void xmlHaltParser(xmlParserCtxtPtr ctxt);
105
106static int
107xmlParseElementStart(xmlParserCtxtPtr ctxt);
108
109static void
110xmlParseElementEnd(xmlParserCtxtPtr ctxt);
111
/************************************************************************
 *									*
 *	Arbitrary limits set in the parser. See XML_PARSE_HUGE		*
 *									*
 ************************************************************************/

#define XML_PARSER_BIG_ENTITY 1000
#define XML_PARSER_LOT_ENTITY 5000

/*
 * XML_PARSER_NON_LINEAR is the threshold where the ratio of parsed entity
 * replacement over the size in bytes of the input indicates that you have
 * an exponential behaviour. A value of 10 corresponds to at least 3 entity
 * replacements per byte of input.
 */
#define XML_PARSER_NON_LINEAR 10
128
129/*
130 * xmlParserEntityCheck
131 *
132 * Function to check non-linear entity expansion behaviour
133 * This is here to detect and stop exponential linear entity expansion
134 * This is not a limitation of the parser but a safety
135 * boundary feature. It can be disabled with the XML_PARSE_HUGE
136 * parser option.
137 */
138static int
139xmlParserEntityCheck(xmlParserCtxtPtr ctxt, size_t size,
140 xmlEntityPtr ent, size_t replacement)
141{
142 size_t consumed = 0;
143 int i;
144
145 if ((ctxt == NULL) || (ctxt->options & XML_PARSE_HUGE))
146 return (0);
147 if (ctxt->lastError.code == XML_ERR_ENTITY_LOOP)
148 return (1);
149
150 /*
151 * This may look absurd but is needed to detect
152 * entities problems
153 */
154 if ((ent != NULL) && (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
155 (ent->content != NULL) && (ent->checked == 0) &&
156 (ctxt->errNo != XML_ERR_ENTITY_LOOP)) {
157 unsigned long oldnbent = ctxt->nbentities, diff;
158 xmlChar *rep;
159
160 ent->checked = 1;
161
162 ++ctxt->depth;
163 rep = xmlStringDecodeEntities(ctxt, ent->content,
164 XML_SUBSTITUTE_REF, 0, 0, 0);
165 --ctxt->depth;
166 if ((rep == NULL) || (ctxt->errNo == XML_ERR_ENTITY_LOOP)) {
167 ent->content[0] = 0;
168 }
169
170 diff = ctxt->nbentities - oldnbent + 1;
171 if (diff > INT_MAX / 2)
172 diff = INT_MAX / 2;
173 ent->checked = diff * 2;
174 if (rep != NULL) {
175 if (xmlStrchr(rep, '<'))
176 ent->checked |= 1;
177 xmlFree(rep);
178 rep = NULL;
179 }
180 }
181
182 /*
183 * Prevent entity exponential check, not just replacement while
184 * parsing the DTD
185 * The check is potentially costly so do that only once in a thousand
186 */
187 if ((ctxt->instate == XML_PARSER_DTD) && (ctxt->nbentities > 10000) &&
188 (ctxt->nbentities % 1024 == 0)) {
189 for (i = 0;i < ctxt->inputNr;i++) {
190 consumed += ctxt->inputTab[i]->consumed +
191 (ctxt->inputTab[i]->cur - ctxt->inputTab[i]->base);
192 }
193 if (ctxt->nbentities > consumed * XML_PARSER_NON_LINEAR) {
194 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
195 ctxt->instate = XML_PARSER_EOF;
196 return (1);
197 }
198 consumed = 0;
199 }
200
201
202
203 if (replacement != 0) {
204 if (replacement < XML_MAX_TEXT_LENGTH)
205 return(0);
206
207 /*
208 * If the volume of entity copy reaches 10 times the
209 * amount of parsed data and over the large text threshold
210 * then that's very likely to be an abuse.
211 */
212 if (ctxt->input != NULL) {
213 consumed = ctxt->input->consumed +
214 (ctxt->input->cur - ctxt->input->base);
215 }
216 consumed += ctxt->sizeentities;
217
218 if (replacement < XML_PARSER_NON_LINEAR * consumed)
219 return(0);
220 } else if (size != 0) {
221 /*
222 * Do the check based on the replacement size of the entity
223 */
224 if (size < XML_PARSER_BIG_ENTITY)
225 return(0);
226
227 /*
228 * A limit on the amount of text data reasonably used
229 */
230 if (ctxt->input != NULL) {
231 consumed = ctxt->input->consumed +
232 (ctxt->input->cur - ctxt->input->base);
233 }
234 consumed += ctxt->sizeentities;
235
236 if ((size < XML_PARSER_NON_LINEAR * consumed) &&
237 (ctxt->nbentities * 3 < XML_PARSER_NON_LINEAR * consumed))
238 return (0);
239 } else if (ent != NULL) {
240 /*
241 * use the number of parsed entities in the replacement
242 */
243 size = ent->checked / 2;
244
245 /*
246 * The amount of data parsed counting entities size only once
247 */
248 if (ctxt->input != NULL) {
249 consumed = ctxt->input->consumed +
250 (ctxt->input->cur - ctxt->input->base);
251 }
252 consumed += ctxt->sizeentities;
253
254 /*
255 * Check the density of entities for the amount of data
256 * knowing an entity reference will take at least 3 bytes
257 */
258 if (size * 3 < consumed * XML_PARSER_NON_LINEAR)
259 return (0);
260 } else {
261 /*
262 * strange we got no data for checking
263 */
264 if (((ctxt->lastError.code != XML_ERR_UNDECLARED_ENTITY) &&
265 (ctxt->lastError.code != XML_WAR_UNDECLARED_ENTITY)) ||
266 (ctxt->nbentities <= 10000))
267 return (0);
268 }
269 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
270 return (1);
271}
272
/**
 * xmlParserMaxDepth:
 *
 * arbitrary depth limit for the XML documents that we allow to
 * process. This is not a limitation of the parser but a safety
 * boundary feature. It can be disabled with the XML_PARSE_HUGE
 * parser option.
 */
unsigned int xmlParserMaxDepth = 256;
282
283
284
#define SAX2 1
#define XML_PARSER_BIG_BUFFER_SIZE 300
#define XML_PARSER_BUFFER_SIZE 100
#define SAX_COMPAT_MODE BAD_CAST "SAX compatibility mode document"

/**
 * XML_PARSER_CHUNK_SIZE
 *
 * When calling GROW that's the minimal amount of data
 * the parser expects to have received. It is not a hard
 * limit but an optimization when reading strings like Names.
 * It is not strictly needed as long as the input's available characters
 * are followed by 0, which should be provided by the I/O level.
 */
#define XML_PARSER_CHUNK_SIZE 100
300
/*
 * List of XML prefixed PI allowed by W3C specs.
 * The table is terminated by a NULL entry; both the strings and the
 * table slots are read-only.
 */

static const char *const xmlW3CPIs[] = {
    "xml-stylesheet",
    "xml-model",
    NULL
};
310
311
312/* DEPR void xmlParserHandleReference(xmlParserCtxtPtr ctxt); */
313static xmlEntityPtr xmlParseStringPEReference(xmlParserCtxtPtr ctxt,
314 const xmlChar **str);
315
316static xmlParserErrors
317xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
318 xmlSAXHandlerPtr sax,
319 void *user_data, int depth, const xmlChar *URL,
320 const xmlChar *ID, xmlNodePtr *list);
321
322static int
323xmlCtxtUseOptionsInternal(xmlParserCtxtPtr ctxt, int options,
324 const char *encoding);
325#ifdef LIBXML_LEGACY_ENABLED
326static void
327xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
328 xmlNodePtr lastNode);
329#endif /* LIBXML_LEGACY_ENABLED */
330
331static xmlParserErrors
332xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
333 const xmlChar *string, void *user_data, xmlNodePtr *lst);
334
335static int
336xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity);
337
338/************************************************************************
339 * *
340 * Some factorized error routines *
341 * *
342 ************************************************************************/
343
344/**
345 * xmlErrAttributeDup:
346 * @ctxt: an XML parser context
347 * @prefix: the attribute prefix
348 * @localname: the attribute localname
349 *
350 * Handle a redefinition of attribute error
351 */
352static void
353xmlErrAttributeDup(xmlParserCtxtPtr ctxt, const xmlChar * prefix,
354 const xmlChar * localname)
355{
356 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
357 (ctxt->instate == XML_PARSER_EOF))
358 return;
359 if (ctxt != NULL)
360 ctxt->errNo = XML_ERR_ATTRIBUTE_REDEFINED;
361
362 if (prefix == NULL)
363 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
364 XML_ERR_ATTRIBUTE_REDEFINED, XML_ERR_FATAL, NULL, 0,
365 (const char *) localname, NULL, NULL, 0, 0,
366 "Attribute %s redefined\n", localname);
367 else
368 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
369 XML_ERR_ATTRIBUTE_REDEFINED, XML_ERR_FATAL, NULL, 0,
370 (const char *) prefix, (const char *) localname,
371 NULL, 0, 0, "Attribute %s:%s redefined\n", prefix,
372 localname);
373 if (ctxt != NULL) {
374 ctxt->wellFormed = 0;
375 if (ctxt->recovery == 0)
376 ctxt->disableSAX = 1;
377 }
378}
379
380/**
381 * xmlFatalErr:
382 * @ctxt: an XML parser context
383 * @error: the error number
384 * @extra: extra information string
385 *
386 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
387 */
388static void
389xmlFatalErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, const char *info)
390{
391 const char *errmsg;
392
393 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
394 (ctxt->instate == XML_PARSER_EOF))
395 return;
396 switch (error) {
397 case XML_ERR_INVALID_HEX_CHARREF:
398 errmsg = "CharRef: invalid hexadecimal value";
399 break;
400 case XML_ERR_INVALID_DEC_CHARREF:
401 errmsg = "CharRef: invalid decimal value";
402 break;
403 case XML_ERR_INVALID_CHARREF:
404 errmsg = "CharRef: invalid value";
405 break;
406 case XML_ERR_INTERNAL_ERROR:
407 errmsg = "internal error";
408 break;
409 case XML_ERR_PEREF_AT_EOF:
410 errmsg = "PEReference at end of document";
411 break;
412 case XML_ERR_PEREF_IN_PROLOG:
413 errmsg = "PEReference in prolog";
414 break;
415 case XML_ERR_PEREF_IN_EPILOG:
416 errmsg = "PEReference in epilog";
417 break;
418 case XML_ERR_PEREF_NO_NAME:
419 errmsg = "PEReference: no name";
420 break;
421 case XML_ERR_PEREF_SEMICOL_MISSING:
422 errmsg = "PEReference: expecting ';'";
423 break;
424 case XML_ERR_ENTITY_LOOP:
425 errmsg = "Detected an entity reference loop";
426 break;
427 case XML_ERR_ENTITY_NOT_STARTED:
428 errmsg = "EntityValue: \" or ' expected";
429 break;
430 case XML_ERR_ENTITY_PE_INTERNAL:
431 errmsg = "PEReferences forbidden in internal subset";
432 break;
433 case XML_ERR_ENTITY_NOT_FINISHED:
434 errmsg = "EntityValue: \" or ' expected";
435 break;
436 case XML_ERR_ATTRIBUTE_NOT_STARTED:
437 errmsg = "AttValue: \" or ' expected";
438 break;
439 case XML_ERR_LT_IN_ATTRIBUTE:
440 errmsg = "Unescaped '<' not allowed in attributes values";
441 break;
442 case XML_ERR_LITERAL_NOT_STARTED:
443 errmsg = "SystemLiteral \" or ' expected";
444 break;
445 case XML_ERR_LITERAL_NOT_FINISHED:
446 errmsg = "Unfinished System or Public ID \" or ' expected";
447 break;
448 case XML_ERR_MISPLACED_CDATA_END:
449 errmsg = "Sequence ']]>' not allowed in content";
450 break;
451 case XML_ERR_URI_REQUIRED:
452 errmsg = "SYSTEM or PUBLIC, the URI is missing";
453 break;
454 case XML_ERR_PUBID_REQUIRED:
455 errmsg = "PUBLIC, the Public Identifier is missing";
456 break;
457 case XML_ERR_HYPHEN_IN_COMMENT:
458 errmsg = "Comment must not contain '--' (double-hyphen)";
459 break;
460 case XML_ERR_PI_NOT_STARTED:
461 errmsg = "xmlParsePI : no target name";
462 break;
463 case XML_ERR_RESERVED_XML_NAME:
464 errmsg = "Invalid PI name";
465 break;
466 case XML_ERR_NOTATION_NOT_STARTED:
467 errmsg = "NOTATION: Name expected here";
468 break;
469 case XML_ERR_NOTATION_NOT_FINISHED:
470 errmsg = "'>' required to close NOTATION declaration";
471 break;
472 case XML_ERR_VALUE_REQUIRED:
473 errmsg = "Entity value required";
474 break;
475 case XML_ERR_URI_FRAGMENT:
476 errmsg = "Fragment not allowed";
477 break;
478 case XML_ERR_ATTLIST_NOT_STARTED:
479 errmsg = "'(' required to start ATTLIST enumeration";
480 break;
481 case XML_ERR_NMTOKEN_REQUIRED:
482 errmsg = "NmToken expected in ATTLIST enumeration";
483 break;
484 case XML_ERR_ATTLIST_NOT_FINISHED:
485 errmsg = "')' required to finish ATTLIST enumeration";
486 break;
487 case XML_ERR_MIXED_NOT_STARTED:
488 errmsg = "MixedContentDecl : '|' or ')*' expected";
489 break;
490 case XML_ERR_PCDATA_REQUIRED:
491 errmsg = "MixedContentDecl : '#PCDATA' expected";
492 break;
493 case XML_ERR_ELEMCONTENT_NOT_STARTED:
494 errmsg = "ContentDecl : Name or '(' expected";
495 break;
496 case XML_ERR_ELEMCONTENT_NOT_FINISHED:
497 errmsg = "ContentDecl : ',' '|' or ')' expected";
498 break;
499 case XML_ERR_PEREF_IN_INT_SUBSET:
500 errmsg =
501 "PEReference: forbidden within markup decl in internal subset";
502 break;
503 case XML_ERR_GT_REQUIRED:
504 errmsg = "expected '>'";
505 break;
506 case XML_ERR_CONDSEC_INVALID:
507 errmsg = "XML conditional section '[' expected";
508 break;
509 case XML_ERR_EXT_SUBSET_NOT_FINISHED:
510 errmsg = "Content error in the external subset";
511 break;
512 case XML_ERR_CONDSEC_INVALID_KEYWORD:
513 errmsg =
514 "conditional section INCLUDE or IGNORE keyword expected";
515 break;
516 case XML_ERR_CONDSEC_NOT_FINISHED:
517 errmsg = "XML conditional section not closed";
518 break;
519 case XML_ERR_XMLDECL_NOT_STARTED:
520 errmsg = "Text declaration '<?xml' required";
521 break;
522 case XML_ERR_XMLDECL_NOT_FINISHED:
523 errmsg = "parsing XML declaration: '?>' expected";
524 break;
525 case XML_ERR_EXT_ENTITY_STANDALONE:
526 errmsg = "external parsed entities cannot be standalone";
527 break;
528 case XML_ERR_ENTITYREF_SEMICOL_MISSING:
529 errmsg = "EntityRef: expecting ';'";
530 break;
531 case XML_ERR_DOCTYPE_NOT_FINISHED:
532 errmsg = "DOCTYPE improperly terminated";
533 break;
534 case XML_ERR_LTSLASH_REQUIRED:
535 errmsg = "EndTag: '</' not found";
536 break;
537 case XML_ERR_EQUAL_REQUIRED:
538 errmsg = "expected '='";
539 break;
540 case XML_ERR_STRING_NOT_CLOSED:
541 errmsg = "String not closed expecting \" or '";
542 break;
543 case XML_ERR_STRING_NOT_STARTED:
544 errmsg = "String not started expecting ' or \"";
545 break;
546 case XML_ERR_ENCODING_NAME:
547 errmsg = "Invalid XML encoding name";
548 break;
549 case XML_ERR_STANDALONE_VALUE:
550 errmsg = "standalone accepts only 'yes' or 'no'";
551 break;
552 case XML_ERR_DOCUMENT_EMPTY:
553 errmsg = "Document is empty";
554 break;
555 case XML_ERR_DOCUMENT_END:
556 errmsg = "Extra content at the end of the document";
557 break;
558 case XML_ERR_NOT_WELL_BALANCED:
559 errmsg = "chunk is not well balanced";
560 break;
561 case XML_ERR_EXTRA_CONTENT:
562 errmsg = "extra content at the end of well balanced chunk";
563 break;
564 case XML_ERR_VERSION_MISSING:
565 errmsg = "Malformed declaration expecting version";
566 break;
567 case XML_ERR_NAME_TOO_LONG:
568 errmsg = "Name too long use XML_PARSE_HUGE option";
569 break;
570#if 0
571 case:
572 errmsg = "";
573 break;
574#endif
575 default:
576 errmsg = "Unregistered error message";
577 }
578 if (ctxt != NULL)
579 ctxt->errNo = error;
580 if (info == NULL) {
581 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
582 XML_ERR_FATAL, NULL, 0, info, NULL, NULL, 0, 0, "%s\n",
583 errmsg);
584 } else {
585 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
586 XML_ERR_FATAL, NULL, 0, info, NULL, NULL, 0, 0, "%s: %s\n",
587 errmsg, info);
588 }
589 if (ctxt != NULL) {
590 ctxt->wellFormed = 0;
591 if (ctxt->recovery == 0)
592 ctxt->disableSAX = 1;
593 }
594}
595
596/**
597 * xmlFatalErrMsg:
598 * @ctxt: an XML parser context
599 * @error: the error number
600 * @msg: the error message
601 *
602 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
603 */
604static void LIBXML_ATTR_FORMAT(3,0)
605xmlFatalErrMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
606 const char *msg)
607{
608 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
609 (ctxt->instate == XML_PARSER_EOF))
610 return;
611 if (ctxt != NULL)
612 ctxt->errNo = error;
613 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
614 XML_ERR_FATAL, NULL, 0, NULL, NULL, NULL, 0, 0, "%s", msg);
615 if (ctxt != NULL) {
616 ctxt->wellFormed = 0;
617 if (ctxt->recovery == 0)
618 ctxt->disableSAX = 1;
619 }
620}
621
622/**
623 * xmlWarningMsg:
624 * @ctxt: an XML parser context
625 * @error: the error number
626 * @msg: the error message
627 * @str1: extra data
628 * @str2: extra data
629 *
630 * Handle a warning.
631 */
632static void LIBXML_ATTR_FORMAT(3,0)
633xmlWarningMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
634 const char *msg, const xmlChar *str1, const xmlChar *str2)
635{
636 xmlStructuredErrorFunc schannel = NULL;
637
638 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
639 (ctxt->instate == XML_PARSER_EOF))
640 return;
641 if ((ctxt != NULL) && (ctxt->sax != NULL) &&
642 (ctxt->sax->initialized == XML_SAX2_MAGIC))
643 schannel = ctxt->sax->serror;
644 if (ctxt != NULL) {
645 __xmlRaiseError(schannel,
646 (ctxt->sax) ? ctxt->sax->warning : NULL,
647 ctxt->userData,
648 ctxt, NULL, XML_FROM_PARSER, error,
649 XML_ERR_WARNING, NULL, 0,
650 (const char *) str1, (const char *) str2, NULL, 0, 0,
651 msg, (const char *) str1, (const char *) str2);
652 } else {
653 __xmlRaiseError(schannel, NULL, NULL,
654 ctxt, NULL, XML_FROM_PARSER, error,
655 XML_ERR_WARNING, NULL, 0,
656 (const char *) str1, (const char *) str2, NULL, 0, 0,
657 msg, (const char *) str1, (const char *) str2);
658 }
659}
660
661/**
662 * xmlValidityError:
663 * @ctxt: an XML parser context
664 * @error: the error number
665 * @msg: the error message
666 * @str1: extra data
667 *
668 * Handle a validity error.
669 */
670static void LIBXML_ATTR_FORMAT(3,0)
671xmlValidityError(xmlParserCtxtPtr ctxt, xmlParserErrors error,
672 const char *msg, const xmlChar *str1, const xmlChar *str2)
673{
674 xmlStructuredErrorFunc schannel = NULL;
675
676 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
677 (ctxt->instate == XML_PARSER_EOF))
678 return;
679 if (ctxt != NULL) {
680 ctxt->errNo = error;
681 if ((ctxt->sax != NULL) && (ctxt->sax->initialized == XML_SAX2_MAGIC))
682 schannel = ctxt->sax->serror;
683 }
684 if (ctxt != NULL) {
685 __xmlRaiseError(schannel,
686 ctxt->vctxt.error, ctxt->vctxt.userData,
687 ctxt, NULL, XML_FROM_DTD, error,
688 XML_ERR_ERROR, NULL, 0, (const char *) str1,
689 (const char *) str2, NULL, 0, 0,
690 msg, (const char *) str1, (const char *) str2);
691 ctxt->valid = 0;
692 } else {
693 __xmlRaiseError(schannel, NULL, NULL,
694 ctxt, NULL, XML_FROM_DTD, error,
695 XML_ERR_ERROR, NULL, 0, (const char *) str1,
696 (const char *) str2, NULL, 0, 0,
697 msg, (const char *) str1, (const char *) str2);
698 }
699}
700
701/**
702 * xmlFatalErrMsgInt:
703 * @ctxt: an XML parser context
704 * @error: the error number
705 * @msg: the error message
706 * @val: an integer value
707 *
708 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
709 */
710static void LIBXML_ATTR_FORMAT(3,0)
711xmlFatalErrMsgInt(xmlParserCtxtPtr ctxt, xmlParserErrors error,
712 const char *msg, int val)
713{
714 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
715 (ctxt->instate == XML_PARSER_EOF))
716 return;
717 if (ctxt != NULL)
718 ctxt->errNo = error;
719 __xmlRaiseError(NULL, NULL, NULL,
720 ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
721 NULL, 0, NULL, NULL, NULL, val, 0, msg, val);
722 if (ctxt != NULL) {
723 ctxt->wellFormed = 0;
724 if (ctxt->recovery == 0)
725 ctxt->disableSAX = 1;
726 }
727}
728
729/**
730 * xmlFatalErrMsgStrIntStr:
731 * @ctxt: an XML parser context
732 * @error: the error number
733 * @msg: the error message
734 * @str1: an string info
735 * @val: an integer value
736 * @str2: an string info
737 *
738 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
739 */
740static void LIBXML_ATTR_FORMAT(3,0)
741xmlFatalErrMsgStrIntStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
742 const char *msg, const xmlChar *str1, int val,
743 const xmlChar *str2)
744{
745 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
746 (ctxt->instate == XML_PARSER_EOF))
747 return;
748 if (ctxt != NULL)
749 ctxt->errNo = error;
750 __xmlRaiseError(NULL, NULL, NULL,
751 ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
752 NULL, 0, (const char *) str1, (const char *) str2,
753 NULL, val, 0, msg, str1, val, str2);
754 if (ctxt != NULL) {
755 ctxt->wellFormed = 0;
756 if (ctxt->recovery == 0)
757 ctxt->disableSAX = 1;
758 }
759}
760
761/**
762 * xmlFatalErrMsgStr:
763 * @ctxt: an XML parser context
764 * @error: the error number
765 * @msg: the error message
766 * @val: a string value
767 *
768 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
769 */
770static void LIBXML_ATTR_FORMAT(3,0)
771xmlFatalErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
772 const char *msg, const xmlChar * val)
773{
774 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
775 (ctxt->instate == XML_PARSER_EOF))
776 return;
777 if (ctxt != NULL)
778 ctxt->errNo = error;
779 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
780 XML_FROM_PARSER, error, XML_ERR_FATAL,
781 NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
782 val);
783 if (ctxt != NULL) {
784 ctxt->wellFormed = 0;
785 if (ctxt->recovery == 0)
786 ctxt->disableSAX = 1;
787 }
788}
789
790/**
791 * xmlErrMsgStr:
792 * @ctxt: an XML parser context
793 * @error: the error number
794 * @msg: the error message
795 * @val: a string value
796 *
797 * Handle a non fatal parser error
798 */
799static void LIBXML_ATTR_FORMAT(3,0)
800xmlErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
801 const char *msg, const xmlChar * val)
802{
803 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
804 (ctxt->instate == XML_PARSER_EOF))
805 return;
806 if (ctxt != NULL)
807 ctxt->errNo = error;
808 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
809 XML_FROM_PARSER, error, XML_ERR_ERROR,
810 NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
811 val);
812}
813
814/**
815 * xmlNsErr:
816 * @ctxt: an XML parser context
817 * @error: the error number
818 * @msg: the message
819 * @info1: extra information string
820 * @info2: extra information string
821 *
822 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
823 */
824static void LIBXML_ATTR_FORMAT(3,0)
825xmlNsErr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
826 const char *msg,
827 const xmlChar * info1, const xmlChar * info2,
828 const xmlChar * info3)
829{
830 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
831 (ctxt->instate == XML_PARSER_EOF))
832 return;
833 if (ctxt != NULL)
834 ctxt->errNo = error;
835 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error,
836 XML_ERR_ERROR, NULL, 0, (const char *) info1,
837 (const char *) info2, (const char *) info3, 0, 0, msg,
838 info1, info2, info3);
839 if (ctxt != NULL)
840 ctxt->nsWellFormed = 0;
841}
842
843/**
844 * xmlNsWarn
845 * @ctxt: an XML parser context
846 * @error: the error number
847 * @msg: the message
848 * @info1: extra information string
849 * @info2: extra information string
850 *
851 * Handle a namespace warning error
852 */
853static void LIBXML_ATTR_FORMAT(3,0)
854xmlNsWarn(xmlParserCtxtPtr ctxt, xmlParserErrors error,
855 const char *msg,
856 const xmlChar * info1, const xmlChar * info2,
857 const xmlChar * info3)
858{
859 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
860 (ctxt->instate == XML_PARSER_EOF))
861 return;
862 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error,
863 XML_ERR_WARNING, NULL, 0, (const char *) info1,
864 (const char *) info2, (const char *) info3, 0, 0, msg,
865 info1, info2, info3);
866}
867
868/************************************************************************
869 * *
870 * Library wide options *
871 * *
872 ************************************************************************/
873
874/**
875 * xmlHasFeature:
876 * @feature: the feature to be examined
877 *
878 * Examines if the library has been compiled with a given feature.
879 *
880 * Returns a non-zero value if the feature exist, otherwise zero.
881 * Returns zero (0) if the feature does not exist or an unknown
882 * unknown feature is requested, non-zero otherwise.
883 */
884int
885xmlHasFeature(xmlFeature feature)
886{
887 switch (feature) {
888 case XML_WITH_THREAD:
889#ifdef LIBXML_THREAD_ENABLED
890 return(1);
891#else
892 return(0);
893#endif
894 case XML_WITH_TREE:
895#ifdef LIBXML_TREE_ENABLED
896 return(1);
897#else
898 return(0);
899#endif
900 case XML_WITH_OUTPUT:
901#ifdef LIBXML_OUTPUT_ENABLED
902 return(1);
903#else
904 return(0);
905#endif
906 case XML_WITH_PUSH:
907#ifdef LIBXML_PUSH_ENABLED
908 return(1);
909#else
910 return(0);
911#endif
912 case XML_WITH_READER:
913#ifdef LIBXML_READER_ENABLED
914 return(1);
915#else
916 return(0);
917#endif
918 case XML_WITH_PATTERN:
919#ifdef LIBXML_PATTERN_ENABLED
920 return(1);
921#else
922 return(0);
923#endif
924 case XML_WITH_WRITER:
925#ifdef LIBXML_WRITER_ENABLED
926 return(1);
927#else
928 return(0);
929#endif
930 case XML_WITH_SAX1:
931#ifdef LIBXML_SAX1_ENABLED
932 return(1);
933#else
934 return(0);
935#endif
936 case XML_WITH_FTP:
937#ifdef LIBXML_FTP_ENABLED
938 return(1);
939#else
940 return(0);
941#endif
942 case XML_WITH_HTTP:
943#ifdef LIBXML_HTTP_ENABLED
944 return(1);
945#else
946 return(0);
947#endif
948 case XML_WITH_VALID:
949#ifdef LIBXML_VALID_ENABLED
950 return(1);
951#else
952 return(0);
953#endif
954 case XML_WITH_HTML:
955#ifdef LIBXML_HTML_ENABLED
956 return(1);
957#else
958 return(0);
959#endif
960 case XML_WITH_LEGACY:
961#ifdef LIBXML_LEGACY_ENABLED
962 return(1);
963#else
964 return(0);
965#endif
966 case XML_WITH_C14N:
967#ifdef LIBXML_C14N_ENABLED
968 return(1);
969#else
970 return(0);
971#endif
972 case XML_WITH_CATALOG:
973#ifdef LIBXML_CATALOG_ENABLED
974 return(1);
975#else
976 return(0);
977#endif
978 case XML_WITH_XPATH:
979#ifdef LIBXML_XPATH_ENABLED
980 return(1);
981#else
982 return(0);
983#endif
984 case XML_WITH_XPTR:
985#ifdef LIBXML_XPTR_ENABLED
986 return(1);
987#else
988 return(0);
989#endif
990 case XML_WITH_XINCLUDE:
991#ifdef LIBXML_XINCLUDE_ENABLED
992 return(1);
993#else
994 return(0);
995#endif
996 case XML_WITH_ICONV:
997#ifdef LIBXML_ICONV_ENABLED
998 return(1);
999#else
1000 return(0);
1001#endif
1002 case XML_WITH_ISO8859X:
1003#ifdef LIBXML_ISO8859X_ENABLED
1004 return(1);
1005#else
1006 return(0);
1007#endif
1008 case XML_WITH_UNICODE:
1009#ifdef LIBXML_UNICODE_ENABLED
1010 return(1);
1011#else
1012 return(0);
1013#endif
1014 case XML_WITH_REGEXP:
1015#ifdef LIBXML_REGEXP_ENABLED
1016 return(1);
1017#else
1018 return(0);
1019#endif
1020 case XML_WITH_AUTOMATA:
1021#ifdef LIBXML_AUTOMATA_ENABLED
1022 return(1);
1023#else
1024 return(0);
1025#endif
1026 case XML_WITH_EXPR:
1027#ifdef LIBXML_EXPR_ENABLED
1028 return(1);
1029#else
1030 return(0);
1031#endif
1032 case XML_WITH_SCHEMAS:
1033#ifdef LIBXML_SCHEMAS_ENABLED
1034 return(1);
1035#else
1036 return(0);
1037#endif
1038 case XML_WITH_SCHEMATRON:
1039#ifdef LIBXML_SCHEMATRON_ENABLED
1040 return(1);
1041#else
1042 return(0);
1043#endif
1044 case XML_WITH_MODULES:
1045#ifdef LIBXML_MODULES_ENABLED
1046 return(1);
1047#else
1048 return(0);
1049#endif
1050 case XML_WITH_DEBUG:
1051#ifdef LIBXML_DEBUG_ENABLED
1052 return(1);
1053#else
1054 return(0);
1055#endif
1056 case XML_WITH_DEBUG_MEM:
1057#ifdef DEBUG_MEMORY_LOCATION
1058 return(1);
1059#else
1060 return(0);
1061#endif
1062 case XML_WITH_DEBUG_RUN:
1063#ifdef LIBXML_DEBUG_RUNTIME
1064 return(1);
1065#else
1066 return(0);
1067#endif
1068 case XML_WITH_ZLIB:
1069#ifdef LIBXML_ZLIB_ENABLED
1070 return(1);
1071#else
1072 return(0);
1073#endif
1074 case XML_WITH_LZMA:
1075#ifdef LIBXML_LZMA_ENABLED
1076 return(1);
1077#else
1078 return(0);
1079#endif
1080 case XML_WITH_ICU:
1081#ifdef LIBXML_ICU_ENABLED
1082 return(1);
1083#else
1084 return(0);
1085#endif
1086 default:
1087 break;
1088 }
1089 return(0);
1090}
1091
1092/************************************************************************
1093 * *
1094 * SAX2 defaulted attributes handling *
1095 * *
1096 ************************************************************************/
1097
1098/**
1099 * xmlDetectSAX2:
1100 * @ctxt: an XML parser context
1101 *
1102 * Do the SAX2 detection and specific initialization
1103 */
1104static void
1105xmlDetectSAX2(xmlParserCtxtPtr ctxt) {
1106 xmlSAXHandlerPtr sax;
1107 if (ctxt == NULL) return;
1108 sax = ctxt->sax;
1109#ifdef LIBXML_SAX1_ENABLED
1110 if ((sax) && (sax->initialized == XML_SAX2_MAGIC) &&
1111 ((sax->startElementNs != NULL) ||
1112 (sax->endElementNs != NULL) ||
1113 ((sax->startElement == NULL) && (sax->endElement == NULL))))
1114 ctxt->sax2 = 1;
1115#else
1116 ctxt->sax2 = 1;
1117#endif /* LIBXML_SAX1_ENABLED */
1118
1119 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
1120 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
1121 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
1122 if ((ctxt->str_xml==NULL) || (ctxt->str_xmlns==NULL) ||
1123 (ctxt->str_xml_ns == NULL)) {
1124 xmlErrMemory(ctxt, NULL);
1125 }
1126}
1127
1128typedef struct _xmlDefAttrs xmlDefAttrs;
1129typedef xmlDefAttrs *xmlDefAttrsPtr;
1130struct _xmlDefAttrs {
1131 int nbAttrs; /* number of defaulted attributes on that element */
1132 int maxAttrs; /* the size of the array */
1133#if __STDC_VERSION__ >= 199901L
1134 /* Using a C99 flexible array member avoids UBSan errors. */
1135 const xmlChar *values[]; /* array of localname/prefix/values/external */
1136#else
1137 const xmlChar *values[5];
1138#endif
1139};
1140
1141/**
1142 * xmlAttrNormalizeSpace:
1143 * @src: the source string
1144 * @dst: the target string
1145 *
1146 * Normalize the space in non CDATA attribute values:
1147 * If the attribute type is not CDATA, then the XML processor MUST further
1148 * process the normalized attribute value by discarding any leading and
1149 * trailing space (#x20) characters, and by replacing sequences of space
1150 * (#x20) characters by a single space (#x20) character.
1151 * Note that the size of dst need to be at least src, and if one doesn't need
1152 * to preserve dst (and it doesn't come from a dictionary or read-only) then
1153 * passing src as dst is just fine.
1154 *
1155 * Returns a pointer to the normalized value (dst) or NULL if no conversion
1156 * is needed.
1157 */
1158static xmlChar *
1159xmlAttrNormalizeSpace(const xmlChar *src, xmlChar *dst)
1160{
1161 if ((src == NULL) || (dst == NULL))
1162 return(NULL);
1163
1164 while (*src == 0x20) src++;
1165 while (*src != 0) {
1166 if (*src == 0x20) {
1167 while (*src == 0x20) src++;
1168 if (*src != 0)
1169 *dst++ = 0x20;
1170 } else {
1171 *dst++ = *src++;
1172 }
1173 }
1174 *dst = 0;
1175 if (dst == src)
1176 return(NULL);
1177 return(dst);
1178}
1179
1180/**
1181 * xmlAttrNormalizeSpace2:
1182 * @src: the source string
1183 *
1184 * Normalize the space in non CDATA attribute values, a slightly more complex
1185 * front end to avoid allocation problems when running on attribute values
1186 * coming from the input.
1187 *
1188 * Returns a pointer to the normalized value (dst) or NULL if no conversion
1189 * is needed.
1190 */
1191static const xmlChar *
1192xmlAttrNormalizeSpace2(xmlParserCtxtPtr ctxt, xmlChar *src, int *len)
1193{
1194 int i;
1195 int remove_head = 0;
1196 int need_realloc = 0;
1197 const xmlChar *cur;
1198
1199 if ((ctxt == NULL) || (src == NULL) || (len == NULL))
1200 return(NULL);
1201 i = *len;
1202 if (i <= 0)
1203 return(NULL);
1204
1205 cur = src;
1206 while (*cur == 0x20) {
1207 cur++;
1208 remove_head++;
1209 }
1210 while (*cur != 0) {
1211 if (*cur == 0x20) {
1212 cur++;
1213 if ((*cur == 0x20) || (*cur == 0)) {
1214 need_realloc = 1;
1215 break;
1216 }
1217 } else
1218 cur++;
1219 }
1220 if (need_realloc) {
1221 xmlChar *ret;
1222
1223 ret = xmlStrndup(src + remove_head, i - remove_head + 1);
1224 if (ret == NULL) {
1225 xmlErrMemory(ctxt, NULL);
1226 return(NULL);
1227 }
1228 xmlAttrNormalizeSpace(ret, ret);
1229 *len = (int) strlen((const char *)ret);
1230 return(ret);
1231 } else if (remove_head) {
1232 *len -= remove_head;
1233 memmove(src, src + remove_head, 1 + *len);
1234 return(src);
1235 }
1236 return(NULL);
1237}
1238
1239/**
1240 * xmlAddDefAttrs:
1241 * @ctxt: an XML parser context
1242 * @fullname: the element fullname
1243 * @fullattr: the attribute fullname
1244 * @value: the attribute value
1245 *
1246 * Add a defaulted attribute for an element
1247 */
1248static void
1249xmlAddDefAttrs(xmlParserCtxtPtr ctxt,
1250 const xmlChar *fullname,
1251 const xmlChar *fullattr,
1252 const xmlChar *value) {
1253 xmlDefAttrsPtr defaults;
1254 int len;
1255 const xmlChar *name;
1256 const xmlChar *prefix;
1257
1258 /*
1259 * Allows to detect attribute redefinitions
1260 */
1261 if (ctxt->attsSpecial != NULL) {
1262 if (xmlHashLookup2(ctxt->attsSpecial, fullname, fullattr) != NULL)
1263 return;
1264 }
1265
1266 if (ctxt->attsDefault == NULL) {
1267 ctxt->attsDefault = xmlHashCreateDict(10, ctxt->dict);
1268 if (ctxt->attsDefault == NULL)
1269 goto mem_error;
1270 }
1271
1272 /*
1273 * split the element name into prefix:localname , the string found
1274 * are within the DTD and then not associated to namespace names.
1275 */
1276 name = xmlSplitQName3(fullname, &len);
1277 if (name == NULL) {
1278 name = xmlDictLookup(ctxt->dict, fullname, -1);
1279 prefix = NULL;
1280 } else {
1281 name = xmlDictLookup(ctxt->dict, name, -1);
1282 prefix = xmlDictLookup(ctxt->dict, fullname, len);
1283 }
1284
1285 /*
1286 * make sure there is some storage
1287 */
1288 defaults = xmlHashLookup2(ctxt->attsDefault, name, prefix);
1289 if (defaults == NULL) {
1290 defaults = (xmlDefAttrsPtr) xmlMalloc(sizeof(xmlDefAttrs) +
1291 (4 * 5) * sizeof(const xmlChar *));
1292 if (defaults == NULL)
1293 goto mem_error;
1294 defaults->nbAttrs = 0;
1295 defaults->maxAttrs = 4;
1296 if (xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix,
1297 defaults, NULL) < 0) {
1298 xmlFree(defaults);
1299 goto mem_error;
1300 }
1301 } else if (defaults->nbAttrs >= defaults->maxAttrs) {
1302 xmlDefAttrsPtr temp;
1303
1304 temp = (xmlDefAttrsPtr) xmlRealloc(defaults, sizeof(xmlDefAttrs) +
1305 (2 * defaults->maxAttrs * 5) * sizeof(const xmlChar *));
1306 if (temp == NULL)
1307 goto mem_error;
1308 defaults = temp;
1309 defaults->maxAttrs *= 2;
1310 if (xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix,
1311 defaults, NULL) < 0) {
1312 xmlFree(defaults);
1313 goto mem_error;
1314 }
1315 }
1316
1317 /*
1318 * Split the element name into prefix:localname , the string found
1319 * are within the DTD and hen not associated to namespace names.
1320 */
1321 name = xmlSplitQName3(fullattr, &len);
1322 if (name == NULL) {
1323 name = xmlDictLookup(ctxt->dict, fullattr, -1);
1324 prefix = NULL;
1325 } else {
1326 name = xmlDictLookup(ctxt->dict, name, -1);
1327 prefix = xmlDictLookup(ctxt->dict, fullattr, len);
1328 }
1329
1330 defaults->values[5 * defaults->nbAttrs] = name;
1331 defaults->values[5 * defaults->nbAttrs + 1] = prefix;
1332 /* intern the string and precompute the end */
1333 len = xmlStrlen(value);
1334 value = xmlDictLookup(ctxt->dict, value, len);
1335 defaults->values[5 * defaults->nbAttrs + 2] = value;
1336 defaults->values[5 * defaults->nbAttrs + 3] = value + len;
1337 if (ctxt->external)
1338 defaults->values[5 * defaults->nbAttrs + 4] = BAD_CAST "external";
1339 else
1340 defaults->values[5 * defaults->nbAttrs + 4] = NULL;
1341 defaults->nbAttrs++;
1342
1343 return;
1344
1345mem_error:
1346 xmlErrMemory(ctxt, NULL);
1347 return;
1348}
1349
1350/**
1351 * xmlAddSpecialAttr:
1352 * @ctxt: an XML parser context
1353 * @fullname: the element fullname
1354 * @fullattr: the attribute fullname
1355 * @type: the attribute type
1356 *
1357 * Register this attribute type
1358 */
1359static void
1360xmlAddSpecialAttr(xmlParserCtxtPtr ctxt,
1361 const xmlChar *fullname,
1362 const xmlChar *fullattr,
1363 int type)
1364{
1365 if (ctxt->attsSpecial == NULL) {
1366 ctxt->attsSpecial = xmlHashCreateDict(10, ctxt->dict);
1367 if (ctxt->attsSpecial == NULL)
1368 goto mem_error;
1369 }
1370
1371 if (xmlHashLookup2(ctxt->attsSpecial, fullname, fullattr) != NULL)
1372 return;
1373
1374 xmlHashAddEntry2(ctxt->attsSpecial, fullname, fullattr,
1375 (void *) (ptrdiff_t) type);
1376 return;
1377
1378mem_error:
1379 xmlErrMemory(ctxt, NULL);
1380 return;
1381}
1382
1383/**
1384 * xmlCleanSpecialAttrCallback:
1385 *
1386 * Removes CDATA attributes from the special attribute table
1387 */
1388static void
1389xmlCleanSpecialAttrCallback(void *payload, void *data,
1390 const xmlChar *fullname, const xmlChar *fullattr,
1391 const xmlChar *unused ATTRIBUTE_UNUSED) {
1392 xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) data;
1393
1394 if (((ptrdiff_t) payload) == XML_ATTRIBUTE_CDATA) {
1395 xmlHashRemoveEntry2(ctxt->attsSpecial, fullname, fullattr, NULL);
1396 }
1397}
1398
1399/**
1400 * xmlCleanSpecialAttr:
1401 * @ctxt: an XML parser context
1402 *
1403 * Trim the list of attributes defined to remove all those of type
1404 * CDATA as they are not special. This call should be done when finishing
1405 * to parse the DTD and before starting to parse the document root.
1406 */
1407static void
1408xmlCleanSpecialAttr(xmlParserCtxtPtr ctxt)
1409{
1410 if (ctxt->attsSpecial == NULL)
1411 return;
1412
1413 xmlHashScanFull(ctxt->attsSpecial, xmlCleanSpecialAttrCallback, ctxt);
1414
1415 if (xmlHashSize(ctxt->attsSpecial) == 0) {
1416 xmlHashFree(ctxt->attsSpecial, NULL);
1417 ctxt->attsSpecial = NULL;
1418 }
1419 return;
1420}
1421
1422/**
1423 * xmlCheckLanguageID:
1424 * @lang: pointer to the string value
1425 *
1426 * Checks that the value conforms to the LanguageID production:
1427 *
1428 * NOTE: this is somewhat deprecated, those productions were removed from
1429 * the XML Second edition.
1430 *
1431 * [33] LanguageID ::= Langcode ('-' Subcode)*
1432 * [34] Langcode ::= ISO639Code | IanaCode | UserCode
1433 * [35] ISO639Code ::= ([a-z] | [A-Z]) ([a-z] | [A-Z])
1434 * [36] IanaCode ::= ('i' | 'I') '-' ([a-z] | [A-Z])+
1435 * [37] UserCode ::= ('x' | 'X') '-' ([a-z] | [A-Z])+
1436 * [38] Subcode ::= ([a-z] | [A-Z])+
1437 *
1438 * The current REC reference the successors of RFC 1766, currently 5646
1439 *
1440 * http://www.rfc-editor.org/rfc/rfc5646.txt
1441 * langtag = language
1442 * ["-" script]
1443 * ["-" region]
1444 * *("-" variant)
1445 * *("-" extension)
1446 * ["-" privateuse]
1447 * language = 2*3ALPHA ; shortest ISO 639 code
1448 * ["-" extlang] ; sometimes followed by
1449 * ; extended language subtags
1450 * / 4ALPHA ; or reserved for future use
1451 * / 5*8ALPHA ; or registered language subtag
1452 *
1453 * extlang = 3ALPHA ; selected ISO 639 codes
1454 * *2("-" 3ALPHA) ; permanently reserved
1455 *
1456 * script = 4ALPHA ; ISO 15924 code
1457 *
1458 * region = 2ALPHA ; ISO 3166-1 code
1459 * / 3DIGIT ; UN M.49 code
1460 *
1461 * variant = 5*8alphanum ; registered variants
1462 * / (DIGIT 3alphanum)
1463 *
1464 * extension = singleton 1*("-" (2*8alphanum))
1465 *
1466 * ; Single alphanumerics
1467 * ; "x" reserved for private use
1468 * singleton = DIGIT ; 0 - 9
1469 * / %x41-57 ; A - W
1470 * / %x59-5A ; Y - Z
1471 * / %x61-77 ; a - w
1472 * / %x79-7A ; y - z
1473 *
1474 * it sounds right to still allow Irregular i-xxx IANA and user codes too
1475 * The parser below doesn't try to cope with extension or privateuse
1476 * that could be added but that's not interoperable anyway
1477 *
1478 * Returns 1 if correct 0 otherwise
1479 **/
1480int
1481xmlCheckLanguageID(const xmlChar * lang)
1482{
1483 const xmlChar *cur = lang, *nxt;
1484
1485 if (cur == NULL)
1486 return (0);
1487 if (((cur[0] == 'i') && (cur[1] == '-')) ||
1488 ((cur[0] == 'I') && (cur[1] == '-')) ||
1489 ((cur[0] == 'x') && (cur[1] == '-')) ||
1490 ((cur[0] == 'X') && (cur[1] == '-'))) {
1491 /*
1492 * Still allow IANA code and user code which were coming
1493 * from the previous version of the XML-1.0 specification
1494 * it's deprecated but we should not fail
1495 */
1496 cur += 2;
1497 while (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
1498 ((cur[0] >= 'a') && (cur[0] <= 'z')))
1499 cur++;
1500 return(cur[0] == 0);
1501 }
1502 nxt = cur;
1503 while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1504 ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1505 nxt++;
1506 if (nxt - cur >= 4) {
1507 /*
1508 * Reserved
1509 */
1510 if ((nxt - cur > 8) || (nxt[0] != 0))
1511 return(0);
1512 return(1);
1513 }
1514 if (nxt - cur < 2)
1515 return(0);
1516 /* we got an ISO 639 code */
1517 if (nxt[0] == 0)
1518 return(1);
1519 if (nxt[0] != '-')
1520 return(0);
1521
1522 nxt++;
1523 cur = nxt;
1524 /* now we can have extlang or script or region or variant */
1525 if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1526 goto region_m49;
1527
1528 while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1529 ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1530 nxt++;
1531 if (nxt - cur == 4)
1532 goto script;
1533 if (nxt - cur == 2)
1534 goto region;
1535 if ((nxt - cur >= 5) && (nxt - cur <= 8))
1536 goto variant;
1537 if (nxt - cur != 3)
1538 return(0);
1539 /* we parsed an extlang */
1540 if (nxt[0] == 0)
1541 return(1);
1542 if (nxt[0] != '-')
1543 return(0);
1544
1545 nxt++;
1546 cur = nxt;
1547 /* now we can have script or region or variant */
1548 if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1549 goto region_m49;
1550
1551 while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1552 ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1553 nxt++;
1554 if (nxt - cur == 2)
1555 goto region;
1556 if ((nxt - cur >= 5) && (nxt - cur <= 8))
1557 goto variant;
1558 if (nxt - cur != 4)
1559 return(0);
1560 /* we parsed a script */
1561script:
1562 if (nxt[0] == 0)
1563 return(1);
1564 if (nxt[0] != '-')
1565 return(0);
1566
1567 nxt++;
1568 cur = nxt;
1569 /* now we can have region or variant */
1570 if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1571 goto region_m49;
1572
1573 while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1574 ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1575 nxt++;
1576
1577 if ((nxt - cur >= 5) && (nxt - cur <= 8))
1578 goto variant;
1579 if (nxt - cur != 2)
1580 return(0);
1581 /* we parsed a region */
1582region:
1583 if (nxt[0] == 0)
1584 return(1);
1585 if (nxt[0] != '-')
1586 return(0);
1587
1588 nxt++;
1589 cur = nxt;
1590 /* now we can just have a variant */
1591 while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1592 ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1593 nxt++;
1594
1595 if ((nxt - cur < 5) || (nxt - cur > 8))
1596 return(0);
1597
1598 /* we parsed a variant */
1599variant:
1600 if (nxt[0] == 0)
1601 return(1);
1602 if (nxt[0] != '-')
1603 return(0);
1604 /* extensions and private use subtags not checked */
1605 return (1);
1606
1607region_m49:
1608 if (((nxt[1] >= '0') && (nxt[1] <= '9')) &&
1609 ((nxt[2] >= '0') && (nxt[2] <= '9'))) {
1610 nxt += 3;
1611 goto region;
1612 }
1613 return(0);
1614}
1615
1616/************************************************************************
1617 * *
1618 * Parser stacks related functions and macros *
1619 * *
1620 ************************************************************************/
1621
1622static xmlEntityPtr xmlParseStringEntityRef(xmlParserCtxtPtr ctxt,
1623 const xmlChar ** str);
1624
1625#ifdef SAX2
1626/**
1627 * nsPush:
1628 * @ctxt: an XML parser context
1629 * @prefix: the namespace prefix or NULL
1630 * @URL: the namespace name
1631 *
1632 * Pushes a new parser namespace on top of the ns stack
1633 *
1634 * Returns -1 in case of error, -2 if the namespace should be discarded
1635 * and the index in the stack otherwise.
1636 */
1637static int
1638nsPush(xmlParserCtxtPtr ctxt, const xmlChar *prefix, const xmlChar *URL)
1639{
1640 if (ctxt->options & XML_PARSE_NSCLEAN) {
1641 int i;
1642 for (i = ctxt->nsNr - 2;i >= 0;i -= 2) {
1643 if (ctxt->nsTab[i] == prefix) {
1644 /* in scope */
1645 if (ctxt->nsTab[i + 1] == URL)
1646 return(-2);
1647 /* out of scope keep it */
1648 break;
1649 }
1650 }
1651 }
1652 if ((ctxt->nsMax == 0) || (ctxt->nsTab == NULL)) {
1653 ctxt->nsMax = 10;
1654 ctxt->nsNr = 0;
1655 ctxt->nsTab = (const xmlChar **)
1656 xmlMalloc(ctxt->nsMax * sizeof(xmlChar *));
1657 if (ctxt->nsTab == NULL) {
1658 xmlErrMemory(ctxt, NULL);
1659 ctxt->nsMax = 0;
1660 return (-1);
1661 }
1662 } else if (ctxt->nsNr >= ctxt->nsMax) {
1663 const xmlChar ** tmp;
1664 ctxt->nsMax *= 2;
1665 tmp = (const xmlChar **) xmlRealloc((char *) ctxt->nsTab,
1666 ctxt->nsMax * sizeof(ctxt->nsTab[0]));
1667 if (tmp == NULL) {
1668 xmlErrMemory(ctxt, NULL);
1669 ctxt->nsMax /= 2;
1670 return (-1);
1671 }
1672 ctxt->nsTab = tmp;
1673 }
1674 ctxt->nsTab[ctxt->nsNr++] = prefix;
1675 ctxt->nsTab[ctxt->nsNr++] = URL;
1676 return (ctxt->nsNr);
1677}
1678/**
1679 * nsPop:
1680 * @ctxt: an XML parser context
1681 * @nr: the number to pop
1682 *
1683 * Pops the top @nr parser prefix/namespace from the ns stack
1684 *
1685 * Returns the number of namespaces removed
1686 */
1687static int
1688nsPop(xmlParserCtxtPtr ctxt, int nr)
1689{
1690 int i;
1691
1692 if (ctxt->nsTab == NULL) return(0);
1693 if (ctxt->nsNr < nr) {
1694 xmlGenericError(xmlGenericErrorContext, "Pbm popping %d NS\n", nr);
1695 nr = ctxt->nsNr;
1696 }
1697 if (ctxt->nsNr <= 0)
1698 return (0);
1699
1700 for (i = 0;i < nr;i++) {
1701 ctxt->nsNr--;
1702 ctxt->nsTab[ctxt->nsNr] = NULL;
1703 }
1704 return(nr);
1705}
1706#endif
1707
1708static int
1709xmlCtxtGrowAttrs(xmlParserCtxtPtr ctxt, int nr) {
1710 const xmlChar **atts;
1711 int *attallocs;
1712 int maxatts;
1713
1714 if (ctxt->atts == NULL) {
1715 maxatts = 55; /* allow for 10 attrs by default */
1716 atts = (const xmlChar **)
1717 xmlMalloc(maxatts * sizeof(xmlChar *));
1718 if (atts == NULL) goto mem_error;
1719 ctxt->atts = atts;
1720 attallocs = (int *) xmlMalloc((maxatts / 5) * sizeof(int));
1721 if (attallocs == NULL) goto mem_error;
1722 ctxt->attallocs = attallocs;
1723 ctxt->maxatts = maxatts;
1724 } else if (nr + 5 > ctxt->maxatts) {
1725 maxatts = (nr + 5) * 2;
1726 atts = (const xmlChar **) xmlRealloc((void *) ctxt->atts,
1727 maxatts * sizeof(const xmlChar *));
1728 if (atts == NULL) goto mem_error;
1729 ctxt->atts = atts;
1730 attallocs = (int *) xmlRealloc((void *) ctxt->attallocs,
1731 (maxatts / 5) * sizeof(int));
1732 if (attallocs == NULL) goto mem_error;
1733 ctxt->attallocs = attallocs;
1734 ctxt->maxatts = maxatts;
1735 }
1736 return(ctxt->maxatts);
1737mem_error:
1738 xmlErrMemory(ctxt, NULL);
1739 return(-1);
1740}
1741
1742/**
1743 * inputPush:
1744 * @ctxt: an XML parser context
1745 * @value: the parser input
1746 *
1747 * Pushes a new parser input on top of the input stack
1748 *
1749 * Returns -1 in case of error, the index in the stack otherwise
1750 */
1751int
1752inputPush(xmlParserCtxtPtr ctxt, xmlParserInputPtr value)
1753{
1754 if ((ctxt == NULL) || (value == NULL))
1755 return(-1);
1756 if (ctxt->inputNr >= ctxt->inputMax) {
1757 ctxt->inputMax *= 2;
1758 ctxt->inputTab =
1759 (xmlParserInputPtr *) xmlRealloc(ctxt->inputTab,
1760 ctxt->inputMax *
1761 sizeof(ctxt->inputTab[0]));
1762 if (ctxt->inputTab == NULL) {
1763 xmlErrMemory(ctxt, NULL);
1764 xmlFreeInputStream(value);
1765 ctxt->inputMax /= 2;
1766 value = NULL;
1767 return (-1);
1768 }
1769 }
1770 ctxt->inputTab[ctxt->inputNr] = value;
1771 ctxt->input = value;
1772 return (ctxt->inputNr++);
1773}
1774/**
1775 * inputPop:
1776 * @ctxt: an XML parser context
1777 *
1778 * Pops the top parser input from the input stack
1779 *
1780 * Returns the input just removed
1781 */
1782xmlParserInputPtr
1783inputPop(xmlParserCtxtPtr ctxt)
1784{
1785 xmlParserInputPtr ret;
1786
1787 if (ctxt == NULL)
1788 return(NULL);
1789 if (ctxt->inputNr <= 0)
1790 return (NULL);
1791 ctxt->inputNr--;
1792 if (ctxt->inputNr > 0)
1793 ctxt->input = ctxt->inputTab[ctxt->inputNr - 1];
1794 else
1795 ctxt->input = NULL;
1796 ret = ctxt->inputTab[ctxt->inputNr];
1797 ctxt->inputTab[ctxt->inputNr] = NULL;
1798 return (ret);
1799}
1800/**
1801 * nodePush:
1802 * @ctxt: an XML parser context
1803 * @value: the element node
1804 *
1805 * Pushes a new element node on top of the node stack
1806 *
1807 * Returns -1 in case of error, the index in the stack otherwise
1808 */
1809int
1810nodePush(xmlParserCtxtPtr ctxt, xmlNodePtr value)
1811{
1812 if (ctxt == NULL) return(0);
1813 if (ctxt->nodeNr >= ctxt->nodeMax) {
1814 xmlNodePtr *tmp;
1815
1816 tmp = (xmlNodePtr *) xmlRealloc(ctxt->nodeTab,
1817 ctxt->nodeMax * 2 *
1818 sizeof(ctxt->nodeTab[0]));
1819 if (tmp == NULL) {
1820 xmlErrMemory(ctxt, NULL);
1821 return (-1);
1822 }
1823 ctxt->nodeTab = tmp;
1824 ctxt->nodeMax *= 2;
1825 }
1826 if ((((unsigned int) ctxt->nodeNr) > xmlParserMaxDepth) &&
1827 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
1828 xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR,
1829 "Excessive depth in document: %d use XML_PARSE_HUGE option\n",
1830 xmlParserMaxDepth);
1831 xmlHaltParser(ctxt);
1832 return(-1);
1833 }
1834 ctxt->nodeTab[ctxt->nodeNr] = value;
1835 ctxt->node = value;
1836 return (ctxt->nodeNr++);
1837}
1838
1839/**
1840 * nodePop:
1841 * @ctxt: an XML parser context
1842 *
1843 * Pops the top element node from the node stack
1844 *
1845 * Returns the node just removed
1846 */
1847xmlNodePtr
1848nodePop(xmlParserCtxtPtr ctxt)
1849{
1850 xmlNodePtr ret;
1851
1852 if (ctxt == NULL) return(NULL);
1853 if (ctxt->nodeNr <= 0)
1854 return (NULL);
1855 ctxt->nodeNr--;
1856 if (ctxt->nodeNr > 0)
1857 ctxt->node = ctxt->nodeTab[ctxt->nodeNr - 1];
1858 else
1859 ctxt->node = NULL;
1860 ret = ctxt->nodeTab[ctxt->nodeNr];
1861 ctxt->nodeTab[ctxt->nodeNr] = NULL;
1862 return (ret);
1863}
1864
1865/**
1866 * nameNsPush:
1867 * @ctxt: an XML parser context
1868 * @value: the element name
1869 * @prefix: the element prefix
1870 * @URI: the element namespace name
1871 * @line: the current line number for error messages
1872 * @nsNr: the number of namespaces pushed on the namespace table
1873 *
1874 * Pushes a new element name/prefix/URL on top of the name stack
1875 *
1876 * Returns -1 in case of error, the index in the stack otherwise
1877 */
1878static int
1879nameNsPush(xmlParserCtxtPtr ctxt, const xmlChar * value,
1880 const xmlChar *prefix, const xmlChar *URI, int line, int nsNr)
1881{
1882 xmlStartTag *tag;
1883
1884 if (ctxt->nameNr >= ctxt->nameMax) {
1885 const xmlChar * *tmp;
1886 xmlStartTag *tmp2;
1887 ctxt->nameMax *= 2;
1888 tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
1889 ctxt->nameMax *
1890 sizeof(ctxt->nameTab[0]));
1891 if (tmp == NULL) {
1892 ctxt->nameMax /= 2;
1893 goto mem_error;
1894 }
1895 ctxt->nameTab = tmp;
1896 tmp2 = (xmlStartTag *) xmlRealloc((void * *)ctxt->pushTab,
1897 ctxt->nameMax *
1898 sizeof(ctxt->pushTab[0]));
1899 if (tmp2 == NULL) {
1900 ctxt->nameMax /= 2;
1901 goto mem_error;
1902 }
1903 ctxt->pushTab = tmp2;
1904 } else if (ctxt->pushTab == NULL) {
1905 ctxt->pushTab = (xmlStartTag *) xmlMalloc(ctxt->nameMax *
1906 sizeof(ctxt->pushTab[0]));
1907 if (ctxt->pushTab == NULL)
1908 goto mem_error;
1909 }
1910 ctxt->nameTab[ctxt->nameNr] = value;
1911 ctxt->name = value;
1912 tag = &ctxt->pushTab[ctxt->nameNr];
1913 tag->prefix = prefix;
1914 tag->URI = URI;
1915 tag->line = line;
1916 tag->nsNr = nsNr;
1917 return (ctxt->nameNr++);
1918mem_error:
1919 xmlErrMemory(ctxt, NULL);
1920 return (-1);
1921}
1922#ifdef LIBXML_PUSH_ENABLED
1923/**
1924 * nameNsPop:
1925 * @ctxt: an XML parser context
1926 *
1927 * Pops the top element/prefix/URI name from the name stack
1928 *
1929 * Returns the name just removed
1930 */
1931static const xmlChar *
1932nameNsPop(xmlParserCtxtPtr ctxt)
1933{
1934 const xmlChar *ret;
1935
1936 if (ctxt->nameNr <= 0)
1937 return (NULL);
1938 ctxt->nameNr--;
1939 if (ctxt->nameNr > 0)
1940 ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
1941 else
1942 ctxt->name = NULL;
1943 ret = ctxt->nameTab[ctxt->nameNr];
1944 ctxt->nameTab[ctxt->nameNr] = NULL;
1945 return (ret);
1946}
1947#endif /* LIBXML_PUSH_ENABLED */
1948
1949/**
1950 * namePush:
1951 * @ctxt: an XML parser context
1952 * @value: the element name
1953 *
1954 * Pushes a new element name on top of the name stack
1955 *
1956 * Returns -1 in case of error, the index in the stack otherwise
1957 */
1958int
1959namePush(xmlParserCtxtPtr ctxt, const xmlChar * value)
1960{
1961 if (ctxt == NULL) return (-1);
1962
1963 if (ctxt->nameNr >= ctxt->nameMax) {
1964 const xmlChar * *tmp;
1965 tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
1966 ctxt->nameMax * 2 *
1967 sizeof(ctxt->nameTab[0]));
1968 if (tmp == NULL) {
1969 goto mem_error;
1970 }
1971 ctxt->nameTab = tmp;
1972 ctxt->nameMax *= 2;
1973 }
1974 ctxt->nameTab[ctxt->nameNr] = value;
1975 ctxt->name = value;
1976 return (ctxt->nameNr++);
1977mem_error:
1978 xmlErrMemory(ctxt, NULL);
1979 return (-1);
1980}
1981/**
1982 * namePop:
1983 * @ctxt: an XML parser context
1984 *
1985 * Pops the top element name from the name stack
1986 *
1987 * Returns the name just removed
1988 */
1989const xmlChar *
1990namePop(xmlParserCtxtPtr ctxt)
1991{
1992 const xmlChar *ret;
1993
1994 if ((ctxt == NULL) || (ctxt->nameNr <= 0))
1995 return (NULL);
1996 ctxt->nameNr--;
1997 if (ctxt->nameNr > 0)
1998 ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
1999 else
2000 ctxt->name = NULL;
2001 ret = ctxt->nameTab[ctxt->nameNr];
2002 ctxt->nameTab[ctxt->nameNr] = NULL;
2003 return (ret);
2004}
2005
2006static int spacePush(xmlParserCtxtPtr ctxt, int val) {
2007 if (ctxt->spaceNr >= ctxt->spaceMax) {
2008 int *tmp;
2009
2010 ctxt->spaceMax *= 2;
2011 tmp = (int *) xmlRealloc(ctxt->spaceTab,
2012 ctxt->spaceMax * sizeof(ctxt->spaceTab[0]));
2013 if (tmp == NULL) {
2014 xmlErrMemory(ctxt, NULL);
2015 ctxt->spaceMax /=2;
2016 return(-1);
2017 }
2018 ctxt->spaceTab = tmp;
2019 }
2020 ctxt->spaceTab[ctxt->spaceNr] = val;
2021 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr];
2022 return(ctxt->spaceNr++);
2023}
2024
2025static int spacePop(xmlParserCtxtPtr ctxt) {
2026 int ret;
2027 if (ctxt->spaceNr <= 0) return(0);
2028 ctxt->spaceNr--;
2029 if (ctxt->spaceNr > 0)
2030 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr - 1];
2031 else
2032 ctxt->space = &ctxt->spaceTab[0];
2033 ret = ctxt->spaceTab[ctxt->spaceNr];
2034 ctxt->spaceTab[ctxt->spaceNr] = -1;
2035 return(ret);
2036}
2037
/*
 * Macros for accessing the content. Those should be used only by the parser,
 * and not exported. They all expect a variable named ctxt holding the
 * parser context to be in scope.
 *
 * Dirty macros, i.e. one often needs to make assumptions on the context to
 * use them
 *
 *   CUR_PTR return the current pointer to the xmlChar to be parsed.
 *           To be used with extreme caution since operations consuming
 *           characters may move the input buffer to a different location !
 *   CUR     returns the current xmlChar value, i.e. a 8 bit value.
 *           This should be used internally by the parser
 *           only to compare to ASCII values otherwise it would break when
 *           running with UTF-8 encoding.
 *   RAW     same as CUR but in the input buffer, bypass any token
 *           extraction that may have been done (both are defined
 *           identically below)
 *   NXT(n)  returns the n'th next xmlChar. Same as CUR it should be used only
 *           to compare on ASCII based substring.
 *   SKIP(n) Skip n xmlChar, and must also be used only to skip ASCII defined
 *           strings without newlines within the parser.
 *   NEXT1   Skip 1 xmlChar, and must also be used only to skip 1 non-newline
 *           ASCII defined char within the parser.
 * Clean macros, not dependent of an ASCII context, expect UTF-8 encoding
 *
 *   NEXT    Skip to the next character, this does the proper decoding
 *           in UTF-8 mode. It also pop-up unfinished entities on the fly.
 *   NEXTL(l) Skip the current unicode character of l xmlChars long.
 *   CUR_CHAR(l) returns the current unicode character (int), set l
 *           to the number of xmlChars used for the encoding [0-5].
 *   CUR_SCHAR  same but operate on a string instead of the context
 *   COPY_BUF  copy the current unicode char to the target buffer, increment
 *            the index
 *   GROW, SHRINK  handling of input buffers
 */

#define RAW       (*ctxt->input->cur)
#define CUR       (*ctxt->input->cur)
#define NXT(val)  ctxt->input->cur[(val)]
#define CUR_PTR   ctxt->input->cur
#define BASE_PTR  ctxt->input->base
2078
/*
 * CMPn( s, c1, ..., cn ) is true if the first n bytes at s, read as
 * unsigned char, are equal to c1 ... cn in that order. The bytes are
 * compared left to right and the comparison stops at the first mismatch.
 */
#define CMP_BYTE( s, i, c ) ( ((unsigned char *) s)[ i ] == c )

#define CMP4( s, c1, c2, c3, c4 ) \
  ( CMP_BYTE( s, 0, c1 ) && CMP_BYTE( s, 1, c2 ) && \
    CMP_BYTE( s, 2, c3 ) && CMP_BYTE( s, 3, c4 ) )
#define CMP5( s, c1, c2, c3, c4, c5 ) \
  ( CMP4( s, c1, c2, c3, c4 ) && CMP_BYTE( s, 4, c5 ) )
#define CMP6( s, c1, c2, c3, c4, c5, c6 ) \
  ( CMP5( s, c1, c2, c3, c4, c5 ) && CMP_BYTE( s, 5, c6 ) )
#define CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) \
  ( CMP6( s, c1, c2, c3, c4, c5, c6 ) && CMP_BYTE( s, 6, c7 ) )
#define CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) \
  ( CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) && CMP_BYTE( s, 7, c8 ) )
#define CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) \
  ( CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) && \
    CMP_BYTE( s, 8, c9 ) )
#define CMP10( s, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10 ) \
  ( CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) && \
    CMP_BYTE( s, 9, c10 ) )
2096
2097#define SKIP(val) do { \
2098 ctxt->input->cur += (val),ctxt->input->col+=(val); \
2099 if (*ctxt->input->cur == 0) \
2100 xmlParserInputGrow(ctxt->input, INPUT_CHUNK); \
2101 } while (0)
2102
2103#define SKIPL(val) do { \
2104 int skipl; \
2105 for(skipl=0; skipl<val; skipl++) { \
2106 if (*(ctxt->input->cur) == '\n') { \
2107 ctxt->input->line++; ctxt->input->col = 1; \
2108 } else ctxt->input->col++; \
2109 ctxt->input->cur++; \
2110 } \
2111 if (*ctxt->input->cur == 0) \
2112 xmlParserInputGrow(ctxt->input, INPUT_CHUNK); \
2113 } while (0)
2114
2115#define SHRINK if ((ctxt->progressive == 0) && \
2116 (ctxt->input->cur - ctxt->input->base > 2 * INPUT_CHUNK) && \
2117 (ctxt->input->end - ctxt->input->cur < 2 * INPUT_CHUNK)) \
2118 xmlSHRINK (ctxt);
2119
2120static void xmlSHRINK (xmlParserCtxtPtr ctxt) {
2121 xmlParserInputShrink(ctxt->input);
2122 if (*ctxt->input->cur == 0)
2123 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
2124}
2125
2126#define GROW if ((ctxt->progressive == 0) && \
2127 (ctxt->input->end - ctxt->input->cur < INPUT_CHUNK)) \
2128 xmlGROW (ctxt);
2129
2130static void xmlGROW (xmlParserCtxtPtr ctxt) {
2131 ptrdiff_t curEnd = ctxt->input->end - ctxt->input->cur;
2132 ptrdiff_t curBase = ctxt->input->cur - ctxt->input->base;
2133
2134 if (((curEnd > XML_MAX_LOOKUP_LIMIT) ||
2135 (curBase > XML_MAX_LOOKUP_LIMIT)) &&
2136 ((ctxt->input->buf) &&
2137 (ctxt->input->buf->readcallback != xmlInputReadCallbackNop)) &&
2138 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
2139 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, "Huge input lookup");
2140 xmlHaltParser(ctxt);
2141 return;
2142 }
2143 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
2144 if ((ctxt->input->cur > ctxt->input->end) ||
2145 (ctxt->input->cur < ctxt->input->base)) {
2146 xmlHaltParser(ctxt);
2147 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, "cur index out of bound");
2148 return;
2149 }
2150 if ((ctxt->input->cur != NULL) && (*ctxt->input->cur == 0))
2151 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
2152}
2153
/* skip the blanks at the current position, see xmlSkipBlankChars() */
#define SKIP_BLANKS xmlSkipBlankChars(ctxt)

/* skip to the next character, see xmlNextChar() */
#define NEXT xmlNextChar(ctxt)

/*
 * NEXT1: skip one xmlChar without any newline handling, then ask for
 * more input if the terminating 0 of the buffer was reached.
 */
#define NEXT1 {								\
	ctxt->input->cur++;						\
	ctxt->input->col++;						\
	if (*ctxt->input->cur == 0)					\
	    xmlParserInputGrow(ctxt->input, INPUT_CHUNK);		\
    }

/*
 * NEXTL(l): skip the current character which is l xmlChars long,
 * keeping the line and column up to date.
 */
#define NEXTL(l) do {							\
    if (*(ctxt->input->cur) != '\n') {					\
	ctxt->input->col++;						\
    } else {								\
	ctxt->input->line++;						\
	ctxt->input->col = 1;						\
    }									\
    ctxt->input->cur += l;						\
  } while (0)

/* decode the current character, l receives its length in xmlChars */
#define CUR_CHAR(l) xmlCurrentChar(ctxt, &l)
#define CUR_SCHAR(s, l) xmlStringCurrentChar(ctxt, s, &l)

/*
 * COPY_BUF(l,b,i,v): store the character v at index i of buffer b and
 * advance i; a character of length 1 is stored directly, anything else
 * goes through xmlCopyCharMultiByte().
 */
#define COPY_BUF(l,b,i,v)						\
    if (l == 1) b[i++] = (xmlChar) v;					\
    else i += xmlCopyCharMultiByte(&b[i],v)
2178
2179/**
2180 * xmlSkipBlankChars:
2181 * @ctxt: the XML parser context
2182 *
2183 * skip all blanks character found at that point in the input streams.
2184 * It pops up finished entities in the process if allowable at that point.
2185 *
2186 * Returns the number of space chars skipped
2187 */
2188
2189int
2190xmlSkipBlankChars(xmlParserCtxtPtr ctxt) {
2191 int res = 0;
2192
2193 /*
2194 * It's Okay to use CUR/NEXT here since all the blanks are on
2195 * the ASCII range.
2196 */
2197 if (((ctxt->inputNr == 1) && (ctxt->instate != XML_PARSER_DTD)) ||
2198 (ctxt->instate == XML_PARSER_START)) {
2199 const xmlChar *cur;
2200 /*
2201 * if we are in the document content, go really fast
2202 */
2203 cur = ctxt->input->cur;
2204 while (IS_BLANK_CH(*cur)) {
2205 if (*cur == '\n') {
2206 ctxt->input->line++; ctxt->input->col = 1;
2207 } else {
2208 ctxt->input->col++;
2209 }
2210 cur++;
2211 if (res < INT_MAX)
2212 res++;
2213 if (*cur == 0) {
2214 ctxt->input->cur = cur;
2215 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
2216 cur = ctxt->input->cur;
2217 }
2218 }
2219 ctxt->input->cur = cur;
2220 } else {
2221 int expandPE = ((ctxt->external != 0) || (ctxt->inputNr != 1));
2222
2223 while (1) {
2224 if (IS_BLANK_CH(CUR)) { /* CHECKED tstblanks.xml */
2225 NEXT;
2226 } else if (CUR == '%') {
2227 /*
2228 * Need to handle support of entities branching here
2229 */
2230 if ((expandPE == 0) || (IS_BLANK_CH(NXT(1))) || (NXT(1) == 0))
2231 break;
2232 xmlParsePEReference(ctxt);
2233 } else if (CUR == 0) {
2234 if (ctxt->inputNr <= 1)
2235 break;
2236 xmlPopInput(ctxt);
2237 } else {
2238 break;
2239 }
2240
2241 /*
2242 * Also increase the counter when entering or exiting a PERef.
2243 * The spec says: "When a parameter-entity reference is recognized
2244 * in the DTD and included, its replacement text MUST be enlarged
2245 * by the attachment of one leading and one following space (#x20)
2246 * character."
2247 */
2248 if (res < INT_MAX)
2249 res++;
2250 }
2251 }
2252 return(res);
2253}
2254
2255/************************************************************************
2256 * *
2257 * Commodity functions to handle entities *
2258 * *
2259 ************************************************************************/
2260
2261/**
2262 * xmlPopInput:
2263 * @ctxt: an XML parser context
2264 *
2265 * xmlPopInput: the current input pointed by ctxt->input came to an end
2266 * pop it and return the next char.
2267 *
2268 * Returns the current xmlChar in the parser context
2269 */
2270xmlChar
2271xmlPopInput(xmlParserCtxtPtr ctxt) {
2272 if ((ctxt == NULL) || (ctxt->inputNr <= 1)) return(0);
2273 if (xmlParserDebugEntities)
2274 xmlGenericError(xmlGenericErrorContext,
2275 "Popping input %d\n", ctxt->inputNr);
2276 if ((ctxt->inputNr > 1) && (ctxt->inSubset == 0) &&
2277 (ctxt->instate != XML_PARSER_EOF))
2278 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
2279 "Unfinished entity outside the DTD");
2280 xmlFreeInputStream(inputPop(ctxt));
2281 if (*ctxt->input->cur == 0)
2282 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
2283 return(CUR);
2284}
2285
2286/**
2287 * xmlPushInput:
2288 * @ctxt: an XML parser context
2289 * @input: an XML parser input fragment (entity, XML fragment ...).
2290 *
2291 * xmlPushInput: switch to a new input stream which is stacked on top
2292 * of the previous one(s).
2293 * Returns -1 in case of error or the index in the input stack
2294 */
2295int
2296xmlPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr input) {
2297 int ret;
2298 if (input == NULL) return(-1);
2299
2300 if (xmlParserDebugEntities) {
2301 if ((ctxt->input != NULL) && (ctxt->input->filename))
2302 xmlGenericError(xmlGenericErrorContext,
2303 "%s(%d): ", ctxt->input->filename,
2304 ctxt->input->line);
2305 xmlGenericError(xmlGenericErrorContext,
2306 "Pushing input %d : %.30s\n", ctxt->inputNr+1, input->cur);
2307 }
2308 if (((ctxt->inputNr > 40) && ((ctxt->options & XML_PARSE_HUGE) == 0)) ||
2309 (ctxt->inputNr > 1024)) {
2310 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
2311 while (ctxt->inputNr > 1)
2312 xmlFreeInputStream(inputPop(ctxt));
2313 return(-1);
2314 }
2315 ret = inputPush(ctxt, input);
2316 if (ctxt->instate == XML_PARSER_EOF)
2317 return(-1);
2318 GROW;
2319 return(ret);
2320}
2321
2322/**
2323 * xmlParseCharRef:
2324 * @ctxt: an XML parser context
2325 *
2326 * parse Reference declarations
2327 *
2328 * [66] CharRef ::= '&#' [0-9]+ ';' |
2329 * '&#x' [0-9a-fA-F]+ ';'
2330 *
2331 * [ WFC: Legal Character ]
2332 * Characters referred to using character references must match the
2333 * production for Char.
2334 *
2335 * Returns the value parsed (as an int), 0 in case of error
2336 */
2337int
2338xmlParseCharRef(xmlParserCtxtPtr ctxt) {
2339 int val = 0;
2340 int count = 0;
2341
2342 /*
2343 * Using RAW/CUR/NEXT is okay since we are working on ASCII range here
2344 */
2345 if ((RAW == '&') && (NXT(1) == '#') &&
2346 (NXT(2) == 'x')) {
2347 SKIP(3);
2348 GROW;
2349 while (RAW != ';') { /* loop blocked by count */
2350 if (count++ > 20) {
2351 count = 0;
2352 GROW;
2353 if (ctxt->instate == XML_PARSER_EOF)
2354 return(0);
2355 }
2356 if ((RAW >= '0') && (RAW <= '9'))
2357 val = val * 16 + (CUR - '0');
2358 else if ((RAW >= 'a') && (RAW <= 'f') && (count < 20))
2359 val = val * 16 + (CUR - 'a') + 10;
2360 else if ((RAW >= 'A') && (RAW <= 'F') && (count < 20))
2361 val = val * 16 + (CUR - 'A') + 10;
2362 else {
2363 xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
2364 val = 0;
2365 break;
2366 }
2367 if (val > 0x110000)
2368 val = 0x110000;
2369
2370 NEXT;
2371 count++;
2372 }
2373 if (RAW == ';') {
2374 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
2375 ctxt->input->col++;
2376 ctxt->input->cur++;
2377 }
2378 } else if ((RAW == '&') && (NXT(1) == '#')) {
2379 SKIP(2);
2380 GROW;
2381 while (RAW != ';') { /* loop blocked by count */
2382 if (count++ > 20) {
2383 count = 0;
2384 GROW;
2385 if (ctxt->instate == XML_PARSER_EOF)
2386 return(0);
2387 }
2388 if ((RAW >= '0') && (RAW <= '9'))
2389 val = val * 10 + (CUR - '0');
2390 else {
2391 xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
2392 val = 0;
2393 break;
2394 }
2395 if (val > 0x110000)
2396 val = 0x110000;
2397
2398 NEXT;
2399 count++;
2400 }
2401 if (RAW == ';') {
2402 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
2403 ctxt->input->col++;
2404 ctxt->input->cur++;
2405 }
2406 } else {
2407 xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
2408 }
2409
2410 /*
2411 * [ WFC: Legal Character ]
2412 * Characters referred to using character references must match the
2413 * production for Char.
2414 */
2415 if (val >= 0x110000) {
2416 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2417 "xmlParseCharRef: character reference out of bounds\n",
2418 val);
2419 } else if (IS_CHAR(val)) {
2420 return(val);
2421 } else {
2422 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2423 "xmlParseCharRef: invalid xmlChar value %d\n",
2424 val);
2425 }
2426 return(0);
2427}
2428
2429/**
2430 * xmlParseStringCharRef:
2431 * @ctxt: an XML parser context
2432 * @str: a pointer to an index in the string
2433 *
2434 * parse Reference declarations, variant parsing from a string rather
2435 * than an an input flow.
2436 *
2437 * [66] CharRef ::= '&#' [0-9]+ ';' |
2438 * '&#x' [0-9a-fA-F]+ ';'
2439 *
2440 * [ WFC: Legal Character ]
2441 * Characters referred to using character references must match the
2442 * production for Char.
2443 *
2444 * Returns the value parsed (as an int), 0 in case of error, str will be
2445 * updated to the current value of the index
2446 */
2447static int
2448xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xmlChar **str) {
2449 const xmlChar *ptr;
2450 xmlChar cur;
2451 int val = 0;
2452
2453 if ((str == NULL) || (*str == NULL)) return(0);
2454 ptr = *str;
2455 cur = *ptr;
2456 if ((cur == '&') && (ptr[1] == '#') && (ptr[2] == 'x')) {
2457 ptr += 3;
2458 cur = *ptr;
2459 while (cur != ';') { /* Non input consuming loop */
2460 if ((cur >= '0') && (cur <= '9'))
2461 val = val * 16 + (cur - '0');
2462 else if ((cur >= 'a') && (cur <= 'f'))
2463 val = val * 16 + (cur - 'a') + 10;
2464 else if ((cur >= 'A') && (cur <= 'F'))
2465 val = val * 16 + (cur - 'A') + 10;
2466 else {
2467 xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
2468 val = 0;
2469 break;
2470 }
2471 if (val > 0x110000)
2472 val = 0x110000;
2473
2474 ptr++;
2475 cur = *ptr;
2476 }
2477 if (cur == ';')
2478 ptr++;
2479 } else if ((cur == '&') && (ptr[1] == '#')){
2480 ptr += 2;
2481 cur = *ptr;
2482 while (cur != ';') { /* Non input consuming loops */
2483 if ((cur >= '0') && (cur <= '9'))
2484 val = val * 10 + (cur - '0');
2485 else {
2486 xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
2487 val = 0;
2488 break;
2489 }
2490 if (val > 0x110000)
2491 val = 0x110000;
2492
2493 ptr++;
2494 cur = *ptr;
2495 }
2496 if (cur == ';')
2497 ptr++;
2498 } else {
2499 xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
2500 return(0);
2501 }
2502 *str = ptr;
2503
2504 /*
2505 * [ WFC: Legal Character ]
2506 * Characters referred to using character references must match the
2507 * production for Char.
2508 */
2509 if (val >= 0x110000) {
2510 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2511 "xmlParseStringCharRef: character reference out of bounds\n",
2512 val);
2513 } else if (IS_CHAR(val)) {
2514 return(val);
2515 } else {
2516 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2517 "xmlParseStringCharRef: invalid xmlChar value %d\n",
2518 val);
2519 }
2520 return(0);
2521}
2522
2523/**
2524 * xmlParserHandlePEReference:
2525 * @ctxt: the parser context
2526 *
2527 * [69] PEReference ::= '%' Name ';'
2528 *
2529 * [ WFC: No Recursion ]
2530 * A parsed entity must not contain a recursive
2531 * reference to itself, either directly or indirectly.
2532 *
2533 * [ WFC: Entity Declared ]
2534 * In a document without any DTD, a document with only an internal DTD
2535 * subset which contains no parameter entity references, or a document
2536 * with "standalone='yes'", ... ... The declaration of a parameter
2537 * entity must precede any reference to it...
2538 *
2539 * [ VC: Entity Declared ]
2540 * In a document with an external subset or external parameter entities
2541 * with "standalone='no'", ... ... The declaration of a parameter entity
2542 * must precede any reference to it...
2543 *
2544 * [ WFC: In DTD ]
2545 * Parameter-entity references may only appear in the DTD.
2546 * NOTE: misleading but this is handled.
2547 *
2548 * A PEReference may have been detected in the current input stream
2549 * the handling is done accordingly to
2550 * http://www.w3.org/TR/REC-xml#entproc
2551 * i.e.
2552 * - Included in literal in entity values
2553 * - Included as Parameter Entity reference within DTDs
2554 */
2555void
2556xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) {
2557 switch(ctxt->instate) {
2558 case XML_PARSER_CDATA_SECTION:
2559 return;
2560 case XML_PARSER_COMMENT:
2561 return;
2562 case XML_PARSER_START_TAG:
2563 return;
2564 case XML_PARSER_END_TAG:
2565 return;
2566 case XML_PARSER_EOF:
2567 xmlFatalErr(ctxt, XML_ERR_PEREF_AT_EOF, NULL);
2568 return;
2569 case XML_PARSER_PROLOG:
2570 case XML_PARSER_START:
2571 case XML_PARSER_MISC:
2572 xmlFatalErr(ctxt, XML_ERR_PEREF_IN_PROLOG, NULL);
2573 return;
2574 case XML_PARSER_ENTITY_DECL:
2575 case XML_PARSER_CONTENT:
2576 case XML_PARSER_ATTRIBUTE_VALUE:
2577 case XML_PARSER_PI:
2578 case XML_PARSER_SYSTEM_LITERAL:
2579 case XML_PARSER_PUBLIC_LITERAL:
2580 /* we just ignore it there */
2581 return;
2582 case XML_PARSER_EPILOG:
2583 xmlFatalErr(ctxt, XML_ERR_PEREF_IN_EPILOG, NULL);
2584 return;
2585 case XML_PARSER_ENTITY_VALUE:
2586 /*
2587 * NOTE: in the case of entity values, we don't do the
2588 * substitution here since we need the literal
2589 * entity value to be able to save the internal
2590 * subset of the document.
2591 * This will be handled by xmlStringDecodeEntities
2592 */
2593 return;
2594 case XML_PARSER_DTD:
2595 /*
2596 * [WFC: Well-Formedness Constraint: PEs in Internal Subset]
2597 * In the internal DTD subset, parameter-entity references
2598 * can occur only where markup declarations can occur, not
2599 * within markup declarations.
2600 * In that case this is handled in xmlParseMarkupDecl
2601 */
2602 if ((ctxt->external == 0) && (ctxt->inputNr == 1))
2603 return;
2604 if (IS_BLANK_CH(NXT(1)) || NXT(1) == 0)
2605 return;
2606 break;
2607 case XML_PARSER_IGNORE:
2608 return;
2609 }
2610
2611 xmlParsePEReference(ctxt);
2612}
2613
/*
 * Macro used to grow the current buffer to twice its size plus @n bytes.
 * buffer##_size is expected to be a size_t
 * mem_error: is expected to be a label in the calling function handling
 * memory allocation failures; it is also reached when the new size
 * wraps around.  On failure the original buffer is left untouched.
 *
 * The body is wrapped in do { } while (0) so that "growBuffer(b, n);"
 * is a single statement, and @n is parenthesized so compound
 * expressions can be passed safely.
 */
#define growBuffer(buffer, n) do {                                      \
    xmlChar *tmp;                                                       \
    size_t new_size = buffer##_size * 2 + (n);                          \
    if (new_size < buffer##_size) goto mem_error;                       \
    tmp = (xmlChar *) xmlRealloc(buffer, new_size);                     \
    if (tmp == NULL) goto mem_error;                                    \
    buffer = tmp;                                                       \
    buffer##_size = new_size;                                           \
} while (0)
2628
2629/**
2630 * xmlStringLenDecodeEntities:
2631 * @ctxt: the parser context
2632 * @str: the input string
2633 * @len: the string length
2634 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2635 * @end: an end marker xmlChar, 0 if none
2636 * @end2: an end marker xmlChar, 0 if none
2637 * @end3: an end marker xmlChar, 0 if none
2638 *
2639 * Takes a entity string content and process to do the adequate substitutions.
2640 *
2641 * [67] Reference ::= EntityRef | CharRef
2642 *
2643 * [69] PEReference ::= '%' Name ';'
2644 *
2645 * Returns A newly allocated string with the substitution done. The caller
2646 * must deallocate it !
2647 */
2648xmlChar *
2649xmlStringLenDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2650 int what, xmlChar end, xmlChar end2, xmlChar end3) {
2651 xmlChar *buffer = NULL;
2652 size_t buffer_size = 0;
2653 size_t nbchars = 0;
2654
2655 xmlChar *current = NULL;
2656 xmlChar *rep = NULL;
2657 const xmlChar *last;
2658 xmlEntityPtr ent;
2659 int c,l;
2660
2661 if ((ctxt == NULL) || (str == NULL) || (len < 0))
2662 return(NULL);
2663 last = str + len;
2664
2665 if (((ctxt->depth > 40) &&
2666 ((ctxt->options & XML_PARSE_HUGE) == 0)) ||
2667 (ctxt->depth > 1024)) {
2668 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
2669 return(NULL);
2670 }
2671
2672 /*
2673 * allocate a translation buffer.
2674 */
2675 buffer_size = XML_PARSER_BIG_BUFFER_SIZE;
2676 buffer = (xmlChar *) xmlMallocAtomic(buffer_size);
2677 if (buffer == NULL) goto mem_error;
2678
2679 /*
2680 * OK loop until we reach one of the ending char or a size limit.
2681 * we are operating on already parsed values.
2682 */
2683 if (str < last)
2684 c = CUR_SCHAR(str, l);
2685 else
2686 c = 0;
2687 while ((c != 0) && (c != end) && /* non input consuming loop */
2688 (c != end2) && (c != end3) &&
2689 (ctxt->instate != XML_PARSER_EOF)) {
2690
2691 if (c == 0) break;
2692 if ((c == '&') && (str[1] == '#')) {
2693 int val = xmlParseStringCharRef(ctxt, &str);
2694 if (val == 0)
2695 goto int_error;
2696 COPY_BUF(0,buffer,nbchars,val);
2697 if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2698 growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2699 }
2700 } else if ((c == '&') && (what & XML_SUBSTITUTE_REF)) {
2701 if (xmlParserDebugEntities)
2702 xmlGenericError(xmlGenericErrorContext,
2703 "String decoding Entity Reference: %.30s\n",
2704 str);
2705 ent = xmlParseStringEntityRef(ctxt, &str);
2706 xmlParserEntityCheck(ctxt, 0, ent, 0);
2707 if (ent != NULL)
2708 ctxt->nbentities += ent->checked / 2;
2709 if ((ent != NULL) &&
2710 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
2711 if (ent->content != NULL) {
2712 COPY_BUF(0,buffer,nbchars,ent->content[0]);
2713 if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2714 growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2715 }
2716 } else {
2717 xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
2718 "predefined entity has no content\n");
2719 goto int_error;
2720 }
2721 } else if ((ent != NULL) && (ent->content != NULL)) {
2722 ctxt->depth++;
2723 rep = xmlStringDecodeEntities(ctxt, ent->content, what,
2724 0, 0, 0);
2725 ctxt->depth--;
2726 if (rep == NULL) {
2727 ent->content[0] = 0;
2728 goto int_error;
2729 }
2730
2731 current = rep;
2732 while (*current != 0) { /* non input consuming loop */
2733 buffer[nbchars++] = *current++;
2734 if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2735 if (xmlParserEntityCheck(ctxt, nbchars, ent, 0))
2736 goto int_error;
2737 growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2738 }
2739 }
2740 xmlFree(rep);
2741 rep = NULL;
2742 } else if (ent != NULL) {
2743 int i = xmlStrlen(ent->name);
2744 const xmlChar *cur = ent->name;
2745
2746 buffer[nbchars++] = '&';
2747 if (nbchars + i + XML_PARSER_BUFFER_SIZE > buffer_size) {
2748 growBuffer(buffer, i + XML_PARSER_BUFFER_SIZE);
2749 }
2750 for (;i > 0;i--)
2751 buffer[nbchars++] = *cur++;
2752 buffer[nbchars++] = ';';
2753 }
2754 } else if (c == '%' && (what & XML_SUBSTITUTE_PEREF)) {
2755 if (xmlParserDebugEntities)
2756 xmlGenericError(xmlGenericErrorContext,
2757 "String decoding PE Reference: %.30s\n", str);
2758 ent = xmlParseStringPEReference(ctxt, &str);
2759 xmlParserEntityCheck(ctxt, 0, ent, 0);
2760 if (ent != NULL)
2761 ctxt->nbentities += ent->checked / 2;
2762 if (ent != NULL) {
2763 if (ent->content == NULL) {
2764 /*
2765 * Note: external parsed entities will not be loaded,
2766 * it is not required for a non-validating parser to
2767 * complete external PEReferences coming from the
2768 * internal subset
2769 */
2770 if (((ctxt->options & XML_PARSE_NOENT) != 0) ||
2771 ((ctxt->options & XML_PARSE_DTDVALID) != 0) ||
2772 (ctxt->validate != 0)) {
2773 xmlLoadEntityContent(ctxt, ent);
2774 } else {
2775 xmlWarningMsg(ctxt, XML_ERR_ENTITY_PROCESSING,
2776 "not validating will not read content for PE entity %s\n",
2777 ent->name, NULL);
2778 }
2779 }
2780 ctxt->depth++;
2781 rep = xmlStringDecodeEntities(ctxt, ent->content, what,
2782 0, 0, 0);
2783 ctxt->depth--;
2784 if (rep == NULL) {
2785 if (ent->content != NULL)
2786 ent->content[0] = 0;
2787 goto int_error;
2788 }
2789 current = rep;
2790 while (*current != 0) { /* non input consuming loop */
2791 buffer[nbchars++] = *current++;
2792 if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2793 if (xmlParserEntityCheck(ctxt, nbchars, ent, 0))
2794 goto int_error;
2795 growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2796 }
2797 }
2798 xmlFree(rep);
2799 rep = NULL;
2800 }
2801 } else {
2802 COPY_BUF(l,buffer,nbchars,c);
2803 str += l;
2804 if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2805 growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2806 }
2807 }
2808 if (str < last)
2809 c = CUR_SCHAR(str, l);
2810 else
2811 c = 0;
2812 }
2813 buffer[nbchars] = 0;
2814 return(buffer);
2815
2816mem_error:
2817 xmlErrMemory(ctxt, NULL);
2818int_error:
2819 if (rep != NULL)
2820 xmlFree(rep);
2821 if (buffer != NULL)
2822 xmlFree(buffer);
2823 return(NULL);
2824}
2825
2826/**
2827 * xmlStringDecodeEntities:
2828 * @ctxt: the parser context
2829 * @str: the input string
2830 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2831 * @end: an end marker xmlChar, 0 if none
2832 * @end2: an end marker xmlChar, 0 if none
2833 * @end3: an end marker xmlChar, 0 if none
2834 *
2835 * Takes a entity string content and process to do the adequate substitutions.
2836 *
2837 * [67] Reference ::= EntityRef | CharRef
2838 *
2839 * [69] PEReference ::= '%' Name ';'
2840 *
2841 * Returns A newly allocated string with the substitution done. The caller
2842 * must deallocate it !
2843 */
2844xmlChar *
2845xmlStringDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int what,
2846 xmlChar end, xmlChar end2, xmlChar end3) {
2847 if ((ctxt == NULL) || (str == NULL)) return(NULL);
2848 return(xmlStringLenDecodeEntities(ctxt, str, xmlStrlen(str), what,
2849 end, end2, end3));
2850}
2851
2852/************************************************************************
2853 * *
2854 * Commodity functions, cleanup needed ? *
2855 * *
2856 ************************************************************************/
2857
2858/**
2859 * areBlanks:
2860 * @ctxt: an XML parser context
2861 * @str: a xmlChar *
2862 * @len: the size of @str
2863 * @blank_chars: we know the chars are blanks
2864 *
2865 * Is this a sequence of blank chars that one can ignore ?
2866 *
2867 * Returns 1 if ignorable 0 otherwise.
2868 */
2869
2870static int areBlanks(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2871 int blank_chars) {
2872 int i, ret;
2873 xmlNodePtr lastChild;
2874
2875 /*
2876 * Don't spend time trying to differentiate them, the same callback is
2877 * used !
2878 */
2879 if (ctxt->sax->ignorableWhitespace == ctxt->sax->characters)
2880 return(0);
2881
2882 /*
2883 * Check for xml:space value.
2884 */
2885 if ((ctxt->space == NULL) || (*(ctxt->space) == 1) ||
2886 (*(ctxt->space) == -2))
2887 return(0);
2888
2889 /*
2890 * Check that the string is made of blanks
2891 */
2892 if (blank_chars == 0) {
2893 for (i = 0;i < len;i++)
2894 if (!(IS_BLANK_CH(str[i]))) return(0);
2895 }
2896
2897 /*
2898 * Look if the element is mixed content in the DTD if available
2899 */
2900 if (ctxt->node == NULL) return(0);
2901 if (ctxt->myDoc != NULL) {
2902 ret = xmlIsMixedElement(ctxt->myDoc, ctxt->node->name);
2903 if (ret == 0) return(1);
2904 if (ret == 1) return(0);
2905 }
2906
2907 /*
2908 * Otherwise, heuristic :-\
2909 */
2910 if ((RAW != '<') && (RAW != 0xD)) return(0);
2911 if ((ctxt->node->children == NULL) &&
2912 (RAW == '<') && (NXT(1) == '/')) return(0);
2913
2914 lastChild = xmlGetLastChild(ctxt->node);
2915 if (lastChild == NULL) {
2916 if ((ctxt->node->type != XML_ELEMENT_NODE) &&
2917 (ctxt->node->content != NULL)) return(0);
2918 } else if (xmlNodeIsText(lastChild))
2919 return(0);
2920 else if ((ctxt->node->children != NULL) &&
2921 (xmlNodeIsText(ctxt->node->children)))
2922 return(0);
2923 return(1);
2924}
2925
2926/************************************************************************
2927 * *
2928 * Extra stuff for namespace support *
2929 * Relates to http://www.w3.org/TR/WD-xml-names *
2930 * *
2931 ************************************************************************/
2932
2933/**
2934 * xmlSplitQName:
2935 * @ctxt: an XML parser context
2936 * @name: an XML parser context
2937 * @prefix: a xmlChar **
2938 *
2939 * parse an UTF8 encoded XML qualified name string
2940 *
2941 * [NS 5] QName ::= (Prefix ':')? LocalPart
2942 *
2943 * [NS 6] Prefix ::= NCName
2944 *
2945 * [NS 7] LocalPart ::= NCName
2946 *
2947 * Returns the local part, and prefix is updated
2948 * to get the Prefix if any.
2949 */
2950
2951xmlChar *
2952xmlSplitQName(xmlParserCtxtPtr ctxt, const xmlChar *name, xmlChar **prefix) {
2953 xmlChar buf[XML_MAX_NAMELEN + 5];
2954 xmlChar *buffer = NULL;
2955 int len = 0;
2956 int max = XML_MAX_NAMELEN;
2957 xmlChar *ret = NULL;
2958 const xmlChar *cur = name;
2959 int c;
2960
2961 if (prefix == NULL) return(NULL);
2962 *prefix = NULL;
2963
2964 if (cur == NULL) return(NULL);
2965
2966#ifndef XML_XML_NAMESPACE
2967 /* xml: prefix is not really a namespace */
2968 if ((cur[0] == 'x') && (cur[1] == 'm') &&
2969 (cur[2] == 'l') && (cur[3] == ':'))
2970 return(xmlStrdup(name));
2971#endif
2972
2973 /* nasty but well=formed */
2974 if (cur[0] == ':')
2975 return(xmlStrdup(name));
2976
2977 c = *cur++;
2978 while ((c != 0) && (c != ':') && (len < max)) { /* tested bigname.xml */
2979 buf[len++] = c;
2980 c = *cur++;
2981 }
2982 if (len >= max) {
2983 /*
2984 * Okay someone managed to make a huge name, so he's ready to pay
2985 * for the processing speed.
2986 */
2987 max = len * 2;
2988
2989 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
2990 if (buffer == NULL) {
2991 xmlErrMemory(ctxt, NULL);
2992 return(NULL);
2993 }
2994 memcpy(buffer, buf, len);
2995 while ((c != 0) && (c != ':')) { /* tested bigname.xml */
2996 if (len + 10 > max) {
2997 xmlChar *tmp;
2998
2999 max *= 2;
3000 tmp = (xmlChar *) xmlRealloc(buffer,
3001 max * sizeof(xmlChar));
3002 if (tmp == NULL) {
3003 xmlFree(buffer);
3004 xmlErrMemory(ctxt, NULL);
3005 return(NULL);
3006 }
3007 buffer = tmp;
3008 }
3009 buffer[len++] = c;
3010 c = *cur++;
3011 }
3012 buffer[len] = 0;
3013 }
3014
3015 if ((c == ':') && (*cur == 0)) {
3016 if (buffer != NULL)
3017 xmlFree(buffer);
3018 *prefix = NULL;
3019 return(xmlStrdup(name));
3020 }
3021
3022 if (buffer == NULL)
3023 ret = xmlStrndup(buf, len);
3024 else {
3025 ret = buffer;
3026 buffer = NULL;
3027 max = XML_MAX_NAMELEN;
3028 }
3029
3030
3031 if (c == ':') {
3032 c = *cur;
3033 *prefix = ret;
3034 if (c == 0) {
3035 return(xmlStrndup(BAD_CAST "", 0));
3036 }
3037 len = 0;
3038
3039 /*
3040 * Check that the first character is proper to start
3041 * a new name
3042 */
3043 if (!(((c >= 0x61) && (c <= 0x7A)) ||
3044 ((c >= 0x41) && (c <= 0x5A)) ||
3045 (c == '_') || (c == ':'))) {
3046 int l;
3047 int first = CUR_SCHAR(cur, l);
3048
3049 if (!IS_LETTER(first) && (first != '_')) {
3050 xmlFatalErrMsgStr(ctxt, XML_NS_ERR_QNAME,
3051 "Name %s is not XML Namespace compliant\n",
3052 name);
3053 }
3054 }
3055 cur++;
3056
3057 while ((c != 0) && (len < max)) { /* tested bigname2.xml */
3058 buf[len++] = c;
3059 c = *cur++;
3060 }
3061 if (len >= max) {
3062 /*
3063 * Okay someone managed to make a huge name, so he's ready to pay
3064 * for the processing speed.
3065 */
3066 max = len * 2;
3067
3068 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
3069 if (buffer == NULL) {
3070 xmlErrMemory(ctxt, NULL);
3071 return(NULL);
3072 }
3073 memcpy(buffer, buf, len);
3074 while (c != 0) { /* tested bigname2.xml */
3075 if (len + 10 > max) {
3076 xmlChar *tmp;
3077
3078 max *= 2;
3079 tmp = (xmlChar *) xmlRealloc(buffer,
3080 max * sizeof(xmlChar));
3081 if (tmp == NULL) {
3082 xmlErrMemory(ctxt, NULL);
3083 xmlFree(buffer);
3084 return(NULL);
3085 }
3086 buffer = tmp;
3087 }
3088 buffer[len++] = c;
3089 c = *cur++;
3090 }
3091 buffer[len] = 0;
3092 }
3093
3094 if (buffer == NULL)
3095 ret = xmlStrndup(buf, len);
3096 else {
3097 ret = buffer;
3098 }
3099 }
3100
3101 return(ret);
3102}
3103
3104/************************************************************************
3105 * *
3106 * The parser itself *
3107 * Relates to http://www.w3.org/TR/REC-xml *
3108 * *
3109 ************************************************************************/
3110
3111/************************************************************************
3112 * *
3113 * Routines to parse Name, NCName and NmToken *
3114 * *
3115 ************************************************************************/
#ifdef DEBUG
/* Call counters for the name parsing routines, DEBUG builds only. */
static unsigned long nbParseName = 0;
static unsigned long nbParseNameComplex = 0;
static unsigned long nbParseNCName = 0;
static unsigned long nbParseNCNameComplex = 0;
static unsigned long nbParseNmToken = 0;
static unsigned long nbParseStringName = 0;
#endif
3124
3125/*
3126 * The two following functions are related to the change of accepted
3127 * characters for Name and NmToken in the Revision 5 of XML-1.0
3128 * They correspond to the modified production [4] and the new production [4a]
3129 * changes in that revision. Also note that the macros used for the
3130 * productions Letter, Digit, CombiningChar and Extender are not needed
3131 * anymore.
3132 * We still keep compatibility to pre-revision5 parsing semantic if the
3133 * new XML_PARSE_OLD10 option is given to the parser.
3134 */
3135static int
3136xmlIsNameStartChar(xmlParserCtxtPtr ctxt, int c) {
3137 if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3138 /*
3139 * Use the new checks of production [4] [4a] amd [5] of the
3140 * Update 5 of XML-1.0
3141 */
3142 if ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3143 (((c >= 'a') && (c <= 'z')) ||
3144 ((c >= 'A') && (c <= 'Z')) ||
3145 (c == '_') || (c == ':') ||
3146 ((c >= 0xC0) && (c <= 0xD6)) ||
3147 ((c >= 0xD8) && (c <= 0xF6)) ||
3148 ((c >= 0xF8) && (c <= 0x2FF)) ||
3149 ((c >= 0x370) && (c <= 0x37D)) ||
3150 ((c >= 0x37F) && (c <= 0x1FFF)) ||
3151 ((c >= 0x200C) && (c <= 0x200D)) ||
3152 ((c >= 0x2070) && (c <= 0x218F)) ||
3153 ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3154 ((c >= 0x3001) && (c <= 0xD7FF)) ||
3155 ((c >= 0xF900) && (c <= 0xFDCF)) ||
3156 ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3157 ((c >= 0x10000) && (c <= 0xEFFFF))))
3158 return(1);
3159 } else {
3160 if (IS_LETTER(c) || (c == '_') || (c == ':'))
3161 return(1);
3162 }
3163 return(0);
3164}
3165
3166static int
3167xmlIsNameChar(xmlParserCtxtPtr ctxt, int c) {
3168 if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3169 /*
3170 * Use the new checks of production [4] [4a] amd [5] of the
3171 * Update 5 of XML-1.0
3172 */
3173 if ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3174 (((c >= 'a') && (c <= 'z')) ||
3175 ((c >= 'A') && (c <= 'Z')) ||
3176 ((c >= '0') && (c <= '9')) || /* !start */
3177 (c == '_') || (c == ':') ||
3178 (c == '-') || (c == '.') || (c == 0xB7) || /* !start */
3179 ((c >= 0xC0) && (c <= 0xD6)) ||
3180 ((c >= 0xD8) && (c <= 0xF6)) ||
3181 ((c >= 0xF8) && (c <= 0x2FF)) ||
3182 ((c >= 0x300) && (c <= 0x36F)) || /* !start */
3183 ((c >= 0x370) && (c <= 0x37D)) ||
3184 ((c >= 0x37F) && (c <= 0x1FFF)) ||
3185 ((c >= 0x200C) && (c <= 0x200D)) ||
3186 ((c >= 0x203F) && (c <= 0x2040)) || /* !start */
3187 ((c >= 0x2070) && (c <= 0x218F)) ||
3188 ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3189 ((c >= 0x3001) && (c <= 0xD7FF)) ||
3190 ((c >= 0xF900) && (c <= 0xFDCF)) ||
3191 ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3192 ((c >= 0x10000) && (c <= 0xEFFFF))))
3193 return(1);
3194 } else {
3195 if ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3196 (c == '.') || (c == '-') ||
3197 (c == '_') || (c == ':') ||
3198 (IS_COMBINING(c)) ||
3199 (IS_EXTENDER(c)))
3200 return(1);
3201 }
3202 return(0);
3203}
3204
3205static xmlChar * xmlParseAttValueInternal(xmlParserCtxtPtr ctxt,
3206 int *len, int *alloc, int normalize);
3207
3208static const xmlChar *
3209xmlParseNameComplex(xmlParserCtxtPtr ctxt) {
3210 int len = 0, l;
3211 int c;
3212 int count = 0;
3213
3214#ifdef DEBUG
3215 nbParseNameComplex++;
3216#endif
3217
3218 /*
3219 * Handler for more complex cases
3220 */
3221 GROW;
3222 if (ctxt->instate == XML_PARSER_EOF)
3223 return(NULL);
3224 c = CUR_CHAR(l);
3225 if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3226 /*
3227 * Use the new checks of production [4] [4a] amd [5] of the
3228 * Update 5 of XML-1.0
3229 */
3230 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3231 (!(((c >= 'a') && (c <= 'z')) ||
3232 ((c >= 'A') && (c <= 'Z')) ||
3233 (c == '_') || (c == ':') ||
3234 ((c >= 0xC0) && (c <= 0xD6)) ||
3235 ((c >= 0xD8) && (c <= 0xF6)) ||
3236 ((c >= 0xF8) && (c <= 0x2FF)) ||
3237 ((c >= 0x370) && (c <= 0x37D)) ||
3238 ((c >= 0x37F) && (c <= 0x1FFF)) ||
3239 ((c >= 0x200C) && (c <= 0x200D)) ||
3240 ((c >= 0x2070) && (c <= 0x218F)) ||
3241 ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3242 ((c >= 0x3001) && (c <= 0xD7FF)) ||
3243 ((c >= 0xF900) && (c <= 0xFDCF)) ||
3244 ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3245 ((c >= 0x10000) && (c <= 0xEFFFF))))) {
3246 return(NULL);
3247 }
3248 len += l;
3249 NEXTL(l);
3250 c = CUR_CHAR(l);
3251 while ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3252 (((c >= 'a') && (c <= 'z')) ||
3253 ((c >= 'A') && (c <= 'Z')) ||
3254 ((c >= '0') && (c <= '9')) || /* !start */
3255 (c == '_') || (c == ':') ||
3256 (c == '-') || (c == '.') || (c == 0xB7) || /* !start */
3257 ((c >= 0xC0) && (c <= 0xD6)) ||
3258 ((c >= 0xD8) && (c <= 0xF6)) ||
3259 ((c >= 0xF8) && (c <= 0x2FF)) ||
3260 ((c >= 0x300) && (c <= 0x36F)) || /* !start */
3261 ((c >= 0x370) && (c <= 0x37D)) ||
3262 ((c >= 0x37F) && (c <= 0x1FFF)) ||
3263 ((c >= 0x200C) && (c <= 0x200D)) ||
3264 ((c >= 0x203F) && (c <= 0x2040)) || /* !start */
3265 ((c >= 0x2070) && (c <= 0x218F)) ||
3266 ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3267 ((c >= 0x3001) && (c <= 0xD7FF)) ||
3268 ((c >= 0xF900) && (c <= 0xFDCF)) ||
3269 ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3270 ((c >= 0x10000) && (c <= 0xEFFFF))
3271 )) {
3272 if (count++ > XML_PARSER_CHUNK_SIZE) {
3273 count = 0;
3274 GROW;
3275 if (ctxt->instate == XML_PARSER_EOF)
3276 return(NULL);
3277 }
3278 len += l;
3279 NEXTL(l);
3280 c = CUR_CHAR(l);
3281 }
3282 } else {
3283 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3284 (!IS_LETTER(c) && (c != '_') &&
3285 (c != ':'))) {
3286 return(NULL);
3287 }
3288 len += l;
3289 NEXTL(l);
3290 c = CUR_CHAR(l);
3291
3292 while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
3293 ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3294 (c == '.') || (c == '-') ||
3295 (c == '_') || (c == ':') ||
3296 (IS_COMBINING(c)) ||
3297 (IS_EXTENDER(c)))) {
3298 if (count++ > XML_PARSER_CHUNK_SIZE) {
3299 count = 0;
3300 GROW;
3301 if (ctxt->instate == XML_PARSER_EOF)
3302 return(NULL);
3303 }
3304 len += l;
3305 NEXTL(l);
3306 c = CUR_CHAR(l);
3307 }
3308 }
3309 if ((len > XML_MAX_NAME_LENGTH) &&
3310 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3311 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Name");
3312 return(NULL);
3313 }
3314 if (ctxt->input->cur - ctxt->input->base < len) {
3315 /*
3316 * There were a couple of bugs where PERefs lead to to a change
3317 * of the buffer. Check the buffer size to avoid passing an invalid
3318 * pointer to xmlDictLookup.
3319 */
3320 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
3321 "unexpected change of input buffer");
3322 return (NULL);
3323 }
3324 if ((*ctxt->input->cur == '\n') && (ctxt->input->cur[-1] == '\r'))
3325 return(xmlDictLookup(ctxt->dict, ctxt->input->cur - (len + 1), len));
3326 return(xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len));
3327}
3328
3329/**
3330 * xmlParseName:
3331 * @ctxt: an XML parser context
3332 *
3333 * parse an XML name.
3334 *
3335 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3336 * CombiningChar | Extender
3337 *
3338 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
3339 *
3340 * [6] Names ::= Name (#x20 Name)*
3341 *
3342 * Returns the Name parsed or NULL
3343 */
3344
3345const xmlChar *
3346xmlParseName(xmlParserCtxtPtr ctxt) {
3347 const xmlChar *in;
3348 const xmlChar *ret;
3349 int count = 0;
3350
3351 GROW;
3352
3353#ifdef DEBUG
3354 nbParseName++;
3355#endif
3356
3357 /*
3358 * Accelerator for simple ASCII names
3359 */
3360 in = ctxt->input->cur;
3361 if (((*in >= 0x61) && (*in <= 0x7A)) ||
3362 ((*in >= 0x41) && (*in <= 0x5A)) ||
3363 (*in == '_') || (*in == ':')) {
3364 in++;
3365 while (((*in >= 0x61) && (*in <= 0x7A)) ||
3366 ((*in >= 0x41) && (*in <= 0x5A)) ||
3367 ((*in >= 0x30) && (*in <= 0x39)) ||
3368 (*in == '_') || (*in == '-') ||
3369 (*in == ':') || (*in == '.'))
3370 in++;
3371 if ((*in > 0) && (*in < 0x80)) {
3372 count = in - ctxt->input->cur;
3373 if ((count > XML_MAX_NAME_LENGTH) &&
3374 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3375 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Name");
3376 return(NULL);
3377 }
3378 ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
3379 ctxt->input->cur = in;
3380 ctxt->input->col += count;
3381 if (ret == NULL)
3382 xmlErrMemory(ctxt, NULL);
3383 return(ret);
3384 }
3385 }
3386 /* accelerator for special cases */
3387 return(xmlParseNameComplex(ctxt));
3388}
3389
3390static const xmlChar *
3391xmlParseNCNameComplex(xmlParserCtxtPtr ctxt) {
3392 int len = 0, l;
3393 int c;
3394 int count = 0;
3395 size_t startPosition = 0;
3396
3397#ifdef DEBUG
3398 nbParseNCNameComplex++;
3399#endif
3400
3401 /*
3402 * Handler for more complex cases
3403 */
3404 GROW;
3405 startPosition = CUR_PTR - BASE_PTR;
3406 c = CUR_CHAR(l);
3407 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3408 (!xmlIsNameStartChar(ctxt, c) || (c == ':'))) {
3409 return(NULL);
3410 }
3411
3412 while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
3413 (xmlIsNameChar(ctxt, c) && (c != ':'))) {
3414 if (count++ > XML_PARSER_CHUNK_SIZE) {
3415 if ((len > XML_MAX_NAME_LENGTH) &&
3416 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3417 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3418 return(NULL);
3419 }
3420 count = 0;
3421 GROW;
3422 if (ctxt->instate == XML_PARSER_EOF)
3423 return(NULL);
3424 }
3425 len += l;
3426 NEXTL(l);
3427 c = CUR_CHAR(l);
3428 if (c == 0) {
3429 count = 0;
3430 /*
3431 * when shrinking to extend the buffer we really need to preserve
3432 * the part of the name we already parsed. Hence rolling back
3433 * by current length.
3434 */
3435 ctxt->input->cur -= l;
3436 GROW;
3437 if (ctxt->instate == XML_PARSER_EOF)
3438 return(NULL);
3439 ctxt->input->cur += l;
3440 c = CUR_CHAR(l);
3441 }
3442 }
3443 if ((len > XML_MAX_NAME_LENGTH) &&
3444 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3445 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3446 return(NULL);
3447 }
3448 return(xmlDictLookup(ctxt->dict, (BASE_PTR + startPosition), len));
3449}
3450
3451/**
3452 * xmlParseNCName:
3453 * @ctxt: an XML parser context
3454 * @len: length of the string parsed
3455 *
3456 * parse an XML name.
3457 *
3458 * [4NS] NCNameChar ::= Letter | Digit | '.' | '-' | '_' |
3459 * CombiningChar | Extender
3460 *
3461 * [5NS] NCName ::= (Letter | '_') (NCNameChar)*
3462 *
3463 * Returns the Name parsed or NULL
3464 */
3465
3466static const xmlChar *
3467xmlParseNCName(xmlParserCtxtPtr ctxt) {
3468 const xmlChar *in, *e;
3469 const xmlChar *ret;
3470 int count = 0;
3471
3472#ifdef DEBUG
3473 nbParseNCName++;
3474#endif
3475
3476 /*
3477 * Accelerator for simple ASCII names
3478 */
3479 in = ctxt->input->cur;
3480 e = ctxt->input->end;
3481 if ((((*in >= 0x61) && (*in <= 0x7A)) ||
3482 ((*in >= 0x41) && (*in <= 0x5A)) ||
3483 (*in == '_')) && (in < e)) {
3484 in++;
3485 while ((((*in >= 0x61) && (*in <= 0x7A)) ||
3486 ((*in >= 0x41) && (*in <= 0x5A)) ||
3487 ((*in >= 0x30) && (*in <= 0x39)) ||
3488 (*in == '_') || (*in == '-') ||
3489 (*in == '.')) && (in < e))
3490 in++;
3491 if (in >= e)
3492 goto complex;
3493 if ((*in > 0) && (*in < 0x80)) {
3494 count = in - ctxt->input->cur;
3495 if ((count > XML_MAX_NAME_LENGTH) &&
3496 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3497 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3498 return(NULL);
3499 }
3500 ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
3501 ctxt->input->cur = in;
3502 ctxt->input->col += count;
3503 if (ret == NULL) {
3504 xmlErrMemory(ctxt, NULL);
3505 }
3506 return(ret);
3507 }
3508 }
3509complex:
3510 return(xmlParseNCNameComplex(ctxt));
3511}
3512
3513/**
3514 * xmlParseNameAndCompare:
3515 * @ctxt: an XML parser context
3516 *
3517 * parse an XML name and compares for match
3518 * (specialized for endtag parsing)
3519 *
3520 * Returns NULL for an illegal name, (xmlChar*) 1 for success
3521 * and the name for mismatch
3522 */
3523
3524static const xmlChar *
3525xmlParseNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *other) {
3526 register const xmlChar *cmp = other;
3527 register const xmlChar *in;
3528 const xmlChar *ret;
3529
3530 GROW;
3531 if (ctxt->instate == XML_PARSER_EOF)
3532 return(NULL);
3533
3534 in = ctxt->input->cur;
3535 while (*in != 0 && *in == *cmp) {
3536 ++in;
3537 ++cmp;
3538 }
3539 if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
3540 /* success */
3541 ctxt->input->col += in - ctxt->input->cur;
3542 ctxt->input->cur = in;
3543 return (const xmlChar*) 1;
3544 }
3545 /* failure (or end of input buffer), check with full function */
3546 ret = xmlParseName (ctxt);
3547 /* strings coming from the dictionary direct compare possible */
3548 if (ret == other) {
3549 return (const xmlChar*) 1;
3550 }
3551 return ret;
3552}
3553
3554/**
3555 * xmlParseStringName:
3556 * @ctxt: an XML parser context
3557 * @str: a pointer to the string pointer (IN/OUT)
3558 *
3559 * parse an XML name.
3560 *
3561 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3562 * CombiningChar | Extender
3563 *
3564 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
3565 *
3566 * [6] Names ::= Name (#x20 Name)*
3567 *
3568 * Returns the Name parsed or NULL. The @str pointer
3569 * is updated to the current location in the string.
3570 */
3571
3572static xmlChar *
3573xmlParseStringName(xmlParserCtxtPtr ctxt, const xmlChar** str) {
3574 xmlChar buf[XML_MAX_NAMELEN + 5];
3575 const xmlChar *cur = *str;
3576 int len = 0, l;
3577 int c;
3578
3579#ifdef DEBUG
3580 nbParseStringName++;
3581#endif
3582
3583 c = CUR_SCHAR(cur, l);
3584 if (!xmlIsNameStartChar(ctxt, c)) {
3585 return(NULL);
3586 }
3587
3588 COPY_BUF(l,buf,len,c);
3589 cur += l;
3590 c = CUR_SCHAR(cur, l);
3591 while (xmlIsNameChar(ctxt, c)) {
3592 COPY_BUF(l,buf,len,c);
3593 cur += l;
3594 c = CUR_SCHAR(cur, l);
3595 if (len >= XML_MAX_NAMELEN) { /* test bigentname.xml */
3596 /*
3597 * Okay someone managed to make a huge name, so he's ready to pay
3598 * for the processing speed.
3599 */
3600 xmlChar *buffer;
3601 int max = len * 2;
3602
3603 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
3604 if (buffer == NULL) {
3605 xmlErrMemory(ctxt, NULL);
3606 return(NULL);
3607 }
3608 memcpy(buffer, buf, len);
3609 while (xmlIsNameChar(ctxt, c)) {
3610 if (len + 10 > max) {
3611 xmlChar *tmp;
3612
3613 if ((len > XML_MAX_NAME_LENGTH) &&
3614 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3615 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3616 xmlFree(buffer);
3617 return(NULL);
3618 }
3619 max *= 2;
3620 tmp = (xmlChar *) xmlRealloc(buffer,
3621 max * sizeof(xmlChar));
3622 if (tmp == NULL) {
3623 xmlErrMemory(ctxt, NULL);
3624 xmlFree(buffer);
3625 return(NULL);
3626 }
3627 buffer = tmp;
3628 }
3629 COPY_BUF(l,buffer,len,c);
3630 cur += l;
3631 c = CUR_SCHAR(cur, l);
3632 }
3633 buffer[len] = 0;
3634 *str = cur;
3635 return(buffer);
3636 }
3637 }
3638 if ((len > XML_MAX_NAME_LENGTH) &&
3639 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3640 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3641 return(NULL);
3642 }
3643 *str = cur;
3644 return(xmlStrndup(buf, len));
3645}
3646
3647/**
3648 * xmlParseNmtoken:
3649 * @ctxt: an XML parser context
3650 *
3651 * parse an XML Nmtoken.
3652 *
3653 * [7] Nmtoken ::= (NameChar)+
3654 *
3655 * [8] Nmtokens ::= Nmtoken (#x20 Nmtoken)*
3656 *
3657 * Returns the Nmtoken parsed or NULL
3658 */
3659
3660xmlChar *
3661xmlParseNmtoken(xmlParserCtxtPtr ctxt) {
3662 xmlChar buf[XML_MAX_NAMELEN + 5];
3663 int len = 0, l;
3664 int c;
3665 int count = 0;
3666
3667#ifdef DEBUG
3668 nbParseNmToken++;
3669#endif
3670
3671 GROW;
3672 if (ctxt->instate == XML_PARSER_EOF)
3673 return(NULL);
3674 c = CUR_CHAR(l);
3675
3676 while (xmlIsNameChar(ctxt, c)) {
3677 if (count++ > XML_PARSER_CHUNK_SIZE) {
3678 count = 0;
3679 GROW;
3680 }
3681 COPY_BUF(l,buf,len,c);
3682 NEXTL(l);
3683 c = CUR_CHAR(l);
3684 if (c == 0) {
3685 count = 0;
3686 GROW;
3687 if (ctxt->instate == XML_PARSER_EOF)
3688 return(NULL);
3689 c = CUR_CHAR(l);
3690 }
3691 if (len >= XML_MAX_NAMELEN) {
3692 /*
3693 * Okay someone managed to make a huge token, so he's ready to pay
3694 * for the processing speed.
3695 */
3696 xmlChar *buffer;
3697 int max = len * 2;
3698
3699 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
3700 if (buffer == NULL) {
3701 xmlErrMemory(ctxt, NULL);
3702 return(NULL);
3703 }
3704 memcpy(buffer, buf, len);
3705 while (xmlIsNameChar(ctxt, c)) {
3706 if (count++ > XML_PARSER_CHUNK_SIZE) {
3707 count = 0;
3708 GROW;
3709 if (ctxt->instate == XML_PARSER_EOF) {
3710 xmlFree(buffer);
3711 return(NULL);
3712 }
3713 }
3714 if (len + 10 > max) {
3715 xmlChar *tmp;
3716
3717 if ((max > XML_MAX_NAME_LENGTH) &&
3718 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3719 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NmToken");
3720 xmlFree(buffer);
3721 return(NULL);
3722 }
3723 max *= 2;
3724 tmp = (xmlChar *) xmlRealloc(buffer,
3725 max * sizeof(xmlChar));
3726 if (tmp == NULL) {
3727 xmlErrMemory(ctxt, NULL);
3728 xmlFree(buffer);
3729 return(NULL);
3730 }
3731 buffer = tmp;
3732 }
3733 COPY_BUF(l,buffer,len,c);
3734 NEXTL(l);
3735 c = CUR_CHAR(l);
3736 }
3737 buffer[len] = 0;
3738 return(buffer);
3739 }
3740 }
3741 if (len == 0)
3742 return(NULL);
3743 if ((len > XML_MAX_NAME_LENGTH) &&
3744 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3745 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NmToken");
3746 return(NULL);
3747 }
3748 return(xmlStrndup(buf, len));
3749}
3750
3751/**
3752 * xmlParseEntityValue:
3753 * @ctxt: an XML parser context
3754 * @orig: if non-NULL store a copy of the original entity value
3755 *
3756 * parse a value for ENTITY declarations
3757 *
3758 * [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' |
3759 * "'" ([^%&'] | PEReference | Reference)* "'"
3760 *
3761 * Returns the EntityValue parsed with reference substituted or NULL
3762 */
3763
3764xmlChar *
3765xmlParseEntityValue(xmlParserCtxtPtr ctxt, xmlChar **orig) {
3766 xmlChar *buf = NULL;
3767 int len = 0;
3768 int size = XML_PARSER_BUFFER_SIZE;
3769 int c, l;
3770 xmlChar stop;
3771 xmlChar *ret = NULL;
3772 const xmlChar *cur = NULL;
3773 xmlParserInputPtr input;
3774
3775 if (RAW == '"') stop = '"';
3776 else if (RAW == '\'') stop = '\'';
3777 else {
3778 xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_STARTED, NULL);
3779 return(NULL);
3780 }
3781 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
3782 if (buf == NULL) {
3783 xmlErrMemory(ctxt, NULL);
3784 return(NULL);
3785 }
3786
3787 /*
3788 * The content of the entity definition is copied in a buffer.
3789 */
3790
3791 ctxt->instate = XML_PARSER_ENTITY_VALUE;
3792 input = ctxt->input;
3793 GROW;
3794 if (ctxt->instate == XML_PARSER_EOF)
3795 goto error;
3796 NEXT;
3797 c = CUR_CHAR(l);
3798 /*
3799 * NOTE: 4.4.5 Included in Literal
3800 * When a parameter entity reference appears in a literal entity
3801 * value, ... a single or double quote character in the replacement
3802 * text is always treated as a normal data character and will not
3803 * terminate the literal.
3804 * In practice it means we stop the loop only when back at parsing
3805 * the initial entity and the quote is found
3806 */
3807 while (((IS_CHAR(c)) && ((c != stop) || /* checked */
3808 (ctxt->input != input))) && (ctxt->instate != XML_PARSER_EOF)) {
3809 if (len + 5 >= size) {
3810 xmlChar *tmp;
3811
3812 size *= 2;
3813 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3814 if (tmp == NULL) {
3815 xmlErrMemory(ctxt, NULL);
3816 goto error;
3817 }
3818 buf = tmp;
3819 }
3820 COPY_BUF(l,buf,len,c);
3821 NEXTL(l);
3822
3823 GROW;
3824 c = CUR_CHAR(l);
3825 if (c == 0) {
3826 GROW;
3827 c = CUR_CHAR(l);
3828 }
3829 }
3830 buf[len] = 0;
3831 if (ctxt->instate == XML_PARSER_EOF)
3832 goto error;
3833 if (c != stop) {
3834 xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_FINISHED, NULL);
3835 goto error;
3836 }
3837 NEXT;
3838
3839 /*
3840 * Raise problem w.r.t. '&' and '%' being used in non-entities
3841 * reference constructs. Note Charref will be handled in
3842 * xmlStringDecodeEntities()
3843 */
3844 cur = buf;
3845 while (*cur != 0) { /* non input consuming */
3846 if ((*cur == '%') || ((*cur == '&') && (cur[1] != '#'))) {
3847 xmlChar *name;
3848 xmlChar tmp = *cur;
3849 int nameOk = 0;
3850
3851 cur++;
3852 name = xmlParseStringName(ctxt, &cur);
3853 if (name != NULL) {
3854 nameOk = 1;
3855 xmlFree(name);
3856 }
3857 if ((nameOk == 0) || (*cur != ';')) {
3858 xmlFatalErrMsgInt(ctxt, XML_ERR_ENTITY_CHAR_ERROR,
3859 "EntityValue: '%c' forbidden except for entities references\n",
3860 tmp);
3861 goto error;
3862 }
3863 if ((tmp == '%') && (ctxt->inSubset == 1) &&
3864 (ctxt->inputNr == 1)) {
3865 xmlFatalErr(ctxt, XML_ERR_ENTITY_PE_INTERNAL, NULL);
3866 goto error;
3867 }
3868 if (*cur == 0)
3869 break;
3870 }
3871 cur++;
3872 }
3873
3874 /*
3875 * Then PEReference entities are substituted.
3876 *
3877 * NOTE: 4.4.7 Bypassed
3878 * When a general entity reference appears in the EntityValue in
3879 * an entity declaration, it is bypassed and left as is.
3880 * so XML_SUBSTITUTE_REF is not set here.
3881 */
3882 ++ctxt->depth;
3883 ret = xmlStringDecodeEntities(ctxt, buf, XML_SUBSTITUTE_PEREF,
3884 0, 0, 0);
3885 --ctxt->depth;
3886 if (orig != NULL) {
3887 *orig = buf;
3888 buf = NULL;
3889 }
3890
3891error:
3892 if (buf != NULL)
3893 xmlFree(buf);
3894 return(ret);
3895}
3896
3897/**
3898 * xmlParseAttValueComplex:
3899 * @ctxt: an XML parser context
3900 * @len: the resulting attribute len
3901 * @normalize: whether to apply the inner normalization
3902 *
3903 * parse a value for an attribute, this is the fallback function
3904 * of xmlParseAttValue() when the attribute parsing requires handling
3905 * of non-ASCII characters, or normalization compaction.
3906 *
3907 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
3908 */
3909static xmlChar *
3910xmlParseAttValueComplex(xmlParserCtxtPtr ctxt, int *attlen, int normalize) {
3911 xmlChar limit = 0;
3912 xmlChar *buf = NULL;
3913 xmlChar *rep = NULL;
3914 size_t len = 0;
3915 size_t buf_size = 0;
3916 int c, l, in_space = 0;
3917 xmlChar *current = NULL;
3918 xmlEntityPtr ent;
3919
3920 if (NXT(0) == '"') {
3921 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
3922 limit = '"';
3923 NEXT;
3924 } else if (NXT(0) == '\'') {
3925 limit = '\'';
3926 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
3927 NEXT;
3928 } else {
3929 xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
3930 return(NULL);
3931 }
3932
3933 /*
3934 * allocate a translation buffer.
3935 */
3936 buf_size = XML_PARSER_BUFFER_SIZE;
3937 buf = (xmlChar *) xmlMallocAtomic(buf_size);
3938 if (buf == NULL) goto mem_error;
3939
3940 /*
3941 * OK loop until we reach one of the ending char or a size limit.
3942 */
3943 c = CUR_CHAR(l);
3944 while (((NXT(0) != limit) && /* checked */
3945 (IS_CHAR(c)) && (c != '<')) &&
3946 (ctxt->instate != XML_PARSER_EOF)) {
3947 /*
3948 * Impose a reasonable limit on attribute size, unless XML_PARSE_HUGE
3949 * special option is given
3950 */
3951 if ((len > XML_MAX_TEXT_LENGTH) &&
3952 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3953 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
3954 "AttValue length too long\n");
3955 goto mem_error;
3956 }
3957 if (c == '&') {
3958 in_space = 0;
3959 if (NXT(1) == '#') {
3960 int val = xmlParseCharRef(ctxt);
3961
3962 if (val == '&') {
3963 if (ctxt->replaceEntities) {
3964 if (len + 10 > buf_size) {
3965 growBuffer(buf, 10);
3966 }
3967 buf[len++] = '&';
3968 } else {
3969 /*
3970 * The reparsing will be done in xmlStringGetNodeList()
3971 * called by the attribute() function in SAX.c
3972 */
3973 if (len + 10 > buf_size) {
3974 growBuffer(buf, 10);
3975 }
3976 buf[len++] = '&';
3977 buf[len++] = '#';
3978 buf[len++] = '3';
3979 buf[len++] = '8';
3980 buf[len++] = ';';
3981 }
3982 } else if (val != 0) {
3983 if (len + 10 > buf_size) {
3984 growBuffer(buf, 10);
3985 }
3986 len += xmlCopyChar(0, &buf[len], val);
3987 }
3988 } else {
3989 ent = xmlParseEntityRef(ctxt);
3990 ctxt->nbentities++;
3991 if (ent != NULL)
3992 ctxt->nbentities += ent->owner;
3993 if ((ent != NULL) &&
3994 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
3995 if (len + 10 > buf_size) {
3996 growBuffer(buf, 10);
3997 }
3998 if ((ctxt->replaceEntities == 0) &&
3999 (ent->content[0] == '&')) {
4000 buf[len++] = '&';
4001 buf[len++] = '#';
4002 buf[len++] = '3';
4003 buf[len++] = '8';
4004 buf[len++] = ';';
4005 } else {
4006 buf[len++] = ent->content[0];
4007 }
4008 } else if ((ent != NULL) &&
4009 (ctxt->replaceEntities != 0)) {
4010 if (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) {
4011 ++ctxt->depth;
4012 rep = xmlStringDecodeEntities(ctxt, ent->content,
4013 XML_SUBSTITUTE_REF,
4014 0, 0, 0);
4015 --ctxt->depth;
4016 if (rep != NULL) {
4017 current = rep;
4018 while (*current != 0) { /* non input consuming */
4019 if ((*current == 0xD) || (*current == 0xA) ||
4020 (*current == 0x9)) {
4021 buf[len++] = 0x20;
4022 current++;
4023 } else
4024 buf[len++] = *current++;
4025 if (len + 10 > buf_size) {
4026 growBuffer(buf, 10);
4027 }
4028 }
4029 xmlFree(rep);
4030 rep = NULL;
4031 }
4032 } else {
4033 if (len + 10 > buf_size) {
4034 growBuffer(buf, 10);
4035 }
4036 if (ent->content != NULL)
4037 buf[len++] = ent->content[0];
4038 }
4039 } else if (ent != NULL) {
4040 int i = xmlStrlen(ent->name);
4041 const xmlChar *cur = ent->name;
4042
4043 /*
4044 * This may look absurd but is needed to detect
4045 * entities problems
4046 */
4047 if ((ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
4048 (ent->content != NULL) && (ent->checked == 0)) {
4049 unsigned long oldnbent = ctxt->nbentities, diff;
4050
4051 ++ctxt->depth;
4052 rep = xmlStringDecodeEntities(ctxt, ent->content,
4053 XML_SUBSTITUTE_REF, 0, 0, 0);
4054 --ctxt->depth;
4055
4056 diff = ctxt->nbentities - oldnbent + 1;
4057 if (diff > INT_MAX / 2)
4058 diff = INT_MAX / 2;
4059 ent->checked = diff * 2;
4060 if (rep != NULL) {
4061 if (xmlStrchr(rep, '<'))
4062 ent->checked |= 1;
4063 xmlFree(rep);
4064 rep = NULL;
4065 } else {
4066 ent->content[0] = 0;
4067 }
4068 }
4069
4070 /*
4071 * Just output the reference
4072 */
4073 buf[len++] = '&';
4074 while (len + i + 10 > buf_size) {
4075 growBuffer(buf, i + 10);
4076 }
4077 for (;i > 0;i--)
4078 buf[len++] = *cur++;
4079 buf[len++] = ';';
4080 }
4081 }
4082 } else {
4083 if ((c == 0x20) || (c == 0xD) || (c == 0xA) || (c == 0x9)) {
4084 if ((len != 0) || (!normalize)) {
4085 if ((!normalize) || (!in_space)) {
4086 COPY_BUF(l,buf,len,0x20);
4087 while (len + 10 > buf_size) {
4088 growBuffer(buf, 10);
4089 }
4090 }
4091 in_space = 1;
4092 }
4093 } else {
4094 in_space = 0;
4095 COPY_BUF(l,buf,len,c);
4096 if (len + 10 > buf_size) {
4097 growBuffer(buf, 10);
4098 }
4099 }
4100 NEXTL(l);
4101 }
4102 GROW;
4103 c = CUR_CHAR(l);
4104 }
4105 if (ctxt->instate == XML_PARSER_EOF)
4106 goto error;
4107
4108 if ((in_space) && (normalize)) {
4109 while ((len > 0) && (buf[len - 1] == 0x20)) len--;
4110 }
4111 buf[len] = 0;
4112 if (RAW == '<') {
4113 xmlFatalErr(ctxt, XML_ERR_LT_IN_ATTRIBUTE, NULL);
4114 } else if (RAW != limit) {
4115 if ((c != 0) && (!IS_CHAR(c))) {
4116 xmlFatalErrMsg(ctxt, XML_ERR_INVALID_CHAR,
4117 "invalid character in attribute value\n");
4118 } else {
4119 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
4120 "AttValue: ' expected\n");
4121 }
4122 } else
4123 NEXT;
4124
4125 /*
4126 * There we potentially risk an overflow, don't allow attribute value of
4127 * length more than INT_MAX it is a very reasonable assumption !
4128 */
4129 if (len >= INT_MAX) {
4130 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
4131 "AttValue length too long\n");
4132 goto mem_error;
4133 }
4134
4135 if (attlen != NULL) *attlen = (int) len;
4136 return(buf);
4137
4138mem_error:
4139 xmlErrMemory(ctxt, NULL);
4140error:
4141 if (buf != NULL)
4142 xmlFree(buf);
4143 if (rep != NULL)
4144 xmlFree(rep);
4145 return(NULL);
4146}
4147
4148/**
4149 * xmlParseAttValue:
4150 * @ctxt: an XML parser context
4151 *
4152 * parse a value for an attribute
4153 * Note: the parser won't do substitution of entities here, this
4154 * will be handled later in xmlStringGetNodeList
4155 *
4156 * [10] AttValue ::= '"' ([^<&"] | Reference)* '"' |
4157 * "'" ([^<&'] | Reference)* "'"
4158 *
4159 * 3.3.3 Attribute-Value Normalization:
4160 * Before the value of an attribute is passed to the application or
4161 * checked for validity, the XML processor must normalize it as follows:
4162 * - a character reference is processed by appending the referenced
4163 * character to the attribute value
4164 * - an entity reference is processed by recursively processing the
4165 * replacement text of the entity
4166 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
4167 * appending #x20 to the normalized value, except that only a single
4168 * #x20 is appended for a "#xD#xA" sequence that is part of an external
4169 * parsed entity or the literal entity value of an internal parsed entity
4170 * - other characters are processed by appending them to the normalized value
4171 * If the declared value is not CDATA, then the XML processor must further
4172 * process the normalized attribute value by discarding any leading and
4173 * trailing space (#x20) characters, and by replacing sequences of space
4174 * (#x20) characters by a single space (#x20) character.
4175 * All attributes for which no declaration has been read should be treated
4176 * by a non-validating parser as if declared CDATA.
4177 *
4178 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
4179 */
4180
4181
4182xmlChar *
4183xmlParseAttValue(xmlParserCtxtPtr ctxt) {
4184 if ((ctxt == NULL) || (ctxt->input == NULL)) return(NULL);
4185 return(xmlParseAttValueInternal(ctxt, NULL, NULL, 0));
4186}
4187
4188/**
4189 * xmlParseSystemLiteral:
4190 * @ctxt: an XML parser context
4191 *
4192 * parse an XML Literal
4193 *
4194 * [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
4195 *
4196 * Returns the SystemLiteral parsed or NULL
4197 */
4198
4199xmlChar *
4200xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) {
4201 xmlChar *buf = NULL;
4202 int len = 0;
4203 int size = XML_PARSER_BUFFER_SIZE;
4204 int cur, l;
4205 xmlChar stop;
4206 int state = ctxt->instate;
4207 int count = 0;
4208
4209 SHRINK;
4210 if (RAW == '"') {
4211 NEXT;
4212 stop = '"';
4213 } else if (RAW == '\'') {
4214 NEXT;
4215 stop = '\'';
4216 } else {
4217 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
4218 return(NULL);
4219 }
4220
4221 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
4222 if (buf == NULL) {
4223 xmlErrMemory(ctxt, NULL);
4224 return(NULL);
4225 }
4226 ctxt->instate = XML_PARSER_SYSTEM_LITERAL;
4227 cur = CUR_CHAR(l);
4228 while ((IS_CHAR(cur)) && (cur != stop)) { /* checked */
4229 if (len + 5 >= size) {
4230 xmlChar *tmp;
4231
4232 if ((size > XML_MAX_NAME_LENGTH) &&
4233 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
4234 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "SystemLiteral");
4235 xmlFree(buf);
4236 ctxt->instate = (xmlParserInputState) state;
4237 return(NULL);
4238 }
4239 size *= 2;
4240 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
4241 if (tmp == NULL) {
4242 xmlFree(buf);
4243 xmlErrMemory(ctxt, NULL);
4244 ctxt->instate = (xmlParserInputState) state;
4245 return(NULL);
4246 }
4247 buf = tmp;
4248 }
4249 count++;
4250 if (count > 50) {
4251 SHRINK;
4252 GROW;
4253 count = 0;
4254 if (ctxt->instate == XML_PARSER_EOF) {
4255 xmlFree(buf);
4256 return(NULL);
4257 }
4258 }
4259 COPY_BUF(l,buf,len,cur);
4260 NEXTL(l);
4261 cur = CUR_CHAR(l);
4262 if (cur == 0) {
4263 GROW;
4264 SHRINK;
4265 cur = CUR_CHAR(l);
4266 }
4267 }
4268 buf[len] = 0;
4269 ctxt->instate = (xmlParserInputState) state;
4270 if (!IS_CHAR(cur)) {
4271 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
4272 } else {
4273 NEXT;
4274 }
4275 return(buf);
4276}
4277
4278/**
4279 * xmlParsePubidLiteral:
4280 * @ctxt: an XML parser context
4281 *
4282 * parse an XML public literal
4283 *
4284 * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
4285 *
4286 * Returns the PubidLiteral parsed or NULL.
4287 */
4288
4289xmlChar *
4290xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) {
4291 xmlChar *buf = NULL;
4292 int len = 0;
4293 int size = XML_PARSER_BUFFER_SIZE;
4294 xmlChar cur;
4295 xmlChar stop;
4296 int count = 0;
4297 xmlParserInputState oldstate = ctxt->instate;
4298
4299 SHRINK;
4300 if (RAW == '"') {
4301 NEXT;
4302 stop = '"';
4303 } else if (RAW == '\'') {
4304 NEXT;
4305 stop = '\'';
4306 } else {
4307 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
4308 return(NULL);
4309 }
4310 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
4311 if (buf == NULL) {
4312 xmlErrMemory(ctxt, NULL);
4313 return(NULL);
4314 }
4315 ctxt->instate = XML_PARSER_PUBLIC_LITERAL;
4316 cur = CUR;
4317 while ((IS_PUBIDCHAR_CH(cur)) && (cur != stop)) { /* checked */
4318 if (len + 1 >= size) {
4319 xmlChar *tmp;
4320
4321 if ((size > XML_MAX_NAME_LENGTH) &&
4322 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
4323 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Public ID");
4324 xmlFree(buf);
4325 return(NULL);
4326 }
4327 size *= 2;
4328 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
4329 if (tmp == NULL) {
4330 xmlErrMemory(ctxt, NULL);
4331 xmlFree(buf);
4332 return(NULL);
4333 }
4334 buf = tmp;
4335 }
4336 buf[len++] = cur;
4337 count++;
4338 if (count > 50) {
4339 SHRINK;
4340 GROW;
4341 count = 0;
4342 if (ctxt->instate == XML_PARSER_EOF) {
4343 xmlFree(buf);
4344 return(NULL);
4345 }
4346 }
4347 NEXT;
4348 cur = CUR;
4349 if (cur == 0) {
4350 GROW;
4351 SHRINK;
4352 cur = CUR;
4353 }
4354 }
4355 buf[len] = 0;
4356 if (cur != stop) {
4357 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
4358 } else {
4359 NEXT;
4360 }
4361 ctxt->instate = oldstate;
4362 return(buf);
4363}
4364
/* Forward declaration: fallback called by xmlParseCharData(), defined after it. */
static void xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata);
4366
/*
 * used for the test in the inner loop of the char data testing
 *
 * Indexed by byte value.  A non-zero entry lets the scanning loop in
 * xmlParseCharData() step over the byte; a zero entry stops it.  The zero
 * entries are the control bytes other than 0x9 (so 0xA and 0xD stop the
 * scan), '&', '<', ']' and every byte from 0x80 upwards.
 */
static const unsigned char test_char_data[256] = {
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x09, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x9, CR/LF separated */
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x00, 0x27, /* & */
    0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x2D, 0x2E, 0x2F,
    0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,
    0x38, 0x39, 0x3A, 0x3B, 0x00, 0x3D, 0x3E, 0x3F, /* < */
    0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47,
    0x48, 0x49, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F,
    0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57,
    0x58, 0x59, 0x5A, 0x5B, 0x5C, 0x00, 0x5E, 0x5F, /* ] */
    0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
    0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
    0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
    0x78, 0x79, 0x7A, 0x7B, 0x7C, 0x7D, 0x7E, 0x7F,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* non-ascii */
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
};
4404
4405/**
4406 * xmlParseCharData:
4407 * @ctxt: an XML parser context
4408 * @cdata: int indicating whether we are within a CDATA section
4409 *
4410 * parse a CharData section.
4411 * if we are within a CDATA section ']]>' marks an end of section.
4412 *
4413 * The right angle bracket (>) may be represented using the string "&gt;",
4414 * and must, for compatibility, be escaped using "&gt;" or a character
4415 * reference when it appears in the string "]]>" in content, when that
4416 * string is not marking the end of a CDATA section.
4417 *
4418 * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
4419 */
4420
4421void
4422xmlParseCharData(xmlParserCtxtPtr ctxt, int cdata) {
4423 const xmlChar *in;
4424 int nbchar = 0;
4425 int line = ctxt->input->line;
4426 int col = ctxt->input->col;
4427 int ccol;
4428
4429 SHRINK;
4430 GROW;
4431 /*
4432 * Accelerated common case where input don't need to be
4433 * modified before passing it to the handler.
4434 */
4435 if (!cdata) {
4436 in = ctxt->input->cur;
4437 do {
4438get_more_space:
4439 while (*in == 0x20) { in++; ctxt->input->col++; }
4440 if (*in == 0xA) {
4441 do {
4442 ctxt->input->line++; ctxt->input->col = 1;
4443 in++;
4444 } while (*in == 0xA);
4445 goto get_more_space;
4446 }
4447 if (*in == '<') {
4448 nbchar = in - ctxt->input->cur;
4449 if (nbchar > 0) {
4450 const xmlChar *tmp = ctxt->input->cur;
4451 ctxt->input->cur = in;
4452
4453 if ((ctxt->sax != NULL) &&
4454 (ctxt->sax->ignorableWhitespace !=
4455 ctxt->sax->characters)) {
4456 if (areBlanks(ctxt, tmp, nbchar, 1)) {
4457 if (ctxt->sax->ignorableWhitespace != NULL)
4458 ctxt->sax->ignorableWhitespace(ctxt->userData,
4459 tmp, nbchar);
4460 } else {
4461 if (ctxt->sax->characters != NULL)
4462 ctxt->sax->characters(ctxt->userData,
4463 tmp, nbchar);
4464 if (*ctxt->space == -1)
4465 *ctxt->space = -2;
4466 }
4467 } else if ((ctxt->sax != NULL) &&
4468 (ctxt->sax->characters != NULL)) {
4469 ctxt->sax->characters(ctxt->userData,
4470 tmp, nbchar);
4471 }
4472 }
4473 return;
4474 }
4475
4476get_more:
4477 ccol = ctxt->input->col;
4478 while (test_char_data[*in]) {
4479 in++;
4480 ccol++;
4481 }
4482 ctxt->input->col = ccol;
4483 if (*in == 0xA) {
4484 do {
4485 ctxt->input->line++; ctxt->input->col = 1;
4486 in++;
4487 } while (*in == 0xA);
4488 goto get_more;
4489 }
4490 if (*in == ']') {
4491 if ((in[1] == ']') && (in[2] == '>')) {
4492 xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
4493 ctxt->input->cur = in + 1;
4494 return;
4495 }
4496 in++;
4497 ctxt->input->col++;
4498 goto get_more;
4499 }
4500 nbchar = in - ctxt->input->cur;
4501 if (nbchar > 0) {
4502 if ((ctxt->sax != NULL) &&
4503 (ctxt->sax->ignorableWhitespace !=
4504 ctxt->sax->characters) &&
4505 (IS_BLANK_CH(*ctxt->input->cur))) {
4506 const xmlChar *tmp = ctxt->input->cur;
4507 ctxt->input->cur = in;
4508
4509 if (areBlanks(ctxt, tmp, nbchar, 0)) {
4510 if (ctxt->sax->ignorableWhitespace != NULL)
4511 ctxt->sax->ignorableWhitespace(ctxt->userData,
4512 tmp, nbchar);
4513 } else {
4514 if (ctxt->sax->characters != NULL)
4515 ctxt->sax->characters(ctxt->userData,
4516 tmp, nbchar);
4517 if (*ctxt->space == -1)
4518 *ctxt->space = -2;
4519 }
4520 line = ctxt->input->line;
4521 col = ctxt->input->col;
4522 } else if (ctxt->sax != NULL) {
4523 if (ctxt->sax->characters != NULL)
4524 ctxt->sax->characters(ctxt->userData,
4525 ctxt->input->cur, nbchar);
4526 line = ctxt->input->line;
4527 col = ctxt->input->col;
4528 }
4529 /* something really bad happened in the SAX callback */
4530 if (ctxt->instate != XML_PARSER_CONTENT)
4531 return;
4532 }
4533 ctxt->input->cur = in;
4534 if (*in == 0xD) {
4535 in++;
4536 if (*in == 0xA) {
4537 ctxt->input->cur = in;
4538 in++;
4539 ctxt->input->line++; ctxt->input->col = 1;
4540 continue; /* while */
4541 }
4542 in--;
4543 }
4544 if (*in == '<') {
4545 return;
4546 }
4547 if (*in == '&') {
4548 return;
4549 }
4550 SHRINK;
4551 GROW;
4552 if (ctxt->instate == XML_PARSER_EOF)
4553 return;
4554 in = ctxt->input->cur;
4555 } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09) || (*in == 0x0a));
4556 nbchar = 0;
4557 }
4558 ctxt->input->line = line;
4559 ctxt->input->col = col;
4560 xmlParseCharDataComplex(ctxt, cdata);
4561}
4562
4563/**
4564 * xmlParseCharDataComplex:
4565 * @ctxt: an XML parser context
4566 * @cdata: int indicating whether we are within a CDATA section
4567 *
4568 * parse a CharData section.this is the fallback function
4569 * of xmlParseCharData() when the parsing requires handling
4570 * of non-ASCII characters.
4571 */
4572static void
4573xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata) {
4574 xmlChar buf[XML_PARSER_BIG_BUFFER_SIZE + 5];
4575 int nbchar = 0;
4576 int cur, l;
4577 int count = 0;
4578
4579 SHRINK;
4580 GROW;
4581 cur = CUR_CHAR(l);
4582 while ((cur != '<') && /* checked */
4583 (cur != '&') &&
4584 (IS_CHAR(cur))) /* test also done in xmlCurrentChar() */ {
4585 if ((cur == ']') && (NXT(1) == ']') &&
4586 (NXT(2) == '>')) {
4587 if (cdata) break;
4588 else {
4589 xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
4590 }
4591 }
4592 COPY_BUF(l,buf,nbchar,cur);
4593 /* move current position before possible calling of ctxt->sax->characters */
4594 NEXTL(l);
4595 cur = CUR_CHAR(l);
4596 if (nbchar >= XML_PARSER_BIG_BUFFER_SIZE) {
4597 buf[nbchar] = 0;
4598
4599 /*
4600 * OK the segment is to be consumed as chars.
4601 */
4602 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
4603 if (areBlanks(ctxt, buf, nbchar, 0)) {
4604 if (ctxt->sax->ignorableWhitespace != NULL)
4605 ctxt->sax->ignorableWhitespace(ctxt->userData,
4606 buf, nbchar);
4607 } else {
4608 if (ctxt->sax->characters != NULL)
4609 ctxt->sax->characters(ctxt->userData, buf, nbchar);
4610 if ((ctxt->sax->characters !=
4611 ctxt->sax->ignorableWhitespace) &&
4612 (*ctxt->space == -1))
4613 *ctxt->space = -2;
4614 }
4615 }
4616 nbchar = 0;
4617 /* something really bad happened in the SAX callback */
4618 if (ctxt->instate != XML_PARSER_CONTENT)
4619 return;
4620 }
4621 count++;
4622 if (count > 50) {
4623 SHRINK;
4624 GROW;
4625 count = 0;
4626 if (ctxt->instate == XML_PARSER_EOF)
4627 return;
4628 }
4629 }
4630 if (nbchar != 0) {
4631 buf[nbchar] = 0;
4632 /*
4633 * OK the segment is to be consumed as chars.
4634 */
4635 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
4636 if (areBlanks(ctxt, buf, nbchar, 0)) {
4637 if (ctxt->sax->ignorableWhitespace != NULL)
4638 ctxt->sax->ignorableWhitespace(ctxt->userData, buf, nbchar);
4639 } else {
4640 if (ctxt->sax->characters != NULL)
4641 ctxt->sax->characters(ctxt->userData, buf, nbchar);
4642 if ((ctxt->sax->characters != ctxt->sax->ignorableWhitespace) &&
4643 (*ctxt->space == -1))
4644 *ctxt->space = -2;
4645 }
4646 }
4647 }
4648 if ((cur != 0) && (!IS_CHAR(cur))) {
4649 /* Generate the error and skip the offending character */
4650 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4651 "PCDATA invalid Char value %d\n",
4652 cur);
4653 NEXTL(l);
4654 }
4655}
4656
4657/**
4658 * xmlParseExternalID:
4659 * @ctxt: an XML parser context
4660 * @publicID: a xmlChar** receiving PubidLiteral
4661 * @strict: indicate whether we should restrict parsing to only
4662 * production [75], see NOTE below
4663 *
4664 * Parse an External ID or a Public ID
4665 *
4666 * NOTE: Productions [75] and [83] interact badly since [75] can generate
4667 * 'PUBLIC' S PubidLiteral S SystemLiteral
4668 *
4669 * [75] ExternalID ::= 'SYSTEM' S SystemLiteral
4670 * | 'PUBLIC' S PubidLiteral S SystemLiteral
4671 *
4672 * [83] PublicID ::= 'PUBLIC' S PubidLiteral
4673 *
4674 * Returns the function returns SystemLiteral and in the second
4675 * case publicID receives PubidLiteral, is strict is off
4676 * it is possible to return NULL and have publicID set.
4677 */
4678
4679xmlChar *
4680xmlParseExternalID(xmlParserCtxtPtr ctxt, xmlChar **publicID, int strict) {
4681 xmlChar *URI = NULL;
4682
4683 SHRINK;
4684
4685 *publicID = NULL;
4686 if (CMP6(CUR_PTR, 'S', 'Y', 'S', 'T', 'E', 'M')) {
4687 SKIP(6);
4688 if (SKIP_BLANKS == 0) {
4689 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4690 "Space required after 'SYSTEM'\n");
4691 }
4692 URI = xmlParseSystemLiteral(ctxt);
4693 if (URI == NULL) {
4694 xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
4695 }
4696 } else if (CMP6(CUR_PTR, 'P', 'U', 'B', 'L', 'I', 'C')) {
4697 SKIP(6);
4698 if (SKIP_BLANKS == 0) {
4699 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4700 "Space required after 'PUBLIC'\n");
4701 }
4702 *publicID = xmlParsePubidLiteral(ctxt);
4703 if (*publicID == NULL) {
4704 xmlFatalErr(ctxt, XML_ERR_PUBID_REQUIRED, NULL);
4705 }
4706 if (strict) {
4707 /*
4708 * We don't handle [83] so "S SystemLiteral" is required.
4709 */
4710 if (SKIP_BLANKS == 0) {
4711 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4712 "Space required after the Public Identifier\n");
4713 }
4714 } else {
4715 /*
4716 * We handle [83] so we return immediately, if
4717 * "S SystemLiteral" is not detected. We skip blanks if no
4718 * system literal was found, but this is harmless since we must
4719 * be at the end of a NotationDecl.
4720 */
4721 if (SKIP_BLANKS == 0) return(NULL);
4722 if ((CUR != '\'') && (CUR != '"')) return(NULL);
4723 }
4724 URI = xmlParseSystemLiteral(ctxt);
4725 if (URI == NULL) {
4726 xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
4727 }
4728 }
4729 return(URI);
4730}
4731
4732/**
4733 * xmlParseCommentComplex:
4734 * @ctxt: an XML parser context
4735 * @buf: the already parsed part of the buffer
4736 * @len: number of bytes in the buffer
4737 * @size: allocated size of the buffer
4738 *
4739 * Skip an XML (SGML) comment <!-- .... -->
4740 * The spec says that "For compatibility, the string "--" (double-hyphen)
4741 * must not occur within comments. "
4742 * This is the slow routine in case the accelerator for ascii didn't work
4743 *
4744 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
4745 */
4746static void
4747xmlParseCommentComplex(xmlParserCtxtPtr ctxt, xmlChar *buf,
4748 size_t len, size_t size) {
4749 int q, ql;
4750 int r, rl;
4751 int cur, l;
4752 size_t count = 0;
4753 int inputid;
4754
4755 inputid = ctxt->input->id;
4756
4757 if (buf == NULL) {
4758 len = 0;
4759 size = XML_PARSER_BUFFER_SIZE;
4760 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
4761 if (buf == NULL) {
4762 xmlErrMemory(ctxt, NULL);
4763 return;
4764 }
4765 }
4766 GROW; /* Assure there's enough input data */
4767 q = CUR_CHAR(ql);
4768 if (q == 0)
4769 goto not_terminated;
4770 if (!IS_CHAR(q)) {
4771 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4772 "xmlParseComment: invalid xmlChar value %d\n",
4773 q);
4774 xmlFree (buf);
4775 return;
4776 }
4777 NEXTL(ql);
4778 r = CUR_CHAR(rl);
4779 if (r == 0)
4780 goto not_terminated;
4781 if (!IS_CHAR(r)) {
4782 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4783 "xmlParseComment: invalid xmlChar value %d\n",
4784 q);
4785 xmlFree (buf);
4786 return;
4787 }
4788 NEXTL(rl);
4789 cur = CUR_CHAR(l);
4790 if (cur == 0)
4791 goto not_terminated;
4792 while (IS_CHAR(cur) && /* checked */
4793 ((cur != '>') ||
4794 (r != '-') || (q != '-'))) {
4795 if ((r == '-') && (q == '-')) {
4796 xmlFatalErr(ctxt, XML_ERR_HYPHEN_IN_COMMENT, NULL);
4797 }
4798 if ((len > XML_MAX_TEXT_LENGTH) &&
4799 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
4800 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4801 "Comment too big found", NULL);
4802 xmlFree (buf);
4803 return;
4804 }
4805 if (len + 5 >= size) {
4806 xmlChar *new_buf;
4807 size_t new_size;
4808
4809 new_size = size * 2;
4810 new_buf = (xmlChar *) xmlRealloc(buf, new_size);
4811 if (new_buf == NULL) {
4812 xmlFree (buf);
4813 xmlErrMemory(ctxt, NULL);
4814 return;
4815 }
4816 buf = new_buf;
4817 size = new_size;
4818 }
4819 COPY_BUF(ql,buf,len,q);
4820 q = r;
4821 ql = rl;
4822 r = cur;
4823 rl = l;
4824
4825 count++;
4826 if (count > 50) {
4827 SHRINK;
4828 GROW;
4829 count = 0;
4830 if (ctxt->instate == XML_PARSER_EOF) {
4831 xmlFree(buf);
4832 return;
4833 }
4834 }
4835 NEXTL(l);
4836 cur = CUR_CHAR(l);
4837 if (cur == 0) {
4838 SHRINK;
4839 GROW;
4840 cur = CUR_CHAR(l);
4841 }
4842 }
4843 buf[len] = 0;
4844 if (cur == 0) {
4845 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4846 "Comment not terminated \n<!--%.50s\n", buf);
4847 } else if (!IS_CHAR(cur)) {
4848 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4849 "xmlParseComment: invalid xmlChar value %d\n",
4850 cur);
4851 } else {
4852 if (inputid != ctxt->input->id) {
4853 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
4854 "Comment doesn't start and stop in the same"
4855 " entity\n");
4856 }
4857 NEXT;
4858 if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
4859 (!ctxt->disableSAX))
4860 ctxt->sax->comment(ctxt->userData, buf);
4861 }
4862 xmlFree(buf);
4863 return;
4864not_terminated:
4865 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4866 "Comment not terminated\n", NULL);
4867 xmlFree(buf);
4868 return;
4869}
4870
4871/**
4872 * xmlParseComment:
4873 * @ctxt: an XML parser context
4874 *
4875 * Skip an XML (SGML) comment <!-- .... -->
4876 * The spec says that "For compatibility, the string "--" (double-hyphen)
4877 * must not occur within comments. "
4878 *
4879 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
4880 */
4881void
4882xmlParseComment(xmlParserCtxtPtr ctxt) {
4883 xmlChar *buf = NULL;
4884 size_t size = XML_PARSER_BUFFER_SIZE;
4885 size_t len = 0;
4886 xmlParserInputState state;
4887 const xmlChar *in;
4888 size_t nbchar = 0;
4889 int ccol;
4890 int inputid;
4891
4892 /*
4893 * Check that there is a comment right here.
4894 */
4895 if ((RAW != '<') || (NXT(1) != '!') ||
4896 (NXT(2) != '-') || (NXT(3) != '-')) return;
4897 state = ctxt->instate;
4898 ctxt->instate = XML_PARSER_COMMENT;
4899 inputid = ctxt->input->id;
4900 SKIP(4);
4901 SHRINK;
4902 GROW;
4903
4904 /*
4905 * Accelerated common case where input don't need to be
4906 * modified before passing it to the handler.
4907 */
4908 in = ctxt->input->cur;
4909 do {
4910 if (*in == 0xA) {
4911 do {
4912 ctxt->input->line++; ctxt->input->col = 1;
4913 in++;
4914 } while (*in == 0xA);
4915 }
4916get_more:
4917 ccol = ctxt->input->col;
4918 while (((*in > '-') && (*in <= 0x7F)) ||
4919 ((*in >= 0x20) && (*in < '-')) ||
4920 (*in == 0x09)) {
4921 in++;
4922 ccol++;
4923 }
4924 ctxt->input->col = ccol;
4925 if (*in == 0xA) {
4926 do {
4927 ctxt->input->line++; ctxt->input->col = 1;
4928 in++;
4929 } while (*in == 0xA);
4930 goto get_more;
4931 }
4932 nbchar = in - ctxt->input->cur;
4933 /*
4934 * save current set of data
4935 */
4936 if (nbchar > 0) {
4937 if ((ctxt->sax != NULL) &&
4938 (ctxt->sax->comment != NULL)) {
4939 if (buf == NULL) {
4940 if ((*in == '-') && (in[1] == '-'))
4941 size = nbchar + 1;
4942 else
4943 size = XML_PARSER_BUFFER_SIZE + nbchar;
4944 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
4945 if (buf == NULL) {
4946 xmlErrMemory(ctxt, NULL);
4947 ctxt->instate = state;
4948 return;
4949 }
4950 len = 0;
4951 } else if (len + nbchar + 1 >= size) {
4952 xmlChar *new_buf;
4953 size += len + nbchar + XML_PARSER_BUFFER_SIZE;
4954 new_buf = (xmlChar *) xmlRealloc(buf,
4955 size * sizeof(xmlChar));
4956 if (new_buf == NULL) {
4957 xmlFree (buf);
4958 xmlErrMemory(ctxt, NULL);
4959 ctxt->instate = state;
4960 return;
4961 }
4962 buf = new_buf;
4963 }
4964 memcpy(&buf[len], ctxt->input->cur, nbchar);
4965 len += nbchar;
4966 buf[len] = 0;
4967 }
4968 }
4969 if ((len > XML_MAX_TEXT_LENGTH) &&
4970 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
4971 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4972 "Comment too big found", NULL);
4973 xmlFree (buf);
4974 return;
4975 }
4976 ctxt->input->cur = in;
4977 if (*in == 0xA) {
4978 in++;
4979 ctxt->input->line++; ctxt->input->col = 1;
4980 }
4981 if (*in == 0xD) {
4982 in++;
4983 if (*in == 0xA) {
4984 ctxt->input->cur = in;
4985 in++;
4986 ctxt->input->line++; ctxt->input->col = 1;
4987 goto get_more;
4988 }
4989 in--;
4990 }
4991 SHRINK;
4992 GROW;
4993 if (ctxt->instate == XML_PARSER_EOF) {
4994 xmlFree(buf);
4995 return;
4996 }
4997 in = ctxt->input->cur;
4998 if (*in == '-') {
4999 if (in[1] == '-') {
5000 if (in[2] == '>') {
5001 if (ctxt->input->id != inputid) {
5002 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5003 "comment doesn't start and stop in the"
5004 " same entity\n");
5005 }
5006 SKIP(3);
5007 if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
5008 (!ctxt->disableSAX)) {
5009 if (buf != NULL)
5010 ctxt->sax->comment(ctxt->userData, buf);
5011 else
5012 ctxt->sax->comment(ctxt->userData, BAD_CAST "");
5013 }
5014 if (buf != NULL)
5015 xmlFree(buf);
5016 if (ctxt->instate != XML_PARSER_EOF)
5017 ctxt->instate = state;
5018 return;
5019 }
5020 if (buf != NULL) {
5021 xmlFatalErrMsgStr(ctxt, XML_ERR_HYPHEN_IN_COMMENT,
5022 "Double hyphen within comment: "
5023 "<!--%.50s\n",
5024 buf);
5025 } else
5026 xmlFatalErrMsgStr(ctxt, XML_ERR_HYPHEN_IN_COMMENT,
5027 "Double hyphen within comment\n", NULL);
5028 if (ctxt->instate == XML_PARSER_EOF) {
5029 xmlFree(buf);
5030 return;
5031 }
5032 in++;
5033 ctxt->input->col++;
5034 }
5035 in++;
5036 ctxt->input->col++;
5037 goto get_more;
5038 }
5039 } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09) || (*in == 0x0a));
5040 xmlParseCommentComplex(ctxt, buf, len, size);
5041 ctxt->instate = state;
5042 return;
5043}
5044
5045
5046/**
5047 * xmlParsePITarget:
5048 * @ctxt: an XML parser context
5049 *
5050 * parse the name of a PI
5051 *
5052 * [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
5053 *
5054 * Returns the PITarget name or NULL
5055 */
5056
5057const xmlChar *
5058xmlParsePITarget(xmlParserCtxtPtr ctxt) {
5059 const xmlChar *name;
5060
5061 name = xmlParseName(ctxt);
5062 if ((name != NULL) &&
5063 ((name[0] == 'x') || (name[0] == 'X')) &&
5064 ((name[1] == 'm') || (name[1] == 'M')) &&
5065 ((name[2] == 'l') || (name[2] == 'L'))) {
5066 int i;
5067 if ((name[0] == 'x') && (name[1] == 'm') &&
5068 (name[2] == 'l') && (name[3] == 0)) {
5069 xmlFatalErrMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
5070 "XML declaration allowed only at the start of the document\n");
5071 return(name);
5072 } else if (name[3] == 0) {
5073 xmlFatalErr(ctxt, XML_ERR_RESERVED_XML_NAME, NULL);
5074 return(name);
5075 }
5076 for (i = 0;;i++) {
5077 if (xmlW3CPIs[i] == NULL) break;
5078 if (xmlStrEqual(name, (const xmlChar *)xmlW3CPIs[i]))
5079 return(name);
5080 }
5081 xmlWarningMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
5082 "xmlParsePITarget: invalid name prefix 'xml'\n",
5083 NULL, NULL);
5084 }
5085 if ((name != NULL) && (xmlStrchr(name, ':') != NULL)) {
5086 xmlNsErr(ctxt, XML_NS_ERR_COLON,
5087 "colons are forbidden from PI names '%s'\n", name, NULL, NULL);
5088 }
5089 return(name);
5090}
5091
#ifdef LIBXML_CATALOG_ENABLED
/**
 * xmlParseCatalogPI:
 * @ctxt:  an XML parser context
 * @catalog:  the PI value string
 *
 * parse an XML Catalog Processing Instruction.
 *
 * <?oasis-xml-catalog catalog="http://example.com/catalog.xml"?>
 *
 * Occurs only if allowed by the user and if happening in the Misc
 * part of the document before any doctype information
 * This will add the given catalog to the parsing context in order
 * to be used if there is a resolution need further down in the document
 *
 * The accepted content is: optional blanks, the word "catalog",
 * optional blanks, '=', optional blanks, a quoted URL, optional blanks.
 * Anything else is reported with an XML_WAR_CATALOG_PI warning and the
 * catalog list is left untouched.
 */

static void
xmlParseCatalogPI(xmlParserCtxtPtr ctxt, const xmlChar *catalog) {
    xmlChar *URL = NULL;
    const xmlChar *tmp, *base;
    xmlChar marker;

    tmp = catalog;
    while (IS_BLANK_CH(*tmp)) tmp++;
    if (xmlStrncmp(tmp, BAD_CAST"catalog", 7))
        goto error;
    tmp += 7;
    while (IS_BLANK_CH(*tmp)) tmp++;
    /* a missing '=' is a syntax error like any other, warn about it */
    if (*tmp != '=')
        goto error;
    tmp++;
    while (IS_BLANK_CH(*tmp)) tmp++;
    marker = *tmp;
    if ((marker != '\'') && (marker != '"'))
        goto error;
    tmp++;
    base = tmp;
    /* look for the matching closing quote */
    while ((*tmp != 0) && (*tmp != marker)) tmp++;
    if (*tmp == 0)
        goto error;
    URL = xmlStrndup(base, tmp - base);
    tmp++;
    while (IS_BLANK_CH(*tmp)) tmp++;
    if (*tmp != 0)
        goto error;

    if (URL != NULL) {
        ctxt->catalogs = xmlCatalogAddLocal(ctxt->catalogs, URL);
        xmlFree(URL);
    }
    return;

error:
    xmlWarningMsg(ctxt, XML_WAR_CATALOG_PI,
                  "Catalog PI syntax error: %s\n",
                  catalog, NULL);
    if (URL != NULL)
        xmlFree(URL);
}
#endif
5153
5154/**
5155 * xmlParsePI:
5156 * @ctxt: an XML parser context
5157 *
5158 * parse an XML Processing Instruction.
5159 *
5160 * [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
5161 *
5162 * The processing is transferred to SAX once parsed.
5163 */
5164
5165void
5166xmlParsePI(xmlParserCtxtPtr ctxt) {
5167 xmlChar *buf = NULL;
5168 size_t len = 0;
5169 size_t size = XML_PARSER_BUFFER_SIZE;
5170 int cur, l;
5171 const xmlChar *target;
5172 xmlParserInputState state;
5173 int count = 0;
5174
5175 if ((RAW == '<') && (NXT(1) == '?')) {
5176 int inputid = ctxt->input->id;
5177 state = ctxt->instate;
5178 ctxt->instate = XML_PARSER_PI;
5179 /*
5180 * this is a Processing Instruction.
5181 */
5182 SKIP(2);
5183 SHRINK;
5184
5185 /*
5186 * Parse the target name and check for special support like
5187 * namespace.
5188 */
5189 target = xmlParsePITarget(ctxt);
5190 if (target != NULL) {
5191 if ((RAW == '?') && (NXT(1) == '>')) {
5192 if (inputid != ctxt->input->id) {
5193 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5194 "PI declaration doesn't start and stop in"
5195 " the same entity\n");
5196 }
5197 SKIP(2);
5198
5199 /*
5200 * SAX: PI detected.
5201 */
5202 if ((ctxt->sax) && (!ctxt->disableSAX) &&
5203 (ctxt->sax->processingInstruction != NULL))
5204 ctxt->sax->processingInstruction(ctxt->userData,
5205 target, NULL);
5206 if (ctxt->instate != XML_PARSER_EOF)
5207 ctxt->instate = state;
5208 return;
5209 }
5210 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
5211 if (buf == NULL) {
5212 xmlErrMemory(ctxt, NULL);
5213 ctxt->instate = state;
5214 return;
5215 }
5216 if (SKIP_BLANKS == 0) {
5217 xmlFatalErrMsgStr(ctxt, XML_ERR_SPACE_REQUIRED,
5218 "ParsePI: PI %s space expected\n", target);
5219 }
5220 cur = CUR_CHAR(l);
5221 while (IS_CHAR(cur) && /* checked */
5222 ((cur != '?') || (NXT(1) != '>'))) {
5223 if (len + 5 >= size) {
5224 xmlChar *tmp;
5225 size_t new_size = size * 2;
5226 tmp = (xmlChar *) xmlRealloc(buf, new_size);
5227 if (tmp == NULL) {
5228 xmlErrMemory(ctxt, NULL);
5229 xmlFree(buf);
5230 ctxt->instate = state;
5231 return;
5232 }
5233 buf = tmp;
5234 size = new_size;
5235 }
5236 count++;
5237 if (count > 50) {
5238 SHRINK;
5239 GROW;
5240 if (ctxt->instate == XML_PARSER_EOF) {
5241 xmlFree(buf);
5242 return;
5243 }
5244 count = 0;
5245 if ((len > XML_MAX_TEXT_LENGTH) &&
5246 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
5247 xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
5248 "PI %s too big found", target);
5249 xmlFree(buf);
5250 ctxt->instate = state;
5251 return;
5252 }
5253 }
5254 COPY_BUF(l,buf,len,cur);
5255 NEXTL(l);
5256 cur = CUR_CHAR(l);
5257 if (cur == 0) {
5258 SHRINK;
5259 GROW;
5260 cur = CUR_CHAR(l);
5261 }
5262 }
5263 if ((len > XML_MAX_TEXT_LENGTH) &&
5264 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
5265 xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
5266 "PI %s too big found", target);
5267 xmlFree(buf);
5268 ctxt->instate = state;
5269 return;
5270 }
5271 buf[len] = 0;
5272 if (cur != '?') {
5273 xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
5274 "ParsePI: PI %s never end ...\n", target);
5275 } else {
5276 if (inputid != ctxt->input->id) {
5277 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5278 "PI declaration doesn't start and stop in"
5279 " the same entity\n");
5280 }
5281 SKIP(2);
5282
5283#ifdef LIBXML_CATALOG_ENABLED
5284 if (((state == XML_PARSER_MISC) ||
5285 (state == XML_PARSER_START)) &&
5286 (xmlStrEqual(target, XML_CATALOG_PI))) {
5287 xmlCatalogAllow allow = xmlCatalogGetDefaults();
5288 if ((allow == XML_CATA_ALLOW_DOCUMENT) ||
5289 (allow == XML_CATA_ALLOW_ALL))
5290 xmlParseCatalogPI(ctxt, buf);
5291 }
5292#endif
5293
5294
5295 /*
5296 * SAX: PI detected.
5297 */
5298 if ((ctxt->sax) && (!ctxt->disableSAX) &&
5299 (ctxt->sax->processingInstruction != NULL))
5300 ctxt->sax->processingInstruction(ctxt->userData,
5301 target, buf);
5302 }
5303 xmlFree(buf);
5304 } else {
5305 xmlFatalErr(ctxt, XML_ERR_PI_NOT_STARTED, NULL);
5306 }
5307 if (ctxt->instate != XML_PARSER_EOF)
5308 ctxt->instate = state;
5309 }
5310}
5311
5312/**
5313 * xmlParseNotationDecl:
5314 * @ctxt: an XML parser context
5315 *
5316 * parse a notation declaration
5317 *
5318 * [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID | PublicID) S? '>'
5319 *
5320 * Hence there is actually 3 choices:
5321 * 'PUBLIC' S PubidLiteral
5322 * 'PUBLIC' S PubidLiteral S SystemLiteral
5323 * and 'SYSTEM' S SystemLiteral
5324 *
5325 * See the NOTE on xmlParseExternalID().
5326 */
5327
5328void
5329xmlParseNotationDecl(xmlParserCtxtPtr ctxt) {
5330 const xmlChar *name;
5331 xmlChar *Pubid;
5332 xmlChar *Systemid;
5333
5334 if (CMP10(CUR_PTR, '<', '!', 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
5335 int inputid = ctxt->input->id;
5336 SHRINK;
5337 SKIP(10);
5338 if (SKIP_BLANKS == 0) {
5339 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5340 "Space required after '<!NOTATION'\n");
5341 return;
5342 }
5343
5344 name = xmlParseName(ctxt);
5345 if (name == NULL) {
5346 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
5347 return;
5348 }
5349 if (xmlStrchr(name, ':') != NULL) {
5350 xmlNsErr(ctxt, XML_NS_ERR_COLON,
5351 "colons are forbidden from notation names '%s'\n",
5352 name, NULL, NULL);
5353 }
5354 if (SKIP_BLANKS == 0) {
5355 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5356 "Space required after the NOTATION name'\n");
5357 return;
5358 }
5359
5360 /*
5361 * Parse the IDs.
5362 */
5363 Systemid = xmlParseExternalID(ctxt, &Pubid, 0);
5364 SKIP_BLANKS;
5365
5366 if (RAW == '>') {
5367 if (inputid != ctxt->input->id) {
5368 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5369 "Notation declaration doesn't start and stop"
5370 " in the same entity\n");
5371 }
5372 NEXT;
5373 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5374 (ctxt->sax->notationDecl != NULL))
5375 ctxt->sax->notationDecl(ctxt->userData, name, Pubid, Systemid);
5376 } else {
5377 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
5378 }
5379 if (Systemid != NULL) xmlFree(Systemid);
5380 if (Pubid != NULL) xmlFree(Pubid);
5381 }
5382}
5383
5384/**
5385 * xmlParseEntityDecl:
5386 * @ctxt: an XML parser context
5387 *
5388 * parse <!ENTITY declarations
5389 *
5390 * [70] EntityDecl ::= GEDecl | PEDecl
5391 *
5392 * [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'
5393 *
5394 * [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'
5395 *
5396 * [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?)
5397 *
5398 * [74] PEDef ::= EntityValue | ExternalID
5399 *
5400 * [76] NDataDecl ::= S 'NDATA' S Name
5401 *
5402 * [ VC: Notation Declared ]
5403 * The Name must match the declared name of a notation.
5404 */
5405
5406void
5407xmlParseEntityDecl(xmlParserCtxtPtr ctxt) {
5408 const xmlChar *name = NULL;
5409 xmlChar *value = NULL;
5410 xmlChar *URI = NULL, *literal = NULL;
5411 const xmlChar *ndata = NULL;
5412 int isParameter = 0;
5413 xmlChar *orig = NULL;
5414
5415 /* GROW; done in the caller */
5416 if (CMP8(CUR_PTR, '<', '!', 'E', 'N', 'T', 'I', 'T', 'Y')) {
5417 int inputid = ctxt->input->id;
5418 SHRINK;
5419 SKIP(8);
5420 if (SKIP_BLANKS == 0) {
5421 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5422 "Space required after '<!ENTITY'\n");
5423 }
5424
5425 if (RAW == '%') {
5426 NEXT;
5427 if (SKIP_BLANKS == 0) {
5428 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5429 "Space required after '%%'\n");
5430 }
5431 isParameter = 1;
5432 }
5433
5434 name = xmlParseName(ctxt);
5435 if (name == NULL) {
5436 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5437 "xmlParseEntityDecl: no name\n");
5438 return;
5439 }
5440 if (xmlStrchr(name, ':') != NULL) {
5441 xmlNsErr(ctxt, XML_NS_ERR_COLON,
5442 "colons are forbidden from entities names '%s'\n",
5443 name, NULL, NULL);
5444 }
5445 if (SKIP_BLANKS == 0) {
5446 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5447 "Space required after the entity name\n");
5448 }
5449
5450 ctxt->instate = XML_PARSER_ENTITY_DECL;
5451 /*
5452 * handle the various case of definitions...
5453 */
5454 if (isParameter) {
5455 if ((RAW == '"') || (RAW == '\'')) {
5456 value = xmlParseEntityValue(ctxt, &orig);
5457 if (value) {
5458 if ((ctxt->sax != NULL) &&
5459 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5460 ctxt->sax->entityDecl(ctxt->userData, name,
5461 XML_INTERNAL_PARAMETER_ENTITY,
5462 NULL, NULL, value);
5463 }
5464 } else {
5465 URI = xmlParseExternalID(ctxt, &literal, 1);
5466 if ((URI == NULL) && (literal == NULL)) {
5467 xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
5468 }
5469 if (URI) {
5470 xmlURIPtr uri;
5471
5472 uri = xmlParseURI((const char *) URI);
5473 if (uri == NULL) {
5474 xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI,
5475 "Invalid URI: %s\n", URI);
5476 /*
5477 * This really ought to be a well formedness error
5478 * but the XML Core WG decided otherwise c.f. issue
5479 * E26 of the XML erratas.
5480 */
5481 } else {
5482 if (uri->fragment != NULL) {
5483 /*
5484 * Okay this is foolish to block those but not
5485 * invalid URIs.
5486 */
5487 xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
5488 } else {
5489 if ((ctxt->sax != NULL) &&
5490 (!ctxt->disableSAX) &&
5491 (ctxt->sax->entityDecl != NULL))
5492 ctxt->sax->entityDecl(ctxt->userData, name,
5493 XML_EXTERNAL_PARAMETER_ENTITY,
5494 literal, URI, NULL);
5495 }
5496 xmlFreeURI(uri);
5497 }
5498 }
5499 }
5500 } else {
5501 if ((RAW == '"') || (RAW == '\'')) {
5502 value = xmlParseEntityValue(ctxt, &orig);
5503 if ((ctxt->sax != NULL) &&
5504 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5505 ctxt->sax->entityDecl(ctxt->userData, name,
5506 XML_INTERNAL_GENERAL_ENTITY,
5507 NULL, NULL, value);
5508 /*
5509 * For expat compatibility in SAX mode.
5510 */
5511 if ((ctxt->myDoc == NULL) ||
5512 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
5513 if (ctxt->myDoc == NULL) {
5514 ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
5515 if (ctxt->myDoc == NULL) {
5516 xmlErrMemory(ctxt, "New Doc failed");
5517 return;
5518 }
5519 ctxt->myDoc->properties = XML_DOC_INTERNAL;
5520 }
5521 if (ctxt->myDoc->intSubset == NULL)
5522 ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
5523 BAD_CAST "fake", NULL, NULL);
5524
5525 xmlSAX2EntityDecl(ctxt, name, XML_INTERNAL_GENERAL_ENTITY,
5526 NULL, NULL, value);
5527 }
5528 } else {
5529 URI = xmlParseExternalID(ctxt, &literal, 1);
5530 if ((URI == NULL) && (literal == NULL)) {
5531 xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
5532 }
5533 if (URI) {
5534 xmlURIPtr uri;
5535
5536 uri = xmlParseURI((const char *)URI);
5537 if (uri == NULL) {
5538 xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI,
5539 "Invalid URI: %s\n", URI);
5540 /*
5541 * This really ought to be a well formedness error
5542 * but the XML Core WG decided otherwise c.f. issue
5543 * E26 of the XML erratas.
5544 */
5545 } else {
5546 if (uri->fragment != NULL) {
5547 /*
5548 * Okay this is foolish to block those but not
5549 * invalid URIs.
5550 */
5551 xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
5552 }
5553 xmlFreeURI(uri);
5554 }
5555 }
5556 if ((RAW != '>') && (SKIP_BLANKS == 0)) {
5557 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5558 "Space required before 'NDATA'\n");
5559 }
5560 if (CMP5(CUR_PTR, 'N', 'D', 'A', 'T', 'A')) {
5561 SKIP(5);
5562 if (SKIP_BLANKS == 0) {
5563 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5564 "Space required after 'NDATA'\n");
5565 }
5566 ndata = xmlParseName(ctxt);
5567 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5568 (ctxt->sax->unparsedEntityDecl != NULL))
5569 ctxt->sax->unparsedEntityDecl(ctxt->userData, name,
5570 literal, URI, ndata);
5571 } else {
5572 if ((ctxt->sax != NULL) &&
5573 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5574 ctxt->sax->entityDecl(ctxt->userData, name,
5575 XML_EXTERNAL_GENERAL_PARSED_ENTITY,
5576 literal, URI, NULL);
5577 /*
5578 * For expat compatibility in SAX mode.
5579 * assuming the entity replacement was asked for
5580 */
5581 if ((ctxt->replaceEntities != 0) &&
5582 ((ctxt->myDoc == NULL) ||
5583 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE)))) {
5584 if (ctxt->myDoc == NULL) {
5585 ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
5586 if (ctxt->myDoc == NULL) {
5587 xmlErrMemory(ctxt, "New Doc failed");
5588 return;
5589 }
5590 ctxt->myDoc->properties = XML_DOC_INTERNAL;
5591 }
5592
5593 if (ctxt->myDoc->intSubset == NULL)
5594 ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
5595 BAD_CAST "fake", NULL, NULL);
5596 xmlSAX2EntityDecl(ctxt, name,
5597 XML_EXTERNAL_GENERAL_PARSED_ENTITY,
5598 literal, URI, NULL);
5599 }
5600 }
5601 }
5602 }
5603 if (ctxt->instate == XML_PARSER_EOF)
5604 goto done;
5605 SKIP_BLANKS;
5606 if (RAW != '>') {
5607 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_NOT_FINISHED,
5608 "xmlParseEntityDecl: entity %s not terminated\n", name);
5609 xmlHaltParser(ctxt);
5610 } else {
5611 if (inputid != ctxt->input->id) {
5612 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5613 "Entity declaration doesn't start and stop in"
5614 " the same entity\n");
5615 }
5616 NEXT;
5617 }
5618 if (orig != NULL) {
5619 /*
5620 * Ugly mechanism to save the raw entity value.
5621 */
5622 xmlEntityPtr cur = NULL;
5623
5624 if (isParameter) {
5625 if ((ctxt->sax != NULL) &&
5626 (ctxt->sax->getParameterEntity != NULL))
5627 cur = ctxt->sax->getParameterEntity(ctxt->userData, name);
5628 } else {
5629 if ((ctxt->sax != NULL) &&
5630 (ctxt->sax->getEntity != NULL))
5631 cur = ctxt->sax->getEntity(ctxt->userData, name);
5632 if ((cur == NULL) && (ctxt->userData==ctxt)) {
5633 cur = xmlSAX2GetEntity(ctxt, name);
5634 }
5635 }
5636 if ((cur != NULL) && (cur->orig == NULL)) {
5637 cur->orig = orig;
5638 orig = NULL;
5639 }
5640 }
5641
5642done:
5643 if (value != NULL) xmlFree(value);
5644 if (URI != NULL) xmlFree(URI);
5645 if (literal != NULL) xmlFree(literal);
5646 if (orig != NULL) xmlFree(orig);
5647 }
5648}
5649
5650/**
5651 * xmlParseDefaultDecl:
5652 * @ctxt: an XML parser context
5653 * @value: Receive a possible fixed default value for the attribute
5654 *
5655 * Parse an attribute default declaration
5656 *
5657 * [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue)
5658 *
5659 * [ VC: Required Attribute ]
5660 * if the default declaration is the keyword #REQUIRED, then the
5661 * attribute must be specified for all elements of the type in the
5662 * attribute-list declaration.
5663 *
5664 * [ VC: Attribute Default Legal ]
5665 * The declared default value must meet the lexical constraints of
5666 * the declared attribute type c.f. xmlValidateAttributeDecl()
5667 *
5668 * [ VC: Fixed Attribute Default ]
5669 * if an attribute has a default value declared with the #FIXED
5670 * keyword, instances of that attribute must match the default value.
5671 *
5672 * [ WFC: No < in Attribute Values ]
5673 * handled in xmlParseAttValue()
5674 *
5675 * returns: XML_ATTRIBUTE_NONE, XML_ATTRIBUTE_REQUIRED, XML_ATTRIBUTE_IMPLIED
5676 * or XML_ATTRIBUTE_FIXED.
5677 */
5678
5679int
5680xmlParseDefaultDecl(xmlParserCtxtPtr ctxt, xmlChar **value) {
5681 int val;
5682 xmlChar *ret;
5683
5684 *value = NULL;
5685 if (CMP9(CUR_PTR, '#', 'R', 'E', 'Q', 'U', 'I', 'R', 'E', 'D')) {
5686 SKIP(9);
5687 return(XML_ATTRIBUTE_REQUIRED);
5688 }
5689 if (CMP8(CUR_PTR, '#', 'I', 'M', 'P', 'L', 'I', 'E', 'D')) {
5690 SKIP(8);
5691 return(XML_ATTRIBUTE_IMPLIED);
5692 }
5693 val = XML_ATTRIBUTE_NONE;
5694 if (CMP6(CUR_PTR, '#', 'F', 'I', 'X', 'E', 'D')) {
5695 SKIP(6);
5696 val = XML_ATTRIBUTE_FIXED;
5697 if (SKIP_BLANKS == 0) {
5698 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5699 "Space required after '#FIXED'\n");
5700 }
5701 }
5702 ret = xmlParseAttValue(ctxt);
5703 ctxt->instate = XML_PARSER_DTD;
5704 if (ret == NULL) {
5705 xmlFatalErrMsg(ctxt, (xmlParserErrors)ctxt->errNo,
5706 "Attribute default value declaration error\n");
5707 } else
5708 *value = ret;
5709 return(val);
5710}
5711
5712/**
5713 * xmlParseNotationType:
5714 * @ctxt: an XML parser context
5715 *
5716 * parse an Notation attribute type.
5717 *
5718 * Note: the leading 'NOTATION' S part has already being parsed...
5719 *
5720 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
5721 *
5722 * [ VC: Notation Attributes ]
5723 * Values of this type must match one of the notation names included
5724 * in the declaration; all notation names in the declaration must be declared.
5725 *
5726 * Returns: the notation attribute tree built while parsing
5727 */
5728
5729xmlEnumerationPtr
5730xmlParseNotationType(xmlParserCtxtPtr ctxt) {
5731 const xmlChar *name;
5732 xmlEnumerationPtr ret = NULL, last = NULL, cur, tmp;
5733
5734 if (RAW != '(') {
5735 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
5736 return(NULL);
5737 }
5738 SHRINK;
5739 do {
5740 NEXT;
5741 SKIP_BLANKS;
5742 name = xmlParseName(ctxt);
5743 if (name == NULL) {
5744 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5745 "Name expected in NOTATION declaration\n");
5746 xmlFreeEnumeration(ret);
5747 return(NULL);
5748 }
5749 tmp = ret;
5750 while (tmp != NULL) {
5751 if (xmlStrEqual(name, tmp->name)) {
5752 xmlValidityError(ctxt, XML_DTD_DUP_TOKEN,
5753 "standalone: attribute notation value token %s duplicated\n",
5754 name, NULL);
5755 if (!xmlDictOwns(ctxt->dict, name))
5756 xmlFree((xmlChar *) name);
5757 break;
5758 }
5759 tmp = tmp->next;
5760 }
5761 if (tmp == NULL) {
5762 cur = xmlCreateEnumeration(name);
5763 if (cur == NULL) {
5764 xmlFreeEnumeration(ret);
5765 return(NULL);
5766 }
5767 if (last == NULL) ret = last = cur;
5768 else {
5769 last->next = cur;
5770 last = cur;
5771 }
5772 }
5773 SKIP_BLANKS;
5774 } while (RAW == '|');
5775 if (RAW != ')') {
5776 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
5777 xmlFreeEnumeration(ret);
5778 return(NULL);
5779 }
5780 NEXT;
5781 return(ret);
5782}
5783
5784/**
5785 * xmlParseEnumerationType:
5786 * @ctxt: an XML parser context
5787 *
5788 * parse an Enumeration attribute type.
5789 *
5790 * [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'
5791 *
5792 * [ VC: Enumeration ]
5793 * Values of this type must match one of the Nmtoken tokens in
5794 * the declaration
5795 *
5796 * Returns: the enumeration attribute tree built while parsing
5797 */
5798
5799xmlEnumerationPtr
5800xmlParseEnumerationType(xmlParserCtxtPtr ctxt) {
5801 xmlChar *name;
5802 xmlEnumerationPtr ret = NULL, last = NULL, cur, tmp;
5803
5804 if (RAW != '(') {
5805 xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_STARTED, NULL);
5806 return(NULL);
5807 }
5808 SHRINK;
5809 do {
5810 NEXT;
5811 SKIP_BLANKS;
5812 name = xmlParseNmtoken(ctxt);
5813 if (name == NULL) {
5814 xmlFatalErr(ctxt, XML_ERR_NMTOKEN_REQUIRED, NULL);
5815 return(ret);
5816 }
5817 tmp = ret;
5818 while (tmp != NULL) {
5819 if (xmlStrEqual(name, tmp->name)) {
5820 xmlValidityError(ctxt, XML_DTD_DUP_TOKEN,
5821 "standalone: attribute enumeration value token %s duplicated\n",
5822 name, NULL);
5823 if (!xmlDictOwns(ctxt->dict, name))
5824 xmlFree(name);
5825 break;
5826 }
5827 tmp = tmp->next;
5828 }
5829 if (tmp == NULL) {
5830 cur = xmlCreateEnumeration(name);
5831 if (!xmlDictOwns(ctxt->dict, name))
5832 xmlFree(name);
5833 if (cur == NULL) {
5834 xmlFreeEnumeration(ret);
5835 return(NULL);
5836 }
5837 if (last == NULL) ret = last = cur;
5838 else {
5839 last->next = cur;
5840 last = cur;
5841 }
5842 }
5843 SKIP_BLANKS;
5844 } while (RAW == '|');
5845 if (RAW != ')') {
5846 xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_FINISHED, NULL);
5847 return(ret);
5848 }
5849 NEXT;
5850 return(ret);
5851}
5852
5853/**
5854 * xmlParseEnumeratedType:
5855 * @ctxt: an XML parser context
5856 * @tree: the enumeration tree built while parsing
5857 *
5858 * parse an Enumerated attribute type.
5859 *
5860 * [57] EnumeratedType ::= NotationType | Enumeration
5861 *
5862 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
5863 *
5864 *
5865 * Returns: XML_ATTRIBUTE_ENUMERATION or XML_ATTRIBUTE_NOTATION
5866 */
5867
5868int
5869xmlParseEnumeratedType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
5870 if (CMP8(CUR_PTR, 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
5871 SKIP(8);
5872 if (SKIP_BLANKS == 0) {
5873 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5874 "Space required after 'NOTATION'\n");
5875 return(0);
5876 }
5877 *tree = xmlParseNotationType(ctxt);
5878 if (*tree == NULL) return(0);
5879 return(XML_ATTRIBUTE_NOTATION);
5880 }
5881 *tree = xmlParseEnumerationType(ctxt);
5882 if (*tree == NULL) return(0);
5883 return(XML_ATTRIBUTE_ENUMERATION);
5884}
5885
5886/**
5887 * xmlParseAttributeType:
5888 * @ctxt: an XML parser context
5889 * @tree: the enumeration tree built while parsing
5890 *
5891 * parse the Attribute list def for an element
5892 *
5893 * [54] AttType ::= StringType | TokenizedType | EnumeratedType
5894 *
5895 * [55] StringType ::= 'CDATA'
5896 *
5897 * [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' |
5898 * 'ENTITIES' | 'NMTOKEN' | 'NMTOKENS'
5899 *
5900 * Validity constraints for attribute values syntax are checked in
5901 * xmlValidateAttributeValue()
5902 *
5903 * [ VC: ID ]
5904 * Values of type ID must match the Name production. A name must not
5905 * appear more than once in an XML document as a value of this type;
5906 * i.e., ID values must uniquely identify the elements which bear them.
5907 *
5908 * [ VC: One ID per Element Type ]
5909 * No element type may have more than one ID attribute specified.
5910 *
5911 * [ VC: ID Attribute Default ]
5912 * An ID attribute must have a declared default of #IMPLIED or #REQUIRED.
5913 *
5914 * [ VC: IDREF ]
5915 * Values of type IDREF must match the Name production, and values
5916 * of type IDREFS must match Names; each IDREF Name must match the value
5917 * of an ID attribute on some element in the XML document; i.e. IDREF
5918 * values must match the value of some ID attribute.
5919 *
5920 * [ VC: Entity Name ]
5921 * Values of type ENTITY must match the Name production, values
5922 * of type ENTITIES must match Names; each Entity Name must match the
5923 * name of an unparsed entity declared in the DTD.
5924 *
5925 * [ VC: Name Token ]
5926 * Values of type NMTOKEN must match the Nmtoken production; values
5927 * of type NMTOKENS must match Nmtokens.
5928 *
5929 * Returns the attribute type
5930 */
5931int
5932xmlParseAttributeType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
5933 SHRINK;
5934 if (CMP5(CUR_PTR, 'C', 'D', 'A', 'T', 'A')) {
5935 SKIP(5);
5936 return(XML_ATTRIBUTE_CDATA);
5937 } else if (CMP6(CUR_PTR, 'I', 'D', 'R', 'E', 'F', 'S')) {
5938 SKIP(6);
5939 return(XML_ATTRIBUTE_IDREFS);
5940 } else if (CMP5(CUR_PTR, 'I', 'D', 'R', 'E', 'F')) {
5941 SKIP(5);
5942 return(XML_ATTRIBUTE_IDREF);
5943 } else if ((RAW == 'I') && (NXT(1) == 'D')) {
5944 SKIP(2);
5945 return(XML_ATTRIBUTE_ID);
5946 } else if (CMP6(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'Y')) {
5947 SKIP(6);
5948 return(XML_ATTRIBUTE_ENTITY);
5949 } else if (CMP8(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'I', 'E', 'S')) {
5950 SKIP(8);
5951 return(XML_ATTRIBUTE_ENTITIES);
5952 } else if (CMP8(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N', 'S')) {
5953 SKIP(8);
5954 return(XML_ATTRIBUTE_NMTOKENS);
5955 } else if (CMP7(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N')) {
5956 SKIP(7);
5957 return(XML_ATTRIBUTE_NMTOKEN);
5958 }
5959 return(xmlParseEnumeratedType(ctxt, tree));
5960}
5961
5962/**
5963 * xmlParseAttributeListDecl:
5964 * @ctxt: an XML parser context
5965 *
5966 * : parse the Attribute list def for an element
5967 *
5968 * [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
5969 *
5970 * [53] AttDef ::= S Name S AttType S DefaultDecl
5971 *
5972 */
5973void
5974xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt) {
5975 const xmlChar *elemName;
5976 const xmlChar *attrName;
5977 xmlEnumerationPtr tree;
5978
5979 if (CMP9(CUR_PTR, '<', '!', 'A', 'T', 'T', 'L', 'I', 'S', 'T')) {
5980 int inputid = ctxt->input->id;
5981
5982 SKIP(9);
5983 if (SKIP_BLANKS == 0) {
5984 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5985 "Space required after '<!ATTLIST'\n");
5986 }
5987 elemName = xmlParseName(ctxt);
5988 if (elemName == NULL) {
5989 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5990 "ATTLIST: no name for Element\n");
5991 return;
5992 }
5993 SKIP_BLANKS;
5994 GROW;
5995 while ((RAW != '>') && (ctxt->instate != XML_PARSER_EOF)) {
5996 int type;
5997 int def;
5998 xmlChar *defaultValue = NULL;
5999
6000 GROW;
6001 tree = NULL;
6002 attrName = xmlParseName(ctxt);
6003 if (attrName == NULL) {
6004 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6005 "ATTLIST: no name for Attribute\n");
6006 break;
6007 }
6008 GROW;
6009 if (SKIP_BLANKS == 0) {
6010 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6011 "Space required after the attribute name\n");
6012 break;
6013 }
6014
6015 type = xmlParseAttributeType(ctxt, &tree);
6016 if (type <= 0) {
6017 break;
6018 }
6019
6020 GROW;
6021 if (SKIP_BLANKS == 0) {
6022 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6023 "Space required after the attribute type\n");
6024 if (tree != NULL)
6025 xmlFreeEnumeration(tree);
6026 break;
6027 }
6028
6029 def = xmlParseDefaultDecl(ctxt, &defaultValue);
6030 if (def <= 0) {
6031 if (defaultValue != NULL)
6032 xmlFree(defaultValue);
6033 if (tree != NULL)
6034 xmlFreeEnumeration(tree);
6035 break;
6036 }
6037 if ((type != XML_ATTRIBUTE_CDATA) && (defaultValue != NULL))
6038 xmlAttrNormalizeSpace(defaultValue, defaultValue);
6039
6040 GROW;
6041 if (RAW != '>') {
6042 if (SKIP_BLANKS == 0) {
6043 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6044 "Space required after the attribute default value\n");
6045 if (defaultValue != NULL)
6046 xmlFree(defaultValue);
6047 if (tree != NULL)
6048 xmlFreeEnumeration(tree);
6049 break;
6050 }
6051 }
6052 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
6053 (ctxt->sax->attributeDecl != NULL))
6054 ctxt->sax->attributeDecl(ctxt->userData, elemName, attrName,
6055 type, def, defaultValue, tree);
6056 else if (tree != NULL)
6057 xmlFreeEnumeration(tree);
6058
6059 if ((ctxt->sax2) && (defaultValue != NULL) &&
6060 (def != XML_ATTRIBUTE_IMPLIED) &&
6061 (def != XML_ATTRIBUTE_REQUIRED)) {
6062 xmlAddDefAttrs(ctxt, elemName, attrName, defaultValue);
6063 }
6064 if (ctxt->sax2) {
6065 xmlAddSpecialAttr(ctxt, elemName, attrName, type);
6066 }
6067 if (defaultValue != NULL)
6068 xmlFree(defaultValue);
6069 GROW;
6070 }
6071 if (RAW == '>') {
6072 if (inputid != ctxt->input->id) {
6073 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6074 "Attribute list declaration doesn't start and"
6075 " stop in the same entity\n");
6076 }
6077 NEXT;
6078 }
6079 }
6080}
6081
6082/**
6083 * xmlParseElementMixedContentDecl:
6084 * @ctxt: an XML parser context
6085 * @inputchk: the input used for the current entity, needed for boundary checks
6086 *
6087 * parse the declaration for a Mixed Element content
6088 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
6089 *
6090 * [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' |
6091 * '(' S? '#PCDATA' S? ')'
6092 *
6093 * [ VC: Proper Group/PE Nesting ] applies to [51] too (see [49])
6094 *
6095 * [ VC: No Duplicate Types ]
6096 * The same name must not appear more than once in a single
6097 * mixed-content declaration.
6098 *
6099 * returns: the list of the xmlElementContentPtr describing the element choices
6100 */
6101xmlElementContentPtr
6102xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt, int inputchk) {
6103 xmlElementContentPtr ret = NULL, cur = NULL, n;
6104 const xmlChar *elem = NULL;
6105
6106 GROW;
6107 if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
6108 SKIP(7);
6109 SKIP_BLANKS;
6110 SHRINK;
6111 if (RAW == ')') {
6112 if (ctxt->input->id != inputchk) {
6113 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6114 "Element content declaration doesn't start and"
6115 " stop in the same entity\n");
6116 }
6117 NEXT;
6118 ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA);
6119 if (ret == NULL)
6120 return(NULL);
6121 if (RAW == '*') {
6122 ret->ocur = XML_ELEMENT_CONTENT_MULT;
6123 NEXT;
6124 }
6125 return(ret);
6126 }
6127 if ((RAW == '(') || (RAW == '|')) {
6128 ret = cur = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA);
6129 if (ret == NULL) return(NULL);
6130 }
6131 while ((RAW == '|') && (ctxt->instate != XML_PARSER_EOF)) {
6132 NEXT;
6133 if (elem == NULL) {
6134 ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
6135 if (ret == NULL) {
6136 xmlFreeDocElementContent(ctxt->myDoc, cur);
6137 return(NULL);
6138 }
6139 ret->c1 = cur;
6140 if (cur != NULL)
6141 cur->parent = ret;
6142 cur = ret;
6143 } else {
6144 n = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
6145 if (n == NULL) {
6146 xmlFreeDocElementContent(ctxt->myDoc, ret);
6147 return(NULL);
6148 }
6149 n->c1 = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
6150 if (n->c1 != NULL)
6151 n->c1->parent = n;
6152 cur->c2 = n;
6153 if (n != NULL)
6154 n->parent = cur;
6155 cur = n;
6156 }
6157 SKIP_BLANKS;
6158 elem = xmlParseName(ctxt);
6159 if (elem == NULL) {
6160 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6161 "xmlParseElementMixedContentDecl : Name expected\n");
6162 xmlFreeDocElementContent(ctxt->myDoc, ret);
6163 return(NULL);
6164 }
6165 SKIP_BLANKS;
6166 GROW;
6167 }
6168 if ((RAW == ')') && (NXT(1) == '*')) {
6169 if (elem != NULL) {
6170 cur->c2 = xmlNewDocElementContent(ctxt->myDoc, elem,
6171 XML_ELEMENT_CONTENT_ELEMENT);
6172 if (cur->c2 != NULL)
6173 cur->c2->parent = cur;
6174 }
6175 if (ret != NULL)
6176 ret->ocur = XML_ELEMENT_CONTENT_MULT;
6177 if (ctxt->input->id != inputchk) {
6178 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6179 "Element content declaration doesn't start and"
6180 " stop in the same entity\n");
6181 }
6182 SKIP(2);
6183 } else {
6184 xmlFreeDocElementContent(ctxt->myDoc, ret);
6185 xmlFatalErr(ctxt, XML_ERR_MIXED_NOT_STARTED, NULL);
6186 return(NULL);
6187 }
6188
6189 } else {
6190 xmlFatalErr(ctxt, XML_ERR_PCDATA_REQUIRED, NULL);
6191 }
6192 return(ret);
6193}
6194
6195/**
6196 * xmlParseElementChildrenContentDeclPriv:
6197 * @ctxt: an XML parser context
6198 * @inputchk: the input used for the current entity, needed for boundary checks
6199 * @depth: the level of recursion
6200 *
6201 * parse the declaration for a Mixed Element content
6202 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
6203 *
6204 *
6205 * [47] children ::= (choice | seq) ('?' | '*' | '+')?
6206 *
6207 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
6208 *
6209 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
6210 *
6211 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
6212 *
6213 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
6214 * TODO Parameter-entity replacement text must be properly nested
6215 * with parenthesized groups. That is to say, if either of the
6216 * opening or closing parentheses in a choice, seq, or Mixed
6217 * construct is contained in the replacement text for a parameter
6218 * entity, both must be contained in the same replacement text. For
6219 * interoperability, if a parameter-entity reference appears in a
6220 * choice, seq, or Mixed construct, its replacement text should not
6221 * be empty, and neither the first nor last non-blank character of
6222 * the replacement text should be a connector (| or ,).
6223 *
6224 * Returns the tree of xmlElementContentPtr describing the element
6225 * hierarchy.
6226 */
6227static xmlElementContentPtr
6228xmlParseElementChildrenContentDeclPriv(xmlParserCtxtPtr ctxt, int inputchk,
6229 int depth) {
6230 xmlElementContentPtr ret = NULL, cur = NULL, last = NULL, op = NULL;
6231 const xmlChar *elem;
6232 xmlChar type = 0;
6233
6234 if (((depth > 128) && ((ctxt->options & XML_PARSE_HUGE) == 0)) ||
6235 (depth > 2048)) {
6236 xmlFatalErrMsgInt(ctxt, XML_ERR_ELEMCONTENT_NOT_FINISHED,
6237"xmlParseElementChildrenContentDecl : depth %d too deep, use XML_PARSE_HUGE\n",
6238 depth);
6239 return(NULL);
6240 }
6241 SKIP_BLANKS;
6242 GROW;
6243 if (RAW == '(') {
6244 int inputid = ctxt->input->id;
6245
6246 /* Recurse on first child */
6247 NEXT;
6248 SKIP_BLANKS;
6249 cur = ret = xmlParseElementChildrenContentDeclPriv(ctxt, inputid,
6250 depth + 1);
6251 if (cur == NULL)
6252 return(NULL);
6253 SKIP_BLANKS;
6254 GROW;
6255 } else {
6256 elem = xmlParseName(ctxt);
6257 if (elem == NULL) {
6258 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
6259 return(NULL);
6260 }
6261 cur = ret = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
6262 if (cur == NULL) {
6263 xmlErrMemory(ctxt, NULL);
6264 return(NULL);
6265 }
6266 GROW;
6267 if (RAW == '?') {
6268 cur->ocur = XML_ELEMENT_CONTENT_OPT;
6269 NEXT;
6270 } else if (RAW == '*') {
6271 cur->ocur = XML_ELEMENT_CONTENT_MULT;
6272 NEXT;
6273 } else if (RAW == '+') {
6274 cur->ocur = XML_ELEMENT_CONTENT_PLUS;
6275 NEXT;
6276 } else {
6277 cur->ocur = XML_ELEMENT_CONTENT_ONCE;
6278 }
6279 GROW;
6280 }
6281 SKIP_BLANKS;
6282 SHRINK;
6283 while ((RAW != ')') && (ctxt->instate != XML_PARSER_EOF)) {
6284 /*
6285 * Each loop we parse one separator and one element.
6286 */
6287 if (RAW == ',') {
6288 if (type == 0) type = CUR;
6289
6290 /*
6291 * Detect "Name | Name , Name" error
6292 */
6293 else if (type != CUR) {
6294 xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
6295 "xmlParseElementChildrenContentDecl : '%c' expected\n",
6296 type);
6297 if ((last != NULL) && (last != ret))
6298 xmlFreeDocElementContent(ctxt->myDoc, last);
6299 if (ret != NULL)
6300 xmlFreeDocElementContent(ctxt->myDoc, ret);
6301 return(NULL);
6302 }
6303 NEXT;
6304
6305 op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_SEQ);
6306 if (op == NULL) {
6307 if ((last != NULL) && (last != ret))
6308 xmlFreeDocElementContent(ctxt->myDoc, last);
6309 xmlFreeDocElementContent(ctxt->myDoc, ret);
6310 return(NULL);
6311 }
6312 if (last == NULL) {
6313 op->c1 = ret;
6314 if (ret != NULL)
6315 ret->parent = op;
6316 ret = cur = op;
6317 } else {
6318 cur->c2 = op;
6319 if (op != NULL)
6320 op->parent = cur;
6321 op->c1 = last;
6322 if (last != NULL)
6323 last->parent = op;
6324 cur =op;
6325 last = NULL;
6326 }
6327 } else if (RAW == '|') {
6328 if (type == 0) type = CUR;
6329
6330 /*
6331 * Detect "Name , Name | Name" error
6332 */
6333 else if (type != CUR) {
6334 xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
6335 "xmlParseElementChildrenContentDecl : '%c' expected\n",
6336 type);
6337 if ((last != NULL) && (last != ret))
6338 xmlFreeDocElementContent(ctxt->myDoc, last);
6339 if (ret != NULL)
6340 xmlFreeDocElementContent(ctxt->myDoc, ret);
6341 return(NULL);
6342 }
6343 NEXT;
6344
6345 op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
6346 if (op == NULL) {
6347 if ((last != NULL) && (last != ret))
6348 xmlFreeDocElementContent(ctxt->myDoc, last);
6349 if (ret != NULL)
6350 xmlFreeDocElementContent(ctxt->myDoc, ret);
6351 return(NULL);
6352 }
6353 if (last == NULL) {
6354 op->c1 = ret;
6355 if (ret != NULL)
6356 ret->parent = op;
6357 ret = cur = op;
6358 } else {
6359 cur->c2 = op;
6360 if (op != NULL)
6361 op->parent = cur;
6362 op->c1 = last;
6363 if (last != NULL)
6364 last->parent = op;
6365 cur =op;
6366 last = NULL;
6367 }
6368 } else {
6369 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_FINISHED, NULL);
6370 if ((last != NULL) && (last != ret))
6371 xmlFreeDocElementContent(ctxt->myDoc, last);
6372 if (ret != NULL)
6373 xmlFreeDocElementContent(ctxt->myDoc, ret);
6374 return(NULL);
6375 }
6376 GROW;
6377 SKIP_BLANKS;
6378 GROW;
6379 if (RAW == '(') {
6380 int inputid = ctxt->input->id;
6381 /* Recurse on second child */
6382 NEXT;
6383 SKIP_BLANKS;
6384 last = xmlParseElementChildrenContentDeclPriv(ctxt, inputid,
6385 depth + 1);
6386 if (last == NULL) {
6387 if (ret != NULL)
6388 xmlFreeDocElementContent(ctxt->myDoc, ret);
6389 return(NULL);
6390 }
6391 SKIP_BLANKS;
6392 } else {
6393 elem = xmlParseName(ctxt);
6394 if (elem == NULL) {
6395 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
6396 if (ret != NULL)
6397 xmlFreeDocElementContent(ctxt->myDoc, ret);
6398 return(NULL);
6399 }
6400 last = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
6401 if (last == NULL) {
6402 if (ret != NULL)
6403 xmlFreeDocElementContent(ctxt->myDoc, ret);
6404 return(NULL);
6405 }
6406 if (RAW == '?') {
6407 last->ocur = XML_ELEMENT_CONTENT_OPT;
6408 NEXT;
6409 } else if (RAW == '*') {
6410 last->ocur = XML_ELEMENT_CONTENT_MULT;
6411 NEXT;
6412 } else if (RAW == '+') {
6413 last->ocur = XML_ELEMENT_CONTENT_PLUS;
6414 NEXT;
6415 } else {
6416 last->ocur = XML_ELEMENT_CONTENT_ONCE;
6417 }
6418 }
6419 SKIP_BLANKS;
6420 GROW;
6421 }
6422 if ((cur != NULL) && (last != NULL)) {
6423 cur->c2 = last;
6424 if (last != NULL)
6425 last->parent = cur;
6426 }
6427 if (ctxt->input->id != inputchk) {
6428 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6429 "Element content declaration doesn't start and stop in"
6430 " the same entity\n");
6431 }
6432 NEXT;
6433 if (RAW == '?') {
6434 if (ret != NULL) {
6435 if ((ret->ocur == XML_ELEMENT_CONTENT_PLUS) ||
6436 (ret->ocur == XML_ELEMENT_CONTENT_MULT))
6437 ret->ocur = XML_ELEMENT_CONTENT_MULT;
6438 else
6439 ret->ocur = XML_ELEMENT_CONTENT_OPT;
6440 }
6441 NEXT;
6442 } else if (RAW == '*') {
6443 if (ret != NULL) {
6444 ret->ocur = XML_ELEMENT_CONTENT_MULT;
6445 cur = ret;
6446 /*
6447 * Some normalization:
6448 * (a | b* | c?)* == (a | b | c)*
6449 */
6450 while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) {
6451 if ((cur->c1 != NULL) &&
6452 ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
6453 (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT)))
6454 cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
6455 if ((cur->c2 != NULL) &&
6456 ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
6457 (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT)))
6458 cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
6459 cur = cur->c2;
6460 }
6461 }
6462 NEXT;
6463 } else if (RAW == '+') {
6464 if (ret != NULL) {
6465 int found = 0;
6466
6467 if ((ret->ocur == XML_ELEMENT_CONTENT_OPT) ||
6468 (ret->ocur == XML_ELEMENT_CONTENT_MULT))
6469 ret->ocur = XML_ELEMENT_CONTENT_MULT;
6470 else
6471 ret->ocur = XML_ELEMENT_CONTENT_PLUS;
6472 /*
6473 * Some normalization:
6474 * (a | b*)+ == (a | b)*
6475 * (a | b?)+ == (a | b)*
6476 */
6477 while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) {
6478 if ((cur->c1 != NULL) &&
6479 ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
6480 (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT))) {
6481 cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
6482 found = 1;
6483 }
6484 if ((cur->c2 != NULL) &&
6485 ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
6486 (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT))) {
6487 cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
6488 found = 1;
6489 }
6490 cur = cur->c2;
6491 }
6492 if (found)
6493 ret->ocur = XML_ELEMENT_CONTENT_MULT;
6494 }
6495 NEXT;
6496 }
6497 return(ret);
6498}
6499
6500/**
6501 * xmlParseElementChildrenContentDecl:
6502 * @ctxt: an XML parser context
6503 * @inputchk: the input used for the current entity, needed for boundary checks
6504 *
6505 * parse the declaration for a Mixed Element content
6506 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
6507 *
6508 * [47] children ::= (choice | seq) ('?' | '*' | '+')?
6509 *
6510 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
6511 *
6512 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
6513 *
6514 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
6515 *
6516 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
6517 * TODO Parameter-entity replacement text must be properly nested
6518 * with parenthesized groups. That is to say, if either of the
6519 * opening or closing parentheses in a choice, seq, or Mixed
6520 * construct is contained in the replacement text for a parameter
6521 * entity, both must be contained in the same replacement text. For
6522 * interoperability, if a parameter-entity reference appears in a
6523 * choice, seq, or Mixed construct, its replacement text should not
6524 * be empty, and neither the first nor last non-blank character of
6525 * the replacement text should be a connector (| or ,).
6526 *
6527 * Returns the tree of xmlElementContentPtr describing the element
6528 * hierarchy.
6529 */
6530xmlElementContentPtr
6531xmlParseElementChildrenContentDecl(xmlParserCtxtPtr ctxt, int inputchk) {
6532 /* stub left for API/ABI compat */
6533 return(xmlParseElementChildrenContentDeclPriv(ctxt, inputchk, 1));
6534}
6535
6536/**
6537 * xmlParseElementContentDecl:
6538 * @ctxt: an XML parser context
6539 * @name: the name of the element being defined.
6540 * @result: the Element Content pointer will be stored here if any
6541 *
6542 * parse the declaration for an Element content either Mixed or Children,
6543 * the cases EMPTY and ANY are handled directly in xmlParseElementDecl
6544 *
6545 * [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
6546 *
6547 * returns: the type of element content XML_ELEMENT_TYPE_xxx
6548 */
6549
6550int
6551xmlParseElementContentDecl(xmlParserCtxtPtr ctxt, const xmlChar *name,
6552 xmlElementContentPtr *result) {
6553
6554 xmlElementContentPtr tree = NULL;
6555 int inputid = ctxt->input->id;
6556 int res;
6557
6558 *result = NULL;
6559
6560 if (RAW != '(') {
6561 xmlFatalErrMsgStr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
6562 "xmlParseElementContentDecl : %s '(' expected\n", name);
6563 return(-1);
6564 }
6565 NEXT;
6566 GROW;
6567 if (ctxt->instate == XML_PARSER_EOF)
6568 return(-1);
6569 SKIP_BLANKS;
6570 if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
6571 tree = xmlParseElementMixedContentDecl(ctxt, inputid);
6572 res = XML_ELEMENT_TYPE_MIXED;
6573 } else {
6574 tree = xmlParseElementChildrenContentDeclPriv(ctxt, inputid, 1);
6575 res = XML_ELEMENT_TYPE_ELEMENT;
6576 }
6577 SKIP_BLANKS;
6578 *result = tree;
6579 return(res);
6580}
6581
6582/**
6583 * xmlParseElementDecl:
6584 * @ctxt: an XML parser context
6585 *
6586 * parse an Element declaration.
6587 *
6588 * [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'
6589 *
6590 * [ VC: Unique Element Type Declaration ]
6591 * No element type may be declared more than once
6592 *
6593 * Returns the type of the element, or -1 in case of error
6594 */
6595int
6596xmlParseElementDecl(xmlParserCtxtPtr ctxt) {
6597 const xmlChar *name;
6598 int ret = -1;
6599 xmlElementContentPtr content = NULL;
6600
6601 /* GROW; done in the caller */
6602 if (CMP9(CUR_PTR, '<', '!', 'E', 'L', 'E', 'M', 'E', 'N', 'T')) {
6603 int inputid = ctxt->input->id;
6604
6605 SKIP(9);
6606 if (SKIP_BLANKS == 0) {
6607 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6608 "Space required after 'ELEMENT'\n");
6609 return(-1);
6610 }
6611 name = xmlParseName(ctxt);
6612 if (name == NULL) {
6613 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6614 "xmlParseElementDecl: no name for Element\n");
6615 return(-1);
6616 }
6617 if (SKIP_BLANKS == 0) {
6618 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6619 "Space required after the element name\n");
6620 }
6621 if (CMP5(CUR_PTR, 'E', 'M', 'P', 'T', 'Y')) {
6622 SKIP(5);
6623 /*
6624 * Element must always be empty.
6625 */
6626 ret = XML_ELEMENT_TYPE_EMPTY;
6627 } else if ((RAW == 'A') && (NXT(1) == 'N') &&
6628 (NXT(2) == 'Y')) {
6629 SKIP(3);
6630 /*
6631 * Element is a generic container.
6632 */
6633 ret = XML_ELEMENT_TYPE_ANY;
6634 } else if (RAW == '(') {
6635 ret = xmlParseElementContentDecl(ctxt, name, &content);
6636 } else {
6637 /*
6638 * [ WFC: PEs in Internal Subset ] error handling.
6639 */
6640 if ((RAW == '%') && (ctxt->external == 0) &&
6641 (ctxt->inputNr == 1)) {
6642 xmlFatalErrMsg(ctxt, XML_ERR_PEREF_IN_INT_SUBSET,
6643 "PEReference: forbidden within markup decl in internal subset\n");
6644 } else {
6645 xmlFatalErrMsg(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
6646 "xmlParseElementDecl: 'EMPTY', 'ANY' or '(' expected\n");
6647 }
6648 return(-1);
6649 }
6650
6651 SKIP_BLANKS;
6652
6653 if (RAW != '>') {
6654 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
6655 if (content != NULL) {
6656 xmlFreeDocElementContent(ctxt->myDoc, content);
6657 }
6658 } else {
6659 if (inputid != ctxt->input->id) {
6660 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6661 "Element declaration doesn't start and stop in"
6662 " the same entity\n");
6663 }
6664
6665 NEXT;
6666 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
6667 (ctxt->sax->elementDecl != NULL)) {
6668 if (content != NULL)
6669 content->parent = NULL;
6670 ctxt->sax->elementDecl(ctxt->userData, name, ret,
6671 content);
6672 if ((content != NULL) && (content->parent == NULL)) {
6673 /*
6674 * this is a trick: if xmlAddElementDecl is called,
6675 * instead of copying the full tree it is plugged directly
6676 * if called from the parser. Avoid duplicating the
6677 * interfaces or change the API/ABI
6678 */
6679 xmlFreeDocElementContent(ctxt->myDoc, content);
6680 }
6681 } else if (content != NULL) {
6682 xmlFreeDocElementContent(ctxt->myDoc, content);
6683 }
6684 }
6685 }
6686 return(ret);
6687}
6688
6689/**
6690 * xmlParseConditionalSections
6691 * @ctxt: an XML parser context
6692 *
6693 * [61] conditionalSect ::= includeSect | ignoreSect
6694 * [62] includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>'
6695 * [63] ignoreSect ::= '<![' S? 'IGNORE' S? '[' ignoreSectContents* ']]>'
6696 * [64] ignoreSectContents ::= Ignore ('<![' ignoreSectContents ']]>' Ignore)*
6697 * [65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*)
6698 */
6699
6700static void
6701xmlParseConditionalSections(xmlParserCtxtPtr ctxt) {
6702 int *inputIds = NULL;
6703 size_t inputIdsSize = 0;
6704 size_t depth = 0;
6705
6706 while (ctxt->instate != XML_PARSER_EOF) {
6707 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6708 int id = ctxt->input->id;
6709
6710 SKIP(3);
6711 SKIP_BLANKS;
6712
6713 if (CMP7(CUR_PTR, 'I', 'N', 'C', 'L', 'U', 'D', 'E')) {
6714 SKIP(7);
6715 SKIP_BLANKS;
6716 if (RAW != '[') {
6717 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
6718 xmlHaltParser(ctxt);
6719 goto error;
6720 }
6721 if (ctxt->input->id != id) {
6722 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6723 "All markup of the conditional section is"
6724 " not in the same entity\n");
6725 }
6726 NEXT;
6727
6728 if (inputIdsSize <= depth) {
6729 int *tmp;
6730
6731 inputIdsSize = (inputIdsSize == 0 ? 4 : inputIdsSize * 2);
6732 tmp = (int *) xmlRealloc(inputIds,
6733 inputIdsSize * sizeof(int));
6734 if (tmp == NULL) {
6735 xmlErrMemory(ctxt, NULL);
6736 goto error;
6737 }
6738 inputIds = tmp;
6739 }
6740 inputIds[depth] = id;
6741 depth++;
6742 } else if (CMP6(CUR_PTR, 'I', 'G', 'N', 'O', 'R', 'E')) {
6743 int state;
6744 xmlParserInputState instate;
6745 size_t ignoreDepth = 0;
6746
6747 SKIP(6);
6748 SKIP_BLANKS;
6749 if (RAW != '[') {
6750 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
6751 xmlHaltParser(ctxt);
6752 goto error;
6753 }
6754 if (ctxt->input->id != id) {
6755 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6756 "All markup of the conditional section is"
6757 " not in the same entity\n");
6758 }
6759 NEXT;
6760
6761 /*
6762 * Parse up to the end of the conditional section but disable
6763 * SAX event generating DTD building in the meantime
6764 */
6765 state = ctxt->disableSAX;
6766 instate = ctxt->instate;
6767 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
6768 ctxt->instate = XML_PARSER_IGNORE;
6769
6770 while (RAW != 0) {
6771 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6772 SKIP(3);
6773 ignoreDepth++;
6774 /* Check for integer overflow */
6775 if (ignoreDepth == 0) {
6776 xmlErrMemory(ctxt, NULL);
6777 goto error;
6778 }
6779 } else if ((RAW == ']') && (NXT(1) == ']') &&
6780 (NXT(2) == '>')) {
6781 if (ignoreDepth == 0)
6782 break;
6783 SKIP(3);
6784 ignoreDepth--;
6785 } else {
6786 NEXT;
6787 }
6788 }
6789
6790 ctxt->disableSAX = state;
6791 ctxt->instate = instate;
6792
6793 if (RAW == 0) {
6794 xmlFatalErr(ctxt, XML_ERR_CONDSEC_NOT_FINISHED, NULL);
6795 goto error;
6796 }
6797 if (ctxt->input->id != id) {
6798 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6799 "All markup of the conditional section is"
6800 " not in the same entity\n");
6801 }
6802 SKIP(3);
6803 } else {
6804 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID_KEYWORD, NULL);
6805 xmlHaltParser(ctxt);
6806 goto error;
6807 }
6808 } else if ((depth > 0) &&
6809 (RAW == ']') && (NXT(1) == ']') && (NXT(2) == '>')) {
6810 depth--;
6811 if (ctxt->input->id != inputIds[depth]) {
6812 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6813 "All markup of the conditional section is not"
6814 " in the same entity\n");
6815 }
6816 SKIP(3);
6817 } else {
6818 const xmlChar *check = CUR_PTR;
6819 unsigned int cons = ctxt->input->consumed;
6820
6821 xmlParseMarkupDecl(ctxt);
6822
6823 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
6824 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
6825 xmlHaltParser(ctxt);
6826 goto error;
6827 }
6828 }
6829
6830 if (depth == 0)
6831 break;
6832
6833 SKIP_BLANKS;
6834 GROW;
6835 }
6836
6837error:
6838 xmlFree(inputIds);
6839}
6840
6841/**
6842 * xmlParseMarkupDecl:
6843 * @ctxt: an XML parser context
6844 *
6845 * parse Markup declarations
6846 *
6847 * [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl |
6848 * NotationDecl | PI | Comment
6849 *
6850 * [ VC: Proper Declaration/PE Nesting ]
6851 * Parameter-entity replacement text must be properly nested with
6852 * markup declarations. That is to say, if either the first character
6853 * or the last character of a markup declaration (markupdecl above) is
6854 * contained in the replacement text for a parameter-entity reference,
6855 * both must be contained in the same replacement text.
6856 *
6857 * [ WFC: PEs in Internal Subset ]
6858 * In the internal DTD subset, parameter-entity references can occur
6859 * only where markup declarations can occur, not within markup declarations.
6860 * (This does not apply to references that occur in external parameter
6861 * entities or to the external subset.)
6862 */
6863void
6864xmlParseMarkupDecl(xmlParserCtxtPtr ctxt) {
6865 GROW;
6866 if (CUR == '<') {
6867 if (NXT(1) == '!') {
6868 switch (NXT(2)) {
6869 case 'E':
6870 if (NXT(3) == 'L')
6871 xmlParseElementDecl(ctxt);
6872 else if (NXT(3) == 'N')
6873 xmlParseEntityDecl(ctxt);
6874 break;
6875 case 'A':
6876 xmlParseAttributeListDecl(ctxt);
6877 break;
6878 case 'N':
6879 xmlParseNotationDecl(ctxt);
6880 break;
6881 case '-':
6882 xmlParseComment(ctxt);
6883 break;
6884 default:
6885 /* there is an error but it will be detected later */
6886 break;
6887 }
6888 } else if (NXT(1) == '?') {
6889 xmlParsePI(ctxt);
6890 }
6891 }
6892
6893 /*
6894 * detect requirement to exit there and act accordingly
6895 * and avoid having instate overridden later on
6896 */
6897 if (ctxt->instate == XML_PARSER_EOF)
6898 return;
6899
6900 ctxt->instate = XML_PARSER_DTD;
6901}
6902
6903/**
6904 * xmlParseTextDecl:
6905 * @ctxt: an XML parser context
6906 *
6907 * parse an XML declaration header for external entities
6908 *
6909 * [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>'
6910 */
6911
6912void
6913xmlParseTextDecl(xmlParserCtxtPtr ctxt) {
6914 xmlChar *version;
6915 const xmlChar *encoding;
6916 int oldstate;
6917
6918 /*
6919 * We know that '<?xml' is here.
6920 */
6921 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
6922 SKIP(5);
6923 } else {
6924 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_STARTED, NULL);
6925 return;
6926 }
6927
6928 /* Avoid expansion of parameter entities when skipping blanks. */
6929 oldstate = ctxt->instate;
6930 ctxt->instate = XML_PARSER_START;
6931
6932 if (SKIP_BLANKS == 0) {
6933 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6934 "Space needed after '<?xml'\n");
6935 }
6936
6937 /*
6938 * We may have the VersionInfo here.
6939 */
6940 version = xmlParseVersionInfo(ctxt);
6941 if (version == NULL)
6942 version = xmlCharStrdup(XML_DEFAULT_VERSION);
6943 else {
6944 if (SKIP_BLANKS == 0) {
6945 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6946 "Space needed here\n");
6947 }
6948 }
6949 ctxt->input->version = version;
6950
6951 /*
6952 * We must have the encoding declaration
6953 */
6954 encoding = xmlParseEncodingDecl(ctxt);
6955 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
6956 /*
6957 * The XML REC instructs us to stop parsing right here
6958 */
6959 ctxt->instate = oldstate;
6960 return;
6961 }
6962 if ((encoding == NULL) && (ctxt->errNo == XML_ERR_OK)) {
6963 xmlFatalErrMsg(ctxt, XML_ERR_MISSING_ENCODING,
6964 "Missing encoding in text declaration\n");
6965 }
6966
6967 SKIP_BLANKS;
6968 if ((RAW == '?') && (NXT(1) == '>')) {
6969 SKIP(2);
6970 } else if (RAW == '>') {
6971 /* Deprecated old WD ... */
6972 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
6973 NEXT;
6974 } else {
6975 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
6976 MOVETO_ENDTAG(CUR_PTR);
6977 NEXT;
6978 }
6979
6980 ctxt->instate = oldstate;
6981}
6982
6983/**
6984 * xmlParseExternalSubset:
6985 * @ctxt: an XML parser context
6986 * @ExternalID: the external identifier
6987 * @SystemID: the system identifier (or URL)
6988 *
6989 * parse Markup declarations from an external subset
6990 *
6991 * [30] extSubset ::= textDecl? extSubsetDecl
6992 *
6993 * [31] extSubsetDecl ::= (markupdecl | conditionalSect | PEReference | S) *
6994 */
6995void
6996xmlParseExternalSubset(xmlParserCtxtPtr ctxt, const xmlChar *ExternalID,
6997 const xmlChar *SystemID) {
6998 xmlDetectSAX2(ctxt);
6999 GROW;
7000
7001 if ((ctxt->encoding == NULL) &&
7002 (ctxt->input->end - ctxt->input->cur >= 4)) {
7003 xmlChar start[4];
7004 xmlCharEncoding enc;
7005
7006 start[0] = RAW;
7007 start[1] = NXT(1);
7008 start[2] = NXT(2);
7009 start[3] = NXT(3);
7010 enc = xmlDetectCharEncoding(start, 4);
7011 if (enc != XML_CHAR_ENCODING_NONE)
7012 xmlSwitchEncoding(ctxt, enc);
7013 }
7014
7015 if (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) {
7016 xmlParseTextDecl(ctxt);
7017 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
7018 /*
7019 * The XML REC instructs us to stop parsing right here
7020 */
7021 xmlHaltParser(ctxt);
7022 return;
7023 }
7024 }
7025 if (ctxt->myDoc == NULL) {
7026 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
7027 if (ctxt->myDoc == NULL) {
7028 xmlErrMemory(ctxt, "New Doc failed");
7029 return;
7030 }
7031 ctxt->myDoc->properties = XML_DOC_INTERNAL;
7032 }
7033 if ((ctxt->myDoc != NULL) && (ctxt->myDoc->intSubset == NULL))
7034 xmlCreateIntSubset(ctxt->myDoc, NULL, ExternalID, SystemID);
7035
7036 ctxt->instate = XML_PARSER_DTD;
7037 ctxt->external = 1;
7038 SKIP_BLANKS;
7039 while (((RAW == '<') && (NXT(1) == '?')) ||
7040 ((RAW == '<') && (NXT(1) == '!')) ||
7041 (RAW == '%')) {
7042 const xmlChar *check = CUR_PTR;
7043 unsigned int cons = ctxt->input->consumed;
7044
7045 GROW;
7046 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
7047 xmlParseConditionalSections(ctxt);
7048 } else
7049 xmlParseMarkupDecl(ctxt);
7050 SKIP_BLANKS;
7051
7052 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
7053 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
7054 break;
7055 }
7056 }
7057
7058 if (RAW != 0) {
7059 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
7060 }
7061
7062}
7063
7064/**
7065 * xmlParseReference:
7066 * @ctxt: an XML parser context
7067 *
7068 * parse and handle entity references in content, depending on the SAX
7069 * interface, this may end-up in a call to character() if this is a
7070 * CharRef, a predefined entity, if there is no reference() callback.
7071 * or if the parser was asked to switch to that mode.
7072 *
7073 * [67] Reference ::= EntityRef | CharRef
7074 */
7075void
7076xmlParseReference(xmlParserCtxtPtr ctxt) {
7077 xmlEntityPtr ent;
7078 xmlChar *val;
7079 int was_checked;
7080 xmlNodePtr list = NULL;
7081 xmlParserErrors ret = XML_ERR_OK;
7082
7083
7084 if (RAW != '&')
7085 return;
7086
7087 /*
7088 * Simple case of a CharRef
7089 */
7090 if (NXT(1) == '#') {
7091 int i = 0;
7092 xmlChar out[16];
7093 int hex = NXT(2);
7094 int value = xmlParseCharRef(ctxt);
7095
7096 if (value == 0)
7097 return;
7098 if (ctxt->charset != XML_CHAR_ENCODING_UTF8) {
7099 /*
7100 * So we are using non-UTF-8 buffers
7101 * Check that the char fit on 8bits, if not
7102 * generate a CharRef.
7103 */
7104 if (value <= 0xFF) {
7105 out[0] = value;
7106 out[1] = 0;
7107 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
7108 (!ctxt->disableSAX))
7109 ctxt->sax->characters(ctxt->userData, out, 1);
7110 } else {
7111 if ((hex == 'x') || (hex == 'X'))
7112 snprintf((char *)out, sizeof(out), "#x%X", value);
7113 else
7114 snprintf((char *)out, sizeof(out), "#%d", value);
7115 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
7116 (!ctxt->disableSAX))
7117 ctxt->sax->reference(ctxt->userData, out);
7118 }
7119 } else {
7120 /*
7121 * Just encode the value in UTF-8
7122 */
7123 COPY_BUF(0 ,out, i, value);
7124 out[i] = 0;
7125 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
7126 (!ctxt->disableSAX))
7127 ctxt->sax->characters(ctxt->userData, out, i);
7128 }
7129 return;
7130 }
7131
7132 /*
7133 * We are seeing an entity reference
7134 */
7135 ent = xmlParseEntityRef(ctxt);
7136 if (ent == NULL) return;
7137 if (!ctxt->wellFormed)
7138 return;
7139 was_checked = ent->checked;
7140
7141 /* special case of predefined entities */
7142 if ((ent->name == NULL) ||
7143 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
7144 val = ent->content;
7145 if (val == NULL) return;
7146 /*
7147 * inline the entity.
7148 */
7149 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
7150 (!ctxt->disableSAX))
7151 ctxt->sax->characters(ctxt->userData, val, xmlStrlen(val));
7152 return;
7153 }
7154
7155 /*
7156 * The first reference to the entity trigger a parsing phase
7157 * where the ent->children is filled with the result from
7158 * the parsing.
7159 * Note: external parsed entities will not be loaded, it is not
7160 * required for a non-validating parser, unless the parsing option
7161 * of validating, or substituting entities were given. Doing so is
7162 * far more secure as the parser will only process data coming from
7163 * the document entity by default.
7164 */
7165 if (((ent->checked == 0) ||
7166 ((ent->children == NULL) && (ctxt->options & XML_PARSE_NOENT))) &&
7167 ((ent->etype != XML_EXTERNAL_GENERAL_PARSED_ENTITY) ||
7168 (ctxt->options & (XML_PARSE_NOENT | XML_PARSE_DTDVALID)))) {
7169 unsigned long oldnbent = ctxt->nbentities, diff;
7170
7171 /*
7172 * This is a bit hackish but this seems the best
7173 * way to make sure both SAX and DOM entity support
7174 * behaves okay.
7175 */
7176 void *user_data;
7177 if (ctxt->userData == ctxt)
7178 user_data = NULL;
7179 else
7180 user_data = ctxt->userData;
7181
7182 /*
7183 * Check that this entity is well formed
7184 * 4.3.2: An internal general parsed entity is well-formed
7185 * if its replacement text matches the production labeled
7186 * content.
7187 */
7188 if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
7189 ctxt->depth++;
7190 ret = xmlParseBalancedChunkMemoryInternal(ctxt, ent->content,
7191 user_data, &list);
7192 ctxt->depth--;
7193
7194 } else if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
7195 ctxt->depth++;
7196 ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt, ctxt->sax,
7197 user_data, ctxt->depth, ent->URI,
7198 ent->ExternalID, &list);
7199 ctxt->depth--;
7200 } else {
7201 ret = XML_ERR_ENTITY_PE_INTERNAL;
7202 xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR,
7203 "invalid entity type found\n", NULL);
7204 }
7205
7206 /*
7207 * Store the number of entities needing parsing for this entity
7208 * content and do checkings
7209 */
7210 diff = ctxt->nbentities - oldnbent + 1;
7211 if (diff > INT_MAX / 2)
7212 diff = INT_MAX / 2;
7213 ent->checked = diff * 2;
7214 if ((ent->content != NULL) && (xmlStrchr(ent->content, '<')))
7215 ent->checked |= 1;
7216 if (ret == XML_ERR_ENTITY_LOOP) {
7217 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
7218 xmlHaltParser(ctxt);
7219 xmlFreeNodeList(list);
7220 return;
7221 }
7222 if (xmlParserEntityCheck(ctxt, 0, ent, 0)) {
7223 xmlFreeNodeList(list);
7224 return;
7225 }
7226
7227 if ((ret == XML_ERR_OK) && (list != NULL)) {
7228 if (((ent->etype == XML_INTERNAL_GENERAL_ENTITY) ||
7229 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY))&&
7230 (ent->children == NULL)) {
7231 ent->children = list;
7232 /*
7233 * Prune it directly in the generated document
7234 * except for single text nodes.
7235 */
7236 if ((ctxt->replaceEntities == 0) ||
7237 (ctxt->parseMode == XML_PARSE_READER) ||
7238 ((list->type == XML_TEXT_NODE) &&
7239 (list->next == NULL))) {
7240 ent->owner = 1;
7241 while (list != NULL) {
7242 list->parent = (xmlNodePtr) ent;
7243 xmlSetTreeDoc(list, ent->doc);
7244 if (list->next == NULL)
7245 ent->last = list;
7246 list = list->next;
7247 }
7248 list = NULL;
7249 } else {
7250 ent->owner = 0;
7251 while (list != NULL) {
7252 list->parent = (xmlNodePtr) ctxt->node;
7253 list->doc = ctxt->myDoc;
7254 if (list->next == NULL)
7255 ent->last = list;
7256 list = list->next;
7257 }
7258 list = ent->children;
7259#ifdef LIBXML_LEGACY_ENABLED
7260 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
7261 xmlAddEntityReference(ent, list, NULL);
7262#endif /* LIBXML_LEGACY_ENABLED */
7263 }
7264 } else {
7265 xmlFreeNodeList(list);
7266 list = NULL;
7267 }
7268 } else if ((ret != XML_ERR_OK) &&
7269 (ret != XML_WAR_UNDECLARED_ENTITY)) {
7270 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7271 "Entity '%s' failed to parse\n", ent->name);
7272 if (ent->content != NULL)
7273 ent->content[0] = 0;
7274 xmlParserEntityCheck(ctxt, 0, ent, 0);
7275 } else if (list != NULL) {
7276 xmlFreeNodeList(list);
7277 list = NULL;
7278 }
7279 if (ent->checked == 0)
7280 ent->checked = 2;
7281
7282 /* Prevent entity from being parsed and expanded twice (Bug 760367). */
7283 was_checked = 0;
7284 } else if (ent->checked != 1) {
7285 ctxt->nbentities += ent->checked / 2;
7286 }
7287
7288 /*
7289 * Now that the entity content has been gathered
7290 * provide it to the application, this can take different forms based
7291 * on the parsing modes.
7292 */
7293 if (ent->children == NULL) {
7294 /*
7295 * Probably running in SAX mode and the callbacks don't
7296 * build the entity content. So unless we already went
7297 * though parsing for first checking go though the entity
7298 * content to generate callbacks associated to the entity
7299 */
7300 if (was_checked != 0) {
7301 void *user_data;
7302 /*
7303 * This is a bit hackish but this seems the best
7304 * way to make sure both SAX and DOM entity support
7305 * behaves okay.
7306 */
7307 if (ctxt->userData == ctxt)
7308 user_data = NULL;
7309 else
7310 user_data = ctxt->userData;
7311
7312 if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
7313 ctxt->depth++;
7314 ret = xmlParseBalancedChunkMemoryInternal(ctxt,
7315 ent->content, user_data, NULL);
7316 ctxt->depth--;
7317 } else if (ent->etype ==
7318 XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
7319 ctxt->depth++;
7320 ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt,
7321 ctxt->sax, user_data, ctxt->depth,
7322 ent->URI, ent->ExternalID, NULL);
7323 ctxt->depth--;
7324 } else {
7325 ret = XML_ERR_ENTITY_PE_INTERNAL;
7326 xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR,
7327 "invalid entity type found\n", NULL);
7328 }
7329 if (ret == XML_ERR_ENTITY_LOOP) {
7330 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
7331 return;
7332 }
7333 }
7334 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
7335 (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
7336 /*
7337 * Entity reference callback comes second, it's somewhat
7338 * superfluous but a compatibility to historical behaviour
7339 */
7340 ctxt->sax->reference(ctxt->userData, ent->name);
7341 }
7342 return;
7343 }
7344
7345 /*
7346 * If we didn't get any children for the entity being built
7347 */
7348 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
7349 (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
7350 /*
7351 * Create a node.
7352 */
7353 ctxt->sax->reference(ctxt->userData, ent->name);
7354 return;
7355 }
7356
7357 if ((ctxt->replaceEntities) || (ent->children == NULL)) {
7358 /*
7359 * There is a problem on the handling of _private for entities
7360 * (bug 155816): Should we copy the content of the field from
7361 * the entity (possibly overwriting some value set by the user
7362 * when a copy is created), should we leave it alone, or should
7363 * we try to take care of different situations? The problem
7364 * is exacerbated by the usage of this field by the xmlReader.
7365 * To fix this bug, we look at _private on the created node
7366 * and, if it's NULL, we copy in whatever was in the entity.
7367 * If it's not NULL we leave it alone. This is somewhat of a
7368 * hack - maybe we should have further tests to determine
7369 * what to do.
7370 */
7371 if ((ctxt->node != NULL) && (ent->children != NULL)) {
7372 /*
7373 * Seems we are generating the DOM content, do
7374 * a simple tree copy for all references except the first
7375 * In the first occurrence list contains the replacement.
7376 */
7377 if (((list == NULL) && (ent->owner == 0)) ||
7378 (ctxt->parseMode == XML_PARSE_READER)) {
7379 xmlNodePtr nw = NULL, cur, firstChild = NULL;
7380
7381 /*
7382 * We are copying here, make sure there is no abuse
7383 */
7384 ctxt->sizeentcopy += ent->length + 5;
7385 if (xmlParserEntityCheck(ctxt, 0, ent, ctxt->sizeentcopy))
7386 return;
7387
7388 /*
7389 * when operating on a reader, the entities definitions
7390 * are always owning the entities subtree.
7391 if (ctxt->parseMode == XML_PARSE_READER)
7392 ent->owner = 1;
7393 */
7394
7395 cur = ent->children;
7396 while (cur != NULL) {
7397 nw = xmlDocCopyNode(cur, ctxt->myDoc, 1);
7398 if (nw != NULL) {
7399 if (nw->_private == NULL)
7400 nw->_private = cur->_private;
7401 if (firstChild == NULL){
7402 firstChild = nw;
7403 }
7404 nw = xmlAddChild(ctxt->node, nw);
7405 }
7406 if (cur == ent->last) {
7407 /*
7408 * needed to detect some strange empty
7409 * node cases in the reader tests
7410 */
7411 if ((ctxt->parseMode == XML_PARSE_READER) &&
7412 (nw != NULL) &&
7413 (nw->type == XML_ELEMENT_NODE) &&
7414 (nw->children == NULL))
7415 nw->extra = 1;
7416
7417 break;
7418 }
7419 cur = cur->next;
7420 }
7421#ifdef LIBXML_LEGACY_ENABLED
7422 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
7423 xmlAddEntityReference(ent, firstChild, nw);
7424#endif /* LIBXML_LEGACY_ENABLED */
7425 } else if ((list == NULL) || (ctxt->inputNr > 0)) {
7426 xmlNodePtr nw = NULL, cur, next, last,
7427 firstChild = NULL;
7428
7429 /*
7430 * We are copying here, make sure there is no abuse
7431 */
7432 ctxt->sizeentcopy += ent->length + 5;
7433 if (xmlParserEntityCheck(ctxt, 0, ent, ctxt->sizeentcopy))
7434 return;
7435
7436 /*
7437 * Copy the entity child list and make it the new
7438 * entity child list. The goal is to make sure any
7439 * ID or REF referenced will be the one from the
7440 * document content and not the entity copy.
7441 */
7442 cur = ent->children;
7443 ent->children = NULL;
7444 last = ent->last;
7445 ent->last = NULL;
7446 while (cur != NULL) {
7447 next = cur->next;
7448 cur->next = NULL;
7449 cur->parent = NULL;
7450 nw = xmlDocCopyNode(cur, ctxt->myDoc, 1);
7451 if (nw != NULL) {
7452 if (nw->_private == NULL)
7453 nw->_private = cur->_private;
7454 if (firstChild == NULL){
7455 firstChild = cur;
7456 }
7457 xmlAddChild((xmlNodePtr) ent, nw);
7458 xmlAddChild(ctxt->node, cur);
7459 }
7460 if (cur == last)
7461 break;
7462 cur = next;
7463 }
7464 if (ent->owner == 0)
7465 ent->owner = 1;
7466#ifdef LIBXML_LEGACY_ENABLED
7467 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
7468 xmlAddEntityReference(ent, firstChild, nw);
7469#endif /* LIBXML_LEGACY_ENABLED */
7470 } else {
7471 const xmlChar *nbktext;
7472
7473 /*
7474 * the name change is to avoid coalescing of the
7475 * node with a possible previous text one which
7476 * would make ent->children a dangling pointer
7477 */
7478 nbktext = xmlDictLookup(ctxt->dict, BAD_CAST "nbktext",
7479 -1);
7480 if (ent->children->type == XML_TEXT_NODE)
7481 ent->children->name = nbktext;
7482 if ((ent->last != ent->children) &&
7483 (ent->last->type == XML_TEXT_NODE))
7484 ent->last->name = nbktext;
7485 xmlAddChildList(ctxt->node, ent->children);
7486 }
7487
7488 /*
7489 * This is to avoid a nasty side effect, see
7490 * characters() in SAX.c
7491 */
7492 ctxt->nodemem = 0;
7493 ctxt->nodelen = 0;
7494 return;
7495 }
7496 }
7497}
7498
7499/**
7500 * xmlParseEntityRef:
7501 * @ctxt: an XML parser context
7502 *
7503 * parse ENTITY references declarations
7504 *
7505 * [68] EntityRef ::= '&' Name ';'
7506 *
7507 * [ WFC: Entity Declared ]
7508 * In a document without any DTD, a document with only an internal DTD
7509 * subset which contains no parameter entity references, or a document
7510 * with "standalone='yes'", the Name given in the entity reference
7511 * must match that in an entity declaration, except that well-formed
7512 * documents need not declare any of the following entities: amp, lt,
7513 * gt, apos, quot. The declaration of a parameter entity must precede
7514 * any reference to it. Similarly, the declaration of a general entity
7515 * must precede any reference to it which appears in a default value in an
7516 * attribute-list declaration. Note that if entities are declared in the
7517 * external subset or in external parameter entities, a non-validating
7518 * processor is not obligated to read and process their declarations;
7519 * for such documents, the rule that an entity must be declared is a
7520 * well-formedness constraint only if standalone='yes'.
7521 *
7522 * [ WFC: Parsed Entity ]
7523 * An entity reference must not contain the name of an unparsed entity
7524 *
7525 * Returns the xmlEntityPtr if found, or NULL otherwise.
7526 */
7527xmlEntityPtr
7528xmlParseEntityRef(xmlParserCtxtPtr ctxt) {
7529 const xmlChar *name;
7530 xmlEntityPtr ent = NULL;
7531
7532 GROW;
7533 if (ctxt->instate == XML_PARSER_EOF)
7534 return(NULL);
7535
7536 if (RAW != '&')
7537 return(NULL);
7538 NEXT;
7539 name = xmlParseName(ctxt);
7540 if (name == NULL) {
7541 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7542 "xmlParseEntityRef: no name\n");
7543 return(NULL);
7544 }
7545 if (RAW != ';') {
7546 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
7547 return(NULL);
7548 }
7549 NEXT;
7550
7551 /*
7552 * Predefined entities override any extra definition
7553 */
7554 if ((ctxt->options & XML_PARSE_OLDSAX) == 0) {
7555 ent = xmlGetPredefinedEntity(name);
7556 if (ent != NULL)
7557 return(ent);
7558 }
7559
7560 /*
7561 * Increase the number of entity references parsed
7562 */
7563 ctxt->nbentities++;
7564
7565 /*
7566 * Ask first SAX for entity resolution, otherwise try the
7567 * entities which may have stored in the parser context.
7568 */
7569 if (ctxt->sax != NULL) {
7570 if (ctxt->sax->getEntity != NULL)
7571 ent = ctxt->sax->getEntity(ctxt->userData, name);
7572 if ((ctxt->wellFormed == 1 ) && (ent == NULL) &&
7573 (ctxt->options & XML_PARSE_OLDSAX))
7574 ent = xmlGetPredefinedEntity(name);
7575 if ((ctxt->wellFormed == 1 ) && (ent == NULL) &&
7576 (ctxt->userData==ctxt)) {
7577 ent = xmlSAX2GetEntity(ctxt, name);
7578 }
7579 }
7580 if (ctxt->instate == XML_PARSER_EOF)
7581 return(NULL);
7582 /*
7583 * [ WFC: Entity Declared ]
7584 * In a document without any DTD, a document with only an
7585 * internal DTD subset which contains no parameter entity
7586 * references, or a document with "standalone='yes'", the
7587 * Name given in the entity reference must match that in an
7588 * entity declaration, except that well-formed documents
7589 * need not declare any of the following entities: amp, lt,
7590 * gt, apos, quot.
7591 * The declaration of a parameter entity must precede any
7592 * reference to it.
7593 * Similarly, the declaration of a general entity must
7594 * precede any reference to it which appears in a default
7595 * value in an attribute-list declaration. Note that if
7596 * entities are declared in the external subset or in
7597 * external parameter entities, a non-validating processor
7598 * is not obligated to read and process their declarations;
7599 * for such documents, the rule that an entity must be
7600 * declared is a well-formedness constraint only if
7601 * standalone='yes'.
7602 */
7603 if (ent == NULL) {
7604 if ((ctxt->standalone == 1) ||
7605 ((ctxt->hasExternalSubset == 0) &&
7606 (ctxt->hasPErefs == 0))) {
7607 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7608 "Entity '%s' not defined\n", name);
7609 } else {
7610 xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
7611 "Entity '%s' not defined\n", name);
7612 if ((ctxt->inSubset == 0) &&
7613 (ctxt->sax != NULL) &&
7614 (ctxt->sax->reference != NULL)) {
7615 ctxt->sax->reference(ctxt->userData, name);
7616 }
7617 }
7618 xmlParserEntityCheck(ctxt, 0, ent, 0);
7619 ctxt->valid = 0;
7620 }
7621
7622 /*
7623 * [ WFC: Parsed Entity ]
7624 * An entity reference must not contain the name of an
7625 * unparsed entity
7626 */
7627 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
7628 xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
7629 "Entity reference to unparsed entity %s\n", name);
7630 }
7631
7632 /*
7633 * [ WFC: No External Entity References ]
7634 * Attribute values cannot contain direct or indirect
7635 * entity references to external entities.
7636 */
7637 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7638 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
7639 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
7640 "Attribute references external entity '%s'\n", name);
7641 }
7642 /*
7643 * [ WFC: No < in Attribute Values ]
7644 * The replacement text of any entity referred to directly or
7645 * indirectly in an attribute value (other than "&lt;") must
7646 * not contain a <.
7647 */
7648 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7649 (ent != NULL) &&
7650 (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY)) {
7651 if (((ent->checked & 1) || (ent->checked == 0)) &&
7652 (ent->content != NULL) && (xmlStrchr(ent->content, '<'))) {
7653 xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
7654 "'<' in entity '%s' is not allowed in attributes values\n", name);
7655 }
7656 }
7657
7658 /*
7659 * Internal check, no parameter entities here ...
7660 */
7661 else {
7662 switch (ent->etype) {
7663 case XML_INTERNAL_PARAMETER_ENTITY:
7664 case XML_EXTERNAL_PARAMETER_ENTITY:
7665 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
7666 "Attempt to reference the parameter entity '%s'\n",
7667 name);
7668 break;
7669 default:
7670 break;
7671 }
7672 }
7673
7674 /*
7675 * [ WFC: No Recursion ]
7676 * A parsed entity must not contain a recursive reference
7677 * to itself, either directly or indirectly.
7678 * Done somewhere else
7679 */
7680 return(ent);
7681}
7682
7683/**
7684 * xmlParseStringEntityRef:
7685 * @ctxt: an XML parser context
7686 * @str: a pointer to an index in the string
7687 *
7688 * parse ENTITY references declarations, but this version parses it from
7689 * a string value.
7690 *
7691 * [68] EntityRef ::= '&' Name ';'
7692 *
7693 * [ WFC: Entity Declared ]
7694 * In a document without any DTD, a document with only an internal DTD
7695 * subset which contains no parameter entity references, or a document
7696 * with "standalone='yes'", the Name given in the entity reference
7697 * must match that in an entity declaration, except that well-formed
7698 * documents need not declare any of the following entities: amp, lt,
7699 * gt, apos, quot. The declaration of a parameter entity must precede
7700 * any reference to it. Similarly, the declaration of a general entity
7701 * must precede any reference to it which appears in a default value in an
7702 * attribute-list declaration. Note that if entities are declared in the
7703 * external subset or in external parameter entities, a non-validating
7704 * processor is not obligated to read and process their declarations;
7705 * for such documents, the rule that an entity must be declared is a
7706 * well-formedness constraint only if standalone='yes'.
7707 *
7708 * [ WFC: Parsed Entity ]
7709 * An entity reference must not contain the name of an unparsed entity
7710 *
7711 * Returns the xmlEntityPtr if found, or NULL otherwise. The str pointer
7712 * is updated to the current location in the string.
7713 */
7714static xmlEntityPtr
7715xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar ** str) {
7716 xmlChar *name;
7717 const xmlChar *ptr;
7718 xmlChar cur;
7719 xmlEntityPtr ent = NULL;
7720
7721 if ((str == NULL) || (*str == NULL))
7722 return(NULL);
7723 ptr = *str;
7724 cur = *ptr;
7725 if (cur != '&')
7726 return(NULL);
7727
7728 ptr++;
7729 name = xmlParseStringName(ctxt, &ptr);
7730 if (name == NULL) {
7731 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7732 "xmlParseStringEntityRef: no name\n");
7733 *str = ptr;
7734 return(NULL);
7735 }
7736 if (*ptr != ';') {
7737 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
7738 xmlFree(name);
7739 *str = ptr;
7740 return(NULL);
7741 }
7742 ptr++;
7743
7744
7745 /*
7746 * Predefined entities override any extra definition
7747 */
7748 if ((ctxt->options & XML_PARSE_OLDSAX) == 0) {
7749 ent = xmlGetPredefinedEntity(name);
7750 if (ent != NULL) {
7751 xmlFree(name);
7752 *str = ptr;
7753 return(ent);
7754 }
7755 }
7756
7757 /*
7758 * Increase the number of entity references parsed
7759 */
7760 ctxt->nbentities++;
7761
7762 /*
7763 * Ask first SAX for entity resolution, otherwise try the
7764 * entities which may have stored in the parser context.
7765 */
7766 if (ctxt->sax != NULL) {
7767 if (ctxt->sax->getEntity != NULL)
7768 ent = ctxt->sax->getEntity(ctxt->userData, name);
7769 if ((ent == NULL) && (ctxt->options & XML_PARSE_OLDSAX))
7770 ent = xmlGetPredefinedEntity(name);
7771 if ((ent == NULL) && (ctxt->userData==ctxt)) {
7772 ent = xmlSAX2GetEntity(ctxt, name);
7773 }
7774 }
7775 if (ctxt->instate == XML_PARSER_EOF) {
7776 xmlFree(name);
7777 return(NULL);
7778 }
7779
7780 /*
7781 * [ WFC: Entity Declared ]
7782 * In a document without any DTD, a document with only an
7783 * internal DTD subset which contains no parameter entity
7784 * references, or a document with "standalone='yes'", the
7785 * Name given in the entity reference must match that in an
7786 * entity declaration, except that well-formed documents
7787 * need not declare any of the following entities: amp, lt,
7788 * gt, apos, quot.
7789 * The declaration of a parameter entity must precede any
7790 * reference to it.
7791 * Similarly, the declaration of a general entity must
7792 * precede any reference to it which appears in a default
7793 * value in an attribute-list declaration. Note that if
7794 * entities are declared in the external subset or in
7795 * external parameter entities, a non-validating processor
7796 * is not obligated to read and process their declarations;
7797 * for such documents, the rule that an entity must be
7798 * declared is a well-formedness constraint only if
7799 * standalone='yes'.
7800 */
7801 if (ent == NULL) {
7802 if ((ctxt->standalone == 1) ||
7803 ((ctxt->hasExternalSubset == 0) &&
7804 (ctxt->hasPErefs == 0))) {
7805 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7806 "Entity '%s' not defined\n", name);
7807 } else {
7808 xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
7809 "Entity '%s' not defined\n",
7810 name);
7811 }
7812 xmlParserEntityCheck(ctxt, 0, ent, 0);
7813 /* TODO ? check regressions ctxt->valid = 0; */
7814 }
7815
7816 /*
7817 * [ WFC: Parsed Entity ]
7818 * An entity reference must not contain the name of an
7819 * unparsed entity
7820 */
7821 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
7822 xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
7823 "Entity reference to unparsed entity %s\n", name);
7824 }
7825
7826 /*
7827 * [ WFC: No External Entity References ]
7828 * Attribute values cannot contain direct or indirect
7829 * entity references to external entities.
7830 */
7831 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7832 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
7833 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
7834 "Attribute references external entity '%s'\n", name);
7835 }
7836 /*
7837 * [ WFC: No < in Attribute Values ]
7838 * The replacement text of any entity referred to directly or
7839 * indirectly in an attribute value (other than "&lt;") must
7840 * not contain a <.
7841 */
7842 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7843 (ent != NULL) && (ent->content != NULL) &&
7844 (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
7845 (xmlStrchr(ent->content, '<'))) {
7846 xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
7847 "'<' in entity '%s' is not allowed in attributes values\n",
7848 name);
7849 }
7850
7851 /*
7852 * Internal check, no parameter entities here ...
7853 */
7854 else {
7855 switch (ent->etype) {
7856 case XML_INTERNAL_PARAMETER_ENTITY:
7857 case XML_EXTERNAL_PARAMETER_ENTITY:
7858 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
7859 "Attempt to reference the parameter entity '%s'\n",
7860 name);
7861 break;
7862 default:
7863 break;
7864 }
7865 }
7866
7867 /*
7868 * [ WFC: No Recursion ]
7869 * A parsed entity must not contain a recursive reference
7870 * to itself, either directly or indirectly.
7871 * Done somewhere else
7872 */
7873
7874 xmlFree(name);
7875 *str = ptr;
7876 return(ent);
7877}
7878
7879/**
7880 * xmlParsePEReference:
7881 * @ctxt: an XML parser context
7882 *
7883 * parse PEReference declarations
7884 * The entity content is handled directly by pushing it's content as
7885 * a new input stream.
7886 *
7887 * [69] PEReference ::= '%' Name ';'
7888 *
7889 * [ WFC: No Recursion ]
7890 * A parsed entity must not contain a recursive
7891 * reference to itself, either directly or indirectly.
7892 *
7893 * [ WFC: Entity Declared ]
7894 * In a document without any DTD, a document with only an internal DTD
7895 * subset which contains no parameter entity references, or a document
7896 * with "standalone='yes'", ... ... The declaration of a parameter
7897 * entity must precede any reference to it...
7898 *
7899 * [ VC: Entity Declared ]
7900 * In a document with an external subset or external parameter entities
7901 * with "standalone='no'", ... ... The declaration of a parameter entity
7902 * must precede any reference to it...
7903 *
7904 * [ WFC: In DTD ]
7905 * Parameter-entity references may only appear in the DTD.
7906 * NOTE: misleading but this is handled.
7907 */
7908void
7909xmlParsePEReference(xmlParserCtxtPtr ctxt)
7910{
7911 const xmlChar *name;
7912 xmlEntityPtr entity = NULL;
7913 xmlParserInputPtr input;
7914
7915 if (RAW != '%')
7916 return;
7917 NEXT;
7918 name = xmlParseName(ctxt);
7919 if (name == NULL) {
7920 xmlFatalErrMsg(ctxt, XML_ERR_PEREF_NO_NAME, "PEReference: no name\n");
7921 return;
7922 }
7923 if (xmlParserDebugEntities)
7924 xmlGenericError(xmlGenericErrorContext,
7925 "PEReference: %s\n", name);
7926 if (RAW != ';') {
7927 xmlFatalErr(ctxt, XML_ERR_PEREF_SEMICOL_MISSING, NULL);
7928 return;
7929 }
7930
7931 NEXT;
7932
7933 /*
7934 * Increase the number of entity references parsed
7935 */
7936 ctxt->nbentities++;
7937
7938 /*
7939 * Request the entity from SAX
7940 */
7941 if ((ctxt->sax != NULL) &&
7942 (ctxt->sax->getParameterEntity != NULL))
7943 entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
7944 if (ctxt->instate == XML_PARSER_EOF)
7945 return;
7946 if (entity == NULL) {
7947 /*
7948 * [ WFC: Entity Declared ]
7949 * In a document without any DTD, a document with only an
7950 * internal DTD subset which contains no parameter entity
7951 * references, or a document with "standalone='yes'", ...
7952 * ... The declaration of a parameter entity must precede
7953 * any reference to it...
7954 */
7955 if ((ctxt->standalone == 1) ||
7956 ((ctxt->hasExternalSubset == 0) &&
7957 (ctxt->hasPErefs == 0))) {
7958 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7959 "PEReference: %%%s; not found\n",
7960 name);
7961 } else {
7962 /*
7963 * [ VC: Entity Declared ]
7964 * In a document with an external subset or external
7965 * parameter entities with "standalone='no'", ...
7966 * ... The declaration of a parameter entity must
7967 * precede any reference to it...
7968 */
7969 if ((ctxt->validate) && (ctxt->vctxt.error != NULL)) {
7970 xmlValidityError(ctxt, XML_WAR_UNDECLARED_ENTITY,
7971 "PEReference: %%%s; not found\n",
7972 name, NULL);
7973 } else
7974 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
7975 "PEReference: %%%s; not found\n",
7976 name, NULL);
7977 ctxt->valid = 0;
7978 }
7979 xmlParserEntityCheck(ctxt, 0, NULL, 0);
7980 } else {
7981 /*
7982 * Internal checking in case the entity quest barfed
7983 */
7984 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
7985 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
7986 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
7987 "Internal: %%%s; is not a parameter entity\n",
7988 name, NULL);
7989 } else {
7990 xmlChar start[4];
7991 xmlCharEncoding enc;
7992
7993 if (xmlParserEntityCheck(ctxt, 0, entity, 0))
7994 return;
7995
7996 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
7997 ((ctxt->options & XML_PARSE_NOENT) == 0) &&
7998 ((ctxt->options & XML_PARSE_DTDVALID) == 0) &&
7999 ((ctxt->options & XML_PARSE_DTDLOAD) == 0) &&
8000 ((ctxt->options & XML_PARSE_DTDATTR) == 0) &&
8001 (ctxt->replaceEntities == 0) &&
8002 (ctxt->validate == 0))
8003 return;
8004
8005 input = xmlNewEntityInputStream(ctxt, entity);
8006 if (xmlPushInput(ctxt, input) < 0) {
8007 xmlFreeInputStream(input);
8008 return;
8009 }
8010
8011 if (entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) {
8012 /*
8013 * Get the 4 first bytes and decode the charset
8014 * if enc != XML_CHAR_ENCODING_NONE
8015 * plug some encoding conversion routines.
8016 * Note that, since we may have some non-UTF8
8017 * encoding (like UTF16, bug 135229), the 'length'
8018 * is not known, but we can calculate based upon
8019 * the amount of data in the buffer.
8020 */
8021 GROW
8022 if (ctxt->instate == XML_PARSER_EOF)
8023 return;
8024 if ((ctxt->input->end - ctxt->input->cur)>=4) {
8025 start[0] = RAW;
8026 start[1] = NXT(1);
8027 start[2] = NXT(2);
8028 start[3] = NXT(3);
8029 enc = xmlDetectCharEncoding(start, 4);
8030 if (enc != XML_CHAR_ENCODING_NONE) {
8031 xmlSwitchEncoding(ctxt, enc);
8032 }
8033 }
8034
8035 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) &&
8036 (IS_BLANK_CH(NXT(5)))) {
8037 xmlParseTextDecl(ctxt);
8038 }
8039 }
8040 }
8041 }
8042 ctxt->hasPErefs = 1;
8043}
8044
8045/**
8046 * xmlLoadEntityContent:
8047 * @ctxt: an XML parser context
8048 * @entity: an unloaded system entity
8049 *
8050 * Load the original content of the given system entity from the
8051 * ExternalID/SystemID given. This is to be used for Included in Literal
8052 * http://www.w3.org/TR/REC-xml/#inliteral processing of entities references
8053 *
8054 * Returns 0 in case of success and -1 in case of failure
8055 */
8056static int
8057xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
8058 xmlParserInputPtr input;
8059 xmlBufferPtr buf;
8060 int l, c;
8061 int count = 0;
8062
8063 if ((ctxt == NULL) || (entity == NULL) ||
8064 ((entity->etype != XML_EXTERNAL_PARAMETER_ENTITY) &&
8065 (entity->etype != XML_EXTERNAL_GENERAL_PARSED_ENTITY)) ||
8066 (entity->content != NULL)) {
8067 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
8068 "xmlLoadEntityContent parameter error");
8069 return(-1);
8070 }
8071
8072 if (xmlParserDebugEntities)
8073 xmlGenericError(xmlGenericErrorContext,
8074 "Reading %s entity content input\n", entity->name);
8075
8076 buf = xmlBufferCreate();
8077 if (buf == NULL) {
8078 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
8079 "xmlLoadEntityContent parameter error");
8080 return(-1);
8081 }
8082
8083 input = xmlNewEntityInputStream(ctxt, entity);
8084 if (input == NULL) {
8085 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
8086 "xmlLoadEntityContent input error");
8087 xmlBufferFree(buf);
8088 return(-1);
8089 }
8090
8091 /*
8092 * Push the entity as the current input, read char by char
8093 * saving to the buffer until the end of the entity or an error
8094 */
8095 if (xmlPushInput(ctxt, input) < 0) {
8096 xmlBufferFree(buf);
8097 return(-1);
8098 }
8099
8100 GROW;
8101 c = CUR_CHAR(l);
8102 while ((ctxt->input == input) && (ctxt->input->cur < ctxt->input->end) &&
8103 (IS_CHAR(c))) {
8104 xmlBufferAdd(buf, ctxt->input->cur, l);
8105 if (count++ > XML_PARSER_CHUNK_SIZE) {
8106 count = 0;
8107 GROW;
8108 if (ctxt->instate == XML_PARSER_EOF) {
8109 xmlBufferFree(buf);
8110 return(-1);
8111 }
8112 }
8113 NEXTL(l);
8114 c = CUR_CHAR(l);
8115 if (c == 0) {
8116 count = 0;
8117 GROW;
8118 if (ctxt->instate == XML_PARSER_EOF) {
8119 xmlBufferFree(buf);
8120 return(-1);
8121 }
8122 c = CUR_CHAR(l);
8123 }
8124 }
8125
8126 if ((ctxt->input == input) && (ctxt->input->cur >= ctxt->input->end)) {
8127 xmlPopInput(ctxt);
8128 } else if (!IS_CHAR(c)) {
8129 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
8130 "xmlLoadEntityContent: invalid char value %d\n",
8131 c);
8132 xmlBufferFree(buf);
8133 return(-1);
8134 }
8135 entity->content = buf->content;
8136 buf->content = NULL;
8137 xmlBufferFree(buf);
8138
8139 return(0);
8140}
8141
8142/**
8143 * xmlParseStringPEReference:
8144 * @ctxt: an XML parser context
8145 * @str: a pointer to an index in the string
8146 *
8147 * parse PEReference declarations
8148 *
8149 * [69] PEReference ::= '%' Name ';'
8150 *
8151 * [ WFC: No Recursion ]
8152 * A parsed entity must not contain a recursive
8153 * reference to itself, either directly or indirectly.
8154 *
8155 * [ WFC: Entity Declared ]
8156 * In a document without any DTD, a document with only an internal DTD
8157 * subset which contains no parameter entity references, or a document
8158 * with "standalone='yes'", ... ... The declaration of a parameter
8159 * entity must precede any reference to it...
8160 *
8161 * [ VC: Entity Declared ]
8162 * In a document with an external subset or external parameter entities
8163 * with "standalone='no'", ... ... The declaration of a parameter entity
8164 * must precede any reference to it...
8165 *
8166 * [ WFC: In DTD ]
8167 * Parameter-entity references may only appear in the DTD.
8168 * NOTE: misleading but this is handled.
8169 *
8170 * Returns the string of the entity content.
8171 * str is updated to the current value of the index
8172 */
8173static xmlEntityPtr
8174xmlParseStringPEReference(xmlParserCtxtPtr ctxt, const xmlChar **str) {
8175 const xmlChar *ptr;
8176 xmlChar cur;
8177 xmlChar *name;
8178 xmlEntityPtr entity = NULL;
8179
8180 if ((str == NULL) || (*str == NULL)) return(NULL);
8181 ptr = *str;
8182 cur = *ptr;
8183 if (cur != '%')
8184 return(NULL);
8185 ptr++;
8186 name = xmlParseStringName(ctxt, &ptr);
8187 if (name == NULL) {
8188 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8189 "xmlParseStringPEReference: no name\n");
8190 *str = ptr;
8191 return(NULL);
8192 }
8193 cur = *ptr;
8194 if (cur != ';') {
8195 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
8196 xmlFree(name);
8197 *str = ptr;
8198 return(NULL);
8199 }
8200 ptr++;
8201
8202 /*
8203 * Increase the number of entity references parsed
8204 */
8205 ctxt->nbentities++;
8206
8207 /*
8208 * Request the entity from SAX
8209 */
8210 if ((ctxt->sax != NULL) &&
8211 (ctxt->sax->getParameterEntity != NULL))
8212 entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
8213 if (ctxt->instate == XML_PARSER_EOF) {
8214 xmlFree(name);
8215 *str = ptr;
8216 return(NULL);
8217 }
8218 if (entity == NULL) {
8219 /*
8220 * [ WFC: Entity Declared ]
8221 * In a document without any DTD, a document with only an
8222 * internal DTD subset which contains no parameter entity
8223 * references, or a document with "standalone='yes'", ...
8224 * ... The declaration of a parameter entity must precede
8225 * any reference to it...
8226 */
8227 if ((ctxt->standalone == 1) ||
8228 ((ctxt->hasExternalSubset == 0) && (ctxt->hasPErefs == 0))) {
8229 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
8230 "PEReference: %%%s; not found\n", name);
8231 } else {
8232 /*
8233 * [ VC: Entity Declared ]
8234 * In a document with an external subset or external
8235 * parameter entities with "standalone='no'", ...
8236 * ... The declaration of a parameter entity must
8237 * precede any reference to it...
8238 */
8239 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
8240 "PEReference: %%%s; not found\n",
8241 name, NULL);
8242 ctxt->valid = 0;
8243 }
8244 xmlParserEntityCheck(ctxt, 0, NULL, 0);
8245 } else {
8246 /*
8247 * Internal checking in case the entity quest barfed
8248 */
8249 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
8250 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
8251 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
8252 "%%%s; is not a parameter entity\n",
8253 name, NULL);
8254 }
8255 }
8256 ctxt->hasPErefs = 1;
8257 xmlFree(name);
8258 *str = ptr;
8259 return(entity);
8260}
8261
8262/**
8263 * xmlParseDocTypeDecl:
8264 * @ctxt: an XML parser context
8265 *
8266 * parse a DOCTYPE declaration
8267 *
8268 * [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S?
8269 * ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
8270 *
8271 * [ VC: Root Element Type ]
8272 * The Name in the document type declaration must match the element
8273 * type of the root element.
8274 */
8275
8276void
8277xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt) {
8278 const xmlChar *name = NULL;
8279 xmlChar *ExternalID = NULL;
8280 xmlChar *URI = NULL;
8281
8282 /*
8283 * We know that '<!DOCTYPE' has been detected.
8284 */
8285 SKIP(9);
8286
8287 SKIP_BLANKS;
8288
8289 /*
8290 * Parse the DOCTYPE name.
8291 */
8292 name = xmlParseName(ctxt);
8293 if (name == NULL) {
8294 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8295 "xmlParseDocTypeDecl : no DOCTYPE name !\n");
8296 }
8297 ctxt->intSubName = name;
8298
8299 SKIP_BLANKS;
8300
8301 /*
8302 * Check for SystemID and ExternalID
8303 */
8304 URI = xmlParseExternalID(ctxt, &ExternalID, 1);
8305
8306 if ((URI != NULL) || (ExternalID != NULL)) {
8307 ctxt->hasExternalSubset = 1;
8308 }
8309 ctxt->extSubURI = URI;
8310 ctxt->extSubSystem = ExternalID;
8311
8312 SKIP_BLANKS;
8313
8314 /*
8315 * Create and update the internal subset.
8316 */
8317 if ((ctxt->sax != NULL) && (ctxt->sax->internalSubset != NULL) &&
8318 (!ctxt->disableSAX))
8319 ctxt->sax->internalSubset(ctxt->userData, name, ExternalID, URI);
8320 if (ctxt->instate == XML_PARSER_EOF)
8321 return;
8322
8323 /*
8324 * Is there any internal subset declarations ?
8325 * they are handled separately in xmlParseInternalSubset()
8326 */
8327 if (RAW == '[')
8328 return;
8329
8330 /*
8331 * We should be at the end of the DOCTYPE declaration.
8332 */
8333 if (RAW != '>') {
8334 xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
8335 }
8336 NEXT;
8337}
8338
8339/**
8340 * xmlParseInternalSubset:
8341 * @ctxt: an XML parser context
8342 *
8343 * parse the internal subset declaration
8344 *
8345 * [28 end] ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
8346 */
8347
8348static void
8349xmlParseInternalSubset(xmlParserCtxtPtr ctxt) {
8350 /*
8351 * Is there any DTD definition ?
8352 */
8353 if (RAW == '[') {
8354 int baseInputNr = ctxt->inputNr;
8355 ctxt->instate = XML_PARSER_DTD;
8356 NEXT;
8357 /*
8358 * Parse the succession of Markup declarations and
8359 * PEReferences.
8360 * Subsequence (markupdecl | PEReference | S)*
8361 */
8362 while (((RAW != ']') || (ctxt->inputNr > baseInputNr)) &&
8363 (ctxt->instate != XML_PARSER_EOF)) {
8364 const xmlChar *check = CUR_PTR;
8365 unsigned int cons = ctxt->input->consumed;
8366
8367 SKIP_BLANKS;
8368 xmlParseMarkupDecl(ctxt);
8369 xmlParsePEReference(ctxt);
8370
8371 /*
8372 * Conditional sections are allowed from external entities included
8373 * by PE References in the internal subset.
8374 */
8375 if ((ctxt->inputNr > 1) && (ctxt->input->filename != NULL) &&
8376 (RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
8377 xmlParseConditionalSections(ctxt);
8378 }
8379
8380 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
8381 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
8382 "xmlParseInternalSubset: error detected in Markup declaration\n");
8383 if (ctxt->inputNr > baseInputNr)
8384 xmlPopInput(ctxt);
8385 else
8386 break;
8387 }
8388 }
8389 if (RAW == ']') {
8390 NEXT;
8391 SKIP_BLANKS;
8392 }
8393 }
8394
8395 /*
8396 * We should be at the end of the DOCTYPE declaration.
8397 */
8398 if (RAW != '>') {
8399 xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
8400 return;
8401 }
8402 NEXT;
8403}
8404
8405#ifdef LIBXML_SAX1_ENABLED
8406/**
8407 * xmlParseAttribute:
8408 * @ctxt: an XML parser context
8409 * @value: a xmlChar ** used to store the value of the attribute
8410 *
8411 * parse an attribute
8412 *
8413 * [41] Attribute ::= Name Eq AttValue
8414 *
8415 * [ WFC: No External Entity References ]
8416 * Attribute values cannot contain direct or indirect entity references
8417 * to external entities.
8418 *
8419 * [ WFC: No < in Attribute Values ]
8420 * The replacement text of any entity referred to directly or indirectly in
8421 * an attribute value (other than "&lt;") must not contain a <.
8422 *
8423 * [ VC: Attribute Value Type ]
8424 * The attribute must have been declared; the value must be of the type
8425 * declared for it.
8426 *
8427 * [25] Eq ::= S? '=' S?
8428 *
8429 * With namespace:
8430 *
8431 * [NS 11] Attribute ::= QName Eq AttValue
8432 *
8433 * Also the case QName == xmlns:??? is handled independently as a namespace
8434 * definition.
8435 *
8436 * Returns the attribute name, and the value in *value.
8437 */
8438
8439const xmlChar *
8440xmlParseAttribute(xmlParserCtxtPtr ctxt, xmlChar **value) {
8441 const xmlChar *name;
8442 xmlChar *val;
8443
8444 *value = NULL;
8445 GROW;
8446 name = xmlParseName(ctxt);
8447 if (name == NULL) {
8448 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8449 "error parsing attribute name\n");
8450 return(NULL);
8451 }
8452
8453 /*
8454 * read the value
8455 */
8456 SKIP_BLANKS;
8457 if (RAW == '=') {
8458 NEXT;
8459 SKIP_BLANKS;
8460 val = xmlParseAttValue(ctxt);
8461 ctxt->instate = XML_PARSER_CONTENT;
8462 } else {
8463 xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
8464 "Specification mandates value for attribute %s\n", name);
8465 return(NULL);
8466 }
8467
8468 /*
8469 * Check that xml:lang conforms to the specification
8470 * No more registered as an error, just generate a warning now
8471 * since this was deprecated in XML second edition
8472 */
8473 if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "xml:lang"))) {
8474 if (!xmlCheckLanguageID(val)) {
8475 xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
8476 "Malformed value for xml:lang : %s\n",
8477 val, NULL);
8478 }
8479 }
8480
8481 /*
8482 * Check that xml:space conforms to the specification
8483 */
8484 if (xmlStrEqual(name, BAD_CAST "xml:space")) {
8485 if (xmlStrEqual(val, BAD_CAST "default"))
8486 *(ctxt->space) = 0;
8487 else if (xmlStrEqual(val, BAD_CAST "preserve"))
8488 *(ctxt->space) = 1;
8489 else {
8490 xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE,
8491"Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
8492 val, NULL);
8493 }
8494 }
8495
8496 *value = val;
8497 return(name);
8498}
8499
8500/**
8501 * xmlParseStartTag:
8502 * @ctxt: an XML parser context
8503 *
8504 * parse a start of tag either for rule element or
8505 * EmptyElement. In both case we don't parse the tag closing chars.
8506 *
8507 * [40] STag ::= '<' Name (S Attribute)* S? '>'
8508 *
8509 * [ WFC: Unique Att Spec ]
8510 * No attribute name may appear more than once in the same start-tag or
8511 * empty-element tag.
8512 *
8513 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
8514 *
8515 * [ WFC: Unique Att Spec ]
8516 * No attribute name may appear more than once in the same start-tag or
8517 * empty-element tag.
8518 *
8519 * With namespace:
8520 *
8521 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
8522 *
8523 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
8524 *
8525 * Returns the element name parsed
8526 */
8527
8528const xmlChar *
8529xmlParseStartTag(xmlParserCtxtPtr ctxt) {
8530 const xmlChar *name;
8531 const xmlChar *attname;
8532 xmlChar *attvalue;
8533 const xmlChar **atts = ctxt->atts;
8534 int nbatts = 0;
8535 int maxatts = ctxt->maxatts;
8536 int i;
8537
8538 if (RAW != '<') return(NULL);
8539 NEXT1;
8540
8541 name = xmlParseName(ctxt);
8542 if (name == NULL) {
8543 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8544 "xmlParseStartTag: invalid element name\n");
8545 return(NULL);
8546 }
8547
8548 /*
8549 * Now parse the attributes, it ends up with the ending
8550 *
8551 * (S Attribute)* S?
8552 */
8553 SKIP_BLANKS;
8554 GROW;
8555
8556 while (((RAW != '>') &&
8557 ((RAW != '/') || (NXT(1) != '>')) &&
8558 (IS_BYTE_CHAR(RAW))) && (ctxt->instate != XML_PARSER_EOF)) {
8559 const xmlChar *q = CUR_PTR;
8560 unsigned int cons = ctxt->input->consumed;
8561
8562 attname = xmlParseAttribute(ctxt, &attvalue);
8563 if ((attname != NULL) && (attvalue != NULL)) {
8564 /*
8565 * [ WFC: Unique Att Spec ]
8566 * No attribute name may appear more than once in the same
8567 * start-tag or empty-element tag.
8568 */
8569 for (i = 0; i < nbatts;i += 2) {
8570 if (xmlStrEqual(atts[i], attname)) {
8571 xmlErrAttributeDup(ctxt, NULL, attname);
8572 xmlFree(attvalue);
8573 goto failed;
8574 }
8575 }
8576 /*
8577 * Add the pair to atts
8578 */
8579 if (atts == NULL) {
8580 maxatts = 22; /* allow for 10 attrs by default */
8581 atts = (const xmlChar **)
8582 xmlMalloc(maxatts * sizeof(xmlChar *));
8583 if (atts == NULL) {
8584 xmlErrMemory(ctxt, NULL);
8585 if (attvalue != NULL)
8586 xmlFree(attvalue);
8587 goto failed;
8588 }
8589 ctxt->atts = atts;
8590 ctxt->maxatts = maxatts;
8591 } else if (nbatts + 4 > maxatts) {
8592 const xmlChar **n;
8593
8594 maxatts *= 2;
8595 n = (const xmlChar **) xmlRealloc((void *) atts,
8596 maxatts * sizeof(const xmlChar *));
8597 if (n == NULL) {
8598 xmlErrMemory(ctxt, NULL);
8599 if (attvalue != NULL)
8600 xmlFree(attvalue);
8601 goto failed;
8602 }
8603 atts = n;
8604 ctxt->atts = atts;
8605 ctxt->maxatts = maxatts;
8606 }
8607 atts[nbatts++] = attname;
8608 atts[nbatts++] = attvalue;
8609 atts[nbatts] = NULL;
8610 atts[nbatts + 1] = NULL;
8611 } else {
8612 if (attvalue != NULL)
8613 xmlFree(attvalue);
8614 }
8615
8616failed:
8617
8618 GROW
8619 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
8620 break;
8621 if (SKIP_BLANKS == 0) {
8622 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
8623 "attributes construct error\n");
8624 }
8625 if ((cons == ctxt->input->consumed) && (q == CUR_PTR) &&
8626 (attname == NULL) && (attvalue == NULL)) {
8627 xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
8628 "xmlParseStartTag: problem parsing attributes\n");
8629 break;
8630 }
8631 SHRINK;
8632 GROW;
8633 }
8634
8635 /*
8636 * SAX: Start of Element !
8637 */
8638 if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL) &&
8639 (!ctxt->disableSAX)) {
8640 if (nbatts > 0)
8641 ctxt->sax->startElement(ctxt->userData, name, atts);
8642 else
8643 ctxt->sax->startElement(ctxt->userData, name, NULL);
8644 }
8645
8646 if (atts != NULL) {
8647 /* Free only the content strings */
8648 for (i = 1;i < nbatts;i+=2)
8649 if (atts[i] != NULL)
8650 xmlFree((xmlChar *) atts[i]);
8651 }
8652 return(name);
8653}
8654
8655/**
8656 * xmlParseEndTag1:
8657 * @ctxt: an XML parser context
8658 * @line: line of the start tag
8659 * @nsNr: number of namespaces on the start tag
8660 *
8661 * parse an end of tag
8662 *
8663 * [42] ETag ::= '</' Name S? '>'
8664 *
8665 * With namespace
8666 *
8667 * [NS 9] ETag ::= '</' QName S? '>'
8668 */
8669
8670static void
8671xmlParseEndTag1(xmlParserCtxtPtr ctxt, int line) {
8672 const xmlChar *name;
8673
8674 GROW;
8675 if ((RAW != '<') || (NXT(1) != '/')) {
8676 xmlFatalErrMsg(ctxt, XML_ERR_LTSLASH_REQUIRED,
8677 "xmlParseEndTag: '</' not found\n");
8678 return;
8679 }
8680 SKIP(2);
8681
8682 name = xmlParseNameAndCompare(ctxt,ctxt->name);
8683
8684 /*
8685 * We should definitely be at the ending "S? '>'" part
8686 */
8687 GROW;
8688 SKIP_BLANKS;
8689 if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
8690 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
8691 } else
8692 NEXT1;
8693
8694 /*
8695 * [ WFC: Element Type Match ]
8696 * The Name in an element's end-tag must match the element type in the
8697 * start-tag.
8698 *
8699 */
8700 if (name != (xmlChar*)1) {
8701 if (name == NULL) name = BAD_CAST "unparsable";
8702 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
8703 "Opening and ending tag mismatch: %s line %d and %s\n",
8704 ctxt->name, line, name);
8705 }
8706
8707 /*
8708 * SAX: End of Tag
8709 */
8710 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
8711 (!ctxt->disableSAX))
8712 ctxt->sax->endElement(ctxt->userData, ctxt->name);
8713
8714 namePop(ctxt);
8715 spacePop(ctxt);
8716 return;
8717}
8718
8719/**
8720 * xmlParseEndTag:
8721 * @ctxt: an XML parser context
8722 *
8723 * parse an end of tag
8724 *
8725 * [42] ETag ::= '</' Name S? '>'
8726 *
8727 * With namespace
8728 *
8729 * [NS 9] ETag ::= '</' QName S? '>'
8730 */
8731
8732void
8733xmlParseEndTag(xmlParserCtxtPtr ctxt) {
8734 xmlParseEndTag1(ctxt, 0);
8735}
8736#endif /* LIBXML_SAX1_ENABLED */
8737
8738/************************************************************************
8739 * *
8740 * SAX 2 specific operations *
8741 * *
8742 ************************************************************************/
8743
8744/*
8745 * xmlGetNamespace:
8746 * @ctxt: an XML parser context
8747 * @prefix: the prefix to lookup
8748 *
8749 * Lookup the namespace name for the @prefix (which ca be NULL)
8750 * The prefix must come from the @ctxt->dict dictionary
8751 *
8752 * Returns the namespace name or NULL if not bound
8753 */
8754static const xmlChar *
8755xmlGetNamespace(xmlParserCtxtPtr ctxt, const xmlChar *prefix) {
8756 int i;
8757
8758 if (prefix == ctxt->str_xml) return(ctxt->str_xml_ns);
8759 for (i = ctxt->nsNr - 2;i >= 0;i-=2)
8760 if (ctxt->nsTab[i] == prefix) {
8761 if ((prefix == NULL) && (*ctxt->nsTab[i + 1] == 0))
8762 return(NULL);
8763 return(ctxt->nsTab[i + 1]);
8764 }
8765 return(NULL);
8766}
8767
8768/**
8769 * xmlParseQName:
8770 * @ctxt: an XML parser context
8771 * @prefix: pointer to store the prefix part
8772 *
8773 * parse an XML Namespace QName
8774 *
8775 * [6] QName ::= (Prefix ':')? LocalPart
8776 * [7] Prefix ::= NCName
8777 * [8] LocalPart ::= NCName
8778 *
8779 * Returns the Name parsed or NULL
8780 */
8781
8782static const xmlChar *
8783xmlParseQName(xmlParserCtxtPtr ctxt, const xmlChar **prefix) {
8784 const xmlChar *l, *p;
8785
8786 GROW;
8787
8788 l = xmlParseNCName(ctxt);
8789 if (l == NULL) {
8790 if (CUR == ':') {
8791 l = xmlParseName(ctxt);
8792 if (l != NULL) {
8793 xmlNsErr(ctxt, XML_NS_ERR_QNAME,
8794 "Failed to parse QName '%s'\n", l, NULL, NULL);
8795 *prefix = NULL;
8796 return(l);
8797 }
8798 }
8799 return(NULL);
8800 }
8801 if (CUR == ':') {
8802 NEXT;
8803 p = l;
8804 l = xmlParseNCName(ctxt);
8805 if (l == NULL) {
8806 xmlChar *tmp;
8807
8808 if (ctxt->instate == XML_PARSER_EOF)
8809 return(NULL);
8810 xmlNsErr(ctxt, XML_NS_ERR_QNAME,
8811 "Failed to parse QName '%s:'\n", p, NULL, NULL);
8812 l = xmlParseNmtoken(ctxt);
8813 if (l == NULL) {
8814 if (ctxt->instate == XML_PARSER_EOF)
8815 return(NULL);
8816 tmp = xmlBuildQName(BAD_CAST "", p, NULL, 0);
8817 } else {
8818 tmp = xmlBuildQName(l, p, NULL, 0);
8819 xmlFree((char *)l);
8820 }
8821 p = xmlDictLookup(ctxt->dict, tmp, -1);
8822 if (tmp != NULL) xmlFree(tmp);
8823 *prefix = NULL;
8824 return(p);
8825 }
8826 if (CUR == ':') {
8827 xmlChar *tmp;
8828
8829 xmlNsErr(ctxt, XML_NS_ERR_QNAME,
8830 "Failed to parse QName '%s:%s:'\n", p, l, NULL);
8831 NEXT;
8832 tmp = (xmlChar *) xmlParseName(ctxt);
8833 if (tmp != NULL) {
8834 tmp = xmlBuildQName(tmp, l, NULL, 0);
8835 l = xmlDictLookup(ctxt->dict, tmp, -1);
8836 if (tmp != NULL) xmlFree(tmp);
8837 *prefix = p;
8838 return(l);
8839 }
8840 if (ctxt->instate == XML_PARSER_EOF)
8841 return(NULL);
8842 tmp = xmlBuildQName(BAD_CAST "", l, NULL, 0);
8843 l = xmlDictLookup(ctxt->dict, tmp, -1);
8844 if (tmp != NULL) xmlFree(tmp);
8845 *prefix = p;
8846 return(l);
8847 }
8848 *prefix = p;
8849 } else
8850 *prefix = NULL;
8851 return(l);
8852}
8853
8854/**
8855 * xmlParseQNameAndCompare:
8856 * @ctxt: an XML parser context
8857 * @name: the localname
8858 * @prefix: the prefix, if any.
8859 *
8860 * parse an XML name and compares for match
8861 * (specialized for endtag parsing)
8862 *
8863 * Returns NULL for an illegal name, (xmlChar*) 1 for success
8864 * and the name for mismatch
8865 */
8866
8867static const xmlChar *
8868xmlParseQNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *name,
8869 xmlChar const *prefix) {
8870 const xmlChar *cmp;
8871 const xmlChar *in;
8872 const xmlChar *ret;
8873 const xmlChar *prefix2;
8874
8875 if (prefix == NULL) return(xmlParseNameAndCompare(ctxt, name));
8876
8877 GROW;
8878 in = ctxt->input->cur;
8879
8880 cmp = prefix;
8881 while (*in != 0 && *in == *cmp) {
8882 ++in;
8883 ++cmp;
8884 }
8885 if ((*cmp == 0) && (*in == ':')) {
8886 in++;
8887 cmp = name;
8888 while (*in != 0 && *in == *cmp) {
8889 ++in;
8890 ++cmp;
8891 }
8892 if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
8893 /* success */
8894 ctxt->input->col += in - ctxt->input->cur;
8895 ctxt->input->cur = in;
8896 return((const xmlChar*) 1);
8897 }
8898 }
8899 /*
8900 * all strings coms from the dictionary, equality can be done directly
8901 */
8902 ret = xmlParseQName (ctxt, &prefix2);
8903 if ((ret == name) && (prefix == prefix2))
8904 return((const xmlChar*) 1);
8905 return ret;
8906}
8907
8908/**
8909 * xmlParseAttValueInternal:
8910 * @ctxt: an XML parser context
8911 * @len: attribute len result
8912 * @alloc: whether the attribute was reallocated as a new string
8913 * @normalize: if 1 then further non-CDATA normalization must be done
8914 *
8915 * parse a value for an attribute.
8916 * NOTE: if no normalization is needed, the routine will return pointers
8917 * directly from the data buffer.
8918 *
8919 * 3.3.3 Attribute-Value Normalization:
8920 * Before the value of an attribute is passed to the application or
8921 * checked for validity, the XML processor must normalize it as follows:
8922 * - a character reference is processed by appending the referenced
8923 * character to the attribute value
8924 * - an entity reference is processed by recursively processing the
8925 * replacement text of the entity
8926 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
8927 * appending #x20 to the normalized value, except that only a single
8928 * #x20 is appended for a "#xD#xA" sequence that is part of an external
8929 * parsed entity or the literal entity value of an internal parsed entity
8930 * - other characters are processed by appending them to the normalized value
8931 * If the declared value is not CDATA, then the XML processor must further
8932 * process the normalized attribute value by discarding any leading and
8933 * trailing space (#x20) characters, and by replacing sequences of space
8934 * (#x20) characters by a single space (#x20) character.
8935 * All attributes for which no declaration has been read should be treated
8936 * by a non-validating parser as if declared CDATA.
8937 *
8938 * Returns the AttValue parsed or NULL. The value has to be freed by the
8939 * caller if it was copied, this can be detected by val[*len] == 0.
8940 */
8941
8942#define GROW_PARSE_ATT_VALUE_INTERNAL(ctxt, in, start, end) \
8943 const xmlChar *oldbase = ctxt->input->base;\
8944 GROW;\
8945 if (ctxt->instate == XML_PARSER_EOF)\
8946 return(NULL);\
8947 if (oldbase != ctxt->input->base) {\
8948 ptrdiff_t delta = ctxt->input->base - oldbase;\
8949 start = start + delta;\
8950 in = in + delta;\
8951 }\
8952 end = ctxt->input->end;
8953
8954static xmlChar *
8955xmlParseAttValueInternal(xmlParserCtxtPtr ctxt, int *len, int *alloc,
8956 int normalize)
8957{
8958 xmlChar limit = 0;
8959 const xmlChar *in = NULL, *start, *end, *last;
8960 xmlChar *ret = NULL;
8961 int line, col;
8962
8963 GROW;
8964 in = (xmlChar *) CUR_PTR;
8965 line = ctxt->input->line;
8966 col = ctxt->input->col;
8967 if (*in != '"' && *in != '\'') {
8968 xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
8969 return (NULL);
8970 }
8971 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
8972
8973 /*
8974 * try to handle in this routine the most common case where no
8975 * allocation of a new string is required and where content is
8976 * pure ASCII.
8977 */
8978 limit = *in++;
8979 col++;
8980 end = ctxt->input->end;
8981 start = in;
8982 if (in >= end) {
8983 GROW_PARSE_ATT_VALUE_INTERNAL(ctxt, in, start, end)
8984 }
8985 if (normalize) {
8986 /*
8987 * Skip any leading spaces
8988 */
8989 while ((in < end) && (*in != limit) &&
8990 ((*in == 0x20) || (*in == 0x9) ||
8991 (*in == 0xA) || (*in == 0xD))) {
8992 if (*in == 0xA) {
8993 line++; col = 1;
8994 } else {
8995 col++;
8996 }
8997 in++;
8998 start = in;
8999 if (in >= end) {
9000 GROW_PARSE_ATT_VALUE_INTERNAL(ctxt, in, start, end)
9001 if (((in - start) > XML_MAX_TEXT_LENGTH) &&
9002 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
9003 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
9004 "AttValue length too long\n");
9005 return(NULL);
9006 }
9007 }
9008 }
9009 while ((in < end) && (*in != limit) && (*in >= 0x20) &&
9010 (*in <= 0x7f) && (*in != '&') && (*in != '<')) {
9011 col++;
9012 if ((*in++ == 0x20) && (*in == 0x20)) break;
9013 if (in >= end) {
9014 GROW_PARSE_ATT_VALUE_INTERNAL(ctxt, in, start, end)
9015 if (((in - start) > XML_MAX_TEXT_LENGTH) &&
9016 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
9017 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
9018 "AttValue length too long\n");
9019 return(NULL);
9020 }
9021 }
9022 }
9023 last = in;
9024 /*
9025 * skip the trailing blanks
9026 */
9027 while ((last[-1] == 0x20) && (last > start)) last--;
9028 while ((in < end) && (*in != limit) &&
9029 ((*in == 0x20) || (*in == 0x9) ||
9030 (*in == 0xA) || (*in == 0xD))) {
9031 if (*in == 0xA) {
9032 line++, col = 1;
9033 } else {
9034 col++;
9035 }
9036 in++;
9037 if (in >= end) {
9038 const xmlChar *oldbase = ctxt->input->base;
9039 GROW;
9040 if (ctxt->instate == XML_PARSER_EOF)
9041 return(NULL);
9042 if (oldbase != ctxt->input->base) {
9043 ptrdiff_t delta = ctxt->input->base - oldbase;
9044 start = start + delta;
9045 in = in + delta;
9046 last = last + delta;
9047 }
9048 end = ctxt->input->end;
9049 if (((in - start) > XML_MAX_TEXT_LENGTH) &&
9050 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
9051 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
9052 "AttValue length too long\n");
9053 return(NULL);
9054 }
9055 }
9056 }
9057 if (((in - start) > XML_MAX_TEXT_LENGTH) &&
9058 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
9059 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
9060 "AttValue length too long\n");
9061 return(NULL);
9062 }
9063 if (*in != limit) goto need_complex;
9064 } else {
9065 while ((in < end) && (*in != limit) && (*in >= 0x20) &&
9066 (*in <= 0x7f) && (*in != '&') && (*in != '<')) {
9067 in++;
9068 col++;
9069 if (in >= end) {
9070 GROW_PARSE_ATT_VALUE_INTERNAL(ctxt, in, start, end)
9071 if (((in - start) > XML_MAX_TEXT_LENGTH) &&
9072 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
9073 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
9074 "AttValue length too long\n");
9075 return(NULL);
9076 }
9077 }
9078 }
9079 last = in;
9080 if (((in - start) > XML_MAX_TEXT_LENGTH) &&
9081 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
9082 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
9083 "AttValue length too long\n");
9084 return(NULL);
9085 }
9086 if (*in != limit) goto need_complex;
9087 }
9088 in++;
9089 col++;
9090 if (len != NULL) {
9091 *len = last - start;
9092 ret = (xmlChar *) start;
9093 } else {
9094 if (alloc) *alloc = 1;
9095 ret = xmlStrndup(start, last - start);
9096 }
9097 CUR_PTR = in;
9098 ctxt->input->line = line;
9099 ctxt->input->col = col;
9100 if (alloc) *alloc = 0;
9101 return ret;
9102need_complex:
9103 if (alloc) *alloc = 1;
9104 return xmlParseAttValueComplex(ctxt, len, normalize);
9105}
9106
9107/**
9108 * xmlParseAttribute2:
9109 * @ctxt: an XML parser context
9110 * @pref: the element prefix
9111 * @elem: the element name
9112 * @prefix: a xmlChar ** used to store the value of the attribute prefix
9113 * @value: a xmlChar ** used to store the value of the attribute
9114 * @len: an int * to save the length of the attribute
9115 * @alloc: an int * to indicate if the attribute was allocated
9116 *
9117 * parse an attribute in the new SAX2 framework.
9118 *
9119 * Returns the attribute name, and the value in *value, .
9120 */
9121
9122static const xmlChar *
9123xmlParseAttribute2(xmlParserCtxtPtr ctxt,
9124 const xmlChar * pref, const xmlChar * elem,
9125 const xmlChar ** prefix, xmlChar ** value,
9126 int *len, int *alloc)
9127{
9128 const xmlChar *name;
9129 xmlChar *val, *internal_val = NULL;
9130 int normalize = 0;
9131
9132 *value = NULL;
9133 GROW;
9134 name = xmlParseQName(ctxt, prefix);
9135 if (name == NULL) {
9136 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
9137 "error parsing attribute name\n");
9138 return (NULL);
9139 }
9140
9141 /*
9142 * get the type if needed
9143 */
9144 if (ctxt->attsSpecial != NULL) {
9145 int type;
9146
9147 type = (int) (ptrdiff_t) xmlHashQLookup2(ctxt->attsSpecial,
9148 pref, elem, *prefix, name);
9149 if (type != 0)
9150 normalize = 1;
9151 }
9152
9153 /*
9154 * read the value
9155 */
9156 SKIP_BLANKS;
9157 if (RAW == '=') {
9158 NEXT;
9159 SKIP_BLANKS;
9160 val = xmlParseAttValueInternal(ctxt, len, alloc, normalize);
9161 if (normalize) {
9162 /*
9163 * Sometimes a second normalisation pass for spaces is needed
9164 * but that only happens if charrefs or entities references
9165 * have been used in the attribute value, i.e. the attribute
9166 * value have been extracted in an allocated string already.
9167 */
9168 if (*alloc) {
9169 const xmlChar *val2;
9170
9171 val2 = xmlAttrNormalizeSpace2(ctxt, val, len);
9172 if ((val2 != NULL) && (val2 != val)) {
9173 xmlFree(val);
9174 val = (xmlChar *) val2;
9175 }
9176 }
9177 }
9178 ctxt->instate = XML_PARSER_CONTENT;
9179 } else {
9180 xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
9181 "Specification mandates value for attribute %s\n",
9182 name);
9183 return (NULL);
9184 }
9185
9186 if (*prefix == ctxt->str_xml) {
9187 /*
9188 * Check that xml:lang conforms to the specification
9189 * No more registered as an error, just generate a warning now
9190 * since this was deprecated in XML second edition
9191 */
9192 if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "lang"))) {
9193 internal_val = xmlStrndup(val, *len);
9194 if (!xmlCheckLanguageID(internal_val)) {
9195 xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
9196 "Malformed value for xml:lang : %s\n",
9197 internal_val, NULL);
9198 }
9199 }
9200
9201 /*
9202 * Check that xml:space conforms to the specification
9203 */
9204 if (xmlStrEqual(name, BAD_CAST "space")) {
9205 internal_val = xmlStrndup(val, *len);
9206 if (xmlStrEqual(internal_val, BAD_CAST "default"))
9207 *(ctxt->space) = 0;
9208 else if (xmlStrEqual(internal_val, BAD_CAST "preserve"))
9209 *(ctxt->space) = 1;
9210 else {
9211 xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE,
9212 "Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
9213 internal_val, NULL);
9214 }
9215 }
9216 if (internal_val) {
9217 xmlFree(internal_val);
9218 }
9219 }
9220
9221 *value = val;
9222 return (name);
9223}
9224/**
9225 * xmlParseStartTag2:
9226 * @ctxt: an XML parser context
9227 *
9228 * parse a start of tag either for rule element or
9229 * EmptyElement. In both case we don't parse the tag closing chars.
9230 * This routine is called when running SAX2 parsing
9231 *
9232 * [40] STag ::= '<' Name (S Attribute)* S? '>'
9233 *
9234 * [ WFC: Unique Att Spec ]
9235 * No attribute name may appear more than once in the same start-tag or
9236 * empty-element tag.
9237 *
9238 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
9239 *
9240 * [ WFC: Unique Att Spec ]
9241 * No attribute name may appear more than once in the same start-tag or
9242 * empty-element tag.
9243 *
9244 * With namespace:
9245 *
9246 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
9247 *
9248 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
9249 *
9250 * Returns the element name parsed
9251 */
9252
9253static const xmlChar *
9254xmlParseStartTag2(xmlParserCtxtPtr ctxt, const xmlChar **pref,
9255 const xmlChar **URI, int *tlen) {
9256 const xmlChar *localname;
9257 const xmlChar *prefix;
9258 const xmlChar *attname;
9259 const xmlChar *aprefix;
9260 const xmlChar *nsname;
9261 xmlChar *attvalue;
9262 const xmlChar **atts = ctxt->atts;
9263 int maxatts = ctxt->maxatts;
9264 int nratts, nbatts, nbdef, inputid;
9265 int i, j, nbNs, attval;
9266 unsigned long cur;
9267 int nsNr = ctxt->nsNr;
9268
9269 if (RAW != '<') return(NULL);
9270 NEXT1;
9271
9272 /*
9273 * NOTE: it is crucial with the SAX2 API to never call SHRINK beyond that
9274 * point since the attribute values may be stored as pointers to
9275 * the buffer and calling SHRINK would destroy them !
9276 * The Shrinking is only possible once the full set of attribute
9277 * callbacks have been done.
9278 */
9279 SHRINK;
9280 cur = ctxt->input->cur - ctxt->input->base;
9281 inputid = ctxt->input->id;
9282 nbatts = 0;
9283 nratts = 0;
9284 nbdef = 0;
9285 nbNs = 0;
9286 attval = 0;
9287 /* Forget any namespaces added during an earlier parse of this element. */
9288 ctxt->nsNr = nsNr;
9289
9290 localname = xmlParseQName(ctxt, &prefix);
9291 if (localname == NULL) {
9292 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
9293 "StartTag: invalid element name\n");
9294 return(NULL);
9295 }
9296 *tlen = ctxt->input->cur - ctxt->input->base - cur;
9297
9298 /*
9299 * Now parse the attributes, it ends up with the ending
9300 *
9301 * (S Attribute)* S?
9302 */
9303 SKIP_BLANKS;
9304 GROW;
9305
9306 while (((RAW != '>') &&
9307 ((RAW != '/') || (NXT(1) != '>')) &&
9308 (IS_BYTE_CHAR(RAW))) && (ctxt->instate != XML_PARSER_EOF)) {
9309 const xmlChar *q = CUR_PTR;
9310 unsigned int cons = ctxt->input->consumed;
9311 int len = -1, alloc = 0;
9312
9313 attname = xmlParseAttribute2(ctxt, prefix, localname,
9314 &aprefix, &attvalue, &len, &alloc);
9315 if ((attname == NULL) || (attvalue == NULL))
9316 goto next_attr;
9317 if (len < 0) len = xmlStrlen(attvalue);
9318
9319 if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
9320 const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len);
9321 xmlURIPtr uri;
9322
9323 if (URL == NULL) {
9324 xmlErrMemory(ctxt, "dictionary allocation failure");
9325 if ((attvalue != NULL) && (alloc != 0))
9326 xmlFree(attvalue);
9327 localname = NULL;
9328 goto done;
9329 }
9330 if (*URL != 0) {
9331 uri = xmlParseURI((const char *) URL);
9332 if (uri == NULL) {
9333 xmlNsErr(ctxt, XML_WAR_NS_URI,
9334 "xmlns: '%s' is not a valid URI\n",
9335 URL, NULL, NULL);
9336 } else {
9337 if (uri->scheme == NULL) {
9338 xmlNsWarn(ctxt, XML_WAR_NS_URI_RELATIVE,
9339 "xmlns: URI %s is not absolute\n",
9340 URL, NULL, NULL);
9341 }
9342 xmlFreeURI(uri);
9343 }
9344 if (URL == ctxt->str_xml_ns) {
9345 if (attname != ctxt->str_xml) {
9346 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9347 "xml namespace URI cannot be the default namespace\n",
9348 NULL, NULL, NULL);
9349 }
9350 goto next_attr;
9351 }
9352 if ((len == 29) &&
9353 (xmlStrEqual(URL,
9354 BAD_CAST "http://www.w3.org/2000/xmlns/"))) {
9355 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9356 "reuse of the xmlns namespace name is forbidden\n",
9357 NULL, NULL, NULL);
9358 goto next_attr;
9359 }
9360 }
9361 /*
9362 * check that it's not a defined namespace
9363 */
9364 for (j = 1;j <= nbNs;j++)
9365 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL)
9366 break;
9367 if (j <= nbNs)
9368 xmlErrAttributeDup(ctxt, NULL, attname);
9369 else
9370 if (nsPush(ctxt, NULL, URL) > 0) nbNs++;
9371
9372 } else if (aprefix == ctxt->str_xmlns) {
9373 const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len);
9374 xmlURIPtr uri;
9375
9376 if (attname == ctxt->str_xml) {
9377 if (URL != ctxt->str_xml_ns) {
9378 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9379 "xml namespace prefix mapped to wrong URI\n",
9380 NULL, NULL, NULL);
9381 }
9382 /*
9383 * Do not keep a namespace definition node
9384 */
9385 goto next_attr;
9386 }
9387 if (URL == ctxt->str_xml_ns) {
9388 if (attname != ctxt->str_xml) {
9389 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9390 "xml namespace URI mapped to wrong prefix\n",
9391 NULL, NULL, NULL);
9392 }
9393 goto next_attr;
9394 }
9395 if (attname == ctxt->str_xmlns) {
9396 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9397 "redefinition of the xmlns prefix is forbidden\n",
9398 NULL, NULL, NULL);
9399 goto next_attr;
9400 }
9401 if ((len == 29) &&
9402 (xmlStrEqual(URL,
9403 BAD_CAST "http://www.w3.org/2000/xmlns/"))) {
9404 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9405 "reuse of the xmlns namespace name is forbidden\n",
9406 NULL, NULL, NULL);
9407 goto next_attr;
9408 }
9409 if ((URL == NULL) || (URL[0] == 0)) {
9410 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9411 "xmlns:%s: Empty XML namespace is not allowed\n",
9412 attname, NULL, NULL);
9413 goto next_attr;
9414 } else {
9415 uri = xmlParseURI((const char *) URL);
9416 if (uri == NULL) {
9417 xmlNsErr(ctxt, XML_WAR_NS_URI,
9418 "xmlns:%s: '%s' is not a valid URI\n",
9419 attname, URL, NULL);
9420 } else {
9421 if ((ctxt->pedantic) && (uri->scheme == NULL)) {
9422 xmlNsWarn(ctxt, XML_WAR_NS_URI_RELATIVE,
9423 "xmlns:%s: URI %s is not absolute\n",
9424 attname, URL, NULL);
9425 }
9426 xmlFreeURI(uri);
9427 }
9428 }
9429
9430 /*
9431 * check that it's not a defined namespace
9432 */
9433 for (j = 1;j <= nbNs;j++)
9434 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname)
9435 break;
9436 if (j <= nbNs)
9437 xmlErrAttributeDup(ctxt, aprefix, attname);
9438 else
9439 if (nsPush(ctxt, attname, URL) > 0) nbNs++;
9440
9441 } else {
9442 /*
9443 * Add the pair to atts
9444 */
9445 if ((atts == NULL) || (nbatts + 5 > maxatts)) {
9446 if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
9447 goto next_attr;
9448 }
9449 maxatts = ctxt->maxatts;
9450 atts = ctxt->atts;
9451 }
9452 ctxt->attallocs[nratts++] = alloc;
9453 atts[nbatts++] = attname;
9454 atts[nbatts++] = aprefix;
9455 /*
9456 * The namespace URI field is used temporarily to point at the
9457 * base of the current input buffer for non-alloced attributes.
9458 * When the input buffer is reallocated, all the pointers become
9459 * invalid, but they can be reconstructed later.
9460 */
9461 if (alloc)
9462 atts[nbatts++] = NULL;
9463 else
9464 atts[nbatts++] = ctxt->input->base;
9465 atts[nbatts++] = attvalue;
9466 attvalue += len;
9467 atts[nbatts++] = attvalue;
9468 /*
9469 * tag if some deallocation is needed
9470 */
9471 if (alloc != 0) attval = 1;
9472 attvalue = NULL; /* moved into atts */
9473 }
9474
9475next_attr:
9476 if ((attvalue != NULL) && (alloc != 0)) {
9477 xmlFree(attvalue);
9478 attvalue = NULL;
9479 }
9480
9481 GROW
9482 if (ctxt->instate == XML_PARSER_EOF)
9483 break;
9484 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
9485 break;
9486 if (SKIP_BLANKS == 0) {
9487 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
9488 "attributes construct error\n");
9489 break;
9490 }
9491 if ((cons == ctxt->input->consumed) && (q == CUR_PTR) &&
9492 (attname == NULL) && (attvalue == NULL)) {
9493 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
9494 "xmlParseStartTag: problem parsing attributes\n");
9495 break;
9496 }
9497 GROW;
9498 }
9499
9500 if (ctxt->input->id != inputid) {
9501 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
9502 "Unexpected change of input\n");
9503 localname = NULL;
9504 goto done;
9505 }
9506
9507 /* Reconstruct attribute value pointers. */
9508 for (i = 0, j = 0; j < nratts; i += 5, j++) {
9509 if (atts[i+2] != NULL) {
9510 /*
9511 * Arithmetic on dangling pointers is technically undefined
9512 * behavior, but well...
9513 */
9514 ptrdiff_t offset = ctxt->input->base - atts[i+2];
9515 atts[i+2] = NULL; /* Reset repurposed namespace URI */
9516 atts[i+3] += offset; /* value */
9517 atts[i+4] += offset; /* valuend */
9518 }
9519 }
9520
9521 /*
9522 * The attributes defaulting
9523 */
9524 if (ctxt->attsDefault != NULL) {
9525 xmlDefAttrsPtr defaults;
9526
9527 defaults = xmlHashLookup2(ctxt->attsDefault, localname, prefix);
9528 if (defaults != NULL) {
9529 for (i = 0;i < defaults->nbAttrs;i++) {
9530 attname = defaults->values[5 * i];
9531 aprefix = defaults->values[5 * i + 1];
9532
9533 /*
9534 * special work for namespaces defaulted defs
9535 */
9536 if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
9537 /*
9538 * check that it's not a defined namespace
9539 */
9540 for (j = 1;j <= nbNs;j++)
9541 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL)
9542 break;
9543 if (j <= nbNs) continue;
9544
9545 nsname = xmlGetNamespace(ctxt, NULL);
9546 if (nsname != defaults->values[5 * i + 2]) {
9547 if (nsPush(ctxt, NULL,
9548 defaults->values[5 * i + 2]) > 0)
9549 nbNs++;
9550 }
9551 } else if (aprefix == ctxt->str_xmlns) {
9552 /*
9553 * check that it's not a defined namespace
9554 */
9555 for (j = 1;j <= nbNs;j++)
9556 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname)
9557 break;
9558 if (j <= nbNs) continue;
9559
9560 nsname = xmlGetNamespace(ctxt, attname);
9561 if (nsname != defaults->values[2]) {
9562 if (nsPush(ctxt, attname,
9563 defaults->values[5 * i + 2]) > 0)
9564 nbNs++;
9565 }
9566 } else {
9567 /*
9568 * check that it's not a defined attribute
9569 */
9570 for (j = 0;j < nbatts;j+=5) {
9571 if ((attname == atts[j]) && (aprefix == atts[j+1]))
9572 break;
9573 }
9574 if (j < nbatts) continue;
9575
9576 if ((atts == NULL) || (nbatts + 5 > maxatts)) {
9577 if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
9578 localname = NULL;
9579 goto done;
9580 }
9581 maxatts = ctxt->maxatts;
9582 atts = ctxt->atts;
9583 }
9584 atts[nbatts++] = attname;
9585 atts[nbatts++] = aprefix;
9586 if (aprefix == NULL)
9587 atts[nbatts++] = NULL;
9588 else
9589 atts[nbatts++] = xmlGetNamespace(ctxt, aprefix);
9590 atts[nbatts++] = defaults->values[5 * i + 2];
9591 atts[nbatts++] = defaults->values[5 * i + 3];
9592 if ((ctxt->standalone == 1) &&
9593 (defaults->values[5 * i + 4] != NULL)) {
9594 xmlValidityError(ctxt, XML_DTD_STANDALONE_DEFAULTED,
9595 "standalone: attribute %s on %s defaulted from external subset\n",
9596 attname, localname);
9597 }
9598 nbdef++;
9599 }
9600 }
9601 }
9602 }
9603
9604 /*
9605 * The attributes checkings
9606 */
9607 for (i = 0; i < nbatts;i += 5) {
9608 /*
9609 * The default namespace does not apply to attribute names.
9610 */
9611 if (atts[i + 1] != NULL) {
9612 nsname = xmlGetNamespace(ctxt, atts[i + 1]);
9613 if (nsname == NULL) {
9614 xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
9615 "Namespace prefix %s for %s on %s is not defined\n",
9616 atts[i + 1], atts[i], localname);
9617 }
9618 atts[i + 2] = nsname;
9619 } else
9620 nsname = NULL;
9621 /*
9622 * [ WFC: Unique Att Spec ]
9623 * No attribute name may appear more than once in the same
9624 * start-tag or empty-element tag.
9625 * As extended by the Namespace in XML REC.
9626 */
9627 for (j = 0; j < i;j += 5) {
9628 if (atts[i] == atts[j]) {
9629 if (atts[i+1] == atts[j+1]) {
9630 xmlErrAttributeDup(ctxt, atts[i+1], atts[i]);
9631 break;
9632 }
9633 if ((nsname != NULL) && (atts[j + 2] == nsname)) {
9634 xmlNsErr(ctxt, XML_NS_ERR_ATTRIBUTE_REDEFINED,
9635 "Namespaced Attribute %s in '%s' redefined\n",
9636 atts[i], nsname, NULL);
9637 break;
9638 }
9639 }
9640 }
9641 }
9642
9643 nsname = xmlGetNamespace(ctxt, prefix);
9644 if ((prefix != NULL) && (nsname == NULL)) {
9645 xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
9646 "Namespace prefix %s on %s is not defined\n",
9647 prefix, localname, NULL);
9648 }
9649 *pref = prefix;
9650 *URI = nsname;
9651
9652 /*
9653 * SAX: Start of Element !
9654 */
9655 if ((ctxt->sax != NULL) && (ctxt->sax->startElementNs != NULL) &&
9656 (!ctxt->disableSAX)) {
9657 if (nbNs > 0)
9658 ctxt->sax->startElementNs(ctxt->userData, localname, prefix,
9659 nsname, nbNs, &ctxt->nsTab[ctxt->nsNr - 2 * nbNs],
9660 nbatts / 5, nbdef, atts);
9661 else
9662 ctxt->sax->startElementNs(ctxt->userData, localname, prefix,
9663 nsname, 0, NULL, nbatts / 5, nbdef, atts);
9664 }
9665
9666done:
9667 /*
9668 * Free up attribute allocated strings if needed
9669 */
9670 if (attval != 0) {
9671 for (i = 3,j = 0; j < nratts;i += 5,j++)
9672 if ((ctxt->attallocs[j] != 0) && (atts[i] != NULL))
9673 xmlFree((xmlChar *) atts[i]);
9674 }
9675
9676 return(localname);
9677}
9678
9679/**
9680 * xmlParseEndTag2:
9681 * @ctxt: an XML parser context
9682 * @line: line of the start tag
9683 * @nsNr: number of namespaces on the start tag
9684 *
9685 * parse an end of tag
9686 *
9687 * [42] ETag ::= '</' Name S? '>'
9688 *
9689 * With namespace
9690 *
9691 * [NS 9] ETag ::= '</' QName S? '>'
9692 */
9693
9694static void
9695xmlParseEndTag2(xmlParserCtxtPtr ctxt, const xmlStartTag *tag) {
9696 const xmlChar *name;
9697
9698 GROW;
9699 if ((RAW != '<') || (NXT(1) != '/')) {
9700 xmlFatalErr(ctxt, XML_ERR_LTSLASH_REQUIRED, NULL);
9701 return;
9702 }
9703 SKIP(2);
9704
9705 if (tag->prefix == NULL)
9706 name = xmlParseNameAndCompare(ctxt, ctxt->name);
9707 else
9708 name = xmlParseQNameAndCompare(ctxt, ctxt->name, tag->prefix);
9709
9710 /*
9711 * We should definitely be at the ending "S? '>'" part
9712 */
9713 GROW;
9714 if (ctxt->instate == XML_PARSER_EOF)
9715 return;
9716 SKIP_BLANKS;
9717 if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
9718 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
9719 } else
9720 NEXT1;
9721
9722 /*
9723 * [ WFC: Element Type Match ]
9724 * The Name in an element's end-tag must match the element type in the
9725 * start-tag.
9726 *
9727 */
9728 if (name != (xmlChar*)1) {
9729 if (name == NULL) name = BAD_CAST "unparsable";
9730 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
9731 "Opening and ending tag mismatch: %s line %d and %s\n",
9732 ctxt->name, tag->line, name);
9733 }
9734
9735 /*
9736 * SAX: End of Tag
9737 */
9738 if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
9739 (!ctxt->disableSAX))
9740 ctxt->sax->endElementNs(ctxt->userData, ctxt->name, tag->prefix,
9741 tag->URI);
9742
9743 spacePop(ctxt);
9744 if (tag->nsNr != 0)
9745 nsPop(ctxt, tag->nsNr);
9746}
9747
9748/**
9749 * xmlParseCDSect:
9750 * @ctxt: an XML parser context
9751 *
9752 * Parse escaped pure raw content.
9753 *
9754 * [18] CDSect ::= CDStart CData CDEnd
9755 *
9756 * [19] CDStart ::= '<![CDATA['
9757 *
9758 * [20] Data ::= (Char* - (Char* ']]>' Char*))
9759 *
9760 * [21] CDEnd ::= ']]>'
9761 */
9762void
9763xmlParseCDSect(xmlParserCtxtPtr ctxt) {
9764 xmlChar *buf = NULL;
9765 int len = 0;
9766 int size = XML_PARSER_BUFFER_SIZE;
9767 int r, rl;
9768 int s, sl;
9769 int cur, l;
9770 int count = 0;
9771
9772 /* Check 2.6.0 was NXT(0) not RAW */
9773 if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) {
9774 SKIP(9);
9775 } else
9776 return;
9777
9778 ctxt->instate = XML_PARSER_CDATA_SECTION;
9779 r = CUR_CHAR(rl);
9780 if (!IS_CHAR(r)) {
9781 xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
9782 ctxt->instate = XML_PARSER_CONTENT;
9783 return;
9784 }
9785 NEXTL(rl);
9786 s = CUR_CHAR(sl);
9787 if (!IS_CHAR(s)) {
9788 xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
9789 ctxt->instate = XML_PARSER_CONTENT;
9790 return;
9791 }
9792 NEXTL(sl);
9793 cur = CUR_CHAR(l);
9794 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
9795 if (buf == NULL) {
9796 xmlErrMemory(ctxt, NULL);
9797 return;
9798 }
9799 while (IS_CHAR(cur) &&
9800 ((r != ']') || (s != ']') || (cur != '>'))) {
9801 if (len + 5 >= size) {
9802 xmlChar *tmp;
9803
9804 if ((size > XML_MAX_TEXT_LENGTH) &&
9805 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
9806 xmlFatalErrMsgStr(ctxt, XML_ERR_CDATA_NOT_FINISHED,
9807 "CData section too big found", NULL);
9808 xmlFree (buf);
9809 return;
9810 }
9811 tmp = (xmlChar *) xmlRealloc(buf, size * 2 * sizeof(xmlChar));
9812 if (tmp == NULL) {
9813 xmlFree(buf);
9814 xmlErrMemory(ctxt, NULL);
9815 return;
9816 }
9817 buf = tmp;
9818 size *= 2;
9819 }
9820 COPY_BUF(rl,buf,len,r);
9821 r = s;
9822 rl = sl;
9823 s = cur;
9824 sl = l;
9825 count++;
9826 if (count > 50) {
9827 SHRINK;
9828 GROW;
9829 if (ctxt->instate == XML_PARSER_EOF) {
9830 xmlFree(buf);
9831 return;
9832 }
9833 count = 0;
9834 }
9835 NEXTL(l);
9836 cur = CUR_CHAR(l);
9837 }
9838 buf[len] = 0;
9839 ctxt->instate = XML_PARSER_CONTENT;
9840 if (cur != '>') {
9841 xmlFatalErrMsgStr(ctxt, XML_ERR_CDATA_NOT_FINISHED,
9842 "CData section not finished\n%.50s\n", buf);
9843 xmlFree(buf);
9844 return;
9845 }
9846 NEXTL(l);
9847
9848 /*
9849 * OK the buffer is to be consumed as cdata.
9850 */
9851 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
9852 if (ctxt->sax->cdataBlock != NULL)
9853 ctxt->sax->cdataBlock(ctxt->userData, buf, len);
9854 else if (ctxt->sax->characters != NULL)
9855 ctxt->sax->characters(ctxt->userData, buf, len);
9856 }
9857 xmlFree(buf);
9858}
9859
9860/**
9861 * xmlParseContentInternal:
9862 * @ctxt: an XML parser context
9863 *
9864 * Parse a content sequence. Stops at EOF or '</'. Leaves checking of
9865 * unexpected EOF to the caller.
9866 */
9867
9868static void
9869xmlParseContentInternal(xmlParserCtxtPtr ctxt) {
9870 int nameNr = ctxt->nameNr;
9871
9872 GROW;
9873 while ((RAW != 0) &&
9874 (ctxt->instate != XML_PARSER_EOF)) {
9875 const xmlChar *test = CUR_PTR;
9876 unsigned int cons = ctxt->input->consumed;
9877 const xmlChar *cur = ctxt->input->cur;
9878
9879 /*
9880 * First case : a Processing Instruction.
9881 */
9882 if ((*cur == '<') && (cur[1] == '?')) {
9883 xmlParsePI(ctxt);
9884 }
9885
9886 /*
9887 * Second case : a CDSection
9888 */
9889 /* 2.6.0 test was *cur not RAW */
9890 else if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) {
9891 xmlParseCDSect(ctxt);
9892 }
9893
9894 /*
9895 * Third case : a comment
9896 */
9897 else if ((*cur == '<') && (NXT(1) == '!') &&
9898 (NXT(2) == '-') && (NXT(3) == '-')) {
9899 xmlParseComment(ctxt);
9900 ctxt->instate = XML_PARSER_CONTENT;
9901 }
9902
9903 /*
9904 * Fourth case : a sub-element.
9905 */
9906 else if (*cur == '<') {
9907 if (NXT(1) == '/') {
9908 if (ctxt->nameNr <= nameNr)
9909 break;
9910 xmlParseElementEnd(ctxt);
9911 } else {
9912 xmlParseElementStart(ctxt);
9913 }
9914 }
9915
9916 /*
9917 * Fifth case : a reference. If if has not been resolved,
9918 * parsing returns it's Name, create the node
9919 */
9920
9921 else if (*cur == '&') {
9922 xmlParseReference(ctxt);
9923 }
9924
9925 /*
9926 * Last case, text. Note that References are handled directly.
9927 */
9928 else {
9929 xmlParseCharData(ctxt, 0);
9930 }
9931
9932 GROW;
9933 SHRINK;
9934
9935 if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) {
9936 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
9937 "detected an error in element content\n");
9938 xmlHaltParser(ctxt);
9939 break;
9940 }
9941 }
9942}
9943
9944/**
9945 * xmlParseContent:
9946 * @ctxt: an XML parser context
9947 *
9948 * Parse a content sequence. Stops at EOF or '</'.
9949 *
9950 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
9951 */
9952
9953void
9954xmlParseContent(xmlParserCtxtPtr ctxt) {
9955 int nameNr = ctxt->nameNr;
9956
9957 xmlParseContentInternal(ctxt);
9958
9959 if ((ctxt->instate != XML_PARSER_EOF) && (ctxt->nameNr > nameNr)) {
9960 const xmlChar *name = ctxt->nameTab[ctxt->nameNr - 1];
9961 int line = ctxt->pushTab[ctxt->nameNr - 1].line;
9962 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NOT_FINISHED,
9963 "Premature end of data in tag %s line %d\n",
9964 name, line, NULL);
9965 }
9966}
9967
9968/**
9969 * xmlParseElement:
9970 * @ctxt: an XML parser context
9971 *
9972 * parse an XML element
9973 *
9974 * [39] element ::= EmptyElemTag | STag content ETag
9975 *
9976 * [ WFC: Element Type Match ]
9977 * The Name in an element's end-tag must match the element type in the
9978 * start-tag.
9979 *
9980 */
9981
9982void
9983xmlParseElement(xmlParserCtxtPtr ctxt) {
9984 if (xmlParseElementStart(ctxt) != 0)
9985 return;
9986
9987 xmlParseContentInternal(ctxt);
9988 if (ctxt->instate == XML_PARSER_EOF)
9989 return;
9990
9991 if (CUR == 0) {
9992 const xmlChar *name = ctxt->nameTab[ctxt->nameNr - 1];
9993 int line = ctxt->pushTab[ctxt->nameNr - 1].line;
9994 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NOT_FINISHED,
9995 "Premature end of data in tag %s line %d\n",
9996 name, line, NULL);
9997 return;
9998 }
9999
10000 xmlParseElementEnd(ctxt);
10001}
10002
10003/**
10004 * xmlParseElementStart:
10005 * @ctxt: an XML parser context
10006 *
10007 * Parse the start of an XML element. Returns -1 in case of error, 0 if an
10008 * opening tag was parsed, 1 if an empty element was parsed.
10009 */
static int
xmlParseElementStart(xmlParserCtxtPtr ctxt) {
    /*
     * Parses a start tag (or an empty-element tag) at the current position.
     *
     * Return value, as implemented below:
     *    1  an empty element ("<a/>") was fully parsed and popped again
     *    0  a start tag was parsed; the element stays on the name stack
     *   -1  error (depth limit hit, parser stopped, no tag name, or the
     *       start tag was not terminated by '>')
     */
    const xmlChar *name;
    const xmlChar *prefix = NULL;
    const xmlChar *URI = NULL;
    xmlParserNodeInfo node_info;
    int line, tlen = 0;
    xmlNodePtr ret;
    /* Namespace stack depth on entry; the difference to ctxt->nsNr later
     * tells how many entries this start tag pushed. */
    int nsNr = ctxt->nsNr;

    /* Refuse to nest deeper than xmlParserMaxDepth unless XML_PARSE_HUGE
     * is set; the parser is halted in that case. */
    if (((unsigned int) ctxt->nameNr > xmlParserMaxDepth) &&
        ((ctxt->options & XML_PARSE_HUGE) == 0)) {
        xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR,
                "Excessive depth in document: %d use XML_PARSE_HUGE option\n",
                          xmlParserMaxDepth);
        xmlHaltParser(ctxt);
        return(-1);
    }

    /* Capture start position */
    if (ctxt->record_info) {
        node_info.begin_pos = ctxt->input->consumed +
                          (CUR_PTR - ctxt->input->base);
        node_info.begin_line = ctxt->input->line;
    }

    /* Push a space-handling value for this element: -1 when the stack is
     * empty or its top is -2, otherwise a copy of the current top. */
    if (ctxt->spaceNr == 0)
        spacePush(ctxt, -1);
    else if (*ctxt->space == -2)
        spacePush(ctxt, -1);
    else
        spacePush(ctxt, *ctxt->space);

    /* Remember the line of the '<' for error messages and nameNsPush(). */
    line = ctxt->input->line;
    /* SAX2 contexts use the namespace-aware start tag parser; the SAX1
     * variant is only compiled in when LIBXML_SAX1_ENABLED is defined. */
#ifdef LIBXML_SAX1_ENABLED
    if (ctxt->sax2)
#endif /* LIBXML_SAX1_ENABLED */
        name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen);
#ifdef LIBXML_SAX1_ENABLED
    else
        name = xmlParseStartTag(ctxt);
#endif /* LIBXML_SAX1_ENABLED */
    if (ctxt->instate == XML_PARSER_EOF)
        return(-1);
    if (name == NULL) {
        /* No element was opened: undo the spacePush() from above. */
        spacePop(ctxt);
        return(-1);
    }
    nameNsPush(ctxt, name, prefix, URI, line, ctxt->nsNr - nsNr);
    /* Node that is current after the start tag was processed; used for
     * the node-info records below. */
    ret = ctxt->node;

#ifdef LIBXML_VALID_ENABLED
    /*
     * [ VC: Root Element Type ]
     * The Name in the document type declaration must match the element
     * type of the root element.
     */
    if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
        ctxt->node && (ctxt->node == ctxt->myDoc->children))
        ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
#endif /* LIBXML_VALID_ENABLED */

    /*
     * Check for an Empty Element.
     */
    if ((RAW == '/') && (NXT(1) == '>')) {
        SKIP(2);
        /* An empty element gets its end callback right away. */
        if (ctxt->sax2) {
            if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
                (!ctxt->disableSAX))
                ctxt->sax->endElementNs(ctxt->userData, name, prefix, URI);
#ifdef LIBXML_SAX1_ENABLED
        } else {
            if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
                (!ctxt->disableSAX))
                ctxt->sax->endElement(ctxt->userData, name);
#endif /* LIBXML_SAX1_ENABLED */
        }
        /* Unwind everything this tag pushed. */
        namePop(ctxt);
        spacePop(ctxt);
        if (nsNr != ctxt->nsNr)
            nsPop(ctxt, ctxt->nsNr - nsNr);
        if ( ret != NULL && ctxt->record_info ) {
            node_info.end_pos = ctxt->input->consumed +
                                (CUR_PTR - ctxt->input->base);
            node_info.end_line = ctxt->input->line;
            node_info.node = ret;
            xmlParserAddNodeInfo(ctxt, &node_info);
        }
        return(1);
    }
    if (RAW == '>') {
        NEXT1;
    } else {
        xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_GT_REQUIRED,
                     "Couldn't find end of Start Tag %s line %d\n",
                                name, line, NULL);

        /*
         * end of parsing of this node.
         */
        nodePop(ctxt);
        namePop(ctxt);
        spacePop(ctxt);
        if (nsNr != ctxt->nsNr)
            nsPop(ctxt, ctxt->nsNr - nsNr);

        /*
         * Capture end position and add node
         */
        if ( ret != NULL && ctxt->record_info ) {
            node_info.end_pos = ctxt->input->consumed +
                                (CUR_PTR - ctxt->input->base);
            node_info.end_line = ctxt->input->line;
            node_info.node = ret;
            xmlParserAddNodeInfo(ctxt, &node_info);
        }
        return(-1);
    }

    /* Regular start tag: the element stays open for its content. */
    return(0);
}
10132
10133/**
10134 * xmlParseElementEnd:
10135 * @ctxt: an XML parser context
10136 *
10137 * Parse the end of an XML element.
10138 */
10139static void
10140xmlParseElementEnd(xmlParserCtxtPtr ctxt) {
10141 xmlParserNodeInfo node_info;
10142 xmlNodePtr ret = ctxt->node;
10143
10144 if (ctxt->nameNr <= 0)
10145 return;
10146
10147 /*
10148 * parse the end of tag: '</' should be here.
10149 */
10150 if (ctxt->sax2) {
10151 xmlParseEndTag2(ctxt, &ctxt->pushTab[ctxt->nameNr - 1]);
10152 namePop(ctxt);
10153 }
10154#ifdef LIBXML_SAX1_ENABLED
10155 else
10156 xmlParseEndTag1(ctxt, 0);
10157#endif /* LIBXML_SAX1_ENABLED */
10158
10159 /*
10160 * Capture end position and add node
10161 */
10162 if ( ret != NULL && ctxt->record_info ) {
10163 node_info.end_pos = ctxt->input->consumed +
10164 (CUR_PTR - ctxt->input->base);
10165 node_info.end_line = ctxt->input->line;
10166 node_info.node = ret;
10167 xmlParserAddNodeInfo(ctxt, &node_info);
10168 }
10169}
10170
10171/**
10172 * xmlParseVersionNum:
10173 * @ctxt: an XML parser context
10174 *
10175 * parse the XML version value.
10176 *
10177 * [26] VersionNum ::= '1.' [0-9]+
10178 *
10179 * In practice allow [0-9].[0-9]+ at that level
10180 *
10181 * Returns the string giving the XML version number, or NULL
10182 */
10183xmlChar *
10184xmlParseVersionNum(xmlParserCtxtPtr ctxt) {
10185 xmlChar *buf = NULL;
10186 int len = 0;
10187 int size = 10;
10188 xmlChar cur;
10189
10190 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
10191 if (buf == NULL) {
10192 xmlErrMemory(ctxt, NULL);
10193 return(NULL);
10194 }
10195 cur = CUR;
10196 if (!((cur >= '0') && (cur <= '9'))) {
10197 xmlFree(buf);
10198 return(NULL);
10199 }
10200 buf[len++] = cur;
10201 NEXT;
10202 cur=CUR;
10203 if (cur != '.') {
10204 xmlFree(buf);
10205 return(NULL);
10206 }
10207 buf[len++] = cur;
10208 NEXT;
10209 cur=CUR;
10210 while ((cur >= '0') && (cur <= '9')) {
10211 if (len + 1 >= size) {
10212 xmlChar *tmp;
10213
10214 size *= 2;
10215 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
10216 if (tmp == NULL) {
10217 xmlFree(buf);
10218 xmlErrMemory(ctxt, NULL);
10219 return(NULL);
10220 }
10221 buf = tmp;
10222 }
10223 buf[len++] = cur;
10224 NEXT;
10225 cur=CUR;
10226 }
10227 buf[len] = 0;
10228 return(buf);
10229}
10230
10231/**
10232 * xmlParseVersionInfo:
10233 * @ctxt: an XML parser context
10234 *
10235 * parse the XML version.
10236 *
10237 * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
10238 *
10239 * [25] Eq ::= S? '=' S?
10240 *
10241 * Returns the version string, e.g. "1.0"
10242 */
10243
10244xmlChar *
10245xmlParseVersionInfo(xmlParserCtxtPtr ctxt) {
10246 xmlChar *version = NULL;
10247
10248 if (CMP7(CUR_PTR, 'v', 'e', 'r', 's', 'i', 'o', 'n')) {
10249 SKIP(7);
10250 SKIP_BLANKS;
10251 if (RAW != '=') {
10252 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
10253 return(NULL);
10254 }
10255 NEXT;
10256 SKIP_BLANKS;
10257 if (RAW == '"') {
10258 NEXT;
10259 version = xmlParseVersionNum(ctxt);
10260 if (RAW != '"') {
10261 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10262 } else
10263 NEXT;
10264 } else if (RAW == '\''){
10265 NEXT;
10266 version = xmlParseVersionNum(ctxt);
10267 if (RAW != '\'') {
10268 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10269 } else
10270 NEXT;
10271 } else {
10272 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
10273 }
10274 }
10275 return(version);
10276}
10277
10278/**
10279 * xmlParseEncName:
10280 * @ctxt: an XML parser context
10281 *
10282 * parse the XML encoding name
10283 *
10284 * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
10285 *
10286 * Returns the encoding name value or NULL
10287 */
10288xmlChar *
10289xmlParseEncName(xmlParserCtxtPtr ctxt) {
10290 xmlChar *buf = NULL;
10291 int len = 0;
10292 int size = 10;
10293 xmlChar cur;
10294
10295 cur = CUR;
10296 if (((cur >= 'a') && (cur <= 'z')) ||
10297 ((cur >= 'A') && (cur <= 'Z'))) {
10298 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
10299 if (buf == NULL) {
10300 xmlErrMemory(ctxt, NULL);
10301 return(NULL);
10302 }
10303
10304 buf[len++] = cur;
10305 NEXT;
10306 cur = CUR;
10307 while (((cur >= 'a') && (cur <= 'z')) ||
10308 ((cur >= 'A') && (cur <= 'Z')) ||
10309 ((cur >= '0') && (cur <= '9')) ||
10310 (cur == '.') || (cur == '_') ||
10311 (cur == '-')) {
10312 if (len + 1 >= size) {
10313 xmlChar *tmp;
10314
10315 size *= 2;
10316 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
10317 if (tmp == NULL) {
10318 xmlErrMemory(ctxt, NULL);
10319 xmlFree(buf);
10320 return(NULL);
10321 }
10322 buf = tmp;
10323 }
10324 buf[len++] = cur;
10325 NEXT;
10326 cur = CUR;
10327 if (cur == 0) {
10328 SHRINK;
10329 GROW;
10330 cur = CUR;
10331 }
10332 }
10333 buf[len] = 0;
10334 } else {
10335 xmlFatalErr(ctxt, XML_ERR_ENCODING_NAME, NULL);
10336 }
10337 return(buf);
10338}
10339
10340/**
10341 * xmlParseEncodingDecl:
10342 * @ctxt: an XML parser context
10343 *
10344 * parse the XML encoding declaration
10345 *
10346 * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'")
10347 *
10348 * this setups the conversion filters.
10349 *
10350 * Returns the encoding value or NULL
10351 */
10352
10353const xmlChar *
10354xmlParseEncodingDecl(xmlParserCtxtPtr ctxt) {
10355 xmlChar *encoding = NULL;
10356
10357 SKIP_BLANKS;
10358 if (CMP8(CUR_PTR, 'e', 'n', 'c', 'o', 'd', 'i', 'n', 'g')) {
10359 SKIP(8);
10360 SKIP_BLANKS;
10361 if (RAW != '=') {
10362 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
10363 return(NULL);
10364 }
10365 NEXT;
10366 SKIP_BLANKS;
10367 if (RAW == '"') {
10368 NEXT;
10369 encoding = xmlParseEncName(ctxt);
10370 if (RAW != '"') {
10371 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10372 xmlFree((xmlChar *) encoding);
10373 return(NULL);
10374 } else
10375 NEXT;
10376 } else if (RAW == '\''){
10377 NEXT;
10378 encoding = xmlParseEncName(ctxt);
10379 if (RAW != '\'') {
10380 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10381 xmlFree((xmlChar *) encoding);
10382 return(NULL);
10383 } else
10384 NEXT;
10385 } else {
10386 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
10387 }
10388
10389 /*
10390 * Non standard parsing, allowing the user to ignore encoding
10391 */
10392 if (ctxt->options & XML_PARSE_IGNORE_ENC) {
10393 xmlFree((xmlChar *) encoding);
10394 return(NULL);
10395 }
10396
10397 /*
10398 * UTF-16 encoding switch has already taken place at this stage,
10399 * more over the little-endian/big-endian selection is already done
10400 */
10401 if ((encoding != NULL) &&
10402 ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-16")) ||
10403 (!xmlStrcasecmp(encoding, BAD_CAST "UTF16")))) {
10404 /*
10405 * If no encoding was passed to the parser, that we are
10406 * using UTF-16 and no decoder is present i.e. the
10407 * document is apparently UTF-8 compatible, then raise an
10408 * encoding mismatch fatal error
10409 */
10410 if ((ctxt->encoding == NULL) &&
10411 (ctxt->input->buf != NULL) &&
10412 (ctxt->input->buf->encoder == NULL)) {
10413 xmlFatalErrMsg(ctxt, XML_ERR_INVALID_ENCODING,
10414 "Document labelled UTF-16 but has UTF-8 content\n");
10415 }
10416 if (ctxt->encoding != NULL)
10417 xmlFree((xmlChar *) ctxt->encoding);
10418 ctxt->encoding = encoding;
10419 }
10420 /*
10421 * UTF-8 encoding is handled natively
10422 */
10423 else if ((encoding != NULL) &&
10424 ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-8")) ||
10425 (!xmlStrcasecmp(encoding, BAD_CAST "UTF8")))) {
10426 if (ctxt->encoding != NULL)
10427 xmlFree((xmlChar *) ctxt->encoding);
10428 ctxt->encoding = encoding;
10429 }
10430 else if (encoding != NULL) {
10431 xmlCharEncodingHandlerPtr handler;
10432
10433 if (ctxt->input->encoding != NULL)
10434 xmlFree((xmlChar *) ctxt->input->encoding);
10435 ctxt->input->encoding = encoding;
10436
10437 handler = xmlFindCharEncodingHandler((const char *) encoding);
10438 if (handler != NULL) {
10439 if (xmlSwitchToEncoding(ctxt, handler) < 0) {
10440 /* failed to convert */
10441 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
10442 return(NULL);
10443 }
10444 } else {
10445 xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
10446 "Unsupported encoding %s\n", encoding);
10447 return(NULL);
10448 }
10449 }
10450 }
10451 return(encoding);
10452}
10453
10454/**
10455 * xmlParseSDDecl:
10456 * @ctxt: an XML parser context
10457 *
10458 * parse the XML standalone declaration
10459 *
10460 * [32] SDDecl ::= S 'standalone' Eq
10461 * (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no')'"'))
10462 *
10463 * [ VC: Standalone Document Declaration ]
10464 * TODO The standalone document declaration must have the value "no"
10465 * if any external markup declarations contain declarations of:
10466 * - attributes with default values, if elements to which these
10467 * attributes apply appear in the document without specifications
10468 * of values for these attributes, or
10469 * - entities (other than amp, lt, gt, apos, quot), if references
10470 * to those entities appear in the document, or
10471 * - attributes with values subject to normalization, where the
10472 * attribute appears in the document with a value which will change
10473 * as a result of normalization, or
10474 * - element types with element content, if white space occurs directly
10475 * within any instance of those types.
10476 *
10477 * Returns:
10478 * 1 if standalone="yes"
10479 * 0 if standalone="no"
10480 * -2 if standalone attribute is missing or invalid
10481 * (A standalone value of -2 means that the XML declaration was found,
10482 * but no value was specified for the standalone attribute).
10483 */
10484
10485int
10486xmlParseSDDecl(xmlParserCtxtPtr ctxt) {
10487 int standalone = -2;
10488
10489 SKIP_BLANKS;
10490 if (CMP10(CUR_PTR, 's', 't', 'a', 'n', 'd', 'a', 'l', 'o', 'n', 'e')) {
10491 SKIP(10);
10492 SKIP_BLANKS;
10493 if (RAW != '=') {
10494 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
10495 return(standalone);
10496 }
10497 NEXT;
10498 SKIP_BLANKS;
10499 if (RAW == '\''){
10500 NEXT;
10501 if ((RAW == 'n') && (NXT(1) == 'o')) {
10502 standalone = 0;
10503 SKIP(2);
10504 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
10505 (NXT(2) == 's')) {
10506 standalone = 1;
10507 SKIP(3);
10508 } else {
10509 xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
10510 }
10511 if (RAW != '\'') {
10512 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10513 } else
10514 NEXT;
10515 } else if (RAW == '"'){
10516 NEXT;
10517 if ((RAW == 'n') && (NXT(1) == 'o')) {
10518 standalone = 0;
10519 SKIP(2);
10520 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
10521 (NXT(2) == 's')) {
10522 standalone = 1;
10523 SKIP(3);
10524 } else {
10525 xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
10526 }
10527 if (RAW != '"') {
10528 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10529 } else
10530 NEXT;
10531 } else {
10532 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
10533 }
10534 }
10535 return(standalone);
10536}
10537
10538/**
10539 * xmlParseXMLDecl:
10540 * @ctxt: an XML parser context
10541 *
10542 * parse an XML declaration header
10543 *
10544 * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
10545 */
10546
10547void
10548xmlParseXMLDecl(xmlParserCtxtPtr ctxt) {
10549 xmlChar *version;
10550
10551 /*
10552 * This value for standalone indicates that the document has an
10553 * XML declaration but it does not have a standalone attribute.
10554 * It will be overwritten later if a standalone attribute is found.
10555 */
10556 ctxt->input->standalone = -2;
10557
10558 /*
10559 * We know that '<?xml' is here.
10560 */
10561 SKIP(5);
10562
10563 if (!IS_BLANK_CH(RAW)) {
10564 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
10565 "Blank needed after '<?xml'\n");
10566 }
10567 SKIP_BLANKS;
10568
10569 /*
10570 * We must have the VersionInfo here.
10571 */
10572 version = xmlParseVersionInfo(ctxt);
10573 if (version == NULL) {
10574 xmlFatalErr(ctxt, XML_ERR_VERSION_MISSING, NULL);
10575 } else {
10576 if (!xmlStrEqual(version, (const xmlChar *) XML_DEFAULT_VERSION)) {
10577 /*
10578 * Changed here for XML-1.0 5th edition
10579 */
10580 if (ctxt->options & XML_PARSE_OLD10) {
10581 xmlFatalErrMsgStr(ctxt, XML_ERR_UNKNOWN_VERSION,
10582 "Unsupported version '%s'\n",
10583 version);
10584 } else {
10585 if ((version[0] == '1') && ((version[1] == '.'))) {
10586 xmlWarningMsg(ctxt, XML_WAR_UNKNOWN_VERSION,
10587 "Unsupported version '%s'\n",
10588 version, NULL);
10589 } else {
10590 xmlFatalErrMsgStr(ctxt, XML_ERR_UNKNOWN_VERSION,
10591 "Unsupported version '%s'\n",
10592 version);
10593 }
10594 }
10595 }
10596 if (ctxt->version != NULL)
10597 xmlFree((void *) ctxt->version);
10598 ctxt->version = version;
10599 }
10600
10601 /*
10602 * We may have the encoding declaration
10603 */
10604 if (!IS_BLANK_CH(RAW)) {
10605 if ((RAW == '?') && (NXT(1) == '>')) {
10606 SKIP(2);
10607 return;
10608 }
10609 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
10610 }
10611 xmlParseEncodingDecl(ctxt);
10612 if ((ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) ||
10613 (ctxt->instate == XML_PARSER_EOF)) {
10614 /*
10615 * The XML REC instructs us to stop parsing right here
10616 */
10617 return;
10618 }
10619
10620 /*
10621 * We may have the standalone status.
10622 */
10623 if ((ctxt->input->encoding != NULL) && (!IS_BLANK_CH(RAW))) {
10624 if ((RAW == '?') && (NXT(1) == '>')) {
10625 SKIP(2);
10626 return;
10627 }
10628 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
10629 }
10630
10631 /*
10632 * We can grow the input buffer freely at that point
10633 */
10634 GROW;
10635
10636 SKIP_BLANKS;
10637 ctxt->input->standalone = xmlParseSDDecl(ctxt);
10638
10639 SKIP_BLANKS;
10640 if ((RAW == '?') && (NXT(1) == '>')) {
10641 SKIP(2);
10642 } else if (RAW == '>') {
10643 /* Deprecated old WD ... */
10644 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
10645 NEXT;
10646 } else {
10647 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
10648 MOVETO_ENDTAG(CUR_PTR);
10649 NEXT;
10650 }
10651}
10652
10653/**
10654 * xmlParseMisc:
10655 * @ctxt: an XML parser context
10656 *
10657 * parse an XML Misc* optional field.
10658 *
10659 * [27] Misc ::= Comment | PI | S
10660 */
10661
10662void
10663xmlParseMisc(xmlParserCtxtPtr ctxt) {
10664 while (ctxt->instate != XML_PARSER_EOF) {
10665 SKIP_BLANKS;
10666 GROW;
10667 if ((RAW == '<') && (NXT(1) == '?')) {
10668 xmlParsePI(ctxt);
10669 } else if (CMP4(CUR_PTR, '<', '!', '-', '-')) {
10670 xmlParseComment(ctxt);
10671 } else {
10672 break;
10673 }
10674 }
10675}
10676
/**
 * xmlParseDocument:
 * @ctxt:  an XML parser context
 *
 * parse an XML document (and build a tree if using the standard SAX
 * interface).
 *
 * [1] document ::= prolog element Misc*
 *
 * [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)?
 *
 * Returns 0 if the document was well-formed, -1 otherwise (also when
 *         @ctxt or its input is NULL, the input is empty, the declared
 *         encoding is unsupported, or the parser entered the EOF state
 *         early). The parser context is augmented as a result of the
 *         parsing.
 */

int
xmlParseDocument(xmlParserCtxtPtr ctxt) {
    xmlChar start[4];
    xmlCharEncoding enc;

    xmlInitParser();

    if ((ctxt == NULL) || (ctxt->input == NULL))
        return(-1);

    GROW;

    /*
     * SAX: detecting the level.
     */
    xmlDetectSAX2(ctxt);

    /*
     * SAX: beginning of the document processing.
     */
    if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
        ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
    /* The callback may have stopped the parser. */
    if (ctxt->instate == XML_PARSER_EOF)
        return(-1);

    /* Auto-detect the encoding only when none was set on the context
     * and at least 4 bytes are available to look at. */
    if ((ctxt->encoding == NULL) &&
        ((ctxt->input->end - ctxt->input->cur) >= 4)) {
        /*
         * Get the 4 first bytes and decode the charset
         * if enc != XML_CHAR_ENCODING_NONE
         * plug some encoding conversion routines.
         */
        start[0] = RAW;
        start[1] = NXT(1);
        start[2] = NXT(2);
        start[3] = NXT(3);
        enc = xmlDetectCharEncoding(&start[0], 4);
        if (enc != XML_CHAR_ENCODING_NONE) {
            xmlSwitchEncoding(ctxt, enc);
        }
    }


    if (CUR == 0) {
        xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
        return(-1);
    }

    /*
     * Check for the XMLDecl in the Prolog.
     * do not GROW here to avoid the detected encoder to decode more
     * than just the first line, unless the amount of data is really
     * too small to hold "<?xml version="1.0" encoding="foo"
     */
    if ((ctxt->input->end - ctxt->input->cur) < 35) {
        GROW;
    }
    if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {

        /*
         * Note that we will switch encoding on the fly.
         */
        xmlParseXMLDecl(ctxt);
        if ((ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) ||
            (ctxt->instate == XML_PARSER_EOF)) {
            /*
             * The XML REC instructs us to stop parsing right here
             */
            return(-1);
        }
        /* Copy the standalone status from the input to the context. */
        ctxt->standalone = ctxt->input->standalone;
        SKIP_BLANKS;
    } else {
        /* No XML declaration: assume the default version. */
        ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
    }
    if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
        ctxt->sax->startDocument(ctxt->userData);
    if (ctxt->instate == XML_PARSER_EOF)
        return(-1);
    /* Propagate the input buffer's compression value to the document,
     * if one is set (>= 0). */
    if ((ctxt->myDoc != NULL) && (ctxt->input != NULL) &&
        (ctxt->input->buf != NULL) && (ctxt->input->buf->compressed >= 0)) {
        ctxt->myDoc->compression = ctxt->input->buf->compressed;
    }

    /*
     * The Misc part of the Prolog
     */
    xmlParseMisc(ctxt);

    /*
     * Then possibly doc type declaration(s) and more Misc
     * (doctypedecl Misc*)?
     */
    GROW;
    if (CMP9(CUR_PTR, '<', '!', 'D', 'O', 'C', 'T', 'Y', 'P', 'E')) {

        /* inSubset: 1 while in the internal subset, 2 for the external
         * subset, 0 once the DTD is done (see assignments below). */
        ctxt->inSubset = 1;
        xmlParseDocTypeDecl(ctxt);
        if (RAW == '[') {
            ctxt->instate = XML_PARSER_DTD;
            xmlParseInternalSubset(ctxt);
            if (ctxt->instate == XML_PARSER_EOF)
                return(-1);
        }

        /*
         * Create and update the external subset.
         */
        ctxt->inSubset = 2;
        if ((ctxt->sax != NULL) && (ctxt->sax->externalSubset != NULL) &&
            (!ctxt->disableSAX))
            ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
                                      ctxt->extSubSystem, ctxt->extSubURI);
        if (ctxt->instate == XML_PARSER_EOF)
            return(-1);
        ctxt->inSubset = 0;

        xmlCleanSpecialAttr(ctxt);

        ctxt->instate = XML_PARSER_PROLOG;
        xmlParseMisc(ctxt);
    }

    /*
     * Time to start parsing the tree itself
     */
    GROW;
    if (RAW != '<') {
        xmlFatalErrMsg(ctxt, XML_ERR_DOCUMENT_EMPTY,
                       "Start tag expected, '<' not found\n");
    } else {
        ctxt->instate = XML_PARSER_CONTENT;
        xmlParseElement(ctxt);
        ctxt->instate = XML_PARSER_EPILOG;


        /*
         * The Misc part at the end
         */
        xmlParseMisc(ctxt);

        /* Anything left after the trailing Misc is an error. */
        if (RAW != 0) {
            xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
        }
        ctxt->instate = XML_PARSER_EOF;
    }

    /*
     * SAX: end of the document processing.
     * Note: unlike startDocument, this is not gated on disableSAX.
     */
    if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
        ctxt->sax->endDocument(ctxt->userData);

    /*
     * Remove locally kept entity definitions if the tree was not built
     */
    if ((ctxt->myDoc != NULL) &&
        (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
        xmlFreeDoc(ctxt->myDoc);
        ctxt->myDoc = NULL;
    }

    /* Record the outcome in the document's property flags. */
    if ((ctxt->wellFormed) && (ctxt->myDoc != NULL)) {
        ctxt->myDoc->properties |= XML_DOC_WELLFORMED;
        if (ctxt->valid)
            ctxt->myDoc->properties |= XML_DOC_DTDVALID;
        if (ctxt->nsWellFormed)
            ctxt->myDoc->properties |= XML_DOC_NSVALID;
        if (ctxt->options & XML_PARSE_OLD10)
            ctxt->myDoc->properties |= XML_DOC_OLD10;
    }
    if (! ctxt->wellFormed) {
        /* A document that is not well-formed cannot be valid. */
        ctxt->valid = 0;
        return(-1);
    }
    return(0);
}
10869
/**
 * xmlParseExtParsedEnt:
 * @ctxt:  an XML parser context
 *
 * parse a general parsed entity
 * An external general parsed entity is well-formed if it matches the
 * production labeled extParsedEnt.
 *
 * [78] extParsedEnt ::= TextDecl? content
 *
 * Validation and external subset loading are switched off on @ctxt
 * before the content is parsed.
 *
 * Returns 0 if the entity was well-formed, -1 otherwise (also when
 *         @ctxt or its input is NULL, the declared encoding is
 *         unsupported, or the parser entered the EOF state). The parser
 *         context is augmented as a result of the parsing.
 */

int
xmlParseExtParsedEnt(xmlParserCtxtPtr ctxt) {
    xmlChar start[4];
    xmlCharEncoding enc;

    if ((ctxt == NULL) || (ctxt->input == NULL))
        return(-1);

    xmlDefaultSAXHandlerInit();

    xmlDetectSAX2(ctxt);

    GROW;

    /*
     * SAX: beginning of the document processing.
     */
    if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
        ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);

    /*
     * Get the 4 first bytes and decode the charset
     * if enc != XML_CHAR_ENCODING_NONE
     * plug some encoding conversion routines.
     */
    if ((ctxt->input->end - ctxt->input->cur) >= 4) {
        start[0] = RAW;
        start[1] = NXT(1);
        start[2] = NXT(2);
        start[3] = NXT(3);
        enc = xmlDetectCharEncoding(start, 4);
        if (enc != XML_CHAR_ENCODING_NONE) {
            xmlSwitchEncoding(ctxt, enc);
        }
    }


    /* An empty entity is reported, but parsing continues below. */
    if (CUR == 0) {
        xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
    }

    /*
     * Check for the XMLDecl in the Prolog.
     */
    GROW;
    if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {

        /*
         * Note that we will switch encoding on the fly.
         */
        xmlParseXMLDecl(ctxt);
        if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
            /*
             * The XML REC instructs us to stop parsing right here
             */
            return(-1);
        }
        SKIP_BLANKS;
    } else {
        /* No declaration: assume the default version. */
        ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
    }
    if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
        ctxt->sax->startDocument(ctxt->userData);
    if (ctxt->instate == XML_PARSER_EOF)
        return(-1);

    /*
     * Doing validity checking on chunk doesn't make sense
     */
    ctxt->instate = XML_PARSER_CONTENT;
    ctxt->validate = 0;
    ctxt->loadsubset = 0;
    ctxt->depth = 0;

    xmlParseContent(ctxt);
    if (ctxt->instate == XML_PARSER_EOF)
        return(-1);

    /* A leftover end tag means unbalanced content; any other leftover
     * byte is extra content. */
    if ((RAW == '<') && (NXT(1) == '/')) {
        xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
    } else if (RAW != 0) {
        xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
    }

    /*
     * SAX: end of the document processing.
     */
    if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
        ctxt->sax->endDocument(ctxt->userData);

    if (! ctxt->wellFormed) return(-1);
    return(0);
}
10977
10978#ifdef LIBXML_PUSH_ENABLED
10979/************************************************************************
10980 * *
10981 * Progressive parsing interfaces *
10982 * *
10983 ************************************************************************/
10984
10985/**
10986 * xmlParseLookupSequence:
10987 * @ctxt: an XML parser context
10988 * @first: the first char to lookup
10989 * @next: the next char to lookup or zero
10990 * @third: the next char to lookup or zero
10991 *
10992 * Try to find if a sequence (first, next, third) or just (first next) or
10993 * (first) is available in the input stream.
10994 * This function has a side effect of (possibly) incrementing ctxt->checkIndex
10995 * to avoid rescanning sequences of bytes, it DOES change the state of the
10996 * parser, do not use liberally.
10997 *
10998 * Returns the index to the current parsing point if the full sequence
10999 * is available, -1 otherwise.
11000 */
11001static int
11002xmlParseLookupSequence(xmlParserCtxtPtr ctxt, xmlChar first,
11003 xmlChar next, xmlChar third) {
11004 int base, len;
11005 xmlParserInputPtr in;
11006 const xmlChar *buf;
11007
11008 in = ctxt->input;
11009 if (in == NULL) return(-1);
11010 base = in->cur - in->base;
11011 if (base < 0) return(-1);
11012 if (ctxt->checkIndex > base)
11013 base = ctxt->checkIndex;
11014 if (in->buf == NULL) {
11015 buf = in->base;
11016 len = in->length;
11017 } else {
11018 buf = xmlBufContent(in->buf->buffer);
11019 len = xmlBufUse(in->buf->buffer);
11020 }
11021 /* take into account the sequence length */
11022 if (third) len -= 2;
11023 else if (next) len --;
11024 for (;base < len;base++) {
11025 if (buf[base] == first) {
11026 if (third != 0) {
11027 if ((buf[base + 1] != next) ||
11028 (buf[base + 2] != third)) continue;
11029 } else if (next != 0) {
11030 if (buf[base + 1] != next) continue;
11031 }
11032 ctxt->checkIndex = 0;
11033#ifdef DEBUG_PUSH
11034 if (next == 0)
11035 xmlGenericError(xmlGenericErrorContext,
11036 "PP: lookup '%c' found at %d\n",
11037 first, base);
11038 else if (third == 0)
11039 xmlGenericError(xmlGenericErrorContext,
11040 "PP: lookup '%c%c' found at %d\n",
11041 first, next, base);
11042 else
11043 xmlGenericError(xmlGenericErrorContext,
11044 "PP: lookup '%c%c%c' found at %d\n",
11045 first, next, third, base);
11046#endif
11047 return(base - (in->cur - in->base));
11048 }
11049 }
11050 ctxt->checkIndex = base;
11051#ifdef DEBUG_PUSH
11052 if (next == 0)
11053 xmlGenericError(xmlGenericErrorContext,
11054 "PP: lookup '%c' failed\n", first);
11055 else if (third == 0)
11056 xmlGenericError(xmlGenericErrorContext,
11057 "PP: lookup '%c%c' failed\n", first, next);
11058 else
11059 xmlGenericError(xmlGenericErrorContext,
11060 "PP: lookup '%c%c%c' failed\n", first, next, third);
11061#endif
11062 return(-1);
11063}
11064
11065/**
11066 * xmlParseGetLasts:
11067 * @ctxt: an XML parser context
11068 * @lastlt: pointer to store the last '<' from the input
11069 * @lastgt: pointer to store the last '>' from the input
11070 *
11071 * Lookup the last < and > in the current chunk
11072 */
11073static void
11074xmlParseGetLasts(xmlParserCtxtPtr ctxt, const xmlChar **lastlt,
11075 const xmlChar **lastgt) {
11076 const xmlChar *tmp;
11077
11078 if ((ctxt == NULL) || (lastlt == NULL) || (lastgt == NULL)) {
11079 xmlGenericError(xmlGenericErrorContext,
11080 "Internal error: xmlParseGetLasts\n");
11081 return;
11082 }
11083 if ((ctxt->progressive != 0) && (ctxt->inputNr == 1)) {
11084 tmp = ctxt->input->end;
11085 tmp--;
11086 while ((tmp >= ctxt->input->base) && (*tmp != '<')) tmp--;
11087 if (tmp < ctxt->input->base) {
11088 *lastlt = NULL;
11089 *lastgt = NULL;
11090 } else {
11091 *lastlt = tmp;
11092 tmp++;
11093 while ((tmp < ctxt->input->end) && (*tmp != '>')) {
11094 if (*tmp == '\'') {
11095 tmp++;
11096 while ((tmp < ctxt->input->end) && (*tmp != '\'')) tmp++;
11097 if (tmp < ctxt->input->end) tmp++;
11098 } else if (*tmp == '"') {
11099 tmp++;
11100 while ((tmp < ctxt->input->end) && (*tmp != '"')) tmp++;
11101 if (tmp < ctxt->input->end) tmp++;
11102 } else
11103 tmp++;
11104 }
11105 if (tmp < ctxt->input->end)
11106 *lastgt = tmp;
11107 else {
11108 tmp = *lastlt;
11109 tmp--;
11110 while ((tmp >= ctxt->input->base) && (*tmp != '>')) tmp--;
11111 if (tmp >= ctxt->input->base)
11112 *lastgt = tmp;
11113 else
11114 *lastgt = NULL;
11115 }
11116 }
11117 } else {
11118 *lastlt = NULL;
11119 *lastgt = NULL;
11120 }
11121}
11122/**
11123 * xmlCheckCdataPush:
11124 * @cur: pointer to the block of characters
11125 * @len: length of the block in bytes
11126 * @complete: 1 if complete CDATA block is passed in, 0 if partial block
11127 *
11128 * Check that the block of characters is okay as SCdata content [20]
11129 *
11130 * Returns the number of bytes to pass if okay, a negative index where an
11131 * UTF-8 error occurred otherwise
11132 */
11133static int
11134xmlCheckCdataPush(const xmlChar *utf, int len, int complete) {
11135 int ix;
11136 unsigned char c;
11137 int codepoint;
11138
11139 if ((utf == NULL) || (len <= 0))
11140 return(0);
11141
11142 for (ix = 0; ix < len;) { /* string is 0-terminated */
11143 c = utf[ix];
11144 if ((c & 0x80) == 0x00) { /* 1-byte code, starts with 10 */
11145 if (c >= 0x20)
11146 ix++;
11147 else if ((c == 0xA) || (c == 0xD) || (c == 0x9))
11148 ix++;
11149 else
11150 return(-ix);
11151 } else if ((c & 0xe0) == 0xc0) {/* 2-byte code, starts with 110 */
11152 if (ix + 2 > len) return(complete ? -ix : ix);
11153 if ((utf[ix+1] & 0xc0 ) != 0x80)
11154 return(-ix);
11155 codepoint = (utf[ix] & 0x1f) << 6;
11156 codepoint |= utf[ix+1] & 0x3f;
11157 if (!xmlIsCharQ(codepoint))
11158 return(-ix);
11159 ix += 2;
11160 } else if ((c & 0xf0) == 0xe0) {/* 3-byte code, starts with 1110 */
11161 if (ix + 3 > len) return(complete ? -ix : ix);
11162 if (((utf[ix+1] & 0xc0) != 0x80) ||
11163 ((utf[ix+2] & 0xc0) != 0x80))
11164 return(-ix);
11165 codepoint = (utf[ix] & 0xf) << 12;
11166 codepoint |= (utf[ix+1] & 0x3f) << 6;
11167 codepoint |= utf[ix+2] & 0x3f;
11168 if (!xmlIsCharQ(codepoint))
11169 return(-ix);
11170 ix += 3;
11171 } else if ((c & 0xf8) == 0xf0) {/* 4-byte code, starts with 11110 */
11172 if (ix + 4 > len) return(complete ? -ix : ix);
11173 if (((utf[ix+1] & 0xc0) != 0x80) ||
11174 ((utf[ix+2] & 0xc0) != 0x80) ||
11175 ((utf[ix+3] & 0xc0) != 0x80))
11176 return(-ix);
11177 codepoint = (utf[ix] & 0x7) << 18;
11178 codepoint |= (utf[ix+1] & 0x3f) << 12;
11179 codepoint |= (utf[ix+2] & 0x3f) << 6;
11180 codepoint |= utf[ix+3] & 0x3f;
11181 if (!xmlIsCharQ(codepoint))
11182 return(-ix);
11183 ix += 4;
11184 } else /* unknown encoding */
11185 return(-ix);
11186 }
11187 return(ix);
11188}
11189
11190/**
11191 * xmlParseTryOrFinish:
11192 * @ctxt: an XML parser context
11193 * @terminate: last chunk indicator
11194 *
11195 * Try to progress on parsing
11196 *
11197 * Returns zero if no parsing was possible
11198 */
11199static int
11200xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
11201 int ret = 0;
11202 int avail, tlen;
11203 xmlChar cur, next;
11204 const xmlChar *lastlt, *lastgt;
11205
11206 if (ctxt->input == NULL)
11207 return(0);
11208
11209#ifdef DEBUG_PUSH
11210 switch (ctxt->instate) {
11211 case XML_PARSER_EOF:
11212 xmlGenericError(xmlGenericErrorContext,
11213 "PP: try EOF\n"); break;
11214 case XML_PARSER_START:
11215 xmlGenericError(xmlGenericErrorContext,
11216 "PP: try START\n"); break;
11217 case XML_PARSER_MISC:
11218 xmlGenericError(xmlGenericErrorContext,
11219 "PP: try MISC\n");break;
11220 case XML_PARSER_COMMENT:
11221 xmlGenericError(xmlGenericErrorContext,
11222 "PP: try COMMENT\n");break;
11223 case XML_PARSER_PROLOG:
11224 xmlGenericError(xmlGenericErrorContext,
11225 "PP: try PROLOG\n");break;
11226 case XML_PARSER_START_TAG:
11227 xmlGenericError(xmlGenericErrorContext,
11228 "PP: try START_TAG\n");break;
11229 case XML_PARSER_CONTENT:
11230 xmlGenericError(xmlGenericErrorContext,
11231 "PP: try CONTENT\n");break;
11232 case XML_PARSER_CDATA_SECTION:
11233 xmlGenericError(xmlGenericErrorContext,
11234 "PP: try CDATA_SECTION\n");break;
11235 case XML_PARSER_END_TAG:
11236 xmlGenericError(xmlGenericErrorContext,
11237 "PP: try END_TAG\n");break;
11238 case XML_PARSER_ENTITY_DECL:
11239 xmlGenericError(xmlGenericErrorContext,
11240 "PP: try ENTITY_DECL\n");break;
11241 case XML_PARSER_ENTITY_VALUE:
11242 xmlGenericError(xmlGenericErrorContext,
11243 "PP: try ENTITY_VALUE\n");break;
11244 case XML_PARSER_ATTRIBUTE_VALUE:
11245 xmlGenericError(xmlGenericErrorContext,
11246 "PP: try ATTRIBUTE_VALUE\n");break;
11247 case XML_PARSER_DTD:
11248 xmlGenericError(xmlGenericErrorContext,
11249 "PP: try DTD\n");break;
11250 case XML_PARSER_EPILOG:
11251 xmlGenericError(xmlGenericErrorContext,
11252 "PP: try EPILOG\n");break;
11253 case XML_PARSER_PI:
11254 xmlGenericError(xmlGenericErrorContext,
11255 "PP: try PI\n");break;
11256 case XML_PARSER_IGNORE:
11257 xmlGenericError(xmlGenericErrorContext,
11258 "PP: try IGNORE\n");break;
11259 }
11260#endif
11261
11262 if ((ctxt->input != NULL) &&
11263 (ctxt->input->cur - ctxt->input->base > 4096)) {
11264 xmlSHRINK(ctxt);
11265 ctxt->checkIndex = 0;
11266 }
11267 xmlParseGetLasts(ctxt, &lastlt, &lastgt);
11268
11269 while (ctxt->instate != XML_PARSER_EOF) {
11270 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
11271 return(0);
11272
11273 if (ctxt->input == NULL) break;
11274 if (ctxt->input->buf == NULL)
11275 avail = ctxt->input->length -
11276 (ctxt->input->cur - ctxt->input->base);
11277 else {
11278 /*
11279 * If we are operating on converted input, try to flush
11280 * remaining chars to avoid them stalling in the non-converted
11281 * buffer. But do not do this in document start where
11282 * encoding="..." may not have been read and we work on a
11283 * guessed encoding.
11284 */
11285 if ((ctxt->instate != XML_PARSER_START) &&
11286 (ctxt->input->buf->raw != NULL) &&
11287 (xmlBufIsEmpty(ctxt->input->buf->raw) == 0)) {
11288 size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer,
11289 ctxt->input);
11290 size_t current = ctxt->input->cur - ctxt->input->base;
11291
11292 xmlParserInputBufferPush(ctxt->input->buf, 0, "");
11293 xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input,
11294 base, current);
11295 }
11296 avail = xmlBufUse(ctxt->input->buf->buffer) -
11297 (ctxt->input->cur - ctxt->input->base);
11298 }
11299 if (avail < 1)
11300 goto done;
11301 switch (ctxt->instate) {
11302 case XML_PARSER_EOF:
11303 /*
11304 * Document parsing is done !
11305 */
11306 goto done;
11307 case XML_PARSER_START:
11308 if (ctxt->charset == XML_CHAR_ENCODING_NONE) {
11309 xmlChar start[4];
11310 xmlCharEncoding enc;
11311
11312 /*
11313 * Very first chars read from the document flow.
11314 */
11315 if (avail < 4)
11316 goto done;
11317
11318 /*
11319 * Get the 4 first bytes and decode the charset
11320 * if enc != XML_CHAR_ENCODING_NONE
11321 * plug some encoding conversion routines,
11322 * else xmlSwitchEncoding will set to (default)
11323 * UTF8.
11324 */
11325 start[0] = RAW;
11326 start[1] = NXT(1);
11327 start[2] = NXT(2);
11328 start[3] = NXT(3);
11329 enc = xmlDetectCharEncoding(start, 4);
11330 xmlSwitchEncoding(ctxt, enc);
11331 break;
11332 }
11333
11334 if (avail < 2)
11335 goto done;
11336 cur = ctxt->input->cur[0];
11337 next = ctxt->input->cur[1];
11338 if (cur == 0) {
11339 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
11340 ctxt->sax->setDocumentLocator(ctxt->userData,
11341 &xmlDefaultSAXLocator);
11342 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
11343 xmlHaltParser(ctxt);
11344#ifdef DEBUG_PUSH
11345 xmlGenericError(xmlGenericErrorContext,
11346 "PP: entering EOF\n");
11347#endif
11348 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11349 ctxt->sax->endDocument(ctxt->userData);
11350 goto done;
11351 }
11352 if ((cur == '<') && (next == '?')) {
11353 /* PI or XML decl */
11354 if (avail < 5) return(ret);
11355 if ((!terminate) &&
11356 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
11357 return(ret);
11358 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
11359 ctxt->sax->setDocumentLocator(ctxt->userData,
11360 &xmlDefaultSAXLocator);
11361 if ((ctxt->input->cur[2] == 'x') &&
11362 (ctxt->input->cur[3] == 'm') &&
11363 (ctxt->input->cur[4] == 'l') &&
11364 (IS_BLANK_CH(ctxt->input->cur[5]))) {
11365 ret += 5;
11366#ifdef DEBUG_PUSH
11367 xmlGenericError(xmlGenericErrorContext,
11368 "PP: Parsing XML Decl\n");
11369#endif
11370 xmlParseXMLDecl(ctxt);
11371 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
11372 /*
11373 * The XML REC instructs us to stop parsing right
11374 * here
11375 */
11376 xmlHaltParser(ctxt);
11377 return(0);
11378 }
11379 ctxt->standalone = ctxt->input->standalone;
11380 if ((ctxt->encoding == NULL) &&
11381 (ctxt->input->encoding != NULL))
11382 ctxt->encoding = xmlStrdup(ctxt->input->encoding);
11383 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
11384 (!ctxt->disableSAX))
11385 ctxt->sax->startDocument(ctxt->userData);
11386 ctxt->instate = XML_PARSER_MISC;
11387#ifdef DEBUG_PUSH
11388 xmlGenericError(xmlGenericErrorContext,
11389 "PP: entering MISC\n");
11390#endif
11391 } else {
11392 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
11393 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
11394 (!ctxt->disableSAX))
11395 ctxt->sax->startDocument(ctxt->userData);
11396 ctxt->instate = XML_PARSER_MISC;
11397#ifdef DEBUG_PUSH
11398 xmlGenericError(xmlGenericErrorContext,
11399 "PP: entering MISC\n");
11400#endif
11401 }
11402 } else {
11403 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
11404 ctxt->sax->setDocumentLocator(ctxt->userData,
11405 &xmlDefaultSAXLocator);
11406 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
11407 if (ctxt->version == NULL) {
11408 xmlErrMemory(ctxt, NULL);
11409 break;
11410 }
11411 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
11412 (!ctxt->disableSAX))
11413 ctxt->sax->startDocument(ctxt->userData);
11414 ctxt->instate = XML_PARSER_MISC;
11415#ifdef DEBUG_PUSH
11416 xmlGenericError(xmlGenericErrorContext,
11417 "PP: entering MISC\n");
11418#endif
11419 }
11420 break;
11421 case XML_PARSER_START_TAG: {
11422 const xmlChar *name;
11423 const xmlChar *prefix = NULL;
11424 const xmlChar *URI = NULL;
11425 int line = ctxt->input->line;
11426 int nsNr = ctxt->nsNr;
11427
11428 if ((avail < 2) && (ctxt->inputNr == 1))
11429 goto done;
11430 cur = ctxt->input->cur[0];
11431 if (cur != '<') {
11432 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
11433 xmlHaltParser(ctxt);
11434 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11435 ctxt->sax->endDocument(ctxt->userData);
11436 goto done;
11437 }
11438 if (!terminate) {
11439 if (ctxt->progressive) {
11440 /* > can be found unescaped in attribute values */
11441 if ((lastgt == NULL) || (ctxt->input->cur >= lastgt))
11442 goto done;
11443 } else if (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0) {
11444 goto done;
11445 }
11446 }
11447 if (ctxt->spaceNr == 0)
11448 spacePush(ctxt, -1);
11449 else if (*ctxt->space == -2)
11450 spacePush(ctxt, -1);
11451 else
11452 spacePush(ctxt, *ctxt->space);
11453#ifdef LIBXML_SAX1_ENABLED
11454 if (ctxt->sax2)
11455#endif /* LIBXML_SAX1_ENABLED */
11456 name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen);
11457#ifdef LIBXML_SAX1_ENABLED
11458 else
11459 name = xmlParseStartTag(ctxt);
11460#endif /* LIBXML_SAX1_ENABLED */
11461 if (ctxt->instate == XML_PARSER_EOF)
11462 goto done;
11463 if (name == NULL) {
11464 spacePop(ctxt);
11465 xmlHaltParser(ctxt);
11466 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11467 ctxt->sax->endDocument(ctxt->userData);
11468 goto done;
11469 }
11470#ifdef LIBXML_VALID_ENABLED
11471 /*
11472 * [ VC: Root Element Type ]
11473 * The Name in the document type declaration must match
11474 * the element type of the root element.
11475 */
11476 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
11477 ctxt->node && (ctxt->node == ctxt->myDoc->children))
11478 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
11479#endif /* LIBXML_VALID_ENABLED */
11480
11481 /*
11482 * Check for an Empty Element.
11483 */
11484 if ((RAW == '/') && (NXT(1) == '>')) {
11485 SKIP(2);
11486
11487 if (ctxt->sax2) {
11488 if ((ctxt->sax != NULL) &&
11489 (ctxt->sax->endElementNs != NULL) &&
11490 (!ctxt->disableSAX))
11491 ctxt->sax->endElementNs(ctxt->userData, name,
11492 prefix, URI);
11493 if (ctxt->nsNr - nsNr > 0)
11494 nsPop(ctxt, ctxt->nsNr - nsNr);
11495#ifdef LIBXML_SAX1_ENABLED
11496 } else {
11497 if ((ctxt->sax != NULL) &&
11498 (ctxt->sax->endElement != NULL) &&
11499 (!ctxt->disableSAX))
11500 ctxt->sax->endElement(ctxt->userData, name);
11501#endif /* LIBXML_SAX1_ENABLED */
11502 }
11503 if (ctxt->instate == XML_PARSER_EOF)
11504 goto done;
11505 spacePop(ctxt);
11506 if (ctxt->nameNr == 0) {
11507 ctxt->instate = XML_PARSER_EPILOG;
11508 } else {
11509 ctxt->instate = XML_PARSER_CONTENT;
11510 }
11511 ctxt->progressive = 1;
11512 break;
11513 }
11514 if (RAW == '>') {
11515 NEXT;
11516 } else {
11517 xmlFatalErrMsgStr(ctxt, XML_ERR_GT_REQUIRED,
11518 "Couldn't find end of Start Tag %s\n",
11519 name);
11520 nodePop(ctxt);
11521 spacePop(ctxt);
11522 }
11523 nameNsPush(ctxt, name, prefix, URI, line, ctxt->nsNr - nsNr);
11524
11525 ctxt->instate = XML_PARSER_CONTENT;
11526 ctxt->progressive = 1;
11527 break;
11528 }
11529 case XML_PARSER_CONTENT: {
11530 const xmlChar *test;
11531 unsigned int cons;
11532 if ((avail < 2) && (ctxt->inputNr == 1))
11533 goto done;
11534 cur = ctxt->input->cur[0];
11535 next = ctxt->input->cur[1];
11536
11537 test = CUR_PTR;
11538 cons = ctxt->input->consumed;
11539 if ((cur == '<') && (next == '/')) {
11540 ctxt->instate = XML_PARSER_END_TAG;
11541 break;
11542 } else if ((cur == '<') && (next == '?')) {
11543 if ((!terminate) &&
11544 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) {
11545 ctxt->progressive = XML_PARSER_PI;
11546 goto done;
11547 }
11548 xmlParsePI(ctxt);
11549 ctxt->instate = XML_PARSER_CONTENT;
11550 ctxt->progressive = 1;
11551 } else if ((cur == '<') && (next != '!')) {
11552 ctxt->instate = XML_PARSER_START_TAG;
11553 break;
11554 } else if ((cur == '<') && (next == '!') &&
11555 (ctxt->input->cur[2] == '-') &&
11556 (ctxt->input->cur[3] == '-')) {
11557 int term;
11558
11559 if (avail < 4)
11560 goto done;
11561 ctxt->input->cur += 4;
11562 term = xmlParseLookupSequence(ctxt, '-', '-', '>');
11563 ctxt->input->cur -= 4;
11564 if ((!terminate) && (term < 0)) {
11565 ctxt->progressive = XML_PARSER_COMMENT;
11566 goto done;
11567 }
11568 xmlParseComment(ctxt);
11569 ctxt->instate = XML_PARSER_CONTENT;
11570 ctxt->progressive = 1;
11571 } else if ((cur == '<') && (ctxt->input->cur[1] == '!') &&
11572 (ctxt->input->cur[2] == '[') &&
11573 (ctxt->input->cur[3] == 'C') &&
11574 (ctxt->input->cur[4] == 'D') &&
11575 (ctxt->input->cur[5] == 'A') &&
11576 (ctxt->input->cur[6] == 'T') &&
11577 (ctxt->input->cur[7] == 'A') &&
11578 (ctxt->input->cur[8] == '[')) {
11579 SKIP(9);
11580 ctxt->instate = XML_PARSER_CDATA_SECTION;
11581 break;
11582 } else if ((cur == '<') && (next == '!') &&
11583 (avail < 9)) {
11584 goto done;
11585 } else if (cur == '&') {
11586 if ((!terminate) &&
11587 (xmlParseLookupSequence(ctxt, ';', 0, 0) < 0))
11588 goto done;
11589 xmlParseReference(ctxt);
11590 } else {
11591 /* TODO Avoid the extra copy, handle directly !!! */
11592 /*
11593 * Goal of the following test is:
11594 * - minimize calls to the SAX 'character' callback
11595 * when they are mergeable
11596 * - handle an problem for isBlank when we only parse
11597 * a sequence of blank chars and the next one is
11598 * not available to check against '<' presence.
11599 * - tries to homogenize the differences in SAX
11600 * callbacks between the push and pull versions
11601 * of the parser.
11602 */
11603 if ((ctxt->inputNr == 1) &&
11604 (avail < XML_PARSER_BIG_BUFFER_SIZE)) {
11605 if (!terminate) {
11606 if (ctxt->progressive) {
11607 if ((lastlt == NULL) ||
11608 (ctxt->input->cur > lastlt))
11609 goto done;
11610 } else if (xmlParseLookupSequence(ctxt,
11611 '<', 0, 0) < 0) {
11612 goto done;
11613 }
11614 }
11615 }
11616 ctxt->checkIndex = 0;
11617 xmlParseCharData(ctxt, 0);
11618 }
11619 if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) {
11620 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
11621 "detected an error in element content\n");
11622 xmlHaltParser(ctxt);
11623 break;
11624 }
11625 break;
11626 }
11627 case XML_PARSER_END_TAG:
11628 if (avail < 2)
11629 goto done;
11630 if (!terminate) {
11631 if (ctxt->progressive) {
11632 /* > can be found unescaped in attribute values */
11633 if ((lastgt == NULL) || (ctxt->input->cur >= lastgt))
11634 goto done;
11635 } else if (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0) {
11636 goto done;
11637 }
11638 }
11639 if (ctxt->sax2) {
11640 xmlParseEndTag2(ctxt, &ctxt->pushTab[ctxt->nameNr - 1]);
11641 nameNsPop(ctxt);
11642 }
11643#ifdef LIBXML_SAX1_ENABLED
11644 else
11645 xmlParseEndTag1(ctxt, 0);
11646#endif /* LIBXML_SAX1_ENABLED */
11647 if (ctxt->instate == XML_PARSER_EOF) {
11648 /* Nothing */
11649 } else if (ctxt->nameNr == 0) {
11650 ctxt->instate = XML_PARSER_EPILOG;
11651 } else {
11652 ctxt->instate = XML_PARSER_CONTENT;
11653 }
11654 break;
11655 case XML_PARSER_CDATA_SECTION: {
11656 /*
11657 * The Push mode need to have the SAX callback for
11658 * cdataBlock merge back contiguous callbacks.
11659 */
11660 int base;
11661
11662 base = xmlParseLookupSequence(ctxt, ']', ']', '>');
11663 if (base < 0) {
11664 if (avail >= XML_PARSER_BIG_BUFFER_SIZE + 2) {
11665 int tmp;
11666
11667 tmp = xmlCheckCdataPush(ctxt->input->cur,
11668 XML_PARSER_BIG_BUFFER_SIZE, 0);
11669 if (tmp < 0) {
11670 tmp = -tmp;
11671 ctxt->input->cur += tmp;
11672 goto encoding_error;
11673 }
11674 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
11675 if (ctxt->sax->cdataBlock != NULL)
11676 ctxt->sax->cdataBlock(ctxt->userData,
11677 ctxt->input->cur, tmp);
11678 else if (ctxt->sax->characters != NULL)
11679 ctxt->sax->characters(ctxt->userData,
11680 ctxt->input->cur, tmp);
11681 }
11682 if (ctxt->instate == XML_PARSER_EOF)
11683 goto done;
11684 SKIPL(tmp);
11685 ctxt->checkIndex = 0;
11686 }
11687 goto done;
11688 } else {
11689 int tmp;
11690
11691 tmp = xmlCheckCdataPush(ctxt->input->cur, base, 1);
11692 if ((tmp < 0) || (tmp != base)) {
11693 tmp = -tmp;
11694 ctxt->input->cur += tmp;
11695 goto encoding_error;
11696 }
11697 if ((ctxt->sax != NULL) && (base == 0) &&
11698 (ctxt->sax->cdataBlock != NULL) &&
11699 (!ctxt->disableSAX)) {
11700 /*
11701 * Special case to provide identical behaviour
11702 * between pull and push parsers on enpty CDATA
11703 * sections
11704 */
11705 if ((ctxt->input->cur - ctxt->input->base >= 9) &&
11706 (!strncmp((const char *)&ctxt->input->cur[-9],
11707 "<![CDATA[", 9)))
11708 ctxt->sax->cdataBlock(ctxt->userData,
11709 BAD_CAST "", 0);
11710 } else if ((ctxt->sax != NULL) && (base > 0) &&
11711 (!ctxt->disableSAX)) {
11712 if (ctxt->sax->cdataBlock != NULL)
11713 ctxt->sax->cdataBlock(ctxt->userData,
11714 ctxt->input->cur, base);
11715 else if (ctxt->sax->characters != NULL)
11716 ctxt->sax->characters(ctxt->userData,
11717 ctxt->input->cur, base);
11718 }
11719 if (ctxt->instate == XML_PARSER_EOF)
11720 goto done;
11721 SKIPL(base + 3);
11722 ctxt->checkIndex = 0;
11723 ctxt->instate = XML_PARSER_CONTENT;
11724#ifdef DEBUG_PUSH
11725 xmlGenericError(xmlGenericErrorContext,
11726 "PP: entering CONTENT\n");
11727#endif
11728 }
11729 break;
11730 }
11731 case XML_PARSER_MISC:
11732 SKIP_BLANKS;
11733 if (ctxt->input->buf == NULL)
11734 avail = ctxt->input->length -
11735 (ctxt->input->cur - ctxt->input->base);
11736 else
11737 avail = xmlBufUse(ctxt->input->buf->buffer) -
11738 (ctxt->input->cur - ctxt->input->base);
11739 if (avail < 2)
11740 goto done;
11741 cur = ctxt->input->cur[0];
11742 next = ctxt->input->cur[1];
11743 if ((cur == '<') && (next == '?')) {
11744 if ((!terminate) &&
11745 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) {
11746 ctxt->progressive = XML_PARSER_PI;
11747 goto done;
11748 }
11749#ifdef DEBUG_PUSH
11750 xmlGenericError(xmlGenericErrorContext,
11751 "PP: Parsing PI\n");
11752#endif
11753 xmlParsePI(ctxt);
11754 if (ctxt->instate == XML_PARSER_EOF)
11755 goto done;
11756 ctxt->instate = XML_PARSER_MISC;
11757 ctxt->progressive = 1;
11758 ctxt->checkIndex = 0;
11759 } else if ((cur == '<') && (next == '!') &&
11760 (ctxt->input->cur[2] == '-') &&
11761 (ctxt->input->cur[3] == '-')) {
11762 if ((!terminate) &&
11763 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0)) {
11764 ctxt->progressive = XML_PARSER_COMMENT;
11765 goto done;
11766 }
11767#ifdef DEBUG_PUSH
11768 xmlGenericError(xmlGenericErrorContext,
11769 "PP: Parsing Comment\n");
11770#endif
11771 xmlParseComment(ctxt);
11772 if (ctxt->instate == XML_PARSER_EOF)
11773 goto done;
11774 ctxt->instate = XML_PARSER_MISC;
11775 ctxt->progressive = 1;
11776 ctxt->checkIndex = 0;
11777 } else if ((cur == '<') && (next == '!') &&
11778 (ctxt->input->cur[2] == 'D') &&
11779 (ctxt->input->cur[3] == 'O') &&
11780 (ctxt->input->cur[4] == 'C') &&
11781 (ctxt->input->cur[5] == 'T') &&
11782 (ctxt->input->cur[6] == 'Y') &&
11783 (ctxt->input->cur[7] == 'P') &&
11784 (ctxt->input->cur[8] == 'E')) {
11785 if ((!terminate) &&
11786 (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0)) {
11787 ctxt->progressive = XML_PARSER_DTD;
11788 goto done;
11789 }
11790#ifdef DEBUG_PUSH
11791 xmlGenericError(xmlGenericErrorContext,
11792 "PP: Parsing internal subset\n");
11793#endif
11794 ctxt->inSubset = 1;
11795 ctxt->progressive = 0;
11796 ctxt->checkIndex = 0;
11797 xmlParseDocTypeDecl(ctxt);
11798 if (ctxt->instate == XML_PARSER_EOF)
11799 goto done;
11800 if (RAW == '[') {
11801 ctxt->instate = XML_PARSER_DTD;
11802#ifdef DEBUG_PUSH
11803 xmlGenericError(xmlGenericErrorContext,
11804 "PP: entering DTD\n");
11805#endif
11806 } else {
11807 /*
11808 * Create and update the external subset.
11809 */
11810 ctxt->inSubset = 2;
11811 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
11812 (ctxt->sax->externalSubset != NULL))
11813 ctxt->sax->externalSubset(ctxt->userData,
11814 ctxt->intSubName, ctxt->extSubSystem,
11815 ctxt->extSubURI);
11816 ctxt->inSubset = 0;
11817 xmlCleanSpecialAttr(ctxt);
11818 ctxt->instate = XML_PARSER_PROLOG;
11819#ifdef DEBUG_PUSH
11820 xmlGenericError(xmlGenericErrorContext,
11821 "PP: entering PROLOG\n");
11822#endif
11823 }
11824 } else if ((cur == '<') && (next == '!') &&
11825 (avail < 9)) {
11826 goto done;
11827 } else {
11828 ctxt->instate = XML_PARSER_START_TAG;
11829 ctxt->progressive = XML_PARSER_START_TAG;
11830 xmlParseGetLasts(ctxt, &lastlt, &lastgt);
11831#ifdef DEBUG_PUSH
11832 xmlGenericError(xmlGenericErrorContext,
11833 "PP: entering START_TAG\n");
11834#endif
11835 }
11836 break;
11837 case XML_PARSER_PROLOG:
11838 SKIP_BLANKS;
11839 if (ctxt->input->buf == NULL)
11840 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
11841 else
11842 avail = xmlBufUse(ctxt->input->buf->buffer) -
11843 (ctxt->input->cur - ctxt->input->base);
11844 if (avail < 2)
11845 goto done;
11846 cur = ctxt->input->cur[0];
11847 next = ctxt->input->cur[1];
11848 if ((cur == '<') && (next == '?')) {
11849 if ((!terminate) &&
11850 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) {
11851 ctxt->progressive = XML_PARSER_PI;
11852 goto done;
11853 }
11854#ifdef DEBUG_PUSH
11855 xmlGenericError(xmlGenericErrorContext,
11856 "PP: Parsing PI\n");
11857#endif
11858 xmlParsePI(ctxt);
11859 if (ctxt->instate == XML_PARSER_EOF)
11860 goto done;
11861 ctxt->instate = XML_PARSER_PROLOG;
11862 ctxt->progressive = 1;
11863 } else if ((cur == '<') && (next == '!') &&
11864 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
11865 if ((!terminate) &&
11866 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0)) {
11867 ctxt->progressive = XML_PARSER_COMMENT;
11868 goto done;
11869 }
11870#ifdef DEBUG_PUSH
11871 xmlGenericError(xmlGenericErrorContext,
11872 "PP: Parsing Comment\n");
11873#endif
11874 xmlParseComment(ctxt);
11875 if (ctxt->instate == XML_PARSER_EOF)
11876 goto done;
11877 ctxt->instate = XML_PARSER_PROLOG;
11878 ctxt->progressive = 1;
11879 } else if ((cur == '<') && (next == '!') &&
11880 (avail < 4)) {
11881 goto done;
11882 } else {
11883 ctxt->instate = XML_PARSER_START_TAG;
11884 if (ctxt->progressive == 0)
11885 ctxt->progressive = XML_PARSER_START_TAG;
11886 xmlParseGetLasts(ctxt, &lastlt, &lastgt);
11887#ifdef DEBUG_PUSH
11888 xmlGenericError(xmlGenericErrorContext,
11889 "PP: entering START_TAG\n");
11890#endif
11891 }
11892 break;
11893 case XML_PARSER_EPILOG:
11894 SKIP_BLANKS;
11895 if (ctxt->input->buf == NULL)
11896 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
11897 else
11898 avail = xmlBufUse(ctxt->input->buf->buffer) -
11899 (ctxt->input->cur - ctxt->input->base);
11900 if (avail < 2)
11901 goto done;
11902 cur = ctxt->input->cur[0];
11903 next = ctxt->input->cur[1];
11904 if ((cur == '<') && (next == '?')) {
11905 if ((!terminate) &&
11906 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) {
11907 ctxt->progressive = XML_PARSER_PI;
11908 goto done;
11909 }
11910#ifdef DEBUG_PUSH
11911 xmlGenericError(xmlGenericErrorContext,
11912 "PP: Parsing PI\n");
11913#endif
11914 xmlParsePI(ctxt);
11915 if (ctxt->instate == XML_PARSER_EOF)
11916 goto done;
11917 ctxt->instate = XML_PARSER_EPILOG;
11918 ctxt->progressive = 1;
11919 } else if ((cur == '<') && (next == '!') &&
11920 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
11921 if ((!terminate) &&
11922 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0)) {
11923 ctxt->progressive = XML_PARSER_COMMENT;
11924 goto done;
11925 }
11926#ifdef DEBUG_PUSH
11927 xmlGenericError(xmlGenericErrorContext,
11928 "PP: Parsing Comment\n");
11929#endif
11930 xmlParseComment(ctxt);
11931 if (ctxt->instate == XML_PARSER_EOF)
11932 goto done;
11933 ctxt->instate = XML_PARSER_EPILOG;
11934 ctxt->progressive = 1;
11935 } else if ((cur == '<') && (next == '!') &&
11936 (avail < 4)) {
11937 goto done;
11938 } else {
11939 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
11940 xmlHaltParser(ctxt);
11941#ifdef DEBUG_PUSH
11942 xmlGenericError(xmlGenericErrorContext,
11943 "PP: entering EOF\n");
11944#endif
11945 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11946 ctxt->sax->endDocument(ctxt->userData);
11947 goto done;
11948 }
11949 break;
11950 case XML_PARSER_DTD: {
11951 /*
11952 * Sorry but progressive parsing of the internal subset
11953 * is not expected to be supported. We first check that
11954 * the full content of the internal subset is available and
11955 * the parsing is launched only at that point.
11956 * Internal subset ends up with "']' S? '>'" in an unescaped
11957 * section and not in a ']]>' sequence which are conditional
11958 * sections (whoever argued to keep that crap in XML deserve
11959 * a place in hell !).
11960 */
11961 int base, i;
11962 xmlChar *buf;
11963 xmlChar quote = 0;
11964 size_t use;
11965
11966 base = ctxt->input->cur - ctxt->input->base;
11967 if (base < 0) return(0);
11968 if (ctxt->checkIndex > base)
11969 base = ctxt->checkIndex;
11970 buf = xmlBufContent(ctxt->input->buf->buffer);
11971 use = xmlBufUse(ctxt->input->buf->buffer);
11972 for (;(unsigned int) base < use; base++) {
11973 if (quote != 0) {
11974 if (buf[base] == quote)
11975 quote = 0;
11976 continue;
11977 }
11978 if ((quote == 0) && (buf[base] == '<')) {
11979 int found = 0;
11980 /* special handling of comments */
11981 if (((unsigned int) base + 4 < use) &&
11982 (buf[base + 1] == '!') &&
11983 (buf[base + 2] == '-') &&
11984 (buf[base + 3] == '-')) {
11985 for (;(unsigned int) base + 3 < use; base++) {
11986 if ((buf[base] == '-') &&
11987 (buf[base + 1] == '-') &&
11988 (buf[base + 2] == '>')) {
11989 found = 1;
11990 base += 2;
11991 break;
11992 }
11993 }
11994 if (!found) {
11995#if 0
11996 fprintf(stderr, "unfinished comment\n");
11997#endif
11998 break; /* for */
11999 }
12000 continue;
12001 }
12002 }
12003 if (buf[base] == '"') {
12004 quote = '"';
12005 continue;
12006 }
12007 if (buf[base] == '\'') {
12008 quote = '\'';
12009 continue;
12010 }
12011 if (buf[base] == ']') {
12012#if 0
12013 fprintf(stderr, "%c%c%c%c: ", buf[base],
12014 buf[base + 1], buf[base + 2], buf[base + 3]);
12015#endif
12016 if ((unsigned int) base +1 >= use)
12017 break;
12018 if (buf[base + 1] == ']') {
12019 /* conditional crap, skip both ']' ! */
12020 base++;
12021 continue;
12022 }
12023 for (i = 1; (unsigned int) base + i < use; i++) {
12024 if (buf[base + i] == '>') {
12025#if 0
12026 fprintf(stderr, "found\n");
12027#endif
12028 goto found_end_int_subset;
12029 }
12030 if (!IS_BLANK_CH(buf[base + i])) {
12031#if 0
12032 fprintf(stderr, "not found\n");
12033#endif
12034 goto not_end_of_int_subset;
12035 }
12036 }
12037#if 0
12038 fprintf(stderr, "end of stream\n");
12039#endif
12040 break;
12041
12042 }
12043not_end_of_int_subset:
12044 continue; /* for */
12045 }
12046 /*
12047 * We didn't found the end of the Internal subset
12048 */
12049 if (quote == 0)
12050 ctxt->checkIndex = base;
12051 else
12052 ctxt->checkIndex = 0;
12053#ifdef DEBUG_PUSH
12054 if (next == 0)
12055 xmlGenericError(xmlGenericErrorContext,
12056 "PP: lookup of int subset end filed\n");
12057#endif
12058 goto done;
12059
12060found_end_int_subset:
12061 ctxt->checkIndex = 0;
12062 xmlParseInternalSubset(ctxt);
12063 if (ctxt->instate == XML_PARSER_EOF)
12064 goto done;
12065 ctxt->inSubset = 2;
12066 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
12067 (ctxt->sax->externalSubset != NULL))
12068 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
12069 ctxt->extSubSystem, ctxt->extSubURI);
12070 ctxt->inSubset = 0;
12071 xmlCleanSpecialAttr(ctxt);
12072 if (ctxt->instate == XML_PARSER_EOF)
12073 goto done;
12074 ctxt->instate = XML_PARSER_PROLOG;
12075 ctxt->checkIndex = 0;
12076#ifdef DEBUG_PUSH
12077 xmlGenericError(xmlGenericErrorContext,
12078 "PP: entering PROLOG\n");
12079#endif
12080 break;
12081 }
12082 case XML_PARSER_COMMENT:
12083 xmlGenericError(xmlGenericErrorContext,
12084 "PP: internal error, state == COMMENT\n");
12085 ctxt->instate = XML_PARSER_CONTENT;
12086#ifdef DEBUG_PUSH
12087 xmlGenericError(xmlGenericErrorContext,
12088 "PP: entering CONTENT\n");
12089#endif
12090 break;
12091 case XML_PARSER_IGNORE:
12092 xmlGenericError(xmlGenericErrorContext,
12093 "PP: internal error, state == IGNORE");
12094 ctxt->instate = XML_PARSER_DTD;
12095#ifdef DEBUG_PUSH
12096 xmlGenericError(xmlGenericErrorContext,
12097 "PP: entering DTD\n");
12098#endif
12099 break;
12100 case XML_PARSER_PI:
12101 xmlGenericError(xmlGenericErrorContext,
12102 "PP: internal error, state == PI\n");
12103 ctxt->instate = XML_PARSER_CONTENT;
12104#ifdef DEBUG_PUSH
12105 xmlGenericError(xmlGenericErrorContext,
12106 "PP: entering CONTENT\n");
12107#endif
12108 break;
12109 case XML_PARSER_ENTITY_DECL:
12110 xmlGenericError(xmlGenericErrorContext,
12111 "PP: internal error, state == ENTITY_DECL\n");
12112 ctxt->instate = XML_PARSER_DTD;
12113#ifdef DEBUG_PUSH
12114 xmlGenericError(xmlGenericErrorContext,
12115 "PP: entering DTD\n");
12116#endif
12117 break;
12118 case XML_PARSER_ENTITY_VALUE:
12119 xmlGenericError(xmlGenericErrorContext,
12120 "PP: internal error, state == ENTITY_VALUE\n");
12121 ctxt->instate = XML_PARSER_CONTENT;
12122#ifdef DEBUG_PUSH
12123 xmlGenericError(xmlGenericErrorContext,
12124 "PP: entering DTD\n");
12125#endif
12126 break;
12127 case XML_PARSER_ATTRIBUTE_VALUE:
12128 xmlGenericError(xmlGenericErrorContext,
12129 "PP: internal error, state == ATTRIBUTE_VALUE\n");
12130 ctxt->instate = XML_PARSER_START_TAG;
12131#ifdef DEBUG_PUSH
12132 xmlGenericError(xmlGenericErrorContext,
12133 "PP: entering START_TAG\n");
12134#endif
12135 break;
12136 case XML_PARSER_SYSTEM_LITERAL:
12137 xmlGenericError(xmlGenericErrorContext,
12138 "PP: internal error, state == SYSTEM_LITERAL\n");
12139 ctxt->instate = XML_PARSER_START_TAG;
12140#ifdef DEBUG_PUSH
12141 xmlGenericError(xmlGenericErrorContext,
12142 "PP: entering START_TAG\n");
12143#endif
12144 break;
12145 case XML_PARSER_PUBLIC_LITERAL:
12146 xmlGenericError(xmlGenericErrorContext,
12147 "PP: internal error, state == PUBLIC_LITERAL\n");
12148 ctxt->instate = XML_PARSER_START_TAG;
12149#ifdef DEBUG_PUSH
12150 xmlGenericError(xmlGenericErrorContext,
12151 "PP: entering START_TAG\n");
12152#endif
12153 break;
12154 }
12155 }
12156done:
12157#ifdef DEBUG_PUSH
12158 xmlGenericError(xmlGenericErrorContext, "PP: done %d\n", ret);
12159#endif
12160 return(ret);
12161encoding_error:
12162 {
12163 char buffer[150];
12164
12165 snprintf(buffer, 149, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
12166 ctxt->input->cur[0], ctxt->input->cur[1],
12167 ctxt->input->cur[2], ctxt->input->cur[3]);
12168 __xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR,
12169 "Input is not proper UTF-8, indicate encoding !\n%s",
12170 BAD_CAST buffer, NULL);
12171 }
12172 return(0);
12173}
12174
12175/**
12176 * xmlParseCheckTransition:
12177 * @ctxt: an XML parser context
12178 * @chunk: a char array
12179 * @size: the size in byte of the chunk
12180 *
12181 * Check depending on the current parser state if the chunk given must be
12182 * processed immediately or one need more data to advance on parsing.
12183 *
12184 * Returns -1 in case of error, 0 if the push is not needed and 1 if needed
12185 */
12186static int
12187xmlParseCheckTransition(xmlParserCtxtPtr ctxt, const char *chunk, int size) {
12188 if ((ctxt == NULL) || (chunk == NULL) || (size < 0))
12189 return(-1);
12190 if (ctxt->instate == XML_PARSER_START_TAG) {
12191 if (memchr(chunk, '>', size) != NULL)
12192 return(1);
12193 return(0);
12194 }
12195 if (ctxt->progressive == XML_PARSER_COMMENT) {
12196 if (memchr(chunk, '>', size) != NULL)
12197 return(1);
12198 return(0);
12199 }
12200 if (ctxt->instate == XML_PARSER_CDATA_SECTION) {
12201 if (memchr(chunk, '>', size) != NULL)
12202 return(1);
12203 return(0);
12204 }
12205 if (ctxt->progressive == XML_PARSER_PI) {
12206 if (memchr(chunk, '>', size) != NULL)
12207 return(1);
12208 return(0);
12209 }
12210 if (ctxt->instate == XML_PARSER_END_TAG) {
12211 if (memchr(chunk, '>', size) != NULL)
12212 return(1);
12213 return(0);
12214 }
12215 if ((ctxt->progressive == XML_PARSER_DTD) ||
12216 (ctxt->instate == XML_PARSER_DTD)) {
12217 if (memchr(chunk, '>', size) != NULL)
12218 return(1);
12219 return(0);
12220 }
12221 return(1);
12222}
12223
12224/**
12225 * xmlParseChunk:
12226 * @ctxt: an XML parser context
12227 * @chunk: a char array
12228 * @size: the size in byte of the chunk
12229 * @terminate: last chunk indicator
12230 *
12231 * Parse a Chunk of memory
12232 *
12233 * Returns zero if no error, the xmlParserErrors otherwise.
12234 */
12235int
12236xmlParseChunk(xmlParserCtxtPtr ctxt, const char *chunk, int size,
12237 int terminate) {
12238 int end_in_lf = 0;
12239 int remain = 0;
12240 size_t old_avail = 0;
12241 size_t avail = 0;
12242
12243 if (ctxt == NULL)
12244 return(XML_ERR_INTERNAL_ERROR);
12245 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
12246 return(ctxt->errNo);
12247 if (ctxt->instate == XML_PARSER_EOF)
12248 return(-1);
12249 if (ctxt->instate == XML_PARSER_START)
12250 xmlDetectSAX2(ctxt);
12251 if ((size > 0) && (chunk != NULL) && (!terminate) &&
12252 (chunk[size - 1] == '\r')) {
12253 end_in_lf = 1;
12254 size--;
12255 }
12256
12257xmldecl_done:
12258
12259 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
12260 (ctxt->input->buf != NULL) && (ctxt->instate != XML_PARSER_EOF)) {
12261 size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer, ctxt->input);
12262 size_t cur = ctxt->input->cur - ctxt->input->base;
12263 int res;
12264
12265 old_avail = xmlBufUse(ctxt->input->buf->buffer);
12266 /*
12267 * Specific handling if we autodetected an encoding, we should not
12268 * push more than the first line ... which depend on the encoding
12269 * And only push the rest once the final encoding was detected
12270 */
12271 if ((ctxt->instate == XML_PARSER_START) && (ctxt->input != NULL) &&
12272 (ctxt->input->buf != NULL) && (ctxt->input->buf->encoder != NULL)) {
12273 unsigned int len = 45;
12274
12275 if ((xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
12276 BAD_CAST "UTF-16")) ||
12277 (xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
12278 BAD_CAST "UTF16")))
12279 len = 90;
12280 else if ((xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
12281 BAD_CAST "UCS-4")) ||
12282 (xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
12283 BAD_CAST "UCS4")))
12284 len = 180;
12285
12286 if (ctxt->input->buf->rawconsumed < len)
12287 len -= ctxt->input->buf->rawconsumed;
12288
12289 /*
12290 * Change size for reading the initial declaration only
12291 * if size is greater than len. Otherwise, memmove in xmlBufferAdd
12292 * will blindly copy extra bytes from memory.
12293 */
12294 if ((unsigned int) size > len) {
12295 remain = size - len;
12296 size = len;
12297 } else {
12298 remain = 0;
12299 }
12300 }
12301 res = xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
12302 xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input, base, cur);
12303 if (res < 0) {
12304 ctxt->errNo = XML_PARSER_EOF;
12305 xmlHaltParser(ctxt);
12306 return (XML_PARSER_EOF);
12307 }
12308#ifdef DEBUG_PUSH
12309 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
12310#endif
12311
12312 } else if (ctxt->instate != XML_PARSER_EOF) {
12313 if ((ctxt->input != NULL) && ctxt->input->buf != NULL) {
12314 xmlParserInputBufferPtr in = ctxt->input->buf;
12315 if ((in->encoder != NULL) && (in->buffer != NULL) &&
12316 (in->raw != NULL)) {
12317 int nbchars;
12318 size_t base = xmlBufGetInputBase(in->buffer, ctxt->input);
12319 size_t current = ctxt->input->cur - ctxt->input->base;
12320
12321 nbchars = xmlCharEncInput(in, terminate);
12322 xmlBufSetInputBaseCur(in->buffer, ctxt->input, base, current);
12323 if (nbchars < 0) {
12324 /* TODO 2.6.0 */
12325 xmlGenericError(xmlGenericErrorContext,
12326 "xmlParseChunk: encoder error\n");
12327 xmlHaltParser(ctxt);
12328 return(XML_ERR_INVALID_ENCODING);
12329 }
12330 }
12331 }
12332 }
12333 if (remain != 0) {
12334 xmlParseTryOrFinish(ctxt, 0);
12335 } else {
12336 if ((ctxt->input != NULL) && (ctxt->input->buf != NULL))
12337 avail = xmlBufUse(ctxt->input->buf->buffer);
12338 /*
12339 * Depending on the current state it may not be such
12340 * a good idea to try parsing if there is nothing in the chunk
12341 * which would be worth doing a parser state transition and we
12342 * need to wait for more data
12343 */
12344 if ((terminate) || (avail > XML_MAX_TEXT_LENGTH) ||
12345 (old_avail == 0) || (avail == 0) ||
12346 (xmlParseCheckTransition(ctxt,
12347 (const char *)&ctxt->input->base[old_avail],
12348 avail - old_avail)))
12349 xmlParseTryOrFinish(ctxt, terminate);
12350 }
12351 if (ctxt->instate == XML_PARSER_EOF)
12352 return(ctxt->errNo);
12353
12354 if ((ctxt->input != NULL) &&
12355 (((ctxt->input->end - ctxt->input->cur) > XML_MAX_LOOKUP_LIMIT) ||
12356 ((ctxt->input->cur - ctxt->input->base) > XML_MAX_LOOKUP_LIMIT)) &&
12357 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
12358 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, "Huge input lookup");
12359 xmlHaltParser(ctxt);
12360 }
12361 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
12362 return(ctxt->errNo);
12363
12364 if (remain != 0) {
12365 chunk += size;
12366 size = remain;
12367 remain = 0;
12368 goto xmldecl_done;
12369 }
12370 if ((end_in_lf == 1) && (ctxt->input != NULL) &&
12371 (ctxt->input->buf != NULL)) {
12372 size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer,
12373 ctxt->input);
12374 size_t current = ctxt->input->cur - ctxt->input->base;
12375
12376 xmlParserInputBufferPush(ctxt->input->buf, 1, "\r");
12377
12378 xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input,
12379 base, current);
12380 }
12381 if (terminate) {
12382 /*
12383 * Check for termination
12384 */
12385 int cur_avail = 0;
12386
12387 if (ctxt->input != NULL) {
12388 if (ctxt->input->buf == NULL)
12389 cur_avail = ctxt->input->length -
12390 (ctxt->input->cur - ctxt->input->base);
12391 else
12392 cur_avail = xmlBufUse(ctxt->input->buf->buffer) -
12393 (ctxt->input->cur - ctxt->input->base);
12394 }
12395
12396 if ((ctxt->instate != XML_PARSER_EOF) &&
12397 (ctxt->instate != XML_PARSER_EPILOG)) {
12398 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
12399 }
12400 if ((ctxt->instate == XML_PARSER_EPILOG) && (cur_avail > 0)) {
12401 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
12402 }
12403 if (ctxt->instate != XML_PARSER_EOF) {
12404 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
12405 ctxt->sax->endDocument(ctxt->userData);
12406 }
12407 ctxt->instate = XML_PARSER_EOF;
12408 }
12409 if (ctxt->wellFormed == 0)
12410 return((xmlParserErrors) ctxt->errNo);
12411 else
12412 return(0);
12413}
12414
12415/************************************************************************
12416 * *
12417 * I/O front end functions to the parser *
12418 * *
12419 ************************************************************************/
12420
12421/**
12422 * xmlCreatePushParserCtxt:
12423 * @sax: a SAX handler
12424 * @user_data: The user data returned on SAX callbacks
12425 * @chunk: a pointer to an array of chars
12426 * @size: number of chars in the array
12427 * @filename: an optional file name or URI
12428 *
12429 * Create a parser context for using the XML parser in push mode.
12430 * If @buffer and @size are non-NULL, the data is used to detect
12431 * the encoding. The remaining characters will be parsed so they
12432 * don't need to be fed in again through xmlParseChunk.
12433 * To allow content encoding detection, @size should be >= 4
12434 * The value of @filename is used for fetching external entities
12435 * and error/warning reports.
12436 *
12437 * Returns the new parser context or NULL
12438 */
12439
12440xmlParserCtxtPtr
12441xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
12442 const char *chunk, int size, const char *filename) {
12443 xmlParserCtxtPtr ctxt;
12444 xmlParserInputPtr inputStream;
12445 xmlParserInputBufferPtr buf;
12446 xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
12447
12448 /*
12449 * plug some encoding conversion routines
12450 */
12451 if ((chunk != NULL) && (size >= 4))
12452 enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
12453
12454 buf = xmlAllocParserInputBuffer(enc);
12455 if (buf == NULL) return(NULL);
12456
12457 ctxt = xmlNewParserCtxt();
12458 if (ctxt == NULL) {
12459 xmlErrMemory(NULL, "creating parser: out of memory\n");
12460 xmlFreeParserInputBuffer(buf);
12461 return(NULL);
12462 }
12463 ctxt->dictNames = 1;
12464 if (sax != NULL) {
12465#ifdef LIBXML_SAX1_ENABLED
12466 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
12467#endif /* LIBXML_SAX1_ENABLED */
12468 xmlFree(ctxt->sax);
12469 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
12470 if (ctxt->sax == NULL) {
12471 xmlErrMemory(ctxt, NULL);
12472 xmlFreeParserInputBuffer(buf);
12473 xmlFreeParserCtxt(ctxt);
12474 return(NULL);
12475 }
12476 memset(ctxt->sax, 0, sizeof(xmlSAXHandler));
12477 if (sax->initialized == XML_SAX2_MAGIC)
12478 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
12479 else
12480 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1));
12481 if (user_data != NULL)
12482 ctxt->userData = user_data;
12483 }
12484 if (filename == NULL) {
12485 ctxt->directory = NULL;
12486 } else {
12487 ctxt->directory = xmlParserGetDirectory(filename);
12488 }
12489
12490 inputStream = xmlNewInputStream(ctxt);
12491 if (inputStream == NULL) {
12492 xmlFreeParserCtxt(ctxt);
12493 xmlFreeParserInputBuffer(buf);
12494 return(NULL);
12495 }
12496
12497 if (filename == NULL)
12498 inputStream->filename = NULL;
12499 else {
12500 inputStream->filename = (char *)
12501 xmlCanonicPath((const xmlChar *) filename);
12502 if (inputStream->filename == NULL) {
12503 xmlFreeParserCtxt(ctxt);
12504 xmlFreeParserInputBuffer(buf);
12505 return(NULL);
12506 }
12507 }
12508 inputStream->buf = buf;
12509 xmlBufResetInput(inputStream->buf->buffer, inputStream);
12510 inputPush(ctxt, inputStream);
12511
12512 /*
12513 * If the caller didn't provide an initial 'chunk' for determining
12514 * the encoding, we set the context to XML_CHAR_ENCODING_NONE so
12515 * that it can be automatically determined later
12516 */
12517 if ((size == 0) || (chunk == NULL)) {
12518 ctxt->charset = XML_CHAR_ENCODING_NONE;
12519 } else if ((ctxt->input != NULL) && (ctxt->input->buf != NULL)) {
12520 size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer, ctxt->input);
12521 size_t cur = ctxt->input->cur - ctxt->input->base;
12522
12523 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
12524
12525 xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input, base, cur);
12526#ifdef DEBUG_PUSH
12527 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
12528#endif
12529 }
12530
12531 if (enc != XML_CHAR_ENCODING_NONE) {
12532 xmlSwitchEncoding(ctxt, enc);
12533 }
12534
12535 return(ctxt);
12536}
12537#endif /* LIBXML_PUSH_ENABLED */
12538
12539/**
12540 * xmlHaltParser:
12541 * @ctxt: an XML parser context
12542 *
12543 * Blocks further parser processing don't override error
12544 * for internal use
12545 */
12546static void
12547xmlHaltParser(xmlParserCtxtPtr ctxt) {
12548 if (ctxt == NULL)
12549 return;
12550 ctxt->instate = XML_PARSER_EOF;
12551 ctxt->disableSAX = 1;
12552 while (ctxt->inputNr > 1)
12553 xmlFreeInputStream(inputPop(ctxt));
12554 if (ctxt->input != NULL) {
12555 /*
12556 * in case there was a specific allocation deallocate before
12557 * overriding base
12558 */
12559 if (ctxt->input->free != NULL) {
12560 ctxt->input->free((xmlChar *) ctxt->input->base);
12561 ctxt->input->free = NULL;
12562 }
12563 if (ctxt->input->buf != NULL) {
12564 xmlFreeParserInputBuffer(ctxt->input->buf);
12565 ctxt->input->buf = NULL;
12566 }
12567 ctxt->input->cur = BAD_CAST"";
12568 ctxt->input->length = 0;
12569 ctxt->input->base = ctxt->input->cur;
12570 ctxt->input->end = ctxt->input->cur;
12571 }
12572}
12573
12574/**
12575 * xmlStopParser:
12576 * @ctxt: an XML parser context
12577 *
12578 * Blocks further parser processing
12579 */
12580void
12581xmlStopParser(xmlParserCtxtPtr ctxt) {
12582 if (ctxt == NULL)
12583 return;
12584 xmlHaltParser(ctxt);
12585 ctxt->errNo = XML_ERR_USER_STOP;
12586}
12587
12588/**
12589 * xmlCreateIOParserCtxt:
12590 * @sax: a SAX handler
12591 * @user_data: The user data returned on SAX callbacks
12592 * @ioread: an I/O read function
12593 * @ioclose: an I/O close function
12594 * @ioctx: an I/O handler
12595 * @enc: the charset encoding if known
12596 *
12597 * Create a parser context for using the XML parser with an existing
12598 * I/O stream
12599 *
12600 * Returns the new parser context or NULL
12601 */
12602xmlParserCtxtPtr
12603xmlCreateIOParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
12604 xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
12605 void *ioctx, xmlCharEncoding enc) {
12606 xmlParserCtxtPtr ctxt;
12607 xmlParserInputPtr inputStream;
12608 xmlParserInputBufferPtr buf;
12609
12610 if (ioread == NULL) return(NULL);
12611
12612 buf = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx, enc);
12613 if (buf == NULL) {
12614 if (ioclose != NULL)
12615 ioclose(ioctx);
12616 return (NULL);
12617 }
12618
12619 ctxt = xmlNewParserCtxt();
12620 if (ctxt == NULL) {
12621 xmlFreeParserInputBuffer(buf);
12622 return(NULL);
12623 }
12624 if (sax != NULL) {
12625#ifdef LIBXML_SAX1_ENABLED
12626 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
12627#endif /* LIBXML_SAX1_ENABLED */
12628 xmlFree(ctxt->sax);
12629 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
12630 if (ctxt->sax == NULL) {
12631 xmlFreeParserInputBuffer(buf);
12632 xmlErrMemory(ctxt, NULL);
12633 xmlFreeParserCtxt(ctxt);
12634 return(NULL);
12635 }
12636 memset(ctxt->sax, 0, sizeof(xmlSAXHandler));
12637 if (sax->initialized == XML_SAX2_MAGIC)
12638 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
12639 else
12640 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1));
12641 if (user_data != NULL)
12642 ctxt->userData = user_data;
12643 }
12644
12645 inputStream = xmlNewIOInputStream(ctxt, buf, enc);
12646 if (inputStream == NULL) {
12647 xmlFreeParserCtxt(ctxt);
12648 return(NULL);
12649 }
12650 inputPush(ctxt, inputStream);
12651
12652 return(ctxt);
12653}
12654
12655#ifdef LIBXML_VALID_ENABLED
12656/************************************************************************
12657 * *
12658 * Front ends when parsing a DTD *
12659 * *
12660 ************************************************************************/
12661
12662/**
12663 * xmlIOParseDTD:
12664 * @sax: the SAX handler block or NULL
12665 * @input: an Input Buffer
12666 * @enc: the charset encoding if known
12667 *
12668 * Load and parse a DTD
12669 *
12670 * Returns the resulting xmlDtdPtr or NULL in case of error.
12671 * @input will be freed by the function in any case.
12672 */
12673
12674xmlDtdPtr
12675xmlIOParseDTD(xmlSAXHandlerPtr sax, xmlParserInputBufferPtr input,
12676 xmlCharEncoding enc) {
12677 xmlDtdPtr ret = NULL;
12678 xmlParserCtxtPtr ctxt;
12679 xmlParserInputPtr pinput = NULL;
12680 xmlChar start[4];
12681
12682 if (input == NULL)
12683 return(NULL);
12684
12685 ctxt = xmlNewParserCtxt();
12686 if (ctxt == NULL) {
12687 xmlFreeParserInputBuffer(input);
12688 return(NULL);
12689 }
12690
12691 /* We are loading a DTD */
12692 ctxt->options |= XML_PARSE_DTDLOAD;
12693
12694 /*
12695 * Set-up the SAX context
12696 */
12697 if (sax != NULL) {
12698 if (ctxt->sax != NULL)
12699 xmlFree(ctxt->sax);
12700 ctxt->sax = sax;
12701 ctxt->userData = ctxt;
12702 }
12703 xmlDetectSAX2(ctxt);
12704
12705 /*
12706 * generate a parser input from the I/O handler
12707 */
12708
12709 pinput = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
12710 if (pinput == NULL) {
12711 if (sax != NULL) ctxt->sax = NULL;
12712 xmlFreeParserInputBuffer(input);
12713 xmlFreeParserCtxt(ctxt);
12714 return(NULL);
12715 }
12716
12717 /*
12718 * plug some encoding conversion routines here.
12719 */
12720 if (xmlPushInput(ctxt, pinput) < 0) {
12721 if (sax != NULL) ctxt->sax = NULL;
12722 xmlFreeParserCtxt(ctxt);
12723 return(NULL);
12724 }
12725 if (enc != XML_CHAR_ENCODING_NONE) {
12726 xmlSwitchEncoding(ctxt, enc);
12727 }
12728
12729 pinput->filename = NULL;
12730 pinput->line = 1;
12731 pinput->col = 1;
12732 pinput->base = ctxt->input->cur;
12733 pinput->cur = ctxt->input->cur;
12734 pinput->free = NULL;
12735
12736 /*
12737 * let's parse that entity knowing it's an external subset.
12738 */
12739 ctxt->inSubset = 2;
12740 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
12741 if (ctxt->myDoc == NULL) {
12742 xmlErrMemory(ctxt, "New Doc failed");
12743 return(NULL);
12744 }
12745 ctxt->myDoc->properties = XML_DOC_INTERNAL;
12746 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
12747 BAD_CAST "none", BAD_CAST "none");
12748
12749 if ((enc == XML_CHAR_ENCODING_NONE) &&
12750 ((ctxt->input->end - ctxt->input->cur) >= 4)) {
12751 /*
12752 * Get the 4 first bytes and decode the charset
12753 * if enc != XML_CHAR_ENCODING_NONE
12754 * plug some encoding conversion routines.
12755 */
12756 start[0] = RAW;
12757 start[1] = NXT(1);
12758 start[2] = NXT(2);
12759 start[3] = NXT(3);
12760 enc = xmlDetectCharEncoding(start, 4);
12761 if (enc != XML_CHAR_ENCODING_NONE) {
12762 xmlSwitchEncoding(ctxt, enc);
12763 }
12764 }
12765
12766 xmlParseExternalSubset(ctxt, BAD_CAST "none", BAD_CAST "none");
12767
12768 if (ctxt->myDoc != NULL) {
12769 if (ctxt->wellFormed) {
12770 ret = ctxt->myDoc->extSubset;
12771 ctxt->myDoc->extSubset = NULL;
12772 if (ret != NULL) {
12773 xmlNodePtr tmp;
12774
12775 ret->doc = NULL;
12776 tmp = ret->children;
12777 while (tmp != NULL) {
12778 tmp->doc = NULL;
12779 tmp = tmp->next;
12780 }
12781 }
12782 } else {
12783 ret = NULL;
12784 }
12785 xmlFreeDoc(ctxt->myDoc);
12786 ctxt->myDoc = NULL;
12787 }
12788 if (sax != NULL) ctxt->sax = NULL;
12789 xmlFreeParserCtxt(ctxt);
12790
12791 return(ret);
12792}
12793
12794/**
12795 * xmlSAXParseDTD:
12796 * @sax: the SAX handler block
12797 * @ExternalID: a NAME* containing the External ID of the DTD
12798 * @SystemID: a NAME* containing the URL to the DTD
12799 *
12800 * Load and parse an external subset.
12801 *
12802 * Returns the resulting xmlDtdPtr or NULL in case of error.
12803 */
12804
12805xmlDtdPtr
12806xmlSAXParseDTD(xmlSAXHandlerPtr sax, const xmlChar *ExternalID,
12807 const xmlChar *SystemID) {
12808 xmlDtdPtr ret = NULL;
12809 xmlParserCtxtPtr ctxt;
12810 xmlParserInputPtr input = NULL;
12811 xmlCharEncoding enc;
12812 xmlChar* systemIdCanonic;
12813
12814 if ((ExternalID == NULL) && (SystemID == NULL)) return(NULL);
12815
12816 ctxt = xmlNewParserCtxt();
12817 if (ctxt == NULL) {
12818 return(NULL);
12819 }
12820
12821 /* We are loading a DTD */
12822 ctxt->options |= XML_PARSE_DTDLOAD;
12823
12824 /*
12825 * Set-up the SAX context
12826 */
12827 if (sax != NULL) {
12828 if (ctxt->sax != NULL)
12829 xmlFree(ctxt->sax);
12830 ctxt->sax = sax;
12831 ctxt->userData = ctxt;
12832 }
12833
12834 /*
12835 * Canonicalise the system ID
12836 */
12837 systemIdCanonic = xmlCanonicPath(SystemID);
12838 if ((SystemID != NULL) && (systemIdCanonic == NULL)) {
12839 xmlFreeParserCtxt(ctxt);
12840 return(NULL);
12841 }
12842
12843 /*
12844 * Ask the Entity resolver to load the damn thing
12845 */
12846
12847 if ((ctxt->sax != NULL) && (ctxt->sax->resolveEntity != NULL))
12848 input = ctxt->sax->resolveEntity(ctxt->userData, ExternalID,
12849 systemIdCanonic);
12850 if (input == NULL) {
12851 if (sax != NULL) ctxt->sax = NULL;
12852 xmlFreeParserCtxt(ctxt);
12853 if (systemIdCanonic != NULL)
12854 xmlFree(systemIdCanonic);
12855 return(NULL);
12856 }
12857
12858 /*
12859 * plug some encoding conversion routines here.
12860 */
12861 if (xmlPushInput(ctxt, input) < 0) {
12862 if (sax != NULL) ctxt->sax = NULL;
12863 xmlFreeParserCtxt(ctxt);
12864 if (systemIdCanonic != NULL)
12865 xmlFree(systemIdCanonic);
12866 return(NULL);
12867 }
12868 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
12869 enc = xmlDetectCharEncoding(ctxt->input->cur, 4);
12870 xmlSwitchEncoding(ctxt, enc);
12871 }
12872
12873 if (input->filename == NULL)
12874 input->filename = (char *) systemIdCanonic;
12875 else
12876 xmlFree(systemIdCanonic);
12877 input->line = 1;
12878 input->col = 1;
12879 input->base = ctxt->input->cur;
12880 input->cur = ctxt->input->cur;
12881 input->free = NULL;
12882
12883 /*
12884 * let's parse that entity knowing it's an external subset.
12885 */
12886 ctxt->inSubset = 2;
12887 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
12888 if (ctxt->myDoc == NULL) {
12889 xmlErrMemory(ctxt, "New Doc failed");
12890 if (sax != NULL) ctxt->sax = NULL;
12891 xmlFreeParserCtxt(ctxt);
12892 return(NULL);
12893 }
12894 ctxt->myDoc->properties = XML_DOC_INTERNAL;
12895 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
12896 ExternalID, SystemID);
12897 xmlParseExternalSubset(ctxt, ExternalID, SystemID);
12898
12899 if (ctxt->myDoc != NULL) {
12900 if (ctxt->wellFormed) {
12901 ret = ctxt->myDoc->extSubset;
12902 ctxt->myDoc->extSubset = NULL;
12903 if (ret != NULL) {
12904 xmlNodePtr tmp;
12905
12906 ret->doc = NULL;
12907 tmp = ret->children;
12908 while (tmp != NULL) {
12909 tmp->doc = NULL;
12910 tmp = tmp->next;
12911 }
12912 }
12913 } else {
12914 ret = NULL;
12915 }
12916 xmlFreeDoc(ctxt->myDoc);
12917 ctxt->myDoc = NULL;
12918 }
12919 if (sax != NULL) ctxt->sax = NULL;
12920 xmlFreeParserCtxt(ctxt);
12921
12922 return(ret);
12923}
12924
12925
12926/**
12927 * xmlParseDTD:
12928 * @ExternalID: a NAME* containing the External ID of the DTD
12929 * @SystemID: a NAME* containing the URL to the DTD
12930 *
12931 * Load and parse an external subset.
12932 *
12933 * Returns the resulting xmlDtdPtr or NULL in case of error.
12934 */
12935
12936xmlDtdPtr
12937xmlParseDTD(const xmlChar *ExternalID, const xmlChar *SystemID) {
12938 return(xmlSAXParseDTD(NULL, ExternalID, SystemID));
12939}
12940#endif /* LIBXML_VALID_ENABLED */
12941
12942/************************************************************************
12943 * *
12944 * Front ends when parsing an Entity *
12945 * *
12946 ************************************************************************/
12947
12948/**
12949 * xmlParseCtxtExternalEntity:
12950 * @ctx: the existing parsing context
12951 * @URL: the URL for the entity to load
12952 * @ID: the System ID for the entity to load
12953 * @lst: the return value for the set of parsed nodes
12954 *
12955 * Parse an external general entity within an existing parsing context
12956 * An external general parsed entity is well-formed if it matches the
12957 * production labeled extParsedEnt.
12958 *
12959 * [78] extParsedEnt ::= TextDecl? content
12960 *
12961 * Returns 0 if the entity is well formed, -1 in case of args problem and
12962 * the parser error code otherwise
12963 */
12964
12965int
12966xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctx, const xmlChar *URL,
12967 const xmlChar *ID, xmlNodePtr *lst) {
12968 void *userData;
12969
12970 if (ctx == NULL) return(-1);
12971 /*
12972 * If the user provided their own SAX callbacks, then reuse the
12973 * userData callback field, otherwise the expected setup in a
12974 * DOM builder is to have userData == ctxt
12975 */
12976 if (ctx->userData == ctx)
12977 userData = NULL;
12978 else
12979 userData = ctx->userData;
12980 return xmlParseExternalEntityPrivate(ctx->myDoc, ctx, ctx->sax,
12981 userData, ctx->depth + 1,
12982 URL, ID, lst);
12983}
12984
12985/**
12986 * xmlParseExternalEntityPrivate:
12987 * @doc: the document the chunk pertains to
12988 * @oldctxt: the previous parser context if available
12989 * @sax: the SAX handler block (possibly NULL)
12990 * @user_data: The user data returned on SAX callbacks (possibly NULL)
12991 * @depth: Used for loop detection, use 0
12992 * @URL: the URL for the entity to load
12993 * @ID: the System ID for the entity to load
12994 * @list: the return value for the set of parsed nodes
12995 *
12996 * Private version of xmlParseExternalEntity()
12997 *
12998 * Returns 0 if the entity is well formed, -1 in case of args problem and
12999 * the parser error code otherwise
13000 */
13001
13002static xmlParserErrors
13003xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
13004 xmlSAXHandlerPtr sax,
13005 void *user_data, int depth, const xmlChar *URL,
13006 const xmlChar *ID, xmlNodePtr *list) {
13007 xmlParserCtxtPtr ctxt;
13008 xmlDocPtr newDoc;
13009 xmlNodePtr newRoot;
13010 xmlSAXHandlerPtr oldsax = NULL;
13011 xmlParserErrors ret = XML_ERR_OK;
13012 xmlChar start[4];
13013 xmlCharEncoding enc;
13014
13015 if (((depth > 40) &&
13016 ((oldctxt == NULL) || (oldctxt->options & XML_PARSE_HUGE) == 0)) ||
13017 (depth > 1024)) {
13018 return(XML_ERR_ENTITY_LOOP);
13019 }
13020
13021 if (list != NULL)
13022 *list = NULL;
13023 if ((URL == NULL) && (ID == NULL))
13024 return(XML_ERR_INTERNAL_ERROR);
13025 if (doc == NULL)
13026 return(XML_ERR_INTERNAL_ERROR);
13027
13028
13029 ctxt = xmlCreateEntityParserCtxtInternal(URL, ID, NULL, oldctxt);
13030 if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY);
13031 ctxt->userData = ctxt;
13032 if (sax != NULL) {
13033 oldsax = ctxt->sax;
13034 ctxt->sax = sax;
13035 if (user_data != NULL)
13036 ctxt->userData = user_data;
13037 }
13038 xmlDetectSAX2(ctxt);
13039 newDoc = xmlNewDoc(BAD_CAST "1.0");
13040 if (newDoc == NULL) {
13041 xmlFreeParserCtxt(ctxt);
13042 return(XML_ERR_INTERNAL_ERROR);
13043 }
13044 newDoc->properties = XML_DOC_INTERNAL;
13045 if (doc) {
13046 newDoc->intSubset = doc->intSubset;
13047 newDoc->extSubset = doc->extSubset;
13048 if (doc->dict) {
13049 newDoc->dict = doc->dict;
13050 xmlDictReference(newDoc->dict);
13051 }
13052 if (doc->URL != NULL) {
13053 newDoc->URL = xmlStrdup(doc->URL);
13054 }
13055 }
13056 newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
13057 if (newRoot == NULL) {
13058 if (sax != NULL)
13059 ctxt->sax = oldsax;
13060 xmlFreeParserCtxt(ctxt);
13061 newDoc->intSubset = NULL;
13062 newDoc->extSubset = NULL;
13063 xmlFreeDoc(newDoc);
13064 return(XML_ERR_INTERNAL_ERROR);
13065 }
13066 xmlAddChild((xmlNodePtr) newDoc, newRoot);
13067 nodePush(ctxt, newDoc->children);
13068 if (doc == NULL) {
13069 ctxt->myDoc = newDoc;
13070 } else {
13071 ctxt->myDoc = doc;
13072 newRoot->doc = doc;
13073 }
13074
13075 /*
13076 * Get the 4 first bytes and decode the charset
13077 * if enc != XML_CHAR_ENCODING_NONE
13078 * plug some encoding conversion routines.
13079 */
13080 GROW;
13081 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
13082 start[0] = RAW;
13083 start[1] = NXT(1);
13084 start[2] = NXT(2);
13085 start[3] = NXT(3);
13086 enc = xmlDetectCharEncoding(start, 4);
13087 if (enc != XML_CHAR_ENCODING_NONE) {
13088 xmlSwitchEncoding(ctxt, enc);
13089 }
13090 }
13091
13092 /*
13093 * Parse a possible text declaration first
13094 */
13095 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
13096 xmlParseTextDecl(ctxt);
13097 /*
13098 * An XML-1.0 document can't reference an entity not XML-1.0
13099 */
13100 if ((xmlStrEqual(oldctxt->version, BAD_CAST "1.0")) &&
13101 (!xmlStrEqual(ctxt->input->version, BAD_CAST "1.0"))) {
13102 xmlFatalErrMsg(ctxt, XML_ERR_VERSION_MISMATCH,
13103 "Version mismatch between document and entity\n");
13104 }
13105 }
13106
13107 ctxt->instate = XML_PARSER_CONTENT;
13108 ctxt->depth = depth;
13109 if (oldctxt != NULL) {
13110 ctxt->_private = oldctxt->_private;
13111 ctxt->loadsubset = oldctxt->loadsubset;
13112 ctxt->validate = oldctxt->validate;
13113 ctxt->valid = oldctxt->valid;
13114 ctxt->replaceEntities = oldctxt->replaceEntities;
13115 if (oldctxt->validate) {
13116 ctxt->vctxt.error = oldctxt->vctxt.error;
13117 ctxt->vctxt.warning = oldctxt->vctxt.warning;
13118 ctxt->vctxt.userData = oldctxt->vctxt.userData;
13119 }
13120 ctxt->external = oldctxt->external;
13121 if (ctxt->dict) xmlDictFree(ctxt->dict);
13122 ctxt->dict = oldctxt->dict;
13123 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
13124 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
13125 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
13126 ctxt->dictNames = oldctxt->dictNames;
13127 ctxt->attsDefault = oldctxt->attsDefault;
13128 ctxt->attsSpecial = oldctxt->attsSpecial;
13129 ctxt->linenumbers = oldctxt->linenumbers;
13130 ctxt->record_info = oldctxt->record_info;
13131 ctxt->node_seq.maximum = oldctxt->node_seq.maximum;
13132 ctxt->node_seq.length = oldctxt->node_seq.length;
13133 ctxt->node_seq.buffer = oldctxt->node_seq.buffer;
13134 } else {
13135 /*
13136 * Doing validity checking on chunk without context
13137 * doesn't make sense
13138 */
13139 ctxt->_private = NULL;
13140 ctxt->validate = 0;
13141 ctxt->external = 2;
13142 ctxt->loadsubset = 0;
13143 }
13144
13145 xmlParseContent(ctxt);
13146
13147 if ((RAW == '<') && (NXT(1) == '/')) {
13148 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13149 } else if (RAW != 0) {
13150 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
13151 }
13152 if (ctxt->node != newDoc->children) {
13153 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13154 }
13155
13156 if (!ctxt->wellFormed) {
13157 if (ctxt->errNo == 0)
13158 ret = XML_ERR_INTERNAL_ERROR;
13159 else
13160 ret = (xmlParserErrors)ctxt->errNo;
13161 } else {
13162 if (list != NULL) {
13163 xmlNodePtr cur;
13164
13165 /*
13166 * Return the newly created nodeset after unlinking it from
13167 * they pseudo parent.
13168 */
13169 cur = newDoc->children->children;
13170 *list = cur;
13171 while (cur != NULL) {
13172 cur->parent = NULL;
13173 cur = cur->next;
13174 }
13175 newDoc->children->children = NULL;
13176 }
13177 ret = XML_ERR_OK;
13178 }
13179
13180 /*
13181 * Record in the parent context the number of entities replacement
13182 * done when parsing that reference.
13183 */
13184 if (oldctxt != NULL)
13185 oldctxt->nbentities += ctxt->nbentities;
13186
13187 /*
13188 * Also record the size of the entity parsed
13189 */
13190 if (ctxt->input != NULL && oldctxt != NULL) {
13191 oldctxt->sizeentities += ctxt->input->consumed;
13192 oldctxt->sizeentities += (ctxt->input->cur - ctxt->input->base);
13193 }
13194 /*
13195 * And record the last error if any
13196 */
13197 if ((oldctxt != NULL) && (ctxt->lastError.code != XML_ERR_OK))
13198 xmlCopyError(&ctxt->lastError, &oldctxt->lastError);
13199
13200 if (sax != NULL)
13201 ctxt->sax = oldsax;
13202 if (oldctxt != NULL) {
13203 ctxt->dict = NULL;
13204 ctxt->attsDefault = NULL;
13205 ctxt->attsSpecial = NULL;
13206 oldctxt->validate = ctxt->validate;
13207 oldctxt->valid = ctxt->valid;
13208 oldctxt->node_seq.maximum = ctxt->node_seq.maximum;
13209 oldctxt->node_seq.length = ctxt->node_seq.length;
13210 oldctxt->node_seq.buffer = ctxt->node_seq.buffer;
13211 }
13212 ctxt->node_seq.maximum = 0;
13213 ctxt->node_seq.length = 0;
13214 ctxt->node_seq.buffer = NULL;
13215 xmlFreeParserCtxt(ctxt);
13216 newDoc->intSubset = NULL;
13217 newDoc->extSubset = NULL;
13218 xmlFreeDoc(newDoc);
13219
13220 return(ret);
13221}
13222
13223#ifdef LIBXML_SAX1_ENABLED
13224/**
13225 * xmlParseExternalEntity:
13226 * @doc: the document the chunk pertains to
13227 * @sax: the SAX handler block (possibly NULL)
13228 * @user_data: The user data returned on SAX callbacks (possibly NULL)
13229 * @depth: Used for loop detection, use 0
13230 * @URL: the URL for the entity to load
13231 * @ID: the System ID for the entity to load
13232 * @lst: the return value for the set of parsed nodes
13233 *
13234 * Parse an external general entity
13235 * An external general parsed entity is well-formed if it matches the
13236 * production labeled extParsedEnt.
13237 *
13238 * [78] extParsedEnt ::= TextDecl? content
13239 *
13240 * Returns 0 if the entity is well formed, -1 in case of args problem and
13241 * the parser error code otherwise
13242 */
13243
13244int
13245xmlParseExternalEntity(xmlDocPtr doc, xmlSAXHandlerPtr sax, void *user_data,
13246 int depth, const xmlChar *URL, const xmlChar *ID, xmlNodePtr *lst) {
13247 return(xmlParseExternalEntityPrivate(doc, NULL, sax, user_data, depth, URL,
13248 ID, lst));
13249}
13250
13251/**
13252 * xmlParseBalancedChunkMemory:
13253 * @doc: the document the chunk pertains to (must not be NULL)
13254 * @sax: the SAX handler block (possibly NULL)
13255 * @user_data: The user data returned on SAX callbacks (possibly NULL)
13256 * @depth: Used for loop detection, use 0
13257 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
13258 * @lst: the return value for the set of parsed nodes
13259 *
13260 * Parse a well-balanced chunk of an XML document
13261 * called by the parser
13262 * The allowed sequence for the Well Balanced Chunk is the one defined by
13263 * the content production in the XML grammar:
13264 *
13265 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
13266 *
13267 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
13268 * the parser error code otherwise
13269 */
13270
13271int
13272xmlParseBalancedChunkMemory(xmlDocPtr doc, xmlSAXHandlerPtr sax,
13273 void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst) {
13274 return xmlParseBalancedChunkMemoryRecover( doc, sax, user_data,
13275 depth, string, lst, 0 );
13276}
13277#endif /* LIBXML_SAX1_ENABLED */
13278
13279/**
13280 * xmlParseBalancedChunkMemoryInternal:
13281 * @oldctxt: the existing parsing context
13282 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
13283 * @user_data: the user data field for the parser context
13284 * @lst: the return value for the set of parsed nodes
13285 *
13286 *
13287 * Parse a well-balanced chunk of an XML document
13288 * called by the parser
13289 * The allowed sequence for the Well Balanced Chunk is the one defined by
13290 * the content production in the XML grammar:
13291 *
13292 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
13293 *
 * A temporary parser context is created for @string. It borrows the
 * dictionary, SAX handler, default/special attribute tables and
 * (when present) the document of @oldctxt; all borrowed pointers are
 * detached again before the temporary context is freed.
 *
13294 * Returns XML_ERR_OK if the chunk is well balanced, and the parser
13295 * error code otherwise
 * (XML_ERR_ENTITY_LOOP when the depth limit of @oldctxt is exceeded,
 * XML_ERR_INTERNAL_ERROR when @string is NULL or a document/root node
 * cannot be allocated, XML_WAR_UNDECLARED_ENTITY when no memory parser
 * context could be created for @string).
13296 *
13297 * This function has no recovery mode: *@lst is only filled in when the
13298 * chunk parsed without error, otherwise it is left set to NULL.
13299 */
13300static xmlParserErrors
13301xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
13302 const xmlChar *string, void *user_data, xmlNodePtr *lst) {
13303 xmlParserCtxtPtr ctxt;
13304 xmlDocPtr newDoc = NULL;
13305 xmlNodePtr newRoot;
13306 xmlSAXHandlerPtr oldsax = NULL;
13307 xmlNodePtr content = NULL;
13308 xmlNodePtr last = NULL;
13309 int size;
13310 xmlParserErrors ret = XML_ERR_OK;
13311#ifdef SAX2
13312 int i;
13313#endif
13314
    /* Nesting limit: 40 levels unless XML_PARSE_HUGE is set, 1024 always. */
13315 if (((oldctxt->depth > 40) && ((oldctxt->options & XML_PARSE_HUGE) == 0)) ||
13316 (oldctxt->depth > 1024)) {
13317 return(XML_ERR_ENTITY_LOOP);
13318 }
13319
13320
13321 if (lst != NULL)
13322 *lst = NULL;
13323 if (string == NULL)
13324 return(XML_ERR_INTERNAL_ERROR);
13325
13326 size = xmlStrlen(string);
13327
13328 ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
13329 if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY);
13330 if (user_data != NULL)
13331 ctxt->userData = user_data;
13332 else
13333 ctxt->userData = ctxt;
    /*
     * Replace the context's own dictionary with the parent's one. No extra
     * reference is taken here, which is why every exit path below resets
     * ctxt->dict to NULL before calling xmlFreeParserCtxt().
     */
13334 if (ctxt->dict != NULL) xmlDictFree(ctxt->dict);
13335 ctxt->dict = oldctxt->dict;
13336 ctxt->input_id = oldctxt->input_id + 1;
13337 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
13338 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
13339 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
13340
13341#ifdef SAX2
13342 /* propagate namespaces down the entity */
13343 for (i = 0;i < oldctxt->nsNr;i += 2) {
13344 nsPush(ctxt, oldctxt->nsTab[i], oldctxt->nsTab[i+1]);
13345 }
13346#endif
13347
    /* Borrow the parent's SAX handler; the original one is restored on exit. */
13348 oldsax = ctxt->sax;
13349 ctxt->sax = oldctxt->sax;
13350 xmlDetectSAX2(ctxt);
13351 ctxt->replaceEntities = oldctxt->replaceEntities;
13352 ctxt->options = oldctxt->options;
13353
13354 ctxt->_private = oldctxt->_private;
    /*
     * Parse into the parent's document when it has one (remembering its
     * current children/last so they can be put back afterwards), otherwise
     * into a scratch document that is freed before returning.
     */
13355 if (oldctxt->myDoc == NULL) {
13356 newDoc = xmlNewDoc(BAD_CAST "1.0");
13357 if (newDoc == NULL) {
13358 ctxt->sax = oldsax;
13359 ctxt->dict = NULL;
13360 xmlFreeParserCtxt(ctxt);
13361 return(XML_ERR_INTERNAL_ERROR);
13362 }
13363 newDoc->properties = XML_DOC_INTERNAL;
13364 newDoc->dict = ctxt->dict;
13365 xmlDictReference(newDoc->dict);
13366 ctxt->myDoc = newDoc;
13367 } else {
13368 ctxt->myDoc = oldctxt->myDoc;
13369 content = ctxt->myDoc->children;
13370 last = ctxt->myDoc->last;
13371 }
    /* Temporary root under which the parsed nodes are collected. */
13372 newRoot = xmlNewDocNode(ctxt->myDoc, NULL, BAD_CAST "pseudoroot", NULL);
13373 if (newRoot == NULL) {
13374 ctxt->sax = oldsax;
13375 ctxt->dict = NULL;
13376 xmlFreeParserCtxt(ctxt);
13377 if (newDoc != NULL) {
13378 xmlFreeDoc(newDoc);
13379 }
13380 return(XML_ERR_INTERNAL_ERROR);
13381 }
13382 ctxt->myDoc->children = NULL;
13383 ctxt->myDoc->last = NULL;
13384 xmlAddChild((xmlNodePtr) ctxt->myDoc, newRoot);
13385 nodePush(ctxt, ctxt->myDoc->children);
13386 ctxt->instate = XML_PARSER_CONTENT;
13387 ctxt->depth = oldctxt->depth + 1;
13388
13389 ctxt->validate = 0;
13390 ctxt->loadsubset = oldctxt->loadsubset;
13391 if ((oldctxt->validate) || (oldctxt->replaceEntities != 0)) {
13392 /*
13393 * ID/IDREF registration will be done in xmlValidateElement below
13394 */
13395 ctxt->loadsubset |= XML_SKIP_IDS;
13396 }
13397 ctxt->dictNames = oldctxt->dictNames;
    /* Borrowed tables; they are detached (set to NULL) before the free below. */
13398 ctxt->attsDefault = oldctxt->attsDefault;
13399 ctxt->attsSpecial = oldctxt->attsSpecial;
13400
13401 xmlParseContent(ctxt);
    /* Anything left in the input after the content is an error. */
13402 if ((RAW == '<') && (NXT(1) == '/')) {
13403 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13404 } else if (RAW != 0) {
13405 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
13406 }
    /* The node stack must be back at the pseudo root. */
13407 if (ctxt->node != ctxt->myDoc->children) {
13408 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13409 }
13410
13411 if (!ctxt->wellFormed) {
13412 if (ctxt->errNo == 0)
13413 ret = XML_ERR_INTERNAL_ERROR;
13414 else
13415 ret = (xmlParserErrors)ctxt->errNo;
13416 } else {
13417 ret = XML_ERR_OK;
13418 }
13419
13420 if ((lst != NULL) && (ret == XML_ERR_OK)) {
13421 xmlNodePtr cur;
13422
13423 /*
13424 * Return the newly created nodeset after unlinking it from
13425 * the pseudo parent.
13426 */
13427 cur = ctxt->myDoc->children->children;
13428 *lst = cur;
13429 while (cur != NULL) {
13430#ifdef LIBXML_VALID_ENABLED
13431 if ((oldctxt->validate) && (oldctxt->wellFormed) &&
13432 (oldctxt->myDoc) && (oldctxt->myDoc->intSubset) &&
13433 (cur->type == XML_ELEMENT_NODE)) {
13434 oldctxt->valid &= xmlValidateElement(&oldctxt->vctxt,
13435 oldctxt->myDoc, cur);
13436 }
13437#endif /* LIBXML_VALID_ENABLED */
13438 cur->parent = NULL;
13439 cur = cur->next;
13440 }
13441 ctxt->myDoc->children->children = NULL;
13442 }
    /*
     * Drop the pseudo root (together with any nodes that were not handed
     * over through lst) and restore the document's saved children/last.
     */
13443 if (ctxt->myDoc != NULL) {
13444 xmlFreeNode(ctxt->myDoc->children);
13445 ctxt->myDoc->children = content;
13446 ctxt->myDoc->last = last;
13447 }
13448
13449 /*
13450 * Record in the parent context the number of entities replacement
13451 * done when parsing that reference.
13452 */
    /* NOTE(review): oldctxt was already dereferenced above, so this check is always true. */
13453 if (oldctxt != NULL)
13454 oldctxt->nbentities += ctxt->nbentities;
13455
13456 /*
13457 * Also record the last error if any
13458 */
13459 if (ctxt->lastError.code != XML_ERR_OK)
13460 xmlCopyError(&ctxt->lastError, &oldctxt->lastError);
13461
    /* Detach everything borrowed from oldctxt before freeing the context. */
13462 ctxt->sax = oldsax;
13463 ctxt->dict = NULL;
13464 ctxt->attsDefault = NULL;
13465 ctxt->attsSpecial = NULL;
13466 xmlFreeParserCtxt(ctxt);
13467 if (newDoc != NULL) {
13468 xmlFreeDoc(newDoc);
13469 }
13470
13471 return(ret);
13472}
13473
13474/**
13475 * xmlParseInNodeContext:
13476 * @node: the context node
13477 * @data: the input string
13478 * @datalen: the input string length in bytes
13479 * @options: a combination of xmlParserOption
13480 * @lst: the return value for the set of parsed nodes
13481 *
13482 * Parse a well-balanced chunk of an XML document
13483 * within the context (DTD, namespaces, etc ...) of the given node.
13484 *
13485 * The allowed sequence for the data is a Well Balanced Chunk defined by
13486 * the content production in the XML grammar:
13487 *
13488 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
13489 *
13490 * Returns XML_ERR_OK if the chunk is well balanced, and the parser
13491 * error code otherwise
13492 */
13493xmlParserErrors
13494xmlParseInNodeContext(xmlNodePtr node, const char *data, int datalen,
13495 int options, xmlNodePtr *lst) {
13496#ifdef SAX2
13497 xmlParserCtxtPtr ctxt;
13498 xmlDocPtr doc = NULL;
13499 xmlNodePtr fake, cur;
13500 int nsnr = 0;
13501
13502 xmlParserErrors ret = XML_ERR_OK;
13503
13504 /*
13505 * check all input parameters, grab the document
13506 */
13507 if ((lst == NULL) || (node == NULL) || (data == NULL) || (datalen < 0))
13508 return(XML_ERR_INTERNAL_ERROR);
13509 switch (node->type) {
13510 case XML_ELEMENT_NODE:
13511 case XML_ATTRIBUTE_NODE:
13512 case XML_TEXT_NODE:
13513 case XML_CDATA_SECTION_NODE:
13514 case XML_ENTITY_REF_NODE:
13515 case XML_PI_NODE:
13516 case XML_COMMENT_NODE:
13517 case XML_DOCUMENT_NODE:
13518 case XML_HTML_DOCUMENT_NODE:
13519 break;
13520 default:
13521 return(XML_ERR_INTERNAL_ERROR);
13522
13523 }
13524 while ((node != NULL) && (node->type != XML_ELEMENT_NODE) &&
13525 (node->type != XML_DOCUMENT_NODE) &&
13526 (node->type != XML_HTML_DOCUMENT_NODE))
13527 node = node->parent;
13528 if (node == NULL)
13529 return(XML_ERR_INTERNAL_ERROR);
13530 if (node->type == XML_ELEMENT_NODE)
13531 doc = node->doc;
13532 else
13533 doc = (xmlDocPtr) node;
13534 if (doc == NULL)
13535 return(XML_ERR_INTERNAL_ERROR);
13536
13537 /*
13538 * allocate a context and set-up everything not related to the
13539 * node position in the tree
13540 */
13541 if (doc->type == XML_DOCUMENT_NODE)
13542 ctxt = xmlCreateMemoryParserCtxt((char *) data, datalen);
13543#ifdef LIBXML_HTML_ENABLED
13544 else if (doc->type == XML_HTML_DOCUMENT_NODE) {
13545 ctxt = htmlCreateMemoryParserCtxt((char *) data, datalen);
13546 /*
13547 * When parsing in context, it makes no sense to add implied
13548 * elements like html/body/etc...
13549 */
13550 options |= HTML_PARSE_NOIMPLIED;
13551 }
13552#endif
13553 else
13554 return(XML_ERR_INTERNAL_ERROR);
13555
13556 if (ctxt == NULL)
13557 return(XML_ERR_NO_MEMORY);
13558
13559 /*
13560 * Use input doc's dict if present, else assure XML_PARSE_NODICT is set.
13561 * We need a dictionary for xmlDetectSAX2, so if there's no doc dict
13562 * we must wait until the last moment to free the original one.
13563 */
13564 if (doc->dict != NULL) {
13565 if (ctxt->dict != NULL)
13566 xmlDictFree(ctxt->dict);
13567 ctxt->dict = doc->dict;
13568 } else
13569 options |= XML_PARSE_NODICT;
13570
13571 if (doc->encoding != NULL) {
13572 xmlCharEncodingHandlerPtr hdlr;
13573
13574 if (ctxt->encoding != NULL)
13575 xmlFree((xmlChar *) ctxt->encoding);
13576 ctxt->encoding = xmlStrdup((const xmlChar *) doc->encoding);
13577
13578 hdlr = xmlFindCharEncodingHandler((const char *) doc->encoding);
13579 if (hdlr != NULL) {
13580 xmlSwitchToEncoding(ctxt, hdlr);
13581 } else {
13582 return(XML_ERR_UNSUPPORTED_ENCODING);
13583 }
13584 }
13585
13586 xmlCtxtUseOptionsInternal(ctxt, options, NULL);
13587 xmlDetectSAX2(ctxt);
13588 ctxt->myDoc = doc;
13589 /* parsing in context, i.e. as within existing content */
13590 ctxt->input_id = 2;
13591 ctxt->instate = XML_PARSER_CONTENT;
13592
13593 fake = xmlNewComment(NULL);
13594 if (fake == NULL) {
13595 xmlFreeParserCtxt(ctxt);
13596 return(XML_ERR_NO_MEMORY);
13597 }
13598 xmlAddChild(node, fake);
13599
13600 if (node->type == XML_ELEMENT_NODE) {
13601 nodePush(ctxt, node);
13602 /*
13603 * initialize the SAX2 namespaces stack
13604 */
13605 cur = node;
13606 while ((cur != NULL) && (cur->type == XML_ELEMENT_NODE)) {
13607 xmlNsPtr ns = cur->nsDef;
13608 const xmlChar *iprefix, *ihref;
13609
13610 while (ns != NULL) {
13611 if (ctxt->dict) {
13612 iprefix = xmlDictLookup(ctxt->dict, ns->prefix, -1);
13613 ihref = xmlDictLookup(ctxt->dict, ns->href, -1);
13614 } else {
13615 iprefix = ns->prefix;
13616 ihref = ns->href;
13617 }
13618
13619 if (xmlGetNamespace(ctxt, iprefix) == NULL) {
13620 nsPush(ctxt, iprefix, ihref);
13621 nsnr++;
13622 }
13623 ns = ns->next;
13624 }
13625 cur = cur->parent;
13626 }
13627 }
13628
13629 if ((ctxt->validate) || (ctxt->replaceEntities != 0)) {
13630 /*
13631 * ID/IDREF registration will be done in xmlValidateElement below
13632 */
13633 ctxt->loadsubset |= XML_SKIP_IDS;
13634 }
13635
13636#ifdef LIBXML_HTML_ENABLED
13637 if (doc->type == XML_HTML_DOCUMENT_NODE)
13638 __htmlParseContent(ctxt);
13639 else
13640#endif
13641 xmlParseContent(ctxt);
13642
13643 nsPop(ctxt, nsnr);
13644 if ((RAW == '<') && (NXT(1) == '/')) {
13645 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13646 } else if (RAW != 0) {
13647 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
13648 }
13649 if ((ctxt->node != NULL) && (ctxt->node != node)) {
13650 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13651 ctxt->wellFormed = 0;
13652 }
13653
13654 if (!ctxt->wellFormed) {
13655 if (ctxt->errNo == 0)
13656 ret = XML_ERR_INTERNAL_ERROR;
13657 else
13658 ret = (xmlParserErrors)ctxt->errNo;
13659 } else {
13660 ret = XML_ERR_OK;
13661 }
13662
13663 /*
13664 * Return the newly created nodeset after unlinking it from
13665 * the pseudo sibling.
13666 */
13667
13668 cur = fake->next;
13669 fake->next = NULL;
13670 node->last = fake;
13671
13672 if (cur != NULL) {
13673 cur->prev = NULL;
13674 }
13675
13676 *lst = cur;
13677
13678 while (cur != NULL) {
13679 cur->parent = NULL;
13680 cur = cur->next;
13681 }
13682
13683 xmlUnlinkNode(fake);
13684 xmlFreeNode(fake);
13685
13686
13687 if (ret != XML_ERR_OK) {
13688 xmlFreeNodeList(*lst);
13689 *lst = NULL;
13690 }
13691
13692 if (doc->dict != NULL)
13693 ctxt->dict = NULL;
13694 xmlFreeParserCtxt(ctxt);
13695
13696 return(ret);
13697#else /* !SAX2 */
13698 return(XML_ERR_INTERNAL_ERROR);
13699#endif
13700}
13701
13702#ifdef LIBXML_SAX1_ENABLED
13703/**
13704 * xmlParseBalancedChunkMemoryRecover:
13705 * @doc: the document the chunk pertains to (must not be NULL)
13706 * @sax: the SAX handler block (possibly NULL)
13707 * @user_data: The user data returned on SAX callbacks (possibly NULL)
13708 * @depth: Used for loop detection, use 0
13709 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
13710 * @lst: the return value for the set of parsed nodes
13711 * @recover: return nodes even if the data is broken (use 0)
13712 *
13713 *
13714 * Parse a well-balanced chunk of an XML document
13715 * called by the parser
13716 * The allowed sequence for the Well Balanced Chunk is the one defined by
13717 * the content production in the XML grammar:
13718 *
13719 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
13720 *
13721 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
13722 * the parser error code otherwise
13723 *
13724 * In case recover is set to 1, the nodelist will not be empty even if
13725 * the parsed chunk is not well balanced, assuming the parsing succeeded to
13726 * some extent.
13727 */
13728int
13729xmlParseBalancedChunkMemoryRecover(xmlDocPtr doc, xmlSAXHandlerPtr sax,
13730 void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst,
13731 int recover) {
13732 xmlParserCtxtPtr ctxt;
13733 xmlDocPtr newDoc;
13734 xmlSAXHandlerPtr oldsax = NULL;
13735 xmlNodePtr content, newRoot;
13736 int size;
13737 int ret = 0;
13738
13739 if (depth > 40) {
13740 return(XML_ERR_ENTITY_LOOP);
13741 }
13742
13743
13744 if (lst != NULL)
13745 *lst = NULL;
13746 if (string == NULL)
13747 return(-1);
13748
13749 size = xmlStrlen(string);
13750
13751 ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
13752 if (ctxt == NULL) return(-1);
13753 ctxt->userData = ctxt;
13754 if (sax != NULL) {
13755 oldsax = ctxt->sax;
13756 ctxt->sax = sax;
13757 if (user_data != NULL)
13758 ctxt->userData = user_data;
13759 }
13760 newDoc = xmlNewDoc(BAD_CAST "1.0");
13761 if (newDoc == NULL) {
13762 xmlFreeParserCtxt(ctxt);
13763 return(-1);
13764 }
13765 newDoc->properties = XML_DOC_INTERNAL;
13766 if ((doc != NULL) && (doc->dict != NULL)) {
13767 xmlDictFree(ctxt->dict);
13768 ctxt->dict = doc->dict;
13769 xmlDictReference(ctxt->dict);
13770 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
13771 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
13772 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
13773 ctxt->dictNames = 1;
13774 } else {
13775 xmlCtxtUseOptionsInternal(ctxt, XML_PARSE_NODICT, NULL);
13776 }
13777 /* doc == NULL is only supported for historic reasons */
13778 if (doc != NULL) {
13779 newDoc->intSubset = doc->intSubset;
13780 newDoc->extSubset = doc->extSubset;
13781 }
13782 newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
13783 if (newRoot == NULL) {
13784 if (sax != NULL)
13785 ctxt->sax = oldsax;
13786 xmlFreeParserCtxt(ctxt);
13787 newDoc->intSubset = NULL;
13788 newDoc->extSubset = NULL;
13789 xmlFreeDoc(newDoc);
13790 return(-1);
13791 }
13792 xmlAddChild((xmlNodePtr) newDoc, newRoot);
13793 nodePush(ctxt, newRoot);
13794 /* doc == NULL is only supported for historic reasons */
13795 if (doc == NULL) {
13796 ctxt->myDoc = newDoc;
13797 } else {
13798 ctxt->myDoc = newDoc;
13799 newDoc->children->doc = doc;
13800 /* Ensure that doc has XML spec namespace */
13801 xmlSearchNsByHref(doc, (xmlNodePtr)doc, XML_XML_NAMESPACE);
13802 newDoc->oldNs = doc->oldNs;
13803 }
13804 ctxt->instate = XML_PARSER_CONTENT;
13805 ctxt->input_id = 2;
13806 ctxt->depth = depth;
13807
13808 /*
13809 * Doing validity checking on chunk doesn't make sense
13810 */
13811 ctxt->validate = 0;
13812 ctxt->loadsubset = 0;
13813 xmlDetectSAX2(ctxt);
13814
13815 if ( doc != NULL ){
13816 content = doc->children;
13817 doc->children = NULL;
13818 xmlParseContent(ctxt);
13819 doc->children = content;
13820 }
13821 else {
13822 xmlParseContent(ctxt);
13823 }
13824 if ((RAW == '<') && (NXT(1) == '/')) {
13825 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13826 } else if (RAW != 0) {
13827 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
13828 }
13829 if (ctxt->node != newDoc->children) {
13830 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13831 }
13832
13833 if (!ctxt->wellFormed) {
13834 if (ctxt->errNo == 0)
13835 ret = 1;
13836 else
13837 ret = ctxt->errNo;
13838 } else {
13839 ret = 0;
13840 }
13841
13842 if ((lst != NULL) && ((ret == 0) || (recover == 1))) {
13843 xmlNodePtr cur;
13844
13845 /*
13846 * Return the newly created nodeset after unlinking it from
13847 * they pseudo parent.
13848 */
13849 cur = newDoc->children->children;
13850 *lst = cur;
13851 while (cur != NULL) {
13852 xmlSetTreeDoc(cur, doc);
13853 cur->parent = NULL;
13854 cur = cur->next;
13855 }
13856 newDoc->children->children = NULL;
13857 }
13858
13859 if (sax != NULL)
13860 ctxt->sax = oldsax;
13861 xmlFreeParserCtxt(ctxt);
13862 newDoc->intSubset = NULL;
13863 newDoc->extSubset = NULL;
13864 /* This leaks the namespace list if doc == NULL */
13865 newDoc->oldNs = NULL;
13866 xmlFreeDoc(newDoc);
13867
13868 return(ret);
13869}
13870
13871/**
13872 * xmlSAXParseEntity:
13873 * @sax: the SAX handler block
13874 * @filename: the filename
13875 *
13876 * parse an XML external entity out of context and build a tree.
13877 * It use the given SAX function block to handle the parsing callback.
13878 * If sax is NULL, fallback to the default DOM tree building routines.
13879 *
13880 * [78] extParsedEnt ::= TextDecl? content
13881 *
13882 * This correspond to a "Well Balanced" chunk
13883 *
13884 * Returns the resulting document tree
13885 */
13886
13887xmlDocPtr
13888xmlSAXParseEntity(xmlSAXHandlerPtr sax, const char *filename) {
13889 xmlDocPtr ret;
13890 xmlParserCtxtPtr ctxt;
13891
13892 ctxt = xmlCreateFileParserCtxt(filename);
13893 if (ctxt == NULL) {
13894 return(NULL);
13895 }
13896 if (sax != NULL) {
13897 if (ctxt->sax != NULL)
13898 xmlFree(ctxt->sax);
13899 ctxt->sax = sax;
13900 ctxt->userData = NULL;
13901 }
13902
13903 xmlParseExtParsedEnt(ctxt);
13904
13905 if (ctxt->wellFormed)
13906 ret = ctxt->myDoc;
13907 else {
13908 ret = NULL;
13909 xmlFreeDoc(ctxt->myDoc);
13910 ctxt->myDoc = NULL;
13911 }
13912 if (sax != NULL)
13913 ctxt->sax = NULL;
13914 xmlFreeParserCtxt(ctxt);
13915
13916 return(ret);
13917}
13918
13919/**
13920 * xmlParseEntity:
13921 * @filename: the filename
13922 *
13923 * parse an XML external entity out of context and build a tree.
13924 *
13925 * [78] extParsedEnt ::= TextDecl? content
13926 *
13927 * This correspond to a "Well Balanced" chunk
13928 *
13929 * Returns the resulting document tree
13930 */
13931
13932xmlDocPtr
13933xmlParseEntity(const char *filename) {
13934 return(xmlSAXParseEntity(NULL, filename));
13935}
13936#endif /* LIBXML_SAX1_ENABLED */
13937
13938/**
13939 * xmlCreateEntityParserCtxtInternal:
13940 * @URL: the entity URL
13941 * @ID: the entity PUBLIC ID
13942 * @base: a possible base for the target URI
13943 * @pctx: parser context used to set options on new context
13944 *
13945 * Create a parser context for an external entity
13946 * Automatic support for ZLIB/Compress compressed document is provided
13947 * by default if found at compile-time.
13948 *
13949 * Returns the new parser context or NULL
13950 */
13951static xmlParserCtxtPtr
13952xmlCreateEntityParserCtxtInternal(const xmlChar *URL, const xmlChar *ID,
13953 const xmlChar *base, xmlParserCtxtPtr pctx) {
13954 xmlParserCtxtPtr ctxt;
13955 xmlParserInputPtr inputStream;
13956 char *directory = NULL;
13957 xmlChar *uri;
13958
13959 ctxt = xmlNewParserCtxt();
13960 if (ctxt == NULL) {
13961 return(NULL);
13962 }
13963
13964 if (pctx != NULL) {
13965 ctxt->options = pctx->options;
13966 ctxt->_private = pctx->_private;
13967 /*
13968 * this is a subparser of pctx, so the input_id should be
13969 * incremented to distinguish from main entity
13970 */
13971 ctxt->input_id = pctx->input_id + 1;
13972 }
13973
13974 /* Don't read from stdin. */
13975 if (xmlStrcmp(URL, BAD_CAST "-") == 0)
13976 URL = BAD_CAST "./-";
13977
13978 uri = xmlBuildURI(URL, base);
13979
13980 if (uri == NULL) {
13981 inputStream = xmlLoadExternalEntity((char *)URL, (char *)ID, ctxt);
13982 if (inputStream == NULL) {
13983 xmlFreeParserCtxt(ctxt);
13984 return(NULL);
13985 }
13986
13987 inputPush(ctxt, inputStream);
13988
13989 if ((ctxt->directory == NULL) && (directory == NULL))
13990 directory = xmlParserGetDirectory((char *)URL);
13991 if ((ctxt->directory == NULL) && (directory != NULL))
13992 ctxt->directory = directory;
13993 } else {
13994 inputStream = xmlLoadExternalEntity((char *)uri, (char *)ID, ctxt);
13995 if (inputStream == NULL) {
13996 xmlFree(uri);
13997 xmlFreeParserCtxt(ctxt);
13998 return(NULL);
13999 }
14000
14001 inputPush(ctxt, inputStream);
14002
14003 if ((ctxt->directory == NULL) && (directory == NULL))
14004 directory = xmlParserGetDirectory((char *)uri);
14005 if ((ctxt->directory == NULL) && (directory != NULL))
14006 ctxt->directory = directory;
14007 xmlFree(uri);
14008 }
14009 return(ctxt);
14010}
14011
14012/**
14013 * xmlCreateEntityParserCtxt:
14014 * @URL: the entity URL
14015 * @ID: the entity PUBLIC ID
14016 * @base: a possible base for the target URI
14017 *
14018 * Create a parser context for an external entity
14019 * Automatic support for ZLIB/Compress compressed document is provided
14020 * by default if found at compile-time.
14021 *
14022 * Returns the new parser context or NULL
14023 */
14024xmlParserCtxtPtr
14025xmlCreateEntityParserCtxt(const xmlChar *URL, const xmlChar *ID,
14026 const xmlChar *base) {
14027 return xmlCreateEntityParserCtxtInternal(URL, ID, base, NULL);
14028
14029}
14030
14031/************************************************************************
14032 * *
14033 * Front ends when parsing from a file *
14034 * *
14035 ************************************************************************/
14036
14037/**
14038 * xmlCreateURLParserCtxt:
14039 * @filename: the filename or URL
14040 * @options: a combination of xmlParserOption
14041 *
14042 * Create a parser context for a file or URL content.
14043 * Automatic support for ZLIB/Compress compressed document is provided
14044 * by default if found at compile-time and for file accesses
14045 *
14046 * Returns the new parser context or NULL
14047 */
14048xmlParserCtxtPtr
14049xmlCreateURLParserCtxt(const char *filename, int options)
14050{
14051 xmlParserCtxtPtr ctxt;
14052 xmlParserInputPtr inputStream;
14053 char *directory = NULL;
14054
14055 ctxt = xmlNewParserCtxt();
14056 if (ctxt == NULL) {
14057 xmlErrMemory(NULL, "cannot allocate parser context");
14058 return(NULL);
14059 }
14060
14061 if (options)
14062 xmlCtxtUseOptionsInternal(ctxt, options, NULL);
14063 ctxt->linenumbers = 1;
14064
14065 inputStream = xmlLoadExternalEntity(filename, NULL, ctxt);
14066 if (inputStream == NULL) {
14067 xmlFreeParserCtxt(ctxt);
14068 return(NULL);
14069 }
14070
14071 inputPush(ctxt, inputStream);
14072 if ((ctxt->directory == NULL) && (directory == NULL))
14073 directory = xmlParserGetDirectory(filename);
14074 if ((ctxt->directory == NULL) && (directory != NULL))
14075 ctxt->directory = directory;
14076
14077 return(ctxt);
14078}
14079
14080/**
14081 * xmlCreateFileParserCtxt:
14082 * @filename: the filename
14083 *
14084 * Create a parser context for a file content.
14085 * Automatic support for ZLIB/Compress compressed document is provided
14086 * by default if found at compile-time.
14087 *
14088 * Returns the new parser context or NULL
14089 */
14090xmlParserCtxtPtr
14091xmlCreateFileParserCtxt(const char *filename)
14092{
14093 return(xmlCreateURLParserCtxt(filename, 0));
14094}
14095
14096#ifdef LIBXML_SAX1_ENABLED
14097/**
14098 * xmlSAXParseFileWithData:
14099 * @sax: the SAX handler block
14100 * @filename: the filename
14101 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
14102 * documents
14103 * @data: the userdata
14104 *
14105 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
14106 * compressed document is provided by default if found at compile-time.
14107 * It use the given SAX function block to handle the parsing callback.
14108 * If sax is NULL, fallback to the default DOM tree building routines.
14109 *
14110 * User data (void *) is stored within the parser context in the
14111 * context's _private member, so it is available nearly everywhere in libxml
14112 *
14113 * Returns the resulting document tree
14114 */
14115
14116xmlDocPtr
14117xmlSAXParseFileWithData(xmlSAXHandlerPtr sax, const char *filename,
14118 int recovery, void *data) {
14119 xmlDocPtr ret;
14120 xmlParserCtxtPtr ctxt;
14121
14122 xmlInitParser();
14123
14124 ctxt = xmlCreateFileParserCtxt(filename);
14125 if (ctxt == NULL) {
14126 return(NULL);
14127 }
14128 if (sax != NULL) {
14129 if (ctxt->sax != NULL)
14130 xmlFree(ctxt->sax);
14131 ctxt->sax = sax;
14132 }
14133 xmlDetectSAX2(ctxt);
14134 if (data!=NULL) {
14135 ctxt->_private = data;
14136 }
14137
14138 if (ctxt->directory == NULL)
14139 ctxt->directory = xmlParserGetDirectory(filename);
14140
14141 ctxt->recovery = recovery;
14142
14143 xmlParseDocument(ctxt);
14144
14145 if ((ctxt->wellFormed) || recovery) {
14146 ret = ctxt->myDoc;
14147 if ((ret != NULL) && (ctxt->input->buf != NULL)) {
14148 if (ctxt->input->buf->compressed > 0)
14149 ret->compression = 9;
14150 else
14151 ret->compression = ctxt->input->buf->compressed;
14152 }
14153 }
14154 else {
14155 ret = NULL;
14156 xmlFreeDoc(ctxt->myDoc);
14157 ctxt->myDoc = NULL;
14158 }
14159 if (sax != NULL)
14160 ctxt->sax = NULL;
14161 xmlFreeParserCtxt(ctxt);
14162
14163 return(ret);
14164}
14165
14166/**
14167 * xmlSAXParseFile:
14168 * @sax: the SAX handler block
14169 * @filename: the filename
14170 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
14171 * documents
14172 *
14173 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
14174 * compressed document is provided by default if found at compile-time.
14175 * It use the given SAX function block to handle the parsing callback.
14176 * If sax is NULL, fallback to the default DOM tree building routines.
14177 *
14178 * Returns the resulting document tree
14179 */
14180
14181xmlDocPtr
14182xmlSAXParseFile(xmlSAXHandlerPtr sax, const char *filename,
14183 int recovery) {
14184 return(xmlSAXParseFileWithData(sax,filename,recovery,NULL));
14185}
14186
14187/**
14188 * xmlRecoverDoc:
14189 * @cur: a pointer to an array of xmlChar
14190 *
14191 * parse an XML in-memory document and build a tree.
14192 * In the case the document is not Well Formed, a attempt to build a
14193 * tree is tried anyway
14194 *
14195 * Returns the resulting document tree or NULL in case of failure
14196 */
14197
14198xmlDocPtr
14199xmlRecoverDoc(const xmlChar *cur) {
14200 return(xmlSAXParseDoc(NULL, cur, 1));
14201}
14202
14203/**
14204 * xmlParseFile:
14205 * @filename: the filename
14206 *
14207 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
14208 * compressed document is provided by default if found at compile-time.
14209 *
14210 * Returns the resulting document tree if the file was wellformed,
14211 * NULL otherwise.
14212 */
14213
14214xmlDocPtr
14215xmlParseFile(const char *filename) {
14216 return(xmlSAXParseFile(NULL, filename, 0));
14217}
14218
14219/**
14220 * xmlRecoverFile:
14221 * @filename: the filename
14222 *
14223 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
14224 * compressed document is provided by default if found at compile-time.
14225 * In the case the document is not Well Formed, it attempts to build
14226 * a tree anyway
14227 *
14228 * Returns the resulting document tree or NULL in case of failure
14229 */
14230
14231xmlDocPtr
14232xmlRecoverFile(const char *filename) {
14233 return(xmlSAXParseFile(NULL, filename, 1));
14234}
14235
14236
14237/**
14238 * xmlSetupParserForBuffer:
14239 * @ctxt: an XML parser context
14240 * @buffer: a xmlChar * buffer
14241 * @filename: a file name
14242 *
14243 * Setup the parser context to parse a new buffer; Clears any prior
14244 * contents from the parser context. The buffer parameter must not be
14245 * NULL, but the filename parameter can be
14246 */
14247void
14248xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt, const xmlChar* buffer,
14249 const char* filename)
14250{
14251 xmlParserInputPtr input;
14252
14253 if ((ctxt == NULL) || (buffer == NULL))
14254 return;
14255
14256 input = xmlNewInputStream(ctxt);
14257 if (input == NULL) {
14258 xmlErrMemory(NULL, "parsing new buffer: out of memory\n");
14259 xmlClearParserCtxt(ctxt);
14260 return;
14261 }
14262
14263 xmlClearParserCtxt(ctxt);
14264 if (filename != NULL)
14265 input->filename = (char *) xmlCanonicPath((const xmlChar *)filename);
14266 input->base = buffer;
14267 input->cur = buffer;
14268 input->end = &buffer[xmlStrlen(buffer)];
14269 inputPush(ctxt, input);
14270}
14271
14272/**
14273 * xmlSAXUserParseFile:
14274 * @sax: a SAX handler
14275 * @user_data: The user data returned on SAX callbacks
14276 * @filename: a file name
14277 *
14278 * parse an XML file and call the given SAX handler routines.
14279 * Automatic support for ZLIB/Compress compressed document is provided
14280 *
14281 * Returns 0 in case of success or a error number otherwise
14282 */
14283int
14284xmlSAXUserParseFile(xmlSAXHandlerPtr sax, void *user_data,
14285 const char *filename) {
14286 int ret = 0;
14287 xmlParserCtxtPtr ctxt;
14288
14289 ctxt = xmlCreateFileParserCtxt(filename);
14290 if (ctxt == NULL) return -1;
14291 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
14292 xmlFree(ctxt->sax);
14293 ctxt->sax = sax;
14294 xmlDetectSAX2(ctxt);
14295
14296 if (user_data != NULL)
14297 ctxt->userData = user_data;
14298
14299 xmlParseDocument(ctxt);
14300
14301 if (ctxt->wellFormed)
14302 ret = 0;
14303 else {
14304 if (ctxt->errNo != 0)
14305 ret = ctxt->errNo;
14306 else
14307 ret = -1;
14308 }
14309 if (sax != NULL)
14310 ctxt->sax = NULL;
14311 if (ctxt->myDoc != NULL) {
14312 xmlFreeDoc(ctxt->myDoc);
14313 ctxt->myDoc = NULL;
14314 }
14315 xmlFreeParserCtxt(ctxt);
14316
14317 return ret;
14318}
14319#endif /* LIBXML_SAX1_ENABLED */
14320
14321/************************************************************************
14322 * *
14323 * Front ends when parsing from memory *
14324 * *
14325 ************************************************************************/
14326
14327/**
14328 * xmlCreateMemoryParserCtxt:
14329 * @buffer: a pointer to a char array
14330 * @size: the size of the array
14331 *
14332 * Create a parser context for an XML in-memory document.
14333 *
14334 * Returns the new parser context or NULL
14335 */
14336xmlParserCtxtPtr
14337xmlCreateMemoryParserCtxt(const char *buffer, int size) {
14338 xmlParserCtxtPtr ctxt;
14339 xmlParserInputPtr input;
14340 xmlParserInputBufferPtr buf;
14341
14342 if (buffer == NULL)
14343 return(NULL);
14344 if (size <= 0)
14345 return(NULL);
14346
14347 ctxt = xmlNewParserCtxt();
14348 if (ctxt == NULL)
14349 return(NULL);
14350
14351 /* TODO: xmlParserInputBufferCreateStatic, requires some serious changes */
14352 buf = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
14353 if (buf == NULL) {
14354 xmlFreeParserCtxt(ctxt);
14355 return(NULL);
14356 }
14357
14358 input = xmlNewInputStream(ctxt);
14359 if (input == NULL) {
14360 xmlFreeParserInputBuffer(buf);
14361 xmlFreeParserCtxt(ctxt);
14362 return(NULL);
14363 }
14364
14365 input->filename = NULL;
14366 input->buf = buf;
14367 xmlBufResetInput(input->buf->buffer, input);
14368
14369 inputPush(ctxt, input);
14370 return(ctxt);
14371}
14372
14373#ifdef LIBXML_SAX1_ENABLED
14374/**
14375 * xmlSAXParseMemoryWithData:
14376 * @sax: the SAX handler block
14377 * @buffer: an pointer to a char array
14378 * @size: the size of the array
14379 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
14380 * documents
14381 * @data: the userdata
14382 *
14383 * parse an XML in-memory block and use the given SAX function block
14384 * to handle the parsing callback. If sax is NULL, fallback to the default
14385 * DOM tree building routines.
14386 *
14387 * User data (void *) is stored within the parser context in the
14388 * context's _private member, so it is available nearly everywhere in libxml
14389 *
14390 * Returns the resulting document tree
14391 */
14392
14393xmlDocPtr
14394xmlSAXParseMemoryWithData(xmlSAXHandlerPtr sax, const char *buffer,
14395 int size, int recovery, void *data) {
14396 xmlDocPtr ret;
14397 xmlParserCtxtPtr ctxt;
14398
14399 xmlInitParser();
14400
14401 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
14402 if (ctxt == NULL) return(NULL);
14403 if (sax != NULL) {
14404 if (ctxt->sax != NULL)
14405 xmlFree(ctxt->sax);
14406 ctxt->sax = sax;
14407 }
14408 xmlDetectSAX2(ctxt);
14409 if (data!=NULL) {
14410 ctxt->_private=data;
14411 }
14412
14413 ctxt->recovery = recovery;
14414
14415 xmlParseDocument(ctxt);
14416
14417 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
14418 else {
14419 ret = NULL;
14420 xmlFreeDoc(ctxt->myDoc);
14421 ctxt->myDoc = NULL;
14422 }
14423 if (sax != NULL)
14424 ctxt->sax = NULL;
14425 xmlFreeParserCtxt(ctxt);
14426
14427 return(ret);
14428}
14429
14430/**
14431 * xmlSAXParseMemory:
14432 * @sax: the SAX handler block
14433 * @buffer: an pointer to a char array
14434 * @size: the size of the array
14435 * @recovery: work in recovery mode, i.e. tries to read not Well Formed
14436 * documents
14437 *
14438 * parse an XML in-memory block and use the given SAX function block
14439 * to handle the parsing callback. If sax is NULL, fallback to the default
14440 * DOM tree building routines.
14441 *
14442 * Returns the resulting document tree
14443 */
14444xmlDocPtr
14445xmlSAXParseMemory(xmlSAXHandlerPtr sax, const char *buffer,
14446 int size, int recovery) {
14447 return xmlSAXParseMemoryWithData(sax, buffer, size, recovery, NULL);
14448}
14449
14450/**
14451 * xmlParseMemory:
14452 * @buffer: an pointer to a char array
14453 * @size: the size of the array
14454 *
14455 * parse an XML in-memory block and build a tree.
14456 *
14457 * Returns the resulting document tree
14458 */
14459
14460xmlDocPtr xmlParseMemory(const char *buffer, int size) {
14461 return(xmlSAXParseMemory(NULL, buffer, size, 0));
14462}
14463
14464/**
14465 * xmlRecoverMemory:
14466 * @buffer: an pointer to a char array
14467 * @size: the size of the array
14468 *
14469 * parse an XML in-memory block and build a tree.
14470 * In the case the document is not Well Formed, an attempt to
14471 * build a tree is tried anyway
14472 *
14473 * Returns the resulting document tree or NULL in case of error
14474 */
14475
14476xmlDocPtr xmlRecoverMemory(const char *buffer, int size) {
14477 return(xmlSAXParseMemory(NULL, buffer, size, 1));
14478}
14479
14480/**
14481 * xmlSAXUserParseMemory:
14482 * @sax: a SAX handler
14483 * @user_data: The user data returned on SAX callbacks
14484 * @buffer: an in-memory XML document input
14485 * @size: the length of the XML document in bytes
14486 *
14487 * A better SAX parsing routine.
14488 * parse an XML in-memory buffer and call the given SAX handler routines.
14489 *
14490 * Returns 0 in case of success or a error number otherwise
14491 */
14492int xmlSAXUserParseMemory(xmlSAXHandlerPtr sax, void *user_data,
14493 const char *buffer, int size) {
14494 int ret = 0;
14495 xmlParserCtxtPtr ctxt;
14496
14497 xmlInitParser();
14498
14499 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
14500 if (ctxt == NULL) return -1;
14501 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
14502 xmlFree(ctxt->sax);
14503 ctxt->sax = sax;
14504 xmlDetectSAX2(ctxt);
14505
14506 if (user_data != NULL)
14507 ctxt->userData = user_data;
14508
14509 xmlParseDocument(ctxt);
14510
14511 if (ctxt->wellFormed)
14512 ret = 0;
14513 else {
14514 if (ctxt->errNo != 0)
14515 ret = ctxt->errNo;
14516 else
14517 ret = -1;
14518 }
14519 if (sax != NULL)
14520 ctxt->sax = NULL;
14521 if (ctxt->myDoc != NULL) {
14522 xmlFreeDoc(ctxt->myDoc);
14523 ctxt->myDoc = NULL;
14524 }
14525 xmlFreeParserCtxt(ctxt);
14526
14527 return ret;
14528}
14529#endif /* LIBXML_SAX1_ENABLED */
14530
14531/**
14532 * xmlCreateDocParserCtxt:
14533 * @cur: a pointer to an array of xmlChar
14534 *
14535 * Creates a parser context for an XML in-memory document.
14536 *
14537 * Returns the new parser context or NULL
14538 */
14539xmlParserCtxtPtr
14540xmlCreateDocParserCtxt(const xmlChar *cur) {
14541 int len;
14542
14543 if (cur == NULL)
14544 return(NULL);
14545 len = xmlStrlen(cur);
14546 return(xmlCreateMemoryParserCtxt((const char *)cur, len));
14547}
14548
14549#ifdef LIBXML_SAX1_ENABLED
14550/**
14551 * xmlSAXParseDoc:
14552 * @sax: the SAX handler block
14553 * @cur: a pointer to an array of xmlChar
14554 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
14555 * documents
14556 *
14557 * parse an XML in-memory document and build a tree.
14558 * It use the given SAX function block to handle the parsing callback.
14559 * If sax is NULL, fallback to the default DOM tree building routines.
14560 *
14561 * Returns the resulting document tree
14562 */
14563
14564xmlDocPtr
14565xmlSAXParseDoc(xmlSAXHandlerPtr sax, const xmlChar *cur, int recovery) {
14566 xmlDocPtr ret;
14567 xmlParserCtxtPtr ctxt;
14568 xmlSAXHandlerPtr oldsax = NULL;
14569
14570 if (cur == NULL) return(NULL);
14571
14572
14573 ctxt = xmlCreateDocParserCtxt(cur);
14574 if (ctxt == NULL) return(NULL);
14575 if (sax != NULL) {
14576 oldsax = ctxt->sax;
14577 ctxt->sax = sax;
14578 ctxt->userData = NULL;
14579 }
14580 xmlDetectSAX2(ctxt);
14581
14582 xmlParseDocument(ctxt);
14583 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
14584 else {
14585 ret = NULL;
14586 xmlFreeDoc(ctxt->myDoc);
14587 ctxt->myDoc = NULL;
14588 }
14589 if (sax != NULL)
14590 ctxt->sax = oldsax;
14591 xmlFreeParserCtxt(ctxt);
14592
14593 return(ret);
14594}
14595
14596/**
14597 * xmlParseDoc:
14598 * @cur: a pointer to an array of xmlChar
14599 *
14600 * parse an XML in-memory document and build a tree.
14601 *
14602 * Returns the resulting document tree
14603 */
14604
14605xmlDocPtr
14606xmlParseDoc(const xmlChar *cur) {
14607 return(xmlSAXParseDoc(NULL, cur, 0));
14608}
14609#endif /* LIBXML_SAX1_ENABLED */
14610
14611#ifdef LIBXML_LEGACY_ENABLED
14612/************************************************************************
14613 * *
14614 * Specific function to keep track of entities references *
14615 * and used by the XSLT debugger *
14616 * *
14617 ************************************************************************/
14618
14619static xmlEntityReferenceFunc xmlEntityRefFunc = NULL;
14620
14621/**
14622 * xmlAddEntityReference:
14623 * @ent : A valid entity
14624 * @firstNode : A valid first node for children of entity
14625 * @lastNode : A valid last node of children entity
14626 *
14627 * Notify of a reference to an entity of type XML_EXTERNAL_GENERAL_PARSED_ENTITY
14628 */
14629static void
14630xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
14631 xmlNodePtr lastNode)
14632{
14633 if (xmlEntityRefFunc != NULL) {
14634 (*xmlEntityRefFunc) (ent, firstNode, lastNode);
14635 }
14636}
14637
14638
14639/**
14640 * xmlSetEntityReferenceFunc:
14641 * @func: A valid function
14642 *
14643 * Set the function to call call back when a xml reference has been made
14644 */
14645void
14646xmlSetEntityReferenceFunc(xmlEntityReferenceFunc func)
14647{
14648 xmlEntityRefFunc = func;
14649}
14650#endif /* LIBXML_LEGACY_ENABLED */
14651
14652/************************************************************************
14653 * *
14654 * Miscellaneous *
14655 * *
14656 ************************************************************************/
14657
14658#ifdef LIBXML_XPATH_ENABLED
14659#include <libxml/xpath.h>
14660#endif
14661
14662extern void XMLCDECL xmlGenericErrorDefaultFunc(void *ctx, const char *msg, ...);
14663static int xmlParserInitialized = 0;
14664
14665/**
14666 * xmlInitParser:
14667 *
14668 * Initialization function for the XML parser.
14669 * This is not reentrant. Call once before processing in case of
14670 * use in multithreaded programs.
14671 */
14672
14673void
14674xmlInitParser(void) {
14675 if (xmlParserInitialized != 0)
14676 return;
14677
14678#if defined(_WIN32) && (!defined(LIBXML_STATIC) || defined(LIBXML_STATIC_FOR_DLL))
14679 if (xmlFree == free)
14680 atexit(xmlCleanupParser);
14681#endif
14682
14683#ifdef LIBXML_THREAD_ENABLED
14684 __xmlGlobalInitMutexLock();
14685 if (xmlParserInitialized == 0) {
14686#endif
14687 xmlInitThreads();
14688 xmlInitGlobals();
14689 if ((xmlGenericError == xmlGenericErrorDefaultFunc) ||
14690 (xmlGenericError == NULL))
14691 initGenericErrorDefaultFunc(NULL);
14692 xmlInitMemory();
14693 xmlInitializeDict();
14694 xmlInitCharEncodingHandlers();
14695 xmlDefaultSAXHandlerInit();
14696 xmlRegisterDefaultInputCallbacks();
14697#ifdef LIBXML_OUTPUT_ENABLED
14698 xmlRegisterDefaultOutputCallbacks();
14699#endif /* LIBXML_OUTPUT_ENABLED */
14700#ifdef LIBXML_HTML_ENABLED
14701 htmlInitAutoClose();
14702 htmlDefaultSAXHandlerInit();
14703#endif
14704#ifdef LIBXML_XPATH_ENABLED
14705 xmlXPathInit();
14706#endif
14707 xmlParserInitialized = 1;
14708#ifdef LIBXML_THREAD_ENABLED
14709 }
14710 __xmlGlobalInitMutexUnlock();
14711#endif
14712}
14713
14714/**
14715 * xmlCleanupParser:
14716 *
14717 * This function name is somewhat misleading. It does not clean up
14718 * parser state, it cleans up memory allocated by the library itself.
14719 * It is a cleanup function for the XML library. It tries to reclaim all
14720 * related global memory allocated for the library processing.
14721 * It doesn't deallocate any document related memory. One should
14722 * call xmlCleanupParser() only when the process has finished using
14723 * the library and all XML/HTML documents built with it.
14724 * See also xmlInitParser() which has the opposite function of preparing
14725 * the library for operations.
14726 *
14727 * WARNING: if your application is multithreaded or has plugin support
14728 * calling this may crash the application if another thread or
14729 * a plugin is still using libxml2. It's sometimes very hard to
14730 * guess if libxml2 is in use in the application, some libraries
14731 * or plugins may use it without notice. In case of doubt abstain
14732 * from calling this function or do it just before calling exit()
14733 * to avoid leak reports from valgrind !
14734 */
14735
14736void
14737xmlCleanupParser(void) {
14738 if (!xmlParserInitialized)
14739 return;
14740
14741 xmlCleanupCharEncodingHandlers();
14742#ifdef LIBXML_CATALOG_ENABLED
14743 xmlCatalogCleanup();
14744#endif
14745 xmlDictCleanup();
14746 xmlCleanupInputCallbacks();
14747#ifdef LIBXML_OUTPUT_ENABLED
14748 xmlCleanupOutputCallbacks();
14749#endif
14750#ifdef LIBXML_SCHEMAS_ENABLED
14751 xmlSchemaCleanupTypes();
14752 xmlRelaxNGCleanupTypes();
14753#endif
14754 xmlCleanupGlobals();
14755 xmlCleanupThreads(); /* must be last if called not from the main thread */
14756 xmlCleanupMemory();
14757 xmlParserInitialized = 0;
14758}
14759
14760#if defined(HAVE_ATTRIBUTE_DESTRUCTOR) && !defined(LIBXML_STATIC) && \
14761 !defined(_WIN32)
14762static void
14763ATTRIBUTE_DESTRUCTOR
14764xmlDestructor(void) {
14765 /*
14766 * Calling custom deallocation functions in a destructor can cause
14767 * problems, for example with Nokogiri.
14768 */
14769 if (xmlFree == free)
14770 xmlCleanupParser();
14771}
14772#endif
14773
14774/************************************************************************
14775 * *
14776 * New set (2.6.0) of simpler and more flexible APIs *
14777 * *
14778 ************************************************************************/
14779
/**
 * DICT_FREE:
 * @str:  a string
 *
 * Free a string if it is not owned by the "dict" dictionary in the
 * current scope. A variable named dict must be visible where the macro
 * is used; a NULL dict means the string is always freed. NULL strings
 * are ignored. @str is evaluated more than once, so it must not have
 * side effects.
 *
 * The body is wrapped in do { } while (0) so that the macro expands to
 * exactly one statement and is safe inside an unbraced if/else.
 */
#define DICT_FREE(str)						\
    do {							\
	if ((str) && ((!dict) ||				\
	    (xmlDictOwns(dict, (const xmlChar *)(str)) == 0)))	\
	    xmlFree((char *)(str));				\
    } while (0)
14791
14792/**
14793 * xmlCtxtReset:
14794 * @ctxt: an XML parser context
14795 *
14796 * Reset a parser context
14797 */
14798void
14799xmlCtxtReset(xmlParserCtxtPtr ctxt)
14800{
14801 xmlParserInputPtr input;
14802 xmlDictPtr dict;
14803
14804 if (ctxt == NULL)
14805 return;
14806
14807 dict = ctxt->dict;
14808
14809 while ((input = inputPop(ctxt)) != NULL) { /* Non consuming */
14810 xmlFreeInputStream(input);
14811 }
14812 ctxt->inputNr = 0;
14813 ctxt->input = NULL;
14814
14815 ctxt->spaceNr = 0;
14816 if (ctxt->spaceTab != NULL) {
14817 ctxt->spaceTab[0] = -1;
14818 ctxt->space = &ctxt->spaceTab[0];
14819 } else {
14820 ctxt->space = NULL;
14821 }
14822
14823
14824 ctxt->nodeNr = 0;
14825 ctxt->node = NULL;
14826
14827 ctxt->nameNr = 0;
14828 ctxt->name = NULL;
14829
14830 DICT_FREE(ctxt->version);
14831 ctxt->version = NULL;
14832 DICT_FREE(ctxt->encoding);
14833 ctxt->encoding = NULL;
14834 DICT_FREE(ctxt->directory);
14835 ctxt->directory = NULL;
14836 DICT_FREE(ctxt->extSubURI);
14837 ctxt->extSubURI = NULL;
14838 DICT_FREE(ctxt->extSubSystem);
14839 ctxt->extSubSystem = NULL;
14840 if (ctxt->myDoc != NULL)
14841 xmlFreeDoc(ctxt->myDoc);
14842 ctxt->myDoc = NULL;
14843
14844 ctxt->standalone = -1;
14845 ctxt->hasExternalSubset = 0;
14846 ctxt->hasPErefs = 0;
14847 ctxt->html = 0;
14848 ctxt->external = 0;
14849 ctxt->instate = XML_PARSER_START;
14850 ctxt->token = 0;
14851
14852 ctxt->wellFormed = 1;
14853 ctxt->nsWellFormed = 1;
14854 ctxt->disableSAX = 0;
14855 ctxt->valid = 1;
14856#if 0
14857 ctxt->vctxt.userData = ctxt;
14858 ctxt->vctxt.error = xmlParserValidityError;
14859 ctxt->vctxt.warning = xmlParserValidityWarning;
14860#endif
14861 ctxt->record_info = 0;
14862 ctxt->checkIndex = 0;
14863 ctxt->inSubset = 0;
14864 ctxt->errNo = XML_ERR_OK;
14865 ctxt->depth = 0;
14866 ctxt->charset = XML_CHAR_ENCODING_UTF8;
14867 ctxt->catalogs = NULL;
14868 ctxt->nbentities = 0;
14869 ctxt->sizeentities = 0;
14870 ctxt->sizeentcopy = 0;
14871 xmlInitNodeInfoSeq(&ctxt->node_seq);
14872
14873 if (ctxt->attsDefault != NULL) {
14874 xmlHashFree(ctxt->attsDefault, xmlHashDefaultDeallocator);
14875 ctxt->attsDefault = NULL;
14876 }
14877 if (ctxt->attsSpecial != NULL) {
14878 xmlHashFree(ctxt->attsSpecial, NULL);
14879 ctxt->attsSpecial = NULL;
14880 }
14881
14882#ifdef LIBXML_CATALOG_ENABLED
14883 if (ctxt->catalogs != NULL)
14884 xmlCatalogFreeLocal(ctxt->catalogs);
14885#endif
14886 if (ctxt->lastError.code != XML_ERR_OK)
14887 xmlResetError(&ctxt->lastError);
14888}
14889
14890/**
14891 * xmlCtxtResetPush:
14892 * @ctxt: an XML parser context
14893 * @chunk: a pointer to an array of chars
14894 * @size: number of chars in the array
14895 * @filename: an optional file name or URI
14896 * @encoding: the document encoding, or NULL
14897 *
14898 * Reset a push parser context
14899 *
14900 * Returns 0 in case of success and 1 in case of error
14901 */
14902int
14903xmlCtxtResetPush(xmlParserCtxtPtr ctxt, const char *chunk,
14904 int size, const char *filename, const char *encoding)
14905{
14906 xmlParserInputPtr inputStream;
14907 xmlParserInputBufferPtr buf;
14908 xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
14909
14910 if (ctxt == NULL)
14911 return(1);
14912
14913 if ((encoding == NULL) && (chunk != NULL) && (size >= 4))
14914 enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
14915
14916 buf = xmlAllocParserInputBuffer(enc);
14917 if (buf == NULL)
14918 return(1);
14919
14920 if (ctxt == NULL) {
14921 xmlFreeParserInputBuffer(buf);
14922 return(1);
14923 }
14924
14925 xmlCtxtReset(ctxt);
14926
14927 if (filename == NULL) {
14928 ctxt->directory = NULL;
14929 } else {
14930 ctxt->directory = xmlParserGetDirectory(filename);
14931 }
14932
14933 inputStream = xmlNewInputStream(ctxt);
14934 if (inputStream == NULL) {
14935 xmlFreeParserInputBuffer(buf);
14936 return(1);
14937 }
14938
14939 if (filename == NULL)
14940 inputStream->filename = NULL;
14941 else
14942 inputStream->filename = (char *)
14943 xmlCanonicPath((const xmlChar *) filename);
14944 inputStream->buf = buf;
14945 xmlBufResetInput(buf->buffer, inputStream);
14946
14947 inputPush(ctxt, inputStream);
14948
14949 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
14950 (ctxt->input->buf != NULL)) {
14951 size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer, ctxt->input);
14952 size_t cur = ctxt->input->cur - ctxt->input->base;
14953
14954 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
14955
14956 xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input, base, cur);
14957#ifdef DEBUG_PUSH
14958 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
14959#endif
14960 }
14961
14962 if (encoding != NULL) {
14963 xmlCharEncodingHandlerPtr hdlr;
14964
14965 if (ctxt->encoding != NULL)
14966 xmlFree((xmlChar *) ctxt->encoding);
14967 ctxt->encoding = xmlStrdup((const xmlChar *) encoding);
14968
14969 hdlr = xmlFindCharEncodingHandler(encoding);
14970 if (hdlr != NULL) {
14971 xmlSwitchToEncoding(ctxt, hdlr);
14972 } else {
14973 xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
14974 "Unsupported encoding %s\n", BAD_CAST encoding);
14975 }
14976 } else if (enc != XML_CHAR_ENCODING_NONE) {
14977 xmlSwitchEncoding(ctxt, enc);
14978 }
14979
14980 return(0);
14981}
14982
14983
14984/**
14985 * xmlCtxtUseOptionsInternal:
14986 * @ctxt: an XML parser context
14987 * @options: a combination of xmlParserOption
14988 * @encoding: the user provided encoding to use
14989 *
14990 * Applies the options to the parser context
14991 *
14992 * Returns 0 in case of success, the set of unknown or unimplemented options
14993 * in case of error.
14994 */
14995static int
14996xmlCtxtUseOptionsInternal(xmlParserCtxtPtr ctxt, int options, const char *encoding)
14997{
14998 if (ctxt == NULL)
14999 return(-1);
15000 if (encoding != NULL) {
15001 if (ctxt->encoding != NULL)
15002 xmlFree((xmlChar *) ctxt->encoding);
15003 ctxt->encoding = xmlStrdup((const xmlChar *) encoding);
15004 }
15005 if (options & XML_PARSE_RECOVER) {
15006 ctxt->recovery = 1;
15007 options -= XML_PARSE_RECOVER;
15008 ctxt->options |= XML_PARSE_RECOVER;
15009 } else
15010 ctxt->recovery = 0;
15011 if (options & XML_PARSE_DTDLOAD) {
15012 ctxt->loadsubset = XML_DETECT_IDS;
15013 options -= XML_PARSE_DTDLOAD;
15014 ctxt->options |= XML_PARSE_DTDLOAD;
15015 } else
15016 ctxt->loadsubset = 0;
15017 if (options & XML_PARSE_DTDATTR) {
15018 ctxt->loadsubset |= XML_COMPLETE_ATTRS;
15019 options -= XML_PARSE_DTDATTR;
15020 ctxt->options |= XML_PARSE_DTDATTR;
15021 }
15022 if (options & XML_PARSE_NOENT) {
15023 ctxt->replaceEntities = 1;
15024 /* ctxt->loadsubset |= XML_DETECT_IDS; */
15025 options -= XML_PARSE_NOENT;
15026 ctxt->options |= XML_PARSE_NOENT;
15027 } else
15028 ctxt->replaceEntities = 0;
15029 if (options & XML_PARSE_PEDANTIC) {
15030 ctxt->pedantic = 1;
15031 options -= XML_PARSE_PEDANTIC;
15032 ctxt->options |= XML_PARSE_PEDANTIC;
15033 } else
15034 ctxt->pedantic = 0;
15035 if (options & XML_PARSE_NOBLANKS) {
15036 ctxt->keepBlanks = 0;
15037 ctxt->sax->ignorableWhitespace = xmlSAX2IgnorableWhitespace;
15038 options -= XML_PARSE_NOBLANKS;
15039 ctxt->options |= XML_PARSE_NOBLANKS;
15040 } else
15041 ctxt->keepBlanks = 1;
15042 if (options & XML_PARSE_DTDVALID) {
15043 ctxt->validate = 1;
15044 if (options & XML_PARSE_NOWARNING)
15045 ctxt->vctxt.warning = NULL;
15046 if (options & XML_PARSE_NOERROR)
15047 ctxt->vctxt.error = NULL;
15048 options -= XML_PARSE_DTDVALID;
15049 ctxt->options |= XML_PARSE_DTDVALID;
15050 } else
15051 ctxt->validate = 0;
15052 if (options & XML_PARSE_NOWARNING) {
15053 ctxt->sax->warning = NULL;
15054 options -= XML_PARSE_NOWARNING;
15055 }
15056 if (options & XML_PARSE_NOERROR) {
15057 ctxt->sax->error = NULL;
15058 ctxt->sax->fatalError = NULL;
15059 options -= XML_PARSE_NOERROR;
15060 }
15061#ifdef LIBXML_SAX1_ENABLED
15062 if (options & XML_PARSE_SAX1) {
15063 ctxt->sax->startElement = xmlSAX2StartElement;
15064 ctxt->sax->endElement = xmlSAX2EndElement;
15065 ctxt->sax->startElementNs = NULL;
15066 ctxt->sax->endElementNs = NULL;
15067 ctxt->sax->initialized = 1;
15068 options -= XML_PARSE_SAX1;
15069 ctxt->options |= XML_PARSE_SAX1;
15070 }
15071#endif /* LIBXML_SAX1_ENABLED */
15072 if (options & XML_PARSE_NODICT) {
15073 ctxt->dictNames = 0;
15074 options -= XML_PARSE_NODICT;
15075 ctxt->options |= XML_PARSE_NODICT;
15076 } else {
15077 ctxt->dictNames = 1;
15078 }
15079 if (options & XML_PARSE_NOCDATA) {
15080 ctxt->sax->cdataBlock = NULL;
15081 options -= XML_PARSE_NOCDATA;
15082 ctxt->options |= XML_PARSE_NOCDATA;
15083 }
15084 if (options & XML_PARSE_NSCLEAN) {
15085 ctxt->options |= XML_PARSE_NSCLEAN;
15086 options -= XML_PARSE_NSCLEAN;
15087 }
15088 if (options & XML_PARSE_NONET) {
15089 ctxt->options |= XML_PARSE_NONET;
15090 options -= XML_PARSE_NONET;
15091 }
15092 if (options & XML_PARSE_COMPACT) {
15093 ctxt->options |= XML_PARSE_COMPACT;
15094 options -= XML_PARSE_COMPACT;
15095 }
15096 if (options & XML_PARSE_OLD10) {
15097 ctxt->options |= XML_PARSE_OLD10;
15098 options -= XML_PARSE_OLD10;
15099 }
15100 if (options & XML_PARSE_NOBASEFIX) {
15101 ctxt->options |= XML_PARSE_NOBASEFIX;
15102 options -= XML_PARSE_NOBASEFIX;
15103 }
15104 if (options & XML_PARSE_HUGE) {
15105 ctxt->options |= XML_PARSE_HUGE;
15106 options -= XML_PARSE_HUGE;
15107 if (ctxt->dict != NULL)
15108 xmlDictSetLimit(ctxt->dict, 0);
15109 }
15110 if (options & XML_PARSE_OLDSAX) {
15111 ctxt->options |= XML_PARSE_OLDSAX;
15112 options -= XML_PARSE_OLDSAX;
15113 }
15114 if (options & XML_PARSE_IGNORE_ENC) {
15115 ctxt->options |= XML_PARSE_IGNORE_ENC;
15116 options -= XML_PARSE_IGNORE_ENC;
15117 }
15118 if (options & XML_PARSE_BIG_LINES) {
15119 ctxt->options |= XML_PARSE_BIG_LINES;
15120 options -= XML_PARSE_BIG_LINES;
15121 }
15122 ctxt->linenumbers = 1;
15123 return (options);
15124}
15125
15126/**
15127 * xmlCtxtUseOptions:
15128 * @ctxt: an XML parser context
15129 * @options: a combination of xmlParserOption
15130 *
15131 * Applies the options to the parser context
15132 *
15133 * Returns 0 in case of success, the set of unknown or unimplemented options
15134 * in case of error.
15135 */
15136int
15137xmlCtxtUseOptions(xmlParserCtxtPtr ctxt, int options)
15138{
15139 return(xmlCtxtUseOptionsInternal(ctxt, options, NULL));
15140}
15141
15142/**
15143 * xmlDoRead:
15144 * @ctxt: an XML parser context
15145 * @URL: the base URL to use for the document
15146 * @encoding: the document encoding, or NULL
15147 * @options: a combination of xmlParserOption
15148 * @reuse: keep the context for reuse
15149 *
15150 * Common front-end for the xmlRead functions
15151 *
15152 * Returns the resulting document tree or NULL
15153 */
15154static xmlDocPtr
15155xmlDoRead(xmlParserCtxtPtr ctxt, const char *URL, const char *encoding,
15156 int options, int reuse)
15157{
15158 xmlDocPtr ret;
15159
15160 xmlCtxtUseOptionsInternal(ctxt, options, encoding);
15161 if (encoding != NULL) {
15162 xmlCharEncodingHandlerPtr hdlr;
15163
15164 hdlr = xmlFindCharEncodingHandler(encoding);
15165 if (hdlr != NULL)
15166 xmlSwitchToEncoding(ctxt, hdlr);
15167 }
15168 if ((URL != NULL) && (ctxt->input != NULL) &&
15169 (ctxt->input->filename == NULL))
15170 ctxt->input->filename = (char *) xmlStrdup((const xmlChar *) URL);
15171 xmlParseDocument(ctxt);
15172 if ((ctxt->wellFormed) || ctxt->recovery)
15173 ret = ctxt->myDoc;
15174 else {
15175 ret = NULL;
15176 if (ctxt->myDoc != NULL) {
15177 xmlFreeDoc(ctxt->myDoc);
15178 }
15179 }
15180 ctxt->myDoc = NULL;
15181 if (!reuse) {
15182 xmlFreeParserCtxt(ctxt);
15183 }
15184
15185 return (ret);
15186}
15187
15188/**
15189 * xmlReadDoc:
15190 * @cur: a pointer to a zero terminated string
15191 * @URL: the base URL to use for the document
15192 * @encoding: the document encoding, or NULL
15193 * @options: a combination of xmlParserOption
15194 *
15195 * parse an XML in-memory document and build a tree.
15196 *
15197 * Returns the resulting document tree
15198 */
15199xmlDocPtr
15200xmlReadDoc(const xmlChar * cur, const char *URL, const char *encoding, int options)
15201{
15202 xmlParserCtxtPtr ctxt;
15203
15204 if (cur == NULL)
15205 return (NULL);
15206 xmlInitParser();
15207
15208 ctxt = xmlCreateDocParserCtxt(cur);
15209 if (ctxt == NULL)
15210 return (NULL);
15211 return (xmlDoRead(ctxt, URL, encoding, options, 0));
15212}
15213
15214/**
15215 * xmlReadFile:
15216 * @filename: a file or URL
15217 * @encoding: the document encoding, or NULL
15218 * @options: a combination of xmlParserOption
15219 *
15220 * parse an XML file from the filesystem or the network.
15221 *
15222 * Returns the resulting document tree
15223 */
15224xmlDocPtr
15225xmlReadFile(const char *filename, const char *encoding, int options)
15226{
15227 xmlParserCtxtPtr ctxt;
15228
15229 xmlInitParser();
15230 ctxt = xmlCreateURLParserCtxt(filename, options);
15231 if (ctxt == NULL)
15232 return (NULL);
15233 return (xmlDoRead(ctxt, NULL, encoding, options, 0));
15234}
15235
15236/**
15237 * xmlReadMemory:
15238 * @buffer: a pointer to a char array
15239 * @size: the size of the array
15240 * @URL: the base URL to use for the document
15241 * @encoding: the document encoding, or NULL
15242 * @options: a combination of xmlParserOption
15243 *
15244 * parse an XML in-memory document and build a tree.
15245 *
15246 * Returns the resulting document tree
15247 */
15248xmlDocPtr
15249xmlReadMemory(const char *buffer, int size, const char *URL, const char *encoding, int options)
15250{
15251 xmlParserCtxtPtr ctxt;
15252
15253 xmlInitParser();
15254 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
15255 if (ctxt == NULL)
15256 return (NULL);
15257 return (xmlDoRead(ctxt, URL, encoding, options, 0));
15258}
15259
15260/**
15261 * xmlReadFd:
15262 * @fd: an open file descriptor
15263 * @URL: the base URL to use for the document
15264 * @encoding: the document encoding, or NULL
15265 * @options: a combination of xmlParserOption
15266 *
15267 * parse an XML from a file descriptor and build a tree.
15268 * NOTE that the file descriptor will not be closed when the
15269 * reader is closed or reset.
15270 *
15271 * Returns the resulting document tree
15272 */
15273xmlDocPtr
15274xmlReadFd(int fd, const char *URL, const char *encoding, int options)
15275{
15276 xmlParserCtxtPtr ctxt;
15277 xmlParserInputBufferPtr input;
15278 xmlParserInputPtr stream;
15279
15280 if (fd < 0)
15281 return (NULL);
15282 xmlInitParser();
15283
15284 input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
15285 if (input == NULL)
15286 return (NULL);
15287 input->closecallback = NULL;
15288 ctxt = xmlNewParserCtxt();
15289 if (ctxt == NULL) {
15290 xmlFreeParserInputBuffer(input);
15291 return (NULL);
15292 }
15293 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15294 if (stream == NULL) {
15295 xmlFreeParserInputBuffer(input);
15296 xmlFreeParserCtxt(ctxt);
15297 return (NULL);
15298 }
15299 inputPush(ctxt, stream);
15300 return (xmlDoRead(ctxt, URL, encoding, options, 0));
15301}
15302
15303/**
15304 * xmlReadIO:
15305 * @ioread: an I/O read function
15306 * @ioclose: an I/O close function
15307 * @ioctx: an I/O handler
15308 * @URL: the base URL to use for the document
15309 * @encoding: the document encoding, or NULL
15310 * @options: a combination of xmlParserOption
15311 *
15312 * parse an XML document from I/O functions and source and build a tree.
15313 *
15314 * Returns the resulting document tree
15315 */
15316xmlDocPtr
15317xmlReadIO(xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
15318 void *ioctx, const char *URL, const char *encoding, int options)
15319{
15320 xmlParserCtxtPtr ctxt;
15321 xmlParserInputBufferPtr input;
15322 xmlParserInputPtr stream;
15323
15324 if (ioread == NULL)
15325 return (NULL);
15326 xmlInitParser();
15327
15328 input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
15329 XML_CHAR_ENCODING_NONE);
15330 if (input == NULL) {
15331 if (ioclose != NULL)
15332 ioclose(ioctx);
15333 return (NULL);
15334 }
15335 ctxt = xmlNewParserCtxt();
15336 if (ctxt == NULL) {
15337 xmlFreeParserInputBuffer(input);
15338 return (NULL);
15339 }
15340 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15341 if (stream == NULL) {
15342 xmlFreeParserInputBuffer(input);
15343 xmlFreeParserCtxt(ctxt);
15344 return (NULL);
15345 }
15346 inputPush(ctxt, stream);
15347 return (xmlDoRead(ctxt, URL, encoding, options, 0));
15348}
15349
15350/**
15351 * xmlCtxtReadDoc:
15352 * @ctxt: an XML parser context
15353 * @cur: a pointer to a zero terminated string
15354 * @URL: the base URL to use for the document
15355 * @encoding: the document encoding, or NULL
15356 * @options: a combination of xmlParserOption
15357 *
15358 * parse an XML in-memory document and build a tree.
15359 * This reuses the existing @ctxt parser context
15360 *
15361 * Returns the resulting document tree
15362 */
15363xmlDocPtr
15364xmlCtxtReadDoc(xmlParserCtxtPtr ctxt, const xmlChar * cur,
15365 const char *URL, const char *encoding, int options)
15366{
15367 xmlParserInputPtr stream;
15368
15369 if (cur == NULL)
15370 return (NULL);
15371 if (ctxt == NULL)
15372 return (NULL);
15373 xmlInitParser();
15374
15375 xmlCtxtReset(ctxt);
15376
15377 stream = xmlNewStringInputStream(ctxt, cur);
15378 if (stream == NULL) {
15379 return (NULL);
15380 }
15381 inputPush(ctxt, stream);
15382 return (xmlDoRead(ctxt, URL, encoding, options, 1));
15383}
15384
15385/**
15386 * xmlCtxtReadFile:
15387 * @ctxt: an XML parser context
15388 * @filename: a file or URL
15389 * @encoding: the document encoding, or NULL
15390 * @options: a combination of xmlParserOption
15391 *
15392 * parse an XML file from the filesystem or the network.
15393 * This reuses the existing @ctxt parser context
15394 *
15395 * Returns the resulting document tree
15396 */
15397xmlDocPtr
15398xmlCtxtReadFile(xmlParserCtxtPtr ctxt, const char *filename,
15399 const char *encoding, int options)
15400{
15401 xmlParserInputPtr stream;
15402
15403 if (filename == NULL)
15404 return (NULL);
15405 if (ctxt == NULL)
15406 return (NULL);
15407 xmlInitParser();
15408
15409 xmlCtxtReset(ctxt);
15410
15411 stream = xmlLoadExternalEntity(filename, NULL, ctxt);
15412 if (stream == NULL) {
15413 return (NULL);
15414 }
15415 inputPush(ctxt, stream);
15416 return (xmlDoRead(ctxt, NULL, encoding, options, 1));
15417}
15418
15419/**
15420 * xmlCtxtReadMemory:
15421 * @ctxt: an XML parser context
15422 * @buffer: a pointer to a char array
15423 * @size: the size of the array
15424 * @URL: the base URL to use for the document
15425 * @encoding: the document encoding, or NULL
15426 * @options: a combination of xmlParserOption
15427 *
15428 * parse an XML in-memory document and build a tree.
15429 * This reuses the existing @ctxt parser context
15430 *
15431 * Returns the resulting document tree
15432 */
15433xmlDocPtr
15434xmlCtxtReadMemory(xmlParserCtxtPtr ctxt, const char *buffer, int size,
15435 const char *URL, const char *encoding, int options)
15436{
15437 xmlParserInputBufferPtr input;
15438 xmlParserInputPtr stream;
15439
15440 if (ctxt == NULL)
15441 return (NULL);
15442 if (buffer == NULL)
15443 return (NULL);
15444 xmlInitParser();
15445
15446 xmlCtxtReset(ctxt);
15447
15448 input = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
15449 if (input == NULL) {
15450 return(NULL);
15451 }
15452
15453 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15454 if (stream == NULL) {
15455 xmlFreeParserInputBuffer(input);
15456 return(NULL);
15457 }
15458
15459 inputPush(ctxt, stream);
15460 return (xmlDoRead(ctxt, URL, encoding, options, 1));
15461}
15462
15463/**
15464 * xmlCtxtReadFd:
15465 * @ctxt: an XML parser context
15466 * @fd: an open file descriptor
15467 * @URL: the base URL to use for the document
15468 * @encoding: the document encoding, or NULL
15469 * @options: a combination of xmlParserOption
15470 *
15471 * parse an XML from a file descriptor and build a tree.
15472 * This reuses the existing @ctxt parser context
15473 * NOTE that the file descriptor will not be closed when the
15474 * reader is closed or reset.
15475 *
15476 * Returns the resulting document tree
15477 */
15478xmlDocPtr
15479xmlCtxtReadFd(xmlParserCtxtPtr ctxt, int fd,
15480 const char *URL, const char *encoding, int options)
15481{
15482 xmlParserInputBufferPtr input;
15483 xmlParserInputPtr stream;
15484
15485 if (fd < 0)
15486 return (NULL);
15487 if (ctxt == NULL)
15488 return (NULL);
15489 xmlInitParser();
15490
15491 xmlCtxtReset(ctxt);
15492
15493
15494 input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
15495 if (input == NULL)
15496 return (NULL);
15497 input->closecallback = NULL;
15498 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15499 if (stream == NULL) {
15500 xmlFreeParserInputBuffer(input);
15501 return (NULL);
15502 }
15503 inputPush(ctxt, stream);
15504 return (xmlDoRead(ctxt, URL, encoding, options, 1));
15505}
15506
15507/**
15508 * xmlCtxtReadIO:
15509 * @ctxt: an XML parser context
15510 * @ioread: an I/O read function
15511 * @ioclose: an I/O close function
15512 * @ioctx: an I/O handler
15513 * @URL: the base URL to use for the document
15514 * @encoding: the document encoding, or NULL
15515 * @options: a combination of xmlParserOption
15516 *
15517 * parse an XML document from I/O functions and source and build a tree.
15518 * This reuses the existing @ctxt parser context
15519 *
15520 * Returns the resulting document tree
15521 */
15522xmlDocPtr
15523xmlCtxtReadIO(xmlParserCtxtPtr ctxt, xmlInputReadCallback ioread,
15524 xmlInputCloseCallback ioclose, void *ioctx,
15525 const char *URL,
15526 const char *encoding, int options)
15527{
15528 xmlParserInputBufferPtr input;
15529 xmlParserInputPtr stream;
15530
15531 if (ioread == NULL)
15532 return (NULL);
15533 if (ctxt == NULL)
15534 return (NULL);
15535 xmlInitParser();
15536
15537 xmlCtxtReset(ctxt);
15538
15539 input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
15540 XML_CHAR_ENCODING_NONE);
15541 if (input == NULL) {
15542 if (ioclose != NULL)
15543 ioclose(ioctx);
15544 return (NULL);
15545 }
15546 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15547 if (stream == NULL) {
15548 xmlFreeParserInputBuffer(input);
15549 return (NULL);
15550 }
15551 inputPush(ctxt, stream);
15552 return (xmlDoRead(ctxt, URL, encoding, options, 1));
15553}
15554
15555#define bottom_parser
15556#include "elfgcchack.h"
Note: See TracBrowser for help on using the repository browser.

© 2024 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette