VirtualBox

source: vbox/trunk/src/libs/libxml2-2.6.31/parser.c@ 39915

Last change on this file since 39915 was 39915, checked in by vboxsync, 13 years ago

libxml-2.6.31 unmodified

  • Property svn:eol-style set to native
File size: 366.7 KB
Line 
1/*
2 * parser.c : an XML 1.0 parser, namespaces and validity support are mostly
3 * implemented on top of the SAX interfaces
4 *
5 * References:
6 * The XML specification:
7 * http://www.w3.org/TR/REC-xml
8 * Original 1.0 version:
9 * http://www.w3.org/TR/1998/REC-xml-19980210
10 * XML second edition working draft
11 * http://www.w3.org/TR/2000/WD-xml-2e-20000814
12 *
13 * Okay this is a big file, the parser core is around 7000 lines, then it
14 * is followed by the progressive parser top routines, then the various
15 * high level APIs to call the parser and a few miscellaneous functions.
16 * A number of helper functions and deprecated ones have been moved to
17 * parserInternals.c to reduce this file size.
18 * As much as possible the functions are associated with their relative
19 * production in the XML specification. A few productions defining the
20 * different ranges of character are actually implanted either in
21 * parserInternals.h or parserInternals.c
22 * The DOM tree build is realized from the default SAX callbacks in
23 * the module SAX.c.
24 * The routines doing the validation checks are in valid.c and called either
25 * from the SAX callbacks or as standalone functions using a preparsed
26 * document.
27 *
28 * See Copyright for the status of this software.
29 *
30 * daniel@veillard.com
31 */
32
33#define IN_LIBXML
34#include "libxml.h"
35
36#if defined(WIN32) && !defined (__CYGWIN__)
37#define XML_DIR_SEP '\\'
38#else
39#define XML_DIR_SEP '/'
40#endif
41
42#include <stdlib.h>
43#include <string.h>
44#include <stdarg.h>
45#include <libxml/xmlmemory.h>
46#include <libxml/threads.h>
47#include <libxml/globals.h>
48#include <libxml/tree.h>
49#include <libxml/parser.h>
50#include <libxml/parserInternals.h>
51#include <libxml/valid.h>
52#include <libxml/entities.h>
53#include <libxml/xmlerror.h>
54#include <libxml/encoding.h>
55#include <libxml/xmlIO.h>
56#include <libxml/uri.h>
57#ifdef LIBXML_CATALOG_ENABLED
58#include <libxml/catalog.h>
59#endif
60#ifdef LIBXML_SCHEMAS_ENABLED
61#include <libxml/xmlschemastypes.h>
62#include <libxml/relaxng.h>
63#endif
64#ifdef HAVE_CTYPE_H
65#include <ctype.h>
66#endif
67#ifdef HAVE_STDLIB_H
68#include <stdlib.h>
69#endif
70#ifdef HAVE_SYS_STAT_H
71#include <sys/stat.h>
72#endif
73#ifdef HAVE_FCNTL_H
74#include <fcntl.h>
75#endif
76#ifdef HAVE_UNISTD_H
77#include <unistd.h>
78#endif
79#ifdef HAVE_ZLIB_H
80#include <zlib.h>
81#endif
82
83/**
84 * xmlParserMaxDepth:
85 *
86 * arbitrary depth limit for the XML documents that we allow to
87 * process. This is not a limitation of the parser but a safety
88 * boundary feature.
89 */
90unsigned int xmlParserMaxDepth = 1024;
91
92#define SAX2 1
93
94#define XML_PARSER_BIG_BUFFER_SIZE 300
95#define XML_PARSER_BUFFER_SIZE 100
96
97#define SAX_COMPAT_MODE BAD_CAST "SAX compatibility mode document"
98
99/*
100 * List of XML prefixed PI allowed by W3C specs
101 */
102
103static const char *xmlW3CPIs[] = {
104 "xml-stylesheet",
105 NULL
106};
107
108
109/* DEPR void xmlParserHandleReference(xmlParserCtxtPtr ctxt); */
110xmlEntityPtr xmlParseStringPEReference(xmlParserCtxtPtr ctxt,
111 const xmlChar **str);
112
113static xmlParserErrors
114xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
115 xmlSAXHandlerPtr sax,
116 void *user_data, int depth, const xmlChar *URL,
117 const xmlChar *ID, xmlNodePtr *list);
118
119#ifdef LIBXML_LEGACY_ENABLED
120static void
121xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
122 xmlNodePtr lastNode);
123#endif /* LIBXML_LEGACY_ENABLED */
124
125static xmlParserErrors
126xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
127 const xmlChar *string, void *user_data, xmlNodePtr *lst);
128
129/************************************************************************
130 * *
131 * Some factorized error routines *
132 * *
133 ************************************************************************/
134
135/**
136 * xmlErrAttributeDup:
137 * @ctxt: an XML parser context
138 * @prefix: the attribute prefix
139 * @localname: the attribute localname
140 *
141 * Handle a redefinition of attribute error
142 */
143static void
144xmlErrAttributeDup(xmlParserCtxtPtr ctxt, const xmlChar * prefix,
145 const xmlChar * localname)
146{
147 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
148 (ctxt->instate == XML_PARSER_EOF))
149 return;
150 if (ctxt != NULL)
151 ctxt->errNo = XML_ERR_ATTRIBUTE_REDEFINED;
152 if (prefix == NULL)
153 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
154 ctxt->errNo, XML_ERR_FATAL, NULL, 0,
155 (const char *) localname, NULL, NULL, 0, 0,
156 "Attribute %s redefined\n", localname);
157 else
158 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
159 ctxt->errNo, XML_ERR_FATAL, NULL, 0,
160 (const char *) prefix, (const char *) localname,
161 NULL, 0, 0, "Attribute %s:%s redefined\n", prefix,
162 localname);
163 if (ctxt != NULL) {
164 ctxt->wellFormed = 0;
165 if (ctxt->recovery == 0)
166 ctxt->disableSAX = 1;
167 }
168}
169
170/**
171 * xmlFatalErr:
172 * @ctxt: an XML parser context
173 * @error: the error number
174 * @extra: extra information string
175 *
176 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
177 */
178static void
179xmlFatalErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, const char *info)
180{
181 const char *errmsg;
182
183 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
184 (ctxt->instate == XML_PARSER_EOF))
185 return;
186 switch (error) {
187 case XML_ERR_INVALID_HEX_CHARREF:
188 errmsg = "CharRef: invalid hexadecimal value\n";
189 break;
190 case XML_ERR_INVALID_DEC_CHARREF:
191 errmsg = "CharRef: invalid decimal value\n";
192 break;
193 case XML_ERR_INVALID_CHARREF:
194 errmsg = "CharRef: invalid value\n";
195 break;
196 case XML_ERR_INTERNAL_ERROR:
197 errmsg = "internal error";
198 break;
199 case XML_ERR_PEREF_AT_EOF:
200 errmsg = "PEReference at end of document\n";
201 break;
202 case XML_ERR_PEREF_IN_PROLOG:
203 errmsg = "PEReference in prolog\n";
204 break;
205 case XML_ERR_PEREF_IN_EPILOG:
206 errmsg = "PEReference in epilog\n";
207 break;
208 case XML_ERR_PEREF_NO_NAME:
209 errmsg = "PEReference: no name\n";
210 break;
211 case XML_ERR_PEREF_SEMICOL_MISSING:
212 errmsg = "PEReference: expecting ';'\n";
213 break;
214 case XML_ERR_ENTITY_LOOP:
215 errmsg = "Detected an entity reference loop\n";
216 break;
217 case XML_ERR_ENTITY_NOT_STARTED:
218 errmsg = "EntityValue: \" or ' expected\n";
219 break;
220 case XML_ERR_ENTITY_PE_INTERNAL:
221 errmsg = "PEReferences forbidden in internal subset\n";
222 break;
223 case XML_ERR_ENTITY_NOT_FINISHED:
224 errmsg = "EntityValue: \" or ' expected\n";
225 break;
226 case XML_ERR_ATTRIBUTE_NOT_STARTED:
227 errmsg = "AttValue: \" or ' expected\n";
228 break;
229 case XML_ERR_LT_IN_ATTRIBUTE:
230 errmsg = "Unescaped '<' not allowed in attributes values\n";
231 break;
232 case XML_ERR_LITERAL_NOT_STARTED:
233 errmsg = "SystemLiteral \" or ' expected\n";
234 break;
235 case XML_ERR_LITERAL_NOT_FINISHED:
236 errmsg = "Unfinished System or Public ID \" or ' expected\n";
237 break;
238 case XML_ERR_MISPLACED_CDATA_END:
239 errmsg = "Sequence ']]>' not allowed in content\n";
240 break;
241 case XML_ERR_URI_REQUIRED:
242 errmsg = "SYSTEM or PUBLIC, the URI is missing\n";
243 break;
244 case XML_ERR_PUBID_REQUIRED:
245 errmsg = "PUBLIC, the Public Identifier is missing\n";
246 break;
247 case XML_ERR_HYPHEN_IN_COMMENT:
248 errmsg = "Comment must not contain '--' (double-hyphen)\n";
249 break;
250 case XML_ERR_PI_NOT_STARTED:
251 errmsg = "xmlParsePI : no target name\n";
252 break;
253 case XML_ERR_RESERVED_XML_NAME:
254 errmsg = "Invalid PI name\n";
255 break;
256 case XML_ERR_NOTATION_NOT_STARTED:
257 errmsg = "NOTATION: Name expected here\n";
258 break;
259 case XML_ERR_NOTATION_NOT_FINISHED:
260 errmsg = "'>' required to close NOTATION declaration\n";
261 break;
262 case XML_ERR_VALUE_REQUIRED:
263 errmsg = "Entity value required\n";
264 break;
265 case XML_ERR_URI_FRAGMENT:
266 errmsg = "Fragment not allowed";
267 break;
268 case XML_ERR_ATTLIST_NOT_STARTED:
269 errmsg = "'(' required to start ATTLIST enumeration\n";
270 break;
271 case XML_ERR_NMTOKEN_REQUIRED:
272 errmsg = "NmToken expected in ATTLIST enumeration\n";
273 break;
274 case XML_ERR_ATTLIST_NOT_FINISHED:
275 errmsg = "')' required to finish ATTLIST enumeration\n";
276 break;
277 case XML_ERR_MIXED_NOT_STARTED:
278 errmsg = "MixedContentDecl : '|' or ')*' expected\n";
279 break;
280 case XML_ERR_PCDATA_REQUIRED:
281 errmsg = "MixedContentDecl : '#PCDATA' expected\n";
282 break;
283 case XML_ERR_ELEMCONTENT_NOT_STARTED:
284 errmsg = "ContentDecl : Name or '(' expected\n";
285 break;
286 case XML_ERR_ELEMCONTENT_NOT_FINISHED:
287 errmsg = "ContentDecl : ',' '|' or ')' expected\n";
288 break;
289 case XML_ERR_PEREF_IN_INT_SUBSET:
290 errmsg =
291 "PEReference: forbidden within markup decl in internal subset\n";
292 break;
293 case XML_ERR_GT_REQUIRED:
294 errmsg = "expected '>'\n";
295 break;
296 case XML_ERR_CONDSEC_INVALID:
297 errmsg = "XML conditional section '[' expected\n";
298 break;
299 case XML_ERR_EXT_SUBSET_NOT_FINISHED:
300 errmsg = "Content error in the external subset\n";
301 break;
302 case XML_ERR_CONDSEC_INVALID_KEYWORD:
303 errmsg =
304 "conditional section INCLUDE or IGNORE keyword expected\n";
305 break;
306 case XML_ERR_CONDSEC_NOT_FINISHED:
307 errmsg = "XML conditional section not closed\n";
308 break;
309 case XML_ERR_XMLDECL_NOT_STARTED:
310 errmsg = "Text declaration '<?xml' required\n";
311 break;
312 case XML_ERR_XMLDECL_NOT_FINISHED:
313 errmsg = "parsing XML declaration: '?>' expected\n";
314 break;
315 case XML_ERR_EXT_ENTITY_STANDALONE:
316 errmsg = "external parsed entities cannot be standalone\n";
317 break;
318 case XML_ERR_ENTITYREF_SEMICOL_MISSING:
319 errmsg = "EntityRef: expecting ';'\n";
320 break;
321 case XML_ERR_DOCTYPE_NOT_FINISHED:
322 errmsg = "DOCTYPE improperly terminated\n";
323 break;
324 case XML_ERR_LTSLASH_REQUIRED:
325 errmsg = "EndTag: '</' not found\n";
326 break;
327 case XML_ERR_EQUAL_REQUIRED:
328 errmsg = "expected '='\n";
329 break;
330 case XML_ERR_STRING_NOT_CLOSED:
331 errmsg = "String not closed expecting \" or '\n";
332 break;
333 case XML_ERR_STRING_NOT_STARTED:
334 errmsg = "String not started expecting ' or \"\n";
335 break;
336 case XML_ERR_ENCODING_NAME:
337 errmsg = "Invalid XML encoding name\n";
338 break;
339 case XML_ERR_STANDALONE_VALUE:
340 errmsg = "standalone accepts only 'yes' or 'no'\n";
341 break;
342 case XML_ERR_DOCUMENT_EMPTY:
343 errmsg = "Document is empty\n";
344 break;
345 case XML_ERR_DOCUMENT_END:
346 errmsg = "Extra content at the end of the document\n";
347 break;
348 case XML_ERR_NOT_WELL_BALANCED:
349 errmsg = "chunk is not well balanced\n";
350 break;
351 case XML_ERR_EXTRA_CONTENT:
352 errmsg = "extra content at the end of well balanced chunk\n";
353 break;
354 case XML_ERR_VERSION_MISSING:
355 errmsg = "Malformed declaration expecting version\n";
356 break;
357#if 0
358 case:
359 errmsg = "\n";
360 break;
361#endif
362 default:
363 errmsg = "Unregistered error message\n";
364 }
365 if (ctxt != NULL)
366 ctxt->errNo = error;
367 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
368 XML_ERR_FATAL, NULL, 0, info, NULL, NULL, 0, 0, errmsg,
369 info);
370 if (ctxt != NULL) {
371 ctxt->wellFormed = 0;
372 if (ctxt->recovery == 0)
373 ctxt->disableSAX = 1;
374 }
375}
376
377/**
378 * xmlFatalErrMsg:
379 * @ctxt: an XML parser context
380 * @error: the error number
381 * @msg: the error message
382 *
383 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
384 */
385static void
386xmlFatalErrMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
387 const char *msg)
388{
389 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
390 (ctxt->instate == XML_PARSER_EOF))
391 return;
392 if (ctxt != NULL)
393 ctxt->errNo = error;
394 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
395 XML_ERR_FATAL, NULL, 0, NULL, NULL, NULL, 0, 0, msg);
396 if (ctxt != NULL) {
397 ctxt->wellFormed = 0;
398 if (ctxt->recovery == 0)
399 ctxt->disableSAX = 1;
400 }
401}
402
403/**
404 * xmlWarningMsg:
405 * @ctxt: an XML parser context
406 * @error: the error number
407 * @msg: the error message
408 * @str1: extra data
409 * @str2: extra data
410 *
411 * Handle a warning.
412 */
413static void
414xmlWarningMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
415 const char *msg, const xmlChar *str1, const xmlChar *str2)
416{
417 xmlStructuredErrorFunc schannel = NULL;
418
419 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
420 (ctxt->instate == XML_PARSER_EOF))
421 return;
422 if ((ctxt != NULL) && (ctxt->sax != NULL) &&
423 (ctxt->sax->initialized == XML_SAX2_MAGIC))
424 schannel = ctxt->sax->serror;
425 __xmlRaiseError(schannel,
426 (ctxt->sax) ? ctxt->sax->warning : NULL,
427 ctxt->userData,
428 ctxt, NULL, XML_FROM_PARSER, error,
429 XML_ERR_WARNING, NULL, 0,
430 (const char *) str1, (const char *) str2, NULL, 0, 0,
431 msg, (const char *) str1, (const char *) str2);
432}
433
434/**
435 * xmlValidityError:
436 * @ctxt: an XML parser context
437 * @error: the error number
438 * @msg: the error message
439 * @str1: extra data
440 *
441 * Handle a validity error.
442 */
443static void
444xmlValidityError(xmlParserCtxtPtr ctxt, xmlParserErrors error,
445 const char *msg, const xmlChar *str1)
446{
447 xmlStructuredErrorFunc schannel = NULL;
448
449 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
450 (ctxt->instate == XML_PARSER_EOF))
451 return;
452 if (ctxt != NULL) {
453 ctxt->errNo = error;
454 if ((ctxt->sax != NULL) && (ctxt->sax->initialized == XML_SAX2_MAGIC))
455 schannel = ctxt->sax->serror;
456 }
457 __xmlRaiseError(schannel,
458 ctxt->vctxt.error, ctxt->vctxt.userData,
459 ctxt, NULL, XML_FROM_DTD, error,
460 XML_ERR_ERROR, NULL, 0, (const char *) str1,
461 NULL, NULL, 0, 0,
462 msg, (const char *) str1);
463 if (ctxt != NULL) {
464 ctxt->valid = 0;
465 }
466}
467
468/**
469 * xmlFatalErrMsgInt:
470 * @ctxt: an XML parser context
471 * @error: the error number
472 * @msg: the error message
473 * @val: an integer value
474 *
475 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
476 */
477static void
478xmlFatalErrMsgInt(xmlParserCtxtPtr ctxt, xmlParserErrors error,
479 const char *msg, int val)
480{
481 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
482 (ctxt->instate == XML_PARSER_EOF))
483 return;
484 if (ctxt != NULL)
485 ctxt->errNo = error;
486 __xmlRaiseError(NULL, NULL, NULL,
487 ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
488 NULL, 0, NULL, NULL, NULL, val, 0, msg, val);
489 if (ctxt != NULL) {
490 ctxt->wellFormed = 0;
491 if (ctxt->recovery == 0)
492 ctxt->disableSAX = 1;
493 }
494}
495
496/**
497 * xmlFatalErrMsgStrIntStr:
498 * @ctxt: an XML parser context
499 * @error: the error number
500 * @msg: the error message
501 * @str1: an string info
502 * @val: an integer value
503 * @str2: an string info
504 *
505 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
506 */
507static void
508xmlFatalErrMsgStrIntStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
509 const char *msg, const xmlChar *str1, int val,
510 const xmlChar *str2)
511{
512 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
513 (ctxt->instate == XML_PARSER_EOF))
514 return;
515 if (ctxt != NULL)
516 ctxt->errNo = error;
517 __xmlRaiseError(NULL, NULL, NULL,
518 ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
519 NULL, 0, (const char *) str1, (const char *) str2,
520 NULL, val, 0, msg, str1, val, str2);
521 if (ctxt != NULL) {
522 ctxt->wellFormed = 0;
523 if (ctxt->recovery == 0)
524 ctxt->disableSAX = 1;
525 }
526}
527
528/**
529 * xmlFatalErrMsgStr:
530 * @ctxt: an XML parser context
531 * @error: the error number
532 * @msg: the error message
533 * @val: a string value
534 *
535 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
536 */
537static void
538xmlFatalErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
539 const char *msg, const xmlChar * val)
540{
541 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
542 (ctxt->instate == XML_PARSER_EOF))
543 return;
544 if (ctxt != NULL)
545 ctxt->errNo = error;
546 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
547 XML_FROM_PARSER, error, XML_ERR_FATAL,
548 NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
549 val);
550 if (ctxt != NULL) {
551 ctxt->wellFormed = 0;
552 if (ctxt->recovery == 0)
553 ctxt->disableSAX = 1;
554 }
555}
556
557/**
558 * xmlErrMsgStr:
559 * @ctxt: an XML parser context
560 * @error: the error number
561 * @msg: the error message
562 * @val: a string value
563 *
564 * Handle a non fatal parser error
565 */
566static void
567xmlErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
568 const char *msg, const xmlChar * val)
569{
570 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
571 (ctxt->instate == XML_PARSER_EOF))
572 return;
573 if (ctxt != NULL)
574 ctxt->errNo = error;
575 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
576 XML_FROM_PARSER, error, XML_ERR_ERROR,
577 NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
578 val);
579}
580
581/**
582 * xmlNsErr:
583 * @ctxt: an XML parser context
584 * @error: the error number
585 * @msg: the message
586 * @info1: extra information string
587 * @info2: extra information string
588 *
589 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
590 */
591static void
592xmlNsErr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
593 const char *msg,
594 const xmlChar * info1, const xmlChar * info2,
595 const xmlChar * info3)
596{
597 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
598 (ctxt->instate == XML_PARSER_EOF))
599 return;
600 if (ctxt != NULL)
601 ctxt->errNo = error;
602 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error,
603 XML_ERR_ERROR, NULL, 0, (const char *) info1,
604 (const char *) info2, (const char *) info3, 0, 0, msg,
605 info1, info2, info3);
606 if (ctxt != NULL)
607 ctxt->nsWellFormed = 0;
608}
609
610/************************************************************************
611 * *
612 * Library wide options *
613 * *
614 ************************************************************************/
615
616/**
617 * xmlHasFeature:
618 * @feature: the feature to be examined
619 *
620 * Examines if the library has been compiled with a given feature.
621 *
622 * Returns a non-zero value if the feature exist, otherwise zero.
623 * Returns zero (0) if the feature does not exist or an unknown
624 * unknown feature is requested, non-zero otherwise.
625 */
626int
627xmlHasFeature(xmlFeature feature)
628{
629 switch (feature) {
630 case XML_WITH_THREAD:
631#ifdef LIBXML_THREAD_ENABLED
632 return(1);
633#else
634 return(0);
635#endif
636 case XML_WITH_TREE:
637#ifdef LIBXML_TREE_ENABLED
638 return(1);
639#else
640 return(0);
641#endif
642 case XML_WITH_OUTPUT:
643#ifdef LIBXML_OUTPUT_ENABLED
644 return(1);
645#else
646 return(0);
647#endif
648 case XML_WITH_PUSH:
649#ifdef LIBXML_PUSH_ENABLED
650 return(1);
651#else
652 return(0);
653#endif
654 case XML_WITH_READER:
655#ifdef LIBXML_READER_ENABLED
656 return(1);
657#else
658 return(0);
659#endif
660 case XML_WITH_PATTERN:
661#ifdef LIBXML_PATTERN_ENABLED
662 return(1);
663#else
664 return(0);
665#endif
666 case XML_WITH_WRITER:
667#ifdef LIBXML_WRITER_ENABLED
668 return(1);
669#else
670 return(0);
671#endif
672 case XML_WITH_SAX1:
673#ifdef LIBXML_SAX1_ENABLED
674 return(1);
675#else
676 return(0);
677#endif
678 case XML_WITH_FTP:
679#ifdef LIBXML_FTP_ENABLED
680 return(1);
681#else
682 return(0);
683#endif
684 case XML_WITH_HTTP:
685#ifdef LIBXML_HTTP_ENABLED
686 return(1);
687#else
688 return(0);
689#endif
690 case XML_WITH_VALID:
691#ifdef LIBXML_VALID_ENABLED
692 return(1);
693#else
694 return(0);
695#endif
696 case XML_WITH_HTML:
697#ifdef LIBXML_HTML_ENABLED
698 return(1);
699#else
700 return(0);
701#endif
702 case XML_WITH_LEGACY:
703#ifdef LIBXML_LEGACY_ENABLED
704 return(1);
705#else
706 return(0);
707#endif
708 case XML_WITH_C14N:
709#ifdef LIBXML_C14N_ENABLED
710 return(1);
711#else
712 return(0);
713#endif
714 case XML_WITH_CATALOG:
715#ifdef LIBXML_CATALOG_ENABLED
716 return(1);
717#else
718 return(0);
719#endif
720 case XML_WITH_XPATH:
721#ifdef LIBXML_XPATH_ENABLED
722 return(1);
723#else
724 return(0);
725#endif
726 case XML_WITH_XPTR:
727#ifdef LIBXML_XPTR_ENABLED
728 return(1);
729#else
730 return(0);
731#endif
732 case XML_WITH_XINCLUDE:
733#ifdef LIBXML_XINCLUDE_ENABLED
734 return(1);
735#else
736 return(0);
737#endif
738 case XML_WITH_ICONV:
739#ifdef LIBXML_ICONV_ENABLED
740 return(1);
741#else
742 return(0);
743#endif
744 case XML_WITH_ISO8859X:
745#ifdef LIBXML_ISO8859X_ENABLED
746 return(1);
747#else
748 return(0);
749#endif
750 case XML_WITH_UNICODE:
751#ifdef LIBXML_UNICODE_ENABLED
752 return(1);
753#else
754 return(0);
755#endif
756 case XML_WITH_REGEXP:
757#ifdef LIBXML_REGEXP_ENABLED
758 return(1);
759#else
760 return(0);
761#endif
762 case XML_WITH_AUTOMATA:
763#ifdef LIBXML_AUTOMATA_ENABLED
764 return(1);
765#else
766 return(0);
767#endif
768 case XML_WITH_EXPR:
769#ifdef LIBXML_EXPR_ENABLED
770 return(1);
771#else
772 return(0);
773#endif
774 case XML_WITH_SCHEMAS:
775#ifdef LIBXML_SCHEMAS_ENABLED
776 return(1);
777#else
778 return(0);
779#endif
780 case XML_WITH_SCHEMATRON:
781#ifdef LIBXML_SCHEMATRON_ENABLED
782 return(1);
783#else
784 return(0);
785#endif
786 case XML_WITH_MODULES:
787#ifdef LIBXML_MODULES_ENABLED
788 return(1);
789#else
790 return(0);
791#endif
792 case XML_WITH_DEBUG:
793#ifdef LIBXML_DEBUG_ENABLED
794 return(1);
795#else
796 return(0);
797#endif
798 case XML_WITH_DEBUG_MEM:
799#ifdef DEBUG_MEMORY_LOCATION
800 return(1);
801#else
802 return(0);
803#endif
804 case XML_WITH_DEBUG_RUN:
805#ifdef LIBXML_DEBUG_RUNTIME
806 return(1);
807#else
808 return(0);
809#endif
810 case XML_WITH_ZLIB:
811#ifdef LIBXML_ZLIB_ENABLED
812 return(1);
813#else
814 return(0);
815#endif
816 default:
817 break;
818 }
819 return(0);
820}
821
822/************************************************************************
823 * *
824 * SAX2 defaulted attributes handling *
825 * *
826 ************************************************************************/
827
828/**
829 * xmlDetectSAX2:
830 * @ctxt: an XML parser context
831 *
832 * Do the SAX2 detection and specific intialization
833 */
834static void
835xmlDetectSAX2(xmlParserCtxtPtr ctxt) {
836 if (ctxt == NULL) return;
837#ifdef LIBXML_SAX1_ENABLED
838 if ((ctxt->sax) && (ctxt->sax->initialized == XML_SAX2_MAGIC) &&
839 ((ctxt->sax->startElementNs != NULL) ||
840 (ctxt->sax->endElementNs != NULL))) ctxt->sax2 = 1;
841#else
842 ctxt->sax2 = 1;
843#endif /* LIBXML_SAX1_ENABLED */
844
845 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
846 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
847 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
848 if ((ctxt->str_xml==NULL) || (ctxt->str_xmlns==NULL) ||
849 (ctxt->str_xml_ns == NULL)) {
850 xmlErrMemory(ctxt, NULL);
851 }
852}
853
854typedef struct _xmlDefAttrs xmlDefAttrs;
855typedef xmlDefAttrs *xmlDefAttrsPtr;
856struct _xmlDefAttrs {
857 int nbAttrs; /* number of defaulted attributes on that element */
858 int maxAttrs; /* the size of the array */
859 const xmlChar *values[4]; /* array of localname/prefix/values */
860};
861
862/**
863 * xmlAddDefAttrs:
864 * @ctxt: an XML parser context
865 * @fullname: the element fullname
866 * @fullattr: the attribute fullname
867 * @value: the attribute value
868 *
869 * Add a defaulted attribute for an element
870 */
871static void
872xmlAddDefAttrs(xmlParserCtxtPtr ctxt,
873 const xmlChar *fullname,
874 const xmlChar *fullattr,
875 const xmlChar *value) {
876 xmlDefAttrsPtr defaults;
877 int len;
878 const xmlChar *name;
879 const xmlChar *prefix;
880
881 if (ctxt->attsDefault == NULL) {
882 ctxt->attsDefault = xmlHashCreateDict(10, ctxt->dict);
883 if (ctxt->attsDefault == NULL)
884 goto mem_error;
885 }
886
887 /*
888 * split the element name into prefix:localname , the string found
889 * are within the DTD and then not associated to namespace names.
890 */
891 name = xmlSplitQName3(fullname, &len);
892 if (name == NULL) {
893 name = xmlDictLookup(ctxt->dict, fullname, -1);
894 prefix = NULL;
895 } else {
896 name = xmlDictLookup(ctxt->dict, name, -1);
897 prefix = xmlDictLookup(ctxt->dict, fullname, len);
898 }
899
900 /*
901 * make sure there is some storage
902 */
903 defaults = xmlHashLookup2(ctxt->attsDefault, name, prefix);
904 if (defaults == NULL) {
905 defaults = (xmlDefAttrsPtr) xmlMalloc(sizeof(xmlDefAttrs) +
906 (4 * 4) * sizeof(const xmlChar *));
907 if (defaults == NULL)
908 goto mem_error;
909 defaults->nbAttrs = 0;
910 defaults->maxAttrs = 4;
911 xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix, defaults, NULL);
912 } else if (defaults->nbAttrs >= defaults->maxAttrs) {
913 xmlDefAttrsPtr temp;
914
915 temp = (xmlDefAttrsPtr) xmlRealloc(defaults, sizeof(xmlDefAttrs) +
916 (2 * defaults->maxAttrs * 4) * sizeof(const xmlChar *));
917 if (temp == NULL)
918 goto mem_error;
919 defaults = temp;
920 defaults->maxAttrs *= 2;
921 xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix, defaults, NULL);
922 }
923
924 /*
925 * Split the element name into prefix:localname , the string found
926 * are within the DTD and hen not associated to namespace names.
927 */
928 name = xmlSplitQName3(fullattr, &len);
929 if (name == NULL) {
930 name = xmlDictLookup(ctxt->dict, fullattr, -1);
931 prefix = NULL;
932 } else {
933 name = xmlDictLookup(ctxt->dict, name, -1);
934 prefix = xmlDictLookup(ctxt->dict, fullattr, len);
935 }
936
937 defaults->values[4 * defaults->nbAttrs] = name;
938 defaults->values[4 * defaults->nbAttrs + 1] = prefix;
939 /* intern the string and precompute the end */
940 len = xmlStrlen(value);
941 value = xmlDictLookup(ctxt->dict, value, len);
942 defaults->values[4 * defaults->nbAttrs + 2] = value;
943 defaults->values[4 * defaults->nbAttrs + 3] = value + len;
944 defaults->nbAttrs++;
945
946 return;
947
948mem_error:
949 xmlErrMemory(ctxt, NULL);
950 return;
951}
952
953/**
954 * xmlAddSpecialAttr:
955 * @ctxt: an XML parser context
956 * @fullname: the element fullname
957 * @fullattr: the attribute fullname
958 * @type: the attribute type
959 *
960 * Register this attribute type
961 */
962static void
963xmlAddSpecialAttr(xmlParserCtxtPtr ctxt,
964 const xmlChar *fullname,
965 const xmlChar *fullattr,
966 int type)
967{
968 if (ctxt->attsSpecial == NULL) {
969 ctxt->attsSpecial = xmlHashCreateDict(10, ctxt->dict);
970 if (ctxt->attsSpecial == NULL)
971 goto mem_error;
972 }
973
974 if (xmlHashLookup2(ctxt->attsSpecial, fullname, fullattr) != NULL)
975 return;
976
977 xmlHashAddEntry2(ctxt->attsSpecial, fullname, fullattr,
978 (void *) (long) type);
979 return;
980
981mem_error:
982 xmlErrMemory(ctxt, NULL);
983 return;
984}
985
986/**
987 * xmlCleanSpecialAttrCallback:
988 *
989 * Removes CDATA attributes from the special attribute table
990 */
991static void
992xmlCleanSpecialAttrCallback(void *payload, void *data,
993 const xmlChar *fullname, const xmlChar *fullattr,
994 const xmlChar *unused ATTRIBUTE_UNUSED) {
995 xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) data;
996
997 if (((long) payload) == XML_ATTRIBUTE_CDATA) {
998 xmlHashRemoveEntry2(ctxt->attsSpecial, fullname, fullattr, NULL);
999 }
1000}
1001
1002/**
1003 * xmlCleanSpecialAttr:
1004 * @ctxt: an XML parser context
1005 *
1006 * Trim the list of attributes defined to remove all those of type
1007 * CDATA as they are not special. This call should be done when finishing
1008 * to parse the DTD and before starting to parse the document root.
1009 */
1010static void
1011xmlCleanSpecialAttr(xmlParserCtxtPtr ctxt)
1012{
1013 if (ctxt->attsSpecial == NULL)
1014 return;
1015
1016 xmlHashScanFull(ctxt->attsSpecial, xmlCleanSpecialAttrCallback, ctxt);
1017
1018 if (xmlHashSize(ctxt->attsSpecial) == 0) {
1019 xmlHashFree(ctxt->attsSpecial, NULL);
1020 ctxt->attsSpecial = NULL;
1021 }
1022 return;
1023}
1024
1025/**
1026 * xmlCheckLanguageID:
1027 * @lang: pointer to the string value
1028 *
1029 * Checks that the value conforms to the LanguageID production:
1030 *
1031 * NOTE: this is somewhat deprecated, those productions were removed from
1032 * the XML Second edition.
1033 *
1034 * [33] LanguageID ::= Langcode ('-' Subcode)*
1035 * [34] Langcode ::= ISO639Code | IanaCode | UserCode
1036 * [35] ISO639Code ::= ([a-z] | [A-Z]) ([a-z] | [A-Z])
1037 * [36] IanaCode ::= ('i' | 'I') '-' ([a-z] | [A-Z])+
1038 * [37] UserCode ::= ('x' | 'X') '-' ([a-z] | [A-Z])+
1039 * [38] Subcode ::= ([a-z] | [A-Z])+
1040 *
1041 * Returns 1 if correct 0 otherwise
1042 **/
1043int
1044xmlCheckLanguageID(const xmlChar * lang)
1045{
1046 const xmlChar *cur = lang;
1047
1048 if (cur == NULL)
1049 return (0);
1050 if (((cur[0] == 'i') && (cur[1] == '-')) ||
1051 ((cur[0] == 'I') && (cur[1] == '-'))) {
1052 /*
1053 * IANA code
1054 */
1055 cur += 2;
1056 while (((cur[0] >= 'A') && (cur[0] <= 'Z')) || /* non input consuming */
1057 ((cur[0] >= 'a') && (cur[0] <= 'z')))
1058 cur++;
1059 } else if (((cur[0] == 'x') && (cur[1] == '-')) ||
1060 ((cur[0] == 'X') && (cur[1] == '-'))) {
1061 /*
1062 * User code
1063 */
1064 cur += 2;
1065 while (((cur[0] >= 'A') && (cur[0] <= 'Z')) || /* non input consuming */
1066 ((cur[0] >= 'a') && (cur[0] <= 'z')))
1067 cur++;
1068 } else if (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
1069 ((cur[0] >= 'a') && (cur[0] <= 'z'))) {
1070 /*
1071 * ISO639
1072 */
1073 cur++;
1074 if (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
1075 ((cur[0] >= 'a') && (cur[0] <= 'z')))
1076 cur++;
1077 else
1078 return (0);
1079 } else
1080 return (0);
1081 while (cur[0] != 0) { /* non input consuming */
1082 if (cur[0] != '-')
1083 return (0);
1084 cur++;
1085 if (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
1086 ((cur[0] >= 'a') && (cur[0] <= 'z')))
1087 cur++;
1088 else
1089 return (0);
1090 while (((cur[0] >= 'A') && (cur[0] <= 'Z')) || /* non input consuming */
1091 ((cur[0] >= 'a') && (cur[0] <= 'z')))
1092 cur++;
1093 }
1094 return (1);
1095}
1096
1097/************************************************************************
1098 * *
1099 * Parser stacks related functions and macros *
1100 * *
1101 ************************************************************************/
1102
1103xmlEntityPtr xmlParseStringEntityRef(xmlParserCtxtPtr ctxt,
1104 const xmlChar ** str);
1105
1106#ifdef SAX2
1107/**
1108 * nsPush:
1109 * @ctxt: an XML parser context
1110 * @prefix: the namespace prefix or NULL
1111 * @URL: the namespace name
1112 *
1113 * Pushes a new parser namespace on top of the ns stack
1114 *
1115 * Returns -1 in case of error, -2 if the namespace should be discarded
1116 * and the index in the stack otherwise.
1117 */
1118static int
1119nsPush(xmlParserCtxtPtr ctxt, const xmlChar *prefix, const xmlChar *URL)
1120{
1121 if (ctxt->options & XML_PARSE_NSCLEAN) {
1122 int i;
1123 for (i = 0;i < ctxt->nsNr;i += 2) {
1124 if (ctxt->nsTab[i] == prefix) {
1125 /* in scope */
1126 if (ctxt->nsTab[i + 1] == URL)
1127 return(-2);
1128 /* out of scope keep it */
1129 break;
1130 }
1131 }
1132 }
1133 if ((ctxt->nsMax == 0) || (ctxt->nsTab == NULL)) {
1134 ctxt->nsMax = 10;
1135 ctxt->nsNr = 0;
1136 ctxt->nsTab = (const xmlChar **)
1137 xmlMalloc(ctxt->nsMax * sizeof(xmlChar *));
1138 if (ctxt->nsTab == NULL) {
1139 xmlErrMemory(ctxt, NULL);
1140 ctxt->nsMax = 0;
1141 return (-1);
1142 }
1143 } else if (ctxt->nsNr >= ctxt->nsMax) {
1144 ctxt->nsMax *= 2;
1145 ctxt->nsTab = (const xmlChar **)
1146 xmlRealloc((char *) ctxt->nsTab,
1147 ctxt->nsMax * sizeof(ctxt->nsTab[0]));
1148 if (ctxt->nsTab == NULL) {
1149 xmlErrMemory(ctxt, NULL);
1150 ctxt->nsMax /= 2;
1151 return (-1);
1152 }
1153 }
1154 ctxt->nsTab[ctxt->nsNr++] = prefix;
1155 ctxt->nsTab[ctxt->nsNr++] = URL;
1156 return (ctxt->nsNr);
1157}
1158/**
1159 * nsPop:
1160 * @ctxt: an XML parser context
1161 * @nr: the number to pop
1162 *
1163 * Pops the top @nr parser prefix/namespace from the ns stack
1164 *
1165 * Returns the number of namespaces removed
1166 */
1167static int
1168nsPop(xmlParserCtxtPtr ctxt, int nr)
1169{
1170 int i;
1171
1172 if (ctxt->nsTab == NULL) return(0);
1173 if (ctxt->nsNr < nr) {
1174 xmlGenericError(xmlGenericErrorContext, "Pbm popping %d NS\n", nr);
1175 nr = ctxt->nsNr;
1176 }
1177 if (ctxt->nsNr <= 0)
1178 return (0);
1179
1180 for (i = 0;i < nr;i++) {
1181 ctxt->nsNr--;
1182 ctxt->nsTab[ctxt->nsNr] = NULL;
1183 }
1184 return(nr);
1185}
1186#endif
1187
1188static int
1189xmlCtxtGrowAttrs(xmlParserCtxtPtr ctxt, int nr) {
1190 const xmlChar **atts;
1191 int *attallocs;
1192 int maxatts;
1193
1194 if (ctxt->atts == NULL) {
1195 maxatts = 55; /* allow for 10 attrs by default */
1196 atts = (const xmlChar **)
1197 xmlMalloc(maxatts * sizeof(xmlChar *));
1198 if (atts == NULL) goto mem_error;
1199 ctxt->atts = atts;
1200 attallocs = (int *) xmlMalloc((maxatts / 5) * sizeof(int));
1201 if (attallocs == NULL) goto mem_error;
1202 ctxt->attallocs = attallocs;
1203 ctxt->maxatts = maxatts;
1204 } else if (nr + 5 > ctxt->maxatts) {
1205 maxatts = (nr + 5) * 2;
1206 atts = (const xmlChar **) xmlRealloc((void *) ctxt->atts,
1207 maxatts * sizeof(const xmlChar *));
1208 if (atts == NULL) goto mem_error;
1209 ctxt->atts = atts;
1210 attallocs = (int *) xmlRealloc((void *) ctxt->attallocs,
1211 (maxatts / 5) * sizeof(int));
1212 if (attallocs == NULL) goto mem_error;
1213 ctxt->attallocs = attallocs;
1214 ctxt->maxatts = maxatts;
1215 }
1216 return(ctxt->maxatts);
1217mem_error:
1218 xmlErrMemory(ctxt, NULL);
1219 return(-1);
1220}
1221
1222/**
1223 * inputPush:
1224 * @ctxt: an XML parser context
1225 * @value: the parser input
1226 *
1227 * Pushes a new parser input on top of the input stack
1228 *
1229 * Returns 0 in case of error, the index in the stack otherwise
1230 */
1231int
1232inputPush(xmlParserCtxtPtr ctxt, xmlParserInputPtr value)
1233{
1234 if ((ctxt == NULL) || (value == NULL))
1235 return(0);
1236 if (ctxt->inputNr >= ctxt->inputMax) {
1237 ctxt->inputMax *= 2;
1238 ctxt->inputTab =
1239 (xmlParserInputPtr *) xmlRealloc(ctxt->inputTab,
1240 ctxt->inputMax *
1241 sizeof(ctxt->inputTab[0]));
1242 if (ctxt->inputTab == NULL) {
1243 xmlErrMemory(ctxt, NULL);
1244 return (0);
1245 }
1246 }
1247 ctxt->inputTab[ctxt->inputNr] = value;
1248 ctxt->input = value;
1249 return (ctxt->inputNr++);
1250}
1251/**
1252 * inputPop:
1253 * @ctxt: an XML parser context
1254 *
1255 * Pops the top parser input from the input stack
1256 *
1257 * Returns the input just removed
1258 */
1259xmlParserInputPtr
1260inputPop(xmlParserCtxtPtr ctxt)
1261{
1262 xmlParserInputPtr ret;
1263
1264 if (ctxt == NULL)
1265 return(NULL);
1266 if (ctxt->inputNr <= 0)
1267 return (NULL);
1268 ctxt->inputNr--;
1269 if (ctxt->inputNr > 0)
1270 ctxt->input = ctxt->inputTab[ctxt->inputNr - 1];
1271 else
1272 ctxt->input = NULL;
1273 ret = ctxt->inputTab[ctxt->inputNr];
1274 ctxt->inputTab[ctxt->inputNr] = NULL;
1275 return (ret);
1276}
1277/**
1278 * nodePush:
1279 * @ctxt: an XML parser context
1280 * @value: the element node
1281 *
1282 * Pushes a new element node on top of the node stack
1283 *
1284 * Returns 0 in case of error, the index in the stack otherwise
1285 */
1286int
1287nodePush(xmlParserCtxtPtr ctxt, xmlNodePtr value)
1288{
1289 if (ctxt == NULL) return(0);
1290 if (ctxt->nodeNr >= ctxt->nodeMax) {
1291 xmlNodePtr *tmp;
1292
1293 tmp = (xmlNodePtr *) xmlRealloc(ctxt->nodeTab,
1294 ctxt->nodeMax * 2 *
1295 sizeof(ctxt->nodeTab[0]));
1296 if (tmp == NULL) {
1297 xmlErrMemory(ctxt, NULL);
1298 return (0);
1299 }
1300 ctxt->nodeTab = tmp;
1301 ctxt->nodeMax *= 2;
1302 }
1303 if (((unsigned int) ctxt->nodeNr) > xmlParserMaxDepth) {
1304 xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR,
1305 "Excessive depth in document: change xmlParserMaxDepth = %d\n",
1306 xmlParserMaxDepth);
1307 ctxt->instate = XML_PARSER_EOF;
1308 return(0);
1309 }
1310 ctxt->nodeTab[ctxt->nodeNr] = value;
1311 ctxt->node = value;
1312 return (ctxt->nodeNr++);
1313}
1314/**
1315 * nodePop:
1316 * @ctxt: an XML parser context
1317 *
1318 * Pops the top element node from the node stack
1319 *
1320 * Returns the node just removed
1321 */
1322xmlNodePtr
1323nodePop(xmlParserCtxtPtr ctxt)
1324{
1325 xmlNodePtr ret;
1326
1327 if (ctxt == NULL) return(NULL);
1328 if (ctxt->nodeNr <= 0)
1329 return (NULL);
1330 ctxt->nodeNr--;
1331 if (ctxt->nodeNr > 0)
1332 ctxt->node = ctxt->nodeTab[ctxt->nodeNr - 1];
1333 else
1334 ctxt->node = NULL;
1335 ret = ctxt->nodeTab[ctxt->nodeNr];
1336 ctxt->nodeTab[ctxt->nodeNr] = NULL;
1337 return (ret);
1338}
1339
1340#ifdef LIBXML_PUSH_ENABLED
1341/**
1342 * nameNsPush:
1343 * @ctxt: an XML parser context
1344 * @value: the element name
1345 * @prefix: the element prefix
1346 * @URI: the element namespace name
1347 *
1348 * Pushes a new element name/prefix/URL on top of the name stack
1349 *
1350 * Returns -1 in case of error, the index in the stack otherwise
1351 */
1352static int
1353nameNsPush(xmlParserCtxtPtr ctxt, const xmlChar * value,
1354 const xmlChar *prefix, const xmlChar *URI, int nsNr)
1355{
1356 if (ctxt->nameNr >= ctxt->nameMax) {
1357 const xmlChar * *tmp;
1358 void **tmp2;
1359 ctxt->nameMax *= 2;
1360 tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
1361 ctxt->nameMax *
1362 sizeof(ctxt->nameTab[0]));
1363 if (tmp == NULL) {
1364 ctxt->nameMax /= 2;
1365 goto mem_error;
1366 }
1367 ctxt->nameTab = tmp;
1368 tmp2 = (void **) xmlRealloc((void * *)ctxt->pushTab,
1369 ctxt->nameMax * 3 *
1370 sizeof(ctxt->pushTab[0]));
1371 if (tmp2 == NULL) {
1372 ctxt->nameMax /= 2;
1373 goto mem_error;
1374 }
1375 ctxt->pushTab = tmp2;
1376 }
1377 ctxt->nameTab[ctxt->nameNr] = value;
1378 ctxt->name = value;
1379 ctxt->pushTab[ctxt->nameNr * 3] = (void *) prefix;
1380 ctxt->pushTab[ctxt->nameNr * 3 + 1] = (void *) URI;
1381 ctxt->pushTab[ctxt->nameNr * 3 + 2] = (void *) (long) nsNr;
1382 return (ctxt->nameNr++);
1383mem_error:
1384 xmlErrMemory(ctxt, NULL);
1385 return (-1);
1386}
1387/**
1388 * nameNsPop:
1389 * @ctxt: an XML parser context
1390 *
1391 * Pops the top element/prefix/URI name from the name stack
1392 *
1393 * Returns the name just removed
1394 */
1395static const xmlChar *
1396nameNsPop(xmlParserCtxtPtr ctxt)
1397{
1398 const xmlChar *ret;
1399
1400 if (ctxt->nameNr <= 0)
1401 return (NULL);
1402 ctxt->nameNr--;
1403 if (ctxt->nameNr > 0)
1404 ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
1405 else
1406 ctxt->name = NULL;
1407 ret = ctxt->nameTab[ctxt->nameNr];
1408 ctxt->nameTab[ctxt->nameNr] = NULL;
1409 return (ret);
1410}
1411#endif /* LIBXML_PUSH_ENABLED */
1412
1413/**
1414 * namePush:
1415 * @ctxt: an XML parser context
1416 * @value: the element name
1417 *
1418 * Pushes a new element name on top of the name stack
1419 *
1420 * Returns -1 in case of error, the index in the stack otherwise
1421 */
1422int
1423namePush(xmlParserCtxtPtr ctxt, const xmlChar * value)
1424{
1425 if (ctxt == NULL) return (-1);
1426
1427 if (ctxt->nameNr >= ctxt->nameMax) {
1428 const xmlChar * *tmp;
1429 ctxt->nameMax *= 2;
1430 tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
1431 ctxt->nameMax *
1432 sizeof(ctxt->nameTab[0]));
1433 if (tmp == NULL) {
1434 ctxt->nameMax /= 2;
1435 goto mem_error;
1436 }
1437 ctxt->nameTab = tmp;
1438 }
1439 ctxt->nameTab[ctxt->nameNr] = value;
1440 ctxt->name = value;
1441 return (ctxt->nameNr++);
1442mem_error:
1443 xmlErrMemory(ctxt, NULL);
1444 return (-1);
1445}
1446/**
1447 * namePop:
1448 * @ctxt: an XML parser context
1449 *
1450 * Pops the top element name from the name stack
1451 *
1452 * Returns the name just removed
1453 */
1454const xmlChar *
1455namePop(xmlParserCtxtPtr ctxt)
1456{
1457 const xmlChar *ret;
1458
1459 if ((ctxt == NULL) || (ctxt->nameNr <= 0))
1460 return (NULL);
1461 ctxt->nameNr--;
1462 if (ctxt->nameNr > 0)
1463 ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
1464 else
1465 ctxt->name = NULL;
1466 ret = ctxt->nameTab[ctxt->nameNr];
1467 ctxt->nameTab[ctxt->nameNr] = NULL;
1468 return (ret);
1469}
1470
1471static int spacePush(xmlParserCtxtPtr ctxt, int val) {
1472 if (ctxt->spaceNr >= ctxt->spaceMax) {
1473 ctxt->spaceMax *= 2;
1474 ctxt->spaceTab = (int *) xmlRealloc(ctxt->spaceTab,
1475 ctxt->spaceMax * sizeof(ctxt->spaceTab[0]));
1476 if (ctxt->spaceTab == NULL) {
1477 xmlErrMemory(ctxt, NULL);
1478 return(0);
1479 }
1480 }
1481 ctxt->spaceTab[ctxt->spaceNr] = val;
1482 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr];
1483 return(ctxt->spaceNr++);
1484}
1485
1486static int spacePop(xmlParserCtxtPtr ctxt) {
1487 int ret;
1488 if (ctxt->spaceNr <= 0) return(0);
1489 ctxt->spaceNr--;
1490 if (ctxt->spaceNr > 0)
1491 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr - 1];
1492 else
1493 ctxt->space = &ctxt->spaceTab[0];
1494 ret = ctxt->spaceTab[ctxt->spaceNr];
1495 ctxt->spaceTab[ctxt->spaceNr] = -1;
1496 return(ret);
1497}
1498
/*
 * Macros for accessing the content. Those should be used only by the parser,
 * and not exported.
 *
 * All of them implicitly operate on a local variable named `ctxt`.
 *
 * Dirty macros, i.e. one often needs to make assumptions on the context to
 * use them
 *
 *   CUR_PTR return the current pointer to the xmlChar to be parsed.
 *           To be used with extreme caution since operations consuming
 *           characters may move the input buffer to a different location !
 *   CUR     returns the current xmlChar value, i.e. a single 8 bit unit.
 *           This should be used internally by the parser
 *           only to compare to ASCII values otherwise it would break when
 *           running with UTF-8 encoding.
 *   RAW     same as CUR but in the input buffer, bypass any token
 *           extraction that may have been done
 *   NXT(n)  returns the n'th next xmlChar. Same as CUR, it should be used
 *           only to compare on ASCII based substring.
 *   SKIP(n) Skip n xmlChar, and must also be used only to skip ASCII defined
 *           strings without newlines within the parser.
 *   NEXT1   Skip 1 xmlChar, and must also be used only to skip 1 non-newline
 *           ASCII defined char within the parser.
 * Clean macros, not dependent of an ASCII context, expect UTF-8 encoding
 *
 *   NEXT    Skip to the next character, this does the proper decoding
 *           in UTF-8 mode. It also pop-up unfinished entities on the fly.
 *   NEXTL(l) Skip the current unicode character of l xmlChars long.
 *   CUR_CHAR(l) returns the current unicode character (int), set l
 *           to the number of xmlChars used for the encoding [0-5].
 *   CUR_SCHAR  same but operate on a string instead of the context
 *   COPY_BUF  copy the current unicode char to the target buffer, increment
 *            the index
 *   GROW, SHRINK  handling of input buffers
 */

/* RAW and CUR expand to the same expression here. */
#define RAW (*ctxt->input->cur)
#define CUR (*ctxt->input->cur)
#define NXT(val) ctxt->input->cur[(val)]
#define CUR_PTR ctxt->input->cur

/*
 * CMPn(s, c1..cn): true when the first n bytes at s equal c1..cn, compared
 * as unsigned char. The comparison short-circuits at the first mismatch.
 * Note that `s` is not parenthesized inside the expansion.
 */
#define CMP4( s, c1, c2, c3, c4 ) \
  ( ((unsigned char *) s)[ 0 ] == c1 && ((unsigned char *) s)[ 1 ] == c2 && \
    ((unsigned char *) s)[ 2 ] == c3 && ((unsigned char *) s)[ 3 ] == c4 )
#define CMP5( s, c1, c2, c3, c4, c5 ) \
  ( CMP4( s, c1, c2, c3, c4 ) && ((unsigned char *) s)[ 4 ] == c5 )
#define CMP6( s, c1, c2, c3, c4, c5, c6 ) \
  ( CMP5( s, c1, c2, c3, c4, c5 ) && ((unsigned char *) s)[ 5 ] == c6 )
#define CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) \
  ( CMP6( s, c1, c2, c3, c4, c5, c6 ) && ((unsigned char *) s)[ 6 ] == c7 )
#define CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) \
  ( CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) && ((unsigned char *) s)[ 7 ] == c8 )
#define CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) \
  ( CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) && \
    ((unsigned char *) s)[ 8 ] == c9 )
#define CMP10( s, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10 ) \
  ( CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) && \
    ((unsigned char *) s)[ 9 ] == c10 )

/*
 * SKIP(val): advance cur, col and nbChars by val without looking at the
 * skipped bytes (so line is never updated), then hand a '%' to
 * xmlParserHandlePEReference() and, if the input is exhausted and cannot
 * be grown, pop it.
 */
#define SKIP(val) do {							\
    ctxt->nbChars += (val),ctxt->input->cur += (val),ctxt->input->col+=(val);			\
    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);	\
    if ((*ctxt->input->cur == 0) &&					\
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))		\
	    xmlPopInput(ctxt);						\
  } while (0)

/*
 * SKIPL(val): like SKIP but steps one xmlChar at a time so that line and
 * col are kept up to date across '\n'. `val` is used unparenthesized in the
 * loop condition, so pass a simple expression.
 */
#define SKIPL(val) do {							\
    int skipl;								\
    for(skipl=0; skipl<val; skipl++) {					\
    	if (*(ctxt->input->cur) == '\n') {				\
	ctxt->input->line++; ctxt->input->col = 1;			\
    	} else ctxt->input->col++;					\
    	ctxt->nbChars++;						\
	ctxt->input->cur++;						\
    }									\
    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);	\
    if ((*ctxt->input->cur == 0) &&					\
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))		\
	    xmlPopInput(ctxt);						\
  } while (0)

/*
 * SHRINK: when not in progressive mode, and more than 2 * INPUT_CHUNK bytes
 * lie behind cur while fewer than 2 * INPUT_CHUNK remain ahead, call
 * xmlSHRINK(). The expansion is a bare `if` statement that already ends
 * with ';', so it must not be used as the body of an if/else.
 */
#define SHRINK if ((ctxt->progressive == 0) &&				\
		   (ctxt->input->cur - ctxt->input->base > 2 * INPUT_CHUNK) && \
		   (ctxt->input->end - ctxt->input->cur < 2 * INPUT_CHUNK)) \
	xmlSHRINK (ctxt);
1584
1585static void xmlSHRINK (xmlParserCtxtPtr ctxt) {
1586 xmlParserInputShrink(ctxt->input);
1587 if ((*ctxt->input->cur == 0) &&
1588 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
1589 xmlPopInput(ctxt);
1590 }
1591
/*
 * GROW: when not in progressive mode and fewer than INPUT_CHUNK bytes remain
 * ahead of cur, call xmlGROW(). The expansion is a bare `if` statement that
 * already ends with ';' (some call sites rely on this and omit their own
 * semicolon), so it must not be used as the body of an if/else.
 */
#define GROW if ((ctxt->progressive == 0) &&				\
		 (ctxt->input->end - ctxt->input->cur < INPUT_CHUNK))	\
	xmlGROW (ctxt);
1595
1596static void xmlGROW (xmlParserCtxtPtr ctxt) {
1597 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1598 if ((*ctxt->input->cur == 0) &&
1599 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
1600 xmlPopInput(ctxt);
1601}
1602
/* SKIP_BLANKS: skip blanks at the current position; see xmlSkipBlankChars() */
#define SKIP_BLANKS xmlSkipBlankChars(ctxt)

/* NEXT: advance by one character via xmlNextChar() */
#define NEXT xmlNextChar(ctxt)

/*
 * NEXT1: advance by exactly one xmlChar, bumping col and nbChars (line is
 * never updated), and try to grow the input when a 0 byte is reached. The
 * expansion is a brace block, not a do/while(0): a trailing ';' after it is
 * an extra empty statement.
 */
#define NEXT1 {								\
	ctxt->input->col++;						\
	ctxt->input->cur++;						\
	ctxt->nbChars++;						\
	if (*ctxt->input->cur == 0)					\
	    xmlParserInputGrow(ctxt->input, INPUT_CHUNK);		\
    }

/*
 * NEXTL(l): advance cur by l xmlChars, counting it as one column (or one
 * line when the current byte is '\n'), then hand a '%' to
 * xmlParserHandlePEReference(). `l` is used unparenthesized.
 */
#define NEXTL(l) do {							\
    if (*(ctxt->input->cur) == '\n') {					\
	ctxt->input->line++; ctxt->input->col = 1;			\
    } else ctxt->input->col++;						\
    ctxt->input->cur += l;				\
    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);	\
  } while (0)

/* Both take the NAME of an int variable for l; its address is passed on. */
#define CUR_CHAR(l) xmlCurrentChar(ctxt, &l)
#define CUR_SCHAR(s, l) xmlStringCurrentChar(ctxt, s, &l)

/*
 * COPY_BUF(l,b,i,v): append character v to buffer b at index i, advancing i.
 * When l == 1 a single xmlChar is stored, otherwise xmlCopyCharMultiByte()
 * does the encoding. The expansion is an unbraced if/else without a
 * trailing ';', so wrap uses in braces when nesting inside another if.
 */
#define COPY_BUF(l,b,i,v)						\
    if (l == 1) b[i++] = (xmlChar) v;					\
    else i += xmlCopyCharMultiByte(&b[i],v)
1629
/**
 * xmlSkipBlankChars:
 * @ctxt:  the XML parser context
 *
 * skip all blanks character found at that point in the input streams.
 * It pops up finished entities in the process if allowable at that point.
 *
 * Two strategies are used: a tight pointer loop when there is a single
 * input and the parser is not in the DTD state, and a slower loop built
 * on NEXT that can also pop exhausted inputs and expand parameter-entity
 * references.
 *
 * Returns the number of space chars skipped
 */

int
xmlSkipBlankChars(xmlParserCtxtPtr ctxt) {
    int res = 0;

    /*
     * It's Okay to use CUR/NEXT here since all the blanks are on
     * the ASCII range.
     */
    if ((ctxt->inputNr == 1) && (ctxt->instate != XML_PARSER_DTD)) {
	const xmlChar *cur;
	/*
	 * if we are in the document content, go really fast
	 */
	cur = ctxt->input->cur;
	while (IS_BLANK_CH(*cur)) {
	    /* only line/col resets on '\n' are tracked on this path */
	    if (*cur == '\n') {
		ctxt->input->line++; ctxt->input->col = 1;
	    }
	    cur++;
	    res++;
	    if (*cur == 0) {
		/*
		 * Publish the position before growing and reload it
		 * afterwards, since growing may move the buffer.
		 */
		ctxt->input->cur = cur;
		xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
		cur = ctxt->input->cur;
	    }
	}
	ctxt->input->cur = cur;
    } else {
	int cur;
	do {
	    cur = CUR;
	    while (IS_BLANK_CH(cur)) { /* CHECKED tstblanks.xml */
		NEXT;
		cur = CUR;
		res++;
	    }
	    /* pop exhausted nested inputs, except while inside a comment */
	    while ((cur == 0) && (ctxt->inputNr > 1) &&
		   (ctxt->instate != XML_PARSER_COMMENT)) {
		xmlPopInput(ctxt);
		cur = CUR;
	    }
	    /*
	     * Need to handle support of entities branching here
	     */
	    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);
	    /*
	     * NOTE(review): `cur` is not reloaded after the PE reference
	     * handling above, so this test uses the value read before it.
	     */
	} while (IS_BLANK(cur)); /* CHECKED tstblanks.xml */
    }
    return(res);
}
1689
1690/************************************************************************
1691 * *
1692 * Commodity functions to handle entities *
1693 * *
1694 ************************************************************************/
1695
1696/**
1697 * xmlPopInput:
1698 * @ctxt: an XML parser context
1699 *
1700 * xmlPopInput: the current input pointed by ctxt->input came to an end
1701 * pop it and return the next char.
1702 *
1703 * Returns the current xmlChar in the parser context
1704 */
1705xmlChar
1706xmlPopInput(xmlParserCtxtPtr ctxt) {
1707 if ((ctxt == NULL) || (ctxt->inputNr <= 1)) return(0);
1708 if (xmlParserDebugEntities)
1709 xmlGenericError(xmlGenericErrorContext,
1710 "Popping input %d\n", ctxt->inputNr);
1711 xmlFreeInputStream(inputPop(ctxt));
1712 if ((*ctxt->input->cur == 0) &&
1713 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
1714 return(xmlPopInput(ctxt));
1715 return(CUR);
1716}
1717
1718/**
1719 * xmlPushInput:
1720 * @ctxt: an XML parser context
1721 * @input: an XML parser input fragment (entity, XML fragment ...).
1722 *
1723 * xmlPushInput: switch to a new input stream which is stacked on top
1724 * of the previous one(s).
1725 */
1726void
1727xmlPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr input) {
1728 if (input == NULL) return;
1729
1730 if (xmlParserDebugEntities) {
1731 if ((ctxt->input != NULL) && (ctxt->input->filename))
1732 xmlGenericError(xmlGenericErrorContext,
1733 "%s(%d): ", ctxt->input->filename,
1734 ctxt->input->line);
1735 xmlGenericError(xmlGenericErrorContext,
1736 "Pushing input %d : %.30s\n", ctxt->inputNr+1, input->cur);
1737 }
1738 inputPush(ctxt, input);
1739 GROW;
1740}
1741
/**
 * xmlParseCharRef:
 * @ctxt:  an XML parser context
 *
 * parse Reference declarations
 *
 * [66] CharRef ::= '&#' [0-9]+ ';' |
 *                  '&#x' [0-9a-fA-F]+ ';'
 *
 * [ WFC: Legal Character ]
 * Characters referred to using character references must match the
 * production for Char.
 *
 * The reference is consumed from the current input, including the
 * terminating ';' when present. A value that ever exceeds 0x10FFFF while
 * being accumulated is remembered and rejected.
 *
 * Returns the value parsed (as an int), 0 in case of error
 */
int
xmlParseCharRef(xmlParserCtxtPtr ctxt) {
    unsigned int val = 0;
    int count = 0;
    /* non-zero once val has exceeded 0x10FFFF; never reset afterwards */
    unsigned int outofrange = 0;

    /*
     * Using RAW/CUR/NEXT is okay since we are working on ASCII range here
     */
    if ((RAW == '&') && (NXT(1) == '#') &&
        (NXT(2) == 'x')) {
	/* hexadecimal form: &#x...; */
	SKIP(3);
	GROW;
	while (RAW != ';') { /* loop blocked by count */
	    /*
	     * NOTE(review): count is incremented both here and at the
	     * bottom of the loop, so it advances by 2 per digit. Together
	     * with the (count < 20) guards below this makes hex letters
	     * (but not decimal digits) be rejected on some iterations of
	     * long references - confirm this is intended.
	     */
	    if (count++ > 20) {
		count = 0;
		GROW;
	    }
	    if ((RAW >= '0') && (RAW <= '9'))
	        val = val * 16 + (CUR - '0');
	    else if ((RAW >= 'a') && (RAW <= 'f') && (count < 20))
	        val = val * 16 + (CUR - 'a') + 10;
	    else if ((RAW >= 'A') && (RAW <= 'F') && (count < 20))
	        val = val * 16 + (CUR - 'A') + 10;
	    else {
		xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
		val = 0;
		break;
	    }
	    if (val > 0x10FFFF)
	        outofrange = val;

	    NEXT;
	    count++;
	}
	if (RAW == ';') {
	    /* on purpose to avoid reentrancy problems with NEXT and SKIP */
	    ctxt->input->col++;
	    ctxt->nbChars ++;
	    ctxt->input->cur++;
	}
    } else if  ((RAW == '&') && (NXT(1) == '#')) {
	/* decimal form: &#...; */
	SKIP(2);
	GROW;
	while (RAW != ';') { /* loop blocked by count */
	    if (count++ > 20) {
		count = 0;
		GROW;
	    }
	    if ((RAW >= '0') && (RAW <= '9'))
	        val = val * 10 + (CUR - '0');
	    else {
		xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
		val = 0;
		break;
	    }
	    if (val > 0x10FFFF)
	        outofrange = val;

	    NEXT;
	    count++;
	}
	if (RAW == ';') {
	    /* on purpose to avoid reentrancy problems with NEXT and SKIP */
	    ctxt->input->col++;
	    ctxt->nbChars ++;
	    ctxt->input->cur++;
	}
    } else {
	/* not positioned on "&#": report, then fall through with val == 0 */
        xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
    }

    /*
     * [ WFC: Legal Character ]
     * Characters referred to using character references must match the
     * production for Char.
     */
    if ((IS_CHAR(val) && (outofrange == 0))) {
        return(val);
    } else {
	/*
	 * Reached for every failure above as well, so a malformed
	 * reference produces this error in addition to the earlier one.
	 */
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
                          "xmlParseCharRef: invalid xmlChar value %d\n",
	                  val);
    }
    return(0);
}
1843
1844/**
1845 * xmlParseStringCharRef:
1846 * @ctxt: an XML parser context
1847 * @str: a pointer to an index in the string
1848 *
1849 * parse Reference declarations, variant parsing from a string rather
1850 * than an an input flow.
1851 *
1852 * [66] CharRef ::= '&#' [0-9]+ ';' |
1853 * '&#x' [0-9a-fA-F]+ ';'
1854 *
1855 * [ WFC: Legal Character ]
1856 * Characters referred to using character references must match the
1857 * production for Char.
1858 *
1859 * Returns the value parsed (as an int), 0 in case of error, str will be
1860 * updated to the current value of the index
1861 */
1862static int
1863xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xmlChar **str) {
1864 const xmlChar *ptr;
1865 xmlChar cur;
1866 unsigned int val = 0;
1867 unsigned int outofrange = 0;
1868
1869 if ((str == NULL) || (*str == NULL)) return(0);
1870 ptr = *str;
1871 cur = *ptr;
1872 if ((cur == '&') && (ptr[1] == '#') && (ptr[2] == 'x')) {
1873 ptr += 3;
1874 cur = *ptr;
1875 while (cur != ';') { /* Non input consuming loop */
1876 if ((cur >= '0') && (cur <= '9'))
1877 val = val * 16 + (cur - '0');
1878 else if ((cur >= 'a') && (cur <= 'f'))
1879 val = val * 16 + (cur - 'a') + 10;
1880 else if ((cur >= 'A') && (cur <= 'F'))
1881 val = val * 16 + (cur - 'A') + 10;
1882 else {
1883 xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
1884 val = 0;
1885 break;
1886 }
1887 if (val > 0x10FFFF)
1888 outofrange = val;
1889
1890 ptr++;
1891 cur = *ptr;
1892 }
1893 if (cur == ';')
1894 ptr++;
1895 } else if ((cur == '&') && (ptr[1] == '#')){
1896 ptr += 2;
1897 cur = *ptr;
1898 while (cur != ';') { /* Non input consuming loops */
1899 if ((cur >= '0') && (cur <= '9'))
1900 val = val * 10 + (cur - '0');
1901 else {
1902 xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
1903 val = 0;
1904 break;
1905 }
1906 if (val > 0x10FFFF)
1907 outofrange = val;
1908
1909 ptr++;
1910 cur = *ptr;
1911 }
1912 if (cur == ';')
1913 ptr++;
1914 } else {
1915 xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
1916 return(0);
1917 }
1918 *str = ptr;
1919
1920 /*
1921 * [ WFC: Legal Character ]
1922 * Characters referred to using character references must match the
1923 * production for Char.
1924 */
1925 if ((IS_CHAR(val) && (outofrange == 0))) {
1926 return(val);
1927 } else {
1928 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
1929 "xmlParseStringCharRef: invalid xmlChar value %d\n",
1930 val);
1931 }
1932 return(0);
1933}
1934
1935/**
1936 * xmlNewBlanksWrapperInputStream:
1937 * @ctxt: an XML parser context
1938 * @entity: an Entity pointer
1939 *
1940 * Create a new input stream for wrapping
1941 * blanks around a PEReference
1942 *
1943 * Returns the new input stream or NULL
1944 */
1945
1946static void deallocblankswrapper (xmlChar *str) {xmlFree(str);}
1947
1948static xmlParserInputPtr
1949xmlNewBlanksWrapperInputStream(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
1950 xmlParserInputPtr input;
1951 xmlChar *buffer;
1952 size_t length;
1953 if (entity == NULL) {
1954 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
1955 "xmlNewBlanksWrapperInputStream entity\n");
1956 return(NULL);
1957 }
1958 if (xmlParserDebugEntities)
1959 xmlGenericError(xmlGenericErrorContext,
1960 "new blanks wrapper for entity: %s\n", entity->name);
1961 input = xmlNewInputStream(ctxt);
1962 if (input == NULL) {
1963 return(NULL);
1964 }
1965 length = xmlStrlen(entity->name) + 5;
1966 buffer = xmlMallocAtomic(length);
1967 if (buffer == NULL) {
1968 xmlErrMemory(ctxt, NULL);
1969 return(NULL);
1970 }
1971 buffer [0] = ' ';
1972 buffer [1] = '%';
1973 buffer [length-3] = ';';
1974 buffer [length-2] = ' ';
1975 buffer [length-1] = 0;
1976 memcpy(buffer + 2, entity->name, length - 5);
1977 input->free = deallocblankswrapper;
1978 input->base = buffer;
1979 input->cur = buffer;
1980 input->length = length;
1981 input->end = &buffer[length];
1982 return(input);
1983}
1984
1985/**
1986 * xmlParserHandlePEReference:
1987 * @ctxt: the parser context
1988 *
1989 * [69] PEReference ::= '%' Name ';'
1990 *
1991 * [ WFC: No Recursion ]
1992 * A parsed entity must not contain a recursive
1993 * reference to itself, either directly or indirectly.
1994 *
1995 * [ WFC: Entity Declared ]
1996 * In a document without any DTD, a document with only an internal DTD
1997 * subset which contains no parameter entity references, or a document
1998 * with "standalone='yes'", ... ... The declaration of a parameter
1999 * entity must precede any reference to it...
2000 *
2001 * [ VC: Entity Declared ]
2002 * In a document with an external subset or external parameter entities
2003 * with "standalone='no'", ... ... The declaration of a parameter entity
2004 * must precede any reference to it...
2005 *
2006 * [ WFC: In DTD ]
2007 * Parameter-entity references may only appear in the DTD.
2008 * NOTE: misleading but this is handled.
2009 *
2010 * A PEReference may have been detected in the current input stream
2011 * the handling is done accordingly to
2012 * http://www.w3.org/TR/REC-xml#entproc
2013 * i.e.
2014 * - Included in literal in entity values
2015 * - Included as Parameter Entity reference within DTDs
2016 */
2017void
2018xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) {
2019 const xmlChar *name;
2020 xmlEntityPtr entity = NULL;
2021 xmlParserInputPtr input;
2022
2023 if (RAW != '%') return;
2024 switch(ctxt->instate) {
2025 case XML_PARSER_CDATA_SECTION:
2026 return;
2027 case XML_PARSER_COMMENT:
2028 return;
2029 case XML_PARSER_START_TAG:
2030 return;
2031 case XML_PARSER_END_TAG:
2032 return;
2033 case XML_PARSER_EOF:
2034 xmlFatalErr(ctxt, XML_ERR_PEREF_AT_EOF, NULL);
2035 return;
2036 case XML_PARSER_PROLOG:
2037 case XML_PARSER_START:
2038 case XML_PARSER_MISC:
2039 xmlFatalErr(ctxt, XML_ERR_PEREF_IN_PROLOG, NULL);
2040 return;
2041 case XML_PARSER_ENTITY_DECL:
2042 case XML_PARSER_CONTENT:
2043 case XML_PARSER_ATTRIBUTE_VALUE:
2044 case XML_PARSER_PI:
2045 case XML_PARSER_SYSTEM_LITERAL:
2046 case XML_PARSER_PUBLIC_LITERAL:
2047 /* we just ignore it there */
2048 return;
2049 case XML_PARSER_EPILOG:
2050 xmlFatalErr(ctxt, XML_ERR_PEREF_IN_EPILOG, NULL);
2051 return;
2052 case XML_PARSER_ENTITY_VALUE:
2053 /*
2054 * NOTE: in the case of entity values, we don't do the
2055 * substitution here since we need the literal
2056 * entity value to be able to save the internal
2057 * subset of the document.
2058 * This will be handled by xmlStringDecodeEntities
2059 */
2060 return;
2061 case XML_PARSER_DTD:
2062 /*
2063 * [WFC: Well-Formedness Constraint: PEs in Internal Subset]
2064 * In the internal DTD subset, parameter-entity references
2065 * can occur only where markup declarations can occur, not
2066 * within markup declarations.
2067 * In that case this is handled in xmlParseMarkupDecl
2068 */
2069 if ((ctxt->external == 0) && (ctxt->inputNr == 1))
2070 return;
2071 if (IS_BLANK_CH(NXT(1)) || NXT(1) == 0)
2072 return;
2073 break;
2074 case XML_PARSER_IGNORE:
2075 return;
2076 }
2077
2078 NEXT;
2079 name = xmlParseName(ctxt);
2080 if (xmlParserDebugEntities)
2081 xmlGenericError(xmlGenericErrorContext,
2082 "PEReference: %s\n", name);
2083 if (name == NULL) {
2084 xmlFatalErr(ctxt, XML_ERR_PEREF_NO_NAME, NULL);
2085 } else {
2086 if (RAW == ';') {
2087 NEXT;
2088 if ((ctxt->sax != NULL) && (ctxt->sax->getParameterEntity != NULL))
2089 entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
2090 if (entity == NULL) {
2091
2092 /*
2093 * [ WFC: Entity Declared ]
2094 * In a document without any DTD, a document with only an
2095 * internal DTD subset which contains no parameter entity
2096 * references, or a document with "standalone='yes'", ...
2097 * ... The declaration of a parameter entity must precede
2098 * any reference to it...
2099 */
2100 if ((ctxt->standalone == 1) ||
2101 ((ctxt->hasExternalSubset == 0) &&
2102 (ctxt->hasPErefs == 0))) {
2103 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
2104 "PEReference: %%%s; not found\n", name);
2105 } else {
2106 /*
2107 * [ VC: Entity Declared ]
2108 * In a document with an external subset or external
2109 * parameter entities with "standalone='no'", ...
2110 * ... The declaration of a parameter entity must precede
2111 * any reference to it...
2112 */
2113 if ((ctxt->validate) && (ctxt->vctxt.error != NULL)) {
2114 xmlValidityError(ctxt, XML_WAR_UNDECLARED_ENTITY,
2115 "PEReference: %%%s; not found\n",
2116 name);
2117 } else
2118 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
2119 "PEReference: %%%s; not found\n",
2120 name, NULL);
2121 ctxt->valid = 0;
2122 }
2123 } else if (ctxt->input->free != deallocblankswrapper) {
2124 input = xmlNewBlanksWrapperInputStream(ctxt, entity);
2125 xmlPushInput(ctxt, input);
2126 } else {
2127 if ((entity->etype == XML_INTERNAL_PARAMETER_ENTITY) ||
2128 (entity->etype == XML_EXTERNAL_PARAMETER_ENTITY)) {
2129 xmlChar start[4];
2130 xmlCharEncoding enc;
2131
2132 /*
2133 * handle the extra spaces added before and after
2134 * c.f. http://www.w3.org/TR/REC-xml#as-PE
2135 * this is done independently.
2136 */
2137 input = xmlNewEntityInputStream(ctxt, entity);
2138 xmlPushInput(ctxt, input);
2139
2140 /*
2141 * Get the 4 first bytes and decode the charset
2142 * if enc != XML_CHAR_ENCODING_NONE
2143 * plug some encoding conversion routines.
2144 * Note that, since we may have some non-UTF8
2145 * encoding (like UTF16, bug 135229), the 'length'
2146 * is not known, but we can calculate based upon
2147 * the amount of data in the buffer.
2148 */
2149 GROW
2150 if ((ctxt->input->end - ctxt->input->cur)>=4) {
2151 start[0] = RAW;
2152 start[1] = NXT(1);
2153 start[2] = NXT(2);
2154 start[3] = NXT(3);
2155 enc = xmlDetectCharEncoding(start, 4);
2156 if (enc != XML_CHAR_ENCODING_NONE) {
2157 xmlSwitchEncoding(ctxt, enc);
2158 }
2159 }
2160
2161 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
2162 (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l' )) &&
2163 (IS_BLANK_CH(NXT(5)))) {
2164 xmlParseTextDecl(ctxt);
2165 }
2166 } else {
2167 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
2168 "PEReference: %s is not a parameter entity\n",
2169 name);
2170 }
2171 }
2172 } else {
2173 xmlFatalErr(ctxt, XML_ERR_PEREF_SEMICOL_MISSING, NULL);
2174 }
2175 }
2176}
2177
/*
 * Macro used to grow the current buffer.
 *
 * growBuffer(buffer) doubles the companion variable <buffer>_size and
 * reallocates <buffer> to that many xmlChars. The enclosing function must
 * therefore declare <buffer>_size and provide a `mem_error` label, which is
 * jumped to when the reallocation fails (the original buffer is still
 * valid at that point, but <buffer>_size has already been doubled).
 * The expansion is a brace block, not a do/while(0).
 */
#define growBuffer(buffer) {						\
    xmlChar *tmp;							\
    buffer##_size *= 2;							\
    tmp = (xmlChar *)							\
    		xmlRealloc(buffer, buffer##_size * sizeof(xmlChar));	\
    if (tmp == NULL) goto mem_error;					\
    buffer = tmp;							\
}
2189
2190/**
2191 * xmlStringLenDecodeEntities:
2192 * @ctxt: the parser context
2193 * @str: the input string
2194 * @len: the string length
2195 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2196 * @end: an end marker xmlChar, 0 if none
2197 * @end2: an end marker xmlChar, 0 if none
2198 * @end3: an end marker xmlChar, 0 if none
2199 *
2200 * Takes a entity string content and process to do the adequate substitutions.
2201 *
2202 * [67] Reference ::= EntityRef | CharRef
2203 *
2204 * [69] PEReference ::= '%' Name ';'
2205 *
2206 * Returns A newly allocated string with the substitution done. The caller
2207 * must deallocate it !
2208 */
2209xmlChar *
2210xmlStringLenDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2211 int what, xmlChar end, xmlChar end2, xmlChar end3) {
2212 xmlChar *buffer = NULL;
2213 int buffer_size = 0;
2214
2215 xmlChar *current = NULL;
2216 const xmlChar *last;
2217 xmlEntityPtr ent;
2218 int c,l;
2219 int nbchars = 0;
2220
2221 if ((ctxt == NULL) || (str == NULL) || (len < 0))
2222 return(NULL);
2223 last = str + len;
2224
2225 if (ctxt->depth > 40) {
2226 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
2227 return(NULL);
2228 }
2229
2230 /*
2231 * allocate a translation buffer.
2232 */
2233 buffer_size = XML_PARSER_BIG_BUFFER_SIZE;
2234 buffer = (xmlChar *) xmlMallocAtomic(buffer_size * sizeof(xmlChar));
2235 if (buffer == NULL) goto mem_error;
2236
2237 /*
2238 * OK loop until we reach one of the ending char or a size limit.
2239 * we are operating on already parsed values.
2240 */
2241 if (str < last)
2242 c = CUR_SCHAR(str, l);
2243 else
2244 c = 0;
2245 while ((c != 0) && (c != end) && /* non input consuming loop */
2246 (c != end2) && (c != end3)) {
2247
2248 if (c == 0) break;
2249 if ((c == '&') && (str[1] == '#')) {
2250 int val = xmlParseStringCharRef(ctxt, &str);
2251 if (val != 0) {
2252 COPY_BUF(0,buffer,nbchars,val);
2253 }
2254 if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
2255 growBuffer(buffer);
2256 }
2257 } else if ((c == '&') && (what & XML_SUBSTITUTE_REF)) {
2258 if (xmlParserDebugEntities)
2259 xmlGenericError(xmlGenericErrorContext,
2260 "String decoding Entity Reference: %.30s\n",
2261 str);
2262 ent = xmlParseStringEntityRef(ctxt, &str);
2263 if ((ent != NULL) &&
2264 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
2265 if (ent->content != NULL) {
2266 COPY_BUF(0,buffer,nbchars,ent->content[0]);
2267 if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
2268 growBuffer(buffer);
2269 }
2270 } else {
2271 xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
2272 "predefined entity has no content\n");
2273 }
2274 } else if ((ent != NULL) && (ent->content != NULL)) {
2275 xmlChar *rep;
2276
2277 ctxt->depth++;
2278 rep = xmlStringDecodeEntities(ctxt, ent->content, what,
2279 0, 0, 0);
2280 ctxt->depth--;
2281 if (rep != NULL) {
2282 current = rep;
2283 while (*current != 0) { /* non input consuming loop */
2284 buffer[nbchars++] = *current++;
2285 if (nbchars >
2286 buffer_size - XML_PARSER_BUFFER_SIZE) {
2287 growBuffer(buffer);
2288 }
2289 }
2290 xmlFree(rep);
2291 }
2292 } else if (ent != NULL) {
2293 int i = xmlStrlen(ent->name);
2294 const xmlChar *cur = ent->name;
2295
2296 buffer[nbchars++] = '&';
2297 if (nbchars > buffer_size - i - XML_PARSER_BUFFER_SIZE) {
2298 growBuffer(buffer);
2299 }
2300 for (;i > 0;i--)
2301 buffer[nbchars++] = *cur++;
2302 buffer[nbchars++] = ';';
2303 }
2304 } else if (c == '%' && (what & XML_SUBSTITUTE_PEREF)) {
2305 if (xmlParserDebugEntities)
2306 xmlGenericError(xmlGenericErrorContext,
2307 "String decoding PE Reference: %.30s\n", str);
2308 ent = xmlParseStringPEReference(ctxt, &str);
2309 if (ent != NULL) {
2310 xmlChar *rep;
2311
2312 ctxt->depth++;
2313 rep = xmlStringDecodeEntities(ctxt, ent->content, what,
2314 0, 0, 0);
2315 ctxt->depth--;
2316 if (rep != NULL) {
2317 current = rep;
2318 while (*current != 0) { /* non input consuming loop */
2319 buffer[nbchars++] = *current++;
2320 if (nbchars >
2321 buffer_size - XML_PARSER_BUFFER_SIZE) {
2322 growBuffer(buffer);
2323 }
2324 }
2325 xmlFree(rep);
2326 }
2327 }
2328 } else {
2329 COPY_BUF(l,buffer,nbchars,c);
2330 str += l;
2331 if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
2332 growBuffer(buffer);
2333 }
2334 }
2335 if (str < last)
2336 c = CUR_SCHAR(str, l);
2337 else
2338 c = 0;
2339 }
2340 buffer[nbchars++] = 0;
2341 return(buffer);
2342
2343mem_error:
2344 xmlErrMemory(ctxt, NULL);
2345 return(NULL);
2346}
2347
2348/**
2349 * xmlStringDecodeEntities:
2350 * @ctxt: the parser context
2351 * @str: the input string
2352 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2353 * @end: an end marker xmlChar, 0 if none
2354 * @end2: an end marker xmlChar, 0 if none
2355 * @end3: an end marker xmlChar, 0 if none
2356 *
2357 * Takes a entity string content and process to do the adequate substitutions.
2358 *
2359 * [67] Reference ::= EntityRef | CharRef
2360 *
2361 * [69] PEReference ::= '%' Name ';'
2362 *
2363 * Returns A newly allocated string with the substitution done. The caller
2364 * must deallocate it !
2365 */
2366xmlChar *
2367xmlStringDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int what,
2368 xmlChar end, xmlChar end2, xmlChar end3) {
2369 if ((ctxt == NULL) || (str == NULL)) return(NULL);
2370 return(xmlStringLenDecodeEntities(ctxt, str, xmlStrlen(str), what,
2371 end, end2, end3));
2372}
2373
2374/************************************************************************
2375 * *
2376 * Commodity functions, cleanup needed ? *
2377 * *
2378 ************************************************************************/
2379
2380/**
2381 * areBlanks:
2382 * @ctxt: an XML parser context
2383 * @str: a xmlChar *
2384 * @len: the size of @str
2385 * @blank_chars: we know the chars are blanks
2386 *
2387 * Is this a sequence of blank chars that one can ignore ?
2388 *
2389 * Returns 1 if ignorable 0 otherwise.
2390 */
2391
2392static int areBlanks(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2393 int blank_chars) {
2394 int i, ret;
2395 xmlNodePtr lastChild;
2396
2397 /*
2398 * Don't spend time trying to differentiate them, the same callback is
2399 * used !
2400 */
2401 if (ctxt->sax->ignorableWhitespace == ctxt->sax->characters)
2402 return(0);
2403
2404 /*
2405 * Check for xml:space value.
2406 */
2407 if ((ctxt->space == NULL) || (*(ctxt->space) == 1) ||
2408 (*(ctxt->space) == -2))
2409 return(0);
2410
2411 /*
2412 * Check that the string is made of blanks
2413 */
2414 if (blank_chars == 0) {
2415 for (i = 0;i < len;i++)
2416 if (!(IS_BLANK_CH(str[i]))) return(0);
2417 }
2418
2419 /*
2420 * Look if the element is mixed content in the DTD if available
2421 */
2422 if (ctxt->node == NULL) return(0);
2423 if (ctxt->myDoc != NULL) {
2424 ret = xmlIsMixedElement(ctxt->myDoc, ctxt->node->name);
2425 if (ret == 0) return(1);
2426 if (ret == 1) return(0);
2427 }
2428
2429 /*
2430 * Otherwise, heuristic :-\
2431 */
2432 if ((RAW != '<') && (RAW != 0xD)) return(0);
2433 if ((ctxt->node->children == NULL) &&
2434 (RAW == '<') && (NXT(1) == '/')) return(0);
2435
2436 lastChild = xmlGetLastChild(ctxt->node);
2437 if (lastChild == NULL) {
2438 if ((ctxt->node->type != XML_ELEMENT_NODE) &&
2439 (ctxt->node->content != NULL)) return(0);
2440 } else if (xmlNodeIsText(lastChild))
2441 return(0);
2442 else if ((ctxt->node->children != NULL) &&
2443 (xmlNodeIsText(ctxt->node->children)))
2444 return(0);
2445 return(1);
2446}
2447
2448/************************************************************************
2449 * *
2450 * Extra stuff for namespace support *
2451 * Relates to http://www.w3.org/TR/WD-xml-names *
2452 * *
2453 ************************************************************************/
2454
2455/**
2456 * xmlSplitQName:
2457 * @ctxt: an XML parser context
2458 * @name: an XML parser context
2459 * @prefix: a xmlChar **
2460 *
2461 * parse an UTF8 encoded XML qualified name string
2462 *
2463 * [NS 5] QName ::= (Prefix ':')? LocalPart
2464 *
2465 * [NS 6] Prefix ::= NCName
2466 *
2467 * [NS 7] LocalPart ::= NCName
2468 *
2469 * Returns the local part, and prefix is updated
2470 * to get the Prefix if any.
2471 */
2472
2473xmlChar *
2474xmlSplitQName(xmlParserCtxtPtr ctxt, const xmlChar *name, xmlChar **prefix) {
2475 xmlChar buf[XML_MAX_NAMELEN + 5];
2476 xmlChar *buffer = NULL;
2477 int len = 0;
2478 int max = XML_MAX_NAMELEN;
2479 xmlChar *ret = NULL;
2480 const xmlChar *cur = name;
2481 int c;
2482
2483 if (prefix == NULL) return(NULL);
2484 *prefix = NULL;
2485
2486 if (cur == NULL) return(NULL);
2487
2488#ifndef XML_XML_NAMESPACE
2489 /* xml: prefix is not really a namespace */
2490 if ((cur[0] == 'x') && (cur[1] == 'm') &&
2491 (cur[2] == 'l') && (cur[3] == ':'))
2492 return(xmlStrdup(name));
2493#endif
2494
2495 /* nasty but well=formed */
2496 if (cur[0] == ':')
2497 return(xmlStrdup(name));
2498
2499 c = *cur++;
2500 while ((c != 0) && (c != ':') && (len < max)) { /* tested bigname.xml */
2501 buf[len++] = c;
2502 c = *cur++;
2503 }
2504 if (len >= max) {
2505 /*
2506 * Okay someone managed to make a huge name, so he's ready to pay
2507 * for the processing speed.
2508 */
2509 max = len * 2;
2510
2511 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
2512 if (buffer == NULL) {
2513 xmlErrMemory(ctxt, NULL);
2514 return(NULL);
2515 }
2516 memcpy(buffer, buf, len);
2517 while ((c != 0) && (c != ':')) { /* tested bigname.xml */
2518 if (len + 10 > max) {
2519 xmlChar *tmp;
2520
2521 max *= 2;
2522 tmp = (xmlChar *) xmlRealloc(buffer,
2523 max * sizeof(xmlChar));
2524 if (tmp == NULL) {
2525 xmlFree(tmp);
2526 xmlErrMemory(ctxt, NULL);
2527 return(NULL);
2528 }
2529 buffer = tmp;
2530 }
2531 buffer[len++] = c;
2532 c = *cur++;
2533 }
2534 buffer[len] = 0;
2535 }
2536
2537 if ((c == ':') && (*cur == 0)) {
2538 if (buffer != NULL)
2539 xmlFree(buffer);
2540 *prefix = NULL;
2541 return(xmlStrdup(name));
2542 }
2543
2544 if (buffer == NULL)
2545 ret = xmlStrndup(buf, len);
2546 else {
2547 ret = buffer;
2548 buffer = NULL;
2549 max = XML_MAX_NAMELEN;
2550 }
2551
2552
2553 if (c == ':') {
2554 c = *cur;
2555 *prefix = ret;
2556 if (c == 0) {
2557 return(xmlStrndup(BAD_CAST "", 0));
2558 }
2559 len = 0;
2560
2561 /*
2562 * Check that the first character is proper to start
2563 * a new name
2564 */
2565 if (!(((c >= 0x61) && (c <= 0x7A)) ||
2566 ((c >= 0x41) && (c <= 0x5A)) ||
2567 (c == '_') || (c == ':'))) {
2568 int l;
2569 int first = CUR_SCHAR(cur, l);
2570
2571 if (!IS_LETTER(first) && (first != '_')) {
2572 xmlFatalErrMsgStr(ctxt, XML_NS_ERR_QNAME,
2573 "Name %s is not XML Namespace compliant\n",
2574 name);
2575 }
2576 }
2577 cur++;
2578
2579 while ((c != 0) && (len < max)) { /* tested bigname2.xml */
2580 buf[len++] = c;
2581 c = *cur++;
2582 }
2583 if (len >= max) {
2584 /*
2585 * Okay someone managed to make a huge name, so he's ready to pay
2586 * for the processing speed.
2587 */
2588 max = len * 2;
2589
2590 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
2591 if (buffer == NULL) {
2592 xmlErrMemory(ctxt, NULL);
2593 return(NULL);
2594 }
2595 memcpy(buffer, buf, len);
2596 while (c != 0) { /* tested bigname2.xml */
2597 if (len + 10 > max) {
2598 xmlChar *tmp;
2599
2600 max *= 2;
2601 tmp = (xmlChar *) xmlRealloc(buffer,
2602 max * sizeof(xmlChar));
2603 if (tmp == NULL) {
2604 xmlErrMemory(ctxt, NULL);
2605 xmlFree(buffer);
2606 return(NULL);
2607 }
2608 buffer = tmp;
2609 }
2610 buffer[len++] = c;
2611 c = *cur++;
2612 }
2613 buffer[len] = 0;
2614 }
2615
2616 if (buffer == NULL)
2617 ret = xmlStrndup(buf, len);
2618 else {
2619 ret = buffer;
2620 }
2621 }
2622
2623 return(ret);
2624}
2625
2626/************************************************************************
2627 * *
2628 * The parser itself *
2629 * Relates to http://www.w3.org/TR/REC-xml *
2630 * *
2631 ************************************************************************/
2632
2633static const xmlChar * xmlParseNameComplex(xmlParserCtxtPtr ctxt);
2634static xmlChar * xmlParseAttValueInternal(xmlParserCtxtPtr ctxt,
2635 int *len, int *alloc, int normalize);
2636
2637/**
2638 * xmlParseName:
2639 * @ctxt: an XML parser context
2640 *
2641 * parse an XML name.
2642 *
2643 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
2644 * CombiningChar | Extender
2645 *
2646 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
2647 *
2648 * [6] Names ::= Name (#x20 Name)*
2649 *
2650 * Returns the Name parsed or NULL
2651 */
2652
2653const xmlChar *
2654xmlParseName(xmlParserCtxtPtr ctxt) {
2655 const xmlChar *in;
2656 const xmlChar *ret;
2657 int count = 0;
2658
2659 GROW;
2660
2661 /*
2662 * Accelerator for simple ASCII names
2663 */
2664 in = ctxt->input->cur;
2665 if (((*in >= 0x61) && (*in <= 0x7A)) ||
2666 ((*in >= 0x41) && (*in <= 0x5A)) ||
2667 (*in == '_') || (*in == ':')) {
2668 in++;
2669 while (((*in >= 0x61) && (*in <= 0x7A)) ||
2670 ((*in >= 0x41) && (*in <= 0x5A)) ||
2671 ((*in >= 0x30) && (*in <= 0x39)) ||
2672 (*in == '_') || (*in == '-') ||
2673 (*in == ':') || (*in == '.'))
2674 in++;
2675 if ((*in > 0) && (*in < 0x80)) {
2676 count = in - ctxt->input->cur;
2677 ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
2678 ctxt->input->cur = in;
2679 ctxt->nbChars += count;
2680 ctxt->input->col += count;
2681 if (ret == NULL)
2682 xmlErrMemory(ctxt, NULL);
2683 return(ret);
2684 }
2685 }
2686 return(xmlParseNameComplex(ctxt));
2687}
2688
2689/**
2690 * xmlParseNameAndCompare:
2691 * @ctxt: an XML parser context
2692 *
2693 * parse an XML name and compares for match
2694 * (specialized for endtag parsing)
2695 *
2696 * Returns NULL for an illegal name, (xmlChar*) 1 for success
2697 * and the name for mismatch
2698 */
2699
2700static const xmlChar *
2701xmlParseNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *other) {
2702 register const xmlChar *cmp = other;
2703 register const xmlChar *in;
2704 const xmlChar *ret;
2705
2706 GROW;
2707
2708 in = ctxt->input->cur;
2709 while (*in != 0 && *in == *cmp) {
2710 ++in;
2711 ++cmp;
2712 ctxt->input->col++;
2713 }
2714 if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
2715 /* success */
2716 ctxt->input->cur = in;
2717 return (const xmlChar*) 1;
2718 }
2719 /* failure (or end of input buffer), check with full function */
2720 ret = xmlParseName (ctxt);
2721 /* strings coming from the dictionnary direct compare possible */
2722 if (ret == other) {
2723 return (const xmlChar*) 1;
2724 }
2725 return ret;
2726}
2727
2728static const xmlChar *
2729xmlParseNameComplex(xmlParserCtxtPtr ctxt) {
2730 int len = 0, l;
2731 int c;
2732 int count = 0;
2733
2734 /*
2735 * Handler for more complex cases
2736 */
2737 GROW;
2738 c = CUR_CHAR(l);
2739 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
2740 (!IS_LETTER(c) && (c != '_') &&
2741 (c != ':'))) {
2742 return(NULL);
2743 }
2744
2745 while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
2746 ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
2747 (c == '.') || (c == '-') ||
2748 (c == '_') || (c == ':') ||
2749 (IS_COMBINING(c)) ||
2750 (IS_EXTENDER(c)))) {
2751 if (count++ > 100) {
2752 count = 0;
2753 GROW;
2754 }
2755 len += l;
2756 NEXTL(l);
2757 c = CUR_CHAR(l);
2758 }
2759 if ((*ctxt->input->cur == '\n') && (ctxt->input->cur[-1] == '\r'))
2760 return(xmlDictLookup(ctxt->dict, ctxt->input->cur - (len + 1), len));
2761 return(xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len));
2762}
2763
2764/**
2765 * xmlParseStringName:
2766 * @ctxt: an XML parser context
2767 * @str: a pointer to the string pointer (IN/OUT)
2768 *
2769 * parse an XML name.
2770 *
2771 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
2772 * CombiningChar | Extender
2773 *
2774 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
2775 *
2776 * [6] Names ::= Name (#x20 Name)*
2777 *
2778 * Returns the Name parsed or NULL. The @str pointer
2779 * is updated to the current location in the string.
2780 */
2781
2782static xmlChar *
2783xmlParseStringName(xmlParserCtxtPtr ctxt, const xmlChar** str) {
2784 xmlChar buf[XML_MAX_NAMELEN + 5];
2785 const xmlChar *cur = *str;
2786 int len = 0, l;
2787 int c;
2788
2789 c = CUR_SCHAR(cur, l);
2790 if (!IS_LETTER(c) && (c != '_') &&
2791 (c != ':')) {
2792 return(NULL);
2793 }
2794
2795 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigentname.xml */
2796 (c == '.') || (c == '-') ||
2797 (c == '_') || (c == ':') ||
2798 (IS_COMBINING(c)) ||
2799 (IS_EXTENDER(c))) {
2800 COPY_BUF(l,buf,len,c);
2801 cur += l;
2802 c = CUR_SCHAR(cur, l);
2803 if (len >= XML_MAX_NAMELEN) { /* test bigentname.xml */
2804 /*
2805 * Okay someone managed to make a huge name, so he's ready to pay
2806 * for the processing speed.
2807 */
2808 xmlChar *buffer;
2809 int max = len * 2;
2810
2811 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
2812 if (buffer == NULL) {
2813 xmlErrMemory(ctxt, NULL);
2814 return(NULL);
2815 }
2816 memcpy(buffer, buf, len);
2817 while ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
2818 /* test bigentname.xml */
2819 (c == '.') || (c == '-') ||
2820 (c == '_') || (c == ':') ||
2821 (IS_COMBINING(c)) ||
2822 (IS_EXTENDER(c))) {
2823 if (len + 10 > max) {
2824 xmlChar *tmp;
2825 max *= 2;
2826 tmp = (xmlChar *) xmlRealloc(buffer,
2827 max * sizeof(xmlChar));
2828 if (tmp == NULL) {
2829 xmlErrMemory(ctxt, NULL);
2830 xmlFree(buffer);
2831 return(NULL);
2832 }
2833 buffer = tmp;
2834 }
2835 COPY_BUF(l,buffer,len,c);
2836 cur += l;
2837 c = CUR_SCHAR(cur, l);
2838 }
2839 buffer[len] = 0;
2840 *str = cur;
2841 return(buffer);
2842 }
2843 }
2844 *str = cur;
2845 return(xmlStrndup(buf, len));
2846}
2847
2848/**
2849 * xmlParseNmtoken:
2850 * @ctxt: an XML parser context
2851 *
2852 * parse an XML Nmtoken.
2853 *
2854 * [7] Nmtoken ::= (NameChar)+
2855 *
2856 * [8] Nmtokens ::= Nmtoken (#x20 Nmtoken)*
2857 *
2858 * Returns the Nmtoken parsed or NULL
2859 */
2860
2861xmlChar *
2862xmlParseNmtoken(xmlParserCtxtPtr ctxt) {
2863 xmlChar buf[XML_MAX_NAMELEN + 5];
2864 int len = 0, l;
2865 int c;
2866 int count = 0;
2867
2868 GROW;
2869 c = CUR_CHAR(l);
2870
2871 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigtoken.xml */
2872 (c == '.') || (c == '-') ||
2873 (c == '_') || (c == ':') ||
2874 (IS_COMBINING(c)) ||
2875 (IS_EXTENDER(c))) {
2876 if (count++ > 100) {
2877 count = 0;
2878 GROW;
2879 }
2880 COPY_BUF(l,buf,len,c);
2881 NEXTL(l);
2882 c = CUR_CHAR(l);
2883 if (len >= XML_MAX_NAMELEN) {
2884 /*
2885 * Okay someone managed to make a huge token, so he's ready to pay
2886 * for the processing speed.
2887 */
2888 xmlChar *buffer;
2889 int max = len * 2;
2890
2891 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
2892 if (buffer == NULL) {
2893 xmlErrMemory(ctxt, NULL);
2894 return(NULL);
2895 }
2896 memcpy(buffer, buf, len);
2897 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigtoken.xml */
2898 (c == '.') || (c == '-') ||
2899 (c == '_') || (c == ':') ||
2900 (IS_COMBINING(c)) ||
2901 (IS_EXTENDER(c))) {
2902 if (count++ > 100) {
2903 count = 0;
2904 GROW;
2905 }
2906 if (len + 10 > max) {
2907 xmlChar *tmp;
2908
2909 max *= 2;
2910 tmp = (xmlChar *) xmlRealloc(buffer,
2911 max * sizeof(xmlChar));
2912 if (tmp == NULL) {
2913 xmlErrMemory(ctxt, NULL);
2914 xmlFree(buffer);
2915 return(NULL);
2916 }
2917 buffer = tmp;
2918 }
2919 COPY_BUF(l,buffer,len,c);
2920 NEXTL(l);
2921 c = CUR_CHAR(l);
2922 }
2923 buffer[len] = 0;
2924 return(buffer);
2925 }
2926 }
2927 if (len == 0)
2928 return(NULL);
2929 return(xmlStrndup(buf, len));
2930}
2931
2932/**
2933 * xmlParseEntityValue:
2934 * @ctxt: an XML parser context
2935 * @orig: if non-NULL store a copy of the original entity value
2936 *
2937 * parse a value for ENTITY declarations
2938 *
2939 * [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' |
2940 * "'" ([^%&'] | PEReference | Reference)* "'"
2941 *
2942 * Returns the EntityValue parsed with reference substituted or NULL
2943 */
2944
2945xmlChar *
2946xmlParseEntityValue(xmlParserCtxtPtr ctxt, xmlChar **orig) {
2947 xmlChar *buf = NULL;
2948 int len = 0;
2949 int size = XML_PARSER_BUFFER_SIZE;
2950 int c, l;
2951 xmlChar stop;
2952 xmlChar *ret = NULL;
2953 const xmlChar *cur = NULL;
2954 xmlParserInputPtr input;
2955
2956 if (RAW == '"') stop = '"';
2957 else if (RAW == '\'') stop = '\'';
2958 else {
2959 xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_STARTED, NULL);
2960 return(NULL);
2961 }
2962 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
2963 if (buf == NULL) {
2964 xmlErrMemory(ctxt, NULL);
2965 return(NULL);
2966 }
2967
2968 /*
2969 * The content of the entity definition is copied in a buffer.
2970 */
2971
2972 ctxt->instate = XML_PARSER_ENTITY_VALUE;
2973 input = ctxt->input;
2974 GROW;
2975 NEXT;
2976 c = CUR_CHAR(l);
2977 /*
2978 * NOTE: 4.4.5 Included in Literal
2979 * When a parameter entity reference appears in a literal entity
2980 * value, ... a single or double quote character in the replacement
2981 * text is always treated as a normal data character and will not
2982 * terminate the literal.
2983 * In practice it means we stop the loop only when back at parsing
2984 * the initial entity and the quote is found
2985 */
2986 while ((IS_CHAR(c)) && ((c != stop) || /* checked */
2987 (ctxt->input != input))) {
2988 if (len + 5 >= size) {
2989 xmlChar *tmp;
2990
2991 size *= 2;
2992 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
2993 if (tmp == NULL) {
2994 xmlErrMemory(ctxt, NULL);
2995 xmlFree(buf);
2996 return(NULL);
2997 }
2998 buf = tmp;
2999 }
3000 COPY_BUF(l,buf,len,c);
3001 NEXTL(l);
3002 /*
3003 * Pop-up of finished entities.
3004 */
3005 while ((RAW == 0) && (ctxt->inputNr > 1)) /* non input consuming */
3006 xmlPopInput(ctxt);
3007
3008 GROW;
3009 c = CUR_CHAR(l);
3010 if (c == 0) {
3011 GROW;
3012 c = CUR_CHAR(l);
3013 }
3014 }
3015 buf[len] = 0;
3016
3017 /*
3018 * Raise problem w.r.t. '&' and '%' being used in non-entities
3019 * reference constructs. Note Charref will be handled in
3020 * xmlStringDecodeEntities()
3021 */
3022 cur = buf;
3023 while (*cur != 0) { /* non input consuming */
3024 if ((*cur == '%') || ((*cur == '&') && (cur[1] != '#'))) {
3025 xmlChar *name;
3026 xmlChar tmp = *cur;
3027
3028 cur++;
3029 name = xmlParseStringName(ctxt, &cur);
3030 if ((name == NULL) || (*cur != ';')) {
3031 xmlFatalErrMsgInt(ctxt, XML_ERR_ENTITY_CHAR_ERROR,
3032 "EntityValue: '%c' forbidden except for entities references\n",
3033 tmp);
3034 }
3035 if ((tmp == '%') && (ctxt->inSubset == 1) &&
3036 (ctxt->inputNr == 1)) {
3037 xmlFatalErr(ctxt, XML_ERR_ENTITY_PE_INTERNAL, NULL);
3038 }
3039 if (name != NULL)
3040 xmlFree(name);
3041 if (*cur == 0)
3042 break;
3043 }
3044 cur++;
3045 }
3046
3047 /*
3048 * Then PEReference entities are substituted.
3049 */
3050 if (c != stop) {
3051 xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_FINISHED, NULL);
3052 xmlFree(buf);
3053 } else {
3054 NEXT;
3055 /*
3056 * NOTE: 4.4.7 Bypassed
3057 * When a general entity reference appears in the EntityValue in
3058 * an entity declaration, it is bypassed and left as is.
3059 * so XML_SUBSTITUTE_REF is not set here.
3060 */
3061 ret = xmlStringDecodeEntities(ctxt, buf, XML_SUBSTITUTE_PEREF,
3062 0, 0, 0);
3063 if (orig != NULL)
3064 *orig = buf;
3065 else
3066 xmlFree(buf);
3067 }
3068
3069 return(ret);
3070}
3071
3072/**
3073 * xmlParseAttValueComplex:
3074 * @ctxt: an XML parser context
3075 * @len: the resulting attribute len
3076 * @normalize: wether to apply the inner normalization
3077 *
3078 * parse a value for an attribute, this is the fallback function
3079 * of xmlParseAttValue() when the attribute parsing requires handling
3080 * of non-ASCII characters, or normalization compaction.
3081 *
3082 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
3083 */
3084static xmlChar *
3085xmlParseAttValueComplex(xmlParserCtxtPtr ctxt, int *attlen, int normalize) {
3086 xmlChar limit = 0;
3087 xmlChar *buf = NULL;
3088 int len = 0;
3089 int buf_size = 0;
3090 int c, l, in_space = 0;
3091 xmlChar *current = NULL;
3092 xmlEntityPtr ent;
3093
3094 if (NXT(0) == '"') {
3095 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
3096 limit = '"';
3097 NEXT;
3098 } else if (NXT(0) == '\'') {
3099 limit = '\'';
3100 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
3101 NEXT;
3102 } else {
3103 xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
3104 return(NULL);
3105 }
3106
3107 /*
3108 * allocate a translation buffer.
3109 */
3110 buf_size = XML_PARSER_BUFFER_SIZE;
3111 buf = (xmlChar *) xmlMallocAtomic(buf_size * sizeof(xmlChar));
3112 if (buf == NULL) goto mem_error;
3113
3114 /*
3115 * OK loop until we reach one of the ending char or a size limit.
3116 */
3117 c = CUR_CHAR(l);
3118 while ((NXT(0) != limit) && /* checked */
3119 (IS_CHAR(c)) && (c != '<')) {
3120 if (c == 0) break;
3121 if (c == '&') {
3122 in_space = 0;
3123 if (NXT(1) == '#') {
3124 int val = xmlParseCharRef(ctxt);
3125
3126 if (val == '&') {
3127 if (ctxt->replaceEntities) {
3128 if (len > buf_size - 10) {
3129 growBuffer(buf);
3130 }
3131 buf[len++] = '&';
3132 } else {
3133 /*
3134 * The reparsing will be done in xmlStringGetNodeList()
3135 * called by the attribute() function in SAX.c
3136 */
3137 if (len > buf_size - 10) {
3138 growBuffer(buf);
3139 }
3140 buf[len++] = '&';
3141 buf[len++] = '#';
3142 buf[len++] = '3';
3143 buf[len++] = '8';
3144 buf[len++] = ';';
3145 }
3146 } else {
3147 if (len > buf_size - 10) {
3148 growBuffer(buf);
3149 }
3150 len += xmlCopyChar(0, &buf[len], val);
3151 }
3152 } else {
3153 ent = xmlParseEntityRef(ctxt);
3154 if ((ent != NULL) &&
3155 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
3156 if (len > buf_size - 10) {
3157 growBuffer(buf);
3158 }
3159 if ((ctxt->replaceEntities == 0) &&
3160 (ent->content[0] == '&')) {
3161 buf[len++] = '&';
3162 buf[len++] = '#';
3163 buf[len++] = '3';
3164 buf[len++] = '8';
3165 buf[len++] = ';';
3166 } else {
3167 buf[len++] = ent->content[0];
3168 }
3169 } else if ((ent != NULL) &&
3170 (ctxt->replaceEntities != 0)) {
3171 xmlChar *rep;
3172
3173 if (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) {
3174 rep = xmlStringDecodeEntities(ctxt, ent->content,
3175 XML_SUBSTITUTE_REF,
3176 0, 0, 0);
3177 if (rep != NULL) {
3178 current = rep;
3179 while (*current != 0) { /* non input consuming */
3180 buf[len++] = *current++;
3181 if (len > buf_size - 10) {
3182 growBuffer(buf);
3183 }
3184 }
3185 xmlFree(rep);
3186 }
3187 } else {
3188 if (len > buf_size - 10) {
3189 growBuffer(buf);
3190 }
3191 if (ent->content != NULL)
3192 buf[len++] = ent->content[0];
3193 }
3194 } else if (ent != NULL) {
3195 int i = xmlStrlen(ent->name);
3196 const xmlChar *cur = ent->name;
3197
3198 /*
3199 * This may look absurd but is needed to detect
3200 * entities problems
3201 */
3202 if ((ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
3203 (ent->content != NULL)) {
3204 xmlChar *rep;
3205 rep = xmlStringDecodeEntities(ctxt, ent->content,
3206 XML_SUBSTITUTE_REF, 0, 0, 0);
3207 if (rep != NULL)
3208 xmlFree(rep);
3209 }
3210
3211 /*
3212 * Just output the reference
3213 */
3214 buf[len++] = '&';
3215 if (len > buf_size - i - 10) {
3216 growBuffer(buf);
3217 }
3218 for (;i > 0;i--)
3219 buf[len++] = *cur++;
3220 buf[len++] = ';';
3221 }
3222 }
3223 } else {
3224 if ((c == 0x20) || (c == 0xD) || (c == 0xA) || (c == 0x9)) {
3225 if ((len != 0) || (!normalize)) {
3226 if ((!normalize) || (!in_space)) {
3227 COPY_BUF(l,buf,len,0x20);
3228 if (len > buf_size - 10) {
3229 growBuffer(buf);
3230 }
3231 }
3232 in_space = 1;
3233 }
3234 } else {
3235 in_space = 0;
3236 COPY_BUF(l,buf,len,c);
3237 if (len > buf_size - 10) {
3238 growBuffer(buf);
3239 }
3240 }
3241 NEXTL(l);
3242 }
3243 GROW;
3244 c = CUR_CHAR(l);
3245 }
3246 if ((in_space) && (normalize)) {
3247 while (buf[len - 1] == 0x20) len--;
3248 }
3249 buf[len] = 0;
3250 if (RAW == '<') {
3251 xmlFatalErr(ctxt, XML_ERR_LT_IN_ATTRIBUTE, NULL);
3252 } else if (RAW != limit) {
3253 if ((c != 0) && (!IS_CHAR(c))) {
3254 xmlFatalErrMsg(ctxt, XML_ERR_INVALID_CHAR,
3255 "invalid character in attribute value\n");
3256 } else {
3257 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
3258 "AttValue: ' expected\n");
3259 }
3260 } else
3261 NEXT;
3262 if (attlen != NULL) *attlen = len;
3263 return(buf);
3264
3265mem_error:
3266 xmlErrMemory(ctxt, NULL);
3267 return(NULL);
3268}
3269
3270/**
3271 * xmlParseAttValue:
3272 * @ctxt: an XML parser context
3273 *
3274 * parse a value for an attribute
3275 * Note: the parser won't do substitution of entities here, this
3276 * will be handled later in xmlStringGetNodeList
3277 *
3278 * [10] AttValue ::= '"' ([^<&"] | Reference)* '"' |
3279 * "'" ([^<&'] | Reference)* "'"
3280 *
3281 * 3.3.3 Attribute-Value Normalization:
3282 * Before the value of an attribute is passed to the application or
3283 * checked for validity, the XML processor must normalize it as follows:
3284 * - a character reference is processed by appending the referenced
3285 * character to the attribute value
3286 * - an entity reference is processed by recursively processing the
3287 * replacement text of the entity
3288 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
3289 * appending #x20 to the normalized value, except that only a single
3290 * #x20 is appended for a "#xD#xA" sequence that is part of an external
3291 * parsed entity or the literal entity value of an internal parsed entity
3292 * - other characters are processed by appending them to the normalized value
3293 * If the declared value is not CDATA, then the XML processor must further
3294 * process the normalized attribute value by discarding any leading and
3295 * trailing space (#x20) characters, and by replacing sequences of space
3296 * (#x20) characters by a single space (#x20) character.
3297 * All attributes for which no declaration has been read should be treated
3298 * by a non-validating parser as if declared CDATA.
3299 *
3300 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
3301 */
3302
3303
3304xmlChar *
3305xmlParseAttValue(xmlParserCtxtPtr ctxt) {
3306 if ((ctxt == NULL) || (ctxt->input == NULL)) return(NULL);
3307 return(xmlParseAttValueInternal(ctxt, NULL, NULL, 0));
3308}
3309
3310/**
3311 * xmlParseSystemLiteral:
3312 * @ctxt: an XML parser context
3313 *
3314 * parse an XML Literal
3315 *
3316 * [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
3317 *
3318 * Returns the SystemLiteral parsed or NULL
3319 */
3320
3321xmlChar *
3322xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) {
3323 xmlChar *buf = NULL;
3324 int len = 0;
3325 int size = XML_PARSER_BUFFER_SIZE;
3326 int cur, l;
3327 xmlChar stop;
3328 int state = ctxt->instate;
3329 int count = 0;
3330
3331 SHRINK;
3332 if (RAW == '"') {
3333 NEXT;
3334 stop = '"';
3335 } else if (RAW == '\'') {
3336 NEXT;
3337 stop = '\'';
3338 } else {
3339 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
3340 return(NULL);
3341 }
3342
3343 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
3344 if (buf == NULL) {
3345 xmlErrMemory(ctxt, NULL);
3346 return(NULL);
3347 }
3348 ctxt->instate = XML_PARSER_SYSTEM_LITERAL;
3349 cur = CUR_CHAR(l);
3350 while ((IS_CHAR(cur)) && (cur != stop)) { /* checked */
3351 if (len + 5 >= size) {
3352 xmlChar *tmp;
3353
3354 size *= 2;
3355 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3356 if (tmp == NULL) {
3357 xmlFree(buf);
3358 xmlErrMemory(ctxt, NULL);
3359 ctxt->instate = (xmlParserInputState) state;
3360 return(NULL);
3361 }
3362 buf = tmp;
3363 }
3364 count++;
3365 if (count > 50) {
3366 GROW;
3367 count = 0;
3368 }
3369 COPY_BUF(l,buf,len,cur);
3370 NEXTL(l);
3371 cur = CUR_CHAR(l);
3372 if (cur == 0) {
3373 GROW;
3374 SHRINK;
3375 cur = CUR_CHAR(l);
3376 }
3377 }
3378 buf[len] = 0;
3379 ctxt->instate = (xmlParserInputState) state;
3380 if (!IS_CHAR(cur)) {
3381 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
3382 } else {
3383 NEXT;
3384 }
3385 return(buf);
3386}
3387
3388/**
3389 * xmlParsePubidLiteral:
3390 * @ctxt: an XML parser context
3391 *
3392 * parse an XML public literal
3393 *
3394 * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
3395 *
3396 * Returns the PubidLiteral parsed or NULL.
3397 */
3398
3399xmlChar *
3400xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) {
3401 xmlChar *buf = NULL;
3402 int len = 0;
3403 int size = XML_PARSER_BUFFER_SIZE;
3404 xmlChar cur;
3405 xmlChar stop;
3406 int count = 0;
3407 xmlParserInputState oldstate = ctxt->instate;
3408
3409 SHRINK;
3410 if (RAW == '"') {
3411 NEXT;
3412 stop = '"';
3413 } else if (RAW == '\'') {
3414 NEXT;
3415 stop = '\'';
3416 } else {
3417 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
3418 return(NULL);
3419 }
3420 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
3421 if (buf == NULL) {
3422 xmlErrMemory(ctxt, NULL);
3423 return(NULL);
3424 }
3425 ctxt->instate = XML_PARSER_PUBLIC_LITERAL;
3426 cur = CUR;
3427 while ((IS_PUBIDCHAR_CH(cur)) && (cur != stop)) { /* checked */
3428 if (len + 1 >= size) {
3429 xmlChar *tmp;
3430
3431 size *= 2;
3432 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3433 if (tmp == NULL) {
3434 xmlErrMemory(ctxt, NULL);
3435 xmlFree(buf);
3436 return(NULL);
3437 }
3438 buf = tmp;
3439 }
3440 buf[len++] = cur;
3441 count++;
3442 if (count > 50) {
3443 GROW;
3444 count = 0;
3445 }
3446 NEXT;
3447 cur = CUR;
3448 if (cur == 0) {
3449 GROW;
3450 SHRINK;
3451 cur = CUR;
3452 }
3453 }
3454 buf[len] = 0;
3455 if (cur != stop) {
3456 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
3457 } else {
3458 NEXT;
3459 }
3460 ctxt->instate = oldstate;
3461 return(buf);
3462}
3463
/*
 * Forward declaration: xmlParseCharData() below falls back to this routine,
 * whose definition only comes after it in this file.
 */
void xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata);
3465
3466/*
3467 * used for the test in the inner loop of the char data testing
3468 */
3469static const unsigned char test_char_data[256] = {
3470 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
3471 0x00, 0x09, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x9, CR/LF separated */
3472 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
3473 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
3474 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x00, 0x27, /* & */
3475 0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x2D, 0x2E, 0x2F,
3476 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,
3477 0x38, 0x39, 0x3A, 0x3B, 0x00, 0x3D, 0x3E, 0x3F, /* < */
3478 0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47,
3479 0x48, 0x49, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F,
3480 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57,
3481 0x58, 0x59, 0x5A, 0x5B, 0x5C, 0x00, 0x5E, 0x5F, /* ] */
3482 0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
3483 0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
3484 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
3485 0x78, 0x79, 0x7A, 0x7B, 0x7C, 0x7D, 0x7E, 0x7F,
3486 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* non-ascii */
3487 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
3488 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
3489 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
3490 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
3491 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
3492 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
3493 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
3494 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
3495 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
3496 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
3497 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
3498 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
3499 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
3500 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
3501 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
3502};
3503
3504/**
3505 * xmlParseCharData:
3506 * @ctxt: an XML parser context
3507 * @cdata: int indicating whether we are within a CDATA section
3508 *
3509 * parse a CharData section.
3510 * if we are within a CDATA section ']]>' marks an end of section.
3511 *
3512 * The right angle bracket (>) may be represented using the string "&gt;",
3513 * and must, for compatibility, be escaped using "&gt;" or a character
3514 * reference when it appears in the string "]]>" in content, when that
3515 * string is not marking the end of a CDATA section.
3516 *
3517 * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
3518 */
3519
3520void
3521xmlParseCharData(xmlParserCtxtPtr ctxt, int cdata) {
3522 const xmlChar *in;
3523 int nbchar = 0;
3524 int line = ctxt->input->line;
3525 int col = ctxt->input->col;
3526 int ccol;
3527
3528 SHRINK;
3529 GROW;
3530 /*
3531 * Accelerated common case where input don't need to be
3532 * modified before passing it to the handler.
3533 */
3534 if (!cdata) {
3535 in = ctxt->input->cur;
3536 do {
3537get_more_space:
3538 while (*in == 0x20) { in++; ctxt->input->col++; }
3539 if (*in == 0xA) {
3540 do {
3541 ctxt->input->line++; ctxt->input->col = 1;
3542 in++;
3543 } while (*in == 0xA);
3544 goto get_more_space;
3545 }
3546 if (*in == '<') {
3547 nbchar = in - ctxt->input->cur;
3548 if (nbchar > 0) {
3549 const xmlChar *tmp = ctxt->input->cur;
3550 ctxt->input->cur = in;
3551
3552 if ((ctxt->sax != NULL) &&
3553 (ctxt->sax->ignorableWhitespace !=
3554 ctxt->sax->characters)) {
3555 if (areBlanks(ctxt, tmp, nbchar, 1)) {
3556 if (ctxt->sax->ignorableWhitespace != NULL)
3557 ctxt->sax->ignorableWhitespace(ctxt->userData,
3558 tmp, nbchar);
3559 } else {
3560 if (ctxt->sax->characters != NULL)
3561 ctxt->sax->characters(ctxt->userData,
3562 tmp, nbchar);
3563 if (*ctxt->space == -1)
3564 *ctxt->space = -2;
3565 }
3566 } else if ((ctxt->sax != NULL) &&
3567 (ctxt->sax->characters != NULL)) {
3568 ctxt->sax->characters(ctxt->userData,
3569 tmp, nbchar);
3570 }
3571 }
3572 return;
3573 }
3574
3575get_more:
3576 ccol = ctxt->input->col;
3577 while (test_char_data[*in]) {
3578 in++;
3579 ccol++;
3580 }
3581 ctxt->input->col = ccol;
3582 if (*in == 0xA) {
3583 do {
3584 ctxt->input->line++; ctxt->input->col = 1;
3585 in++;
3586 } while (*in == 0xA);
3587 goto get_more;
3588 }
3589 if (*in == ']') {
3590 if ((in[1] == ']') && (in[2] == '>')) {
3591 xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
3592 ctxt->input->cur = in;
3593 return;
3594 }
3595 in++;
3596 ctxt->input->col++;
3597 goto get_more;
3598 }
3599 nbchar = in - ctxt->input->cur;
3600 if (nbchar > 0) {
3601 if ((ctxt->sax != NULL) &&
3602 (ctxt->sax->ignorableWhitespace !=
3603 ctxt->sax->characters) &&
3604 (IS_BLANK_CH(*ctxt->input->cur))) {
3605 const xmlChar *tmp = ctxt->input->cur;
3606 ctxt->input->cur = in;
3607
3608 if (areBlanks(ctxt, tmp, nbchar, 0)) {
3609 if (ctxt->sax->ignorableWhitespace != NULL)
3610 ctxt->sax->ignorableWhitespace(ctxt->userData,
3611 tmp, nbchar);
3612 } else {
3613 if (ctxt->sax->characters != NULL)
3614 ctxt->sax->characters(ctxt->userData,
3615 tmp, nbchar);
3616 if (*ctxt->space == -1)
3617 *ctxt->space = -2;
3618 }
3619 line = ctxt->input->line;
3620 col = ctxt->input->col;
3621 } else if (ctxt->sax != NULL) {
3622 if (ctxt->sax->characters != NULL)
3623 ctxt->sax->characters(ctxt->userData,
3624 ctxt->input->cur, nbchar);
3625 line = ctxt->input->line;
3626 col = ctxt->input->col;
3627 }
3628 }
3629 ctxt->input->cur = in;
3630 if (*in == 0xD) {
3631 in++;
3632 if (*in == 0xA) {
3633 ctxt->input->cur = in;
3634 in++;
3635 ctxt->input->line++; ctxt->input->col = 1;
3636 continue; /* while */
3637 }
3638 in--;
3639 }
3640 if (*in == '<') {
3641 return;
3642 }
3643 if (*in == '&') {
3644 return;
3645 }
3646 SHRINK;
3647 GROW;
3648 in = ctxt->input->cur;
3649 } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09));
3650 nbchar = 0;
3651 }
3652 ctxt->input->line = line;
3653 ctxt->input->col = col;
3654 xmlParseCharDataComplex(ctxt, cdata);
3655}
3656
3657/**
3658 * xmlParseCharDataComplex:
3659 * @ctxt: an XML parser context
3660 * @cdata: int indicating whether we are within a CDATA section
3661 *
3662 * parse a CharData section.this is the fallback function
3663 * of xmlParseCharData() when the parsing requires handling
3664 * of non-ASCII characters.
3665 */
3666void
3667xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata) {
3668 xmlChar buf[XML_PARSER_BIG_BUFFER_SIZE + 5];
3669 int nbchar = 0;
3670 int cur, l;
3671 int count = 0;
3672
3673 SHRINK;
3674 GROW;
3675 cur = CUR_CHAR(l);
3676 while ((cur != '<') && /* checked */
3677 (cur != '&') &&
3678 (IS_CHAR(cur))) /* test also done in xmlCurrentChar() */ {
3679 if ((cur == ']') && (NXT(1) == ']') &&
3680 (NXT(2) == '>')) {
3681 if (cdata) break;
3682 else {
3683 xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
3684 }
3685 }
3686 COPY_BUF(l,buf,nbchar,cur);
3687 if (nbchar >= XML_PARSER_BIG_BUFFER_SIZE) {
3688 buf[nbchar] = 0;
3689
3690 /*
3691 * OK the segment is to be consumed as chars.
3692 */
3693 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
3694 if (areBlanks(ctxt, buf, nbchar, 0)) {
3695 if (ctxt->sax->ignorableWhitespace != NULL)
3696 ctxt->sax->ignorableWhitespace(ctxt->userData,
3697 buf, nbchar);
3698 } else {
3699 if (ctxt->sax->characters != NULL)
3700 ctxt->sax->characters(ctxt->userData, buf, nbchar);
3701 if ((ctxt->sax->characters !=
3702 ctxt->sax->ignorableWhitespace) &&
3703 (*ctxt->space == -1))
3704 *ctxt->space = -2;
3705 }
3706 }
3707 nbchar = 0;
3708 }
3709 count++;
3710 if (count > 50) {
3711 GROW;
3712 count = 0;
3713 }
3714 NEXTL(l);
3715 cur = CUR_CHAR(l);
3716 }
3717 if (nbchar != 0) {
3718 buf[nbchar] = 0;
3719 /*
3720 * OK the segment is to be consumed as chars.
3721 */
3722 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
3723 if (areBlanks(ctxt, buf, nbchar, 0)) {
3724 if (ctxt->sax->ignorableWhitespace != NULL)
3725 ctxt->sax->ignorableWhitespace(ctxt->userData, buf, nbchar);
3726 } else {
3727 if (ctxt->sax->characters != NULL)
3728 ctxt->sax->characters(ctxt->userData, buf, nbchar);
3729 if ((ctxt->sax->characters != ctxt->sax->ignorableWhitespace) &&
3730 (*ctxt->space == -1))
3731 *ctxt->space = -2;
3732 }
3733 }
3734 }
3735 if ((cur != 0) && (!IS_CHAR(cur))) {
3736 /* Generate the error and skip the offending character */
3737 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
3738 "PCDATA invalid Char value %d\n",
3739 cur);
3740 NEXTL(l);
3741 }
3742}
3743
3744/**
3745 * xmlParseExternalID:
3746 * @ctxt: an XML parser context
3747 * @publicID: a xmlChar** receiving PubidLiteral
3748 * @strict: indicate whether we should restrict parsing to only
3749 * production [75], see NOTE below
3750 *
3751 * Parse an External ID or a Public ID
3752 *
3753 * NOTE: Productions [75] and [83] interact badly since [75] can generate
3754 * 'PUBLIC' S PubidLiteral S SystemLiteral
3755 *
3756 * [75] ExternalID ::= 'SYSTEM' S SystemLiteral
3757 * | 'PUBLIC' S PubidLiteral S SystemLiteral
3758 *
3759 * [83] PublicID ::= 'PUBLIC' S PubidLiteral
3760 *
3761 * Returns the function returns SystemLiteral and in the second
3762 * case publicID receives PubidLiteral, is strict is off
3763 * it is possible to return NULL and have publicID set.
3764 */
3765
3766xmlChar *
3767xmlParseExternalID(xmlParserCtxtPtr ctxt, xmlChar **publicID, int strict) {
3768 xmlChar *URI = NULL;
3769
3770 SHRINK;
3771
3772 *publicID = NULL;
3773 if (CMP6(CUR_PTR, 'S', 'Y', 'S', 'T', 'E', 'M')) {
3774 SKIP(6);
3775 if (!IS_BLANK_CH(CUR)) {
3776 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
3777 "Space required after 'SYSTEM'\n");
3778 }
3779 SKIP_BLANKS;
3780 URI = xmlParseSystemLiteral(ctxt);
3781 if (URI == NULL) {
3782 xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
3783 }
3784 } else if (CMP6(CUR_PTR, 'P', 'U', 'B', 'L', 'I', 'C')) {
3785 SKIP(6);
3786 if (!IS_BLANK_CH(CUR)) {
3787 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
3788 "Space required after 'PUBLIC'\n");
3789 }
3790 SKIP_BLANKS;
3791 *publicID = xmlParsePubidLiteral(ctxt);
3792 if (*publicID == NULL) {
3793 xmlFatalErr(ctxt, XML_ERR_PUBID_REQUIRED, NULL);
3794 }
3795 if (strict) {
3796 /*
3797 * We don't handle [83] so "S SystemLiteral" is required.
3798 */
3799 if (!IS_BLANK_CH(CUR)) {
3800 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
3801 "Space required after the Public Identifier\n");
3802 }
3803 } else {
3804 /*
3805 * We handle [83] so we return immediately, if
3806 * "S SystemLiteral" is not detected. From a purely parsing
3807 * point of view that's a nice mess.
3808 */
3809 const xmlChar *ptr;
3810 GROW;
3811
3812 ptr = CUR_PTR;
3813 if (!IS_BLANK_CH(*ptr)) return(NULL);
3814
3815 while (IS_BLANK_CH(*ptr)) ptr++; /* TODO: dangerous, fix ! */
3816 if ((*ptr != '\'') && (*ptr != '"')) return(NULL);
3817 }
3818 SKIP_BLANKS;
3819 URI = xmlParseSystemLiteral(ctxt);
3820 if (URI == NULL) {
3821 xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
3822 }
3823 }
3824 return(URI);
3825}
3826
3827/**
3828 * xmlParseCommentComplex:
3829 * @ctxt: an XML parser context
3830 * @buf: the already parsed part of the buffer
3831 * @len: number of bytes filles in the buffer
3832 * @size: allocated size of the buffer
3833 *
3834 * Skip an XML (SGML) comment <!-- .... -->
3835 * The spec says that "For compatibility, the string "--" (double-hyphen)
3836 * must not occur within comments. "
3837 * This is the slow routine in case the accelerator for ascii didn't work
3838 *
3839 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
3840 */
3841static void
3842xmlParseCommentComplex(xmlParserCtxtPtr ctxt, xmlChar *buf, int len, int size) {
3843 int q, ql;
3844 int r, rl;
3845 int cur, l;
3846 xmlParserInputPtr input = ctxt->input;
3847 int count = 0;
3848
3849 if (buf == NULL) {
3850 len = 0;
3851 size = XML_PARSER_BUFFER_SIZE;
3852 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
3853 if (buf == NULL) {
3854 xmlErrMemory(ctxt, NULL);
3855 return;
3856 }
3857 }
3858 GROW; /* Assure there's enough input data */
3859 q = CUR_CHAR(ql);
3860 if (q == 0)
3861 goto not_terminated;
3862 if (!IS_CHAR(q)) {
3863 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
3864 "xmlParseComment: invalid xmlChar value %d\n",
3865 q);
3866 xmlFree (buf);
3867 return;
3868 }
3869 NEXTL(ql);
3870 r = CUR_CHAR(rl);
3871 if (r == 0)
3872 goto not_terminated;
3873 if (!IS_CHAR(r)) {
3874 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
3875 "xmlParseComment: invalid xmlChar value %d\n",
3876 q);
3877 xmlFree (buf);
3878 return;
3879 }
3880 NEXTL(rl);
3881 cur = CUR_CHAR(l);
3882 if (cur == 0)
3883 goto not_terminated;
3884 while (IS_CHAR(cur) && /* checked */
3885 ((cur != '>') ||
3886 (r != '-') || (q != '-'))) {
3887 if ((r == '-') && (q == '-')) {
3888 xmlFatalErr(ctxt, XML_ERR_HYPHEN_IN_COMMENT, NULL);
3889 }
3890 if (len + 5 >= size) {
3891 xmlChar *new_buf;
3892 size *= 2;
3893 new_buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3894 if (new_buf == NULL) {
3895 xmlFree (buf);
3896 xmlErrMemory(ctxt, NULL);
3897 return;
3898 }
3899 buf = new_buf;
3900 }
3901 COPY_BUF(ql,buf,len,q);
3902 q = r;
3903 ql = rl;
3904 r = cur;
3905 rl = l;
3906
3907 count++;
3908 if (count > 50) {
3909 GROW;
3910 count = 0;
3911 }
3912 NEXTL(l);
3913 cur = CUR_CHAR(l);
3914 if (cur == 0) {
3915 SHRINK;
3916 GROW;
3917 cur = CUR_CHAR(l);
3918 }
3919 }
3920 buf[len] = 0;
3921 if (cur == 0) {
3922 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
3923 "Comment not terminated \n<!--%.50s\n", buf);
3924 } else if (!IS_CHAR(cur)) {
3925 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
3926 "xmlParseComment: invalid xmlChar value %d\n",
3927 cur);
3928 } else {
3929 if (input != ctxt->input) {
3930 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
3931 "Comment doesn't start and stop in the same entity\n");
3932 }
3933 NEXT;
3934 if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
3935 (!ctxt->disableSAX))
3936 ctxt->sax->comment(ctxt->userData, buf);
3937 }
3938 xmlFree(buf);
3939 return;
3940not_terminated:
3941 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
3942 "Comment not terminated\n", NULL);
3943 xmlFree(buf);
3944 return;
3945}
3946
3947/**
3948 * xmlParseComment:
3949 * @ctxt: an XML parser context
3950 *
3951 * Skip an XML (SGML) comment <!-- .... -->
3952 * The spec says that "For compatibility, the string "--" (double-hyphen)
3953 * must not occur within comments. "
3954 *
3955 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
3956 */
3957void
3958xmlParseComment(xmlParserCtxtPtr ctxt) {
3959 xmlChar *buf = NULL;
3960 int size = XML_PARSER_BUFFER_SIZE;
3961 int len = 0;
3962 xmlParserInputState state;
3963 const xmlChar *in;
3964 int nbchar = 0, ccol;
3965
3966 /*
3967 * Check that there is a comment right here.
3968 */
3969 if ((RAW != '<') || (NXT(1) != '!') ||
3970 (NXT(2) != '-') || (NXT(3) != '-')) return;
3971
3972 state = ctxt->instate;
3973 ctxt->instate = XML_PARSER_COMMENT;
3974 SKIP(4);
3975 SHRINK;
3976 GROW;
3977
3978 /*
3979 * Accelerated common case where input don't need to be
3980 * modified before passing it to the handler.
3981 */
3982 in = ctxt->input->cur;
3983 do {
3984 if (*in == 0xA) {
3985 do {
3986 ctxt->input->line++; ctxt->input->col = 1;
3987 in++;
3988 } while (*in == 0xA);
3989 }
3990get_more:
3991 ccol = ctxt->input->col;
3992 while (((*in > '-') && (*in <= 0x7F)) ||
3993 ((*in >= 0x20) && (*in < '-')) ||
3994 (*in == 0x09)) {
3995 in++;
3996 ccol++;
3997 }
3998 ctxt->input->col = ccol;
3999 if (*in == 0xA) {
4000 do {
4001 ctxt->input->line++; ctxt->input->col = 1;
4002 in++;
4003 } while (*in == 0xA);
4004 goto get_more;
4005 }
4006 nbchar = in - ctxt->input->cur;
4007 /*
4008 * save current set of data
4009 */
4010 if (nbchar > 0) {
4011 if ((ctxt->sax != NULL) &&
4012 (ctxt->sax->comment != NULL)) {
4013 if (buf == NULL) {
4014 if ((*in == '-') && (in[1] == '-'))
4015 size = nbchar + 1;
4016 else
4017 size = XML_PARSER_BUFFER_SIZE + nbchar;
4018 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
4019 if (buf == NULL) {
4020 xmlErrMemory(ctxt, NULL);
4021 ctxt->instate = state;
4022 return;
4023 }
4024 len = 0;
4025 } else if (len + nbchar + 1 >= size) {
4026 xmlChar *new_buf;
4027 size += len + nbchar + XML_PARSER_BUFFER_SIZE;
4028 new_buf = (xmlChar *) xmlRealloc(buf,
4029 size * sizeof(xmlChar));
4030 if (new_buf == NULL) {
4031 xmlFree (buf);
4032 xmlErrMemory(ctxt, NULL);
4033 ctxt->instate = state;
4034 return;
4035 }
4036 buf = new_buf;
4037 }
4038 memcpy(&buf[len], ctxt->input->cur, nbchar);
4039 len += nbchar;
4040 buf[len] = 0;
4041 }
4042 }
4043 ctxt->input->cur = in;
4044 if (*in == 0xA) {
4045 in++;
4046 ctxt->input->line++; ctxt->input->col = 1;
4047 }
4048 if (*in == 0xD) {
4049 in++;
4050 if (*in == 0xA) {
4051 ctxt->input->cur = in;
4052 in++;
4053 ctxt->input->line++; ctxt->input->col = 1;
4054 continue; /* while */
4055 }
4056 in--;
4057 }
4058 SHRINK;
4059 GROW;
4060 in = ctxt->input->cur;
4061 if (*in == '-') {
4062 if (in[1] == '-') {
4063 if (in[2] == '>') {
4064 SKIP(3);
4065 if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
4066 (!ctxt->disableSAX)) {
4067 if (buf != NULL)
4068 ctxt->sax->comment(ctxt->userData, buf);
4069 else
4070 ctxt->sax->comment(ctxt->userData, BAD_CAST "");
4071 }
4072 if (buf != NULL)
4073 xmlFree(buf);
4074 ctxt->instate = state;
4075 return;
4076 }
4077 if (buf != NULL)
4078 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4079 "Comment not terminated \n<!--%.50s\n",
4080 buf);
4081 else
4082 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4083 "Comment not terminated \n", NULL);
4084 in++;
4085 ctxt->input->col++;
4086 }
4087 in++;
4088 ctxt->input->col++;
4089 goto get_more;
4090 }
4091 } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09));
4092 xmlParseCommentComplex(ctxt, buf, len, size);
4093 ctxt->instate = state;
4094 return;
4095}
4096
4097
4098/**
4099 * xmlParsePITarget:
4100 * @ctxt: an XML parser context
4101 *
4102 * parse the name of a PI
4103 *
4104 * [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
4105 *
4106 * Returns the PITarget name or NULL
4107 */
4108
4109const xmlChar *
4110xmlParsePITarget(xmlParserCtxtPtr ctxt) {
4111 const xmlChar *name;
4112
4113 name = xmlParseName(ctxt);
4114 if ((name != NULL) &&
4115 ((name[0] == 'x') || (name[0] == 'X')) &&
4116 ((name[1] == 'm') || (name[1] == 'M')) &&
4117 ((name[2] == 'l') || (name[2] == 'L'))) {
4118 int i;
4119 if ((name[0] == 'x') && (name[1] == 'm') &&
4120 (name[2] == 'l') && (name[3] == 0)) {
4121 xmlFatalErrMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
4122 "XML declaration allowed only at the start of the document\n");
4123 return(name);
4124 } else if (name[3] == 0) {
4125 xmlFatalErr(ctxt, XML_ERR_RESERVED_XML_NAME, NULL);
4126 return(name);
4127 }
4128 for (i = 0;;i++) {
4129 if (xmlW3CPIs[i] == NULL) break;
4130 if (xmlStrEqual(name, (const xmlChar *)xmlW3CPIs[i]))
4131 return(name);
4132 }
4133 xmlWarningMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
4134 "xmlParsePITarget: invalid name prefix 'xml'\n",
4135 NULL, NULL);
4136 }
4137 return(name);
4138}
4139
4140#ifdef LIBXML_CATALOG_ENABLED
4141/**
4142 * xmlParseCatalogPI:
4143 * @ctxt: an XML parser context
4144 * @catalog: the PI value string
4145 *
4146 * parse an XML Catalog Processing Instruction.
4147 *
4148 * <?oasis-xml-catalog catalog="http://example.com/catalog.xml"?>
4149 *
4150 * Occurs only if allowed by the user and if happening in the Misc
4151 * part of the document before any doctype informations
4152 * This will add the given catalog to the parsing context in order
4153 * to be used if there is a resolution need further down in the document
4154 */
4155
4156static void
4157xmlParseCatalogPI(xmlParserCtxtPtr ctxt, const xmlChar *catalog) {
4158 xmlChar *URL = NULL;
4159 const xmlChar *tmp, *base;
4160 xmlChar marker;
4161
4162 tmp = catalog;
4163 while (IS_BLANK_CH(*tmp)) tmp++;
4164 if (xmlStrncmp(tmp, BAD_CAST"catalog", 7))
4165 goto error;
4166 tmp += 7;
4167 while (IS_BLANK_CH(*tmp)) tmp++;
4168 if (*tmp != '=') {
4169 return;
4170 }
4171 tmp++;
4172 while (IS_BLANK_CH(*tmp)) tmp++;
4173 marker = *tmp;
4174 if ((marker != '\'') && (marker != '"'))
4175 goto error;
4176 tmp++;
4177 base = tmp;
4178 while ((*tmp != 0) && (*tmp != marker)) tmp++;
4179 if (*tmp == 0)
4180 goto error;
4181 URL = xmlStrndup(base, tmp - base);
4182 tmp++;
4183 while (IS_BLANK_CH(*tmp)) tmp++;
4184 if (*tmp != 0)
4185 goto error;
4186
4187 if (URL != NULL) {
4188 ctxt->catalogs = xmlCatalogAddLocal(ctxt->catalogs, URL);
4189 xmlFree(URL);
4190 }
4191 return;
4192
4193error:
4194 xmlWarningMsg(ctxt, XML_WAR_CATALOG_PI,
4195 "Catalog PI syntax error: %s\n",
4196 catalog, NULL);
4197 if (URL != NULL)
4198 xmlFree(URL);
4199}
4200#endif
4201
4202/**
4203 * xmlParsePI:
4204 * @ctxt: an XML parser context
4205 *
4206 * parse an XML Processing Instruction.
4207 *
4208 * [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
4209 *
4210 * The processing is transfered to SAX once parsed.
4211 */
4212
4213void
4214xmlParsePI(xmlParserCtxtPtr ctxt) {
4215 xmlChar *buf = NULL;
4216 int len = 0;
4217 int size = XML_PARSER_BUFFER_SIZE;
4218 int cur, l;
4219 const xmlChar *target;
4220 xmlParserInputState state;
4221 int count = 0;
4222
4223 if ((RAW == '<') && (NXT(1) == '?')) {
4224 xmlParserInputPtr input = ctxt->input;
4225 state = ctxt->instate;
4226 ctxt->instate = XML_PARSER_PI;
4227 /*
4228 * this is a Processing Instruction.
4229 */
4230 SKIP(2);
4231 SHRINK;
4232
4233 /*
4234 * Parse the target name and check for special support like
4235 * namespace.
4236 */
4237 target = xmlParsePITarget(ctxt);
4238 if (target != NULL) {
4239 if ((RAW == '?') && (NXT(1) == '>')) {
4240 if (input != ctxt->input) {
4241 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
4242 "PI declaration doesn't start and stop in the same entity\n");
4243 }
4244 SKIP(2);
4245
4246 /*
4247 * SAX: PI detected.
4248 */
4249 if ((ctxt->sax) && (!ctxt->disableSAX) &&
4250 (ctxt->sax->processingInstruction != NULL))
4251 ctxt->sax->processingInstruction(ctxt->userData,
4252 target, NULL);
4253 ctxt->instate = state;
4254 return;
4255 }
4256 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
4257 if (buf == NULL) {
4258 xmlErrMemory(ctxt, NULL);
4259 ctxt->instate = state;
4260 return;
4261 }
4262 cur = CUR;
4263 if (!IS_BLANK(cur)) {
4264 xmlFatalErrMsgStr(ctxt, XML_ERR_SPACE_REQUIRED,
4265 "ParsePI: PI %s space expected\n", target);
4266 }
4267 SKIP_BLANKS;
4268 cur = CUR_CHAR(l);
4269 while (IS_CHAR(cur) && /* checked */
4270 ((cur != '?') || (NXT(1) != '>'))) {
4271 if (len + 5 >= size) {
4272 xmlChar *tmp;
4273
4274 size *= 2;
4275 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
4276 if (tmp == NULL) {
4277 xmlErrMemory(ctxt, NULL);
4278 xmlFree(buf);
4279 ctxt->instate = state;
4280 return;
4281 }
4282 buf = tmp;
4283 }
4284 count++;
4285 if (count > 50) {
4286 GROW;
4287 count = 0;
4288 }
4289 COPY_BUF(l,buf,len,cur);
4290 NEXTL(l);
4291 cur = CUR_CHAR(l);
4292 if (cur == 0) {
4293 SHRINK;
4294 GROW;
4295 cur = CUR_CHAR(l);
4296 }
4297 }
4298 buf[len] = 0;
4299 if (cur != '?') {
4300 xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
4301 "ParsePI: PI %s never end ...\n", target);
4302 } else {
4303 if (input != ctxt->input) {
4304 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4305 "PI declaration doesn't start and stop in the same entity\n");
4306 }
4307 SKIP(2);
4308
4309#ifdef LIBXML_CATALOG_ENABLED
4310 if (((state == XML_PARSER_MISC) ||
4311 (state == XML_PARSER_START)) &&
4312 (xmlStrEqual(target, XML_CATALOG_PI))) {
4313 xmlCatalogAllow allow = xmlCatalogGetDefaults();
4314 if ((allow == XML_CATA_ALLOW_DOCUMENT) ||
4315 (allow == XML_CATA_ALLOW_ALL))
4316 xmlParseCatalogPI(ctxt, buf);
4317 }
4318#endif
4319
4320
4321 /*
4322 * SAX: PI detected.
4323 */
4324 if ((ctxt->sax) && (!ctxt->disableSAX) &&
4325 (ctxt->sax->processingInstruction != NULL))
4326 ctxt->sax->processingInstruction(ctxt->userData,
4327 target, buf);
4328 }
4329 xmlFree(buf);
4330 } else {
4331 xmlFatalErr(ctxt, XML_ERR_PI_NOT_STARTED, NULL);
4332 }
4333 ctxt->instate = state;
4334 }
4335}
4336
4337/**
4338 * xmlParseNotationDecl:
4339 * @ctxt: an XML parser context
4340 *
4341 * parse a notation declaration
4342 *
4343 * [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID | PublicID) S? '>'
4344 *
4345 * Hence there is actually 3 choices:
4346 * 'PUBLIC' S PubidLiteral
4347 * 'PUBLIC' S PubidLiteral S SystemLiteral
4348 * and 'SYSTEM' S SystemLiteral
4349 *
4350 * See the NOTE on xmlParseExternalID().
4351 */
4352
4353void
4354xmlParseNotationDecl(xmlParserCtxtPtr ctxt) {
4355 const xmlChar *name;
4356 xmlChar *Pubid;
4357 xmlChar *Systemid;
4358
4359 if (CMP10(CUR_PTR, '<', '!', 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
4360 xmlParserInputPtr input = ctxt->input;
4361 SHRINK;
4362 SKIP(10);
4363 if (!IS_BLANK_CH(CUR)) {
4364 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4365 "Space required after '<!NOTATION'\n");
4366 return;
4367 }
4368 SKIP_BLANKS;
4369
4370 name = xmlParseName(ctxt);
4371 if (name == NULL) {
4372 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
4373 return;
4374 }
4375 if (!IS_BLANK_CH(CUR)) {
4376 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4377 "Space required after the NOTATION name'\n");
4378 return;
4379 }
4380 SKIP_BLANKS;
4381
4382 /*
4383 * Parse the IDs.
4384 */
4385 Systemid = xmlParseExternalID(ctxt, &Pubid, 0);
4386 SKIP_BLANKS;
4387
4388 if (RAW == '>') {
4389 if (input != ctxt->input) {
4390 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4391 "Notation declaration doesn't start and stop in the same entity\n");
4392 }
4393 NEXT;
4394 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
4395 (ctxt->sax->notationDecl != NULL))
4396 ctxt->sax->notationDecl(ctxt->userData, name, Pubid, Systemid);
4397 } else {
4398 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
4399 }
4400 if (Systemid != NULL) xmlFree(Systemid);
4401 if (Pubid != NULL) xmlFree(Pubid);
4402 }
4403}
4404
4405/**
4406 * xmlParseEntityDecl:
4407 * @ctxt: an XML parser context
4408 *
4409 * parse <!ENTITY declarations
4410 *
4411 * [70] EntityDecl ::= GEDecl | PEDecl
4412 *
4413 * [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'
4414 *
4415 * [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'
4416 *
4417 * [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?)
4418 *
4419 * [74] PEDef ::= EntityValue | ExternalID
4420 *
4421 * [76] NDataDecl ::= S 'NDATA' S Name
4422 *
4423 * [ VC: Notation Declared ]
4424 * The Name must match the declared name of a notation.
4425 */
4426
4427void
4428xmlParseEntityDecl(xmlParserCtxtPtr ctxt) {
4429 const xmlChar *name = NULL;
4430 xmlChar *value = NULL;
4431 xmlChar *URI = NULL, *literal = NULL;
4432 const xmlChar *ndata = NULL;
4433 int isParameter = 0;
4434 xmlChar *orig = NULL;
4435 int skipped;
4436
4437 /* GROW; done in the caller */
4438 if (CMP8(CUR_PTR, '<', '!', 'E', 'N', 'T', 'I', 'T', 'Y')) {
4439 xmlParserInputPtr input = ctxt->input;
4440 SHRINK;
4441 SKIP(8);
4442 skipped = SKIP_BLANKS;
4443 if (skipped == 0) {
4444 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4445 "Space required after '<!ENTITY'\n");
4446 }
4447
4448 if (RAW == '%') {
4449 NEXT;
4450 skipped = SKIP_BLANKS;
4451 if (skipped == 0) {
4452 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4453 "Space required after '%'\n");
4454 }
4455 isParameter = 1;
4456 }
4457
4458 name = xmlParseName(ctxt);
4459 if (name == NULL) {
4460 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
4461 "xmlParseEntityDecl: no name\n");
4462 return;
4463 }
4464 skipped = SKIP_BLANKS;
4465 if (skipped == 0) {
4466 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4467 "Space required after the entity name\n");
4468 }
4469
4470 ctxt->instate = XML_PARSER_ENTITY_DECL;
4471 /*
4472 * handle the various case of definitions...
4473 */
4474 if (isParameter) {
4475 if ((RAW == '"') || (RAW == '\'')) {
4476 value = xmlParseEntityValue(ctxt, &orig);
4477 if (value) {
4478 if ((ctxt->sax != NULL) &&
4479 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
4480 ctxt->sax->entityDecl(ctxt->userData, name,
4481 XML_INTERNAL_PARAMETER_ENTITY,
4482 NULL, NULL, value);
4483 }
4484 } else {
4485 URI = xmlParseExternalID(ctxt, &literal, 1);
4486 if ((URI == NULL) && (literal == NULL)) {
4487 xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
4488 }
4489 if (URI) {
4490 xmlURIPtr uri;
4491
4492 uri = xmlParseURI((const char *) URI);
4493 if (uri == NULL) {
4494 xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI,
4495 "Invalid URI: %s\n", URI);
4496 /*
4497 * This really ought to be a well formedness error
4498 * but the XML Core WG decided otherwise c.f. issue
4499 * E26 of the XML erratas.
4500 */
4501 } else {
4502 if (uri->fragment != NULL) {
4503 /*
4504 * Okay this is foolish to block those but not
4505 * invalid URIs.
4506 */
4507 xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
4508 } else {
4509 if ((ctxt->sax != NULL) &&
4510 (!ctxt->disableSAX) &&
4511 (ctxt->sax->entityDecl != NULL))
4512 ctxt->sax->entityDecl(ctxt->userData, name,
4513 XML_EXTERNAL_PARAMETER_ENTITY,
4514 literal, URI, NULL);
4515 }
4516 xmlFreeURI(uri);
4517 }
4518 }
4519 }
4520 } else {
4521 if ((RAW == '"') || (RAW == '\'')) {
4522 value = xmlParseEntityValue(ctxt, &orig);
4523 if ((ctxt->sax != NULL) &&
4524 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
4525 ctxt->sax->entityDecl(ctxt->userData, name,
4526 XML_INTERNAL_GENERAL_ENTITY,
4527 NULL, NULL, value);
4528 /*
4529 * For expat compatibility in SAX mode.
4530 */
4531 if ((ctxt->myDoc == NULL) ||
4532 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
4533 if (ctxt->myDoc == NULL) {
4534 ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
4535 }
4536 if (ctxt->myDoc->intSubset == NULL)
4537 ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
4538 BAD_CAST "fake", NULL, NULL);
4539
4540 xmlSAX2EntityDecl(ctxt, name, XML_INTERNAL_GENERAL_ENTITY,
4541 NULL, NULL, value);
4542 }
4543 } else {
4544 URI = xmlParseExternalID(ctxt, &literal, 1);
4545 if ((URI == NULL) && (literal == NULL)) {
4546 xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
4547 }
4548 if (URI) {
4549 xmlURIPtr uri;
4550
4551 uri = xmlParseURI((const char *)URI);
4552 if (uri == NULL) {
4553 xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI,
4554 "Invalid URI: %s\n", URI);
4555 /*
4556 * This really ought to be a well formedness error
4557 * but the XML Core WG decided otherwise c.f. issue
4558 * E26 of the XML erratas.
4559 */
4560 } else {
4561 if (uri->fragment != NULL) {
4562 /*
4563 * Okay this is foolish to block those but not
4564 * invalid URIs.
4565 */
4566 xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
4567 }
4568 xmlFreeURI(uri);
4569 }
4570 }
4571 if ((RAW != '>') && (!IS_BLANK_CH(CUR))) {
4572 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4573 "Space required before 'NDATA'\n");
4574 }
4575 SKIP_BLANKS;
4576 if (CMP5(CUR_PTR, 'N', 'D', 'A', 'T', 'A')) {
4577 SKIP(5);
4578 if (!IS_BLANK_CH(CUR)) {
4579 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4580 "Space required after 'NDATA'\n");
4581 }
4582 SKIP_BLANKS;
4583 ndata = xmlParseName(ctxt);
4584 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
4585 (ctxt->sax->unparsedEntityDecl != NULL))
4586 ctxt->sax->unparsedEntityDecl(ctxt->userData, name,
4587 literal, URI, ndata);
4588 } else {
4589 if ((ctxt->sax != NULL) &&
4590 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
4591 ctxt->sax->entityDecl(ctxt->userData, name,
4592 XML_EXTERNAL_GENERAL_PARSED_ENTITY,
4593 literal, URI, NULL);
4594 /*
4595 * For expat compatibility in SAX mode.
4596 * assuming the entity repalcement was asked for
4597 */
4598 if ((ctxt->replaceEntities != 0) &&
4599 ((ctxt->myDoc == NULL) ||
4600 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE)))) {
4601 if (ctxt->myDoc == NULL) {
4602 ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
4603 }
4604
4605 if (ctxt->myDoc->intSubset == NULL)
4606 ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
4607 BAD_CAST "fake", NULL, NULL);
4608 xmlSAX2EntityDecl(ctxt, name,
4609 XML_EXTERNAL_GENERAL_PARSED_ENTITY,
4610 literal, URI, NULL);
4611 }
4612 }
4613 }
4614 }
4615 SKIP_BLANKS;
4616 if (RAW != '>') {
4617 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_NOT_FINISHED,
4618 "xmlParseEntityDecl: entity %s not terminated\n", name);
4619 } else {
4620 if (input != ctxt->input) {
4621 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
4622 "Entity declaration doesn't start and stop in the same entity\n");
4623 }
4624 NEXT;
4625 }
4626 if (orig != NULL) {
4627 /*
4628 * Ugly mechanism to save the raw entity value.
4629 */
4630 xmlEntityPtr cur = NULL;
4631
4632 if (isParameter) {
4633 if ((ctxt->sax != NULL) &&
4634 (ctxt->sax->getParameterEntity != NULL))
4635 cur = ctxt->sax->getParameterEntity(ctxt->userData, name);
4636 } else {
4637 if ((ctxt->sax != NULL) &&
4638 (ctxt->sax->getEntity != NULL))
4639 cur = ctxt->sax->getEntity(ctxt->userData, name);
4640 if ((cur == NULL) && (ctxt->userData==ctxt)) {
4641 cur = xmlSAX2GetEntity(ctxt, name);
4642 }
4643 }
4644 if (cur != NULL) {
4645 if (cur->orig != NULL)
4646 xmlFree(orig);
4647 else
4648 cur->orig = orig;
4649 } else
4650 xmlFree(orig);
4651 }
4652 if (value != NULL) xmlFree(value);
4653 if (URI != NULL) xmlFree(URI);
4654 if (literal != NULL) xmlFree(literal);
4655 }
4656}
4657
4658/**
4659 * xmlParseDefaultDecl:
4660 * @ctxt: an XML parser context
4661 * @value: Receive a possible fixed default value for the attribute
4662 *
4663 * Parse an attribute default declaration
4664 *
4665 * [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue)
4666 *
4667 * [ VC: Required Attribute ]
4668 * if the default declaration is the keyword #REQUIRED, then the
4669 * attribute must be specified for all elements of the type in the
4670 * attribute-list declaration.
4671 *
4672 * [ VC: Attribute Default Legal ]
4673 * The declared default value must meet the lexical constraints of
4674 * the declared attribute type c.f. xmlValidateAttributeDecl()
4675 *
4676 * [ VC: Fixed Attribute Default ]
4677 * if an attribute has a default value declared with the #FIXED
4678 * keyword, instances of that attribute must match the default value.
4679 *
4680 * [ WFC: No < in Attribute Values ]
4681 * handled in xmlParseAttValue()
4682 *
4683 * returns: XML_ATTRIBUTE_NONE, XML_ATTRIBUTE_REQUIRED, XML_ATTRIBUTE_IMPLIED
4684 * or XML_ATTRIBUTE_FIXED.
4685 */
4686
4687int
4688xmlParseDefaultDecl(xmlParserCtxtPtr ctxt, xmlChar **value) {
4689 int val;
4690 xmlChar *ret;
4691
4692 *value = NULL;
4693 if (CMP9(CUR_PTR, '#', 'R', 'E', 'Q', 'U', 'I', 'R', 'E', 'D')) {
4694 SKIP(9);
4695 return(XML_ATTRIBUTE_REQUIRED);
4696 }
4697 if (CMP8(CUR_PTR, '#', 'I', 'M', 'P', 'L', 'I', 'E', 'D')) {
4698 SKIP(8);
4699 return(XML_ATTRIBUTE_IMPLIED);
4700 }
4701 val = XML_ATTRIBUTE_NONE;
4702 if (CMP6(CUR_PTR, '#', 'F', 'I', 'X', 'E', 'D')) {
4703 SKIP(6);
4704 val = XML_ATTRIBUTE_FIXED;
4705 if (!IS_BLANK_CH(CUR)) {
4706 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4707 "Space required after '#FIXED'\n");
4708 }
4709 SKIP_BLANKS;
4710 }
4711 ret = xmlParseAttValue(ctxt);
4712 ctxt->instate = XML_PARSER_DTD;
4713 if (ret == NULL) {
4714 xmlFatalErrMsg(ctxt, (xmlParserErrors)ctxt->errNo,
4715 "Attribute default value declaration error\n");
4716 } else
4717 *value = ret;
4718 return(val);
4719}
4720
4721/**
4722 * xmlParseNotationType:
4723 * @ctxt: an XML parser context
4724 *
4725 * parse an Notation attribute type.
4726 *
4727 * Note: the leading 'NOTATION' S part has already being parsed...
4728 *
4729 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
4730 *
4731 * [ VC: Notation Attributes ]
4732 * Values of this type must match one of the notation names included
4733 * in the declaration; all notation names in the declaration must be declared.
4734 *
4735 * Returns: the notation attribute tree built while parsing
4736 */
4737
4738xmlEnumerationPtr
4739xmlParseNotationType(xmlParserCtxtPtr ctxt) {
4740 const xmlChar *name;
4741 xmlEnumerationPtr ret = NULL, last = NULL, cur;
4742
4743 if (RAW != '(') {
4744 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
4745 return(NULL);
4746 }
4747 SHRINK;
4748 do {
4749 NEXT;
4750 SKIP_BLANKS;
4751 name = xmlParseName(ctxt);
4752 if (name == NULL) {
4753 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
4754 "Name expected in NOTATION declaration\n");
4755 return(ret);
4756 }
4757 cur = xmlCreateEnumeration(name);
4758 if (cur == NULL) return(ret);
4759 if (last == NULL) ret = last = cur;
4760 else {
4761 last->next = cur;
4762 last = cur;
4763 }
4764 SKIP_BLANKS;
4765 } while (RAW == '|');
4766 if (RAW != ')') {
4767 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
4768 if ((last != NULL) && (last != ret))
4769 xmlFreeEnumeration(last);
4770 return(ret);
4771 }
4772 NEXT;
4773 return(ret);
4774}
4775
4776/**
4777 * xmlParseEnumerationType:
4778 * @ctxt: an XML parser context
4779 *
4780 * parse an Enumeration attribute type.
4781 *
4782 * [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'
4783 *
4784 * [ VC: Enumeration ]
4785 * Values of this type must match one of the Nmtoken tokens in
4786 * the declaration
4787 *
4788 * Returns: the enumeration attribute tree built while parsing
4789 */
4790
4791xmlEnumerationPtr
4792xmlParseEnumerationType(xmlParserCtxtPtr ctxt) {
4793 xmlChar *name;
4794 xmlEnumerationPtr ret = NULL, last = NULL, cur;
4795
4796 if (RAW != '(') {
4797 xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_STARTED, NULL);
4798 return(NULL);
4799 }
4800 SHRINK;
4801 do {
4802 NEXT;
4803 SKIP_BLANKS;
4804 name = xmlParseNmtoken(ctxt);
4805 if (name == NULL) {
4806 xmlFatalErr(ctxt, XML_ERR_NMTOKEN_REQUIRED, NULL);
4807 return(ret);
4808 }
4809 cur = xmlCreateEnumeration(name);
4810 xmlFree(name);
4811 if (cur == NULL) return(ret);
4812 if (last == NULL) ret = last = cur;
4813 else {
4814 last->next = cur;
4815 last = cur;
4816 }
4817 SKIP_BLANKS;
4818 } while (RAW == '|');
4819 if (RAW != ')') {
4820 xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_FINISHED, NULL);
4821 return(ret);
4822 }
4823 NEXT;
4824 return(ret);
4825}
4826
4827/**
4828 * xmlParseEnumeratedType:
4829 * @ctxt: an XML parser context
4830 * @tree: the enumeration tree built while parsing
4831 *
4832 * parse an Enumerated attribute type.
4833 *
4834 * [57] EnumeratedType ::= NotationType | Enumeration
4835 *
4836 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
4837 *
4838 *
4839 * Returns: XML_ATTRIBUTE_ENUMERATION or XML_ATTRIBUTE_NOTATION
4840 */
4841
4842int
4843xmlParseEnumeratedType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
4844 if (CMP8(CUR_PTR, 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
4845 SKIP(8);
4846 if (!IS_BLANK_CH(CUR)) {
4847 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4848 "Space required after 'NOTATION'\n");
4849 return(0);
4850 }
4851 SKIP_BLANKS;
4852 *tree = xmlParseNotationType(ctxt);
4853 if (*tree == NULL) return(0);
4854 return(XML_ATTRIBUTE_NOTATION);
4855 }
4856 *tree = xmlParseEnumerationType(ctxt);
4857 if (*tree == NULL) return(0);
4858 return(XML_ATTRIBUTE_ENUMERATION);
4859}
4860
4861/**
4862 * xmlParseAttributeType:
4863 * @ctxt: an XML parser context
4864 * @tree: the enumeration tree built while parsing
4865 *
4866 * parse the Attribute list def for an element
4867 *
4868 * [54] AttType ::= StringType | TokenizedType | EnumeratedType
4869 *
4870 * [55] StringType ::= 'CDATA'
4871 *
4872 * [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' |
4873 * 'ENTITIES' | 'NMTOKEN' | 'NMTOKENS'
4874 *
4875 * Validity constraints for attribute values syntax are checked in
4876 * xmlValidateAttributeValue()
4877 *
4878 * [ VC: ID ]
4879 * Values of type ID must match the Name production. A name must not
4880 * appear more than once in an XML document as a value of this type;
4881 * i.e., ID values must uniquely identify the elements which bear them.
4882 *
4883 * [ VC: One ID per Element Type ]
4884 * No element type may have more than one ID attribute specified.
4885 *
4886 * [ VC: ID Attribute Default ]
4887 * An ID attribute must have a declared default of #IMPLIED or #REQUIRED.
4888 *
4889 * [ VC: IDREF ]
4890 * Values of type IDREF must match the Name production, and values
4891 * of type IDREFS must match Names; each IDREF Name must match the value
4892 * of an ID attribute on some element in the XML document; i.e. IDREF
4893 * values must match the value of some ID attribute.
4894 *
4895 * [ VC: Entity Name ]
4896 * Values of type ENTITY must match the Name production, values
4897 * of type ENTITIES must match Names; each Entity Name must match the
4898 * name of an unparsed entity declared in the DTD.
4899 *
4900 * [ VC: Name Token ]
4901 * Values of type NMTOKEN must match the Nmtoken production; values
4902 * of type NMTOKENS must match Nmtokens.
4903 *
4904 * Returns the attribute type
4905 */
4906int
4907xmlParseAttributeType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
4908 SHRINK;
4909 if (CMP5(CUR_PTR, 'C', 'D', 'A', 'T', 'A')) {
4910 SKIP(5);
4911 return(XML_ATTRIBUTE_CDATA);
4912 } else if (CMP6(CUR_PTR, 'I', 'D', 'R', 'E', 'F', 'S')) {
4913 SKIP(6);
4914 return(XML_ATTRIBUTE_IDREFS);
4915 } else if (CMP5(CUR_PTR, 'I', 'D', 'R', 'E', 'F')) {
4916 SKIP(5);
4917 return(XML_ATTRIBUTE_IDREF);
4918 } else if ((RAW == 'I') && (NXT(1) == 'D')) {
4919 SKIP(2);
4920 return(XML_ATTRIBUTE_ID);
4921 } else if (CMP6(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'Y')) {
4922 SKIP(6);
4923 return(XML_ATTRIBUTE_ENTITY);
4924 } else if (CMP8(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'I', 'E', 'S')) {
4925 SKIP(8);
4926 return(XML_ATTRIBUTE_ENTITIES);
4927 } else if (CMP8(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N', 'S')) {
4928 SKIP(8);
4929 return(XML_ATTRIBUTE_NMTOKENS);
4930 } else if (CMP7(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N')) {
4931 SKIP(7);
4932 return(XML_ATTRIBUTE_NMTOKEN);
4933 }
4934 return(xmlParseEnumeratedType(ctxt, tree));
4935}
4936
4937/**
4938 * xmlParseAttributeListDecl:
4939 * @ctxt: an XML parser context
4940 *
4941 * : parse the Attribute list def for an element
4942 *
4943 * [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
4944 *
4945 * [53] AttDef ::= S Name S AttType S DefaultDecl
4946 *
4947 */
4948void
4949xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt) {
4950 const xmlChar *elemName;
4951 const xmlChar *attrName;
4952 xmlEnumerationPtr tree;
4953
4954 if (CMP9(CUR_PTR, '<', '!', 'A', 'T', 'T', 'L', 'I', 'S', 'T')) {
4955 xmlParserInputPtr input = ctxt->input;
4956
4957 SKIP(9);
4958 if (!IS_BLANK_CH(CUR)) {
4959 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4960 "Space required after '<!ATTLIST'\n");
4961 }
4962 SKIP_BLANKS;
4963 elemName = xmlParseName(ctxt);
4964 if (elemName == NULL) {
4965 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
4966 "ATTLIST: no name for Element\n");
4967 return;
4968 }
4969 SKIP_BLANKS;
4970 GROW;
4971 while (RAW != '>') {
4972 const xmlChar *check = CUR_PTR;
4973 int type;
4974 int def;
4975 xmlChar *defaultValue = NULL;
4976
4977 GROW;
4978 tree = NULL;
4979 attrName = xmlParseName(ctxt);
4980 if (attrName == NULL) {
4981 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
4982 "ATTLIST: no name for Attribute\n");
4983 break;
4984 }
4985 GROW;
4986 if (!IS_BLANK_CH(CUR)) {
4987 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4988 "Space required after the attribute name\n");
4989 break;
4990 }
4991 SKIP_BLANKS;
4992
4993 type = xmlParseAttributeType(ctxt, &tree);
4994 if (type <= 0) {
4995 break;
4996 }
4997
4998 GROW;
4999 if (!IS_BLANK_CH(CUR)) {
5000 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5001 "Space required after the attribute type\n");
5002 if (tree != NULL)
5003 xmlFreeEnumeration(tree);
5004 break;
5005 }
5006 SKIP_BLANKS;
5007
5008 def = xmlParseDefaultDecl(ctxt, &defaultValue);
5009 if (def <= 0) {
5010 if (defaultValue != NULL)
5011 xmlFree(defaultValue);
5012 if (tree != NULL)
5013 xmlFreeEnumeration(tree);
5014 break;
5015 }
5016
5017 GROW;
5018 if (RAW != '>') {
5019 if (!IS_BLANK_CH(CUR)) {
5020 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5021 "Space required after the attribute default value\n");
5022 if (defaultValue != NULL)
5023 xmlFree(defaultValue);
5024 if (tree != NULL)
5025 xmlFreeEnumeration(tree);
5026 break;
5027 }
5028 SKIP_BLANKS;
5029 }
5030 if (check == CUR_PTR) {
5031 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
5032 "in xmlParseAttributeListDecl\n");
5033 if (defaultValue != NULL)
5034 xmlFree(defaultValue);
5035 if (tree != NULL)
5036 xmlFreeEnumeration(tree);
5037 break;
5038 }
5039 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5040 (ctxt->sax->attributeDecl != NULL))
5041 ctxt->sax->attributeDecl(ctxt->userData, elemName, attrName,
5042 type, def, defaultValue, tree);
5043 else if (tree != NULL)
5044 xmlFreeEnumeration(tree);
5045
5046 if ((ctxt->sax2) && (defaultValue != NULL) &&
5047 (def != XML_ATTRIBUTE_IMPLIED) &&
5048 (def != XML_ATTRIBUTE_REQUIRED)) {
5049 xmlAddDefAttrs(ctxt, elemName, attrName, defaultValue);
5050 }
5051 if (ctxt->sax2) {
5052 xmlAddSpecialAttr(ctxt, elemName, attrName, type);
5053 }
5054 if (defaultValue != NULL)
5055 xmlFree(defaultValue);
5056 GROW;
5057 }
5058 if (RAW == '>') {
5059 if (input != ctxt->input) {
5060 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5061 "Attribute list declaration doesn't start and stop in the same entity\n");
5062 }
5063 NEXT;
5064 }
5065 }
5066}
5067
5068/**
5069 * xmlParseElementMixedContentDecl:
5070 * @ctxt: an XML parser context
5071 * @inputchk: the input used for the current entity, needed for boundary checks
5072 *
5073 * parse the declaration for a Mixed Element content
5074 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
5075 *
5076 * [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' |
5077 * '(' S? '#PCDATA' S? ')'
5078 *
5079 * [ VC: Proper Group/PE Nesting ] applies to [51] too (see [49])
5080 *
5081 * [ VC: No Duplicate Types ]
5082 * The same name must not appear more than once in a single
5083 * mixed-content declaration.
5084 *
5085 * returns: the list of the xmlElementContentPtr describing the element choices
5086 */
5087xmlElementContentPtr
5088xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt, int inputchk) {
5089 xmlElementContentPtr ret = NULL, cur = NULL, n;
5090 const xmlChar *elem = NULL;
5091
5092 GROW;
5093 if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
5094 SKIP(7);
5095 SKIP_BLANKS;
5096 SHRINK;
5097 if (RAW == ')') {
5098 if ((ctxt->validate) && (ctxt->input->id != inputchk)) {
5099 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
5100"Element content declaration doesn't start and stop in the same entity\n",
5101 NULL);
5102 }
5103 NEXT;
5104 ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA);
5105 if (RAW == '*') {
5106 ret->ocur = XML_ELEMENT_CONTENT_MULT;
5107 NEXT;
5108 }
5109 return(ret);
5110 }
5111 if ((RAW == '(') || (RAW == '|')) {
5112 ret = cur = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA);
5113 if (ret == NULL) return(NULL);
5114 }
5115 while (RAW == '|') {
5116 NEXT;
5117 if (elem == NULL) {
5118 ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
5119 if (ret == NULL) return(NULL);
5120 ret->c1 = cur;
5121 if (cur != NULL)
5122 cur->parent = ret;
5123 cur = ret;
5124 } else {
5125 n = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
5126 if (n == NULL) return(NULL);
5127 n->c1 = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
5128 if (n->c1 != NULL)
5129 n->c1->parent = n;
5130 cur->c2 = n;
5131 if (n != NULL)
5132 n->parent = cur;
5133 cur = n;
5134 }
5135 SKIP_BLANKS;
5136 elem = xmlParseName(ctxt);
5137 if (elem == NULL) {
5138 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5139 "xmlParseElementMixedContentDecl : Name expected\n");
5140 xmlFreeDocElementContent(ctxt->myDoc, cur);
5141 return(NULL);
5142 }
5143 SKIP_BLANKS;
5144 GROW;
5145 }
5146 if ((RAW == ')') && (NXT(1) == '*')) {
5147 if (elem != NULL) {
5148 cur->c2 = xmlNewDocElementContent(ctxt->myDoc, elem,
5149 XML_ELEMENT_CONTENT_ELEMENT);
5150 if (cur->c2 != NULL)
5151 cur->c2->parent = cur;
5152 }
5153 ret->ocur = XML_ELEMENT_CONTENT_MULT;
5154 if ((ctxt->validate) && (ctxt->input->id != inputchk)) {
5155 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
5156"Element content declaration doesn't start and stop in the same entity\n",
5157 NULL);
5158 }
5159 SKIP(2);
5160 } else {
5161 xmlFreeDocElementContent(ctxt->myDoc, ret);
5162 xmlFatalErr(ctxt, XML_ERR_MIXED_NOT_STARTED, NULL);
5163 return(NULL);
5164 }
5165
5166 } else {
5167 xmlFatalErr(ctxt, XML_ERR_PCDATA_REQUIRED, NULL);
5168 }
5169 return(ret);
5170}
5171
5172/**
5173 * xmlParseElementChildrenContentDecl:
5174 * @ctxt: an XML parser context
5175 * @inputchk: the input used for the current entity, needed for boundary checks
5176 *
5177 * parse the declaration for a Mixed Element content
5178 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
5179 *
5180 *
5181 * [47] children ::= (choice | seq) ('?' | '*' | '+')?
5182 *
5183 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
5184 *
5185 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
5186 *
5187 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
5188 *
5189 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
5190 * TODO Parameter-entity replacement text must be properly nested
5191 * with parenthesized groups. That is to say, if either of the
5192 * opening or closing parentheses in a choice, seq, or Mixed
5193 * construct is contained in the replacement text for a parameter
5194 * entity, both must be contained in the same replacement text. For
5195 * interoperability, if a parameter-entity reference appears in a
5196 * choice, seq, or Mixed construct, its replacement text should not
5197 * be empty, and neither the first nor last non-blank character of
5198 * the replacement text should be a connector (| or ,).
5199 *
5200 * Returns the tree of xmlElementContentPtr describing the element
5201 * hierarchy.
5202 */
5203xmlElementContentPtr
5204xmlParseElementChildrenContentDecl (xmlParserCtxtPtr ctxt, int inputchk) {
5205 xmlElementContentPtr ret = NULL, cur = NULL, last = NULL, op = NULL;
5206 const xmlChar *elem;
5207 xmlChar type = 0;
5208
5209 SKIP_BLANKS;
5210 GROW;
5211 if (RAW == '(') {
5212 int inputid = ctxt->input->id;
5213
5214 /* Recurse on first child */
5215 NEXT;
5216 SKIP_BLANKS;
5217 cur = ret = xmlParseElementChildrenContentDecl(ctxt, inputid);
5218 SKIP_BLANKS;
5219 GROW;
5220 } else {
5221 elem = xmlParseName(ctxt);
5222 if (elem == NULL) {
5223 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
5224 return(NULL);
5225 }
5226 cur = ret = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
5227 if (cur == NULL) {
5228 xmlErrMemory(ctxt, NULL);
5229 return(NULL);
5230 }
5231 GROW;
5232 if (RAW == '?') {
5233 cur->ocur = XML_ELEMENT_CONTENT_OPT;
5234 NEXT;
5235 } else if (RAW == '*') {
5236 cur->ocur = XML_ELEMENT_CONTENT_MULT;
5237 NEXT;
5238 } else if (RAW == '+') {
5239 cur->ocur = XML_ELEMENT_CONTENT_PLUS;
5240 NEXT;
5241 } else {
5242 cur->ocur = XML_ELEMENT_CONTENT_ONCE;
5243 }
5244 GROW;
5245 }
5246 SKIP_BLANKS;
5247 SHRINK;
5248 while (RAW != ')') {
5249 /*
5250 * Each loop we parse one separator and one element.
5251 */
5252 if (RAW == ',') {
5253 if (type == 0) type = CUR;
5254
5255 /*
5256 * Detect "Name | Name , Name" error
5257 */
5258 else if (type != CUR) {
5259 xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
5260 "xmlParseElementChildrenContentDecl : '%c' expected\n",
5261 type);
5262 if ((last != NULL) && (last != ret))
5263 xmlFreeDocElementContent(ctxt->myDoc, last);
5264 if (ret != NULL)
5265 xmlFreeDocElementContent(ctxt->myDoc, ret);
5266 return(NULL);
5267 }
5268 NEXT;
5269
5270 op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_SEQ);
5271 if (op == NULL) {
5272 if ((last != NULL) && (last != ret))
5273 xmlFreeDocElementContent(ctxt->myDoc, last);
5274 xmlFreeDocElementContent(ctxt->myDoc, ret);
5275 return(NULL);
5276 }
5277 if (last == NULL) {
5278 op->c1 = ret;
5279 if (ret != NULL)
5280 ret->parent = op;
5281 ret = cur = op;
5282 } else {
5283 cur->c2 = op;
5284 if (op != NULL)
5285 op->parent = cur;
5286 op->c1 = last;
5287 if (last != NULL)
5288 last->parent = op;
5289 cur =op;
5290 last = NULL;
5291 }
5292 } else if (RAW == '|') {
5293 if (type == 0) type = CUR;
5294
5295 /*
5296 * Detect "Name , Name | Name" error
5297 */
5298 else if (type != CUR) {
5299 xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
5300 "xmlParseElementChildrenContentDecl : '%c' expected\n",
5301 type);
5302 if ((last != NULL) && (last != ret))
5303 xmlFreeDocElementContent(ctxt->myDoc, last);
5304 if (ret != NULL)
5305 xmlFreeDocElementContent(ctxt->myDoc, ret);
5306 return(NULL);
5307 }
5308 NEXT;
5309
5310 op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
5311 if (op == NULL) {
5312 if ((last != NULL) && (last != ret))
5313 xmlFreeDocElementContent(ctxt->myDoc, last);
5314 if (ret != NULL)
5315 xmlFreeDocElementContent(ctxt->myDoc, ret);
5316 return(NULL);
5317 }
5318 if (last == NULL) {
5319 op->c1 = ret;
5320 if (ret != NULL)
5321 ret->parent = op;
5322 ret = cur = op;
5323 } else {
5324 cur->c2 = op;
5325 if (op != NULL)
5326 op->parent = cur;
5327 op->c1 = last;
5328 if (last != NULL)
5329 last->parent = op;
5330 cur =op;
5331 last = NULL;
5332 }
5333 } else {
5334 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_FINISHED, NULL);
5335 if (ret != NULL)
5336 xmlFreeDocElementContent(ctxt->myDoc, ret);
5337 return(NULL);
5338 }
5339 GROW;
5340 SKIP_BLANKS;
5341 GROW;
5342 if (RAW == '(') {
5343 int inputid = ctxt->input->id;
5344 /* Recurse on second child */
5345 NEXT;
5346 SKIP_BLANKS;
5347 last = xmlParseElementChildrenContentDecl(ctxt, inputid);
5348 SKIP_BLANKS;
5349 } else {
5350 elem = xmlParseName(ctxt);
5351 if (elem == NULL) {
5352 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
5353 if (ret != NULL)
5354 xmlFreeDocElementContent(ctxt->myDoc, ret);
5355 return(NULL);
5356 }
5357 last = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
5358 if (RAW == '?') {
5359 last->ocur = XML_ELEMENT_CONTENT_OPT;
5360 NEXT;
5361 } else if (RAW == '*') {
5362 last->ocur = XML_ELEMENT_CONTENT_MULT;
5363 NEXT;
5364 } else if (RAW == '+') {
5365 last->ocur = XML_ELEMENT_CONTENT_PLUS;
5366 NEXT;
5367 } else {
5368 last->ocur = XML_ELEMENT_CONTENT_ONCE;
5369 }
5370 }
5371 SKIP_BLANKS;
5372 GROW;
5373 }
5374 if ((cur != NULL) && (last != NULL)) {
5375 cur->c2 = last;
5376 if (last != NULL)
5377 last->parent = cur;
5378 }
5379 if ((ctxt->validate) && (ctxt->input->id != inputchk)) {
5380 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
5381"Element content declaration doesn't start and stop in the same entity\n",
5382 NULL);
5383 }
5384 NEXT;
5385 if (RAW == '?') {
5386 if (ret != NULL) {
5387 if ((ret->ocur == XML_ELEMENT_CONTENT_PLUS) ||
5388 (ret->ocur == XML_ELEMENT_CONTENT_MULT))
5389 ret->ocur = XML_ELEMENT_CONTENT_MULT;
5390 else
5391 ret->ocur = XML_ELEMENT_CONTENT_OPT;
5392 }
5393 NEXT;
5394 } else if (RAW == '*') {
5395 if (ret != NULL) {
5396 ret->ocur = XML_ELEMENT_CONTENT_MULT;
5397 cur = ret;
5398 /*
5399 * Some normalization:
5400 * (a | b* | c?)* == (a | b | c)*
5401 */
5402 while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) {
5403 if ((cur->c1 != NULL) &&
5404 ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
5405 (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT)))
5406 cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
5407 if ((cur->c2 != NULL) &&
5408 ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
5409 (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT)))
5410 cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
5411 cur = cur->c2;
5412 }
5413 }
5414 NEXT;
5415 } else if (RAW == '+') {
5416 if (ret != NULL) {
5417 int found = 0;
5418
5419 if ((ret->ocur == XML_ELEMENT_CONTENT_OPT) ||
5420 (ret->ocur == XML_ELEMENT_CONTENT_MULT))
5421 ret->ocur = XML_ELEMENT_CONTENT_MULT;
5422 else
5423 ret->ocur = XML_ELEMENT_CONTENT_PLUS;
5424 /*
5425 * Some normalization:
5426 * (a | b*)+ == (a | b)*
5427 * (a | b?)+ == (a | b)*
5428 */
5429 while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) {
5430 if ((cur->c1 != NULL) &&
5431 ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
5432 (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT))) {
5433 cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
5434 found = 1;
5435 }
5436 if ((cur->c2 != NULL) &&
5437 ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
5438 (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT))) {
5439 cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
5440 found = 1;
5441 }
5442 cur = cur->c2;
5443 }
5444 if (found)
5445 ret->ocur = XML_ELEMENT_CONTENT_MULT;
5446 }
5447 NEXT;
5448 }
5449 return(ret);
5450}
5451
5452/**
5453 * xmlParseElementContentDecl:
5454 * @ctxt: an XML parser context
5455 * @name: the name of the element being defined.
5456 * @result: the Element Content pointer will be stored here if any
5457 *
5458 * parse the declaration for an Element content either Mixed or Children,
5459 * the cases EMPTY and ANY are handled directly in xmlParseElementDecl
5460 *
5461 * [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
5462 *
5463 * returns: the type of element content XML_ELEMENT_TYPE_xxx
5464 */
5465
5466int
5467xmlParseElementContentDecl(xmlParserCtxtPtr ctxt, const xmlChar *name,
5468 xmlElementContentPtr *result) {
5469
5470 xmlElementContentPtr tree = NULL;
5471 int inputid = ctxt->input->id;
5472 int res;
5473
5474 *result = NULL;
5475
5476 if (RAW != '(') {
5477 xmlFatalErrMsgStr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
5478 "xmlParseElementContentDecl : %s '(' expected\n", name);
5479 return(-1);
5480 }
5481 NEXT;
5482 GROW;
5483 SKIP_BLANKS;
5484 if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
5485 tree = xmlParseElementMixedContentDecl(ctxt, inputid);
5486 res = XML_ELEMENT_TYPE_MIXED;
5487 } else {
5488 tree = xmlParseElementChildrenContentDecl(ctxt, inputid);
5489 res = XML_ELEMENT_TYPE_ELEMENT;
5490 }
5491 SKIP_BLANKS;
5492 *result = tree;
5493 return(res);
5494}
5495
5496/**
5497 * xmlParseElementDecl:
5498 * @ctxt: an XML parser context
5499 *
5500 * parse an Element declaration.
5501 *
5502 * [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'
5503 *
5504 * [ VC: Unique Element Type Declaration ]
5505 * No element type may be declared more than once
5506 *
5507 * Returns the type of the element, or -1 in case of error
5508 */
5509int
5510xmlParseElementDecl(xmlParserCtxtPtr ctxt) {
5511 const xmlChar *name;
5512 int ret = -1;
5513 xmlElementContentPtr content = NULL;
5514
5515 /* GROW; done in the caller */
5516 if (CMP9(CUR_PTR, '<', '!', 'E', 'L', 'E', 'M', 'E', 'N', 'T')) {
5517 xmlParserInputPtr input = ctxt->input;
5518
5519 SKIP(9);
5520 if (!IS_BLANK_CH(CUR)) {
5521 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5522 "Space required after 'ELEMENT'\n");
5523 }
5524 SKIP_BLANKS;
5525 name = xmlParseName(ctxt);
5526 if (name == NULL) {
5527 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5528 "xmlParseElementDecl: no name for Element\n");
5529 return(-1);
5530 }
5531 while ((RAW == 0) && (ctxt->inputNr > 1))
5532 xmlPopInput(ctxt);
5533 if (!IS_BLANK_CH(CUR)) {
5534 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5535 "Space required after the element name\n");
5536 }
5537 SKIP_BLANKS;
5538 if (CMP5(CUR_PTR, 'E', 'M', 'P', 'T', 'Y')) {
5539 SKIP(5);
5540 /*
5541 * Element must always be empty.
5542 */
5543 ret = XML_ELEMENT_TYPE_EMPTY;
5544 } else if ((RAW == 'A') && (NXT(1) == 'N') &&
5545 (NXT(2) == 'Y')) {
5546 SKIP(3);
5547 /*
5548 * Element is a generic container.
5549 */
5550 ret = XML_ELEMENT_TYPE_ANY;
5551 } else if (RAW == '(') {
5552 ret = xmlParseElementContentDecl(ctxt, name, &content);
5553 } else {
5554 /*
5555 * [ WFC: PEs in Internal Subset ] error handling.
5556 */
5557 if ((RAW == '%') && (ctxt->external == 0) &&
5558 (ctxt->inputNr == 1)) {
5559 xmlFatalErrMsg(ctxt, XML_ERR_PEREF_IN_INT_SUBSET,
5560 "PEReference: forbidden within markup decl in internal subset\n");
5561 } else {
5562 xmlFatalErrMsg(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
5563 "xmlParseElementDecl: 'EMPTY', 'ANY' or '(' expected\n");
5564 }
5565 return(-1);
5566 }
5567
5568 SKIP_BLANKS;
5569 /*
5570 * Pop-up of finished entities.
5571 */
5572 while ((RAW == 0) && (ctxt->inputNr > 1))
5573 xmlPopInput(ctxt);
5574 SKIP_BLANKS;
5575
5576 if (RAW != '>') {
5577 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
5578 if (content != NULL) {
5579 xmlFreeDocElementContent(ctxt->myDoc, content);
5580 }
5581 } else {
5582 if (input != ctxt->input) {
5583 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5584 "Element declaration doesn't start and stop in the same entity\n");
5585 }
5586
5587 NEXT;
5588 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5589 (ctxt->sax->elementDecl != NULL)) {
5590 if (content != NULL)
5591 content->parent = NULL;
5592 ctxt->sax->elementDecl(ctxt->userData, name, ret,
5593 content);
5594 if ((content != NULL) && (content->parent == NULL)) {
5595 /*
5596 * this is a trick: if xmlAddElementDecl is called,
5597 * instead of copying the full tree it is plugged directly
5598 * if called from the parser. Avoid duplicating the
5599 * interfaces or change the API/ABI
5600 */
5601 xmlFreeDocElementContent(ctxt->myDoc, content);
5602 }
5603 } else if (content != NULL) {
5604 xmlFreeDocElementContent(ctxt->myDoc, content);
5605 }
5606 }
5607 }
5608 return(ret);
5609}
5610
5611/**
5612 * xmlParseConditionalSections
5613 * @ctxt: an XML parser context
5614 *
5615 * [61] conditionalSect ::= includeSect | ignoreSect
5616 * [62] includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>'
5617 * [63] ignoreSect ::= '<![' S? 'IGNORE' S? '[' ignoreSectContents* ']]>'
5618 * [64] ignoreSectContents ::= Ignore ('<![' ignoreSectContents ']]>' Ignore)*
5619 * [65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*)
5620 */
5621
5622static void
5623xmlParseConditionalSections(xmlParserCtxtPtr ctxt) {
5624 SKIP(3);
5625 SKIP_BLANKS;
5626 if (CMP7(CUR_PTR, 'I', 'N', 'C', 'L', 'U', 'D', 'E')) {
5627 SKIP(7);
5628 SKIP_BLANKS;
5629 if (RAW != '[') {
5630 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
5631 } else {
5632 NEXT;
5633 }
5634 if (xmlParserDebugEntities) {
5635 if ((ctxt->input != NULL) && (ctxt->input->filename))
5636 xmlGenericError(xmlGenericErrorContext,
5637 "%s(%d): ", ctxt->input->filename,
5638 ctxt->input->line);
5639 xmlGenericError(xmlGenericErrorContext,
5640 "Entering INCLUDE Conditional Section\n");
5641 }
5642
5643 while ((RAW != 0) && ((RAW != ']') || (NXT(1) != ']') ||
5644 (NXT(2) != '>'))) {
5645 const xmlChar *check = CUR_PTR;
5646 unsigned int cons = ctxt->input->consumed;
5647
5648 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
5649 xmlParseConditionalSections(ctxt);
5650 } else if (IS_BLANK_CH(CUR)) {
5651 NEXT;
5652 } else if (RAW == '%') {
5653 xmlParsePEReference(ctxt);
5654 } else
5655 xmlParseMarkupDecl(ctxt);
5656
5657 /*
5658 * Pop-up of finished entities.
5659 */
5660 while ((RAW == 0) && (ctxt->inputNr > 1))
5661 xmlPopInput(ctxt);
5662
5663 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
5664 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
5665 break;
5666 }
5667 }
5668 if (xmlParserDebugEntities) {
5669 if ((ctxt->input != NULL) && (ctxt->input->filename))
5670 xmlGenericError(xmlGenericErrorContext,
5671 "%s(%d): ", ctxt->input->filename,
5672 ctxt->input->line);
5673 xmlGenericError(xmlGenericErrorContext,
5674 "Leaving INCLUDE Conditional Section\n");
5675 }
5676
5677 } else if (CMP6(CUR_PTR, 'I', 'G', 'N', 'O', 'R', 'E')) {
5678 int state;
5679 xmlParserInputState instate;
5680 int depth = 0;
5681
5682 SKIP(6);
5683 SKIP_BLANKS;
5684 if (RAW != '[') {
5685 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
5686 } else {
5687 NEXT;
5688 }
5689 if (xmlParserDebugEntities) {
5690 if ((ctxt->input != NULL) && (ctxt->input->filename))
5691 xmlGenericError(xmlGenericErrorContext,
5692 "%s(%d): ", ctxt->input->filename,
5693 ctxt->input->line);
5694 xmlGenericError(xmlGenericErrorContext,
5695 "Entering IGNORE Conditional Section\n");
5696 }
5697
5698 /*
5699 * Parse up to the end of the conditional section
5700 * But disable SAX event generating DTD building in the meantime
5701 */
5702 state = ctxt->disableSAX;
5703 instate = ctxt->instate;
5704 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
5705 ctxt->instate = XML_PARSER_IGNORE;
5706
5707 while ((depth >= 0) && (RAW != 0)) {
5708 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
5709 depth++;
5710 SKIP(3);
5711 continue;
5712 }
5713 if ((RAW == ']') && (NXT(1) == ']') && (NXT(2) == '>')) {
5714 if (--depth >= 0) SKIP(3);
5715 continue;
5716 }
5717 NEXT;
5718 continue;
5719 }
5720
5721 ctxt->disableSAX = state;
5722 ctxt->instate = instate;
5723
5724 if (xmlParserDebugEntities) {
5725 if ((ctxt->input != NULL) && (ctxt->input->filename))
5726 xmlGenericError(xmlGenericErrorContext,
5727 "%s(%d): ", ctxt->input->filename,
5728 ctxt->input->line);
5729 xmlGenericError(xmlGenericErrorContext,
5730 "Leaving IGNORE Conditional Section\n");
5731 }
5732
5733 } else {
5734 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID_KEYWORD, NULL);
5735 }
5736
5737 if (RAW == 0)
5738 SHRINK;
5739
5740 if (RAW == 0) {
5741 xmlFatalErr(ctxt, XML_ERR_CONDSEC_NOT_FINISHED, NULL);
5742 } else {
5743 SKIP(3);
5744 }
5745}
5746
5747/**
5748 * xmlParseMarkupDecl:
5749 * @ctxt: an XML parser context
5750 *
5751 * parse Markup declarations
5752 *
5753 * [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl |
5754 * NotationDecl | PI | Comment
5755 *
5756 * [ VC: Proper Declaration/PE Nesting ]
5757 * Parameter-entity replacement text must be properly nested with
5758 * markup declarations. That is to say, if either the first character
5759 * or the last character of a markup declaration (markupdecl above) is
5760 * contained in the replacement text for a parameter-entity reference,
5761 * both must be contained in the same replacement text.
5762 *
5763 * [ WFC: PEs in Internal Subset ]
5764 * In the internal DTD subset, parameter-entity references can occur
5765 * only where markup declarations can occur, not within markup declarations.
5766 * (This does not apply to references that occur in external parameter
5767 * entities or to the external subset.)
5768 */
5769void
5770xmlParseMarkupDecl(xmlParserCtxtPtr ctxt) {
5771 GROW;
5772 if (CUR == '<') {
5773 if (NXT(1) == '!') {
5774 switch (NXT(2)) {
5775 case 'E':
5776 if (NXT(3) == 'L')
5777 xmlParseElementDecl(ctxt);
5778 else if (NXT(3) == 'N')
5779 xmlParseEntityDecl(ctxt);
5780 break;
5781 case 'A':
5782 xmlParseAttributeListDecl(ctxt);
5783 break;
5784 case 'N':
5785 xmlParseNotationDecl(ctxt);
5786 break;
5787 case '-':
5788 xmlParseComment(ctxt);
5789 break;
5790 default:
5791 /* there is an error but it will be detected later */
5792 break;
5793 }
5794 } else if (NXT(1) == '?') {
5795 xmlParsePI(ctxt);
5796 }
5797 }
5798 /*
5799 * This is only for internal subset. On external entities,
5800 * the replacement is done before parsing stage
5801 */
5802 if ((ctxt->external == 0) && (ctxt->inputNr == 1))
5803 xmlParsePEReference(ctxt);
5804
5805 /*
5806 * Conditional sections are allowed from entities included
5807 * by PE References in the internal subset.
5808 */
5809 if ((ctxt->external == 0) && (ctxt->inputNr > 1)) {
5810 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
5811 xmlParseConditionalSections(ctxt);
5812 }
5813 }
5814
5815 ctxt->instate = XML_PARSER_DTD;
5816}
5817
5818/**
5819 * xmlParseTextDecl:
5820 * @ctxt: an XML parser context
5821 *
5822 * parse an XML declaration header for external entities
5823 *
5824 * [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>'
5825 *
5826 * Question: Seems that EncodingDecl is mandatory ? Is that a typo ?
5827 */
5828
5829void
5830xmlParseTextDecl(xmlParserCtxtPtr ctxt) {
5831 xmlChar *version;
5832 const xmlChar *encoding;
5833
5834 /*
5835 * We know that '<?xml' is here.
5836 */
5837 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
5838 SKIP(5);
5839 } else {
5840 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_STARTED, NULL);
5841 return;
5842 }
5843
5844 if (!IS_BLANK_CH(CUR)) {
5845 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5846 "Space needed after '<?xml'\n");
5847 }
5848 SKIP_BLANKS;
5849
5850 /*
5851 * We may have the VersionInfo here.
5852 */
5853 version = xmlParseVersionInfo(ctxt);
5854 if (version == NULL)
5855 version = xmlCharStrdup(XML_DEFAULT_VERSION);
5856 else {
5857 if (!IS_BLANK_CH(CUR)) {
5858 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5859 "Space needed here\n");
5860 }
5861 }
5862 ctxt->input->version = version;
5863
5864 /*
5865 * We must have the encoding declaration
5866 */
5867 encoding = xmlParseEncodingDecl(ctxt);
5868 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
5869 /*
5870 * The XML REC instructs us to stop parsing right here
5871 */
5872 return;
5873 }
5874 if ((encoding == NULL) && (ctxt->errNo == XML_ERR_OK)) {
5875 xmlFatalErrMsg(ctxt, XML_ERR_MISSING_ENCODING,
5876 "Missing encoding in text declaration\n");
5877 }
5878
5879 SKIP_BLANKS;
5880 if ((RAW == '?') && (NXT(1) == '>')) {
5881 SKIP(2);
5882 } else if (RAW == '>') {
5883 /* Deprecated old WD ... */
5884 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
5885 NEXT;
5886 } else {
5887 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
5888 MOVETO_ENDTAG(CUR_PTR);
5889 NEXT;
5890 }
5891}
5892
5893/**
5894 * xmlParseExternalSubset:
5895 * @ctxt: an XML parser context
5896 * @ExternalID: the external identifier
5897 * @SystemID: the system identifier (or URL)
5898 *
5899 * parse Markup declarations from an external subset
5900 *
5901 * [30] extSubset ::= textDecl? extSubsetDecl
5902 *
5903 * [31] extSubsetDecl ::= (markupdecl | conditionalSect | PEReference | S) *
5904 */
5905void
5906xmlParseExternalSubset(xmlParserCtxtPtr ctxt, const xmlChar *ExternalID,
5907 const xmlChar *SystemID) {
5908 xmlDetectSAX2(ctxt);
5909 GROW;
5910 if (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) {
5911 xmlParseTextDecl(ctxt);
5912 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
5913 /*
5914 * The XML REC instructs us to stop parsing right here
5915 */
5916 ctxt->instate = XML_PARSER_EOF;
5917 return;
5918 }
5919 }
5920 if (ctxt->myDoc == NULL) {
5921 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
5922 }
5923 if ((ctxt->myDoc != NULL) && (ctxt->myDoc->intSubset == NULL))
5924 xmlCreateIntSubset(ctxt->myDoc, NULL, ExternalID, SystemID);
5925
5926 ctxt->instate = XML_PARSER_DTD;
5927 ctxt->external = 1;
5928 while (((RAW == '<') && (NXT(1) == '?')) ||
5929 ((RAW == '<') && (NXT(1) == '!')) ||
5930 (RAW == '%') || IS_BLANK_CH(CUR)) {
5931 const xmlChar *check = CUR_PTR;
5932 unsigned int cons = ctxt->input->consumed;
5933
5934 GROW;
5935 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
5936 xmlParseConditionalSections(ctxt);
5937 } else if (IS_BLANK_CH(CUR)) {
5938 NEXT;
5939 } else if (RAW == '%') {
5940 xmlParsePEReference(ctxt);
5941 } else
5942 xmlParseMarkupDecl(ctxt);
5943
5944 /*
5945 * Pop-up of finished entities.
5946 */
5947 while ((RAW == 0) && (ctxt->inputNr > 1))
5948 xmlPopInput(ctxt);
5949
5950 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
5951 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
5952 break;
5953 }
5954 }
5955
5956 if (RAW != 0) {
5957 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
5958 }
5959
5960}
5961
5962/**
5963 * xmlParseReference:
5964 * @ctxt: an XML parser context
5965 *
5966 * parse and handle entity references in content, depending on the SAX
5967 * interface, this may end-up in a call to character() if this is a
5968 * CharRef, a predefined entity, if there is no reference() callback.
5969 * or if the parser was asked to switch to that mode.
5970 *
5971 * [67] Reference ::= EntityRef | CharRef
5972 */
5973void
5974xmlParseReference(xmlParserCtxtPtr ctxt) {
5975 xmlEntityPtr ent;
5976 xmlChar *val;
5977 if (RAW != '&') return;
5978
5979 if (NXT(1) == '#') {
5980 int i = 0;
5981 xmlChar out[10];
5982 int hex = NXT(2);
5983 int value = xmlParseCharRef(ctxt);
5984
5985 if (ctxt->charset != XML_CHAR_ENCODING_UTF8) {
5986 /*
5987 * So we are using non-UTF-8 buffers
5988 * Check that the char fit on 8bits, if not
5989 * generate a CharRef.
5990 */
5991 if (value <= 0xFF) {
5992 out[0] = value;
5993 out[1] = 0;
5994 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
5995 (!ctxt->disableSAX))
5996 ctxt->sax->characters(ctxt->userData, out, 1);
5997 } else {
5998 if ((hex == 'x') || (hex == 'X'))
5999 snprintf((char *)out, sizeof(out), "#x%X", value);
6000 else
6001 snprintf((char *)out, sizeof(out), "#%d", value);
6002 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
6003 (!ctxt->disableSAX))
6004 ctxt->sax->reference(ctxt->userData, out);
6005 }
6006 } else {
6007 /*
6008 * Just encode the value in UTF-8
6009 */
6010 COPY_BUF(0 ,out, i, value);
6011 out[i] = 0;
6012 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
6013 (!ctxt->disableSAX))
6014 ctxt->sax->characters(ctxt->userData, out, i);
6015 }
6016 } else {
6017 int was_checked;
6018
6019 ent = xmlParseEntityRef(ctxt);
6020 if (ent == NULL) return;
6021 if (!ctxt->wellFormed)
6022 return;
6023 was_checked = ent->checked;
6024 if ((ent->name != NULL) &&
6025 (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY)) {
6026 xmlNodePtr list = NULL;
6027 xmlParserErrors ret = XML_ERR_OK;
6028
6029
6030 /*
6031 * The first reference to the entity trigger a parsing phase
6032 * where the ent->children is filled with the result from
6033 * the parsing.
6034 */
6035 if (ent->checked == 0) {
6036 xmlChar *value;
6037
6038 value = ent->content;
6039
6040 /*
6041 * Check that this entity is well formed
6042 */
6043 if ((value != NULL) && (value[0] != 0) &&
6044 (value[1] == 0) && (value[0] == '<') &&
6045 (xmlStrEqual(ent->name, BAD_CAST "lt"))) {
6046 /*
6047 * DONE: get definite answer on this !!!
6048 * Lots of entity decls are used to declare a single
6049 * char
6050 * <!ENTITY lt "<">
6051 * Which seems to be valid since
6052 * 2.4: The ampersand character (&) and the left angle
6053 * bracket (<) may appear in their literal form only
6054 * when used ... They are also legal within the literal
6055 * entity value of an internal entity declaration;i
6056 * see "4.3.2 Well-Formed Parsed Entities".
6057 * IMHO 2.4 and 4.3.2 are directly in contradiction.
6058 * Looking at the OASIS test suite and James Clark
6059 * tests, this is broken. However the XML REC uses
6060 * it. Is the XML REC not well-formed ????
6061 * This is a hack to avoid this problem
6062 *
6063 * ANSWER: since lt gt amp .. are already defined,
6064 * this is a redefinition and hence the fact that the
6065 * content is not well balanced is not a Wf error, this
6066 * is lousy but acceptable.
6067 */
6068 list = xmlNewDocText(ctxt->myDoc, value);
6069 if (list != NULL) {
6070 if ((ent->etype == XML_INTERNAL_GENERAL_ENTITY) &&
6071 (ent->children == NULL)) {
6072 ent->children = list;
6073 ent->last = list;
6074 ent->owner = 1;
6075 list->parent = (xmlNodePtr) ent;
6076 } else {
6077 xmlFreeNodeList(list);
6078 }
6079 } else if (list != NULL) {
6080 xmlFreeNodeList(list);
6081 }
6082 } else {
6083 /*
6084 * 4.3.2: An internal general parsed entity is well-formed
6085 * if its replacement text matches the production labeled
6086 * content.
6087 */
6088
6089 void *user_data;
6090 /*
6091 * This is a bit hackish but this seems the best
6092 * way to make sure both SAX and DOM entity support
6093 * behaves okay.
6094 */
6095 if (ctxt->userData == ctxt)
6096 user_data = NULL;
6097 else
6098 user_data = ctxt->userData;
6099
6100 if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
6101 ctxt->depth++;
6102 ret = xmlParseBalancedChunkMemoryInternal(ctxt,
6103 value, user_data, &list);
6104 ctxt->depth--;
6105 } else if (ent->etype ==
6106 XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
6107 ctxt->depth++;
6108 ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt,
6109 ctxt->sax, user_data, ctxt->depth,
6110 ent->URI, ent->ExternalID, &list);
6111 ctxt->depth--;
6112 } else {
6113 ret = XML_ERR_ENTITY_PE_INTERNAL;
6114 xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR,
6115 "invalid entity type found\n", NULL);
6116 }
6117 if (ret == XML_ERR_ENTITY_LOOP) {
6118 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
6119 return;
6120 } else if ((ret == XML_ERR_OK) && (list != NULL)) {
6121 if (((ent->etype == XML_INTERNAL_GENERAL_ENTITY) ||
6122 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY))&&
6123 (ent->children == NULL)) {
6124 ent->children = list;
6125 if (ctxt->replaceEntities) {
6126 /*
6127 * Prune it directly in the generated document
6128 * except for single text nodes.
6129 */
6130 if (((list->type == XML_TEXT_NODE) &&
6131 (list->next == NULL)) ||
6132 (ctxt->parseMode == XML_PARSE_READER)) {
6133 list->parent = (xmlNodePtr) ent;
6134 list = NULL;
6135 ent->owner = 1;
6136 } else {
6137 ent->owner = 0;
6138 while (list != NULL) {
6139 list->parent = (xmlNodePtr) ctxt->node;
6140 list->doc = ctxt->myDoc;
6141 if (list->next == NULL)
6142 ent->last = list;
6143 list = list->next;
6144 }
6145 list = ent->children;
6146#ifdef LIBXML_LEGACY_ENABLED
6147 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
6148 xmlAddEntityReference(ent, list, NULL);
6149#endif /* LIBXML_LEGACY_ENABLED */
6150 }
6151 } else {
6152 ent->owner = 1;
6153 while (list != NULL) {
6154 list->parent = (xmlNodePtr) ent;
6155 if (list->next == NULL)
6156 ent->last = list;
6157 list = list->next;
6158 }
6159 }
6160 } else {
6161 xmlFreeNodeList(list);
6162 list = NULL;
6163 }
6164 } else if ((ret != XML_ERR_OK) &&
6165 (ret != XML_WAR_UNDECLARED_ENTITY)) {
6166 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
6167 "Entity '%s' failed to parse\n", ent->name);
6168 } else if (list != NULL) {
6169 xmlFreeNodeList(list);
6170 list = NULL;
6171 }
6172 }
6173 ent->checked = 1;
6174 }
6175
6176 if (ent->children == NULL) {
6177 /*
6178 * Probably running in SAX mode and the callbacks don't
6179 * build the entity content. So unless we already went
6180 * though parsing for first checking go though the entity
6181 * content to generate callbacks associated to the entity
6182 */
6183 if (was_checked == 1) {
6184 void *user_data;
6185 /*
6186 * This is a bit hackish but this seems the best
6187 * way to make sure both SAX and DOM entity support
6188 * behaves okay.
6189 */
6190 if (ctxt->userData == ctxt)
6191 user_data = NULL;
6192 else
6193 user_data = ctxt->userData;
6194
6195 if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
6196 ctxt->depth++;
6197 ret = xmlParseBalancedChunkMemoryInternal(ctxt,
6198 ent->content, user_data, NULL);
6199 ctxt->depth--;
6200 } else if (ent->etype ==
6201 XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
6202 ctxt->depth++;
6203 ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt,
6204 ctxt->sax, user_data, ctxt->depth,
6205 ent->URI, ent->ExternalID, NULL);
6206 ctxt->depth--;
6207 } else {
6208 ret = XML_ERR_ENTITY_PE_INTERNAL;
6209 xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR,
6210 "invalid entity type found\n", NULL);
6211 }
6212 if (ret == XML_ERR_ENTITY_LOOP) {
6213 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
6214 return;
6215 }
6216 }
6217 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
6218 (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
6219 /*
6220 * Entity reference callback comes second, it's somewhat
6221 * superfluous but a compatibility to historical behaviour
6222 */
6223 ctxt->sax->reference(ctxt->userData, ent->name);
6224 }
6225 return;
6226 }
6227 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
6228 (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
6229 /*
6230 * Create a node.
6231 */
6232 ctxt->sax->reference(ctxt->userData, ent->name);
6233 return;
6234 }
6235 if ((ctxt->replaceEntities) || (ent->children == NULL)) {
6236 /*
6237 * There is a problem on the handling of _private for entities
6238 * (bug 155816): Should we copy the content of the field from
6239 * the entity (possibly overwriting some value set by the user
6240 * when a copy is created), should we leave it alone, or should
6241 * we try to take care of different situations? The problem
6242 * is exacerbated by the usage of this field by the xmlReader.
6243 * To fix this bug, we look at _private on the created node
6244 * and, if it's NULL, we copy in whatever was in the entity.
6245 * If it's not NULL we leave it alone. This is somewhat of a
6246 * hack - maybe we should have further tests to determine
6247 * what to do.
6248 */
6249 if ((ctxt->node != NULL) && (ent->children != NULL)) {
6250 /*
6251 * Seems we are generating the DOM content, do
6252 * a simple tree copy for all references except the first
6253 * In the first occurrence list contains the replacement.
6254 * progressive == 2 means we are operating on the Reader
6255 * and since nodes are discarded we must copy all the time.
6256 */
6257 if (((list == NULL) && (ent->owner == 0)) ||
6258 (ctxt->parseMode == XML_PARSE_READER)) {
6259 xmlNodePtr nw = NULL, cur, firstChild = NULL;
6260
6261 /*
6262 * when operating on a reader, the entities definitions
6263 * are always owning the entities subtree.
6264 if (ctxt->parseMode == XML_PARSE_READER)
6265 ent->owner = 1;
6266 */
6267
6268 cur = ent->children;
6269 while (cur != NULL) {
6270 nw = xmlDocCopyNode(cur, ctxt->myDoc, 1);
6271 if (nw != NULL) {
6272 if (nw->_private == NULL)
6273 nw->_private = cur->_private;
6274 if (firstChild == NULL){
6275 firstChild = nw;
6276 }
6277 nw = xmlAddChild(ctxt->node, nw);
6278 }
6279 if (cur == ent->last) {
6280 /*
6281 * needed to detect some strange empty
6282 * node cases in the reader tests
6283 */
6284 if ((ctxt->parseMode == XML_PARSE_READER) &&
6285 (nw != NULL) &&
6286 (nw->type == XML_ELEMENT_NODE) &&
6287 (nw->children == NULL))
6288 nw->extra = 1;
6289
6290 break;
6291 }
6292 cur = cur->next;
6293 }
6294#ifdef LIBXML_LEGACY_ENABLED
6295 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
6296 xmlAddEntityReference(ent, firstChild, nw);
6297#endif /* LIBXML_LEGACY_ENABLED */
6298 } else if (list == NULL) {
6299 xmlNodePtr nw = NULL, cur, next, last,
6300 firstChild = NULL;
6301 /*
6302 * Copy the entity child list and make it the new
6303 * entity child list. The goal is to make sure any
6304 * ID or REF referenced will be the one from the
6305 * document content and not the entity copy.
6306 */
6307 cur = ent->children;
6308 ent->children = NULL;
6309 last = ent->last;
6310 ent->last = NULL;
6311 while (cur != NULL) {
6312 next = cur->next;
6313 cur->next = NULL;
6314 cur->parent = NULL;
6315 nw = xmlDocCopyNode(cur, ctxt->myDoc, 1);
6316 if (nw != NULL) {
6317 if (nw->_private == NULL)
6318 nw->_private = cur->_private;
6319 if (firstChild == NULL){
6320 firstChild = cur;
6321 }
6322 xmlAddChild((xmlNodePtr) ent, nw);
6323 xmlAddChild(ctxt->node, cur);
6324 }
6325 if (cur == last)
6326 break;
6327 cur = next;
6328 }
6329 ent->owner = 1;
6330#ifdef LIBXML_LEGACY_ENABLED
6331 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
6332 xmlAddEntityReference(ent, firstChild, nw);
6333#endif /* LIBXML_LEGACY_ENABLED */
6334 } else {
6335 const xmlChar *nbktext;
6336
6337 /*
6338 * the name change is to avoid coalescing of the
6339 * node with a possible previous text one which
6340 * would make ent->children a dangling pointer
6341 */
6342 nbktext = xmlDictLookup(ctxt->dict, BAD_CAST "nbktext",
6343 -1);
6344 if (ent->children->type == XML_TEXT_NODE)
6345 ent->children->name = nbktext;
6346 if ((ent->last != ent->children) &&
6347 (ent->last->type == XML_TEXT_NODE))
6348 ent->last->name = nbktext;
6349 xmlAddChildList(ctxt->node, ent->children);
6350 }
6351
6352 /*
6353 * This is to avoid a nasty side effect, see
6354 * characters() in SAX.c
6355 */
6356 ctxt->nodemem = 0;
6357 ctxt->nodelen = 0;
6358 return;
6359 }
6360 }
6361 } else {
6362 val = ent->content;
6363 if (val == NULL) return;
6364 /*
6365 * inline the entity.
6366 */
6367 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
6368 (!ctxt->disableSAX))
6369 ctxt->sax->characters(ctxt->userData, val, xmlStrlen(val));
6370 }
6371 }
6372}
6373
6374/**
6375 * xmlParseEntityRef:
6376 * @ctxt: an XML parser context
6377 *
6378 * parse ENTITY references declarations
6379 *
6380 * [68] EntityRef ::= '&' Name ';'
6381 *
6382 * [ WFC: Entity Declared ]
6383 * In a document without any DTD, a document with only an internal DTD
6384 * subset which contains no parameter entity references, or a document
6385 * with "standalone='yes'", the Name given in the entity reference
6386 * must match that in an entity declaration, except that well-formed
6387 * documents need not declare any of the following entities: amp, lt,
6388 * gt, apos, quot. The declaration of a parameter entity must precede
6389 * any reference to it. Similarly, the declaration of a general entity
6390 * must precede any reference to it which appears in a default value in an
6391 * attribute-list declaration. Note that if entities are declared in the
6392 * external subset or in external parameter entities, a non-validating
6393 * processor is not obligated to read and process their declarations;
6394 * for such documents, the rule that an entity must be declared is a
6395 * well-formedness constraint only if standalone='yes'.
6396 *
6397 * [ WFC: Parsed Entity ]
6398 * An entity reference must not contain the name of an unparsed entity
6399 *
6400 * Returns the xmlEntityPtr if found, or NULL otherwise.
6401 */
6402xmlEntityPtr
6403xmlParseEntityRef(xmlParserCtxtPtr ctxt) {
6404 const xmlChar *name;
6405 xmlEntityPtr ent = NULL;
6406
6407 GROW;
6408
6409 if (RAW == '&') {
6410 NEXT;
6411 name = xmlParseName(ctxt);
6412 if (name == NULL) {
6413 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6414 "xmlParseEntityRef: no name\n");
6415 } else {
6416 if (RAW == ';') {
6417 NEXT;
6418 /*
6419 * Ask first SAX for entity resolution, otherwise try the
6420 * predefined set.
6421 */
6422 if (ctxt->sax != NULL) {
6423 if (ctxt->sax->getEntity != NULL)
6424 ent = ctxt->sax->getEntity(ctxt->userData, name);
6425 if ((ctxt->wellFormed == 1 ) && (ent == NULL))
6426 ent = xmlGetPredefinedEntity(name);
6427 if ((ctxt->wellFormed == 1 ) && (ent == NULL) &&
6428 (ctxt->userData==ctxt)) {
6429 ent = xmlSAX2GetEntity(ctxt, name);
6430 }
6431 }
6432 /*
6433 * [ WFC: Entity Declared ]
6434 * In a document without any DTD, a document with only an
6435 * internal DTD subset which contains no parameter entity
6436 * references, or a document with "standalone='yes'", the
6437 * Name given in the entity reference must match that in an
6438 * entity declaration, except that well-formed documents
6439 * need not declare any of the following entities: amp, lt,
6440 * gt, apos, quot.
6441 * The declaration of a parameter entity must precede any
6442 * reference to it.
6443 * Similarly, the declaration of a general entity must
6444 * precede any reference to it which appears in a default
6445 * value in an attribute-list declaration. Note that if
6446 * entities are declared in the external subset or in
6447 * external parameter entities, a non-validating processor
6448 * is not obligated to read and process their declarations;
6449 * for such documents, the rule that an entity must be
6450 * declared is a well-formedness constraint only if
6451 * standalone='yes'.
6452 */
6453 if (ent == NULL) {
6454 if ((ctxt->standalone == 1) ||
6455 ((ctxt->hasExternalSubset == 0) &&
6456 (ctxt->hasPErefs == 0))) {
6457 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
6458 "Entity '%s' not defined\n", name);
6459 } else {
6460 xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
6461 "Entity '%s' not defined\n", name);
6462 if ((ctxt->inSubset == 0) &&
6463 (ctxt->sax != NULL) &&
6464 (ctxt->sax->reference != NULL)) {
6465 ctxt->sax->reference(ctxt->userData, name);
6466 }
6467 }
6468 ctxt->valid = 0;
6469 }
6470
6471 /*
6472 * [ WFC: Parsed Entity ]
6473 * An entity reference must not contain the name of an
6474 * unparsed entity
6475 */
6476 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
6477 xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
6478 "Entity reference to unparsed entity %s\n", name);
6479 }
6480
6481 /*
6482 * [ WFC: No External Entity References ]
6483 * Attribute values cannot contain direct or indirect
6484 * entity references to external entities.
6485 */
6486 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
6487 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
6488 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
6489 "Attribute references external entity '%s'\n", name);
6490 }
6491 /*
6492 * [ WFC: No < in Attribute Values ]
6493 * The replacement text of any entity referred to directly or
6494 * indirectly in an attribute value (other than "&lt;") must
6495 * not contain a <.
6496 */
6497 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
6498 (ent != NULL) &&
6499 (!xmlStrEqual(ent->name, BAD_CAST "lt")) &&
6500 (ent->content != NULL) &&
6501 (xmlStrchr(ent->content, '<'))) {
6502 xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
6503 "'<' in entity '%s' is not allowed in attributes values\n", name);
6504 }
6505
6506 /*
6507 * Internal check, no parameter entities here ...
6508 */
6509 else {
6510 switch (ent->etype) {
6511 case XML_INTERNAL_PARAMETER_ENTITY:
6512 case XML_EXTERNAL_PARAMETER_ENTITY:
6513 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
6514 "Attempt to reference the parameter entity '%s'\n",
6515 name);
6516 break;
6517 default:
6518 break;
6519 }
6520 }
6521
6522 /*
6523 * [ WFC: No Recursion ]
6524 * A parsed entity must not contain a recursive reference
6525 * to itself, either directly or indirectly.
6526 * Done somewhere else
6527 */
6528
6529 } else {
6530 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
6531 }
6532 }
6533 }
6534 return(ent);
6535}
6536
6537/**
6538 * xmlParseStringEntityRef:
6539 * @ctxt: an XML parser context
6540 * @str: a pointer to an index in the string
6541 *
6542 * parse ENTITY references declarations, but this version parses it from
6543 * a string value.
6544 *
6545 * [68] EntityRef ::= '&' Name ';'
6546 *
6547 * [ WFC: Entity Declared ]
6548 * In a document without any DTD, a document with only an internal DTD
6549 * subset which contains no parameter entity references, or a document
6550 * with "standalone='yes'", the Name given in the entity reference
6551 * must match that in an entity declaration, except that well-formed
6552 * documents need not declare any of the following entities: amp, lt,
6553 * gt, apos, quot. The declaration of a parameter entity must precede
6554 * any reference to it. Similarly, the declaration of a general entity
6555 * must precede any reference to it which appears in a default value in an
6556 * attribute-list declaration. Note that if entities are declared in the
6557 * external subset or in external parameter entities, a non-validating
6558 * processor is not obligated to read and process their declarations;
6559 * for such documents, the rule that an entity must be declared is a
6560 * well-formedness constraint only if standalone='yes'.
6561 *
6562 * [ WFC: Parsed Entity ]
6563 * An entity reference must not contain the name of an unparsed entity
6564 *
6565 * Returns the xmlEntityPtr if found, or NULL otherwise. The str pointer
6566 * is updated to the current location in the string.
6567 */
6568xmlEntityPtr
6569xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar ** str) {
6570 xmlChar *name;
6571 const xmlChar *ptr;
6572 xmlChar cur;
6573 xmlEntityPtr ent = NULL;
6574
6575 if ((str == NULL) || (*str == NULL))
6576 return(NULL);
6577 ptr = *str;
6578 cur = *ptr;
6579 if (cur == '&') {
6580 ptr++;
6581 cur = *ptr;
6582 name = xmlParseStringName(ctxt, &ptr);
6583 if (name == NULL) {
6584 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6585 "xmlParseStringEntityRef: no name\n");
6586 } else {
6587 if (*ptr == ';') {
6588 ptr++;
6589 /*
6590 * Ask first SAX for entity resolution, otherwise try the
6591 * predefined set.
6592 */
6593 if (ctxt->sax != NULL) {
6594 if (ctxt->sax->getEntity != NULL)
6595 ent = ctxt->sax->getEntity(ctxt->userData, name);
6596 if (ent == NULL)
6597 ent = xmlGetPredefinedEntity(name);
6598 if ((ent == NULL) && (ctxt->userData==ctxt)) {
6599 ent = xmlSAX2GetEntity(ctxt, name);
6600 }
6601 }
6602 /*
6603 * [ WFC: Entity Declared ]
6604 * In a document without any DTD, a document with only an
6605 * internal DTD subset which contains no parameter entity
6606 * references, or a document with "standalone='yes'", the
6607 * Name given in the entity reference must match that in an
6608 * entity declaration, except that well-formed documents
6609 * need not declare any of the following entities: amp, lt,
6610 * gt, apos, quot.
6611 * The declaration of a parameter entity must precede any
6612 * reference to it.
6613 * Similarly, the declaration of a general entity must
6614 * precede any reference to it which appears in a default
6615 * value in an attribute-list declaration. Note that if
6616 * entities are declared in the external subset or in
6617 * external parameter entities, a non-validating processor
6618 * is not obligated to read and process their declarations;
6619 * for such documents, the rule that an entity must be
6620 * declared is a well-formedness constraint only if
6621 * standalone='yes'.
6622 */
6623 if (ent == NULL) {
6624 if ((ctxt->standalone == 1) ||
6625 ((ctxt->hasExternalSubset == 0) &&
6626 (ctxt->hasPErefs == 0))) {
6627 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
6628 "Entity '%s' not defined\n", name);
6629 } else {
6630 xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
6631 "Entity '%s' not defined\n",
6632 name);
6633 }
6634 /* TODO ? check regressions ctxt->valid = 0; */
6635 }
6636
6637 /*
6638 * [ WFC: Parsed Entity ]
6639 * An entity reference must not contain the name of an
6640 * unparsed entity
6641 */
6642 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
6643 xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
6644 "Entity reference to unparsed entity %s\n", name);
6645 }
6646
6647 /*
6648 * [ WFC: No External Entity References ]
6649 * Attribute values cannot contain direct or indirect
6650 * entity references to external entities.
6651 */
6652 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
6653 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
6654 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
6655 "Attribute references external entity '%s'\n", name);
6656 }
6657 /*
6658 * [ WFC: No < in Attribute Values ]
6659 * The replacement text of any entity referred to directly or
6660 * indirectly in an attribute value (other than "&lt;") must
6661 * not contain a <.
6662 */
6663 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
6664 (ent != NULL) &&
6665 (!xmlStrEqual(ent->name, BAD_CAST "lt")) &&
6666 (ent->content != NULL) &&
6667 (xmlStrchr(ent->content, '<'))) {
6668 xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
6669 "'<' in entity '%s' is not allowed in attributes values\n",
6670 name);
6671 }
6672
6673 /*
6674 * Internal check, no parameter entities here ...
6675 */
6676 else {
6677 switch (ent->etype) {
6678 case XML_INTERNAL_PARAMETER_ENTITY:
6679 case XML_EXTERNAL_PARAMETER_ENTITY:
6680 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
6681 "Attempt to reference the parameter entity '%s'\n",
6682 name);
6683 break;
6684 default:
6685 break;
6686 }
6687 }
6688
6689 /*
6690 * [ WFC: No Recursion ]
6691 * A parsed entity must not contain a recursive reference
6692 * to itself, either directly or indirectly.
6693 * Done somewhere else
6694 */
6695
6696 } else {
6697 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
6698 }
6699 xmlFree(name);
6700 }
6701 }
6702 *str = ptr;
6703 return(ent);
6704}
6705
6706/**
6707 * xmlParsePEReference:
6708 * @ctxt: an XML parser context
6709 *
6710 * parse PEReference declarations
6711 * The entity content is handled directly by pushing it's content as
6712 * a new input stream.
6713 *
6714 * [69] PEReference ::= '%' Name ';'
6715 *
6716 * [ WFC: No Recursion ]
6717 * A parsed entity must not contain a recursive
6718 * reference to itself, either directly or indirectly.
6719 *
6720 * [ WFC: Entity Declared ]
6721 * In a document without any DTD, a document with only an internal DTD
6722 * subset which contains no parameter entity references, or a document
6723 * with "standalone='yes'", ... ... The declaration of a parameter
6724 * entity must precede any reference to it...
6725 *
6726 * [ VC: Entity Declared ]
6727 * In a document with an external subset or external parameter entities
6728 * with "standalone='no'", ... ... The declaration of a parameter entity
6729 * must precede any reference to it...
6730 *
6731 * [ WFC: In DTD ]
6732 * Parameter-entity references may only appear in the DTD.
6733 * NOTE: misleading but this is handled.
6734 */
6735void
6736xmlParsePEReference(xmlParserCtxtPtr ctxt)
6737{
6738 const xmlChar *name;
6739 xmlEntityPtr entity = NULL;
6740 xmlParserInputPtr input;
6741
6742 if (RAW == '%') {
6743 NEXT;
6744 name = xmlParseName(ctxt);
6745 if (name == NULL) {
6746 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6747 "xmlParsePEReference: no name\n");
6748 } else {
6749 if (RAW == ';') {
6750 NEXT;
6751 if ((ctxt->sax != NULL) &&
6752 (ctxt->sax->getParameterEntity != NULL))
6753 entity = ctxt->sax->getParameterEntity(ctxt->userData,
6754 name);
6755 if (entity == NULL) {
6756 /*
6757 * [ WFC: Entity Declared ]
6758 * In a document without any DTD, a document with only an
6759 * internal DTD subset which contains no parameter entity
6760 * references, or a document with "standalone='yes'", ...
6761 * ... The declaration of a parameter entity must precede
6762 * any reference to it...
6763 */
6764 if ((ctxt->standalone == 1) ||
6765 ((ctxt->hasExternalSubset == 0) &&
6766 (ctxt->hasPErefs == 0))) {
6767 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
6768 "PEReference: %%%s; not found\n",
6769 name);
6770 } else {
6771 /*
6772 * [ VC: Entity Declared ]
6773 * In a document with an external subset or external
6774 * parameter entities with "standalone='no'", ...
6775 * ... The declaration of a parameter entity must
6776 * precede any reference to it...
6777 */
6778 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
6779 "PEReference: %%%s; not found\n",
6780 name, NULL);
6781 ctxt->valid = 0;
6782 }
6783 } else {
6784 /*
6785 * Internal checking in case the entity quest barfed
6786 */
6787 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
6788 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
6789 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
6790 "Internal: %%%s; is not a parameter entity\n",
6791 name, NULL);
6792 } else if (ctxt->input->free != deallocblankswrapper) {
6793 input =
6794 xmlNewBlanksWrapperInputStream(ctxt, entity);
6795 xmlPushInput(ctxt, input);
6796 } else {
6797 /*
6798 * TODO !!!
6799 * handle the extra spaces added before and after
6800 * c.f. http://www.w3.org/TR/REC-xml#as-PE
6801 */
6802 input = xmlNewEntityInputStream(ctxt, entity);
6803 xmlPushInput(ctxt, input);
6804 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
6805 (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) &&
6806 (IS_BLANK_CH(NXT(5)))) {
6807 xmlParseTextDecl(ctxt);
6808 if (ctxt->errNo ==
6809 XML_ERR_UNSUPPORTED_ENCODING) {
6810 /*
6811 * The XML REC instructs us to stop parsing
6812 * right here
6813 */
6814 ctxt->instate = XML_PARSER_EOF;
6815 return;
6816 }
6817 }
6818 }
6819 }
6820 ctxt->hasPErefs = 1;
6821 } else {
6822 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
6823 }
6824 }
6825 }
6826}
6827
6828/**
6829 * xmlParseStringPEReference:
6830 * @ctxt: an XML parser context
6831 * @str: a pointer to an index in the string
6832 *
6833 * parse PEReference declarations
6834 *
6835 * [69] PEReference ::= '%' Name ';'
6836 *
6837 * [ WFC: No Recursion ]
6838 * A parsed entity must not contain a recursive
6839 * reference to itself, either directly or indirectly.
6840 *
6841 * [ WFC: Entity Declared ]
6842 * In a document without any DTD, a document with only an internal DTD
6843 * subset which contains no parameter entity references, or a document
6844 * with "standalone='yes'", ... ... The declaration of a parameter
6845 * entity must precede any reference to it...
6846 *
6847 * [ VC: Entity Declared ]
6848 * In a document with an external subset or external parameter entities
6849 * with "standalone='no'", ... ... The declaration of a parameter entity
6850 * must precede any reference to it...
6851 *
6852 * [ WFC: In DTD ]
6853 * Parameter-entity references may only appear in the DTD.
6854 * NOTE: misleading but this is handled.
6855 *
6856 * Returns the string of the entity content.
6857 * str is updated to the current value of the index
6858 */
6859xmlEntityPtr
6860xmlParseStringPEReference(xmlParserCtxtPtr ctxt, const xmlChar **str) {
6861 const xmlChar *ptr;
6862 xmlChar cur;
6863 xmlChar *name;
6864 xmlEntityPtr entity = NULL;
6865
6866 if ((str == NULL) || (*str == NULL)) return(NULL);
6867 ptr = *str;
6868 cur = *ptr;
6869 if (cur == '%') {
6870 ptr++;
6871 cur = *ptr;
6872 name = xmlParseStringName(ctxt, &ptr);
6873 if (name == NULL) {
6874 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6875 "xmlParseStringPEReference: no name\n");
6876 } else {
6877 cur = *ptr;
6878 if (cur == ';') {
6879 ptr++;
6880 cur = *ptr;
6881 if ((ctxt->sax != NULL) &&
6882 (ctxt->sax->getParameterEntity != NULL))
6883 entity = ctxt->sax->getParameterEntity(ctxt->userData,
6884 name);
6885 if (entity == NULL) {
6886 /*
6887 * [ WFC: Entity Declared ]
6888 * In a document without any DTD, a document with only an
6889 * internal DTD subset which contains no parameter entity
6890 * references, or a document with "standalone='yes'", ...
6891 * ... The declaration of a parameter entity must precede
6892 * any reference to it...
6893 */
6894 if ((ctxt->standalone == 1) ||
6895 ((ctxt->hasExternalSubset == 0) &&
6896 (ctxt->hasPErefs == 0))) {
6897 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
6898 "PEReference: %%%s; not found\n", name);
6899 } else {
6900 /*
6901 * [ VC: Entity Declared ]
6902 * In a document with an external subset or external
6903 * parameter entities with "standalone='no'", ...
6904 * ... The declaration of a parameter entity must
6905 * precede any reference to it...
6906 */
6907 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
6908 "PEReference: %%%s; not found\n",
6909 name, NULL);
6910 ctxt->valid = 0;
6911 }
6912 } else {
6913 /*
6914 * Internal checking in case the entity quest barfed
6915 */
6916 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
6917 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
6918 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
6919 "%%%s; is not a parameter entity\n",
6920 name, NULL);
6921 }
6922 }
6923 ctxt->hasPErefs = 1;
6924 } else {
6925 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
6926 }
6927 xmlFree(name);
6928 }
6929 }
6930 *str = ptr;
6931 return(entity);
6932}
6933
6934/**
6935 * xmlParseDocTypeDecl:
6936 * @ctxt: an XML parser context
6937 *
6938 * parse a DOCTYPE declaration
6939 *
6940 * [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S?
6941 * ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
6942 *
6943 * [ VC: Root Element Type ]
6944 * The Name in the document type declaration must match the element
6945 * type of the root element.
6946 */
6947
6948void
6949xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt) {
6950 const xmlChar *name = NULL;
6951 xmlChar *ExternalID = NULL;
6952 xmlChar *URI = NULL;
6953
6954 /*
6955 * We know that '<!DOCTYPE' has been detected.
6956 */
6957 SKIP(9);
6958
6959 SKIP_BLANKS;
6960
6961 /*
6962 * Parse the DOCTYPE name.
6963 */
6964 name = xmlParseName(ctxt);
6965 if (name == NULL) {
6966 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6967 "xmlParseDocTypeDecl : no DOCTYPE name !\n");
6968 }
6969 ctxt->intSubName = name;
6970
6971 SKIP_BLANKS;
6972
6973 /*
6974 * Check for SystemID and ExternalID
6975 */
6976 URI = xmlParseExternalID(ctxt, &ExternalID, 1);
6977
6978 if ((URI != NULL) || (ExternalID != NULL)) {
6979 ctxt->hasExternalSubset = 1;
6980 }
6981 ctxt->extSubURI = URI;
6982 ctxt->extSubSystem = ExternalID;
6983
6984 SKIP_BLANKS;
6985
6986 /*
6987 * Create and update the internal subset.
6988 */
6989 if ((ctxt->sax != NULL) && (ctxt->sax->internalSubset != NULL) &&
6990 (!ctxt->disableSAX))
6991 ctxt->sax->internalSubset(ctxt->userData, name, ExternalID, URI);
6992
6993 /*
6994 * Is there any internal subset declarations ?
6995 * they are handled separately in xmlParseInternalSubset()
6996 */
6997 if (RAW == '[')
6998 return;
6999
7000 /*
7001 * We should be at the end of the DOCTYPE declaration.
7002 */
7003 if (RAW != '>') {
7004 xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
7005 }
7006 NEXT;
7007}
7008
7009/**
7010 * xmlParseInternalSubset:
7011 * @ctxt: an XML parser context
7012 *
7013 * parse the internal subset declaration
7014 *
7015 * [28 end] ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
7016 */
7017
7018static void
7019xmlParseInternalSubset(xmlParserCtxtPtr ctxt) {
7020 /*
7021 * Is there any DTD definition ?
7022 */
7023 if (RAW == '[') {
7024 ctxt->instate = XML_PARSER_DTD;
7025 NEXT;
7026 /*
7027 * Parse the succession of Markup declarations and
7028 * PEReferences.
7029 * Subsequence (markupdecl | PEReference | S)*
7030 */
7031 while (RAW != ']') {
7032 const xmlChar *check = CUR_PTR;
7033 unsigned int cons = ctxt->input->consumed;
7034
7035 SKIP_BLANKS;
7036 xmlParseMarkupDecl(ctxt);
7037 xmlParsePEReference(ctxt);
7038
7039 /*
7040 * Pop-up of finished entities.
7041 */
7042 while ((RAW == 0) && (ctxt->inputNr > 1))
7043 xmlPopInput(ctxt);
7044
7045 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
7046 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
7047 "xmlParseInternalSubset: error detected in Markup declaration\n");
7048 break;
7049 }
7050 }
7051 if (RAW == ']') {
7052 NEXT;
7053 SKIP_BLANKS;
7054 }
7055 }
7056
7057 /*
7058 * We should be at the end of the DOCTYPE declaration.
7059 */
7060 if (RAW != '>') {
7061 xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
7062 }
7063 NEXT;
7064}
7065
7066#ifdef LIBXML_SAX1_ENABLED
7067/**
7068 * xmlParseAttribute:
7069 * @ctxt: an XML parser context
7070 * @value: a xmlChar ** used to store the value of the attribute
7071 *
7072 * parse an attribute
7073 *
7074 * [41] Attribute ::= Name Eq AttValue
7075 *
7076 * [ WFC: No External Entity References ]
7077 * Attribute values cannot contain direct or indirect entity references
7078 * to external entities.
7079 *
7080 * [ WFC: No < in Attribute Values ]
7081 * The replacement text of any entity referred to directly or indirectly in
7082 * an attribute value (other than "&lt;") must not contain a <.
7083 *
7084 * [ VC: Attribute Value Type ]
7085 * The attribute must have been declared; the value must be of the type
7086 * declared for it.
7087 *
7088 * [25] Eq ::= S? '=' S?
7089 *
7090 * With namespace:
7091 *
7092 * [NS 11] Attribute ::= QName Eq AttValue
7093 *
7094 * Also the case QName == xmlns:??? is handled independently as a namespace
7095 * definition.
7096 *
7097 * Returns the attribute name, and the value in *value.
7098 */
7099
7100const xmlChar *
7101xmlParseAttribute(xmlParserCtxtPtr ctxt, xmlChar **value) {
7102 const xmlChar *name;
7103 xmlChar *val;
7104
7105 *value = NULL;
7106 GROW;
7107 name = xmlParseName(ctxt);
7108 if (name == NULL) {
7109 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7110 "error parsing attribute name\n");
7111 return(NULL);
7112 }
7113
7114 /*
7115 * read the value
7116 */
7117 SKIP_BLANKS;
7118 if (RAW == '=') {
7119 NEXT;
7120 SKIP_BLANKS;
7121 val = xmlParseAttValue(ctxt);
7122 ctxt->instate = XML_PARSER_CONTENT;
7123 } else {
7124 xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
7125 "Specification mandate value for attribute %s\n", name);
7126 return(NULL);
7127 }
7128
7129 /*
7130 * Check that xml:lang conforms to the specification
7131 * No more registered as an error, just generate a warning now
7132 * since this was deprecated in XML second edition
7133 */
7134 if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "xml:lang"))) {
7135 if (!xmlCheckLanguageID(val)) {
7136 xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
7137 "Malformed value for xml:lang : %s\n",
7138 val, NULL);
7139 }
7140 }
7141
7142 /*
7143 * Check that xml:space conforms to the specification
7144 */
7145 if (xmlStrEqual(name, BAD_CAST "xml:space")) {
7146 if (xmlStrEqual(val, BAD_CAST "default"))
7147 *(ctxt->space) = 0;
7148 else if (xmlStrEqual(val, BAD_CAST "preserve"))
7149 *(ctxt->space) = 1;
7150 else {
7151 xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE,
7152"Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
7153 val, NULL);
7154 }
7155 }
7156
7157 *value = val;
7158 return(name);
7159}
7160
7161/**
7162 * xmlParseStartTag:
7163 * @ctxt: an XML parser context
7164 *
7165 * parse a start of tag either for rule element or
7166 * EmptyElement. In both case we don't parse the tag closing chars.
7167 *
7168 * [40] STag ::= '<' Name (S Attribute)* S? '>'
7169 *
7170 * [ WFC: Unique Att Spec ]
7171 * No attribute name may appear more than once in the same start-tag or
7172 * empty-element tag.
7173 *
7174 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
7175 *
7176 * [ WFC: Unique Att Spec ]
7177 * No attribute name may appear more than once in the same start-tag or
7178 * empty-element tag.
7179 *
7180 * With namespace:
7181 *
7182 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
7183 *
7184 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
7185 *
7186 * Returns the element name parsed
7187 */
7188
7189const xmlChar *
7190xmlParseStartTag(xmlParserCtxtPtr ctxt) {
7191 const xmlChar *name;
7192 const xmlChar *attname;
7193 xmlChar *attvalue;
7194 const xmlChar **atts = ctxt->atts;
7195 int nbatts = 0;
7196 int maxatts = ctxt->maxatts;
7197 int i;
7198
7199 if (RAW != '<') return(NULL);
7200 NEXT1;
7201
7202 name = xmlParseName(ctxt);
7203 if (name == NULL) {
7204 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7205 "xmlParseStartTag: invalid element name\n");
7206 return(NULL);
7207 }
7208
7209 /*
7210 * Now parse the attributes, it ends up with the ending
7211 *
7212 * (S Attribute)* S?
7213 */
7214 SKIP_BLANKS;
7215 GROW;
7216
7217 while ((RAW != '>') &&
7218 ((RAW != '/') || (NXT(1) != '>')) &&
7219 (IS_BYTE_CHAR(RAW))) {
7220 const xmlChar *q = CUR_PTR;
7221 unsigned int cons = ctxt->input->consumed;
7222
7223 attname = xmlParseAttribute(ctxt, &attvalue);
7224 if ((attname != NULL) && (attvalue != NULL)) {
7225 /*
7226 * [ WFC: Unique Att Spec ]
7227 * No attribute name may appear more than once in the same
7228 * start-tag or empty-element tag.
7229 */
7230 for (i = 0; i < nbatts;i += 2) {
7231 if (xmlStrEqual(atts[i], attname)) {
7232 xmlErrAttributeDup(ctxt, NULL, attname);
7233 xmlFree(attvalue);
7234 goto failed;
7235 }
7236 }
7237 /*
7238 * Add the pair to atts
7239 */
7240 if (atts == NULL) {
7241 maxatts = 22; /* allow for 10 attrs by default */
7242 atts = (const xmlChar **)
7243 xmlMalloc(maxatts * sizeof(xmlChar *));
7244 if (atts == NULL) {
7245 xmlErrMemory(ctxt, NULL);
7246 if (attvalue != NULL)
7247 xmlFree(attvalue);
7248 goto failed;
7249 }
7250 ctxt->atts = atts;
7251 ctxt->maxatts = maxatts;
7252 } else if (nbatts + 4 > maxatts) {
7253 const xmlChar **n;
7254
7255 maxatts *= 2;
7256 n = (const xmlChar **) xmlRealloc((void *) atts,
7257 maxatts * sizeof(const xmlChar *));
7258 if (n == NULL) {
7259 xmlErrMemory(ctxt, NULL);
7260 if (attvalue != NULL)
7261 xmlFree(attvalue);
7262 goto failed;
7263 }
7264 atts = n;
7265 ctxt->atts = atts;
7266 ctxt->maxatts = maxatts;
7267 }
7268 atts[nbatts++] = attname;
7269 atts[nbatts++] = attvalue;
7270 atts[nbatts] = NULL;
7271 atts[nbatts + 1] = NULL;
7272 } else {
7273 if (attvalue != NULL)
7274 xmlFree(attvalue);
7275 }
7276
7277failed:
7278
7279 GROW
7280 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
7281 break;
7282 if (!IS_BLANK_CH(RAW)) {
7283 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
7284 "attributes construct error\n");
7285 }
7286 SKIP_BLANKS;
7287 if ((cons == ctxt->input->consumed) && (q == CUR_PTR) &&
7288 (attname == NULL) && (attvalue == NULL)) {
7289 xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
7290 "xmlParseStartTag: problem parsing attributes\n");
7291 break;
7292 }
7293 SHRINK;
7294 GROW;
7295 }
7296
7297 /*
7298 * SAX: Start of Element !
7299 */
7300 if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL) &&
7301 (!ctxt->disableSAX)) {
7302 if (nbatts > 0)
7303 ctxt->sax->startElement(ctxt->userData, name, atts);
7304 else
7305 ctxt->sax->startElement(ctxt->userData, name, NULL);
7306 }
7307
7308 if (atts != NULL) {
7309 /* Free only the content strings */
7310 for (i = 1;i < nbatts;i+=2)
7311 if (atts[i] != NULL)
7312 xmlFree((xmlChar *) atts[i]);
7313 }
7314 return(name);
7315}
7316
7317/**
7318 * xmlParseEndTag1:
7319 * @ctxt: an XML parser context
7320 * @line: line of the start tag
7321 * @nsNr: number of namespaces on the start tag
7322 *
7323 * parse an end of tag
7324 *
7325 * [42] ETag ::= '</' Name S? '>'
7326 *
7327 * With namespace
7328 *
7329 * [NS 9] ETag ::= '</' QName S? '>'
7330 */
7331
7332static void
7333xmlParseEndTag1(xmlParserCtxtPtr ctxt, int line) {
7334 const xmlChar *name;
7335
7336 GROW;
7337 if ((RAW != '<') || (NXT(1) != '/')) {
7338 xmlFatalErrMsg(ctxt, XML_ERR_LTSLASH_REQUIRED,
7339 "xmlParseEndTag: '</' not found\n");
7340 return;
7341 }
7342 SKIP(2);
7343
7344 name = xmlParseNameAndCompare(ctxt,ctxt->name);
7345
7346 /*
7347 * We should definitely be at the ending "S? '>'" part
7348 */
7349 GROW;
7350 SKIP_BLANKS;
7351 if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
7352 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
7353 } else
7354 NEXT1;
7355
7356 /*
7357 * [ WFC: Element Type Match ]
7358 * The Name in an element's end-tag must match the element type in the
7359 * start-tag.
7360 *
7361 */
7362 if (name != (xmlChar*)1) {
7363 if (name == NULL) name = BAD_CAST "unparseable";
7364 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
7365 "Opening and ending tag mismatch: %s line %d and %s\n",
7366 ctxt->name, line, name);
7367 }
7368
7369 /*
7370 * SAX: End of Tag
7371 */
7372 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
7373 (!ctxt->disableSAX))
7374 ctxt->sax->endElement(ctxt->userData, ctxt->name);
7375
7376 namePop(ctxt);
7377 spacePop(ctxt);
7378 return;
7379}
7380
7381/**
7382 * xmlParseEndTag:
7383 * @ctxt: an XML parser context
7384 *
7385 * parse an end of tag
7386 *
7387 * [42] ETag ::= '</' Name S? '>'
7388 *
7389 * With namespace
7390 *
7391 * [NS 9] ETag ::= '</' QName S? '>'
7392 */
7393
7394void
7395xmlParseEndTag(xmlParserCtxtPtr ctxt) {
7396 xmlParseEndTag1(ctxt, 0);
7397}
7398#endif /* LIBXML_SAX1_ENABLED */
7399
7400/************************************************************************
7401 * *
7402 * SAX 2 specific operations *
7403 * *
7404 ************************************************************************/
7405
7406static const xmlChar *
7407xmlParseNCNameComplex(xmlParserCtxtPtr ctxt) {
7408 int len = 0, l;
7409 int c;
7410 int count = 0;
7411
7412 /*
7413 * Handler for more complex cases
7414 */
7415 GROW;
7416 c = CUR_CHAR(l);
7417 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
7418 (!IS_LETTER(c) && (c != '_'))) {
7419 return(NULL);
7420 }
7421
7422 while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
7423 ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
7424 (c == '.') || (c == '-') || (c == '_') ||
7425 (IS_COMBINING(c)) ||
7426 (IS_EXTENDER(c)))) {
7427 if (count++ > 100) {
7428 count = 0;
7429 GROW;
7430 }
7431 len += l;
7432 NEXTL(l);
7433 c = CUR_CHAR(l);
7434 }
7435 return(xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len));
7436}
7437
7438/*
7439 * xmlGetNamespace:
7440 * @ctxt: an XML parser context
7441 * @prefix: the prefix to lookup
7442 *
7443 * Lookup the namespace name for the @prefix (which ca be NULL)
7444 * The prefix must come from the @ctxt->dict dictionnary
7445 *
7446 * Returns the namespace name or NULL if not bound
7447 */
7448static const xmlChar *
7449xmlGetNamespace(xmlParserCtxtPtr ctxt, const xmlChar *prefix) {
7450 int i;
7451
7452 if (prefix == ctxt->str_xml) return(ctxt->str_xml_ns);
7453 for (i = ctxt->nsNr - 2;i >= 0;i-=2)
7454 if (ctxt->nsTab[i] == prefix) {
7455 if ((prefix == NULL) && (*ctxt->nsTab[i + 1] == 0))
7456 return(NULL);
7457 return(ctxt->nsTab[i + 1]);
7458 }
7459 return(NULL);
7460}
7461
7462/**
7463 * xmlParseNCName:
7464 * @ctxt: an XML parser context
7465 * @len: lenght of the string parsed
7466 *
7467 * parse an XML name.
7468 *
7469 * [4NS] NCNameChar ::= Letter | Digit | '.' | '-' | '_' |
7470 * CombiningChar | Extender
7471 *
7472 * [5NS] NCName ::= (Letter | '_') (NCNameChar)*
7473 *
7474 * Returns the Name parsed or NULL
7475 */
7476
7477static const xmlChar *
7478xmlParseNCName(xmlParserCtxtPtr ctxt) {
7479 const xmlChar *in;
7480 const xmlChar *ret;
7481 int count = 0;
7482
7483 /*
7484 * Accelerator for simple ASCII names
7485 */
7486 in = ctxt->input->cur;
7487 if (((*in >= 0x61) && (*in <= 0x7A)) ||
7488 ((*in >= 0x41) && (*in <= 0x5A)) ||
7489 (*in == '_')) {
7490 in++;
7491 while (((*in >= 0x61) && (*in <= 0x7A)) ||
7492 ((*in >= 0x41) && (*in <= 0x5A)) ||
7493 ((*in >= 0x30) && (*in <= 0x39)) ||
7494 (*in == '_') || (*in == '-') ||
7495 (*in == '.'))
7496 in++;
7497 if ((*in > 0) && (*in < 0x80)) {
7498 count = in - ctxt->input->cur;
7499 ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
7500 ctxt->input->cur = in;
7501 ctxt->nbChars += count;
7502 ctxt->input->col += count;
7503 if (ret == NULL) {
7504 xmlErrMemory(ctxt, NULL);
7505 }
7506 return(ret);
7507 }
7508 }
7509 return(xmlParseNCNameComplex(ctxt));
7510}
7511
7512/**
7513 * xmlParseQName:
7514 * @ctxt: an XML parser context
7515 * @prefix: pointer to store the prefix part
7516 *
7517 * parse an XML Namespace QName
7518 *
7519 * [6] QName ::= (Prefix ':')? LocalPart
7520 * [7] Prefix ::= NCName
7521 * [8] LocalPart ::= NCName
7522 *
7523 * Returns the Name parsed or NULL
7524 */
7525
7526static const xmlChar *
7527xmlParseQName(xmlParserCtxtPtr ctxt, const xmlChar **prefix) {
7528 const xmlChar *l, *p;
7529
7530 GROW;
7531
7532 l = xmlParseNCName(ctxt);
7533 if (l == NULL) {
7534 if (CUR == ':') {
7535 l = xmlParseName(ctxt);
7536 if (l != NULL) {
7537 xmlNsErr(ctxt, XML_NS_ERR_QNAME,
7538 "Failed to parse QName '%s'\n", l, NULL, NULL);
7539 *prefix = NULL;
7540 return(l);
7541 }
7542 }
7543 return(NULL);
7544 }
7545 if (CUR == ':') {
7546 NEXT;
7547 p = l;
7548 l = xmlParseNCName(ctxt);
7549 if (l == NULL) {
7550 xmlChar *tmp;
7551
7552 xmlNsErr(ctxt, XML_NS_ERR_QNAME,
7553 "Failed to parse QName '%s:'\n", p, NULL, NULL);
7554 tmp = xmlBuildQName(BAD_CAST "", p, NULL, 0);
7555 p = xmlDictLookup(ctxt->dict, tmp, -1);
7556 if (tmp != NULL) xmlFree(tmp);
7557 *prefix = NULL;
7558 return(p);
7559 }
7560 if (CUR == ':') {
7561 xmlChar *tmp;
7562
7563 xmlNsErr(ctxt, XML_NS_ERR_QNAME,
7564 "Failed to parse QName '%s:%s:'\n", p, l, NULL);
7565 NEXT;
7566 tmp = (xmlChar *) xmlParseName(ctxt);
7567 if (tmp != NULL) {
7568 tmp = xmlBuildQName(tmp, l, NULL, 0);
7569 l = xmlDictLookup(ctxt->dict, tmp, -1);
7570 if (tmp != NULL) xmlFree(tmp);
7571 *prefix = p;
7572 return(l);
7573 }
7574 tmp = xmlBuildQName(BAD_CAST "", l, NULL, 0);
7575 l = xmlDictLookup(ctxt->dict, tmp, -1);
7576 if (tmp != NULL) xmlFree(tmp);
7577 *prefix = p;
7578 return(l);
7579 }
7580 *prefix = p;
7581 } else
7582 *prefix = NULL;
7583 return(l);
7584}
7585
7586/**
7587 * xmlParseQNameAndCompare:
7588 * @ctxt: an XML parser context
7589 * @name: the localname
7590 * @prefix: the prefix, if any.
7591 *
7592 * parse an XML name and compares for match
7593 * (specialized for endtag parsing)
7594 *
7595 * Returns NULL for an illegal name, (xmlChar*) 1 for success
7596 * and the name for mismatch
7597 */
7598
7599static const xmlChar *
7600xmlParseQNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *name,
7601 xmlChar const *prefix) {
7602 const xmlChar *cmp = name;
7603 const xmlChar *in;
7604 const xmlChar *ret;
7605 const xmlChar *prefix2;
7606
7607 if (prefix == NULL) return(xmlParseNameAndCompare(ctxt, name));
7608
7609 GROW;
7610 in = ctxt->input->cur;
7611
7612 cmp = prefix;
7613 while (*in != 0 && *in == *cmp) {
7614 ++in;
7615 ++cmp;
7616 }
7617 if ((*cmp == 0) && (*in == ':')) {
7618 in++;
7619 cmp = name;
7620 while (*in != 0 && *in == *cmp) {
7621 ++in;
7622 ++cmp;
7623 }
7624 if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
7625 /* success */
7626 ctxt->input->cur = in;
7627 return((const xmlChar*) 1);
7628 }
7629 }
7630 /*
7631 * all strings coms from the dictionary, equality can be done directly
7632 */
7633 ret = xmlParseQName (ctxt, &prefix2);
7634 if ((ret == name) && (prefix == prefix2))
7635 return((const xmlChar*) 1);
7636 return ret;
7637}
7638
7639/**
7640 * xmlParseAttValueInternal:
7641 * @ctxt: an XML parser context
7642 * @len: attribute len result
7643 * @alloc: whether the attribute was reallocated as a new string
7644 * @normalize: if 1 then further non-CDATA normalization must be done
7645 *
7646 * parse a value for an attribute.
7647 * NOTE: if no normalization is needed, the routine will return pointers
7648 * directly from the data buffer.
7649 *
7650 * 3.3.3 Attribute-Value Normalization:
7651 * Before the value of an attribute is passed to the application or
7652 * checked for validity, the XML processor must normalize it as follows:
7653 * - a character reference is processed by appending the referenced
7654 * character to the attribute value
7655 * - an entity reference is processed by recursively processing the
7656 * replacement text of the entity
7657 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
7658 * appending #x20 to the normalized value, except that only a single
7659 * #x20 is appended for a "#xD#xA" sequence that is part of an external
7660 * parsed entity or the literal entity value of an internal parsed entity
7661 * - other characters are processed by appending them to the normalized value
7662 * If the declared value is not CDATA, then the XML processor must further
7663 * process the normalized attribute value by discarding any leading and
7664 * trailing space (#x20) characters, and by replacing sequences of space
7665 * (#x20) characters by a single space (#x20) character.
7666 * All attributes for which no declaration has been read should be treated
7667 * by a non-validating parser as if declared CDATA.
7668 *
7669 * Returns the AttValue parsed or NULL. The value has to be freed by the
7670 * caller if it was copied, this can be detected by val[*len] == 0.
7671 */
7672
7673static xmlChar *
7674xmlParseAttValueInternal(xmlParserCtxtPtr ctxt, int *len, int *alloc,
7675 int normalize)
7676{
7677 xmlChar limit = 0;
7678 const xmlChar *in = NULL, *start, *end, *last;
7679 xmlChar *ret = NULL;
7680
7681 GROW;
7682 in = (xmlChar *) CUR_PTR;
7683 if (*in != '"' && *in != '\'') {
7684 xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
7685 return (NULL);
7686 }
7687 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
7688
7689 /*
7690 * try to handle in this routine the most common case where no
7691 * allocation of a new string is required and where content is
7692 * pure ASCII.
7693 */
7694 limit = *in++;
7695 end = ctxt->input->end;
7696 start = in;
7697 if (in >= end) {
7698 const xmlChar *oldbase = ctxt->input->base;
7699 GROW;
7700 if (oldbase != ctxt->input->base) {
7701 long delta = ctxt->input->base - oldbase;
7702 start = start + delta;
7703 in = in + delta;
7704 }
7705 end = ctxt->input->end;
7706 }
7707 if (normalize) {
7708 /*
7709 * Skip any leading spaces
7710 */
7711 while ((in < end) && (*in != limit) &&
7712 ((*in == 0x20) || (*in == 0x9) ||
7713 (*in == 0xA) || (*in == 0xD))) {
7714 in++;
7715 start = in;
7716 if (in >= end) {
7717 const xmlChar *oldbase = ctxt->input->base;
7718 GROW;
7719 if (oldbase != ctxt->input->base) {
7720 long delta = ctxt->input->base - oldbase;
7721 start = start + delta;
7722 in = in + delta;
7723 }
7724 end = ctxt->input->end;
7725 }
7726 }
7727 while ((in < end) && (*in != limit) && (*in >= 0x20) &&
7728 (*in <= 0x7f) && (*in != '&') && (*in != '<')) {
7729 if ((*in++ == 0x20) && (*in == 0x20)) break;
7730 if (in >= end) {
7731 const xmlChar *oldbase = ctxt->input->base;
7732 GROW;
7733 if (oldbase != ctxt->input->base) {
7734 long delta = ctxt->input->base - oldbase;
7735 start = start + delta;
7736 in = in + delta;
7737 }
7738 end = ctxt->input->end;
7739 }
7740 }
7741 last = in;
7742 /*
7743 * skip the trailing blanks
7744 */
7745 while ((last[-1] == 0x20) && (last > start)) last--;
7746 while ((in < end) && (*in != limit) &&
7747 ((*in == 0x20) || (*in == 0x9) ||
7748 (*in == 0xA) || (*in == 0xD))) {
7749 in++;
7750 if (in >= end) {
7751 const xmlChar *oldbase = ctxt->input->base;
7752 GROW;
7753 if (oldbase != ctxt->input->base) {
7754 long delta = ctxt->input->base - oldbase;
7755 start = start + delta;
7756 in = in + delta;
7757 last = last + delta;
7758 }
7759 end = ctxt->input->end;
7760 }
7761 }
7762 if (*in != limit) goto need_complex;
7763 } else {
7764 while ((in < end) && (*in != limit) && (*in >= 0x20) &&
7765 (*in <= 0x7f) && (*in != '&') && (*in != '<')) {
7766 in++;
7767 if (in >= end) {
7768 const xmlChar *oldbase = ctxt->input->base;
7769 GROW;
7770 if (oldbase != ctxt->input->base) {
7771 long delta = ctxt->input->base - oldbase;
7772 start = start + delta;
7773 in = in + delta;
7774 }
7775 end = ctxt->input->end;
7776 }
7777 }
7778 last = in;
7779 if (*in != limit) goto need_complex;
7780 }
7781 in++;
7782 if (len != NULL) {
7783 *len = last - start;
7784 ret = (xmlChar *) start;
7785 } else {
7786 if (alloc) *alloc = 1;
7787 ret = xmlStrndup(start, last - start);
7788 }
7789 CUR_PTR = in;
7790 if (alloc) *alloc = 0;
7791 return ret;
7792need_complex:
7793 if (alloc) *alloc = 1;
7794 return xmlParseAttValueComplex(ctxt, len, normalize);
7795}
7796
7797/**
7798 * xmlParseAttribute2:
7799 * @ctxt: an XML parser context
7800 * @pref: the element prefix
7801 * @elem: the element name
7802 * @prefix: a xmlChar ** used to store the value of the attribute prefix
7803 * @value: a xmlChar ** used to store the value of the attribute
7804 * @len: an int * to save the length of the attribute
7805 * @alloc: an int * to indicate if the attribute was allocated
7806 *
7807 * parse an attribute in the new SAX2 framework.
7808 *
7809 * Returns the attribute name, and the value in *value, .
7810 */
7811
7812static const xmlChar *
7813xmlParseAttribute2(xmlParserCtxtPtr ctxt,
7814 const xmlChar *pref, const xmlChar *elem,
7815 const xmlChar **prefix, xmlChar **value,
7816 int *len, int *alloc) {
7817 const xmlChar *name;
7818 xmlChar *val, *internal_val = NULL;
7819 int normalize = 0;
7820
7821 *value = NULL;
7822 GROW;
7823 name = xmlParseQName(ctxt, prefix);
7824 if (name == NULL) {
7825 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7826 "error parsing attribute name\n");
7827 return(NULL);
7828 }
7829
7830 /*
7831 * get the type if needed
7832 */
7833 if (ctxt->attsSpecial != NULL) {
7834 int type;
7835
7836 type = (int) (long) xmlHashQLookup2(ctxt->attsSpecial,
7837 pref, elem, *prefix, name);
7838 if (type != 0) normalize = 1;
7839 }
7840
7841 /*
7842 * read the value
7843 */
7844 SKIP_BLANKS;
7845 if (RAW == '=') {
7846 NEXT;
7847 SKIP_BLANKS;
7848 val = xmlParseAttValueInternal(ctxt, len, alloc, normalize);
7849 ctxt->instate = XML_PARSER_CONTENT;
7850 } else {
7851 xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
7852 "Specification mandate value for attribute %s\n", name);
7853 return(NULL);
7854 }
7855
7856 if (*prefix == ctxt->str_xml) {
7857 /*
7858 * Check that xml:lang conforms to the specification
7859 * No more registered as an error, just generate a warning now
7860 * since this was deprecated in XML second edition
7861 */
7862 if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "lang"))) {
7863 internal_val = xmlStrndup(val, *len);
7864 if (!xmlCheckLanguageID(internal_val)) {
7865 xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
7866 "Malformed value for xml:lang : %s\n",
7867 internal_val, NULL);
7868 }
7869 }
7870
7871 /*
7872 * Check that xml:space conforms to the specification
7873 */
7874 if (xmlStrEqual(name, BAD_CAST "space")) {
7875 internal_val = xmlStrndup(val, *len);
7876 if (xmlStrEqual(internal_val, BAD_CAST "default"))
7877 *(ctxt->space) = 0;
7878 else if (xmlStrEqual(internal_val, BAD_CAST "preserve"))
7879 *(ctxt->space) = 1;
7880 else {
7881 xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE,
7882"Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
7883 internal_val, NULL);
7884 }
7885 }
7886 if (internal_val) {
7887 xmlFree(internal_val);
7888 }
7889 }
7890
7891 *value = val;
7892 return(name);
7893}
7894
7895/**
7896 * xmlParseStartTag2:
7897 * @ctxt: an XML parser context
7898 *
7899 * parse a start of tag either for rule element or
7900 * EmptyElement. In both case we don't parse the tag closing chars.
7901 * This routine is called when running SAX2 parsing
7902 *
7903 * [40] STag ::= '<' Name (S Attribute)* S? '>'
7904 *
7905 * [ WFC: Unique Att Spec ]
7906 * No attribute name may appear more than once in the same start-tag or
7907 * empty-element tag.
7908 *
7909 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
7910 *
7911 * [ WFC: Unique Att Spec ]
7912 * No attribute name may appear more than once in the same start-tag or
7913 * empty-element tag.
7914 *
7915 * With namespace:
7916 *
7917 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
7918 *
7919 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
7920 *
7921 * Returns the element name parsed
7922 */
7923
7924static const xmlChar *
7925xmlParseStartTag2(xmlParserCtxtPtr ctxt, const xmlChar **pref,
7926 const xmlChar **URI, int *tlen) {
7927 const xmlChar *localname;
7928 const xmlChar *prefix;
7929 const xmlChar *attname;
7930 const xmlChar *aprefix;
7931 const xmlChar *nsname;
7932 xmlChar *attvalue;
7933 const xmlChar **atts = ctxt->atts;
7934 int maxatts = ctxt->maxatts;
7935 int nratts, nbatts, nbdef;
7936 int i, j, nbNs, attval, oldline, oldcol;
7937 const xmlChar *base;
7938 unsigned long cur;
7939 int nsNr = ctxt->nsNr;
7940
7941 if (RAW != '<') return(NULL);
7942 NEXT1;
7943
7944 /*
7945 * NOTE: it is crucial with the SAX2 API to never call SHRINK beyond that
7946 * point since the attribute values may be stored as pointers to
7947 * the buffer and calling SHRINK would destroy them !
7948 * The Shrinking is only possible once the full set of attribute
7949 * callbacks have been done.
7950 */
7951reparse:
7952 SHRINK;
7953 base = ctxt->input->base;
7954 cur = ctxt->input->cur - ctxt->input->base;
7955 oldline = ctxt->input->line;
7956 oldcol = ctxt->input->col;
7957 nbatts = 0;
7958 nratts = 0;
7959 nbdef = 0;
7960 nbNs = 0;
7961 attval = 0;
7962 /* Forget any namespaces added during an earlier parse of this element. */
7963 ctxt->nsNr = nsNr;
7964
7965 localname = xmlParseQName(ctxt, &prefix);
7966 if (localname == NULL) {
7967 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7968 "StartTag: invalid element name\n");
7969 return(NULL);
7970 }
7971 *tlen = ctxt->input->cur - ctxt->input->base - cur;
7972
7973 /*
7974 * Now parse the attributes, it ends up with the ending
7975 *
7976 * (S Attribute)* S?
7977 */
7978 SKIP_BLANKS;
7979 GROW;
7980 if (ctxt->input->base != base) goto base_changed;
7981
7982 while ((RAW != '>') &&
7983 ((RAW != '/') || (NXT(1) != '>')) &&
7984 (IS_BYTE_CHAR(RAW))) {
7985 const xmlChar *q = CUR_PTR;
7986 unsigned int cons = ctxt->input->consumed;
7987 int len = -1, alloc = 0;
7988
7989 attname = xmlParseAttribute2(ctxt, prefix, localname,
7990 &aprefix, &attvalue, &len, &alloc);
7991 if (ctxt->input->base != base) {
7992 if ((attvalue != NULL) && (alloc != 0))
7993 xmlFree(attvalue);
7994 attvalue = NULL;
7995 goto base_changed;
7996 }
7997 if ((attname != NULL) && (attvalue != NULL)) {
7998 if (len < 0) len = xmlStrlen(attvalue);
7999 if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
8000 const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len);
8001 xmlURIPtr uri;
8002
8003 if (*URL != 0) {
8004 uri = xmlParseURI((const char *) URL);
8005 if (uri == NULL) {
8006 xmlWarningMsg(ctxt, XML_WAR_NS_URI,
8007 "xmlns: %s not a valid URI\n",
8008 URL, NULL);
8009 } else {
8010 if (uri->scheme == NULL) {
8011 xmlWarningMsg(ctxt, XML_WAR_NS_URI_RELATIVE,
8012 "xmlns: URI %s is not absolute\n",
8013 URL, NULL);
8014 }
8015 xmlFreeURI(uri);
8016 }
8017 }
8018 /*
8019 * check that it's not a defined namespace
8020 */
8021 for (j = 1;j <= nbNs;j++)
8022 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL)
8023 break;
8024 if (j <= nbNs)
8025 xmlErrAttributeDup(ctxt, NULL, attname);
8026 else
8027 if (nsPush(ctxt, NULL, URL) > 0) nbNs++;
8028 if (alloc != 0) xmlFree(attvalue);
8029 SKIP_BLANKS;
8030 continue;
8031 }
8032 if (aprefix == ctxt->str_xmlns) {
8033 const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len);
8034 xmlURIPtr uri;
8035
8036 if (attname == ctxt->str_xml) {
8037 if (URL != ctxt->str_xml_ns) {
8038 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
8039 "xml namespace prefix mapped to wrong URI\n",
8040 NULL, NULL, NULL);
8041 }
8042 /*
8043 * Do not keep a namespace definition node
8044 */
8045 if (alloc != 0) xmlFree(attvalue);
8046 SKIP_BLANKS;
8047 continue;
8048 }
8049 uri = xmlParseURI((const char *) URL);
8050 if (uri == NULL) {
8051 xmlWarningMsg(ctxt, XML_WAR_NS_URI,
8052 "xmlns:%s: '%s' is not a valid URI\n",
8053 attname, URL);
8054 } else {
8055 if ((ctxt->pedantic) && (uri->scheme == NULL)) {
8056 xmlWarningMsg(ctxt, XML_WAR_NS_URI_RELATIVE,
8057 "xmlns:%s: URI %s is not absolute\n",
8058 attname, URL);
8059 }
8060 xmlFreeURI(uri);
8061 }
8062
8063 /*
8064 * check that it's not a defined namespace
8065 */
8066 for (j = 1;j <= nbNs;j++)
8067 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname)
8068 break;
8069 if (j <= nbNs)
8070 xmlErrAttributeDup(ctxt, aprefix, attname);
8071 else
8072 if (nsPush(ctxt, attname, URL) > 0) nbNs++;
8073 if (alloc != 0) xmlFree(attvalue);
8074 SKIP_BLANKS;
8075 if (ctxt->input->base != base) goto base_changed;
8076 continue;
8077 }
8078
8079 /*
8080 * Add the pair to atts
8081 */
8082 if ((atts == NULL) || (nbatts + 5 > maxatts)) {
8083 if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
8084 if (attvalue[len] == 0)
8085 xmlFree(attvalue);
8086 goto failed;
8087 }
8088 maxatts = ctxt->maxatts;
8089 atts = ctxt->atts;
8090 }
8091 ctxt->attallocs[nratts++] = alloc;
8092 atts[nbatts++] = attname;
8093 atts[nbatts++] = aprefix;
8094 atts[nbatts++] = NULL; /* the URI will be fetched later */
8095 atts[nbatts++] = attvalue;
8096 attvalue += len;
8097 atts[nbatts++] = attvalue;
8098 /*
8099 * tag if some deallocation is needed
8100 */
8101 if (alloc != 0) attval = 1;
8102 } else {
8103 if ((attvalue != NULL) && (attvalue[len] == 0))
8104 xmlFree(attvalue);
8105 }
8106
8107failed:
8108
8109 GROW
8110 if (ctxt->input->base != base) goto base_changed;
8111 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
8112 break;
8113 if (!IS_BLANK_CH(RAW)) {
8114 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
8115 "attributes construct error\n");
8116 break;
8117 }
8118 SKIP_BLANKS;
8119 if ((cons == ctxt->input->consumed) && (q == CUR_PTR) &&
8120 (attname == NULL) && (attvalue == NULL)) {
8121 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
8122 "xmlParseStartTag: problem parsing attributes\n");
8123 break;
8124 }
8125 GROW;
8126 if (ctxt->input->base != base) goto base_changed;
8127 }
8128
8129 /*
8130 * The attributes defaulting
8131 */
8132 if (ctxt->attsDefault != NULL) {
8133 xmlDefAttrsPtr defaults;
8134
8135 defaults = xmlHashLookup2(ctxt->attsDefault, localname, prefix);
8136 if (defaults != NULL) {
8137 for (i = 0;i < defaults->nbAttrs;i++) {
8138 attname = defaults->values[4 * i];
8139 aprefix = defaults->values[4 * i + 1];
8140
8141 /*
8142 * special work for namespaces defaulted defs
8143 */
8144 if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
8145 /*
8146 * check that it's not a defined namespace
8147 */
8148 for (j = 1;j <= nbNs;j++)
8149 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL)
8150 break;
8151 if (j <= nbNs) continue;
8152
8153 nsname = xmlGetNamespace(ctxt, NULL);
8154 if (nsname != defaults->values[4 * i + 2]) {
8155 if (nsPush(ctxt, NULL,
8156 defaults->values[4 * i + 2]) > 0)
8157 nbNs++;
8158 }
8159 } else if (aprefix == ctxt->str_xmlns) {
8160 /*
8161 * check that it's not a defined namespace
8162 */
8163 for (j = 1;j <= nbNs;j++)
8164 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname)
8165 break;
8166 if (j <= nbNs) continue;
8167
8168 nsname = xmlGetNamespace(ctxt, attname);
8169 if (nsname != defaults->values[2]) {
8170 if (nsPush(ctxt, attname,
8171 defaults->values[4 * i + 2]) > 0)
8172 nbNs++;
8173 }
8174 } else {
8175 /*
8176 * check that it's not a defined attribute
8177 */
8178 for (j = 0;j < nbatts;j+=5) {
8179 if ((attname == atts[j]) && (aprefix == atts[j+1]))
8180 break;
8181 }
8182 if (j < nbatts) continue;
8183
8184 if ((atts == NULL) || (nbatts + 5 > maxatts)) {
8185 if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
8186 return(NULL);
8187 }
8188 maxatts = ctxt->maxatts;
8189 atts = ctxt->atts;
8190 }
8191 atts[nbatts++] = attname;
8192 atts[nbatts++] = aprefix;
8193 if (aprefix == NULL)
8194 atts[nbatts++] = NULL;
8195 else
8196 atts[nbatts++] = xmlGetNamespace(ctxt, aprefix);
8197 atts[nbatts++] = defaults->values[4 * i + 2];
8198 atts[nbatts++] = defaults->values[4 * i + 3];
8199 nbdef++;
8200 }
8201 }
8202 }
8203 }
8204
8205 /*
8206 * The attributes checkings
8207 */
8208 for (i = 0; i < nbatts;i += 5) {
8209 /*
8210 * The default namespace does not apply to attribute names.
8211 */
8212 if (atts[i + 1] != NULL) {
8213 nsname = xmlGetNamespace(ctxt, atts[i + 1]);
8214 if (nsname == NULL) {
8215 xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
8216 "Namespace prefix %s for %s on %s is not defined\n",
8217 atts[i + 1], atts[i], localname);
8218 }
8219 atts[i + 2] = nsname;
8220 } else
8221 nsname = NULL;
8222 /*
8223 * [ WFC: Unique Att Spec ]
8224 * No attribute name may appear more than once in the same
8225 * start-tag or empty-element tag.
8226 * As extended by the Namespace in XML REC.
8227 */
8228 for (j = 0; j < i;j += 5) {
8229 if (atts[i] == atts[j]) {
8230 if (atts[i+1] == atts[j+1]) {
8231 xmlErrAttributeDup(ctxt, atts[i+1], atts[i]);
8232 break;
8233 }
8234 if ((nsname != NULL) && (atts[j + 2] == nsname)) {
8235 xmlNsErr(ctxt, XML_NS_ERR_ATTRIBUTE_REDEFINED,
8236 "Namespaced Attribute %s in '%s' redefined\n",
8237 atts[i], nsname, NULL);
8238 break;
8239 }
8240 }
8241 }
8242 }
8243
8244 nsname = xmlGetNamespace(ctxt, prefix);
8245 if ((prefix != NULL) && (nsname == NULL)) {
8246 xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
8247 "Namespace prefix %s on %s is not defined\n",
8248 prefix, localname, NULL);
8249 }
8250 *pref = prefix;
8251 *URI = nsname;
8252
8253 /*
8254 * SAX: Start of Element !
8255 */
8256 if ((ctxt->sax != NULL) && (ctxt->sax->startElementNs != NULL) &&
8257 (!ctxt->disableSAX)) {
8258 if (nbNs > 0)
8259 ctxt->sax->startElementNs(ctxt->userData, localname, prefix,
8260 nsname, nbNs, &ctxt->nsTab[ctxt->nsNr - 2 * nbNs],
8261 nbatts / 5, nbdef, atts);
8262 else
8263 ctxt->sax->startElementNs(ctxt->userData, localname, prefix,
8264 nsname, 0, NULL, nbatts / 5, nbdef, atts);
8265 }
8266
8267 /*
8268 * Free up attribute allocated strings if needed
8269 */
8270 if (attval != 0) {
8271 for (i = 3,j = 0; j < nratts;i += 5,j++)
8272 if ((ctxt->attallocs[j] != 0) && (atts[i] != NULL))
8273 xmlFree((xmlChar *) atts[i]);
8274 }
8275
8276 return(localname);
8277
8278base_changed:
8279 /*
8280 * the attribute strings are valid iif the base didn't changed
8281 */
8282 if (attval != 0) {
8283 for (i = 3,j = 0; j < nratts;i += 5,j++)
8284 if ((ctxt->attallocs[j] != 0) && (atts[i] != NULL))
8285 xmlFree((xmlChar *) atts[i]);
8286 }
8287 ctxt->input->cur = ctxt->input->base + cur;
8288 ctxt->input->line = oldline;
8289 ctxt->input->col = oldcol;
8290 if (ctxt->wellFormed == 1) {
8291 goto reparse;
8292 }
8293 return(NULL);
8294}
8295
8296/**
8297 * xmlParseEndTag2:
8298 * @ctxt: an XML parser context
8299 * @line: line of the start tag
8300 * @nsNr: number of namespaces on the start tag
8301 *
8302 * parse an end of tag
8303 *
8304 * [42] ETag ::= '</' Name S? '>'
8305 *
8306 * With namespace
8307 *
8308 * [NS 9] ETag ::= '</' QName S? '>'
8309 */
8310
8311static void
8312xmlParseEndTag2(xmlParserCtxtPtr ctxt, const xmlChar *prefix,
8313 const xmlChar *URI, int line, int nsNr, int tlen) {
8314 const xmlChar *name;
8315
8316 GROW;
8317 if ((RAW != '<') || (NXT(1) != '/')) {
8318 xmlFatalErr(ctxt, XML_ERR_LTSLASH_REQUIRED, NULL);
8319 return;
8320 }
8321 SKIP(2);
8322
8323 if ((tlen > 0) && (xmlStrncmp(ctxt->input->cur, ctxt->name, tlen) == 0)) {
8324 if (ctxt->input->cur[tlen] == '>') {
8325 ctxt->input->cur += tlen + 1;
8326 goto done;
8327 }
8328 ctxt->input->cur += tlen;
8329 name = (xmlChar*)1;
8330 } else {
8331 if (prefix == NULL)
8332 name = xmlParseNameAndCompare(ctxt, ctxt->name);
8333 else
8334 name = xmlParseQNameAndCompare(ctxt, ctxt->name, prefix);
8335 }
8336
8337 /*
8338 * We should definitely be at the ending "S? '>'" part
8339 */
8340 GROW;
8341 SKIP_BLANKS;
8342 if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
8343 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
8344 } else
8345 NEXT1;
8346
8347 /*
8348 * [ WFC: Element Type Match ]
8349 * The Name in an element's end-tag must match the element type in the
8350 * start-tag.
8351 *
8352 */
8353 if (name != (xmlChar*)1) {
8354 if (name == NULL) name = BAD_CAST "unparseable";
8355 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
8356 "Opening and ending tag mismatch: %s line %d and %s\n",
8357 ctxt->name, line, name);
8358 }
8359
8360 /*
8361 * SAX: End of Tag
8362 */
8363done:
8364 if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
8365 (!ctxt->disableSAX))
8366 ctxt->sax->endElementNs(ctxt->userData, ctxt->name, prefix, URI);
8367
8368 spacePop(ctxt);
8369 if (nsNr != 0)
8370 nsPop(ctxt, nsNr);
8371 return;
8372}
8373
8374/**
8375 * xmlParseCDSect:
8376 * @ctxt: an XML parser context
8377 *
8378 * Parse escaped pure raw content.
8379 *
8380 * [18] CDSect ::= CDStart CData CDEnd
8381 *
8382 * [19] CDStart ::= '<![CDATA['
8383 *
8384 * [20] Data ::= (Char* - (Char* ']]>' Char*))
8385 *
8386 * [21] CDEnd ::= ']]>'
8387 */
8388void
8389xmlParseCDSect(xmlParserCtxtPtr ctxt) {
8390 xmlChar *buf = NULL;
8391 int len = 0;
8392 int size = XML_PARSER_BUFFER_SIZE;
8393 int r, rl;
8394 int s, sl;
8395 int cur, l;
8396 int count = 0;
8397
8398 /* Check 2.6.0 was NXT(0) not RAW */
8399 if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) {
8400 SKIP(9);
8401 } else
8402 return;
8403
8404 ctxt->instate = XML_PARSER_CDATA_SECTION;
8405 r = CUR_CHAR(rl);
8406 if (!IS_CHAR(r)) {
8407 xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
8408 ctxt->instate = XML_PARSER_CONTENT;
8409 return;
8410 }
8411 NEXTL(rl);
8412 s = CUR_CHAR(sl);
8413 if (!IS_CHAR(s)) {
8414 xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
8415 ctxt->instate = XML_PARSER_CONTENT;
8416 return;
8417 }
8418 NEXTL(sl);
8419 cur = CUR_CHAR(l);
8420 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
8421 if (buf == NULL) {
8422 xmlErrMemory(ctxt, NULL);
8423 return;
8424 }
8425 while (IS_CHAR(cur) &&
8426 ((r != ']') || (s != ']') || (cur != '>'))) {
8427 if (len + 5 >= size) {
8428 xmlChar *tmp;
8429
8430 size *= 2;
8431 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
8432 if (tmp == NULL) {
8433 xmlFree(buf);
8434 xmlErrMemory(ctxt, NULL);
8435 return;
8436 }
8437 buf = tmp;
8438 }
8439 COPY_BUF(rl,buf,len,r);
8440 r = s;
8441 rl = sl;
8442 s = cur;
8443 sl = l;
8444 count++;
8445 if (count > 50) {
8446 GROW;
8447 count = 0;
8448 }
8449 NEXTL(l);
8450 cur = CUR_CHAR(l);
8451 }
8452 buf[len] = 0;
8453 ctxt->instate = XML_PARSER_CONTENT;
8454 if (cur != '>') {
8455 xmlFatalErrMsgStr(ctxt, XML_ERR_CDATA_NOT_FINISHED,
8456 "CData section not finished\n%.50s\n", buf);
8457 xmlFree(buf);
8458 return;
8459 }
8460 NEXTL(l);
8461
8462 /*
8463 * OK the buffer is to be consumed as cdata.
8464 */
8465 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
8466 if (ctxt->sax->cdataBlock != NULL)
8467 ctxt->sax->cdataBlock(ctxt->userData, buf, len);
8468 else if (ctxt->sax->characters != NULL)
8469 ctxt->sax->characters(ctxt->userData, buf, len);
8470 }
8471 xmlFree(buf);
8472}
8473
8474/**
8475 * xmlParseContent:
8476 * @ctxt: an XML parser context
8477 *
8478 * Parse a content:
8479 *
8480 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
8481 */
8482
8483void
8484xmlParseContent(xmlParserCtxtPtr ctxt) {
8485 GROW;
8486 while ((RAW != 0) &&
8487 ((RAW != '<') || (NXT(1) != '/')) &&
8488 (ctxt->instate != XML_PARSER_EOF)) {
8489 const xmlChar *test = CUR_PTR;
8490 unsigned int cons = ctxt->input->consumed;
8491 const xmlChar *cur = ctxt->input->cur;
8492
8493 /*
8494 * First case : a Processing Instruction.
8495 */
8496 if ((*cur == '<') && (cur[1] == '?')) {
8497 xmlParsePI(ctxt);
8498 }
8499
8500 /*
8501 * Second case : a CDSection
8502 */
8503 /* 2.6.0 test was *cur not RAW */
8504 else if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) {
8505 xmlParseCDSect(ctxt);
8506 }
8507
8508 /*
8509 * Third case : a comment
8510 */
8511 else if ((*cur == '<') && (NXT(1) == '!') &&
8512 (NXT(2) == '-') && (NXT(3) == '-')) {
8513 xmlParseComment(ctxt);
8514 ctxt->instate = XML_PARSER_CONTENT;
8515 }
8516
8517 /*
8518 * Fourth case : a sub-element.
8519 */
8520 else if (*cur == '<') {
8521 xmlParseElement(ctxt);
8522 }
8523
8524 /*
8525 * Fifth case : a reference. If if has not been resolved,
8526 * parsing returns it's Name, create the node
8527 */
8528
8529 else if (*cur == '&') {
8530 xmlParseReference(ctxt);
8531 }
8532
8533 /*
8534 * Last case, text. Note that References are handled directly.
8535 */
8536 else {
8537 xmlParseCharData(ctxt, 0);
8538 }
8539
8540 GROW;
8541 /*
8542 * Pop-up of finished entities.
8543 */
8544 while ((RAW == 0) && (ctxt->inputNr > 1))
8545 xmlPopInput(ctxt);
8546 SHRINK;
8547
8548 if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) {
8549 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
8550 "detected an error in element content\n");
8551 ctxt->instate = XML_PARSER_EOF;
8552 break;
8553 }
8554 }
8555}
8556
8557/**
8558 * xmlParseElement:
8559 * @ctxt: an XML parser context
8560 *
8561 * parse an XML element, this is highly recursive
8562 *
8563 * [39] element ::= EmptyElemTag | STag content ETag
8564 *
8565 * [ WFC: Element Type Match ]
8566 * The Name in an element's end-tag must match the element type in the
8567 * start-tag.
8568 *
8569 */
8570
8571void
8572xmlParseElement(xmlParserCtxtPtr ctxt) {
8573 const xmlChar *name;
8574 const xmlChar *prefix;
8575 const xmlChar *URI;
8576 xmlParserNodeInfo node_info;
8577 int line, tlen;
8578 xmlNodePtr ret;
8579 int nsNr = ctxt->nsNr;
8580
8581 if ((unsigned int) ctxt->nameNr > xmlParserMaxDepth) {
8582 xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR,
8583 "Excessive depth in document: change xmlParserMaxDepth = %d\n",
8584 xmlParserMaxDepth);
8585 ctxt->instate = XML_PARSER_EOF;
8586 return;
8587 }
8588
8589 /* Capture start position */
8590 if (ctxt->record_info) {
8591 node_info.begin_pos = ctxt->input->consumed +
8592 (CUR_PTR - ctxt->input->base);
8593 node_info.begin_line = ctxt->input->line;
8594 }
8595
8596 if (ctxt->spaceNr == 0)
8597 spacePush(ctxt, -1);
8598 else if (*ctxt->space == -2)
8599 spacePush(ctxt, -1);
8600 else
8601 spacePush(ctxt, *ctxt->space);
8602
8603 line = ctxt->input->line;
8604#ifdef LIBXML_SAX1_ENABLED
8605 if (ctxt->sax2)
8606#endif /* LIBXML_SAX1_ENABLED */
8607 name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen);
8608#ifdef LIBXML_SAX1_ENABLED
8609 else
8610 name = xmlParseStartTag(ctxt);
8611#endif /* LIBXML_SAX1_ENABLED */
8612 if (name == NULL) {
8613 spacePop(ctxt);
8614 return;
8615 }
8616 namePush(ctxt, name);
8617 ret = ctxt->node;
8618
8619#ifdef LIBXML_VALID_ENABLED
8620 /*
8621 * [ VC: Root Element Type ]
8622 * The Name in the document type declaration must match the element
8623 * type of the root element.
8624 */
8625 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
8626 ctxt->node && (ctxt->node == ctxt->myDoc->children))
8627 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
8628#endif /* LIBXML_VALID_ENABLED */
8629
8630 /*
8631 * Check for an Empty Element.
8632 */
8633 if ((RAW == '/') && (NXT(1) == '>')) {
8634 SKIP(2);
8635 if (ctxt->sax2) {
8636 if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
8637 (!ctxt->disableSAX))
8638 ctxt->sax->endElementNs(ctxt->userData, name, prefix, URI);
8639#ifdef LIBXML_SAX1_ENABLED
8640 } else {
8641 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
8642 (!ctxt->disableSAX))
8643 ctxt->sax->endElement(ctxt->userData, name);
8644#endif /* LIBXML_SAX1_ENABLED */
8645 }
8646 namePop(ctxt);
8647 spacePop(ctxt);
8648 if (nsNr != ctxt->nsNr)
8649 nsPop(ctxt, ctxt->nsNr - nsNr);
8650 if ( ret != NULL && ctxt->record_info ) {
8651 node_info.end_pos = ctxt->input->consumed +
8652 (CUR_PTR - ctxt->input->base);
8653 node_info.end_line = ctxt->input->line;
8654 node_info.node = ret;
8655 xmlParserAddNodeInfo(ctxt, &node_info);
8656 }
8657 return;
8658 }
8659 if (RAW == '>') {
8660 NEXT1;
8661 } else {
8662 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_GT_REQUIRED,
8663 "Couldn't find end of Start Tag %s line %d\n",
8664 name, line, NULL);
8665
8666 /*
8667 * end of parsing of this node.
8668 */
8669 nodePop(ctxt);
8670 namePop(ctxt);
8671 spacePop(ctxt);
8672 if (nsNr != ctxt->nsNr)
8673 nsPop(ctxt, ctxt->nsNr - nsNr);
8674
8675 /*
8676 * Capture end position and add node
8677 */
8678 if ( ret != NULL && ctxt->record_info ) {
8679 node_info.end_pos = ctxt->input->consumed +
8680 (CUR_PTR - ctxt->input->base);
8681 node_info.end_line = ctxt->input->line;
8682 node_info.node = ret;
8683 xmlParserAddNodeInfo(ctxt, &node_info);
8684 }
8685 return;
8686 }
8687
8688 /*
8689 * Parse the content of the element:
8690 */
8691 xmlParseContent(ctxt);
8692 if (!IS_BYTE_CHAR(RAW)) {
8693 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NOT_FINISHED,
8694 "Premature end of data in tag %s line %d\n",
8695 name, line, NULL);
8696
8697 /*
8698 * end of parsing of this node.
8699 */
8700 nodePop(ctxt);
8701 namePop(ctxt);
8702 spacePop(ctxt);
8703 if (nsNr != ctxt->nsNr)
8704 nsPop(ctxt, ctxt->nsNr - nsNr);
8705 return;
8706 }
8707
8708 /*
8709 * parse the end of tag: '</' should be here.
8710 */
8711 if (ctxt->sax2) {
8712 xmlParseEndTag2(ctxt, prefix, URI, line, ctxt->nsNr - nsNr, tlen);
8713 namePop(ctxt);
8714 }
8715#ifdef LIBXML_SAX1_ENABLED
8716 else
8717 xmlParseEndTag1(ctxt, line);
8718#endif /* LIBXML_SAX1_ENABLED */
8719
8720 /*
8721 * Capture end position and add node
8722 */
8723 if ( ret != NULL && ctxt->record_info ) {
8724 node_info.end_pos = ctxt->input->consumed +
8725 (CUR_PTR - ctxt->input->base);
8726 node_info.end_line = ctxt->input->line;
8727 node_info.node = ret;
8728 xmlParserAddNodeInfo(ctxt, &node_info);
8729 }
8730}
8731
8732/**
8733 * xmlParseVersionNum:
8734 * @ctxt: an XML parser context
8735 *
8736 * parse the XML version value.
8737 *
8738 * [26] VersionNum ::= ([a-zA-Z0-9_.:] | '-')+
8739 *
8740 * Returns the string giving the XML version number, or NULL
8741 */
8742xmlChar *
8743xmlParseVersionNum(xmlParserCtxtPtr ctxt) {
8744 xmlChar *buf = NULL;
8745 int len = 0;
8746 int size = 10;
8747 xmlChar cur;
8748
8749 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
8750 if (buf == NULL) {
8751 xmlErrMemory(ctxt, NULL);
8752 return(NULL);
8753 }
8754 cur = CUR;
8755 while (((cur >= 'a') && (cur <= 'z')) ||
8756 ((cur >= 'A') && (cur <= 'Z')) ||
8757 ((cur >= '0') && (cur <= '9')) ||
8758 (cur == '_') || (cur == '.') ||
8759 (cur == ':') || (cur == '-')) {
8760 if (len + 1 >= size) {
8761 xmlChar *tmp;
8762
8763 size *= 2;
8764 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
8765 if (tmp == NULL) {
8766 xmlErrMemory(ctxt, NULL);
8767 return(NULL);
8768 }
8769 buf = tmp;
8770 }
8771 buf[len++] = cur;
8772 NEXT;
8773 cur=CUR;
8774 }
8775 buf[len] = 0;
8776 return(buf);
8777}
8778
8779/**
8780 * xmlParseVersionInfo:
8781 * @ctxt: an XML parser context
8782 *
8783 * parse the XML version.
8784 *
8785 * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
8786 *
8787 * [25] Eq ::= S? '=' S?
8788 *
8789 * Returns the version string, e.g. "1.0"
8790 */
8791
8792xmlChar *
8793xmlParseVersionInfo(xmlParserCtxtPtr ctxt) {
8794 xmlChar *version = NULL;
8795
8796 if (CMP7(CUR_PTR, 'v', 'e', 'r', 's', 'i', 'o', 'n')) {
8797 SKIP(7);
8798 SKIP_BLANKS;
8799 if (RAW != '=') {
8800 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
8801 return(NULL);
8802 }
8803 NEXT;
8804 SKIP_BLANKS;
8805 if (RAW == '"') {
8806 NEXT;
8807 version = xmlParseVersionNum(ctxt);
8808 if (RAW != '"') {
8809 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
8810 } else
8811 NEXT;
8812 } else if (RAW == '\''){
8813 NEXT;
8814 version = xmlParseVersionNum(ctxt);
8815 if (RAW != '\'') {
8816 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
8817 } else
8818 NEXT;
8819 } else {
8820 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
8821 }
8822 }
8823 return(version);
8824}
8825
8826/**
8827 * xmlParseEncName:
8828 * @ctxt: an XML parser context
8829 *
8830 * parse the XML encoding name
8831 *
8832 * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
8833 *
8834 * Returns the encoding name value or NULL
8835 */
8836xmlChar *
8837xmlParseEncName(xmlParserCtxtPtr ctxt) {
8838 xmlChar *buf = NULL;
8839 int len = 0;
8840 int size = 10;
8841 xmlChar cur;
8842
8843 cur = CUR;
8844 if (((cur >= 'a') && (cur <= 'z')) ||
8845 ((cur >= 'A') && (cur <= 'Z'))) {
8846 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
8847 if (buf == NULL) {
8848 xmlErrMemory(ctxt, NULL);
8849 return(NULL);
8850 }
8851
8852 buf[len++] = cur;
8853 NEXT;
8854 cur = CUR;
8855 while (((cur >= 'a') && (cur <= 'z')) ||
8856 ((cur >= 'A') && (cur <= 'Z')) ||
8857 ((cur >= '0') && (cur <= '9')) ||
8858 (cur == '.') || (cur == '_') ||
8859 (cur == '-')) {
8860 if (len + 1 >= size) {
8861 xmlChar *tmp;
8862
8863 size *= 2;
8864 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
8865 if (tmp == NULL) {
8866 xmlErrMemory(ctxt, NULL);
8867 xmlFree(buf);
8868 return(NULL);
8869 }
8870 buf = tmp;
8871 }
8872 buf[len++] = cur;
8873 NEXT;
8874 cur = CUR;
8875 if (cur == 0) {
8876 SHRINK;
8877 GROW;
8878 cur = CUR;
8879 }
8880 }
8881 buf[len] = 0;
8882 } else {
8883 xmlFatalErr(ctxt, XML_ERR_ENCODING_NAME, NULL);
8884 }
8885 return(buf);
8886}
8887
8888/**
8889 * xmlParseEncodingDecl:
8890 * @ctxt: an XML parser context
8891 *
8892 * parse the XML encoding declaration
8893 *
8894 * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'")
8895 *
8896 * this setups the conversion filters.
8897 *
8898 * Returns the encoding value or NULL
8899 */
8900
8901const xmlChar *
8902xmlParseEncodingDecl(xmlParserCtxtPtr ctxt) {
8903 xmlChar *encoding = NULL;
8904
8905 SKIP_BLANKS;
8906 if (CMP8(CUR_PTR, 'e', 'n', 'c', 'o', 'd', 'i', 'n', 'g')) {
8907 SKIP(8);
8908 SKIP_BLANKS;
8909 if (RAW != '=') {
8910 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
8911 return(NULL);
8912 }
8913 NEXT;
8914 SKIP_BLANKS;
8915 if (RAW == '"') {
8916 NEXT;
8917 encoding = xmlParseEncName(ctxt);
8918 if (RAW != '"') {
8919 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
8920 } else
8921 NEXT;
8922 } else if (RAW == '\''){
8923 NEXT;
8924 encoding = xmlParseEncName(ctxt);
8925 if (RAW != '\'') {
8926 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
8927 } else
8928 NEXT;
8929 } else {
8930 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
8931 }
8932 /*
8933 * UTF-16 encoding stwich has already taken place at this stage,
8934 * more over the little-endian/big-endian selection is already done
8935 */
8936 if ((encoding != NULL) &&
8937 ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-16")) ||
8938 (!xmlStrcasecmp(encoding, BAD_CAST "UTF16")))) {
8939 if (ctxt->encoding != NULL)
8940 xmlFree((xmlChar *) ctxt->encoding);
8941 ctxt->encoding = encoding;
8942 }
8943 /*
8944 * UTF-8 encoding is handled natively
8945 */
8946 else if ((encoding != NULL) &&
8947 ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-8")) ||
8948 (!xmlStrcasecmp(encoding, BAD_CAST "UTF8")))) {
8949 if (ctxt->encoding != NULL)
8950 xmlFree((xmlChar *) ctxt->encoding);
8951 ctxt->encoding = encoding;
8952 }
8953 else if (encoding != NULL) {
8954 xmlCharEncodingHandlerPtr handler;
8955
8956 if (ctxt->input->encoding != NULL)
8957 xmlFree((xmlChar *) ctxt->input->encoding);
8958 ctxt->input->encoding = encoding;
8959
8960 handler = xmlFindCharEncodingHandler((const char *) encoding);
8961 if (handler != NULL) {
8962 xmlSwitchToEncoding(ctxt, handler);
8963 } else {
8964 xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
8965 "Unsupported encoding %s\n", encoding);
8966 return(NULL);
8967 }
8968 }
8969 }
8970 return(encoding);
8971}
8972
8973/**
8974 * xmlParseSDDecl:
8975 * @ctxt: an XML parser context
8976 *
8977 * parse the XML standalone declaration
8978 *
8979 * [32] SDDecl ::= S 'standalone' Eq
8980 * (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no')'"'))
8981 *
8982 * [ VC: Standalone Document Declaration ]
8983 * TODO The standalone document declaration must have the value "no"
8984 * if any external markup declarations contain declarations of:
8985 * - attributes with default values, if elements to which these
8986 * attributes apply appear in the document without specifications
8987 * of values for these attributes, or
8988 * - entities (other than amp, lt, gt, apos, quot), if references
8989 * to those entities appear in the document, or
8990 * - attributes with values subject to normalization, where the
8991 * attribute appears in the document with a value which will change
8992 * as a result of normalization, or
8993 * - element types with element content, if white space occurs directly
8994 * within any instance of those types.
8995 *
8996 * Returns:
8997 * 1 if standalone="yes"
8998 * 0 if standalone="no"
8999 * -2 if standalone attribute is missing or invalid
9000 * (A standalone value of -2 means that the XML declaration was found,
9001 * but no value was specified for the standalone attribute).
9002 */
9003
9004int
9005xmlParseSDDecl(xmlParserCtxtPtr ctxt) {
9006 int standalone = -2;
9007
9008 SKIP_BLANKS;
9009 if (CMP10(CUR_PTR, 's', 't', 'a', 'n', 'd', 'a', 'l', 'o', 'n', 'e')) {
9010 SKIP(10);
9011 SKIP_BLANKS;
9012 if (RAW != '=') {
9013 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
9014 return(standalone);
9015 }
9016 NEXT;
9017 SKIP_BLANKS;
9018 if (RAW == '\''){
9019 NEXT;
9020 if ((RAW == 'n') && (NXT(1) == 'o')) {
9021 standalone = 0;
9022 SKIP(2);
9023 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
9024 (NXT(2) == 's')) {
9025 standalone = 1;
9026 SKIP(3);
9027 } else {
9028 xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
9029 }
9030 if (RAW != '\'') {
9031 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
9032 } else
9033 NEXT;
9034 } else if (RAW == '"'){
9035 NEXT;
9036 if ((RAW == 'n') && (NXT(1) == 'o')) {
9037 standalone = 0;
9038 SKIP(2);
9039 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
9040 (NXT(2) == 's')) {
9041 standalone = 1;
9042 SKIP(3);
9043 } else {
9044 xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
9045 }
9046 if (RAW != '"') {
9047 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
9048 } else
9049 NEXT;
9050 } else {
9051 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
9052 }
9053 }
9054 return(standalone);
9055}
9056
9057/**
9058 * xmlParseXMLDecl:
9059 * @ctxt: an XML parser context
9060 *
9061 * parse an XML declaration header
9062 *
9063 * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
9064 */
9065
9066void
9067xmlParseXMLDecl(xmlParserCtxtPtr ctxt) {
9068 xmlChar *version;
9069
9070 /*
9071 * This value for standalone indicates that the document has an
9072 * XML declaration but it does not have a standalone attribute.
9073 * It will be overwritten later if a standalone attribute is found.
9074 */
9075 ctxt->input->standalone = -2;
9076
9077 /*
9078 * We know that '<?xml' is here.
9079 */
9080 SKIP(5);
9081
9082 if (!IS_BLANK_CH(RAW)) {
9083 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
9084 "Blank needed after '<?xml'\n");
9085 }
9086 SKIP_BLANKS;
9087
9088 /*
9089 * We must have the VersionInfo here.
9090 */
9091 version = xmlParseVersionInfo(ctxt);
9092 if (version == NULL) {
9093 xmlFatalErr(ctxt, XML_ERR_VERSION_MISSING, NULL);
9094 } else {
9095 if (!xmlStrEqual(version, (const xmlChar *) XML_DEFAULT_VERSION)) {
9096 /*
9097 * TODO: Blueberry should be detected here
9098 */
9099 xmlWarningMsg(ctxt, XML_WAR_UNKNOWN_VERSION,
9100 "Unsupported version '%s'\n",
9101 version, NULL);
9102 }
9103 if (ctxt->version != NULL)
9104 xmlFree((void *) ctxt->version);
9105 ctxt->version = version;
9106 }
9107
9108 /*
9109 * We may have the encoding declaration
9110 */
9111 if (!IS_BLANK_CH(RAW)) {
9112 if ((RAW == '?') && (NXT(1) == '>')) {
9113 SKIP(2);
9114 return;
9115 }
9116 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
9117 }
9118 xmlParseEncodingDecl(ctxt);
9119 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
9120 /*
9121 * The XML REC instructs us to stop parsing right here
9122 */
9123 return;
9124 }
9125
9126 /*
9127 * We may have the standalone status.
9128 */
9129 if ((ctxt->input->encoding != NULL) && (!IS_BLANK_CH(RAW))) {
9130 if ((RAW == '?') && (NXT(1) == '>')) {
9131 SKIP(2);
9132 return;
9133 }
9134 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
9135 }
9136 SKIP_BLANKS;
9137 ctxt->input->standalone = xmlParseSDDecl(ctxt);
9138
9139 SKIP_BLANKS;
9140 if ((RAW == '?') && (NXT(1) == '>')) {
9141 SKIP(2);
9142 } else if (RAW == '>') {
9143 /* Deprecated old WD ... */
9144 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
9145 NEXT;
9146 } else {
9147 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
9148 MOVETO_ENDTAG(CUR_PTR);
9149 NEXT;
9150 }
9151}
9152
9153/**
9154 * xmlParseMisc:
9155 * @ctxt: an XML parser context
9156 *
9157 * parse an XML Misc* optional field.
9158 *
9159 * [27] Misc ::= Comment | PI | S
9160 */
9161
9162void
9163xmlParseMisc(xmlParserCtxtPtr ctxt) {
9164 while (((RAW == '<') && (NXT(1) == '?')) ||
9165 (CMP4(CUR_PTR, '<', '!', '-', '-')) ||
9166 IS_BLANK_CH(CUR)) {
9167 if ((RAW == '<') && (NXT(1) == '?')) {
9168 xmlParsePI(ctxt);
9169 } else if (IS_BLANK_CH(CUR)) {
9170 NEXT;
9171 } else
9172 xmlParseComment(ctxt);
9173 }
9174}
9175
9176/**
9177 * xmlParseDocument:
9178 * @ctxt: an XML parser context
9179 *
9180 * parse an XML document (and build a tree if using the standard SAX
9181 * interface).
9182 *
9183 * [1] document ::= prolog element Misc*
9184 *
9185 * [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)?
9186 *
9187 * Returns 0, -1 in case of error. the parser context is augmented
9188 * as a result of the parsing.
9189 */
9190
9191int
9192xmlParseDocument(xmlParserCtxtPtr ctxt) {
9193 xmlChar start[4];
9194 xmlCharEncoding enc;
9195
9196 xmlInitParser();
9197
9198 if ((ctxt == NULL) || (ctxt->input == NULL))
9199 return(-1);
9200
9201 GROW;
9202
9203 /*
9204 * SAX: detecting the level.
9205 */
9206 xmlDetectSAX2(ctxt);
9207
9208 /*
9209 * SAX: beginning of the document processing.
9210 */
9211 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
9212 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
9213
9214 if ((ctxt->encoding == (const xmlChar *)XML_CHAR_ENCODING_NONE) &&
9215 ((ctxt->input->end - ctxt->input->cur) >= 4)) {
9216 /*
9217 * Get the 4 first bytes and decode the charset
9218 * if enc != XML_CHAR_ENCODING_NONE
9219 * plug some encoding conversion routines.
9220 */
9221 start[0] = RAW;
9222 start[1] = NXT(1);
9223 start[2] = NXT(2);
9224 start[3] = NXT(3);
9225 enc = xmlDetectCharEncoding(&start[0], 4);
9226 if (enc != XML_CHAR_ENCODING_NONE) {
9227 xmlSwitchEncoding(ctxt, enc);
9228 }
9229 }
9230
9231
9232 if (CUR == 0) {
9233 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
9234 }
9235
9236 /*
9237 * Check for the XMLDecl in the Prolog.
9238 */
9239 GROW;
9240 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
9241
9242 /*
9243 * Note that we will switch encoding on the fly.
9244 */
9245 xmlParseXMLDecl(ctxt);
9246 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
9247 /*
9248 * The XML REC instructs us to stop parsing right here
9249 */
9250 return(-1);
9251 }
9252 ctxt->standalone = ctxt->input->standalone;
9253 SKIP_BLANKS;
9254 } else {
9255 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
9256 }
9257 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
9258 ctxt->sax->startDocument(ctxt->userData);
9259
9260 /*
9261 * The Misc part of the Prolog
9262 */
9263 GROW;
9264 xmlParseMisc(ctxt);
9265
9266 /*
9267 * Then possibly doc type declaration(s) and more Misc
9268 * (doctypedecl Misc*)?
9269 */
9270 GROW;
9271 if (CMP9(CUR_PTR, '<', '!', 'D', 'O', 'C', 'T', 'Y', 'P', 'E')) {
9272
9273 ctxt->inSubset = 1;
9274 xmlParseDocTypeDecl(ctxt);
9275 if (RAW == '[') {
9276 ctxt->instate = XML_PARSER_DTD;
9277 xmlParseInternalSubset(ctxt);
9278 }
9279
9280 /*
9281 * Create and update the external subset.
9282 */
9283 ctxt->inSubset = 2;
9284 if ((ctxt->sax != NULL) && (ctxt->sax->externalSubset != NULL) &&
9285 (!ctxt->disableSAX))
9286 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
9287 ctxt->extSubSystem, ctxt->extSubURI);
9288 ctxt->inSubset = 0;
9289
9290 xmlCleanSpecialAttr(ctxt);
9291
9292 ctxt->instate = XML_PARSER_PROLOG;
9293 xmlParseMisc(ctxt);
9294 }
9295
9296 /*
9297 * Time to start parsing the tree itself
9298 */
9299 GROW;
9300 if (RAW != '<') {
9301 xmlFatalErrMsg(ctxt, XML_ERR_DOCUMENT_EMPTY,
9302 "Start tag expected, '<' not found\n");
9303 } else {
9304 ctxt->instate = XML_PARSER_CONTENT;
9305 xmlParseElement(ctxt);
9306 ctxt->instate = XML_PARSER_EPILOG;
9307
9308
9309 /*
9310 * The Misc part at the end
9311 */
9312 xmlParseMisc(ctxt);
9313
9314 if (RAW != 0) {
9315 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
9316 }
9317 ctxt->instate = XML_PARSER_EOF;
9318 }
9319
9320 /*
9321 * SAX: end of the document processing.
9322 */
9323 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
9324 ctxt->sax->endDocument(ctxt->userData);
9325
9326 /*
9327 * Remove locally kept entity definitions if the tree was not built
9328 */
9329 if ((ctxt->myDoc != NULL) &&
9330 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
9331 xmlFreeDoc(ctxt->myDoc);
9332 ctxt->myDoc = NULL;
9333 }
9334
9335 if (! ctxt->wellFormed) {
9336 ctxt->valid = 0;
9337 return(-1);
9338 }
9339 return(0);
9340}
9341
9342/**
9343 * xmlParseExtParsedEnt:
9344 * @ctxt: an XML parser context
9345 *
9346 * parse a general parsed entity
9347 * An external general parsed entity is well-formed if it matches the
9348 * production labeled extParsedEnt.
9349 *
9350 * [78] extParsedEnt ::= TextDecl? content
9351 *
9352 * Returns 0, -1 in case of error. the parser context is augmented
9353 * as a result of the parsing.
9354 */
9355
9356int
9357xmlParseExtParsedEnt(xmlParserCtxtPtr ctxt) {
9358 xmlChar start[4];
9359 xmlCharEncoding enc;
9360
9361 if ((ctxt == NULL) || (ctxt->input == NULL))
9362 return(-1);
9363
9364 xmlDefaultSAXHandlerInit();
9365
9366 xmlDetectSAX2(ctxt);
9367
9368 GROW;
9369
9370 /*
9371 * SAX: beginning of the document processing.
9372 */
9373 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
9374 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
9375
9376 /*
9377 * Get the 4 first bytes and decode the charset
9378 * if enc != XML_CHAR_ENCODING_NONE
9379 * plug some encoding conversion routines.
9380 */
9381 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
9382 start[0] = RAW;
9383 start[1] = NXT(1);
9384 start[2] = NXT(2);
9385 start[3] = NXT(3);
9386 enc = xmlDetectCharEncoding(start, 4);
9387 if (enc != XML_CHAR_ENCODING_NONE) {
9388 xmlSwitchEncoding(ctxt, enc);
9389 }
9390 }
9391
9392
9393 if (CUR == 0) {
9394 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
9395 }
9396
9397 /*
9398 * Check for the XMLDecl in the Prolog.
9399 */
9400 GROW;
9401 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
9402
9403 /*
9404 * Note that we will switch encoding on the fly.
9405 */
9406 xmlParseXMLDecl(ctxt);
9407 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
9408 /*
9409 * The XML REC instructs us to stop parsing right here
9410 */
9411 return(-1);
9412 }
9413 SKIP_BLANKS;
9414 } else {
9415 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
9416 }
9417 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
9418 ctxt->sax->startDocument(ctxt->userData);
9419
9420 /*
9421 * Doing validity checking on chunk doesn't make sense
9422 */
9423 ctxt->instate = XML_PARSER_CONTENT;
9424 ctxt->validate = 0;
9425 ctxt->loadsubset = 0;
9426 ctxt->depth = 0;
9427
9428 xmlParseContent(ctxt);
9429
9430 if ((RAW == '<') && (NXT(1) == '/')) {
9431 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
9432 } else if (RAW != 0) {
9433 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
9434 }
9435
9436 /*
9437 * SAX: end of the document processing.
9438 */
9439 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
9440 ctxt->sax->endDocument(ctxt->userData);
9441
9442 if (! ctxt->wellFormed) return(-1);
9443 return(0);
9444}
9445
9446#ifdef LIBXML_PUSH_ENABLED
9447/************************************************************************
9448 * *
9449 * Progressive parsing interfaces *
9450 * *
9451 ************************************************************************/
9452
9453/**
9454 * xmlParseLookupSequence:
9455 * @ctxt: an XML parser context
9456 * @first: the first char to lookup
9457 * @next: the next char to lookup or zero
9458 * @third: the next char to lookup or zero
9459 *
9460 * Try to find if a sequence (first, next, third) or just (first next) or
9461 * (first) is available in the input stream.
9462 * This function has a side effect of (possibly) incrementing ctxt->checkIndex
9463 * to avoid rescanning sequences of bytes, it DOES change the state of the
9464 * parser, do not use liberally.
9465 *
9466 * Returns the index to the current parsing point if the full sequence
9467 * is available, -1 otherwise.
9468 */
9469static int
9470xmlParseLookupSequence(xmlParserCtxtPtr ctxt, xmlChar first,
9471 xmlChar next, xmlChar third) {
9472 int base, len;
9473 xmlParserInputPtr in;
9474 const xmlChar *buf;
9475
9476 in = ctxt->input;
9477 if (in == NULL) return(-1);
9478 base = in->cur - in->base;
9479 if (base < 0) return(-1);
9480 if (ctxt->checkIndex > base)
9481 base = ctxt->checkIndex;
9482 if (in->buf == NULL) {
9483 buf = in->base;
9484 len = in->length;
9485 } else {
9486 buf = in->buf->buffer->content;
9487 len = in->buf->buffer->use;
9488 }
9489 /* take into account the sequence length */
9490 if (third) len -= 2;
9491 else if (next) len --;
9492 for (;base < len;base++) {
9493 if (buf[base] == first) {
9494 if (third != 0) {
9495 if ((buf[base + 1] != next) ||
9496 (buf[base + 2] != third)) continue;
9497 } else if (next != 0) {
9498 if (buf[base + 1] != next) continue;
9499 }
9500 ctxt->checkIndex = 0;
9501#ifdef DEBUG_PUSH
9502 if (next == 0)
9503 xmlGenericError(xmlGenericErrorContext,
9504 "PP: lookup '%c' found at %d\n",
9505 first, base);
9506 else if (third == 0)
9507 xmlGenericError(xmlGenericErrorContext,
9508 "PP: lookup '%c%c' found at %d\n",
9509 first, next, base);
9510 else
9511 xmlGenericError(xmlGenericErrorContext,
9512 "PP: lookup '%c%c%c' found at %d\n",
9513 first, next, third, base);
9514#endif
9515 return(base - (in->cur - in->base));
9516 }
9517 }
9518 ctxt->checkIndex = base;
9519#ifdef DEBUG_PUSH
9520 if (next == 0)
9521 xmlGenericError(xmlGenericErrorContext,
9522 "PP: lookup '%c' failed\n", first);
9523 else if (third == 0)
9524 xmlGenericError(xmlGenericErrorContext,
9525 "PP: lookup '%c%c' failed\n", first, next);
9526 else
9527 xmlGenericError(xmlGenericErrorContext,
9528 "PP: lookup '%c%c%c' failed\n", first, next, third);
9529#endif
9530 return(-1);
9531}
9532
9533/**
9534 * xmlParseGetLasts:
9535 * @ctxt: an XML parser context
9536 * @lastlt: pointer to store the last '<' from the input
9537 * @lastgt: pointer to store the last '>' from the input
9538 *
9539 * Lookup the last < and > in the current chunk
9540 */
9541static void
9542xmlParseGetLasts(xmlParserCtxtPtr ctxt, const xmlChar **lastlt,
9543 const xmlChar **lastgt) {
9544 const xmlChar *tmp;
9545
9546 if ((ctxt == NULL) || (lastlt == NULL) || (lastgt == NULL)) {
9547 xmlGenericError(xmlGenericErrorContext,
9548 "Internal error: xmlParseGetLasts\n");
9549 return;
9550 }
9551 if ((ctxt->progressive != 0) && (ctxt->inputNr == 1)) {
9552 tmp = ctxt->input->end;
9553 tmp--;
9554 while ((tmp >= ctxt->input->base) && (*tmp != '<')) tmp--;
9555 if (tmp < ctxt->input->base) {
9556 *lastlt = NULL;
9557 *lastgt = NULL;
9558 } else {
9559 *lastlt = tmp;
9560 tmp++;
9561 while ((tmp < ctxt->input->end) && (*tmp != '>')) {
9562 if (*tmp == '\'') {
9563 tmp++;
9564 while ((tmp < ctxt->input->end) && (*tmp != '\'')) tmp++;
9565 if (tmp < ctxt->input->end) tmp++;
9566 } else if (*tmp == '"') {
9567 tmp++;
9568 while ((tmp < ctxt->input->end) && (*tmp != '"')) tmp++;
9569 if (tmp < ctxt->input->end) tmp++;
9570 } else
9571 tmp++;
9572 }
9573 if (tmp < ctxt->input->end)
9574 *lastgt = tmp;
9575 else {
9576 tmp = *lastlt;
9577 tmp--;
9578 while ((tmp >= ctxt->input->base) && (*tmp != '>')) tmp--;
9579 if (tmp >= ctxt->input->base)
9580 *lastgt = tmp;
9581 else
9582 *lastgt = NULL;
9583 }
9584 }
9585 } else {
9586 *lastlt = NULL;
9587 *lastgt = NULL;
9588 }
9589}
9590/**
9591 * xmlCheckCdataPush:
9592 * @cur: pointer to the bock of characters
9593 * @len: length of the block in bytes
9594 *
9595 * Check that the block of characters is okay as SCdata content [20]
9596 *
9597 * Returns the number of bytes to pass if okay, a negative index where an
9598 * UTF-8 error occured otherwise
9599 */
9600static int
9601xmlCheckCdataPush(const xmlChar *utf, int len) {
9602 int ix;
9603 unsigned char c;
9604 int codepoint;
9605
9606 if ((utf == NULL) || (len <= 0))
9607 return(0);
9608
9609 for (ix = 0; ix < len;) { /* string is 0-terminated */
9610 c = utf[ix];
9611 if ((c & 0x80) == 0x00) { /* 1-byte code, starts with 10 */
9612 if (c >= 0x20)
9613 ix++;
9614 else if ((c == 0xA) || (c == 0xD) || (c == 0x9))
9615 ix++;
9616 else
9617 return(-ix);
9618 } else if ((c & 0xe0) == 0xc0) {/* 2-byte code, starts with 110 */
9619 if (ix + 2 > len) return(ix);
9620 if ((utf[ix+1] & 0xc0 ) != 0x80)
9621 return(-ix);
9622 codepoint = (utf[ix] & 0x1f) << 6;
9623 codepoint |= utf[ix+1] & 0x3f;
9624 if (!xmlIsCharQ(codepoint))
9625 return(-ix);
9626 ix += 2;
9627 } else if ((c & 0xf0) == 0xe0) {/* 3-byte code, starts with 1110 */
9628 if (ix + 3 > len) return(ix);
9629 if (((utf[ix+1] & 0xc0) != 0x80) ||
9630 ((utf[ix+2] & 0xc0) != 0x80))
9631 return(-ix);
9632 codepoint = (utf[ix] & 0xf) << 12;
9633 codepoint |= (utf[ix+1] & 0x3f) << 6;
9634 codepoint |= utf[ix+2] & 0x3f;
9635 if (!xmlIsCharQ(codepoint))
9636 return(-ix);
9637 ix += 3;
9638 } else if ((c & 0xf8) == 0xf0) {/* 4-byte code, starts with 11110 */
9639 if (ix + 4 > len) return(ix);
9640 if (((utf[ix+1] & 0xc0) != 0x80) ||
9641 ((utf[ix+2] & 0xc0) != 0x80) ||
9642 ((utf[ix+3] & 0xc0) != 0x80))
9643 return(-ix);
9644 codepoint = (utf[ix] & 0x7) << 18;
9645 codepoint |= (utf[ix+1] & 0x3f) << 12;
9646 codepoint |= (utf[ix+2] & 0x3f) << 6;
9647 codepoint |= utf[ix+3] & 0x3f;
9648 if (!xmlIsCharQ(codepoint))
9649 return(-ix);
9650 ix += 4;
9651 } else /* unknown encoding */
9652 return(-ix);
9653 }
9654 return(ix);
9655}
9656
9657/**
9658 * xmlParseTryOrFinish:
9659 * @ctxt: an XML parser context
9660 * @terminate: last chunk indicator
9661 *
9662 * Try to progress on parsing
9663 *
9664 * Returns zero if no parsing was possible
9665 */
9666static int
9667xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
9668 int ret = 0;
9669 int avail, tlen;
9670 xmlChar cur, next;
9671 const xmlChar *lastlt, *lastgt;
9672
9673 if (ctxt->input == NULL)
9674 return(0);
9675
9676#ifdef DEBUG_PUSH
9677 switch (ctxt->instate) {
9678 case XML_PARSER_EOF:
9679 xmlGenericError(xmlGenericErrorContext,
9680 "PP: try EOF\n"); break;
9681 case XML_PARSER_START:
9682 xmlGenericError(xmlGenericErrorContext,
9683 "PP: try START\n"); break;
9684 case XML_PARSER_MISC:
9685 xmlGenericError(xmlGenericErrorContext,
9686 "PP: try MISC\n");break;
9687 case XML_PARSER_COMMENT:
9688 xmlGenericError(xmlGenericErrorContext,
9689 "PP: try COMMENT\n");break;
9690 case XML_PARSER_PROLOG:
9691 xmlGenericError(xmlGenericErrorContext,
9692 "PP: try PROLOG\n");break;
9693 case XML_PARSER_START_TAG:
9694 xmlGenericError(xmlGenericErrorContext,
9695 "PP: try START_TAG\n");break;
9696 case XML_PARSER_CONTENT:
9697 xmlGenericError(xmlGenericErrorContext,
9698 "PP: try CONTENT\n");break;
9699 case XML_PARSER_CDATA_SECTION:
9700 xmlGenericError(xmlGenericErrorContext,
9701 "PP: try CDATA_SECTION\n");break;
9702 case XML_PARSER_END_TAG:
9703 xmlGenericError(xmlGenericErrorContext,
9704 "PP: try END_TAG\n");break;
9705 case XML_PARSER_ENTITY_DECL:
9706 xmlGenericError(xmlGenericErrorContext,
9707 "PP: try ENTITY_DECL\n");break;
9708 case XML_PARSER_ENTITY_VALUE:
9709 xmlGenericError(xmlGenericErrorContext,
9710 "PP: try ENTITY_VALUE\n");break;
9711 case XML_PARSER_ATTRIBUTE_VALUE:
9712 xmlGenericError(xmlGenericErrorContext,
9713 "PP: try ATTRIBUTE_VALUE\n");break;
9714 case XML_PARSER_DTD:
9715 xmlGenericError(xmlGenericErrorContext,
9716 "PP: try DTD\n");break;
9717 case XML_PARSER_EPILOG:
9718 xmlGenericError(xmlGenericErrorContext,
9719 "PP: try EPILOG\n");break;
9720 case XML_PARSER_PI:
9721 xmlGenericError(xmlGenericErrorContext,
9722 "PP: try PI\n");break;
9723 case XML_PARSER_IGNORE:
9724 xmlGenericError(xmlGenericErrorContext,
9725 "PP: try IGNORE\n");break;
9726 }
9727#endif
9728
9729 if ((ctxt->input != NULL) &&
9730 (ctxt->input->cur - ctxt->input->base > 4096)) {
9731 xmlSHRINK(ctxt);
9732 ctxt->checkIndex = 0;
9733 }
9734 xmlParseGetLasts(ctxt, &lastlt, &lastgt);
9735
9736 while (1) {
9737 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
9738 return(0);
9739
9740
9741 /*
9742 * Pop-up of finished entities.
9743 */
9744 while ((RAW == 0) && (ctxt->inputNr > 1))
9745 xmlPopInput(ctxt);
9746
9747 if (ctxt->input == NULL) break;
9748 if (ctxt->input->buf == NULL)
9749 avail = ctxt->input->length -
9750 (ctxt->input->cur - ctxt->input->base);
9751 else {
9752 /*
9753 * If we are operating on converted input, try to flush
9754 * remainng chars to avoid them stalling in the non-converted
9755 * buffer.
9756 */
9757 if ((ctxt->input->buf->raw != NULL) &&
9758 (ctxt->input->buf->raw->use > 0)) {
9759 int base = ctxt->input->base -
9760 ctxt->input->buf->buffer->content;
9761 int current = ctxt->input->cur - ctxt->input->base;
9762
9763 xmlParserInputBufferPush(ctxt->input->buf, 0, "");
9764 ctxt->input->base = ctxt->input->buf->buffer->content + base;
9765 ctxt->input->cur = ctxt->input->base + current;
9766 ctxt->input->end =
9767 &ctxt->input->buf->buffer->content[
9768 ctxt->input->buf->buffer->use];
9769 }
9770 avail = ctxt->input->buf->buffer->use -
9771 (ctxt->input->cur - ctxt->input->base);
9772 }
9773 if (avail < 1)
9774 goto done;
9775 switch (ctxt->instate) {
9776 case XML_PARSER_EOF:
9777 /*
9778 * Document parsing is done !
9779 */
9780 goto done;
9781 case XML_PARSER_START:
9782 if (ctxt->charset == XML_CHAR_ENCODING_NONE) {
9783 xmlChar start[4];
9784 xmlCharEncoding enc;
9785
9786 /*
9787 * Very first chars read from the document flow.
9788 */
9789 if (avail < 4)
9790 goto done;
9791
9792 /*
9793 * Get the 4 first bytes and decode the charset
9794 * if enc != XML_CHAR_ENCODING_NONE
9795 * plug some encoding conversion routines,
9796 * else xmlSwitchEncoding will set to (default)
9797 * UTF8.
9798 */
9799 start[0] = RAW;
9800 start[1] = NXT(1);
9801 start[2] = NXT(2);
9802 start[3] = NXT(3);
9803 enc = xmlDetectCharEncoding(start, 4);
9804 xmlSwitchEncoding(ctxt, enc);
9805 break;
9806 }
9807
9808 if (avail < 2)
9809 goto done;
9810 cur = ctxt->input->cur[0];
9811 next = ctxt->input->cur[1];
9812 if (cur == 0) {
9813 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
9814 ctxt->sax->setDocumentLocator(ctxt->userData,
9815 &xmlDefaultSAXLocator);
9816 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
9817 ctxt->instate = XML_PARSER_EOF;
9818#ifdef DEBUG_PUSH
9819 xmlGenericError(xmlGenericErrorContext,
9820 "PP: entering EOF\n");
9821#endif
9822 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
9823 ctxt->sax->endDocument(ctxt->userData);
9824 goto done;
9825 }
9826 if ((cur == '<') && (next == '?')) {
9827 /* PI or XML decl */
9828 if (avail < 5) return(ret);
9829 if ((!terminate) &&
9830 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
9831 return(ret);
9832 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
9833 ctxt->sax->setDocumentLocator(ctxt->userData,
9834 &xmlDefaultSAXLocator);
9835 if ((ctxt->input->cur[2] == 'x') &&
9836 (ctxt->input->cur[3] == 'm') &&
9837 (ctxt->input->cur[4] == 'l') &&
9838 (IS_BLANK_CH(ctxt->input->cur[5]))) {
9839 ret += 5;
9840#ifdef DEBUG_PUSH
9841 xmlGenericError(xmlGenericErrorContext,
9842 "PP: Parsing XML Decl\n");
9843#endif
9844 xmlParseXMLDecl(ctxt);
9845 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
9846 /*
9847 * The XML REC instructs us to stop parsing right
9848 * here
9849 */
9850 ctxt->instate = XML_PARSER_EOF;
9851 return(0);
9852 }
9853 ctxt->standalone = ctxt->input->standalone;
9854 if ((ctxt->encoding == NULL) &&
9855 (ctxt->input->encoding != NULL))
9856 ctxt->encoding = xmlStrdup(ctxt->input->encoding);
9857 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
9858 (!ctxt->disableSAX))
9859 ctxt->sax->startDocument(ctxt->userData);
9860 ctxt->instate = XML_PARSER_MISC;
9861#ifdef DEBUG_PUSH
9862 xmlGenericError(xmlGenericErrorContext,
9863 "PP: entering MISC\n");
9864#endif
9865 } else {
9866 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
9867 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
9868 (!ctxt->disableSAX))
9869 ctxt->sax->startDocument(ctxt->userData);
9870 ctxt->instate = XML_PARSER_MISC;
9871#ifdef DEBUG_PUSH
9872 xmlGenericError(xmlGenericErrorContext,
9873 "PP: entering MISC\n");
9874#endif
9875 }
9876 } else {
9877 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
9878 ctxt->sax->setDocumentLocator(ctxt->userData,
9879 &xmlDefaultSAXLocator);
9880 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
9881 if (ctxt->version == NULL) {
9882 xmlErrMemory(ctxt, NULL);
9883 break;
9884 }
9885 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
9886 (!ctxt->disableSAX))
9887 ctxt->sax->startDocument(ctxt->userData);
9888 ctxt->instate = XML_PARSER_MISC;
9889#ifdef DEBUG_PUSH
9890 xmlGenericError(xmlGenericErrorContext,
9891 "PP: entering MISC\n");
9892#endif
9893 }
9894 break;
9895 case XML_PARSER_START_TAG: {
9896 const xmlChar *name;
9897 const xmlChar *prefix;
9898 const xmlChar *URI;
9899 int nsNr = ctxt->nsNr;
9900
9901 if ((avail < 2) && (ctxt->inputNr == 1))
9902 goto done;
9903 cur = ctxt->input->cur[0];
9904 if (cur != '<') {
9905 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
9906 ctxt->instate = XML_PARSER_EOF;
9907 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
9908 ctxt->sax->endDocument(ctxt->userData);
9909 goto done;
9910 }
9911 if (!terminate) {
9912 if (ctxt->progressive) {
9913 /* > can be found unescaped in attribute values */
9914 if ((lastgt == NULL) || (ctxt->input->cur >= lastgt))
9915 goto done;
9916 } else if (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0) {
9917 goto done;
9918 }
9919 }
9920 if (ctxt->spaceNr == 0)
9921 spacePush(ctxt, -1);
9922 else if (*ctxt->space == -2)
9923 spacePush(ctxt, -1);
9924 else
9925 spacePush(ctxt, *ctxt->space);
9926#ifdef LIBXML_SAX1_ENABLED
9927 if (ctxt->sax2)
9928#endif /* LIBXML_SAX1_ENABLED */
9929 name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen);
9930#ifdef LIBXML_SAX1_ENABLED
9931 else
9932 name = xmlParseStartTag(ctxt);
9933#endif /* LIBXML_SAX1_ENABLED */
9934 if (name == NULL) {
9935 spacePop(ctxt);
9936 ctxt->instate = XML_PARSER_EOF;
9937 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
9938 ctxt->sax->endDocument(ctxt->userData);
9939 goto done;
9940 }
9941#ifdef LIBXML_VALID_ENABLED
9942 /*
9943 * [ VC: Root Element Type ]
9944 * The Name in the document type declaration must match
9945 * the element type of the root element.
9946 */
9947 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
9948 ctxt->node && (ctxt->node == ctxt->myDoc->children))
9949 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
9950#endif /* LIBXML_VALID_ENABLED */
9951
9952 /*
9953 * Check for an Empty Element.
9954 */
9955 if ((RAW == '/') && (NXT(1) == '>')) {
9956 SKIP(2);
9957
9958 if (ctxt->sax2) {
9959 if ((ctxt->sax != NULL) &&
9960 (ctxt->sax->endElementNs != NULL) &&
9961 (!ctxt->disableSAX))
9962 ctxt->sax->endElementNs(ctxt->userData, name,
9963 prefix, URI);
9964 if (ctxt->nsNr - nsNr > 0)
9965 nsPop(ctxt, ctxt->nsNr - nsNr);
9966#ifdef LIBXML_SAX1_ENABLED
9967 } else {
9968 if ((ctxt->sax != NULL) &&
9969 (ctxt->sax->endElement != NULL) &&
9970 (!ctxt->disableSAX))
9971 ctxt->sax->endElement(ctxt->userData, name);
9972#endif /* LIBXML_SAX1_ENABLED */
9973 }
9974 spacePop(ctxt);
9975 if (ctxt->nameNr == 0) {
9976 ctxt->instate = XML_PARSER_EPILOG;
9977 } else {
9978 ctxt->instate = XML_PARSER_CONTENT;
9979 }
9980 break;
9981 }
9982 if (RAW == '>') {
9983 NEXT;
9984 } else {
9985 xmlFatalErrMsgStr(ctxt, XML_ERR_GT_REQUIRED,
9986 "Couldn't find end of Start Tag %s\n",
9987 name);
9988 nodePop(ctxt);
9989 spacePop(ctxt);
9990 }
9991 if (ctxt->sax2)
9992 nameNsPush(ctxt, name, prefix, URI, ctxt->nsNr - nsNr);
9993#ifdef LIBXML_SAX1_ENABLED
9994 else
9995 namePush(ctxt, name);
9996#endif /* LIBXML_SAX1_ENABLED */
9997
9998 ctxt->instate = XML_PARSER_CONTENT;
9999 break;
10000 }
10001 case XML_PARSER_CONTENT: {
10002 const xmlChar *test;
10003 unsigned int cons;
10004 if ((avail < 2) && (ctxt->inputNr == 1))
10005 goto done;
10006 cur = ctxt->input->cur[0];
10007 next = ctxt->input->cur[1];
10008
10009 test = CUR_PTR;
10010 cons = ctxt->input->consumed;
10011 if ((cur == '<') && (next == '/')) {
10012 ctxt->instate = XML_PARSER_END_TAG;
10013 break;
10014 } else if ((cur == '<') && (next == '?')) {
10015 if ((!terminate) &&
10016 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
10017 goto done;
10018 xmlParsePI(ctxt);
10019 } else if ((cur == '<') && (next != '!')) {
10020 ctxt->instate = XML_PARSER_START_TAG;
10021 break;
10022 } else if ((cur == '<') && (next == '!') &&
10023 (ctxt->input->cur[2] == '-') &&
10024 (ctxt->input->cur[3] == '-')) {
10025 int term;
10026
10027 if (avail < 4)
10028 goto done;
10029 ctxt->input->cur += 4;
10030 term = xmlParseLookupSequence(ctxt, '-', '-', '>');
10031 ctxt->input->cur -= 4;
10032 if ((!terminate) && (term < 0))
10033 goto done;
10034 xmlParseComment(ctxt);
10035 ctxt->instate = XML_PARSER_CONTENT;
10036 } else if ((cur == '<') && (ctxt->input->cur[1] == '!') &&
10037 (ctxt->input->cur[2] == '[') &&
10038 (ctxt->input->cur[3] == 'C') &&
10039 (ctxt->input->cur[4] == 'D') &&
10040 (ctxt->input->cur[5] == 'A') &&
10041 (ctxt->input->cur[6] == 'T') &&
10042 (ctxt->input->cur[7] == 'A') &&
10043 (ctxt->input->cur[8] == '[')) {
10044 SKIP(9);
10045 ctxt->instate = XML_PARSER_CDATA_SECTION;
10046 break;
10047 } else if ((cur == '<') && (next == '!') &&
10048 (avail < 9)) {
10049 goto done;
10050 } else if (cur == '&') {
10051 if ((!terminate) &&
10052 (xmlParseLookupSequence(ctxt, ';', 0, 0) < 0))
10053 goto done;
10054 xmlParseReference(ctxt);
10055 } else {
10056 /* TODO Avoid the extra copy, handle directly !!! */
10057 /*
10058 * Goal of the following test is:
10059 * - minimize calls to the SAX 'character' callback
10060 * when they are mergeable
10061 * - handle an problem for isBlank when we only parse
10062 * a sequence of blank chars and the next one is
10063 * not available to check against '<' presence.
10064 * - tries to homogenize the differences in SAX
10065 * callbacks between the push and pull versions
10066 * of the parser.
10067 */
10068 if ((ctxt->inputNr == 1) &&
10069 (avail < XML_PARSER_BIG_BUFFER_SIZE)) {
10070 if (!terminate) {
10071 if (ctxt->progressive) {
10072 if ((lastlt == NULL) ||
10073 (ctxt->input->cur > lastlt))
10074 goto done;
10075 } else if (xmlParseLookupSequence(ctxt,
10076 '<', 0, 0) < 0) {
10077 goto done;
10078 }
10079 }
10080 }
10081 ctxt->checkIndex = 0;
10082 xmlParseCharData(ctxt, 0);
10083 }
10084 /*
10085 * Pop-up of finished entities.
10086 */
10087 while ((RAW == 0) && (ctxt->inputNr > 1))
10088 xmlPopInput(ctxt);
10089 if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) {
10090 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
10091 "detected an error in element content\n");
10092 ctxt->instate = XML_PARSER_EOF;
10093 break;
10094 }
10095 break;
10096 }
10097 case XML_PARSER_END_TAG:
10098 if (avail < 2)
10099 goto done;
10100 if (!terminate) {
10101 if (ctxt->progressive) {
10102 /* > can be found unescaped in attribute values */
10103 if ((lastgt == NULL) || (ctxt->input->cur >= lastgt))
10104 goto done;
10105 } else if (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0) {
10106 goto done;
10107 }
10108 }
10109 if (ctxt->sax2) {
10110 xmlParseEndTag2(ctxt,
10111 (void *) ctxt->pushTab[ctxt->nameNr * 3 - 3],
10112 (void *) ctxt->pushTab[ctxt->nameNr * 3 - 2], 0,
10113 (int) (long) ctxt->pushTab[ctxt->nameNr * 3 - 1], 0);
10114 nameNsPop(ctxt);
10115 }
10116#ifdef LIBXML_SAX1_ENABLED
10117 else
10118 xmlParseEndTag1(ctxt, 0);
10119#endif /* LIBXML_SAX1_ENABLED */
10120 if (ctxt->nameNr == 0) {
10121 ctxt->instate = XML_PARSER_EPILOG;
10122 } else {
10123 ctxt->instate = XML_PARSER_CONTENT;
10124 }
10125 break;
10126 case XML_PARSER_CDATA_SECTION: {
10127 /*
10128 * The Push mode need to have the SAX callback for
10129 * cdataBlock merge back contiguous callbacks.
10130 */
10131 int base;
10132
10133 base = xmlParseLookupSequence(ctxt, ']', ']', '>');
10134 if (base < 0) {
10135 if (avail >= XML_PARSER_BIG_BUFFER_SIZE + 2) {
10136 int tmp;
10137
10138 tmp = xmlCheckCdataPush(ctxt->input->cur,
10139 XML_PARSER_BIG_BUFFER_SIZE);
10140 if (tmp < 0) {
10141 tmp = -tmp;
10142 ctxt->input->cur += tmp;
10143 goto encoding_error;
10144 }
10145 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
10146 if (ctxt->sax->cdataBlock != NULL)
10147 ctxt->sax->cdataBlock(ctxt->userData,
10148 ctxt->input->cur, tmp);
10149 else if (ctxt->sax->characters != NULL)
10150 ctxt->sax->characters(ctxt->userData,
10151 ctxt->input->cur, tmp);
10152 }
10153 SKIPL(tmp);
10154 ctxt->checkIndex = 0;
10155 }
10156 goto done;
10157 } else {
10158 int tmp;
10159
10160 tmp = xmlCheckCdataPush(ctxt->input->cur, base);
10161 if ((tmp < 0) || (tmp != base)) {
10162 tmp = -tmp;
10163 ctxt->input->cur += tmp;
10164 goto encoding_error;
10165 }
10166 if ((ctxt->sax != NULL) && (base > 0) &&
10167 (!ctxt->disableSAX)) {
10168 if (ctxt->sax->cdataBlock != NULL)
10169 ctxt->sax->cdataBlock(ctxt->userData,
10170 ctxt->input->cur, base);
10171 else if (ctxt->sax->characters != NULL)
10172 ctxt->sax->characters(ctxt->userData,
10173 ctxt->input->cur, base);
10174 }
10175 SKIPL(base + 3);
10176 ctxt->checkIndex = 0;
10177 ctxt->instate = XML_PARSER_CONTENT;
10178#ifdef DEBUG_PUSH
10179 xmlGenericError(xmlGenericErrorContext,
10180 "PP: entering CONTENT\n");
10181#endif
10182 }
10183 break;
10184 }
10185 case XML_PARSER_MISC:
10186 SKIP_BLANKS;
10187 if (ctxt->input->buf == NULL)
10188 avail = ctxt->input->length -
10189 (ctxt->input->cur - ctxt->input->base);
10190 else
10191 avail = ctxt->input->buf->buffer->use -
10192 (ctxt->input->cur - ctxt->input->base);
10193 if (avail < 2)
10194 goto done;
10195 cur = ctxt->input->cur[0];
10196 next = ctxt->input->cur[1];
10197 if ((cur == '<') && (next == '?')) {
10198 if ((!terminate) &&
10199 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
10200 goto done;
10201#ifdef DEBUG_PUSH
10202 xmlGenericError(xmlGenericErrorContext,
10203 "PP: Parsing PI\n");
10204#endif
10205 xmlParsePI(ctxt);
10206 ctxt->checkIndex = 0;
10207 } else if ((cur == '<') && (next == '!') &&
10208 (ctxt->input->cur[2] == '-') &&
10209 (ctxt->input->cur[3] == '-')) {
10210 if ((!terminate) &&
10211 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
10212 goto done;
10213#ifdef DEBUG_PUSH
10214 xmlGenericError(xmlGenericErrorContext,
10215 "PP: Parsing Comment\n");
10216#endif
10217 xmlParseComment(ctxt);
10218 ctxt->instate = XML_PARSER_MISC;
10219 ctxt->checkIndex = 0;
10220 } else if ((cur == '<') && (next == '!') &&
10221 (ctxt->input->cur[2] == 'D') &&
10222 (ctxt->input->cur[3] == 'O') &&
10223 (ctxt->input->cur[4] == 'C') &&
10224 (ctxt->input->cur[5] == 'T') &&
10225 (ctxt->input->cur[6] == 'Y') &&
10226 (ctxt->input->cur[7] == 'P') &&
10227 (ctxt->input->cur[8] == 'E')) {
10228 if ((!terminate) &&
10229 (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
10230 goto done;
10231#ifdef DEBUG_PUSH
10232 xmlGenericError(xmlGenericErrorContext,
10233 "PP: Parsing internal subset\n");
10234#endif
10235 ctxt->inSubset = 1;
10236 xmlParseDocTypeDecl(ctxt);
10237 if (RAW == '[') {
10238 ctxt->instate = XML_PARSER_DTD;
10239#ifdef DEBUG_PUSH
10240 xmlGenericError(xmlGenericErrorContext,
10241 "PP: entering DTD\n");
10242#endif
10243 } else {
10244 /*
10245 * Create and update the external subset.
10246 */
10247 ctxt->inSubset = 2;
10248 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
10249 (ctxt->sax->externalSubset != NULL))
10250 ctxt->sax->externalSubset(ctxt->userData,
10251 ctxt->intSubName, ctxt->extSubSystem,
10252 ctxt->extSubURI);
10253 ctxt->inSubset = 0;
10254 xmlCleanSpecialAttr(ctxt);
10255 ctxt->instate = XML_PARSER_PROLOG;
10256#ifdef DEBUG_PUSH
10257 xmlGenericError(xmlGenericErrorContext,
10258 "PP: entering PROLOG\n");
10259#endif
10260 }
10261 } else if ((cur == '<') && (next == '!') &&
10262 (avail < 9)) {
10263 goto done;
10264 } else {
10265 ctxt->instate = XML_PARSER_START_TAG;
10266 ctxt->progressive = 1;
10267 xmlParseGetLasts(ctxt, &lastlt, &lastgt);
10268#ifdef DEBUG_PUSH
10269 xmlGenericError(xmlGenericErrorContext,
10270 "PP: entering START_TAG\n");
10271#endif
10272 }
10273 break;
10274 case XML_PARSER_PROLOG:
10275 SKIP_BLANKS;
10276 if (ctxt->input->buf == NULL)
10277 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
10278 else
10279 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
10280 if (avail < 2)
10281 goto done;
10282 cur = ctxt->input->cur[0];
10283 next = ctxt->input->cur[1];
10284 if ((cur == '<') && (next == '?')) {
10285 if ((!terminate) &&
10286 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
10287 goto done;
10288#ifdef DEBUG_PUSH
10289 xmlGenericError(xmlGenericErrorContext,
10290 "PP: Parsing PI\n");
10291#endif
10292 xmlParsePI(ctxt);
10293 } else if ((cur == '<') && (next == '!') &&
10294 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
10295 if ((!terminate) &&
10296 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
10297 goto done;
10298#ifdef DEBUG_PUSH
10299 xmlGenericError(xmlGenericErrorContext,
10300 "PP: Parsing Comment\n");
10301#endif
10302 xmlParseComment(ctxt);
10303 ctxt->instate = XML_PARSER_PROLOG;
10304 } else if ((cur == '<') && (next == '!') &&
10305 (avail < 4)) {
10306 goto done;
10307 } else {
10308 ctxt->instate = XML_PARSER_START_TAG;
10309 if (ctxt->progressive == 0)
10310 ctxt->progressive = 1;
10311 xmlParseGetLasts(ctxt, &lastlt, &lastgt);
10312#ifdef DEBUG_PUSH
10313 xmlGenericError(xmlGenericErrorContext,
10314 "PP: entering START_TAG\n");
10315#endif
10316 }
10317 break;
10318 case XML_PARSER_EPILOG:
10319 SKIP_BLANKS;
10320 if (ctxt->input->buf == NULL)
10321 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
10322 else
10323 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
10324 if (avail < 2)
10325 goto done;
10326 cur = ctxt->input->cur[0];
10327 next = ctxt->input->cur[1];
10328 if ((cur == '<') && (next == '?')) {
10329 if ((!terminate) &&
10330 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
10331 goto done;
10332#ifdef DEBUG_PUSH
10333 xmlGenericError(xmlGenericErrorContext,
10334 "PP: Parsing PI\n");
10335#endif
10336 xmlParsePI(ctxt);
10337 ctxt->instate = XML_PARSER_EPILOG;
10338 } else if ((cur == '<') && (next == '!') &&
10339 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
10340 if ((!terminate) &&
10341 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
10342 goto done;
10343#ifdef DEBUG_PUSH
10344 xmlGenericError(xmlGenericErrorContext,
10345 "PP: Parsing Comment\n");
10346#endif
10347 xmlParseComment(ctxt);
10348 ctxt->instate = XML_PARSER_EPILOG;
10349 } else if ((cur == '<') && (next == '!') &&
10350 (avail < 4)) {
10351 goto done;
10352 } else {
10353 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
10354 ctxt->instate = XML_PARSER_EOF;
10355#ifdef DEBUG_PUSH
10356 xmlGenericError(xmlGenericErrorContext,
10357 "PP: entering EOF\n");
10358#endif
10359 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
10360 ctxt->sax->endDocument(ctxt->userData);
10361 goto done;
10362 }
10363 break;
10364 case XML_PARSER_DTD: {
10365 /*
10366 * Sorry but progressive parsing of the internal subset
10367 * is not expected to be supported. We first check that
10368 * the full content of the internal subset is available and
10369 * the parsing is launched only at that point.
10370 * Internal subset ends up with "']' S? '>'" in an unescaped
10371 * section and not in a ']]>' sequence which are conditional
10372 * sections (whoever argued to keep that crap in XML deserve
10373 * a place in hell !).
10374 */
10375 int base, i;
10376 xmlChar *buf;
10377 xmlChar quote = 0;
10378
10379 base = ctxt->input->cur - ctxt->input->base;
10380 if (base < 0) return(0);
10381 if (ctxt->checkIndex > base)
10382 base = ctxt->checkIndex;
10383 buf = ctxt->input->buf->buffer->content;
10384 for (;(unsigned int) base < ctxt->input->buf->buffer->use;
10385 base++) {
10386 if (quote != 0) {
10387 if (buf[base] == quote)
10388 quote = 0;
10389 continue;
10390 }
10391 if ((quote == 0) && (buf[base] == '<')) {
10392 int found = 0;
10393 /* special handling of comments */
10394 if (((unsigned int) base + 4 <
10395 ctxt->input->buf->buffer->use) &&
10396 (buf[base + 1] == '!') &&
10397 (buf[base + 2] == '-') &&
10398 (buf[base + 3] == '-')) {
10399 for (;(unsigned int) base + 3 <
10400 ctxt->input->buf->buffer->use; base++) {
10401 if ((buf[base] == '-') &&
10402 (buf[base + 1] == '-') &&
10403 (buf[base + 2] == '>')) {
10404 found = 1;
10405 base += 2;
10406 break;
10407 }
10408 }
10409 if (!found) {
10410#if 0
10411 fprintf(stderr, "unfinished comment\n");
10412#endif
10413 break; /* for */
10414 }
10415 continue;
10416 }
10417 }
10418 if (buf[base] == '"') {
10419 quote = '"';
10420 continue;
10421 }
10422 if (buf[base] == '\'') {
10423 quote = '\'';
10424 continue;
10425 }
10426 if (buf[base] == ']') {
10427#if 0
10428 fprintf(stderr, "%c%c%c%c: ", buf[base],
10429 buf[base + 1], buf[base + 2], buf[base + 3]);
10430#endif
10431 if ((unsigned int) base +1 >=
10432 ctxt->input->buf->buffer->use)
10433 break;
10434 if (buf[base + 1] == ']') {
10435 /* conditional crap, skip both ']' ! */
10436 base++;
10437 continue;
10438 }
10439 for (i = 1;
10440 (unsigned int) base + i < ctxt->input->buf->buffer->use;
10441 i++) {
10442 if (buf[base + i] == '>') {
10443#if 0
10444 fprintf(stderr, "found\n");
10445#endif
10446 goto found_end_int_subset;
10447 }
10448 if (!IS_BLANK_CH(buf[base + i])) {
10449#if 0
10450 fprintf(stderr, "not found\n");
10451#endif
10452 goto not_end_of_int_subset;
10453 }
10454 }
10455#if 0
10456 fprintf(stderr, "end of stream\n");
10457#endif
10458 break;
10459
10460 }
10461not_end_of_int_subset:
10462 continue; /* for */
10463 }
10464 /*
10465 * We didn't found the end of the Internal subset
10466 */
10467#ifdef DEBUG_PUSH
10468 if (next == 0)
10469 xmlGenericError(xmlGenericErrorContext,
10470 "PP: lookup of int subset end filed\n");
10471#endif
10472 goto done;
10473
10474found_end_int_subset:
10475 xmlParseInternalSubset(ctxt);
10476 ctxt->inSubset = 2;
10477 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
10478 (ctxt->sax->externalSubset != NULL))
10479 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
10480 ctxt->extSubSystem, ctxt->extSubURI);
10481 ctxt->inSubset = 0;
10482 xmlCleanSpecialAttr(ctxt);
10483 ctxt->instate = XML_PARSER_PROLOG;
10484 ctxt->checkIndex = 0;
10485#ifdef DEBUG_PUSH
10486 xmlGenericError(xmlGenericErrorContext,
10487 "PP: entering PROLOG\n");
10488#endif
10489 break;
10490 }
10491 case XML_PARSER_COMMENT:
10492 xmlGenericError(xmlGenericErrorContext,
10493 "PP: internal error, state == COMMENT\n");
10494 ctxt->instate = XML_PARSER_CONTENT;
10495#ifdef DEBUG_PUSH
10496 xmlGenericError(xmlGenericErrorContext,
10497 "PP: entering CONTENT\n");
10498#endif
10499 break;
10500 case XML_PARSER_IGNORE:
10501 xmlGenericError(xmlGenericErrorContext,
10502 "PP: internal error, state == IGNORE");
10503 ctxt->instate = XML_PARSER_DTD;
10504#ifdef DEBUG_PUSH
10505 xmlGenericError(xmlGenericErrorContext,
10506 "PP: entering DTD\n");
10507#endif
10508 break;
10509 case XML_PARSER_PI:
10510 xmlGenericError(xmlGenericErrorContext,
10511 "PP: internal error, state == PI\n");
10512 ctxt->instate = XML_PARSER_CONTENT;
10513#ifdef DEBUG_PUSH
10514 xmlGenericError(xmlGenericErrorContext,
10515 "PP: entering CONTENT\n");
10516#endif
10517 break;
10518 case XML_PARSER_ENTITY_DECL:
10519 xmlGenericError(xmlGenericErrorContext,
10520 "PP: internal error, state == ENTITY_DECL\n");
10521 ctxt->instate = XML_PARSER_DTD;
10522#ifdef DEBUG_PUSH
10523 xmlGenericError(xmlGenericErrorContext,
10524 "PP: entering DTD\n");
10525#endif
10526 break;
10527 case XML_PARSER_ENTITY_VALUE:
10528 xmlGenericError(xmlGenericErrorContext,
10529 "PP: internal error, state == ENTITY_VALUE\n");
10530 ctxt->instate = XML_PARSER_CONTENT;
10531#ifdef DEBUG_PUSH
10532 xmlGenericError(xmlGenericErrorContext,
10533 "PP: entering DTD\n");
10534#endif
10535 break;
10536 case XML_PARSER_ATTRIBUTE_VALUE:
10537 xmlGenericError(xmlGenericErrorContext,
10538 "PP: internal error, state == ATTRIBUTE_VALUE\n");
10539 ctxt->instate = XML_PARSER_START_TAG;
10540#ifdef DEBUG_PUSH
10541 xmlGenericError(xmlGenericErrorContext,
10542 "PP: entering START_TAG\n");
10543#endif
10544 break;
10545 case XML_PARSER_SYSTEM_LITERAL:
10546 xmlGenericError(xmlGenericErrorContext,
10547 "PP: internal error, state == SYSTEM_LITERAL\n");
10548 ctxt->instate = XML_PARSER_START_TAG;
10549#ifdef DEBUG_PUSH
10550 xmlGenericError(xmlGenericErrorContext,
10551 "PP: entering START_TAG\n");
10552#endif
10553 break;
10554 case XML_PARSER_PUBLIC_LITERAL:
10555 xmlGenericError(xmlGenericErrorContext,
10556 "PP: internal error, state == PUBLIC_LITERAL\n");
10557 ctxt->instate = XML_PARSER_START_TAG;
10558#ifdef DEBUG_PUSH
10559 xmlGenericError(xmlGenericErrorContext,
10560 "PP: entering START_TAG\n");
10561#endif
10562 break;
10563 }
10564 }
10565done:
10566#ifdef DEBUG_PUSH
10567 xmlGenericError(xmlGenericErrorContext, "PP: done %d\n", ret);
10568#endif
10569 return(ret);
10570encoding_error:
10571 {
10572 char buffer[150];
10573
10574 snprintf(buffer, 149, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
10575 ctxt->input->cur[0], ctxt->input->cur[1],
10576 ctxt->input->cur[2], ctxt->input->cur[3]);
10577 __xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR,
10578 "Input is not proper UTF-8, indicate encoding !\n%s",
10579 BAD_CAST buffer, NULL);
10580 }
10581 return(0);
10582}
10583
10584/**
10585 * xmlParseChunk:
10586 * @ctxt: an XML parser context
10587 * @chunk: an char array
10588 * @size: the size in byte of the chunk
10589 * @terminate: last chunk indicator
10590 *
10591 * Parse a Chunk of memory
10592 *
10593 * Returns zero if no error, the xmlParserErrors otherwise.
10594 */
10595int
10596xmlParseChunk(xmlParserCtxtPtr ctxt, const char *chunk, int size,
10597 int terminate) {
10598 int end_in_lf = 0;
10599
10600 if (ctxt == NULL)
10601 return(XML_ERR_INTERNAL_ERROR);
10602 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
10603 return(ctxt->errNo);
10604 if (ctxt->instate == XML_PARSER_START)
10605 xmlDetectSAX2(ctxt);
10606 if ((size > 0) && (chunk != NULL) && (!terminate) &&
10607 (chunk[size - 1] == '\r')) {
10608 end_in_lf = 1;
10609 size--;
10610 }
10611 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
10612 (ctxt->input->buf != NULL) && (ctxt->instate != XML_PARSER_EOF)) {
10613 int base = ctxt->input->base - ctxt->input->buf->buffer->content;
10614 int cur = ctxt->input->cur - ctxt->input->base;
10615 int res;
10616
10617 res =xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
10618 if (res < 0) {
10619 ctxt->errNo = XML_PARSER_EOF;
10620 ctxt->disableSAX = 1;
10621 return (XML_PARSER_EOF);
10622 }
10623 ctxt->input->base = ctxt->input->buf->buffer->content + base;
10624 ctxt->input->cur = ctxt->input->base + cur;
10625 ctxt->input->end =
10626 &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->use];
10627#ifdef DEBUG_PUSH
10628 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
10629#endif
10630
10631 } else if (ctxt->instate != XML_PARSER_EOF) {
10632 if ((ctxt->input != NULL) && ctxt->input->buf != NULL) {
10633 xmlParserInputBufferPtr in = ctxt->input->buf;
10634 if ((in->encoder != NULL) && (in->buffer != NULL) &&
10635 (in->raw != NULL)) {
10636 int nbchars;
10637
10638 nbchars = xmlCharEncInFunc(in->encoder, in->buffer, in->raw);
10639 if (nbchars < 0) {
10640 /* TODO 2.6.0 */
10641 xmlGenericError(xmlGenericErrorContext,
10642 "xmlParseChunk: encoder error\n");
10643 return(XML_ERR_INVALID_ENCODING);
10644 }
10645 }
10646 }
10647 }
10648 xmlParseTryOrFinish(ctxt, terminate);
10649 if ((end_in_lf == 1) && (ctxt->input != NULL) &&
10650 (ctxt->input->buf != NULL)) {
10651 xmlParserInputBufferPush(ctxt->input->buf, 1, "\r");
10652 }
10653 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
10654 return(ctxt->errNo);
10655 if (terminate) {
10656 /*
10657 * Check for termination
10658 */
10659 int avail = 0;
10660
10661 if (ctxt->input != NULL) {
10662 if (ctxt->input->buf == NULL)
10663 avail = ctxt->input->length -
10664 (ctxt->input->cur - ctxt->input->base);
10665 else
10666 avail = ctxt->input->buf->buffer->use -
10667 (ctxt->input->cur - ctxt->input->base);
10668 }
10669
10670 if ((ctxt->instate != XML_PARSER_EOF) &&
10671 (ctxt->instate != XML_PARSER_EPILOG)) {
10672 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
10673 }
10674 if ((ctxt->instate == XML_PARSER_EPILOG) && (avail > 0)) {
10675 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
10676 }
10677 if (ctxt->instate != XML_PARSER_EOF) {
10678 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
10679 ctxt->sax->endDocument(ctxt->userData);
10680 }
10681 ctxt->instate = XML_PARSER_EOF;
10682 }
10683 return((xmlParserErrors) ctxt->errNo);
10684}
10685
10686/************************************************************************
10687 * *
10688 * I/O front end functions to the parser *
10689 * *
10690 ************************************************************************/
10691
10692/**
10693 * xmlCreatePushParserCtxt:
10694 * @sax: a SAX handler
10695 * @user_data: The user data returned on SAX callbacks
10696 * @chunk: a pointer to an array of chars
10697 * @size: number of chars in the array
10698 * @filename: an optional file name or URI
10699 *
10700 * Create a parser context for using the XML parser in push mode.
10701 * If @buffer and @size are non-NULL, the data is used to detect
10702 * the encoding. The remaining characters will be parsed so they
10703 * don't need to be fed in again through xmlParseChunk.
10704 * To allow content encoding detection, @size should be >= 4
10705 * The value of @filename is used for fetching external entities
10706 * and error/warning reports.
10707 *
10708 * Returns the new parser context or NULL
10709 */
10710
10711xmlParserCtxtPtr
10712xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
10713 const char *chunk, int size, const char *filename) {
10714 xmlParserCtxtPtr ctxt;
10715 xmlParserInputPtr inputStream;
10716 xmlParserInputBufferPtr buf;
10717 xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
10718
10719 /*
10720 * plug some encoding conversion routines
10721 */
10722 if ((chunk != NULL) && (size >= 4))
10723 enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
10724
10725 buf = xmlAllocParserInputBuffer(enc);
10726 if (buf == NULL) return(NULL);
10727
10728 ctxt = xmlNewParserCtxt();
10729 if (ctxt == NULL) {
10730 xmlErrMemory(NULL, "creating parser: out of memory\n");
10731 xmlFreeParserInputBuffer(buf);
10732 return(NULL);
10733 }
10734 ctxt->dictNames = 1;
10735 ctxt->pushTab = (void **) xmlMalloc(ctxt->nameMax * 3 * sizeof(xmlChar *));
10736 if (ctxt->pushTab == NULL) {
10737 xmlErrMemory(ctxt, NULL);
10738 xmlFreeParserInputBuffer(buf);
10739 xmlFreeParserCtxt(ctxt);
10740 return(NULL);
10741 }
10742 if (sax != NULL) {
10743#ifdef LIBXML_SAX1_ENABLED
10744 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
10745#endif /* LIBXML_SAX1_ENABLED */
10746 xmlFree(ctxt->sax);
10747 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
10748 if (ctxt->sax == NULL) {
10749 xmlErrMemory(ctxt, NULL);
10750 xmlFreeParserInputBuffer(buf);
10751 xmlFreeParserCtxt(ctxt);
10752 return(NULL);
10753 }
10754 memset(ctxt->sax, 0, sizeof(xmlSAXHandler));
10755 if (sax->initialized == XML_SAX2_MAGIC)
10756 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
10757 else
10758 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1));
10759 if (user_data != NULL)
10760 ctxt->userData = user_data;
10761 }
10762 if (filename == NULL) {
10763 ctxt->directory = NULL;
10764 } else {
10765 ctxt->directory = xmlParserGetDirectory(filename);
10766 }
10767
10768 inputStream = xmlNewInputStream(ctxt);
10769 if (inputStream == NULL) {
10770 xmlFreeParserCtxt(ctxt);
10771 xmlFreeParserInputBuffer(buf);
10772 return(NULL);
10773 }
10774
10775 if (filename == NULL)
10776 inputStream->filename = NULL;
10777 else {
10778 inputStream->filename = (char *)
10779 xmlCanonicPath((const xmlChar *) filename);
10780 if (inputStream->filename == NULL) {
10781 xmlFreeParserCtxt(ctxt);
10782 xmlFreeParserInputBuffer(buf);
10783 return(NULL);
10784 }
10785 }
10786 inputStream->buf = buf;
10787 inputStream->base = inputStream->buf->buffer->content;
10788 inputStream->cur = inputStream->buf->buffer->content;
10789 inputStream->end =
10790 &inputStream->buf->buffer->content[inputStream->buf->buffer->use];
10791
10792 inputPush(ctxt, inputStream);
10793
10794 /*
10795 * If the caller didn't provide an initial 'chunk' for determining
10796 * the encoding, we set the context to XML_CHAR_ENCODING_NONE so
10797 * that it can be automatically determined later
10798 */
10799 if ((size == 0) || (chunk == NULL)) {
10800 ctxt->charset = XML_CHAR_ENCODING_NONE;
10801 } else if ((ctxt->input != NULL) && (ctxt->input->buf != NULL)) {
10802 int base = ctxt->input->base - ctxt->input->buf->buffer->content;
10803 int cur = ctxt->input->cur - ctxt->input->base;
10804
10805 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
10806
10807 ctxt->input->base = ctxt->input->buf->buffer->content + base;
10808 ctxt->input->cur = ctxt->input->base + cur;
10809 ctxt->input->end =
10810 &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->use];
10811#ifdef DEBUG_PUSH
10812 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
10813#endif
10814 }
10815
10816 if (enc != XML_CHAR_ENCODING_NONE) {
10817 xmlSwitchEncoding(ctxt, enc);
10818 }
10819
10820 return(ctxt);
10821}
10822#endif /* LIBXML_PUSH_ENABLED */
10823
10824/**
10825 * xmlStopParser:
10826 * @ctxt: an XML parser context
10827 *
10828 * Blocks further parser processing
10829 */
10830void
10831xmlStopParser(xmlParserCtxtPtr ctxt) {
10832 if (ctxt == NULL)
10833 return;
10834 ctxt->instate = XML_PARSER_EOF;
10835 ctxt->disableSAX = 1;
10836 if (ctxt->input != NULL) {
10837 ctxt->input->cur = BAD_CAST"";
10838 ctxt->input->base = ctxt->input->cur;
10839 }
10840}
10841
10842/**
10843 * xmlCreateIOParserCtxt:
10844 * @sax: a SAX handler
10845 * @user_data: The user data returned on SAX callbacks
10846 * @ioread: an I/O read function
10847 * @ioclose: an I/O close function
10848 * @ioctx: an I/O handler
10849 * @enc: the charset encoding if known
10850 *
10851 * Create a parser context for using the XML parser with an existing
10852 * I/O stream
10853 *
10854 * Returns the new parser context or NULL
10855 */
10856xmlParserCtxtPtr
10857xmlCreateIOParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
10858 xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
10859 void *ioctx, xmlCharEncoding enc) {
10860 xmlParserCtxtPtr ctxt;
10861 xmlParserInputPtr inputStream;
10862 xmlParserInputBufferPtr buf;
10863
10864 if (ioread == NULL) return(NULL);
10865
10866 buf = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx, enc);
10867 if (buf == NULL) return(NULL);
10868
10869 ctxt = xmlNewParserCtxt();
10870 if (ctxt == NULL) {
10871 xmlFreeParserInputBuffer(buf);
10872 return(NULL);
10873 }
10874 if (sax != NULL) {
10875#ifdef LIBXML_SAX1_ENABLED
10876 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
10877#endif /* LIBXML_SAX1_ENABLED */
10878 xmlFree(ctxt->sax);
10879 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
10880 if (ctxt->sax == NULL) {
10881 xmlErrMemory(ctxt, NULL);
10882 xmlFreeParserCtxt(ctxt);
10883 return(NULL);
10884 }
10885 memset(ctxt->sax, 0, sizeof(xmlSAXHandler));
10886 if (sax->initialized == XML_SAX2_MAGIC)
10887 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
10888 else
10889 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1));
10890 if (user_data != NULL)
10891 ctxt->userData = user_data;
10892 }
10893
10894 inputStream = xmlNewIOInputStream(ctxt, buf, enc);
10895 if (inputStream == NULL) {
10896 xmlFreeParserCtxt(ctxt);
10897 return(NULL);
10898 }
10899 inputPush(ctxt, inputStream);
10900
10901 return(ctxt);
10902}
10903
10904#ifdef LIBXML_VALID_ENABLED
10905/************************************************************************
10906 * *
10907 * Front ends when parsing a DTD *
10908 * *
10909 ************************************************************************/
10910
10911/**
10912 * xmlIOParseDTD:
10913 * @sax: the SAX handler block or NULL
10914 * @input: an Input Buffer
10915 * @enc: the charset encoding if known
10916 *
10917 * Load and parse a DTD
10918 *
10919 * Returns the resulting xmlDtdPtr or NULL in case of error.
10920 * @input will be freed by the function in any case.
10921 */
10922
10923xmlDtdPtr
10924xmlIOParseDTD(xmlSAXHandlerPtr sax, xmlParserInputBufferPtr input,
10925 xmlCharEncoding enc) {
10926 xmlDtdPtr ret = NULL;
10927 xmlParserCtxtPtr ctxt;
10928 xmlParserInputPtr pinput = NULL;
10929 xmlChar start[4];
10930
10931 if (input == NULL)
10932 return(NULL);
10933
10934 ctxt = xmlNewParserCtxt();
10935 if (ctxt == NULL) {
10936 xmlFreeParserInputBuffer(input);
10937 return(NULL);
10938 }
10939
10940 /*
10941 * Set-up the SAX context
10942 */
10943 if (sax != NULL) {
10944 if (ctxt->sax != NULL)
10945 xmlFree(ctxt->sax);
10946 ctxt->sax = sax;
10947 ctxt->userData = ctxt;
10948 }
10949 xmlDetectSAX2(ctxt);
10950
10951 /*
10952 * generate a parser input from the I/O handler
10953 */
10954
10955 pinput = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
10956 if (pinput == NULL) {
10957 if (sax != NULL) ctxt->sax = NULL;
10958 xmlFreeParserInputBuffer(input);
10959 xmlFreeParserCtxt(ctxt);
10960 return(NULL);
10961 }
10962
10963 /*
10964 * plug some encoding conversion routines here.
10965 */
10966 xmlPushInput(ctxt, pinput);
10967 if (enc != XML_CHAR_ENCODING_NONE) {
10968 xmlSwitchEncoding(ctxt, enc);
10969 }
10970
10971 pinput->filename = NULL;
10972 pinput->line = 1;
10973 pinput->col = 1;
10974 pinput->base = ctxt->input->cur;
10975 pinput->cur = ctxt->input->cur;
10976 pinput->free = NULL;
10977
10978 /*
10979 * let's parse that entity knowing it's an external subset.
10980 */
10981 ctxt->inSubset = 2;
10982 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
10983 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
10984 BAD_CAST "none", BAD_CAST "none");
10985
10986 if ((enc == XML_CHAR_ENCODING_NONE) &&
10987 ((ctxt->input->end - ctxt->input->cur) >= 4)) {
10988 /*
10989 * Get the 4 first bytes and decode the charset
10990 * if enc != XML_CHAR_ENCODING_NONE
10991 * plug some encoding conversion routines.
10992 */
10993 start[0] = RAW;
10994 start[1] = NXT(1);
10995 start[2] = NXT(2);
10996 start[3] = NXT(3);
10997 enc = xmlDetectCharEncoding(start, 4);
10998 if (enc != XML_CHAR_ENCODING_NONE) {
10999 xmlSwitchEncoding(ctxt, enc);
11000 }
11001 }
11002
11003 xmlParseExternalSubset(ctxt, BAD_CAST "none", BAD_CAST "none");
11004
11005 if (ctxt->myDoc != NULL) {
11006 if (ctxt->wellFormed) {
11007 ret = ctxt->myDoc->extSubset;
11008 ctxt->myDoc->extSubset = NULL;
11009 if (ret != NULL) {
11010 xmlNodePtr tmp;
11011
11012 ret->doc = NULL;
11013 tmp = ret->children;
11014 while (tmp != NULL) {
11015 tmp->doc = NULL;
11016 tmp = tmp->next;
11017 }
11018 }
11019 } else {
11020 ret = NULL;
11021 }
11022 xmlFreeDoc(ctxt->myDoc);
11023 ctxt->myDoc = NULL;
11024 }
11025 if (sax != NULL) ctxt->sax = NULL;
11026 xmlFreeParserCtxt(ctxt);
11027
11028 return(ret);
11029}
11030
11031/**
11032 * xmlSAXParseDTD:
11033 * @sax: the SAX handler block
11034 * @ExternalID: a NAME* containing the External ID of the DTD
11035 * @SystemID: a NAME* containing the URL to the DTD
11036 *
11037 * Load and parse an external subset.
11038 *
11039 * Returns the resulting xmlDtdPtr or NULL in case of error.
11040 */
11041
11042xmlDtdPtr
11043xmlSAXParseDTD(xmlSAXHandlerPtr sax, const xmlChar *ExternalID,
11044 const xmlChar *SystemID) {
11045 xmlDtdPtr ret = NULL;
11046 xmlParserCtxtPtr ctxt;
11047 xmlParserInputPtr input = NULL;
11048 xmlCharEncoding enc;
11049 xmlChar* systemIdCanonic;
11050
11051 if ((ExternalID == NULL) && (SystemID == NULL)) return(NULL);
11052
11053 ctxt = xmlNewParserCtxt();
11054 if (ctxt == NULL) {
11055 return(NULL);
11056 }
11057
11058 /*
11059 * Set-up the SAX context
11060 */
11061 if (sax != NULL) {
11062 if (ctxt->sax != NULL)
11063 xmlFree(ctxt->sax);
11064 ctxt->sax = sax;
11065 ctxt->userData = ctxt;
11066 }
11067
11068 /*
11069 * Canonicalise the system ID
11070 */
11071 systemIdCanonic = xmlCanonicPath(SystemID);
11072 if ((SystemID != NULL) && (systemIdCanonic == NULL)) {
11073 xmlFreeParserCtxt(ctxt);
11074 return(NULL);
11075 }
11076
11077 /*
11078 * Ask the Entity resolver to load the damn thing
11079 */
11080
11081 if ((ctxt->sax != NULL) && (ctxt->sax->resolveEntity != NULL))
11082 input = ctxt->sax->resolveEntity(ctxt->userData, ExternalID,
11083 systemIdCanonic);
11084 if (input == NULL) {
11085 if (sax != NULL) ctxt->sax = NULL;
11086 xmlFreeParserCtxt(ctxt);
11087 if (systemIdCanonic != NULL)
11088 xmlFree(systemIdCanonic);
11089 return(NULL);
11090 }
11091
11092 /*
11093 * plug some encoding conversion routines here.
11094 */
11095 xmlPushInput(ctxt, input);
11096 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
11097 enc = xmlDetectCharEncoding(ctxt->input->cur, 4);
11098 xmlSwitchEncoding(ctxt, enc);
11099 }
11100
11101 if (input->filename == NULL)
11102 input->filename = (char *) systemIdCanonic;
11103 else
11104 xmlFree(systemIdCanonic);
11105 input->line = 1;
11106 input->col = 1;
11107 input->base = ctxt->input->cur;
11108 input->cur = ctxt->input->cur;
11109 input->free = NULL;
11110
11111 /*
11112 * let's parse that entity knowing it's an external subset.
11113 */
11114 ctxt->inSubset = 2;
11115 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
11116 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
11117 ExternalID, SystemID);
11118 xmlParseExternalSubset(ctxt, ExternalID, SystemID);
11119
11120 if (ctxt->myDoc != NULL) {
11121 if (ctxt->wellFormed) {
11122 ret = ctxt->myDoc->extSubset;
11123 ctxt->myDoc->extSubset = NULL;
11124 if (ret != NULL) {
11125 xmlNodePtr tmp;
11126
11127 ret->doc = NULL;
11128 tmp = ret->children;
11129 while (tmp != NULL) {
11130 tmp->doc = NULL;
11131 tmp = tmp->next;
11132 }
11133 }
11134 } else {
11135 ret = NULL;
11136 }
11137 xmlFreeDoc(ctxt->myDoc);
11138 ctxt->myDoc = NULL;
11139 }
11140 if (sax != NULL) ctxt->sax = NULL;
11141 xmlFreeParserCtxt(ctxt);
11142
11143 return(ret);
11144}
11145
11146
11147/**
11148 * xmlParseDTD:
11149 * @ExternalID: a NAME* containing the External ID of the DTD
11150 * @SystemID: a NAME* containing the URL to the DTD
11151 *
11152 * Load and parse an external subset.
11153 *
11154 * Returns the resulting xmlDtdPtr or NULL in case of error.
11155 */
11156
11157xmlDtdPtr
11158xmlParseDTD(const xmlChar *ExternalID, const xmlChar *SystemID) {
11159 return(xmlSAXParseDTD(NULL, ExternalID, SystemID));
11160}
11161#endif /* LIBXML_VALID_ENABLED */
11162
11163/************************************************************************
11164 * *
11165 * Front ends when parsing an Entity *
11166 * *
11167 ************************************************************************/
11168
11169/**
11170 * xmlParseCtxtExternalEntity:
11171 * @ctx: the existing parsing context
11172 * @URL: the URL for the entity to load
11173 * @ID: the System ID for the entity to load
11174 * @lst: the return value for the set of parsed nodes
11175 *
11176 * Parse an external general entity within an existing parsing context
11177 * An external general parsed entity is well-formed if it matches the
11178 * production labeled extParsedEnt.
11179 *
11180 * [78] extParsedEnt ::= TextDecl? content
11181 *
11182 * Returns 0 if the entity is well formed, -1 in case of args problem and
11183 * the parser error code otherwise
11184 */
11185
11186int
11187xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctx, const xmlChar *URL,
11188 const xmlChar *ID, xmlNodePtr *lst) {
11189 xmlParserCtxtPtr ctxt;
11190 xmlDocPtr newDoc;
11191 xmlNodePtr newRoot;
11192 xmlSAXHandlerPtr oldsax = NULL;
11193 int ret = 0;
11194 xmlChar start[4];
11195 xmlCharEncoding enc;
11196 xmlParserInputPtr inputStream;
11197 char *directory = NULL;
11198
11199 if (ctx == NULL) return(-1);
11200
11201 if (ctx->depth > 40) {
11202 return(XML_ERR_ENTITY_LOOP);
11203 }
11204
11205 if (lst != NULL)
11206 *lst = NULL;
11207 if ((URL == NULL) && (ID == NULL))
11208 return(-1);
11209 if (ctx->myDoc == NULL) /* @@ relax but check for dereferences */
11210 return(-1);
11211
11212 ctxt = xmlNewParserCtxt();
11213 if (ctxt == NULL) {
11214 return(-1);
11215 }
11216
11217 ctxt->userData = ctxt;
11218 ctxt->_private = ctx->_private;
11219
11220 inputStream = xmlLoadExternalEntity((char *)URL, (char *)ID, ctxt);
11221 if (inputStream == NULL) {
11222 xmlFreeParserCtxt(ctxt);
11223 return(-1);
11224 }
11225
11226 inputPush(ctxt, inputStream);
11227
11228 if ((ctxt->directory == NULL) && (directory == NULL))
11229 directory = xmlParserGetDirectory((char *)URL);
11230 if ((ctxt->directory == NULL) && (directory != NULL))
11231 ctxt->directory = directory;
11232
11233 oldsax = ctxt->sax;
11234 ctxt->sax = ctx->sax;
11235 xmlDetectSAX2(ctxt);
11236 newDoc = xmlNewDoc(BAD_CAST "1.0");
11237 if (newDoc == NULL) {
11238 xmlFreeParserCtxt(ctxt);
11239 return(-1);
11240 }
11241 if (ctx->myDoc->dict) {
11242 newDoc->dict = ctx->myDoc->dict;
11243 xmlDictReference(newDoc->dict);
11244 }
11245 if (ctx->myDoc != NULL) {
11246 newDoc->intSubset = ctx->myDoc->intSubset;
11247 newDoc->extSubset = ctx->myDoc->extSubset;
11248 }
11249 if (ctx->myDoc->URL != NULL) {
11250 newDoc->URL = xmlStrdup(ctx->myDoc->URL);
11251 }
11252 newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
11253 if (newRoot == NULL) {
11254 ctxt->sax = oldsax;
11255 xmlFreeParserCtxt(ctxt);
11256 newDoc->intSubset = NULL;
11257 newDoc->extSubset = NULL;
11258 xmlFreeDoc(newDoc);
11259 return(-1);
11260 }
11261 xmlAddChild((xmlNodePtr) newDoc, newRoot);
11262 nodePush(ctxt, newDoc->children);
11263 if (ctx->myDoc == NULL) {
11264 ctxt->myDoc = newDoc;
11265 } else {
11266 ctxt->myDoc = ctx->myDoc;
11267 newDoc->children->doc = ctx->myDoc;
11268 }
11269
11270 /*
11271 * Get the 4 first bytes and decode the charset
11272 * if enc != XML_CHAR_ENCODING_NONE
11273 * plug some encoding conversion routines.
11274 */
11275 GROW
11276 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
11277 start[0] = RAW;
11278 start[1] = NXT(1);
11279 start[2] = NXT(2);
11280 start[3] = NXT(3);
11281 enc = xmlDetectCharEncoding(start, 4);
11282 if (enc != XML_CHAR_ENCODING_NONE) {
11283 xmlSwitchEncoding(ctxt, enc);
11284 }
11285 }
11286
11287 /*
11288 * Parse a possible text declaration first
11289 */
11290 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
11291 xmlParseTextDecl(ctxt);
11292 }
11293
11294 /*
11295 * Doing validity checking on chunk doesn't make sense
11296 */
11297 ctxt->instate = XML_PARSER_CONTENT;
11298 ctxt->validate = ctx->validate;
11299 ctxt->valid = ctx->valid;
11300 ctxt->loadsubset = ctx->loadsubset;
11301 ctxt->depth = ctx->depth + 1;
11302 ctxt->replaceEntities = ctx->replaceEntities;
11303 if (ctxt->validate) {
11304 ctxt->vctxt.error = ctx->vctxt.error;
11305 ctxt->vctxt.warning = ctx->vctxt.warning;
11306 } else {
11307 ctxt->vctxt.error = NULL;
11308 ctxt->vctxt.warning = NULL;
11309 }
11310 ctxt->vctxt.nodeTab = NULL;
11311 ctxt->vctxt.nodeNr = 0;
11312 ctxt->vctxt.nodeMax = 0;
11313 ctxt->vctxt.node = NULL;
11314 if (ctxt->dict != NULL) xmlDictFree(ctxt->dict);
11315 ctxt->dict = ctx->dict;
11316 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
11317 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
11318 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
11319 ctxt->dictNames = ctx->dictNames;
11320 ctxt->attsDefault = ctx->attsDefault;
11321 ctxt->attsSpecial = ctx->attsSpecial;
11322 ctxt->linenumbers = ctx->linenumbers;
11323
11324 xmlParseContent(ctxt);
11325
11326 ctx->validate = ctxt->validate;
11327 ctx->valid = ctxt->valid;
11328 if ((RAW == '<') && (NXT(1) == '/')) {
11329 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
11330 } else if (RAW != 0) {
11331 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
11332 }
11333 if (ctxt->node != newDoc->children) {
11334 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
11335 }
11336
11337 if (!ctxt->wellFormed) {
11338 if (ctxt->errNo == 0)
11339 ret = 1;
11340 else
11341 ret = ctxt->errNo;
11342 } else {
11343 if (lst != NULL) {
11344 xmlNodePtr cur;
11345
11346 /*
11347 * Return the newly created nodeset after unlinking it from
11348 * they pseudo parent.
11349 */
11350 cur = newDoc->children->children;
11351 *lst = cur;
11352 while (cur != NULL) {
11353 cur->parent = NULL;
11354 cur = cur->next;
11355 }
11356 newDoc->children->children = NULL;
11357 }
11358 ret = 0;
11359 }
11360 ctxt->sax = oldsax;
11361 ctxt->dict = NULL;
11362 ctxt->attsDefault = NULL;
11363 ctxt->attsSpecial = NULL;
11364 xmlFreeParserCtxt(ctxt);
11365 newDoc->intSubset = NULL;
11366 newDoc->extSubset = NULL;
11367 xmlFreeDoc(newDoc);
11368
11369 return(ret);
11370}
11371
11372/**
11373 * xmlParseExternalEntityPrivate:
11374 * @doc: the document the chunk pertains to
11375 * @oldctxt: the previous parser context if available
11376 * @sax: the SAX handler bloc (possibly NULL)
11377 * @user_data: The user data returned on SAX callbacks (possibly NULL)
11378 * @depth: Used for loop detection, use 0
11379 * @URL: the URL for the entity to load
11380 * @ID: the System ID for the entity to load
11381 * @list: the return value for the set of parsed nodes
11382 *
11383 * Private version of xmlParseExternalEntity()
11384 *
11385 * Returns 0 if the entity is well formed, -1 in case of args problem and
11386 * the parser error code otherwise
11387 */
11388
11389static xmlParserErrors
11390xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
11391 xmlSAXHandlerPtr sax,
11392 void *user_data, int depth, const xmlChar *URL,
11393 const xmlChar *ID, xmlNodePtr *list) {
11394 xmlParserCtxtPtr ctxt;
11395 xmlDocPtr newDoc;
11396 xmlNodePtr newRoot;
11397 xmlSAXHandlerPtr oldsax = NULL;
11398 xmlParserErrors ret = XML_ERR_OK;
11399 xmlChar start[4];
11400 xmlCharEncoding enc;
11401
11402 if (depth > 40) {
11403 return(XML_ERR_ENTITY_LOOP);
11404 }
11405
11406
11407
11408 if (list != NULL)
11409 *list = NULL;
11410 if ((URL == NULL) && (ID == NULL))
11411 return(XML_ERR_INTERNAL_ERROR);
11412 if (doc == NULL)
11413 return(XML_ERR_INTERNAL_ERROR);
11414
11415
11416 ctxt = xmlCreateEntityParserCtxt(URL, ID, NULL);
11417 if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY);
11418 ctxt->userData = ctxt;
11419 if (oldctxt != NULL) {
11420 ctxt->_private = oldctxt->_private;
11421 ctxt->loadsubset = oldctxt->loadsubset;
11422 ctxt->validate = oldctxt->validate;
11423 ctxt->external = oldctxt->external;
11424 ctxt->record_info = oldctxt->record_info;
11425 ctxt->node_seq.maximum = oldctxt->node_seq.maximum;
11426 ctxt->node_seq.length = oldctxt->node_seq.length;
11427 ctxt->node_seq.buffer = oldctxt->node_seq.buffer;
11428 } else {
11429 /*
11430 * Doing validity checking on chunk without context
11431 * doesn't make sense
11432 */
11433 ctxt->_private = NULL;
11434 ctxt->validate = 0;
11435 ctxt->external = 2;
11436 ctxt->loadsubset = 0;
11437 }
11438 if (sax != NULL) {
11439 oldsax = ctxt->sax;
11440 ctxt->sax = sax;
11441 if (user_data != NULL)
11442 ctxt->userData = user_data;
11443 }
11444 xmlDetectSAX2(ctxt);
11445 newDoc = xmlNewDoc(BAD_CAST "1.0");
11446 if (newDoc == NULL) {
11447 ctxt->node_seq.maximum = 0;
11448 ctxt->node_seq.length = 0;
11449 ctxt->node_seq.buffer = NULL;
11450 xmlFreeParserCtxt(ctxt);
11451 return(XML_ERR_INTERNAL_ERROR);
11452 }
11453 newDoc->intSubset = doc->intSubset;
11454 newDoc->extSubset = doc->extSubset;
11455 newDoc->dict = doc->dict;
11456 xmlDictReference(newDoc->dict);
11457
11458 if (doc->URL != NULL) {
11459 newDoc->URL = xmlStrdup(doc->URL);
11460 }
11461 newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
11462 if (newRoot == NULL) {
11463 if (sax != NULL)
11464 ctxt->sax = oldsax;
11465 ctxt->node_seq.maximum = 0;
11466 ctxt->node_seq.length = 0;
11467 ctxt->node_seq.buffer = NULL;
11468 xmlFreeParserCtxt(ctxt);
11469 newDoc->intSubset = NULL;
11470 newDoc->extSubset = NULL;
11471 xmlFreeDoc(newDoc);
11472 return(XML_ERR_INTERNAL_ERROR);
11473 }
11474 xmlAddChild((xmlNodePtr) newDoc, newRoot);
11475 nodePush(ctxt, newDoc->children);
11476 ctxt->myDoc = doc;
11477 newRoot->doc = doc;
11478
11479 /*
11480 * Get the 4 first bytes and decode the charset
11481 * if enc != XML_CHAR_ENCODING_NONE
11482 * plug some encoding conversion routines.
11483 */
11484 GROW;
11485 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
11486 start[0] = RAW;
11487 start[1] = NXT(1);
11488 start[2] = NXT(2);
11489 start[3] = NXT(3);
11490 enc = xmlDetectCharEncoding(start, 4);
11491 if (enc != XML_CHAR_ENCODING_NONE) {
11492 xmlSwitchEncoding(ctxt, enc);
11493 }
11494 }
11495
11496 /*
11497 * Parse a possible text declaration first
11498 */
11499 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
11500 xmlParseTextDecl(ctxt);
11501 }
11502
11503 ctxt->instate = XML_PARSER_CONTENT;
11504 ctxt->depth = depth;
11505
11506 xmlParseContent(ctxt);
11507
11508 if ((RAW == '<') && (NXT(1) == '/')) {
11509 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
11510 } else if (RAW != 0) {
11511 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
11512 }
11513 if (ctxt->node != newDoc->children) {
11514 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
11515 }
11516
11517 if (!ctxt->wellFormed) {
11518 if (ctxt->errNo == 0)
11519 ret = XML_ERR_INTERNAL_ERROR;
11520 else
11521 ret = (xmlParserErrors)ctxt->errNo;
11522 } else {
11523 if (list != NULL) {
11524 xmlNodePtr cur;
11525
11526 /*
11527 * Return the newly created nodeset after unlinking it from
11528 * they pseudo parent.
11529 */
11530 cur = newDoc->children->children;
11531 *list = cur;
11532 while (cur != NULL) {
11533 cur->parent = NULL;
11534 cur = cur->next;
11535 }
11536 newDoc->children->children = NULL;
11537 }
11538 ret = XML_ERR_OK;
11539 }
11540 if (sax != NULL)
11541 ctxt->sax = oldsax;
11542 oldctxt->node_seq.maximum = ctxt->node_seq.maximum;
11543 oldctxt->node_seq.length = ctxt->node_seq.length;
11544 oldctxt->node_seq.buffer = ctxt->node_seq.buffer;
11545 ctxt->node_seq.maximum = 0;
11546 ctxt->node_seq.length = 0;
11547 ctxt->node_seq.buffer = NULL;
11548 xmlFreeParserCtxt(ctxt);
11549 newDoc->intSubset = NULL;
11550 newDoc->extSubset = NULL;
11551 xmlFreeDoc(newDoc);
11552
11553 return(ret);
11554}
11555
11556#ifdef LIBXML_SAX1_ENABLED
11557/**
11558 * xmlParseExternalEntity:
11559 * @doc: the document the chunk pertains to
11560 * @sax: the SAX handler bloc (possibly NULL)
11561 * @user_data: The user data returned on SAX callbacks (possibly NULL)
11562 * @depth: Used for loop detection, use 0
11563 * @URL: the URL for the entity to load
11564 * @ID: the System ID for the entity to load
11565 * @lst: the return value for the set of parsed nodes
11566 *
11567 * Parse an external general entity
11568 * An external general parsed entity is well-formed if it matches the
11569 * production labeled extParsedEnt.
11570 *
11571 * [78] extParsedEnt ::= TextDecl? content
11572 *
11573 * Returns 0 if the entity is well formed, -1 in case of args problem and
11574 * the parser error code otherwise
11575 */
11576
11577int
11578xmlParseExternalEntity(xmlDocPtr doc, xmlSAXHandlerPtr sax, void *user_data,
11579 int depth, const xmlChar *URL, const xmlChar *ID, xmlNodePtr *lst) {
11580 return(xmlParseExternalEntityPrivate(doc, NULL, sax, user_data, depth, URL,
11581 ID, lst));
11582}
11583
11584/**
11585 * xmlParseBalancedChunkMemory:
11586 * @doc: the document the chunk pertains to
11587 * @sax: the SAX handler bloc (possibly NULL)
11588 * @user_data: The user data returned on SAX callbacks (possibly NULL)
11589 * @depth: Used for loop detection, use 0
11590 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
11591 * @lst: the return value for the set of parsed nodes
11592 *
11593 * Parse a well-balanced chunk of an XML document
11594 * called by the parser
11595 * The allowed sequence for the Well Balanced Chunk is the one defined by
11596 * the content production in the XML grammar:
11597 *
11598 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
11599 *
11600 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
11601 * the parser error code otherwise
11602 */
11603
11604int
11605xmlParseBalancedChunkMemory(xmlDocPtr doc, xmlSAXHandlerPtr sax,
11606 void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst) {
11607 return xmlParseBalancedChunkMemoryRecover( doc, sax, user_data,
11608 depth, string, lst, 0 );
11609}
11610#endif /* LIBXML_SAX1_ENABLED */
11611
11612/**
11613 * xmlParseBalancedChunkMemoryInternal:
11614 * @oldctxt: the existing parsing context
11615 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
11616 * @user_data: the user data field for the parser context
11617 * @lst: the return value for the set of parsed nodes
11618 *
11619 *
11620 * Parse a well-balanced chunk of an XML document
11621 * called by the parser
11622 * The allowed sequence for the Well Balanced Chunk is the one defined by
11623 * the content production in the XML grammar:
11624 *
11625 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
11626 *
11627 * Returns XML_ERR_OK if the chunk is well balanced, and the parser
11628 * error code otherwise
11629 *
11630 * In case recover is set to 1, the nodelist will not be empty even if
11631 * the parsed chunk is not well balanced.
11632 */
11633static xmlParserErrors
11634xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
11635 const xmlChar *string, void *user_data, xmlNodePtr *lst) {
11636 xmlParserCtxtPtr ctxt;
11637 xmlDocPtr newDoc = NULL;
11638 xmlNodePtr newRoot;
11639 xmlSAXHandlerPtr oldsax = NULL;
11640 xmlNodePtr content = NULL;
11641 xmlNodePtr last = NULL;
11642 int size;
11643 xmlParserErrors ret = XML_ERR_OK;
11644
11645 if (oldctxt->depth > 40) {
11646 return(XML_ERR_ENTITY_LOOP);
11647 }
11648
11649
11650 if (lst != NULL)
11651 *lst = NULL;
11652 if (string == NULL)
11653 return(XML_ERR_INTERNAL_ERROR);
11654
11655 size = xmlStrlen(string);
11656
11657 ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
11658 if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY);
11659 if (user_data != NULL)
11660 ctxt->userData = user_data;
11661 else
11662 ctxt->userData = ctxt;
11663 if (ctxt->dict != NULL) xmlDictFree(ctxt->dict);
11664 ctxt->dict = oldctxt->dict;
11665 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
11666 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
11667 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
11668
11669 oldsax = ctxt->sax;
11670 ctxt->sax = oldctxt->sax;
11671 xmlDetectSAX2(ctxt);
11672 ctxt->replaceEntities = oldctxt->replaceEntities;
11673 ctxt->options = oldctxt->options;
11674
11675 ctxt->_private = oldctxt->_private;
11676 if (oldctxt->myDoc == NULL) {
11677 newDoc = xmlNewDoc(BAD_CAST "1.0");
11678 if (newDoc == NULL) {
11679 ctxt->sax = oldsax;
11680 ctxt->dict = NULL;
11681 xmlFreeParserCtxt(ctxt);
11682 return(XML_ERR_INTERNAL_ERROR);
11683 }
11684 newDoc->dict = ctxt->dict;
11685 xmlDictReference(newDoc->dict);
11686 ctxt->myDoc = newDoc;
11687 } else {
11688 ctxt->myDoc = oldctxt->myDoc;
11689 content = ctxt->myDoc->children;
11690 last = ctxt->myDoc->last;
11691 }
11692 newRoot = xmlNewDocNode(ctxt->myDoc, NULL, BAD_CAST "pseudoroot", NULL);
11693 if (newRoot == NULL) {
11694 ctxt->sax = oldsax;
11695 ctxt->dict = NULL;
11696 xmlFreeParserCtxt(ctxt);
11697 if (newDoc != NULL) {
11698 xmlFreeDoc(newDoc);
11699 }
11700 return(XML_ERR_INTERNAL_ERROR);
11701 }
11702 ctxt->myDoc->children = NULL;
11703 ctxt->myDoc->last = NULL;
11704 xmlAddChild((xmlNodePtr) ctxt->myDoc, newRoot);
11705 nodePush(ctxt, ctxt->myDoc->children);
11706 ctxt->instate = XML_PARSER_CONTENT;
11707 ctxt->depth = oldctxt->depth + 1;
11708
11709 ctxt->validate = 0;
11710 ctxt->loadsubset = oldctxt->loadsubset;
11711 if ((oldctxt->validate) || (oldctxt->replaceEntities != 0)) {
11712 /*
11713 * ID/IDREF registration will be done in xmlValidateElement below
11714 */
11715 ctxt->loadsubset |= XML_SKIP_IDS;
11716 }
11717 ctxt->dictNames = oldctxt->dictNames;
11718 ctxt->attsDefault = oldctxt->attsDefault;
11719 ctxt->attsSpecial = oldctxt->attsSpecial;
11720
11721 xmlParseContent(ctxt);
11722 if ((RAW == '<') && (NXT(1) == '/')) {
11723 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
11724 } else if (RAW != 0) {
11725 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
11726 }
11727 if (ctxt->node != ctxt->myDoc->children) {
11728 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
11729 }
11730
11731 if (!ctxt->wellFormed) {
11732 if (ctxt->errNo == 0)
11733 ret = XML_ERR_INTERNAL_ERROR;
11734 else
11735 ret = (xmlParserErrors)ctxt->errNo;
11736 } else {
11737 ret = XML_ERR_OK;
11738 }
11739
11740 if ((lst != NULL) && (ret == XML_ERR_OK)) {
11741 xmlNodePtr cur;
11742
11743 /*
11744 * Return the newly created nodeset after unlinking it from
11745 * they pseudo parent.
11746 */
11747 cur = ctxt->myDoc->children->children;
11748 *lst = cur;
11749 while (cur != NULL) {
11750#ifdef LIBXML_VALID_ENABLED
11751 if ((oldctxt->validate) && (oldctxt->wellFormed) &&
11752 (oldctxt->myDoc) && (oldctxt->myDoc->intSubset) &&
11753 (cur->type == XML_ELEMENT_NODE)) {
11754 oldctxt->valid &= xmlValidateElement(&oldctxt->vctxt,
11755 oldctxt->myDoc, cur);
11756 }
11757#endif /* LIBXML_VALID_ENABLED */
11758 cur->parent = NULL;
11759 cur = cur->next;
11760 }
11761 ctxt->myDoc->children->children = NULL;
11762 }
11763 if (ctxt->myDoc != NULL) {
11764 xmlFreeNode(ctxt->myDoc->children);
11765 ctxt->myDoc->children = content;
11766 ctxt->myDoc->last = last;
11767 }
11768
11769 ctxt->sax = oldsax;
11770 ctxt->dict = NULL;
11771 ctxt->attsDefault = NULL;
11772 ctxt->attsSpecial = NULL;
11773 xmlFreeParserCtxt(ctxt);
11774 if (newDoc != NULL) {
11775 xmlFreeDoc(newDoc);
11776 }
11777
11778 return(ret);
11779}
11780
11781/**
11782 * xmlParseInNodeContext:
11783 * @node: the context node
11784 * @data: the input string
11785 * @datalen: the input string length in bytes
11786 * @options: a combination of xmlParserOption
11787 * @lst: the return value for the set of parsed nodes
11788 *
11789 * Parse a well-balanced chunk of an XML document
11790 * within the context (DTD, namespaces, etc ...) of the given node.
11791 *
11792 * The allowed sequence for the data is a Well Balanced Chunk defined by
11793 * the content production in the XML grammar:
11794 *
11795 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
11796 *
11797 * Returns XML_ERR_OK if the chunk is well balanced, and the parser
11798 * error code otherwise
11799 */
11800xmlParserErrors
11801xmlParseInNodeContext(xmlNodePtr node, const char *data, int datalen,
11802 int options, xmlNodePtr *lst) {
11803#ifdef SAX2
11804 xmlParserCtxtPtr ctxt;
11805 xmlDocPtr doc = NULL;
11806 xmlNodePtr fake, cur;
11807 int nsnr = 0;
11808
11809 xmlParserErrors ret = XML_ERR_OK;
11810
11811 /*
11812 * check all input parameters, grab the document
11813 */
11814 if ((lst == NULL) || (node == NULL) || (data == NULL) || (datalen < 0))
11815 return(XML_ERR_INTERNAL_ERROR);
11816 switch (node->type) {
11817 case XML_ELEMENT_NODE:
11818 case XML_ATTRIBUTE_NODE:
11819 case XML_TEXT_NODE:
11820 case XML_CDATA_SECTION_NODE:
11821 case XML_ENTITY_REF_NODE:
11822 case XML_PI_NODE:
11823 case XML_COMMENT_NODE:
11824 case XML_DOCUMENT_NODE:
11825 case XML_HTML_DOCUMENT_NODE:
11826 break;
11827 default:
11828 return(XML_ERR_INTERNAL_ERROR);
11829
11830 }
11831 while ((node != NULL) && (node->type != XML_ELEMENT_NODE) &&
11832 (node->type != XML_DOCUMENT_NODE) &&
11833 (node->type != XML_HTML_DOCUMENT_NODE))
11834 node = node->parent;
11835 if (node == NULL)
11836 return(XML_ERR_INTERNAL_ERROR);
11837 if (node->type == XML_ELEMENT_NODE)
11838 doc = node->doc;
11839 else
11840 doc = (xmlDocPtr) node;
11841 if (doc == NULL)
11842 return(XML_ERR_INTERNAL_ERROR);
11843
11844 /*
11845 * allocate a context and set-up everything not related to the
11846 * node position in the tree
11847 */
11848 if (doc->type == XML_DOCUMENT_NODE)
11849 ctxt = xmlCreateMemoryParserCtxt((char *) data, datalen);
11850#ifdef LIBXML_HTML_ENABLED
11851 else if (doc->type == XML_HTML_DOCUMENT_NODE)
11852 ctxt = htmlCreateMemoryParserCtxt((char *) data, datalen);
11853#endif
11854 else
11855 return(XML_ERR_INTERNAL_ERROR);
11856
11857 if (ctxt == NULL)
11858 return(XML_ERR_NO_MEMORY);
11859 fake = xmlNewComment(NULL);
11860 if (fake == NULL) {
11861 xmlFreeParserCtxt(ctxt);
11862 return(XML_ERR_NO_MEMORY);
11863 }
11864 xmlAddChild(node, fake);
11865
11866 /*
11867 * Use input doc's dict if present, else assure XML_PARSE_NODICT is set.
11868 * We need a dictionary for xmlDetectSAX2, so if there's no doc dict
11869 * we must wait until the last moment to free the original one.
11870 */
11871 if (doc->dict != NULL) {
11872 if (ctxt->dict != NULL)
11873 xmlDictFree(ctxt->dict);
11874 ctxt->dict = doc->dict;
11875 } else
11876 options |= XML_PARSE_NODICT;
11877
11878 xmlCtxtUseOptions(ctxt, options);
11879 xmlDetectSAX2(ctxt);
11880 ctxt->myDoc = doc;
11881
11882 if (node->type == XML_ELEMENT_NODE) {
11883 nodePush(ctxt, node);
11884 /*
11885 * initialize the SAX2 namespaces stack
11886 */
11887 cur = node;
11888 while ((cur != NULL) && (cur->type == XML_ELEMENT_NODE)) {
11889 xmlNsPtr ns = cur->nsDef;
11890 const xmlChar *iprefix, *ihref;
11891
11892 while (ns != NULL) {
11893 if (ctxt->dict) {
11894 iprefix = xmlDictLookup(ctxt->dict, ns->prefix, -1);
11895 ihref = xmlDictLookup(ctxt->dict, ns->href, -1);
11896 } else {
11897 iprefix = ns->prefix;
11898 ihref = ns->href;
11899 }
11900
11901 if (xmlGetNamespace(ctxt, iprefix) == NULL) {
11902 nsPush(ctxt, iprefix, ihref);
11903 nsnr++;
11904 }
11905 ns = ns->next;
11906 }
11907 cur = cur->parent;
11908 }
11909 ctxt->instate = XML_PARSER_CONTENT;
11910 }
11911
11912 if ((ctxt->validate) || (ctxt->replaceEntities != 0)) {
11913 /*
11914 * ID/IDREF registration will be done in xmlValidateElement below
11915 */
11916 ctxt->loadsubset |= XML_SKIP_IDS;
11917 }
11918
11919#ifdef LIBXML_HTML_ENABLED
11920 if (doc->type == XML_HTML_DOCUMENT_NODE)
11921 __htmlParseContent(ctxt);
11922 else
11923#endif
11924 xmlParseContent(ctxt);
11925
11926 nsPop(ctxt, nsnr);
11927 if ((RAW == '<') && (NXT(1) == '/')) {
11928 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
11929 } else if (RAW != 0) {
11930 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
11931 }
11932 if ((ctxt->node != NULL) && (ctxt->node != node)) {
11933 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
11934 ctxt->wellFormed = 0;
11935 }
11936
11937 if (!ctxt->wellFormed) {
11938 if (ctxt->errNo == 0)
11939 ret = XML_ERR_INTERNAL_ERROR;
11940 else
11941 ret = (xmlParserErrors)ctxt->errNo;
11942 } else {
11943 ret = XML_ERR_OK;
11944 }
11945
11946 /*
11947 * Return the newly created nodeset after unlinking it from
11948 * the pseudo sibling.
11949 */
11950
11951 cur = fake->next;
11952 fake->next = NULL;
11953 node->last = fake;
11954
11955 if (cur != NULL) {
11956 cur->prev = NULL;
11957 }
11958
11959 *lst = cur;
11960
11961 while (cur != NULL) {
11962 cur->parent = NULL;
11963 cur = cur->next;
11964 }
11965
11966 xmlUnlinkNode(fake);
11967 xmlFreeNode(fake);
11968
11969
11970 if (ret != XML_ERR_OK) {
11971 xmlFreeNodeList(*lst);
11972 *lst = NULL;
11973 }
11974
11975 if (doc->dict != NULL)
11976 ctxt->dict = NULL;
11977 xmlFreeParserCtxt(ctxt);
11978
11979 return(ret);
11980#else /* !SAX2 */
11981 return(XML_ERR_INTERNAL_ERROR);
11982#endif
11983}
11984
11985#ifdef LIBXML_SAX1_ENABLED
11986/**
11987 * xmlParseBalancedChunkMemoryRecover:
11988 * @doc: the document the chunk pertains to
11989 * @sax: the SAX handler bloc (possibly NULL)
11990 * @user_data: The user data returned on SAX callbacks (possibly NULL)
11991 * @depth: Used for loop detection, use 0
11992 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
11993 * @lst: the return value for the set of parsed nodes
11994 * @recover: return nodes even if the data is broken (use 0)
11995 *
11996 *
11997 * Parse a well-balanced chunk of an XML document
11998 * called by the parser
11999 * The allowed sequence for the Well Balanced Chunk is the one defined by
12000 * the content production in the XML grammar:
12001 *
12002 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
12003 *
12004 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
12005 * the parser error code otherwise
12006 *
12007 * In case recover is set to 1, the nodelist will not be empty even if
12008 * the parsed chunk is not well balanced.
12009 */
12010int
12011xmlParseBalancedChunkMemoryRecover(xmlDocPtr doc, xmlSAXHandlerPtr sax,
12012 void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst,
12013 int recover) {
12014 xmlParserCtxtPtr ctxt;
12015 xmlDocPtr newDoc;
12016 xmlSAXHandlerPtr oldsax = NULL;
12017 xmlNodePtr content, newRoot;
12018 int size;
12019 int ret = 0;
12020
12021 if (depth > 40) {
12022 return(XML_ERR_ENTITY_LOOP);
12023 }
12024
12025
12026 if (lst != NULL)
12027 *lst = NULL;
12028 if (string == NULL)
12029 return(-1);
12030
12031 size = xmlStrlen(string);
12032
12033 ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
12034 if (ctxt == NULL) return(-1);
12035 ctxt->userData = ctxt;
12036 if (sax != NULL) {
12037 oldsax = ctxt->sax;
12038 ctxt->sax = sax;
12039 if (user_data != NULL)
12040 ctxt->userData = user_data;
12041 }
12042 newDoc = xmlNewDoc(BAD_CAST "1.0");
12043 if (newDoc == NULL) {
12044 xmlFreeParserCtxt(ctxt);
12045 return(-1);
12046 }
12047 if ((doc != NULL) && (doc->dict != NULL)) {
12048 xmlDictFree(ctxt->dict);
12049 ctxt->dict = doc->dict;
12050 xmlDictReference(ctxt->dict);
12051 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
12052 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
12053 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
12054 ctxt->dictNames = 1;
12055 } else {
12056 xmlCtxtUseOptions(ctxt, XML_PARSE_NODICT);
12057 }
12058 if (doc != NULL) {
12059 newDoc->intSubset = doc->intSubset;
12060 newDoc->extSubset = doc->extSubset;
12061 }
12062 newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
12063 if (newRoot == NULL) {
12064 if (sax != NULL)
12065 ctxt->sax = oldsax;
12066 xmlFreeParserCtxt(ctxt);
12067 newDoc->intSubset = NULL;
12068 newDoc->extSubset = NULL;
12069 xmlFreeDoc(newDoc);
12070 return(-1);
12071 }
12072 xmlAddChild((xmlNodePtr) newDoc, newRoot);
12073 nodePush(ctxt, newRoot);
12074 if (doc == NULL) {
12075 ctxt->myDoc = newDoc;
12076 } else {
12077 ctxt->myDoc = newDoc;
12078 newDoc->children->doc = doc;
12079 /* Ensure that doc has XML spec namespace */
12080 xmlSearchNsByHref(doc, (xmlNodePtr)doc, XML_XML_NAMESPACE);
12081 newDoc->oldNs = doc->oldNs;
12082 }
12083 ctxt->instate = XML_PARSER_CONTENT;
12084 ctxt->depth = depth;
12085
12086 /*
12087 * Doing validity checking on chunk doesn't make sense
12088 */
12089 ctxt->validate = 0;
12090 ctxt->loadsubset = 0;
12091 xmlDetectSAX2(ctxt);
12092
12093 if ( doc != NULL ){
12094 content = doc->children;
12095 doc->children = NULL;
12096 xmlParseContent(ctxt);
12097 doc->children = content;
12098 }
12099 else {
12100 xmlParseContent(ctxt);
12101 }
12102 if ((RAW == '<') && (NXT(1) == '/')) {
12103 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
12104 } else if (RAW != 0) {
12105 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
12106 }
12107 if (ctxt->node != newDoc->children) {
12108 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
12109 }
12110
12111 if (!ctxt->wellFormed) {
12112 if (ctxt->errNo == 0)
12113 ret = 1;
12114 else
12115 ret = ctxt->errNo;
12116 } else {
12117 ret = 0;
12118 }
12119
12120 if ((lst != NULL) && ((ret == 0) || (recover == 1))) {
12121 xmlNodePtr cur;
12122
12123 /*
12124 * Return the newly created nodeset after unlinking it from
12125 * they pseudo parent.
12126 */
12127 cur = newDoc->children->children;
12128 *lst = cur;
12129 while (cur != NULL) {
12130 xmlSetTreeDoc(cur, doc);
12131 cur->parent = NULL;
12132 cur = cur->next;
12133 }
12134 newDoc->children->children = NULL;
12135 }
12136
12137 if (sax != NULL)
12138 ctxt->sax = oldsax;
12139 xmlFreeParserCtxt(ctxt);
12140 newDoc->intSubset = NULL;
12141 newDoc->extSubset = NULL;
12142 newDoc->oldNs = NULL;
12143 xmlFreeDoc(newDoc);
12144
12145 return(ret);
12146}
12147
12148/**
12149 * xmlSAXParseEntity:
12150 * @sax: the SAX handler block
12151 * @filename: the filename
12152 *
12153 * parse an XML external entity out of context and build a tree.
12154 * It use the given SAX function block to handle the parsing callback.
12155 * If sax is NULL, fallback to the default DOM tree building routines.
12156 *
12157 * [78] extParsedEnt ::= TextDecl? content
12158 *
12159 * This correspond to a "Well Balanced" chunk
12160 *
12161 * Returns the resulting document tree
12162 */
12163
12164xmlDocPtr
12165xmlSAXParseEntity(xmlSAXHandlerPtr sax, const char *filename) {
12166 xmlDocPtr ret;
12167 xmlParserCtxtPtr ctxt;
12168
12169 ctxt = xmlCreateFileParserCtxt(filename);
12170 if (ctxt == NULL) {
12171 return(NULL);
12172 }
12173 if (sax != NULL) {
12174 if (ctxt->sax != NULL)
12175 xmlFree(ctxt->sax);
12176 ctxt->sax = sax;
12177 ctxt->userData = NULL;
12178 }
12179
12180 xmlParseExtParsedEnt(ctxt);
12181
12182 if (ctxt->wellFormed)
12183 ret = ctxt->myDoc;
12184 else {
12185 ret = NULL;
12186 xmlFreeDoc(ctxt->myDoc);
12187 ctxt->myDoc = NULL;
12188 }
12189 if (sax != NULL)
12190 ctxt->sax = NULL;
12191 xmlFreeParserCtxt(ctxt);
12192
12193 return(ret);
12194}
12195
12196/**
12197 * xmlParseEntity:
12198 * @filename: the filename
12199 *
12200 * parse an XML external entity out of context and build a tree.
12201 *
12202 * [78] extParsedEnt ::= TextDecl? content
12203 *
12204 * This correspond to a "Well Balanced" chunk
12205 *
12206 * Returns the resulting document tree
12207 */
12208
12209xmlDocPtr
12210xmlParseEntity(const char *filename) {
12211 return(xmlSAXParseEntity(NULL, filename));
12212}
12213#endif /* LIBXML_SAX1_ENABLED */
12214
12215/**
12216 * xmlCreateEntityParserCtxt:
12217 * @URL: the entity URL
12218 * @ID: the entity PUBLIC ID
12219 * @base: a possible base for the target URI
12220 *
12221 * Create a parser context for an external entity
12222 * Automatic support for ZLIB/Compress compressed document is provided
12223 * by default if found at compile-time.
12224 *
12225 * Returns the new parser context or NULL
12226 */
12227xmlParserCtxtPtr
12228xmlCreateEntityParserCtxt(const xmlChar *URL, const xmlChar *ID,
12229 const xmlChar *base) {
12230 xmlParserCtxtPtr ctxt;
12231 xmlParserInputPtr inputStream;
12232 char *directory = NULL;
12233 xmlChar *uri;
12234
12235 ctxt = xmlNewParserCtxt();
12236 if (ctxt == NULL) {
12237 return(NULL);
12238 }
12239
12240 uri = xmlBuildURI(URL, base);
12241
12242 if (uri == NULL) {
12243 inputStream = xmlLoadExternalEntity((char *)URL, (char *)ID, ctxt);
12244 if (inputStream == NULL) {
12245 xmlFreeParserCtxt(ctxt);
12246 return(NULL);
12247 }
12248
12249 inputPush(ctxt, inputStream);
12250
12251 if ((ctxt->directory == NULL) && (directory == NULL))
12252 directory = xmlParserGetDirectory((char *)URL);
12253 if ((ctxt->directory == NULL) && (directory != NULL))
12254 ctxt->directory = directory;
12255 } else {
12256 inputStream = xmlLoadExternalEntity((char *)uri, (char *)ID, ctxt);
12257 if (inputStream == NULL) {
12258 xmlFree(uri);
12259 xmlFreeParserCtxt(ctxt);
12260 return(NULL);
12261 }
12262
12263 inputPush(ctxt, inputStream);
12264
12265 if ((ctxt->directory == NULL) && (directory == NULL))
12266 directory = xmlParserGetDirectory((char *)uri);
12267 if ((ctxt->directory == NULL) && (directory != NULL))
12268 ctxt->directory = directory;
12269 xmlFree(uri);
12270 }
12271 return(ctxt);
12272}
12273
12274/************************************************************************
12275 * *
12276 * Front ends when parsing from a file *
12277 * *
12278 ************************************************************************/
12279
12280/**
12281 * xmlCreateURLParserCtxt:
12282 * @filename: the filename or URL
12283 * @options: a combination of xmlParserOption
12284 *
12285 * Create a parser context for a file or URL content.
12286 * Automatic support for ZLIB/Compress compressed document is provided
12287 * by default if found at compile-time and for file accesses
12288 *
12289 * Returns the new parser context or NULL
12290 */
12291xmlParserCtxtPtr
12292xmlCreateURLParserCtxt(const char *filename, int options)
12293{
12294 xmlParserCtxtPtr ctxt;
12295 xmlParserInputPtr inputStream;
12296 char *directory = NULL;
12297
12298 ctxt = xmlNewParserCtxt();
12299 if (ctxt == NULL) {
12300 xmlErrMemory(NULL, "cannot allocate parser context");
12301 return(NULL);
12302 }
12303
12304 if (options)
12305 xmlCtxtUseOptions(ctxt, options);
12306 ctxt->linenumbers = 1;
12307
12308 inputStream = xmlLoadExternalEntity(filename, NULL, ctxt);
12309 if (inputStream == NULL) {
12310 xmlFreeParserCtxt(ctxt);
12311 return(NULL);
12312 }
12313
12314 inputPush(ctxt, inputStream);
12315 if ((ctxt->directory == NULL) && (directory == NULL))
12316 directory = xmlParserGetDirectory(filename);
12317 if ((ctxt->directory == NULL) && (directory != NULL))
12318 ctxt->directory = directory;
12319
12320 return(ctxt);
12321}
12322
12323/**
12324 * xmlCreateFileParserCtxt:
12325 * @filename: the filename
12326 *
12327 * Create a parser context for a file content.
12328 * Automatic support for ZLIB/Compress compressed document is provided
12329 * by default if found at compile-time.
12330 *
12331 * Returns the new parser context or NULL
12332 */
12333xmlParserCtxtPtr
12334xmlCreateFileParserCtxt(const char *filename)
12335{
12336 return(xmlCreateURLParserCtxt(filename, 0));
12337}
12338
12339#ifdef LIBXML_SAX1_ENABLED
12340/**
12341 * xmlSAXParseFileWithData:
12342 * @sax: the SAX handler block
12343 * @filename: the filename
12344 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
12345 * documents
12346 * @data: the userdata
12347 *
12348 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
12349 * compressed document is provided by default if found at compile-time.
12350 * It use the given SAX function block to handle the parsing callback.
12351 * If sax is NULL, fallback to the default DOM tree building routines.
12352 *
12353 * User data (void *) is stored within the parser context in the
12354 * context's _private member, so it is available nearly everywhere in libxml
12355 *
12356 * Returns the resulting document tree
12357 */
12358
12359xmlDocPtr
12360xmlSAXParseFileWithData(xmlSAXHandlerPtr sax, const char *filename,
12361 int recovery, void *data) {
12362 xmlDocPtr ret;
12363 xmlParserCtxtPtr ctxt;
12364 char *directory = NULL;
12365
12366 xmlInitParser();
12367
12368 ctxt = xmlCreateFileParserCtxt(filename);
12369 if (ctxt == NULL) {
12370 return(NULL);
12371 }
12372 if (sax != NULL) {
12373 if (ctxt->sax != NULL)
12374 xmlFree(ctxt->sax);
12375 ctxt->sax = sax;
12376 }
12377 xmlDetectSAX2(ctxt);
12378 if (data!=NULL) {
12379 ctxt->_private = data;
12380 }
12381
12382 if ((ctxt->directory == NULL) && (directory == NULL))
12383 directory = xmlParserGetDirectory(filename);
12384 if ((ctxt->directory == NULL) && (directory != NULL))
12385 ctxt->directory = (char *) xmlStrdup((xmlChar *) directory);
12386
12387 ctxt->recovery = recovery;
12388
12389 xmlParseDocument(ctxt);
12390
12391 if ((ctxt->wellFormed) || recovery) {
12392 ret = ctxt->myDoc;
12393 if (ret != NULL) {
12394 if (ctxt->input->buf->compressed > 0)
12395 ret->compression = 9;
12396 else
12397 ret->compression = ctxt->input->buf->compressed;
12398 }
12399 }
12400 else {
12401 ret = NULL;
12402 xmlFreeDoc(ctxt->myDoc);
12403 ctxt->myDoc = NULL;
12404 }
12405 if (sax != NULL)
12406 ctxt->sax = NULL;
12407 xmlFreeParserCtxt(ctxt);
12408
12409 return(ret);
12410}
12411
12412/**
12413 * xmlSAXParseFile:
12414 * @sax: the SAX handler block
12415 * @filename: the filename
12416 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
12417 * documents
12418 *
12419 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
12420 * compressed document is provided by default if found at compile-time.
12421 * It use the given SAX function block to handle the parsing callback.
12422 * If sax is NULL, fallback to the default DOM tree building routines.
12423 *
12424 * Returns the resulting document tree
12425 */
12426
12427xmlDocPtr
12428xmlSAXParseFile(xmlSAXHandlerPtr sax, const char *filename,
12429 int recovery) {
12430 return(xmlSAXParseFileWithData(sax,filename,recovery,NULL));
12431}
12432
12433/**
12434 * xmlRecoverDoc:
12435 * @cur: a pointer to an array of xmlChar
12436 *
12437 * parse an XML in-memory document and build a tree.
12438 * In the case the document is not Well Formed, a tree is built anyway
12439 *
12440 * Returns the resulting document tree
12441 */
12442
12443xmlDocPtr
12444xmlRecoverDoc(xmlChar *cur) {
12445 return(xmlSAXParseDoc(NULL, cur, 1));
12446}
12447
12448/**
12449 * xmlParseFile:
12450 * @filename: the filename
12451 *
12452 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
12453 * compressed document is provided by default if found at compile-time.
12454 *
12455 * Returns the resulting document tree if the file was wellformed,
12456 * NULL otherwise.
12457 */
12458
12459xmlDocPtr
12460xmlParseFile(const char *filename) {
12461 return(xmlSAXParseFile(NULL, filename, 0));
12462}
12463
12464/**
12465 * xmlRecoverFile:
12466 * @filename: the filename
12467 *
12468 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
12469 * compressed document is provided by default if found at compile-time.
12470 * In the case the document is not Well Formed, a tree is built anyway
12471 *
12472 * Returns the resulting document tree
12473 */
12474
12475xmlDocPtr
12476xmlRecoverFile(const char *filename) {
12477 return(xmlSAXParseFile(NULL, filename, 1));
12478}
12479
12480
12481/**
12482 * xmlSetupParserForBuffer:
12483 * @ctxt: an XML parser context
12484 * @buffer: a xmlChar * buffer
12485 * @filename: a file name
12486 *
12487 * Setup the parser context to parse a new buffer; Clears any prior
12488 * contents from the parser context. The buffer parameter must not be
12489 * NULL, but the filename parameter can be
12490 */
12491void
12492xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt, const xmlChar* buffer,
12493 const char* filename)
12494{
12495 xmlParserInputPtr input;
12496
12497 if ((ctxt == NULL) || (buffer == NULL))
12498 return;
12499
12500 input = xmlNewInputStream(ctxt);
12501 if (input == NULL) {
12502 xmlErrMemory(NULL, "parsing new buffer: out of memory\n");
12503 xmlClearParserCtxt(ctxt);
12504 return;
12505 }
12506
12507 xmlClearParserCtxt(ctxt);
12508 if (filename != NULL)
12509 input->filename = (char *) xmlCanonicPath((const xmlChar *)filename);
12510 input->base = buffer;
12511 input->cur = buffer;
12512 input->end = &buffer[xmlStrlen(buffer)];
12513 inputPush(ctxt, input);
12514}
12515
12516/**
12517 * xmlSAXUserParseFile:
12518 * @sax: a SAX handler
12519 * @user_data: The user data returned on SAX callbacks
12520 * @filename: a file name
12521 *
12522 * parse an XML file and call the given SAX handler routines.
12523 * Automatic support for ZLIB/Compress compressed document is provided
12524 *
12525 * Returns 0 in case of success or a error number otherwise
12526 */
12527int
12528xmlSAXUserParseFile(xmlSAXHandlerPtr sax, void *user_data,
12529 const char *filename) {
12530 int ret = 0;
12531 xmlParserCtxtPtr ctxt;
12532
12533 ctxt = xmlCreateFileParserCtxt(filename);
12534 if (ctxt == NULL) return -1;
12535 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
12536 xmlFree(ctxt->sax);
12537 ctxt->sax = sax;
12538 xmlDetectSAX2(ctxt);
12539
12540 if (user_data != NULL)
12541 ctxt->userData = user_data;
12542
12543 xmlParseDocument(ctxt);
12544
12545 if (ctxt->wellFormed)
12546 ret = 0;
12547 else {
12548 if (ctxt->errNo != 0)
12549 ret = ctxt->errNo;
12550 else
12551 ret = -1;
12552 }
12553 if (sax != NULL)
12554 ctxt->sax = NULL;
12555 if (ctxt->myDoc != NULL) {
12556 xmlFreeDoc(ctxt->myDoc);
12557 ctxt->myDoc = NULL;
12558 }
12559 xmlFreeParserCtxt(ctxt);
12560
12561 return ret;
12562}
12563#endif /* LIBXML_SAX1_ENABLED */
12564
12565/************************************************************************
12566 * *
12567 * Front ends when parsing from memory *
12568 * *
12569 ************************************************************************/
12570
12571/**
12572 * xmlCreateMemoryParserCtxt:
12573 * @buffer: a pointer to a char array
12574 * @size: the size of the array
12575 *
12576 * Create a parser context for an XML in-memory document.
12577 *
12578 * Returns the new parser context or NULL
12579 */
12580xmlParserCtxtPtr
12581xmlCreateMemoryParserCtxt(const char *buffer, int size) {
12582 xmlParserCtxtPtr ctxt;
12583 xmlParserInputPtr input;
12584 xmlParserInputBufferPtr buf;
12585
12586 if (buffer == NULL)
12587 return(NULL);
12588 if (size <= 0)
12589 return(NULL);
12590
12591 ctxt = xmlNewParserCtxt();
12592 if (ctxt == NULL)
12593 return(NULL);
12594
12595 /* TODO: xmlParserInputBufferCreateStatic, requires some serious changes */
12596 buf = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
12597 if (buf == NULL) {
12598 xmlFreeParserCtxt(ctxt);
12599 return(NULL);
12600 }
12601
12602 input = xmlNewInputStream(ctxt);
12603 if (input == NULL) {
12604 xmlFreeParserInputBuffer(buf);
12605 xmlFreeParserCtxt(ctxt);
12606 return(NULL);
12607 }
12608
12609 input->filename = NULL;
12610 input->buf = buf;
12611 input->base = input->buf->buffer->content;
12612 input->cur = input->buf->buffer->content;
12613 input->end = &input->buf->buffer->content[input->buf->buffer->use];
12614
12615 inputPush(ctxt, input);
12616 return(ctxt);
12617}
12618
12619#ifdef LIBXML_SAX1_ENABLED
12620/**
12621 * xmlSAXParseMemoryWithData:
12622 * @sax: the SAX handler block
12623 * @buffer: an pointer to a char array
12624 * @size: the size of the array
12625 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
12626 * documents
12627 * @data: the userdata
12628 *
12629 * parse an XML in-memory block and use the given SAX function block
12630 * to handle the parsing callback. If sax is NULL, fallback to the default
12631 * DOM tree building routines.
12632 *
12633 * User data (void *) is stored within the parser context in the
12634 * context's _private member, so it is available nearly everywhere in libxml
12635 *
12636 * Returns the resulting document tree
12637 */
12638
12639xmlDocPtr
12640xmlSAXParseMemoryWithData(xmlSAXHandlerPtr sax, const char *buffer,
12641 int size, int recovery, void *data) {
12642 xmlDocPtr ret;
12643 xmlParserCtxtPtr ctxt;
12644
12645 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
12646 if (ctxt == NULL) return(NULL);
12647 if (sax != NULL) {
12648 if (ctxt->sax != NULL)
12649 xmlFree(ctxt->sax);
12650 ctxt->sax = sax;
12651 }
12652 xmlDetectSAX2(ctxt);
12653 if (data!=NULL) {
12654 ctxt->_private=data;
12655 }
12656
12657 ctxt->recovery = recovery;
12658
12659 xmlParseDocument(ctxt);
12660
12661 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
12662 else {
12663 ret = NULL;
12664 xmlFreeDoc(ctxt->myDoc);
12665 ctxt->myDoc = NULL;
12666 }
12667 if (sax != NULL)
12668 ctxt->sax = NULL;
12669 xmlFreeParserCtxt(ctxt);
12670
12671 return(ret);
12672}
12673
12674/**
12675 * xmlSAXParseMemory:
12676 * @sax: the SAX handler block
12677 * @buffer: an pointer to a char array
12678 * @size: the size of the array
12679 * @recovery: work in recovery mode, i.e. tries to read not Well Formed
12680 * documents
12681 *
12682 * parse an XML in-memory block and use the given SAX function block
12683 * to handle the parsing callback. If sax is NULL, fallback to the default
12684 * DOM tree building routines.
12685 *
12686 * Returns the resulting document tree
12687 */
12688xmlDocPtr
12689xmlSAXParseMemory(xmlSAXHandlerPtr sax, const char *buffer,
12690 int size, int recovery) {
12691 return xmlSAXParseMemoryWithData(sax, buffer, size, recovery, NULL);
12692}
12693
12694/**
12695 * xmlParseMemory:
12696 * @buffer: an pointer to a char array
12697 * @size: the size of the array
12698 *
12699 * parse an XML in-memory block and build a tree.
12700 *
12701 * Returns the resulting document tree
12702 */
12703
12704xmlDocPtr xmlParseMemory(const char *buffer, int size) {
12705 return(xmlSAXParseMemory(NULL, buffer, size, 0));
12706}
12707
12708/**
12709 * xmlRecoverMemory:
12710 * @buffer: an pointer to a char array
12711 * @size: the size of the array
12712 *
12713 * parse an XML in-memory block and build a tree.
12714 * In the case the document is not Well Formed, a tree is built anyway
12715 *
12716 * Returns the resulting document tree
12717 */
12718
12719xmlDocPtr xmlRecoverMemory(const char *buffer, int size) {
12720 return(xmlSAXParseMemory(NULL, buffer, size, 1));
12721}
12722
12723/**
12724 * xmlSAXUserParseMemory:
12725 * @sax: a SAX handler
12726 * @user_data: The user data returned on SAX callbacks
12727 * @buffer: an in-memory XML document input
12728 * @size: the length of the XML document in bytes
12729 *
12730 * A better SAX parsing routine.
12731 * parse an XML in-memory buffer and call the given SAX handler routines.
12732 *
12733 * Returns 0 in case of success or a error number otherwise
12734 */
12735int xmlSAXUserParseMemory(xmlSAXHandlerPtr sax, void *user_data,
12736 const char *buffer, int size) {
12737 int ret = 0;
12738 xmlParserCtxtPtr ctxt;
12739
12740 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
12741 if (ctxt == NULL) return -1;
12742 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
12743 xmlFree(ctxt->sax);
12744 ctxt->sax = sax;
12745 xmlDetectSAX2(ctxt);
12746
12747 if (user_data != NULL)
12748 ctxt->userData = user_data;
12749
12750 xmlParseDocument(ctxt);
12751
12752 if (ctxt->wellFormed)
12753 ret = 0;
12754 else {
12755 if (ctxt->errNo != 0)
12756 ret = ctxt->errNo;
12757 else
12758 ret = -1;
12759 }
12760 if (sax != NULL)
12761 ctxt->sax = NULL;
12762 if (ctxt->myDoc != NULL) {
12763 xmlFreeDoc(ctxt->myDoc);
12764 ctxt->myDoc = NULL;
12765 }
12766 xmlFreeParserCtxt(ctxt);
12767
12768 return ret;
12769}
12770#endif /* LIBXML_SAX1_ENABLED */
12771
12772/**
12773 * xmlCreateDocParserCtxt:
12774 * @cur: a pointer to an array of xmlChar
12775 *
12776 * Creates a parser context for an XML in-memory document.
12777 *
12778 * Returns the new parser context or NULL
12779 */
12780xmlParserCtxtPtr
12781xmlCreateDocParserCtxt(const xmlChar *cur) {
12782 int len;
12783
12784 if (cur == NULL)
12785 return(NULL);
12786 len = xmlStrlen(cur);
12787 return(xmlCreateMemoryParserCtxt((const char *)cur, len));
12788}
12789
12790#ifdef LIBXML_SAX1_ENABLED
12791/**
12792 * xmlSAXParseDoc:
12793 * @sax: the SAX handler block
12794 * @cur: a pointer to an array of xmlChar
12795 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
12796 * documents
12797 *
12798 * parse an XML in-memory document and build a tree.
12799 * It use the given SAX function block to handle the parsing callback.
12800 * If sax is NULL, fallback to the default DOM tree building routines.
12801 *
12802 * Returns the resulting document tree
12803 */
12804
12805xmlDocPtr
12806xmlSAXParseDoc(xmlSAXHandlerPtr sax, const xmlChar *cur, int recovery) {
12807 xmlDocPtr ret;
12808 xmlParserCtxtPtr ctxt;
12809 xmlSAXHandlerPtr oldsax = NULL;
12810
12811 if (cur == NULL) return(NULL);
12812
12813
12814 ctxt = xmlCreateDocParserCtxt(cur);
12815 if (ctxt == NULL) return(NULL);
12816 if (sax != NULL) {
12817 oldsax = ctxt->sax;
12818 ctxt->sax = sax;
12819 ctxt->userData = NULL;
12820 }
12821 xmlDetectSAX2(ctxt);
12822
12823 xmlParseDocument(ctxt);
12824 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
12825 else {
12826 ret = NULL;
12827 xmlFreeDoc(ctxt->myDoc);
12828 ctxt->myDoc = NULL;
12829 }
12830 if (sax != NULL)
12831 ctxt->sax = oldsax;
12832 xmlFreeParserCtxt(ctxt);
12833
12834 return(ret);
12835}
12836
12837/**
12838 * xmlParseDoc:
12839 * @cur: a pointer to an array of xmlChar
12840 *
12841 * parse an XML in-memory document and build a tree.
12842 *
12843 * Returns the resulting document tree
12844 */
12845
12846xmlDocPtr
12847xmlParseDoc(const xmlChar *cur) {
12848 return(xmlSAXParseDoc(NULL, cur, 0));
12849}
12850#endif /* LIBXML_SAX1_ENABLED */
12851
12852#ifdef LIBXML_LEGACY_ENABLED
12853/************************************************************************
12854 * *
12855 * Specific function to keep track of entities references *
12856 * and used by the XSLT debugger *
12857 * *
12858 ************************************************************************/
12859
12860static xmlEntityReferenceFunc xmlEntityRefFunc = NULL;
12861
12862/**
12863 * xmlAddEntityReference:
12864 * @ent : A valid entity
12865 * @firstNode : A valid first node for children of entity
12866 * @lastNode : A valid last node of children entity
12867 *
12868 * Notify of a reference to an entity of type XML_EXTERNAL_GENERAL_PARSED_ENTITY
12869 */
12870static void
12871xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
12872 xmlNodePtr lastNode)
12873{
12874 if (xmlEntityRefFunc != NULL) {
12875 (*xmlEntityRefFunc) (ent, firstNode, lastNode);
12876 }
12877}
12878
12879
12880/**
12881 * xmlSetEntityReferenceFunc:
12882 * @func: A valid function
12883 *
12884 * Set the function to call call back when a xml reference has been made
12885 */
12886void
12887xmlSetEntityReferenceFunc(xmlEntityReferenceFunc func)
12888{
12889 xmlEntityRefFunc = func;
12890}
12891#endif /* LIBXML_LEGACY_ENABLED */
12892
12893/************************************************************************
12894 * *
12895 * Miscellaneous *
12896 * *
12897 ************************************************************************/
12898
12899#ifdef LIBXML_XPATH_ENABLED
12900#include <libxml/xpath.h>
12901#endif
12902
12903extern void XMLCDECL xmlGenericErrorDefaultFunc(void *ctx, const char *msg, ...);
12904static int xmlParserInitialized = 0;
12905
12906/**
12907 * xmlInitParser:
12908 *
12909 * Initialization function for the XML parser.
12910 * This is not reentrant. Call once before processing in case of
12911 * use in multithreaded programs.
12912 */
12913
12914void
12915xmlInitParser(void) {
12916 if (xmlParserInitialized != 0)
12917 return;
12918
12919#ifdef LIBXML_THREAD_ENABLED
12920 __xmlGlobalInitMutexLock();
12921 if (xmlParserInitialized == 0) {
12922#endif
12923 if ((xmlGenericError == xmlGenericErrorDefaultFunc) ||
12924 (xmlGenericError == NULL))
12925 initGenericErrorDefaultFunc(NULL);
12926 xmlInitGlobals();
12927 xmlInitThreads();
12928 xmlInitMemory();
12929 xmlInitCharEncodingHandlers();
12930 xmlDefaultSAXHandlerInit();
12931 xmlRegisterDefaultInputCallbacks();
12932#ifdef LIBXML_OUTPUT_ENABLED
12933 xmlRegisterDefaultOutputCallbacks();
12934#endif /* LIBXML_OUTPUT_ENABLED */
12935#ifdef LIBXML_HTML_ENABLED
12936 htmlInitAutoClose();
12937 htmlDefaultSAXHandlerInit();
12938#endif
12939#ifdef LIBXML_XPATH_ENABLED
12940 xmlXPathInit();
12941#endif
12942 xmlParserInitialized = 1;
12943#ifdef LIBXML_THREAD_ENABLED
12944 }
12945 __xmlGlobalInitMutexUnlock();
12946#endif
12947}
12948
12949/**
12950 * xmlCleanupParser:
12951 *
12952 * Cleanup function for the XML library. It tries to reclaim all
12953 * parsing related global memory allocated for the library processing.
12954 * It doesn't deallocate any document related memory. Calling this
12955 * function should not prevent reusing the library but one should
12956 * call xmlCleanupParser() only when the process has
12957 * finished using the library or XML document built with it.
12958 */
12959
12960void
12961xmlCleanupParser(void) {
12962 if (!xmlParserInitialized)
12963 return;
12964
12965 xmlCleanupCharEncodingHandlers();
12966#ifdef LIBXML_CATALOG_ENABLED
12967 xmlCatalogCleanup();
12968#endif
12969 xmlDictCleanup();
12970 xmlCleanupInputCallbacks();
12971#ifdef LIBXML_OUTPUT_ENABLED
12972 xmlCleanupOutputCallbacks();
12973#endif
12974#ifdef LIBXML_SCHEMAS_ENABLED
12975 xmlSchemaCleanupTypes();
12976 xmlRelaxNGCleanupTypes();
12977#endif
12978 xmlCleanupGlobals();
12979 xmlResetLastError();
12980 xmlCleanupThreads(); /* must be last if called not from the main thread */
12981 xmlCleanupMemory();
12982 xmlParserInitialized = 0;
12983}
12984
12985/************************************************************************
12986 * *
12987 * New set (2.6.0) of simpler and more flexible APIs *
12988 * *
12989 ************************************************************************/
12990
12991/**
12992 * DICT_FREE:
12993 * @str: a string
12994 *
12995 * Free a string if it is not owned by the "dict" dictionnary in the
12996 * current scope
12997 */
12998#define DICT_FREE(str) \
12999 if ((str) && ((!dict) || \
13000 (xmlDictOwns(dict, (const xmlChar *)(str)) == 0))) \
13001 xmlFree((char *)(str));
13002
13003/**
13004 * xmlCtxtReset:
13005 * @ctxt: an XML parser context
13006 *
13007 * Reset a parser context
13008 */
13009void
13010xmlCtxtReset(xmlParserCtxtPtr ctxt)
13011{
13012 xmlParserInputPtr input;
13013 xmlDictPtr dict;
13014
13015 if (ctxt == NULL)
13016 return;
13017
13018 dict = ctxt->dict;
13019
13020 while ((input = inputPop(ctxt)) != NULL) { /* Non consuming */
13021 xmlFreeInputStream(input);
13022 }
13023 ctxt->inputNr = 0;
13024 ctxt->input = NULL;
13025
13026 ctxt->spaceNr = 0;
13027 if (ctxt->spaceTab != NULL) {
13028 ctxt->spaceTab[0] = -1;
13029 ctxt->space = &ctxt->spaceTab[0];
13030 } else {
13031 ctxt->space = NULL;
13032 }
13033
13034
13035 ctxt->nodeNr = 0;
13036 ctxt->node = NULL;
13037
13038 ctxt->nameNr = 0;
13039 ctxt->name = NULL;
13040
13041 DICT_FREE(ctxt->version);
13042 ctxt->version = NULL;
13043 DICT_FREE(ctxt->encoding);
13044 ctxt->encoding = NULL;
13045 DICT_FREE(ctxt->directory);
13046 ctxt->directory = NULL;
13047 DICT_FREE(ctxt->extSubURI);
13048 ctxt->extSubURI = NULL;
13049 DICT_FREE(ctxt->extSubSystem);
13050 ctxt->extSubSystem = NULL;
13051 if (ctxt->myDoc != NULL)
13052 xmlFreeDoc(ctxt->myDoc);
13053 ctxt->myDoc = NULL;
13054
13055 ctxt->standalone = -1;
13056 ctxt->hasExternalSubset = 0;
13057 ctxt->hasPErefs = 0;
13058 ctxt->html = 0;
13059 ctxt->external = 0;
13060 ctxt->instate = XML_PARSER_START;
13061 ctxt->token = 0;
13062
13063 ctxt->wellFormed = 1;
13064 ctxt->nsWellFormed = 1;
13065 ctxt->disableSAX = 0;
13066 ctxt->valid = 1;
13067#if 0
13068 ctxt->vctxt.userData = ctxt;
13069 ctxt->vctxt.error = xmlParserValidityError;
13070 ctxt->vctxt.warning = xmlParserValidityWarning;
13071#endif
13072 ctxt->record_info = 0;
13073 ctxt->nbChars = 0;
13074 ctxt->checkIndex = 0;
13075 ctxt->inSubset = 0;
13076 ctxt->errNo = XML_ERR_OK;
13077 ctxt->depth = 0;
13078 ctxt->charset = XML_CHAR_ENCODING_UTF8;
13079 ctxt->catalogs = NULL;
13080 xmlInitNodeInfoSeq(&ctxt->node_seq);
13081
13082 if (ctxt->attsDefault != NULL) {
13083 xmlHashFree(ctxt->attsDefault, (xmlHashDeallocator) xmlFree);
13084 ctxt->attsDefault = NULL;
13085 }
13086 if (ctxt->attsSpecial != NULL) {
13087 xmlHashFree(ctxt->attsSpecial, NULL);
13088 ctxt->attsSpecial = NULL;
13089 }
13090
13091#ifdef LIBXML_CATALOG_ENABLED
13092 if (ctxt->catalogs != NULL)
13093 xmlCatalogFreeLocal(ctxt->catalogs);
13094#endif
13095 if (ctxt->lastError.code != XML_ERR_OK)
13096 xmlResetError(&ctxt->lastError);
13097}
13098
13099/**
13100 * xmlCtxtResetPush:
13101 * @ctxt: an XML parser context
13102 * @chunk: a pointer to an array of chars
13103 * @size: number of chars in the array
13104 * @filename: an optional file name or URI
13105 * @encoding: the document encoding, or NULL
13106 *
13107 * Reset a push parser context
13108 *
13109 * Returns 0 in case of success and 1 in case of error
13110 */
13111int
13112xmlCtxtResetPush(xmlParserCtxtPtr ctxt, const char *chunk,
13113 int size, const char *filename, const char *encoding)
13114{
13115 xmlParserInputPtr inputStream;
13116 xmlParserInputBufferPtr buf;
13117 xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
13118
13119 if (ctxt == NULL)
13120 return(1);
13121
13122 if ((encoding == NULL) && (chunk != NULL) && (size >= 4))
13123 enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
13124
13125 buf = xmlAllocParserInputBuffer(enc);
13126 if (buf == NULL)
13127 return(1);
13128
13129 if (ctxt == NULL) {
13130 xmlFreeParserInputBuffer(buf);
13131 return(1);
13132 }
13133
13134 xmlCtxtReset(ctxt);
13135
13136 if (ctxt->pushTab == NULL) {
13137 ctxt->pushTab = (void **) xmlMalloc(ctxt->nameMax * 3 *
13138 sizeof(xmlChar *));
13139 if (ctxt->pushTab == NULL) {
13140 xmlErrMemory(ctxt, NULL);
13141 xmlFreeParserInputBuffer(buf);
13142 return(1);
13143 }
13144 }
13145
13146 if (filename == NULL) {
13147 ctxt->directory = NULL;
13148 } else {
13149 ctxt->directory = xmlParserGetDirectory(filename);
13150 }
13151
13152 inputStream = xmlNewInputStream(ctxt);
13153 if (inputStream == NULL) {
13154 xmlFreeParserInputBuffer(buf);
13155 return(1);
13156 }
13157
13158 if (filename == NULL)
13159 inputStream->filename = NULL;
13160 else
13161 inputStream->filename = (char *)
13162 xmlCanonicPath((const xmlChar *) filename);
13163 inputStream->buf = buf;
13164 inputStream->base = inputStream->buf->buffer->content;
13165 inputStream->cur = inputStream->buf->buffer->content;
13166 inputStream->end =
13167 &inputStream->buf->buffer->content[inputStream->buf->buffer->use];
13168
13169 inputPush(ctxt, inputStream);
13170
13171 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
13172 (ctxt->input->buf != NULL)) {
13173 int base = ctxt->input->base - ctxt->input->buf->buffer->content;
13174 int cur = ctxt->input->cur - ctxt->input->base;
13175
13176 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
13177
13178 ctxt->input->base = ctxt->input->buf->buffer->content + base;
13179 ctxt->input->cur = ctxt->input->base + cur;
13180 ctxt->input->end =
13181 &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->
13182 use];
13183#ifdef DEBUG_PUSH
13184 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
13185#endif
13186 }
13187
13188 if (encoding != NULL) {
13189 xmlCharEncodingHandlerPtr hdlr;
13190
13191 hdlr = xmlFindCharEncodingHandler(encoding);
13192 if (hdlr != NULL) {
13193 xmlSwitchToEncoding(ctxt, hdlr);
13194 } else {
13195 xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
13196 "Unsupported encoding %s\n", BAD_CAST encoding);
13197 }
13198 } else if (enc != XML_CHAR_ENCODING_NONE) {
13199 xmlSwitchEncoding(ctxt, enc);
13200 }
13201
13202 return(0);
13203}
13204
13205/**
13206 * xmlCtxtUseOptions:
13207 * @ctxt: an XML parser context
13208 * @options: a combination of xmlParserOption
13209 *
13210 * Applies the options to the parser context
13211 *
13212 * Returns 0 in case of success, the set of unknown or unimplemented options
13213 * in case of error.
13214 */
13215int
13216xmlCtxtUseOptions(xmlParserCtxtPtr ctxt, int options)
13217{
13218 if (ctxt == NULL)
13219 return(-1);
13220 if (options & XML_PARSE_RECOVER) {
13221 ctxt->recovery = 1;
13222 options -= XML_PARSE_RECOVER;
13223 } else
13224 ctxt->recovery = 0;
13225 if (options & XML_PARSE_DTDLOAD) {
13226 ctxt->loadsubset = XML_DETECT_IDS;
13227 options -= XML_PARSE_DTDLOAD;
13228 } else
13229 ctxt->loadsubset = 0;
13230 if (options & XML_PARSE_DTDATTR) {
13231 ctxt->loadsubset |= XML_COMPLETE_ATTRS;
13232 options -= XML_PARSE_DTDATTR;
13233 }
13234 if (options & XML_PARSE_NOENT) {
13235 ctxt->replaceEntities = 1;
13236 /* ctxt->loadsubset |= XML_DETECT_IDS; */
13237 options -= XML_PARSE_NOENT;
13238 } else
13239 ctxt->replaceEntities = 0;
13240 if (options & XML_PARSE_PEDANTIC) {
13241 ctxt->pedantic = 1;
13242 options -= XML_PARSE_PEDANTIC;
13243 } else
13244 ctxt->pedantic = 0;
13245 if (options & XML_PARSE_NOBLANKS) {
13246 ctxt->keepBlanks = 0;
13247 ctxt->sax->ignorableWhitespace = xmlSAX2IgnorableWhitespace;
13248 options -= XML_PARSE_NOBLANKS;
13249 } else
13250 ctxt->keepBlanks = 1;
13251 if (options & XML_PARSE_DTDVALID) {
13252 ctxt->validate = 1;
13253 if (options & XML_PARSE_NOWARNING)
13254 ctxt->vctxt.warning = NULL;
13255 if (options & XML_PARSE_NOERROR)
13256 ctxt->vctxt.error = NULL;
13257 options -= XML_PARSE_DTDVALID;
13258 } else
13259 ctxt->validate = 0;
13260 if (options & XML_PARSE_NOWARNING) {
13261 ctxt->sax->warning = NULL;
13262 options -= XML_PARSE_NOWARNING;
13263 }
13264 if (options & XML_PARSE_NOERROR) {
13265 ctxt->sax->error = NULL;
13266 ctxt->sax->fatalError = NULL;
13267 options -= XML_PARSE_NOERROR;
13268 }
13269#ifdef LIBXML_SAX1_ENABLED
13270 if (options & XML_PARSE_SAX1) {
13271 ctxt->sax->startElement = xmlSAX2StartElement;
13272 ctxt->sax->endElement = xmlSAX2EndElement;
13273 ctxt->sax->startElementNs = NULL;
13274 ctxt->sax->endElementNs = NULL;
13275 ctxt->sax->initialized = 1;
13276 options -= XML_PARSE_SAX1;
13277 }
13278#endif /* LIBXML_SAX1_ENABLED */
13279 if (options & XML_PARSE_NODICT) {
13280 ctxt->dictNames = 0;
13281 options -= XML_PARSE_NODICT;
13282 } else {
13283 ctxt->dictNames = 1;
13284 }
13285 if (options & XML_PARSE_NOCDATA) {
13286 ctxt->sax->cdataBlock = NULL;
13287 options -= XML_PARSE_NOCDATA;
13288 }
13289 if (options & XML_PARSE_NSCLEAN) {
13290 ctxt->options |= XML_PARSE_NSCLEAN;
13291 options -= XML_PARSE_NSCLEAN;
13292 }
13293 if (options & XML_PARSE_NONET) {
13294 ctxt->options |= XML_PARSE_NONET;
13295 options -= XML_PARSE_NONET;
13296 }
13297 if (options & XML_PARSE_COMPACT) {
13298 ctxt->options |= XML_PARSE_COMPACT;
13299 options -= XML_PARSE_COMPACT;
13300 }
13301 ctxt->linenumbers = 1;
13302 return (options);
13303}
13304
13305/**
13306 * xmlDoRead:
13307 * @ctxt: an XML parser context
13308 * @URL: the base URL to use for the document
13309 * @encoding: the document encoding, or NULL
13310 * @options: a combination of xmlParserOption
13311 * @reuse: keep the context for reuse
13312 *
13313 * Common front-end for the xmlRead functions
13314 *
13315 * Returns the resulting document tree or NULL
13316 */
13317static xmlDocPtr
13318xmlDoRead(xmlParserCtxtPtr ctxt, const char *URL, const char *encoding,
13319 int options, int reuse)
13320{
13321 xmlDocPtr ret;
13322
13323 xmlCtxtUseOptions(ctxt, options);
13324 if (encoding != NULL) {
13325 xmlCharEncodingHandlerPtr hdlr;
13326
13327 hdlr = xmlFindCharEncodingHandler(encoding);
13328 if (hdlr != NULL)
13329 xmlSwitchToEncoding(ctxt, hdlr);
13330 }
13331 if ((URL != NULL) && (ctxt->input != NULL) &&
13332 (ctxt->input->filename == NULL))
13333 ctxt->input->filename = (char *) xmlStrdup((const xmlChar *) URL);
13334 xmlParseDocument(ctxt);
13335 if ((ctxt->wellFormed) || ctxt->recovery)
13336 ret = ctxt->myDoc;
13337 else {
13338 ret = NULL;
13339 if (ctxt->myDoc != NULL) {
13340 xmlFreeDoc(ctxt->myDoc);
13341 }
13342 }
13343 ctxt->myDoc = NULL;
13344 if (!reuse) {
13345 xmlFreeParserCtxt(ctxt);
13346 }
13347
13348 return (ret);
13349}
13350
13351/**
13352 * xmlReadDoc:
13353 * @cur: a pointer to a zero terminated string
13354 * @URL: the base URL to use for the document
13355 * @encoding: the document encoding, or NULL
13356 * @options: a combination of xmlParserOption
13357 *
13358 * parse an XML in-memory document and build a tree.
13359 *
13360 * Returns the resulting document tree
13361 */
13362xmlDocPtr
13363xmlReadDoc(const xmlChar * cur, const char *URL, const char *encoding, int options)
13364{
13365 xmlParserCtxtPtr ctxt;
13366
13367 if (cur == NULL)
13368 return (NULL);
13369
13370 ctxt = xmlCreateDocParserCtxt(cur);
13371 if (ctxt == NULL)
13372 return (NULL);
13373 return (xmlDoRead(ctxt, URL, encoding, options, 0));
13374}
13375
13376/**
13377 * xmlReadFile:
13378 * @filename: a file or URL
13379 * @encoding: the document encoding, or NULL
13380 * @options: a combination of xmlParserOption
13381 *
13382 * parse an XML file from the filesystem or the network.
13383 *
13384 * Returns the resulting document tree
13385 */
13386xmlDocPtr
13387xmlReadFile(const char *filename, const char *encoding, int options)
13388{
13389 xmlParserCtxtPtr ctxt;
13390
13391 ctxt = xmlCreateURLParserCtxt(filename, options);
13392 if (ctxt == NULL)
13393 return (NULL);
13394 return (xmlDoRead(ctxt, NULL, encoding, options, 0));
13395}
13396
13397/**
13398 * xmlReadMemory:
13399 * @buffer: a pointer to a char array
13400 * @size: the size of the array
13401 * @URL: the base URL to use for the document
13402 * @encoding: the document encoding, or NULL
13403 * @options: a combination of xmlParserOption
13404 *
13405 * parse an XML in-memory document and build a tree.
13406 *
13407 * Returns the resulting document tree
13408 */
13409xmlDocPtr
13410xmlReadMemory(const char *buffer, int size, const char *URL, const char *encoding, int options)
13411{
13412 xmlParserCtxtPtr ctxt;
13413
13414 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
13415 if (ctxt == NULL)
13416 return (NULL);
13417 return (xmlDoRead(ctxt, URL, encoding, options, 0));
13418}
13419
13420/**
13421 * xmlReadFd:
13422 * @fd: an open file descriptor
13423 * @URL: the base URL to use for the document
13424 * @encoding: the document encoding, or NULL
13425 * @options: a combination of xmlParserOption
13426 *
13427 * parse an XML from a file descriptor and build a tree.
13428 * NOTE that the file descriptor will not be closed when the
13429 * reader is closed or reset.
13430 *
13431 * Returns the resulting document tree
13432 */
13433xmlDocPtr
13434xmlReadFd(int fd, const char *URL, const char *encoding, int options)
13435{
13436 xmlParserCtxtPtr ctxt;
13437 xmlParserInputBufferPtr input;
13438 xmlParserInputPtr stream;
13439
13440 if (fd < 0)
13441 return (NULL);
13442
13443 input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
13444 if (input == NULL)
13445 return (NULL);
13446 input->closecallback = NULL;
13447 ctxt = xmlNewParserCtxt();
13448 if (ctxt == NULL) {
13449 xmlFreeParserInputBuffer(input);
13450 return (NULL);
13451 }
13452 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
13453 if (stream == NULL) {
13454 xmlFreeParserInputBuffer(input);
13455 xmlFreeParserCtxt(ctxt);
13456 return (NULL);
13457 }
13458 inputPush(ctxt, stream);
13459 return (xmlDoRead(ctxt, URL, encoding, options, 0));
13460}
13461
13462/**
13463 * xmlReadIO:
13464 * @ioread: an I/O read function
13465 * @ioclose: an I/O close function
13466 * @ioctx: an I/O handler
13467 * @URL: the base URL to use for the document
13468 * @encoding: the document encoding, or NULL
13469 * @options: a combination of xmlParserOption
13470 *
13471 * parse an XML document from I/O functions and source and build a tree.
13472 *
13473 * Returns the resulting document tree
13474 */
13475xmlDocPtr
13476xmlReadIO(xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
13477 void *ioctx, const char *URL, const char *encoding, int options)
13478{
13479 xmlParserCtxtPtr ctxt;
13480 xmlParserInputBufferPtr input;
13481 xmlParserInputPtr stream;
13482
13483 if (ioread == NULL)
13484 return (NULL);
13485
13486 input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
13487 XML_CHAR_ENCODING_NONE);
13488 if (input == NULL)
13489 return (NULL);
13490 ctxt = xmlNewParserCtxt();
13491 if (ctxt == NULL) {
13492 xmlFreeParserInputBuffer(input);
13493 return (NULL);
13494 }
13495 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
13496 if (stream == NULL) {
13497 xmlFreeParserInputBuffer(input);
13498 xmlFreeParserCtxt(ctxt);
13499 return (NULL);
13500 }
13501 inputPush(ctxt, stream);
13502 return (xmlDoRead(ctxt, URL, encoding, options, 0));
13503}
13504
13505/**
13506 * xmlCtxtReadDoc:
13507 * @ctxt: an XML parser context
13508 * @cur: a pointer to a zero terminated string
13509 * @URL: the base URL to use for the document
13510 * @encoding: the document encoding, or NULL
13511 * @options: a combination of xmlParserOption
13512 *
13513 * parse an XML in-memory document and build a tree.
13514 * This reuses the existing @ctxt parser context
13515 *
13516 * Returns the resulting document tree
13517 */
13518xmlDocPtr
13519xmlCtxtReadDoc(xmlParserCtxtPtr ctxt, const xmlChar * cur,
13520 const char *URL, const char *encoding, int options)
13521{
13522 xmlParserInputPtr stream;
13523
13524 if (cur == NULL)
13525 return (NULL);
13526 if (ctxt == NULL)
13527 return (NULL);
13528
13529 xmlCtxtReset(ctxt);
13530
13531 stream = xmlNewStringInputStream(ctxt, cur);
13532 if (stream == NULL) {
13533 return (NULL);
13534 }
13535 inputPush(ctxt, stream);
13536 return (xmlDoRead(ctxt, URL, encoding, options, 1));
13537}
13538
13539/**
13540 * xmlCtxtReadFile:
13541 * @ctxt: an XML parser context
13542 * @filename: a file or URL
13543 * @encoding: the document encoding, or NULL
13544 * @options: a combination of xmlParserOption
13545 *
13546 * parse an XML file from the filesystem or the network.
13547 * This reuses the existing @ctxt parser context
13548 *
13549 * Returns the resulting document tree
13550 */
13551xmlDocPtr
13552xmlCtxtReadFile(xmlParserCtxtPtr ctxt, const char *filename,
13553 const char *encoding, int options)
13554{
13555 xmlParserInputPtr stream;
13556
13557 if (filename == NULL)
13558 return (NULL);
13559 if (ctxt == NULL)
13560 return (NULL);
13561
13562 xmlCtxtReset(ctxt);
13563
13564 stream = xmlLoadExternalEntity(filename, NULL, ctxt);
13565 if (stream == NULL) {
13566 return (NULL);
13567 }
13568 inputPush(ctxt, stream);
13569 return (xmlDoRead(ctxt, NULL, encoding, options, 1));
13570}
13571
13572/**
13573 * xmlCtxtReadMemory:
13574 * @ctxt: an XML parser context
13575 * @buffer: a pointer to a char array
13576 * @size: the size of the array
13577 * @URL: the base URL to use for the document
13578 * @encoding: the document encoding, or NULL
13579 * @options: a combination of xmlParserOption
13580 *
13581 * parse an XML in-memory document and build a tree.
13582 * This reuses the existing @ctxt parser context
13583 *
13584 * Returns the resulting document tree
13585 */
13586xmlDocPtr
13587xmlCtxtReadMemory(xmlParserCtxtPtr ctxt, const char *buffer, int size,
13588 const char *URL, const char *encoding, int options)
13589{
13590 xmlParserInputBufferPtr input;
13591 xmlParserInputPtr stream;
13592
13593 if (ctxt == NULL)
13594 return (NULL);
13595 if (buffer == NULL)
13596 return (NULL);
13597
13598 xmlCtxtReset(ctxt);
13599
13600 input = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
13601 if (input == NULL) {
13602 return(NULL);
13603 }
13604
13605 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
13606 if (stream == NULL) {
13607 xmlFreeParserInputBuffer(input);
13608 return(NULL);
13609 }
13610
13611 inputPush(ctxt, stream);
13612 return (xmlDoRead(ctxt, URL, encoding, options, 1));
13613}
13614
13615/**
13616 * xmlCtxtReadFd:
13617 * @ctxt: an XML parser context
13618 * @fd: an open file descriptor
13619 * @URL: the base URL to use for the document
13620 * @encoding: the document encoding, or NULL
13621 * @options: a combination of xmlParserOption
13622 *
13623 * parse an XML from a file descriptor and build a tree.
13624 * This reuses the existing @ctxt parser context
13625 * NOTE that the file descriptor will not be closed when the
13626 * reader is closed or reset.
13627 *
13628 * Returns the resulting document tree
13629 */
13630xmlDocPtr
13631xmlCtxtReadFd(xmlParserCtxtPtr ctxt, int fd,
13632 const char *URL, const char *encoding, int options)
13633{
13634 xmlParserInputBufferPtr input;
13635 xmlParserInputPtr stream;
13636
13637 if (fd < 0)
13638 return (NULL);
13639 if (ctxt == NULL)
13640 return (NULL);
13641
13642 xmlCtxtReset(ctxt);
13643
13644
13645 input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
13646 if (input == NULL)
13647 return (NULL);
13648 input->closecallback = NULL;
13649 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
13650 if (stream == NULL) {
13651 xmlFreeParserInputBuffer(input);
13652 return (NULL);
13653 }
13654 inputPush(ctxt, stream);
13655 return (xmlDoRead(ctxt, URL, encoding, options, 1));
13656}
13657
13658/**
13659 * xmlCtxtReadIO:
13660 * @ctxt: an XML parser context
13661 * @ioread: an I/O read function
13662 * @ioclose: an I/O close function
13663 * @ioctx: an I/O handler
13664 * @URL: the base URL to use for the document
13665 * @encoding: the document encoding, or NULL
13666 * @options: a combination of xmlParserOption
13667 *
13668 * parse an XML document from I/O functions and source and build a tree.
13669 * This reuses the existing @ctxt parser context
13670 *
13671 * Returns the resulting document tree
13672 */
13673xmlDocPtr
13674xmlCtxtReadIO(xmlParserCtxtPtr ctxt, xmlInputReadCallback ioread,
13675 xmlInputCloseCallback ioclose, void *ioctx,
13676 const char *URL,
13677 const char *encoding, int options)
13678{
13679 xmlParserInputBufferPtr input;
13680 xmlParserInputPtr stream;
13681
13682 if (ioread == NULL)
13683 return (NULL);
13684 if (ctxt == NULL)
13685 return (NULL);
13686
13687 xmlCtxtReset(ctxt);
13688
13689 input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
13690 XML_CHAR_ENCODING_NONE);
13691 if (input == NULL)
13692 return (NULL);
13693 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
13694 if (stream == NULL) {
13695 xmlFreeParserInputBuffer(input);
13696 return (NULL);
13697 }
13698 inputPush(ctxt, stream);
13699 return (xmlDoRead(ctxt, URL, encoding, options, 1));
13700}
13701
13702#define bottom_parser
13703#include "elfgcchack.h"
Note: See TracBrowser for help on using the repository browser.

© 2025 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette