VirtualBox

source: vbox/trunk/src/libs/libxml2-2.6.30/parser.c@ 9259

Last change on this file since 9259 was 6076, checked in by vboxsync, 17 years ago

Merged dmik/s2 branch (r25959:26751) to the trunk.

  • Property svn:eol-style set to native
  • Property svn:keywords set to Date Revision Author Id
File size: 365.4 KB
Line 
1/*
2 * parser.c : an XML 1.0 parser, namespaces and validity support are mostly
3 * implemented on top of the SAX interfaces
4 *
5 * References:
6 * The XML specification:
7 * http://www.w3.org/TR/REC-xml
8 * Original 1.0 version:
9 * http://www.w3.org/TR/1998/REC-xml-19980210
10 * XML second edition working draft
11 * http://www.w3.org/TR/2000/WD-xml-2e-20000814
12 *
13 * Okay this is a big file, the parser core is around 7000 lines, then it
14 * is followed by the progressive parser top routines, then the various
15 * high level APIs to call the parser and a few miscellaneous functions.
16 * A number of helper functions and deprecated ones have been moved to
17 * parserInternals.c to reduce this file size.
18 * As much as possible the functions are associated with their relative
19 * production in the XML specification. A few productions defining the
20 * different ranges of character are actually implemented either in
21 * parserInternals.h or parserInternals.c
22 * The DOM tree build is realized from the default SAX callbacks in
23 * the module SAX.c.
24 * The routines doing the validation checks are in valid.c and called either
25 * from the SAX callbacks or as standalone functions using a preparsed
26 * document.
27 *
28 * See Copyright for the status of this software.
29 *
30 * [email protected]
31 */
32
33#define IN_LIBXML
34#include "libxml.h"
35
36#if defined(WIN32) && !defined (__CYGWIN__)
37#define XML_DIR_SEP '\\'
38#else
39#define XML_DIR_SEP '/'
40#endif
41
42#include <stdlib.h>
43#include <string.h>
44#include <stdarg.h>
45#include <libxml/xmlmemory.h>
46#include <libxml/threads.h>
47#include <libxml/globals.h>
48#include <libxml/tree.h>
49#include <libxml/parser.h>
50#include <libxml/parserInternals.h>
51#include <libxml/valid.h>
52#include <libxml/entities.h>
53#include <libxml/xmlerror.h>
54#include <libxml/encoding.h>
55#include <libxml/xmlIO.h>
56#include <libxml/uri.h>
57#ifdef LIBXML_CATALOG_ENABLED
58#include <libxml/catalog.h>
59#endif
60#ifdef LIBXML_SCHEMAS_ENABLED
61#include <libxml/xmlschemastypes.h>
62#include <libxml/relaxng.h>
63#endif
64#ifdef HAVE_CTYPE_H
65#include <ctype.h>
66#endif
67#ifdef HAVE_STDLIB_H
68#include <stdlib.h>
69#endif
70#ifdef HAVE_SYS_STAT_H
71#include <sys/stat.h>
72#endif
73#ifdef HAVE_FCNTL_H
74#include <fcntl.h>
75#endif
76#ifdef HAVE_UNISTD_H
77#include <unistd.h>
78#endif
79#ifdef HAVE_ZLIB_H
80#include <zlib.h>
81#endif
82
/**
 * xmlParserMaxDepth:
 *
 * Arbitrary upper bound on the depth of the XML documents the parser
 * accepts to process. It is a safety boundary, not a limitation of the
 * parser itself.
 */
unsigned int xmlParserMaxDepth = 1024;
91
92#define SAX2 1
93
94#define XML_PARSER_BIG_BUFFER_SIZE 300
95#define XML_PARSER_BUFFER_SIZE 100
96
97#define SAX_COMPAT_MODE BAD_CAST "SAX compatibility mode document"
98
/*
 * NULL-terminated list of the "xml"-prefixed PI targets allowed by the
 * W3C specs.
 */
static const char *xmlW3CPIs[] = { "xml-stylesheet", NULL };
107
108
109/* DEPR void xmlParserHandleReference(xmlParserCtxtPtr ctxt); */
110xmlEntityPtr xmlParseStringPEReference(xmlParserCtxtPtr ctxt,
111 const xmlChar **str);
112
113static xmlParserErrors
114xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
115 xmlSAXHandlerPtr sax,
116 void *user_data, int depth, const xmlChar *URL,
117 const xmlChar *ID, xmlNodePtr *list);
118
119#ifdef LIBXML_LEGACY_ENABLED
120static void
121xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
122 xmlNodePtr lastNode);
123#endif /* LIBXML_LEGACY_ENABLED */
124
125static xmlParserErrors
126xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
127 const xmlChar *string, void *user_data, xmlNodePtr *lst);
128
129/************************************************************************
130 * *
131 * Some factorized error routines *
132 * *
133 ************************************************************************/
134
135/**
136 * xmlErrAttributeDup:
137 * @ctxt: an XML parser context
138 * @prefix: the attribute prefix
139 * @localname: the attribute localname
140 *
141 * Handle a redefinition of attribute error
142 */
143static void
144xmlErrAttributeDup(xmlParserCtxtPtr ctxt, const xmlChar * prefix,
145 const xmlChar * localname)
146{
147 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
148 (ctxt->instate == XML_PARSER_EOF))
149 return;
150 if (ctxt != NULL)
151 ctxt->errNo = XML_ERR_ATTRIBUTE_REDEFINED;
152 if (prefix == NULL)
153 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
154 ctxt->errNo, XML_ERR_FATAL, NULL, 0,
155 (const char *) localname, NULL, NULL, 0, 0,
156 "Attribute %s redefined\n", localname);
157 else
158 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
159 ctxt->errNo, XML_ERR_FATAL, NULL, 0,
160 (const char *) prefix, (const char *) localname,
161 NULL, 0, 0, "Attribute %s:%s redefined\n", prefix,
162 localname);
163 if (ctxt != NULL) {
164 ctxt->wellFormed = 0;
165 if (ctxt->recovery == 0)
166 ctxt->disableSAX = 1;
167 }
168}
169
170/**
171 * xmlFatalErr:
172 * @ctxt: an XML parser context
173 * @error: the error number
174 * @extra: extra information string
175 *
176 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
177 */
178static void
179xmlFatalErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, const char *info)
180{
181 const char *errmsg;
182
183 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
184 (ctxt->instate == XML_PARSER_EOF))
185 return;
186 switch (error) {
187 case XML_ERR_INVALID_HEX_CHARREF:
188 errmsg = "CharRef: invalid hexadecimal value\n";
189 break;
190 case XML_ERR_INVALID_DEC_CHARREF:
191 errmsg = "CharRef: invalid decimal value\n";
192 break;
193 case XML_ERR_INVALID_CHARREF:
194 errmsg = "CharRef: invalid value\n";
195 break;
196 case XML_ERR_INTERNAL_ERROR:
197 errmsg = "internal error";
198 break;
199 case XML_ERR_PEREF_AT_EOF:
200 errmsg = "PEReference at end of document\n";
201 break;
202 case XML_ERR_PEREF_IN_PROLOG:
203 errmsg = "PEReference in prolog\n";
204 break;
205 case XML_ERR_PEREF_IN_EPILOG:
206 errmsg = "PEReference in epilog\n";
207 break;
208 case XML_ERR_PEREF_NO_NAME:
209 errmsg = "PEReference: no name\n";
210 break;
211 case XML_ERR_PEREF_SEMICOL_MISSING:
212 errmsg = "PEReference: expecting ';'\n";
213 break;
214 case XML_ERR_ENTITY_LOOP:
215 errmsg = "Detected an entity reference loop\n";
216 break;
217 case XML_ERR_ENTITY_NOT_STARTED:
218 errmsg = "EntityValue: \" or ' expected\n";
219 break;
220 case XML_ERR_ENTITY_PE_INTERNAL:
221 errmsg = "PEReferences forbidden in internal subset\n";
222 break;
223 case XML_ERR_ENTITY_NOT_FINISHED:
224 errmsg = "EntityValue: \" or ' expected\n";
225 break;
226 case XML_ERR_ATTRIBUTE_NOT_STARTED:
227 errmsg = "AttValue: \" or ' expected\n";
228 break;
229 case XML_ERR_LT_IN_ATTRIBUTE:
230 errmsg = "Unescaped '<' not allowed in attributes values\n";
231 break;
232 case XML_ERR_LITERAL_NOT_STARTED:
233 errmsg = "SystemLiteral \" or ' expected\n";
234 break;
235 case XML_ERR_LITERAL_NOT_FINISHED:
236 errmsg = "Unfinished System or Public ID \" or ' expected\n";
237 break;
238 case XML_ERR_MISPLACED_CDATA_END:
239 errmsg = "Sequence ']]>' not allowed in content\n";
240 break;
241 case XML_ERR_URI_REQUIRED:
242 errmsg = "SYSTEM or PUBLIC, the URI is missing\n";
243 break;
244 case XML_ERR_PUBID_REQUIRED:
245 errmsg = "PUBLIC, the Public Identifier is missing\n";
246 break;
247 case XML_ERR_HYPHEN_IN_COMMENT:
248 errmsg = "Comment must not contain '--' (double-hyphen)\n";
249 break;
250 case XML_ERR_PI_NOT_STARTED:
251 errmsg = "xmlParsePI : no target name\n";
252 break;
253 case XML_ERR_RESERVED_XML_NAME:
254 errmsg = "Invalid PI name\n";
255 break;
256 case XML_ERR_NOTATION_NOT_STARTED:
257 errmsg = "NOTATION: Name expected here\n";
258 break;
259 case XML_ERR_NOTATION_NOT_FINISHED:
260 errmsg = "'>' required to close NOTATION declaration\n";
261 break;
262 case XML_ERR_VALUE_REQUIRED:
263 errmsg = "Entity value required\n";
264 break;
265 case XML_ERR_URI_FRAGMENT:
266 errmsg = "Fragment not allowed";
267 break;
268 case XML_ERR_ATTLIST_NOT_STARTED:
269 errmsg = "'(' required to start ATTLIST enumeration\n";
270 break;
271 case XML_ERR_NMTOKEN_REQUIRED:
272 errmsg = "NmToken expected in ATTLIST enumeration\n";
273 break;
274 case XML_ERR_ATTLIST_NOT_FINISHED:
275 errmsg = "')' required to finish ATTLIST enumeration\n";
276 break;
277 case XML_ERR_MIXED_NOT_STARTED:
278 errmsg = "MixedContentDecl : '|' or ')*' expected\n";
279 break;
280 case XML_ERR_PCDATA_REQUIRED:
281 errmsg = "MixedContentDecl : '#PCDATA' expected\n";
282 break;
283 case XML_ERR_ELEMCONTENT_NOT_STARTED:
284 errmsg = "ContentDecl : Name or '(' expected\n";
285 break;
286 case XML_ERR_ELEMCONTENT_NOT_FINISHED:
287 errmsg = "ContentDecl : ',' '|' or ')' expected\n";
288 break;
289 case XML_ERR_PEREF_IN_INT_SUBSET:
290 errmsg =
291 "PEReference: forbidden within markup decl in internal subset\n";
292 break;
293 case XML_ERR_GT_REQUIRED:
294 errmsg = "expected '>'\n";
295 break;
296 case XML_ERR_CONDSEC_INVALID:
297 errmsg = "XML conditional section '[' expected\n";
298 break;
299 case XML_ERR_EXT_SUBSET_NOT_FINISHED:
300 errmsg = "Content error in the external subset\n";
301 break;
302 case XML_ERR_CONDSEC_INVALID_KEYWORD:
303 errmsg =
304 "conditional section INCLUDE or IGNORE keyword expected\n";
305 break;
306 case XML_ERR_CONDSEC_NOT_FINISHED:
307 errmsg = "XML conditional section not closed\n";
308 break;
309 case XML_ERR_XMLDECL_NOT_STARTED:
310 errmsg = "Text declaration '<?xml' required\n";
311 break;
312 case XML_ERR_XMLDECL_NOT_FINISHED:
313 errmsg = "parsing XML declaration: '?>' expected\n";
314 break;
315 case XML_ERR_EXT_ENTITY_STANDALONE:
316 errmsg = "external parsed entities cannot be standalone\n";
317 break;
318 case XML_ERR_ENTITYREF_SEMICOL_MISSING:
319 errmsg = "EntityRef: expecting ';'\n";
320 break;
321 case XML_ERR_DOCTYPE_NOT_FINISHED:
322 errmsg = "DOCTYPE improperly terminated\n";
323 break;
324 case XML_ERR_LTSLASH_REQUIRED:
325 errmsg = "EndTag: '</' not found\n";
326 break;
327 case XML_ERR_EQUAL_REQUIRED:
328 errmsg = "expected '='\n";
329 break;
330 case XML_ERR_STRING_NOT_CLOSED:
331 errmsg = "String not closed expecting \" or '\n";
332 break;
333 case XML_ERR_STRING_NOT_STARTED:
334 errmsg = "String not started expecting ' or \"\n";
335 break;
336 case XML_ERR_ENCODING_NAME:
337 errmsg = "Invalid XML encoding name\n";
338 break;
339 case XML_ERR_STANDALONE_VALUE:
340 errmsg = "standalone accepts only 'yes' or 'no'\n";
341 break;
342 case XML_ERR_DOCUMENT_EMPTY:
343 errmsg = "Document is empty\n";
344 break;
345 case XML_ERR_DOCUMENT_END:
346 errmsg = "Extra content at the end of the document\n";
347 break;
348 case XML_ERR_NOT_WELL_BALANCED:
349 errmsg = "chunk is not well balanced\n";
350 break;
351 case XML_ERR_EXTRA_CONTENT:
352 errmsg = "extra content at the end of well balanced chunk\n";
353 break;
354 case XML_ERR_VERSION_MISSING:
355 errmsg = "Malformed declaration expecting version\n";
356 break;
357#if 0
358 case:
359 errmsg = "\n";
360 break;
361#endif
362 default:
363 errmsg = "Unregistered error message\n";
364 }
365 if (ctxt != NULL)
366 ctxt->errNo = error;
367 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
368 XML_ERR_FATAL, NULL, 0, info, NULL, NULL, 0, 0, errmsg,
369 info);
370 if (ctxt != NULL) {
371 ctxt->wellFormed = 0;
372 if (ctxt->recovery == 0)
373 ctxt->disableSAX = 1;
374 }
375}
376
377/**
378 * xmlFatalErrMsg:
379 * @ctxt: an XML parser context
380 * @error: the error number
381 * @msg: the error message
382 *
383 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
384 */
385static void
386xmlFatalErrMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
387 const char *msg)
388{
389 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
390 (ctxt->instate == XML_PARSER_EOF))
391 return;
392 if (ctxt != NULL)
393 ctxt->errNo = error;
394 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
395 XML_ERR_FATAL, NULL, 0, NULL, NULL, NULL, 0, 0, msg);
396 if (ctxt != NULL) {
397 ctxt->wellFormed = 0;
398 if (ctxt->recovery == 0)
399 ctxt->disableSAX = 1;
400 }
401}
402
403/**
404 * xmlWarningMsg:
405 * @ctxt: an XML parser context
406 * @error: the error number
407 * @msg: the error message
408 * @str1: extra data
409 * @str2: extra data
410 *
411 * Handle a warning.
412 */
413static void
414xmlWarningMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
415 const char *msg, const xmlChar *str1, const xmlChar *str2)
416{
417 xmlStructuredErrorFunc schannel = NULL;
418
419 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
420 (ctxt->instate == XML_PARSER_EOF))
421 return;
422 if ((ctxt != NULL) && (ctxt->sax != NULL) &&
423 (ctxt->sax->initialized == XML_SAX2_MAGIC))
424 schannel = ctxt->sax->serror;
425 __xmlRaiseError(schannel,
426 (ctxt->sax) ? ctxt->sax->warning : NULL,
427 ctxt->userData,
428 ctxt, NULL, XML_FROM_PARSER, error,
429 XML_ERR_WARNING, NULL, 0,
430 (const char *) str1, (const char *) str2, NULL, 0, 0,
431 msg, (const char *) str1, (const char *) str2);
432}
433
434/**
435 * xmlValidityError:
436 * @ctxt: an XML parser context
437 * @error: the error number
438 * @msg: the error message
439 * @str1: extra data
440 *
441 * Handle a validity error.
442 */
443static void
444xmlValidityError(xmlParserCtxtPtr ctxt, xmlParserErrors error,
445 const char *msg, const xmlChar *str1)
446{
447 xmlStructuredErrorFunc schannel = NULL;
448
449 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
450 (ctxt->instate == XML_PARSER_EOF))
451 return;
452 if (ctxt != NULL) {
453 ctxt->errNo = error;
454 if ((ctxt->sax != NULL) && (ctxt->sax->initialized == XML_SAX2_MAGIC))
455 schannel = ctxt->sax->serror;
456 }
457 __xmlRaiseError(schannel,
458 ctxt->vctxt.error, ctxt->vctxt.userData,
459 ctxt, NULL, XML_FROM_DTD, error,
460 XML_ERR_ERROR, NULL, 0, (const char *) str1,
461 NULL, NULL, 0, 0,
462 msg, (const char *) str1);
463 if (ctxt != NULL) {
464 ctxt->valid = 0;
465 }
466}
467
468/**
469 * xmlFatalErrMsgInt:
470 * @ctxt: an XML parser context
471 * @error: the error number
472 * @msg: the error message
473 * @val: an integer value
474 *
475 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
476 */
477static void
478xmlFatalErrMsgInt(xmlParserCtxtPtr ctxt, xmlParserErrors error,
479 const char *msg, int val)
480{
481 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
482 (ctxt->instate == XML_PARSER_EOF))
483 return;
484 if (ctxt != NULL)
485 ctxt->errNo = error;
486 __xmlRaiseError(NULL, NULL, NULL,
487 ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
488 NULL, 0, NULL, NULL, NULL, val, 0, msg, val);
489 if (ctxt != NULL) {
490 ctxt->wellFormed = 0;
491 if (ctxt->recovery == 0)
492 ctxt->disableSAX = 1;
493 }
494}
495
496/**
497 * xmlFatalErrMsgStrIntStr:
498 * @ctxt: an XML parser context
499 * @error: the error number
500 * @msg: the error message
501 * @str1: an string info
502 * @val: an integer value
503 * @str2: an string info
504 *
505 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
506 */
507static void
508xmlFatalErrMsgStrIntStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
509 const char *msg, const xmlChar *str1, int val,
510 const xmlChar *str2)
511{
512 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
513 (ctxt->instate == XML_PARSER_EOF))
514 return;
515 if (ctxt != NULL)
516 ctxt->errNo = error;
517 __xmlRaiseError(NULL, NULL, NULL,
518 ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
519 NULL, 0, (const char *) str1, (const char *) str2,
520 NULL, val, 0, msg, str1, val, str2);
521 if (ctxt != NULL) {
522 ctxt->wellFormed = 0;
523 if (ctxt->recovery == 0)
524 ctxt->disableSAX = 1;
525 }
526}
527
528/**
529 * xmlFatalErrMsgStr:
530 * @ctxt: an XML parser context
531 * @error: the error number
532 * @msg: the error message
533 * @val: a string value
534 *
535 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
536 */
537static void
538xmlFatalErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
539 const char *msg, const xmlChar * val)
540{
541 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
542 (ctxt->instate == XML_PARSER_EOF))
543 return;
544 if (ctxt != NULL)
545 ctxt->errNo = error;
546 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
547 XML_FROM_PARSER, error, XML_ERR_FATAL,
548 NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
549 val);
550 if (ctxt != NULL) {
551 ctxt->wellFormed = 0;
552 if (ctxt->recovery == 0)
553 ctxt->disableSAX = 1;
554 }
555}
556
557/**
558 * xmlErrMsgStr:
559 * @ctxt: an XML parser context
560 * @error: the error number
561 * @msg: the error message
562 * @val: a string value
563 *
564 * Handle a non fatal parser error
565 */
566static void
567xmlErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
568 const char *msg, const xmlChar * val)
569{
570 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
571 (ctxt->instate == XML_PARSER_EOF))
572 return;
573 if (ctxt != NULL)
574 ctxt->errNo = error;
575 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
576 XML_FROM_PARSER, error, XML_ERR_ERROR,
577 NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
578 val);
579}
580
581/**
582 * xmlNsErr:
583 * @ctxt: an XML parser context
584 * @error: the error number
585 * @msg: the message
586 * @info1: extra information string
587 * @info2: extra information string
588 *
589 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
590 */
591static void
592xmlNsErr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
593 const char *msg,
594 const xmlChar * info1, const xmlChar * info2,
595 const xmlChar * info3)
596{
597 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
598 (ctxt->instate == XML_PARSER_EOF))
599 return;
600 if (ctxt != NULL)
601 ctxt->errNo = error;
602 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error,
603 XML_ERR_ERROR, NULL, 0, (const char *) info1,
604 (const char *) info2, (const char *) info3, 0, 0, msg,
605 info1, info2, info3);
606 if (ctxt != NULL)
607 ctxt->nsWellFormed = 0;
608}
609
610/************************************************************************
611 * *
612 * Library wide options *
613 * *
614 ************************************************************************/
615
616/**
617 * xmlHasFeature:
618 * @feature: the feature to be examined
619 *
620 * Examines if the library has been compiled with a given feature.
621 *
622 * Returns a non-zero value if the feature exist, otherwise zero.
623 * Returns zero (0) if the feature does not exist or an unknown
624 * unknown feature is requested, non-zero otherwise.
625 */
626int
627xmlHasFeature(xmlFeature feature)
628{
629 switch (feature) {
630 case XML_WITH_THREAD:
631#ifdef LIBXML_THREAD_ENABLED
632 return(1);
633#else
634 return(0);
635#endif
636 case XML_WITH_TREE:
637#ifdef LIBXML_TREE_ENABLED
638 return(1);
639#else
640 return(0);
641#endif
642 case XML_WITH_OUTPUT:
643#ifdef LIBXML_OUTPUT_ENABLED
644 return(1);
645#else
646 return(0);
647#endif
648 case XML_WITH_PUSH:
649#ifdef LIBXML_PUSH_ENABLED
650 return(1);
651#else
652 return(0);
653#endif
654 case XML_WITH_READER:
655#ifdef LIBXML_READER_ENABLED
656 return(1);
657#else
658 return(0);
659#endif
660 case XML_WITH_PATTERN:
661#ifdef LIBXML_PATTERN_ENABLED
662 return(1);
663#else
664 return(0);
665#endif
666 case XML_WITH_WRITER:
667#ifdef LIBXML_WRITER_ENABLED
668 return(1);
669#else
670 return(0);
671#endif
672 case XML_WITH_SAX1:
673#ifdef LIBXML_SAX1_ENABLED
674 return(1);
675#else
676 return(0);
677#endif
678 case XML_WITH_FTP:
679#ifdef LIBXML_FTP_ENABLED
680 return(1);
681#else
682 return(0);
683#endif
684 case XML_WITH_HTTP:
685#ifdef LIBXML_HTTP_ENABLED
686 return(1);
687#else
688 return(0);
689#endif
690 case XML_WITH_VALID:
691#ifdef LIBXML_VALID_ENABLED
692 return(1);
693#else
694 return(0);
695#endif
696 case XML_WITH_HTML:
697#ifdef LIBXML_HTML_ENABLED
698 return(1);
699#else
700 return(0);
701#endif
702 case XML_WITH_LEGACY:
703#ifdef LIBXML_LEGACY_ENABLED
704 return(1);
705#else
706 return(0);
707#endif
708 case XML_WITH_C14N:
709#ifdef LIBXML_C14N_ENABLED
710 return(1);
711#else
712 return(0);
713#endif
714 case XML_WITH_CATALOG:
715#ifdef LIBXML_CATALOG_ENABLED
716 return(1);
717#else
718 return(0);
719#endif
720 case XML_WITH_XPATH:
721#ifdef LIBXML_XPATH_ENABLED
722 return(1);
723#else
724 return(0);
725#endif
726 case XML_WITH_XPTR:
727#ifdef LIBXML_XPTR_ENABLED
728 return(1);
729#else
730 return(0);
731#endif
732 case XML_WITH_XINCLUDE:
733#ifdef LIBXML_XINCLUDE_ENABLED
734 return(1);
735#else
736 return(0);
737#endif
738 case XML_WITH_ICONV:
739#ifdef LIBXML_ICONV_ENABLED
740 return(1);
741#else
742 return(0);
743#endif
744 case XML_WITH_ISO8859X:
745#ifdef LIBXML_ISO8859X_ENABLED
746 return(1);
747#else
748 return(0);
749#endif
750 case XML_WITH_UNICODE:
751#ifdef LIBXML_UNICODE_ENABLED
752 return(1);
753#else
754 return(0);
755#endif
756 case XML_WITH_REGEXP:
757#ifdef LIBXML_REGEXP_ENABLED
758 return(1);
759#else
760 return(0);
761#endif
762 case XML_WITH_AUTOMATA:
763#ifdef LIBXML_AUTOMATA_ENABLED
764 return(1);
765#else
766 return(0);
767#endif
768 case XML_WITH_EXPR:
769#ifdef LIBXML_EXPR_ENABLED
770 return(1);
771#else
772 return(0);
773#endif
774 case XML_WITH_SCHEMAS:
775#ifdef LIBXML_SCHEMAS_ENABLED
776 return(1);
777#else
778 return(0);
779#endif
780 case XML_WITH_SCHEMATRON:
781#ifdef LIBXML_SCHEMATRON_ENABLED
782 return(1);
783#else
784 return(0);
785#endif
786 case XML_WITH_MODULES:
787#ifdef LIBXML_MODULES_ENABLED
788 return(1);
789#else
790 return(0);
791#endif
792 case XML_WITH_DEBUG:
793#ifdef LIBXML_DEBUG_ENABLED
794 return(1);
795#else
796 return(0);
797#endif
798 case XML_WITH_DEBUG_MEM:
799#ifdef DEBUG_MEMORY_LOCATION
800 return(1);
801#else
802 return(0);
803#endif
804 case XML_WITH_DEBUG_RUN:
805#ifdef LIBXML_DEBUG_RUNTIME
806 return(1);
807#else
808 return(0);
809#endif
810 case XML_WITH_ZLIB:
811#ifdef LIBXML_ZLIB_ENABLED
812 return(1);
813#else
814 return(0);
815#endif
816 default:
817 break;
818 }
819 return(0);
820}
821
822/************************************************************************
823 * *
824 * SAX2 defaulted attributes handling *
825 * *
826 ************************************************************************/
827
828/**
829 * xmlDetectSAX2:
830 * @ctxt: an XML parser context
831 *
832 * Do the SAX2 detection and specific intialization
833 */
834static void
835xmlDetectSAX2(xmlParserCtxtPtr ctxt) {
836 if (ctxt == NULL) return;
837#ifdef LIBXML_SAX1_ENABLED
838 if ((ctxt->sax) && (ctxt->sax->initialized == XML_SAX2_MAGIC) &&
839 ((ctxt->sax->startElementNs != NULL) ||
840 (ctxt->sax->endElementNs != NULL))) ctxt->sax2 = 1;
841#else
842 ctxt->sax2 = 1;
843#endif /* LIBXML_SAX1_ENABLED */
844
845 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
846 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
847 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
848 if ((ctxt->str_xml==NULL) || (ctxt->str_xmlns==NULL) ||
849 (ctxt->str_xml_ns == NULL)) {
850 xmlErrMemory(ctxt, NULL);
851 }
852}
853
854typedef struct _xmlDefAttrs xmlDefAttrs;
855typedef xmlDefAttrs *xmlDefAttrsPtr;
856struct _xmlDefAttrs {
857 int nbAttrs; /* number of defaulted attributes on that element */
858 int maxAttrs; /* the size of the array */
859 const xmlChar *values[4]; /* array of localname/prefix/values */
860};
861
862/**
863 * xmlAddDefAttrs:
864 * @ctxt: an XML parser context
865 * @fullname: the element fullname
866 * @fullattr: the attribute fullname
867 * @value: the attribute value
868 *
869 * Add a defaulted attribute for an element
870 */
871static void
872xmlAddDefAttrs(xmlParserCtxtPtr ctxt,
873 const xmlChar *fullname,
874 const xmlChar *fullattr,
875 const xmlChar *value) {
876 xmlDefAttrsPtr defaults;
877 int len;
878 const xmlChar *name;
879 const xmlChar *prefix;
880
881 if (ctxt->attsDefault == NULL) {
882 ctxt->attsDefault = xmlHashCreateDict(10, ctxt->dict);
883 if (ctxt->attsDefault == NULL)
884 goto mem_error;
885 }
886
887 /*
888 * split the element name into prefix:localname , the string found
889 * are within the DTD and then not associated to namespace names.
890 */
891 name = xmlSplitQName3(fullname, &len);
892 if (name == NULL) {
893 name = xmlDictLookup(ctxt->dict, fullname, -1);
894 prefix = NULL;
895 } else {
896 name = xmlDictLookup(ctxt->dict, name, -1);
897 prefix = xmlDictLookup(ctxt->dict, fullname, len);
898 }
899
900 /*
901 * make sure there is some storage
902 */
903 defaults = xmlHashLookup2(ctxt->attsDefault, name, prefix);
904 if (defaults == NULL) {
905 defaults = (xmlDefAttrsPtr) xmlMalloc(sizeof(xmlDefAttrs) +
906 (4 * 4) * sizeof(const xmlChar *));
907 if (defaults == NULL)
908 goto mem_error;
909 defaults->nbAttrs = 0;
910 defaults->maxAttrs = 4;
911 xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix, defaults, NULL);
912 } else if (defaults->nbAttrs >= defaults->maxAttrs) {
913 xmlDefAttrsPtr temp;
914
915 temp = (xmlDefAttrsPtr) xmlRealloc(defaults, sizeof(xmlDefAttrs) +
916 (2 * defaults->maxAttrs * 4) * sizeof(const xmlChar *));
917 if (temp == NULL)
918 goto mem_error;
919 defaults = temp;
920 defaults->maxAttrs *= 2;
921 xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix, defaults, NULL);
922 }
923
924 /*
925 * Split the element name into prefix:localname , the string found
926 * are within the DTD and hen not associated to namespace names.
927 */
928 name = xmlSplitQName3(fullattr, &len);
929 if (name == NULL) {
930 name = xmlDictLookup(ctxt->dict, fullattr, -1);
931 prefix = NULL;
932 } else {
933 name = xmlDictLookup(ctxt->dict, name, -1);
934 prefix = xmlDictLookup(ctxt->dict, fullattr, len);
935 }
936
937 defaults->values[4 * defaults->nbAttrs] = name;
938 defaults->values[4 * defaults->nbAttrs + 1] = prefix;
939 /* intern the string and precompute the end */
940 len = xmlStrlen(value);
941 value = xmlDictLookup(ctxt->dict, value, len);
942 defaults->values[4 * defaults->nbAttrs + 2] = value;
943 defaults->values[4 * defaults->nbAttrs + 3] = value + len;
944 defaults->nbAttrs++;
945
946 return;
947
948mem_error:
949 xmlErrMemory(ctxt, NULL);
950 return;
951}
952
953/**
954 * xmlAddSpecialAttr:
955 * @ctxt: an XML parser context
956 * @fullname: the element fullname
957 * @fullattr: the attribute fullname
958 * @type: the attribute type
959 *
960 * Register that this attribute is not CDATA
961 */
962static void
963xmlAddSpecialAttr(xmlParserCtxtPtr ctxt,
964 const xmlChar *fullname,
965 const xmlChar *fullattr,
966 int type)
967{
968 if (ctxt->attsSpecial == NULL) {
969 ctxt->attsSpecial = xmlHashCreateDict(10, ctxt->dict);
970 if (ctxt->attsSpecial == NULL)
971 goto mem_error;
972 }
973
974 xmlHashAddEntry2(ctxt->attsSpecial, fullname, fullattr,
975 (void *) (long) type);
976 return;
977
978mem_error:
979 xmlErrMemory(ctxt, NULL);
980 return;
981}
982
983/**
984 * xmlCheckLanguageID:
985 * @lang: pointer to the string value
986 *
987 * Checks that the value conforms to the LanguageID production:
988 *
989 * NOTE: this is somewhat deprecated, those productions were removed from
990 * the XML Second edition.
991 *
992 * [33] LanguageID ::= Langcode ('-' Subcode)*
993 * [34] Langcode ::= ISO639Code | IanaCode | UserCode
994 * [35] ISO639Code ::= ([a-z] | [A-Z]) ([a-z] | [A-Z])
995 * [36] IanaCode ::= ('i' | 'I') '-' ([a-z] | [A-Z])+
996 * [37] UserCode ::= ('x' | 'X') '-' ([a-z] | [A-Z])+
997 * [38] Subcode ::= ([a-z] | [A-Z])+
998 *
999 * Returns 1 if correct 0 otherwise
1000 **/
1001int
1002xmlCheckLanguageID(const xmlChar * lang)
1003{
1004 const xmlChar *cur = lang;
1005
1006 if (cur == NULL)
1007 return (0);
1008 if (((cur[0] == 'i') && (cur[1] == '-')) ||
1009 ((cur[0] == 'I') && (cur[1] == '-'))) {
1010 /*
1011 * IANA code
1012 */
1013 cur += 2;
1014 while (((cur[0] >= 'A') && (cur[0] <= 'Z')) || /* non input consuming */
1015 ((cur[0] >= 'a') && (cur[0] <= 'z')))
1016 cur++;
1017 } else if (((cur[0] == 'x') && (cur[1] == '-')) ||
1018 ((cur[0] == 'X') && (cur[1] == '-'))) {
1019 /*
1020 * User code
1021 */
1022 cur += 2;
1023 while (((cur[0] >= 'A') && (cur[0] <= 'Z')) || /* non input consuming */
1024 ((cur[0] >= 'a') && (cur[0] <= 'z')))
1025 cur++;
1026 } else if (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
1027 ((cur[0] >= 'a') && (cur[0] <= 'z'))) {
1028 /*
1029 * ISO639
1030 */
1031 cur++;
1032 if (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
1033 ((cur[0] >= 'a') && (cur[0] <= 'z')))
1034 cur++;
1035 else
1036 return (0);
1037 } else
1038 return (0);
1039 while (cur[0] != 0) { /* non input consuming */
1040 if (cur[0] != '-')
1041 return (0);
1042 cur++;
1043 if (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
1044 ((cur[0] >= 'a') && (cur[0] <= 'z')))
1045 cur++;
1046 else
1047 return (0);
1048 while (((cur[0] >= 'A') && (cur[0] <= 'Z')) || /* non input consuming */
1049 ((cur[0] >= 'a') && (cur[0] <= 'z')))
1050 cur++;
1051 }
1052 return (1);
1053}
1054
1055/************************************************************************
1056 * *
1057 * Parser stacks related functions and macros *
1058 * *
1059 ************************************************************************/
1060
1061xmlEntityPtr xmlParseStringEntityRef(xmlParserCtxtPtr ctxt,
1062 const xmlChar ** str);
1063
1064#ifdef SAX2
1065/**
1066 * nsPush:
1067 * @ctxt: an XML parser context
1068 * @prefix: the namespace prefix or NULL
1069 * @URL: the namespace name
1070 *
1071 * Pushes a new parser namespace on top of the ns stack
1072 *
1073 * Returns -1 in case of error, -2 if the namespace should be discarded
1074 * and the index in the stack otherwise.
1075 */
1076static int
1077nsPush(xmlParserCtxtPtr ctxt, const xmlChar *prefix, const xmlChar *URL)
1078{
1079 if (ctxt->options & XML_PARSE_NSCLEAN) {
1080 int i;
1081 for (i = 0;i < ctxt->nsNr;i += 2) {
1082 if (ctxt->nsTab[i] == prefix) {
1083 /* in scope */
1084 if (ctxt->nsTab[i + 1] == URL)
1085 return(-2);
1086 /* out of scope keep it */
1087 break;
1088 }
1089 }
1090 }
1091 if ((ctxt->nsMax == 0) || (ctxt->nsTab == NULL)) {
1092 ctxt->nsMax = 10;
1093 ctxt->nsNr = 0;
1094 ctxt->nsTab = (const xmlChar **)
1095 xmlMalloc(ctxt->nsMax * sizeof(xmlChar *));
1096 if (ctxt->nsTab == NULL) {
1097 xmlErrMemory(ctxt, NULL);
1098 ctxt->nsMax = 0;
1099 return (-1);
1100 }
1101 } else if (ctxt->nsNr >= ctxt->nsMax) {
1102 ctxt->nsMax *= 2;
1103 ctxt->nsTab = (const xmlChar **)
1104 xmlRealloc((char *) ctxt->nsTab,
1105 ctxt->nsMax * sizeof(ctxt->nsTab[0]));
1106 if (ctxt->nsTab == NULL) {
1107 xmlErrMemory(ctxt, NULL);
1108 ctxt->nsMax /= 2;
1109 return (-1);
1110 }
1111 }
1112 ctxt->nsTab[ctxt->nsNr++] = prefix;
1113 ctxt->nsTab[ctxt->nsNr++] = URL;
1114 return (ctxt->nsNr);
1115}
1116/**
1117 * nsPop:
1118 * @ctxt: an XML parser context
1119 * @nr: the number to pop
1120 *
1121 * Pops the top @nr parser prefix/namespace from the ns stack
1122 *
1123 * Returns the number of namespaces removed
1124 */
1125static int
1126nsPop(xmlParserCtxtPtr ctxt, int nr)
1127{
1128 int i;
1129
1130 if (ctxt->nsTab == NULL) return(0);
1131 if (ctxt->nsNr < nr) {
1132 xmlGenericError(xmlGenericErrorContext, "Pbm popping %d NS\n", nr);
1133 nr = ctxt->nsNr;
1134 }
1135 if (ctxt->nsNr <= 0)
1136 return (0);
1137
1138 for (i = 0;i < nr;i++) {
1139 ctxt->nsNr--;
1140 ctxt->nsTab[ctxt->nsNr] = NULL;
1141 }
1142 return(nr);
1143}
1144#endif
1145
1146static int
1147xmlCtxtGrowAttrs(xmlParserCtxtPtr ctxt, int nr) {
1148 const xmlChar **atts;
1149 int *attallocs;
1150 int maxatts;
1151
1152 if (ctxt->atts == NULL) {
1153 maxatts = 55; /* allow for 10 attrs by default */
1154 atts = (const xmlChar **)
1155 xmlMalloc(maxatts * sizeof(xmlChar *));
1156 if (atts == NULL) goto mem_error;
1157 ctxt->atts = atts;
1158 attallocs = (int *) xmlMalloc((maxatts / 5) * sizeof(int));
1159 if (attallocs == NULL) goto mem_error;
1160 ctxt->attallocs = attallocs;
1161 ctxt->maxatts = maxatts;
1162 } else if (nr + 5 > ctxt->maxatts) {
1163 maxatts = (nr + 5) * 2;
1164 atts = (const xmlChar **) xmlRealloc((void *) ctxt->atts,
1165 maxatts * sizeof(const xmlChar *));
1166 if (atts == NULL) goto mem_error;
1167 ctxt->atts = atts;
1168 attallocs = (int *) xmlRealloc((void *) ctxt->attallocs,
1169 (maxatts / 5) * sizeof(int));
1170 if (attallocs == NULL) goto mem_error;
1171 ctxt->attallocs = attallocs;
1172 ctxt->maxatts = maxatts;
1173 }
1174 return(ctxt->maxatts);
1175mem_error:
1176 xmlErrMemory(ctxt, NULL);
1177 return(-1);
1178}
1179
1180/**
1181 * inputPush:
1182 * @ctxt: an XML parser context
1183 * @value: the parser input
1184 *
1185 * Pushes a new parser input on top of the input stack
1186 *
1187 * Returns 0 in case of error, the index in the stack otherwise
1188 */
1189int
1190inputPush(xmlParserCtxtPtr ctxt, xmlParserInputPtr value)
1191{
1192 if ((ctxt == NULL) || (value == NULL))
1193 return(0);
1194 if (ctxt->inputNr >= ctxt->inputMax) {
1195 ctxt->inputMax *= 2;
1196 ctxt->inputTab =
1197 (xmlParserInputPtr *) xmlRealloc(ctxt->inputTab,
1198 ctxt->inputMax *
1199 sizeof(ctxt->inputTab[0]));
1200 if (ctxt->inputTab == NULL) {
1201 xmlErrMemory(ctxt, NULL);
1202 return (0);
1203 }
1204 }
1205 ctxt->inputTab[ctxt->inputNr] = value;
1206 ctxt->input = value;
1207 return (ctxt->inputNr++);
1208}
1209/**
1210 * inputPop:
1211 * @ctxt: an XML parser context
1212 *
1213 * Pops the top parser input from the input stack
1214 *
1215 * Returns the input just removed
1216 */
1217xmlParserInputPtr
1218inputPop(xmlParserCtxtPtr ctxt)
1219{
1220 xmlParserInputPtr ret;
1221
1222 if (ctxt == NULL)
1223 return(NULL);
1224 if (ctxt->inputNr <= 0)
1225 return (NULL);
1226 ctxt->inputNr--;
1227 if (ctxt->inputNr > 0)
1228 ctxt->input = ctxt->inputTab[ctxt->inputNr - 1];
1229 else
1230 ctxt->input = NULL;
1231 ret = ctxt->inputTab[ctxt->inputNr];
1232 ctxt->inputTab[ctxt->inputNr] = NULL;
1233 return (ret);
1234}
1235/**
1236 * nodePush:
1237 * @ctxt: an XML parser context
1238 * @value: the element node
1239 *
1240 * Pushes a new element node on top of the node stack
1241 *
1242 * Returns 0 in case of error, the index in the stack otherwise
1243 */
1244int
1245nodePush(xmlParserCtxtPtr ctxt, xmlNodePtr value)
1246{
1247 if (ctxt == NULL) return(0);
1248 if (ctxt->nodeNr >= ctxt->nodeMax) {
1249 xmlNodePtr *tmp;
1250
1251 tmp = (xmlNodePtr *) xmlRealloc(ctxt->nodeTab,
1252 ctxt->nodeMax * 2 *
1253 sizeof(ctxt->nodeTab[0]));
1254 if (tmp == NULL) {
1255 xmlErrMemory(ctxt, NULL);
1256 return (0);
1257 }
1258 ctxt->nodeTab = tmp;
1259 ctxt->nodeMax *= 2;
1260 }
1261 if (((unsigned int) ctxt->nodeNr) > xmlParserMaxDepth) {
1262 xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR,
1263 "Excessive depth in document: change xmlParserMaxDepth = %d\n",
1264 xmlParserMaxDepth);
1265 ctxt->instate = XML_PARSER_EOF;
1266 return(0);
1267 }
1268 ctxt->nodeTab[ctxt->nodeNr] = value;
1269 ctxt->node = value;
1270 return (ctxt->nodeNr++);
1271}
1272/**
1273 * nodePop:
1274 * @ctxt: an XML parser context
1275 *
1276 * Pops the top element node from the node stack
1277 *
1278 * Returns the node just removed
1279 */
1280xmlNodePtr
1281nodePop(xmlParserCtxtPtr ctxt)
1282{
1283 xmlNodePtr ret;
1284
1285 if (ctxt == NULL) return(NULL);
1286 if (ctxt->nodeNr <= 0)
1287 return (NULL);
1288 ctxt->nodeNr--;
1289 if (ctxt->nodeNr > 0)
1290 ctxt->node = ctxt->nodeTab[ctxt->nodeNr - 1];
1291 else
1292 ctxt->node = NULL;
1293 ret = ctxt->nodeTab[ctxt->nodeNr];
1294 ctxt->nodeTab[ctxt->nodeNr] = NULL;
1295 return (ret);
1296}
1297
1298#ifdef LIBXML_PUSH_ENABLED
1299/**
1300 * nameNsPush:
1301 * @ctxt: an XML parser context
1302 * @value: the element name
1303 * @prefix: the element prefix
1304 * @URI: the element namespace name
1305 *
1306 * Pushes a new element name/prefix/URL on top of the name stack
1307 *
1308 * Returns -1 in case of error, the index in the stack otherwise
1309 */
1310static int
1311nameNsPush(xmlParserCtxtPtr ctxt, const xmlChar * value,
1312 const xmlChar *prefix, const xmlChar *URI, int nsNr)
1313{
1314 if (ctxt->nameNr >= ctxt->nameMax) {
1315 const xmlChar * *tmp;
1316 void **tmp2;
1317 ctxt->nameMax *= 2;
1318 tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
1319 ctxt->nameMax *
1320 sizeof(ctxt->nameTab[0]));
1321 if (tmp == NULL) {
1322 ctxt->nameMax /= 2;
1323 goto mem_error;
1324 }
1325 ctxt->nameTab = tmp;
1326 tmp2 = (void **) xmlRealloc((void * *)ctxt->pushTab,
1327 ctxt->nameMax * 3 *
1328 sizeof(ctxt->pushTab[0]));
1329 if (tmp2 == NULL) {
1330 ctxt->nameMax /= 2;
1331 goto mem_error;
1332 }
1333 ctxt->pushTab = tmp2;
1334 }
1335 ctxt->nameTab[ctxt->nameNr] = value;
1336 ctxt->name = value;
1337 ctxt->pushTab[ctxt->nameNr * 3] = (void *) prefix;
1338 ctxt->pushTab[ctxt->nameNr * 3 + 1] = (void *) URI;
1339 ctxt->pushTab[ctxt->nameNr * 3 + 2] = (void *) (long) nsNr;
1340 return (ctxt->nameNr++);
1341mem_error:
1342 xmlErrMemory(ctxt, NULL);
1343 return (-1);
1344}
1345/**
1346 * nameNsPop:
1347 * @ctxt: an XML parser context
1348 *
1349 * Pops the top element/prefix/URI name from the name stack
1350 *
1351 * Returns the name just removed
1352 */
1353static const xmlChar *
1354nameNsPop(xmlParserCtxtPtr ctxt)
1355{
1356 const xmlChar *ret;
1357
1358 if (ctxt->nameNr <= 0)
1359 return (NULL);
1360 ctxt->nameNr--;
1361 if (ctxt->nameNr > 0)
1362 ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
1363 else
1364 ctxt->name = NULL;
1365 ret = ctxt->nameTab[ctxt->nameNr];
1366 ctxt->nameTab[ctxt->nameNr] = NULL;
1367 return (ret);
1368}
1369#endif /* LIBXML_PUSH_ENABLED */
1370
1371/**
1372 * namePush:
1373 * @ctxt: an XML parser context
1374 * @value: the element name
1375 *
1376 * Pushes a new element name on top of the name stack
1377 *
1378 * Returns -1 in case of error, the index in the stack otherwise
1379 */
1380int
1381namePush(xmlParserCtxtPtr ctxt, const xmlChar * value)
1382{
1383 if (ctxt == NULL) return (-1);
1384
1385 if (ctxt->nameNr >= ctxt->nameMax) {
1386 const xmlChar * *tmp;
1387 ctxt->nameMax *= 2;
1388 tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
1389 ctxt->nameMax *
1390 sizeof(ctxt->nameTab[0]));
1391 if (tmp == NULL) {
1392 ctxt->nameMax /= 2;
1393 goto mem_error;
1394 }
1395 ctxt->nameTab = tmp;
1396 }
1397 ctxt->nameTab[ctxt->nameNr] = value;
1398 ctxt->name = value;
1399 return (ctxt->nameNr++);
1400mem_error:
1401 xmlErrMemory(ctxt, NULL);
1402 return (-1);
1403}
1404/**
1405 * namePop:
1406 * @ctxt: an XML parser context
1407 *
1408 * Pops the top element name from the name stack
1409 *
1410 * Returns the name just removed
1411 */
1412const xmlChar *
1413namePop(xmlParserCtxtPtr ctxt)
1414{
1415 const xmlChar *ret;
1416
1417 if ((ctxt == NULL) || (ctxt->nameNr <= 0))
1418 return (NULL);
1419 ctxt->nameNr--;
1420 if (ctxt->nameNr > 0)
1421 ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
1422 else
1423 ctxt->name = NULL;
1424 ret = ctxt->nameTab[ctxt->nameNr];
1425 ctxt->nameTab[ctxt->nameNr] = NULL;
1426 return (ret);
1427}
1428
1429static int spacePush(xmlParserCtxtPtr ctxt, int val) {
1430 if (ctxt->spaceNr >= ctxt->spaceMax) {
1431 ctxt->spaceMax *= 2;
1432 ctxt->spaceTab = (int *) xmlRealloc(ctxt->spaceTab,
1433 ctxt->spaceMax * sizeof(ctxt->spaceTab[0]));
1434 if (ctxt->spaceTab == NULL) {
1435 xmlErrMemory(ctxt, NULL);
1436 return(0);
1437 }
1438 }
1439 ctxt->spaceTab[ctxt->spaceNr] = val;
1440 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr];
1441 return(ctxt->spaceNr++);
1442}
1443
1444static int spacePop(xmlParserCtxtPtr ctxt) {
1445 int ret;
1446 if (ctxt->spaceNr <= 0) return(0);
1447 ctxt->spaceNr--;
1448 if (ctxt->spaceNr > 0)
1449 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr - 1];
1450 else
1451 ctxt->space = &ctxt->spaceTab[0];
1452 ret = ctxt->spaceTab[ctxt->spaceNr];
1453 ctxt->spaceTab[ctxt->spaceNr] = -1;
1454 return(ret);
1455}
1456
1457/*
1458 * Macros for accessing the content. Those should be used only by the parser,
1459 * and not exported.
1460 *
 * Dirty macros, i.e. one often needs to make assumptions about the context
 * to use them
1463 *
1464 * CUR_PTR return the current pointer to the xmlChar to be parsed.
1465 * To be used with extreme caution since operations consuming
1466 * characters may move the input buffer to a different location !
 *   CUR     returns the current xmlChar value, i.e. an 8 bit value.
 *           This should be used internally by the parser
 *           only to compare to ASCII values, otherwise it would break when
 *           running with UTF-8 encoding.
1471 * RAW same as CUR but in the input buffer, bypass any token
1472 * extraction that may have been done
 *   NXT(n)  returns the n'th next xmlChar. Same as CUR, it should be used only
 *           to compare against ASCII based substrings.
1475 * SKIP(n) Skip n xmlChar, and must also be used only to skip ASCII defined
1476 * strings without newlines within the parser.
 *   NEXT1   Skip 1 xmlChar, and must also be used only to skip 1 non-newline ASCII
 *           defined char within the parser.
1479 * Clean macros, not dependent of an ASCII context, expect UTF-8 encoding
1480 *
1481 * NEXT Skip to the next character, this does the proper decoding
1482 * in UTF-8 mode. It also pop-up unfinished entities on the fly.
1483 * NEXTL(l) Skip the current unicode character of l xmlChars long.
1484 * CUR_CHAR(l) returns the current unicode character (int), set l
1485 * to the number of xmlChars used for the encoding [0-5].
1486 * CUR_SCHAR same but operate on a string instead of the context
1487 * COPY_BUF copy the current unicode char to the target buffer, increment
1488 * the index
1489 * GROW, SHRINK handling of input buffers
1490 */
1491
/*
 * Direct accessors to the current input position. They all expand to
 * expressions on a variable named ctxt, which must be in scope where the
 * macro is used.
 */
#define RAW (*ctxt->input->cur)
#define CUR (*ctxt->input->cur)
#define NXT(val) ctxt->input->cur[(val)]
#define CUR_PTR ctxt->input->cur

/*
 * CMPn(s, c1, ..., cn): true when the first n bytes found at s are equal
 * to c1 ... cn, compared as unsigned char. Every parameter is
 * parenthesized so that compound expressions can be passed safely; note
 * that s is still evaluated once per compared byte.
 */
#define CMP4( s, c1, c2, c3, c4 ) \
  ( ((unsigned char *) (s))[ 0 ] == (c1) && \
    ((unsigned char *) (s))[ 1 ] == (c2) && \
    ((unsigned char *) (s))[ 2 ] == (c3) && \
    ((unsigned char *) (s))[ 3 ] == (c4) )
#define CMP5( s, c1, c2, c3, c4, c5 ) \
  ( CMP4( s, c1, c2, c3, c4 ) && ((unsigned char *) (s))[ 4 ] == (c5) )
#define CMP6( s, c1, c2, c3, c4, c5, c6 ) \
  ( CMP5( s, c1, c2, c3, c4, c5 ) && ((unsigned char *) (s))[ 5 ] == (c6) )
#define CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) \
  ( CMP6( s, c1, c2, c3, c4, c5, c6 ) && ((unsigned char *) (s))[ 6 ] == (c7) )
#define CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) \
  ( CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) && ((unsigned char *) (s))[ 7 ] == (c8) )
#define CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) \
  ( CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) && \
    ((unsigned char *) (s))[ 8 ] == (c9) )
#define CMP10( s, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10 ) \
  ( CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) && \
    ((unsigned char *) (s))[ 9 ] == (c10) )
1514
/*
 * SKIP(val): advance the current input by val xmlChars, updating the
 * character and column counters, then handle a PE reference or an
 * exhausted input at the new position.
 * NOTE: val is expanded three times, so it must be free of side effects.
 */
#define SKIP(val) do {							\
    ctxt->nbChars += (val),ctxt->input->cur += (val),ctxt->input->col+=(val);	\
    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);	\
    if ((*ctxt->input->cur == 0) &&					\
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))		\
	    xmlPopInput(ctxt);						\
  } while (0)

/*
 * SKIPL(val): same as SKIP but steps one xmlChar at a time so that
 * newlines met on the way update the line and column counters.
 * NOTE: val is re-evaluated on every loop iteration.
 */
#define SKIPL(val) do {							\
    int skipl;								\
    for(skipl=0; skipl<(val); skipl++) {				\
	if (*(ctxt->input->cur) == '\n') {				\
	ctxt->input->line++; ctxt->input->col = 1;			\
	} else ctxt->input->col++;					\
	ctxt->nbChars++;						\
	ctxt->input->cur++;						\
    }									\
    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);	\
    if ((*ctxt->input->cur == 0) &&					\
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))		\
	    xmlPopInput(ctxt);						\
  } while (0)
1537
1538#define SHRINK if ((ctxt->progressive == 0) && \
1539 (ctxt->input->cur - ctxt->input->base > 2 * INPUT_CHUNK) && \
1540 (ctxt->input->end - ctxt->input->cur < 2 * INPUT_CHUNK)) \
1541 xmlSHRINK (ctxt);
1542
1543static void xmlSHRINK (xmlParserCtxtPtr ctxt) {
1544 xmlParserInputShrink(ctxt->input);
1545 if ((*ctxt->input->cur == 0) &&
1546 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
1547 xmlPopInput(ctxt);
1548 }
1549
1550#define GROW if ((ctxt->progressive == 0) && \
1551 (ctxt->input->end - ctxt->input->cur < INPUT_CHUNK)) \
1552 xmlGROW (ctxt);
1553
1554static void xmlGROW (xmlParserCtxtPtr ctxt) {
1555 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1556 if ((*ctxt->input->cur == 0) &&
1557 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
1558 xmlPopInput(ctxt);
1559}
1560
/* Thin wrappers; both expect a variable named ctxt to be in scope. */
#define SKIP_BLANKS xmlSkipBlankChars(ctxt)

#define NEXT xmlNextChar(ctxt)

/*
 * NEXT1: step over exactly one xmlChar, bumping the column and character
 * counters, and ask for more input when a 0 byte is reached.
 */
#define NEXT1 {								\
	ctxt->input->col++;						\
	ctxt->input->cur++;						\
	ctxt->nbChars++;						\
	if (*ctxt->input->cur == 0)					\
	    xmlParserInputGrow(ctxt->input, INPUT_CHUNK);		\
    }

/*
 * NEXTL(l): step over the current character, which is l xmlChars long,
 * keeping the line/column counters up to date, then handle a PE
 * reference found at the new position.
 */
#define NEXTL(l) do {							\
    if (*(ctxt->input->cur) == '\n') {					\
	ctxt->input->line++; ctxt->input->col = 1;			\
    } else ctxt->input->col++;						\
    ctxt->input->cur += (l);						\
    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);	\
  } while (0)

/* Decode the current character; l receives its length in xmlChars. */
#define CUR_CHAR(l) xmlCurrentChar(ctxt, &(l))
#define CUR_SCHAR(s, l) xmlStringCurrentChar(ctxt, (s), &(l))

/*
 * COPY_BUF(l,b,i,v): append character v to buffer b at index i and
 * advance i. A length l of 1 stores a single xmlChar; any other value
 * goes through xmlCopyCharMultiByte(). Wrapped in do/while(0) so that it
 * behaves as one statement; i is expanded more than once.
 */
#define COPY_BUF(l,b,i,v)						\
    do {								\
	if ((l) == 1) (b)[(i)++] = (xmlChar) (v);			\
	else (i) += xmlCopyCharMultiByte(&(b)[(i)],(v));		\
    } while (0)
1587
1588/**
1589 * xmlSkipBlankChars:
1590 * @ctxt: the XML parser context
1591 *
1592 * skip all blanks character found at that point in the input streams.
1593 * It pops up finished entities in the process if allowable at that point.
1594 *
1595 * Returns the number of space chars skipped
1596 */
1597
1598int
1599xmlSkipBlankChars(xmlParserCtxtPtr ctxt) {
1600 int res = 0;
1601
1602 /*
1603 * It's Okay to use CUR/NEXT here since all the blanks are on
1604 * the ASCII range.
1605 */
1606 if ((ctxt->inputNr == 1) && (ctxt->instate != XML_PARSER_DTD)) {
1607 const xmlChar *cur;
1608 /*
1609 * if we are in the document content, go really fast
1610 */
1611 cur = ctxt->input->cur;
1612 while (IS_BLANK_CH(*cur)) {
1613 if (*cur == '\n') {
1614 ctxt->input->line++; ctxt->input->col = 1;
1615 }
1616 cur++;
1617 res++;
1618 if (*cur == 0) {
1619 ctxt->input->cur = cur;
1620 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1621 cur = ctxt->input->cur;
1622 }
1623 }
1624 ctxt->input->cur = cur;
1625 } else {
1626 int cur;
1627 do {
1628 cur = CUR;
1629 while (IS_BLANK_CH(cur)) { /* CHECKED tstblanks.xml */
1630 NEXT;
1631 cur = CUR;
1632 res++;
1633 }
1634 while ((cur == 0) && (ctxt->inputNr > 1) &&
1635 (ctxt->instate != XML_PARSER_COMMENT)) {
1636 xmlPopInput(ctxt);
1637 cur = CUR;
1638 }
1639 /*
1640 * Need to handle support of entities branching here
1641 */
1642 if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);
1643 } while (IS_BLANK(cur)); /* CHECKED tstblanks.xml */
1644 }
1645 return(res);
1646}
1647
1648/************************************************************************
1649 * *
1650 * Commodity functions to handle entities *
1651 * *
1652 ************************************************************************/
1653
1654/**
1655 * xmlPopInput:
1656 * @ctxt: an XML parser context
1657 *
1658 * xmlPopInput: the current input pointed by ctxt->input came to an end
1659 * pop it and return the next char.
1660 *
1661 * Returns the current xmlChar in the parser context
1662 */
1663xmlChar
1664xmlPopInput(xmlParserCtxtPtr ctxt) {
1665 if ((ctxt == NULL) || (ctxt->inputNr <= 1)) return(0);
1666 if (xmlParserDebugEntities)
1667 xmlGenericError(xmlGenericErrorContext,
1668 "Popping input %d\n", ctxt->inputNr);
1669 xmlFreeInputStream(inputPop(ctxt));
1670 if ((*ctxt->input->cur == 0) &&
1671 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
1672 return(xmlPopInput(ctxt));
1673 return(CUR);
1674}
1675
1676/**
1677 * xmlPushInput:
1678 * @ctxt: an XML parser context
1679 * @input: an XML parser input fragment (entity, XML fragment ...).
1680 *
1681 * xmlPushInput: switch to a new input stream which is stacked on top
1682 * of the previous one(s).
1683 */
1684void
1685xmlPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr input) {
1686 if (input == NULL) return;
1687
1688 if (xmlParserDebugEntities) {
1689 if ((ctxt->input != NULL) && (ctxt->input->filename))
1690 xmlGenericError(xmlGenericErrorContext,
1691 "%s(%d): ", ctxt->input->filename,
1692 ctxt->input->line);
1693 xmlGenericError(xmlGenericErrorContext,
1694 "Pushing input %d : %.30s\n", ctxt->inputNr+1, input->cur);
1695 }
1696 inputPush(ctxt, input);
1697 GROW;
1698}
1699
1700/**
1701 * xmlParseCharRef:
1702 * @ctxt: an XML parser context
1703 *
1704 * parse Reference declarations
1705 *
1706 * [66] CharRef ::= '&#' [0-9]+ ';' |
1707 * '&#x' [0-9a-fA-F]+ ';'
1708 *
1709 * [ WFC: Legal Character ]
1710 * Characters referred to using character references must match the
1711 * production for Char.
1712 *
1713 * Returns the value parsed (as an int), 0 in case of error
1714 */
1715int
1716xmlParseCharRef(xmlParserCtxtPtr ctxt) {
1717 unsigned int val = 0;
1718 int count = 0;
1719 unsigned int outofrange = 0;
1720
1721 /*
1722 * Using RAW/CUR/NEXT is okay since we are working on ASCII range here
1723 */
1724 if ((RAW == '&') && (NXT(1) == '#') &&
1725 (NXT(2) == 'x')) {
1726 SKIP(3);
1727 GROW;
1728 while (RAW != ';') { /* loop blocked by count */
1729 if (count++ > 20) {
1730 count = 0;
1731 GROW;
1732 }
1733 if ((RAW >= '0') && (RAW <= '9'))
1734 val = val * 16 + (CUR - '0');
1735 else if ((RAW >= 'a') && (RAW <= 'f') && (count < 20))
1736 val = val * 16 + (CUR - 'a') + 10;
1737 else if ((RAW >= 'A') && (RAW <= 'F') && (count < 20))
1738 val = val * 16 + (CUR - 'A') + 10;
1739 else {
1740 xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
1741 val = 0;
1742 break;
1743 }
1744 if (val > 0x10FFFF)
1745 outofrange = val;
1746
1747 NEXT;
1748 count++;
1749 }
1750 if (RAW == ';') {
1751 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
1752 ctxt->input->col++;
1753 ctxt->nbChars ++;
1754 ctxt->input->cur++;
1755 }
1756 } else if ((RAW == '&') && (NXT(1) == '#')) {
1757 SKIP(2);
1758 GROW;
1759 while (RAW != ';') { /* loop blocked by count */
1760 if (count++ > 20) {
1761 count = 0;
1762 GROW;
1763 }
1764 if ((RAW >= '0') && (RAW <= '9'))
1765 val = val * 10 + (CUR - '0');
1766 else {
1767 xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
1768 val = 0;
1769 break;
1770 }
1771 if (val > 0x10FFFF)
1772 outofrange = val;
1773
1774 NEXT;
1775 count++;
1776 }
1777 if (RAW == ';') {
1778 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
1779 ctxt->input->col++;
1780 ctxt->nbChars ++;
1781 ctxt->input->cur++;
1782 }
1783 } else {
1784 xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
1785 }
1786
1787 /*
1788 * [ WFC: Legal Character ]
1789 * Characters referred to using character references must match the
1790 * production for Char.
1791 */
1792 if ((IS_CHAR(val) && (outofrange == 0))) {
1793 return(val);
1794 } else {
1795 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
1796 "xmlParseCharRef: invalid xmlChar value %d\n",
1797 val);
1798 }
1799 return(0);
1800}
1801
1802/**
1803 * xmlParseStringCharRef:
1804 * @ctxt: an XML parser context
1805 * @str: a pointer to an index in the string
1806 *
1807 * parse Reference declarations, variant parsing from a string rather
1808 * than an an input flow.
1809 *
1810 * [66] CharRef ::= '&#' [0-9]+ ';' |
1811 * '&#x' [0-9a-fA-F]+ ';'
1812 *
1813 * [ WFC: Legal Character ]
1814 * Characters referred to using character references must match the
1815 * production for Char.
1816 *
1817 * Returns the value parsed (as an int), 0 in case of error, str will be
1818 * updated to the current value of the index
1819 */
1820static int
1821xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xmlChar **str) {
1822 const xmlChar *ptr;
1823 xmlChar cur;
1824 unsigned int val = 0;
1825 unsigned int outofrange = 0;
1826
1827 if ((str == NULL) || (*str == NULL)) return(0);
1828 ptr = *str;
1829 cur = *ptr;
1830 if ((cur == '&') && (ptr[1] == '#') && (ptr[2] == 'x')) {
1831 ptr += 3;
1832 cur = *ptr;
1833 while (cur != ';') { /* Non input consuming loop */
1834 if ((cur >= '0') && (cur <= '9'))
1835 val = val * 16 + (cur - '0');
1836 else if ((cur >= 'a') && (cur <= 'f'))
1837 val = val * 16 + (cur - 'a') + 10;
1838 else if ((cur >= 'A') && (cur <= 'F'))
1839 val = val * 16 + (cur - 'A') + 10;
1840 else {
1841 xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
1842 val = 0;
1843 break;
1844 }
1845 if (val > 0x10FFFF)
1846 outofrange = val;
1847
1848 ptr++;
1849 cur = *ptr;
1850 }
1851 if (cur == ';')
1852 ptr++;
1853 } else if ((cur == '&') && (ptr[1] == '#')){
1854 ptr += 2;
1855 cur = *ptr;
1856 while (cur != ';') { /* Non input consuming loops */
1857 if ((cur >= '0') && (cur <= '9'))
1858 val = val * 10 + (cur - '0');
1859 else {
1860 xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
1861 val = 0;
1862 break;
1863 }
1864 if (val > 0x10FFFF)
1865 outofrange = val;
1866
1867 ptr++;
1868 cur = *ptr;
1869 }
1870 if (cur == ';')
1871 ptr++;
1872 } else {
1873 xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
1874 return(0);
1875 }
1876 *str = ptr;
1877
1878 /*
1879 * [ WFC: Legal Character ]
1880 * Characters referred to using character references must match the
1881 * production for Char.
1882 */
1883 if ((IS_CHAR(val) && (outofrange == 0))) {
1884 return(val);
1885 } else {
1886 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
1887 "xmlParseStringCharRef: invalid xmlChar value %d\n",
1888 val);
1889 }
1890 return(0);
1891}
1892
1893/**
1894 * xmlNewBlanksWrapperInputStream:
1895 * @ctxt: an XML parser context
1896 * @entity: an Entity pointer
1897 *
1898 * Create a new input stream for wrapping
1899 * blanks around a PEReference
1900 *
1901 * Returns the new input stream or NULL
1902 */
1903
1904static void deallocblankswrapper (xmlChar *str) {xmlFree(str);}
1905
1906static xmlParserInputPtr
1907xmlNewBlanksWrapperInputStream(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
1908 xmlParserInputPtr input;
1909 xmlChar *buffer;
1910 size_t length;
1911 if (entity == NULL) {
1912 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
1913 "xmlNewBlanksWrapperInputStream entity\n");
1914 return(NULL);
1915 }
1916 if (xmlParserDebugEntities)
1917 xmlGenericError(xmlGenericErrorContext,
1918 "new blanks wrapper for entity: %s\n", entity->name);
1919 input = xmlNewInputStream(ctxt);
1920 if (input == NULL) {
1921 return(NULL);
1922 }
1923 length = xmlStrlen(entity->name) + 5;
1924 buffer = xmlMallocAtomic(length);
1925 if (buffer == NULL) {
1926 xmlErrMemory(ctxt, NULL);
1927 return(NULL);
1928 }
1929 buffer [0] = ' ';
1930 buffer [1] = '%';
1931 buffer [length-3] = ';';
1932 buffer [length-2] = ' ';
1933 buffer [length-1] = 0;
1934 memcpy(buffer + 2, entity->name, length - 5);
1935 input->free = deallocblankswrapper;
1936 input->base = buffer;
1937 input->cur = buffer;
1938 input->length = length;
1939 input->end = &buffer[length];
1940 return(input);
1941}
1942
1943/**
1944 * xmlParserHandlePEReference:
1945 * @ctxt: the parser context
1946 *
1947 * [69] PEReference ::= '%' Name ';'
1948 *
1949 * [ WFC: No Recursion ]
1950 * A parsed entity must not contain a recursive
1951 * reference to itself, either directly or indirectly.
1952 *
1953 * [ WFC: Entity Declared ]
1954 * In a document without any DTD, a document with only an internal DTD
1955 * subset which contains no parameter entity references, or a document
1956 * with "standalone='yes'", ... ... The declaration of a parameter
1957 * entity must precede any reference to it...
1958 *
1959 * [ VC: Entity Declared ]
1960 * In a document with an external subset or external parameter entities
1961 * with "standalone='no'", ... ... The declaration of a parameter entity
1962 * must precede any reference to it...
1963 *
1964 * [ WFC: In DTD ]
1965 * Parameter-entity references may only appear in the DTD.
1966 * NOTE: misleading but this is handled.
1967 *
1968 * A PEReference may have been detected in the current input stream
1969 * the handling is done accordingly to
1970 * http://www.w3.org/TR/REC-xml#entproc
1971 * i.e.
1972 * - Included in literal in entity values
1973 * - Included as Parameter Entity reference within DTDs
1974 */
1975void
1976xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) {
1977 const xmlChar *name;
1978 xmlEntityPtr entity = NULL;
1979 xmlParserInputPtr input;
1980
1981 if (RAW != '%') return;
1982 switch(ctxt->instate) {
1983 case XML_PARSER_CDATA_SECTION:
1984 return;
1985 case XML_PARSER_COMMENT:
1986 return;
1987 case XML_PARSER_START_TAG:
1988 return;
1989 case XML_PARSER_END_TAG:
1990 return;
1991 case XML_PARSER_EOF:
1992 xmlFatalErr(ctxt, XML_ERR_PEREF_AT_EOF, NULL);
1993 return;
1994 case XML_PARSER_PROLOG:
1995 case XML_PARSER_START:
1996 case XML_PARSER_MISC:
1997 xmlFatalErr(ctxt, XML_ERR_PEREF_IN_PROLOG, NULL);
1998 return;
1999 case XML_PARSER_ENTITY_DECL:
2000 case XML_PARSER_CONTENT:
2001 case XML_PARSER_ATTRIBUTE_VALUE:
2002 case XML_PARSER_PI:
2003 case XML_PARSER_SYSTEM_LITERAL:
2004 case XML_PARSER_PUBLIC_LITERAL:
2005 /* we just ignore it there */
2006 return;
2007 case XML_PARSER_EPILOG:
2008 xmlFatalErr(ctxt, XML_ERR_PEREF_IN_EPILOG, NULL);
2009 return;
2010 case XML_PARSER_ENTITY_VALUE:
2011 /*
2012 * NOTE: in the case of entity values, we don't do the
2013 * substitution here since we need the literal
2014 * entity value to be able to save the internal
2015 * subset of the document.
2016 * This will be handled by xmlStringDecodeEntities
2017 */
2018 return;
2019 case XML_PARSER_DTD:
2020 /*
2021 * [WFC: Well-Formedness Constraint: PEs in Internal Subset]
2022 * In the internal DTD subset, parameter-entity references
2023 * can occur only where markup declarations can occur, not
2024 * within markup declarations.
2025 * In that case this is handled in xmlParseMarkupDecl
2026 */
2027 if ((ctxt->external == 0) && (ctxt->inputNr == 1))
2028 return;
2029 if (IS_BLANK_CH(NXT(1)) || NXT(1) == 0)
2030 return;
2031 break;
2032 case XML_PARSER_IGNORE:
2033 return;
2034 }
2035
2036 NEXT;
2037 name = xmlParseName(ctxt);
2038 if (xmlParserDebugEntities)
2039 xmlGenericError(xmlGenericErrorContext,
2040 "PEReference: %s\n", name);
2041 if (name == NULL) {
2042 xmlFatalErr(ctxt, XML_ERR_PEREF_NO_NAME, NULL);
2043 } else {
2044 if (RAW == ';') {
2045 NEXT;
2046 if ((ctxt->sax != NULL) && (ctxt->sax->getParameterEntity != NULL))
2047 entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
2048 if (entity == NULL) {
2049
2050 /*
2051 * [ WFC: Entity Declared ]
2052 * In a document without any DTD, a document with only an
2053 * internal DTD subset which contains no parameter entity
2054 * references, or a document with "standalone='yes'", ...
2055 * ... The declaration of a parameter entity must precede
2056 * any reference to it...
2057 */
2058 if ((ctxt->standalone == 1) ||
2059 ((ctxt->hasExternalSubset == 0) &&
2060 (ctxt->hasPErefs == 0))) {
2061 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
2062 "PEReference: %%%s; not found\n", name);
2063 } else {
2064 /*
2065 * [ VC: Entity Declared ]
2066 * In a document with an external subset or external
2067 * parameter entities with "standalone='no'", ...
2068 * ... The declaration of a parameter entity must precede
2069 * any reference to it...
2070 */
2071 if ((ctxt->validate) && (ctxt->vctxt.error != NULL)) {
2072 xmlValidityError(ctxt, XML_WAR_UNDECLARED_ENTITY,
2073 "PEReference: %%%s; not found\n",
2074 name);
2075 } else
2076 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
2077 "PEReference: %%%s; not found\n",
2078 name, NULL);
2079 ctxt->valid = 0;
2080 }
2081 } else if (ctxt->input->free != deallocblankswrapper) {
2082 input = xmlNewBlanksWrapperInputStream(ctxt, entity);
2083 xmlPushInput(ctxt, input);
2084 } else {
2085 if ((entity->etype == XML_INTERNAL_PARAMETER_ENTITY) ||
2086 (entity->etype == XML_EXTERNAL_PARAMETER_ENTITY)) {
2087 xmlChar start[4];
2088 xmlCharEncoding enc;
2089
2090 /*
2091 * handle the extra spaces added before and after
2092 * c.f. http://www.w3.org/TR/REC-xml#as-PE
2093 * this is done independently.
2094 */
2095 input = xmlNewEntityInputStream(ctxt, entity);
2096 xmlPushInput(ctxt, input);
2097
2098 /*
2099 * Get the 4 first bytes and decode the charset
2100 * if enc != XML_CHAR_ENCODING_NONE
2101 * plug some encoding conversion routines.
2102 * Note that, since we may have some non-UTF8
2103 * encoding (like UTF16, bug 135229), the 'length'
2104 * is not known, but we can calculate based upon
2105 * the amount of data in the buffer.
2106 */
2107 GROW
2108 if ((ctxt->input->end - ctxt->input->cur)>=4) {
2109 start[0] = RAW;
2110 start[1] = NXT(1);
2111 start[2] = NXT(2);
2112 start[3] = NXT(3);
2113 enc = xmlDetectCharEncoding(start, 4);
2114 if (enc != XML_CHAR_ENCODING_NONE) {
2115 xmlSwitchEncoding(ctxt, enc);
2116 }
2117 }
2118
2119 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
2120 (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l' )) &&
2121 (IS_BLANK_CH(NXT(5)))) {
2122 xmlParseTextDecl(ctxt);
2123 }
2124 } else {
2125 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
2126 "PEReference: %s is not a parameter entity\n",
2127 name);
2128 }
2129 }
2130 } else {
2131 xmlFatalErr(ctxt, XML_ERR_PEREF_SEMICOL_MISSING, NULL);
2132 }
2133 }
2134}
2135
/*
 * growBuffer(buffer): double the size of a translation buffer.
 *
 * Expects a companion variable named <buffer>_size and a label named
 * mem_error in the enclosing function; the macro jumps to that label
 * when the reallocation fails (the recorded size has already been
 * doubled at that point).
 */
#define growBuffer(buffer) {						\
    xmlChar *grown;							\
    buffer##_size *= 2;							\
    grown = (xmlChar *) xmlRealloc(buffer,				\
                                   buffer##_size * sizeof(xmlChar));	\
    if (grown == NULL) goto mem_error;					\
    buffer = grown;							\
}
2147
2148/**
2149 * xmlStringLenDecodeEntities:
2150 * @ctxt: the parser context
2151 * @str: the input string
2152 * @len: the string length
2153 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2154 * @end: an end marker xmlChar, 0 if none
2155 * @end2: an end marker xmlChar, 0 if none
2156 * @end3: an end marker xmlChar, 0 if none
2157 *
2158 * Takes a entity string content and process to do the adequate substitutions.
2159 *
2160 * [67] Reference ::= EntityRef | CharRef
2161 *
2162 * [69] PEReference ::= '%' Name ';'
2163 *
2164 * Returns A newly allocated string with the substitution done. The caller
2165 * must deallocate it !
2166 */
2167xmlChar *
2168xmlStringLenDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2169 int what, xmlChar end, xmlChar end2, xmlChar end3) {
2170 xmlChar *buffer = NULL;
2171 int buffer_size = 0;
2172
2173 xmlChar *current = NULL;
2174 const xmlChar *last;
2175 xmlEntityPtr ent;
2176 int c,l;
2177 int nbchars = 0;
2178
2179 if ((ctxt == NULL) || (str == NULL) || (len < 0))
2180 return(NULL);
2181 last = str + len;
2182
2183 if (ctxt->depth > 40) {
2184 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
2185 return(NULL);
2186 }
2187
2188 /*
2189 * allocate a translation buffer.
2190 */
2191 buffer_size = XML_PARSER_BIG_BUFFER_SIZE;
2192 buffer = (xmlChar *) xmlMallocAtomic(buffer_size * sizeof(xmlChar));
2193 if (buffer == NULL) goto mem_error;
2194
2195 /*
2196 * OK loop until we reach one of the ending char or a size limit.
2197 * we are operating on already parsed values.
2198 */
2199 if (str < last)
2200 c = CUR_SCHAR(str, l);
2201 else
2202 c = 0;
2203 while ((c != 0) && (c != end) && /* non input consuming loop */
2204 (c != end2) && (c != end3)) {
2205
2206 if (c == 0) break;
2207 if ((c == '&') && (str[1] == '#')) {
2208 int val = xmlParseStringCharRef(ctxt, &str);
2209 if (val != 0) {
2210 COPY_BUF(0,buffer,nbchars,val);
2211 }
2212 if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
2213 growBuffer(buffer);
2214 }
2215 } else if ((c == '&') && (what & XML_SUBSTITUTE_REF)) {
2216 if (xmlParserDebugEntities)
2217 xmlGenericError(xmlGenericErrorContext,
2218 "String decoding Entity Reference: %.30s\n",
2219 str);
2220 ent = xmlParseStringEntityRef(ctxt, &str);
2221 if ((ent != NULL) &&
2222 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
2223 if (ent->content != NULL) {
2224 COPY_BUF(0,buffer,nbchars,ent->content[0]);
2225 if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
2226 growBuffer(buffer);
2227 }
2228 } else {
2229 xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
2230 "predefined entity has no content\n");
2231 }
2232 } else if ((ent != NULL) && (ent->content != NULL)) {
2233 xmlChar *rep;
2234
2235 ctxt->depth++;
2236 rep = xmlStringDecodeEntities(ctxt, ent->content, what,
2237 0, 0, 0);
2238 ctxt->depth--;
2239 if (rep != NULL) {
2240 current = rep;
2241 while (*current != 0) { /* non input consuming loop */
2242 buffer[nbchars++] = *current++;
2243 if (nbchars >
2244 buffer_size - XML_PARSER_BUFFER_SIZE) {
2245 growBuffer(buffer);
2246 }
2247 }
2248 xmlFree(rep);
2249 }
2250 } else if (ent != NULL) {
2251 int i = xmlStrlen(ent->name);
2252 const xmlChar *cur = ent->name;
2253
2254 buffer[nbchars++] = '&';
2255 if (nbchars > buffer_size - i - XML_PARSER_BUFFER_SIZE) {
2256 growBuffer(buffer);
2257 }
2258 for (;i > 0;i--)
2259 buffer[nbchars++] = *cur++;
2260 buffer[nbchars++] = ';';
2261 }
2262 } else if (c == '%' && (what & XML_SUBSTITUTE_PEREF)) {
2263 if (xmlParserDebugEntities)
2264 xmlGenericError(xmlGenericErrorContext,
2265 "String decoding PE Reference: %.30s\n", str);
2266 ent = xmlParseStringPEReference(ctxt, &str);
2267 if (ent != NULL) {
2268 xmlChar *rep;
2269
2270 ctxt->depth++;
2271 rep = xmlStringDecodeEntities(ctxt, ent->content, what,
2272 0, 0, 0);
2273 ctxt->depth--;
2274 if (rep != NULL) {
2275 current = rep;
2276 while (*current != 0) { /* non input consuming loop */
2277 buffer[nbchars++] = *current++;
2278 if (nbchars >
2279 buffer_size - XML_PARSER_BUFFER_SIZE) {
2280 growBuffer(buffer);
2281 }
2282 }
2283 xmlFree(rep);
2284 }
2285 }
2286 } else {
2287 COPY_BUF(l,buffer,nbchars,c);
2288 str += l;
2289 if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
2290 growBuffer(buffer);
2291 }
2292 }
2293 if (str < last)
2294 c = CUR_SCHAR(str, l);
2295 else
2296 c = 0;
2297 }
2298 buffer[nbchars++] = 0;
2299 return(buffer);
2300
2301mem_error:
2302 xmlErrMemory(ctxt, NULL);
2303 return(NULL);
2304}
2305
2306/**
2307 * xmlStringDecodeEntities:
2308 * @ctxt: the parser context
2309 * @str: the input string
2310 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2311 * @end: an end marker xmlChar, 0 if none
2312 * @end2: an end marker xmlChar, 0 if none
2313 * @end3: an end marker xmlChar, 0 if none
2314 *
2315 * Takes a entity string content and process to do the adequate substitutions.
2316 *
2317 * [67] Reference ::= EntityRef | CharRef
2318 *
2319 * [69] PEReference ::= '%' Name ';'
2320 *
2321 * Returns A newly allocated string with the substitution done. The caller
2322 * must deallocate it !
2323 */
2324xmlChar *
2325xmlStringDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int what,
2326 xmlChar end, xmlChar end2, xmlChar end3) {
2327 if ((ctxt == NULL) || (str == NULL)) return(NULL);
2328 return(xmlStringLenDecodeEntities(ctxt, str, xmlStrlen(str), what,
2329 end, end2, end3));
2330}
2331
2332/************************************************************************
2333 * *
2334 * Commodity functions, cleanup needed ? *
2335 * *
2336 ************************************************************************/
2337
2338/**
2339 * areBlanks:
2340 * @ctxt: an XML parser context
2341 * @str: a xmlChar *
2342 * @len: the size of @str
2343 * @blank_chars: we know the chars are blanks
2344 *
2345 * Is this a sequence of blank chars that one can ignore ?
2346 *
2347 * Returns 1 if ignorable 0 otherwise.
2348 */
2349
2350static int areBlanks(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2351 int blank_chars) {
2352 int i, ret;
2353 xmlNodePtr lastChild;
2354
2355 /*
2356 * Don't spend time trying to differentiate them, the same callback is
2357 * used !
2358 */
2359 if (ctxt->sax->ignorableWhitespace == ctxt->sax->characters)
2360 return(0);
2361
2362 /*
2363 * Check for xml:space value.
2364 */
2365 if ((ctxt->space == NULL) || (*(ctxt->space) == 1) ||
2366 (*(ctxt->space) == -2))
2367 return(0);
2368
2369 /*
2370 * Check that the string is made of blanks
2371 */
2372 if (blank_chars == 0) {
2373 for (i = 0;i < len;i++)
2374 if (!(IS_BLANK_CH(str[i]))) return(0);
2375 }
2376
2377 /*
2378 * Look if the element is mixed content in the DTD if available
2379 */
2380 if (ctxt->node == NULL) return(0);
2381 if (ctxt->myDoc != NULL) {
2382 ret = xmlIsMixedElement(ctxt->myDoc, ctxt->node->name);
2383 if (ret == 0) return(1);
2384 if (ret == 1) return(0);
2385 }
2386
2387 /*
2388 * Otherwise, heuristic :-\
2389 */
2390 if ((RAW != '<') && (RAW != 0xD)) return(0);
2391 if ((ctxt->node->children == NULL) &&
2392 (RAW == '<') && (NXT(1) == '/')) return(0);
2393
2394 lastChild = xmlGetLastChild(ctxt->node);
2395 if (lastChild == NULL) {
2396 if ((ctxt->node->type != XML_ELEMENT_NODE) &&
2397 (ctxt->node->content != NULL)) return(0);
2398 } else if (xmlNodeIsText(lastChild))
2399 return(0);
2400 else if ((ctxt->node->children != NULL) &&
2401 (xmlNodeIsText(ctxt->node->children)))
2402 return(0);
2403 return(1);
2404}
2405
2406/************************************************************************
2407 * *
2408 * Extra stuff for namespace support *
2409 * Relates to http://www.w3.org/TR/WD-xml-names *
2410 * *
2411 ************************************************************************/
2412
2413/**
2414 * xmlSplitQName:
2415 * @ctxt: an XML parser context
2416 * @name: an XML parser context
2417 * @prefix: a xmlChar **
2418 *
2419 * parse an UTF8 encoded XML qualified name string
2420 *
2421 * [NS 5] QName ::= (Prefix ':')? LocalPart
2422 *
2423 * [NS 6] Prefix ::= NCName
2424 *
2425 * [NS 7] LocalPart ::= NCName
2426 *
2427 * Returns the local part, and prefix is updated
2428 * to get the Prefix if any.
2429 */
2430
2431xmlChar *
2432xmlSplitQName(xmlParserCtxtPtr ctxt, const xmlChar *name, xmlChar **prefix) {
2433 xmlChar buf[XML_MAX_NAMELEN + 5];
2434 xmlChar *buffer = NULL;
2435 int len = 0;
2436 int max = XML_MAX_NAMELEN;
2437 xmlChar *ret = NULL;
2438 const xmlChar *cur = name;
2439 int c;
2440
2441 if (prefix == NULL) return(NULL);
2442 *prefix = NULL;
2443
2444 if (cur == NULL) return(NULL);
2445
2446#ifndef XML_XML_NAMESPACE
2447 /* xml: prefix is not really a namespace */
2448 if ((cur[0] == 'x') && (cur[1] == 'm') &&
2449 (cur[2] == 'l') && (cur[3] == ':'))
2450 return(xmlStrdup(name));
2451#endif
2452
2453 /* nasty but well=formed */
2454 if (cur[0] == ':')
2455 return(xmlStrdup(name));
2456
2457 c = *cur++;
2458 while ((c != 0) && (c != ':') && (len < max)) { /* tested bigname.xml */
2459 buf[len++] = c;
2460 c = *cur++;
2461 }
2462 if (len >= max) {
2463 /*
2464 * Okay someone managed to make a huge name, so he's ready to pay
2465 * for the processing speed.
2466 */
2467 max = len * 2;
2468
2469 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
2470 if (buffer == NULL) {
2471 xmlErrMemory(ctxt, NULL);
2472 return(NULL);
2473 }
2474 memcpy(buffer, buf, len);
2475 while ((c != 0) && (c != ':')) { /* tested bigname.xml */
2476 if (len + 10 > max) {
2477 xmlChar *tmp;
2478
2479 max *= 2;
2480 tmp = (xmlChar *) xmlRealloc(buffer,
2481 max * sizeof(xmlChar));
2482 if (tmp == NULL) {
2483 xmlFree(tmp);
2484 xmlErrMemory(ctxt, NULL);
2485 return(NULL);
2486 }
2487 buffer = tmp;
2488 }
2489 buffer[len++] = c;
2490 c = *cur++;
2491 }
2492 buffer[len] = 0;
2493 }
2494
2495 if ((c == ':') && (*cur == 0)) {
2496 if (buffer != NULL)
2497 xmlFree(buffer);
2498 *prefix = NULL;
2499 return(xmlStrdup(name));
2500 }
2501
2502 if (buffer == NULL)
2503 ret = xmlStrndup(buf, len);
2504 else {
2505 ret = buffer;
2506 buffer = NULL;
2507 max = XML_MAX_NAMELEN;
2508 }
2509
2510
2511 if (c == ':') {
2512 c = *cur;
2513 *prefix = ret;
2514 if (c == 0) {
2515 return(xmlStrndup(BAD_CAST "", 0));
2516 }
2517 len = 0;
2518
2519 /*
2520 * Check that the first character is proper to start
2521 * a new name
2522 */
2523 if (!(((c >= 0x61) && (c <= 0x7A)) ||
2524 ((c >= 0x41) && (c <= 0x5A)) ||
2525 (c == '_') || (c == ':'))) {
2526 int l;
2527 int first = CUR_SCHAR(cur, l);
2528
2529 if (!IS_LETTER(first) && (first != '_')) {
2530 xmlFatalErrMsgStr(ctxt, XML_NS_ERR_QNAME,
2531 "Name %s is not XML Namespace compliant\n",
2532 name);
2533 }
2534 }
2535 cur++;
2536
2537 while ((c != 0) && (len < max)) { /* tested bigname2.xml */
2538 buf[len++] = c;
2539 c = *cur++;
2540 }
2541 if (len >= max) {
2542 /*
2543 * Okay someone managed to make a huge name, so he's ready to pay
2544 * for the processing speed.
2545 */
2546 max = len * 2;
2547
2548 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
2549 if (buffer == NULL) {
2550 xmlErrMemory(ctxt, NULL);
2551 return(NULL);
2552 }
2553 memcpy(buffer, buf, len);
2554 while (c != 0) { /* tested bigname2.xml */
2555 if (len + 10 > max) {
2556 xmlChar *tmp;
2557
2558 max *= 2;
2559 tmp = (xmlChar *) xmlRealloc(buffer,
2560 max * sizeof(xmlChar));
2561 if (tmp == NULL) {
2562 xmlErrMemory(ctxt, NULL);
2563 xmlFree(buffer);
2564 return(NULL);
2565 }
2566 buffer = tmp;
2567 }
2568 buffer[len++] = c;
2569 c = *cur++;
2570 }
2571 buffer[len] = 0;
2572 }
2573
2574 if (buffer == NULL)
2575 ret = xmlStrndup(buf, len);
2576 else {
2577 ret = buffer;
2578 }
2579 }
2580
2581 return(ret);
2582}
2583
2584/************************************************************************
2585 * *
2586 * The parser itself *
2587 * Relates to http://www.w3.org/TR/REC-xml *
2588 * *
2589 ************************************************************************/
2590
2591static const xmlChar * xmlParseNameComplex(xmlParserCtxtPtr ctxt);
2592static xmlChar * xmlParseAttValueInternal(xmlParserCtxtPtr ctxt,
2593 int *len, int *alloc, int normalize);
2594
2595/**
2596 * xmlParseName:
2597 * @ctxt: an XML parser context
2598 *
2599 * parse an XML name.
2600 *
2601 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
2602 * CombiningChar | Extender
2603 *
2604 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
2605 *
2606 * [6] Names ::= Name (#x20 Name)*
2607 *
2608 * Returns the Name parsed or NULL
2609 */
2610
2611const xmlChar *
2612xmlParseName(xmlParserCtxtPtr ctxt) {
2613 const xmlChar *in;
2614 const xmlChar *ret;
2615 int count = 0;
2616
2617 GROW;
2618
2619 /*
2620 * Accelerator for simple ASCII names
2621 */
2622 in = ctxt->input->cur;
2623 if (((*in >= 0x61) && (*in <= 0x7A)) ||
2624 ((*in >= 0x41) && (*in <= 0x5A)) ||
2625 (*in == '_') || (*in == ':')) {
2626 in++;
2627 while (((*in >= 0x61) && (*in <= 0x7A)) ||
2628 ((*in >= 0x41) && (*in <= 0x5A)) ||
2629 ((*in >= 0x30) && (*in <= 0x39)) ||
2630 (*in == '_') || (*in == '-') ||
2631 (*in == ':') || (*in == '.'))
2632 in++;
2633 if ((*in > 0) && (*in < 0x80)) {
2634 count = in - ctxt->input->cur;
2635 ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
2636 ctxt->input->cur = in;
2637 ctxt->nbChars += count;
2638 ctxt->input->col += count;
2639 if (ret == NULL)
2640 xmlErrMemory(ctxt, NULL);
2641 return(ret);
2642 }
2643 }
2644 return(xmlParseNameComplex(ctxt));
2645}
2646
2647/**
2648 * xmlParseNameAndCompare:
2649 * @ctxt: an XML parser context
2650 *
2651 * parse an XML name and compares for match
2652 * (specialized for endtag parsing)
2653 *
2654 * Returns NULL for an illegal name, (xmlChar*) 1 for success
2655 * and the name for mismatch
2656 */
2657
2658static const xmlChar *
2659xmlParseNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *other) {
2660 register const xmlChar *cmp = other;
2661 register const xmlChar *in;
2662 const xmlChar *ret;
2663
2664 GROW;
2665
2666 in = ctxt->input->cur;
2667 while (*in != 0 && *in == *cmp) {
2668 ++in;
2669 ++cmp;
2670 ctxt->input->col++;
2671 }
2672 if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
2673 /* success */
2674 ctxt->input->cur = in;
2675 return (const xmlChar*) 1;
2676 }
2677 /* failure (or end of input buffer), check with full function */
2678 ret = xmlParseName (ctxt);
2679 /* strings coming from the dictionnary direct compare possible */
2680 if (ret == other) {
2681 return (const xmlChar*) 1;
2682 }
2683 return ret;
2684}
2685
2686static const xmlChar *
2687xmlParseNameComplex(xmlParserCtxtPtr ctxt) {
2688 int len = 0, l;
2689 int c;
2690 int count = 0;
2691
2692 /*
2693 * Handler for more complex cases
2694 */
2695 GROW;
2696 c = CUR_CHAR(l);
2697 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
2698 (!IS_LETTER(c) && (c != '_') &&
2699 (c != ':'))) {
2700 return(NULL);
2701 }
2702
2703 while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
2704 ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
2705 (c == '.') || (c == '-') ||
2706 (c == '_') || (c == ':') ||
2707 (IS_COMBINING(c)) ||
2708 (IS_EXTENDER(c)))) {
2709 if (count++ > 100) {
2710 count = 0;
2711 GROW;
2712 }
2713 len += l;
2714 NEXTL(l);
2715 c = CUR_CHAR(l);
2716 }
2717 if ((*ctxt->input->cur == '\n') && (ctxt->input->cur[-1] == '\r'))
2718 return(xmlDictLookup(ctxt->dict, ctxt->input->cur - (len + 1), len));
2719 return(xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len));
2720}
2721
2722/**
2723 * xmlParseStringName:
2724 * @ctxt: an XML parser context
2725 * @str: a pointer to the string pointer (IN/OUT)
2726 *
2727 * parse an XML name.
2728 *
2729 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
2730 * CombiningChar | Extender
2731 *
2732 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
2733 *
2734 * [6] Names ::= Name (#x20 Name)*
2735 *
2736 * Returns the Name parsed or NULL. The @str pointer
2737 * is updated to the current location in the string.
2738 */
2739
2740static xmlChar *
2741xmlParseStringName(xmlParserCtxtPtr ctxt, const xmlChar** str) {
2742 xmlChar buf[XML_MAX_NAMELEN + 5];
2743 const xmlChar *cur = *str;
2744 int len = 0, l;
2745 int c;
2746
2747 c = CUR_SCHAR(cur, l);
2748 if (!IS_LETTER(c) && (c != '_') &&
2749 (c != ':')) {
2750 return(NULL);
2751 }
2752
2753 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigentname.xml */
2754 (c == '.') || (c == '-') ||
2755 (c == '_') || (c == ':') ||
2756 (IS_COMBINING(c)) ||
2757 (IS_EXTENDER(c))) {
2758 COPY_BUF(l,buf,len,c);
2759 cur += l;
2760 c = CUR_SCHAR(cur, l);
2761 if (len >= XML_MAX_NAMELEN) { /* test bigentname.xml */
2762 /*
2763 * Okay someone managed to make a huge name, so he's ready to pay
2764 * for the processing speed.
2765 */
2766 xmlChar *buffer;
2767 int max = len * 2;
2768
2769 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
2770 if (buffer == NULL) {
2771 xmlErrMemory(ctxt, NULL);
2772 return(NULL);
2773 }
2774 memcpy(buffer, buf, len);
2775 while ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
2776 /* test bigentname.xml */
2777 (c == '.') || (c == '-') ||
2778 (c == '_') || (c == ':') ||
2779 (IS_COMBINING(c)) ||
2780 (IS_EXTENDER(c))) {
2781 if (len + 10 > max) {
2782 xmlChar *tmp;
2783 max *= 2;
2784 tmp = (xmlChar *) xmlRealloc(buffer,
2785 max * sizeof(xmlChar));
2786 if (tmp == NULL) {
2787 xmlErrMemory(ctxt, NULL);
2788 xmlFree(buffer);
2789 return(NULL);
2790 }
2791 buffer = tmp;
2792 }
2793 COPY_BUF(l,buffer,len,c);
2794 cur += l;
2795 c = CUR_SCHAR(cur, l);
2796 }
2797 buffer[len] = 0;
2798 *str = cur;
2799 return(buffer);
2800 }
2801 }
2802 *str = cur;
2803 return(xmlStrndup(buf, len));
2804}
2805
2806/**
2807 * xmlParseNmtoken:
2808 * @ctxt: an XML parser context
2809 *
2810 * parse an XML Nmtoken.
2811 *
2812 * [7] Nmtoken ::= (NameChar)+
2813 *
2814 * [8] Nmtokens ::= Nmtoken (#x20 Nmtoken)*
2815 *
2816 * Returns the Nmtoken parsed or NULL
2817 */
2818
2819xmlChar *
2820xmlParseNmtoken(xmlParserCtxtPtr ctxt) {
2821 xmlChar buf[XML_MAX_NAMELEN + 5];
2822 int len = 0, l;
2823 int c;
2824 int count = 0;
2825
2826 GROW;
2827 c = CUR_CHAR(l);
2828
2829 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigtoken.xml */
2830 (c == '.') || (c == '-') ||
2831 (c == '_') || (c == ':') ||
2832 (IS_COMBINING(c)) ||
2833 (IS_EXTENDER(c))) {
2834 if (count++ > 100) {
2835 count = 0;
2836 GROW;
2837 }
2838 COPY_BUF(l,buf,len,c);
2839 NEXTL(l);
2840 c = CUR_CHAR(l);
2841 if (len >= XML_MAX_NAMELEN) {
2842 /*
2843 * Okay someone managed to make a huge token, so he's ready to pay
2844 * for the processing speed.
2845 */
2846 xmlChar *buffer;
2847 int max = len * 2;
2848
2849 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
2850 if (buffer == NULL) {
2851 xmlErrMemory(ctxt, NULL);
2852 return(NULL);
2853 }
2854 memcpy(buffer, buf, len);
2855 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigtoken.xml */
2856 (c == '.') || (c == '-') ||
2857 (c == '_') || (c == ':') ||
2858 (IS_COMBINING(c)) ||
2859 (IS_EXTENDER(c))) {
2860 if (count++ > 100) {
2861 count = 0;
2862 GROW;
2863 }
2864 if (len + 10 > max) {
2865 xmlChar *tmp;
2866
2867 max *= 2;
2868 tmp = (xmlChar *) xmlRealloc(buffer,
2869 max * sizeof(xmlChar));
2870 if (tmp == NULL) {
2871 xmlErrMemory(ctxt, NULL);
2872 xmlFree(buffer);
2873 return(NULL);
2874 }
2875 buffer = tmp;
2876 }
2877 COPY_BUF(l,buffer,len,c);
2878 NEXTL(l);
2879 c = CUR_CHAR(l);
2880 }
2881 buffer[len] = 0;
2882 return(buffer);
2883 }
2884 }
2885 if (len == 0)
2886 return(NULL);
2887 return(xmlStrndup(buf, len));
2888}
2889
2890/**
2891 * xmlParseEntityValue:
2892 * @ctxt: an XML parser context
2893 * @orig: if non-NULL store a copy of the original entity value
2894 *
2895 * parse a value for ENTITY declarations
2896 *
2897 * [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' |
2898 * "'" ([^%&'] | PEReference | Reference)* "'"
2899 *
2900 * Returns the EntityValue parsed with reference substituted or NULL
2901 */
2902
2903xmlChar *
2904xmlParseEntityValue(xmlParserCtxtPtr ctxt, xmlChar **orig) {
2905 xmlChar *buf = NULL;
2906 int len = 0;
2907 int size = XML_PARSER_BUFFER_SIZE;
2908 int c, l;
2909 xmlChar stop;
2910 xmlChar *ret = NULL;
2911 const xmlChar *cur = NULL;
2912 xmlParserInputPtr input;
2913
2914 if (RAW == '"') stop = '"';
2915 else if (RAW == '\'') stop = '\'';
2916 else {
2917 xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_STARTED, NULL);
2918 return(NULL);
2919 }
2920 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
2921 if (buf == NULL) {
2922 xmlErrMemory(ctxt, NULL);
2923 return(NULL);
2924 }
2925
2926 /*
2927 * The content of the entity definition is copied in a buffer.
2928 */
2929
2930 ctxt->instate = XML_PARSER_ENTITY_VALUE;
2931 input = ctxt->input;
2932 GROW;
2933 NEXT;
2934 c = CUR_CHAR(l);
2935 /*
2936 * NOTE: 4.4.5 Included in Literal
2937 * When a parameter entity reference appears in a literal entity
2938 * value, ... a single or double quote character in the replacement
2939 * text is always treated as a normal data character and will not
2940 * terminate the literal.
2941 * In practice it means we stop the loop only when back at parsing
2942 * the initial entity and the quote is found
2943 */
2944 while ((IS_CHAR(c)) && ((c != stop) || /* checked */
2945 (ctxt->input != input))) {
2946 if (len + 5 >= size) {
2947 xmlChar *tmp;
2948
2949 size *= 2;
2950 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
2951 if (tmp == NULL) {
2952 xmlErrMemory(ctxt, NULL);
2953 xmlFree(buf);
2954 return(NULL);
2955 }
2956 buf = tmp;
2957 }
2958 COPY_BUF(l,buf,len,c);
2959 NEXTL(l);
2960 /*
2961 * Pop-up of finished entities.
2962 */
2963 while ((RAW == 0) && (ctxt->inputNr > 1)) /* non input consuming */
2964 xmlPopInput(ctxt);
2965
2966 GROW;
2967 c = CUR_CHAR(l);
2968 if (c == 0) {
2969 GROW;
2970 c = CUR_CHAR(l);
2971 }
2972 }
2973 buf[len] = 0;
2974
2975 /*
2976 * Raise problem w.r.t. '&' and '%' being used in non-entities
2977 * reference constructs. Note Charref will be handled in
2978 * xmlStringDecodeEntities()
2979 */
2980 cur = buf;
2981 while (*cur != 0) { /* non input consuming */
2982 if ((*cur == '%') || ((*cur == '&') && (cur[1] != '#'))) {
2983 xmlChar *name;
2984 xmlChar tmp = *cur;
2985
2986 cur++;
2987 name = xmlParseStringName(ctxt, &cur);
2988 if ((name == NULL) || (*cur != ';')) {
2989 xmlFatalErrMsgInt(ctxt, XML_ERR_ENTITY_CHAR_ERROR,
2990 "EntityValue: '%c' forbidden except for entities references\n",
2991 tmp);
2992 }
2993 if ((tmp == '%') && (ctxt->inSubset == 1) &&
2994 (ctxt->inputNr == 1)) {
2995 xmlFatalErr(ctxt, XML_ERR_ENTITY_PE_INTERNAL, NULL);
2996 }
2997 if (name != NULL)
2998 xmlFree(name);
2999 if (*cur == 0)
3000 break;
3001 }
3002 cur++;
3003 }
3004
3005 /*
3006 * Then PEReference entities are substituted.
3007 */
3008 if (c != stop) {
3009 xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_FINISHED, NULL);
3010 xmlFree(buf);
3011 } else {
3012 NEXT;
3013 /*
3014 * NOTE: 4.4.7 Bypassed
3015 * When a general entity reference appears in the EntityValue in
3016 * an entity declaration, it is bypassed and left as is.
3017 * so XML_SUBSTITUTE_REF is not set here.
3018 */
3019 ret = xmlStringDecodeEntities(ctxt, buf, XML_SUBSTITUTE_PEREF,
3020 0, 0, 0);
3021 if (orig != NULL)
3022 *orig = buf;
3023 else
3024 xmlFree(buf);
3025 }
3026
3027 return(ret);
3028}
3029
3030/**
3031 * xmlParseAttValueComplex:
3032 * @ctxt: an XML parser context
3033 * @len: the resulting attribute len
3034 * @normalize: wether to apply the inner normalization
3035 *
3036 * parse a value for an attribute, this is the fallback function
3037 * of xmlParseAttValue() when the attribute parsing requires handling
3038 * of non-ASCII characters, or normalization compaction.
3039 *
3040 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
3041 */
3042static xmlChar *
3043xmlParseAttValueComplex(xmlParserCtxtPtr ctxt, int *attlen, int normalize) {
3044 xmlChar limit = 0;
3045 xmlChar *buf = NULL;
3046 int len = 0;
3047 int buf_size = 0;
3048 int c, l, in_space = 0;
3049 xmlChar *current = NULL;
3050 xmlEntityPtr ent;
3051
3052 if (NXT(0) == '"') {
3053 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
3054 limit = '"';
3055 NEXT;
3056 } else if (NXT(0) == '\'') {
3057 limit = '\'';
3058 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
3059 NEXT;
3060 } else {
3061 xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
3062 return(NULL);
3063 }
3064
3065 /*
3066 * allocate a translation buffer.
3067 */
3068 buf_size = XML_PARSER_BUFFER_SIZE;
3069 buf = (xmlChar *) xmlMallocAtomic(buf_size * sizeof(xmlChar));
3070 if (buf == NULL) goto mem_error;
3071
3072 /*
3073 * OK loop until we reach one of the ending char or a size limit.
3074 */
3075 c = CUR_CHAR(l);
3076 while ((NXT(0) != limit) && /* checked */
3077 (IS_CHAR(c)) && (c != '<')) {
3078 if (c == 0) break;
3079 if (c == '&') {
3080 in_space = 0;
3081 if (NXT(1) == '#') {
3082 int val = xmlParseCharRef(ctxt);
3083
3084 if (val == '&') {
3085 if (ctxt->replaceEntities) {
3086 if (len > buf_size - 10) {
3087 growBuffer(buf);
3088 }
3089 buf[len++] = '&';
3090 } else {
3091 /*
3092 * The reparsing will be done in xmlStringGetNodeList()
3093 * called by the attribute() function in SAX.c
3094 */
3095 if (len > buf_size - 10) {
3096 growBuffer(buf);
3097 }
3098 buf[len++] = '&';
3099 buf[len++] = '#';
3100 buf[len++] = '3';
3101 buf[len++] = '8';
3102 buf[len++] = ';';
3103 }
3104 } else {
3105 if (len > buf_size - 10) {
3106 growBuffer(buf);
3107 }
3108 len += xmlCopyChar(0, &buf[len], val);
3109 }
3110 } else {
3111 ent = xmlParseEntityRef(ctxt);
3112 if ((ent != NULL) &&
3113 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
3114 if (len > buf_size - 10) {
3115 growBuffer(buf);
3116 }
3117 if ((ctxt->replaceEntities == 0) &&
3118 (ent->content[0] == '&')) {
3119 buf[len++] = '&';
3120 buf[len++] = '#';
3121 buf[len++] = '3';
3122 buf[len++] = '8';
3123 buf[len++] = ';';
3124 } else {
3125 buf[len++] = ent->content[0];
3126 }
3127 } else if ((ent != NULL) &&
3128 (ctxt->replaceEntities != 0)) {
3129 xmlChar *rep;
3130
3131 if (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) {
3132 rep = xmlStringDecodeEntities(ctxt, ent->content,
3133 XML_SUBSTITUTE_REF,
3134 0, 0, 0);
3135 if (rep != NULL) {
3136 current = rep;
3137 while (*current != 0) { /* non input consuming */
3138 buf[len++] = *current++;
3139 if (len > buf_size - 10) {
3140 growBuffer(buf);
3141 }
3142 }
3143 xmlFree(rep);
3144 }
3145 } else {
3146 if (len > buf_size - 10) {
3147 growBuffer(buf);
3148 }
3149 if (ent->content != NULL)
3150 buf[len++] = ent->content[0];
3151 }
3152 } else if (ent != NULL) {
3153 int i = xmlStrlen(ent->name);
3154 const xmlChar *cur = ent->name;
3155
3156 /*
3157 * This may look absurd but is needed to detect
3158 * entities problems
3159 */
3160 if ((ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
3161 (ent->content != NULL)) {
3162 xmlChar *rep;
3163 rep = xmlStringDecodeEntities(ctxt, ent->content,
3164 XML_SUBSTITUTE_REF, 0, 0, 0);
3165 if (rep != NULL)
3166 xmlFree(rep);
3167 }
3168
3169 /*
3170 * Just output the reference
3171 */
3172 buf[len++] = '&';
3173 if (len > buf_size - i - 10) {
3174 growBuffer(buf);
3175 }
3176 for (;i > 0;i--)
3177 buf[len++] = *cur++;
3178 buf[len++] = ';';
3179 }
3180 }
3181 } else {
3182 if ((c == 0x20) || (c == 0xD) || (c == 0xA) || (c == 0x9)) {
3183 if ((len != 0) || (!normalize)) {
3184 if ((!normalize) || (!in_space)) {
3185 COPY_BUF(l,buf,len,0x20);
3186 if (len > buf_size - 10) {
3187 growBuffer(buf);
3188 }
3189 }
3190 in_space = 1;
3191 }
3192 } else {
3193 in_space = 0;
3194 COPY_BUF(l,buf,len,c);
3195 if (len > buf_size - 10) {
3196 growBuffer(buf);
3197 }
3198 }
3199 NEXTL(l);
3200 }
3201 GROW;
3202 c = CUR_CHAR(l);
3203 }
3204 if ((in_space) && (normalize)) {
3205 while (buf[len - 1] == 0x20) len--;
3206 }
3207 buf[len] = 0;
3208 if (RAW == '<') {
3209 xmlFatalErr(ctxt, XML_ERR_LT_IN_ATTRIBUTE, NULL);
3210 } else if (RAW != limit) {
3211 if ((c != 0) && (!IS_CHAR(c))) {
3212 xmlFatalErrMsg(ctxt, XML_ERR_INVALID_CHAR,
3213 "invalid character in attribute value\n");
3214 } else {
3215 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
3216 "AttValue: ' expected\n");
3217 }
3218 } else
3219 NEXT;
3220 if (attlen != NULL) *attlen = len;
3221 return(buf);
3222
3223mem_error:
3224 xmlErrMemory(ctxt, NULL);
3225 return(NULL);
3226}
3227
3228/**
3229 * xmlParseAttValue:
3230 * @ctxt: an XML parser context
3231 *
3232 * parse a value for an attribute
3233 * Note: the parser won't do substitution of entities here, this
3234 * will be handled later in xmlStringGetNodeList
3235 *
3236 * [10] AttValue ::= '"' ([^<&"] | Reference)* '"' |
3237 * "'" ([^<&'] | Reference)* "'"
3238 *
3239 * 3.3.3 Attribute-Value Normalization:
3240 * Before the value of an attribute is passed to the application or
3241 * checked for validity, the XML processor must normalize it as follows:
3242 * - a character reference is processed by appending the referenced
3243 * character to the attribute value
3244 * - an entity reference is processed by recursively processing the
3245 * replacement text of the entity
3246 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
3247 * appending #x20 to the normalized value, except that only a single
3248 * #x20 is appended for a "#xD#xA" sequence that is part of an external
3249 * parsed entity or the literal entity value of an internal parsed entity
3250 * - other characters are processed by appending them to the normalized value
3251 * If the declared value is not CDATA, then the XML processor must further
3252 * process the normalized attribute value by discarding any leading and
3253 * trailing space (#x20) characters, and by replacing sequences of space
3254 * (#x20) characters by a single space (#x20) character.
3255 * All attributes for which no declaration has been read should be treated
3256 * by a non-validating parser as if declared CDATA.
3257 *
3258 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
3259 */
3260
3261
3262xmlChar *
3263xmlParseAttValue(xmlParserCtxtPtr ctxt) {
3264 if ((ctxt == NULL) || (ctxt->input == NULL)) return(NULL);
3265 return(xmlParseAttValueInternal(ctxt, NULL, NULL, 0));
3266}
3267
3268/**
3269 * xmlParseSystemLiteral:
3270 * @ctxt: an XML parser context
3271 *
3272 * parse an XML Literal
3273 *
3274 * [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
3275 *
3276 * Returns the SystemLiteral parsed or NULL
3277 */
3278
3279xmlChar *
3280xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) {
3281 xmlChar *buf = NULL;
3282 int len = 0;
3283 int size = XML_PARSER_BUFFER_SIZE;
3284 int cur, l;
3285 xmlChar stop;
3286 int state = ctxt->instate;
3287 int count = 0;
3288
3289 SHRINK;
3290 if (RAW == '"') {
3291 NEXT;
3292 stop = '"';
3293 } else if (RAW == '\'') {
3294 NEXT;
3295 stop = '\'';
3296 } else {
3297 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
3298 return(NULL);
3299 }
3300
3301 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
3302 if (buf == NULL) {
3303 xmlErrMemory(ctxt, NULL);
3304 return(NULL);
3305 }
3306 ctxt->instate = XML_PARSER_SYSTEM_LITERAL;
3307 cur = CUR_CHAR(l);
3308 while ((IS_CHAR(cur)) && (cur != stop)) { /* checked */
3309 if (len + 5 >= size) {
3310 xmlChar *tmp;
3311
3312 size *= 2;
3313 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3314 if (tmp == NULL) {
3315 xmlFree(buf);
3316 xmlErrMemory(ctxt, NULL);
3317 ctxt->instate = (xmlParserInputState) state;
3318 return(NULL);
3319 }
3320 buf = tmp;
3321 }
3322 count++;
3323 if (count > 50) {
3324 GROW;
3325 count = 0;
3326 }
3327 COPY_BUF(l,buf,len,cur);
3328 NEXTL(l);
3329 cur = CUR_CHAR(l);
3330 if (cur == 0) {
3331 GROW;
3332 SHRINK;
3333 cur = CUR_CHAR(l);
3334 }
3335 }
3336 buf[len] = 0;
3337 ctxt->instate = (xmlParserInputState) state;
3338 if (!IS_CHAR(cur)) {
3339 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
3340 } else {
3341 NEXT;
3342 }
3343 return(buf);
3344}
3345
3346/**
3347 * xmlParsePubidLiteral:
3348 * @ctxt: an XML parser context
3349 *
3350 * parse an XML public literal
3351 *
3352 * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
3353 *
3354 * Returns the PubidLiteral parsed or NULL.
3355 */
3356
3357xmlChar *
3358xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) {
3359 xmlChar *buf = NULL;
3360 int len = 0;
3361 int size = XML_PARSER_BUFFER_SIZE;
3362 xmlChar cur;
3363 xmlChar stop;
3364 int count = 0;
3365 xmlParserInputState oldstate = ctxt->instate;
3366
3367 SHRINK;
3368 if (RAW == '"') {
3369 NEXT;
3370 stop = '"';
3371 } else if (RAW == '\'') {
3372 NEXT;
3373 stop = '\'';
3374 } else {
3375 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
3376 return(NULL);
3377 }
3378 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
3379 if (buf == NULL) {
3380 xmlErrMemory(ctxt, NULL);
3381 return(NULL);
3382 }
3383 ctxt->instate = XML_PARSER_PUBLIC_LITERAL;
3384 cur = CUR;
3385 while ((IS_PUBIDCHAR_CH(cur)) && (cur != stop)) { /* checked */
3386 if (len + 1 >= size) {
3387 xmlChar *tmp;
3388
3389 size *= 2;
3390 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3391 if (tmp == NULL) {
3392 xmlErrMemory(ctxt, NULL);
3393 xmlFree(buf);
3394 return(NULL);
3395 }
3396 buf = tmp;
3397 }
3398 buf[len++] = cur;
3399 count++;
3400 if (count > 50) {
3401 GROW;
3402 count = 0;
3403 }
3404 NEXT;
3405 cur = CUR;
3406 if (cur == 0) {
3407 GROW;
3408 SHRINK;
3409 cur = CUR;
3410 }
3411 }
3412 buf[len] = 0;
3413 if (cur != stop) {
3414 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
3415 } else {
3416 NEXT;
3417 }
3418 ctxt->instate = oldstate;
3419 return(buf);
3420}
3421
3422void xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata);
3423
/*
 * used for the test in the inner loop of the char data testing
 *
 * One entry per byte value, sixteen per row. An entry is either the byte
 * itself or 0x00; the zero entries are the control characters other than
 * 0x9, the characters '&', '<' and ']', and every byte from 0x80 up.
 */
static const unsigned char test_char_data[256] = {
    /* 0x00: only 0x9 is kept, CR/LF are zero */
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x09, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0x10 */
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0x20: '&' (0x26) is zero */
    0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x00, 0x27,
    0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x2D, 0x2E, 0x2F,
    /* 0x30: '<' (0x3C) is zero */
    0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,
    0x38, 0x39, 0x3A, 0x3B, 0x00, 0x3D, 0x3E, 0x3F,
    /* 0x40 */
    0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47,
    0x48, 0x49, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F,
    /* 0x50: ']' (0x5D) is zero */
    0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57,
    0x58, 0x59, 0x5A, 0x5B, 0x5C, 0x00, 0x5E, 0x5F,
    /* 0x60 */
    0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
    0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
    /* 0x70 */
    0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
    0x78, 0x79, 0x7A, 0x7B, 0x7C, 0x7D, 0x7E, 0x7F,
    /* 0x80 - 0xFF: non-ascii, all zero */
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
};
3461
3462/**
3463 * xmlParseCharData:
3464 * @ctxt: an XML parser context
3465 * @cdata: int indicating whether we are within a CDATA section
3466 *
3467 * parse a CharData section.
3468 * if we are within a CDATA section ']]>' marks an end of section.
3469 *
3470 * The right angle bracket (>) may be represented using the string "&gt;",
3471 * and must, for compatibility, be escaped using "&gt;" or a character
3472 * reference when it appears in the string "]]>" in content, when that
3473 * string is not marking the end of a CDATA section.
3474 *
3475 * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
3476 */
3477
3478void
3479xmlParseCharData(xmlParserCtxtPtr ctxt, int cdata) {
3480 const xmlChar *in;
3481 int nbchar = 0;
3482 int line = ctxt->input->line;
3483 int col = ctxt->input->col;
3484 int ccol;
3485
3486 SHRINK;
3487 GROW;
3488 /*
3489 * Accelerated common case where input don't need to be
3490 * modified before passing it to the handler.
3491 */
3492 if (!cdata) {
3493 in = ctxt->input->cur;
3494 do {
3495get_more_space:
3496 while (*in == 0x20) in++;
3497 if (*in == 0xA) {
3498 do {
3499 ctxt->input->line++; ctxt->input->col = 1;
3500 in++;
3501 } while (*in == 0xA);
3502 goto get_more_space;
3503 }
3504 if (*in == '<') {
3505 nbchar = in - ctxt->input->cur;
3506 if (nbchar > 0) {
3507 const xmlChar *tmp = ctxt->input->cur;
3508 ctxt->input->cur = in;
3509
3510 if ((ctxt->sax != NULL) &&
3511 (ctxt->sax->ignorableWhitespace !=
3512 ctxt->sax->characters)) {
3513 if (areBlanks(ctxt, tmp, nbchar, 1)) {
3514 if (ctxt->sax->ignorableWhitespace != NULL)
3515 ctxt->sax->ignorableWhitespace(ctxt->userData,
3516 tmp, nbchar);
3517 } else {
3518 if (ctxt->sax->characters != NULL)
3519 ctxt->sax->characters(ctxt->userData,
3520 tmp, nbchar);
3521 if (*ctxt->space == -1)
3522 *ctxt->space = -2;
3523 }
3524 } else if ((ctxt->sax != NULL) &&
3525 (ctxt->sax->characters != NULL)) {
3526 ctxt->sax->characters(ctxt->userData,
3527 tmp, nbchar);
3528 }
3529 }
3530 return;
3531 }
3532
3533get_more:
3534 ccol = ctxt->input->col;
3535 while (test_char_data[*in]) {
3536 in++;
3537 ccol++;
3538 }
3539 ctxt->input->col = ccol;
3540 if (*in == 0xA) {
3541 do {
3542 ctxt->input->line++; ctxt->input->col = 1;
3543 in++;
3544 } while (*in == 0xA);
3545 goto get_more;
3546 }
3547 if (*in == ']') {
3548 if ((in[1] == ']') && (in[2] == '>')) {
3549 xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
3550 ctxt->input->cur = in;
3551 return;
3552 }
3553 in++;
3554 ctxt->input->col++;
3555 goto get_more;
3556 }
3557 nbchar = in - ctxt->input->cur;
3558 if (nbchar > 0) {
3559 if ((ctxt->sax != NULL) &&
3560 (ctxt->sax->ignorableWhitespace !=
3561 ctxt->sax->characters) &&
3562 (IS_BLANK_CH(*ctxt->input->cur))) {
3563 const xmlChar *tmp = ctxt->input->cur;
3564 ctxt->input->cur = in;
3565
3566 if (areBlanks(ctxt, tmp, nbchar, 0)) {
3567 if (ctxt->sax->ignorableWhitespace != NULL)
3568 ctxt->sax->ignorableWhitespace(ctxt->userData,
3569 tmp, nbchar);
3570 } else {
3571 if (ctxt->sax->characters != NULL)
3572 ctxt->sax->characters(ctxt->userData,
3573 tmp, nbchar);
3574 if (*ctxt->space == -1)
3575 *ctxt->space = -2;
3576 }
3577 line = ctxt->input->line;
3578 col = ctxt->input->col;
3579 } else if (ctxt->sax != NULL) {
3580 if (ctxt->sax->characters != NULL)
3581 ctxt->sax->characters(ctxt->userData,
3582 ctxt->input->cur, nbchar);
3583 line = ctxt->input->line;
3584 col = ctxt->input->col;
3585 }
3586 }
3587 ctxt->input->cur = in;
3588 if (*in == 0xD) {
3589 in++;
3590 if (*in == 0xA) {
3591 ctxt->input->cur = in;
3592 in++;
3593 ctxt->input->line++; ctxt->input->col = 1;
3594 continue; /* while */
3595 }
3596 in--;
3597 }
3598 if (*in == '<') {
3599 return;
3600 }
3601 if (*in == '&') {
3602 return;
3603 }
3604 SHRINK;
3605 GROW;
3606 in = ctxt->input->cur;
3607 } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09));
3608 nbchar = 0;
3609 }
3610 ctxt->input->line = line;
3611 ctxt->input->col = col;
3612 xmlParseCharDataComplex(ctxt, cdata);
3613}
3614
3615/**
3616 * xmlParseCharDataComplex:
3617 * @ctxt: an XML parser context
3618 * @cdata: int indicating whether we are within a CDATA section
3619 *
3620 * parse a CharData section.this is the fallback function
3621 * of xmlParseCharData() when the parsing requires handling
3622 * of non-ASCII characters.
3623 */
3624void
3625xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata) {
3626 xmlChar buf[XML_PARSER_BIG_BUFFER_SIZE + 5];
3627 int nbchar = 0;
3628 int cur, l;
3629 int count = 0;
3630
3631 SHRINK;
3632 GROW;
3633 cur = CUR_CHAR(l);
3634 while ((cur != '<') && /* checked */
3635 (cur != '&') &&
3636 (IS_CHAR(cur))) /* test also done in xmlCurrentChar() */ {
3637 if ((cur == ']') && (NXT(1) == ']') &&
3638 (NXT(2) == '>')) {
3639 if (cdata) break;
3640 else {
3641 xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
3642 }
3643 }
3644 COPY_BUF(l,buf,nbchar,cur);
3645 if (nbchar >= XML_PARSER_BIG_BUFFER_SIZE) {
3646 buf[nbchar] = 0;
3647
3648 /*
3649 * OK the segment is to be consumed as chars.
3650 */
3651 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
3652 if (areBlanks(ctxt, buf, nbchar, 0)) {
3653 if (ctxt->sax->ignorableWhitespace != NULL)
3654 ctxt->sax->ignorableWhitespace(ctxt->userData,
3655 buf, nbchar);
3656 } else {
3657 if (ctxt->sax->characters != NULL)
3658 ctxt->sax->characters(ctxt->userData, buf, nbchar);
3659 if ((ctxt->sax->characters !=
3660 ctxt->sax->ignorableWhitespace) &&
3661 (*ctxt->space == -1))
3662 *ctxt->space = -2;
3663 }
3664 }
3665 nbchar = 0;
3666 }
3667 count++;
3668 if (count > 50) {
3669 GROW;
3670 count = 0;
3671 }
3672 NEXTL(l);
3673 cur = CUR_CHAR(l);
3674 }
3675 if (nbchar != 0) {
3676 buf[nbchar] = 0;
3677 /*
3678 * OK the segment is to be consumed as chars.
3679 */
3680 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
3681 if (areBlanks(ctxt, buf, nbchar, 0)) {
3682 if (ctxt->sax->ignorableWhitespace != NULL)
3683 ctxt->sax->ignorableWhitespace(ctxt->userData, buf, nbchar);
3684 } else {
3685 if (ctxt->sax->characters != NULL)
3686 ctxt->sax->characters(ctxt->userData, buf, nbchar);
3687 if ((ctxt->sax->characters != ctxt->sax->ignorableWhitespace) &&
3688 (*ctxt->space == -1))
3689 *ctxt->space = -2;
3690 }
3691 }
3692 }
3693 if ((cur != 0) && (!IS_CHAR(cur))) {
3694 /* Generate the error and skip the offending character */
3695 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
3696 "PCDATA invalid Char value %d\n",
3697 cur);
3698 NEXTL(l);
3699 }
3700}
3701
3702/**
3703 * xmlParseExternalID:
3704 * @ctxt: an XML parser context
3705 * @publicID: a xmlChar** receiving PubidLiteral
3706 * @strict: indicate whether we should restrict parsing to only
3707 * production [75], see NOTE below
3708 *
3709 * Parse an External ID or a Public ID
3710 *
3711 * NOTE: Productions [75] and [83] interact badly since [75] can generate
3712 * 'PUBLIC' S PubidLiteral S SystemLiteral
3713 *
3714 * [75] ExternalID ::= 'SYSTEM' S SystemLiteral
3715 * | 'PUBLIC' S PubidLiteral S SystemLiteral
3716 *
3717 * [83] PublicID ::= 'PUBLIC' S PubidLiteral
3718 *
3719 * Returns the function returns SystemLiteral and in the second
3720 * case publicID receives PubidLiteral, is strict is off
3721 * it is possible to return NULL and have publicID set.
3722 */
3723
3724xmlChar *
3725xmlParseExternalID(xmlParserCtxtPtr ctxt, xmlChar **publicID, int strict) {
3726 xmlChar *URI = NULL;
3727
3728 SHRINK;
3729
3730 *publicID = NULL;
3731 if (CMP6(CUR_PTR, 'S', 'Y', 'S', 'T', 'E', 'M')) {
3732 SKIP(6);
3733 if (!IS_BLANK_CH(CUR)) {
3734 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
3735 "Space required after 'SYSTEM'\n");
3736 }
3737 SKIP_BLANKS;
3738 URI = xmlParseSystemLiteral(ctxt);
3739 if (URI == NULL) {
3740 xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
3741 }
3742 } else if (CMP6(CUR_PTR, 'P', 'U', 'B', 'L', 'I', 'C')) {
3743 SKIP(6);
3744 if (!IS_BLANK_CH(CUR)) {
3745 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
3746 "Space required after 'PUBLIC'\n");
3747 }
3748 SKIP_BLANKS;
3749 *publicID = xmlParsePubidLiteral(ctxt);
3750 if (*publicID == NULL) {
3751 xmlFatalErr(ctxt, XML_ERR_PUBID_REQUIRED, NULL);
3752 }
3753 if (strict) {
3754 /*
3755 * We don't handle [83] so "S SystemLiteral" is required.
3756 */
3757 if (!IS_BLANK_CH(CUR)) {
3758 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
3759 "Space required after the Public Identifier\n");
3760 }
3761 } else {
3762 /*
3763 * We handle [83] so we return immediately, if
3764 * "S SystemLiteral" is not detected. From a purely parsing
3765 * point of view that's a nice mess.
3766 */
3767 const xmlChar *ptr;
3768 GROW;
3769
3770 ptr = CUR_PTR;
3771 if (!IS_BLANK_CH(*ptr)) return(NULL);
3772
3773 while (IS_BLANK_CH(*ptr)) ptr++; /* TODO: dangerous, fix ! */
3774 if ((*ptr != '\'') && (*ptr != '"')) return(NULL);
3775 }
3776 SKIP_BLANKS;
3777 URI = xmlParseSystemLiteral(ctxt);
3778 if (URI == NULL) {
3779 xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
3780 }
3781 }
3782 return(URI);
3783}
3784
3785/**
3786 * xmlParseCommentComplex:
3787 * @ctxt: an XML parser context
3788 * @buf: the already parsed part of the buffer
3789 * @len: number of bytes filles in the buffer
3790 * @size: allocated size of the buffer
3791 *
3792 * Skip an XML (SGML) comment <!-- .... -->
3793 * The spec says that "For compatibility, the string "--" (double-hyphen)
3794 * must not occur within comments. "
3795 * This is the slow routine in case the accelerator for ascii didn't work
3796 *
3797 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
3798 */
3799static void
3800xmlParseCommentComplex(xmlParserCtxtPtr ctxt, xmlChar *buf, int len, int size) {
3801 int q, ql;
3802 int r, rl;
3803 int cur, l;
3804 xmlParserInputPtr input = ctxt->input;
3805 int count = 0;
3806
3807 if (buf == NULL) {
3808 len = 0;
3809 size = XML_PARSER_BUFFER_SIZE;
3810 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
3811 if (buf == NULL) {
3812 xmlErrMemory(ctxt, NULL);
3813 return;
3814 }
3815 }
3816 GROW; /* Assure there's enough input data */
3817 q = CUR_CHAR(ql);
3818 if (q == 0)
3819 goto not_terminated;
3820 if (!IS_CHAR(q)) {
3821 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
3822 "xmlParseComment: invalid xmlChar value %d\n",
3823 q);
3824 xmlFree (buf);
3825 return;
3826 }
3827 NEXTL(ql);
3828 r = CUR_CHAR(rl);
3829 if (r == 0)
3830 goto not_terminated;
3831 if (!IS_CHAR(r)) {
3832 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
3833 "xmlParseComment: invalid xmlChar value %d\n",
3834 q);
3835 xmlFree (buf);
3836 return;
3837 }
3838 NEXTL(rl);
3839 cur = CUR_CHAR(l);
3840 if (cur == 0)
3841 goto not_terminated;
3842 while (IS_CHAR(cur) && /* checked */
3843 ((cur != '>') ||
3844 (r != '-') || (q != '-'))) {
3845 if ((r == '-') && (q == '-')) {
3846 xmlFatalErr(ctxt, XML_ERR_HYPHEN_IN_COMMENT, NULL);
3847 }
3848 if (len + 5 >= size) {
3849 xmlChar *new_buf;
3850 size *= 2;
3851 new_buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3852 if (new_buf == NULL) {
3853 xmlFree (buf);
3854 xmlErrMemory(ctxt, NULL);
3855 return;
3856 }
3857 buf = new_buf;
3858 }
3859 COPY_BUF(ql,buf,len,q);
3860 q = r;
3861 ql = rl;
3862 r = cur;
3863 rl = l;
3864
3865 count++;
3866 if (count > 50) {
3867 GROW;
3868 count = 0;
3869 }
3870 NEXTL(l);
3871 cur = CUR_CHAR(l);
3872 if (cur == 0) {
3873 SHRINK;
3874 GROW;
3875 cur = CUR_CHAR(l);
3876 }
3877 }
3878 buf[len] = 0;
3879 if (cur == 0) {
3880 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
3881 "Comment not terminated \n<!--%.50s\n", buf);
3882 } else if (!IS_CHAR(cur)) {
3883 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
3884 "xmlParseComment: invalid xmlChar value %d\n",
3885 cur);
3886 } else {
3887 if (input != ctxt->input) {
3888 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
3889 "Comment doesn't start and stop in the same entity\n");
3890 }
3891 NEXT;
3892 if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
3893 (!ctxt->disableSAX))
3894 ctxt->sax->comment(ctxt->userData, buf);
3895 }
3896 xmlFree(buf);
3897 return;
3898not_terminated:
3899 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
3900 "Comment not terminated\n", NULL);
3901 xmlFree(buf);
3902 return;
3903}
3904
3905/**
3906 * xmlParseComment:
3907 * @ctxt: an XML parser context
3908 *
3909 * Skip an XML (SGML) comment <!-- .... -->
3910 * The spec says that "For compatibility, the string "--" (double-hyphen)
3911 * must not occur within comments. "
3912 *
3913 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
3914 */
3915void
3916xmlParseComment(xmlParserCtxtPtr ctxt) {
3917 xmlChar *buf = NULL;
3918 int size = XML_PARSER_BUFFER_SIZE;
3919 int len = 0;
3920 xmlParserInputState state;
3921 const xmlChar *in;
3922 int nbchar = 0, ccol;
3923
3924 /*
3925 * Check that there is a comment right here.
3926 */
3927 if ((RAW != '<') || (NXT(1) != '!') ||
3928 (NXT(2) != '-') || (NXT(3) != '-')) return;
3929
3930 state = ctxt->instate;
3931 ctxt->instate = XML_PARSER_COMMENT;
3932 SKIP(4);
3933 SHRINK;
3934 GROW;
3935
3936 /*
3937 * Accelerated common case where input don't need to be
3938 * modified before passing it to the handler.
3939 */
3940 in = ctxt->input->cur;
3941 do {
3942 if (*in == 0xA) {
3943 do {
3944 ctxt->input->line++; ctxt->input->col = 1;
3945 in++;
3946 } while (*in == 0xA);
3947 }
3948get_more:
3949 ccol = ctxt->input->col;
3950 while (((*in > '-') && (*in <= 0x7F)) ||
3951 ((*in >= 0x20) && (*in < '-')) ||
3952 (*in == 0x09)) {
3953 in++;
3954 ccol++;
3955 }
3956 ctxt->input->col = ccol;
3957 if (*in == 0xA) {
3958 do {
3959 ctxt->input->line++; ctxt->input->col = 1;
3960 in++;
3961 } while (*in == 0xA);
3962 goto get_more;
3963 }
3964 nbchar = in - ctxt->input->cur;
3965 /*
3966 * save current set of data
3967 */
3968 if (nbchar > 0) {
3969 if ((ctxt->sax != NULL) &&
3970 (ctxt->sax->comment != NULL)) {
3971 if (buf == NULL) {
3972 if ((*in == '-') && (in[1] == '-'))
3973 size = nbchar + 1;
3974 else
3975 size = XML_PARSER_BUFFER_SIZE + nbchar;
3976 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
3977 if (buf == NULL) {
3978 xmlErrMemory(ctxt, NULL);
3979 ctxt->instate = state;
3980 return;
3981 }
3982 len = 0;
3983 } else if (len + nbchar + 1 >= size) {
3984 xmlChar *new_buf;
3985 size += len + nbchar + XML_PARSER_BUFFER_SIZE;
3986 new_buf = (xmlChar *) xmlRealloc(buf,
3987 size * sizeof(xmlChar));
3988 if (new_buf == NULL) {
3989 xmlFree (buf);
3990 xmlErrMemory(ctxt, NULL);
3991 ctxt->instate = state;
3992 return;
3993 }
3994 buf = new_buf;
3995 }
3996 memcpy(&buf[len], ctxt->input->cur, nbchar);
3997 len += nbchar;
3998 buf[len] = 0;
3999 }
4000 }
4001 ctxt->input->cur = in;
4002 if (*in == 0xA) {
4003 in++;
4004 ctxt->input->line++; ctxt->input->col = 1;
4005 }
4006 if (*in == 0xD) {
4007 in++;
4008 if (*in == 0xA) {
4009 ctxt->input->cur = in;
4010 in++;
4011 ctxt->input->line++; ctxt->input->col = 1;
4012 continue; /* while */
4013 }
4014 in--;
4015 }
4016 SHRINK;
4017 GROW;
4018 in = ctxt->input->cur;
4019 if (*in == '-') {
4020 if (in[1] == '-') {
4021 if (in[2] == '>') {
4022 SKIP(3);
4023 if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
4024 (!ctxt->disableSAX)) {
4025 if (buf != NULL)
4026 ctxt->sax->comment(ctxt->userData, buf);
4027 else
4028 ctxt->sax->comment(ctxt->userData, BAD_CAST "");
4029 }
4030 if (buf != NULL)
4031 xmlFree(buf);
4032 ctxt->instate = state;
4033 return;
4034 }
4035 if (buf != NULL)
4036 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4037 "Comment not terminated \n<!--%.50s\n",
4038 buf);
4039 else
4040 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4041 "Comment not terminated \n", NULL);
4042 in++;
4043 ctxt->input->col++;
4044 }
4045 in++;
4046 ctxt->input->col++;
4047 goto get_more;
4048 }
4049 } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09));
4050 xmlParseCommentComplex(ctxt, buf, len, size);
4051 ctxt->instate = state;
4052 return;
4053}
4054
4055
4056/**
4057 * xmlParsePITarget:
4058 * @ctxt: an XML parser context
4059 *
4060 * parse the name of a PI
4061 *
4062 * [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
4063 *
4064 * Returns the PITarget name or NULL
4065 */
4066
4067const xmlChar *
4068xmlParsePITarget(xmlParserCtxtPtr ctxt) {
4069 const xmlChar *name;
4070
4071 name = xmlParseName(ctxt);
4072 if ((name != NULL) &&
4073 ((name[0] == 'x') || (name[0] == 'X')) &&
4074 ((name[1] == 'm') || (name[1] == 'M')) &&
4075 ((name[2] == 'l') || (name[2] == 'L'))) {
4076 int i;
4077 if ((name[0] == 'x') && (name[1] == 'm') &&
4078 (name[2] == 'l') && (name[3] == 0)) {
4079 xmlFatalErrMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
4080 "XML declaration allowed only at the start of the document\n");
4081 return(name);
4082 } else if (name[3] == 0) {
4083 xmlFatalErr(ctxt, XML_ERR_RESERVED_XML_NAME, NULL);
4084 return(name);
4085 }
4086 for (i = 0;;i++) {
4087 if (xmlW3CPIs[i] == NULL) break;
4088 if (xmlStrEqual(name, (const xmlChar *)xmlW3CPIs[i]))
4089 return(name);
4090 }
4091 xmlWarningMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
4092 "xmlParsePITarget: invalid name prefix 'xml'\n",
4093 NULL, NULL);
4094 }
4095 return(name);
4096}
4097
#ifdef LIBXML_CATALOG_ENABLED
/**
 * xmlParseCatalogPI:
 * @ctxt:  an XML parser context
 * @catalog:  the PI value string
 *
 * Parse an XML Catalog Processing Instruction.
 *
 * <?oasis-xml-catalog catalog="http://example.com/catalog.xml"?>
 *
 * Occurs only if allowed by the user and if happening in the Misc
 * part of the document before any doctype informations
 * This will add the given catalog to the parsing context in order
 * to be used if there is a resolution need further down in the document
 *
 * The value must be the pseudo-attribute catalog="..." (or '...'),
 * optionally surrounded by blanks. Syntax errors raise a
 * XML_WAR_CATALOG_PI warning, except a missing '=' which is ignored
 * silently.
 */

static void
xmlParseCatalogPI(xmlParserCtxtPtr ctxt, const xmlChar *catalog) {
    xmlChar *URL = NULL;
    const xmlChar *p = catalog;
    const xmlChar *start;
    xmlChar quote;

    while (IS_BLANK_CH(*p))
        p++;
    if (xmlStrncmp(p, BAD_CAST "catalog", 7) != 0)
        goto error;
    p += 7;

    while (IS_BLANK_CH(*p))
        p++;
    if (*p != '=') {
        return;
    }
    p++;

    while (IS_BLANK_CH(*p))
        p++;
    quote = *p;
    if ((quote != '\'') && (quote != '"'))
        goto error;
    p++;

    /* the URL runs up to the matching quote */
    start = p;
    while ((*p != 0) && (*p != quote))
        p++;
    if (*p == 0)
        goto error;
    URL = xmlStrndup(start, p - start);
    p++;

    /* only blanks may follow the closing quote */
    while (IS_BLANK_CH(*p))
        p++;
    if (*p != 0)
        goto error;

    if (URL != NULL) {
        ctxt->catalogs = xmlCatalogAddLocal(ctxt->catalogs, URL);
        xmlFree(URL);
    }
    return;

error:
    xmlWarningMsg(ctxt, XML_WAR_CATALOG_PI,
                  "Catalog PI syntax error: %s\n",
                  catalog, NULL);
    if (URL != NULL)
        xmlFree(URL);
}
#endif
4159
4160/**
4161 * xmlParsePI:
4162 * @ctxt: an XML parser context
4163 *
4164 * parse an XML Processing Instruction.
4165 *
4166 * [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
4167 *
4168 * The processing is transfered to SAX once parsed.
4169 */
4170
4171void
4172xmlParsePI(xmlParserCtxtPtr ctxt) {
4173 xmlChar *buf = NULL;
4174 int len = 0;
4175 int size = XML_PARSER_BUFFER_SIZE;
4176 int cur, l;
4177 const xmlChar *target;
4178 xmlParserInputState state;
4179 int count = 0;
4180
4181 if ((RAW == '<') && (NXT(1) == '?')) {
4182 xmlParserInputPtr input = ctxt->input;
4183 state = ctxt->instate;
4184 ctxt->instate = XML_PARSER_PI;
4185 /*
4186 * this is a Processing Instruction.
4187 */
4188 SKIP(2);
4189 SHRINK;
4190
4191 /*
4192 * Parse the target name and check for special support like
4193 * namespace.
4194 */
4195 target = xmlParsePITarget(ctxt);
4196 if (target != NULL) {
4197 if ((RAW == '?') && (NXT(1) == '>')) {
4198 if (input != ctxt->input) {
4199 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
4200 "PI declaration doesn't start and stop in the same entity\n");
4201 }
4202 SKIP(2);
4203
4204 /*
4205 * SAX: PI detected.
4206 */
4207 if ((ctxt->sax) && (!ctxt->disableSAX) &&
4208 (ctxt->sax->processingInstruction != NULL))
4209 ctxt->sax->processingInstruction(ctxt->userData,
4210 target, NULL);
4211 ctxt->instate = state;
4212 return;
4213 }
4214 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
4215 if (buf == NULL) {
4216 xmlErrMemory(ctxt, NULL);
4217 ctxt->instate = state;
4218 return;
4219 }
4220 cur = CUR;
4221 if (!IS_BLANK(cur)) {
4222 xmlFatalErrMsgStr(ctxt, XML_ERR_SPACE_REQUIRED,
4223 "ParsePI: PI %s space expected\n", target);
4224 }
4225 SKIP_BLANKS;
4226 cur = CUR_CHAR(l);
4227 while (IS_CHAR(cur) && /* checked */
4228 ((cur != '?') || (NXT(1) != '>'))) {
4229 if (len + 5 >= size) {
4230 xmlChar *tmp;
4231
4232 size *= 2;
4233 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
4234 if (tmp == NULL) {
4235 xmlErrMemory(ctxt, NULL);
4236 xmlFree(buf);
4237 ctxt->instate = state;
4238 return;
4239 }
4240 buf = tmp;
4241 }
4242 count++;
4243 if (count > 50) {
4244 GROW;
4245 count = 0;
4246 }
4247 COPY_BUF(l,buf,len,cur);
4248 NEXTL(l);
4249 cur = CUR_CHAR(l);
4250 if (cur == 0) {
4251 SHRINK;
4252 GROW;
4253 cur = CUR_CHAR(l);
4254 }
4255 }
4256 buf[len] = 0;
4257 if (cur != '?') {
4258 xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
4259 "ParsePI: PI %s never end ...\n", target);
4260 } else {
4261 if (input != ctxt->input) {
4262 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4263 "PI declaration doesn't start and stop in the same entity\n");
4264 }
4265 SKIP(2);
4266
4267#ifdef LIBXML_CATALOG_ENABLED
4268 if (((state == XML_PARSER_MISC) ||
4269 (state == XML_PARSER_START)) &&
4270 (xmlStrEqual(target, XML_CATALOG_PI))) {
4271 xmlCatalogAllow allow = xmlCatalogGetDefaults();
4272 if ((allow == XML_CATA_ALLOW_DOCUMENT) ||
4273 (allow == XML_CATA_ALLOW_ALL))
4274 xmlParseCatalogPI(ctxt, buf);
4275 }
4276#endif
4277
4278
4279 /*
4280 * SAX: PI detected.
4281 */
4282 if ((ctxt->sax) && (!ctxt->disableSAX) &&
4283 (ctxt->sax->processingInstruction != NULL))
4284 ctxt->sax->processingInstruction(ctxt->userData,
4285 target, buf);
4286 }
4287 xmlFree(buf);
4288 } else {
4289 xmlFatalErr(ctxt, XML_ERR_PI_NOT_STARTED, NULL);
4290 }
4291 ctxt->instate = state;
4292 }
4293}
4294
4295/**
4296 * xmlParseNotationDecl:
4297 * @ctxt: an XML parser context
4298 *
4299 * parse a notation declaration
4300 *
4301 * [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID | PublicID) S? '>'
4302 *
4303 * Hence there is actually 3 choices:
4304 * 'PUBLIC' S PubidLiteral
4305 * 'PUBLIC' S PubidLiteral S SystemLiteral
4306 * and 'SYSTEM' S SystemLiteral
4307 *
4308 * See the NOTE on xmlParseExternalID().
4309 */
4310
4311void
4312xmlParseNotationDecl(xmlParserCtxtPtr ctxt) {
4313 const xmlChar *name;
4314 xmlChar *Pubid;
4315 xmlChar *Systemid;
4316
4317 if (CMP10(CUR_PTR, '<', '!', 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
4318 xmlParserInputPtr input = ctxt->input;
4319 SHRINK;
4320 SKIP(10);
4321 if (!IS_BLANK_CH(CUR)) {
4322 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4323 "Space required after '<!NOTATION'\n");
4324 return;
4325 }
4326 SKIP_BLANKS;
4327
4328 name = xmlParseName(ctxt);
4329 if (name == NULL) {
4330 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
4331 return;
4332 }
4333 if (!IS_BLANK_CH(CUR)) {
4334 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4335 "Space required after the NOTATION name'\n");
4336 return;
4337 }
4338 SKIP_BLANKS;
4339
4340 /*
4341 * Parse the IDs.
4342 */
4343 Systemid = xmlParseExternalID(ctxt, &Pubid, 0);
4344 SKIP_BLANKS;
4345
4346 if (RAW == '>') {
4347 if (input != ctxt->input) {
4348 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4349 "Notation declaration doesn't start and stop in the same entity\n");
4350 }
4351 NEXT;
4352 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
4353 (ctxt->sax->notationDecl != NULL))
4354 ctxt->sax->notationDecl(ctxt->userData, name, Pubid, Systemid);
4355 } else {
4356 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
4357 }
4358 if (Systemid != NULL) xmlFree(Systemid);
4359 if (Pubid != NULL) xmlFree(Pubid);
4360 }
4361}
4362
4363/**
4364 * xmlParseEntityDecl:
4365 * @ctxt: an XML parser context
4366 *
4367 * parse <!ENTITY declarations
4368 *
4369 * [70] EntityDecl ::= GEDecl | PEDecl
4370 *
4371 * [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'
4372 *
4373 * [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'
4374 *
4375 * [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?)
4376 *
4377 * [74] PEDef ::= EntityValue | ExternalID
4378 *
4379 * [76] NDataDecl ::= S 'NDATA' S Name
4380 *
4381 * [ VC: Notation Declared ]
4382 * The Name must match the declared name of a notation.
4383 */
4384
4385void
4386xmlParseEntityDecl(xmlParserCtxtPtr ctxt) {
4387 const xmlChar *name = NULL;
4388 xmlChar *value = NULL;
4389 xmlChar *URI = NULL, *literal = NULL;
4390 const xmlChar *ndata = NULL;
4391 int isParameter = 0;
4392 xmlChar *orig = NULL;
4393 int skipped;
4394
4395 /* GROW; done in the caller */
4396 if (CMP8(CUR_PTR, '<', '!', 'E', 'N', 'T', 'I', 'T', 'Y')) {
4397 xmlParserInputPtr input = ctxt->input;
4398 SHRINK;
4399 SKIP(8);
4400 skipped = SKIP_BLANKS;
4401 if (skipped == 0) {
4402 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4403 "Space required after '<!ENTITY'\n");
4404 }
4405
4406 if (RAW == '%') {
4407 NEXT;
4408 skipped = SKIP_BLANKS;
4409 if (skipped == 0) {
4410 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4411 "Space required after '%'\n");
4412 }
4413 isParameter = 1;
4414 }
4415
4416 name = xmlParseName(ctxt);
4417 if (name == NULL) {
4418 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
4419 "xmlParseEntityDecl: no name\n");
4420 return;
4421 }
4422 skipped = SKIP_BLANKS;
4423 if (skipped == 0) {
4424 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4425 "Space required after the entity name\n");
4426 }
4427
4428 ctxt->instate = XML_PARSER_ENTITY_DECL;
4429 /*
4430 * handle the various case of definitions...
4431 */
4432 if (isParameter) {
4433 if ((RAW == '"') || (RAW == '\'')) {
4434 value = xmlParseEntityValue(ctxt, &orig);
4435 if (value) {
4436 if ((ctxt->sax != NULL) &&
4437 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
4438 ctxt->sax->entityDecl(ctxt->userData, name,
4439 XML_INTERNAL_PARAMETER_ENTITY,
4440 NULL, NULL, value);
4441 }
4442 } else {
4443 URI = xmlParseExternalID(ctxt, &literal, 1);
4444 if ((URI == NULL) && (literal == NULL)) {
4445 xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
4446 }
4447 if (URI) {
4448 xmlURIPtr uri;
4449
4450 uri = xmlParseURI((const char *) URI);
4451 if (uri == NULL) {
4452 xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI,
4453 "Invalid URI: %s\n", URI);
4454 /*
4455 * This really ought to be a well formedness error
4456 * but the XML Core WG decided otherwise c.f. issue
4457 * E26 of the XML erratas.
4458 */
4459 } else {
4460 if (uri->fragment != NULL) {
4461 /*
4462 * Okay this is foolish to block those but not
4463 * invalid URIs.
4464 */
4465 xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
4466 } else {
4467 if ((ctxt->sax != NULL) &&
4468 (!ctxt->disableSAX) &&
4469 (ctxt->sax->entityDecl != NULL))
4470 ctxt->sax->entityDecl(ctxt->userData, name,
4471 XML_EXTERNAL_PARAMETER_ENTITY,
4472 literal, URI, NULL);
4473 }
4474 xmlFreeURI(uri);
4475 }
4476 }
4477 }
4478 } else {
4479 if ((RAW == '"') || (RAW == '\'')) {
4480 value = xmlParseEntityValue(ctxt, &orig);
4481 if ((ctxt->sax != NULL) &&
4482 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
4483 ctxt->sax->entityDecl(ctxt->userData, name,
4484 XML_INTERNAL_GENERAL_ENTITY,
4485 NULL, NULL, value);
4486 /*
4487 * For expat compatibility in SAX mode.
4488 */
4489 if ((ctxt->myDoc == NULL) ||
4490 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
4491 if (ctxt->myDoc == NULL) {
4492 ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
4493 }
4494 if (ctxt->myDoc->intSubset == NULL)
4495 ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
4496 BAD_CAST "fake", NULL, NULL);
4497
4498 xmlSAX2EntityDecl(ctxt, name, XML_INTERNAL_GENERAL_ENTITY,
4499 NULL, NULL, value);
4500 }
4501 } else {
4502 URI = xmlParseExternalID(ctxt, &literal, 1);
4503 if ((URI == NULL) && (literal == NULL)) {
4504 xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
4505 }
4506 if (URI) {
4507 xmlURIPtr uri;
4508
4509 uri = xmlParseURI((const char *)URI);
4510 if (uri == NULL) {
4511 xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI,
4512 "Invalid URI: %s\n", URI);
4513 /*
4514 * This really ought to be a well formedness error
4515 * but the XML Core WG decided otherwise c.f. issue
4516 * E26 of the XML erratas.
4517 */
4518 } else {
4519 if (uri->fragment != NULL) {
4520 /*
4521 * Okay this is foolish to block those but not
4522 * invalid URIs.
4523 */
4524 xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
4525 }
4526 xmlFreeURI(uri);
4527 }
4528 }
4529 if ((RAW != '>') && (!IS_BLANK_CH(CUR))) {
4530 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4531 "Space required before 'NDATA'\n");
4532 }
4533 SKIP_BLANKS;
4534 if (CMP5(CUR_PTR, 'N', 'D', 'A', 'T', 'A')) {
4535 SKIP(5);
4536 if (!IS_BLANK_CH(CUR)) {
4537 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4538 "Space required after 'NDATA'\n");
4539 }
4540 SKIP_BLANKS;
4541 ndata = xmlParseName(ctxt);
4542 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
4543 (ctxt->sax->unparsedEntityDecl != NULL))
4544 ctxt->sax->unparsedEntityDecl(ctxt->userData, name,
4545 literal, URI, ndata);
4546 } else {
4547 if ((ctxt->sax != NULL) &&
4548 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
4549 ctxt->sax->entityDecl(ctxt->userData, name,
4550 XML_EXTERNAL_GENERAL_PARSED_ENTITY,
4551 literal, URI, NULL);
4552 /*
4553 * For expat compatibility in SAX mode.
4554 * assuming the entity repalcement was asked for
4555 */
4556 if ((ctxt->replaceEntities != 0) &&
4557 ((ctxt->myDoc == NULL) ||
4558 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE)))) {
4559 if (ctxt->myDoc == NULL) {
4560 ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
4561 }
4562
4563 if (ctxt->myDoc->intSubset == NULL)
4564 ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
4565 BAD_CAST "fake", NULL, NULL);
4566 xmlSAX2EntityDecl(ctxt, name,
4567 XML_EXTERNAL_GENERAL_PARSED_ENTITY,
4568 literal, URI, NULL);
4569 }
4570 }
4571 }
4572 }
4573 SKIP_BLANKS;
4574 if (RAW != '>') {
4575 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_NOT_FINISHED,
4576 "xmlParseEntityDecl: entity %s not terminated\n", name);
4577 } else {
4578 if (input != ctxt->input) {
4579 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
4580 "Entity declaration doesn't start and stop in the same entity\n");
4581 }
4582 NEXT;
4583 }
4584 if (orig != NULL) {
4585 /*
4586 * Ugly mechanism to save the raw entity value.
4587 */
4588 xmlEntityPtr cur = NULL;
4589
4590 if (isParameter) {
4591 if ((ctxt->sax != NULL) &&
4592 (ctxt->sax->getParameterEntity != NULL))
4593 cur = ctxt->sax->getParameterEntity(ctxt->userData, name);
4594 } else {
4595 if ((ctxt->sax != NULL) &&
4596 (ctxt->sax->getEntity != NULL))
4597 cur = ctxt->sax->getEntity(ctxt->userData, name);
4598 if ((cur == NULL) && (ctxt->userData==ctxt)) {
4599 cur = xmlSAX2GetEntity(ctxt, name);
4600 }
4601 }
4602 if (cur != NULL) {
4603 if (cur->orig != NULL)
4604 xmlFree(orig);
4605 else
4606 cur->orig = orig;
4607 } else
4608 xmlFree(orig);
4609 }
4610 if (value != NULL) xmlFree(value);
4611 if (URI != NULL) xmlFree(URI);
4612 if (literal != NULL) xmlFree(literal);
4613 }
4614}
4615
4616/**
4617 * xmlParseDefaultDecl:
4618 * @ctxt: an XML parser context
4619 * @value: Receive a possible fixed default value for the attribute
4620 *
4621 * Parse an attribute default declaration
4622 *
4623 * [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue)
4624 *
4625 * [ VC: Required Attribute ]
4626 * if the default declaration is the keyword #REQUIRED, then the
4627 * attribute must be specified for all elements of the type in the
4628 * attribute-list declaration.
4629 *
4630 * [ VC: Attribute Default Legal ]
4631 * The declared default value must meet the lexical constraints of
4632 * the declared attribute type c.f. xmlValidateAttributeDecl()
4633 *
4634 * [ VC: Fixed Attribute Default ]
4635 * if an attribute has a default value declared with the #FIXED
4636 * keyword, instances of that attribute must match the default value.
4637 *
4638 * [ WFC: No < in Attribute Values ]
4639 * handled in xmlParseAttValue()
4640 *
4641 * returns: XML_ATTRIBUTE_NONE, XML_ATTRIBUTE_REQUIRED, XML_ATTRIBUTE_IMPLIED
4642 * or XML_ATTRIBUTE_FIXED.
4643 */
4644
4645int
4646xmlParseDefaultDecl(xmlParserCtxtPtr ctxt, xmlChar **value) {
4647 int val;
4648 xmlChar *ret;
4649
4650 *value = NULL;
4651 if (CMP9(CUR_PTR, '#', 'R', 'E', 'Q', 'U', 'I', 'R', 'E', 'D')) {
4652 SKIP(9);
4653 return(XML_ATTRIBUTE_REQUIRED);
4654 }
4655 if (CMP8(CUR_PTR, '#', 'I', 'M', 'P', 'L', 'I', 'E', 'D')) {
4656 SKIP(8);
4657 return(XML_ATTRIBUTE_IMPLIED);
4658 }
4659 val = XML_ATTRIBUTE_NONE;
4660 if (CMP6(CUR_PTR, '#', 'F', 'I', 'X', 'E', 'D')) {
4661 SKIP(6);
4662 val = XML_ATTRIBUTE_FIXED;
4663 if (!IS_BLANK_CH(CUR)) {
4664 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4665 "Space required after '#FIXED'\n");
4666 }
4667 SKIP_BLANKS;
4668 }
4669 ret = xmlParseAttValue(ctxt);
4670 ctxt->instate = XML_PARSER_DTD;
4671 if (ret == NULL) {
4672 xmlFatalErrMsg(ctxt, (xmlParserErrors)ctxt->errNo,
4673 "Attribute default value declaration error\n");
4674 } else
4675 *value = ret;
4676 return(val);
4677}
4678
4679/**
4680 * xmlParseNotationType:
4681 * @ctxt: an XML parser context
4682 *
4683 * parse an Notation attribute type.
4684 *
4685 * Note: the leading 'NOTATION' S part has already being parsed...
4686 *
4687 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
4688 *
4689 * [ VC: Notation Attributes ]
4690 * Values of this type must match one of the notation names included
4691 * in the declaration; all notation names in the declaration must be declared.
4692 *
4693 * Returns: the notation attribute tree built while parsing
4694 */
4695
4696xmlEnumerationPtr
4697xmlParseNotationType(xmlParserCtxtPtr ctxt) {
4698 const xmlChar *name;
4699 xmlEnumerationPtr ret = NULL, last = NULL, cur;
4700
4701 if (RAW != '(') {
4702 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
4703 return(NULL);
4704 }
4705 SHRINK;
4706 do {
4707 NEXT;
4708 SKIP_BLANKS;
4709 name = xmlParseName(ctxt);
4710 if (name == NULL) {
4711 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
4712 "Name expected in NOTATION declaration\n");
4713 return(ret);
4714 }
4715 cur = xmlCreateEnumeration(name);
4716 if (cur == NULL) return(ret);
4717 if (last == NULL) ret = last = cur;
4718 else {
4719 last->next = cur;
4720 last = cur;
4721 }
4722 SKIP_BLANKS;
4723 } while (RAW == '|');
4724 if (RAW != ')') {
4725 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
4726 if ((last != NULL) && (last != ret))
4727 xmlFreeEnumeration(last);
4728 return(ret);
4729 }
4730 NEXT;
4731 return(ret);
4732}
4733
4734/**
4735 * xmlParseEnumerationType:
4736 * @ctxt: an XML parser context
4737 *
4738 * parse an Enumeration attribute type.
4739 *
4740 * [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'
4741 *
4742 * [ VC: Enumeration ]
4743 * Values of this type must match one of the Nmtoken tokens in
4744 * the declaration
4745 *
4746 * Returns: the enumeration attribute tree built while parsing
4747 */
4748
4749xmlEnumerationPtr
4750xmlParseEnumerationType(xmlParserCtxtPtr ctxt) {
4751 xmlChar *name;
4752 xmlEnumerationPtr ret = NULL, last = NULL, cur;
4753
4754 if (RAW != '(') {
4755 xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_STARTED, NULL);
4756 return(NULL);
4757 }
4758 SHRINK;
4759 do {
4760 NEXT;
4761 SKIP_BLANKS;
4762 name = xmlParseNmtoken(ctxt);
4763 if (name == NULL) {
4764 xmlFatalErr(ctxt, XML_ERR_NMTOKEN_REQUIRED, NULL);
4765 return(ret);
4766 }
4767 cur = xmlCreateEnumeration(name);
4768 xmlFree(name);
4769 if (cur == NULL) return(ret);
4770 if (last == NULL) ret = last = cur;
4771 else {
4772 last->next = cur;
4773 last = cur;
4774 }
4775 SKIP_BLANKS;
4776 } while (RAW == '|');
4777 if (RAW != ')') {
4778 xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_FINISHED, NULL);
4779 return(ret);
4780 }
4781 NEXT;
4782 return(ret);
4783}
4784
4785/**
4786 * xmlParseEnumeratedType:
4787 * @ctxt: an XML parser context
4788 * @tree: the enumeration tree built while parsing
4789 *
4790 * parse an Enumerated attribute type.
4791 *
4792 * [57] EnumeratedType ::= NotationType | Enumeration
4793 *
4794 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
4795 *
4796 *
4797 * Returns: XML_ATTRIBUTE_ENUMERATION or XML_ATTRIBUTE_NOTATION
4798 */
4799
4800int
4801xmlParseEnumeratedType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
4802 if (CMP8(CUR_PTR, 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
4803 SKIP(8);
4804 if (!IS_BLANK_CH(CUR)) {
4805 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4806 "Space required after 'NOTATION'\n");
4807 return(0);
4808 }
4809 SKIP_BLANKS;
4810 *tree = xmlParseNotationType(ctxt);
4811 if (*tree == NULL) return(0);
4812 return(XML_ATTRIBUTE_NOTATION);
4813 }
4814 *tree = xmlParseEnumerationType(ctxt);
4815 if (*tree == NULL) return(0);
4816 return(XML_ATTRIBUTE_ENUMERATION);
4817}
4818
4819/**
4820 * xmlParseAttributeType:
4821 * @ctxt: an XML parser context
4822 * @tree: the enumeration tree built while parsing
4823 *
4824 * parse the Attribute list def for an element
4825 *
4826 * [54] AttType ::= StringType | TokenizedType | EnumeratedType
4827 *
4828 * [55] StringType ::= 'CDATA'
4829 *
4830 * [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' |
4831 * 'ENTITIES' | 'NMTOKEN' | 'NMTOKENS'
4832 *
4833 * Validity constraints for attribute values syntax are checked in
4834 * xmlValidateAttributeValue()
4835 *
4836 * [ VC: ID ]
4837 * Values of type ID must match the Name production. A name must not
4838 * appear more than once in an XML document as a value of this type;
4839 * i.e., ID values must uniquely identify the elements which bear them.
4840 *
4841 * [ VC: One ID per Element Type ]
4842 * No element type may have more than one ID attribute specified.
4843 *
4844 * [ VC: ID Attribute Default ]
4845 * An ID attribute must have a declared default of #IMPLIED or #REQUIRED.
4846 *
4847 * [ VC: IDREF ]
4848 * Values of type IDREF must match the Name production, and values
4849 * of type IDREFS must match Names; each IDREF Name must match the value
4850 * of an ID attribute on some element in the XML document; i.e. IDREF
4851 * values must match the value of some ID attribute.
4852 *
4853 * [ VC: Entity Name ]
4854 * Values of type ENTITY must match the Name production, values
4855 * of type ENTITIES must match Names; each Entity Name must match the
4856 * name of an unparsed entity declared in the DTD.
4857 *
4858 * [ VC: Name Token ]
4859 * Values of type NMTOKEN must match the Nmtoken production; values
4860 * of type NMTOKENS must match Nmtokens.
4861 *
4862 * Returns the attribute type
4863 */
4864int
4865xmlParseAttributeType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
4866 SHRINK;
4867 if (CMP5(CUR_PTR, 'C', 'D', 'A', 'T', 'A')) {
4868 SKIP(5);
4869 return(XML_ATTRIBUTE_CDATA);
4870 } else if (CMP6(CUR_PTR, 'I', 'D', 'R', 'E', 'F', 'S')) {
4871 SKIP(6);
4872 return(XML_ATTRIBUTE_IDREFS);
4873 } else if (CMP5(CUR_PTR, 'I', 'D', 'R', 'E', 'F')) {
4874 SKIP(5);
4875 return(XML_ATTRIBUTE_IDREF);
4876 } else if ((RAW == 'I') && (NXT(1) == 'D')) {
4877 SKIP(2);
4878 return(XML_ATTRIBUTE_ID);
4879 } else if (CMP6(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'Y')) {
4880 SKIP(6);
4881 return(XML_ATTRIBUTE_ENTITY);
4882 } else if (CMP8(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'I', 'E', 'S')) {
4883 SKIP(8);
4884 return(XML_ATTRIBUTE_ENTITIES);
4885 } else if (CMP8(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N', 'S')) {
4886 SKIP(8);
4887 return(XML_ATTRIBUTE_NMTOKENS);
4888 } else if (CMP7(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N')) {
4889 SKIP(7);
4890 return(XML_ATTRIBUTE_NMTOKEN);
4891 }
4892 return(xmlParseEnumeratedType(ctxt, tree));
4893}
4894
4895/**
4896 * xmlParseAttributeListDecl:
4897 * @ctxt: an XML parser context
4898 *
4899 * parse the Attribute list declaration for an element
4900 *
4901 * [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
4902 *
4903 * [53] AttDef ::= S Name S AttType S DefaultDecl
4904 *
4905 */
4906void
4907xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt) {
4908 const xmlChar *elemName;
4909 const xmlChar *attrName;
4910 xmlEnumerationPtr tree;
4911
4912 if (CMP9(CUR_PTR, '<', '!', 'A', 'T', 'T', 'L', 'I', 'S', 'T')) {
4913 xmlParserInputPtr input = ctxt->input;
4914
4915 SKIP(9);
4916 if (!IS_BLANK_CH(CUR)) {
4917 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4918 "Space required after '<!ATTLIST'\n");
4919 }
4920 SKIP_BLANKS;
4921 elemName = xmlParseName(ctxt);
4922 if (elemName == NULL) {
4923 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
4924 "ATTLIST: no name for Element\n");
4925 return;
4926 }
4927 SKIP_BLANKS;
4928 GROW;
4929 while (RAW != '>') {
4930 const xmlChar *check = CUR_PTR;
4931 int type;
4932 int def;
4933 xmlChar *defaultValue = NULL;
4934
4935 GROW;
4936 tree = NULL;
4937 attrName = xmlParseName(ctxt);
4938 if (attrName == NULL) {
4939 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
4940 "ATTLIST: no name for Attribute\n");
4941 break;
4942 }
4943 GROW;
4944 if (!IS_BLANK_CH(CUR)) {
4945 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4946 "Space required after the attribute name\n");
4947 break;
4948 }
4949 SKIP_BLANKS;
4950
4951 type = xmlParseAttributeType(ctxt, &tree);
4952 if (type <= 0) {
4953 break;
4954 }
4955
4956 GROW;
4957 if (!IS_BLANK_CH(CUR)) {
4958 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4959 "Space required after the attribute type\n");
4960 if (tree != NULL)
4961 xmlFreeEnumeration(tree);
4962 break;
4963 }
4964 SKIP_BLANKS;
4965
4966 def = xmlParseDefaultDecl(ctxt, &defaultValue);
4967 if (def <= 0) {
4968 if (defaultValue != NULL)
4969 xmlFree(defaultValue);
4970 if (tree != NULL)
4971 xmlFreeEnumeration(tree);
4972 break;
4973 }
4974
4975 GROW;
4976 if (RAW != '>') {
4977 if (!IS_BLANK_CH(CUR)) {
4978 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4979 "Space required after the attribute default value\n");
4980 if (defaultValue != NULL)
4981 xmlFree(defaultValue);
4982 if (tree != NULL)
4983 xmlFreeEnumeration(tree);
4984 break;
4985 }
4986 SKIP_BLANKS;
4987 }
4988 if (check == CUR_PTR) {
4989 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
4990 "in xmlParseAttributeListDecl\n");
4991 if (defaultValue != NULL)
4992 xmlFree(defaultValue);
4993 if (tree != NULL)
4994 xmlFreeEnumeration(tree);
4995 break;
4996 }
4997 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
4998 (ctxt->sax->attributeDecl != NULL))
4999 ctxt->sax->attributeDecl(ctxt->userData, elemName, attrName,
5000 type, def, defaultValue, tree);
5001 else if (tree != NULL)
5002 xmlFreeEnumeration(tree);
5003
5004 if ((ctxt->sax2) && (defaultValue != NULL) &&
5005 (def != XML_ATTRIBUTE_IMPLIED) &&
5006 (def != XML_ATTRIBUTE_REQUIRED)) {
5007 xmlAddDefAttrs(ctxt, elemName, attrName, defaultValue);
5008 }
5009 if ((ctxt->sax2) && (type != XML_ATTRIBUTE_CDATA)) {
5010 xmlAddSpecialAttr(ctxt, elemName, attrName, type);
5011 }
5012 if (defaultValue != NULL)
5013 xmlFree(defaultValue);
5014 GROW;
5015 }
5016 if (RAW == '>') {
5017 if (input != ctxt->input) {
5018 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5019 "Attribute list declaration doesn't start and stop in the same entity\n");
5020 }
5021 NEXT;
5022 }
5023 }
5024}
5025
5026/**
5027 * xmlParseElementMixedContentDecl:
5028 * @ctxt: an XML parser context
5029 * @inputchk: the input used for the current entity, needed for boundary checks
5030 *
5031 * parse the declaration for a Mixed Element content
5032 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
5033 *
5034 * [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' |
5035 * '(' S? '#PCDATA' S? ')'
5036 *
5037 * [ VC: Proper Group/PE Nesting ] applies to [51] too (see [49])
5038 *
5039 * [ VC: No Duplicate Types ]
5040 * The same name must not appear more than once in a single
5041 * mixed-content declaration.
5042 *
5043 * returns: the list of the xmlElementContentPtr describing the element choices
5044 */
5045xmlElementContentPtr
5046xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt, int inputchk) {
5047 xmlElementContentPtr ret = NULL, cur = NULL, n;
5048 const xmlChar *elem = NULL;
5049
5050 GROW;
5051 if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
5052 SKIP(7);
5053 SKIP_BLANKS;
5054 SHRINK;
5055 if (RAW == ')') {
5056 if ((ctxt->validate) && (ctxt->input->id != inputchk)) {
5057 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
5058"Element content declaration doesn't start and stop in the same entity\n",
5059 NULL);
5060 }
5061 NEXT;
5062 ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA);
5063 if (RAW == '*') {
5064 ret->ocur = XML_ELEMENT_CONTENT_MULT;
5065 NEXT;
5066 }
5067 return(ret);
5068 }
5069 if ((RAW == '(') || (RAW == '|')) {
5070 ret = cur = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA);
5071 if (ret == NULL) return(NULL);
5072 }
5073 while (RAW == '|') {
5074 NEXT;
5075 if (elem == NULL) {
5076 ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
5077 if (ret == NULL) return(NULL);
5078 ret->c1 = cur;
5079 if (cur != NULL)
5080 cur->parent = ret;
5081 cur = ret;
5082 } else {
5083 n = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
5084 if (n == NULL) return(NULL);
5085 n->c1 = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
5086 if (n->c1 != NULL)
5087 n->c1->parent = n;
5088 cur->c2 = n;
5089 if (n != NULL)
5090 n->parent = cur;
5091 cur = n;
5092 }
5093 SKIP_BLANKS;
5094 elem = xmlParseName(ctxt);
5095 if (elem == NULL) {
5096 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5097 "xmlParseElementMixedContentDecl : Name expected\n");
5098 xmlFreeDocElementContent(ctxt->myDoc, cur);
5099 return(NULL);
5100 }
5101 SKIP_BLANKS;
5102 GROW;
5103 }
5104 if ((RAW == ')') && (NXT(1) == '*')) {
5105 if (elem != NULL) {
5106 cur->c2 = xmlNewDocElementContent(ctxt->myDoc, elem,
5107 XML_ELEMENT_CONTENT_ELEMENT);
5108 if (cur->c2 != NULL)
5109 cur->c2->parent = cur;
5110 }
5111 ret->ocur = XML_ELEMENT_CONTENT_MULT;
5112 if ((ctxt->validate) && (ctxt->input->id != inputchk)) {
5113 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
5114"Element content declaration doesn't start and stop in the same entity\n",
5115 NULL);
5116 }
5117 SKIP(2);
5118 } else {
5119 xmlFreeDocElementContent(ctxt->myDoc, ret);
5120 xmlFatalErr(ctxt, XML_ERR_MIXED_NOT_STARTED, NULL);
5121 return(NULL);
5122 }
5123
5124 } else {
5125 xmlFatalErr(ctxt, XML_ERR_PCDATA_REQUIRED, NULL);
5126 }
5127 return(ret);
5128}
5129
5130/**
5131 * xmlParseElementChildrenContentDecl:
5132 * @ctxt: an XML parser context
5133 * @inputchk: the input used for the current entity, needed for boundary checks
5134 *
5135 * parse the declaration for a Mixed Element content
5136 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
5137 *
5138 *
5139 * [47] children ::= (choice | seq) ('?' | '*' | '+')?
5140 *
5141 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
5142 *
5143 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
5144 *
5145 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
5146 *
5147 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
5148 * TODO Parameter-entity replacement text must be properly nested
5149 * with parenthesized groups. That is to say, if either of the
5150 * opening or closing parentheses in a choice, seq, or Mixed
5151 * construct is contained in the replacement text for a parameter
5152 * entity, both must be contained in the same replacement text. For
5153 * interoperability, if a parameter-entity reference appears in a
5154 * choice, seq, or Mixed construct, its replacement text should not
5155 * be empty, and neither the first nor last non-blank character of
5156 * the replacement text should be a connector (| or ,).
5157 *
5158 * Returns the tree of xmlElementContentPtr describing the element
5159 * hierarchy.
5160 */
5161xmlElementContentPtr
5162xmlParseElementChildrenContentDecl (xmlParserCtxtPtr ctxt, int inputchk) {
5163 xmlElementContentPtr ret = NULL, cur = NULL, last = NULL, op = NULL;
5164 const xmlChar *elem;
5165 xmlChar type = 0;
5166
5167 SKIP_BLANKS;
5168 GROW;
5169 if (RAW == '(') {
5170 int inputid = ctxt->input->id;
5171
5172 /* Recurse on first child */
5173 NEXT;
5174 SKIP_BLANKS;
5175 cur = ret = xmlParseElementChildrenContentDecl(ctxt, inputid);
5176 SKIP_BLANKS;
5177 GROW;
5178 } else {
5179 elem = xmlParseName(ctxt);
5180 if (elem == NULL) {
5181 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
5182 return(NULL);
5183 }
5184 cur = ret = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
5185 if (cur == NULL) {
5186 xmlErrMemory(ctxt, NULL);
5187 return(NULL);
5188 }
5189 GROW;
5190 if (RAW == '?') {
5191 cur->ocur = XML_ELEMENT_CONTENT_OPT;
5192 NEXT;
5193 } else if (RAW == '*') {
5194 cur->ocur = XML_ELEMENT_CONTENT_MULT;
5195 NEXT;
5196 } else if (RAW == '+') {
5197 cur->ocur = XML_ELEMENT_CONTENT_PLUS;
5198 NEXT;
5199 } else {
5200 cur->ocur = XML_ELEMENT_CONTENT_ONCE;
5201 }
5202 GROW;
5203 }
5204 SKIP_BLANKS;
5205 SHRINK;
5206 while (RAW != ')') {
5207 /*
5208 * Each loop we parse one separator and one element.
5209 */
5210 if (RAW == ',') {
5211 if (type == 0) type = CUR;
5212
5213 /*
5214 * Detect "Name | Name , Name" error
5215 */
5216 else if (type != CUR) {
5217 xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
5218 "xmlParseElementChildrenContentDecl : '%c' expected\n",
5219 type);
5220 if ((last != NULL) && (last != ret))
5221 xmlFreeDocElementContent(ctxt->myDoc, last);
5222 if (ret != NULL)
5223 xmlFreeDocElementContent(ctxt->myDoc, ret);
5224 return(NULL);
5225 }
5226 NEXT;
5227
5228 op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_SEQ);
5229 if (op == NULL) {
5230 if ((last != NULL) && (last != ret))
5231 xmlFreeDocElementContent(ctxt->myDoc, last);
5232 xmlFreeDocElementContent(ctxt->myDoc, ret);
5233 return(NULL);
5234 }
5235 if (last == NULL) {
5236 op->c1 = ret;
5237 if (ret != NULL)
5238 ret->parent = op;
5239 ret = cur = op;
5240 } else {
5241 cur->c2 = op;
5242 if (op != NULL)
5243 op->parent = cur;
5244 op->c1 = last;
5245 if (last != NULL)
5246 last->parent = op;
5247 cur =op;
5248 last = NULL;
5249 }
5250 } else if (RAW == '|') {
5251 if (type == 0) type = CUR;
5252
5253 /*
5254 * Detect "Name , Name | Name" error
5255 */
5256 else if (type != CUR) {
5257 xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
5258 "xmlParseElementChildrenContentDecl : '%c' expected\n",
5259 type);
5260 if ((last != NULL) && (last != ret))
5261 xmlFreeDocElementContent(ctxt->myDoc, last);
5262 if (ret != NULL)
5263 xmlFreeDocElementContent(ctxt->myDoc, ret);
5264 return(NULL);
5265 }
5266 NEXT;
5267
5268 op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
5269 if (op == NULL) {
5270 if ((last != NULL) && (last != ret))
5271 xmlFreeDocElementContent(ctxt->myDoc, last);
5272 if (ret != NULL)
5273 xmlFreeDocElementContent(ctxt->myDoc, ret);
5274 return(NULL);
5275 }
5276 if (last == NULL) {
5277 op->c1 = ret;
5278 if (ret != NULL)
5279 ret->parent = op;
5280 ret = cur = op;
5281 } else {
5282 cur->c2 = op;
5283 if (op != NULL)
5284 op->parent = cur;
5285 op->c1 = last;
5286 if (last != NULL)
5287 last->parent = op;
5288 cur =op;
5289 last = NULL;
5290 }
5291 } else {
5292 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_FINISHED, NULL);
5293 if (ret != NULL)
5294 xmlFreeDocElementContent(ctxt->myDoc, ret);
5295 return(NULL);
5296 }
5297 GROW;
5298 SKIP_BLANKS;
5299 GROW;
5300 if (RAW == '(') {
5301 int inputid = ctxt->input->id;
5302 /* Recurse on second child */
5303 NEXT;
5304 SKIP_BLANKS;
5305 last = xmlParseElementChildrenContentDecl(ctxt, inputid);
5306 SKIP_BLANKS;
5307 } else {
5308 elem = xmlParseName(ctxt);
5309 if (elem == NULL) {
5310 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
5311 if (ret != NULL)
5312 xmlFreeDocElementContent(ctxt->myDoc, ret);
5313 return(NULL);
5314 }
5315 last = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
5316 if (RAW == '?') {
5317 last->ocur = XML_ELEMENT_CONTENT_OPT;
5318 NEXT;
5319 } else if (RAW == '*') {
5320 last->ocur = XML_ELEMENT_CONTENT_MULT;
5321 NEXT;
5322 } else if (RAW == '+') {
5323 last->ocur = XML_ELEMENT_CONTENT_PLUS;
5324 NEXT;
5325 } else {
5326 last->ocur = XML_ELEMENT_CONTENT_ONCE;
5327 }
5328 }
5329 SKIP_BLANKS;
5330 GROW;
5331 }
5332 if ((cur != NULL) && (last != NULL)) {
5333 cur->c2 = last;
5334 if (last != NULL)
5335 last->parent = cur;
5336 }
5337 if ((ctxt->validate) && (ctxt->input->id != inputchk)) {
5338 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
5339"Element content declaration doesn't start and stop in the same entity\n",
5340 NULL);
5341 }
5342 NEXT;
5343 if (RAW == '?') {
5344 if (ret != NULL) {
5345 if ((ret->ocur == XML_ELEMENT_CONTENT_PLUS) ||
5346 (ret->ocur == XML_ELEMENT_CONTENT_MULT))
5347 ret->ocur = XML_ELEMENT_CONTENT_MULT;
5348 else
5349 ret->ocur = XML_ELEMENT_CONTENT_OPT;
5350 }
5351 NEXT;
5352 } else if (RAW == '*') {
5353 if (ret != NULL) {
5354 ret->ocur = XML_ELEMENT_CONTENT_MULT;
5355 cur = ret;
5356 /*
5357 * Some normalization:
5358 * (a | b* | c?)* == (a | b | c)*
5359 */
5360 while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) {
5361 if ((cur->c1 != NULL) &&
5362 ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
5363 (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT)))
5364 cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
5365 if ((cur->c2 != NULL) &&
5366 ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
5367 (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT)))
5368 cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
5369 cur = cur->c2;
5370 }
5371 }
5372 NEXT;
5373 } else if (RAW == '+') {
5374 if (ret != NULL) {
5375 int found = 0;
5376
5377 if ((ret->ocur == XML_ELEMENT_CONTENT_OPT) ||
5378 (ret->ocur == XML_ELEMENT_CONTENT_MULT))
5379 ret->ocur = XML_ELEMENT_CONTENT_MULT;
5380 else
5381 ret->ocur = XML_ELEMENT_CONTENT_PLUS;
5382 /*
5383 * Some normalization:
5384 * (a | b*)+ == (a | b)*
5385 * (a | b?)+ == (a | b)*
5386 */
5387 while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) {
5388 if ((cur->c1 != NULL) &&
5389 ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
5390 (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT))) {
5391 cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
5392 found = 1;
5393 }
5394 if ((cur->c2 != NULL) &&
5395 ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
5396 (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT))) {
5397 cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
5398 found = 1;
5399 }
5400 cur = cur->c2;
5401 }
5402 if (found)
5403 ret->ocur = XML_ELEMENT_CONTENT_MULT;
5404 }
5405 NEXT;
5406 }
5407 return(ret);
5408}
5409
5410/**
5411 * xmlParseElementContentDecl:
5412 * @ctxt: an XML parser context
5413 * @name: the name of the element being defined.
5414 * @result: the Element Content pointer will be stored here if any
5415 *
5416 * parse the declaration for an Element content either Mixed or Children,
5417 * the cases EMPTY and ANY are handled directly in xmlParseElementDecl
5418 *
5419 * [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
5420 *
5421 * returns: the type of element content XML_ELEMENT_TYPE_xxx
5422 */
5423
5424int
5425xmlParseElementContentDecl(xmlParserCtxtPtr ctxt, const xmlChar *name,
5426 xmlElementContentPtr *result) {
5427
5428 xmlElementContentPtr tree = NULL;
5429 int inputid = ctxt->input->id;
5430 int res;
5431
5432 *result = NULL;
5433
5434 if (RAW != '(') {
5435 xmlFatalErrMsgStr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
5436 "xmlParseElementContentDecl : %s '(' expected\n", name);
5437 return(-1);
5438 }
5439 NEXT;
5440 GROW;
5441 SKIP_BLANKS;
5442 if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
5443 tree = xmlParseElementMixedContentDecl(ctxt, inputid);
5444 res = XML_ELEMENT_TYPE_MIXED;
5445 } else {
5446 tree = xmlParseElementChildrenContentDecl(ctxt, inputid);
5447 res = XML_ELEMENT_TYPE_ELEMENT;
5448 }
5449 SKIP_BLANKS;
5450 *result = tree;
5451 return(res);
5452}
5453
5454/**
5455 * xmlParseElementDecl:
5456 * @ctxt: an XML parser context
5457 *
5458 * parse an Element declaration.
5459 *
5460 * [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'
5461 *
5462 * [ VC: Unique Element Type Declaration ]
5463 * No element type may be declared more than once
5464 *
5465 * Returns the type of the element, or -1 in case of error
5466 */
5467int
5468xmlParseElementDecl(xmlParserCtxtPtr ctxt) {
5469 const xmlChar *name;
5470 int ret = -1;
5471 xmlElementContentPtr content = NULL;
5472
5473 /* GROW; done in the caller */
5474 if (CMP9(CUR_PTR, '<', '!', 'E', 'L', 'E', 'M', 'E', 'N', 'T')) {
5475 xmlParserInputPtr input = ctxt->input;
5476
5477 SKIP(9);
5478 if (!IS_BLANK_CH(CUR)) {
5479 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5480 "Space required after 'ELEMENT'\n");
5481 }
5482 SKIP_BLANKS;
5483 name = xmlParseName(ctxt);
5484 if (name == NULL) {
5485 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5486 "xmlParseElementDecl: no name for Element\n");
5487 return(-1);
5488 }
5489 while ((RAW == 0) && (ctxt->inputNr > 1))
5490 xmlPopInput(ctxt);
5491 if (!IS_BLANK_CH(CUR)) {
5492 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5493 "Space required after the element name\n");
5494 }
5495 SKIP_BLANKS;
5496 if (CMP5(CUR_PTR, 'E', 'M', 'P', 'T', 'Y')) {
5497 SKIP(5);
5498 /*
5499 * Element must always be empty.
5500 */
5501 ret = XML_ELEMENT_TYPE_EMPTY;
5502 } else if ((RAW == 'A') && (NXT(1) == 'N') &&
5503 (NXT(2) == 'Y')) {
5504 SKIP(3);
5505 /*
5506 * Element is a generic container.
5507 */
5508 ret = XML_ELEMENT_TYPE_ANY;
5509 } else if (RAW == '(') {
5510 ret = xmlParseElementContentDecl(ctxt, name, &content);
5511 } else {
5512 /*
5513 * [ WFC: PEs in Internal Subset ] error handling.
5514 */
5515 if ((RAW == '%') && (ctxt->external == 0) &&
5516 (ctxt->inputNr == 1)) {
5517 xmlFatalErrMsg(ctxt, XML_ERR_PEREF_IN_INT_SUBSET,
5518 "PEReference: forbidden within markup decl in internal subset\n");
5519 } else {
5520 xmlFatalErrMsg(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
5521 "xmlParseElementDecl: 'EMPTY', 'ANY' or '(' expected\n");
5522 }
5523 return(-1);
5524 }
5525
5526 SKIP_BLANKS;
5527 /*
5528 * Pop-up of finished entities.
5529 */
5530 while ((RAW == 0) && (ctxt->inputNr > 1))
5531 xmlPopInput(ctxt);
5532 SKIP_BLANKS;
5533
5534 if (RAW != '>') {
5535 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
5536 if (content != NULL) {
5537 xmlFreeDocElementContent(ctxt->myDoc, content);
5538 }
5539 } else {
5540 if (input != ctxt->input) {
5541 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5542 "Element declaration doesn't start and stop in the same entity\n");
5543 }
5544
5545 NEXT;
5546 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5547 (ctxt->sax->elementDecl != NULL)) {
5548 if (content != NULL)
5549 content->parent = NULL;
5550 ctxt->sax->elementDecl(ctxt->userData, name, ret,
5551 content);
5552 if ((content != NULL) && (content->parent == NULL)) {
5553 /*
5554 * this is a trick: if xmlAddElementDecl is called,
5555 * instead of copying the full tree it is plugged directly
5556 * if called from the parser. Avoid duplicating the
5557 * interfaces or change the API/ABI
5558 */
5559 xmlFreeDocElementContent(ctxt->myDoc, content);
5560 }
5561 } else if (content != NULL) {
5562 xmlFreeDocElementContent(ctxt->myDoc, content);
5563 }
5564 }
5565 }
5566 return(ret);
5567}
5568
5569/**
5570 * xmlParseConditionalSections
5571 * @ctxt: an XML parser context
5572 *
5573 * [61] conditionalSect ::= includeSect | ignoreSect
5574 * [62] includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>'
5575 * [63] ignoreSect ::= '<![' S? 'IGNORE' S? '[' ignoreSectContents* ']]>'
5576 * [64] ignoreSectContents ::= Ignore ('<![' ignoreSectContents ']]>' Ignore)*
5577 * [65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*)
5578 */
5579
5580static void
5581xmlParseConditionalSections(xmlParserCtxtPtr ctxt) {
5582 SKIP(3);
5583 SKIP_BLANKS;
5584 if (CMP7(CUR_PTR, 'I', 'N', 'C', 'L', 'U', 'D', 'E')) {
5585 SKIP(7);
5586 SKIP_BLANKS;
5587 if (RAW != '[') {
5588 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
5589 } else {
5590 NEXT;
5591 }
5592 if (xmlParserDebugEntities) {
5593 if ((ctxt->input != NULL) && (ctxt->input->filename))
5594 xmlGenericError(xmlGenericErrorContext,
5595 "%s(%d): ", ctxt->input->filename,
5596 ctxt->input->line);
5597 xmlGenericError(xmlGenericErrorContext,
5598 "Entering INCLUDE Conditional Section\n");
5599 }
5600
5601 while ((RAW != 0) && ((RAW != ']') || (NXT(1) != ']') ||
5602 (NXT(2) != '>'))) {
5603 const xmlChar *check = CUR_PTR;
5604 unsigned int cons = ctxt->input->consumed;
5605
5606 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
5607 xmlParseConditionalSections(ctxt);
5608 } else if (IS_BLANK_CH(CUR)) {
5609 NEXT;
5610 } else if (RAW == '%') {
5611 xmlParsePEReference(ctxt);
5612 } else
5613 xmlParseMarkupDecl(ctxt);
5614
5615 /*
5616 * Pop-up of finished entities.
5617 */
5618 while ((RAW == 0) && (ctxt->inputNr > 1))
5619 xmlPopInput(ctxt);
5620
5621 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
5622 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
5623 break;
5624 }
5625 }
5626 if (xmlParserDebugEntities) {
5627 if ((ctxt->input != NULL) && (ctxt->input->filename))
5628 xmlGenericError(xmlGenericErrorContext,
5629 "%s(%d): ", ctxt->input->filename,
5630 ctxt->input->line);
5631 xmlGenericError(xmlGenericErrorContext,
5632 "Leaving INCLUDE Conditional Section\n");
5633 }
5634
5635 } else if (CMP6(CUR_PTR, 'I', 'G', 'N', 'O', 'R', 'E')) {
5636 int state;
5637 xmlParserInputState instate;
5638 int depth = 0;
5639
5640 SKIP(6);
5641 SKIP_BLANKS;
5642 if (RAW != '[') {
5643 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
5644 } else {
5645 NEXT;
5646 }
5647 if (xmlParserDebugEntities) {
5648 if ((ctxt->input != NULL) && (ctxt->input->filename))
5649 xmlGenericError(xmlGenericErrorContext,
5650 "%s(%d): ", ctxt->input->filename,
5651 ctxt->input->line);
5652 xmlGenericError(xmlGenericErrorContext,
5653 "Entering IGNORE Conditional Section\n");
5654 }
5655
5656 /*
5657 * Parse up to the end of the conditional section
5658 * But disable SAX event generating DTD building in the meantime
5659 */
5660 state = ctxt->disableSAX;
5661 instate = ctxt->instate;
5662 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
5663 ctxt->instate = XML_PARSER_IGNORE;
5664
5665 while ((depth >= 0) && (RAW != 0)) {
5666 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
5667 depth++;
5668 SKIP(3);
5669 continue;
5670 }
5671 if ((RAW == ']') && (NXT(1) == ']') && (NXT(2) == '>')) {
5672 if (--depth >= 0) SKIP(3);
5673 continue;
5674 }
5675 NEXT;
5676 continue;
5677 }
5678
5679 ctxt->disableSAX = state;
5680 ctxt->instate = instate;
5681
5682 if (xmlParserDebugEntities) {
5683 if ((ctxt->input != NULL) && (ctxt->input->filename))
5684 xmlGenericError(xmlGenericErrorContext,
5685 "%s(%d): ", ctxt->input->filename,
5686 ctxt->input->line);
5687 xmlGenericError(xmlGenericErrorContext,
5688 "Leaving IGNORE Conditional Section\n");
5689 }
5690
5691 } else {
5692 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID_KEYWORD, NULL);
5693 }
5694
5695 if (RAW == 0)
5696 SHRINK;
5697
5698 if (RAW == 0) {
5699 xmlFatalErr(ctxt, XML_ERR_CONDSEC_NOT_FINISHED, NULL);
5700 } else {
5701 SKIP(3);
5702 }
5703}
5704
5705/**
5706 * xmlParseMarkupDecl:
5707 * @ctxt: an XML parser context
5708 *
5709 * parse Markup declarations
5710 *
5711 * [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl |
5712 * NotationDecl | PI | Comment
5713 *
5714 * [ VC: Proper Declaration/PE Nesting ]
5715 * Parameter-entity replacement text must be properly nested with
5716 * markup declarations. That is to say, if either the first character
5717 * or the last character of a markup declaration (markupdecl above) is
5718 * contained in the replacement text for a parameter-entity reference,
5719 * both must be contained in the same replacement text.
5720 *
5721 * [ WFC: PEs in Internal Subset ]
5722 * In the internal DTD subset, parameter-entity references can occur
5723 * only where markup declarations can occur, not within markup declarations.
5724 * (This does not apply to references that occur in external parameter
5725 * entities or to the external subset.)
5726 */
5727void
5728xmlParseMarkupDecl(xmlParserCtxtPtr ctxt) {
5729 GROW;
5730 if (CUR == '<') {
5731 if (NXT(1) == '!') {
5732 switch (NXT(2)) {
5733 case 'E':
5734 if (NXT(3) == 'L')
5735 xmlParseElementDecl(ctxt);
5736 else if (NXT(3) == 'N')
5737 xmlParseEntityDecl(ctxt);
5738 break;
5739 case 'A':
5740 xmlParseAttributeListDecl(ctxt);
5741 break;
5742 case 'N':
5743 xmlParseNotationDecl(ctxt);
5744 break;
5745 case '-':
5746 xmlParseComment(ctxt);
5747 break;
5748 default:
5749 /* there is an error but it will be detected later */
5750 break;
5751 }
5752 } else if (NXT(1) == '?') {
5753 xmlParsePI(ctxt);
5754 }
5755 }
5756 /*
5757 * This is only for internal subset. On external entities,
5758 * the replacement is done before parsing stage
5759 */
5760 if ((ctxt->external == 0) && (ctxt->inputNr == 1))
5761 xmlParsePEReference(ctxt);
5762
5763 /*
5764 * Conditional sections are allowed from entities included
5765 * by PE References in the internal subset.
5766 */
5767 if ((ctxt->external == 0) && (ctxt->inputNr > 1)) {
5768 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
5769 xmlParseConditionalSections(ctxt);
5770 }
5771 }
5772
5773 ctxt->instate = XML_PARSER_DTD;
5774}
5775
5776/**
5777 * xmlParseTextDecl:
5778 * @ctxt: an XML parser context
5779 *
5780 * parse an XML declaration header for external entities
5781 *
5782 * [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>'
5783 *
5784 * Question: Seems that EncodingDecl is mandatory ? Is that a typo ?
5785 */
5786
5787void
5788xmlParseTextDecl(xmlParserCtxtPtr ctxt) {
5789 xmlChar *version;
5790 const xmlChar *encoding;
5791
5792 /*
5793 * We know that '<?xml' is here.
5794 */
5795 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
5796 SKIP(5);
5797 } else {
5798 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_STARTED, NULL);
5799 return;
5800 }
5801
5802 if (!IS_BLANK_CH(CUR)) {
5803 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5804 "Space needed after '<?xml'\n");
5805 }
5806 SKIP_BLANKS;
5807
5808 /*
5809 * We may have the VersionInfo here.
5810 */
5811 version = xmlParseVersionInfo(ctxt);
5812 if (version == NULL)
5813 version = xmlCharStrdup(XML_DEFAULT_VERSION);
5814 else {
5815 if (!IS_BLANK_CH(CUR)) {
5816 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5817 "Space needed here\n");
5818 }
5819 }
5820 ctxt->input->version = version;
5821
5822 /*
5823 * We must have the encoding declaration
5824 */
5825 encoding = xmlParseEncodingDecl(ctxt);
5826 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
5827 /*
5828 * The XML REC instructs us to stop parsing right here
5829 */
5830 return;
5831 }
5832 if ((encoding == NULL) && (ctxt->errNo == XML_ERR_OK)) {
5833 xmlFatalErrMsg(ctxt, XML_ERR_MISSING_ENCODING,
5834 "Missing encoding in text declaration\n");
5835 }
5836
5837 SKIP_BLANKS;
5838 if ((RAW == '?') && (NXT(1) == '>')) {
5839 SKIP(2);
5840 } else if (RAW == '>') {
5841 /* Deprecated old WD ... */
5842 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
5843 NEXT;
5844 } else {
5845 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
5846 MOVETO_ENDTAG(CUR_PTR);
5847 NEXT;
5848 }
5849}
5850
5851/**
5852 * xmlParseExternalSubset:
5853 * @ctxt: an XML parser context
5854 * @ExternalID: the external identifier
5855 * @SystemID: the system identifier (or URL)
5856 *
5857 * parse Markup declarations from an external subset
5858 *
5859 * [30] extSubset ::= textDecl? extSubsetDecl
5860 *
5861 * [31] extSubsetDecl ::= (markupdecl | conditionalSect | PEReference | S) *
5862 */
5863void
5864xmlParseExternalSubset(xmlParserCtxtPtr ctxt, const xmlChar *ExternalID,
5865 const xmlChar *SystemID) {
5866 xmlDetectSAX2(ctxt);
5867 GROW;
5868 if (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) {
5869 xmlParseTextDecl(ctxt);
5870 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
5871 /*
5872 * The XML REC instructs us to stop parsing right here
5873 */
5874 ctxt->instate = XML_PARSER_EOF;
5875 return;
5876 }
5877 }
5878 if (ctxt->myDoc == NULL) {
5879 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
5880 }
5881 if ((ctxt->myDoc != NULL) && (ctxt->myDoc->intSubset == NULL))
5882 xmlCreateIntSubset(ctxt->myDoc, NULL, ExternalID, SystemID);
5883
5884 ctxt->instate = XML_PARSER_DTD;
5885 ctxt->external = 1;
5886 while (((RAW == '<') && (NXT(1) == '?')) ||
5887 ((RAW == '<') && (NXT(1) == '!')) ||
5888 (RAW == '%') || IS_BLANK_CH(CUR)) {
5889 const xmlChar *check = CUR_PTR;
5890 unsigned int cons = ctxt->input->consumed;
5891
5892 GROW;
5893 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
5894 xmlParseConditionalSections(ctxt);
5895 } else if (IS_BLANK_CH(CUR)) {
5896 NEXT;
5897 } else if (RAW == '%') {
5898 xmlParsePEReference(ctxt);
5899 } else
5900 xmlParseMarkupDecl(ctxt);
5901
5902 /*
5903 * Pop-up of finished entities.
5904 */
5905 while ((RAW == 0) && (ctxt->inputNr > 1))
5906 xmlPopInput(ctxt);
5907
5908 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
5909 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
5910 break;
5911 }
5912 }
5913
5914 if (RAW != 0) {
5915 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
5916 }
5917
5918}
5919
5920/**
5921 * xmlParseReference:
5922 * @ctxt: an XML parser context
5923 *
5924 * parse and handle entity references in content, depending on the SAX
5925 * interface, this may end-up in a call to character() if this is a
5926 * CharRef, a predefined entity, if there is no reference() callback.
5927 * or if the parser was asked to switch to that mode.
5928 *
5929 * [67] Reference ::= EntityRef | CharRef
5930 */
5931void
5932xmlParseReference(xmlParserCtxtPtr ctxt) {
5933 xmlEntityPtr ent;
5934 xmlChar *val;
5935 if (RAW != '&') return;
5936
5937 if (NXT(1) == '#') {
5938 int i = 0;
5939 xmlChar out[10];
5940 int hex = NXT(2);
5941 int value = xmlParseCharRef(ctxt);
5942
5943 if (ctxt->charset != XML_CHAR_ENCODING_UTF8) {
5944 /*
5945 * So we are using non-UTF-8 buffers
5946 * Check that the char fit on 8bits, if not
5947 * generate a CharRef.
5948 */
5949 if (value <= 0xFF) {
5950 out[0] = value;
5951 out[1] = 0;
5952 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
5953 (!ctxt->disableSAX))
5954 ctxt->sax->characters(ctxt->userData, out, 1);
5955 } else {
5956 if ((hex == 'x') || (hex == 'X'))
5957 snprintf((char *)out, sizeof(out), "#x%X", value);
5958 else
5959 snprintf((char *)out, sizeof(out), "#%d", value);
5960 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
5961 (!ctxt->disableSAX))
5962 ctxt->sax->reference(ctxt->userData, out);
5963 }
5964 } else {
5965 /*
5966 * Just encode the value in UTF-8
5967 */
5968 COPY_BUF(0 ,out, i, value);
5969 out[i] = 0;
5970 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
5971 (!ctxt->disableSAX))
5972 ctxt->sax->characters(ctxt->userData, out, i);
5973 }
5974 } else {
5975 int was_checked;
5976
5977 ent = xmlParseEntityRef(ctxt);
5978 if (ent == NULL) return;
5979 if (!ctxt->wellFormed)
5980 return;
5981 was_checked = ent->checked;
5982 if ((ent->name != NULL) &&
5983 (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY)) {
5984 xmlNodePtr list = NULL;
5985 xmlParserErrors ret = XML_ERR_OK;
5986
5987
5988 /*
5989 * The first reference to the entity trigger a parsing phase
5990 * where the ent->children is filled with the result from
5991 * the parsing.
5992 */
5993 if (ent->checked == 0) {
5994 xmlChar *value;
5995
5996 value = ent->content;
5997
5998 /*
5999 * Check that this entity is well formed
6000 */
6001 if ((value != NULL) && (value[0] != 0) &&
6002 (value[1] == 0) && (value[0] == '<') &&
6003 (xmlStrEqual(ent->name, BAD_CAST "lt"))) {
6004 /*
6005 * DONE: get definite answer on this !!!
6006 * Lots of entity decls are used to declare a single
6007 * char
6008 * <!ENTITY lt "<">
6009 * Which seems to be valid since
6010 * 2.4: The ampersand character (&) and the left angle
6011 * bracket (<) may appear in their literal form only
6012 * when used ... They are also legal within the literal
6013 * entity value of an internal entity declaration;i
6014 * see "4.3.2 Well-Formed Parsed Entities".
6015 * IMHO 2.4 and 4.3.2 are directly in contradiction.
6016 * Looking at the OASIS test suite and James Clark
6017 * tests, this is broken. However the XML REC uses
6018 * it. Is the XML REC not well-formed ????
6019 * This is a hack to avoid this problem
6020 *
6021 * ANSWER: since lt gt amp .. are already defined,
6022 * this is a redefinition and hence the fact that the
6023 * content is not well balanced is not a Wf error, this
6024 * is lousy but acceptable.
6025 */
6026 list = xmlNewDocText(ctxt->myDoc, value);
6027 if (list != NULL) {
6028 if ((ent->etype == XML_INTERNAL_GENERAL_ENTITY) &&
6029 (ent->children == NULL)) {
6030 ent->children = list;
6031 ent->last = list;
6032 ent->owner = 1;
6033 list->parent = (xmlNodePtr) ent;
6034 } else {
6035 xmlFreeNodeList(list);
6036 }
6037 } else if (list != NULL) {
6038 xmlFreeNodeList(list);
6039 }
6040 } else {
6041 /*
6042 * 4.3.2: An internal general parsed entity is well-formed
6043 * if its replacement text matches the production labeled
6044 * content.
6045 */
6046
6047 void *user_data;
6048 /*
6049 * This is a bit hackish but this seems the best
6050 * way to make sure both SAX and DOM entity support
6051 * behaves okay.
6052 */
6053 if (ctxt->userData == ctxt)
6054 user_data = NULL;
6055 else
6056 user_data = ctxt->userData;
6057
6058 if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
6059 ctxt->depth++;
6060 ret = xmlParseBalancedChunkMemoryInternal(ctxt,
6061 value, user_data, &list);
6062 ctxt->depth--;
6063 } else if (ent->etype ==
6064 XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
6065 ctxt->depth++;
6066 ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt,
6067 ctxt->sax, user_data, ctxt->depth,
6068 ent->URI, ent->ExternalID, &list);
6069 ctxt->depth--;
6070 } else {
6071 ret = XML_ERR_ENTITY_PE_INTERNAL;
6072 xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR,
6073 "invalid entity type found\n", NULL);
6074 }
6075 if (ret == XML_ERR_ENTITY_LOOP) {
6076 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
6077 return;
6078 } else if ((ret == XML_ERR_OK) && (list != NULL)) {
6079 if (((ent->etype == XML_INTERNAL_GENERAL_ENTITY) ||
6080 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY))&&
6081 (ent->children == NULL)) {
6082 ent->children = list;
6083 if (ctxt->replaceEntities) {
6084 /*
6085 * Prune it directly in the generated document
6086 * except for single text nodes.
6087 */
6088 if (((list->type == XML_TEXT_NODE) &&
6089 (list->next == NULL)) ||
6090 (ctxt->parseMode == XML_PARSE_READER)) {
6091 list->parent = (xmlNodePtr) ent;
6092 list = NULL;
6093 ent->owner = 1;
6094 } else {
6095 ent->owner = 0;
6096 while (list != NULL) {
6097 list->parent = (xmlNodePtr) ctxt->node;
6098 list->doc = ctxt->myDoc;
6099 if (list->next == NULL)
6100 ent->last = list;
6101 list = list->next;
6102 }
6103 list = ent->children;
6104#ifdef LIBXML_LEGACY_ENABLED
6105 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
6106 xmlAddEntityReference(ent, list, NULL);
6107#endif /* LIBXML_LEGACY_ENABLED */
6108 }
6109 } else {
6110 ent->owner = 1;
6111 while (list != NULL) {
6112 list->parent = (xmlNodePtr) ent;
6113 if (list->next == NULL)
6114 ent->last = list;
6115 list = list->next;
6116 }
6117 }
6118 } else {
6119 xmlFreeNodeList(list);
6120 list = NULL;
6121 }
6122 } else if ((ret != XML_ERR_OK) &&
6123 (ret != XML_WAR_UNDECLARED_ENTITY)) {
6124 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
6125 "Entity '%s' failed to parse\n", ent->name);
6126 } else if (list != NULL) {
6127 xmlFreeNodeList(list);
6128 list = NULL;
6129 }
6130 }
6131 ent->checked = 1;
6132 }
6133
6134 if (ent->children == NULL) {
6135 /*
6136 * Probably running in SAX mode and the callbacks don't
6137 * build the entity content. So unless we already went
6138 * though parsing for first checking go though the entity
6139 * content to generate callbacks associated to the entity
6140 */
6141 if (was_checked == 1) {
6142 void *user_data;
6143 /*
6144 * This is a bit hackish but this seems the best
6145 * way to make sure both SAX and DOM entity support
6146 * behaves okay.
6147 */
6148 if (ctxt->userData == ctxt)
6149 user_data = NULL;
6150 else
6151 user_data = ctxt->userData;
6152
6153 if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
6154 ctxt->depth++;
6155 ret = xmlParseBalancedChunkMemoryInternal(ctxt,
6156 ent->content, user_data, NULL);
6157 ctxt->depth--;
6158 } else if (ent->etype ==
6159 XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
6160 ctxt->depth++;
6161 ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt,
6162 ctxt->sax, user_data, ctxt->depth,
6163 ent->URI, ent->ExternalID, NULL);
6164 ctxt->depth--;
6165 } else {
6166 ret = XML_ERR_ENTITY_PE_INTERNAL;
6167 xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR,
6168 "invalid entity type found\n", NULL);
6169 }
6170 if (ret == XML_ERR_ENTITY_LOOP) {
6171 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
6172 return;
6173 }
6174 }
6175 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
6176 (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
6177 /*
6178 * Entity reference callback comes second, it's somewhat
6179 * superfluous but a compatibility to historical behaviour
6180 */
6181 ctxt->sax->reference(ctxt->userData, ent->name);
6182 }
6183 return;
6184 }
6185 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
6186 (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
6187 /*
6188 * Create a node.
6189 */
6190 ctxt->sax->reference(ctxt->userData, ent->name);
6191 return;
6192 }
6193 if ((ctxt->replaceEntities) || (ent->children == NULL)) {
6194 /*
6195 * There is a problem on the handling of _private for entities
6196 * (bug 155816): Should we copy the content of the field from
6197 * the entity (possibly overwriting some value set by the user
6198 * when a copy is created), should we leave it alone, or should
6199 * we try to take care of different situations? The problem
6200 * is exacerbated by the usage of this field by the xmlReader.
6201 * To fix this bug, we look at _private on the created node
6202 * and, if it's NULL, we copy in whatever was in the entity.
6203 * If it's not NULL we leave it alone. This is somewhat of a
6204 * hack - maybe we should have further tests to determine
6205 * what to do.
6206 */
6207 if ((ctxt->node != NULL) && (ent->children != NULL)) {
6208 /*
6209 * Seems we are generating the DOM content, do
6210 * a simple tree copy for all references except the first
6211 * In the first occurrence list contains the replacement.
6212 * progressive == 2 means we are operating on the Reader
6213 * and since nodes are discarded we must copy all the time.
6214 */
6215 if (((list == NULL) && (ent->owner == 0)) ||
6216 (ctxt->parseMode == XML_PARSE_READER)) {
6217 xmlNodePtr nw = NULL, cur, firstChild = NULL;
6218
6219 /*
6220 * when operating on a reader, the entities definitions
6221 * are always owning the entities subtree.
6222 if (ctxt->parseMode == XML_PARSE_READER)
6223 ent->owner = 1;
6224 */
6225
6226 cur = ent->children;
6227 while (cur != NULL) {
6228 nw = xmlDocCopyNode(cur, ctxt->myDoc, 1);
6229 if (nw != NULL) {
6230 if (nw->_private == NULL)
6231 nw->_private = cur->_private;
6232 if (firstChild == NULL){
6233 firstChild = nw;
6234 }
6235 nw = xmlAddChild(ctxt->node, nw);
6236 }
6237 if (cur == ent->last) {
6238 /*
6239 * needed to detect some strange empty
6240 * node cases in the reader tests
6241 */
6242 if ((ctxt->parseMode == XML_PARSE_READER) &&
6243 (nw != NULL) &&
6244 (nw->type == XML_ELEMENT_NODE) &&
6245 (nw->children == NULL))
6246 nw->extra = 1;
6247
6248 break;
6249 }
6250 cur = cur->next;
6251 }
6252#ifdef LIBXML_LEGACY_ENABLED
6253 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
6254 xmlAddEntityReference(ent, firstChild, nw);
6255#endif /* LIBXML_LEGACY_ENABLED */
6256 } else if (list == NULL) {
6257 xmlNodePtr nw = NULL, cur, next, last,
6258 firstChild = NULL;
6259 /*
6260 * Copy the entity child list and make it the new
6261 * entity child list. The goal is to make sure any
6262 * ID or REF referenced will be the one from the
6263 * document content and not the entity copy.
6264 */
6265 cur = ent->children;
6266 ent->children = NULL;
6267 last = ent->last;
6268 ent->last = NULL;
6269 while (cur != NULL) {
6270 next = cur->next;
6271 cur->next = NULL;
6272 cur->parent = NULL;
6273 nw = xmlDocCopyNode(cur, ctxt->myDoc, 1);
6274 if (nw != NULL) {
6275 if (nw->_private == NULL)
6276 nw->_private = cur->_private;
6277 if (firstChild == NULL){
6278 firstChild = cur;
6279 }
6280 xmlAddChild((xmlNodePtr) ent, nw);
6281 xmlAddChild(ctxt->node, cur);
6282 }
6283 if (cur == last)
6284 break;
6285 cur = next;
6286 }
6287 ent->owner = 1;
6288#ifdef LIBXML_LEGACY_ENABLED
6289 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
6290 xmlAddEntityReference(ent, firstChild, nw);
6291#endif /* LIBXML_LEGACY_ENABLED */
6292 } else {
6293 const xmlChar *nbktext;
6294
6295 /*
6296 * the name change is to avoid coalescing of the
6297 * node with a possible previous text one which
6298 * would make ent->children a dangling pointer
6299 */
6300 nbktext = xmlDictLookup(ctxt->dict, BAD_CAST "nbktext",
6301 -1);
6302 if (ent->children->type == XML_TEXT_NODE)
6303 ent->children->name = nbktext;
6304 if ((ent->last != ent->children) &&
6305 (ent->last->type == XML_TEXT_NODE))
6306 ent->last->name = nbktext;
6307 xmlAddChildList(ctxt->node, ent->children);
6308 }
6309
6310 /*
6311 * This is to avoid a nasty side effect, see
6312 * characters() in SAX.c
6313 */
6314 ctxt->nodemem = 0;
6315 ctxt->nodelen = 0;
6316 return;
6317 }
6318 }
6319 } else {
6320 val = ent->content;
6321 if (val == NULL) return;
6322 /*
6323 * inline the entity.
6324 */
6325 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
6326 (!ctxt->disableSAX))
6327 ctxt->sax->characters(ctxt->userData, val, xmlStrlen(val));
6328 }
6329 }
6330}
6331
6332/**
6333 * xmlParseEntityRef:
6334 * @ctxt: an XML parser context
6335 *
6336 * parse ENTITY references declarations
6337 *
6338 * [68] EntityRef ::= '&' Name ';'
6339 *
6340 * [ WFC: Entity Declared ]
6341 * In a document without any DTD, a document with only an internal DTD
6342 * subset which contains no parameter entity references, or a document
6343 * with "standalone='yes'", the Name given in the entity reference
6344 * must match that in an entity declaration, except that well-formed
6345 * documents need not declare any of the following entities: amp, lt,
6346 * gt, apos, quot. The declaration of a parameter entity must precede
6347 * any reference to it. Similarly, the declaration of a general entity
6348 * must precede any reference to it which appears in a default value in an
6349 * attribute-list declaration. Note that if entities are declared in the
6350 * external subset or in external parameter entities, a non-validating
6351 * processor is not obligated to read and process their declarations;
6352 * for such documents, the rule that an entity must be declared is a
6353 * well-formedness constraint only if standalone='yes'.
6354 *
6355 * [ WFC: Parsed Entity ]
6356 * An entity reference must not contain the name of an unparsed entity
6357 *
6358 * Returns the xmlEntityPtr if found, or NULL otherwise.
6359 */
6360xmlEntityPtr
6361xmlParseEntityRef(xmlParserCtxtPtr ctxt) {
6362 const xmlChar *name;
6363 xmlEntityPtr ent = NULL;
6364
6365 GROW;
6366
6367 if (RAW == '&') {
6368 NEXT;
6369 name = xmlParseName(ctxt);
6370 if (name == NULL) {
6371 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6372 "xmlParseEntityRef: no name\n");
6373 } else {
6374 if (RAW == ';') {
6375 NEXT;
6376 /*
6377 * Ask first SAX for entity resolution, otherwise try the
6378 * predefined set.
6379 */
6380 if (ctxt->sax != NULL) {
6381 if (ctxt->sax->getEntity != NULL)
6382 ent = ctxt->sax->getEntity(ctxt->userData, name);
6383 if ((ctxt->wellFormed == 1 ) && (ent == NULL))
6384 ent = xmlGetPredefinedEntity(name);
6385 if ((ctxt->wellFormed == 1 ) && (ent == NULL) &&
6386 (ctxt->userData==ctxt)) {
6387 ent = xmlSAX2GetEntity(ctxt, name);
6388 }
6389 }
6390 /*
6391 * [ WFC: Entity Declared ]
6392 * In a document without any DTD, a document with only an
6393 * internal DTD subset which contains no parameter entity
6394 * references, or a document with "standalone='yes'", the
6395 * Name given in the entity reference must match that in an
6396 * entity declaration, except that well-formed documents
6397 * need not declare any of the following entities: amp, lt,
6398 * gt, apos, quot.
6399 * The declaration of a parameter entity must precede any
6400 * reference to it.
6401 * Similarly, the declaration of a general entity must
6402 * precede any reference to it which appears in a default
6403 * value in an attribute-list declaration. Note that if
6404 * entities are declared in the external subset or in
6405 * external parameter entities, a non-validating processor
6406 * is not obligated to read and process their declarations;
6407 * for such documents, the rule that an entity must be
6408 * declared is a well-formedness constraint only if
6409 * standalone='yes'.
6410 */
6411 if (ent == NULL) {
6412 if ((ctxt->standalone == 1) ||
6413 ((ctxt->hasExternalSubset == 0) &&
6414 (ctxt->hasPErefs == 0))) {
6415 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
6416 "Entity '%s' not defined\n", name);
6417 } else {
6418 xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
6419 "Entity '%s' not defined\n", name);
6420 if ((ctxt->inSubset == 0) &&
6421 (ctxt->sax != NULL) &&
6422 (ctxt->sax->reference != NULL)) {
6423 ctxt->sax->reference(ctxt->userData, name);
6424 }
6425 }
6426 ctxt->valid = 0;
6427 }
6428
6429 /*
6430 * [ WFC: Parsed Entity ]
6431 * An entity reference must not contain the name of an
6432 * unparsed entity
6433 */
6434 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
6435 xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
6436 "Entity reference to unparsed entity %s\n", name);
6437 }
6438
6439 /*
6440 * [ WFC: No External Entity References ]
6441 * Attribute values cannot contain direct or indirect
6442 * entity references to external entities.
6443 */
6444 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
6445 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
6446 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
6447 "Attribute references external entity '%s'\n", name);
6448 }
6449 /*
6450 * [ WFC: No < in Attribute Values ]
6451 * The replacement text of any entity referred to directly or
6452 * indirectly in an attribute value (other than "&lt;") must
6453 * not contain a <.
6454 */
6455 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
6456 (ent != NULL) &&
6457 (!xmlStrEqual(ent->name, BAD_CAST "lt")) &&
6458 (ent->content != NULL) &&
6459 (xmlStrchr(ent->content, '<'))) {
6460 xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
6461 "'<' in entity '%s' is not allowed in attributes values\n", name);
6462 }
6463
6464 /*
6465 * Internal check, no parameter entities here ...
6466 */
6467 else {
6468 switch (ent->etype) {
6469 case XML_INTERNAL_PARAMETER_ENTITY:
6470 case XML_EXTERNAL_PARAMETER_ENTITY:
6471 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
6472 "Attempt to reference the parameter entity '%s'\n",
6473 name);
6474 break;
6475 default:
6476 break;
6477 }
6478 }
6479
6480 /*
6481 * [ WFC: No Recursion ]
6482 * A parsed entity must not contain a recursive reference
6483 * to itself, either directly or indirectly.
6484 * Done somewhere else
6485 */
6486
6487 } else {
6488 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
6489 }
6490 }
6491 }
6492 return(ent);
6493}
6494
6495/**
6496 * xmlParseStringEntityRef:
6497 * @ctxt: an XML parser context
6498 * @str: a pointer to an index in the string
6499 *
6500 * parse ENTITY references declarations, but this version parses it from
6501 * a string value.
6502 *
6503 * [68] EntityRef ::= '&' Name ';'
6504 *
6505 * [ WFC: Entity Declared ]
6506 * In a document without any DTD, a document with only an internal DTD
6507 * subset which contains no parameter entity references, or a document
6508 * with "standalone='yes'", the Name given in the entity reference
6509 * must match that in an entity declaration, except that well-formed
6510 * documents need not declare any of the following entities: amp, lt,
6511 * gt, apos, quot. The declaration of a parameter entity must precede
6512 * any reference to it. Similarly, the declaration of a general entity
6513 * must precede any reference to it which appears in a default value in an
6514 * attribute-list declaration. Note that if entities are declared in the
6515 * external subset or in external parameter entities, a non-validating
6516 * processor is not obligated to read and process their declarations;
6517 * for such documents, the rule that an entity must be declared is a
6518 * well-formedness constraint only if standalone='yes'.
6519 *
6520 * [ WFC: Parsed Entity ]
6521 * An entity reference must not contain the name of an unparsed entity
6522 *
6523 * Returns the xmlEntityPtr if found, or NULL otherwise. The str pointer
6524 * is updated to the current location in the string.
6525 */
6526xmlEntityPtr
6527xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar ** str) {
6528 xmlChar *name;
6529 const xmlChar *ptr;
6530 xmlChar cur;
6531 xmlEntityPtr ent = NULL;
6532
6533 if ((str == NULL) || (*str == NULL))
6534 return(NULL);
6535 ptr = *str;
6536 cur = *ptr;
6537 if (cur == '&') {
6538 ptr++;
6539 cur = *ptr;
6540 name = xmlParseStringName(ctxt, &ptr);
6541 if (name == NULL) {
6542 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6543 "xmlParseStringEntityRef: no name\n");
6544 } else {
6545 if (*ptr == ';') {
6546 ptr++;
6547 /*
6548 * Ask first SAX for entity resolution, otherwise try the
6549 * predefined set.
6550 */
6551 if (ctxt->sax != NULL) {
6552 if (ctxt->sax->getEntity != NULL)
6553 ent = ctxt->sax->getEntity(ctxt->userData, name);
6554 if (ent == NULL)
6555 ent = xmlGetPredefinedEntity(name);
6556 if ((ent == NULL) && (ctxt->userData==ctxt)) {
6557 ent = xmlSAX2GetEntity(ctxt, name);
6558 }
6559 }
6560 /*
6561 * [ WFC: Entity Declared ]
6562 * In a document without any DTD, a document with only an
6563 * internal DTD subset which contains no parameter entity
6564 * references, or a document with "standalone='yes'", the
6565 * Name given in the entity reference must match that in an
6566 * entity declaration, except that well-formed documents
6567 * need not declare any of the following entities: amp, lt,
6568 * gt, apos, quot.
6569 * The declaration of a parameter entity must precede any
6570 * reference to it.
6571 * Similarly, the declaration of a general entity must
6572 * precede any reference to it which appears in a default
6573 * value in an attribute-list declaration. Note that if
6574 * entities are declared in the external subset or in
6575 * external parameter entities, a non-validating processor
6576 * is not obligated to read and process their declarations;
6577 * for such documents, the rule that an entity must be
6578 * declared is a well-formedness constraint only if
6579 * standalone='yes'.
6580 */
6581 if (ent == NULL) {
6582 if ((ctxt->standalone == 1) ||
6583 ((ctxt->hasExternalSubset == 0) &&
6584 (ctxt->hasPErefs == 0))) {
6585 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
6586 "Entity '%s' not defined\n", name);
6587 } else {
6588 xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
6589 "Entity '%s' not defined\n",
6590 name);
6591 }
6592 /* TODO ? check regressions ctxt->valid = 0; */
6593 }
6594
6595 /*
6596 * [ WFC: Parsed Entity ]
6597 * An entity reference must not contain the name of an
6598 * unparsed entity
6599 */
6600 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
6601 xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
6602 "Entity reference to unparsed entity %s\n", name);
6603 }
6604
6605 /*
6606 * [ WFC: No External Entity References ]
6607 * Attribute values cannot contain direct or indirect
6608 * entity references to external entities.
6609 */
6610 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
6611 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
6612 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
6613 "Attribute references external entity '%s'\n", name);
6614 }
6615 /*
6616 * [ WFC: No < in Attribute Values ]
6617 * The replacement text of any entity referred to directly or
6618 * indirectly in an attribute value (other than "&lt;") must
6619 * not contain a <.
6620 */
6621 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
6622 (ent != NULL) &&
6623 (!xmlStrEqual(ent->name, BAD_CAST "lt")) &&
6624 (ent->content != NULL) &&
6625 (xmlStrchr(ent->content, '<'))) {
6626 xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
6627 "'<' in entity '%s' is not allowed in attributes values\n",
6628 name);
6629 }
6630
6631 /*
6632 * Internal check, no parameter entities here ...
6633 */
6634 else {
6635 switch (ent->etype) {
6636 case XML_INTERNAL_PARAMETER_ENTITY:
6637 case XML_EXTERNAL_PARAMETER_ENTITY:
6638 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
6639 "Attempt to reference the parameter entity '%s'\n",
6640 name);
6641 break;
6642 default:
6643 break;
6644 }
6645 }
6646
6647 /*
6648 * [ WFC: No Recursion ]
6649 * A parsed entity must not contain a recursive reference
6650 * to itself, either directly or indirectly.
6651 * Done somewhere else
6652 */
6653
6654 } else {
6655 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
6656 }
6657 xmlFree(name);
6658 }
6659 }
6660 *str = ptr;
6661 return(ent);
6662}
6663
6664/**
6665 * xmlParsePEReference:
6666 * @ctxt: an XML parser context
6667 *
6668 * parse PEReference declarations
5669 * The entity content is handled directly by pushing its content as
5670 * a new input stream.
6671 *
6672 * [69] PEReference ::= '%' Name ';'
6673 *
6674 * [ WFC: No Recursion ]
6675 * A parsed entity must not contain a recursive
6676 * reference to itself, either directly or indirectly.
6677 *
6678 * [ WFC: Entity Declared ]
6679 * In a document without any DTD, a document with only an internal DTD
6680 * subset which contains no parameter entity references, or a document
6681 * with "standalone='yes'", ... ... The declaration of a parameter
6682 * entity must precede any reference to it...
6683 *
6684 * [ VC: Entity Declared ]
6685 * In a document with an external subset or external parameter entities
6686 * with "standalone='no'", ... ... The declaration of a parameter entity
6687 * must precede any reference to it...
6688 *
6689 * [ WFC: In DTD ]
6690 * Parameter-entity references may only appear in the DTD.
6691 * NOTE: misleading but this is handled.
6692 */
6693void
6694xmlParsePEReference(xmlParserCtxtPtr ctxt)
6695{
6696 const xmlChar *name;
6697 xmlEntityPtr entity = NULL;
6698 xmlParserInputPtr input;
6699
6700 if (RAW == '%') {
6701 NEXT;
6702 name = xmlParseName(ctxt);
6703 if (name == NULL) {
6704 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6705 "xmlParsePEReference: no name\n");
6706 } else {
6707 if (RAW == ';') {
6708 NEXT;
6709 if ((ctxt->sax != NULL) &&
6710 (ctxt->sax->getParameterEntity != NULL))
6711 entity = ctxt->sax->getParameterEntity(ctxt->userData,
6712 name);
6713 if (entity == NULL) {
6714 /*
6715 * [ WFC: Entity Declared ]
6716 * In a document without any DTD, a document with only an
6717 * internal DTD subset which contains no parameter entity
6718 * references, or a document with "standalone='yes'", ...
6719 * ... The declaration of a parameter entity must precede
6720 * any reference to it...
6721 */
6722 if ((ctxt->standalone == 1) ||
6723 ((ctxt->hasExternalSubset == 0) &&
6724 (ctxt->hasPErefs == 0))) {
6725 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
6726 "PEReference: %%%s; not found\n",
6727 name);
6728 } else {
6729 /*
6730 * [ VC: Entity Declared ]
6731 * In a document with an external subset or external
6732 * parameter entities with "standalone='no'", ...
6733 * ... The declaration of a parameter entity must
6734 * precede any reference to it...
6735 */
6736 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
6737 "PEReference: %%%s; not found\n",
6738 name, NULL);
6739 ctxt->valid = 0;
6740 }
6741 } else {
6742 /*
6743 * Internal checking in case the entity quest barfed
6744 */
6745 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
6746 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
6747 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
6748 "Internal: %%%s; is not a parameter entity\n",
6749 name, NULL);
6750 } else if (ctxt->input->free != deallocblankswrapper) {
6751 input =
6752 xmlNewBlanksWrapperInputStream(ctxt, entity);
6753 xmlPushInput(ctxt, input);
6754 } else {
6755 /*
6756 * TODO !!!
6757 * handle the extra spaces added before and after
6758 * c.f. http://www.w3.org/TR/REC-xml#as-PE
6759 */
6760 input = xmlNewEntityInputStream(ctxt, entity);
6761 xmlPushInput(ctxt, input);
6762 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
6763 (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) &&
6764 (IS_BLANK_CH(NXT(5)))) {
6765 xmlParseTextDecl(ctxt);
6766 if (ctxt->errNo ==
6767 XML_ERR_UNSUPPORTED_ENCODING) {
6768 /*
6769 * The XML REC instructs us to stop parsing
6770 * right here
6771 */
6772 ctxt->instate = XML_PARSER_EOF;
6773 return;
6774 }
6775 }
6776 }
6777 }
6778 ctxt->hasPErefs = 1;
6779 } else {
6780 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
6781 }
6782 }
6783 }
6784}
6785
6786/**
6787 * xmlParseStringPEReference:
6788 * @ctxt: an XML parser context
6789 * @str: a pointer to an index in the string
6790 *
6791 * parse PEReference declarations
6792 *
6793 * [69] PEReference ::= '%' Name ';'
6794 *
6795 * [ WFC: No Recursion ]
6796 * A parsed entity must not contain a recursive
6797 * reference to itself, either directly or indirectly.
6798 *
6799 * [ WFC: Entity Declared ]
6800 * In a document without any DTD, a document with only an internal DTD
6801 * subset which contains no parameter entity references, or a document
6802 * with "standalone='yes'", ... ... The declaration of a parameter
6803 * entity must precede any reference to it...
6804 *
6805 * [ VC: Entity Declared ]
6806 * In a document with an external subset or external parameter entities
6807 * with "standalone='no'", ... ... The declaration of a parameter entity
6808 * must precede any reference to it...
6809 *
6810 * [ WFC: In DTD ]
6811 * Parameter-entity references may only appear in the DTD.
6812 * NOTE: misleading but this is handled.
6813 *
 * Returns the entity found for the reference, or NULL if the reference is
 * malformed or no such entity is known.
 * str is updated to the current value of the index
6816 */
6817xmlEntityPtr
6818xmlParseStringPEReference(xmlParserCtxtPtr ctxt, const xmlChar **str) {
6819 const xmlChar *ptr;
6820 xmlChar cur;
6821 xmlChar *name;
6822 xmlEntityPtr entity = NULL;
6823
6824 if ((str == NULL) || (*str == NULL)) return(NULL);
6825 ptr = *str;
6826 cur = *ptr;
6827 if (cur == '%') {
6828 ptr++;
6829 cur = *ptr;
6830 name = xmlParseStringName(ctxt, &ptr);
6831 if (name == NULL) {
6832 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6833 "xmlParseStringPEReference: no name\n");
6834 } else {
6835 cur = *ptr;
6836 if (cur == ';') {
6837 ptr++;
6838 cur = *ptr;
6839 if ((ctxt->sax != NULL) &&
6840 (ctxt->sax->getParameterEntity != NULL))
6841 entity = ctxt->sax->getParameterEntity(ctxt->userData,
6842 name);
6843 if (entity == NULL) {
6844 /*
6845 * [ WFC: Entity Declared ]
6846 * In a document without any DTD, a document with only an
6847 * internal DTD subset which contains no parameter entity
6848 * references, or a document with "standalone='yes'", ...
6849 * ... The declaration of a parameter entity must precede
6850 * any reference to it...
6851 */
6852 if ((ctxt->standalone == 1) ||
6853 ((ctxt->hasExternalSubset == 0) &&
6854 (ctxt->hasPErefs == 0))) {
6855 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
6856 "PEReference: %%%s; not found\n", name);
6857 } else {
6858 /*
6859 * [ VC: Entity Declared ]
6860 * In a document with an external subset or external
6861 * parameter entities with "standalone='no'", ...
6862 * ... The declaration of a parameter entity must
6863 * precede any reference to it...
6864 */
6865 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
6866 "PEReference: %%%s; not found\n",
6867 name, NULL);
6868 ctxt->valid = 0;
6869 }
6870 } else {
6871 /*
6872 * Internal checking in case the entity quest barfed
6873 */
6874 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
6875 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
6876 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
6877 "%%%s; is not a parameter entity\n",
6878 name, NULL);
6879 }
6880 }
6881 ctxt->hasPErefs = 1;
6882 } else {
6883 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
6884 }
6885 xmlFree(name);
6886 }
6887 }
6888 *str = ptr;
6889 return(entity);
6890}
6891
6892/**
6893 * xmlParseDocTypeDecl:
6894 * @ctxt: an XML parser context
6895 *
6896 * parse a DOCTYPE declaration
6897 *
6898 * [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S?
6899 * ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
6900 *
6901 * [ VC: Root Element Type ]
6902 * The Name in the document type declaration must match the element
6903 * type of the root element.
6904 */
6905
6906void
6907xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt) {
6908 const xmlChar *name = NULL;
6909 xmlChar *ExternalID = NULL;
6910 xmlChar *URI = NULL;
6911
6912 /*
6913 * We know that '<!DOCTYPE' has been detected.
6914 */
6915 SKIP(9);
6916
6917 SKIP_BLANKS;
6918
6919 /*
6920 * Parse the DOCTYPE name.
6921 */
6922 name = xmlParseName(ctxt);
6923 if (name == NULL) {
6924 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6925 "xmlParseDocTypeDecl : no DOCTYPE name !\n");
6926 }
6927 ctxt->intSubName = name;
6928
6929 SKIP_BLANKS;
6930
6931 /*
6932 * Check for SystemID and ExternalID
6933 */
6934 URI = xmlParseExternalID(ctxt, &ExternalID, 1);
6935
6936 if ((URI != NULL) || (ExternalID != NULL)) {
6937 ctxt->hasExternalSubset = 1;
6938 }
6939 ctxt->extSubURI = URI;
6940 ctxt->extSubSystem = ExternalID;
6941
6942 SKIP_BLANKS;
6943
6944 /*
6945 * Create and update the internal subset.
6946 */
6947 if ((ctxt->sax != NULL) && (ctxt->sax->internalSubset != NULL) &&
6948 (!ctxt->disableSAX))
6949 ctxt->sax->internalSubset(ctxt->userData, name, ExternalID, URI);
6950
6951 /*
6952 * Is there any internal subset declarations ?
6953 * they are handled separately in xmlParseInternalSubset()
6954 */
6955 if (RAW == '[')
6956 return;
6957
6958 /*
6959 * We should be at the end of the DOCTYPE declaration.
6960 */
6961 if (RAW != '>') {
6962 xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
6963 }
6964 NEXT;
6965}
6966
6967/**
6968 * xmlParseInternalSubset:
6969 * @ctxt: an XML parser context
6970 *
6971 * parse the internal subset declaration
6972 *
6973 * [28 end] ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
6974 */
6975
6976static void
6977xmlParseInternalSubset(xmlParserCtxtPtr ctxt) {
6978 /*
6979 * Is there any DTD definition ?
6980 */
6981 if (RAW == '[') {
6982 ctxt->instate = XML_PARSER_DTD;
6983 NEXT;
6984 /*
6985 * Parse the succession of Markup declarations and
6986 * PEReferences.
6987 * Subsequence (markupdecl | PEReference | S)*
6988 */
6989 while (RAW != ']') {
6990 const xmlChar *check = CUR_PTR;
6991 unsigned int cons = ctxt->input->consumed;
6992
6993 SKIP_BLANKS;
6994 xmlParseMarkupDecl(ctxt);
6995 xmlParsePEReference(ctxt);
6996
6997 /*
6998 * Pop-up of finished entities.
6999 */
7000 while ((RAW == 0) && (ctxt->inputNr > 1))
7001 xmlPopInput(ctxt);
7002
7003 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
7004 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
7005 "xmlParseInternalSubset: error detected in Markup declaration\n");
7006 break;
7007 }
7008 }
7009 if (RAW == ']') {
7010 NEXT;
7011 SKIP_BLANKS;
7012 }
7013 }
7014
7015 /*
7016 * We should be at the end of the DOCTYPE declaration.
7017 */
7018 if (RAW != '>') {
7019 xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
7020 }
7021 NEXT;
7022}
7023
7024#ifdef LIBXML_SAX1_ENABLED
7025/**
7026 * xmlParseAttribute:
7027 * @ctxt: an XML parser context
7028 * @value: a xmlChar ** used to store the value of the attribute
7029 *
7030 * parse an attribute
7031 *
7032 * [41] Attribute ::= Name Eq AttValue
7033 *
7034 * [ WFC: No External Entity References ]
7035 * Attribute values cannot contain direct or indirect entity references
7036 * to external entities.
7037 *
7038 * [ WFC: No < in Attribute Values ]
7039 * The replacement text of any entity referred to directly or indirectly in
7040 * an attribute value (other than "&lt;") must not contain a <.
7041 *
7042 * [ VC: Attribute Value Type ]
7043 * The attribute must have been declared; the value must be of the type
7044 * declared for it.
7045 *
7046 * [25] Eq ::= S? '=' S?
7047 *
7048 * With namespace:
7049 *
7050 * [NS 11] Attribute ::= QName Eq AttValue
7051 *
7052 * Also the case QName == xmlns:??? is handled independently as a namespace
7053 * definition.
7054 *
7055 * Returns the attribute name, and the value in *value.
7056 */
7057
7058const xmlChar *
7059xmlParseAttribute(xmlParserCtxtPtr ctxt, xmlChar **value) {
7060 const xmlChar *name;
7061 xmlChar *val;
7062
7063 *value = NULL;
7064 GROW;
7065 name = xmlParseName(ctxt);
7066 if (name == NULL) {
7067 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7068 "error parsing attribute name\n");
7069 return(NULL);
7070 }
7071
7072 /*
7073 * read the value
7074 */
7075 SKIP_BLANKS;
7076 if (RAW == '=') {
7077 NEXT;
7078 SKIP_BLANKS;
7079 val = xmlParseAttValue(ctxt);
7080 ctxt->instate = XML_PARSER_CONTENT;
7081 } else {
7082 xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
7083 "Specification mandate value for attribute %s\n", name);
7084 return(NULL);
7085 }
7086
7087 /*
7088 * Check that xml:lang conforms to the specification
7089 * No more registered as an error, just generate a warning now
7090 * since this was deprecated in XML second edition
7091 */
7092 if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "xml:lang"))) {
7093 if (!xmlCheckLanguageID(val)) {
7094 xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
7095 "Malformed value for xml:lang : %s\n",
7096 val, NULL);
7097 }
7098 }
7099
7100 /*
7101 * Check that xml:space conforms to the specification
7102 */
7103 if (xmlStrEqual(name, BAD_CAST "xml:space")) {
7104 if (xmlStrEqual(val, BAD_CAST "default"))
7105 *(ctxt->space) = 0;
7106 else if (xmlStrEqual(val, BAD_CAST "preserve"))
7107 *(ctxt->space) = 1;
7108 else {
7109 xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE,
7110"Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
7111 val, NULL);
7112 }
7113 }
7114
7115 *value = val;
7116 return(name);
7117}
7118
7119/**
7120 * xmlParseStartTag:
7121 * @ctxt: an XML parser context
7122 *
7123 * parse a start of tag either for rule element or
7124 * EmptyElement. In both case we don't parse the tag closing chars.
7125 *
7126 * [40] STag ::= '<' Name (S Attribute)* S? '>'
7127 *
7128 * [ WFC: Unique Att Spec ]
7129 * No attribute name may appear more than once in the same start-tag or
7130 * empty-element tag.
7131 *
7132 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
7133 *
7134 * [ WFC: Unique Att Spec ]
7135 * No attribute name may appear more than once in the same start-tag or
7136 * empty-element tag.
7137 *
7138 * With namespace:
7139 *
7140 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
7141 *
7142 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
7143 *
7144 * Returns the element name parsed
7145 */
7146
7147const xmlChar *
7148xmlParseStartTag(xmlParserCtxtPtr ctxt) {
7149 const xmlChar *name;
7150 const xmlChar *attname;
7151 xmlChar *attvalue;
7152 const xmlChar **atts = ctxt->atts;
7153 int nbatts = 0;
7154 int maxatts = ctxt->maxatts;
7155 int i;
7156
7157 if (RAW != '<') return(NULL);
7158 NEXT1;
7159
7160 name = xmlParseName(ctxt);
7161 if (name == NULL) {
7162 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7163 "xmlParseStartTag: invalid element name\n");
7164 return(NULL);
7165 }
7166
7167 /*
7168 * Now parse the attributes, it ends up with the ending
7169 *
7170 * (S Attribute)* S?
7171 */
7172 SKIP_BLANKS;
7173 GROW;
7174
7175 while ((RAW != '>') &&
7176 ((RAW != '/') || (NXT(1) != '>')) &&
7177 (IS_BYTE_CHAR(RAW))) {
7178 const xmlChar *q = CUR_PTR;
7179 unsigned int cons = ctxt->input->consumed;
7180
7181 attname = xmlParseAttribute(ctxt, &attvalue);
7182 if ((attname != NULL) && (attvalue != NULL)) {
7183 /*
7184 * [ WFC: Unique Att Spec ]
7185 * No attribute name may appear more than once in the same
7186 * start-tag or empty-element tag.
7187 */
7188 for (i = 0; i < nbatts;i += 2) {
7189 if (xmlStrEqual(atts[i], attname)) {
7190 xmlErrAttributeDup(ctxt, NULL, attname);
7191 xmlFree(attvalue);
7192 goto failed;
7193 }
7194 }
7195 /*
7196 * Add the pair to atts
7197 */
7198 if (atts == NULL) {
7199 maxatts = 22; /* allow for 10 attrs by default */
7200 atts = (const xmlChar **)
7201 xmlMalloc(maxatts * sizeof(xmlChar *));
7202 if (atts == NULL) {
7203 xmlErrMemory(ctxt, NULL);
7204 if (attvalue != NULL)
7205 xmlFree(attvalue);
7206 goto failed;
7207 }
7208 ctxt->atts = atts;
7209 ctxt->maxatts = maxatts;
7210 } else if (nbatts + 4 > maxatts) {
7211 const xmlChar **n;
7212
7213 maxatts *= 2;
7214 n = (const xmlChar **) xmlRealloc((void *) atts,
7215 maxatts * sizeof(const xmlChar *));
7216 if (n == NULL) {
7217 xmlErrMemory(ctxt, NULL);
7218 if (attvalue != NULL)
7219 xmlFree(attvalue);
7220 goto failed;
7221 }
7222 atts = n;
7223 ctxt->atts = atts;
7224 ctxt->maxatts = maxatts;
7225 }
7226 atts[nbatts++] = attname;
7227 atts[nbatts++] = attvalue;
7228 atts[nbatts] = NULL;
7229 atts[nbatts + 1] = NULL;
7230 } else {
7231 if (attvalue != NULL)
7232 xmlFree(attvalue);
7233 }
7234
7235failed:
7236
7237 GROW
7238 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
7239 break;
7240 if (!IS_BLANK_CH(RAW)) {
7241 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
7242 "attributes construct error\n");
7243 }
7244 SKIP_BLANKS;
7245 if ((cons == ctxt->input->consumed) && (q == CUR_PTR) &&
7246 (attname == NULL) && (attvalue == NULL)) {
7247 xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
7248 "xmlParseStartTag: problem parsing attributes\n");
7249 break;
7250 }
7251 SHRINK;
7252 GROW;
7253 }
7254
7255 /*
7256 * SAX: Start of Element !
7257 */
7258 if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL) &&
7259 (!ctxt->disableSAX)) {
7260 if (nbatts > 0)
7261 ctxt->sax->startElement(ctxt->userData, name, atts);
7262 else
7263 ctxt->sax->startElement(ctxt->userData, name, NULL);
7264 }
7265
7266 if (atts != NULL) {
7267 /* Free only the content strings */
7268 for (i = 1;i < nbatts;i+=2)
7269 if (atts[i] != NULL)
7270 xmlFree((xmlChar *) atts[i]);
7271 }
7272 return(name);
7273}
7274
7275/**
7276 * xmlParseEndTag1:
7277 * @ctxt: an XML parser context
 * @line: line of the start tag, reported when the end tag does not match
7280 *
7281 * parse an end of tag
7282 *
7283 * [42] ETag ::= '</' Name S? '>'
7284 *
7285 * With namespace
7286 *
7287 * [NS 9] ETag ::= '</' QName S? '>'
7288 */
7289
7290static void
7291xmlParseEndTag1(xmlParserCtxtPtr ctxt, int line) {
7292 const xmlChar *name;
7293
7294 GROW;
7295 if ((RAW != '<') || (NXT(1) != '/')) {
7296 xmlFatalErrMsg(ctxt, XML_ERR_LTSLASH_REQUIRED,
7297 "xmlParseEndTag: '</' not found\n");
7298 return;
7299 }
7300 SKIP(2);
7301
7302 name = xmlParseNameAndCompare(ctxt,ctxt->name);
7303
7304 /*
7305 * We should definitely be at the ending "S? '>'" part
7306 */
7307 GROW;
7308 SKIP_BLANKS;
7309 if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
7310 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
7311 } else
7312 NEXT1;
7313
7314 /*
7315 * [ WFC: Element Type Match ]
7316 * The Name in an element's end-tag must match the element type in the
7317 * start-tag.
7318 *
7319 */
7320 if (name != (xmlChar*)1) {
7321 if (name == NULL) name = BAD_CAST "unparseable";
7322 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
7323 "Opening and ending tag mismatch: %s line %d and %s\n",
7324 ctxt->name, line, name);
7325 }
7326
7327 /*
7328 * SAX: End of Tag
7329 */
7330 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
7331 (!ctxt->disableSAX))
7332 ctxt->sax->endElement(ctxt->userData, ctxt->name);
7333
7334 namePop(ctxt);
7335 spacePop(ctxt);
7336 return;
7337}
7338
7339/**
7340 * xmlParseEndTag:
7341 * @ctxt: an XML parser context
7342 *
7343 * parse an end of tag
7344 *
7345 * [42] ETag ::= '</' Name S? '>'
7346 *
7347 * With namespace
7348 *
7349 * [NS 9] ETag ::= '</' QName S? '>'
7350 */
7351
7352void
7353xmlParseEndTag(xmlParserCtxtPtr ctxt) {
7354 xmlParseEndTag1(ctxt, 0);
7355}
7356#endif /* LIBXML_SAX1_ENABLED */
7357
7358/************************************************************************
7359 * *
7360 * SAX 2 specific operations *
7361 * *
7362 ************************************************************************/
7363
7364static const xmlChar *
7365xmlParseNCNameComplex(xmlParserCtxtPtr ctxt) {
7366 int len = 0, l;
7367 int c;
7368 int count = 0;
7369
7370 /*
7371 * Handler for more complex cases
7372 */
7373 GROW;
7374 c = CUR_CHAR(l);
7375 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
7376 (!IS_LETTER(c) && (c != '_'))) {
7377 return(NULL);
7378 }
7379
7380 while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
7381 ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
7382 (c == '.') || (c == '-') || (c == '_') ||
7383 (IS_COMBINING(c)) ||
7384 (IS_EXTENDER(c)))) {
7385 if (count++ > 100) {
7386 count = 0;
7387 GROW;
7388 }
7389 len += l;
7390 NEXTL(l);
7391 c = CUR_CHAR(l);
7392 }
7393 return(xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len));
7394}
7395
7396/*
7397 * xmlGetNamespace:
7398 * @ctxt: an XML parser context
7399 * @prefix: the prefix to lookup
7400 *
7401 * Lookup the namespace name for the @prefix (which ca be NULL)
7402 * The prefix must come from the @ctxt->dict dictionnary
7403 *
7404 * Returns the namespace name or NULL if not bound
7405 */
7406static const xmlChar *
7407xmlGetNamespace(xmlParserCtxtPtr ctxt, const xmlChar *prefix) {
7408 int i;
7409
7410 if (prefix == ctxt->str_xml) return(ctxt->str_xml_ns);
7411 for (i = ctxt->nsNr - 2;i >= 0;i-=2)
7412 if (ctxt->nsTab[i] == prefix) {
7413 if ((prefix == NULL) && (*ctxt->nsTab[i + 1] == 0))
7414 return(NULL);
7415 return(ctxt->nsTab[i + 1]);
7416 }
7417 return(NULL);
7418}
7419
7420/**
7421 * xmlParseNCName:
7422 * @ctxt: an XML parser context
 *
7424 *
7425 * parse an XML name.
7426 *
7427 * [4NS] NCNameChar ::= Letter | Digit | '.' | '-' | '_' |
7428 * CombiningChar | Extender
7429 *
7430 * [5NS] NCName ::= (Letter | '_') (NCNameChar)*
7431 *
7432 * Returns the Name parsed or NULL
7433 */
7434
7435static const xmlChar *
7436xmlParseNCName(xmlParserCtxtPtr ctxt) {
7437 const xmlChar *in;
7438 const xmlChar *ret;
7439 int count = 0;
7440
7441 /*
7442 * Accelerator for simple ASCII names
7443 */
7444 in = ctxt->input->cur;
7445 if (((*in >= 0x61) && (*in <= 0x7A)) ||
7446 ((*in >= 0x41) && (*in <= 0x5A)) ||
7447 (*in == '_')) {
7448 in++;
7449 while (((*in >= 0x61) && (*in <= 0x7A)) ||
7450 ((*in >= 0x41) && (*in <= 0x5A)) ||
7451 ((*in >= 0x30) && (*in <= 0x39)) ||
7452 (*in == '_') || (*in == '-') ||
7453 (*in == '.'))
7454 in++;
7455 if ((*in > 0) && (*in < 0x80)) {
7456 count = in - ctxt->input->cur;
7457 ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
7458 ctxt->input->cur = in;
7459 ctxt->nbChars += count;
7460 ctxt->input->col += count;
7461 if (ret == NULL) {
7462 xmlErrMemory(ctxt, NULL);
7463 }
7464 return(ret);
7465 }
7466 }
7467 return(xmlParseNCNameComplex(ctxt));
7468}
7469
7470/**
7471 * xmlParseQName:
7472 * @ctxt: an XML parser context
7473 * @prefix: pointer to store the prefix part
7474 *
7475 * parse an XML Namespace QName
7476 *
7477 * [6] QName ::= (Prefix ':')? LocalPart
7478 * [7] Prefix ::= NCName
7479 * [8] LocalPart ::= NCName
7480 *
7481 * Returns the Name parsed or NULL
7482 */
7483
7484static const xmlChar *
7485xmlParseQName(xmlParserCtxtPtr ctxt, const xmlChar **prefix) {
7486 const xmlChar *l, *p;
7487
7488 GROW;
7489
7490 l = xmlParseNCName(ctxt);
7491 if (l == NULL) {
7492 if (CUR == ':') {
7493 l = xmlParseName(ctxt);
7494 if (l != NULL) {
7495 xmlNsErr(ctxt, XML_NS_ERR_QNAME,
7496 "Failed to parse QName '%s'\n", l, NULL, NULL);
7497 *prefix = NULL;
7498 return(l);
7499 }
7500 }
7501 return(NULL);
7502 }
7503 if (CUR == ':') {
7504 NEXT;
7505 p = l;
7506 l = xmlParseNCName(ctxt);
7507 if (l == NULL) {
7508 xmlChar *tmp;
7509
7510 xmlNsErr(ctxt, XML_NS_ERR_QNAME,
7511 "Failed to parse QName '%s:'\n", p, NULL, NULL);
7512 tmp = xmlBuildQName(BAD_CAST "", p, NULL, 0);
7513 p = xmlDictLookup(ctxt->dict, tmp, -1);
7514 if (tmp != NULL) xmlFree(tmp);
7515 *prefix = NULL;
7516 return(p);
7517 }
7518 if (CUR == ':') {
7519 xmlChar *tmp;
7520
7521 xmlNsErr(ctxt, XML_NS_ERR_QNAME,
7522 "Failed to parse QName '%s:%s:'\n", p, l, NULL);
7523 NEXT;
7524 tmp = (xmlChar *) xmlParseName(ctxt);
7525 if (tmp != NULL) {
7526 tmp = xmlBuildQName(tmp, l, NULL, 0);
7527 l = xmlDictLookup(ctxt->dict, tmp, -1);
7528 if (tmp != NULL) xmlFree(tmp);
7529 *prefix = p;
7530 return(l);
7531 }
7532 tmp = xmlBuildQName(BAD_CAST "", l, NULL, 0);
7533 l = xmlDictLookup(ctxt->dict, tmp, -1);
7534 if (tmp != NULL) xmlFree(tmp);
7535 *prefix = p;
7536 return(l);
7537 }
7538 *prefix = p;
7539 } else
7540 *prefix = NULL;
7541 return(l);
7542}
7543
7544/**
7545 * xmlParseQNameAndCompare:
7546 * @ctxt: an XML parser context
7547 * @name: the localname
7548 * @prefix: the prefix, if any.
7549 *
7550 * parse an XML name and compares for match
7551 * (specialized for endtag parsing)
7552 *
7553 * Returns NULL for an illegal name, (xmlChar*) 1 for success
7554 * and the name for mismatch
7555 */
7556
7557static const xmlChar *
7558xmlParseQNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *name,
7559 xmlChar const *prefix) {
7560 const xmlChar *cmp = name;
7561 const xmlChar *in;
7562 const xmlChar *ret;
7563 const xmlChar *prefix2;
7564
7565 if (prefix == NULL) return(xmlParseNameAndCompare(ctxt, name));
7566
7567 GROW;
7568 in = ctxt->input->cur;
7569
7570 cmp = prefix;
7571 while (*in != 0 && *in == *cmp) {
7572 ++in;
7573 ++cmp;
7574 }
7575 if ((*cmp == 0) && (*in == ':')) {
7576 in++;
7577 cmp = name;
7578 while (*in != 0 && *in == *cmp) {
7579 ++in;
7580 ++cmp;
7581 }
7582 if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
7583 /* success */
7584 ctxt->input->cur = in;
7585 return((const xmlChar*) 1);
7586 }
7587 }
7588 /*
7589 * all strings coms from the dictionary, equality can be done directly
7590 */
7591 ret = xmlParseQName (ctxt, &prefix2);
7592 if ((ret == name) && (prefix == prefix2))
7593 return((const xmlChar*) 1);
7594 return ret;
7595}
7596
7597/**
7598 * xmlParseAttValueInternal:
7599 * @ctxt: an XML parser context
7600 * @len: attribute len result
7601 * @alloc: whether the attribute was reallocated as a new string
7602 * @normalize: if 1 then further non-CDATA normalization must be done
7603 *
7604 * parse a value for an attribute.
7605 * NOTE: if no normalization is needed, the routine will return pointers
7606 * directly from the data buffer.
7607 *
7608 * 3.3.3 Attribute-Value Normalization:
7609 * Before the value of an attribute is passed to the application or
7610 * checked for validity, the XML processor must normalize it as follows:
7611 * - a character reference is processed by appending the referenced
7612 * character to the attribute value
7613 * - an entity reference is processed by recursively processing the
7614 * replacement text of the entity
7615 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
7616 * appending #x20 to the normalized value, except that only a single
7617 * #x20 is appended for a "#xD#xA" sequence that is part of an external
7618 * parsed entity or the literal entity value of an internal parsed entity
7619 * - other characters are processed by appending them to the normalized value
7620 * If the declared value is not CDATA, then the XML processor must further
7621 * process the normalized attribute value by discarding any leading and
7622 * trailing space (#x20) characters, and by replacing sequences of space
7623 * (#x20) characters by a single space (#x20) character.
7624 * All attributes for which no declaration has been read should be treated
7625 * by a non-validating parser as if declared CDATA.
7626 *
7627 * Returns the AttValue parsed or NULL. The value has to be freed by the
7628 * caller if it was copied, this can be detected by val[*len] == 0.
7629 */
7630
7631static xmlChar *
7632xmlParseAttValueInternal(xmlParserCtxtPtr ctxt, int *len, int *alloc,
7633 int normalize)
7634{
7635 xmlChar limit = 0;
7636 const xmlChar *in = NULL, *start, *end, *last;
7637 xmlChar *ret = NULL;
7638
7639 GROW;
7640 in = (xmlChar *) CUR_PTR;
7641 if (*in != '"' && *in != '\'') {
7642 xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
7643 return (NULL);
7644 }
7645 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
7646
7647 /*
7648 * try to handle in this routine the most common case where no
7649 * allocation of a new string is required and where content is
7650 * pure ASCII.
7651 */
7652 limit = *in++;
7653 end = ctxt->input->end;
7654 start = in;
7655 if (in >= end) {
7656 const xmlChar *oldbase = ctxt->input->base;
7657 GROW;
7658 if (oldbase != ctxt->input->base) {
7659 long delta = ctxt->input->base - oldbase;
7660 start = start + delta;
7661 in = in + delta;
7662 }
7663 end = ctxt->input->end;
7664 }
7665 if (normalize) {
7666 /*
7667 * Skip any leading spaces
7668 */
7669 while ((in < end) && (*in != limit) &&
7670 ((*in == 0x20) || (*in == 0x9) ||
7671 (*in == 0xA) || (*in == 0xD))) {
7672 in++;
7673 start = in;
7674 if (in >= end) {
7675 const xmlChar *oldbase = ctxt->input->base;
7676 GROW;
7677 if (oldbase != ctxt->input->base) {
7678 long delta = ctxt->input->base - oldbase;
7679 start = start + delta;
7680 in = in + delta;
7681 }
7682 end = ctxt->input->end;
7683 }
7684 }
7685 while ((in < end) && (*in != limit) && (*in >= 0x20) &&
7686 (*in <= 0x7f) && (*in != '&') && (*in != '<')) {
7687 if ((*in++ == 0x20) && (*in == 0x20)) break;
7688 if (in >= end) {
7689 const xmlChar *oldbase = ctxt->input->base;
7690 GROW;
7691 if (oldbase != ctxt->input->base) {
7692 long delta = ctxt->input->base - oldbase;
7693 start = start + delta;
7694 in = in + delta;
7695 }
7696 end = ctxt->input->end;
7697 }
7698 }
7699 last = in;
7700 /*
7701 * skip the trailing blanks
7702 */
7703 while ((last[-1] == 0x20) && (last > start)) last--;
7704 while ((in < end) && (*in != limit) &&
7705 ((*in == 0x20) || (*in == 0x9) ||
7706 (*in == 0xA) || (*in == 0xD))) {
7707 in++;
7708 if (in >= end) {
7709 const xmlChar *oldbase = ctxt->input->base;
7710 GROW;
7711 if (oldbase != ctxt->input->base) {
7712 long delta = ctxt->input->base - oldbase;
7713 start = start + delta;
7714 in = in + delta;
7715 last = last + delta;
7716 }
7717 end = ctxt->input->end;
7718 }
7719 }
7720 if (*in != limit) goto need_complex;
7721 } else {
7722 while ((in < end) && (*in != limit) && (*in >= 0x20) &&
7723 (*in <= 0x7f) && (*in != '&') && (*in != '<')) {
7724 in++;
7725 if (in >= end) {
7726 const xmlChar *oldbase = ctxt->input->base;
7727 GROW;
7728 if (oldbase != ctxt->input->base) {
7729 long delta = ctxt->input->base - oldbase;
7730 start = start + delta;
7731 in = in + delta;
7732 }
7733 end = ctxt->input->end;
7734 }
7735 }
7736 last = in;
7737 if (*in != limit) goto need_complex;
7738 }
7739 in++;
7740 if (len != NULL) {
7741 *len = last - start;
7742 ret = (xmlChar *) start;
7743 } else {
7744 if (alloc) *alloc = 1;
7745 ret = xmlStrndup(start, last - start);
7746 }
7747 CUR_PTR = in;
7748 if (alloc) *alloc = 0;
7749 return ret;
7750need_complex:
7751 if (alloc) *alloc = 1;
7752 return xmlParseAttValueComplex(ctxt, len, normalize);
7753}
7754
7755/**
7756 * xmlParseAttribute2:
7757 * @ctxt: an XML parser context
7758 * @pref: the element prefix
7759 * @elem: the element name
7760 * @prefix: a xmlChar ** used to store the value of the attribute prefix
7761 * @value: a xmlChar ** used to store the value of the attribute
7762 * @len: an int * to save the length of the attribute
7763 * @alloc: an int * to indicate if the attribute was allocated
7764 *
7765 * parse an attribute in the new SAX2 framework.
7766 *
 * Returns the attribute name, and the value in *value.
7768 */
7769
7770static const xmlChar *
7771xmlParseAttribute2(xmlParserCtxtPtr ctxt,
7772 const xmlChar *pref, const xmlChar *elem,
7773 const xmlChar **prefix, xmlChar **value,
7774 int *len, int *alloc) {
7775 const xmlChar *name;
7776 xmlChar *val, *internal_val = NULL;
7777 int normalize = 0;
7778
7779 *value = NULL;
7780 GROW;
7781 name = xmlParseQName(ctxt, prefix);
7782 if (name == NULL) {
7783 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7784 "error parsing attribute name\n");
7785 return(NULL);
7786 }
7787
7788 /*
7789 * get the type if needed
7790 */
7791 if (ctxt->attsSpecial != NULL) {
7792 int type;
7793
7794 type = (int) (long) xmlHashQLookup2(ctxt->attsSpecial,
7795 pref, elem, *prefix, name);
7796 if (type != 0) normalize = 1;
7797 }
7798
7799 /*
7800 * read the value
7801 */
7802 SKIP_BLANKS;
7803 if (RAW == '=') {
7804 NEXT;
7805 SKIP_BLANKS;
7806 val = xmlParseAttValueInternal(ctxt, len, alloc, normalize);
7807 ctxt->instate = XML_PARSER_CONTENT;
7808 } else {
7809 xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
7810 "Specification mandate value for attribute %s\n", name);
7811 return(NULL);
7812 }
7813
7814 if (*prefix == ctxt->str_xml) {
7815 /*
7816 * Check that xml:lang conforms to the specification
7817 * No more registered as an error, just generate a warning now
7818 * since this was deprecated in XML second edition
7819 */
7820 if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "lang"))) {
7821 internal_val = xmlStrndup(val, *len);
7822 if (!xmlCheckLanguageID(internal_val)) {
7823 xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
7824 "Malformed value for xml:lang : %s\n",
7825 internal_val, NULL);
7826 }
7827 }
7828
7829 /*
7830 * Check that xml:space conforms to the specification
7831 */
7832 if (xmlStrEqual(name, BAD_CAST "space")) {
7833 internal_val = xmlStrndup(val, *len);
7834 if (xmlStrEqual(internal_val, BAD_CAST "default"))
7835 *(ctxt->space) = 0;
7836 else if (xmlStrEqual(internal_val, BAD_CAST "preserve"))
7837 *(ctxt->space) = 1;
7838 else {
7839 xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE,
7840"Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
7841 internal_val, NULL);
7842 }
7843 }
7844 if (internal_val) {
7845 xmlFree(internal_val);
7846 }
7847 }
7848
7849 *value = val;
7850 return(name);
7851}
7852
7853/**
7854 * xmlParseStartTag2:
7855 * @ctxt: an XML parser context
7856 *
7857 * parse a start of tag either for rule element or
7858 * EmptyElement. In both case we don't parse the tag closing chars.
7859 * This routine is called when running SAX2 parsing
7860 *
7861 * [40] STag ::= '<' Name (S Attribute)* S? '>'
7862 *
7863 * [ WFC: Unique Att Spec ]
7864 * No attribute name may appear more than once in the same start-tag or
7865 * empty-element tag.
7866 *
7867 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
7868 *
7869 * [ WFC: Unique Att Spec ]
7870 * No attribute name may appear more than once in the same start-tag or
7871 * empty-element tag.
7872 *
7873 * With namespace:
7874 *
7875 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
7876 *
7877 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
7878 *
7879 * Returns the element name parsed
7880 */
7881
7882static const xmlChar *
7883xmlParseStartTag2(xmlParserCtxtPtr ctxt, const xmlChar **pref,
7884 const xmlChar **URI, int *tlen) {
7885 const xmlChar *localname;
7886 const xmlChar *prefix;
7887 const xmlChar *attname;
7888 const xmlChar *aprefix;
7889 const xmlChar *nsname;
7890 xmlChar *attvalue;
7891 const xmlChar **atts = ctxt->atts;
7892 int maxatts = ctxt->maxatts;
7893 int nratts, nbatts, nbdef;
7894 int i, j, nbNs, attval, oldline, oldcol;
7895 const xmlChar *base;
7896 unsigned long cur;
7897 int nsNr = ctxt->nsNr;
7898
7899 if (RAW != '<') return(NULL);
7900 NEXT1;
7901
7902 /*
7903 * NOTE: it is crucial with the SAX2 API to never call SHRINK beyond that
7904 * point since the attribute values may be stored as pointers to
7905 * the buffer and calling SHRINK would destroy them !
7906 * The Shrinking is only possible once the full set of attribute
7907 * callbacks have been done.
7908 */
7909reparse:
7910 SHRINK;
7911 base = ctxt->input->base;
7912 cur = ctxt->input->cur - ctxt->input->base;
7913 oldline = ctxt->input->line;
7914 oldcol = ctxt->input->col;
7915 nbatts = 0;
7916 nratts = 0;
7917 nbdef = 0;
7918 nbNs = 0;
7919 attval = 0;
7920 /* Forget any namespaces added during an earlier parse of this element. */
7921 ctxt->nsNr = nsNr;
7922
7923 localname = xmlParseQName(ctxt, &prefix);
7924 if (localname == NULL) {
7925 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7926 "StartTag: invalid element name\n");
7927 return(NULL);
7928 }
7929 *tlen = ctxt->input->cur - ctxt->input->base - cur;
7930
7931 /*
7932 * Now parse the attributes, it ends up with the ending
7933 *
7934 * (S Attribute)* S?
7935 */
7936 SKIP_BLANKS;
7937 GROW;
7938 if (ctxt->input->base != base) goto base_changed;
7939
7940 while ((RAW != '>') &&
7941 ((RAW != '/') || (NXT(1) != '>')) &&
7942 (IS_BYTE_CHAR(RAW))) {
7943 const xmlChar *q = CUR_PTR;
7944 unsigned int cons = ctxt->input->consumed;
7945 int len = -1, alloc = 0;
7946
7947 attname = xmlParseAttribute2(ctxt, prefix, localname,
7948 &aprefix, &attvalue, &len, &alloc);
7949 if (ctxt->input->base != base) {
7950 if ((attvalue != NULL) && (alloc != 0))
7951 xmlFree(attvalue);
7952 attvalue = NULL;
7953 goto base_changed;
7954 }
7955 if ((attname != NULL) && (attvalue != NULL)) {
7956 if (len < 0) len = xmlStrlen(attvalue);
7957 if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
7958 const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len);
7959 xmlURIPtr uri;
7960
7961 if (*URL != 0) {
7962 uri = xmlParseURI((const char *) URL);
7963 if (uri == NULL) {
7964 xmlWarningMsg(ctxt, XML_WAR_NS_URI,
7965 "xmlns: %s not a valid URI\n",
7966 URL, NULL);
7967 } else {
7968 if (uri->scheme == NULL) {
7969 xmlWarningMsg(ctxt, XML_WAR_NS_URI_RELATIVE,
7970 "xmlns: URI %s is not absolute\n",
7971 URL, NULL);
7972 }
7973 xmlFreeURI(uri);
7974 }
7975 }
7976 /*
7977 * check that it's not a defined namespace
7978 */
7979 for (j = 1;j <= nbNs;j++)
7980 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL)
7981 break;
7982 if (j <= nbNs)
7983 xmlErrAttributeDup(ctxt, NULL, attname);
7984 else
7985 if (nsPush(ctxt, NULL, URL) > 0) nbNs++;
7986 if (alloc != 0) xmlFree(attvalue);
7987 SKIP_BLANKS;
7988 continue;
7989 }
7990 if (aprefix == ctxt->str_xmlns) {
7991 const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len);
7992 xmlURIPtr uri;
7993
7994 if (attname == ctxt->str_xml) {
7995 if (URL != ctxt->str_xml_ns) {
7996 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
7997 "xml namespace prefix mapped to wrong URI\n",
7998 NULL, NULL, NULL);
7999 }
8000 /*
8001 * Do not keep a namespace definition node
8002 */
8003 if (alloc != 0) xmlFree(attvalue);
8004 SKIP_BLANKS;
8005 continue;
8006 }
8007 uri = xmlParseURI((const char *) URL);
8008 if (uri == NULL) {
8009 xmlWarningMsg(ctxt, XML_WAR_NS_URI,
8010 "xmlns:%s: '%s' is not a valid URI\n",
8011 attname, URL);
8012 } else {
8013 if ((ctxt->pedantic) && (uri->scheme == NULL)) {
8014 xmlWarningMsg(ctxt, XML_WAR_NS_URI_RELATIVE,
8015 "xmlns:%s: URI %s is not absolute\n",
8016 attname, URL);
8017 }
8018 xmlFreeURI(uri);
8019 }
8020
8021 /*
8022 * check that it's not a defined namespace
8023 */
8024 for (j = 1;j <= nbNs;j++)
8025 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname)
8026 break;
8027 if (j <= nbNs)
8028 xmlErrAttributeDup(ctxt, aprefix, attname);
8029 else
8030 if (nsPush(ctxt, attname, URL) > 0) nbNs++;
8031 if (alloc != 0) xmlFree(attvalue);
8032 SKIP_BLANKS;
8033 if (ctxt->input->base != base) goto base_changed;
8034 continue;
8035 }
8036
8037 /*
8038 * Add the pair to atts
8039 */
8040 if ((atts == NULL) || (nbatts + 5 > maxatts)) {
8041 if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
8042 if (attvalue[len] == 0)
8043 xmlFree(attvalue);
8044 goto failed;
8045 }
8046 maxatts = ctxt->maxatts;
8047 atts = ctxt->atts;
8048 }
8049 ctxt->attallocs[nratts++] = alloc;
8050 atts[nbatts++] = attname;
8051 atts[nbatts++] = aprefix;
8052 atts[nbatts++] = NULL; /* the URI will be fetched later */
8053 atts[nbatts++] = attvalue;
8054 attvalue += len;
8055 atts[nbatts++] = attvalue;
8056 /*
8057 * tag if some deallocation is needed
8058 */
8059 if (alloc != 0) attval = 1;
8060 } else {
8061 if ((attvalue != NULL) && (attvalue[len] == 0))
8062 xmlFree(attvalue);
8063 }
8064
8065failed:
8066
8067 GROW
8068 if (ctxt->input->base != base) goto base_changed;
8069 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
8070 break;
8071 if (!IS_BLANK_CH(RAW)) {
8072 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
8073 "attributes construct error\n");
8074 break;
8075 }
8076 SKIP_BLANKS;
8077 if ((cons == ctxt->input->consumed) && (q == CUR_PTR) &&
8078 (attname == NULL) && (attvalue == NULL)) {
8079 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
8080 "xmlParseStartTag: problem parsing attributes\n");
8081 break;
8082 }
8083 GROW;
8084 if (ctxt->input->base != base) goto base_changed;
8085 }
8086
8087 /*
8088 * The attributes defaulting
8089 */
8090 if (ctxt->attsDefault != NULL) {
8091 xmlDefAttrsPtr defaults;
8092
8093 defaults = xmlHashLookup2(ctxt->attsDefault, localname, prefix);
8094 if (defaults != NULL) {
8095 for (i = 0;i < defaults->nbAttrs;i++) {
8096 attname = defaults->values[4 * i];
8097 aprefix = defaults->values[4 * i + 1];
8098
8099 /*
8100 * special work for namespaces defaulted defs
8101 */
8102 if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
8103 /*
8104 * check that it's not a defined namespace
8105 */
8106 for (j = 1;j <= nbNs;j++)
8107 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL)
8108 break;
8109 if (j <= nbNs) continue;
8110
8111 nsname = xmlGetNamespace(ctxt, NULL);
8112 if (nsname != defaults->values[4 * i + 2]) {
8113 if (nsPush(ctxt, NULL,
8114 defaults->values[4 * i + 2]) > 0)
8115 nbNs++;
8116 }
8117 } else if (aprefix == ctxt->str_xmlns) {
8118 /*
8119 * check that it's not a defined namespace
8120 */
8121 for (j = 1;j <= nbNs;j++)
8122 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname)
8123 break;
8124 if (j <= nbNs) continue;
8125
8126 nsname = xmlGetNamespace(ctxt, attname);
8127 if (nsname != defaults->values[2]) {
8128 if (nsPush(ctxt, attname,
8129 defaults->values[4 * i + 2]) > 0)
8130 nbNs++;
8131 }
8132 } else {
8133 /*
8134 * check that it's not a defined attribute
8135 */
8136 for (j = 0;j < nbatts;j+=5) {
8137 if ((attname == atts[j]) && (aprefix == atts[j+1]))
8138 break;
8139 }
8140 if (j < nbatts) continue;
8141
8142 if ((atts == NULL) || (nbatts + 5 > maxatts)) {
8143 if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
8144 return(NULL);
8145 }
8146 maxatts = ctxt->maxatts;
8147 atts = ctxt->atts;
8148 }
8149 atts[nbatts++] = attname;
8150 atts[nbatts++] = aprefix;
8151 if (aprefix == NULL)
8152 atts[nbatts++] = NULL;
8153 else
8154 atts[nbatts++] = xmlGetNamespace(ctxt, aprefix);
8155 atts[nbatts++] = defaults->values[4 * i + 2];
8156 atts[nbatts++] = defaults->values[4 * i + 3];
8157 nbdef++;
8158 }
8159 }
8160 }
8161 }
8162
8163 /*
8164 * The attributes checkings
8165 */
8166 for (i = 0; i < nbatts;i += 5) {
8167 /*
8168 * The default namespace does not apply to attribute names.
8169 */
8170 if (atts[i + 1] != NULL) {
8171 nsname = xmlGetNamespace(ctxt, atts[i + 1]);
8172 if (nsname == NULL) {
8173 xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
8174 "Namespace prefix %s for %s on %s is not defined\n",
8175 atts[i + 1], atts[i], localname);
8176 }
8177 atts[i + 2] = nsname;
8178 } else
8179 nsname = NULL;
8180 /*
8181 * [ WFC: Unique Att Spec ]
8182 * No attribute name may appear more than once in the same
8183 * start-tag or empty-element tag.
8184 * As extended by the Namespace in XML REC.
8185 */
8186 for (j = 0; j < i;j += 5) {
8187 if (atts[i] == atts[j]) {
8188 if (atts[i+1] == atts[j+1]) {
8189 xmlErrAttributeDup(ctxt, atts[i+1], atts[i]);
8190 break;
8191 }
8192 if ((nsname != NULL) && (atts[j + 2] == nsname)) {
8193 xmlNsErr(ctxt, XML_NS_ERR_ATTRIBUTE_REDEFINED,
8194 "Namespaced Attribute %s in '%s' redefined\n",
8195 atts[i], nsname, NULL);
8196 break;
8197 }
8198 }
8199 }
8200 }
8201
8202 nsname = xmlGetNamespace(ctxt, prefix);
8203 if ((prefix != NULL) && (nsname == NULL)) {
8204 xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
8205 "Namespace prefix %s on %s is not defined\n",
8206 prefix, localname, NULL);
8207 }
8208 *pref = prefix;
8209 *URI = nsname;
8210
8211 /*
8212 * SAX: Start of Element !
8213 */
8214 if ((ctxt->sax != NULL) && (ctxt->sax->startElementNs != NULL) &&
8215 (!ctxt->disableSAX)) {
8216 if (nbNs > 0)
8217 ctxt->sax->startElementNs(ctxt->userData, localname, prefix,
8218 nsname, nbNs, &ctxt->nsTab[ctxt->nsNr - 2 * nbNs],
8219 nbatts / 5, nbdef, atts);
8220 else
8221 ctxt->sax->startElementNs(ctxt->userData, localname, prefix,
8222 nsname, 0, NULL, nbatts / 5, nbdef, atts);
8223 }
8224
8225 /*
8226 * Free up attribute allocated strings if needed
8227 */
8228 if (attval != 0) {
8229 for (i = 3,j = 0; j < nratts;i += 5,j++)
8230 if ((ctxt->attallocs[j] != 0) && (atts[i] != NULL))
8231 xmlFree((xmlChar *) atts[i]);
8232 }
8233
8234 return(localname);
8235
8236base_changed:
8237 /*
8238 * the attribute strings are valid iif the base didn't changed
8239 */
8240 if (attval != 0) {
8241 for (i = 3,j = 0; j < nratts;i += 5,j++)
8242 if ((ctxt->attallocs[j] != 0) && (atts[i] != NULL))
8243 xmlFree((xmlChar *) atts[i]);
8244 }
8245 ctxt->input->cur = ctxt->input->base + cur;
8246 ctxt->input->line = oldline;
8247 ctxt->input->col = oldcol;
8248 if (ctxt->wellFormed == 1) {
8249 goto reparse;
8250 }
8251 return(NULL);
8252}
8253
8254/**
8255 * xmlParseEndTag2:
8256 * @ctxt: an XML parser context
8257 * @line: line of the start tag
8258 * @nsNr: number of namespaces on the start tag
8259 *
8260 * parse an end of tag
8261 *
8262 * [42] ETag ::= '</' Name S? '>'
8263 *
8264 * With namespace
8265 *
8266 * [NS 9] ETag ::= '</' QName S? '>'
8267 */
8268
8269static void
8270xmlParseEndTag2(xmlParserCtxtPtr ctxt, const xmlChar *prefix,
8271 const xmlChar *URI, int line, int nsNr, int tlen) {
8272 const xmlChar *name;
8273
8274 GROW;
8275 if ((RAW != '<') || (NXT(1) != '/')) {
8276 xmlFatalErr(ctxt, XML_ERR_LTSLASH_REQUIRED, NULL);
8277 return;
8278 }
8279 SKIP(2);
8280
8281 if ((tlen > 0) && (xmlStrncmp(ctxt->input->cur, ctxt->name, tlen) == 0)) {
8282 if (ctxt->input->cur[tlen] == '>') {
8283 ctxt->input->cur += tlen + 1;
8284 goto done;
8285 }
8286 ctxt->input->cur += tlen;
8287 name = (xmlChar*)1;
8288 } else {
8289 if (prefix == NULL)
8290 name = xmlParseNameAndCompare(ctxt, ctxt->name);
8291 else
8292 name = xmlParseQNameAndCompare(ctxt, ctxt->name, prefix);
8293 }
8294
8295 /*
8296 * We should definitely be at the ending "S? '>'" part
8297 */
8298 GROW;
8299 SKIP_BLANKS;
8300 if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
8301 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
8302 } else
8303 NEXT1;
8304
8305 /*
8306 * [ WFC: Element Type Match ]
8307 * The Name in an element's end-tag must match the element type in the
8308 * start-tag.
8309 *
8310 */
8311 if (name != (xmlChar*)1) {
8312 if (name == NULL) name = BAD_CAST "unparseable";
8313 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
8314 "Opening and ending tag mismatch: %s line %d and %s\n",
8315 ctxt->name, line, name);
8316 }
8317
8318 /*
8319 * SAX: End of Tag
8320 */
8321done:
8322 if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
8323 (!ctxt->disableSAX))
8324 ctxt->sax->endElementNs(ctxt->userData, ctxt->name, prefix, URI);
8325
8326 spacePop(ctxt);
8327 if (nsNr != 0)
8328 nsPop(ctxt, nsNr);
8329 return;
8330}
8331
8332/**
8333 * xmlParseCDSect:
8334 * @ctxt: an XML parser context
8335 *
8336 * Parse escaped pure raw content.
8337 *
8338 * [18] CDSect ::= CDStart CData CDEnd
8339 *
8340 * [19] CDStart ::= '<![CDATA['
8341 *
8342 * [20] Data ::= (Char* - (Char* ']]>' Char*))
8343 *
8344 * [21] CDEnd ::= ']]>'
8345 */
8346void
8347xmlParseCDSect(xmlParserCtxtPtr ctxt) {
8348 xmlChar *buf = NULL;
8349 int len = 0;
8350 int size = XML_PARSER_BUFFER_SIZE;
8351 int r, rl;
8352 int s, sl;
8353 int cur, l;
8354 int count = 0;
8355
8356 /* Check 2.6.0 was NXT(0) not RAW */
8357 if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) {
8358 SKIP(9);
8359 } else
8360 return;
8361
8362 ctxt->instate = XML_PARSER_CDATA_SECTION;
8363 r = CUR_CHAR(rl);
8364 if (!IS_CHAR(r)) {
8365 xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
8366 ctxt->instate = XML_PARSER_CONTENT;
8367 return;
8368 }
8369 NEXTL(rl);
8370 s = CUR_CHAR(sl);
8371 if (!IS_CHAR(s)) {
8372 xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
8373 ctxt->instate = XML_PARSER_CONTENT;
8374 return;
8375 }
8376 NEXTL(sl);
8377 cur = CUR_CHAR(l);
8378 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
8379 if (buf == NULL) {
8380 xmlErrMemory(ctxt, NULL);
8381 return;
8382 }
8383 while (IS_CHAR(cur) &&
8384 ((r != ']') || (s != ']') || (cur != '>'))) {
8385 if (len + 5 >= size) {
8386 xmlChar *tmp;
8387
8388 size *= 2;
8389 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
8390 if (tmp == NULL) {
8391 xmlFree(buf);
8392 xmlErrMemory(ctxt, NULL);
8393 return;
8394 }
8395 buf = tmp;
8396 }
8397 COPY_BUF(rl,buf,len,r);
8398 r = s;
8399 rl = sl;
8400 s = cur;
8401 sl = l;
8402 count++;
8403 if (count > 50) {
8404 GROW;
8405 count = 0;
8406 }
8407 NEXTL(l);
8408 cur = CUR_CHAR(l);
8409 }
8410 buf[len] = 0;
8411 ctxt->instate = XML_PARSER_CONTENT;
8412 if (cur != '>') {
8413 xmlFatalErrMsgStr(ctxt, XML_ERR_CDATA_NOT_FINISHED,
8414 "CData section not finished\n%.50s\n", buf);
8415 xmlFree(buf);
8416 return;
8417 }
8418 NEXTL(l);
8419
8420 /*
8421 * OK the buffer is to be consumed as cdata.
8422 */
8423 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
8424 if (ctxt->sax->cdataBlock != NULL)
8425 ctxt->sax->cdataBlock(ctxt->userData, buf, len);
8426 else if (ctxt->sax->characters != NULL)
8427 ctxt->sax->characters(ctxt->userData, buf, len);
8428 }
8429 xmlFree(buf);
8430}
8431
8432/**
8433 * xmlParseContent:
8434 * @ctxt: an XML parser context
8435 *
8436 * Parse a content:
8437 *
8438 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
8439 */
8440
8441void
8442xmlParseContent(xmlParserCtxtPtr ctxt) {
8443 GROW;
8444 while ((RAW != 0) &&
8445 ((RAW != '<') || (NXT(1) != '/')) &&
8446 (ctxt->instate != XML_PARSER_EOF)) {
8447 const xmlChar *test = CUR_PTR;
8448 unsigned int cons = ctxt->input->consumed;
8449 const xmlChar *cur = ctxt->input->cur;
8450
8451 /*
8452 * First case : a Processing Instruction.
8453 */
8454 if ((*cur == '<') && (cur[1] == '?')) {
8455 xmlParsePI(ctxt);
8456 }
8457
8458 /*
8459 * Second case : a CDSection
8460 */
8461 /* 2.6.0 test was *cur not RAW */
8462 else if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) {
8463 xmlParseCDSect(ctxt);
8464 }
8465
8466 /*
8467 * Third case : a comment
8468 */
8469 else if ((*cur == '<') && (NXT(1) == '!') &&
8470 (NXT(2) == '-') && (NXT(3) == '-')) {
8471 xmlParseComment(ctxt);
8472 ctxt->instate = XML_PARSER_CONTENT;
8473 }
8474
8475 /*
8476 * Fourth case : a sub-element.
8477 */
8478 else if (*cur == '<') {
8479 xmlParseElement(ctxt);
8480 }
8481
8482 /*
8483 * Fifth case : a reference. If if has not been resolved,
8484 * parsing returns it's Name, create the node
8485 */
8486
8487 else if (*cur == '&') {
8488 xmlParseReference(ctxt);
8489 }
8490
8491 /*
8492 * Last case, text. Note that References are handled directly.
8493 */
8494 else {
8495 xmlParseCharData(ctxt, 0);
8496 }
8497
8498 GROW;
8499 /*
8500 * Pop-up of finished entities.
8501 */
8502 while ((RAW == 0) && (ctxt->inputNr > 1))
8503 xmlPopInput(ctxt);
8504 SHRINK;
8505
8506 if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) {
8507 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
8508 "detected an error in element content\n");
8509 ctxt->instate = XML_PARSER_EOF;
8510 break;
8511 }
8512 }
8513}
8514
8515/**
8516 * xmlParseElement:
8517 * @ctxt: an XML parser context
8518 *
8519 * parse an XML element, this is highly recursive
8520 *
8521 * [39] element ::= EmptyElemTag | STag content ETag
8522 *
8523 * [ WFC: Element Type Match ]
8524 * The Name in an element's end-tag must match the element type in the
8525 * start-tag.
8526 *
8527 */
8528
8529void
8530xmlParseElement(xmlParserCtxtPtr ctxt) {
8531 const xmlChar *name;
8532 const xmlChar *prefix;
8533 const xmlChar *URI;
8534 xmlParserNodeInfo node_info;
8535 int line, tlen;
8536 xmlNodePtr ret;
8537 int nsNr = ctxt->nsNr;
8538
8539 if ((unsigned int) ctxt->nameNr > xmlParserMaxDepth) {
8540 xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR,
8541 "Excessive depth in document: change xmlParserMaxDepth = %d\n",
8542 xmlParserMaxDepth);
8543 ctxt->instate = XML_PARSER_EOF;
8544 return;
8545 }
8546
8547 /* Capture start position */
8548 if (ctxt->record_info) {
8549 node_info.begin_pos = ctxt->input->consumed +
8550 (CUR_PTR - ctxt->input->base);
8551 node_info.begin_line = ctxt->input->line;
8552 }
8553
8554 if (ctxt->spaceNr == 0)
8555 spacePush(ctxt, -1);
8556 else if (*ctxt->space == -2)
8557 spacePush(ctxt, -1);
8558 else
8559 spacePush(ctxt, *ctxt->space);
8560
8561 line = ctxt->input->line;
8562#ifdef LIBXML_SAX1_ENABLED
8563 if (ctxt->sax2)
8564#endif /* LIBXML_SAX1_ENABLED */
8565 name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen);
8566#ifdef LIBXML_SAX1_ENABLED
8567 else
8568 name = xmlParseStartTag(ctxt);
8569#endif /* LIBXML_SAX1_ENABLED */
8570 if (name == NULL) {
8571 spacePop(ctxt);
8572 return;
8573 }
8574 namePush(ctxt, name);
8575 ret = ctxt->node;
8576
8577#ifdef LIBXML_VALID_ENABLED
8578 /*
8579 * [ VC: Root Element Type ]
8580 * The Name in the document type declaration must match the element
8581 * type of the root element.
8582 */
8583 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
8584 ctxt->node && (ctxt->node == ctxt->myDoc->children))
8585 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
8586#endif /* LIBXML_VALID_ENABLED */
8587
8588 /*
8589 * Check for an Empty Element.
8590 */
8591 if ((RAW == '/') && (NXT(1) == '>')) {
8592 SKIP(2);
8593 if (ctxt->sax2) {
8594 if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
8595 (!ctxt->disableSAX))
8596 ctxt->sax->endElementNs(ctxt->userData, name, prefix, URI);
8597#ifdef LIBXML_SAX1_ENABLED
8598 } else {
8599 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
8600 (!ctxt->disableSAX))
8601 ctxt->sax->endElement(ctxt->userData, name);
8602#endif /* LIBXML_SAX1_ENABLED */
8603 }
8604 namePop(ctxt);
8605 spacePop(ctxt);
8606 if (nsNr != ctxt->nsNr)
8607 nsPop(ctxt, ctxt->nsNr - nsNr);
8608 if ( ret != NULL && ctxt->record_info ) {
8609 node_info.end_pos = ctxt->input->consumed +
8610 (CUR_PTR - ctxt->input->base);
8611 node_info.end_line = ctxt->input->line;
8612 node_info.node = ret;
8613 xmlParserAddNodeInfo(ctxt, &node_info);
8614 }
8615 return;
8616 }
8617 if (RAW == '>') {
8618 NEXT1;
8619 } else {
8620 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_GT_REQUIRED,
8621 "Couldn't find end of Start Tag %s line %d\n",
8622 name, line, NULL);
8623
8624 /*
8625 * end of parsing of this node.
8626 */
8627 nodePop(ctxt);
8628 namePop(ctxt);
8629 spacePop(ctxt);
8630 if (nsNr != ctxt->nsNr)
8631 nsPop(ctxt, ctxt->nsNr - nsNr);
8632
8633 /*
8634 * Capture end position and add node
8635 */
8636 if ( ret != NULL && ctxt->record_info ) {
8637 node_info.end_pos = ctxt->input->consumed +
8638 (CUR_PTR - ctxt->input->base);
8639 node_info.end_line = ctxt->input->line;
8640 node_info.node = ret;
8641 xmlParserAddNodeInfo(ctxt, &node_info);
8642 }
8643 return;
8644 }
8645
8646 /*
8647 * Parse the content of the element:
8648 */
8649 xmlParseContent(ctxt);
8650 if (!IS_BYTE_CHAR(RAW)) {
8651 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NOT_FINISHED,
8652 "Premature end of data in tag %s line %d\n",
8653 name, line, NULL);
8654
8655 /*
8656 * end of parsing of this node.
8657 */
8658 nodePop(ctxt);
8659 namePop(ctxt);
8660 spacePop(ctxt);
8661 if (nsNr != ctxt->nsNr)
8662 nsPop(ctxt, ctxt->nsNr - nsNr);
8663 return;
8664 }
8665
8666 /*
8667 * parse the end of tag: '</' should be here.
8668 */
8669 if (ctxt->sax2) {
8670 xmlParseEndTag2(ctxt, prefix, URI, line, ctxt->nsNr - nsNr, tlen);
8671 namePop(ctxt);
8672 }
8673#ifdef LIBXML_SAX1_ENABLED
8674 else
8675 xmlParseEndTag1(ctxt, line);
8676#endif /* LIBXML_SAX1_ENABLED */
8677
8678 /*
8679 * Capture end position and add node
8680 */
8681 if ( ret != NULL && ctxt->record_info ) {
8682 node_info.end_pos = ctxt->input->consumed +
8683 (CUR_PTR - ctxt->input->base);
8684 node_info.end_line = ctxt->input->line;
8685 node_info.node = ret;
8686 xmlParserAddNodeInfo(ctxt, &node_info);
8687 }
8688}
8689
8690/**
8691 * xmlParseVersionNum:
8692 * @ctxt: an XML parser context
8693 *
8694 * parse the XML version value.
8695 *
8696 * [26] VersionNum ::= ([a-zA-Z0-9_.:] | '-')+
8697 *
8698 * Returns the string giving the XML version number, or NULL
8699 */
8700xmlChar *
8701xmlParseVersionNum(xmlParserCtxtPtr ctxt) {
8702 xmlChar *buf = NULL;
8703 int len = 0;
8704 int size = 10;
8705 xmlChar cur;
8706
8707 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
8708 if (buf == NULL) {
8709 xmlErrMemory(ctxt, NULL);
8710 return(NULL);
8711 }
8712 cur = CUR;
8713 while (((cur >= 'a') && (cur <= 'z')) ||
8714 ((cur >= 'A') && (cur <= 'Z')) ||
8715 ((cur >= '0') && (cur <= '9')) ||
8716 (cur == '_') || (cur == '.') ||
8717 (cur == ':') || (cur == '-')) {
8718 if (len + 1 >= size) {
8719 xmlChar *tmp;
8720
8721 size *= 2;
8722 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
8723 if (tmp == NULL) {
8724 xmlErrMemory(ctxt, NULL);
8725 return(NULL);
8726 }
8727 buf = tmp;
8728 }
8729 buf[len++] = cur;
8730 NEXT;
8731 cur=CUR;
8732 }
8733 buf[len] = 0;
8734 return(buf);
8735}
8736
8737/**
8738 * xmlParseVersionInfo:
8739 * @ctxt: an XML parser context
8740 *
8741 * parse the XML version.
8742 *
8743 * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
8744 *
8745 * [25] Eq ::= S? '=' S?
8746 *
8747 * Returns the version string, e.g. "1.0"
8748 */
8749
8750xmlChar *
8751xmlParseVersionInfo(xmlParserCtxtPtr ctxt) {
8752 xmlChar *version = NULL;
8753
8754 if (CMP7(CUR_PTR, 'v', 'e', 'r', 's', 'i', 'o', 'n')) {
8755 SKIP(7);
8756 SKIP_BLANKS;
8757 if (RAW != '=') {
8758 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
8759 return(NULL);
8760 }
8761 NEXT;
8762 SKIP_BLANKS;
8763 if (RAW == '"') {
8764 NEXT;
8765 version = xmlParseVersionNum(ctxt);
8766 if (RAW != '"') {
8767 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
8768 } else
8769 NEXT;
8770 } else if (RAW == '\''){
8771 NEXT;
8772 version = xmlParseVersionNum(ctxt);
8773 if (RAW != '\'') {
8774 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
8775 } else
8776 NEXT;
8777 } else {
8778 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
8779 }
8780 }
8781 return(version);
8782}
8783
8784/**
8785 * xmlParseEncName:
8786 * @ctxt: an XML parser context
8787 *
8788 * parse the XML encoding name
8789 *
8790 * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
8791 *
8792 * Returns the encoding name value or NULL
8793 */
8794xmlChar *
8795xmlParseEncName(xmlParserCtxtPtr ctxt) {
8796 xmlChar *buf = NULL;
8797 int len = 0;
8798 int size = 10;
8799 xmlChar cur;
8800
8801 cur = CUR;
8802 if (((cur >= 'a') && (cur <= 'z')) ||
8803 ((cur >= 'A') && (cur <= 'Z'))) {
8804 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
8805 if (buf == NULL) {
8806 xmlErrMemory(ctxt, NULL);
8807 return(NULL);
8808 }
8809
8810 buf[len++] = cur;
8811 NEXT;
8812 cur = CUR;
8813 while (((cur >= 'a') && (cur <= 'z')) ||
8814 ((cur >= 'A') && (cur <= 'Z')) ||
8815 ((cur >= '0') && (cur <= '9')) ||
8816 (cur == '.') || (cur == '_') ||
8817 (cur == '-')) {
8818 if (len + 1 >= size) {
8819 xmlChar *tmp;
8820
8821 size *= 2;
8822 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
8823 if (tmp == NULL) {
8824 xmlErrMemory(ctxt, NULL);
8825 xmlFree(buf);
8826 return(NULL);
8827 }
8828 buf = tmp;
8829 }
8830 buf[len++] = cur;
8831 NEXT;
8832 cur = CUR;
8833 if (cur == 0) {
8834 SHRINK;
8835 GROW;
8836 cur = CUR;
8837 }
8838 }
8839 buf[len] = 0;
8840 } else {
8841 xmlFatalErr(ctxt, XML_ERR_ENCODING_NAME, NULL);
8842 }
8843 return(buf);
8844}
8845
8846/**
8847 * xmlParseEncodingDecl:
8848 * @ctxt: an XML parser context
8849 *
8850 * parse the XML encoding declaration
8851 *
8852 * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'")
8853 *
8854 * this setups the conversion filters.
8855 *
8856 * Returns the encoding value or NULL
8857 */
8858
8859const xmlChar *
8860xmlParseEncodingDecl(xmlParserCtxtPtr ctxt) {
8861 xmlChar *encoding = NULL;
8862
8863 SKIP_BLANKS;
8864 if (CMP8(CUR_PTR, 'e', 'n', 'c', 'o', 'd', 'i', 'n', 'g')) {
8865 SKIP(8);
8866 SKIP_BLANKS;
8867 if (RAW != '=') {
8868 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
8869 return(NULL);
8870 }
8871 NEXT;
8872 SKIP_BLANKS;
8873 if (RAW == '"') {
8874 NEXT;
8875 encoding = xmlParseEncName(ctxt);
8876 if (RAW != '"') {
8877 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
8878 } else
8879 NEXT;
8880 } else if (RAW == '\''){
8881 NEXT;
8882 encoding = xmlParseEncName(ctxt);
8883 if (RAW != '\'') {
8884 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
8885 } else
8886 NEXT;
8887 } else {
8888 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
8889 }
8890 /*
8891 * UTF-16 encoding stwich has already taken place at this stage,
8892 * more over the little-endian/big-endian selection is already done
8893 */
8894 if ((encoding != NULL) &&
8895 ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-16")) ||
8896 (!xmlStrcasecmp(encoding, BAD_CAST "UTF16")))) {
8897 if (ctxt->encoding != NULL)
8898 xmlFree((xmlChar *) ctxt->encoding);
8899 ctxt->encoding = encoding;
8900 }
8901 /*
8902 * UTF-8 encoding is handled natively
8903 */
8904 else if ((encoding != NULL) &&
8905 ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-8")) ||
8906 (!xmlStrcasecmp(encoding, BAD_CAST "UTF8")))) {
8907 if (ctxt->encoding != NULL)
8908 xmlFree((xmlChar *) ctxt->encoding);
8909 ctxt->encoding = encoding;
8910 }
8911 else if (encoding != NULL) {
8912 xmlCharEncodingHandlerPtr handler;
8913
8914 if (ctxt->input->encoding != NULL)
8915 xmlFree((xmlChar *) ctxt->input->encoding);
8916 ctxt->input->encoding = encoding;
8917
8918 handler = xmlFindCharEncodingHandler((const char *) encoding);
8919 if (handler != NULL) {
8920 xmlSwitchToEncoding(ctxt, handler);
8921 } else {
8922 xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
8923 "Unsupported encoding %s\n", encoding);
8924 return(NULL);
8925 }
8926 }
8927 }
8928 return(encoding);
8929}
8930
8931/**
8932 * xmlParseSDDecl:
8933 * @ctxt: an XML parser context
8934 *
8935 * parse the XML standalone declaration
8936 *
8937 * [32] SDDecl ::= S 'standalone' Eq
8938 * (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no')'"'))
8939 *
8940 * [ VC: Standalone Document Declaration ]
8941 * TODO The standalone document declaration must have the value "no"
8942 * if any external markup declarations contain declarations of:
8943 * - attributes with default values, if elements to which these
8944 * attributes apply appear in the document without specifications
8945 * of values for these attributes, or
8946 * - entities (other than amp, lt, gt, apos, quot), if references
8947 * to those entities appear in the document, or
8948 * - attributes with values subject to normalization, where the
8949 * attribute appears in the document with a value which will change
8950 * as a result of normalization, or
8951 * - element types with element content, if white space occurs directly
8952 * within any instance of those types.
8953 *
8954 * Returns:
8955 * 1 if standalone="yes"
8956 * 0 if standalone="no"
8957 * -2 if standalone attribute is missing or invalid
8958 * (A standalone value of -2 means that the XML declaration was found,
8959 * but no value was specified for the standalone attribute).
8960 */
8961
8962int
8963xmlParseSDDecl(xmlParserCtxtPtr ctxt) {
8964 int standalone = -2;
8965
8966 SKIP_BLANKS;
8967 if (CMP10(CUR_PTR, 's', 't', 'a', 'n', 'd', 'a', 'l', 'o', 'n', 'e')) {
8968 SKIP(10);
8969 SKIP_BLANKS;
8970 if (RAW != '=') {
8971 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
8972 return(standalone);
8973 }
8974 NEXT;
8975 SKIP_BLANKS;
8976 if (RAW == '\''){
8977 NEXT;
8978 if ((RAW == 'n') && (NXT(1) == 'o')) {
8979 standalone = 0;
8980 SKIP(2);
8981 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
8982 (NXT(2) == 's')) {
8983 standalone = 1;
8984 SKIP(3);
8985 } else {
8986 xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
8987 }
8988 if (RAW != '\'') {
8989 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
8990 } else
8991 NEXT;
8992 } else if (RAW == '"'){
8993 NEXT;
8994 if ((RAW == 'n') && (NXT(1) == 'o')) {
8995 standalone = 0;
8996 SKIP(2);
8997 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
8998 (NXT(2) == 's')) {
8999 standalone = 1;
9000 SKIP(3);
9001 } else {
9002 xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
9003 }
9004 if (RAW != '"') {
9005 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
9006 } else
9007 NEXT;
9008 } else {
9009 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
9010 }
9011 }
9012 return(standalone);
9013}
9014
9015/**
9016 * xmlParseXMLDecl:
9017 * @ctxt: an XML parser context
9018 *
9019 * parse an XML declaration header
9020 *
9021 * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
9022 */
9023
9024void
9025xmlParseXMLDecl(xmlParserCtxtPtr ctxt) {
9026 xmlChar *version;
9027
9028 /*
9029 * This value for standalone indicates that the document has an
9030 * XML declaration but it does not have a standalone attribute.
9031 * It will be overwritten later if a standalone attribute is found.
9032 */
9033 ctxt->input->standalone = -2;
9034
9035 /*
9036 * We know that '<?xml' is here.
9037 */
9038 SKIP(5);
9039
9040 if (!IS_BLANK_CH(RAW)) {
9041 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
9042 "Blank needed after '<?xml'\n");
9043 }
9044 SKIP_BLANKS;
9045
9046 /*
9047 * We must have the VersionInfo here.
9048 */
9049 version = xmlParseVersionInfo(ctxt);
9050 if (version == NULL) {
9051 xmlFatalErr(ctxt, XML_ERR_VERSION_MISSING, NULL);
9052 } else {
9053 if (!xmlStrEqual(version, (const xmlChar *) XML_DEFAULT_VERSION)) {
9054 /*
9055 * TODO: Blueberry should be detected here
9056 */
9057 xmlWarningMsg(ctxt, XML_WAR_UNKNOWN_VERSION,
9058 "Unsupported version '%s'\n",
9059 version, NULL);
9060 }
9061 if (ctxt->version != NULL)
9062 xmlFree((void *) ctxt->version);
9063 ctxt->version = version;
9064 }
9065
9066 /*
9067 * We may have the encoding declaration
9068 */
9069 if (!IS_BLANK_CH(RAW)) {
9070 if ((RAW == '?') && (NXT(1) == '>')) {
9071 SKIP(2);
9072 return;
9073 }
9074 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
9075 }
9076 xmlParseEncodingDecl(ctxt);
9077 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
9078 /*
9079 * The XML REC instructs us to stop parsing right here
9080 */
9081 return;
9082 }
9083
9084 /*
9085 * We may have the standalone status.
9086 */
9087 if ((ctxt->input->encoding != NULL) && (!IS_BLANK_CH(RAW))) {
9088 if ((RAW == '?') && (NXT(1) == '>')) {
9089 SKIP(2);
9090 return;
9091 }
9092 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
9093 }
9094 SKIP_BLANKS;
9095 ctxt->input->standalone = xmlParseSDDecl(ctxt);
9096
9097 SKIP_BLANKS;
9098 if ((RAW == '?') && (NXT(1) == '>')) {
9099 SKIP(2);
9100 } else if (RAW == '>') {
9101 /* Deprecated old WD ... */
9102 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
9103 NEXT;
9104 } else {
9105 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
9106 MOVETO_ENDTAG(CUR_PTR);
9107 NEXT;
9108 }
9109}
9110
9111/**
9112 * xmlParseMisc:
9113 * @ctxt: an XML parser context
9114 *
9115 * parse an XML Misc* optional field.
9116 *
9117 * [27] Misc ::= Comment | PI | S
9118 */
9119
9120void
9121xmlParseMisc(xmlParserCtxtPtr ctxt) {
9122 while (((RAW == '<') && (NXT(1) == '?')) ||
9123 (CMP4(CUR_PTR, '<', '!', '-', '-')) ||
9124 IS_BLANK_CH(CUR)) {
9125 if ((RAW == '<') && (NXT(1) == '?')) {
9126 xmlParsePI(ctxt);
9127 } else if (IS_BLANK_CH(CUR)) {
9128 NEXT;
9129 } else
9130 xmlParseComment(ctxt);
9131 }
9132}
9133
9134/**
9135 * xmlParseDocument:
9136 * @ctxt: an XML parser context
9137 *
9138 * parse an XML document (and build a tree if using the standard SAX
9139 * interface).
9140 *
9141 * [1] document ::= prolog element Misc*
9142 *
9143 * [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)?
9144 *
9145 * Returns 0, -1 in case of error. the parser context is augmented
9146 * as a result of the parsing.
9147 */
9148
9149int
9150xmlParseDocument(xmlParserCtxtPtr ctxt) {
9151 xmlChar start[4];
9152 xmlCharEncoding enc;
9153
9154 xmlInitParser();
9155
9156 if ((ctxt == NULL) || (ctxt->input == NULL))
9157 return(-1);
9158
9159 GROW;
9160
9161 /*
9162 * SAX: detecting the level.
9163 */
9164 xmlDetectSAX2(ctxt);
9165
9166 /*
9167 * SAX: beginning of the document processing.
9168 */
9169 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
9170 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
9171
9172 if ((ctxt->encoding == (const xmlChar *)XML_CHAR_ENCODING_NONE) &&
9173 ((ctxt->input->end - ctxt->input->cur) >= 4)) {
9174 /*
9175 * Get the 4 first bytes and decode the charset
9176 * if enc != XML_CHAR_ENCODING_NONE
9177 * plug some encoding conversion routines.
9178 */
9179 start[0] = RAW;
9180 start[1] = NXT(1);
9181 start[2] = NXT(2);
9182 start[3] = NXT(3);
9183 enc = xmlDetectCharEncoding(&start[0], 4);
9184 if (enc != XML_CHAR_ENCODING_NONE) {
9185 xmlSwitchEncoding(ctxt, enc);
9186 }
9187 }
9188
9189
9190 if (CUR == 0) {
9191 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
9192 }
9193
9194 /*
9195 * Check for the XMLDecl in the Prolog.
9196 */
9197 GROW;
9198 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
9199
9200 /*
9201 * Note that we will switch encoding on the fly.
9202 */
9203 xmlParseXMLDecl(ctxt);
9204 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
9205 /*
9206 * The XML REC instructs us to stop parsing right here
9207 */
9208 return(-1);
9209 }
9210 ctxt->standalone = ctxt->input->standalone;
9211 SKIP_BLANKS;
9212 } else {
9213 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
9214 }
9215 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
9216 ctxt->sax->startDocument(ctxt->userData);
9217
9218 /*
9219 * The Misc part of the Prolog
9220 */
9221 GROW;
9222 xmlParseMisc(ctxt);
9223
9224 /*
9225 * Then possibly doc type declaration(s) and more Misc
9226 * (doctypedecl Misc*)?
9227 */
9228 GROW;
9229 if (CMP9(CUR_PTR, '<', '!', 'D', 'O', 'C', 'T', 'Y', 'P', 'E')) {
9230
9231 ctxt->inSubset = 1;
9232 xmlParseDocTypeDecl(ctxt);
9233 if (RAW == '[') {
9234 ctxt->instate = XML_PARSER_DTD;
9235 xmlParseInternalSubset(ctxt);
9236 }
9237
9238 /*
9239 * Create and update the external subset.
9240 */
9241 ctxt->inSubset = 2;
9242 if ((ctxt->sax != NULL) && (ctxt->sax->externalSubset != NULL) &&
9243 (!ctxt->disableSAX))
9244 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
9245 ctxt->extSubSystem, ctxt->extSubURI);
9246 ctxt->inSubset = 0;
9247
9248
9249 ctxt->instate = XML_PARSER_PROLOG;
9250 xmlParseMisc(ctxt);
9251 }
9252
9253 /*
9254 * Time to start parsing the tree itself
9255 */
9256 GROW;
9257 if (RAW != '<') {
9258 xmlFatalErrMsg(ctxt, XML_ERR_DOCUMENT_EMPTY,
9259 "Start tag expected, '<' not found\n");
9260 } else {
9261 ctxt->instate = XML_PARSER_CONTENT;
9262 xmlParseElement(ctxt);
9263 ctxt->instate = XML_PARSER_EPILOG;
9264
9265
9266 /*
9267 * The Misc part at the end
9268 */
9269 xmlParseMisc(ctxt);
9270
9271 if (RAW != 0) {
9272 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
9273 }
9274 ctxt->instate = XML_PARSER_EOF;
9275 }
9276
9277 /*
9278 * SAX: end of the document processing.
9279 */
9280 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
9281 ctxt->sax->endDocument(ctxt->userData);
9282
9283 /*
9284 * Remove locally kept entity definitions if the tree was not built
9285 */
9286 if ((ctxt->myDoc != NULL) &&
9287 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
9288 xmlFreeDoc(ctxt->myDoc);
9289 ctxt->myDoc = NULL;
9290 }
9291
9292 if (! ctxt->wellFormed) {
9293 ctxt->valid = 0;
9294 return(-1);
9295 }
9296 return(0);
9297}
9298
9299/**
9300 * xmlParseExtParsedEnt:
9301 * @ctxt: an XML parser context
9302 *
9303 * parse a general parsed entity
9304 * An external general parsed entity is well-formed if it matches the
9305 * production labeled extParsedEnt.
9306 *
9307 * [78] extParsedEnt ::= TextDecl? content
9308 *
9309 * Returns 0, -1 in case of error. the parser context is augmented
9310 * as a result of the parsing.
9311 */
9312
9313int
9314xmlParseExtParsedEnt(xmlParserCtxtPtr ctxt) {
9315 xmlChar start[4];
9316 xmlCharEncoding enc;
9317
9318 if ((ctxt == NULL) || (ctxt->input == NULL))
9319 return(-1);
9320
9321 xmlDefaultSAXHandlerInit();
9322
9323 xmlDetectSAX2(ctxt);
9324
9325 GROW;
9326
9327 /*
9328 * SAX: beginning of the document processing.
9329 */
9330 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
9331 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
9332
9333 /*
9334 * Get the 4 first bytes and decode the charset
9335 * if enc != XML_CHAR_ENCODING_NONE
9336 * plug some encoding conversion routines.
9337 */
9338 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
9339 start[0] = RAW;
9340 start[1] = NXT(1);
9341 start[2] = NXT(2);
9342 start[3] = NXT(3);
9343 enc = xmlDetectCharEncoding(start, 4);
9344 if (enc != XML_CHAR_ENCODING_NONE) {
9345 xmlSwitchEncoding(ctxt, enc);
9346 }
9347 }
9348
9349
9350 if (CUR == 0) {
9351 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
9352 }
9353
9354 /*
9355 * Check for the XMLDecl in the Prolog.
9356 */
9357 GROW;
9358 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
9359
9360 /*
9361 * Note that we will switch encoding on the fly.
9362 */
9363 xmlParseXMLDecl(ctxt);
9364 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
9365 /*
9366 * The XML REC instructs us to stop parsing right here
9367 */
9368 return(-1);
9369 }
9370 SKIP_BLANKS;
9371 } else {
9372 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
9373 }
9374 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
9375 ctxt->sax->startDocument(ctxt->userData);
9376
9377 /*
9378 * Doing validity checking on chunk doesn't make sense
9379 */
9380 ctxt->instate = XML_PARSER_CONTENT;
9381 ctxt->validate = 0;
9382 ctxt->loadsubset = 0;
9383 ctxt->depth = 0;
9384
9385 xmlParseContent(ctxt);
9386
9387 if ((RAW == '<') && (NXT(1) == '/')) {
9388 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
9389 } else if (RAW != 0) {
9390 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
9391 }
9392
9393 /*
9394 * SAX: end of the document processing.
9395 */
9396 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
9397 ctxt->sax->endDocument(ctxt->userData);
9398
9399 if (! ctxt->wellFormed) return(-1);
9400 return(0);
9401}
9402
9403#ifdef LIBXML_PUSH_ENABLED
9404/************************************************************************
9405 * *
9406 * Progressive parsing interfaces *
9407 * *
9408 ************************************************************************/
9409
9410/**
9411 * xmlParseLookupSequence:
9412 * @ctxt: an XML parser context
9413 * @first: the first char to lookup
9414 * @next: the next char to lookup or zero
9415 * @third: the next char to lookup or zero
9416 *
9417 * Try to find if a sequence (first, next, third) or just (first next) or
9418 * (first) is available in the input stream.
9419 * This function has a side effect of (possibly) incrementing ctxt->checkIndex
9420 * to avoid rescanning sequences of bytes, it DOES change the state of the
9421 * parser, do not use liberally.
9422 *
9423 * Returns the index to the current parsing point if the full sequence
9424 * is available, -1 otherwise.
9425 */
9426static int
9427xmlParseLookupSequence(xmlParserCtxtPtr ctxt, xmlChar first,
9428 xmlChar next, xmlChar third) {
9429 int base, len;
9430 xmlParserInputPtr in;
9431 const xmlChar *buf;
9432
9433 in = ctxt->input;
9434 if (in == NULL) return(-1);
9435 base = in->cur - in->base;
9436 if (base < 0) return(-1);
9437 if (ctxt->checkIndex > base)
9438 base = ctxt->checkIndex;
9439 if (in->buf == NULL) {
9440 buf = in->base;
9441 len = in->length;
9442 } else {
9443 buf = in->buf->buffer->content;
9444 len = in->buf->buffer->use;
9445 }
9446 /* take into account the sequence length */
9447 if (third) len -= 2;
9448 else if (next) len --;
9449 for (;base < len;base++) {
9450 if (buf[base] == first) {
9451 if (third != 0) {
9452 if ((buf[base + 1] != next) ||
9453 (buf[base + 2] != third)) continue;
9454 } else if (next != 0) {
9455 if (buf[base + 1] != next) continue;
9456 }
9457 ctxt->checkIndex = 0;
9458#ifdef DEBUG_PUSH
9459 if (next == 0)
9460 xmlGenericError(xmlGenericErrorContext,
9461 "PP: lookup '%c' found at %d\n",
9462 first, base);
9463 else if (third == 0)
9464 xmlGenericError(xmlGenericErrorContext,
9465 "PP: lookup '%c%c' found at %d\n",
9466 first, next, base);
9467 else
9468 xmlGenericError(xmlGenericErrorContext,
9469 "PP: lookup '%c%c%c' found at %d\n",
9470 first, next, third, base);
9471#endif
9472 return(base - (in->cur - in->base));
9473 }
9474 }
9475 ctxt->checkIndex = base;
9476#ifdef DEBUG_PUSH
9477 if (next == 0)
9478 xmlGenericError(xmlGenericErrorContext,
9479 "PP: lookup '%c' failed\n", first);
9480 else if (third == 0)
9481 xmlGenericError(xmlGenericErrorContext,
9482 "PP: lookup '%c%c' failed\n", first, next);
9483 else
9484 xmlGenericError(xmlGenericErrorContext,
9485 "PP: lookup '%c%c%c' failed\n", first, next, third);
9486#endif
9487 return(-1);
9488}
9489
9490/**
9491 * xmlParseGetLasts:
9492 * @ctxt: an XML parser context
9493 * @lastlt: pointer to store the last '<' from the input
9494 * @lastgt: pointer to store the last '>' from the input
9495 *
9496 * Lookup the last < and > in the current chunk
9497 */
9498static void
9499xmlParseGetLasts(xmlParserCtxtPtr ctxt, const xmlChar **lastlt,
9500 const xmlChar **lastgt) {
9501 const xmlChar *tmp;
9502
9503 if ((ctxt == NULL) || (lastlt == NULL) || (lastgt == NULL)) {
9504 xmlGenericError(xmlGenericErrorContext,
9505 "Internal error: xmlParseGetLasts\n");
9506 return;
9507 }
9508 if ((ctxt->progressive != 0) && (ctxt->inputNr == 1)) {
9509 tmp = ctxt->input->end;
9510 tmp--;
9511 while ((tmp >= ctxt->input->base) && (*tmp != '<')) tmp--;
9512 if (tmp < ctxt->input->base) {
9513 *lastlt = NULL;
9514 *lastgt = NULL;
9515 } else {
9516 *lastlt = tmp;
9517 tmp++;
9518 while ((tmp < ctxt->input->end) && (*tmp != '>')) {
9519 if (*tmp == '\'') {
9520 tmp++;
9521 while ((tmp < ctxt->input->end) && (*tmp != '\'')) tmp++;
9522 if (tmp < ctxt->input->end) tmp++;
9523 } else if (*tmp == '"') {
9524 tmp++;
9525 while ((tmp < ctxt->input->end) && (*tmp != '"')) tmp++;
9526 if (tmp < ctxt->input->end) tmp++;
9527 } else
9528 tmp++;
9529 }
9530 if (tmp < ctxt->input->end)
9531 *lastgt = tmp;
9532 else {
9533 tmp = *lastlt;
9534 tmp--;
9535 while ((tmp >= ctxt->input->base) && (*tmp != '>')) tmp--;
9536 if (tmp >= ctxt->input->base)
9537 *lastgt = tmp;
9538 else
9539 *lastgt = NULL;
9540 }
9541 }
9542 } else {
9543 *lastlt = NULL;
9544 *lastgt = NULL;
9545 }
9546}
9547/**
9548 * xmlCheckCdataPush:
9549 * @cur: pointer to the bock of characters
9550 * @len: length of the block in bytes
9551 *
9552 * Check that the block of characters is okay as SCdata content [20]
9553 *
9554 * Returns the number of bytes to pass if okay, a negative index where an
9555 * UTF-8 error occured otherwise
9556 */
9557static int
9558xmlCheckCdataPush(const xmlChar *utf, int len) {
9559 int ix;
9560 unsigned char c;
9561 int codepoint;
9562
9563 if ((utf == NULL) || (len <= 0))
9564 return(0);
9565
9566 for (ix = 0; ix < len;) { /* string is 0-terminated */
9567 c = utf[ix];
9568 if ((c & 0x80) == 0x00) { /* 1-byte code, starts with 10 */
9569 if (c >= 0x20)
9570 ix++;
9571 else if ((c == 0xA) || (c == 0xD) || (c == 0x9))
9572 ix++;
9573 else
9574 return(-ix);
9575 } else if ((c & 0xe0) == 0xc0) {/* 2-byte code, starts with 110 */
9576 if (ix + 2 > len) return(ix);
9577 if ((utf[ix+1] & 0xc0 ) != 0x80)
9578 return(-ix);
9579 codepoint = (utf[ix] & 0x1f) << 6;
9580 codepoint |= utf[ix+1] & 0x3f;
9581 if (!xmlIsCharQ(codepoint))
9582 return(-ix);
9583 ix += 2;
9584 } else if ((c & 0xf0) == 0xe0) {/* 3-byte code, starts with 1110 */
9585 if (ix + 3 > len) return(ix);
9586 if (((utf[ix+1] & 0xc0) != 0x80) ||
9587 ((utf[ix+2] & 0xc0) != 0x80))
9588 return(-ix);
9589 codepoint = (utf[ix] & 0xf) << 12;
9590 codepoint |= (utf[ix+1] & 0x3f) << 6;
9591 codepoint |= utf[ix+2] & 0x3f;
9592 if (!xmlIsCharQ(codepoint))
9593 return(-ix);
9594 ix += 3;
9595 } else if ((c & 0xf8) == 0xf0) {/* 4-byte code, starts with 11110 */
9596 if (ix + 4 > len) return(ix);
9597 if (((utf[ix+1] & 0xc0) != 0x80) ||
9598 ((utf[ix+2] & 0xc0) != 0x80) ||
9599 ((utf[ix+3] & 0xc0) != 0x80))
9600 return(-ix);
9601 codepoint = (utf[ix] & 0x7) << 18;
9602 codepoint |= (utf[ix+1] & 0x3f) << 12;
9603 codepoint |= (utf[ix+2] & 0x3f) << 6;
9604 codepoint |= utf[ix+3] & 0x3f;
9605 if (!xmlIsCharQ(codepoint))
9606 return(-ix);
9607 ix += 4;
9608 } else /* unknown encoding */
9609 return(-ix);
9610 }
9611 return(ix);
9612}
9613
9614/**
9615 * xmlParseTryOrFinish:
9616 * @ctxt: an XML parser context
9617 * @terminate: last chunk indicator
9618 *
9619 * Try to progress on parsing
9620 *
9621 * Returns zero if no parsing was possible
9622 */
9623static int
9624xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
9625 int ret = 0;
9626 int avail, tlen;
9627 xmlChar cur, next;
9628 const xmlChar *lastlt, *lastgt;
9629
9630 if (ctxt->input == NULL)
9631 return(0);
9632
9633#ifdef DEBUG_PUSH
9634 switch (ctxt->instate) {
9635 case XML_PARSER_EOF:
9636 xmlGenericError(xmlGenericErrorContext,
9637 "PP: try EOF\n"); break;
9638 case XML_PARSER_START:
9639 xmlGenericError(xmlGenericErrorContext,
9640 "PP: try START\n"); break;
9641 case XML_PARSER_MISC:
9642 xmlGenericError(xmlGenericErrorContext,
9643 "PP: try MISC\n");break;
9644 case XML_PARSER_COMMENT:
9645 xmlGenericError(xmlGenericErrorContext,
9646 "PP: try COMMENT\n");break;
9647 case XML_PARSER_PROLOG:
9648 xmlGenericError(xmlGenericErrorContext,
9649 "PP: try PROLOG\n");break;
9650 case XML_PARSER_START_TAG:
9651 xmlGenericError(xmlGenericErrorContext,
9652 "PP: try START_TAG\n");break;
9653 case XML_PARSER_CONTENT:
9654 xmlGenericError(xmlGenericErrorContext,
9655 "PP: try CONTENT\n");break;
9656 case XML_PARSER_CDATA_SECTION:
9657 xmlGenericError(xmlGenericErrorContext,
9658 "PP: try CDATA_SECTION\n");break;
9659 case XML_PARSER_END_TAG:
9660 xmlGenericError(xmlGenericErrorContext,
9661 "PP: try END_TAG\n");break;
9662 case XML_PARSER_ENTITY_DECL:
9663 xmlGenericError(xmlGenericErrorContext,
9664 "PP: try ENTITY_DECL\n");break;
9665 case XML_PARSER_ENTITY_VALUE:
9666 xmlGenericError(xmlGenericErrorContext,
9667 "PP: try ENTITY_VALUE\n");break;
9668 case XML_PARSER_ATTRIBUTE_VALUE:
9669 xmlGenericError(xmlGenericErrorContext,
9670 "PP: try ATTRIBUTE_VALUE\n");break;
9671 case XML_PARSER_DTD:
9672 xmlGenericError(xmlGenericErrorContext,
9673 "PP: try DTD\n");break;
9674 case XML_PARSER_EPILOG:
9675 xmlGenericError(xmlGenericErrorContext,
9676 "PP: try EPILOG\n");break;
9677 case XML_PARSER_PI:
9678 xmlGenericError(xmlGenericErrorContext,
9679 "PP: try PI\n");break;
9680 case XML_PARSER_IGNORE:
9681 xmlGenericError(xmlGenericErrorContext,
9682 "PP: try IGNORE\n");break;
9683 }
9684#endif
9685
9686 if ((ctxt->input != NULL) &&
9687 (ctxt->input->cur - ctxt->input->base > 4096)) {
9688 xmlSHRINK(ctxt);
9689 ctxt->checkIndex = 0;
9690 }
9691 xmlParseGetLasts(ctxt, &lastlt, &lastgt);
9692
9693 while (1) {
9694 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
9695 return(0);
9696
9697
9698 /*
9699 * Pop-up of finished entities.
9700 */
9701 while ((RAW == 0) && (ctxt->inputNr > 1))
9702 xmlPopInput(ctxt);
9703
9704 if (ctxt->input == NULL) break;
9705 if (ctxt->input->buf == NULL)
9706 avail = ctxt->input->length -
9707 (ctxt->input->cur - ctxt->input->base);
9708 else {
9709 /*
9710 * If we are operating on converted input, try to flush
9711 * remainng chars to avoid them stalling in the non-converted
9712 * buffer.
9713 */
9714 if ((ctxt->input->buf->raw != NULL) &&
9715 (ctxt->input->buf->raw->use > 0)) {
9716 int base = ctxt->input->base -
9717 ctxt->input->buf->buffer->content;
9718 int current = ctxt->input->cur - ctxt->input->base;
9719
9720 xmlParserInputBufferPush(ctxt->input->buf, 0, "");
9721 ctxt->input->base = ctxt->input->buf->buffer->content + base;
9722 ctxt->input->cur = ctxt->input->base + current;
9723 ctxt->input->end =
9724 &ctxt->input->buf->buffer->content[
9725 ctxt->input->buf->buffer->use];
9726 }
9727 avail = ctxt->input->buf->buffer->use -
9728 (ctxt->input->cur - ctxt->input->base);
9729 }
9730 if (avail < 1)
9731 goto done;
9732 switch (ctxt->instate) {
9733 case XML_PARSER_EOF:
9734 /*
9735 * Document parsing is done !
9736 */
9737 goto done;
9738 case XML_PARSER_START:
9739 if (ctxt->charset == XML_CHAR_ENCODING_NONE) {
9740 xmlChar start[4];
9741 xmlCharEncoding enc;
9742
9743 /*
9744 * Very first chars read from the document flow.
9745 */
9746 if (avail < 4)
9747 goto done;
9748
9749 /*
9750 * Get the 4 first bytes and decode the charset
9751 * if enc != XML_CHAR_ENCODING_NONE
9752 * plug some encoding conversion routines,
9753 * else xmlSwitchEncoding will set to (default)
9754 * UTF8.
9755 */
9756 start[0] = RAW;
9757 start[1] = NXT(1);
9758 start[2] = NXT(2);
9759 start[3] = NXT(3);
9760 enc = xmlDetectCharEncoding(start, 4);
9761 xmlSwitchEncoding(ctxt, enc);
9762 break;
9763 }
9764
9765 if (avail < 2)
9766 goto done;
9767 cur = ctxt->input->cur[0];
9768 next = ctxt->input->cur[1];
9769 if (cur == 0) {
9770 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
9771 ctxt->sax->setDocumentLocator(ctxt->userData,
9772 &xmlDefaultSAXLocator);
9773 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
9774 ctxt->instate = XML_PARSER_EOF;
9775#ifdef DEBUG_PUSH
9776 xmlGenericError(xmlGenericErrorContext,
9777 "PP: entering EOF\n");
9778#endif
9779 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
9780 ctxt->sax->endDocument(ctxt->userData);
9781 goto done;
9782 }
9783 if ((cur == '<') && (next == '?')) {
9784 /* PI or XML decl */
9785 if (avail < 5) return(ret);
9786 if ((!terminate) &&
9787 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
9788 return(ret);
9789 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
9790 ctxt->sax->setDocumentLocator(ctxt->userData,
9791 &xmlDefaultSAXLocator);
9792 if ((ctxt->input->cur[2] == 'x') &&
9793 (ctxt->input->cur[3] == 'm') &&
9794 (ctxt->input->cur[4] == 'l') &&
9795 (IS_BLANK_CH(ctxt->input->cur[5]))) {
9796 ret += 5;
9797#ifdef DEBUG_PUSH
9798 xmlGenericError(xmlGenericErrorContext,
9799 "PP: Parsing XML Decl\n");
9800#endif
9801 xmlParseXMLDecl(ctxt);
9802 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
9803 /*
9804 * The XML REC instructs us to stop parsing right
9805 * here
9806 */
9807 ctxt->instate = XML_PARSER_EOF;
9808 return(0);
9809 }
9810 ctxt->standalone = ctxt->input->standalone;
9811 if ((ctxt->encoding == NULL) &&
9812 (ctxt->input->encoding != NULL))
9813 ctxt->encoding = xmlStrdup(ctxt->input->encoding);
9814 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
9815 (!ctxt->disableSAX))
9816 ctxt->sax->startDocument(ctxt->userData);
9817 ctxt->instate = XML_PARSER_MISC;
9818#ifdef DEBUG_PUSH
9819 xmlGenericError(xmlGenericErrorContext,
9820 "PP: entering MISC\n");
9821#endif
9822 } else {
9823 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
9824 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
9825 (!ctxt->disableSAX))
9826 ctxt->sax->startDocument(ctxt->userData);
9827 ctxt->instate = XML_PARSER_MISC;
9828#ifdef DEBUG_PUSH
9829 xmlGenericError(xmlGenericErrorContext,
9830 "PP: entering MISC\n");
9831#endif
9832 }
9833 } else {
9834 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
9835 ctxt->sax->setDocumentLocator(ctxt->userData,
9836 &xmlDefaultSAXLocator);
9837 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
9838 if (ctxt->version == NULL) {
9839 xmlErrMemory(ctxt, NULL);
9840 break;
9841 }
9842 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
9843 (!ctxt->disableSAX))
9844 ctxt->sax->startDocument(ctxt->userData);
9845 ctxt->instate = XML_PARSER_MISC;
9846#ifdef DEBUG_PUSH
9847 xmlGenericError(xmlGenericErrorContext,
9848 "PP: entering MISC\n");
9849#endif
9850 }
9851 break;
9852 case XML_PARSER_START_TAG: {
9853 const xmlChar *name;
9854 const xmlChar *prefix;
9855 const xmlChar *URI;
9856 int nsNr = ctxt->nsNr;
9857
9858 if ((avail < 2) && (ctxt->inputNr == 1))
9859 goto done;
9860 cur = ctxt->input->cur[0];
9861 if (cur != '<') {
9862 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
9863 ctxt->instate = XML_PARSER_EOF;
9864 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
9865 ctxt->sax->endDocument(ctxt->userData);
9866 goto done;
9867 }
9868 if (!terminate) {
9869 if (ctxt->progressive) {
9870 /* > can be found unescaped in attribute values */
9871 if ((lastgt == NULL) || (ctxt->input->cur >= lastgt))
9872 goto done;
9873 } else if (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0) {
9874 goto done;
9875 }
9876 }
9877 if (ctxt->spaceNr == 0)
9878 spacePush(ctxt, -1);
9879 else if (*ctxt->space == -2)
9880 spacePush(ctxt, -1);
9881 else
9882 spacePush(ctxt, *ctxt->space);
9883#ifdef LIBXML_SAX1_ENABLED
9884 if (ctxt->sax2)
9885#endif /* LIBXML_SAX1_ENABLED */
9886 name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen);
9887#ifdef LIBXML_SAX1_ENABLED
9888 else
9889 name = xmlParseStartTag(ctxt);
9890#endif /* LIBXML_SAX1_ENABLED */
9891 if (name == NULL) {
9892 spacePop(ctxt);
9893 ctxt->instate = XML_PARSER_EOF;
9894 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
9895 ctxt->sax->endDocument(ctxt->userData);
9896 goto done;
9897 }
9898#ifdef LIBXML_VALID_ENABLED
9899 /*
9900 * [ VC: Root Element Type ]
9901 * The Name in the document type declaration must match
9902 * the element type of the root element.
9903 */
9904 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
9905 ctxt->node && (ctxt->node == ctxt->myDoc->children))
9906 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
9907#endif /* LIBXML_VALID_ENABLED */
9908
9909 /*
9910 * Check for an Empty Element.
9911 */
9912 if ((RAW == '/') && (NXT(1) == '>')) {
9913 SKIP(2);
9914
9915 if (ctxt->sax2) {
9916 if ((ctxt->sax != NULL) &&
9917 (ctxt->sax->endElementNs != NULL) &&
9918 (!ctxt->disableSAX))
9919 ctxt->sax->endElementNs(ctxt->userData, name,
9920 prefix, URI);
9921 if (ctxt->nsNr - nsNr > 0)
9922 nsPop(ctxt, ctxt->nsNr - nsNr);
9923#ifdef LIBXML_SAX1_ENABLED
9924 } else {
9925 if ((ctxt->sax != NULL) &&
9926 (ctxt->sax->endElement != NULL) &&
9927 (!ctxt->disableSAX))
9928 ctxt->sax->endElement(ctxt->userData, name);
9929#endif /* LIBXML_SAX1_ENABLED */
9930 }
9931 spacePop(ctxt);
9932 if (ctxt->nameNr == 0) {
9933 ctxt->instate = XML_PARSER_EPILOG;
9934 } else {
9935 ctxt->instate = XML_PARSER_CONTENT;
9936 }
9937 break;
9938 }
9939 if (RAW == '>') {
9940 NEXT;
9941 } else {
9942 xmlFatalErrMsgStr(ctxt, XML_ERR_GT_REQUIRED,
9943 "Couldn't find end of Start Tag %s\n",
9944 name);
9945 nodePop(ctxt);
9946 spacePop(ctxt);
9947 }
9948 if (ctxt->sax2)
9949 nameNsPush(ctxt, name, prefix, URI, ctxt->nsNr - nsNr);
9950#ifdef LIBXML_SAX1_ENABLED
9951 else
9952 namePush(ctxt, name);
9953#endif /* LIBXML_SAX1_ENABLED */
9954
9955 ctxt->instate = XML_PARSER_CONTENT;
9956 break;
9957 }
9958 case XML_PARSER_CONTENT: {
9959 const xmlChar *test;
9960 unsigned int cons;
9961 if ((avail < 2) && (ctxt->inputNr == 1))
9962 goto done;
9963 cur = ctxt->input->cur[0];
9964 next = ctxt->input->cur[1];
9965
9966 test = CUR_PTR;
9967 cons = ctxt->input->consumed;
9968 if ((cur == '<') && (next == '/')) {
9969 ctxt->instate = XML_PARSER_END_TAG;
9970 break;
9971 } else if ((cur == '<') && (next == '?')) {
9972 if ((!terminate) &&
9973 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
9974 goto done;
9975 xmlParsePI(ctxt);
9976 } else if ((cur == '<') && (next != '!')) {
9977 ctxt->instate = XML_PARSER_START_TAG;
9978 break;
9979 } else if ((cur == '<') && (next == '!') &&
9980 (ctxt->input->cur[2] == '-') &&
9981 (ctxt->input->cur[3] == '-')) {
9982 int term;
9983
9984 if (avail < 4)
9985 goto done;
9986 ctxt->input->cur += 4;
9987 term = xmlParseLookupSequence(ctxt, '-', '-', '>');
9988 ctxt->input->cur -= 4;
9989 if ((!terminate) && (term < 0))
9990 goto done;
9991 xmlParseComment(ctxt);
9992 ctxt->instate = XML_PARSER_CONTENT;
9993 } else if ((cur == '<') && (ctxt->input->cur[1] == '!') &&
9994 (ctxt->input->cur[2] == '[') &&
9995 (ctxt->input->cur[3] == 'C') &&
9996 (ctxt->input->cur[4] == 'D') &&
9997 (ctxt->input->cur[5] == 'A') &&
9998 (ctxt->input->cur[6] == 'T') &&
9999 (ctxt->input->cur[7] == 'A') &&
10000 (ctxt->input->cur[8] == '[')) {
10001 SKIP(9);
10002 ctxt->instate = XML_PARSER_CDATA_SECTION;
10003 break;
10004 } else if ((cur == '<') && (next == '!') &&
10005 (avail < 9)) {
10006 goto done;
10007 } else if (cur == '&') {
10008 if ((!terminate) &&
10009 (xmlParseLookupSequence(ctxt, ';', 0, 0) < 0))
10010 goto done;
10011 xmlParseReference(ctxt);
10012 } else {
10013 /* TODO Avoid the extra copy, handle directly !!! */
10014 /*
10015 * Goal of the following test is:
10016 * - minimize calls to the SAX 'character' callback
10017 * when they are mergeable
10018 * - handle an problem for isBlank when we only parse
10019 * a sequence of blank chars and the next one is
10020 * not available to check against '<' presence.
10021 * - tries to homogenize the differences in SAX
10022 * callbacks between the push and pull versions
10023 * of the parser.
10024 */
10025 if ((ctxt->inputNr == 1) &&
10026 (avail < XML_PARSER_BIG_BUFFER_SIZE)) {
10027 if (!terminate) {
10028 if (ctxt->progressive) {
10029 if ((lastlt == NULL) ||
10030 (ctxt->input->cur > lastlt))
10031 goto done;
10032 } else if (xmlParseLookupSequence(ctxt,
10033 '<', 0, 0) < 0) {
10034 goto done;
10035 }
10036 }
10037 }
10038 ctxt->checkIndex = 0;
10039 xmlParseCharData(ctxt, 0);
10040 }
10041 /*
10042 * Pop-up of finished entities.
10043 */
10044 while ((RAW == 0) && (ctxt->inputNr > 1))
10045 xmlPopInput(ctxt);
10046 if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) {
10047 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
10048 "detected an error in element content\n");
10049 ctxt->instate = XML_PARSER_EOF;
10050 break;
10051 }
10052 break;
10053 }
10054 case XML_PARSER_END_TAG:
10055 if (avail < 2)
10056 goto done;
10057 if (!terminate) {
10058 if (ctxt->progressive) {
10059 /* > can be found unescaped in attribute values */
10060 if ((lastgt == NULL) || (ctxt->input->cur >= lastgt))
10061 goto done;
10062 } else if (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0) {
10063 goto done;
10064 }
10065 }
10066 if (ctxt->sax2) {
10067 xmlParseEndTag2(ctxt,
10068 (void *) ctxt->pushTab[ctxt->nameNr * 3 - 3],
10069 (void *) ctxt->pushTab[ctxt->nameNr * 3 - 2], 0,
10070 (int) (long) ctxt->pushTab[ctxt->nameNr * 3 - 1], 0);
10071 nameNsPop(ctxt);
10072 }
10073#ifdef LIBXML_SAX1_ENABLED
10074 else
10075 xmlParseEndTag1(ctxt, 0);
10076#endif /* LIBXML_SAX1_ENABLED */
10077 if (ctxt->nameNr == 0) {
10078 ctxt->instate = XML_PARSER_EPILOG;
10079 } else {
10080 ctxt->instate = XML_PARSER_CONTENT;
10081 }
10082 break;
10083 case XML_PARSER_CDATA_SECTION: {
10084 /*
10085 * The Push mode need to have the SAX callback for
10086 * cdataBlock merge back contiguous callbacks.
10087 */
10088 int base;
10089
10090 base = xmlParseLookupSequence(ctxt, ']', ']', '>');
10091 if (base < 0) {
10092 if (avail >= XML_PARSER_BIG_BUFFER_SIZE + 2) {
10093 int tmp;
10094
10095 tmp = xmlCheckCdataPush(ctxt->input->cur,
10096 XML_PARSER_BIG_BUFFER_SIZE);
10097 if (tmp < 0) {
10098 tmp = -tmp;
10099 ctxt->input->cur += tmp;
10100 goto encoding_error;
10101 }
10102 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
10103 if (ctxt->sax->cdataBlock != NULL)
10104 ctxt->sax->cdataBlock(ctxt->userData,
10105 ctxt->input->cur, tmp);
10106 else if (ctxt->sax->characters != NULL)
10107 ctxt->sax->characters(ctxt->userData,
10108 ctxt->input->cur, tmp);
10109 }
10110 SKIPL(tmp);
10111 ctxt->checkIndex = 0;
10112 }
10113 goto done;
10114 } else {
10115 int tmp;
10116
10117 tmp = xmlCheckCdataPush(ctxt->input->cur, base);
10118 if ((tmp < 0) || (tmp != base)) {
10119 tmp = -tmp;
10120 ctxt->input->cur += tmp;
10121 goto encoding_error;
10122 }
10123 if ((ctxt->sax != NULL) && (base > 0) &&
10124 (!ctxt->disableSAX)) {
10125 if (ctxt->sax->cdataBlock != NULL)
10126 ctxt->sax->cdataBlock(ctxt->userData,
10127 ctxt->input->cur, base);
10128 else if (ctxt->sax->characters != NULL)
10129 ctxt->sax->characters(ctxt->userData,
10130 ctxt->input->cur, base);
10131 }
10132 SKIPL(base + 3);
10133 ctxt->checkIndex = 0;
10134 ctxt->instate = XML_PARSER_CONTENT;
10135#ifdef DEBUG_PUSH
10136 xmlGenericError(xmlGenericErrorContext,
10137 "PP: entering CONTENT\n");
10138#endif
10139 }
10140 break;
10141 }
10142 case XML_PARSER_MISC:
10143 SKIP_BLANKS;
10144 if (ctxt->input->buf == NULL)
10145 avail = ctxt->input->length -
10146 (ctxt->input->cur - ctxt->input->base);
10147 else
10148 avail = ctxt->input->buf->buffer->use -
10149 (ctxt->input->cur - ctxt->input->base);
10150 if (avail < 2)
10151 goto done;
10152 cur = ctxt->input->cur[0];
10153 next = ctxt->input->cur[1];
10154 if ((cur == '<') && (next == '?')) {
10155 if ((!terminate) &&
10156 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
10157 goto done;
10158#ifdef DEBUG_PUSH
10159 xmlGenericError(xmlGenericErrorContext,
10160 "PP: Parsing PI\n");
10161#endif
10162 xmlParsePI(ctxt);
10163 ctxt->checkIndex = 0;
10164 } else if ((cur == '<') && (next == '!') &&
10165 (ctxt->input->cur[2] == '-') &&
10166 (ctxt->input->cur[3] == '-')) {
10167 if ((!terminate) &&
10168 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
10169 goto done;
10170#ifdef DEBUG_PUSH
10171 xmlGenericError(xmlGenericErrorContext,
10172 "PP: Parsing Comment\n");
10173#endif
10174 xmlParseComment(ctxt);
10175 ctxt->instate = XML_PARSER_MISC;
10176 ctxt->checkIndex = 0;
10177 } else if ((cur == '<') && (next == '!') &&
10178 (ctxt->input->cur[2] == 'D') &&
10179 (ctxt->input->cur[3] == 'O') &&
10180 (ctxt->input->cur[4] == 'C') &&
10181 (ctxt->input->cur[5] == 'T') &&
10182 (ctxt->input->cur[6] == 'Y') &&
10183 (ctxt->input->cur[7] == 'P') &&
10184 (ctxt->input->cur[8] == 'E')) {
10185 if ((!terminate) &&
10186 (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
10187 goto done;
10188#ifdef DEBUG_PUSH
10189 xmlGenericError(xmlGenericErrorContext,
10190 "PP: Parsing internal subset\n");
10191#endif
10192 ctxt->inSubset = 1;
10193 xmlParseDocTypeDecl(ctxt);
10194 if (RAW == '[') {
10195 ctxt->instate = XML_PARSER_DTD;
10196#ifdef DEBUG_PUSH
10197 xmlGenericError(xmlGenericErrorContext,
10198 "PP: entering DTD\n");
10199#endif
10200 } else {
10201 /*
10202 * Create and update the external subset.
10203 */
10204 ctxt->inSubset = 2;
10205 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
10206 (ctxt->sax->externalSubset != NULL))
10207 ctxt->sax->externalSubset(ctxt->userData,
10208 ctxt->intSubName, ctxt->extSubSystem,
10209 ctxt->extSubURI);
10210 ctxt->inSubset = 0;
10211 ctxt->instate = XML_PARSER_PROLOG;
10212#ifdef DEBUG_PUSH
10213 xmlGenericError(xmlGenericErrorContext,
10214 "PP: entering PROLOG\n");
10215#endif
10216 }
10217 } else if ((cur == '<') && (next == '!') &&
10218 (avail < 9)) {
10219 goto done;
10220 } else {
10221 ctxt->instate = XML_PARSER_START_TAG;
10222 ctxt->progressive = 1;
10223 xmlParseGetLasts(ctxt, &lastlt, &lastgt);
10224#ifdef DEBUG_PUSH
10225 xmlGenericError(xmlGenericErrorContext,
10226 "PP: entering START_TAG\n");
10227#endif
10228 }
10229 break;
10230 case XML_PARSER_PROLOG:
10231 SKIP_BLANKS;
10232 if (ctxt->input->buf == NULL)
10233 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
10234 else
10235 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
10236 if (avail < 2)
10237 goto done;
10238 cur = ctxt->input->cur[0];
10239 next = ctxt->input->cur[1];
10240 if ((cur == '<') && (next == '?')) {
10241 if ((!terminate) &&
10242 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
10243 goto done;
10244#ifdef DEBUG_PUSH
10245 xmlGenericError(xmlGenericErrorContext,
10246 "PP: Parsing PI\n");
10247#endif
10248 xmlParsePI(ctxt);
10249 } else if ((cur == '<') && (next == '!') &&
10250 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
10251 if ((!terminate) &&
10252 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
10253 goto done;
10254#ifdef DEBUG_PUSH
10255 xmlGenericError(xmlGenericErrorContext,
10256 "PP: Parsing Comment\n");
10257#endif
10258 xmlParseComment(ctxt);
10259 ctxt->instate = XML_PARSER_PROLOG;
10260 } else if ((cur == '<') && (next == '!') &&
10261 (avail < 4)) {
10262 goto done;
10263 } else {
10264 ctxt->instate = XML_PARSER_START_TAG;
10265 if (ctxt->progressive == 0)
10266 ctxt->progressive = 1;
10267 xmlParseGetLasts(ctxt, &lastlt, &lastgt);
10268#ifdef DEBUG_PUSH
10269 xmlGenericError(xmlGenericErrorContext,
10270 "PP: entering START_TAG\n");
10271#endif
10272 }
10273 break;
10274 case XML_PARSER_EPILOG:
10275 SKIP_BLANKS;
10276 if (ctxt->input->buf == NULL)
10277 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
10278 else
10279 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
10280 if (avail < 2)
10281 goto done;
10282 cur = ctxt->input->cur[0];
10283 next = ctxt->input->cur[1];
10284 if ((cur == '<') && (next == '?')) {
10285 if ((!terminate) &&
10286 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
10287 goto done;
10288#ifdef DEBUG_PUSH
10289 xmlGenericError(xmlGenericErrorContext,
10290 "PP: Parsing PI\n");
10291#endif
10292 xmlParsePI(ctxt);
10293 ctxt->instate = XML_PARSER_EPILOG;
10294 } else if ((cur == '<') && (next == '!') &&
10295 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
10296 if ((!terminate) &&
10297 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
10298 goto done;
10299#ifdef DEBUG_PUSH
10300 xmlGenericError(xmlGenericErrorContext,
10301 "PP: Parsing Comment\n");
10302#endif
10303 xmlParseComment(ctxt);
10304 ctxt->instate = XML_PARSER_EPILOG;
10305 } else if ((cur == '<') && (next == '!') &&
10306 (avail < 4)) {
10307 goto done;
10308 } else {
10309 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
10310 ctxt->instate = XML_PARSER_EOF;
10311#ifdef DEBUG_PUSH
10312 xmlGenericError(xmlGenericErrorContext,
10313 "PP: entering EOF\n");
10314#endif
10315 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
10316 ctxt->sax->endDocument(ctxt->userData);
10317 goto done;
10318 }
10319 break;
10320 case XML_PARSER_DTD: {
10321 /*
10322 * Sorry but progressive parsing of the internal subset
10323 * is not expected to be supported. We first check that
10324 * the full content of the internal subset is available and
10325 * the parsing is launched only at that point.
10326 * Internal subset ends up with "']' S? '>'" in an unescaped
10327 * section and not in a ']]>' sequence which are conditional
10328 * sections (whoever argued to keep that crap in XML deserve
10329 * a place in hell !).
10330 */
10331 int base, i;
10332 xmlChar *buf;
10333 xmlChar quote = 0;
10334
10335 base = ctxt->input->cur - ctxt->input->base;
10336 if (base < 0) return(0);
10337 if (ctxt->checkIndex > base)
10338 base = ctxt->checkIndex;
10339 buf = ctxt->input->buf->buffer->content;
10340 for (;(unsigned int) base < ctxt->input->buf->buffer->use;
10341 base++) {
10342 if (quote != 0) {
10343 if (buf[base] == quote)
10344 quote = 0;
10345 continue;
10346 }
10347 if ((quote == 0) && (buf[base] == '<')) {
10348 int found = 0;
10349 /* special handling of comments */
10350 if (((unsigned int) base + 4 <
10351 ctxt->input->buf->buffer->use) &&
10352 (buf[base + 1] == '!') &&
10353 (buf[base + 2] == '-') &&
10354 (buf[base + 3] == '-')) {
10355 for (;(unsigned int) base + 3 <
10356 ctxt->input->buf->buffer->use; base++) {
10357 if ((buf[base] == '-') &&
10358 (buf[base + 1] == '-') &&
10359 (buf[base + 2] == '>')) {
10360 found = 1;
10361 base += 2;
10362 break;
10363 }
10364 }
10365 if (!found) {
10366#if 0
10367 fprintf(stderr, "unfinished comment\n");
10368#endif
10369 break; /* for */
10370 }
10371 continue;
10372 }
10373 }
10374 if (buf[base] == '"') {
10375 quote = '"';
10376 continue;
10377 }
10378 if (buf[base] == '\'') {
10379 quote = '\'';
10380 continue;
10381 }
10382 if (buf[base] == ']') {
10383#if 0
10384 fprintf(stderr, "%c%c%c%c: ", buf[base],
10385 buf[base + 1], buf[base + 2], buf[base + 3]);
10386#endif
10387 if ((unsigned int) base +1 >=
10388 ctxt->input->buf->buffer->use)
10389 break;
10390 if (buf[base + 1] == ']') {
10391 /* conditional crap, skip both ']' ! */
10392 base++;
10393 continue;
10394 }
10395 for (i = 1;
10396 (unsigned int) base + i < ctxt->input->buf->buffer->use;
10397 i++) {
10398 if (buf[base + i] == '>') {
10399#if 0
10400 fprintf(stderr, "found\n");
10401#endif
10402 goto found_end_int_subset;
10403 }
10404 if (!IS_BLANK_CH(buf[base + i])) {
10405#if 0
10406 fprintf(stderr, "not found\n");
10407#endif
10408 goto not_end_of_int_subset;
10409 }
10410 }
10411#if 0
10412 fprintf(stderr, "end of stream\n");
10413#endif
10414 break;
10415
10416 }
10417not_end_of_int_subset:
10418 continue; /* for */
10419 }
10420 /*
10421 * We didn't found the end of the Internal subset
10422 */
10423#ifdef DEBUG_PUSH
10424 if (next == 0)
10425 xmlGenericError(xmlGenericErrorContext,
10426 "PP: lookup of int subset end filed\n");
10427#endif
10428 goto done;
10429
10430found_end_int_subset:
10431 xmlParseInternalSubset(ctxt);
10432 ctxt->inSubset = 2;
10433 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
10434 (ctxt->sax->externalSubset != NULL))
10435 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
10436 ctxt->extSubSystem, ctxt->extSubURI);
10437 ctxt->inSubset = 0;
10438 ctxt->instate = XML_PARSER_PROLOG;
10439 ctxt->checkIndex = 0;
10440#ifdef DEBUG_PUSH
10441 xmlGenericError(xmlGenericErrorContext,
10442 "PP: entering PROLOG\n");
10443#endif
10444 break;
10445 }
10446 case XML_PARSER_COMMENT:
10447 xmlGenericError(xmlGenericErrorContext,
10448 "PP: internal error, state == COMMENT\n");
10449 ctxt->instate = XML_PARSER_CONTENT;
10450#ifdef DEBUG_PUSH
10451 xmlGenericError(xmlGenericErrorContext,
10452 "PP: entering CONTENT\n");
10453#endif
10454 break;
10455 case XML_PARSER_IGNORE:
10456 xmlGenericError(xmlGenericErrorContext,
10457 "PP: internal error, state == IGNORE");
10458 ctxt->instate = XML_PARSER_DTD;
10459#ifdef DEBUG_PUSH
10460 xmlGenericError(xmlGenericErrorContext,
10461 "PP: entering DTD\n");
10462#endif
10463 break;
10464 case XML_PARSER_PI:
10465 xmlGenericError(xmlGenericErrorContext,
10466 "PP: internal error, state == PI\n");
10467 ctxt->instate = XML_PARSER_CONTENT;
10468#ifdef DEBUG_PUSH
10469 xmlGenericError(xmlGenericErrorContext,
10470 "PP: entering CONTENT\n");
10471#endif
10472 break;
10473 case XML_PARSER_ENTITY_DECL:
10474 xmlGenericError(xmlGenericErrorContext,
10475 "PP: internal error, state == ENTITY_DECL\n");
10476 ctxt->instate = XML_PARSER_DTD;
10477#ifdef DEBUG_PUSH
10478 xmlGenericError(xmlGenericErrorContext,
10479 "PP: entering DTD\n");
10480#endif
10481 break;
10482 case XML_PARSER_ENTITY_VALUE:
10483 xmlGenericError(xmlGenericErrorContext,
10484 "PP: internal error, state == ENTITY_VALUE\n");
10485 ctxt->instate = XML_PARSER_CONTENT;
10486#ifdef DEBUG_PUSH
10487 xmlGenericError(xmlGenericErrorContext,
10488 "PP: entering DTD\n");
10489#endif
10490 break;
10491 case XML_PARSER_ATTRIBUTE_VALUE:
10492 xmlGenericError(xmlGenericErrorContext,
10493 "PP: internal error, state == ATTRIBUTE_VALUE\n");
10494 ctxt->instate = XML_PARSER_START_TAG;
10495#ifdef DEBUG_PUSH
10496 xmlGenericError(xmlGenericErrorContext,
10497 "PP: entering START_TAG\n");
10498#endif
10499 break;
10500 case XML_PARSER_SYSTEM_LITERAL:
10501 xmlGenericError(xmlGenericErrorContext,
10502 "PP: internal error, state == SYSTEM_LITERAL\n");
10503 ctxt->instate = XML_PARSER_START_TAG;
10504#ifdef DEBUG_PUSH
10505 xmlGenericError(xmlGenericErrorContext,
10506 "PP: entering START_TAG\n");
10507#endif
10508 break;
10509 case XML_PARSER_PUBLIC_LITERAL:
10510 xmlGenericError(xmlGenericErrorContext,
10511 "PP: internal error, state == PUBLIC_LITERAL\n");
10512 ctxt->instate = XML_PARSER_START_TAG;
10513#ifdef DEBUG_PUSH
10514 xmlGenericError(xmlGenericErrorContext,
10515 "PP: entering START_TAG\n");
10516#endif
10517 break;
10518 }
10519 }
10520done:
10521#ifdef DEBUG_PUSH
10522 xmlGenericError(xmlGenericErrorContext, "PP: done %d\n", ret);
10523#endif
10524 return(ret);
10525encoding_error:
10526 {
10527 char buffer[150];
10528
10529 snprintf(buffer, 149, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
10530 ctxt->input->cur[0], ctxt->input->cur[1],
10531 ctxt->input->cur[2], ctxt->input->cur[3]);
10532 __xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR,
10533 "Input is not proper UTF-8, indicate encoding !\n%s",
10534 BAD_CAST buffer, NULL);
10535 }
10536 return(0);
10537}
10538
10539/**
10540 * xmlParseChunk:
10541 * @ctxt: an XML parser context
10542 * @chunk: an char array
10543 * @size: the size in byte of the chunk
10544 * @terminate: last chunk indicator
10545 *
10546 * Parse a Chunk of memory
10547 *
10548 * Returns zero if no error, the xmlParserErrors otherwise.
10549 */
10550int
10551xmlParseChunk(xmlParserCtxtPtr ctxt, const char *chunk, int size,
10552 int terminate) {
10553 int end_in_lf = 0;
10554
10555 if (ctxt == NULL)
10556 return(XML_ERR_INTERNAL_ERROR);
10557 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
10558 return(ctxt->errNo);
10559 if (ctxt->instate == XML_PARSER_START)
10560 xmlDetectSAX2(ctxt);
10561 if ((size > 0) && (chunk != NULL) && (!terminate) &&
10562 (chunk[size - 1] == '\r')) {
10563 end_in_lf = 1;
10564 size--;
10565 }
10566 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
10567 (ctxt->input->buf != NULL) && (ctxt->instate != XML_PARSER_EOF)) {
10568 int base = ctxt->input->base - ctxt->input->buf->buffer->content;
10569 int cur = ctxt->input->cur - ctxt->input->base;
10570 int res;
10571
10572 res =xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
10573 if (res < 0) {
10574 ctxt->errNo = XML_PARSER_EOF;
10575 ctxt->disableSAX = 1;
10576 return (XML_PARSER_EOF);
10577 }
10578 ctxt->input->base = ctxt->input->buf->buffer->content + base;
10579 ctxt->input->cur = ctxt->input->base + cur;
10580 ctxt->input->end =
10581 &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->use];
10582#ifdef DEBUG_PUSH
10583 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
10584#endif
10585
10586 } else if (ctxt->instate != XML_PARSER_EOF) {
10587 if ((ctxt->input != NULL) && ctxt->input->buf != NULL) {
10588 xmlParserInputBufferPtr in = ctxt->input->buf;
10589 if ((in->encoder != NULL) && (in->buffer != NULL) &&
10590 (in->raw != NULL)) {
10591 int nbchars;
10592
10593 nbchars = xmlCharEncInFunc(in->encoder, in->buffer, in->raw);
10594 if (nbchars < 0) {
10595 /* TODO 2.6.0 */
10596 xmlGenericError(xmlGenericErrorContext,
10597 "xmlParseChunk: encoder error\n");
10598 return(XML_ERR_INVALID_ENCODING);
10599 }
10600 }
10601 }
10602 }
10603 xmlParseTryOrFinish(ctxt, terminate);
10604 if ((end_in_lf == 1) && (ctxt->input != NULL) &&
10605 (ctxt->input->buf != NULL)) {
10606 xmlParserInputBufferPush(ctxt->input->buf, 1, "\r");
10607 }
10608 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
10609 return(ctxt->errNo);
10610 if (terminate) {
10611 /*
10612 * Check for termination
10613 */
10614 int avail = 0;
10615
10616 if (ctxt->input != NULL) {
10617 if (ctxt->input->buf == NULL)
10618 avail = ctxt->input->length -
10619 (ctxt->input->cur - ctxt->input->base);
10620 else
10621 avail = ctxt->input->buf->buffer->use -
10622 (ctxt->input->cur - ctxt->input->base);
10623 }
10624
10625 if ((ctxt->instate != XML_PARSER_EOF) &&
10626 (ctxt->instate != XML_PARSER_EPILOG)) {
10627 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
10628 }
10629 if ((ctxt->instate == XML_PARSER_EPILOG) && (avail > 0)) {
10630 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
10631 }
10632 if (ctxt->instate != XML_PARSER_EOF) {
10633 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
10634 ctxt->sax->endDocument(ctxt->userData);
10635 }
10636 ctxt->instate = XML_PARSER_EOF;
10637 }
10638 return((xmlParserErrors) ctxt->errNo);
10639}
10640
10641/************************************************************************
10642 * *
10643 * I/O front end functions to the parser *
10644 * *
10645 ************************************************************************/
10646
10647/**
10648 * xmlCreatePushParserCtxt:
10649 * @sax: a SAX handler
10650 * @user_data: The user data returned on SAX callbacks
10651 * @chunk: a pointer to an array of chars
10652 * @size: number of chars in the array
10653 * @filename: an optional file name or URI
10654 *
10655 * Create a parser context for using the XML parser in push mode.
10656 * If @buffer and @size are non-NULL, the data is used to detect
10657 * the encoding. The remaining characters will be parsed so they
10658 * don't need to be fed in again through xmlParseChunk.
10659 * To allow content encoding detection, @size should be >= 4
10660 * The value of @filename is used for fetching external entities
10661 * and error/warning reports.
10662 *
10663 * Returns the new parser context or NULL
10664 */
10665
10666xmlParserCtxtPtr
10667xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
10668 const char *chunk, int size, const char *filename) {
10669 xmlParserCtxtPtr ctxt;
10670 xmlParserInputPtr inputStream;
10671 xmlParserInputBufferPtr buf;
10672 xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
10673
10674 /*
10675 * plug some encoding conversion routines
10676 */
10677 if ((chunk != NULL) && (size >= 4))
10678 enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
10679
10680 buf = xmlAllocParserInputBuffer(enc);
10681 if (buf == NULL) return(NULL);
10682
10683 ctxt = xmlNewParserCtxt();
10684 if (ctxt == NULL) {
10685 xmlErrMemory(NULL, "creating parser: out of memory\n");
10686 xmlFreeParserInputBuffer(buf);
10687 return(NULL);
10688 }
10689 ctxt->dictNames = 1;
10690 ctxt->pushTab = (void **) xmlMalloc(ctxt->nameMax * 3 * sizeof(xmlChar *));
10691 if (ctxt->pushTab == NULL) {
10692 xmlErrMemory(ctxt, NULL);
10693 xmlFreeParserInputBuffer(buf);
10694 xmlFreeParserCtxt(ctxt);
10695 return(NULL);
10696 }
10697 if (sax != NULL) {
10698#ifdef LIBXML_SAX1_ENABLED
10699 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
10700#endif /* LIBXML_SAX1_ENABLED */
10701 xmlFree(ctxt->sax);
10702 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
10703 if (ctxt->sax == NULL) {
10704 xmlErrMemory(ctxt, NULL);
10705 xmlFreeParserInputBuffer(buf);
10706 xmlFreeParserCtxt(ctxt);
10707 return(NULL);
10708 }
10709 memset(ctxt->sax, 0, sizeof(xmlSAXHandler));
10710 if (sax->initialized == XML_SAX2_MAGIC)
10711 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
10712 else
10713 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1));
10714 if (user_data != NULL)
10715 ctxt->userData = user_data;
10716 }
10717 if (filename == NULL) {
10718 ctxt->directory = NULL;
10719 } else {
10720 ctxt->directory = xmlParserGetDirectory(filename);
10721 }
10722
10723 inputStream = xmlNewInputStream(ctxt);
10724 if (inputStream == NULL) {
10725 xmlFreeParserCtxt(ctxt);
10726 xmlFreeParserInputBuffer(buf);
10727 return(NULL);
10728 }
10729
10730 if (filename == NULL)
10731 inputStream->filename = NULL;
10732 else {
10733 inputStream->filename = (char *)
10734 xmlCanonicPath((const xmlChar *) filename);
10735 if (inputStream->filename == NULL) {
10736 xmlFreeParserCtxt(ctxt);
10737 xmlFreeParserInputBuffer(buf);
10738 return(NULL);
10739 }
10740 }
10741 inputStream->buf = buf;
10742 inputStream->base = inputStream->buf->buffer->content;
10743 inputStream->cur = inputStream->buf->buffer->content;
10744 inputStream->end =
10745 &inputStream->buf->buffer->content[inputStream->buf->buffer->use];
10746
10747 inputPush(ctxt, inputStream);
10748
10749 /*
10750 * If the caller didn't provide an initial 'chunk' for determining
10751 * the encoding, we set the context to XML_CHAR_ENCODING_NONE so
10752 * that it can be automatically determined later
10753 */
10754 if ((size == 0) || (chunk == NULL)) {
10755 ctxt->charset = XML_CHAR_ENCODING_NONE;
10756 } else if ((ctxt->input != NULL) && (ctxt->input->buf != NULL)) {
10757 int base = ctxt->input->base - ctxt->input->buf->buffer->content;
10758 int cur = ctxt->input->cur - ctxt->input->base;
10759
10760 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
10761
10762 ctxt->input->base = ctxt->input->buf->buffer->content + base;
10763 ctxt->input->cur = ctxt->input->base + cur;
10764 ctxt->input->end =
10765 &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->use];
10766#ifdef DEBUG_PUSH
10767 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
10768#endif
10769 }
10770
10771 if (enc != XML_CHAR_ENCODING_NONE) {
10772 xmlSwitchEncoding(ctxt, enc);
10773 }
10774
10775 return(ctxt);
10776}
10777#endif /* LIBXML_PUSH_ENABLED */
10778
10779/**
10780 * xmlStopParser:
10781 * @ctxt: an XML parser context
10782 *
10783 * Blocks further parser processing
10784 */
10785void
10786xmlStopParser(xmlParserCtxtPtr ctxt) {
10787 if (ctxt == NULL)
10788 return;
10789 ctxt->instate = XML_PARSER_EOF;
10790 ctxt->disableSAX = 1;
10791 if (ctxt->input != NULL) {
10792 ctxt->input->cur = BAD_CAST"";
10793 ctxt->input->base = ctxt->input->cur;
10794 }
10795}
10796
10797/**
10798 * xmlCreateIOParserCtxt:
10799 * @sax: a SAX handler
10800 * @user_data: The user data returned on SAX callbacks
10801 * @ioread: an I/O read function
10802 * @ioclose: an I/O close function
10803 * @ioctx: an I/O handler
10804 * @enc: the charset encoding if known
10805 *
10806 * Create a parser context for using the XML parser with an existing
10807 * I/O stream
10808 *
10809 * Returns the new parser context or NULL
10810 */
10811xmlParserCtxtPtr
10812xmlCreateIOParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
10813 xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
10814 void *ioctx, xmlCharEncoding enc) {
10815 xmlParserCtxtPtr ctxt;
10816 xmlParserInputPtr inputStream;
10817 xmlParserInputBufferPtr buf;
10818
10819 if (ioread == NULL) return(NULL);
10820
10821 buf = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx, enc);
10822 if (buf == NULL) return(NULL);
10823
10824 ctxt = xmlNewParserCtxt();
10825 if (ctxt == NULL) {
10826 xmlFreeParserInputBuffer(buf);
10827 return(NULL);
10828 }
10829 if (sax != NULL) {
10830#ifdef LIBXML_SAX1_ENABLED
10831 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
10832#endif /* LIBXML_SAX1_ENABLED */
10833 xmlFree(ctxt->sax);
10834 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
10835 if (ctxt->sax == NULL) {
10836 xmlErrMemory(ctxt, NULL);
10837 xmlFreeParserCtxt(ctxt);
10838 return(NULL);
10839 }
10840 memset(ctxt->sax, 0, sizeof(xmlSAXHandler));
10841 if (sax->initialized == XML_SAX2_MAGIC)
10842 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
10843 else
10844 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1));
10845 if (user_data != NULL)
10846 ctxt->userData = user_data;
10847 }
10848
10849 inputStream = xmlNewIOInputStream(ctxt, buf, enc);
10850 if (inputStream == NULL) {
10851 xmlFreeParserCtxt(ctxt);
10852 return(NULL);
10853 }
10854 inputPush(ctxt, inputStream);
10855
10856 return(ctxt);
10857}
10858
10859#ifdef LIBXML_VALID_ENABLED
10860/************************************************************************
10861 * *
10862 * Front ends when parsing a DTD *
10863 * *
10864 ************************************************************************/
10865
10866/**
10867 * xmlIOParseDTD:
10868 * @sax: the SAX handler block or NULL
10869 * @input: an Input Buffer
10870 * @enc: the charset encoding if known
10871 *
10872 * Load and parse a DTD
10873 *
10874 * Returns the resulting xmlDtdPtr or NULL in case of error.
10875 * @input will be freed by the function in any case.
10876 */
10877
10878xmlDtdPtr
10879xmlIOParseDTD(xmlSAXHandlerPtr sax, xmlParserInputBufferPtr input,
10880 xmlCharEncoding enc) {
10881 xmlDtdPtr ret = NULL;
10882 xmlParserCtxtPtr ctxt;
10883 xmlParserInputPtr pinput = NULL;
10884 xmlChar start[4];
10885
10886 if (input == NULL)
10887 return(NULL);
10888
10889 ctxt = xmlNewParserCtxt();
10890 if (ctxt == NULL) {
10891 xmlFreeParserInputBuffer(input);
10892 return(NULL);
10893 }
10894
10895 /*
10896 * Set-up the SAX context
10897 */
10898 if (sax != NULL) {
10899 if (ctxt->sax != NULL)
10900 xmlFree(ctxt->sax);
10901 ctxt->sax = sax;
10902 ctxt->userData = ctxt;
10903 }
10904 xmlDetectSAX2(ctxt);
10905
10906 /*
10907 * generate a parser input from the I/O handler
10908 */
10909
10910 pinput = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
10911 if (pinput == NULL) {
10912 if (sax != NULL) ctxt->sax = NULL;
10913 xmlFreeParserInputBuffer(input);
10914 xmlFreeParserCtxt(ctxt);
10915 return(NULL);
10916 }
10917
10918 /*
10919 * plug some encoding conversion routines here.
10920 */
10921 xmlPushInput(ctxt, pinput);
10922 if (enc != XML_CHAR_ENCODING_NONE) {
10923 xmlSwitchEncoding(ctxt, enc);
10924 }
10925
10926 pinput->filename = NULL;
10927 pinput->line = 1;
10928 pinput->col = 1;
10929 pinput->base = ctxt->input->cur;
10930 pinput->cur = ctxt->input->cur;
10931 pinput->free = NULL;
10932
10933 /*
10934 * let's parse that entity knowing it's an external subset.
10935 */
10936 ctxt->inSubset = 2;
10937 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
10938 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
10939 BAD_CAST "none", BAD_CAST "none");
10940
10941 if ((enc == XML_CHAR_ENCODING_NONE) &&
10942 ((ctxt->input->end - ctxt->input->cur) >= 4)) {
10943 /*
10944 * Get the 4 first bytes and decode the charset
10945 * if enc != XML_CHAR_ENCODING_NONE
10946 * plug some encoding conversion routines.
10947 */
10948 start[0] = RAW;
10949 start[1] = NXT(1);
10950 start[2] = NXT(2);
10951 start[3] = NXT(3);
10952 enc = xmlDetectCharEncoding(start, 4);
10953 if (enc != XML_CHAR_ENCODING_NONE) {
10954 xmlSwitchEncoding(ctxt, enc);
10955 }
10956 }
10957
10958 xmlParseExternalSubset(ctxt, BAD_CAST "none", BAD_CAST "none");
10959
10960 if (ctxt->myDoc != NULL) {
10961 if (ctxt->wellFormed) {
10962 ret = ctxt->myDoc->extSubset;
10963 ctxt->myDoc->extSubset = NULL;
10964 if (ret != NULL) {
10965 xmlNodePtr tmp;
10966
10967 ret->doc = NULL;
10968 tmp = ret->children;
10969 while (tmp != NULL) {
10970 tmp->doc = NULL;
10971 tmp = tmp->next;
10972 }
10973 }
10974 } else {
10975 ret = NULL;
10976 }
10977 xmlFreeDoc(ctxt->myDoc);
10978 ctxt->myDoc = NULL;
10979 }
10980 if (sax != NULL) ctxt->sax = NULL;
10981 xmlFreeParserCtxt(ctxt);
10982
10983 return(ret);
10984}
10985
10986/**
10987 * xmlSAXParseDTD:
10988 * @sax: the SAX handler block
10989 * @ExternalID: a NAME* containing the External ID of the DTD
10990 * @SystemID: a NAME* containing the URL to the DTD
10991 *
10992 * Load and parse an external subset.
10993 *
10994 * Returns the resulting xmlDtdPtr or NULL in case of error.
10995 */
10996
10997xmlDtdPtr
10998xmlSAXParseDTD(xmlSAXHandlerPtr sax, const xmlChar *ExternalID,
10999 const xmlChar *SystemID) {
11000 xmlDtdPtr ret = NULL;
11001 xmlParserCtxtPtr ctxt;
11002 xmlParserInputPtr input = NULL;
11003 xmlCharEncoding enc;
11004 xmlChar* systemIdCanonic;
11005
11006 if ((ExternalID == NULL) && (SystemID == NULL)) return(NULL);
11007
11008 ctxt = xmlNewParserCtxt();
11009 if (ctxt == NULL) {
11010 return(NULL);
11011 }
11012
11013 /*
11014 * Set-up the SAX context
11015 */
11016 if (sax != NULL) {
11017 if (ctxt->sax != NULL)
11018 xmlFree(ctxt->sax);
11019 ctxt->sax = sax;
11020 ctxt->userData = ctxt;
11021 }
11022
11023 /*
11024 * Canonicalise the system ID
11025 */
11026 systemIdCanonic = xmlCanonicPath(SystemID);
11027 if ((SystemID != NULL) && (systemIdCanonic == NULL)) {
11028 xmlFreeParserCtxt(ctxt);
11029 return(NULL);
11030 }
11031
11032 /*
11033 * Ask the Entity resolver to load the damn thing
11034 */
11035
11036 if ((ctxt->sax != NULL) && (ctxt->sax->resolveEntity != NULL))
11037 input = ctxt->sax->resolveEntity(ctxt->userData, ExternalID,
11038 systemIdCanonic);
11039 if (input == NULL) {
11040 if (sax != NULL) ctxt->sax = NULL;
11041 xmlFreeParserCtxt(ctxt);
11042 if (systemIdCanonic != NULL)
11043 xmlFree(systemIdCanonic);
11044 return(NULL);
11045 }
11046
11047 /*
11048 * plug some encoding conversion routines here.
11049 */
11050 xmlPushInput(ctxt, input);
11051 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
11052 enc = xmlDetectCharEncoding(ctxt->input->cur, 4);
11053 xmlSwitchEncoding(ctxt, enc);
11054 }
11055
11056 if (input->filename == NULL)
11057 input->filename = (char *) systemIdCanonic;
11058 else
11059 xmlFree(systemIdCanonic);
11060 input->line = 1;
11061 input->col = 1;
11062 input->base = ctxt->input->cur;
11063 input->cur = ctxt->input->cur;
11064 input->free = NULL;
11065
11066 /*
11067 * let's parse that entity knowing it's an external subset.
11068 */
11069 ctxt->inSubset = 2;
11070 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
11071 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
11072 ExternalID, SystemID);
11073 xmlParseExternalSubset(ctxt, ExternalID, SystemID);
11074
11075 if (ctxt->myDoc != NULL) {
11076 if (ctxt->wellFormed) {
11077 ret = ctxt->myDoc->extSubset;
11078 ctxt->myDoc->extSubset = NULL;
11079 if (ret != NULL) {
11080 xmlNodePtr tmp;
11081
11082 ret->doc = NULL;
11083 tmp = ret->children;
11084 while (tmp != NULL) {
11085 tmp->doc = NULL;
11086 tmp = tmp->next;
11087 }
11088 }
11089 } else {
11090 ret = NULL;
11091 }
11092 xmlFreeDoc(ctxt->myDoc);
11093 ctxt->myDoc = NULL;
11094 }
11095 if (sax != NULL) ctxt->sax = NULL;
11096 xmlFreeParserCtxt(ctxt);
11097
11098 return(ret);
11099}
11100
11101
11102/**
11103 * xmlParseDTD:
11104 * @ExternalID: a NAME* containing the External ID of the DTD
11105 * @SystemID: a NAME* containing the URL to the DTD
11106 *
11107 * Load and parse an external subset.
11108 *
11109 * Returns the resulting xmlDtdPtr or NULL in case of error.
11110 */
11111
11112xmlDtdPtr
11113xmlParseDTD(const xmlChar *ExternalID, const xmlChar *SystemID) {
11114 return(xmlSAXParseDTD(NULL, ExternalID, SystemID));
11115}
11116#endif /* LIBXML_VALID_ENABLED */
11117
11118/************************************************************************
11119 * *
11120 * Front ends when parsing an Entity *
11121 * *
11122 ************************************************************************/
11123
11124/**
11125 * xmlParseCtxtExternalEntity:
11126 * @ctx: the existing parsing context
11127 * @URL: the URL for the entity to load
11128 * @ID: the System ID for the entity to load
11129 * @lst: the return value for the set of parsed nodes
11130 *
11131 * Parse an external general entity within an existing parsing context
11132 * An external general parsed entity is well-formed if it matches the
11133 * production labeled extParsedEnt.
11134 *
11135 * [78] extParsedEnt ::= TextDecl? content
11136 *
11137 * Returns 0 if the entity is well formed, -1 in case of args problem and
11138 * the parser error code otherwise
11139 */
11140
11141int
11142xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctx, const xmlChar *URL,
11143 const xmlChar *ID, xmlNodePtr *lst) {
11144 xmlParserCtxtPtr ctxt;
11145 xmlDocPtr newDoc;
11146 xmlNodePtr newRoot;
11147 xmlSAXHandlerPtr oldsax = NULL;
11148 int ret = 0;
11149 xmlChar start[4];
11150 xmlCharEncoding enc;
11151 xmlParserInputPtr inputStream;
11152 char *directory = NULL;
11153
11154 if (ctx == NULL) return(-1);
11155
11156 if (ctx->depth > 40) {
11157 return(XML_ERR_ENTITY_LOOP);
11158 }
11159
11160 if (lst != NULL)
11161 *lst = NULL;
11162 if ((URL == NULL) && (ID == NULL))
11163 return(-1);
11164 if (ctx->myDoc == NULL) /* @@ relax but check for dereferences */
11165 return(-1);
11166
11167 ctxt = xmlNewParserCtxt();
11168 if (ctxt == NULL) {
11169 return(-1);
11170 }
11171
11172 ctxt->userData = ctxt;
11173 ctxt->_private = ctx->_private;
11174
11175 inputStream = xmlLoadExternalEntity((char *)URL, (char *)ID, ctxt);
11176 if (inputStream == NULL) {
11177 xmlFreeParserCtxt(ctxt);
11178 return(-1);
11179 }
11180
11181 inputPush(ctxt, inputStream);
11182
11183 if ((ctxt->directory == NULL) && (directory == NULL))
11184 directory = xmlParserGetDirectory((char *)URL);
11185 if ((ctxt->directory == NULL) && (directory != NULL))
11186 ctxt->directory = directory;
11187
11188 oldsax = ctxt->sax;
11189 ctxt->sax = ctx->sax;
11190 xmlDetectSAX2(ctxt);
11191 newDoc = xmlNewDoc(BAD_CAST "1.0");
11192 if (newDoc == NULL) {
11193 xmlFreeParserCtxt(ctxt);
11194 return(-1);
11195 }
11196 if (ctx->myDoc->dict) {
11197 newDoc->dict = ctx->myDoc->dict;
11198 xmlDictReference(newDoc->dict);
11199 }
11200 if (ctx->myDoc != NULL) {
11201 newDoc->intSubset = ctx->myDoc->intSubset;
11202 newDoc->extSubset = ctx->myDoc->extSubset;
11203 }
11204 if (ctx->myDoc->URL != NULL) {
11205 newDoc->URL = xmlStrdup(ctx->myDoc->URL);
11206 }
11207 newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
11208 if (newRoot == NULL) {
11209 ctxt->sax = oldsax;
11210 xmlFreeParserCtxt(ctxt);
11211 newDoc->intSubset = NULL;
11212 newDoc->extSubset = NULL;
11213 xmlFreeDoc(newDoc);
11214 return(-1);
11215 }
11216 xmlAddChild((xmlNodePtr) newDoc, newRoot);
11217 nodePush(ctxt, newDoc->children);
11218 if (ctx->myDoc == NULL) {
11219 ctxt->myDoc = newDoc;
11220 } else {
11221 ctxt->myDoc = ctx->myDoc;
11222 newDoc->children->doc = ctx->myDoc;
11223 }
11224
11225 /*
11226 * Get the 4 first bytes and decode the charset
11227 * if enc != XML_CHAR_ENCODING_NONE
11228 * plug some encoding conversion routines.
11229 */
11230 GROW
11231 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
11232 start[0] = RAW;
11233 start[1] = NXT(1);
11234 start[2] = NXT(2);
11235 start[3] = NXT(3);
11236 enc = xmlDetectCharEncoding(start, 4);
11237 if (enc != XML_CHAR_ENCODING_NONE) {
11238 xmlSwitchEncoding(ctxt, enc);
11239 }
11240 }
11241
11242 /*
11243 * Parse a possible text declaration first
11244 */
11245 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
11246 xmlParseTextDecl(ctxt);
11247 }
11248
11249 /*
11250 * Doing validity checking on chunk doesn't make sense
11251 */
11252 ctxt->instate = XML_PARSER_CONTENT;
11253 ctxt->validate = ctx->validate;
11254 ctxt->valid = ctx->valid;
11255 ctxt->loadsubset = ctx->loadsubset;
11256 ctxt->depth = ctx->depth + 1;
11257 ctxt->replaceEntities = ctx->replaceEntities;
11258 if (ctxt->validate) {
11259 ctxt->vctxt.error = ctx->vctxt.error;
11260 ctxt->vctxt.warning = ctx->vctxt.warning;
11261 } else {
11262 ctxt->vctxt.error = NULL;
11263 ctxt->vctxt.warning = NULL;
11264 }
11265 ctxt->vctxt.nodeTab = NULL;
11266 ctxt->vctxt.nodeNr = 0;
11267 ctxt->vctxt.nodeMax = 0;
11268 ctxt->vctxt.node = NULL;
11269 if (ctxt->dict != NULL) xmlDictFree(ctxt->dict);
11270 ctxt->dict = ctx->dict;
11271 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
11272 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
11273 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
11274 ctxt->dictNames = ctx->dictNames;
11275 ctxt->attsDefault = ctx->attsDefault;
11276 ctxt->attsSpecial = ctx->attsSpecial;
11277 ctxt->linenumbers = ctx->linenumbers;
11278
11279 xmlParseContent(ctxt);
11280
11281 ctx->validate = ctxt->validate;
11282 ctx->valid = ctxt->valid;
11283 if ((RAW == '<') && (NXT(1) == '/')) {
11284 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
11285 } else if (RAW != 0) {
11286 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
11287 }
11288 if (ctxt->node != newDoc->children) {
11289 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
11290 }
11291
11292 if (!ctxt->wellFormed) {
11293 if (ctxt->errNo == 0)
11294 ret = 1;
11295 else
11296 ret = ctxt->errNo;
11297 } else {
11298 if (lst != NULL) {
11299 xmlNodePtr cur;
11300
11301 /*
11302 * Return the newly created nodeset after unlinking it from
11303 * they pseudo parent.
11304 */
11305 cur = newDoc->children->children;
11306 *lst = cur;
11307 while (cur != NULL) {
11308 cur->parent = NULL;
11309 cur = cur->next;
11310 }
11311 newDoc->children->children = NULL;
11312 }
11313 ret = 0;
11314 }
11315 ctxt->sax = oldsax;
11316 ctxt->dict = NULL;
11317 ctxt->attsDefault = NULL;
11318 ctxt->attsSpecial = NULL;
11319 xmlFreeParserCtxt(ctxt);
11320 newDoc->intSubset = NULL;
11321 newDoc->extSubset = NULL;
11322 xmlFreeDoc(newDoc);
11323
11324 return(ret);
11325}
11326
11327/**
11328 * xmlParseExternalEntityPrivate:
11329 * @doc: the document the chunk pertains to
11330 * @oldctxt: the previous parser context if available
11331 * @sax: the SAX handler bloc (possibly NULL)
11332 * @user_data: The user data returned on SAX callbacks (possibly NULL)
11333 * @depth: Used for loop detection, use 0
11334 * @URL: the URL for the entity to load
11335 * @ID: the System ID for the entity to load
11336 * @list: the return value for the set of parsed nodes
11337 *
11338 * Private version of xmlParseExternalEntity()
11339 *
11340 * Returns 0 if the entity is well formed, -1 in case of args problem and
11341 * the parser error code otherwise
11342 */
11343
11344static xmlParserErrors
11345xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
11346 xmlSAXHandlerPtr sax,
11347 void *user_data, int depth, const xmlChar *URL,
11348 const xmlChar *ID, xmlNodePtr *list) {
11349 xmlParserCtxtPtr ctxt;
11350 xmlDocPtr newDoc;
11351 xmlNodePtr newRoot;
11352 xmlSAXHandlerPtr oldsax = NULL;
11353 xmlParserErrors ret = XML_ERR_OK;
11354 xmlChar start[4];
11355 xmlCharEncoding enc;
11356
11357 if (depth > 40) {
11358 return(XML_ERR_ENTITY_LOOP);
11359 }
11360
11361
11362
11363 if (list != NULL)
11364 *list = NULL;
11365 if ((URL == NULL) && (ID == NULL))
11366 return(XML_ERR_INTERNAL_ERROR);
11367 if (doc == NULL)
11368 return(XML_ERR_INTERNAL_ERROR);
11369
11370
11371 ctxt = xmlCreateEntityParserCtxt(URL, ID, NULL);
11372 if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY);
11373 ctxt->userData = ctxt;
11374 if (oldctxt != NULL) {
11375 ctxt->_private = oldctxt->_private;
11376 ctxt->loadsubset = oldctxt->loadsubset;
11377 ctxt->validate = oldctxt->validate;
11378 ctxt->external = oldctxt->external;
11379 ctxt->record_info = oldctxt->record_info;
11380 ctxt->node_seq.maximum = oldctxt->node_seq.maximum;
11381 ctxt->node_seq.length = oldctxt->node_seq.length;
11382 ctxt->node_seq.buffer = oldctxt->node_seq.buffer;
11383 } else {
11384 /*
11385 * Doing validity checking on chunk without context
11386 * doesn't make sense
11387 */
11388 ctxt->_private = NULL;
11389 ctxt->validate = 0;
11390 ctxt->external = 2;
11391 ctxt->loadsubset = 0;
11392 }
11393 if (sax != NULL) {
11394 oldsax = ctxt->sax;
11395 ctxt->sax = sax;
11396 if (user_data != NULL)
11397 ctxt->userData = user_data;
11398 }
11399 xmlDetectSAX2(ctxt);
11400 newDoc = xmlNewDoc(BAD_CAST "1.0");
11401 if (newDoc == NULL) {
11402 ctxt->node_seq.maximum = 0;
11403 ctxt->node_seq.length = 0;
11404 ctxt->node_seq.buffer = NULL;
11405 xmlFreeParserCtxt(ctxt);
11406 return(XML_ERR_INTERNAL_ERROR);
11407 }
11408 newDoc->intSubset = doc->intSubset;
11409 newDoc->extSubset = doc->extSubset;
11410 newDoc->dict = doc->dict;
11411 xmlDictReference(newDoc->dict);
11412
11413 if (doc->URL != NULL) {
11414 newDoc->URL = xmlStrdup(doc->URL);
11415 }
11416 newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
11417 if (newRoot == NULL) {
11418 if (sax != NULL)
11419 ctxt->sax = oldsax;
11420 ctxt->node_seq.maximum = 0;
11421 ctxt->node_seq.length = 0;
11422 ctxt->node_seq.buffer = NULL;
11423 xmlFreeParserCtxt(ctxt);
11424 newDoc->intSubset = NULL;
11425 newDoc->extSubset = NULL;
11426 xmlFreeDoc(newDoc);
11427 return(XML_ERR_INTERNAL_ERROR);
11428 }
11429 xmlAddChild((xmlNodePtr) newDoc, newRoot);
11430 nodePush(ctxt, newDoc->children);
11431 ctxt->myDoc = doc;
11432 newRoot->doc = doc;
11433
11434 /*
11435 * Get the 4 first bytes and decode the charset
11436 * if enc != XML_CHAR_ENCODING_NONE
11437 * plug some encoding conversion routines.
11438 */
11439 GROW;
11440 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
11441 start[0] = RAW;
11442 start[1] = NXT(1);
11443 start[2] = NXT(2);
11444 start[3] = NXT(3);
11445 enc = xmlDetectCharEncoding(start, 4);
11446 if (enc != XML_CHAR_ENCODING_NONE) {
11447 xmlSwitchEncoding(ctxt, enc);
11448 }
11449 }
11450
11451 /*
11452 * Parse a possible text declaration first
11453 */
11454 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
11455 xmlParseTextDecl(ctxt);
11456 }
11457
11458 ctxt->instate = XML_PARSER_CONTENT;
11459 ctxt->depth = depth;
11460
11461 xmlParseContent(ctxt);
11462
11463 if ((RAW == '<') && (NXT(1) == '/')) {
11464 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
11465 } else if (RAW != 0) {
11466 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
11467 }
11468 if (ctxt->node != newDoc->children) {
11469 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
11470 }
11471
11472 if (!ctxt->wellFormed) {
11473 if (ctxt->errNo == 0)
11474 ret = XML_ERR_INTERNAL_ERROR;
11475 else
11476 ret = (xmlParserErrors)ctxt->errNo;
11477 } else {
11478 if (list != NULL) {
11479 xmlNodePtr cur;
11480
11481 /*
11482 * Return the newly created nodeset after unlinking it from
11483 * they pseudo parent.
11484 */
11485 cur = newDoc->children->children;
11486 *list = cur;
11487 while (cur != NULL) {
11488 cur->parent = NULL;
11489 cur = cur->next;
11490 }
11491 newDoc->children->children = NULL;
11492 }
11493 ret = XML_ERR_OK;
11494 }
11495 if (sax != NULL)
11496 ctxt->sax = oldsax;
11497 oldctxt->node_seq.maximum = ctxt->node_seq.maximum;
11498 oldctxt->node_seq.length = ctxt->node_seq.length;
11499 oldctxt->node_seq.buffer = ctxt->node_seq.buffer;
11500 ctxt->node_seq.maximum = 0;
11501 ctxt->node_seq.length = 0;
11502 ctxt->node_seq.buffer = NULL;
11503 xmlFreeParserCtxt(ctxt);
11504 newDoc->intSubset = NULL;
11505 newDoc->extSubset = NULL;
11506 xmlFreeDoc(newDoc);
11507
11508 return(ret);
11509}
11510
11511#ifdef LIBXML_SAX1_ENABLED
11512/**
11513 * xmlParseExternalEntity:
11514 * @doc: the document the chunk pertains to
11515 * @sax: the SAX handler bloc (possibly NULL)
11516 * @user_data: The user data returned on SAX callbacks (possibly NULL)
11517 * @depth: Used for loop detection, use 0
11518 * @URL: the URL for the entity to load
11519 * @ID: the System ID for the entity to load
11520 * @lst: the return value for the set of parsed nodes
11521 *
11522 * Parse an external general entity
11523 * An external general parsed entity is well-formed if it matches the
11524 * production labeled extParsedEnt.
11525 *
11526 * [78] extParsedEnt ::= TextDecl? content
11527 *
11528 * Returns 0 if the entity is well formed, -1 in case of args problem and
11529 * the parser error code otherwise
11530 */
11531
11532int
11533xmlParseExternalEntity(xmlDocPtr doc, xmlSAXHandlerPtr sax, void *user_data,
11534 int depth, const xmlChar *URL, const xmlChar *ID, xmlNodePtr *lst) {
11535 return(xmlParseExternalEntityPrivate(doc, NULL, sax, user_data, depth, URL,
11536 ID, lst));
11537}
11538
11539/**
11540 * xmlParseBalancedChunkMemory:
11541 * @doc: the document the chunk pertains to
11542 * @sax: the SAX handler bloc (possibly NULL)
11543 * @user_data: The user data returned on SAX callbacks (possibly NULL)
11544 * @depth: Used for loop detection, use 0
11545 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
11546 * @lst: the return value for the set of parsed nodes
11547 *
11548 * Parse a well-balanced chunk of an XML document
11549 * called by the parser
11550 * The allowed sequence for the Well Balanced Chunk is the one defined by
11551 * the content production in the XML grammar:
11552 *
11553 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
11554 *
11555 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
11556 * the parser error code otherwise
11557 */
11558
11559int
11560xmlParseBalancedChunkMemory(xmlDocPtr doc, xmlSAXHandlerPtr sax,
11561 void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst) {
11562 return xmlParseBalancedChunkMemoryRecover( doc, sax, user_data,
11563 depth, string, lst, 0 );
11564}
11565#endif /* LIBXML_SAX1_ENABLED */
11566
11567/**
11568 * xmlParseBalancedChunkMemoryInternal:
11569 * @oldctxt: the existing parsing context
11570 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
11571 * @user_data: the user data field for the parser context
11572 * @lst: the return value for the set of parsed nodes
11573 *
11574 *
11575 * Parse a well-balanced chunk of an XML document
11576 * called by the parser
11577 * The allowed sequence for the Well Balanced Chunk is the one defined by
11578 * the content production in the XML grammar:
11579 *
11580 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
11581 *
11582 * Returns XML_ERR_OK if the chunk is well balanced, and the parser
11583 * error code otherwise
11584 *
11585 * In case recover is set to 1, the nodelist will not be empty even if
11586 * the parsed chunk is not well balanced.
11587 */
11588static xmlParserErrors
11589xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
11590 const xmlChar *string, void *user_data, xmlNodePtr *lst) {
11591 xmlParserCtxtPtr ctxt;
11592 xmlDocPtr newDoc = NULL;
11593 xmlNodePtr newRoot;
11594 xmlSAXHandlerPtr oldsax = NULL;
11595 xmlNodePtr content = NULL;
11596 xmlNodePtr last = NULL;
11597 int size;
11598 xmlParserErrors ret = XML_ERR_OK;
11599
11600 if (oldctxt->depth > 40) {
11601 return(XML_ERR_ENTITY_LOOP);
11602 }
11603
11604
11605 if (lst != NULL)
11606 *lst = NULL;
11607 if (string == NULL)
11608 return(XML_ERR_INTERNAL_ERROR);
11609
11610 size = xmlStrlen(string);
11611
11612 ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
11613 if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY);
11614 if (user_data != NULL)
11615 ctxt->userData = user_data;
11616 else
11617 ctxt->userData = ctxt;
11618 if (ctxt->dict != NULL) xmlDictFree(ctxt->dict);
11619 ctxt->dict = oldctxt->dict;
11620 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
11621 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
11622 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
11623
11624 oldsax = ctxt->sax;
11625 ctxt->sax = oldctxt->sax;
11626 xmlDetectSAX2(ctxt);
11627 ctxt->replaceEntities = oldctxt->replaceEntities;
11628 ctxt->options = oldctxt->options;
11629
11630 ctxt->_private = oldctxt->_private;
11631 if (oldctxt->myDoc == NULL) {
11632 newDoc = xmlNewDoc(BAD_CAST "1.0");
11633 if (newDoc == NULL) {
11634 ctxt->sax = oldsax;
11635 ctxt->dict = NULL;
11636 xmlFreeParserCtxt(ctxt);
11637 return(XML_ERR_INTERNAL_ERROR);
11638 }
11639 newDoc->dict = ctxt->dict;
11640 xmlDictReference(newDoc->dict);
11641 ctxt->myDoc = newDoc;
11642 } else {
11643 ctxt->myDoc = oldctxt->myDoc;
11644 content = ctxt->myDoc->children;
11645 last = ctxt->myDoc->last;
11646 }
11647 newRoot = xmlNewDocNode(ctxt->myDoc, NULL, BAD_CAST "pseudoroot", NULL);
11648 if (newRoot == NULL) {
11649 ctxt->sax = oldsax;
11650 ctxt->dict = NULL;
11651 xmlFreeParserCtxt(ctxt);
11652 if (newDoc != NULL) {
11653 xmlFreeDoc(newDoc);
11654 }
11655 return(XML_ERR_INTERNAL_ERROR);
11656 }
11657 ctxt->myDoc->children = NULL;
11658 ctxt->myDoc->last = NULL;
11659 xmlAddChild((xmlNodePtr) ctxt->myDoc, newRoot);
11660 nodePush(ctxt, ctxt->myDoc->children);
11661 ctxt->instate = XML_PARSER_CONTENT;
11662 ctxt->depth = oldctxt->depth + 1;
11663
11664 ctxt->validate = 0;
11665 ctxt->loadsubset = oldctxt->loadsubset;
11666 if ((oldctxt->validate) || (oldctxt->replaceEntities != 0)) {
11667 /*
11668 * ID/IDREF registration will be done in xmlValidateElement below
11669 */
11670 ctxt->loadsubset |= XML_SKIP_IDS;
11671 }
11672 ctxt->dictNames = oldctxt->dictNames;
11673 ctxt->attsDefault = oldctxt->attsDefault;
11674 ctxt->attsSpecial = oldctxt->attsSpecial;
11675
11676 xmlParseContent(ctxt);
11677 if ((RAW == '<') && (NXT(1) == '/')) {
11678 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
11679 } else if (RAW != 0) {
11680 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
11681 }
11682 if (ctxt->node != ctxt->myDoc->children) {
11683 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
11684 }
11685
11686 if (!ctxt->wellFormed) {
11687 if (ctxt->errNo == 0)
11688 ret = XML_ERR_INTERNAL_ERROR;
11689 else
11690 ret = (xmlParserErrors)ctxt->errNo;
11691 } else {
11692 ret = XML_ERR_OK;
11693 }
11694
11695 if ((lst != NULL) && (ret == XML_ERR_OK)) {
11696 xmlNodePtr cur;
11697
11698 /*
11699 * Return the newly created nodeset after unlinking it from
11700 * they pseudo parent.
11701 */
11702 cur = ctxt->myDoc->children->children;
11703 *lst = cur;
11704 while (cur != NULL) {
11705#ifdef LIBXML_VALID_ENABLED
11706 if ((oldctxt->validate) && (oldctxt->wellFormed) &&
11707 (oldctxt->myDoc) && (oldctxt->myDoc->intSubset) &&
11708 (cur->type == XML_ELEMENT_NODE)) {
11709 oldctxt->valid &= xmlValidateElement(&oldctxt->vctxt,
11710 oldctxt->myDoc, cur);
11711 }
11712#endif /* LIBXML_VALID_ENABLED */
11713 cur->parent = NULL;
11714 cur = cur->next;
11715 }
11716 ctxt->myDoc->children->children = NULL;
11717 }
11718 if (ctxt->myDoc != NULL) {
11719 xmlFreeNode(ctxt->myDoc->children);
11720 ctxt->myDoc->children = content;
11721 ctxt->myDoc->last = last;
11722 }
11723
11724 ctxt->sax = oldsax;
11725 ctxt->dict = NULL;
11726 ctxt->attsDefault = NULL;
11727 ctxt->attsSpecial = NULL;
11728 xmlFreeParserCtxt(ctxt);
11729 if (newDoc != NULL) {
11730 xmlFreeDoc(newDoc);
11731 }
11732
11733 return(ret);
11734}
11735
11736/**
11737 * xmlParseInNodeContext:
11738 * @node: the context node
11739 * @data: the input string
11740 * @datalen: the input string length in bytes
11741 * @options: a combination of xmlParserOption
11742 * @lst: the return value for the set of parsed nodes
11743 *
11744 * Parse a well-balanced chunk of an XML document
11745 * within the context (DTD, namespaces, etc ...) of the given node.
11746 *
11747 * The allowed sequence for the data is a Well Balanced Chunk defined by
11748 * the content production in the XML grammar:
11749 *
11750 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
11751 *
11752 * Returns XML_ERR_OK if the chunk is well balanced, and the parser
11753 * error code otherwise
11754 */
11755xmlParserErrors
11756xmlParseInNodeContext(xmlNodePtr node, const char *data, int datalen,
11757 int options, xmlNodePtr *lst) {
11758#ifdef SAX2
11759 xmlParserCtxtPtr ctxt;
11760 xmlDocPtr doc = NULL;
11761 xmlNodePtr fake, cur;
11762 int nsnr = 0;
11763
11764 xmlParserErrors ret = XML_ERR_OK;
11765
11766 /*
11767 * check all input parameters, grab the document
11768 */
11769 if ((lst == NULL) || (node == NULL) || (data == NULL) || (datalen < 0))
11770 return(XML_ERR_INTERNAL_ERROR);
11771 switch (node->type) {
11772 case XML_ELEMENT_NODE:
11773 case XML_ATTRIBUTE_NODE:
11774 case XML_TEXT_NODE:
11775 case XML_CDATA_SECTION_NODE:
11776 case XML_ENTITY_REF_NODE:
11777 case XML_PI_NODE:
11778 case XML_COMMENT_NODE:
11779 case XML_DOCUMENT_NODE:
11780 case XML_HTML_DOCUMENT_NODE:
11781 break;
11782 default:
11783 return(XML_ERR_INTERNAL_ERROR);
11784
11785 }
11786 while ((node != NULL) && (node->type != XML_ELEMENT_NODE) &&
11787 (node->type != XML_DOCUMENT_NODE) &&
11788 (node->type != XML_HTML_DOCUMENT_NODE))
11789 node = node->parent;
11790 if (node == NULL)
11791 return(XML_ERR_INTERNAL_ERROR);
11792 if (node->type == XML_ELEMENT_NODE)
11793 doc = node->doc;
11794 else
11795 doc = (xmlDocPtr) node;
11796 if (doc == NULL)
11797 return(XML_ERR_INTERNAL_ERROR);
11798
11799 /*
11800 * allocate a context and set-up everything not related to the
11801 * node position in the tree
11802 */
11803 if (doc->type == XML_DOCUMENT_NODE)
11804 ctxt = xmlCreateMemoryParserCtxt((char *) data, datalen);
11805#ifdef LIBXML_HTML_ENABLED
11806 else if (doc->type == XML_HTML_DOCUMENT_NODE)
11807 ctxt = htmlCreateMemoryParserCtxt((char *) data, datalen);
11808#endif
11809 else
11810 return(XML_ERR_INTERNAL_ERROR);
11811
11812 if (ctxt == NULL)
11813 return(XML_ERR_NO_MEMORY);
11814 fake = xmlNewComment(NULL);
11815 if (fake == NULL) {
11816 xmlFreeParserCtxt(ctxt);
11817 return(XML_ERR_NO_MEMORY);
11818 }
11819 xmlAddChild(node, fake);
11820
11821 /*
11822 * Use input doc's dict if present, else assure XML_PARSE_NODICT is set.
11823 * We need a dictionary for xmlDetectSAX2, so if there's no doc dict
11824 * we must wait until the last moment to free the original one.
11825 */
11826 if (doc->dict != NULL) {
11827 if (ctxt->dict != NULL)
11828 xmlDictFree(ctxt->dict);
11829 ctxt->dict = doc->dict;
11830 } else
11831 options |= XML_PARSE_NODICT;
11832
11833 xmlCtxtUseOptions(ctxt, options);
11834 xmlDetectSAX2(ctxt);
11835 ctxt->myDoc = doc;
11836
11837 if (node->type == XML_ELEMENT_NODE) {
11838 nodePush(ctxt, node);
11839 /*
11840 * initialize the SAX2 namespaces stack
11841 */
11842 cur = node;
11843 while ((cur != NULL) && (cur->type == XML_ELEMENT_NODE)) {
11844 xmlNsPtr ns = cur->nsDef;
11845 const xmlChar *iprefix, *ihref;
11846
11847 while (ns != NULL) {
11848 if (ctxt->dict) {
11849 iprefix = xmlDictLookup(ctxt->dict, ns->prefix, -1);
11850 ihref = xmlDictLookup(ctxt->dict, ns->href, -1);
11851 } else {
11852 iprefix = ns->prefix;
11853 ihref = ns->href;
11854 }
11855
11856 if (xmlGetNamespace(ctxt, iprefix) == NULL) {
11857 nsPush(ctxt, iprefix, ihref);
11858 nsnr++;
11859 }
11860 ns = ns->next;
11861 }
11862 cur = cur->parent;
11863 }
11864 ctxt->instate = XML_PARSER_CONTENT;
11865 }
11866
11867 if ((ctxt->validate) || (ctxt->replaceEntities != 0)) {
11868 /*
11869 * ID/IDREF registration will be done in xmlValidateElement below
11870 */
11871 ctxt->loadsubset |= XML_SKIP_IDS;
11872 }
11873
11874#ifdef LIBXML_HTML_ENABLED
11875 if (doc->type == XML_HTML_DOCUMENT_NODE)
11876 __htmlParseContent(ctxt);
11877 else
11878#endif
11879 xmlParseContent(ctxt);
11880
11881 nsPop(ctxt, nsnr);
11882 if ((RAW == '<') && (NXT(1) == '/')) {
11883 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
11884 } else if (RAW != 0) {
11885 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
11886 }
11887 if ((ctxt->node != NULL) && (ctxt->node != node)) {
11888 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
11889 ctxt->wellFormed = 0;
11890 }
11891
11892 if (!ctxt->wellFormed) {
11893 if (ctxt->errNo == 0)
11894 ret = XML_ERR_INTERNAL_ERROR;
11895 else
11896 ret = (xmlParserErrors)ctxt->errNo;
11897 } else {
11898 ret = XML_ERR_OK;
11899 }
11900
11901 /*
11902 * Return the newly created nodeset after unlinking it from
11903 * the pseudo sibling.
11904 */
11905
11906 cur = fake->next;
11907 fake->next = NULL;
11908 node->last = fake;
11909
11910 if (cur != NULL) {
11911 cur->prev = NULL;
11912 }
11913
11914 *lst = cur;
11915
11916 while (cur != NULL) {
11917 cur->parent = NULL;
11918 cur = cur->next;
11919 }
11920
11921 xmlUnlinkNode(fake);
11922 xmlFreeNode(fake);
11923
11924
11925 if (ret != XML_ERR_OK) {
11926 xmlFreeNodeList(*lst);
11927 *lst = NULL;
11928 }
11929
11930 if (doc->dict != NULL)
11931 ctxt->dict = NULL;
11932 xmlFreeParserCtxt(ctxt);
11933
11934 return(ret);
11935#else /* !SAX2 */
11936 return(XML_ERR_INTERNAL_ERROR);
11937#endif
11938}
11939
11940#ifdef LIBXML_SAX1_ENABLED
11941/**
11942 * xmlParseBalancedChunkMemoryRecover:
11943 * @doc: the document the chunk pertains to
11944 * @sax: the SAX handler bloc (possibly NULL)
11945 * @user_data: The user data returned on SAX callbacks (possibly NULL)
11946 * @depth: Used for loop detection, use 0
11947 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
11948 * @lst: the return value for the set of parsed nodes
11949 * @recover: return nodes even if the data is broken (use 0)
11950 *
11951 *
11952 * Parse a well-balanced chunk of an XML document
11953 * called by the parser
11954 * The allowed sequence for the Well Balanced Chunk is the one defined by
11955 * the content production in the XML grammar:
11956 *
11957 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
11958 *
11959 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
11960 * the parser error code otherwise
11961 *
11962 * In case recover is set to 1, the nodelist will not be empty even if
11963 * the parsed chunk is not well balanced.
11964 */
11965int
11966xmlParseBalancedChunkMemoryRecover(xmlDocPtr doc, xmlSAXHandlerPtr sax,
11967 void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst,
11968 int recover) {
11969 xmlParserCtxtPtr ctxt;
11970 xmlDocPtr newDoc;
11971 xmlSAXHandlerPtr oldsax = NULL;
11972 xmlNodePtr content, newRoot;
11973 int size;
11974 int ret = 0;
11975
11976 if (depth > 40) {
11977 return(XML_ERR_ENTITY_LOOP);
11978 }
11979
11980
11981 if (lst != NULL)
11982 *lst = NULL;
11983 if (string == NULL)
11984 return(-1);
11985
11986 size = xmlStrlen(string);
11987
11988 ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
11989 if (ctxt == NULL) return(-1);
11990 ctxt->userData = ctxt;
11991 if (sax != NULL) {
11992 oldsax = ctxt->sax;
11993 ctxt->sax = sax;
11994 if (user_data != NULL)
11995 ctxt->userData = user_data;
11996 }
11997 newDoc = xmlNewDoc(BAD_CAST "1.0");
11998 if (newDoc == NULL) {
11999 xmlFreeParserCtxt(ctxt);
12000 return(-1);
12001 }
12002 if ((doc != NULL) && (doc->dict != NULL)) {
12003 xmlDictFree(ctxt->dict);
12004 ctxt->dict = doc->dict;
12005 xmlDictReference(ctxt->dict);
12006 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
12007 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
12008 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
12009 ctxt->dictNames = 1;
12010 } else {
12011 xmlCtxtUseOptions(ctxt, XML_PARSE_NODICT);
12012 }
12013 if (doc != NULL) {
12014 newDoc->intSubset = doc->intSubset;
12015 newDoc->extSubset = doc->extSubset;
12016 }
12017 newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
12018 if (newRoot == NULL) {
12019 if (sax != NULL)
12020 ctxt->sax = oldsax;
12021 xmlFreeParserCtxt(ctxt);
12022 newDoc->intSubset = NULL;
12023 newDoc->extSubset = NULL;
12024 xmlFreeDoc(newDoc);
12025 return(-1);
12026 }
12027 xmlAddChild((xmlNodePtr) newDoc, newRoot);
12028 nodePush(ctxt, newRoot);
12029 if (doc == NULL) {
12030 ctxt->myDoc = newDoc;
12031 } else {
12032 ctxt->myDoc = newDoc;
12033 newDoc->children->doc = doc;
12034 /* Ensure that doc has XML spec namespace */
12035 xmlSearchNsByHref(doc, (xmlNodePtr)doc, XML_XML_NAMESPACE);
12036 newDoc->oldNs = doc->oldNs;
12037 }
12038 ctxt->instate = XML_PARSER_CONTENT;
12039 ctxt->depth = depth;
12040
12041 /*
12042 * Doing validity checking on chunk doesn't make sense
12043 */
12044 ctxt->validate = 0;
12045 ctxt->loadsubset = 0;
12046 xmlDetectSAX2(ctxt);
12047
12048 if ( doc != NULL ){
12049 content = doc->children;
12050 doc->children = NULL;
12051 xmlParseContent(ctxt);
12052 doc->children = content;
12053 }
12054 else {
12055 xmlParseContent(ctxt);
12056 }
12057 if ((RAW == '<') && (NXT(1) == '/')) {
12058 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
12059 } else if (RAW != 0) {
12060 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
12061 }
12062 if (ctxt->node != newDoc->children) {
12063 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
12064 }
12065
12066 if (!ctxt->wellFormed) {
12067 if (ctxt->errNo == 0)
12068 ret = 1;
12069 else
12070 ret = ctxt->errNo;
12071 } else {
12072 ret = 0;
12073 }
12074
12075 if ((lst != NULL) && ((ret == 0) || (recover == 1))) {
12076 xmlNodePtr cur;
12077
12078 /*
12079 * Return the newly created nodeset after unlinking it from
12080 * they pseudo parent.
12081 */
12082 cur = newDoc->children->children;
12083 *lst = cur;
12084 while (cur != NULL) {
12085 xmlSetTreeDoc(cur, doc);
12086 cur->parent = NULL;
12087 cur = cur->next;
12088 }
12089 newDoc->children->children = NULL;
12090 }
12091
12092 if (sax != NULL)
12093 ctxt->sax = oldsax;
12094 xmlFreeParserCtxt(ctxt);
12095 newDoc->intSubset = NULL;
12096 newDoc->extSubset = NULL;
12097 newDoc->oldNs = NULL;
12098 xmlFreeDoc(newDoc);
12099
12100 return(ret);
12101}
12102
12103/**
12104 * xmlSAXParseEntity:
12105 * @sax: the SAX handler block
12106 * @filename: the filename
12107 *
12108 * parse an XML external entity out of context and build a tree.
12109 * It use the given SAX function block to handle the parsing callback.
12110 * If sax is NULL, fallback to the default DOM tree building routines.
12111 *
12112 * [78] extParsedEnt ::= TextDecl? content
12113 *
12114 * This correspond to a "Well Balanced" chunk
12115 *
12116 * Returns the resulting document tree
12117 */
12118
12119xmlDocPtr
12120xmlSAXParseEntity(xmlSAXHandlerPtr sax, const char *filename) {
12121 xmlDocPtr ret;
12122 xmlParserCtxtPtr ctxt;
12123
12124 ctxt = xmlCreateFileParserCtxt(filename);
12125 if (ctxt == NULL) {
12126 return(NULL);
12127 }
12128 if (sax != NULL) {
12129 if (ctxt->sax != NULL)
12130 xmlFree(ctxt->sax);
12131 ctxt->sax = sax;
12132 ctxt->userData = NULL;
12133 }
12134
12135 xmlParseExtParsedEnt(ctxt);
12136
12137 if (ctxt->wellFormed)
12138 ret = ctxt->myDoc;
12139 else {
12140 ret = NULL;
12141 xmlFreeDoc(ctxt->myDoc);
12142 ctxt->myDoc = NULL;
12143 }
12144 if (sax != NULL)
12145 ctxt->sax = NULL;
12146 xmlFreeParserCtxt(ctxt);
12147
12148 return(ret);
12149}
12150
12151/**
12152 * xmlParseEntity:
12153 * @filename: the filename
12154 *
12155 * parse an XML external entity out of context and build a tree.
12156 *
12157 * [78] extParsedEnt ::= TextDecl? content
12158 *
12159 * This correspond to a "Well Balanced" chunk
12160 *
12161 * Returns the resulting document tree
12162 */
12163
12164xmlDocPtr
12165xmlParseEntity(const char *filename) {
12166 return(xmlSAXParseEntity(NULL, filename));
12167}
12168#endif /* LIBXML_SAX1_ENABLED */
12169
12170/**
12171 * xmlCreateEntityParserCtxt:
12172 * @URL: the entity URL
12173 * @ID: the entity PUBLIC ID
12174 * @base: a possible base for the target URI
12175 *
12176 * Create a parser context for an external entity
12177 * Automatic support for ZLIB/Compress compressed document is provided
12178 * by default if found at compile-time.
12179 *
12180 * Returns the new parser context or NULL
12181 */
12182xmlParserCtxtPtr
12183xmlCreateEntityParserCtxt(const xmlChar *URL, const xmlChar *ID,
12184 const xmlChar *base) {
12185 xmlParserCtxtPtr ctxt;
12186 xmlParserInputPtr inputStream;
12187 char *directory = NULL;
12188 xmlChar *uri;
12189
12190 ctxt = xmlNewParserCtxt();
12191 if (ctxt == NULL) {
12192 return(NULL);
12193 }
12194
12195 uri = xmlBuildURI(URL, base);
12196
12197 if (uri == NULL) {
12198 inputStream = xmlLoadExternalEntity((char *)URL, (char *)ID, ctxt);
12199 if (inputStream == NULL) {
12200 xmlFreeParserCtxt(ctxt);
12201 return(NULL);
12202 }
12203
12204 inputPush(ctxt, inputStream);
12205
12206 if ((ctxt->directory == NULL) && (directory == NULL))
12207 directory = xmlParserGetDirectory((char *)URL);
12208 if ((ctxt->directory == NULL) && (directory != NULL))
12209 ctxt->directory = directory;
12210 } else {
12211 inputStream = xmlLoadExternalEntity((char *)uri, (char *)ID, ctxt);
12212 if (inputStream == NULL) {
12213 xmlFree(uri);
12214 xmlFreeParserCtxt(ctxt);
12215 return(NULL);
12216 }
12217
12218 inputPush(ctxt, inputStream);
12219
12220 if ((ctxt->directory == NULL) && (directory == NULL))
12221 directory = xmlParserGetDirectory((char *)uri);
12222 if ((ctxt->directory == NULL) && (directory != NULL))
12223 ctxt->directory = directory;
12224 xmlFree(uri);
12225 }
12226 return(ctxt);
12227}
12228
12229/************************************************************************
12230 * *
12231 * Front ends when parsing from a file *
12232 * *
12233 ************************************************************************/
12234
12235/**
12236 * xmlCreateURLParserCtxt:
12237 * @filename: the filename or URL
12238 * @options: a combination of xmlParserOption
12239 *
12240 * Create a parser context for a file or URL content.
12241 * Automatic support for ZLIB/Compress compressed document is provided
12242 * by default if found at compile-time and for file accesses
12243 *
12244 * Returns the new parser context or NULL
12245 */
12246xmlParserCtxtPtr
12247xmlCreateURLParserCtxt(const char *filename, int options)
12248{
12249 xmlParserCtxtPtr ctxt;
12250 xmlParserInputPtr inputStream;
12251 char *directory = NULL;
12252
12253 ctxt = xmlNewParserCtxt();
12254 if (ctxt == NULL) {
12255 xmlErrMemory(NULL, "cannot allocate parser context");
12256 return(NULL);
12257 }
12258
12259 if (options)
12260 xmlCtxtUseOptions(ctxt, options);
12261 ctxt->linenumbers = 1;
12262
12263 inputStream = xmlLoadExternalEntity(filename, NULL, ctxt);
12264 if (inputStream == NULL) {
12265 xmlFreeParserCtxt(ctxt);
12266 return(NULL);
12267 }
12268
12269 inputPush(ctxt, inputStream);
12270 if ((ctxt->directory == NULL) && (directory == NULL))
12271 directory = xmlParserGetDirectory(filename);
12272 if ((ctxt->directory == NULL) && (directory != NULL))
12273 ctxt->directory = directory;
12274
12275 return(ctxt);
12276}
12277
12278/**
12279 * xmlCreateFileParserCtxt:
12280 * @filename: the filename
12281 *
12282 * Create a parser context for a file content.
12283 * Automatic support for ZLIB/Compress compressed document is provided
12284 * by default if found at compile-time.
12285 *
12286 * Returns the new parser context or NULL
12287 */
12288xmlParserCtxtPtr
12289xmlCreateFileParserCtxt(const char *filename)
12290{
12291 return(xmlCreateURLParserCtxt(filename, 0));
12292}
12293
12294#ifdef LIBXML_SAX1_ENABLED
12295/**
12296 * xmlSAXParseFileWithData:
12297 * @sax: the SAX handler block
12298 * @filename: the filename
12299 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
12300 * documents
12301 * @data: the userdata
12302 *
12303 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
12304 * compressed document is provided by default if found at compile-time.
12305 * It use the given SAX function block to handle the parsing callback.
12306 * If sax is NULL, fallback to the default DOM tree building routines.
12307 *
12308 * User data (void *) is stored within the parser context in the
12309 * context's _private member, so it is available nearly everywhere in libxml
12310 *
12311 * Returns the resulting document tree
12312 */
12313
12314xmlDocPtr
12315xmlSAXParseFileWithData(xmlSAXHandlerPtr sax, const char *filename,
12316 int recovery, void *data) {
12317 xmlDocPtr ret;
12318 xmlParserCtxtPtr ctxt;
12319 char *directory = NULL;
12320
12321 xmlInitParser();
12322
12323 ctxt = xmlCreateFileParserCtxt(filename);
12324 if (ctxt == NULL) {
12325 return(NULL);
12326 }
12327 if (sax != NULL) {
12328 if (ctxt->sax != NULL)
12329 xmlFree(ctxt->sax);
12330 ctxt->sax = sax;
12331 }
12332 xmlDetectSAX2(ctxt);
12333 if (data!=NULL) {
12334 ctxt->_private = data;
12335 }
12336
12337 if ((ctxt->directory == NULL) && (directory == NULL))
12338 directory = xmlParserGetDirectory(filename);
12339 if ((ctxt->directory == NULL) && (directory != NULL))
12340 ctxt->directory = (char *) xmlStrdup((xmlChar *) directory);
12341
12342 ctxt->recovery = recovery;
12343
12344 xmlParseDocument(ctxt);
12345
12346 if ((ctxt->wellFormed) || recovery) {
12347 ret = ctxt->myDoc;
12348 if (ret != NULL) {
12349 if (ctxt->input->buf->compressed > 0)
12350 ret->compression = 9;
12351 else
12352 ret->compression = ctxt->input->buf->compressed;
12353 }
12354 }
12355 else {
12356 ret = NULL;
12357 xmlFreeDoc(ctxt->myDoc);
12358 ctxt->myDoc = NULL;
12359 }
12360 if (sax != NULL)
12361 ctxt->sax = NULL;
12362 xmlFreeParserCtxt(ctxt);
12363
12364 return(ret);
12365}
12366
12367/**
12368 * xmlSAXParseFile:
12369 * @sax: the SAX handler block
12370 * @filename: the filename
12371 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
12372 * documents
12373 *
12374 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
12375 * compressed document is provided by default if found at compile-time.
12376 * It use the given SAX function block to handle the parsing callback.
12377 * If sax is NULL, fallback to the default DOM tree building routines.
12378 *
12379 * Returns the resulting document tree
12380 */
12381
12382xmlDocPtr
12383xmlSAXParseFile(xmlSAXHandlerPtr sax, const char *filename,
12384 int recovery) {
12385 return(xmlSAXParseFileWithData(sax,filename,recovery,NULL));
12386}
12387
12388/**
12389 * xmlRecoverDoc:
12390 * @cur: a pointer to an array of xmlChar
12391 *
12392 * parse an XML in-memory document and build a tree.
12393 * In the case the document is not Well Formed, a tree is built anyway
12394 *
12395 * Returns the resulting document tree
12396 */
12397
12398xmlDocPtr
12399xmlRecoverDoc(xmlChar *cur) {
12400 return(xmlSAXParseDoc(NULL, cur, 1));
12401}
12402
12403/**
12404 * xmlParseFile:
12405 * @filename: the filename
12406 *
12407 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
12408 * compressed document is provided by default if found at compile-time.
12409 *
12410 * Returns the resulting document tree if the file was wellformed,
12411 * NULL otherwise.
12412 */
12413
12414xmlDocPtr
12415xmlParseFile(const char *filename) {
12416 return(xmlSAXParseFile(NULL, filename, 0));
12417}
12418
12419/**
12420 * xmlRecoverFile:
12421 * @filename: the filename
12422 *
12423 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
12424 * compressed document is provided by default if found at compile-time.
12425 * In the case the document is not Well Formed, a tree is built anyway
12426 *
12427 * Returns the resulting document tree
12428 */
12429
12430xmlDocPtr
12431xmlRecoverFile(const char *filename) {
12432 return(xmlSAXParseFile(NULL, filename, 1));
12433}
12434
12435
12436/**
12437 * xmlSetupParserForBuffer:
12438 * @ctxt: an XML parser context
12439 * @buffer: a xmlChar * buffer
12440 * @filename: a file name
12441 *
12442 * Setup the parser context to parse a new buffer; Clears any prior
12443 * contents from the parser context. The buffer parameter must not be
12444 * NULL, but the filename parameter can be
12445 */
12446void
12447xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt, const xmlChar* buffer,
12448 const char* filename)
12449{
12450 xmlParserInputPtr input;
12451
12452 if ((ctxt == NULL) || (buffer == NULL))
12453 return;
12454
12455 input = xmlNewInputStream(ctxt);
12456 if (input == NULL) {
12457 xmlErrMemory(NULL, "parsing new buffer: out of memory\n");
12458 xmlClearParserCtxt(ctxt);
12459 return;
12460 }
12461
12462 xmlClearParserCtxt(ctxt);
12463 if (filename != NULL)
12464 input->filename = (char *) xmlCanonicPath((const xmlChar *)filename);
12465 input->base = buffer;
12466 input->cur = buffer;
12467 input->end = &buffer[xmlStrlen(buffer)];
12468 inputPush(ctxt, input);
12469}
12470
12471/**
12472 * xmlSAXUserParseFile:
12473 * @sax: a SAX handler
12474 * @user_data: The user data returned on SAX callbacks
12475 * @filename: a file name
12476 *
12477 * parse an XML file and call the given SAX handler routines.
12478 * Automatic support for ZLIB/Compress compressed document is provided
12479 *
12480 * Returns 0 in case of success or a error number otherwise
12481 */
12482int
12483xmlSAXUserParseFile(xmlSAXHandlerPtr sax, void *user_data,
12484 const char *filename) {
12485 int ret = 0;
12486 xmlParserCtxtPtr ctxt;
12487
12488 ctxt = xmlCreateFileParserCtxt(filename);
12489 if (ctxt == NULL) return -1;
12490 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
12491 xmlFree(ctxt->sax);
12492 ctxt->sax = sax;
12493 xmlDetectSAX2(ctxt);
12494
12495 if (user_data != NULL)
12496 ctxt->userData = user_data;
12497
12498 xmlParseDocument(ctxt);
12499
12500 if (ctxt->wellFormed)
12501 ret = 0;
12502 else {
12503 if (ctxt->errNo != 0)
12504 ret = ctxt->errNo;
12505 else
12506 ret = -1;
12507 }
12508 if (sax != NULL)
12509 ctxt->sax = NULL;
12510 if (ctxt->myDoc != NULL) {
12511 xmlFreeDoc(ctxt->myDoc);
12512 ctxt->myDoc = NULL;
12513 }
12514 xmlFreeParserCtxt(ctxt);
12515
12516 return ret;
12517}
12518#endif /* LIBXML_SAX1_ENABLED */
12519
12520/************************************************************************
12521 * *
12522 * Front ends when parsing from memory *
12523 * *
12524 ************************************************************************/
12525
12526/**
12527 * xmlCreateMemoryParserCtxt:
12528 * @buffer: a pointer to a char array
12529 * @size: the size of the array
12530 *
12531 * Create a parser context for an XML in-memory document.
12532 *
12533 * Returns the new parser context or NULL
12534 */
12535xmlParserCtxtPtr
12536xmlCreateMemoryParserCtxt(const char *buffer, int size) {
12537 xmlParserCtxtPtr ctxt;
12538 xmlParserInputPtr input;
12539 xmlParserInputBufferPtr buf;
12540
12541 if (buffer == NULL)
12542 return(NULL);
12543 if (size <= 0)
12544 return(NULL);
12545
12546 ctxt = xmlNewParserCtxt();
12547 if (ctxt == NULL)
12548 return(NULL);
12549
12550 /* TODO: xmlParserInputBufferCreateStatic, requires some serious changes */
12551 buf = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
12552 if (buf == NULL) {
12553 xmlFreeParserCtxt(ctxt);
12554 return(NULL);
12555 }
12556
12557 input = xmlNewInputStream(ctxt);
12558 if (input == NULL) {
12559 xmlFreeParserInputBuffer(buf);
12560 xmlFreeParserCtxt(ctxt);
12561 return(NULL);
12562 }
12563
12564 input->filename = NULL;
12565 input->buf = buf;
12566 input->base = input->buf->buffer->content;
12567 input->cur = input->buf->buffer->content;
12568 input->end = &input->buf->buffer->content[input->buf->buffer->use];
12569
12570 inputPush(ctxt, input);
12571 return(ctxt);
12572}
12573
12574#ifdef LIBXML_SAX1_ENABLED
12575/**
12576 * xmlSAXParseMemoryWithData:
12577 * @sax: the SAX handler block
12578 * @buffer: an pointer to a char array
12579 * @size: the size of the array
12580 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
12581 * documents
12582 * @data: the userdata
12583 *
12584 * parse an XML in-memory block and use the given SAX function block
12585 * to handle the parsing callback. If sax is NULL, fallback to the default
12586 * DOM tree building routines.
12587 *
12588 * User data (void *) is stored within the parser context in the
12589 * context's _private member, so it is available nearly everywhere in libxml
12590 *
12591 * Returns the resulting document tree
12592 */
12593
12594xmlDocPtr
12595xmlSAXParseMemoryWithData(xmlSAXHandlerPtr sax, const char *buffer,
12596 int size, int recovery, void *data) {
12597 xmlDocPtr ret;
12598 xmlParserCtxtPtr ctxt;
12599
12600 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
12601 if (ctxt == NULL) return(NULL);
12602 if (sax != NULL) {
12603 if (ctxt->sax != NULL)
12604 xmlFree(ctxt->sax);
12605 ctxt->sax = sax;
12606 }
12607 xmlDetectSAX2(ctxt);
12608 if (data!=NULL) {
12609 ctxt->_private=data;
12610 }
12611
12612 ctxt->recovery = recovery;
12613
12614 xmlParseDocument(ctxt);
12615
12616 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
12617 else {
12618 ret = NULL;
12619 xmlFreeDoc(ctxt->myDoc);
12620 ctxt->myDoc = NULL;
12621 }
12622 if (sax != NULL)
12623 ctxt->sax = NULL;
12624 xmlFreeParserCtxt(ctxt);
12625
12626 return(ret);
12627}
12628
12629/**
12630 * xmlSAXParseMemory:
12631 * @sax: the SAX handler block
12632 * @buffer: an pointer to a char array
12633 * @size: the size of the array
12634 * @recovery: work in recovery mode, i.e. tries to read not Well Formed
12635 * documents
12636 *
12637 * parse an XML in-memory block and use the given SAX function block
12638 * to handle the parsing callback. If sax is NULL, fallback to the default
12639 * DOM tree building routines.
12640 *
12641 * Returns the resulting document tree
12642 */
12643xmlDocPtr
12644xmlSAXParseMemory(xmlSAXHandlerPtr sax, const char *buffer,
12645 int size, int recovery) {
12646 return xmlSAXParseMemoryWithData(sax, buffer, size, recovery, NULL);
12647}
12648
12649/**
12650 * xmlParseMemory:
12651 * @buffer: an pointer to a char array
12652 * @size: the size of the array
12653 *
12654 * parse an XML in-memory block and build a tree.
12655 *
12656 * Returns the resulting document tree
12657 */
12658
12659xmlDocPtr xmlParseMemory(const char *buffer, int size) {
12660 return(xmlSAXParseMemory(NULL, buffer, size, 0));
12661}
12662
12663/**
12664 * xmlRecoverMemory:
12665 * @buffer: an pointer to a char array
12666 * @size: the size of the array
12667 *
12668 * parse an XML in-memory block and build a tree.
12669 * In the case the document is not Well Formed, a tree is built anyway
12670 *
12671 * Returns the resulting document tree
12672 */
12673
12674xmlDocPtr xmlRecoverMemory(const char *buffer, int size) {
12675 return(xmlSAXParseMemory(NULL, buffer, size, 1));
12676}
12677
12678/**
12679 * xmlSAXUserParseMemory:
12680 * @sax: a SAX handler
12681 * @user_data: The user data returned on SAX callbacks
12682 * @buffer: an in-memory XML document input
12683 * @size: the length of the XML document in bytes
12684 *
12685 * A better SAX parsing routine.
12686 * parse an XML in-memory buffer and call the given SAX handler routines.
12687 *
12688 * Returns 0 in case of success or a error number otherwise
12689 */
12690int xmlSAXUserParseMemory(xmlSAXHandlerPtr sax, void *user_data,
12691 const char *buffer, int size) {
12692 int ret = 0;
12693 xmlParserCtxtPtr ctxt;
12694
12695 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
12696 if (ctxt == NULL) return -1;
12697 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
12698 xmlFree(ctxt->sax);
12699 ctxt->sax = sax;
12700 xmlDetectSAX2(ctxt);
12701
12702 if (user_data != NULL)
12703 ctxt->userData = user_data;
12704
12705 xmlParseDocument(ctxt);
12706
12707 if (ctxt->wellFormed)
12708 ret = 0;
12709 else {
12710 if (ctxt->errNo != 0)
12711 ret = ctxt->errNo;
12712 else
12713 ret = -1;
12714 }
12715 if (sax != NULL)
12716 ctxt->sax = NULL;
12717 if (ctxt->myDoc != NULL) {
12718 xmlFreeDoc(ctxt->myDoc);
12719 ctxt->myDoc = NULL;
12720 }
12721 xmlFreeParserCtxt(ctxt);
12722
12723 return ret;
12724}
12725#endif /* LIBXML_SAX1_ENABLED */
12726
12727/**
12728 * xmlCreateDocParserCtxt:
12729 * @cur: a pointer to an array of xmlChar
12730 *
12731 * Creates a parser context for an XML in-memory document.
12732 *
12733 * Returns the new parser context or NULL
12734 */
12735xmlParserCtxtPtr
12736xmlCreateDocParserCtxt(const xmlChar *cur) {
12737 int len;
12738
12739 if (cur == NULL)
12740 return(NULL);
12741 len = xmlStrlen(cur);
12742 return(xmlCreateMemoryParserCtxt((const char *)cur, len));
12743}
12744
12745#ifdef LIBXML_SAX1_ENABLED
12746/**
12747 * xmlSAXParseDoc:
12748 * @sax: the SAX handler block
12749 * @cur: a pointer to an array of xmlChar
12750 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
12751 * documents
12752 *
12753 * parse an XML in-memory document and build a tree.
12754 * It use the given SAX function block to handle the parsing callback.
12755 * If sax is NULL, fallback to the default DOM tree building routines.
12756 *
12757 * Returns the resulting document tree
12758 */
12759
12760xmlDocPtr
12761xmlSAXParseDoc(xmlSAXHandlerPtr sax, const xmlChar *cur, int recovery) {
12762 xmlDocPtr ret;
12763 xmlParserCtxtPtr ctxt;
12764 xmlSAXHandlerPtr oldsax = NULL;
12765
12766 if (cur == NULL) return(NULL);
12767
12768
12769 ctxt = xmlCreateDocParserCtxt(cur);
12770 if (ctxt == NULL) return(NULL);
12771 if (sax != NULL) {
12772 oldsax = ctxt->sax;
12773 ctxt->sax = sax;
12774 ctxt->userData = NULL;
12775 }
12776 xmlDetectSAX2(ctxt);
12777
12778 xmlParseDocument(ctxt);
12779 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
12780 else {
12781 ret = NULL;
12782 xmlFreeDoc(ctxt->myDoc);
12783 ctxt->myDoc = NULL;
12784 }
12785 if (sax != NULL)
12786 ctxt->sax = oldsax;
12787 xmlFreeParserCtxt(ctxt);
12788
12789 return(ret);
12790}
12791
12792/**
12793 * xmlParseDoc:
12794 * @cur: a pointer to an array of xmlChar
12795 *
12796 * parse an XML in-memory document and build a tree.
12797 *
12798 * Returns the resulting document tree
12799 */
12800
12801xmlDocPtr
12802xmlParseDoc(const xmlChar *cur) {
12803 return(xmlSAXParseDoc(NULL, cur, 0));
12804}
12805#endif /* LIBXML_SAX1_ENABLED */
12806
12807#ifdef LIBXML_LEGACY_ENABLED
12808/************************************************************************
12809 * *
12810 * Specific function to keep track of entities references *
12811 * and used by the XSLT debugger *
12812 * *
12813 ************************************************************************/
12814
12815static xmlEntityReferenceFunc xmlEntityRefFunc = NULL;
12816
12817/**
12818 * xmlAddEntityReference:
12819 * @ent : A valid entity
12820 * @firstNode : A valid first node for children of entity
12821 * @lastNode : A valid last node of children entity
12822 *
12823 * Notify of a reference to an entity of type XML_EXTERNAL_GENERAL_PARSED_ENTITY
12824 */
12825static void
12826xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
12827 xmlNodePtr lastNode)
12828{
12829 if (xmlEntityRefFunc != NULL) {
12830 (*xmlEntityRefFunc) (ent, firstNode, lastNode);
12831 }
12832}
12833
12834
12835/**
12836 * xmlSetEntityReferenceFunc:
12837 * @func: A valid function
12838 *
12839 * Set the function to call call back when a xml reference has been made
12840 */
12841void
12842xmlSetEntityReferenceFunc(xmlEntityReferenceFunc func)
12843{
12844 xmlEntityRefFunc = func;
12845}
12846#endif /* LIBXML_LEGACY_ENABLED */
12847
12848/************************************************************************
12849 * *
12850 * Miscellaneous *
12851 * *
12852 ************************************************************************/
12853
12854#ifdef LIBXML_XPATH_ENABLED
12855#include <libxml/xpath.h>
12856#endif
12857
12858extern void XMLCDECL xmlGenericErrorDefaultFunc(void *ctx, const char *msg, ...);
12859static int xmlParserInitialized = 0;
12860
12861/**
12862 * xmlInitParser:
12863 *
12864 * Initialization function for the XML parser.
12865 * This is not reentrant. Call once before processing in case of
12866 * use in multithreaded programs.
12867 */
12868
12869void
12870xmlInitParser(void) {
12871 if (xmlParserInitialized != 0)
12872 return;
12873
12874#ifdef LIBXML_THREAD_ENABLED
12875 __xmlGlobalInitMutexLock();
12876 if (xmlParserInitialized == 0) {
12877#endif
12878 if ((xmlGenericError == xmlGenericErrorDefaultFunc) ||
12879 (xmlGenericError == NULL))
12880 initGenericErrorDefaultFunc(NULL);
12881 xmlInitGlobals();
12882 xmlInitThreads();
12883 xmlInitMemory();
12884 xmlInitCharEncodingHandlers();
12885 xmlDefaultSAXHandlerInit();
12886 xmlRegisterDefaultInputCallbacks();
12887#ifdef LIBXML_OUTPUT_ENABLED
12888 xmlRegisterDefaultOutputCallbacks();
12889#endif /* LIBXML_OUTPUT_ENABLED */
12890#ifdef LIBXML_HTML_ENABLED
12891 htmlInitAutoClose();
12892 htmlDefaultSAXHandlerInit();
12893#endif
12894#ifdef LIBXML_XPATH_ENABLED
12895 xmlXPathInit();
12896#endif
12897 xmlParserInitialized = 1;
12898#ifdef LIBXML_THREAD_ENABLED
12899 }
12900 __xmlGlobalInitMutexUnlock();
12901#endif
12902}
12903
12904/**
12905 * xmlCleanupParser:
12906 *
12907 * Cleanup function for the XML library. It tries to reclaim all
12908 * parsing related global memory allocated for the library processing.
12909 * It doesn't deallocate any document related memory. Calling this
12910 * function should not prevent reusing the library but one should
12911 * call xmlCleanupParser() only when the process has
12912 * finished using the library or XML document built with it.
12913 */
12914
12915void
12916xmlCleanupParser(void) {
12917 if (!xmlParserInitialized)
12918 return;
12919
12920 xmlCleanupCharEncodingHandlers();
12921#ifdef LIBXML_CATALOG_ENABLED
12922 xmlCatalogCleanup();
12923#endif
12924 xmlDictCleanup();
12925 xmlCleanupInputCallbacks();
12926#ifdef LIBXML_OUTPUT_ENABLED
12927 xmlCleanupOutputCallbacks();
12928#endif
12929#ifdef LIBXML_SCHEMAS_ENABLED
12930 xmlSchemaCleanupTypes();
12931 xmlRelaxNGCleanupTypes();
12932#endif
12933 xmlCleanupGlobals();
12934 xmlResetLastError();
12935 xmlCleanupThreads(); /* must be last if called not from the main thread */
12936 xmlCleanupMemory();
12937 xmlParserInitialized = 0;
12938}
12939
12940/************************************************************************
12941 * *
12942 * New set (2.6.0) of simpler and more flexible APIs *
12943 * *
12944 ************************************************************************/
12945
/**
 * DICT_FREE:
 * @str:  a string
 *
 * Free a string if it is not owned by the "dict" dictionary in the
 * current scope. A variable named dict must be visible where the macro
 * is used. @str is evaluated more than once, so it must not have side
 * effects.
 *
 * The body is wrapped in do { } while (0) so that "DICT_FREE(x);" is a
 * single statement and stays correct inside an unbraced if/else.
 */
#define DICT_FREE(str)                                              \
    do {                                                            \
        if ((str) && ((!dict) ||                                    \
            (xmlDictOwns(dict, (const xmlChar *)(str)) == 0)))      \
            xmlFree((char *)(str));                                 \
    } while (0)
12957
12958/**
12959 * xmlCtxtReset:
12960 * @ctxt: an XML parser context
12961 *
12962 * Reset a parser context
12963 */
12964void
12965xmlCtxtReset(xmlParserCtxtPtr ctxt)
12966{
12967 xmlParserInputPtr input;
12968 xmlDictPtr dict;
12969
12970 if (ctxt == NULL)
12971 return;
12972
12973 dict = ctxt->dict;
12974
12975 while ((input = inputPop(ctxt)) != NULL) { /* Non consuming */
12976 xmlFreeInputStream(input);
12977 }
12978 ctxt->inputNr = 0;
12979 ctxt->input = NULL;
12980
12981 ctxt->spaceNr = 0;
12982 if (ctxt->spaceTab != NULL) {
12983 ctxt->spaceTab[0] = -1;
12984 ctxt->space = &ctxt->spaceTab[0];
12985 } else {
12986 ctxt->space = NULL;
12987 }
12988
12989
12990 ctxt->nodeNr = 0;
12991 ctxt->node = NULL;
12992
12993 ctxt->nameNr = 0;
12994 ctxt->name = NULL;
12995
12996 DICT_FREE(ctxt->version);
12997 ctxt->version = NULL;
12998 DICT_FREE(ctxt->encoding);
12999 ctxt->encoding = NULL;
13000 DICT_FREE(ctxt->directory);
13001 ctxt->directory = NULL;
13002 DICT_FREE(ctxt->extSubURI);
13003 ctxt->extSubURI = NULL;
13004 DICT_FREE(ctxt->extSubSystem);
13005 ctxt->extSubSystem = NULL;
13006 if (ctxt->myDoc != NULL)
13007 xmlFreeDoc(ctxt->myDoc);
13008 ctxt->myDoc = NULL;
13009
13010 ctxt->standalone = -1;
13011 ctxt->hasExternalSubset = 0;
13012 ctxt->hasPErefs = 0;
13013 ctxt->html = 0;
13014 ctxt->external = 0;
13015 ctxt->instate = XML_PARSER_START;
13016 ctxt->token = 0;
13017
13018 ctxt->wellFormed = 1;
13019 ctxt->nsWellFormed = 1;
13020 ctxt->disableSAX = 0;
13021 ctxt->valid = 1;
13022#if 0
13023 ctxt->vctxt.userData = ctxt;
13024 ctxt->vctxt.error = xmlParserValidityError;
13025 ctxt->vctxt.warning = xmlParserValidityWarning;
13026#endif
13027 ctxt->record_info = 0;
13028 ctxt->nbChars = 0;
13029 ctxt->checkIndex = 0;
13030 ctxt->inSubset = 0;
13031 ctxt->errNo = XML_ERR_OK;
13032 ctxt->depth = 0;
13033 ctxt->charset = XML_CHAR_ENCODING_UTF8;
13034 ctxt->catalogs = NULL;
13035 xmlInitNodeInfoSeq(&ctxt->node_seq);
13036
13037 if (ctxt->attsDefault != NULL) {
13038 xmlHashFree(ctxt->attsDefault, (xmlHashDeallocator) xmlFree);
13039 ctxt->attsDefault = NULL;
13040 }
13041 if (ctxt->attsSpecial != NULL) {
13042 xmlHashFree(ctxt->attsSpecial, NULL);
13043 ctxt->attsSpecial = NULL;
13044 }
13045
13046#ifdef LIBXML_CATALOG_ENABLED
13047 if (ctxt->catalogs != NULL)
13048 xmlCatalogFreeLocal(ctxt->catalogs);
13049#endif
13050 if (ctxt->lastError.code != XML_ERR_OK)
13051 xmlResetError(&ctxt->lastError);
13052}
13053
13054/**
13055 * xmlCtxtResetPush:
13056 * @ctxt: an XML parser context
13057 * @chunk: a pointer to an array of chars
13058 * @size: number of chars in the array
13059 * @filename: an optional file name or URI
13060 * @encoding: the document encoding, or NULL
13061 *
13062 * Reset a push parser context
13063 *
13064 * Returns 0 in case of success and 1 in case of error
13065 */
13066int
13067xmlCtxtResetPush(xmlParserCtxtPtr ctxt, const char *chunk,
13068 int size, const char *filename, const char *encoding)
13069{
13070 xmlParserInputPtr inputStream;
13071 xmlParserInputBufferPtr buf;
13072 xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
13073
13074 if (ctxt == NULL)
13075 return(1);
13076
13077 if ((encoding == NULL) && (chunk != NULL) && (size >= 4))
13078 enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
13079
13080 buf = xmlAllocParserInputBuffer(enc);
13081 if (buf == NULL)
13082 return(1);
13083
13084 if (ctxt == NULL) {
13085 xmlFreeParserInputBuffer(buf);
13086 return(1);
13087 }
13088
13089 xmlCtxtReset(ctxt);
13090
13091 if (ctxt->pushTab == NULL) {
13092 ctxt->pushTab = (void **) xmlMalloc(ctxt->nameMax * 3 *
13093 sizeof(xmlChar *));
13094 if (ctxt->pushTab == NULL) {
13095 xmlErrMemory(ctxt, NULL);
13096 xmlFreeParserInputBuffer(buf);
13097 return(1);
13098 }
13099 }
13100
13101 if (filename == NULL) {
13102 ctxt->directory = NULL;
13103 } else {
13104 ctxt->directory = xmlParserGetDirectory(filename);
13105 }
13106
13107 inputStream = xmlNewInputStream(ctxt);
13108 if (inputStream == NULL) {
13109 xmlFreeParserInputBuffer(buf);
13110 return(1);
13111 }
13112
13113 if (filename == NULL)
13114 inputStream->filename = NULL;
13115 else
13116 inputStream->filename = (char *)
13117 xmlCanonicPath((const xmlChar *) filename);
13118 inputStream->buf = buf;
13119 inputStream->base = inputStream->buf->buffer->content;
13120 inputStream->cur = inputStream->buf->buffer->content;
13121 inputStream->end =
13122 &inputStream->buf->buffer->content[inputStream->buf->buffer->use];
13123
13124 inputPush(ctxt, inputStream);
13125
13126 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
13127 (ctxt->input->buf != NULL)) {
13128 int base = ctxt->input->base - ctxt->input->buf->buffer->content;
13129 int cur = ctxt->input->cur - ctxt->input->base;
13130
13131 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
13132
13133 ctxt->input->base = ctxt->input->buf->buffer->content + base;
13134 ctxt->input->cur = ctxt->input->base + cur;
13135 ctxt->input->end =
13136 &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->
13137 use];
13138#ifdef DEBUG_PUSH
13139 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
13140#endif
13141 }
13142
13143 if (encoding != NULL) {
13144 xmlCharEncodingHandlerPtr hdlr;
13145
13146 hdlr = xmlFindCharEncodingHandler(encoding);
13147 if (hdlr != NULL) {
13148 xmlSwitchToEncoding(ctxt, hdlr);
13149 } else {
13150 xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
13151 "Unsupported encoding %s\n", BAD_CAST encoding);
13152 }
13153 } else if (enc != XML_CHAR_ENCODING_NONE) {
13154 xmlSwitchEncoding(ctxt, enc);
13155 }
13156
13157 return(0);
13158}
13159
13160/**
13161 * xmlCtxtUseOptions:
13162 * @ctxt: an XML parser context
13163 * @options: a combination of xmlParserOption
13164 *
13165 * Applies the options to the parser context
13166 *
13167 * Returns 0 in case of success, the set of unknown or unimplemented options
13168 * in case of error.
13169 */
13170int
13171xmlCtxtUseOptions(xmlParserCtxtPtr ctxt, int options)
13172{
13173 if (ctxt == NULL)
13174 return(-1);
13175 if (options & XML_PARSE_RECOVER) {
13176 ctxt->recovery = 1;
13177 options -= XML_PARSE_RECOVER;
13178 } else
13179 ctxt->recovery = 0;
13180 if (options & XML_PARSE_DTDLOAD) {
13181 ctxt->loadsubset = XML_DETECT_IDS;
13182 options -= XML_PARSE_DTDLOAD;
13183 } else
13184 ctxt->loadsubset = 0;
13185 if (options & XML_PARSE_DTDATTR) {
13186 ctxt->loadsubset |= XML_COMPLETE_ATTRS;
13187 options -= XML_PARSE_DTDATTR;
13188 }
13189 if (options & XML_PARSE_NOENT) {
13190 ctxt->replaceEntities = 1;
13191 /* ctxt->loadsubset |= XML_DETECT_IDS; */
13192 options -= XML_PARSE_NOENT;
13193 } else
13194 ctxt->replaceEntities = 0;
13195 if (options & XML_PARSE_PEDANTIC) {
13196 ctxt->pedantic = 1;
13197 options -= XML_PARSE_PEDANTIC;
13198 } else
13199 ctxt->pedantic = 0;
13200 if (options & XML_PARSE_NOBLANKS) {
13201 ctxt->keepBlanks = 0;
13202 ctxt->sax->ignorableWhitespace = xmlSAX2IgnorableWhitespace;
13203 options -= XML_PARSE_NOBLANKS;
13204 } else
13205 ctxt->keepBlanks = 1;
13206 if (options & XML_PARSE_DTDVALID) {
13207 ctxt->validate = 1;
13208 if (options & XML_PARSE_NOWARNING)
13209 ctxt->vctxt.warning = NULL;
13210 if (options & XML_PARSE_NOERROR)
13211 ctxt->vctxt.error = NULL;
13212 options -= XML_PARSE_DTDVALID;
13213 } else
13214 ctxt->validate = 0;
13215 if (options & XML_PARSE_NOWARNING) {
13216 ctxt->sax->warning = NULL;
13217 options -= XML_PARSE_NOWARNING;
13218 }
13219 if (options & XML_PARSE_NOERROR) {
13220 ctxt->sax->error = NULL;
13221 ctxt->sax->fatalError = NULL;
13222 options -= XML_PARSE_NOERROR;
13223 }
13224#ifdef LIBXML_SAX1_ENABLED
13225 if (options & XML_PARSE_SAX1) {
13226 ctxt->sax->startElement = xmlSAX2StartElement;
13227 ctxt->sax->endElement = xmlSAX2EndElement;
13228 ctxt->sax->startElementNs = NULL;
13229 ctxt->sax->endElementNs = NULL;
13230 ctxt->sax->initialized = 1;
13231 options -= XML_PARSE_SAX1;
13232 }
13233#endif /* LIBXML_SAX1_ENABLED */
13234 if (options & XML_PARSE_NODICT) {
13235 ctxt->dictNames = 0;
13236 options -= XML_PARSE_NODICT;
13237 } else {
13238 ctxt->dictNames = 1;
13239 }
13240 if (options & XML_PARSE_NOCDATA) {
13241 ctxt->sax->cdataBlock = NULL;
13242 options -= XML_PARSE_NOCDATA;
13243 }
13244 if (options & XML_PARSE_NSCLEAN) {
13245 ctxt->options |= XML_PARSE_NSCLEAN;
13246 options -= XML_PARSE_NSCLEAN;
13247 }
13248 if (options & XML_PARSE_NONET) {
13249 ctxt->options |= XML_PARSE_NONET;
13250 options -= XML_PARSE_NONET;
13251 }
13252 if (options & XML_PARSE_COMPACT) {
13253 ctxt->options |= XML_PARSE_COMPACT;
13254 options -= XML_PARSE_COMPACT;
13255 }
13256 ctxt->linenumbers = 1;
13257 return (options);
13258}
13259
13260/**
13261 * xmlDoRead:
13262 * @ctxt: an XML parser context
13263 * @URL: the base URL to use for the document
13264 * @encoding: the document encoding, or NULL
13265 * @options: a combination of xmlParserOption
13266 * @reuse: keep the context for reuse
13267 *
13268 * Common front-end for the xmlRead functions
13269 *
13270 * Returns the resulting document tree or NULL
13271 */
13272static xmlDocPtr
13273xmlDoRead(xmlParserCtxtPtr ctxt, const char *URL, const char *encoding,
13274 int options, int reuse)
13275{
13276 xmlDocPtr ret;
13277
13278 xmlCtxtUseOptions(ctxt, options);
13279 if (encoding != NULL) {
13280 xmlCharEncodingHandlerPtr hdlr;
13281
13282 hdlr = xmlFindCharEncodingHandler(encoding);
13283 if (hdlr != NULL)
13284 xmlSwitchToEncoding(ctxt, hdlr);
13285 }
13286 if ((URL != NULL) && (ctxt->input != NULL) &&
13287 (ctxt->input->filename == NULL))
13288 ctxt->input->filename = (char *) xmlStrdup((const xmlChar *) URL);
13289 xmlParseDocument(ctxt);
13290 if ((ctxt->wellFormed) || ctxt->recovery)
13291 ret = ctxt->myDoc;
13292 else {
13293 ret = NULL;
13294 if (ctxt->myDoc != NULL) {
13295 xmlFreeDoc(ctxt->myDoc);
13296 }
13297 }
13298 ctxt->myDoc = NULL;
13299 if (!reuse) {
13300 xmlFreeParserCtxt(ctxt);
13301 }
13302
13303 return (ret);
13304}
13305
13306/**
13307 * xmlReadDoc:
13308 * @cur: a pointer to a zero terminated string
13309 * @URL: the base URL to use for the document
13310 * @encoding: the document encoding, or NULL
13311 * @options: a combination of xmlParserOption
13312 *
13313 * parse an XML in-memory document and build a tree.
13314 *
13315 * Returns the resulting document tree
13316 */
13317xmlDocPtr
13318xmlReadDoc(const xmlChar * cur, const char *URL, const char *encoding, int options)
13319{
13320 xmlParserCtxtPtr ctxt;
13321
13322 if (cur == NULL)
13323 return (NULL);
13324
13325 ctxt = xmlCreateDocParserCtxt(cur);
13326 if (ctxt == NULL)
13327 return (NULL);
13328 return (xmlDoRead(ctxt, URL, encoding, options, 0));
13329}
13330
13331/**
13332 * xmlReadFile:
13333 * @filename: a file or URL
13334 * @encoding: the document encoding, or NULL
13335 * @options: a combination of xmlParserOption
13336 *
13337 * parse an XML file from the filesystem or the network.
13338 *
13339 * Returns the resulting document tree
13340 */
13341xmlDocPtr
13342xmlReadFile(const char *filename, const char *encoding, int options)
13343{
13344 xmlParserCtxtPtr ctxt;
13345
13346 ctxt = xmlCreateURLParserCtxt(filename, options);
13347 if (ctxt == NULL)
13348 return (NULL);
13349 return (xmlDoRead(ctxt, NULL, encoding, options, 0));
13350}
13351
13352/**
13353 * xmlReadMemory:
13354 * @buffer: a pointer to a char array
13355 * @size: the size of the array
13356 * @URL: the base URL to use for the document
13357 * @encoding: the document encoding, or NULL
13358 * @options: a combination of xmlParserOption
13359 *
13360 * parse an XML in-memory document and build a tree.
13361 *
13362 * Returns the resulting document tree
13363 */
13364xmlDocPtr
13365xmlReadMemory(const char *buffer, int size, const char *URL, const char *encoding, int options)
13366{
13367 xmlParserCtxtPtr ctxt;
13368
13369 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
13370 if (ctxt == NULL)
13371 return (NULL);
13372 return (xmlDoRead(ctxt, URL, encoding, options, 0));
13373}
13374
13375/**
13376 * xmlReadFd:
13377 * @fd: an open file descriptor
13378 * @URL: the base URL to use for the document
13379 * @encoding: the document encoding, or NULL
13380 * @options: a combination of xmlParserOption
13381 *
13382 * parse an XML from a file descriptor and build a tree.
13383 * NOTE that the file descriptor will not be closed when the
13384 * reader is closed or reset.
13385 *
13386 * Returns the resulting document tree
13387 */
13388xmlDocPtr
13389xmlReadFd(int fd, const char *URL, const char *encoding, int options)
13390{
13391 xmlParserCtxtPtr ctxt;
13392 xmlParserInputBufferPtr input;
13393 xmlParserInputPtr stream;
13394
13395 if (fd < 0)
13396 return (NULL);
13397
13398 input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
13399 if (input == NULL)
13400 return (NULL);
13401 input->closecallback = NULL;
13402 ctxt = xmlNewParserCtxt();
13403 if (ctxt == NULL) {
13404 xmlFreeParserInputBuffer(input);
13405 return (NULL);
13406 }
13407 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
13408 if (stream == NULL) {
13409 xmlFreeParserInputBuffer(input);
13410 xmlFreeParserCtxt(ctxt);
13411 return (NULL);
13412 }
13413 inputPush(ctxt, stream);
13414 return (xmlDoRead(ctxt, URL, encoding, options, 0));
13415}
13416
13417/**
13418 * xmlReadIO:
13419 * @ioread: an I/O read function
13420 * @ioclose: an I/O close function
13421 * @ioctx: an I/O handler
13422 * @URL: the base URL to use for the document
13423 * @encoding: the document encoding, or NULL
13424 * @options: a combination of xmlParserOption
13425 *
13426 * parse an XML document from I/O functions and source and build a tree.
13427 *
13428 * Returns the resulting document tree
13429 */
13430xmlDocPtr
13431xmlReadIO(xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
13432 void *ioctx, const char *URL, const char *encoding, int options)
13433{
13434 xmlParserCtxtPtr ctxt;
13435 xmlParserInputBufferPtr input;
13436 xmlParserInputPtr stream;
13437
13438 if (ioread == NULL)
13439 return (NULL);
13440
13441 input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
13442 XML_CHAR_ENCODING_NONE);
13443 if (input == NULL)
13444 return (NULL);
13445 ctxt = xmlNewParserCtxt();
13446 if (ctxt == NULL) {
13447 xmlFreeParserInputBuffer(input);
13448 return (NULL);
13449 }
13450 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
13451 if (stream == NULL) {
13452 xmlFreeParserInputBuffer(input);
13453 xmlFreeParserCtxt(ctxt);
13454 return (NULL);
13455 }
13456 inputPush(ctxt, stream);
13457 return (xmlDoRead(ctxt, URL, encoding, options, 0));
13458}
13459
13460/**
13461 * xmlCtxtReadDoc:
13462 * @ctxt: an XML parser context
13463 * @cur: a pointer to a zero terminated string
13464 * @URL: the base URL to use for the document
13465 * @encoding: the document encoding, or NULL
13466 * @options: a combination of xmlParserOption
13467 *
13468 * parse an XML in-memory document and build a tree.
13469 * This reuses the existing @ctxt parser context
13470 *
13471 * Returns the resulting document tree
13472 */
13473xmlDocPtr
13474xmlCtxtReadDoc(xmlParserCtxtPtr ctxt, const xmlChar * cur,
13475 const char *URL, const char *encoding, int options)
13476{
13477 xmlParserInputPtr stream;
13478
13479 if (cur == NULL)
13480 return (NULL);
13481 if (ctxt == NULL)
13482 return (NULL);
13483
13484 xmlCtxtReset(ctxt);
13485
13486 stream = xmlNewStringInputStream(ctxt, cur);
13487 if (stream == NULL) {
13488 return (NULL);
13489 }
13490 inputPush(ctxt, stream);
13491 return (xmlDoRead(ctxt, URL, encoding, options, 1));
13492}
13493
13494/**
13495 * xmlCtxtReadFile:
13496 * @ctxt: an XML parser context
13497 * @filename: a file or URL
13498 * @encoding: the document encoding, or NULL
13499 * @options: a combination of xmlParserOption
13500 *
13501 * parse an XML file from the filesystem or the network.
13502 * This reuses the existing @ctxt parser context
13503 *
13504 * Returns the resulting document tree
13505 */
13506xmlDocPtr
13507xmlCtxtReadFile(xmlParserCtxtPtr ctxt, const char *filename,
13508 const char *encoding, int options)
13509{
13510 xmlParserInputPtr stream;
13511
13512 if (filename == NULL)
13513 return (NULL);
13514 if (ctxt == NULL)
13515 return (NULL);
13516
13517 xmlCtxtReset(ctxt);
13518
13519 stream = xmlLoadExternalEntity(filename, NULL, ctxt);
13520 if (stream == NULL) {
13521 return (NULL);
13522 }
13523 inputPush(ctxt, stream);
13524 return (xmlDoRead(ctxt, NULL, encoding, options, 1));
13525}
13526
13527/**
13528 * xmlCtxtReadMemory:
13529 * @ctxt: an XML parser context
13530 * @buffer: a pointer to a char array
13531 * @size: the size of the array
13532 * @URL: the base URL to use for the document
13533 * @encoding: the document encoding, or NULL
13534 * @options: a combination of xmlParserOption
13535 *
13536 * parse an XML in-memory document and build a tree.
13537 * This reuses the existing @ctxt parser context
13538 *
13539 * Returns the resulting document tree
13540 */
13541xmlDocPtr
13542xmlCtxtReadMemory(xmlParserCtxtPtr ctxt, const char *buffer, int size,
13543 const char *URL, const char *encoding, int options)
13544{
13545 xmlParserInputBufferPtr input;
13546 xmlParserInputPtr stream;
13547
13548 if (ctxt == NULL)
13549 return (NULL);
13550 if (buffer == NULL)
13551 return (NULL);
13552
13553 xmlCtxtReset(ctxt);
13554
13555 input = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
13556 if (input == NULL) {
13557 return(NULL);
13558 }
13559
13560 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
13561 if (stream == NULL) {
13562 xmlFreeParserInputBuffer(input);
13563 return(NULL);
13564 }
13565
13566 inputPush(ctxt, stream);
13567 return (xmlDoRead(ctxt, URL, encoding, options, 1));
13568}
13569
13570/**
13571 * xmlCtxtReadFd:
13572 * @ctxt: an XML parser context
13573 * @fd: an open file descriptor
13574 * @URL: the base URL to use for the document
13575 * @encoding: the document encoding, or NULL
13576 * @options: a combination of xmlParserOption
13577 *
13578 * parse an XML from a file descriptor and build a tree.
13579 * This reuses the existing @ctxt parser context
13580 * NOTE that the file descriptor will not be closed when the
13581 * reader is closed or reset.
13582 *
13583 * Returns the resulting document tree
13584 */
13585xmlDocPtr
13586xmlCtxtReadFd(xmlParserCtxtPtr ctxt, int fd,
13587 const char *URL, const char *encoding, int options)
13588{
13589 xmlParserInputBufferPtr input;
13590 xmlParserInputPtr stream;
13591
13592 if (fd < 0)
13593 return (NULL);
13594 if (ctxt == NULL)
13595 return (NULL);
13596
13597 xmlCtxtReset(ctxt);
13598
13599
13600 input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
13601 if (input == NULL)
13602 return (NULL);
13603 input->closecallback = NULL;
13604 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
13605 if (stream == NULL) {
13606 xmlFreeParserInputBuffer(input);
13607 return (NULL);
13608 }
13609 inputPush(ctxt, stream);
13610 return (xmlDoRead(ctxt, URL, encoding, options, 1));
13611}
13612
13613/**
13614 * xmlCtxtReadIO:
13615 * @ctxt: an XML parser context
13616 * @ioread: an I/O read function
13617 * @ioclose: an I/O close function
13618 * @ioctx: an I/O handler
13619 * @URL: the base URL to use for the document
13620 * @encoding: the document encoding, or NULL
13621 * @options: a combination of xmlParserOption
13622 *
13623 * parse an XML document from I/O functions and source and build a tree.
13624 * This reuses the existing @ctxt parser context
13625 *
13626 * Returns the resulting document tree
13627 */
13628xmlDocPtr
13629xmlCtxtReadIO(xmlParserCtxtPtr ctxt, xmlInputReadCallback ioread,
13630 xmlInputCloseCallback ioclose, void *ioctx,
13631 const char *URL,
13632 const char *encoding, int options)
13633{
13634 xmlParserInputBufferPtr input;
13635 xmlParserInputPtr stream;
13636
13637 if (ioread == NULL)
13638 return (NULL);
13639 if (ctxt == NULL)
13640 return (NULL);
13641
13642 xmlCtxtReset(ctxt);
13643
13644 input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
13645 XML_CHAR_ENCODING_NONE);
13646 if (input == NULL)
13647 return (NULL);
13648 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
13649 if (stream == NULL) {
13650 xmlFreeParserInputBuffer(input);
13651 return (NULL);
13652 }
13653 inputPush(ctxt, stream);
13654 return (xmlDoRead(ctxt, URL, encoding, options, 1));
13655}
13656
13657#define bottom_parser
13658#include "elfgcchack.h"
Note: See TracBrowser for help on using the repository browser.

© 2024 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette