VirtualBox

source: vbox/trunk/src/libs/libxml2-2.6.30/parser.c@ 34034

Last change on this file since 34034 was 22405, checked in by vboxsync, 15 years ago

libxml2: fixes from upstream

  • Property svn:eol-style set to native
  • Property svn:keywords set to Date Revision Author Id
File size: 367.2 KB
Line 
1/*
2 * parser.c : an XML 1.0 parser, namespaces and validity support are mostly
3 * implemented on top of the SAX interfaces
4 *
5 * References:
6 * The XML specification:
7 * http://www.w3.org/TR/REC-xml
8 * Original 1.0 version:
9 * http://www.w3.org/TR/1998/REC-xml-19980210
10 * XML second edition working draft
11 * http://www.w3.org/TR/2000/WD-xml-2e-20000814
12 *
13 * Okay this is a big file, the parser core is around 7000 lines, then it
14 * is followed by the progressive parser top routines, then the various
15 * high level APIs to call the parser and a few miscellaneous functions.
16 * A number of helper functions and deprecated ones have been moved to
17 * parserInternals.c to reduce this file size.
18 * As much as possible the functions are associated with their relative
19 * production in the XML specification. A few productions defining the
20 * different ranges of character are actually implanted either in
21 * parserInternals.h or parserInternals.c
22 * The DOM tree build is realized from the default SAX callbacks in
23 * the module SAX.c.
24 * The routines doing the validation checks are in valid.c and called either
25 * from the SAX callbacks or as standalone functions using a preparsed
26 * document.
27 *
28 * See Copyright for the status of this software.
29 *
30 * daniel@veillard.com
31 */
32
33#define IN_LIBXML
34#include "libxml.h"
35
36#if defined(WIN32) && !defined (__CYGWIN__)
37#define XML_DIR_SEP '\\'
38#else
39#define XML_DIR_SEP '/'
40#endif
41
42#include <stdlib.h>
43#include <string.h>
44#include <stdarg.h>
45#include <libxml/xmlmemory.h>
46#include <libxml/threads.h>
47#include <libxml/globals.h>
48#include <libxml/tree.h>
49#include <libxml/parser.h>
50#include <libxml/parserInternals.h>
51#include <libxml/valid.h>
52#include <libxml/entities.h>
53#include <libxml/xmlerror.h>
54#include <libxml/encoding.h>
55#include <libxml/xmlIO.h>
56#include <libxml/uri.h>
57#ifdef LIBXML_CATALOG_ENABLED
58#include <libxml/catalog.h>
59#endif
60#ifdef LIBXML_SCHEMAS_ENABLED
61#include <libxml/xmlschemastypes.h>
62#include <libxml/relaxng.h>
63#endif
64#ifdef HAVE_CTYPE_H
65#include <ctype.h>
66#endif
67#ifdef HAVE_STDLIB_H
68#include <stdlib.h>
69#endif
70#ifdef HAVE_SYS_STAT_H
71#include <sys/stat.h>
72#endif
73#ifdef HAVE_FCNTL_H
74#include <fcntl.h>
75#endif
76#ifdef HAVE_UNISTD_H
77#include <unistd.h>
78#endif
79#ifdef HAVE_ZLIB_H
80#include <zlib.h>
81#endif
82
83/**
84 * xmlParserMaxDepth:
85 *
86 * arbitrary depth limit for the XML documents that we allow to
87 * process. This is not a limitation of the parser but a safety
88 * boundary feature.
89 */
90unsigned int xmlParserMaxDepth = 1024;
91
92#define SAX2 1
93
94#define XML_PARSER_BIG_BUFFER_SIZE 300
95#define XML_PARSER_BUFFER_SIZE 100
96
97#define SAX_COMPAT_MODE BAD_CAST "SAX compatibility mode document"
98
99/*
100 * List of XML prefixed PI allowed by W3C specs
101 */
102
103static const char *xmlW3CPIs[] = {
104 "xml-stylesheet",
105 NULL
106};
107
108
109/* DEPR void xmlParserHandleReference(xmlParserCtxtPtr ctxt); */
110xmlEntityPtr xmlParseStringPEReference(xmlParserCtxtPtr ctxt,
111 const xmlChar **str);
112
113static xmlParserErrors
114xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
115 xmlSAXHandlerPtr sax,
116 void *user_data, int depth, const xmlChar *URL,
117 const xmlChar *ID, xmlNodePtr *list);
118
119#ifdef LIBXML_LEGACY_ENABLED
120static void
121xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
122 xmlNodePtr lastNode);
123#endif /* LIBXML_LEGACY_ENABLED */
124
125static xmlParserErrors
126xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
127 const xmlChar *string, void *user_data, xmlNodePtr *lst);
128
129/************************************************************************
130 * *
131 * Some factorized error routines *
132 * *
133 ************************************************************************/
134
135/**
136 * xmlErrAttributeDup:
137 * @ctxt: an XML parser context
138 * @prefix: the attribute prefix
139 * @localname: the attribute localname
140 *
141 * Handle a redefinition of attribute error
142 */
143static void
144xmlErrAttributeDup(xmlParserCtxtPtr ctxt, const xmlChar * prefix,
145 const xmlChar * localname)
146{
147 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
148 (ctxt->instate == XML_PARSER_EOF))
149 return;
150 if (ctxt != NULL)
151 ctxt->errNo = XML_ERR_ATTRIBUTE_REDEFINED;
152 if (prefix == NULL)
153 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
154 ctxt->errNo, XML_ERR_FATAL, NULL, 0,
155 (const char *) localname, NULL, NULL, 0, 0,
156 "Attribute %s redefined\n", localname);
157 else
158 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
159 ctxt->errNo, XML_ERR_FATAL, NULL, 0,
160 (const char *) prefix, (const char *) localname,
161 NULL, 0, 0, "Attribute %s:%s redefined\n", prefix,
162 localname);
163 if (ctxt != NULL) {
164 ctxt->wellFormed = 0;
165 if (ctxt->recovery == 0)
166 ctxt->disableSAX = 1;
167 }
168}
169
170/**
171 * xmlFatalErr:
172 * @ctxt: an XML parser context
173 * @error: the error number
174 * @extra: extra information string
175 *
176 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
177 */
178static void
179xmlFatalErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, const char *info)
180{
181 const char *errmsg;
182
183 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
184 (ctxt->instate == XML_PARSER_EOF))
185 return;
186 switch (error) {
187 case XML_ERR_INVALID_HEX_CHARREF:
188 errmsg = "CharRef: invalid hexadecimal value\n";
189 break;
190 case XML_ERR_INVALID_DEC_CHARREF:
191 errmsg = "CharRef: invalid decimal value\n";
192 break;
193 case XML_ERR_INVALID_CHARREF:
194 errmsg = "CharRef: invalid value\n";
195 break;
196 case XML_ERR_INTERNAL_ERROR:
197 errmsg = "internal error";
198 break;
199 case XML_ERR_PEREF_AT_EOF:
200 errmsg = "PEReference at end of document\n";
201 break;
202 case XML_ERR_PEREF_IN_PROLOG:
203 errmsg = "PEReference in prolog\n";
204 break;
205 case XML_ERR_PEREF_IN_EPILOG:
206 errmsg = "PEReference in epilog\n";
207 break;
208 case XML_ERR_PEREF_NO_NAME:
209 errmsg = "PEReference: no name\n";
210 break;
211 case XML_ERR_PEREF_SEMICOL_MISSING:
212 errmsg = "PEReference: expecting ';'\n";
213 break;
214 case XML_ERR_ENTITY_LOOP:
215 errmsg = "Detected an entity reference loop\n";
216 break;
217 case XML_ERR_ENTITY_NOT_STARTED:
218 errmsg = "EntityValue: \" or ' expected\n";
219 break;
220 case XML_ERR_ENTITY_PE_INTERNAL:
221 errmsg = "PEReferences forbidden in internal subset\n";
222 break;
223 case XML_ERR_ENTITY_NOT_FINISHED:
224 errmsg = "EntityValue: \" or ' expected\n";
225 break;
226 case XML_ERR_ATTRIBUTE_NOT_STARTED:
227 errmsg = "AttValue: \" or ' expected\n";
228 break;
229 case XML_ERR_LT_IN_ATTRIBUTE:
230 errmsg = "Unescaped '<' not allowed in attributes values\n";
231 break;
232 case XML_ERR_LITERAL_NOT_STARTED:
233 errmsg = "SystemLiteral \" or ' expected\n";
234 break;
235 case XML_ERR_LITERAL_NOT_FINISHED:
236 errmsg = "Unfinished System or Public ID \" or ' expected\n";
237 break;
238 case XML_ERR_MISPLACED_CDATA_END:
239 errmsg = "Sequence ']]>' not allowed in content\n";
240 break;
241 case XML_ERR_URI_REQUIRED:
242 errmsg = "SYSTEM or PUBLIC, the URI is missing\n";
243 break;
244 case XML_ERR_PUBID_REQUIRED:
245 errmsg = "PUBLIC, the Public Identifier is missing\n";
246 break;
247 case XML_ERR_HYPHEN_IN_COMMENT:
248 errmsg = "Comment must not contain '--' (double-hyphen)\n";
249 break;
250 case XML_ERR_PI_NOT_STARTED:
251 errmsg = "xmlParsePI : no target name\n";
252 break;
253 case XML_ERR_RESERVED_XML_NAME:
254 errmsg = "Invalid PI name\n";
255 break;
256 case XML_ERR_NOTATION_NOT_STARTED:
257 errmsg = "NOTATION: Name expected here\n";
258 break;
259 case XML_ERR_NOTATION_NOT_FINISHED:
260 errmsg = "'>' required to close NOTATION declaration\n";
261 break;
262 case XML_ERR_VALUE_REQUIRED:
263 errmsg = "Entity value required\n";
264 break;
265 case XML_ERR_URI_FRAGMENT:
266 errmsg = "Fragment not allowed";
267 break;
268 case XML_ERR_ATTLIST_NOT_STARTED:
269 errmsg = "'(' required to start ATTLIST enumeration\n";
270 break;
271 case XML_ERR_NMTOKEN_REQUIRED:
272 errmsg = "NmToken expected in ATTLIST enumeration\n";
273 break;
274 case XML_ERR_ATTLIST_NOT_FINISHED:
275 errmsg = "')' required to finish ATTLIST enumeration\n";
276 break;
277 case XML_ERR_MIXED_NOT_STARTED:
278 errmsg = "MixedContentDecl : '|' or ')*' expected\n";
279 break;
280 case XML_ERR_PCDATA_REQUIRED:
281 errmsg = "MixedContentDecl : '#PCDATA' expected\n";
282 break;
283 case XML_ERR_ELEMCONTENT_NOT_STARTED:
284 errmsg = "ContentDecl : Name or '(' expected\n";
285 break;
286 case XML_ERR_ELEMCONTENT_NOT_FINISHED:
287 errmsg = "ContentDecl : ',' '|' or ')' expected\n";
288 break;
289 case XML_ERR_PEREF_IN_INT_SUBSET:
290 errmsg =
291 "PEReference: forbidden within markup decl in internal subset\n";
292 break;
293 case XML_ERR_GT_REQUIRED:
294 errmsg = "expected '>'\n";
295 break;
296 case XML_ERR_CONDSEC_INVALID:
297 errmsg = "XML conditional section '[' expected\n";
298 break;
299 case XML_ERR_EXT_SUBSET_NOT_FINISHED:
300 errmsg = "Content error in the external subset\n";
301 break;
302 case XML_ERR_CONDSEC_INVALID_KEYWORD:
303 errmsg =
304 "conditional section INCLUDE or IGNORE keyword expected\n";
305 break;
306 case XML_ERR_CONDSEC_NOT_FINISHED:
307 errmsg = "XML conditional section not closed\n";
308 break;
309 case XML_ERR_XMLDECL_NOT_STARTED:
310 errmsg = "Text declaration '<?xml' required\n";
311 break;
312 case XML_ERR_XMLDECL_NOT_FINISHED:
313 errmsg = "parsing XML declaration: '?>' expected\n";
314 break;
315 case XML_ERR_EXT_ENTITY_STANDALONE:
316 errmsg = "external parsed entities cannot be standalone\n";
317 break;
318 case XML_ERR_ENTITYREF_SEMICOL_MISSING:
319 errmsg = "EntityRef: expecting ';'\n";
320 break;
321 case XML_ERR_DOCTYPE_NOT_FINISHED:
322 errmsg = "DOCTYPE improperly terminated\n";
323 break;
324 case XML_ERR_LTSLASH_REQUIRED:
325 errmsg = "EndTag: '</' not found\n";
326 break;
327 case XML_ERR_EQUAL_REQUIRED:
328 errmsg = "expected '='\n";
329 break;
330 case XML_ERR_STRING_NOT_CLOSED:
331 errmsg = "String not closed expecting \" or '\n";
332 break;
333 case XML_ERR_STRING_NOT_STARTED:
334 errmsg = "String not started expecting ' or \"\n";
335 break;
336 case XML_ERR_ENCODING_NAME:
337 errmsg = "Invalid XML encoding name\n";
338 break;
339 case XML_ERR_STANDALONE_VALUE:
340 errmsg = "standalone accepts only 'yes' or 'no'\n";
341 break;
342 case XML_ERR_DOCUMENT_EMPTY:
343 errmsg = "Document is empty\n";
344 break;
345 case XML_ERR_DOCUMENT_END:
346 errmsg = "Extra content at the end of the document\n";
347 break;
348 case XML_ERR_NOT_WELL_BALANCED:
349 errmsg = "chunk is not well balanced\n";
350 break;
351 case XML_ERR_EXTRA_CONTENT:
352 errmsg = "extra content at the end of well balanced chunk\n";
353 break;
354 case XML_ERR_VERSION_MISSING:
355 errmsg = "Malformed declaration expecting version\n";
356 break;
357#if 0
358 case:
359 errmsg = "\n";
360 break;
361#endif
362 default:
363 errmsg = "Unregistered error message\n";
364 }
365 if (ctxt != NULL)
366 ctxt->errNo = error;
367 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
368 XML_ERR_FATAL, NULL, 0, info, NULL, NULL, 0, 0, errmsg,
369 info);
370 if (ctxt != NULL) {
371 ctxt->wellFormed = 0;
372 if (ctxt->recovery == 0)
373 ctxt->disableSAX = 1;
374 }
375}
376
377/**
378 * xmlFatalErrMsg:
379 * @ctxt: an XML parser context
380 * @error: the error number
381 * @msg: the error message
382 *
383 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
384 */
385static void
386xmlFatalErrMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
387 const char *msg)
388{
389 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
390 (ctxt->instate == XML_PARSER_EOF))
391 return;
392 if (ctxt != NULL)
393 ctxt->errNo = error;
394 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
395 XML_ERR_FATAL, NULL, 0, NULL, NULL, NULL, 0, 0, msg);
396 if (ctxt != NULL) {
397 ctxt->wellFormed = 0;
398 if (ctxt->recovery == 0)
399 ctxt->disableSAX = 1;
400 }
401}
402
403/**
404 * xmlWarningMsg:
405 * @ctxt: an XML parser context
406 * @error: the error number
407 * @msg: the error message
408 * @str1: extra data
409 * @str2: extra data
410 *
411 * Handle a warning.
412 */
413static void
414xmlWarningMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
415 const char *msg, const xmlChar *str1, const xmlChar *str2)
416{
417 xmlStructuredErrorFunc schannel = NULL;
418
419 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
420 (ctxt->instate == XML_PARSER_EOF))
421 return;
422 if ((ctxt != NULL) && (ctxt->sax != NULL) &&
423 (ctxt->sax->initialized == XML_SAX2_MAGIC))
424 schannel = ctxt->sax->serror;
425 __xmlRaiseError(schannel,
426 (ctxt->sax) ? ctxt->sax->warning : NULL,
427 ctxt->userData,
428 ctxt, NULL, XML_FROM_PARSER, error,
429 XML_ERR_WARNING, NULL, 0,
430 (const char *) str1, (const char *) str2, NULL, 0, 0,
431 msg, (const char *) str1, (const char *) str2);
432}
433
434/**
435 * xmlValidityError:
436 * @ctxt: an XML parser context
437 * @error: the error number
438 * @msg: the error message
439 * @str1: extra data
440 *
441 * Handle a validity error.
442 */
443static void
444xmlValidityError(xmlParserCtxtPtr ctxt, xmlParserErrors error,
445 const char *msg, const xmlChar *str1)
446{
447 xmlStructuredErrorFunc schannel = NULL;
448
449 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
450 (ctxt->instate == XML_PARSER_EOF))
451 return;
452 if (ctxt != NULL) {
453 ctxt->errNo = error;
454 if ((ctxt->sax != NULL) && (ctxt->sax->initialized == XML_SAX2_MAGIC))
455 schannel = ctxt->sax->serror;
456 }
457 __xmlRaiseError(schannel,
458 ctxt->vctxt.error, ctxt->vctxt.userData,
459 ctxt, NULL, XML_FROM_DTD, error,
460 XML_ERR_ERROR, NULL, 0, (const char *) str1,
461 NULL, NULL, 0, 0,
462 msg, (const char *) str1);
463 if (ctxt != NULL) {
464 ctxt->valid = 0;
465 }
466}
467
468/**
469 * xmlFatalErrMsgInt:
470 * @ctxt: an XML parser context
471 * @error: the error number
472 * @msg: the error message
473 * @val: an integer value
474 *
475 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
476 */
477static void
478xmlFatalErrMsgInt(xmlParserCtxtPtr ctxt, xmlParserErrors error,
479 const char *msg, int val)
480{
481 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
482 (ctxt->instate == XML_PARSER_EOF))
483 return;
484 if (ctxt != NULL)
485 ctxt->errNo = error;
486 __xmlRaiseError(NULL, NULL, NULL,
487 ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
488 NULL, 0, NULL, NULL, NULL, val, 0, msg, val);
489 if (ctxt != NULL) {
490 ctxt->wellFormed = 0;
491 if (ctxt->recovery == 0)
492 ctxt->disableSAX = 1;
493 }
494}
495
496/**
497 * xmlFatalErrMsgStrIntStr:
498 * @ctxt: an XML parser context
499 * @error: the error number
500 * @msg: the error message
501 * @str1: an string info
502 * @val: an integer value
503 * @str2: an string info
504 *
505 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
506 */
507static void
508xmlFatalErrMsgStrIntStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
509 const char *msg, const xmlChar *str1, int val,
510 const xmlChar *str2)
511{
512 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
513 (ctxt->instate == XML_PARSER_EOF))
514 return;
515 if (ctxt != NULL)
516 ctxt->errNo = error;
517 __xmlRaiseError(NULL, NULL, NULL,
518 ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
519 NULL, 0, (const char *) str1, (const char *) str2,
520 NULL, val, 0, msg, str1, val, str2);
521 if (ctxt != NULL) {
522 ctxt->wellFormed = 0;
523 if (ctxt->recovery == 0)
524 ctxt->disableSAX = 1;
525 }
526}
527
528/**
529 * xmlFatalErrMsgStr:
530 * @ctxt: an XML parser context
531 * @error: the error number
532 * @msg: the error message
533 * @val: a string value
534 *
535 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
536 */
537static void
538xmlFatalErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
539 const char *msg, const xmlChar * val)
540{
541 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
542 (ctxt->instate == XML_PARSER_EOF))
543 return;
544 if (ctxt != NULL)
545 ctxt->errNo = error;
546 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
547 XML_FROM_PARSER, error, XML_ERR_FATAL,
548 NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
549 val);
550 if (ctxt != NULL) {
551 ctxt->wellFormed = 0;
552 if (ctxt->recovery == 0)
553 ctxt->disableSAX = 1;
554 }
555}
556
557/**
558 * xmlErrMsgStr:
559 * @ctxt: an XML parser context
560 * @error: the error number
561 * @msg: the error message
562 * @val: a string value
563 *
564 * Handle a non fatal parser error
565 */
566static void
567xmlErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
568 const char *msg, const xmlChar * val)
569{
570 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
571 (ctxt->instate == XML_PARSER_EOF))
572 return;
573 if (ctxt != NULL)
574 ctxt->errNo = error;
575 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
576 XML_FROM_PARSER, error, XML_ERR_ERROR,
577 NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
578 val);
579}
580
581/**
582 * xmlNsErr:
583 * @ctxt: an XML parser context
584 * @error: the error number
585 * @msg: the message
586 * @info1: extra information string
587 * @info2: extra information string
588 *
589 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
590 */
591static void
592xmlNsErr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
593 const char *msg,
594 const xmlChar * info1, const xmlChar * info2,
595 const xmlChar * info3)
596{
597 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
598 (ctxt->instate == XML_PARSER_EOF))
599 return;
600 if (ctxt != NULL)
601 ctxt->errNo = error;
602 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error,
603 XML_ERR_ERROR, NULL, 0, (const char *) info1,
604 (const char *) info2, (const char *) info3, 0, 0, msg,
605 info1, info2, info3);
606 if (ctxt != NULL)
607 ctxt->nsWellFormed = 0;
608}
609
610/************************************************************************
611 * *
612 * Library wide options *
613 * *
614 ************************************************************************/
615
616/**
617 * xmlHasFeature:
618 * @feature: the feature to be examined
619 *
620 * Examines if the library has been compiled with a given feature.
621 *
622 * Returns a non-zero value if the feature exist, otherwise zero.
623 * Returns zero (0) if the feature does not exist or an unknown
624 * unknown feature is requested, non-zero otherwise.
625 */
626int
627xmlHasFeature(xmlFeature feature)
628{
629 switch (feature) {
630 case XML_WITH_THREAD:
631#ifdef LIBXML_THREAD_ENABLED
632 return(1);
633#else
634 return(0);
635#endif
636 case XML_WITH_TREE:
637#ifdef LIBXML_TREE_ENABLED
638 return(1);
639#else
640 return(0);
641#endif
642 case XML_WITH_OUTPUT:
643#ifdef LIBXML_OUTPUT_ENABLED
644 return(1);
645#else
646 return(0);
647#endif
648 case XML_WITH_PUSH:
649#ifdef LIBXML_PUSH_ENABLED
650 return(1);
651#else
652 return(0);
653#endif
654 case XML_WITH_READER:
655#ifdef LIBXML_READER_ENABLED
656 return(1);
657#else
658 return(0);
659#endif
660 case XML_WITH_PATTERN:
661#ifdef LIBXML_PATTERN_ENABLED
662 return(1);
663#else
664 return(0);
665#endif
666 case XML_WITH_WRITER:
667#ifdef LIBXML_WRITER_ENABLED
668 return(1);
669#else
670 return(0);
671#endif
672 case XML_WITH_SAX1:
673#ifdef LIBXML_SAX1_ENABLED
674 return(1);
675#else
676 return(0);
677#endif
678 case XML_WITH_FTP:
679#ifdef LIBXML_FTP_ENABLED
680 return(1);
681#else
682 return(0);
683#endif
684 case XML_WITH_HTTP:
685#ifdef LIBXML_HTTP_ENABLED
686 return(1);
687#else
688 return(0);
689#endif
690 case XML_WITH_VALID:
691#ifdef LIBXML_VALID_ENABLED
692 return(1);
693#else
694 return(0);
695#endif
696 case XML_WITH_HTML:
697#ifdef LIBXML_HTML_ENABLED
698 return(1);
699#else
700 return(0);
701#endif
702 case XML_WITH_LEGACY:
703#ifdef LIBXML_LEGACY_ENABLED
704 return(1);
705#else
706 return(0);
707#endif
708 case XML_WITH_C14N:
709#ifdef LIBXML_C14N_ENABLED
710 return(1);
711#else
712 return(0);
713#endif
714 case XML_WITH_CATALOG:
715#ifdef LIBXML_CATALOG_ENABLED
716 return(1);
717#else
718 return(0);
719#endif
720 case XML_WITH_XPATH:
721#ifdef LIBXML_XPATH_ENABLED
722 return(1);
723#else
724 return(0);
725#endif
726 case XML_WITH_XPTR:
727#ifdef LIBXML_XPTR_ENABLED
728 return(1);
729#else
730 return(0);
731#endif
732 case XML_WITH_XINCLUDE:
733#ifdef LIBXML_XINCLUDE_ENABLED
734 return(1);
735#else
736 return(0);
737#endif
738 case XML_WITH_ICONV:
739#ifdef LIBXML_ICONV_ENABLED
740 return(1);
741#else
742 return(0);
743#endif
744 case XML_WITH_ISO8859X:
745#ifdef LIBXML_ISO8859X_ENABLED
746 return(1);
747#else
748 return(0);
749#endif
750 case XML_WITH_UNICODE:
751#ifdef LIBXML_UNICODE_ENABLED
752 return(1);
753#else
754 return(0);
755#endif
756 case XML_WITH_REGEXP:
757#ifdef LIBXML_REGEXP_ENABLED
758 return(1);
759#else
760 return(0);
761#endif
762 case XML_WITH_AUTOMATA:
763#ifdef LIBXML_AUTOMATA_ENABLED
764 return(1);
765#else
766 return(0);
767#endif
768 case XML_WITH_EXPR:
769#ifdef LIBXML_EXPR_ENABLED
770 return(1);
771#else
772 return(0);
773#endif
774 case XML_WITH_SCHEMAS:
775#ifdef LIBXML_SCHEMAS_ENABLED
776 return(1);
777#else
778 return(0);
779#endif
780 case XML_WITH_SCHEMATRON:
781#ifdef LIBXML_SCHEMATRON_ENABLED
782 return(1);
783#else
784 return(0);
785#endif
786 case XML_WITH_MODULES:
787#ifdef LIBXML_MODULES_ENABLED
788 return(1);
789#else
790 return(0);
791#endif
792 case XML_WITH_DEBUG:
793#ifdef LIBXML_DEBUG_ENABLED
794 return(1);
795#else
796 return(0);
797#endif
798 case XML_WITH_DEBUG_MEM:
799#ifdef DEBUG_MEMORY_LOCATION
800 return(1);
801#else
802 return(0);
803#endif
804 case XML_WITH_DEBUG_RUN:
805#ifdef LIBXML_DEBUG_RUNTIME
806 return(1);
807#else
808 return(0);
809#endif
810 case XML_WITH_ZLIB:
811#ifdef LIBXML_ZLIB_ENABLED
812 return(1);
813#else
814 return(0);
815#endif
816 default:
817 break;
818 }
819 return(0);
820}
821
822/************************************************************************
823 * *
824 * SAX2 defaulted attributes handling *
825 * *
826 ************************************************************************/
827
828/**
829 * xmlDetectSAX2:
830 * @ctxt: an XML parser context
831 *
832 * Do the SAX2 detection and specific intialization
833 */
834static void
835xmlDetectSAX2(xmlParserCtxtPtr ctxt) {
836 if (ctxt == NULL) return;
837#ifdef LIBXML_SAX1_ENABLED
838 if ((ctxt->sax) && (ctxt->sax->initialized == XML_SAX2_MAGIC) &&
839 ((ctxt->sax->startElementNs != NULL) ||
840 (ctxt->sax->endElementNs != NULL))) ctxt->sax2 = 1;
841#else
842 ctxt->sax2 = 1;
843#endif /* LIBXML_SAX1_ENABLED */
844
845 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
846 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
847 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
848 if ((ctxt->str_xml==NULL) || (ctxt->str_xmlns==NULL) ||
849 (ctxt->str_xml_ns == NULL)) {
850 xmlErrMemory(ctxt, NULL);
851 }
852}
853
854typedef struct _xmlDefAttrs xmlDefAttrs;
855typedef xmlDefAttrs *xmlDefAttrsPtr;
856struct _xmlDefAttrs {
857 int nbAttrs; /* number of defaulted attributes on that element */
858 int maxAttrs; /* the size of the array */
859 const xmlChar *values[4]; /* array of localname/prefix/values */
860};
861
862/**
863 * xmlAddDefAttrs:
864 * @ctxt: an XML parser context
865 * @fullname: the element fullname
866 * @fullattr: the attribute fullname
867 * @value: the attribute value
868 *
869 * Add a defaulted attribute for an element
870 */
871static void
872xmlAddDefAttrs(xmlParserCtxtPtr ctxt,
873 const xmlChar *fullname,
874 const xmlChar *fullattr,
875 const xmlChar *value) {
876 xmlDefAttrsPtr defaults;
877 int len;
878 const xmlChar *name;
879 const xmlChar *prefix;
880
881 if (ctxt->attsDefault == NULL) {
882 ctxt->attsDefault = xmlHashCreateDict(10, ctxt->dict);
883 if (ctxt->attsDefault == NULL)
884 goto mem_error;
885 }
886
887 /*
888 * split the element name into prefix:localname , the string found
889 * are within the DTD and then not associated to namespace names.
890 */
891 name = xmlSplitQName3(fullname, &len);
892 if (name == NULL) {
893 name = xmlDictLookup(ctxt->dict, fullname, -1);
894 prefix = NULL;
895 } else {
896 name = xmlDictLookup(ctxt->dict, name, -1);
897 prefix = xmlDictLookup(ctxt->dict, fullname, len);
898 }
899
900 /*
901 * make sure there is some storage
902 */
903 defaults = xmlHashLookup2(ctxt->attsDefault, name, prefix);
904 if (defaults == NULL) {
905 defaults = (xmlDefAttrsPtr) xmlMalloc(sizeof(xmlDefAttrs) +
906 (4 * 4) * sizeof(const xmlChar *));
907 if (defaults == NULL)
908 goto mem_error;
909 defaults->nbAttrs = 0;
910 defaults->maxAttrs = 4;
911 xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix, defaults, NULL);
912 } else if (defaults->nbAttrs >= defaults->maxAttrs) {
913 xmlDefAttrsPtr temp;
914
915 temp = (xmlDefAttrsPtr) xmlRealloc(defaults, sizeof(xmlDefAttrs) +
916 (2 * defaults->maxAttrs * 4) * sizeof(const xmlChar *));
917 if (temp == NULL)
918 goto mem_error;
919 defaults = temp;
920 defaults->maxAttrs *= 2;
921 xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix, defaults, NULL);
922 }
923
924 /*
925 * Split the element name into prefix:localname , the string found
926 * are within the DTD and hen not associated to namespace names.
927 */
928 name = xmlSplitQName3(fullattr, &len);
929 if (name == NULL) {
930 name = xmlDictLookup(ctxt->dict, fullattr, -1);
931 prefix = NULL;
932 } else {
933 name = xmlDictLookup(ctxt->dict, name, -1);
934 prefix = xmlDictLookup(ctxt->dict, fullattr, len);
935 }
936
937 defaults->values[4 * defaults->nbAttrs] = name;
938 defaults->values[4 * defaults->nbAttrs + 1] = prefix;
939 /* intern the string and precompute the end */
940 len = xmlStrlen(value);
941 value = xmlDictLookup(ctxt->dict, value, len);
942 defaults->values[4 * defaults->nbAttrs + 2] = value;
943 defaults->values[4 * defaults->nbAttrs + 3] = value + len;
944 defaults->nbAttrs++;
945
946 return;
947
948mem_error:
949 xmlErrMemory(ctxt, NULL);
950 return;
951}
952
953/**
954 * xmlAddSpecialAttr:
955 * @ctxt: an XML parser context
956 * @fullname: the element fullname
957 * @fullattr: the attribute fullname
958 * @type: the attribute type
959 *
960 * Register that this attribute is not CDATA
961 */
962static void
963xmlAddSpecialAttr(xmlParserCtxtPtr ctxt,
964 const xmlChar *fullname,
965 const xmlChar *fullattr,
966 int type)
967{
968 if (ctxt->attsSpecial == NULL) {
969 ctxt->attsSpecial = xmlHashCreateDict(10, ctxt->dict);
970 if (ctxt->attsSpecial == NULL)
971 goto mem_error;
972 }
973
974 xmlHashAddEntry2(ctxt->attsSpecial, fullname, fullattr,
975 (void *) (long) type);
976 return;
977
978mem_error:
979 xmlErrMemory(ctxt, NULL);
980 return;
981}
982
983/**
984 * xmlCheckLanguageID:
985 * @lang: pointer to the string value
986 *
987 * Checks that the value conforms to the LanguageID production:
988 *
989 * NOTE: this is somewhat deprecated, those productions were removed from
990 * the XML Second edition.
991 *
992 * [33] LanguageID ::= Langcode ('-' Subcode)*
993 * [34] Langcode ::= ISO639Code | IanaCode | UserCode
994 * [35] ISO639Code ::= ([a-z] | [A-Z]) ([a-z] | [A-Z])
995 * [36] IanaCode ::= ('i' | 'I') '-' ([a-z] | [A-Z])+
996 * [37] UserCode ::= ('x' | 'X') '-' ([a-z] | [A-Z])+
997 * [38] Subcode ::= ([a-z] | [A-Z])+
998 *
999 * Returns 1 if correct 0 otherwise
1000 **/
1001int
1002xmlCheckLanguageID(const xmlChar * lang)
1003{
1004 const xmlChar *cur = lang;
1005
1006 if (cur == NULL)
1007 return (0);
1008 if (((cur[0] == 'i') && (cur[1] == '-')) ||
1009 ((cur[0] == 'I') && (cur[1] == '-'))) {
1010 /*
1011 * IANA code
1012 */
1013 cur += 2;
1014 while (((cur[0] >= 'A') && (cur[0] <= 'Z')) || /* non input consuming */
1015 ((cur[0] >= 'a') && (cur[0] <= 'z')))
1016 cur++;
1017 } else if (((cur[0] == 'x') && (cur[1] == '-')) ||
1018 ((cur[0] == 'X') && (cur[1] == '-'))) {
1019 /*
1020 * User code
1021 */
1022 cur += 2;
1023 while (((cur[0] >= 'A') && (cur[0] <= 'Z')) || /* non input consuming */
1024 ((cur[0] >= 'a') && (cur[0] <= 'z')))
1025 cur++;
1026 } else if (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
1027 ((cur[0] >= 'a') && (cur[0] <= 'z'))) {
1028 /*
1029 * ISO639
1030 */
1031 cur++;
1032 if (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
1033 ((cur[0] >= 'a') && (cur[0] <= 'z')))
1034 cur++;
1035 else
1036 return (0);
1037 } else
1038 return (0);
1039 while (cur[0] != 0) { /* non input consuming */
1040 if (cur[0] != '-')
1041 return (0);
1042 cur++;
1043 if (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
1044 ((cur[0] >= 'a') && (cur[0] <= 'z')))
1045 cur++;
1046 else
1047 return (0);
1048 while (((cur[0] >= 'A') && (cur[0] <= 'Z')) || /* non input consuming */
1049 ((cur[0] >= 'a') && (cur[0] <= 'z')))
1050 cur++;
1051 }
1052 return (1);
1053}
1054
1055/************************************************************************
1056 * *
1057 * Parser stacks related functions and macros *
1058 * *
1059 ************************************************************************/
1060
1061xmlEntityPtr xmlParseStringEntityRef(xmlParserCtxtPtr ctxt,
1062 const xmlChar ** str);
1063
1064#ifdef SAX2
1065/**
1066 * nsPush:
1067 * @ctxt: an XML parser context
1068 * @prefix: the namespace prefix or NULL
1069 * @URL: the namespace name
1070 *
1071 * Pushes a new parser namespace on top of the ns stack
1072 *
1073 * Returns -1 in case of error, -2 if the namespace should be discarded
1074 * and the index in the stack otherwise.
1075 */
1076static int
1077nsPush(xmlParserCtxtPtr ctxt, const xmlChar *prefix, const xmlChar *URL)
1078{
1079 if (ctxt->options & XML_PARSE_NSCLEAN) {
1080 int i;
1081 for (i = 0;i < ctxt->nsNr;i += 2) {
1082 if (ctxt->nsTab[i] == prefix) {
1083 /* in scope */
1084 if (ctxt->nsTab[i + 1] == URL)
1085 return(-2);
1086 /* out of scope keep it */
1087 break;
1088 }
1089 }
1090 }
1091 if ((ctxt->nsMax == 0) || (ctxt->nsTab == NULL)) {
1092 ctxt->nsMax = 10;
1093 ctxt->nsNr = 0;
1094 ctxt->nsTab = (const xmlChar **)
1095 xmlMalloc(ctxt->nsMax * sizeof(xmlChar *));
1096 if (ctxt->nsTab == NULL) {
1097 xmlErrMemory(ctxt, NULL);
1098 ctxt->nsMax = 0;
1099 return (-1);
1100 }
1101 } else if (ctxt->nsNr >= ctxt->nsMax) {
1102 ctxt->nsMax *= 2;
1103 ctxt->nsTab = (const xmlChar **)
1104 xmlRealloc((char *) ctxt->nsTab,
1105 ctxt->nsMax * sizeof(ctxt->nsTab[0]));
1106 if (ctxt->nsTab == NULL) {
1107 xmlErrMemory(ctxt, NULL);
1108 ctxt->nsMax /= 2;
1109 return (-1);
1110 }
1111 }
1112 ctxt->nsTab[ctxt->nsNr++] = prefix;
1113 ctxt->nsTab[ctxt->nsNr++] = URL;
1114 return (ctxt->nsNr);
1115}
1116/**
1117 * nsPop:
1118 * @ctxt: an XML parser context
1119 * @nr: the number to pop
1120 *
1121 * Pops the top @nr parser prefix/namespace from the ns stack
1122 *
1123 * Returns the number of namespaces removed
1124 */
1125static int
1126nsPop(xmlParserCtxtPtr ctxt, int nr)
1127{
1128 int i;
1129
1130 if (ctxt->nsTab == NULL) return(0);
1131 if (ctxt->nsNr < nr) {
1132 xmlGenericError(xmlGenericErrorContext, "Pbm popping %d NS\n", nr);
1133 nr = ctxt->nsNr;
1134 }
1135 if (ctxt->nsNr <= 0)
1136 return (0);
1137
1138 for (i = 0;i < nr;i++) {
1139 ctxt->nsNr--;
1140 ctxt->nsTab[ctxt->nsNr] = NULL;
1141 }
1142 return(nr);
1143}
1144#endif
1145
1146static int
1147xmlCtxtGrowAttrs(xmlParserCtxtPtr ctxt, int nr) {
1148 const xmlChar **atts;
1149 int *attallocs;
1150 int maxatts;
1151
1152 if (ctxt->atts == NULL) {
1153 maxatts = 55; /* allow for 10 attrs by default */
1154 atts = (const xmlChar **)
1155 xmlMalloc(maxatts * sizeof(xmlChar *));
1156 if (atts == NULL) goto mem_error;
1157 ctxt->atts = atts;
1158 attallocs = (int *) xmlMalloc((maxatts / 5) * sizeof(int));
1159 if (attallocs == NULL) goto mem_error;
1160 ctxt->attallocs = attallocs;
1161 ctxt->maxatts = maxatts;
1162 } else if (nr + 5 > ctxt->maxatts) {
1163 maxatts = (nr + 5) * 2;
1164 atts = (const xmlChar **) xmlRealloc((void *) ctxt->atts,
1165 maxatts * sizeof(const xmlChar *));
1166 if (atts == NULL) goto mem_error;
1167 ctxt->atts = atts;
1168 attallocs = (int *) xmlRealloc((void *) ctxt->attallocs,
1169 (maxatts / 5) * sizeof(int));
1170 if (attallocs == NULL) goto mem_error;
1171 ctxt->attallocs = attallocs;
1172 ctxt->maxatts = maxatts;
1173 }
1174 return(ctxt->maxatts);
1175mem_error:
1176 xmlErrMemory(ctxt, NULL);
1177 return(-1);
1178}
1179
1180/**
1181 * inputPush:
1182 * @ctxt: an XML parser context
1183 * @value: the parser input
1184 *
1185 * Pushes a new parser input on top of the input stack
1186 *
1187 * Returns 0 in case of error, the index in the stack otherwise
1188 */
1189int
1190inputPush(xmlParserCtxtPtr ctxt, xmlParserInputPtr value)
1191{
1192 if ((ctxt == NULL) || (value == NULL))
1193 return(0);
1194 if (ctxt->inputNr >= ctxt->inputMax) {
1195 ctxt->inputMax *= 2;
1196 ctxt->inputTab =
1197 (xmlParserInputPtr *) xmlRealloc(ctxt->inputTab,
1198 ctxt->inputMax *
1199 sizeof(ctxt->inputTab[0]));
1200 if (ctxt->inputTab == NULL) {
1201 xmlErrMemory(ctxt, NULL);
1202 return (0);
1203 }
1204 }
1205 ctxt->inputTab[ctxt->inputNr] = value;
1206 ctxt->input = value;
1207 return (ctxt->inputNr++);
1208}
1209/**
1210 * inputPop:
1211 * @ctxt: an XML parser context
1212 *
1213 * Pops the top parser input from the input stack
1214 *
1215 * Returns the input just removed
1216 */
1217xmlParserInputPtr
1218inputPop(xmlParserCtxtPtr ctxt)
1219{
1220 xmlParserInputPtr ret;
1221
1222 if (ctxt == NULL)
1223 return(NULL);
1224 if (ctxt->inputNr <= 0)
1225 return (NULL);
1226 ctxt->inputNr--;
1227 if (ctxt->inputNr > 0)
1228 ctxt->input = ctxt->inputTab[ctxt->inputNr - 1];
1229 else
1230 ctxt->input = NULL;
1231 ret = ctxt->inputTab[ctxt->inputNr];
1232 ctxt->inputTab[ctxt->inputNr] = NULL;
1233 return (ret);
1234}
1235/**
1236 * nodePush:
1237 * @ctxt: an XML parser context
1238 * @value: the element node
1239 *
1240 * Pushes a new element node on top of the node stack
1241 *
1242 * Returns 0 in case of error, the index in the stack otherwise
1243 */
1244int
1245nodePush(xmlParserCtxtPtr ctxt, xmlNodePtr value)
1246{
1247 if (ctxt == NULL) return(0);
1248 if (ctxt->nodeNr >= ctxt->nodeMax) {
1249 xmlNodePtr *tmp;
1250
1251 tmp = (xmlNodePtr *) xmlRealloc(ctxt->nodeTab,
1252 ctxt->nodeMax * 2 *
1253 sizeof(ctxt->nodeTab[0]));
1254 if (tmp == NULL) {
1255 xmlErrMemory(ctxt, NULL);
1256 return (0);
1257 }
1258 ctxt->nodeTab = tmp;
1259 ctxt->nodeMax *= 2;
1260 }
1261 if (((unsigned int) ctxt->nodeNr) > xmlParserMaxDepth) {
1262 xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR,
1263 "Excessive depth in document: change xmlParserMaxDepth = %d\n",
1264 xmlParserMaxDepth);
1265 ctxt->instate = XML_PARSER_EOF;
1266 return(0);
1267 }
1268 ctxt->nodeTab[ctxt->nodeNr] = value;
1269 ctxt->node = value;
1270 return (ctxt->nodeNr++);
1271}
1272/**
1273 * nodePop:
1274 * @ctxt: an XML parser context
1275 *
1276 * Pops the top element node from the node stack
1277 *
1278 * Returns the node just removed
1279 */
1280xmlNodePtr
1281nodePop(xmlParserCtxtPtr ctxt)
1282{
1283 xmlNodePtr ret;
1284
1285 if (ctxt == NULL) return(NULL);
1286 if (ctxt->nodeNr <= 0)
1287 return (NULL);
1288 ctxt->nodeNr--;
1289 if (ctxt->nodeNr > 0)
1290 ctxt->node = ctxt->nodeTab[ctxt->nodeNr - 1];
1291 else
1292 ctxt->node = NULL;
1293 ret = ctxt->nodeTab[ctxt->nodeNr];
1294 ctxt->nodeTab[ctxt->nodeNr] = NULL;
1295 return (ret);
1296}
1297
1298#ifdef LIBXML_PUSH_ENABLED
1299/**
1300 * nameNsPush:
1301 * @ctxt: an XML parser context
1302 * @value: the element name
1303 * @prefix: the element prefix
1304 * @URI: the element namespace name
1305 *
1306 * Pushes a new element name/prefix/URL on top of the name stack
1307 *
1308 * Returns -1 in case of error, the index in the stack otherwise
1309 */
1310static int
1311nameNsPush(xmlParserCtxtPtr ctxt, const xmlChar * value,
1312 const xmlChar *prefix, const xmlChar *URI, int nsNr)
1313{
1314 if (ctxt->nameNr >= ctxt->nameMax) {
1315 const xmlChar * *tmp;
1316 void **tmp2;
1317 ctxt->nameMax *= 2;
1318 tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
1319 ctxt->nameMax *
1320 sizeof(ctxt->nameTab[0]));
1321 if (tmp == NULL) {
1322 ctxt->nameMax /= 2;
1323 goto mem_error;
1324 }
1325 ctxt->nameTab = tmp;
1326 tmp2 = (void **) xmlRealloc((void * *)ctxt->pushTab,
1327 ctxt->nameMax * 3 *
1328 sizeof(ctxt->pushTab[0]));
1329 if (tmp2 == NULL) {
1330 ctxt->nameMax /= 2;
1331 goto mem_error;
1332 }
1333 ctxt->pushTab = tmp2;
1334 }
1335 ctxt->nameTab[ctxt->nameNr] = value;
1336 ctxt->name = value;
1337 ctxt->pushTab[ctxt->nameNr * 3] = (void *) prefix;
1338 ctxt->pushTab[ctxt->nameNr * 3 + 1] = (void *) URI;
1339 ctxt->pushTab[ctxt->nameNr * 3 + 2] = (void *) (long) nsNr;
1340 return (ctxt->nameNr++);
1341mem_error:
1342 xmlErrMemory(ctxt, NULL);
1343 return (-1);
1344}
1345/**
1346 * nameNsPop:
1347 * @ctxt: an XML parser context
1348 *
1349 * Pops the top element/prefix/URI name from the name stack
1350 *
1351 * Returns the name just removed
1352 */
1353static const xmlChar *
1354nameNsPop(xmlParserCtxtPtr ctxt)
1355{
1356 const xmlChar *ret;
1357
1358 if (ctxt->nameNr <= 0)
1359 return (NULL);
1360 ctxt->nameNr--;
1361 if (ctxt->nameNr > 0)
1362 ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
1363 else
1364 ctxt->name = NULL;
1365 ret = ctxt->nameTab[ctxt->nameNr];
1366 ctxt->nameTab[ctxt->nameNr] = NULL;
1367 return (ret);
1368}
1369#endif /* LIBXML_PUSH_ENABLED */
1370
1371/**
1372 * namePush:
1373 * @ctxt: an XML parser context
1374 * @value: the element name
1375 *
1376 * Pushes a new element name on top of the name stack
1377 *
1378 * Returns -1 in case of error, the index in the stack otherwise
1379 */
1380int
1381namePush(xmlParserCtxtPtr ctxt, const xmlChar * value)
1382{
1383 if (ctxt == NULL) return (-1);
1384
1385 if (ctxt->nameNr >= ctxt->nameMax) {
1386 const xmlChar * *tmp;
1387 ctxt->nameMax *= 2;
1388 tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
1389 ctxt->nameMax *
1390 sizeof(ctxt->nameTab[0]));
1391 if (tmp == NULL) {
1392 ctxt->nameMax /= 2;
1393 goto mem_error;
1394 }
1395 ctxt->nameTab = tmp;
1396 }
1397 ctxt->nameTab[ctxt->nameNr] = value;
1398 ctxt->name = value;
1399 return (ctxt->nameNr++);
1400mem_error:
1401 xmlErrMemory(ctxt, NULL);
1402 return (-1);
1403}
1404/**
1405 * namePop:
1406 * @ctxt: an XML parser context
1407 *
1408 * Pops the top element name from the name stack
1409 *
1410 * Returns the name just removed
1411 */
1412const xmlChar *
1413namePop(xmlParserCtxtPtr ctxt)
1414{
1415 const xmlChar *ret;
1416
1417 if ((ctxt == NULL) || (ctxt->nameNr <= 0))
1418 return (NULL);
1419 ctxt->nameNr--;
1420 if (ctxt->nameNr > 0)
1421 ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
1422 else
1423 ctxt->name = NULL;
1424 ret = ctxt->nameTab[ctxt->nameNr];
1425 ctxt->nameTab[ctxt->nameNr] = NULL;
1426 return (ret);
1427}
1428
1429static int spacePush(xmlParserCtxtPtr ctxt, int val) {
1430 if (ctxt->spaceNr >= ctxt->spaceMax) {
1431 ctxt->spaceMax *= 2;
1432 ctxt->spaceTab = (int *) xmlRealloc(ctxt->spaceTab,
1433 ctxt->spaceMax * sizeof(ctxt->spaceTab[0]));
1434 if (ctxt->spaceTab == NULL) {
1435 xmlErrMemory(ctxt, NULL);
1436 return(0);
1437 }
1438 }
1439 ctxt->spaceTab[ctxt->spaceNr] = val;
1440 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr];
1441 return(ctxt->spaceNr++);
1442}
1443
1444static int spacePop(xmlParserCtxtPtr ctxt) {
1445 int ret;
1446 if (ctxt->spaceNr <= 0) return(0);
1447 ctxt->spaceNr--;
1448 if (ctxt->spaceNr > 0)
1449 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr - 1];
1450 else
1451 ctxt->space = &ctxt->spaceTab[0];
1452 ret = ctxt->spaceTab[ctxt->spaceNr];
1453 ctxt->spaceTab[ctxt->spaceNr] = -1;
1454 return(ret);
1455}
1456
/*
 * Macros for accessing the content. Those should be used only by the parser,
 * and not exported. Every macro that touches the input expects a variable
 * named ctxt (the parser context) to be in scope at the point of use.
 *
 * Dirty macros, i.e. one often needs to make assumptions about the context
 * to use them
 *
 *   CUR_PTR return the current pointer to the xmlChar to be parsed.
 *           To be used with extreme caution since operations consuming
 *           characters may move the input buffer to a different location !
 *   CUR     returns the current xmlChar value, i.e. an 8 bit value.
 *           This should be used internally by the parser
 *           only to compare to ASCII values otherwise it would break when
 *           running with UTF-8 encoding.
 *   RAW     same as CUR but in the input buffer, bypass any token
 *           extraction that may have been done (both currently expand to
 *           the same expression)
 *   NXT(n)  returns the n'th next xmlChar. Same as CUR, it should be used
 *           only to compare on ASCII based substring.
 *   CMPn(s, c1, ..., cn)
 *           true when the first n bytes found at s are c1 ... cn, compared
 *           as unsigned char values, n being 4 to 10. The comparison stops
 *           at the first mismatch.
 *   SKIP(n) Skip n xmlChar, and must also be used only to skip ASCII defined
 *           strings without newlines within the parser.
 *   NEXT1   Skip 1 xmlChar, and must also be used only to skip 1 non-newline
 *           ASCII defined char within the parser.
 * Clean macros, not dependent of an ASCII context, expect UTF-8 encoding
 *
 *   NEXT    Skip to the next character, this does the proper decoding
 *           in UTF-8 mode. It also pop-up unfinished entities on the fly.
 *   NEXTL(l) Skip the current unicode character of l xmlChars long.
 *   CUR_CHAR(l) returns the current unicode character (int), set l
 *           to the number of xmlChars used for the encoding [0-5].
 *   CUR_SCHAR  same but operate on a string instead of the context
 *   COPY_BUF  copy the current unicode char to the target buffer, increment
 *            the index
 *   GROW, SHRINK  handling of input buffers
 */

#define RAW (*ctxt->input->cur)
#define CUR (*ctxt->input->cur)
#define NXT(val) ctxt->input->cur[(val)]
#define CUR_PTR ctxt->input->cur

/* All CMPn arguments are parenthesized so that any expression can be passed */
#define CMP4( s, c1, c2, c3, c4 ) \
  ( ((unsigned char *) (s))[ 0 ] == (c1) && \
    ((unsigned char *) (s))[ 1 ] == (c2) && \
    ((unsigned char *) (s))[ 2 ] == (c3) && \
    ((unsigned char *) (s))[ 3 ] == (c4) )
#define CMP5( s, c1, c2, c3, c4, c5 ) \
  ( CMP4( s, c1, c2, c3, c4 ) && ((unsigned char *) (s))[ 4 ] == (c5) )
#define CMP6( s, c1, c2, c3, c4, c5, c6 ) \
  ( CMP5( s, c1, c2, c3, c4, c5 ) && ((unsigned char *) (s))[ 5 ] == (c6) )
#define CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) \
  ( CMP6( s, c1, c2, c3, c4, c5, c6 ) && ((unsigned char *) (s))[ 6 ] == (c7) )
#define CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) \
  ( CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) && ((unsigned char *) (s))[ 7 ] == (c8) )
#define CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) \
  ( CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) && \
    ((unsigned char *) (s))[ 8 ] == (c9) )
#define CMP10( s, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10 ) \
  ( CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) && \
    ((unsigned char *) (s))[ 9 ] == (c10) )
1514
/*
 * SKIP(val): advance the current input by val xmlChars, adding val to
 * ctxt->nbChars and to the column counter (no line accounting, so the
 * skipped text must not contain newlines). Afterwards a '%' at the new
 * position is handed to xmlParserHandlePEReference(), and an exhausted
 * input that cannot be grown is popped.
 * NOTE: val is evaluated three times, do not pass an expression with
 * side effects.
 */
#define SKIP(val) do { \
    ctxt->nbChars += (val),ctxt->input->cur += (val),ctxt->input->col+=(val); \
    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt); \
    if ((*ctxt->input->cur == 0) && \
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0)) \
            xmlPopInput(ctxt); \
  } while (0)

/*
 * SKIPL(val): same as SKIP but walks the val xmlChars one by one so that
 * line and column counters stay correct when newlines are skipped.
 * val is parenthesized in the loop test so that any expression can be
 * passed; it is re-evaluated on every iteration, so it must not have
 * side effects.
 */
#define SKIPL(val) do { \
    int skipl; \
    for(skipl=0; skipl<(val); skipl++) { \
        if (*(ctxt->input->cur) == '\n') { \
        ctxt->input->line++; ctxt->input->col = 1; \
        } else ctxt->input->col++; \
        ctxt->nbChars++; \
        ctxt->input->cur++; \
    } \
    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt); \
    if ((*ctxt->input->cur == 0) && \
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0)) \
            xmlPopInput(ctxt); \
  } while (0)
1537
1538#define SHRINK if ((ctxt->progressive == 0) && \
1539 (ctxt->input->cur - ctxt->input->base > 2 * INPUT_CHUNK) && \
1540 (ctxt->input->end - ctxt->input->cur < 2 * INPUT_CHUNK)) \
1541 xmlSHRINK (ctxt);
1542
1543static void xmlSHRINK (xmlParserCtxtPtr ctxt) {
1544 xmlParserInputShrink(ctxt->input);
1545 if ((*ctxt->input->cur == 0) &&
1546 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
1547 xmlPopInput(ctxt);
1548 }
1549
1550#define GROW if ((ctxt->progressive == 0) && \
1551 (ctxt->input->end - ctxt->input->cur < INPUT_CHUNK)) \
1552 xmlGROW (ctxt);
1553
1554static void xmlGROW (xmlParserCtxtPtr ctxt) {
1555 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1556 if ((*ctxt->input->cur == 0) &&
1557 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
1558 xmlPopInput(ctxt);
1559}
1560
/* Skip blanks at the current position, see xmlSkipBlankChars() */
#define SKIP_BLANKS xmlSkipBlankChars(ctxt)

/* Skip to the next character through xmlNextChar() */
#define NEXT xmlNextChar(ctxt)

/*
 * NEXT1: skip exactly one xmlChar, counting it as one column and one
 * character (no newline handling), and try to grow the input if the
 * cursor lands on a 0 byte. Expands to a brace block.
 */
#define NEXT1 { \
        ctxt->input->col++; \
        ctxt->input->cur++; \
        ctxt->nbChars++; \
        if (*ctxt->input->cur == 0) \
            xmlParserInputGrow(ctxt->input, INPUT_CHUNK); \
    }

/*
 * NEXTL(l): skip the current character, which is l xmlChars long,
 * updating line/column, then hand a '%' found at the new position to
 * xmlParserHandlePEReference(). ctxt->nbChars is not updated here.
 */
#define NEXTL(l) do { \
    if (*(ctxt->input->cur) == '\n') { \
        ctxt->input->line++; ctxt->input->col = 1; \
    } else ctxt->input->col++; \
    ctxt->input->cur += (l); \
    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt); \
  } while (0)

/* Current character of the context / of string s; l receives its length */
#define CUR_CHAR(l) xmlCurrentChar(ctxt, &(l))
#define CUR_SCHAR(s, l) xmlStringCurrentChar(ctxt, (s), &(l))

/*
 * COPY_BUF(l,b,i,v): append character v to buffer b at index i and
 * advance i. When l is 1 the value is stored as a single xmlChar,
 * otherwise it is encoded by xmlCopyCharMultiByte(). Expands to a bare
 * if/else without a trailing semicolon; i is evaluated more than once
 * and must be a plain lvalue.
 */
#define COPY_BUF(l,b,i,v) \
    if ((l) == 1) (b)[(i)++] = (xmlChar) (v); \
    else (i) += xmlCopyCharMultiByte(&(b)[(i)],(v))
1587
1588/**
1589 * xmlSkipBlankChars:
1590 * @ctxt: the XML parser context
1591 *
1592 * skip all blanks character found at that point in the input streams.
1593 * It pops up finished entities in the process if allowable at that point.
1594 *
1595 * Returns the number of space chars skipped
1596 */
1597
1598int
1599xmlSkipBlankChars(xmlParserCtxtPtr ctxt) {
1600 int res = 0;
1601
1602 /*
1603 * It's Okay to use CUR/NEXT here since all the blanks are on
1604 * the ASCII range.
1605 */
1606 if ((ctxt->inputNr == 1) && (ctxt->instate != XML_PARSER_DTD)) {
1607 const xmlChar *cur;
1608 /*
1609 * if we are in the document content, go really fast
1610 */
1611 cur = ctxt->input->cur;
1612 while (IS_BLANK_CH(*cur)) {
1613 if (*cur == '\n') {
1614 ctxt->input->line++; ctxt->input->col = 1;
1615 }
1616 cur++;
1617 res++;
1618 if (*cur == 0) {
1619 ctxt->input->cur = cur;
1620 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1621 cur = ctxt->input->cur;
1622 }
1623 }
1624 ctxt->input->cur = cur;
1625 } else {
1626 int cur;
1627 do {
1628 cur = CUR;
1629 while (IS_BLANK_CH(cur)) { /* CHECKED tstblanks.xml */
1630 NEXT;
1631 cur = CUR;
1632 res++;
1633 }
1634 while ((cur == 0) && (ctxt->inputNr > 1) &&
1635 (ctxt->instate != XML_PARSER_COMMENT)) {
1636 xmlPopInput(ctxt);
1637 cur = CUR;
1638 }
1639 /*
1640 * Need to handle support of entities branching here
1641 */
1642 if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);
1643 } while (IS_BLANK(cur)); /* CHECKED tstblanks.xml */
1644 }
1645 return(res);
1646}
1647
1648/************************************************************************
1649 * *
1650 * Commodity functions to handle entities *
1651 * *
1652 ************************************************************************/
1653
1654/**
1655 * xmlPopInput:
1656 * @ctxt: an XML parser context
1657 *
1658 * xmlPopInput: the current input pointed by ctxt->input came to an end
1659 * pop it and return the next char.
1660 *
1661 * Returns the current xmlChar in the parser context
1662 */
1663xmlChar
1664xmlPopInput(xmlParserCtxtPtr ctxt) {
1665 if ((ctxt == NULL) || (ctxt->inputNr <= 1)) return(0);
1666 if (xmlParserDebugEntities)
1667 xmlGenericError(xmlGenericErrorContext,
1668 "Popping input %d\n", ctxt->inputNr);
1669 xmlFreeInputStream(inputPop(ctxt));
1670 if ((*ctxt->input->cur == 0) &&
1671 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
1672 return(xmlPopInput(ctxt));
1673 return(CUR);
1674}
1675
1676/**
1677 * xmlPushInput:
1678 * @ctxt: an XML parser context
1679 * @input: an XML parser input fragment (entity, XML fragment ...).
1680 *
1681 * xmlPushInput: switch to a new input stream which is stacked on top
1682 * of the previous one(s).
1683 */
1684void
1685xmlPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr input) {
1686 if (input == NULL) return;
1687
1688 if (xmlParserDebugEntities) {
1689 if ((ctxt->input != NULL) && (ctxt->input->filename))
1690 xmlGenericError(xmlGenericErrorContext,
1691 "%s(%d): ", ctxt->input->filename,
1692 ctxt->input->line);
1693 xmlGenericError(xmlGenericErrorContext,
1694 "Pushing input %d : %.30s\n", ctxt->inputNr+1, input->cur);
1695 }
1696 inputPush(ctxt, input);
1697 GROW;
1698}
1699
1700/**
1701 * xmlParseCharRef:
1702 * @ctxt: an XML parser context
1703 *
1704 * parse Reference declarations
1705 *
1706 * [66] CharRef ::= '&#' [0-9]+ ';' |
1707 * '&#x' [0-9a-fA-F]+ ';'
1708 *
1709 * [ WFC: Legal Character ]
1710 * Characters referred to using character references must match the
1711 * production for Char.
1712 *
1713 * Returns the value parsed (as an int), 0 in case of error
1714 */
1715int
1716xmlParseCharRef(xmlParserCtxtPtr ctxt) {
1717 unsigned int val = 0;
1718 int count = 0;
1719 unsigned int outofrange = 0;
1720
1721 /*
1722 * Using RAW/CUR/NEXT is okay since we are working on ASCII range here
1723 */
1724 if ((RAW == '&') && (NXT(1) == '#') &&
1725 (NXT(2) == 'x')) {
1726 SKIP(3);
1727 GROW;
1728 while (RAW != ';') { /* loop blocked by count */
1729 if (count++ > 20) {
1730 count = 0;
1731 GROW;
1732 }
1733 if ((RAW >= '0') && (RAW <= '9'))
1734 val = val * 16 + (CUR - '0');
1735 else if ((RAW >= 'a') && (RAW <= 'f') && (count < 20))
1736 val = val * 16 + (CUR - 'a') + 10;
1737 else if ((RAW >= 'A') && (RAW <= 'F') && (count < 20))
1738 val = val * 16 + (CUR - 'A') + 10;
1739 else {
1740 xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
1741 val = 0;
1742 break;
1743 }
1744 if (val > 0x10FFFF)
1745 outofrange = val;
1746
1747 NEXT;
1748 count++;
1749 }
1750 if (RAW == ';') {
1751 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
1752 ctxt->input->col++;
1753 ctxt->nbChars ++;
1754 ctxt->input->cur++;
1755 }
1756 } else if ((RAW == '&') && (NXT(1) == '#')) {
1757 SKIP(2);
1758 GROW;
1759 while (RAW != ';') { /* loop blocked by count */
1760 if (count++ > 20) {
1761 count = 0;
1762 GROW;
1763 }
1764 if ((RAW >= '0') && (RAW <= '9'))
1765 val = val * 10 + (CUR - '0');
1766 else {
1767 xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
1768 val = 0;
1769 break;
1770 }
1771 if (val > 0x10FFFF)
1772 outofrange = val;
1773
1774 NEXT;
1775 count++;
1776 }
1777 if (RAW == ';') {
1778 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
1779 ctxt->input->col++;
1780 ctxt->nbChars ++;
1781 ctxt->input->cur++;
1782 }
1783 } else {
1784 xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
1785 }
1786
1787 /*
1788 * [ WFC: Legal Character ]
1789 * Characters referred to using character references must match the
1790 * production for Char.
1791 */
1792 if ((IS_CHAR(val) && (outofrange == 0))) {
1793 return(val);
1794 } else {
1795 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
1796 "xmlParseCharRef: invalid xmlChar value %d\n",
1797 val);
1798 }
1799 return(0);
1800}
1801
1802/**
1803 * xmlParseStringCharRef:
1804 * @ctxt: an XML parser context
1805 * @str: a pointer to an index in the string
1806 *
1807 * parse Reference declarations, variant parsing from a string rather
1808 * than an an input flow.
1809 *
1810 * [66] CharRef ::= '&#' [0-9]+ ';' |
1811 * '&#x' [0-9a-fA-F]+ ';'
1812 *
1813 * [ WFC: Legal Character ]
1814 * Characters referred to using character references must match the
1815 * production for Char.
1816 *
1817 * Returns the value parsed (as an int), 0 in case of error, str will be
1818 * updated to the current value of the index
1819 */
1820static int
1821xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xmlChar **str) {
1822 const xmlChar *ptr;
1823 xmlChar cur;
1824 unsigned int val = 0;
1825 unsigned int outofrange = 0;
1826
1827 if ((str == NULL) || (*str == NULL)) return(0);
1828 ptr = *str;
1829 cur = *ptr;
1830 if ((cur == '&') && (ptr[1] == '#') && (ptr[2] == 'x')) {
1831 ptr += 3;
1832 cur = *ptr;
1833 while (cur != ';') { /* Non input consuming loop */
1834 if ((cur >= '0') && (cur <= '9'))
1835 val = val * 16 + (cur - '0');
1836 else if ((cur >= 'a') && (cur <= 'f'))
1837 val = val * 16 + (cur - 'a') + 10;
1838 else if ((cur >= 'A') && (cur <= 'F'))
1839 val = val * 16 + (cur - 'A') + 10;
1840 else {
1841 xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
1842 val = 0;
1843 break;
1844 }
1845 if (val > 0x10FFFF)
1846 outofrange = val;
1847
1848 ptr++;
1849 cur = *ptr;
1850 }
1851 if (cur == ';')
1852 ptr++;
1853 } else if ((cur == '&') && (ptr[1] == '#')){
1854 ptr += 2;
1855 cur = *ptr;
1856 while (cur != ';') { /* Non input consuming loops */
1857 if ((cur >= '0') && (cur <= '9'))
1858 val = val * 10 + (cur - '0');
1859 else {
1860 xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
1861 val = 0;
1862 break;
1863 }
1864 if (val > 0x10FFFF)
1865 outofrange = val;
1866
1867 ptr++;
1868 cur = *ptr;
1869 }
1870 if (cur == ';')
1871 ptr++;
1872 } else {
1873 xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
1874 return(0);
1875 }
1876 *str = ptr;
1877
1878 /*
1879 * [ WFC: Legal Character ]
1880 * Characters referred to using character references must match the
1881 * production for Char.
1882 */
1883 if ((IS_CHAR(val) && (outofrange == 0))) {
1884 return(val);
1885 } else {
1886 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
1887 "xmlParseStringCharRef: invalid xmlChar value %d\n",
1888 val);
1889 }
1890 return(0);
1891}
1892
1893/**
1894 * xmlNewBlanksWrapperInputStream:
1895 * @ctxt: an XML parser context
1896 * @entity: an Entity pointer
1897 *
1898 * Create a new input stream for wrapping
1899 * blanks around a PEReference
1900 *
1901 * Returns the new input stream or NULL
1902 */
1903
1904static void deallocblankswrapper (xmlChar *str) {xmlFree(str);}
1905
1906static xmlParserInputPtr
1907xmlNewBlanksWrapperInputStream(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
1908 xmlParserInputPtr input;
1909 xmlChar *buffer;
1910 size_t length;
1911 if (entity == NULL) {
1912 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
1913 "xmlNewBlanksWrapperInputStream entity\n");
1914 return(NULL);
1915 }
1916 if (xmlParserDebugEntities)
1917 xmlGenericError(xmlGenericErrorContext,
1918 "new blanks wrapper for entity: %s\n", entity->name);
1919 input = xmlNewInputStream(ctxt);
1920 if (input == NULL) {
1921 return(NULL);
1922 }
1923 length = xmlStrlen(entity->name) + 5;
1924 buffer = xmlMallocAtomic(length);
1925 if (buffer == NULL) {
1926 xmlErrMemory(ctxt, NULL);
1927 return(NULL);
1928 }
1929 buffer [0] = ' ';
1930 buffer [1] = '%';
1931 buffer [length-3] = ';';
1932 buffer [length-2] = ' ';
1933 buffer [length-1] = 0;
1934 memcpy(buffer + 2, entity->name, length - 5);
1935 input->free = deallocblankswrapper;
1936 input->base = buffer;
1937 input->cur = buffer;
1938 input->length = length;
1939 input->end = &buffer[length];
1940 return(input);
1941}
1942
1943/**
1944 * xmlParserHandlePEReference:
1945 * @ctxt: the parser context
1946 *
1947 * [69] PEReference ::= '%' Name ';'
1948 *
1949 * [ WFC: No Recursion ]
1950 * A parsed entity must not contain a recursive
1951 * reference to itself, either directly or indirectly.
1952 *
1953 * [ WFC: Entity Declared ]
1954 * In a document without any DTD, a document with only an internal DTD
1955 * subset which contains no parameter entity references, or a document
1956 * with "standalone='yes'", ... ... The declaration of a parameter
1957 * entity must precede any reference to it...
1958 *
1959 * [ VC: Entity Declared ]
1960 * In a document with an external subset or external parameter entities
1961 * with "standalone='no'", ... ... The declaration of a parameter entity
1962 * must precede any reference to it...
1963 *
1964 * [ WFC: In DTD ]
1965 * Parameter-entity references may only appear in the DTD.
1966 * NOTE: misleading but this is handled.
1967 *
1968 * A PEReference may have been detected in the current input stream
1969 * the handling is done accordingly to
1970 * http://www.w3.org/TR/REC-xml#entproc
1971 * i.e.
1972 * - Included in literal in entity values
1973 * - Included as Parameter Entity reference within DTDs
1974 */
1975void
1976xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) {
1977 const xmlChar *name;
1978 xmlEntityPtr entity = NULL;
1979 xmlParserInputPtr input;
1980
1981 if (RAW != '%') return;
1982 switch(ctxt->instate) {
1983 case XML_PARSER_CDATA_SECTION:
1984 return;
1985 case XML_PARSER_COMMENT:
1986 return;
1987 case XML_PARSER_START_TAG:
1988 return;
1989 case XML_PARSER_END_TAG:
1990 return;
1991 case XML_PARSER_EOF:
1992 xmlFatalErr(ctxt, XML_ERR_PEREF_AT_EOF, NULL);
1993 return;
1994 case XML_PARSER_PROLOG:
1995 case XML_PARSER_START:
1996 case XML_PARSER_MISC:
1997 xmlFatalErr(ctxt, XML_ERR_PEREF_IN_PROLOG, NULL);
1998 return;
1999 case XML_PARSER_ENTITY_DECL:
2000 case XML_PARSER_CONTENT:
2001 case XML_PARSER_ATTRIBUTE_VALUE:
2002 case XML_PARSER_PI:
2003 case XML_PARSER_SYSTEM_LITERAL:
2004 case XML_PARSER_PUBLIC_LITERAL:
2005 /* we just ignore it there */
2006 return;
2007 case XML_PARSER_EPILOG:
2008 xmlFatalErr(ctxt, XML_ERR_PEREF_IN_EPILOG, NULL);
2009 return;
2010 case XML_PARSER_ENTITY_VALUE:
2011 /*
2012 * NOTE: in the case of entity values, we don't do the
2013 * substitution here since we need the literal
2014 * entity value to be able to save the internal
2015 * subset of the document.
2016 * This will be handled by xmlStringDecodeEntities
2017 */
2018 return;
2019 case XML_PARSER_DTD:
2020 /*
2021 * [WFC: Well-Formedness Constraint: PEs in Internal Subset]
2022 * In the internal DTD subset, parameter-entity references
2023 * can occur only where markup declarations can occur, not
2024 * within markup declarations.
2025 * In that case this is handled in xmlParseMarkupDecl
2026 */
2027 if ((ctxt->external == 0) && (ctxt->inputNr == 1))
2028 return;
2029 if (IS_BLANK_CH(NXT(1)) || NXT(1) == 0)
2030 return;
2031 break;
2032 case XML_PARSER_IGNORE:
2033 return;
2034 }
2035
2036 NEXT;
2037 name = xmlParseName(ctxt);
2038 if (xmlParserDebugEntities)
2039 xmlGenericError(xmlGenericErrorContext,
2040 "PEReference: %s\n", name);
2041 if (name == NULL) {
2042 xmlFatalErr(ctxt, XML_ERR_PEREF_NO_NAME, NULL);
2043 } else {
2044 if (RAW == ';') {
2045 NEXT;
2046 if ((ctxt->sax != NULL) && (ctxt->sax->getParameterEntity != NULL))
2047 entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
2048 if (entity == NULL) {
2049
2050 /*
2051 * [ WFC: Entity Declared ]
2052 * In a document without any DTD, a document with only an
2053 * internal DTD subset which contains no parameter entity
2054 * references, or a document with "standalone='yes'", ...
2055 * ... The declaration of a parameter entity must precede
2056 * any reference to it...
2057 */
2058 if ((ctxt->standalone == 1) ||
2059 ((ctxt->hasExternalSubset == 0) &&
2060 (ctxt->hasPErefs == 0))) {
2061 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
2062 "PEReference: %%%s; not found\n", name);
2063 } else {
2064 /*
2065 * [ VC: Entity Declared ]
2066 * In a document with an external subset or external
2067 * parameter entities with "standalone='no'", ...
2068 * ... The declaration of a parameter entity must precede
2069 * any reference to it...
2070 */
2071 if ((ctxt->validate) && (ctxt->vctxt.error != NULL)) {
2072 xmlValidityError(ctxt, XML_WAR_UNDECLARED_ENTITY,
2073 "PEReference: %%%s; not found\n",
2074 name);
2075 } else
2076 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
2077 "PEReference: %%%s; not found\n",
2078 name, NULL);
2079 ctxt->valid = 0;
2080 }
2081 } else if (ctxt->input->free != deallocblankswrapper) {
2082 input = xmlNewBlanksWrapperInputStream(ctxt, entity);
2083 xmlPushInput(ctxt, input);
2084 } else {
2085 if ((entity->etype == XML_INTERNAL_PARAMETER_ENTITY) ||
2086 (entity->etype == XML_EXTERNAL_PARAMETER_ENTITY)) {
2087 xmlChar start[4];
2088 xmlCharEncoding enc;
2089
2090 /*
2091 * handle the extra spaces added before and after
2092 * c.f. http://www.w3.org/TR/REC-xml#as-PE
2093 * this is done independently.
2094 */
2095 input = xmlNewEntityInputStream(ctxt, entity);
2096 xmlPushInput(ctxt, input);
2097
2098 /*
2099 * Get the 4 first bytes and decode the charset
2100 * if enc != XML_CHAR_ENCODING_NONE
2101 * plug some encoding conversion routines.
2102 * Note that, since we may have some non-UTF8
2103 * encoding (like UTF16, bug 135229), the 'length'
2104 * is not known, but we can calculate based upon
2105 * the amount of data in the buffer.
2106 */
2107 GROW
2108 if ((ctxt->input->end - ctxt->input->cur)>=4) {
2109 start[0] = RAW;
2110 start[1] = NXT(1);
2111 start[2] = NXT(2);
2112 start[3] = NXT(3);
2113 enc = xmlDetectCharEncoding(start, 4);
2114 if (enc != XML_CHAR_ENCODING_NONE) {
2115 xmlSwitchEncoding(ctxt, enc);
2116 }
2117 }
2118
2119 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
2120 (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l' )) &&
2121 (IS_BLANK_CH(NXT(5)))) {
2122 xmlParseTextDecl(ctxt);
2123 }
2124 } else {
2125 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
2126 "PEReference: %s is not a parameter entity\n",
2127 name);
2128 }
2129 }
2130 } else {
2131 xmlFatalErr(ctxt, XML_ERR_PEREF_SEMICOL_MISSING, NULL);
2132 }
2133 }
2134}
2135
/*
 * growBuffer(buffer): double the capacity of a heap allocated xmlChar
 * buffer.
 *
 * The call site must provide, next to the pointer variable passed as
 * @buffer, an integer variable named <buffer>_size holding its capacity
 * and a mem_error label, which is jumped to when the reallocation fails.
 * In that case the pointer is left unchanged while <buffer>_size has
 * already been doubled. Expands to a brace block declaring a local
 * named tmp.
 */
#define growBuffer(buffer) {						\
    xmlChar *tmp;							\
    buffer##_size *= 2;							\
    tmp = (xmlChar *) xmlRealloc(buffer,				\
                                 buffer##_size * sizeof(xmlChar));	\
    if (tmp == NULL) goto mem_error;					\
    buffer = tmp;							\
}
2147
2148/**
2149 * xmlStringLenDecodeEntities:
2150 * @ctxt: the parser context
2151 * @str: the input string
2152 * @len: the string length
2153 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2154 * @end: an end marker xmlChar, 0 if none
2155 * @end2: an end marker xmlChar, 0 if none
2156 * @end3: an end marker xmlChar, 0 if none
2157 *
2158 * Takes a entity string content and process to do the adequate substitutions.
2159 *
2160 * [67] Reference ::= EntityRef | CharRef
2161 *
2162 * [69] PEReference ::= '%' Name ';'
2163 *
2164 * Returns A newly allocated string with the substitution done. The caller
2165 * must deallocate it !
2166 */
2167xmlChar *
2168xmlStringLenDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2169 int what, xmlChar end, xmlChar end2, xmlChar end3) {
2170 xmlChar *buffer = NULL;
2171 int buffer_size = 0;
2172
2173 xmlChar *current = NULL;
2174 const xmlChar *last;
2175 xmlEntityPtr ent;
2176 int c,l;
2177 int nbchars = 0;
2178
2179 if ((ctxt == NULL) || (str == NULL) || (len < 0))
2180 return(NULL);
2181 last = str + len;
2182
2183 if ((ctxt->depth > 40) || (ctxt->nbentities >= 500000)) {
2184 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
2185 return(NULL);
2186 }
2187
2188 /*
2189 * allocate a translation buffer.
2190 */
2191 buffer_size = XML_PARSER_BIG_BUFFER_SIZE;
2192 buffer = (xmlChar *) xmlMallocAtomic(buffer_size * sizeof(xmlChar));
2193 if (buffer == NULL) goto mem_error;
2194
2195 /*
2196 * OK loop until we reach one of the ending char or a size limit.
2197 * we are operating on already parsed values.
2198 */
2199 if (str < last)
2200 c = CUR_SCHAR(str, l);
2201 else
2202 c = 0;
2203 while ((c != 0) && (c != end) && /* non input consuming loop */
2204 (c != end2) && (c != end3)) {
2205
2206 if (c == 0) break;
2207 if ((c == '&') && (str[1] == '#')) {
2208 int val = xmlParseStringCharRef(ctxt, &str);
2209 if (val != 0) {
2210 COPY_BUF(0,buffer,nbchars,val);
2211 }
2212 if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
2213 growBuffer(buffer);
2214 }
2215 } else if ((c == '&') && (what & XML_SUBSTITUTE_REF)) {
2216 if (xmlParserDebugEntities)
2217 xmlGenericError(xmlGenericErrorContext,
2218 "String decoding Entity Reference: %.30s\n",
2219 str);
2220 ent = xmlParseStringEntityRef(ctxt, &str);
2221 if (ctxt->lastError.code == XML_ERR_ENTITY_LOOP)
2222 goto int_error;
2223 ctxt->nbentities++;
2224 if (ent != NULL)
2225 ctxt->nbentities += ent->checked;
2226 if ((ent != NULL) &&
2227 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
2228 if (ent->content != NULL) {
2229 COPY_BUF(0,buffer,nbchars,ent->content[0]);
2230 if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
2231 growBuffer(buffer);
2232 }
2233 } else {
2234 xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
2235 "predefined entity has no content\n");
2236 }
2237 } else if ((ent != NULL) && (ent->content != NULL)) {
2238 xmlChar *rep;
2239
2240 ctxt->depth++;
2241 rep = xmlStringDecodeEntities(ctxt, ent->content, what,
2242 0, 0, 0);
2243 ctxt->depth--;
2244 if (rep != NULL) {
2245 current = rep;
2246 while (*current != 0) { /* non input consuming loop */
2247 buffer[nbchars++] = *current++;
2248 if (nbchars >
2249 buffer_size - XML_PARSER_BUFFER_SIZE) {
2250 growBuffer(buffer);
2251 }
2252 }
2253 xmlFree(rep);
2254 }
2255 } else if (ent != NULL) {
2256 int i = xmlStrlen(ent->name);
2257 const xmlChar *cur = ent->name;
2258
2259 buffer[nbchars++] = '&';
2260 if (nbchars > buffer_size - i - XML_PARSER_BUFFER_SIZE) {
2261 growBuffer(buffer);
2262 }
2263 for (;i > 0;i--)
2264 buffer[nbchars++] = *cur++;
2265 buffer[nbchars++] = ';';
2266 }
2267 } else if (c == '%' && (what & XML_SUBSTITUTE_PEREF)) {
2268 if (xmlParserDebugEntities)
2269 xmlGenericError(xmlGenericErrorContext,
2270 "String decoding PE Reference: %.30s\n", str);
2271 ent = xmlParseStringPEReference(ctxt, &str);
2272 if (ctxt->lastError.code == XML_ERR_ENTITY_LOOP)
2273 goto int_error;
2274 ctxt->nbentities++;
2275 if (ent != NULL)
2276 ctxt->nbentities += ent->checked;
2277 if (ent != NULL) {
2278 xmlChar *rep;
2279
2280 ctxt->depth++;
2281 rep = xmlStringDecodeEntities(ctxt, ent->content, what,
2282 0, 0, 0);
2283 ctxt->depth--;
2284 if (rep != NULL) {
2285 current = rep;
2286 while (*current != 0) { /* non input consuming loop */
2287 buffer[nbchars++] = *current++;
2288 if (nbchars >
2289 buffer_size - XML_PARSER_BUFFER_SIZE) {
2290 growBuffer(buffer);
2291 }
2292 }
2293 xmlFree(rep);
2294 }
2295 }
2296 } else {
2297 COPY_BUF(l,buffer,nbchars,c);
2298 str += l;
2299 if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
2300 growBuffer(buffer);
2301 }
2302 }
2303 if (str < last)
2304 c = CUR_SCHAR(str, l);
2305 else
2306 c = 0;
2307 }
2308 buffer[nbchars++] = 0;
2309 return(buffer);
2310
2311mem_error:
2312 xmlErrMemory(ctxt, NULL);
2313int_error:
2314 if (buffer != NULL)
2315 xmlFree(buffer);
2316 return(NULL);
2317}
2318
2319/**
2320 * xmlStringDecodeEntities:
2321 * @ctxt: the parser context
2322 * @str: the input string
2323 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2324 * @end: an end marker xmlChar, 0 if none
2325 * @end2: an end marker xmlChar, 0 if none
2326 * @end3: an end marker xmlChar, 0 if none
2327 *
2328 * Takes a entity string content and process to do the adequate substitutions.
2329 *
2330 * [67] Reference ::= EntityRef | CharRef
2331 *
2332 * [69] PEReference ::= '%' Name ';'
2333 *
2334 * Returns A newly allocated string with the substitution done. The caller
2335 * must deallocate it !
2336 */
2337xmlChar *
2338xmlStringDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int what,
2339 xmlChar end, xmlChar end2, xmlChar end3) {
2340 if ((ctxt == NULL) || (str == NULL)) return(NULL);
2341 return(xmlStringLenDecodeEntities(ctxt, str, xmlStrlen(str), what,
2342 end, end2, end3));
2343}
2344
2345/************************************************************************
2346 * *
2347 * Commodity functions, cleanup needed ? *
2348 * *
2349 ************************************************************************/
2350
2351/**
2352 * areBlanks:
2353 * @ctxt: an XML parser context
2354 * @str: a xmlChar *
2355 * @len: the size of @str
2356 * @blank_chars: we know the chars are blanks
2357 *
2358 * Is this a sequence of blank chars that one can ignore ?
2359 *
2360 * Returns 1 if ignorable 0 otherwise.
2361 */
2362
2363static int areBlanks(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2364 int blank_chars) {
2365 int i, ret;
2366 xmlNodePtr lastChild;
2367
2368 /*
2369 * Don't spend time trying to differentiate them, the same callback is
2370 * used !
2371 */
2372 if (ctxt->sax->ignorableWhitespace == ctxt->sax->characters)
2373 return(0);
2374
2375 /*
2376 * Check for xml:space value.
2377 */
2378 if ((ctxt->space == NULL) || (*(ctxt->space) == 1) ||
2379 (*(ctxt->space) == -2))
2380 return(0);
2381
2382 /*
2383 * Check that the string is made of blanks
2384 */
2385 if (blank_chars == 0) {
2386 for (i = 0;i < len;i++)
2387 if (!(IS_BLANK_CH(str[i]))) return(0);
2388 }
2389
2390 /*
2391 * Look if the element is mixed content in the DTD if available
2392 */
2393 if (ctxt->node == NULL) return(0);
2394 if (ctxt->myDoc != NULL) {
2395 ret = xmlIsMixedElement(ctxt->myDoc, ctxt->node->name);
2396 if (ret == 0) return(1);
2397 if (ret == 1) return(0);
2398 }
2399
2400 /*
2401 * Otherwise, heuristic :-\
2402 */
2403 if ((RAW != '<') && (RAW != 0xD)) return(0);
2404 if ((ctxt->node->children == NULL) &&
2405 (RAW == '<') && (NXT(1) == '/')) return(0);
2406
2407 lastChild = xmlGetLastChild(ctxt->node);
2408 if (lastChild == NULL) {
2409 if ((ctxt->node->type != XML_ELEMENT_NODE) &&
2410 (ctxt->node->content != NULL)) return(0);
2411 } else if (xmlNodeIsText(lastChild))
2412 return(0);
2413 else if ((ctxt->node->children != NULL) &&
2414 (xmlNodeIsText(ctxt->node->children)))
2415 return(0);
2416 return(1);
2417}
2418
2419/************************************************************************
2420 * *
2421 * Extra stuff for namespace support *
2422 * Relates to http://www.w3.org/TR/WD-xml-names *
2423 * *
2424 ************************************************************************/
2425
2426/**
2427 * xmlSplitQName:
2428 * @ctxt: an XML parser context
2429 * @name: an XML parser context
2430 * @prefix: a xmlChar **
2431 *
2432 * parse an UTF8 encoded XML qualified name string
2433 *
2434 * [NS 5] QName ::= (Prefix ':')? LocalPart
2435 *
2436 * [NS 6] Prefix ::= NCName
2437 *
2438 * [NS 7] LocalPart ::= NCName
2439 *
2440 * Returns the local part, and prefix is updated
2441 * to get the Prefix if any.
2442 */
2443
2444xmlChar *
2445xmlSplitQName(xmlParserCtxtPtr ctxt, const xmlChar *name, xmlChar **prefix) {
2446 xmlChar buf[XML_MAX_NAMELEN + 5];
2447 xmlChar *buffer = NULL;
2448 int len = 0;
2449 int max = XML_MAX_NAMELEN;
2450 xmlChar *ret = NULL;
2451 const xmlChar *cur = name;
2452 int c;
2453
2454 if (prefix == NULL) return(NULL);
2455 *prefix = NULL;
2456
2457 if (cur == NULL) return(NULL);
2458
2459#ifndef XML_XML_NAMESPACE
2460 /* xml: prefix is not really a namespace */
2461 if ((cur[0] == 'x') && (cur[1] == 'm') &&
2462 (cur[2] == 'l') && (cur[3] == ':'))
2463 return(xmlStrdup(name));
2464#endif
2465
2466 /* nasty but well=formed */
2467 if (cur[0] == ':')
2468 return(xmlStrdup(name));
2469
2470 c = *cur++;
2471 while ((c != 0) && (c != ':') && (len < max)) { /* tested bigname.xml */
2472 buf[len++] = c;
2473 c = *cur++;
2474 }
2475 if (len >= max) {
2476 /*
2477 * Okay someone managed to make a huge name, so he's ready to pay
2478 * for the processing speed.
2479 */
2480 max = len * 2;
2481
2482 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
2483 if (buffer == NULL) {
2484 xmlErrMemory(ctxt, NULL);
2485 return(NULL);
2486 }
2487 memcpy(buffer, buf, len);
2488 while ((c != 0) && (c != ':')) { /* tested bigname.xml */
2489 if (len + 10 > max) {
2490 xmlChar *tmp;
2491
2492 max *= 2;
2493 tmp = (xmlChar *) xmlRealloc(buffer,
2494 max * sizeof(xmlChar));
2495 if (tmp == NULL) {
2496 xmlFree(tmp);
2497 xmlErrMemory(ctxt, NULL);
2498 return(NULL);
2499 }
2500 buffer = tmp;
2501 }
2502 buffer[len++] = c;
2503 c = *cur++;
2504 }
2505 buffer[len] = 0;
2506 }
2507
2508 if ((c == ':') && (*cur == 0)) {
2509 if (buffer != NULL)
2510 xmlFree(buffer);
2511 *prefix = NULL;
2512 return(xmlStrdup(name));
2513 }
2514
2515 if (buffer == NULL)
2516 ret = xmlStrndup(buf, len);
2517 else {
2518 ret = buffer;
2519 buffer = NULL;
2520 max = XML_MAX_NAMELEN;
2521 }
2522
2523
2524 if (c == ':') {
2525 c = *cur;
2526 *prefix = ret;
2527 if (c == 0) {
2528 return(xmlStrndup(BAD_CAST "", 0));
2529 }
2530 len = 0;
2531
2532 /*
2533 * Check that the first character is proper to start
2534 * a new name
2535 */
2536 if (!(((c >= 0x61) && (c <= 0x7A)) ||
2537 ((c >= 0x41) && (c <= 0x5A)) ||
2538 (c == '_') || (c == ':'))) {
2539 int l;
2540 int first = CUR_SCHAR(cur, l);
2541
2542 if (!IS_LETTER(first) && (first != '_')) {
2543 xmlFatalErrMsgStr(ctxt, XML_NS_ERR_QNAME,
2544 "Name %s is not XML Namespace compliant\n",
2545 name);
2546 }
2547 }
2548 cur++;
2549
2550 while ((c != 0) && (len < max)) { /* tested bigname2.xml */
2551 buf[len++] = c;
2552 c = *cur++;
2553 }
2554 if (len >= max) {
2555 /*
2556 * Okay someone managed to make a huge name, so he's ready to pay
2557 * for the processing speed.
2558 */
2559 max = len * 2;
2560
2561 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
2562 if (buffer == NULL) {
2563 xmlErrMemory(ctxt, NULL);
2564 return(NULL);
2565 }
2566 memcpy(buffer, buf, len);
2567 while (c != 0) { /* tested bigname2.xml */
2568 if (len + 10 > max) {
2569 xmlChar *tmp;
2570
2571 max *= 2;
2572 tmp = (xmlChar *) xmlRealloc(buffer,
2573 max * sizeof(xmlChar));
2574 if (tmp == NULL) {
2575 xmlErrMemory(ctxt, NULL);
2576 xmlFree(buffer);
2577 return(NULL);
2578 }
2579 buffer = tmp;
2580 }
2581 buffer[len++] = c;
2582 c = *cur++;
2583 }
2584 buffer[len] = 0;
2585 }
2586
2587 if (buffer == NULL)
2588 ret = xmlStrndup(buf, len);
2589 else {
2590 ret = buffer;
2591 }
2592 }
2593
2594 return(ret);
2595}
2596
2597/************************************************************************
2598 * *
2599 * The parser itself *
2600 * Relates to http://www.w3.org/TR/REC-xml *
2601 * *
2602 ************************************************************************/
2603
/*
 * Forward declarations: xmlParseNameComplex() is the fallback called by
 * xmlParseName() and is defined after it; xmlParseAttValueInternal() is
 * called by xmlParseAttValue() further down.
 */
2604static const xmlChar * xmlParseNameComplex(xmlParserCtxtPtr ctxt);
2605static xmlChar * xmlParseAttValueInternal(xmlParserCtxtPtr ctxt,
2606 int *len, int *alloc, int normalize);
2607
2608/**
2609 * xmlParseName:
2610 * @ctxt: an XML parser context
2611 *
2612 * parse an XML name.
2613 *
2614 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
2615 * CombiningChar | Extender
2616 *
2617 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
2618 *
2619 * [6] Names ::= Name (#x20 Name)*
2620 *
2621 * Returns the Name parsed or NULL
2622 */
2623
2624const xmlChar *
2625xmlParseName(xmlParserCtxtPtr ctxt) {
2626 const xmlChar *in;
2627 const xmlChar *ret;
2628 int count = 0;
2629
2630 GROW;
2631
2632 /*
2633 * Accelerator for simple ASCII names
2634 */
2635 in = ctxt->input->cur;
2636 if (((*in >= 0x61) && (*in <= 0x7A)) ||
2637 ((*in >= 0x41) && (*in <= 0x5A)) ||
2638 (*in == '_') || (*in == ':')) {
2639 in++;
2640 while (((*in >= 0x61) && (*in <= 0x7A)) ||
2641 ((*in >= 0x41) && (*in <= 0x5A)) ||
2642 ((*in >= 0x30) && (*in <= 0x39)) ||
2643 (*in == '_') || (*in == '-') ||
2644 (*in == ':') || (*in == '.'))
2645 in++;
2646 if ((*in > 0) && (*in < 0x80)) {
2647 count = in - ctxt->input->cur;
2648 ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
2649 ctxt->input->cur = in;
2650 ctxt->nbChars += count;
2651 ctxt->input->col += count;
2652 if (ret == NULL)
2653 xmlErrMemory(ctxt, NULL);
2654 return(ret);
2655 }
2656 }
2657 return(xmlParseNameComplex(ctxt));
2658}
2659
2660/**
2661 * xmlParseNameAndCompare:
2662 * @ctxt: an XML parser context
2663 *
2664 * parse an XML name and compares for match
2665 * (specialized for endtag parsing)
2666 *
2667 * Returns NULL for an illegal name, (xmlChar*) 1 for success
2668 * and the name for mismatch
2669 */
2670
2671static const xmlChar *
2672xmlParseNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *other) {
2673 register const xmlChar *cmp = other;
2674 register const xmlChar *in;
2675 const xmlChar *ret;
2676
2677 GROW;
2678
2679 in = ctxt->input->cur;
2680 while (*in != 0 && *in == *cmp) {
2681 ++in;
2682 ++cmp;
2683 ctxt->input->col++;
2684 }
2685 if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
2686 /* success */
2687 ctxt->input->cur = in;
2688 return (const xmlChar*) 1;
2689 }
2690 /* failure (or end of input buffer), check with full function */
2691 ret = xmlParseName (ctxt);
2692 /* strings coming from the dictionnary direct compare possible */
2693 if (ret == other) {
2694 return (const xmlChar*) 1;
2695 }
2696 return ret;
2697}
2698
/*
 * xmlParseNameComplex:
 * @ctxt:  an XML parser context
 *
 * General Name parser, called by xmlParseName() when its ASCII-only scan
 * does not apply.  Characters are decoded one at a time with CUR_CHAR and
 * consumed with NEXTL; len accumulates the byte length of the name.
 *
 * Returns the xmlDictLookup() result for the name, or NULL when the first
 * character cannot start a Name.
 */
2699static const xmlChar *
2700xmlParseNameComplex(xmlParserCtxtPtr ctxt) {
2701 int len = 0, l;
2702 int c;
2703 int count = 0;
2704
2705 /*
2706 * Handler for more complex cases
2707 */
2708 GROW;
2709 c = CUR_CHAR(l);
/* reject at once anything that is not a Letter, '_' or ':' */
2710 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
2711 (!IS_LETTER(c) && (c != '_') &&
2712 (c != ':'))) {
2713 return(NULL);
2714 }
2715
2716 while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
2717 ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
2718 (c == '.') || (c == '-') ||
2719 (c == '_') || (c == ':') ||
2720 (IS_COMBINING(c)) ||
2721 (IS_EXTENDER(c)))) {
/* invoke GROW again every hundred-odd characters */
2722 if (count++ > 100) {
2723 count = 0;
2724 GROW;
2725 }
2726 len += l;
2727 NEXTL(l);
2728 c = CUR_CHAR(l);
2729 }
/*
 * The name is the len bytes just before the cursor.  When the cursor sits
 * on '\n' right after a '\r' the start is taken one byte further back;
 * NOTE(review): presumably this compensates for how the cursor macros
 * treat a CR-LF pair -- confirm against their definitions.
 */
2730 if ((*ctxt->input->cur == '\n') && (ctxt->input->cur[-1] == '\r'))
2731 return(xmlDictLookup(ctxt->dict, ctxt->input->cur - (len + 1), len));
2732 return(xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len));
2733}
2734
2735/**
2736 * xmlParseStringName:
2737 * @ctxt: an XML parser context
2738 * @str: a pointer to the string pointer (IN/OUT)
2739 *
2740 * parse an XML name.
2741 *
2742 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
2743 * CombiningChar | Extender
2744 *
2745 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
2746 *
2747 * [6] Names ::= Name (#x20 Name)*
2748 *
2749 * Returns the Name parsed or NULL. The @str pointer
2750 * is updated to the current location in the string.
2751 */
2752
2753static xmlChar *
2754xmlParseStringName(xmlParserCtxtPtr ctxt, const xmlChar** str) {
2755 xmlChar buf[XML_MAX_NAMELEN + 5];
2756 const xmlChar *cur = *str;
2757 int len = 0, l;
2758 int c;
2759
2760 c = CUR_SCHAR(cur, l);
2761 if (!IS_LETTER(c) && (c != '_') &&
2762 (c != ':')) {
2763 return(NULL);
2764 }
2765
2766 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigentname.xml */
2767 (c == '.') || (c == '-') ||
2768 (c == '_') || (c == ':') ||
2769 (IS_COMBINING(c)) ||
2770 (IS_EXTENDER(c))) {
2771 COPY_BUF(l,buf,len,c);
2772 cur += l;
2773 c = CUR_SCHAR(cur, l);
2774 if (len >= XML_MAX_NAMELEN) { /* test bigentname.xml */
2775 /*
2776 * Okay someone managed to make a huge name, so he's ready to pay
2777 * for the processing speed.
2778 */
2779 xmlChar *buffer;
2780 int max = len * 2;
2781
2782 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
2783 if (buffer == NULL) {
2784 xmlErrMemory(ctxt, NULL);
2785 return(NULL);
2786 }
2787 memcpy(buffer, buf, len);
2788 while ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
2789 /* test bigentname.xml */
2790 (c == '.') || (c == '-') ||
2791 (c == '_') || (c == ':') ||
2792 (IS_COMBINING(c)) ||
2793 (IS_EXTENDER(c))) {
2794 if (len + 10 > max) {
2795 xmlChar *tmp;
2796 max *= 2;
2797 tmp = (xmlChar *) xmlRealloc(buffer,
2798 max * sizeof(xmlChar));
2799 if (tmp == NULL) {
2800 xmlErrMemory(ctxt, NULL);
2801 xmlFree(buffer);
2802 return(NULL);
2803 }
2804 buffer = tmp;
2805 }
2806 COPY_BUF(l,buffer,len,c);
2807 cur += l;
2808 c = CUR_SCHAR(cur, l);
2809 }
2810 buffer[len] = 0;
2811 *str = cur;
2812 return(buffer);
2813 }
2814 }
2815 *str = cur;
2816 return(xmlStrndup(buf, len));
2817}
2818
2819/**
2820 * xmlParseNmtoken:
2821 * @ctxt: an XML parser context
2822 *
2823 * parse an XML Nmtoken.
2824 *
2825 * [7] Nmtoken ::= (NameChar)+
2826 *
2827 * [8] Nmtokens ::= Nmtoken (#x20 Nmtoken)*
2828 *
2829 * Returns the Nmtoken parsed or NULL
2830 */
2831
2832xmlChar *
2833xmlParseNmtoken(xmlParserCtxtPtr ctxt) {
2834 xmlChar buf[XML_MAX_NAMELEN + 5];
2835 int len = 0, l;
2836 int c;
2837 int count = 0;
2838
2839 GROW;
2840 c = CUR_CHAR(l);
2841
2842 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigtoken.xml */
2843 (c == '.') || (c == '-') ||
2844 (c == '_') || (c == ':') ||
2845 (IS_COMBINING(c)) ||
2846 (IS_EXTENDER(c))) {
2847 if (count++ > 100) {
2848 count = 0;
2849 GROW;
2850 }
2851 COPY_BUF(l,buf,len,c);
2852 NEXTL(l);
2853 c = CUR_CHAR(l);
2854 if (len >= XML_MAX_NAMELEN) {
2855 /*
2856 * Okay someone managed to make a huge token, so he's ready to pay
2857 * for the processing speed.
2858 */
2859 xmlChar *buffer;
2860 int max = len * 2;
2861
2862 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
2863 if (buffer == NULL) {
2864 xmlErrMemory(ctxt, NULL);
2865 return(NULL);
2866 }
2867 memcpy(buffer, buf, len);
2868 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigtoken.xml */
2869 (c == '.') || (c == '-') ||
2870 (c == '_') || (c == ':') ||
2871 (IS_COMBINING(c)) ||
2872 (IS_EXTENDER(c))) {
2873 if (count++ > 100) {
2874 count = 0;
2875 GROW;
2876 }
2877 if (len + 10 > max) {
2878 xmlChar *tmp;
2879
2880 max *= 2;
2881 tmp = (xmlChar *) xmlRealloc(buffer,
2882 max * sizeof(xmlChar));
2883 if (tmp == NULL) {
2884 xmlErrMemory(ctxt, NULL);
2885 xmlFree(buffer);
2886 return(NULL);
2887 }
2888 buffer = tmp;
2889 }
2890 COPY_BUF(l,buffer,len,c);
2891 NEXTL(l);
2892 c = CUR_CHAR(l);
2893 }
2894 buffer[len] = 0;
2895 return(buffer);
2896 }
2897 }
2898 if (len == 0)
2899 return(NULL);
2900 return(xmlStrndup(buf, len));
2901}
2902
2903/**
2904 * xmlParseEntityValue:
2905 * @ctxt: an XML parser context
2906 * @orig: if non-NULL store a copy of the original entity value
 *        (once the closing quote has been found, *orig receives the raw
 *        buffer itself, before parameter entity substitution; it is set
 *        even if the substitution step then returns NULL)
2907 *
2908 * parse a value for ENTITY declarations
2909 *
2910 * [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' |
2911 * "'" ([^%&'] | PEReference | Reference)* "'"
2912 *
 * Works in three steps: copy the quoted literal into a buffer, check every
 * '%' and '&' (other than "&#") in that copy for the "Name ;" shape, then
 * run xmlStringDecodeEntities() with XML_SUBSTITUTE_PEREF on it.
 *
2913 * Returns the EntityValue parsed with reference substituted or NULL
2914 */
2915
2916xmlChar *
2917xmlParseEntityValue(xmlParserCtxtPtr ctxt, xmlChar **orig) {
2918 xmlChar *buf = NULL;
2919 int len = 0;
2920 int size = XML_PARSER_BUFFER_SIZE;
2921 int c, l;
2922 xmlChar stop;
2923 xmlChar *ret = NULL;
2924 const xmlChar *cur = NULL;
2925 xmlParserInputPtr input;
2926
/* the opening quote decides which quote character ends the literal */
2927 if (RAW == '"') stop = '"';
2928 else if (RAW == '\'') stop = '\'';
2929 else {
2930 xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_STARTED, NULL);
2931 return(NULL);
2932 }
2933 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
2934 if (buf == NULL) {
2935 xmlErrMemory(ctxt, NULL);
2936 return(NULL);
2937 }
2938
2939 /*
2940 * The content of the entity definition is copied in a buffer.
2941 */
2942
2943 ctxt->instate = XML_PARSER_ENTITY_VALUE;
/* remember the input in use at the opening quote, see the NOTE below */
2944 input = ctxt->input;
2945 GROW;
2946 NEXT;
2947 c = CUR_CHAR(l);
2948 /*
2949 * NOTE: 4.4.5 Included in Literal
2950 * When a parameter entity reference appears in a literal entity
2951 * value, ... a single or double quote character in the replacement
2952 * text is always treated as a normal data character and will not
2953 * terminate the literal.
2954 * In practice it means we stop the loop only when back at parsing
2955 * the initial entity and the quote is found
2956 */
2957 while ((IS_CHAR(c)) && ((c != stop) || /* checked */
2958 (ctxt->input != input))) {
/* keep at least 5 spare bytes before appending the next character */
2959 if (len + 5 >= size) {
2960 xmlChar *tmp;
2961
2962 size *= 2;
2963 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
2964 if (tmp == NULL) {
2965 xmlErrMemory(ctxt, NULL);
2966 xmlFree(buf);
2967 return(NULL);
2968 }
2969 buf = tmp;
2970 }
2971 COPY_BUF(l,buf,len,c);
2972 NEXTL(l);
2973 /*
2974 * Pop-up of finished entities.
2975 */
2976 while ((RAW == 0) && (ctxt->inputNr > 1)) /* non input consuming */
2977 xmlPopInput(ctxt);
2978
2979 GROW;
2980 c = CUR_CHAR(l);
/* a 0 here gets one more GROW and a second read before the loop test */
2981 if (c == 0) {
2982 GROW;
2983 c = CUR_CHAR(l);
2984 }
2985 }
2986 buf[len] = 0;
2987
2988 /*
2989 * Raise problem w.r.t. '&' and '%' being used in non-entities
2990 * reference constructs. Note Charref will be handled in
2991 * xmlStringDecodeEntities()
2992 */
2993 cur = buf;
2994 while (*cur != 0) { /* non input consuming */
2995 if ((*cur == '%') || ((*cur == '&') && (cur[1] != '#'))) {
2996 xmlChar *name;
2997 xmlChar tmp = *cur;
2998
2999 cur++;
/* xmlParseStringName() moves cur past the name when it finds one */
3000 name = xmlParseStringName(ctxt, &cur);
3001 if ((name == NULL) || (*cur != ';')) {
3002 xmlFatalErrMsgInt(ctxt, XML_ERR_ENTITY_CHAR_ERROR,
3003 "EntityValue: '%c' forbidden except for entities references\n",
3004 tmp);
3005 }
/* a '%' found while inSubset is 1 and only one input is stacked is an error */
3006 if ((tmp == '%') && (ctxt->inSubset == 1) &&
3007 (ctxt->inputNr == 1)) {
3008 xmlFatalErr(ctxt, XML_ERR_ENTITY_PE_INTERNAL, NULL);
3009 }
3010 if (name != NULL)
3011 xmlFree(name);
/* do not step past the terminating 0 with the cur++ below */
3012 if (*cur == 0)
3013 break;
3014 }
3015 cur++;
3016 }
3017
3018 /*
3019 * Then PEReference entities are substituted.
3020 */
/* no closing quote was reached: report it, drop the buffer, return NULL */
3021 if (c != stop) {
3022 xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_FINISHED, NULL);
3023 xmlFree(buf);
3024 } else {
3025 NEXT;
3026 /*
3027 * NOTE: 4.4.7 Bypassed
3028 * When a general entity reference appears in the EntityValue in
3029 * an entity declaration, it is bypassed and left as is.
3030 * so XML_SUBSTITUTE_REF is not set here.
3031 */
3032 ret = xmlStringDecodeEntities(ctxt, buf, XML_SUBSTITUTE_PEREF,
3033 0, 0, 0);
/* the raw literal either goes to the caller through *orig or is freed */
3034 if (orig != NULL)
3035 *orig = buf;
3036 else
3037 xmlFree(buf);
3038 }
3039
3040 return(ret);
3041}
3042
3043/**
3044 * xmlParseAttValueComplex:
3045 * @ctxt: an XML parser context
3046 * @len: the resulting attribute len
3047 * @normalize: wether to apply the inner normalization
3048 *
3049 * parse a value for an attribute, this is the fallback function
3050 * of xmlParseAttValue() when the attribute parsing requires handling
3051 * of non-ASCII characters, or normalization compaction.
3052 *
3053 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
3054 */
3055static xmlChar *
3056xmlParseAttValueComplex(xmlParserCtxtPtr ctxt, int *attlen, int normalize) {
3057 xmlChar limit = 0;
3058 xmlChar *buf = NULL;
3059 int len = 0;
3060 int buf_size = 0;
3061 int c, l, in_space = 0;
3062 xmlChar *current = NULL;
3063 xmlEntityPtr ent;
3064
3065 if (NXT(0) == '"') {
3066 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
3067 limit = '"';
3068 NEXT;
3069 } else if (NXT(0) == '\'') {
3070 limit = '\'';
3071 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
3072 NEXT;
3073 } else {
3074 xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
3075 return(NULL);
3076 }
3077
3078 /*
3079 * allocate a translation buffer.
3080 */
3081 buf_size = XML_PARSER_BUFFER_SIZE;
3082 buf = (xmlChar *) xmlMallocAtomic(buf_size * sizeof(xmlChar));
3083 if (buf == NULL) goto mem_error;
3084
3085 /*
3086 * OK loop until we reach one of the ending char or a size limit.
3087 */
3088 c = CUR_CHAR(l);
3089 while ((NXT(0) != limit) && /* checked */
3090 (IS_CHAR(c)) && (c != '<')) {
3091 if (c == 0) break;
3092 if (c == '&') {
3093 in_space = 0;
3094 if (NXT(1) == '#') {
3095 int val = xmlParseCharRef(ctxt);
3096
3097 if (val == '&') {
3098 if (ctxt->replaceEntities) {
3099 if (len > buf_size - 10) {
3100 growBuffer(buf);
3101 }
3102 buf[len++] = '&';
3103 } else {
3104 /*
3105 * The reparsing will be done in xmlStringGetNodeList()
3106 * called by the attribute() function in SAX.c
3107 */
3108 if (len > buf_size - 10) {
3109 growBuffer(buf);
3110 }
3111 buf[len++] = '&';
3112 buf[len++] = '#';
3113 buf[len++] = '3';
3114 buf[len++] = '8';
3115 buf[len++] = ';';
3116 }
3117 } else {
3118 if (len > buf_size - 10) {
3119 growBuffer(buf);
3120 }
3121 len += xmlCopyChar(0, &buf[len], val);
3122 }
3123 } else {
3124 ent = xmlParseEntityRef(ctxt);
3125 ctxt->nbentities++;
3126 if (ent != NULL)
3127 ctxt->nbentities += ent->checked;
3128 if ((ent != NULL) &&
3129 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
3130 if (len > buf_size - 10) {
3131 growBuffer(buf);
3132 }
3133 if ((ctxt->replaceEntities == 0) &&
3134 (ent->content[0] == '&')) {
3135 buf[len++] = '&';
3136 buf[len++] = '#';
3137 buf[len++] = '3';
3138 buf[len++] = '8';
3139 buf[len++] = ';';
3140 } else {
3141 buf[len++] = ent->content[0];
3142 }
3143 } else if ((ent != NULL) &&
3144 (ctxt->replaceEntities != 0)) {
3145 xmlChar *rep;
3146
3147 if (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) {
3148 rep = xmlStringDecodeEntities(ctxt, ent->content,
3149 XML_SUBSTITUTE_REF,
3150 0, 0, 0);
3151 if (rep != NULL) {
3152 current = rep;
3153 while (*current != 0) { /* non input consuming */
3154 buf[len++] = *current++;
3155 if (len > buf_size - 10) {
3156 growBuffer(buf);
3157 }
3158 }
3159 xmlFree(rep);
3160 }
3161 } else {
3162 if (len > buf_size - 10) {
3163 growBuffer(buf);
3164 }
3165 if (ent->content != NULL)
3166 buf[len++] = ent->content[0];
3167 }
3168 } else if (ent != NULL) {
3169 int i = xmlStrlen(ent->name);
3170 const xmlChar *cur = ent->name;
3171
3172 /*
3173 * This may look absurd but is needed to detect
3174 * entities problems
3175 */
3176 if ((ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
3177 (ent->content != NULL)) {
3178 xmlChar *rep;
3179 rep = xmlStringDecodeEntities(ctxt, ent->content,
3180 XML_SUBSTITUTE_REF, 0, 0, 0);
3181 if (rep != NULL)
3182 xmlFree(rep);
3183 }
3184
3185 /*
3186 * Just output the reference
3187 */
3188 buf[len++] = '&';
3189 if (len > buf_size - i - 10) {
3190 growBuffer(buf);
3191 }
3192 for (;i > 0;i--)
3193 buf[len++] = *cur++;
3194 buf[len++] = ';';
3195 }
3196 }
3197 } else {
3198 if ((c == 0x20) || (c == 0xD) || (c == 0xA) || (c == 0x9)) {
3199 if ((len != 0) || (!normalize)) {
3200 if ((!normalize) || (!in_space)) {
3201 COPY_BUF(l,buf,len,0x20);
3202 if (len > buf_size - 10) {
3203 growBuffer(buf);
3204 }
3205 }
3206 in_space = 1;
3207 }
3208 } else {
3209 in_space = 0;
3210 COPY_BUF(l,buf,len,c);
3211 if (len > buf_size - 10) {
3212 growBuffer(buf);
3213 }
3214 }
3215 NEXTL(l);
3216 }
3217 GROW;
3218 c = CUR_CHAR(l);
3219 }
3220 if ((in_space) && (normalize)) {
3221 while (buf[len - 1] == 0x20) len--;
3222 }
3223 buf[len] = 0;
3224 if (RAW == '<') {
3225 xmlFatalErr(ctxt, XML_ERR_LT_IN_ATTRIBUTE, NULL);
3226 } else if (RAW != limit) {
3227 if ((c != 0) && (!IS_CHAR(c))) {
3228 xmlFatalErrMsg(ctxt, XML_ERR_INVALID_CHAR,
3229 "invalid character in attribute value\n");
3230 } else {
3231 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
3232 "AttValue: ' expected\n");
3233 }
3234 } else
3235 NEXT;
3236 if (attlen != NULL) *attlen = len;
3237 return(buf);
3238
3239mem_error:
3240 xmlErrMemory(ctxt, NULL);
3241 return(NULL);
3242}
3243
3244/**
3245 * xmlParseAttValue:
3246 * @ctxt: an XML parser context
3247 *
3248 * parse a value for an attribute
3249 * Note: the parser won't do substitution of entities here, this
3250 * will be handled later in xmlStringGetNodeList
3251 *
3252 * [10] AttValue ::= '"' ([^<&"] | Reference)* '"' |
3253 * "'" ([^<&'] | Reference)* "'"
3254 *
3255 * 3.3.3 Attribute-Value Normalization:
3256 * Before the value of an attribute is passed to the application or
3257 * checked for validity, the XML processor must normalize it as follows:
3258 * - a character reference is processed by appending the referenced
3259 * character to the attribute value
3260 * - an entity reference is processed by recursively processing the
3261 * replacement text of the entity
3262 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
3263 * appending #x20 to the normalized value, except that only a single
3264 * #x20 is appended for a "#xD#xA" sequence that is part of an external
3265 * parsed entity or the literal entity value of an internal parsed entity
3266 * - other characters are processed by appending them to the normalized value
3267 * If the declared value is not CDATA, then the XML processor must further
3268 * process the normalized attribute value by discarding any leading and
3269 * trailing space (#x20) characters, and by replacing sequences of space
3270 * (#x20) characters by a single space (#x20) character.
3271 * All attributes for which no declaration has been read should be treated
3272 * by a non-validating parser as if declared CDATA.
3273 *
3274 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
3275 */
3276
3277
3278xmlChar *
3279xmlParseAttValue(xmlParserCtxtPtr ctxt) {
3280 if ((ctxt == NULL) || (ctxt->input == NULL)) return(NULL);
3281 return(xmlParseAttValueInternal(ctxt, NULL, NULL, 0));
3282}
3283
3284/**
3285 * xmlParseSystemLiteral:
3286 * @ctxt: an XML parser context
3287 *
3288 * parse an XML Literal
3289 *
3290 * [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
3291 *
 * ctxt->instate is XML_PARSER_SYSTEM_LITERAL while the content is read and
 * is put back to its previous value before returning.  When the closing
 * quote is missing an error is raised and what was read is still returned.
 *
3292 * Returns the SystemLiteral parsed or NULL
 * (NULL when no opening quote is found or when memory runs out)
3293 */
3294
3295xmlChar *
3296xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) {
3297 xmlChar *buf = NULL;
3298 int len = 0;
3299 int size = XML_PARSER_BUFFER_SIZE;
3300 int cur, l;
3301 xmlChar stop;
3302 int state = ctxt->instate; /* saved, restored before returning */
3303 int count = 0;
3304
3305 SHRINK;
/* the opening quote decides which quote character ends the literal */
3306 if (RAW == '"') {
3307 NEXT;
3308 stop = '"';
3309 } else if (RAW == '\'') {
3310 NEXT;
3311 stop = '\'';
3312 } else {
3313 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
3314 return(NULL);
3315 }
3316
3317 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
3318 if (buf == NULL) {
3319 xmlErrMemory(ctxt, NULL);
3320 return(NULL);
3321 }
3322 ctxt->instate = XML_PARSER_SYSTEM_LITERAL;
3323 cur = CUR_CHAR(l);
3324 while ((IS_CHAR(cur)) && (cur != stop)) { /* checked */
/* keep at least 5 spare bytes before appending the next character */
3325 if (len + 5 >= size) {
3326 xmlChar *tmp;
3327
3328 size *= 2;
3329 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3330 if (tmp == NULL) {
3331 xmlFree(buf);
3332 xmlErrMemory(ctxt, NULL);
3333 ctxt->instate = (xmlParserInputState) state;
3334 return(NULL);
3335 }
3336 buf = tmp;
3337 }
/* invoke GROW again every fifty-odd characters */
3338 count++;
3339 if (count > 50) {
3340 GROW;
3341 count = 0;
3342 }
3343 COPY_BUF(l,buf,len,cur);
3344 NEXTL(l);
3345 cur = CUR_CHAR(l);
/* a 0 here gets GROW and SHRINK, then a second read before the loop test */
3346 if (cur == 0) {
3347 GROW;
3348 SHRINK;
3349 cur = CUR_CHAR(l);
3350 }
3351 }
3352 buf[len] = 0;
3353 ctxt->instate = (xmlParserInputState) state;
/* the loop ends on the stop quote or on a character IS_CHAR rejects */
3354 if (!IS_CHAR(cur)) {
3355 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
3356 } else {
3357 NEXT;
3358 }
3359 return(buf);
3360}
3361
3362/**
3363 * xmlParsePubidLiteral:
3364 * @ctxt: an XML parser context
3365 *
3366 * parse an XML public literal
3367 *
3368 * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
3369 *
3370 * Returns the PubidLiteral parsed or NULL.
3371 */
3372
3373xmlChar *
3374xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) {
3375 xmlChar *buf = NULL;
3376 int len = 0;
3377 int size = XML_PARSER_BUFFER_SIZE;
3378 xmlChar cur;
3379 xmlChar stop;
3380 int count = 0;
3381 xmlParserInputState oldstate = ctxt->instate;
3382
3383 SHRINK;
3384 if (RAW == '"') {
3385 NEXT;
3386 stop = '"';
3387 } else if (RAW == '\'') {
3388 NEXT;
3389 stop = '\'';
3390 } else {
3391 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
3392 return(NULL);
3393 }
3394 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
3395 if (buf == NULL) {
3396 xmlErrMemory(ctxt, NULL);
3397 return(NULL);
3398 }
3399 ctxt->instate = XML_PARSER_PUBLIC_LITERAL;
3400 cur = CUR;
3401 while ((IS_PUBIDCHAR_CH(cur)) && (cur != stop)) { /* checked */
3402 if (len + 1 >= size) {
3403 xmlChar *tmp;
3404
3405 size *= 2;
3406 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3407 if (tmp == NULL) {
3408 xmlErrMemory(ctxt, NULL);
3409 xmlFree(buf);
3410 return(NULL);
3411 }
3412 buf = tmp;
3413 }
3414 buf[len++] = cur;
3415 count++;
3416 if (count > 50) {
3417 GROW;
3418 count = 0;
3419 }
3420 NEXT;
3421 cur = CUR;
3422 if (cur == 0) {
3423 GROW;
3424 SHRINK;
3425 cur = CUR;
3426 }
3427 }
3428 buf[len] = 0;
3429 if (cur != stop) {
3430 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
3431 } else {
3432 NEXT;
3433 }
3434 ctxt->instate = oldstate;
3435 return(buf);
3436}
3437
/* Forward declaration; the definition is not in this part of the file. */
3438void xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata);
3439
3440/*
3441 * used for the test in the inner loop of the char data testing
 *
 * Indexed by byte value.  An entry holds its own index for 0x09 and for
 * the bytes 0x20..0x7F other than '&' (0x26), '<' (0x3C) and ']' (0x5D).
 * Every other entry is 0x00: the remaining bytes below 0x20, including
 * 0x0A and 0x0D, and all bytes from 0x80 up.
3442 */
3443static const unsigned char test_char_data[256] = {
3444 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
3445 0x00, 0x09, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x9, CR/LF separated */
3446 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
3447 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
3448 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x00, 0x27, /* & */
3449 0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x2D, 0x2E, 0x2F,
3450 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,
3451 0x38, 0x39, 0x3A, 0x3B, 0x00, 0x3D, 0x3E, 0x3F, /* < */
3452 0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47,
3453 0x48, 0x49, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F,
3454 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57,
3455 0x58, 0x59, 0x5A, 0x5B, 0x5C, 0x00, 0x5E, 0x5F, /* ] */
3456 0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
3457 0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
3458 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
3459 0x78, 0x79, 0x7A, 0x7B, 0x7C, 0x7D, 0x7E, 0x7F,
3460 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* non-ascii */
3461 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
3462 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
3463 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
3464 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
3465 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
3466 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
3467 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
3468 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
3469 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
3470 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
3471 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
3472 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
3473 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
3474 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
3475 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
3476};
3477
3478/**
3479 * xmlParseCharData:
3480 * @ctxt: an XML parser context
3481 * @cdata: int indicating whether we are within a CDATA section
3482 *
3483 * parse a CharData section.
3484 * if we are within a CDATA section ']]>' marks an end of section.
3485 *
3486 * The right angle bracket (>) may be represented using the string "&gt;",
3487 * and must, for compatibility, be escaped using "&gt;" or a character
3488 * reference when it appears in the string "]]>" in content, when that
3489 * string is not marking the end of a CDATA section.
3490 *
3491 * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
3492 */
3493
3494void
3495xmlParseCharData(xmlParserCtxtPtr ctxt, int cdata) {
3496 const xmlChar *in;
3497 int nbchar = 0;
3498 int line = ctxt->input->line;
3499 int col = ctxt->input->col;
3500 int ccol;
3501
3502 SHRINK;
3503 GROW;
3504 /*
3505 * Accelerated common case where input don't need to be
3506 * modified before passing it to the handler.
3507 */
3508 if (!cdata) {
3509 in = ctxt->input->cur;
3510 do {
3511get_more_space:
3512 while (*in == 0x20) in++;
3513 if (*in == 0xA) {
3514 do {
3515 ctxt->input->line++; ctxt->input->col = 1;
3516 in++;
3517 } while (*in == 0xA);
3518 goto get_more_space;
3519 }
3520 if (*in == '<') {
3521 nbchar = in - ctxt->input->cur;
3522 if (nbchar > 0) {
3523 const xmlChar *tmp = ctxt->input->cur;
3524 ctxt->input->cur = in;
3525
3526 if ((ctxt->sax != NULL) &&
3527 (ctxt->sax->ignorableWhitespace !=
3528 ctxt->sax->characters)) {
3529 if (areBlanks(ctxt, tmp, nbchar, 1)) {
3530 if (ctxt->sax->ignorableWhitespace != NULL)
3531 ctxt->sax->ignorableWhitespace(ctxt->userData,
3532 tmp, nbchar);
3533 } else {
3534 if (ctxt->sax->characters != NULL)
3535 ctxt->sax->characters(ctxt->userData,
3536 tmp, nbchar);
3537 if (*ctxt->space == -1)
3538 *ctxt->space = -2;
3539 }
3540 } else if ((ctxt->sax != NULL) &&
3541 (ctxt->sax->characters != NULL)) {
3542 ctxt->sax->characters(ctxt->userData,
3543 tmp, nbchar);
3544 }
3545 }
3546 return;
3547 }
3548
3549get_more:
3550 ccol = ctxt->input->col;
3551 while (test_char_data[*in]) {
3552 in++;
3553 ccol++;
3554 }
3555 ctxt->input->col = ccol;
3556 if (*in == 0xA) {
3557 do {
3558 ctxt->input->line++; ctxt->input->col = 1;
3559 in++;
3560 } while (*in == 0xA);
3561 goto get_more;
3562 }
3563 if (*in == ']') {
3564 if ((in[1] == ']') && (in[2] == '>')) {
3565 xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
3566 ctxt->input->cur = in;
3567 return;
3568 }
3569 in++;
3570 ctxt->input->col++;
3571 goto get_more;
3572 }
3573 nbchar = in - ctxt->input->cur;
3574 if (nbchar > 0) {
3575 if ((ctxt->sax != NULL) &&
3576 (ctxt->sax->ignorableWhitespace !=
3577 ctxt->sax->characters) &&
3578 (IS_BLANK_CH(*ctxt->input->cur))) {
3579 const xmlChar *tmp = ctxt->input->cur;
3580 ctxt->input->cur = in;
3581
3582 if (areBlanks(ctxt, tmp, nbchar, 0)) {
3583 if (ctxt->sax->ignorableWhitespace != NULL)
3584 ctxt->sax->ignorableWhitespace(ctxt->userData,
3585 tmp, nbchar);
3586 } else {
3587 if (ctxt->sax->characters != NULL)
3588 ctxt->sax->characters(ctxt->userData,
3589 tmp, nbchar);
3590 if (*ctxt->space == -1)
3591 *ctxt->space = -2;
3592 }
3593 line = ctxt->input->line;
3594 col = ctxt->input->col;
3595 } else if (ctxt->sax != NULL) {
3596 if (ctxt->sax->characters != NULL)
3597 ctxt->sax->characters(ctxt->userData,
3598 ctxt->input->cur, nbchar);
3599 line = ctxt->input->line;
3600 col = ctxt->input->col;
3601 }
3602 /* something really bad happened in the SAX callback */
3603 if (ctxt->instate != XML_PARSER_CONTENT)
3604 return;
3605 }
3606 ctxt->input->cur = in;
3607 if (*in == 0xD) {
3608 in++;
3609 if (*in == 0xA) {
3610 ctxt->input->cur = in;
3611 in++;
3612 ctxt->input->line++; ctxt->input->col = 1;
3613 continue; /* while */
3614 }
3615 in--;
3616 }
3617 if (*in == '<') {
3618 return;
3619 }
3620 if (*in == '&') {
3621 return;
3622 }
3623 SHRINK;
3624 GROW;
3625 in = ctxt->input->cur;
3626 } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09));
3627 nbchar = 0;
3628 }
3629 ctxt->input->line = line;
3630 ctxt->input->col = col;
3631 xmlParseCharDataComplex(ctxt, cdata);
3632}
3633
3634/**
3635 * xmlParseCharDataComplex:
3636 * @ctxt: an XML parser context
3637 * @cdata: int indicating whether we are within a CDATA section
3638 *
3639 * parse a CharData section.this is the fallback function
3640 * of xmlParseCharData() when the parsing requires handling
3641 * of non-ASCII characters.
3642 */
3643void
3644xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata) {
3645 xmlChar buf[XML_PARSER_BIG_BUFFER_SIZE + 5];
3646 int nbchar = 0;
3647 int cur, l;
3648 int count = 0;
3649
3650 SHRINK;
3651 GROW;
3652 cur = CUR_CHAR(l);
3653 while ((cur != '<') && /* checked */
3654 (cur != '&') &&
3655 (IS_CHAR(cur))) /* test also done in xmlCurrentChar() */ {
3656 if ((cur == ']') && (NXT(1) == ']') &&
3657 (NXT(2) == '>')) {
3658 if (cdata) break;
3659 else {
3660 xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
3661 }
3662 }
3663 COPY_BUF(l,buf,nbchar,cur);
3664 if (nbchar >= XML_PARSER_BIG_BUFFER_SIZE) {
3665 buf[nbchar] = 0;
3666
3667 /*
3668 * OK the segment is to be consumed as chars.
3669 */
3670 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
3671 if (areBlanks(ctxt, buf, nbchar, 0)) {
3672 if (ctxt->sax->ignorableWhitespace != NULL)
3673 ctxt->sax->ignorableWhitespace(ctxt->userData,
3674 buf, nbchar);
3675 } else {
3676 if (ctxt->sax->characters != NULL)
3677 ctxt->sax->characters(ctxt->userData, buf, nbchar);
3678 if ((ctxt->sax->characters !=
3679 ctxt->sax->ignorableWhitespace) &&
3680 (*ctxt->space == -1))
3681 *ctxt->space = -2;
3682 }
3683 }
3684 nbchar = 0;
3685 /* something really bad happened in the SAX callback */
3686 if (ctxt->instate != XML_PARSER_CONTENT)
3687 return;
3688 }
3689 count++;
3690 if (count > 50) {
3691 GROW;
3692 count = 0;
3693 }
3694 NEXTL(l);
3695 cur = CUR_CHAR(l);
3696 }
3697 if (nbchar != 0) {
3698 buf[nbchar] = 0;
3699 /*
3700 * OK the segment is to be consumed as chars.
3701 */
3702 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
3703 if (areBlanks(ctxt, buf, nbchar, 0)) {
3704 if (ctxt->sax->ignorableWhitespace != NULL)
3705 ctxt->sax->ignorableWhitespace(ctxt->userData, buf, nbchar);
3706 } else {
3707 if (ctxt->sax->characters != NULL)
3708 ctxt->sax->characters(ctxt->userData, buf, nbchar);
3709 if ((ctxt->sax->characters != ctxt->sax->ignorableWhitespace) &&
3710 (*ctxt->space == -1))
3711 *ctxt->space = -2;
3712 }
3713 }
3714 }
3715 if ((cur != 0) && (!IS_CHAR(cur))) {
3716 /* Generate the error and skip the offending character */
3717 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
3718 "PCDATA invalid Char value %d\n",
3719 cur);
3720 NEXTL(l);
3721 }
3722}
3723
3724/**
3725 * xmlParseExternalID:
3726 * @ctxt: an XML parser context
3727 * @publicID: a xmlChar** receiving PubidLiteral
3728 * @strict: indicate whether we should restrict parsing to only
3729 * production [75], see NOTE below
3730 *
3731 * Parse an External ID or a Public ID
3732 *
3733 * NOTE: Productions [75] and [83] interact badly since [75] can generate
3734 * 'PUBLIC' S PubidLiteral S SystemLiteral
3735 *
3736 * [75] ExternalID ::= 'SYSTEM' S SystemLiteral
3737 * | 'PUBLIC' S PubidLiteral S SystemLiteral
3738 *
3739 * [83] PublicID ::= 'PUBLIC' S PubidLiteral
3740 *
3741 * Returns the function returns SystemLiteral and in the second
3742 * case publicID receives PubidLiteral, is strict is off
3743 * it is possible to return NULL and have publicID set.
3744 */
3745
3746xmlChar *
3747xmlParseExternalID(xmlParserCtxtPtr ctxt, xmlChar **publicID, int strict) {
3748 xmlChar *URI = NULL;
3749
3750 SHRINK;
3751
3752 *publicID = NULL;
3753 if (CMP6(CUR_PTR, 'S', 'Y', 'S', 'T', 'E', 'M')) {
3754 SKIP(6);
3755 if (!IS_BLANK_CH(CUR)) {
3756 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
3757 "Space required after 'SYSTEM'\n");
3758 }
3759 SKIP_BLANKS;
3760 URI = xmlParseSystemLiteral(ctxt);
3761 if (URI == NULL) {
3762 xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
3763 }
3764 } else if (CMP6(CUR_PTR, 'P', 'U', 'B', 'L', 'I', 'C')) {
3765 SKIP(6);
3766 if (!IS_BLANK_CH(CUR)) {
3767 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
3768 "Space required after 'PUBLIC'\n");
3769 }
3770 SKIP_BLANKS;
3771 *publicID = xmlParsePubidLiteral(ctxt);
3772 if (*publicID == NULL) {
3773 xmlFatalErr(ctxt, XML_ERR_PUBID_REQUIRED, NULL);
3774 }
3775 if (strict) {
3776 /*
3777 * We don't handle [83] so "S SystemLiteral" is required.
3778 */
3779 if (!IS_BLANK_CH(CUR)) {
3780 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
3781 "Space required after the Public Identifier\n");
3782 }
3783 } else {
3784 /*
3785 * We handle [83] so we return immediately, if
3786 * "S SystemLiteral" is not detected. From a purely parsing
3787 * point of view that's a nice mess.
3788 */
3789 const xmlChar *ptr;
3790 GROW;
3791
3792 ptr = CUR_PTR;
3793 if (!IS_BLANK_CH(*ptr)) return(NULL);
3794
3795 while (IS_BLANK_CH(*ptr)) ptr++; /* TODO: dangerous, fix ! */
3796 if ((*ptr != '\'') && (*ptr != '"')) return(NULL);
3797 }
3798 SKIP_BLANKS;
3799 URI = xmlParseSystemLiteral(ctxt);
3800 if (URI == NULL) {
3801 xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
3802 }
3803 }
3804 return(URI);
3805}
3806
3807/**
3808 * xmlParseCommentComplex:
3809 * @ctxt: an XML parser context
3810 * @buf: the already parsed part of the buffer
3811 * @len: number of bytes filles in the buffer
3812 * @size: allocated size of the buffer
3813 *
3814 * Skip an XML (SGML) comment <!-- .... -->
3815 * The spec says that "For compatibility, the string "--" (double-hyphen)
3816 * must not occur within comments. "
3817 * This is the slow routine in case the accelerator for ascii didn't work
3818 *
3819 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
3820 */
3821static void
3822xmlParseCommentComplex(xmlParserCtxtPtr ctxt, xmlChar *buf, int len, int size) {
3823 int q, ql;
3824 int r, rl;
3825 int cur, l;
3826 xmlParserInputPtr input = ctxt->input;
3827 int count = 0;
3828
3829 if (buf == NULL) {
3830 len = 0;
3831 size = XML_PARSER_BUFFER_SIZE;
3832 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
3833 if (buf == NULL) {
3834 xmlErrMemory(ctxt, NULL);
3835 return;
3836 }
3837 }
3838 GROW; /* Assure there's enough input data */
3839 q = CUR_CHAR(ql);
3840 if (q == 0)
3841 goto not_terminated;
3842 if (!IS_CHAR(q)) {
3843 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
3844 "xmlParseComment: invalid xmlChar value %d\n",
3845 q);
3846 xmlFree (buf);
3847 return;
3848 }
3849 NEXTL(ql);
3850 r = CUR_CHAR(rl);
3851 if (r == 0)
3852 goto not_terminated;
3853 if (!IS_CHAR(r)) {
3854 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
3855 "xmlParseComment: invalid xmlChar value %d\n",
3856 q);
3857 xmlFree (buf);
3858 return;
3859 }
3860 NEXTL(rl);
3861 cur = CUR_CHAR(l);
3862 if (cur == 0)
3863 goto not_terminated;
3864 while (IS_CHAR(cur) && /* checked */
3865 ((cur != '>') ||
3866 (r != '-') || (q != '-'))) {
3867 if ((r == '-') && (q == '-')) {
3868 xmlFatalErr(ctxt, XML_ERR_HYPHEN_IN_COMMENT, NULL);
3869 }
3870 if (len + 5 >= size) {
3871 xmlChar *new_buf;
3872 size *= 2;
3873 new_buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3874 if (new_buf == NULL) {
3875 xmlFree (buf);
3876 xmlErrMemory(ctxt, NULL);
3877 return;
3878 }
3879 buf = new_buf;
3880 }
3881 COPY_BUF(ql,buf,len,q);
3882 q = r;
3883 ql = rl;
3884 r = cur;
3885 rl = l;
3886
3887 count++;
3888 if (count > 50) {
3889 GROW;
3890 count = 0;
3891 }
3892 NEXTL(l);
3893 cur = CUR_CHAR(l);
3894 if (cur == 0) {
3895 SHRINK;
3896 GROW;
3897 cur = CUR_CHAR(l);
3898 }
3899 }
3900 buf[len] = 0;
3901 if (cur == 0) {
3902 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
3903 "Comment not terminated \n<!--%.50s\n", buf);
3904 } else if (!IS_CHAR(cur)) {
3905 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
3906 "xmlParseComment: invalid xmlChar value %d\n",
3907 cur);
3908 } else {
3909 if (input != ctxt->input) {
3910 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
3911 "Comment doesn't start and stop in the same entity\n");
3912 }
3913 NEXT;
3914 if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
3915 (!ctxt->disableSAX))
3916 ctxt->sax->comment(ctxt->userData, buf);
3917 }
3918 xmlFree(buf);
3919 return;
3920not_terminated:
3921 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
3922 "Comment not terminated\n", NULL);
3923 xmlFree(buf);
3924 return;
3925}
3926
3927/**
3928 * xmlParseComment:
3929 * @ctxt: an XML parser context
3930 *
3931 * Skip an XML (SGML) comment <!-- .... -->
3932 * The spec says that "For compatibility, the string "--" (double-hyphen)
3933 * must not occur within comments. "
3934 *
3935 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
3936 */
3937void
3938xmlParseComment(xmlParserCtxtPtr ctxt) {
3939 xmlChar *buf = NULL;
3940 int size = XML_PARSER_BUFFER_SIZE;
3941 int len = 0;
3942 xmlParserInputState state;
3943 const xmlChar *in;
3944 int nbchar = 0, ccol;
3945
3946 /*
3947 * Check that there is a comment right here.
3948 */
3949 if ((RAW != '<') || (NXT(1) != '!') ||
3950 (NXT(2) != '-') || (NXT(3) != '-')) return;
3951
3952 state = ctxt->instate;
3953 ctxt->instate = XML_PARSER_COMMENT;
3954 SKIP(4);
3955 SHRINK;
3956 GROW;
3957
3958 /*
3959 * Accelerated common case where input don't need to be
3960 * modified before passing it to the handler.
3961 */
3962 in = ctxt->input->cur;
3963 do {
3964 if (*in == 0xA) {
3965 do {
3966 ctxt->input->line++; ctxt->input->col = 1;
3967 in++;
3968 } while (*in == 0xA);
3969 }
3970get_more:
3971 ccol = ctxt->input->col;
3972 while (((*in > '-') && (*in <= 0x7F)) ||
3973 ((*in >= 0x20) && (*in < '-')) ||
3974 (*in == 0x09)) {
3975 in++;
3976 ccol++;
3977 }
3978 ctxt->input->col = ccol;
3979 if (*in == 0xA) {
3980 do {
3981 ctxt->input->line++; ctxt->input->col = 1;
3982 in++;
3983 } while (*in == 0xA);
3984 goto get_more;
3985 }
3986 nbchar = in - ctxt->input->cur;
3987 /*
3988 * save current set of data
3989 */
3990 if (nbchar > 0) {
3991 if ((ctxt->sax != NULL) &&
3992 (ctxt->sax->comment != NULL)) {
3993 if (buf == NULL) {
3994 if ((*in == '-') && (in[1] == '-'))
3995 size = nbchar + 1;
3996 else
3997 size = XML_PARSER_BUFFER_SIZE + nbchar;
3998 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
3999 if (buf == NULL) {
4000 xmlErrMemory(ctxt, NULL);
4001 ctxt->instate = state;
4002 return;
4003 }
4004 len = 0;
4005 } else if (len + nbchar + 1 >= size) {
4006 xmlChar *new_buf;
4007 size += len + nbchar + XML_PARSER_BUFFER_SIZE;
4008 new_buf = (xmlChar *) xmlRealloc(buf,
4009 size * sizeof(xmlChar));
4010 if (new_buf == NULL) {
4011 xmlFree (buf);
4012 xmlErrMemory(ctxt, NULL);
4013 ctxt->instate = state;
4014 return;
4015 }
4016 buf = new_buf;
4017 }
4018 memcpy(&buf[len], ctxt->input->cur, nbchar);
4019 len += nbchar;
4020 buf[len] = 0;
4021 }
4022 }
4023 ctxt->input->cur = in;
4024 if (*in == 0xA) {
4025 in++;
4026 ctxt->input->line++; ctxt->input->col = 1;
4027 }
4028 if (*in == 0xD) {
4029 in++;
4030 if (*in == 0xA) {
4031 ctxt->input->cur = in;
4032 in++;
4033 ctxt->input->line++; ctxt->input->col = 1;
4034 continue; /* while */
4035 }
4036 in--;
4037 }
4038 SHRINK;
4039 GROW;
4040 in = ctxt->input->cur;
4041 if (*in == '-') {
4042 if (in[1] == '-') {
4043 if (in[2] == '>') {
4044 SKIP(3);
4045 if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
4046 (!ctxt->disableSAX)) {
4047 if (buf != NULL)
4048 ctxt->sax->comment(ctxt->userData, buf);
4049 else
4050 ctxt->sax->comment(ctxt->userData, BAD_CAST "");
4051 }
4052 if (buf != NULL)
4053 xmlFree(buf);
4054 ctxt->instate = state;
4055 return;
4056 }
4057 if (buf != NULL)
4058 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4059 "Comment not terminated \n<!--%.50s\n",
4060 buf);
4061 else
4062 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4063 "Comment not terminated \n", NULL);
4064 in++;
4065 ctxt->input->col++;
4066 }
4067 in++;
4068 ctxt->input->col++;
4069 goto get_more;
4070 }
4071 } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09));
4072 xmlParseCommentComplex(ctxt, buf, len, size);
4073 ctxt->instate = state;
4074 return;
4075}
4076
4077
4078/**
4079 * xmlParsePITarget:
4080 * @ctxt: an XML parser context
4081 *
4082 * parse the name of a PI
4083 *
4084 * [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
4085 *
4086 * Returns the PITarget name or NULL
4087 */
4088
4089const xmlChar *
4090xmlParsePITarget(xmlParserCtxtPtr ctxt) {
4091 const xmlChar *name;
4092
4093 name = xmlParseName(ctxt);
4094 if ((name != NULL) &&
4095 ((name[0] == 'x') || (name[0] == 'X')) &&
4096 ((name[1] == 'm') || (name[1] == 'M')) &&
4097 ((name[2] == 'l') || (name[2] == 'L'))) {
4098 int i;
4099 if ((name[0] == 'x') && (name[1] == 'm') &&
4100 (name[2] == 'l') && (name[3] == 0)) {
4101 xmlFatalErrMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
4102 "XML declaration allowed only at the start of the document\n");
4103 return(name);
4104 } else if (name[3] == 0) {
4105 xmlFatalErr(ctxt, XML_ERR_RESERVED_XML_NAME, NULL);
4106 return(name);
4107 }
4108 for (i = 0;;i++) {
4109 if (xmlW3CPIs[i] == NULL) break;
4110 if (xmlStrEqual(name, (const xmlChar *)xmlW3CPIs[i]))
4111 return(name);
4112 }
4113 xmlWarningMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
4114 "xmlParsePITarget: invalid name prefix 'xml'\n",
4115 NULL, NULL);
4116 }
4117 return(name);
4118}
4119
#ifdef LIBXML_CATALOG_ENABLED
/**
 * xmlParseCatalogPI:
 * @ctxt:  an XML parser context
 * @catalog:  the PI value string
 *
 * Parse an XML Catalog Processing Instruction.
 *
 * <?oasis-xml-catalog catalog="http://example.com/catalog.xml"?>
 *
 * Occurs only if allowed by the user and if happening in the Misc
 * part of the document before any doctype information.
 * This will add the given catalog to the parsing context in order
 * to be used if there is a resolution need further down in the document.
 *
 * A malformed value raises an XML_WAR_CATALOG_PI warning, except when
 * the '=' after "catalog" is missing: that case returns silently.
 */

static void
xmlParseCatalogPI(xmlParserCtxtPtr ctxt, const xmlChar *catalog) {
    const xmlChar *p = catalog;
    const xmlChar *start;
    xmlChar *url = NULL;
    xmlChar quote;

    /* pseudo-attribute name */
    for (; IS_BLANK_CH(*p); p++)
        ;
    if (xmlStrncmp(p, BAD_CAST"catalog", 7) != 0)
        goto syntax_error;
    p += 7;

    /* '=' sign */
    for (; IS_BLANK_CH(*p); p++)
        ;
    if (*p != '=')
        return;
    p++;

    /* opening quote */
    for (; IS_BLANK_CH(*p); p++)
        ;
    quote = *p;
    if ((quote != '\'') && (quote != '"'))
        goto syntax_error;
    p++;

    /* value, up to the matching quote */
    start = p;
    for (; (*p != 0) && (*p != quote); p++)
        ;
    if (*p == 0)
        goto syntax_error;
    url = xmlStrndup(start, p - start);
    p++;

    /* only blanks may follow the closing quote */
    for (; IS_BLANK_CH(*p); p++)
        ;
    if (*p != 0)
        goto syntax_error;

    if (url != NULL) {
        ctxt->catalogs = xmlCatalogAddLocal(ctxt->catalogs, url);
        xmlFree(url);
    }
    return;

syntax_error:
    xmlWarningMsg(ctxt, XML_WAR_CATALOG_PI,
                  "Catalog PI syntax error: %s\n",
                  catalog, NULL);
    if (url != NULL)
        xmlFree(url);
}
#endif
4181
4182/**
4183 * xmlParsePI:
4184 * @ctxt: an XML parser context
4185 *
4186 * parse an XML Processing Instruction.
4187 *
4188 * [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
4189 *
4190 * The processing is transfered to SAX once parsed.
4191 */
4192
4193void
4194xmlParsePI(xmlParserCtxtPtr ctxt) {
4195 xmlChar *buf = NULL;
4196 int len = 0;
4197 int size = XML_PARSER_BUFFER_SIZE;
4198 int cur, l;
4199 const xmlChar *target;
4200 xmlParserInputState state;
4201 int count = 0;
4202
4203 if ((RAW == '<') && (NXT(1) == '?')) {
4204 xmlParserInputPtr input = ctxt->input;
4205 state = ctxt->instate;
4206 ctxt->instate = XML_PARSER_PI;
4207 /*
4208 * this is a Processing Instruction.
4209 */
4210 SKIP(2);
4211 SHRINK;
4212
4213 /*
4214 * Parse the target name and check for special support like
4215 * namespace.
4216 */
4217 target = xmlParsePITarget(ctxt);
4218 if (target != NULL) {
4219 if ((RAW == '?') && (NXT(1) == '>')) {
4220 if (input != ctxt->input) {
4221 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
4222 "PI declaration doesn't start and stop in the same entity\n");
4223 }
4224 SKIP(2);
4225
4226 /*
4227 * SAX: PI detected.
4228 */
4229 if ((ctxt->sax) && (!ctxt->disableSAX) &&
4230 (ctxt->sax->processingInstruction != NULL))
4231 ctxt->sax->processingInstruction(ctxt->userData,
4232 target, NULL);
4233 ctxt->instate = state;
4234 return;
4235 }
4236 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
4237 if (buf == NULL) {
4238 xmlErrMemory(ctxt, NULL);
4239 ctxt->instate = state;
4240 return;
4241 }
4242 cur = CUR;
4243 if (!IS_BLANK(cur)) {
4244 xmlFatalErrMsgStr(ctxt, XML_ERR_SPACE_REQUIRED,
4245 "ParsePI: PI %s space expected\n", target);
4246 }
4247 SKIP_BLANKS;
4248 cur = CUR_CHAR(l);
4249 while (IS_CHAR(cur) && /* checked */
4250 ((cur != '?') || (NXT(1) != '>'))) {
4251 if (len + 5 >= size) {
4252 xmlChar *tmp;
4253
4254 size *= 2;
4255 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
4256 if (tmp == NULL) {
4257 xmlErrMemory(ctxt, NULL);
4258 xmlFree(buf);
4259 ctxt->instate = state;
4260 return;
4261 }
4262 buf = tmp;
4263 }
4264 count++;
4265 if (count > 50) {
4266 GROW;
4267 count = 0;
4268 }
4269 COPY_BUF(l,buf,len,cur);
4270 NEXTL(l);
4271 cur = CUR_CHAR(l);
4272 if (cur == 0) {
4273 SHRINK;
4274 GROW;
4275 cur = CUR_CHAR(l);
4276 }
4277 }
4278 buf[len] = 0;
4279 if (cur != '?') {
4280 xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
4281 "ParsePI: PI %s never end ...\n", target);
4282 } else {
4283 if (input != ctxt->input) {
4284 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4285 "PI declaration doesn't start and stop in the same entity\n");
4286 }
4287 SKIP(2);
4288
4289#ifdef LIBXML_CATALOG_ENABLED
4290 if (((state == XML_PARSER_MISC) ||
4291 (state == XML_PARSER_START)) &&
4292 (xmlStrEqual(target, XML_CATALOG_PI))) {
4293 xmlCatalogAllow allow = xmlCatalogGetDefaults();
4294 if ((allow == XML_CATA_ALLOW_DOCUMENT) ||
4295 (allow == XML_CATA_ALLOW_ALL))
4296 xmlParseCatalogPI(ctxt, buf);
4297 }
4298#endif
4299
4300
4301 /*
4302 * SAX: PI detected.
4303 */
4304 if ((ctxt->sax) && (!ctxt->disableSAX) &&
4305 (ctxt->sax->processingInstruction != NULL))
4306 ctxt->sax->processingInstruction(ctxt->userData,
4307 target, buf);
4308 }
4309 xmlFree(buf);
4310 } else {
4311 xmlFatalErr(ctxt, XML_ERR_PI_NOT_STARTED, NULL);
4312 }
4313 ctxt->instate = state;
4314 }
4315}
4316
4317/**
4318 * xmlParseNotationDecl:
4319 * @ctxt: an XML parser context
4320 *
4321 * parse a notation declaration
4322 *
4323 * [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID | PublicID) S? '>'
4324 *
4325 * Hence there is actually 3 choices:
4326 * 'PUBLIC' S PubidLiteral
4327 * 'PUBLIC' S PubidLiteral S SystemLiteral
4328 * and 'SYSTEM' S SystemLiteral
4329 *
4330 * See the NOTE on xmlParseExternalID().
4331 */
4332
4333void
4334xmlParseNotationDecl(xmlParserCtxtPtr ctxt) {
4335 const xmlChar *name;
4336 xmlChar *Pubid;
4337 xmlChar *Systemid;
4338
4339 if (CMP10(CUR_PTR, '<', '!', 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
4340 xmlParserInputPtr input = ctxt->input;
4341 SHRINK;
4342 SKIP(10);
4343 if (!IS_BLANK_CH(CUR)) {
4344 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4345 "Space required after '<!NOTATION'\n");
4346 return;
4347 }
4348 SKIP_BLANKS;
4349
4350 name = xmlParseName(ctxt);
4351 if (name == NULL) {
4352 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
4353 return;
4354 }
4355 if (!IS_BLANK_CH(CUR)) {
4356 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4357 "Space required after the NOTATION name'\n");
4358 return;
4359 }
4360 SKIP_BLANKS;
4361
4362 /*
4363 * Parse the IDs.
4364 */
4365 Systemid = xmlParseExternalID(ctxt, &Pubid, 0);
4366 SKIP_BLANKS;
4367
4368 if (RAW == '>') {
4369 if (input != ctxt->input) {
4370 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4371 "Notation declaration doesn't start and stop in the same entity\n");
4372 }
4373 NEXT;
4374 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
4375 (ctxt->sax->notationDecl != NULL))
4376 ctxt->sax->notationDecl(ctxt->userData, name, Pubid, Systemid);
4377 } else {
4378 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
4379 }
4380 if (Systemid != NULL) xmlFree(Systemid);
4381 if (Pubid != NULL) xmlFree(Pubid);
4382 }
4383}
4384
4385/**
4386 * xmlParseEntityDecl:
4387 * @ctxt: an XML parser context
4388 *
4389 * parse <!ENTITY declarations
4390 *
4391 * [70] EntityDecl ::= GEDecl | PEDecl
4392 *
4393 * [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'
4394 *
4395 * [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'
4396 *
4397 * [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?)
4398 *
4399 * [74] PEDef ::= EntityValue | ExternalID
4400 *
4401 * [76] NDataDecl ::= S 'NDATA' S Name
4402 *
4403 * [ VC: Notation Declared ]
4404 * The Name must match the declared name of a notation.
4405 */
4406
4407void
4408xmlParseEntityDecl(xmlParserCtxtPtr ctxt) {
4409 const xmlChar *name = NULL;
4410 xmlChar *value = NULL;
4411 xmlChar *URI = NULL, *literal = NULL;
4412 const xmlChar *ndata = NULL;
4413 int isParameter = 0;
4414 xmlChar *orig = NULL;
4415 int skipped;
4416 unsigned long oldnbent = ctxt->nbentities;
4417
4418 /* GROW; done in the caller */
4419 if (CMP8(CUR_PTR, '<', '!', 'E', 'N', 'T', 'I', 'T', 'Y')) {
4420 xmlParserInputPtr input = ctxt->input;
4421 SHRINK;
4422 SKIP(8);
4423 skipped = SKIP_BLANKS;
4424 if (skipped == 0) {
4425 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4426 "Space required after '<!ENTITY'\n");
4427 }
4428
4429 if (RAW == '%') {
4430 NEXT;
4431 skipped = SKIP_BLANKS;
4432 if (skipped == 0) {
4433 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4434 "Space required after '%'\n");
4435 }
4436 isParameter = 1;
4437 }
4438
4439 name = xmlParseName(ctxt);
4440 if (name == NULL) {
4441 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
4442 "xmlParseEntityDecl: no name\n");
4443 return;
4444 }
4445 skipped = SKIP_BLANKS;
4446 if (skipped == 0) {
4447 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4448 "Space required after the entity name\n");
4449 }
4450
4451 ctxt->instate = XML_PARSER_ENTITY_DECL;
4452 /*
4453 * handle the various case of definitions...
4454 */
4455 if (isParameter) {
4456 if ((RAW == '"') || (RAW == '\'')) {
4457 value = xmlParseEntityValue(ctxt, &orig);
4458 if (value) {
4459 if ((ctxt->sax != NULL) &&
4460 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
4461 ctxt->sax->entityDecl(ctxt->userData, name,
4462 XML_INTERNAL_PARAMETER_ENTITY,
4463 NULL, NULL, value);
4464 }
4465 } else {
4466 URI = xmlParseExternalID(ctxt, &literal, 1);
4467 if ((URI == NULL) && (literal == NULL)) {
4468 xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
4469 }
4470 if (URI) {
4471 xmlURIPtr uri;
4472
4473 uri = xmlParseURI((const char *) URI);
4474 if (uri == NULL) {
4475 xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI,
4476 "Invalid URI: %s\n", URI);
4477 /*
4478 * This really ought to be a well formedness error
4479 * but the XML Core WG decided otherwise c.f. issue
4480 * E26 of the XML erratas.
4481 */
4482 } else {
4483 if (uri->fragment != NULL) {
4484 /*
4485 * Okay this is foolish to block those but not
4486 * invalid URIs.
4487 */
4488 xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
4489 } else {
4490 if ((ctxt->sax != NULL) &&
4491 (!ctxt->disableSAX) &&
4492 (ctxt->sax->entityDecl != NULL))
4493 ctxt->sax->entityDecl(ctxt->userData, name,
4494 XML_EXTERNAL_PARAMETER_ENTITY,
4495 literal, URI, NULL);
4496 }
4497 xmlFreeURI(uri);
4498 }
4499 }
4500 }
4501 } else {
4502 if ((RAW == '"') || (RAW == '\'')) {
4503 value = xmlParseEntityValue(ctxt, &orig);
4504 if ((ctxt->sax != NULL) &&
4505 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
4506 ctxt->sax->entityDecl(ctxt->userData, name,
4507 XML_INTERNAL_GENERAL_ENTITY,
4508 NULL, NULL, value);
4509 /*
4510 * For expat compatibility in SAX mode.
4511 */
4512 if ((ctxt->myDoc == NULL) ||
4513 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
4514 if (ctxt->myDoc == NULL) {
4515 ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
4516 }
4517 if (ctxt->myDoc->intSubset == NULL)
4518 ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
4519 BAD_CAST "fake", NULL, NULL);
4520
4521 xmlSAX2EntityDecl(ctxt, name, XML_INTERNAL_GENERAL_ENTITY,
4522 NULL, NULL, value);
4523 }
4524 } else {
4525 URI = xmlParseExternalID(ctxt, &literal, 1);
4526 if ((URI == NULL) && (literal == NULL)) {
4527 xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
4528 }
4529 if (URI) {
4530 xmlURIPtr uri;
4531
4532 uri = xmlParseURI((const char *)URI);
4533 if (uri == NULL) {
4534 xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI,
4535 "Invalid URI: %s\n", URI);
4536 /*
4537 * This really ought to be a well formedness error
4538 * but the XML Core WG decided otherwise c.f. issue
4539 * E26 of the XML erratas.
4540 */
4541 } else {
4542 if (uri->fragment != NULL) {
4543 /*
4544 * Okay this is foolish to block those but not
4545 * invalid URIs.
4546 */
4547 xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
4548 }
4549 xmlFreeURI(uri);
4550 }
4551 }
4552 if ((RAW != '>') && (!IS_BLANK_CH(CUR))) {
4553 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4554 "Space required before 'NDATA'\n");
4555 }
4556 SKIP_BLANKS;
4557 if (CMP5(CUR_PTR, 'N', 'D', 'A', 'T', 'A')) {
4558 SKIP(5);
4559 if (!IS_BLANK_CH(CUR)) {
4560 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4561 "Space required after 'NDATA'\n");
4562 }
4563 SKIP_BLANKS;
4564 ndata = xmlParseName(ctxt);
4565 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
4566 (ctxt->sax->unparsedEntityDecl != NULL))
4567 ctxt->sax->unparsedEntityDecl(ctxt->userData, name,
4568 literal, URI, ndata);
4569 } else {
4570 if ((ctxt->sax != NULL) &&
4571 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
4572 ctxt->sax->entityDecl(ctxt->userData, name,
4573 XML_EXTERNAL_GENERAL_PARSED_ENTITY,
4574 literal, URI, NULL);
4575 /*
4576 * For expat compatibility in SAX mode.
4577 * assuming the entity repalcement was asked for
4578 */
4579 if ((ctxt->replaceEntities != 0) &&
4580 ((ctxt->myDoc == NULL) ||
4581 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE)))) {
4582 if (ctxt->myDoc == NULL) {
4583 ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
4584 }
4585
4586 if (ctxt->myDoc->intSubset == NULL)
4587 ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
4588 BAD_CAST "fake", NULL, NULL);
4589 xmlSAX2EntityDecl(ctxt, name,
4590 XML_EXTERNAL_GENERAL_PARSED_ENTITY,
4591 literal, URI, NULL);
4592 }
4593 }
4594 }
4595 }
4596 SKIP_BLANKS;
4597 if (RAW != '>') {
4598 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_NOT_FINISHED,
4599 "xmlParseEntityDecl: entity %s not terminated\n", name);
4600 } else {
4601 if (input != ctxt->input) {
4602 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
4603 "Entity declaration doesn't start and stop in the same entity\n");
4604 }
4605 NEXT;
4606 }
4607 if (orig != NULL) {
4608 /*
4609 * Ugly mechanism to save the raw entity value.
4610 */
4611 xmlEntityPtr cur = NULL;
4612
4613 if (isParameter) {
4614 if ((ctxt->sax != NULL) &&
4615 (ctxt->sax->getParameterEntity != NULL))
4616 cur = ctxt->sax->getParameterEntity(ctxt->userData, name);
4617 } else {
4618 if ((ctxt->sax != NULL) &&
4619 (ctxt->sax->getEntity != NULL))
4620 cur = ctxt->sax->getEntity(ctxt->userData, name);
4621 if ((cur == NULL) && (ctxt->userData==ctxt)) {
4622 cur = xmlSAX2GetEntity(ctxt, name);
4623 }
4624 }
4625 if (cur != NULL) {
4626 cur->checked = ctxt->nbentities - oldnbent;
4627 if (cur->orig != NULL)
4628 xmlFree(orig);
4629 else
4630 cur->orig = orig;
4631 } else
4632 xmlFree(orig);
4633 }
4634 if (value != NULL) xmlFree(value);
4635 if (URI != NULL) xmlFree(URI);
4636 if (literal != NULL) xmlFree(literal);
4637 }
4638}
4639
4640/**
4641 * xmlParseDefaultDecl:
4642 * @ctxt: an XML parser context
4643 * @value: Receive a possible fixed default value for the attribute
4644 *
4645 * Parse an attribute default declaration
4646 *
4647 * [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue)
4648 *
4649 * [ VC: Required Attribute ]
4650 * if the default declaration is the keyword #REQUIRED, then the
4651 * attribute must be specified for all elements of the type in the
4652 * attribute-list declaration.
4653 *
4654 * [ VC: Attribute Default Legal ]
4655 * The declared default value must meet the lexical constraints of
4656 * the declared attribute type c.f. xmlValidateAttributeDecl()
4657 *
4658 * [ VC: Fixed Attribute Default ]
4659 * if an attribute has a default value declared with the #FIXED
4660 * keyword, instances of that attribute must match the default value.
4661 *
4662 * [ WFC: No < in Attribute Values ]
4663 * handled in xmlParseAttValue()
4664 *
4665 * returns: XML_ATTRIBUTE_NONE, XML_ATTRIBUTE_REQUIRED, XML_ATTRIBUTE_IMPLIED
4666 * or XML_ATTRIBUTE_FIXED.
4667 */
4668
4669int
4670xmlParseDefaultDecl(xmlParserCtxtPtr ctxt, xmlChar **value) {
4671 int val;
4672 xmlChar *ret;
4673
4674 *value = NULL;
4675 if (CMP9(CUR_PTR, '#', 'R', 'E', 'Q', 'U', 'I', 'R', 'E', 'D')) {
4676 SKIP(9);
4677 return(XML_ATTRIBUTE_REQUIRED);
4678 }
4679 if (CMP8(CUR_PTR, '#', 'I', 'M', 'P', 'L', 'I', 'E', 'D')) {
4680 SKIP(8);
4681 return(XML_ATTRIBUTE_IMPLIED);
4682 }
4683 val = XML_ATTRIBUTE_NONE;
4684 if (CMP6(CUR_PTR, '#', 'F', 'I', 'X', 'E', 'D')) {
4685 SKIP(6);
4686 val = XML_ATTRIBUTE_FIXED;
4687 if (!IS_BLANK_CH(CUR)) {
4688 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4689 "Space required after '#FIXED'\n");
4690 }
4691 SKIP_BLANKS;
4692 }
4693 ret = xmlParseAttValue(ctxt);
4694 ctxt->instate = XML_PARSER_DTD;
4695 if (ret == NULL) {
4696 xmlFatalErrMsg(ctxt, (xmlParserErrors)ctxt->errNo,
4697 "Attribute default value declaration error\n");
4698 } else
4699 *value = ret;
4700 return(val);
4701}
4702
4703/**
4704 * xmlParseNotationType:
4705 * @ctxt: an XML parser context
4706 *
4707 * parse an Notation attribute type.
4708 *
4709 * Note: the leading 'NOTATION' S part has already being parsed...
4710 *
4711 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
4712 *
4713 * [ VC: Notation Attributes ]
4714 * Values of this type must match one of the notation names included
4715 * in the declaration; all notation names in the declaration must be declared.
4716 *
4717 * Returns: the notation attribute tree built while parsing
4718 */
4719
4720xmlEnumerationPtr
4721xmlParseNotationType(xmlParserCtxtPtr ctxt) {
4722 const xmlChar *name;
4723 xmlEnumerationPtr ret = NULL, last = NULL, cur;
4724
4725 if (RAW != '(') {
4726 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
4727 return(NULL);
4728 }
4729 SHRINK;
4730 do {
4731 NEXT;
4732 SKIP_BLANKS;
4733 name = xmlParseName(ctxt);
4734 if (name == NULL) {
4735 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
4736 "Name expected in NOTATION declaration\n");
4737 xmlFreeEnumeration(ret);
4738 return(NULL);
4739 }
4740 cur = xmlCreateEnumeration(name);
4741 if (cur == NULL) {
4742 xmlFreeEnumeration(ret);
4743 return(NULL);
4744 }
4745 if (last == NULL) ret = last = cur;
4746 else {
4747 last->next = cur;
4748 last = cur;
4749 }
4750 SKIP_BLANKS;
4751 } while (RAW == '|');
4752 if (RAW != ')') {
4753 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
4754 xmlFreeEnumeration(ret);
4755 return(NULL);
4756 }
4757 NEXT;
4758 return(ret);
4759}
4760
4761/**
4762 * xmlParseEnumerationType:
4763 * @ctxt: an XML parser context
4764 *
4765 * parse an Enumeration attribute type.
4766 *
4767 * [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'
4768 *
4769 * [ VC: Enumeration ]
4770 * Values of this type must match one of the Nmtoken tokens in
4771 * the declaration
4772 *
4773 * Returns: the enumeration attribute tree built while parsing
4774 */
4775
4776xmlEnumerationPtr
4777xmlParseEnumerationType(xmlParserCtxtPtr ctxt) {
4778 xmlChar *name;
4779 xmlEnumerationPtr ret = NULL, last = NULL, cur;
4780
4781 if (RAW != '(') {
4782 xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_STARTED, NULL);
4783 return(NULL);
4784 }
4785 SHRINK;
4786 do {
4787 NEXT;
4788 SKIP_BLANKS;
4789 name = xmlParseNmtoken(ctxt);
4790 if (name == NULL) {
4791 xmlFatalErr(ctxt, XML_ERR_NMTOKEN_REQUIRED, NULL);
4792 return(ret);
4793 }
4794 cur = xmlCreateEnumeration(name);
4795 xmlFree(name);
4796 if (cur == NULL) {
4797 xmlFreeEnumeration(ret);
4798 return(NULL);
4799 }
4800 if (last == NULL) ret = last = cur;
4801 else {
4802 last->next = cur;
4803 last = cur;
4804 }
4805 SKIP_BLANKS;
4806 } while (RAW == '|');
4807 if (RAW != ')') {
4808 xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_FINISHED, NULL);
4809 return(ret);
4810 }
4811 NEXT;
4812 return(ret);
4813}
4814
4815/**
4816 * xmlParseEnumeratedType:
4817 * @ctxt: an XML parser context
4818 * @tree: the enumeration tree built while parsing
4819 *
4820 * parse an Enumerated attribute type.
4821 *
4822 * [57] EnumeratedType ::= NotationType | Enumeration
4823 *
4824 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
4825 *
4826 *
4827 * Returns: XML_ATTRIBUTE_ENUMERATION or XML_ATTRIBUTE_NOTATION
4828 */
4829
4830int
4831xmlParseEnumeratedType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
4832 if (CMP8(CUR_PTR, 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
4833 SKIP(8);
4834 if (!IS_BLANK_CH(CUR)) {
4835 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4836 "Space required after 'NOTATION'\n");
4837 return(0);
4838 }
4839 SKIP_BLANKS;
4840 *tree = xmlParseNotationType(ctxt);
4841 if (*tree == NULL) return(0);
4842 return(XML_ATTRIBUTE_NOTATION);
4843 }
4844 *tree = xmlParseEnumerationType(ctxt);
4845 if (*tree == NULL) return(0);
4846 return(XML_ATTRIBUTE_ENUMERATION);
4847}
4848
4849/**
4850 * xmlParseAttributeType:
4851 * @ctxt: an XML parser context
4852 * @tree: the enumeration tree built while parsing
4853 *
4854 * parse the Attribute list def for an element
4855 *
4856 * [54] AttType ::= StringType | TokenizedType | EnumeratedType
4857 *
4858 * [55] StringType ::= 'CDATA'
4859 *
4860 * [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' |
4861 * 'ENTITIES' | 'NMTOKEN' | 'NMTOKENS'
4862 *
4863 * Validity constraints for attribute values syntax are checked in
4864 * xmlValidateAttributeValue()
4865 *
4866 * [ VC: ID ]
4867 * Values of type ID must match the Name production. A name must not
4868 * appear more than once in an XML document as a value of this type;
4869 * i.e., ID values must uniquely identify the elements which bear them.
4870 *
4871 * [ VC: One ID per Element Type ]
4872 * No element type may have more than one ID attribute specified.
4873 *
4874 * [ VC: ID Attribute Default ]
4875 * An ID attribute must have a declared default of #IMPLIED or #REQUIRED.
4876 *
4877 * [ VC: IDREF ]
4878 * Values of type IDREF must match the Name production, and values
4879 * of type IDREFS must match Names; each IDREF Name must match the value
4880 * of an ID attribute on some element in the XML document; i.e. IDREF
4881 * values must match the value of some ID attribute.
4882 *
4883 * [ VC: Entity Name ]
4884 * Values of type ENTITY must match the Name production, values
4885 * of type ENTITIES must match Names; each Entity Name must match the
4886 * name of an unparsed entity declared in the DTD.
4887 *
4888 * [ VC: Name Token ]
4889 * Values of type NMTOKEN must match the Nmtoken production; values
4890 * of type NMTOKENS must match Nmtokens.
4891 *
4892 * Returns the attribute type
4893 */
4894int
4895xmlParseAttributeType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
4896 SHRINK;
4897 if (CMP5(CUR_PTR, 'C', 'D', 'A', 'T', 'A')) {
4898 SKIP(5);
4899 return(XML_ATTRIBUTE_CDATA);
4900 } else if (CMP6(CUR_PTR, 'I', 'D', 'R', 'E', 'F', 'S')) {
4901 SKIP(6);
4902 return(XML_ATTRIBUTE_IDREFS);
4903 } else if (CMP5(CUR_PTR, 'I', 'D', 'R', 'E', 'F')) {
4904 SKIP(5);
4905 return(XML_ATTRIBUTE_IDREF);
4906 } else if ((RAW == 'I') && (NXT(1) == 'D')) {
4907 SKIP(2);
4908 return(XML_ATTRIBUTE_ID);
4909 } else if (CMP6(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'Y')) {
4910 SKIP(6);
4911 return(XML_ATTRIBUTE_ENTITY);
4912 } else if (CMP8(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'I', 'E', 'S')) {
4913 SKIP(8);
4914 return(XML_ATTRIBUTE_ENTITIES);
4915 } else if (CMP8(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N', 'S')) {
4916 SKIP(8);
4917 return(XML_ATTRIBUTE_NMTOKENS);
4918 } else if (CMP7(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N')) {
4919 SKIP(7);
4920 return(XML_ATTRIBUTE_NMTOKEN);
4921 }
4922 return(xmlParseEnumeratedType(ctxt, tree));
4923}
4924
4925/**
4926 * xmlParseAttributeListDecl:
4927 * @ctxt: an XML parser context
4928 *
4929 * : parse the Attribute list def for an element
4930 *
4931 * [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
4932 *
4933 * [53] AttDef ::= S Name S AttType S DefaultDecl
4934 *
4935 */
4936void
4937xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt) {
4938 const xmlChar *elemName;
4939 const xmlChar *attrName;
4940 xmlEnumerationPtr tree;
4941
4942 if (CMP9(CUR_PTR, '<', '!', 'A', 'T', 'T', 'L', 'I', 'S', 'T')) {
4943 xmlParserInputPtr input = ctxt->input;
4944
4945 SKIP(9);
4946 if (!IS_BLANK_CH(CUR)) {
4947 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4948 "Space required after '<!ATTLIST'\n");
4949 }
4950 SKIP_BLANKS;
4951 elemName = xmlParseName(ctxt);
4952 if (elemName == NULL) {
4953 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
4954 "ATTLIST: no name for Element\n");
4955 return;
4956 }
4957 SKIP_BLANKS;
4958 GROW;
4959 while (RAW != '>') {
4960 const xmlChar *check = CUR_PTR;
4961 int type;
4962 int def;
4963 xmlChar *defaultValue = NULL;
4964
4965 GROW;
4966 tree = NULL;
4967 attrName = xmlParseName(ctxt);
4968 if (attrName == NULL) {
4969 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
4970 "ATTLIST: no name for Attribute\n");
4971 break;
4972 }
4973 GROW;
4974 if (!IS_BLANK_CH(CUR)) {
4975 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4976 "Space required after the attribute name\n");
4977 break;
4978 }
4979 SKIP_BLANKS;
4980
4981 type = xmlParseAttributeType(ctxt, &tree);
4982 if (type <= 0) {
4983 break;
4984 }
4985
4986 GROW;
4987 if (!IS_BLANK_CH(CUR)) {
4988 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4989 "Space required after the attribute type\n");
4990 if (tree != NULL)
4991 xmlFreeEnumeration(tree);
4992 break;
4993 }
4994 SKIP_BLANKS;
4995
4996 def = xmlParseDefaultDecl(ctxt, &defaultValue);
4997 if (def <= 0) {
4998 if (defaultValue != NULL)
4999 xmlFree(defaultValue);
5000 if (tree != NULL)
5001 xmlFreeEnumeration(tree);
5002 break;
5003 }
5004
5005 GROW;
5006 if (RAW != '>') {
5007 if (!IS_BLANK_CH(CUR)) {
5008 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5009 "Space required after the attribute default value\n");
5010 if (defaultValue != NULL)
5011 xmlFree(defaultValue);
5012 if (tree != NULL)
5013 xmlFreeEnumeration(tree);
5014 break;
5015 }
5016 SKIP_BLANKS;
5017 }
5018 if (check == CUR_PTR) {
5019 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
5020 "in xmlParseAttributeListDecl\n");
5021 if (defaultValue != NULL)
5022 xmlFree(defaultValue);
5023 if (tree != NULL)
5024 xmlFreeEnumeration(tree);
5025 break;
5026 }
5027 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5028 (ctxt->sax->attributeDecl != NULL))
5029 ctxt->sax->attributeDecl(ctxt->userData, elemName, attrName,
5030 type, def, defaultValue, tree);
5031 else if (tree != NULL)
5032 xmlFreeEnumeration(tree);
5033
5034 if ((ctxt->sax2) && (defaultValue != NULL) &&
5035 (def != XML_ATTRIBUTE_IMPLIED) &&
5036 (def != XML_ATTRIBUTE_REQUIRED)) {
5037 xmlAddDefAttrs(ctxt, elemName, attrName, defaultValue);
5038 }
5039 if ((ctxt->sax2) && (type != XML_ATTRIBUTE_CDATA)) {
5040 xmlAddSpecialAttr(ctxt, elemName, attrName, type);
5041 }
5042 if (defaultValue != NULL)
5043 xmlFree(defaultValue);
5044 GROW;
5045 }
5046 if (RAW == '>') {
5047 if (input != ctxt->input) {
5048 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5049 "Attribute list declaration doesn't start and stop in the same entity\n");
5050 }
5051 NEXT;
5052 }
5053 }
5054}
5055
5056/**
5057 * xmlParseElementMixedContentDecl:
5058 * @ctxt: an XML parser context
5059 * @inputchk: the input used for the current entity, needed for boundary checks
5060 *
5061 * parse the declaration for a Mixed Element content
5062 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
5063 *
5064 * [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' |
5065 * '(' S? '#PCDATA' S? ')'
5066 *
5067 * [ VC: Proper Group/PE Nesting ] applies to [51] too (see [49])
5068 *
5069 * [ VC: No Duplicate Types ]
5070 * The same name must not appear more than once in a single
5071 * mixed-content declaration.
5072 *
5073 * returns: the list of the xmlElementContentPtr describing the element choices
5074 */
5075xmlElementContentPtr
5076xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt, int inputchk) {
5077 xmlElementContentPtr ret = NULL, cur = NULL, n;
5078 const xmlChar *elem = NULL;
5079
5080 GROW;
5081 if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
5082 SKIP(7);
5083 SKIP_BLANKS;
5084 SHRINK;
5085 if (RAW == ')') {
5086 if ((ctxt->validate) && (ctxt->input->id != inputchk)) {
5087 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
5088"Element content declaration doesn't start and stop in the same entity\n",
5089 NULL);
5090 }
5091 NEXT;
5092 ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA);
5093 if (RAW == '*') {
5094 ret->ocur = XML_ELEMENT_CONTENT_MULT;
5095 NEXT;
5096 }
5097 return(ret);
5098 }
5099 if ((RAW == '(') || (RAW == '|')) {
5100 ret = cur = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA);
5101 if (ret == NULL) return(NULL);
5102 }
5103 while (RAW == '|') {
5104 NEXT;
5105 if (elem == NULL) {
5106 ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
5107 if (ret == NULL) return(NULL);
5108 ret->c1 = cur;
5109 if (cur != NULL)
5110 cur->parent = ret;
5111 cur = ret;
5112 } else {
5113 n = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
5114 if (n == NULL) return(NULL);
5115 n->c1 = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
5116 if (n->c1 != NULL)
5117 n->c1->parent = n;
5118 cur->c2 = n;
5119 if (n != NULL)
5120 n->parent = cur;
5121 cur = n;
5122 }
5123 SKIP_BLANKS;
5124 elem = xmlParseName(ctxt);
5125 if (elem == NULL) {
5126 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5127 "xmlParseElementMixedContentDecl : Name expected\n");
5128 xmlFreeDocElementContent(ctxt->myDoc, cur);
5129 return(NULL);
5130 }
5131 SKIP_BLANKS;
5132 GROW;
5133 }
5134 if ((RAW == ')') && (NXT(1) == '*')) {
5135 if (elem != NULL) {
5136 cur->c2 = xmlNewDocElementContent(ctxt->myDoc, elem,
5137 XML_ELEMENT_CONTENT_ELEMENT);
5138 if (cur->c2 != NULL)
5139 cur->c2->parent = cur;
5140 }
5141 ret->ocur = XML_ELEMENT_CONTENT_MULT;
5142 if ((ctxt->validate) && (ctxt->input->id != inputchk)) {
5143 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
5144"Element content declaration doesn't start and stop in the same entity\n",
5145 NULL);
5146 }
5147 SKIP(2);
5148 } else {
5149 xmlFreeDocElementContent(ctxt->myDoc, ret);
5150 xmlFatalErr(ctxt, XML_ERR_MIXED_NOT_STARTED, NULL);
5151 return(NULL);
5152 }
5153
5154 } else {
5155 xmlFatalErr(ctxt, XML_ERR_PCDATA_REQUIRED, NULL);
5156 }
5157 return(ret);
5158}
5159
5160/**
5161 * xmlParseElementChildrenContentDecl:
5162 * @ctxt: an XML parser context
5163 * @inputchk: the input used for the current entity, needed for boundary checks
5164 *
5165 * parse the declaration for a Mixed Element content
5166 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
5167 *
5168 *
5169 * [47] children ::= (choice | seq) ('?' | '*' | '+')?
5170 *
5171 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
5172 *
5173 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
5174 *
5175 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
5176 *
5177 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
5178 * TODO Parameter-entity replacement text must be properly nested
5179 * with parenthesized groups. That is to say, if either of the
5180 * opening or closing parentheses in a choice, seq, or Mixed
5181 * construct is contained in the replacement text for a parameter
5182 * entity, both must be contained in the same replacement text. For
5183 * interoperability, if a parameter-entity reference appears in a
5184 * choice, seq, or Mixed construct, its replacement text should not
5185 * be empty, and neither the first nor last non-blank character of
5186 * the replacement text should be a connector (| or ,).
5187 *
5188 * Returns the tree of xmlElementContentPtr describing the element
5189 * hierarchy.
5190 */
5191xmlElementContentPtr
5192xmlParseElementChildrenContentDecl (xmlParserCtxtPtr ctxt, int inputchk) {
5193 xmlElementContentPtr ret = NULL, cur = NULL, last = NULL, op = NULL;
5194 const xmlChar *elem;
5195 xmlChar type = 0;
5196
5197 if (ctxt->depth > 128) {
5198 xmlFatalErrMsgInt(ctxt, XML_ERR_ELEMCONTENT_NOT_FINISHED,
5199 "xmlParseElementChildrenContentDecl : depth %d too deep\n",
5200 ctxt->depth);
5201 return(NULL);
5202 }
5203
5204 SKIP_BLANKS;
5205 GROW;
5206 if (RAW == '(') {
5207 int inputid = ctxt->input->id;
5208
5209 /* Recurse on first child */
5210 NEXT;
5211 SKIP_BLANKS;
5212 ctxt->depth++;
5213 cur = ret = xmlParseElementChildrenContentDecl(ctxt, inputid);
5214 ctxt->depth--;
5215 SKIP_BLANKS;
5216 GROW;
5217 } else {
5218 elem = xmlParseName(ctxt);
5219 if (elem == NULL) {
5220 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
5221 return(NULL);
5222 }
5223 cur = ret = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
5224 if (cur == NULL) {
5225 xmlErrMemory(ctxt, NULL);
5226 return(NULL);
5227 }
5228 GROW;
5229 if (RAW == '?') {
5230 cur->ocur = XML_ELEMENT_CONTENT_OPT;
5231 NEXT;
5232 } else if (RAW == '*') {
5233 cur->ocur = XML_ELEMENT_CONTENT_MULT;
5234 NEXT;
5235 } else if (RAW == '+') {
5236 cur->ocur = XML_ELEMENT_CONTENT_PLUS;
5237 NEXT;
5238 } else {
5239 cur->ocur = XML_ELEMENT_CONTENT_ONCE;
5240 }
5241 GROW;
5242 }
5243 SKIP_BLANKS;
5244 SHRINK;
5245 while (RAW != ')') {
5246 /*
5247 * Each loop we parse one separator and one element.
5248 */
5249 if (RAW == ',') {
5250 if (type == 0) type = CUR;
5251
5252 /*
5253 * Detect "Name | Name , Name" error
5254 */
5255 else if (type != CUR) {
5256 xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
5257 "xmlParseElementChildrenContentDecl : '%c' expected\n",
5258 type);
5259 if ((last != NULL) && (last != ret))
5260 xmlFreeDocElementContent(ctxt->myDoc, last);
5261 if (ret != NULL)
5262 xmlFreeDocElementContent(ctxt->myDoc, ret);
5263 return(NULL);
5264 }
5265 NEXT;
5266
5267 op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_SEQ);
5268 if (op == NULL) {
5269 if ((last != NULL) && (last != ret))
5270 xmlFreeDocElementContent(ctxt->myDoc, last);
5271 xmlFreeDocElementContent(ctxt->myDoc, ret);
5272 return(NULL);
5273 }
5274 if (last == NULL) {
5275 op->c1 = ret;
5276 if (ret != NULL)
5277 ret->parent = op;
5278 ret = cur = op;
5279 } else {
5280 cur->c2 = op;
5281 if (op != NULL)
5282 op->parent = cur;
5283 op->c1 = last;
5284 if (last != NULL)
5285 last->parent = op;
5286 cur =op;
5287 last = NULL;
5288 }
5289 } else if (RAW == '|') {
5290 if (type == 0) type = CUR;
5291
5292 /*
5293 * Detect "Name , Name | Name" error
5294 */
5295 else if (type != CUR) {
5296 xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
5297 "xmlParseElementChildrenContentDecl : '%c' expected\n",
5298 type);
5299 if ((last != NULL) && (last != ret))
5300 xmlFreeDocElementContent(ctxt->myDoc, last);
5301 if (ret != NULL)
5302 xmlFreeDocElementContent(ctxt->myDoc, ret);
5303 return(NULL);
5304 }
5305 NEXT;
5306
5307 op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
5308 if (op == NULL) {
5309 if ((last != NULL) && (last != ret))
5310 xmlFreeDocElementContent(ctxt->myDoc, last);
5311 if (ret != NULL)
5312 xmlFreeDocElementContent(ctxt->myDoc, ret);
5313 return(NULL);
5314 }
5315 if (last == NULL) {
5316 op->c1 = ret;
5317 if (ret != NULL)
5318 ret->parent = op;
5319 ret = cur = op;
5320 } else {
5321 cur->c2 = op;
5322 if (op != NULL)
5323 op->parent = cur;
5324 op->c1 = last;
5325 if (last != NULL)
5326 last->parent = op;
5327 cur =op;
5328 last = NULL;
5329 }
5330 } else {
5331 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_FINISHED, NULL);
5332 if (ret != NULL)
5333 xmlFreeDocElementContent(ctxt->myDoc, ret);
5334 return(NULL);
5335 }
5336 GROW;
5337 SKIP_BLANKS;
5338 GROW;
5339 if (RAW == '(') {
5340 int inputid = ctxt->input->id;
5341 /* Recurse on second child */
5342 NEXT;
5343 SKIP_BLANKS;
5344 ctxt->depth++;
5345 last = xmlParseElementChildrenContentDecl(ctxt, inputid);
5346 ctxt->depth--;
5347 SKIP_BLANKS;
5348 } else {
5349 elem = xmlParseName(ctxt);
5350 if (elem == NULL) {
5351 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
5352 if (ret != NULL)
5353 xmlFreeDocElementContent(ctxt->myDoc, ret);
5354 return(NULL);
5355 }
5356 last = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
5357 if (RAW == '?') {
5358 last->ocur = XML_ELEMENT_CONTENT_OPT;
5359 NEXT;
5360 } else if (RAW == '*') {
5361 last->ocur = XML_ELEMENT_CONTENT_MULT;
5362 NEXT;
5363 } else if (RAW == '+') {
5364 last->ocur = XML_ELEMENT_CONTENT_PLUS;
5365 NEXT;
5366 } else {
5367 last->ocur = XML_ELEMENT_CONTENT_ONCE;
5368 }
5369 }
5370 SKIP_BLANKS;
5371 GROW;
5372 }
5373 if ((cur != NULL) && (last != NULL)) {
5374 cur->c2 = last;
5375 if (last != NULL)
5376 last->parent = cur;
5377 }
5378 if ((ctxt->validate) && (ctxt->input->id != inputchk)) {
5379 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
5380"Element content declaration doesn't start and stop in the same entity\n",
5381 NULL);
5382 }
5383 NEXT;
5384 if (RAW == '?') {
5385 if (ret != NULL) {
5386 if ((ret->ocur == XML_ELEMENT_CONTENT_PLUS) ||
5387 (ret->ocur == XML_ELEMENT_CONTENT_MULT))
5388 ret->ocur = XML_ELEMENT_CONTENT_MULT;
5389 else
5390 ret->ocur = XML_ELEMENT_CONTENT_OPT;
5391 }
5392 NEXT;
5393 } else if (RAW == '*') {
5394 if (ret != NULL) {
5395 ret->ocur = XML_ELEMENT_CONTENT_MULT;
5396 cur = ret;
5397 /*
5398 * Some normalization:
5399 * (a | b* | c?)* == (a | b | c)*
5400 */
5401 while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) {
5402 if ((cur->c1 != NULL) &&
5403 ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
5404 (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT)))
5405 cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
5406 if ((cur->c2 != NULL) &&
5407 ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
5408 (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT)))
5409 cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
5410 cur = cur->c2;
5411 }
5412 }
5413 NEXT;
5414 } else if (RAW == '+') {
5415 if (ret != NULL) {
5416 int found = 0;
5417
5418 if ((ret->ocur == XML_ELEMENT_CONTENT_OPT) ||
5419 (ret->ocur == XML_ELEMENT_CONTENT_MULT))
5420 ret->ocur = XML_ELEMENT_CONTENT_MULT;
5421 else
5422 ret->ocur = XML_ELEMENT_CONTENT_PLUS;
5423 /*
5424 * Some normalization:
5425 * (a | b*)+ == (a | b)*
5426 * (a | b?)+ == (a | b)*
5427 */
5428 while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) {
5429 if ((cur->c1 != NULL) &&
5430 ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
5431 (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT))) {
5432 cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
5433 found = 1;
5434 }
5435 if ((cur->c2 != NULL) &&
5436 ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
5437 (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT))) {
5438 cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
5439 found = 1;
5440 }
5441 cur = cur->c2;
5442 }
5443 if (found)
5444 ret->ocur = XML_ELEMENT_CONTENT_MULT;
5445 }
5446 NEXT;
5447 }
5448 return(ret);
5449}
5450
5451/**
5452 * xmlParseElementContentDecl:
5453 * @ctxt: an XML parser context
5454 * @name: the name of the element being defined.
5455 * @result: the Element Content pointer will be stored here if any
5456 *
5457 * parse the declaration for an Element content either Mixed or Children,
5458 * the cases EMPTY and ANY are handled directly in xmlParseElementDecl
5459 *
5460 * [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
5461 *
5462 * returns: the type of element content XML_ELEMENT_TYPE_xxx
5463 */
5464
5465int
5466xmlParseElementContentDecl(xmlParserCtxtPtr ctxt, const xmlChar *name,
5467 xmlElementContentPtr *result) {
5468
5469 xmlElementContentPtr tree = NULL;
5470 int inputid = ctxt->input->id;
5471 int res;
5472
5473 *result = NULL;
5474
5475 if (RAW != '(') {
5476 xmlFatalErrMsgStr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
5477 "xmlParseElementContentDecl : %s '(' expected\n", name);
5478 return(-1);
5479 }
5480 NEXT;
5481 GROW;
5482 SKIP_BLANKS;
5483 if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
5484 tree = xmlParseElementMixedContentDecl(ctxt, inputid);
5485 res = XML_ELEMENT_TYPE_MIXED;
5486 } else {
5487 tree = xmlParseElementChildrenContentDecl(ctxt, inputid);
5488 res = XML_ELEMENT_TYPE_ELEMENT;
5489 }
5490 SKIP_BLANKS;
5491 *result = tree;
5492 return(res);
5493}
5494
5495/**
5496 * xmlParseElementDecl:
5497 * @ctxt: an XML parser context
5498 *
5499 * parse an Element declaration.
5500 *
5501 * [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'
5502 *
5503 * [ VC: Unique Element Type Declaration ]
5504 * No element type may be declared more than once
5505 *
5506 * Returns the type of the element, or -1 in case of error
5507 */
5508int
5509xmlParseElementDecl(xmlParserCtxtPtr ctxt) {
5510 const xmlChar *name;
5511 int ret = -1;
5512 xmlElementContentPtr content = NULL;
5513
5514 /* GROW; done in the caller */
5515 if (CMP9(CUR_PTR, '<', '!', 'E', 'L', 'E', 'M', 'E', 'N', 'T')) {
5516 xmlParserInputPtr input = ctxt->input;
5517
5518 SKIP(9);
5519 if (!IS_BLANK_CH(CUR)) {
5520 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5521 "Space required after 'ELEMENT'\n");
5522 }
5523 SKIP_BLANKS;
5524 name = xmlParseName(ctxt);
5525 if (name == NULL) {
5526 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5527 "xmlParseElementDecl: no name for Element\n");
5528 return(-1);
5529 }
5530 while ((RAW == 0) && (ctxt->inputNr > 1))
5531 xmlPopInput(ctxt);
5532 if (!IS_BLANK_CH(CUR)) {
5533 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5534 "Space required after the element name\n");
5535 }
5536 SKIP_BLANKS;
5537 if (CMP5(CUR_PTR, 'E', 'M', 'P', 'T', 'Y')) {
5538 SKIP(5);
5539 /*
5540 * Element must always be empty.
5541 */
5542 ret = XML_ELEMENT_TYPE_EMPTY;
5543 } else if ((RAW == 'A') && (NXT(1) == 'N') &&
5544 (NXT(2) == 'Y')) {
5545 SKIP(3);
5546 /*
5547 * Element is a generic container.
5548 */
5549 ret = XML_ELEMENT_TYPE_ANY;
5550 } else if (RAW == '(') {
5551 ret = xmlParseElementContentDecl(ctxt, name, &content);
5552 } else {
5553 /*
5554 * [ WFC: PEs in Internal Subset ] error handling.
5555 */
5556 if ((RAW == '%') && (ctxt->external == 0) &&
5557 (ctxt->inputNr == 1)) {
5558 xmlFatalErrMsg(ctxt, XML_ERR_PEREF_IN_INT_SUBSET,
5559 "PEReference: forbidden within markup decl in internal subset\n");
5560 } else {
5561 xmlFatalErrMsg(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
5562 "xmlParseElementDecl: 'EMPTY', 'ANY' or '(' expected\n");
5563 }
5564 return(-1);
5565 }
5566
5567 SKIP_BLANKS;
5568 /*
5569 * Pop-up of finished entities.
5570 */
5571 while ((RAW == 0) && (ctxt->inputNr > 1))
5572 xmlPopInput(ctxt);
5573 SKIP_BLANKS;
5574
5575 if (RAW != '>') {
5576 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
5577 if (content != NULL) {
5578 xmlFreeDocElementContent(ctxt->myDoc, content);
5579 }
5580 } else {
5581 if (input != ctxt->input) {
5582 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5583 "Element declaration doesn't start and stop in the same entity\n");
5584 }
5585
5586 NEXT;
5587 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5588 (ctxt->sax->elementDecl != NULL)) {
5589 if (content != NULL)
5590 content->parent = NULL;
5591 ctxt->sax->elementDecl(ctxt->userData, name, ret,
5592 content);
5593 if ((content != NULL) && (content->parent == NULL)) {
5594 /*
5595 * this is a trick: if xmlAddElementDecl is called,
5596 * instead of copying the full tree it is plugged directly
5597 * if called from the parser. Avoid duplicating the
5598 * interfaces or change the API/ABI
5599 */
5600 xmlFreeDocElementContent(ctxt->myDoc, content);
5601 }
5602 } else if (content != NULL) {
5603 xmlFreeDocElementContent(ctxt->myDoc, content);
5604 }
5605 }
5606 }
5607 return(ret);
5608}
5609
5610/**
5611 * xmlParseConditionalSections
5612 * @ctxt: an XML parser context
5613 *
5614 * [61] conditionalSect ::= includeSect | ignoreSect
5615 * [62] includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>'
5616 * [63] ignoreSect ::= '<![' S? 'IGNORE' S? '[' ignoreSectContents* ']]>'
5617 * [64] ignoreSectContents ::= Ignore ('<![' ignoreSectContents ']]>' Ignore)*
5618 * [65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*)
5619 */
5620
5621static void
5622xmlParseConditionalSections(xmlParserCtxtPtr ctxt) {
5623 SKIP(3);
5624 SKIP_BLANKS;
5625 if (CMP7(CUR_PTR, 'I', 'N', 'C', 'L', 'U', 'D', 'E')) {
5626 SKIP(7);
5627 SKIP_BLANKS;
5628 if (RAW != '[') {
5629 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
5630 } else {
5631 NEXT;
5632 }
5633 if (xmlParserDebugEntities) {
5634 if ((ctxt->input != NULL) && (ctxt->input->filename))
5635 xmlGenericError(xmlGenericErrorContext,
5636 "%s(%d): ", ctxt->input->filename,
5637 ctxt->input->line);
5638 xmlGenericError(xmlGenericErrorContext,
5639 "Entering INCLUDE Conditional Section\n");
5640 }
5641
5642 while ((RAW != 0) && ((RAW != ']') || (NXT(1) != ']') ||
5643 (NXT(2) != '>'))) {
5644 const xmlChar *check = CUR_PTR;
5645 unsigned int cons = ctxt->input->consumed;
5646
5647 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
5648 xmlParseConditionalSections(ctxt);
5649 } else if (IS_BLANK_CH(CUR)) {
5650 NEXT;
5651 } else if (RAW == '%') {
5652 xmlParsePEReference(ctxt);
5653 } else
5654 xmlParseMarkupDecl(ctxt);
5655
5656 /*
5657 * Pop-up of finished entities.
5658 */
5659 while ((RAW == 0) && (ctxt->inputNr > 1))
5660 xmlPopInput(ctxt);
5661
5662 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
5663 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
5664 break;
5665 }
5666 }
5667 if (xmlParserDebugEntities) {
5668 if ((ctxt->input != NULL) && (ctxt->input->filename))
5669 xmlGenericError(xmlGenericErrorContext,
5670 "%s(%d): ", ctxt->input->filename,
5671 ctxt->input->line);
5672 xmlGenericError(xmlGenericErrorContext,
5673 "Leaving INCLUDE Conditional Section\n");
5674 }
5675
5676 } else if (CMP6(CUR_PTR, 'I', 'G', 'N', 'O', 'R', 'E')) {
5677 int state;
5678 xmlParserInputState instate;
5679 int depth = 0;
5680
5681 SKIP(6);
5682 SKIP_BLANKS;
5683 if (RAW != '[') {
5684 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
5685 } else {
5686 NEXT;
5687 }
5688 if (xmlParserDebugEntities) {
5689 if ((ctxt->input != NULL) && (ctxt->input->filename))
5690 xmlGenericError(xmlGenericErrorContext,
5691 "%s(%d): ", ctxt->input->filename,
5692 ctxt->input->line);
5693 xmlGenericError(xmlGenericErrorContext,
5694 "Entering IGNORE Conditional Section\n");
5695 }
5696
5697 /*
5698 * Parse up to the end of the conditional section
5699 * But disable SAX event generating DTD building in the meantime
5700 */
5701 state = ctxt->disableSAX;
5702 instate = ctxt->instate;
5703 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
5704 ctxt->instate = XML_PARSER_IGNORE;
5705
5706 while ((depth >= 0) && (RAW != 0)) {
5707 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
5708 depth++;
5709 SKIP(3);
5710 continue;
5711 }
5712 if ((RAW == ']') && (NXT(1) == ']') && (NXT(2) == '>')) {
5713 if (--depth >= 0) SKIP(3);
5714 continue;
5715 }
5716 NEXT;
5717 continue;
5718 }
5719
5720 ctxt->disableSAX = state;
5721 ctxt->instate = instate;
5722
5723 if (xmlParserDebugEntities) {
5724 if ((ctxt->input != NULL) && (ctxt->input->filename))
5725 xmlGenericError(xmlGenericErrorContext,
5726 "%s(%d): ", ctxt->input->filename,
5727 ctxt->input->line);
5728 xmlGenericError(xmlGenericErrorContext,
5729 "Leaving IGNORE Conditional Section\n");
5730 }
5731
5732 } else {
5733 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID_KEYWORD, NULL);
5734 }
5735
5736 if (RAW == 0)
5737 SHRINK;
5738
5739 if (RAW == 0) {
5740 xmlFatalErr(ctxt, XML_ERR_CONDSEC_NOT_FINISHED, NULL);
5741 } else {
5742 SKIP(3);
5743 }
5744}
5745
5746/**
5747 * xmlParseMarkupDecl:
5748 * @ctxt: an XML parser context
5749 *
5750 * parse Markup declarations
5751 *
5752 * [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl |
5753 * NotationDecl | PI | Comment
5754 *
5755 * [ VC: Proper Declaration/PE Nesting ]
5756 * Parameter-entity replacement text must be properly nested with
5757 * markup declarations. That is to say, if either the first character
5758 * or the last character of a markup declaration (markupdecl above) is
5759 * contained in the replacement text for a parameter-entity reference,
5760 * both must be contained in the same replacement text.
5761 *
5762 * [ WFC: PEs in Internal Subset ]
5763 * In the internal DTD subset, parameter-entity references can occur
5764 * only where markup declarations can occur, not within markup declarations.
5765 * (This does not apply to references that occur in external parameter
5766 * entities or to the external subset.)
5767 */
5768void
5769xmlParseMarkupDecl(xmlParserCtxtPtr ctxt) {
5770 GROW;
5771 if (CUR == '<') {
5772 if (NXT(1) == '!') {
5773 switch (NXT(2)) {
5774 case 'E':
5775 if (NXT(3) == 'L')
5776 xmlParseElementDecl(ctxt);
5777 else if (NXT(3) == 'N')
5778 xmlParseEntityDecl(ctxt);
5779 break;
5780 case 'A':
5781 xmlParseAttributeListDecl(ctxt);
5782 break;
5783 case 'N':
5784 xmlParseNotationDecl(ctxt);
5785 break;
5786 case '-':
5787 xmlParseComment(ctxt);
5788 break;
5789 default:
5790 /* there is an error but it will be detected later */
5791 break;
5792 }
5793 } else if (NXT(1) == '?') {
5794 xmlParsePI(ctxt);
5795 }
5796 }
5797 /*
5798 * This is only for internal subset. On external entities,
5799 * the replacement is done before parsing stage
5800 */
5801 if ((ctxt->external == 0) && (ctxt->inputNr == 1))
5802 xmlParsePEReference(ctxt);
5803
5804 /*
5805 * Conditional sections are allowed from entities included
5806 * by PE References in the internal subset.
5807 */
5808 if ((ctxt->external == 0) && (ctxt->inputNr > 1)) {
5809 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
5810 xmlParseConditionalSections(ctxt);
5811 }
5812 }
5813
5814 ctxt->instate = XML_PARSER_DTD;
5815}
5816
5817/**
5818 * xmlParseTextDecl:
5819 * @ctxt: an XML parser context
5820 *
5821 * parse an XML declaration header for external entities
5822 *
5823 * [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>'
5824 *
5825 * Question: Seems that EncodingDecl is mandatory ? Is that a typo ?
5826 */
5827
5828void
5829xmlParseTextDecl(xmlParserCtxtPtr ctxt) {
5830 xmlChar *version;
5831 const xmlChar *encoding;
5832
5833 /*
5834 * We know that '<?xml' is here.
5835 */
5836 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
5837 SKIP(5);
5838 } else {
5839 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_STARTED, NULL);
5840 return;
5841 }
5842
5843 if (!IS_BLANK_CH(CUR)) {
5844 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5845 "Space needed after '<?xml'\n");
5846 }
5847 SKIP_BLANKS;
5848
5849 /*
5850 * We may have the VersionInfo here.
5851 */
5852 version = xmlParseVersionInfo(ctxt);
5853 if (version == NULL)
5854 version = xmlCharStrdup(XML_DEFAULT_VERSION);
5855 else {
5856 if (!IS_BLANK_CH(CUR)) {
5857 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5858 "Space needed here\n");
5859 }
5860 }
5861 ctxt->input->version = version;
5862
5863 /*
5864 * We must have the encoding declaration
5865 */
5866 encoding = xmlParseEncodingDecl(ctxt);
5867 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
5868 /*
5869 * The XML REC instructs us to stop parsing right here
5870 */
5871 return;
5872 }
5873 if ((encoding == NULL) && (ctxt->errNo == XML_ERR_OK)) {
5874 xmlFatalErrMsg(ctxt, XML_ERR_MISSING_ENCODING,
5875 "Missing encoding in text declaration\n");
5876 }
5877
5878 SKIP_BLANKS;
5879 if ((RAW == '?') && (NXT(1) == '>')) {
5880 SKIP(2);
5881 } else if (RAW == '>') {
5882 /* Deprecated old WD ... */
5883 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
5884 NEXT;
5885 } else {
5886 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
5887 MOVETO_ENDTAG(CUR_PTR);
5888 NEXT;
5889 }
5890}
5891
5892/**
5893 * xmlParseExternalSubset:
5894 * @ctxt: an XML parser context
5895 * @ExternalID: the external identifier
5896 * @SystemID: the system identifier (or URL)
5897 *
5898 * parse Markup declarations from an external subset
5899 *
5900 * [30] extSubset ::= textDecl? extSubsetDecl
5901 *
5902 * [31] extSubsetDecl ::= (markupdecl | conditionalSect | PEReference | S) *
5903 */
5904void
5905xmlParseExternalSubset(xmlParserCtxtPtr ctxt, const xmlChar *ExternalID,
5906 const xmlChar *SystemID) {
5907 xmlDetectSAX2(ctxt);
5908 GROW;
5909 if (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) {
5910 xmlParseTextDecl(ctxt);
5911 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
5912 /*
5913 * The XML REC instructs us to stop parsing right here
5914 */
5915 ctxt->instate = XML_PARSER_EOF;
5916 return;
5917 }
5918 }
5919 if (ctxt->myDoc == NULL) {
5920 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
5921 }
5922 if ((ctxt->myDoc != NULL) && (ctxt->myDoc->intSubset == NULL))
5923 xmlCreateIntSubset(ctxt->myDoc, NULL, ExternalID, SystemID);
5924
5925 ctxt->instate = XML_PARSER_DTD;
5926 ctxt->external = 1;
5927 while (((RAW == '<') && (NXT(1) == '?')) ||
5928 ((RAW == '<') && (NXT(1) == '!')) ||
5929 (RAW == '%') || IS_BLANK_CH(CUR)) {
5930 const xmlChar *check = CUR_PTR;
5931 unsigned int cons = ctxt->input->consumed;
5932
5933 GROW;
5934 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
5935 xmlParseConditionalSections(ctxt);
5936 } else if (IS_BLANK_CH(CUR)) {
5937 NEXT;
5938 } else if (RAW == '%') {
5939 xmlParsePEReference(ctxt);
5940 } else
5941 xmlParseMarkupDecl(ctxt);
5942
5943 /*
5944 * Pop-up of finished entities.
5945 */
5946 while ((RAW == 0) && (ctxt->inputNr > 1))
5947 xmlPopInput(ctxt);
5948
5949 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
5950 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
5951 break;
5952 }
5953 }
5954
5955 if (RAW != 0) {
5956 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
5957 }
5958
5959}
5960
5961/**
5962 * xmlParseReference:
5963 * @ctxt: an XML parser context
5964 *
5965 * parse and handle entity references in content, depending on the SAX
5966 * interface, this may end-up in a call to character() if this is a
5967 * CharRef, a predefined entity, if there is no reference() callback.
5968 * or if the parser was asked to switch to that mode.
5969 *
5970 * [67] Reference ::= EntityRef | CharRef
5971 */
5972void
5973xmlParseReference(xmlParserCtxtPtr ctxt) {
5974 xmlEntityPtr ent;
5975 xmlChar *val;
5976 if (RAW != '&') return;
5977
5978 if (NXT(1) == '#') {
5979 int i = 0;
5980 xmlChar out[10];
5981 int hex = NXT(2);
5982 int value = xmlParseCharRef(ctxt);
5983
5984 if (ctxt->charset != XML_CHAR_ENCODING_UTF8) {
5985 /*
5986 * So we are using non-UTF-8 buffers
5987 * Check that the char fit on 8bits, if not
5988 * generate a CharRef.
5989 */
5990 if (value <= 0xFF) {
5991 out[0] = value;
5992 out[1] = 0;
5993 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
5994 (!ctxt->disableSAX))
5995 ctxt->sax->characters(ctxt->userData, out, 1);
5996 } else {
5997 if ((hex == 'x') || (hex == 'X'))
5998 snprintf((char *)out, sizeof(out), "#x%X", value);
5999 else
6000 snprintf((char *)out, sizeof(out), "#%d", value);
6001 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
6002 (!ctxt->disableSAX))
6003 ctxt->sax->reference(ctxt->userData, out);
6004 }
6005 } else {
6006 /*
6007 * Just encode the value in UTF-8
6008 */
6009 COPY_BUF(0 ,out, i, value);
6010 out[i] = 0;
6011 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
6012 (!ctxt->disableSAX))
6013 ctxt->sax->characters(ctxt->userData, out, i);
6014 }
6015 } else {
6016 int was_checked;
6017
6018 ent = xmlParseEntityRef(ctxt);
6019 if (ent == NULL) return;
6020 if (!ctxt->wellFormed)
6021 return;
6022 ctxt->nbentities++;
6023 if (ctxt->nbentities >= 500000) {
6024 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
6025 return;
6026 }
6027 was_checked = ent->checked;
6028 if ((ent->name != NULL) &&
6029 (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY)) {
6030 xmlNodePtr list = NULL;
6031 xmlParserErrors ret = XML_ERR_OK;
6032
6033
6034 /*
6035 * The first reference to the entity trigger a parsing phase
6036 * where the ent->children is filled with the result from
6037 * the parsing.
6038 */
6039 if (ent->checked == 0) {
6040 xmlChar *value;
6041
6042 value = ent->content;
6043
6044 /*
6045 * Check that this entity is well formed
6046 */
6047 if ((value != NULL) && (value[0] != 0) &&
6048 (value[1] == 0) && (value[0] == '<') &&
6049 (xmlStrEqual(ent->name, BAD_CAST "lt"))) {
6050 /*
6051 * DONE: get definite answer on this !!!
6052 * Lots of entity decls are used to declare a single
6053 * char
6054 * <!ENTITY lt "<">
6055 * Which seems to be valid since
6056 * 2.4: The ampersand character (&) and the left angle
6057 * bracket (<) may appear in their literal form only
6058 * when used ... They are also legal within the literal
6059 * entity value of an internal entity declaration;i
6060 * see "4.3.2 Well-Formed Parsed Entities".
6061 * IMHO 2.4 and 4.3.2 are directly in contradiction.
6062 * Looking at the OASIS test suite and James Clark
6063 * tests, this is broken. However the XML REC uses
6064 * it. Is the XML REC not well-formed ????
6065 * This is a hack to avoid this problem
6066 *
6067 * ANSWER: since lt gt amp .. are already defined,
6068 * this is a redefinition and hence the fact that the
6069 * content is not well balanced is not a Wf error, this
6070 * is lousy but acceptable.
6071 */
6072 list = xmlNewDocText(ctxt->myDoc, value);
6073 if (list != NULL) {
6074 if ((ent->etype == XML_INTERNAL_GENERAL_ENTITY) &&
6075 (ent->children == NULL)) {
6076 ent->children = list;
6077 ent->last = list;
6078 ent->owner = 1;
6079 list->parent = (xmlNodePtr) ent;
6080 } else {
6081 xmlFreeNodeList(list);
6082 }
6083 } else if (list != NULL) {
6084 xmlFreeNodeList(list);
6085 }
6086 } else {
6087 unsigned long oldnbent = ctxt->nbentities;
6088 /*
6089 * 4.3.2: An internal general parsed entity is well-formed
6090 * if its replacement text matches the production labeled
6091 * content.
6092 */
6093
6094 void *user_data;
6095 /*
6096 * This is a bit hackish but this seems the best
6097 * way to make sure both SAX and DOM entity support
6098 * behaves okay.
6099 */
6100 if (ctxt->userData == ctxt)
6101 user_data = NULL;
6102 else
6103 user_data = ctxt->userData;
6104
6105 if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
6106 ctxt->depth++;
6107 ret = xmlParseBalancedChunkMemoryInternal(ctxt,
6108 value, user_data, &list);
6109 ctxt->depth--;
6110 } else if (ent->etype ==
6111 XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
6112 ctxt->depth++;
6113 ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt,
6114 ctxt->sax, user_data, ctxt->depth,
6115 ent->URI, ent->ExternalID, &list);
6116 ctxt->depth--;
6117 } else {
6118 ret = XML_ERR_ENTITY_PE_INTERNAL;
6119 xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR,
6120 "invalid entity type found\n", NULL);
6121 }
6122 ent->checked = ctxt->nbentities - oldnbent;
6123 if (ret == XML_ERR_ENTITY_LOOP) {
6124 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
6125 return;
6126 } else if ((ret == XML_ERR_OK) && (list != NULL)) {
6127 if (((ent->etype == XML_INTERNAL_GENERAL_ENTITY) ||
6128 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY))&&
6129 (ent->children == NULL)) {
6130 ent->children = list;
6131 if (ctxt->replaceEntities) {
6132 /*
6133 * Prune it directly in the generated document
6134 * except for single text nodes.
6135 */
6136 if (((list->type == XML_TEXT_NODE) &&
6137 (list->next == NULL)) ||
6138 (ctxt->parseMode == XML_PARSE_READER)) {
6139 list->parent = (xmlNodePtr) ent;
6140 list = NULL;
6141 ent->owner = 1;
6142 } else {
6143 ent->owner = 0;
6144 while (list != NULL) {
6145 list->parent = (xmlNodePtr) ctxt->node;
6146 list->doc = ctxt->myDoc;
6147 if (list->next == NULL)
6148 ent->last = list;
6149 list = list->next;
6150 }
6151 list = ent->children;
6152#ifdef LIBXML_LEGACY_ENABLED
6153 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
6154 xmlAddEntityReference(ent, list, NULL);
6155#endif /* LIBXML_LEGACY_ENABLED */
6156 }
6157 } else {
6158 ent->owner = 1;
6159 while (list != NULL) {
6160 list->parent = (xmlNodePtr) ent;
6161 if (list->next == NULL)
6162 ent->last = list;
6163 list = list->next;
6164 }
6165 }
6166 } else {
6167 xmlFreeNodeList(list);
6168 list = NULL;
6169 }
6170 } else if ((ret != XML_ERR_OK) &&
6171 (ret != XML_WAR_UNDECLARED_ENTITY)) {
6172 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
6173 "Entity '%s' failed to parse\n", ent->name);
6174 } else if (list != NULL) {
6175 xmlFreeNodeList(list);
6176 list = NULL;
6177 }
6178 }
6179 if (ent->checked == 0)
6180 ent->checked = 1;
6181 }
6182 ctxt->nbentities += ent->checked;
6183
6184 if (ent->children == NULL) {
6185 /*
6186 * Probably running in SAX mode and the callbacks don't
6187 * build the entity content. So unless we already went
6188 * though parsing for first checking go though the entity
6189 * content to generate callbacks associated to the entity
6190 */
6191 if (was_checked != 0) {
6192 void *user_data;
6193 /*
6194 * This is a bit hackish but this seems the best
6195 * way to make sure both SAX and DOM entity support
6196 * behaves okay.
6197 */
6198 if (ctxt->userData == ctxt)
6199 user_data = NULL;
6200 else
6201 user_data = ctxt->userData;
6202
6203 if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
6204 ctxt->depth++;
6205 ret = xmlParseBalancedChunkMemoryInternal(ctxt,
6206 ent->content, user_data, NULL);
6207 ctxt->depth--;
6208 } else if (ent->etype ==
6209 XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
6210 ctxt->depth++;
6211 ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt,
6212 ctxt->sax, user_data, ctxt->depth,
6213 ent->URI, ent->ExternalID, NULL);
6214 ctxt->depth--;
6215 } else {
6216 ret = XML_ERR_ENTITY_PE_INTERNAL;
6217 xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR,
6218 "invalid entity type found\n", NULL);
6219 }
6220 if (ret == XML_ERR_ENTITY_LOOP) {
6221 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
6222 return;
6223 }
6224 }
6225 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
6226 (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
6227 /*
6228 * Entity reference callback comes second, it's somewhat
6229 * superfluous but a compatibility to historical behaviour
6230 */
6231 ctxt->sax->reference(ctxt->userData, ent->name);
6232 }
6233 return;
6234 }
6235 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
6236 (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
6237 /*
6238 * Create a node.
6239 */
6240 ctxt->sax->reference(ctxt->userData, ent->name);
6241 return;
6242 }
6243 if ((ctxt->replaceEntities) || (ent->children == NULL)) {
6244 /*
6245 * There is a problem on the handling of _private for entities
6246 * (bug 155816): Should we copy the content of the field from
6247 * the entity (possibly overwriting some value set by the user
6248 * when a copy is created), should we leave it alone, or should
6249 * we try to take care of different situations? The problem
6250 * is exacerbated by the usage of this field by the xmlReader.
6251 * To fix this bug, we look at _private on the created node
6252 * and, if it's NULL, we copy in whatever was in the entity.
6253 * If it's not NULL we leave it alone. This is somewhat of a
6254 * hack - maybe we should have further tests to determine
6255 * what to do.
6256 */
6257 if ((ctxt->node != NULL) && (ent->children != NULL)) {
6258 /*
6259 * Seems we are generating the DOM content, do
6260 * a simple tree copy for all references except the first
6261 * In the first occurrence list contains the replacement.
6262 * progressive == 2 means we are operating on the Reader
6263 * and since nodes are discarded we must copy all the time.
6264 */
6265 if (((list == NULL) && (ent->owner == 0)) ||
6266 (ctxt->parseMode == XML_PARSE_READER)) {
6267 xmlNodePtr nw = NULL, cur, firstChild = NULL;
6268
6269 /*
6270 * when operating on a reader, the entities definitions
6271 * are always owning the entities subtree.
6272 if (ctxt->parseMode == XML_PARSE_READER)
6273 ent->owner = 1;
6274 */
6275
6276 cur = ent->children;
6277 while (cur != NULL) {
6278 nw = xmlDocCopyNode(cur, ctxt->myDoc, 1);
6279 if (nw != NULL) {
6280 if (nw->_private == NULL)
6281 nw->_private = cur->_private;
6282 if (firstChild == NULL){
6283 firstChild = nw;
6284 }
6285 nw = xmlAddChild(ctxt->node, nw);
6286 }
6287 if (cur == ent->last) {
6288 /*
6289 * needed to detect some strange empty
6290 * node cases in the reader tests
6291 */
6292 if ((ctxt->parseMode == XML_PARSE_READER) &&
6293 (nw != NULL) &&
6294 (nw->type == XML_ELEMENT_NODE) &&
6295 (nw->children == NULL))
6296 nw->extra = 1;
6297
6298 break;
6299 }
6300 cur = cur->next;
6301 }
6302#ifdef LIBXML_LEGACY_ENABLED
6303 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
6304 xmlAddEntityReference(ent, firstChild, nw);
6305#endif /* LIBXML_LEGACY_ENABLED */
6306 } else if (list == NULL) {
6307 xmlNodePtr nw = NULL, cur, next, last,
6308 firstChild = NULL;
6309 /*
6310 * Copy the entity child list and make it the new
6311 * entity child list. The goal is to make sure any
6312 * ID or REF referenced will be the one from the
6313 * document content and not the entity copy.
6314 */
6315 cur = ent->children;
6316 ent->children = NULL;
6317 last = ent->last;
6318 ent->last = NULL;
6319 while (cur != NULL) {
6320 next = cur->next;
6321 cur->next = NULL;
6322 cur->parent = NULL;
6323 nw = xmlDocCopyNode(cur, ctxt->myDoc, 1);
6324 if (nw != NULL) {
6325 if (nw->_private == NULL)
6326 nw->_private = cur->_private;
6327 if (firstChild == NULL){
6328 firstChild = cur;
6329 }
6330 xmlAddChild((xmlNodePtr) ent, nw);
6331 xmlAddChild(ctxt->node, cur);
6332 }
6333 if (cur == last)
6334 break;
6335 cur = next;
6336 }
6337 ent->owner = 1;
6338#ifdef LIBXML_LEGACY_ENABLED
6339 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
6340 xmlAddEntityReference(ent, firstChild, nw);
6341#endif /* LIBXML_LEGACY_ENABLED */
6342 } else {
6343 const xmlChar *nbktext;
6344
6345 /*
6346 * the name change is to avoid coalescing of the
6347 * node with a possible previous text one which
6348 * would make ent->children a dangling pointer
6349 */
6350 nbktext = xmlDictLookup(ctxt->dict, BAD_CAST "nbktext",
6351 -1);
6352 if (ent->children->type == XML_TEXT_NODE)
6353 ent->children->name = nbktext;
6354 if ((ent->last != ent->children) &&
6355 (ent->last->type == XML_TEXT_NODE))
6356 ent->last->name = nbktext;
6357 xmlAddChildList(ctxt->node, ent->children);
6358 }
6359
6360 /*
6361 * This is to avoid a nasty side effect, see
6362 * characters() in SAX.c
6363 */
6364 ctxt->nodemem = 0;
6365 ctxt->nodelen = 0;
6366 return;
6367 }
6368 }
6369 } else {
6370 val = ent->content;
6371 if (val == NULL) return;
6372 /*
6373 * inline the entity.
6374 */
6375 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
6376 (!ctxt->disableSAX))
6377 ctxt->sax->characters(ctxt->userData, val, xmlStrlen(val));
6378 }
6379 }
6380}
6381
6382/**
6383 * xmlParseEntityRef:
6384 * @ctxt: an XML parser context
6385 *
6386 * parse ENTITY references declarations
6387 *
6388 * [68] EntityRef ::= '&' Name ';'
6389 *
6390 * [ WFC: Entity Declared ]
6391 * In a document without any DTD, a document with only an internal DTD
6392 * subset which contains no parameter entity references, or a document
6393 * with "standalone='yes'", the Name given in the entity reference
6394 * must match that in an entity declaration, except that well-formed
6395 * documents need not declare any of the following entities: amp, lt,
6396 * gt, apos, quot. The declaration of a parameter entity must precede
6397 * any reference to it. Similarly, the declaration of a general entity
6398 * must precede any reference to it which appears in a default value in an
6399 * attribute-list declaration. Note that if entities are declared in the
6400 * external subset or in external parameter entities, a non-validating
6401 * processor is not obligated to read and process their declarations;
6402 * for such documents, the rule that an entity must be declared is a
6403 * well-formedness constraint only if standalone='yes'.
6404 *
6405 * [ WFC: Parsed Entity ]
6406 * An entity reference must not contain the name of an unparsed entity
6407 *
6408 * Returns the xmlEntityPtr if found, or NULL otherwise.
6409 */
6410xmlEntityPtr
6411xmlParseEntityRef(xmlParserCtxtPtr ctxt) {
6412 const xmlChar *name;
6413 xmlEntityPtr ent = NULL;
6414
6415 GROW;
6416
6417 if (RAW == '&') {
6418 NEXT;
6419 name = xmlParseName(ctxt);
6420 if (name == NULL) {
6421 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6422 "xmlParseEntityRef: no name\n");
6423 } else {
6424 if (RAW == ';') {
6425 NEXT;
6426 /*
6427 * Ask first SAX for entity resolution, otherwise try the
6428 * predefined set.
6429 */
6430 if (ctxt->sax != NULL) {
6431 if (ctxt->sax->getEntity != NULL)
6432 ent = ctxt->sax->getEntity(ctxt->userData, name);
6433 if ((ctxt->wellFormed == 1 ) && (ent == NULL))
6434 ent = xmlGetPredefinedEntity(name);
6435 if ((ctxt->wellFormed == 1 ) && (ent == NULL) &&
6436 (ctxt->userData==ctxt)) {
6437 ent = xmlSAX2GetEntity(ctxt, name);
6438 }
6439 }
6440 /*
6441 * [ WFC: Entity Declared ]
6442 * In a document without any DTD, a document with only an
6443 * internal DTD subset which contains no parameter entity
6444 * references, or a document with "standalone='yes'", the
6445 * Name given in the entity reference must match that in an
6446 * entity declaration, except that well-formed documents
6447 * need not declare any of the following entities: amp, lt,
6448 * gt, apos, quot.
6449 * The declaration of a parameter entity must precede any
6450 * reference to it.
6451 * Similarly, the declaration of a general entity must
6452 * precede any reference to it which appears in a default
6453 * value in an attribute-list declaration. Note that if
6454 * entities are declared in the external subset or in
6455 * external parameter entities, a non-validating processor
6456 * is not obligated to read and process their declarations;
6457 * for such documents, the rule that an entity must be
6458 * declared is a well-formedness constraint only if
6459 * standalone='yes'.
6460 */
6461 if (ent == NULL) {
6462 if ((ctxt->standalone == 1) ||
6463 ((ctxt->hasExternalSubset == 0) &&
6464 (ctxt->hasPErefs == 0))) {
6465 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
6466 "Entity '%s' not defined\n", name);
6467 } else {
6468 xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
6469 "Entity '%s' not defined\n", name);
6470 if ((ctxt->inSubset == 0) &&
6471 (ctxt->sax != NULL) &&
6472 (ctxt->sax->reference != NULL)) {
6473 ctxt->sax->reference(ctxt->userData, name);
6474 }
6475 }
6476 ctxt->valid = 0;
6477 }
6478
6479 /*
6480 * [ WFC: Parsed Entity ]
6481 * An entity reference must not contain the name of an
6482 * unparsed entity
6483 */
6484 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
6485 xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
6486 "Entity reference to unparsed entity %s\n", name);
6487 }
6488
6489 /*
6490 * [ WFC: No External Entity References ]
6491 * Attribute values cannot contain direct or indirect
6492 * entity references to external entities.
6493 */
6494 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
6495 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
6496 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
6497 "Attribute references external entity '%s'\n", name);
6498 }
6499 /*
6500 * [ WFC: No < in Attribute Values ]
6501 * The replacement text of any entity referred to directly or
6502 * indirectly in an attribute value (other than "&lt;") must
6503 * not contain a <.
6504 */
6505 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
6506 (ent != NULL) &&
6507 (!xmlStrEqual(ent->name, BAD_CAST "lt")) &&
6508 (ent->content != NULL) &&
6509 (xmlStrchr(ent->content, '<'))) {
6510 xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
6511 "'<' in entity '%s' is not allowed in attributes values\n", name);
6512 }
6513
6514 /*
6515 * Internal check, no parameter entities here ...
6516 */
6517 else {
6518 switch (ent->etype) {
6519 case XML_INTERNAL_PARAMETER_ENTITY:
6520 case XML_EXTERNAL_PARAMETER_ENTITY:
6521 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
6522 "Attempt to reference the parameter entity '%s'\n",
6523 name);
6524 break;
6525 default:
6526 break;
6527 }
6528 }
6529
6530 /*
6531 * [ WFC: No Recursion ]
6532 * A parsed entity must not contain a recursive reference
6533 * to itself, either directly or indirectly.
6534 * Done somewhere else
6535 */
6536
6537 } else {
6538 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
6539 }
6540 }
6541 }
6542 return(ent);
6543}
6544
6545/**
6546 * xmlParseStringEntityRef:
6547 * @ctxt: an XML parser context
6548 * @str: a pointer to an index in the string
6549 *
6550 * parse ENTITY references declarations, but this version parses it from
6551 * a string value.
6552 *
6553 * [68] EntityRef ::= '&' Name ';'
6554 *
6555 * [ WFC: Entity Declared ]
6556 * In a document without any DTD, a document with only an internal DTD
6557 * subset which contains no parameter entity references, or a document
6558 * with "standalone='yes'", the Name given in the entity reference
6559 * must match that in an entity declaration, except that well-formed
6560 * documents need not declare any of the following entities: amp, lt,
6561 * gt, apos, quot. The declaration of a parameter entity must precede
6562 * any reference to it. Similarly, the declaration of a general entity
6563 * must precede any reference to it which appears in a default value in an
6564 * attribute-list declaration. Note that if entities are declared in the
6565 * external subset or in external parameter entities, a non-validating
6566 * processor is not obligated to read and process their declarations;
6567 * for such documents, the rule that an entity must be declared is a
6568 * well-formedness constraint only if standalone='yes'.
6569 *
6570 * [ WFC: Parsed Entity ]
6571 * An entity reference must not contain the name of an unparsed entity
6572 *
6573 * Returns the xmlEntityPtr if found, or NULL otherwise. The str pointer
6574 * is updated to the current location in the string.
6575 */
6576xmlEntityPtr
6577xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar ** str) {
6578 xmlChar *name;
6579 const xmlChar *ptr;
6580 xmlChar cur;
6581 xmlEntityPtr ent = NULL;
6582
6583 if ((str == NULL) || (*str == NULL))
6584 return(NULL);
6585 ptr = *str;
6586 cur = *ptr;
6587 if (cur == '&') {
6588 ptr++;
6589 cur = *ptr;
6590 name = xmlParseStringName(ctxt, &ptr);
6591 if (name == NULL) {
6592 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6593 "xmlParseStringEntityRef: no name\n");
6594 } else {
6595 if (*ptr == ';') {
6596 ptr++;
6597 /*
6598 * Ask first SAX for entity resolution, otherwise try the
6599 * predefined set.
6600 */
6601 if (ctxt->sax != NULL) {
6602 if (ctxt->sax->getEntity != NULL)
6603 ent = ctxt->sax->getEntity(ctxt->userData, name);
6604 if (ent == NULL)
6605 ent = xmlGetPredefinedEntity(name);
6606 if ((ent == NULL) && (ctxt->userData==ctxt)) {
6607 ent = xmlSAX2GetEntity(ctxt, name);
6608 }
6609 }
6610 /*
6611 * [ WFC: Entity Declared ]
6612 * In a document without any DTD, a document with only an
6613 * internal DTD subset which contains no parameter entity
6614 * references, or a document with "standalone='yes'", the
6615 * Name given in the entity reference must match that in an
6616 * entity declaration, except that well-formed documents
6617 * need not declare any of the following entities: amp, lt,
6618 * gt, apos, quot.
6619 * The declaration of a parameter entity must precede any
6620 * reference to it.
6621 * Similarly, the declaration of a general entity must
6622 * precede any reference to it which appears in a default
6623 * value in an attribute-list declaration. Note that if
6624 * entities are declared in the external subset or in
6625 * external parameter entities, a non-validating processor
6626 * is not obligated to read and process their declarations;
6627 * for such documents, the rule that an entity must be
6628 * declared is a well-formedness constraint only if
6629 * standalone='yes'.
6630 */
6631 if (ent == NULL) {
6632 if ((ctxt->standalone == 1) ||
6633 ((ctxt->hasExternalSubset == 0) &&
6634 (ctxt->hasPErefs == 0))) {
6635 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
6636 "Entity '%s' not defined\n", name);
6637 } else {
6638 xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
6639 "Entity '%s' not defined\n",
6640 name);
6641 }
6642 /* TODO ? check regressions ctxt->valid = 0; */
6643 }
6644
6645 /*
6646 * [ WFC: Parsed Entity ]
6647 * An entity reference must not contain the name of an
6648 * unparsed entity
6649 */
6650 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
6651 xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
6652 "Entity reference to unparsed entity %s\n", name);
6653 }
6654
6655 /*
6656 * [ WFC: No External Entity References ]
6657 * Attribute values cannot contain direct or indirect
6658 * entity references to external entities.
6659 */
6660 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
6661 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
6662 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
6663 "Attribute references external entity '%s'\n", name);
6664 }
6665 /*
6666 * [ WFC: No < in Attribute Values ]
6667 * The replacement text of any entity referred to directly or
6668 * indirectly in an attribute value (other than "&lt;") must
6669 * not contain a <.
6670 */
6671 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
6672 (ent != NULL) &&
6673 (!xmlStrEqual(ent->name, BAD_CAST "lt")) &&
6674 (ent->content != NULL) &&
6675 (xmlStrchr(ent->content, '<'))) {
6676 xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
6677 "'<' in entity '%s' is not allowed in attributes values\n",
6678 name);
6679 }
6680
6681 /*
6682 * Internal check, no parameter entities here ...
6683 */
6684 else {
6685 switch (ent->etype) {
6686 case XML_INTERNAL_PARAMETER_ENTITY:
6687 case XML_EXTERNAL_PARAMETER_ENTITY:
6688 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
6689 "Attempt to reference the parameter entity '%s'\n",
6690 name);
6691 break;
6692 default:
6693 break;
6694 }
6695 }
6696
6697 /*
6698 * [ WFC: No Recursion ]
6699 * A parsed entity must not contain a recursive reference
6700 * to itself, either directly or indirectly.
6701 * Done somewhere else
6702 */
6703
6704 } else {
6705 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
6706 }
6707 xmlFree(name);
6708 }
6709 }
6710 *str = ptr;
6711 return(ent);
6712}
6713
6714/**
6715 * xmlParsePEReference:
6716 * @ctxt: an XML parser context
6717 *
6718 * parse PEReference declarations
6719 * The entity content is handled directly by pushing it's content as
6720 * a new input stream.
6721 *
6722 * [69] PEReference ::= '%' Name ';'
6723 *
6724 * [ WFC: No Recursion ]
6725 * A parsed entity must not contain a recursive
6726 * reference to itself, either directly or indirectly.
6727 *
6728 * [ WFC: Entity Declared ]
6729 * In a document without any DTD, a document with only an internal DTD
6730 * subset which contains no parameter entity references, or a document
6731 * with "standalone='yes'", ... ... The declaration of a parameter
6732 * entity must precede any reference to it...
6733 *
6734 * [ VC: Entity Declared ]
6735 * In a document with an external subset or external parameter entities
6736 * with "standalone='no'", ... ... The declaration of a parameter entity
6737 * must precede any reference to it...
6738 *
6739 * [ WFC: In DTD ]
6740 * Parameter-entity references may only appear in the DTD.
6741 * NOTE: misleading but this is handled.
6742 */
6743void
6744xmlParsePEReference(xmlParserCtxtPtr ctxt)
6745{
6746 const xmlChar *name;
6747 xmlEntityPtr entity = NULL;
6748 xmlParserInputPtr input;
6749
6750 if (RAW == '%') {
6751 NEXT;
6752 name = xmlParseName(ctxt);
6753 if (name == NULL) {
6754 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6755 "xmlParsePEReference: no name\n");
6756 } else {
6757 if (RAW == ';') {
6758 NEXT;
6759 if ((ctxt->sax != NULL) &&
6760 (ctxt->sax->getParameterEntity != NULL))
6761 entity = ctxt->sax->getParameterEntity(ctxt->userData,
6762 name);
6763 if (entity == NULL) {
6764 /*
6765 * [ WFC: Entity Declared ]
6766 * In a document without any DTD, a document with only an
6767 * internal DTD subset which contains no parameter entity
6768 * references, or a document with "standalone='yes'", ...
6769 * ... The declaration of a parameter entity must precede
6770 * any reference to it...
6771 */
6772 if ((ctxt->standalone == 1) ||
6773 ((ctxt->hasExternalSubset == 0) &&
6774 (ctxt->hasPErefs == 0))) {
6775 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
6776 "PEReference: %%%s; not found\n",
6777 name);
6778 } else {
6779 /*
6780 * [ VC: Entity Declared ]
6781 * In a document with an external subset or external
6782 * parameter entities with "standalone='no'", ...
6783 * ... The declaration of a parameter entity must
6784 * precede any reference to it...
6785 */
6786 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
6787 "PEReference: %%%s; not found\n",
6788 name, NULL);
6789 ctxt->valid = 0;
6790 }
6791 } else {
6792 /*
6793 * Internal checking in case the entity quest barfed
6794 */
6795 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
6796 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
6797 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
6798 "Internal: %%%s; is not a parameter entity\n",
6799 name, NULL);
6800 } else if (ctxt->input->free != deallocblankswrapper) {
6801 input =
6802 xmlNewBlanksWrapperInputStream(ctxt, entity);
6803 xmlPushInput(ctxt, input);
6804 } else {
6805 /*
6806 * TODO !!!
6807 * handle the extra spaces added before and after
6808 * c.f. http://www.w3.org/TR/REC-xml#as-PE
6809 */
6810 input = xmlNewEntityInputStream(ctxt, entity);
6811 xmlPushInput(ctxt, input);
6812 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
6813 (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) &&
6814 (IS_BLANK_CH(NXT(5)))) {
6815 xmlParseTextDecl(ctxt);
6816 if (ctxt->errNo ==
6817 XML_ERR_UNSUPPORTED_ENCODING) {
6818 /*
6819 * The XML REC instructs us to stop parsing
6820 * right here
6821 */
6822 ctxt->instate = XML_PARSER_EOF;
6823 return;
6824 }
6825 }
6826 }
6827 }
6828 ctxt->hasPErefs = 1;
6829 } else {
6830 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
6831 }
6832 }
6833 }
6834}
6835
6836/**
6837 * xmlParseStringPEReference:
6838 * @ctxt: an XML parser context
6839 * @str: a pointer to an index in the string
6840 *
6841 * parse PEReference declarations
6842 *
6843 * [69] PEReference ::= '%' Name ';'
6844 *
6845 * [ WFC: No Recursion ]
6846 * A parsed entity must not contain a recursive
6847 * reference to itself, either directly or indirectly.
6848 *
6849 * [ WFC: Entity Declared ]
6850 * In a document without any DTD, a document with only an internal DTD
6851 * subset which contains no parameter entity references, or a document
6852 * with "standalone='yes'", ... ... The declaration of a parameter
6853 * entity must precede any reference to it...
6854 *
6855 * [ VC: Entity Declared ]
6856 * In a document with an external subset or external parameter entities
6857 * with "standalone='no'", ... ... The declaration of a parameter entity
6858 * must precede any reference to it...
6859 *
6860 * [ WFC: In DTD ]
6861 * Parameter-entity references may only appear in the DTD.
6862 * NOTE: misleading but this is handled.
6863 *
6864 * Returns the string of the entity content.
6865 * str is updated to the current value of the index
6866 */
6867xmlEntityPtr
6868xmlParseStringPEReference(xmlParserCtxtPtr ctxt, const xmlChar **str) {
6869 const xmlChar *ptr;
6870 xmlChar cur;
6871 xmlChar *name;
6872 xmlEntityPtr entity = NULL;
6873
6874 if ((str == NULL) || (*str == NULL)) return(NULL);
6875 ptr = *str;
6876 cur = *ptr;
6877 if (cur == '%') {
6878 ptr++;
6879 cur = *ptr;
6880 name = xmlParseStringName(ctxt, &ptr);
6881 if (name == NULL) {
6882 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6883 "xmlParseStringPEReference: no name\n");
6884 } else {
6885 cur = *ptr;
6886 if (cur == ';') {
6887 ptr++;
6888 cur = *ptr;
6889 if ((ctxt->sax != NULL) &&
6890 (ctxt->sax->getParameterEntity != NULL))
6891 entity = ctxt->sax->getParameterEntity(ctxt->userData,
6892 name);
6893 if (entity == NULL) {
6894 /*
6895 * [ WFC: Entity Declared ]
6896 * In a document without any DTD, a document with only an
6897 * internal DTD subset which contains no parameter entity
6898 * references, or a document with "standalone='yes'", ...
6899 * ... The declaration of a parameter entity must precede
6900 * any reference to it...
6901 */
6902 if ((ctxt->standalone == 1) ||
6903 ((ctxt->hasExternalSubset == 0) &&
6904 (ctxt->hasPErefs == 0))) {
6905 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
6906 "PEReference: %%%s; not found\n", name);
6907 } else {
6908 /*
6909 * [ VC: Entity Declared ]
6910 * In a document with an external subset or external
6911 * parameter entities with "standalone='no'", ...
6912 * ... The declaration of a parameter entity must
6913 * precede any reference to it...
6914 */
6915 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
6916 "PEReference: %%%s; not found\n",
6917 name, NULL);
6918 ctxt->valid = 0;
6919 }
6920 } else {
6921 /*
6922 * Internal checking in case the entity quest barfed
6923 */
6924 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
6925 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
6926 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
6927 "%%%s; is not a parameter entity\n",
6928 name, NULL);
6929 }
6930 }
6931 ctxt->hasPErefs = 1;
6932 } else {
6933 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
6934 }
6935 xmlFree(name);
6936 }
6937 }
6938 *str = ptr;
6939 return(entity);
6940}
6941
6942/**
6943 * xmlParseDocTypeDecl:
6944 * @ctxt: an XML parser context
6945 *
6946 * parse a DOCTYPE declaration
6947 *
6948 * [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S?
6949 * ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
6950 *
6951 * [ VC: Root Element Type ]
6952 * The Name in the document type declaration must match the element
6953 * type of the root element.
6954 */
6955
6956void
6957xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt) {
6958 const xmlChar *name = NULL;
6959 xmlChar *ExternalID = NULL;
6960 xmlChar *URI = NULL;
6961
6962 /*
6963 * We know that '<!DOCTYPE' has been detected.
6964 */
6965 SKIP(9);
6966
6967 SKIP_BLANKS;
6968
6969 /*
6970 * Parse the DOCTYPE name.
6971 */
6972 name = xmlParseName(ctxt);
6973 if (name == NULL) {
6974 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6975 "xmlParseDocTypeDecl : no DOCTYPE name !\n");
6976 }
6977 ctxt->intSubName = name;
6978
6979 SKIP_BLANKS;
6980
6981 /*
6982 * Check for SystemID and ExternalID
6983 */
6984 URI = xmlParseExternalID(ctxt, &ExternalID, 1);
6985
6986 if ((URI != NULL) || (ExternalID != NULL)) {
6987 ctxt->hasExternalSubset = 1;
6988 }
6989 ctxt->extSubURI = URI;
6990 ctxt->extSubSystem = ExternalID;
6991
6992 SKIP_BLANKS;
6993
6994 /*
6995 * Create and update the internal subset.
6996 */
6997 if ((ctxt->sax != NULL) && (ctxt->sax->internalSubset != NULL) &&
6998 (!ctxt->disableSAX))
6999 ctxt->sax->internalSubset(ctxt->userData, name, ExternalID, URI);
7000
7001 /*
7002 * Is there any internal subset declarations ?
7003 * they are handled separately in xmlParseInternalSubset()
7004 */
7005 if (RAW == '[')
7006 return;
7007
7008 /*
7009 * We should be at the end of the DOCTYPE declaration.
7010 */
7011 if (RAW != '>') {
7012 xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
7013 }
7014 NEXT;
7015}
7016
7017/**
7018 * xmlParseInternalSubset:
7019 * @ctxt: an XML parser context
7020 *
7021 * parse the internal subset declaration
7022 *
7023 * [28 end] ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
7024 */
7025
7026static void
7027xmlParseInternalSubset(xmlParserCtxtPtr ctxt) {
7028 /*
7029 * Is there any DTD definition ?
7030 */
7031 if (RAW == '[') {
7032 ctxt->instate = XML_PARSER_DTD;
7033 NEXT;
7034 /*
7035 * Parse the succession of Markup declarations and
7036 * PEReferences.
7037 * Subsequence (markupdecl | PEReference | S)*
7038 */
7039 while (RAW != ']') {
7040 const xmlChar *check = CUR_PTR;
7041 unsigned int cons = ctxt->input->consumed;
7042
7043 SKIP_BLANKS;
7044 xmlParseMarkupDecl(ctxt);
7045 xmlParsePEReference(ctxt);
7046
7047 /*
7048 * Pop-up of finished entities.
7049 */
7050 while ((RAW == 0) && (ctxt->inputNr > 1))
7051 xmlPopInput(ctxt);
7052
7053 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
7054 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
7055 "xmlParseInternalSubset: error detected in Markup declaration\n");
7056 break;
7057 }
7058 }
7059 if (RAW == ']') {
7060 NEXT;
7061 SKIP_BLANKS;
7062 }
7063 }
7064
7065 /*
7066 * We should be at the end of the DOCTYPE declaration.
7067 */
7068 if (RAW != '>') {
7069 xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
7070 }
7071 NEXT;
7072}
7073
7074#ifdef LIBXML_SAX1_ENABLED
7075/**
7076 * xmlParseAttribute:
7077 * @ctxt: an XML parser context
7078 * @value: a xmlChar ** used to store the value of the attribute
7079 *
7080 * parse an attribute
7081 *
7082 * [41] Attribute ::= Name Eq AttValue
7083 *
7084 * [ WFC: No External Entity References ]
7085 * Attribute values cannot contain direct or indirect entity references
7086 * to external entities.
7087 *
7088 * [ WFC: No < in Attribute Values ]
7089 * The replacement text of any entity referred to directly or indirectly in
7090 * an attribute value (other than "&lt;") must not contain a <.
7091 *
7092 * [ VC: Attribute Value Type ]
7093 * The attribute must have been declared; the value must be of the type
7094 * declared for it.
7095 *
7096 * [25] Eq ::= S? '=' S?
7097 *
7098 * With namespace:
7099 *
7100 * [NS 11] Attribute ::= QName Eq AttValue
7101 *
7102 * Also the case QName == xmlns:??? is handled independently as a namespace
7103 * definition.
7104 *
7105 * Returns the attribute name, and the value in *value.
7106 */
7107
7108const xmlChar *
7109xmlParseAttribute(xmlParserCtxtPtr ctxt, xmlChar **value) {
7110 const xmlChar *name;
7111 xmlChar *val;
7112
7113 *value = NULL;
7114 GROW;
7115 name = xmlParseName(ctxt);
7116 if (name == NULL) {
7117 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7118 "error parsing attribute name\n");
7119 return(NULL);
7120 }
7121
7122 /*
7123 * read the value
7124 */
7125 SKIP_BLANKS;
7126 if (RAW == '=') {
7127 NEXT;
7128 SKIP_BLANKS;
7129 val = xmlParseAttValue(ctxt);
7130 ctxt->instate = XML_PARSER_CONTENT;
7131 } else {
7132 xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
7133 "Specification mandate value for attribute %s\n", name);
7134 return(NULL);
7135 }
7136
7137 /*
7138 * Check that xml:lang conforms to the specification
7139 * No more registered as an error, just generate a warning now
7140 * since this was deprecated in XML second edition
7141 */
7142 if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "xml:lang"))) {
7143 if (!xmlCheckLanguageID(val)) {
7144 xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
7145 "Malformed value for xml:lang : %s\n",
7146 val, NULL);
7147 }
7148 }
7149
7150 /*
7151 * Check that xml:space conforms to the specification
7152 */
7153 if (xmlStrEqual(name, BAD_CAST "xml:space")) {
7154 if (xmlStrEqual(val, BAD_CAST "default"))
7155 *(ctxt->space) = 0;
7156 else if (xmlStrEqual(val, BAD_CAST "preserve"))
7157 *(ctxt->space) = 1;
7158 else {
7159 xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE,
7160"Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
7161 val, NULL);
7162 }
7163 }
7164
7165 *value = val;
7166 return(name);
7167}
7168
7169/**
7170 * xmlParseStartTag:
7171 * @ctxt: an XML parser context
7172 *
7173 * parse a start of tag either for rule element or
7174 * EmptyElement. In both case we don't parse the tag closing chars.
7175 *
7176 * [40] STag ::= '<' Name (S Attribute)* S? '>'
7177 *
7178 * [ WFC: Unique Att Spec ]
7179 * No attribute name may appear more than once in the same start-tag or
7180 * empty-element tag.
7181 *
7182 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
7183 *
7184 * [ WFC: Unique Att Spec ]
7185 * No attribute name may appear more than once in the same start-tag or
7186 * empty-element tag.
7187 *
7188 * With namespace:
7189 *
7190 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
7191 *
7192 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
7193 *
7194 * Returns the element name parsed
7195 */
7196
7197const xmlChar *
7198xmlParseStartTag(xmlParserCtxtPtr ctxt) {
7199 const xmlChar *name;
7200 const xmlChar *attname;
7201 xmlChar *attvalue;
7202 const xmlChar **atts = ctxt->atts;
7203 int nbatts = 0;
7204 int maxatts = ctxt->maxatts;
7205 int i;
7206
7207 if (RAW != '<') return(NULL);
7208 NEXT1;
7209
7210 name = xmlParseName(ctxt);
7211 if (name == NULL) {
7212 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7213 "xmlParseStartTag: invalid element name\n");
7214 return(NULL);
7215 }
7216
7217 /*
7218 * Now parse the attributes, it ends up with the ending
7219 *
7220 * (S Attribute)* S?
7221 */
7222 SKIP_BLANKS;
7223 GROW;
7224
7225 while ((RAW != '>') &&
7226 ((RAW != '/') || (NXT(1) != '>')) &&
7227 (IS_BYTE_CHAR(RAW))) {
7228 const xmlChar *q = CUR_PTR;
7229 unsigned int cons = ctxt->input->consumed;
7230
7231 attname = xmlParseAttribute(ctxt, &attvalue);
7232 if ((attname != NULL) && (attvalue != NULL)) {
7233 /*
7234 * [ WFC: Unique Att Spec ]
7235 * No attribute name may appear more than once in the same
7236 * start-tag or empty-element tag.
7237 */
7238 for (i = 0; i < nbatts;i += 2) {
7239 if (xmlStrEqual(atts[i], attname)) {
7240 xmlErrAttributeDup(ctxt, NULL, attname);
7241 xmlFree(attvalue);
7242 goto failed;
7243 }
7244 }
7245 /*
7246 * Add the pair to atts
7247 */
7248 if (atts == NULL) {
7249 maxatts = 22; /* allow for 10 attrs by default */
7250 atts = (const xmlChar **)
7251 xmlMalloc(maxatts * sizeof(xmlChar *));
7252 if (atts == NULL) {
7253 xmlErrMemory(ctxt, NULL);
7254 if (attvalue != NULL)
7255 xmlFree(attvalue);
7256 goto failed;
7257 }
7258 ctxt->atts = atts;
7259 ctxt->maxatts = maxatts;
7260 } else if (nbatts + 4 > maxatts) {
7261 const xmlChar **n;
7262
7263 maxatts *= 2;
7264 n = (const xmlChar **) xmlRealloc((void *) atts,
7265 maxatts * sizeof(const xmlChar *));
7266 if (n == NULL) {
7267 xmlErrMemory(ctxt, NULL);
7268 if (attvalue != NULL)
7269 xmlFree(attvalue);
7270 goto failed;
7271 }
7272 atts = n;
7273 ctxt->atts = atts;
7274 ctxt->maxatts = maxatts;
7275 }
7276 atts[nbatts++] = attname;
7277 atts[nbatts++] = attvalue;
7278 atts[nbatts] = NULL;
7279 atts[nbatts + 1] = NULL;
7280 } else {
7281 if (attvalue != NULL)
7282 xmlFree(attvalue);
7283 }
7284
7285failed:
7286
7287 GROW
7288 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
7289 break;
7290 if (!IS_BLANK_CH(RAW)) {
7291 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
7292 "attributes construct error\n");
7293 }
7294 SKIP_BLANKS;
7295 if ((cons == ctxt->input->consumed) && (q == CUR_PTR) &&
7296 (attname == NULL) && (attvalue == NULL)) {
7297 xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
7298 "xmlParseStartTag: problem parsing attributes\n");
7299 break;
7300 }
7301 SHRINK;
7302 GROW;
7303 }
7304
7305 /*
7306 * SAX: Start of Element !
7307 */
7308 if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL) &&
7309 (!ctxt->disableSAX)) {
7310 if (nbatts > 0)
7311 ctxt->sax->startElement(ctxt->userData, name, atts);
7312 else
7313 ctxt->sax->startElement(ctxt->userData, name, NULL);
7314 }
7315
7316 if (atts != NULL) {
7317 /* Free only the content strings */
7318 for (i = 1;i < nbatts;i+=2)
7319 if (atts[i] != NULL)
7320 xmlFree((xmlChar *) atts[i]);
7321 }
7322 return(name);
7323}
7324
7325/**
7326 * xmlParseEndTag1:
7327 * @ctxt: an XML parser context
7328 * @line: line of the start tag
7329 * @nsNr: number of namespaces on the start tag
7330 *
7331 * parse an end of tag
7332 *
7333 * [42] ETag ::= '</' Name S? '>'
7334 *
7335 * With namespace
7336 *
7337 * [NS 9] ETag ::= '</' QName S? '>'
7338 */
7339
7340static void
7341xmlParseEndTag1(xmlParserCtxtPtr ctxt, int line) {
7342 const xmlChar *name;
7343
7344 GROW;
7345 if ((RAW != '<') || (NXT(1) != '/')) {
7346 xmlFatalErrMsg(ctxt, XML_ERR_LTSLASH_REQUIRED,
7347 "xmlParseEndTag: '</' not found\n");
7348 return;
7349 }
7350 SKIP(2);
7351
7352 name = xmlParseNameAndCompare(ctxt,ctxt->name);
7353
7354 /*
7355 * We should definitely be at the ending "S? '>'" part
7356 */
7357 GROW;
7358 SKIP_BLANKS;
7359 if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
7360 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
7361 } else
7362 NEXT1;
7363
7364 /*
7365 * [ WFC: Element Type Match ]
7366 * The Name in an element's end-tag must match the element type in the
7367 * start-tag.
7368 *
7369 */
7370 if (name != (xmlChar*)1) {
7371 if (name == NULL) name = BAD_CAST "unparseable";
7372 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
7373 "Opening and ending tag mismatch: %s line %d and %s\n",
7374 ctxt->name, line, name);
7375 }
7376
7377 /*
7378 * SAX: End of Tag
7379 */
7380 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
7381 (!ctxt->disableSAX))
7382 ctxt->sax->endElement(ctxt->userData, ctxt->name);
7383
7384 namePop(ctxt);
7385 spacePop(ctxt);
7386 return;
7387}
7388
7389/**
7390 * xmlParseEndTag:
7391 * @ctxt: an XML parser context
7392 *
7393 * parse an end of tag
7394 *
7395 * [42] ETag ::= '</' Name S? '>'
7396 *
7397 * With namespace
7398 *
7399 * [NS 9] ETag ::= '</' QName S? '>'
7400 */
7401
7402void
7403xmlParseEndTag(xmlParserCtxtPtr ctxt) {
7404 xmlParseEndTag1(ctxt, 0);
7405}
7406#endif /* LIBXML_SAX1_ENABLED */
7407
7408/************************************************************************
7409 * *
7410 * SAX 2 specific operations *
7411 * *
7412 ************************************************************************/
7413
7414static const xmlChar *
7415xmlParseNCNameComplex(xmlParserCtxtPtr ctxt) {
7416 int len = 0, l;
7417 int c;
7418 int count = 0;
7419
7420 /*
7421 * Handler for more complex cases
7422 */
7423 GROW;
7424 c = CUR_CHAR(l);
7425 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
7426 (!IS_LETTER(c) && (c != '_'))) {
7427 return(NULL);
7428 }
7429
7430 while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
7431 ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
7432 (c == '.') || (c == '-') || (c == '_') ||
7433 (IS_COMBINING(c)) ||
7434 (IS_EXTENDER(c)))) {
7435 if (count++ > 100) {
7436 count = 0;
7437 GROW;
7438 }
7439 len += l;
7440 NEXTL(l);
7441 c = CUR_CHAR(l);
7442 }
7443 return(xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len));
7444}
7445
7446/*
7447 * xmlGetNamespace:
7448 * @ctxt: an XML parser context
7449 * @prefix: the prefix to lookup
7450 *
7451 * Lookup the namespace name for the @prefix (which ca be NULL)
7452 * The prefix must come from the @ctxt->dict dictionnary
7453 *
7454 * Returns the namespace name or NULL if not bound
7455 */
7456static const xmlChar *
7457xmlGetNamespace(xmlParserCtxtPtr ctxt, const xmlChar *prefix) {
7458 int i;
7459
7460 if (prefix == ctxt->str_xml) return(ctxt->str_xml_ns);
7461 for (i = ctxt->nsNr - 2;i >= 0;i-=2)
7462 if (ctxt->nsTab[i] == prefix) {
7463 if ((prefix == NULL) && (*ctxt->nsTab[i + 1] == 0))
7464 return(NULL);
7465 return(ctxt->nsTab[i + 1]);
7466 }
7467 return(NULL);
7468}
7469
7470/**
7471 * xmlParseNCName:
7472 * @ctxt: an XML parser context
7473 * @len: lenght of the string parsed
7474 *
7475 * parse an XML name.
7476 *
7477 * [4NS] NCNameChar ::= Letter | Digit | '.' | '-' | '_' |
7478 * CombiningChar | Extender
7479 *
7480 * [5NS] NCName ::= (Letter | '_') (NCNameChar)*
7481 *
7482 * Returns the Name parsed or NULL
7483 */
7484
7485static const xmlChar *
7486xmlParseNCName(xmlParserCtxtPtr ctxt) {
7487 const xmlChar *in;
7488 const xmlChar *ret;
7489 int count = 0;
7490
7491 /*
7492 * Accelerator for simple ASCII names
7493 */
7494 in = ctxt->input->cur;
7495 if (((*in >= 0x61) && (*in <= 0x7A)) ||
7496 ((*in >= 0x41) && (*in <= 0x5A)) ||
7497 (*in == '_')) {
7498 in++;
7499 while (((*in >= 0x61) && (*in <= 0x7A)) ||
7500 ((*in >= 0x41) && (*in <= 0x5A)) ||
7501 ((*in >= 0x30) && (*in <= 0x39)) ||
7502 (*in == '_') || (*in == '-') ||
7503 (*in == '.'))
7504 in++;
7505 if ((*in > 0) && (*in < 0x80)) {
7506 count = in - ctxt->input->cur;
7507 ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
7508 ctxt->input->cur = in;
7509 ctxt->nbChars += count;
7510 ctxt->input->col += count;
7511 if (ret == NULL) {
7512 xmlErrMemory(ctxt, NULL);
7513 }
7514 return(ret);
7515 }
7516 }
7517 return(xmlParseNCNameComplex(ctxt));
7518}
7519
7520/**
7521 * xmlParseQName:
7522 * @ctxt: an XML parser context
7523 * @prefix: pointer to store the prefix part
7524 *
7525 * parse an XML Namespace QName
7526 *
7527 * [6] QName ::= (Prefix ':')? LocalPart
7528 * [7] Prefix ::= NCName
7529 * [8] LocalPart ::= NCName
7530 *
7531 * Returns the Name parsed or NULL
7532 */
7533
7534static const xmlChar *
7535xmlParseQName(xmlParserCtxtPtr ctxt, const xmlChar **prefix) {
7536 const xmlChar *l, *p;
7537
7538 GROW;
7539
7540 l = xmlParseNCName(ctxt);
7541 if (l == NULL) {
7542 if (CUR == ':') {
7543 l = xmlParseName(ctxt);
7544 if (l != NULL) {
7545 xmlNsErr(ctxt, XML_NS_ERR_QNAME,
7546 "Failed to parse QName '%s'\n", l, NULL, NULL);
7547 *prefix = NULL;
7548 return(l);
7549 }
7550 }
7551 return(NULL);
7552 }
7553 if (CUR == ':') {
7554 NEXT;
7555 p = l;
7556 l = xmlParseNCName(ctxt);
7557 if (l == NULL) {
7558 xmlChar *tmp;
7559
7560 xmlNsErr(ctxt, XML_NS_ERR_QNAME,
7561 "Failed to parse QName '%s:'\n", p, NULL, NULL);
7562 tmp = xmlBuildQName(BAD_CAST "", p, NULL, 0);
7563 p = xmlDictLookup(ctxt->dict, tmp, -1);
7564 if (tmp != NULL) xmlFree(tmp);
7565 *prefix = NULL;
7566 return(p);
7567 }
7568 if (CUR == ':') {
7569 xmlChar *tmp;
7570
7571 xmlNsErr(ctxt, XML_NS_ERR_QNAME,
7572 "Failed to parse QName '%s:%s:'\n", p, l, NULL);
7573 NEXT;
7574 tmp = (xmlChar *) xmlParseName(ctxt);
7575 if (tmp != NULL) {
7576 tmp = xmlBuildQName(tmp, l, NULL, 0);
7577 l = xmlDictLookup(ctxt->dict, tmp, -1);
7578 if (tmp != NULL) xmlFree(tmp);
7579 *prefix = p;
7580 return(l);
7581 }
7582 tmp = xmlBuildQName(BAD_CAST "", l, NULL, 0);
7583 l = xmlDictLookup(ctxt->dict, tmp, -1);
7584 if (tmp != NULL) xmlFree(tmp);
7585 *prefix = p;
7586 return(l);
7587 }
7588 *prefix = p;
7589 } else
7590 *prefix = NULL;
7591 return(l);
7592}
7593
7594/**
7595 * xmlParseQNameAndCompare:
7596 * @ctxt: an XML parser context
7597 * @name: the localname
7598 * @prefix: the prefix, if any.
7599 *
7600 * parse an XML name and compares for match
7601 * (specialized for endtag parsing)
7602 *
7603 * Returns NULL for an illegal name, (xmlChar*) 1 for success
7604 * and the name for mismatch
7605 */
7606
7607static const xmlChar *
7608xmlParseQNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *name,
7609 xmlChar const *prefix) {
7610 const xmlChar *cmp = name;
7611 const xmlChar *in;
7612 const xmlChar *ret;
7613 const xmlChar *prefix2;
7614
7615 if (prefix == NULL) return(xmlParseNameAndCompare(ctxt, name));
7616
7617 GROW;
7618 in = ctxt->input->cur;
7619
7620 cmp = prefix;
7621 while (*in != 0 && *in == *cmp) {
7622 ++in;
7623 ++cmp;
7624 }
7625 if ((*cmp == 0) && (*in == ':')) {
7626 in++;
7627 cmp = name;
7628 while (*in != 0 && *in == *cmp) {
7629 ++in;
7630 ++cmp;
7631 }
7632 if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
7633 /* success */
7634 ctxt->input->cur = in;
7635 return((const xmlChar*) 1);
7636 }
7637 }
7638 /*
7639 * all strings coms from the dictionary, equality can be done directly
7640 */
7641 ret = xmlParseQName (ctxt, &prefix2);
7642 if ((ret == name) && (prefix == prefix2))
7643 return((const xmlChar*) 1);
7644 return ret;
7645}
7646
7647/**
7648 * xmlParseAttValueInternal:
7649 * @ctxt: an XML parser context
7650 * @len: attribute len result
7651 * @alloc: whether the attribute was reallocated as a new string
7652 * @normalize: if 1 then further non-CDATA normalization must be done
7653 *
7654 * parse a value for an attribute.
7655 * NOTE: if no normalization is needed, the routine will return pointers
7656 * directly from the data buffer.
7657 *
7658 * 3.3.3 Attribute-Value Normalization:
7659 * Before the value of an attribute is passed to the application or
7660 * checked for validity, the XML processor must normalize it as follows:
7661 * - a character reference is processed by appending the referenced
7662 * character to the attribute value
7663 * - an entity reference is processed by recursively processing the
7664 * replacement text of the entity
7665 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
7666 * appending #x20 to the normalized value, except that only a single
7667 * #x20 is appended for a "#xD#xA" sequence that is part of an external
7668 * parsed entity or the literal entity value of an internal parsed entity
7669 * - other characters are processed by appending them to the normalized value
7670 * If the declared value is not CDATA, then the XML processor must further
7671 * process the normalized attribute value by discarding any leading and
7672 * trailing space (#x20) characters, and by replacing sequences of space
7673 * (#x20) characters by a single space (#x20) character.
7674 * All attributes for which no declaration has been read should be treated
7675 * by a non-validating parser as if declared CDATA.
7676 *
7677 * Returns the AttValue parsed or NULL. The value has to be freed by the
7678 * caller if it was copied, this can be detected by val[*len] == 0.
7679 */
7680
7681static xmlChar *
7682xmlParseAttValueInternal(xmlParserCtxtPtr ctxt, int *len, int *alloc,
7683 int normalize)
7684{
7685 xmlChar limit = 0;
7686 const xmlChar *in = NULL, *start, *end, *last;
7687 xmlChar *ret = NULL;
7688
7689 GROW;
7690 in = (xmlChar *) CUR_PTR;
7691 if (*in != '"' && *in != '\'') {
7692 xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
7693 return (NULL);
7694 }
7695 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
7696
7697 /*
7698 * try to handle in this routine the most common case where no
7699 * allocation of a new string is required and where content is
7700 * pure ASCII.
7701 */
7702 limit = *in++;
7703 end = ctxt->input->end;
7704 start = in;
7705 if (in >= end) {
7706 const xmlChar *oldbase = ctxt->input->base;
7707 GROW;
7708 if (oldbase != ctxt->input->base) {
7709 long delta = ctxt->input->base - oldbase;
7710 start = start + delta;
7711 in = in + delta;
7712 }
7713 end = ctxt->input->end;
7714 }
7715 if (normalize) {
7716 /*
7717 * Skip any leading spaces
7718 */
7719 while ((in < end) && (*in != limit) &&
7720 ((*in == 0x20) || (*in == 0x9) ||
7721 (*in == 0xA) || (*in == 0xD))) {
7722 in++;
7723 start = in;
7724 if (in >= end) {
7725 const xmlChar *oldbase = ctxt->input->base;
7726 GROW;
7727 if (oldbase != ctxt->input->base) {
7728 long delta = ctxt->input->base - oldbase;
7729 start = start + delta;
7730 in = in + delta;
7731 }
7732 end = ctxt->input->end;
7733 }
7734 }
7735 while ((in < end) && (*in != limit) && (*in >= 0x20) &&
7736 (*in <= 0x7f) && (*in != '&') && (*in != '<')) {
7737 if ((*in++ == 0x20) && (*in == 0x20)) break;
7738 if (in >= end) {
7739 const xmlChar *oldbase = ctxt->input->base;
7740 GROW;
7741 if (oldbase != ctxt->input->base) {
7742 long delta = ctxt->input->base - oldbase;
7743 start = start + delta;
7744 in = in + delta;
7745 }
7746 end = ctxt->input->end;
7747 }
7748 }
7749 last = in;
7750 /*
7751 * skip the trailing blanks
7752 */
7753 while ((last[-1] == 0x20) && (last > start)) last--;
7754 while ((in < end) && (*in != limit) &&
7755 ((*in == 0x20) || (*in == 0x9) ||
7756 (*in == 0xA) || (*in == 0xD))) {
7757 in++;
7758 if (in >= end) {
7759 const xmlChar *oldbase = ctxt->input->base;
7760 GROW;
7761 if (oldbase != ctxt->input->base) {
7762 long delta = ctxt->input->base - oldbase;
7763 start = start + delta;
7764 in = in + delta;
7765 last = last + delta;
7766 }
7767 end = ctxt->input->end;
7768 }
7769 }
7770 if (*in != limit) goto need_complex;
7771 } else {
7772 while ((in < end) && (*in != limit) && (*in >= 0x20) &&
7773 (*in <= 0x7f) && (*in != '&') && (*in != '<')) {
7774 in++;
7775 if (in >= end) {
7776 const xmlChar *oldbase = ctxt->input->base;
7777 GROW;
7778 if (oldbase != ctxt->input->base) {
7779 long delta = ctxt->input->base - oldbase;
7780 start = start + delta;
7781 in = in + delta;
7782 }
7783 end = ctxt->input->end;
7784 }
7785 }
7786 last = in;
7787 if (*in != limit) goto need_complex;
7788 }
7789 in++;
7790 if (len != NULL) {
7791 *len = last - start;
7792 ret = (xmlChar *) start;
7793 } else {
7794 if (alloc) *alloc = 1;
7795 ret = xmlStrndup(start, last - start);
7796 }
7797 CUR_PTR = in;
7798 if (alloc) *alloc = 0;
7799 return ret;
7800need_complex:
7801 if (alloc) *alloc = 1;
7802 return xmlParseAttValueComplex(ctxt, len, normalize);
7803}
7804
7805/**
7806 * xmlParseAttribute2:
7807 * @ctxt: an XML parser context
7808 * @pref: the element prefix
7809 * @elem: the element name
7810 * @prefix: a xmlChar ** used to store the value of the attribute prefix
7811 * @value: a xmlChar ** used to store the value of the attribute
7812 * @len: an int * to save the length of the attribute
7813 * @alloc: an int * to indicate if the attribute was allocated
7814 *
7815 * parse an attribute in the new SAX2 framework.
7816 *
7817 * Returns the attribute name, and the value in *value, .
7818 */
7819
7820static const xmlChar *
7821xmlParseAttribute2(xmlParserCtxtPtr ctxt,
7822 const xmlChar *pref, const xmlChar *elem,
7823 const xmlChar **prefix, xmlChar **value,
7824 int *len, int *alloc) {
7825 const xmlChar *name;
7826 xmlChar *val, *internal_val = NULL;
7827 int normalize = 0;
7828
7829 *value = NULL;
7830 GROW;
7831 name = xmlParseQName(ctxt, prefix);
7832 if (name == NULL) {
7833 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7834 "error parsing attribute name\n");
7835 return(NULL);
7836 }
7837
7838 /*
7839 * get the type if needed
7840 */
7841 if (ctxt->attsSpecial != NULL) {
7842 int type;
7843
7844 type = (int) (long) xmlHashQLookup2(ctxt->attsSpecial,
7845 pref, elem, *prefix, name);
7846 if (type != 0) normalize = 1;
7847 }
7848
7849 /*
7850 * read the value
7851 */
7852 SKIP_BLANKS;
7853 if (RAW == '=') {
7854 NEXT;
7855 SKIP_BLANKS;
7856 val = xmlParseAttValueInternal(ctxt, len, alloc, normalize);
7857 ctxt->instate = XML_PARSER_CONTENT;
7858 } else {
7859 xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
7860 "Specification mandate value for attribute %s\n", name);
7861 return(NULL);
7862 }
7863
7864 if (*prefix == ctxt->str_xml) {
7865 /*
7866 * Check that xml:lang conforms to the specification
7867 * No more registered as an error, just generate a warning now
7868 * since this was deprecated in XML second edition
7869 */
7870 if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "lang"))) {
7871 internal_val = xmlStrndup(val, *len);
7872 if (!xmlCheckLanguageID(internal_val)) {
7873 xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
7874 "Malformed value for xml:lang : %s\n",
7875 internal_val, NULL);
7876 }
7877 }
7878
7879 /*
7880 * Check that xml:space conforms to the specification
7881 */
7882 if (xmlStrEqual(name, BAD_CAST "space")) {
7883 internal_val = xmlStrndup(val, *len);
7884 if (xmlStrEqual(internal_val, BAD_CAST "default"))
7885 *(ctxt->space) = 0;
7886 else if (xmlStrEqual(internal_val, BAD_CAST "preserve"))
7887 *(ctxt->space) = 1;
7888 else {
7889 xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE,
7890"Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
7891 internal_val, NULL);
7892 }
7893 }
7894 if (internal_val) {
7895 xmlFree(internal_val);
7896 }
7897 }
7898
7899 *value = val;
7900 return(name);
7901}
7902
7903/**
7904 * xmlParseStartTag2:
7905 * @ctxt: an XML parser context
7906 *
7907 * parse a start of tag either for rule element or
7908 * EmptyElement. In both case we don't parse the tag closing chars.
7909 * This routine is called when running SAX2 parsing
7910 *
7911 * [40] STag ::= '<' Name (S Attribute)* S? '>'
7912 *
7913 * [ WFC: Unique Att Spec ]
7914 * No attribute name may appear more than once in the same start-tag or
7915 * empty-element tag.
7916 *
7917 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
7918 *
7919 * [ WFC: Unique Att Spec ]
7920 * No attribute name may appear more than once in the same start-tag or
7921 * empty-element tag.
7922 *
7923 * With namespace:
7924 *
7925 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
7926 *
7927 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
7928 *
7929 * Returns the element name parsed
7930 */
7931
7932static const xmlChar *
7933xmlParseStartTag2(xmlParserCtxtPtr ctxt, const xmlChar **pref,
7934 const xmlChar **URI, int *tlen) {
7935 const xmlChar *localname;
7936 const xmlChar *prefix;
7937 const xmlChar *attname;
7938 const xmlChar *aprefix;
7939 const xmlChar *nsname;
7940 xmlChar *attvalue;
7941 const xmlChar **atts = ctxt->atts;
7942 int maxatts = ctxt->maxatts;
7943 int nratts, nbatts, nbdef;
7944 int i, j, nbNs, attval, oldline, oldcol;
7945 const xmlChar *base;
7946 unsigned long cur;
7947 int nsNr = ctxt->nsNr;
7948
7949 if (RAW != '<') return(NULL);
7950 NEXT1;
7951
7952 /*
7953 * NOTE: it is crucial with the SAX2 API to never call SHRINK beyond that
7954 * point since the attribute values may be stored as pointers to
7955 * the buffer and calling SHRINK would destroy them !
7956 * The Shrinking is only possible once the full set of attribute
7957 * callbacks have been done.
7958 */
7959reparse:
7960 SHRINK;
7961 base = ctxt->input->base;
7962 cur = ctxt->input->cur - ctxt->input->base;
7963 oldline = ctxt->input->line;
7964 oldcol = ctxt->input->col;
7965 nbatts = 0;
7966 nratts = 0;
7967 nbdef = 0;
7968 nbNs = 0;
7969 attval = 0;
7970 /* Forget any namespaces added during an earlier parse of this element. */
7971 ctxt->nsNr = nsNr;
7972
7973 localname = xmlParseQName(ctxt, &prefix);
7974 if (localname == NULL) {
7975 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7976 "StartTag: invalid element name\n");
7977 return(NULL);
7978 }
7979 *tlen = ctxt->input->cur - ctxt->input->base - cur;
7980
7981 /*
7982 * Now parse the attributes, it ends up with the ending
7983 *
7984 * (S Attribute)* S?
7985 */
7986 SKIP_BLANKS;
7987 GROW;
7988 if (ctxt->input->base != base) goto base_changed;
7989
7990 while ((RAW != '>') &&
7991 ((RAW != '/') || (NXT(1) != '>')) &&
7992 (IS_BYTE_CHAR(RAW))) {
7993 const xmlChar *q = CUR_PTR;
7994 unsigned int cons = ctxt->input->consumed;
7995 int len = -1, alloc = 0;
7996
7997 attname = xmlParseAttribute2(ctxt, prefix, localname,
7998 &aprefix, &attvalue, &len, &alloc);
7999 if (ctxt->input->base != base) {
8000 if ((attvalue != NULL) && (alloc != 0))
8001 xmlFree(attvalue);
8002 attvalue = NULL;
8003 goto base_changed;
8004 }
8005 if ((attname != NULL) && (attvalue != NULL)) {
8006 if (len < 0) len = xmlStrlen(attvalue);
8007 if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
8008 const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len);
8009 xmlURIPtr uri;
8010
8011 if (*URL != 0) {
8012 uri = xmlParseURI((const char *) URL);
8013 if (uri == NULL) {
8014 xmlWarningMsg(ctxt, XML_WAR_NS_URI,
8015 "xmlns: %s not a valid URI\n",
8016 URL, NULL);
8017 } else {
8018 if (uri->scheme == NULL) {
8019 xmlWarningMsg(ctxt, XML_WAR_NS_URI_RELATIVE,
8020 "xmlns: URI %s is not absolute\n",
8021 URL, NULL);
8022 }
8023 xmlFreeURI(uri);
8024 }
8025 }
8026 /*
8027 * check that it's not a defined namespace
8028 */
8029 for (j = 1;j <= nbNs;j++)
8030 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL)
8031 break;
8032 if (j <= nbNs)
8033 xmlErrAttributeDup(ctxt, NULL, attname);
8034 else
8035 if (nsPush(ctxt, NULL, URL) > 0) nbNs++;
8036 if (alloc != 0) xmlFree(attvalue);
8037 SKIP_BLANKS;
8038 continue;
8039 }
8040 if (aprefix == ctxt->str_xmlns) {
8041 const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len);
8042 xmlURIPtr uri;
8043
8044 if (attname == ctxt->str_xml) {
8045 if (URL != ctxt->str_xml_ns) {
8046 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
8047 "xml namespace prefix mapped to wrong URI\n",
8048 NULL, NULL, NULL);
8049 }
8050 /*
8051 * Do not keep a namespace definition node
8052 */
8053 if (alloc != 0) xmlFree(attvalue);
8054 SKIP_BLANKS;
8055 continue;
8056 }
8057 uri = xmlParseURI((const char *) URL);
8058 if (uri == NULL) {
8059 xmlWarningMsg(ctxt, XML_WAR_NS_URI,
8060 "xmlns:%s: '%s' is not a valid URI\n",
8061 attname, URL);
8062 } else {
8063 if ((ctxt->pedantic) && (uri->scheme == NULL)) {
8064 xmlWarningMsg(ctxt, XML_WAR_NS_URI_RELATIVE,
8065 "xmlns:%s: URI %s is not absolute\n",
8066 attname, URL);
8067 }
8068 xmlFreeURI(uri);
8069 }
8070
8071 /*
8072 * check that it's not a defined namespace
8073 */
8074 for (j = 1;j <= nbNs;j++)
8075 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname)
8076 break;
8077 if (j <= nbNs)
8078 xmlErrAttributeDup(ctxt, aprefix, attname);
8079 else
8080 if (nsPush(ctxt, attname, URL) > 0) nbNs++;
8081 if (alloc != 0) xmlFree(attvalue);
8082 SKIP_BLANKS;
8083 if (ctxt->input->base != base) goto base_changed;
8084 continue;
8085 }
8086
8087 /*
8088 * Add the pair to atts
8089 */
8090 if ((atts == NULL) || (nbatts + 5 > maxatts)) {
8091 if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
8092 if (attvalue[len] == 0)
8093 xmlFree(attvalue);
8094 goto failed;
8095 }
8096 maxatts = ctxt->maxatts;
8097 atts = ctxt->atts;
8098 }
8099 ctxt->attallocs[nratts++] = alloc;
8100 atts[nbatts++] = attname;
8101 atts[nbatts++] = aprefix;
8102 atts[nbatts++] = NULL; /* the URI will be fetched later */
8103 atts[nbatts++] = attvalue;
8104 attvalue += len;
8105 atts[nbatts++] = attvalue;
8106 /*
8107 * tag if some deallocation is needed
8108 */
8109 if (alloc != 0) attval = 1;
8110 } else {
8111 if ((attvalue != NULL) && (attvalue[len] == 0))
8112 xmlFree(attvalue);
8113 }
8114
8115failed:
8116
8117 GROW
8118 if (ctxt->input->base != base) goto base_changed;
8119 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
8120 break;
8121 if (!IS_BLANK_CH(RAW)) {
8122 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
8123 "attributes construct error\n");
8124 break;
8125 }
8126 SKIP_BLANKS;
8127 if ((cons == ctxt->input->consumed) && (q == CUR_PTR) &&
8128 (attname == NULL) && (attvalue == NULL)) {
8129 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
8130 "xmlParseStartTag: problem parsing attributes\n");
8131 break;
8132 }
8133 GROW;
8134 if (ctxt->input->base != base) goto base_changed;
8135 }
8136
8137 /*
8138 * The attributes defaulting
8139 */
8140 if (ctxt->attsDefault != NULL) {
8141 xmlDefAttrsPtr defaults;
8142
8143 defaults = xmlHashLookup2(ctxt->attsDefault, localname, prefix);
8144 if (defaults != NULL) {
8145 for (i = 0;i < defaults->nbAttrs;i++) {
8146 attname = defaults->values[4 * i];
8147 aprefix = defaults->values[4 * i + 1];
8148
8149 /*
8150 * special work for namespaces defaulted defs
8151 */
8152 if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
8153 /*
8154 * check that it's not a defined namespace
8155 */
8156 for (j = 1;j <= nbNs;j++)
8157 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL)
8158 break;
8159 if (j <= nbNs) continue;
8160
8161 nsname = xmlGetNamespace(ctxt, NULL);
8162 if (nsname != defaults->values[4 * i + 2]) {
8163 if (nsPush(ctxt, NULL,
8164 defaults->values[4 * i + 2]) > 0)
8165 nbNs++;
8166 }
8167 } else if (aprefix == ctxt->str_xmlns) {
8168 /*
8169 * check that it's not a defined namespace
8170 */
8171 for (j = 1;j <= nbNs;j++)
8172 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname)
8173 break;
8174 if (j <= nbNs) continue;
8175
8176 nsname = xmlGetNamespace(ctxt, attname);
8177 if (nsname != defaults->values[2]) {
8178 if (nsPush(ctxt, attname,
8179 defaults->values[4 * i + 2]) > 0)
8180 nbNs++;
8181 }
8182 } else {
8183 /*
8184 * check that it's not a defined attribute
8185 */
8186 for (j = 0;j < nbatts;j+=5) {
8187 if ((attname == atts[j]) && (aprefix == atts[j+1]))
8188 break;
8189 }
8190 if (j < nbatts) continue;
8191
8192 if ((atts == NULL) || (nbatts + 5 > maxatts)) {
8193 if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
8194 return(NULL);
8195 }
8196 maxatts = ctxt->maxatts;
8197 atts = ctxt->atts;
8198 }
8199 atts[nbatts++] = attname;
8200 atts[nbatts++] = aprefix;
8201 if (aprefix == NULL)
8202 atts[nbatts++] = NULL;
8203 else
8204 atts[nbatts++] = xmlGetNamespace(ctxt, aprefix);
8205 atts[nbatts++] = defaults->values[4 * i + 2];
8206 atts[nbatts++] = defaults->values[4 * i + 3];
8207 nbdef++;
8208 }
8209 }
8210 }
8211 }
8212
8213 /*
8214 * The attributes checkings
8215 */
8216 for (i = 0; i < nbatts;i += 5) {
8217 /*
8218 * The default namespace does not apply to attribute names.
8219 */
8220 if (atts[i + 1] != NULL) {
8221 nsname = xmlGetNamespace(ctxt, atts[i + 1]);
8222 if (nsname == NULL) {
8223 xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
8224 "Namespace prefix %s for %s on %s is not defined\n",
8225 atts[i + 1], atts[i], localname);
8226 }
8227 atts[i + 2] = nsname;
8228 } else
8229 nsname = NULL;
8230 /*
8231 * [ WFC: Unique Att Spec ]
8232 * No attribute name may appear more than once in the same
8233 * start-tag or empty-element tag.
8234 * As extended by the Namespace in XML REC.
8235 */
8236 for (j = 0; j < i;j += 5) {
8237 if (atts[i] == atts[j]) {
8238 if (atts[i+1] == atts[j+1]) {
8239 xmlErrAttributeDup(ctxt, atts[i+1], atts[i]);
8240 break;
8241 }
8242 if ((nsname != NULL) && (atts[j + 2] == nsname)) {
8243 xmlNsErr(ctxt, XML_NS_ERR_ATTRIBUTE_REDEFINED,
8244 "Namespaced Attribute %s in '%s' redefined\n",
8245 atts[i], nsname, NULL);
8246 break;
8247 }
8248 }
8249 }
8250 }
8251
8252 nsname = xmlGetNamespace(ctxt, prefix);
8253 if ((prefix != NULL) && (nsname == NULL)) {
8254 xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
8255 "Namespace prefix %s on %s is not defined\n",
8256 prefix, localname, NULL);
8257 }
8258 *pref = prefix;
8259 *URI = nsname;
8260
8261 /*
8262 * SAX: Start of Element !
8263 */
8264 if ((ctxt->sax != NULL) && (ctxt->sax->startElementNs != NULL) &&
8265 (!ctxt->disableSAX)) {
8266 if (nbNs > 0)
8267 ctxt->sax->startElementNs(ctxt->userData, localname, prefix,
8268 nsname, nbNs, &ctxt->nsTab[ctxt->nsNr - 2 * nbNs],
8269 nbatts / 5, nbdef, atts);
8270 else
8271 ctxt->sax->startElementNs(ctxt->userData, localname, prefix,
8272 nsname, 0, NULL, nbatts / 5, nbdef, atts);
8273 }
8274
8275 /*
8276 * Free up attribute allocated strings if needed
8277 */
8278 if (attval != 0) {
8279 for (i = 3,j = 0; j < nratts;i += 5,j++)
8280 if ((ctxt->attallocs[j] != 0) && (atts[i] != NULL))
8281 xmlFree((xmlChar *) atts[i]);
8282 }
8283
8284 return(localname);
8285
8286base_changed:
8287 /*
8288 * the attribute strings are valid iif the base didn't changed
8289 */
8290 if (attval != 0) {
8291 for (i = 3,j = 0; j < nratts;i += 5,j++)
8292 if ((ctxt->attallocs[j] != 0) && (atts[i] != NULL))
8293 xmlFree((xmlChar *) atts[i]);
8294 }
8295 ctxt->input->cur = ctxt->input->base + cur;
8296 ctxt->input->line = oldline;
8297 ctxt->input->col = oldcol;
8298 if (ctxt->wellFormed == 1) {
8299 goto reparse;
8300 }
8301 return(NULL);
8302}
8303
8304/**
8305 * xmlParseEndTag2:
8306 * @ctxt: an XML parser context
8307 * @line: line of the start tag
8308 * @nsNr: number of namespaces on the start tag
8309 *
8310 * parse an end of tag
8311 *
8312 * [42] ETag ::= '</' Name S? '>'
8313 *
8314 * With namespace
8315 *
8316 * [NS 9] ETag ::= '</' QName S? '>'
8317 */
8318
8319static void
8320xmlParseEndTag2(xmlParserCtxtPtr ctxt, const xmlChar *prefix,
8321 const xmlChar *URI, int line, int nsNr, int tlen) {
8322 const xmlChar *name;
8323
8324 GROW;
8325 if ((RAW != '<') || (NXT(1) != '/')) {
8326 xmlFatalErr(ctxt, XML_ERR_LTSLASH_REQUIRED, NULL);
8327 return;
8328 }
8329 SKIP(2);
8330
8331 if ((tlen > 0) && (xmlStrncmp(ctxt->input->cur, ctxt->name, tlen) == 0)) {
8332 if (ctxt->input->cur[tlen] == '>') {
8333 ctxt->input->cur += tlen + 1;
8334 goto done;
8335 }
8336 ctxt->input->cur += tlen;
8337 name = (xmlChar*)1;
8338 } else {
8339 if (prefix == NULL)
8340 name = xmlParseNameAndCompare(ctxt, ctxt->name);
8341 else
8342 name = xmlParseQNameAndCompare(ctxt, ctxt->name, prefix);
8343 }
8344
8345 /*
8346 * We should definitely be at the ending "S? '>'" part
8347 */
8348 GROW;
8349 SKIP_BLANKS;
8350 if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
8351 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
8352 } else
8353 NEXT1;
8354
8355 /*
8356 * [ WFC: Element Type Match ]
8357 * The Name in an element's end-tag must match the element type in the
8358 * start-tag.
8359 *
8360 */
8361 if (name != (xmlChar*)1) {
8362 if (name == NULL) name = BAD_CAST "unparseable";
8363 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
8364 "Opening and ending tag mismatch: %s line %d and %s\n",
8365 ctxt->name, line, name);
8366 }
8367
8368 /*
8369 * SAX: End of Tag
8370 */
8371done:
8372 if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
8373 (!ctxt->disableSAX))
8374 ctxt->sax->endElementNs(ctxt->userData, ctxt->name, prefix, URI);
8375
8376 spacePop(ctxt);
8377 if (nsNr != 0)
8378 nsPop(ctxt, nsNr);
8379 return;
8380}
8381
8382/**
8383 * xmlParseCDSect:
8384 * @ctxt: an XML parser context
8385 *
8386 * Parse escaped pure raw content.
8387 *
8388 * [18] CDSect ::= CDStart CData CDEnd
8389 *
8390 * [19] CDStart ::= '<![CDATA['
8391 *
8392 * [20] Data ::= (Char* - (Char* ']]>' Char*))
8393 *
8394 * [21] CDEnd ::= ']]>'
8395 */
8396void
8397xmlParseCDSect(xmlParserCtxtPtr ctxt) {
8398 xmlChar *buf = NULL;
8399 int len = 0;
8400 int size = XML_PARSER_BUFFER_SIZE;
8401 int r, rl;
8402 int s, sl;
8403 int cur, l;
8404 int count = 0;
8405
8406 /* Check 2.6.0 was NXT(0) not RAW */
8407 if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) {
8408 SKIP(9);
8409 } else
8410 return;
8411
8412 ctxt->instate = XML_PARSER_CDATA_SECTION;
8413 r = CUR_CHAR(rl);
8414 if (!IS_CHAR(r)) {
8415 xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
8416 ctxt->instate = XML_PARSER_CONTENT;
8417 return;
8418 }
8419 NEXTL(rl);
8420 s = CUR_CHAR(sl);
8421 if (!IS_CHAR(s)) {
8422 xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
8423 ctxt->instate = XML_PARSER_CONTENT;
8424 return;
8425 }
8426 NEXTL(sl);
8427 cur = CUR_CHAR(l);
8428 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
8429 if (buf == NULL) {
8430 xmlErrMemory(ctxt, NULL);
8431 return;
8432 }
8433 while (IS_CHAR(cur) &&
8434 ((r != ']') || (s != ']') || (cur != '>'))) {
8435 if (len + 5 >= size) {
8436 xmlChar *tmp;
8437
8438 size *= 2;
8439 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
8440 if (tmp == NULL) {
8441 xmlFree(buf);
8442 xmlErrMemory(ctxt, NULL);
8443 return;
8444 }
8445 buf = tmp;
8446 }
8447 COPY_BUF(rl,buf,len,r);
8448 r = s;
8449 rl = sl;
8450 s = cur;
8451 sl = l;
8452 count++;
8453 if (count > 50) {
8454 GROW;
8455 count = 0;
8456 }
8457 NEXTL(l);
8458 cur = CUR_CHAR(l);
8459 }
8460 buf[len] = 0;
8461 ctxt->instate = XML_PARSER_CONTENT;
8462 if (cur != '>') {
8463 xmlFatalErrMsgStr(ctxt, XML_ERR_CDATA_NOT_FINISHED,
8464 "CData section not finished\n%.50s\n", buf);
8465 xmlFree(buf);
8466 return;
8467 }
8468 NEXTL(l);
8469
8470 /*
8471 * OK the buffer is to be consumed as cdata.
8472 */
8473 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
8474 if (ctxt->sax->cdataBlock != NULL)
8475 ctxt->sax->cdataBlock(ctxt->userData, buf, len);
8476 else if (ctxt->sax->characters != NULL)
8477 ctxt->sax->characters(ctxt->userData, buf, len);
8478 }
8479 xmlFree(buf);
8480}
8481
8482/**
8483 * xmlParseContent:
8484 * @ctxt: an XML parser context
8485 *
8486 * Parse a content:
8487 *
8488 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
8489 */
8490
8491void
8492xmlParseContent(xmlParserCtxtPtr ctxt) {
8493 GROW;
8494 while ((RAW != 0) &&
8495 ((RAW != '<') || (NXT(1) != '/')) &&
8496 (ctxt->instate != XML_PARSER_EOF)) {
8497 const xmlChar *test = CUR_PTR;
8498 unsigned int cons = ctxt->input->consumed;
8499 const xmlChar *cur = ctxt->input->cur;
8500
8501 /*
8502 * First case : a Processing Instruction.
8503 */
8504 if ((*cur == '<') && (cur[1] == '?')) {
8505 xmlParsePI(ctxt);
8506 }
8507
8508 /*
8509 * Second case : a CDSection
8510 */
8511 /* 2.6.0 test was *cur not RAW */
8512 else if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) {
8513 xmlParseCDSect(ctxt);
8514 }
8515
8516 /*
8517 * Third case : a comment
8518 */
8519 else if ((*cur == '<') && (NXT(1) == '!') &&
8520 (NXT(2) == '-') && (NXT(3) == '-')) {
8521 xmlParseComment(ctxt);
8522 ctxt->instate = XML_PARSER_CONTENT;
8523 }
8524
8525 /*
8526 * Fourth case : a sub-element.
8527 */
8528 else if (*cur == '<') {
8529 xmlParseElement(ctxt);
8530 }
8531
8532 /*
8533 * Fifth case : a reference. If if has not been resolved,
8534 * parsing returns it's Name, create the node
8535 */
8536
8537 else if (*cur == '&') {
8538 xmlParseReference(ctxt);
8539 }
8540
8541 /*
8542 * Last case, text. Note that References are handled directly.
8543 */
8544 else {
8545 xmlParseCharData(ctxt, 0);
8546 }
8547
8548 GROW;
8549 /*
8550 * Pop-up of finished entities.
8551 */
8552 while ((RAW == 0) && (ctxt->inputNr > 1))
8553 xmlPopInput(ctxt);
8554 SHRINK;
8555
8556 if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) {
8557 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
8558 "detected an error in element content\n");
8559 ctxt->instate = XML_PARSER_EOF;
8560 break;
8561 }
8562 }
8563}
8564
8565/**
8566 * xmlParseElement:
8567 * @ctxt: an XML parser context
8568 *
8569 * parse an XML element, this is highly recursive
8570 *
8571 * [39] element ::= EmptyElemTag | STag content ETag
8572 *
8573 * [ WFC: Element Type Match ]
8574 * The Name in an element's end-tag must match the element type in the
8575 * start-tag.
8576 *
8577 */
8578
8579void
8580xmlParseElement(xmlParserCtxtPtr ctxt) {
8581 const xmlChar *name;
8582 const xmlChar *prefix;
8583 const xmlChar *URI;
8584 xmlParserNodeInfo node_info;
8585 int line, tlen;
8586 xmlNodePtr ret;
8587 int nsNr = ctxt->nsNr;
8588
8589 if ((unsigned int) ctxt->nameNr > xmlParserMaxDepth) {
8590 xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR,
8591 "Excessive depth in document: change xmlParserMaxDepth = %d\n",
8592 xmlParserMaxDepth);
8593 ctxt->instate = XML_PARSER_EOF;
8594 return;
8595 }
8596
8597 /* Capture start position */
8598 if (ctxt->record_info) {
8599 node_info.begin_pos = ctxt->input->consumed +
8600 (CUR_PTR - ctxt->input->base);
8601 node_info.begin_line = ctxt->input->line;
8602 }
8603
8604 if (ctxt->spaceNr == 0)
8605 spacePush(ctxt, -1);
8606 else if (*ctxt->space == -2)
8607 spacePush(ctxt, -1);
8608 else
8609 spacePush(ctxt, *ctxt->space);
8610
8611 line = ctxt->input->line;
8612#ifdef LIBXML_SAX1_ENABLED
8613 if (ctxt->sax2)
8614#endif /* LIBXML_SAX1_ENABLED */
8615 name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen);
8616#ifdef LIBXML_SAX1_ENABLED
8617 else
8618 name = xmlParseStartTag(ctxt);
8619#endif /* LIBXML_SAX1_ENABLED */
8620 if (name == NULL) {
8621 spacePop(ctxt);
8622 return;
8623 }
8624 namePush(ctxt, name);
8625 ret = ctxt->node;
8626
8627#ifdef LIBXML_VALID_ENABLED
8628 /*
8629 * [ VC: Root Element Type ]
8630 * The Name in the document type declaration must match the element
8631 * type of the root element.
8632 */
8633 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
8634 ctxt->node && (ctxt->node == ctxt->myDoc->children))
8635 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
8636#endif /* LIBXML_VALID_ENABLED */
8637
8638 /*
8639 * Check for an Empty Element.
8640 */
8641 if ((RAW == '/') && (NXT(1) == '>')) {
8642 SKIP(2);
8643 if (ctxt->sax2) {
8644 if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
8645 (!ctxt->disableSAX))
8646 ctxt->sax->endElementNs(ctxt->userData, name, prefix, URI);
8647#ifdef LIBXML_SAX1_ENABLED
8648 } else {
8649 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
8650 (!ctxt->disableSAX))
8651 ctxt->sax->endElement(ctxt->userData, name);
8652#endif /* LIBXML_SAX1_ENABLED */
8653 }
8654 namePop(ctxt);
8655 spacePop(ctxt);
8656 if (nsNr != ctxt->nsNr)
8657 nsPop(ctxt, ctxt->nsNr - nsNr);
8658 if ( ret != NULL && ctxt->record_info ) {
8659 node_info.end_pos = ctxt->input->consumed +
8660 (CUR_PTR - ctxt->input->base);
8661 node_info.end_line = ctxt->input->line;
8662 node_info.node = ret;
8663 xmlParserAddNodeInfo(ctxt, &node_info);
8664 }
8665 return;
8666 }
8667 if (RAW == '>') {
8668 NEXT1;
8669 } else {
8670 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_GT_REQUIRED,
8671 "Couldn't find end of Start Tag %s line %d\n",
8672 name, line, NULL);
8673
8674 /*
8675 * end of parsing of this node.
8676 */
8677 nodePop(ctxt);
8678 namePop(ctxt);
8679 spacePop(ctxt);
8680 if (nsNr != ctxt->nsNr)
8681 nsPop(ctxt, ctxt->nsNr - nsNr);
8682
8683 /*
8684 * Capture end position and add node
8685 */
8686 if ( ret != NULL && ctxt->record_info ) {
8687 node_info.end_pos = ctxt->input->consumed +
8688 (CUR_PTR - ctxt->input->base);
8689 node_info.end_line = ctxt->input->line;
8690 node_info.node = ret;
8691 xmlParserAddNodeInfo(ctxt, &node_info);
8692 }
8693 return;
8694 }
8695
8696 /*
8697 * Parse the content of the element:
8698 */
8699 xmlParseContent(ctxt);
8700 if (!IS_BYTE_CHAR(RAW)) {
8701 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NOT_FINISHED,
8702 "Premature end of data in tag %s line %d\n",
8703 name, line, NULL);
8704
8705 /*
8706 * end of parsing of this node.
8707 */
8708 nodePop(ctxt);
8709 namePop(ctxt);
8710 spacePop(ctxt);
8711 if (nsNr != ctxt->nsNr)
8712 nsPop(ctxt, ctxt->nsNr - nsNr);
8713 return;
8714 }
8715
8716 /*
8717 * parse the end of tag: '</' should be here.
8718 */
8719 if (ctxt->sax2) {
8720 xmlParseEndTag2(ctxt, prefix, URI, line, ctxt->nsNr - nsNr, tlen);
8721 namePop(ctxt);
8722 }
8723#ifdef LIBXML_SAX1_ENABLED
8724 else
8725 xmlParseEndTag1(ctxt, line);
8726#endif /* LIBXML_SAX1_ENABLED */
8727
8728 /*
8729 * Capture end position and add node
8730 */
8731 if ( ret != NULL && ctxt->record_info ) {
8732 node_info.end_pos = ctxt->input->consumed +
8733 (CUR_PTR - ctxt->input->base);
8734 node_info.end_line = ctxt->input->line;
8735 node_info.node = ret;
8736 xmlParserAddNodeInfo(ctxt, &node_info);
8737 }
8738}
8739
8740/**
8741 * xmlParseVersionNum:
8742 * @ctxt: an XML parser context
8743 *
8744 * parse the XML version value.
8745 *
8746 * [26] VersionNum ::= ([a-zA-Z0-9_.:] | '-')+
8747 *
8748 * Returns the string giving the XML version number, or NULL
8749 */
8750xmlChar *
8751xmlParseVersionNum(xmlParserCtxtPtr ctxt) {
8752 xmlChar *buf = NULL;
8753 int len = 0;
8754 int size = 10;
8755 xmlChar cur;
8756
8757 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
8758 if (buf == NULL) {
8759 xmlErrMemory(ctxt, NULL);
8760 return(NULL);
8761 }
8762 cur = CUR;
8763 while (((cur >= 'a') && (cur <= 'z')) ||
8764 ((cur >= 'A') && (cur <= 'Z')) ||
8765 ((cur >= '0') && (cur <= '9')) ||
8766 (cur == '_') || (cur == '.') ||
8767 (cur == ':') || (cur == '-')) {
8768 if (len + 1 >= size) {
8769 xmlChar *tmp;
8770
8771 size *= 2;
8772 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
8773 if (tmp == NULL) {
8774 xmlErrMemory(ctxt, NULL);
8775 return(NULL);
8776 }
8777 buf = tmp;
8778 }
8779 buf[len++] = cur;
8780 NEXT;
8781 cur=CUR;
8782 }
8783 buf[len] = 0;
8784 return(buf);
8785}
8786
8787/**
8788 * xmlParseVersionInfo:
8789 * @ctxt: an XML parser context
8790 *
8791 * parse the XML version.
8792 *
8793 * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
8794 *
8795 * [25] Eq ::= S? '=' S?
8796 *
8797 * Returns the version string, e.g. "1.0"
8798 */
8799
8800xmlChar *
8801xmlParseVersionInfo(xmlParserCtxtPtr ctxt) {
8802 xmlChar *version = NULL;
8803
8804 if (CMP7(CUR_PTR, 'v', 'e', 'r', 's', 'i', 'o', 'n')) {
8805 SKIP(7);
8806 SKIP_BLANKS;
8807 if (RAW != '=') {
8808 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
8809 return(NULL);
8810 }
8811 NEXT;
8812 SKIP_BLANKS;
8813 if (RAW == '"') {
8814 NEXT;
8815 version = xmlParseVersionNum(ctxt);
8816 if (RAW != '"') {
8817 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
8818 } else
8819 NEXT;
8820 } else if (RAW == '\''){
8821 NEXT;
8822 version = xmlParseVersionNum(ctxt);
8823 if (RAW != '\'') {
8824 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
8825 } else
8826 NEXT;
8827 } else {
8828 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
8829 }
8830 }
8831 return(version);
8832}
8833
8834/**
8835 * xmlParseEncName:
8836 * @ctxt: an XML parser context
8837 *
8838 * parse the XML encoding name
8839 *
8840 * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
8841 *
8842 * Returns the encoding name value or NULL
8843 */
8844xmlChar *
8845xmlParseEncName(xmlParserCtxtPtr ctxt) {
8846 xmlChar *buf = NULL;
8847 int len = 0;
8848 int size = 10;
8849 xmlChar cur;
8850
8851 cur = CUR;
8852 if (((cur >= 'a') && (cur <= 'z')) ||
8853 ((cur >= 'A') && (cur <= 'Z'))) {
8854 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
8855 if (buf == NULL) {
8856 xmlErrMemory(ctxt, NULL);
8857 return(NULL);
8858 }
8859
8860 buf[len++] = cur;
8861 NEXT;
8862 cur = CUR;
8863 while (((cur >= 'a') && (cur <= 'z')) ||
8864 ((cur >= 'A') && (cur <= 'Z')) ||
8865 ((cur >= '0') && (cur <= '9')) ||
8866 (cur == '.') || (cur == '_') ||
8867 (cur == '-')) {
8868 if (len + 1 >= size) {
8869 xmlChar *tmp;
8870
8871 size *= 2;
8872 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
8873 if (tmp == NULL) {
8874 xmlErrMemory(ctxt, NULL);
8875 xmlFree(buf);
8876 return(NULL);
8877 }
8878 buf = tmp;
8879 }
8880 buf[len++] = cur;
8881 NEXT;
8882 cur = CUR;
8883 if (cur == 0) {
8884 SHRINK;
8885 GROW;
8886 cur = CUR;
8887 }
8888 }
8889 buf[len] = 0;
8890 } else {
8891 xmlFatalErr(ctxt, XML_ERR_ENCODING_NAME, NULL);
8892 }
8893 return(buf);
8894}
8895
8896/**
8897 * xmlParseEncodingDecl:
8898 * @ctxt: an XML parser context
8899 *
8900 * parse the XML encoding declaration
8901 *
8902 * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'")
8903 *
8904 * this setups the conversion filters.
8905 *
8906 * Returns the encoding value or NULL
8907 */
8908
8909const xmlChar *
8910xmlParseEncodingDecl(xmlParserCtxtPtr ctxt) {
8911 xmlChar *encoding = NULL;
8912
8913 SKIP_BLANKS;
8914 if (CMP8(CUR_PTR, 'e', 'n', 'c', 'o', 'd', 'i', 'n', 'g')) {
8915 SKIP(8);
8916 SKIP_BLANKS;
8917 if (RAW != '=') {
8918 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
8919 return(NULL);
8920 }
8921 NEXT;
8922 SKIP_BLANKS;
8923 if (RAW == '"') {
8924 NEXT;
8925 encoding = xmlParseEncName(ctxt);
8926 if (RAW != '"') {
8927 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
8928 } else
8929 NEXT;
8930 } else if (RAW == '\''){
8931 NEXT;
8932 encoding = xmlParseEncName(ctxt);
8933 if (RAW != '\'') {
8934 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
8935 } else
8936 NEXT;
8937 } else {
8938 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
8939 }
8940 /*
8941 * UTF-16 encoding stwich has already taken place at this stage,
8942 * more over the little-endian/big-endian selection is already done
8943 */
8944 if ((encoding != NULL) &&
8945 ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-16")) ||
8946 (!xmlStrcasecmp(encoding, BAD_CAST "UTF16")))) {
8947 if (ctxt->encoding != NULL)
8948 xmlFree((xmlChar *) ctxt->encoding);
8949 ctxt->encoding = encoding;
8950 }
8951 /*
8952 * UTF-8 encoding is handled natively
8953 */
8954 else if ((encoding != NULL) &&
8955 ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-8")) ||
8956 (!xmlStrcasecmp(encoding, BAD_CAST "UTF8")))) {
8957 if (ctxt->encoding != NULL)
8958 xmlFree((xmlChar *) ctxt->encoding);
8959 ctxt->encoding = encoding;
8960 }
8961 else if (encoding != NULL) {
8962 xmlCharEncodingHandlerPtr handler;
8963
8964 if (ctxt->input->encoding != NULL)
8965 xmlFree((xmlChar *) ctxt->input->encoding);
8966 ctxt->input->encoding = encoding;
8967
8968 handler = xmlFindCharEncodingHandler((const char *) encoding);
8969 if (handler != NULL) {
8970 xmlSwitchToEncoding(ctxt, handler);
8971 } else {
8972 xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
8973 "Unsupported encoding %s\n", encoding);
8974 return(NULL);
8975 }
8976 }
8977 }
8978 return(encoding);
8979}
8980
8981/**
8982 * xmlParseSDDecl:
8983 * @ctxt: an XML parser context
8984 *
8985 * parse the XML standalone declaration
8986 *
8987 * [32] SDDecl ::= S 'standalone' Eq
8988 * (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no')'"'))
8989 *
8990 * [ VC: Standalone Document Declaration ]
8991 * TODO The standalone document declaration must have the value "no"
8992 * if any external markup declarations contain declarations of:
8993 * - attributes with default values, if elements to which these
8994 * attributes apply appear in the document without specifications
8995 * of values for these attributes, or
8996 * - entities (other than amp, lt, gt, apos, quot), if references
8997 * to those entities appear in the document, or
8998 * - attributes with values subject to normalization, where the
8999 * attribute appears in the document with a value which will change
9000 * as a result of normalization, or
9001 * - element types with element content, if white space occurs directly
9002 * within any instance of those types.
9003 *
9004 * Returns:
9005 * 1 if standalone="yes"
9006 * 0 if standalone="no"
9007 * -2 if standalone attribute is missing or invalid
9008 * (A standalone value of -2 means that the XML declaration was found,
9009 * but no value was specified for the standalone attribute).
9010 */
9011
9012int
9013xmlParseSDDecl(xmlParserCtxtPtr ctxt) {
9014 int standalone = -2;
9015
9016 SKIP_BLANKS;
9017 if (CMP10(CUR_PTR, 's', 't', 'a', 'n', 'd', 'a', 'l', 'o', 'n', 'e')) {
9018 SKIP(10);
9019 SKIP_BLANKS;
9020 if (RAW != '=') {
9021 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
9022 return(standalone);
9023 }
9024 NEXT;
9025 SKIP_BLANKS;
9026 if (RAW == '\''){
9027 NEXT;
9028 if ((RAW == 'n') && (NXT(1) == 'o')) {
9029 standalone = 0;
9030 SKIP(2);
9031 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
9032 (NXT(2) == 's')) {
9033 standalone = 1;
9034 SKIP(3);
9035 } else {
9036 xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
9037 }
9038 if (RAW != '\'') {
9039 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
9040 } else
9041 NEXT;
9042 } else if (RAW == '"'){
9043 NEXT;
9044 if ((RAW == 'n') && (NXT(1) == 'o')) {
9045 standalone = 0;
9046 SKIP(2);
9047 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
9048 (NXT(2) == 's')) {
9049 standalone = 1;
9050 SKIP(3);
9051 } else {
9052 xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
9053 }
9054 if (RAW != '"') {
9055 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
9056 } else
9057 NEXT;
9058 } else {
9059 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
9060 }
9061 }
9062 return(standalone);
9063}
9064
9065/**
9066 * xmlParseXMLDecl:
9067 * @ctxt: an XML parser context
9068 *
9069 * parse an XML declaration header
9070 *
9071 * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
9072 */
9073
9074void
9075xmlParseXMLDecl(xmlParserCtxtPtr ctxt) {
9076 xmlChar *version;
9077
9078 /*
9079 * This value for standalone indicates that the document has an
9080 * XML declaration but it does not have a standalone attribute.
9081 * It will be overwritten later if a standalone attribute is found.
9082 */
9083 ctxt->input->standalone = -2;
9084
9085 /*
9086 * We know that '<?xml' is here.
9087 */
9088 SKIP(5);
9089
9090 if (!IS_BLANK_CH(RAW)) {
9091 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
9092 "Blank needed after '<?xml'\n");
9093 }
9094 SKIP_BLANKS;
9095
9096 /*
9097 * We must have the VersionInfo here.
9098 */
9099 version = xmlParseVersionInfo(ctxt);
9100 if (version == NULL) {
9101 xmlFatalErr(ctxt, XML_ERR_VERSION_MISSING, NULL);
9102 } else {
9103 if (!xmlStrEqual(version, (const xmlChar *) XML_DEFAULT_VERSION)) {
9104 /*
9105 * TODO: Blueberry should be detected here
9106 */
9107 xmlWarningMsg(ctxt, XML_WAR_UNKNOWN_VERSION,
9108 "Unsupported version '%s'\n",
9109 version, NULL);
9110 }
9111 if (ctxt->version != NULL)
9112 xmlFree((void *) ctxt->version);
9113 ctxt->version = version;
9114 }
9115
9116 /*
9117 * We may have the encoding declaration
9118 */
9119 if (!IS_BLANK_CH(RAW)) {
9120 if ((RAW == '?') && (NXT(1) == '>')) {
9121 SKIP(2);
9122 return;
9123 }
9124 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
9125 }
9126 xmlParseEncodingDecl(ctxt);
9127 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
9128 /*
9129 * The XML REC instructs us to stop parsing right here
9130 */
9131 return;
9132 }
9133
9134 /*
9135 * We may have the standalone status.
9136 */
9137 if ((ctxt->input->encoding != NULL) && (!IS_BLANK_CH(RAW))) {
9138 if ((RAW == '?') && (NXT(1) == '>')) {
9139 SKIP(2);
9140 return;
9141 }
9142 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
9143 }
9144 SKIP_BLANKS;
9145 ctxt->input->standalone = xmlParseSDDecl(ctxt);
9146
9147 SKIP_BLANKS;
9148 if ((RAW == '?') && (NXT(1) == '>')) {
9149 SKIP(2);
9150 } else if (RAW == '>') {
9151 /* Deprecated old WD ... */
9152 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
9153 NEXT;
9154 } else {
9155 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
9156 MOVETO_ENDTAG(CUR_PTR);
9157 NEXT;
9158 }
9159}
9160
9161/**
9162 * xmlParseMisc:
9163 * @ctxt: an XML parser context
9164 *
9165 * parse an XML Misc* optional field.
9166 *
9167 * [27] Misc ::= Comment | PI | S
9168 */
9169
9170void
9171xmlParseMisc(xmlParserCtxtPtr ctxt) {
9172 while (((RAW == '<') && (NXT(1) == '?')) ||
9173 (CMP4(CUR_PTR, '<', '!', '-', '-')) ||
9174 IS_BLANK_CH(CUR)) {
9175 if ((RAW == '<') && (NXT(1) == '?')) {
9176 xmlParsePI(ctxt);
9177 } else if (IS_BLANK_CH(CUR)) {
9178 NEXT;
9179 } else
9180 xmlParseComment(ctxt);
9181 }
9182}
9183
9184/**
9185 * xmlParseDocument:
9186 * @ctxt: an XML parser context
9187 *
9188 * parse an XML document (and build a tree if using the standard SAX
9189 * interface).
9190 *
9191 * [1] document ::= prolog element Misc*
9192 *
9193 * [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)?
9194 *
9195 * Returns 0, -1 in case of error. the parser context is augmented
9196 * as a result of the parsing.
9197 */
9198
9199int
9200xmlParseDocument(xmlParserCtxtPtr ctxt) {
9201 xmlChar start[4];
9202 xmlCharEncoding enc;
9203
9204 xmlInitParser();
9205
9206 if ((ctxt == NULL) || (ctxt->input == NULL))
9207 return(-1);
9208
9209 GROW;
9210
9211 /*
9212 * SAX: detecting the level.
9213 */
9214 xmlDetectSAX2(ctxt);
9215
9216 /*
9217 * SAX: beginning of the document processing.
9218 */
9219 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
9220 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
9221
9222 if ((ctxt->encoding == (const xmlChar *)XML_CHAR_ENCODING_NONE) &&
9223 ((ctxt->input->end - ctxt->input->cur) >= 4)) {
9224 /*
9225 * Get the 4 first bytes and decode the charset
9226 * if enc != XML_CHAR_ENCODING_NONE
9227 * plug some encoding conversion routines.
9228 */
9229 start[0] = RAW;
9230 start[1] = NXT(1);
9231 start[2] = NXT(2);
9232 start[3] = NXT(3);
9233 enc = xmlDetectCharEncoding(&start[0], 4);
9234 if (enc != XML_CHAR_ENCODING_NONE) {
9235 xmlSwitchEncoding(ctxt, enc);
9236 }
9237 }
9238
9239
9240 if (CUR == 0) {
9241 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
9242 }
9243
9244 /*
9245 * Check for the XMLDecl in the Prolog.
9246 */
9247 GROW;
9248 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
9249
9250 /*
9251 * Note that we will switch encoding on the fly.
9252 */
9253 xmlParseXMLDecl(ctxt);
9254 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
9255 /*
9256 * The XML REC instructs us to stop parsing right here
9257 */
9258 return(-1);
9259 }
9260 ctxt->standalone = ctxt->input->standalone;
9261 SKIP_BLANKS;
9262 } else {
9263 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
9264 }
9265 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
9266 ctxt->sax->startDocument(ctxt->userData);
9267
9268 /*
9269 * The Misc part of the Prolog
9270 */
9271 GROW;
9272 xmlParseMisc(ctxt);
9273
9274 /*
9275 * Then possibly doc type declaration(s) and more Misc
9276 * (doctypedecl Misc*)?
9277 */
9278 GROW;
9279 if (CMP9(CUR_PTR, '<', '!', 'D', 'O', 'C', 'T', 'Y', 'P', 'E')) {
9280
9281 ctxt->inSubset = 1;
9282 xmlParseDocTypeDecl(ctxt);
9283 if (RAW == '[') {
9284 ctxt->instate = XML_PARSER_DTD;
9285 xmlParseInternalSubset(ctxt);
9286 }
9287
9288 /*
9289 * Create and update the external subset.
9290 */
9291 ctxt->inSubset = 2;
9292 if ((ctxt->sax != NULL) && (ctxt->sax->externalSubset != NULL) &&
9293 (!ctxt->disableSAX))
9294 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
9295 ctxt->extSubSystem, ctxt->extSubURI);
9296 ctxt->inSubset = 0;
9297
9298
9299 ctxt->instate = XML_PARSER_PROLOG;
9300 xmlParseMisc(ctxt);
9301 }
9302
9303 /*
9304 * Time to start parsing the tree itself
9305 */
9306 GROW;
9307 if (RAW != '<') {
9308 xmlFatalErrMsg(ctxt, XML_ERR_DOCUMENT_EMPTY,
9309 "Start tag expected, '<' not found\n");
9310 } else {
9311 ctxt->instate = XML_PARSER_CONTENT;
9312 xmlParseElement(ctxt);
9313 ctxt->instate = XML_PARSER_EPILOG;
9314
9315
9316 /*
9317 * The Misc part at the end
9318 */
9319 xmlParseMisc(ctxt);
9320
9321 if (RAW != 0) {
9322 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
9323 }
9324 ctxt->instate = XML_PARSER_EOF;
9325 }
9326
9327 /*
9328 * SAX: end of the document processing.
9329 */
9330 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
9331 ctxt->sax->endDocument(ctxt->userData);
9332
9333 /*
9334 * Remove locally kept entity definitions if the tree was not built
9335 */
9336 if ((ctxt->myDoc != NULL) &&
9337 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
9338 xmlFreeDoc(ctxt->myDoc);
9339 ctxt->myDoc = NULL;
9340 }
9341
9342 if (! ctxt->wellFormed) {
9343 ctxt->valid = 0;
9344 return(-1);
9345 }
9346 return(0);
9347}
9348
9349/**
9350 * xmlParseExtParsedEnt:
9351 * @ctxt: an XML parser context
9352 *
9353 * parse a general parsed entity
9354 * An external general parsed entity is well-formed if it matches the
9355 * production labeled extParsedEnt.
9356 *
9357 * [78] extParsedEnt ::= TextDecl? content
9358 *
9359 * Returns 0, -1 in case of error. the parser context is augmented
9360 * as a result of the parsing.
9361 */
9362
9363int
9364xmlParseExtParsedEnt(xmlParserCtxtPtr ctxt) {
9365 xmlChar start[4];
9366 xmlCharEncoding enc;
9367
9368 if ((ctxt == NULL) || (ctxt->input == NULL))
9369 return(-1);
9370
9371 xmlDefaultSAXHandlerInit();
9372
9373 xmlDetectSAX2(ctxt);
9374
9375 GROW;
9376
9377 /*
9378 * SAX: beginning of the document processing.
9379 */
9380 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
9381 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
9382
9383 /*
9384 * Get the 4 first bytes and decode the charset
9385 * if enc != XML_CHAR_ENCODING_NONE
9386 * plug some encoding conversion routines.
9387 */
9388 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
9389 start[0] = RAW;
9390 start[1] = NXT(1);
9391 start[2] = NXT(2);
9392 start[3] = NXT(3);
9393 enc = xmlDetectCharEncoding(start, 4);
9394 if (enc != XML_CHAR_ENCODING_NONE) {
9395 xmlSwitchEncoding(ctxt, enc);
9396 }
9397 }
9398
9399
9400 if (CUR == 0) {
9401 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
9402 }
9403
9404 /*
9405 * Check for the XMLDecl in the Prolog.
9406 */
9407 GROW;
9408 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
9409
9410 /*
9411 * Note that we will switch encoding on the fly.
9412 */
9413 xmlParseXMLDecl(ctxt);
9414 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
9415 /*
9416 * The XML REC instructs us to stop parsing right here
9417 */
9418 return(-1);
9419 }
9420 SKIP_BLANKS;
9421 } else {
9422 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
9423 }
9424 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
9425 ctxt->sax->startDocument(ctxt->userData);
9426
9427 /*
9428 * Doing validity checking on chunk doesn't make sense
9429 */
9430 ctxt->instate = XML_PARSER_CONTENT;
9431 ctxt->validate = 0;
9432 ctxt->loadsubset = 0;
9433 ctxt->depth = 0;
9434
9435 xmlParseContent(ctxt);
9436
9437 if ((RAW == '<') && (NXT(1) == '/')) {
9438 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
9439 } else if (RAW != 0) {
9440 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
9441 }
9442
9443 /*
9444 * SAX: end of the document processing.
9445 */
9446 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
9447 ctxt->sax->endDocument(ctxt->userData);
9448
9449 if (! ctxt->wellFormed) return(-1);
9450 return(0);
9451}
9452
9453#ifdef LIBXML_PUSH_ENABLED
9454/************************************************************************
9455 * *
9456 * Progressive parsing interfaces *
9457 * *
9458 ************************************************************************/
9459
9460/**
9461 * xmlParseLookupSequence:
9462 * @ctxt: an XML parser context
9463 * @first: the first char to lookup
9464 * @next: the next char to lookup or zero
9465 * @third: the next char to lookup or zero
9466 *
9467 * Try to find if a sequence (first, next, third) or just (first next) or
9468 * (first) is available in the input stream.
9469 * This function has a side effect of (possibly) incrementing ctxt->checkIndex
9470 * to avoid rescanning sequences of bytes, it DOES change the state of the
9471 * parser, do not use liberally.
9472 *
9473 * Returns the index to the current parsing point if the full sequence
9474 * is available, -1 otherwise.
9475 */
9476static int
9477xmlParseLookupSequence(xmlParserCtxtPtr ctxt, xmlChar first,
9478 xmlChar next, xmlChar third) {
9479 int base, len;
9480 xmlParserInputPtr in;
9481 const xmlChar *buf;
9482
9483 in = ctxt->input;
9484 if (in == NULL) return(-1);
9485 base = in->cur - in->base;
9486 if (base < 0) return(-1);
9487 if (ctxt->checkIndex > base)
9488 base = ctxt->checkIndex;
9489 if (in->buf == NULL) {
9490 buf = in->base;
9491 len = in->length;
9492 } else {
9493 buf = in->buf->buffer->content;
9494 len = in->buf->buffer->use;
9495 }
9496 /* take into account the sequence length */
9497 if (third) len -= 2;
9498 else if (next) len --;
9499 for (;base < len;base++) {
9500 if (buf[base] == first) {
9501 if (third != 0) {
9502 if ((buf[base + 1] != next) ||
9503 (buf[base + 2] != third)) continue;
9504 } else if (next != 0) {
9505 if (buf[base + 1] != next) continue;
9506 }
9507 ctxt->checkIndex = 0;
9508#ifdef DEBUG_PUSH
9509 if (next == 0)
9510 xmlGenericError(xmlGenericErrorContext,
9511 "PP: lookup '%c' found at %d\n",
9512 first, base);
9513 else if (third == 0)
9514 xmlGenericError(xmlGenericErrorContext,
9515 "PP: lookup '%c%c' found at %d\n",
9516 first, next, base);
9517 else
9518 xmlGenericError(xmlGenericErrorContext,
9519 "PP: lookup '%c%c%c' found at %d\n",
9520 first, next, third, base);
9521#endif
9522 return(base - (in->cur - in->base));
9523 }
9524 }
9525 ctxt->checkIndex = base;
9526#ifdef DEBUG_PUSH
9527 if (next == 0)
9528 xmlGenericError(xmlGenericErrorContext,
9529 "PP: lookup '%c' failed\n", first);
9530 else if (third == 0)
9531 xmlGenericError(xmlGenericErrorContext,
9532 "PP: lookup '%c%c' failed\n", first, next);
9533 else
9534 xmlGenericError(xmlGenericErrorContext,
9535 "PP: lookup '%c%c%c' failed\n", first, next, third);
9536#endif
9537 return(-1);
9538}
9539
9540/**
9541 * xmlParseGetLasts:
9542 * @ctxt: an XML parser context
9543 * @lastlt: pointer to store the last '<' from the input
9544 * @lastgt: pointer to store the last '>' from the input
9545 *
9546 * Lookup the last < and > in the current chunk
9547 */
9548static void
9549xmlParseGetLasts(xmlParserCtxtPtr ctxt, const xmlChar **lastlt,
9550 const xmlChar **lastgt) {
9551 const xmlChar *tmp;
9552
9553 if ((ctxt == NULL) || (lastlt == NULL) || (lastgt == NULL)) {
9554 xmlGenericError(xmlGenericErrorContext,
9555 "Internal error: xmlParseGetLasts\n");
9556 return;
9557 }
9558 if ((ctxt->progressive != 0) && (ctxt->inputNr == 1)) {
9559 tmp = ctxt->input->end;
9560 tmp--;
9561 while ((tmp >= ctxt->input->base) && (*tmp != '<')) tmp--;
9562 if (tmp < ctxt->input->base) {
9563 *lastlt = NULL;
9564 *lastgt = NULL;
9565 } else {
9566 *lastlt = tmp;
9567 tmp++;
9568 while ((tmp < ctxt->input->end) && (*tmp != '>')) {
9569 if (*tmp == '\'') {
9570 tmp++;
9571 while ((tmp < ctxt->input->end) && (*tmp != '\'')) tmp++;
9572 if (tmp < ctxt->input->end) tmp++;
9573 } else if (*tmp == '"') {
9574 tmp++;
9575 while ((tmp < ctxt->input->end) && (*tmp != '"')) tmp++;
9576 if (tmp < ctxt->input->end) tmp++;
9577 } else
9578 tmp++;
9579 }
9580 if (tmp < ctxt->input->end)
9581 *lastgt = tmp;
9582 else {
9583 tmp = *lastlt;
9584 tmp--;
9585 while ((tmp >= ctxt->input->base) && (*tmp != '>')) tmp--;
9586 if (tmp >= ctxt->input->base)
9587 *lastgt = tmp;
9588 else
9589 *lastgt = NULL;
9590 }
9591 }
9592 } else {
9593 *lastlt = NULL;
9594 *lastgt = NULL;
9595 }
9596}
9597/**
9598 * xmlCheckCdataPush:
9599 * @cur: pointer to the bock of characters
9600 * @len: length of the block in bytes
9601 *
9602 * Check that the block of characters is okay as SCdata content [20]
9603 *
9604 * Returns the number of bytes to pass if okay, a negative index where an
9605 * UTF-8 error occured otherwise
9606 */
9607static int
9608xmlCheckCdataPush(const xmlChar *utf, int len) {
9609 int ix;
9610 unsigned char c;
9611 int codepoint;
9612
9613 if ((utf == NULL) || (len <= 0))
9614 return(0);
9615
9616 for (ix = 0; ix < len;) { /* string is 0-terminated */
9617 c = utf[ix];
9618 if ((c & 0x80) == 0x00) { /* 1-byte code, starts with 10 */
9619 if (c >= 0x20)
9620 ix++;
9621 else if ((c == 0xA) || (c == 0xD) || (c == 0x9))
9622 ix++;
9623 else
9624 return(-ix);
9625 } else if ((c & 0xe0) == 0xc0) {/* 2-byte code, starts with 110 */
9626 if (ix + 2 > len) return(ix);
9627 if ((utf[ix+1] & 0xc0 ) != 0x80)
9628 return(-ix);
9629 codepoint = (utf[ix] & 0x1f) << 6;
9630 codepoint |= utf[ix+1] & 0x3f;
9631 if (!xmlIsCharQ(codepoint))
9632 return(-ix);
9633 ix += 2;
9634 } else if ((c & 0xf0) == 0xe0) {/* 3-byte code, starts with 1110 */
9635 if (ix + 3 > len) return(ix);
9636 if (((utf[ix+1] & 0xc0) != 0x80) ||
9637 ((utf[ix+2] & 0xc0) != 0x80))
9638 return(-ix);
9639 codepoint = (utf[ix] & 0xf) << 12;
9640 codepoint |= (utf[ix+1] & 0x3f) << 6;
9641 codepoint |= utf[ix+2] & 0x3f;
9642 if (!xmlIsCharQ(codepoint))
9643 return(-ix);
9644 ix += 3;
9645 } else if ((c & 0xf8) == 0xf0) {/* 4-byte code, starts with 11110 */
9646 if (ix + 4 > len) return(ix);
9647 if (((utf[ix+1] & 0xc0) != 0x80) ||
9648 ((utf[ix+2] & 0xc0) != 0x80) ||
9649 ((utf[ix+3] & 0xc0) != 0x80))
9650 return(-ix);
9651 codepoint = (utf[ix] & 0x7) << 18;
9652 codepoint |= (utf[ix+1] & 0x3f) << 12;
9653 codepoint |= (utf[ix+2] & 0x3f) << 6;
9654 codepoint |= utf[ix+3] & 0x3f;
9655 if (!xmlIsCharQ(codepoint))
9656 return(-ix);
9657 ix += 4;
9658 } else /* unknown encoding */
9659 return(-ix);
9660 }
9661 return(ix);
9662}
9663
9664/**
9665 * xmlParseTryOrFinish:
9666 * @ctxt: an XML parser context
9667 * @terminate: last chunk indicator
9668 *
9669 * Try to progress on parsing
9670 *
9671 * Returns zero if no parsing was possible
9672 */
9673static int
9674xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
9675 int ret = 0;
9676 int avail, tlen;
9677 xmlChar cur, next;
9678 const xmlChar *lastlt, *lastgt;
9679
9680 if (ctxt->input == NULL)
9681 return(0);
9682
9683#ifdef DEBUG_PUSH
9684 switch (ctxt->instate) {
9685 case XML_PARSER_EOF:
9686 xmlGenericError(xmlGenericErrorContext,
9687 "PP: try EOF\n"); break;
9688 case XML_PARSER_START:
9689 xmlGenericError(xmlGenericErrorContext,
9690 "PP: try START\n"); break;
9691 case XML_PARSER_MISC:
9692 xmlGenericError(xmlGenericErrorContext,
9693 "PP: try MISC\n");break;
9694 case XML_PARSER_COMMENT:
9695 xmlGenericError(xmlGenericErrorContext,
9696 "PP: try COMMENT\n");break;
9697 case XML_PARSER_PROLOG:
9698 xmlGenericError(xmlGenericErrorContext,
9699 "PP: try PROLOG\n");break;
9700 case XML_PARSER_START_TAG:
9701 xmlGenericError(xmlGenericErrorContext,
9702 "PP: try START_TAG\n");break;
9703 case XML_PARSER_CONTENT:
9704 xmlGenericError(xmlGenericErrorContext,
9705 "PP: try CONTENT\n");break;
9706 case XML_PARSER_CDATA_SECTION:
9707 xmlGenericError(xmlGenericErrorContext,
9708 "PP: try CDATA_SECTION\n");break;
9709 case XML_PARSER_END_TAG:
9710 xmlGenericError(xmlGenericErrorContext,
9711 "PP: try END_TAG\n");break;
9712 case XML_PARSER_ENTITY_DECL:
9713 xmlGenericError(xmlGenericErrorContext,
9714 "PP: try ENTITY_DECL\n");break;
9715 case XML_PARSER_ENTITY_VALUE:
9716 xmlGenericError(xmlGenericErrorContext,
9717 "PP: try ENTITY_VALUE\n");break;
9718 case XML_PARSER_ATTRIBUTE_VALUE:
9719 xmlGenericError(xmlGenericErrorContext,
9720 "PP: try ATTRIBUTE_VALUE\n");break;
9721 case XML_PARSER_DTD:
9722 xmlGenericError(xmlGenericErrorContext,
9723 "PP: try DTD\n");break;
9724 case XML_PARSER_EPILOG:
9725 xmlGenericError(xmlGenericErrorContext,
9726 "PP: try EPILOG\n");break;
9727 case XML_PARSER_PI:
9728 xmlGenericError(xmlGenericErrorContext,
9729 "PP: try PI\n");break;
9730 case XML_PARSER_IGNORE:
9731 xmlGenericError(xmlGenericErrorContext,
9732 "PP: try IGNORE\n");break;
9733 }
9734#endif
9735
9736 if ((ctxt->input != NULL) &&
9737 (ctxt->input->cur - ctxt->input->base > 4096)) {
9738 xmlSHRINK(ctxt);
9739 ctxt->checkIndex = 0;
9740 }
9741 xmlParseGetLasts(ctxt, &lastlt, &lastgt);
9742
9743 while (1) {
9744 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
9745 return(0);
9746
9747
9748 /*
9749 * Pop-up of finished entities.
9750 */
9751 while ((RAW == 0) && (ctxt->inputNr > 1))
9752 xmlPopInput(ctxt);
9753
9754 if (ctxt->input == NULL) break;
9755 if (ctxt->input->buf == NULL)
9756 avail = ctxt->input->length -
9757 (ctxt->input->cur - ctxt->input->base);
9758 else {
9759 /*
9760 * If we are operating on converted input, try to flush
9761 * remainng chars to avoid them stalling in the non-converted
9762 * buffer.
9763 */
9764 if ((ctxt->input->buf->raw != NULL) &&
9765 (ctxt->input->buf->raw->use > 0)) {
9766 int base = ctxt->input->base -
9767 ctxt->input->buf->buffer->content;
9768 int current = ctxt->input->cur - ctxt->input->base;
9769
9770 xmlParserInputBufferPush(ctxt->input->buf, 0, "");
9771 ctxt->input->base = ctxt->input->buf->buffer->content + base;
9772 ctxt->input->cur = ctxt->input->base + current;
9773 ctxt->input->end =
9774 &ctxt->input->buf->buffer->content[
9775 ctxt->input->buf->buffer->use];
9776 }
9777 avail = ctxt->input->buf->buffer->use -
9778 (ctxt->input->cur - ctxt->input->base);
9779 }
9780 if (avail < 1)
9781 goto done;
9782 switch (ctxt->instate) {
9783 case XML_PARSER_EOF:
9784 /*
9785 * Document parsing is done !
9786 */
9787 goto done;
9788 case XML_PARSER_START:
9789 if (ctxt->charset == XML_CHAR_ENCODING_NONE) {
9790 xmlChar start[4];
9791 xmlCharEncoding enc;
9792
9793 /*
9794 * Very first chars read from the document flow.
9795 */
9796 if (avail < 4)
9797 goto done;
9798
9799 /*
9800 * Get the 4 first bytes and decode the charset
9801 * if enc != XML_CHAR_ENCODING_NONE
9802 * plug some encoding conversion routines,
9803 * else xmlSwitchEncoding will set to (default)
9804 * UTF8.
9805 */
9806 start[0] = RAW;
9807 start[1] = NXT(1);
9808 start[2] = NXT(2);
9809 start[3] = NXT(3);
9810 enc = xmlDetectCharEncoding(start, 4);
9811 xmlSwitchEncoding(ctxt, enc);
9812 break;
9813 }
9814
9815 if (avail < 2)
9816 goto done;
9817 cur = ctxt->input->cur[0];
9818 next = ctxt->input->cur[1];
9819 if (cur == 0) {
9820 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
9821 ctxt->sax->setDocumentLocator(ctxt->userData,
9822 &xmlDefaultSAXLocator);
9823 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
9824 ctxt->instate = XML_PARSER_EOF;
9825#ifdef DEBUG_PUSH
9826 xmlGenericError(xmlGenericErrorContext,
9827 "PP: entering EOF\n");
9828#endif
9829 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
9830 ctxt->sax->endDocument(ctxt->userData);
9831 goto done;
9832 }
9833 if ((cur == '<') && (next == '?')) {
9834 /* PI or XML decl */
9835 if (avail < 5) return(ret);
9836 if ((!terminate) &&
9837 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
9838 return(ret);
9839 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
9840 ctxt->sax->setDocumentLocator(ctxt->userData,
9841 &xmlDefaultSAXLocator);
9842 if ((ctxt->input->cur[2] == 'x') &&
9843 (ctxt->input->cur[3] == 'm') &&
9844 (ctxt->input->cur[4] == 'l') &&
9845 (IS_BLANK_CH(ctxt->input->cur[5]))) {
9846 ret += 5;
9847#ifdef DEBUG_PUSH
9848 xmlGenericError(xmlGenericErrorContext,
9849 "PP: Parsing XML Decl\n");
9850#endif
9851 xmlParseXMLDecl(ctxt);
9852 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
9853 /*
9854 * The XML REC instructs us to stop parsing right
9855 * here
9856 */
9857 ctxt->instate = XML_PARSER_EOF;
9858 return(0);
9859 }
9860 ctxt->standalone = ctxt->input->standalone;
9861 if ((ctxt->encoding == NULL) &&
9862 (ctxt->input->encoding != NULL))
9863 ctxt->encoding = xmlStrdup(ctxt->input->encoding);
9864 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
9865 (!ctxt->disableSAX))
9866 ctxt->sax->startDocument(ctxt->userData);
9867 ctxt->instate = XML_PARSER_MISC;
9868#ifdef DEBUG_PUSH
9869 xmlGenericError(xmlGenericErrorContext,
9870 "PP: entering MISC\n");
9871#endif
9872 } else {
9873 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
9874 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
9875 (!ctxt->disableSAX))
9876 ctxt->sax->startDocument(ctxt->userData);
9877 ctxt->instate = XML_PARSER_MISC;
9878#ifdef DEBUG_PUSH
9879 xmlGenericError(xmlGenericErrorContext,
9880 "PP: entering MISC\n");
9881#endif
9882 }
9883 } else {
9884 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
9885 ctxt->sax->setDocumentLocator(ctxt->userData,
9886 &xmlDefaultSAXLocator);
9887 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
9888 if (ctxt->version == NULL) {
9889 xmlErrMemory(ctxt, NULL);
9890 break;
9891 }
9892 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
9893 (!ctxt->disableSAX))
9894 ctxt->sax->startDocument(ctxt->userData);
9895 ctxt->instate = XML_PARSER_MISC;
9896#ifdef DEBUG_PUSH
9897 xmlGenericError(xmlGenericErrorContext,
9898 "PP: entering MISC\n");
9899#endif
9900 }
9901 break;
9902 case XML_PARSER_START_TAG: {
9903 const xmlChar *name;
9904 const xmlChar *prefix;
9905 const xmlChar *URI;
9906 int nsNr = ctxt->nsNr;
9907
9908 if ((avail < 2) && (ctxt->inputNr == 1))
9909 goto done;
9910 cur = ctxt->input->cur[0];
9911 if (cur != '<') {
9912 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
9913 ctxt->instate = XML_PARSER_EOF;
9914 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
9915 ctxt->sax->endDocument(ctxt->userData);
9916 goto done;
9917 }
9918 if (!terminate) {
9919 if (ctxt->progressive) {
9920 /* > can be found unescaped in attribute values */
9921 if ((lastgt == NULL) || (ctxt->input->cur >= lastgt))
9922 goto done;
9923 } else if (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0) {
9924 goto done;
9925 }
9926 }
9927 if (ctxt->spaceNr == 0)
9928 spacePush(ctxt, -1);
9929 else if (*ctxt->space == -2)
9930 spacePush(ctxt, -1);
9931 else
9932 spacePush(ctxt, *ctxt->space);
9933#ifdef LIBXML_SAX1_ENABLED
9934 if (ctxt->sax2)
9935#endif /* LIBXML_SAX1_ENABLED */
9936 name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen);
9937#ifdef LIBXML_SAX1_ENABLED
9938 else
9939 name = xmlParseStartTag(ctxt);
9940#endif /* LIBXML_SAX1_ENABLED */
9941 if (name == NULL) {
9942 spacePop(ctxt);
9943 ctxt->instate = XML_PARSER_EOF;
9944 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
9945 ctxt->sax->endDocument(ctxt->userData);
9946 goto done;
9947 }
9948#ifdef LIBXML_VALID_ENABLED
9949 /*
9950 * [ VC: Root Element Type ]
9951 * The Name in the document type declaration must match
9952 * the element type of the root element.
9953 */
9954 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
9955 ctxt->node && (ctxt->node == ctxt->myDoc->children))
9956 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
9957#endif /* LIBXML_VALID_ENABLED */
9958
9959 /*
9960 * Check for an Empty Element.
9961 */
9962 if ((RAW == '/') && (NXT(1) == '>')) {
9963 SKIP(2);
9964
9965 if (ctxt->sax2) {
9966 if ((ctxt->sax != NULL) &&
9967 (ctxt->sax->endElementNs != NULL) &&
9968 (!ctxt->disableSAX))
9969 ctxt->sax->endElementNs(ctxt->userData, name,
9970 prefix, URI);
9971 if (ctxt->nsNr - nsNr > 0)
9972 nsPop(ctxt, ctxt->nsNr - nsNr);
9973#ifdef LIBXML_SAX1_ENABLED
9974 } else {
9975 if ((ctxt->sax != NULL) &&
9976 (ctxt->sax->endElement != NULL) &&
9977 (!ctxt->disableSAX))
9978 ctxt->sax->endElement(ctxt->userData, name);
9979#endif /* LIBXML_SAX1_ENABLED */
9980 }
9981 spacePop(ctxt);
9982 if (ctxt->nameNr == 0) {
9983 ctxt->instate = XML_PARSER_EPILOG;
9984 } else {
9985 ctxt->instate = XML_PARSER_CONTENT;
9986 }
9987 break;
9988 }
9989 if (RAW == '>') {
9990 NEXT;
9991 } else {
9992 xmlFatalErrMsgStr(ctxt, XML_ERR_GT_REQUIRED,
9993 "Couldn't find end of Start Tag %s\n",
9994 name);
9995 nodePop(ctxt);
9996 spacePop(ctxt);
9997 }
9998 if (ctxt->sax2)
9999 nameNsPush(ctxt, name, prefix, URI, ctxt->nsNr - nsNr);
10000#ifdef LIBXML_SAX1_ENABLED
10001 else
10002 namePush(ctxt, name);
10003#endif /* LIBXML_SAX1_ENABLED */
10004
10005 ctxt->instate = XML_PARSER_CONTENT;
10006 break;
10007 }
10008 case XML_PARSER_CONTENT: {
10009 const xmlChar *test;
10010 unsigned int cons;
10011 if ((avail < 2) && (ctxt->inputNr == 1))
10012 goto done;
10013 cur = ctxt->input->cur[0];
10014 next = ctxt->input->cur[1];
10015
10016 test = CUR_PTR;
10017 cons = ctxt->input->consumed;
10018 if ((cur == '<') && (next == '/')) {
10019 ctxt->instate = XML_PARSER_END_TAG;
10020 break;
10021 } else if ((cur == '<') && (next == '?')) {
10022 if ((!terminate) &&
10023 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
10024 goto done;
10025 xmlParsePI(ctxt);
10026 } else if ((cur == '<') && (next != '!')) {
10027 ctxt->instate = XML_PARSER_START_TAG;
10028 break;
10029 } else if ((cur == '<') && (next == '!') &&
10030 (ctxt->input->cur[2] == '-') &&
10031 (ctxt->input->cur[3] == '-')) {
10032 int term;
10033
10034 if (avail < 4)
10035 goto done;
10036 ctxt->input->cur += 4;
10037 term = xmlParseLookupSequence(ctxt, '-', '-', '>');
10038 ctxt->input->cur -= 4;
10039 if ((!terminate) && (term < 0))
10040 goto done;
10041 xmlParseComment(ctxt);
10042 ctxt->instate = XML_PARSER_CONTENT;
10043 } else if ((cur == '<') && (ctxt->input->cur[1] == '!') &&
10044 (ctxt->input->cur[2] == '[') &&
10045 (ctxt->input->cur[3] == 'C') &&
10046 (ctxt->input->cur[4] == 'D') &&
10047 (ctxt->input->cur[5] == 'A') &&
10048 (ctxt->input->cur[6] == 'T') &&
10049 (ctxt->input->cur[7] == 'A') &&
10050 (ctxt->input->cur[8] == '[')) {
10051 SKIP(9);
10052 ctxt->instate = XML_PARSER_CDATA_SECTION;
10053 break;
10054 } else if ((cur == '<') && (next == '!') &&
10055 (avail < 9)) {
10056 goto done;
10057 } else if (cur == '&') {
10058 if ((!terminate) &&
10059 (xmlParseLookupSequence(ctxt, ';', 0, 0) < 0))
10060 goto done;
10061 xmlParseReference(ctxt);
10062 } else {
10063 /* TODO Avoid the extra copy, handle directly !!! */
10064 /*
10065 * Goal of the following test is:
10066 * - minimize calls to the SAX 'character' callback
10067 * when they are mergeable
10068 * - handle an problem for isBlank when we only parse
10069 * a sequence of blank chars and the next one is
10070 * not available to check against '<' presence.
10071 * - tries to homogenize the differences in SAX
10072 * callbacks between the push and pull versions
10073 * of the parser.
10074 */
10075 if ((ctxt->inputNr == 1) &&
10076 (avail < XML_PARSER_BIG_BUFFER_SIZE)) {
10077 if (!terminate) {
10078 if (ctxt->progressive) {
10079 if ((lastlt == NULL) ||
10080 (ctxt->input->cur > lastlt))
10081 goto done;
10082 } else if (xmlParseLookupSequence(ctxt,
10083 '<', 0, 0) < 0) {
10084 goto done;
10085 }
10086 }
10087 }
10088 ctxt->checkIndex = 0;
10089 xmlParseCharData(ctxt, 0);
10090 }
10091 /*
10092 * Pop-up of finished entities.
10093 */
10094 while ((RAW == 0) && (ctxt->inputNr > 1))
10095 xmlPopInput(ctxt);
10096 if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) {
10097 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
10098 "detected an error in element content\n");
10099 ctxt->instate = XML_PARSER_EOF;
10100 break;
10101 }
10102 break;
10103 }
10104 case XML_PARSER_END_TAG:
10105 if (avail < 2)
10106 goto done;
10107 if (!terminate) {
10108 if (ctxt->progressive) {
10109 /* > can be found unescaped in attribute values */
10110 if ((lastgt == NULL) || (ctxt->input->cur >= lastgt))
10111 goto done;
10112 } else if (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0) {
10113 goto done;
10114 }
10115 }
10116 if (ctxt->sax2) {
10117 xmlParseEndTag2(ctxt,
10118 (void *) ctxt->pushTab[ctxt->nameNr * 3 - 3],
10119 (void *) ctxt->pushTab[ctxt->nameNr * 3 - 2], 0,
10120 (int) (long) ctxt->pushTab[ctxt->nameNr * 3 - 1], 0);
10121 nameNsPop(ctxt);
10122 }
10123#ifdef LIBXML_SAX1_ENABLED
10124 else
10125 xmlParseEndTag1(ctxt, 0);
10126#endif /* LIBXML_SAX1_ENABLED */
10127 if (ctxt->nameNr == 0) {
10128 ctxt->instate = XML_PARSER_EPILOG;
10129 } else {
10130 ctxt->instate = XML_PARSER_CONTENT;
10131 }
10132 break;
10133 case XML_PARSER_CDATA_SECTION: {
10134 /*
10135 * The Push mode need to have the SAX callback for
10136 * cdataBlock merge back contiguous callbacks.
10137 */
10138 int base;
10139
10140 base = xmlParseLookupSequence(ctxt, ']', ']', '>');
10141 if (base < 0) {
10142 if (avail >= XML_PARSER_BIG_BUFFER_SIZE + 2) {
10143 int tmp;
10144
10145 tmp = xmlCheckCdataPush(ctxt->input->cur,
10146 XML_PARSER_BIG_BUFFER_SIZE);
10147 if (tmp < 0) {
10148 tmp = -tmp;
10149 ctxt->input->cur += tmp;
10150 goto encoding_error;
10151 }
10152 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
10153 if (ctxt->sax->cdataBlock != NULL)
10154 ctxt->sax->cdataBlock(ctxt->userData,
10155 ctxt->input->cur, tmp);
10156 else if (ctxt->sax->characters != NULL)
10157 ctxt->sax->characters(ctxt->userData,
10158 ctxt->input->cur, tmp);
10159 }
10160 SKIPL(tmp);
10161 ctxt->checkIndex = 0;
10162 }
10163 goto done;
10164 } else {
10165 int tmp;
10166
10167 tmp = xmlCheckCdataPush(ctxt->input->cur, base);
10168 if ((tmp < 0) || (tmp != base)) {
10169 tmp = -tmp;
10170 ctxt->input->cur += tmp;
10171 goto encoding_error;
10172 }
10173 if ((ctxt->sax != NULL) && (base > 0) &&
10174 (!ctxt->disableSAX)) {
10175 if (ctxt->sax->cdataBlock != NULL)
10176 ctxt->sax->cdataBlock(ctxt->userData,
10177 ctxt->input->cur, base);
10178 else if (ctxt->sax->characters != NULL)
10179 ctxt->sax->characters(ctxt->userData,
10180 ctxt->input->cur, base);
10181 }
10182 SKIPL(base + 3);
10183 ctxt->checkIndex = 0;
10184 ctxt->instate = XML_PARSER_CONTENT;
10185#ifdef DEBUG_PUSH
10186 xmlGenericError(xmlGenericErrorContext,
10187 "PP: entering CONTENT\n");
10188#endif
10189 }
10190 break;
10191 }
10192 case XML_PARSER_MISC:
10193 SKIP_BLANKS;
10194 if (ctxt->input->buf == NULL)
10195 avail = ctxt->input->length -
10196 (ctxt->input->cur - ctxt->input->base);
10197 else
10198 avail = ctxt->input->buf->buffer->use -
10199 (ctxt->input->cur - ctxt->input->base);
10200 if (avail < 2)
10201 goto done;
10202 cur = ctxt->input->cur[0];
10203 next = ctxt->input->cur[1];
10204 if ((cur == '<') && (next == '?')) {
10205 if ((!terminate) &&
10206 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
10207 goto done;
10208#ifdef DEBUG_PUSH
10209 xmlGenericError(xmlGenericErrorContext,
10210 "PP: Parsing PI\n");
10211#endif
10212 xmlParsePI(ctxt);
10213 ctxt->checkIndex = 0;
10214 } else if ((cur == '<') && (next == '!') &&
10215 (ctxt->input->cur[2] == '-') &&
10216 (ctxt->input->cur[3] == '-')) {
10217 if ((!terminate) &&
10218 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
10219 goto done;
10220#ifdef DEBUG_PUSH
10221 xmlGenericError(xmlGenericErrorContext,
10222 "PP: Parsing Comment\n");
10223#endif
10224 xmlParseComment(ctxt);
10225 ctxt->instate = XML_PARSER_MISC;
10226 ctxt->checkIndex = 0;
10227 } else if ((cur == '<') && (next == '!') &&
10228 (ctxt->input->cur[2] == 'D') &&
10229 (ctxt->input->cur[3] == 'O') &&
10230 (ctxt->input->cur[4] == 'C') &&
10231 (ctxt->input->cur[5] == 'T') &&
10232 (ctxt->input->cur[6] == 'Y') &&
10233 (ctxt->input->cur[7] == 'P') &&
10234 (ctxt->input->cur[8] == 'E')) {
10235 if ((!terminate) &&
10236 (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
10237 goto done;
10238#ifdef DEBUG_PUSH
10239 xmlGenericError(xmlGenericErrorContext,
10240 "PP: Parsing internal subset\n");
10241#endif
10242 ctxt->inSubset = 1;
10243 xmlParseDocTypeDecl(ctxt);
10244 if (RAW == '[') {
10245 ctxt->instate = XML_PARSER_DTD;
10246#ifdef DEBUG_PUSH
10247 xmlGenericError(xmlGenericErrorContext,
10248 "PP: entering DTD\n");
10249#endif
10250 } else {
10251 /*
10252 * Create and update the external subset.
10253 */
10254 ctxt->inSubset = 2;
10255 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
10256 (ctxt->sax->externalSubset != NULL))
10257 ctxt->sax->externalSubset(ctxt->userData,
10258 ctxt->intSubName, ctxt->extSubSystem,
10259 ctxt->extSubURI);
10260 ctxt->inSubset = 0;
10261 ctxt->instate = XML_PARSER_PROLOG;
10262#ifdef DEBUG_PUSH
10263 xmlGenericError(xmlGenericErrorContext,
10264 "PP: entering PROLOG\n");
10265#endif
10266 }
10267 } else if ((cur == '<') && (next == '!') &&
10268 (avail < 9)) {
10269 goto done;
10270 } else {
10271 ctxt->instate = XML_PARSER_START_TAG;
10272 ctxt->progressive = 1;
10273 xmlParseGetLasts(ctxt, &lastlt, &lastgt);
10274#ifdef DEBUG_PUSH
10275 xmlGenericError(xmlGenericErrorContext,
10276 "PP: entering START_TAG\n");
10277#endif
10278 }
10279 break;
10280 case XML_PARSER_PROLOG:
10281 SKIP_BLANKS;
10282 if (ctxt->input->buf == NULL)
10283 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
10284 else
10285 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
10286 if (avail < 2)
10287 goto done;
10288 cur = ctxt->input->cur[0];
10289 next = ctxt->input->cur[1];
10290 if ((cur == '<') && (next == '?')) {
10291 if ((!terminate) &&
10292 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
10293 goto done;
10294#ifdef DEBUG_PUSH
10295 xmlGenericError(xmlGenericErrorContext,
10296 "PP: Parsing PI\n");
10297#endif
10298 xmlParsePI(ctxt);
10299 } else if ((cur == '<') && (next == '!') &&
10300 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
10301 if ((!terminate) &&
10302 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
10303 goto done;
10304#ifdef DEBUG_PUSH
10305 xmlGenericError(xmlGenericErrorContext,
10306 "PP: Parsing Comment\n");
10307#endif
10308 xmlParseComment(ctxt);
10309 ctxt->instate = XML_PARSER_PROLOG;
10310 } else if ((cur == '<') && (next == '!') &&
10311 (avail < 4)) {
10312 goto done;
10313 } else {
10314 ctxt->instate = XML_PARSER_START_TAG;
10315 if (ctxt->progressive == 0)
10316 ctxt->progressive = 1;
10317 xmlParseGetLasts(ctxt, &lastlt, &lastgt);
10318#ifdef DEBUG_PUSH
10319 xmlGenericError(xmlGenericErrorContext,
10320 "PP: entering START_TAG\n");
10321#endif
10322 }
10323 break;
10324 case XML_PARSER_EPILOG:
10325 SKIP_BLANKS;
10326 if (ctxt->input->buf == NULL)
10327 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
10328 else
10329 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
10330 if (avail < 2)
10331 goto done;
10332 cur = ctxt->input->cur[0];
10333 next = ctxt->input->cur[1];
10334 if ((cur == '<') && (next == '?')) {
10335 if ((!terminate) &&
10336 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
10337 goto done;
10338#ifdef DEBUG_PUSH
10339 xmlGenericError(xmlGenericErrorContext,
10340 "PP: Parsing PI\n");
10341#endif
10342 xmlParsePI(ctxt);
10343 ctxt->instate = XML_PARSER_EPILOG;
10344 } else if ((cur == '<') && (next == '!') &&
10345 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
10346 if ((!terminate) &&
10347 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
10348 goto done;
10349#ifdef DEBUG_PUSH
10350 xmlGenericError(xmlGenericErrorContext,
10351 "PP: Parsing Comment\n");
10352#endif
10353 xmlParseComment(ctxt);
10354 ctxt->instate = XML_PARSER_EPILOG;
10355 } else if ((cur == '<') && (next == '!') &&
10356 (avail < 4)) {
10357 goto done;
10358 } else {
10359 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
10360 ctxt->instate = XML_PARSER_EOF;
10361#ifdef DEBUG_PUSH
10362 xmlGenericError(xmlGenericErrorContext,
10363 "PP: entering EOF\n");
10364#endif
10365 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
10366 ctxt->sax->endDocument(ctxt->userData);
10367 goto done;
10368 }
10369 break;
10370 case XML_PARSER_DTD: {
10371 /*
10372 * Sorry but progressive parsing of the internal subset
10373 * is not expected to be supported. We first check that
10374 * the full content of the internal subset is available and
10375 * the parsing is launched only at that point.
10376 * Internal subset ends up with "']' S? '>'" in an unescaped
10377 * section and not in a ']]>' sequence which are conditional
10378 * sections (whoever argued to keep that crap in XML deserve
10379 * a place in hell !).
10380 */
10381 int base, i;
10382 xmlChar *buf;
10383 xmlChar quote = 0;
10384
10385 base = ctxt->input->cur - ctxt->input->base;
10386 if (base < 0) return(0);
10387 if (ctxt->checkIndex > base)
10388 base = ctxt->checkIndex;
10389 buf = ctxt->input->buf->buffer->content;
10390 for (;(unsigned int) base < ctxt->input->buf->buffer->use;
10391 base++) {
10392 if (quote != 0) {
10393 if (buf[base] == quote)
10394 quote = 0;
10395 continue;
10396 }
10397 if ((quote == 0) && (buf[base] == '<')) {
10398 int found = 0;
10399 /* special handling of comments */
10400 if (((unsigned int) base + 4 <
10401 ctxt->input->buf->buffer->use) &&
10402 (buf[base + 1] == '!') &&
10403 (buf[base + 2] == '-') &&
10404 (buf[base + 3] == '-')) {
10405 for (;(unsigned int) base + 3 <
10406 ctxt->input->buf->buffer->use; base++) {
10407 if ((buf[base] == '-') &&
10408 (buf[base + 1] == '-') &&
10409 (buf[base + 2] == '>')) {
10410 found = 1;
10411 base += 2;
10412 break;
10413 }
10414 }
10415 if (!found) {
10416#if 0
10417 fprintf(stderr, "unfinished comment\n");
10418#endif
10419 break; /* for */
10420 }
10421 continue;
10422 }
10423 }
10424 if (buf[base] == '"') {
10425 quote = '"';
10426 continue;
10427 }
10428 if (buf[base] == '\'') {
10429 quote = '\'';
10430 continue;
10431 }
10432 if (buf[base] == ']') {
10433#if 0
10434 fprintf(stderr, "%c%c%c%c: ", buf[base],
10435 buf[base + 1], buf[base + 2], buf[base + 3]);
10436#endif
10437 if ((unsigned int) base +1 >=
10438 ctxt->input->buf->buffer->use)
10439 break;
10440 if (buf[base + 1] == ']') {
10441 /* conditional crap, skip both ']' ! */
10442 base++;
10443 continue;
10444 }
10445 for (i = 1;
10446 (unsigned int) base + i < ctxt->input->buf->buffer->use;
10447 i++) {
10448 if (buf[base + i] == '>') {
10449#if 0
10450 fprintf(stderr, "found\n");
10451#endif
10452 goto found_end_int_subset;
10453 }
10454 if (!IS_BLANK_CH(buf[base + i])) {
10455#if 0
10456 fprintf(stderr, "not found\n");
10457#endif
10458 goto not_end_of_int_subset;
10459 }
10460 }
10461#if 0
10462 fprintf(stderr, "end of stream\n");
10463#endif
10464 break;
10465
10466 }
10467not_end_of_int_subset:
10468 continue; /* for */
10469 }
10470 /*
10471 * We didn't found the end of the Internal subset
10472 */
10473#ifdef DEBUG_PUSH
10474 if (next == 0)
10475 xmlGenericError(xmlGenericErrorContext,
10476 "PP: lookup of int subset end filed\n");
10477#endif
10478 goto done;
10479
10480found_end_int_subset:
10481 xmlParseInternalSubset(ctxt);
10482 ctxt->inSubset = 2;
10483 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
10484 (ctxt->sax->externalSubset != NULL))
10485 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
10486 ctxt->extSubSystem, ctxt->extSubURI);
10487 ctxt->inSubset = 0;
10488 ctxt->instate = XML_PARSER_PROLOG;
10489 ctxt->checkIndex = 0;
10490#ifdef DEBUG_PUSH
10491 xmlGenericError(xmlGenericErrorContext,
10492 "PP: entering PROLOG\n");
10493#endif
10494 break;
10495 }
10496 case XML_PARSER_COMMENT:
10497 xmlGenericError(xmlGenericErrorContext,
10498 "PP: internal error, state == COMMENT\n");
10499 ctxt->instate = XML_PARSER_CONTENT;
10500#ifdef DEBUG_PUSH
10501 xmlGenericError(xmlGenericErrorContext,
10502 "PP: entering CONTENT\n");
10503#endif
10504 break;
10505 case XML_PARSER_IGNORE:
10506 xmlGenericError(xmlGenericErrorContext,
10507 "PP: internal error, state == IGNORE");
10508 ctxt->instate = XML_PARSER_DTD;
10509#ifdef DEBUG_PUSH
10510 xmlGenericError(xmlGenericErrorContext,
10511 "PP: entering DTD\n");
10512#endif
10513 break;
10514 case XML_PARSER_PI:
10515 xmlGenericError(xmlGenericErrorContext,
10516 "PP: internal error, state == PI\n");
10517 ctxt->instate = XML_PARSER_CONTENT;
10518#ifdef DEBUG_PUSH
10519 xmlGenericError(xmlGenericErrorContext,
10520 "PP: entering CONTENT\n");
10521#endif
10522 break;
10523 case XML_PARSER_ENTITY_DECL:
10524 xmlGenericError(xmlGenericErrorContext,
10525 "PP: internal error, state == ENTITY_DECL\n");
10526 ctxt->instate = XML_PARSER_DTD;
10527#ifdef DEBUG_PUSH
10528 xmlGenericError(xmlGenericErrorContext,
10529 "PP: entering DTD\n");
10530#endif
10531 break;
10532 case XML_PARSER_ENTITY_VALUE:
10533 xmlGenericError(xmlGenericErrorContext,
10534 "PP: internal error, state == ENTITY_VALUE\n");
10535 ctxt->instate = XML_PARSER_CONTENT;
10536#ifdef DEBUG_PUSH
10537 xmlGenericError(xmlGenericErrorContext,
10538 "PP: entering DTD\n");
10539#endif
10540 break;
10541 case XML_PARSER_ATTRIBUTE_VALUE:
10542 xmlGenericError(xmlGenericErrorContext,
10543 "PP: internal error, state == ATTRIBUTE_VALUE\n");
10544 ctxt->instate = XML_PARSER_START_TAG;
10545#ifdef DEBUG_PUSH
10546 xmlGenericError(xmlGenericErrorContext,
10547 "PP: entering START_TAG\n");
10548#endif
10549 break;
10550 case XML_PARSER_SYSTEM_LITERAL:
10551 xmlGenericError(xmlGenericErrorContext,
10552 "PP: internal error, state == SYSTEM_LITERAL\n");
10553 ctxt->instate = XML_PARSER_START_TAG;
10554#ifdef DEBUG_PUSH
10555 xmlGenericError(xmlGenericErrorContext,
10556 "PP: entering START_TAG\n");
10557#endif
10558 break;
10559 case XML_PARSER_PUBLIC_LITERAL:
10560 xmlGenericError(xmlGenericErrorContext,
10561 "PP: internal error, state == PUBLIC_LITERAL\n");
10562 ctxt->instate = XML_PARSER_START_TAG;
10563#ifdef DEBUG_PUSH
10564 xmlGenericError(xmlGenericErrorContext,
10565 "PP: entering START_TAG\n");
10566#endif
10567 break;
10568 }
10569 }
10570done:
10571#ifdef DEBUG_PUSH
10572 xmlGenericError(xmlGenericErrorContext, "PP: done %d\n", ret);
10573#endif
10574 return(ret);
10575encoding_error:
10576 {
10577 char buffer[150];
10578
10579 snprintf(buffer, 149, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
10580 ctxt->input->cur[0], ctxt->input->cur[1],
10581 ctxt->input->cur[2], ctxt->input->cur[3]);
10582 __xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR,
10583 "Input is not proper UTF-8, indicate encoding !\n%s",
10584 BAD_CAST buffer, NULL);
10585 }
10586 return(0);
10587}
10588
10589/**
10590 * xmlParseChunk:
10591 * @ctxt: an XML parser context
10592 * @chunk: an char array
10593 * @size: the size in byte of the chunk
10594 * @terminate: last chunk indicator
10595 *
10596 * Parse a Chunk of memory
10597 *
10598 * Returns zero if no error, the xmlParserErrors otherwise.
10599 */
10600int
10601xmlParseChunk(xmlParserCtxtPtr ctxt, const char *chunk, int size,
10602 int terminate) {
10603 int end_in_lf = 0;
10604
10605 if (ctxt == NULL)
10606 return(XML_ERR_INTERNAL_ERROR);
10607 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
10608 return(ctxt->errNo);
10609 if (ctxt->instate == XML_PARSER_START)
10610 xmlDetectSAX2(ctxt);
10611 if ((size > 0) && (chunk != NULL) && (!terminate) &&
10612 (chunk[size - 1] == '\r')) {
10613 end_in_lf = 1;
10614 size--;
10615 }
10616 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
10617 (ctxt->input->buf != NULL) && (ctxt->instate != XML_PARSER_EOF)) {
10618 int base = ctxt->input->base - ctxt->input->buf->buffer->content;
10619 int cur = ctxt->input->cur - ctxt->input->base;
10620 int res;
10621
10622 res =xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
10623 if (res < 0) {
10624 ctxt->errNo = XML_PARSER_EOF;
10625 ctxt->disableSAX = 1;
10626 return (XML_PARSER_EOF);
10627 }
10628 ctxt->input->base = ctxt->input->buf->buffer->content + base;
10629 ctxt->input->cur = ctxt->input->base + cur;
10630 ctxt->input->end =
10631 &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->use];
10632#ifdef DEBUG_PUSH
10633 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
10634#endif
10635
10636 } else if (ctxt->instate != XML_PARSER_EOF) {
10637 if ((ctxt->input != NULL) && ctxt->input->buf != NULL) {
10638 xmlParserInputBufferPtr in = ctxt->input->buf;
10639 if ((in->encoder != NULL) && (in->buffer != NULL) &&
10640 (in->raw != NULL)) {
10641 int nbchars;
10642
10643 nbchars = xmlCharEncInFunc(in->encoder, in->buffer, in->raw);
10644 if (nbchars < 0) {
10645 /* TODO 2.6.0 */
10646 xmlGenericError(xmlGenericErrorContext,
10647 "xmlParseChunk: encoder error\n");
10648 return(XML_ERR_INVALID_ENCODING);
10649 }
10650 }
10651 }
10652 }
10653 xmlParseTryOrFinish(ctxt, terminate);
10654 if ((end_in_lf == 1) && (ctxt->input != NULL) &&
10655 (ctxt->input->buf != NULL)) {
10656 xmlParserInputBufferPush(ctxt->input->buf, 1, "\r");
10657 }
10658 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
10659 return(ctxt->errNo);
10660 if (terminate) {
10661 /*
10662 * Check for termination
10663 */
10664 int avail = 0;
10665
10666 if (ctxt->input != NULL) {
10667 if (ctxt->input->buf == NULL)
10668 avail = ctxt->input->length -
10669 (ctxt->input->cur - ctxt->input->base);
10670 else
10671 avail = ctxt->input->buf->buffer->use -
10672 (ctxt->input->cur - ctxt->input->base);
10673 }
10674
10675 if ((ctxt->instate != XML_PARSER_EOF) &&
10676 (ctxt->instate != XML_PARSER_EPILOG)) {
10677 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
10678 }
10679 if ((ctxt->instate == XML_PARSER_EPILOG) && (avail > 0)) {
10680 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
10681 }
10682 if (ctxt->instate != XML_PARSER_EOF) {
10683 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
10684 ctxt->sax->endDocument(ctxt->userData);
10685 }
10686 ctxt->instate = XML_PARSER_EOF;
10687 }
10688 return((xmlParserErrors) ctxt->errNo);
10689}
10690
10691/************************************************************************
10692 * *
10693 * I/O front end functions to the parser *
10694 * *
10695 ************************************************************************/
10696
10697/**
10698 * xmlCreatePushParserCtxt:
10699 * @sax: a SAX handler
10700 * @user_data: The user data returned on SAX callbacks
10701 * @chunk: a pointer to an array of chars
10702 * @size: number of chars in the array
10703 * @filename: an optional file name or URI
10704 *
10705 * Create a parser context for using the XML parser in push mode.
10706 * If @buffer and @size are non-NULL, the data is used to detect
10707 * the encoding. The remaining characters will be parsed so they
10708 * don't need to be fed in again through xmlParseChunk.
10709 * To allow content encoding detection, @size should be >= 4
10710 * The value of @filename is used for fetching external entities
10711 * and error/warning reports.
10712 *
10713 * Returns the new parser context or NULL
10714 */
10715
10716xmlParserCtxtPtr
10717xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
10718 const char *chunk, int size, const char *filename) {
10719 xmlParserCtxtPtr ctxt;
10720 xmlParserInputPtr inputStream;
10721 xmlParserInputBufferPtr buf;
10722 xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
10723
10724 /*
10725 * plug some encoding conversion routines
10726 */
10727 if ((chunk != NULL) && (size >= 4))
10728 enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
10729
10730 buf = xmlAllocParserInputBuffer(enc);
10731 if (buf == NULL) return(NULL);
10732
10733 ctxt = xmlNewParserCtxt();
10734 if (ctxt == NULL) {
10735 xmlErrMemory(NULL, "creating parser: out of memory\n");
10736 xmlFreeParserInputBuffer(buf);
10737 return(NULL);
10738 }
10739 ctxt->dictNames = 1;
10740 ctxt->pushTab = (void **) xmlMalloc(ctxt->nameMax * 3 * sizeof(xmlChar *));
10741 if (ctxt->pushTab == NULL) {
10742 xmlErrMemory(ctxt, NULL);
10743 xmlFreeParserInputBuffer(buf);
10744 xmlFreeParserCtxt(ctxt);
10745 return(NULL);
10746 }
10747 if (sax != NULL) {
10748#ifdef LIBXML_SAX1_ENABLED
10749 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
10750#endif /* LIBXML_SAX1_ENABLED */
10751 xmlFree(ctxt->sax);
10752 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
10753 if (ctxt->sax == NULL) {
10754 xmlErrMemory(ctxt, NULL);
10755 xmlFreeParserInputBuffer(buf);
10756 xmlFreeParserCtxt(ctxt);
10757 return(NULL);
10758 }
10759 memset(ctxt->sax, 0, sizeof(xmlSAXHandler));
10760 if (sax->initialized == XML_SAX2_MAGIC)
10761 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
10762 else
10763 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1));
10764 if (user_data != NULL)
10765 ctxt->userData = user_data;
10766 }
10767 if (filename == NULL) {
10768 ctxt->directory = NULL;
10769 } else {
10770 ctxt->directory = xmlParserGetDirectory(filename);
10771 }
10772
10773 inputStream = xmlNewInputStream(ctxt);
10774 if (inputStream == NULL) {
10775 xmlFreeParserCtxt(ctxt);
10776 xmlFreeParserInputBuffer(buf);
10777 return(NULL);
10778 }
10779
10780 if (filename == NULL)
10781 inputStream->filename = NULL;
10782 else {
10783 inputStream->filename = (char *)
10784 xmlCanonicPath((const xmlChar *) filename);
10785 if (inputStream->filename == NULL) {
10786 xmlFreeParserCtxt(ctxt);
10787 xmlFreeParserInputBuffer(buf);
10788 return(NULL);
10789 }
10790 }
10791 inputStream->buf = buf;
10792 inputStream->base = inputStream->buf->buffer->content;
10793 inputStream->cur = inputStream->buf->buffer->content;
10794 inputStream->end =
10795 &inputStream->buf->buffer->content[inputStream->buf->buffer->use];
10796
10797 inputPush(ctxt, inputStream);
10798
10799 /*
10800 * If the caller didn't provide an initial 'chunk' for determining
10801 * the encoding, we set the context to XML_CHAR_ENCODING_NONE so
10802 * that it can be automatically determined later
10803 */
10804 if ((size == 0) || (chunk == NULL)) {
10805 ctxt->charset = XML_CHAR_ENCODING_NONE;
10806 } else if ((ctxt->input != NULL) && (ctxt->input->buf != NULL)) {
10807 int base = ctxt->input->base - ctxt->input->buf->buffer->content;
10808 int cur = ctxt->input->cur - ctxt->input->base;
10809
10810 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
10811
10812 ctxt->input->base = ctxt->input->buf->buffer->content + base;
10813 ctxt->input->cur = ctxt->input->base + cur;
10814 ctxt->input->end =
10815 &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->use];
10816#ifdef DEBUG_PUSH
10817 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
10818#endif
10819 }
10820
10821 if (enc != XML_CHAR_ENCODING_NONE) {
10822 xmlSwitchEncoding(ctxt, enc);
10823 }
10824
10825 return(ctxt);
10826}
10827#endif /* LIBXML_PUSH_ENABLED */
10828
10829/**
10830 * xmlStopParser:
10831 * @ctxt: an XML parser context
10832 *
10833 * Blocks further parser processing
10834 */
10835void
10836xmlStopParser(xmlParserCtxtPtr ctxt) {
10837 if (ctxt == NULL)
10838 return;
10839 ctxt->instate = XML_PARSER_EOF;
10840 ctxt->disableSAX = 1;
10841 if (ctxt->input != NULL) {
10842 ctxt->input->cur = BAD_CAST"";
10843 ctxt->input->base = ctxt->input->cur;
10844 }
10845}
10846
10847/**
10848 * xmlCreateIOParserCtxt:
10849 * @sax: a SAX handler
10850 * @user_data: The user data returned on SAX callbacks
10851 * @ioread: an I/O read function
10852 * @ioclose: an I/O close function
10853 * @ioctx: an I/O handler
10854 * @enc: the charset encoding if known
10855 *
10856 * Create a parser context for using the XML parser with an existing
10857 * I/O stream
10858 *
10859 * Returns the new parser context or NULL
10860 */
10861xmlParserCtxtPtr
10862xmlCreateIOParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
10863 xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
10864 void *ioctx, xmlCharEncoding enc) {
10865 xmlParserCtxtPtr ctxt;
10866 xmlParserInputPtr inputStream;
10867 xmlParserInputBufferPtr buf;
10868
10869 if (ioread == NULL) return(NULL);
10870
10871 buf = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx, enc);
10872 if (buf == NULL) return(NULL);
10873
10874 ctxt = xmlNewParserCtxt();
10875 if (ctxt == NULL) {
10876 xmlFreeParserInputBuffer(buf);
10877 return(NULL);
10878 }
10879 if (sax != NULL) {
10880#ifdef LIBXML_SAX1_ENABLED
10881 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
10882#endif /* LIBXML_SAX1_ENABLED */
10883 xmlFree(ctxt->sax);
10884 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
10885 if (ctxt->sax == NULL) {
10886 xmlErrMemory(ctxt, NULL);
10887 xmlFreeParserCtxt(ctxt);
10888 return(NULL);
10889 }
10890 memset(ctxt->sax, 0, sizeof(xmlSAXHandler));
10891 if (sax->initialized == XML_SAX2_MAGIC)
10892 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
10893 else
10894 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1));
10895 if (user_data != NULL)
10896 ctxt->userData = user_data;
10897 }
10898
10899 inputStream = xmlNewIOInputStream(ctxt, buf, enc);
10900 if (inputStream == NULL) {
10901 xmlFreeParserCtxt(ctxt);
10902 return(NULL);
10903 }
10904 inputPush(ctxt, inputStream);
10905
10906 return(ctxt);
10907}
10908
10909#ifdef LIBXML_VALID_ENABLED
10910/************************************************************************
10911 * *
10912 * Front ends when parsing a DTD *
10913 * *
10914 ************************************************************************/
10915
10916/**
10917 * xmlIOParseDTD:
10918 * @sax: the SAX handler block or NULL
10919 * @input: an Input Buffer
10920 * @enc: the charset encoding if known
10921 *
10922 * Load and parse a DTD
10923 *
10924 * Returns the resulting xmlDtdPtr or NULL in case of error.
10925 * @input will be freed by the function in any case.
10926 */
10927
10928xmlDtdPtr
10929xmlIOParseDTD(xmlSAXHandlerPtr sax, xmlParserInputBufferPtr input,
10930 xmlCharEncoding enc) {
10931 xmlDtdPtr ret = NULL;
10932 xmlParserCtxtPtr ctxt;
10933 xmlParserInputPtr pinput = NULL;
10934 xmlChar start[4];
10935
10936 if (input == NULL)
10937 return(NULL);
10938
10939 ctxt = xmlNewParserCtxt();
10940 if (ctxt == NULL) {
10941 xmlFreeParserInputBuffer(input);
10942 return(NULL);
10943 }
10944
10945 /*
10946 * Set-up the SAX context
10947 */
10948 if (sax != NULL) {
10949 if (ctxt->sax != NULL)
10950 xmlFree(ctxt->sax);
10951 ctxt->sax = sax;
10952 ctxt->userData = ctxt;
10953 }
10954 xmlDetectSAX2(ctxt);
10955
10956 /*
10957 * generate a parser input from the I/O handler
10958 */
10959
10960 pinput = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
10961 if (pinput == NULL) {
10962 if (sax != NULL) ctxt->sax = NULL;
10963 xmlFreeParserInputBuffer(input);
10964 xmlFreeParserCtxt(ctxt);
10965 return(NULL);
10966 }
10967
10968 /*
10969 * plug some encoding conversion routines here.
10970 */
10971 xmlPushInput(ctxt, pinput);
10972 if (enc != XML_CHAR_ENCODING_NONE) {
10973 xmlSwitchEncoding(ctxt, enc);
10974 }
10975
10976 pinput->filename = NULL;
10977 pinput->line = 1;
10978 pinput->col = 1;
10979 pinput->base = ctxt->input->cur;
10980 pinput->cur = ctxt->input->cur;
10981 pinput->free = NULL;
10982
10983 /*
10984 * let's parse that entity knowing it's an external subset.
10985 */
10986 ctxt->inSubset = 2;
10987 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
10988 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
10989 BAD_CAST "none", BAD_CAST "none");
10990
10991 if ((enc == XML_CHAR_ENCODING_NONE) &&
10992 ((ctxt->input->end - ctxt->input->cur) >= 4)) {
10993 /*
10994 * Get the 4 first bytes and decode the charset
10995 * if enc != XML_CHAR_ENCODING_NONE
10996 * plug some encoding conversion routines.
10997 */
10998 start[0] = RAW;
10999 start[1] = NXT(1);
11000 start[2] = NXT(2);
11001 start[3] = NXT(3);
11002 enc = xmlDetectCharEncoding(start, 4);
11003 if (enc != XML_CHAR_ENCODING_NONE) {
11004 xmlSwitchEncoding(ctxt, enc);
11005 }
11006 }
11007
11008 xmlParseExternalSubset(ctxt, BAD_CAST "none", BAD_CAST "none");
11009
11010 if (ctxt->myDoc != NULL) {
11011 if (ctxt->wellFormed) {
11012 ret = ctxt->myDoc->extSubset;
11013 ctxt->myDoc->extSubset = NULL;
11014 if (ret != NULL) {
11015 xmlNodePtr tmp;
11016
11017 ret->doc = NULL;
11018 tmp = ret->children;
11019 while (tmp != NULL) {
11020 tmp->doc = NULL;
11021 tmp = tmp->next;
11022 }
11023 }
11024 } else {
11025 ret = NULL;
11026 }
11027 xmlFreeDoc(ctxt->myDoc);
11028 ctxt->myDoc = NULL;
11029 }
11030 if (sax != NULL) ctxt->sax = NULL;
11031 xmlFreeParserCtxt(ctxt);
11032
11033 return(ret);
11034}
11035
11036/**
11037 * xmlSAXParseDTD:
11038 * @sax: the SAX handler block
11039 * @ExternalID: a NAME* containing the External ID of the DTD
11040 * @SystemID: a NAME* containing the URL to the DTD
11041 *
11042 * Load and parse an external subset.
11043 *
11044 * Returns the resulting xmlDtdPtr or NULL in case of error.
11045 */
11046
11047xmlDtdPtr
11048xmlSAXParseDTD(xmlSAXHandlerPtr sax, const xmlChar *ExternalID,
11049 const xmlChar *SystemID) {
11050 xmlDtdPtr ret = NULL;
11051 xmlParserCtxtPtr ctxt;
11052 xmlParserInputPtr input = NULL;
11053 xmlCharEncoding enc;
11054 xmlChar* systemIdCanonic;
11055
11056 if ((ExternalID == NULL) && (SystemID == NULL)) return(NULL);
11057
11058 ctxt = xmlNewParserCtxt();
11059 if (ctxt == NULL) {
11060 return(NULL);
11061 }
11062
11063 /*
11064 * Set-up the SAX context
11065 */
11066 if (sax != NULL) {
11067 if (ctxt->sax != NULL)
11068 xmlFree(ctxt->sax);
11069 ctxt->sax = sax;
11070 ctxt->userData = ctxt;
11071 }
11072
11073 /*
11074 * Canonicalise the system ID
11075 */
11076 systemIdCanonic = xmlCanonicPath(SystemID);
11077 if ((SystemID != NULL) && (systemIdCanonic == NULL)) {
11078 xmlFreeParserCtxt(ctxt);
11079 return(NULL);
11080 }
11081
11082 /*
11083 * Ask the Entity resolver to load the damn thing
11084 */
11085
11086 if ((ctxt->sax != NULL) && (ctxt->sax->resolveEntity != NULL))
11087 input = ctxt->sax->resolveEntity(ctxt->userData, ExternalID,
11088 systemIdCanonic);
11089 if (input == NULL) {
11090 if (sax != NULL) ctxt->sax = NULL;
11091 xmlFreeParserCtxt(ctxt);
11092 if (systemIdCanonic != NULL)
11093 xmlFree(systemIdCanonic);
11094 return(NULL);
11095 }
11096
11097 /*
11098 * plug some encoding conversion routines here.
11099 */
11100 xmlPushInput(ctxt, input);
11101 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
11102 enc = xmlDetectCharEncoding(ctxt->input->cur, 4);
11103 xmlSwitchEncoding(ctxt, enc);
11104 }
11105
11106 if (input->filename == NULL)
11107 input->filename = (char *) systemIdCanonic;
11108 else
11109 xmlFree(systemIdCanonic);
11110 input->line = 1;
11111 input->col = 1;
11112 input->base = ctxt->input->cur;
11113 input->cur = ctxt->input->cur;
11114 input->free = NULL;
11115
11116 /*
11117 * let's parse that entity knowing it's an external subset.
11118 */
11119 ctxt->inSubset = 2;
11120 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
11121 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
11122 ExternalID, SystemID);
11123 xmlParseExternalSubset(ctxt, ExternalID, SystemID);
11124
11125 if (ctxt->myDoc != NULL) {
11126 if (ctxt->wellFormed) {
11127 ret = ctxt->myDoc->extSubset;
11128 ctxt->myDoc->extSubset = NULL;
11129 if (ret != NULL) {
11130 xmlNodePtr tmp;
11131
11132 ret->doc = NULL;
11133 tmp = ret->children;
11134 while (tmp != NULL) {
11135 tmp->doc = NULL;
11136 tmp = tmp->next;
11137 }
11138 }
11139 } else {
11140 ret = NULL;
11141 }
11142 xmlFreeDoc(ctxt->myDoc);
11143 ctxt->myDoc = NULL;
11144 }
11145 if (sax != NULL) ctxt->sax = NULL;
11146 xmlFreeParserCtxt(ctxt);
11147
11148 return(ret);
11149}
11150
11151
11152/**
11153 * xmlParseDTD:
11154 * @ExternalID: a NAME* containing the External ID of the DTD
11155 * @SystemID: a NAME* containing the URL to the DTD
11156 *
11157 * Load and parse an external subset.
11158 *
11159 * Returns the resulting xmlDtdPtr or NULL in case of error.
11160 */
11161
11162xmlDtdPtr
11163xmlParseDTD(const xmlChar *ExternalID, const xmlChar *SystemID) {
11164 return(xmlSAXParseDTD(NULL, ExternalID, SystemID));
11165}
11166#endif /* LIBXML_VALID_ENABLED */
11167
11168/************************************************************************
11169 * *
11170 * Front ends when parsing an Entity *
11171 * *
11172 ************************************************************************/
11173
11174/**
11175 * xmlParseCtxtExternalEntity:
11176 * @ctx: the existing parsing context
11177 * @URL: the URL for the entity to load
11178 * @ID: the System ID for the entity to load
11179 * @lst: the return value for the set of parsed nodes
11180 *
11181 * Parse an external general entity within an existing parsing context
11182 * An external general parsed entity is well-formed if it matches the
11183 * production labeled extParsedEnt.
11184 *
11185 * [78] extParsedEnt ::= TextDecl? content
11186 *
11187 * Returns 0 if the entity is well formed, -1 in case of args problem and
11188 * the parser error code otherwise
11189 */
11190
11191int
11192xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctx, const xmlChar *URL,
11193 const xmlChar *ID, xmlNodePtr *lst) {
11194 xmlParserCtxtPtr ctxt;
11195 xmlDocPtr newDoc;
11196 xmlNodePtr newRoot;
11197 xmlSAXHandlerPtr oldsax = NULL;
11198 int ret = 0;
11199 xmlChar start[4];
11200 xmlCharEncoding enc;
11201 xmlParserInputPtr inputStream;
11202 char *directory = NULL;
11203
11204 if (ctx == NULL) return(-1);
11205
11206 if ((ctx->depth > 40) || (ctx->nbentities >= 500000)) {
11207 return(XML_ERR_ENTITY_LOOP);
11208 }
11209
11210 if (lst != NULL)
11211 *lst = NULL;
11212 if ((URL == NULL) && (ID == NULL))
11213 return(-1);
11214 if (ctx->myDoc == NULL) /* @@ relax but check for dereferences */
11215 return(-1);
11216
11217 ctxt = xmlNewParserCtxt();
11218 if (ctxt == NULL) {
11219 return(-1);
11220 }
11221
11222 ctxt->userData = ctxt;
11223 ctxt->_private = ctx->_private;
11224
11225 inputStream = xmlLoadExternalEntity((char *)URL, (char *)ID, ctxt);
11226 if (inputStream == NULL) {
11227 xmlFreeParserCtxt(ctxt);
11228 return(-1);
11229 }
11230
11231 inputPush(ctxt, inputStream);
11232
11233 if ((ctxt->directory == NULL) && (directory == NULL))
11234 directory = xmlParserGetDirectory((char *)URL);
11235 if ((ctxt->directory == NULL) && (directory != NULL))
11236 ctxt->directory = directory;
11237
11238 oldsax = ctxt->sax;
11239 ctxt->sax = ctx->sax;
11240 xmlDetectSAX2(ctxt);
11241 newDoc = xmlNewDoc(BAD_CAST "1.0");
11242 if (newDoc == NULL) {
11243 xmlFreeParserCtxt(ctxt);
11244 return(-1);
11245 }
11246 if (ctx->myDoc->dict) {
11247 newDoc->dict = ctx->myDoc->dict;
11248 xmlDictReference(newDoc->dict);
11249 }
11250 if (ctx->myDoc != NULL) {
11251 newDoc->intSubset = ctx->myDoc->intSubset;
11252 newDoc->extSubset = ctx->myDoc->extSubset;
11253 }
11254 if (ctx->myDoc->URL != NULL) {
11255 newDoc->URL = xmlStrdup(ctx->myDoc->URL);
11256 }
11257 newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
11258 if (newRoot == NULL) {
11259 ctxt->sax = oldsax;
11260 xmlFreeParserCtxt(ctxt);
11261 newDoc->intSubset = NULL;
11262 newDoc->extSubset = NULL;
11263 xmlFreeDoc(newDoc);
11264 return(-1);
11265 }
11266 xmlAddChild((xmlNodePtr) newDoc, newRoot);
11267 nodePush(ctxt, newDoc->children);
11268 if (ctx->myDoc == NULL) {
11269 ctxt->myDoc = newDoc;
11270 } else {
11271 ctxt->myDoc = ctx->myDoc;
11272 newDoc->children->doc = ctx->myDoc;
11273 }
11274
11275 /*
11276 * Get the 4 first bytes and decode the charset
11277 * if enc != XML_CHAR_ENCODING_NONE
11278 * plug some encoding conversion routines.
11279 */
11280 GROW
11281 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
11282 start[0] = RAW;
11283 start[1] = NXT(1);
11284 start[2] = NXT(2);
11285 start[3] = NXT(3);
11286 enc = xmlDetectCharEncoding(start, 4);
11287 if (enc != XML_CHAR_ENCODING_NONE) {
11288 xmlSwitchEncoding(ctxt, enc);
11289 }
11290 }
11291
11292 /*
11293 * Parse a possible text declaration first
11294 */
11295 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
11296 xmlParseTextDecl(ctxt);
11297 }
11298
11299 /*
11300 * Doing validity checking on chunk doesn't make sense
11301 */
11302 ctxt->instate = XML_PARSER_CONTENT;
11303 ctxt->validate = ctx->validate;
11304 ctxt->valid = ctx->valid;
11305 ctxt->loadsubset = ctx->loadsubset;
11306 ctxt->depth = ctx->depth + 1;
11307 ctxt->replaceEntities = ctx->replaceEntities;
11308 if (ctxt->validate) {
11309 ctxt->vctxt.error = ctx->vctxt.error;
11310 ctxt->vctxt.warning = ctx->vctxt.warning;
11311 } else {
11312 ctxt->vctxt.error = NULL;
11313 ctxt->vctxt.warning = NULL;
11314 }
11315 ctxt->vctxt.nodeTab = NULL;
11316 ctxt->vctxt.nodeNr = 0;
11317 ctxt->vctxt.nodeMax = 0;
11318 ctxt->vctxt.node = NULL;
11319 if (ctxt->dict != NULL) xmlDictFree(ctxt->dict);
11320 ctxt->dict = ctx->dict;
11321 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
11322 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
11323 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
11324 ctxt->dictNames = ctx->dictNames;
11325 ctxt->attsDefault = ctx->attsDefault;
11326 ctxt->attsSpecial = ctx->attsSpecial;
11327 ctxt->linenumbers = ctx->linenumbers;
11328
11329 xmlParseContent(ctxt);
11330
11331 ctx->validate = ctxt->validate;
11332 ctx->valid = ctxt->valid;
11333 if ((RAW == '<') && (NXT(1) == '/')) {
11334 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
11335 } else if (RAW != 0) {
11336 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
11337 }
11338 if (ctxt->node != newDoc->children) {
11339 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
11340 }
11341
11342 if (!ctxt->wellFormed) {
11343 if (ctxt->errNo == 0)
11344 ret = 1;
11345 else
11346 ret = ctxt->errNo;
11347 } else {
11348 if (lst != NULL) {
11349 xmlNodePtr cur;
11350
11351 /*
11352 * Return the newly created nodeset after unlinking it from
11353 * they pseudo parent.
11354 */
11355 cur = newDoc->children->children;
11356 *lst = cur;
11357 while (cur != NULL) {
11358 cur->parent = NULL;
11359 cur = cur->next;
11360 }
11361 newDoc->children->children = NULL;
11362 }
11363 ret = 0;
11364 }
11365 ctxt->sax = oldsax;
11366 ctxt->dict = NULL;
11367 ctxt->attsDefault = NULL;
11368 ctxt->attsSpecial = NULL;
11369 xmlFreeParserCtxt(ctxt);
11370 newDoc->intSubset = NULL;
11371 newDoc->extSubset = NULL;
11372 xmlFreeDoc(newDoc);
11373
11374 return(ret);
11375}
11376
11377/**
11378 * xmlParseExternalEntityPrivate:
11379 * @doc: the document the chunk pertains to
11380 * @oldctxt: the previous parser context if available
11381 * @sax: the SAX handler bloc (possibly NULL)
11382 * @user_data: The user data returned on SAX callbacks (possibly NULL)
11383 * @depth: Used for loop detection, use 0
11384 * @URL: the URL for the entity to load
11385 * @ID: the System ID for the entity to load
11386 * @list: the return value for the set of parsed nodes
11387 *
11388 * Private version of xmlParseExternalEntity()
11389 *
11390 * Returns 0 if the entity is well formed, -1 in case of args problem and
11391 * the parser error code otherwise
11392 */
11393
11394static xmlParserErrors
11395xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
11396 xmlSAXHandlerPtr sax,
11397 void *user_data, int depth, const xmlChar *URL,
11398 const xmlChar *ID, xmlNodePtr *list) {
11399 xmlParserCtxtPtr ctxt;
11400 xmlDocPtr newDoc;
11401 xmlNodePtr newRoot;
11402 xmlSAXHandlerPtr oldsax = NULL;
11403 xmlParserErrors ret = XML_ERR_OK;
11404 xmlChar start[4];
11405 xmlCharEncoding enc;
11406
11407 if ((depth > 40) ||
11408 ((oldctxt != NULL) && (oldctxt->nbentities >= 500000))) {
11409 return(XML_ERR_ENTITY_LOOP);
11410 }
11411
11412
11413
11414 if (list != NULL)
11415 *list = NULL;
11416 if ((URL == NULL) && (ID == NULL))
11417 return(XML_ERR_INTERNAL_ERROR);
11418 if (doc == NULL)
11419 return(XML_ERR_INTERNAL_ERROR);
11420
11421
11422 ctxt = xmlCreateEntityParserCtxt(URL, ID, NULL);
11423 if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY);
11424 ctxt->userData = ctxt;
11425 if (oldctxt != NULL) {
11426 ctxt->_private = oldctxt->_private;
11427 ctxt->loadsubset = oldctxt->loadsubset;
11428 ctxt->validate = oldctxt->validate;
11429 ctxt->external = oldctxt->external;
11430 ctxt->record_info = oldctxt->record_info;
11431 ctxt->node_seq.maximum = oldctxt->node_seq.maximum;
11432 ctxt->node_seq.length = oldctxt->node_seq.length;
11433 ctxt->node_seq.buffer = oldctxt->node_seq.buffer;
11434 } else {
11435 /*
11436 * Doing validity checking on chunk without context
11437 * doesn't make sense
11438 */
11439 ctxt->_private = NULL;
11440 ctxt->validate = 0;
11441 ctxt->external = 2;
11442 ctxt->loadsubset = 0;
11443 }
11444 if (sax != NULL) {
11445 oldsax = ctxt->sax;
11446 ctxt->sax = sax;
11447 if (user_data != NULL)
11448 ctxt->userData = user_data;
11449 }
11450 xmlDetectSAX2(ctxt);
11451 newDoc = xmlNewDoc(BAD_CAST "1.0");
11452 if (newDoc == NULL) {
11453 ctxt->node_seq.maximum = 0;
11454 ctxt->node_seq.length = 0;
11455 ctxt->node_seq.buffer = NULL;
11456 xmlFreeParserCtxt(ctxt);
11457 return(XML_ERR_INTERNAL_ERROR);
11458 }
11459 newDoc->intSubset = doc->intSubset;
11460 newDoc->extSubset = doc->extSubset;
11461 newDoc->dict = doc->dict;
11462 xmlDictReference(newDoc->dict);
11463
11464 if (doc->URL != NULL) {
11465 newDoc->URL = xmlStrdup(doc->URL);
11466 }
11467 newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
11468 if (newRoot == NULL) {
11469 if (sax != NULL)
11470 ctxt->sax = oldsax;
11471 ctxt->node_seq.maximum = 0;
11472 ctxt->node_seq.length = 0;
11473 ctxt->node_seq.buffer = NULL;
11474 xmlFreeParserCtxt(ctxt);
11475 newDoc->intSubset = NULL;
11476 newDoc->extSubset = NULL;
11477 xmlFreeDoc(newDoc);
11478 return(XML_ERR_INTERNAL_ERROR);
11479 }
11480 xmlAddChild((xmlNodePtr) newDoc, newRoot);
11481 nodePush(ctxt, newDoc->children);
11482 ctxt->myDoc = doc;
11483 newRoot->doc = doc;
11484
11485 /*
11486 * Get the 4 first bytes and decode the charset
11487 * if enc != XML_CHAR_ENCODING_NONE
11488 * plug some encoding conversion routines.
11489 */
11490 GROW;
11491 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
11492 start[0] = RAW;
11493 start[1] = NXT(1);
11494 start[2] = NXT(2);
11495 start[3] = NXT(3);
11496 enc = xmlDetectCharEncoding(start, 4);
11497 if (enc != XML_CHAR_ENCODING_NONE) {
11498 xmlSwitchEncoding(ctxt, enc);
11499 }
11500 }
11501
11502 /*
11503 * Parse a possible text declaration first
11504 */
11505 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
11506 xmlParseTextDecl(ctxt);
11507 }
11508
11509 ctxt->instate = XML_PARSER_CONTENT;
11510 ctxt->depth = depth;
11511
11512 xmlParseContent(ctxt);
11513
11514 if ((RAW == '<') && (NXT(1) == '/')) {
11515 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
11516 } else if (RAW != 0) {
11517 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
11518 }
11519 if (ctxt->node != newDoc->children) {
11520 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
11521 }
11522
11523 if (!ctxt->wellFormed) {
11524 if (ctxt->errNo == 0)
11525 ret = XML_ERR_INTERNAL_ERROR;
11526 else
11527 ret = (xmlParserErrors)ctxt->errNo;
11528 } else {
11529 if (list != NULL) {
11530 xmlNodePtr cur;
11531
11532 /*
11533 * Return the newly created nodeset after unlinking it from
11534 * they pseudo parent.
11535 */
11536 cur = newDoc->children->children;
11537 *list = cur;
11538 while (cur != NULL) {
11539 cur->parent = NULL;
11540 cur = cur->next;
11541 }
11542 newDoc->children->children = NULL;
11543 }
11544 ret = XML_ERR_OK;
11545 }
11546 if (sax != NULL)
11547 ctxt->sax = oldsax;
11548 oldctxt->node_seq.maximum = ctxt->node_seq.maximum;
11549 oldctxt->node_seq.length = ctxt->node_seq.length;
11550 oldctxt->node_seq.buffer = ctxt->node_seq.buffer;
11551 oldctxt->nbentities += ctxt->nbentities;
11552 ctxt->node_seq.maximum = 0;
11553 ctxt->node_seq.length = 0;
11554 ctxt->node_seq.buffer = NULL;
11555 xmlFreeParserCtxt(ctxt);
11556 newDoc->intSubset = NULL;
11557 newDoc->extSubset = NULL;
11558 xmlFreeDoc(newDoc);
11559
11560 return(ret);
11561}
11562
11563#ifdef LIBXML_SAX1_ENABLED
11564/**
11565 * xmlParseExternalEntity:
11566 * @doc: the document the chunk pertains to
11567 * @sax: the SAX handler bloc (possibly NULL)
11568 * @user_data: The user data returned on SAX callbacks (possibly NULL)
11569 * @depth: Used for loop detection, use 0
11570 * @URL: the URL for the entity to load
11571 * @ID: the System ID for the entity to load
11572 * @lst: the return value for the set of parsed nodes
11573 *
11574 * Parse an external general entity
11575 * An external general parsed entity is well-formed if it matches the
11576 * production labeled extParsedEnt.
11577 *
11578 * [78] extParsedEnt ::= TextDecl? content
11579 *
11580 * Returns 0 if the entity is well formed, -1 in case of args problem and
11581 * the parser error code otherwise
11582 */
11583
11584int
11585xmlParseExternalEntity(xmlDocPtr doc, xmlSAXHandlerPtr sax, void *user_data,
11586 int depth, const xmlChar *URL, const xmlChar *ID, xmlNodePtr *lst) {
11587 return(xmlParseExternalEntityPrivate(doc, NULL, sax, user_data, depth, URL,
11588 ID, lst));
11589}
11590
11591/**
11592 * xmlParseBalancedChunkMemory:
11593 * @doc: the document the chunk pertains to
11594 * @sax: the SAX handler bloc (possibly NULL)
11595 * @user_data: The user data returned on SAX callbacks (possibly NULL)
11596 * @depth: Used for loop detection, use 0
11597 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
11598 * @lst: the return value for the set of parsed nodes
11599 *
11600 * Parse a well-balanced chunk of an XML document
11601 * called by the parser
11602 * The allowed sequence for the Well Balanced Chunk is the one defined by
11603 * the content production in the XML grammar:
11604 *
11605 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
11606 *
11607 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
11608 * the parser error code otherwise
11609 */
11610
11611int
11612xmlParseBalancedChunkMemory(xmlDocPtr doc, xmlSAXHandlerPtr sax,
11613 void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst) {
11614 return xmlParseBalancedChunkMemoryRecover( doc, sax, user_data,
11615 depth, string, lst, 0 );
11616}
11617#endif /* LIBXML_SAX1_ENABLED */
11618
11619/**
11620 * xmlParseBalancedChunkMemoryInternal:
11621 * @oldctxt: the existing parsing context
11622 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
11623 * @user_data: the user data field for the parser context
11624 * @lst: the return value for the set of parsed nodes
11625 *
11626 *
11627 * Parse a well-balanced chunk of an XML document
11628 * called by the parser
11629 * The allowed sequence for the Well Balanced Chunk is the one defined by
11630 * the content production in the XML grammar:
11631 *
11632 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
11633 *
11634 * Returns XML_ERR_OK if the chunk is well balanced, and the parser
11635 * error code otherwise
11636 *
11637 * In case recover is set to 1, the nodelist will not be empty even if
11638 * the parsed chunk is not well balanced.
11639 */
11640static xmlParserErrors
11641xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
11642 const xmlChar *string, void *user_data, xmlNodePtr *lst) {
11643 xmlParserCtxtPtr ctxt;
11644 xmlDocPtr newDoc = NULL;
11645 xmlNodePtr newRoot;
11646 xmlSAXHandlerPtr oldsax = NULL;
11647 xmlNodePtr content = NULL;
11648 xmlNodePtr last = NULL;
11649 int size;
11650 xmlParserErrors ret = XML_ERR_OK;
11651
11652 if ((oldctxt->depth > 40) || (oldctxt->nbentities >= 500000)) {
11653 return(XML_ERR_ENTITY_LOOP);
11654 }
11655
11656
11657 if (lst != NULL)
11658 *lst = NULL;
11659 if (string == NULL)
11660 return(XML_ERR_INTERNAL_ERROR);
11661
11662 size = xmlStrlen(string);
11663
11664 ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
11665 if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY);
11666 if (user_data != NULL)
11667 ctxt->userData = user_data;
11668 else
11669 ctxt->userData = ctxt;
11670 if (ctxt->dict != NULL) xmlDictFree(ctxt->dict);
11671 ctxt->dict = oldctxt->dict;
11672 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
11673 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
11674 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
11675
11676 oldsax = ctxt->sax;
11677 ctxt->sax = oldctxt->sax;
11678 xmlDetectSAX2(ctxt);
11679 ctxt->replaceEntities = oldctxt->replaceEntities;
11680 ctxt->options = oldctxt->options;
11681
11682 ctxt->_private = oldctxt->_private;
11683 if (oldctxt->myDoc == NULL) {
11684 newDoc = xmlNewDoc(BAD_CAST "1.0");
11685 if (newDoc == NULL) {
11686 ctxt->sax = oldsax;
11687 ctxt->dict = NULL;
11688 xmlFreeParserCtxt(ctxt);
11689 return(XML_ERR_INTERNAL_ERROR);
11690 }
11691 newDoc->dict = ctxt->dict;
11692 xmlDictReference(newDoc->dict);
11693 ctxt->myDoc = newDoc;
11694 } else {
11695 ctxt->myDoc = oldctxt->myDoc;
11696 content = ctxt->myDoc->children;
11697 last = ctxt->myDoc->last;
11698 }
11699 newRoot = xmlNewDocNode(ctxt->myDoc, NULL, BAD_CAST "pseudoroot", NULL);
11700 if (newRoot == NULL) {
11701 ctxt->sax = oldsax;
11702 ctxt->dict = NULL;
11703 xmlFreeParserCtxt(ctxt);
11704 if (newDoc != NULL) {
11705 xmlFreeDoc(newDoc);
11706 }
11707 return(XML_ERR_INTERNAL_ERROR);
11708 }
11709 ctxt->myDoc->children = NULL;
11710 ctxt->myDoc->last = NULL;
11711 xmlAddChild((xmlNodePtr) ctxt->myDoc, newRoot);
11712 nodePush(ctxt, ctxt->myDoc->children);
11713 ctxt->instate = XML_PARSER_CONTENT;
11714 ctxt->depth = oldctxt->depth + 1;
11715
11716 ctxt->validate = 0;
11717 ctxt->loadsubset = oldctxt->loadsubset;
11718 if ((oldctxt->validate) || (oldctxt->replaceEntities != 0)) {
11719 /*
11720 * ID/IDREF registration will be done in xmlValidateElement below
11721 */
11722 ctxt->loadsubset |= XML_SKIP_IDS;
11723 }
11724 ctxt->dictNames = oldctxt->dictNames;
11725 ctxt->attsDefault = oldctxt->attsDefault;
11726 ctxt->attsSpecial = oldctxt->attsSpecial;
11727
11728 xmlParseContent(ctxt);
11729 if ((RAW == '<') && (NXT(1) == '/')) {
11730 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
11731 } else if (RAW != 0) {
11732 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
11733 }
11734 if (ctxt->node != ctxt->myDoc->children) {
11735 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
11736 }
11737
11738 if (!ctxt->wellFormed) {
11739 if (ctxt->errNo == 0)
11740 ret = XML_ERR_INTERNAL_ERROR;
11741 else
11742 ret = (xmlParserErrors)ctxt->errNo;
11743 } else {
11744 ret = XML_ERR_OK;
11745 }
11746
11747 if ((lst != NULL) && (ret == XML_ERR_OK)) {
11748 xmlNodePtr cur;
11749
11750 /*
11751 * Return the newly created nodeset after unlinking it from
11752 * they pseudo parent.
11753 */
11754 cur = ctxt->myDoc->children->children;
11755 *lst = cur;
11756 while (cur != NULL) {
11757#ifdef LIBXML_VALID_ENABLED
11758 if ((oldctxt->validate) && (oldctxt->wellFormed) &&
11759 (oldctxt->myDoc) && (oldctxt->myDoc->intSubset) &&
11760 (cur->type == XML_ELEMENT_NODE)) {
11761 oldctxt->valid &= xmlValidateElement(&oldctxt->vctxt,
11762 oldctxt->myDoc, cur);
11763 }
11764#endif /* LIBXML_VALID_ENABLED */
11765 cur->parent = NULL;
11766 cur = cur->next;
11767 }
11768 ctxt->myDoc->children->children = NULL;
11769 }
11770 if (ctxt->myDoc != NULL) {
11771 xmlFreeNode(ctxt->myDoc->children);
11772 ctxt->myDoc->children = content;
11773 ctxt->myDoc->last = last;
11774 }
11775
11776 oldctxt->nbentities += ctxt->nbentities;
11777 ctxt->sax = oldsax;
11778 ctxt->dict = NULL;
11779 ctxt->attsDefault = NULL;
11780 ctxt->attsSpecial = NULL;
11781 xmlFreeParserCtxt(ctxt);
11782 if (newDoc != NULL) {
11783 xmlFreeDoc(newDoc);
11784 }
11785
11786 return(ret);
11787}
11788
11789/**
11790 * xmlParseInNodeContext:
11791 * @node: the context node
11792 * @data: the input string
11793 * @datalen: the input string length in bytes
11794 * @options: a combination of xmlParserOption
11795 * @lst: the return value for the set of parsed nodes
11796 *
11797 * Parse a well-balanced chunk of an XML document
11798 * within the context (DTD, namespaces, etc ...) of the given node.
11799 *
11800 * The allowed sequence for the data is a Well Balanced Chunk defined by
11801 * the content production in the XML grammar:
11802 *
11803 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
11804 *
11805 * Returns XML_ERR_OK if the chunk is well balanced, and the parser
11806 * error code otherwise
11807 */
11808xmlParserErrors
11809xmlParseInNodeContext(xmlNodePtr node, const char *data, int datalen,
11810 int options, xmlNodePtr *lst) {
11811#ifdef SAX2
11812 xmlParserCtxtPtr ctxt;
11813 xmlDocPtr doc = NULL;
11814 xmlNodePtr fake, cur;
11815 int nsnr = 0;
11816
11817 xmlParserErrors ret = XML_ERR_OK;
11818
11819 /*
11820 * check all input parameters, grab the document
11821 */
11822 if ((lst == NULL) || (node == NULL) || (data == NULL) || (datalen < 0))
11823 return(XML_ERR_INTERNAL_ERROR);
11824 switch (node->type) {
11825 case XML_ELEMENT_NODE:
11826 case XML_ATTRIBUTE_NODE:
11827 case XML_TEXT_NODE:
11828 case XML_CDATA_SECTION_NODE:
11829 case XML_ENTITY_REF_NODE:
11830 case XML_PI_NODE:
11831 case XML_COMMENT_NODE:
11832 case XML_DOCUMENT_NODE:
11833 case XML_HTML_DOCUMENT_NODE:
11834 break;
11835 default:
11836 return(XML_ERR_INTERNAL_ERROR);
11837
11838 }
11839 while ((node != NULL) && (node->type != XML_ELEMENT_NODE) &&
11840 (node->type != XML_DOCUMENT_NODE) &&
11841 (node->type != XML_HTML_DOCUMENT_NODE))
11842 node = node->parent;
11843 if (node == NULL)
11844 return(XML_ERR_INTERNAL_ERROR);
11845 if (node->type == XML_ELEMENT_NODE)
11846 doc = node->doc;
11847 else
11848 doc = (xmlDocPtr) node;
11849 if (doc == NULL)
11850 return(XML_ERR_INTERNAL_ERROR);
11851
11852 /*
11853 * allocate a context and set-up everything not related to the
11854 * node position in the tree
11855 */
11856 if (doc->type == XML_DOCUMENT_NODE)
11857 ctxt = xmlCreateMemoryParserCtxt((char *) data, datalen);
11858#ifdef LIBXML_HTML_ENABLED
11859 else if (doc->type == XML_HTML_DOCUMENT_NODE)
11860 ctxt = htmlCreateMemoryParserCtxt((char *) data, datalen);
11861#endif
11862 else
11863 return(XML_ERR_INTERNAL_ERROR);
11864
11865 if (ctxt == NULL)
11866 return(XML_ERR_NO_MEMORY);
11867 fake = xmlNewComment(NULL);
11868 if (fake == NULL) {
11869 xmlFreeParserCtxt(ctxt);
11870 return(XML_ERR_NO_MEMORY);
11871 }
11872 xmlAddChild(node, fake);
11873
11874 /*
11875 * Use input doc's dict if present, else assure XML_PARSE_NODICT is set.
11876 * We need a dictionary for xmlDetectSAX2, so if there's no doc dict
11877 * we must wait until the last moment to free the original one.
11878 */
11879 if (doc->dict != NULL) {
11880 if (ctxt->dict != NULL)
11881 xmlDictFree(ctxt->dict);
11882 ctxt->dict = doc->dict;
11883 } else
11884 options |= XML_PARSE_NODICT;
11885
11886 xmlCtxtUseOptions(ctxt, options);
11887 xmlDetectSAX2(ctxt);
11888 ctxt->myDoc = doc;
11889
11890 if (node->type == XML_ELEMENT_NODE) {
11891 nodePush(ctxt, node);
11892 /*
11893 * initialize the SAX2 namespaces stack
11894 */
11895 cur = node;
11896 while ((cur != NULL) && (cur->type == XML_ELEMENT_NODE)) {
11897 xmlNsPtr ns = cur->nsDef;
11898 const xmlChar *iprefix, *ihref;
11899
11900 while (ns != NULL) {
11901 if (ctxt->dict) {
11902 iprefix = xmlDictLookup(ctxt->dict, ns->prefix, -1);
11903 ihref = xmlDictLookup(ctxt->dict, ns->href, -1);
11904 } else {
11905 iprefix = ns->prefix;
11906 ihref = ns->href;
11907 }
11908
11909 if (xmlGetNamespace(ctxt, iprefix) == NULL) {
11910 nsPush(ctxt, iprefix, ihref);
11911 nsnr++;
11912 }
11913 ns = ns->next;
11914 }
11915 cur = cur->parent;
11916 }
11917 ctxt->instate = XML_PARSER_CONTENT;
11918 }
11919
11920 if ((ctxt->validate) || (ctxt->replaceEntities != 0)) {
11921 /*
11922 * ID/IDREF registration will be done in xmlValidateElement below
11923 */
11924 ctxt->loadsubset |= XML_SKIP_IDS;
11925 }
11926
11927#ifdef LIBXML_HTML_ENABLED
11928 if (doc->type == XML_HTML_DOCUMENT_NODE)
11929 __htmlParseContent(ctxt);
11930 else
11931#endif
11932 xmlParseContent(ctxt);
11933
11934 nsPop(ctxt, nsnr);
11935 if ((RAW == '<') && (NXT(1) == '/')) {
11936 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
11937 } else if (RAW != 0) {
11938 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
11939 }
11940 if ((ctxt->node != NULL) && (ctxt->node != node)) {
11941 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
11942 ctxt->wellFormed = 0;
11943 }
11944
11945 if (!ctxt->wellFormed) {
11946 if (ctxt->errNo == 0)
11947 ret = XML_ERR_INTERNAL_ERROR;
11948 else
11949 ret = (xmlParserErrors)ctxt->errNo;
11950 } else {
11951 ret = XML_ERR_OK;
11952 }
11953
11954 /*
11955 * Return the newly created nodeset after unlinking it from
11956 * the pseudo sibling.
11957 */
11958
11959 cur = fake->next;
11960 fake->next = NULL;
11961 node->last = fake;
11962
11963 if (cur != NULL) {
11964 cur->prev = NULL;
11965 }
11966
11967 *lst = cur;
11968
11969 while (cur != NULL) {
11970 cur->parent = NULL;
11971 cur = cur->next;
11972 }
11973
11974 xmlUnlinkNode(fake);
11975 xmlFreeNode(fake);
11976
11977
11978 if (ret != XML_ERR_OK) {
11979 xmlFreeNodeList(*lst);
11980 *lst = NULL;
11981 }
11982
11983 if (doc->dict != NULL)
11984 ctxt->dict = NULL;
11985 xmlFreeParserCtxt(ctxt);
11986
11987 return(ret);
11988#else /* !SAX2 */
11989 return(XML_ERR_INTERNAL_ERROR);
11990#endif
11991}
11992
11993#ifdef LIBXML_SAX1_ENABLED
11994/**
11995 * xmlParseBalancedChunkMemoryRecover:
11996 * @doc: the document the chunk pertains to
11997 * @sax: the SAX handler bloc (possibly NULL)
11998 * @user_data: The user data returned on SAX callbacks (possibly NULL)
11999 * @depth: Used for loop detection, use 0
12000 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
12001 * @lst: the return value for the set of parsed nodes
12002 * @recover: return nodes even if the data is broken (use 0)
12003 *
12004 *
12005 * Parse a well-balanced chunk of an XML document
12006 * called by the parser
12007 * The allowed sequence for the Well Balanced Chunk is the one defined by
12008 * the content production in the XML grammar:
12009 *
12010 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
12011 *
12012 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
12013 * the parser error code otherwise
12014 *
12015 * In case recover is set to 1, the nodelist will not be empty even if
12016 * the parsed chunk is not well balanced.
12017 */
12018int
12019xmlParseBalancedChunkMemoryRecover(xmlDocPtr doc, xmlSAXHandlerPtr sax,
12020 void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst,
12021 int recover) {
12022 xmlParserCtxtPtr ctxt;
12023 xmlDocPtr newDoc;
12024 xmlSAXHandlerPtr oldsax = NULL;
12025 xmlNodePtr content, newRoot;
12026 int size;
12027 int ret = 0;
12028
12029 if (depth > 40) {
12030 return(XML_ERR_ENTITY_LOOP);
12031 }
12032
12033
12034 if (lst != NULL)
12035 *lst = NULL;
12036 if (string == NULL)
12037 return(-1);
12038
12039 size = xmlStrlen(string);
12040
12041 ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
12042 if (ctxt == NULL) return(-1);
12043 ctxt->userData = ctxt;
12044 if (sax != NULL) {
12045 oldsax = ctxt->sax;
12046 ctxt->sax = sax;
12047 if (user_data != NULL)
12048 ctxt->userData = user_data;
12049 }
12050 newDoc = xmlNewDoc(BAD_CAST "1.0");
12051 if (newDoc == NULL) {
12052 xmlFreeParserCtxt(ctxt);
12053 return(-1);
12054 }
12055 if ((doc != NULL) && (doc->dict != NULL)) {
12056 xmlDictFree(ctxt->dict);
12057 ctxt->dict = doc->dict;
12058 xmlDictReference(ctxt->dict);
12059 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
12060 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
12061 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
12062 ctxt->dictNames = 1;
12063 } else {
12064 xmlCtxtUseOptions(ctxt, XML_PARSE_NODICT);
12065 }
12066 if (doc != NULL) {
12067 newDoc->intSubset = doc->intSubset;
12068 newDoc->extSubset = doc->extSubset;
12069 }
12070 newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
12071 if (newRoot == NULL) {
12072 if (sax != NULL)
12073 ctxt->sax = oldsax;
12074 xmlFreeParserCtxt(ctxt);
12075 newDoc->intSubset = NULL;
12076 newDoc->extSubset = NULL;
12077 xmlFreeDoc(newDoc);
12078 return(-1);
12079 }
12080 xmlAddChild((xmlNodePtr) newDoc, newRoot);
12081 nodePush(ctxt, newRoot);
12082 if (doc == NULL) {
12083 ctxt->myDoc = newDoc;
12084 } else {
12085 ctxt->myDoc = newDoc;
12086 newDoc->children->doc = doc;
12087 /* Ensure that doc has XML spec namespace */
12088 xmlSearchNsByHref(doc, (xmlNodePtr)doc, XML_XML_NAMESPACE);
12089 newDoc->oldNs = doc->oldNs;
12090 }
12091 ctxt->instate = XML_PARSER_CONTENT;
12092 ctxt->depth = depth;
12093
12094 /*
12095 * Doing validity checking on chunk doesn't make sense
12096 */
12097 ctxt->validate = 0;
12098 ctxt->loadsubset = 0;
12099 xmlDetectSAX2(ctxt);
12100
12101 if ( doc != NULL ){
12102 content = doc->children;
12103 doc->children = NULL;
12104 xmlParseContent(ctxt);
12105 doc->children = content;
12106 }
12107 else {
12108 xmlParseContent(ctxt);
12109 }
12110 if ((RAW == '<') && (NXT(1) == '/')) {
12111 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
12112 } else if (RAW != 0) {
12113 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
12114 }
12115 if (ctxt->node != newDoc->children) {
12116 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
12117 }
12118
12119 if (!ctxt->wellFormed) {
12120 if (ctxt->errNo == 0)
12121 ret = 1;
12122 else
12123 ret = ctxt->errNo;
12124 } else {
12125 ret = 0;
12126 }
12127
12128 if ((lst != NULL) && ((ret == 0) || (recover == 1))) {
12129 xmlNodePtr cur;
12130
12131 /*
12132 * Return the newly created nodeset after unlinking it from
12133 * they pseudo parent.
12134 */
12135 cur = newDoc->children->children;
12136 *lst = cur;
12137 while (cur != NULL) {
12138 xmlSetTreeDoc(cur, doc);
12139 cur->parent = NULL;
12140 cur = cur->next;
12141 }
12142 newDoc->children->children = NULL;
12143 }
12144
12145 if (sax != NULL)
12146 ctxt->sax = oldsax;
12147 xmlFreeParserCtxt(ctxt);
12148 newDoc->intSubset = NULL;
12149 newDoc->extSubset = NULL;
12150 newDoc->oldNs = NULL;
12151 xmlFreeDoc(newDoc);
12152
12153 return(ret);
12154}
12155
12156/**
12157 * xmlSAXParseEntity:
12158 * @sax: the SAX handler block
12159 * @filename: the filename
12160 *
12161 * parse an XML external entity out of context and build a tree.
12162 * It use the given SAX function block to handle the parsing callback.
12163 * If sax is NULL, fallback to the default DOM tree building routines.
12164 *
12165 * [78] extParsedEnt ::= TextDecl? content
12166 *
12167 * This correspond to a "Well Balanced" chunk
12168 *
12169 * Returns the resulting document tree
12170 */
12171
12172xmlDocPtr
12173xmlSAXParseEntity(xmlSAXHandlerPtr sax, const char *filename) {
12174 xmlDocPtr ret;
12175 xmlParserCtxtPtr ctxt;
12176
12177 ctxt = xmlCreateFileParserCtxt(filename);
12178 if (ctxt == NULL) {
12179 return(NULL);
12180 }
12181 if (sax != NULL) {
12182 if (ctxt->sax != NULL)
12183 xmlFree(ctxt->sax);
12184 ctxt->sax = sax;
12185 ctxt->userData = NULL;
12186 }
12187
12188 xmlParseExtParsedEnt(ctxt);
12189
12190 if (ctxt->wellFormed)
12191 ret = ctxt->myDoc;
12192 else {
12193 ret = NULL;
12194 xmlFreeDoc(ctxt->myDoc);
12195 ctxt->myDoc = NULL;
12196 }
12197 if (sax != NULL)
12198 ctxt->sax = NULL;
12199 xmlFreeParserCtxt(ctxt);
12200
12201 return(ret);
12202}
12203
12204/**
12205 * xmlParseEntity:
12206 * @filename: the filename
12207 *
12208 * parse an XML external entity out of context and build a tree.
12209 *
12210 * [78] extParsedEnt ::= TextDecl? content
12211 *
12212 * This correspond to a "Well Balanced" chunk
12213 *
12214 * Returns the resulting document tree
12215 */
12216
12217xmlDocPtr
12218xmlParseEntity(const char *filename) {
12219 return(xmlSAXParseEntity(NULL, filename));
12220}
12221#endif /* LIBXML_SAX1_ENABLED */
12222
12223/**
12224 * xmlCreateEntityParserCtxt:
12225 * @URL: the entity URL
12226 * @ID: the entity PUBLIC ID
12227 * @base: a possible base for the target URI
12228 *
12229 * Create a parser context for an external entity
12230 * Automatic support for ZLIB/Compress compressed document is provided
12231 * by default if found at compile-time.
12232 *
12233 * Returns the new parser context or NULL
12234 */
12235xmlParserCtxtPtr
12236xmlCreateEntityParserCtxt(const xmlChar *URL, const xmlChar *ID,
12237 const xmlChar *base) {
12238 xmlParserCtxtPtr ctxt;
12239 xmlParserInputPtr inputStream;
12240 char *directory = NULL;
12241 xmlChar *uri;
12242
12243 ctxt = xmlNewParserCtxt();
12244 if (ctxt == NULL) {
12245 return(NULL);
12246 }
12247
12248 uri = xmlBuildURI(URL, base);
12249
12250 if (uri == NULL) {
12251 inputStream = xmlLoadExternalEntity((char *)URL, (char *)ID, ctxt);
12252 if (inputStream == NULL) {
12253 xmlFreeParserCtxt(ctxt);
12254 return(NULL);
12255 }
12256
12257 inputPush(ctxt, inputStream);
12258
12259 if ((ctxt->directory == NULL) && (directory == NULL))
12260 directory = xmlParserGetDirectory((char *)URL);
12261 if ((ctxt->directory == NULL) && (directory != NULL))
12262 ctxt->directory = directory;
12263 } else {
12264 inputStream = xmlLoadExternalEntity((char *)uri, (char *)ID, ctxt);
12265 if (inputStream == NULL) {
12266 xmlFree(uri);
12267 xmlFreeParserCtxt(ctxt);
12268 return(NULL);
12269 }
12270
12271 inputPush(ctxt, inputStream);
12272
12273 if ((ctxt->directory == NULL) && (directory == NULL))
12274 directory = xmlParserGetDirectory((char *)uri);
12275 if ((ctxt->directory == NULL) && (directory != NULL))
12276 ctxt->directory = directory;
12277 xmlFree(uri);
12278 }
12279 return(ctxt);
12280}
12281
12282/************************************************************************
12283 * *
12284 * Front ends when parsing from a file *
12285 * *
12286 ************************************************************************/
12287
12288/**
12289 * xmlCreateURLParserCtxt:
12290 * @filename: the filename or URL
12291 * @options: a combination of xmlParserOption
12292 *
12293 * Create a parser context for a file or URL content.
12294 * Automatic support for ZLIB/Compress compressed document is provided
12295 * by default if found at compile-time and for file accesses
12296 *
12297 * Returns the new parser context or NULL
12298 */
12299xmlParserCtxtPtr
12300xmlCreateURLParserCtxt(const char *filename, int options)
12301{
12302 xmlParserCtxtPtr ctxt;
12303 xmlParserInputPtr inputStream;
12304 char *directory = NULL;
12305
12306 ctxt = xmlNewParserCtxt();
12307 if (ctxt == NULL) {
12308 xmlErrMemory(NULL, "cannot allocate parser context");
12309 return(NULL);
12310 }
12311
12312 if (options)
12313 xmlCtxtUseOptions(ctxt, options);
12314 ctxt->linenumbers = 1;
12315
12316 inputStream = xmlLoadExternalEntity(filename, NULL, ctxt);
12317 if (inputStream == NULL) {
12318 xmlFreeParserCtxt(ctxt);
12319 return(NULL);
12320 }
12321
12322 inputPush(ctxt, inputStream);
12323 if ((ctxt->directory == NULL) && (directory == NULL))
12324 directory = xmlParserGetDirectory(filename);
12325 if ((ctxt->directory == NULL) && (directory != NULL))
12326 ctxt->directory = directory;
12327
12328 return(ctxt);
12329}
12330
12331/**
12332 * xmlCreateFileParserCtxt:
12333 * @filename: the filename
12334 *
12335 * Create a parser context for a file content.
12336 * Automatic support for ZLIB/Compress compressed document is provided
12337 * by default if found at compile-time.
12338 *
12339 * Returns the new parser context or NULL
12340 */
12341xmlParserCtxtPtr
12342xmlCreateFileParserCtxt(const char *filename)
12343{
12344 return(xmlCreateURLParserCtxt(filename, 0));
12345}
12346
12347#ifdef LIBXML_SAX1_ENABLED
12348/**
12349 * xmlSAXParseFileWithData:
12350 * @sax: the SAX handler block
12351 * @filename: the filename
12352 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
12353 * documents
12354 * @data: the userdata
12355 *
12356 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
12357 * compressed document is provided by default if found at compile-time.
12358 * It use the given SAX function block to handle the parsing callback.
12359 * If sax is NULL, fallback to the default DOM tree building routines.
12360 *
12361 * User data (void *) is stored within the parser context in the
12362 * context's _private member, so it is available nearly everywhere in libxml
12363 *
12364 * Returns the resulting document tree
12365 */
12366
12367xmlDocPtr
12368xmlSAXParseFileWithData(xmlSAXHandlerPtr sax, const char *filename,
12369 int recovery, void *data) {
12370 xmlDocPtr ret;
12371 xmlParserCtxtPtr ctxt;
12372 char *directory = NULL;
12373
12374 xmlInitParser();
12375
12376 ctxt = xmlCreateFileParserCtxt(filename);
12377 if (ctxt == NULL) {
12378 return(NULL);
12379 }
12380 if (sax != NULL) {
12381 if (ctxt->sax != NULL)
12382 xmlFree(ctxt->sax);
12383 ctxt->sax = sax;
12384 }
12385 xmlDetectSAX2(ctxt);
12386 if (data!=NULL) {
12387 ctxt->_private = data;
12388 }
12389
12390 if ((ctxt->directory == NULL) && (directory == NULL))
12391 directory = xmlParserGetDirectory(filename);
12392 if ((ctxt->directory == NULL) && (directory != NULL))
12393 ctxt->directory = (char *) xmlStrdup((xmlChar *) directory);
12394
12395 ctxt->recovery = recovery;
12396
12397 xmlParseDocument(ctxt);
12398
12399 if ((ctxt->wellFormed) || recovery) {
12400 ret = ctxt->myDoc;
12401 if (ret != NULL) {
12402 if (ctxt->input->buf->compressed > 0)
12403 ret->compression = 9;
12404 else
12405 ret->compression = ctxt->input->buf->compressed;
12406 }
12407 }
12408 else {
12409 ret = NULL;
12410 xmlFreeDoc(ctxt->myDoc);
12411 ctxt->myDoc = NULL;
12412 }
12413 if (sax != NULL)
12414 ctxt->sax = NULL;
12415 xmlFreeParserCtxt(ctxt);
12416
12417 return(ret);
12418}
12419
12420/**
12421 * xmlSAXParseFile:
12422 * @sax: the SAX handler block
12423 * @filename: the filename
12424 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
12425 * documents
12426 *
12427 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
12428 * compressed document is provided by default if found at compile-time.
12429 * It use the given SAX function block to handle the parsing callback.
12430 * If sax is NULL, fallback to the default DOM tree building routines.
12431 *
12432 * Returns the resulting document tree
12433 */
12434
12435xmlDocPtr
12436xmlSAXParseFile(xmlSAXHandlerPtr sax, const char *filename,
12437 int recovery) {
12438 return(xmlSAXParseFileWithData(sax,filename,recovery,NULL));
12439}
12440
12441/**
12442 * xmlRecoverDoc:
12443 * @cur: a pointer to an array of xmlChar
12444 *
12445 * parse an XML in-memory document and build a tree.
12446 * In the case the document is not Well Formed, a tree is built anyway
12447 *
12448 * Returns the resulting document tree
12449 */
12450
12451xmlDocPtr
12452xmlRecoverDoc(xmlChar *cur) {
12453 return(xmlSAXParseDoc(NULL, cur, 1));
12454}
12455
12456/**
12457 * xmlParseFile:
12458 * @filename: the filename
12459 *
12460 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
12461 * compressed document is provided by default if found at compile-time.
12462 *
12463 * Returns the resulting document tree if the file was wellformed,
12464 * NULL otherwise.
12465 */
12466
12467xmlDocPtr
12468xmlParseFile(const char *filename) {
12469 return(xmlSAXParseFile(NULL, filename, 0));
12470}
12471
12472/**
12473 * xmlRecoverFile:
12474 * @filename: the filename
12475 *
12476 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
12477 * compressed document is provided by default if found at compile-time.
12478 * In the case the document is not Well Formed, a tree is built anyway
12479 *
12480 * Returns the resulting document tree
12481 */
12482
12483xmlDocPtr
12484xmlRecoverFile(const char *filename) {
12485 return(xmlSAXParseFile(NULL, filename, 1));
12486}
12487
12488
12489/**
12490 * xmlSetupParserForBuffer:
12491 * @ctxt: an XML parser context
12492 * @buffer: a xmlChar * buffer
12493 * @filename: a file name
12494 *
12495 * Setup the parser context to parse a new buffer; Clears any prior
12496 * contents from the parser context. The buffer parameter must not be
12497 * NULL, but the filename parameter can be
12498 */
12499void
12500xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt, const xmlChar* buffer,
12501 const char* filename)
12502{
12503 xmlParserInputPtr input;
12504
12505 if ((ctxt == NULL) || (buffer == NULL))
12506 return;
12507
12508 input = xmlNewInputStream(ctxt);
12509 if (input == NULL) {
12510 xmlErrMemory(NULL, "parsing new buffer: out of memory\n");
12511 xmlClearParserCtxt(ctxt);
12512 return;
12513 }
12514
12515 xmlClearParserCtxt(ctxt);
12516 if (filename != NULL)
12517 input->filename = (char *) xmlCanonicPath((const xmlChar *)filename);
12518 input->base = buffer;
12519 input->cur = buffer;
12520 input->end = &buffer[xmlStrlen(buffer)];
12521 inputPush(ctxt, input);
12522}
12523
12524/**
12525 * xmlSAXUserParseFile:
12526 * @sax: a SAX handler
12527 * @user_data: The user data returned on SAX callbacks
12528 * @filename: a file name
12529 *
12530 * parse an XML file and call the given SAX handler routines.
12531 * Automatic support for ZLIB/Compress compressed document is provided
12532 *
12533 * Returns 0 in case of success or a error number otherwise
12534 */
12535int
12536xmlSAXUserParseFile(xmlSAXHandlerPtr sax, void *user_data,
12537 const char *filename) {
12538 int ret = 0;
12539 xmlParserCtxtPtr ctxt;
12540
12541 ctxt = xmlCreateFileParserCtxt(filename);
12542 if (ctxt == NULL) return -1;
12543 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
12544 xmlFree(ctxt->sax);
12545 ctxt->sax = sax;
12546 xmlDetectSAX2(ctxt);
12547
12548 if (user_data != NULL)
12549 ctxt->userData = user_data;
12550
12551 xmlParseDocument(ctxt);
12552
12553 if (ctxt->wellFormed)
12554 ret = 0;
12555 else {
12556 if (ctxt->errNo != 0)
12557 ret = ctxt->errNo;
12558 else
12559 ret = -1;
12560 }
12561 if (sax != NULL)
12562 ctxt->sax = NULL;
12563 if (ctxt->myDoc != NULL) {
12564 xmlFreeDoc(ctxt->myDoc);
12565 ctxt->myDoc = NULL;
12566 }
12567 xmlFreeParserCtxt(ctxt);
12568
12569 return ret;
12570}
12571#endif /* LIBXML_SAX1_ENABLED */
12572
12573/************************************************************************
12574 * *
12575 * Front ends when parsing from memory *
12576 * *
12577 ************************************************************************/
12578
12579/**
12580 * xmlCreateMemoryParserCtxt:
12581 * @buffer: a pointer to a char array
12582 * @size: the size of the array
12583 *
12584 * Create a parser context for an XML in-memory document.
12585 *
12586 * Returns the new parser context or NULL
12587 */
12588xmlParserCtxtPtr
12589xmlCreateMemoryParserCtxt(const char *buffer, int size) {
12590 xmlParserCtxtPtr ctxt;
12591 xmlParserInputPtr input;
12592 xmlParserInputBufferPtr buf;
12593
12594 if (buffer == NULL)
12595 return(NULL);
12596 if (size <= 0)
12597 return(NULL);
12598
12599 ctxt = xmlNewParserCtxt();
12600 if (ctxt == NULL)
12601 return(NULL);
12602
12603 /* TODO: xmlParserInputBufferCreateStatic, requires some serious changes */
12604 buf = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
12605 if (buf == NULL) {
12606 xmlFreeParserCtxt(ctxt);
12607 return(NULL);
12608 }
12609
12610 input = xmlNewInputStream(ctxt);
12611 if (input == NULL) {
12612 xmlFreeParserInputBuffer(buf);
12613 xmlFreeParserCtxt(ctxt);
12614 return(NULL);
12615 }
12616
12617 input->filename = NULL;
12618 input->buf = buf;
12619 input->base = input->buf->buffer->content;
12620 input->cur = input->buf->buffer->content;
12621 input->end = &input->buf->buffer->content[input->buf->buffer->use];
12622
12623 inputPush(ctxt, input);
12624 return(ctxt);
12625}
12626
12627#ifdef LIBXML_SAX1_ENABLED
12628/**
12629 * xmlSAXParseMemoryWithData:
12630 * @sax: the SAX handler block
12631 * @buffer: an pointer to a char array
12632 * @size: the size of the array
12633 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
12634 * documents
12635 * @data: the userdata
12636 *
12637 * parse an XML in-memory block and use the given SAX function block
12638 * to handle the parsing callback. If sax is NULL, fallback to the default
12639 * DOM tree building routines.
12640 *
12641 * User data (void *) is stored within the parser context in the
12642 * context's _private member, so it is available nearly everywhere in libxml
12643 *
12644 * Returns the resulting document tree
12645 */
12646
12647xmlDocPtr
12648xmlSAXParseMemoryWithData(xmlSAXHandlerPtr sax, const char *buffer,
12649 int size, int recovery, void *data) {
12650 xmlDocPtr ret;
12651 xmlParserCtxtPtr ctxt;
12652
12653 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
12654 if (ctxt == NULL) return(NULL);
12655 if (sax != NULL) {
12656 if (ctxt->sax != NULL)
12657 xmlFree(ctxt->sax);
12658 ctxt->sax = sax;
12659 }
12660 xmlDetectSAX2(ctxt);
12661 if (data!=NULL) {
12662 ctxt->_private=data;
12663 }
12664
12665 ctxt->recovery = recovery;
12666
12667 xmlParseDocument(ctxt);
12668
12669 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
12670 else {
12671 ret = NULL;
12672 xmlFreeDoc(ctxt->myDoc);
12673 ctxt->myDoc = NULL;
12674 }
12675 if (sax != NULL)
12676 ctxt->sax = NULL;
12677 xmlFreeParserCtxt(ctxt);
12678
12679 return(ret);
12680}
12681
12682/**
12683 * xmlSAXParseMemory:
12684 * @sax: the SAX handler block
12685 * @buffer: an pointer to a char array
12686 * @size: the size of the array
12687 * @recovery: work in recovery mode, i.e. tries to read not Well Formed
12688 * documents
12689 *
12690 * parse an XML in-memory block and use the given SAX function block
12691 * to handle the parsing callback. If sax is NULL, fallback to the default
12692 * DOM tree building routines.
12693 *
12694 * Returns the resulting document tree
12695 */
12696xmlDocPtr
12697xmlSAXParseMemory(xmlSAXHandlerPtr sax, const char *buffer,
12698 int size, int recovery) {
12699 return xmlSAXParseMemoryWithData(sax, buffer, size, recovery, NULL);
12700}
12701
12702/**
12703 * xmlParseMemory:
12704 * @buffer: an pointer to a char array
12705 * @size: the size of the array
12706 *
12707 * parse an XML in-memory block and build a tree.
12708 *
12709 * Returns the resulting document tree
12710 */
12711
12712xmlDocPtr xmlParseMemory(const char *buffer, int size) {
12713 return(xmlSAXParseMemory(NULL, buffer, size, 0));
12714}
12715
12716/**
12717 * xmlRecoverMemory:
12718 * @buffer: an pointer to a char array
12719 * @size: the size of the array
12720 *
12721 * parse an XML in-memory block and build a tree.
12722 * In the case the document is not Well Formed, a tree is built anyway
12723 *
12724 * Returns the resulting document tree
12725 */
12726
12727xmlDocPtr xmlRecoverMemory(const char *buffer, int size) {
12728 return(xmlSAXParseMemory(NULL, buffer, size, 1));
12729}
12730
12731/**
12732 * xmlSAXUserParseMemory:
12733 * @sax: a SAX handler
12734 * @user_data: The user data returned on SAX callbacks
12735 * @buffer: an in-memory XML document input
12736 * @size: the length of the XML document in bytes
12737 *
12738 * A better SAX parsing routine.
12739 * parse an XML in-memory buffer and call the given SAX handler routines.
12740 *
12741 * Returns 0 in case of success or a error number otherwise
12742 */
12743int xmlSAXUserParseMemory(xmlSAXHandlerPtr sax, void *user_data,
12744 const char *buffer, int size) {
12745 int ret = 0;
12746 xmlParserCtxtPtr ctxt;
12747
12748 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
12749 if (ctxt == NULL) return -1;
12750 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
12751 xmlFree(ctxt->sax);
12752 ctxt->sax = sax;
12753 xmlDetectSAX2(ctxt);
12754
12755 if (user_data != NULL)
12756 ctxt->userData = user_data;
12757
12758 xmlParseDocument(ctxt);
12759
12760 if (ctxt->wellFormed)
12761 ret = 0;
12762 else {
12763 if (ctxt->errNo != 0)
12764 ret = ctxt->errNo;
12765 else
12766 ret = -1;
12767 }
12768 if (sax != NULL)
12769 ctxt->sax = NULL;
12770 if (ctxt->myDoc != NULL) {
12771 xmlFreeDoc(ctxt->myDoc);
12772 ctxt->myDoc = NULL;
12773 }
12774 xmlFreeParserCtxt(ctxt);
12775
12776 return ret;
12777}
12778#endif /* LIBXML_SAX1_ENABLED */
12779
12780/**
12781 * xmlCreateDocParserCtxt:
12782 * @cur: a pointer to an array of xmlChar
12783 *
12784 * Creates a parser context for an XML in-memory document.
12785 *
12786 * Returns the new parser context or NULL
12787 */
12788xmlParserCtxtPtr
12789xmlCreateDocParserCtxt(const xmlChar *cur) {
12790 int len;
12791
12792 if (cur == NULL)
12793 return(NULL);
12794 len = xmlStrlen(cur);
12795 return(xmlCreateMemoryParserCtxt((const char *)cur, len));
12796}
12797
12798#ifdef LIBXML_SAX1_ENABLED
12799/**
12800 * xmlSAXParseDoc:
12801 * @sax: the SAX handler block
12802 * @cur: a pointer to an array of xmlChar
12803 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
12804 * documents
12805 *
12806 * parse an XML in-memory document and build a tree.
12807 * It use the given SAX function block to handle the parsing callback.
12808 * If sax is NULL, fallback to the default DOM tree building routines.
12809 *
12810 * Returns the resulting document tree
12811 */
12812
12813xmlDocPtr
12814xmlSAXParseDoc(xmlSAXHandlerPtr sax, const xmlChar *cur, int recovery) {
12815 xmlDocPtr ret;
12816 xmlParserCtxtPtr ctxt;
12817 xmlSAXHandlerPtr oldsax = NULL;
12818
12819 if (cur == NULL) return(NULL);
12820
12821
12822 ctxt = xmlCreateDocParserCtxt(cur);
12823 if (ctxt == NULL) return(NULL);
12824 if (sax != NULL) {
12825 oldsax = ctxt->sax;
12826 ctxt->sax = sax;
12827 ctxt->userData = NULL;
12828 }
12829 xmlDetectSAX2(ctxt);
12830
12831 xmlParseDocument(ctxt);
12832 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
12833 else {
12834 ret = NULL;
12835 xmlFreeDoc(ctxt->myDoc);
12836 ctxt->myDoc = NULL;
12837 }
12838 if (sax != NULL)
12839 ctxt->sax = oldsax;
12840 xmlFreeParserCtxt(ctxt);
12841
12842 return(ret);
12843}
12844
12845/**
12846 * xmlParseDoc:
12847 * @cur: a pointer to an array of xmlChar
12848 *
12849 * parse an XML in-memory document and build a tree.
12850 *
12851 * Returns the resulting document tree
12852 */
12853
12854xmlDocPtr
12855xmlParseDoc(const xmlChar *cur) {
12856 return(xmlSAXParseDoc(NULL, cur, 0));
12857}
12858#endif /* LIBXML_SAX1_ENABLED */
12859
12860#ifdef LIBXML_LEGACY_ENABLED
12861/************************************************************************
12862 * *
12863 * Specific function to keep track of entities references *
12864 * and used by the XSLT debugger *
12865 * *
12866 ************************************************************************/
12867
12868static xmlEntityReferenceFunc xmlEntityRefFunc = NULL;
12869
12870/**
12871 * xmlAddEntityReference:
12872 * @ent : A valid entity
12873 * @firstNode : A valid first node for children of entity
12874 * @lastNode : A valid last node of children entity
12875 *
12876 * Notify of a reference to an entity of type XML_EXTERNAL_GENERAL_PARSED_ENTITY
12877 */
12878static void
12879xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
12880 xmlNodePtr lastNode)
12881{
12882 if (xmlEntityRefFunc != NULL) {
12883 (*xmlEntityRefFunc) (ent, firstNode, lastNode);
12884 }
12885}
12886
12887
12888/**
12889 * xmlSetEntityReferenceFunc:
12890 * @func: A valid function
12891 *
12892 * Set the function to call call back when a xml reference has been made
12893 */
12894void
12895xmlSetEntityReferenceFunc(xmlEntityReferenceFunc func)
12896{
12897 xmlEntityRefFunc = func;
12898}
12899#endif /* LIBXML_LEGACY_ENABLED */
12900
12901/************************************************************************
12902 * *
12903 * Miscellaneous *
12904 * *
12905 ************************************************************************/
12906
12907#ifdef LIBXML_XPATH_ENABLED
12908#include <libxml/xpath.h>
12909#endif
12910
12911extern void XMLCDECL xmlGenericErrorDefaultFunc(void *ctx, const char *msg, ...);
12912static int xmlParserInitialized = 0;
12913
12914/**
12915 * xmlInitParser:
12916 *
12917 * Initialization function for the XML parser.
12918 * This is not reentrant. Call once before processing in case of
12919 * use in multithreaded programs.
12920 */
12921
12922void
12923xmlInitParser(void) {
12924 if (xmlParserInitialized != 0)
12925 return;
12926
12927#ifdef LIBXML_THREAD_ENABLED
12928 __xmlGlobalInitMutexLock();
12929 if (xmlParserInitialized == 0) {
12930#endif
12931 if ((xmlGenericError == xmlGenericErrorDefaultFunc) ||
12932 (xmlGenericError == NULL))
12933 initGenericErrorDefaultFunc(NULL);
12934 xmlInitGlobals();
12935 xmlInitThreads();
12936 xmlInitMemory();
12937 xmlInitCharEncodingHandlers();
12938 xmlDefaultSAXHandlerInit();
12939 xmlRegisterDefaultInputCallbacks();
12940#ifdef LIBXML_OUTPUT_ENABLED
12941 xmlRegisterDefaultOutputCallbacks();
12942#endif /* LIBXML_OUTPUT_ENABLED */
12943#ifdef LIBXML_HTML_ENABLED
12944 htmlInitAutoClose();
12945 htmlDefaultSAXHandlerInit();
12946#endif
12947#ifdef LIBXML_XPATH_ENABLED
12948 xmlXPathInit();
12949#endif
12950 xmlParserInitialized = 1;
12951#ifdef LIBXML_THREAD_ENABLED
12952 }
12953 __xmlGlobalInitMutexUnlock();
12954#endif
12955}
12956
12957/**
12958 * xmlCleanupParser:
12959 *
12960 * Cleanup function for the XML library. It tries to reclaim all
12961 * parsing related global memory allocated for the library processing.
12962 * It doesn't deallocate any document related memory. Calling this
12963 * function should not prevent reusing the library but one should
12964 * call xmlCleanupParser() only when the process has
12965 * finished using the library or XML document built with it.
12966 */
12967
12968void
12969xmlCleanupParser(void) {
12970 if (!xmlParserInitialized)
12971 return;
12972
12973 xmlCleanupCharEncodingHandlers();
12974#ifdef LIBXML_CATALOG_ENABLED
12975 xmlCatalogCleanup();
12976#endif
12977 xmlDictCleanup();
12978 xmlCleanupInputCallbacks();
12979#ifdef LIBXML_OUTPUT_ENABLED
12980 xmlCleanupOutputCallbacks();
12981#endif
12982#ifdef LIBXML_SCHEMAS_ENABLED
12983 xmlSchemaCleanupTypes();
12984 xmlRelaxNGCleanupTypes();
12985#endif
12986 xmlCleanupGlobals();
12987 xmlResetLastError();
12988 xmlCleanupThreads(); /* must be last if called not from the main thread */
12989 xmlCleanupMemory();
12990 xmlParserInitialized = 0;
12991}
12992
12993/************************************************************************
12994 * *
12995 * New set (2.6.0) of simpler and more flexible APIs *
12996 * *
12997 ************************************************************************/
12998
/**
 * DICT_FREE:
 * @str:  a string
 *
 * Free a string if it is not owned by the "dict" dictionary in the
 * current scope. A NULL @str is ignored, and a NULL "dict" means the
 * string is always freed.
 *
 * The body is wrapped in do { } while (0) so that the macro expands to a
 * single statement and "DICT_FREE(x);" is safe inside an unbraced
 * if/else. Callers must supply the terminating semicolon.
 */
#define DICT_FREE(str)                                                  \
    do {                                                                \
        if ((str) && ((!dict) ||                                        \
            (xmlDictOwns(dict, (const xmlChar *)(str)) == 0)))          \
            xmlFree((char *)(str));                                     \
    } while (0)
13010
13011/**
13012 * xmlCtxtReset:
13013 * @ctxt: an XML parser context
13014 *
13015 * Reset a parser context
13016 */
13017void
13018xmlCtxtReset(xmlParserCtxtPtr ctxt)
13019{
13020 xmlParserInputPtr input;
13021 xmlDictPtr dict;
13022
13023 if (ctxt == NULL)
13024 return;
13025
13026 dict = ctxt->dict;
13027
13028 while ((input = inputPop(ctxt)) != NULL) { /* Non consuming */
13029 xmlFreeInputStream(input);
13030 }
13031 ctxt->inputNr = 0;
13032 ctxt->input = NULL;
13033
13034 ctxt->spaceNr = 0;
13035 if (ctxt->spaceTab != NULL) {
13036 ctxt->spaceTab[0] = -1;
13037 ctxt->space = &ctxt->spaceTab[0];
13038 } else {
13039 ctxt->space = NULL;
13040 }
13041
13042
13043 ctxt->nodeNr = 0;
13044 ctxt->node = NULL;
13045
13046 ctxt->nameNr = 0;
13047 ctxt->name = NULL;
13048
13049 DICT_FREE(ctxt->version);
13050 ctxt->version = NULL;
13051 DICT_FREE(ctxt->encoding);
13052 ctxt->encoding = NULL;
13053 DICT_FREE(ctxt->directory);
13054 ctxt->directory = NULL;
13055 DICT_FREE(ctxt->extSubURI);
13056 ctxt->extSubURI = NULL;
13057 DICT_FREE(ctxt->extSubSystem);
13058 ctxt->extSubSystem = NULL;
13059 if (ctxt->myDoc != NULL)
13060 xmlFreeDoc(ctxt->myDoc);
13061 ctxt->myDoc = NULL;
13062
13063 ctxt->standalone = -1;
13064 ctxt->hasExternalSubset = 0;
13065 ctxt->hasPErefs = 0;
13066 ctxt->html = 0;
13067 ctxt->external = 0;
13068 ctxt->instate = XML_PARSER_START;
13069 ctxt->token = 0;
13070
13071 ctxt->wellFormed = 1;
13072 ctxt->nsWellFormed = 1;
13073 ctxt->disableSAX = 0;
13074 ctxt->valid = 1;
13075#if 0
13076 ctxt->vctxt.userData = ctxt;
13077 ctxt->vctxt.error = xmlParserValidityError;
13078 ctxt->vctxt.warning = xmlParserValidityWarning;
13079#endif
13080 ctxt->record_info = 0;
13081 ctxt->nbChars = 0;
13082 ctxt->checkIndex = 0;
13083 ctxt->inSubset = 0;
13084 ctxt->errNo = XML_ERR_OK;
13085 ctxt->depth = 0;
13086 ctxt->charset = XML_CHAR_ENCODING_UTF8;
13087 ctxt->catalogs = NULL;
13088 ctxt->nbentities = 0;
13089 xmlInitNodeInfoSeq(&ctxt->node_seq);
13090
13091 if (ctxt->attsDefault != NULL) {
13092 xmlHashFree(ctxt->attsDefault, (xmlHashDeallocator) xmlFree);
13093 ctxt->attsDefault = NULL;
13094 }
13095 if (ctxt->attsSpecial != NULL) {
13096 xmlHashFree(ctxt->attsSpecial, NULL);
13097 ctxt->attsSpecial = NULL;
13098 }
13099
13100#ifdef LIBXML_CATALOG_ENABLED
13101 if (ctxt->catalogs != NULL)
13102 xmlCatalogFreeLocal(ctxt->catalogs);
13103#endif
13104 if (ctxt->lastError.code != XML_ERR_OK)
13105 xmlResetError(&ctxt->lastError);
13106}
13107
13108/**
13109 * xmlCtxtResetPush:
13110 * @ctxt: an XML parser context
13111 * @chunk: a pointer to an array of chars
13112 * @size: number of chars in the array
13113 * @filename: an optional file name or URI
13114 * @encoding: the document encoding, or NULL
13115 *
13116 * Reset a push parser context
13117 *
13118 * Returns 0 in case of success and 1 in case of error
13119 */
13120int
13121xmlCtxtResetPush(xmlParserCtxtPtr ctxt, const char *chunk,
13122 int size, const char *filename, const char *encoding)
13123{
13124 xmlParserInputPtr inputStream;
13125 xmlParserInputBufferPtr buf;
13126 xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
13127
13128 if (ctxt == NULL)
13129 return(1);
13130
13131 if ((encoding == NULL) && (chunk != NULL) && (size >= 4))
13132 enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
13133
13134 buf = xmlAllocParserInputBuffer(enc);
13135 if (buf == NULL)
13136 return(1);
13137
13138 if (ctxt == NULL) {
13139 xmlFreeParserInputBuffer(buf);
13140 return(1);
13141 }
13142
13143 xmlCtxtReset(ctxt);
13144
13145 if (ctxt->pushTab == NULL) {
13146 ctxt->pushTab = (void **) xmlMalloc(ctxt->nameMax * 3 *
13147 sizeof(xmlChar *));
13148 if (ctxt->pushTab == NULL) {
13149 xmlErrMemory(ctxt, NULL);
13150 xmlFreeParserInputBuffer(buf);
13151 return(1);
13152 }
13153 }
13154
13155 if (filename == NULL) {
13156 ctxt->directory = NULL;
13157 } else {
13158 ctxt->directory = xmlParserGetDirectory(filename);
13159 }
13160
13161 inputStream = xmlNewInputStream(ctxt);
13162 if (inputStream == NULL) {
13163 xmlFreeParserInputBuffer(buf);
13164 return(1);
13165 }
13166
13167 if (filename == NULL)
13168 inputStream->filename = NULL;
13169 else
13170 inputStream->filename = (char *)
13171 xmlCanonicPath((const xmlChar *) filename);
13172 inputStream->buf = buf;
13173 inputStream->base = inputStream->buf->buffer->content;
13174 inputStream->cur = inputStream->buf->buffer->content;
13175 inputStream->end =
13176 &inputStream->buf->buffer->content[inputStream->buf->buffer->use];
13177
13178 inputPush(ctxt, inputStream);
13179
13180 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
13181 (ctxt->input->buf != NULL)) {
13182 int base = ctxt->input->base - ctxt->input->buf->buffer->content;
13183 int cur = ctxt->input->cur - ctxt->input->base;
13184
13185 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
13186
13187 ctxt->input->base = ctxt->input->buf->buffer->content + base;
13188 ctxt->input->cur = ctxt->input->base + cur;
13189 ctxt->input->end =
13190 &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->
13191 use];
13192#ifdef DEBUG_PUSH
13193 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
13194#endif
13195 }
13196
13197 if (encoding != NULL) {
13198 xmlCharEncodingHandlerPtr hdlr;
13199
13200 hdlr = xmlFindCharEncodingHandler(encoding);
13201 if (hdlr != NULL) {
13202 xmlSwitchToEncoding(ctxt, hdlr);
13203 } else {
13204 xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
13205 "Unsupported encoding %s\n", BAD_CAST encoding);
13206 }
13207 } else if (enc != XML_CHAR_ENCODING_NONE) {
13208 xmlSwitchEncoding(ctxt, enc);
13209 }
13210
13211 return(0);
13212}
13213
13214/**
13215 * xmlCtxtUseOptions:
13216 * @ctxt: an XML parser context
13217 * @options: a combination of xmlParserOption
13218 *
13219 * Applies the options to the parser context
13220 *
13221 * Returns 0 in case of success, the set of unknown or unimplemented options
13222 * in case of error.
13223 */
13224int
13225xmlCtxtUseOptions(xmlParserCtxtPtr ctxt, int options)
13226{
13227 if (ctxt == NULL)
13228 return(-1);
13229 if (options & XML_PARSE_RECOVER) {
13230 ctxt->recovery = 1;
13231 options -= XML_PARSE_RECOVER;
13232 } else
13233 ctxt->recovery = 0;
13234 if (options & XML_PARSE_DTDLOAD) {
13235 ctxt->loadsubset = XML_DETECT_IDS;
13236 options -= XML_PARSE_DTDLOAD;
13237 } else
13238 ctxt->loadsubset = 0;
13239 if (options & XML_PARSE_DTDATTR) {
13240 ctxt->loadsubset |= XML_COMPLETE_ATTRS;
13241 options -= XML_PARSE_DTDATTR;
13242 }
13243 if (options & XML_PARSE_NOENT) {
13244 ctxt->replaceEntities = 1;
13245 /* ctxt->loadsubset |= XML_DETECT_IDS; */
13246 options -= XML_PARSE_NOENT;
13247 } else
13248 ctxt->replaceEntities = 0;
13249 if (options & XML_PARSE_PEDANTIC) {
13250 ctxt->pedantic = 1;
13251 options -= XML_PARSE_PEDANTIC;
13252 } else
13253 ctxt->pedantic = 0;
13254 if (options & XML_PARSE_NOBLANKS) {
13255 ctxt->keepBlanks = 0;
13256 ctxt->sax->ignorableWhitespace = xmlSAX2IgnorableWhitespace;
13257 options -= XML_PARSE_NOBLANKS;
13258 } else
13259 ctxt->keepBlanks = 1;
13260 if (options & XML_PARSE_DTDVALID) {
13261 ctxt->validate = 1;
13262 if (options & XML_PARSE_NOWARNING)
13263 ctxt->vctxt.warning = NULL;
13264 if (options & XML_PARSE_NOERROR)
13265 ctxt->vctxt.error = NULL;
13266 options -= XML_PARSE_DTDVALID;
13267 } else
13268 ctxt->validate = 0;
13269 if (options & XML_PARSE_NOWARNING) {
13270 ctxt->sax->warning = NULL;
13271 options -= XML_PARSE_NOWARNING;
13272 }
13273 if (options & XML_PARSE_NOERROR) {
13274 ctxt->sax->error = NULL;
13275 ctxt->sax->fatalError = NULL;
13276 options -= XML_PARSE_NOERROR;
13277 }
13278#ifdef LIBXML_SAX1_ENABLED
13279 if (options & XML_PARSE_SAX1) {
13280 ctxt->sax->startElement = xmlSAX2StartElement;
13281 ctxt->sax->endElement = xmlSAX2EndElement;
13282 ctxt->sax->startElementNs = NULL;
13283 ctxt->sax->endElementNs = NULL;
13284 ctxt->sax->initialized = 1;
13285 options -= XML_PARSE_SAX1;
13286 }
13287#endif /* LIBXML_SAX1_ENABLED */
13288 if (options & XML_PARSE_NODICT) {
13289 ctxt->dictNames = 0;
13290 options -= XML_PARSE_NODICT;
13291 } else {
13292 ctxt->dictNames = 1;
13293 }
13294 if (options & XML_PARSE_NOCDATA) {
13295 ctxt->sax->cdataBlock = NULL;
13296 options -= XML_PARSE_NOCDATA;
13297 }
13298 if (options & XML_PARSE_NSCLEAN) {
13299 ctxt->options |= XML_PARSE_NSCLEAN;
13300 options -= XML_PARSE_NSCLEAN;
13301 }
13302 if (options & XML_PARSE_NONET) {
13303 ctxt->options |= XML_PARSE_NONET;
13304 options -= XML_PARSE_NONET;
13305 }
13306 if (options & XML_PARSE_COMPACT) {
13307 ctxt->options |= XML_PARSE_COMPACT;
13308 options -= XML_PARSE_COMPACT;
13309 }
13310 ctxt->linenumbers = 1;
13311 return (options);
13312}
13313
13314/**
13315 * xmlDoRead:
13316 * @ctxt: an XML parser context
13317 * @URL: the base URL to use for the document
13318 * @encoding: the document encoding, or NULL
13319 * @options: a combination of xmlParserOption
13320 * @reuse: keep the context for reuse
13321 *
13322 * Common front-end for the xmlRead functions
13323 *
13324 * Returns the resulting document tree or NULL
13325 */
13326static xmlDocPtr
13327xmlDoRead(xmlParserCtxtPtr ctxt, const char *URL, const char *encoding,
13328 int options, int reuse)
13329{
13330 xmlDocPtr ret;
13331
13332 xmlCtxtUseOptions(ctxt, options);
13333 if (encoding != NULL) {
13334 xmlCharEncodingHandlerPtr hdlr;
13335
13336 hdlr = xmlFindCharEncodingHandler(encoding);
13337 if (hdlr != NULL)
13338 xmlSwitchToEncoding(ctxt, hdlr);
13339 }
13340 if ((URL != NULL) && (ctxt->input != NULL) &&
13341 (ctxt->input->filename == NULL))
13342 ctxt->input->filename = (char *) xmlStrdup((const xmlChar *) URL);
13343 xmlParseDocument(ctxt);
13344 if ((ctxt->wellFormed) || ctxt->recovery)
13345 ret = ctxt->myDoc;
13346 else {
13347 ret = NULL;
13348 if (ctxt->myDoc != NULL) {
13349 xmlFreeDoc(ctxt->myDoc);
13350 }
13351 }
13352 ctxt->myDoc = NULL;
13353 if (!reuse) {
13354 xmlFreeParserCtxt(ctxt);
13355 }
13356
13357 return (ret);
13358}
13359
13360/**
13361 * xmlReadDoc:
13362 * @cur: a pointer to a zero terminated string
13363 * @URL: the base URL to use for the document
13364 * @encoding: the document encoding, or NULL
13365 * @options: a combination of xmlParserOption
13366 *
13367 * parse an XML in-memory document and build a tree.
13368 *
13369 * Returns the resulting document tree
13370 */
13371xmlDocPtr
13372xmlReadDoc(const xmlChar * cur, const char *URL, const char *encoding, int options)
13373{
13374 xmlParserCtxtPtr ctxt;
13375
13376 if (cur == NULL)
13377 return (NULL);
13378
13379 ctxt = xmlCreateDocParserCtxt(cur);
13380 if (ctxt == NULL)
13381 return (NULL);
13382 return (xmlDoRead(ctxt, URL, encoding, options, 0));
13383}
13384
13385/**
13386 * xmlReadFile:
13387 * @filename: a file or URL
13388 * @encoding: the document encoding, or NULL
13389 * @options: a combination of xmlParserOption
13390 *
13391 * parse an XML file from the filesystem or the network.
13392 *
13393 * Returns the resulting document tree
13394 */
13395xmlDocPtr
13396xmlReadFile(const char *filename, const char *encoding, int options)
13397{
13398 xmlParserCtxtPtr ctxt;
13399
13400 ctxt = xmlCreateURLParserCtxt(filename, options);
13401 if (ctxt == NULL)
13402 return (NULL);
13403 return (xmlDoRead(ctxt, NULL, encoding, options, 0));
13404}
13405
13406/**
13407 * xmlReadMemory:
13408 * @buffer: a pointer to a char array
13409 * @size: the size of the array
13410 * @URL: the base URL to use for the document
13411 * @encoding: the document encoding, or NULL
13412 * @options: a combination of xmlParserOption
13413 *
13414 * parse an XML in-memory document and build a tree.
13415 *
13416 * Returns the resulting document tree
13417 */
13418xmlDocPtr
13419xmlReadMemory(const char *buffer, int size, const char *URL, const char *encoding, int options)
13420{
13421 xmlParserCtxtPtr ctxt;
13422
13423 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
13424 if (ctxt == NULL)
13425 return (NULL);
13426 return (xmlDoRead(ctxt, URL, encoding, options, 0));
13427}
13428
13429/**
13430 * xmlReadFd:
13431 * @fd: an open file descriptor
13432 * @URL: the base URL to use for the document
13433 * @encoding: the document encoding, or NULL
13434 * @options: a combination of xmlParserOption
13435 *
13436 * parse an XML from a file descriptor and build a tree.
13437 * NOTE that the file descriptor will not be closed when the
13438 * reader is closed or reset.
13439 *
13440 * Returns the resulting document tree
13441 */
13442xmlDocPtr
13443xmlReadFd(int fd, const char *URL, const char *encoding, int options)
13444{
13445 xmlParserCtxtPtr ctxt;
13446 xmlParserInputBufferPtr input;
13447 xmlParserInputPtr stream;
13448
13449 if (fd < 0)
13450 return (NULL);
13451
13452 input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
13453 if (input == NULL)
13454 return (NULL);
13455 input->closecallback = NULL;
13456 ctxt = xmlNewParserCtxt();
13457 if (ctxt == NULL) {
13458 xmlFreeParserInputBuffer(input);
13459 return (NULL);
13460 }
13461 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
13462 if (stream == NULL) {
13463 xmlFreeParserInputBuffer(input);
13464 xmlFreeParserCtxt(ctxt);
13465 return (NULL);
13466 }
13467 inputPush(ctxt, stream);
13468 return (xmlDoRead(ctxt, URL, encoding, options, 0));
13469}
13470
13471/**
13472 * xmlReadIO:
13473 * @ioread: an I/O read function
13474 * @ioclose: an I/O close function
13475 * @ioctx: an I/O handler
13476 * @URL: the base URL to use for the document
13477 * @encoding: the document encoding, or NULL
13478 * @options: a combination of xmlParserOption
13479 *
13480 * parse an XML document from I/O functions and source and build a tree.
13481 *
13482 * Returns the resulting document tree
13483 */
13484xmlDocPtr
13485xmlReadIO(xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
13486 void *ioctx, const char *URL, const char *encoding, int options)
13487{
13488 xmlParserCtxtPtr ctxt;
13489 xmlParserInputBufferPtr input;
13490 xmlParserInputPtr stream;
13491
13492 if (ioread == NULL)
13493 return (NULL);
13494
13495 input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
13496 XML_CHAR_ENCODING_NONE);
13497 if (input == NULL)
13498 return (NULL);
13499 ctxt = xmlNewParserCtxt();
13500 if (ctxt == NULL) {
13501 xmlFreeParserInputBuffer(input);
13502 return (NULL);
13503 }
13504 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
13505 if (stream == NULL) {
13506 xmlFreeParserInputBuffer(input);
13507 xmlFreeParserCtxt(ctxt);
13508 return (NULL);
13509 }
13510 inputPush(ctxt, stream);
13511 return (xmlDoRead(ctxt, URL, encoding, options, 0));
13512}
13513
13514/**
13515 * xmlCtxtReadDoc:
13516 * @ctxt: an XML parser context
13517 * @cur: a pointer to a zero terminated string
13518 * @URL: the base URL to use for the document
13519 * @encoding: the document encoding, or NULL
13520 * @options: a combination of xmlParserOption
13521 *
13522 * parse an XML in-memory document and build a tree.
13523 * This reuses the existing @ctxt parser context
13524 *
13525 * Returns the resulting document tree
13526 */
13527xmlDocPtr
13528xmlCtxtReadDoc(xmlParserCtxtPtr ctxt, const xmlChar * cur,
13529 const char *URL, const char *encoding, int options)
13530{
13531 xmlParserInputPtr stream;
13532
13533 if (cur == NULL)
13534 return (NULL);
13535 if (ctxt == NULL)
13536 return (NULL);
13537
13538 xmlCtxtReset(ctxt);
13539
13540 stream = xmlNewStringInputStream(ctxt, cur);
13541 if (stream == NULL) {
13542 return (NULL);
13543 }
13544 inputPush(ctxt, stream);
13545 return (xmlDoRead(ctxt, URL, encoding, options, 1));
13546}
13547
13548/**
13549 * xmlCtxtReadFile:
13550 * @ctxt: an XML parser context
13551 * @filename: a file or URL
13552 * @encoding: the document encoding, or NULL
13553 * @options: a combination of xmlParserOption
13554 *
13555 * parse an XML file from the filesystem or the network.
13556 * This reuses the existing @ctxt parser context
13557 *
13558 * Returns the resulting document tree
13559 */
13560xmlDocPtr
13561xmlCtxtReadFile(xmlParserCtxtPtr ctxt, const char *filename,
13562 const char *encoding, int options)
13563{
13564 xmlParserInputPtr stream;
13565
13566 if (filename == NULL)
13567 return (NULL);
13568 if (ctxt == NULL)
13569 return (NULL);
13570
13571 xmlCtxtReset(ctxt);
13572
13573 stream = xmlLoadExternalEntity(filename, NULL, ctxt);
13574 if (stream == NULL) {
13575 return (NULL);
13576 }
13577 inputPush(ctxt, stream);
13578 return (xmlDoRead(ctxt, NULL, encoding, options, 1));
13579}
13580
13581/**
13582 * xmlCtxtReadMemory:
13583 * @ctxt: an XML parser context
13584 * @buffer: a pointer to a char array
13585 * @size: the size of the array
13586 * @URL: the base URL to use for the document
13587 * @encoding: the document encoding, or NULL
13588 * @options: a combination of xmlParserOption
13589 *
13590 * parse an XML in-memory document and build a tree.
13591 * This reuses the existing @ctxt parser context
13592 *
13593 * Returns the resulting document tree
13594 */
13595xmlDocPtr
13596xmlCtxtReadMemory(xmlParserCtxtPtr ctxt, const char *buffer, int size,
13597 const char *URL, const char *encoding, int options)
13598{
13599 xmlParserInputBufferPtr input;
13600 xmlParserInputPtr stream;
13601
13602 if (ctxt == NULL)
13603 return (NULL);
13604 if (buffer == NULL)
13605 return (NULL);
13606
13607 xmlCtxtReset(ctxt);
13608
13609 input = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
13610 if (input == NULL) {
13611 return(NULL);
13612 }
13613
13614 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
13615 if (stream == NULL) {
13616 xmlFreeParserInputBuffer(input);
13617 return(NULL);
13618 }
13619
13620 inputPush(ctxt, stream);
13621 return (xmlDoRead(ctxt, URL, encoding, options, 1));
13622}
13623
13624/**
13625 * xmlCtxtReadFd:
13626 * @ctxt: an XML parser context
13627 * @fd: an open file descriptor
13628 * @URL: the base URL to use for the document
13629 * @encoding: the document encoding, or NULL
13630 * @options: a combination of xmlParserOption
13631 *
13632 * parse an XML from a file descriptor and build a tree.
13633 * This reuses the existing @ctxt parser context
13634 * NOTE that the file descriptor will not be closed when the
13635 * reader is closed or reset.
13636 *
13637 * Returns the resulting document tree
13638 */
13639xmlDocPtr
13640xmlCtxtReadFd(xmlParserCtxtPtr ctxt, int fd,
13641 const char *URL, const char *encoding, int options)
13642{
13643 xmlParserInputBufferPtr input;
13644 xmlParserInputPtr stream;
13645
13646 if (fd < 0)
13647 return (NULL);
13648 if (ctxt == NULL)
13649 return (NULL);
13650
13651 xmlCtxtReset(ctxt);
13652
13653
13654 input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
13655 if (input == NULL)
13656 return (NULL);
13657 input->closecallback = NULL;
13658 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
13659 if (stream == NULL) {
13660 xmlFreeParserInputBuffer(input);
13661 return (NULL);
13662 }
13663 inputPush(ctxt, stream);
13664 return (xmlDoRead(ctxt, URL, encoding, options, 1));
13665}
13666
13667/**
13668 * xmlCtxtReadIO:
13669 * @ctxt: an XML parser context
13670 * @ioread: an I/O read function
13671 * @ioclose: an I/O close function
13672 * @ioctx: an I/O handler
13673 * @URL: the base URL to use for the document
13674 * @encoding: the document encoding, or NULL
13675 * @options: a combination of xmlParserOption
13676 *
13677 * parse an XML document from I/O functions and source and build a tree.
13678 * This reuses the existing @ctxt parser context
13679 *
13680 * Returns the resulting document tree
13681 */
13682xmlDocPtr
13683xmlCtxtReadIO(xmlParserCtxtPtr ctxt, xmlInputReadCallback ioread,
13684 xmlInputCloseCallback ioclose, void *ioctx,
13685 const char *URL,
13686 const char *encoding, int options)
13687{
13688 xmlParserInputBufferPtr input;
13689 xmlParserInputPtr stream;
13690
13691 if (ioread == NULL)
13692 return (NULL);
13693 if (ctxt == NULL)
13694 return (NULL);
13695
13696 xmlCtxtReset(ctxt);
13697
13698 input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
13699 XML_CHAR_ENCODING_NONE);
13700 if (input == NULL)
13701 return (NULL);
13702 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
13703 if (stream == NULL) {
13704 xmlFreeParserInputBuffer(input);
13705 return (NULL);
13706 }
13707 inputPush(ctxt, stream);
13708 return (xmlDoRead(ctxt, URL, encoding, options, 1));
13709}
13710
13711#define bottom_parser
13712#include "elfgcchack.h"
Note: See TracBrowser for help on using the repository browser.

© 2024 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette