VirtualBox

source: vbox/trunk/src/libs/libxml2-2.6.31/parser.c@ 47296

Last change on this file since 47296 was 45449, checked in by vboxsync, 12 years ago

libxml2: fixes from upstream

  • Property svn:eol-style set to native
File size: 374.5 KB
Line 
1/*
2 * parser.c : an XML 1.0 parser, namespaces and validity support are mostly
3 * implemented on top of the SAX interfaces
4 *
5 * References:
6 * The XML specification:
7 * http://www.w3.org/TR/REC-xml
8 * Original 1.0 version:
9 * http://www.w3.org/TR/1998/REC-xml-19980210
10 * XML second edition working draft
11 * http://www.w3.org/TR/2000/WD-xml-2e-20000814
12 *
13 * Okay this is a big file, the parser core is around 7000 lines, then it
14 * is followed by the progressive parser top routines, then the various
15 * high level APIs to call the parser and a few miscellaneous functions.
16 * A number of helper functions and deprecated ones have been moved to
17 * parserInternals.c to reduce this file size.
18 * As much as possible the functions are associated with their relative
19 * production in the XML specification. A few productions defining the
20 * different ranges of character are actually implanted either in
21 * parserInternals.h or parserInternals.c
22 * The DOM tree build is realized from the default SAX callbacks in
23 * the module SAX.c.
24 * The routines doing the validation checks are in valid.c and called either
25 * from the SAX callbacks or as standalone functions using a preparsed
26 * document.
27 *
28 * See Copyright for the status of this software.
29 *
30 * [email protected]
31 */
32
33#define IN_LIBXML
34#include "libxml.h"
35
36#if defined(WIN32) && !defined (__CYGWIN__)
37#define XML_DIR_SEP '\\'
38#else
39#define XML_DIR_SEP '/'
40#endif
41
42#include <stdlib.h>
43#include <limits.h>
44#include <string.h>
45#include <stdarg.h>
46#include <libxml/xmlmemory.h>
47#include <libxml/threads.h>
48#include <libxml/globals.h>
49#include <libxml/tree.h>
50#include <libxml/parser.h>
51#include <libxml/parserInternals.h>
52#include <libxml/valid.h>
53#include <libxml/entities.h>
54#include <libxml/xmlerror.h>
55#include <libxml/encoding.h>
56#include <libxml/xmlIO.h>
57#include <libxml/uri.h>
58#ifdef LIBXML_CATALOG_ENABLED
59#include <libxml/catalog.h>
60#endif
61#ifdef LIBXML_SCHEMAS_ENABLED
62#include <libxml/xmlschemastypes.h>
63#include <libxml/relaxng.h>
64#endif
65#ifdef HAVE_CTYPE_H
66#include <ctype.h>
67#endif
68#ifdef HAVE_STDLIB_H
69#include <stdlib.h>
70#endif
71#ifdef HAVE_SYS_STAT_H
72#include <sys/stat.h>
73#endif
74#ifdef HAVE_FCNTL_H
75#include <fcntl.h>
76#endif
77#ifdef HAVE_UNISTD_H
78#include <unistd.h>
79#endif
80#ifdef HAVE_ZLIB_H
81#include <zlib.h>
82#endif
83
84static void
85xmlFatalErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, const char *info);
86
87/************************************************************************
88 * *
89 * Arbitrary limits set in the parser. *
90 * *
91 ************************************************************************/
92
93#define XML_PARSER_BIG_ENTITY 1000
94#define XML_PARSER_LOT_ENTITY 5000
95
96/*
97 * XML_PARSER_NON_LINEAR is the threshold where the ratio of parsed entity
98 * replacement over the size in bytes of the input indicates that you have
99 * an exponential behaviour. A value of 10 corresponds to at least 3 entity
100 * replacements per byte of input.
101 */
102#define XML_PARSER_NON_LINEAR 10
103
104/*
105 * xmlParserEntityCheck
106 *
107 * Function to check non-linear entity expansion behaviour
108 * This is here to detect and stop exponential linear entity expansion
109 * This is not a limitation of the parser but a safety
110 * boundary feature.
111 */
112static int
113xmlParserEntityCheck(xmlParserCtxtPtr ctxt, size_t size,
114 xmlEntityPtr ent, size_t replacement)
115{
116 size_t consumed = 0;
117
118 if (ctxt == NULL)
119 return (0);
120 if (ctxt->lastError.code == XML_ERR_ENTITY_LOOP)
121 return (1);
122 if (replacement != 0) {
123 if (replacement < XML_MAX_TEXT_LENGTH)
124 return(0);
125
126 /*
127 * If the volume of entity copy reaches 10 times the
128 * amount of parsed data and over the large text threshold
129 * then that's very likely to be an abuse.
130 */
131 if (ctxt->input != NULL) {
132 consumed = ctxt->input->consumed +
133 (ctxt->input->cur - ctxt->input->base);
134 }
135 consumed += ctxt->sizeentities;
136
137 if (replacement < XML_PARSER_NON_LINEAR * consumed)
138 return(0);
139 } else if (size != 0) {
140 /*
141 * Do the check based on the replacement size of the entity
142 */
143 if (size < XML_PARSER_BIG_ENTITY)
144 return(0);
145
146 /*
147 * A limit on the amount of text data reasonably used
148 */
149 if (ctxt->input != NULL) {
150 consumed = ctxt->input->consumed +
151 (ctxt->input->cur - ctxt->input->base);
152 }
153 consumed += ctxt->sizeentities;
154
155 if ((size < XML_PARSER_NON_LINEAR * consumed) &&
156 (ctxt->nbentities * 3 < XML_PARSER_NON_LINEAR * consumed))
157 return (0);
158 } else if (ent != NULL) {
159 /*
160 * use the number of parsed entities in the replacement
161 */
162 size = ent->owner;
163
164 /*
165 * The amount of data parsed counting entities size only once
166 */
167 if (ctxt->input != NULL) {
168 consumed = ctxt->input->consumed +
169 (ctxt->input->cur - ctxt->input->base);
170 }
171 consumed += ctxt->sizeentities;
172
173 /*
174 * Check the density of entities for the amount of data
175 * knowing an entity reference will take at least 3 bytes
176 */
177 if (size * 3 < consumed * XML_PARSER_NON_LINEAR)
178 return (0);
179 } else {
180 /*
181 * strange we got no data for checking just return
182 */
183 return (0);
184 }
185 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
186 return (1);
187}
188
/**
 * xmlParserMaxDepth:
 *
 * Arbitrary limit on the nesting depth of the XML documents the parser
 * accepts to process. It is a safety boundary, not a limitation of the
 * parser itself.
 */
unsigned int xmlParserMaxDepth = 1024U;
197
#define SAX2 1

#define XML_PARSER_BIG_BUFFER_SIZE 300
#define XML_PARSER_BUFFER_SIZE 100

#define SAX_COMPAT_MODE BAD_CAST "SAX compatibility mode document"

/*
 * NULL-terminated list of the "xml"-prefixed processing instruction
 * targets allowed by the W3C specifications.
 */
static const char *xmlW3CPIs[] = { "xml-stylesheet", NULL };
213
214
215/* DEPR void xmlParserHandleReference(xmlParserCtxtPtr ctxt); */
216xmlEntityPtr xmlParseStringPEReference(xmlParserCtxtPtr ctxt,
217 const xmlChar **str);
218
219static xmlParserErrors
220xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
221 xmlSAXHandlerPtr sax,
222 void *user_data, int depth, const xmlChar *URL,
223 const xmlChar *ID, xmlNodePtr *list);
224
225#ifdef LIBXML_LEGACY_ENABLED
226static void
227xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
228 xmlNodePtr lastNode);
229#endif /* LIBXML_LEGACY_ENABLED */
230
231static xmlParserErrors
232xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
233 const xmlChar *string, void *user_data, xmlNodePtr *lst);
234
235/************************************************************************
236 * *
237 * Some factorized error routines *
238 * *
239 ************************************************************************/
240
241/**
242 * xmlErrAttributeDup:
243 * @ctxt: an XML parser context
244 * @prefix: the attribute prefix
245 * @localname: the attribute localname
246 *
247 * Handle a redefinition of attribute error
248 */
249static void
250xmlErrAttributeDup(xmlParserCtxtPtr ctxt, const xmlChar * prefix,
251 const xmlChar * localname)
252{
253 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
254 (ctxt->instate == XML_PARSER_EOF))
255 return;
256 if (ctxt != NULL)
257 ctxt->errNo = XML_ERR_ATTRIBUTE_REDEFINED;
258 if (prefix == NULL)
259 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
260 ctxt->errNo, XML_ERR_FATAL, NULL, 0,
261 (const char *) localname, NULL, NULL, 0, 0,
262 "Attribute %s redefined\n", localname);
263 else
264 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
265 ctxt->errNo, XML_ERR_FATAL, NULL, 0,
266 (const char *) prefix, (const char *) localname,
267 NULL, 0, 0, "Attribute %s:%s redefined\n", prefix,
268 localname);
269 if (ctxt != NULL) {
270 ctxt->wellFormed = 0;
271 if (ctxt->recovery == 0)
272 ctxt->disableSAX = 1;
273 }
274}
275
276/**
277 * xmlFatalErr:
278 * @ctxt: an XML parser context
279 * @error: the error number
280 * @extra: extra information string
281 *
282 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
283 */
284static void
285xmlFatalErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, const char *info)
286{
287 const char *errmsg;
288
289 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
290 (ctxt->instate == XML_PARSER_EOF))
291 return;
292 switch (error) {
293 case XML_ERR_INVALID_HEX_CHARREF:
294 errmsg = "CharRef: invalid hexadecimal value\n";
295 break;
296 case XML_ERR_INVALID_DEC_CHARREF:
297 errmsg = "CharRef: invalid decimal value\n";
298 break;
299 case XML_ERR_INVALID_CHARREF:
300 errmsg = "CharRef: invalid value\n";
301 break;
302 case XML_ERR_INTERNAL_ERROR:
303 errmsg = "internal error";
304 break;
305 case XML_ERR_PEREF_AT_EOF:
306 errmsg = "PEReference at end of document\n";
307 break;
308 case XML_ERR_PEREF_IN_PROLOG:
309 errmsg = "PEReference in prolog\n";
310 break;
311 case XML_ERR_PEREF_IN_EPILOG:
312 errmsg = "PEReference in epilog\n";
313 break;
314 case XML_ERR_PEREF_NO_NAME:
315 errmsg = "PEReference: no name\n";
316 break;
317 case XML_ERR_PEREF_SEMICOL_MISSING:
318 errmsg = "PEReference: expecting ';'\n";
319 break;
320 case XML_ERR_ENTITY_LOOP:
321 errmsg = "Detected an entity reference loop\n";
322 break;
323 case XML_ERR_ENTITY_NOT_STARTED:
324 errmsg = "EntityValue: \" or ' expected\n";
325 break;
326 case XML_ERR_ENTITY_PE_INTERNAL:
327 errmsg = "PEReferences forbidden in internal subset\n";
328 break;
329 case XML_ERR_ENTITY_NOT_FINISHED:
330 errmsg = "EntityValue: \" or ' expected\n";
331 break;
332 case XML_ERR_ATTRIBUTE_NOT_STARTED:
333 errmsg = "AttValue: \" or ' expected\n";
334 break;
335 case XML_ERR_LT_IN_ATTRIBUTE:
336 errmsg = "Unescaped '<' not allowed in attributes values\n";
337 break;
338 case XML_ERR_LITERAL_NOT_STARTED:
339 errmsg = "SystemLiteral \" or ' expected\n";
340 break;
341 case XML_ERR_LITERAL_NOT_FINISHED:
342 errmsg = "Unfinished System or Public ID \" or ' expected\n";
343 break;
344 case XML_ERR_MISPLACED_CDATA_END:
345 errmsg = "Sequence ']]>' not allowed in content\n";
346 break;
347 case XML_ERR_URI_REQUIRED:
348 errmsg = "SYSTEM or PUBLIC, the URI is missing\n";
349 break;
350 case XML_ERR_PUBID_REQUIRED:
351 errmsg = "PUBLIC, the Public Identifier is missing\n";
352 break;
353 case XML_ERR_HYPHEN_IN_COMMENT:
354 errmsg = "Comment must not contain '--' (double-hyphen)\n";
355 break;
356 case XML_ERR_PI_NOT_STARTED:
357 errmsg = "xmlParsePI : no target name\n";
358 break;
359 case XML_ERR_RESERVED_XML_NAME:
360 errmsg = "Invalid PI name\n";
361 break;
362 case XML_ERR_NOTATION_NOT_STARTED:
363 errmsg = "NOTATION: Name expected here\n";
364 break;
365 case XML_ERR_NOTATION_NOT_FINISHED:
366 errmsg = "'>' required to close NOTATION declaration\n";
367 break;
368 case XML_ERR_VALUE_REQUIRED:
369 errmsg = "Entity value required\n";
370 break;
371 case XML_ERR_URI_FRAGMENT:
372 errmsg = "Fragment not allowed";
373 break;
374 case XML_ERR_ATTLIST_NOT_STARTED:
375 errmsg = "'(' required to start ATTLIST enumeration\n";
376 break;
377 case XML_ERR_NMTOKEN_REQUIRED:
378 errmsg = "NmToken expected in ATTLIST enumeration\n";
379 break;
380 case XML_ERR_ATTLIST_NOT_FINISHED:
381 errmsg = "')' required to finish ATTLIST enumeration\n";
382 break;
383 case XML_ERR_MIXED_NOT_STARTED:
384 errmsg = "MixedContentDecl : '|' or ')*' expected\n";
385 break;
386 case XML_ERR_PCDATA_REQUIRED:
387 errmsg = "MixedContentDecl : '#PCDATA' expected\n";
388 break;
389 case XML_ERR_ELEMCONTENT_NOT_STARTED:
390 errmsg = "ContentDecl : Name or '(' expected\n";
391 break;
392 case XML_ERR_ELEMCONTENT_NOT_FINISHED:
393 errmsg = "ContentDecl : ',' '|' or ')' expected\n";
394 break;
395 case XML_ERR_PEREF_IN_INT_SUBSET:
396 errmsg =
397 "PEReference: forbidden within markup decl in internal subset\n";
398 break;
399 case XML_ERR_GT_REQUIRED:
400 errmsg = "expected '>'\n";
401 break;
402 case XML_ERR_CONDSEC_INVALID:
403 errmsg = "XML conditional section '[' expected\n";
404 break;
405 case XML_ERR_EXT_SUBSET_NOT_FINISHED:
406 errmsg = "Content error in the external subset\n";
407 break;
408 case XML_ERR_CONDSEC_INVALID_KEYWORD:
409 errmsg =
410 "conditional section INCLUDE or IGNORE keyword expected\n";
411 break;
412 case XML_ERR_CONDSEC_NOT_FINISHED:
413 errmsg = "XML conditional section not closed\n";
414 break;
415 case XML_ERR_XMLDECL_NOT_STARTED:
416 errmsg = "Text declaration '<?xml' required\n";
417 break;
418 case XML_ERR_XMLDECL_NOT_FINISHED:
419 errmsg = "parsing XML declaration: '?>' expected\n";
420 break;
421 case XML_ERR_EXT_ENTITY_STANDALONE:
422 errmsg = "external parsed entities cannot be standalone\n";
423 break;
424 case XML_ERR_ENTITYREF_SEMICOL_MISSING:
425 errmsg = "EntityRef: expecting ';'\n";
426 break;
427 case XML_ERR_DOCTYPE_NOT_FINISHED:
428 errmsg = "DOCTYPE improperly terminated\n";
429 break;
430 case XML_ERR_LTSLASH_REQUIRED:
431 errmsg = "EndTag: '</' not found\n";
432 break;
433 case XML_ERR_EQUAL_REQUIRED:
434 errmsg = "expected '='\n";
435 break;
436 case XML_ERR_STRING_NOT_CLOSED:
437 errmsg = "String not closed expecting \" or '\n";
438 break;
439 case XML_ERR_STRING_NOT_STARTED:
440 errmsg = "String not started expecting ' or \"\n";
441 break;
442 case XML_ERR_ENCODING_NAME:
443 errmsg = "Invalid XML encoding name\n";
444 break;
445 case XML_ERR_STANDALONE_VALUE:
446 errmsg = "standalone accepts only 'yes' or 'no'\n";
447 break;
448 case XML_ERR_DOCUMENT_EMPTY:
449 errmsg = "Document is empty\n";
450 break;
451 case XML_ERR_DOCUMENT_END:
452 errmsg = "Extra content at the end of the document\n";
453 break;
454 case XML_ERR_NOT_WELL_BALANCED:
455 errmsg = "chunk is not well balanced\n";
456 break;
457 case XML_ERR_EXTRA_CONTENT:
458 errmsg = "extra content at the end of well balanced chunk\n";
459 break;
460 case XML_ERR_VERSION_MISSING:
461 errmsg = "Malformed declaration expecting version\n";
462 break;
463#if 0
464 case:
465 errmsg = "\n";
466 break;
467#endif
468 default:
469 errmsg = "Unregistered error message\n";
470 }
471 if (ctxt != NULL)
472 ctxt->errNo = error;
473 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
474 XML_ERR_FATAL, NULL, 0, info, NULL, NULL, 0, 0, errmsg,
475 info);
476 if (ctxt != NULL) {
477 ctxt->wellFormed = 0;
478 if (ctxt->recovery == 0)
479 ctxt->disableSAX = 1;
480 }
481}
482
483/**
484 * xmlFatalErrMsg:
485 * @ctxt: an XML parser context
486 * @error: the error number
487 * @msg: the error message
488 *
489 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
490 */
491static void
492xmlFatalErrMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
493 const char *msg)
494{
495 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
496 (ctxt->instate == XML_PARSER_EOF))
497 return;
498 if (ctxt != NULL)
499 ctxt->errNo = error;
500 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
501 XML_ERR_FATAL, NULL, 0, NULL, NULL, NULL, 0, 0, msg);
502 if (ctxt != NULL) {
503 ctxt->wellFormed = 0;
504 if (ctxt->recovery == 0)
505 ctxt->disableSAX = 1;
506 }
507}
508
509/**
510 * xmlWarningMsg:
511 * @ctxt: an XML parser context
512 * @error: the error number
513 * @msg: the error message
514 * @str1: extra data
515 * @str2: extra data
516 *
517 * Handle a warning.
518 */
519static void
520xmlWarningMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
521 const char *msg, const xmlChar *str1, const xmlChar *str2)
522{
523 xmlStructuredErrorFunc schannel = NULL;
524
525 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
526 (ctxt->instate == XML_PARSER_EOF))
527 return;
528 if ((ctxt != NULL) && (ctxt->sax != NULL) &&
529 (ctxt->sax->initialized == XML_SAX2_MAGIC))
530 schannel = ctxt->sax->serror;
531 __xmlRaiseError(schannel,
532 (ctxt->sax) ? ctxt->sax->warning : NULL,
533 ctxt->userData,
534 ctxt, NULL, XML_FROM_PARSER, error,
535 XML_ERR_WARNING, NULL, 0,
536 (const char *) str1, (const char *) str2, NULL, 0, 0,
537 msg, (const char *) str1, (const char *) str2);
538}
539
540/**
541 * xmlValidityError:
542 * @ctxt: an XML parser context
543 * @error: the error number
544 * @msg: the error message
545 * @str1: extra data
546 *
547 * Handle a validity error.
548 */
549static void
550xmlValidityError(xmlParserCtxtPtr ctxt, xmlParserErrors error,
551 const char *msg, const xmlChar *str1)
552{
553 xmlStructuredErrorFunc schannel = NULL;
554
555 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
556 (ctxt->instate == XML_PARSER_EOF))
557 return;
558 if (ctxt != NULL) {
559 ctxt->errNo = error;
560 if ((ctxt->sax != NULL) && (ctxt->sax->initialized == XML_SAX2_MAGIC))
561 schannel = ctxt->sax->serror;
562 }
563 __xmlRaiseError(schannel,
564 ctxt->vctxt.error, ctxt->vctxt.userData,
565 ctxt, NULL, XML_FROM_DTD, error,
566 XML_ERR_ERROR, NULL, 0, (const char *) str1,
567 NULL, NULL, 0, 0,
568 msg, (const char *) str1);
569 if (ctxt != NULL) {
570 ctxt->valid = 0;
571 }
572}
573
574/**
575 * xmlFatalErrMsgInt:
576 * @ctxt: an XML parser context
577 * @error: the error number
578 * @msg: the error message
579 * @val: an integer value
580 *
581 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
582 */
583static void
584xmlFatalErrMsgInt(xmlParserCtxtPtr ctxt, xmlParserErrors error,
585 const char *msg, int val)
586{
587 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
588 (ctxt->instate == XML_PARSER_EOF))
589 return;
590 if (ctxt != NULL)
591 ctxt->errNo = error;
592 __xmlRaiseError(NULL, NULL, NULL,
593 ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
594 NULL, 0, NULL, NULL, NULL, val, 0, msg, val);
595 if (ctxt != NULL) {
596 ctxt->wellFormed = 0;
597 if (ctxt->recovery == 0)
598 ctxt->disableSAX = 1;
599 }
600}
601
602/**
603 * xmlFatalErrMsgStrIntStr:
604 * @ctxt: an XML parser context
605 * @error: the error number
606 * @msg: the error message
607 * @str1: an string info
608 * @val: an integer value
609 * @str2: an string info
610 *
611 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
612 */
613static void
614xmlFatalErrMsgStrIntStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
615 const char *msg, const xmlChar *str1, int val,
616 const xmlChar *str2)
617{
618 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
619 (ctxt->instate == XML_PARSER_EOF))
620 return;
621 if (ctxt != NULL)
622 ctxt->errNo = error;
623 __xmlRaiseError(NULL, NULL, NULL,
624 ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
625 NULL, 0, (const char *) str1, (const char *) str2,
626 NULL, val, 0, msg, str1, val, str2);
627 if (ctxt != NULL) {
628 ctxt->wellFormed = 0;
629 if (ctxt->recovery == 0)
630 ctxt->disableSAX = 1;
631 }
632}
633
634/**
635 * xmlFatalErrMsgStr:
636 * @ctxt: an XML parser context
637 * @error: the error number
638 * @msg: the error message
639 * @val: a string value
640 *
641 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
642 */
643static void
644xmlFatalErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
645 const char *msg, const xmlChar * val)
646{
647 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
648 (ctxt->instate == XML_PARSER_EOF))
649 return;
650 if (ctxt != NULL)
651 ctxt->errNo = error;
652 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
653 XML_FROM_PARSER, error, XML_ERR_FATAL,
654 NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
655 val);
656 if (ctxt != NULL) {
657 ctxt->wellFormed = 0;
658 if (ctxt->recovery == 0)
659 ctxt->disableSAX = 1;
660 }
661}
662
663/**
664 * xmlErrMsgStr:
665 * @ctxt: an XML parser context
666 * @error: the error number
667 * @msg: the error message
668 * @val: a string value
669 *
670 * Handle a non fatal parser error
671 */
672static void
673xmlErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
674 const char *msg, const xmlChar * val)
675{
676 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
677 (ctxt->instate == XML_PARSER_EOF))
678 return;
679 if (ctxt != NULL)
680 ctxt->errNo = error;
681 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
682 XML_FROM_PARSER, error, XML_ERR_ERROR,
683 NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
684 val);
685}
686
687/**
688 * xmlNsErr:
689 * @ctxt: an XML parser context
690 * @error: the error number
691 * @msg: the message
692 * @info1: extra information string
693 * @info2: extra information string
694 *
695 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
696 */
697static void
698xmlNsErr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
699 const char *msg,
700 const xmlChar * info1, const xmlChar * info2,
701 const xmlChar * info3)
702{
703 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
704 (ctxt->instate == XML_PARSER_EOF))
705 return;
706 if (ctxt != NULL)
707 ctxt->errNo = error;
708 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error,
709 XML_ERR_ERROR, NULL, 0, (const char *) info1,
710 (const char *) info2, (const char *) info3, 0, 0, msg,
711 info1, info2, info3);
712 if (ctxt != NULL)
713 ctxt->nsWellFormed = 0;
714}
715
716/************************************************************************
717 * *
718 * Library wide options *
719 * *
720 ************************************************************************/
721
722/**
723 * xmlHasFeature:
724 * @feature: the feature to be examined
725 *
726 * Examines if the library has been compiled with a given feature.
727 *
728 * Returns a non-zero value if the feature exist, otherwise zero.
729 * Returns zero (0) if the feature does not exist or an unknown
730 * unknown feature is requested, non-zero otherwise.
731 */
732int
733xmlHasFeature(xmlFeature feature)
734{
735 switch (feature) {
736 case XML_WITH_THREAD:
737#ifdef LIBXML_THREAD_ENABLED
738 return(1);
739#else
740 return(0);
741#endif
742 case XML_WITH_TREE:
743#ifdef LIBXML_TREE_ENABLED
744 return(1);
745#else
746 return(0);
747#endif
748 case XML_WITH_OUTPUT:
749#ifdef LIBXML_OUTPUT_ENABLED
750 return(1);
751#else
752 return(0);
753#endif
754 case XML_WITH_PUSH:
755#ifdef LIBXML_PUSH_ENABLED
756 return(1);
757#else
758 return(0);
759#endif
760 case XML_WITH_READER:
761#ifdef LIBXML_READER_ENABLED
762 return(1);
763#else
764 return(0);
765#endif
766 case XML_WITH_PATTERN:
767#ifdef LIBXML_PATTERN_ENABLED
768 return(1);
769#else
770 return(0);
771#endif
772 case XML_WITH_WRITER:
773#ifdef LIBXML_WRITER_ENABLED
774 return(1);
775#else
776 return(0);
777#endif
778 case XML_WITH_SAX1:
779#ifdef LIBXML_SAX1_ENABLED
780 return(1);
781#else
782 return(0);
783#endif
784 case XML_WITH_FTP:
785#ifdef LIBXML_FTP_ENABLED
786 return(1);
787#else
788 return(0);
789#endif
790 case XML_WITH_HTTP:
791#ifdef LIBXML_HTTP_ENABLED
792 return(1);
793#else
794 return(0);
795#endif
796 case XML_WITH_VALID:
797#ifdef LIBXML_VALID_ENABLED
798 return(1);
799#else
800 return(0);
801#endif
802 case XML_WITH_HTML:
803#ifdef LIBXML_HTML_ENABLED
804 return(1);
805#else
806 return(0);
807#endif
808 case XML_WITH_LEGACY:
809#ifdef LIBXML_LEGACY_ENABLED
810 return(1);
811#else
812 return(0);
813#endif
814 case XML_WITH_C14N:
815#ifdef LIBXML_C14N_ENABLED
816 return(1);
817#else
818 return(0);
819#endif
820 case XML_WITH_CATALOG:
821#ifdef LIBXML_CATALOG_ENABLED
822 return(1);
823#else
824 return(0);
825#endif
826 case XML_WITH_XPATH:
827#ifdef LIBXML_XPATH_ENABLED
828 return(1);
829#else
830 return(0);
831#endif
832 case XML_WITH_XPTR:
833#ifdef LIBXML_XPTR_ENABLED
834 return(1);
835#else
836 return(0);
837#endif
838 case XML_WITH_XINCLUDE:
839#ifdef LIBXML_XINCLUDE_ENABLED
840 return(1);
841#else
842 return(0);
843#endif
844 case XML_WITH_ICONV:
845#ifdef LIBXML_ICONV_ENABLED
846 return(1);
847#else
848 return(0);
849#endif
850 case XML_WITH_ISO8859X:
851#ifdef LIBXML_ISO8859X_ENABLED
852 return(1);
853#else
854 return(0);
855#endif
856 case XML_WITH_UNICODE:
857#ifdef LIBXML_UNICODE_ENABLED
858 return(1);
859#else
860 return(0);
861#endif
862 case XML_WITH_REGEXP:
863#ifdef LIBXML_REGEXP_ENABLED
864 return(1);
865#else
866 return(0);
867#endif
868 case XML_WITH_AUTOMATA:
869#ifdef LIBXML_AUTOMATA_ENABLED
870 return(1);
871#else
872 return(0);
873#endif
874 case XML_WITH_EXPR:
875#ifdef LIBXML_EXPR_ENABLED
876 return(1);
877#else
878 return(0);
879#endif
880 case XML_WITH_SCHEMAS:
881#ifdef LIBXML_SCHEMAS_ENABLED
882 return(1);
883#else
884 return(0);
885#endif
886 case XML_WITH_SCHEMATRON:
887#ifdef LIBXML_SCHEMATRON_ENABLED
888 return(1);
889#else
890 return(0);
891#endif
892 case XML_WITH_MODULES:
893#ifdef LIBXML_MODULES_ENABLED
894 return(1);
895#else
896 return(0);
897#endif
898 case XML_WITH_DEBUG:
899#ifdef LIBXML_DEBUG_ENABLED
900 return(1);
901#else
902 return(0);
903#endif
904 case XML_WITH_DEBUG_MEM:
905#ifdef DEBUG_MEMORY_LOCATION
906 return(1);
907#else
908 return(0);
909#endif
910 case XML_WITH_DEBUG_RUN:
911#ifdef LIBXML_DEBUG_RUNTIME
912 return(1);
913#else
914 return(0);
915#endif
916 case XML_WITH_ZLIB:
917#ifdef LIBXML_ZLIB_ENABLED
918 return(1);
919#else
920 return(0);
921#endif
922 default:
923 break;
924 }
925 return(0);
926}
927
928/************************************************************************
929 * *
930 * SAX2 defaulted attributes handling *
931 * *
932 ************************************************************************/
933
934/**
935 * xmlDetectSAX2:
936 * @ctxt: an XML parser context
937 *
938 * Do the SAX2 detection and specific intialization
939 */
940static void
941xmlDetectSAX2(xmlParserCtxtPtr ctxt) {
942 if (ctxt == NULL) return;
943#ifdef LIBXML_SAX1_ENABLED
944 if ((ctxt->sax) && (ctxt->sax->initialized == XML_SAX2_MAGIC) &&
945 ((ctxt->sax->startElementNs != NULL) ||
946 (ctxt->sax->endElementNs != NULL))) ctxt->sax2 = 1;
947#else
948 ctxt->sax2 = 1;
949#endif /* LIBXML_SAX1_ENABLED */
950
951 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
952 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
953 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
954 if ((ctxt->str_xml==NULL) || (ctxt->str_xmlns==NULL) ||
955 (ctxt->str_xml_ns == NULL)) {
956 xmlErrMemory(ctxt, NULL);
957 }
958}
959
960typedef struct _xmlDefAttrs xmlDefAttrs;
961typedef xmlDefAttrs *xmlDefAttrsPtr;
962struct _xmlDefAttrs {
963 int nbAttrs; /* number of defaulted attributes on that element */
964 int maxAttrs; /* the size of the array */
965 const xmlChar *values[4]; /* array of localname/prefix/values */
966};
967
968/**
969 * xmlAddDefAttrs:
970 * @ctxt: an XML parser context
971 * @fullname: the element fullname
972 * @fullattr: the attribute fullname
973 * @value: the attribute value
974 *
975 * Add a defaulted attribute for an element
976 */
977static void
978xmlAddDefAttrs(xmlParserCtxtPtr ctxt,
979 const xmlChar *fullname,
980 const xmlChar *fullattr,
981 const xmlChar *value) {
982 xmlDefAttrsPtr defaults;
983 int len;
984 const xmlChar *name;
985 const xmlChar *prefix;
986
987 if (ctxt->attsDefault == NULL) {
988 ctxt->attsDefault = xmlHashCreateDict(10, ctxt->dict);
989 if (ctxt->attsDefault == NULL)
990 goto mem_error;
991 }
992
993 /*
994 * split the element name into prefix:localname , the string found
995 * are within the DTD and then not associated to namespace names.
996 */
997 name = xmlSplitQName3(fullname, &len);
998 if (name == NULL) {
999 name = xmlDictLookup(ctxt->dict, fullname, -1);
1000 prefix = NULL;
1001 } else {
1002 name = xmlDictLookup(ctxt->dict, name, -1);
1003 prefix = xmlDictLookup(ctxt->dict, fullname, len);
1004 }
1005
1006 /*
1007 * make sure there is some storage
1008 */
1009 defaults = xmlHashLookup2(ctxt->attsDefault, name, prefix);
1010 if (defaults == NULL) {
1011 defaults = (xmlDefAttrsPtr) xmlMalloc(sizeof(xmlDefAttrs) +
1012 (4 * 4) * sizeof(const xmlChar *));
1013 if (defaults == NULL)
1014 goto mem_error;
1015 defaults->nbAttrs = 0;
1016 defaults->maxAttrs = 4;
1017 xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix, defaults, NULL);
1018 } else if (defaults->nbAttrs >= defaults->maxAttrs) {
1019 xmlDefAttrsPtr temp;
1020
1021 temp = (xmlDefAttrsPtr) xmlRealloc(defaults, sizeof(xmlDefAttrs) +
1022 (2 * defaults->maxAttrs * 4) * sizeof(const xmlChar *));
1023 if (temp == NULL)
1024 goto mem_error;
1025 defaults = temp;
1026 defaults->maxAttrs *= 2;
1027 xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix, defaults, NULL);
1028 }
1029
1030 /*
1031 * Split the element name into prefix:localname , the string found
1032 * are within the DTD and hen not associated to namespace names.
1033 */
1034 name = xmlSplitQName3(fullattr, &len);
1035 if (name == NULL) {
1036 name = xmlDictLookup(ctxt->dict, fullattr, -1);
1037 prefix = NULL;
1038 } else {
1039 name = xmlDictLookup(ctxt->dict, name, -1);
1040 prefix = xmlDictLookup(ctxt->dict, fullattr, len);
1041 }
1042
1043 defaults->values[4 * defaults->nbAttrs] = name;
1044 defaults->values[4 * defaults->nbAttrs + 1] = prefix;
1045 /* intern the string and precompute the end */
1046 len = xmlStrlen(value);
1047 value = xmlDictLookup(ctxt->dict, value, len);
1048 defaults->values[4 * defaults->nbAttrs + 2] = value;
1049 defaults->values[4 * defaults->nbAttrs + 3] = value + len;
1050 defaults->nbAttrs++;
1051
1052 return;
1053
1054mem_error:
1055 xmlErrMemory(ctxt, NULL);
1056 return;
1057}
1058
1059/**
1060 * xmlAddSpecialAttr:
1061 * @ctxt: an XML parser context
1062 * @fullname: the element fullname
1063 * @fullattr: the attribute fullname
1064 * @type: the attribute type
1065 *
1066 * Register this attribute type
1067 */
1068static void
1069xmlAddSpecialAttr(xmlParserCtxtPtr ctxt,
1070 const xmlChar *fullname,
1071 const xmlChar *fullattr,
1072 int type)
1073{
1074 if (ctxt->attsSpecial == NULL) {
1075 ctxt->attsSpecial = xmlHashCreateDict(10, ctxt->dict);
1076 if (ctxt->attsSpecial == NULL)
1077 goto mem_error;
1078 }
1079
1080 if (xmlHashLookup2(ctxt->attsSpecial, fullname, fullattr) != NULL)
1081 return;
1082
1083 xmlHashAddEntry2(ctxt->attsSpecial, fullname, fullattr,
1084 (void *) (long) type);
1085 return;
1086
1087mem_error:
1088 xmlErrMemory(ctxt, NULL);
1089 return;
1090}
1091
1092/**
1093 * xmlCleanSpecialAttrCallback:
1094 *
1095 * Removes CDATA attributes from the special attribute table
1096 */
1097static void
1098xmlCleanSpecialAttrCallback(void *payload, void *data,
1099 const xmlChar *fullname, const xmlChar *fullattr,
1100 const xmlChar *unused ATTRIBUTE_UNUSED) {
1101 xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) data;
1102
1103 if (((long) payload) == XML_ATTRIBUTE_CDATA) {
1104 xmlHashRemoveEntry2(ctxt->attsSpecial, fullname, fullattr, NULL);
1105 }
1106}
1107
1108/**
1109 * xmlCleanSpecialAttr:
1110 * @ctxt: an XML parser context
1111 *
1112 * Trim the list of attributes defined to remove all those of type
1113 * CDATA as they are not special. This call should be done when finishing
1114 * to parse the DTD and before starting to parse the document root.
1115 */
1116static void
1117xmlCleanSpecialAttr(xmlParserCtxtPtr ctxt)
1118{
1119 if (ctxt->attsSpecial == NULL)
1120 return;
1121
1122 xmlHashScanFull(ctxt->attsSpecial, xmlCleanSpecialAttrCallback, ctxt);
1123
1124 if (xmlHashSize(ctxt->attsSpecial) == 0) {
1125 xmlHashFree(ctxt->attsSpecial, NULL);
1126 ctxt->attsSpecial = NULL;
1127 }
1128 return;
1129}
1130
/**
 * xmlLangIsLetter:
 * @c:  the character to test
 *
 * Returns 1 if @c is in [a-z] or [A-Z], 0 otherwise
 */
static int
xmlLangIsLetter(xmlChar c)
{
    return (((c >= 'A') && (c <= 'Z')) || ((c >= 'a') && (c <= 'z')));
}

/**
 * xmlCheckLanguageID:
 * @lang:  pointer to the string value
 *
 * Checks that the value conforms to the LanguageID production:
 *
 * NOTE: this is somewhat deprecated, those productions were removed from
 *       the XML Second edition.
 *
 * [33] LanguageID ::= Langcode ('-' Subcode)*
 * [34] Langcode ::= ISO639Code |  IanaCode |  UserCode
 * [35] ISO639Code ::= ([a-z] | [A-Z]) ([a-z] | [A-Z])
 * [36] IanaCode ::= ('i' | 'I') '-' ([a-z] | [A-Z])+
 * [37] UserCode ::= ('x' | 'X') '-' ([a-z] | [A-Z])+
 * [38] Subcode ::= ([a-z] | [A-Z])+
 *
 * Returns 1 if correct 0 otherwise (including when @lang is NULL)
 **/
int
xmlCheckLanguageID(const xmlChar * lang)
{
    const xmlChar *cur = lang;

    if (cur == NULL)
        return (0);

    if (((cur[0] == 'i') || (cur[0] == 'I') ||
         (cur[0] == 'x') || (cur[0] == 'X')) && (cur[1] == '-')) {
        /*
         * IANA code or User code: productions [36] and [37] require at
         * least one letter after the '-', so a bare "i-" or "x-" is
         * rejected.
         */
        cur += 2;
        if (!xmlLangIsLetter(cur[0]))
            return (0);
        while (xmlLangIsLetter(cur[0]))
            cur++;
    } else if (xmlLangIsLetter(cur[0]) && xmlLangIsLetter(cur[1])) {
        /*
         * ISO639: exactly two letters. cur[1] is only read when cur[0]
         * is a letter, i.e. not the terminating 0.
         */
        cur += 2;
    } else {
        return (0);
    }

    /*
     * Any number of '-' Subcode, each Subcode being one or more letters
     */
    while (cur[0] != 0) {
        if (cur[0] != '-')
            return (0);
        cur++;
        if (!xmlLangIsLetter(cur[0]))
            return (0);
        while (xmlLangIsLetter(cur[0]))
            cur++;
    }
    return (1);
}
1202
1203/************************************************************************
1204 * *
1205 * Parser stacks related functions and macros *
1206 * *
1207 ************************************************************************/
1208
1209xmlEntityPtr xmlParseStringEntityRef(xmlParserCtxtPtr ctxt,
1210 const xmlChar ** str);
1211
1212#ifdef SAX2
1213/**
1214 * nsPush:
1215 * @ctxt: an XML parser context
1216 * @prefix: the namespace prefix or NULL
1217 * @URL: the namespace name
1218 *
1219 * Pushes a new parser namespace on top of the ns stack
1220 *
1221 * Returns -1 in case of error, -2 if the namespace should be discarded
1222 * and the index in the stack otherwise.
1223 */
1224static int
1225nsPush(xmlParserCtxtPtr ctxt, const xmlChar *prefix, const xmlChar *URL)
1226{
1227 if (ctxt->options & XML_PARSE_NSCLEAN) {
1228 int i;
1229 for (i = 0;i < ctxt->nsNr;i += 2) {
1230 if (ctxt->nsTab[i] == prefix) {
1231 /* in scope */
1232 if (ctxt->nsTab[i + 1] == URL)
1233 return(-2);
1234 /* out of scope keep it */
1235 break;
1236 }
1237 }
1238 }
1239 if ((ctxt->nsMax == 0) || (ctxt->nsTab == NULL)) {
1240 ctxt->nsMax = 10;
1241 ctxt->nsNr = 0;
1242 ctxt->nsTab = (const xmlChar **)
1243 xmlMalloc(ctxt->nsMax * sizeof(xmlChar *));
1244 if (ctxt->nsTab == NULL) {
1245 xmlErrMemory(ctxt, NULL);
1246 ctxt->nsMax = 0;
1247 return (-1);
1248 }
1249 } else if (ctxt->nsNr >= ctxt->nsMax) {
1250 ctxt->nsMax *= 2;
1251 ctxt->nsTab = (const xmlChar **)
1252 xmlRealloc((char *) ctxt->nsTab,
1253 ctxt->nsMax * sizeof(ctxt->nsTab[0]));
1254 if (ctxt->nsTab == NULL) {
1255 xmlErrMemory(ctxt, NULL);
1256 ctxt->nsMax /= 2;
1257 return (-1);
1258 }
1259 }
1260 ctxt->nsTab[ctxt->nsNr++] = prefix;
1261 ctxt->nsTab[ctxt->nsNr++] = URL;
1262 return (ctxt->nsNr);
1263}
1264/**
1265 * nsPop:
1266 * @ctxt: an XML parser context
1267 * @nr: the number to pop
1268 *
1269 * Pops the top @nr parser prefix/namespace from the ns stack
1270 *
1271 * Returns the number of namespaces removed
1272 */
1273static int
1274nsPop(xmlParserCtxtPtr ctxt, int nr)
1275{
1276 int i;
1277
1278 if (ctxt->nsTab == NULL) return(0);
1279 if (ctxt->nsNr < nr) {
1280 xmlGenericError(xmlGenericErrorContext, "Pbm popping %d NS\n", nr);
1281 nr = ctxt->nsNr;
1282 }
1283 if (ctxt->nsNr <= 0)
1284 return (0);
1285
1286 for (i = 0;i < nr;i++) {
1287 ctxt->nsNr--;
1288 ctxt->nsTab[ctxt->nsNr] = NULL;
1289 }
1290 return(nr);
1291}
1292#endif
1293
1294static int
1295xmlCtxtGrowAttrs(xmlParserCtxtPtr ctxt, int nr) {
1296 const xmlChar **atts;
1297 int *attallocs;
1298 int maxatts;
1299
1300 if (ctxt->atts == NULL) {
1301 maxatts = 55; /* allow for 10 attrs by default */
1302 atts = (const xmlChar **)
1303 xmlMalloc(maxatts * sizeof(xmlChar *));
1304 if (atts == NULL) goto mem_error;
1305 ctxt->atts = atts;
1306 attallocs = (int *) xmlMalloc((maxatts / 5) * sizeof(int));
1307 if (attallocs == NULL) goto mem_error;
1308 ctxt->attallocs = attallocs;
1309 ctxt->maxatts = maxatts;
1310 } else if (nr + 5 > ctxt->maxatts) {
1311 maxatts = (nr + 5) * 2;
1312 atts = (const xmlChar **) xmlRealloc((void *) ctxt->atts,
1313 maxatts * sizeof(const xmlChar *));
1314 if (atts == NULL) goto mem_error;
1315 ctxt->atts = atts;
1316 attallocs = (int *) xmlRealloc((void *) ctxt->attallocs,
1317 (maxatts / 5) * sizeof(int));
1318 if (attallocs == NULL) goto mem_error;
1319 ctxt->attallocs = attallocs;
1320 ctxt->maxatts = maxatts;
1321 }
1322 return(ctxt->maxatts);
1323mem_error:
1324 xmlErrMemory(ctxt, NULL);
1325 return(-1);
1326}
1327
1328/**
1329 * inputPush:
1330 * @ctxt: an XML parser context
1331 * @value: the parser input
1332 *
1333 * Pushes a new parser input on top of the input stack
1334 *
1335 * Returns 0 in case of error, the index in the stack otherwise
1336 */
1337int
1338inputPush(xmlParserCtxtPtr ctxt, xmlParserInputPtr value)
1339{
1340 if ((ctxt == NULL) || (value == NULL))
1341 return(0);
1342 if (ctxt->inputNr >= ctxt->inputMax) {
1343 ctxt->inputMax *= 2;
1344 ctxt->inputTab =
1345 (xmlParserInputPtr *) xmlRealloc(ctxt->inputTab,
1346 ctxt->inputMax *
1347 sizeof(ctxt->inputTab[0]));
1348 if (ctxt->inputTab == NULL) {
1349 xmlErrMemory(ctxt, NULL);
1350 return (0);
1351 }
1352 }
1353 ctxt->inputTab[ctxt->inputNr] = value;
1354 ctxt->input = value;
1355 return (ctxt->inputNr++);
1356}
1357/**
1358 * inputPop:
1359 * @ctxt: an XML parser context
1360 *
1361 * Pops the top parser input from the input stack
1362 *
1363 * Returns the input just removed
1364 */
1365xmlParserInputPtr
1366inputPop(xmlParserCtxtPtr ctxt)
1367{
1368 xmlParserInputPtr ret;
1369
1370 if (ctxt == NULL)
1371 return(NULL);
1372 if (ctxt->inputNr <= 0)
1373 return (NULL);
1374 ctxt->inputNr--;
1375 if (ctxt->inputNr > 0)
1376 ctxt->input = ctxt->inputTab[ctxt->inputNr - 1];
1377 else
1378 ctxt->input = NULL;
1379 ret = ctxt->inputTab[ctxt->inputNr];
1380 ctxt->inputTab[ctxt->inputNr] = NULL;
1381 return (ret);
1382}
1383/**
1384 * nodePush:
1385 * @ctxt: an XML parser context
1386 * @value: the element node
1387 *
1388 * Pushes a new element node on top of the node stack
1389 *
1390 * Returns 0 in case of error, the index in the stack otherwise
1391 */
1392int
1393nodePush(xmlParserCtxtPtr ctxt, xmlNodePtr value)
1394{
1395 if (ctxt == NULL) return(0);
1396 if (ctxt->nodeNr >= ctxt->nodeMax) {
1397 xmlNodePtr *tmp;
1398
1399 tmp = (xmlNodePtr *) xmlRealloc(ctxt->nodeTab,
1400 ctxt->nodeMax * 2 *
1401 sizeof(ctxt->nodeTab[0]));
1402 if (tmp == NULL) {
1403 xmlErrMemory(ctxt, NULL);
1404 return (0);
1405 }
1406 ctxt->nodeTab = tmp;
1407 ctxt->nodeMax *= 2;
1408 }
1409 if (((unsigned int) ctxt->nodeNr) > xmlParserMaxDepth) {
1410 xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR,
1411 "Excessive depth in document: change xmlParserMaxDepth = %d\n",
1412 xmlParserMaxDepth);
1413 ctxt->instate = XML_PARSER_EOF;
1414 return(0);
1415 }
1416 ctxt->nodeTab[ctxt->nodeNr] = value;
1417 ctxt->node = value;
1418 return (ctxt->nodeNr++);
1419}
1420/**
1421 * nodePop:
1422 * @ctxt: an XML parser context
1423 *
1424 * Pops the top element node from the node stack
1425 *
1426 * Returns the node just removed
1427 */
1428xmlNodePtr
1429nodePop(xmlParserCtxtPtr ctxt)
1430{
1431 xmlNodePtr ret;
1432
1433 if (ctxt == NULL) return(NULL);
1434 if (ctxt->nodeNr <= 0)
1435 return (NULL);
1436 ctxt->nodeNr--;
1437 if (ctxt->nodeNr > 0)
1438 ctxt->node = ctxt->nodeTab[ctxt->nodeNr - 1];
1439 else
1440 ctxt->node = NULL;
1441 ret = ctxt->nodeTab[ctxt->nodeNr];
1442 ctxt->nodeTab[ctxt->nodeNr] = NULL;
1443 return (ret);
1444}
1445
1446#ifdef LIBXML_PUSH_ENABLED
1447/**
1448 * nameNsPush:
1449 * @ctxt: an XML parser context
1450 * @value: the element name
1451 * @prefix: the element prefix
1452 * @URI: the element namespace name
1453 *
1454 * Pushes a new element name/prefix/URL on top of the name stack
1455 *
1456 * Returns -1 in case of error, the index in the stack otherwise
1457 */
1458static int
1459nameNsPush(xmlParserCtxtPtr ctxt, const xmlChar * value,
1460 const xmlChar *prefix, const xmlChar *URI, int nsNr)
1461{
1462 if (ctxt->nameNr >= ctxt->nameMax) {
1463 const xmlChar * *tmp;
1464 void **tmp2;
1465 ctxt->nameMax *= 2;
1466 tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
1467 ctxt->nameMax *
1468 sizeof(ctxt->nameTab[0]));
1469 if (tmp == NULL) {
1470 ctxt->nameMax /= 2;
1471 goto mem_error;
1472 }
1473 ctxt->nameTab = tmp;
1474 tmp2 = (void **) xmlRealloc((void * *)ctxt->pushTab,
1475 ctxt->nameMax * 3 *
1476 sizeof(ctxt->pushTab[0]));
1477 if (tmp2 == NULL) {
1478 ctxt->nameMax /= 2;
1479 goto mem_error;
1480 }
1481 ctxt->pushTab = tmp2;
1482 }
1483 ctxt->nameTab[ctxt->nameNr] = value;
1484 ctxt->name = value;
1485 ctxt->pushTab[ctxt->nameNr * 3] = (void *) prefix;
1486 ctxt->pushTab[ctxt->nameNr * 3 + 1] = (void *) URI;
1487 ctxt->pushTab[ctxt->nameNr * 3 + 2] = (void *) (long) nsNr;
1488 return (ctxt->nameNr++);
1489mem_error:
1490 xmlErrMemory(ctxt, NULL);
1491 return (-1);
1492}
1493/**
1494 * nameNsPop:
1495 * @ctxt: an XML parser context
1496 *
1497 * Pops the top element/prefix/URI name from the name stack
1498 *
1499 * Returns the name just removed
1500 */
1501static const xmlChar *
1502nameNsPop(xmlParserCtxtPtr ctxt)
1503{
1504 const xmlChar *ret;
1505
1506 if (ctxt->nameNr <= 0)
1507 return (NULL);
1508 ctxt->nameNr--;
1509 if (ctxt->nameNr > 0)
1510 ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
1511 else
1512 ctxt->name = NULL;
1513 ret = ctxt->nameTab[ctxt->nameNr];
1514 ctxt->nameTab[ctxt->nameNr] = NULL;
1515 return (ret);
1516}
1517#endif /* LIBXML_PUSH_ENABLED */
1518
1519/**
1520 * namePush:
1521 * @ctxt: an XML parser context
1522 * @value: the element name
1523 *
1524 * Pushes a new element name on top of the name stack
1525 *
1526 * Returns -1 in case of error, the index in the stack otherwise
1527 */
1528int
1529namePush(xmlParserCtxtPtr ctxt, const xmlChar * value)
1530{
1531 if (ctxt == NULL) return (-1);
1532
1533 if (ctxt->nameNr >= ctxt->nameMax) {
1534 const xmlChar * *tmp;
1535 ctxt->nameMax *= 2;
1536 tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
1537 ctxt->nameMax *
1538 sizeof(ctxt->nameTab[0]));
1539 if (tmp == NULL) {
1540 ctxt->nameMax /= 2;
1541 goto mem_error;
1542 }
1543 ctxt->nameTab = tmp;
1544 }
1545 ctxt->nameTab[ctxt->nameNr] = value;
1546 ctxt->name = value;
1547 return (ctxt->nameNr++);
1548mem_error:
1549 xmlErrMemory(ctxt, NULL);
1550 return (-1);
1551}
1552/**
1553 * namePop:
1554 * @ctxt: an XML parser context
1555 *
1556 * Pops the top element name from the name stack
1557 *
1558 * Returns the name just removed
1559 */
1560const xmlChar *
1561namePop(xmlParserCtxtPtr ctxt)
1562{
1563 const xmlChar *ret;
1564
1565 if ((ctxt == NULL) || (ctxt->nameNr <= 0))
1566 return (NULL);
1567 ctxt->nameNr--;
1568 if (ctxt->nameNr > 0)
1569 ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
1570 else
1571 ctxt->name = NULL;
1572 ret = ctxt->nameTab[ctxt->nameNr];
1573 ctxt->nameTab[ctxt->nameNr] = NULL;
1574 return (ret);
1575}
1576
1577static int spacePush(xmlParserCtxtPtr ctxt, int val) {
1578 if (ctxt->spaceNr >= ctxt->spaceMax) {
1579 ctxt->spaceMax *= 2;
1580 ctxt->spaceTab = (int *) xmlRealloc(ctxt->spaceTab,
1581 ctxt->spaceMax * sizeof(ctxt->spaceTab[0]));
1582 if (ctxt->spaceTab == NULL) {
1583 xmlErrMemory(ctxt, NULL);
1584 return(0);
1585 }
1586 }
1587 ctxt->spaceTab[ctxt->spaceNr] = val;
1588 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr];
1589 return(ctxt->spaceNr++);
1590}
1591
1592static int spacePop(xmlParserCtxtPtr ctxt) {
1593 int ret;
1594 if (ctxt->spaceNr <= 0) return(0);
1595 ctxt->spaceNr--;
1596 if (ctxt->spaceNr > 0)
1597 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr - 1];
1598 else
1599 ctxt->space = &ctxt->spaceTab[0];
1600 ret = ctxt->spaceTab[ctxt->spaceNr];
1601 ctxt->spaceTab[ctxt->spaceNr] = -1;
1602 return(ret);
1603}
1604
/*
 * Macros for accessing the content. Those should be used only by the parser,
 * and not exported.
 *
 * Dirty macros, i.e. one often needs to make assumptions on the context to
 * use them
 *
 *   CUR_PTR return the current pointer to the xmlChar to be parsed.
 *           To be used with extreme caution since operations consuming
 *           characters may move the input buffer to a different location !
 *   CUR     returns the current xmlChar value, i.e. an 8 bit value.
 *           This should be used internally by the parser
 *           only to compare to ASCII values otherwise it would break when
 *           running with UTF-8 encoding.
 *   RAW     same as CUR but in the input buffer, bypass any token
 *           extraction that may have been done
 *   NXT(n)  returns the n'th next xmlChar. Same as CUR, it should be used
 *           only to compare on ASCII based substring.
 *   SKIP(n) Skip n xmlChar, and must also be used only to skip ASCII defined
 *           strings without newlines within the parser.
 *   NEXT1   Skip 1 xmlChar, and must also be used only to skip 1 non-newline
 *           ASCII defined char within the parser.
 * Clean macros, not dependent on an ASCII context, expect UTF-8 encoding
 *
 *   NEXT    Skip to the next character, this does the proper decoding
 *           in UTF-8 mode. It also pops up unfinished entities on the fly.
 *   NEXTL(l) Skip the current unicode character of l xmlChars long.
 *   CUR_CHAR(l) returns the current unicode character (int), set l
 *           to the number of xmlChars used for the encoding [0-5].
 *   CUR_SCHAR  same but operate on a string instead of the context
 *   COPY_BUF  copy the current unicode char to the target buffer, increment
 *            the index
 *   GROW, SHRINK  handling of input buffers
 */
1639
/*
 * Direct accessors on the current input of the parser context. They all
 * expect a variable named ctxt to be in scope.
 */
#define RAW (*ctxt->input->cur)
#define CUR (*ctxt->input->cur)
#define NXT(val) ctxt->input->cur[(val)]
#define CUR_PTR ctxt->input->cur

/*
 * CMPn(s, c1, ..., cn) is true when the n first bytes found at s are equal
 * to c1 ... cn. Every parameter is parenthesized so that an expression
 * such as "ptr + 1" is converted to a byte pointer as a whole.
 */
#define CMP4( s, c1, c2, c3, c4 ) \
  ( ((unsigned char *) (s))[ 0 ] == (c1) && \
    ((unsigned char *) (s))[ 1 ] == (c2) && \
    ((unsigned char *) (s))[ 2 ] == (c3) && \
    ((unsigned char *) (s))[ 3 ] == (c4) )
#define CMP5( s, c1, c2, c3, c4, c5 ) \
  ( CMP4( s, c1, c2, c3, c4 ) && ((unsigned char *) (s))[ 4 ] == (c5) )
#define CMP6( s, c1, c2, c3, c4, c5, c6 ) \
  ( CMP5( s, c1, c2, c3, c4, c5 ) && ((unsigned char *) (s))[ 5 ] == (c6) )
#define CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) \
  ( CMP6( s, c1, c2, c3, c4, c5, c6 ) && ((unsigned char *) (s))[ 6 ] == (c7) )
#define CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) \
  ( CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) && ((unsigned char *) (s))[ 7 ] == (c8) )
#define CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) \
  ( CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) && \
    ((unsigned char *) (s))[ 8 ] == (c9) )
#define CMP10( s, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10 ) \
  ( CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) && \
    ((unsigned char *) (s))[ 9 ] == (c10) )
1662
/*
 * SKIP(val): advance the current input by val xmlChars, updating the
 * char count and the column only; then handle a '%' found at the new
 * position and pop the input if it is exhausted and cannot grow.
 */
#define SKIP(val) do {							\
    ctxt->nbChars += (val),ctxt->input->cur += (val),ctxt->input->col+=(val);			\
    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);	\
    if ((*ctxt->input->cur == 0) &&					\
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))		\
	    xmlPopInput(ctxt);						\
  } while (0)

/*
 * SKIPL(val): same as SKIP but goes one xmlChar at a time so that line
 * and column stay accurate across newlines. val is parenthesized in the
 * loop test so that an expression argument is compared as a whole.
 */
#define SKIPL(val) do {							\
    int skipl;								\
    for(skipl=0; skipl<(val); skipl++) {				\
    	if (*(ctxt->input->cur) == '\n') {				\
	ctxt->input->line++; ctxt->input->col = 1;			\
    	} else ctxt->input->col++;					\
    	ctxt->nbChars++;						\
	ctxt->input->cur++;						\
    }									\
    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);	\
    if ((*ctxt->input->cur == 0) &&					\
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))		\
	    xmlPopInput(ctxt);						\
  } while (0)
1685
1686#define SHRINK if ((ctxt->progressive == 0) && \
1687 (ctxt->input->cur - ctxt->input->base > 2 * INPUT_CHUNK) && \
1688 (ctxt->input->end - ctxt->input->cur < 2 * INPUT_CHUNK)) \
1689 xmlSHRINK (ctxt);
1690
1691static void xmlSHRINK (xmlParserCtxtPtr ctxt) {
1692 xmlParserInputShrink(ctxt->input);
1693 if ((*ctxt->input->cur == 0) &&
1694 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
1695 xmlPopInput(ctxt);
1696 }
1697
1698#define GROW if ((ctxt->progressive == 0) && \
1699 (ctxt->input->end - ctxt->input->cur < INPUT_CHUNK)) \
1700 xmlGROW (ctxt);
1701
1702static void xmlGROW (xmlParserCtxtPtr ctxt) {
1703 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1704 if ((*ctxt->input->cur == 0) &&
1705 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
1706 xmlPopInput(ctxt);
1707}
1708
#define SKIP_BLANKS xmlSkipBlankChars(ctxt)

#define NEXT xmlNextChar(ctxt)

/*
 * NEXT1: step over one xmlChar, counting one column and one char, and
 * try to grow the input when its end is reached.
 */
#define NEXT1 {								\
	ctxt->input->col++;						\
	ctxt->input->cur++;						\
	ctxt->nbChars++;						\
	if (*ctxt->input->cur == 0)					\
	    xmlParserInputGrow(ctxt->input, INPUT_CHUNK);		\
    }

/*
 * NEXTL(l): step over the current character which is l xmlChars long,
 * keeping line and column up to date, then handle a '%' found at the
 * new position.
 */
#define NEXTL(l) do {							\
    if (*(ctxt->input->cur) == '\n') {					\
	ctxt->input->line++; ctxt->input->col = 1;			\
    } else ctxt->input->col++;						\
    ctxt->input->cur += (l);				\
    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);	\
  } while (0)

#define CUR_CHAR(l) xmlCurrentChar(ctxt, &(l))
#define CUR_SCHAR(s, l) xmlStringCurrentChar(ctxt, (s), &(l))

/*
 * COPY_BUF(l,b,i,v): append character v to buffer b at index i and
 * advance i; a single byte is stored when l is 1, otherwise
 * xmlCopyCharMultiByte() does the encoding. Parameters are parenthesized
 * so that expression arguments are evaluated as a whole. The if/else
 * shape without trailing ';' is kept for existing callers.
 */
#define COPY_BUF(l,b,i,v)						\
    if ((l) == 1) (b)[(i)++] = (xmlChar) (v);				\
    else (i) += xmlCopyCharMultiByte(&(b)[(i)],(v))
1735
1736/**
1737 * xmlSkipBlankChars:
1738 * @ctxt: the XML parser context
1739 *
1740 * skip all blanks character found at that point in the input streams.
1741 * It pops up finished entities in the process if allowable at that point.
1742 *
1743 * Returns the number of space chars skipped
1744 */
1745
1746int
1747xmlSkipBlankChars(xmlParserCtxtPtr ctxt) {
1748 int res = 0;
1749
1750 /*
1751 * It's Okay to use CUR/NEXT here since all the blanks are on
1752 * the ASCII range.
1753 */
1754 if ((ctxt->inputNr == 1) && (ctxt->instate != XML_PARSER_DTD)) {
1755 const xmlChar *cur;
1756 /*
1757 * if we are in the document content, go really fast
1758 */
1759 cur = ctxt->input->cur;
1760 while (IS_BLANK_CH(*cur)) {
1761 if (*cur == '\n') {
1762 ctxt->input->line++; ctxt->input->col = 1;
1763 }
1764 cur++;
1765 res++;
1766 if (*cur == 0) {
1767 ctxt->input->cur = cur;
1768 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1769 cur = ctxt->input->cur;
1770 }
1771 }
1772 ctxt->input->cur = cur;
1773 } else {
1774 int cur;
1775 do {
1776 cur = CUR;
1777 while (IS_BLANK_CH(cur)) { /* CHECKED tstblanks.xml */
1778 NEXT;
1779 cur = CUR;
1780 res++;
1781 }
1782 while ((cur == 0) && (ctxt->inputNr > 1) &&
1783 (ctxt->instate != XML_PARSER_COMMENT)) {
1784 xmlPopInput(ctxt);
1785 cur = CUR;
1786 }
1787 /*
1788 * Need to handle support of entities branching here
1789 */
1790 if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);
1791 } while (IS_BLANK(cur)); /* CHECKED tstblanks.xml */
1792 }
1793 return(res);
1794}
1795
1796/************************************************************************
1797 * *
1798 * Commodity functions to handle entities *
1799 * *
1800 ************************************************************************/
1801
1802/**
1803 * xmlPopInput:
1804 * @ctxt: an XML parser context
1805 *
1806 * xmlPopInput: the current input pointed by ctxt->input came to an end
1807 * pop it and return the next char.
1808 *
1809 * Returns the current xmlChar in the parser context
1810 */
1811xmlChar
1812xmlPopInput(xmlParserCtxtPtr ctxt) {
1813 if ((ctxt == NULL) || (ctxt->inputNr <= 1)) return(0);
1814 if (xmlParserDebugEntities)
1815 xmlGenericError(xmlGenericErrorContext,
1816 "Popping input %d\n", ctxt->inputNr);
1817 xmlFreeInputStream(inputPop(ctxt));
1818 if ((*ctxt->input->cur == 0) &&
1819 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
1820 return(xmlPopInput(ctxt));
1821 return(CUR);
1822}
1823
1824/**
1825 * xmlPushInput:
1826 * @ctxt: an XML parser context
1827 * @input: an XML parser input fragment (entity, XML fragment ...).
1828 *
1829 * xmlPushInput: switch to a new input stream which is stacked on top
1830 * of the previous one(s).
1831 */
1832void
1833xmlPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr input) {
1834 if (input == NULL) return;
1835
1836 if (xmlParserDebugEntities) {
1837 if ((ctxt->input != NULL) && (ctxt->input->filename))
1838 xmlGenericError(xmlGenericErrorContext,
1839 "%s(%d): ", ctxt->input->filename,
1840 ctxt->input->line);
1841 xmlGenericError(xmlGenericErrorContext,
1842 "Pushing input %d : %.30s\n", ctxt->inputNr+1, input->cur);
1843 }
1844 inputPush(ctxt, input);
1845 GROW;
1846}
1847
1848/**
1849 * xmlParseCharRef:
1850 * @ctxt: an XML parser context
1851 *
1852 * parse Reference declarations
1853 *
1854 * [66] CharRef ::= '&#' [0-9]+ ';' |
1855 * '&#x' [0-9a-fA-F]+ ';'
1856 *
1857 * [ WFC: Legal Character ]
1858 * Characters referred to using character references must match the
1859 * production for Char.
1860 *
1861 * Returns the value parsed (as an int), 0 in case of error
1862 */
1863int
1864xmlParseCharRef(xmlParserCtxtPtr ctxt) {
1865 unsigned int val = 0;
1866 int count = 0;
1867 unsigned int outofrange = 0;
1868
1869 /*
1870 * Using RAW/CUR/NEXT is okay since we are working on ASCII range here
1871 */
1872 if ((RAW == '&') && (NXT(1) == '#') &&
1873 (NXT(2) == 'x')) {
1874 SKIP(3);
1875 GROW;
1876 while (RAW != ';') { /* loop blocked by count */
1877 if (count++ > 20) {
1878 count = 0;
1879 GROW;
1880 }
1881 if ((RAW >= '0') && (RAW <= '9'))
1882 val = val * 16 + (CUR - '0');
1883 else if ((RAW >= 'a') && (RAW <= 'f') && (count < 20))
1884 val = val * 16 + (CUR - 'a') + 10;
1885 else if ((RAW >= 'A') && (RAW <= 'F') && (count < 20))
1886 val = val * 16 + (CUR - 'A') + 10;
1887 else {
1888 xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
1889 val = 0;
1890 break;
1891 }
1892 if (val > 0x10FFFF)
1893 outofrange = val;
1894
1895 NEXT;
1896 count++;
1897 }
1898 if (RAW == ';') {
1899 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
1900 ctxt->input->col++;
1901 ctxt->nbChars ++;
1902 ctxt->input->cur++;
1903 }
1904 } else if ((RAW == '&') && (NXT(1) == '#')) {
1905 SKIP(2);
1906 GROW;
1907 while (RAW != ';') { /* loop blocked by count */
1908 if (count++ > 20) {
1909 count = 0;
1910 GROW;
1911 }
1912 if ((RAW >= '0') && (RAW <= '9'))
1913 val = val * 10 + (CUR - '0');
1914 else {
1915 xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
1916 val = 0;
1917 break;
1918 }
1919 if (val > 0x10FFFF)
1920 outofrange = val;
1921
1922 NEXT;
1923 count++;
1924 }
1925 if (RAW == ';') {
1926 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
1927 ctxt->input->col++;
1928 ctxt->nbChars ++;
1929 ctxt->input->cur++;
1930 }
1931 } else {
1932 xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
1933 }
1934
1935 /*
1936 * [ WFC: Legal Character ]
1937 * Characters referred to using character references must match the
1938 * production for Char.
1939 */
1940 if ((IS_CHAR(val) && (outofrange == 0))) {
1941 return(val);
1942 } else {
1943 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
1944 "xmlParseCharRef: invalid xmlChar value %d\n",
1945 val);
1946 }
1947 return(0);
1948}
1949
1950/**
1951 * xmlParseStringCharRef:
1952 * @ctxt: an XML parser context
1953 * @str: a pointer to an index in the string
1954 *
1955 * parse Reference declarations, variant parsing from a string rather
1956 * than an an input flow.
1957 *
1958 * [66] CharRef ::= '&#' [0-9]+ ';' |
1959 * '&#x' [0-9a-fA-F]+ ';'
1960 *
1961 * [ WFC: Legal Character ]
1962 * Characters referred to using character references must match the
1963 * production for Char.
1964 *
1965 * Returns the value parsed (as an int), 0 in case of error, str will be
1966 * updated to the current value of the index
1967 */
1968static int
1969xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xmlChar **str) {
1970 const xmlChar *ptr;
1971 xmlChar cur;
1972 unsigned int val = 0;
1973 unsigned int outofrange = 0;
1974
1975 if ((str == NULL) || (*str == NULL)) return(0);
1976 ptr = *str;
1977 cur = *ptr;
1978 if ((cur == '&') && (ptr[1] == '#') && (ptr[2] == 'x')) {
1979 ptr += 3;
1980 cur = *ptr;
1981 while (cur != ';') { /* Non input consuming loop */
1982 if ((cur >= '0') && (cur <= '9'))
1983 val = val * 16 + (cur - '0');
1984 else if ((cur >= 'a') && (cur <= 'f'))
1985 val = val * 16 + (cur - 'a') + 10;
1986 else if ((cur >= 'A') && (cur <= 'F'))
1987 val = val * 16 + (cur - 'A') + 10;
1988 else {
1989 xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
1990 val = 0;
1991 break;
1992 }
1993 if (val > 0x10FFFF)
1994 outofrange = val;
1995
1996 ptr++;
1997 cur = *ptr;
1998 }
1999 if (cur == ';')
2000 ptr++;
2001 } else if ((cur == '&') && (ptr[1] == '#')){
2002 ptr += 2;
2003 cur = *ptr;
2004 while (cur != ';') { /* Non input consuming loops */
2005 if ((cur >= '0') && (cur <= '9'))
2006 val = val * 10 + (cur - '0');
2007 else {
2008 xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
2009 val = 0;
2010 break;
2011 }
2012 if (val > 0x10FFFF)
2013 outofrange = val;
2014
2015 ptr++;
2016 cur = *ptr;
2017 }
2018 if (cur == ';')
2019 ptr++;
2020 } else {
2021 xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
2022 return(0);
2023 }
2024 *str = ptr;
2025
2026 /*
2027 * [ WFC: Legal Character ]
2028 * Characters referred to using character references must match the
2029 * production for Char.
2030 */
2031 if ((IS_CHAR(val) && (outofrange == 0))) {
2032 return(val);
2033 } else {
2034 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2035 "xmlParseStringCharRef: invalid xmlChar value %d\n",
2036 val);
2037 }
2038 return(0);
2039}
2040
2041/**
2042 * xmlNewBlanksWrapperInputStream:
2043 * @ctxt: an XML parser context
2044 * @entity: an Entity pointer
2045 *
2046 * Create a new input stream for wrapping
2047 * blanks around a PEReference
2048 *
2049 * Returns the new input stream or NULL
2050 */
2051
2052static void deallocblankswrapper (xmlChar *str) {xmlFree(str);}
2053
2054static xmlParserInputPtr
2055xmlNewBlanksWrapperInputStream(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
2056 xmlParserInputPtr input;
2057 xmlChar *buffer;
2058 size_t length;
2059 if (entity == NULL) {
2060 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
2061 "xmlNewBlanksWrapperInputStream entity\n");
2062 return(NULL);
2063 }
2064 if (xmlParserDebugEntities)
2065 xmlGenericError(xmlGenericErrorContext,
2066 "new blanks wrapper for entity: %s\n", entity->name);
2067 input = xmlNewInputStream(ctxt);
2068 if (input == NULL) {
2069 return(NULL);
2070 }
2071 length = xmlStrlen(entity->name) + 5;
2072 buffer = xmlMallocAtomic(length);
2073 if (buffer == NULL) {
2074 xmlErrMemory(ctxt, NULL);
2075 return(NULL);
2076 }
2077 buffer [0] = ' ';
2078 buffer [1] = '%';
2079 buffer [length-3] = ';';
2080 buffer [length-2] = ' ';
2081 buffer [length-1] = 0;
2082 memcpy(buffer + 2, entity->name, length - 5);
2083 input->free = deallocblankswrapper;
2084 input->base = buffer;
2085 input->cur = buffer;
2086 input->length = length;
2087 input->end = &buffer[length];
2088 return(input);
2089}
2090
2091/**
2092 * xmlParserHandlePEReference:
2093 * @ctxt: the parser context
2094 *
2095 * [69] PEReference ::= '%' Name ';'
2096 *
2097 * [ WFC: No Recursion ]
2098 * A parsed entity must not contain a recursive
2099 * reference to itself, either directly or indirectly.
2100 *
2101 * [ WFC: Entity Declared ]
2102 * In a document without any DTD, a document with only an internal DTD
2103 * subset which contains no parameter entity references, or a document
2104 * with "standalone='yes'", ... ... The declaration of a parameter
2105 * entity must precede any reference to it...
2106 *
2107 * [ VC: Entity Declared ]
2108 * In a document with an external subset or external parameter entities
2109 * with "standalone='no'", ... ... The declaration of a parameter entity
2110 * must precede any reference to it...
2111 *
2112 * [ WFC: In DTD ]
2113 * Parameter-entity references may only appear in the DTD.
2114 * NOTE: misleading but this is handled.
2115 *
2116 * A PEReference may have been detected in the current input stream
2117 * the handling is done accordingly to
2118 * http://www.w3.org/TR/REC-xml#entproc
2119 * i.e.
2120 * - Included in literal in entity values
2121 * - Included as Parameter Entity reference within DTDs
2122 */
2123void
2124xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) {
2125 const xmlChar *name;
2126 xmlEntityPtr entity = NULL;
2127 xmlParserInputPtr input;
2128
2129 if (RAW != '%') return;
2130 switch(ctxt->instate) {
2131 case XML_PARSER_CDATA_SECTION:
2132 return;
2133 case XML_PARSER_COMMENT:
2134 return;
2135 case XML_PARSER_START_TAG:
2136 return;
2137 case XML_PARSER_END_TAG:
2138 return;
2139 case XML_PARSER_EOF:
2140 xmlFatalErr(ctxt, XML_ERR_PEREF_AT_EOF, NULL);
2141 return;
2142 case XML_PARSER_PROLOG:
2143 case XML_PARSER_START:
2144 case XML_PARSER_MISC:
2145 xmlFatalErr(ctxt, XML_ERR_PEREF_IN_PROLOG, NULL);
2146 return;
2147 case XML_PARSER_ENTITY_DECL:
2148 case XML_PARSER_CONTENT:
2149 case XML_PARSER_ATTRIBUTE_VALUE:
2150 case XML_PARSER_PI:
2151 case XML_PARSER_SYSTEM_LITERAL:
2152 case XML_PARSER_PUBLIC_LITERAL:
2153 /* we just ignore it there */
2154 return;
2155 case XML_PARSER_EPILOG:
2156 xmlFatalErr(ctxt, XML_ERR_PEREF_IN_EPILOG, NULL);
2157 return;
2158 case XML_PARSER_ENTITY_VALUE:
2159 /*
2160 * NOTE: in the case of entity values, we don't do the
2161 * substitution here since we need the literal
2162 * entity value to be able to save the internal
2163 * subset of the document.
2164 * This will be handled by xmlStringDecodeEntities
2165 */
2166 return;
2167 case XML_PARSER_DTD:
2168 /*
2169 * [WFC: Well-Formedness Constraint: PEs in Internal Subset]
2170 * In the internal DTD subset, parameter-entity references
2171 * can occur only where markup declarations can occur, not
2172 * within markup declarations.
2173 * In that case this is handled in xmlParseMarkupDecl
2174 */
2175 if ((ctxt->external == 0) && (ctxt->inputNr == 1))
2176 return;
2177 if (IS_BLANK_CH(NXT(1)) || NXT(1) == 0)
2178 return;
2179 break;
2180 case XML_PARSER_IGNORE:
2181 return;
2182 }
2183
2184 NEXT;
2185 name = xmlParseName(ctxt);
2186 if (xmlParserDebugEntities)
2187 xmlGenericError(xmlGenericErrorContext,
2188 "PEReference: %s\n", name);
2189 if (name == NULL) {
2190 xmlFatalErr(ctxt, XML_ERR_PEREF_NO_NAME, NULL);
2191 } else {
2192 if (RAW == ';') {
2193 NEXT;
2194 if ((ctxt->sax != NULL) && (ctxt->sax->getParameterEntity != NULL))
2195 entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
2196 if (entity == NULL) {
2197
2198 /*
2199 * [ WFC: Entity Declared ]
2200 * In a document without any DTD, a document with only an
2201 * internal DTD subset which contains no parameter entity
2202 * references, or a document with "standalone='yes'", ...
2203 * ... The declaration of a parameter entity must precede
2204 * any reference to it...
2205 */
2206 if ((ctxt->standalone == 1) ||
2207 ((ctxt->hasExternalSubset == 0) &&
2208 (ctxt->hasPErefs == 0))) {
2209 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
2210 "PEReference: %%%s; not found\n", name);
2211 } else {
2212 /*
2213 * [ VC: Entity Declared ]
2214 * In a document with an external subset or external
2215 * parameter entities with "standalone='no'", ...
2216 * ... The declaration of a parameter entity must precede
2217 * any reference to it...
2218 */
2219 if ((ctxt->validate) && (ctxt->vctxt.error != NULL)) {
2220 xmlValidityError(ctxt, XML_WAR_UNDECLARED_ENTITY,
2221 "PEReference: %%%s; not found\n",
2222 name);
2223 } else
2224 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
2225 "PEReference: %%%s; not found\n",
2226 name, NULL);
2227 ctxt->valid = 0;
2228 }
2229 } else if (ctxt->input->free != deallocblankswrapper) {
2230 input = xmlNewBlanksWrapperInputStream(ctxt, entity);
2231 xmlPushInput(ctxt, input);
2232 } else {
2233 if ((entity->etype == XML_INTERNAL_PARAMETER_ENTITY) ||
2234 (entity->etype == XML_EXTERNAL_PARAMETER_ENTITY)) {
2235 xmlChar start[4];
2236 xmlCharEncoding enc;
2237
2238 /*
2239 * handle the extra spaces added before and after
2240 * c.f. http://www.w3.org/TR/REC-xml#as-PE
2241 * this is done independently.
2242 */
2243 input = xmlNewEntityInputStream(ctxt, entity);
2244 xmlPushInput(ctxt, input);
2245
2246 /*
2247 * Get the 4 first bytes and decode the charset
2248 * if enc != XML_CHAR_ENCODING_NONE
2249 * plug some encoding conversion routines.
2250 * Note that, since we may have some non-UTF8
2251 * encoding (like UTF16, bug 135229), the 'length'
2252 * is not known, but we can calculate based upon
2253 * the amount of data in the buffer.
2254 */
2255 GROW
2256 if ((ctxt->input->end - ctxt->input->cur)>=4) {
2257 start[0] = RAW;
2258 start[1] = NXT(1);
2259 start[2] = NXT(2);
2260 start[3] = NXT(3);
2261 enc = xmlDetectCharEncoding(start, 4);
2262 if (enc != XML_CHAR_ENCODING_NONE) {
2263 xmlSwitchEncoding(ctxt, enc);
2264 }
2265 }
2266
2267 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
2268 (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l' )) &&
2269 (IS_BLANK_CH(NXT(5)))) {
2270 xmlParseTextDecl(ctxt);
2271 }
2272 } else {
2273 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
2274 "PEReference: %s is not a parameter entity\n",
2275 name);
2276 }
2277 }
2278 } else {
2279 xmlFatalErr(ctxt, XML_ERR_PEREF_SEMICOL_MISSING, NULL);
2280 }
2281 }
2282}
2283
/*
 * growBuffer:
 * @buffer: name of the xmlChar * variable holding the buffer; the macro
 *          also reads and updates the companion variable <buffer>_size
 * @n:      number of extra bytes requested on top of doubling
 *
 * Macro used to grow the current buffer to (2 * <buffer>_size + n) bytes
 * with xmlRealloc(). If the new size wraps around or the reallocation
 * fails, control jumps to the label mem_error, which the enclosing
 * function has to provide; the buffer pointer and size are left unchanged
 * in that case.
 *
 * @n is parenthesized in the expansion so that any argument expression
 * (e.g. a conditional) is added as a whole.
 */
#define growBuffer(buffer, n) {                                         \
    xmlChar *tmp;                                                       \
    size_t new_size = buffer##_size * 2 + (n);                          \
    if (new_size < buffer##_size) goto mem_error;                       \
    tmp = (xmlChar *) xmlRealloc(buffer, new_size);                     \
    if (tmp == NULL) goto mem_error;                                    \
    buffer = tmp;                                                       \
    buffer##_size = new_size;                                           \
}
2296
2297/**
2298 * xmlStringLenDecodeEntities:
2299 * @ctxt: the parser context
2300 * @str: the input string
2301 * @len: the string length
2302 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2303 * @end: an end marker xmlChar, 0 if none
2304 * @end2: an end marker xmlChar, 0 if none
2305 * @end3: an end marker xmlChar, 0 if none
2306 *
2307 * Takes a entity string content and process to do the adequate substitutions.
2308 *
2309 * [67] Reference ::= EntityRef | CharRef
2310 *
2311 * [69] PEReference ::= '%' Name ';'
2312 *
2313 * Returns A newly allocated string with the substitution done. The caller
2314 * must deallocate it !
2315 */
2316xmlChar *
2317xmlStringLenDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2318 int what, xmlChar end, xmlChar end2, xmlChar end3) {
2319 xmlChar *buffer = NULL;
2320 size_t buffer_size = 0;
2321 size_t nbchars = 0;
2322
2323 xmlChar *current = NULL;
2324 const xmlChar *last;
2325 xmlEntityPtr ent;
2326 int c,l;
2327
2328 if ((ctxt == NULL) || (str == NULL) || (len < 0))
2329 return(NULL);
2330 last = str + len;
2331
2332 if (ctxt->depth > 40) {
2333 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
2334 return(NULL);
2335 }
2336
2337 /*
2338 * allocate a translation buffer.
2339 */
2340 buffer_size = XML_PARSER_BIG_BUFFER_SIZE;
2341 buffer = (xmlChar *) xmlMallocAtomic(buffer_size);
2342 if (buffer == NULL) goto mem_error;
2343
2344 /*
2345 * OK loop until we reach one of the ending char or a size limit.
2346 * we are operating on already parsed values.
2347 */
2348 if (str < last)
2349 c = CUR_SCHAR(str, l);
2350 else
2351 c = 0;
2352 while ((c != 0) && (c != end) && /* non input consuming loop */
2353 (c != end2) && (c != end3)) {
2354
2355 if (c == 0) break;
2356 if ((c == '&') && (str[1] == '#')) {
2357 int val = xmlParseStringCharRef(ctxt, &str);
2358 if (val != 0) {
2359 COPY_BUF(0,buffer,nbchars,val);
2360 }
2361 if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2362 growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2363 }
2364 } else if ((c == '&') && (what & XML_SUBSTITUTE_REF)) {
2365 if (xmlParserDebugEntities)
2366 xmlGenericError(xmlGenericErrorContext,
2367 "String decoding Entity Reference: %.30s\n",
2368 str);
2369 ent = xmlParseStringEntityRef(ctxt, &str);
2370 if (ctxt->lastError.code == XML_ERR_ENTITY_LOOP)
2371 goto int_error;
2372 if (ent != NULL)
2373 ctxt->nbentities += ent->owner;
2374 if ((ent != NULL) &&
2375 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
2376 if (ent->content != NULL) {
2377 COPY_BUF(0,buffer,nbchars,ent->content[0]);
2378 if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2379 growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2380 }
2381 } else {
2382 xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
2383 "predefined entity has no content\n");
2384 }
2385 } else if ((ent != NULL) && (ent->content != NULL)) {
2386 xmlChar *rep;
2387
2388 ctxt->depth++;
2389 rep = xmlStringDecodeEntities(ctxt, ent->content, what,
2390 0, 0, 0);
2391 ctxt->depth--;
2392 if (rep != NULL) {
2393 current = rep;
2394 while (*current != 0) { /* non input consuming loop */
2395 buffer[nbchars++] = *current++;
2396 if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2397 if (xmlParserEntityCheck(ctxt, nbchars, ent, 0)) {
2398 xmlFree(rep);
2399 goto int_error;
2400 }
2401 growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2402 }
2403 }
2404 xmlFree(rep);
2405 }
2406 } else if (ent != NULL) {
2407 int i = xmlStrlen(ent->name);
2408 const xmlChar *cur = ent->name;
2409
2410 buffer[nbchars++] = '&';
2411 if (nbchars + i + XML_PARSER_BUFFER_SIZE > buffer_size) {
2412 growBuffer(buffer, i + XML_PARSER_BUFFER_SIZE);
2413 }
2414 for (;i > 0;i--)
2415 buffer[nbchars++] = *cur++;
2416 buffer[nbchars++] = ';';
2417 }
2418 } else if (c == '%' && (what & XML_SUBSTITUTE_PEREF)) {
2419 if (xmlParserDebugEntities)
2420 xmlGenericError(xmlGenericErrorContext,
2421 "String decoding PE Reference: %.30s\n", str);
2422 ent = xmlParseStringPEReference(ctxt, &str);
2423 if (ctxt->lastError.code == XML_ERR_ENTITY_LOOP)
2424 goto int_error;
2425 if (ent != NULL)
2426 ctxt->nbentities += ent->owner;
2427 if (ent != NULL) {
2428 xmlChar *rep;
2429
2430 ctxt->depth++;
2431 rep = xmlStringDecodeEntities(ctxt, ent->content, what,
2432 0, 0, 0);
2433 ctxt->depth--;
2434 if (rep != NULL) {
2435 current = rep;
2436 while (*current != 0) { /* non input consuming loop */
2437 buffer[nbchars++] = *current++;
2438 if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2439 if (xmlParserEntityCheck(ctxt, nbchars, ent, 0)) {
2440 xmlFree(rep);
2441 goto int_error;
2442 }
2443 growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2444 }
2445 }
2446 xmlFree(rep);
2447 }
2448 }
2449 } else {
2450 COPY_BUF(l,buffer,nbchars,c);
2451 str += l;
2452 if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2453 growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2454 }
2455 }
2456 if (str < last)
2457 c = CUR_SCHAR(str, l);
2458 else
2459 c = 0;
2460 }
2461 buffer[nbchars++] = 0;
2462 return(buffer);
2463
2464mem_error:
2465 xmlErrMemory(ctxt, NULL);
2466int_error:
2467 if (buffer != NULL)
2468 xmlFree(buffer);
2469 return(NULL);
2470}
2471
2472/**
2473 * xmlStringDecodeEntities:
2474 * @ctxt: the parser context
2475 * @str: the input string
2476 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2477 * @end: an end marker xmlChar, 0 if none
2478 * @end2: an end marker xmlChar, 0 if none
2479 * @end3: an end marker xmlChar, 0 if none
2480 *
2481 * Takes a entity string content and process to do the adequate substitutions.
2482 *
2483 * [67] Reference ::= EntityRef | CharRef
2484 *
2485 * [69] PEReference ::= '%' Name ';'
2486 *
2487 * Returns A newly allocated string with the substitution done. The caller
2488 * must deallocate it !
2489 */
2490xmlChar *
2491xmlStringDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int what,
2492 xmlChar end, xmlChar end2, xmlChar end3) {
2493 if ((ctxt == NULL) || (str == NULL)) return(NULL);
2494 return(xmlStringLenDecodeEntities(ctxt, str, xmlStrlen(str), what,
2495 end, end2, end3));
2496}
2497
2498/************************************************************************
2499 * *
2500 * Commodity functions, cleanup needed ? *
2501 * *
2502 ************************************************************************/
2503
2504/**
2505 * areBlanks:
2506 * @ctxt: an XML parser context
2507 * @str: a xmlChar *
2508 * @len: the size of @str
2509 * @blank_chars: we know the chars are blanks
2510 *
2511 * Is this a sequence of blank chars that one can ignore ?
2512 *
2513 * Returns 1 if ignorable 0 otherwise.
2514 */
2515
2516static int areBlanks(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2517 int blank_chars) {
2518 int i, ret;
2519 xmlNodePtr lastChild;
2520
2521 /*
2522 * Don't spend time trying to differentiate them, the same callback is
2523 * used !
2524 */
2525 if (ctxt->sax->ignorableWhitespace == ctxt->sax->characters)
2526 return(0);
2527
2528 /*
2529 * Check for xml:space value.
2530 */
2531 if ((ctxt->space == NULL) || (*(ctxt->space) == 1) ||
2532 (*(ctxt->space) == -2))
2533 return(0);
2534
2535 /*
2536 * Check that the string is made of blanks
2537 */
2538 if (blank_chars == 0) {
2539 for (i = 0;i < len;i++)
2540 if (!(IS_BLANK_CH(str[i]))) return(0);
2541 }
2542
2543 /*
2544 * Look if the element is mixed content in the DTD if available
2545 */
2546 if (ctxt->node == NULL) return(0);
2547 if (ctxt->myDoc != NULL) {
2548 ret = xmlIsMixedElement(ctxt->myDoc, ctxt->node->name);
2549 if (ret == 0) return(1);
2550 if (ret == 1) return(0);
2551 }
2552
2553 /*
2554 * Otherwise, heuristic :-\
2555 */
2556 if ((RAW != '<') && (RAW != 0xD)) return(0);
2557 if ((ctxt->node->children == NULL) &&
2558 (RAW == '<') && (NXT(1) == '/')) return(0);
2559
2560 lastChild = xmlGetLastChild(ctxt->node);
2561 if (lastChild == NULL) {
2562 if ((ctxt->node->type != XML_ELEMENT_NODE) &&
2563 (ctxt->node->content != NULL)) return(0);
2564 } else if (xmlNodeIsText(lastChild))
2565 return(0);
2566 else if ((ctxt->node->children != NULL) &&
2567 (xmlNodeIsText(ctxt->node->children)))
2568 return(0);
2569 return(1);
2570}
2571
2572/************************************************************************
2573 * *
2574 * Extra stuff for namespace support *
2575 * Relates to http://www.w3.org/TR/WD-xml-names *
2576 * *
2577 ************************************************************************/
2578
2579/**
2580 * xmlSplitQName:
2581 * @ctxt: an XML parser context
2582 * @name: an XML parser context
2583 * @prefix: a xmlChar **
2584 *
2585 * parse an UTF8 encoded XML qualified name string
2586 *
2587 * [NS 5] QName ::= (Prefix ':')? LocalPart
2588 *
2589 * [NS 6] Prefix ::= NCName
2590 *
2591 * [NS 7] LocalPart ::= NCName
2592 *
2593 * Returns the local part, and prefix is updated
2594 * to get the Prefix if any.
2595 */
2596
2597xmlChar *
2598xmlSplitQName(xmlParserCtxtPtr ctxt, const xmlChar *name, xmlChar **prefix) {
2599 xmlChar buf[XML_MAX_NAMELEN + 5];
2600 xmlChar *buffer = NULL;
2601 int len = 0;
2602 int max = XML_MAX_NAMELEN;
2603 xmlChar *ret = NULL;
2604 const xmlChar *cur = name;
2605 int c;
2606
2607 if (prefix == NULL) return(NULL);
2608 *prefix = NULL;
2609
2610 if (cur == NULL) return(NULL);
2611
2612#ifndef XML_XML_NAMESPACE
2613 /* xml: prefix is not really a namespace */
2614 if ((cur[0] == 'x') && (cur[1] == 'm') &&
2615 (cur[2] == 'l') && (cur[3] == ':'))
2616 return(xmlStrdup(name));
2617#endif
2618
2619 /* nasty but well=formed */
2620 if (cur[0] == ':')
2621 return(xmlStrdup(name));
2622
2623 c = *cur++;
2624 while ((c != 0) && (c != ':') && (len < max)) { /* tested bigname.xml */
2625 buf[len++] = c;
2626 c = *cur++;
2627 }
2628 if (len >= max) {
2629 /*
2630 * Okay someone managed to make a huge name, so he's ready to pay
2631 * for the processing speed.
2632 */
2633 max = len * 2;
2634
2635 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
2636 if (buffer == NULL) {
2637 xmlErrMemory(ctxt, NULL);
2638 return(NULL);
2639 }
2640 memcpy(buffer, buf, len);
2641 while ((c != 0) && (c != ':')) { /* tested bigname.xml */
2642 if (len + 10 > max) {
2643 xmlChar *tmp;
2644
2645 max *= 2;
2646 tmp = (xmlChar *) xmlRealloc(buffer,
2647 max * sizeof(xmlChar));
2648 if (tmp == NULL) {
2649 xmlFree(tmp);
2650 xmlErrMemory(ctxt, NULL);
2651 return(NULL);
2652 }
2653 buffer = tmp;
2654 }
2655 buffer[len++] = c;
2656 c = *cur++;
2657 }
2658 buffer[len] = 0;
2659 }
2660
2661 if ((c == ':') && (*cur == 0)) {
2662 if (buffer != NULL)
2663 xmlFree(buffer);
2664 *prefix = NULL;
2665 return(xmlStrdup(name));
2666 }
2667
2668 if (buffer == NULL)
2669 ret = xmlStrndup(buf, len);
2670 else {
2671 ret = buffer;
2672 buffer = NULL;
2673 max = XML_MAX_NAMELEN;
2674 }
2675
2676
2677 if (c == ':') {
2678 c = *cur;
2679 *prefix = ret;
2680 if (c == 0) {
2681 return(xmlStrndup(BAD_CAST "", 0));
2682 }
2683 len = 0;
2684
2685 /*
2686 * Check that the first character is proper to start
2687 * a new name
2688 */
2689 if (!(((c >= 0x61) && (c <= 0x7A)) ||
2690 ((c >= 0x41) && (c <= 0x5A)) ||
2691 (c == '_') || (c == ':'))) {
2692 int l;
2693 int first = CUR_SCHAR(cur, l);
2694
2695 if (!IS_LETTER(first) && (first != '_')) {
2696 xmlFatalErrMsgStr(ctxt, XML_NS_ERR_QNAME,
2697 "Name %s is not XML Namespace compliant\n",
2698 name);
2699 }
2700 }
2701 cur++;
2702
2703 while ((c != 0) && (len < max)) { /* tested bigname2.xml */
2704 buf[len++] = c;
2705 c = *cur++;
2706 }
2707 if (len >= max) {
2708 /*
2709 * Okay someone managed to make a huge name, so he's ready to pay
2710 * for the processing speed.
2711 */
2712 max = len * 2;
2713
2714 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
2715 if (buffer == NULL) {
2716 xmlErrMemory(ctxt, NULL);
2717 return(NULL);
2718 }
2719 memcpy(buffer, buf, len);
2720 while (c != 0) { /* tested bigname2.xml */
2721 if (len + 10 > max) {
2722 xmlChar *tmp;
2723
2724 max *= 2;
2725 tmp = (xmlChar *) xmlRealloc(buffer,
2726 max * sizeof(xmlChar));
2727 if (tmp == NULL) {
2728 xmlErrMemory(ctxt, NULL);
2729 xmlFree(buffer);
2730 return(NULL);
2731 }
2732 buffer = tmp;
2733 }
2734 buffer[len++] = c;
2735 c = *cur++;
2736 }
2737 buffer[len] = 0;
2738 }
2739
2740 if (buffer == NULL)
2741 ret = xmlStrndup(buf, len);
2742 else {
2743 ret = buffer;
2744 }
2745 }
2746
2747 return(ret);
2748}
2749
2750/************************************************************************
2751 * *
2752 * The parser itself *
2753 * Relates to http://www.w3.org/TR/REC-xml *
2754 * *
2755 ************************************************************************/
2756
/*
 * Forward declarations. xmlParseNameComplex() is called by xmlParseName()
 * ahead of its definition; xmlParseAttValueInternal() is called by
 * xmlParseAttValue() and is presumably defined further down in this file
 * (NOTE(review): its definition is not visible from here - confirm).
 */
static const xmlChar * xmlParseNameComplex(xmlParserCtxtPtr ctxt);
static xmlChar * xmlParseAttValueInternal(xmlParserCtxtPtr ctxt,
                                          int *len, int *alloc, int normalize);
2760
2761/**
2762 * xmlParseName:
2763 * @ctxt: an XML parser context
2764 *
2765 * parse an XML name.
2766 *
2767 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
2768 * CombiningChar | Extender
2769 *
2770 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
2771 *
2772 * [6] Names ::= Name (#x20 Name)*
2773 *
2774 * Returns the Name parsed or NULL
2775 */
2776
2777const xmlChar *
2778xmlParseName(xmlParserCtxtPtr ctxt) {
2779 const xmlChar *in;
2780 const xmlChar *ret;
2781 int count = 0;
2782
2783 GROW;
2784
2785 /*
2786 * Accelerator for simple ASCII names
2787 */
2788 in = ctxt->input->cur;
2789 if (((*in >= 0x61) && (*in <= 0x7A)) ||
2790 ((*in >= 0x41) && (*in <= 0x5A)) ||
2791 (*in == '_') || (*in == ':')) {
2792 in++;
2793 while (((*in >= 0x61) && (*in <= 0x7A)) ||
2794 ((*in >= 0x41) && (*in <= 0x5A)) ||
2795 ((*in >= 0x30) && (*in <= 0x39)) ||
2796 (*in == '_') || (*in == '-') ||
2797 (*in == ':') || (*in == '.'))
2798 in++;
2799 if ((*in > 0) && (*in < 0x80)) {
2800 count = in - ctxt->input->cur;
2801 ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
2802 ctxt->input->cur = in;
2803 ctxt->nbChars += count;
2804 ctxt->input->col += count;
2805 if (ret == NULL)
2806 xmlErrMemory(ctxt, NULL);
2807 return(ret);
2808 }
2809 }
2810 return(xmlParseNameComplex(ctxt));
2811}
2812
2813/**
2814 * xmlParseNameAndCompare:
2815 * @ctxt: an XML parser context
2816 *
2817 * parse an XML name and compares for match
2818 * (specialized for endtag parsing)
2819 *
2820 * Returns NULL for an illegal name, (xmlChar*) 1 for success
2821 * and the name for mismatch
2822 */
2823
2824static const xmlChar *
2825xmlParseNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *other) {
2826 register const xmlChar *cmp = other;
2827 register const xmlChar *in;
2828 const xmlChar *ret;
2829
2830 GROW;
2831
2832 in = ctxt->input->cur;
2833 while (*in != 0 && *in == *cmp) {
2834 ++in;
2835 ++cmp;
2836 ctxt->input->col++;
2837 }
2838 if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
2839 /* success */
2840 ctxt->input->cur = in;
2841 return (const xmlChar*) 1;
2842 }
2843 /* failure (or end of input buffer), check with full function */
2844 ret = xmlParseName (ctxt);
2845 /* strings coming from the dictionnary direct compare possible */
2846 if (ret == other) {
2847 return (const xmlChar*) 1;
2848 }
2849 return ret;
2850}
2851
2852static const xmlChar *
2853xmlParseNameComplex(xmlParserCtxtPtr ctxt) {
2854 int len = 0, l;
2855 int c;
2856 int count = 0;
2857
2858 /*
2859 * Handler for more complex cases
2860 */
2861 GROW;
2862 c = CUR_CHAR(l);
2863 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
2864 (!IS_LETTER(c) && (c != '_') &&
2865 (c != ':'))) {
2866 return(NULL);
2867 }
2868
2869 while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
2870 ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
2871 (c == '.') || (c == '-') ||
2872 (c == '_') || (c == ':') ||
2873 (IS_COMBINING(c)) ||
2874 (IS_EXTENDER(c)))) {
2875 if (count++ > 100) {
2876 count = 0;
2877 GROW;
2878 }
2879 len += l;
2880 NEXTL(l);
2881 c = CUR_CHAR(l);
2882 }
2883 if ((*ctxt->input->cur == '\n') && (ctxt->input->cur[-1] == '\r'))
2884 return(xmlDictLookup(ctxt->dict, ctxt->input->cur - (len + 1), len));
2885 return(xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len));
2886}
2887
2888/**
2889 * xmlParseStringName:
2890 * @ctxt: an XML parser context
2891 * @str: a pointer to the string pointer (IN/OUT)
2892 *
2893 * parse an XML name.
2894 *
2895 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
2896 * CombiningChar | Extender
2897 *
2898 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
2899 *
2900 * [6] Names ::= Name (#x20 Name)*
2901 *
2902 * Returns the Name parsed or NULL. The @str pointer
2903 * is updated to the current location in the string.
2904 */
2905
2906static xmlChar *
2907xmlParseStringName(xmlParserCtxtPtr ctxt, const xmlChar** str) {
2908 xmlChar buf[XML_MAX_NAMELEN + 5];
2909 const xmlChar *cur = *str;
2910 int len = 0, l;
2911 int c;
2912
2913 c = CUR_SCHAR(cur, l);
2914 if (!IS_LETTER(c) && (c != '_') &&
2915 (c != ':')) {
2916 return(NULL);
2917 }
2918
2919 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigentname.xml */
2920 (c == '.') || (c == '-') ||
2921 (c == '_') || (c == ':') ||
2922 (IS_COMBINING(c)) ||
2923 (IS_EXTENDER(c))) {
2924 COPY_BUF(l,buf,len,c);
2925 cur += l;
2926 c = CUR_SCHAR(cur, l);
2927 if (len >= XML_MAX_NAMELEN) { /* test bigentname.xml */
2928 /*
2929 * Okay someone managed to make a huge name, so he's ready to pay
2930 * for the processing speed.
2931 */
2932 xmlChar *buffer;
2933 int max = len * 2;
2934
2935 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
2936 if (buffer == NULL) {
2937 xmlErrMemory(ctxt, NULL);
2938 return(NULL);
2939 }
2940 memcpy(buffer, buf, len);
2941 while ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
2942 /* test bigentname.xml */
2943 (c == '.') || (c == '-') ||
2944 (c == '_') || (c == ':') ||
2945 (IS_COMBINING(c)) ||
2946 (IS_EXTENDER(c))) {
2947 if (len + 10 > max) {
2948 xmlChar *tmp;
2949 max *= 2;
2950 tmp = (xmlChar *) xmlRealloc(buffer,
2951 max * sizeof(xmlChar));
2952 if (tmp == NULL) {
2953 xmlErrMemory(ctxt, NULL);
2954 xmlFree(buffer);
2955 return(NULL);
2956 }
2957 buffer = tmp;
2958 }
2959 COPY_BUF(l,buffer,len,c);
2960 cur += l;
2961 c = CUR_SCHAR(cur, l);
2962 }
2963 buffer[len] = 0;
2964 *str = cur;
2965 return(buffer);
2966 }
2967 }
2968 *str = cur;
2969 return(xmlStrndup(buf, len));
2970}
2971
2972/**
2973 * xmlParseNmtoken:
2974 * @ctxt: an XML parser context
2975 *
2976 * parse an XML Nmtoken.
2977 *
2978 * [7] Nmtoken ::= (NameChar)+
2979 *
2980 * [8] Nmtokens ::= Nmtoken (#x20 Nmtoken)*
2981 *
2982 * Returns the Nmtoken parsed or NULL
2983 */
2984
2985xmlChar *
2986xmlParseNmtoken(xmlParserCtxtPtr ctxt) {
2987 xmlChar buf[XML_MAX_NAMELEN + 5];
2988 int len = 0, l;
2989 int c;
2990 int count = 0;
2991
2992 GROW;
2993 c = CUR_CHAR(l);
2994
2995 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigtoken.xml */
2996 (c == '.') || (c == '-') ||
2997 (c == '_') || (c == ':') ||
2998 (IS_COMBINING(c)) ||
2999 (IS_EXTENDER(c))) {
3000 if (count++ > 100) {
3001 count = 0;
3002 GROW;
3003 }
3004 COPY_BUF(l,buf,len,c);
3005 NEXTL(l);
3006 c = CUR_CHAR(l);
3007 if (len >= XML_MAX_NAMELEN) {
3008 /*
3009 * Okay someone managed to make a huge token, so he's ready to pay
3010 * for the processing speed.
3011 */
3012 xmlChar *buffer;
3013 int max = len * 2;
3014
3015 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
3016 if (buffer == NULL) {
3017 xmlErrMemory(ctxt, NULL);
3018 return(NULL);
3019 }
3020 memcpy(buffer, buf, len);
3021 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigtoken.xml */
3022 (c == '.') || (c == '-') ||
3023 (c == '_') || (c == ':') ||
3024 (IS_COMBINING(c)) ||
3025 (IS_EXTENDER(c))) {
3026 if (count++ > 100) {
3027 count = 0;
3028 GROW;
3029 }
3030 if (len + 10 > max) {
3031 xmlChar *tmp;
3032
3033 max *= 2;
3034 tmp = (xmlChar *) xmlRealloc(buffer,
3035 max * sizeof(xmlChar));
3036 if (tmp == NULL) {
3037 xmlErrMemory(ctxt, NULL);
3038 xmlFree(buffer);
3039 return(NULL);
3040 }
3041 buffer = tmp;
3042 }
3043 COPY_BUF(l,buffer,len,c);
3044 NEXTL(l);
3045 c = CUR_CHAR(l);
3046 }
3047 buffer[len] = 0;
3048 return(buffer);
3049 }
3050 }
3051 if (len == 0)
3052 return(NULL);
3053 return(xmlStrndup(buf, len));
3054}
3055
3056/**
3057 * xmlParseEntityValue:
3058 * @ctxt: an XML parser context
3059 * @orig: if non-NULL store a copy of the original entity value
3060 *
3061 * parse a value for ENTITY declarations
3062 *
3063 * [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' |
3064 * "'" ([^%&'] | PEReference | Reference)* "'"
3065 *
3066 * Returns the EntityValue parsed with reference substituted or NULL
3067 */
3068
3069xmlChar *
3070xmlParseEntityValue(xmlParserCtxtPtr ctxt, xmlChar **orig) {
3071 xmlChar *buf = NULL;
3072 int len = 0;
3073 int size = XML_PARSER_BUFFER_SIZE;
3074 int c, l;
3075 xmlChar stop;
3076 xmlChar *ret = NULL;
3077 const xmlChar *cur = NULL;
3078 xmlParserInputPtr input;
3079
3080 if (RAW == '"') stop = '"';
3081 else if (RAW == '\'') stop = '\'';
3082 else {
3083 xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_STARTED, NULL);
3084 return(NULL);
3085 }
3086 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
3087 if (buf == NULL) {
3088 xmlErrMemory(ctxt, NULL);
3089 return(NULL);
3090 }
3091
3092 /*
3093 * The content of the entity definition is copied in a buffer.
3094 */
3095
3096 ctxt->instate = XML_PARSER_ENTITY_VALUE;
3097 input = ctxt->input;
3098 GROW;
3099 NEXT;
3100 c = CUR_CHAR(l);
3101 /*
3102 * NOTE: 4.4.5 Included in Literal
3103 * When a parameter entity reference appears in a literal entity
3104 * value, ... a single or double quote character in the replacement
3105 * text is always treated as a normal data character and will not
3106 * terminate the literal.
3107 * In practice it means we stop the loop only when back at parsing
3108 * the initial entity and the quote is found
3109 */
3110 while ((IS_CHAR(c)) && ((c != stop) || /* checked */
3111 (ctxt->input != input))) {
3112 if (len + 5 >= size) {
3113 xmlChar *tmp;
3114
3115 size *= 2;
3116 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3117 if (tmp == NULL) {
3118 xmlErrMemory(ctxt, NULL);
3119 xmlFree(buf);
3120 return(NULL);
3121 }
3122 buf = tmp;
3123 }
3124 COPY_BUF(l,buf,len,c);
3125 NEXTL(l);
3126 /*
3127 * Pop-up of finished entities.
3128 */
3129 while ((RAW == 0) && (ctxt->inputNr > 1)) /* non input consuming */
3130 xmlPopInput(ctxt);
3131
3132 GROW;
3133 c = CUR_CHAR(l);
3134 if (c == 0) {
3135 GROW;
3136 c = CUR_CHAR(l);
3137 }
3138 }
3139 buf[len] = 0;
3140
3141 /*
3142 * Raise problem w.r.t. '&' and '%' being used in non-entities
3143 * reference constructs. Note Charref will be handled in
3144 * xmlStringDecodeEntities()
3145 */
3146 cur = buf;
3147 while (*cur != 0) { /* non input consuming */
3148 if ((*cur == '%') || ((*cur == '&') && (cur[1] != '#'))) {
3149 xmlChar *name;
3150 xmlChar tmp = *cur;
3151
3152 cur++;
3153 name = xmlParseStringName(ctxt, &cur);
3154 if ((name == NULL) || (*cur != ';')) {
3155 xmlFatalErrMsgInt(ctxt, XML_ERR_ENTITY_CHAR_ERROR,
3156 "EntityValue: '%c' forbidden except for entities references\n",
3157 tmp);
3158 }
3159 if ((tmp == '%') && (ctxt->inSubset == 1) &&
3160 (ctxt->inputNr == 1)) {
3161 xmlFatalErr(ctxt, XML_ERR_ENTITY_PE_INTERNAL, NULL);
3162 }
3163 if (name != NULL)
3164 xmlFree(name);
3165 if (*cur == 0)
3166 break;
3167 }
3168 cur++;
3169 }
3170
3171 /*
3172 * Then PEReference entities are substituted.
3173 */
3174 if (c != stop) {
3175 xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_FINISHED, NULL);
3176 xmlFree(buf);
3177 } else {
3178 NEXT;
3179 /*
3180 * NOTE: 4.4.7 Bypassed
3181 * When a general entity reference appears in the EntityValue in
3182 * an entity declaration, it is bypassed and left as is.
3183 * so XML_SUBSTITUTE_REF is not set here.
3184 */
3185 ret = xmlStringDecodeEntities(ctxt, buf, XML_SUBSTITUTE_PEREF,
3186 0, 0, 0);
3187 if (orig != NULL)
3188 *orig = buf;
3189 else
3190 xmlFree(buf);
3191 }
3192
3193 return(ret);
3194}
3195
3196/**
3197 * xmlParseAttValueComplex:
3198 * @ctxt: an XML parser context
3199 * @len: the resulting attribute len
3200 * @normalize: wether to apply the inner normalization
3201 *
3202 * parse a value for an attribute, this is the fallback function
3203 * of xmlParseAttValue() when the attribute parsing requires handling
3204 * of non-ASCII characters, or normalization compaction.
3205 *
3206 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
3207 */
3208static xmlChar *
3209xmlParseAttValueComplex(xmlParserCtxtPtr ctxt, int *attlen, int normalize) {
3210 xmlChar limit = 0;
3211 xmlChar *buf = NULL;
3212 size_t len = 0;
3213 size_t buf_size = 0;
3214 int c, l, in_space = 0;
3215 xmlChar *current = NULL;
3216 xmlEntityPtr ent;
3217
3218 if (NXT(0) == '"') {
3219 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
3220 limit = '"';
3221 NEXT;
3222 } else if (NXT(0) == '\'') {
3223 limit = '\'';
3224 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
3225 NEXT;
3226 } else {
3227 xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
3228 return(NULL);
3229 }
3230
3231 /*
3232 * allocate a translation buffer.
3233 */
3234 buf_size = XML_PARSER_BUFFER_SIZE;
3235 buf = (xmlChar *) xmlMallocAtomic(buf_size);
3236 if (buf == NULL) goto mem_error;
3237
3238 /*
3239 * OK loop until we reach one of the ending char or a size limit.
3240 */
3241 c = CUR_CHAR(l);
3242 while ((NXT(0) != limit) && /* checked */
3243 (IS_CHAR(c)) && (c != '<')) {
3244 if (c == 0) break;
3245 if (c == '&') {
3246 in_space = 0;
3247 if (NXT(1) == '#') {
3248 int val = xmlParseCharRef(ctxt);
3249
3250 if (val == '&') {
3251 if (ctxt->replaceEntities) {
3252 if (len + 10 > buf_size) {
3253 growBuffer(buf, 10);
3254 }
3255 buf[len++] = '&';
3256 } else {
3257 /*
3258 * The reparsing will be done in xmlStringGetNodeList()
3259 * called by the attribute() function in SAX.c
3260 */
3261 if (len + 10 > buf_size) {
3262 growBuffer(buf, 10);
3263 }
3264 buf[len++] = '&';
3265 buf[len++] = '#';
3266 buf[len++] = '3';
3267 buf[len++] = '8';
3268 buf[len++] = ';';
3269 }
3270 } else {
3271 if (len + 10 > buf_size) {
3272 growBuffer(buf, 10);
3273 }
3274 len += xmlCopyChar(0, &buf[len], val);
3275 }
3276 } else {
3277 ent = xmlParseEntityRef(ctxt);
3278 ctxt->nbentities++;
3279 if (ent != NULL)
3280 ctxt->nbentities += ent->owner;
3281 if ((ent != NULL) &&
3282 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
3283 if (len + 10 > buf_size) {
3284 growBuffer(buf, 10);
3285 }
3286 if ((ctxt->replaceEntities == 0) &&
3287 (ent->content[0] == '&')) {
3288 buf[len++] = '&';
3289 buf[len++] = '#';
3290 buf[len++] = '3';
3291 buf[len++] = '8';
3292 buf[len++] = ';';
3293 } else {
3294 buf[len++] = ent->content[0];
3295 }
3296 } else if ((ent != NULL) &&
3297 (ctxt->replaceEntities != 0)) {
3298 xmlChar *rep;
3299
3300 if (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) {
3301 rep = xmlStringDecodeEntities(ctxt, ent->content,
3302 XML_SUBSTITUTE_REF,
3303 0, 0, 0);
3304 if (rep != NULL) {
3305 current = rep;
3306 while (*current != 0) { /* non input consuming */
3307 buf[len++] = *current++;
3308 if (len + 10 > buf_size) {
3309 growBuffer(buf, 10);
3310 }
3311 }
3312 xmlFree(rep);
3313 }
3314 } else {
3315 if (len + 10 > buf_size) {
3316 growBuffer(buf, 10);
3317 }
3318 if (ent->content != NULL)
3319 buf[len++] = ent->content[0];
3320 }
3321 } else if (ent != NULL) {
3322 int i = xmlStrlen(ent->name);
3323 const xmlChar *cur = ent->name;
3324
3325 /*
3326 * This may look absurd but is needed to detect
3327 * entities problems
3328 */
3329 if ((ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
3330 (ent->content != NULL)) {
3331 xmlChar *rep;
3332 rep = xmlStringDecodeEntities(ctxt, ent->content,
3333 XML_SUBSTITUTE_REF, 0, 0, 0);
3334 if (rep != NULL)
3335 xmlFree(rep);
3336 }
3337
3338 /*
3339 * Just output the reference
3340 */
3341 buf[len++] = '&';
3342 while (len + i + 10 > buf_size) {
3343 growBuffer(buf, i + 10);
3344 }
3345 for (;i > 0;i--)
3346 buf[len++] = *cur++;
3347 buf[len++] = ';';
3348 }
3349 }
3350 } else {
3351 if ((c == 0x20) || (c == 0xD) || (c == 0xA) || (c == 0x9)) {
3352 if ((len != 0) || (!normalize)) {
3353 if ((!normalize) || (!in_space)) {
3354 COPY_BUF(l,buf,len,0x20);
3355 if (len + 10 > buf_size) {
3356 growBuffer(buf, 10);
3357 }
3358 }
3359 in_space = 1;
3360 }
3361 } else {
3362 in_space = 0;
3363 COPY_BUF(l,buf,len,c);
3364 if (len + 10 > buf_size) {
3365 growBuffer(buf, 10);
3366 }
3367 }
3368 NEXTL(l);
3369 }
3370 GROW;
3371 c = CUR_CHAR(l);
3372 }
3373 if ((in_space) && (normalize)) {
3374 while ((len > 0) && (buf[len - 1] == 0x20)) len--;
3375 }
3376 buf[len] = 0;
3377 if (RAW == '<') {
3378 xmlFatalErr(ctxt, XML_ERR_LT_IN_ATTRIBUTE, NULL);
3379 } else if (RAW != limit) {
3380 if ((c != 0) && (!IS_CHAR(c))) {
3381 xmlFatalErrMsg(ctxt, XML_ERR_INVALID_CHAR,
3382 "invalid character in attribute value\n");
3383 } else {
3384 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
3385 "AttValue: ' expected\n");
3386 }
3387 } else
3388 NEXT;
3389
3390 /*
3391 * There we potentially risk an overflow, don't allow attribute value of
3392 * lenght more than INT_MAX it is a very reasonnable assumption !
3393 */
3394 if (len >= INT_MAX) {
3395 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
3396 "AttValue lenght too long\n");
3397 goto mem_error;
3398 }
3399
3400 if (attlen != NULL) *attlen = (int) len;
3401 return(buf);
3402
3403mem_error:
3404 xmlErrMemory(ctxt, NULL);
3405 return(NULL);
3406}
3407
3408/**
3409 * xmlParseAttValue:
3410 * @ctxt: an XML parser context
3411 *
3412 * parse a value for an attribute
3413 * Note: the parser won't do substitution of entities here, this
3414 * will be handled later in xmlStringGetNodeList
3415 *
3416 * [10] AttValue ::= '"' ([^<&"] | Reference)* '"' |
3417 * "'" ([^<&'] | Reference)* "'"
3418 *
3419 * 3.3.3 Attribute-Value Normalization:
3420 * Before the value of an attribute is passed to the application or
3421 * checked for validity, the XML processor must normalize it as follows:
3422 * - a character reference is processed by appending the referenced
3423 * character to the attribute value
3424 * - an entity reference is processed by recursively processing the
3425 * replacement text of the entity
3426 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
3427 * appending #x20 to the normalized value, except that only a single
3428 * #x20 is appended for a "#xD#xA" sequence that is part of an external
3429 * parsed entity or the literal entity value of an internal parsed entity
3430 * - other characters are processed by appending them to the normalized value
3431 * If the declared value is not CDATA, then the XML processor must further
3432 * process the normalized attribute value by discarding any leading and
3433 * trailing space (#x20) characters, and by replacing sequences of space
3434 * (#x20) characters by a single space (#x20) character.
3435 * All attributes for which no declaration has been read should be treated
3436 * by a non-validating parser as if declared CDATA.
3437 *
3438 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
3439 */
3440
3441
3442xmlChar *
3443xmlParseAttValue(xmlParserCtxtPtr ctxt) {
3444 if ((ctxt == NULL) || (ctxt->input == NULL)) return(NULL);
3445 return(xmlParseAttValueInternal(ctxt, NULL, NULL, 0));
3446}
3447
3448/**
3449 * xmlParseSystemLiteral:
3450 * @ctxt: an XML parser context
3451 *
3452 * parse an XML Literal
3453 *
3454 * [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
3455 *
3456 * Returns the SystemLiteral parsed or NULL
3457 */
3458
3459xmlChar *
3460xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) {
3461 xmlChar *buf = NULL;
3462 int len = 0;
3463 int size = XML_PARSER_BUFFER_SIZE;
3464 int cur, l;
3465 xmlChar stop;
3466 int state = ctxt->instate;
3467 int count = 0;
3468
3469 SHRINK;
3470 if (RAW == '"') {
3471 NEXT;
3472 stop = '"';
3473 } else if (RAW == '\'') {
3474 NEXT;
3475 stop = '\'';
3476 } else {
3477 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
3478 return(NULL);
3479 }
3480
3481 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
3482 if (buf == NULL) {
3483 xmlErrMemory(ctxt, NULL);
3484 return(NULL);
3485 }
3486 ctxt->instate = XML_PARSER_SYSTEM_LITERAL;
3487 cur = CUR_CHAR(l);
3488 while ((IS_CHAR(cur)) && (cur != stop)) { /* checked */
3489 if (len + 5 >= size) {
3490 xmlChar *tmp;
3491
3492 size *= 2;
3493 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3494 if (tmp == NULL) {
3495 xmlFree(buf);
3496 xmlErrMemory(ctxt, NULL);
3497 ctxt->instate = (xmlParserInputState) state;
3498 return(NULL);
3499 }
3500 buf = tmp;
3501 }
3502 count++;
3503 if (count > 50) {
3504 GROW;
3505 count = 0;
3506 }
3507 COPY_BUF(l,buf,len,cur);
3508 NEXTL(l);
3509 cur = CUR_CHAR(l);
3510 if (cur == 0) {
3511 GROW;
3512 SHRINK;
3513 cur = CUR_CHAR(l);
3514 }
3515 }
3516 buf[len] = 0;
3517 ctxt->instate = (xmlParserInputState) state;
3518 if (!IS_CHAR(cur)) {
3519 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
3520 } else {
3521 NEXT;
3522 }
3523 return(buf);
3524}
3525
3526/**
3527 * xmlParsePubidLiteral:
3528 * @ctxt: an XML parser context
3529 *
3530 * parse an XML public literal
3531 *
3532 * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
3533 *
3534 * Returns the PubidLiteral parsed or NULL.
3535 */
3536
3537xmlChar *
3538xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) {
3539 xmlChar *buf = NULL;
3540 int len = 0;
3541 int size = XML_PARSER_BUFFER_SIZE;
3542 xmlChar cur;
3543 xmlChar stop;
3544 int count = 0;
3545 xmlParserInputState oldstate = ctxt->instate;
3546
3547 SHRINK;
3548 if (RAW == '"') {
3549 NEXT;
3550 stop = '"';
3551 } else if (RAW == '\'') {
3552 NEXT;
3553 stop = '\'';
3554 } else {
3555 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
3556 return(NULL);
3557 }
3558 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
3559 if (buf == NULL) {
3560 xmlErrMemory(ctxt, NULL);
3561 return(NULL);
3562 }
3563 ctxt->instate = XML_PARSER_PUBLIC_LITERAL;
3564 cur = CUR;
3565 while ((IS_PUBIDCHAR_CH(cur)) && (cur != stop)) { /* checked */
3566 if (len + 1 >= size) {
3567 xmlChar *tmp;
3568
3569 size *= 2;
3570 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3571 if (tmp == NULL) {
3572 xmlErrMemory(ctxt, NULL);
3573 xmlFree(buf);
3574 return(NULL);
3575 }
3576 buf = tmp;
3577 }
3578 buf[len++] = cur;
3579 count++;
3580 if (count > 50) {
3581 GROW;
3582 count = 0;
3583 }
3584 NEXT;
3585 cur = CUR;
3586 if (cur == 0) {
3587 GROW;
3588 SHRINK;
3589 cur = CUR;
3590 }
3591 }
3592 buf[len] = 0;
3593 if (cur != stop) {
3594 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
3595 } else {
3596 NEXT;
3597 }
3598 ctxt->instate = oldstate;
3599 return(buf);
3600}
3601
3602void xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata);
3603
/*
 * Lookup table used by the inner loop of the character data fast path.
 * An entry is non-zero (it holds its own index) for the bytes that loop
 * may step over directly: TAB (0x09) and 0x20..0x7F except '&' (0x26),
 * '<' (0x3C) and ']' (0x5D). Every other byte, including LF, CR and the
 * whole 0x80..0xFF range, maps to 0; the entries past 0x7F are left to the
 * implicit zero fill of the initializer.
 */
static const unsigned char test_char_data[256] = {
    /* 0x00 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
               0x00, 0x09, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0x10 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0x20 */ 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x00, 0x27, /* no & */
               0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x2D, 0x2E, 0x2F,
    /* 0x30 */ 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,
               0x38, 0x39, 0x3A, 0x3B, 0x00, 0x3D, 0x3E, 0x3F, /* no < */
    /* 0x40 */ 0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47,
               0x48, 0x49, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F,
    /* 0x50 */ 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57,
               0x58, 0x59, 0x5A, 0x5B, 0x5C, 0x00, 0x5E, 0x5F, /* no ] */
    /* 0x60 */ 0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
               0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
    /* 0x70 */ 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
               0x78, 0x79, 0x7A, 0x7B, 0x7C, 0x7D, 0x7E, 0x7F
    /* 0x80 .. 0xFF: non-ascii, all zero */
};
3641
3642/**
3643 * xmlParseCharData:
3644 * @ctxt: an XML parser context
3645 * @cdata: int indicating whether we are within a CDATA section
3646 *
3647 * parse a CharData section.
3648 * if we are within a CDATA section ']]>' marks an end of section.
3649 *
3650 * The right angle bracket (>) may be represented using the string "&gt;",
3651 * and must, for compatibility, be escaped using "&gt;" or a character
3652 * reference when it appears in the string "]]>" in content, when that
3653 * string is not marking the end of a CDATA section.
3654 *
3655 * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
3656 */
3657
3658void
3659xmlParseCharData(xmlParserCtxtPtr ctxt, int cdata) {
3660 const xmlChar *in;
3661 int nbchar = 0;
3662 int line = ctxt->input->line;
3663 int col = ctxt->input->col;
3664 int ccol;
3665
3666 SHRINK;
3667 GROW;
3668 /*
3669 * Accelerated common case where input don't need to be
3670 * modified before passing it to the handler.
3671 */
3672 if (!cdata) {
3673 in = ctxt->input->cur;
3674 do {
3675get_more_space:
3676 while (*in == 0x20) { in++; ctxt->input->col++; }
3677 if (*in == 0xA) {
3678 do {
3679 ctxt->input->line++; ctxt->input->col = 1;
3680 in++;
3681 } while (*in == 0xA);
3682 goto get_more_space;
3683 }
3684 if (*in == '<') {
3685 nbchar = in - ctxt->input->cur;
3686 if (nbchar > 0) {
3687 const xmlChar *tmp = ctxt->input->cur;
3688 ctxt->input->cur = in;
3689
3690 if ((ctxt->sax != NULL) &&
3691 (ctxt->sax->ignorableWhitespace !=
3692 ctxt->sax->characters)) {
3693 if (areBlanks(ctxt, tmp, nbchar, 1)) {
3694 if (ctxt->sax->ignorableWhitespace != NULL)
3695 ctxt->sax->ignorableWhitespace(ctxt->userData,
3696 tmp, nbchar);
3697 } else {
3698 if (ctxt->sax->characters != NULL)
3699 ctxt->sax->characters(ctxt->userData,
3700 tmp, nbchar);
3701 if (*ctxt->space == -1)
3702 *ctxt->space = -2;
3703 }
3704 } else if ((ctxt->sax != NULL) &&
3705 (ctxt->sax->characters != NULL)) {
3706 ctxt->sax->characters(ctxt->userData,
3707 tmp, nbchar);
3708 }
3709 }
3710 return;
3711 }
3712
3713get_more:
3714 ccol = ctxt->input->col;
3715 while (test_char_data[*in]) {
3716 in++;
3717 ccol++;
3718 }
3719 ctxt->input->col = ccol;
3720 if (*in == 0xA) {
3721 do {
3722 ctxt->input->line++; ctxt->input->col = 1;
3723 in++;
3724 } while (*in == 0xA);
3725 goto get_more;
3726 }
3727 if (*in == ']') {
3728 if ((in[1] == ']') && (in[2] == '>')) {
3729 xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
3730 ctxt->input->cur = in;
3731 return;
3732 }
3733 in++;
3734 ctxt->input->col++;
3735 goto get_more;
3736 }
3737 nbchar = in - ctxt->input->cur;
3738 if (nbchar > 0) {
3739 if ((ctxt->sax != NULL) &&
3740 (ctxt->sax->ignorableWhitespace !=
3741 ctxt->sax->characters) &&
3742 (IS_BLANK_CH(*ctxt->input->cur))) {
3743 const xmlChar *tmp = ctxt->input->cur;
3744 ctxt->input->cur = in;
3745
3746 if (areBlanks(ctxt, tmp, nbchar, 0)) {
3747 if (ctxt->sax->ignorableWhitespace != NULL)
3748 ctxt->sax->ignorableWhitespace(ctxt->userData,
3749 tmp, nbchar);
3750 } else {
3751 if (ctxt->sax->characters != NULL)
3752 ctxt->sax->characters(ctxt->userData,
3753 tmp, nbchar);
3754 if (*ctxt->space == -1)
3755 *ctxt->space = -2;
3756 }
3757 line = ctxt->input->line;
3758 col = ctxt->input->col;
3759 } else if (ctxt->sax != NULL) {
3760 if (ctxt->sax->characters != NULL)
3761 ctxt->sax->characters(ctxt->userData,
3762 ctxt->input->cur, nbchar);
3763 line = ctxt->input->line;
3764 col = ctxt->input->col;
3765 }
3766 }
3767 ctxt->input->cur = in;
3768 if (*in == 0xD) {
3769 in++;
3770 if (*in == 0xA) {
3771 ctxt->input->cur = in;
3772 in++;
3773 ctxt->input->line++; ctxt->input->col = 1;
3774 continue; /* while */
3775 }
3776 in--;
3777 }
3778 if (*in == '<') {
3779 return;
3780 }
3781 if (*in == '&') {
3782 return;
3783 }
3784 SHRINK;
3785 GROW;
3786 in = ctxt->input->cur;
3787 } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09));
3788 nbchar = 0;
3789 }
3790 ctxt->input->line = line;
3791 ctxt->input->col = col;
3792 xmlParseCharDataComplex(ctxt, cdata);
3793}
3794
3795/**
3796 * xmlParseCharDataComplex:
3797 * @ctxt: an XML parser context
3798 * @cdata: int indicating whether we are within a CDATA section
3799 *
3800 * parse a CharData section.this is the fallback function
3801 * of xmlParseCharData() when the parsing requires handling
3802 * of non-ASCII characters.
3803 */
3804void
3805xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata) {
3806 xmlChar buf[XML_PARSER_BIG_BUFFER_SIZE + 5];
3807 int nbchar = 0;
3808 int cur, l;
3809 int count = 0;
3810
3811 SHRINK;
3812 GROW;
3813 cur = CUR_CHAR(l);
3814 while ((cur != '<') && /* checked */
3815 (cur != '&') &&
3816 (IS_CHAR(cur))) /* test also done in xmlCurrentChar() */ {
3817 if ((cur == ']') && (NXT(1) == ']') &&
3818 (NXT(2) == '>')) {
3819 if (cdata) break;
3820 else {
3821 xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
3822 }
3823 }
3824 COPY_BUF(l,buf,nbchar,cur);
3825 if (nbchar >= XML_PARSER_BIG_BUFFER_SIZE) {
3826 buf[nbchar] = 0;
3827
3828 /*
3829 * OK the segment is to be consumed as chars.
3830 */
3831 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
3832 if (areBlanks(ctxt, buf, nbchar, 0)) {
3833 if (ctxt->sax->ignorableWhitespace != NULL)
3834 ctxt->sax->ignorableWhitespace(ctxt->userData,
3835 buf, nbchar);
3836 } else {
3837 if (ctxt->sax->characters != NULL)
3838 ctxt->sax->characters(ctxt->userData, buf, nbchar);
3839 if ((ctxt->sax->characters !=
3840 ctxt->sax->ignorableWhitespace) &&
3841 (*ctxt->space == -1))
3842 *ctxt->space = -2;
3843 }
3844 }
3845 nbchar = 0;
3846 }
3847 count++;
3848 if (count > 50) {
3849 GROW;
3850 count = 0;
3851 }
3852 NEXTL(l);
3853 cur = CUR_CHAR(l);
3854 }
3855 if (nbchar != 0) {
3856 buf[nbchar] = 0;
3857 /*
3858 * OK the segment is to be consumed as chars.
3859 */
3860 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
3861 if (areBlanks(ctxt, buf, nbchar, 0)) {
3862 if (ctxt->sax->ignorableWhitespace != NULL)
3863 ctxt->sax->ignorableWhitespace(ctxt->userData, buf, nbchar);
3864 } else {
3865 if (ctxt->sax->characters != NULL)
3866 ctxt->sax->characters(ctxt->userData, buf, nbchar);
3867 if ((ctxt->sax->characters != ctxt->sax->ignorableWhitespace) &&
3868 (*ctxt->space == -1))
3869 *ctxt->space = -2;
3870 }
3871 }
3872 }
3873 if ((cur != 0) && (!IS_CHAR(cur))) {
3874 /* Generate the error and skip the offending character */
3875 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
3876 "PCDATA invalid Char value %d\n",
3877 cur);
3878 NEXTL(l);
3879 }
3880}
3881
3882/**
3883 * xmlParseExternalID:
3884 * @ctxt: an XML parser context
3885 * @publicID: a xmlChar** receiving PubidLiteral
3886 * @strict: indicate whether we should restrict parsing to only
3887 * production [75], see NOTE below
3888 *
3889 * Parse an External ID or a Public ID
3890 *
3891 * NOTE: Productions [75] and [83] interact badly since [75] can generate
3892 * 'PUBLIC' S PubidLiteral S SystemLiteral
3893 *
3894 * [75] ExternalID ::= 'SYSTEM' S SystemLiteral
3895 * | 'PUBLIC' S PubidLiteral S SystemLiteral
3896 *
3897 * [83] PublicID ::= 'PUBLIC' S PubidLiteral
3898 *
3899 * Returns the function returns SystemLiteral and in the second
3900 * case publicID receives PubidLiteral, is strict is off
3901 * it is possible to return NULL and have publicID set.
3902 */
3903
3904xmlChar *
3905xmlParseExternalID(xmlParserCtxtPtr ctxt, xmlChar **publicID, int strict) {
3906 xmlChar *URI = NULL;
3907
3908 SHRINK;
3909
3910 *publicID = NULL;
3911 if (CMP6(CUR_PTR, 'S', 'Y', 'S', 'T', 'E', 'M')) {
3912 SKIP(6);
3913 if (!IS_BLANK_CH(CUR)) {
3914 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
3915 "Space required after 'SYSTEM'\n");
3916 }
3917 SKIP_BLANKS;
3918 URI = xmlParseSystemLiteral(ctxt);
3919 if (URI == NULL) {
3920 xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
3921 }
3922 } else if (CMP6(CUR_PTR, 'P', 'U', 'B', 'L', 'I', 'C')) {
3923 SKIP(6);
3924 if (!IS_BLANK_CH(CUR)) {
3925 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
3926 "Space required after 'PUBLIC'\n");
3927 }
3928 SKIP_BLANKS;
3929 *publicID = xmlParsePubidLiteral(ctxt);
3930 if (*publicID == NULL) {
3931 xmlFatalErr(ctxt, XML_ERR_PUBID_REQUIRED, NULL);
3932 }
3933 if (strict) {
3934 /*
3935 * We don't handle [83] so "S SystemLiteral" is required.
3936 */
3937 if (!IS_BLANK_CH(CUR)) {
3938 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
3939 "Space required after the Public Identifier\n");
3940 }
3941 } else {
3942 /*
3943 * We handle [83] so we return immediately, if
3944 * "S SystemLiteral" is not detected. From a purely parsing
3945 * point of view that's a nice mess.
3946 */
3947 const xmlChar *ptr;
3948 GROW;
3949
3950 ptr = CUR_PTR;
3951 if (!IS_BLANK_CH(*ptr)) return(NULL);
3952
3953 while (IS_BLANK_CH(*ptr)) ptr++; /* TODO: dangerous, fix ! */
3954 if ((*ptr != '\'') && (*ptr != '"')) return(NULL);
3955 }
3956 SKIP_BLANKS;
3957 URI = xmlParseSystemLiteral(ctxt);
3958 if (URI == NULL) {
3959 xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
3960 }
3961 }
3962 return(URI);
3963}
3964
3965/**
3966 * xmlParseCommentComplex:
3967 * @ctxt: an XML parser context
3968 * @buf: the already parsed part of the buffer
3969 * @len: number of bytes filles in the buffer
3970 * @size: allocated size of the buffer
3971 *
3972 * Skip an XML (SGML) comment <!-- .... -->
3973 * The spec says that "For compatibility, the string "--" (double-hyphen)
3974 * must not occur within comments. "
3975 * This is the slow routine in case the accelerator for ascii didn't work
3976 *
3977 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
3978 */
3979static void
3980xmlParseCommentComplex(xmlParserCtxtPtr ctxt, xmlChar *buf, int len, int size) {
3981 int q, ql;
3982 int r, rl;
3983 int cur, l;
3984 xmlParserInputPtr input = ctxt->input;
3985 int count = 0;
3986
3987 if (buf == NULL) {
3988 len = 0;
3989 size = XML_PARSER_BUFFER_SIZE;
3990 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
3991 if (buf == NULL) {
3992 xmlErrMemory(ctxt, NULL);
3993 return;
3994 }
3995 }
3996 GROW; /* Assure there's enough input data */
3997 q = CUR_CHAR(ql);
3998 if (q == 0)
3999 goto not_terminated;
4000 if (!IS_CHAR(q)) {
4001 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4002 "xmlParseComment: invalid xmlChar value %d\n",
4003 q);
4004 xmlFree (buf);
4005 return;
4006 }
4007 NEXTL(ql);
4008 r = CUR_CHAR(rl);
4009 if (r == 0)
4010 goto not_terminated;
4011 if (!IS_CHAR(r)) {
4012 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4013 "xmlParseComment: invalid xmlChar value %d\n",
4014 q);
4015 xmlFree (buf);
4016 return;
4017 }
4018 NEXTL(rl);
4019 cur = CUR_CHAR(l);
4020 if (cur == 0)
4021 goto not_terminated;
4022 while (IS_CHAR(cur) && /* checked */
4023 ((cur != '>') ||
4024 (r != '-') || (q != '-'))) {
4025 if ((r == '-') && (q == '-')) {
4026 xmlFatalErr(ctxt, XML_ERR_HYPHEN_IN_COMMENT, NULL);
4027 }
4028 if (len + 5 >= size) {
4029 xmlChar *new_buf;
4030 size *= 2;
4031 new_buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
4032 if (new_buf == NULL) {
4033 xmlFree (buf);
4034 xmlErrMemory(ctxt, NULL);
4035 return;
4036 }
4037 buf = new_buf;
4038 }
4039 COPY_BUF(ql,buf,len,q);
4040 q = r;
4041 ql = rl;
4042 r = cur;
4043 rl = l;
4044
4045 count++;
4046 if (count > 50) {
4047 GROW;
4048 count = 0;
4049 }
4050 NEXTL(l);
4051 cur = CUR_CHAR(l);
4052 if (cur == 0) {
4053 SHRINK;
4054 GROW;
4055 cur = CUR_CHAR(l);
4056 }
4057 }
4058 buf[len] = 0;
4059 if (cur == 0) {
4060 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4061 "Comment not terminated \n<!--%.50s\n", buf);
4062 } else if (!IS_CHAR(cur)) {
4063 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4064 "xmlParseComment: invalid xmlChar value %d\n",
4065 cur);
4066 } else {
4067 if (input != ctxt->input) {
4068 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
4069 "Comment doesn't start and stop in the same entity\n");
4070 }
4071 NEXT;
4072 if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
4073 (!ctxt->disableSAX))
4074 ctxt->sax->comment(ctxt->userData, buf);
4075 }
4076 xmlFree(buf);
4077 return;
4078not_terminated:
4079 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4080 "Comment not terminated\n", NULL);
4081 xmlFree(buf);
4082 return;
4083}
4084
4085/**
4086 * xmlParseComment:
4087 * @ctxt: an XML parser context
4088 *
4089 * Skip an XML (SGML) comment <!-- .... -->
4090 * The spec says that "For compatibility, the string "--" (double-hyphen)
4091 * must not occur within comments. "
4092 *
4093 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
4094 */
4095void
4096xmlParseComment(xmlParserCtxtPtr ctxt) {
4097 xmlChar *buf = NULL;
4098 int size = XML_PARSER_BUFFER_SIZE;
4099 int len = 0;
4100 xmlParserInputState state;
4101 const xmlChar *in;
4102 int nbchar = 0, ccol;
4103
4104 /*
4105 * Check that there is a comment right here.
4106 */
4107 if ((RAW != '<') || (NXT(1) != '!') ||
4108 (NXT(2) != '-') || (NXT(3) != '-')) return;
4109
4110 state = ctxt->instate;
4111 ctxt->instate = XML_PARSER_COMMENT;
4112 SKIP(4);
4113 SHRINK;
4114 GROW;
4115
4116 /*
4117 * Accelerated common case where input don't need to be
4118 * modified before passing it to the handler.
4119 */
4120 in = ctxt->input->cur;
4121 do {
4122 if (*in == 0xA) {
4123 do {
4124 ctxt->input->line++; ctxt->input->col = 1;
4125 in++;
4126 } while (*in == 0xA);
4127 }
4128get_more:
4129 ccol = ctxt->input->col;
4130 while (((*in > '-') && (*in <= 0x7F)) ||
4131 ((*in >= 0x20) && (*in < '-')) ||
4132 (*in == 0x09)) {
4133 in++;
4134 ccol++;
4135 }
4136 ctxt->input->col = ccol;
4137 if (*in == 0xA) {
4138 do {
4139 ctxt->input->line++; ctxt->input->col = 1;
4140 in++;
4141 } while (*in == 0xA);
4142 goto get_more;
4143 }
4144 nbchar = in - ctxt->input->cur;
4145 /*
4146 * save current set of data
4147 */
4148 if (nbchar > 0) {
4149 if ((ctxt->sax != NULL) &&
4150 (ctxt->sax->comment != NULL)) {
4151 if (buf == NULL) {
4152 if ((*in == '-') && (in[1] == '-'))
4153 size = nbchar + 1;
4154 else
4155 size = XML_PARSER_BUFFER_SIZE + nbchar;
4156 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
4157 if (buf == NULL) {
4158 xmlErrMemory(ctxt, NULL);
4159 ctxt->instate = state;
4160 return;
4161 }
4162 len = 0;
4163 } else if (len + nbchar + 1 >= size) {
4164 xmlChar *new_buf;
4165 size += len + nbchar + XML_PARSER_BUFFER_SIZE;
4166 new_buf = (xmlChar *) xmlRealloc(buf,
4167 size * sizeof(xmlChar));
4168 if (new_buf == NULL) {
4169 xmlFree (buf);
4170 xmlErrMemory(ctxt, NULL);
4171 ctxt->instate = state;
4172 return;
4173 }
4174 buf = new_buf;
4175 }
4176 memcpy(&buf[len], ctxt->input->cur, nbchar);
4177 len += nbchar;
4178 buf[len] = 0;
4179 }
4180 }
4181 ctxt->input->cur = in;
4182 if (*in == 0xA) {
4183 in++;
4184 ctxt->input->line++; ctxt->input->col = 1;
4185 }
4186 if (*in == 0xD) {
4187 in++;
4188 if (*in == 0xA) {
4189 ctxt->input->cur = in;
4190 in++;
4191 ctxt->input->line++; ctxt->input->col = 1;
4192 continue; /* while */
4193 }
4194 in--;
4195 }
4196 SHRINK;
4197 GROW;
4198 in = ctxt->input->cur;
4199 if (*in == '-') {
4200 if (in[1] == '-') {
4201 if (in[2] == '>') {
4202 SKIP(3);
4203 if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
4204 (!ctxt->disableSAX)) {
4205 if (buf != NULL)
4206 ctxt->sax->comment(ctxt->userData, buf);
4207 else
4208 ctxt->sax->comment(ctxt->userData, BAD_CAST "");
4209 }
4210 if (buf != NULL)
4211 xmlFree(buf);
4212 ctxt->instate = state;
4213 return;
4214 }
4215 if (buf != NULL)
4216 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4217 "Comment not terminated \n<!--%.50s\n",
4218 buf);
4219 else
4220 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4221 "Comment not terminated \n", NULL);
4222 in++;
4223 ctxt->input->col++;
4224 }
4225 in++;
4226 ctxt->input->col++;
4227 goto get_more;
4228 }
4229 } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09));
4230 xmlParseCommentComplex(ctxt, buf, len, size);
4231 ctxt->instate = state;
4232 return;
4233}
4234
4235
4236/**
4237 * xmlParsePITarget:
4238 * @ctxt: an XML parser context
4239 *
4240 * parse the name of a PI
4241 *
4242 * [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
4243 *
4244 * Returns the PITarget name or NULL
4245 */
4246
4247const xmlChar *
4248xmlParsePITarget(xmlParserCtxtPtr ctxt) {
4249 const xmlChar *name;
4250
4251 name = xmlParseName(ctxt);
4252 if ((name != NULL) &&
4253 ((name[0] == 'x') || (name[0] == 'X')) &&
4254 ((name[1] == 'm') || (name[1] == 'M')) &&
4255 ((name[2] == 'l') || (name[2] == 'L'))) {
4256 int i;
4257 if ((name[0] == 'x') && (name[1] == 'm') &&
4258 (name[2] == 'l') && (name[3] == 0)) {
4259 xmlFatalErrMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
4260 "XML declaration allowed only at the start of the document\n");
4261 return(name);
4262 } else if (name[3] == 0) {
4263 xmlFatalErr(ctxt, XML_ERR_RESERVED_XML_NAME, NULL);
4264 return(name);
4265 }
4266 for (i = 0;;i++) {
4267 if (xmlW3CPIs[i] == NULL) break;
4268 if (xmlStrEqual(name, (const xmlChar *)xmlW3CPIs[i]))
4269 return(name);
4270 }
4271 xmlWarningMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
4272 "xmlParsePITarget: invalid name prefix 'xml'\n",
4273 NULL, NULL);
4274 }
4275 return(name);
4276}
4277
#ifdef LIBXML_CATALOG_ENABLED
/**
 * xmlParseCatalogPI:
 * @ctxt:  an XML parser context
 * @catalog:  the PI value string
 *
 * parse an XML Catalog Processing Instruction.
 *
 * <?oasis-xml-catalog catalog="http://example.com/catalog.xml"?>
 *
 * Occurs only if allowed by the user and if happening in the Misc
 * part of the document before any doctype informations
 * This will add the given catalog to the parsing context in order
 * to be used if there is a resolution need further down in the document
 *
 * The expected content is: blanks, "catalog", blanks, '=', blanks, a
 * quoted URL, blanks. A missing '=' makes the function return silently;
 * any other deviation produces an XML_WAR_CATALOG_PI warning.
 */

static void
xmlParseCatalogPI(xmlParserCtxtPtr ctxt, const xmlChar *catalog) {
    const xmlChar *p = catalog;
    const xmlChar *start;
    xmlChar *URL = NULL;
    xmlChar quote;

    /* pseudo-attribute name */
    for (; IS_BLANK_CH(*p); p++)
        ;
    if (xmlStrncmp(p, BAD_CAST"catalog", 7) != 0)
        goto syntax_error;

    /* '=' sign; its absence is not reported */
    for (p += 7; IS_BLANK_CH(*p); p++)
        ;
    if (*p != '=')
        return;

    /* opening quote */
    for (p++; IS_BLANK_CH(*p); p++)
        ;
    quote = *p;
    if ((quote != '\'') && (quote != '"'))
        goto syntax_error;

    /* quoted value, up to the matching quote */
    start = ++p;
    while ((*p != 0) && (*p != quote))
        p++;
    if (*p == 0)
        goto syntax_error;
    URL = xmlStrndup(start, p - start);

    /* only blanks may follow the closing quote */
    for (p++; IS_BLANK_CH(*p); p++)
        ;
    if (*p != 0)
        goto syntax_error;

    if (URL != NULL) {
        ctxt->catalogs = xmlCatalogAddLocal(ctxt->catalogs, URL);
        xmlFree(URL);
    }
    return;

syntax_error:
    xmlWarningMsg(ctxt, XML_WAR_CATALOG_PI,
                  "Catalog PI syntax error: %s\n",
                  catalog, NULL);
    if (URL != NULL)
        xmlFree(URL);
}
#endif
4339
4340/**
4341 * xmlParsePI:
4342 * @ctxt: an XML parser context
4343 *
4344 * parse an XML Processing Instruction.
4345 *
4346 * [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
4347 *
4348 * The processing is transfered to SAX once parsed.
4349 */
4350
4351void
4352xmlParsePI(xmlParserCtxtPtr ctxt) {
4353 xmlChar *buf = NULL;
4354 int len = 0;
4355 int size = XML_PARSER_BUFFER_SIZE;
4356 int cur, l;
4357 const xmlChar *target;
4358 xmlParserInputState state;
4359 int count = 0;
4360
4361 if ((RAW == '<') && (NXT(1) == '?')) {
4362 xmlParserInputPtr input = ctxt->input;
4363 state = ctxt->instate;
4364 ctxt->instate = XML_PARSER_PI;
4365 /*
4366 * this is a Processing Instruction.
4367 */
4368 SKIP(2);
4369 SHRINK;
4370
4371 /*
4372 * Parse the target name and check for special support like
4373 * namespace.
4374 */
4375 target = xmlParsePITarget(ctxt);
4376 if (target != NULL) {
4377 if ((RAW == '?') && (NXT(1) == '>')) {
4378 if (input != ctxt->input) {
4379 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
4380 "PI declaration doesn't start and stop in the same entity\n");
4381 }
4382 SKIP(2);
4383
4384 /*
4385 * SAX: PI detected.
4386 */
4387 if ((ctxt->sax) && (!ctxt->disableSAX) &&
4388 (ctxt->sax->processingInstruction != NULL))
4389 ctxt->sax->processingInstruction(ctxt->userData,
4390 target, NULL);
4391 if (ctxt->instate != XML_PARSER_EOF)
4392 ctxt->instate = state;
4393 return;
4394 }
4395 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
4396 if (buf == NULL) {
4397 xmlErrMemory(ctxt, NULL);
4398 ctxt->instate = state;
4399 return;
4400 }
4401 cur = CUR;
4402 if (!IS_BLANK(cur)) {
4403 xmlFatalErrMsgStr(ctxt, XML_ERR_SPACE_REQUIRED,
4404 "ParsePI: PI %s space expected\n", target);
4405 }
4406 SKIP_BLANKS;
4407 cur = CUR_CHAR(l);
4408 while (IS_CHAR(cur) && /* checked */
4409 ((cur != '?') || (NXT(1) != '>'))) {
4410 if (len + 5 >= size) {
4411 xmlChar *tmp;
4412
4413 size *= 2;
4414 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
4415 if (tmp == NULL) {
4416 xmlErrMemory(ctxt, NULL);
4417 xmlFree(buf);
4418 ctxt->instate = state;
4419 return;
4420 }
4421 buf = tmp;
4422 }
4423 count++;
4424 if (count > 50) {
4425 GROW;
4426 count = 0;
4427 }
4428 COPY_BUF(l,buf,len,cur);
4429 NEXTL(l);
4430 cur = CUR_CHAR(l);
4431 if (cur == 0) {
4432 SHRINK;
4433 GROW;
4434 cur = CUR_CHAR(l);
4435 }
4436 }
4437 buf[len] = 0;
4438 if (cur != '?') {
4439 xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
4440 "ParsePI: PI %s never end ...\n", target);
4441 } else {
4442 if (input != ctxt->input) {
4443 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4444 "PI declaration doesn't start and stop in the same entity\n");
4445 }
4446 SKIP(2);
4447
4448#ifdef LIBXML_CATALOG_ENABLED
4449 if (((state == XML_PARSER_MISC) ||
4450 (state == XML_PARSER_START)) &&
4451 (xmlStrEqual(target, XML_CATALOG_PI))) {
4452 xmlCatalogAllow allow = xmlCatalogGetDefaults();
4453 if ((allow == XML_CATA_ALLOW_DOCUMENT) ||
4454 (allow == XML_CATA_ALLOW_ALL))
4455 xmlParseCatalogPI(ctxt, buf);
4456 }
4457#endif
4458
4459
4460 /*
4461 * SAX: PI detected.
4462 */
4463 if ((ctxt->sax) && (!ctxt->disableSAX) &&
4464 (ctxt->sax->processingInstruction != NULL))
4465 ctxt->sax->processingInstruction(ctxt->userData,
4466 target, buf);
4467 }
4468 xmlFree(buf);
4469 } else {
4470 xmlFatalErr(ctxt, XML_ERR_PI_NOT_STARTED, NULL);
4471 }
4472 if (ctxt->instate != XML_PARSER_EOF)
4473 ctxt->instate = state;
4474 }
4475}
4476
4477/**
4478 * xmlParseNotationDecl:
4479 * @ctxt: an XML parser context
4480 *
4481 * parse a notation declaration
4482 *
4483 * [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID | PublicID) S? '>'
4484 *
4485 * Hence there is actually 3 choices:
4486 * 'PUBLIC' S PubidLiteral
4487 * 'PUBLIC' S PubidLiteral S SystemLiteral
4488 * and 'SYSTEM' S SystemLiteral
4489 *
4490 * See the NOTE on xmlParseExternalID().
4491 */
4492
4493void
4494xmlParseNotationDecl(xmlParserCtxtPtr ctxt) {
4495 const xmlChar *name;
4496 xmlChar *Pubid;
4497 xmlChar *Systemid;
4498
4499 if (CMP10(CUR_PTR, '<', '!', 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
4500 xmlParserInputPtr input = ctxt->input;
4501 SHRINK;
4502 SKIP(10);
4503 if (!IS_BLANK_CH(CUR)) {
4504 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4505 "Space required after '<!NOTATION'\n");
4506 return;
4507 }
4508 SKIP_BLANKS;
4509
4510 name = xmlParseName(ctxt);
4511 if (name == NULL) {
4512 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
4513 return;
4514 }
4515 if (!IS_BLANK_CH(CUR)) {
4516 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4517 "Space required after the NOTATION name'\n");
4518 return;
4519 }
4520 SKIP_BLANKS;
4521
4522 /*
4523 * Parse the IDs.
4524 */
4525 Systemid = xmlParseExternalID(ctxt, &Pubid, 0);
4526 SKIP_BLANKS;
4527
4528 if (RAW == '>') {
4529 if (input != ctxt->input) {
4530 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4531 "Notation declaration doesn't start and stop in the same entity\n");
4532 }
4533 NEXT;
4534 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
4535 (ctxt->sax->notationDecl != NULL))
4536 ctxt->sax->notationDecl(ctxt->userData, name, Pubid, Systemid);
4537 } else {
4538 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
4539 }
4540 if (Systemid != NULL) xmlFree(Systemid);
4541 if (Pubid != NULL) xmlFree(Pubid);
4542 }
4543}
4544
4545/**
4546 * xmlParseEntityDecl:
4547 * @ctxt: an XML parser context
4548 *
4549 * parse <!ENTITY declarations
4550 *
4551 * [70] EntityDecl ::= GEDecl | PEDecl
4552 *
4553 * [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'
4554 *
4555 * [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'
4556 *
4557 * [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?)
4558 *
4559 * [74] PEDef ::= EntityValue | ExternalID
4560 *
4561 * [76] NDataDecl ::= S 'NDATA' S Name
4562 *
4563 * [ VC: Notation Declared ]
4564 * The Name must match the declared name of a notation.
4565 */
4566
4567void
4568xmlParseEntityDecl(xmlParserCtxtPtr ctxt) {
4569 const xmlChar *name = NULL;
4570 xmlChar *value = NULL;
4571 xmlChar *URI = NULL, *literal = NULL;
4572 const xmlChar *ndata = NULL;
4573 int isParameter = 0;
4574 xmlChar *orig = NULL;
4575 int skipped;
4576 unsigned long oldnbent = ctxt->nbentities;
4577
4578 /* GROW; done in the caller */
4579 if (CMP8(CUR_PTR, '<', '!', 'E', 'N', 'T', 'I', 'T', 'Y')) {
4580 xmlParserInputPtr input = ctxt->input;
4581 SHRINK;
4582 SKIP(8);
4583 skipped = SKIP_BLANKS;
4584 if (skipped == 0) {
4585 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4586 "Space required after '<!ENTITY'\n");
4587 }
4588
4589 if (RAW == '%') {
4590 NEXT;
4591 skipped = SKIP_BLANKS;
4592 if (skipped == 0) {
4593 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4594 "Space required after '%'\n");
4595 }
4596 isParameter = 1;
4597 }
4598
4599 name = xmlParseName(ctxt);
4600 if (name == NULL) {
4601 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
4602 "xmlParseEntityDecl: no name\n");
4603 return;
4604 }
4605 skipped = SKIP_BLANKS;
4606 if (skipped == 0) {
4607 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4608 "Space required after the entity name\n");
4609 }
4610
4611 ctxt->instate = XML_PARSER_ENTITY_DECL;
4612 /*
4613 * handle the various case of definitions...
4614 */
4615 if (isParameter) {
4616 if ((RAW == '"') || (RAW == '\'')) {
4617 value = xmlParseEntityValue(ctxt, &orig);
4618 if (value) {
4619 if ((ctxt->sax != NULL) &&
4620 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
4621 ctxt->sax->entityDecl(ctxt->userData, name,
4622 XML_INTERNAL_PARAMETER_ENTITY,
4623 NULL, NULL, value);
4624 }
4625 } else {
4626 URI = xmlParseExternalID(ctxt, &literal, 1);
4627 if ((URI == NULL) && (literal == NULL)) {
4628 xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
4629 }
4630 if (URI) {
4631 xmlURIPtr uri;
4632
4633 uri = xmlParseURI((const char *) URI);
4634 if (uri == NULL) {
4635 xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI,
4636 "Invalid URI: %s\n", URI);
4637 /*
4638 * This really ought to be a well formedness error
4639 * but the XML Core WG decided otherwise c.f. issue
4640 * E26 of the XML erratas.
4641 */
4642 } else {
4643 if (uri->fragment != NULL) {
4644 /*
4645 * Okay this is foolish to block those but not
4646 * invalid URIs.
4647 */
4648 xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
4649 } else {
4650 if ((ctxt->sax != NULL) &&
4651 (!ctxt->disableSAX) &&
4652 (ctxt->sax->entityDecl != NULL))
4653 ctxt->sax->entityDecl(ctxt->userData, name,
4654 XML_EXTERNAL_PARAMETER_ENTITY,
4655 literal, URI, NULL);
4656 }
4657 xmlFreeURI(uri);
4658 }
4659 }
4660 }
4661 } else {
4662 if ((RAW == '"') || (RAW == '\'')) {
4663 value = xmlParseEntityValue(ctxt, &orig);
4664 if ((ctxt->sax != NULL) &&
4665 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
4666 ctxt->sax->entityDecl(ctxt->userData, name,
4667 XML_INTERNAL_GENERAL_ENTITY,
4668 NULL, NULL, value);
4669 /*
4670 * For expat compatibility in SAX mode.
4671 */
4672 if ((ctxt->myDoc == NULL) ||
4673 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
4674 if (ctxt->myDoc == NULL) {
4675 ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
4676 }
4677 if (ctxt->myDoc->intSubset == NULL)
4678 ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
4679 BAD_CAST "fake", NULL, NULL);
4680
4681 xmlSAX2EntityDecl(ctxt, name, XML_INTERNAL_GENERAL_ENTITY,
4682 NULL, NULL, value);
4683 }
4684 } else {
4685 URI = xmlParseExternalID(ctxt, &literal, 1);
4686 if ((URI == NULL) && (literal == NULL)) {
4687 xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
4688 }
4689 if (URI) {
4690 xmlURIPtr uri;
4691
4692 uri = xmlParseURI((const char *)URI);
4693 if (uri == NULL) {
4694 xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI,
4695 "Invalid URI: %s\n", URI);
4696 /*
4697 * This really ought to be a well formedness error
4698 * but the XML Core WG decided otherwise c.f. issue
4699 * E26 of the XML erratas.
4700 */
4701 } else {
4702 if (uri->fragment != NULL) {
4703 /*
4704 * Okay this is foolish to block those but not
4705 * invalid URIs.
4706 */
4707 xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
4708 }
4709 xmlFreeURI(uri);
4710 }
4711 }
4712 if ((RAW != '>') && (!IS_BLANK_CH(CUR))) {
4713 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4714 "Space required before 'NDATA'\n");
4715 }
4716 SKIP_BLANKS;
4717 if (CMP5(CUR_PTR, 'N', 'D', 'A', 'T', 'A')) {
4718 SKIP(5);
4719 if (!IS_BLANK_CH(CUR)) {
4720 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4721 "Space required after 'NDATA'\n");
4722 }
4723 SKIP_BLANKS;
4724 ndata = xmlParseName(ctxt);
4725 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
4726 (ctxt->sax->unparsedEntityDecl != NULL))
4727 ctxt->sax->unparsedEntityDecl(ctxt->userData, name,
4728 literal, URI, ndata);
4729 } else {
4730 if ((ctxt->sax != NULL) &&
4731 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
4732 ctxt->sax->entityDecl(ctxt->userData, name,
4733 XML_EXTERNAL_GENERAL_PARSED_ENTITY,
4734 literal, URI, NULL);
4735 /*
4736 * For expat compatibility in SAX mode.
4737 * assuming the entity repalcement was asked for
4738 */
4739 if ((ctxt->replaceEntities != 0) &&
4740 ((ctxt->myDoc == NULL) ||
4741 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE)))) {
4742 if (ctxt->myDoc == NULL) {
4743 ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
4744 }
4745
4746 if (ctxt->myDoc->intSubset == NULL)
4747 ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
4748 BAD_CAST "fake", NULL, NULL);
4749 xmlSAX2EntityDecl(ctxt, name,
4750 XML_EXTERNAL_GENERAL_PARSED_ENTITY,
4751 literal, URI, NULL);
4752 }
4753 }
4754 }
4755 }
4756 SKIP_BLANKS;
4757 if (RAW != '>') {
4758 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_NOT_FINISHED,
4759 "xmlParseEntityDecl: entity %s not terminated\n", name);
4760 } else {
4761 if (input != ctxt->input) {
4762 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
4763 "Entity declaration doesn't start and stop in the same entity\n");
4764 }
4765 NEXT;
4766 }
4767 if (orig != NULL) {
4768 /*
4769 * Ugly mechanism to save the raw entity value.
4770 */
4771 xmlEntityPtr cur = NULL;
4772
4773 if (isParameter) {
4774 if ((ctxt->sax != NULL) &&
4775 (ctxt->sax->getParameterEntity != NULL))
4776 cur = ctxt->sax->getParameterEntity(ctxt->userData, name);
4777 } else {
4778 if ((ctxt->sax != NULL) &&
4779 (ctxt->sax->getEntity != NULL))
4780 cur = ctxt->sax->getEntity(ctxt->userData, name);
4781 if ((cur == NULL) && (ctxt->userData==ctxt)) {
4782 cur = xmlSAX2GetEntity(ctxt, name);
4783 }
4784 }
4785 if (cur != NULL) {
4786 if ((cur->owner != 0) || (cur->children == NULL)) {
4787 cur->owner = ctxt->nbentities - oldnbent;
4788 if (cur->owner == 0)
4789 cur->owner = 1;
4790 }
4791 if (cur->orig != NULL)
4792 xmlFree(orig);
4793 else
4794 cur->orig = orig;
4795 } else
4796 xmlFree(orig);
4797 }
4798 if (value != NULL) xmlFree(value);
4799 if (URI != NULL) xmlFree(URI);
4800 if (literal != NULL) xmlFree(literal);
4801 }
4802}
4803
4804/**
4805 * xmlParseDefaultDecl:
4806 * @ctxt: an XML parser context
4807 * @value: Receive a possible fixed default value for the attribute
4808 *
4809 * Parse an attribute default declaration
4810 *
4811 * [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue)
4812 *
4813 * [ VC: Required Attribute ]
4814 * if the default declaration is the keyword #REQUIRED, then the
4815 * attribute must be specified for all elements of the type in the
4816 * attribute-list declaration.
4817 *
4818 * [ VC: Attribute Default Legal ]
4819 * The declared default value must meet the lexical constraints of
4820 * the declared attribute type c.f. xmlValidateAttributeDecl()
4821 *
4822 * [ VC: Fixed Attribute Default ]
4823 * if an attribute has a default value declared with the #FIXED
4824 * keyword, instances of that attribute must match the default value.
4825 *
4826 * [ WFC: No < in Attribute Values ]
4827 * handled in xmlParseAttValue()
4828 *
4829 * returns: XML_ATTRIBUTE_NONE, XML_ATTRIBUTE_REQUIRED, XML_ATTRIBUTE_IMPLIED
4830 * or XML_ATTRIBUTE_FIXED.
4831 */
4832
4833int
4834xmlParseDefaultDecl(xmlParserCtxtPtr ctxt, xmlChar **value) {
4835 int val;
4836 xmlChar *ret;
4837
4838 *value = NULL;
4839 if (CMP9(CUR_PTR, '#', 'R', 'E', 'Q', 'U', 'I', 'R', 'E', 'D')) {
4840 SKIP(9);
4841 return(XML_ATTRIBUTE_REQUIRED);
4842 }
4843 if (CMP8(CUR_PTR, '#', 'I', 'M', 'P', 'L', 'I', 'E', 'D')) {
4844 SKIP(8);
4845 return(XML_ATTRIBUTE_IMPLIED);
4846 }
4847 val = XML_ATTRIBUTE_NONE;
4848 if (CMP6(CUR_PTR, '#', 'F', 'I', 'X', 'E', 'D')) {
4849 SKIP(6);
4850 val = XML_ATTRIBUTE_FIXED;
4851 if (!IS_BLANK_CH(CUR)) {
4852 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4853 "Space required after '#FIXED'\n");
4854 }
4855 SKIP_BLANKS;
4856 }
4857 ret = xmlParseAttValue(ctxt);
4858 ctxt->instate = XML_PARSER_DTD;
4859 if (ret == NULL) {
4860 xmlFatalErrMsg(ctxt, (xmlParserErrors)ctxt->errNo,
4861 "Attribute default value declaration error\n");
4862 } else
4863 *value = ret;
4864 return(val);
4865}
4866
4867/**
4868 * xmlParseNotationType:
4869 * @ctxt: an XML parser context
4870 *
4871 * parse an Notation attribute type.
4872 *
4873 * Note: the leading 'NOTATION' S part has already being parsed...
4874 *
4875 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
4876 *
4877 * [ VC: Notation Attributes ]
4878 * Values of this type must match one of the notation names included
4879 * in the declaration; all notation names in the declaration must be declared.
4880 *
4881 * Returns: the notation attribute tree built while parsing
4882 */
4883
4884xmlEnumerationPtr
4885xmlParseNotationType(xmlParserCtxtPtr ctxt) {
4886 const xmlChar *name;
4887 xmlEnumerationPtr ret = NULL, last = NULL, cur;
4888
4889 if (RAW != '(') {
4890 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
4891 return(NULL);
4892 }
4893 SHRINK;
4894 do {
4895 NEXT;
4896 SKIP_BLANKS;
4897 name = xmlParseName(ctxt);
4898 if (name == NULL) {
4899 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
4900 "Name expected in NOTATION declaration\n");
4901 xmlFreeEnumeration(ret);
4902 return(NULL);
4903 }
4904 cur = xmlCreateEnumeration(name);
4905 if (cur == NULL) {
4906 xmlFreeEnumeration(ret);
4907 return(NULL);
4908 }
4909 if (last == NULL) ret = last = cur;
4910 else {
4911 last->next = cur;
4912 last = cur;
4913 }
4914 SKIP_BLANKS;
4915 } while (RAW == '|');
4916 if (RAW != ')') {
4917 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
4918 xmlFreeEnumeration(ret);
4919 return(NULL);
4920 }
4921 NEXT;
4922 return(ret);
4923}
4924
4925/**
4926 * xmlParseEnumerationType:
4927 * @ctxt: an XML parser context
4928 *
4929 * parse an Enumeration attribute type.
4930 *
4931 * [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'
4932 *
4933 * [ VC: Enumeration ]
4934 * Values of this type must match one of the Nmtoken tokens in
4935 * the declaration
4936 *
4937 * Returns: the enumeration attribute tree built while parsing
4938 */
4939
4940xmlEnumerationPtr
4941xmlParseEnumerationType(xmlParserCtxtPtr ctxt) {
4942 xmlChar *name;
4943 xmlEnumerationPtr ret = NULL, last = NULL, cur;
4944
4945 if (RAW != '(') {
4946 xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_STARTED, NULL);
4947 return(NULL);
4948 }
4949 SHRINK;
4950 do {
4951 NEXT;
4952 SKIP_BLANKS;
4953 name = xmlParseNmtoken(ctxt);
4954 if (name == NULL) {
4955 xmlFatalErr(ctxt, XML_ERR_NMTOKEN_REQUIRED, NULL);
4956 return(ret);
4957 }
4958 cur = xmlCreateEnumeration(name);
4959 xmlFree(name);
4960 if (cur == NULL) {
4961 xmlFreeEnumeration(ret);
4962 return(NULL);
4963 }
4964 if (last == NULL) ret = last = cur;
4965 else {
4966 last->next = cur;
4967 last = cur;
4968 }
4969 SKIP_BLANKS;
4970 } while (RAW == '|');
4971 if (RAW != ')') {
4972 xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_FINISHED, NULL);
4973 return(ret);
4974 }
4975 NEXT;
4976 return(ret);
4977}
4978
4979/**
4980 * xmlParseEnumeratedType:
4981 * @ctxt: an XML parser context
4982 * @tree: the enumeration tree built while parsing
4983 *
4984 * parse an Enumerated attribute type.
4985 *
4986 * [57] EnumeratedType ::= NotationType | Enumeration
4987 *
4988 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
4989 *
4990 *
4991 * Returns: XML_ATTRIBUTE_ENUMERATION or XML_ATTRIBUTE_NOTATION
4992 */
4993
4994int
4995xmlParseEnumeratedType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
4996 if (CMP8(CUR_PTR, 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
4997 SKIP(8);
4998 if (!IS_BLANK_CH(CUR)) {
4999 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5000 "Space required after 'NOTATION'\n");
5001 return(0);
5002 }
5003 SKIP_BLANKS;
5004 *tree = xmlParseNotationType(ctxt);
5005 if (*tree == NULL) return(0);
5006 return(XML_ATTRIBUTE_NOTATION);
5007 }
5008 *tree = xmlParseEnumerationType(ctxt);
5009 if (*tree == NULL) return(0);
5010 return(XML_ATTRIBUTE_ENUMERATION);
5011}
5012
5013/**
5014 * xmlParseAttributeType:
5015 * @ctxt: an XML parser context
5016 * @tree: the enumeration tree built while parsing
5017 *
5018 * parse the Attribute list def for an element
5019 *
5020 * [54] AttType ::= StringType | TokenizedType | EnumeratedType
5021 *
5022 * [55] StringType ::= 'CDATA'
5023 *
5024 * [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' |
5025 * 'ENTITIES' | 'NMTOKEN' | 'NMTOKENS'
5026 *
5027 * Validity constraints for attribute values syntax are checked in
5028 * xmlValidateAttributeValue()
5029 *
5030 * [ VC: ID ]
5031 * Values of type ID must match the Name production. A name must not
5032 * appear more than once in an XML document as a value of this type;
5033 * i.e., ID values must uniquely identify the elements which bear them.
5034 *
5035 * [ VC: One ID per Element Type ]
5036 * No element type may have more than one ID attribute specified.
5037 *
5038 * [ VC: ID Attribute Default ]
5039 * An ID attribute must have a declared default of #IMPLIED or #REQUIRED.
5040 *
5041 * [ VC: IDREF ]
5042 * Values of type IDREF must match the Name production, and values
5043 * of type IDREFS must match Names; each IDREF Name must match the value
5044 * of an ID attribute on some element in the XML document; i.e. IDREF
5045 * values must match the value of some ID attribute.
5046 *
5047 * [ VC: Entity Name ]
5048 * Values of type ENTITY must match the Name production, values
5049 * of type ENTITIES must match Names; each Entity Name must match the
5050 * name of an unparsed entity declared in the DTD.
5051 *
5052 * [ VC: Name Token ]
5053 * Values of type NMTOKEN must match the Nmtoken production; values
5054 * of type NMTOKENS must match Nmtokens.
5055 *
5056 * Returns the attribute type
5057 */
5058int
5059xmlParseAttributeType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
5060 SHRINK;
5061 if (CMP5(CUR_PTR, 'C', 'D', 'A', 'T', 'A')) {
5062 SKIP(5);
5063 return(XML_ATTRIBUTE_CDATA);
5064 } else if (CMP6(CUR_PTR, 'I', 'D', 'R', 'E', 'F', 'S')) {
5065 SKIP(6);
5066 return(XML_ATTRIBUTE_IDREFS);
5067 } else if (CMP5(CUR_PTR, 'I', 'D', 'R', 'E', 'F')) {
5068 SKIP(5);
5069 return(XML_ATTRIBUTE_IDREF);
5070 } else if ((RAW == 'I') && (NXT(1) == 'D')) {
5071 SKIP(2);
5072 return(XML_ATTRIBUTE_ID);
5073 } else if (CMP6(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'Y')) {
5074 SKIP(6);
5075 return(XML_ATTRIBUTE_ENTITY);
5076 } else if (CMP8(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'I', 'E', 'S')) {
5077 SKIP(8);
5078 return(XML_ATTRIBUTE_ENTITIES);
5079 } else if (CMP8(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N', 'S')) {
5080 SKIP(8);
5081 return(XML_ATTRIBUTE_NMTOKENS);
5082 } else if (CMP7(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N')) {
5083 SKIP(7);
5084 return(XML_ATTRIBUTE_NMTOKEN);
5085 }
5086 return(xmlParseEnumeratedType(ctxt, tree));
5087}
5088
5089/**
5090 * xmlParseAttributeListDecl:
5091 * @ctxt: an XML parser context
5092 *
5093 * : parse the Attribute list def for an element
5094 *
5095 * [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
5096 *
5097 * [53] AttDef ::= S Name S AttType S DefaultDecl
5098 *
5099 */
5100void
5101xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt) {
5102 const xmlChar *elemName;
5103 const xmlChar *attrName;
5104 xmlEnumerationPtr tree;
5105
5106 if (CMP9(CUR_PTR, '<', '!', 'A', 'T', 'T', 'L', 'I', 'S', 'T')) {
5107 xmlParserInputPtr input = ctxt->input;
5108
5109 SKIP(9);
5110 if (!IS_BLANK_CH(CUR)) {
5111 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5112 "Space required after '<!ATTLIST'\n");
5113 }
5114 SKIP_BLANKS;
5115 elemName = xmlParseName(ctxt);
5116 if (elemName == NULL) {
5117 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5118 "ATTLIST: no name for Element\n");
5119 return;
5120 }
5121 SKIP_BLANKS;
5122 GROW;
5123 while (RAW != '>') {
5124 const xmlChar *check = CUR_PTR;
5125 int type;
5126 int def;
5127 xmlChar *defaultValue = NULL;
5128
5129 GROW;
5130 tree = NULL;
5131 attrName = xmlParseName(ctxt);
5132 if (attrName == NULL) {
5133 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5134 "ATTLIST: no name for Attribute\n");
5135 break;
5136 }
5137 GROW;
5138 if (!IS_BLANK_CH(CUR)) {
5139 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5140 "Space required after the attribute name\n");
5141 break;
5142 }
5143 SKIP_BLANKS;
5144
5145 type = xmlParseAttributeType(ctxt, &tree);
5146 if (type <= 0) {
5147 break;
5148 }
5149
5150 GROW;
5151 if (!IS_BLANK_CH(CUR)) {
5152 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5153 "Space required after the attribute type\n");
5154 if (tree != NULL)
5155 xmlFreeEnumeration(tree);
5156 break;
5157 }
5158 SKIP_BLANKS;
5159
5160 def = xmlParseDefaultDecl(ctxt, &defaultValue);
5161 if (def <= 0) {
5162 if (defaultValue != NULL)
5163 xmlFree(defaultValue);
5164 if (tree != NULL)
5165 xmlFreeEnumeration(tree);
5166 break;
5167 }
5168
5169 GROW;
5170 if (RAW != '>') {
5171 if (!IS_BLANK_CH(CUR)) {
5172 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5173 "Space required after the attribute default value\n");
5174 if (defaultValue != NULL)
5175 xmlFree(defaultValue);
5176 if (tree != NULL)
5177 xmlFreeEnumeration(tree);
5178 break;
5179 }
5180 SKIP_BLANKS;
5181 }
5182 if (check == CUR_PTR) {
5183 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
5184 "in xmlParseAttributeListDecl\n");
5185 if (defaultValue != NULL)
5186 xmlFree(defaultValue);
5187 if (tree != NULL)
5188 xmlFreeEnumeration(tree);
5189 break;
5190 }
5191 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5192 (ctxt->sax->attributeDecl != NULL))
5193 ctxt->sax->attributeDecl(ctxt->userData, elemName, attrName,
5194 type, def, defaultValue, tree);
5195 else if (tree != NULL)
5196 xmlFreeEnumeration(tree);
5197
5198 if ((ctxt->sax2) && (defaultValue != NULL) &&
5199 (def != XML_ATTRIBUTE_IMPLIED) &&
5200 (def != XML_ATTRIBUTE_REQUIRED)) {
5201 xmlAddDefAttrs(ctxt, elemName, attrName, defaultValue);
5202 }
5203 if (ctxt->sax2) {
5204 xmlAddSpecialAttr(ctxt, elemName, attrName, type);
5205 }
5206 if (defaultValue != NULL)
5207 xmlFree(defaultValue);
5208 GROW;
5209 }
5210 if (RAW == '>') {
5211 if (input != ctxt->input) {
5212 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5213 "Attribute list declaration doesn't start and stop in the same entity\n");
5214 }
5215 NEXT;
5216 }
5217 }
5218}
5219
5220/**
5221 * xmlParseElementMixedContentDecl:
5222 * @ctxt: an XML parser context
5223 * @inputchk: the input used for the current entity, needed for boundary checks
5224 *
5225 * parse the declaration for a Mixed Element content
5226 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
5227 *
5228 * [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' |
5229 * '(' S? '#PCDATA' S? ')'
5230 *
5231 * [ VC: Proper Group/PE Nesting ] applies to [51] too (see [49])
5232 *
5233 * [ VC: No Duplicate Types ]
5234 * The same name must not appear more than once in a single
5235 * mixed-content declaration.
5236 *
5237 * returns: the list of the xmlElementContentPtr describing the element choices
5238 */
5239xmlElementContentPtr
5240xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt, int inputchk) {
5241 xmlElementContentPtr ret = NULL, cur = NULL, n;
5242 const xmlChar *elem = NULL;
5243
5244 GROW;
5245 if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
5246 SKIP(7);
5247 SKIP_BLANKS;
5248 SHRINK;
5249 if (RAW == ')') {
5250 if ((ctxt->validate) && (ctxt->input->id != inputchk)) {
5251 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
5252"Element content declaration doesn't start and stop in the same entity\n",
5253 NULL);
5254 }
5255 NEXT;
5256 ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA);
5257 if (RAW == '*') {
5258 ret->ocur = XML_ELEMENT_CONTENT_MULT;
5259 NEXT;
5260 }
5261 return(ret);
5262 }
5263 if ((RAW == '(') || (RAW == '|')) {
5264 ret = cur = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA);
5265 if (ret == NULL) return(NULL);
5266 }
5267 while (RAW == '|') {
5268 NEXT;
5269 if (elem == NULL) {
5270 ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
5271 if (ret == NULL) return(NULL);
5272 ret->c1 = cur;
5273 if (cur != NULL)
5274 cur->parent = ret;
5275 cur = ret;
5276 } else {
5277 n = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
5278 if (n == NULL) return(NULL);
5279 n->c1 = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
5280 if (n->c1 != NULL)
5281 n->c1->parent = n;
5282 cur->c2 = n;
5283 if (n != NULL)
5284 n->parent = cur;
5285 cur = n;
5286 }
5287 SKIP_BLANKS;
5288 elem = xmlParseName(ctxt);
5289 if (elem == NULL) {
5290 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5291 "xmlParseElementMixedContentDecl : Name expected\n");
5292 xmlFreeDocElementContent(ctxt->myDoc, cur);
5293 return(NULL);
5294 }
5295 SKIP_BLANKS;
5296 GROW;
5297 }
5298 if ((RAW == ')') && (NXT(1) == '*')) {
5299 if (elem != NULL) {
5300 cur->c2 = xmlNewDocElementContent(ctxt->myDoc, elem,
5301 XML_ELEMENT_CONTENT_ELEMENT);
5302 if (cur->c2 != NULL)
5303 cur->c2->parent = cur;
5304 }
5305 ret->ocur = XML_ELEMENT_CONTENT_MULT;
5306 if ((ctxt->validate) && (ctxt->input->id != inputchk)) {
5307 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
5308"Element content declaration doesn't start and stop in the same entity\n",
5309 NULL);
5310 }
5311 SKIP(2);
5312 } else {
5313 xmlFreeDocElementContent(ctxt->myDoc, ret);
5314 xmlFatalErr(ctxt, XML_ERR_MIXED_NOT_STARTED, NULL);
5315 return(NULL);
5316 }
5317
5318 } else {
5319 xmlFatalErr(ctxt, XML_ERR_PCDATA_REQUIRED, NULL);
5320 }
5321 return(ret);
5322}
5323
5324/**
5325 * xmlParseElementChildrenContentDecl:
5326 * @ctxt: an XML parser context
5327 * @inputchk: the input used for the current entity, needed for boundary checks
5328 *
5329 * parse the declaration for a Mixed Element content
5330 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
5331 *
5332 *
5333 * [47] children ::= (choice | seq) ('?' | '*' | '+')?
5334 *
5335 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
5336 *
5337 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
5338 *
5339 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
5340 *
5341 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
5342 * TODO Parameter-entity replacement text must be properly nested
5343 * with parenthesized groups. That is to say, if either of the
5344 * opening or closing parentheses in a choice, seq, or Mixed
5345 * construct is contained in the replacement text for a parameter
5346 * entity, both must be contained in the same replacement text. For
5347 * interoperability, if a parameter-entity reference appears in a
5348 * choice, seq, or Mixed construct, its replacement text should not
5349 * be empty, and neither the first nor last non-blank character of
5350 * the replacement text should be a connector (| or ,).
5351 *
5352 * Returns the tree of xmlElementContentPtr describing the element
5353 * hierarchy.
5354 */
5355xmlElementContentPtr
5356xmlParseElementChildrenContentDecl (xmlParserCtxtPtr ctxt, int inputchk) {
5357 xmlElementContentPtr ret = NULL, cur = NULL, last = NULL, op = NULL;
5358 const xmlChar *elem;
5359 xmlChar type = 0;
5360
5361 if (ctxt->depth > 128) {
5362 xmlFatalErrMsgInt(ctxt, XML_ERR_ELEMCONTENT_NOT_FINISHED,
5363 "xmlParseElementChildrenContentDecl : depth %d too deep\n",
5364 ctxt->depth);
5365 return(NULL);
5366 }
5367 SKIP_BLANKS;
5368 GROW;
5369 if (RAW == '(') {
5370 int inputid = ctxt->input->id;
5371
5372 /* Recurse on first child */
5373 NEXT;
5374 SKIP_BLANKS;
5375 ctxt->depth++;
5376 cur = ret = xmlParseElementChildrenContentDecl(ctxt, inputid);
5377 ctxt->depth--;
5378 SKIP_BLANKS;
5379 GROW;
5380 } else {
5381 elem = xmlParseName(ctxt);
5382 if (elem == NULL) {
5383 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
5384 return(NULL);
5385 }
5386 cur = ret = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
5387 if (cur == NULL) {
5388 xmlErrMemory(ctxt, NULL);
5389 return(NULL);
5390 }
5391 GROW;
5392 if (RAW == '?') {
5393 cur->ocur = XML_ELEMENT_CONTENT_OPT;
5394 NEXT;
5395 } else if (RAW == '*') {
5396 cur->ocur = XML_ELEMENT_CONTENT_MULT;
5397 NEXT;
5398 } else if (RAW == '+') {
5399 cur->ocur = XML_ELEMENT_CONTENT_PLUS;
5400 NEXT;
5401 } else {
5402 cur->ocur = XML_ELEMENT_CONTENT_ONCE;
5403 }
5404 GROW;
5405 }
5406 SKIP_BLANKS;
5407 SHRINK;
5408 while (RAW != ')') {
5409 /*
5410 * Each loop we parse one separator and one element.
5411 */
5412 if (RAW == ',') {
5413 if (type == 0) type = CUR;
5414
5415 /*
5416 * Detect "Name | Name , Name" error
5417 */
5418 else if (type != CUR) {
5419 xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
5420 "xmlParseElementChildrenContentDecl : '%c' expected\n",
5421 type);
5422 if ((last != NULL) && (last != ret))
5423 xmlFreeDocElementContent(ctxt->myDoc, last);
5424 if (ret != NULL)
5425 xmlFreeDocElementContent(ctxt->myDoc, ret);
5426 return(NULL);
5427 }
5428 NEXT;
5429
5430 op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_SEQ);
5431 if (op == NULL) {
5432 if ((last != NULL) && (last != ret))
5433 xmlFreeDocElementContent(ctxt->myDoc, last);
5434 xmlFreeDocElementContent(ctxt->myDoc, ret);
5435 return(NULL);
5436 }
5437 if (last == NULL) {
5438 op->c1 = ret;
5439 if (ret != NULL)
5440 ret->parent = op;
5441 ret = cur = op;
5442 } else {
5443 cur->c2 = op;
5444 if (op != NULL)
5445 op->parent = cur;
5446 op->c1 = last;
5447 if (last != NULL)
5448 last->parent = op;
5449 cur =op;
5450 last = NULL;
5451 }
5452 } else if (RAW == '|') {
5453 if (type == 0) type = CUR;
5454
5455 /*
5456 * Detect "Name , Name | Name" error
5457 */
5458 else if (type != CUR) {
5459 xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
5460 "xmlParseElementChildrenContentDecl : '%c' expected\n",
5461 type);
5462 if ((last != NULL) && (last != ret))
5463 xmlFreeDocElementContent(ctxt->myDoc, last);
5464 if (ret != NULL)
5465 xmlFreeDocElementContent(ctxt->myDoc, ret);
5466 return(NULL);
5467 }
5468 NEXT;
5469
5470 op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
5471 if (op == NULL) {
5472 if ((last != NULL) && (last != ret))
5473 xmlFreeDocElementContent(ctxt->myDoc, last);
5474 if (ret != NULL)
5475 xmlFreeDocElementContent(ctxt->myDoc, ret);
5476 return(NULL);
5477 }
5478 if (last == NULL) {
5479 op->c1 = ret;
5480 if (ret != NULL)
5481 ret->parent = op;
5482 ret = cur = op;
5483 } else {
5484 cur->c2 = op;
5485 if (op != NULL)
5486 op->parent = cur;
5487 op->c1 = last;
5488 if (last != NULL)
5489 last->parent = op;
5490 cur =op;
5491 last = NULL;
5492 }
5493 } else {
5494 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_FINISHED, NULL);
5495 if (ret != NULL)
5496 xmlFreeDocElementContent(ctxt->myDoc, ret);
5497 return(NULL);
5498 }
5499 GROW;
5500 SKIP_BLANKS;
5501 GROW;
5502 if (RAW == '(') {
5503 int inputid = ctxt->input->id;
5504 /* Recurse on second child */
5505 NEXT;
5506 SKIP_BLANKS;
5507 ctxt->depth++;
5508 last = xmlParseElementChildrenContentDecl(ctxt, inputid);
5509 ctxt->depth--;
5510 SKIP_BLANKS;
5511 } else {
5512 elem = xmlParseName(ctxt);
5513 if (elem == NULL) {
5514 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
5515 if (ret != NULL)
5516 xmlFreeDocElementContent(ctxt->myDoc, ret);
5517 return(NULL);
5518 }
5519 last = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
5520 if (RAW == '?') {
5521 last->ocur = XML_ELEMENT_CONTENT_OPT;
5522 NEXT;
5523 } else if (RAW == '*') {
5524 last->ocur = XML_ELEMENT_CONTENT_MULT;
5525 NEXT;
5526 } else if (RAW == '+') {
5527 last->ocur = XML_ELEMENT_CONTENT_PLUS;
5528 NEXT;
5529 } else {
5530 last->ocur = XML_ELEMENT_CONTENT_ONCE;
5531 }
5532 }
5533 SKIP_BLANKS;
5534 GROW;
5535 }
5536 if ((cur != NULL) && (last != NULL)) {
5537 cur->c2 = last;
5538 if (last != NULL)
5539 last->parent = cur;
5540 }
5541 if ((ctxt->validate) && (ctxt->input->id != inputchk)) {
5542 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
5543"Element content declaration doesn't start and stop in the same entity\n",
5544 NULL);
5545 }
5546 NEXT;
5547 if (RAW == '?') {
5548 if (ret != NULL) {
5549 if ((ret->ocur == XML_ELEMENT_CONTENT_PLUS) ||
5550 (ret->ocur == XML_ELEMENT_CONTENT_MULT))
5551 ret->ocur = XML_ELEMENT_CONTENT_MULT;
5552 else
5553 ret->ocur = XML_ELEMENT_CONTENT_OPT;
5554 }
5555 NEXT;
5556 } else if (RAW == '*') {
5557 if (ret != NULL) {
5558 ret->ocur = XML_ELEMENT_CONTENT_MULT;
5559 cur = ret;
5560 /*
5561 * Some normalization:
5562 * (a | b* | c?)* == (a | b | c)*
5563 */
5564 while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) {
5565 if ((cur->c1 != NULL) &&
5566 ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
5567 (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT)))
5568 cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
5569 if ((cur->c2 != NULL) &&
5570 ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
5571 (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT)))
5572 cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
5573 cur = cur->c2;
5574 }
5575 }
5576 NEXT;
5577 } else if (RAW == '+') {
5578 if (ret != NULL) {
5579 int found = 0;
5580
5581 if ((ret->ocur == XML_ELEMENT_CONTENT_OPT) ||
5582 (ret->ocur == XML_ELEMENT_CONTENT_MULT))
5583 ret->ocur = XML_ELEMENT_CONTENT_MULT;
5584 else
5585 ret->ocur = XML_ELEMENT_CONTENT_PLUS;
5586 /*
5587 * Some normalization:
5588 * (a | b*)+ == (a | b)*
5589 * (a | b?)+ == (a | b)*
5590 */
5591 while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) {
5592 if ((cur->c1 != NULL) &&
5593 ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
5594 (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT))) {
5595 cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
5596 found = 1;
5597 }
5598 if ((cur->c2 != NULL) &&
5599 ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
5600 (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT))) {
5601 cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
5602 found = 1;
5603 }
5604 cur = cur->c2;
5605 }
5606 if (found)
5607 ret->ocur = XML_ELEMENT_CONTENT_MULT;
5608 }
5609 NEXT;
5610 }
5611 return(ret);
5612}
5613
5614/**
5615 * xmlParseElementContentDecl:
5616 * @ctxt: an XML parser context
5617 * @name: the name of the element being defined.
5618 * @result: the Element Content pointer will be stored here if any
5619 *
5620 * parse the declaration for an Element content either Mixed or Children,
5621 * the cases EMPTY and ANY are handled directly in xmlParseElementDecl
5622 *
5623 * [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
5624 *
5625 * returns: the type of element content XML_ELEMENT_TYPE_xxx
5626 */
5627
5628int
5629xmlParseElementContentDecl(xmlParserCtxtPtr ctxt, const xmlChar *name,
5630 xmlElementContentPtr *result) {
5631
5632 xmlElementContentPtr tree = NULL;
5633 int inputid = ctxt->input->id;
5634 int res;
5635
5636 *result = NULL;
5637
5638 if (RAW != '(') {
5639 xmlFatalErrMsgStr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
5640 "xmlParseElementContentDecl : %s '(' expected\n", name);
5641 return(-1);
5642 }
5643 NEXT;
5644 GROW;
5645 SKIP_BLANKS;
5646 if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
5647 tree = xmlParseElementMixedContentDecl(ctxt, inputid);
5648 res = XML_ELEMENT_TYPE_MIXED;
5649 } else {
5650 tree = xmlParseElementChildrenContentDecl(ctxt, inputid);
5651 res = XML_ELEMENT_TYPE_ELEMENT;
5652 }
5653 SKIP_BLANKS;
5654 *result = tree;
5655 return(res);
5656}
5657
5658/**
5659 * xmlParseElementDecl:
5660 * @ctxt: an XML parser context
5661 *
5662 * parse an Element declaration.
5663 *
5664 * [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'
5665 *
5666 * [ VC: Unique Element Type Declaration ]
5667 * No element type may be declared more than once
5668 *
5669 * Returns the type of the element, or -1 in case of error
5670 */
5671int
5672xmlParseElementDecl(xmlParserCtxtPtr ctxt) {
5673 const xmlChar *name;
5674 int ret = -1;
5675 xmlElementContentPtr content = NULL;
5676
5677 /* GROW; done in the caller */
5678 if (CMP9(CUR_PTR, '<', '!', 'E', 'L', 'E', 'M', 'E', 'N', 'T')) {
5679 xmlParserInputPtr input = ctxt->input;
5680
5681 SKIP(9);
5682 if (!IS_BLANK_CH(CUR)) {
5683 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5684 "Space required after 'ELEMENT'\n");
5685 }
5686 SKIP_BLANKS;
5687 name = xmlParseName(ctxt);
5688 if (name == NULL) {
5689 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5690 "xmlParseElementDecl: no name for Element\n");
5691 return(-1);
5692 }
5693 while ((RAW == 0) && (ctxt->inputNr > 1))
5694 xmlPopInput(ctxt);
5695 if (!IS_BLANK_CH(CUR)) {
5696 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5697 "Space required after the element name\n");
5698 }
5699 SKIP_BLANKS;
5700 if (CMP5(CUR_PTR, 'E', 'M', 'P', 'T', 'Y')) {
5701 SKIP(5);
5702 /*
5703 * Element must always be empty.
5704 */
5705 ret = XML_ELEMENT_TYPE_EMPTY;
5706 } else if ((RAW == 'A') && (NXT(1) == 'N') &&
5707 (NXT(2) == 'Y')) {
5708 SKIP(3);
5709 /*
5710 * Element is a generic container.
5711 */
5712 ret = XML_ELEMENT_TYPE_ANY;
5713 } else if (RAW == '(') {
5714 ret = xmlParseElementContentDecl(ctxt, name, &content);
5715 } else {
5716 /*
5717 * [ WFC: PEs in Internal Subset ] error handling.
5718 */
5719 if ((RAW == '%') && (ctxt->external == 0) &&
5720 (ctxt->inputNr == 1)) {
5721 xmlFatalErrMsg(ctxt, XML_ERR_PEREF_IN_INT_SUBSET,
5722 "PEReference: forbidden within markup decl in internal subset\n");
5723 } else {
5724 xmlFatalErrMsg(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
5725 "xmlParseElementDecl: 'EMPTY', 'ANY' or '(' expected\n");
5726 }
5727 return(-1);
5728 }
5729
5730 SKIP_BLANKS;
5731 /*
5732 * Pop-up of finished entities.
5733 */
5734 while ((RAW == 0) && (ctxt->inputNr > 1))
5735 xmlPopInput(ctxt);
5736 SKIP_BLANKS;
5737
5738 if (RAW != '>') {
5739 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
5740 if (content != NULL) {
5741 xmlFreeDocElementContent(ctxt->myDoc, content);
5742 }
5743 } else {
5744 if (input != ctxt->input) {
5745 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5746 "Element declaration doesn't start and stop in the same entity\n");
5747 }
5748
5749 NEXT;
5750 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5751 (ctxt->sax->elementDecl != NULL)) {
5752 if (content != NULL)
5753 content->parent = NULL;
5754 ctxt->sax->elementDecl(ctxt->userData, name, ret,
5755 content);
5756 if ((content != NULL) && (content->parent == NULL)) {
5757 /*
5758 * this is a trick: if xmlAddElementDecl is called,
5759 * instead of copying the full tree it is plugged directly
5760 * if called from the parser. Avoid duplicating the
5761 * interfaces or change the API/ABI
5762 */
5763 xmlFreeDocElementContent(ctxt->myDoc, content);
5764 }
5765 } else if (content != NULL) {
5766 xmlFreeDocElementContent(ctxt->myDoc, content);
5767 }
5768 }
5769 }
5770 return(ret);
5771}
5772
5773/**
5774 * xmlParseConditionalSections
5775 * @ctxt: an XML parser context
5776 *
5777 * [61] conditionalSect ::= includeSect | ignoreSect
5778 * [62] includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>'
5779 * [63] ignoreSect ::= '<![' S? 'IGNORE' S? '[' ignoreSectContents* ']]>'
5780 * [64] ignoreSectContents ::= Ignore ('<![' ignoreSectContents ']]>' Ignore)*
5781 * [65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*)
5782 */
5783
5784static void
5785xmlParseConditionalSections(xmlParserCtxtPtr ctxt) {
5786 SKIP(3);
5787 SKIP_BLANKS;
5788 if (CMP7(CUR_PTR, 'I', 'N', 'C', 'L', 'U', 'D', 'E')) {
5789 SKIP(7);
5790 SKIP_BLANKS;
5791 if (RAW != '[') {
5792 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
5793 } else {
5794 NEXT;
5795 }
5796 if (xmlParserDebugEntities) {
5797 if ((ctxt->input != NULL) && (ctxt->input->filename))
5798 xmlGenericError(xmlGenericErrorContext,
5799 "%s(%d): ", ctxt->input->filename,
5800 ctxt->input->line);
5801 xmlGenericError(xmlGenericErrorContext,
5802 "Entering INCLUDE Conditional Section\n");
5803 }
5804
5805 while ((RAW != 0) && ((RAW != ']') || (NXT(1) != ']') ||
5806 (NXT(2) != '>'))) {
5807 const xmlChar *check = CUR_PTR;
5808 unsigned int cons = ctxt->input->consumed;
5809
5810 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
5811 xmlParseConditionalSections(ctxt);
5812 } else if (IS_BLANK_CH(CUR)) {
5813 NEXT;
5814 } else if (RAW == '%') {
5815 xmlParsePEReference(ctxt);
5816 } else
5817 xmlParseMarkupDecl(ctxt);
5818
5819 /*
5820 * Pop-up of finished entities.
5821 */
5822 while ((RAW == 0) && (ctxt->inputNr > 1))
5823 xmlPopInput(ctxt);
5824
5825 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
5826 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
5827 break;
5828 }
5829 }
5830 if (xmlParserDebugEntities) {
5831 if ((ctxt->input != NULL) && (ctxt->input->filename))
5832 xmlGenericError(xmlGenericErrorContext,
5833 "%s(%d): ", ctxt->input->filename,
5834 ctxt->input->line);
5835 xmlGenericError(xmlGenericErrorContext,
5836 "Leaving INCLUDE Conditional Section\n");
5837 }
5838
5839 } else if (CMP6(CUR_PTR, 'I', 'G', 'N', 'O', 'R', 'E')) {
5840 int state;
5841 xmlParserInputState instate;
5842 int depth = 0;
5843
5844 SKIP(6);
5845 SKIP_BLANKS;
5846 if (RAW != '[') {
5847 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
5848 } else {
5849 NEXT;
5850 }
5851 if (xmlParserDebugEntities) {
5852 if ((ctxt->input != NULL) && (ctxt->input->filename))
5853 xmlGenericError(xmlGenericErrorContext,
5854 "%s(%d): ", ctxt->input->filename,
5855 ctxt->input->line);
5856 xmlGenericError(xmlGenericErrorContext,
5857 "Entering IGNORE Conditional Section\n");
5858 }
5859
5860 /*
5861 * Parse up to the end of the conditional section
5862 * But disable SAX event generating DTD building in the meantime
5863 */
5864 state = ctxt->disableSAX;
5865 instate = ctxt->instate;
5866 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
5867 ctxt->instate = XML_PARSER_IGNORE;
5868
5869 while ((depth >= 0) && (RAW != 0)) {
5870 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
5871 depth++;
5872 SKIP(3);
5873 continue;
5874 }
5875 if ((RAW == ']') && (NXT(1) == ']') && (NXT(2) == '>')) {
5876 if (--depth >= 0) SKIP(3);
5877 continue;
5878 }
5879 NEXT;
5880 continue;
5881 }
5882
5883 ctxt->disableSAX = state;
5884 ctxt->instate = instate;
5885
5886 if (xmlParserDebugEntities) {
5887 if ((ctxt->input != NULL) && (ctxt->input->filename))
5888 xmlGenericError(xmlGenericErrorContext,
5889 "%s(%d): ", ctxt->input->filename,
5890 ctxt->input->line);
5891 xmlGenericError(xmlGenericErrorContext,
5892 "Leaving IGNORE Conditional Section\n");
5893 }
5894
5895 } else {
5896 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID_KEYWORD, NULL);
5897 }
5898
5899 if (RAW == 0)
5900 SHRINK;
5901
5902 if (RAW == 0) {
5903 xmlFatalErr(ctxt, XML_ERR_CONDSEC_NOT_FINISHED, NULL);
5904 } else {
5905 SKIP(3);
5906 }
5907}
5908
5909/**
5910 * xmlParseMarkupDecl:
5911 * @ctxt: an XML parser context
5912 *
5913 * parse Markup declarations
5914 *
5915 * [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl |
5916 * NotationDecl | PI | Comment
5917 *
5918 * [ VC: Proper Declaration/PE Nesting ]
5919 * Parameter-entity replacement text must be properly nested with
5920 * markup declarations. That is to say, if either the first character
5921 * or the last character of a markup declaration (markupdecl above) is
5922 * contained in the replacement text for a parameter-entity reference,
5923 * both must be contained in the same replacement text.
5924 *
5925 * [ WFC: PEs in Internal Subset ]
5926 * In the internal DTD subset, parameter-entity references can occur
5927 * only where markup declarations can occur, not within markup declarations.
5928 * (This does not apply to references that occur in external parameter
5929 * entities or to the external subset.)
5930 */
5931void
5932xmlParseMarkupDecl(xmlParserCtxtPtr ctxt) {
5933 GROW;
5934 if (CUR == '<') {
5935 if (NXT(1) == '!') {
5936 switch (NXT(2)) {
5937 case 'E':
5938 if (NXT(3) == 'L')
5939 xmlParseElementDecl(ctxt);
5940 else if (NXT(3) == 'N')
5941 xmlParseEntityDecl(ctxt);
5942 break;
5943 case 'A':
5944 xmlParseAttributeListDecl(ctxt);
5945 break;
5946 case 'N':
5947 xmlParseNotationDecl(ctxt);
5948 break;
5949 case '-':
5950 xmlParseComment(ctxt);
5951 break;
5952 default:
5953 /* there is an error but it will be detected later */
5954 break;
5955 }
5956 } else if (NXT(1) == '?') {
5957 xmlParsePI(ctxt);
5958 }
5959 }
5960 /*
5961 * This is only for internal subset. On external entities,
5962 * the replacement is done before parsing stage
5963 */
5964 if ((ctxt->external == 0) && (ctxt->inputNr == 1))
5965 xmlParsePEReference(ctxt);
5966
5967 /*
5968 * Conditional sections are allowed from entities included
5969 * by PE References in the internal subset.
5970 */
5971 if ((ctxt->external == 0) && (ctxt->inputNr > 1)) {
5972 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
5973 xmlParseConditionalSections(ctxt);
5974 }
5975 }
5976
5977 ctxt->instate = XML_PARSER_DTD;
5978}
5979
5980/**
5981 * xmlParseTextDecl:
5982 * @ctxt: an XML parser context
5983 *
5984 * parse an XML declaration header for external entities
5985 *
5986 * [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>'
5987 *
5988 * Question: Seems that EncodingDecl is mandatory ? Is that a typo ?
5989 */
5990
5991void
5992xmlParseTextDecl(xmlParserCtxtPtr ctxt) {
5993 xmlChar *version;
5994 const xmlChar *encoding;
5995
5996 /*
5997 * We know that '<?xml' is here.
5998 */
5999 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
6000 SKIP(5);
6001 } else {
6002 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_STARTED, NULL);
6003 return;
6004 }
6005
6006 if (!IS_BLANK_CH(CUR)) {
6007 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6008 "Space needed after '<?xml'\n");
6009 }
6010 SKIP_BLANKS;
6011
6012 /*
6013 * We may have the VersionInfo here.
6014 */
6015 version = xmlParseVersionInfo(ctxt);
6016 if (version == NULL)
6017 version = xmlCharStrdup(XML_DEFAULT_VERSION);
6018 else {
6019 if (!IS_BLANK_CH(CUR)) {
6020 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6021 "Space needed here\n");
6022 }
6023 }
6024 ctxt->input->version = version;
6025
6026 /*
6027 * We must have the encoding declaration
6028 */
6029 encoding = xmlParseEncodingDecl(ctxt);
6030 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
6031 /*
6032 * The XML REC instructs us to stop parsing right here
6033 */
6034 return;
6035 }
6036 if ((encoding == NULL) && (ctxt->errNo == XML_ERR_OK)) {
6037 xmlFatalErrMsg(ctxt, XML_ERR_MISSING_ENCODING,
6038 "Missing encoding in text declaration\n");
6039 }
6040
6041 SKIP_BLANKS;
6042 if ((RAW == '?') && (NXT(1) == '>')) {
6043 SKIP(2);
6044 } else if (RAW == '>') {
6045 /* Deprecated old WD ... */
6046 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
6047 NEXT;
6048 } else {
6049 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
6050 MOVETO_ENDTAG(CUR_PTR);
6051 NEXT;
6052 }
6053}
6054
6055/**
6056 * xmlParseExternalSubset:
6057 * @ctxt: an XML parser context
6058 * @ExternalID: the external identifier
6059 * @SystemID: the system identifier (or URL)
6060 *
6061 * parse Markup declarations from an external subset
6062 *
6063 * [30] extSubset ::= textDecl? extSubsetDecl
6064 *
6065 * [31] extSubsetDecl ::= (markupdecl | conditionalSect | PEReference | S) *
6066 */
6067void
6068xmlParseExternalSubset(xmlParserCtxtPtr ctxt, const xmlChar *ExternalID,
6069 const xmlChar *SystemID) {
6070 xmlDetectSAX2(ctxt);
6071 GROW;
6072 if (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) {
6073 xmlParseTextDecl(ctxt);
6074 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
6075 /*
6076 * The XML REC instructs us to stop parsing right here
6077 */
6078 ctxt->instate = XML_PARSER_EOF;
6079 return;
6080 }
6081 }
6082 if (ctxt->myDoc == NULL) {
6083 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
6084 }
6085 if ((ctxt->myDoc != NULL) && (ctxt->myDoc->intSubset == NULL))
6086 xmlCreateIntSubset(ctxt->myDoc, NULL, ExternalID, SystemID);
6087
6088 ctxt->instate = XML_PARSER_DTD;
6089 ctxt->external = 1;
6090 while (((RAW == '<') && (NXT(1) == '?')) ||
6091 ((RAW == '<') && (NXT(1) == '!')) ||
6092 (RAW == '%') || IS_BLANK_CH(CUR)) {
6093 const xmlChar *check = CUR_PTR;
6094 unsigned int cons = ctxt->input->consumed;
6095
6096 GROW;
6097 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6098 xmlParseConditionalSections(ctxt);
6099 } else if (IS_BLANK_CH(CUR)) {
6100 NEXT;
6101 } else if (RAW == '%') {
6102 xmlParsePEReference(ctxt);
6103 } else
6104 xmlParseMarkupDecl(ctxt);
6105
6106 /*
6107 * Pop-up of finished entities.
6108 */
6109 while ((RAW == 0) && (ctxt->inputNr > 1))
6110 xmlPopInput(ctxt);
6111
6112 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
6113 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
6114 break;
6115 }
6116 }
6117
6118 if (RAW != 0) {
6119 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
6120 }
6121
6122}
6123
6124/**
6125 * xmlParseReference:
6126 * @ctxt: an XML parser context
6127 *
6128 * parse and handle entity references in content, depending on the SAX
6129 * interface, this may end-up in a call to character() if this is a
6130 * CharRef, a predefined entity, if there is no reference() callback.
6131 * or if the parser was asked to switch to that mode.
6132 *
6133 * [67] Reference ::= EntityRef | CharRef
6134 */
6135void
6136xmlParseReference(xmlParserCtxtPtr ctxt) {
6137 xmlEntityPtr ent;
6138 xmlChar *val;
6139 if (RAW != '&') return;
6140
6141 if (NXT(1) == '#') {
6142 int i = 0;
6143 xmlChar out[10];
6144 int hex = NXT(2);
6145 int value = xmlParseCharRef(ctxt);
6146
6147 if (ctxt->charset != XML_CHAR_ENCODING_UTF8) {
6148 /*
6149 * So we are using non-UTF-8 buffers
6150 * Check that the char fit on 8bits, if not
6151 * generate a CharRef.
6152 */
6153 if (value <= 0xFF) {
6154 out[0] = value;
6155 out[1] = 0;
6156 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
6157 (!ctxt->disableSAX))
6158 ctxt->sax->characters(ctxt->userData, out, 1);
6159 } else {
6160 if ((hex == 'x') || (hex == 'X'))
6161 snprintf((char *)out, sizeof(out), "#x%X", value);
6162 else
6163 snprintf((char *)out, sizeof(out), "#%d", value);
6164 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
6165 (!ctxt->disableSAX))
6166 ctxt->sax->reference(ctxt->userData, out);
6167 }
6168 } else {
6169 /*
6170 * Just encode the value in UTF-8
6171 */
6172 COPY_BUF(0 ,out, i, value);
6173 out[i] = 0;
6174 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
6175 (!ctxt->disableSAX))
6176 ctxt->sax->characters(ctxt->userData, out, i);
6177 }
6178 } else {
6179 int was_checked;
6180
6181 ent = xmlParseEntityRef(ctxt);
6182 if (ent == NULL) return;
6183 if (!ctxt->wellFormed)
6184 return;
6185 was_checked = ent->checked;
6186 if ((ent->name != NULL) &&
6187 (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY)) {
6188 xmlNodePtr list = NULL;
6189 xmlParserErrors ret = XML_ERR_OK;
6190
6191
6192 /*
6193 * The first reference to the entity trigger a parsing phase
6194 * where the ent->children is filled with the result from
6195 * the parsing.
6196 */
6197 if (ent->checked == 0) {
6198 xmlChar *value;
6199
6200 value = ent->content;
6201
6202 /*
6203 * Check that this entity is well formed
6204 */
6205 if ((value != NULL) && (value[0] != 0) &&
6206 (value[1] == 0) && (value[0] == '<') &&
6207 (xmlStrEqual(ent->name, BAD_CAST "lt"))) {
6208 /*
6209 * DONE: get definite answer on this !!!
6210 * Lots of entity decls are used to declare a single
6211 * char
6212 * <!ENTITY lt "<">
6213 * Which seems to be valid since
6214 * 2.4: The ampersand character (&) and the left angle
6215 * bracket (<) may appear in their literal form only
6216 * when used ... They are also legal within the literal
6217 * entity value of an internal entity declaration;i
6218 * see "4.3.2 Well-Formed Parsed Entities".
6219 * IMHO 2.4 and 4.3.2 are directly in contradiction.
6220 * Looking at the OASIS test suite and James Clark
6221 * tests, this is broken. However the XML REC uses
6222 * it. Is the XML REC not well-formed ????
6223 * This is a hack to avoid this problem
6224 *
6225 * ANSWER: since lt gt amp .. are already defined,
6226 * this is a redefinition and hence the fact that the
6227 * content is not well balanced is not a Wf error, this
6228 * is lousy but acceptable.
6229 */
6230 list = xmlNewDocText(ctxt->myDoc, value);
6231 if (list != NULL) {
6232 if ((ent->etype == XML_INTERNAL_GENERAL_ENTITY) &&
6233 (ent->children == NULL)) {
6234 ent->children = list;
6235 ent->last = list;
6236 if (ent->owner == 0)
6237 ent->owner = 1;
6238 list->parent = (xmlNodePtr) ent;
6239 } else {
6240 xmlFreeNodeList(list);
6241 }
6242 } else if (list != NULL) {
6243 xmlFreeNodeList(list);
6244 }
6245 } else {
6246 unsigned long oldnbent = ctxt->nbentities;
6247 /*
6248 * 4.3.2: An internal general parsed entity is well-formed
6249 * if its replacement text matches the production labeled
6250 * content.
6251 */
6252
6253 void *user_data;
6254 /*
6255 * This is a bit hackish but this seems the best
6256 * way to make sure both SAX and DOM entity support
6257 * behaves okay.
6258 */
6259 if (ctxt->userData == ctxt)
6260 user_data = NULL;
6261 else
6262 user_data = ctxt->userData;
6263
6264 if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
6265 ctxt->depth++;
6266 ret = xmlParseBalancedChunkMemoryInternal(ctxt,
6267 value, user_data, &list);
6268 ctxt->depth--;
6269
6270 } else if (ent->etype ==
6271 XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
6272 ctxt->depth++;
6273 ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt,
6274 ctxt->sax, user_data, ctxt->depth,
6275 ent->URI, ent->ExternalID, &list);
6276 ctxt->depth--;
6277 } else {
6278 ret = XML_ERR_ENTITY_PE_INTERNAL;
6279 xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR,
6280 "invalid entity type found\n", NULL);
6281 }
6282 /*
6283 * Store the number of entities needing parsing for entity
6284 * content and do checkings
6285 */
6286 if ((ent->owner != 0) || (ent->children == NULL)) {
6287 ent->owner = ctxt->nbentities - oldnbent;
6288 if (ent->owner == 0)
6289 ent->owner = 1;
6290 }
6291 if (ret == XML_ERR_ENTITY_LOOP) {
6292 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
6293 xmlFreeNodeList(list);
6294 return;
6295 }
6296 if (xmlParserEntityCheck(ctxt, 0, ent, 0)) {
6297 xmlFreeNodeList(list);
6298 return;
6299 }
6300 if (ret == XML_ERR_ENTITY_LOOP) {
6301 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
6302 return;
6303 } else if ((ret == XML_ERR_OK) && (list != NULL)) {
6304 if (((ent->etype == XML_INTERNAL_GENERAL_ENTITY) ||
6305 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY))&&
6306 (ent->children == NULL)) {
6307 ent->children = list;
6308 if (ctxt->replaceEntities) {
6309 /*
6310 * Prune it directly in the generated document
6311 * except for single text nodes.
6312 */
6313 if (((list->type == XML_TEXT_NODE) &&
6314 (list->next == NULL)) ||
6315 (ctxt->parseMode == XML_PARSE_READER)) {
6316 list->parent = (xmlNodePtr) ent;
6317 list = NULL;
6318 if (ent->owner == 0)
6319 ent->owner = 1;
6320 } else {
6321 ent->owner = 0;
6322 while (list != NULL) {
6323 list->parent = (xmlNodePtr) ctxt->node;
6324 list->doc = ctxt->myDoc;
6325 if (list->next == NULL)
6326 ent->last = list;
6327 list = list->next;
6328 }
6329 list = ent->children;
6330#ifdef LIBXML_LEGACY_ENABLED
6331 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
6332 xmlAddEntityReference(ent, list, NULL);
6333#endif /* LIBXML_LEGACY_ENABLED */
6334 }
6335 } else {
6336 if (ent->owner == 0)
6337 ent->owner = 1;
6338 while (list != NULL) {
6339 list->parent = (xmlNodePtr) ent;
6340 if (list->next == NULL)
6341 ent->last = list;
6342 list = list->next;
6343 }
6344 }
6345 } else {
6346 xmlFreeNodeList(list);
6347 list = NULL;
6348 }
6349 } else if ((ret != XML_ERR_OK) &&
6350 (ret != XML_WAR_UNDECLARED_ENTITY)) {
6351 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
6352 "Entity '%s' failed to parse\n", ent->name);
6353 } else if (list != NULL) {
6354 xmlFreeNodeList(list);
6355 list = NULL;
6356 } else if (ent->owner != 1) {
6357 ctxt->nbentities += ent->owner;
6358 }
6359 }
6360 ent->checked = 1;
6361 }
6362
6363 if (ent->children == NULL) {
6364 /*
6365 * Probably running in SAX mode and the callbacks don't
6366 * build the entity content. So unless we already went
6367 * though parsing for first checking go though the entity
6368 * content to generate callbacks associated to the entity
6369 */
6370 if (was_checked == 1) {
6371 void *user_data;
6372 /*
6373 * This is a bit hackish but this seems the best
6374 * way to make sure both SAX and DOM entity support
6375 * behaves okay.
6376 */
6377 if (ctxt->userData == ctxt)
6378 user_data = NULL;
6379 else
6380 user_data = ctxt->userData;
6381
6382 if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
6383 ctxt->depth++;
6384 ret = xmlParseBalancedChunkMemoryInternal(ctxt,
6385 ent->content, user_data, NULL);
6386 ctxt->depth--;
6387 } else if (ent->etype ==
6388 XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
6389 ctxt->depth++;
6390 ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt,
6391 ctxt->sax, user_data, ctxt->depth,
6392 ent->URI, ent->ExternalID, NULL);
6393 ctxt->depth--;
6394 } else {
6395 ret = XML_ERR_ENTITY_PE_INTERNAL;
6396 xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR,
6397 "invalid entity type found\n", NULL);
6398 }
6399 if (ret == XML_ERR_ENTITY_LOOP) {
6400 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
6401 return;
6402 }
6403 }
6404 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
6405 (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
6406 /*
6407 * Entity reference callback comes second, it's somewhat
6408 * superfluous but a compatibility to historical behaviour
6409 */
6410 ctxt->sax->reference(ctxt->userData, ent->name);
6411 }
6412 return;
6413 }
6414 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
6415 (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
6416 /*
6417 * Create a node.
6418 */
6419 ctxt->sax->reference(ctxt->userData, ent->name);
6420 return;
6421 }
6422 if ((ctxt->replaceEntities) || (ent->children == NULL)) {
6423 /*
6424 * There is a problem on the handling of _private for entities
6425 * (bug 155816): Should we copy the content of the field from
6426 * the entity (possibly overwriting some value set by the user
6427 * when a copy is created), should we leave it alone, or should
6428 * we try to take care of different situations? The problem
6429 * is exacerbated by the usage of this field by the xmlReader.
6430 * To fix this bug, we look at _private on the created node
6431 * and, if it's NULL, we copy in whatever was in the entity.
6432 * If it's not NULL we leave it alone. This is somewhat of a
6433 * hack - maybe we should have further tests to determine
6434 * what to do.
6435 */
6436 if ((ctxt->node != NULL) && (ent->children != NULL)) {
6437 /*
6438 * Seems we are generating the DOM content, do
6439 * a simple tree copy for all references except the first
6440 * In the first occurrence list contains the replacement.
6441 * progressive == 2 means we are operating on the Reader
6442 * and since nodes are discarded we must copy all the time.
6443 */
6444 if (((list == NULL) && (ent->owner == 0)) ||
6445 (ctxt->parseMode == XML_PARSE_READER)) {
6446 xmlNodePtr nw = NULL, cur, firstChild = NULL;
6447
6448 /*
6449 * We are copying here, make sure there is no abuse
6450 */
6451 ctxt->sizeentcopy += ent->length;
6452 if (xmlParserEntityCheck(ctxt, 0, ent, ctxt->sizeentcopy))
6453 return;
6454
6455 /*
6456 * when operating on a reader, the entities definitions
6457 * are always owning the entities subtree.
6458 if (ctxt->parseMode == XML_PARSE_READER)
6459 ent->owner = 1;
6460 */
6461
6462 cur = ent->children;
6463 while (cur != NULL) {
6464 nw = xmlDocCopyNode(cur, ctxt->myDoc, 1);
6465 if (nw != NULL) {
6466 if (nw->_private == NULL)
6467 nw->_private = cur->_private;
6468 if (firstChild == NULL){
6469 firstChild = nw;
6470 }
6471 nw = xmlAddChild(ctxt->node, nw);
6472 }
6473 if (cur == ent->last) {
6474 /*
6475 * needed to detect some strange empty
6476 * node cases in the reader tests
6477 */
6478 if ((ctxt->parseMode == XML_PARSE_READER) &&
6479 (nw != NULL) &&
6480 (nw->type == XML_ELEMENT_NODE) &&
6481 (nw->children == NULL))
6482 nw->extra = 1;
6483
6484 break;
6485 }
6486 cur = cur->next;
6487 }
6488#ifdef LIBXML_LEGACY_ENABLED
6489 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
6490 xmlAddEntityReference(ent, firstChild, nw);
6491#endif /* LIBXML_LEGACY_ENABLED */
6492 } else if (list == NULL) {
6493 xmlNodePtr nw = NULL, cur, next, last,
6494 firstChild = NULL;
6495
6496 /*
6497 * We are copying here, make sure there is no abuse
6498 */
6499 ctxt->sizeentcopy += ent->length;
6500 if (xmlParserEntityCheck(ctxt, 0, ent, ctxt->sizeentcopy))
6501 return;
6502
6503 /*
6504 * Copy the entity child list and make it the new
6505 * entity child list. The goal is to make sure any
6506 * ID or REF referenced will be the one from the
6507 * document content and not the entity copy.
6508 */
6509 cur = ent->children;
6510 ent->children = NULL;
6511 last = ent->last;
6512 ent->last = NULL;
6513 while (cur != NULL) {
6514 next = cur->next;
6515 cur->next = NULL;
6516 cur->parent = NULL;
6517 nw = xmlDocCopyNode(cur, ctxt->myDoc, 1);
6518 if (nw != NULL) {
6519 if (nw->_private == NULL)
6520 nw->_private = cur->_private;
6521 if (firstChild == NULL){
6522 firstChild = cur;
6523 }
6524 xmlAddChild((xmlNodePtr) ent, nw);
6525 xmlAddChild(ctxt->node, cur);
6526 }
6527 if (cur == last)
6528 break;
6529 cur = next;
6530 }
6531 if (ent->owner == 0)
6532 ent->owner = 1;
6533#ifdef LIBXML_LEGACY_ENABLED
6534 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
6535 xmlAddEntityReference(ent, firstChild, nw);
6536#endif /* LIBXML_LEGACY_ENABLED */
6537 } else {
6538 const xmlChar *nbktext;
6539
6540 /*
6541 * the name change is to avoid coalescing of the
6542 * node with a possible previous text one which
6543 * would make ent->children a dangling pointer
6544 */
6545 nbktext = xmlDictLookup(ctxt->dict, BAD_CAST "nbktext",
6546 -1);
6547 if (ent->children->type == XML_TEXT_NODE)
6548 ent->children->name = nbktext;
6549 if ((ent->last != ent->children) &&
6550 (ent->last->type == XML_TEXT_NODE))
6551 ent->last->name = nbktext;
6552 xmlAddChildList(ctxt->node, ent->children);
6553 }
6554
6555 /*
6556 * This is to avoid a nasty side effect, see
6557 * characters() in SAX.c
6558 */
6559 ctxt->nodemem = 0;
6560 ctxt->nodelen = 0;
6561 return;
6562 }
6563 }
6564 } else {
6565 val = ent->content;
6566 if (val == NULL) return;
6567 /*
6568 * inline the entity.
6569 */
6570 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
6571 (!ctxt->disableSAX))
6572 ctxt->sax->characters(ctxt->userData, val, xmlStrlen(val));
6573 }
6574 }
6575}
6576
6577/**
6578 * xmlParseEntityRef:
6579 * @ctxt: an XML parser context
6580 *
6581 * parse ENTITY references declarations
6582 *
6583 * [68] EntityRef ::= '&' Name ';'
6584 *
6585 * [ WFC: Entity Declared ]
6586 * In a document without any DTD, a document with only an internal DTD
6587 * subset which contains no parameter entity references, or a document
6588 * with "standalone='yes'", the Name given in the entity reference
6589 * must match that in an entity declaration, except that well-formed
6590 * documents need not declare any of the following entities: amp, lt,
6591 * gt, apos, quot. The declaration of a parameter entity must precede
6592 * any reference to it. Similarly, the declaration of a general entity
6593 * must precede any reference to it which appears in a default value in an
6594 * attribute-list declaration. Note that if entities are declared in the
6595 * external subset or in external parameter entities, a non-validating
6596 * processor is not obligated to read and process their declarations;
6597 * for such documents, the rule that an entity must be declared is a
6598 * well-formedness constraint only if standalone='yes'.
6599 *
6600 * [ WFC: Parsed Entity ]
6601 * An entity reference must not contain the name of an unparsed entity
6602 *
6603 * Returns the xmlEntityPtr if found, or NULL otherwise.
6604 */
6605xmlEntityPtr
6606xmlParseEntityRef(xmlParserCtxtPtr ctxt) {
6607 const xmlChar *name;
6608 xmlEntityPtr ent = NULL;
6609
6610 GROW;
6611
6612 if (RAW == '&') {
6613 NEXT;
6614 name = xmlParseName(ctxt);
6615 if (name == NULL) {
6616 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6617 "xmlParseEntityRef: no name\n");
6618 } else {
6619 if (RAW == ';') {
6620 NEXT;
6621 /*
6622 * Increase the number of entity references parsed
6623 */
6624 ctxt->nbentities++;
6625
6626 /*
6627 * Ask first SAX for entity resolution, otherwise try the
6628 * predefined set.
6629 */
6630 if (ctxt->sax != NULL) {
6631 if (ctxt->sax->getEntity != NULL)
6632 ent = ctxt->sax->getEntity(ctxt->userData, name);
6633 if ((ctxt->wellFormed == 1 ) && (ent == NULL))
6634 ent = xmlGetPredefinedEntity(name);
6635 if ((ctxt->wellFormed == 1 ) && (ent == NULL) &&
6636 (ctxt->userData==ctxt)) {
6637 ent = xmlSAX2GetEntity(ctxt, name);
6638 }
6639 }
6640 /*
6641 * [ WFC: Entity Declared ]
6642 * In a document without any DTD, a document with only an
6643 * internal DTD subset which contains no parameter entity
6644 * references, or a document with "standalone='yes'", the
6645 * Name given in the entity reference must match that in an
6646 * entity declaration, except that well-formed documents
6647 * need not declare any of the following entities: amp, lt,
6648 * gt, apos, quot.
6649 * The declaration of a parameter entity must precede any
6650 * reference to it.
6651 * Similarly, the declaration of a general entity must
6652 * precede any reference to it which appears in a default
6653 * value in an attribute-list declaration. Note that if
6654 * entities are declared in the external subset or in
6655 * external parameter entities, a non-validating processor
6656 * is not obligated to read and process their declarations;
6657 * for such documents, the rule that an entity must be
6658 * declared is a well-formedness constraint only if
6659 * standalone='yes'.
6660 */
6661 if (ent == NULL) {
6662 if ((ctxt->standalone == 1) ||
6663 ((ctxt->hasExternalSubset == 0) &&
6664 (ctxt->hasPErefs == 0))) {
6665 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
6666 "Entity '%s' not defined\n", name);
6667 } else {
6668 xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
6669 "Entity '%s' not defined\n", name);
6670 if ((ctxt->inSubset == 0) &&
6671 (ctxt->sax != NULL) &&
6672 (ctxt->sax->reference != NULL)) {
6673 ctxt->sax->reference(ctxt->userData, name);
6674 }
6675 }
6676 ctxt->valid = 0;
6677 }
6678
6679 /*
6680 * [ WFC: Parsed Entity ]
6681 * An entity reference must not contain the name of an
6682 * unparsed entity
6683 */
6684 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
6685 xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
6686 "Entity reference to unparsed entity %s\n", name);
6687 }
6688
6689 /*
6690 * [ WFC: No External Entity References ]
6691 * Attribute values cannot contain direct or indirect
6692 * entity references to external entities.
6693 */
6694 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
6695 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
6696 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
6697 "Attribute references external entity '%s'\n", name);
6698 }
6699 /*
6700 * [ WFC: No < in Attribute Values ]
6701 * The replacement text of any entity referred to directly or
6702 * indirectly in an attribute value (other than "&lt;") must
6703 * not contain a <.
6704 */
6705 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
6706 (ent != NULL) &&
6707 (!xmlStrEqual(ent->name, BAD_CAST "lt")) &&
6708 (ent->content != NULL) &&
6709 (xmlStrchr(ent->content, '<'))) {
6710 xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
6711 "'<' in entity '%s' is not allowed in attributes values\n", name);
6712 }
6713
6714 /*
6715 * Internal check, no parameter entities here ...
6716 */
6717 else {
6718 switch (ent->etype) {
6719 case XML_INTERNAL_PARAMETER_ENTITY:
6720 case XML_EXTERNAL_PARAMETER_ENTITY:
6721 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
6722 "Attempt to reference the parameter entity '%s'\n",
6723 name);
6724 break;
6725 default:
6726 break;
6727 }
6728 }
6729
6730 /*
6731 * [ WFC: No Recursion ]
6732 * A parsed entity must not contain a recursive reference
6733 * to itself, either directly or indirectly.
6734 * Done somewhere else
6735 */
6736
6737 } else {
6738 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
6739 }
6740 }
6741 }
6742 return(ent);
6743}
6744
6745/**
6746 * xmlParseStringEntityRef:
6747 * @ctxt: an XML parser context
6748 * @str: a pointer to an index in the string
6749 *
6750 * parse ENTITY references declarations, but this version parses it from
6751 * a string value.
6752 *
6753 * [68] EntityRef ::= '&' Name ';'
6754 *
6755 * [ WFC: Entity Declared ]
6756 * In a document without any DTD, a document with only an internal DTD
6757 * subset which contains no parameter entity references, or a document
6758 * with "standalone='yes'", the Name given in the entity reference
6759 * must match that in an entity declaration, except that well-formed
6760 * documents need not declare any of the following entities: amp, lt,
6761 * gt, apos, quot. The declaration of a parameter entity must precede
6762 * any reference to it. Similarly, the declaration of a general entity
6763 * must precede any reference to it which appears in a default value in an
6764 * attribute-list declaration. Note that if entities are declared in the
6765 * external subset or in external parameter entities, a non-validating
6766 * processor is not obligated to read and process their declarations;
6767 * for such documents, the rule that an entity must be declared is a
6768 * well-formedness constraint only if standalone='yes'.
6769 *
6770 * [ WFC: Parsed Entity ]
6771 * An entity reference must not contain the name of an unparsed entity
6772 *
6773 * Returns the xmlEntityPtr if found, or NULL otherwise. The str pointer
6774 * is updated to the current location in the string.
6775 */
6776xmlEntityPtr
6777xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar ** str) {
6778 xmlChar *name;
6779 const xmlChar *ptr;
6780 xmlChar cur;
6781 xmlEntityPtr ent = NULL;
6782
6783 if ((str == NULL) || (*str == NULL))
6784 return(NULL);
6785 ptr = *str;
6786 cur = *ptr;
6787 if (cur == '&') {
6788 ptr++;
6789 cur = *ptr;
6790 name = xmlParseStringName(ctxt, &ptr);
6791 if (name == NULL) {
6792 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6793 "xmlParseStringEntityRef: no name\n");
6794 } else {
6795 if (*ptr == ';') {
6796 ptr++;
6797 /*
6798 * Increase the number of entity references parsed
6799 */
6800 ctxt->nbentities++;
6801 /*
6802 * Ask first SAX for entity resolution, otherwise try the
6803 * predefined set.
6804 */
6805 if (ctxt->sax != NULL) {
6806 if (ctxt->sax->getEntity != NULL)
6807 ent = ctxt->sax->getEntity(ctxt->userData, name);
6808 if (ent == NULL)
6809 ent = xmlGetPredefinedEntity(name);
6810 if ((ent == NULL) && (ctxt->userData==ctxt)) {
6811 ent = xmlSAX2GetEntity(ctxt, name);
6812 }
6813 }
6814 /*
6815 * [ WFC: Entity Declared ]
6816 * In a document without any DTD, a document with only an
6817 * internal DTD subset which contains no parameter entity
6818 * references, or a document with "standalone='yes'", the
6819 * Name given in the entity reference must match that in an
6820 * entity declaration, except that well-formed documents
6821 * need not declare any of the following entities: amp, lt,
6822 * gt, apos, quot.
6823 * The declaration of a parameter entity must precede any
6824 * reference to it.
6825 * Similarly, the declaration of a general entity must
6826 * precede any reference to it which appears in a default
6827 * value in an attribute-list declaration. Note that if
6828 * entities are declared in the external subset or in
6829 * external parameter entities, a non-validating processor
6830 * is not obligated to read and process their declarations;
6831 * for such documents, the rule that an entity must be
6832 * declared is a well-formedness constraint only if
6833 * standalone='yes'.
6834 */
6835 if (ent == NULL) {
6836 if ((ctxt->standalone == 1) ||
6837 ((ctxt->hasExternalSubset == 0) &&
6838 (ctxt->hasPErefs == 0))) {
6839 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
6840 "Entity '%s' not defined\n", name);
6841 } else {
6842 xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
6843 "Entity '%s' not defined\n",
6844 name);
6845 }
6846 /* TODO ? check regressions ctxt->valid = 0; */
6847 }
6848
6849 /*
6850 * [ WFC: Parsed Entity ]
6851 * An entity reference must not contain the name of an
6852 * unparsed entity
6853 */
6854 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
6855 xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
6856 "Entity reference to unparsed entity %s\n", name);
6857 }
6858
6859 /*
6860 * [ WFC: No External Entity References ]
6861 * Attribute values cannot contain direct or indirect
6862 * entity references to external entities.
6863 */
6864 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
6865 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
6866 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
6867 "Attribute references external entity '%s'\n", name);
6868 }
6869 /*
6870 * [ WFC: No < in Attribute Values ]
6871 * The replacement text of any entity referred to directly or
6872 * indirectly in an attribute value (other than "&lt;") must
6873 * not contain a <.
6874 */
6875 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
6876 (ent != NULL) &&
6877 (!xmlStrEqual(ent->name, BAD_CAST "lt")) &&
6878 (ent->content != NULL) &&
6879 (xmlStrchr(ent->content, '<'))) {
6880 xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
6881 "'<' in entity '%s' is not allowed in attributes values\n",
6882 name);
6883 }
6884
6885 /*
6886 * Internal check, no parameter entities here ...
6887 */
6888 else {
6889 switch (ent->etype) {
6890 case XML_INTERNAL_PARAMETER_ENTITY:
6891 case XML_EXTERNAL_PARAMETER_ENTITY:
6892 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
6893 "Attempt to reference the parameter entity '%s'\n",
6894 name);
6895 break;
6896 default:
6897 break;
6898 }
6899 }
6900
6901 /*
6902 * [ WFC: No Recursion ]
6903 * A parsed entity must not contain a recursive reference
6904 * to itself, either directly or indirectly.
6905 * Done somewhere else
6906 */
6907
6908 } else {
6909 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
6910 }
6911 xmlFree(name);
6912 }
6913 }
6914 *str = ptr;
6915 return(ent);
6916}
6917
6918/**
6919 * xmlParsePEReference:
6920 * @ctxt: an XML parser context
6921 *
6922 * parse PEReference declarations
6923 * The entity content is handled directly by pushing it's content as
6924 * a new input stream.
6925 *
6926 * [69] PEReference ::= '%' Name ';'
6927 *
6928 * [ WFC: No Recursion ]
6929 * A parsed entity must not contain a recursive
6930 * reference to itself, either directly or indirectly.
6931 *
6932 * [ WFC: Entity Declared ]
6933 * In a document without any DTD, a document with only an internal DTD
6934 * subset which contains no parameter entity references, or a document
6935 * with "standalone='yes'", ... ... The declaration of a parameter
6936 * entity must precede any reference to it...
6937 *
6938 * [ VC: Entity Declared ]
6939 * In a document with an external subset or external parameter entities
6940 * with "standalone='no'", ... ... The declaration of a parameter entity
6941 * must precede any reference to it...
6942 *
6943 * [ WFC: In DTD ]
6944 * Parameter-entity references may only appear in the DTD.
6945 * NOTE: misleading but this is handled.
6946 */
6947void
6948xmlParsePEReference(xmlParserCtxtPtr ctxt)
6949{
6950 const xmlChar *name;
6951 xmlEntityPtr entity = NULL;
6952 xmlParserInputPtr input;
6953
6954 if (RAW == '%') {
6955 NEXT;
6956 name = xmlParseName(ctxt);
6957 if (name == NULL) {
6958 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6959 "xmlParsePEReference: no name\n");
6960 } else {
6961 if (RAW == ';') {
6962 NEXT;
6963 /*
6964 * Increase the number of entity references parsed
6965 */
6966 ctxt->nbentities++;
6967
6968 if ((ctxt->sax != NULL) &&
6969 (ctxt->sax->getParameterEntity != NULL))
6970 entity = ctxt->sax->getParameterEntity(ctxt->userData,
6971 name);
6972 if (entity == NULL) {
6973 /*
6974 * [ WFC: Entity Declared ]
6975 * In a document without any DTD, a document with only an
6976 * internal DTD subset which contains no parameter entity
6977 * references, or a document with "standalone='yes'", ...
6978 * ... The declaration of a parameter entity must precede
6979 * any reference to it...
6980 */
6981 if ((ctxt->standalone == 1) ||
6982 ((ctxt->hasExternalSubset == 0) &&
6983 (ctxt->hasPErefs == 0))) {
6984 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
6985 "PEReference: %%%s; not found\n",
6986 name);
6987 } else {
6988 /*
6989 * [ VC: Entity Declared ]
6990 * In a document with an external subset or external
6991 * parameter entities with "standalone='no'", ...
6992 * ... The declaration of a parameter entity must
6993 * precede any reference to it...
6994 */
6995 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
6996 "PEReference: %%%s; not found\n",
6997 name, NULL);
6998 ctxt->valid = 0;
6999 }
7000 } else {
7001 /*
7002 * Internal checking in case the entity quest barfed
7003 */
7004 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
7005 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
7006 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
7007 "Internal: %%%s; is not a parameter entity\n",
7008 name, NULL);
7009 } else if (ctxt->input->free != deallocblankswrapper) {
7010 input =
7011 xmlNewBlanksWrapperInputStream(ctxt, entity);
7012 xmlPushInput(ctxt, input);
7013 } else {
7014 /*
7015 * TODO !!!
7016 * handle the extra spaces added before and after
7017 * c.f. http://www.w3.org/TR/REC-xml#as-PE
7018 */
7019 input = xmlNewEntityInputStream(ctxt, entity);
7020 xmlPushInput(ctxt, input);
7021 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
7022 (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) &&
7023 (IS_BLANK_CH(NXT(5)))) {
7024 xmlParseTextDecl(ctxt);
7025 if (ctxt->errNo ==
7026 XML_ERR_UNSUPPORTED_ENCODING) {
7027 /*
7028 * The XML REC instructs us to stop parsing
7029 * right here
7030 */
7031 ctxt->instate = XML_PARSER_EOF;
7032 return;
7033 }
7034 }
7035 }
7036 }
7037 ctxt->hasPErefs = 1;
7038 } else {
7039 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
7040 }
7041 }
7042 }
7043}
7044
7045/**
7046 * xmlParseStringPEReference:
7047 * @ctxt: an XML parser context
7048 * @str: a pointer to an index in the string
7049 *
7050 * parse PEReference declarations
7051 *
7052 * [69] PEReference ::= '%' Name ';'
7053 *
7054 * [ WFC: No Recursion ]
7055 * A parsed entity must not contain a recursive
7056 * reference to itself, either directly or indirectly.
7057 *
7058 * [ WFC: Entity Declared ]
7059 * In a document without any DTD, a document with only an internal DTD
7060 * subset which contains no parameter entity references, or a document
7061 * with "standalone='yes'", ... ... The declaration of a parameter
7062 * entity must precede any reference to it...
7063 *
7064 * [ VC: Entity Declared ]
7065 * In a document with an external subset or external parameter entities
7066 * with "standalone='no'", ... ... The declaration of a parameter entity
7067 * must precede any reference to it...
7068 *
7069 * [ WFC: In DTD ]
7070 * Parameter-entity references may only appear in the DTD.
7071 * NOTE: misleading but this is handled.
7072 *
7073 * Returns the string of the entity content.
7074 * str is updated to the current value of the index
7075 */
7076xmlEntityPtr
7077xmlParseStringPEReference(xmlParserCtxtPtr ctxt, const xmlChar **str) {
7078 const xmlChar *ptr;
7079 xmlChar cur;
7080 xmlChar *name;
7081 xmlEntityPtr entity = NULL;
7082
7083 if ((str == NULL) || (*str == NULL)) return(NULL);
7084 ptr = *str;
7085 cur = *ptr;
7086 if (cur == '%') {
7087 ptr++;
7088 cur = *ptr;
7089 name = xmlParseStringName(ctxt, &ptr);
7090 if (name == NULL) {
7091 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7092 "xmlParseStringPEReference: no name\n");
7093 } else {
7094 cur = *ptr;
7095 if (cur == ';') {
7096 ptr++;
7097 cur = *ptr;
7098 /*
7099 * Increase the number of entity references parsed
7100 */
7101 ctxt->nbentities++;
7102
7103 if ((ctxt->sax != NULL) &&
7104 (ctxt->sax->getParameterEntity != NULL))
7105 entity = ctxt->sax->getParameterEntity(ctxt->userData,
7106 name);
7107 if (entity == NULL) {
7108 /*
7109 * [ WFC: Entity Declared ]
7110 * In a document without any DTD, a document with only an
7111 * internal DTD subset which contains no parameter entity
7112 * references, or a document with "standalone='yes'", ...
7113 * ... The declaration of a parameter entity must precede
7114 * any reference to it...
7115 */
7116 if ((ctxt->standalone == 1) ||
7117 ((ctxt->hasExternalSubset == 0) &&
7118 (ctxt->hasPErefs == 0))) {
7119 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7120 "PEReference: %%%s; not found\n", name);
7121 } else {
7122 /*
7123 * [ VC: Entity Declared ]
7124 * In a document with an external subset or external
7125 * parameter entities with "standalone='no'", ...
7126 * ... The declaration of a parameter entity must
7127 * precede any reference to it...
7128 */
7129 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
7130 "PEReference: %%%s; not found\n",
7131 name, NULL);
7132 ctxt->valid = 0;
7133 }
7134 } else {
7135 /*
7136 * Internal checking in case the entity quest barfed
7137 */
7138 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
7139 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
7140 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
7141 "%%%s; is not a parameter entity\n",
7142 name, NULL);
7143 }
7144 }
7145 ctxt->hasPErefs = 1;
7146 } else {
7147 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
7148 }
7149 xmlFree(name);
7150 }
7151 }
7152 *str = ptr;
7153 return(entity);
7154}
7155
7156/**
7157 * xmlParseDocTypeDecl:
7158 * @ctxt: an XML parser context
7159 *
7160 * parse a DOCTYPE declaration
7161 *
7162 * [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S?
7163 * ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
7164 *
7165 * [ VC: Root Element Type ]
7166 * The Name in the document type declaration must match the element
7167 * type of the root element.
7168 */
7169
7170void
7171xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt) {
7172 const xmlChar *name = NULL;
7173 xmlChar *ExternalID = NULL;
7174 xmlChar *URI = NULL;
7175
7176 /*
7177 * We know that '<!DOCTYPE' has been detected.
7178 */
7179 SKIP(9);
7180
7181 SKIP_BLANKS;
7182
7183 /*
7184 * Parse the DOCTYPE name.
7185 */
7186 name = xmlParseName(ctxt);
7187 if (name == NULL) {
7188 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7189 "xmlParseDocTypeDecl : no DOCTYPE name !\n");
7190 }
7191 ctxt->intSubName = name;
7192
7193 SKIP_BLANKS;
7194
7195 /*
7196 * Check for SystemID and ExternalID
7197 */
7198 URI = xmlParseExternalID(ctxt, &ExternalID, 1);
7199
7200 if ((URI != NULL) || (ExternalID != NULL)) {
7201 ctxt->hasExternalSubset = 1;
7202 }
7203 ctxt->extSubURI = URI;
7204 ctxt->extSubSystem = ExternalID;
7205
7206 SKIP_BLANKS;
7207
7208 /*
7209 * Create and update the internal subset.
7210 */
7211 if ((ctxt->sax != NULL) && (ctxt->sax->internalSubset != NULL) &&
7212 (!ctxt->disableSAX))
7213 ctxt->sax->internalSubset(ctxt->userData, name, ExternalID, URI);
7214
7215 /*
7216 * Is there any internal subset declarations ?
7217 * they are handled separately in xmlParseInternalSubset()
7218 */
7219 if (RAW == '[')
7220 return;
7221
7222 /*
7223 * We should be at the end of the DOCTYPE declaration.
7224 */
7225 if (RAW != '>') {
7226 xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
7227 }
7228 NEXT;
7229}
7230
7231/**
7232 * xmlParseInternalSubset:
7233 * @ctxt: an XML parser context
7234 *
7235 * parse the internal subset declaration
7236 *
7237 * [28 end] ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
7238 */
7239
7240static void
7241xmlParseInternalSubset(xmlParserCtxtPtr ctxt) {
7242 /*
7243 * Is there any DTD definition ?
7244 */
7245 if (RAW == '[') {
7246 ctxt->instate = XML_PARSER_DTD;
7247 NEXT;
7248 /*
7249 * Parse the succession of Markup declarations and
7250 * PEReferences.
7251 * Subsequence (markupdecl | PEReference | S)*
7252 */
7253 while (RAW != ']') {
7254 const xmlChar *check = CUR_PTR;
7255 unsigned int cons = ctxt->input->consumed;
7256
7257 SKIP_BLANKS;
7258 xmlParseMarkupDecl(ctxt);
7259 xmlParsePEReference(ctxt);
7260
7261 /*
7262 * Pop-up of finished entities.
7263 */
7264 while ((RAW == 0) && (ctxt->inputNr > 1))
7265 xmlPopInput(ctxt);
7266
7267 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
7268 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
7269 "xmlParseInternalSubset: error detected in Markup declaration\n");
7270 break;
7271 }
7272 }
7273 if (RAW == ']') {
7274 NEXT;
7275 SKIP_BLANKS;
7276 }
7277 }
7278
7279 /*
7280 * We should be at the end of the DOCTYPE declaration.
7281 */
7282 if (RAW != '>') {
7283 xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
7284 }
7285 NEXT;
7286}
7287
7288#ifdef LIBXML_SAX1_ENABLED
7289/**
7290 * xmlParseAttribute:
7291 * @ctxt: an XML parser context
7292 * @value: a xmlChar ** used to store the value of the attribute
7293 *
7294 * parse an attribute
7295 *
7296 * [41] Attribute ::= Name Eq AttValue
7297 *
7298 * [ WFC: No External Entity References ]
7299 * Attribute values cannot contain direct or indirect entity references
7300 * to external entities.
7301 *
7302 * [ WFC: No < in Attribute Values ]
7303 * The replacement text of any entity referred to directly or indirectly in
7304 * an attribute value (other than "&lt;") must not contain a <.
7305 *
7306 * [ VC: Attribute Value Type ]
7307 * The attribute must have been declared; the value must be of the type
7308 * declared for it.
7309 *
7310 * [25] Eq ::= S? '=' S?
7311 *
7312 * With namespace:
7313 *
7314 * [NS 11] Attribute ::= QName Eq AttValue
7315 *
7316 * Also the case QName == xmlns:??? is handled independently as a namespace
7317 * definition.
7318 *
7319 * Returns the attribute name, and the value in *value.
7320 */
7321
7322const xmlChar *
7323xmlParseAttribute(xmlParserCtxtPtr ctxt, xmlChar **value) {
7324 const xmlChar *name;
7325 xmlChar *val;
7326
7327 *value = NULL;
7328 GROW;
7329 name = xmlParseName(ctxt);
7330 if (name == NULL) {
7331 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7332 "error parsing attribute name\n");
7333 return(NULL);
7334 }
7335
7336 /*
7337 * read the value
7338 */
7339 SKIP_BLANKS;
7340 if (RAW == '=') {
7341 NEXT;
7342 SKIP_BLANKS;
7343 val = xmlParseAttValue(ctxt);
7344 ctxt->instate = XML_PARSER_CONTENT;
7345 } else {
7346 xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
7347 "Specification mandate value for attribute %s\n", name);
7348 return(NULL);
7349 }
7350
7351 /*
7352 * Check that xml:lang conforms to the specification
7353 * No more registered as an error, just generate a warning now
7354 * since this was deprecated in XML second edition
7355 */
7356 if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "xml:lang"))) {
7357 if (!xmlCheckLanguageID(val)) {
7358 xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
7359 "Malformed value for xml:lang : %s\n",
7360 val, NULL);
7361 }
7362 }
7363
7364 /*
7365 * Check that xml:space conforms to the specification
7366 */
7367 if (xmlStrEqual(name, BAD_CAST "xml:space")) {
7368 if (xmlStrEqual(val, BAD_CAST "default"))
7369 *(ctxt->space) = 0;
7370 else if (xmlStrEqual(val, BAD_CAST "preserve"))
7371 *(ctxt->space) = 1;
7372 else {
7373 xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE,
7374"Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
7375 val, NULL);
7376 }
7377 }
7378
7379 *value = val;
7380 return(name);
7381}
7382
7383/**
7384 * xmlParseStartTag:
7385 * @ctxt: an XML parser context
7386 *
7387 * parse a start of tag either for rule element or
7388 * EmptyElement. In both case we don't parse the tag closing chars.
7389 *
7390 * [40] STag ::= '<' Name (S Attribute)* S? '>'
7391 *
7392 * [ WFC: Unique Att Spec ]
7393 * No attribute name may appear more than once in the same start-tag or
7394 * empty-element tag.
7395 *
7396 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
7397 *
7398 * [ WFC: Unique Att Spec ]
7399 * No attribute name may appear more than once in the same start-tag or
7400 * empty-element tag.
7401 *
7402 * With namespace:
7403 *
7404 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
7405 *
7406 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
7407 *
7408 * Returns the element name parsed
7409 */
7410
7411const xmlChar *
7412xmlParseStartTag(xmlParserCtxtPtr ctxt) {
7413 const xmlChar *name;
7414 const xmlChar *attname;
7415 xmlChar *attvalue;
7416 const xmlChar **atts = ctxt->atts;
7417 int nbatts = 0;
7418 int maxatts = ctxt->maxatts;
7419 int i;
7420
7421 if (RAW != '<') return(NULL);
7422 NEXT1;
7423
7424 name = xmlParseName(ctxt);
7425 if (name == NULL) {
7426 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7427 "xmlParseStartTag: invalid element name\n");
7428 return(NULL);
7429 }
7430
7431 /*
7432 * Now parse the attributes, it ends up with the ending
7433 *
7434 * (S Attribute)* S?
7435 */
7436 SKIP_BLANKS;
7437 GROW;
7438
7439 while ((RAW != '>') &&
7440 ((RAW != '/') || (NXT(1) != '>')) &&
7441 (IS_BYTE_CHAR(RAW))) {
7442 const xmlChar *q = CUR_PTR;
7443 unsigned int cons = ctxt->input->consumed;
7444
7445 attname = xmlParseAttribute(ctxt, &attvalue);
7446 if ((attname != NULL) && (attvalue != NULL)) {
7447 /*
7448 * [ WFC: Unique Att Spec ]
7449 * No attribute name may appear more than once in the same
7450 * start-tag or empty-element tag.
7451 */
7452 for (i = 0; i < nbatts;i += 2) {
7453 if (xmlStrEqual(atts[i], attname)) {
7454 xmlErrAttributeDup(ctxt, NULL, attname);
7455 xmlFree(attvalue);
7456 goto failed;
7457 }
7458 }
7459 /*
7460 * Add the pair to atts
7461 */
7462 if (atts == NULL) {
7463 maxatts = 22; /* allow for 10 attrs by default */
7464 atts = (const xmlChar **)
7465 xmlMalloc(maxatts * sizeof(xmlChar *));
7466 if (atts == NULL) {
7467 xmlErrMemory(ctxt, NULL);
7468 if (attvalue != NULL)
7469 xmlFree(attvalue);
7470 goto failed;
7471 }
7472 ctxt->atts = atts;
7473 ctxt->maxatts = maxatts;
7474 } else if (nbatts + 4 > maxatts) {
7475 const xmlChar **n;
7476
7477 maxatts *= 2;
7478 n = (const xmlChar **) xmlRealloc((void *) atts,
7479 maxatts * sizeof(const xmlChar *));
7480 if (n == NULL) {
7481 xmlErrMemory(ctxt, NULL);
7482 if (attvalue != NULL)
7483 xmlFree(attvalue);
7484 goto failed;
7485 }
7486 atts = n;
7487 ctxt->atts = atts;
7488 ctxt->maxatts = maxatts;
7489 }
7490 atts[nbatts++] = attname;
7491 atts[nbatts++] = attvalue;
7492 atts[nbatts] = NULL;
7493 atts[nbatts + 1] = NULL;
7494 } else {
7495 if (attvalue != NULL)
7496 xmlFree(attvalue);
7497 }
7498
7499failed:
7500
7501 GROW
7502 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
7503 break;
7504 if (!IS_BLANK_CH(RAW)) {
7505 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
7506 "attributes construct error\n");
7507 }
7508 SKIP_BLANKS;
7509 if ((cons == ctxt->input->consumed) && (q == CUR_PTR) &&
7510 (attname == NULL) && (attvalue == NULL)) {
7511 xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
7512 "xmlParseStartTag: problem parsing attributes\n");
7513 break;
7514 }
7515 SHRINK;
7516 GROW;
7517 }
7518
7519 /*
7520 * SAX: Start of Element !
7521 */
7522 if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL) &&
7523 (!ctxt->disableSAX)) {
7524 if (nbatts > 0)
7525 ctxt->sax->startElement(ctxt->userData, name, atts);
7526 else
7527 ctxt->sax->startElement(ctxt->userData, name, NULL);
7528 }
7529
7530 if (atts != NULL) {
7531 /* Free only the content strings */
7532 for (i = 1;i < nbatts;i+=2)
7533 if (atts[i] != NULL)
7534 xmlFree((xmlChar *) atts[i]);
7535 }
7536 return(name);
7537}
7538
7539/**
7540 * xmlParseEndTag1:
7541 * @ctxt: an XML parser context
7542 * @line: line of the start tag
7543 * @nsNr: number of namespaces on the start tag
7544 *
7545 * parse an end of tag
7546 *
7547 * [42] ETag ::= '</' Name S? '>'
7548 *
7549 * With namespace
7550 *
7551 * [NS 9] ETag ::= '</' QName S? '>'
7552 */
7553
7554static void
7555xmlParseEndTag1(xmlParserCtxtPtr ctxt, int line) {
7556 const xmlChar *name;
7557
7558 GROW;
7559 if ((RAW != '<') || (NXT(1) != '/')) {
7560 xmlFatalErrMsg(ctxt, XML_ERR_LTSLASH_REQUIRED,
7561 "xmlParseEndTag: '</' not found\n");
7562 return;
7563 }
7564 SKIP(2);
7565
7566 name = xmlParseNameAndCompare(ctxt,ctxt->name);
7567
7568 /*
7569 * We should definitely be at the ending "S? '>'" part
7570 */
7571 GROW;
7572 SKIP_BLANKS;
7573 if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
7574 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
7575 } else
7576 NEXT1;
7577
7578 /*
7579 * [ WFC: Element Type Match ]
7580 * The Name in an element's end-tag must match the element type in the
7581 * start-tag.
7582 *
7583 */
7584 if (name != (xmlChar*)1) {
7585 if (name == NULL) name = BAD_CAST "unparseable";
7586 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
7587 "Opening and ending tag mismatch: %s line %d and %s\n",
7588 ctxt->name, line, name);
7589 }
7590
7591 /*
7592 * SAX: End of Tag
7593 */
7594 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
7595 (!ctxt->disableSAX))
7596 ctxt->sax->endElement(ctxt->userData, ctxt->name);
7597
7598 namePop(ctxt);
7599 spacePop(ctxt);
7600 return;
7601}
7602
7603/**
7604 * xmlParseEndTag:
7605 * @ctxt: an XML parser context
7606 *
7607 * parse an end of tag
7608 *
7609 * [42] ETag ::= '</' Name S? '>'
7610 *
7611 * With namespace
7612 *
7613 * [NS 9] ETag ::= '</' QName S? '>'
7614 */
7615
7616void
7617xmlParseEndTag(xmlParserCtxtPtr ctxt) {
7618 xmlParseEndTag1(ctxt, 0);
7619}
7620#endif /* LIBXML_SAX1_ENABLED */
7621
7622/************************************************************************
7623 * *
7624 * SAX 2 specific operations *
7625 * *
7626 ************************************************************************/
7627
7628static const xmlChar *
7629xmlParseNCNameComplex(xmlParserCtxtPtr ctxt) {
7630 int len = 0, l;
7631 int c;
7632 int count = 0;
7633
7634 /*
7635 * Handler for more complex cases
7636 */
7637 GROW;
7638 c = CUR_CHAR(l);
7639 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
7640 (!IS_LETTER(c) && (c != '_'))) {
7641 return(NULL);
7642 }
7643
7644 while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
7645 ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
7646 (c == '.') || (c == '-') || (c == '_') ||
7647 (IS_COMBINING(c)) ||
7648 (IS_EXTENDER(c)))) {
7649 if (count++ > 100) {
7650 count = 0;
7651 GROW;
7652 }
7653 len += l;
7654 NEXTL(l);
7655 c = CUR_CHAR(l);
7656 }
7657 return(xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len));
7658}
7659
7660/*
7661 * xmlGetNamespace:
7662 * @ctxt: an XML parser context
7663 * @prefix: the prefix to lookup
7664 *
7665 * Lookup the namespace name for the @prefix (which ca be NULL)
7666 * The prefix must come from the @ctxt->dict dictionnary
7667 *
7668 * Returns the namespace name or NULL if not bound
7669 */
7670static const xmlChar *
7671xmlGetNamespace(xmlParserCtxtPtr ctxt, const xmlChar *prefix) {
7672 int i;
7673
7674 if (prefix == ctxt->str_xml) return(ctxt->str_xml_ns);
7675 for (i = ctxt->nsNr - 2;i >= 0;i-=2)
7676 if (ctxt->nsTab[i] == prefix) {
7677 if ((prefix == NULL) && (*ctxt->nsTab[i + 1] == 0))
7678 return(NULL);
7679 return(ctxt->nsTab[i + 1]);
7680 }
7681 return(NULL);
7682}
7683
7684/**
7685 * xmlParseNCName:
7686 * @ctxt: an XML parser context
7687 * @len: lenght of the string parsed
7688 *
7689 * parse an XML name.
7690 *
7691 * [4NS] NCNameChar ::= Letter | Digit | '.' | '-' | '_' |
7692 * CombiningChar | Extender
7693 *
7694 * [5NS] NCName ::= (Letter | '_') (NCNameChar)*
7695 *
7696 * Returns the Name parsed or NULL
7697 */
7698
7699static const xmlChar *
7700xmlParseNCName(xmlParserCtxtPtr ctxt) {
7701 const xmlChar *in;
7702 const xmlChar *ret;
7703 int count = 0;
7704
7705 /*
7706 * Accelerator for simple ASCII names
7707 */
7708 in = ctxt->input->cur;
7709 if (((*in >= 0x61) && (*in <= 0x7A)) ||
7710 ((*in >= 0x41) && (*in <= 0x5A)) ||
7711 (*in == '_')) {
7712 in++;
7713 while (((*in >= 0x61) && (*in <= 0x7A)) ||
7714 ((*in >= 0x41) && (*in <= 0x5A)) ||
7715 ((*in >= 0x30) && (*in <= 0x39)) ||
7716 (*in == '_') || (*in == '-') ||
7717 (*in == '.'))
7718 in++;
7719 if ((*in > 0) && (*in < 0x80)) {
7720 count = in - ctxt->input->cur;
7721 ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
7722 ctxt->input->cur = in;
7723 ctxt->nbChars += count;
7724 ctxt->input->col += count;
7725 if (ret == NULL) {
7726 xmlErrMemory(ctxt, NULL);
7727 }
7728 return(ret);
7729 }
7730 }
7731 return(xmlParseNCNameComplex(ctxt));
7732}
7733
7734/**
7735 * xmlParseQName:
7736 * @ctxt: an XML parser context
7737 * @prefix: pointer to store the prefix part
7738 *
7739 * parse an XML Namespace QName
7740 *
7741 * [6] QName ::= (Prefix ':')? LocalPart
7742 * [7] Prefix ::= NCName
7743 * [8] LocalPart ::= NCName
7744 *
7745 * Returns the Name parsed or NULL
7746 */
7747
7748static const xmlChar *
7749xmlParseQName(xmlParserCtxtPtr ctxt, const xmlChar **prefix) {
7750 const xmlChar *l, *p;
7751
7752 GROW;
7753
7754 l = xmlParseNCName(ctxt);
7755 if (l == NULL) {
7756 if (CUR == ':') {
7757 l = xmlParseName(ctxt);
7758 if (l != NULL) {
7759 xmlNsErr(ctxt, XML_NS_ERR_QNAME,
7760 "Failed to parse QName '%s'\n", l, NULL, NULL);
7761 *prefix = NULL;
7762 return(l);
7763 }
7764 }
7765 return(NULL);
7766 }
7767 if (CUR == ':') {
7768 NEXT;
7769 p = l;
7770 l = xmlParseNCName(ctxt);
7771 if (l == NULL) {
7772 xmlChar *tmp;
7773
7774 xmlNsErr(ctxt, XML_NS_ERR_QNAME,
7775 "Failed to parse QName '%s:'\n", p, NULL, NULL);
7776 tmp = xmlBuildQName(BAD_CAST "", p, NULL, 0);
7777 p = xmlDictLookup(ctxt->dict, tmp, -1);
7778 if (tmp != NULL) xmlFree(tmp);
7779 *prefix = NULL;
7780 return(p);
7781 }
7782 if (CUR == ':') {
7783 xmlChar *tmp;
7784
7785 xmlNsErr(ctxt, XML_NS_ERR_QNAME,
7786 "Failed to parse QName '%s:%s:'\n", p, l, NULL);
7787 NEXT;
7788 tmp = (xmlChar *) xmlParseName(ctxt);
7789 if (tmp != NULL) {
7790 tmp = xmlBuildQName(tmp, l, NULL, 0);
7791 l = xmlDictLookup(ctxt->dict, tmp, -1);
7792 if (tmp != NULL) xmlFree(tmp);
7793 *prefix = p;
7794 return(l);
7795 }
7796 tmp = xmlBuildQName(BAD_CAST "", l, NULL, 0);
7797 l = xmlDictLookup(ctxt->dict, tmp, -1);
7798 if (tmp != NULL) xmlFree(tmp);
7799 *prefix = p;
7800 return(l);
7801 }
7802 *prefix = p;
7803 } else
7804 *prefix = NULL;
7805 return(l);
7806}
7807
7808/**
7809 * xmlParseQNameAndCompare:
7810 * @ctxt: an XML parser context
7811 * @name: the localname
7812 * @prefix: the prefix, if any.
7813 *
7814 * parse an XML name and compares for match
7815 * (specialized for endtag parsing)
7816 *
7817 * Returns NULL for an illegal name, (xmlChar*) 1 for success
7818 * and the name for mismatch
7819 */
7820
7821static const xmlChar *
7822xmlParseQNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *name,
7823 xmlChar const *prefix) {
7824 const xmlChar *cmp = name;
7825 const xmlChar *in;
7826 const xmlChar *ret;
7827 const xmlChar *prefix2;
7828
7829 if (prefix == NULL) return(xmlParseNameAndCompare(ctxt, name));
7830
7831 GROW;
7832 in = ctxt->input->cur;
7833
7834 cmp = prefix;
7835 while (*in != 0 && *in == *cmp) {
7836 ++in;
7837 ++cmp;
7838 }
7839 if ((*cmp == 0) && (*in == ':')) {
7840 in++;
7841 cmp = name;
7842 while (*in != 0 && *in == *cmp) {
7843 ++in;
7844 ++cmp;
7845 }
7846 if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
7847 /* success */
7848 ctxt->input->cur = in;
7849 return((const xmlChar*) 1);
7850 }
7851 }
7852 /*
7853 * all strings coms from the dictionary, equality can be done directly
7854 */
7855 ret = xmlParseQName (ctxt, &prefix2);
7856 if ((ret == name) && (prefix == prefix2))
7857 return((const xmlChar*) 1);
7858 return ret;
7859}
7860
7861/**
7862 * xmlParseAttValueInternal:
7863 * @ctxt: an XML parser context
7864 * @len: attribute len result
7865 * @alloc: whether the attribute was reallocated as a new string
7866 * @normalize: if 1 then further non-CDATA normalization must be done
7867 *
7868 * parse a value for an attribute.
7869 * NOTE: if no normalization is needed, the routine will return pointers
7870 * directly from the data buffer.
7871 *
7872 * 3.3.3 Attribute-Value Normalization:
7873 * Before the value of an attribute is passed to the application or
7874 * checked for validity, the XML processor must normalize it as follows:
7875 * - a character reference is processed by appending the referenced
7876 * character to the attribute value
7877 * - an entity reference is processed by recursively processing the
7878 * replacement text of the entity
7879 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
7880 * appending #x20 to the normalized value, except that only a single
7881 * #x20 is appended for a "#xD#xA" sequence that is part of an external
7882 * parsed entity or the literal entity value of an internal parsed entity
7883 * - other characters are processed by appending them to the normalized value
7884 * If the declared value is not CDATA, then the XML processor must further
7885 * process the normalized attribute value by discarding any leading and
7886 * trailing space (#x20) characters, and by replacing sequences of space
7887 * (#x20) characters by a single space (#x20) character.
7888 * All attributes for which no declaration has been read should be treated
7889 * by a non-validating parser as if declared CDATA.
7890 *
7891 * Returns the AttValue parsed or NULL. The value has to be freed by the
7892 * caller if it was copied, this can be detected by val[*len] == 0.
7893 */
7894
7895static xmlChar *
7896xmlParseAttValueInternal(xmlParserCtxtPtr ctxt, int *len, int *alloc,
7897 int normalize)
7898{
7899 xmlChar limit = 0;
7900 const xmlChar *in = NULL, *start, *end, *last;
7901 xmlChar *ret = NULL;
7902
7903 GROW;
7904 in = (xmlChar *) CUR_PTR;
7905 if (*in != '"' && *in != '\'') {
7906 xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
7907 return (NULL);
7908 }
7909 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
7910
7911 /*
7912 * try to handle in this routine the most common case where no
7913 * allocation of a new string is required and where content is
7914 * pure ASCII.
7915 */
7916 limit = *in++;
7917 end = ctxt->input->end;
7918 start = in;
7919 if (in >= end) {
7920 const xmlChar *oldbase = ctxt->input->base;
7921 GROW;
7922 if (oldbase != ctxt->input->base) {
7923 long delta = ctxt->input->base - oldbase;
7924 start = start + delta;
7925 in = in + delta;
7926 }
7927 end = ctxt->input->end;
7928 }
7929 if (normalize) {
7930 /*
7931 * Skip any leading spaces
7932 */
7933 while ((in < end) && (*in != limit) &&
7934 ((*in == 0x20) || (*in == 0x9) ||
7935 (*in == 0xA) || (*in == 0xD))) {
7936 in++;
7937 start = in;
7938 if (in >= end) {
7939 const xmlChar *oldbase = ctxt->input->base;
7940 GROW;
7941 if (oldbase != ctxt->input->base) {
7942 long delta = ctxt->input->base - oldbase;
7943 start = start + delta;
7944 in = in + delta;
7945 }
7946 end = ctxt->input->end;
7947 }
7948 }
7949 while ((in < end) && (*in != limit) && (*in >= 0x20) &&
7950 (*in <= 0x7f) && (*in != '&') && (*in != '<')) {
7951 if ((*in++ == 0x20) && (*in == 0x20)) break;
7952 if (in >= end) {
7953 const xmlChar *oldbase = ctxt->input->base;
7954 GROW;
7955 if (oldbase != ctxt->input->base) {
7956 long delta = ctxt->input->base - oldbase;
7957 start = start + delta;
7958 in = in + delta;
7959 }
7960 end = ctxt->input->end;
7961 }
7962 }
7963 last = in;
7964 /*
7965 * skip the trailing blanks
7966 */
7967 while ((last[-1] == 0x20) && (last > start)) last--;
7968 while ((in < end) && (*in != limit) &&
7969 ((*in == 0x20) || (*in == 0x9) ||
7970 (*in == 0xA) || (*in == 0xD))) {
7971 in++;
7972 if (in >= end) {
7973 const xmlChar *oldbase = ctxt->input->base;
7974 GROW;
7975 if (oldbase != ctxt->input->base) {
7976 long delta = ctxt->input->base - oldbase;
7977 start = start + delta;
7978 in = in + delta;
7979 last = last + delta;
7980 }
7981 end = ctxt->input->end;
7982 }
7983 }
7984 if (*in != limit) goto need_complex;
7985 } else {
7986 while ((in < end) && (*in != limit) && (*in >= 0x20) &&
7987 (*in <= 0x7f) && (*in != '&') && (*in != '<')) {
7988 in++;
7989 if (in >= end) {
7990 const xmlChar *oldbase = ctxt->input->base;
7991 GROW;
7992 if (oldbase != ctxt->input->base) {
7993 long delta = ctxt->input->base - oldbase;
7994 start = start + delta;
7995 in = in + delta;
7996 }
7997 end = ctxt->input->end;
7998 }
7999 }
8000 last = in;
8001 if (*in != limit) goto need_complex;
8002 }
8003 in++;
8004 if (len != NULL) {
8005 *len = last - start;
8006 ret = (xmlChar *) start;
8007 } else {
8008 if (alloc) *alloc = 1;
8009 ret = xmlStrndup(start, last - start);
8010 }
8011 CUR_PTR = in;
8012 if (alloc) *alloc = 0;
8013 return ret;
8014need_complex:
8015 if (alloc) *alloc = 1;
8016 return xmlParseAttValueComplex(ctxt, len, normalize);
8017}
8018
8019/**
8020 * xmlParseAttribute2:
8021 * @ctxt: an XML parser context
8022 * @pref: the element prefix
8023 * @elem: the element name
8024 * @prefix: a xmlChar ** used to store the value of the attribute prefix
8025 * @value: a xmlChar ** used to store the value of the attribute
8026 * @len: an int * to save the length of the attribute
8027 * @alloc: an int * to indicate if the attribute was allocated
8028 *
8029 * parse an attribute in the new SAX2 framework.
8030 *
8031 * Returns the attribute name, and the value in *value, .
8032 */
8033
8034static const xmlChar *
8035xmlParseAttribute2(xmlParserCtxtPtr ctxt,
8036 const xmlChar *pref, const xmlChar *elem,
8037 const xmlChar **prefix, xmlChar **value,
8038 int *len, int *alloc) {
8039 const xmlChar *name;
8040 xmlChar *val, *internal_val = NULL;
8041 int normalize = 0;
8042
8043 *value = NULL;
8044 GROW;
8045 name = xmlParseQName(ctxt, prefix);
8046 if (name == NULL) {
8047 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8048 "error parsing attribute name\n");
8049 return(NULL);
8050 }
8051
8052 /*
8053 * get the type if needed
8054 */
8055 if (ctxt->attsSpecial != NULL) {
8056 int type;
8057
8058 type = (int) (long) xmlHashQLookup2(ctxt->attsSpecial,
8059 pref, elem, *prefix, name);
8060 if (type != 0) normalize = 1;
8061 }
8062
8063 /*
8064 * read the value
8065 */
8066 SKIP_BLANKS;
8067 if (RAW == '=') {
8068 NEXT;
8069 SKIP_BLANKS;
8070 val = xmlParseAttValueInternal(ctxt, len, alloc, normalize);
8071 ctxt->instate = XML_PARSER_CONTENT;
8072 } else {
8073 xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
8074 "Specification mandate value for attribute %s\n", name);
8075 return(NULL);
8076 }
8077
8078 if (*prefix == ctxt->str_xml) {
8079 /*
8080 * Check that xml:lang conforms to the specification
8081 * No more registered as an error, just generate a warning now
8082 * since this was deprecated in XML second edition
8083 */
8084 if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "lang"))) {
8085 internal_val = xmlStrndup(val, *len);
8086 if (!xmlCheckLanguageID(internal_val)) {
8087 xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
8088 "Malformed value for xml:lang : %s\n",
8089 internal_val, NULL);
8090 }
8091 }
8092
8093 /*
8094 * Check that xml:space conforms to the specification
8095 */
8096 if (xmlStrEqual(name, BAD_CAST "space")) {
8097 internal_val = xmlStrndup(val, *len);
8098 if (xmlStrEqual(internal_val, BAD_CAST "default"))
8099 *(ctxt->space) = 0;
8100 else if (xmlStrEqual(internal_val, BAD_CAST "preserve"))
8101 *(ctxt->space) = 1;
8102 else {
8103 xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE,
8104"Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
8105 internal_val, NULL);
8106 }
8107 }
8108 if (internal_val) {
8109 xmlFree(internal_val);
8110 }
8111 }
8112
8113 *value = val;
8114 return(name);
8115}
8116
8117/**
8118 * xmlParseStartTag2:
8119 * @ctxt: an XML parser context
8120 *
8121 * parse a start of tag either for rule element or
8122 * EmptyElement. In both case we don't parse the tag closing chars.
8123 * This routine is called when running SAX2 parsing
8124 *
8125 * [40] STag ::= '<' Name (S Attribute)* S? '>'
8126 *
8127 * [ WFC: Unique Att Spec ]
8128 * No attribute name may appear more than once in the same start-tag or
8129 * empty-element tag.
8130 *
8131 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
8132 *
8133 * [ WFC: Unique Att Spec ]
8134 * No attribute name may appear more than once in the same start-tag or
8135 * empty-element tag.
8136 *
8137 * With namespace:
8138 *
8139 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
8140 *
8141 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
8142 *
8143 * Returns the element name parsed
8144 */
8145
8146static const xmlChar *
8147xmlParseStartTag2(xmlParserCtxtPtr ctxt, const xmlChar **pref,
8148 const xmlChar **URI, int *tlen) {
8149 const xmlChar *localname;
8150 const xmlChar *prefix;
8151 const xmlChar *attname;
8152 const xmlChar *aprefix;
8153 const xmlChar *nsname;
8154 xmlChar *attvalue;
8155 const xmlChar **atts = ctxt->atts;
8156 int maxatts = ctxt->maxatts;
8157 int nratts, nbatts, nbdef;
8158 int i, j, nbNs, attval, oldline, oldcol;
8159 const xmlChar *base;
8160 unsigned long cur;
8161 int nsNr = ctxt->nsNr;
8162
8163 if (RAW != '<') return(NULL);
8164 NEXT1;
8165
8166 /*
8167 * NOTE: it is crucial with the SAX2 API to never call SHRINK beyond that
8168 * point since the attribute values may be stored as pointers to
8169 * the buffer and calling SHRINK would destroy them !
8170 * The Shrinking is only possible once the full set of attribute
8171 * callbacks have been done.
8172 */
8173reparse:
8174 SHRINK;
8175 base = ctxt->input->base;
8176 cur = ctxt->input->cur - ctxt->input->base;
8177 oldline = ctxt->input->line;
8178 oldcol = ctxt->input->col;
8179 nbatts = 0;
8180 nratts = 0;
8181 nbdef = 0;
8182 nbNs = 0;
8183 attval = 0;
8184 /* Forget any namespaces added during an earlier parse of this element. */
8185 ctxt->nsNr = nsNr;
8186
8187 localname = xmlParseQName(ctxt, &prefix);
8188 if (localname == NULL) {
8189 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8190 "StartTag: invalid element name\n");
8191 return(NULL);
8192 }
8193 *tlen = ctxt->input->cur - ctxt->input->base - cur;
8194
8195 /*
8196 * Now parse the attributes, it ends up with the ending
8197 *
8198 * (S Attribute)* S?
8199 */
8200 SKIP_BLANKS;
8201 GROW;
8202 if (ctxt->input->base != base) goto base_changed;
8203
8204 while ((RAW != '>') &&
8205 ((RAW != '/') || (NXT(1) != '>')) &&
8206 (IS_BYTE_CHAR(RAW))) {
8207 const xmlChar *q = CUR_PTR;
8208 unsigned int cons = ctxt->input->consumed;
8209 int len = -1, alloc = 0;
8210
8211 attname = xmlParseAttribute2(ctxt, prefix, localname,
8212 &aprefix, &attvalue, &len, &alloc);
8213 if (ctxt->input->base != base) {
8214 if ((attvalue != NULL) && (alloc != 0))
8215 xmlFree(attvalue);
8216 attvalue = NULL;
8217 goto base_changed;
8218 }
8219 if ((attname != NULL) && (attvalue != NULL)) {
8220 if (len < 0) len = xmlStrlen(attvalue);
8221 if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
8222 const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len);
8223 xmlURIPtr uri;
8224
8225 if (*URL != 0) {
8226 uri = xmlParseURI((const char *) URL);
8227 if (uri == NULL) {
8228 xmlWarningMsg(ctxt, XML_WAR_NS_URI,
8229 "xmlns: %s not a valid URI\n",
8230 URL, NULL);
8231 } else {
8232 if (uri->scheme == NULL) {
8233 xmlWarningMsg(ctxt, XML_WAR_NS_URI_RELATIVE,
8234 "xmlns: URI %s is not absolute\n",
8235 URL, NULL);
8236 }
8237 xmlFreeURI(uri);
8238 }
8239 }
8240 /*
8241 * check that it's not a defined namespace
8242 */
8243 for (j = 1;j <= nbNs;j++)
8244 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL)
8245 break;
8246 if (j <= nbNs)
8247 xmlErrAttributeDup(ctxt, NULL, attname);
8248 else
8249 if (nsPush(ctxt, NULL, URL) > 0) nbNs++;
8250 if (alloc != 0) xmlFree(attvalue);
8251 SKIP_BLANKS;
8252 continue;
8253 }
8254 if (aprefix == ctxt->str_xmlns) {
8255 const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len);
8256 xmlURIPtr uri;
8257
8258 if (attname == ctxt->str_xml) {
8259 if (URL != ctxt->str_xml_ns) {
8260 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
8261 "xml namespace prefix mapped to wrong URI\n",
8262 NULL, NULL, NULL);
8263 }
8264 /*
8265 * Do not keep a namespace definition node
8266 */
8267 if (alloc != 0) xmlFree(attvalue);
8268 SKIP_BLANKS;
8269 continue;
8270 }
8271 uri = xmlParseURI((const char *) URL);
8272 if (uri == NULL) {
8273 xmlWarningMsg(ctxt, XML_WAR_NS_URI,
8274 "xmlns:%s: '%s' is not a valid URI\n",
8275 attname, URL);
8276 } else {
8277 if ((ctxt->pedantic) && (uri->scheme == NULL)) {
8278 xmlWarningMsg(ctxt, XML_WAR_NS_URI_RELATIVE,
8279 "xmlns:%s: URI %s is not absolute\n",
8280 attname, URL);
8281 }
8282 xmlFreeURI(uri);
8283 }
8284
8285 /*
8286 * check that it's not a defined namespace
8287 */
8288 for (j = 1;j <= nbNs;j++)
8289 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname)
8290 break;
8291 if (j <= nbNs)
8292 xmlErrAttributeDup(ctxt, aprefix, attname);
8293 else
8294 if (nsPush(ctxt, attname, URL) > 0) nbNs++;
8295 if (alloc != 0) xmlFree(attvalue);
8296 SKIP_BLANKS;
8297 if (ctxt->input->base != base) goto base_changed;
8298 continue;
8299 }
8300
8301 /*
8302 * Add the pair to atts
8303 */
8304 if ((atts == NULL) || (nbatts + 5 > maxatts)) {
8305 if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
8306 if (attvalue[len] == 0)
8307 xmlFree(attvalue);
8308 goto failed;
8309 }
8310 maxatts = ctxt->maxatts;
8311 atts = ctxt->atts;
8312 }
8313 ctxt->attallocs[nratts++] = alloc;
8314 atts[nbatts++] = attname;
8315 atts[nbatts++] = aprefix;
8316 atts[nbatts++] = NULL; /* the URI will be fetched later */
8317 atts[nbatts++] = attvalue;
8318 attvalue += len;
8319 atts[nbatts++] = attvalue;
8320 /*
8321 * tag if some deallocation is needed
8322 */
8323 if (alloc != 0) attval = 1;
8324 } else {
8325 if ((attvalue != NULL) && (attvalue[len] == 0))
8326 xmlFree(attvalue);
8327 }
8328
8329failed:
8330
8331 GROW
8332 if (ctxt->input->base != base) goto base_changed;
8333 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
8334 break;
8335 if (!IS_BLANK_CH(RAW)) {
8336 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
8337 "attributes construct error\n");
8338 break;
8339 }
8340 SKIP_BLANKS;
8341 if ((cons == ctxt->input->consumed) && (q == CUR_PTR) &&
8342 (attname == NULL) && (attvalue == NULL)) {
8343 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
8344 "xmlParseStartTag: problem parsing attributes\n");
8345 break;
8346 }
8347 GROW;
8348 if (ctxt->input->base != base) goto base_changed;
8349 }
8350
8351 /*
8352 * The attributes defaulting
8353 */
8354 if (ctxt->attsDefault != NULL) {
8355 xmlDefAttrsPtr defaults;
8356
8357 defaults = xmlHashLookup2(ctxt->attsDefault, localname, prefix);
8358 if (defaults != NULL) {
8359 for (i = 0;i < defaults->nbAttrs;i++) {
8360 attname = defaults->values[4 * i];
8361 aprefix = defaults->values[4 * i + 1];
8362
8363 /*
8364 * special work for namespaces defaulted defs
8365 */
8366 if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
8367 /*
8368 * check that it's not a defined namespace
8369 */
8370 for (j = 1;j <= nbNs;j++)
8371 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL)
8372 break;
8373 if (j <= nbNs) continue;
8374
8375 nsname = xmlGetNamespace(ctxt, NULL);
8376 if (nsname != defaults->values[4 * i + 2]) {
8377 if (nsPush(ctxt, NULL,
8378 defaults->values[4 * i + 2]) > 0)
8379 nbNs++;
8380 }
8381 } else if (aprefix == ctxt->str_xmlns) {
8382 /*
8383 * check that it's not a defined namespace
8384 */
8385 for (j = 1;j <= nbNs;j++)
8386 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname)
8387 break;
8388 if (j <= nbNs) continue;
8389
8390 nsname = xmlGetNamespace(ctxt, attname);
8391 if (nsname != defaults->values[2]) {
8392 if (nsPush(ctxt, attname,
8393 defaults->values[4 * i + 2]) > 0)
8394 nbNs++;
8395 }
8396 } else {
8397 /*
8398 * check that it's not a defined attribute
8399 */
8400 for (j = 0;j < nbatts;j+=5) {
8401 if ((attname == atts[j]) && (aprefix == atts[j+1]))
8402 break;
8403 }
8404 if (j < nbatts) continue;
8405
8406 if ((atts == NULL) || (nbatts + 5 > maxatts)) {
8407 if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
8408 return(NULL);
8409 }
8410 maxatts = ctxt->maxatts;
8411 atts = ctxt->atts;
8412 }
8413 atts[nbatts++] = attname;
8414 atts[nbatts++] = aprefix;
8415 if (aprefix == NULL)
8416 atts[nbatts++] = NULL;
8417 else
8418 atts[nbatts++] = xmlGetNamespace(ctxt, aprefix);
8419 atts[nbatts++] = defaults->values[4 * i + 2];
8420 atts[nbatts++] = defaults->values[4 * i + 3];
8421 nbdef++;
8422 }
8423 }
8424 }
8425 }
8426
8427 /*
8428 * The attributes checkings
8429 */
8430 for (i = 0; i < nbatts;i += 5) {
8431 /*
8432 * The default namespace does not apply to attribute names.
8433 */
8434 if (atts[i + 1] != NULL) {
8435 nsname = xmlGetNamespace(ctxt, atts[i + 1]);
8436 if (nsname == NULL) {
8437 xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
8438 "Namespace prefix %s for %s on %s is not defined\n",
8439 atts[i + 1], atts[i], localname);
8440 }
8441 atts[i + 2] = nsname;
8442 } else
8443 nsname = NULL;
8444 /*
8445 * [ WFC: Unique Att Spec ]
8446 * No attribute name may appear more than once in the same
8447 * start-tag or empty-element tag.
8448 * As extended by the Namespace in XML REC.
8449 */
8450 for (j = 0; j < i;j += 5) {
8451 if (atts[i] == atts[j]) {
8452 if (atts[i+1] == atts[j+1]) {
8453 xmlErrAttributeDup(ctxt, atts[i+1], atts[i]);
8454 break;
8455 }
8456 if ((nsname != NULL) && (atts[j + 2] == nsname)) {
8457 xmlNsErr(ctxt, XML_NS_ERR_ATTRIBUTE_REDEFINED,
8458 "Namespaced Attribute %s in '%s' redefined\n",
8459 atts[i], nsname, NULL);
8460 break;
8461 }
8462 }
8463 }
8464 }
8465
8466 nsname = xmlGetNamespace(ctxt, prefix);
8467 if ((prefix != NULL) && (nsname == NULL)) {
8468 xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
8469 "Namespace prefix %s on %s is not defined\n",
8470 prefix, localname, NULL);
8471 }
8472 *pref = prefix;
8473 *URI = nsname;
8474
8475 /*
8476 * SAX: Start of Element !
8477 */
8478 if ((ctxt->sax != NULL) && (ctxt->sax->startElementNs != NULL) &&
8479 (!ctxt->disableSAX)) {
8480 if (nbNs > 0)
8481 ctxt->sax->startElementNs(ctxt->userData, localname, prefix,
8482 nsname, nbNs, &ctxt->nsTab[ctxt->nsNr - 2 * nbNs],
8483 nbatts / 5, nbdef, atts);
8484 else
8485 ctxt->sax->startElementNs(ctxt->userData, localname, prefix,
8486 nsname, 0, NULL, nbatts / 5, nbdef, atts);
8487 }
8488
8489 /*
8490 * Free up attribute allocated strings if needed
8491 */
8492 if (attval != 0) {
8493 for (i = 3,j = 0; j < nratts;i += 5,j++)
8494 if ((ctxt->attallocs[j] != 0) && (atts[i] != NULL))
8495 xmlFree((xmlChar *) atts[i]);
8496 }
8497
8498 return(localname);
8499
8500base_changed:
8501 /*
8502 * the attribute strings are valid iif the base didn't changed
8503 */
8504 if (attval != 0) {
8505 for (i = 3,j = 0; j < nratts;i += 5,j++)
8506 if ((ctxt->attallocs[j] != 0) && (atts[i] != NULL))
8507 xmlFree((xmlChar *) atts[i]);
8508 }
8509 ctxt->input->cur = ctxt->input->base + cur;
8510 ctxt->input->line = oldline;
8511 ctxt->input->col = oldcol;
8512 if (ctxt->wellFormed == 1) {
8513 goto reparse;
8514 }
8515 return(NULL);
8516}
8517
8518/**
8519 * xmlParseEndTag2:
8520 * @ctxt: an XML parser context
8521 * @line: line of the start tag
8522 * @nsNr: number of namespaces on the start tag
8523 *
8524 * parse an end of tag
8525 *
8526 * [42] ETag ::= '</' Name S? '>'
8527 *
8528 * With namespace
8529 *
8530 * [NS 9] ETag ::= '</' QName S? '>'
8531 */
8532
8533static void
8534xmlParseEndTag2(xmlParserCtxtPtr ctxt, const xmlChar *prefix,
8535 const xmlChar *URI, int line, int nsNr, int tlen) {
8536 const xmlChar *name;
8537
8538 GROW;
8539 if ((RAW != '<') || (NXT(1) != '/')) {
8540 xmlFatalErr(ctxt, XML_ERR_LTSLASH_REQUIRED, NULL);
8541 return;
8542 }
8543 SKIP(2);
8544
8545 if ((tlen > 0) && (xmlStrncmp(ctxt->input->cur, ctxt->name, tlen) == 0)) {
8546 if (ctxt->input->cur[tlen] == '>') {
8547 ctxt->input->cur += tlen + 1;
8548 goto done;
8549 }
8550 ctxt->input->cur += tlen;
8551 name = (xmlChar*)1;
8552 } else {
8553 if (prefix == NULL)
8554 name = xmlParseNameAndCompare(ctxt, ctxt->name);
8555 else
8556 name = xmlParseQNameAndCompare(ctxt, ctxt->name, prefix);
8557 }
8558
8559 /*
8560 * We should definitely be at the ending "S? '>'" part
8561 */
8562 GROW;
8563 SKIP_BLANKS;
8564 if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
8565 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
8566 } else
8567 NEXT1;
8568
8569 /*
8570 * [ WFC: Element Type Match ]
8571 * The Name in an element's end-tag must match the element type in the
8572 * start-tag.
8573 *
8574 */
8575 if (name != (xmlChar*)1) {
8576 if (name == NULL) name = BAD_CAST "unparseable";
8577 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
8578 "Opening and ending tag mismatch: %s line %d and %s\n",
8579 ctxt->name, line, name);
8580 }
8581
8582 /*
8583 * SAX: End of Tag
8584 */
8585done:
8586 if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
8587 (!ctxt->disableSAX))
8588 ctxt->sax->endElementNs(ctxt->userData, ctxt->name, prefix, URI);
8589
8590 spacePop(ctxt);
8591 if (nsNr != 0)
8592 nsPop(ctxt, nsNr);
8593 return;
8594}
8595
8596/**
8597 * xmlParseCDSect:
8598 * @ctxt: an XML parser context
8599 *
8600 * Parse escaped pure raw content.
8601 *
8602 * [18] CDSect ::= CDStart CData CDEnd
8603 *
8604 * [19] CDStart ::= '<![CDATA['
8605 *
8606 * [20] Data ::= (Char* - (Char* ']]>' Char*))
8607 *
8608 * [21] CDEnd ::= ']]>'
8609 */
8610void
8611xmlParseCDSect(xmlParserCtxtPtr ctxt) {
8612 xmlChar *buf = NULL;
8613 int len = 0;
8614 int size = XML_PARSER_BUFFER_SIZE;
8615 int r, rl;
8616 int s, sl;
8617 int cur, l;
8618 int count = 0;
8619
8620 /* Check 2.6.0 was NXT(0) not RAW */
8621 if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) {
8622 SKIP(9);
8623 } else
8624 return;
8625
8626 ctxt->instate = XML_PARSER_CDATA_SECTION;
8627 r = CUR_CHAR(rl);
8628 if (!IS_CHAR(r)) {
8629 xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
8630 ctxt->instate = XML_PARSER_CONTENT;
8631 return;
8632 }
8633 NEXTL(rl);
8634 s = CUR_CHAR(sl);
8635 if (!IS_CHAR(s)) {
8636 xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
8637 ctxt->instate = XML_PARSER_CONTENT;
8638 return;
8639 }
8640 NEXTL(sl);
8641 cur = CUR_CHAR(l);
8642 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
8643 if (buf == NULL) {
8644 xmlErrMemory(ctxt, NULL);
8645 return;
8646 }
8647 while (IS_CHAR(cur) &&
8648 ((r != ']') || (s != ']') || (cur != '>'))) {
8649 if (len + 5 >= size) {
8650 xmlChar *tmp;
8651
8652 size *= 2;
8653 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
8654 if (tmp == NULL) {
8655 xmlFree(buf);
8656 xmlErrMemory(ctxt, NULL);
8657 return;
8658 }
8659 buf = tmp;
8660 }
8661 COPY_BUF(rl,buf,len,r);
8662 r = s;
8663 rl = sl;
8664 s = cur;
8665 sl = l;
8666 count++;
8667 if (count > 50) {
8668 GROW;
8669 count = 0;
8670 }
8671 NEXTL(l);
8672 cur = CUR_CHAR(l);
8673 }
8674 buf[len] = 0;
8675 ctxt->instate = XML_PARSER_CONTENT;
8676 if (cur != '>') {
8677 xmlFatalErrMsgStr(ctxt, XML_ERR_CDATA_NOT_FINISHED,
8678 "CData section not finished\n%.50s\n", buf);
8679 xmlFree(buf);
8680 return;
8681 }
8682 NEXTL(l);
8683
8684 /*
8685 * OK the buffer is to be consumed as cdata.
8686 */
8687 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
8688 if (ctxt->sax->cdataBlock != NULL)
8689 ctxt->sax->cdataBlock(ctxt->userData, buf, len);
8690 else if (ctxt->sax->characters != NULL)
8691 ctxt->sax->characters(ctxt->userData, buf, len);
8692 }
8693 xmlFree(buf);
8694}
8695
8696/**
8697 * xmlParseContent:
8698 * @ctxt: an XML parser context
8699 *
8700 * Parse a content:
8701 *
8702 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
8703 */
8704
8705void
8706xmlParseContent(xmlParserCtxtPtr ctxt) {
8707 GROW;
8708 while ((RAW != 0) &&
8709 ((RAW != '<') || (NXT(1) != '/')) &&
8710 (ctxt->instate != XML_PARSER_EOF)) {
8711 const xmlChar *test = CUR_PTR;
8712 unsigned int cons = ctxt->input->consumed;
8713 const xmlChar *cur = ctxt->input->cur;
8714
8715 /*
8716 * First case : a Processing Instruction.
8717 */
8718 if ((*cur == '<') && (cur[1] == '?')) {
8719 xmlParsePI(ctxt);
8720 }
8721
8722 /*
8723 * Second case : a CDSection
8724 */
8725 /* 2.6.0 test was *cur not RAW */
8726 else if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) {
8727 xmlParseCDSect(ctxt);
8728 }
8729
8730 /*
8731 * Third case : a comment
8732 */
8733 else if ((*cur == '<') && (NXT(1) == '!') &&
8734 (NXT(2) == '-') && (NXT(3) == '-')) {
8735 xmlParseComment(ctxt);
8736 ctxt->instate = XML_PARSER_CONTENT;
8737 }
8738
8739 /*
8740 * Fourth case : a sub-element.
8741 */
8742 else if (*cur == '<') {
8743 xmlParseElement(ctxt);
8744 }
8745
8746 /*
8747 * Fifth case : a reference. If if has not been resolved,
8748 * parsing returns it's Name, create the node
8749 */
8750
8751 else if (*cur == '&') {
8752 xmlParseReference(ctxt);
8753 }
8754
8755 /*
8756 * Last case, text. Note that References are handled directly.
8757 */
8758 else {
8759 xmlParseCharData(ctxt, 0);
8760 }
8761
8762 GROW;
8763 /*
8764 * Pop-up of finished entities.
8765 */
8766 while ((RAW == 0) && (ctxt->inputNr > 1))
8767 xmlPopInput(ctxt);
8768 SHRINK;
8769
8770 if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) {
8771 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
8772 "detected an error in element content\n");
8773 ctxt->instate = XML_PARSER_EOF;
8774 break;
8775 }
8776 }
8777}
8778
8779/**
8780 * xmlParseElement:
8781 * @ctxt: an XML parser context
8782 *
8783 * parse an XML element, this is highly recursive
8784 *
8785 * [39] element ::= EmptyElemTag | STag content ETag
8786 *
8787 * [ WFC: Element Type Match ]
8788 * The Name in an element's end-tag must match the element type in the
8789 * start-tag.
8790 *
8791 */
8792
8793void
8794xmlParseElement(xmlParserCtxtPtr ctxt) {
8795 const xmlChar *name;
8796 const xmlChar *prefix;
8797 const xmlChar *URI;
8798 xmlParserNodeInfo node_info;
8799 int line, tlen;
8800 xmlNodePtr ret;
8801 int nsNr = ctxt->nsNr;
8802
8803 if ((unsigned int) ctxt->nameNr > xmlParserMaxDepth) {
8804 xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR,
8805 "Excessive depth in document: change xmlParserMaxDepth = %d\n",
8806 xmlParserMaxDepth);
8807 ctxt->instate = XML_PARSER_EOF;
8808 return;
8809 }
8810
8811 /* Capture start position */
8812 if (ctxt->record_info) {
8813 node_info.begin_pos = ctxt->input->consumed +
8814 (CUR_PTR - ctxt->input->base);
8815 node_info.begin_line = ctxt->input->line;
8816 }
8817
8818 if (ctxt->spaceNr == 0)
8819 spacePush(ctxt, -1);
8820 else if (*ctxt->space == -2)
8821 spacePush(ctxt, -1);
8822 else
8823 spacePush(ctxt, *ctxt->space);
8824
8825 line = ctxt->input->line;
8826#ifdef LIBXML_SAX1_ENABLED
8827 if (ctxt->sax2)
8828#endif /* LIBXML_SAX1_ENABLED */
8829 name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen);
8830#ifdef LIBXML_SAX1_ENABLED
8831 else
8832 name = xmlParseStartTag(ctxt);
8833#endif /* LIBXML_SAX1_ENABLED */
8834 if (ctxt->instate == XML_PARSER_EOF)
8835 return;
8836 if (name == NULL) {
8837 spacePop(ctxt);
8838 return;
8839 }
8840 namePush(ctxt, name);
8841 ret = ctxt->node;
8842
8843#ifdef LIBXML_VALID_ENABLED
8844 /*
8845 * [ VC: Root Element Type ]
8846 * The Name in the document type declaration must match the element
8847 * type of the root element.
8848 */
8849 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
8850 ctxt->node && (ctxt->node == ctxt->myDoc->children))
8851 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
8852#endif /* LIBXML_VALID_ENABLED */
8853
8854 /*
8855 * Check for an Empty Element.
8856 */
8857 if ((RAW == '/') && (NXT(1) == '>')) {
8858 SKIP(2);
8859 if (ctxt->sax2) {
8860 if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
8861 (!ctxt->disableSAX))
8862 ctxt->sax->endElementNs(ctxt->userData, name, prefix, URI);
8863#ifdef LIBXML_SAX1_ENABLED
8864 } else {
8865 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
8866 (!ctxt->disableSAX))
8867 ctxt->sax->endElement(ctxt->userData, name);
8868#endif /* LIBXML_SAX1_ENABLED */
8869 }
8870 namePop(ctxt);
8871 spacePop(ctxt);
8872 if (nsNr != ctxt->nsNr)
8873 nsPop(ctxt, ctxt->nsNr - nsNr);
8874 if ( ret != NULL && ctxt->record_info ) {
8875 node_info.end_pos = ctxt->input->consumed +
8876 (CUR_PTR - ctxt->input->base);
8877 node_info.end_line = ctxt->input->line;
8878 node_info.node = ret;
8879 xmlParserAddNodeInfo(ctxt, &node_info);
8880 }
8881 return;
8882 }
8883 if (RAW == '>') {
8884 NEXT1;
8885 } else {
8886 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_GT_REQUIRED,
8887 "Couldn't find end of Start Tag %s line %d\n",
8888 name, line, NULL);
8889
8890 /*
8891 * end of parsing of this node.
8892 */
8893 nodePop(ctxt);
8894 namePop(ctxt);
8895 spacePop(ctxt);
8896 if (nsNr != ctxt->nsNr)
8897 nsPop(ctxt, ctxt->nsNr - nsNr);
8898
8899 /*
8900 * Capture end position and add node
8901 */
8902 if ( ret != NULL && ctxt->record_info ) {
8903 node_info.end_pos = ctxt->input->consumed +
8904 (CUR_PTR - ctxt->input->base);
8905 node_info.end_line = ctxt->input->line;
8906 node_info.node = ret;
8907 xmlParserAddNodeInfo(ctxt, &node_info);
8908 }
8909 return;
8910 }
8911
8912 /*
8913 * Parse the content of the element:
8914 */
8915 xmlParseContent(ctxt);
8916 if (!IS_BYTE_CHAR(RAW)) {
8917 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NOT_FINISHED,
8918 "Premature end of data in tag %s line %d\n",
8919 name, line, NULL);
8920
8921 /*
8922 * end of parsing of this node.
8923 */
8924 nodePop(ctxt);
8925 namePop(ctxt);
8926 spacePop(ctxt);
8927 if (nsNr != ctxt->nsNr)
8928 nsPop(ctxt, ctxt->nsNr - nsNr);
8929 return;
8930 }
8931
8932 /*
8933 * parse the end of tag: '</' should be here.
8934 */
8935 if (ctxt->sax2) {
8936 xmlParseEndTag2(ctxt, prefix, URI, line, ctxt->nsNr - nsNr, tlen);
8937 namePop(ctxt);
8938 }
8939#ifdef LIBXML_SAX1_ENABLED
8940 else
8941 xmlParseEndTag1(ctxt, line);
8942#endif /* LIBXML_SAX1_ENABLED */
8943
8944 /*
8945 * Capture end position and add node
8946 */
8947 if ( ret != NULL && ctxt->record_info ) {
8948 node_info.end_pos = ctxt->input->consumed +
8949 (CUR_PTR - ctxt->input->base);
8950 node_info.end_line = ctxt->input->line;
8951 node_info.node = ret;
8952 xmlParserAddNodeInfo(ctxt, &node_info);
8953 }
8954}
8955
8956/**
8957 * xmlParseVersionNum:
8958 * @ctxt: an XML parser context
8959 *
8960 * parse the XML version value.
8961 *
8962 * [26] VersionNum ::= ([a-zA-Z0-9_.:] | '-')+
8963 *
8964 * Returns the string giving the XML version number, or NULL
8965 */
8966xmlChar *
8967xmlParseVersionNum(xmlParserCtxtPtr ctxt) {
8968 xmlChar *buf = NULL;
8969 int len = 0;
8970 int size = 10;
8971 xmlChar cur;
8972
8973 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
8974 if (buf == NULL) {
8975 xmlErrMemory(ctxt, NULL);
8976 return(NULL);
8977 }
8978 cur = CUR;
8979 while (((cur >= 'a') && (cur <= 'z')) ||
8980 ((cur >= 'A') && (cur <= 'Z')) ||
8981 ((cur >= '0') && (cur <= '9')) ||
8982 (cur == '_') || (cur == '.') ||
8983 (cur == ':') || (cur == '-')) {
8984 if (len + 1 >= size) {
8985 xmlChar *tmp;
8986
8987 size *= 2;
8988 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
8989 if (tmp == NULL) {
8990 xmlErrMemory(ctxt, NULL);
8991 return(NULL);
8992 }
8993 buf = tmp;
8994 }
8995 buf[len++] = cur;
8996 NEXT;
8997 cur=CUR;
8998 }
8999 buf[len] = 0;
9000 return(buf);
9001}
9002
9003/**
9004 * xmlParseVersionInfo:
9005 * @ctxt: an XML parser context
9006 *
9007 * parse the XML version.
9008 *
9009 * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
9010 *
9011 * [25] Eq ::= S? '=' S?
9012 *
9013 * Returns the version string, e.g. "1.0"
9014 */
9015
9016xmlChar *
9017xmlParseVersionInfo(xmlParserCtxtPtr ctxt) {
9018 xmlChar *version = NULL;
9019
9020 if (CMP7(CUR_PTR, 'v', 'e', 'r', 's', 'i', 'o', 'n')) {
9021 SKIP(7);
9022 SKIP_BLANKS;
9023 if (RAW != '=') {
9024 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
9025 return(NULL);
9026 }
9027 NEXT;
9028 SKIP_BLANKS;
9029 if (RAW == '"') {
9030 NEXT;
9031 version = xmlParseVersionNum(ctxt);
9032 if (RAW != '"') {
9033 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
9034 } else
9035 NEXT;
9036 } else if (RAW == '\''){
9037 NEXT;
9038 version = xmlParseVersionNum(ctxt);
9039 if (RAW != '\'') {
9040 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
9041 } else
9042 NEXT;
9043 } else {
9044 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
9045 }
9046 }
9047 return(version);
9048}
9049
9050/**
9051 * xmlParseEncName:
9052 * @ctxt: an XML parser context
9053 *
9054 * parse the XML encoding name
9055 *
9056 * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
9057 *
9058 * Returns the encoding name value or NULL
9059 */
9060xmlChar *
9061xmlParseEncName(xmlParserCtxtPtr ctxt) {
9062 xmlChar *buf = NULL;
9063 int len = 0;
9064 int size = 10;
9065 xmlChar cur;
9066
9067 cur = CUR;
9068 if (((cur >= 'a') && (cur <= 'z')) ||
9069 ((cur >= 'A') && (cur <= 'Z'))) {
9070 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
9071 if (buf == NULL) {
9072 xmlErrMemory(ctxt, NULL);
9073 return(NULL);
9074 }
9075
9076 buf[len++] = cur;
9077 NEXT;
9078 cur = CUR;
9079 while (((cur >= 'a') && (cur <= 'z')) ||
9080 ((cur >= 'A') && (cur <= 'Z')) ||
9081 ((cur >= '0') && (cur <= '9')) ||
9082 (cur == '.') || (cur == '_') ||
9083 (cur == '-')) {
9084 if (len + 1 >= size) {
9085 xmlChar *tmp;
9086
9087 size *= 2;
9088 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
9089 if (tmp == NULL) {
9090 xmlErrMemory(ctxt, NULL);
9091 xmlFree(buf);
9092 return(NULL);
9093 }
9094 buf = tmp;
9095 }
9096 buf[len++] = cur;
9097 NEXT;
9098 cur = CUR;
9099 if (cur == 0) {
9100 SHRINK;
9101 GROW;
9102 cur = CUR;
9103 }
9104 }
9105 buf[len] = 0;
9106 } else {
9107 xmlFatalErr(ctxt, XML_ERR_ENCODING_NAME, NULL);
9108 }
9109 return(buf);
9110}
9111
9112/**
9113 * xmlParseEncodingDecl:
9114 * @ctxt: an XML parser context
9115 *
9116 * parse the XML encoding declaration
9117 *
9118 * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'")
9119 *
9120 * this setups the conversion filters.
9121 *
9122 * Returns the encoding value or NULL
9123 */
9124
9125const xmlChar *
9126xmlParseEncodingDecl(xmlParserCtxtPtr ctxt) {
9127 xmlChar *encoding = NULL;
9128
9129 SKIP_BLANKS;
9130 if (CMP8(CUR_PTR, 'e', 'n', 'c', 'o', 'd', 'i', 'n', 'g')) {
9131 SKIP(8);
9132 SKIP_BLANKS;
9133 if (RAW != '=') {
9134 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
9135 return(NULL);
9136 }
9137 NEXT;
9138 SKIP_BLANKS;
9139 if (RAW == '"') {
9140 NEXT;
9141 encoding = xmlParseEncName(ctxt);
9142 if (RAW != '"') {
9143 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
9144 } else
9145 NEXT;
9146 } else if (RAW == '\''){
9147 NEXT;
9148 encoding = xmlParseEncName(ctxt);
9149 if (RAW != '\'') {
9150 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
9151 } else
9152 NEXT;
9153 } else {
9154 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
9155 }
9156 /*
9157 * UTF-16 encoding stwich has already taken place at this stage,
9158 * more over the little-endian/big-endian selection is already done
9159 */
9160 if ((encoding != NULL) &&
9161 ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-16")) ||
9162 (!xmlStrcasecmp(encoding, BAD_CAST "UTF16")))) {
9163 if (ctxt->encoding != NULL)
9164 xmlFree((xmlChar *) ctxt->encoding);
9165 ctxt->encoding = encoding;
9166 }
9167 /*
9168 * UTF-8 encoding is handled natively
9169 */
9170 else if ((encoding != NULL) &&
9171 ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-8")) ||
9172 (!xmlStrcasecmp(encoding, BAD_CAST "UTF8")))) {
9173 if (ctxt->encoding != NULL)
9174 xmlFree((xmlChar *) ctxt->encoding);
9175 ctxt->encoding = encoding;
9176 }
9177 else if (encoding != NULL) {
9178 xmlCharEncodingHandlerPtr handler;
9179
9180 if (ctxt->input->encoding != NULL)
9181 xmlFree((xmlChar *) ctxt->input->encoding);
9182 ctxt->input->encoding = encoding;
9183
9184 handler = xmlFindCharEncodingHandler((const char *) encoding);
9185 if (handler != NULL) {
9186 xmlSwitchToEncoding(ctxt, handler);
9187 } else {
9188 xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
9189 "Unsupported encoding %s\n", encoding);
9190 return(NULL);
9191 }
9192 }
9193 }
9194 return(encoding);
9195}
9196
9197/**
9198 * xmlParseSDDecl:
9199 * @ctxt: an XML parser context
9200 *
9201 * parse the XML standalone declaration
9202 *
9203 * [32] SDDecl ::= S 'standalone' Eq
9204 * (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no')'"'))
9205 *
9206 * [ VC: Standalone Document Declaration ]
9207 * TODO The standalone document declaration must have the value "no"
9208 * if any external markup declarations contain declarations of:
9209 * - attributes with default values, if elements to which these
9210 * attributes apply appear in the document without specifications
9211 * of values for these attributes, or
9212 * - entities (other than amp, lt, gt, apos, quot), if references
9213 * to those entities appear in the document, or
9214 * - attributes with values subject to normalization, where the
9215 * attribute appears in the document with a value which will change
9216 * as a result of normalization, or
9217 * - element types with element content, if white space occurs directly
9218 * within any instance of those types.
9219 *
9220 * Returns:
9221 * 1 if standalone="yes"
9222 * 0 if standalone="no"
9223 * -2 if standalone attribute is missing or invalid
9224 * (A standalone value of -2 means that the XML declaration was found,
9225 * but no value was specified for the standalone attribute).
9226 */
9227
9228int
9229xmlParseSDDecl(xmlParserCtxtPtr ctxt) {
9230 int standalone = -2;
9231
9232 SKIP_BLANKS;
9233 if (CMP10(CUR_PTR, 's', 't', 'a', 'n', 'd', 'a', 'l', 'o', 'n', 'e')) {
9234 SKIP(10);
9235 SKIP_BLANKS;
9236 if (RAW != '=') {
9237 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
9238 return(standalone);
9239 }
9240 NEXT;
9241 SKIP_BLANKS;
9242 if (RAW == '\''){
9243 NEXT;
9244 if ((RAW == 'n') && (NXT(1) == 'o')) {
9245 standalone = 0;
9246 SKIP(2);
9247 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
9248 (NXT(2) == 's')) {
9249 standalone = 1;
9250 SKIP(3);
9251 } else {
9252 xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
9253 }
9254 if (RAW != '\'') {
9255 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
9256 } else
9257 NEXT;
9258 } else if (RAW == '"'){
9259 NEXT;
9260 if ((RAW == 'n') && (NXT(1) == 'o')) {
9261 standalone = 0;
9262 SKIP(2);
9263 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
9264 (NXT(2) == 's')) {
9265 standalone = 1;
9266 SKIP(3);
9267 } else {
9268 xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
9269 }
9270 if (RAW != '"') {
9271 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
9272 } else
9273 NEXT;
9274 } else {
9275 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
9276 }
9277 }
9278 return(standalone);
9279}
9280
9281/**
9282 * xmlParseXMLDecl:
9283 * @ctxt: an XML parser context
9284 *
9285 * parse an XML declaration header
9286 *
9287 * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
9288 */
9289
9290void
9291xmlParseXMLDecl(xmlParserCtxtPtr ctxt) {
9292 xmlChar *version;
9293
9294 /*
9295 * This value for standalone indicates that the document has an
9296 * XML declaration but it does not have a standalone attribute.
9297 * It will be overwritten later if a standalone attribute is found.
9298 */
9299 ctxt->input->standalone = -2;
9300
9301 /*
9302 * We know that '<?xml' is here.
9303 */
9304 SKIP(5);
9305
9306 if (!IS_BLANK_CH(RAW)) {
9307 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
9308 "Blank needed after '<?xml'\n");
9309 }
9310 SKIP_BLANKS;
9311
9312 /*
9313 * We must have the VersionInfo here.
9314 */
9315 version = xmlParseVersionInfo(ctxt);
9316 if (version == NULL) {
9317 xmlFatalErr(ctxt, XML_ERR_VERSION_MISSING, NULL);
9318 } else {
9319 if (!xmlStrEqual(version, (const xmlChar *) XML_DEFAULT_VERSION)) {
9320 /*
9321 * TODO: Blueberry should be detected here
9322 */
9323 xmlWarningMsg(ctxt, XML_WAR_UNKNOWN_VERSION,
9324 "Unsupported version '%s'\n",
9325 version, NULL);
9326 }
9327 if (ctxt->version != NULL)
9328 xmlFree((void *) ctxt->version);
9329 ctxt->version = version;
9330 }
9331
9332 /*
9333 * We may have the encoding declaration
9334 */
9335 if (!IS_BLANK_CH(RAW)) {
9336 if ((RAW == '?') && (NXT(1) == '>')) {
9337 SKIP(2);
9338 return;
9339 }
9340 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
9341 }
9342 xmlParseEncodingDecl(ctxt);
9343 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
9344 /*
9345 * The XML REC instructs us to stop parsing right here
9346 */
9347 return;
9348 }
9349
9350 /*
9351 * We may have the standalone status.
9352 */
9353 if ((ctxt->input->encoding != NULL) && (!IS_BLANK_CH(RAW))) {
9354 if ((RAW == '?') && (NXT(1) == '>')) {
9355 SKIP(2);
9356 return;
9357 }
9358 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
9359 }
9360 SKIP_BLANKS;
9361 ctxt->input->standalone = xmlParseSDDecl(ctxt);
9362
9363 SKIP_BLANKS;
9364 if ((RAW == '?') && (NXT(1) == '>')) {
9365 SKIP(2);
9366 } else if (RAW == '>') {
9367 /* Deprecated old WD ... */
9368 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
9369 NEXT;
9370 } else {
9371 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
9372 MOVETO_ENDTAG(CUR_PTR);
9373 NEXT;
9374 }
9375}
9376
9377/**
9378 * xmlParseMisc:
9379 * @ctxt: an XML parser context
9380 *
9381 * parse an XML Misc* optional field.
9382 *
9383 * [27] Misc ::= Comment | PI | S
9384 */
9385
9386void
9387xmlParseMisc(xmlParserCtxtPtr ctxt) {
9388 while (((RAW == '<') && (NXT(1) == '?')) ||
9389 (CMP4(CUR_PTR, '<', '!', '-', '-')) ||
9390 IS_BLANK_CH(CUR)) {
9391 if ((RAW == '<') && (NXT(1) == '?')) {
9392 xmlParsePI(ctxt);
9393 } else if (IS_BLANK_CH(CUR)) {
9394 NEXT;
9395 } else
9396 xmlParseComment(ctxt);
9397 }
9398}
9399
9400/**
9401 * xmlParseDocument:
9402 * @ctxt: an XML parser context
9403 *
9404 * parse an XML document (and build a tree if using the standard SAX
9405 * interface).
9406 *
9407 * [1] document ::= prolog element Misc*
9408 *
9409 * [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)?
9410 *
9411 * Returns 0, -1 in case of error. the parser context is augmented
9412 * as a result of the parsing.
9413 */
9414
9415int
9416xmlParseDocument(xmlParserCtxtPtr ctxt) {
9417 xmlChar start[4];
9418 xmlCharEncoding enc;
9419
9420 xmlInitParser();
9421
9422 if ((ctxt == NULL) || (ctxt->input == NULL))
9423 return(-1);
9424
9425 GROW;
9426
9427 /*
9428 * SAX: detecting the level.
9429 */
9430 xmlDetectSAX2(ctxt);
9431
9432 /*
9433 * SAX: beginning of the document processing.
9434 */
9435 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
9436 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
9437
9438 if ((ctxt->encoding == (const xmlChar *)XML_CHAR_ENCODING_NONE) &&
9439 ((ctxt->input->end - ctxt->input->cur) >= 4)) {
9440 /*
9441 * Get the 4 first bytes and decode the charset
9442 * if enc != XML_CHAR_ENCODING_NONE
9443 * plug some encoding conversion routines.
9444 */
9445 start[0] = RAW;
9446 start[1] = NXT(1);
9447 start[2] = NXT(2);
9448 start[3] = NXT(3);
9449 enc = xmlDetectCharEncoding(&start[0], 4);
9450 if (enc != XML_CHAR_ENCODING_NONE) {
9451 xmlSwitchEncoding(ctxt, enc);
9452 }
9453 }
9454
9455
9456 if (CUR == 0) {
9457 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
9458 }
9459
9460 /*
9461 * Check for the XMLDecl in the Prolog.
9462 */
9463 GROW;
9464 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
9465
9466 /*
9467 * Note that we will switch encoding on the fly.
9468 */
9469 xmlParseXMLDecl(ctxt);
9470 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
9471 /*
9472 * The XML REC instructs us to stop parsing right here
9473 */
9474 return(-1);
9475 }
9476 ctxt->standalone = ctxt->input->standalone;
9477 SKIP_BLANKS;
9478 } else {
9479 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
9480 }
9481 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
9482 ctxt->sax->startDocument(ctxt->userData);
9483
9484 /*
9485 * The Misc part of the Prolog
9486 */
9487 GROW;
9488 xmlParseMisc(ctxt);
9489
9490 /*
9491 * Then possibly doc type declaration(s) and more Misc
9492 * (doctypedecl Misc*)?
9493 */
9494 GROW;
9495 if (CMP9(CUR_PTR, '<', '!', 'D', 'O', 'C', 'T', 'Y', 'P', 'E')) {
9496
9497 ctxt->inSubset = 1;
9498 xmlParseDocTypeDecl(ctxt);
9499 if (RAW == '[') {
9500 ctxt->instate = XML_PARSER_DTD;
9501 xmlParseInternalSubset(ctxt);
9502 }
9503
9504 /*
9505 * Create and update the external subset.
9506 */
9507 ctxt->inSubset = 2;
9508 if ((ctxt->sax != NULL) && (ctxt->sax->externalSubset != NULL) &&
9509 (!ctxt->disableSAX))
9510 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
9511 ctxt->extSubSystem, ctxt->extSubURI);
9512 ctxt->inSubset = 0;
9513
9514 xmlCleanSpecialAttr(ctxt);
9515
9516 ctxt->instate = XML_PARSER_PROLOG;
9517 xmlParseMisc(ctxt);
9518 }
9519
9520 /*
9521 * Time to start parsing the tree itself
9522 */
9523 GROW;
9524 if (RAW != '<') {
9525 xmlFatalErrMsg(ctxt, XML_ERR_DOCUMENT_EMPTY,
9526 "Start tag expected, '<' not found\n");
9527 } else {
9528 ctxt->instate = XML_PARSER_CONTENT;
9529 xmlParseElement(ctxt);
9530 ctxt->instate = XML_PARSER_EPILOG;
9531
9532
9533 /*
9534 * The Misc part at the end
9535 */
9536 xmlParseMisc(ctxt);
9537
9538 if (RAW != 0) {
9539 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
9540 }
9541 ctxt->instate = XML_PARSER_EOF;
9542 }
9543
9544 /*
9545 * SAX: end of the document processing.
9546 */
9547 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
9548 ctxt->sax->endDocument(ctxt->userData);
9549
9550 /*
9551 * Remove locally kept entity definitions if the tree was not built
9552 */
9553 if ((ctxt->myDoc != NULL) &&
9554 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
9555 xmlFreeDoc(ctxt->myDoc);
9556 ctxt->myDoc = NULL;
9557 }
9558
9559 if (! ctxt->wellFormed) {
9560 ctxt->valid = 0;
9561 return(-1);
9562 }
9563 return(0);
9564}
9565
9566/**
9567 * xmlParseExtParsedEnt:
9568 * @ctxt: an XML parser context
9569 *
9570 * parse a general parsed entity
9571 * An external general parsed entity is well-formed if it matches the
9572 * production labeled extParsedEnt.
9573 *
9574 * [78] extParsedEnt ::= TextDecl? content
9575 *
9576 * Returns 0, -1 in case of error. the parser context is augmented
9577 * as a result of the parsing.
9578 */
9579
9580int
9581xmlParseExtParsedEnt(xmlParserCtxtPtr ctxt) {
9582 xmlChar start[4];
9583 xmlCharEncoding enc;
9584
9585 if ((ctxt == NULL) || (ctxt->input == NULL))
9586 return(-1);
9587
9588 xmlDefaultSAXHandlerInit();
9589
9590 xmlDetectSAX2(ctxt);
9591
9592 GROW;
9593
9594 /*
9595 * SAX: beginning of the document processing.
9596 */
9597 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
9598 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
9599
9600 /*
9601 * Get the 4 first bytes and decode the charset
9602 * if enc != XML_CHAR_ENCODING_NONE
9603 * plug some encoding conversion routines.
9604 */
9605 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
9606 start[0] = RAW;
9607 start[1] = NXT(1);
9608 start[2] = NXT(2);
9609 start[3] = NXT(3);
9610 enc = xmlDetectCharEncoding(start, 4);
9611 if (enc != XML_CHAR_ENCODING_NONE) {
9612 xmlSwitchEncoding(ctxt, enc);
9613 }
9614 }
9615
9616
9617 if (CUR == 0) {
9618 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
9619 }
9620
9621 /*
9622 * Check for the XMLDecl in the Prolog.
9623 */
9624 GROW;
9625 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
9626
9627 /*
9628 * Note that we will switch encoding on the fly.
9629 */
9630 xmlParseXMLDecl(ctxt);
9631 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
9632 /*
9633 * The XML REC instructs us to stop parsing right here
9634 */
9635 return(-1);
9636 }
9637 SKIP_BLANKS;
9638 } else {
9639 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
9640 }
9641 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
9642 ctxt->sax->startDocument(ctxt->userData);
9643
9644 /*
9645 * Doing validity checking on chunk doesn't make sense
9646 */
9647 ctxt->instate = XML_PARSER_CONTENT;
9648 ctxt->validate = 0;
9649 ctxt->loadsubset = 0;
9650 ctxt->depth = 0;
9651
9652 xmlParseContent(ctxt);
9653
9654 if ((RAW == '<') && (NXT(1) == '/')) {
9655 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
9656 } else if (RAW != 0) {
9657 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
9658 }
9659
9660 /*
9661 * SAX: end of the document processing.
9662 */
9663 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
9664 ctxt->sax->endDocument(ctxt->userData);
9665
9666 if (! ctxt->wellFormed) return(-1);
9667 return(0);
9668}
9669
9670#ifdef LIBXML_PUSH_ENABLED
9671/************************************************************************
9672 * *
9673 * Progressive parsing interfaces *
9674 * *
9675 ************************************************************************/
9676
9677/**
9678 * xmlParseLookupSequence:
9679 * @ctxt: an XML parser context
9680 * @first: the first char to lookup
9681 * @next: the next char to lookup or zero
9682 * @third: the next char to lookup or zero
9683 *
9684 * Try to find if a sequence (first, next, third) or just (first next) or
9685 * (first) is available in the input stream.
9686 * This function has a side effect of (possibly) incrementing ctxt->checkIndex
9687 * to avoid rescanning sequences of bytes, it DOES change the state of the
9688 * parser, do not use liberally.
9689 *
9690 * Returns the index to the current parsing point if the full sequence
9691 * is available, -1 otherwise.
9692 */
9693static int
9694xmlParseLookupSequence(xmlParserCtxtPtr ctxt, xmlChar first,
9695 xmlChar next, xmlChar third) {
9696 int base, len;
9697 xmlParserInputPtr in;
9698 const xmlChar *buf;
9699
9700 in = ctxt->input;
9701 if (in == NULL) return(-1);
9702 base = in->cur - in->base;
9703 if (base < 0) return(-1);
9704 if (ctxt->checkIndex > base)
9705 base = ctxt->checkIndex;
9706 if (in->buf == NULL) {
9707 buf = in->base;
9708 len = in->length;
9709 } else {
9710 buf = in->buf->buffer->content;
9711 len = in->buf->buffer->use;
9712 }
9713 /* take into account the sequence length */
9714 if (third) len -= 2;
9715 else if (next) len --;
9716 for (;base < len;base++) {
9717 if (buf[base] == first) {
9718 if (third != 0) {
9719 if ((buf[base + 1] != next) ||
9720 (buf[base + 2] != third)) continue;
9721 } else if (next != 0) {
9722 if (buf[base + 1] != next) continue;
9723 }
9724 ctxt->checkIndex = 0;
9725#ifdef DEBUG_PUSH
9726 if (next == 0)
9727 xmlGenericError(xmlGenericErrorContext,
9728 "PP: lookup '%c' found at %d\n",
9729 first, base);
9730 else if (third == 0)
9731 xmlGenericError(xmlGenericErrorContext,
9732 "PP: lookup '%c%c' found at %d\n",
9733 first, next, base);
9734 else
9735 xmlGenericError(xmlGenericErrorContext,
9736 "PP: lookup '%c%c%c' found at %d\n",
9737 first, next, third, base);
9738#endif
9739 return(base - (in->cur - in->base));
9740 }
9741 }
9742 ctxt->checkIndex = base;
9743#ifdef DEBUG_PUSH
9744 if (next == 0)
9745 xmlGenericError(xmlGenericErrorContext,
9746 "PP: lookup '%c' failed\n", first);
9747 else if (third == 0)
9748 xmlGenericError(xmlGenericErrorContext,
9749 "PP: lookup '%c%c' failed\n", first, next);
9750 else
9751 xmlGenericError(xmlGenericErrorContext,
9752 "PP: lookup '%c%c%c' failed\n", first, next, third);
9753#endif
9754 return(-1);
9755}
9756
9757/**
9758 * xmlParseGetLasts:
9759 * @ctxt: an XML parser context
9760 * @lastlt: pointer to store the last '<' from the input
9761 * @lastgt: pointer to store the last '>' from the input
9762 *
9763 * Lookup the last < and > in the current chunk
9764 */
9765static void
9766xmlParseGetLasts(xmlParserCtxtPtr ctxt, const xmlChar **lastlt,
9767 const xmlChar **lastgt) {
9768 const xmlChar *tmp;
9769
9770 if ((ctxt == NULL) || (lastlt == NULL) || (lastgt == NULL)) {
9771 xmlGenericError(xmlGenericErrorContext,
9772 "Internal error: xmlParseGetLasts\n");
9773 return;
9774 }
9775 if ((ctxt->progressive != 0) && (ctxt->inputNr == 1)) {
9776 tmp = ctxt->input->end;
9777 tmp--;
9778 while ((tmp >= ctxt->input->base) && (*tmp != '<')) tmp--;
9779 if (tmp < ctxt->input->base) {
9780 *lastlt = NULL;
9781 *lastgt = NULL;
9782 } else {
9783 *lastlt = tmp;
9784 tmp++;
9785 while ((tmp < ctxt->input->end) && (*tmp != '>')) {
9786 if (*tmp == '\'') {
9787 tmp++;
9788 while ((tmp < ctxt->input->end) && (*tmp != '\'')) tmp++;
9789 if (tmp < ctxt->input->end) tmp++;
9790 } else if (*tmp == '"') {
9791 tmp++;
9792 while ((tmp < ctxt->input->end) && (*tmp != '"')) tmp++;
9793 if (tmp < ctxt->input->end) tmp++;
9794 } else
9795 tmp++;
9796 }
9797 if (tmp < ctxt->input->end)
9798 *lastgt = tmp;
9799 else {
9800 tmp = *lastlt;
9801 tmp--;
9802 while ((tmp >= ctxt->input->base) && (*tmp != '>')) tmp--;
9803 if (tmp >= ctxt->input->base)
9804 *lastgt = tmp;
9805 else
9806 *lastgt = NULL;
9807 }
9808 }
9809 } else {
9810 *lastlt = NULL;
9811 *lastgt = NULL;
9812 }
9813}
9814/**
9815 * xmlCheckCdataPush:
9816 * @cur: pointer to the bock of characters
9817 * @len: length of the block in bytes
9818 *
9819 * Check that the block of characters is okay as SCdata content [20]
9820 *
9821 * Returns the number of bytes to pass if okay, a negative index where an
9822 * UTF-8 error occured otherwise
9823 */
9824static int
9825xmlCheckCdataPush(const xmlChar *utf, int len) {
9826 int ix;
9827 unsigned char c;
9828 int codepoint;
9829
9830 if ((utf == NULL) || (len <= 0))
9831 return(0);
9832
9833 for (ix = 0; ix < len;) { /* string is 0-terminated */
9834 c = utf[ix];
9835 if ((c & 0x80) == 0x00) { /* 1-byte code, starts with 10 */
9836 if (c >= 0x20)
9837 ix++;
9838 else if ((c == 0xA) || (c == 0xD) || (c == 0x9))
9839 ix++;
9840 else
9841 return(-ix);
9842 } else if ((c & 0xe0) == 0xc0) {/* 2-byte code, starts with 110 */
9843 if (ix + 2 > len) return(ix);
9844 if ((utf[ix+1] & 0xc0 ) != 0x80)
9845 return(-ix);
9846 codepoint = (utf[ix] & 0x1f) << 6;
9847 codepoint |= utf[ix+1] & 0x3f;
9848 if (!xmlIsCharQ(codepoint))
9849 return(-ix);
9850 ix += 2;
9851 } else if ((c & 0xf0) == 0xe0) {/* 3-byte code, starts with 1110 */
9852 if (ix + 3 > len) return(ix);
9853 if (((utf[ix+1] & 0xc0) != 0x80) ||
9854 ((utf[ix+2] & 0xc0) != 0x80))
9855 return(-ix);
9856 codepoint = (utf[ix] & 0xf) << 12;
9857 codepoint |= (utf[ix+1] & 0x3f) << 6;
9858 codepoint |= utf[ix+2] & 0x3f;
9859 if (!xmlIsCharQ(codepoint))
9860 return(-ix);
9861 ix += 3;
9862 } else if ((c & 0xf8) == 0xf0) {/* 4-byte code, starts with 11110 */
9863 if (ix + 4 > len) return(ix);
9864 if (((utf[ix+1] & 0xc0) != 0x80) ||
9865 ((utf[ix+2] & 0xc0) != 0x80) ||
9866 ((utf[ix+3] & 0xc0) != 0x80))
9867 return(-ix);
9868 codepoint = (utf[ix] & 0x7) << 18;
9869 codepoint |= (utf[ix+1] & 0x3f) << 12;
9870 codepoint |= (utf[ix+2] & 0x3f) << 6;
9871 codepoint |= utf[ix+3] & 0x3f;
9872 if (!xmlIsCharQ(codepoint))
9873 return(-ix);
9874 ix += 4;
9875 } else /* unknown encoding */
9876 return(-ix);
9877 }
9878 return(ix);
9879}
9880
9881/**
9882 * xmlParseTryOrFinish:
9883 * @ctxt: an XML parser context
9884 * @terminate: last chunk indicator
9885 *
9886 * Try to progress on parsing
9887 *
9888 * Returns zero if no parsing was possible
9889 */
9890static int
9891xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
9892 int ret = 0;
9893 int avail, tlen;
9894 xmlChar cur, next;
9895 const xmlChar *lastlt, *lastgt;
9896
9897 if (ctxt->input == NULL)
9898 return(0);
9899
9900#ifdef DEBUG_PUSH
9901 switch (ctxt->instate) {
9902 case XML_PARSER_EOF:
9903 xmlGenericError(xmlGenericErrorContext,
9904 "PP: try EOF\n"); break;
9905 case XML_PARSER_START:
9906 xmlGenericError(xmlGenericErrorContext,
9907 "PP: try START\n"); break;
9908 case XML_PARSER_MISC:
9909 xmlGenericError(xmlGenericErrorContext,
9910 "PP: try MISC\n");break;
9911 case XML_PARSER_COMMENT:
9912 xmlGenericError(xmlGenericErrorContext,
9913 "PP: try COMMENT\n");break;
9914 case XML_PARSER_PROLOG:
9915 xmlGenericError(xmlGenericErrorContext,
9916 "PP: try PROLOG\n");break;
9917 case XML_PARSER_START_TAG:
9918 xmlGenericError(xmlGenericErrorContext,
9919 "PP: try START_TAG\n");break;
9920 case XML_PARSER_CONTENT:
9921 xmlGenericError(xmlGenericErrorContext,
9922 "PP: try CONTENT\n");break;
9923 case XML_PARSER_CDATA_SECTION:
9924 xmlGenericError(xmlGenericErrorContext,
9925 "PP: try CDATA_SECTION\n");break;
9926 case XML_PARSER_END_TAG:
9927 xmlGenericError(xmlGenericErrorContext,
9928 "PP: try END_TAG\n");break;
9929 case XML_PARSER_ENTITY_DECL:
9930 xmlGenericError(xmlGenericErrorContext,
9931 "PP: try ENTITY_DECL\n");break;
9932 case XML_PARSER_ENTITY_VALUE:
9933 xmlGenericError(xmlGenericErrorContext,
9934 "PP: try ENTITY_VALUE\n");break;
9935 case XML_PARSER_ATTRIBUTE_VALUE:
9936 xmlGenericError(xmlGenericErrorContext,
9937 "PP: try ATTRIBUTE_VALUE\n");break;
9938 case XML_PARSER_DTD:
9939 xmlGenericError(xmlGenericErrorContext,
9940 "PP: try DTD\n");break;
9941 case XML_PARSER_EPILOG:
9942 xmlGenericError(xmlGenericErrorContext,
9943 "PP: try EPILOG\n");break;
9944 case XML_PARSER_PI:
9945 xmlGenericError(xmlGenericErrorContext,
9946 "PP: try PI\n");break;
9947 case XML_PARSER_IGNORE:
9948 xmlGenericError(xmlGenericErrorContext,
9949 "PP: try IGNORE\n");break;
9950 }
9951#endif
9952
9953 if ((ctxt->input != NULL) &&
9954 (ctxt->input->cur - ctxt->input->base > 4096)) {
9955 xmlSHRINK(ctxt);
9956 ctxt->checkIndex = 0;
9957 }
9958 xmlParseGetLasts(ctxt, &lastlt, &lastgt);
9959
9960 while (1) {
9961 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
9962 return(0);
9963
9964
9965 /*
9966 * Pop-up of finished entities.
9967 */
9968 while ((RAW == 0) && (ctxt->inputNr > 1))
9969 xmlPopInput(ctxt);
9970
9971 if (ctxt->input == NULL) break;
9972 if (ctxt->input->buf == NULL)
9973 avail = ctxt->input->length -
9974 (ctxt->input->cur - ctxt->input->base);
9975 else {
9976 /*
9977 * If we are operating on converted input, try to flush
9978 * remainng chars to avoid them stalling in the non-converted
9979 * buffer.
9980 */
9981 if ((ctxt->input->buf->raw != NULL) &&
9982 (ctxt->input->buf->raw->use > 0)) {
9983 int base = ctxt->input->base -
9984 ctxt->input->buf->buffer->content;
9985 int current = ctxt->input->cur - ctxt->input->base;
9986
9987 xmlParserInputBufferPush(ctxt->input->buf, 0, "");
9988 ctxt->input->base = ctxt->input->buf->buffer->content + base;
9989 ctxt->input->cur = ctxt->input->base + current;
9990 ctxt->input->end =
9991 &ctxt->input->buf->buffer->content[
9992 ctxt->input->buf->buffer->use];
9993 }
9994 avail = ctxt->input->buf->buffer->use -
9995 (ctxt->input->cur - ctxt->input->base);
9996 }
9997 if (avail < 1)
9998 goto done;
9999 switch (ctxt->instate) {
10000 case XML_PARSER_EOF:
10001 /*
10002 * Document parsing is done !
10003 */
10004 goto done;
10005 case XML_PARSER_START:
10006 if (ctxt->charset == XML_CHAR_ENCODING_NONE) {
10007 xmlChar start[4];
10008 xmlCharEncoding enc;
10009
10010 /*
10011 * Very first chars read from the document flow.
10012 */
10013 if (avail < 4)
10014 goto done;
10015
10016 /*
10017 * Get the 4 first bytes and decode the charset
10018 * if enc != XML_CHAR_ENCODING_NONE
10019 * plug some encoding conversion routines,
10020 * else xmlSwitchEncoding will set to (default)
10021 * UTF8.
10022 */
10023 start[0] = RAW;
10024 start[1] = NXT(1);
10025 start[2] = NXT(2);
10026 start[3] = NXT(3);
10027 enc = xmlDetectCharEncoding(start, 4);
10028 xmlSwitchEncoding(ctxt, enc);
10029 break;
10030 }
10031
10032 if (avail < 2)
10033 goto done;
10034 cur = ctxt->input->cur[0];
10035 next = ctxt->input->cur[1];
10036 if (cur == 0) {
10037 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
10038 ctxt->sax->setDocumentLocator(ctxt->userData,
10039 &xmlDefaultSAXLocator);
10040 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
10041 ctxt->instate = XML_PARSER_EOF;
10042#ifdef DEBUG_PUSH
10043 xmlGenericError(xmlGenericErrorContext,
10044 "PP: entering EOF\n");
10045#endif
10046 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
10047 ctxt->sax->endDocument(ctxt->userData);
10048 goto done;
10049 }
10050 if ((cur == '<') && (next == '?')) {
10051 /* PI or XML decl */
10052 if (avail < 5) return(ret);
10053 if ((!terminate) &&
10054 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
10055 return(ret);
10056 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
10057 ctxt->sax->setDocumentLocator(ctxt->userData,
10058 &xmlDefaultSAXLocator);
10059 if ((ctxt->input->cur[2] == 'x') &&
10060 (ctxt->input->cur[3] == 'm') &&
10061 (ctxt->input->cur[4] == 'l') &&
10062 (IS_BLANK_CH(ctxt->input->cur[5]))) {
10063 ret += 5;
10064#ifdef DEBUG_PUSH
10065 xmlGenericError(xmlGenericErrorContext,
10066 "PP: Parsing XML Decl\n");
10067#endif
10068 xmlParseXMLDecl(ctxt);
10069 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
10070 /*
10071 * The XML REC instructs us to stop parsing right
10072 * here
10073 */
10074 ctxt->instate = XML_PARSER_EOF;
10075 return(0);
10076 }
10077 ctxt->standalone = ctxt->input->standalone;
10078 if ((ctxt->encoding == NULL) &&
10079 (ctxt->input->encoding != NULL))
10080 ctxt->encoding = xmlStrdup(ctxt->input->encoding);
10081 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
10082 (!ctxt->disableSAX))
10083 ctxt->sax->startDocument(ctxt->userData);
10084 ctxt->instate = XML_PARSER_MISC;
10085#ifdef DEBUG_PUSH
10086 xmlGenericError(xmlGenericErrorContext,
10087 "PP: entering MISC\n");
10088#endif
10089 } else {
10090 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
10091 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
10092 (!ctxt->disableSAX))
10093 ctxt->sax->startDocument(ctxt->userData);
10094 ctxt->instate = XML_PARSER_MISC;
10095#ifdef DEBUG_PUSH
10096 xmlGenericError(xmlGenericErrorContext,
10097 "PP: entering MISC\n");
10098#endif
10099 }
10100 } else {
10101 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
10102 ctxt->sax->setDocumentLocator(ctxt->userData,
10103 &xmlDefaultSAXLocator);
10104 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
10105 if (ctxt->version == NULL) {
10106 xmlErrMemory(ctxt, NULL);
10107 break;
10108 }
10109 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
10110 (!ctxt->disableSAX))
10111 ctxt->sax->startDocument(ctxt->userData);
10112 ctxt->instate = XML_PARSER_MISC;
10113#ifdef DEBUG_PUSH
10114 xmlGenericError(xmlGenericErrorContext,
10115 "PP: entering MISC\n");
10116#endif
10117 }
10118 break;
10119 case XML_PARSER_START_TAG: {
10120 const xmlChar *name;
10121 const xmlChar *prefix;
10122 const xmlChar *URI;
10123 int nsNr = ctxt->nsNr;
10124
10125 if ((avail < 2) && (ctxt->inputNr == 1))
10126 goto done;
10127 cur = ctxt->input->cur[0];
10128 if (cur != '<') {
10129 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
10130 ctxt->instate = XML_PARSER_EOF;
10131 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
10132 ctxt->sax->endDocument(ctxt->userData);
10133 goto done;
10134 }
10135 if (!terminate) {
10136 if (ctxt->progressive) {
10137 /* > can be found unescaped in attribute values */
10138 if ((lastgt == NULL) || (ctxt->input->cur >= lastgt))
10139 goto done;
10140 } else if (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0) {
10141 goto done;
10142 }
10143 }
10144 if (ctxt->spaceNr == 0)
10145 spacePush(ctxt, -1);
10146 else if (*ctxt->space == -2)
10147 spacePush(ctxt, -1);
10148 else
10149 spacePush(ctxt, *ctxt->space);
10150#ifdef LIBXML_SAX1_ENABLED
10151 if (ctxt->sax2)
10152#endif /* LIBXML_SAX1_ENABLED */
10153 name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen);
10154#ifdef LIBXML_SAX1_ENABLED
10155 else
10156 name = xmlParseStartTag(ctxt);
10157#endif /* LIBXML_SAX1_ENABLED */
10158 if (ctxt->instate == XML_PARSER_EOF)
10159 goto done;
10160 if (name == NULL) {
10161 spacePop(ctxt);
10162 ctxt->instate = XML_PARSER_EOF;
10163 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
10164 ctxt->sax->endDocument(ctxt->userData);
10165 goto done;
10166 }
10167#ifdef LIBXML_VALID_ENABLED
10168 /*
10169 * [ VC: Root Element Type ]
10170 * The Name in the document type declaration must match
10171 * the element type of the root element.
10172 */
10173 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
10174 ctxt->node && (ctxt->node == ctxt->myDoc->children))
10175 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
10176#endif /* LIBXML_VALID_ENABLED */
10177
10178 /*
10179 * Check for an Empty Element.
10180 */
10181 if ((RAW == '/') && (NXT(1) == '>')) {
10182 SKIP(2);
10183
10184 if (ctxt->sax2) {
10185 if ((ctxt->sax != NULL) &&
10186 (ctxt->sax->endElementNs != NULL) &&
10187 (!ctxt->disableSAX))
10188 ctxt->sax->endElementNs(ctxt->userData, name,
10189 prefix, URI);
10190 if (ctxt->nsNr - nsNr > 0)
10191 nsPop(ctxt, ctxt->nsNr - nsNr);
10192#ifdef LIBXML_SAX1_ENABLED
10193 } else {
10194 if ((ctxt->sax != NULL) &&
10195 (ctxt->sax->endElement != NULL) &&
10196 (!ctxt->disableSAX))
10197 ctxt->sax->endElement(ctxt->userData, name);
10198#endif /* LIBXML_SAX1_ENABLED */
10199 }
10200 spacePop(ctxt);
10201 if (ctxt->nameNr == 0) {
10202 ctxt->instate = XML_PARSER_EPILOG;
10203 } else {
10204 ctxt->instate = XML_PARSER_CONTENT;
10205 }
10206 break;
10207 }
10208 if (RAW == '>') {
10209 NEXT;
10210 } else {
10211 xmlFatalErrMsgStr(ctxt, XML_ERR_GT_REQUIRED,
10212 "Couldn't find end of Start Tag %s\n",
10213 name);
10214 nodePop(ctxt);
10215 spacePop(ctxt);
10216 }
10217 if (ctxt->sax2)
10218 nameNsPush(ctxt, name, prefix, URI, ctxt->nsNr - nsNr);
10219#ifdef LIBXML_SAX1_ENABLED
10220 else
10221 namePush(ctxt, name);
10222#endif /* LIBXML_SAX1_ENABLED */
10223
10224 ctxt->instate = XML_PARSER_CONTENT;
10225 break;
10226 }
10227 case XML_PARSER_CONTENT: {
10228 const xmlChar *test;
10229 unsigned int cons;
10230 if ((avail < 2) && (ctxt->inputNr == 1))
10231 goto done;
10232 cur = ctxt->input->cur[0];
10233 next = ctxt->input->cur[1];
10234
10235 test = CUR_PTR;
10236 cons = ctxt->input->consumed;
10237 if ((cur == '<') && (next == '/')) {
10238 ctxt->instate = XML_PARSER_END_TAG;
10239 break;
10240 } else if ((cur == '<') && (next == '?')) {
10241 if ((!terminate) &&
10242 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
10243 goto done;
10244 xmlParsePI(ctxt);
10245 } else if ((cur == '<') && (next != '!')) {
10246 ctxt->instate = XML_PARSER_START_TAG;
10247 break;
10248 } else if ((cur == '<') && (next == '!') &&
10249 (ctxt->input->cur[2] == '-') &&
10250 (ctxt->input->cur[3] == '-')) {
10251 int term;
10252
10253 if (avail < 4)
10254 goto done;
10255 ctxt->input->cur += 4;
10256 term = xmlParseLookupSequence(ctxt, '-', '-', '>');
10257 ctxt->input->cur -= 4;
10258 if ((!terminate) && (term < 0))
10259 goto done;
10260 xmlParseComment(ctxt);
10261 ctxt->instate = XML_PARSER_CONTENT;
10262 } else if ((cur == '<') && (ctxt->input->cur[1] == '!') &&
10263 (ctxt->input->cur[2] == '[') &&
10264 (ctxt->input->cur[3] == 'C') &&
10265 (ctxt->input->cur[4] == 'D') &&
10266 (ctxt->input->cur[5] == 'A') &&
10267 (ctxt->input->cur[6] == 'T') &&
10268 (ctxt->input->cur[7] == 'A') &&
10269 (ctxt->input->cur[8] == '[')) {
10270 SKIP(9);
10271 ctxt->instate = XML_PARSER_CDATA_SECTION;
10272 break;
10273 } else if ((cur == '<') && (next == '!') &&
10274 (avail < 9)) {
10275 goto done;
10276 } else if (cur == '&') {
10277 if ((!terminate) &&
10278 (xmlParseLookupSequence(ctxt, ';', 0, 0) < 0))
10279 goto done;
10280 xmlParseReference(ctxt);
10281 } else {
10282 /* TODO Avoid the extra copy, handle directly !!! */
10283 /*
10284 * Goal of the following test is:
10285 * - minimize calls to the SAX 'character' callback
10286 * when they are mergeable
10287 * - handle an problem for isBlank when we only parse
10288 * a sequence of blank chars and the next one is
10289 * not available to check against '<' presence.
10290 * - tries to homogenize the differences in SAX
10291 * callbacks between the push and pull versions
10292 * of the parser.
10293 */
10294 if ((ctxt->inputNr == 1) &&
10295 (avail < XML_PARSER_BIG_BUFFER_SIZE)) {
10296 if (!terminate) {
10297 if (ctxt->progressive) {
10298 if ((lastlt == NULL) ||
10299 (ctxt->input->cur > lastlt))
10300 goto done;
10301 } else if (xmlParseLookupSequence(ctxt,
10302 '<', 0, 0) < 0) {
10303 goto done;
10304 }
10305 }
10306 }
10307 ctxt->checkIndex = 0;
10308 xmlParseCharData(ctxt, 0);
10309 }
10310 /*
10311 * Pop-up of finished entities.
10312 */
10313 while ((RAW == 0) && (ctxt->inputNr > 1))
10314 xmlPopInput(ctxt);
10315 if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) {
10316 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
10317 "detected an error in element content\n");
10318 ctxt->instate = XML_PARSER_EOF;
10319 break;
10320 }
10321 break;
10322 }
10323 case XML_PARSER_END_TAG:
10324 if (avail < 2)
10325 goto done;
10326 if (!terminate) {
10327 if (ctxt->progressive) {
10328 /* > can be found unescaped in attribute values */
10329 if ((lastgt == NULL) || (ctxt->input->cur >= lastgt))
10330 goto done;
10331 } else if (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0) {
10332 goto done;
10333 }
10334 }
10335 if (ctxt->sax2) {
10336 xmlParseEndTag2(ctxt,
10337 (void *) ctxt->pushTab[ctxt->nameNr * 3 - 3],
10338 (void *) ctxt->pushTab[ctxt->nameNr * 3 - 2], 0,
10339 (int) (long) ctxt->pushTab[ctxt->nameNr * 3 - 1], 0);
10340 nameNsPop(ctxt);
10341 }
10342#ifdef LIBXML_SAX1_ENABLED
10343 else
10344 xmlParseEndTag1(ctxt, 0);
10345#endif /* LIBXML_SAX1_ENABLED */
10346 if (ctxt->instate == XML_PARSER_EOF) {
10347 /* Nothing */
10348 } else if (ctxt->nameNr == 0) {
10349 ctxt->instate = XML_PARSER_EPILOG;
10350 } else {
10351 ctxt->instate = XML_PARSER_CONTENT;
10352 }
10353 break;
10354 case XML_PARSER_CDATA_SECTION: {
10355 /*
10356 * The Push mode need to have the SAX callback for
10357 * cdataBlock merge back contiguous callbacks.
10358 */
10359 int base;
10360
10361 base = xmlParseLookupSequence(ctxt, ']', ']', '>');
10362 if (base < 0) {
10363 if (avail >= XML_PARSER_BIG_BUFFER_SIZE + 2) {
10364 int tmp;
10365
10366 tmp = xmlCheckCdataPush(ctxt->input->cur,
10367 XML_PARSER_BIG_BUFFER_SIZE);
10368 if (tmp < 0) {
10369 tmp = -tmp;
10370 ctxt->input->cur += tmp;
10371 goto encoding_error;
10372 }
10373 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
10374 if (ctxt->sax->cdataBlock != NULL)
10375 ctxt->sax->cdataBlock(ctxt->userData,
10376 ctxt->input->cur, tmp);
10377 else if (ctxt->sax->characters != NULL)
10378 ctxt->sax->characters(ctxt->userData,
10379 ctxt->input->cur, tmp);
10380 }
10381 SKIPL(tmp);
10382 ctxt->checkIndex = 0;
10383 }
10384 goto done;
10385 } else {
10386 int tmp;
10387
10388 tmp = xmlCheckCdataPush(ctxt->input->cur, base);
10389 if ((tmp < 0) || (tmp != base)) {
10390 tmp = -tmp;
10391 ctxt->input->cur += tmp;
10392 goto encoding_error;
10393 }
10394 if ((ctxt->sax != NULL) && (base > 0) &&
10395 (!ctxt->disableSAX)) {
10396 if (ctxt->sax->cdataBlock != NULL)
10397 ctxt->sax->cdataBlock(ctxt->userData,
10398 ctxt->input->cur, base);
10399 else if (ctxt->sax->characters != NULL)
10400 ctxt->sax->characters(ctxt->userData,
10401 ctxt->input->cur, base);
10402 }
10403 SKIPL(base + 3);
10404 ctxt->checkIndex = 0;
10405 ctxt->instate = XML_PARSER_CONTENT;
10406#ifdef DEBUG_PUSH
10407 xmlGenericError(xmlGenericErrorContext,
10408 "PP: entering CONTENT\n");
10409#endif
10410 }
10411 break;
10412 }
10413 case XML_PARSER_MISC:
10414 SKIP_BLANKS;
10415 if (ctxt->input->buf == NULL)
10416 avail = ctxt->input->length -
10417 (ctxt->input->cur - ctxt->input->base);
10418 else
10419 avail = ctxt->input->buf->buffer->use -
10420 (ctxt->input->cur - ctxt->input->base);
10421 if (avail < 2)
10422 goto done;
10423 cur = ctxt->input->cur[0];
10424 next = ctxt->input->cur[1];
10425 if ((cur == '<') && (next == '?')) {
10426 if ((!terminate) &&
10427 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
10428 goto done;
10429#ifdef DEBUG_PUSH
10430 xmlGenericError(xmlGenericErrorContext,
10431 "PP: Parsing PI\n");
10432#endif
10433 xmlParsePI(ctxt);
10434 ctxt->checkIndex = 0;
10435 } else if ((cur == '<') && (next == '!') &&
10436 (ctxt->input->cur[2] == '-') &&
10437 (ctxt->input->cur[3] == '-')) {
10438 if ((!terminate) &&
10439 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
10440 goto done;
10441#ifdef DEBUG_PUSH
10442 xmlGenericError(xmlGenericErrorContext,
10443 "PP: Parsing Comment\n");
10444#endif
10445 xmlParseComment(ctxt);
10446 ctxt->instate = XML_PARSER_MISC;
10447 ctxt->checkIndex = 0;
10448 } else if ((cur == '<') && (next == '!') &&
10449 (ctxt->input->cur[2] == 'D') &&
10450 (ctxt->input->cur[3] == 'O') &&
10451 (ctxt->input->cur[4] == 'C') &&
10452 (ctxt->input->cur[5] == 'T') &&
10453 (ctxt->input->cur[6] == 'Y') &&
10454 (ctxt->input->cur[7] == 'P') &&
10455 (ctxt->input->cur[8] == 'E')) {
10456 if ((!terminate) &&
10457 (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
10458 goto done;
10459#ifdef DEBUG_PUSH
10460 xmlGenericError(xmlGenericErrorContext,
10461 "PP: Parsing internal subset\n");
10462#endif
10463 ctxt->inSubset = 1;
10464 xmlParseDocTypeDecl(ctxt);
10465 if (RAW == '[') {
10466 ctxt->instate = XML_PARSER_DTD;
10467#ifdef DEBUG_PUSH
10468 xmlGenericError(xmlGenericErrorContext,
10469 "PP: entering DTD\n");
10470#endif
10471 } else {
10472 /*
10473 * Create and update the external subset.
10474 */
10475 ctxt->inSubset = 2;
10476 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
10477 (ctxt->sax->externalSubset != NULL))
10478 ctxt->sax->externalSubset(ctxt->userData,
10479 ctxt->intSubName, ctxt->extSubSystem,
10480 ctxt->extSubURI);
10481 ctxt->inSubset = 0;
10482 xmlCleanSpecialAttr(ctxt);
10483 ctxt->instate = XML_PARSER_PROLOG;
10484#ifdef DEBUG_PUSH
10485 xmlGenericError(xmlGenericErrorContext,
10486 "PP: entering PROLOG\n");
10487#endif
10488 }
10489 } else if ((cur == '<') && (next == '!') &&
10490 (avail < 9)) {
10491 goto done;
10492 } else {
10493 ctxt->instate = XML_PARSER_START_TAG;
10494 ctxt->progressive = 1;
10495 xmlParseGetLasts(ctxt, &lastlt, &lastgt);
10496#ifdef DEBUG_PUSH
10497 xmlGenericError(xmlGenericErrorContext,
10498 "PP: entering START_TAG\n");
10499#endif
10500 }
10501 break;
10502 case XML_PARSER_PROLOG:
10503 SKIP_BLANKS;
10504 if (ctxt->input->buf == NULL)
10505 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
10506 else
10507 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
10508 if (avail < 2)
10509 goto done;
10510 cur = ctxt->input->cur[0];
10511 next = ctxt->input->cur[1];
10512 if ((cur == '<') && (next == '?')) {
10513 if ((!terminate) &&
10514 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
10515 goto done;
10516#ifdef DEBUG_PUSH
10517 xmlGenericError(xmlGenericErrorContext,
10518 "PP: Parsing PI\n");
10519#endif
10520 xmlParsePI(ctxt);
10521 } else if ((cur == '<') && (next == '!') &&
10522 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
10523 if ((!terminate) &&
10524 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
10525 goto done;
10526#ifdef DEBUG_PUSH
10527 xmlGenericError(xmlGenericErrorContext,
10528 "PP: Parsing Comment\n");
10529#endif
10530 xmlParseComment(ctxt);
10531 ctxt->instate = XML_PARSER_PROLOG;
10532 } else if ((cur == '<') && (next == '!') &&
10533 (avail < 4)) {
10534 goto done;
10535 } else {
10536 ctxt->instate = XML_PARSER_START_TAG;
10537 if (ctxt->progressive == 0)
10538 ctxt->progressive = 1;
10539 xmlParseGetLasts(ctxt, &lastlt, &lastgt);
10540#ifdef DEBUG_PUSH
10541 xmlGenericError(xmlGenericErrorContext,
10542 "PP: entering START_TAG\n");
10543#endif
10544 }
10545 break;
10546 case XML_PARSER_EPILOG:
10547 SKIP_BLANKS;
10548 if (ctxt->input->buf == NULL)
10549 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
10550 else
10551 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
10552 if (avail < 2)
10553 goto done;
10554 cur = ctxt->input->cur[0];
10555 next = ctxt->input->cur[1];
10556 if ((cur == '<') && (next == '?')) {
10557 if ((!terminate) &&
10558 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
10559 goto done;
10560#ifdef DEBUG_PUSH
10561 xmlGenericError(xmlGenericErrorContext,
10562 "PP: Parsing PI\n");
10563#endif
10564 xmlParsePI(ctxt);
10565 ctxt->instate = XML_PARSER_EPILOG;
10566 } else if ((cur == '<') && (next == '!') &&
10567 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
10568 if ((!terminate) &&
10569 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
10570 goto done;
10571#ifdef DEBUG_PUSH
10572 xmlGenericError(xmlGenericErrorContext,
10573 "PP: Parsing Comment\n");
10574#endif
10575 xmlParseComment(ctxt);
10576 ctxt->instate = XML_PARSER_EPILOG;
10577 } else if ((cur == '<') && (next == '!') &&
10578 (avail < 4)) {
10579 goto done;
10580 } else {
10581 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
10582 ctxt->instate = XML_PARSER_EOF;
10583#ifdef DEBUG_PUSH
10584 xmlGenericError(xmlGenericErrorContext,
10585 "PP: entering EOF\n");
10586#endif
10587 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
10588 ctxt->sax->endDocument(ctxt->userData);
10589 goto done;
10590 }
10591 break;
10592 case XML_PARSER_DTD: {
10593 /*
10594 * Sorry but progressive parsing of the internal subset
10595 * is not expected to be supported. We first check that
10596 * the full content of the internal subset is available and
10597 * the parsing is launched only at that point.
10598 * Internal subset ends up with "']' S? '>'" in an unescaped
10599 * section and not in a ']]>' sequence which are conditional
10600 * sections (whoever argued to keep that crap in XML deserve
10601 * a place in hell !).
10602 */
10603 int base, i;
10604 xmlChar *buf;
10605 xmlChar quote = 0;
10606
10607 base = ctxt->input->cur - ctxt->input->base;
10608 if (base < 0) return(0);
10609 if (ctxt->checkIndex > base)
10610 base = ctxt->checkIndex;
10611 buf = ctxt->input->buf->buffer->content;
10612 for (;(unsigned int) base < ctxt->input->buf->buffer->use;
10613 base++) {
10614 if (quote != 0) {
10615 if (buf[base] == quote)
10616 quote = 0;
10617 continue;
10618 }
10619 if ((quote == 0) && (buf[base] == '<')) {
10620 int found = 0;
10621 /* special handling of comments */
10622 if (((unsigned int) base + 4 <
10623 ctxt->input->buf->buffer->use) &&
10624 (buf[base + 1] == '!') &&
10625 (buf[base + 2] == '-') &&
10626 (buf[base + 3] == '-')) {
10627 for (;(unsigned int) base + 3 <
10628 ctxt->input->buf->buffer->use; base++) {
10629 if ((buf[base] == '-') &&
10630 (buf[base + 1] == '-') &&
10631 (buf[base + 2] == '>')) {
10632 found = 1;
10633 base += 2;
10634 break;
10635 }
10636 }
10637 if (!found) {
10638#if 0
10639 fprintf(stderr, "unfinished comment\n");
10640#endif
10641 break; /* for */
10642 }
10643 continue;
10644 }
10645 }
10646 if (buf[base] == '"') {
10647 quote = '"';
10648 continue;
10649 }
10650 if (buf[base] == '\'') {
10651 quote = '\'';
10652 continue;
10653 }
10654 if (buf[base] == ']') {
10655#if 0
10656 fprintf(stderr, "%c%c%c%c: ", buf[base],
10657 buf[base + 1], buf[base + 2], buf[base + 3]);
10658#endif
10659 if ((unsigned int) base +1 >=
10660 ctxt->input->buf->buffer->use)
10661 break;
10662 if (buf[base + 1] == ']') {
10663 /* conditional crap, skip both ']' ! */
10664 base++;
10665 continue;
10666 }
10667 for (i = 1;
10668 (unsigned int) base + i < ctxt->input->buf->buffer->use;
10669 i++) {
10670 if (buf[base + i] == '>') {
10671#if 0
10672 fprintf(stderr, "found\n");
10673#endif
10674 goto found_end_int_subset;
10675 }
10676 if (!IS_BLANK_CH(buf[base + i])) {
10677#if 0
10678 fprintf(stderr, "not found\n");
10679#endif
10680 goto not_end_of_int_subset;
10681 }
10682 }
10683#if 0
10684 fprintf(stderr, "end of stream\n");
10685#endif
10686 break;
10687
10688 }
10689not_end_of_int_subset:
10690 continue; /* for */
10691 }
10692 /*
10693 * We didn't found the end of the Internal subset
10694 */
10695#ifdef DEBUG_PUSH
10696 if (next == 0)
10697 xmlGenericError(xmlGenericErrorContext,
10698 "PP: lookup of int subset end filed\n");
10699#endif
10700 goto done;
10701
10702found_end_int_subset:
10703 xmlParseInternalSubset(ctxt);
10704 ctxt->inSubset = 2;
10705 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
10706 (ctxt->sax->externalSubset != NULL))
10707 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
10708 ctxt->extSubSystem, ctxt->extSubURI);
10709 ctxt->inSubset = 0;
10710 xmlCleanSpecialAttr(ctxt);
10711 ctxt->instate = XML_PARSER_PROLOG;
10712 ctxt->checkIndex = 0;
10713#ifdef DEBUG_PUSH
10714 xmlGenericError(xmlGenericErrorContext,
10715 "PP: entering PROLOG\n");
10716#endif
10717 break;
10718 }
10719 case XML_PARSER_COMMENT:
10720 xmlGenericError(xmlGenericErrorContext,
10721 "PP: internal error, state == COMMENT\n");
10722 ctxt->instate = XML_PARSER_CONTENT;
10723#ifdef DEBUG_PUSH
10724 xmlGenericError(xmlGenericErrorContext,
10725 "PP: entering CONTENT\n");
10726#endif
10727 break;
10728 case XML_PARSER_IGNORE:
10729 xmlGenericError(xmlGenericErrorContext,
10730 "PP: internal error, state == IGNORE");
10731 ctxt->instate = XML_PARSER_DTD;
10732#ifdef DEBUG_PUSH
10733 xmlGenericError(xmlGenericErrorContext,
10734 "PP: entering DTD\n");
10735#endif
10736 break;
10737 case XML_PARSER_PI:
10738 xmlGenericError(xmlGenericErrorContext,
10739 "PP: internal error, state == PI\n");
10740 ctxt->instate = XML_PARSER_CONTENT;
10741#ifdef DEBUG_PUSH
10742 xmlGenericError(xmlGenericErrorContext,
10743 "PP: entering CONTENT\n");
10744#endif
10745 break;
10746 case XML_PARSER_ENTITY_DECL:
10747 xmlGenericError(xmlGenericErrorContext,
10748 "PP: internal error, state == ENTITY_DECL\n");
10749 ctxt->instate = XML_PARSER_DTD;
10750#ifdef DEBUG_PUSH
10751 xmlGenericError(xmlGenericErrorContext,
10752 "PP: entering DTD\n");
10753#endif
10754 break;
10755 case XML_PARSER_ENTITY_VALUE:
10756 xmlGenericError(xmlGenericErrorContext,
10757 "PP: internal error, state == ENTITY_VALUE\n");
10758 ctxt->instate = XML_PARSER_CONTENT;
10759#ifdef DEBUG_PUSH
10760 xmlGenericError(xmlGenericErrorContext,
10761 "PP: entering DTD\n");
10762#endif
10763 break;
10764 case XML_PARSER_ATTRIBUTE_VALUE:
10765 xmlGenericError(xmlGenericErrorContext,
10766 "PP: internal error, state == ATTRIBUTE_VALUE\n");
10767 ctxt->instate = XML_PARSER_START_TAG;
10768#ifdef DEBUG_PUSH
10769 xmlGenericError(xmlGenericErrorContext,
10770 "PP: entering START_TAG\n");
10771#endif
10772 break;
10773 case XML_PARSER_SYSTEM_LITERAL:
10774 xmlGenericError(xmlGenericErrorContext,
10775 "PP: internal error, state == SYSTEM_LITERAL\n");
10776 ctxt->instate = XML_PARSER_START_TAG;
10777#ifdef DEBUG_PUSH
10778 xmlGenericError(xmlGenericErrorContext,
10779 "PP: entering START_TAG\n");
10780#endif
10781 break;
10782 case XML_PARSER_PUBLIC_LITERAL:
10783 xmlGenericError(xmlGenericErrorContext,
10784 "PP: internal error, state == PUBLIC_LITERAL\n");
10785 ctxt->instate = XML_PARSER_START_TAG;
10786#ifdef DEBUG_PUSH
10787 xmlGenericError(xmlGenericErrorContext,
10788 "PP: entering START_TAG\n");
10789#endif
10790 break;
10791 }
10792 }
10793done:
10794#ifdef DEBUG_PUSH
10795 xmlGenericError(xmlGenericErrorContext, "PP: done %d\n", ret);
10796#endif
10797 return(ret);
10798encoding_error:
10799 {
10800 char buffer[150];
10801
10802 snprintf(buffer, 149, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
10803 ctxt->input->cur[0], ctxt->input->cur[1],
10804 ctxt->input->cur[2], ctxt->input->cur[3]);
10805 __xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR,
10806 "Input is not proper UTF-8, indicate encoding !\n%s",
10807 BAD_CAST buffer, NULL);
10808 }
10809 return(0);
10810}
10811
10812/**
10813 * xmlParseChunk:
10814 * @ctxt: an XML parser context
10815 * @chunk: an char array
10816 * @size: the size in byte of the chunk
10817 * @terminate: last chunk indicator
10818 *
10819 * Parse a Chunk of memory
10820 *
10821 * Returns zero if no error, the xmlParserErrors otherwise.
10822 */
10823int
10824xmlParseChunk(xmlParserCtxtPtr ctxt, const char *chunk, int size,
10825 int terminate) {
10826 int end_in_lf = 0;
10827
10828 if (ctxt == NULL)
10829 return(XML_ERR_INTERNAL_ERROR);
10830 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
10831 return(ctxt->errNo);
10832 if (ctxt->instate == XML_PARSER_START)
10833 xmlDetectSAX2(ctxt);
10834 if ((size > 0) && (chunk != NULL) && (!terminate) &&
10835 (chunk[size - 1] == '\r')) {
10836 end_in_lf = 1;
10837 size--;
10838 }
10839 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
10840 (ctxt->input->buf != NULL) && (ctxt->instate != XML_PARSER_EOF)) {
10841 int base = ctxt->input->base - ctxt->input->buf->buffer->content;
10842 int cur = ctxt->input->cur - ctxt->input->base;
10843 int res;
10844
10845 res =xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
10846 if (res < 0) {
10847 ctxt->errNo = XML_PARSER_EOF;
10848 ctxt->disableSAX = 1;
10849 return (XML_PARSER_EOF);
10850 }
10851 ctxt->input->base = ctxt->input->buf->buffer->content + base;
10852 ctxt->input->cur = ctxt->input->base + cur;
10853 ctxt->input->end =
10854 &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->use];
10855#ifdef DEBUG_PUSH
10856 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
10857#endif
10858
10859 } else if (ctxt->instate != XML_PARSER_EOF) {
10860 if ((ctxt->input != NULL) && ctxt->input->buf != NULL) {
10861 xmlParserInputBufferPtr in = ctxt->input->buf;
10862 if ((in->encoder != NULL) && (in->buffer != NULL) &&
10863 (in->raw != NULL)) {
10864 int nbchars;
10865
10866 nbchars = xmlCharEncInFunc(in->encoder, in->buffer, in->raw);
10867 if (nbchars < 0) {
10868 /* TODO 2.6.0 */
10869 xmlGenericError(xmlGenericErrorContext,
10870 "xmlParseChunk: encoder error\n");
10871 return(XML_ERR_INVALID_ENCODING);
10872 }
10873 }
10874 }
10875 }
10876 xmlParseTryOrFinish(ctxt, terminate);
10877 if ((end_in_lf == 1) && (ctxt->input != NULL) &&
10878 (ctxt->input->buf != NULL)) {
10879 xmlParserInputBufferPush(ctxt->input->buf, 1, "\r");
10880 }
10881 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
10882 return(ctxt->errNo);
10883 if (terminate) {
10884 /*
10885 * Check for termination
10886 */
10887 int avail = 0;
10888
10889 if (ctxt->input != NULL) {
10890 if (ctxt->input->buf == NULL)
10891 avail = ctxt->input->length -
10892 (ctxt->input->cur - ctxt->input->base);
10893 else
10894 avail = ctxt->input->buf->buffer->use -
10895 (ctxt->input->cur - ctxt->input->base);
10896 }
10897
10898 if ((ctxt->instate != XML_PARSER_EOF) &&
10899 (ctxt->instate != XML_PARSER_EPILOG)) {
10900 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
10901 }
10902 if ((ctxt->instate == XML_PARSER_EPILOG) && (avail > 0)) {
10903 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
10904 }
10905 if (ctxt->instate != XML_PARSER_EOF) {
10906 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
10907 ctxt->sax->endDocument(ctxt->userData);
10908 }
10909 ctxt->instate = XML_PARSER_EOF;
10910 }
10911 return((xmlParserErrors) ctxt->errNo);
10912}
10913
10914/************************************************************************
10915 * *
10916 * I/O front end functions to the parser *
10917 * *
10918 ************************************************************************/
10919
10920/**
10921 * xmlCreatePushParserCtxt:
10922 * @sax: a SAX handler
10923 * @user_data: The user data returned on SAX callbacks
10924 * @chunk: a pointer to an array of chars
10925 * @size: number of chars in the array
10926 * @filename: an optional file name or URI
10927 *
10928 * Create a parser context for using the XML parser in push mode.
10929 * If @buffer and @size are non-NULL, the data is used to detect
10930 * the encoding. The remaining characters will be parsed so they
10931 * don't need to be fed in again through xmlParseChunk.
10932 * To allow content encoding detection, @size should be >= 4
10933 * The value of @filename is used for fetching external entities
10934 * and error/warning reports.
10935 *
10936 * Returns the new parser context or NULL
10937 */
10938
10939xmlParserCtxtPtr
10940xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
10941 const char *chunk, int size, const char *filename) {
10942 xmlParserCtxtPtr ctxt;
10943 xmlParserInputPtr inputStream;
10944 xmlParserInputBufferPtr buf;
10945 xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
10946
10947 /*
10948 * plug some encoding conversion routines
10949 */
10950 if ((chunk != NULL) && (size >= 4))
10951 enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
10952
10953 buf = xmlAllocParserInputBuffer(enc);
10954 if (buf == NULL) return(NULL);
10955
10956 ctxt = xmlNewParserCtxt();
10957 if (ctxt == NULL) {
10958 xmlErrMemory(NULL, "creating parser: out of memory\n");
10959 xmlFreeParserInputBuffer(buf);
10960 return(NULL);
10961 }
10962 ctxt->dictNames = 1;
10963 ctxt->pushTab = (void **) xmlMalloc(ctxt->nameMax * 3 * sizeof(xmlChar *));
10964 if (ctxt->pushTab == NULL) {
10965 xmlErrMemory(ctxt, NULL);
10966 xmlFreeParserInputBuffer(buf);
10967 xmlFreeParserCtxt(ctxt);
10968 return(NULL);
10969 }
10970 if (sax != NULL) {
10971#ifdef LIBXML_SAX1_ENABLED
10972 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
10973#endif /* LIBXML_SAX1_ENABLED */
10974 xmlFree(ctxt->sax);
10975 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
10976 if (ctxt->sax == NULL) {
10977 xmlErrMemory(ctxt, NULL);
10978 xmlFreeParserInputBuffer(buf);
10979 xmlFreeParserCtxt(ctxt);
10980 return(NULL);
10981 }
10982 memset(ctxt->sax, 0, sizeof(xmlSAXHandler));
10983 if (sax->initialized == XML_SAX2_MAGIC)
10984 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
10985 else
10986 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1));
10987 if (user_data != NULL)
10988 ctxt->userData = user_data;
10989 }
10990 if (filename == NULL) {
10991 ctxt->directory = NULL;
10992 } else {
10993 ctxt->directory = xmlParserGetDirectory(filename);
10994 }
10995
10996 inputStream = xmlNewInputStream(ctxt);
10997 if (inputStream == NULL) {
10998 xmlFreeParserCtxt(ctxt);
10999 xmlFreeParserInputBuffer(buf);
11000 return(NULL);
11001 }
11002
11003 if (filename == NULL)
11004 inputStream->filename = NULL;
11005 else {
11006 inputStream->filename = (char *)
11007 xmlCanonicPath((const xmlChar *) filename);
11008 if (inputStream->filename == NULL) {
11009 xmlFreeParserCtxt(ctxt);
11010 xmlFreeParserInputBuffer(buf);
11011 return(NULL);
11012 }
11013 }
11014 inputStream->buf = buf;
11015 inputStream->base = inputStream->buf->buffer->content;
11016 inputStream->cur = inputStream->buf->buffer->content;
11017 inputStream->end =
11018 &inputStream->buf->buffer->content[inputStream->buf->buffer->use];
11019
11020 inputPush(ctxt, inputStream);
11021
11022 /*
11023 * If the caller didn't provide an initial 'chunk' for determining
11024 * the encoding, we set the context to XML_CHAR_ENCODING_NONE so
11025 * that it can be automatically determined later
11026 */
11027 if ((size == 0) || (chunk == NULL)) {
11028 ctxt->charset = XML_CHAR_ENCODING_NONE;
11029 } else if ((ctxt->input != NULL) && (ctxt->input->buf != NULL)) {
11030 int base = ctxt->input->base - ctxt->input->buf->buffer->content;
11031 int cur = ctxt->input->cur - ctxt->input->base;
11032
11033 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
11034
11035 ctxt->input->base = ctxt->input->buf->buffer->content + base;
11036 ctxt->input->cur = ctxt->input->base + cur;
11037 ctxt->input->end =
11038 &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->use];
11039#ifdef DEBUG_PUSH
11040 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
11041#endif
11042 }
11043
11044 if (enc != XML_CHAR_ENCODING_NONE) {
11045 xmlSwitchEncoding(ctxt, enc);
11046 }
11047
11048 return(ctxt);
11049}
11050#endif /* LIBXML_PUSH_ENABLED */
11051
11052/**
11053 * xmlStopParser:
11054 * @ctxt: an XML parser context
11055 *
11056 * Blocks further parser processing
11057 */
11058void
11059xmlStopParser(xmlParserCtxtPtr ctxt) {
11060 if (ctxt == NULL)
11061 return;
11062 ctxt->instate = XML_PARSER_EOF;
11063 ctxt->disableSAX = 1;
11064 if (ctxt->input != NULL) {
11065 ctxt->input->cur = BAD_CAST"";
11066 ctxt->input->base = ctxt->input->cur;
11067 }
11068}
11069
11070/**
11071 * xmlCreateIOParserCtxt:
11072 * @sax: a SAX handler
11073 * @user_data: The user data returned on SAX callbacks
11074 * @ioread: an I/O read function
11075 * @ioclose: an I/O close function
11076 * @ioctx: an I/O handler
11077 * @enc: the charset encoding if known
11078 *
11079 * Create a parser context for using the XML parser with an existing
11080 * I/O stream
11081 *
11082 * Returns the new parser context or NULL
11083 */
11084xmlParserCtxtPtr
11085xmlCreateIOParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
11086 xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
11087 void *ioctx, xmlCharEncoding enc) {
11088 xmlParserCtxtPtr ctxt;
11089 xmlParserInputPtr inputStream;
11090 xmlParserInputBufferPtr buf;
11091
11092 if (ioread == NULL) return(NULL);
11093
11094 buf = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx, enc);
11095 if (buf == NULL) return(NULL);
11096
11097 ctxt = xmlNewParserCtxt();
11098 if (ctxt == NULL) {
11099 xmlFreeParserInputBuffer(buf);
11100 return(NULL);
11101 }
11102 if (sax != NULL) {
11103#ifdef LIBXML_SAX1_ENABLED
11104 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
11105#endif /* LIBXML_SAX1_ENABLED */
11106 xmlFree(ctxt->sax);
11107 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
11108 if (ctxt->sax == NULL) {
11109 xmlErrMemory(ctxt, NULL);
11110 xmlFreeParserCtxt(ctxt);
11111 return(NULL);
11112 }
11113 memset(ctxt->sax, 0, sizeof(xmlSAXHandler));
11114 if (sax->initialized == XML_SAX2_MAGIC)
11115 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
11116 else
11117 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1));
11118 if (user_data != NULL)
11119 ctxt->userData = user_data;
11120 }
11121
11122 inputStream = xmlNewIOInputStream(ctxt, buf, enc);
11123 if (inputStream == NULL) {
11124 xmlFreeParserCtxt(ctxt);
11125 return(NULL);
11126 }
11127 inputPush(ctxt, inputStream);
11128
11129 return(ctxt);
11130}
11131
11132#ifdef LIBXML_VALID_ENABLED
11133/************************************************************************
11134 * *
11135 * Front ends when parsing a DTD *
11136 * *
11137 ************************************************************************/
11138
11139/**
11140 * xmlIOParseDTD:
11141 * @sax: the SAX handler block or NULL
11142 * @input: an Input Buffer
11143 * @enc: the charset encoding if known
11144 *
11145 * Load and parse a DTD
11146 *
11147 * Returns the resulting xmlDtdPtr or NULL in case of error.
11148 * @input will be freed by the function in any case.
11149 */
11150
11151xmlDtdPtr
11152xmlIOParseDTD(xmlSAXHandlerPtr sax, xmlParserInputBufferPtr input,
11153 xmlCharEncoding enc) {
11154 xmlDtdPtr ret = NULL;
11155 xmlParserCtxtPtr ctxt;
11156 xmlParserInputPtr pinput = NULL;
11157 xmlChar start[4];
11158
11159 if (input == NULL)
11160 return(NULL);
11161
11162 ctxt = xmlNewParserCtxt();
11163 if (ctxt == NULL) {
11164 xmlFreeParserInputBuffer(input);
11165 return(NULL);
11166 }
11167
11168 /*
11169 * Set-up the SAX context
11170 */
11171 if (sax != NULL) {
11172 if (ctxt->sax != NULL)
11173 xmlFree(ctxt->sax);
11174 ctxt->sax = sax;
11175 ctxt->userData = ctxt;
11176 }
11177 xmlDetectSAX2(ctxt);
11178
11179 /*
11180 * generate a parser input from the I/O handler
11181 */
11182
11183 pinput = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
11184 if (pinput == NULL) {
11185 if (sax != NULL) ctxt->sax = NULL;
11186 xmlFreeParserInputBuffer(input);
11187 xmlFreeParserCtxt(ctxt);
11188 return(NULL);
11189 }
11190
11191 /*
11192 * plug some encoding conversion routines here.
11193 */
11194 xmlPushInput(ctxt, pinput);
11195 if (enc != XML_CHAR_ENCODING_NONE) {
11196 xmlSwitchEncoding(ctxt, enc);
11197 }
11198
11199 pinput->filename = NULL;
11200 pinput->line = 1;
11201 pinput->col = 1;
11202 pinput->base = ctxt->input->cur;
11203 pinput->cur = ctxt->input->cur;
11204 pinput->free = NULL;
11205
11206 /*
11207 * let's parse that entity knowing it's an external subset.
11208 */
11209 ctxt->inSubset = 2;
11210 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
11211 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
11212 BAD_CAST "none", BAD_CAST "none");
11213
11214 if ((enc == XML_CHAR_ENCODING_NONE) &&
11215 ((ctxt->input->end - ctxt->input->cur) >= 4)) {
11216 /*
11217 * Get the 4 first bytes and decode the charset
11218 * if enc != XML_CHAR_ENCODING_NONE
11219 * plug some encoding conversion routines.
11220 */
11221 start[0] = RAW;
11222 start[1] = NXT(1);
11223 start[2] = NXT(2);
11224 start[3] = NXT(3);
11225 enc = xmlDetectCharEncoding(start, 4);
11226 if (enc != XML_CHAR_ENCODING_NONE) {
11227 xmlSwitchEncoding(ctxt, enc);
11228 }
11229 }
11230
11231 xmlParseExternalSubset(ctxt, BAD_CAST "none", BAD_CAST "none");
11232
11233 if (ctxt->myDoc != NULL) {
11234 if (ctxt->wellFormed) {
11235 ret = ctxt->myDoc->extSubset;
11236 ctxt->myDoc->extSubset = NULL;
11237 if (ret != NULL) {
11238 xmlNodePtr tmp;
11239
11240 ret->doc = NULL;
11241 tmp = ret->children;
11242 while (tmp != NULL) {
11243 tmp->doc = NULL;
11244 tmp = tmp->next;
11245 }
11246 }
11247 } else {
11248 ret = NULL;
11249 }
11250 xmlFreeDoc(ctxt->myDoc);
11251 ctxt->myDoc = NULL;
11252 }
11253 if (sax != NULL) ctxt->sax = NULL;
11254 xmlFreeParserCtxt(ctxt);
11255
11256 return(ret);
11257}
11258
11259/**
11260 * xmlSAXParseDTD:
11261 * @sax: the SAX handler block
11262 * @ExternalID: a NAME* containing the External ID of the DTD
11263 * @SystemID: a NAME* containing the URL to the DTD
11264 *
11265 * Load and parse an external subset.
11266 *
11267 * Returns the resulting xmlDtdPtr or NULL in case of error.
11268 */
11269
11270xmlDtdPtr
11271xmlSAXParseDTD(xmlSAXHandlerPtr sax, const xmlChar *ExternalID,
11272 const xmlChar *SystemID) {
11273 xmlDtdPtr ret = NULL;
11274 xmlParserCtxtPtr ctxt;
11275 xmlParserInputPtr input = NULL;
11276 xmlCharEncoding enc;
11277 xmlChar* systemIdCanonic;
11278
11279 if ((ExternalID == NULL) && (SystemID == NULL)) return(NULL);
11280
11281 ctxt = xmlNewParserCtxt();
11282 if (ctxt == NULL) {
11283 return(NULL);
11284 }
11285
11286 /*
11287 * Set-up the SAX context
11288 */
11289 if (sax != NULL) {
11290 if (ctxt->sax != NULL)
11291 xmlFree(ctxt->sax);
11292 ctxt->sax = sax;
11293 ctxt->userData = ctxt;
11294 }
11295
11296 /*
11297 * Canonicalise the system ID
11298 */
11299 systemIdCanonic = xmlCanonicPath(SystemID);
11300 if ((SystemID != NULL) && (systemIdCanonic == NULL)) {
11301 xmlFreeParserCtxt(ctxt);
11302 return(NULL);
11303 }
11304
11305 /*
11306 * Ask the Entity resolver to load the damn thing
11307 */
11308
11309 if ((ctxt->sax != NULL) && (ctxt->sax->resolveEntity != NULL))
11310 input = ctxt->sax->resolveEntity(ctxt->userData, ExternalID,
11311 systemIdCanonic);
11312 if (input == NULL) {
11313 if (sax != NULL) ctxt->sax = NULL;
11314 xmlFreeParserCtxt(ctxt);
11315 if (systemIdCanonic != NULL)
11316 xmlFree(systemIdCanonic);
11317 return(NULL);
11318 }
11319
11320 /*
11321 * plug some encoding conversion routines here.
11322 */
11323 xmlPushInput(ctxt, input);
11324 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
11325 enc = xmlDetectCharEncoding(ctxt->input->cur, 4);
11326 xmlSwitchEncoding(ctxt, enc);
11327 }
11328
11329 if (input->filename == NULL)
11330 input->filename = (char *) systemIdCanonic;
11331 else
11332 xmlFree(systemIdCanonic);
11333 input->line = 1;
11334 input->col = 1;
11335 input->base = ctxt->input->cur;
11336 input->cur = ctxt->input->cur;
11337 input->free = NULL;
11338
11339 /*
11340 * let's parse that entity knowing it's an external subset.
11341 */
11342 ctxt->inSubset = 2;
11343 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
11344 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
11345 ExternalID, SystemID);
11346 xmlParseExternalSubset(ctxt, ExternalID, SystemID);
11347
11348 if (ctxt->myDoc != NULL) {
11349 if (ctxt->wellFormed) {
11350 ret = ctxt->myDoc->extSubset;
11351 ctxt->myDoc->extSubset = NULL;
11352 if (ret != NULL) {
11353 xmlNodePtr tmp;
11354
11355 ret->doc = NULL;
11356 tmp = ret->children;
11357 while (tmp != NULL) {
11358 tmp->doc = NULL;
11359 tmp = tmp->next;
11360 }
11361 }
11362 } else {
11363 ret = NULL;
11364 }
11365 xmlFreeDoc(ctxt->myDoc);
11366 ctxt->myDoc = NULL;
11367 }
11368 if (sax != NULL) ctxt->sax = NULL;
11369 xmlFreeParserCtxt(ctxt);
11370
11371 return(ret);
11372}
11373
11374
11375/**
11376 * xmlParseDTD:
11377 * @ExternalID: a NAME* containing the External ID of the DTD
11378 * @SystemID: a NAME* containing the URL to the DTD
11379 *
11380 * Load and parse an external subset.
11381 *
11382 * Returns the resulting xmlDtdPtr or NULL in case of error.
11383 */
11384
11385xmlDtdPtr
11386xmlParseDTD(const xmlChar *ExternalID, const xmlChar *SystemID) {
11387 return(xmlSAXParseDTD(NULL, ExternalID, SystemID));
11388}
11389#endif /* LIBXML_VALID_ENABLED */
11390
11391/************************************************************************
11392 * *
11393 * Front ends when parsing an Entity *
11394 * *
11395 ************************************************************************/
11396
11397/**
11398 * xmlParseCtxtExternalEntity:
11399 * @ctx: the existing parsing context
11400 * @URL: the URL for the entity to load
11401 * @ID: the System ID for the entity to load
11402 * @lst: the return value for the set of parsed nodes
11403 *
11404 * Parse an external general entity within an existing parsing context
11405 * An external general parsed entity is well-formed if it matches the
11406 * production labeled extParsedEnt.
11407 *
11408 * [78] extParsedEnt ::= TextDecl? content
11409 *
11410 * Returns 0 if the entity is well formed, -1 in case of args problem and
11411 * the parser error code otherwise
11412 */
11413
11414int
11415xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctx, const xmlChar *URL,
11416 const xmlChar *ID, xmlNodePtr *lst) {
11417 xmlParserCtxtPtr ctxt;
11418 xmlDocPtr newDoc;
11419 xmlNodePtr newRoot;
11420 xmlSAXHandlerPtr oldsax = NULL;
11421 int ret = 0;
11422 xmlChar start[4];
11423 xmlCharEncoding enc;
11424 xmlParserInputPtr inputStream;
11425 char *directory = NULL;
11426
11427 if (ctx == NULL) return(-1);
11428
11429 if (ctx->depth > 40) {
11430 return(XML_ERR_ENTITY_LOOP);
11431 }
11432
11433 if (lst != NULL)
11434 *lst = NULL;
11435 if ((URL == NULL) && (ID == NULL))
11436 return(-1);
11437 if (ctx->myDoc == NULL) /* @@ relax but check for dereferences */
11438 return(-1);
11439
11440 ctxt = xmlNewParserCtxt();
11441 if (ctxt == NULL) {
11442 return(-1);
11443 }
11444
11445 ctxt->userData = ctxt;
11446 ctxt->_private = ctx->_private;
11447
11448 inputStream = xmlLoadExternalEntity((char *)URL, (char *)ID, ctxt);
11449 if (inputStream == NULL) {
11450 xmlFreeParserCtxt(ctxt);
11451 return(-1);
11452 }
11453
11454 inputPush(ctxt, inputStream);
11455
11456 if ((ctxt->directory == NULL) && (directory == NULL))
11457 directory = xmlParserGetDirectory((char *)URL);
11458 if ((ctxt->directory == NULL) && (directory != NULL))
11459 ctxt->directory = directory;
11460
11461 oldsax = ctxt->sax;
11462 ctxt->sax = ctx->sax;
11463 xmlDetectSAX2(ctxt);
11464 newDoc = xmlNewDoc(BAD_CAST "1.0");
11465 if (newDoc == NULL) {
11466 xmlFreeParserCtxt(ctxt);
11467 return(-1);
11468 }
11469 if (ctx->myDoc->dict) {
11470 newDoc->dict = ctx->myDoc->dict;
11471 xmlDictReference(newDoc->dict);
11472 }
11473 if (ctx->myDoc != NULL) {
11474 newDoc->intSubset = ctx->myDoc->intSubset;
11475 newDoc->extSubset = ctx->myDoc->extSubset;
11476 }
11477 if (ctx->myDoc->URL != NULL) {
11478 newDoc->URL = xmlStrdup(ctx->myDoc->URL);
11479 }
11480 newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
11481 if (newRoot == NULL) {
11482 ctxt->sax = oldsax;
11483 xmlFreeParserCtxt(ctxt);
11484 newDoc->intSubset = NULL;
11485 newDoc->extSubset = NULL;
11486 xmlFreeDoc(newDoc);
11487 return(-1);
11488 }
11489 xmlAddChild((xmlNodePtr) newDoc, newRoot);
11490 nodePush(ctxt, newDoc->children);
11491 if (ctx->myDoc == NULL) {
11492 ctxt->myDoc = newDoc;
11493 } else {
11494 ctxt->myDoc = ctx->myDoc;
11495 newDoc->children->doc = ctx->myDoc;
11496 }
11497
11498 /*
11499 * Get the 4 first bytes and decode the charset
11500 * if enc != XML_CHAR_ENCODING_NONE
11501 * plug some encoding conversion routines.
11502 */
11503 GROW
11504 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
11505 start[0] = RAW;
11506 start[1] = NXT(1);
11507 start[2] = NXT(2);
11508 start[3] = NXT(3);
11509 enc = xmlDetectCharEncoding(start, 4);
11510 if (enc != XML_CHAR_ENCODING_NONE) {
11511 xmlSwitchEncoding(ctxt, enc);
11512 }
11513 }
11514
11515 /*
11516 * Parse a possible text declaration first
11517 */
11518 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
11519 xmlParseTextDecl(ctxt);
11520 }
11521
11522 /*
11523 * Doing validity checking on chunk doesn't make sense
11524 */
11525 ctxt->instate = XML_PARSER_CONTENT;
11526 ctxt->validate = ctx->validate;
11527 ctxt->valid = ctx->valid;
11528 ctxt->loadsubset = ctx->loadsubset;
11529 ctxt->depth = ctx->depth + 1;
11530 ctxt->replaceEntities = ctx->replaceEntities;
11531 if (ctxt->validate) {
11532 ctxt->vctxt.error = ctx->vctxt.error;
11533 ctxt->vctxt.warning = ctx->vctxt.warning;
11534 } else {
11535 ctxt->vctxt.error = NULL;
11536 ctxt->vctxt.warning = NULL;
11537 }
11538 ctxt->vctxt.nodeTab = NULL;
11539 ctxt->vctxt.nodeNr = 0;
11540 ctxt->vctxt.nodeMax = 0;
11541 ctxt->vctxt.node = NULL;
11542 if (ctxt->dict != NULL) xmlDictFree(ctxt->dict);
11543 ctxt->dict = ctx->dict;
11544 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
11545 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
11546 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
11547 ctxt->dictNames = ctx->dictNames;
11548 ctxt->attsDefault = ctx->attsDefault;
11549 ctxt->attsSpecial = ctx->attsSpecial;
11550 ctxt->linenumbers = ctx->linenumbers;
11551
11552 xmlParseContent(ctxt);
11553
11554 ctx->validate = ctxt->validate;
11555 ctx->valid = ctxt->valid;
11556 if ((RAW == '<') && (NXT(1) == '/')) {
11557 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
11558 } else if (RAW != 0) {
11559 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
11560 }
11561 if (ctxt->node != newDoc->children) {
11562 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
11563 }
11564
11565 if (!ctxt->wellFormed) {
11566 if (ctxt->errNo == 0)
11567 ret = 1;
11568 else
11569 ret = ctxt->errNo;
11570 } else {
11571 if (lst != NULL) {
11572 xmlNodePtr cur;
11573
11574 /*
11575 * Return the newly created nodeset after unlinking it from
11576 * they pseudo parent.
11577 */
11578 cur = newDoc->children->children;
11579 *lst = cur;
11580 while (cur != NULL) {
11581 cur->parent = NULL;
11582 cur = cur->next;
11583 }
11584 newDoc->children->children = NULL;
11585 }
11586 ret = 0;
11587 }
11588 ctxt->sax = oldsax;
11589 ctxt->dict = NULL;
11590 ctxt->attsDefault = NULL;
11591 ctxt->attsSpecial = NULL;
11592 xmlFreeParserCtxt(ctxt);
11593 newDoc->intSubset = NULL;
11594 newDoc->extSubset = NULL;
11595 xmlFreeDoc(newDoc);
11596
11597 return(ret);
11598}
11599
11600/**
11601 * xmlParseExternalEntityPrivate:
11602 * @doc: the document the chunk pertains to
11603 * @oldctxt: the previous parser context if available
11604 * @sax: the SAX handler bloc (possibly NULL)
11605 * @user_data: The user data returned on SAX callbacks (possibly NULL)
11606 * @depth: Used for loop detection, use 0
11607 * @URL: the URL for the entity to load
11608 * @ID: the System ID for the entity to load
11609 * @list: the return value for the set of parsed nodes
11610 *
11611 * Private version of xmlParseExternalEntity()
11612 *
11613 * Returns 0 if the entity is well formed, -1 in case of args problem and
11614 * the parser error code otherwise
11615 */
11616
11617static xmlParserErrors
11618xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
11619 xmlSAXHandlerPtr sax,
11620 void *user_data, int depth, const xmlChar *URL,
11621 const xmlChar *ID, xmlNodePtr *list) {
11622 xmlParserCtxtPtr ctxt;
11623 xmlDocPtr newDoc;
11624 xmlNodePtr newRoot;
11625 xmlSAXHandlerPtr oldsax = NULL;
11626 xmlParserErrors ret = XML_ERR_OK;
11627 xmlChar start[4];
11628 xmlCharEncoding enc;
11629
11630 if (depth > 40) {
11631 return(XML_ERR_ENTITY_LOOP);
11632 }
11633
11634
11635
11636 if (list != NULL)
11637 *list = NULL;
11638 if ((URL == NULL) && (ID == NULL))
11639 return(XML_ERR_INTERNAL_ERROR);
11640 if (doc == NULL)
11641 return(XML_ERR_INTERNAL_ERROR);
11642
11643
11644 ctxt = xmlCreateEntityParserCtxt(URL, ID, NULL);
11645 if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY);
11646 ctxt->userData = ctxt;
11647 if (oldctxt != NULL) {
11648 ctxt->_private = oldctxt->_private;
11649 ctxt->loadsubset = oldctxt->loadsubset;
11650 ctxt->validate = oldctxt->validate;
11651 ctxt->external = oldctxt->external;
11652 ctxt->record_info = oldctxt->record_info;
11653 ctxt->node_seq.maximum = oldctxt->node_seq.maximum;
11654 ctxt->node_seq.length = oldctxt->node_seq.length;
11655 ctxt->node_seq.buffer = oldctxt->node_seq.buffer;
11656 } else {
11657 /*
11658 * Doing validity checking on chunk without context
11659 * doesn't make sense
11660 */
11661 ctxt->_private = NULL;
11662 ctxt->validate = 0;
11663 ctxt->external = 2;
11664 ctxt->loadsubset = 0;
11665 }
11666 if (sax != NULL) {
11667 oldsax = ctxt->sax;
11668 ctxt->sax = sax;
11669 if (user_data != NULL)
11670 ctxt->userData = user_data;
11671 }
11672 xmlDetectSAX2(ctxt);
11673 newDoc = xmlNewDoc(BAD_CAST "1.0");
11674 if (newDoc == NULL) {
11675 ctxt->node_seq.maximum = 0;
11676 ctxt->node_seq.length = 0;
11677 ctxt->node_seq.buffer = NULL;
11678 xmlFreeParserCtxt(ctxt);
11679 return(XML_ERR_INTERNAL_ERROR);
11680 }
11681 newDoc->intSubset = doc->intSubset;
11682 newDoc->extSubset = doc->extSubset;
11683 newDoc->dict = doc->dict;
11684 xmlDictReference(newDoc->dict);
11685
11686 if (doc->URL != NULL) {
11687 newDoc->URL = xmlStrdup(doc->URL);
11688 }
11689 newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
11690 if (newRoot == NULL) {
11691 if (sax != NULL)
11692 ctxt->sax = oldsax;
11693 ctxt->node_seq.maximum = 0;
11694 ctxt->node_seq.length = 0;
11695 ctxt->node_seq.buffer = NULL;
11696 xmlFreeParserCtxt(ctxt);
11697 newDoc->intSubset = NULL;
11698 newDoc->extSubset = NULL;
11699 xmlFreeDoc(newDoc);
11700 return(XML_ERR_INTERNAL_ERROR);
11701 }
11702 xmlAddChild((xmlNodePtr) newDoc, newRoot);
11703 nodePush(ctxt, newDoc->children);
11704 ctxt->myDoc = doc;
11705 newRoot->doc = doc;
11706
11707 /*
11708 * Get the 4 first bytes and decode the charset
11709 * if enc != XML_CHAR_ENCODING_NONE
11710 * plug some encoding conversion routines.
11711 */
11712 GROW;
11713 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
11714 start[0] = RAW;
11715 start[1] = NXT(1);
11716 start[2] = NXT(2);
11717 start[3] = NXT(3);
11718 enc = xmlDetectCharEncoding(start, 4);
11719 if (enc != XML_CHAR_ENCODING_NONE) {
11720 xmlSwitchEncoding(ctxt, enc);
11721 }
11722 }
11723
11724 /*
11725 * Parse a possible text declaration first
11726 */
11727 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
11728 xmlParseTextDecl(ctxt);
11729 }
11730
11731 ctxt->instate = XML_PARSER_CONTENT;
11732 ctxt->depth = depth;
11733
11734 xmlParseContent(ctxt);
11735
11736 if ((RAW == '<') && (NXT(1) == '/')) {
11737 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
11738 } else if (RAW != 0) {
11739 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
11740 }
11741 if (ctxt->node != newDoc->children) {
11742 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
11743 }
11744
11745 if (!ctxt->wellFormed) {
11746 if (ctxt->errNo == 0)
11747 ret = XML_ERR_INTERNAL_ERROR;
11748 else
11749 ret = (xmlParserErrors)ctxt->errNo;
11750 } else {
11751 if (list != NULL) {
11752 xmlNodePtr cur;
11753
11754 /*
11755 * Return the newly created nodeset after unlinking it from
11756 * they pseudo parent.
11757 */
11758 cur = newDoc->children->children;
11759 *list = cur;
11760 while (cur != NULL) {
11761 cur->parent = NULL;
11762 cur = cur->next;
11763 }
11764 newDoc->children->children = NULL;
11765 }
11766 ret = XML_ERR_OK;
11767 }
11768
11769 /*
11770 * Record in the parent context the number of entities replacement
11771 * done when parsing that reference.
11772 */
11773 oldctxt->nbentities += ctxt->nbentities;
11774 /*
11775 * Also record the size of the entity parsed
11776 */
11777 if (ctxt->input != NULL) {
11778 oldctxt->sizeentities += ctxt->input->consumed;
11779 oldctxt->sizeentities += (ctxt->input->cur - ctxt->input->base);
11780 }
11781 /*
11782 * And record the last error if any
11783 */
11784 if (ctxt->lastError.code != XML_ERR_OK)
11785 xmlCopyError(&ctxt->lastError, &oldctxt->lastError);
11786
11787 if (sax != NULL)
11788 ctxt->sax = oldsax;
11789 oldctxt->node_seq.maximum = ctxt->node_seq.maximum;
11790 oldctxt->node_seq.length = ctxt->node_seq.length;
11791 oldctxt->node_seq.buffer = ctxt->node_seq.buffer;
11792 oldctxt->nbentities += ctxt->nbentities;
11793 ctxt->node_seq.maximum = 0;
11794 ctxt->node_seq.length = 0;
11795 ctxt->node_seq.buffer = NULL;
11796 xmlFreeParserCtxt(ctxt);
11797 newDoc->intSubset = NULL;
11798 newDoc->extSubset = NULL;
11799 xmlFreeDoc(newDoc);
11800
11801 return(ret);
11802}
11803
11804#ifdef LIBXML_SAX1_ENABLED
11805/**
11806 * xmlParseExternalEntity:
11807 * @doc: the document the chunk pertains to
11808 * @sax: the SAX handler bloc (possibly NULL)
11809 * @user_data: The user data returned on SAX callbacks (possibly NULL)
11810 * @depth: Used for loop detection, use 0
11811 * @URL: the URL for the entity to load
11812 * @ID: the System ID for the entity to load
11813 * @lst: the return value for the set of parsed nodes
11814 *
11815 * Parse an external general entity
11816 * An external general parsed entity is well-formed if it matches the
11817 * production labeled extParsedEnt.
11818 *
11819 * [78] extParsedEnt ::= TextDecl? content
11820 *
11821 * Returns 0 if the entity is well formed, -1 in case of args problem and
11822 * the parser error code otherwise
11823 */
11824
11825int
11826xmlParseExternalEntity(xmlDocPtr doc, xmlSAXHandlerPtr sax, void *user_data,
11827 int depth, const xmlChar *URL, const xmlChar *ID, xmlNodePtr *lst) {
11828 return(xmlParseExternalEntityPrivate(doc, NULL, sax, user_data, depth, URL,
11829 ID, lst));
11830}
11831
11832/**
11833 * xmlParseBalancedChunkMemory:
11834 * @doc: the document the chunk pertains to
11835 * @sax: the SAX handler bloc (possibly NULL)
11836 * @user_data: The user data returned on SAX callbacks (possibly NULL)
11837 * @depth: Used for loop detection, use 0
11838 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
11839 * @lst: the return value for the set of parsed nodes
11840 *
11841 * Parse a well-balanced chunk of an XML document
11842 * called by the parser
11843 * The allowed sequence for the Well Balanced Chunk is the one defined by
11844 * the content production in the XML grammar:
11845 *
11846 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
11847 *
11848 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
11849 * the parser error code otherwise
11850 */
11851
11852int
11853xmlParseBalancedChunkMemory(xmlDocPtr doc, xmlSAXHandlerPtr sax,
11854 void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst) {
11855 return xmlParseBalancedChunkMemoryRecover( doc, sax, user_data,
11856 depth, string, lst, 0 );
11857}
11858#endif /* LIBXML_SAX1_ENABLED */
11859
11860/**
11861 * xmlParseBalancedChunkMemoryInternal:
11862 * @oldctxt: the existing parsing context
11863 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
11864 * @user_data: the user data field for the parser context
11865 * @lst: the return value for the set of parsed nodes
11866 *
11867 *
11868 * Parse a well-balanced chunk of an XML document
11869 * called by the parser
11870 * The allowed sequence for the Well Balanced Chunk is the one defined by
11871 * the content production in the XML grammar:
11872 *
11873 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
11874 *
11875 * Returns XML_ERR_OK if the chunk is well balanced, and the parser
11876 * error code otherwise
11877 *
11878 * In case recover is set to 1, the nodelist will not be empty even if
11879 * the parsed chunk is not well balanced.
11880 */
11881static xmlParserErrors
11882xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
11883 const xmlChar *string, void *user_data, xmlNodePtr *lst) {
11884 xmlParserCtxtPtr ctxt;
11885 xmlDocPtr newDoc = NULL;
11886 xmlNodePtr newRoot;
11887 xmlSAXHandlerPtr oldsax = NULL;
11888 xmlNodePtr content = NULL;
11889 xmlNodePtr last = NULL;
11890 int size;
11891 xmlParserErrors ret = XML_ERR_OK;
11892
11893 if (oldctxt->depth > 40) {
11894 return(XML_ERR_ENTITY_LOOP);
11895 }
11896
11897
11898 if (lst != NULL)
11899 *lst = NULL;
11900 if (string == NULL)
11901 return(XML_ERR_INTERNAL_ERROR);
11902
11903 size = xmlStrlen(string);
11904
11905 ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
11906 if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY);
11907 if (user_data != NULL)
11908 ctxt->userData = user_data;
11909 else
11910 ctxt->userData = ctxt;
11911 if (ctxt->dict != NULL) xmlDictFree(ctxt->dict);
11912 ctxt->dict = oldctxt->dict;
11913 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
11914 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
11915 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
11916
11917 oldsax = ctxt->sax;
11918 ctxt->sax = oldctxt->sax;
11919 xmlDetectSAX2(ctxt);
11920 ctxt->replaceEntities = oldctxt->replaceEntities;
11921 ctxt->options = oldctxt->options;
11922
11923 ctxt->_private = oldctxt->_private;
11924 if (oldctxt->myDoc == NULL) {
11925 newDoc = xmlNewDoc(BAD_CAST "1.0");
11926 if (newDoc == NULL) {
11927 ctxt->sax = oldsax;
11928 ctxt->dict = NULL;
11929 xmlFreeParserCtxt(ctxt);
11930 return(XML_ERR_INTERNAL_ERROR);
11931 }
11932 newDoc->dict = ctxt->dict;
11933 xmlDictReference(newDoc->dict);
11934 ctxt->myDoc = newDoc;
11935 } else {
11936 ctxt->myDoc = oldctxt->myDoc;
11937 content = ctxt->myDoc->children;
11938 last = ctxt->myDoc->last;
11939 }
11940 newRoot = xmlNewDocNode(ctxt->myDoc, NULL, BAD_CAST "pseudoroot", NULL);
11941 if (newRoot == NULL) {
11942 ctxt->sax = oldsax;
11943 ctxt->dict = NULL;
11944 xmlFreeParserCtxt(ctxt);
11945 if (newDoc != NULL) {
11946 xmlFreeDoc(newDoc);
11947 }
11948 return(XML_ERR_INTERNAL_ERROR);
11949 }
11950 ctxt->myDoc->children = NULL;
11951 ctxt->myDoc->last = NULL;
11952 xmlAddChild((xmlNodePtr) ctxt->myDoc, newRoot);
11953 nodePush(ctxt, ctxt->myDoc->children);
11954 ctxt->instate = XML_PARSER_CONTENT;
11955 ctxt->depth = oldctxt->depth + 1;
11956
11957 ctxt->validate = 0;
11958 ctxt->loadsubset = oldctxt->loadsubset;
11959 if ((oldctxt->validate) || (oldctxt->replaceEntities != 0)) {
11960 /*
11961 * ID/IDREF registration will be done in xmlValidateElement below
11962 */
11963 ctxt->loadsubset |= XML_SKIP_IDS;
11964 }
11965 ctxt->dictNames = oldctxt->dictNames;
11966 ctxt->attsDefault = oldctxt->attsDefault;
11967 ctxt->attsSpecial = oldctxt->attsSpecial;
11968
11969 xmlParseContent(ctxt);
11970 if ((RAW == '<') && (NXT(1) == '/')) {
11971 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
11972 } else if (RAW != 0) {
11973 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
11974 }
11975 if (ctxt->node != ctxt->myDoc->children) {
11976 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
11977 }
11978
11979 if (!ctxt->wellFormed) {
11980 if (ctxt->errNo == 0)
11981 ret = XML_ERR_INTERNAL_ERROR;
11982 else
11983 ret = (xmlParserErrors)ctxt->errNo;
11984 } else {
11985 ret = XML_ERR_OK;
11986 }
11987
11988 if ((lst != NULL) && (ret == XML_ERR_OK)) {
11989 xmlNodePtr cur;
11990
11991 /*
11992 * Return the newly created nodeset after unlinking it from
11993 * they pseudo parent.
11994 */
11995 cur = ctxt->myDoc->children->children;
11996 *lst = cur;
11997 while (cur != NULL) {
11998#ifdef LIBXML_VALID_ENABLED
11999 if ((oldctxt->validate) && (oldctxt->wellFormed) &&
12000 (oldctxt->myDoc) && (oldctxt->myDoc->intSubset) &&
12001 (cur->type == XML_ELEMENT_NODE)) {
12002 oldctxt->valid &= xmlValidateElement(&oldctxt->vctxt,
12003 oldctxt->myDoc, cur);
12004 }
12005#endif /* LIBXML_VALID_ENABLED */
12006 cur->parent = NULL;
12007 cur = cur->next;
12008 }
12009 ctxt->myDoc->children->children = NULL;
12010 }
12011 if (ctxt->myDoc != NULL) {
12012 xmlFreeNode(ctxt->myDoc->children);
12013 ctxt->myDoc->children = content;
12014 ctxt->myDoc->last = last;
12015 }
12016
12017 /*
12018 * Record in the parent context the number of entities replacement
12019 * done when parsing that reference.
12020 */
12021 oldctxt->nbentities += ctxt->nbentities;
12022 /*
12023 * Also record the last error if any
12024 */
12025 if (ctxt->lastError.code != XML_ERR_OK)
12026 xmlCopyError(&ctxt->lastError, &oldctxt->lastError);
12027
12028 ctxt->sax = oldsax;
12029 ctxt->dict = NULL;
12030 ctxt->attsDefault = NULL;
12031 ctxt->attsSpecial = NULL;
12032 xmlFreeParserCtxt(ctxt);
12033 if (newDoc != NULL) {
12034 xmlFreeDoc(newDoc);
12035 }
12036
12037 return(ret);
12038}
12039
12040/**
12041 * xmlParseInNodeContext:
12042 * @node: the context node
12043 * @data: the input string
12044 * @datalen: the input string length in bytes
12045 * @options: a combination of xmlParserOption
12046 * @lst: the return value for the set of parsed nodes
12047 *
12048 * Parse a well-balanced chunk of an XML document
12049 * within the context (DTD, namespaces, etc ...) of the given node.
12050 *
12051 * The allowed sequence for the data is a Well Balanced Chunk defined by
12052 * the content production in the XML grammar:
12053 *
12054 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
12055 *
12056 * Returns XML_ERR_OK if the chunk is well balanced, and the parser
12057 * error code otherwise
12058 */
12059xmlParserErrors
12060xmlParseInNodeContext(xmlNodePtr node, const char *data, int datalen,
12061 int options, xmlNodePtr *lst) {
12062#ifdef SAX2
12063 xmlParserCtxtPtr ctxt;
12064 xmlDocPtr doc = NULL;
12065 xmlNodePtr fake, cur;
12066 int nsnr = 0;
12067
12068 xmlParserErrors ret = XML_ERR_OK;
12069
12070 /*
12071 * check all input parameters, grab the document
12072 */
12073 if ((lst == NULL) || (node == NULL) || (data == NULL) || (datalen < 0))
12074 return(XML_ERR_INTERNAL_ERROR);
12075 switch (node->type) {
12076 case XML_ELEMENT_NODE:
12077 case XML_ATTRIBUTE_NODE:
12078 case XML_TEXT_NODE:
12079 case XML_CDATA_SECTION_NODE:
12080 case XML_ENTITY_REF_NODE:
12081 case XML_PI_NODE:
12082 case XML_COMMENT_NODE:
12083 case XML_DOCUMENT_NODE:
12084 case XML_HTML_DOCUMENT_NODE:
12085 break;
12086 default:
12087 return(XML_ERR_INTERNAL_ERROR);
12088
12089 }
12090 while ((node != NULL) && (node->type != XML_ELEMENT_NODE) &&
12091 (node->type != XML_DOCUMENT_NODE) &&
12092 (node->type != XML_HTML_DOCUMENT_NODE))
12093 node = node->parent;
12094 if (node == NULL)
12095 return(XML_ERR_INTERNAL_ERROR);
12096 if (node->type == XML_ELEMENT_NODE)
12097 doc = node->doc;
12098 else
12099 doc = (xmlDocPtr) node;
12100 if (doc == NULL)
12101 return(XML_ERR_INTERNAL_ERROR);
12102
12103 /*
12104 * allocate a context and set-up everything not related to the
12105 * node position in the tree
12106 */
12107 if (doc->type == XML_DOCUMENT_NODE)
12108 ctxt = xmlCreateMemoryParserCtxt((char *) data, datalen);
12109#ifdef LIBXML_HTML_ENABLED
12110 else if (doc->type == XML_HTML_DOCUMENT_NODE)
12111 ctxt = htmlCreateMemoryParserCtxt((char *) data, datalen);
12112#endif
12113 else
12114 return(XML_ERR_INTERNAL_ERROR);
12115
12116 if (ctxt == NULL)
12117 return(XML_ERR_NO_MEMORY);
12118 fake = xmlNewComment(NULL);
12119 if (fake == NULL) {
12120 xmlFreeParserCtxt(ctxt);
12121 return(XML_ERR_NO_MEMORY);
12122 }
12123 xmlAddChild(node, fake);
12124
12125 /*
12126 * Use input doc's dict if present, else assure XML_PARSE_NODICT is set.
12127 * We need a dictionary for xmlDetectSAX2, so if there's no doc dict
12128 * we must wait until the last moment to free the original one.
12129 */
12130 if (doc->dict != NULL) {
12131 if (ctxt->dict != NULL)
12132 xmlDictFree(ctxt->dict);
12133 ctxt->dict = doc->dict;
12134 } else
12135 options |= XML_PARSE_NODICT;
12136
12137 xmlCtxtUseOptions(ctxt, options);
12138 xmlDetectSAX2(ctxt);
12139 ctxt->myDoc = doc;
12140
12141 if (node->type == XML_ELEMENT_NODE) {
12142 nodePush(ctxt, node);
12143 /*
12144 * initialize the SAX2 namespaces stack
12145 */
12146 cur = node;
12147 while ((cur != NULL) && (cur->type == XML_ELEMENT_NODE)) {
12148 xmlNsPtr ns = cur->nsDef;
12149 const xmlChar *iprefix, *ihref;
12150
12151 while (ns != NULL) {
12152 if (ctxt->dict) {
12153 iprefix = xmlDictLookup(ctxt->dict, ns->prefix, -1);
12154 ihref = xmlDictLookup(ctxt->dict, ns->href, -1);
12155 } else {
12156 iprefix = ns->prefix;
12157 ihref = ns->href;
12158 }
12159
12160 if (xmlGetNamespace(ctxt, iprefix) == NULL) {
12161 nsPush(ctxt, iprefix, ihref);
12162 nsnr++;
12163 }
12164 ns = ns->next;
12165 }
12166 cur = cur->parent;
12167 }
12168 ctxt->instate = XML_PARSER_CONTENT;
12169 }
12170
12171 if ((ctxt->validate) || (ctxt->replaceEntities != 0)) {
12172 /*
12173 * ID/IDREF registration will be done in xmlValidateElement below
12174 */
12175 ctxt->loadsubset |= XML_SKIP_IDS;
12176 }
12177
12178#ifdef LIBXML_HTML_ENABLED
12179 if (doc->type == XML_HTML_DOCUMENT_NODE)
12180 __htmlParseContent(ctxt);
12181 else
12182#endif
12183 xmlParseContent(ctxt);
12184
12185 nsPop(ctxt, nsnr);
12186 if ((RAW == '<') && (NXT(1) == '/')) {
12187 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
12188 } else if (RAW != 0) {
12189 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
12190 }
12191 if ((ctxt->node != NULL) && (ctxt->node != node)) {
12192 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
12193 ctxt->wellFormed = 0;
12194 }
12195
12196 if (!ctxt->wellFormed) {
12197 if (ctxt->errNo == 0)
12198 ret = XML_ERR_INTERNAL_ERROR;
12199 else
12200 ret = (xmlParserErrors)ctxt->errNo;
12201 } else {
12202 ret = XML_ERR_OK;
12203 }
12204
12205 /*
12206 * Return the newly created nodeset after unlinking it from
12207 * the pseudo sibling.
12208 */
12209
12210 cur = fake->next;
12211 fake->next = NULL;
12212 node->last = fake;
12213
12214 if (cur != NULL) {
12215 cur->prev = NULL;
12216 }
12217
12218 *lst = cur;
12219
12220 while (cur != NULL) {
12221 cur->parent = NULL;
12222 cur = cur->next;
12223 }
12224
12225 xmlUnlinkNode(fake);
12226 xmlFreeNode(fake);
12227
12228
12229 if (ret != XML_ERR_OK) {
12230 xmlFreeNodeList(*lst);
12231 *lst = NULL;
12232 }
12233
12234 if (doc->dict != NULL)
12235 ctxt->dict = NULL;
12236 xmlFreeParserCtxt(ctxt);
12237
12238 return(ret);
12239#else /* !SAX2 */
12240 return(XML_ERR_INTERNAL_ERROR);
12241#endif
12242}
12243
12244#ifdef LIBXML_SAX1_ENABLED
12245/**
12246 * xmlParseBalancedChunkMemoryRecover:
12247 * @doc: the document the chunk pertains to
12248 * @sax: the SAX handler bloc (possibly NULL)
12249 * @user_data: The user data returned on SAX callbacks (possibly NULL)
12250 * @depth: Used for loop detection, use 0
12251 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
12252 * @lst: the return value for the set of parsed nodes
12253 * @recover: return nodes even if the data is broken (use 0)
12254 *
12255 *
12256 * Parse a well-balanced chunk of an XML document
12257 * called by the parser
12258 * The allowed sequence for the Well Balanced Chunk is the one defined by
12259 * the content production in the XML grammar:
12260 *
12261 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
12262 *
12263 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
12264 * the parser error code otherwise
12265 *
12266 * In case recover is set to 1, the nodelist will not be empty even if
12267 * the parsed chunk is not well balanced.
12268 */
12269int
12270xmlParseBalancedChunkMemoryRecover(xmlDocPtr doc, xmlSAXHandlerPtr sax,
12271 void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst,
12272 int recover) {
12273 xmlParserCtxtPtr ctxt;
12274 xmlDocPtr newDoc;
12275 xmlSAXHandlerPtr oldsax = NULL;
12276 xmlNodePtr content, newRoot;
12277 int size;
12278 int ret = 0;
12279
12280 if (depth > 40) {
12281 return(XML_ERR_ENTITY_LOOP);
12282 }
12283
12284
12285 if (lst != NULL)
12286 *lst = NULL;
12287 if (string == NULL)
12288 return(-1);
12289
12290 size = xmlStrlen(string);
12291
12292 ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
12293 if (ctxt == NULL) return(-1);
12294 ctxt->userData = ctxt;
12295 if (sax != NULL) {
12296 oldsax = ctxt->sax;
12297 ctxt->sax = sax;
12298 if (user_data != NULL)
12299 ctxt->userData = user_data;
12300 }
12301 newDoc = xmlNewDoc(BAD_CAST "1.0");
12302 if (newDoc == NULL) {
12303 xmlFreeParserCtxt(ctxt);
12304 return(-1);
12305 }
12306 if ((doc != NULL) && (doc->dict != NULL)) {
12307 xmlDictFree(ctxt->dict);
12308 ctxt->dict = doc->dict;
12309 xmlDictReference(ctxt->dict);
12310 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
12311 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
12312 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
12313 ctxt->dictNames = 1;
12314 } else {
12315 xmlCtxtUseOptions(ctxt, XML_PARSE_NODICT);
12316 }
12317 if (doc != NULL) {
12318 newDoc->intSubset = doc->intSubset;
12319 newDoc->extSubset = doc->extSubset;
12320 }
12321 newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
12322 if (newRoot == NULL) {
12323 if (sax != NULL)
12324 ctxt->sax = oldsax;
12325 xmlFreeParserCtxt(ctxt);
12326 newDoc->intSubset = NULL;
12327 newDoc->extSubset = NULL;
12328 xmlFreeDoc(newDoc);
12329 return(-1);
12330 }
12331 xmlAddChild((xmlNodePtr) newDoc, newRoot);
12332 nodePush(ctxt, newRoot);
12333 if (doc == NULL) {
12334 ctxt->myDoc = newDoc;
12335 } else {
12336 ctxt->myDoc = newDoc;
12337 newDoc->children->doc = doc;
12338 /* Ensure that doc has XML spec namespace */
12339 xmlSearchNsByHref(doc, (xmlNodePtr)doc, XML_XML_NAMESPACE);
12340 newDoc->oldNs = doc->oldNs;
12341 }
12342 ctxt->instate = XML_PARSER_CONTENT;
12343 ctxt->depth = depth;
12344
12345 /*
12346 * Doing validity checking on chunk doesn't make sense
12347 */
12348 ctxt->validate = 0;
12349 ctxt->loadsubset = 0;
12350 xmlDetectSAX2(ctxt);
12351
12352 if ( doc != NULL ){
12353 content = doc->children;
12354 doc->children = NULL;
12355 xmlParseContent(ctxt);
12356 doc->children = content;
12357 }
12358 else {
12359 xmlParseContent(ctxt);
12360 }
12361 if ((RAW == '<') && (NXT(1) == '/')) {
12362 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
12363 } else if (RAW != 0) {
12364 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
12365 }
12366 if (ctxt->node != newDoc->children) {
12367 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
12368 }
12369
12370 if (!ctxt->wellFormed) {
12371 if (ctxt->errNo == 0)
12372 ret = 1;
12373 else
12374 ret = ctxt->errNo;
12375 } else {
12376 ret = 0;
12377 }
12378
12379 if ((lst != NULL) && ((ret == 0) || (recover == 1))) {
12380 xmlNodePtr cur;
12381
12382 /*
12383 * Return the newly created nodeset after unlinking it from
12384 * they pseudo parent.
12385 */
12386 cur = newDoc->children->children;
12387 *lst = cur;
12388 while (cur != NULL) {
12389 xmlSetTreeDoc(cur, doc);
12390 cur->parent = NULL;
12391 cur = cur->next;
12392 }
12393 newDoc->children->children = NULL;
12394 }
12395
12396 if (sax != NULL)
12397 ctxt->sax = oldsax;
12398 xmlFreeParserCtxt(ctxt);
12399 newDoc->intSubset = NULL;
12400 newDoc->extSubset = NULL;
12401 newDoc->oldNs = NULL;
12402 xmlFreeDoc(newDoc);
12403
12404 return(ret);
12405}
12406
12407/**
12408 * xmlSAXParseEntity:
12409 * @sax: the SAX handler block
12410 * @filename: the filename
12411 *
12412 * parse an XML external entity out of context and build a tree.
12413 * It use the given SAX function block to handle the parsing callback.
12414 * If sax is NULL, fallback to the default DOM tree building routines.
12415 *
12416 * [78] extParsedEnt ::= TextDecl? content
12417 *
12418 * This correspond to a "Well Balanced" chunk
12419 *
12420 * Returns the resulting document tree
12421 */
12422
12423xmlDocPtr
12424xmlSAXParseEntity(xmlSAXHandlerPtr sax, const char *filename) {
12425 xmlDocPtr ret;
12426 xmlParserCtxtPtr ctxt;
12427
12428 ctxt = xmlCreateFileParserCtxt(filename);
12429 if (ctxt == NULL) {
12430 return(NULL);
12431 }
12432 if (sax != NULL) {
12433 if (ctxt->sax != NULL)
12434 xmlFree(ctxt->sax);
12435 ctxt->sax = sax;
12436 ctxt->userData = NULL;
12437 }
12438
12439 xmlParseExtParsedEnt(ctxt);
12440
12441 if (ctxt->wellFormed)
12442 ret = ctxt->myDoc;
12443 else {
12444 ret = NULL;
12445 xmlFreeDoc(ctxt->myDoc);
12446 ctxt->myDoc = NULL;
12447 }
12448 if (sax != NULL)
12449 ctxt->sax = NULL;
12450 xmlFreeParserCtxt(ctxt);
12451
12452 return(ret);
12453}
12454
12455/**
12456 * xmlParseEntity:
12457 * @filename: the filename
12458 *
12459 * parse an XML external entity out of context and build a tree.
12460 *
12461 * [78] extParsedEnt ::= TextDecl? content
12462 *
12463 * This correspond to a "Well Balanced" chunk
12464 *
12465 * Returns the resulting document tree
12466 */
12467
12468xmlDocPtr
12469xmlParseEntity(const char *filename) {
12470 return(xmlSAXParseEntity(NULL, filename));
12471}
12472#endif /* LIBXML_SAX1_ENABLED */
12473
12474/**
12475 * xmlCreateEntityParserCtxt:
12476 * @URL: the entity URL
12477 * @ID: the entity PUBLIC ID
12478 * @base: a possible base for the target URI
12479 *
12480 * Create a parser context for an external entity
12481 * Automatic support for ZLIB/Compress compressed document is provided
12482 * by default if found at compile-time.
12483 *
12484 * Returns the new parser context or NULL
12485 */
12486xmlParserCtxtPtr
12487xmlCreateEntityParserCtxt(const xmlChar *URL, const xmlChar *ID,
12488 const xmlChar *base) {
12489 xmlParserCtxtPtr ctxt;
12490 xmlParserInputPtr inputStream;
12491 char *directory = NULL;
12492 xmlChar *uri;
12493
12494 ctxt = xmlNewParserCtxt();
12495 if (ctxt == NULL) {
12496 return(NULL);
12497 }
12498
12499 uri = xmlBuildURI(URL, base);
12500
12501 if (uri == NULL) {
12502 inputStream = xmlLoadExternalEntity((char *)URL, (char *)ID, ctxt);
12503 if (inputStream == NULL) {
12504 xmlFreeParserCtxt(ctxt);
12505 return(NULL);
12506 }
12507
12508 inputPush(ctxt, inputStream);
12509
12510 if ((ctxt->directory == NULL) && (directory == NULL))
12511 directory = xmlParserGetDirectory((char *)URL);
12512 if ((ctxt->directory == NULL) && (directory != NULL))
12513 ctxt->directory = directory;
12514 } else {
12515 inputStream = xmlLoadExternalEntity((char *)uri, (char *)ID, ctxt);
12516 if (inputStream == NULL) {
12517 xmlFree(uri);
12518 xmlFreeParserCtxt(ctxt);
12519 return(NULL);
12520 }
12521
12522 inputPush(ctxt, inputStream);
12523
12524 if ((ctxt->directory == NULL) && (directory == NULL))
12525 directory = xmlParserGetDirectory((char *)uri);
12526 if ((ctxt->directory == NULL) && (directory != NULL))
12527 ctxt->directory = directory;
12528 xmlFree(uri);
12529 }
12530 return(ctxt);
12531}
12532
12533/************************************************************************
12534 * *
12535 * Front ends when parsing from a file *
12536 * *
12537 ************************************************************************/
12538
12539/**
12540 * xmlCreateURLParserCtxt:
12541 * @filename: the filename or URL
12542 * @options: a combination of xmlParserOption
12543 *
12544 * Create a parser context for a file or URL content.
12545 * Automatic support for ZLIB/Compress compressed document is provided
12546 * by default if found at compile-time and for file accesses
12547 *
12548 * Returns the new parser context or NULL
12549 */
12550xmlParserCtxtPtr
12551xmlCreateURLParserCtxt(const char *filename, int options)
12552{
12553 xmlParserCtxtPtr ctxt;
12554 xmlParserInputPtr inputStream;
12555 char *directory = NULL;
12556
12557 ctxt = xmlNewParserCtxt();
12558 if (ctxt == NULL) {
12559 xmlErrMemory(NULL, "cannot allocate parser context");
12560 return(NULL);
12561 }
12562
12563 if (options)
12564 xmlCtxtUseOptions(ctxt, options);
12565 ctxt->linenumbers = 1;
12566
12567 inputStream = xmlLoadExternalEntity(filename, NULL, ctxt);
12568 if (inputStream == NULL) {
12569 xmlFreeParserCtxt(ctxt);
12570 return(NULL);
12571 }
12572
12573 inputPush(ctxt, inputStream);
12574 if ((ctxt->directory == NULL) && (directory == NULL))
12575 directory = xmlParserGetDirectory(filename);
12576 if ((ctxt->directory == NULL) && (directory != NULL))
12577 ctxt->directory = directory;
12578
12579 return(ctxt);
12580}
12581
12582/**
12583 * xmlCreateFileParserCtxt:
12584 * @filename: the filename
12585 *
12586 * Create a parser context for a file content.
12587 * Automatic support for ZLIB/Compress compressed document is provided
12588 * by default if found at compile-time.
12589 *
12590 * Returns the new parser context or NULL
12591 */
12592xmlParserCtxtPtr
12593xmlCreateFileParserCtxt(const char *filename)
12594{
12595 return(xmlCreateURLParserCtxt(filename, 0));
12596}
12597
12598#ifdef LIBXML_SAX1_ENABLED
12599/**
12600 * xmlSAXParseFileWithData:
12601 * @sax: the SAX handler block
12602 * @filename: the filename
12603 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
12604 * documents
12605 * @data: the userdata
12606 *
12607 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
12608 * compressed document is provided by default if found at compile-time.
12609 * It use the given SAX function block to handle the parsing callback.
12610 * If sax is NULL, fallback to the default DOM tree building routines.
12611 *
12612 * User data (void *) is stored within the parser context in the
12613 * context's _private member, so it is available nearly everywhere in libxml
12614 *
12615 * Returns the resulting document tree
12616 */
12617
12618xmlDocPtr
12619xmlSAXParseFileWithData(xmlSAXHandlerPtr sax, const char *filename,
12620 int recovery, void *data) {
12621 xmlDocPtr ret;
12622 xmlParserCtxtPtr ctxt;
12623 char *directory = NULL;
12624
12625 xmlInitParser();
12626
12627 ctxt = xmlCreateFileParserCtxt(filename);
12628 if (ctxt == NULL) {
12629 return(NULL);
12630 }
12631 if (sax != NULL) {
12632 if (ctxt->sax != NULL)
12633 xmlFree(ctxt->sax);
12634 ctxt->sax = sax;
12635 }
12636 xmlDetectSAX2(ctxt);
12637 if (data!=NULL) {
12638 ctxt->_private = data;
12639 }
12640
12641 if ((ctxt->directory == NULL) && (directory == NULL))
12642 directory = xmlParserGetDirectory(filename);
12643 if ((ctxt->directory == NULL) && (directory != NULL))
12644 ctxt->directory = (char *) xmlStrdup((xmlChar *) directory);
12645
12646 ctxt->recovery = recovery;
12647
12648 xmlParseDocument(ctxt);
12649
12650 if ((ctxt->wellFormed) || recovery) {
12651 ret = ctxt->myDoc;
12652 if (ret != NULL) {
12653 if (ctxt->input->buf->compressed > 0)
12654 ret->compression = 9;
12655 else
12656 ret->compression = ctxt->input->buf->compressed;
12657 }
12658 }
12659 else {
12660 ret = NULL;
12661 xmlFreeDoc(ctxt->myDoc);
12662 ctxt->myDoc = NULL;
12663 }
12664 if (sax != NULL)
12665 ctxt->sax = NULL;
12666 xmlFreeParserCtxt(ctxt);
12667
12668 return(ret);
12669}
12670
12671/**
12672 * xmlSAXParseFile:
12673 * @sax: the SAX handler block
12674 * @filename: the filename
12675 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
12676 * documents
12677 *
12678 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
12679 * compressed document is provided by default if found at compile-time.
12680 * It use the given SAX function block to handle the parsing callback.
12681 * If sax is NULL, fallback to the default DOM tree building routines.
12682 *
12683 * Returns the resulting document tree
12684 */
12685
12686xmlDocPtr
12687xmlSAXParseFile(xmlSAXHandlerPtr sax, const char *filename,
12688 int recovery) {
12689 return(xmlSAXParseFileWithData(sax,filename,recovery,NULL));
12690}
12691
12692/**
12693 * xmlRecoverDoc:
12694 * @cur: a pointer to an array of xmlChar
12695 *
12696 * parse an XML in-memory document and build a tree.
12697 * In the case the document is not Well Formed, a tree is built anyway
12698 *
12699 * Returns the resulting document tree
12700 */
12701
12702xmlDocPtr
12703xmlRecoverDoc(xmlChar *cur) {
12704 return(xmlSAXParseDoc(NULL, cur, 1));
12705}
12706
12707/**
12708 * xmlParseFile:
12709 * @filename: the filename
12710 *
12711 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
12712 * compressed document is provided by default if found at compile-time.
12713 *
12714 * Returns the resulting document tree if the file was wellformed,
12715 * NULL otherwise.
12716 */
12717
12718xmlDocPtr
12719xmlParseFile(const char *filename) {
12720 return(xmlSAXParseFile(NULL, filename, 0));
12721}
12722
12723/**
12724 * xmlRecoverFile:
12725 * @filename: the filename
12726 *
12727 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
12728 * compressed document is provided by default if found at compile-time.
12729 * In the case the document is not Well Formed, a tree is built anyway
12730 *
12731 * Returns the resulting document tree
12732 */
12733
12734xmlDocPtr
12735xmlRecoverFile(const char *filename) {
12736 return(xmlSAXParseFile(NULL, filename, 1));
12737}
12738
12739
12740/**
12741 * xmlSetupParserForBuffer:
12742 * @ctxt: an XML parser context
12743 * @buffer: a xmlChar * buffer
12744 * @filename: a file name
12745 *
12746 * Setup the parser context to parse a new buffer; Clears any prior
12747 * contents from the parser context. The buffer parameter must not be
12748 * NULL, but the filename parameter can be
12749 */
12750void
12751xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt, const xmlChar* buffer,
12752 const char* filename)
12753{
12754 xmlParserInputPtr input;
12755
12756 if ((ctxt == NULL) || (buffer == NULL))
12757 return;
12758
12759 input = xmlNewInputStream(ctxt);
12760 if (input == NULL) {
12761 xmlErrMemory(NULL, "parsing new buffer: out of memory\n");
12762 xmlClearParserCtxt(ctxt);
12763 return;
12764 }
12765
12766 xmlClearParserCtxt(ctxt);
12767 if (filename != NULL)
12768 input->filename = (char *) xmlCanonicPath((const xmlChar *)filename);
12769 input->base = buffer;
12770 input->cur = buffer;
12771 input->end = &buffer[xmlStrlen(buffer)];
12772 inputPush(ctxt, input);
12773}
12774
12775/**
12776 * xmlSAXUserParseFile:
12777 * @sax: a SAX handler
12778 * @user_data: The user data returned on SAX callbacks
12779 * @filename: a file name
12780 *
12781 * parse an XML file and call the given SAX handler routines.
12782 * Automatic support for ZLIB/Compress compressed document is provided
12783 *
12784 * Returns 0 in case of success or a error number otherwise
12785 */
12786int
12787xmlSAXUserParseFile(xmlSAXHandlerPtr sax, void *user_data,
12788 const char *filename) {
12789 int ret = 0;
12790 xmlParserCtxtPtr ctxt;
12791
12792 ctxt = xmlCreateFileParserCtxt(filename);
12793 if (ctxt == NULL) return -1;
12794 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
12795 xmlFree(ctxt->sax);
12796 ctxt->sax = sax;
12797 xmlDetectSAX2(ctxt);
12798
12799 if (user_data != NULL)
12800 ctxt->userData = user_data;
12801
12802 xmlParseDocument(ctxt);
12803
12804 if (ctxt->wellFormed)
12805 ret = 0;
12806 else {
12807 if (ctxt->errNo != 0)
12808 ret = ctxt->errNo;
12809 else
12810 ret = -1;
12811 }
12812 if (sax != NULL)
12813 ctxt->sax = NULL;
12814 if (ctxt->myDoc != NULL) {
12815 xmlFreeDoc(ctxt->myDoc);
12816 ctxt->myDoc = NULL;
12817 }
12818 xmlFreeParserCtxt(ctxt);
12819
12820 return ret;
12821}
12822#endif /* LIBXML_SAX1_ENABLED */
12823
12824/************************************************************************
12825 * *
12826 * Front ends when parsing from memory *
12827 * *
12828 ************************************************************************/
12829
12830/**
12831 * xmlCreateMemoryParserCtxt:
12832 * @buffer: a pointer to a char array
12833 * @size: the size of the array
12834 *
12835 * Create a parser context for an XML in-memory document.
12836 *
12837 * Returns the new parser context or NULL
12838 */
12839xmlParserCtxtPtr
12840xmlCreateMemoryParserCtxt(const char *buffer, int size) {
12841 xmlParserCtxtPtr ctxt;
12842 xmlParserInputPtr input;
12843 xmlParserInputBufferPtr buf;
12844
12845 if (buffer == NULL)
12846 return(NULL);
12847 if (size <= 0)
12848 return(NULL);
12849
12850 ctxt = xmlNewParserCtxt();
12851 if (ctxt == NULL)
12852 return(NULL);
12853
12854 /* TODO: xmlParserInputBufferCreateStatic, requires some serious changes */
12855 buf = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
12856 if (buf == NULL) {
12857 xmlFreeParserCtxt(ctxt);
12858 return(NULL);
12859 }
12860
12861 input = xmlNewInputStream(ctxt);
12862 if (input == NULL) {
12863 xmlFreeParserInputBuffer(buf);
12864 xmlFreeParserCtxt(ctxt);
12865 return(NULL);
12866 }
12867
12868 input->filename = NULL;
12869 input->buf = buf;
12870 input->base = input->buf->buffer->content;
12871 input->cur = input->buf->buffer->content;
12872 input->end = &input->buf->buffer->content[input->buf->buffer->use];
12873
12874 inputPush(ctxt, input);
12875 return(ctxt);
12876}
12877
12878#ifdef LIBXML_SAX1_ENABLED
12879/**
12880 * xmlSAXParseMemoryWithData:
12881 * @sax: the SAX handler block
12882 * @buffer: an pointer to a char array
12883 * @size: the size of the array
12884 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
12885 * documents
12886 * @data: the userdata
12887 *
12888 * parse an XML in-memory block and use the given SAX function block
12889 * to handle the parsing callback. If sax is NULL, fallback to the default
12890 * DOM tree building routines.
12891 *
12892 * User data (void *) is stored within the parser context in the
12893 * context's _private member, so it is available nearly everywhere in libxml
12894 *
12895 * Returns the resulting document tree
12896 */
12897
12898xmlDocPtr
12899xmlSAXParseMemoryWithData(xmlSAXHandlerPtr sax, const char *buffer,
12900 int size, int recovery, void *data) {
12901 xmlDocPtr ret;
12902 xmlParserCtxtPtr ctxt;
12903
12904 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
12905 if (ctxt == NULL) return(NULL);
12906 if (sax != NULL) {
12907 if (ctxt->sax != NULL)
12908 xmlFree(ctxt->sax);
12909 ctxt->sax = sax;
12910 }
12911 xmlDetectSAX2(ctxt);
12912 if (data!=NULL) {
12913 ctxt->_private=data;
12914 }
12915
12916 ctxt->recovery = recovery;
12917
12918 xmlParseDocument(ctxt);
12919
12920 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
12921 else {
12922 ret = NULL;
12923 xmlFreeDoc(ctxt->myDoc);
12924 ctxt->myDoc = NULL;
12925 }
12926 if (sax != NULL)
12927 ctxt->sax = NULL;
12928 xmlFreeParserCtxt(ctxt);
12929
12930 return(ret);
12931}
12932
12933/**
12934 * xmlSAXParseMemory:
12935 * @sax: the SAX handler block
12936 * @buffer: an pointer to a char array
12937 * @size: the size of the array
12938 * @recovery: work in recovery mode, i.e. tries to read not Well Formed
12939 * documents
12940 *
12941 * parse an XML in-memory block and use the given SAX function block
12942 * to handle the parsing callback. If sax is NULL, fallback to the default
12943 * DOM tree building routines.
12944 *
12945 * Returns the resulting document tree
12946 */
12947xmlDocPtr
12948xmlSAXParseMemory(xmlSAXHandlerPtr sax, const char *buffer,
12949 int size, int recovery) {
12950 return xmlSAXParseMemoryWithData(sax, buffer, size, recovery, NULL);
12951}
12952
12953/**
12954 * xmlParseMemory:
12955 * @buffer: an pointer to a char array
12956 * @size: the size of the array
12957 *
12958 * parse an XML in-memory block and build a tree.
12959 *
12960 * Returns the resulting document tree
12961 */
12962
12963xmlDocPtr xmlParseMemory(const char *buffer, int size) {
12964 return(xmlSAXParseMemory(NULL, buffer, size, 0));
12965}
12966
12967/**
12968 * xmlRecoverMemory:
12969 * @buffer: an pointer to a char array
12970 * @size: the size of the array
12971 *
12972 * parse an XML in-memory block and build a tree.
12973 * In the case the document is not Well Formed, a tree is built anyway
12974 *
12975 * Returns the resulting document tree
12976 */
12977
12978xmlDocPtr xmlRecoverMemory(const char *buffer, int size) {
12979 return(xmlSAXParseMemory(NULL, buffer, size, 1));
12980}
12981
12982/**
12983 * xmlSAXUserParseMemory:
12984 * @sax: a SAX handler
12985 * @user_data: The user data returned on SAX callbacks
12986 * @buffer: an in-memory XML document input
12987 * @size: the length of the XML document in bytes
12988 *
12989 * A better SAX parsing routine.
12990 * parse an XML in-memory buffer and call the given SAX handler routines.
12991 *
12992 * Returns 0 in case of success or a error number otherwise
12993 */
12994int xmlSAXUserParseMemory(xmlSAXHandlerPtr sax, void *user_data,
12995 const char *buffer, int size) {
12996 int ret = 0;
12997 xmlParserCtxtPtr ctxt;
12998
12999 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
13000 if (ctxt == NULL) return -1;
13001 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
13002 xmlFree(ctxt->sax);
13003 ctxt->sax = sax;
13004 xmlDetectSAX2(ctxt);
13005
13006 if (user_data != NULL)
13007 ctxt->userData = user_data;
13008
13009 xmlParseDocument(ctxt);
13010
13011 if (ctxt->wellFormed)
13012 ret = 0;
13013 else {
13014 if (ctxt->errNo != 0)
13015 ret = ctxt->errNo;
13016 else
13017 ret = -1;
13018 }
13019 if (sax != NULL)
13020 ctxt->sax = NULL;
13021 if (ctxt->myDoc != NULL) {
13022 xmlFreeDoc(ctxt->myDoc);
13023 ctxt->myDoc = NULL;
13024 }
13025 xmlFreeParserCtxt(ctxt);
13026
13027 return ret;
13028}
13029#endif /* LIBXML_SAX1_ENABLED */
13030
13031/**
13032 * xmlCreateDocParserCtxt:
13033 * @cur: a pointer to an array of xmlChar
13034 *
13035 * Creates a parser context for an XML in-memory document.
13036 *
13037 * Returns the new parser context or NULL
13038 */
13039xmlParserCtxtPtr
13040xmlCreateDocParserCtxt(const xmlChar *cur) {
13041 int len;
13042
13043 if (cur == NULL)
13044 return(NULL);
13045 len = xmlStrlen(cur);
13046 return(xmlCreateMemoryParserCtxt((const char *)cur, len));
13047}
13048
13049#ifdef LIBXML_SAX1_ENABLED
13050/**
13051 * xmlSAXParseDoc:
13052 * @sax: the SAX handler block
13053 * @cur: a pointer to an array of xmlChar
13054 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
13055 * documents
13056 *
13057 * parse an XML in-memory document and build a tree.
13058 * It use the given SAX function block to handle the parsing callback.
13059 * If sax is NULL, fallback to the default DOM tree building routines.
13060 *
13061 * Returns the resulting document tree
13062 */
13063
13064xmlDocPtr
13065xmlSAXParseDoc(xmlSAXHandlerPtr sax, const xmlChar *cur, int recovery) {
13066 xmlDocPtr ret;
13067 xmlParserCtxtPtr ctxt;
13068 xmlSAXHandlerPtr oldsax = NULL;
13069
13070 if (cur == NULL) return(NULL);
13071
13072
13073 ctxt = xmlCreateDocParserCtxt(cur);
13074 if (ctxt == NULL) return(NULL);
13075 if (sax != NULL) {
13076 oldsax = ctxt->sax;
13077 ctxt->sax = sax;
13078 ctxt->userData = NULL;
13079 }
13080 xmlDetectSAX2(ctxt);
13081
13082 xmlParseDocument(ctxt);
13083 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
13084 else {
13085 ret = NULL;
13086 xmlFreeDoc(ctxt->myDoc);
13087 ctxt->myDoc = NULL;
13088 }
13089 if (sax != NULL)
13090 ctxt->sax = oldsax;
13091 xmlFreeParserCtxt(ctxt);
13092
13093 return(ret);
13094}
13095
13096/**
13097 * xmlParseDoc:
13098 * @cur: a pointer to an array of xmlChar
13099 *
13100 * parse an XML in-memory document and build a tree.
13101 *
13102 * Returns the resulting document tree
13103 */
13104
13105xmlDocPtr
13106xmlParseDoc(const xmlChar *cur) {
13107 return(xmlSAXParseDoc(NULL, cur, 0));
13108}
13109#endif /* LIBXML_SAX1_ENABLED */
13110
13111#ifdef LIBXML_LEGACY_ENABLED
13112/************************************************************************
13113 * *
13114 * Specific function to keep track of entities references *
13115 * and used by the XSLT debugger *
13116 * *
13117 ************************************************************************/
13118
13119static xmlEntityReferenceFunc xmlEntityRefFunc = NULL;
13120
13121/**
13122 * xmlAddEntityReference:
13123 * @ent : A valid entity
13124 * @firstNode : A valid first node for children of entity
13125 * @lastNode : A valid last node of children entity
13126 *
13127 * Notify of a reference to an entity of type XML_EXTERNAL_GENERAL_PARSED_ENTITY
13128 */
13129static void
13130xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
13131 xmlNodePtr lastNode)
13132{
13133 if (xmlEntityRefFunc != NULL) {
13134 (*xmlEntityRefFunc) (ent, firstNode, lastNode);
13135 }
13136}
13137
13138
13139/**
13140 * xmlSetEntityReferenceFunc:
13141 * @func: A valid function
13142 *
13143 * Set the function to call call back when a xml reference has been made
13144 */
13145void
13146xmlSetEntityReferenceFunc(xmlEntityReferenceFunc func)
13147{
13148 xmlEntityRefFunc = func;
13149}
13150#endif /* LIBXML_LEGACY_ENABLED */
13151
13152/************************************************************************
13153 * *
13154 * Miscellaneous *
13155 * *
13156 ************************************************************************/
13157
13158#ifdef LIBXML_XPATH_ENABLED
13159#include <libxml/xpath.h>
13160#endif
13161
13162extern void XMLCDECL xmlGenericErrorDefaultFunc(void *ctx, const char *msg, ...);
13163static int xmlParserInitialized = 0;
13164
13165/**
13166 * xmlInitParser:
13167 *
13168 * Initialization function for the XML parser.
13169 * This is not reentrant. Call once before processing in case of
13170 * use in multithreaded programs.
13171 */
13172
13173void
13174xmlInitParser(void) {
13175 if (xmlParserInitialized != 0)
13176 return;
13177
13178#ifdef LIBXML_THREAD_ENABLED
13179 __xmlGlobalInitMutexLock();
13180 if (xmlParserInitialized == 0) {
13181#endif
13182 if ((xmlGenericError == xmlGenericErrorDefaultFunc) ||
13183 (xmlGenericError == NULL))
13184 initGenericErrorDefaultFunc(NULL);
13185 xmlInitGlobals();
13186 xmlInitThreads();
13187 xmlInitMemory();
13188 xmlInitCharEncodingHandlers();
13189 xmlDefaultSAXHandlerInit();
13190 xmlRegisterDefaultInputCallbacks();
13191#ifdef LIBXML_OUTPUT_ENABLED
13192 xmlRegisterDefaultOutputCallbacks();
13193#endif /* LIBXML_OUTPUT_ENABLED */
13194#ifdef LIBXML_HTML_ENABLED
13195 htmlInitAutoClose();
13196 htmlDefaultSAXHandlerInit();
13197#endif
13198#ifdef LIBXML_XPATH_ENABLED
13199 xmlXPathInit();
13200#endif
13201 xmlParserInitialized = 1;
13202#ifdef LIBXML_THREAD_ENABLED
13203 }
13204 __xmlGlobalInitMutexUnlock();
13205#endif
13206}
13207
13208/**
13209 * xmlCleanupParser:
13210 *
13211 * Cleanup function for the XML library. It tries to reclaim all
13212 * parsing related global memory allocated for the library processing.
13213 * It doesn't deallocate any document related memory. Calling this
13214 * function should not prevent reusing the library but one should
13215 * call xmlCleanupParser() only when the process has
13216 * finished using the library or XML document built with it.
13217 */
13218
13219void
13220xmlCleanupParser(void) {
13221 if (!xmlParserInitialized)
13222 return;
13223
13224 xmlCleanupCharEncodingHandlers();
13225#ifdef LIBXML_CATALOG_ENABLED
13226 xmlCatalogCleanup();
13227#endif
13228 xmlDictCleanup();
13229 xmlCleanupInputCallbacks();
13230#ifdef LIBXML_OUTPUT_ENABLED
13231 xmlCleanupOutputCallbacks();
13232#endif
13233#ifdef LIBXML_SCHEMAS_ENABLED
13234 xmlSchemaCleanupTypes();
13235 xmlRelaxNGCleanupTypes();
13236#endif
13237 xmlCleanupGlobals();
13238 xmlResetLastError();
13239 xmlCleanupThreads(); /* must be last if called not from the main thread */
13240 xmlCleanupMemory();
13241 xmlParserInitialized = 0;
13242}
13243
13244/************************************************************************
13245 * *
13246 * New set (2.6.0) of simpler and more flexible APIs *
13247 * *
13248 ************************************************************************/
13249
/**
 * DICT_FREE:
 * @str:  a string
 *
 * Free a string if it is not owned by the "dict" dictionary in the
 * current scope. A NULL @str is ignored.
 *
 * The body is wrapped in do { } while (0) so that the macro expands to a
 * single statement and can be used safely inside if/else constructs;
 * callers terminate the invocation with a semicolon.
 */
#define DICT_FREE(str)						\
    do {							\
	if ((str) && ((!dict) ||				\
	    (xmlDictOwns(dict, (const xmlChar *)(str)) == 0)))	\
	    xmlFree((char *)(str));				\
    } while (0)
13261
13262/**
13263 * xmlCtxtReset:
13264 * @ctxt: an XML parser context
13265 *
13266 * Reset a parser context
13267 */
13268void
13269xmlCtxtReset(xmlParserCtxtPtr ctxt)
13270{
13271 xmlParserInputPtr input;
13272 xmlDictPtr dict;
13273
13274 if (ctxt == NULL)
13275 return;
13276
13277 dict = ctxt->dict;
13278
13279 while ((input = inputPop(ctxt)) != NULL) { /* Non consuming */
13280 xmlFreeInputStream(input);
13281 }
13282 ctxt->inputNr = 0;
13283 ctxt->input = NULL;
13284
13285 ctxt->spaceNr = 0;
13286 if (ctxt->spaceTab != NULL) {
13287 ctxt->spaceTab[0] = -1;
13288 ctxt->space = &ctxt->spaceTab[0];
13289 } else {
13290 ctxt->space = NULL;
13291 }
13292
13293
13294 ctxt->nodeNr = 0;
13295 ctxt->node = NULL;
13296
13297 ctxt->nameNr = 0;
13298 ctxt->name = NULL;
13299
13300 DICT_FREE(ctxt->version);
13301 ctxt->version = NULL;
13302 DICT_FREE(ctxt->encoding);
13303 ctxt->encoding = NULL;
13304 DICT_FREE(ctxt->directory);
13305 ctxt->directory = NULL;
13306 DICT_FREE(ctxt->extSubURI);
13307 ctxt->extSubURI = NULL;
13308 DICT_FREE(ctxt->extSubSystem);
13309 ctxt->extSubSystem = NULL;
13310 if (ctxt->myDoc != NULL)
13311 xmlFreeDoc(ctxt->myDoc);
13312 ctxt->myDoc = NULL;
13313
13314 ctxt->standalone = -1;
13315 ctxt->hasExternalSubset = 0;
13316 ctxt->hasPErefs = 0;
13317 ctxt->html = 0;
13318 ctxt->external = 0;
13319 ctxt->instate = XML_PARSER_START;
13320 ctxt->token = 0;
13321
13322 ctxt->wellFormed = 1;
13323 ctxt->nsWellFormed = 1;
13324 ctxt->disableSAX = 0;
13325 ctxt->valid = 1;
13326#if 0
13327 ctxt->vctxt.userData = ctxt;
13328 ctxt->vctxt.error = xmlParserValidityError;
13329 ctxt->vctxt.warning = xmlParserValidityWarning;
13330#endif
13331 ctxt->record_info = 0;
13332 ctxt->nbChars = 0;
13333 ctxt->checkIndex = 0;
13334 ctxt->inSubset = 0;
13335 ctxt->errNo = XML_ERR_OK;
13336 ctxt->depth = 0;
13337 ctxt->charset = XML_CHAR_ENCODING_UTF8;
13338 ctxt->catalogs = NULL;
13339 ctxt->nbentities = 0;
13340 ctxt->sizeentities = 0;
13341 ctxt->sizeentcopy = 0;
13342 xmlInitNodeInfoSeq(&ctxt->node_seq);
13343
13344 if (ctxt->attsDefault != NULL) {
13345 xmlHashFree(ctxt->attsDefault, (xmlHashDeallocator) xmlFree);
13346 ctxt->attsDefault = NULL;
13347 }
13348 if (ctxt->attsSpecial != NULL) {
13349 xmlHashFree(ctxt->attsSpecial, NULL);
13350 ctxt->attsSpecial = NULL;
13351 }
13352
13353#ifdef LIBXML_CATALOG_ENABLED
13354 if (ctxt->catalogs != NULL)
13355 xmlCatalogFreeLocal(ctxt->catalogs);
13356#endif
13357 if (ctxt->lastError.code != XML_ERR_OK)
13358 xmlResetError(&ctxt->lastError);
13359}
13360
13361/**
13362 * xmlCtxtResetPush:
13363 * @ctxt: an XML parser context
13364 * @chunk: a pointer to an array of chars
13365 * @size: number of chars in the array
13366 * @filename: an optional file name or URI
13367 * @encoding: the document encoding, or NULL
13368 *
13369 * Reset a push parser context
13370 *
13371 * Returns 0 in case of success and 1 in case of error
13372 */
13373int
13374xmlCtxtResetPush(xmlParserCtxtPtr ctxt, const char *chunk,
13375 int size, const char *filename, const char *encoding)
13376{
13377 xmlParserInputPtr inputStream;
13378 xmlParserInputBufferPtr buf;
13379 xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
13380
13381 if (ctxt == NULL)
13382 return(1);
13383
13384 if ((encoding == NULL) && (chunk != NULL) && (size >= 4))
13385 enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
13386
13387 buf = xmlAllocParserInputBuffer(enc);
13388 if (buf == NULL)
13389 return(1);
13390
13391 if (ctxt == NULL) {
13392 xmlFreeParserInputBuffer(buf);
13393 return(1);
13394 }
13395
13396 xmlCtxtReset(ctxt);
13397
13398 if (ctxt->pushTab == NULL) {
13399 ctxt->pushTab = (void **) xmlMalloc(ctxt->nameMax * 3 *
13400 sizeof(xmlChar *));
13401 if (ctxt->pushTab == NULL) {
13402 xmlErrMemory(ctxt, NULL);
13403 xmlFreeParserInputBuffer(buf);
13404 return(1);
13405 }
13406 }
13407
13408 if (filename == NULL) {
13409 ctxt->directory = NULL;
13410 } else {
13411 ctxt->directory = xmlParserGetDirectory(filename);
13412 }
13413
13414 inputStream = xmlNewInputStream(ctxt);
13415 if (inputStream == NULL) {
13416 xmlFreeParserInputBuffer(buf);
13417 return(1);
13418 }
13419
13420 if (filename == NULL)
13421 inputStream->filename = NULL;
13422 else
13423 inputStream->filename = (char *)
13424 xmlCanonicPath((const xmlChar *) filename);
13425 inputStream->buf = buf;
13426 inputStream->base = inputStream->buf->buffer->content;
13427 inputStream->cur = inputStream->buf->buffer->content;
13428 inputStream->end =
13429 &inputStream->buf->buffer->content[inputStream->buf->buffer->use];
13430
13431 inputPush(ctxt, inputStream);
13432
13433 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
13434 (ctxt->input->buf != NULL)) {
13435 int base = ctxt->input->base - ctxt->input->buf->buffer->content;
13436 int cur = ctxt->input->cur - ctxt->input->base;
13437
13438 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
13439
13440 ctxt->input->base = ctxt->input->buf->buffer->content + base;
13441 ctxt->input->cur = ctxt->input->base + cur;
13442 ctxt->input->end =
13443 &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->
13444 use];
13445#ifdef DEBUG_PUSH
13446 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
13447#endif
13448 }
13449
13450 if (encoding != NULL) {
13451 xmlCharEncodingHandlerPtr hdlr;
13452
13453 hdlr = xmlFindCharEncodingHandler(encoding);
13454 if (hdlr != NULL) {
13455 xmlSwitchToEncoding(ctxt, hdlr);
13456 } else {
13457 xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
13458 "Unsupported encoding %s\n", BAD_CAST encoding);
13459 }
13460 } else if (enc != XML_CHAR_ENCODING_NONE) {
13461 xmlSwitchEncoding(ctxt, enc);
13462 }
13463
13464 return(0);
13465}
13466
13467/**
13468 * xmlCtxtUseOptions:
13469 * @ctxt: an XML parser context
13470 * @options: a combination of xmlParserOption
13471 *
13472 * Applies the options to the parser context
13473 *
13474 * Returns 0 in case of success, the set of unknown or unimplemented options
13475 * in case of error.
13476 */
13477int
13478xmlCtxtUseOptions(xmlParserCtxtPtr ctxt, int options)
13479{
13480 if (ctxt == NULL)
13481 return(-1);
13482 if (options & XML_PARSE_RECOVER) {
13483 ctxt->recovery = 1;
13484 options -= XML_PARSE_RECOVER;
13485 } else
13486 ctxt->recovery = 0;
13487 if (options & XML_PARSE_DTDLOAD) {
13488 ctxt->loadsubset = XML_DETECT_IDS;
13489 options -= XML_PARSE_DTDLOAD;
13490 } else
13491 ctxt->loadsubset = 0;
13492 if (options & XML_PARSE_DTDATTR) {
13493 ctxt->loadsubset |= XML_COMPLETE_ATTRS;
13494 options -= XML_PARSE_DTDATTR;
13495 }
13496 if (options & XML_PARSE_NOENT) {
13497 ctxt->replaceEntities = 1;
13498 /* ctxt->loadsubset |= XML_DETECT_IDS; */
13499 options -= XML_PARSE_NOENT;
13500 } else
13501 ctxt->replaceEntities = 0;
13502 if (options & XML_PARSE_PEDANTIC) {
13503 ctxt->pedantic = 1;
13504 options -= XML_PARSE_PEDANTIC;
13505 } else
13506 ctxt->pedantic = 0;
13507 if (options & XML_PARSE_NOBLANKS) {
13508 ctxt->keepBlanks = 0;
13509 ctxt->sax->ignorableWhitespace = xmlSAX2IgnorableWhitespace;
13510 options -= XML_PARSE_NOBLANKS;
13511 } else
13512 ctxt->keepBlanks = 1;
13513 if (options & XML_PARSE_DTDVALID) {
13514 ctxt->validate = 1;
13515 if (options & XML_PARSE_NOWARNING)
13516 ctxt->vctxt.warning = NULL;
13517 if (options & XML_PARSE_NOERROR)
13518 ctxt->vctxt.error = NULL;
13519 options -= XML_PARSE_DTDVALID;
13520 } else
13521 ctxt->validate = 0;
13522 if (options & XML_PARSE_NOWARNING) {
13523 ctxt->sax->warning = NULL;
13524 options -= XML_PARSE_NOWARNING;
13525 }
13526 if (options & XML_PARSE_NOERROR) {
13527 ctxt->sax->error = NULL;
13528 ctxt->sax->fatalError = NULL;
13529 options -= XML_PARSE_NOERROR;
13530 }
13531#ifdef LIBXML_SAX1_ENABLED
13532 if (options & XML_PARSE_SAX1) {
13533 ctxt->sax->startElement = xmlSAX2StartElement;
13534 ctxt->sax->endElement = xmlSAX2EndElement;
13535 ctxt->sax->startElementNs = NULL;
13536 ctxt->sax->endElementNs = NULL;
13537 ctxt->sax->initialized = 1;
13538 options -= XML_PARSE_SAX1;
13539 }
13540#endif /* LIBXML_SAX1_ENABLED */
13541 if (options & XML_PARSE_NODICT) {
13542 ctxt->dictNames = 0;
13543 options -= XML_PARSE_NODICT;
13544 } else {
13545 ctxt->dictNames = 1;
13546 }
13547 if (options & XML_PARSE_NOCDATA) {
13548 ctxt->sax->cdataBlock = NULL;
13549 options -= XML_PARSE_NOCDATA;
13550 }
13551 if (options & XML_PARSE_NSCLEAN) {
13552 ctxt->options |= XML_PARSE_NSCLEAN;
13553 options -= XML_PARSE_NSCLEAN;
13554 }
13555 if (options & XML_PARSE_NONET) {
13556 ctxt->options |= XML_PARSE_NONET;
13557 options -= XML_PARSE_NONET;
13558 }
13559 if (options & XML_PARSE_COMPACT) {
13560 ctxt->options |= XML_PARSE_COMPACT;
13561 options -= XML_PARSE_COMPACT;
13562 }
13563 ctxt->linenumbers = 1;
13564 return (options);
13565}
13566
13567/**
13568 * xmlDoRead:
13569 * @ctxt: an XML parser context
13570 * @URL: the base URL to use for the document
13571 * @encoding: the document encoding, or NULL
13572 * @options: a combination of xmlParserOption
13573 * @reuse: keep the context for reuse
13574 *
13575 * Common front-end for the xmlRead functions
13576 *
13577 * Returns the resulting document tree or NULL
13578 */
13579static xmlDocPtr
13580xmlDoRead(xmlParserCtxtPtr ctxt, const char *URL, const char *encoding,
13581 int options, int reuse)
13582{
13583 xmlDocPtr ret;
13584
13585 xmlCtxtUseOptions(ctxt, options);
13586 if (encoding != NULL) {
13587 xmlCharEncodingHandlerPtr hdlr;
13588
13589 hdlr = xmlFindCharEncodingHandler(encoding);
13590 if (hdlr != NULL)
13591 xmlSwitchToEncoding(ctxt, hdlr);
13592 }
13593 if ((URL != NULL) && (ctxt->input != NULL) &&
13594 (ctxt->input->filename == NULL))
13595 ctxt->input->filename = (char *) xmlStrdup((const xmlChar *) URL);
13596 xmlParseDocument(ctxt);
13597 if ((ctxt->wellFormed) || ctxt->recovery)
13598 ret = ctxt->myDoc;
13599 else {
13600 ret = NULL;
13601 if (ctxt->myDoc != NULL) {
13602 xmlFreeDoc(ctxt->myDoc);
13603 }
13604 }
13605 ctxt->myDoc = NULL;
13606 if (!reuse) {
13607 xmlFreeParserCtxt(ctxt);
13608 }
13609
13610 return (ret);
13611}
13612
13613/**
13614 * xmlReadDoc:
13615 * @cur: a pointer to a zero terminated string
13616 * @URL: the base URL to use for the document
13617 * @encoding: the document encoding, or NULL
13618 * @options: a combination of xmlParserOption
13619 *
13620 * parse an XML in-memory document and build a tree.
13621 *
13622 * Returns the resulting document tree
13623 */
13624xmlDocPtr
13625xmlReadDoc(const xmlChar * cur, const char *URL, const char *encoding, int options)
13626{
13627 xmlParserCtxtPtr ctxt;
13628
13629 if (cur == NULL)
13630 return (NULL);
13631
13632 ctxt = xmlCreateDocParserCtxt(cur);
13633 if (ctxt == NULL)
13634 return (NULL);
13635 return (xmlDoRead(ctxt, URL, encoding, options, 0));
13636}
13637
13638/**
13639 * xmlReadFile:
13640 * @filename: a file or URL
13641 * @encoding: the document encoding, or NULL
13642 * @options: a combination of xmlParserOption
13643 *
13644 * parse an XML file from the filesystem or the network.
13645 *
13646 * Returns the resulting document tree
13647 */
13648xmlDocPtr
13649xmlReadFile(const char *filename, const char *encoding, int options)
13650{
13651 xmlParserCtxtPtr ctxt;
13652
13653 ctxt = xmlCreateURLParserCtxt(filename, options);
13654 if (ctxt == NULL)
13655 return (NULL);
13656 return (xmlDoRead(ctxt, NULL, encoding, options, 0));
13657}
13658
13659/**
13660 * xmlReadMemory:
13661 * @buffer: a pointer to a char array
13662 * @size: the size of the array
13663 * @URL: the base URL to use for the document
13664 * @encoding: the document encoding, or NULL
13665 * @options: a combination of xmlParserOption
13666 *
13667 * parse an XML in-memory document and build a tree.
13668 *
13669 * Returns the resulting document tree
13670 */
13671xmlDocPtr
13672xmlReadMemory(const char *buffer, int size, const char *URL, const char *encoding, int options)
13673{
13674 xmlParserCtxtPtr ctxt;
13675
13676 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
13677 if (ctxt == NULL)
13678 return (NULL);
13679 return (xmlDoRead(ctxt, URL, encoding, options, 0));
13680}
13681
13682/**
13683 * xmlReadFd:
13684 * @fd: an open file descriptor
13685 * @URL: the base URL to use for the document
13686 * @encoding: the document encoding, or NULL
13687 * @options: a combination of xmlParserOption
13688 *
13689 * parse an XML from a file descriptor and build a tree.
13690 * NOTE that the file descriptor will not be closed when the
13691 * reader is closed or reset.
13692 *
13693 * Returns the resulting document tree
13694 */
13695xmlDocPtr
13696xmlReadFd(int fd, const char *URL, const char *encoding, int options)
13697{
13698 xmlParserCtxtPtr ctxt;
13699 xmlParserInputBufferPtr input;
13700 xmlParserInputPtr stream;
13701
13702 if (fd < 0)
13703 return (NULL);
13704
13705 input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
13706 if (input == NULL)
13707 return (NULL);
13708 input->closecallback = NULL;
13709 ctxt = xmlNewParserCtxt();
13710 if (ctxt == NULL) {
13711 xmlFreeParserInputBuffer(input);
13712 return (NULL);
13713 }
13714 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
13715 if (stream == NULL) {
13716 xmlFreeParserInputBuffer(input);
13717 xmlFreeParserCtxt(ctxt);
13718 return (NULL);
13719 }
13720 inputPush(ctxt, stream);
13721 return (xmlDoRead(ctxt, URL, encoding, options, 0));
13722}
13723
13724/**
13725 * xmlReadIO:
13726 * @ioread: an I/O read function
13727 * @ioclose: an I/O close function
13728 * @ioctx: an I/O handler
13729 * @URL: the base URL to use for the document
13730 * @encoding: the document encoding, or NULL
13731 * @options: a combination of xmlParserOption
13732 *
13733 * parse an XML document from I/O functions and source and build a tree.
13734 *
13735 * Returns the resulting document tree
13736 */
13737xmlDocPtr
13738xmlReadIO(xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
13739 void *ioctx, const char *URL, const char *encoding, int options)
13740{
13741 xmlParserCtxtPtr ctxt;
13742 xmlParserInputBufferPtr input;
13743 xmlParserInputPtr stream;
13744
13745 if (ioread == NULL)
13746 return (NULL);
13747
13748 input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
13749 XML_CHAR_ENCODING_NONE);
13750 if (input == NULL)
13751 return (NULL);
13752 ctxt = xmlNewParserCtxt();
13753 if (ctxt == NULL) {
13754 xmlFreeParserInputBuffer(input);
13755 return (NULL);
13756 }
13757 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
13758 if (stream == NULL) {
13759 xmlFreeParserInputBuffer(input);
13760 xmlFreeParserCtxt(ctxt);
13761 return (NULL);
13762 }
13763 inputPush(ctxt, stream);
13764 return (xmlDoRead(ctxt, URL, encoding, options, 0));
13765}
13766
13767/**
13768 * xmlCtxtReadDoc:
13769 * @ctxt: an XML parser context
13770 * @cur: a pointer to a zero terminated string
13771 * @URL: the base URL to use for the document
13772 * @encoding: the document encoding, or NULL
13773 * @options: a combination of xmlParserOption
13774 *
13775 * parse an XML in-memory document and build a tree.
13776 * This reuses the existing @ctxt parser context
13777 *
13778 * Returns the resulting document tree
13779 */
13780xmlDocPtr
13781xmlCtxtReadDoc(xmlParserCtxtPtr ctxt, const xmlChar * cur,
13782 const char *URL, const char *encoding, int options)
13783{
13784 xmlParserInputPtr stream;
13785
13786 if (cur == NULL)
13787 return (NULL);
13788 if (ctxt == NULL)
13789 return (NULL);
13790
13791 xmlCtxtReset(ctxt);
13792
13793 stream = xmlNewStringInputStream(ctxt, cur);
13794 if (stream == NULL) {
13795 return (NULL);
13796 }
13797 inputPush(ctxt, stream);
13798 return (xmlDoRead(ctxt, URL, encoding, options, 1));
13799}
13800
13801/**
13802 * xmlCtxtReadFile:
13803 * @ctxt: an XML parser context
13804 * @filename: a file or URL
13805 * @encoding: the document encoding, or NULL
13806 * @options: a combination of xmlParserOption
13807 *
13808 * parse an XML file from the filesystem or the network.
13809 * This reuses the existing @ctxt parser context
13810 *
13811 * Returns the resulting document tree
13812 */
13813xmlDocPtr
13814xmlCtxtReadFile(xmlParserCtxtPtr ctxt, const char *filename,
13815 const char *encoding, int options)
13816{
13817 xmlParserInputPtr stream;
13818
13819 if (filename == NULL)
13820 return (NULL);
13821 if (ctxt == NULL)
13822 return (NULL);
13823
13824 xmlCtxtReset(ctxt);
13825
13826 stream = xmlLoadExternalEntity(filename, NULL, ctxt);
13827 if (stream == NULL) {
13828 return (NULL);
13829 }
13830 inputPush(ctxt, stream);
13831 return (xmlDoRead(ctxt, NULL, encoding, options, 1));
13832}
13833
13834/**
13835 * xmlCtxtReadMemory:
13836 * @ctxt: an XML parser context
13837 * @buffer: a pointer to a char array
13838 * @size: the size of the array
13839 * @URL: the base URL to use for the document
13840 * @encoding: the document encoding, or NULL
13841 * @options: a combination of xmlParserOption
13842 *
13843 * parse an XML in-memory document and build a tree.
13844 * This reuses the existing @ctxt parser context
13845 *
13846 * Returns the resulting document tree
13847 */
13848xmlDocPtr
13849xmlCtxtReadMemory(xmlParserCtxtPtr ctxt, const char *buffer, int size,
13850 const char *URL, const char *encoding, int options)
13851{
13852 xmlParserInputBufferPtr input;
13853 xmlParserInputPtr stream;
13854
13855 if (ctxt == NULL)
13856 return (NULL);
13857 if (buffer == NULL)
13858 return (NULL);
13859
13860 xmlCtxtReset(ctxt);
13861
13862 input = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
13863 if (input == NULL) {
13864 return(NULL);
13865 }
13866
13867 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
13868 if (stream == NULL) {
13869 xmlFreeParserInputBuffer(input);
13870 return(NULL);
13871 }
13872
13873 inputPush(ctxt, stream);
13874 return (xmlDoRead(ctxt, URL, encoding, options, 1));
13875}
13876
13877/**
13878 * xmlCtxtReadFd:
13879 * @ctxt: an XML parser context
13880 * @fd: an open file descriptor
13881 * @URL: the base URL to use for the document
13882 * @encoding: the document encoding, or NULL
13883 * @options: a combination of xmlParserOption
13884 *
13885 * parse an XML from a file descriptor and build a tree.
13886 * This reuses the existing @ctxt parser context
13887 * NOTE that the file descriptor will not be closed when the
13888 * reader is closed or reset.
13889 *
13890 * Returns the resulting document tree
13891 */
13892xmlDocPtr
13893xmlCtxtReadFd(xmlParserCtxtPtr ctxt, int fd,
13894 const char *URL, const char *encoding, int options)
13895{
13896 xmlParserInputBufferPtr input;
13897 xmlParserInputPtr stream;
13898
13899 if (fd < 0)
13900 return (NULL);
13901 if (ctxt == NULL)
13902 return (NULL);
13903
13904 xmlCtxtReset(ctxt);
13905
13906
13907 input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
13908 if (input == NULL)
13909 return (NULL);
13910 input->closecallback = NULL;
13911 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
13912 if (stream == NULL) {
13913 xmlFreeParserInputBuffer(input);
13914 return (NULL);
13915 }
13916 inputPush(ctxt, stream);
13917 return (xmlDoRead(ctxt, URL, encoding, options, 1));
13918}
13919
13920/**
13921 * xmlCtxtReadIO:
13922 * @ctxt: an XML parser context
13923 * @ioread: an I/O read function
13924 * @ioclose: an I/O close function
13925 * @ioctx: an I/O handler
13926 * @URL: the base URL to use for the document
13927 * @encoding: the document encoding, or NULL
13928 * @options: a combination of xmlParserOption
13929 *
13930 * parse an XML document from I/O functions and source and build a tree.
13931 * This reuses the existing @ctxt parser context
13932 *
13933 * Returns the resulting document tree
13934 */
13935xmlDocPtr
13936xmlCtxtReadIO(xmlParserCtxtPtr ctxt, xmlInputReadCallback ioread,
13937 xmlInputCloseCallback ioclose, void *ioctx,
13938 const char *URL,
13939 const char *encoding, int options)
13940{
13941 xmlParserInputBufferPtr input;
13942 xmlParserInputPtr stream;
13943
13944 if (ioread == NULL)
13945 return (NULL);
13946 if (ctxt == NULL)
13947 return (NULL);
13948
13949 xmlCtxtReset(ctxt);
13950
13951 input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
13952 XML_CHAR_ENCODING_NONE);
13953 if (input == NULL)
13954 return (NULL);
13955 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
13956 if (stream == NULL) {
13957 xmlFreeParserInputBuffer(input);
13958 return (NULL);
13959 }
13960 inputPush(ctxt, stream);
13961 return (xmlDoRead(ctxt, URL, encoding, options, 1));
13962}
13963
13964#define bottom_parser
13965#include "elfgcchack.h"
Note: See TracBrowser for help on using the repository browser.

© 2024 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette