VirtualBox

source: vbox/trunk/src/libs/libxml2-2.6.31/parser.c@ 44536

Last change on this file since 44536 was 44084, checked in by vboxsync, 12 years ago

libxml2: fixes from upstream

  • Property svn:eol-style set to native
File size: 373.6 KB
Line 
/*
 * parser.c : an XML 1.0 parser, namespaces and validity support are mostly
 *            implemented on top of the SAX interfaces
 *
 * References:
 *   The XML specification:
 *     http://www.w3.org/TR/REC-xml
 *   Original 1.0 version:
 *     http://www.w3.org/TR/1998/REC-xml-19980210
 *   XML second edition working draft
 *     http://www.w3.org/TR/2000/WD-xml-2e-20000814
 *
 * Okay this is a big file, the parser core is around 7000 lines, then it
 * is followed by the progressive parser top routines, then the various
 * high level APIs to call the parser and a few miscellaneous functions.
 * A number of helper functions and deprecated ones have been moved to
 * parserInternals.c to reduce this file size.
 * As much as possible the functions are associated with their relative
 * production in the XML specification. A few productions defining the
 * different ranges of characters are actually implemented either in
 * parserInternals.h or parserInternals.c
 * The DOM tree build is realized from the default SAX callbacks in
 * the module SAX.c.
 * The routines doing the validation checks are in valid.c and called either
 * from the SAX callbacks or as standalone functions using a preparsed
 * document.
 *
 * See Copyright for the status of this software.
 *
 * daniel@veillard.com
 */
32
33#define IN_LIBXML
34#include "libxml.h"
35
36#if defined(WIN32) && !defined (__CYGWIN__)
37#define XML_DIR_SEP '\\'
38#else
39#define XML_DIR_SEP '/'
40#endif
41
42#include <stdlib.h>
43#include <limits.h>
44#include <string.h>
45#include <stdarg.h>
46#include <libxml/xmlmemory.h>
47#include <libxml/threads.h>
48#include <libxml/globals.h>
49#include <libxml/tree.h>
50#include <libxml/parser.h>
51#include <libxml/parserInternals.h>
52#include <libxml/valid.h>
53#include <libxml/entities.h>
54#include <libxml/xmlerror.h>
55#include <libxml/encoding.h>
56#include <libxml/xmlIO.h>
57#include <libxml/uri.h>
58#ifdef LIBXML_CATALOG_ENABLED
59#include <libxml/catalog.h>
60#endif
61#ifdef LIBXML_SCHEMAS_ENABLED
62#include <libxml/xmlschemastypes.h>
63#include <libxml/relaxng.h>
64#endif
65#ifdef HAVE_CTYPE_H
66#include <ctype.h>
67#endif
68#ifdef HAVE_STDLIB_H
69#include <stdlib.h>
70#endif
71#ifdef HAVE_SYS_STAT_H
72#include <sys/stat.h>
73#endif
74#ifdef HAVE_FCNTL_H
75#include <fcntl.h>
76#endif
77#ifdef HAVE_UNISTD_H
78#include <unistd.h>
79#endif
80#ifdef HAVE_ZLIB_H
81#include <zlib.h>
82#endif
83
84static void
85xmlFatalErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, const char *info);
86
87/************************************************************************
88 * *
89 * Arbitrary limits set in the parser. *
90 * *
91 ************************************************************************/
92
93#define XML_PARSER_BIG_ENTITY 1000
94#define XML_PARSER_LOT_ENTITY 5000
95
/*
 * XML_PARSER_NON_LINEAR is the threshold where the ratio of parsed entity
 * replacement over the size in bytes of the input indicates that you have
 * an exponential behaviour. A value of 10 corresponds to at least 3 entity
 * replacements per byte of input.
 */
102#define XML_PARSER_NON_LINEAR 10
103
104/*
105 * xmlParserEntityCheck
106 *
107 * Function to check non-linear entity expansion behaviour
108 * This is here to detect and stop exponential linear entity expansion
109 * This is not a limitation of the parser but a safety
110 * boundary feature.
111 */
112static int
113xmlParserEntityCheck(xmlParserCtxtPtr ctxt, size_t size,
114 xmlEntityPtr ent)
115{
116 size_t consumed = 0;
117
118 if (ctxt == NULL)
119 return (0);
120 if (ctxt->lastError.code == XML_ERR_ENTITY_LOOP)
121 return (1);
122 if (size != 0) {
123 /*
124 * Do the check based on the replacement size of the entity
125 */
126 if (size < XML_PARSER_BIG_ENTITY)
127 return(0);
128
129 /*
130 * A limit on the amount of text data reasonably used
131 */
132 if (ctxt->input != NULL) {
133 consumed = ctxt->input->consumed +
134 (ctxt->input->cur - ctxt->input->base);
135 }
136 consumed += ctxt->sizeentities;
137
138 if ((size < XML_PARSER_NON_LINEAR * consumed) &&
139 (ctxt->nbentities * 3 < XML_PARSER_NON_LINEAR * consumed))
140 return (0);
141 } else if (ent != NULL) {
142 /*
143 * use the number of parsed entities in the replacement
144 */
145 size = ent->owner;
146
147 /*
148 * The amount of data parsed counting entities size only once
149 */
150 if (ctxt->input != NULL) {
151 consumed = ctxt->input->consumed +
152 (ctxt->input->cur - ctxt->input->base);
153 }
154 consumed += ctxt->sizeentities;
155
156 /*
157 * Check the density of entities for the amount of data
158 * knowing an entity reference will take at least 3 bytes
159 */
160 if (size * 3 < consumed * XML_PARSER_NON_LINEAR)
161 return (0);
162 } else {
163 /*
164 * strange we got no data for checking just return
165 */
166 return (0);
167 }
168
169 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
170 return (1);
171}
172
173/**
174 * xmlParserMaxDepth:
175 *
176 * arbitrary depth limit for the XML documents that we allow to
177 * process. This is not a limitation of the parser but a safety
178 * boundary feature.
179 */
180unsigned int xmlParserMaxDepth = 1024;
181
182#define SAX2 1
183
184#define XML_PARSER_BIG_BUFFER_SIZE 300
185#define XML_PARSER_BUFFER_SIZE 100
186
187#define SAX_COMPAT_MODE BAD_CAST "SAX compatibility mode document"
188
189/*
190 * List of XML prefixed PI allowed by W3C specs
191 */
192
193static const char *xmlW3CPIs[] = {
194 "xml-stylesheet",
195 NULL
196};
197
198
199/* DEPR void xmlParserHandleReference(xmlParserCtxtPtr ctxt); */
200xmlEntityPtr xmlParseStringPEReference(xmlParserCtxtPtr ctxt,
201 const xmlChar **str);
202
203static xmlParserErrors
204xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
205 xmlSAXHandlerPtr sax,
206 void *user_data, int depth, const xmlChar *URL,
207 const xmlChar *ID, xmlNodePtr *list);
208
209#ifdef LIBXML_LEGACY_ENABLED
210static void
211xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
212 xmlNodePtr lastNode);
213#endif /* LIBXML_LEGACY_ENABLED */
214
215static xmlParserErrors
216xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
217 const xmlChar *string, void *user_data, xmlNodePtr *lst);
218
219/************************************************************************
220 * *
221 * Some factorized error routines *
222 * *
223 ************************************************************************/
224
225/**
226 * xmlErrAttributeDup:
227 * @ctxt: an XML parser context
228 * @prefix: the attribute prefix
229 * @localname: the attribute localname
230 *
231 * Handle a redefinition of attribute error
232 */
233static void
234xmlErrAttributeDup(xmlParserCtxtPtr ctxt, const xmlChar * prefix,
235 const xmlChar * localname)
236{
237 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
238 (ctxt->instate == XML_PARSER_EOF))
239 return;
240 if (ctxt != NULL)
241 ctxt->errNo = XML_ERR_ATTRIBUTE_REDEFINED;
242 if (prefix == NULL)
243 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
244 ctxt->errNo, XML_ERR_FATAL, NULL, 0,
245 (const char *) localname, NULL, NULL, 0, 0,
246 "Attribute %s redefined\n", localname);
247 else
248 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
249 ctxt->errNo, XML_ERR_FATAL, NULL, 0,
250 (const char *) prefix, (const char *) localname,
251 NULL, 0, 0, "Attribute %s:%s redefined\n", prefix,
252 localname);
253 if (ctxt != NULL) {
254 ctxt->wellFormed = 0;
255 if (ctxt->recovery == 0)
256 ctxt->disableSAX = 1;
257 }
258}
259
260/**
261 * xmlFatalErr:
262 * @ctxt: an XML parser context
263 * @error: the error number
264 * @extra: extra information string
265 *
266 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
267 */
268static void
269xmlFatalErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, const char *info)
270{
271 const char *errmsg;
272
273 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
274 (ctxt->instate == XML_PARSER_EOF))
275 return;
276 switch (error) {
277 case XML_ERR_INVALID_HEX_CHARREF:
278 errmsg = "CharRef: invalid hexadecimal value\n";
279 break;
280 case XML_ERR_INVALID_DEC_CHARREF:
281 errmsg = "CharRef: invalid decimal value\n";
282 break;
283 case XML_ERR_INVALID_CHARREF:
284 errmsg = "CharRef: invalid value\n";
285 break;
286 case XML_ERR_INTERNAL_ERROR:
287 errmsg = "internal error";
288 break;
289 case XML_ERR_PEREF_AT_EOF:
290 errmsg = "PEReference at end of document\n";
291 break;
292 case XML_ERR_PEREF_IN_PROLOG:
293 errmsg = "PEReference in prolog\n";
294 break;
295 case XML_ERR_PEREF_IN_EPILOG:
296 errmsg = "PEReference in epilog\n";
297 break;
298 case XML_ERR_PEREF_NO_NAME:
299 errmsg = "PEReference: no name\n";
300 break;
301 case XML_ERR_PEREF_SEMICOL_MISSING:
302 errmsg = "PEReference: expecting ';'\n";
303 break;
304 case XML_ERR_ENTITY_LOOP:
305 errmsg = "Detected an entity reference loop\n";
306 break;
307 case XML_ERR_ENTITY_NOT_STARTED:
308 errmsg = "EntityValue: \" or ' expected\n";
309 break;
310 case XML_ERR_ENTITY_PE_INTERNAL:
311 errmsg = "PEReferences forbidden in internal subset\n";
312 break;
313 case XML_ERR_ENTITY_NOT_FINISHED:
314 errmsg = "EntityValue: \" or ' expected\n";
315 break;
316 case XML_ERR_ATTRIBUTE_NOT_STARTED:
317 errmsg = "AttValue: \" or ' expected\n";
318 break;
319 case XML_ERR_LT_IN_ATTRIBUTE:
320 errmsg = "Unescaped '<' not allowed in attributes values\n";
321 break;
322 case XML_ERR_LITERAL_NOT_STARTED:
323 errmsg = "SystemLiteral \" or ' expected\n";
324 break;
325 case XML_ERR_LITERAL_NOT_FINISHED:
326 errmsg = "Unfinished System or Public ID \" or ' expected\n";
327 break;
328 case XML_ERR_MISPLACED_CDATA_END:
329 errmsg = "Sequence ']]>' not allowed in content\n";
330 break;
331 case XML_ERR_URI_REQUIRED:
332 errmsg = "SYSTEM or PUBLIC, the URI is missing\n";
333 break;
334 case XML_ERR_PUBID_REQUIRED:
335 errmsg = "PUBLIC, the Public Identifier is missing\n";
336 break;
337 case XML_ERR_HYPHEN_IN_COMMENT:
338 errmsg = "Comment must not contain '--' (double-hyphen)\n";
339 break;
340 case XML_ERR_PI_NOT_STARTED:
341 errmsg = "xmlParsePI : no target name\n";
342 break;
343 case XML_ERR_RESERVED_XML_NAME:
344 errmsg = "Invalid PI name\n";
345 break;
346 case XML_ERR_NOTATION_NOT_STARTED:
347 errmsg = "NOTATION: Name expected here\n";
348 break;
349 case XML_ERR_NOTATION_NOT_FINISHED:
350 errmsg = "'>' required to close NOTATION declaration\n";
351 break;
352 case XML_ERR_VALUE_REQUIRED:
353 errmsg = "Entity value required\n";
354 break;
355 case XML_ERR_URI_FRAGMENT:
356 errmsg = "Fragment not allowed";
357 break;
358 case XML_ERR_ATTLIST_NOT_STARTED:
359 errmsg = "'(' required to start ATTLIST enumeration\n";
360 break;
361 case XML_ERR_NMTOKEN_REQUIRED:
362 errmsg = "NmToken expected in ATTLIST enumeration\n";
363 break;
364 case XML_ERR_ATTLIST_NOT_FINISHED:
365 errmsg = "')' required to finish ATTLIST enumeration\n";
366 break;
367 case XML_ERR_MIXED_NOT_STARTED:
368 errmsg = "MixedContentDecl : '|' or ')*' expected\n";
369 break;
370 case XML_ERR_PCDATA_REQUIRED:
371 errmsg = "MixedContentDecl : '#PCDATA' expected\n";
372 break;
373 case XML_ERR_ELEMCONTENT_NOT_STARTED:
374 errmsg = "ContentDecl : Name or '(' expected\n";
375 break;
376 case XML_ERR_ELEMCONTENT_NOT_FINISHED:
377 errmsg = "ContentDecl : ',' '|' or ')' expected\n";
378 break;
379 case XML_ERR_PEREF_IN_INT_SUBSET:
380 errmsg =
381 "PEReference: forbidden within markup decl in internal subset\n";
382 break;
383 case XML_ERR_GT_REQUIRED:
384 errmsg = "expected '>'\n";
385 break;
386 case XML_ERR_CONDSEC_INVALID:
387 errmsg = "XML conditional section '[' expected\n";
388 break;
389 case XML_ERR_EXT_SUBSET_NOT_FINISHED:
390 errmsg = "Content error in the external subset\n";
391 break;
392 case XML_ERR_CONDSEC_INVALID_KEYWORD:
393 errmsg =
394 "conditional section INCLUDE or IGNORE keyword expected\n";
395 break;
396 case XML_ERR_CONDSEC_NOT_FINISHED:
397 errmsg = "XML conditional section not closed\n";
398 break;
399 case XML_ERR_XMLDECL_NOT_STARTED:
400 errmsg = "Text declaration '<?xml' required\n";
401 break;
402 case XML_ERR_XMLDECL_NOT_FINISHED:
403 errmsg = "parsing XML declaration: '?>' expected\n";
404 break;
405 case XML_ERR_EXT_ENTITY_STANDALONE:
406 errmsg = "external parsed entities cannot be standalone\n";
407 break;
408 case XML_ERR_ENTITYREF_SEMICOL_MISSING:
409 errmsg = "EntityRef: expecting ';'\n";
410 break;
411 case XML_ERR_DOCTYPE_NOT_FINISHED:
412 errmsg = "DOCTYPE improperly terminated\n";
413 break;
414 case XML_ERR_LTSLASH_REQUIRED:
415 errmsg = "EndTag: '</' not found\n";
416 break;
417 case XML_ERR_EQUAL_REQUIRED:
418 errmsg = "expected '='\n";
419 break;
420 case XML_ERR_STRING_NOT_CLOSED:
421 errmsg = "String not closed expecting \" or '\n";
422 break;
423 case XML_ERR_STRING_NOT_STARTED:
424 errmsg = "String not started expecting ' or \"\n";
425 break;
426 case XML_ERR_ENCODING_NAME:
427 errmsg = "Invalid XML encoding name\n";
428 break;
429 case XML_ERR_STANDALONE_VALUE:
430 errmsg = "standalone accepts only 'yes' or 'no'\n";
431 break;
432 case XML_ERR_DOCUMENT_EMPTY:
433 errmsg = "Document is empty\n";
434 break;
435 case XML_ERR_DOCUMENT_END:
436 errmsg = "Extra content at the end of the document\n";
437 break;
438 case XML_ERR_NOT_WELL_BALANCED:
439 errmsg = "chunk is not well balanced\n";
440 break;
441 case XML_ERR_EXTRA_CONTENT:
442 errmsg = "extra content at the end of well balanced chunk\n";
443 break;
444 case XML_ERR_VERSION_MISSING:
445 errmsg = "Malformed declaration expecting version\n";
446 break;
447#if 0
448 case:
449 errmsg = "\n";
450 break;
451#endif
452 default:
453 errmsg = "Unregistered error message\n";
454 }
455 if (ctxt != NULL)
456 ctxt->errNo = error;
457 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
458 XML_ERR_FATAL, NULL, 0, info, NULL, NULL, 0, 0, errmsg,
459 info);
460 if (ctxt != NULL) {
461 ctxt->wellFormed = 0;
462 if (ctxt->recovery == 0)
463 ctxt->disableSAX = 1;
464 }
465}
466
467/**
468 * xmlFatalErrMsg:
469 * @ctxt: an XML parser context
470 * @error: the error number
471 * @msg: the error message
472 *
473 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
474 */
475static void
476xmlFatalErrMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
477 const char *msg)
478{
479 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
480 (ctxt->instate == XML_PARSER_EOF))
481 return;
482 if (ctxt != NULL)
483 ctxt->errNo = error;
484 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
485 XML_ERR_FATAL, NULL, 0, NULL, NULL, NULL, 0, 0, msg);
486 if (ctxt != NULL) {
487 ctxt->wellFormed = 0;
488 if (ctxt->recovery == 0)
489 ctxt->disableSAX = 1;
490 }
491}
492
493/**
494 * xmlWarningMsg:
495 * @ctxt: an XML parser context
496 * @error: the error number
497 * @msg: the error message
498 * @str1: extra data
499 * @str2: extra data
500 *
501 * Handle a warning.
502 */
503static void
504xmlWarningMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
505 const char *msg, const xmlChar *str1, const xmlChar *str2)
506{
507 xmlStructuredErrorFunc schannel = NULL;
508
509 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
510 (ctxt->instate == XML_PARSER_EOF))
511 return;
512 if ((ctxt != NULL) && (ctxt->sax != NULL) &&
513 (ctxt->sax->initialized == XML_SAX2_MAGIC))
514 schannel = ctxt->sax->serror;
515 __xmlRaiseError(schannel,
516 (ctxt->sax) ? ctxt->sax->warning : NULL,
517 ctxt->userData,
518 ctxt, NULL, XML_FROM_PARSER, error,
519 XML_ERR_WARNING, NULL, 0,
520 (const char *) str1, (const char *) str2, NULL, 0, 0,
521 msg, (const char *) str1, (const char *) str2);
522}
523
524/**
525 * xmlValidityError:
526 * @ctxt: an XML parser context
527 * @error: the error number
528 * @msg: the error message
529 * @str1: extra data
530 *
531 * Handle a validity error.
532 */
533static void
534xmlValidityError(xmlParserCtxtPtr ctxt, xmlParserErrors error,
535 const char *msg, const xmlChar *str1)
536{
537 xmlStructuredErrorFunc schannel = NULL;
538
539 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
540 (ctxt->instate == XML_PARSER_EOF))
541 return;
542 if (ctxt != NULL) {
543 ctxt->errNo = error;
544 if ((ctxt->sax != NULL) && (ctxt->sax->initialized == XML_SAX2_MAGIC))
545 schannel = ctxt->sax->serror;
546 }
547 __xmlRaiseError(schannel,
548 ctxt->vctxt.error, ctxt->vctxt.userData,
549 ctxt, NULL, XML_FROM_DTD, error,
550 XML_ERR_ERROR, NULL, 0, (const char *) str1,
551 NULL, NULL, 0, 0,
552 msg, (const char *) str1);
553 if (ctxt != NULL) {
554 ctxt->valid = 0;
555 }
556}
557
558/**
559 * xmlFatalErrMsgInt:
560 * @ctxt: an XML parser context
561 * @error: the error number
562 * @msg: the error message
563 * @val: an integer value
564 *
565 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
566 */
567static void
568xmlFatalErrMsgInt(xmlParserCtxtPtr ctxt, xmlParserErrors error,
569 const char *msg, int val)
570{
571 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
572 (ctxt->instate == XML_PARSER_EOF))
573 return;
574 if (ctxt != NULL)
575 ctxt->errNo = error;
576 __xmlRaiseError(NULL, NULL, NULL,
577 ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
578 NULL, 0, NULL, NULL, NULL, val, 0, msg, val);
579 if (ctxt != NULL) {
580 ctxt->wellFormed = 0;
581 if (ctxt->recovery == 0)
582 ctxt->disableSAX = 1;
583 }
584}
585
586/**
587 * xmlFatalErrMsgStrIntStr:
588 * @ctxt: an XML parser context
589 * @error: the error number
590 * @msg: the error message
591 * @str1: an string info
592 * @val: an integer value
593 * @str2: an string info
594 *
595 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
596 */
597static void
598xmlFatalErrMsgStrIntStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
599 const char *msg, const xmlChar *str1, int val,
600 const xmlChar *str2)
601{
602 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
603 (ctxt->instate == XML_PARSER_EOF))
604 return;
605 if (ctxt != NULL)
606 ctxt->errNo = error;
607 __xmlRaiseError(NULL, NULL, NULL,
608 ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
609 NULL, 0, (const char *) str1, (const char *) str2,
610 NULL, val, 0, msg, str1, val, str2);
611 if (ctxt != NULL) {
612 ctxt->wellFormed = 0;
613 if (ctxt->recovery == 0)
614 ctxt->disableSAX = 1;
615 }
616}
617
618/**
619 * xmlFatalErrMsgStr:
620 * @ctxt: an XML parser context
621 * @error: the error number
622 * @msg: the error message
623 * @val: a string value
624 *
625 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
626 */
627static void
628xmlFatalErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
629 const char *msg, const xmlChar * val)
630{
631 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
632 (ctxt->instate == XML_PARSER_EOF))
633 return;
634 if (ctxt != NULL)
635 ctxt->errNo = error;
636 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
637 XML_FROM_PARSER, error, XML_ERR_FATAL,
638 NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
639 val);
640 if (ctxt != NULL) {
641 ctxt->wellFormed = 0;
642 if (ctxt->recovery == 0)
643 ctxt->disableSAX = 1;
644 }
645}
646
647/**
648 * xmlErrMsgStr:
649 * @ctxt: an XML parser context
650 * @error: the error number
651 * @msg: the error message
652 * @val: a string value
653 *
654 * Handle a non fatal parser error
655 */
656static void
657xmlErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
658 const char *msg, const xmlChar * val)
659{
660 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
661 (ctxt->instate == XML_PARSER_EOF))
662 return;
663 if (ctxt != NULL)
664 ctxt->errNo = error;
665 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
666 XML_FROM_PARSER, error, XML_ERR_ERROR,
667 NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
668 val);
669}
670
671/**
672 * xmlNsErr:
673 * @ctxt: an XML parser context
674 * @error: the error number
675 * @msg: the message
676 * @info1: extra information string
677 * @info2: extra information string
678 *
679 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
680 */
681static void
682xmlNsErr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
683 const char *msg,
684 const xmlChar * info1, const xmlChar * info2,
685 const xmlChar * info3)
686{
687 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
688 (ctxt->instate == XML_PARSER_EOF))
689 return;
690 if (ctxt != NULL)
691 ctxt->errNo = error;
692 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error,
693 XML_ERR_ERROR, NULL, 0, (const char *) info1,
694 (const char *) info2, (const char *) info3, 0, 0, msg,
695 info1, info2, info3);
696 if (ctxt != NULL)
697 ctxt->nsWellFormed = 0;
698}
699
700/************************************************************************
701 * *
702 * Library wide options *
703 * *
704 ************************************************************************/
705
706/**
707 * xmlHasFeature:
708 * @feature: the feature to be examined
709 *
710 * Examines if the library has been compiled with a given feature.
711 *
712 * Returns a non-zero value if the feature exist, otherwise zero.
713 * Returns zero (0) if the feature does not exist or an unknown
714 * unknown feature is requested, non-zero otherwise.
715 */
716int
717xmlHasFeature(xmlFeature feature)
718{
719 switch (feature) {
720 case XML_WITH_THREAD:
721#ifdef LIBXML_THREAD_ENABLED
722 return(1);
723#else
724 return(0);
725#endif
726 case XML_WITH_TREE:
727#ifdef LIBXML_TREE_ENABLED
728 return(1);
729#else
730 return(0);
731#endif
732 case XML_WITH_OUTPUT:
733#ifdef LIBXML_OUTPUT_ENABLED
734 return(1);
735#else
736 return(0);
737#endif
738 case XML_WITH_PUSH:
739#ifdef LIBXML_PUSH_ENABLED
740 return(1);
741#else
742 return(0);
743#endif
744 case XML_WITH_READER:
745#ifdef LIBXML_READER_ENABLED
746 return(1);
747#else
748 return(0);
749#endif
750 case XML_WITH_PATTERN:
751#ifdef LIBXML_PATTERN_ENABLED
752 return(1);
753#else
754 return(0);
755#endif
756 case XML_WITH_WRITER:
757#ifdef LIBXML_WRITER_ENABLED
758 return(1);
759#else
760 return(0);
761#endif
762 case XML_WITH_SAX1:
763#ifdef LIBXML_SAX1_ENABLED
764 return(1);
765#else
766 return(0);
767#endif
768 case XML_WITH_FTP:
769#ifdef LIBXML_FTP_ENABLED
770 return(1);
771#else
772 return(0);
773#endif
774 case XML_WITH_HTTP:
775#ifdef LIBXML_HTTP_ENABLED
776 return(1);
777#else
778 return(0);
779#endif
780 case XML_WITH_VALID:
781#ifdef LIBXML_VALID_ENABLED
782 return(1);
783#else
784 return(0);
785#endif
786 case XML_WITH_HTML:
787#ifdef LIBXML_HTML_ENABLED
788 return(1);
789#else
790 return(0);
791#endif
792 case XML_WITH_LEGACY:
793#ifdef LIBXML_LEGACY_ENABLED
794 return(1);
795#else
796 return(0);
797#endif
798 case XML_WITH_C14N:
799#ifdef LIBXML_C14N_ENABLED
800 return(1);
801#else
802 return(0);
803#endif
804 case XML_WITH_CATALOG:
805#ifdef LIBXML_CATALOG_ENABLED
806 return(1);
807#else
808 return(0);
809#endif
810 case XML_WITH_XPATH:
811#ifdef LIBXML_XPATH_ENABLED
812 return(1);
813#else
814 return(0);
815#endif
816 case XML_WITH_XPTR:
817#ifdef LIBXML_XPTR_ENABLED
818 return(1);
819#else
820 return(0);
821#endif
822 case XML_WITH_XINCLUDE:
823#ifdef LIBXML_XINCLUDE_ENABLED
824 return(1);
825#else
826 return(0);
827#endif
828 case XML_WITH_ICONV:
829#ifdef LIBXML_ICONV_ENABLED
830 return(1);
831#else
832 return(0);
833#endif
834 case XML_WITH_ISO8859X:
835#ifdef LIBXML_ISO8859X_ENABLED
836 return(1);
837#else
838 return(0);
839#endif
840 case XML_WITH_UNICODE:
841#ifdef LIBXML_UNICODE_ENABLED
842 return(1);
843#else
844 return(0);
845#endif
846 case XML_WITH_REGEXP:
847#ifdef LIBXML_REGEXP_ENABLED
848 return(1);
849#else
850 return(0);
851#endif
852 case XML_WITH_AUTOMATA:
853#ifdef LIBXML_AUTOMATA_ENABLED
854 return(1);
855#else
856 return(0);
857#endif
858 case XML_WITH_EXPR:
859#ifdef LIBXML_EXPR_ENABLED
860 return(1);
861#else
862 return(0);
863#endif
864 case XML_WITH_SCHEMAS:
865#ifdef LIBXML_SCHEMAS_ENABLED
866 return(1);
867#else
868 return(0);
869#endif
870 case XML_WITH_SCHEMATRON:
871#ifdef LIBXML_SCHEMATRON_ENABLED
872 return(1);
873#else
874 return(0);
875#endif
876 case XML_WITH_MODULES:
877#ifdef LIBXML_MODULES_ENABLED
878 return(1);
879#else
880 return(0);
881#endif
882 case XML_WITH_DEBUG:
883#ifdef LIBXML_DEBUG_ENABLED
884 return(1);
885#else
886 return(0);
887#endif
888 case XML_WITH_DEBUG_MEM:
889#ifdef DEBUG_MEMORY_LOCATION
890 return(1);
891#else
892 return(0);
893#endif
894 case XML_WITH_DEBUG_RUN:
895#ifdef LIBXML_DEBUG_RUNTIME
896 return(1);
897#else
898 return(0);
899#endif
900 case XML_WITH_ZLIB:
901#ifdef LIBXML_ZLIB_ENABLED
902 return(1);
903#else
904 return(0);
905#endif
906 default:
907 break;
908 }
909 return(0);
910}
911
912/************************************************************************
913 * *
914 * SAX2 defaulted attributes handling *
915 * *
916 ************************************************************************/
917
918/**
919 * xmlDetectSAX2:
920 * @ctxt: an XML parser context
921 *
922 * Do the SAX2 detection and specific intialization
923 */
924static void
925xmlDetectSAX2(xmlParserCtxtPtr ctxt) {
926 if (ctxt == NULL) return;
927#ifdef LIBXML_SAX1_ENABLED
928 if ((ctxt->sax) && (ctxt->sax->initialized == XML_SAX2_MAGIC) &&
929 ((ctxt->sax->startElementNs != NULL) ||
930 (ctxt->sax->endElementNs != NULL))) ctxt->sax2 = 1;
931#else
932 ctxt->sax2 = 1;
933#endif /* LIBXML_SAX1_ENABLED */
934
935 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
936 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
937 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
938 if ((ctxt->str_xml==NULL) || (ctxt->str_xmlns==NULL) ||
939 (ctxt->str_xml_ns == NULL)) {
940 xmlErrMemory(ctxt, NULL);
941 }
942}
943
944typedef struct _xmlDefAttrs xmlDefAttrs;
945typedef xmlDefAttrs *xmlDefAttrsPtr;
946struct _xmlDefAttrs {
947 int nbAttrs; /* number of defaulted attributes on that element */
948 int maxAttrs; /* the size of the array */
949 const xmlChar *values[4]; /* array of localname/prefix/values */
950};
951
952/**
953 * xmlAddDefAttrs:
954 * @ctxt: an XML parser context
955 * @fullname: the element fullname
956 * @fullattr: the attribute fullname
957 * @value: the attribute value
958 *
959 * Add a defaulted attribute for an element
960 */
961static void
962xmlAddDefAttrs(xmlParserCtxtPtr ctxt,
963 const xmlChar *fullname,
964 const xmlChar *fullattr,
965 const xmlChar *value) {
966 xmlDefAttrsPtr defaults;
967 int len;
968 const xmlChar *name;
969 const xmlChar *prefix;
970
971 if (ctxt->attsDefault == NULL) {
972 ctxt->attsDefault = xmlHashCreateDict(10, ctxt->dict);
973 if (ctxt->attsDefault == NULL)
974 goto mem_error;
975 }
976
977 /*
978 * split the element name into prefix:localname , the string found
979 * are within the DTD and then not associated to namespace names.
980 */
981 name = xmlSplitQName3(fullname, &len);
982 if (name == NULL) {
983 name = xmlDictLookup(ctxt->dict, fullname, -1);
984 prefix = NULL;
985 } else {
986 name = xmlDictLookup(ctxt->dict, name, -1);
987 prefix = xmlDictLookup(ctxt->dict, fullname, len);
988 }
989
990 /*
991 * make sure there is some storage
992 */
993 defaults = xmlHashLookup2(ctxt->attsDefault, name, prefix);
994 if (defaults == NULL) {
995 defaults = (xmlDefAttrsPtr) xmlMalloc(sizeof(xmlDefAttrs) +
996 (4 * 4) * sizeof(const xmlChar *));
997 if (defaults == NULL)
998 goto mem_error;
999 defaults->nbAttrs = 0;
1000 defaults->maxAttrs = 4;
1001 xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix, defaults, NULL);
1002 } else if (defaults->nbAttrs >= defaults->maxAttrs) {
1003 xmlDefAttrsPtr temp;
1004
1005 temp = (xmlDefAttrsPtr) xmlRealloc(defaults, sizeof(xmlDefAttrs) +
1006 (2 * defaults->maxAttrs * 4) * sizeof(const xmlChar *));
1007 if (temp == NULL)
1008 goto mem_error;
1009 defaults = temp;
1010 defaults->maxAttrs *= 2;
1011 xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix, defaults, NULL);
1012 }
1013
1014 /*
1015 * Split the element name into prefix:localname , the string found
1016 * are within the DTD and hen not associated to namespace names.
1017 */
1018 name = xmlSplitQName3(fullattr, &len);
1019 if (name == NULL) {
1020 name = xmlDictLookup(ctxt->dict, fullattr, -1);
1021 prefix = NULL;
1022 } else {
1023 name = xmlDictLookup(ctxt->dict, name, -1);
1024 prefix = xmlDictLookup(ctxt->dict, fullattr, len);
1025 }
1026
1027 defaults->values[4 * defaults->nbAttrs] = name;
1028 defaults->values[4 * defaults->nbAttrs + 1] = prefix;
1029 /* intern the string and precompute the end */
1030 len = xmlStrlen(value);
1031 value = xmlDictLookup(ctxt->dict, value, len);
1032 defaults->values[4 * defaults->nbAttrs + 2] = value;
1033 defaults->values[4 * defaults->nbAttrs + 3] = value + len;
1034 defaults->nbAttrs++;
1035
1036 return;
1037
1038mem_error:
1039 xmlErrMemory(ctxt, NULL);
1040 return;
1041}
1042
1043/**
1044 * xmlAddSpecialAttr:
1045 * @ctxt: an XML parser context
1046 * @fullname: the element fullname
1047 * @fullattr: the attribute fullname
1048 * @type: the attribute type
1049 *
1050 * Register this attribute type
1051 */
1052static void
1053xmlAddSpecialAttr(xmlParserCtxtPtr ctxt,
1054 const xmlChar *fullname,
1055 const xmlChar *fullattr,
1056 int type)
1057{
1058 if (ctxt->attsSpecial == NULL) {
1059 ctxt->attsSpecial = xmlHashCreateDict(10, ctxt->dict);
1060 if (ctxt->attsSpecial == NULL)
1061 goto mem_error;
1062 }
1063
1064 if (xmlHashLookup2(ctxt->attsSpecial, fullname, fullattr) != NULL)
1065 return;
1066
1067 xmlHashAddEntry2(ctxt->attsSpecial, fullname, fullattr,
1068 (void *) (long) type);
1069 return;
1070
1071mem_error:
1072 xmlErrMemory(ctxt, NULL);
1073 return;
1074}
1075
1076/**
1077 * xmlCleanSpecialAttrCallback:
1078 *
1079 * Removes CDATA attributes from the special attribute table
1080 */
1081static void
1082xmlCleanSpecialAttrCallback(void *payload, void *data,
1083 const xmlChar *fullname, const xmlChar *fullattr,
1084 const xmlChar *unused ATTRIBUTE_UNUSED) {
1085 xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) data;
1086
1087 if (((long) payload) == XML_ATTRIBUTE_CDATA) {
1088 xmlHashRemoveEntry2(ctxt->attsSpecial, fullname, fullattr, NULL);
1089 }
1090}
1091
1092/**
1093 * xmlCleanSpecialAttr:
1094 * @ctxt: an XML parser context
1095 *
1096 * Trim the list of attributes defined to remove all those of type
1097 * CDATA as they are not special. This call should be done when finishing
1098 * to parse the DTD and before starting to parse the document root.
1099 */
1100static void
1101xmlCleanSpecialAttr(xmlParserCtxtPtr ctxt)
1102{
1103 if (ctxt->attsSpecial == NULL)
1104 return;
1105
1106 xmlHashScanFull(ctxt->attsSpecial, xmlCleanSpecialAttrCallback, ctxt);
1107
1108 if (xmlHashSize(ctxt->attsSpecial) == 0) {
1109 xmlHashFree(ctxt->attsSpecial, NULL);
1110 ctxt->attsSpecial = NULL;
1111 }
1112 return;
1113}
1114
1115/**
1116 * xmlCheckLanguageID:
1117 * @lang: pointer to the string value
1118 *
1119 * Checks that the value conforms to the LanguageID production:
1120 *
1121 * NOTE: this is somewhat deprecated, those productions were removed from
1122 * the XML Second edition.
1123 *
1124 * [33] LanguageID ::= Langcode ('-' Subcode)*
1125 * [34] Langcode ::= ISO639Code | IanaCode | UserCode
1126 * [35] ISO639Code ::= ([a-z] | [A-Z]) ([a-z] | [A-Z])
1127 * [36] IanaCode ::= ('i' | 'I') '-' ([a-z] | [A-Z])+
1128 * [37] UserCode ::= ('x' | 'X') '-' ([a-z] | [A-Z])+
1129 * [38] Subcode ::= ([a-z] | [A-Z])+
1130 *
1131 * Returns 1 if correct 0 otherwise
1132 **/
1133int
1134xmlCheckLanguageID(const xmlChar * lang)
1135{
1136 const xmlChar *cur = lang;
1137
1138 if (cur == NULL)
1139 return (0);
1140 if (((cur[0] == 'i') && (cur[1] == '-')) ||
1141 ((cur[0] == 'I') && (cur[1] == '-'))) {
1142 /*
1143 * IANA code
1144 */
1145 cur += 2;
1146 while (((cur[0] >= 'A') && (cur[0] <= 'Z')) || /* non input consuming */
1147 ((cur[0] >= 'a') && (cur[0] <= 'z')))
1148 cur++;
1149 } else if (((cur[0] == 'x') && (cur[1] == '-')) ||
1150 ((cur[0] == 'X') && (cur[1] == '-'))) {
1151 /*
1152 * User code
1153 */
1154 cur += 2;
1155 while (((cur[0] >= 'A') && (cur[0] <= 'Z')) || /* non input consuming */
1156 ((cur[0] >= 'a') && (cur[0] <= 'z')))
1157 cur++;
1158 } else if (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
1159 ((cur[0] >= 'a') && (cur[0] <= 'z'))) {
1160 /*
1161 * ISO639
1162 */
1163 cur++;
1164 if (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
1165 ((cur[0] >= 'a') && (cur[0] <= 'z')))
1166 cur++;
1167 else
1168 return (0);
1169 } else
1170 return (0);
1171 while (cur[0] != 0) { /* non input consuming */
1172 if (cur[0] != '-')
1173 return (0);
1174 cur++;
1175 if (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
1176 ((cur[0] >= 'a') && (cur[0] <= 'z')))
1177 cur++;
1178 else
1179 return (0);
1180 while (((cur[0] >= 'A') && (cur[0] <= 'Z')) || /* non input consuming */
1181 ((cur[0] >= 'a') && (cur[0] <= 'z')))
1182 cur++;
1183 }
1184 return (1);
1185}
1186
1187/************************************************************************
1188 * *
1189 * Parser stacks related functions and macros *
1190 * *
1191 ************************************************************************/
1192
1193xmlEntityPtr xmlParseStringEntityRef(xmlParserCtxtPtr ctxt,
1194 const xmlChar ** str);
1195
1196#ifdef SAX2
1197/**
1198 * nsPush:
1199 * @ctxt: an XML parser context
1200 * @prefix: the namespace prefix or NULL
1201 * @URL: the namespace name
1202 *
1203 * Pushes a new parser namespace on top of the ns stack
1204 *
1205 * Returns -1 in case of error, -2 if the namespace should be discarded
1206 * and the index in the stack otherwise.
1207 */
1208static int
1209nsPush(xmlParserCtxtPtr ctxt, const xmlChar *prefix, const xmlChar *URL)
1210{
1211 if (ctxt->options & XML_PARSE_NSCLEAN) {
1212 int i;
1213 for (i = 0;i < ctxt->nsNr;i += 2) {
1214 if (ctxt->nsTab[i] == prefix) {
1215 /* in scope */
1216 if (ctxt->nsTab[i + 1] == URL)
1217 return(-2);
1218 /* out of scope keep it */
1219 break;
1220 }
1221 }
1222 }
1223 if ((ctxt->nsMax == 0) || (ctxt->nsTab == NULL)) {
1224 ctxt->nsMax = 10;
1225 ctxt->nsNr = 0;
1226 ctxt->nsTab = (const xmlChar **)
1227 xmlMalloc(ctxt->nsMax * sizeof(xmlChar *));
1228 if (ctxt->nsTab == NULL) {
1229 xmlErrMemory(ctxt, NULL);
1230 ctxt->nsMax = 0;
1231 return (-1);
1232 }
1233 } else if (ctxt->nsNr >= ctxt->nsMax) {
1234 ctxt->nsMax *= 2;
1235 ctxt->nsTab = (const xmlChar **)
1236 xmlRealloc((char *) ctxt->nsTab,
1237 ctxt->nsMax * sizeof(ctxt->nsTab[0]));
1238 if (ctxt->nsTab == NULL) {
1239 xmlErrMemory(ctxt, NULL);
1240 ctxt->nsMax /= 2;
1241 return (-1);
1242 }
1243 }
1244 ctxt->nsTab[ctxt->nsNr++] = prefix;
1245 ctxt->nsTab[ctxt->nsNr++] = URL;
1246 return (ctxt->nsNr);
1247}
1248/**
1249 * nsPop:
1250 * @ctxt: an XML parser context
1251 * @nr: the number to pop
1252 *
1253 * Pops the top @nr parser prefix/namespace from the ns stack
1254 *
1255 * Returns the number of namespaces removed
1256 */
1257static int
1258nsPop(xmlParserCtxtPtr ctxt, int nr)
1259{
1260 int i;
1261
1262 if (ctxt->nsTab == NULL) return(0);
1263 if (ctxt->nsNr < nr) {
1264 xmlGenericError(xmlGenericErrorContext, "Pbm popping %d NS\n", nr);
1265 nr = ctxt->nsNr;
1266 }
1267 if (ctxt->nsNr <= 0)
1268 return (0);
1269
1270 for (i = 0;i < nr;i++) {
1271 ctxt->nsNr--;
1272 ctxt->nsTab[ctxt->nsNr] = NULL;
1273 }
1274 return(nr);
1275}
1276#endif
1277
1278static int
1279xmlCtxtGrowAttrs(xmlParserCtxtPtr ctxt, int nr) {
1280 const xmlChar **atts;
1281 int *attallocs;
1282 int maxatts;
1283
1284 if (ctxt->atts == NULL) {
1285 maxatts = 55; /* allow for 10 attrs by default */
1286 atts = (const xmlChar **)
1287 xmlMalloc(maxatts * sizeof(xmlChar *));
1288 if (atts == NULL) goto mem_error;
1289 ctxt->atts = atts;
1290 attallocs = (int *) xmlMalloc((maxatts / 5) * sizeof(int));
1291 if (attallocs == NULL) goto mem_error;
1292 ctxt->attallocs = attallocs;
1293 ctxt->maxatts = maxatts;
1294 } else if (nr + 5 > ctxt->maxatts) {
1295 maxatts = (nr + 5) * 2;
1296 atts = (const xmlChar **) xmlRealloc((void *) ctxt->atts,
1297 maxatts * sizeof(const xmlChar *));
1298 if (atts == NULL) goto mem_error;
1299 ctxt->atts = atts;
1300 attallocs = (int *) xmlRealloc((void *) ctxt->attallocs,
1301 (maxatts / 5) * sizeof(int));
1302 if (attallocs == NULL) goto mem_error;
1303 ctxt->attallocs = attallocs;
1304 ctxt->maxatts = maxatts;
1305 }
1306 return(ctxt->maxatts);
1307mem_error:
1308 xmlErrMemory(ctxt, NULL);
1309 return(-1);
1310}
1311
1312/**
1313 * inputPush:
1314 * @ctxt: an XML parser context
1315 * @value: the parser input
1316 *
1317 * Pushes a new parser input on top of the input stack
1318 *
1319 * Returns 0 in case of error, the index in the stack otherwise
1320 */
1321int
1322inputPush(xmlParserCtxtPtr ctxt, xmlParserInputPtr value)
1323{
1324 if ((ctxt == NULL) || (value == NULL))
1325 return(0);
1326 if (ctxt->inputNr >= ctxt->inputMax) {
1327 ctxt->inputMax *= 2;
1328 ctxt->inputTab =
1329 (xmlParserInputPtr *) xmlRealloc(ctxt->inputTab,
1330 ctxt->inputMax *
1331 sizeof(ctxt->inputTab[0]));
1332 if (ctxt->inputTab == NULL) {
1333 xmlErrMemory(ctxt, NULL);
1334 return (0);
1335 }
1336 }
1337 ctxt->inputTab[ctxt->inputNr] = value;
1338 ctxt->input = value;
1339 return (ctxt->inputNr++);
1340}
1341/**
1342 * inputPop:
1343 * @ctxt: an XML parser context
1344 *
1345 * Pops the top parser input from the input stack
1346 *
1347 * Returns the input just removed
1348 */
1349xmlParserInputPtr
1350inputPop(xmlParserCtxtPtr ctxt)
1351{
1352 xmlParserInputPtr ret;
1353
1354 if (ctxt == NULL)
1355 return(NULL);
1356 if (ctxt->inputNr <= 0)
1357 return (NULL);
1358 ctxt->inputNr--;
1359 if (ctxt->inputNr > 0)
1360 ctxt->input = ctxt->inputTab[ctxt->inputNr - 1];
1361 else
1362 ctxt->input = NULL;
1363 ret = ctxt->inputTab[ctxt->inputNr];
1364 ctxt->inputTab[ctxt->inputNr] = NULL;
1365 return (ret);
1366}
1367/**
1368 * nodePush:
1369 * @ctxt: an XML parser context
1370 * @value: the element node
1371 *
1372 * Pushes a new element node on top of the node stack
1373 *
1374 * Returns 0 in case of error, the index in the stack otherwise
1375 */
1376int
1377nodePush(xmlParserCtxtPtr ctxt, xmlNodePtr value)
1378{
1379 if (ctxt == NULL) return(0);
1380 if (ctxt->nodeNr >= ctxt->nodeMax) {
1381 xmlNodePtr *tmp;
1382
1383 tmp = (xmlNodePtr *) xmlRealloc(ctxt->nodeTab,
1384 ctxt->nodeMax * 2 *
1385 sizeof(ctxt->nodeTab[0]));
1386 if (tmp == NULL) {
1387 xmlErrMemory(ctxt, NULL);
1388 return (0);
1389 }
1390 ctxt->nodeTab = tmp;
1391 ctxt->nodeMax *= 2;
1392 }
1393 if (((unsigned int) ctxt->nodeNr) > xmlParserMaxDepth) {
1394 xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR,
1395 "Excessive depth in document: change xmlParserMaxDepth = %d\n",
1396 xmlParserMaxDepth);
1397 ctxt->instate = XML_PARSER_EOF;
1398 return(0);
1399 }
1400 ctxt->nodeTab[ctxt->nodeNr] = value;
1401 ctxt->node = value;
1402 return (ctxt->nodeNr++);
1403}
1404/**
1405 * nodePop:
1406 * @ctxt: an XML parser context
1407 *
1408 * Pops the top element node from the node stack
1409 *
1410 * Returns the node just removed
1411 */
1412xmlNodePtr
1413nodePop(xmlParserCtxtPtr ctxt)
1414{
1415 xmlNodePtr ret;
1416
1417 if (ctxt == NULL) return(NULL);
1418 if (ctxt->nodeNr <= 0)
1419 return (NULL);
1420 ctxt->nodeNr--;
1421 if (ctxt->nodeNr > 0)
1422 ctxt->node = ctxt->nodeTab[ctxt->nodeNr - 1];
1423 else
1424 ctxt->node = NULL;
1425 ret = ctxt->nodeTab[ctxt->nodeNr];
1426 ctxt->nodeTab[ctxt->nodeNr] = NULL;
1427 return (ret);
1428}
1429
1430#ifdef LIBXML_PUSH_ENABLED
1431/**
1432 * nameNsPush:
1433 * @ctxt: an XML parser context
1434 * @value: the element name
1435 * @prefix: the element prefix
1436 * @URI: the element namespace name
1437 *
1438 * Pushes a new element name/prefix/URL on top of the name stack
1439 *
1440 * Returns -1 in case of error, the index in the stack otherwise
1441 */
1442static int
1443nameNsPush(xmlParserCtxtPtr ctxt, const xmlChar * value,
1444 const xmlChar *prefix, const xmlChar *URI, int nsNr)
1445{
1446 if (ctxt->nameNr >= ctxt->nameMax) {
1447 const xmlChar * *tmp;
1448 void **tmp2;
1449 ctxt->nameMax *= 2;
1450 tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
1451 ctxt->nameMax *
1452 sizeof(ctxt->nameTab[0]));
1453 if (tmp == NULL) {
1454 ctxt->nameMax /= 2;
1455 goto mem_error;
1456 }
1457 ctxt->nameTab = tmp;
1458 tmp2 = (void **) xmlRealloc((void * *)ctxt->pushTab,
1459 ctxt->nameMax * 3 *
1460 sizeof(ctxt->pushTab[0]));
1461 if (tmp2 == NULL) {
1462 ctxt->nameMax /= 2;
1463 goto mem_error;
1464 }
1465 ctxt->pushTab = tmp2;
1466 }
1467 ctxt->nameTab[ctxt->nameNr] = value;
1468 ctxt->name = value;
1469 ctxt->pushTab[ctxt->nameNr * 3] = (void *) prefix;
1470 ctxt->pushTab[ctxt->nameNr * 3 + 1] = (void *) URI;
1471 ctxt->pushTab[ctxt->nameNr * 3 + 2] = (void *) (long) nsNr;
1472 return (ctxt->nameNr++);
1473mem_error:
1474 xmlErrMemory(ctxt, NULL);
1475 return (-1);
1476}
1477/**
1478 * nameNsPop:
1479 * @ctxt: an XML parser context
1480 *
1481 * Pops the top element/prefix/URI name from the name stack
1482 *
1483 * Returns the name just removed
1484 */
1485static const xmlChar *
1486nameNsPop(xmlParserCtxtPtr ctxt)
1487{
1488 const xmlChar *ret;
1489
1490 if (ctxt->nameNr <= 0)
1491 return (NULL);
1492 ctxt->nameNr--;
1493 if (ctxt->nameNr > 0)
1494 ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
1495 else
1496 ctxt->name = NULL;
1497 ret = ctxt->nameTab[ctxt->nameNr];
1498 ctxt->nameTab[ctxt->nameNr] = NULL;
1499 return (ret);
1500}
1501#endif /* LIBXML_PUSH_ENABLED */
1502
1503/**
1504 * namePush:
1505 * @ctxt: an XML parser context
1506 * @value: the element name
1507 *
1508 * Pushes a new element name on top of the name stack
1509 *
1510 * Returns -1 in case of error, the index in the stack otherwise
1511 */
1512int
1513namePush(xmlParserCtxtPtr ctxt, const xmlChar * value)
1514{
1515 if (ctxt == NULL) return (-1);
1516
1517 if (ctxt->nameNr >= ctxt->nameMax) {
1518 const xmlChar * *tmp;
1519 ctxt->nameMax *= 2;
1520 tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
1521 ctxt->nameMax *
1522 sizeof(ctxt->nameTab[0]));
1523 if (tmp == NULL) {
1524 ctxt->nameMax /= 2;
1525 goto mem_error;
1526 }
1527 ctxt->nameTab = tmp;
1528 }
1529 ctxt->nameTab[ctxt->nameNr] = value;
1530 ctxt->name = value;
1531 return (ctxt->nameNr++);
1532mem_error:
1533 xmlErrMemory(ctxt, NULL);
1534 return (-1);
1535}
1536/**
1537 * namePop:
1538 * @ctxt: an XML parser context
1539 *
1540 * Pops the top element name from the name stack
1541 *
1542 * Returns the name just removed
1543 */
1544const xmlChar *
1545namePop(xmlParserCtxtPtr ctxt)
1546{
1547 const xmlChar *ret;
1548
1549 if ((ctxt == NULL) || (ctxt->nameNr <= 0))
1550 return (NULL);
1551 ctxt->nameNr--;
1552 if (ctxt->nameNr > 0)
1553 ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
1554 else
1555 ctxt->name = NULL;
1556 ret = ctxt->nameTab[ctxt->nameNr];
1557 ctxt->nameTab[ctxt->nameNr] = NULL;
1558 return (ret);
1559}
1560
1561static int spacePush(xmlParserCtxtPtr ctxt, int val) {
1562 if (ctxt->spaceNr >= ctxt->spaceMax) {
1563 ctxt->spaceMax *= 2;
1564 ctxt->spaceTab = (int *) xmlRealloc(ctxt->spaceTab,
1565 ctxt->spaceMax * sizeof(ctxt->spaceTab[0]));
1566 if (ctxt->spaceTab == NULL) {
1567 xmlErrMemory(ctxt, NULL);
1568 return(0);
1569 }
1570 }
1571 ctxt->spaceTab[ctxt->spaceNr] = val;
1572 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr];
1573 return(ctxt->spaceNr++);
1574}
1575
1576static int spacePop(xmlParserCtxtPtr ctxt) {
1577 int ret;
1578 if (ctxt->spaceNr <= 0) return(0);
1579 ctxt->spaceNr--;
1580 if (ctxt->spaceNr > 0)
1581 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr - 1];
1582 else
1583 ctxt->space = &ctxt->spaceTab[0];
1584 ret = ctxt->spaceTab[ctxt->spaceNr];
1585 ctxt->spaceTab[ctxt->spaceNr] = -1;
1586 return(ret);
1587}
1588
1589/*
1590 * Macros for accessing the content. Those should be used only by the parser,
1591 * and not exported.
1592 *
1593 * Dirty macros, i.e. one often need to make assumption on the context to
1594 * use them
1595 *
1596 * CUR_PTR return the current pointer to the xmlChar to be parsed.
1597 * To be used with extreme caution since operations consuming
1598 * characters may move the input buffer to a different location !
 *   CUR     returns the current xmlChar value, i.e. an 8-bit value.
 *           This should be used internally by the parser
 *           only to compare to ASCII values; otherwise it would break
 *           when running with UTF-8 encoding.
1603 * RAW same as CUR but in the input buffer, bypass any token
1604 * extraction that may have been done
 *   NXT(n)  returns the n'th next xmlChar. Same as CUR, it should be used
 *           only to compare on ASCII based substring.
1607 * SKIP(n) Skip n xmlChar, and must also be used only to skip ASCII defined
1608 * strings without newlines within the parser.
 *   NEXT1   Skip 1 xmlChar, and must also be used only to skip 1
 *           non-newline ASCII defined char within the parser.
1611 * Clean macros, not dependent of an ASCII context, expect UTF-8 encoding
1612 *
1613 * NEXT Skip to the next character, this does the proper decoding
1614 * in UTF-8 mode. It also pop-up unfinished entities on the fly.
1615 * NEXTL(l) Skip the current unicode character of l xmlChars long.
1616 * CUR_CHAR(l) returns the current unicode character (int), set l
1617 * to the number of xmlChars used for the encoding [0-5].
1618 * CUR_SCHAR same but operate on a string instead of the context
1619 * COPY_BUF copy the current unicode char to the target buffer, increment
1620 * the index
1621 * GROW, SHRINK handling of input buffers
1622 */
1623
/*
 * All four accessors expect a variable named ctxt to be in scope at the
 * point of use.  RAW and CUR currently expand to the same expression:
 * the xmlChar at the current input position.
 */
#define RAW (*ctxt->input->cur)
#define CUR (*ctxt->input->cur)
/* xmlChar located val positions after the current one; no bounds check */
#define NXT(val) ctxt->input->cur[(val)]
/* the current input pointer itself */
#define CUR_PTR ctxt->input->cur
1628
/*
 * CMPn(s, c1, ..., cn): true when the first n bytes found at s equal
 * c1 ... cn in order.  Bytes are read as unsigned char so that values
 * above 0x7F compare as expected.  s is evaluated several times and must
 * therefore be free of side effects.  Every argument is parenthesized in
 * the expansion so that the cast applies to the whole of s even when it
 * is a compound expression.
 */
#define CMP4( s, c1, c2, c3, c4 ) \
  ( ((const unsigned char *) (s))[ 0 ] == (c1) && \
    ((const unsigned char *) (s))[ 1 ] == (c2) && \
    ((const unsigned char *) (s))[ 2 ] == (c3) && \
    ((const unsigned char *) (s))[ 3 ] == (c4) )
#define CMP5( s, c1, c2, c3, c4, c5 ) \
  ( CMP4( s, c1, c2, c3, c4 ) && \
    ((const unsigned char *) (s))[ 4 ] == (c5) )
#define CMP6( s, c1, c2, c3, c4, c5, c6 ) \
  ( CMP5( s, c1, c2, c3, c4, c5 ) && \
    ((const unsigned char *) (s))[ 5 ] == (c6) )
#define CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) \
  ( CMP6( s, c1, c2, c3, c4, c5, c6 ) && \
    ((const unsigned char *) (s))[ 6 ] == (c7) )
#define CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) \
  ( CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) && \
    ((const unsigned char *) (s))[ 7 ] == (c8) )
#define CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) \
  ( CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) && \
    ((const unsigned char *) (s))[ 8 ] == (c9) )
#define CMP10( s, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10 ) \
  ( CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) && \
    ((const unsigned char *) (s))[ 9 ] == (c10) )
1646
/*
 * SKIP(val): advance the current input by val xmlChars, adding val to
 * ctxt->nbChars and to the column counter.  The line counter is not
 * touched.  Afterwards a '%' at the new position is handed to
 * xmlParserHandlePEReference(), and if the new position holds a 0 byte
 * and the input cannot be grown, the input is popped.
 * val is evaluated three times.
 */
#define SKIP(val) do {							\
    ctxt->nbChars += (val),ctxt->input->cur += (val),ctxt->input->col+=(val);			\
    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);	\
    if ((*ctxt->input->cur == 0) &&					\
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))		\
	    xmlPopInput(ctxt);						\
  } while (0)

/*
 * SKIPL(val): same as SKIP but walks the val xmlChars one at a time so
 * that a '\n' bumps the line counter and resets the column to 1.
 * Declares a local named skipl; val is re-evaluated on every iteration
 * and is not parenthesized in the loop condition.
 */
#define SKIPL(val) do {							\
    int skipl;								\
    for(skipl=0; skipl<val; skipl++) {					\
	if (*(ctxt->input->cur) == '\n') {				\
	ctxt->input->line++; ctxt->input->col = 1;			\
	} else ctxt->input->col++;					\
	ctxt->nbChars++;						\
        ctxt->input->cur++;						\
    }									\
    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);	\
    if ((*ctxt->input->cur == 0) &&					\
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))		\
	    xmlPopInput(ctxt);						\
  } while (0)
1669
1670#define SHRINK if ((ctxt->progressive == 0) && \
1671 (ctxt->input->cur - ctxt->input->base > 2 * INPUT_CHUNK) && \
1672 (ctxt->input->end - ctxt->input->cur < 2 * INPUT_CHUNK)) \
1673 xmlSHRINK (ctxt);
1674
1675static void xmlSHRINK (xmlParserCtxtPtr ctxt) {
1676 xmlParserInputShrink(ctxt->input);
1677 if ((*ctxt->input->cur == 0) &&
1678 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
1679 xmlPopInput(ctxt);
1680 }
1681
1682#define GROW if ((ctxt->progressive == 0) && \
1683 (ctxt->input->end - ctxt->input->cur < INPUT_CHUNK)) \
1684 xmlGROW (ctxt);
1685
1686static void xmlGROW (xmlParserCtxtPtr ctxt) {
1687 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1688 if ((*ctxt->input->cur == 0) &&
1689 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
1690 xmlPopInput(ctxt);
1691}
1692
/* skip blanks at the current position; yields the number skipped */
#define SKIP_BLANKS xmlSkipBlankChars(ctxt)

/* advance by one character through xmlNextChar() */
#define NEXT xmlNextChar(ctxt)

/*
 * NEXT1: advance by exactly one xmlChar, bumping the column and
 * ctxt->nbChars.  The line counter is never touched.  When the new
 * position holds a 0 byte the input is asked to grow; unlike SKIP, the
 * input is not popped here.
 * The expansion is a plain brace block, not a do/while(0).
 */
#define NEXT1 {								\
	ctxt->input->col++;						\
	ctxt->input->cur++;						\
	ctxt->nbChars++;						\
	if (*ctxt->input->cur == 0)					\
	    xmlParserInputGrow(ctxt->input, INPUT_CHUNK);		\
    }

/*
 * NEXTL(l): step over the current character, which is l xmlChars long.
 * A '\n' bumps the line counter and resets the column to 1, anything
 * else counts as one column.  ctxt->nbChars is not updated here.  A '%'
 * at the new position is handed to xmlParserHandlePEReference().
 */
#define NEXTL(l) do {							\
    if (*(ctxt->input->cur) == '\n') {					\
	ctxt->input->line++; ctxt->input->col = 1;			\
    } else ctxt->input->col++;						\
    ctxt->input->cur += l;				\
    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);	\
  } while (0)

/* l must be an lvalue: its address is passed to receive the length */
#define CUR_CHAR(l) xmlCurrentChar(ctxt, &l)
#define CUR_SCHAR(s, l) xmlStringCurrentChar(ctxt, s, &l)

/*
 * COPY_BUF(l,b,i,v): append character v to buffer b at index i and
 * advance i.  A length l of 1 stores v as a single xmlChar, any other
 * length goes through xmlCopyCharMultiByte().
 * The expansion is an unbraced if/else without a trailing ';'.
 */
#define COPY_BUF(l,b,i,v)						\
    if (l == 1) b[i++] = (xmlChar) v;					\
    else i += xmlCopyCharMultiByte(&b[i],v)
1719
1720/**
1721 * xmlSkipBlankChars:
1722 * @ctxt: the XML parser context
1723 *
1724 * skip all blanks character found at that point in the input streams.
1725 * It pops up finished entities in the process if allowable at that point.
1726 *
1727 * Returns the number of space chars skipped
1728 */
1729
1730int
1731xmlSkipBlankChars(xmlParserCtxtPtr ctxt) {
1732 int res = 0;
1733
1734 /*
1735 * It's Okay to use CUR/NEXT here since all the blanks are on
1736 * the ASCII range.
1737 */
1738 if ((ctxt->inputNr == 1) && (ctxt->instate != XML_PARSER_DTD)) {
1739 const xmlChar *cur;
1740 /*
1741 * if we are in the document content, go really fast
1742 */
1743 cur = ctxt->input->cur;
1744 while (IS_BLANK_CH(*cur)) {
1745 if (*cur == '\n') {
1746 ctxt->input->line++; ctxt->input->col = 1;
1747 }
1748 cur++;
1749 res++;
1750 if (*cur == 0) {
1751 ctxt->input->cur = cur;
1752 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1753 cur = ctxt->input->cur;
1754 }
1755 }
1756 ctxt->input->cur = cur;
1757 } else {
1758 int cur;
1759 do {
1760 cur = CUR;
1761 while (IS_BLANK_CH(cur)) { /* CHECKED tstblanks.xml */
1762 NEXT;
1763 cur = CUR;
1764 res++;
1765 }
1766 while ((cur == 0) && (ctxt->inputNr > 1) &&
1767 (ctxt->instate != XML_PARSER_COMMENT)) {
1768 xmlPopInput(ctxt);
1769 cur = CUR;
1770 }
1771 /*
1772 * Need to handle support of entities branching here
1773 */
1774 if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);
1775 } while (IS_BLANK(cur)); /* CHECKED tstblanks.xml */
1776 }
1777 return(res);
1778}
1779
1780/************************************************************************
1781 * *
1782 * Commodity functions to handle entities *
1783 * *
1784 ************************************************************************/
1785
1786/**
1787 * xmlPopInput:
1788 * @ctxt: an XML parser context
1789 *
1790 * xmlPopInput: the current input pointed by ctxt->input came to an end
1791 * pop it and return the next char.
1792 *
1793 * Returns the current xmlChar in the parser context
1794 */
1795xmlChar
1796xmlPopInput(xmlParserCtxtPtr ctxt) {
1797 if ((ctxt == NULL) || (ctxt->inputNr <= 1)) return(0);
1798 if (xmlParserDebugEntities)
1799 xmlGenericError(xmlGenericErrorContext,
1800 "Popping input %d\n", ctxt->inputNr);
1801 xmlFreeInputStream(inputPop(ctxt));
1802 if ((*ctxt->input->cur == 0) &&
1803 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
1804 return(xmlPopInput(ctxt));
1805 return(CUR);
1806}
1807
1808/**
1809 * xmlPushInput:
1810 * @ctxt: an XML parser context
1811 * @input: an XML parser input fragment (entity, XML fragment ...).
1812 *
1813 * xmlPushInput: switch to a new input stream which is stacked on top
1814 * of the previous one(s).
1815 */
1816void
1817xmlPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr input) {
1818 if (input == NULL) return;
1819
1820 if (xmlParserDebugEntities) {
1821 if ((ctxt->input != NULL) && (ctxt->input->filename))
1822 xmlGenericError(xmlGenericErrorContext,
1823 "%s(%d): ", ctxt->input->filename,
1824 ctxt->input->line);
1825 xmlGenericError(xmlGenericErrorContext,
1826 "Pushing input %d : %.30s\n", ctxt->inputNr+1, input->cur);
1827 }
1828 inputPush(ctxt, input);
1829 GROW;
1830}
1831
/**
 * xmlParseCharRef:
 * @ctxt:  an XML parser context
 *
 * parse Reference declarations
 *
 * [66] CharRef ::= '&#' [0-9]+ ';' |
 *                  '&#x' [0-9a-fA-F]+ ';'
 *
 * [ WFC: Legal Character ]
 * Characters referred to using character references must match the
 * production for Char.
 *
 * The reference is consumed from the current input of @ctxt.  The value
 * is accumulated in an unsigned int; as soon as it exceeds 0x10FFFF the
 * reference is flagged as out of range and rejected at the end, whatever
 * the accumulator holds by then.
 *
 * Returns the value parsed (as an int), 0 in case of error
 */
int
xmlParseCharRef(xmlParserCtxtPtr ctxt) {
    unsigned int val = 0;
    int count = 0;
    /* non-zero once val has exceeded 0x10FFFF at any point */
    unsigned int outofrange = 0;

    /*
     * Using RAW/CUR/NEXT is okay since we are working on ASCII range here
     */
    if ((RAW == '&') && (NXT(1) == '#') &&
        (NXT(2) == 'x')) {
        /* hexadecimal form: &#x...; */
        SKIP(3);
        GROW;
        while (RAW != ';') { /* loop blocked by count */
            /*
             * count is bumped here and again at the bottom of the loop;
             * once it is found above 20 it is reset and the input is
             * grown.
             */
            if (count++ > 20) {
                count = 0;
                GROW;
            }
            /*
             * NOTE(review): hex letters are only accepted while
             * count < 20 whereas decimal digits always are; this looks
             * unintentional - confirm before relying on it.
             */
            if ((RAW >= '0') && (RAW <= '9'))
                val = val * 16 + (CUR - '0');
            else if ((RAW >= 'a') && (RAW <= 'f') && (count < 20))
                val = val * 16 + (CUR - 'a') + 10;
            else if ((RAW >= 'A') && (RAW <= 'F') && (count < 20))
                val = val * 16 + (CUR - 'A') + 10;
            else {
                xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
                val = 0;
                break;
            }
            if (val > 0x10FFFF)
                outofrange = val;

            NEXT;
            count++;
        }
        if (RAW == ';') {
            /* on purpose to avoid reentrancy problems with NEXT and SKIP */
            ctxt->input->col++;
            ctxt->nbChars ++;
            ctxt->input->cur++;
        }
    } else if  ((RAW == '&') && (NXT(1) == '#')) {
        /* decimal form: &#...; */
        SKIP(2);
        GROW;
        while (RAW != ';') { /* loop blocked by count */
            if (count++ > 20) {
                count = 0;
                GROW;
            }
            if ((RAW >= '0') && (RAW <= '9'))
                val = val * 10 + (CUR - '0');
            else {
                xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
                val = 0;
                break;
            }
            if (val > 0x10FFFF)
                outofrange = val;

            NEXT;
            count++;
        }
        if (RAW == ';') {
            /* on purpose to avoid reentrancy problems with NEXT and SKIP */
            ctxt->input->col++;
            ctxt->nbChars ++;
            ctxt->input->cur++;
        }
    } else {
        /* not a character reference; val is still 0 at this point */
        xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
    }

    /*
     * [ WFC: Legal Character ]
     * Characters referred to using character references must match the
     * production for Char.
     */
    if ((IS_CHAR(val) && (outofrange == 0))) {
        return(val);
    } else {
        /* also reached after the errors above, where val was reset to 0 */
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
                          "xmlParseCharRef: invalid xmlChar value %d\n",
                          val);
    }
    return(0);
}
1933
1934/**
1935 * xmlParseStringCharRef:
1936 * @ctxt: an XML parser context
1937 * @str: a pointer to an index in the string
1938 *
1939 * parse Reference declarations, variant parsing from a string rather
1940 * than an an input flow.
1941 *
1942 * [66] CharRef ::= '&#' [0-9]+ ';' |
1943 * '&#x' [0-9a-fA-F]+ ';'
1944 *
1945 * [ WFC: Legal Character ]
1946 * Characters referred to using character references must match the
1947 * production for Char.
1948 *
1949 * Returns the value parsed (as an int), 0 in case of error, str will be
1950 * updated to the current value of the index
1951 */
1952static int
1953xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xmlChar **str) {
1954 const xmlChar *ptr;
1955 xmlChar cur;
1956 unsigned int val = 0;
1957 unsigned int outofrange = 0;
1958
1959 if ((str == NULL) || (*str == NULL)) return(0);
1960 ptr = *str;
1961 cur = *ptr;
1962 if ((cur == '&') && (ptr[1] == '#') && (ptr[2] == 'x')) {
1963 ptr += 3;
1964 cur = *ptr;
1965 while (cur != ';') { /* Non input consuming loop */
1966 if ((cur >= '0') && (cur <= '9'))
1967 val = val * 16 + (cur - '0');
1968 else if ((cur >= 'a') && (cur <= 'f'))
1969 val = val * 16 + (cur - 'a') + 10;
1970 else if ((cur >= 'A') && (cur <= 'F'))
1971 val = val * 16 + (cur - 'A') + 10;
1972 else {
1973 xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
1974 val = 0;
1975 break;
1976 }
1977 if (val > 0x10FFFF)
1978 outofrange = val;
1979
1980 ptr++;
1981 cur = *ptr;
1982 }
1983 if (cur == ';')
1984 ptr++;
1985 } else if ((cur == '&') && (ptr[1] == '#')){
1986 ptr += 2;
1987 cur = *ptr;
1988 while (cur != ';') { /* Non input consuming loops */
1989 if ((cur >= '0') && (cur <= '9'))
1990 val = val * 10 + (cur - '0');
1991 else {
1992 xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
1993 val = 0;
1994 break;
1995 }
1996 if (val > 0x10FFFF)
1997 outofrange = val;
1998
1999 ptr++;
2000 cur = *ptr;
2001 }
2002 if (cur == ';')
2003 ptr++;
2004 } else {
2005 xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
2006 return(0);
2007 }
2008 *str = ptr;
2009
2010 /*
2011 * [ WFC: Legal Character ]
2012 * Characters referred to using character references must match the
2013 * production for Char.
2014 */
2015 if ((IS_CHAR(val) && (outofrange == 0))) {
2016 return(val);
2017 } else {
2018 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2019 "xmlParseStringCharRef: invalid xmlChar value %d\n",
2020 val);
2021 }
2022 return(0);
2023}
2024
2025/**
2026 * xmlNewBlanksWrapperInputStream:
2027 * @ctxt: an XML parser context
2028 * @entity: an Entity pointer
2029 *
2030 * Create a new input stream for wrapping
2031 * blanks around a PEReference
2032 *
2033 * Returns the new input stream or NULL
2034 */
2035
2036static void deallocblankswrapper (xmlChar *str) {xmlFree(str);}
2037
2038static xmlParserInputPtr
2039xmlNewBlanksWrapperInputStream(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
2040 xmlParserInputPtr input;
2041 xmlChar *buffer;
2042 size_t length;
2043 if (entity == NULL) {
2044 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
2045 "xmlNewBlanksWrapperInputStream entity\n");
2046 return(NULL);
2047 }
2048 if (xmlParserDebugEntities)
2049 xmlGenericError(xmlGenericErrorContext,
2050 "new blanks wrapper for entity: %s\n", entity->name);
2051 input = xmlNewInputStream(ctxt);
2052 if (input == NULL) {
2053 return(NULL);
2054 }
2055 length = xmlStrlen(entity->name) + 5;
2056 buffer = xmlMallocAtomic(length);
2057 if (buffer == NULL) {
2058 xmlErrMemory(ctxt, NULL);
2059 return(NULL);
2060 }
2061 buffer [0] = ' ';
2062 buffer [1] = '%';
2063 buffer [length-3] = ';';
2064 buffer [length-2] = ' ';
2065 buffer [length-1] = 0;
2066 memcpy(buffer + 2, entity->name, length - 5);
2067 input->free = deallocblankswrapper;
2068 input->base = buffer;
2069 input->cur = buffer;
2070 input->length = length;
2071 input->end = &buffer[length];
2072 return(input);
2073}
2074
/**
 * xmlParserHandlePEReference:
 * @ctxt:  the parser context
 *
 * [69] PEReference ::= '%' Name ';'
 *
 * [ WFC: No Recursion ]
 * A parsed entity must not contain a recursive
 * reference to itself, either directly or indirectly.
 *
 * [ WFC: Entity Declared ]
 * In a document without any DTD, a document with only an internal DTD
 * subset which contains no parameter entity references, or a document
 * with "standalone='yes'", ...  ... The declaration of a parameter
 * entity must precede any reference to it...
 *
 * [ VC: Entity Declared ]
 * In a document with an external subset or external parameter entities
 * with "standalone='no'", ...  ... The declaration of a parameter entity
 * must precede any reference to it...
 *
 * [ WFC: In DTD ]
 * Parameter-entity references may only appear in the DTD.
 * NOTE: misleading but this is handled.
 *
 * A PEReference may have been detected in the current input stream
 * the handling is done accordingly to
 *      http://www.w3.org/TR/REC-xml#entproc
 * i.e.
 *   - Included in literal in entity values
 *   - Included as Parameter Entity reference within DTDs
 *
 * Does nothing unless the current character is '%'.  Depending on the
 * parser state the reference is ignored, reported as an error, or (in
 * the DTD) expanded by pushing new inputs on the input stack.  Expansion
 * takes two passes: the first pushes a blanks wrapper holding
 * " %name; ", and when the reference is met again inside that wrapper
 * the entity content itself is pushed.
 */
void
xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) {
    const xmlChar *name;
    xmlEntityPtr entity = NULL;
    xmlParserInputPtr input;

    if (RAW != '%') return;
    /* only XML_PARSER_DTD gets past this switch; no default case */
    switch(ctxt->instate) {
	case XML_PARSER_CDATA_SECTION:
	    return;
        case XML_PARSER_COMMENT:
	    return;
	case XML_PARSER_START_TAG:
	    return;
	case XML_PARSER_END_TAG:
	    return;
        case XML_PARSER_EOF:
	    xmlFatalErr(ctxt, XML_ERR_PEREF_AT_EOF, NULL);
	    return;
        case XML_PARSER_PROLOG:
	case XML_PARSER_START:
	case XML_PARSER_MISC:
	    xmlFatalErr(ctxt, XML_ERR_PEREF_IN_PROLOG, NULL);
	    return;
	case XML_PARSER_ENTITY_DECL:
        case XML_PARSER_CONTENT:
        case XML_PARSER_ATTRIBUTE_VALUE:
        case XML_PARSER_PI:
	case XML_PARSER_SYSTEM_LITERAL:
	case XML_PARSER_PUBLIC_LITERAL:
	    /* we just ignore it there */
	    return;
        case XML_PARSER_EPILOG:
	    xmlFatalErr(ctxt, XML_ERR_PEREF_IN_EPILOG, NULL);
	    return;
	case XML_PARSER_ENTITY_VALUE:
	    /*
	     * NOTE: in the case of entity values, we don't do the
	     *       substitution here since we need the literal
	     *       entity value to be able to save the internal
	     *       subset of the document.
	     *       This will be handled by xmlStringDecodeEntities
	     */
	    return;
        case XML_PARSER_DTD:
	    /*
	     * [WFC: Well-Formedness Constraint: PEs in Internal Subset]
	     * In the internal DTD subset, parameter-entity references
	     * can occur only where markup declarations can occur, not
	     * within markup declarations.
	     * In that case this is handled in xmlParseMarkupDecl
	     */
	    if ((ctxt->external == 0) && (ctxt->inputNr == 1))
		return;
	    /* a '%' followed by a blank or the end of data is no reference */
	    if (IS_BLANK_CH(NXT(1)) || NXT(1) == 0)
		return;
            break;
        case XML_PARSER_IGNORE:
            return;
    }

    /* step over the '%' and read the entity name */
    NEXT;
    name = xmlParseName(ctxt);
    if (xmlParserDebugEntities)
	xmlGenericError(xmlGenericErrorContext,
		"PEReference: %s\n", name);
    if (name == NULL) {
	xmlFatalErr(ctxt, XML_ERR_PEREF_NO_NAME, NULL);
    } else {
	if (RAW == ';') {
	    NEXT;
	    /* the entity is looked up through the SAX handler, if any */
	    if ((ctxt->sax != NULL) && (ctxt->sax->getParameterEntity != NULL))
		entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
	    if (entity == NULL) {

		/*
		 * [ WFC: Entity Declared ]
		 * In a document without any DTD, a document with only an
		 * internal DTD subset which contains no parameter entity
		 * references, or a document with "standalone='yes'", ...
		 * ... The declaration of a parameter entity must precede
		 * any reference to it...
		 */
		if ((ctxt->standalone == 1) ||
		    ((ctxt->hasExternalSubset == 0) &&
		     (ctxt->hasPErefs == 0))) {
		    xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
			 "PEReference: %%%s; not found\n", name);
	        } else {
		    /*
		     * [ VC: Entity Declared ]
		     * In a document with an external subset or external
		     * parameter entities with "standalone='no'", ...
		     * ... The declaration of a parameter entity must precede
		     * any reference to it...
		     */
		    if ((ctxt->validate) && (ctxt->vctxt.error != NULL)) {
		        xmlValidityError(ctxt, XML_WAR_UNDECLARED_ENTITY,
			                 "PEReference: %%%s; not found\n",
				         name);
		    } else
		        xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
			              "PEReference: %%%s; not found\n",
				      name, NULL);
		    ctxt->valid = 0;
		}
	    } else if (ctxt->input->free != deallocblankswrapper) {
		    /*
		     * first pass: the current input is not a blanks wrapper
		     * yet, so push one around the reference.  A NULL input
		     * is ignored by xmlPushInput().
		     */
		    input = xmlNewBlanksWrapperInputStream(ctxt, entity);
		    xmlPushInput(ctxt, input);
	    } else {
	        /* second pass, inside the wrapper: push the entity itself */
	        if ((entity->etype == XML_INTERNAL_PARAMETER_ENTITY) ||
		    (entity->etype == XML_EXTERNAL_PARAMETER_ENTITY)) {
		    xmlChar start[4];
		    xmlCharEncoding enc;

		    /*
		     * handle the extra spaces added before and after
		     * c.f. http://www.w3.org/TR/REC-xml#as-PE
		     * this is done independently.
		     */
		    input = xmlNewEntityInputStream(ctxt, entity);
		    xmlPushInput(ctxt, input);

		    /*
		     * Get the 4 first bytes and decode the charset
		     * if enc != XML_CHAR_ENCODING_NONE
		     * plug some encoding conversion routines.
		     * Note that, since we may have some non-UTF8
		     * encoding (like UTF16, bug 135229), the 'length'
		     * is not known, but we can calculate based upon
		     * the amount of data in the buffer.
		     */
		    /* GROW already ends with ';' - no separator needed */
		    GROW
		    if ((ctxt->input->end - ctxt->input->cur)>=4) {
			start[0] = RAW;
			start[1] = NXT(1);
			start[2] = NXT(2);
			start[3] = NXT(3);
			enc = xmlDetectCharEncoding(start, 4);
			if (enc != XML_CHAR_ENCODING_NONE) {
			    xmlSwitchEncoding(ctxt, enc);
			}
		    }

		    /* an external entity may begin with a text declaration */
		    if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
			(CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l' )) &&
			(IS_BLANK_CH(NXT(5)))) {
			xmlParseTextDecl(ctxt);
		    }
		} else {
		    xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
			     "PEReference: %s is not a parameter entity\n",
				      name);
		}
	    }
	} else {
	    xmlFatalErr(ctxt, XML_ERR_PEREF_SEMICOL_MISSING, NULL);
	}
    }
}
2267
/*
 * Macro used to grow the current buffer.
 *
 * @buffer is the name of an xmlChar * variable; its capacity must live in
 * a companion variable called <buffer>_size.  The new capacity is twice
 * the old one plus @n bytes.  If the size computation wraps around, or if
 * xmlRealloc() fails, the macro jumps to a "mem_error" label that the
 * enclosing function has to provide; in that case @buffer and its size are
 * left untouched, so the old allocation is still owned by the caller.
 *
 * @n is parenthesized so that any expression (for instance a conditional
 * expression) can be passed without being torn apart by operator
 * precedence.
 */
#define growBuffer(buffer, n) {                                         \
    xmlChar *tmp;                                                       \
    size_t new_size = buffer##_size * 2 + (n);                          \
    if (new_size < buffer##_size) goto mem_error;                       \
    tmp = (xmlChar *) xmlRealloc(buffer, new_size);                     \
    if (tmp == NULL) goto mem_error;                                    \
    buffer = tmp;                                                       \
    buffer##_size = new_size;                                           \
}
2280
2281/**
2282 * xmlStringLenDecodeEntities:
2283 * @ctxt: the parser context
2284 * @str: the input string
2285 * @len: the string length
2286 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2287 * @end: an end marker xmlChar, 0 if none
2288 * @end2: an end marker xmlChar, 0 if none
2289 * @end3: an end marker xmlChar, 0 if none
2290 *
2291 * Takes a entity string content and process to do the adequate substitutions.
2292 *
2293 * [67] Reference ::= EntityRef | CharRef
2294 *
2295 * [69] PEReference ::= '%' Name ';'
2296 *
2297 * Returns A newly allocated string with the substitution done. The caller
2298 * must deallocate it !
2299 */
2300xmlChar *
2301xmlStringLenDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2302 int what, xmlChar end, xmlChar end2, xmlChar end3) {
2303 xmlChar *buffer = NULL;
2304 size_t buffer_size = 0;
2305 size_t nbchars = 0;
2306
2307 xmlChar *current = NULL;
2308 const xmlChar *last;
2309 xmlEntityPtr ent;
2310 int c,l;
2311
2312 if ((ctxt == NULL) || (str == NULL) || (len < 0))
2313 return(NULL);
2314 last = str + len;
2315
2316 if (ctxt->depth > 40) {
2317 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
2318 return(NULL);
2319 }
2320
2321 /*
2322 * allocate a translation buffer.
2323 */
2324 buffer_size = XML_PARSER_BIG_BUFFER_SIZE;
2325 buffer = (xmlChar *) xmlMallocAtomic(buffer_size);
2326 if (buffer == NULL) goto mem_error;
2327
2328 /*
2329 * OK loop until we reach one of the ending char or a size limit.
2330 * we are operating on already parsed values.
2331 */
2332 if (str < last)
2333 c = CUR_SCHAR(str, l);
2334 else
2335 c = 0;
2336 while ((c != 0) && (c != end) && /* non input consuming loop */
2337 (c != end2) && (c != end3)) {
2338
2339 if (c == 0) break;
2340 if ((c == '&') && (str[1] == '#')) {
2341 int val = xmlParseStringCharRef(ctxt, &str);
2342 if (val != 0) {
2343 COPY_BUF(0,buffer,nbchars,val);
2344 }
2345 if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2346 growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2347 }
2348 } else if ((c == '&') && (what & XML_SUBSTITUTE_REF)) {
2349 if (xmlParserDebugEntities)
2350 xmlGenericError(xmlGenericErrorContext,
2351 "String decoding Entity Reference: %.30s\n",
2352 str);
2353 ent = xmlParseStringEntityRef(ctxt, &str);
2354 if (ctxt->lastError.code == XML_ERR_ENTITY_LOOP)
2355 goto int_error;
2356 if (ent != NULL)
2357 ctxt->nbentities += ent->owner;
2358 if ((ent != NULL) &&
2359 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
2360 if (ent->content != NULL) {
2361 COPY_BUF(0,buffer,nbchars,ent->content[0]);
2362 if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2363 growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2364 }
2365 } else {
2366 xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
2367 "predefined entity has no content\n");
2368 }
2369 } else if ((ent != NULL) && (ent->content != NULL)) {
2370 xmlChar *rep;
2371
2372 ctxt->depth++;
2373 rep = xmlStringDecodeEntities(ctxt, ent->content, what,
2374 0, 0, 0);
2375 ctxt->depth--;
2376 if (rep != NULL) {
2377 current = rep;
2378 while (*current != 0) { /* non input consuming loop */
2379 buffer[nbchars++] = *current++;
2380 if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2381 if (xmlParserEntityCheck(ctxt, nbchars, ent)) {
2382 xmlFree(rep);
2383 goto int_error;
2384 }
2385 growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2386 }
2387 }
2388 xmlFree(rep);
2389 }
2390 } else if (ent != NULL) {
2391 int i = xmlStrlen(ent->name);
2392 const xmlChar *cur = ent->name;
2393
2394 buffer[nbchars++] = '&';
2395 if (nbchars + i + XML_PARSER_BUFFER_SIZE > buffer_size) {
2396 growBuffer(buffer, i + XML_PARSER_BUFFER_SIZE);
2397 }
2398 for (;i > 0;i--)
2399 buffer[nbchars++] = *cur++;
2400 buffer[nbchars++] = ';';
2401 }
2402 } else if (c == '%' && (what & XML_SUBSTITUTE_PEREF)) {
2403 if (xmlParserDebugEntities)
2404 xmlGenericError(xmlGenericErrorContext,
2405 "String decoding PE Reference: %.30s\n", str);
2406 ent = xmlParseStringPEReference(ctxt, &str);
2407 if (ctxt->lastError.code == XML_ERR_ENTITY_LOOP)
2408 goto int_error;
2409 if (ent != NULL)
2410 ctxt->nbentities += ent->owner;
2411 if (ent != NULL) {
2412 xmlChar *rep;
2413
2414 ctxt->depth++;
2415 rep = xmlStringDecodeEntities(ctxt, ent->content, what,
2416 0, 0, 0);
2417 ctxt->depth--;
2418 if (rep != NULL) {
2419 current = rep;
2420 while (*current != 0) { /* non input consuming loop */
2421 buffer[nbchars++] = *current++;
2422 if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2423 if (xmlParserEntityCheck(ctxt, nbchars, ent)) {
2424 xmlFree(rep);
2425 goto int_error;
2426 }
2427 growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2428 }
2429 }
2430 xmlFree(rep);
2431 }
2432 }
2433 } else {
2434 COPY_BUF(l,buffer,nbchars,c);
2435 str += l;
2436 if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2437 growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2438 }
2439 }
2440 if (str < last)
2441 c = CUR_SCHAR(str, l);
2442 else
2443 c = 0;
2444 }
2445 buffer[nbchars++] = 0;
2446 return(buffer);
2447
2448mem_error:
2449 xmlErrMemory(ctxt, NULL);
2450int_error:
2451 if (buffer != NULL)
2452 xmlFree(buffer);
2453 return(NULL);
2454}
2455
2456/**
2457 * xmlStringDecodeEntities:
2458 * @ctxt: the parser context
2459 * @str: the input string
2460 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2461 * @end: an end marker xmlChar, 0 if none
2462 * @end2: an end marker xmlChar, 0 if none
2463 * @end3: an end marker xmlChar, 0 if none
2464 *
2465 * Takes a entity string content and process to do the adequate substitutions.
2466 *
2467 * [67] Reference ::= EntityRef | CharRef
2468 *
2469 * [69] PEReference ::= '%' Name ';'
2470 *
2471 * Returns A newly allocated string with the substitution done. The caller
2472 * must deallocate it !
2473 */
2474xmlChar *
2475xmlStringDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int what,
2476 xmlChar end, xmlChar end2, xmlChar end3) {
2477 if ((ctxt == NULL) || (str == NULL)) return(NULL);
2478 return(xmlStringLenDecodeEntities(ctxt, str, xmlStrlen(str), what,
2479 end, end2, end3));
2480}
2481
2482/************************************************************************
2483 * *
2484 * Commodity functions, cleanup needed ? *
2485 * *
2486 ************************************************************************/
2487
2488/**
2489 * areBlanks:
2490 * @ctxt: an XML parser context
2491 * @str: a xmlChar *
2492 * @len: the size of @str
2493 * @blank_chars: we know the chars are blanks
2494 *
2495 * Is this a sequence of blank chars that one can ignore ?
2496 *
2497 * Returns 1 if ignorable 0 otherwise.
2498 */
2499
2500static int areBlanks(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2501 int blank_chars) {
2502 int i, ret;
2503 xmlNodePtr lastChild;
2504
2505 /*
2506 * Don't spend time trying to differentiate them, the same callback is
2507 * used !
2508 */
2509 if (ctxt->sax->ignorableWhitespace == ctxt->sax->characters)
2510 return(0);
2511
2512 /*
2513 * Check for xml:space value.
2514 */
2515 if ((ctxt->space == NULL) || (*(ctxt->space) == 1) ||
2516 (*(ctxt->space) == -2))
2517 return(0);
2518
2519 /*
2520 * Check that the string is made of blanks
2521 */
2522 if (blank_chars == 0) {
2523 for (i = 0;i < len;i++)
2524 if (!(IS_BLANK_CH(str[i]))) return(0);
2525 }
2526
2527 /*
2528 * Look if the element is mixed content in the DTD if available
2529 */
2530 if (ctxt->node == NULL) return(0);
2531 if (ctxt->myDoc != NULL) {
2532 ret = xmlIsMixedElement(ctxt->myDoc, ctxt->node->name);
2533 if (ret == 0) return(1);
2534 if (ret == 1) return(0);
2535 }
2536
2537 /*
2538 * Otherwise, heuristic :-\
2539 */
2540 if ((RAW != '<') && (RAW != 0xD)) return(0);
2541 if ((ctxt->node->children == NULL) &&
2542 (RAW == '<') && (NXT(1) == '/')) return(0);
2543
2544 lastChild = xmlGetLastChild(ctxt->node);
2545 if (lastChild == NULL) {
2546 if ((ctxt->node->type != XML_ELEMENT_NODE) &&
2547 (ctxt->node->content != NULL)) return(0);
2548 } else if (xmlNodeIsText(lastChild))
2549 return(0);
2550 else if ((ctxt->node->children != NULL) &&
2551 (xmlNodeIsText(ctxt->node->children)))
2552 return(0);
2553 return(1);
2554}
2555
2556/************************************************************************
2557 * *
2558 * Extra stuff for namespace support *
2559 * Relates to http://www.w3.org/TR/WD-xml-names *
2560 * *
2561 ************************************************************************/
2562
2563/**
2564 * xmlSplitQName:
2565 * @ctxt: an XML parser context
2566 * @name: an XML parser context
2567 * @prefix: a xmlChar **
2568 *
2569 * parse an UTF8 encoded XML qualified name string
2570 *
2571 * [NS 5] QName ::= (Prefix ':')? LocalPart
2572 *
2573 * [NS 6] Prefix ::= NCName
2574 *
2575 * [NS 7] LocalPart ::= NCName
2576 *
2577 * Returns the local part, and prefix is updated
2578 * to get the Prefix if any.
2579 */
2580
2581xmlChar *
2582xmlSplitQName(xmlParserCtxtPtr ctxt, const xmlChar *name, xmlChar **prefix) {
2583 xmlChar buf[XML_MAX_NAMELEN + 5];
2584 xmlChar *buffer = NULL;
2585 int len = 0;
2586 int max = XML_MAX_NAMELEN;
2587 xmlChar *ret = NULL;
2588 const xmlChar *cur = name;
2589 int c;
2590
2591 if (prefix == NULL) return(NULL);
2592 *prefix = NULL;
2593
2594 if (cur == NULL) return(NULL);
2595
2596#ifndef XML_XML_NAMESPACE
2597 /* xml: prefix is not really a namespace */
2598 if ((cur[0] == 'x') && (cur[1] == 'm') &&
2599 (cur[2] == 'l') && (cur[3] == ':'))
2600 return(xmlStrdup(name));
2601#endif
2602
2603 /* nasty but well=formed */
2604 if (cur[0] == ':')
2605 return(xmlStrdup(name));
2606
2607 c = *cur++;
2608 while ((c != 0) && (c != ':') && (len < max)) { /* tested bigname.xml */
2609 buf[len++] = c;
2610 c = *cur++;
2611 }
2612 if (len >= max) {
2613 /*
2614 * Okay someone managed to make a huge name, so he's ready to pay
2615 * for the processing speed.
2616 */
2617 max = len * 2;
2618
2619 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
2620 if (buffer == NULL) {
2621 xmlErrMemory(ctxt, NULL);
2622 return(NULL);
2623 }
2624 memcpy(buffer, buf, len);
2625 while ((c != 0) && (c != ':')) { /* tested bigname.xml */
2626 if (len + 10 > max) {
2627 xmlChar *tmp;
2628
2629 max *= 2;
2630 tmp = (xmlChar *) xmlRealloc(buffer,
2631 max * sizeof(xmlChar));
2632 if (tmp == NULL) {
2633 xmlFree(tmp);
2634 xmlErrMemory(ctxt, NULL);
2635 return(NULL);
2636 }
2637 buffer = tmp;
2638 }
2639 buffer[len++] = c;
2640 c = *cur++;
2641 }
2642 buffer[len] = 0;
2643 }
2644
2645 if ((c == ':') && (*cur == 0)) {
2646 if (buffer != NULL)
2647 xmlFree(buffer);
2648 *prefix = NULL;
2649 return(xmlStrdup(name));
2650 }
2651
2652 if (buffer == NULL)
2653 ret = xmlStrndup(buf, len);
2654 else {
2655 ret = buffer;
2656 buffer = NULL;
2657 max = XML_MAX_NAMELEN;
2658 }
2659
2660
2661 if (c == ':') {
2662 c = *cur;
2663 *prefix = ret;
2664 if (c == 0) {
2665 return(xmlStrndup(BAD_CAST "", 0));
2666 }
2667 len = 0;
2668
2669 /*
2670 * Check that the first character is proper to start
2671 * a new name
2672 */
2673 if (!(((c >= 0x61) && (c <= 0x7A)) ||
2674 ((c >= 0x41) && (c <= 0x5A)) ||
2675 (c == '_') || (c == ':'))) {
2676 int l;
2677 int first = CUR_SCHAR(cur, l);
2678
2679 if (!IS_LETTER(first) && (first != '_')) {
2680 xmlFatalErrMsgStr(ctxt, XML_NS_ERR_QNAME,
2681 "Name %s is not XML Namespace compliant\n",
2682 name);
2683 }
2684 }
2685 cur++;
2686
2687 while ((c != 0) && (len < max)) { /* tested bigname2.xml */
2688 buf[len++] = c;
2689 c = *cur++;
2690 }
2691 if (len >= max) {
2692 /*
2693 * Okay someone managed to make a huge name, so he's ready to pay
2694 * for the processing speed.
2695 */
2696 max = len * 2;
2697
2698 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
2699 if (buffer == NULL) {
2700 xmlErrMemory(ctxt, NULL);
2701 return(NULL);
2702 }
2703 memcpy(buffer, buf, len);
2704 while (c != 0) { /* tested bigname2.xml */
2705 if (len + 10 > max) {
2706 xmlChar *tmp;
2707
2708 max *= 2;
2709 tmp = (xmlChar *) xmlRealloc(buffer,
2710 max * sizeof(xmlChar));
2711 if (tmp == NULL) {
2712 xmlErrMemory(ctxt, NULL);
2713 xmlFree(buffer);
2714 return(NULL);
2715 }
2716 buffer = tmp;
2717 }
2718 buffer[len++] = c;
2719 c = *cur++;
2720 }
2721 buffer[len] = 0;
2722 }
2723
2724 if (buffer == NULL)
2725 ret = xmlStrndup(buf, len);
2726 else {
2727 ret = buffer;
2728 }
2729 }
2730
2731 return(ret);
2732}
2733
2734/************************************************************************
2735 * *
2736 * The parser itself *
2737 * Relates to http://www.w3.org/TR/REC-xml *
2738 * *
2739 ************************************************************************/
2740
/*
 * Forward declarations of static helpers that are called further down
 * before their own definitions appear: xmlParseNameComplex() is the slow
 * path of xmlParseName(), xmlParseAttValueInternal() backs
 * xmlParseAttValue().
 * NOTE(review): the definition of xmlParseAttValueInternal() is not in
 * this part of the file -- presumably it follows later; confirm.
 */
static const xmlChar * xmlParseNameComplex(xmlParserCtxtPtr ctxt);
static xmlChar * xmlParseAttValueInternal(xmlParserCtxtPtr ctxt,
                                          int *len, int *alloc, int normalize);
2744
2745/**
2746 * xmlParseName:
2747 * @ctxt: an XML parser context
2748 *
2749 * parse an XML name.
2750 *
2751 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
2752 * CombiningChar | Extender
2753 *
2754 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
2755 *
2756 * [6] Names ::= Name (#x20 Name)*
2757 *
2758 * Returns the Name parsed or NULL
2759 */
2760
2761const xmlChar *
2762xmlParseName(xmlParserCtxtPtr ctxt) {
2763 const xmlChar *in;
2764 const xmlChar *ret;
2765 int count = 0;
2766
2767 GROW;
2768
2769 /*
2770 * Accelerator for simple ASCII names
2771 */
2772 in = ctxt->input->cur;
2773 if (((*in >= 0x61) && (*in <= 0x7A)) ||
2774 ((*in >= 0x41) && (*in <= 0x5A)) ||
2775 (*in == '_') || (*in == ':')) {
2776 in++;
2777 while (((*in >= 0x61) && (*in <= 0x7A)) ||
2778 ((*in >= 0x41) && (*in <= 0x5A)) ||
2779 ((*in >= 0x30) && (*in <= 0x39)) ||
2780 (*in == '_') || (*in == '-') ||
2781 (*in == ':') || (*in == '.'))
2782 in++;
2783 if ((*in > 0) && (*in < 0x80)) {
2784 count = in - ctxt->input->cur;
2785 ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
2786 ctxt->input->cur = in;
2787 ctxt->nbChars += count;
2788 ctxt->input->col += count;
2789 if (ret == NULL)
2790 xmlErrMemory(ctxt, NULL);
2791 return(ret);
2792 }
2793 }
2794 return(xmlParseNameComplex(ctxt));
2795}
2796
2797/**
2798 * xmlParseNameAndCompare:
2799 * @ctxt: an XML parser context
2800 *
2801 * parse an XML name and compares for match
2802 * (specialized for endtag parsing)
2803 *
2804 * Returns NULL for an illegal name, (xmlChar*) 1 for success
2805 * and the name for mismatch
2806 */
2807
2808static const xmlChar *
2809xmlParseNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *other) {
2810 register const xmlChar *cmp = other;
2811 register const xmlChar *in;
2812 const xmlChar *ret;
2813
2814 GROW;
2815
2816 in = ctxt->input->cur;
2817 while (*in != 0 && *in == *cmp) {
2818 ++in;
2819 ++cmp;
2820 ctxt->input->col++;
2821 }
2822 if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
2823 /* success */
2824 ctxt->input->cur = in;
2825 return (const xmlChar*) 1;
2826 }
2827 /* failure (or end of input buffer), check with full function */
2828 ret = xmlParseName (ctxt);
2829 /* strings coming from the dictionnary direct compare possible */
2830 if (ret == other) {
2831 return (const xmlChar*) 1;
2832 }
2833 return ret;
2834}
2835
2836static const xmlChar *
2837xmlParseNameComplex(xmlParserCtxtPtr ctxt) {
2838 int len = 0, l;
2839 int c;
2840 int count = 0;
2841
2842 /*
2843 * Handler for more complex cases
2844 */
2845 GROW;
2846 c = CUR_CHAR(l);
2847 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
2848 (!IS_LETTER(c) && (c != '_') &&
2849 (c != ':'))) {
2850 return(NULL);
2851 }
2852
2853 while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
2854 ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
2855 (c == '.') || (c == '-') ||
2856 (c == '_') || (c == ':') ||
2857 (IS_COMBINING(c)) ||
2858 (IS_EXTENDER(c)))) {
2859 if (count++ > 100) {
2860 count = 0;
2861 GROW;
2862 }
2863 len += l;
2864 NEXTL(l);
2865 c = CUR_CHAR(l);
2866 }
2867 if ((*ctxt->input->cur == '\n') && (ctxt->input->cur[-1] == '\r'))
2868 return(xmlDictLookup(ctxt->dict, ctxt->input->cur - (len + 1), len));
2869 return(xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len));
2870}
2871
2872/**
2873 * xmlParseStringName:
2874 * @ctxt: an XML parser context
2875 * @str: a pointer to the string pointer (IN/OUT)
2876 *
2877 * parse an XML name.
2878 *
2879 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
2880 * CombiningChar | Extender
2881 *
2882 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
2883 *
2884 * [6] Names ::= Name (#x20 Name)*
2885 *
2886 * Returns the Name parsed or NULL. The @str pointer
2887 * is updated to the current location in the string.
2888 */
2889
2890static xmlChar *
2891xmlParseStringName(xmlParserCtxtPtr ctxt, const xmlChar** str) {
2892 xmlChar buf[XML_MAX_NAMELEN + 5];
2893 const xmlChar *cur = *str;
2894 int len = 0, l;
2895 int c;
2896
2897 c = CUR_SCHAR(cur, l);
2898 if (!IS_LETTER(c) && (c != '_') &&
2899 (c != ':')) {
2900 return(NULL);
2901 }
2902
2903 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigentname.xml */
2904 (c == '.') || (c == '-') ||
2905 (c == '_') || (c == ':') ||
2906 (IS_COMBINING(c)) ||
2907 (IS_EXTENDER(c))) {
2908 COPY_BUF(l,buf,len,c);
2909 cur += l;
2910 c = CUR_SCHAR(cur, l);
2911 if (len >= XML_MAX_NAMELEN) { /* test bigentname.xml */
2912 /*
2913 * Okay someone managed to make a huge name, so he's ready to pay
2914 * for the processing speed.
2915 */
2916 xmlChar *buffer;
2917 int max = len * 2;
2918
2919 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
2920 if (buffer == NULL) {
2921 xmlErrMemory(ctxt, NULL);
2922 return(NULL);
2923 }
2924 memcpy(buffer, buf, len);
2925 while ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
2926 /* test bigentname.xml */
2927 (c == '.') || (c == '-') ||
2928 (c == '_') || (c == ':') ||
2929 (IS_COMBINING(c)) ||
2930 (IS_EXTENDER(c))) {
2931 if (len + 10 > max) {
2932 xmlChar *tmp;
2933 max *= 2;
2934 tmp = (xmlChar *) xmlRealloc(buffer,
2935 max * sizeof(xmlChar));
2936 if (tmp == NULL) {
2937 xmlErrMemory(ctxt, NULL);
2938 xmlFree(buffer);
2939 return(NULL);
2940 }
2941 buffer = tmp;
2942 }
2943 COPY_BUF(l,buffer,len,c);
2944 cur += l;
2945 c = CUR_SCHAR(cur, l);
2946 }
2947 buffer[len] = 0;
2948 *str = cur;
2949 return(buffer);
2950 }
2951 }
2952 *str = cur;
2953 return(xmlStrndup(buf, len));
2954}
2955
2956/**
2957 * xmlParseNmtoken:
2958 * @ctxt: an XML parser context
2959 *
2960 * parse an XML Nmtoken.
2961 *
2962 * [7] Nmtoken ::= (NameChar)+
2963 *
2964 * [8] Nmtokens ::= Nmtoken (#x20 Nmtoken)*
2965 *
2966 * Returns the Nmtoken parsed or NULL
2967 */
2968
2969xmlChar *
2970xmlParseNmtoken(xmlParserCtxtPtr ctxt) {
2971 xmlChar buf[XML_MAX_NAMELEN + 5];
2972 int len = 0, l;
2973 int c;
2974 int count = 0;
2975
2976 GROW;
2977 c = CUR_CHAR(l);
2978
2979 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigtoken.xml */
2980 (c == '.') || (c == '-') ||
2981 (c == '_') || (c == ':') ||
2982 (IS_COMBINING(c)) ||
2983 (IS_EXTENDER(c))) {
2984 if (count++ > 100) {
2985 count = 0;
2986 GROW;
2987 }
2988 COPY_BUF(l,buf,len,c);
2989 NEXTL(l);
2990 c = CUR_CHAR(l);
2991 if (len >= XML_MAX_NAMELEN) {
2992 /*
2993 * Okay someone managed to make a huge token, so he's ready to pay
2994 * for the processing speed.
2995 */
2996 xmlChar *buffer;
2997 int max = len * 2;
2998
2999 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
3000 if (buffer == NULL) {
3001 xmlErrMemory(ctxt, NULL);
3002 return(NULL);
3003 }
3004 memcpy(buffer, buf, len);
3005 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigtoken.xml */
3006 (c == '.') || (c == '-') ||
3007 (c == '_') || (c == ':') ||
3008 (IS_COMBINING(c)) ||
3009 (IS_EXTENDER(c))) {
3010 if (count++ > 100) {
3011 count = 0;
3012 GROW;
3013 }
3014 if (len + 10 > max) {
3015 xmlChar *tmp;
3016
3017 max *= 2;
3018 tmp = (xmlChar *) xmlRealloc(buffer,
3019 max * sizeof(xmlChar));
3020 if (tmp == NULL) {
3021 xmlErrMemory(ctxt, NULL);
3022 xmlFree(buffer);
3023 return(NULL);
3024 }
3025 buffer = tmp;
3026 }
3027 COPY_BUF(l,buffer,len,c);
3028 NEXTL(l);
3029 c = CUR_CHAR(l);
3030 }
3031 buffer[len] = 0;
3032 return(buffer);
3033 }
3034 }
3035 if (len == 0)
3036 return(NULL);
3037 return(xmlStrndup(buf, len));
3038}
3039
3040/**
3041 * xmlParseEntityValue:
3042 * @ctxt: an XML parser context
3043 * @orig: if non-NULL store a copy of the original entity value
3044 *
3045 * parse a value for ENTITY declarations
3046 *
3047 * [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' |
3048 * "'" ([^%&'] | PEReference | Reference)* "'"
3049 *
3050 * Returns the EntityValue parsed with reference substituted or NULL
3051 */
3052
3053xmlChar *
3054xmlParseEntityValue(xmlParserCtxtPtr ctxt, xmlChar **orig) {
3055 xmlChar *buf = NULL;
3056 int len = 0;
3057 int size = XML_PARSER_BUFFER_SIZE;
3058 int c, l;
3059 xmlChar stop;
3060 xmlChar *ret = NULL;
3061 const xmlChar *cur = NULL;
3062 xmlParserInputPtr input;
3063
3064 if (RAW == '"') stop = '"';
3065 else if (RAW == '\'') stop = '\'';
3066 else {
3067 xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_STARTED, NULL);
3068 return(NULL);
3069 }
3070 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
3071 if (buf == NULL) {
3072 xmlErrMemory(ctxt, NULL);
3073 return(NULL);
3074 }
3075
3076 /*
3077 * The content of the entity definition is copied in a buffer.
3078 */
3079
3080 ctxt->instate = XML_PARSER_ENTITY_VALUE;
3081 input = ctxt->input;
3082 GROW;
3083 NEXT;
3084 c = CUR_CHAR(l);
3085 /*
3086 * NOTE: 4.4.5 Included in Literal
3087 * When a parameter entity reference appears in a literal entity
3088 * value, ... a single or double quote character in the replacement
3089 * text is always treated as a normal data character and will not
3090 * terminate the literal.
3091 * In practice it means we stop the loop only when back at parsing
3092 * the initial entity and the quote is found
3093 */
3094 while ((IS_CHAR(c)) && ((c != stop) || /* checked */
3095 (ctxt->input != input))) {
3096 if (len + 5 >= size) {
3097 xmlChar *tmp;
3098
3099 size *= 2;
3100 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3101 if (tmp == NULL) {
3102 xmlErrMemory(ctxt, NULL);
3103 xmlFree(buf);
3104 return(NULL);
3105 }
3106 buf = tmp;
3107 }
3108 COPY_BUF(l,buf,len,c);
3109 NEXTL(l);
3110 /*
3111 * Pop-up of finished entities.
3112 */
3113 while ((RAW == 0) && (ctxt->inputNr > 1)) /* non input consuming */
3114 xmlPopInput(ctxt);
3115
3116 GROW;
3117 c = CUR_CHAR(l);
3118 if (c == 0) {
3119 GROW;
3120 c = CUR_CHAR(l);
3121 }
3122 }
3123 buf[len] = 0;
3124
3125 /*
3126 * Raise problem w.r.t. '&' and '%' being used in non-entities
3127 * reference constructs. Note Charref will be handled in
3128 * xmlStringDecodeEntities()
3129 */
3130 cur = buf;
3131 while (*cur != 0) { /* non input consuming */
3132 if ((*cur == '%') || ((*cur == '&') && (cur[1] != '#'))) {
3133 xmlChar *name;
3134 xmlChar tmp = *cur;
3135
3136 cur++;
3137 name = xmlParseStringName(ctxt, &cur);
3138 if ((name == NULL) || (*cur != ';')) {
3139 xmlFatalErrMsgInt(ctxt, XML_ERR_ENTITY_CHAR_ERROR,
3140 "EntityValue: '%c' forbidden except for entities references\n",
3141 tmp);
3142 }
3143 if ((tmp == '%') && (ctxt->inSubset == 1) &&
3144 (ctxt->inputNr == 1)) {
3145 xmlFatalErr(ctxt, XML_ERR_ENTITY_PE_INTERNAL, NULL);
3146 }
3147 if (name != NULL)
3148 xmlFree(name);
3149 if (*cur == 0)
3150 break;
3151 }
3152 cur++;
3153 }
3154
3155 /*
3156 * Then PEReference entities are substituted.
3157 */
3158 if (c != stop) {
3159 xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_FINISHED, NULL);
3160 xmlFree(buf);
3161 } else {
3162 NEXT;
3163 /*
3164 * NOTE: 4.4.7 Bypassed
3165 * When a general entity reference appears in the EntityValue in
3166 * an entity declaration, it is bypassed and left as is.
3167 * so XML_SUBSTITUTE_REF is not set here.
3168 */
3169 ret = xmlStringDecodeEntities(ctxt, buf, XML_SUBSTITUTE_PEREF,
3170 0, 0, 0);
3171 if (orig != NULL)
3172 *orig = buf;
3173 else
3174 xmlFree(buf);
3175 }
3176
3177 return(ret);
3178}
3179
3180/**
3181 * xmlParseAttValueComplex:
3182 * @ctxt: an XML parser context
3183 * @len: the resulting attribute len
3184 * @normalize: wether to apply the inner normalization
3185 *
3186 * parse a value for an attribute, this is the fallback function
3187 * of xmlParseAttValue() when the attribute parsing requires handling
3188 * of non-ASCII characters, or normalization compaction.
3189 *
3190 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
3191 */
3192static xmlChar *
3193xmlParseAttValueComplex(xmlParserCtxtPtr ctxt, int *attlen, int normalize) {
3194 xmlChar limit = 0;
3195 xmlChar *buf = NULL;
3196 size_t len = 0;
3197 size_t buf_size = 0;
3198 int c, l, in_space = 0;
3199 xmlChar *current = NULL;
3200 xmlEntityPtr ent;
3201
3202 if (NXT(0) == '"') {
3203 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
3204 limit = '"';
3205 NEXT;
3206 } else if (NXT(0) == '\'') {
3207 limit = '\'';
3208 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
3209 NEXT;
3210 } else {
3211 xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
3212 return(NULL);
3213 }
3214
3215 /*
3216 * allocate a translation buffer.
3217 */
3218 buf_size = XML_PARSER_BUFFER_SIZE;
3219 buf = (xmlChar *) xmlMallocAtomic(buf_size);
3220 if (buf == NULL) goto mem_error;
3221
3222 /*
3223 * OK loop until we reach one of the ending char or a size limit.
3224 */
3225 c = CUR_CHAR(l);
3226 while ((NXT(0) != limit) && /* checked */
3227 (IS_CHAR(c)) && (c != '<')) {
3228 if (c == 0) break;
3229 if (c == '&') {
3230 in_space = 0;
3231 if (NXT(1) == '#') {
3232 int val = xmlParseCharRef(ctxt);
3233
3234 if (val == '&') {
3235 if (ctxt->replaceEntities) {
3236 if (len + 10 > buf_size) {
3237 growBuffer(buf, 10);
3238 }
3239 buf[len++] = '&';
3240 } else {
3241 /*
3242 * The reparsing will be done in xmlStringGetNodeList()
3243 * called by the attribute() function in SAX.c
3244 */
3245 if (len + 10 > buf_size) {
3246 growBuffer(buf, 10);
3247 }
3248 buf[len++] = '&';
3249 buf[len++] = '#';
3250 buf[len++] = '3';
3251 buf[len++] = '8';
3252 buf[len++] = ';';
3253 }
3254 } else {
3255 if (len + 10 > buf_size) {
3256 growBuffer(buf, 10);
3257 }
3258 len += xmlCopyChar(0, &buf[len], val);
3259 }
3260 } else {
3261 ent = xmlParseEntityRef(ctxt);
3262 ctxt->nbentities++;
3263 if (ent != NULL)
3264 ctxt->nbentities += ent->owner;
3265 if ((ent != NULL) &&
3266 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
3267 if (len + 10 > buf_size) {
3268 growBuffer(buf, 10);
3269 }
3270 if ((ctxt->replaceEntities == 0) &&
3271 (ent->content[0] == '&')) {
3272 buf[len++] = '&';
3273 buf[len++] = '#';
3274 buf[len++] = '3';
3275 buf[len++] = '8';
3276 buf[len++] = ';';
3277 } else {
3278 buf[len++] = ent->content[0];
3279 }
3280 } else if ((ent != NULL) &&
3281 (ctxt->replaceEntities != 0)) {
3282 xmlChar *rep;
3283
3284 if (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) {
3285 rep = xmlStringDecodeEntities(ctxt, ent->content,
3286 XML_SUBSTITUTE_REF,
3287 0, 0, 0);
3288 if (rep != NULL) {
3289 current = rep;
3290 while (*current != 0) { /* non input consuming */
3291 buf[len++] = *current++;
3292 if (len + 10 > buf_size) {
3293 growBuffer(buf, 10);
3294 }
3295 }
3296 xmlFree(rep);
3297 }
3298 } else {
3299 if (len + 10 > buf_size) {
3300 growBuffer(buf, 10);
3301 }
3302 if (ent->content != NULL)
3303 buf[len++] = ent->content[0];
3304 }
3305 } else if (ent != NULL) {
3306 int i = xmlStrlen(ent->name);
3307 const xmlChar *cur = ent->name;
3308
3309 /*
3310 * This may look absurd but is needed to detect
3311 * entities problems
3312 */
3313 if ((ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
3314 (ent->content != NULL)) {
3315 xmlChar *rep;
3316 rep = xmlStringDecodeEntities(ctxt, ent->content,
3317 XML_SUBSTITUTE_REF, 0, 0, 0);
3318 if (rep != NULL)
3319 xmlFree(rep);
3320 }
3321
3322 /*
3323 * Just output the reference
3324 */
3325 buf[len++] = '&';
3326 while (len + i + 10 > buf_size) {
3327 growBuffer(buf, i + 10);
3328 }
3329 for (;i > 0;i--)
3330 buf[len++] = *cur++;
3331 buf[len++] = ';';
3332 }
3333 }
3334 } else {
3335 if ((c == 0x20) || (c == 0xD) || (c == 0xA) || (c == 0x9)) {
3336 if ((len != 0) || (!normalize)) {
3337 if ((!normalize) || (!in_space)) {
3338 COPY_BUF(l,buf,len,0x20);
3339 if (len + 10 > buf_size) {
3340 growBuffer(buf, 10);
3341 }
3342 }
3343 in_space = 1;
3344 }
3345 } else {
3346 in_space = 0;
3347 COPY_BUF(l,buf,len,c);
3348 if (len + 10 > buf_size) {
3349 growBuffer(buf, 10);
3350 }
3351 }
3352 NEXTL(l);
3353 }
3354 GROW;
3355 c = CUR_CHAR(l);
3356 }
3357 if ((in_space) && (normalize)) {
3358 while ((len > 0) && (buf[len - 1] == 0x20)) len--;
3359 }
3360 buf[len] = 0;
3361 if (RAW == '<') {
3362 xmlFatalErr(ctxt, XML_ERR_LT_IN_ATTRIBUTE, NULL);
3363 } else if (RAW != limit) {
3364 if ((c != 0) && (!IS_CHAR(c))) {
3365 xmlFatalErrMsg(ctxt, XML_ERR_INVALID_CHAR,
3366 "invalid character in attribute value\n");
3367 } else {
3368 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
3369 "AttValue: ' expected\n");
3370 }
3371 } else
3372 NEXT;
3373
3374 /*
3375 * There we potentially risk an overflow, don't allow attribute value of
3376 * lenght more than INT_MAX it is a very reasonnable assumption !
3377 */
3378 if (len >= INT_MAX) {
3379 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
3380 "AttValue lenght too long\n");
3381 goto mem_error;
3382 }
3383
3384 if (attlen != NULL) *attlen = (int) len;
3385 return(buf);
3386
3387mem_error:
3388 xmlErrMemory(ctxt, NULL);
3389 return(NULL);
3390}
3391
3392/**
3393 * xmlParseAttValue:
3394 * @ctxt: an XML parser context
3395 *
3396 * parse a value for an attribute
3397 * Note: the parser won't do substitution of entities here, this
3398 * will be handled later in xmlStringGetNodeList
3399 *
3400 * [10] AttValue ::= '"' ([^<&"] | Reference)* '"' |
3401 * "'" ([^<&'] | Reference)* "'"
3402 *
3403 * 3.3.3 Attribute-Value Normalization:
3404 * Before the value of an attribute is passed to the application or
3405 * checked for validity, the XML processor must normalize it as follows:
3406 * - a character reference is processed by appending the referenced
3407 * character to the attribute value
3408 * - an entity reference is processed by recursively processing the
3409 * replacement text of the entity
3410 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
3411 * appending #x20 to the normalized value, except that only a single
3412 * #x20 is appended for a "#xD#xA" sequence that is part of an external
3413 * parsed entity or the literal entity value of an internal parsed entity
3414 * - other characters are processed by appending them to the normalized value
3415 * If the declared value is not CDATA, then the XML processor must further
3416 * process the normalized attribute value by discarding any leading and
3417 * trailing space (#x20) characters, and by replacing sequences of space
3418 * (#x20) characters by a single space (#x20) character.
3419 * All attributes for which no declaration has been read should be treated
3420 * by a non-validating parser as if declared CDATA.
3421 *
3422 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
3423 */
3424
3425
3426xmlChar *
3427xmlParseAttValue(xmlParserCtxtPtr ctxt) {
3428 if ((ctxt == NULL) || (ctxt->input == NULL)) return(NULL);
3429 return(xmlParseAttValueInternal(ctxt, NULL, NULL, 0));
3430}
3431
3432/**
3433 * xmlParseSystemLiteral:
3434 * @ctxt: an XML parser context
3435 *
3436 * parse an XML Literal
3437 *
3438 * [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
3439 *
3440 * Returns the SystemLiteral parsed or NULL
3441 */
3442
3443xmlChar *
3444xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) {
3445 xmlChar *buf = NULL;
3446 int len = 0;
3447 int size = XML_PARSER_BUFFER_SIZE;
3448 int cur, l;
3449 xmlChar stop;
3450 int state = ctxt->instate;
3451 int count = 0;
3452
3453 SHRINK;
3454 if (RAW == '"') {
3455 NEXT;
3456 stop = '"';
3457 } else if (RAW == '\'') {
3458 NEXT;
3459 stop = '\'';
3460 } else {
3461 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
3462 return(NULL);
3463 }
3464
3465 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
3466 if (buf == NULL) {
3467 xmlErrMemory(ctxt, NULL);
3468 return(NULL);
3469 }
3470 ctxt->instate = XML_PARSER_SYSTEM_LITERAL;
3471 cur = CUR_CHAR(l);
3472 while ((IS_CHAR(cur)) && (cur != stop)) { /* checked */
3473 if (len + 5 >= size) {
3474 xmlChar *tmp;
3475
3476 size *= 2;
3477 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3478 if (tmp == NULL) {
3479 xmlFree(buf);
3480 xmlErrMemory(ctxt, NULL);
3481 ctxt->instate = (xmlParserInputState) state;
3482 return(NULL);
3483 }
3484 buf = tmp;
3485 }
3486 count++;
3487 if (count > 50) {
3488 GROW;
3489 count = 0;
3490 }
3491 COPY_BUF(l,buf,len,cur);
3492 NEXTL(l);
3493 cur = CUR_CHAR(l);
3494 if (cur == 0) {
3495 GROW;
3496 SHRINK;
3497 cur = CUR_CHAR(l);
3498 }
3499 }
3500 buf[len] = 0;
3501 ctxt->instate = (xmlParserInputState) state;
3502 if (!IS_CHAR(cur)) {
3503 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
3504 } else {
3505 NEXT;
3506 }
3507 return(buf);
3508}
3509
3510/**
3511 * xmlParsePubidLiteral:
3512 * @ctxt: an XML parser context
3513 *
3514 * parse an XML public literal
3515 *
3516 * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
3517 *
3518 * Returns the PubidLiteral parsed or NULL.
3519 */
3520
3521xmlChar *
3522xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) {
3523 xmlChar *buf = NULL;
3524 int len = 0;
3525 int size = XML_PARSER_BUFFER_SIZE;
3526 xmlChar cur;
3527 xmlChar stop;
3528 int count = 0;
3529 xmlParserInputState oldstate = ctxt->instate;
3530
3531 SHRINK;
3532 if (RAW == '"') {
3533 NEXT;
3534 stop = '"';
3535 } else if (RAW == '\'') {
3536 NEXT;
3537 stop = '\'';
3538 } else {
3539 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
3540 return(NULL);
3541 }
3542 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
3543 if (buf == NULL) {
3544 xmlErrMemory(ctxt, NULL);
3545 return(NULL);
3546 }
3547 ctxt->instate = XML_PARSER_PUBLIC_LITERAL;
3548 cur = CUR;
3549 while ((IS_PUBIDCHAR_CH(cur)) && (cur != stop)) { /* checked */
3550 if (len + 1 >= size) {
3551 xmlChar *tmp;
3552
3553 size *= 2;
3554 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3555 if (tmp == NULL) {
3556 xmlErrMemory(ctxt, NULL);
3557 xmlFree(buf);
3558 return(NULL);
3559 }
3560 buf = tmp;
3561 }
3562 buf[len++] = cur;
3563 count++;
3564 if (count > 50) {
3565 GROW;
3566 count = 0;
3567 }
3568 NEXT;
3569 cur = CUR;
3570 if (cur == 0) {
3571 GROW;
3572 SHRINK;
3573 cur = CUR;
3574 }
3575 }
3576 buf[len] = 0;
3577 if (cur != stop) {
3578 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
3579 } else {
3580 NEXT;
3581 }
3582 ctxt->instate = oldstate;
3583 return(buf);
3584}
3585
3586void xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata);
3587
/*
 * Lookup table used by the inner loop of the character data fast path.
 * An entry is non-zero (it holds the byte value itself) for the bytes the
 * fast path may step over: 0x09 and 0x20..0x7F except '&', '<' and ']'.
 * Every other byte, including LF, CR and all non-ASCII values, maps to 0
 * and stops the loop.
 */
static const unsigned char test_char_data[256] = {
    /* 0x00: only 0x09 is accepted, CR/LF are handled separately */
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x09, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0x10 */
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0x20: '&' (0x26) stops the scan */
    0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x00, 0x27,
    0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x2D, 0x2E, 0x2F,
    /* 0x30: '<' (0x3C) stops the scan */
    0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,
    0x38, 0x39, 0x3A, 0x3B, 0x00, 0x3D, 0x3E, 0x3F,
    /* 0x40 */
    0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47,
    0x48, 0x49, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F,
    /* 0x50: ']' (0x5D) stops the scan */
    0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57,
    0x58, 0x59, 0x5A, 0x5B, 0x5C, 0x00, 0x5E, 0x5F,
    /* 0x60 */
    0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
    0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
    /* 0x70 */
    0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
    0x78, 0x79, 0x7A, 0x7B, 0x7C, 0x7D, 0x7E, 0x7F,
    /* 0x80 .. 0xFF: non-ASCII bytes always stop the scan */
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
};
3625
3626/**
3627 * xmlParseCharData:
3628 * @ctxt: an XML parser context
3629 * @cdata: int indicating whether we are within a CDATA section
3630 *
3631 * parse a CharData section.
3632 * if we are within a CDATA section ']]>' marks an end of section.
3633 *
3634 * The right angle bracket (>) may be represented using the string "&gt;",
3635 * and must, for compatibility, be escaped using "&gt;" or a character
3636 * reference when it appears in the string "]]>" in content, when that
3637 * string is not marking the end of a CDATA section.
3638 *
3639 * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
3640 */
3641
3642void
3643xmlParseCharData(xmlParserCtxtPtr ctxt, int cdata) {
3644 const xmlChar *in;
3645 int nbchar = 0;
3646 int line = ctxt->input->line;
3647 int col = ctxt->input->col;
3648 int ccol;
3649
3650 SHRINK;
3651 GROW;
3652 /*
3653 * Accelerated common case where input don't need to be
3654 * modified before passing it to the handler.
3655 */
3656 if (!cdata) {
3657 in = ctxt->input->cur;
3658 do {
3659get_more_space:
3660 while (*in == 0x20) { in++; ctxt->input->col++; }
3661 if (*in == 0xA) {
3662 do {
3663 ctxt->input->line++; ctxt->input->col = 1;
3664 in++;
3665 } while (*in == 0xA);
3666 goto get_more_space;
3667 }
3668 if (*in == '<') {
3669 nbchar = in - ctxt->input->cur;
3670 if (nbchar > 0) {
3671 const xmlChar *tmp = ctxt->input->cur;
3672 ctxt->input->cur = in;
3673
3674 if ((ctxt->sax != NULL) &&
3675 (ctxt->sax->ignorableWhitespace !=
3676 ctxt->sax->characters)) {
3677 if (areBlanks(ctxt, tmp, nbchar, 1)) {
3678 if (ctxt->sax->ignorableWhitespace != NULL)
3679 ctxt->sax->ignorableWhitespace(ctxt->userData,
3680 tmp, nbchar);
3681 } else {
3682 if (ctxt->sax->characters != NULL)
3683 ctxt->sax->characters(ctxt->userData,
3684 tmp, nbchar);
3685 if (*ctxt->space == -1)
3686 *ctxt->space = -2;
3687 }
3688 } else if ((ctxt->sax != NULL) &&
3689 (ctxt->sax->characters != NULL)) {
3690 ctxt->sax->characters(ctxt->userData,
3691 tmp, nbchar);
3692 }
3693 }
3694 return;
3695 }
3696
3697get_more:
3698 ccol = ctxt->input->col;
3699 while (test_char_data[*in]) {
3700 in++;
3701 ccol++;
3702 }
3703 ctxt->input->col = ccol;
3704 if (*in == 0xA) {
3705 do {
3706 ctxt->input->line++; ctxt->input->col = 1;
3707 in++;
3708 } while (*in == 0xA);
3709 goto get_more;
3710 }
3711 if (*in == ']') {
3712 if ((in[1] == ']') && (in[2] == '>')) {
3713 xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
3714 ctxt->input->cur = in;
3715 return;
3716 }
3717 in++;
3718 ctxt->input->col++;
3719 goto get_more;
3720 }
3721 nbchar = in - ctxt->input->cur;
3722 if (nbchar > 0) {
3723 if ((ctxt->sax != NULL) &&
3724 (ctxt->sax->ignorableWhitespace !=
3725 ctxt->sax->characters) &&
3726 (IS_BLANK_CH(*ctxt->input->cur))) {
3727 const xmlChar *tmp = ctxt->input->cur;
3728 ctxt->input->cur = in;
3729
3730 if (areBlanks(ctxt, tmp, nbchar, 0)) {
3731 if (ctxt->sax->ignorableWhitespace != NULL)
3732 ctxt->sax->ignorableWhitespace(ctxt->userData,
3733 tmp, nbchar);
3734 } else {
3735 if (ctxt->sax->characters != NULL)
3736 ctxt->sax->characters(ctxt->userData,
3737 tmp, nbchar);
3738 if (*ctxt->space == -1)
3739 *ctxt->space = -2;
3740 }
3741 line = ctxt->input->line;
3742 col = ctxt->input->col;
3743 } else if (ctxt->sax != NULL) {
3744 if (ctxt->sax->characters != NULL)
3745 ctxt->sax->characters(ctxt->userData,
3746 ctxt->input->cur, nbchar);
3747 line = ctxt->input->line;
3748 col = ctxt->input->col;
3749 }
3750 }
3751 ctxt->input->cur = in;
3752 if (*in == 0xD) {
3753 in++;
3754 if (*in == 0xA) {
3755 ctxt->input->cur = in;
3756 in++;
3757 ctxt->input->line++; ctxt->input->col = 1;
3758 continue; /* while */
3759 }
3760 in--;
3761 }
3762 if (*in == '<') {
3763 return;
3764 }
3765 if (*in == '&') {
3766 return;
3767 }
3768 SHRINK;
3769 GROW;
3770 in = ctxt->input->cur;
3771 } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09));
3772 nbchar = 0;
3773 }
3774 ctxt->input->line = line;
3775 ctxt->input->col = col;
3776 xmlParseCharDataComplex(ctxt, cdata);
3777}
3778
3779/**
3780 * xmlParseCharDataComplex:
3781 * @ctxt: an XML parser context
3782 * @cdata: int indicating whether we are within a CDATA section
3783 *
3784 * parse a CharData section.this is the fallback function
3785 * of xmlParseCharData() when the parsing requires handling
3786 * of non-ASCII characters.
3787 */
3788void
3789xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata) {
3790 xmlChar buf[XML_PARSER_BIG_BUFFER_SIZE + 5];
3791 int nbchar = 0;
3792 int cur, l;
3793 int count = 0;
3794
3795 SHRINK;
3796 GROW;
3797 cur = CUR_CHAR(l);
3798 while ((cur != '<') && /* checked */
3799 (cur != '&') &&
3800 (IS_CHAR(cur))) /* test also done in xmlCurrentChar() */ {
3801 if ((cur == ']') && (NXT(1) == ']') &&
3802 (NXT(2) == '>')) {
3803 if (cdata) break;
3804 else {
3805 xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
3806 }
3807 }
3808 COPY_BUF(l,buf,nbchar,cur);
3809 if (nbchar >= XML_PARSER_BIG_BUFFER_SIZE) {
3810 buf[nbchar] = 0;
3811
3812 /*
3813 * OK the segment is to be consumed as chars.
3814 */
3815 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
3816 if (areBlanks(ctxt, buf, nbchar, 0)) {
3817 if (ctxt->sax->ignorableWhitespace != NULL)
3818 ctxt->sax->ignorableWhitespace(ctxt->userData,
3819 buf, nbchar);
3820 } else {
3821 if (ctxt->sax->characters != NULL)
3822 ctxt->sax->characters(ctxt->userData, buf, nbchar);
3823 if ((ctxt->sax->characters !=
3824 ctxt->sax->ignorableWhitespace) &&
3825 (*ctxt->space == -1))
3826 *ctxt->space = -2;
3827 }
3828 }
3829 nbchar = 0;
3830 }
3831 count++;
3832 if (count > 50) {
3833 GROW;
3834 count = 0;
3835 }
3836 NEXTL(l);
3837 cur = CUR_CHAR(l);
3838 }
3839 if (nbchar != 0) {
3840 buf[nbchar] = 0;
3841 /*
3842 * OK the segment is to be consumed as chars.
3843 */
3844 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
3845 if (areBlanks(ctxt, buf, nbchar, 0)) {
3846 if (ctxt->sax->ignorableWhitespace != NULL)
3847 ctxt->sax->ignorableWhitespace(ctxt->userData, buf, nbchar);
3848 } else {
3849 if (ctxt->sax->characters != NULL)
3850 ctxt->sax->characters(ctxt->userData, buf, nbchar);
3851 if ((ctxt->sax->characters != ctxt->sax->ignorableWhitespace) &&
3852 (*ctxt->space == -1))
3853 *ctxt->space = -2;
3854 }
3855 }
3856 }
3857 if ((cur != 0) && (!IS_CHAR(cur))) {
3858 /* Generate the error and skip the offending character */
3859 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
3860 "PCDATA invalid Char value %d\n",
3861 cur);
3862 NEXTL(l);
3863 }
3864}
3865
3866/**
3867 * xmlParseExternalID:
3868 * @ctxt: an XML parser context
3869 * @publicID: a xmlChar** receiving PubidLiteral
3870 * @strict: indicate whether we should restrict parsing to only
3871 * production [75], see NOTE below
3872 *
3873 * Parse an External ID or a Public ID
3874 *
3875 * NOTE: Productions [75] and [83] interact badly since [75] can generate
3876 * 'PUBLIC' S PubidLiteral S SystemLiteral
3877 *
3878 * [75] ExternalID ::= 'SYSTEM' S SystemLiteral
3879 * | 'PUBLIC' S PubidLiteral S SystemLiteral
3880 *
3881 * [83] PublicID ::= 'PUBLIC' S PubidLiteral
3882 *
3883 * Returns the function returns SystemLiteral and in the second
3884 * case publicID receives PubidLiteral, is strict is off
3885 * it is possible to return NULL and have publicID set.
3886 */
3887
3888xmlChar *
3889xmlParseExternalID(xmlParserCtxtPtr ctxt, xmlChar **publicID, int strict) {
3890 xmlChar *URI = NULL;
3891
3892 SHRINK;
3893
3894 *publicID = NULL;
3895 if (CMP6(CUR_PTR, 'S', 'Y', 'S', 'T', 'E', 'M')) {
3896 SKIP(6);
3897 if (!IS_BLANK_CH(CUR)) {
3898 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
3899 "Space required after 'SYSTEM'\n");
3900 }
3901 SKIP_BLANKS;
3902 URI = xmlParseSystemLiteral(ctxt);
3903 if (URI == NULL) {
3904 xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
3905 }
3906 } else if (CMP6(CUR_PTR, 'P', 'U', 'B', 'L', 'I', 'C')) {
3907 SKIP(6);
3908 if (!IS_BLANK_CH(CUR)) {
3909 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
3910 "Space required after 'PUBLIC'\n");
3911 }
3912 SKIP_BLANKS;
3913 *publicID = xmlParsePubidLiteral(ctxt);
3914 if (*publicID == NULL) {
3915 xmlFatalErr(ctxt, XML_ERR_PUBID_REQUIRED, NULL);
3916 }
3917 if (strict) {
3918 /*
3919 * We don't handle [83] so "S SystemLiteral" is required.
3920 */
3921 if (!IS_BLANK_CH(CUR)) {
3922 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
3923 "Space required after the Public Identifier\n");
3924 }
3925 } else {
3926 /*
3927 * We handle [83] so we return immediately, if
3928 * "S SystemLiteral" is not detected. From a purely parsing
3929 * point of view that's a nice mess.
3930 */
3931 const xmlChar *ptr;
3932 GROW;
3933
3934 ptr = CUR_PTR;
3935 if (!IS_BLANK_CH(*ptr)) return(NULL);
3936
3937 while (IS_BLANK_CH(*ptr)) ptr++; /* TODO: dangerous, fix ! */
3938 if ((*ptr != '\'') && (*ptr != '"')) return(NULL);
3939 }
3940 SKIP_BLANKS;
3941 URI = xmlParseSystemLiteral(ctxt);
3942 if (URI == NULL) {
3943 xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
3944 }
3945 }
3946 return(URI);
3947}
3948
3949/**
3950 * xmlParseCommentComplex:
3951 * @ctxt: an XML parser context
3952 * @buf: the already parsed part of the buffer
3953 * @len: number of bytes filles in the buffer
3954 * @size: allocated size of the buffer
3955 *
3956 * Skip an XML (SGML) comment <!-- .... -->
3957 * The spec says that "For compatibility, the string "--" (double-hyphen)
3958 * must not occur within comments. "
3959 * This is the slow routine in case the accelerator for ascii didn't work
3960 *
3961 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
3962 */
3963static void
3964xmlParseCommentComplex(xmlParserCtxtPtr ctxt, xmlChar *buf, int len, int size) {
3965 int q, ql;
3966 int r, rl;
3967 int cur, l;
3968 xmlParserInputPtr input = ctxt->input;
3969 int count = 0;
3970
3971 if (buf == NULL) {
3972 len = 0;
3973 size = XML_PARSER_BUFFER_SIZE;
3974 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
3975 if (buf == NULL) {
3976 xmlErrMemory(ctxt, NULL);
3977 return;
3978 }
3979 }
3980 GROW; /* Assure there's enough input data */
3981 q = CUR_CHAR(ql);
3982 if (q == 0)
3983 goto not_terminated;
3984 if (!IS_CHAR(q)) {
3985 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
3986 "xmlParseComment: invalid xmlChar value %d\n",
3987 q);
3988 xmlFree (buf);
3989 return;
3990 }
3991 NEXTL(ql);
3992 r = CUR_CHAR(rl);
3993 if (r == 0)
3994 goto not_terminated;
3995 if (!IS_CHAR(r)) {
3996 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
3997 "xmlParseComment: invalid xmlChar value %d\n",
3998 q);
3999 xmlFree (buf);
4000 return;
4001 }
4002 NEXTL(rl);
4003 cur = CUR_CHAR(l);
4004 if (cur == 0)
4005 goto not_terminated;
4006 while (IS_CHAR(cur) && /* checked */
4007 ((cur != '>') ||
4008 (r != '-') || (q != '-'))) {
4009 if ((r == '-') && (q == '-')) {
4010 xmlFatalErr(ctxt, XML_ERR_HYPHEN_IN_COMMENT, NULL);
4011 }
4012 if (len + 5 >= size) {
4013 xmlChar *new_buf;
4014 size *= 2;
4015 new_buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
4016 if (new_buf == NULL) {
4017 xmlFree (buf);
4018 xmlErrMemory(ctxt, NULL);
4019 return;
4020 }
4021 buf = new_buf;
4022 }
4023 COPY_BUF(ql,buf,len,q);
4024 q = r;
4025 ql = rl;
4026 r = cur;
4027 rl = l;
4028
4029 count++;
4030 if (count > 50) {
4031 GROW;
4032 count = 0;
4033 }
4034 NEXTL(l);
4035 cur = CUR_CHAR(l);
4036 if (cur == 0) {
4037 SHRINK;
4038 GROW;
4039 cur = CUR_CHAR(l);
4040 }
4041 }
4042 buf[len] = 0;
4043 if (cur == 0) {
4044 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4045 "Comment not terminated \n<!--%.50s\n", buf);
4046 } else if (!IS_CHAR(cur)) {
4047 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4048 "xmlParseComment: invalid xmlChar value %d\n",
4049 cur);
4050 } else {
4051 if (input != ctxt->input) {
4052 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
4053 "Comment doesn't start and stop in the same entity\n");
4054 }
4055 NEXT;
4056 if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
4057 (!ctxt->disableSAX))
4058 ctxt->sax->comment(ctxt->userData, buf);
4059 }
4060 xmlFree(buf);
4061 return;
4062not_terminated:
4063 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4064 "Comment not terminated\n", NULL);
4065 xmlFree(buf);
4066 return;
4067}
4068
4069/**
4070 * xmlParseComment:
4071 * @ctxt: an XML parser context
4072 *
4073 * Skip an XML (SGML) comment <!-- .... -->
4074 * The spec says that "For compatibility, the string "--" (double-hyphen)
4075 * must not occur within comments. "
4076 *
4077 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
4078 */
4079void
4080xmlParseComment(xmlParserCtxtPtr ctxt) {
4081 xmlChar *buf = NULL;
4082 int size = XML_PARSER_BUFFER_SIZE;
4083 int len = 0;
4084 xmlParserInputState state;
4085 const xmlChar *in;
4086 int nbchar = 0, ccol;
4087
4088 /*
4089 * Check that there is a comment right here.
4090 */
4091 if ((RAW != '<') || (NXT(1) != '!') ||
4092 (NXT(2) != '-') || (NXT(3) != '-')) return;
4093
4094 state = ctxt->instate;
4095 ctxt->instate = XML_PARSER_COMMENT;
4096 SKIP(4);
4097 SHRINK;
4098 GROW;
4099
4100 /*
4101 * Accelerated common case where input don't need to be
4102 * modified before passing it to the handler.
4103 */
4104 in = ctxt->input->cur;
4105 do {
4106 if (*in == 0xA) {
4107 do {
4108 ctxt->input->line++; ctxt->input->col = 1;
4109 in++;
4110 } while (*in == 0xA);
4111 }
4112get_more:
4113 ccol = ctxt->input->col;
4114 while (((*in > '-') && (*in <= 0x7F)) ||
4115 ((*in >= 0x20) && (*in < '-')) ||
4116 (*in == 0x09)) {
4117 in++;
4118 ccol++;
4119 }
4120 ctxt->input->col = ccol;
4121 if (*in == 0xA) {
4122 do {
4123 ctxt->input->line++; ctxt->input->col = 1;
4124 in++;
4125 } while (*in == 0xA);
4126 goto get_more;
4127 }
4128 nbchar = in - ctxt->input->cur;
4129 /*
4130 * save current set of data
4131 */
4132 if (nbchar > 0) {
4133 if ((ctxt->sax != NULL) &&
4134 (ctxt->sax->comment != NULL)) {
4135 if (buf == NULL) {
4136 if ((*in == '-') && (in[1] == '-'))
4137 size = nbchar + 1;
4138 else
4139 size = XML_PARSER_BUFFER_SIZE + nbchar;
4140 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
4141 if (buf == NULL) {
4142 xmlErrMemory(ctxt, NULL);
4143 ctxt->instate = state;
4144 return;
4145 }
4146 len = 0;
4147 } else if (len + nbchar + 1 >= size) {
4148 xmlChar *new_buf;
4149 size += len + nbchar + XML_PARSER_BUFFER_SIZE;
4150 new_buf = (xmlChar *) xmlRealloc(buf,
4151 size * sizeof(xmlChar));
4152 if (new_buf == NULL) {
4153 xmlFree (buf);
4154 xmlErrMemory(ctxt, NULL);
4155 ctxt->instate = state;
4156 return;
4157 }
4158 buf = new_buf;
4159 }
4160 memcpy(&buf[len], ctxt->input->cur, nbchar);
4161 len += nbchar;
4162 buf[len] = 0;
4163 }
4164 }
4165 ctxt->input->cur = in;
4166 if (*in == 0xA) {
4167 in++;
4168 ctxt->input->line++; ctxt->input->col = 1;
4169 }
4170 if (*in == 0xD) {
4171 in++;
4172 if (*in == 0xA) {
4173 ctxt->input->cur = in;
4174 in++;
4175 ctxt->input->line++; ctxt->input->col = 1;
4176 continue; /* while */
4177 }
4178 in--;
4179 }
4180 SHRINK;
4181 GROW;
4182 in = ctxt->input->cur;
4183 if (*in == '-') {
4184 if (in[1] == '-') {
4185 if (in[2] == '>') {
4186 SKIP(3);
4187 if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
4188 (!ctxt->disableSAX)) {
4189 if (buf != NULL)
4190 ctxt->sax->comment(ctxt->userData, buf);
4191 else
4192 ctxt->sax->comment(ctxt->userData, BAD_CAST "");
4193 }
4194 if (buf != NULL)
4195 xmlFree(buf);
4196 ctxt->instate = state;
4197 return;
4198 }
4199 if (buf != NULL)
4200 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4201 "Comment not terminated \n<!--%.50s\n",
4202 buf);
4203 else
4204 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4205 "Comment not terminated \n", NULL);
4206 in++;
4207 ctxt->input->col++;
4208 }
4209 in++;
4210 ctxt->input->col++;
4211 goto get_more;
4212 }
4213 } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09));
4214 xmlParseCommentComplex(ctxt, buf, len, size);
4215 ctxt->instate = state;
4216 return;
4217}
4218
4219
4220/**
4221 * xmlParsePITarget:
4222 * @ctxt: an XML parser context
4223 *
4224 * parse the name of a PI
4225 *
4226 * [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
4227 *
4228 * Returns the PITarget name or NULL
4229 */
4230
4231const xmlChar *
4232xmlParsePITarget(xmlParserCtxtPtr ctxt) {
4233 const xmlChar *name;
4234
4235 name = xmlParseName(ctxt);
4236 if ((name != NULL) &&
4237 ((name[0] == 'x') || (name[0] == 'X')) &&
4238 ((name[1] == 'm') || (name[1] == 'M')) &&
4239 ((name[2] == 'l') || (name[2] == 'L'))) {
4240 int i;
4241 if ((name[0] == 'x') && (name[1] == 'm') &&
4242 (name[2] == 'l') && (name[3] == 0)) {
4243 xmlFatalErrMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
4244 "XML declaration allowed only at the start of the document\n");
4245 return(name);
4246 } else if (name[3] == 0) {
4247 xmlFatalErr(ctxt, XML_ERR_RESERVED_XML_NAME, NULL);
4248 return(name);
4249 }
4250 for (i = 0;;i++) {
4251 if (xmlW3CPIs[i] == NULL) break;
4252 if (xmlStrEqual(name, (const xmlChar *)xmlW3CPIs[i]))
4253 return(name);
4254 }
4255 xmlWarningMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
4256 "xmlParsePITarget: invalid name prefix 'xml'\n",
4257 NULL, NULL);
4258 }
4259 return(name);
4260}
4261
#ifdef LIBXML_CATALOG_ENABLED
/**
 * xmlParseCatalogPI:
 * @ctxt:  an XML parser context
 * @catalog:  the PI value string
 *
 * Parse an XML Catalog Processing Instruction.
 *
 * <?oasis-xml-catalog catalog="http://example.com/catalog.xml"?>
 *
 * Occurs only if allowed by the user and if happening in the Misc
 * part of the document before any doctype information.
 * This will add the given catalog to the parsing context in order
 * to be used if there is a resolution need further down in the document.
 *
 * The value must read: catalog, '=', then a quoted URL, with optional
 * blanks around each part and nothing after the closing quote. A missing
 * '=' makes the function return silently; every other syntax problem
 * raises the XML_WAR_CATALOG_PI warning.
 */

static void
xmlParseCatalogPI(xmlParserCtxtPtr ctxt, const xmlChar *catalog) {
    xmlChar *url = NULL;
    const xmlChar *cur, *start;
    xmlChar quote;

    /* The pseudo-attribute name. */
    for (cur = catalog; IS_BLANK_CH(*cur); cur++)
        ;
    if (xmlStrncmp(cur, BAD_CAST"catalog", 7))
        goto error;
    cur += 7;

    /* The equal sign. */
    for (; IS_BLANK_CH(*cur); cur++)
        ;
    if (*cur != '=') {
        return;
    }
    cur++;

    /* The quoted value. */
    for (; IS_BLANK_CH(*cur); cur++)
        ;
    quote = *cur;
    if ((quote != '\'') && (quote != '"'))
        goto error;
    cur++;
    start = cur;
    for (; (*cur != 0) && (*cur != quote); cur++)
        ;
    if (*cur == 0)
        goto error;
    url = xmlStrndup(start, cur - start);
    cur++;

    /* Only blanks may follow the closing quote. */
    for (; IS_BLANK_CH(*cur); cur++)
        ;
    if (*cur != 0)
        goto error;

    if (url != NULL) {
        ctxt->catalogs = xmlCatalogAddLocal(ctxt->catalogs, url);
        xmlFree(url);
    }
    return;

error:
    xmlWarningMsg(ctxt, XML_WAR_CATALOG_PI,
                  "Catalog PI syntax error: %s\n",
                  catalog, NULL);
    if (url != NULL)
        xmlFree(url);
}
#endif
4323
4324/**
4325 * xmlParsePI:
4326 * @ctxt: an XML parser context
4327 *
4328 * parse an XML Processing Instruction.
4329 *
4330 * [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
4331 *
4332 * The processing is transfered to SAX once parsed.
4333 */
4334
4335void
4336xmlParsePI(xmlParserCtxtPtr ctxt) {
4337 xmlChar *buf = NULL;
4338 int len = 0;
4339 int size = XML_PARSER_BUFFER_SIZE;
4340 int cur, l;
4341 const xmlChar *target;
4342 xmlParserInputState state;
4343 int count = 0;
4344
4345 if ((RAW == '<') && (NXT(1) == '?')) {
4346 xmlParserInputPtr input = ctxt->input;
4347 state = ctxt->instate;
4348 ctxt->instate = XML_PARSER_PI;
4349 /*
4350 * this is a Processing Instruction.
4351 */
4352 SKIP(2);
4353 SHRINK;
4354
4355 /*
4356 * Parse the target name and check for special support like
4357 * namespace.
4358 */
4359 target = xmlParsePITarget(ctxt);
4360 if (target != NULL) {
4361 if ((RAW == '?') && (NXT(1) == '>')) {
4362 if (input != ctxt->input) {
4363 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
4364 "PI declaration doesn't start and stop in the same entity\n");
4365 }
4366 SKIP(2);
4367
4368 /*
4369 * SAX: PI detected.
4370 */
4371 if ((ctxt->sax) && (!ctxt->disableSAX) &&
4372 (ctxt->sax->processingInstruction != NULL))
4373 ctxt->sax->processingInstruction(ctxt->userData,
4374 target, NULL);
4375 if (ctxt->instate != XML_PARSER_EOF)
4376 ctxt->instate = state;
4377 return;
4378 }
4379 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
4380 if (buf == NULL) {
4381 xmlErrMemory(ctxt, NULL);
4382 ctxt->instate = state;
4383 return;
4384 }
4385 cur = CUR;
4386 if (!IS_BLANK(cur)) {
4387 xmlFatalErrMsgStr(ctxt, XML_ERR_SPACE_REQUIRED,
4388 "ParsePI: PI %s space expected\n", target);
4389 }
4390 SKIP_BLANKS;
4391 cur = CUR_CHAR(l);
4392 while (IS_CHAR(cur) && /* checked */
4393 ((cur != '?') || (NXT(1) != '>'))) {
4394 if (len + 5 >= size) {
4395 xmlChar *tmp;
4396
4397 size *= 2;
4398 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
4399 if (tmp == NULL) {
4400 xmlErrMemory(ctxt, NULL);
4401 xmlFree(buf);
4402 ctxt->instate = state;
4403 return;
4404 }
4405 buf = tmp;
4406 }
4407 count++;
4408 if (count > 50) {
4409 GROW;
4410 count = 0;
4411 }
4412 COPY_BUF(l,buf,len,cur);
4413 NEXTL(l);
4414 cur = CUR_CHAR(l);
4415 if (cur == 0) {
4416 SHRINK;
4417 GROW;
4418 cur = CUR_CHAR(l);
4419 }
4420 }
4421 buf[len] = 0;
4422 if (cur != '?') {
4423 xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
4424 "ParsePI: PI %s never end ...\n", target);
4425 } else {
4426 if (input != ctxt->input) {
4427 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4428 "PI declaration doesn't start and stop in the same entity\n");
4429 }
4430 SKIP(2);
4431
4432#ifdef LIBXML_CATALOG_ENABLED
4433 if (((state == XML_PARSER_MISC) ||
4434 (state == XML_PARSER_START)) &&
4435 (xmlStrEqual(target, XML_CATALOG_PI))) {
4436 xmlCatalogAllow allow = xmlCatalogGetDefaults();
4437 if ((allow == XML_CATA_ALLOW_DOCUMENT) ||
4438 (allow == XML_CATA_ALLOW_ALL))
4439 xmlParseCatalogPI(ctxt, buf);
4440 }
4441#endif
4442
4443
4444 /*
4445 * SAX: PI detected.
4446 */
4447 if ((ctxt->sax) && (!ctxt->disableSAX) &&
4448 (ctxt->sax->processingInstruction != NULL))
4449 ctxt->sax->processingInstruction(ctxt->userData,
4450 target, buf);
4451 }
4452 xmlFree(buf);
4453 } else {
4454 xmlFatalErr(ctxt, XML_ERR_PI_NOT_STARTED, NULL);
4455 }
4456 if (ctxt->instate != XML_PARSER_EOF)
4457 ctxt->instate = state;
4458 }
4459}
4460
4461/**
4462 * xmlParseNotationDecl:
4463 * @ctxt: an XML parser context
4464 *
4465 * parse a notation declaration
4466 *
4467 * [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID | PublicID) S? '>'
4468 *
4469 * Hence there is actually 3 choices:
4470 * 'PUBLIC' S PubidLiteral
4471 * 'PUBLIC' S PubidLiteral S SystemLiteral
4472 * and 'SYSTEM' S SystemLiteral
4473 *
4474 * See the NOTE on xmlParseExternalID().
4475 */
4476
4477void
4478xmlParseNotationDecl(xmlParserCtxtPtr ctxt) {
4479 const xmlChar *name;
4480 xmlChar *Pubid;
4481 xmlChar *Systemid;
4482
4483 if (CMP10(CUR_PTR, '<', '!', 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
4484 xmlParserInputPtr input = ctxt->input;
4485 SHRINK;
4486 SKIP(10);
4487 if (!IS_BLANK_CH(CUR)) {
4488 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4489 "Space required after '<!NOTATION'\n");
4490 return;
4491 }
4492 SKIP_BLANKS;
4493
4494 name = xmlParseName(ctxt);
4495 if (name == NULL) {
4496 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
4497 return;
4498 }
4499 if (!IS_BLANK_CH(CUR)) {
4500 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4501 "Space required after the NOTATION name'\n");
4502 return;
4503 }
4504 SKIP_BLANKS;
4505
4506 /*
4507 * Parse the IDs.
4508 */
4509 Systemid = xmlParseExternalID(ctxt, &Pubid, 0);
4510 SKIP_BLANKS;
4511
4512 if (RAW == '>') {
4513 if (input != ctxt->input) {
4514 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4515 "Notation declaration doesn't start and stop in the same entity\n");
4516 }
4517 NEXT;
4518 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
4519 (ctxt->sax->notationDecl != NULL))
4520 ctxt->sax->notationDecl(ctxt->userData, name, Pubid, Systemid);
4521 } else {
4522 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
4523 }
4524 if (Systemid != NULL) xmlFree(Systemid);
4525 if (Pubid != NULL) xmlFree(Pubid);
4526 }
4527}
4528
4529/**
4530 * xmlParseEntityDecl:
4531 * @ctxt: an XML parser context
4532 *
4533 * parse <!ENTITY declarations
4534 *
4535 * [70] EntityDecl ::= GEDecl | PEDecl
4536 *
4537 * [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'
4538 *
4539 * [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'
4540 *
4541 * [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?)
4542 *
4543 * [74] PEDef ::= EntityValue | ExternalID
4544 *
4545 * [76] NDataDecl ::= S 'NDATA' S Name
4546 *
4547 * [ VC: Notation Declared ]
4548 * The Name must match the declared name of a notation.
4549 */
4550
4551void
4552xmlParseEntityDecl(xmlParserCtxtPtr ctxt) {
4553 const xmlChar *name = NULL;
4554 xmlChar *value = NULL;
4555 xmlChar *URI = NULL, *literal = NULL;
4556 const xmlChar *ndata = NULL;
4557 int isParameter = 0;
4558 xmlChar *orig = NULL;
4559 int skipped;
4560 unsigned long oldnbent = ctxt->nbentities;
4561
4562 /* GROW; done in the caller */
4563 if (CMP8(CUR_PTR, '<', '!', 'E', 'N', 'T', 'I', 'T', 'Y')) {
4564 xmlParserInputPtr input = ctxt->input;
4565 SHRINK;
4566 SKIP(8);
4567 skipped = SKIP_BLANKS;
4568 if (skipped == 0) {
4569 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4570 "Space required after '<!ENTITY'\n");
4571 }
4572
4573 if (RAW == '%') {
4574 NEXT;
4575 skipped = SKIP_BLANKS;
4576 if (skipped == 0) {
4577 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4578 "Space required after '%'\n");
4579 }
4580 isParameter = 1;
4581 }
4582
4583 name = xmlParseName(ctxt);
4584 if (name == NULL) {
4585 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
4586 "xmlParseEntityDecl: no name\n");
4587 return;
4588 }
4589 skipped = SKIP_BLANKS;
4590 if (skipped == 0) {
4591 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4592 "Space required after the entity name\n");
4593 }
4594
4595 ctxt->instate = XML_PARSER_ENTITY_DECL;
4596 /*
4597 * handle the various case of definitions...
4598 */
4599 if (isParameter) {
4600 if ((RAW == '"') || (RAW == '\'')) {
4601 value = xmlParseEntityValue(ctxt, &orig);
4602 if (value) {
4603 if ((ctxt->sax != NULL) &&
4604 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
4605 ctxt->sax->entityDecl(ctxt->userData, name,
4606 XML_INTERNAL_PARAMETER_ENTITY,
4607 NULL, NULL, value);
4608 }
4609 } else {
4610 URI = xmlParseExternalID(ctxt, &literal, 1);
4611 if ((URI == NULL) && (literal == NULL)) {
4612 xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
4613 }
4614 if (URI) {
4615 xmlURIPtr uri;
4616
4617 uri = xmlParseURI((const char *) URI);
4618 if (uri == NULL) {
4619 xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI,
4620 "Invalid URI: %s\n", URI);
4621 /*
4622 * This really ought to be a well formedness error
4623 * but the XML Core WG decided otherwise c.f. issue
4624 * E26 of the XML erratas.
4625 */
4626 } else {
4627 if (uri->fragment != NULL) {
4628 /*
4629 * Okay this is foolish to block those but not
4630 * invalid URIs.
4631 */
4632 xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
4633 } else {
4634 if ((ctxt->sax != NULL) &&
4635 (!ctxt->disableSAX) &&
4636 (ctxt->sax->entityDecl != NULL))
4637 ctxt->sax->entityDecl(ctxt->userData, name,
4638 XML_EXTERNAL_PARAMETER_ENTITY,
4639 literal, URI, NULL);
4640 }
4641 xmlFreeURI(uri);
4642 }
4643 }
4644 }
4645 } else {
4646 if ((RAW == '"') || (RAW == '\'')) {
4647 value = xmlParseEntityValue(ctxt, &orig);
4648 if ((ctxt->sax != NULL) &&
4649 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
4650 ctxt->sax->entityDecl(ctxt->userData, name,
4651 XML_INTERNAL_GENERAL_ENTITY,
4652 NULL, NULL, value);
4653 /*
4654 * For expat compatibility in SAX mode.
4655 */
4656 if ((ctxt->myDoc == NULL) ||
4657 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
4658 if (ctxt->myDoc == NULL) {
4659 ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
4660 }
4661 if (ctxt->myDoc->intSubset == NULL)
4662 ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
4663 BAD_CAST "fake", NULL, NULL);
4664
4665 xmlSAX2EntityDecl(ctxt, name, XML_INTERNAL_GENERAL_ENTITY,
4666 NULL, NULL, value);
4667 }
4668 } else {
4669 URI = xmlParseExternalID(ctxt, &literal, 1);
4670 if ((URI == NULL) && (literal == NULL)) {
4671 xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
4672 }
4673 if (URI) {
4674 xmlURIPtr uri;
4675
4676 uri = xmlParseURI((const char *)URI);
4677 if (uri == NULL) {
4678 xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI,
4679 "Invalid URI: %s\n", URI);
4680 /*
4681 * This really ought to be a well formedness error
4682 * but the XML Core WG decided otherwise c.f. issue
4683 * E26 of the XML erratas.
4684 */
4685 } else {
4686 if (uri->fragment != NULL) {
4687 /*
4688 * Okay this is foolish to block those but not
4689 * invalid URIs.
4690 */
4691 xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
4692 }
4693 xmlFreeURI(uri);
4694 }
4695 }
4696 if ((RAW != '>') && (!IS_BLANK_CH(CUR))) {
4697 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4698 "Space required before 'NDATA'\n");
4699 }
4700 SKIP_BLANKS;
4701 if (CMP5(CUR_PTR, 'N', 'D', 'A', 'T', 'A')) {
4702 SKIP(5);
4703 if (!IS_BLANK_CH(CUR)) {
4704 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4705 "Space required after 'NDATA'\n");
4706 }
4707 SKIP_BLANKS;
4708 ndata = xmlParseName(ctxt);
4709 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
4710 (ctxt->sax->unparsedEntityDecl != NULL))
4711 ctxt->sax->unparsedEntityDecl(ctxt->userData, name,
4712 literal, URI, ndata);
4713 } else {
4714 if ((ctxt->sax != NULL) &&
4715 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
4716 ctxt->sax->entityDecl(ctxt->userData, name,
4717 XML_EXTERNAL_GENERAL_PARSED_ENTITY,
4718 literal, URI, NULL);
4719 /*
4720 * For expat compatibility in SAX mode.
4721 * assuming the entity repalcement was asked for
4722 */
4723 if ((ctxt->replaceEntities != 0) &&
4724 ((ctxt->myDoc == NULL) ||
4725 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE)))) {
4726 if (ctxt->myDoc == NULL) {
4727 ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
4728 }
4729
4730 if (ctxt->myDoc->intSubset == NULL)
4731 ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
4732 BAD_CAST "fake", NULL, NULL);
4733 xmlSAX2EntityDecl(ctxt, name,
4734 XML_EXTERNAL_GENERAL_PARSED_ENTITY,
4735 literal, URI, NULL);
4736 }
4737 }
4738 }
4739 }
4740 SKIP_BLANKS;
4741 if (RAW != '>') {
4742 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_NOT_FINISHED,
4743 "xmlParseEntityDecl: entity %s not terminated\n", name);
4744 } else {
4745 if (input != ctxt->input) {
4746 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
4747 "Entity declaration doesn't start and stop in the same entity\n");
4748 }
4749 NEXT;
4750 }
4751 if (orig != NULL) {
4752 /*
4753 * Ugly mechanism to save the raw entity value.
4754 */
4755 xmlEntityPtr cur = NULL;
4756
4757 if (isParameter) {
4758 if ((ctxt->sax != NULL) &&
4759 (ctxt->sax->getParameterEntity != NULL))
4760 cur = ctxt->sax->getParameterEntity(ctxt->userData, name);
4761 } else {
4762 if ((ctxt->sax != NULL) &&
4763 (ctxt->sax->getEntity != NULL))
4764 cur = ctxt->sax->getEntity(ctxt->userData, name);
4765 if ((cur == NULL) && (ctxt->userData==ctxt)) {
4766 cur = xmlSAX2GetEntity(ctxt, name);
4767 }
4768 }
4769 if (cur != NULL) {
4770 if ((cur->owner != 0) || (cur->children == NULL)) {
4771 cur->owner = ctxt->nbentities - oldnbent;
4772 if (cur->owner == 0)
4773 cur->owner = 1;
4774 }
4775 if (cur->orig != NULL)
4776 xmlFree(orig);
4777 else
4778 cur->orig = orig;
4779 } else
4780 xmlFree(orig);
4781 }
4782 if (value != NULL) xmlFree(value);
4783 if (URI != NULL) xmlFree(URI);
4784 if (literal != NULL) xmlFree(literal);
4785 }
4786}
4787
4788/**
4789 * xmlParseDefaultDecl:
4790 * @ctxt: an XML parser context
4791 * @value: Receive a possible fixed default value for the attribute
4792 *
4793 * Parse an attribute default declaration
4794 *
4795 * [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue)
4796 *
4797 * [ VC: Required Attribute ]
4798 * if the default declaration is the keyword #REQUIRED, then the
4799 * attribute must be specified for all elements of the type in the
4800 * attribute-list declaration.
4801 *
4802 * [ VC: Attribute Default Legal ]
4803 * The declared default value must meet the lexical constraints of
4804 * the declared attribute type c.f. xmlValidateAttributeDecl()
4805 *
4806 * [ VC: Fixed Attribute Default ]
4807 * if an attribute has a default value declared with the #FIXED
4808 * keyword, instances of that attribute must match the default value.
4809 *
4810 * [ WFC: No < in Attribute Values ]
4811 * handled in xmlParseAttValue()
4812 *
4813 * returns: XML_ATTRIBUTE_NONE, XML_ATTRIBUTE_REQUIRED, XML_ATTRIBUTE_IMPLIED
4814 * or XML_ATTRIBUTE_FIXED.
4815 */
4816
4817int
4818xmlParseDefaultDecl(xmlParserCtxtPtr ctxt, xmlChar **value) {
4819 int val;
4820 xmlChar *ret;
4821
4822 *value = NULL;
4823 if (CMP9(CUR_PTR, '#', 'R', 'E', 'Q', 'U', 'I', 'R', 'E', 'D')) {
4824 SKIP(9);
4825 return(XML_ATTRIBUTE_REQUIRED);
4826 }
4827 if (CMP8(CUR_PTR, '#', 'I', 'M', 'P', 'L', 'I', 'E', 'D')) {
4828 SKIP(8);
4829 return(XML_ATTRIBUTE_IMPLIED);
4830 }
4831 val = XML_ATTRIBUTE_NONE;
4832 if (CMP6(CUR_PTR, '#', 'F', 'I', 'X', 'E', 'D')) {
4833 SKIP(6);
4834 val = XML_ATTRIBUTE_FIXED;
4835 if (!IS_BLANK_CH(CUR)) {
4836 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4837 "Space required after '#FIXED'\n");
4838 }
4839 SKIP_BLANKS;
4840 }
4841 ret = xmlParseAttValue(ctxt);
4842 ctxt->instate = XML_PARSER_DTD;
4843 if (ret == NULL) {
4844 xmlFatalErrMsg(ctxt, (xmlParserErrors)ctxt->errNo,
4845 "Attribute default value declaration error\n");
4846 } else
4847 *value = ret;
4848 return(val);
4849}
4850
4851/**
4852 * xmlParseNotationType:
4853 * @ctxt: an XML parser context
4854 *
4855 * parse an Notation attribute type.
4856 *
4857 * Note: the leading 'NOTATION' S part has already being parsed...
4858 *
4859 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
4860 *
4861 * [ VC: Notation Attributes ]
4862 * Values of this type must match one of the notation names included
4863 * in the declaration; all notation names in the declaration must be declared.
4864 *
4865 * Returns: the notation attribute tree built while parsing
4866 */
4867
4868xmlEnumerationPtr
4869xmlParseNotationType(xmlParserCtxtPtr ctxt) {
4870 const xmlChar *name;
4871 xmlEnumerationPtr ret = NULL, last = NULL, cur;
4872
4873 if (RAW != '(') {
4874 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
4875 return(NULL);
4876 }
4877 SHRINK;
4878 do {
4879 NEXT;
4880 SKIP_BLANKS;
4881 name = xmlParseName(ctxt);
4882 if (name == NULL) {
4883 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
4884 "Name expected in NOTATION declaration\n");
4885 xmlFreeEnumeration(ret);
4886 return(NULL);
4887 }
4888 cur = xmlCreateEnumeration(name);
4889 if (cur == NULL) {
4890 xmlFreeEnumeration(ret);
4891 return(NULL);
4892 }
4893 if (last == NULL) ret = last = cur;
4894 else {
4895 last->next = cur;
4896 last = cur;
4897 }
4898 SKIP_BLANKS;
4899 } while (RAW == '|');
4900 if (RAW != ')') {
4901 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
4902 xmlFreeEnumeration(ret);
4903 return(NULL);
4904 }
4905 NEXT;
4906 return(ret);
4907}
4908
4909/**
4910 * xmlParseEnumerationType:
4911 * @ctxt: an XML parser context
4912 *
4913 * parse an Enumeration attribute type.
4914 *
4915 * [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'
4916 *
4917 * [ VC: Enumeration ]
4918 * Values of this type must match one of the Nmtoken tokens in
4919 * the declaration
4920 *
4921 * Returns: the enumeration attribute tree built while parsing
4922 */
4923
4924xmlEnumerationPtr
4925xmlParseEnumerationType(xmlParserCtxtPtr ctxt) {
4926 xmlChar *name;
4927 xmlEnumerationPtr ret = NULL, last = NULL, cur;
4928
4929 if (RAW != '(') {
4930 xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_STARTED, NULL);
4931 return(NULL);
4932 }
4933 SHRINK;
4934 do {
4935 NEXT;
4936 SKIP_BLANKS;
4937 name = xmlParseNmtoken(ctxt);
4938 if (name == NULL) {
4939 xmlFatalErr(ctxt, XML_ERR_NMTOKEN_REQUIRED, NULL);
4940 return(ret);
4941 }
4942 cur = xmlCreateEnumeration(name);
4943 xmlFree(name);
4944 if (cur == NULL) {
4945 xmlFreeEnumeration(ret);
4946 return(NULL);
4947 }
4948 if (last == NULL) ret = last = cur;
4949 else {
4950 last->next = cur;
4951 last = cur;
4952 }
4953 SKIP_BLANKS;
4954 } while (RAW == '|');
4955 if (RAW != ')') {
4956 xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_FINISHED, NULL);
4957 return(ret);
4958 }
4959 NEXT;
4960 return(ret);
4961}
4962
4963/**
4964 * xmlParseEnumeratedType:
4965 * @ctxt: an XML parser context
4966 * @tree: the enumeration tree built while parsing
4967 *
4968 * parse an Enumerated attribute type.
4969 *
4970 * [57] EnumeratedType ::= NotationType | Enumeration
4971 *
4972 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
4973 *
4974 *
4975 * Returns: XML_ATTRIBUTE_ENUMERATION or XML_ATTRIBUTE_NOTATION
4976 */
4977
4978int
4979xmlParseEnumeratedType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
4980 if (CMP8(CUR_PTR, 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
4981 SKIP(8);
4982 if (!IS_BLANK_CH(CUR)) {
4983 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4984 "Space required after 'NOTATION'\n");
4985 return(0);
4986 }
4987 SKIP_BLANKS;
4988 *tree = xmlParseNotationType(ctxt);
4989 if (*tree == NULL) return(0);
4990 return(XML_ATTRIBUTE_NOTATION);
4991 }
4992 *tree = xmlParseEnumerationType(ctxt);
4993 if (*tree == NULL) return(0);
4994 return(XML_ATTRIBUTE_ENUMERATION);
4995}
4996
4997/**
4998 * xmlParseAttributeType:
4999 * @ctxt: an XML parser context
5000 * @tree: the enumeration tree built while parsing
5001 *
5002 * parse the Attribute list def for an element
5003 *
5004 * [54] AttType ::= StringType | TokenizedType | EnumeratedType
5005 *
5006 * [55] StringType ::= 'CDATA'
5007 *
5008 * [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' |
5009 * 'ENTITIES' | 'NMTOKEN' | 'NMTOKENS'
5010 *
5011 * Validity constraints for attribute values syntax are checked in
5012 * xmlValidateAttributeValue()
5013 *
5014 * [ VC: ID ]
5015 * Values of type ID must match the Name production. A name must not
5016 * appear more than once in an XML document as a value of this type;
5017 * i.e., ID values must uniquely identify the elements which bear them.
5018 *
5019 * [ VC: One ID per Element Type ]
5020 * No element type may have more than one ID attribute specified.
5021 *
5022 * [ VC: ID Attribute Default ]
5023 * An ID attribute must have a declared default of #IMPLIED or #REQUIRED.
5024 *
5025 * [ VC: IDREF ]
5026 * Values of type IDREF must match the Name production, and values
5027 * of type IDREFS must match Names; each IDREF Name must match the value
5028 * of an ID attribute on some element in the XML document; i.e. IDREF
5029 * values must match the value of some ID attribute.
5030 *
5031 * [ VC: Entity Name ]
5032 * Values of type ENTITY must match the Name production, values
5033 * of type ENTITIES must match Names; each Entity Name must match the
5034 * name of an unparsed entity declared in the DTD.
5035 *
5036 * [ VC: Name Token ]
5037 * Values of type NMTOKEN must match the Nmtoken production; values
5038 * of type NMTOKENS must match Nmtokens.
5039 *
5040 * Returns the attribute type
5041 */
5042int
5043xmlParseAttributeType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
5044 SHRINK;
5045 if (CMP5(CUR_PTR, 'C', 'D', 'A', 'T', 'A')) {
5046 SKIP(5);
5047 return(XML_ATTRIBUTE_CDATA);
5048 } else if (CMP6(CUR_PTR, 'I', 'D', 'R', 'E', 'F', 'S')) {
5049 SKIP(6);
5050 return(XML_ATTRIBUTE_IDREFS);
5051 } else if (CMP5(CUR_PTR, 'I', 'D', 'R', 'E', 'F')) {
5052 SKIP(5);
5053 return(XML_ATTRIBUTE_IDREF);
5054 } else if ((RAW == 'I') && (NXT(1) == 'D')) {
5055 SKIP(2);
5056 return(XML_ATTRIBUTE_ID);
5057 } else if (CMP6(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'Y')) {
5058 SKIP(6);
5059 return(XML_ATTRIBUTE_ENTITY);
5060 } else if (CMP8(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'I', 'E', 'S')) {
5061 SKIP(8);
5062 return(XML_ATTRIBUTE_ENTITIES);
5063 } else if (CMP8(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N', 'S')) {
5064 SKIP(8);
5065 return(XML_ATTRIBUTE_NMTOKENS);
5066 } else if (CMP7(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N')) {
5067 SKIP(7);
5068 return(XML_ATTRIBUTE_NMTOKEN);
5069 }
5070 return(xmlParseEnumeratedType(ctxt, tree));
5071}
5072
5073/**
5074 * xmlParseAttributeListDecl:
5075 * @ctxt: an XML parser context
5076 *
5077 * : parse the Attribute list def for an element
5078 *
5079 * [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
5080 *
5081 * [53] AttDef ::= S Name S AttType S DefaultDecl
5082 *
5083 */
5084void
5085xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt) {
5086 const xmlChar *elemName;
5087 const xmlChar *attrName;
5088 xmlEnumerationPtr tree;
5089
5090 if (CMP9(CUR_PTR, '<', '!', 'A', 'T', 'T', 'L', 'I', 'S', 'T')) {
5091 xmlParserInputPtr input = ctxt->input;
5092
5093 SKIP(9);
5094 if (!IS_BLANK_CH(CUR)) {
5095 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5096 "Space required after '<!ATTLIST'\n");
5097 }
5098 SKIP_BLANKS;
5099 elemName = xmlParseName(ctxt);
5100 if (elemName == NULL) {
5101 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5102 "ATTLIST: no name for Element\n");
5103 return;
5104 }
5105 SKIP_BLANKS;
5106 GROW;
5107 while (RAW != '>') {
5108 const xmlChar *check = CUR_PTR;
5109 int type;
5110 int def;
5111 xmlChar *defaultValue = NULL;
5112
5113 GROW;
5114 tree = NULL;
5115 attrName = xmlParseName(ctxt);
5116 if (attrName == NULL) {
5117 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5118 "ATTLIST: no name for Attribute\n");
5119 break;
5120 }
5121 GROW;
5122 if (!IS_BLANK_CH(CUR)) {
5123 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5124 "Space required after the attribute name\n");
5125 break;
5126 }
5127 SKIP_BLANKS;
5128
5129 type = xmlParseAttributeType(ctxt, &tree);
5130 if (type <= 0) {
5131 break;
5132 }
5133
5134 GROW;
5135 if (!IS_BLANK_CH(CUR)) {
5136 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5137 "Space required after the attribute type\n");
5138 if (tree != NULL)
5139 xmlFreeEnumeration(tree);
5140 break;
5141 }
5142 SKIP_BLANKS;
5143
5144 def = xmlParseDefaultDecl(ctxt, &defaultValue);
5145 if (def <= 0) {
5146 if (defaultValue != NULL)
5147 xmlFree(defaultValue);
5148 if (tree != NULL)
5149 xmlFreeEnumeration(tree);
5150 break;
5151 }
5152
5153 GROW;
5154 if (RAW != '>') {
5155 if (!IS_BLANK_CH(CUR)) {
5156 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5157 "Space required after the attribute default value\n");
5158 if (defaultValue != NULL)
5159 xmlFree(defaultValue);
5160 if (tree != NULL)
5161 xmlFreeEnumeration(tree);
5162 break;
5163 }
5164 SKIP_BLANKS;
5165 }
5166 if (check == CUR_PTR) {
5167 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
5168 "in xmlParseAttributeListDecl\n");
5169 if (defaultValue != NULL)
5170 xmlFree(defaultValue);
5171 if (tree != NULL)
5172 xmlFreeEnumeration(tree);
5173 break;
5174 }
5175 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5176 (ctxt->sax->attributeDecl != NULL))
5177 ctxt->sax->attributeDecl(ctxt->userData, elemName, attrName,
5178 type, def, defaultValue, tree);
5179 else if (tree != NULL)
5180 xmlFreeEnumeration(tree);
5181
5182 if ((ctxt->sax2) && (defaultValue != NULL) &&
5183 (def != XML_ATTRIBUTE_IMPLIED) &&
5184 (def != XML_ATTRIBUTE_REQUIRED)) {
5185 xmlAddDefAttrs(ctxt, elemName, attrName, defaultValue);
5186 }
5187 if (ctxt->sax2) {
5188 xmlAddSpecialAttr(ctxt, elemName, attrName, type);
5189 }
5190 if (defaultValue != NULL)
5191 xmlFree(defaultValue);
5192 GROW;
5193 }
5194 if (RAW == '>') {
5195 if (input != ctxt->input) {
5196 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5197 "Attribute list declaration doesn't start and stop in the same entity\n");
5198 }
5199 NEXT;
5200 }
5201 }
5202}
5203
5204/**
5205 * xmlParseElementMixedContentDecl:
5206 * @ctxt: an XML parser context
5207 * @inputchk: the input used for the current entity, needed for boundary checks
5208 *
5209 * parse the declaration for a Mixed Element content
5210 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
5211 *
5212 * [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' |
5213 * '(' S? '#PCDATA' S? ')'
5214 *
5215 * [ VC: Proper Group/PE Nesting ] applies to [51] too (see [49])
5216 *
5217 * [ VC: No Duplicate Types ]
5218 * The same name must not appear more than once in a single
5219 * mixed-content declaration.
5220 *
5221 * returns: the list of the xmlElementContentPtr describing the element choices
5222 */
5223xmlElementContentPtr
5224xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt, int inputchk) {
5225 xmlElementContentPtr ret = NULL, cur = NULL, n;
5226 const xmlChar *elem = NULL;
5227
5228 GROW;
5229 if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
5230 SKIP(7);
5231 SKIP_BLANKS;
5232 SHRINK;
5233 if (RAW == ')') {
5234 if ((ctxt->validate) && (ctxt->input->id != inputchk)) {
5235 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
5236"Element content declaration doesn't start and stop in the same entity\n",
5237 NULL);
5238 }
5239 NEXT;
5240 ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA);
5241 if (RAW == '*') {
5242 ret->ocur = XML_ELEMENT_CONTENT_MULT;
5243 NEXT;
5244 }
5245 return(ret);
5246 }
5247 if ((RAW == '(') || (RAW == '|')) {
5248 ret = cur = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA);
5249 if (ret == NULL) return(NULL);
5250 }
5251 while (RAW == '|') {
5252 NEXT;
5253 if (elem == NULL) {
5254 ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
5255 if (ret == NULL) return(NULL);
5256 ret->c1 = cur;
5257 if (cur != NULL)
5258 cur->parent = ret;
5259 cur = ret;
5260 } else {
5261 n = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
5262 if (n == NULL) return(NULL);
5263 n->c1 = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
5264 if (n->c1 != NULL)
5265 n->c1->parent = n;
5266 cur->c2 = n;
5267 if (n != NULL)
5268 n->parent = cur;
5269 cur = n;
5270 }
5271 SKIP_BLANKS;
5272 elem = xmlParseName(ctxt);
5273 if (elem == NULL) {
5274 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5275 "xmlParseElementMixedContentDecl : Name expected\n");
5276 xmlFreeDocElementContent(ctxt->myDoc, cur);
5277 return(NULL);
5278 }
5279 SKIP_BLANKS;
5280 GROW;
5281 }
5282 if ((RAW == ')') && (NXT(1) == '*')) {
5283 if (elem != NULL) {
5284 cur->c2 = xmlNewDocElementContent(ctxt->myDoc, elem,
5285 XML_ELEMENT_CONTENT_ELEMENT);
5286 if (cur->c2 != NULL)
5287 cur->c2->parent = cur;
5288 }
5289 ret->ocur = XML_ELEMENT_CONTENT_MULT;
5290 if ((ctxt->validate) && (ctxt->input->id != inputchk)) {
5291 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
5292"Element content declaration doesn't start and stop in the same entity\n",
5293 NULL);
5294 }
5295 SKIP(2);
5296 } else {
5297 xmlFreeDocElementContent(ctxt->myDoc, ret);
5298 xmlFatalErr(ctxt, XML_ERR_MIXED_NOT_STARTED, NULL);
5299 return(NULL);
5300 }
5301
5302 } else {
5303 xmlFatalErr(ctxt, XML_ERR_PCDATA_REQUIRED, NULL);
5304 }
5305 return(ret);
5306}
5307
5308/**
5309 * xmlParseElementChildrenContentDecl:
5310 * @ctxt: an XML parser context
5311 * @inputchk: the input used for the current entity, needed for boundary checks
5312 *
5313 * parse the declaration for a Mixed Element content
5314 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
5315 *
5316 *
5317 * [47] children ::= (choice | seq) ('?' | '*' | '+')?
5318 *
5319 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
5320 *
5321 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
5322 *
5323 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
5324 *
5325 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
5326 * TODO Parameter-entity replacement text must be properly nested
5327 * with parenthesized groups. That is to say, if either of the
5328 * opening or closing parentheses in a choice, seq, or Mixed
5329 * construct is contained in the replacement text for a parameter
5330 * entity, both must be contained in the same replacement text. For
5331 * interoperability, if a parameter-entity reference appears in a
5332 * choice, seq, or Mixed construct, its replacement text should not
5333 * be empty, and neither the first nor last non-blank character of
5334 * the replacement text should be a connector (| or ,).
5335 *
5336 * Returns the tree of xmlElementContentPtr describing the element
5337 * hierarchy.
5338 */
5339xmlElementContentPtr
5340xmlParseElementChildrenContentDecl (xmlParserCtxtPtr ctxt, int inputchk) {
5341 xmlElementContentPtr ret = NULL, cur = NULL, last = NULL, op = NULL;
5342 const xmlChar *elem;
5343 xmlChar type = 0;
5344
5345 if (ctxt->depth > 128) {
5346 xmlFatalErrMsgInt(ctxt, XML_ERR_ELEMCONTENT_NOT_FINISHED,
5347 "xmlParseElementChildrenContentDecl : depth %d too deep\n",
5348 ctxt->depth);
5349 return(NULL);
5350 }
5351 SKIP_BLANKS;
5352 GROW;
5353 if (RAW == '(') {
5354 int inputid = ctxt->input->id;
5355
5356 /* Recurse on first child */
5357 NEXT;
5358 SKIP_BLANKS;
5359 ctxt->depth++;
5360 cur = ret = xmlParseElementChildrenContentDecl(ctxt, inputid);
5361 ctxt->depth--;
5362 SKIP_BLANKS;
5363 GROW;
5364 } else {
5365 elem = xmlParseName(ctxt);
5366 if (elem == NULL) {
5367 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
5368 return(NULL);
5369 }
5370 cur = ret = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
5371 if (cur == NULL) {
5372 xmlErrMemory(ctxt, NULL);
5373 return(NULL);
5374 }
5375 GROW;
5376 if (RAW == '?') {
5377 cur->ocur = XML_ELEMENT_CONTENT_OPT;
5378 NEXT;
5379 } else if (RAW == '*') {
5380 cur->ocur = XML_ELEMENT_CONTENT_MULT;
5381 NEXT;
5382 } else if (RAW == '+') {
5383 cur->ocur = XML_ELEMENT_CONTENT_PLUS;
5384 NEXT;
5385 } else {
5386 cur->ocur = XML_ELEMENT_CONTENT_ONCE;
5387 }
5388 GROW;
5389 }
5390 SKIP_BLANKS;
5391 SHRINK;
5392 while (RAW != ')') {
5393 /*
5394 * Each loop we parse one separator and one element.
5395 */
5396 if (RAW == ',') {
5397 if (type == 0) type = CUR;
5398
5399 /*
5400 * Detect "Name | Name , Name" error
5401 */
5402 else if (type != CUR) {
5403 xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
5404 "xmlParseElementChildrenContentDecl : '%c' expected\n",
5405 type);
5406 if ((last != NULL) && (last != ret))
5407 xmlFreeDocElementContent(ctxt->myDoc, last);
5408 if (ret != NULL)
5409 xmlFreeDocElementContent(ctxt->myDoc, ret);
5410 return(NULL);
5411 }
5412 NEXT;
5413
5414 op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_SEQ);
5415 if (op == NULL) {
5416 if ((last != NULL) && (last != ret))
5417 xmlFreeDocElementContent(ctxt->myDoc, last);
5418 xmlFreeDocElementContent(ctxt->myDoc, ret);
5419 return(NULL);
5420 }
5421 if (last == NULL) {
5422 op->c1 = ret;
5423 if (ret != NULL)
5424 ret->parent = op;
5425 ret = cur = op;
5426 } else {
5427 cur->c2 = op;
5428 if (op != NULL)
5429 op->parent = cur;
5430 op->c1 = last;
5431 if (last != NULL)
5432 last->parent = op;
5433 cur =op;
5434 last = NULL;
5435 }
5436 } else if (RAW == '|') {
5437 if (type == 0) type = CUR;
5438
5439 /*
5440 * Detect "Name , Name | Name" error
5441 */
5442 else if (type != CUR) {
5443 xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
5444 "xmlParseElementChildrenContentDecl : '%c' expected\n",
5445 type);
5446 if ((last != NULL) && (last != ret))
5447 xmlFreeDocElementContent(ctxt->myDoc, last);
5448 if (ret != NULL)
5449 xmlFreeDocElementContent(ctxt->myDoc, ret);
5450 return(NULL);
5451 }
5452 NEXT;
5453
5454 op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
5455 if (op == NULL) {
5456 if ((last != NULL) && (last != ret))
5457 xmlFreeDocElementContent(ctxt->myDoc, last);
5458 if (ret != NULL)
5459 xmlFreeDocElementContent(ctxt->myDoc, ret);
5460 return(NULL);
5461 }
5462 if (last == NULL) {
5463 op->c1 = ret;
5464 if (ret != NULL)
5465 ret->parent = op;
5466 ret = cur = op;
5467 } else {
5468 cur->c2 = op;
5469 if (op != NULL)
5470 op->parent = cur;
5471 op->c1 = last;
5472 if (last != NULL)
5473 last->parent = op;
5474 cur =op;
5475 last = NULL;
5476 }
5477 } else {
5478 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_FINISHED, NULL);
5479 if (ret != NULL)
5480 xmlFreeDocElementContent(ctxt->myDoc, ret);
5481 return(NULL);
5482 }
5483 GROW;
5484 SKIP_BLANKS;
5485 GROW;
5486 if (RAW == '(') {
5487 int inputid = ctxt->input->id;
5488 /* Recurse on second child */
5489 NEXT;
5490 SKIP_BLANKS;
5491 ctxt->depth++;
5492 last = xmlParseElementChildrenContentDecl(ctxt, inputid);
5493 ctxt->depth--;
5494 SKIP_BLANKS;
5495 } else {
5496 elem = xmlParseName(ctxt);
5497 if (elem == NULL) {
5498 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
5499 if (ret != NULL)
5500 xmlFreeDocElementContent(ctxt->myDoc, ret);
5501 return(NULL);
5502 }
5503 last = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
5504 if (RAW == '?') {
5505 last->ocur = XML_ELEMENT_CONTENT_OPT;
5506 NEXT;
5507 } else if (RAW == '*') {
5508 last->ocur = XML_ELEMENT_CONTENT_MULT;
5509 NEXT;
5510 } else if (RAW == '+') {
5511 last->ocur = XML_ELEMENT_CONTENT_PLUS;
5512 NEXT;
5513 } else {
5514 last->ocur = XML_ELEMENT_CONTENT_ONCE;
5515 }
5516 }
5517 SKIP_BLANKS;
5518 GROW;
5519 }
5520 if ((cur != NULL) && (last != NULL)) {
5521 cur->c2 = last;
5522 if (last != NULL)
5523 last->parent = cur;
5524 }
5525 if ((ctxt->validate) && (ctxt->input->id != inputchk)) {
5526 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
5527"Element content declaration doesn't start and stop in the same entity\n",
5528 NULL);
5529 }
5530 NEXT;
5531 if (RAW == '?') {
5532 if (ret != NULL) {
5533 if ((ret->ocur == XML_ELEMENT_CONTENT_PLUS) ||
5534 (ret->ocur == XML_ELEMENT_CONTENT_MULT))
5535 ret->ocur = XML_ELEMENT_CONTENT_MULT;
5536 else
5537 ret->ocur = XML_ELEMENT_CONTENT_OPT;
5538 }
5539 NEXT;
5540 } else if (RAW == '*') {
5541 if (ret != NULL) {
5542 ret->ocur = XML_ELEMENT_CONTENT_MULT;
5543 cur = ret;
5544 /*
5545 * Some normalization:
5546 * (a | b* | c?)* == (a | b | c)*
5547 */
5548 while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) {
5549 if ((cur->c1 != NULL) &&
5550 ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
5551 (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT)))
5552 cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
5553 if ((cur->c2 != NULL) &&
5554 ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
5555 (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT)))
5556 cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
5557 cur = cur->c2;
5558 }
5559 }
5560 NEXT;
5561 } else if (RAW == '+') {
5562 if (ret != NULL) {
5563 int found = 0;
5564
5565 if ((ret->ocur == XML_ELEMENT_CONTENT_OPT) ||
5566 (ret->ocur == XML_ELEMENT_CONTENT_MULT))
5567 ret->ocur = XML_ELEMENT_CONTENT_MULT;
5568 else
5569 ret->ocur = XML_ELEMENT_CONTENT_PLUS;
5570 /*
5571 * Some normalization:
5572 * (a | b*)+ == (a | b)*
5573 * (a | b?)+ == (a | b)*
5574 */
5575 while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) {
5576 if ((cur->c1 != NULL) &&
5577 ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
5578 (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT))) {
5579 cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
5580 found = 1;
5581 }
5582 if ((cur->c2 != NULL) &&
5583 ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
5584 (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT))) {
5585 cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
5586 found = 1;
5587 }
5588 cur = cur->c2;
5589 }
5590 if (found)
5591 ret->ocur = XML_ELEMENT_CONTENT_MULT;
5592 }
5593 NEXT;
5594 }
5595 return(ret);
5596}
5597
5598/**
5599 * xmlParseElementContentDecl:
5600 * @ctxt: an XML parser context
5601 * @name: the name of the element being defined.
5602 * @result: the Element Content pointer will be stored here if any
5603 *
5604 * parse the declaration for an Element content either Mixed or Children,
5605 * the cases EMPTY and ANY are handled directly in xmlParseElementDecl
5606 *
5607 * [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
5608 *
5609 * returns: the type of element content XML_ELEMENT_TYPE_xxx
5610 */
5611
5612int
5613xmlParseElementContentDecl(xmlParserCtxtPtr ctxt, const xmlChar *name,
5614 xmlElementContentPtr *result) {
5615
5616 xmlElementContentPtr tree = NULL;
5617 int inputid = ctxt->input->id;
5618 int res;
5619
5620 *result = NULL;
5621
5622 if (RAW != '(') {
5623 xmlFatalErrMsgStr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
5624 "xmlParseElementContentDecl : %s '(' expected\n", name);
5625 return(-1);
5626 }
5627 NEXT;
5628 GROW;
5629 SKIP_BLANKS;
5630 if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
5631 tree = xmlParseElementMixedContentDecl(ctxt, inputid);
5632 res = XML_ELEMENT_TYPE_MIXED;
5633 } else {
5634 tree = xmlParseElementChildrenContentDecl(ctxt, inputid);
5635 res = XML_ELEMENT_TYPE_ELEMENT;
5636 }
5637 SKIP_BLANKS;
5638 *result = tree;
5639 return(res);
5640}
5641
5642/**
5643 * xmlParseElementDecl:
5644 * @ctxt: an XML parser context
5645 *
5646 * parse an Element declaration.
5647 *
5648 * [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'
5649 *
5650 * [ VC: Unique Element Type Declaration ]
5651 * No element type may be declared more than once
5652 *
5653 * Returns the type of the element, or -1 in case of error
5654 */
5655int
5656xmlParseElementDecl(xmlParserCtxtPtr ctxt) {
5657 const xmlChar *name;
5658 int ret = -1;
5659 xmlElementContentPtr content = NULL;
5660
5661 /* GROW; done in the caller */
5662 if (CMP9(CUR_PTR, '<', '!', 'E', 'L', 'E', 'M', 'E', 'N', 'T')) {
5663 xmlParserInputPtr input = ctxt->input;
5664
5665 SKIP(9);
5666 if (!IS_BLANK_CH(CUR)) {
5667 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5668 "Space required after 'ELEMENT'\n");
5669 }
5670 SKIP_BLANKS;
5671 name = xmlParseName(ctxt);
5672 if (name == NULL) {
5673 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5674 "xmlParseElementDecl: no name for Element\n");
5675 return(-1);
5676 }
5677 while ((RAW == 0) && (ctxt->inputNr > 1))
5678 xmlPopInput(ctxt);
5679 if (!IS_BLANK_CH(CUR)) {
5680 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5681 "Space required after the element name\n");
5682 }
5683 SKIP_BLANKS;
5684 if (CMP5(CUR_PTR, 'E', 'M', 'P', 'T', 'Y')) {
5685 SKIP(5);
5686 /*
5687 * Element must always be empty.
5688 */
5689 ret = XML_ELEMENT_TYPE_EMPTY;
5690 } else if ((RAW == 'A') && (NXT(1) == 'N') &&
5691 (NXT(2) == 'Y')) {
5692 SKIP(3);
5693 /*
5694 * Element is a generic container.
5695 */
5696 ret = XML_ELEMENT_TYPE_ANY;
5697 } else if (RAW == '(') {
5698 ret = xmlParseElementContentDecl(ctxt, name, &content);
5699 } else {
5700 /*
5701 * [ WFC: PEs in Internal Subset ] error handling.
5702 */
5703 if ((RAW == '%') && (ctxt->external == 0) &&
5704 (ctxt->inputNr == 1)) {
5705 xmlFatalErrMsg(ctxt, XML_ERR_PEREF_IN_INT_SUBSET,
5706 "PEReference: forbidden within markup decl in internal subset\n");
5707 } else {
5708 xmlFatalErrMsg(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
5709 "xmlParseElementDecl: 'EMPTY', 'ANY' or '(' expected\n");
5710 }
5711 return(-1);
5712 }
5713
5714 SKIP_BLANKS;
5715 /*
5716 * Pop-up of finished entities.
5717 */
5718 while ((RAW == 0) && (ctxt->inputNr > 1))
5719 xmlPopInput(ctxt);
5720 SKIP_BLANKS;
5721
5722 if (RAW != '>') {
5723 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
5724 if (content != NULL) {
5725 xmlFreeDocElementContent(ctxt->myDoc, content);
5726 }
5727 } else {
5728 if (input != ctxt->input) {
5729 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5730 "Element declaration doesn't start and stop in the same entity\n");
5731 }
5732
5733 NEXT;
5734 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5735 (ctxt->sax->elementDecl != NULL)) {
5736 if (content != NULL)
5737 content->parent = NULL;
5738 ctxt->sax->elementDecl(ctxt->userData, name, ret,
5739 content);
5740 if ((content != NULL) && (content->parent == NULL)) {
5741 /*
5742 * this is a trick: if xmlAddElementDecl is called,
5743 * instead of copying the full tree it is plugged directly
5744 * if called from the parser. Avoid duplicating the
5745 * interfaces or change the API/ABI
5746 */
5747 xmlFreeDocElementContent(ctxt->myDoc, content);
5748 }
5749 } else if (content != NULL) {
5750 xmlFreeDocElementContent(ctxt->myDoc, content);
5751 }
5752 }
5753 }
5754 return(ret);
5755}
5756
5757/**
5758 * xmlParseConditionalSections
5759 * @ctxt: an XML parser context
5760 *
5761 * [61] conditionalSect ::= includeSect | ignoreSect
5762 * [62] includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>'
5763 * [63] ignoreSect ::= '<![' S? 'IGNORE' S? '[' ignoreSectContents* ']]>'
5764 * [64] ignoreSectContents ::= Ignore ('<![' ignoreSectContents ']]>' Ignore)*
5765 * [65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*)
5766 */
5767
5768static void
5769xmlParseConditionalSections(xmlParserCtxtPtr ctxt) {
5770 SKIP(3);
5771 SKIP_BLANKS;
5772 if (CMP7(CUR_PTR, 'I', 'N', 'C', 'L', 'U', 'D', 'E')) {
5773 SKIP(7);
5774 SKIP_BLANKS;
5775 if (RAW != '[') {
5776 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
5777 } else {
5778 NEXT;
5779 }
5780 if (xmlParserDebugEntities) {
5781 if ((ctxt->input != NULL) && (ctxt->input->filename))
5782 xmlGenericError(xmlGenericErrorContext,
5783 "%s(%d): ", ctxt->input->filename,
5784 ctxt->input->line);
5785 xmlGenericError(xmlGenericErrorContext,
5786 "Entering INCLUDE Conditional Section\n");
5787 }
5788
5789 while ((RAW != 0) && ((RAW != ']') || (NXT(1) != ']') ||
5790 (NXT(2) != '>'))) {
5791 const xmlChar *check = CUR_PTR;
5792 unsigned int cons = ctxt->input->consumed;
5793
5794 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
5795 xmlParseConditionalSections(ctxt);
5796 } else if (IS_BLANK_CH(CUR)) {
5797 NEXT;
5798 } else if (RAW == '%') {
5799 xmlParsePEReference(ctxt);
5800 } else
5801 xmlParseMarkupDecl(ctxt);
5802
5803 /*
5804 * Pop-up of finished entities.
5805 */
5806 while ((RAW == 0) && (ctxt->inputNr > 1))
5807 xmlPopInput(ctxt);
5808
5809 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
5810 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
5811 break;
5812 }
5813 }
5814 if (xmlParserDebugEntities) {
5815 if ((ctxt->input != NULL) && (ctxt->input->filename))
5816 xmlGenericError(xmlGenericErrorContext,
5817 "%s(%d): ", ctxt->input->filename,
5818 ctxt->input->line);
5819 xmlGenericError(xmlGenericErrorContext,
5820 "Leaving INCLUDE Conditional Section\n");
5821 }
5822
5823 } else if (CMP6(CUR_PTR, 'I', 'G', 'N', 'O', 'R', 'E')) {
5824 int state;
5825 xmlParserInputState instate;
5826 int depth = 0;
5827
5828 SKIP(6);
5829 SKIP_BLANKS;
5830 if (RAW != '[') {
5831 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
5832 } else {
5833 NEXT;
5834 }
5835 if (xmlParserDebugEntities) {
5836 if ((ctxt->input != NULL) && (ctxt->input->filename))
5837 xmlGenericError(xmlGenericErrorContext,
5838 "%s(%d): ", ctxt->input->filename,
5839 ctxt->input->line);
5840 xmlGenericError(xmlGenericErrorContext,
5841 "Entering IGNORE Conditional Section\n");
5842 }
5843
5844 /*
5845 * Parse up to the end of the conditional section
5846 * But disable SAX event generating DTD building in the meantime
5847 */
5848 state = ctxt->disableSAX;
5849 instate = ctxt->instate;
5850 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
5851 ctxt->instate = XML_PARSER_IGNORE;
5852
5853 while ((depth >= 0) && (RAW != 0)) {
5854 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
5855 depth++;
5856 SKIP(3);
5857 continue;
5858 }
5859 if ((RAW == ']') && (NXT(1) == ']') && (NXT(2) == '>')) {
5860 if (--depth >= 0) SKIP(3);
5861 continue;
5862 }
5863 NEXT;
5864 continue;
5865 }
5866
5867 ctxt->disableSAX = state;
5868 ctxt->instate = instate;
5869
5870 if (xmlParserDebugEntities) {
5871 if ((ctxt->input != NULL) && (ctxt->input->filename))
5872 xmlGenericError(xmlGenericErrorContext,
5873 "%s(%d): ", ctxt->input->filename,
5874 ctxt->input->line);
5875 xmlGenericError(xmlGenericErrorContext,
5876 "Leaving IGNORE Conditional Section\n");
5877 }
5878
5879 } else {
5880 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID_KEYWORD, NULL);
5881 }
5882
5883 if (RAW == 0)
5884 SHRINK;
5885
5886 if (RAW == 0) {
5887 xmlFatalErr(ctxt, XML_ERR_CONDSEC_NOT_FINISHED, NULL);
5888 } else {
5889 SKIP(3);
5890 }
5891}
5892
5893/**
5894 * xmlParseMarkupDecl:
5895 * @ctxt: an XML parser context
5896 *
5897 * parse Markup declarations
5898 *
5899 * [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl |
5900 * NotationDecl | PI | Comment
5901 *
5902 * [ VC: Proper Declaration/PE Nesting ]
5903 * Parameter-entity replacement text must be properly nested with
5904 * markup declarations. That is to say, if either the first character
5905 * or the last character of a markup declaration (markupdecl above) is
5906 * contained in the replacement text for a parameter-entity reference,
5907 * both must be contained in the same replacement text.
5908 *
5909 * [ WFC: PEs in Internal Subset ]
5910 * In the internal DTD subset, parameter-entity references can occur
5911 * only where markup declarations can occur, not within markup declarations.
5912 * (This does not apply to references that occur in external parameter
5913 * entities or to the external subset.)
5914 */
5915void
5916xmlParseMarkupDecl(xmlParserCtxtPtr ctxt) {
5917 GROW;
5918 if (CUR == '<') {
5919 if (NXT(1) == '!') {
5920 switch (NXT(2)) {
5921 case 'E':
5922 if (NXT(3) == 'L')
5923 xmlParseElementDecl(ctxt);
5924 else if (NXT(3) == 'N')
5925 xmlParseEntityDecl(ctxt);
5926 break;
5927 case 'A':
5928 xmlParseAttributeListDecl(ctxt);
5929 break;
5930 case 'N':
5931 xmlParseNotationDecl(ctxt);
5932 break;
5933 case '-':
5934 xmlParseComment(ctxt);
5935 break;
5936 default:
5937 /* there is an error but it will be detected later */
5938 break;
5939 }
5940 } else if (NXT(1) == '?') {
5941 xmlParsePI(ctxt);
5942 }
5943 }
5944 /*
5945 * This is only for internal subset. On external entities,
5946 * the replacement is done before parsing stage
5947 */
5948 if ((ctxt->external == 0) && (ctxt->inputNr == 1))
5949 xmlParsePEReference(ctxt);
5950
5951 /*
5952 * Conditional sections are allowed from entities included
5953 * by PE References in the internal subset.
5954 */
5955 if ((ctxt->external == 0) && (ctxt->inputNr > 1)) {
5956 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
5957 xmlParseConditionalSections(ctxt);
5958 }
5959 }
5960
5961 ctxt->instate = XML_PARSER_DTD;
5962}
5963
5964/**
5965 * xmlParseTextDecl:
5966 * @ctxt: an XML parser context
5967 *
5968 * parse an XML declaration header for external entities
5969 *
5970 * [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>'
5971 *
5972 * Question: Seems that EncodingDecl is mandatory ? Is that a typo ?
5973 */
5974
5975void
5976xmlParseTextDecl(xmlParserCtxtPtr ctxt) {
5977 xmlChar *version;
5978 const xmlChar *encoding;
5979
5980 /*
5981 * We know that '<?xml' is here.
5982 */
5983 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
5984 SKIP(5);
5985 } else {
5986 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_STARTED, NULL);
5987 return;
5988 }
5989
5990 if (!IS_BLANK_CH(CUR)) {
5991 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5992 "Space needed after '<?xml'\n");
5993 }
5994 SKIP_BLANKS;
5995
5996 /*
5997 * We may have the VersionInfo here.
5998 */
5999 version = xmlParseVersionInfo(ctxt);
6000 if (version == NULL)
6001 version = xmlCharStrdup(XML_DEFAULT_VERSION);
6002 else {
6003 if (!IS_BLANK_CH(CUR)) {
6004 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6005 "Space needed here\n");
6006 }
6007 }
6008 ctxt->input->version = version;
6009
6010 /*
6011 * We must have the encoding declaration
6012 */
6013 encoding = xmlParseEncodingDecl(ctxt);
6014 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
6015 /*
6016 * The XML REC instructs us to stop parsing right here
6017 */
6018 return;
6019 }
6020 if ((encoding == NULL) && (ctxt->errNo == XML_ERR_OK)) {
6021 xmlFatalErrMsg(ctxt, XML_ERR_MISSING_ENCODING,
6022 "Missing encoding in text declaration\n");
6023 }
6024
6025 SKIP_BLANKS;
6026 if ((RAW == '?') && (NXT(1) == '>')) {
6027 SKIP(2);
6028 } else if (RAW == '>') {
6029 /* Deprecated old WD ... */
6030 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
6031 NEXT;
6032 } else {
6033 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
6034 MOVETO_ENDTAG(CUR_PTR);
6035 NEXT;
6036 }
6037}
6038
6039/**
6040 * xmlParseExternalSubset:
6041 * @ctxt: an XML parser context
6042 * @ExternalID: the external identifier
6043 * @SystemID: the system identifier (or URL)
6044 *
6045 * parse Markup declarations from an external subset
6046 *
6047 * [30] extSubset ::= textDecl? extSubsetDecl
6048 *
6049 * [31] extSubsetDecl ::= (markupdecl | conditionalSect | PEReference | S) *
6050 */
6051void
6052xmlParseExternalSubset(xmlParserCtxtPtr ctxt, const xmlChar *ExternalID,
6053 const xmlChar *SystemID) {
6054 xmlDetectSAX2(ctxt);
6055 GROW;
6056 if (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) {
6057 xmlParseTextDecl(ctxt);
6058 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
6059 /*
6060 * The XML REC instructs us to stop parsing right here
6061 */
6062 ctxt->instate = XML_PARSER_EOF;
6063 return;
6064 }
6065 }
6066 if (ctxt->myDoc == NULL) {
6067 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
6068 }
6069 if ((ctxt->myDoc != NULL) && (ctxt->myDoc->intSubset == NULL))
6070 xmlCreateIntSubset(ctxt->myDoc, NULL, ExternalID, SystemID);
6071
6072 ctxt->instate = XML_PARSER_DTD;
6073 ctxt->external = 1;
6074 while (((RAW == '<') && (NXT(1) == '?')) ||
6075 ((RAW == '<') && (NXT(1) == '!')) ||
6076 (RAW == '%') || IS_BLANK_CH(CUR)) {
6077 const xmlChar *check = CUR_PTR;
6078 unsigned int cons = ctxt->input->consumed;
6079
6080 GROW;
6081 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6082 xmlParseConditionalSections(ctxt);
6083 } else if (IS_BLANK_CH(CUR)) {
6084 NEXT;
6085 } else if (RAW == '%') {
6086 xmlParsePEReference(ctxt);
6087 } else
6088 xmlParseMarkupDecl(ctxt);
6089
6090 /*
6091 * Pop-up of finished entities.
6092 */
6093 while ((RAW == 0) && (ctxt->inputNr > 1))
6094 xmlPopInput(ctxt);
6095
6096 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
6097 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
6098 break;
6099 }
6100 }
6101
6102 if (RAW != 0) {
6103 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
6104 }
6105
6106}
6107
6108/**
6109 * xmlParseReference:
6110 * @ctxt: an XML parser context
6111 *
6112 * parse and handle entity references in content, depending on the SAX
6113 * interface, this may end-up in a call to character() if this is a
6114 * CharRef, a predefined entity, if there is no reference() callback.
6115 * or if the parser was asked to switch to that mode.
6116 *
6117 * [67] Reference ::= EntityRef | CharRef
6118 */
6119void
6120xmlParseReference(xmlParserCtxtPtr ctxt) {
6121 xmlEntityPtr ent;
6122 xmlChar *val;
6123 if (RAW != '&') return;
6124
6125 if (NXT(1) == '#') {
6126 int i = 0;
6127 xmlChar out[10];
6128 int hex = NXT(2);
6129 int value = xmlParseCharRef(ctxt);
6130
6131 if (ctxt->charset != XML_CHAR_ENCODING_UTF8) {
6132 /*
6133 * So we are using non-UTF-8 buffers
6134 * Check that the char fit on 8bits, if not
6135 * generate a CharRef.
6136 */
6137 if (value <= 0xFF) {
6138 out[0] = value;
6139 out[1] = 0;
6140 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
6141 (!ctxt->disableSAX))
6142 ctxt->sax->characters(ctxt->userData, out, 1);
6143 } else {
6144 if ((hex == 'x') || (hex == 'X'))
6145 snprintf((char *)out, sizeof(out), "#x%X", value);
6146 else
6147 snprintf((char *)out, sizeof(out), "#%d", value);
6148 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
6149 (!ctxt->disableSAX))
6150 ctxt->sax->reference(ctxt->userData, out);
6151 }
6152 } else {
6153 /*
6154 * Just encode the value in UTF-8
6155 */
6156 COPY_BUF(0 ,out, i, value);
6157 out[i] = 0;
6158 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
6159 (!ctxt->disableSAX))
6160 ctxt->sax->characters(ctxt->userData, out, i);
6161 }
6162 } else {
6163 int was_checked;
6164
6165 ent = xmlParseEntityRef(ctxt);
6166 if (ent == NULL) return;
6167 if (!ctxt->wellFormed)
6168 return;
6169 was_checked = ent->checked;
6170 if ((ent->name != NULL) &&
6171 (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY)) {
6172 xmlNodePtr list = NULL;
6173 xmlParserErrors ret = XML_ERR_OK;
6174
6175
6176 /*
6177 * The first reference to the entity trigger a parsing phase
6178 * where the ent->children is filled with the result from
6179 * the parsing.
6180 */
6181 if (ent->checked == 0) {
6182 xmlChar *value;
6183
6184 value = ent->content;
6185
6186 /*
6187 * Check that this entity is well formed
6188 */
6189 if ((value != NULL) && (value[0] != 0) &&
6190 (value[1] == 0) && (value[0] == '<') &&
6191 (xmlStrEqual(ent->name, BAD_CAST "lt"))) {
6192 /*
6193 * DONE: get definite answer on this !!!
6194 * Lots of entity decls are used to declare a single
6195 * char
6196 * <!ENTITY lt "<">
6197 * Which seems to be valid since
6198 * 2.4: The ampersand character (&) and the left angle
6199 * bracket (<) may appear in their literal form only
6200 * when used ... They are also legal within the literal
6201 * entity value of an internal entity declaration;i
6202 * see "4.3.2 Well-Formed Parsed Entities".
6203 * IMHO 2.4 and 4.3.2 are directly in contradiction.
6204 * Looking at the OASIS test suite and James Clark
6205 * tests, this is broken. However the XML REC uses
6206 * it. Is the XML REC not well-formed ????
6207 * This is a hack to avoid this problem
6208 *
6209 * ANSWER: since lt gt amp .. are already defined,
6210 * this is a redefinition and hence the fact that the
6211 * content is not well balanced is not a Wf error, this
6212 * is lousy but acceptable.
6213 */
6214 list = xmlNewDocText(ctxt->myDoc, value);
6215 if (list != NULL) {
6216 if ((ent->etype == XML_INTERNAL_GENERAL_ENTITY) &&
6217 (ent->children == NULL)) {
6218 ent->children = list;
6219 ent->last = list;
6220 if (ent->owner == 0)
6221 ent->owner = 1;
6222 list->parent = (xmlNodePtr) ent;
6223 } else {
6224 xmlFreeNodeList(list);
6225 }
6226 } else if (list != NULL) {
6227 xmlFreeNodeList(list);
6228 }
6229 } else {
6230 unsigned long oldnbent = ctxt->nbentities;
6231 /*
6232 * 4.3.2: An internal general parsed entity is well-formed
6233 * if its replacement text matches the production labeled
6234 * content.
6235 */
6236
6237 void *user_data;
6238 /*
6239 * This is a bit hackish but this seems the best
6240 * way to make sure both SAX and DOM entity support
6241 * behaves okay.
6242 */
6243 if (ctxt->userData == ctxt)
6244 user_data = NULL;
6245 else
6246 user_data = ctxt->userData;
6247
6248 if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
6249 ctxt->depth++;
6250 ret = xmlParseBalancedChunkMemoryInternal(ctxt,
6251 value, user_data, &list);
6252 ctxt->depth--;
6253
6254 } else if (ent->etype ==
6255 XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
6256 ctxt->depth++;
6257 ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt,
6258 ctxt->sax, user_data, ctxt->depth,
6259 ent->URI, ent->ExternalID, &list);
6260 ctxt->depth--;
6261 } else {
6262 ret = XML_ERR_ENTITY_PE_INTERNAL;
6263 xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR,
6264 "invalid entity type found\n", NULL);
6265 }
6266 /*
6267 * Store the number of entities needing parsing for entity
6268 * content and do checkings
6269 */
6270 if ((ent->owner != 0) || (ent->children == NULL)) {
6271 ent->owner = ctxt->nbentities - oldnbent;
6272 if (ent->owner == 0)
6273 ent->owner = 1;
6274 }
6275 if (ret == XML_ERR_ENTITY_LOOP) {
6276 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
6277 xmlFreeNodeList(list);
6278 return;
6279 }
6280 if (xmlParserEntityCheck(ctxt, 0, ent)) {
6281 xmlFreeNodeList(list);
6282 return;
6283 }
6284 if (ret == XML_ERR_ENTITY_LOOP) {
6285 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
6286 return;
6287 } else if ((ret == XML_ERR_OK) && (list != NULL)) {
6288 if (((ent->etype == XML_INTERNAL_GENERAL_ENTITY) ||
6289 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY))&&
6290 (ent->children == NULL)) {
6291 ent->children = list;
6292 if (ctxt->replaceEntities) {
6293 /*
6294 * Prune it directly in the generated document
6295 * except for single text nodes.
6296 */
6297 if (((list->type == XML_TEXT_NODE) &&
6298 (list->next == NULL)) ||
6299 (ctxt->parseMode == XML_PARSE_READER)) {
6300 list->parent = (xmlNodePtr) ent;
6301 list = NULL;
6302 if (ent->owner == 0)
6303 ent->owner = 1;
6304 } else {
6305 ent->owner = 0;
6306 while (list != NULL) {
6307 list->parent = (xmlNodePtr) ctxt->node;
6308 list->doc = ctxt->myDoc;
6309 if (list->next == NULL)
6310 ent->last = list;
6311 list = list->next;
6312 }
6313 list = ent->children;
6314#ifdef LIBXML_LEGACY_ENABLED
6315 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
6316 xmlAddEntityReference(ent, list, NULL);
6317#endif /* LIBXML_LEGACY_ENABLED */
6318 }
6319 } else {
6320 if (ent->owner == 0)
6321 ent->owner = 1;
6322 while (list != NULL) {
6323 list->parent = (xmlNodePtr) ent;
6324 if (list->next == NULL)
6325 ent->last = list;
6326 list = list->next;
6327 }
6328 }
6329 } else {
6330 xmlFreeNodeList(list);
6331 list = NULL;
6332 }
6333 } else if ((ret != XML_ERR_OK) &&
6334 (ret != XML_WAR_UNDECLARED_ENTITY)) {
6335 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
6336 "Entity '%s' failed to parse\n", ent->name);
6337 } else if (list != NULL) {
6338 xmlFreeNodeList(list);
6339 list = NULL;
6340 } else if (ent->owner != 1) {
6341 ctxt->nbentities += ent->owner;
6342 }
6343 }
6344 ent->checked = 1;
6345 }
6346
6347 if (ent->children == NULL) {
6348 /*
6349 * Probably running in SAX mode and the callbacks don't
6350 * build the entity content. So unless we already went
6351 * though parsing for first checking go though the entity
6352 * content to generate callbacks associated to the entity
6353 */
6354 if (was_checked == 1) {
6355 void *user_data;
6356 /*
6357 * This is a bit hackish but this seems the best
6358 * way to make sure both SAX and DOM entity support
6359 * behaves okay.
6360 */
6361 if (ctxt->userData == ctxt)
6362 user_data = NULL;
6363 else
6364 user_data = ctxt->userData;
6365
6366 if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
6367 ctxt->depth++;
6368 ret = xmlParseBalancedChunkMemoryInternal(ctxt,
6369 ent->content, user_data, NULL);
6370 ctxt->depth--;
6371 } else if (ent->etype ==
6372 XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
6373 ctxt->depth++;
6374 ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt,
6375 ctxt->sax, user_data, ctxt->depth,
6376 ent->URI, ent->ExternalID, NULL);
6377 ctxt->depth--;
6378 } else {
6379 ret = XML_ERR_ENTITY_PE_INTERNAL;
6380 xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR,
6381 "invalid entity type found\n", NULL);
6382 }
6383 if (ret == XML_ERR_ENTITY_LOOP) {
6384 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
6385 return;
6386 }
6387 }
6388 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
6389 (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
6390 /*
6391 * Entity reference callback comes second, it's somewhat
6392 * superfluous but a compatibility to historical behaviour
6393 */
6394 ctxt->sax->reference(ctxt->userData, ent->name);
6395 }
6396 return;
6397 }
6398 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
6399 (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
6400 /*
6401 * Create a node.
6402 */
6403 ctxt->sax->reference(ctxt->userData, ent->name);
6404 return;
6405 }
6406 if ((ctxt->replaceEntities) || (ent->children == NULL)) {
6407 /*
6408 * There is a problem on the handling of _private for entities
6409 * (bug 155816): Should we copy the content of the field from
6410 * the entity (possibly overwriting some value set by the user
6411 * when a copy is created), should we leave it alone, or should
6412 * we try to take care of different situations? The problem
6413 * is exacerbated by the usage of this field by the xmlReader.
6414 * To fix this bug, we look at _private on the created node
6415 * and, if it's NULL, we copy in whatever was in the entity.
6416 * If it's not NULL we leave it alone. This is somewhat of a
6417 * hack - maybe we should have further tests to determine
6418 * what to do.
6419 */
6420 if ((ctxt->node != NULL) && (ent->children != NULL)) {
6421 /*
6422 * Seems we are generating the DOM content, do
6423 * a simple tree copy for all references except the first
6424 * In the first occurrence list contains the replacement.
6425 * progressive == 2 means we are operating on the Reader
6426 * and since nodes are discarded we must copy all the time.
6427 */
6428 if (((list == NULL) && (ent->owner == 0)) ||
6429 (ctxt->parseMode == XML_PARSE_READER)) {
6430 xmlNodePtr nw = NULL, cur, firstChild = NULL;
6431
6432 /*
6433 * when operating on a reader, the entities definitions
6434 * are always owning the entities subtree.
6435 if (ctxt->parseMode == XML_PARSE_READER)
6436 ent->owner = 1;
6437 */
6438
6439 cur = ent->children;
6440 while (cur != NULL) {
6441 nw = xmlDocCopyNode(cur, ctxt->myDoc, 1);
6442 if (nw != NULL) {
6443 if (nw->_private == NULL)
6444 nw->_private = cur->_private;
6445 if (firstChild == NULL){
6446 firstChild = nw;
6447 }
6448 nw = xmlAddChild(ctxt->node, nw);
6449 }
6450 if (cur == ent->last) {
6451 /*
6452 * needed to detect some strange empty
6453 * node cases in the reader tests
6454 */
6455 if ((ctxt->parseMode == XML_PARSE_READER) &&
6456 (nw != NULL) &&
6457 (nw->type == XML_ELEMENT_NODE) &&
6458 (nw->children == NULL))
6459 nw->extra = 1;
6460
6461 break;
6462 }
6463 cur = cur->next;
6464 }
6465#ifdef LIBXML_LEGACY_ENABLED
6466 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
6467 xmlAddEntityReference(ent, firstChild, nw);
6468#endif /* LIBXML_LEGACY_ENABLED */
6469 } else if (list == NULL) {
6470 xmlNodePtr nw = NULL, cur, next, last,
6471 firstChild = NULL;
6472 /*
6473 * Copy the entity child list and make it the new
6474 * entity child list. The goal is to make sure any
6475 * ID or REF referenced will be the one from the
6476 * document content and not the entity copy.
6477 */
6478 cur = ent->children;
6479 ent->children = NULL;
6480 last = ent->last;
6481 ent->last = NULL;
6482 while (cur != NULL) {
6483 next = cur->next;
6484 cur->next = NULL;
6485 cur->parent = NULL;
6486 nw = xmlDocCopyNode(cur, ctxt->myDoc, 1);
6487 if (nw != NULL) {
6488 if (nw->_private == NULL)
6489 nw->_private = cur->_private;
6490 if (firstChild == NULL){
6491 firstChild = cur;
6492 }
6493 xmlAddChild((xmlNodePtr) ent, nw);
6494 xmlAddChild(ctxt->node, cur);
6495 }
6496 if (cur == last)
6497 break;
6498 cur = next;
6499 }
6500 if (ent->owner == 0)
6501 ent->owner = 1;
6502#ifdef LIBXML_LEGACY_ENABLED
6503 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
6504 xmlAddEntityReference(ent, firstChild, nw);
6505#endif /* LIBXML_LEGACY_ENABLED */
6506 } else {
6507 const xmlChar *nbktext;
6508
6509 /*
6510 * the name change is to avoid coalescing of the
6511 * node with a possible previous text one which
6512 * would make ent->children a dangling pointer
6513 */
6514 nbktext = xmlDictLookup(ctxt->dict, BAD_CAST "nbktext",
6515 -1);
6516 if (ent->children->type == XML_TEXT_NODE)
6517 ent->children->name = nbktext;
6518 if ((ent->last != ent->children) &&
6519 (ent->last->type == XML_TEXT_NODE))
6520 ent->last->name = nbktext;
6521 xmlAddChildList(ctxt->node, ent->children);
6522 }
6523
6524 /*
6525 * This is to avoid a nasty side effect, see
6526 * characters() in SAX.c
6527 */
6528 ctxt->nodemem = 0;
6529 ctxt->nodelen = 0;
6530 return;
6531 }
6532 }
6533 } else {
6534 val = ent->content;
6535 if (val == NULL) return;
6536 /*
6537 * inline the entity.
6538 */
6539 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
6540 (!ctxt->disableSAX))
6541 ctxt->sax->characters(ctxt->userData, val, xmlStrlen(val));
6542 }
6543 }
6544}
6545
6546/**
6547 * xmlParseEntityRef:
6548 * @ctxt: an XML parser context
6549 *
6550 * parse ENTITY references declarations
6551 *
6552 * [68] EntityRef ::= '&' Name ';'
6553 *
6554 * [ WFC: Entity Declared ]
6555 * In a document without any DTD, a document with only an internal DTD
6556 * subset which contains no parameter entity references, or a document
6557 * with "standalone='yes'", the Name given in the entity reference
6558 * must match that in an entity declaration, except that well-formed
6559 * documents need not declare any of the following entities: amp, lt,
6560 * gt, apos, quot. The declaration of a parameter entity must precede
6561 * any reference to it. Similarly, the declaration of a general entity
6562 * must precede any reference to it which appears in a default value in an
6563 * attribute-list declaration. Note that if entities are declared in the
6564 * external subset or in external parameter entities, a non-validating
6565 * processor is not obligated to read and process their declarations;
6566 * for such documents, the rule that an entity must be declared is a
6567 * well-formedness constraint only if standalone='yes'.
6568 *
6569 * [ WFC: Parsed Entity ]
6570 * An entity reference must not contain the name of an unparsed entity
6571 *
6572 * Returns the xmlEntityPtr if found, or NULL otherwise.
6573 */
6574xmlEntityPtr
6575xmlParseEntityRef(xmlParserCtxtPtr ctxt) {
6576 const xmlChar *name;
6577 xmlEntityPtr ent = NULL;
6578
6579 GROW;
6580
6581 if (RAW == '&') {
6582 NEXT;
6583 name = xmlParseName(ctxt);
6584 if (name == NULL) {
6585 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6586 "xmlParseEntityRef: no name\n");
6587 } else {
6588 if (RAW == ';') {
6589 NEXT;
6590 /*
6591 * Increase the number of entity references parsed
6592 */
6593 ctxt->nbentities++;
6594
6595 /*
6596 * Ask first SAX for entity resolution, otherwise try the
6597 * predefined set.
6598 */
6599 if (ctxt->sax != NULL) {
6600 if (ctxt->sax->getEntity != NULL)
6601 ent = ctxt->sax->getEntity(ctxt->userData, name);
6602 if ((ctxt->wellFormed == 1 ) && (ent == NULL))
6603 ent = xmlGetPredefinedEntity(name);
6604 if ((ctxt->wellFormed == 1 ) && (ent == NULL) &&
6605 (ctxt->userData==ctxt)) {
6606 ent = xmlSAX2GetEntity(ctxt, name);
6607 }
6608 }
6609 /*
6610 * [ WFC: Entity Declared ]
6611 * In a document without any DTD, a document with only an
6612 * internal DTD subset which contains no parameter entity
6613 * references, or a document with "standalone='yes'", the
6614 * Name given in the entity reference must match that in an
6615 * entity declaration, except that well-formed documents
6616 * need not declare any of the following entities: amp, lt,
6617 * gt, apos, quot.
6618 * The declaration of a parameter entity must precede any
6619 * reference to it.
6620 * Similarly, the declaration of a general entity must
6621 * precede any reference to it which appears in a default
6622 * value in an attribute-list declaration. Note that if
6623 * entities are declared in the external subset or in
6624 * external parameter entities, a non-validating processor
6625 * is not obligated to read and process their declarations;
6626 * for such documents, the rule that an entity must be
6627 * declared is a well-formedness constraint only if
6628 * standalone='yes'.
6629 */
6630 if (ent == NULL) {
6631 if ((ctxt->standalone == 1) ||
6632 ((ctxt->hasExternalSubset == 0) &&
6633 (ctxt->hasPErefs == 0))) {
6634 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
6635 "Entity '%s' not defined\n", name);
6636 } else {
6637 xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
6638 "Entity '%s' not defined\n", name);
6639 if ((ctxt->inSubset == 0) &&
6640 (ctxt->sax != NULL) &&
6641 (ctxt->sax->reference != NULL)) {
6642 ctxt->sax->reference(ctxt->userData, name);
6643 }
6644 }
6645 ctxt->valid = 0;
6646 }
6647
6648 /*
6649 * [ WFC: Parsed Entity ]
6650 * An entity reference must not contain the name of an
6651 * unparsed entity
6652 */
6653 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
6654 xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
6655 "Entity reference to unparsed entity %s\n", name);
6656 }
6657
6658 /*
6659 * [ WFC: No External Entity References ]
6660 * Attribute values cannot contain direct or indirect
6661 * entity references to external entities.
6662 */
6663 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
6664 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
6665 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
6666 "Attribute references external entity '%s'\n", name);
6667 }
6668 /*
6669 * [ WFC: No < in Attribute Values ]
6670 * The replacement text of any entity referred to directly or
6671 * indirectly in an attribute value (other than "&lt;") must
6672 * not contain a <.
6673 */
6674 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
6675 (ent != NULL) &&
6676 (!xmlStrEqual(ent->name, BAD_CAST "lt")) &&
6677 (ent->content != NULL) &&
6678 (xmlStrchr(ent->content, '<'))) {
6679 xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
6680 "'<' in entity '%s' is not allowed in attributes values\n", name);
6681 }
6682
6683 /*
6684 * Internal check, no parameter entities here ...
6685 */
6686 else {
6687 switch (ent->etype) {
6688 case XML_INTERNAL_PARAMETER_ENTITY:
6689 case XML_EXTERNAL_PARAMETER_ENTITY:
6690 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
6691 "Attempt to reference the parameter entity '%s'\n",
6692 name);
6693 break;
6694 default:
6695 break;
6696 }
6697 }
6698
6699 /*
6700 * [ WFC: No Recursion ]
6701 * A parsed entity must not contain a recursive reference
6702 * to itself, either directly or indirectly.
6703 * Done somewhere else
6704 */
6705
6706 } else {
6707 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
6708 }
6709 }
6710 }
6711 return(ent);
6712}
6713
6714/**
6715 * xmlParseStringEntityRef:
6716 * @ctxt: an XML parser context
6717 * @str: a pointer to an index in the string
6718 *
6719 * parse ENTITY references declarations, but this version parses it from
6720 * a string value.
6721 *
6722 * [68] EntityRef ::= '&' Name ';'
6723 *
6724 * [ WFC: Entity Declared ]
6725 * In a document without any DTD, a document with only an internal DTD
6726 * subset which contains no parameter entity references, or a document
6727 * with "standalone='yes'", the Name given in the entity reference
6728 * must match that in an entity declaration, except that well-formed
6729 * documents need not declare any of the following entities: amp, lt,
6730 * gt, apos, quot. The declaration of a parameter entity must precede
6731 * any reference to it. Similarly, the declaration of a general entity
6732 * must precede any reference to it which appears in a default value in an
6733 * attribute-list declaration. Note that if entities are declared in the
6734 * external subset or in external parameter entities, a non-validating
6735 * processor is not obligated to read and process their declarations;
6736 * for such documents, the rule that an entity must be declared is a
6737 * well-formedness constraint only if standalone='yes'.
6738 *
6739 * [ WFC: Parsed Entity ]
6740 * An entity reference must not contain the name of an unparsed entity
6741 *
6742 * Returns the xmlEntityPtr if found, or NULL otherwise. The str pointer
6743 * is updated to the current location in the string.
6744 */
6745xmlEntityPtr
6746xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar ** str) {
6747 xmlChar *name;
6748 const xmlChar *ptr;
6749 xmlChar cur;
6750 xmlEntityPtr ent = NULL;
6751
6752 if ((str == NULL) || (*str == NULL))
6753 return(NULL);
6754 ptr = *str;
6755 cur = *ptr;
6756 if (cur == '&') {
6757 ptr++;
6758 cur = *ptr;
6759 name = xmlParseStringName(ctxt, &ptr);
6760 if (name == NULL) {
6761 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6762 "xmlParseStringEntityRef: no name\n");
6763 } else {
6764 if (*ptr == ';') {
6765 ptr++;
6766 /*
6767 * Increase the number of entity references parsed
6768 */
6769 ctxt->nbentities++;
6770 /*
6771 * Ask first SAX for entity resolution, otherwise try the
6772 * predefined set.
6773 */
6774 if (ctxt->sax != NULL) {
6775 if (ctxt->sax->getEntity != NULL)
6776 ent = ctxt->sax->getEntity(ctxt->userData, name);
6777 if (ent == NULL)
6778 ent = xmlGetPredefinedEntity(name);
6779 if ((ent == NULL) && (ctxt->userData==ctxt)) {
6780 ent = xmlSAX2GetEntity(ctxt, name);
6781 }
6782 }
6783 /*
6784 * [ WFC: Entity Declared ]
6785 * In a document without any DTD, a document with only an
6786 * internal DTD subset which contains no parameter entity
6787 * references, or a document with "standalone='yes'", the
6788 * Name given in the entity reference must match that in an
6789 * entity declaration, except that well-formed documents
6790 * need not declare any of the following entities: amp, lt,
6791 * gt, apos, quot.
6792 * The declaration of a parameter entity must precede any
6793 * reference to it.
6794 * Similarly, the declaration of a general entity must
6795 * precede any reference to it which appears in a default
6796 * value in an attribute-list declaration. Note that if
6797 * entities are declared in the external subset or in
6798 * external parameter entities, a non-validating processor
6799 * is not obligated to read and process their declarations;
6800 * for such documents, the rule that an entity must be
6801 * declared is a well-formedness constraint only if
6802 * standalone='yes'.
6803 */
6804 if (ent == NULL) {
6805 if ((ctxt->standalone == 1) ||
6806 ((ctxt->hasExternalSubset == 0) &&
6807 (ctxt->hasPErefs == 0))) {
6808 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
6809 "Entity '%s' not defined\n", name);
6810 } else {
6811 xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
6812 "Entity '%s' not defined\n",
6813 name);
6814 }
6815 /* TODO ? check regressions ctxt->valid = 0; */
6816 }
6817
6818 /*
6819 * [ WFC: Parsed Entity ]
6820 * An entity reference must not contain the name of an
6821 * unparsed entity
6822 */
6823 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
6824 xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
6825 "Entity reference to unparsed entity %s\n", name);
6826 }
6827
6828 /*
6829 * [ WFC: No External Entity References ]
6830 * Attribute values cannot contain direct or indirect
6831 * entity references to external entities.
6832 */
6833 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
6834 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
6835 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
6836 "Attribute references external entity '%s'\n", name);
6837 }
6838 /*
6839 * [ WFC: No < in Attribute Values ]
6840 * The replacement text of any entity referred to directly or
6841 * indirectly in an attribute value (other than "&lt;") must
6842 * not contain a <.
6843 */
6844 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
6845 (ent != NULL) &&
6846 (!xmlStrEqual(ent->name, BAD_CAST "lt")) &&
6847 (ent->content != NULL) &&
6848 (xmlStrchr(ent->content, '<'))) {
6849 xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
6850 "'<' in entity '%s' is not allowed in attributes values\n",
6851 name);
6852 }
6853
6854 /*
6855 * Internal check, no parameter entities here ...
6856 */
6857 else {
6858 switch (ent->etype) {
6859 case XML_INTERNAL_PARAMETER_ENTITY:
6860 case XML_EXTERNAL_PARAMETER_ENTITY:
6861 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
6862 "Attempt to reference the parameter entity '%s'\n",
6863 name);
6864 break;
6865 default:
6866 break;
6867 }
6868 }
6869
6870 /*
6871 * [ WFC: No Recursion ]
6872 * A parsed entity must not contain a recursive reference
6873 * to itself, either directly or indirectly.
6874 * Done somewhere else
6875 */
6876
6877 } else {
6878 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
6879 }
6880 xmlFree(name);
6881 }
6882 }
6883 *str = ptr;
6884 return(ent);
6885}
6886
6887/**
6888 * xmlParsePEReference:
6889 * @ctxt: an XML parser context
6890 *
6891 * parse PEReference declarations
6892 * The entity content is handled directly by pushing it's content as
6893 * a new input stream.
6894 *
6895 * [69] PEReference ::= '%' Name ';'
6896 *
6897 * [ WFC: No Recursion ]
6898 * A parsed entity must not contain a recursive
6899 * reference to itself, either directly or indirectly.
6900 *
6901 * [ WFC: Entity Declared ]
6902 * In a document without any DTD, a document with only an internal DTD
6903 * subset which contains no parameter entity references, or a document
6904 * with "standalone='yes'", ... ... The declaration of a parameter
6905 * entity must precede any reference to it...
6906 *
6907 * [ VC: Entity Declared ]
6908 * In a document with an external subset or external parameter entities
6909 * with "standalone='no'", ... ... The declaration of a parameter entity
6910 * must precede any reference to it...
6911 *
6912 * [ WFC: In DTD ]
6913 * Parameter-entity references may only appear in the DTD.
6914 * NOTE: misleading but this is handled.
6915 */
6916void
6917xmlParsePEReference(xmlParserCtxtPtr ctxt)
6918{
6919 const xmlChar *name;
6920 xmlEntityPtr entity = NULL;
6921 xmlParserInputPtr input;
6922
6923 if (RAW == '%') {
6924 NEXT;
6925 name = xmlParseName(ctxt);
6926 if (name == NULL) {
6927 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6928 "xmlParsePEReference: no name\n");
6929 } else {
6930 if (RAW == ';') {
6931 NEXT;
6932 /*
6933 * Increase the number of entity references parsed
6934 */
6935 ctxt->nbentities++;
6936
6937 if ((ctxt->sax != NULL) &&
6938 (ctxt->sax->getParameterEntity != NULL))
6939 entity = ctxt->sax->getParameterEntity(ctxt->userData,
6940 name);
6941 if (entity == NULL) {
6942 /*
6943 * [ WFC: Entity Declared ]
6944 * In a document without any DTD, a document with only an
6945 * internal DTD subset which contains no parameter entity
6946 * references, or a document with "standalone='yes'", ...
6947 * ... The declaration of a parameter entity must precede
6948 * any reference to it...
6949 */
6950 if ((ctxt->standalone == 1) ||
6951 ((ctxt->hasExternalSubset == 0) &&
6952 (ctxt->hasPErefs == 0))) {
6953 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
6954 "PEReference: %%%s; not found\n",
6955 name);
6956 } else {
6957 /*
6958 * [ VC: Entity Declared ]
6959 * In a document with an external subset or external
6960 * parameter entities with "standalone='no'", ...
6961 * ... The declaration of a parameter entity must
6962 * precede any reference to it...
6963 */
6964 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
6965 "PEReference: %%%s; not found\n",
6966 name, NULL);
6967 ctxt->valid = 0;
6968 }
6969 } else {
6970 /*
6971 * Internal checking in case the entity quest barfed
6972 */
6973 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
6974 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
6975 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
6976 "Internal: %%%s; is not a parameter entity\n",
6977 name, NULL);
6978 } else if (ctxt->input->free != deallocblankswrapper) {
6979 input =
6980 xmlNewBlanksWrapperInputStream(ctxt, entity);
6981 xmlPushInput(ctxt, input);
6982 } else {
6983 /*
6984 * TODO !!!
6985 * handle the extra spaces added before and after
6986 * c.f. http://www.w3.org/TR/REC-xml#as-PE
6987 */
6988 input = xmlNewEntityInputStream(ctxt, entity);
6989 xmlPushInput(ctxt, input);
6990 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
6991 (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) &&
6992 (IS_BLANK_CH(NXT(5)))) {
6993 xmlParseTextDecl(ctxt);
6994 if (ctxt->errNo ==
6995 XML_ERR_UNSUPPORTED_ENCODING) {
6996 /*
6997 * The XML REC instructs us to stop parsing
6998 * right here
6999 */
7000 ctxt->instate = XML_PARSER_EOF;
7001 return;
7002 }
7003 }
7004 }
7005 }
7006 ctxt->hasPErefs = 1;
7007 } else {
7008 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
7009 }
7010 }
7011 }
7012}
7013
7014/**
7015 * xmlParseStringPEReference:
7016 * @ctxt: an XML parser context
7017 * @str: a pointer to an index in the string
7018 *
7019 * parse PEReference declarations
7020 *
7021 * [69] PEReference ::= '%' Name ';'
7022 *
7023 * [ WFC: No Recursion ]
7024 * A parsed entity must not contain a recursive
7025 * reference to itself, either directly or indirectly.
7026 *
7027 * [ WFC: Entity Declared ]
7028 * In a document without any DTD, a document with only an internal DTD
7029 * subset which contains no parameter entity references, or a document
7030 * with "standalone='yes'", ... ... The declaration of a parameter
7031 * entity must precede any reference to it...
7032 *
7033 * [ VC: Entity Declared ]
7034 * In a document with an external subset or external parameter entities
7035 * with "standalone='no'", ... ... The declaration of a parameter entity
7036 * must precede any reference to it...
7037 *
7038 * [ WFC: In DTD ]
7039 * Parameter-entity references may only appear in the DTD.
7040 * NOTE: misleading but this is handled.
7041 *
7042 * Returns the string of the entity content.
7043 * str is updated to the current value of the index
7044 */
7045xmlEntityPtr
7046xmlParseStringPEReference(xmlParserCtxtPtr ctxt, const xmlChar **str) {
7047 const xmlChar *ptr;
7048 xmlChar cur;
7049 xmlChar *name;
7050 xmlEntityPtr entity = NULL;
7051
7052 if ((str == NULL) || (*str == NULL)) return(NULL);
7053 ptr = *str;
7054 cur = *ptr;
7055 if (cur == '%') {
7056 ptr++;
7057 cur = *ptr;
7058 name = xmlParseStringName(ctxt, &ptr);
7059 if (name == NULL) {
7060 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7061 "xmlParseStringPEReference: no name\n");
7062 } else {
7063 cur = *ptr;
7064 if (cur == ';') {
7065 ptr++;
7066 cur = *ptr;
7067 /*
7068 * Increase the number of entity references parsed
7069 */
7070 ctxt->nbentities++;
7071
7072 if ((ctxt->sax != NULL) &&
7073 (ctxt->sax->getParameterEntity != NULL))
7074 entity = ctxt->sax->getParameterEntity(ctxt->userData,
7075 name);
7076 if (entity == NULL) {
7077 /*
7078 * [ WFC: Entity Declared ]
7079 * In a document without any DTD, a document with only an
7080 * internal DTD subset which contains no parameter entity
7081 * references, or a document with "standalone='yes'", ...
7082 * ... The declaration of a parameter entity must precede
7083 * any reference to it...
7084 */
7085 if ((ctxt->standalone == 1) ||
7086 ((ctxt->hasExternalSubset == 0) &&
7087 (ctxt->hasPErefs == 0))) {
7088 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7089 "PEReference: %%%s; not found\n", name);
7090 } else {
7091 /*
7092 * [ VC: Entity Declared ]
7093 * In a document with an external subset or external
7094 * parameter entities with "standalone='no'", ...
7095 * ... The declaration of a parameter entity must
7096 * precede any reference to it...
7097 */
7098 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
7099 "PEReference: %%%s; not found\n",
7100 name, NULL);
7101 ctxt->valid = 0;
7102 }
7103 } else {
7104 /*
7105 * Internal checking in case the entity quest barfed
7106 */
7107 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
7108 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
7109 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
7110 "%%%s; is not a parameter entity\n",
7111 name, NULL);
7112 }
7113 }
7114 ctxt->hasPErefs = 1;
7115 } else {
7116 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
7117 }
7118 xmlFree(name);
7119 }
7120 }
7121 *str = ptr;
7122 return(entity);
7123}
7124
7125/**
7126 * xmlParseDocTypeDecl:
7127 * @ctxt: an XML parser context
7128 *
7129 * parse a DOCTYPE declaration
7130 *
7131 * [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S?
7132 * ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
7133 *
7134 * [ VC: Root Element Type ]
7135 * The Name in the document type declaration must match the element
7136 * type of the root element.
7137 */
7138
7139void
7140xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt) {
7141 const xmlChar *name = NULL;
7142 xmlChar *ExternalID = NULL;
7143 xmlChar *URI = NULL;
7144
7145 /*
7146 * We know that '<!DOCTYPE' has been detected.
7147 */
7148 SKIP(9);
7149
7150 SKIP_BLANKS;
7151
7152 /*
7153 * Parse the DOCTYPE name.
7154 */
7155 name = xmlParseName(ctxt);
7156 if (name == NULL) {
7157 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7158 "xmlParseDocTypeDecl : no DOCTYPE name !\n");
7159 }
7160 ctxt->intSubName = name;
7161
7162 SKIP_BLANKS;
7163
7164 /*
7165 * Check for SystemID and ExternalID
7166 */
7167 URI = xmlParseExternalID(ctxt, &ExternalID, 1);
7168
7169 if ((URI != NULL) || (ExternalID != NULL)) {
7170 ctxt->hasExternalSubset = 1;
7171 }
7172 ctxt->extSubURI = URI;
7173 ctxt->extSubSystem = ExternalID;
7174
7175 SKIP_BLANKS;
7176
7177 /*
7178 * Create and update the internal subset.
7179 */
7180 if ((ctxt->sax != NULL) && (ctxt->sax->internalSubset != NULL) &&
7181 (!ctxt->disableSAX))
7182 ctxt->sax->internalSubset(ctxt->userData, name, ExternalID, URI);
7183
7184 /*
7185 * Is there any internal subset declarations ?
7186 * they are handled separately in xmlParseInternalSubset()
7187 */
7188 if (RAW == '[')
7189 return;
7190
7191 /*
7192 * We should be at the end of the DOCTYPE declaration.
7193 */
7194 if (RAW != '>') {
7195 xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
7196 }
7197 NEXT;
7198}
7199
7200/**
7201 * xmlParseInternalSubset:
7202 * @ctxt: an XML parser context
7203 *
7204 * parse the internal subset declaration
7205 *
7206 * [28 end] ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
7207 */
7208
7209static void
7210xmlParseInternalSubset(xmlParserCtxtPtr ctxt) {
7211 /*
7212 * Is there any DTD definition ?
7213 */
7214 if (RAW == '[') {
7215 ctxt->instate = XML_PARSER_DTD;
7216 NEXT;
7217 /*
7218 * Parse the succession of Markup declarations and
7219 * PEReferences.
7220 * Subsequence (markupdecl | PEReference | S)*
7221 */
7222 while (RAW != ']') {
7223 const xmlChar *check = CUR_PTR;
7224 unsigned int cons = ctxt->input->consumed;
7225
7226 SKIP_BLANKS;
7227 xmlParseMarkupDecl(ctxt);
7228 xmlParsePEReference(ctxt);
7229
7230 /*
7231 * Pop-up of finished entities.
7232 */
7233 while ((RAW == 0) && (ctxt->inputNr > 1))
7234 xmlPopInput(ctxt);
7235
7236 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
7237 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
7238 "xmlParseInternalSubset: error detected in Markup declaration\n");
7239 break;
7240 }
7241 }
7242 if (RAW == ']') {
7243 NEXT;
7244 SKIP_BLANKS;
7245 }
7246 }
7247
7248 /*
7249 * We should be at the end of the DOCTYPE declaration.
7250 */
7251 if (RAW != '>') {
7252 xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
7253 }
7254 NEXT;
7255}
7256
7257#ifdef LIBXML_SAX1_ENABLED
7258/**
7259 * xmlParseAttribute:
7260 * @ctxt: an XML parser context
7261 * @value: a xmlChar ** used to store the value of the attribute
7262 *
7263 * parse an attribute
7264 *
7265 * [41] Attribute ::= Name Eq AttValue
7266 *
7267 * [ WFC: No External Entity References ]
7268 * Attribute values cannot contain direct or indirect entity references
7269 * to external entities.
7270 *
7271 * [ WFC: No < in Attribute Values ]
7272 * The replacement text of any entity referred to directly or indirectly in
7273 * an attribute value (other than "&lt;") must not contain a <.
7274 *
7275 * [ VC: Attribute Value Type ]
7276 * The attribute must have been declared; the value must be of the type
7277 * declared for it.
7278 *
7279 * [25] Eq ::= S? '=' S?
7280 *
7281 * With namespace:
7282 *
7283 * [NS 11] Attribute ::= QName Eq AttValue
7284 *
7285 * Also the case QName == xmlns:??? is handled independently as a namespace
7286 * definition.
7287 *
7288 * Returns the attribute name, and the value in *value.
7289 */
7290
7291const xmlChar *
7292xmlParseAttribute(xmlParserCtxtPtr ctxt, xmlChar **value) {
7293 const xmlChar *name;
7294 xmlChar *val;
7295
7296 *value = NULL;
7297 GROW;
7298 name = xmlParseName(ctxt);
7299 if (name == NULL) {
7300 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7301 "error parsing attribute name\n");
7302 return(NULL);
7303 }
7304
7305 /*
7306 * read the value
7307 */
7308 SKIP_BLANKS;
7309 if (RAW == '=') {
7310 NEXT;
7311 SKIP_BLANKS;
7312 val = xmlParseAttValue(ctxt);
7313 ctxt->instate = XML_PARSER_CONTENT;
7314 } else {
7315 xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
7316 "Specification mandate value for attribute %s\n", name);
7317 return(NULL);
7318 }
7319
7320 /*
7321 * Check that xml:lang conforms to the specification
7322 * No more registered as an error, just generate a warning now
7323 * since this was deprecated in XML second edition
7324 */
7325 if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "xml:lang"))) {
7326 if (!xmlCheckLanguageID(val)) {
7327 xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
7328 "Malformed value for xml:lang : %s\n",
7329 val, NULL);
7330 }
7331 }
7332
7333 /*
7334 * Check that xml:space conforms to the specification
7335 */
7336 if (xmlStrEqual(name, BAD_CAST "xml:space")) {
7337 if (xmlStrEqual(val, BAD_CAST "default"))
7338 *(ctxt->space) = 0;
7339 else if (xmlStrEqual(val, BAD_CAST "preserve"))
7340 *(ctxt->space) = 1;
7341 else {
7342 xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE,
7343"Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
7344 val, NULL);
7345 }
7346 }
7347
7348 *value = val;
7349 return(name);
7350}
7351
7352/**
7353 * xmlParseStartTag:
7354 * @ctxt: an XML parser context
7355 *
7356 * parse a start of tag either for rule element or
7357 * EmptyElement. In both case we don't parse the tag closing chars.
7358 *
7359 * [40] STag ::= '<' Name (S Attribute)* S? '>'
7360 *
7361 * [ WFC: Unique Att Spec ]
7362 * No attribute name may appear more than once in the same start-tag or
7363 * empty-element tag.
7364 *
7365 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
7366 *
7367 * [ WFC: Unique Att Spec ]
7368 * No attribute name may appear more than once in the same start-tag or
7369 * empty-element tag.
7370 *
7371 * With namespace:
7372 *
7373 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
7374 *
7375 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
7376 *
7377 * Returns the element name parsed
7378 */
7379
7380const xmlChar *
7381xmlParseStartTag(xmlParserCtxtPtr ctxt) {
7382 const xmlChar *name;
7383 const xmlChar *attname;
7384 xmlChar *attvalue;
7385 const xmlChar **atts = ctxt->atts;
7386 int nbatts = 0;
7387 int maxatts = ctxt->maxatts;
7388 int i;
7389
7390 if (RAW != '<') return(NULL);
7391 NEXT1;
7392
7393 name = xmlParseName(ctxt);
7394 if (name == NULL) {
7395 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7396 "xmlParseStartTag: invalid element name\n");
7397 return(NULL);
7398 }
7399
7400 /*
7401 * Now parse the attributes, it ends up with the ending
7402 *
7403 * (S Attribute)* S?
7404 */
7405 SKIP_BLANKS;
7406 GROW;
7407
7408 while ((RAW != '>') &&
7409 ((RAW != '/') || (NXT(1) != '>')) &&
7410 (IS_BYTE_CHAR(RAW))) {
7411 const xmlChar *q = CUR_PTR;
7412 unsigned int cons = ctxt->input->consumed;
7413
7414 attname = xmlParseAttribute(ctxt, &attvalue);
7415 if ((attname != NULL) && (attvalue != NULL)) {
7416 /*
7417 * [ WFC: Unique Att Spec ]
7418 * No attribute name may appear more than once in the same
7419 * start-tag or empty-element tag.
7420 */
7421 for (i = 0; i < nbatts;i += 2) {
7422 if (xmlStrEqual(atts[i], attname)) {
7423 xmlErrAttributeDup(ctxt, NULL, attname);
7424 xmlFree(attvalue);
7425 goto failed;
7426 }
7427 }
7428 /*
7429 * Add the pair to atts
7430 */
7431 if (atts == NULL) {
7432 maxatts = 22; /* allow for 10 attrs by default */
7433 atts = (const xmlChar **)
7434 xmlMalloc(maxatts * sizeof(xmlChar *));
7435 if (atts == NULL) {
7436 xmlErrMemory(ctxt, NULL);
7437 if (attvalue != NULL)
7438 xmlFree(attvalue);
7439 goto failed;
7440 }
7441 ctxt->atts = atts;
7442 ctxt->maxatts = maxatts;
7443 } else if (nbatts + 4 > maxatts) {
7444 const xmlChar **n;
7445
7446 maxatts *= 2;
7447 n = (const xmlChar **) xmlRealloc((void *) atts,
7448 maxatts * sizeof(const xmlChar *));
7449 if (n == NULL) {
7450 xmlErrMemory(ctxt, NULL);
7451 if (attvalue != NULL)
7452 xmlFree(attvalue);
7453 goto failed;
7454 }
7455 atts = n;
7456 ctxt->atts = atts;
7457 ctxt->maxatts = maxatts;
7458 }
7459 atts[nbatts++] = attname;
7460 atts[nbatts++] = attvalue;
7461 atts[nbatts] = NULL;
7462 atts[nbatts + 1] = NULL;
7463 } else {
7464 if (attvalue != NULL)
7465 xmlFree(attvalue);
7466 }
7467
7468failed:
7469
7470 GROW
7471 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
7472 break;
7473 if (!IS_BLANK_CH(RAW)) {
7474 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
7475 "attributes construct error\n");
7476 }
7477 SKIP_BLANKS;
7478 if ((cons == ctxt->input->consumed) && (q == CUR_PTR) &&
7479 (attname == NULL) && (attvalue == NULL)) {
7480 xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
7481 "xmlParseStartTag: problem parsing attributes\n");
7482 break;
7483 }
7484 SHRINK;
7485 GROW;
7486 }
7487
7488 /*
7489 * SAX: Start of Element !
7490 */
7491 if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL) &&
7492 (!ctxt->disableSAX)) {
7493 if (nbatts > 0)
7494 ctxt->sax->startElement(ctxt->userData, name, atts);
7495 else
7496 ctxt->sax->startElement(ctxt->userData, name, NULL);
7497 }
7498
7499 if (atts != NULL) {
7500 /* Free only the content strings */
7501 for (i = 1;i < nbatts;i+=2)
7502 if (atts[i] != NULL)
7503 xmlFree((xmlChar *) atts[i]);
7504 }
7505 return(name);
7506}
7507
7508/**
7509 * xmlParseEndTag1:
7510 * @ctxt: an XML parser context
7511 * @line: line of the start tag
7512 * @nsNr: number of namespaces on the start tag
7513 *
7514 * parse an end of tag
7515 *
7516 * [42] ETag ::= '</' Name S? '>'
7517 *
7518 * With namespace
7519 *
7520 * [NS 9] ETag ::= '</' QName S? '>'
7521 */
7522
7523static void
7524xmlParseEndTag1(xmlParserCtxtPtr ctxt, int line) {
7525 const xmlChar *name;
7526
7527 GROW;
7528 if ((RAW != '<') || (NXT(1) != '/')) {
7529 xmlFatalErrMsg(ctxt, XML_ERR_LTSLASH_REQUIRED,
7530 "xmlParseEndTag: '</' not found\n");
7531 return;
7532 }
7533 SKIP(2);
7534
7535 name = xmlParseNameAndCompare(ctxt,ctxt->name);
7536
7537 /*
7538 * We should definitely be at the ending "S? '>'" part
7539 */
7540 GROW;
7541 SKIP_BLANKS;
7542 if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
7543 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
7544 } else
7545 NEXT1;
7546
7547 /*
7548 * [ WFC: Element Type Match ]
7549 * The Name in an element's end-tag must match the element type in the
7550 * start-tag.
7551 *
7552 */
7553 if (name != (xmlChar*)1) {
7554 if (name == NULL) name = BAD_CAST "unparseable";
7555 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
7556 "Opening and ending tag mismatch: %s line %d and %s\n",
7557 ctxt->name, line, name);
7558 }
7559
7560 /*
7561 * SAX: End of Tag
7562 */
7563 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
7564 (!ctxt->disableSAX))
7565 ctxt->sax->endElement(ctxt->userData, ctxt->name);
7566
7567 namePop(ctxt);
7568 spacePop(ctxt);
7569 return;
7570}
7571
7572/**
7573 * xmlParseEndTag:
7574 * @ctxt: an XML parser context
7575 *
7576 * parse an end of tag
7577 *
7578 * [42] ETag ::= '</' Name S? '>'
7579 *
7580 * With namespace
7581 *
7582 * [NS 9] ETag ::= '</' QName S? '>'
7583 */
7584
7585void
7586xmlParseEndTag(xmlParserCtxtPtr ctxt) {
7587 xmlParseEndTag1(ctxt, 0);
7588}
7589#endif /* LIBXML_SAX1_ENABLED */
7590
7591/************************************************************************
7592 * *
7593 * SAX 2 specific operations *
7594 * *
7595 ************************************************************************/
7596
7597static const xmlChar *
7598xmlParseNCNameComplex(xmlParserCtxtPtr ctxt) {
7599 int len = 0, l;
7600 int c;
7601 int count = 0;
7602
7603 /*
7604 * Handler for more complex cases
7605 */
7606 GROW;
7607 c = CUR_CHAR(l);
7608 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
7609 (!IS_LETTER(c) && (c != '_'))) {
7610 return(NULL);
7611 }
7612
7613 while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
7614 ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
7615 (c == '.') || (c == '-') || (c == '_') ||
7616 (IS_COMBINING(c)) ||
7617 (IS_EXTENDER(c)))) {
7618 if (count++ > 100) {
7619 count = 0;
7620 GROW;
7621 }
7622 len += l;
7623 NEXTL(l);
7624 c = CUR_CHAR(l);
7625 }
7626 return(xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len));
7627}
7628
7629/*
7630 * xmlGetNamespace:
7631 * @ctxt: an XML parser context
7632 * @prefix: the prefix to lookup
7633 *
7634 * Lookup the namespace name for the @prefix (which ca be NULL)
7635 * The prefix must come from the @ctxt->dict dictionnary
7636 *
7637 * Returns the namespace name or NULL if not bound
7638 */
7639static const xmlChar *
7640xmlGetNamespace(xmlParserCtxtPtr ctxt, const xmlChar *prefix) {
7641 int i;
7642
7643 if (prefix == ctxt->str_xml) return(ctxt->str_xml_ns);
7644 for (i = ctxt->nsNr - 2;i >= 0;i-=2)
7645 if (ctxt->nsTab[i] == prefix) {
7646 if ((prefix == NULL) && (*ctxt->nsTab[i + 1] == 0))
7647 return(NULL);
7648 return(ctxt->nsTab[i + 1]);
7649 }
7650 return(NULL);
7651}
7652
7653/**
7654 * xmlParseNCName:
7655 * @ctxt: an XML parser context
7656 * @len: lenght of the string parsed
7657 *
7658 * parse an XML name.
7659 *
7660 * [4NS] NCNameChar ::= Letter | Digit | '.' | '-' | '_' |
7661 * CombiningChar | Extender
7662 *
7663 * [5NS] NCName ::= (Letter | '_') (NCNameChar)*
7664 *
7665 * Returns the Name parsed or NULL
7666 */
7667
7668static const xmlChar *
7669xmlParseNCName(xmlParserCtxtPtr ctxt) {
7670 const xmlChar *in;
7671 const xmlChar *ret;
7672 int count = 0;
7673
7674 /*
7675 * Accelerator for simple ASCII names
7676 */
7677 in = ctxt->input->cur;
7678 if (((*in >= 0x61) && (*in <= 0x7A)) ||
7679 ((*in >= 0x41) && (*in <= 0x5A)) ||
7680 (*in == '_')) {
7681 in++;
7682 while (((*in >= 0x61) && (*in <= 0x7A)) ||
7683 ((*in >= 0x41) && (*in <= 0x5A)) ||
7684 ((*in >= 0x30) && (*in <= 0x39)) ||
7685 (*in == '_') || (*in == '-') ||
7686 (*in == '.'))
7687 in++;
7688 if ((*in > 0) && (*in < 0x80)) {
7689 count = in - ctxt->input->cur;
7690 ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
7691 ctxt->input->cur = in;
7692 ctxt->nbChars += count;
7693 ctxt->input->col += count;
7694 if (ret == NULL) {
7695 xmlErrMemory(ctxt, NULL);
7696 }
7697 return(ret);
7698 }
7699 }
7700 return(xmlParseNCNameComplex(ctxt));
7701}
7702
7703/**
7704 * xmlParseQName:
7705 * @ctxt: an XML parser context
7706 * @prefix: pointer to store the prefix part
7707 *
7708 * parse an XML Namespace QName
7709 *
7710 * [6] QName ::= (Prefix ':')? LocalPart
7711 * [7] Prefix ::= NCName
7712 * [8] LocalPart ::= NCName
7713 *
7714 * Returns the Name parsed or NULL
7715 */
7716
7717static const xmlChar *
7718xmlParseQName(xmlParserCtxtPtr ctxt, const xmlChar **prefix) {
7719 const xmlChar *l, *p;
7720
7721 GROW;
7722
7723 l = xmlParseNCName(ctxt);
7724 if (l == NULL) {
7725 if (CUR == ':') {
7726 l = xmlParseName(ctxt);
7727 if (l != NULL) {
7728 xmlNsErr(ctxt, XML_NS_ERR_QNAME,
7729 "Failed to parse QName '%s'\n", l, NULL, NULL);
7730 *prefix = NULL;
7731 return(l);
7732 }
7733 }
7734 return(NULL);
7735 }
7736 if (CUR == ':') {
7737 NEXT;
7738 p = l;
7739 l = xmlParseNCName(ctxt);
7740 if (l == NULL) {
7741 xmlChar *tmp;
7742
7743 xmlNsErr(ctxt, XML_NS_ERR_QNAME,
7744 "Failed to parse QName '%s:'\n", p, NULL, NULL);
7745 tmp = xmlBuildQName(BAD_CAST "", p, NULL, 0);
7746 p = xmlDictLookup(ctxt->dict, tmp, -1);
7747 if (tmp != NULL) xmlFree(tmp);
7748 *prefix = NULL;
7749 return(p);
7750 }
7751 if (CUR == ':') {
7752 xmlChar *tmp;
7753
7754 xmlNsErr(ctxt, XML_NS_ERR_QNAME,
7755 "Failed to parse QName '%s:%s:'\n", p, l, NULL);
7756 NEXT;
7757 tmp = (xmlChar *) xmlParseName(ctxt);
7758 if (tmp != NULL) {
7759 tmp = xmlBuildQName(tmp, l, NULL, 0);
7760 l = xmlDictLookup(ctxt->dict, tmp, -1);
7761 if (tmp != NULL) xmlFree(tmp);
7762 *prefix = p;
7763 return(l);
7764 }
7765 tmp = xmlBuildQName(BAD_CAST "", l, NULL, 0);
7766 l = xmlDictLookup(ctxt->dict, tmp, -1);
7767 if (tmp != NULL) xmlFree(tmp);
7768 *prefix = p;
7769 return(l);
7770 }
7771 *prefix = p;
7772 } else
7773 *prefix = NULL;
7774 return(l);
7775}
7776
7777/**
7778 * xmlParseQNameAndCompare:
7779 * @ctxt: an XML parser context
7780 * @name: the localname
7781 * @prefix: the prefix, if any.
7782 *
7783 * parse an XML name and compares for match
7784 * (specialized for endtag parsing)
7785 *
7786 * Returns NULL for an illegal name, (xmlChar*) 1 for success
7787 * and the name for mismatch
7788 */
7789
7790static const xmlChar *
7791xmlParseQNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *name,
7792 xmlChar const *prefix) {
7793 const xmlChar *cmp = name;
7794 const xmlChar *in;
7795 const xmlChar *ret;
7796 const xmlChar *prefix2;
7797
7798 if (prefix == NULL) return(xmlParseNameAndCompare(ctxt, name));
7799
7800 GROW;
7801 in = ctxt->input->cur;
7802
7803 cmp = prefix;
7804 while (*in != 0 && *in == *cmp) {
7805 ++in;
7806 ++cmp;
7807 }
7808 if ((*cmp == 0) && (*in == ':')) {
7809 in++;
7810 cmp = name;
7811 while (*in != 0 && *in == *cmp) {
7812 ++in;
7813 ++cmp;
7814 }
7815 if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
7816 /* success */
7817 ctxt->input->cur = in;
7818 return((const xmlChar*) 1);
7819 }
7820 }
7821 /*
7822 * all strings coms from the dictionary, equality can be done directly
7823 */
7824 ret = xmlParseQName (ctxt, &prefix2);
7825 if ((ret == name) && (prefix == prefix2))
7826 return((const xmlChar*) 1);
7827 return ret;
7828}
7829
7830/**
7831 * xmlParseAttValueInternal:
7832 * @ctxt: an XML parser context
7833 * @len: attribute len result
7834 * @alloc: whether the attribute was reallocated as a new string
7835 * @normalize: if 1 then further non-CDATA normalization must be done
7836 *
7837 * parse a value for an attribute.
7838 * NOTE: if no normalization is needed, the routine will return pointers
7839 * directly from the data buffer.
7840 *
7841 * 3.3.3 Attribute-Value Normalization:
7842 * Before the value of an attribute is passed to the application or
7843 * checked for validity, the XML processor must normalize it as follows:
7844 * - a character reference is processed by appending the referenced
7845 * character to the attribute value
7846 * - an entity reference is processed by recursively processing the
7847 * replacement text of the entity
7848 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
7849 * appending #x20 to the normalized value, except that only a single
7850 * #x20 is appended for a "#xD#xA" sequence that is part of an external
7851 * parsed entity or the literal entity value of an internal parsed entity
7852 * - other characters are processed by appending them to the normalized value
7853 * If the declared value is not CDATA, then the XML processor must further
7854 * process the normalized attribute value by discarding any leading and
7855 * trailing space (#x20) characters, and by replacing sequences of space
7856 * (#x20) characters by a single space (#x20) character.
7857 * All attributes for which no declaration has been read should be treated
7858 * by a non-validating parser as if declared CDATA.
7859 *
7860 * Returns the AttValue parsed or NULL. The value has to be freed by the
7861 * caller if it was copied, this can be detected by val[*len] == 0.
7862 */
7863
7864static xmlChar *
7865xmlParseAttValueInternal(xmlParserCtxtPtr ctxt, int *len, int *alloc,
7866 int normalize)
7867{
7868 xmlChar limit = 0;
7869 const xmlChar *in = NULL, *start, *end, *last;
7870 xmlChar *ret = NULL;
7871
7872 GROW;
7873 in = (xmlChar *) CUR_PTR;
7874 if (*in != '"' && *in != '\'') {
7875 xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
7876 return (NULL);
7877 }
7878 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
7879
7880 /*
7881 * try to handle in this routine the most common case where no
7882 * allocation of a new string is required and where content is
7883 * pure ASCII.
7884 */
7885 limit = *in++;
7886 end = ctxt->input->end;
7887 start = in;
7888 if (in >= end) {
7889 const xmlChar *oldbase = ctxt->input->base;
7890 GROW;
7891 if (oldbase != ctxt->input->base) {
7892 long delta = ctxt->input->base - oldbase;
7893 start = start + delta;
7894 in = in + delta;
7895 }
7896 end = ctxt->input->end;
7897 }
7898 if (normalize) {
7899 /*
7900 * Skip any leading spaces
7901 */
7902 while ((in < end) && (*in != limit) &&
7903 ((*in == 0x20) || (*in == 0x9) ||
7904 (*in == 0xA) || (*in == 0xD))) {
7905 in++;
7906 start = in;
7907 if (in >= end) {
7908 const xmlChar *oldbase = ctxt->input->base;
7909 GROW;
7910 if (oldbase != ctxt->input->base) {
7911 long delta = ctxt->input->base - oldbase;
7912 start = start + delta;
7913 in = in + delta;
7914 }
7915 end = ctxt->input->end;
7916 }
7917 }
7918 while ((in < end) && (*in != limit) && (*in >= 0x20) &&
7919 (*in <= 0x7f) && (*in != '&') && (*in != '<')) {
7920 if ((*in++ == 0x20) && (*in == 0x20)) break;
7921 if (in >= end) {
7922 const xmlChar *oldbase = ctxt->input->base;
7923 GROW;
7924 if (oldbase != ctxt->input->base) {
7925 long delta = ctxt->input->base - oldbase;
7926 start = start + delta;
7927 in = in + delta;
7928 }
7929 end = ctxt->input->end;
7930 }
7931 }
7932 last = in;
7933 /*
7934 * skip the trailing blanks
7935 */
7936 while ((last[-1] == 0x20) && (last > start)) last--;
7937 while ((in < end) && (*in != limit) &&
7938 ((*in == 0x20) || (*in == 0x9) ||
7939 (*in == 0xA) || (*in == 0xD))) {
7940 in++;
7941 if (in >= end) {
7942 const xmlChar *oldbase = ctxt->input->base;
7943 GROW;
7944 if (oldbase != ctxt->input->base) {
7945 long delta = ctxt->input->base - oldbase;
7946 start = start + delta;
7947 in = in + delta;
7948 last = last + delta;
7949 }
7950 end = ctxt->input->end;
7951 }
7952 }
7953 if (*in != limit) goto need_complex;
7954 } else {
7955 while ((in < end) && (*in != limit) && (*in >= 0x20) &&
7956 (*in <= 0x7f) && (*in != '&') && (*in != '<')) {
7957 in++;
7958 if (in >= end) {
7959 const xmlChar *oldbase = ctxt->input->base;
7960 GROW;
7961 if (oldbase != ctxt->input->base) {
7962 long delta = ctxt->input->base - oldbase;
7963 start = start + delta;
7964 in = in + delta;
7965 }
7966 end = ctxt->input->end;
7967 }
7968 }
7969 last = in;
7970 if (*in != limit) goto need_complex;
7971 }
7972 in++;
7973 if (len != NULL) {
7974 *len = last - start;
7975 ret = (xmlChar *) start;
7976 } else {
7977 if (alloc) *alloc = 1;
7978 ret = xmlStrndup(start, last - start);
7979 }
7980 CUR_PTR = in;
7981 if (alloc) *alloc = 0;
7982 return ret;
7983need_complex:
7984 if (alloc) *alloc = 1;
7985 return xmlParseAttValueComplex(ctxt, len, normalize);
7986}
7987
7988/**
7989 * xmlParseAttribute2:
7990 * @ctxt: an XML parser context
7991 * @pref: the element prefix
7992 * @elem: the element name
7993 * @prefix: a xmlChar ** used to store the value of the attribute prefix
7994 * @value: a xmlChar ** used to store the value of the attribute
7995 * @len: an int * to save the length of the attribute
7996 * @alloc: an int * to indicate if the attribute was allocated
7997 *
7998 * parse an attribute in the new SAX2 framework.
7999 *
8000 * Returns the attribute name, and the value in *value, .
8001 */
8002
8003static const xmlChar *
8004xmlParseAttribute2(xmlParserCtxtPtr ctxt,
8005 const xmlChar *pref, const xmlChar *elem,
8006 const xmlChar **prefix, xmlChar **value,
8007 int *len, int *alloc) {
8008 const xmlChar *name;
8009 xmlChar *val, *internal_val = NULL;
8010 int normalize = 0;
8011
8012 *value = NULL;
8013 GROW;
8014 name = xmlParseQName(ctxt, prefix);
8015 if (name == NULL) {
8016 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8017 "error parsing attribute name\n");
8018 return(NULL);
8019 }
8020
8021 /*
8022 * get the type if needed
8023 */
8024 if (ctxt->attsSpecial != NULL) {
8025 int type;
8026
8027 type = (int) (long) xmlHashQLookup2(ctxt->attsSpecial,
8028 pref, elem, *prefix, name);
8029 if (type != 0) normalize = 1;
8030 }
8031
8032 /*
8033 * read the value
8034 */
8035 SKIP_BLANKS;
8036 if (RAW == '=') {
8037 NEXT;
8038 SKIP_BLANKS;
8039 val = xmlParseAttValueInternal(ctxt, len, alloc, normalize);
8040 ctxt->instate = XML_PARSER_CONTENT;
8041 } else {
8042 xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
8043 "Specification mandate value for attribute %s\n", name);
8044 return(NULL);
8045 }
8046
8047 if (*prefix == ctxt->str_xml) {
8048 /*
8049 * Check that xml:lang conforms to the specification
8050 * No more registered as an error, just generate a warning now
8051 * since this was deprecated in XML second edition
8052 */
8053 if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "lang"))) {
8054 internal_val = xmlStrndup(val, *len);
8055 if (!xmlCheckLanguageID(internal_val)) {
8056 xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
8057 "Malformed value for xml:lang : %s\n",
8058 internal_val, NULL);
8059 }
8060 }
8061
8062 /*
8063 * Check that xml:space conforms to the specification
8064 */
8065 if (xmlStrEqual(name, BAD_CAST "space")) {
8066 internal_val = xmlStrndup(val, *len);
8067 if (xmlStrEqual(internal_val, BAD_CAST "default"))
8068 *(ctxt->space) = 0;
8069 else if (xmlStrEqual(internal_val, BAD_CAST "preserve"))
8070 *(ctxt->space) = 1;
8071 else {
8072 xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE,
8073"Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
8074 internal_val, NULL);
8075 }
8076 }
8077 if (internal_val) {
8078 xmlFree(internal_val);
8079 }
8080 }
8081
8082 *value = val;
8083 return(name);
8084}
8085
8086/**
8087 * xmlParseStartTag2:
8088 * @ctxt: an XML parser context
8089 *
8090 * parse a start of tag either for rule element or
8091 * EmptyElement. In both case we don't parse the tag closing chars.
8092 * This routine is called when running SAX2 parsing
8093 *
8094 * [40] STag ::= '<' Name (S Attribute)* S? '>'
8095 *
8096 * [ WFC: Unique Att Spec ]
8097 * No attribute name may appear more than once in the same start-tag or
8098 * empty-element tag.
8099 *
8100 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
8101 *
8102 * [ WFC: Unique Att Spec ]
8103 * No attribute name may appear more than once in the same start-tag or
8104 * empty-element tag.
8105 *
8106 * With namespace:
8107 *
8108 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
8109 *
8110 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
8111 *
8112 * Returns the element name parsed
8113 */
8114
8115static const xmlChar *
8116xmlParseStartTag2(xmlParserCtxtPtr ctxt, const xmlChar **pref,
8117 const xmlChar **URI, int *tlen) {
8118 const xmlChar *localname;
8119 const xmlChar *prefix;
8120 const xmlChar *attname;
8121 const xmlChar *aprefix;
8122 const xmlChar *nsname;
8123 xmlChar *attvalue;
8124 const xmlChar **atts = ctxt->atts;
8125 int maxatts = ctxt->maxatts;
8126 int nratts, nbatts, nbdef;
8127 int i, j, nbNs, attval, oldline, oldcol;
8128 const xmlChar *base;
8129 unsigned long cur;
8130 int nsNr = ctxt->nsNr;
8131
8132 if (RAW != '<') return(NULL);
8133 NEXT1;
8134
8135 /*
8136 * NOTE: it is crucial with the SAX2 API to never call SHRINK beyond that
8137 * point since the attribute values may be stored as pointers to
8138 * the buffer and calling SHRINK would destroy them !
8139 * The Shrinking is only possible once the full set of attribute
8140 * callbacks have been done.
8141 */
8142reparse:
8143 SHRINK;
8144 base = ctxt->input->base;
8145 cur = ctxt->input->cur - ctxt->input->base;
8146 oldline = ctxt->input->line;
8147 oldcol = ctxt->input->col;
8148 nbatts = 0;
8149 nratts = 0;
8150 nbdef = 0;
8151 nbNs = 0;
8152 attval = 0;
8153 /* Forget any namespaces added during an earlier parse of this element. */
8154 ctxt->nsNr = nsNr;
8155
8156 localname = xmlParseQName(ctxt, &prefix);
8157 if (localname == NULL) {
8158 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8159 "StartTag: invalid element name\n");
8160 return(NULL);
8161 }
8162 *tlen = ctxt->input->cur - ctxt->input->base - cur;
8163
8164 /*
8165 * Now parse the attributes, it ends up with the ending
8166 *
8167 * (S Attribute)* S?
8168 */
8169 SKIP_BLANKS;
8170 GROW;
8171 if (ctxt->input->base != base) goto base_changed;
8172
8173 while ((RAW != '>') &&
8174 ((RAW != '/') || (NXT(1) != '>')) &&
8175 (IS_BYTE_CHAR(RAW))) {
8176 const xmlChar *q = CUR_PTR;
8177 unsigned int cons = ctxt->input->consumed;
8178 int len = -1, alloc = 0;
8179
8180 attname = xmlParseAttribute2(ctxt, prefix, localname,
8181 &aprefix, &attvalue, &len, &alloc);
8182 if (ctxt->input->base != base) {
8183 if ((attvalue != NULL) && (alloc != 0))
8184 xmlFree(attvalue);
8185 attvalue = NULL;
8186 goto base_changed;
8187 }
8188 if ((attname != NULL) && (attvalue != NULL)) {
8189 if (len < 0) len = xmlStrlen(attvalue);
8190 if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
8191 const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len);
8192 xmlURIPtr uri;
8193
8194 if (*URL != 0) {
8195 uri = xmlParseURI((const char *) URL);
8196 if (uri == NULL) {
8197 xmlWarningMsg(ctxt, XML_WAR_NS_URI,
8198 "xmlns: %s not a valid URI\n",
8199 URL, NULL);
8200 } else {
8201 if (uri->scheme == NULL) {
8202 xmlWarningMsg(ctxt, XML_WAR_NS_URI_RELATIVE,
8203 "xmlns: URI %s is not absolute\n",
8204 URL, NULL);
8205 }
8206 xmlFreeURI(uri);
8207 }
8208 }
8209 /*
8210 * check that it's not a defined namespace
8211 */
8212 for (j = 1;j <= nbNs;j++)
8213 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL)
8214 break;
8215 if (j <= nbNs)
8216 xmlErrAttributeDup(ctxt, NULL, attname);
8217 else
8218 if (nsPush(ctxt, NULL, URL) > 0) nbNs++;
8219 if (alloc != 0) xmlFree(attvalue);
8220 SKIP_BLANKS;
8221 continue;
8222 }
8223 if (aprefix == ctxt->str_xmlns) {
8224 const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len);
8225 xmlURIPtr uri;
8226
8227 if (attname == ctxt->str_xml) {
8228 if (URL != ctxt->str_xml_ns) {
8229 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
8230 "xml namespace prefix mapped to wrong URI\n",
8231 NULL, NULL, NULL);
8232 }
8233 /*
8234 * Do not keep a namespace definition node
8235 */
8236 if (alloc != 0) xmlFree(attvalue);
8237 SKIP_BLANKS;
8238 continue;
8239 }
8240 uri = xmlParseURI((const char *) URL);
8241 if (uri == NULL) {
8242 xmlWarningMsg(ctxt, XML_WAR_NS_URI,
8243 "xmlns:%s: '%s' is not a valid URI\n",
8244 attname, URL);
8245 } else {
8246 if ((ctxt->pedantic) && (uri->scheme == NULL)) {
8247 xmlWarningMsg(ctxt, XML_WAR_NS_URI_RELATIVE,
8248 "xmlns:%s: URI %s is not absolute\n",
8249 attname, URL);
8250 }
8251 xmlFreeURI(uri);
8252 }
8253
8254 /*
8255 * check that it's not a defined namespace
8256 */
8257 for (j = 1;j <= nbNs;j++)
8258 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname)
8259 break;
8260 if (j <= nbNs)
8261 xmlErrAttributeDup(ctxt, aprefix, attname);
8262 else
8263 if (nsPush(ctxt, attname, URL) > 0) nbNs++;
8264 if (alloc != 0) xmlFree(attvalue);
8265 SKIP_BLANKS;
8266 if (ctxt->input->base != base) goto base_changed;
8267 continue;
8268 }
8269
8270 /*
8271 * Add the pair to atts
8272 */
8273 if ((atts == NULL) || (nbatts + 5 > maxatts)) {
8274 if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
8275 if (attvalue[len] == 0)
8276 xmlFree(attvalue);
8277 goto failed;
8278 }
8279 maxatts = ctxt->maxatts;
8280 atts = ctxt->atts;
8281 }
8282 ctxt->attallocs[nratts++] = alloc;
8283 atts[nbatts++] = attname;
8284 atts[nbatts++] = aprefix;
8285 atts[nbatts++] = NULL; /* the URI will be fetched later */
8286 atts[nbatts++] = attvalue;
8287 attvalue += len;
8288 atts[nbatts++] = attvalue;
8289 /*
8290 * tag if some deallocation is needed
8291 */
8292 if (alloc != 0) attval = 1;
8293 } else {
8294 if ((attvalue != NULL) && (attvalue[len] == 0))
8295 xmlFree(attvalue);
8296 }
8297
8298failed:
8299
8300 GROW
8301 if (ctxt->input->base != base) goto base_changed;
8302 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
8303 break;
8304 if (!IS_BLANK_CH(RAW)) {
8305 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
8306 "attributes construct error\n");
8307 break;
8308 }
8309 SKIP_BLANKS;
8310 if ((cons == ctxt->input->consumed) && (q == CUR_PTR) &&
8311 (attname == NULL) && (attvalue == NULL)) {
8312 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
8313 "xmlParseStartTag: problem parsing attributes\n");
8314 break;
8315 }
8316 GROW;
8317 if (ctxt->input->base != base) goto base_changed;
8318 }
8319
8320 /*
8321 * The attributes defaulting
8322 */
8323 if (ctxt->attsDefault != NULL) {
8324 xmlDefAttrsPtr defaults;
8325
8326 defaults = xmlHashLookup2(ctxt->attsDefault, localname, prefix);
8327 if (defaults != NULL) {
8328 for (i = 0;i < defaults->nbAttrs;i++) {
8329 attname = defaults->values[4 * i];
8330 aprefix = defaults->values[4 * i + 1];
8331
8332 /*
8333 * special work for namespaces defaulted defs
8334 */
8335 if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
8336 /*
8337 * check that it's not a defined namespace
8338 */
8339 for (j = 1;j <= nbNs;j++)
8340 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL)
8341 break;
8342 if (j <= nbNs) continue;
8343
8344 nsname = xmlGetNamespace(ctxt, NULL);
8345 if (nsname != defaults->values[4 * i + 2]) {
8346 if (nsPush(ctxt, NULL,
8347 defaults->values[4 * i + 2]) > 0)
8348 nbNs++;
8349 }
8350 } else if (aprefix == ctxt->str_xmlns) {
8351 /*
8352 * check that it's not a defined namespace
8353 */
8354 for (j = 1;j <= nbNs;j++)
8355 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname)
8356 break;
8357 if (j <= nbNs) continue;
8358
8359 nsname = xmlGetNamespace(ctxt, attname);
8360 if (nsname != defaults->values[2]) {
8361 if (nsPush(ctxt, attname,
8362 defaults->values[4 * i + 2]) > 0)
8363 nbNs++;
8364 }
8365 } else {
8366 /*
8367 * check that it's not a defined attribute
8368 */
8369 for (j = 0;j < nbatts;j+=5) {
8370 if ((attname == atts[j]) && (aprefix == atts[j+1]))
8371 break;
8372 }
8373 if (j < nbatts) continue;
8374
8375 if ((atts == NULL) || (nbatts + 5 > maxatts)) {
8376 if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
8377 return(NULL);
8378 }
8379 maxatts = ctxt->maxatts;
8380 atts = ctxt->atts;
8381 }
8382 atts[nbatts++] = attname;
8383 atts[nbatts++] = aprefix;
8384 if (aprefix == NULL)
8385 atts[nbatts++] = NULL;
8386 else
8387 atts[nbatts++] = xmlGetNamespace(ctxt, aprefix);
8388 atts[nbatts++] = defaults->values[4 * i + 2];
8389 atts[nbatts++] = defaults->values[4 * i + 3];
8390 nbdef++;
8391 }
8392 }
8393 }
8394 }
8395
8396 /*
8397 * The attributes checkings
8398 */
8399 for (i = 0; i < nbatts;i += 5) {
8400 /*
8401 * The default namespace does not apply to attribute names.
8402 */
8403 if (atts[i + 1] != NULL) {
8404 nsname = xmlGetNamespace(ctxt, atts[i + 1]);
8405 if (nsname == NULL) {
8406 xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
8407 "Namespace prefix %s for %s on %s is not defined\n",
8408 atts[i + 1], atts[i], localname);
8409 }
8410 atts[i + 2] = nsname;
8411 } else
8412 nsname = NULL;
8413 /*
8414 * [ WFC: Unique Att Spec ]
8415 * No attribute name may appear more than once in the same
8416 * start-tag or empty-element tag.
8417 * As extended by the Namespace in XML REC.
8418 */
8419 for (j = 0; j < i;j += 5) {
8420 if (atts[i] == atts[j]) {
8421 if (atts[i+1] == atts[j+1]) {
8422 xmlErrAttributeDup(ctxt, atts[i+1], atts[i]);
8423 break;
8424 }
8425 if ((nsname != NULL) && (atts[j + 2] == nsname)) {
8426 xmlNsErr(ctxt, XML_NS_ERR_ATTRIBUTE_REDEFINED,
8427 "Namespaced Attribute %s in '%s' redefined\n",
8428 atts[i], nsname, NULL);
8429 break;
8430 }
8431 }
8432 }
8433 }
8434
8435 nsname = xmlGetNamespace(ctxt, prefix);
8436 if ((prefix != NULL) && (nsname == NULL)) {
8437 xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
8438 "Namespace prefix %s on %s is not defined\n",
8439 prefix, localname, NULL);
8440 }
8441 *pref = prefix;
8442 *URI = nsname;
8443
8444 /*
8445 * SAX: Start of Element !
8446 */
8447 if ((ctxt->sax != NULL) && (ctxt->sax->startElementNs != NULL) &&
8448 (!ctxt->disableSAX)) {
8449 if (nbNs > 0)
8450 ctxt->sax->startElementNs(ctxt->userData, localname, prefix,
8451 nsname, nbNs, &ctxt->nsTab[ctxt->nsNr - 2 * nbNs],
8452 nbatts / 5, nbdef, atts);
8453 else
8454 ctxt->sax->startElementNs(ctxt->userData, localname, prefix,
8455 nsname, 0, NULL, nbatts / 5, nbdef, atts);
8456 }
8457
8458 /*
8459 * Free up attribute allocated strings if needed
8460 */
8461 if (attval != 0) {
8462 for (i = 3,j = 0; j < nratts;i += 5,j++)
8463 if ((ctxt->attallocs[j] != 0) && (atts[i] != NULL))
8464 xmlFree((xmlChar *) atts[i]);
8465 }
8466
8467 return(localname);
8468
8469base_changed:
8470 /*
8471 * the attribute strings are valid iif the base didn't changed
8472 */
8473 if (attval != 0) {
8474 for (i = 3,j = 0; j < nratts;i += 5,j++)
8475 if ((ctxt->attallocs[j] != 0) && (atts[i] != NULL))
8476 xmlFree((xmlChar *) atts[i]);
8477 }
8478 ctxt->input->cur = ctxt->input->base + cur;
8479 ctxt->input->line = oldline;
8480 ctxt->input->col = oldcol;
8481 if (ctxt->wellFormed == 1) {
8482 goto reparse;
8483 }
8484 return(NULL);
8485}
8486
8487/**
8488 * xmlParseEndTag2:
8489 * @ctxt: an XML parser context
8490 * @line: line of the start tag
8491 * @nsNr: number of namespaces on the start tag
8492 *
8493 * parse an end of tag
8494 *
8495 * [42] ETag ::= '</' Name S? '>'
8496 *
8497 * With namespace
8498 *
8499 * [NS 9] ETag ::= '</' QName S? '>'
8500 */
8501
8502static void
8503xmlParseEndTag2(xmlParserCtxtPtr ctxt, const xmlChar *prefix,
8504 const xmlChar *URI, int line, int nsNr, int tlen) {
8505 const xmlChar *name;
8506
8507 GROW;
8508 if ((RAW != '<') || (NXT(1) != '/')) {
8509 xmlFatalErr(ctxt, XML_ERR_LTSLASH_REQUIRED, NULL);
8510 return;
8511 }
8512 SKIP(2);
8513
8514 if ((tlen > 0) && (xmlStrncmp(ctxt->input->cur, ctxt->name, tlen) == 0)) {
8515 if (ctxt->input->cur[tlen] == '>') {
8516 ctxt->input->cur += tlen + 1;
8517 goto done;
8518 }
8519 ctxt->input->cur += tlen;
8520 name = (xmlChar*)1;
8521 } else {
8522 if (prefix == NULL)
8523 name = xmlParseNameAndCompare(ctxt, ctxt->name);
8524 else
8525 name = xmlParseQNameAndCompare(ctxt, ctxt->name, prefix);
8526 }
8527
8528 /*
8529 * We should definitely be at the ending "S? '>'" part
8530 */
8531 GROW;
8532 SKIP_BLANKS;
8533 if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
8534 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
8535 } else
8536 NEXT1;
8537
8538 /*
8539 * [ WFC: Element Type Match ]
8540 * The Name in an element's end-tag must match the element type in the
8541 * start-tag.
8542 *
8543 */
8544 if (name != (xmlChar*)1) {
8545 if (name == NULL) name = BAD_CAST "unparseable";
8546 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
8547 "Opening and ending tag mismatch: %s line %d and %s\n",
8548 ctxt->name, line, name);
8549 }
8550
8551 /*
8552 * SAX: End of Tag
8553 */
8554done:
8555 if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
8556 (!ctxt->disableSAX))
8557 ctxt->sax->endElementNs(ctxt->userData, ctxt->name, prefix, URI);
8558
8559 spacePop(ctxt);
8560 if (nsNr != 0)
8561 nsPop(ctxt, nsNr);
8562 return;
8563}
8564
8565/**
8566 * xmlParseCDSect:
8567 * @ctxt: an XML parser context
8568 *
8569 * Parse escaped pure raw content.
8570 *
8571 * [18] CDSect ::= CDStart CData CDEnd
8572 *
8573 * [19] CDStart ::= '<![CDATA['
8574 *
8575 * [20] Data ::= (Char* - (Char* ']]>' Char*))
8576 *
8577 * [21] CDEnd ::= ']]>'
8578 */
8579void
8580xmlParseCDSect(xmlParserCtxtPtr ctxt) {
8581 xmlChar *buf = NULL;
8582 int len = 0;
8583 int size = XML_PARSER_BUFFER_SIZE;
8584 int r, rl;
8585 int s, sl;
8586 int cur, l;
8587 int count = 0;
8588
8589 /* Check 2.6.0 was NXT(0) not RAW */
8590 if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) {
8591 SKIP(9);
8592 } else
8593 return;
8594
8595 ctxt->instate = XML_PARSER_CDATA_SECTION;
8596 r = CUR_CHAR(rl);
8597 if (!IS_CHAR(r)) {
8598 xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
8599 ctxt->instate = XML_PARSER_CONTENT;
8600 return;
8601 }
8602 NEXTL(rl);
8603 s = CUR_CHAR(sl);
8604 if (!IS_CHAR(s)) {
8605 xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
8606 ctxt->instate = XML_PARSER_CONTENT;
8607 return;
8608 }
8609 NEXTL(sl);
8610 cur = CUR_CHAR(l);
8611 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
8612 if (buf == NULL) {
8613 xmlErrMemory(ctxt, NULL);
8614 return;
8615 }
8616 while (IS_CHAR(cur) &&
8617 ((r != ']') || (s != ']') || (cur != '>'))) {
8618 if (len + 5 >= size) {
8619 xmlChar *tmp;
8620
8621 size *= 2;
8622 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
8623 if (tmp == NULL) {
8624 xmlFree(buf);
8625 xmlErrMemory(ctxt, NULL);
8626 return;
8627 }
8628 buf = tmp;
8629 }
8630 COPY_BUF(rl,buf,len,r);
8631 r = s;
8632 rl = sl;
8633 s = cur;
8634 sl = l;
8635 count++;
8636 if (count > 50) {
8637 GROW;
8638 count = 0;
8639 }
8640 NEXTL(l);
8641 cur = CUR_CHAR(l);
8642 }
8643 buf[len] = 0;
8644 ctxt->instate = XML_PARSER_CONTENT;
8645 if (cur != '>') {
8646 xmlFatalErrMsgStr(ctxt, XML_ERR_CDATA_NOT_FINISHED,
8647 "CData section not finished\n%.50s\n", buf);
8648 xmlFree(buf);
8649 return;
8650 }
8651 NEXTL(l);
8652
8653 /*
8654 * OK the buffer is to be consumed as cdata.
8655 */
8656 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
8657 if (ctxt->sax->cdataBlock != NULL)
8658 ctxt->sax->cdataBlock(ctxt->userData, buf, len);
8659 else if (ctxt->sax->characters != NULL)
8660 ctxt->sax->characters(ctxt->userData, buf, len);
8661 }
8662 xmlFree(buf);
8663}
8664
8665/**
8666 * xmlParseContent:
8667 * @ctxt: an XML parser context
8668 *
8669 * Parse a content:
8670 *
8671 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
8672 */
8673
8674void
8675xmlParseContent(xmlParserCtxtPtr ctxt) {
8676 GROW;
8677 while ((RAW != 0) &&
8678 ((RAW != '<') || (NXT(1) != '/')) &&
8679 (ctxt->instate != XML_PARSER_EOF)) {
8680 const xmlChar *test = CUR_PTR;
8681 unsigned int cons = ctxt->input->consumed;
8682 const xmlChar *cur = ctxt->input->cur;
8683
8684 /*
8685 * First case : a Processing Instruction.
8686 */
8687 if ((*cur == '<') && (cur[1] == '?')) {
8688 xmlParsePI(ctxt);
8689 }
8690
8691 /*
8692 * Second case : a CDSection
8693 */
8694 /* 2.6.0 test was *cur not RAW */
8695 else if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) {
8696 xmlParseCDSect(ctxt);
8697 }
8698
8699 /*
8700 * Third case : a comment
8701 */
8702 else if ((*cur == '<') && (NXT(1) == '!') &&
8703 (NXT(2) == '-') && (NXT(3) == '-')) {
8704 xmlParseComment(ctxt);
8705 ctxt->instate = XML_PARSER_CONTENT;
8706 }
8707
8708 /*
8709 * Fourth case : a sub-element.
8710 */
8711 else if (*cur == '<') {
8712 xmlParseElement(ctxt);
8713 }
8714
8715 /*
8716 * Fifth case : a reference. If if has not been resolved,
8717 * parsing returns it's Name, create the node
8718 */
8719
8720 else if (*cur == '&') {
8721 xmlParseReference(ctxt);
8722 }
8723
8724 /*
8725 * Last case, text. Note that References are handled directly.
8726 */
8727 else {
8728 xmlParseCharData(ctxt, 0);
8729 }
8730
8731 GROW;
8732 /*
8733 * Pop-up of finished entities.
8734 */
8735 while ((RAW == 0) && (ctxt->inputNr > 1))
8736 xmlPopInput(ctxt);
8737 SHRINK;
8738
8739 if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) {
8740 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
8741 "detected an error in element content\n");
8742 ctxt->instate = XML_PARSER_EOF;
8743 break;
8744 }
8745 }
8746}
8747
8748/**
8749 * xmlParseElement:
8750 * @ctxt: an XML parser context
8751 *
8752 * parse an XML element, this is highly recursive
8753 *
8754 * [39] element ::= EmptyElemTag | STag content ETag
8755 *
8756 * [ WFC: Element Type Match ]
8757 * The Name in an element's end-tag must match the element type in the
8758 * start-tag.
8759 *
8760 */
8761
8762void
8763xmlParseElement(xmlParserCtxtPtr ctxt) {
8764 const xmlChar *name;
8765 const xmlChar *prefix;
8766 const xmlChar *URI;
8767 xmlParserNodeInfo node_info;
8768 int line, tlen;
8769 xmlNodePtr ret;
8770 int nsNr = ctxt->nsNr;
8771
8772 if ((unsigned int) ctxt->nameNr > xmlParserMaxDepth) {
8773 xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR,
8774 "Excessive depth in document: change xmlParserMaxDepth = %d\n",
8775 xmlParserMaxDepth);
8776 ctxt->instate = XML_PARSER_EOF;
8777 return;
8778 }
8779
8780 /* Capture start position */
8781 if (ctxt->record_info) {
8782 node_info.begin_pos = ctxt->input->consumed +
8783 (CUR_PTR - ctxt->input->base);
8784 node_info.begin_line = ctxt->input->line;
8785 }
8786
8787 if (ctxt->spaceNr == 0)
8788 spacePush(ctxt, -1);
8789 else if (*ctxt->space == -2)
8790 spacePush(ctxt, -1);
8791 else
8792 spacePush(ctxt, *ctxt->space);
8793
8794 line = ctxt->input->line;
8795#ifdef LIBXML_SAX1_ENABLED
8796 if (ctxt->sax2)
8797#endif /* LIBXML_SAX1_ENABLED */
8798 name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen);
8799#ifdef LIBXML_SAX1_ENABLED
8800 else
8801 name = xmlParseStartTag(ctxt);
8802#endif /* LIBXML_SAX1_ENABLED */
8803 if (ctxt->instate == XML_PARSER_EOF)
8804 return;
8805 if (name == NULL) {
8806 spacePop(ctxt);
8807 return;
8808 }
8809 namePush(ctxt, name);
8810 ret = ctxt->node;
8811
8812#ifdef LIBXML_VALID_ENABLED
8813 /*
8814 * [ VC: Root Element Type ]
8815 * The Name in the document type declaration must match the element
8816 * type of the root element.
8817 */
8818 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
8819 ctxt->node && (ctxt->node == ctxt->myDoc->children))
8820 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
8821#endif /* LIBXML_VALID_ENABLED */
8822
8823 /*
8824 * Check for an Empty Element.
8825 */
8826 if ((RAW == '/') && (NXT(1) == '>')) {
8827 SKIP(2);
8828 if (ctxt->sax2) {
8829 if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
8830 (!ctxt->disableSAX))
8831 ctxt->sax->endElementNs(ctxt->userData, name, prefix, URI);
8832#ifdef LIBXML_SAX1_ENABLED
8833 } else {
8834 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
8835 (!ctxt->disableSAX))
8836 ctxt->sax->endElement(ctxt->userData, name);
8837#endif /* LIBXML_SAX1_ENABLED */
8838 }
8839 namePop(ctxt);
8840 spacePop(ctxt);
8841 if (nsNr != ctxt->nsNr)
8842 nsPop(ctxt, ctxt->nsNr - nsNr);
8843 if ( ret != NULL && ctxt->record_info ) {
8844 node_info.end_pos = ctxt->input->consumed +
8845 (CUR_PTR - ctxt->input->base);
8846 node_info.end_line = ctxt->input->line;
8847 node_info.node = ret;
8848 xmlParserAddNodeInfo(ctxt, &node_info);
8849 }
8850 return;
8851 }
8852 if (RAW == '>') {
8853 NEXT1;
8854 } else {
8855 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_GT_REQUIRED,
8856 "Couldn't find end of Start Tag %s line %d\n",
8857 name, line, NULL);
8858
8859 /*
8860 * end of parsing of this node.
8861 */
8862 nodePop(ctxt);
8863 namePop(ctxt);
8864 spacePop(ctxt);
8865 if (nsNr != ctxt->nsNr)
8866 nsPop(ctxt, ctxt->nsNr - nsNr);
8867
8868 /*
8869 * Capture end position and add node
8870 */
8871 if ( ret != NULL && ctxt->record_info ) {
8872 node_info.end_pos = ctxt->input->consumed +
8873 (CUR_PTR - ctxt->input->base);
8874 node_info.end_line = ctxt->input->line;
8875 node_info.node = ret;
8876 xmlParserAddNodeInfo(ctxt, &node_info);
8877 }
8878 return;
8879 }
8880
8881 /*
8882 * Parse the content of the element:
8883 */
8884 xmlParseContent(ctxt);
8885 if (!IS_BYTE_CHAR(RAW)) {
8886 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NOT_FINISHED,
8887 "Premature end of data in tag %s line %d\n",
8888 name, line, NULL);
8889
8890 /*
8891 * end of parsing of this node.
8892 */
8893 nodePop(ctxt);
8894 namePop(ctxt);
8895 spacePop(ctxt);
8896 if (nsNr != ctxt->nsNr)
8897 nsPop(ctxt, ctxt->nsNr - nsNr);
8898 return;
8899 }
8900
8901 /*
8902 * parse the end of tag: '</' should be here.
8903 */
8904 if (ctxt->sax2) {
8905 xmlParseEndTag2(ctxt, prefix, URI, line, ctxt->nsNr - nsNr, tlen);
8906 namePop(ctxt);
8907 }
8908#ifdef LIBXML_SAX1_ENABLED
8909 else
8910 xmlParseEndTag1(ctxt, line);
8911#endif /* LIBXML_SAX1_ENABLED */
8912
8913 /*
8914 * Capture end position and add node
8915 */
8916 if ( ret != NULL && ctxt->record_info ) {
8917 node_info.end_pos = ctxt->input->consumed +
8918 (CUR_PTR - ctxt->input->base);
8919 node_info.end_line = ctxt->input->line;
8920 node_info.node = ret;
8921 xmlParserAddNodeInfo(ctxt, &node_info);
8922 }
8923}
8924
8925/**
8926 * xmlParseVersionNum:
8927 * @ctxt: an XML parser context
8928 *
8929 * parse the XML version value.
8930 *
8931 * [26] VersionNum ::= ([a-zA-Z0-9_.:] | '-')+
8932 *
8933 * Returns the string giving the XML version number, or NULL
8934 */
8935xmlChar *
8936xmlParseVersionNum(xmlParserCtxtPtr ctxt) {
8937 xmlChar *buf = NULL;
8938 int len = 0;
8939 int size = 10;
8940 xmlChar cur;
8941
8942 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
8943 if (buf == NULL) {
8944 xmlErrMemory(ctxt, NULL);
8945 return(NULL);
8946 }
8947 cur = CUR;
8948 while (((cur >= 'a') && (cur <= 'z')) ||
8949 ((cur >= 'A') && (cur <= 'Z')) ||
8950 ((cur >= '0') && (cur <= '9')) ||
8951 (cur == '_') || (cur == '.') ||
8952 (cur == ':') || (cur == '-')) {
8953 if (len + 1 >= size) {
8954 xmlChar *tmp;
8955
8956 size *= 2;
8957 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
8958 if (tmp == NULL) {
8959 xmlErrMemory(ctxt, NULL);
8960 return(NULL);
8961 }
8962 buf = tmp;
8963 }
8964 buf[len++] = cur;
8965 NEXT;
8966 cur=CUR;
8967 }
8968 buf[len] = 0;
8969 return(buf);
8970}
8971
8972/**
8973 * xmlParseVersionInfo:
8974 * @ctxt: an XML parser context
8975 *
8976 * parse the XML version.
8977 *
8978 * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
8979 *
8980 * [25] Eq ::= S? '=' S?
8981 *
8982 * Returns the version string, e.g. "1.0"
8983 */
8984
8985xmlChar *
8986xmlParseVersionInfo(xmlParserCtxtPtr ctxt) {
8987 xmlChar *version = NULL;
8988
8989 if (CMP7(CUR_PTR, 'v', 'e', 'r', 's', 'i', 'o', 'n')) {
8990 SKIP(7);
8991 SKIP_BLANKS;
8992 if (RAW != '=') {
8993 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
8994 return(NULL);
8995 }
8996 NEXT;
8997 SKIP_BLANKS;
8998 if (RAW == '"') {
8999 NEXT;
9000 version = xmlParseVersionNum(ctxt);
9001 if (RAW != '"') {
9002 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
9003 } else
9004 NEXT;
9005 } else if (RAW == '\''){
9006 NEXT;
9007 version = xmlParseVersionNum(ctxt);
9008 if (RAW != '\'') {
9009 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
9010 } else
9011 NEXT;
9012 } else {
9013 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
9014 }
9015 }
9016 return(version);
9017}
9018
9019/**
9020 * xmlParseEncName:
9021 * @ctxt: an XML parser context
9022 *
9023 * parse the XML encoding name
9024 *
9025 * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
9026 *
9027 * Returns the encoding name value or NULL
9028 */
9029xmlChar *
9030xmlParseEncName(xmlParserCtxtPtr ctxt) {
9031 xmlChar *buf = NULL;
9032 int len = 0;
9033 int size = 10;
9034 xmlChar cur;
9035
9036 cur = CUR;
9037 if (((cur >= 'a') && (cur <= 'z')) ||
9038 ((cur >= 'A') && (cur <= 'Z'))) {
9039 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
9040 if (buf == NULL) {
9041 xmlErrMemory(ctxt, NULL);
9042 return(NULL);
9043 }
9044
9045 buf[len++] = cur;
9046 NEXT;
9047 cur = CUR;
9048 while (((cur >= 'a') && (cur <= 'z')) ||
9049 ((cur >= 'A') && (cur <= 'Z')) ||
9050 ((cur >= '0') && (cur <= '9')) ||
9051 (cur == '.') || (cur == '_') ||
9052 (cur == '-')) {
9053 if (len + 1 >= size) {
9054 xmlChar *tmp;
9055
9056 size *= 2;
9057 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
9058 if (tmp == NULL) {
9059 xmlErrMemory(ctxt, NULL);
9060 xmlFree(buf);
9061 return(NULL);
9062 }
9063 buf = tmp;
9064 }
9065 buf[len++] = cur;
9066 NEXT;
9067 cur = CUR;
9068 if (cur == 0) {
9069 SHRINK;
9070 GROW;
9071 cur = CUR;
9072 }
9073 }
9074 buf[len] = 0;
9075 } else {
9076 xmlFatalErr(ctxt, XML_ERR_ENCODING_NAME, NULL);
9077 }
9078 return(buf);
9079}
9080
9081/**
9082 * xmlParseEncodingDecl:
9083 * @ctxt: an XML parser context
9084 *
9085 * parse the XML encoding declaration
9086 *
9087 * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'")
9088 *
9089 * this setups the conversion filters.
9090 *
9091 * Returns the encoding value or NULL
9092 */
9093
9094const xmlChar *
9095xmlParseEncodingDecl(xmlParserCtxtPtr ctxt) {
9096 xmlChar *encoding = NULL;
9097
9098 SKIP_BLANKS;
9099 if (CMP8(CUR_PTR, 'e', 'n', 'c', 'o', 'd', 'i', 'n', 'g')) {
9100 SKIP(8);
9101 SKIP_BLANKS;
9102 if (RAW != '=') {
9103 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
9104 return(NULL);
9105 }
9106 NEXT;
9107 SKIP_BLANKS;
9108 if (RAW == '"') {
9109 NEXT;
9110 encoding = xmlParseEncName(ctxt);
9111 if (RAW != '"') {
9112 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
9113 } else
9114 NEXT;
9115 } else if (RAW == '\''){
9116 NEXT;
9117 encoding = xmlParseEncName(ctxt);
9118 if (RAW != '\'') {
9119 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
9120 } else
9121 NEXT;
9122 } else {
9123 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
9124 }
9125 /*
9126 * UTF-16 encoding stwich has already taken place at this stage,
9127 * more over the little-endian/big-endian selection is already done
9128 */
9129 if ((encoding != NULL) &&
9130 ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-16")) ||
9131 (!xmlStrcasecmp(encoding, BAD_CAST "UTF16")))) {
9132 if (ctxt->encoding != NULL)
9133 xmlFree((xmlChar *) ctxt->encoding);
9134 ctxt->encoding = encoding;
9135 }
9136 /*
9137 * UTF-8 encoding is handled natively
9138 */
9139 else if ((encoding != NULL) &&
9140 ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-8")) ||
9141 (!xmlStrcasecmp(encoding, BAD_CAST "UTF8")))) {
9142 if (ctxt->encoding != NULL)
9143 xmlFree((xmlChar *) ctxt->encoding);
9144 ctxt->encoding = encoding;
9145 }
9146 else if (encoding != NULL) {
9147 xmlCharEncodingHandlerPtr handler;
9148
9149 if (ctxt->input->encoding != NULL)
9150 xmlFree((xmlChar *) ctxt->input->encoding);
9151 ctxt->input->encoding = encoding;
9152
9153 handler = xmlFindCharEncodingHandler((const char *) encoding);
9154 if (handler != NULL) {
9155 xmlSwitchToEncoding(ctxt, handler);
9156 } else {
9157 xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
9158 "Unsupported encoding %s\n", encoding);
9159 return(NULL);
9160 }
9161 }
9162 }
9163 return(encoding);
9164}
9165
9166/**
9167 * xmlParseSDDecl:
9168 * @ctxt: an XML parser context
9169 *
9170 * parse the XML standalone declaration
9171 *
9172 * [32] SDDecl ::= S 'standalone' Eq
9173 * (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no')'"'))
9174 *
9175 * [ VC: Standalone Document Declaration ]
9176 * TODO The standalone document declaration must have the value "no"
9177 * if any external markup declarations contain declarations of:
9178 * - attributes with default values, if elements to which these
9179 * attributes apply appear in the document without specifications
9180 * of values for these attributes, or
9181 * - entities (other than amp, lt, gt, apos, quot), if references
9182 * to those entities appear in the document, or
9183 * - attributes with values subject to normalization, where the
9184 * attribute appears in the document with a value which will change
9185 * as a result of normalization, or
9186 * - element types with element content, if white space occurs directly
9187 * within any instance of those types.
9188 *
9189 * Returns:
9190 * 1 if standalone="yes"
9191 * 0 if standalone="no"
9192 * -2 if standalone attribute is missing or invalid
9193 * (A standalone value of -2 means that the XML declaration was found,
9194 * but no value was specified for the standalone attribute).
9195 */
9196
9197int
9198xmlParseSDDecl(xmlParserCtxtPtr ctxt) {
9199 int standalone = -2;
9200
9201 SKIP_BLANKS;
9202 if (CMP10(CUR_PTR, 's', 't', 'a', 'n', 'd', 'a', 'l', 'o', 'n', 'e')) {
9203 SKIP(10);
9204 SKIP_BLANKS;
9205 if (RAW != '=') {
9206 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
9207 return(standalone);
9208 }
9209 NEXT;
9210 SKIP_BLANKS;
9211 if (RAW == '\''){
9212 NEXT;
9213 if ((RAW == 'n') && (NXT(1) == 'o')) {
9214 standalone = 0;
9215 SKIP(2);
9216 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
9217 (NXT(2) == 's')) {
9218 standalone = 1;
9219 SKIP(3);
9220 } else {
9221 xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
9222 }
9223 if (RAW != '\'') {
9224 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
9225 } else
9226 NEXT;
9227 } else if (RAW == '"'){
9228 NEXT;
9229 if ((RAW == 'n') && (NXT(1) == 'o')) {
9230 standalone = 0;
9231 SKIP(2);
9232 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
9233 (NXT(2) == 's')) {
9234 standalone = 1;
9235 SKIP(3);
9236 } else {
9237 xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
9238 }
9239 if (RAW != '"') {
9240 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
9241 } else
9242 NEXT;
9243 } else {
9244 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
9245 }
9246 }
9247 return(standalone);
9248}
9249
9250/**
9251 * xmlParseXMLDecl:
9252 * @ctxt: an XML parser context
9253 *
9254 * parse an XML declaration header
9255 *
9256 * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
9257 */
9258
9259void
9260xmlParseXMLDecl(xmlParserCtxtPtr ctxt) {
9261 xmlChar *version;
9262
9263 /*
9264 * This value for standalone indicates that the document has an
9265 * XML declaration but it does not have a standalone attribute.
9266 * It will be overwritten later if a standalone attribute is found.
9267 */
9268 ctxt->input->standalone = -2;
9269
9270 /*
9271 * We know that '<?xml' is here.
9272 */
9273 SKIP(5);
9274
9275 if (!IS_BLANK_CH(RAW)) {
9276 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
9277 "Blank needed after '<?xml'\n");
9278 }
9279 SKIP_BLANKS;
9280
9281 /*
9282 * We must have the VersionInfo here.
9283 */
9284 version = xmlParseVersionInfo(ctxt);
9285 if (version == NULL) {
9286 xmlFatalErr(ctxt, XML_ERR_VERSION_MISSING, NULL);
9287 } else {
9288 if (!xmlStrEqual(version, (const xmlChar *) XML_DEFAULT_VERSION)) {
9289 /*
9290 * TODO: Blueberry should be detected here
9291 */
9292 xmlWarningMsg(ctxt, XML_WAR_UNKNOWN_VERSION,
9293 "Unsupported version '%s'\n",
9294 version, NULL);
9295 }
9296 if (ctxt->version != NULL)
9297 xmlFree((void *) ctxt->version);
9298 ctxt->version = version;
9299 }
9300
9301 /*
9302 * We may have the encoding declaration
9303 */
9304 if (!IS_BLANK_CH(RAW)) {
9305 if ((RAW == '?') && (NXT(1) == '>')) {
9306 SKIP(2);
9307 return;
9308 }
9309 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
9310 }
9311 xmlParseEncodingDecl(ctxt);
9312 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
9313 /*
9314 * The XML REC instructs us to stop parsing right here
9315 */
9316 return;
9317 }
9318
9319 /*
9320 * We may have the standalone status.
9321 */
9322 if ((ctxt->input->encoding != NULL) && (!IS_BLANK_CH(RAW))) {
9323 if ((RAW == '?') && (NXT(1) == '>')) {
9324 SKIP(2);
9325 return;
9326 }
9327 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
9328 }
9329 SKIP_BLANKS;
9330 ctxt->input->standalone = xmlParseSDDecl(ctxt);
9331
9332 SKIP_BLANKS;
9333 if ((RAW == '?') && (NXT(1) == '>')) {
9334 SKIP(2);
9335 } else if (RAW == '>') {
9336 /* Deprecated old WD ... */
9337 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
9338 NEXT;
9339 } else {
9340 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
9341 MOVETO_ENDTAG(CUR_PTR);
9342 NEXT;
9343 }
9344}
9345
9346/**
9347 * xmlParseMisc:
9348 * @ctxt: an XML parser context
9349 *
9350 * parse an XML Misc* optional field.
9351 *
9352 * [27] Misc ::= Comment | PI | S
9353 */
9354
9355void
9356xmlParseMisc(xmlParserCtxtPtr ctxt) {
9357 while (((RAW == '<') && (NXT(1) == '?')) ||
9358 (CMP4(CUR_PTR, '<', '!', '-', '-')) ||
9359 IS_BLANK_CH(CUR)) {
9360 if ((RAW == '<') && (NXT(1) == '?')) {
9361 xmlParsePI(ctxt);
9362 } else if (IS_BLANK_CH(CUR)) {
9363 NEXT;
9364 } else
9365 xmlParseComment(ctxt);
9366 }
9367}
9368
9369/**
9370 * xmlParseDocument:
9371 * @ctxt: an XML parser context
9372 *
9373 * parse an XML document (and build a tree if using the standard SAX
9374 * interface).
9375 *
9376 * [1] document ::= prolog element Misc*
9377 *
9378 * [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)?
9379 *
9380 * Returns 0, -1 in case of error. the parser context is augmented
9381 * as a result of the parsing.
9382 */
9383
9384int
9385xmlParseDocument(xmlParserCtxtPtr ctxt) {
9386 xmlChar start[4];
9387 xmlCharEncoding enc;
9388
9389 xmlInitParser();
9390
9391 if ((ctxt == NULL) || (ctxt->input == NULL))
9392 return(-1);
9393
9394 GROW;
9395
9396 /*
9397 * SAX: detecting the level.
9398 */
9399 xmlDetectSAX2(ctxt);
9400
9401 /*
9402 * SAX: beginning of the document processing.
9403 */
9404 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
9405 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
9406
9407 if ((ctxt->encoding == (const xmlChar *)XML_CHAR_ENCODING_NONE) &&
9408 ((ctxt->input->end - ctxt->input->cur) >= 4)) {
9409 /*
9410 * Get the 4 first bytes and decode the charset
9411 * if enc != XML_CHAR_ENCODING_NONE
9412 * plug some encoding conversion routines.
9413 */
9414 start[0] = RAW;
9415 start[1] = NXT(1);
9416 start[2] = NXT(2);
9417 start[3] = NXT(3);
9418 enc = xmlDetectCharEncoding(&start[0], 4);
9419 if (enc != XML_CHAR_ENCODING_NONE) {
9420 xmlSwitchEncoding(ctxt, enc);
9421 }
9422 }
9423
9424
9425 if (CUR == 0) {
9426 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
9427 }
9428
9429 /*
9430 * Check for the XMLDecl in the Prolog.
9431 */
9432 GROW;
9433 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
9434
9435 /*
9436 * Note that we will switch encoding on the fly.
9437 */
9438 xmlParseXMLDecl(ctxt);
9439 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
9440 /*
9441 * The XML REC instructs us to stop parsing right here
9442 */
9443 return(-1);
9444 }
9445 ctxt->standalone = ctxt->input->standalone;
9446 SKIP_BLANKS;
9447 } else {
9448 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
9449 }
9450 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
9451 ctxt->sax->startDocument(ctxt->userData);
9452
9453 /*
9454 * The Misc part of the Prolog
9455 */
9456 GROW;
9457 xmlParseMisc(ctxt);
9458
9459 /*
9460 * Then possibly doc type declaration(s) and more Misc
9461 * (doctypedecl Misc*)?
9462 */
9463 GROW;
9464 if (CMP9(CUR_PTR, '<', '!', 'D', 'O', 'C', 'T', 'Y', 'P', 'E')) {
9465
9466 ctxt->inSubset = 1;
9467 xmlParseDocTypeDecl(ctxt);
9468 if (RAW == '[') {
9469 ctxt->instate = XML_PARSER_DTD;
9470 xmlParseInternalSubset(ctxt);
9471 }
9472
9473 /*
9474 * Create and update the external subset.
9475 */
9476 ctxt->inSubset = 2;
9477 if ((ctxt->sax != NULL) && (ctxt->sax->externalSubset != NULL) &&
9478 (!ctxt->disableSAX))
9479 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
9480 ctxt->extSubSystem, ctxt->extSubURI);
9481 ctxt->inSubset = 0;
9482
9483 xmlCleanSpecialAttr(ctxt);
9484
9485 ctxt->instate = XML_PARSER_PROLOG;
9486 xmlParseMisc(ctxt);
9487 }
9488
9489 /*
9490 * Time to start parsing the tree itself
9491 */
9492 GROW;
9493 if (RAW != '<') {
9494 xmlFatalErrMsg(ctxt, XML_ERR_DOCUMENT_EMPTY,
9495 "Start tag expected, '<' not found\n");
9496 } else {
9497 ctxt->instate = XML_PARSER_CONTENT;
9498 xmlParseElement(ctxt);
9499 ctxt->instate = XML_PARSER_EPILOG;
9500
9501
9502 /*
9503 * The Misc part at the end
9504 */
9505 xmlParseMisc(ctxt);
9506
9507 if (RAW != 0) {
9508 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
9509 }
9510 ctxt->instate = XML_PARSER_EOF;
9511 }
9512
9513 /*
9514 * SAX: end of the document processing.
9515 */
9516 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
9517 ctxt->sax->endDocument(ctxt->userData);
9518
9519 /*
9520 * Remove locally kept entity definitions if the tree was not built
9521 */
9522 if ((ctxt->myDoc != NULL) &&
9523 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
9524 xmlFreeDoc(ctxt->myDoc);
9525 ctxt->myDoc = NULL;
9526 }
9527
9528 if (! ctxt->wellFormed) {
9529 ctxt->valid = 0;
9530 return(-1);
9531 }
9532 return(0);
9533}
9534
9535/**
9536 * xmlParseExtParsedEnt:
9537 * @ctxt: an XML parser context
9538 *
9539 * parse a general parsed entity
9540 * An external general parsed entity is well-formed if it matches the
9541 * production labeled extParsedEnt.
9542 *
9543 * [78] extParsedEnt ::= TextDecl? content
9544 *
9545 * Returns 0, -1 in case of error. the parser context is augmented
9546 * as a result of the parsing.
9547 */
9548
9549int
9550xmlParseExtParsedEnt(xmlParserCtxtPtr ctxt) {
9551 xmlChar start[4];
9552 xmlCharEncoding enc;
9553
9554 if ((ctxt == NULL) || (ctxt->input == NULL))
9555 return(-1);
9556
9557 xmlDefaultSAXHandlerInit();
9558
9559 xmlDetectSAX2(ctxt);
9560
9561 GROW;
9562
9563 /*
9564 * SAX: beginning of the document processing.
9565 */
9566 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
9567 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
9568
9569 /*
9570 * Get the 4 first bytes and decode the charset
9571 * if enc != XML_CHAR_ENCODING_NONE
9572 * plug some encoding conversion routines.
9573 */
9574 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
9575 start[0] = RAW;
9576 start[1] = NXT(1);
9577 start[2] = NXT(2);
9578 start[3] = NXT(3);
9579 enc = xmlDetectCharEncoding(start, 4);
9580 if (enc != XML_CHAR_ENCODING_NONE) {
9581 xmlSwitchEncoding(ctxt, enc);
9582 }
9583 }
9584
9585
9586 if (CUR == 0) {
9587 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
9588 }
9589
9590 /*
9591 * Check for the XMLDecl in the Prolog.
9592 */
9593 GROW;
9594 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
9595
9596 /*
9597 * Note that we will switch encoding on the fly.
9598 */
9599 xmlParseXMLDecl(ctxt);
9600 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
9601 /*
9602 * The XML REC instructs us to stop parsing right here
9603 */
9604 return(-1);
9605 }
9606 SKIP_BLANKS;
9607 } else {
9608 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
9609 }
9610 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
9611 ctxt->sax->startDocument(ctxt->userData);
9612
9613 /*
9614 * Doing validity checking on chunk doesn't make sense
9615 */
9616 ctxt->instate = XML_PARSER_CONTENT;
9617 ctxt->validate = 0;
9618 ctxt->loadsubset = 0;
9619 ctxt->depth = 0;
9620
9621 xmlParseContent(ctxt);
9622
9623 if ((RAW == '<') && (NXT(1) == '/')) {
9624 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
9625 } else if (RAW != 0) {
9626 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
9627 }
9628
9629 /*
9630 * SAX: end of the document processing.
9631 */
9632 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
9633 ctxt->sax->endDocument(ctxt->userData);
9634
9635 if (! ctxt->wellFormed) return(-1);
9636 return(0);
9637}
9638
9639#ifdef LIBXML_PUSH_ENABLED
9640/************************************************************************
9641 * *
9642 * Progressive parsing interfaces *
9643 * *
9644 ************************************************************************/
9645
9646/**
9647 * xmlParseLookupSequence:
9648 * @ctxt: an XML parser context
9649 * @first: the first char to lookup
9650 * @next: the next char to lookup or zero
9651 * @third: the next char to lookup or zero
9652 *
9653 * Try to find if a sequence (first, next, third) or just (first next) or
9654 * (first) is available in the input stream.
9655 * This function has a side effect of (possibly) incrementing ctxt->checkIndex
9656 * to avoid rescanning sequences of bytes, it DOES change the state of the
9657 * parser, do not use liberally.
9658 *
9659 * Returns the index to the current parsing point if the full sequence
9660 * is available, -1 otherwise.
9661 */
9662static int
9663xmlParseLookupSequence(xmlParserCtxtPtr ctxt, xmlChar first,
9664 xmlChar next, xmlChar third) {
9665 int base, len;
9666 xmlParserInputPtr in;
9667 const xmlChar *buf;
9668
9669 in = ctxt->input;
9670 if (in == NULL) return(-1);
9671 base = in->cur - in->base;
9672 if (base < 0) return(-1);
9673 if (ctxt->checkIndex > base)
9674 base = ctxt->checkIndex;
9675 if (in->buf == NULL) {
9676 buf = in->base;
9677 len = in->length;
9678 } else {
9679 buf = in->buf->buffer->content;
9680 len = in->buf->buffer->use;
9681 }
9682 /* take into account the sequence length */
9683 if (third) len -= 2;
9684 else if (next) len --;
9685 for (;base < len;base++) {
9686 if (buf[base] == first) {
9687 if (third != 0) {
9688 if ((buf[base + 1] != next) ||
9689 (buf[base + 2] != third)) continue;
9690 } else if (next != 0) {
9691 if (buf[base + 1] != next) continue;
9692 }
9693 ctxt->checkIndex = 0;
9694#ifdef DEBUG_PUSH
9695 if (next == 0)
9696 xmlGenericError(xmlGenericErrorContext,
9697 "PP: lookup '%c' found at %d\n",
9698 first, base);
9699 else if (third == 0)
9700 xmlGenericError(xmlGenericErrorContext,
9701 "PP: lookup '%c%c' found at %d\n",
9702 first, next, base);
9703 else
9704 xmlGenericError(xmlGenericErrorContext,
9705 "PP: lookup '%c%c%c' found at %d\n",
9706 first, next, third, base);
9707#endif
9708 return(base - (in->cur - in->base));
9709 }
9710 }
9711 ctxt->checkIndex = base;
9712#ifdef DEBUG_PUSH
9713 if (next == 0)
9714 xmlGenericError(xmlGenericErrorContext,
9715 "PP: lookup '%c' failed\n", first);
9716 else if (third == 0)
9717 xmlGenericError(xmlGenericErrorContext,
9718 "PP: lookup '%c%c' failed\n", first, next);
9719 else
9720 xmlGenericError(xmlGenericErrorContext,
9721 "PP: lookup '%c%c%c' failed\n", first, next, third);
9722#endif
9723 return(-1);
9724}
9725
9726/**
9727 * xmlParseGetLasts:
9728 * @ctxt: an XML parser context
9729 * @lastlt: pointer to store the last '<' from the input
9730 * @lastgt: pointer to store the last '>' from the input
9731 *
9732 * Lookup the last < and > in the current chunk
9733 */
9734static void
9735xmlParseGetLasts(xmlParserCtxtPtr ctxt, const xmlChar **lastlt,
9736 const xmlChar **lastgt) {
9737 const xmlChar *tmp;
9738
9739 if ((ctxt == NULL) || (lastlt == NULL) || (lastgt == NULL)) {
9740 xmlGenericError(xmlGenericErrorContext,
9741 "Internal error: xmlParseGetLasts\n");
9742 return;
9743 }
9744 if ((ctxt->progressive != 0) && (ctxt->inputNr == 1)) {
9745 tmp = ctxt->input->end;
9746 tmp--;
9747 while ((tmp >= ctxt->input->base) && (*tmp != '<')) tmp--;
9748 if (tmp < ctxt->input->base) {
9749 *lastlt = NULL;
9750 *lastgt = NULL;
9751 } else {
9752 *lastlt = tmp;
9753 tmp++;
9754 while ((tmp < ctxt->input->end) && (*tmp != '>')) {
9755 if (*tmp == '\'') {
9756 tmp++;
9757 while ((tmp < ctxt->input->end) && (*tmp != '\'')) tmp++;
9758 if (tmp < ctxt->input->end) tmp++;
9759 } else if (*tmp == '"') {
9760 tmp++;
9761 while ((tmp < ctxt->input->end) && (*tmp != '"')) tmp++;
9762 if (tmp < ctxt->input->end) tmp++;
9763 } else
9764 tmp++;
9765 }
9766 if (tmp < ctxt->input->end)
9767 *lastgt = tmp;
9768 else {
9769 tmp = *lastlt;
9770 tmp--;
9771 while ((tmp >= ctxt->input->base) && (*tmp != '>')) tmp--;
9772 if (tmp >= ctxt->input->base)
9773 *lastgt = tmp;
9774 else
9775 *lastgt = NULL;
9776 }
9777 }
9778 } else {
9779 *lastlt = NULL;
9780 *lastgt = NULL;
9781 }
9782}
9783/**
9784 * xmlCheckCdataPush:
9785 * @cur: pointer to the bock of characters
9786 * @len: length of the block in bytes
9787 *
9788 * Check that the block of characters is okay as SCdata content [20]
9789 *
9790 * Returns the number of bytes to pass if okay, a negative index where an
9791 * UTF-8 error occured otherwise
9792 */
9793static int
9794xmlCheckCdataPush(const xmlChar *utf, int len) {
9795 int ix;
9796 unsigned char c;
9797 int codepoint;
9798
9799 if ((utf == NULL) || (len <= 0))
9800 return(0);
9801
9802 for (ix = 0; ix < len;) { /* string is 0-terminated */
9803 c = utf[ix];
9804 if ((c & 0x80) == 0x00) { /* 1-byte code, starts with 10 */
9805 if (c >= 0x20)
9806 ix++;
9807 else if ((c == 0xA) || (c == 0xD) || (c == 0x9))
9808 ix++;
9809 else
9810 return(-ix);
9811 } else if ((c & 0xe0) == 0xc0) {/* 2-byte code, starts with 110 */
9812 if (ix + 2 > len) return(ix);
9813 if ((utf[ix+1] & 0xc0 ) != 0x80)
9814 return(-ix);
9815 codepoint = (utf[ix] & 0x1f) << 6;
9816 codepoint |= utf[ix+1] & 0x3f;
9817 if (!xmlIsCharQ(codepoint))
9818 return(-ix);
9819 ix += 2;
9820 } else if ((c & 0xf0) == 0xe0) {/* 3-byte code, starts with 1110 */
9821 if (ix + 3 > len) return(ix);
9822 if (((utf[ix+1] & 0xc0) != 0x80) ||
9823 ((utf[ix+2] & 0xc0) != 0x80))
9824 return(-ix);
9825 codepoint = (utf[ix] & 0xf) << 12;
9826 codepoint |= (utf[ix+1] & 0x3f) << 6;
9827 codepoint |= utf[ix+2] & 0x3f;
9828 if (!xmlIsCharQ(codepoint))
9829 return(-ix);
9830 ix += 3;
9831 } else if ((c & 0xf8) == 0xf0) {/* 4-byte code, starts with 11110 */
9832 if (ix + 4 > len) return(ix);
9833 if (((utf[ix+1] & 0xc0) != 0x80) ||
9834 ((utf[ix+2] & 0xc0) != 0x80) ||
9835 ((utf[ix+3] & 0xc0) != 0x80))
9836 return(-ix);
9837 codepoint = (utf[ix] & 0x7) << 18;
9838 codepoint |= (utf[ix+1] & 0x3f) << 12;
9839 codepoint |= (utf[ix+2] & 0x3f) << 6;
9840 codepoint |= utf[ix+3] & 0x3f;
9841 if (!xmlIsCharQ(codepoint))
9842 return(-ix);
9843 ix += 4;
9844 } else /* unknown encoding */
9845 return(-ix);
9846 }
9847 return(ix);
9848}
9849
9850/**
9851 * xmlParseTryOrFinish:
9852 * @ctxt: an XML parser context
9853 * @terminate: last chunk indicator
9854 *
9855 * Try to progress on parsing
9856 *
9857 * Returns zero if no parsing was possible
9858 */
9859static int
9860xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
9861 int ret = 0;
9862 int avail, tlen;
9863 xmlChar cur, next;
9864 const xmlChar *lastlt, *lastgt;
9865
9866 if (ctxt->input == NULL)
9867 return(0);
9868
9869#ifdef DEBUG_PUSH
9870 switch (ctxt->instate) {
9871 case XML_PARSER_EOF:
9872 xmlGenericError(xmlGenericErrorContext,
9873 "PP: try EOF\n"); break;
9874 case XML_PARSER_START:
9875 xmlGenericError(xmlGenericErrorContext,
9876 "PP: try START\n"); break;
9877 case XML_PARSER_MISC:
9878 xmlGenericError(xmlGenericErrorContext,
9879 "PP: try MISC\n");break;
9880 case XML_PARSER_COMMENT:
9881 xmlGenericError(xmlGenericErrorContext,
9882 "PP: try COMMENT\n");break;
9883 case XML_PARSER_PROLOG:
9884 xmlGenericError(xmlGenericErrorContext,
9885 "PP: try PROLOG\n");break;
9886 case XML_PARSER_START_TAG:
9887 xmlGenericError(xmlGenericErrorContext,
9888 "PP: try START_TAG\n");break;
9889 case XML_PARSER_CONTENT:
9890 xmlGenericError(xmlGenericErrorContext,
9891 "PP: try CONTENT\n");break;
9892 case XML_PARSER_CDATA_SECTION:
9893 xmlGenericError(xmlGenericErrorContext,
9894 "PP: try CDATA_SECTION\n");break;
9895 case XML_PARSER_END_TAG:
9896 xmlGenericError(xmlGenericErrorContext,
9897 "PP: try END_TAG\n");break;
9898 case XML_PARSER_ENTITY_DECL:
9899 xmlGenericError(xmlGenericErrorContext,
9900 "PP: try ENTITY_DECL\n");break;
9901 case XML_PARSER_ENTITY_VALUE:
9902 xmlGenericError(xmlGenericErrorContext,
9903 "PP: try ENTITY_VALUE\n");break;
9904 case XML_PARSER_ATTRIBUTE_VALUE:
9905 xmlGenericError(xmlGenericErrorContext,
9906 "PP: try ATTRIBUTE_VALUE\n");break;
9907 case XML_PARSER_DTD:
9908 xmlGenericError(xmlGenericErrorContext,
9909 "PP: try DTD\n");break;
9910 case XML_PARSER_EPILOG:
9911 xmlGenericError(xmlGenericErrorContext,
9912 "PP: try EPILOG\n");break;
9913 case XML_PARSER_PI:
9914 xmlGenericError(xmlGenericErrorContext,
9915 "PP: try PI\n");break;
9916 case XML_PARSER_IGNORE:
9917 xmlGenericError(xmlGenericErrorContext,
9918 "PP: try IGNORE\n");break;
9919 }
9920#endif
9921
9922 if ((ctxt->input != NULL) &&
9923 (ctxt->input->cur - ctxt->input->base > 4096)) {
9924 xmlSHRINK(ctxt);
9925 ctxt->checkIndex = 0;
9926 }
9927 xmlParseGetLasts(ctxt, &lastlt, &lastgt);
9928
9929 while (1) {
9930 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
9931 return(0);
9932
9933
9934 /*
9935 * Pop-up of finished entities.
9936 */
9937 while ((RAW == 0) && (ctxt->inputNr > 1))
9938 xmlPopInput(ctxt);
9939
9940 if (ctxt->input == NULL) break;
9941 if (ctxt->input->buf == NULL)
9942 avail = ctxt->input->length -
9943 (ctxt->input->cur - ctxt->input->base);
9944 else {
9945 /*
9946 * If we are operating on converted input, try to flush
9947 * remainng chars to avoid them stalling in the non-converted
9948 * buffer.
9949 */
9950 if ((ctxt->input->buf->raw != NULL) &&
9951 (ctxt->input->buf->raw->use > 0)) {
9952 int base = ctxt->input->base -
9953 ctxt->input->buf->buffer->content;
9954 int current = ctxt->input->cur - ctxt->input->base;
9955
9956 xmlParserInputBufferPush(ctxt->input->buf, 0, "");
9957 ctxt->input->base = ctxt->input->buf->buffer->content + base;
9958 ctxt->input->cur = ctxt->input->base + current;
9959 ctxt->input->end =
9960 &ctxt->input->buf->buffer->content[
9961 ctxt->input->buf->buffer->use];
9962 }
9963 avail = ctxt->input->buf->buffer->use -
9964 (ctxt->input->cur - ctxt->input->base);
9965 }
9966 if (avail < 1)
9967 goto done;
9968 switch (ctxt->instate) {
9969 case XML_PARSER_EOF:
9970 /*
9971 * Document parsing is done !
9972 */
9973 goto done;
9974 case XML_PARSER_START:
9975 if (ctxt->charset == XML_CHAR_ENCODING_NONE) {
9976 xmlChar start[4];
9977 xmlCharEncoding enc;
9978
9979 /*
9980 * Very first chars read from the document flow.
9981 */
9982 if (avail < 4)
9983 goto done;
9984
9985 /*
9986 * Get the 4 first bytes and decode the charset
9987 * if enc != XML_CHAR_ENCODING_NONE
9988 * plug some encoding conversion routines,
9989 * else xmlSwitchEncoding will set to (default)
9990 * UTF8.
9991 */
9992 start[0] = RAW;
9993 start[1] = NXT(1);
9994 start[2] = NXT(2);
9995 start[3] = NXT(3);
9996 enc = xmlDetectCharEncoding(start, 4);
9997 xmlSwitchEncoding(ctxt, enc);
9998 break;
9999 }
10000
10001 if (avail < 2)
10002 goto done;
10003 cur = ctxt->input->cur[0];
10004 next = ctxt->input->cur[1];
10005 if (cur == 0) {
10006 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
10007 ctxt->sax->setDocumentLocator(ctxt->userData,
10008 &xmlDefaultSAXLocator);
10009 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
10010 ctxt->instate = XML_PARSER_EOF;
10011#ifdef DEBUG_PUSH
10012 xmlGenericError(xmlGenericErrorContext,
10013 "PP: entering EOF\n");
10014#endif
10015 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
10016 ctxt->sax->endDocument(ctxt->userData);
10017 goto done;
10018 }
10019 if ((cur == '<') && (next == '?')) {
10020 /* PI or XML decl */
10021 if (avail < 5) return(ret);
10022 if ((!terminate) &&
10023 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
10024 return(ret);
10025 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
10026 ctxt->sax->setDocumentLocator(ctxt->userData,
10027 &xmlDefaultSAXLocator);
10028 if ((ctxt->input->cur[2] == 'x') &&
10029 (ctxt->input->cur[3] == 'm') &&
10030 (ctxt->input->cur[4] == 'l') &&
10031 (IS_BLANK_CH(ctxt->input->cur[5]))) {
10032 ret += 5;
10033#ifdef DEBUG_PUSH
10034 xmlGenericError(xmlGenericErrorContext,
10035 "PP: Parsing XML Decl\n");
10036#endif
10037 xmlParseXMLDecl(ctxt);
10038 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
10039 /*
10040 * The XML REC instructs us to stop parsing right
10041 * here
10042 */
10043 ctxt->instate = XML_PARSER_EOF;
10044 return(0);
10045 }
10046 ctxt->standalone = ctxt->input->standalone;
10047 if ((ctxt->encoding == NULL) &&
10048 (ctxt->input->encoding != NULL))
10049 ctxt->encoding = xmlStrdup(ctxt->input->encoding);
10050 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
10051 (!ctxt->disableSAX))
10052 ctxt->sax->startDocument(ctxt->userData);
10053 ctxt->instate = XML_PARSER_MISC;
10054#ifdef DEBUG_PUSH
10055 xmlGenericError(xmlGenericErrorContext,
10056 "PP: entering MISC\n");
10057#endif
10058 } else {
10059 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
10060 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
10061 (!ctxt->disableSAX))
10062 ctxt->sax->startDocument(ctxt->userData);
10063 ctxt->instate = XML_PARSER_MISC;
10064#ifdef DEBUG_PUSH
10065 xmlGenericError(xmlGenericErrorContext,
10066 "PP: entering MISC\n");
10067#endif
10068 }
10069 } else {
10070 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
10071 ctxt->sax->setDocumentLocator(ctxt->userData,
10072 &xmlDefaultSAXLocator);
10073 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
10074 if (ctxt->version == NULL) {
10075 xmlErrMemory(ctxt, NULL);
10076 break;
10077 }
10078 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
10079 (!ctxt->disableSAX))
10080 ctxt->sax->startDocument(ctxt->userData);
10081 ctxt->instate = XML_PARSER_MISC;
10082#ifdef DEBUG_PUSH
10083 xmlGenericError(xmlGenericErrorContext,
10084 "PP: entering MISC\n");
10085#endif
10086 }
10087 break;
10088 case XML_PARSER_START_TAG: {
10089 const xmlChar *name;
10090 const xmlChar *prefix;
10091 const xmlChar *URI;
10092 int nsNr = ctxt->nsNr;
10093
10094 if ((avail < 2) && (ctxt->inputNr == 1))
10095 goto done;
10096 cur = ctxt->input->cur[0];
10097 if (cur != '<') {
10098 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
10099 ctxt->instate = XML_PARSER_EOF;
10100 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
10101 ctxt->sax->endDocument(ctxt->userData);
10102 goto done;
10103 }
10104 if (!terminate) {
10105 if (ctxt->progressive) {
10106 /* > can be found unescaped in attribute values */
10107 if ((lastgt == NULL) || (ctxt->input->cur >= lastgt))
10108 goto done;
10109 } else if (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0) {
10110 goto done;
10111 }
10112 }
10113 if (ctxt->spaceNr == 0)
10114 spacePush(ctxt, -1);
10115 else if (*ctxt->space == -2)
10116 spacePush(ctxt, -1);
10117 else
10118 spacePush(ctxt, *ctxt->space);
10119#ifdef LIBXML_SAX1_ENABLED
10120 if (ctxt->sax2)
10121#endif /* LIBXML_SAX1_ENABLED */
10122 name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen);
10123#ifdef LIBXML_SAX1_ENABLED
10124 else
10125 name = xmlParseStartTag(ctxt);
10126#endif /* LIBXML_SAX1_ENABLED */
10127 if (ctxt->instate == XML_PARSER_EOF)
10128 goto done;
10129 if (name == NULL) {
10130 spacePop(ctxt);
10131 ctxt->instate = XML_PARSER_EOF;
10132 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
10133 ctxt->sax->endDocument(ctxt->userData);
10134 goto done;
10135 }
10136#ifdef LIBXML_VALID_ENABLED
10137 /*
10138 * [ VC: Root Element Type ]
10139 * The Name in the document type declaration must match
10140 * the element type of the root element.
10141 */
10142 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
10143 ctxt->node && (ctxt->node == ctxt->myDoc->children))
10144 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
10145#endif /* LIBXML_VALID_ENABLED */
10146
10147 /*
10148 * Check for an Empty Element.
10149 */
10150 if ((RAW == '/') && (NXT(1) == '>')) {
10151 SKIP(2);
10152
10153 if (ctxt->sax2) {
10154 if ((ctxt->sax != NULL) &&
10155 (ctxt->sax->endElementNs != NULL) &&
10156 (!ctxt->disableSAX))
10157 ctxt->sax->endElementNs(ctxt->userData, name,
10158 prefix, URI);
10159 if (ctxt->nsNr - nsNr > 0)
10160 nsPop(ctxt, ctxt->nsNr - nsNr);
10161#ifdef LIBXML_SAX1_ENABLED
10162 } else {
10163 if ((ctxt->sax != NULL) &&
10164 (ctxt->sax->endElement != NULL) &&
10165 (!ctxt->disableSAX))
10166 ctxt->sax->endElement(ctxt->userData, name);
10167#endif /* LIBXML_SAX1_ENABLED */
10168 }
10169 spacePop(ctxt);
10170 if (ctxt->nameNr == 0) {
10171 ctxt->instate = XML_PARSER_EPILOG;
10172 } else {
10173 ctxt->instate = XML_PARSER_CONTENT;
10174 }
10175 break;
10176 }
10177 if (RAW == '>') {
10178 NEXT;
10179 } else {
10180 xmlFatalErrMsgStr(ctxt, XML_ERR_GT_REQUIRED,
10181 "Couldn't find end of Start Tag %s\n",
10182 name);
10183 nodePop(ctxt);
10184 spacePop(ctxt);
10185 }
10186 if (ctxt->sax2)
10187 nameNsPush(ctxt, name, prefix, URI, ctxt->nsNr - nsNr);
10188#ifdef LIBXML_SAX1_ENABLED
10189 else
10190 namePush(ctxt, name);
10191#endif /* LIBXML_SAX1_ENABLED */
10192
10193 ctxt->instate = XML_PARSER_CONTENT;
10194 break;
10195 }
10196 case XML_PARSER_CONTENT: {
10197 const xmlChar *test;
10198 unsigned int cons;
10199 if ((avail < 2) && (ctxt->inputNr == 1))
10200 goto done;
10201 cur = ctxt->input->cur[0];
10202 next = ctxt->input->cur[1];
10203
10204 test = CUR_PTR;
10205 cons = ctxt->input->consumed;
10206 if ((cur == '<') && (next == '/')) {
10207 ctxt->instate = XML_PARSER_END_TAG;
10208 break;
10209 } else if ((cur == '<') && (next == '?')) {
10210 if ((!terminate) &&
10211 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
10212 goto done;
10213 xmlParsePI(ctxt);
10214 } else if ((cur == '<') && (next != '!')) {
10215 ctxt->instate = XML_PARSER_START_TAG;
10216 break;
10217 } else if ((cur == '<') && (next == '!') &&
10218 (ctxt->input->cur[2] == '-') &&
10219 (ctxt->input->cur[3] == '-')) {
10220 int term;
10221
10222 if (avail < 4)
10223 goto done;
10224 ctxt->input->cur += 4;
10225 term = xmlParseLookupSequence(ctxt, '-', '-', '>');
10226 ctxt->input->cur -= 4;
10227 if ((!terminate) && (term < 0))
10228 goto done;
10229 xmlParseComment(ctxt);
10230 ctxt->instate = XML_PARSER_CONTENT;
10231 } else if ((cur == '<') && (ctxt->input->cur[1] == '!') &&
10232 (ctxt->input->cur[2] == '[') &&
10233 (ctxt->input->cur[3] == 'C') &&
10234 (ctxt->input->cur[4] == 'D') &&
10235 (ctxt->input->cur[5] == 'A') &&
10236 (ctxt->input->cur[6] == 'T') &&
10237 (ctxt->input->cur[7] == 'A') &&
10238 (ctxt->input->cur[8] == '[')) {
10239 SKIP(9);
10240 ctxt->instate = XML_PARSER_CDATA_SECTION;
10241 break;
10242 } else if ((cur == '<') && (next == '!') &&
10243 (avail < 9)) {
10244 goto done;
10245 } else if (cur == '&') {
10246 if ((!terminate) &&
10247 (xmlParseLookupSequence(ctxt, ';', 0, 0) < 0))
10248 goto done;
10249 xmlParseReference(ctxt);
10250 } else {
10251 /* TODO Avoid the extra copy, handle directly !!! */
10252 /*
10253 * Goal of the following test is:
10254 * - minimize calls to the SAX 'character' callback
10255 * when they are mergeable
10256 * - handle an problem for isBlank when we only parse
10257 * a sequence of blank chars and the next one is
10258 * not available to check against '<' presence.
10259 * - tries to homogenize the differences in SAX
10260 * callbacks between the push and pull versions
10261 * of the parser.
10262 */
10263 if ((ctxt->inputNr == 1) &&
10264 (avail < XML_PARSER_BIG_BUFFER_SIZE)) {
10265 if (!terminate) {
10266 if (ctxt->progressive) {
10267 if ((lastlt == NULL) ||
10268 (ctxt->input->cur > lastlt))
10269 goto done;
10270 } else if (xmlParseLookupSequence(ctxt,
10271 '<', 0, 0) < 0) {
10272 goto done;
10273 }
10274 }
10275 }
10276 ctxt->checkIndex = 0;
10277 xmlParseCharData(ctxt, 0);
10278 }
10279 /*
10280 * Pop-up of finished entities.
10281 */
10282 while ((RAW == 0) && (ctxt->inputNr > 1))
10283 xmlPopInput(ctxt);
10284 if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) {
10285 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
10286 "detected an error in element content\n");
10287 ctxt->instate = XML_PARSER_EOF;
10288 break;
10289 }
10290 break;
10291 }
10292 case XML_PARSER_END_TAG:
10293 if (avail < 2)
10294 goto done;
10295 if (!terminate) {
10296 if (ctxt->progressive) {
10297 /* > can be found unescaped in attribute values */
10298 if ((lastgt == NULL) || (ctxt->input->cur >= lastgt))
10299 goto done;
10300 } else if (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0) {
10301 goto done;
10302 }
10303 }
10304 if (ctxt->sax2) {
10305 xmlParseEndTag2(ctxt,
10306 (void *) ctxt->pushTab[ctxt->nameNr * 3 - 3],
10307 (void *) ctxt->pushTab[ctxt->nameNr * 3 - 2], 0,
10308 (int) (long) ctxt->pushTab[ctxt->nameNr * 3 - 1], 0);
10309 nameNsPop(ctxt);
10310 }
10311#ifdef LIBXML_SAX1_ENABLED
10312 else
10313 xmlParseEndTag1(ctxt, 0);
10314#endif /* LIBXML_SAX1_ENABLED */
10315 if (ctxt->instate == XML_PARSER_EOF) {
10316 /* Nothing */
10317 } else if (ctxt->nameNr == 0) {
10318 ctxt->instate = XML_PARSER_EPILOG;
10319 } else {
10320 ctxt->instate = XML_PARSER_CONTENT;
10321 }
10322 break;
10323 case XML_PARSER_CDATA_SECTION: {
10324 /*
10325 * The Push mode need to have the SAX callback for
10326 * cdataBlock merge back contiguous callbacks.
10327 */
10328 int base;
10329
10330 base = xmlParseLookupSequence(ctxt, ']', ']', '>');
10331 if (base < 0) {
10332 if (avail >= XML_PARSER_BIG_BUFFER_SIZE + 2) {
10333 int tmp;
10334
10335 tmp = xmlCheckCdataPush(ctxt->input->cur,
10336 XML_PARSER_BIG_BUFFER_SIZE);
10337 if (tmp < 0) {
10338 tmp = -tmp;
10339 ctxt->input->cur += tmp;
10340 goto encoding_error;
10341 }
10342 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
10343 if (ctxt->sax->cdataBlock != NULL)
10344 ctxt->sax->cdataBlock(ctxt->userData,
10345 ctxt->input->cur, tmp);
10346 else if (ctxt->sax->characters != NULL)
10347 ctxt->sax->characters(ctxt->userData,
10348 ctxt->input->cur, tmp);
10349 }
10350 SKIPL(tmp);
10351 ctxt->checkIndex = 0;
10352 }
10353 goto done;
10354 } else {
10355 int tmp;
10356
10357 tmp = xmlCheckCdataPush(ctxt->input->cur, base);
10358 if ((tmp < 0) || (tmp != base)) {
10359 tmp = -tmp;
10360 ctxt->input->cur += tmp;
10361 goto encoding_error;
10362 }
10363 if ((ctxt->sax != NULL) && (base > 0) &&
10364 (!ctxt->disableSAX)) {
10365 if (ctxt->sax->cdataBlock != NULL)
10366 ctxt->sax->cdataBlock(ctxt->userData,
10367 ctxt->input->cur, base);
10368 else if (ctxt->sax->characters != NULL)
10369 ctxt->sax->characters(ctxt->userData,
10370 ctxt->input->cur, base);
10371 }
10372 SKIPL(base + 3);
10373 ctxt->checkIndex = 0;
10374 ctxt->instate = XML_PARSER_CONTENT;
10375#ifdef DEBUG_PUSH
10376 xmlGenericError(xmlGenericErrorContext,
10377 "PP: entering CONTENT\n");
10378#endif
10379 }
10380 break;
10381 }
10382 case XML_PARSER_MISC:
10383 SKIP_BLANKS;
10384 if (ctxt->input->buf == NULL)
10385 avail = ctxt->input->length -
10386 (ctxt->input->cur - ctxt->input->base);
10387 else
10388 avail = ctxt->input->buf->buffer->use -
10389 (ctxt->input->cur - ctxt->input->base);
10390 if (avail < 2)
10391 goto done;
10392 cur = ctxt->input->cur[0];
10393 next = ctxt->input->cur[1];
10394 if ((cur == '<') && (next == '?')) {
10395 if ((!terminate) &&
10396 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
10397 goto done;
10398#ifdef DEBUG_PUSH
10399 xmlGenericError(xmlGenericErrorContext,
10400 "PP: Parsing PI\n");
10401#endif
10402 xmlParsePI(ctxt);
10403 ctxt->checkIndex = 0;
10404 } else if ((cur == '<') && (next == '!') &&
10405 (ctxt->input->cur[2] == '-') &&
10406 (ctxt->input->cur[3] == '-')) {
10407 if ((!terminate) &&
10408 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
10409 goto done;
10410#ifdef DEBUG_PUSH
10411 xmlGenericError(xmlGenericErrorContext,
10412 "PP: Parsing Comment\n");
10413#endif
10414 xmlParseComment(ctxt);
10415 ctxt->instate = XML_PARSER_MISC;
10416 ctxt->checkIndex = 0;
10417 } else if ((cur == '<') && (next == '!') &&
10418 (ctxt->input->cur[2] == 'D') &&
10419 (ctxt->input->cur[3] == 'O') &&
10420 (ctxt->input->cur[4] == 'C') &&
10421 (ctxt->input->cur[5] == 'T') &&
10422 (ctxt->input->cur[6] == 'Y') &&
10423 (ctxt->input->cur[7] == 'P') &&
10424 (ctxt->input->cur[8] == 'E')) {
10425 if ((!terminate) &&
10426 (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
10427 goto done;
10428#ifdef DEBUG_PUSH
10429 xmlGenericError(xmlGenericErrorContext,
10430 "PP: Parsing internal subset\n");
10431#endif
10432 ctxt->inSubset = 1;
10433 xmlParseDocTypeDecl(ctxt);
10434 if (RAW == '[') {
10435 ctxt->instate = XML_PARSER_DTD;
10436#ifdef DEBUG_PUSH
10437 xmlGenericError(xmlGenericErrorContext,
10438 "PP: entering DTD\n");
10439#endif
10440 } else {
10441 /*
10442 * Create and update the external subset.
10443 */
10444 ctxt->inSubset = 2;
10445 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
10446 (ctxt->sax->externalSubset != NULL))
10447 ctxt->sax->externalSubset(ctxt->userData,
10448 ctxt->intSubName, ctxt->extSubSystem,
10449 ctxt->extSubURI);
10450 ctxt->inSubset = 0;
10451 xmlCleanSpecialAttr(ctxt);
10452 ctxt->instate = XML_PARSER_PROLOG;
10453#ifdef DEBUG_PUSH
10454 xmlGenericError(xmlGenericErrorContext,
10455 "PP: entering PROLOG\n");
10456#endif
10457 }
10458 } else if ((cur == '<') && (next == '!') &&
10459 (avail < 9)) {
10460 goto done;
10461 } else {
10462 ctxt->instate = XML_PARSER_START_TAG;
10463 ctxt->progressive = 1;
10464 xmlParseGetLasts(ctxt, &lastlt, &lastgt);
10465#ifdef DEBUG_PUSH
10466 xmlGenericError(xmlGenericErrorContext,
10467 "PP: entering START_TAG\n");
10468#endif
10469 }
10470 break;
10471 case XML_PARSER_PROLOG:
10472 SKIP_BLANKS;
10473 if (ctxt->input->buf == NULL)
10474 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
10475 else
10476 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
10477 if (avail < 2)
10478 goto done;
10479 cur = ctxt->input->cur[0];
10480 next = ctxt->input->cur[1];
10481 if ((cur == '<') && (next == '?')) {
10482 if ((!terminate) &&
10483 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
10484 goto done;
10485#ifdef DEBUG_PUSH
10486 xmlGenericError(xmlGenericErrorContext,
10487 "PP: Parsing PI\n");
10488#endif
10489 xmlParsePI(ctxt);
10490 } else if ((cur == '<') && (next == '!') &&
10491 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
10492 if ((!terminate) &&
10493 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
10494 goto done;
10495#ifdef DEBUG_PUSH
10496 xmlGenericError(xmlGenericErrorContext,
10497 "PP: Parsing Comment\n");
10498#endif
10499 xmlParseComment(ctxt);
10500 ctxt->instate = XML_PARSER_PROLOG;
10501 } else if ((cur == '<') && (next == '!') &&
10502 (avail < 4)) {
10503 goto done;
10504 } else {
10505 ctxt->instate = XML_PARSER_START_TAG;
10506 if (ctxt->progressive == 0)
10507 ctxt->progressive = 1;
10508 xmlParseGetLasts(ctxt, &lastlt, &lastgt);
10509#ifdef DEBUG_PUSH
10510 xmlGenericError(xmlGenericErrorContext,
10511 "PP: entering START_TAG\n");
10512#endif
10513 }
10514 break;
10515 case XML_PARSER_EPILOG:
10516 SKIP_BLANKS;
10517 if (ctxt->input->buf == NULL)
10518 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
10519 else
10520 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
10521 if (avail < 2)
10522 goto done;
10523 cur = ctxt->input->cur[0];
10524 next = ctxt->input->cur[1];
10525 if ((cur == '<') && (next == '?')) {
10526 if ((!terminate) &&
10527 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
10528 goto done;
10529#ifdef DEBUG_PUSH
10530 xmlGenericError(xmlGenericErrorContext,
10531 "PP: Parsing PI\n");
10532#endif
10533 xmlParsePI(ctxt);
10534 ctxt->instate = XML_PARSER_EPILOG;
10535 } else if ((cur == '<') && (next == '!') &&
10536 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
10537 if ((!terminate) &&
10538 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
10539 goto done;
10540#ifdef DEBUG_PUSH
10541 xmlGenericError(xmlGenericErrorContext,
10542 "PP: Parsing Comment\n");
10543#endif
10544 xmlParseComment(ctxt);
10545 ctxt->instate = XML_PARSER_EPILOG;
10546 } else if ((cur == '<') && (next == '!') &&
10547 (avail < 4)) {
10548 goto done;
10549 } else {
10550 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
10551 ctxt->instate = XML_PARSER_EOF;
10552#ifdef DEBUG_PUSH
10553 xmlGenericError(xmlGenericErrorContext,
10554 "PP: entering EOF\n");
10555#endif
10556 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
10557 ctxt->sax->endDocument(ctxt->userData);
10558 goto done;
10559 }
10560 break;
10561 case XML_PARSER_DTD: {
10562 /*
10563 * Sorry but progressive parsing of the internal subset
10564 * is not expected to be supported. We first check that
10565 * the full content of the internal subset is available and
10566 * the parsing is launched only at that point.
10567 * Internal subset ends up with "']' S? '>'" in an unescaped
10568 * section and not in a ']]>' sequence which are conditional
10569 * sections (whoever argued to keep that crap in XML deserve
10570 * a place in hell !).
10571 */
10572 int base, i;
10573 xmlChar *buf;
10574 xmlChar quote = 0;
10575
10576 base = ctxt->input->cur - ctxt->input->base;
10577 if (base < 0) return(0);
10578 if (ctxt->checkIndex > base)
10579 base = ctxt->checkIndex;
10580 buf = ctxt->input->buf->buffer->content;
10581 for (;(unsigned int) base < ctxt->input->buf->buffer->use;
10582 base++) {
10583 if (quote != 0) {
10584 if (buf[base] == quote)
10585 quote = 0;
10586 continue;
10587 }
10588 if ((quote == 0) && (buf[base] == '<')) {
10589 int found = 0;
10590 /* special handling of comments */
10591 if (((unsigned int) base + 4 <
10592 ctxt->input->buf->buffer->use) &&
10593 (buf[base + 1] == '!') &&
10594 (buf[base + 2] == '-') &&
10595 (buf[base + 3] == '-')) {
10596 for (;(unsigned int) base + 3 <
10597 ctxt->input->buf->buffer->use; base++) {
10598 if ((buf[base] == '-') &&
10599 (buf[base + 1] == '-') &&
10600 (buf[base + 2] == '>')) {
10601 found = 1;
10602 base += 2;
10603 break;
10604 }
10605 }
10606 if (!found) {
10607#if 0
10608 fprintf(stderr, "unfinished comment\n");
10609#endif
10610 break; /* for */
10611 }
10612 continue;
10613 }
10614 }
10615 if (buf[base] == '"') {
10616 quote = '"';
10617 continue;
10618 }
10619 if (buf[base] == '\'') {
10620 quote = '\'';
10621 continue;
10622 }
10623 if (buf[base] == ']') {
10624#if 0
10625 fprintf(stderr, "%c%c%c%c: ", buf[base],
10626 buf[base + 1], buf[base + 2], buf[base + 3]);
10627#endif
10628 if ((unsigned int) base +1 >=
10629 ctxt->input->buf->buffer->use)
10630 break;
10631 if (buf[base + 1] == ']') {
10632 /* conditional crap, skip both ']' ! */
10633 base++;
10634 continue;
10635 }
10636 for (i = 1;
10637 (unsigned int) base + i < ctxt->input->buf->buffer->use;
10638 i++) {
10639 if (buf[base + i] == '>') {
10640#if 0
10641 fprintf(stderr, "found\n");
10642#endif
10643 goto found_end_int_subset;
10644 }
10645 if (!IS_BLANK_CH(buf[base + i])) {
10646#if 0
10647 fprintf(stderr, "not found\n");
10648#endif
10649 goto not_end_of_int_subset;
10650 }
10651 }
10652#if 0
10653 fprintf(stderr, "end of stream\n");
10654#endif
10655 break;
10656
10657 }
10658not_end_of_int_subset:
10659 continue; /* for */
10660 }
10661 /*
10662 * We didn't found the end of the Internal subset
10663 */
10664#ifdef DEBUG_PUSH
10665 if (next == 0)
10666 xmlGenericError(xmlGenericErrorContext,
10667 "PP: lookup of int subset end filed\n");
10668#endif
10669 goto done;
10670
10671found_end_int_subset:
10672 xmlParseInternalSubset(ctxt);
10673 ctxt->inSubset = 2;
10674 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
10675 (ctxt->sax->externalSubset != NULL))
10676 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
10677 ctxt->extSubSystem, ctxt->extSubURI);
10678 ctxt->inSubset = 0;
10679 xmlCleanSpecialAttr(ctxt);
10680 ctxt->instate = XML_PARSER_PROLOG;
10681 ctxt->checkIndex = 0;
10682#ifdef DEBUG_PUSH
10683 xmlGenericError(xmlGenericErrorContext,
10684 "PP: entering PROLOG\n");
10685#endif
10686 break;
10687 }
10688 case XML_PARSER_COMMENT:
10689 xmlGenericError(xmlGenericErrorContext,
10690 "PP: internal error, state == COMMENT\n");
10691 ctxt->instate = XML_PARSER_CONTENT;
10692#ifdef DEBUG_PUSH
10693 xmlGenericError(xmlGenericErrorContext,
10694 "PP: entering CONTENT\n");
10695#endif
10696 break;
10697 case XML_PARSER_IGNORE:
10698 xmlGenericError(xmlGenericErrorContext,
10699 "PP: internal error, state == IGNORE");
10700 ctxt->instate = XML_PARSER_DTD;
10701#ifdef DEBUG_PUSH
10702 xmlGenericError(xmlGenericErrorContext,
10703 "PP: entering DTD\n");
10704#endif
10705 break;
10706 case XML_PARSER_PI:
10707 xmlGenericError(xmlGenericErrorContext,
10708 "PP: internal error, state == PI\n");
10709 ctxt->instate = XML_PARSER_CONTENT;
10710#ifdef DEBUG_PUSH
10711 xmlGenericError(xmlGenericErrorContext,
10712 "PP: entering CONTENT\n");
10713#endif
10714 break;
10715 case XML_PARSER_ENTITY_DECL:
10716 xmlGenericError(xmlGenericErrorContext,
10717 "PP: internal error, state == ENTITY_DECL\n");
10718 ctxt->instate = XML_PARSER_DTD;
10719#ifdef DEBUG_PUSH
10720 xmlGenericError(xmlGenericErrorContext,
10721 "PP: entering DTD\n");
10722#endif
10723 break;
10724 case XML_PARSER_ENTITY_VALUE:
10725 xmlGenericError(xmlGenericErrorContext,
10726 "PP: internal error, state == ENTITY_VALUE\n");
10727 ctxt->instate = XML_PARSER_CONTENT;
10728#ifdef DEBUG_PUSH
10729 xmlGenericError(xmlGenericErrorContext,
10730 "PP: entering DTD\n");
10731#endif
10732 break;
10733 case XML_PARSER_ATTRIBUTE_VALUE:
10734 xmlGenericError(xmlGenericErrorContext,
10735 "PP: internal error, state == ATTRIBUTE_VALUE\n");
10736 ctxt->instate = XML_PARSER_START_TAG;
10737#ifdef DEBUG_PUSH
10738 xmlGenericError(xmlGenericErrorContext,
10739 "PP: entering START_TAG\n");
10740#endif
10741 break;
10742 case XML_PARSER_SYSTEM_LITERAL:
10743 xmlGenericError(xmlGenericErrorContext,
10744 "PP: internal error, state == SYSTEM_LITERAL\n");
10745 ctxt->instate = XML_PARSER_START_TAG;
10746#ifdef DEBUG_PUSH
10747 xmlGenericError(xmlGenericErrorContext,
10748 "PP: entering START_TAG\n");
10749#endif
10750 break;
10751 case XML_PARSER_PUBLIC_LITERAL:
10752 xmlGenericError(xmlGenericErrorContext,
10753 "PP: internal error, state == PUBLIC_LITERAL\n");
10754 ctxt->instate = XML_PARSER_START_TAG;
10755#ifdef DEBUG_PUSH
10756 xmlGenericError(xmlGenericErrorContext,
10757 "PP: entering START_TAG\n");
10758#endif
10759 break;
10760 }
10761 }
10762done:
10763#ifdef DEBUG_PUSH
10764 xmlGenericError(xmlGenericErrorContext, "PP: done %d\n", ret);
10765#endif
10766 return(ret);
10767encoding_error:
10768 {
10769 char buffer[150];
10770
10771 snprintf(buffer, 149, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
10772 ctxt->input->cur[0], ctxt->input->cur[1],
10773 ctxt->input->cur[2], ctxt->input->cur[3]);
10774 __xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR,
10775 "Input is not proper UTF-8, indicate encoding !\n%s",
10776 BAD_CAST buffer, NULL);
10777 }
10778 return(0);
10779}
10780
10781/**
10782 * xmlParseChunk:
10783 * @ctxt: an XML parser context
10784 * @chunk: an char array
10785 * @size: the size in byte of the chunk
10786 * @terminate: last chunk indicator
10787 *
10788 * Parse a Chunk of memory
10789 *
10790 * Returns zero if no error, the xmlParserErrors otherwise.
10791 */
10792int
10793xmlParseChunk(xmlParserCtxtPtr ctxt, const char *chunk, int size,
10794 int terminate) {
10795 int end_in_lf = 0;
10796
10797 if (ctxt == NULL)
10798 return(XML_ERR_INTERNAL_ERROR);
10799 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
10800 return(ctxt->errNo);
10801 if (ctxt->instate == XML_PARSER_START)
10802 xmlDetectSAX2(ctxt);
10803 if ((size > 0) && (chunk != NULL) && (!terminate) &&
10804 (chunk[size - 1] == '\r')) {
10805 end_in_lf = 1;
10806 size--;
10807 }
10808 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
10809 (ctxt->input->buf != NULL) && (ctxt->instate != XML_PARSER_EOF)) {
10810 int base = ctxt->input->base - ctxt->input->buf->buffer->content;
10811 int cur = ctxt->input->cur - ctxt->input->base;
10812 int res;
10813
10814 res =xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
10815 if (res < 0) {
10816 ctxt->errNo = XML_PARSER_EOF;
10817 ctxt->disableSAX = 1;
10818 return (XML_PARSER_EOF);
10819 }
10820 ctxt->input->base = ctxt->input->buf->buffer->content + base;
10821 ctxt->input->cur = ctxt->input->base + cur;
10822 ctxt->input->end =
10823 &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->use];
10824#ifdef DEBUG_PUSH
10825 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
10826#endif
10827
10828 } else if (ctxt->instate != XML_PARSER_EOF) {
10829 if ((ctxt->input != NULL) && ctxt->input->buf != NULL) {
10830 xmlParserInputBufferPtr in = ctxt->input->buf;
10831 if ((in->encoder != NULL) && (in->buffer != NULL) &&
10832 (in->raw != NULL)) {
10833 int nbchars;
10834
10835 nbchars = xmlCharEncInFunc(in->encoder, in->buffer, in->raw);
10836 if (nbchars < 0) {
10837 /* TODO 2.6.0 */
10838 xmlGenericError(xmlGenericErrorContext,
10839 "xmlParseChunk: encoder error\n");
10840 return(XML_ERR_INVALID_ENCODING);
10841 }
10842 }
10843 }
10844 }
10845 xmlParseTryOrFinish(ctxt, terminate);
10846 if ((end_in_lf == 1) && (ctxt->input != NULL) &&
10847 (ctxt->input->buf != NULL)) {
10848 xmlParserInputBufferPush(ctxt->input->buf, 1, "\r");
10849 }
10850 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
10851 return(ctxt->errNo);
10852 if (terminate) {
10853 /*
10854 * Check for termination
10855 */
10856 int avail = 0;
10857
10858 if (ctxt->input != NULL) {
10859 if (ctxt->input->buf == NULL)
10860 avail = ctxt->input->length -
10861 (ctxt->input->cur - ctxt->input->base);
10862 else
10863 avail = ctxt->input->buf->buffer->use -
10864 (ctxt->input->cur - ctxt->input->base);
10865 }
10866
10867 if ((ctxt->instate != XML_PARSER_EOF) &&
10868 (ctxt->instate != XML_PARSER_EPILOG)) {
10869 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
10870 }
10871 if ((ctxt->instate == XML_PARSER_EPILOG) && (avail > 0)) {
10872 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
10873 }
10874 if (ctxt->instate != XML_PARSER_EOF) {
10875 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
10876 ctxt->sax->endDocument(ctxt->userData);
10877 }
10878 ctxt->instate = XML_PARSER_EOF;
10879 }
10880 return((xmlParserErrors) ctxt->errNo);
10881}
10882
10883/************************************************************************
10884 * *
10885 * I/O front end functions to the parser *
10886 * *
10887 ************************************************************************/
10888
10889/**
10890 * xmlCreatePushParserCtxt:
10891 * @sax: a SAX handler
10892 * @user_data: The user data returned on SAX callbacks
10893 * @chunk: a pointer to an array of chars
10894 * @size: number of chars in the array
10895 * @filename: an optional file name or URI
10896 *
10897 * Create a parser context for using the XML parser in push mode.
10898 * If @buffer and @size are non-NULL, the data is used to detect
10899 * the encoding. The remaining characters will be parsed so they
10900 * don't need to be fed in again through xmlParseChunk.
10901 * To allow content encoding detection, @size should be >= 4
10902 * The value of @filename is used for fetching external entities
10903 * and error/warning reports.
10904 *
10905 * Returns the new parser context or NULL
10906 */
10907
10908xmlParserCtxtPtr
10909xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
10910 const char *chunk, int size, const char *filename) {
10911 xmlParserCtxtPtr ctxt;
10912 xmlParserInputPtr inputStream;
10913 xmlParserInputBufferPtr buf;
10914 xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
10915
10916 /*
10917 * plug some encoding conversion routines
10918 */
10919 if ((chunk != NULL) && (size >= 4))
10920 enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
10921
10922 buf = xmlAllocParserInputBuffer(enc);
10923 if (buf == NULL) return(NULL);
10924
10925 ctxt = xmlNewParserCtxt();
10926 if (ctxt == NULL) {
10927 xmlErrMemory(NULL, "creating parser: out of memory\n");
10928 xmlFreeParserInputBuffer(buf);
10929 return(NULL);
10930 }
10931 ctxt->dictNames = 1;
10932 ctxt->pushTab = (void **) xmlMalloc(ctxt->nameMax * 3 * sizeof(xmlChar *));
10933 if (ctxt->pushTab == NULL) {
10934 xmlErrMemory(ctxt, NULL);
10935 xmlFreeParserInputBuffer(buf);
10936 xmlFreeParserCtxt(ctxt);
10937 return(NULL);
10938 }
10939 if (sax != NULL) {
10940#ifdef LIBXML_SAX1_ENABLED
10941 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
10942#endif /* LIBXML_SAX1_ENABLED */
10943 xmlFree(ctxt->sax);
10944 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
10945 if (ctxt->sax == NULL) {
10946 xmlErrMemory(ctxt, NULL);
10947 xmlFreeParserInputBuffer(buf);
10948 xmlFreeParserCtxt(ctxt);
10949 return(NULL);
10950 }
10951 memset(ctxt->sax, 0, sizeof(xmlSAXHandler));
10952 if (sax->initialized == XML_SAX2_MAGIC)
10953 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
10954 else
10955 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1));
10956 if (user_data != NULL)
10957 ctxt->userData = user_data;
10958 }
10959 if (filename == NULL) {
10960 ctxt->directory = NULL;
10961 } else {
10962 ctxt->directory = xmlParserGetDirectory(filename);
10963 }
10964
10965 inputStream = xmlNewInputStream(ctxt);
10966 if (inputStream == NULL) {
10967 xmlFreeParserCtxt(ctxt);
10968 xmlFreeParserInputBuffer(buf);
10969 return(NULL);
10970 }
10971
10972 if (filename == NULL)
10973 inputStream->filename = NULL;
10974 else {
10975 inputStream->filename = (char *)
10976 xmlCanonicPath((const xmlChar *) filename);
10977 if (inputStream->filename == NULL) {
10978 xmlFreeParserCtxt(ctxt);
10979 xmlFreeParserInputBuffer(buf);
10980 return(NULL);
10981 }
10982 }
10983 inputStream->buf = buf;
10984 inputStream->base = inputStream->buf->buffer->content;
10985 inputStream->cur = inputStream->buf->buffer->content;
10986 inputStream->end =
10987 &inputStream->buf->buffer->content[inputStream->buf->buffer->use];
10988
10989 inputPush(ctxt, inputStream);
10990
10991 /*
10992 * If the caller didn't provide an initial 'chunk' for determining
10993 * the encoding, we set the context to XML_CHAR_ENCODING_NONE so
10994 * that it can be automatically determined later
10995 */
10996 if ((size == 0) || (chunk == NULL)) {
10997 ctxt->charset = XML_CHAR_ENCODING_NONE;
10998 } else if ((ctxt->input != NULL) && (ctxt->input->buf != NULL)) {
10999 int base = ctxt->input->base - ctxt->input->buf->buffer->content;
11000 int cur = ctxt->input->cur - ctxt->input->base;
11001
11002 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
11003
11004 ctxt->input->base = ctxt->input->buf->buffer->content + base;
11005 ctxt->input->cur = ctxt->input->base + cur;
11006 ctxt->input->end =
11007 &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->use];
11008#ifdef DEBUG_PUSH
11009 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
11010#endif
11011 }
11012
11013 if (enc != XML_CHAR_ENCODING_NONE) {
11014 xmlSwitchEncoding(ctxt, enc);
11015 }
11016
11017 return(ctxt);
11018}
11019#endif /* LIBXML_PUSH_ENABLED */
11020
11021/**
11022 * xmlStopParser:
11023 * @ctxt: an XML parser context
11024 *
11025 * Blocks further parser processing
11026 */
11027void
11028xmlStopParser(xmlParserCtxtPtr ctxt) {
11029 if (ctxt == NULL)
11030 return;
11031 ctxt->instate = XML_PARSER_EOF;
11032 ctxt->disableSAX = 1;
11033 if (ctxt->input != NULL) {
11034 ctxt->input->cur = BAD_CAST"";
11035 ctxt->input->base = ctxt->input->cur;
11036 }
11037}
11038
11039/**
11040 * xmlCreateIOParserCtxt:
11041 * @sax: a SAX handler
11042 * @user_data: The user data returned on SAX callbacks
11043 * @ioread: an I/O read function
11044 * @ioclose: an I/O close function
11045 * @ioctx: an I/O handler
11046 * @enc: the charset encoding if known
11047 *
11048 * Create a parser context for using the XML parser with an existing
11049 * I/O stream
11050 *
11051 * Returns the new parser context or NULL
11052 */
11053xmlParserCtxtPtr
11054xmlCreateIOParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
11055 xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
11056 void *ioctx, xmlCharEncoding enc) {
11057 xmlParserCtxtPtr ctxt;
11058 xmlParserInputPtr inputStream;
11059 xmlParserInputBufferPtr buf;
11060
11061 if (ioread == NULL) return(NULL);
11062
11063 buf = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx, enc);
11064 if (buf == NULL) return(NULL);
11065
11066 ctxt = xmlNewParserCtxt();
11067 if (ctxt == NULL) {
11068 xmlFreeParserInputBuffer(buf);
11069 return(NULL);
11070 }
11071 if (sax != NULL) {
11072#ifdef LIBXML_SAX1_ENABLED
11073 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
11074#endif /* LIBXML_SAX1_ENABLED */
11075 xmlFree(ctxt->sax);
11076 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
11077 if (ctxt->sax == NULL) {
11078 xmlErrMemory(ctxt, NULL);
11079 xmlFreeParserCtxt(ctxt);
11080 return(NULL);
11081 }
11082 memset(ctxt->sax, 0, sizeof(xmlSAXHandler));
11083 if (sax->initialized == XML_SAX2_MAGIC)
11084 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
11085 else
11086 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1));
11087 if (user_data != NULL)
11088 ctxt->userData = user_data;
11089 }
11090
11091 inputStream = xmlNewIOInputStream(ctxt, buf, enc);
11092 if (inputStream == NULL) {
11093 xmlFreeParserCtxt(ctxt);
11094 return(NULL);
11095 }
11096 inputPush(ctxt, inputStream);
11097
11098 return(ctxt);
11099}
11100
11101#ifdef LIBXML_VALID_ENABLED
11102/************************************************************************
11103 * *
11104 * Front ends when parsing a DTD *
11105 * *
11106 ************************************************************************/
11107
11108/**
11109 * xmlIOParseDTD:
11110 * @sax: the SAX handler block or NULL
11111 * @input: an Input Buffer
11112 * @enc: the charset encoding if known
11113 *
11114 * Load and parse a DTD
11115 *
11116 * Returns the resulting xmlDtdPtr or NULL in case of error.
11117 * @input will be freed by the function in any case.
11118 */
11119
11120xmlDtdPtr
11121xmlIOParseDTD(xmlSAXHandlerPtr sax, xmlParserInputBufferPtr input,
11122 xmlCharEncoding enc) {
11123 xmlDtdPtr ret = NULL;
11124 xmlParserCtxtPtr ctxt;
11125 xmlParserInputPtr pinput = NULL;
11126 xmlChar start[4];
11127
11128 if (input == NULL)
11129 return(NULL);
11130
11131 ctxt = xmlNewParserCtxt();
11132 if (ctxt == NULL) {
11133 xmlFreeParserInputBuffer(input);
11134 return(NULL);
11135 }
11136
11137 /*
11138 * Set-up the SAX context
11139 */
11140 if (sax != NULL) {
11141 if (ctxt->sax != NULL)
11142 xmlFree(ctxt->sax);
11143 ctxt->sax = sax;
11144 ctxt->userData = ctxt;
11145 }
11146 xmlDetectSAX2(ctxt);
11147
11148 /*
11149 * generate a parser input from the I/O handler
11150 */
11151
11152 pinput = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
11153 if (pinput == NULL) {
11154 if (sax != NULL) ctxt->sax = NULL;
11155 xmlFreeParserInputBuffer(input);
11156 xmlFreeParserCtxt(ctxt);
11157 return(NULL);
11158 }
11159
11160 /*
11161 * plug some encoding conversion routines here.
11162 */
11163 xmlPushInput(ctxt, pinput);
11164 if (enc != XML_CHAR_ENCODING_NONE) {
11165 xmlSwitchEncoding(ctxt, enc);
11166 }
11167
11168 pinput->filename = NULL;
11169 pinput->line = 1;
11170 pinput->col = 1;
11171 pinput->base = ctxt->input->cur;
11172 pinput->cur = ctxt->input->cur;
11173 pinput->free = NULL;
11174
11175 /*
11176 * let's parse that entity knowing it's an external subset.
11177 */
11178 ctxt->inSubset = 2;
11179 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
11180 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
11181 BAD_CAST "none", BAD_CAST "none");
11182
11183 if ((enc == XML_CHAR_ENCODING_NONE) &&
11184 ((ctxt->input->end - ctxt->input->cur) >= 4)) {
11185 /*
11186 * Get the 4 first bytes and decode the charset
11187 * if enc != XML_CHAR_ENCODING_NONE
11188 * plug some encoding conversion routines.
11189 */
11190 start[0] = RAW;
11191 start[1] = NXT(1);
11192 start[2] = NXT(2);
11193 start[3] = NXT(3);
11194 enc = xmlDetectCharEncoding(start, 4);
11195 if (enc != XML_CHAR_ENCODING_NONE) {
11196 xmlSwitchEncoding(ctxt, enc);
11197 }
11198 }
11199
11200 xmlParseExternalSubset(ctxt, BAD_CAST "none", BAD_CAST "none");
11201
11202 if (ctxt->myDoc != NULL) {
11203 if (ctxt->wellFormed) {
11204 ret = ctxt->myDoc->extSubset;
11205 ctxt->myDoc->extSubset = NULL;
11206 if (ret != NULL) {
11207 xmlNodePtr tmp;
11208
11209 ret->doc = NULL;
11210 tmp = ret->children;
11211 while (tmp != NULL) {
11212 tmp->doc = NULL;
11213 tmp = tmp->next;
11214 }
11215 }
11216 } else {
11217 ret = NULL;
11218 }
11219 xmlFreeDoc(ctxt->myDoc);
11220 ctxt->myDoc = NULL;
11221 }
11222 if (sax != NULL) ctxt->sax = NULL;
11223 xmlFreeParserCtxt(ctxt);
11224
11225 return(ret);
11226}
11227
11228/**
11229 * xmlSAXParseDTD:
11230 * @sax: the SAX handler block
11231 * @ExternalID: a NAME* containing the External ID of the DTD
11232 * @SystemID: a NAME* containing the URL to the DTD
11233 *
11234 * Load and parse an external subset.
11235 *
11236 * Returns the resulting xmlDtdPtr or NULL in case of error.
11237 */
11238
11239xmlDtdPtr
11240xmlSAXParseDTD(xmlSAXHandlerPtr sax, const xmlChar *ExternalID,
11241 const xmlChar *SystemID) {
11242 xmlDtdPtr ret = NULL;
11243 xmlParserCtxtPtr ctxt;
11244 xmlParserInputPtr input = NULL;
11245 xmlCharEncoding enc;
11246 xmlChar* systemIdCanonic;
11247
11248 if ((ExternalID == NULL) && (SystemID == NULL)) return(NULL);
11249
11250 ctxt = xmlNewParserCtxt();
11251 if (ctxt == NULL) {
11252 return(NULL);
11253 }
11254
11255 /*
11256 * Set-up the SAX context
11257 */
11258 if (sax != NULL) {
11259 if (ctxt->sax != NULL)
11260 xmlFree(ctxt->sax);
11261 ctxt->sax = sax;
11262 ctxt->userData = ctxt;
11263 }
11264
11265 /*
11266 * Canonicalise the system ID
11267 */
11268 systemIdCanonic = xmlCanonicPath(SystemID);
11269 if ((SystemID != NULL) && (systemIdCanonic == NULL)) {
11270 xmlFreeParserCtxt(ctxt);
11271 return(NULL);
11272 }
11273
11274 /*
11275 * Ask the Entity resolver to load the damn thing
11276 */
11277
11278 if ((ctxt->sax != NULL) && (ctxt->sax->resolveEntity != NULL))
11279 input = ctxt->sax->resolveEntity(ctxt->userData, ExternalID,
11280 systemIdCanonic);
11281 if (input == NULL) {
11282 if (sax != NULL) ctxt->sax = NULL;
11283 xmlFreeParserCtxt(ctxt);
11284 if (systemIdCanonic != NULL)
11285 xmlFree(systemIdCanonic);
11286 return(NULL);
11287 }
11288
11289 /*
11290 * plug some encoding conversion routines here.
11291 */
11292 xmlPushInput(ctxt, input);
11293 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
11294 enc = xmlDetectCharEncoding(ctxt->input->cur, 4);
11295 xmlSwitchEncoding(ctxt, enc);
11296 }
11297
11298 if (input->filename == NULL)
11299 input->filename = (char *) systemIdCanonic;
11300 else
11301 xmlFree(systemIdCanonic);
11302 input->line = 1;
11303 input->col = 1;
11304 input->base = ctxt->input->cur;
11305 input->cur = ctxt->input->cur;
11306 input->free = NULL;
11307
11308 /*
11309 * let's parse that entity knowing it's an external subset.
11310 */
11311 ctxt->inSubset = 2;
11312 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
11313 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
11314 ExternalID, SystemID);
11315 xmlParseExternalSubset(ctxt, ExternalID, SystemID);
11316
11317 if (ctxt->myDoc != NULL) {
11318 if (ctxt->wellFormed) {
11319 ret = ctxt->myDoc->extSubset;
11320 ctxt->myDoc->extSubset = NULL;
11321 if (ret != NULL) {
11322 xmlNodePtr tmp;
11323
11324 ret->doc = NULL;
11325 tmp = ret->children;
11326 while (tmp != NULL) {
11327 tmp->doc = NULL;
11328 tmp = tmp->next;
11329 }
11330 }
11331 } else {
11332 ret = NULL;
11333 }
11334 xmlFreeDoc(ctxt->myDoc);
11335 ctxt->myDoc = NULL;
11336 }
11337 if (sax != NULL) ctxt->sax = NULL;
11338 xmlFreeParserCtxt(ctxt);
11339
11340 return(ret);
11341}
11342
11343
11344/**
11345 * xmlParseDTD:
11346 * @ExternalID: a NAME* containing the External ID of the DTD
11347 * @SystemID: a NAME* containing the URL to the DTD
11348 *
11349 * Load and parse an external subset.
11350 *
11351 * Returns the resulting xmlDtdPtr or NULL in case of error.
11352 */
11353
11354xmlDtdPtr
11355xmlParseDTD(const xmlChar *ExternalID, const xmlChar *SystemID) {
11356 return(xmlSAXParseDTD(NULL, ExternalID, SystemID));
11357}
11358#endif /* LIBXML_VALID_ENABLED */
11359
11360/************************************************************************
11361 * *
11362 * Front ends when parsing an Entity *
11363 * *
11364 ************************************************************************/
11365
11366/**
11367 * xmlParseCtxtExternalEntity:
11368 * @ctx: the existing parsing context
11369 * @URL: the URL for the entity to load
11370 * @ID: the System ID for the entity to load
11371 * @lst: the return value for the set of parsed nodes
11372 *
11373 * Parse an external general entity within an existing parsing context
11374 * An external general parsed entity is well-formed if it matches the
11375 * production labeled extParsedEnt.
11376 *
11377 * [78] extParsedEnt ::= TextDecl? content
11378 *
11379 * Returns 0 if the entity is well formed, -1 in case of args problem and
11380 * the parser error code otherwise
11381 */
11382
11383int
11384xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctx, const xmlChar *URL,
11385 const xmlChar *ID, xmlNodePtr *lst) {
11386 xmlParserCtxtPtr ctxt;
11387 xmlDocPtr newDoc;
11388 xmlNodePtr newRoot;
11389 xmlSAXHandlerPtr oldsax = NULL;
11390 int ret = 0;
11391 xmlChar start[4];
11392 xmlCharEncoding enc;
11393 xmlParserInputPtr inputStream;
11394 char *directory = NULL;
11395
11396 if (ctx == NULL) return(-1);
11397
11398 if (ctx->depth > 40) {
11399 return(XML_ERR_ENTITY_LOOP);
11400 }
11401
11402 if (lst != NULL)
11403 *lst = NULL;
11404 if ((URL == NULL) && (ID == NULL))
11405 return(-1);
11406 if (ctx->myDoc == NULL) /* @@ relax but check for dereferences */
11407 return(-1);
11408
11409 ctxt = xmlNewParserCtxt();
11410 if (ctxt == NULL) {
11411 return(-1);
11412 }
11413
11414 ctxt->userData = ctxt;
11415 ctxt->_private = ctx->_private;
11416
11417 inputStream = xmlLoadExternalEntity((char *)URL, (char *)ID, ctxt);
11418 if (inputStream == NULL) {
11419 xmlFreeParserCtxt(ctxt);
11420 return(-1);
11421 }
11422
11423 inputPush(ctxt, inputStream);
11424
11425 if ((ctxt->directory == NULL) && (directory == NULL))
11426 directory = xmlParserGetDirectory((char *)URL);
11427 if ((ctxt->directory == NULL) && (directory != NULL))
11428 ctxt->directory = directory;
11429
11430 oldsax = ctxt->sax;
11431 ctxt->sax = ctx->sax;
11432 xmlDetectSAX2(ctxt);
11433 newDoc = xmlNewDoc(BAD_CAST "1.0");
11434 if (newDoc == NULL) {
11435 xmlFreeParserCtxt(ctxt);
11436 return(-1);
11437 }
11438 if (ctx->myDoc->dict) {
11439 newDoc->dict = ctx->myDoc->dict;
11440 xmlDictReference(newDoc->dict);
11441 }
11442 if (ctx->myDoc != NULL) {
11443 newDoc->intSubset = ctx->myDoc->intSubset;
11444 newDoc->extSubset = ctx->myDoc->extSubset;
11445 }
11446 if (ctx->myDoc->URL != NULL) {
11447 newDoc->URL = xmlStrdup(ctx->myDoc->URL);
11448 }
11449 newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
11450 if (newRoot == NULL) {
11451 ctxt->sax = oldsax;
11452 xmlFreeParserCtxt(ctxt);
11453 newDoc->intSubset = NULL;
11454 newDoc->extSubset = NULL;
11455 xmlFreeDoc(newDoc);
11456 return(-1);
11457 }
11458 xmlAddChild((xmlNodePtr) newDoc, newRoot);
11459 nodePush(ctxt, newDoc->children);
11460 if (ctx->myDoc == NULL) {
11461 ctxt->myDoc = newDoc;
11462 } else {
11463 ctxt->myDoc = ctx->myDoc;
11464 newDoc->children->doc = ctx->myDoc;
11465 }
11466
11467 /*
11468 * Get the 4 first bytes and decode the charset
11469 * if enc != XML_CHAR_ENCODING_NONE
11470 * plug some encoding conversion routines.
11471 */
11472 GROW
11473 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
11474 start[0] = RAW;
11475 start[1] = NXT(1);
11476 start[2] = NXT(2);
11477 start[3] = NXT(3);
11478 enc = xmlDetectCharEncoding(start, 4);
11479 if (enc != XML_CHAR_ENCODING_NONE) {
11480 xmlSwitchEncoding(ctxt, enc);
11481 }
11482 }
11483
11484 /*
11485 * Parse a possible text declaration first
11486 */
11487 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
11488 xmlParseTextDecl(ctxt);
11489 }
11490
11491 /*
11492 * Doing validity checking on chunk doesn't make sense
11493 */
11494 ctxt->instate = XML_PARSER_CONTENT;
11495 ctxt->validate = ctx->validate;
11496 ctxt->valid = ctx->valid;
11497 ctxt->loadsubset = ctx->loadsubset;
11498 ctxt->depth = ctx->depth + 1;
11499 ctxt->replaceEntities = ctx->replaceEntities;
11500 if (ctxt->validate) {
11501 ctxt->vctxt.error = ctx->vctxt.error;
11502 ctxt->vctxt.warning = ctx->vctxt.warning;
11503 } else {
11504 ctxt->vctxt.error = NULL;
11505 ctxt->vctxt.warning = NULL;
11506 }
11507 ctxt->vctxt.nodeTab = NULL;
11508 ctxt->vctxt.nodeNr = 0;
11509 ctxt->vctxt.nodeMax = 0;
11510 ctxt->vctxt.node = NULL;
11511 if (ctxt->dict != NULL) xmlDictFree(ctxt->dict);
11512 ctxt->dict = ctx->dict;
11513 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
11514 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
11515 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
11516 ctxt->dictNames = ctx->dictNames;
11517 ctxt->attsDefault = ctx->attsDefault;
11518 ctxt->attsSpecial = ctx->attsSpecial;
11519 ctxt->linenumbers = ctx->linenumbers;
11520
11521 xmlParseContent(ctxt);
11522
11523 ctx->validate = ctxt->validate;
11524 ctx->valid = ctxt->valid;
11525 if ((RAW == '<') && (NXT(1) == '/')) {
11526 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
11527 } else if (RAW != 0) {
11528 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
11529 }
11530 if (ctxt->node != newDoc->children) {
11531 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
11532 }
11533
11534 if (!ctxt->wellFormed) {
11535 if (ctxt->errNo == 0)
11536 ret = 1;
11537 else
11538 ret = ctxt->errNo;
11539 } else {
11540 if (lst != NULL) {
11541 xmlNodePtr cur;
11542
11543 /*
11544 * Return the newly created nodeset after unlinking it from
11545 * they pseudo parent.
11546 */
11547 cur = newDoc->children->children;
11548 *lst = cur;
11549 while (cur != NULL) {
11550 cur->parent = NULL;
11551 cur = cur->next;
11552 }
11553 newDoc->children->children = NULL;
11554 }
11555 ret = 0;
11556 }
11557 ctxt->sax = oldsax;
11558 ctxt->dict = NULL;
11559 ctxt->attsDefault = NULL;
11560 ctxt->attsSpecial = NULL;
11561 xmlFreeParserCtxt(ctxt);
11562 newDoc->intSubset = NULL;
11563 newDoc->extSubset = NULL;
11564 xmlFreeDoc(newDoc);
11565
11566 return(ret);
11567}
11568
11569/**
11570 * xmlParseExternalEntityPrivate:
11571 * @doc: the document the chunk pertains to
11572 * @oldctxt: the previous parser context if available
11573 * @sax: the SAX handler bloc (possibly NULL)
11574 * @user_data: The user data returned on SAX callbacks (possibly NULL)
11575 * @depth: Used for loop detection, use 0
11576 * @URL: the URL for the entity to load
11577 * @ID: the System ID for the entity to load
11578 * @list: the return value for the set of parsed nodes
11579 *
11580 * Private version of xmlParseExternalEntity()
11581 *
11582 * Returns 0 if the entity is well formed, -1 in case of args problem and
11583 * the parser error code otherwise
11584 */
11585
11586static xmlParserErrors
11587xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
11588 xmlSAXHandlerPtr sax,
11589 void *user_data, int depth, const xmlChar *URL,
11590 const xmlChar *ID, xmlNodePtr *list) {
11591 xmlParserCtxtPtr ctxt;
11592 xmlDocPtr newDoc;
11593 xmlNodePtr newRoot;
11594 xmlSAXHandlerPtr oldsax = NULL;
11595 xmlParserErrors ret = XML_ERR_OK;
11596 xmlChar start[4];
11597 xmlCharEncoding enc;
11598
11599 if (depth > 40) {
11600 return(XML_ERR_ENTITY_LOOP);
11601 }
11602
11603
11604
11605 if (list != NULL)
11606 *list = NULL;
11607 if ((URL == NULL) && (ID == NULL))
11608 return(XML_ERR_INTERNAL_ERROR);
11609 if (doc == NULL)
11610 return(XML_ERR_INTERNAL_ERROR);
11611
11612
11613 ctxt = xmlCreateEntityParserCtxt(URL, ID, NULL);
11614 if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY);
11615 ctxt->userData = ctxt;
11616 if (oldctxt != NULL) {
11617 ctxt->_private = oldctxt->_private;
11618 ctxt->loadsubset = oldctxt->loadsubset;
11619 ctxt->validate = oldctxt->validate;
11620 ctxt->external = oldctxt->external;
11621 ctxt->record_info = oldctxt->record_info;
11622 ctxt->node_seq.maximum = oldctxt->node_seq.maximum;
11623 ctxt->node_seq.length = oldctxt->node_seq.length;
11624 ctxt->node_seq.buffer = oldctxt->node_seq.buffer;
11625 } else {
11626 /*
11627 * Doing validity checking on chunk without context
11628 * doesn't make sense
11629 */
11630 ctxt->_private = NULL;
11631 ctxt->validate = 0;
11632 ctxt->external = 2;
11633 ctxt->loadsubset = 0;
11634 }
11635 if (sax != NULL) {
11636 oldsax = ctxt->sax;
11637 ctxt->sax = sax;
11638 if (user_data != NULL)
11639 ctxt->userData = user_data;
11640 }
11641 xmlDetectSAX2(ctxt);
11642 newDoc = xmlNewDoc(BAD_CAST "1.0");
11643 if (newDoc == NULL) {
11644 ctxt->node_seq.maximum = 0;
11645 ctxt->node_seq.length = 0;
11646 ctxt->node_seq.buffer = NULL;
11647 xmlFreeParserCtxt(ctxt);
11648 return(XML_ERR_INTERNAL_ERROR);
11649 }
11650 newDoc->intSubset = doc->intSubset;
11651 newDoc->extSubset = doc->extSubset;
11652 newDoc->dict = doc->dict;
11653 xmlDictReference(newDoc->dict);
11654
11655 if (doc->URL != NULL) {
11656 newDoc->URL = xmlStrdup(doc->URL);
11657 }
11658 newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
11659 if (newRoot == NULL) {
11660 if (sax != NULL)
11661 ctxt->sax = oldsax;
11662 ctxt->node_seq.maximum = 0;
11663 ctxt->node_seq.length = 0;
11664 ctxt->node_seq.buffer = NULL;
11665 xmlFreeParserCtxt(ctxt);
11666 newDoc->intSubset = NULL;
11667 newDoc->extSubset = NULL;
11668 xmlFreeDoc(newDoc);
11669 return(XML_ERR_INTERNAL_ERROR);
11670 }
11671 xmlAddChild((xmlNodePtr) newDoc, newRoot);
11672 nodePush(ctxt, newDoc->children);
11673 ctxt->myDoc = doc;
11674 newRoot->doc = doc;
11675
11676 /*
11677 * Get the 4 first bytes and decode the charset
11678 * if enc != XML_CHAR_ENCODING_NONE
11679 * plug some encoding conversion routines.
11680 */
11681 GROW;
11682 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
11683 start[0] = RAW;
11684 start[1] = NXT(1);
11685 start[2] = NXT(2);
11686 start[3] = NXT(3);
11687 enc = xmlDetectCharEncoding(start, 4);
11688 if (enc != XML_CHAR_ENCODING_NONE) {
11689 xmlSwitchEncoding(ctxt, enc);
11690 }
11691 }
11692
11693 /*
11694 * Parse a possible text declaration first
11695 */
11696 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
11697 xmlParseTextDecl(ctxt);
11698 }
11699
11700 ctxt->instate = XML_PARSER_CONTENT;
11701 ctxt->depth = depth;
11702
11703 xmlParseContent(ctxt);
11704
11705 if ((RAW == '<') && (NXT(1) == '/')) {
11706 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
11707 } else if (RAW != 0) {
11708 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
11709 }
11710 if (ctxt->node != newDoc->children) {
11711 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
11712 }
11713
11714 if (!ctxt->wellFormed) {
11715 if (ctxt->errNo == 0)
11716 ret = XML_ERR_INTERNAL_ERROR;
11717 else
11718 ret = (xmlParserErrors)ctxt->errNo;
11719 } else {
11720 if (list != NULL) {
11721 xmlNodePtr cur;
11722
11723 /*
11724 * Return the newly created nodeset after unlinking it from
11725 * they pseudo parent.
11726 */
11727 cur = newDoc->children->children;
11728 *list = cur;
11729 while (cur != NULL) {
11730 cur->parent = NULL;
11731 cur = cur->next;
11732 }
11733 newDoc->children->children = NULL;
11734 }
11735 ret = XML_ERR_OK;
11736 }
11737
11738 /*
11739 * Record in the parent context the number of entities replacement
11740 * done when parsing that reference.
11741 */
11742 oldctxt->nbentities += ctxt->nbentities;
11743 /*
11744 * Also record the size of the entity parsed
11745 */
11746 if (ctxt->input != NULL) {
11747 oldctxt->sizeentities += ctxt->input->consumed;
11748 oldctxt->sizeentities += (ctxt->input->cur - ctxt->input->base);
11749 }
11750 /*
11751 * And record the last error if any
11752 */
11753 if (ctxt->lastError.code != XML_ERR_OK)
11754 xmlCopyError(&ctxt->lastError, &oldctxt->lastError);
11755
11756 if (sax != NULL)
11757 ctxt->sax = oldsax;
11758 oldctxt->node_seq.maximum = ctxt->node_seq.maximum;
11759 oldctxt->node_seq.length = ctxt->node_seq.length;
11760 oldctxt->node_seq.buffer = ctxt->node_seq.buffer;
11761 oldctxt->nbentities += ctxt->nbentities;
11762 ctxt->node_seq.maximum = 0;
11763 ctxt->node_seq.length = 0;
11764 ctxt->node_seq.buffer = NULL;
11765 xmlFreeParserCtxt(ctxt);
11766 newDoc->intSubset = NULL;
11767 newDoc->extSubset = NULL;
11768 xmlFreeDoc(newDoc);
11769
11770 return(ret);
11771}
11772
11773#ifdef LIBXML_SAX1_ENABLED
11774/**
11775 * xmlParseExternalEntity:
11776 * @doc: the document the chunk pertains to
11777 * @sax: the SAX handler bloc (possibly NULL)
11778 * @user_data: The user data returned on SAX callbacks (possibly NULL)
11779 * @depth: Used for loop detection, use 0
11780 * @URL: the URL for the entity to load
11781 * @ID: the System ID for the entity to load
11782 * @lst: the return value for the set of parsed nodes
11783 *
11784 * Parse an external general entity
11785 * An external general parsed entity is well-formed if it matches the
11786 * production labeled extParsedEnt.
11787 *
11788 * [78] extParsedEnt ::= TextDecl? content
11789 *
11790 * Returns 0 if the entity is well formed, -1 in case of args problem and
11791 * the parser error code otherwise
11792 */
11793
11794int
11795xmlParseExternalEntity(xmlDocPtr doc, xmlSAXHandlerPtr sax, void *user_data,
11796 int depth, const xmlChar *URL, const xmlChar *ID, xmlNodePtr *lst) {
11797 return(xmlParseExternalEntityPrivate(doc, NULL, sax, user_data, depth, URL,
11798 ID, lst));
11799}
11800
11801/**
11802 * xmlParseBalancedChunkMemory:
11803 * @doc: the document the chunk pertains to
11804 * @sax: the SAX handler bloc (possibly NULL)
11805 * @user_data: The user data returned on SAX callbacks (possibly NULL)
11806 * @depth: Used for loop detection, use 0
11807 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
11808 * @lst: the return value for the set of parsed nodes
11809 *
11810 * Parse a well-balanced chunk of an XML document
11811 * called by the parser
11812 * The allowed sequence for the Well Balanced Chunk is the one defined by
11813 * the content production in the XML grammar:
11814 *
11815 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
11816 *
11817 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
11818 * the parser error code otherwise
11819 */
11820
11821int
11822xmlParseBalancedChunkMemory(xmlDocPtr doc, xmlSAXHandlerPtr sax,
11823 void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst) {
11824 return xmlParseBalancedChunkMemoryRecover( doc, sax, user_data,
11825 depth, string, lst, 0 );
11826}
11827#endif /* LIBXML_SAX1_ENABLED */
11828
11829/**
11830 * xmlParseBalancedChunkMemoryInternal:
11831 * @oldctxt: the existing parsing context
11832 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
11833 * @user_data: the user data field for the parser context
11834 * @lst: the return value for the set of parsed nodes
11835 *
11836 *
11837 * Parse a well-balanced chunk of an XML document
11838 * called by the parser
11839 * The allowed sequence for the Well Balanced Chunk is the one defined by
11840 * the content production in the XML grammar:
11841 *
11842 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
11843 *
11844 * Returns XML_ERR_OK if the chunk is well balanced, and the parser
11845 * error code otherwise
11846 *
11847 * In case recover is set to 1, the nodelist will not be empty even if
11848 * the parsed chunk is not well balanced.
11849 */
11850static xmlParserErrors
11851xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
11852 const xmlChar *string, void *user_data, xmlNodePtr *lst) {
11853 xmlParserCtxtPtr ctxt;
11854 xmlDocPtr newDoc = NULL;
11855 xmlNodePtr newRoot;
11856 xmlSAXHandlerPtr oldsax = NULL;
11857 xmlNodePtr content = NULL;
11858 xmlNodePtr last = NULL;
11859 int size;
11860 xmlParserErrors ret = XML_ERR_OK;
11861
11862 if (oldctxt->depth > 40) {
11863 return(XML_ERR_ENTITY_LOOP);
11864 }
11865
11866
11867 if (lst != NULL)
11868 *lst = NULL;
11869 if (string == NULL)
11870 return(XML_ERR_INTERNAL_ERROR);
11871
11872 size = xmlStrlen(string);
11873
11874 ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
11875 if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY);
11876 if (user_data != NULL)
11877 ctxt->userData = user_data;
11878 else
11879 ctxt->userData = ctxt;
11880 if (ctxt->dict != NULL) xmlDictFree(ctxt->dict);
11881 ctxt->dict = oldctxt->dict;
11882 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
11883 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
11884 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
11885
11886 oldsax = ctxt->sax;
11887 ctxt->sax = oldctxt->sax;
11888 xmlDetectSAX2(ctxt);
11889 ctxt->replaceEntities = oldctxt->replaceEntities;
11890 ctxt->options = oldctxt->options;
11891
11892 ctxt->_private = oldctxt->_private;
11893 if (oldctxt->myDoc == NULL) {
11894 newDoc = xmlNewDoc(BAD_CAST "1.0");
11895 if (newDoc == NULL) {
11896 ctxt->sax = oldsax;
11897 ctxt->dict = NULL;
11898 xmlFreeParserCtxt(ctxt);
11899 return(XML_ERR_INTERNAL_ERROR);
11900 }
11901 newDoc->dict = ctxt->dict;
11902 xmlDictReference(newDoc->dict);
11903 ctxt->myDoc = newDoc;
11904 } else {
11905 ctxt->myDoc = oldctxt->myDoc;
11906 content = ctxt->myDoc->children;
11907 last = ctxt->myDoc->last;
11908 }
11909 newRoot = xmlNewDocNode(ctxt->myDoc, NULL, BAD_CAST "pseudoroot", NULL);
11910 if (newRoot == NULL) {
11911 ctxt->sax = oldsax;
11912 ctxt->dict = NULL;
11913 xmlFreeParserCtxt(ctxt);
11914 if (newDoc != NULL) {
11915 xmlFreeDoc(newDoc);
11916 }
11917 return(XML_ERR_INTERNAL_ERROR);
11918 }
11919 ctxt->myDoc->children = NULL;
11920 ctxt->myDoc->last = NULL;
11921 xmlAddChild((xmlNodePtr) ctxt->myDoc, newRoot);
11922 nodePush(ctxt, ctxt->myDoc->children);
11923 ctxt->instate = XML_PARSER_CONTENT;
11924 ctxt->depth = oldctxt->depth + 1;
11925
11926 ctxt->validate = 0;
11927 ctxt->loadsubset = oldctxt->loadsubset;
11928 if ((oldctxt->validate) || (oldctxt->replaceEntities != 0)) {
11929 /*
11930 * ID/IDREF registration will be done in xmlValidateElement below
11931 */
11932 ctxt->loadsubset |= XML_SKIP_IDS;
11933 }
11934 ctxt->dictNames = oldctxt->dictNames;
11935 ctxt->attsDefault = oldctxt->attsDefault;
11936 ctxt->attsSpecial = oldctxt->attsSpecial;
11937
11938 xmlParseContent(ctxt);
11939 if ((RAW == '<') && (NXT(1) == '/')) {
11940 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
11941 } else if (RAW != 0) {
11942 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
11943 }
11944 if (ctxt->node != ctxt->myDoc->children) {
11945 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
11946 }
11947
11948 if (!ctxt->wellFormed) {
11949 if (ctxt->errNo == 0)
11950 ret = XML_ERR_INTERNAL_ERROR;
11951 else
11952 ret = (xmlParserErrors)ctxt->errNo;
11953 } else {
11954 ret = XML_ERR_OK;
11955 }
11956
11957 if ((lst != NULL) && (ret == XML_ERR_OK)) {
11958 xmlNodePtr cur;
11959
11960 /*
11961 * Return the newly created nodeset after unlinking it from
11962 * they pseudo parent.
11963 */
11964 cur = ctxt->myDoc->children->children;
11965 *lst = cur;
11966 while (cur != NULL) {
11967#ifdef LIBXML_VALID_ENABLED
11968 if ((oldctxt->validate) && (oldctxt->wellFormed) &&
11969 (oldctxt->myDoc) && (oldctxt->myDoc->intSubset) &&
11970 (cur->type == XML_ELEMENT_NODE)) {
11971 oldctxt->valid &= xmlValidateElement(&oldctxt->vctxt,
11972 oldctxt->myDoc, cur);
11973 }
11974#endif /* LIBXML_VALID_ENABLED */
11975 cur->parent = NULL;
11976 cur = cur->next;
11977 }
11978 ctxt->myDoc->children->children = NULL;
11979 }
11980 if (ctxt->myDoc != NULL) {
11981 xmlFreeNode(ctxt->myDoc->children);
11982 ctxt->myDoc->children = content;
11983 ctxt->myDoc->last = last;
11984 }
11985
11986 /*
11987 * Record in the parent context the number of entities replacement
11988 * done when parsing that reference.
11989 */
11990 oldctxt->nbentities += ctxt->nbentities;
11991 /*
11992 * Also record the last error if any
11993 */
11994 if (ctxt->lastError.code != XML_ERR_OK)
11995 xmlCopyError(&ctxt->lastError, &oldctxt->lastError);
11996
11997 ctxt->sax = oldsax;
11998 ctxt->dict = NULL;
11999 ctxt->attsDefault = NULL;
12000 ctxt->attsSpecial = NULL;
12001 xmlFreeParserCtxt(ctxt);
12002 if (newDoc != NULL) {
12003 xmlFreeDoc(newDoc);
12004 }
12005
12006 return(ret);
12007}
12008
12009/**
12010 * xmlParseInNodeContext:
12011 * @node: the context node
12012 * @data: the input string
12013 * @datalen: the input string length in bytes
12014 * @options: a combination of xmlParserOption
12015 * @lst: the return value for the set of parsed nodes
12016 *
12017 * Parse a well-balanced chunk of an XML document
12018 * within the context (DTD, namespaces, etc ...) of the given node.
12019 *
12020 * The allowed sequence for the data is a Well Balanced Chunk defined by
12021 * the content production in the XML grammar:
12022 *
12023 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
12024 *
12025 * Returns XML_ERR_OK if the chunk is well balanced, and the parser
12026 * error code otherwise
12027 */
12028xmlParserErrors
12029xmlParseInNodeContext(xmlNodePtr node, const char *data, int datalen,
12030 int options, xmlNodePtr *lst) {
12031#ifdef SAX2
12032 xmlParserCtxtPtr ctxt;
12033 xmlDocPtr doc = NULL;
12034 xmlNodePtr fake, cur;
12035 int nsnr = 0;
12036
12037 xmlParserErrors ret = XML_ERR_OK;
12038
12039 /*
12040 * check all input parameters, grab the document
12041 */
12042 if ((lst == NULL) || (node == NULL) || (data == NULL) || (datalen < 0))
12043 return(XML_ERR_INTERNAL_ERROR);
12044 switch (node->type) {
12045 case XML_ELEMENT_NODE:
12046 case XML_ATTRIBUTE_NODE:
12047 case XML_TEXT_NODE:
12048 case XML_CDATA_SECTION_NODE:
12049 case XML_ENTITY_REF_NODE:
12050 case XML_PI_NODE:
12051 case XML_COMMENT_NODE:
12052 case XML_DOCUMENT_NODE:
12053 case XML_HTML_DOCUMENT_NODE:
12054 break;
12055 default:
12056 return(XML_ERR_INTERNAL_ERROR);
12057
12058 }
12059 while ((node != NULL) && (node->type != XML_ELEMENT_NODE) &&
12060 (node->type != XML_DOCUMENT_NODE) &&
12061 (node->type != XML_HTML_DOCUMENT_NODE))
12062 node = node->parent;
12063 if (node == NULL)
12064 return(XML_ERR_INTERNAL_ERROR);
12065 if (node->type == XML_ELEMENT_NODE)
12066 doc = node->doc;
12067 else
12068 doc = (xmlDocPtr) node;
12069 if (doc == NULL)
12070 return(XML_ERR_INTERNAL_ERROR);
12071
12072 /*
12073 * allocate a context and set-up everything not related to the
12074 * node position in the tree
12075 */
12076 if (doc->type == XML_DOCUMENT_NODE)
12077 ctxt = xmlCreateMemoryParserCtxt((char *) data, datalen);
12078#ifdef LIBXML_HTML_ENABLED
12079 else if (doc->type == XML_HTML_DOCUMENT_NODE)
12080 ctxt = htmlCreateMemoryParserCtxt((char *) data, datalen);
12081#endif
12082 else
12083 return(XML_ERR_INTERNAL_ERROR);
12084
12085 if (ctxt == NULL)
12086 return(XML_ERR_NO_MEMORY);
12087 fake = xmlNewComment(NULL);
12088 if (fake == NULL) {
12089 xmlFreeParserCtxt(ctxt);
12090 return(XML_ERR_NO_MEMORY);
12091 }
12092 xmlAddChild(node, fake);
12093
12094 /*
12095 * Use input doc's dict if present, else assure XML_PARSE_NODICT is set.
12096 * We need a dictionary for xmlDetectSAX2, so if there's no doc dict
12097 * we must wait until the last moment to free the original one.
12098 */
12099 if (doc->dict != NULL) {
12100 if (ctxt->dict != NULL)
12101 xmlDictFree(ctxt->dict);
12102 ctxt->dict = doc->dict;
12103 } else
12104 options |= XML_PARSE_NODICT;
12105
12106 xmlCtxtUseOptions(ctxt, options);
12107 xmlDetectSAX2(ctxt);
12108 ctxt->myDoc = doc;
12109
12110 if (node->type == XML_ELEMENT_NODE) {
12111 nodePush(ctxt, node);
12112 /*
12113 * initialize the SAX2 namespaces stack
12114 */
12115 cur = node;
12116 while ((cur != NULL) && (cur->type == XML_ELEMENT_NODE)) {
12117 xmlNsPtr ns = cur->nsDef;
12118 const xmlChar *iprefix, *ihref;
12119
12120 while (ns != NULL) {
12121 if (ctxt->dict) {
12122 iprefix = xmlDictLookup(ctxt->dict, ns->prefix, -1);
12123 ihref = xmlDictLookup(ctxt->dict, ns->href, -1);
12124 } else {
12125 iprefix = ns->prefix;
12126 ihref = ns->href;
12127 }
12128
12129 if (xmlGetNamespace(ctxt, iprefix) == NULL) {
12130 nsPush(ctxt, iprefix, ihref);
12131 nsnr++;
12132 }
12133 ns = ns->next;
12134 }
12135 cur = cur->parent;
12136 }
12137 ctxt->instate = XML_PARSER_CONTENT;
12138 }
12139
12140 if ((ctxt->validate) || (ctxt->replaceEntities != 0)) {
12141 /*
12142 * ID/IDREF registration will be done in xmlValidateElement below
12143 */
12144 ctxt->loadsubset |= XML_SKIP_IDS;
12145 }
12146
12147#ifdef LIBXML_HTML_ENABLED
12148 if (doc->type == XML_HTML_DOCUMENT_NODE)
12149 __htmlParseContent(ctxt);
12150 else
12151#endif
12152 xmlParseContent(ctxt);
12153
12154 nsPop(ctxt, nsnr);
12155 if ((RAW == '<') && (NXT(1) == '/')) {
12156 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
12157 } else if (RAW != 0) {
12158 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
12159 }
12160 if ((ctxt->node != NULL) && (ctxt->node != node)) {
12161 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
12162 ctxt->wellFormed = 0;
12163 }
12164
12165 if (!ctxt->wellFormed) {
12166 if (ctxt->errNo == 0)
12167 ret = XML_ERR_INTERNAL_ERROR;
12168 else
12169 ret = (xmlParserErrors)ctxt->errNo;
12170 } else {
12171 ret = XML_ERR_OK;
12172 }
12173
12174 /*
12175 * Return the newly created nodeset after unlinking it from
12176 * the pseudo sibling.
12177 */
12178
12179 cur = fake->next;
12180 fake->next = NULL;
12181 node->last = fake;
12182
12183 if (cur != NULL) {
12184 cur->prev = NULL;
12185 }
12186
12187 *lst = cur;
12188
12189 while (cur != NULL) {
12190 cur->parent = NULL;
12191 cur = cur->next;
12192 }
12193
12194 xmlUnlinkNode(fake);
12195 xmlFreeNode(fake);
12196
12197
12198 if (ret != XML_ERR_OK) {
12199 xmlFreeNodeList(*lst);
12200 *lst = NULL;
12201 }
12202
12203 if (doc->dict != NULL)
12204 ctxt->dict = NULL;
12205 xmlFreeParserCtxt(ctxt);
12206
12207 return(ret);
12208#else /* !SAX2 */
12209 return(XML_ERR_INTERNAL_ERROR);
12210#endif
12211}
12212
12213#ifdef LIBXML_SAX1_ENABLED
12214/**
12215 * xmlParseBalancedChunkMemoryRecover:
12216 * @doc: the document the chunk pertains to
12217 * @sax: the SAX handler bloc (possibly NULL)
12218 * @user_data: The user data returned on SAX callbacks (possibly NULL)
12219 * @depth: Used for loop detection, use 0
12220 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
12221 * @lst: the return value for the set of parsed nodes
12222 * @recover: return nodes even if the data is broken (use 0)
12223 *
12224 *
12225 * Parse a well-balanced chunk of an XML document
12226 * called by the parser
12227 * The allowed sequence for the Well Balanced Chunk is the one defined by
12228 * the content production in the XML grammar:
12229 *
12230 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
12231 *
12232 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
12233 * the parser error code otherwise
12234 *
12235 * In case recover is set to 1, the nodelist will not be empty even if
12236 * the parsed chunk is not well balanced.
12237 */
12238int
12239xmlParseBalancedChunkMemoryRecover(xmlDocPtr doc, xmlSAXHandlerPtr sax,
12240 void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst,
12241 int recover) {
12242 xmlParserCtxtPtr ctxt;
12243 xmlDocPtr newDoc;
12244 xmlSAXHandlerPtr oldsax = NULL;
12245 xmlNodePtr content, newRoot;
12246 int size;
12247 int ret = 0;
12248
12249 if (depth > 40) {
12250 return(XML_ERR_ENTITY_LOOP);
12251 }
12252
12253
12254 if (lst != NULL)
12255 *lst = NULL;
12256 if (string == NULL)
12257 return(-1);
12258
12259 size = xmlStrlen(string);
12260
12261 ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
12262 if (ctxt == NULL) return(-1);
12263 ctxt->userData = ctxt;
12264 if (sax != NULL) {
12265 oldsax = ctxt->sax;
12266 ctxt->sax = sax;
12267 if (user_data != NULL)
12268 ctxt->userData = user_data;
12269 }
12270 newDoc = xmlNewDoc(BAD_CAST "1.0");
12271 if (newDoc == NULL) {
12272 xmlFreeParserCtxt(ctxt);
12273 return(-1);
12274 }
12275 if ((doc != NULL) && (doc->dict != NULL)) {
12276 xmlDictFree(ctxt->dict);
12277 ctxt->dict = doc->dict;
12278 xmlDictReference(ctxt->dict);
12279 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
12280 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
12281 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
12282 ctxt->dictNames = 1;
12283 } else {
12284 xmlCtxtUseOptions(ctxt, XML_PARSE_NODICT);
12285 }
12286 if (doc != NULL) {
12287 newDoc->intSubset = doc->intSubset;
12288 newDoc->extSubset = doc->extSubset;
12289 }
12290 newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
12291 if (newRoot == NULL) {
12292 if (sax != NULL)
12293 ctxt->sax = oldsax;
12294 xmlFreeParserCtxt(ctxt);
12295 newDoc->intSubset = NULL;
12296 newDoc->extSubset = NULL;
12297 xmlFreeDoc(newDoc);
12298 return(-1);
12299 }
12300 xmlAddChild((xmlNodePtr) newDoc, newRoot);
12301 nodePush(ctxt, newRoot);
12302 if (doc == NULL) {
12303 ctxt->myDoc = newDoc;
12304 } else {
12305 ctxt->myDoc = newDoc;
12306 newDoc->children->doc = doc;
12307 /* Ensure that doc has XML spec namespace */
12308 xmlSearchNsByHref(doc, (xmlNodePtr)doc, XML_XML_NAMESPACE);
12309 newDoc->oldNs = doc->oldNs;
12310 }
12311 ctxt->instate = XML_PARSER_CONTENT;
12312 ctxt->depth = depth;
12313
12314 /*
12315 * Doing validity checking on chunk doesn't make sense
12316 */
12317 ctxt->validate = 0;
12318 ctxt->loadsubset = 0;
12319 xmlDetectSAX2(ctxt);
12320
12321 if ( doc != NULL ){
12322 content = doc->children;
12323 doc->children = NULL;
12324 xmlParseContent(ctxt);
12325 doc->children = content;
12326 }
12327 else {
12328 xmlParseContent(ctxt);
12329 }
12330 if ((RAW == '<') && (NXT(1) == '/')) {
12331 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
12332 } else if (RAW != 0) {
12333 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
12334 }
12335 if (ctxt->node != newDoc->children) {
12336 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
12337 }
12338
12339 if (!ctxt->wellFormed) {
12340 if (ctxt->errNo == 0)
12341 ret = 1;
12342 else
12343 ret = ctxt->errNo;
12344 } else {
12345 ret = 0;
12346 }
12347
12348 if ((lst != NULL) && ((ret == 0) || (recover == 1))) {
12349 xmlNodePtr cur;
12350
12351 /*
12352 * Return the newly created nodeset after unlinking it from
12353 * they pseudo parent.
12354 */
12355 cur = newDoc->children->children;
12356 *lst = cur;
12357 while (cur != NULL) {
12358 xmlSetTreeDoc(cur, doc);
12359 cur->parent = NULL;
12360 cur = cur->next;
12361 }
12362 newDoc->children->children = NULL;
12363 }
12364
12365 if (sax != NULL)
12366 ctxt->sax = oldsax;
12367 xmlFreeParserCtxt(ctxt);
12368 newDoc->intSubset = NULL;
12369 newDoc->extSubset = NULL;
12370 newDoc->oldNs = NULL;
12371 xmlFreeDoc(newDoc);
12372
12373 return(ret);
12374}
12375
12376/**
12377 * xmlSAXParseEntity:
12378 * @sax: the SAX handler block
12379 * @filename: the filename
12380 *
12381 * parse an XML external entity out of context and build a tree.
12382 * It use the given SAX function block to handle the parsing callback.
12383 * If sax is NULL, fallback to the default DOM tree building routines.
12384 *
12385 * [78] extParsedEnt ::= TextDecl? content
12386 *
12387 * This correspond to a "Well Balanced" chunk
12388 *
12389 * Returns the resulting document tree
12390 */
12391
12392xmlDocPtr
12393xmlSAXParseEntity(xmlSAXHandlerPtr sax, const char *filename) {
12394 xmlDocPtr ret;
12395 xmlParserCtxtPtr ctxt;
12396
12397 ctxt = xmlCreateFileParserCtxt(filename);
12398 if (ctxt == NULL) {
12399 return(NULL);
12400 }
12401 if (sax != NULL) {
12402 if (ctxt->sax != NULL)
12403 xmlFree(ctxt->sax);
12404 ctxt->sax = sax;
12405 ctxt->userData = NULL;
12406 }
12407
12408 xmlParseExtParsedEnt(ctxt);
12409
12410 if (ctxt->wellFormed)
12411 ret = ctxt->myDoc;
12412 else {
12413 ret = NULL;
12414 xmlFreeDoc(ctxt->myDoc);
12415 ctxt->myDoc = NULL;
12416 }
12417 if (sax != NULL)
12418 ctxt->sax = NULL;
12419 xmlFreeParserCtxt(ctxt);
12420
12421 return(ret);
12422}
12423
12424/**
12425 * xmlParseEntity:
12426 * @filename: the filename
12427 *
12428 * parse an XML external entity out of context and build a tree.
12429 *
12430 * [78] extParsedEnt ::= TextDecl? content
12431 *
12432 * This correspond to a "Well Balanced" chunk
12433 *
12434 * Returns the resulting document tree
12435 */
12436
12437xmlDocPtr
12438xmlParseEntity(const char *filename) {
12439 return(xmlSAXParseEntity(NULL, filename));
12440}
12441#endif /* LIBXML_SAX1_ENABLED */
12442
12443/**
12444 * xmlCreateEntityParserCtxt:
12445 * @URL: the entity URL
12446 * @ID: the entity PUBLIC ID
12447 * @base: a possible base for the target URI
12448 *
12449 * Create a parser context for an external entity
12450 * Automatic support for ZLIB/Compress compressed document is provided
12451 * by default if found at compile-time.
12452 *
12453 * Returns the new parser context or NULL
12454 */
12455xmlParserCtxtPtr
12456xmlCreateEntityParserCtxt(const xmlChar *URL, const xmlChar *ID,
12457 const xmlChar *base) {
12458 xmlParserCtxtPtr ctxt;
12459 xmlParserInputPtr inputStream;
12460 char *directory = NULL;
12461 xmlChar *uri;
12462
12463 ctxt = xmlNewParserCtxt();
12464 if (ctxt == NULL) {
12465 return(NULL);
12466 }
12467
12468 uri = xmlBuildURI(URL, base);
12469
12470 if (uri == NULL) {
12471 inputStream = xmlLoadExternalEntity((char *)URL, (char *)ID, ctxt);
12472 if (inputStream == NULL) {
12473 xmlFreeParserCtxt(ctxt);
12474 return(NULL);
12475 }
12476
12477 inputPush(ctxt, inputStream);
12478
12479 if ((ctxt->directory == NULL) && (directory == NULL))
12480 directory = xmlParserGetDirectory((char *)URL);
12481 if ((ctxt->directory == NULL) && (directory != NULL))
12482 ctxt->directory = directory;
12483 } else {
12484 inputStream = xmlLoadExternalEntity((char *)uri, (char *)ID, ctxt);
12485 if (inputStream == NULL) {
12486 xmlFree(uri);
12487 xmlFreeParserCtxt(ctxt);
12488 return(NULL);
12489 }
12490
12491 inputPush(ctxt, inputStream);
12492
12493 if ((ctxt->directory == NULL) && (directory == NULL))
12494 directory = xmlParserGetDirectory((char *)uri);
12495 if ((ctxt->directory == NULL) && (directory != NULL))
12496 ctxt->directory = directory;
12497 xmlFree(uri);
12498 }
12499 return(ctxt);
12500}
12501
12502/************************************************************************
12503 * *
12504 * Front ends when parsing from a file *
12505 * *
12506 ************************************************************************/
12507
12508/**
12509 * xmlCreateURLParserCtxt:
12510 * @filename: the filename or URL
12511 * @options: a combination of xmlParserOption
12512 *
12513 * Create a parser context for a file or URL content.
12514 * Automatic support for ZLIB/Compress compressed document is provided
12515 * by default if found at compile-time and for file accesses
12516 *
12517 * Returns the new parser context or NULL
12518 */
12519xmlParserCtxtPtr
12520xmlCreateURLParserCtxt(const char *filename, int options)
12521{
12522 xmlParserCtxtPtr ctxt;
12523 xmlParserInputPtr inputStream;
12524 char *directory = NULL;
12525
12526 ctxt = xmlNewParserCtxt();
12527 if (ctxt == NULL) {
12528 xmlErrMemory(NULL, "cannot allocate parser context");
12529 return(NULL);
12530 }
12531
12532 if (options)
12533 xmlCtxtUseOptions(ctxt, options);
12534 ctxt->linenumbers = 1;
12535
12536 inputStream = xmlLoadExternalEntity(filename, NULL, ctxt);
12537 if (inputStream == NULL) {
12538 xmlFreeParserCtxt(ctxt);
12539 return(NULL);
12540 }
12541
12542 inputPush(ctxt, inputStream);
12543 if ((ctxt->directory == NULL) && (directory == NULL))
12544 directory = xmlParserGetDirectory(filename);
12545 if ((ctxt->directory == NULL) && (directory != NULL))
12546 ctxt->directory = directory;
12547
12548 return(ctxt);
12549}
12550
12551/**
12552 * xmlCreateFileParserCtxt:
12553 * @filename: the filename
12554 *
12555 * Create a parser context for a file content.
12556 * Automatic support for ZLIB/Compress compressed document is provided
12557 * by default if found at compile-time.
12558 *
12559 * Returns the new parser context or NULL
12560 */
12561xmlParserCtxtPtr
12562xmlCreateFileParserCtxt(const char *filename)
12563{
12564 return(xmlCreateURLParserCtxt(filename, 0));
12565}
12566
12567#ifdef LIBXML_SAX1_ENABLED
12568/**
12569 * xmlSAXParseFileWithData:
12570 * @sax: the SAX handler block
12571 * @filename: the filename
12572 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
12573 * documents
12574 * @data: the userdata
12575 *
12576 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
12577 * compressed document is provided by default if found at compile-time.
12578 * It use the given SAX function block to handle the parsing callback.
12579 * If sax is NULL, fallback to the default DOM tree building routines.
12580 *
12581 * User data (void *) is stored within the parser context in the
12582 * context's _private member, so it is available nearly everywhere in libxml
12583 *
12584 * Returns the resulting document tree
12585 */
12586
12587xmlDocPtr
12588xmlSAXParseFileWithData(xmlSAXHandlerPtr sax, const char *filename,
12589 int recovery, void *data) {
12590 xmlDocPtr ret;
12591 xmlParserCtxtPtr ctxt;
12592 char *directory = NULL;
12593
12594 xmlInitParser();
12595
12596 ctxt = xmlCreateFileParserCtxt(filename);
12597 if (ctxt == NULL) {
12598 return(NULL);
12599 }
12600 if (sax != NULL) {
12601 if (ctxt->sax != NULL)
12602 xmlFree(ctxt->sax);
12603 ctxt->sax = sax;
12604 }
12605 xmlDetectSAX2(ctxt);
12606 if (data!=NULL) {
12607 ctxt->_private = data;
12608 }
12609
12610 if ((ctxt->directory == NULL) && (directory == NULL))
12611 directory = xmlParserGetDirectory(filename);
12612 if ((ctxt->directory == NULL) && (directory != NULL))
12613 ctxt->directory = (char *) xmlStrdup((xmlChar *) directory);
12614
12615 ctxt->recovery = recovery;
12616
12617 xmlParseDocument(ctxt);
12618
12619 if ((ctxt->wellFormed) || recovery) {
12620 ret = ctxt->myDoc;
12621 if (ret != NULL) {
12622 if (ctxt->input->buf->compressed > 0)
12623 ret->compression = 9;
12624 else
12625 ret->compression = ctxt->input->buf->compressed;
12626 }
12627 }
12628 else {
12629 ret = NULL;
12630 xmlFreeDoc(ctxt->myDoc);
12631 ctxt->myDoc = NULL;
12632 }
12633 if (sax != NULL)
12634 ctxt->sax = NULL;
12635 xmlFreeParserCtxt(ctxt);
12636
12637 return(ret);
12638}
12639
12640/**
12641 * xmlSAXParseFile:
12642 * @sax: the SAX handler block
12643 * @filename: the filename
12644 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
12645 * documents
12646 *
12647 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
12648 * compressed document is provided by default if found at compile-time.
12649 * It use the given SAX function block to handle the parsing callback.
12650 * If sax is NULL, fallback to the default DOM tree building routines.
12651 *
12652 * Returns the resulting document tree
12653 */
12654
12655xmlDocPtr
12656xmlSAXParseFile(xmlSAXHandlerPtr sax, const char *filename,
12657 int recovery) {
12658 return(xmlSAXParseFileWithData(sax,filename,recovery,NULL));
12659}
12660
12661/**
12662 * xmlRecoverDoc:
12663 * @cur: a pointer to an array of xmlChar
12664 *
12665 * parse an XML in-memory document and build a tree.
12666 * In the case the document is not Well Formed, a tree is built anyway
12667 *
12668 * Returns the resulting document tree
12669 */
12670
12671xmlDocPtr
12672xmlRecoverDoc(xmlChar *cur) {
12673 return(xmlSAXParseDoc(NULL, cur, 1));
12674}
12675
12676/**
12677 * xmlParseFile:
12678 * @filename: the filename
12679 *
12680 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
12681 * compressed document is provided by default if found at compile-time.
12682 *
12683 * Returns the resulting document tree if the file was wellformed,
12684 * NULL otherwise.
12685 */
12686
12687xmlDocPtr
12688xmlParseFile(const char *filename) {
12689 return(xmlSAXParseFile(NULL, filename, 0));
12690}
12691
12692/**
12693 * xmlRecoverFile:
12694 * @filename: the filename
12695 *
12696 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
12697 * compressed document is provided by default if found at compile-time.
12698 * In the case the document is not Well Formed, a tree is built anyway
12699 *
12700 * Returns the resulting document tree
12701 */
12702
12703xmlDocPtr
12704xmlRecoverFile(const char *filename) {
12705 return(xmlSAXParseFile(NULL, filename, 1));
12706}
12707
12708
12709/**
12710 * xmlSetupParserForBuffer:
12711 * @ctxt: an XML parser context
12712 * @buffer: a xmlChar * buffer
12713 * @filename: a file name
12714 *
12715 * Setup the parser context to parse a new buffer; Clears any prior
12716 * contents from the parser context. The buffer parameter must not be
12717 * NULL, but the filename parameter can be
12718 */
12719void
12720xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt, const xmlChar* buffer,
12721 const char* filename)
12722{
12723 xmlParserInputPtr input;
12724
12725 if ((ctxt == NULL) || (buffer == NULL))
12726 return;
12727
12728 input = xmlNewInputStream(ctxt);
12729 if (input == NULL) {
12730 xmlErrMemory(NULL, "parsing new buffer: out of memory\n");
12731 xmlClearParserCtxt(ctxt);
12732 return;
12733 }
12734
12735 xmlClearParserCtxt(ctxt);
12736 if (filename != NULL)
12737 input->filename = (char *) xmlCanonicPath((const xmlChar *)filename);
12738 input->base = buffer;
12739 input->cur = buffer;
12740 input->end = &buffer[xmlStrlen(buffer)];
12741 inputPush(ctxt, input);
12742}
12743
12744/**
12745 * xmlSAXUserParseFile:
12746 * @sax: a SAX handler
12747 * @user_data: The user data returned on SAX callbacks
12748 * @filename: a file name
12749 *
12750 * parse an XML file and call the given SAX handler routines.
12751 * Automatic support for ZLIB/Compress compressed document is provided
12752 *
12753 * Returns 0 in case of success or a error number otherwise
12754 */
12755int
12756xmlSAXUserParseFile(xmlSAXHandlerPtr sax, void *user_data,
12757 const char *filename) {
12758 int ret = 0;
12759 xmlParserCtxtPtr ctxt;
12760
12761 ctxt = xmlCreateFileParserCtxt(filename);
12762 if (ctxt == NULL) return -1;
12763 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
12764 xmlFree(ctxt->sax);
12765 ctxt->sax = sax;
12766 xmlDetectSAX2(ctxt);
12767
12768 if (user_data != NULL)
12769 ctxt->userData = user_data;
12770
12771 xmlParseDocument(ctxt);
12772
12773 if (ctxt->wellFormed)
12774 ret = 0;
12775 else {
12776 if (ctxt->errNo != 0)
12777 ret = ctxt->errNo;
12778 else
12779 ret = -1;
12780 }
12781 if (sax != NULL)
12782 ctxt->sax = NULL;
12783 if (ctxt->myDoc != NULL) {
12784 xmlFreeDoc(ctxt->myDoc);
12785 ctxt->myDoc = NULL;
12786 }
12787 xmlFreeParserCtxt(ctxt);
12788
12789 return ret;
12790}
12791#endif /* LIBXML_SAX1_ENABLED */
12792
12793/************************************************************************
12794 * *
12795 * Front ends when parsing from memory *
12796 * *
12797 ************************************************************************/
12798
12799/**
12800 * xmlCreateMemoryParserCtxt:
12801 * @buffer: a pointer to a char array
12802 * @size: the size of the array
12803 *
12804 * Create a parser context for an XML in-memory document.
12805 *
12806 * Returns the new parser context or NULL
12807 */
12808xmlParserCtxtPtr
12809xmlCreateMemoryParserCtxt(const char *buffer, int size) {
12810 xmlParserCtxtPtr ctxt;
12811 xmlParserInputPtr input;
12812 xmlParserInputBufferPtr buf;
12813
12814 if (buffer == NULL)
12815 return(NULL);
12816 if (size <= 0)
12817 return(NULL);
12818
12819 ctxt = xmlNewParserCtxt();
12820 if (ctxt == NULL)
12821 return(NULL);
12822
12823 /* TODO: xmlParserInputBufferCreateStatic, requires some serious changes */
12824 buf = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
12825 if (buf == NULL) {
12826 xmlFreeParserCtxt(ctxt);
12827 return(NULL);
12828 }
12829
12830 input = xmlNewInputStream(ctxt);
12831 if (input == NULL) {
12832 xmlFreeParserInputBuffer(buf);
12833 xmlFreeParserCtxt(ctxt);
12834 return(NULL);
12835 }
12836
12837 input->filename = NULL;
12838 input->buf = buf;
12839 input->base = input->buf->buffer->content;
12840 input->cur = input->buf->buffer->content;
12841 input->end = &input->buf->buffer->content[input->buf->buffer->use];
12842
12843 inputPush(ctxt, input);
12844 return(ctxt);
12845}
12846
12847#ifdef LIBXML_SAX1_ENABLED
12848/**
12849 * xmlSAXParseMemoryWithData:
12850 * @sax: the SAX handler block
12851 * @buffer: an pointer to a char array
12852 * @size: the size of the array
12853 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
12854 * documents
12855 * @data: the userdata
12856 *
12857 * parse an XML in-memory block and use the given SAX function block
12858 * to handle the parsing callback. If sax is NULL, fallback to the default
12859 * DOM tree building routines.
12860 *
12861 * User data (void *) is stored within the parser context in the
12862 * context's _private member, so it is available nearly everywhere in libxml
12863 *
12864 * Returns the resulting document tree
12865 */
12866
12867xmlDocPtr
12868xmlSAXParseMemoryWithData(xmlSAXHandlerPtr sax, const char *buffer,
12869 int size, int recovery, void *data) {
12870 xmlDocPtr ret;
12871 xmlParserCtxtPtr ctxt;
12872
12873 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
12874 if (ctxt == NULL) return(NULL);
12875 if (sax != NULL) {
12876 if (ctxt->sax != NULL)
12877 xmlFree(ctxt->sax);
12878 ctxt->sax = sax;
12879 }
12880 xmlDetectSAX2(ctxt);
12881 if (data!=NULL) {
12882 ctxt->_private=data;
12883 }
12884
12885 ctxt->recovery = recovery;
12886
12887 xmlParseDocument(ctxt);
12888
12889 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
12890 else {
12891 ret = NULL;
12892 xmlFreeDoc(ctxt->myDoc);
12893 ctxt->myDoc = NULL;
12894 }
12895 if (sax != NULL)
12896 ctxt->sax = NULL;
12897 xmlFreeParserCtxt(ctxt);
12898
12899 return(ret);
12900}
12901
12902/**
12903 * xmlSAXParseMemory:
12904 * @sax: the SAX handler block
12905 * @buffer: an pointer to a char array
12906 * @size: the size of the array
12907 * @recovery: work in recovery mode, i.e. tries to read not Well Formed
12908 * documents
12909 *
12910 * parse an XML in-memory block and use the given SAX function block
12911 * to handle the parsing callback. If sax is NULL, fallback to the default
12912 * DOM tree building routines.
12913 *
12914 * Returns the resulting document tree
12915 */
12916xmlDocPtr
12917xmlSAXParseMemory(xmlSAXHandlerPtr sax, const char *buffer,
12918 int size, int recovery) {
12919 return xmlSAXParseMemoryWithData(sax, buffer, size, recovery, NULL);
12920}
12921
12922/**
12923 * xmlParseMemory:
12924 * @buffer: an pointer to a char array
12925 * @size: the size of the array
12926 *
12927 * parse an XML in-memory block and build a tree.
12928 *
12929 * Returns the resulting document tree
12930 */
12931
12932xmlDocPtr xmlParseMemory(const char *buffer, int size) {
12933 return(xmlSAXParseMemory(NULL, buffer, size, 0));
12934}
12935
12936/**
12937 * xmlRecoverMemory:
12938 * @buffer: an pointer to a char array
12939 * @size: the size of the array
12940 *
12941 * parse an XML in-memory block and build a tree.
12942 * In the case the document is not Well Formed, a tree is built anyway
12943 *
12944 * Returns the resulting document tree
12945 */
12946
12947xmlDocPtr xmlRecoverMemory(const char *buffer, int size) {
12948 return(xmlSAXParseMemory(NULL, buffer, size, 1));
12949}
12950
12951/**
12952 * xmlSAXUserParseMemory:
12953 * @sax: a SAX handler
12954 * @user_data: The user data returned on SAX callbacks
12955 * @buffer: an in-memory XML document input
12956 * @size: the length of the XML document in bytes
12957 *
12958 * A better SAX parsing routine.
12959 * parse an XML in-memory buffer and call the given SAX handler routines.
12960 *
12961 * Returns 0 in case of success or a error number otherwise
12962 */
12963int xmlSAXUserParseMemory(xmlSAXHandlerPtr sax, void *user_data,
12964 const char *buffer, int size) {
12965 int ret = 0;
12966 xmlParserCtxtPtr ctxt;
12967
12968 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
12969 if (ctxt == NULL) return -1;
12970 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
12971 xmlFree(ctxt->sax);
12972 ctxt->sax = sax;
12973 xmlDetectSAX2(ctxt);
12974
12975 if (user_data != NULL)
12976 ctxt->userData = user_data;
12977
12978 xmlParseDocument(ctxt);
12979
12980 if (ctxt->wellFormed)
12981 ret = 0;
12982 else {
12983 if (ctxt->errNo != 0)
12984 ret = ctxt->errNo;
12985 else
12986 ret = -1;
12987 }
12988 if (sax != NULL)
12989 ctxt->sax = NULL;
12990 if (ctxt->myDoc != NULL) {
12991 xmlFreeDoc(ctxt->myDoc);
12992 ctxt->myDoc = NULL;
12993 }
12994 xmlFreeParserCtxt(ctxt);
12995
12996 return ret;
12997}
12998#endif /* LIBXML_SAX1_ENABLED */
12999
13000/**
13001 * xmlCreateDocParserCtxt:
13002 * @cur: a pointer to an array of xmlChar
13003 *
13004 * Creates a parser context for an XML in-memory document.
13005 *
13006 * Returns the new parser context or NULL
13007 */
13008xmlParserCtxtPtr
13009xmlCreateDocParserCtxt(const xmlChar *cur) {
13010 int len;
13011
13012 if (cur == NULL)
13013 return(NULL);
13014 len = xmlStrlen(cur);
13015 return(xmlCreateMemoryParserCtxt((const char *)cur, len));
13016}
13017
13018#ifdef LIBXML_SAX1_ENABLED
13019/**
13020 * xmlSAXParseDoc:
13021 * @sax: the SAX handler block
13022 * @cur: a pointer to an array of xmlChar
13023 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
13024 * documents
13025 *
13026 * parse an XML in-memory document and build a tree.
13027 * It use the given SAX function block to handle the parsing callback.
13028 * If sax is NULL, fallback to the default DOM tree building routines.
13029 *
13030 * Returns the resulting document tree
13031 */
13032
13033xmlDocPtr
13034xmlSAXParseDoc(xmlSAXHandlerPtr sax, const xmlChar *cur, int recovery) {
13035 xmlDocPtr ret;
13036 xmlParserCtxtPtr ctxt;
13037 xmlSAXHandlerPtr oldsax = NULL;
13038
13039 if (cur == NULL) return(NULL);
13040
13041
13042 ctxt = xmlCreateDocParserCtxt(cur);
13043 if (ctxt == NULL) return(NULL);
13044 if (sax != NULL) {
13045 oldsax = ctxt->sax;
13046 ctxt->sax = sax;
13047 ctxt->userData = NULL;
13048 }
13049 xmlDetectSAX2(ctxt);
13050
13051 xmlParseDocument(ctxt);
13052 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
13053 else {
13054 ret = NULL;
13055 xmlFreeDoc(ctxt->myDoc);
13056 ctxt->myDoc = NULL;
13057 }
13058 if (sax != NULL)
13059 ctxt->sax = oldsax;
13060 xmlFreeParserCtxt(ctxt);
13061
13062 return(ret);
13063}
13064
13065/**
13066 * xmlParseDoc:
13067 * @cur: a pointer to an array of xmlChar
13068 *
13069 * parse an XML in-memory document and build a tree.
13070 *
13071 * Returns the resulting document tree
13072 */
13073
13074xmlDocPtr
13075xmlParseDoc(const xmlChar *cur) {
13076 return(xmlSAXParseDoc(NULL, cur, 0));
13077}
13078#endif /* LIBXML_SAX1_ENABLED */
13079
13080#ifdef LIBXML_LEGACY_ENABLED
13081/************************************************************************
13082 * *
13083 * Specific function to keep track of entities references *
13084 * and used by the XSLT debugger *
13085 * *
13086 ************************************************************************/
13087
13088static xmlEntityReferenceFunc xmlEntityRefFunc = NULL;
13089
13090/**
13091 * xmlAddEntityReference:
13092 * @ent : A valid entity
13093 * @firstNode : A valid first node for children of entity
13094 * @lastNode : A valid last node of children entity
13095 *
13096 * Notify of a reference to an entity of type XML_EXTERNAL_GENERAL_PARSED_ENTITY
13097 */
13098static void
13099xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
13100 xmlNodePtr lastNode)
13101{
13102 if (xmlEntityRefFunc != NULL) {
13103 (*xmlEntityRefFunc) (ent, firstNode, lastNode);
13104 }
13105}
13106
13107
13108/**
13109 * xmlSetEntityReferenceFunc:
13110 * @func: A valid function
13111 *
13112 * Set the function to call call back when a xml reference has been made
13113 */
13114void
13115xmlSetEntityReferenceFunc(xmlEntityReferenceFunc func)
13116{
13117 xmlEntityRefFunc = func;
13118}
13119#endif /* LIBXML_LEGACY_ENABLED */
13120
13121/************************************************************************
13122 * *
13123 * Miscellaneous *
13124 * *
13125 ************************************************************************/
13126
13127#ifdef LIBXML_XPATH_ENABLED
13128#include <libxml/xpath.h>
13129#endif
13130
13131extern void XMLCDECL xmlGenericErrorDefaultFunc(void *ctx, const char *msg, ...);
13132static int xmlParserInitialized = 0;
13133
13134/**
13135 * xmlInitParser:
13136 *
13137 * Initialization function for the XML parser.
13138 * This is not reentrant. Call once before processing in case of
13139 * use in multithreaded programs.
13140 */
13141
13142void
13143xmlInitParser(void) {
13144 if (xmlParserInitialized != 0)
13145 return;
13146
13147#ifdef LIBXML_THREAD_ENABLED
13148 __xmlGlobalInitMutexLock();
13149 if (xmlParserInitialized == 0) {
13150#endif
13151 if ((xmlGenericError == xmlGenericErrorDefaultFunc) ||
13152 (xmlGenericError == NULL))
13153 initGenericErrorDefaultFunc(NULL);
13154 xmlInitGlobals();
13155 xmlInitThreads();
13156 xmlInitMemory();
13157 xmlInitCharEncodingHandlers();
13158 xmlDefaultSAXHandlerInit();
13159 xmlRegisterDefaultInputCallbacks();
13160#ifdef LIBXML_OUTPUT_ENABLED
13161 xmlRegisterDefaultOutputCallbacks();
13162#endif /* LIBXML_OUTPUT_ENABLED */
13163#ifdef LIBXML_HTML_ENABLED
13164 htmlInitAutoClose();
13165 htmlDefaultSAXHandlerInit();
13166#endif
13167#ifdef LIBXML_XPATH_ENABLED
13168 xmlXPathInit();
13169#endif
13170 xmlParserInitialized = 1;
13171#ifdef LIBXML_THREAD_ENABLED
13172 }
13173 __xmlGlobalInitMutexUnlock();
13174#endif
13175}
13176
13177/**
13178 * xmlCleanupParser:
13179 *
13180 * Cleanup function for the XML library. It tries to reclaim all
13181 * parsing related global memory allocated for the library processing.
13182 * It doesn't deallocate any document related memory. Calling this
13183 * function should not prevent reusing the library but one should
13184 * call xmlCleanupParser() only when the process has
13185 * finished using the library or XML document built with it.
13186 */
13187
13188void
13189xmlCleanupParser(void) {
13190 if (!xmlParserInitialized)
13191 return;
13192
13193 xmlCleanupCharEncodingHandlers();
13194#ifdef LIBXML_CATALOG_ENABLED
13195 xmlCatalogCleanup();
13196#endif
13197 xmlDictCleanup();
13198 xmlCleanupInputCallbacks();
13199#ifdef LIBXML_OUTPUT_ENABLED
13200 xmlCleanupOutputCallbacks();
13201#endif
13202#ifdef LIBXML_SCHEMAS_ENABLED
13203 xmlSchemaCleanupTypes();
13204 xmlRelaxNGCleanupTypes();
13205#endif
13206 xmlCleanupGlobals();
13207 xmlResetLastError();
13208 xmlCleanupThreads(); /* must be last if called not from the main thread */
13209 xmlCleanupMemory();
13210 xmlParserInitialized = 0;
13211}
13212
13213/************************************************************************
13214 * *
13215 * New set (2.6.0) of simpler and more flexible APIs *
13216 * *
13217 ************************************************************************/
13218
/**
 * DICT_FREE:
 * @str:  a string
 *
 * Free a string if it is not owned by the "dict" dictionary in the
 * current scope. A variable named dict must be visible where the macro
 * is expanded; a NULL @str is left alone.
 */
#define DICT_FREE(str)                                              \
        if (((str) != NULL) &&                                      \
            ((dict == NULL) ||                                      \
             (xmlDictOwns(dict, (const xmlChar *)(str)) == 0)))     \
            xmlFree((char *)(str));
13230
13231/**
13232 * xmlCtxtReset:
13233 * @ctxt: an XML parser context
13234 *
13235 * Reset a parser context
13236 */
13237void
13238xmlCtxtReset(xmlParserCtxtPtr ctxt)
13239{
13240 xmlParserInputPtr input;
13241 xmlDictPtr dict;
13242
13243 if (ctxt == NULL)
13244 return;
13245
13246 dict = ctxt->dict;
13247
13248 while ((input = inputPop(ctxt)) != NULL) { /* Non consuming */
13249 xmlFreeInputStream(input);
13250 }
13251 ctxt->inputNr = 0;
13252 ctxt->input = NULL;
13253
13254 ctxt->spaceNr = 0;
13255 if (ctxt->spaceTab != NULL) {
13256 ctxt->spaceTab[0] = -1;
13257 ctxt->space = &ctxt->spaceTab[0];
13258 } else {
13259 ctxt->space = NULL;
13260 }
13261
13262
13263 ctxt->nodeNr = 0;
13264 ctxt->node = NULL;
13265
13266 ctxt->nameNr = 0;
13267 ctxt->name = NULL;
13268
13269 DICT_FREE(ctxt->version);
13270 ctxt->version = NULL;
13271 DICT_FREE(ctxt->encoding);
13272 ctxt->encoding = NULL;
13273 DICT_FREE(ctxt->directory);
13274 ctxt->directory = NULL;
13275 DICT_FREE(ctxt->extSubURI);
13276 ctxt->extSubURI = NULL;
13277 DICT_FREE(ctxt->extSubSystem);
13278 ctxt->extSubSystem = NULL;
13279 if (ctxt->myDoc != NULL)
13280 xmlFreeDoc(ctxt->myDoc);
13281 ctxt->myDoc = NULL;
13282
13283 ctxt->standalone = -1;
13284 ctxt->hasExternalSubset = 0;
13285 ctxt->hasPErefs = 0;
13286 ctxt->html = 0;
13287 ctxt->external = 0;
13288 ctxt->instate = XML_PARSER_START;
13289 ctxt->token = 0;
13290
13291 ctxt->wellFormed = 1;
13292 ctxt->nsWellFormed = 1;
13293 ctxt->disableSAX = 0;
13294 ctxt->valid = 1;
13295#if 0
13296 ctxt->vctxt.userData = ctxt;
13297 ctxt->vctxt.error = xmlParserValidityError;
13298 ctxt->vctxt.warning = xmlParserValidityWarning;
13299#endif
13300 ctxt->record_info = 0;
13301 ctxt->nbChars = 0;
13302 ctxt->checkIndex = 0;
13303 ctxt->inSubset = 0;
13304 ctxt->errNo = XML_ERR_OK;
13305 ctxt->depth = 0;
13306 ctxt->charset = XML_CHAR_ENCODING_UTF8;
13307 ctxt->catalogs = NULL;
13308 ctxt->nbentities = 0;
13309 ctxt->sizeentities = 0;
13310 xmlInitNodeInfoSeq(&ctxt->node_seq);
13311
13312 if (ctxt->attsDefault != NULL) {
13313 xmlHashFree(ctxt->attsDefault, (xmlHashDeallocator) xmlFree);
13314 ctxt->attsDefault = NULL;
13315 }
13316 if (ctxt->attsSpecial != NULL) {
13317 xmlHashFree(ctxt->attsSpecial, NULL);
13318 ctxt->attsSpecial = NULL;
13319 }
13320
13321#ifdef LIBXML_CATALOG_ENABLED
13322 if (ctxt->catalogs != NULL)
13323 xmlCatalogFreeLocal(ctxt->catalogs);
13324#endif
13325 if (ctxt->lastError.code != XML_ERR_OK)
13326 xmlResetError(&ctxt->lastError);
13327}
13328
13329/**
13330 * xmlCtxtResetPush:
13331 * @ctxt: an XML parser context
13332 * @chunk: a pointer to an array of chars
13333 * @size: number of chars in the array
13334 * @filename: an optional file name or URI
13335 * @encoding: the document encoding, or NULL
13336 *
13337 * Reset a push parser context
13338 *
13339 * Returns 0 in case of success and 1 in case of error
13340 */
13341int
13342xmlCtxtResetPush(xmlParserCtxtPtr ctxt, const char *chunk,
13343 int size, const char *filename, const char *encoding)
13344{
13345 xmlParserInputPtr inputStream;
13346 xmlParserInputBufferPtr buf;
13347 xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
13348
13349 if (ctxt == NULL)
13350 return(1);
13351
13352 if ((encoding == NULL) && (chunk != NULL) && (size >= 4))
13353 enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
13354
13355 buf = xmlAllocParserInputBuffer(enc);
13356 if (buf == NULL)
13357 return(1);
13358
13359 if (ctxt == NULL) {
13360 xmlFreeParserInputBuffer(buf);
13361 return(1);
13362 }
13363
13364 xmlCtxtReset(ctxt);
13365
13366 if (ctxt->pushTab == NULL) {
13367 ctxt->pushTab = (void **) xmlMalloc(ctxt->nameMax * 3 *
13368 sizeof(xmlChar *));
13369 if (ctxt->pushTab == NULL) {
13370 xmlErrMemory(ctxt, NULL);
13371 xmlFreeParserInputBuffer(buf);
13372 return(1);
13373 }
13374 }
13375
13376 if (filename == NULL) {
13377 ctxt->directory = NULL;
13378 } else {
13379 ctxt->directory = xmlParserGetDirectory(filename);
13380 }
13381
13382 inputStream = xmlNewInputStream(ctxt);
13383 if (inputStream == NULL) {
13384 xmlFreeParserInputBuffer(buf);
13385 return(1);
13386 }
13387
13388 if (filename == NULL)
13389 inputStream->filename = NULL;
13390 else
13391 inputStream->filename = (char *)
13392 xmlCanonicPath((const xmlChar *) filename);
13393 inputStream->buf = buf;
13394 inputStream->base = inputStream->buf->buffer->content;
13395 inputStream->cur = inputStream->buf->buffer->content;
13396 inputStream->end =
13397 &inputStream->buf->buffer->content[inputStream->buf->buffer->use];
13398
13399 inputPush(ctxt, inputStream);
13400
13401 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
13402 (ctxt->input->buf != NULL)) {
13403 int base = ctxt->input->base - ctxt->input->buf->buffer->content;
13404 int cur = ctxt->input->cur - ctxt->input->base;
13405
13406 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
13407
13408 ctxt->input->base = ctxt->input->buf->buffer->content + base;
13409 ctxt->input->cur = ctxt->input->base + cur;
13410 ctxt->input->end =
13411 &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->
13412 use];
13413#ifdef DEBUG_PUSH
13414 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
13415#endif
13416 }
13417
13418 if (encoding != NULL) {
13419 xmlCharEncodingHandlerPtr hdlr;
13420
13421 hdlr = xmlFindCharEncodingHandler(encoding);
13422 if (hdlr != NULL) {
13423 xmlSwitchToEncoding(ctxt, hdlr);
13424 } else {
13425 xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
13426 "Unsupported encoding %s\n", BAD_CAST encoding);
13427 }
13428 } else if (enc != XML_CHAR_ENCODING_NONE) {
13429 xmlSwitchEncoding(ctxt, enc);
13430 }
13431
13432 return(0);
13433}
13434
13435/**
13436 * xmlCtxtUseOptions:
13437 * @ctxt: an XML parser context
13438 * @options: a combination of xmlParserOption
13439 *
13440 * Applies the options to the parser context
13441 *
13442 * Returns 0 in case of success, the set of unknown or unimplemented options
13443 * in case of error.
13444 */
13445int
13446xmlCtxtUseOptions(xmlParserCtxtPtr ctxt, int options)
13447{
13448 if (ctxt == NULL)
13449 return(-1);
13450 if (options & XML_PARSE_RECOVER) {
13451 ctxt->recovery = 1;
13452 options -= XML_PARSE_RECOVER;
13453 } else
13454 ctxt->recovery = 0;
13455 if (options & XML_PARSE_DTDLOAD) {
13456 ctxt->loadsubset = XML_DETECT_IDS;
13457 options -= XML_PARSE_DTDLOAD;
13458 } else
13459 ctxt->loadsubset = 0;
13460 if (options & XML_PARSE_DTDATTR) {
13461 ctxt->loadsubset |= XML_COMPLETE_ATTRS;
13462 options -= XML_PARSE_DTDATTR;
13463 }
13464 if (options & XML_PARSE_NOENT) {
13465 ctxt->replaceEntities = 1;
13466 /* ctxt->loadsubset |= XML_DETECT_IDS; */
13467 options -= XML_PARSE_NOENT;
13468 } else
13469 ctxt->replaceEntities = 0;
13470 if (options & XML_PARSE_PEDANTIC) {
13471 ctxt->pedantic = 1;
13472 options -= XML_PARSE_PEDANTIC;
13473 } else
13474 ctxt->pedantic = 0;
13475 if (options & XML_PARSE_NOBLANKS) {
13476 ctxt->keepBlanks = 0;
13477 ctxt->sax->ignorableWhitespace = xmlSAX2IgnorableWhitespace;
13478 options -= XML_PARSE_NOBLANKS;
13479 } else
13480 ctxt->keepBlanks = 1;
13481 if (options & XML_PARSE_DTDVALID) {
13482 ctxt->validate = 1;
13483 if (options & XML_PARSE_NOWARNING)
13484 ctxt->vctxt.warning = NULL;
13485 if (options & XML_PARSE_NOERROR)
13486 ctxt->vctxt.error = NULL;
13487 options -= XML_PARSE_DTDVALID;
13488 } else
13489 ctxt->validate = 0;
13490 if (options & XML_PARSE_NOWARNING) {
13491 ctxt->sax->warning = NULL;
13492 options -= XML_PARSE_NOWARNING;
13493 }
13494 if (options & XML_PARSE_NOERROR) {
13495 ctxt->sax->error = NULL;
13496 ctxt->sax->fatalError = NULL;
13497 options -= XML_PARSE_NOERROR;
13498 }
13499#ifdef LIBXML_SAX1_ENABLED
13500 if (options & XML_PARSE_SAX1) {
13501 ctxt->sax->startElement = xmlSAX2StartElement;
13502 ctxt->sax->endElement = xmlSAX2EndElement;
13503 ctxt->sax->startElementNs = NULL;
13504 ctxt->sax->endElementNs = NULL;
13505 ctxt->sax->initialized = 1;
13506 options -= XML_PARSE_SAX1;
13507 }
13508#endif /* LIBXML_SAX1_ENABLED */
13509 if (options & XML_PARSE_NODICT) {
13510 ctxt->dictNames = 0;
13511 options -= XML_PARSE_NODICT;
13512 } else {
13513 ctxt->dictNames = 1;
13514 }
13515 if (options & XML_PARSE_NOCDATA) {
13516 ctxt->sax->cdataBlock = NULL;
13517 options -= XML_PARSE_NOCDATA;
13518 }
13519 if (options & XML_PARSE_NSCLEAN) {
13520 ctxt->options |= XML_PARSE_NSCLEAN;
13521 options -= XML_PARSE_NSCLEAN;
13522 }
13523 if (options & XML_PARSE_NONET) {
13524 ctxt->options |= XML_PARSE_NONET;
13525 options -= XML_PARSE_NONET;
13526 }
13527 if (options & XML_PARSE_COMPACT) {
13528 ctxt->options |= XML_PARSE_COMPACT;
13529 options -= XML_PARSE_COMPACT;
13530 }
13531 ctxt->linenumbers = 1;
13532 return (options);
13533}
13534
13535/**
13536 * xmlDoRead:
13537 * @ctxt: an XML parser context
13538 * @URL: the base URL to use for the document
13539 * @encoding: the document encoding, or NULL
13540 * @options: a combination of xmlParserOption
13541 * @reuse: keep the context for reuse
13542 *
13543 * Common front-end for the xmlRead functions
13544 *
13545 * Returns the resulting document tree or NULL
13546 */
13547static xmlDocPtr
13548xmlDoRead(xmlParserCtxtPtr ctxt, const char *URL, const char *encoding,
13549 int options, int reuse)
13550{
13551 xmlDocPtr ret;
13552
13553 xmlCtxtUseOptions(ctxt, options);
13554 if (encoding != NULL) {
13555 xmlCharEncodingHandlerPtr hdlr;
13556
13557 hdlr = xmlFindCharEncodingHandler(encoding);
13558 if (hdlr != NULL)
13559 xmlSwitchToEncoding(ctxt, hdlr);
13560 }
13561 if ((URL != NULL) && (ctxt->input != NULL) &&
13562 (ctxt->input->filename == NULL))
13563 ctxt->input->filename = (char *) xmlStrdup((const xmlChar *) URL);
13564 xmlParseDocument(ctxt);
13565 if ((ctxt->wellFormed) || ctxt->recovery)
13566 ret = ctxt->myDoc;
13567 else {
13568 ret = NULL;
13569 if (ctxt->myDoc != NULL) {
13570 xmlFreeDoc(ctxt->myDoc);
13571 }
13572 }
13573 ctxt->myDoc = NULL;
13574 if (!reuse) {
13575 xmlFreeParserCtxt(ctxt);
13576 }
13577
13578 return (ret);
13579}
13580
13581/**
13582 * xmlReadDoc:
13583 * @cur: a pointer to a zero terminated string
13584 * @URL: the base URL to use for the document
13585 * @encoding: the document encoding, or NULL
13586 * @options: a combination of xmlParserOption
13587 *
13588 * parse an XML in-memory document and build a tree.
13589 *
13590 * Returns the resulting document tree
13591 */
13592xmlDocPtr
13593xmlReadDoc(const xmlChar * cur, const char *URL, const char *encoding, int options)
13594{
13595 xmlParserCtxtPtr ctxt;
13596
13597 if (cur == NULL)
13598 return (NULL);
13599
13600 ctxt = xmlCreateDocParserCtxt(cur);
13601 if (ctxt == NULL)
13602 return (NULL);
13603 return (xmlDoRead(ctxt, URL, encoding, options, 0));
13604}
13605
13606/**
13607 * xmlReadFile:
13608 * @filename: a file or URL
13609 * @encoding: the document encoding, or NULL
13610 * @options: a combination of xmlParserOption
13611 *
13612 * parse an XML file from the filesystem or the network.
13613 *
13614 * Returns the resulting document tree
13615 */
13616xmlDocPtr
13617xmlReadFile(const char *filename, const char *encoding, int options)
13618{
13619 xmlParserCtxtPtr ctxt;
13620
13621 ctxt = xmlCreateURLParserCtxt(filename, options);
13622 if (ctxt == NULL)
13623 return (NULL);
13624 return (xmlDoRead(ctxt, NULL, encoding, options, 0));
13625}
13626
13627/**
13628 * xmlReadMemory:
13629 * @buffer: a pointer to a char array
13630 * @size: the size of the array
13631 * @URL: the base URL to use for the document
13632 * @encoding: the document encoding, or NULL
13633 * @options: a combination of xmlParserOption
13634 *
13635 * parse an XML in-memory document and build a tree.
13636 *
13637 * Returns the resulting document tree
13638 */
13639xmlDocPtr
13640xmlReadMemory(const char *buffer, int size, const char *URL, const char *encoding, int options)
13641{
13642 xmlParserCtxtPtr ctxt;
13643
13644 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
13645 if (ctxt == NULL)
13646 return (NULL);
13647 return (xmlDoRead(ctxt, URL, encoding, options, 0));
13648}
13649
13650/**
13651 * xmlReadFd:
13652 * @fd: an open file descriptor
13653 * @URL: the base URL to use for the document
13654 * @encoding: the document encoding, or NULL
13655 * @options: a combination of xmlParserOption
13656 *
13657 * parse an XML from a file descriptor and build a tree.
13658 * NOTE that the file descriptor will not be closed when the
13659 * reader is closed or reset.
13660 *
13661 * Returns the resulting document tree
13662 */
13663xmlDocPtr
13664xmlReadFd(int fd, const char *URL, const char *encoding, int options)
13665{
13666 xmlParserCtxtPtr ctxt;
13667 xmlParserInputBufferPtr input;
13668 xmlParserInputPtr stream;
13669
13670 if (fd < 0)
13671 return (NULL);
13672
13673 input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
13674 if (input == NULL)
13675 return (NULL);
13676 input->closecallback = NULL;
13677 ctxt = xmlNewParserCtxt();
13678 if (ctxt == NULL) {
13679 xmlFreeParserInputBuffer(input);
13680 return (NULL);
13681 }
13682 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
13683 if (stream == NULL) {
13684 xmlFreeParserInputBuffer(input);
13685 xmlFreeParserCtxt(ctxt);
13686 return (NULL);
13687 }
13688 inputPush(ctxt, stream);
13689 return (xmlDoRead(ctxt, URL, encoding, options, 0));
13690}
13691
13692/**
13693 * xmlReadIO:
13694 * @ioread: an I/O read function
13695 * @ioclose: an I/O close function
13696 * @ioctx: an I/O handler
13697 * @URL: the base URL to use for the document
13698 * @encoding: the document encoding, or NULL
13699 * @options: a combination of xmlParserOption
13700 *
13701 * parse an XML document from I/O functions and source and build a tree.
13702 *
13703 * Returns the resulting document tree
13704 */
13705xmlDocPtr
13706xmlReadIO(xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
13707 void *ioctx, const char *URL, const char *encoding, int options)
13708{
13709 xmlParserCtxtPtr ctxt;
13710 xmlParserInputBufferPtr input;
13711 xmlParserInputPtr stream;
13712
13713 if (ioread == NULL)
13714 return (NULL);
13715
13716 input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
13717 XML_CHAR_ENCODING_NONE);
13718 if (input == NULL)
13719 return (NULL);
13720 ctxt = xmlNewParserCtxt();
13721 if (ctxt == NULL) {
13722 xmlFreeParserInputBuffer(input);
13723 return (NULL);
13724 }
13725 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
13726 if (stream == NULL) {
13727 xmlFreeParserInputBuffer(input);
13728 xmlFreeParserCtxt(ctxt);
13729 return (NULL);
13730 }
13731 inputPush(ctxt, stream);
13732 return (xmlDoRead(ctxt, URL, encoding, options, 0));
13733}
13734
13735/**
13736 * xmlCtxtReadDoc:
13737 * @ctxt: an XML parser context
13738 * @cur: a pointer to a zero terminated string
13739 * @URL: the base URL to use for the document
13740 * @encoding: the document encoding, or NULL
13741 * @options: a combination of xmlParserOption
13742 *
13743 * parse an XML in-memory document and build a tree.
13744 * This reuses the existing @ctxt parser context
13745 *
13746 * Returns the resulting document tree
13747 */
13748xmlDocPtr
13749xmlCtxtReadDoc(xmlParserCtxtPtr ctxt, const xmlChar * cur,
13750 const char *URL, const char *encoding, int options)
13751{
13752 xmlParserInputPtr stream;
13753
13754 if (cur == NULL)
13755 return (NULL);
13756 if (ctxt == NULL)
13757 return (NULL);
13758
13759 xmlCtxtReset(ctxt);
13760
13761 stream = xmlNewStringInputStream(ctxt, cur);
13762 if (stream == NULL) {
13763 return (NULL);
13764 }
13765 inputPush(ctxt, stream);
13766 return (xmlDoRead(ctxt, URL, encoding, options, 1));
13767}
13768
13769/**
13770 * xmlCtxtReadFile:
13771 * @ctxt: an XML parser context
13772 * @filename: a file or URL
13773 * @encoding: the document encoding, or NULL
13774 * @options: a combination of xmlParserOption
13775 *
13776 * parse an XML file from the filesystem or the network.
13777 * This reuses the existing @ctxt parser context
13778 *
13779 * Returns the resulting document tree
13780 */
13781xmlDocPtr
13782xmlCtxtReadFile(xmlParserCtxtPtr ctxt, const char *filename,
13783 const char *encoding, int options)
13784{
13785 xmlParserInputPtr stream;
13786
13787 if (filename == NULL)
13788 return (NULL);
13789 if (ctxt == NULL)
13790 return (NULL);
13791
13792 xmlCtxtReset(ctxt);
13793
13794 stream = xmlLoadExternalEntity(filename, NULL, ctxt);
13795 if (stream == NULL) {
13796 return (NULL);
13797 }
13798 inputPush(ctxt, stream);
13799 return (xmlDoRead(ctxt, NULL, encoding, options, 1));
13800}
13801
13802/**
13803 * xmlCtxtReadMemory:
13804 * @ctxt: an XML parser context
13805 * @buffer: a pointer to a char array
13806 * @size: the size of the array
13807 * @URL: the base URL to use for the document
13808 * @encoding: the document encoding, or NULL
13809 * @options: a combination of xmlParserOption
13810 *
13811 * parse an XML in-memory document and build a tree.
13812 * This reuses the existing @ctxt parser context
13813 *
13814 * Returns the resulting document tree
13815 */
13816xmlDocPtr
13817xmlCtxtReadMemory(xmlParserCtxtPtr ctxt, const char *buffer, int size,
13818 const char *URL, const char *encoding, int options)
13819{
13820 xmlParserInputBufferPtr input;
13821 xmlParserInputPtr stream;
13822
13823 if (ctxt == NULL)
13824 return (NULL);
13825 if (buffer == NULL)
13826 return (NULL);
13827
13828 xmlCtxtReset(ctxt);
13829
13830 input = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
13831 if (input == NULL) {
13832 return(NULL);
13833 }
13834
13835 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
13836 if (stream == NULL) {
13837 xmlFreeParserInputBuffer(input);
13838 return(NULL);
13839 }
13840
13841 inputPush(ctxt, stream);
13842 return (xmlDoRead(ctxt, URL, encoding, options, 1));
13843}
13844
13845/**
13846 * xmlCtxtReadFd:
13847 * @ctxt: an XML parser context
13848 * @fd: an open file descriptor
13849 * @URL: the base URL to use for the document
13850 * @encoding: the document encoding, or NULL
13851 * @options: a combination of xmlParserOption
13852 *
13853 * parse an XML from a file descriptor and build a tree.
13854 * This reuses the existing @ctxt parser context
13855 * NOTE that the file descriptor will not be closed when the
13856 * reader is closed or reset.
13857 *
13858 * Returns the resulting document tree
13859 */
13860xmlDocPtr
13861xmlCtxtReadFd(xmlParserCtxtPtr ctxt, int fd,
13862 const char *URL, const char *encoding, int options)
13863{
13864 xmlParserInputBufferPtr input;
13865 xmlParserInputPtr stream;
13866
13867 if (fd < 0)
13868 return (NULL);
13869 if (ctxt == NULL)
13870 return (NULL);
13871
13872 xmlCtxtReset(ctxt);
13873
13874
13875 input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
13876 if (input == NULL)
13877 return (NULL);
13878 input->closecallback = NULL;
13879 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
13880 if (stream == NULL) {
13881 xmlFreeParserInputBuffer(input);
13882 return (NULL);
13883 }
13884 inputPush(ctxt, stream);
13885 return (xmlDoRead(ctxt, URL, encoding, options, 1));
13886}
13887
13888/**
13889 * xmlCtxtReadIO:
13890 * @ctxt: an XML parser context
13891 * @ioread: an I/O read function
13892 * @ioclose: an I/O close function
13893 * @ioctx: an I/O handler
13894 * @URL: the base URL to use for the document
13895 * @encoding: the document encoding, or NULL
13896 * @options: a combination of xmlParserOption
13897 *
13898 * parse an XML document from I/O functions and source and build a tree.
13899 * This reuses the existing @ctxt parser context
13900 *
13901 * Returns the resulting document tree
13902 */
13903xmlDocPtr
13904xmlCtxtReadIO(xmlParserCtxtPtr ctxt, xmlInputReadCallback ioread,
13905 xmlInputCloseCallback ioclose, void *ioctx,
13906 const char *URL,
13907 const char *encoding, int options)
13908{
13909 xmlParserInputBufferPtr input;
13910 xmlParserInputPtr stream;
13911
13912 if (ioread == NULL)
13913 return (NULL);
13914 if (ctxt == NULL)
13915 return (NULL);
13916
13917 xmlCtxtReset(ctxt);
13918
13919 input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
13920 XML_CHAR_ENCODING_NONE);
13921 if (input == NULL)
13922 return (NULL);
13923 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
13924 if (stream == NULL) {
13925 xmlFreeParserInputBuffer(input);
13926 return (NULL);
13927 }
13928 inputPush(ctxt, stream);
13929 return (xmlDoRead(ctxt, URL, encoding, options, 1));
13930}
13931
13932#define bottom_parser
13933#include "elfgcchack.h"
Note: See TracBrowser for help on using the repository browser.

© 2024 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette