1 | <?xml version="1.0" encoding="UTF-8"?>
|
---|
2 | <html>
|
---|
3 | <head>
|
---|
4 | <meta http-equiv="Content-Type" content="text/html; charset=UTF-8"/>
|
---|
5 | <title>HTMLparser: interface for an HTML 4.0 non-verifying parser</title>
|
---|
6 | <meta name="generator" content="Libxml2 devhelp stylesheet"/>
|
---|
7 | <link rel="start" href="index.html" title="libxml2 Reference Manual"/>
|
---|
8 | <link rel="up" href="general.html" title="API"/>
|
---|
9 | <link rel="stylesheet" href="style.css" type="text/css"/>
|
---|
10 | <link rel="chapter" href="general.html" title="API"/>
|
---|
11 | </head>
|
---|
12 | <body bgcolor="white" text="black" link="#0000FF" vlink="#840084" alink="#0000FF">
|
---|
13 | <table class="navigation" width="100%" summary="Navigation header" cellpadding="2" cellspacing="2">
|
---|
14 | <tr valign="middle">
|
---|
15 | <td>
|
---|
16 | <a accesskey="p" href="libxml2-DOCBparser.html">
|
---|
17 | <img src="left.png" width="24" height="24" border="0" alt="Prev"/>
|
---|
18 | </a>
|
---|
19 | </td>
|
---|
20 | <td>
|
---|
21 | <a accesskey="u" href="general.html">
|
---|
22 | <img src="up.png" width="24" height="24" border="0" alt="Up"/>
|
---|
23 | </a>
|
---|
24 | </td>
|
---|
25 | <td>
|
---|
26 | <a accesskey="h" href="index.html">
|
---|
27 | <img src="home.png" width="24" height="24" border="0" alt="Home"/>
|
---|
28 | </a>
|
---|
29 | </td>
|
---|
30 | <td>
|
---|
31 | <a accesskey="n" href="libxml2-HTMLtree.html">
|
---|
32 | <img src="right.png" width="24" height="24" border="0" alt="Next"/>
|
---|
33 | </a>
|
---|
34 | </td>
|
---|
35 | <th width="100%" align="center">libxml2 Reference Manual</th>
|
---|
36 | </tr>
|
---|
37 | </table>
|
---|
38 | <h2>
|
---|
39 | <span class="refentrytitle">HTMLparser</span>
|
---|
40 | </h2>
|
---|
41 | <p>HTMLparser - interface for an HTML 4.0 non-verifying parser</p>
|
---|
42 | <p>this module implements an HTML 4.0 non-verifying parser with API compatible with the XML parser ones. It should be able to parse "real world" HTML, even if severely broken from a specification point of view. </p>
|
---|
43 | <p>Author(s): Daniel Veillard </p>
|
---|
44 | <div class="refsynopsisdiv">
|
---|
45 | <h2>Synopsis</h2>
|
---|
46 | <pre class="synopsis">#define <a href="#htmlDefaultSubelement">htmlDefaultSubelement</a>(elt);
|
---|
47 | #define <a href="#htmlElementAllowedHereDesc">htmlElementAllowedHereDesc</a>(parent, elt);
|
---|
48 | #define <a href="#htmlRequiredAttrs">htmlRequiredAttrs</a>(elt);
|
---|
49 | typedef <a href="libxml2-parser.html#xmlParserNodeInfo">xmlParserNodeInfo</a> <a href="#htmlParserNodeInfo">htmlParserNodeInfo</a>;
|
---|
50 | typedef <a href="libxml2-tree.html#xmlParserInput">xmlParserInput</a> <a href="#htmlParserInput">htmlParserInput</a>;
|
---|
51 | typedef <a href="libxml2-tree.html#xmlParserCtxtPtr">xmlParserCtxtPtr</a> <a href="#htmlParserCtxtPtr">htmlParserCtxtPtr</a>;
|
---|
52 | typedef struct _htmlEntityDesc <a href="#htmlEntityDesc">htmlEntityDesc</a>;
|
---|
53 | typedef <a href="libxml2-tree.html#xmlDocPtr">xmlDocPtr</a> <a href="#htmlDocPtr">htmlDocPtr</a>;
|
---|
54 | typedef <a href="libxml2-tree.html#xmlSAXHandlerPtr">xmlSAXHandlerPtr</a> <a href="#htmlSAXHandlerPtr">htmlSAXHandlerPtr</a>;
|
---|
55 | typedef enum <a href="#htmlStatus">htmlStatus</a>;
|
---|
56 | typedef <a href="libxml2-tree.html#xmlNodePtr">xmlNodePtr</a> <a href="#htmlNodePtr">htmlNodePtr</a>;
|
---|
57 | typedef <a href="libxml2-HTMLparser.html#htmlElemDesc">htmlElemDesc</a> * <a href="#htmlElemDescPtr">htmlElemDescPtr</a>;
|
---|
58 | typedef struct _htmlElemDesc <a href="#htmlElemDesc">htmlElemDesc</a>;
|
---|
59 | typedef <a href="libxml2-tree.html#xmlSAXHandler">xmlSAXHandler</a> <a href="#htmlSAXHandler">htmlSAXHandler</a>;
|
---|
60 | typedef <a href="libxml2-tree.html#xmlParserInputPtr">xmlParserInputPtr</a> <a href="#htmlParserInputPtr">htmlParserInputPtr</a>;
|
---|
61 | typedef enum <a href="#htmlParserOption">htmlParserOption</a>;
|
---|
62 | typedef <a href="libxml2-HTMLparser.html#htmlEntityDesc">htmlEntityDesc</a> * <a href="#htmlEntityDescPtr">htmlEntityDescPtr</a>;
|
---|
63 | typedef <a href="libxml2-tree.html#xmlParserCtxt">xmlParserCtxt</a> <a href="#htmlParserCtxt">htmlParserCtxt</a>;
|
---|
64 | int <a href="#htmlIsScriptAttribute">htmlIsScriptAttribute</a> (const <a href="libxml2-xmlstring.html#xmlChar">xmlChar</a> * name);
|
---|
65 | int <a href="#htmlHandleOmittedElem">htmlHandleOmittedElem</a> (int val);
|
---|
66 | <a href="libxml2-HTMLparser.html#htmlDocPtr">htmlDocPtr</a> <a href="#htmlReadFd">htmlReadFd</a> (int fd, <br/> const char * URL, <br/> const char * encoding, <br/> int options);
|
---|
67 | <a href="libxml2-HTMLparser.html#htmlDocPtr">htmlDocPtr</a> <a href="#htmlReadIO">htmlReadIO</a> (<a href="libxml2-xmlIO.html#xmlInputReadCallback">xmlInputReadCallback</a> ioread, <br/> <a href="libxml2-xmlIO.html#xmlInputCloseCallback">xmlInputCloseCallback</a> ioclose, <br/> void * ioctx, <br/> const char * URL, <br/> const char * encoding, <br/> int options);
|
---|
68 | <a href="libxml2-HTMLparser.html#htmlDocPtr">htmlDocPtr</a> <a href="#htmlParseFile">htmlParseFile</a> (const char * filename, <br/> const char * encoding);
|
---|
69 | <a href="libxml2-HTMLparser.html#htmlDocPtr">htmlDocPtr</a> <a href="#htmlCtxtReadDoc">htmlCtxtReadDoc</a> (<a href="libxml2-HTMLparser.html#htmlParserCtxtPtr">htmlParserCtxtPtr</a> ctxt, <br/> const <a href="libxml2-xmlstring.html#xmlChar">xmlChar</a> * cur, <br/> const char * URL, <br/> const char * encoding, <br/> int options);
|
---|
70 | int <a href="#htmlAutoCloseTag">htmlAutoCloseTag</a> (<a href="libxml2-HTMLparser.html#htmlDocPtr">htmlDocPtr</a> doc, <br/> const <a href="libxml2-xmlstring.html#xmlChar">xmlChar</a> * name, <br/> <a href="libxml2-HTMLparser.html#htmlNodePtr">htmlNodePtr</a> elem);
|
---|
71 | int <a href="#htmlParseChunk">htmlParseChunk</a> (<a href="libxml2-HTMLparser.html#htmlParserCtxtPtr">htmlParserCtxtPtr</a> ctxt, <br/> const char * chunk, <br/> int size, <br/> int terminate);
|
---|
72 | const <a href="libxml2-HTMLparser.html#htmlElemDesc">htmlElemDesc</a> * <a href="#htmlTagLookup">htmlTagLookup</a> (const <a href="libxml2-xmlstring.html#xmlChar">xmlChar</a> * tag);
|
---|
73 | <a href="libxml2-HTMLparser.html#htmlParserCtxtPtr">htmlParserCtxtPtr</a> <a href="#htmlCreateMemoryParserCtxt">htmlCreateMemoryParserCtxt</a> (const char * buffer, <br/> int size);
|
---|
74 | void <a href="#htmlCtxtReset">htmlCtxtReset</a> (<a href="libxml2-HTMLparser.html#htmlParserCtxtPtr">htmlParserCtxtPtr</a> ctxt);
|
---|
75 | int <a href="#htmlElementAllowedHere">htmlElementAllowedHere</a> (const <a href="libxml2-HTMLparser.html#htmlElemDesc">htmlElemDesc</a> * parent, <br/> const <a href="libxml2-xmlstring.html#xmlChar">xmlChar</a> * elt);
|
---|
76 | <a href="libxml2-HTMLparser.html#htmlDocPtr">htmlDocPtr</a> <a href="#htmlCtxtReadIO">htmlCtxtReadIO</a> (<a href="libxml2-HTMLparser.html#htmlParserCtxtPtr">htmlParserCtxtPtr</a> ctxt, <br/> <a href="libxml2-xmlIO.html#xmlInputReadCallback">xmlInputReadCallback</a> ioread, <br/> <a href="libxml2-xmlIO.html#xmlInputCloseCallback">xmlInputCloseCallback</a> ioclose, <br/> void * ioctx, <br/> const char * URL, <br/> const char * encoding, <br/> int options);
|
---|
77 | <a href="libxml2-HTMLparser.html#htmlParserCtxtPtr">htmlParserCtxtPtr</a> <a href="#htmlCreatePushParserCtxt">htmlCreatePushParserCtxt</a> (<a href="libxml2-HTMLparser.html#htmlSAXHandlerPtr">htmlSAXHandlerPtr</a> sax, <br/> void * user_data, <br/> const char * chunk, <br/> int size, <br/> const char * filename, <br/> <a href="libxml2-encoding.html#xmlCharEncoding">xmlCharEncoding</a> enc);
|
---|
78 | <a href="libxml2-HTMLparser.html#htmlDocPtr">htmlDocPtr</a> <a href="#htmlReadMemory">htmlReadMemory</a> (const char * buffer, <br/> int size, <br/> const char * URL, <br/> const char * encoding, <br/> int options);
|
---|
79 | int <a href="#htmlIsAutoClosed">htmlIsAutoClosed</a> (<a href="libxml2-HTMLparser.html#htmlDocPtr">htmlDocPtr</a> doc, <br/> <a href="libxml2-HTMLparser.html#htmlNodePtr">htmlNodePtr</a> elem);
|
---|
80 | int <a href="#htmlParseCharRef">htmlParseCharRef</a> (<a href="libxml2-HTMLparser.html#htmlParserCtxtPtr">htmlParserCtxtPtr</a> ctxt);
|
---|
81 | <a href="libxml2-HTMLparser.html#htmlDocPtr">htmlDocPtr</a> <a href="#htmlReadDoc">htmlReadDoc</a> (const <a href="libxml2-xmlstring.html#xmlChar">xmlChar</a> * cur, <br/> const char * URL, <br/> const char * encoding, <br/> int options);
|
---|
82 | int <a href="#htmlEncodeEntities">htmlEncodeEntities</a> (unsigned char * out, <br/> int * outlen, <br/> const unsigned char * in, <br/> int * inlen, <br/> int quoteChar);
|
---|
83 | <a href="libxml2-HTMLparser.html#htmlStatus">htmlStatus</a> <a href="#htmlNodeStatus">htmlNodeStatus</a> (const <a href="libxml2-HTMLparser.html#htmlNodePtr">htmlNodePtr</a> node, <br/> int legacy);
|
---|
84 | <a href="libxml2-HTMLparser.html#htmlStatus">htmlStatus</a> <a href="#htmlAttrAllowed">htmlAttrAllowed</a> (const <a href="libxml2-HTMLparser.html#htmlElemDesc">htmlElemDesc</a> * elt, <br/> const <a href="libxml2-xmlstring.html#xmlChar">xmlChar</a> * attr, <br/> int legacy);
|
---|
85 | <a href="libxml2-HTMLparser.html#htmlDocPtr">htmlDocPtr</a> <a href="#htmlSAXParseFile">htmlSAXParseFile</a> (const char * filename, <br/> const char * encoding, <br/> <a href="libxml2-HTMLparser.html#htmlSAXHandlerPtr">htmlSAXHandlerPtr</a> sax, <br/> void * userData);
|
---|
86 | const <a href="libxml2-HTMLparser.html#htmlEntityDesc">htmlEntityDesc</a> * <a href="#htmlParseEntityRef">htmlParseEntityRef</a> (<a href="libxml2-HTMLparser.html#htmlParserCtxtPtr">htmlParserCtxtPtr</a> ctxt, <br/> const <a href="libxml2-xmlstring.html#xmlChar">xmlChar</a> ** str);
|
---|
87 | <a href="libxml2-HTMLparser.html#htmlStatus">htmlStatus</a> <a href="#htmlElementStatusHere">htmlElementStatusHere</a> (const <a href="libxml2-HTMLparser.html#htmlElemDesc">htmlElemDesc</a> * parent, <br/> const <a href="libxml2-HTMLparser.html#htmlElemDesc">htmlElemDesc</a> * elt);
|
---|
88 | const <a href="libxml2-HTMLparser.html#htmlEntityDesc">htmlEntityDesc</a> * <a href="#htmlEntityValueLookup">htmlEntityValueLookup</a> (unsigned int value);
|
---|
89 | void <a href="#htmlParseElement">htmlParseElement</a> (<a href="libxml2-HTMLparser.html#htmlParserCtxtPtr">htmlParserCtxtPtr</a> ctxt);
|
---|
90 | int <a href="#UTF8ToHtml">UTF8ToHtml</a> (unsigned char * out, <br/> int * outlen, <br/> const unsigned char * in, <br/> int * inlen);
|
---|
91 | const <a href="libxml2-HTMLparser.html#htmlEntityDesc">htmlEntityDesc</a> * <a href="#htmlEntityLookup">htmlEntityLookup</a> (const <a href="libxml2-xmlstring.html#xmlChar">xmlChar</a> * name);
|
---|
92 | void <a href="#htmlFreeParserCtxt">htmlFreeParserCtxt</a> (<a href="libxml2-HTMLparser.html#htmlParserCtxtPtr">htmlParserCtxtPtr</a> ctxt);
|
---|
93 | <a href="libxml2-HTMLparser.html#htmlDocPtr">htmlDocPtr</a> <a href="#htmlCtxtReadMemory">htmlCtxtReadMemory</a> (<a href="libxml2-HTMLparser.html#htmlParserCtxtPtr">htmlParserCtxtPtr</a> ctxt, <br/> const char * buffer, <br/> int size, <br/> const char * URL, <br/> const char * encoding, <br/> int options);
|
---|
94 | <a href="libxml2-HTMLparser.html#htmlDocPtr">htmlDocPtr</a> <a href="#htmlCtxtReadFd">htmlCtxtReadFd</a> (<a href="libxml2-HTMLparser.html#htmlParserCtxtPtr">htmlParserCtxtPtr</a> ctxt, <br/> int fd, <br/> const char * URL, <br/> const char * encoding, <br/> int options);
|
---|
95 | <a href="libxml2-HTMLparser.html#htmlDocPtr">htmlDocPtr</a> <a href="#htmlReadFile">htmlReadFile</a> (const char * filename, <br/> const char * encoding, <br/> int options);
|
---|
96 | <a href="libxml2-HTMLparser.html#htmlDocPtr">htmlDocPtr</a> <a href="#htmlCtxtReadFile">htmlCtxtReadFile</a> (<a href="libxml2-HTMLparser.html#htmlParserCtxtPtr">htmlParserCtxtPtr</a> ctxt, <br/> const char * filename, <br/> const char * encoding, <br/> int options);
|
---|
97 | int <a href="#htmlParseDocument">htmlParseDocument</a> (<a href="libxml2-HTMLparser.html#htmlParserCtxtPtr">htmlParserCtxtPtr</a> ctxt);
|
---|
98 | <a href="libxml2-HTMLparser.html#htmlParserCtxtPtr">htmlParserCtxtPtr</a> <a href="#htmlNewParserCtxt">htmlNewParserCtxt</a> (void);
|
---|
99 | <a href="libxml2-HTMLparser.html#htmlDocPtr">htmlDocPtr</a> <a href="#htmlSAXParseDoc">htmlSAXParseDoc</a> (<a href="libxml2-xmlstring.html#xmlChar">xmlChar</a> * cur, <br/> const char * encoding, <br/> <a href="libxml2-HTMLparser.html#htmlSAXHandlerPtr">htmlSAXHandlerPtr</a> sax, <br/> void * userData);
|
---|
100 | int <a href="#htmlCtxtUseOptions">htmlCtxtUseOptions</a> (<a href="libxml2-HTMLparser.html#htmlParserCtxtPtr">htmlParserCtxtPtr</a> ctxt, <br/> int options);
|
---|
101 | <a href="libxml2-HTMLparser.html#htmlDocPtr">htmlDocPtr</a> <a href="#htmlParseDoc">htmlParseDoc</a> (<a href="libxml2-xmlstring.html#xmlChar">xmlChar</a> * cur, <br/> const char * encoding);
|
---|
102 | </pre>
|
---|
103 | </div>
|
---|
104 | <div class="refsect1" lang="en">
|
---|
105 | <h2>Description</h2>
|
---|
106 | </div>
|
---|
107 | <div class="refsect1" lang="en">
|
---|
108 | <h2>Details</h2>
|
---|
109 | <div class="refsect2" lang="en">
|
---|
110 | <div class="refsect2" lang="en"><h3><a name="htmlDefaultSubelement">Macro </a>htmlDefaultSubelement</h3><pre class="programlisting">#define <a href="#htmlDefaultSubelement">htmlDefaultSubelement</a>(elt);
|
---|
111 | </pre><p>Returns the default subelement for this element</p><div class="variablelist"><table border="0"><col align="left"/><tbody><tr><td><span class="term"><i><tt>elt</tt></i>:</span></td><td>HTML element</td></tr></tbody></table></div>
|
---|
112 | </div>
|
---|
113 | <hr/>
|
---|
114 | <div class="refsect2" lang="en"><h3><a name="htmlElementAllowedHereDesc">Macro </a>htmlElementAllowedHereDesc</h3><pre class="programlisting">#define <a href="#htmlElementAllowedHereDesc">htmlElementAllowedHereDesc</a>(parent, elt);
|
---|
115 | </pre><p>Checks whether an HTML element description may be a direct child of the specified element. Returns 1 if allowed; 0 otherwise.</p><div class="variablelist"><table border="0"><col align="left"/><tbody><tr><td><span class="term"><i><tt>parent</tt></i>:</span></td><td>HTML parent element</td></tr><tr><td><span class="term"><i><tt>elt</tt></i>:</span></td><td>HTML element</td></tr></tbody></table></div>
|
---|
116 | </div>
|
---|
117 | <hr/>
|
---|
118 | <div class="refsect2" lang="en"><h3><a name="htmlRequiredAttrs">Macro </a>htmlRequiredAttrs</h3><pre class="programlisting">#define <a href="#htmlRequiredAttrs">htmlRequiredAttrs</a>(elt);
|
---|
119 | </pre><p>Returns the attributes required for the specified element.</p><div class="variablelist"><table border="0"><col align="left"/><tbody><tr><td><span class="term"><i><tt>elt</tt></i>:</span></td><td>HTML element</td></tr></tbody></table></div>
|
---|
120 | </div>
|
---|
121 | <hr/>
|
---|
122 | <div class="refsect2" lang="en"><h3><a name="htmlDocPtr">Typedef </a>htmlDocPtr</h3><pre class="programlisting"><a href="libxml2-tree.html#xmlDocPtr">xmlDocPtr</a> htmlDocPtr;
|
---|
123 | </pre><p/>
|
---|
124 | </div>
|
---|
125 | <hr/>
|
---|
126 | <div class="refsect2" lang="en"><h3><a name="htmlElemDesc">Structure </a>htmlElemDesc</h3><pre class="programlisting">struct _htmlElemDesc {
|
---|
127 | const char * name : The tag name
|
---|
128 | char startTag : Whether the start tag can be implied
|
---|
129 | char endTag : Whether the end tag can be implied
|
---|
130 | char saveEndTag : Whether the end tag should be saved
|
---|
131 | char empty : Is this an empty element ?
|
---|
132 | char depr : Is this a deprecated element ?
|
---|
133 | char dtd : 1: only in Loose DTD, 2: only Frameset one
|
---|
134 | char isinline : is this a block 0 or inline 1 element
|
---|
135 | const char * desc : the description NRK Jan.2003 * New fields encapsulating HTML structur
|
---|
136 | const char ** subelts : allowed sub-elements of this element
|
---|
137 | const char * defaultsubelt : subelement for suggested auto-repair if necessary or NULL
|
---|
138 | const char ** attrs_opt : Optional Attributes
|
---|
139 | const char ** attrs_depr : Additional deprecated attributes
|
---|
140 | const char ** attrs_req : Required attributes
|
---|
141 | } htmlElemDesc;
|
---|
142 | </pre><p/>
|
---|
143 | </div>
|
---|
144 | <hr/>
|
---|
145 | <div class="refsect2" lang="en"><h3><a name="htmlElemDescPtr">Typedef </a>htmlElemDescPtr</h3><pre class="programlisting"><a href="libxml2-HTMLparser.html#htmlElemDesc">htmlElemDesc</a> * htmlElemDescPtr;
|
---|
146 | </pre><p/>
|
---|
147 | </div>
|
---|
148 | <hr/>
|
---|
149 | <div class="refsect2" lang="en"><h3><a name="htmlEntityDesc">Structure </a>htmlEntityDesc</h3><pre class="programlisting">struct _htmlEntityDesc {
|
---|
150 | unsigned int value : the UNICODE value for the character
|
---|
151 | const char * name : The entity name
|
---|
152 | const char * desc : the description
|
---|
153 | } htmlEntityDesc;
|
---|
154 | </pre><p/>
|
---|
155 | </div>
|
---|
156 | <hr/>
|
---|
157 | <div class="refsect2" lang="en"><h3><a name="htmlEntityDescPtr">Typedef </a>htmlEntityDescPtr</h3><pre class="programlisting"><a href="libxml2-HTMLparser.html#htmlEntityDesc">htmlEntityDesc</a> * htmlEntityDescPtr;
|
---|
158 | </pre><p/>
|
---|
159 | </div>
|
---|
160 | <hr/>
|
---|
161 | <div class="refsect2" lang="en"><h3><a name="htmlNodePtr">Typedef </a>htmlNodePtr</h3><pre class="programlisting"><a href="libxml2-tree.html#xmlNodePtr">xmlNodePtr</a> htmlNodePtr;
|
---|
162 | </pre><p/>
|
---|
163 | </div>
|
---|
164 | <hr/>
|
---|
165 | <div class="refsect2" lang="en"><h3><a name="htmlParserCtxt">Typedef </a>htmlParserCtxt</h3><pre class="programlisting"><a href="libxml2-tree.html#xmlParserCtxt">xmlParserCtxt</a> htmlParserCtxt;
|
---|
166 | </pre><p/>
|
---|
167 | </div>
|
---|
168 | <hr/>
|
---|
169 | <div class="refsect2" lang="en"><h3><a name="htmlParserCtxtPtr">Typedef </a>htmlParserCtxtPtr</h3><pre class="programlisting"><a href="libxml2-tree.html#xmlParserCtxtPtr">xmlParserCtxtPtr</a> htmlParserCtxtPtr;
|
---|
170 | </pre><p/>
|
---|
171 | </div>
|
---|
172 | <hr/>
|
---|
173 | <div class="refsect2" lang="en"><h3><a name="htmlParserInput">Typedef </a>htmlParserInput</h3><pre class="programlisting"><a href="libxml2-tree.html#xmlParserInput">xmlParserInput</a> htmlParserInput;
|
---|
174 | </pre><p/>
|
---|
175 | </div>
|
---|
176 | <hr/>
|
---|
177 | <div class="refsect2" lang="en"><h3><a name="htmlParserInputPtr">Typedef </a>htmlParserInputPtr</h3><pre class="programlisting"><a href="libxml2-tree.html#xmlParserInputPtr">xmlParserInputPtr</a> htmlParserInputPtr;
|
---|
178 | </pre><p/>
|
---|
179 | </div>
|
---|
180 | <hr/>
|
---|
181 | <div class="refsect2" lang="en"><h3><a name="htmlParserNodeInfo">Typedef </a>htmlParserNodeInfo</h3><pre class="programlisting"><a href="libxml2-parser.html#xmlParserNodeInfo">xmlParserNodeInfo</a> htmlParserNodeInfo;
|
---|
182 | </pre><p/>
|
---|
183 | </div>
|
---|
184 | <hr/>
|
---|
185 | <div class="refsect2" lang="en"><h3><a name="htmlParserOption">Enum </a>htmlParserOption</h3><pre class="programlisting">enum <a href="#htmlParserOption">htmlParserOption</a> {
|
---|
186 | <a name="HTML_PARSE_RECOVER">HTML_PARSE_RECOVER</a> = 1 /* Relaxed parsing */
|
---|
187 | <a name="HTML_PARSE_NODEFDTD">HTML_PARSE_NODEFDTD</a> = 4 /* do not default a doctype if not found */
|
---|
188 | <a name="HTML_PARSE_NOERROR">HTML_PARSE_NOERROR</a> = 32 /* suppress error reports */
|
---|
189 | <a name="HTML_PARSE_NOWARNING">HTML_PARSE_NOWARNING</a> = 64 /* suppress warning reports */
|
---|
190 | <a name="HTML_PARSE_PEDANTIC">HTML_PARSE_PEDANTIC</a> = 128 /* pedantic error reporting */
|
---|
191 | <a name="HTML_PARSE_NOBLANKS">HTML_PARSE_NOBLANKS</a> = 256 /* remove blank nodes */
|
---|
192 | <a name="HTML_PARSE_NONET">HTML_PARSE_NONET</a> = 2048 /* Forbid network access */
|
---|
193 | <a name="HTML_PARSE_NOIMPLIED">HTML_PARSE_NOIMPLIED</a> = 8192 /* Do not add implied html/body... elements */
|
---|
194 | <a name="HTML_PARSE_COMPACT">HTML_PARSE_COMPACT</a> = 65536 /* compact small text nodes */
|
---|
195 | <a name="HTML_PARSE_IGNORE_ENC">HTML_PARSE_IGNORE_ENC</a> = 2097152 /* ignore internal document encoding hint */
|
---|
196 | };
|
---|
197 | </pre><p/>
|
---|
198 | </div>
|
---|
199 | <hr/>
|
---|
200 | <div class="refsect2" lang="en"><h3><a name="htmlSAXHandler">Typedef </a>htmlSAXHandler</h3><pre class="programlisting"><a href="libxml2-tree.html#xmlSAXHandler">xmlSAXHandler</a> htmlSAXHandler;
|
---|
201 | </pre><p/>
|
---|
202 | </div>
|
---|
203 | <hr/>
|
---|
204 | <div class="refsect2" lang="en"><h3><a name="htmlSAXHandlerPtr">Typedef </a>htmlSAXHandlerPtr</h3><pre class="programlisting"><a href="libxml2-tree.html#xmlSAXHandlerPtr">xmlSAXHandlerPtr</a> htmlSAXHandlerPtr;
|
---|
205 | </pre><p/>
|
---|
206 | </div>
|
---|
207 | <hr/>
|
---|
208 | <div class="refsect2" lang="en"><h3><a name="htmlStatus">Enum </a>htmlStatus</h3><pre class="programlisting">enum <a href="#htmlStatus">htmlStatus</a> {
|
---|
209 | <a name="HTML_NA">HTML_NA</a> = 0 /* something we don't check at all */
|
---|
210 | <a name="HTML_INVALID">HTML_INVALID</a> = 1
|
---|
211 | <a name="HTML_DEPRECATED">HTML_DEPRECATED</a> = 2
|
---|
212 | <a name="HTML_VALID">HTML_VALID</a> = 4
|
---|
213 | <a name="HTML_REQUIRED">HTML_REQUIRED</a> = 12 /* VALID bit set so ( & HTML_VALID ) is TRUE */
|
---|
214 | };
|
---|
215 | </pre><p/>
|
---|
216 | </div>
|
---|
217 | <hr/>
|
---|
218 | <div class="refsect2" lang="en"><h3><a name="UTF8ToHtml"/>UTF8ToHtml ()</h3><pre class="programlisting">int UTF8ToHtml (unsigned char * out, <br/> int * outlen, <br/> const unsigned char * in, <br/> int * inlen)<br/>
|
---|
219 | </pre><p>Take a block of UTF-8 chars in and try to convert it to an ASCII plus HTML entities block of chars out.</p>
|
---|
220 | <div class="variablelist"><table border="0"><col align="left"/><tbody><tr><td><span class="term"><i><tt>out</tt></i>:</span></td><td>a pointer to an array of bytes to store the result</td></tr><tr><td><span class="term"><i><tt>outlen</tt></i>:</span></td><td>the length of @out</td></tr><tr><td><span class="term"><i><tt>in</tt></i>:</span></td><td>a pointer to an array of UTF-8 chars</td></tr><tr><td><span class="term"><i><tt>inlen</tt></i>:</span></td><td>the length of @in</td></tr><tr><td><span class="term"><i><tt>Returns</tt></i>:</span></td><td>0 if success, -2 if the transcoding fails, or -1 otherwise The value of @inlen after return is the number of octets consumed as the return value is positive, else unpredictable. The value of @outlen after return is the number of octets consumed.</td></tr></tbody></table></div></div>
|
---|
221 | <hr/>
|
---|
222 | <div class="refsect2" lang="en"><h3><a name="htmlAttrAllowed"/>htmlAttrAllowed ()</h3><pre class="programlisting"><a href="libxml2-HTMLparser.html#htmlStatus">htmlStatus</a> htmlAttrAllowed (const <a href="libxml2-HTMLparser.html#htmlElemDesc">htmlElemDesc</a> * elt, <br/> const <a href="libxml2-xmlstring.html#xmlChar">xmlChar</a> * attr, <br/> int legacy)<br/>
|
---|
223 | </pre><p>Checks whether an <a href="libxml2-SAX.html#attribute">attribute</a> is valid for an element Has full knowledge of Required and Deprecated attributes</p>
|
---|
224 | <div class="variablelist"><table border="0"><col align="left"/><tbody><tr><td><span class="term"><i><tt>elt</tt></i>:</span></td><td>HTML element</td></tr><tr><td><span class="term"><i><tt>attr</tt></i>:</span></td><td>HTML <a href="libxml2-SAX.html#attribute">attribute</a></td></tr><tr><td><span class="term"><i><tt>legacy</tt></i>:</span></td><td>whether to allow deprecated attributes</td></tr><tr><td><span class="term"><i><tt>Returns</tt></i>:</span></td><td>one of HTML_REQUIRED, HTML_VALID, HTML_DEPRECATED, <a href="libxml2-HTMLparser.html#HTML_INVALID">HTML_INVALID</a></td></tr></tbody></table></div></div>
|
---|
225 | <hr/>
|
---|
226 | <div class="refsect2" lang="en"><h3><a name="htmlAutoCloseTag"/>htmlAutoCloseTag ()</h3><pre class="programlisting">int htmlAutoCloseTag (<a href="libxml2-HTMLparser.html#htmlDocPtr">htmlDocPtr</a> doc, <br/> const <a href="libxml2-xmlstring.html#xmlChar">xmlChar</a> * name, <br/> <a href="libxml2-HTMLparser.html#htmlNodePtr">htmlNodePtr</a> elem)<br/>
|
---|
227 | </pre><p>The HTML DTD allows a tag to implicitly close other tags. The list is kept in htmlStartClose array. This function checks if the element or one of it's children would autoclose the given tag.</p>
|
---|
228 | <div class="variablelist"><table border="0"><col align="left"/><tbody><tr><td><span class="term"><i><tt>doc</tt></i>:</span></td><td>the HTML document</td></tr><tr><td><span class="term"><i><tt>name</tt></i>:</span></td><td>The tag name</td></tr><tr><td><span class="term"><i><tt>elem</tt></i>:</span></td><td>the HTML element</td></tr><tr><td><span class="term"><i><tt>Returns</tt></i>:</span></td><td>1 if autoclose, 0 otherwise</td></tr></tbody></table></div></div>
|
---|
229 | <hr/>
|
---|
230 | <div class="refsect2" lang="en"><h3><a name="htmlCreateMemoryParserCtxt"/>htmlCreateMemoryParserCtxt ()</h3><pre class="programlisting"><a href="libxml2-HTMLparser.html#htmlParserCtxtPtr">htmlParserCtxtPtr</a> htmlCreateMemoryParserCtxt (const char * buffer, <br/> int size)<br/>
|
---|
231 | </pre><p>Create a parser context for an HTML in-memory document.</p>
|
---|
232 | <div class="variablelist"><table border="0"><col align="left"/><tbody><tr><td><span class="term"><i><tt>buffer</tt></i>:</span></td><td>a pointer to a char array</td></tr><tr><td><span class="term"><i><tt>size</tt></i>:</span></td><td>the size of the array</td></tr><tr><td><span class="term"><i><tt>Returns</tt></i>:</span></td><td>the new parser context or NULL</td></tr></tbody></table></div></div>
|
---|
233 | <hr/>
|
---|
234 | <div class="refsect2" lang="en"><h3><a name="htmlCreatePushParserCtxt"/>htmlCreatePushParserCtxt ()</h3><pre class="programlisting"><a href="libxml2-HTMLparser.html#htmlParserCtxtPtr">htmlParserCtxtPtr</a> htmlCreatePushParserCtxt (<a href="libxml2-HTMLparser.html#htmlSAXHandlerPtr">htmlSAXHandlerPtr</a> sax, <br/> void * user_data, <br/> const char * chunk, <br/> int size, <br/> const char * filename, <br/> <a href="libxml2-encoding.html#xmlCharEncoding">xmlCharEncoding</a> enc)<br/>
|
---|
235 | </pre><p>Create a parser context for using the HTML parser in push mode The value of @filename is used for fetching external entities and error/warning reports.</p>
|
---|
236 | <div class="variablelist"><table border="0"><col align="left"/><tbody><tr><td><span class="term"><i><tt>sax</tt></i>:</span></td><td>a SAX handler</td></tr><tr><td><span class="term"><i><tt>user_data</tt></i>:</span></td><td>The user data returned on SAX callbacks</td></tr><tr><td><span class="term"><i><tt>chunk</tt></i>:</span></td><td>a pointer to an array of chars</td></tr><tr><td><span class="term"><i><tt>size</tt></i>:</span></td><td>number of chars in the array</td></tr><tr><td><span class="term"><i><tt>filename</tt></i>:</span></td><td>an optional file name or URI</td></tr><tr><td><span class="term"><i><tt>enc</tt></i>:</span></td><td>an optional encoding</td></tr><tr><td><span class="term"><i><tt>Returns</tt></i>:</span></td><td>the new parser context or NULL</td></tr></tbody></table></div></div>
|
---|
237 | <hr/>
|
---|
238 | <div class="refsect2" lang="en"><h3><a name="htmlCtxtReadDoc"/>htmlCtxtReadDoc ()</h3><pre class="programlisting"><a href="libxml2-HTMLparser.html#htmlDocPtr">htmlDocPtr</a> htmlCtxtReadDoc (<a href="libxml2-HTMLparser.html#htmlParserCtxtPtr">htmlParserCtxtPtr</a> ctxt, <br/> const <a href="libxml2-xmlstring.html#xmlChar">xmlChar</a> * cur, <br/> const char * URL, <br/> const char * encoding, <br/> int options)<br/>
|
---|
239 | </pre><p>parse an XML in-memory document and build a tree. This reuses the existing @ctxt parser context</p>
|
---|
240 | <div class="variablelist"><table border="0"><col align="left"/><tbody><tr><td><span class="term"><i><tt>ctxt</tt></i>:</span></td><td>an HTML parser context</td></tr><tr><td><span class="term"><i><tt>cur</tt></i>:</span></td><td>a pointer to a zero terminated string</td></tr><tr><td><span class="term"><i><tt>URL</tt></i>:</span></td><td>the base URL to use for the document</td></tr><tr><td><span class="term"><i><tt>encoding</tt></i>:</span></td><td>the document encoding, or NULL</td></tr><tr><td><span class="term"><i><tt>options</tt></i>:</span></td><td>a combination of htmlParserOption(s)</td></tr><tr><td><span class="term"><i><tt>Returns</tt></i>:</span></td><td>the resulting document tree</td></tr></tbody></table></div></div>
|
---|
241 | <hr/>
|
---|
242 | <div class="refsect2" lang="en"><h3><a name="htmlCtxtReadFd"/>htmlCtxtReadFd ()</h3><pre class="programlisting"><a href="libxml2-HTMLparser.html#htmlDocPtr">htmlDocPtr</a> htmlCtxtReadFd (<a href="libxml2-HTMLparser.html#htmlParserCtxtPtr">htmlParserCtxtPtr</a> ctxt, <br/> int fd, <br/> const char * URL, <br/> const char * encoding, <br/> int options)<br/>
|
---|
243 | </pre><p>parse an XML from a file descriptor and build a tree. This reuses the existing @ctxt parser context</p>
|
---|
244 | <div class="variablelist"><table border="0"><col align="left"/><tbody><tr><td><span class="term"><i><tt>ctxt</tt></i>:</span></td><td>an HTML parser context</td></tr><tr><td><span class="term"><i><tt>fd</tt></i>:</span></td><td>an open file descriptor</td></tr><tr><td><span class="term"><i><tt>URL</tt></i>:</span></td><td>the base URL to use for the document</td></tr><tr><td><span class="term"><i><tt>encoding</tt></i>:</span></td><td>the document encoding, or NULL</td></tr><tr><td><span class="term"><i><tt>options</tt></i>:</span></td><td>a combination of htmlParserOption(s)</td></tr><tr><td><span class="term"><i><tt>Returns</tt></i>:</span></td><td>the resulting document tree</td></tr></tbody></table></div></div>
|
---|
245 | <hr/>
|
---|
246 | <div class="refsect2" lang="en"><h3><a name="htmlCtxtReadFile"/>htmlCtxtReadFile ()</h3><pre class="programlisting"><a href="libxml2-HTMLparser.html#htmlDocPtr">htmlDocPtr</a> htmlCtxtReadFile (<a href="libxml2-HTMLparser.html#htmlParserCtxtPtr">htmlParserCtxtPtr</a> ctxt, <br/> const char * filename, <br/> const char * encoding, <br/> int options)<br/>
|
---|
247 | </pre><p>parse an XML file from the filesystem or the network. This reuses the existing @ctxt parser context</p>
|
---|
248 | <div class="variablelist"><table border="0"><col align="left"/><tbody><tr><td><span class="term"><i><tt>ctxt</tt></i>:</span></td><td>an HTML parser context</td></tr><tr><td><span class="term"><i><tt>filename</tt></i>:</span></td><td>a file or URL</td></tr><tr><td><span class="term"><i><tt>encoding</tt></i>:</span></td><td>the document encoding, or NULL</td></tr><tr><td><span class="term"><i><tt>options</tt></i>:</span></td><td>a combination of htmlParserOption(s)</td></tr><tr><td><span class="term"><i><tt>Returns</tt></i>:</span></td><td>the resulting document tree</td></tr></tbody></table></div></div>
|
---|
249 | <hr/>
|
---|
250 | <div class="refsect2" lang="en"><h3><a name="htmlCtxtReadIO"/>htmlCtxtReadIO ()</h3><pre class="programlisting"><a href="libxml2-HTMLparser.html#htmlDocPtr">htmlDocPtr</a> htmlCtxtReadIO (<a href="libxml2-HTMLparser.html#htmlParserCtxtPtr">htmlParserCtxtPtr</a> ctxt, <br/> <a href="libxml2-xmlIO.html#xmlInputReadCallback">xmlInputReadCallback</a> ioread, <br/> <a href="libxml2-xmlIO.html#xmlInputCloseCallback">xmlInputCloseCallback</a> ioclose, <br/> void * ioctx, <br/> const char * URL, <br/> const char * encoding, <br/> int options)<br/>
|
---|
251 | </pre><p>parse an HTML document from I/O functions and source and build a tree. This reuses the existing @ctxt parser context</p>
|
---|
252 | <div class="variablelist"><table border="0"><col align="left"/><tbody><tr><td><span class="term"><i><tt>ctxt</tt></i>:</span></td><td>an HTML parser context</td></tr><tr><td><span class="term"><i><tt>ioread</tt></i>:</span></td><td>an I/O read function</td></tr><tr><td><span class="term"><i><tt>ioclose</tt></i>:</span></td><td>an I/O close function</td></tr><tr><td><span class="term"><i><tt>ioctx</tt></i>:</span></td><td>an I/O handler</td></tr><tr><td><span class="term"><i><tt>URL</tt></i>:</span></td><td>the base URL to use for the document</td></tr><tr><td><span class="term"><i><tt>encoding</tt></i>:</span></td><td>the document encoding, or NULL</td></tr><tr><td><span class="term"><i><tt>options</tt></i>:</span></td><td>a combination of htmlParserOption(s)</td></tr><tr><td><span class="term"><i><tt>Returns</tt></i>:</span></td><td>the resulting document tree</td></tr></tbody></table></div></div>
|
---|
253 | <hr/>
|
---|
254 | <div class="refsect2" lang="en"><h3><a name="htmlCtxtReadMemory"/>htmlCtxtReadMemory ()</h3><pre class="programlisting"><a href="libxml2-HTMLparser.html#htmlDocPtr">htmlDocPtr</a> htmlCtxtReadMemory (<a href="libxml2-HTMLparser.html#htmlParserCtxtPtr">htmlParserCtxtPtr</a> ctxt, <br/> const char * buffer, <br/> int size, <br/> const char * URL, <br/> const char * encoding, <br/> int options)<br/>
|
---|
255 | </pre><p>parse an XML in-memory document and build a tree. This reuses the existing @ctxt parser context</p>
|
---|
256 | <div class="variablelist"><table border="0"><col align="left"/><tbody><tr><td><span class="term"><i><tt>ctxt</tt></i>:</span></td><td>an HTML parser context</td></tr><tr><td><span class="term"><i><tt>buffer</tt></i>:</span></td><td>a pointer to a char array</td></tr><tr><td><span class="term"><i><tt>size</tt></i>:</span></td><td>the size of the array</td></tr><tr><td><span class="term"><i><tt>URL</tt></i>:</span></td><td>the base URL to use for the document</td></tr><tr><td><span class="term"><i><tt>encoding</tt></i>:</span></td><td>the document encoding, or NULL</td></tr><tr><td><span class="term"><i><tt>options</tt></i>:</span></td><td>a combination of htmlParserOption(s)</td></tr><tr><td><span class="term"><i><tt>Returns</tt></i>:</span></td><td>the resulting document tree</td></tr></tbody></table></div></div>
|
---|
257 | <hr/>
|
---|
258 | <div class="refsect2" lang="en"><h3><a name="htmlCtxtReset"/>htmlCtxtReset ()</h3><pre class="programlisting">void htmlCtxtReset (<a href="libxml2-HTMLparser.html#htmlParserCtxtPtr">htmlParserCtxtPtr</a> ctxt)<br/>
|
---|
259 | </pre><p>Reset a parser context</p>
|
---|
260 | <div class="variablelist"><table border="0"><col align="left"/><tbody><tr><td><span class="term"><i><tt>ctxt</tt></i>:</span></td><td>an HTML parser context</td></tr></tbody></table></div></div>
|
---|
261 | <hr/>
|
---|
262 | <div class="refsect2" lang="en"><h3><a name="htmlCtxtUseOptions"/>htmlCtxtUseOptions ()</h3><pre class="programlisting">int htmlCtxtUseOptions (<a href="libxml2-HTMLparser.html#htmlParserCtxtPtr">htmlParserCtxtPtr</a> ctxt, <br/> int options)<br/>
|
---|
263 | </pre><p>Applies the options to the parser context</p>
|
---|
264 | <div class="variablelist"><table border="0"><col align="left"/><tbody><tr><td><span class="term"><i><tt>ctxt</tt></i>:</span></td><td>an HTML parser context</td></tr><tr><td><span class="term"><i><tt>options</tt></i>:</span></td><td>a combination of htmlParserOption(s)</td></tr><tr><td><span class="term"><i><tt>Returns</tt></i>:</span></td><td>0 in case of success, the set of unknown or unimplemented options in case of error.</td></tr></tbody></table></div></div>
|
---|
265 | <hr/>
|
---|
266 | <div class="refsect2" lang="en"><h3><a name="htmlElementAllowedHere"/>htmlElementAllowedHere ()</h3><pre class="programlisting">int htmlElementAllowedHere (const <a href="libxml2-HTMLparser.html#htmlElemDesc">htmlElemDesc</a> * parent, <br/> const <a href="libxml2-xmlstring.html#xmlChar">xmlChar</a> * elt)<br/>
|
---|
267 | </pre><p>Checks whether an HTML element may be a direct child of a parent element. Note - doesn't check for deprecated elements</p>
|
---|
268 | <div class="variablelist"><table border="0"><col align="left"/><tbody><tr><td><span class="term"><i><tt>parent</tt></i>:</span></td><td>HTML parent element</td></tr><tr><td><span class="term"><i><tt>elt</tt></i>:</span></td><td>HTML element</td></tr><tr><td><span class="term"><i><tt>Returns</tt></i>:</span></td><td>1 if allowed; 0 otherwise.</td></tr></tbody></table></div></div>
|
---|
269 | <hr/>
|
---|
270 | <div class="refsect2" lang="en"><h3><a name="htmlElementStatusHere"/>htmlElementStatusHere ()</h3><pre class="programlisting"><a href="libxml2-HTMLparser.html#htmlStatus">htmlStatus</a> htmlElementStatusHere (const <a href="libxml2-HTMLparser.html#htmlElemDesc">htmlElemDesc</a> * parent, <br/> const <a href="libxml2-HTMLparser.html#htmlElemDesc">htmlElemDesc</a> * elt)<br/>
|
---|
271 | </pre><p>Checks whether an HTML element may be a direct child of a parent element. and if so whether it is valid or deprecated.</p>
|
---|
272 | <div class="variablelist"><table border="0"><col align="left"/><tbody><tr><td><span class="term"><i><tt>parent</tt></i>:</span></td><td>HTML parent element</td></tr><tr><td><span class="term"><i><tt>elt</tt></i>:</span></td><td>HTML element</td></tr><tr><td><span class="term"><i><tt>Returns</tt></i>:</span></td><td>one of HTML_VALID, HTML_DEPRECATED, <a href="libxml2-HTMLparser.html#HTML_INVALID">HTML_INVALID</a></td></tr></tbody></table></div></div>
|
---|
273 | <hr/>
|
---|
274 | <div class="refsect2" lang="en"><h3><a name="htmlEncodeEntities"/>htmlEncodeEntities ()</h3><pre class="programlisting">int htmlEncodeEntities (unsigned char * out, <br/> int * outlen, <br/> const unsigned char * in, <br/> int * inlen, <br/> int quoteChar)<br/>
|
---|
275 | </pre><p>Take a block of UTF-8 chars in and try to convert it to an ASCII plus HTML entities block of chars out.</p>
|
---|
276 | <div class="variablelist"><table border="0"><col align="left"/><tbody><tr><td><span class="term"><i><tt>out</tt></i>:</span></td><td>a pointer to an array of bytes to store the result</td></tr><tr><td><span class="term"><i><tt>outlen</tt></i>:</span></td><td>the length of @out</td></tr><tr><td><span class="term"><i><tt>in</tt></i>:</span></td><td>a pointer to an array of UTF-8 chars</td></tr><tr><td><span class="term"><i><tt>inlen</tt></i>:</span></td><td>the length of @in</td></tr><tr><td><span class="term"><i><tt>quoteChar</tt></i>:</span></td><td>the quote character to escape (' or ") or zero.</td></tr><tr><td><span class="term"><i><tt>Returns</tt></i>:</span></td><td>0 if success, -2 if the transcoding fails, or -1 otherwise The value of @inlen after return is the number of octets consumed as the return value is positive, else unpredictable. The value of @outlen after return is the number of octets consumed.</td></tr></tbody></table></div></div>
|
---|
277 | <hr/>
|
---|
278 | <div class="refsect2" lang="en"><h3><a name="htmlEntityLookup"/>htmlEntityLookup ()</h3><pre class="programlisting">const <a href="libxml2-HTMLparser.html#htmlEntityDesc">htmlEntityDesc</a> * htmlEntityLookup (const <a href="libxml2-xmlstring.html#xmlChar">xmlChar</a> * name)<br/>
|
---|
279 | </pre><p>Lookup the given entity in EntitiesTable TODO: the linear scan is really ugly, an hash table is really needed.</p>
|
---|
280 | <div class="variablelist"><table border="0"><col align="left"/><tbody><tr><td><span class="term"><i><tt>name</tt></i>:</span></td><td>the entity name</td></tr><tr><td><span class="term"><i><tt>Returns</tt></i>:</span></td><td>the associated <a href="libxml2-HTMLparser.html#htmlEntityDescPtr">htmlEntityDescPtr</a> if found, NULL otherwise.</td></tr></tbody></table></div></div>
|
---|
281 | <hr/>
|
---|
282 | <div class="refsect2" lang="en"><h3><a name="htmlEntityValueLookup"/>htmlEntityValueLookup ()</h3><pre class="programlisting">const <a href="libxml2-HTMLparser.html#htmlEntityDesc">htmlEntityDesc</a> * htmlEntityValueLookup (unsigned int value)<br/>
|
---|
283 | </pre><p>Lookup the given entity in EntitiesTable TODO: the linear scan is really ugly, an hash table is really needed.</p>
|
---|
284 | <div class="variablelist"><table border="0"><col align="left"/><tbody><tr><td><span class="term"><i><tt>value</tt></i>:</span></td><td>the entity's unicode value</td></tr><tr><td><span class="term"><i><tt>Returns</tt></i>:</span></td><td>the associated <a href="libxml2-HTMLparser.html#htmlEntityDescPtr">htmlEntityDescPtr</a> if found, NULL otherwise.</td></tr></tbody></table></div></div>
|
---|
285 | <hr/>
|
---|
286 | <div class="refsect2" lang="en"><h3><a name="htmlFreeParserCtxt"/>htmlFreeParserCtxt ()</h3><pre class="programlisting">void htmlFreeParserCtxt (<a href="libxml2-HTMLparser.html#htmlParserCtxtPtr">htmlParserCtxtPtr</a> ctxt)<br/>
|
---|
287 | </pre><p>Free all the memory used by a parser context. However the parsed document in ctxt->myDoc is not freed.</p>
|
---|
288 | <div class="variablelist"><table border="0"><col align="left"/><tbody><tr><td><span class="term"><i><tt>ctxt</tt></i>:</span></td><td>an HTML parser context</td></tr></tbody></table></div></div>
|
---|
289 | <hr/>
|
---|
290 | <div class="refsect2" lang="en"><h3><a name="htmlHandleOmittedElem"/>htmlHandleOmittedElem ()</h3><pre class="programlisting">int htmlHandleOmittedElem (int val)<br/>
|
---|
291 | </pre><p>Set and return the previous value for handling HTML omitted tags.</p>
|
---|
292 | <div class="variablelist"><table border="0"><col align="left"/><tbody><tr><td><span class="term"><i><tt>val</tt></i>:</span></td><td>int 0 or 1</td></tr><tr><td><span class="term"><i><tt>Returns</tt></i>:</span></td><td>the last value for 0 for no handling, 1 for auto insertion.</td></tr></tbody></table></div></div>
|
---|
293 | <hr/>
|
---|
294 | <div class="refsect2" lang="en"><h3><a name="htmlIsAutoClosed"/>htmlIsAutoClosed ()</h3><pre class="programlisting">int htmlIsAutoClosed (<a href="libxml2-HTMLparser.html#htmlDocPtr">htmlDocPtr</a> doc, <br/> <a href="libxml2-HTMLparser.html#htmlNodePtr">htmlNodePtr</a> elem)<br/>
|
---|
295 | </pre><p>The HTML DTD allows a tag to implicitly close other tags. The list is kept in htmlStartClose array. This function checks if a tag is autoclosed by one of it's child</p>
|
---|
296 | <div class="variablelist"><table border="0"><col align="left"/><tbody><tr><td><span class="term"><i><tt>doc</tt></i>:</span></td><td>the HTML document</td></tr><tr><td><span class="term"><i><tt>elem</tt></i>:</span></td><td>the HTML element</td></tr><tr><td><span class="term"><i><tt>Returns</tt></i>:</span></td><td>1 if autoclosed, 0 otherwise</td></tr></tbody></table></div></div>
|
---|
297 | <hr/>
|
---|
298 | <div class="refsect2" lang="en"><h3><a name="htmlIsScriptAttribute"/>htmlIsScriptAttribute ()</h3><pre class="programlisting">int htmlIsScriptAttribute (const <a href="libxml2-xmlstring.html#xmlChar">xmlChar</a> * name)<br/>
|
---|
299 | </pre><p>Check if an <a href="libxml2-SAX.html#attribute">attribute</a> is of content type Script</p>
|
---|
300 | <div class="variablelist"><table border="0"><col align="left"/><tbody><tr><td><span class="term"><i><tt>name</tt></i>:</span></td><td>an <a href="libxml2-SAX.html#attribute">attribute</a> name</td></tr><tr><td><span class="term"><i><tt>Returns</tt></i>:</span></td><td>1 is the <a href="libxml2-SAX.html#attribute">attribute</a> is a script 0 otherwise</td></tr></tbody></table></div></div>
|
---|
301 | <hr/>
|
---|
302 | <div class="refsect2" lang="en"><h3><a name="htmlNewParserCtxt"/>htmlNewParserCtxt ()</h3><pre class="programlisting"><a href="libxml2-HTMLparser.html#htmlParserCtxtPtr">htmlParserCtxtPtr</a> htmlNewParserCtxt (void)<br/>
|
---|
303 | </pre><p>Allocate and initialize a new parser context.</p>
|
---|
304 | <div class="variablelist"><table border="0"><col align="left"/><tbody><tr><td><span class="term"><i><tt>Returns</tt></i>:</span></td><td>the <a href="libxml2-HTMLparser.html#htmlParserCtxtPtr">htmlParserCtxtPtr</a> or NULL in case of allocation error</td></tr></tbody></table></div></div>
|
---|
305 | <hr/>
|
---|
306 | <div class="refsect2" lang="en"><h3><a name="htmlNodeStatus"/>htmlNodeStatus ()</h3><pre class="programlisting"><a href="libxml2-HTMLparser.html#htmlStatus">htmlStatus</a> htmlNodeStatus (const <a href="libxml2-HTMLparser.html#htmlNodePtr">htmlNodePtr</a> node, <br/> int legacy)<br/>
|
---|
307 | </pre><p>Checks whether the tree node is valid. Experimental (the author only uses the HTML enhancements in a SAX parser)</p>
|
---|
308 | <div class="variablelist"><table border="0"><col align="left"/><tbody><tr><td><span class="term"><i><tt>node</tt></i>:</span></td><td>an <a href="libxml2-HTMLparser.html#htmlNodePtr">htmlNodePtr</a> in a tree</td></tr><tr><td><span class="term"><i><tt>legacy</tt></i>:</span></td><td>whether to allow deprecated elements (YES is faster here for Element nodes)</td></tr><tr><td><span class="term"><i><tt>Returns</tt></i>:</span></td><td>for Element nodes, a return from <a href="libxml2-HTMLparser.html#htmlElementAllowedHere">htmlElementAllowedHere</a> (if legacy allowed) or <a href="libxml2-HTMLparser.html#htmlElementStatusHere">htmlElementStatusHere</a> (otherwise). for Attribute nodes, a return from <a href="libxml2-HTMLparser.html#htmlAttrAllowed">htmlAttrAllowed</a> for other nodes, <a href="libxml2-HTMLparser.html#HTML_NA">HTML_NA</a> (no checks performed)</td></tr></tbody></table></div></div>
|
---|
309 | <hr/>
|
---|
310 | <div class="refsect2" lang="en"><h3><a name="htmlParseCharRef"/>htmlParseCharRef ()</h3><pre class="programlisting">int htmlParseCharRef (<a href="libxml2-HTMLparser.html#htmlParserCtxtPtr">htmlParserCtxtPtr</a> ctxt)<br/>
|
---|
311 | </pre><p>parse Reference declarations [66] CharRef ::= '&#' [0-9]+ ';' | '&#x' [0-9a-fA-F]+ ';'</p>
|
---|
312 | <div class="variablelist"><table border="0"><col align="left"/><tbody><tr><td><span class="term"><i><tt>ctxt</tt></i>:</span></td><td>an HTML parser context</td></tr><tr><td><span class="term"><i><tt>Returns</tt></i>:</span></td><td>the value parsed (as an int)</td></tr></tbody></table></div></div>
|
---|
313 | <hr/>
|
---|
314 | <div class="refsect2" lang="en"><h3><a name="htmlParseChunk"/>htmlParseChunk ()</h3><pre class="programlisting">int htmlParseChunk (<a href="libxml2-HTMLparser.html#htmlParserCtxtPtr">htmlParserCtxtPtr</a> ctxt, <br/> const char * chunk, <br/> int size, <br/> int terminate)<br/>
|
---|
315 | </pre><p>Parse a Chunk of memory</p>
|
---|
316 | <div class="variablelist"><table border="0"><col align="left"/><tbody><tr><td><span class="term"><i><tt>ctxt</tt></i>:</span></td><td>an HTML parser context</td></tr><tr><td><span class="term"><i><tt>chunk</tt></i>:</span></td><td>an char array</td></tr><tr><td><span class="term"><i><tt>size</tt></i>:</span></td><td>the size in byte of the chunk</td></tr><tr><td><span class="term"><i><tt>terminate</tt></i>:</span></td><td>last chunk indicator</td></tr><tr><td><span class="term"><i><tt>Returns</tt></i>:</span></td><td>zero if no error, the <a href="libxml2-xmlerror.html#xmlParserErrors">xmlParserErrors</a> otherwise.</td></tr></tbody></table></div></div>
|
---|
317 | <hr/>
|
---|
318 | <div class="refsect2" lang="en"><h3><a name="htmlParseDoc"/>htmlParseDoc ()</h3><pre class="programlisting"><a href="libxml2-HTMLparser.html#htmlDocPtr">htmlDocPtr</a> htmlParseDoc (<a href="libxml2-xmlstring.html#xmlChar">xmlChar</a> * cur, <br/> const char * encoding)<br/>
|
---|
319 | </pre><p>parse an HTML in-memory document and build a tree.</p>
|
---|
320 | <div class="variablelist"><table border="0"><col align="left"/><tbody><tr><td><span class="term"><i><tt>cur</tt></i>:</span></td><td>a pointer to an array of <a href="libxml2-xmlstring.html#xmlChar">xmlChar</a></td></tr><tr><td><span class="term"><i><tt>encoding</tt></i>:</span></td><td>a free form C string describing the HTML document encoding, or NULL</td></tr><tr><td><span class="term"><i><tt>Returns</tt></i>:</span></td><td>the resulting document tree</td></tr></tbody></table></div></div>
|
---|
321 | <hr/>
|
---|
322 | <div class="refsect2" lang="en"><h3><a name="htmlParseDocument"/>htmlParseDocument ()</h3><pre class="programlisting">int htmlParseDocument (<a href="libxml2-HTMLparser.html#htmlParserCtxtPtr">htmlParserCtxtPtr</a> ctxt)<br/>
|
---|
323 | </pre><p>parse an HTML document (and build a tree if using the standard SAX interface).</p>
|
---|
324 | <div class="variablelist"><table border="0"><col align="left"/><tbody><tr><td><span class="term"><i><tt>ctxt</tt></i>:</span></td><td>an HTML parser context</td></tr><tr><td><span class="term"><i><tt>Returns</tt></i>:</span></td><td>0, -1 in case of error. the parser context is augmented as a result of the parsing.</td></tr></tbody></table></div></div>
|
---|
325 | <hr/>
|
---|
326 | <div class="refsect2" lang="en"><h3><a name="htmlParseElement"/>htmlParseElement ()</h3><pre class="programlisting">void htmlParseElement (<a href="libxml2-HTMLparser.html#htmlParserCtxtPtr">htmlParserCtxtPtr</a> ctxt)<br/>
|
---|
327 | </pre><p>parse an HTML element, this is highly recursive this is kept for compatibility with previous code versions [39] element ::= EmptyElemTag | STag content ETag [41] Attribute ::= Name Eq AttValue</p>
|
---|
328 | <div class="variablelist"><table border="0"><col align="left"/><tbody><tr><td><span class="term"><i><tt>ctxt</tt></i>:</span></td><td>an HTML parser context</td></tr></tbody></table></div></div>
|
---|
329 | <hr/>
|
---|
330 | <div class="refsect2" lang="en"><h3><a name="htmlParseEntityRef"/>htmlParseEntityRef ()</h3><pre class="programlisting">const <a href="libxml2-HTMLparser.html#htmlEntityDesc">htmlEntityDesc</a> * htmlParseEntityRef (<a href="libxml2-HTMLparser.html#htmlParserCtxtPtr">htmlParserCtxtPtr</a> ctxt, <br/> const <a href="libxml2-xmlstring.html#xmlChar">xmlChar</a> ** str)<br/>
|
---|
331 | </pre><p>parse an HTML ENTITY references [68] EntityRef ::= '&' Name ';'</p>
|
---|
332 | <div class="variablelist"><table border="0"><col align="left"/><tbody><tr><td><span class="term"><i><tt>ctxt</tt></i>:</span></td><td>an HTML parser context</td></tr><tr><td><span class="term"><i><tt>str</tt></i>:</span></td><td>location to store the entity name</td></tr><tr><td><span class="term"><i><tt>Returns</tt></i>:</span></td><td>the associated <a href="libxml2-HTMLparser.html#htmlEntityDescPtr">htmlEntityDescPtr</a> if found, or NULL otherwise, if non-NULL *str will have to be freed by the caller.</td></tr></tbody></table></div></div>
|
---|
333 | <hr/>
|
---|
334 | <div class="refsect2" lang="en"><h3><a name="htmlParseFile"/>htmlParseFile ()</h3><pre class="programlisting"><a href="libxml2-HTMLparser.html#htmlDocPtr">htmlDocPtr</a> htmlParseFile (const char * filename, <br/> const char * encoding)<br/>
|
---|
335 | </pre><p>parse an HTML file and build a tree. Automatic support for ZLIB/Compress compressed document is provided by default if found at compile-time.</p>
|
---|
336 | <div class="variablelist"><table border="0"><col align="left"/><tbody><tr><td><span class="term"><i><tt>filename</tt></i>:</span></td><td>the filename</td></tr><tr><td><span class="term"><i><tt>encoding</tt></i>:</span></td><td>a free form C string describing the HTML document encoding, or NULL</td></tr><tr><td><span class="term"><i><tt>Returns</tt></i>:</span></td><td>the resulting document tree</td></tr></tbody></table></div></div>
|
---|
337 | <hr/>
|
---|
338 | <div class="refsect2" lang="en"><h3><a name="htmlReadDoc"/>htmlReadDoc ()</h3><pre class="programlisting"><a href="libxml2-HTMLparser.html#htmlDocPtr">htmlDocPtr</a> htmlReadDoc (const <a href="libxml2-xmlstring.html#xmlChar">xmlChar</a> * cur, <br/> const char * URL, <br/> const char * encoding, <br/> int options)<br/>
|
---|
339 | </pre><p>parse an XML in-memory document and build a tree.</p>
|
---|
340 | <div class="variablelist"><table border="0"><col align="left"/><tbody><tr><td><span class="term"><i><tt>cur</tt></i>:</span></td><td>a pointer to a zero terminated string</td></tr><tr><td><span class="term"><i><tt>URL</tt></i>:</span></td><td>the base URL to use for the document</td></tr><tr><td><span class="term"><i><tt>encoding</tt></i>:</span></td><td>the document encoding, or NULL</td></tr><tr><td><span class="term"><i><tt>options</tt></i>:</span></td><td>a combination of htmlParserOption(s)</td></tr><tr><td><span class="term"><i><tt>Returns</tt></i>:</span></td><td>the resulting document tree</td></tr></tbody></table></div></div>
|
---|
341 | <hr/>
|
---|
342 | <div class="refsect2" lang="en"><h3><a name="htmlReadFd"/>htmlReadFd ()</h3><pre class="programlisting"><a href="libxml2-HTMLparser.html#htmlDocPtr">htmlDocPtr</a> htmlReadFd (int fd, <br/> const char * URL, <br/> const char * encoding, <br/> int options)<br/>
|
---|
343 | </pre><p>parse an XML from a file descriptor and build a tree.</p>
|
---|
344 | <div class="variablelist"><table border="0"><col align="left"/><tbody><tr><td><span class="term"><i><tt>fd</tt></i>:</span></td><td>an open file descriptor</td></tr><tr><td><span class="term"><i><tt>URL</tt></i>:</span></td><td>the base URL to use for the document</td></tr><tr><td><span class="term"><i><tt>encoding</tt></i>:</span></td><td>the document encoding, or NULL</td></tr><tr><td><span class="term"><i><tt>options</tt></i>:</span></td><td>a combination of htmlParserOption(s)</td></tr><tr><td><span class="term"><i><tt>Returns</tt></i>:</span></td><td>the resulting document tree</td></tr></tbody></table></div></div>
|
---|
345 | <hr/>
|
---|
346 | <div class="refsect2" lang="en"><h3><a name="htmlReadFile"/>htmlReadFile ()</h3><pre class="programlisting"><a href="libxml2-HTMLparser.html#htmlDocPtr">htmlDocPtr</a> htmlReadFile (const char * filename, <br/> const char * encoding, <br/> int options)<br/>
|
---|
347 | </pre><p>parse an XML file from the filesystem or the network.</p>
|
---|
348 | <div class="variablelist"><table border="0"><col align="left"/><tbody><tr><td><span class="term"><i><tt>filename</tt></i>:</span></td><td>a file or URL</td></tr><tr><td><span class="term"><i><tt>encoding</tt></i>:</span></td><td>the document encoding, or NULL</td></tr><tr><td><span class="term"><i><tt>options</tt></i>:</span></td><td>a combination of htmlParserOption(s)</td></tr><tr><td><span class="term"><i><tt>Returns</tt></i>:</span></td><td>the resulting document tree</td></tr></tbody></table></div></div>
|
---|
349 | <hr/>
|
---|
350 | <div class="refsect2" lang="en"><h3><a name="htmlReadIO"/>htmlReadIO ()</h3><pre class="programlisting"><a href="libxml2-HTMLparser.html#htmlDocPtr">htmlDocPtr</a> htmlReadIO (<a href="libxml2-xmlIO.html#xmlInputReadCallback">xmlInputReadCallback</a> ioread, <br/> <a href="libxml2-xmlIO.html#xmlInputCloseCallback">xmlInputCloseCallback</a> ioclose, <br/> void * ioctx, <br/> const char * URL, <br/> const char * encoding, <br/> int options)<br/>
|
---|
351 | </pre><p>parse an HTML document from I/O functions and source and build a tree.</p>
|
---|
352 | <div class="variablelist"><table border="0"><col align="left"/><tbody><tr><td><span class="term"><i><tt>ioread</tt></i>:</span></td><td>an I/O read function</td></tr><tr><td><span class="term"><i><tt>ioclose</tt></i>:</span></td><td>an I/O close function</td></tr><tr><td><span class="term"><i><tt>ioctx</tt></i>:</span></td><td>an I/O handler</td></tr><tr><td><span class="term"><i><tt>URL</tt></i>:</span></td><td>the base URL to use for the document</td></tr><tr><td><span class="term"><i><tt>encoding</tt></i>:</span></td><td>the document encoding, or NULL</td></tr><tr><td><span class="term"><i><tt>options</tt></i>:</span></td><td>a combination of htmlParserOption(s)</td></tr><tr><td><span class="term"><i><tt>Returns</tt></i>:</span></td><td>the resulting document tree</td></tr></tbody></table></div></div>
|
---|
353 | <hr/>
|
---|
354 | <div class="refsect2" lang="en"><h3><a name="htmlReadMemory"/>htmlReadMemory ()</h3><pre class="programlisting"><a href="libxml2-HTMLparser.html#htmlDocPtr">htmlDocPtr</a> htmlReadMemory (const char * buffer, <br/> int size, <br/> const char * URL, <br/> const char * encoding, <br/> int options)<br/>
|
---|
355 | </pre><p>parse an XML in-memory document and build a tree.</p>
|
---|
356 | <div class="variablelist"><table border="0"><col align="left"/><tbody><tr><td><span class="term"><i><tt>buffer</tt></i>:</span></td><td>a pointer to a char array</td></tr><tr><td><span class="term"><i><tt>size</tt></i>:</span></td><td>the size of the array</td></tr><tr><td><span class="term"><i><tt>URL</tt></i>:</span></td><td>the base URL to use for the document</td></tr><tr><td><span class="term"><i><tt>encoding</tt></i>:</span></td><td>the document encoding, or NULL</td></tr><tr><td><span class="term"><i><tt>options</tt></i>:</span></td><td>a combination of htmlParserOption(s)</td></tr><tr><td><span class="term"><i><tt>Returns</tt></i>:</span></td><td>the resulting document tree</td></tr></tbody></table></div></div>
|
---|
357 | <hr/>
|
---|
358 | <div class="refsect2" lang="en"><h3><a name="htmlSAXParseDoc"/>htmlSAXParseDoc ()</h3><pre class="programlisting"><a href="libxml2-HTMLparser.html#htmlDocPtr">htmlDocPtr</a> htmlSAXParseDoc (<a href="libxml2-xmlstring.html#xmlChar">xmlChar</a> * cur, <br/> const char * encoding, <br/> <a href="libxml2-HTMLparser.html#htmlSAXHandlerPtr">htmlSAXHandlerPtr</a> sax, <br/> void * userData)<br/>
|
---|
359 | </pre><p>Parse an HTML in-memory document. If sax is not NULL, use the SAX callbacks to handle parse events. If sax is NULL, fallback to the default DOM behavior and return a tree.</p>
|
---|
360 | <div class="variablelist"><table border="0"><col align="left"/><tbody><tr><td><span class="term"><i><tt>cur</tt></i>:</span></td><td>a pointer to an array of <a href="libxml2-xmlstring.html#xmlChar">xmlChar</a></td></tr><tr><td><span class="term"><i><tt>encoding</tt></i>:</span></td><td>a free form C string describing the HTML document encoding, or NULL</td></tr><tr><td><span class="term"><i><tt>sax</tt></i>:</span></td><td>the SAX handler block</td></tr><tr><td><span class="term"><i><tt>userData</tt></i>:</span></td><td>if using SAX, this pointer will be provided on callbacks.</td></tr><tr><td><span class="term"><i><tt>Returns</tt></i>:</span></td><td>the resulting document tree unless SAX is NULL or the document is not well formed.</td></tr></tbody></table></div></div>
|
---|
361 | <hr/>
|
---|
362 | <div class="refsect2" lang="en"><h3><a name="htmlSAXParseFile"/>htmlSAXParseFile ()</h3><pre class="programlisting"><a href="libxml2-HTMLparser.html#htmlDocPtr">htmlDocPtr</a> htmlSAXParseFile (const char * filename, <br/> const char * encoding, <br/> <a href="libxml2-HTMLparser.html#htmlSAXHandlerPtr">htmlSAXHandlerPtr</a> sax, <br/> void * userData)<br/>
|
---|
363 | </pre><p>parse an HTML file and build a tree. Automatic support for ZLIB/Compress compressed document is provided by default if found at compile-time. It use the given SAX function block to handle the parsing callback. If sax is NULL, fallback to the default DOM tree building routines.</p>
|
---|
364 | <div class="variablelist"><table border="0"><col align="left"/><tbody><tr><td><span class="term"><i><tt>filename</tt></i>:</span></td><td>the filename</td></tr><tr><td><span class="term"><i><tt>encoding</tt></i>:</span></td><td>a free form C string describing the HTML document encoding, or NULL</td></tr><tr><td><span class="term"><i><tt>sax</tt></i>:</span></td><td>the SAX handler block</td></tr><tr><td><span class="term"><i><tt>userData</tt></i>:</span></td><td>if using SAX, this pointer will be provided on callbacks.</td></tr><tr><td><span class="term"><i><tt>Returns</tt></i>:</span></td><td>the resulting document tree unless SAX is NULL or the document is not well formed.</td></tr></tbody></table></div></div>
|
---|
365 | <hr/>
|
---|
366 | <div class="refsect2" lang="en"><h3><a name="htmlTagLookup"/>htmlTagLookup ()</h3><pre class="programlisting">const <a href="libxml2-HTMLparser.html#htmlElemDesc">htmlElemDesc</a> * htmlTagLookup (const <a href="libxml2-xmlstring.html#xmlChar">xmlChar</a> * tag)<br/>
|
---|
367 | </pre><p>Lookup the HTML tag in the ElementTable</p>
|
---|
368 | <div class="variablelist"><table border="0"><col align="left"/><tbody><tr><td><span class="term"><i><tt>tag</tt></i>:</span></td><td>The tag name in lowercase</td></tr><tr><td><span class="term"><i><tt>Returns</tt></i>:</span></td><td>the related <a href="libxml2-HTMLparser.html#htmlElemDescPtr">htmlElemDescPtr</a> or NULL if not found.</td></tr></tbody></table></div></div>
|
---|
369 | <hr/>
|
---|
370 | </div>
|
---|
371 | </div>
|
---|
372 | </body>
|
---|
373 | </html>
|
---|