1 | * Summary: interface for the encoding conversion functions
|
---|
2 | * Description: interface for the encoding conversion functions needed for
|
---|
3 | * XML basic encoding and iconv() support.
|
---|
4 | *
|
---|
5 | * Related specs are
|
---|
6 | * rfc2044 (UTF-8 and UTF-16) F. Yergeau Alis Technologies
|
---|
7 | * [ISO-10646] UTF-8 and UTF-16 in Annexes
|
---|
8 | * [ISO-8859-1] ISO Latin-1 characters codes.
|
---|
9 | * [UNICODE] The Unicode Consortium, "The Unicode Standard --
|
---|
10 | * Worldwide Character Encoding -- Version 1.0", Addison-
|
---|
11 | * Wesley, Volume 1, 1991, Volume 2, 1992. UTF-8 is
|
---|
12 | * described in Unicode Technical Report #4.
|
---|
13 | * [US-ASCII] Coded Character Set--7-bit American Standard Code for
|
---|
14 | * Information Interchange, ANSI X3.4-1986.
|
---|
15 | *
|
---|
16 | * Copy: See Copyright for the status of this software.
|
---|
17 | *
|
---|
18 | * Author: Patrick Monnerat <[email protected]>, DATASPHERE S.A.
|
---|
19 |
|
---|
20 | /if not defined(XML_CHAR_ENCODING_H__)
|
---|
21 | /define XML_CHAR_ENCODING_H__
|
---|
22 |
|
---|
23 | /include "libxmlrpg/xmlversion"
|
---|
24 | /include "libxmlrpg/xmlTypesC"
|
---|
25 |
|
---|
26 | * xmlCharEncoding:
|
---|
27 | *
|
---|
28 | * Predefined values for some standard encodings.
|
---|
29 | * Libxml does not do beforehand translation on UTF8 and ISOLatinX.
|
---|
30 | * It also supports ASCII, ISO-8859-1, and UTF16 (LE and BE) by default.
|
---|
31 | *
|
---|
32 | * Anything else would have to be translated to UTF8 before being
|
---|
33 | * given to the parser itself. The BOM for UTF16 and the encoding
|
---|
34 | * declaration are looked at and a converter is looked for at that
|
---|
35 | * point. If not found the parser stops here as asked by the XML REC. A
|
---|
36 | * converter can be registered by the user using
|
---|
37 | * xmlRegisterCharEncodingHandler but the current form doesn't allow
|
---|
38 | * stateful transcoding (a serious problem agreed !). If iconv has been
|
---|
39 | * found it will be used automatically and allow stateful transcoding,
|
---|
40 | * the simplest is then to be sure to enable iconv and to provide iconv
|
---|
41 | * libs for the encoding support needed.
|
---|
42 | *
|
---|
43 | * Note that the generic "UTF-16" is not a predefined value. Instead, only
|
---|
44 | * the specific UTF-16LE and UTF-16BE are present.
|
---|
45 | * Scalar type template (like xmlCenum); its named constants follow.
|
---|
46 | d xmlCharEncoding...
|
---|
47 | d s based(######typedef######)
|
---|
48 | d like(xmlCenum)
|
---|
49 | d XML_CHAR_ENCODING_ERROR... No encoding detected
|
---|
50 | d c -1
|
---|
51 | d XML_CHAR_ENCODING_NONE... No encoding detected
|
---|
52 | d c 0
|
---|
53 | d XML_CHAR_ENCODING_UTF8... UTF-8
|
---|
54 | d c 1
|
---|
55 | d XML_CHAR_ENCODING_UTF16LE... UTF-16 little endian
|
---|
56 | d c 2
|
---|
57 | d XML_CHAR_ENCODING_UTF16BE... UTF-16 big endian
|
---|
58 | d c 3
|
---|
59 | d XML_CHAR_ENCODING_UCS4LE... UCS-4 little endian
|
---|
60 | d c 4
|
---|
61 | d XML_CHAR_ENCODING_UCS4BE... UCS-4 big endian
|
---|
62 | d c 5
|
---|
63 | d XML_CHAR_ENCODING_EBCDIC... EBCDIC uh!
|
---|
64 | d c 6
|
---|
65 | d XML_CHAR_ENCODING_UCS4_2143... UCS-4 unusual order
|
---|
66 | d c 7
|
---|
67 | d XML_CHAR_ENCODING_UCS4_3412... UCS-4 unusual order
|
---|
68 | d c 8
|
---|
69 | d XML_CHAR_ENCODING_UCS2... UCS-2
|
---|
70 | d c 9
|
---|
71 | d XML_CHAR_ENCODING_8859_1... ISO-8859-1 ISOLatin1
|
---|
72 | d c 10
|
---|
73 | d XML_CHAR_ENCODING_8859_2... ISO-8859-2 ISOLatin2
|
---|
74 | d c 11
|
---|
75 | d XML_CHAR_ENCODING_8859_3... ISO-8859-3
|
---|
76 | d c 12
|
---|
77 | d XML_CHAR_ENCODING_8859_4... ISO-8859-4
|
---|
78 | d c 13
|
---|
79 | d XML_CHAR_ENCODING_8859_5... ISO-8859-5
|
---|
80 | d c 14
|
---|
81 | d XML_CHAR_ENCODING_8859_6... ISO-8859-6
|
---|
82 | d c 15
|
---|
83 | d XML_CHAR_ENCODING_8859_7... ISO-8859-7
|
---|
84 | d c 16
|
---|
85 | d XML_CHAR_ENCODING_8859_8... ISO-8859-8
|
---|
86 | d c 17
|
---|
87 | d XML_CHAR_ENCODING_8859_9... ISO-8859-9
|
---|
88 | d c 18
|
---|
89 | d XML_CHAR_ENCODING_2022_JP... ISO-2022-JP
|
---|
90 | d c 19
|
---|
91 | d XML_CHAR_ENCODING_SHIFT_JIS... Shift_JIS
|
---|
92 | d c 20
|
---|
93 | d XML_CHAR_ENCODING_EUC_JP... EUC-JP
|
---|
94 | d c 21
|
---|
95 | d XML_CHAR_ENCODING_ASCII... Pure ASCII
|
---|
96 | d c 22
|
---|
97 |
|
---|
98 | * xmlCharEncodingInputFunc:
|
---|
99 | * @out: a pointer to an array of bytes to store the UTF-8 result
|
---|
100 | * @outlen: the length of @out
|
---|
101 | * @in: a pointer to an array of chars in the original encoding
|
---|
102 | * @inlen: the length of @in
|
---|
103 | *
|
---|
104 | * Take a block of chars in the original encoding and try to convert
|
---|
105 | * it to a UTF-8 block of chars out.
|
---|
106 | *
|
---|
107 | * Returns the number of bytes written, -1 if lack of space, or -2
|
---|
108 | * if the transcoding failed.
|
---|
109 | * The value of @inlen after return is the number of octets consumed
|
---|
110 | * if the return value is positive, else unpredictable.
|
---|
111 | * The value of @outlen after return is the number of octets produced.
|
---|
112 | * Procedure pointer type template for the input conversion callback.
|
---|
113 | d xmlCharEncodingInputFunc...
|
---|
114 | d s * based(######typedef######)
|
---|
115 | d procptr
|
---|
116 |
|
---|
117 | * xmlCharEncodingOutputFunc:
|
---|
118 | * @out: a pointer to an array of bytes to store the result
|
---|
119 | * @outlen: the length of @out
|
---|
120 | * @in: a pointer to an array of UTF-8 chars
|
---|
121 | * @inlen: the length of @in
|
---|
122 | *
|
---|
123 | * Take a block of UTF-8 chars in and try to convert it to another
|
---|
124 | * encoding.
|
---|
125 | * Note: a first call designed to produce heading info is called with
|
---|
126 | * in = NULL. If stateful this should also initialize the encoder state.
|
---|
127 | *
|
---|
128 | * Returns the number of bytes written, -1 if lack of space, or -2
|
---|
129 | * if the transcoding failed.
|
---|
130 | * The value of @inlen after return is the number of octets consumed
|
---|
131 | * if the return value is positive, else unpredictable.
|
---|
132 | * The value of @outlen after return is the number of octets produced.
|
---|
133 | * Procedure pointer type template for the output conversion callback.
|
---|
134 | d xmlCharEncodingOutputFunc...
|
---|
135 | d s * based(######typedef######)
|
---|
136 | d procptr
|
---|
137 |
|
---|
138 | * Block defining the handlers for non UTF-8 encodings.
|
---|
139 | * If iconv or ICU is supported, there are two extra fields for each.
|
---|
140 | * Pair of converter pointers; defined only if LIBXML_ICU_ENABLED.
|
---|
141 | /if defined(LIBXML_ICU_ENABLED)
|
---|
142 | d uconv_t ds based(######typedef######)
|
---|
143 | d align qualified
|
---|
144 | d uconv * UConverter *
|
---|
145 | d utf8 * UConverter *
|
---|
146 | /endif
|
---|
147 | * Pointer type template; basing pointer of xmlCharEncodingHandler.
|
---|
148 | d xmlCharEncodingHandlerPtr...
|
---|
149 | d s * based(######typedef######)
|
---|
150 | * Handler block layout: name pointer plus input/output callbacks.
|
---|
151 | d xmlCharEncodingHandler...
|
---|
152 | d ds based(xmlCharEncodingHandlerPtr)
|
---|
153 | d align qualified
|
---|
154 | d name * char *
|
---|
155 | d input like(xmlCharEncodingInputFunc)
|
---|
156 | d output like(xmlCharEncodingOutputFunc)
|
---|
157 | * iconv fields: present only if LIBXML_ICONV_ENABLED is defined.
|
---|
158 | /if defined(LIBXML_ICONV_ENABLED)
|
---|
159 | d iconv_in * iconv_t
|
---|
160 | d iconv_out * iconv_t
|
---|
161 | /endif LIBXML_ICONV_ENABLED
|
---|
162 | * ICU fields: present only if LIBXML_ICU_ENABLED is defined.
|
---|
163 | /if defined(LIBXML_ICU_ENABLED)
|
---|
164 | d uconv_in * uconv_t *
|
---|
165 | d uconv_out * uconv_t *
|
---|
166 | /endif LIBXML_ICU_ENABLED
|
---|
167 |
|
---|
168 | /include "libxmlrpg/tree"
|
---|
169 |
|
---|
170 | * Interfaces for encoding handlers.
|
---|
171 | * Binds 'xmlInitCharEncodingHandlers': no parameter, no result.
|
---|
172 | d xmlInitCharEncodingHandlers...
|
---|
173 | d pr extproc(
|
---|
174 | d 'xmlInitCharEncodingHandlers')
|
---|
175 | * Binds 'xmlCleanupCharEncodingHandlers': no parameter, no result.
|
---|
176 | d xmlCleanupCharEncodingHandlers...
|
---|
177 | d pr extproc(
|
---|
178 | d 'xmlCleanupCharEncodingHandlers')
|
---|
179 | * Takes a handler pointer by value; declares no result.
|
---|
180 | d xmlRegisterCharEncodingHandler...
|
---|
181 | d pr extproc(
|
---|
182 | d 'xmlRegisterCharEncodingHandler')
|
---|
183 | d handler value like(xmlCharEncodingHandlerPtr)
|
---|
184 | * Takes an xmlCharEncoding by value; returns a handler pointer.
|
---|
185 | d xmlGetCharEncodingHandler...
|
---|
186 | d pr extproc('xmlGetCharEncodingHandler')
|
---|
187 | d like(xmlCharEncodingHandlerPtr)
|
---|
188 | d enc value like(xmlCharEncoding)
|
---|
189 | * Takes a name string (options(*string)); returns a handler pointer.
|
---|
190 | d xmlFindCharEncodingHandler...
|
---|
191 | d pr extproc('xmlFindCharEncodingHandler')
|
---|
192 | d like(xmlCharEncodingHandlerPtr)
|
---|
193 | d name * value options(*string) const char *
|
---|
194 | * Takes a name string and two callbacks; returns a handler pointer.
|
---|
195 | d xmlNewCharEncodingHandler...
|
---|
196 | d pr extproc('xmlNewCharEncodingHandler')
|
---|
197 | d like(xmlCharEncodingHandlerPtr)
|
---|
198 | d name * value options(*string) const char *
|
---|
199 | d input value like(xmlCharEncodingInputFunc)
|
---|
200 | d output value like(xmlCharEncodingOutputFunc)
|
---|
201 |
|
---|
202 | * Interfaces for encoding names and aliases.
|
---|
203 | * Takes encoding name and alias strings; returns an xmlCint.
|
---|
204 | d xmlAddEncodingAlias...
|
---|
205 | d pr extproc('xmlAddEncodingAlias')
|
---|
206 | d like(xmlCint)
|
---|
207 | d name * value options(*string) const char *
|
---|
208 | d alias * value options(*string) const char *
|
---|
209 | * Takes an alias string; returns an xmlCint.
|
---|
210 | d xmlDelEncodingAlias...
|
---|
211 | d pr extproc('xmlDelEncodingAlias')
|
---|
212 | d like(xmlCint)
|
---|
213 | d alias * value options(*string) const char *
|
---|
214 | * Takes an alias string; returns a pointer (const char *).
|
---|
215 | d xmlGetEncodingAlias...
|
---|
216 | d pr * extproc('xmlGetEncodingAlias') const char *
|
---|
217 | d alias * value options(*string) const char *
|
---|
218 | * Binds 'xmlCleanupEncodingAliases': no parameter, no result.
|
---|
219 | d xmlCleanupEncodingAliases...
|
---|
220 | d pr extproc('xmlCleanupEncodingAliases')
|
---|
221 | * Takes an encoding name string; returns an xmlCharEncoding value.
|
---|
222 | d xmlParseCharEncoding...
|
---|
223 | d pr extproc('xmlParseCharEncoding')
|
---|
224 | d like(xmlCharEncoding)
|
---|
225 | d name * value options(*string) const char *
|
---|
226 | * Takes an xmlCharEncoding by value; returns a pointer (const char *).
|
---|
227 | d xmlGetCharEncodingName...
|
---|
228 | d pr * extproc('xmlGetCharEncodingName') const char *
|
---|
229 | d enc value like(xmlCharEncoding)
|
---|
230 |
|
---|
231 | * Interfaces directly used by the parsers.
|
---|
232 | * Takes a buffer pointer and a length; returns an xmlCharEncoding.
|
---|
233 | d xmlDetectCharEncoding...
|
---|
234 | d pr extproc('xmlDetectCharEncoding')
|
---|
235 | d like(xmlCharEncoding)
|
---|
236 | d in * value options(*string) const unsigned char*
|
---|
237 | d len value like(xmlCint)
|
---|
238 | * Handler by reference, out/in buffers by value; returns an xmlCint.
|
---|
239 | d xmlCharEncOutFunc...
|
---|
240 | d pr extproc('xmlCharEncOutFunc')
|
---|
241 | d like(xmlCint)
|
---|
242 | d handler likeds(xmlCharEncodingHandler)
|
---|
243 | d out value like(xmlBufferPtr)
|
---|
244 | d in value like(xmlBufferPtr)
|
---|
245 | * Handler by reference, out/in buffers by value; returns an xmlCint.
|
---|
246 | d xmlCharEncInFunc...
|
---|
247 | d pr extproc('xmlCharEncInFunc')
|
---|
248 | d like(xmlCint)
|
---|
249 | d handler likeds(xmlCharEncodingHandler)
|
---|
250 | d out value like(xmlBufferPtr)
|
---|
251 | d in value like(xmlBufferPtr)
|
---|
252 | * Handler by reference, out/in buffers by value; returns an xmlCint.
|
---|
253 | d xmlCharEncFirstLine...
|
---|
254 | d pr extproc('xmlCharEncFirstLine')
|
---|
255 | d like(xmlCint)
|
---|
256 | d handler likeds(xmlCharEncodingHandler)
|
---|
257 | d out value like(xmlBufferPtr)
|
---|
258 | d in value like(xmlBufferPtr)
|
---|
259 | * Handler passed by reference; returns an xmlCint.
|
---|
260 | d xmlCharEncCloseFunc...
|
---|
261 | d pr extproc('xmlCharEncCloseFunc')
|
---|
262 | d like(xmlCint)
|
---|
263 | d handler likeds(xmlCharEncodingHandler)
|
---|
264 |
|
---|
265 | * Export a few useful functions
|
---|
266 | * Declared only if LIBXML_OUTPUT_ENABLED; lengths passed by reference.
|
---|
267 | /if defined(LIBXML_OUTPUT_ENABLED)
|
---|
268 | d UTF8Toisolat1 pr extproc('UTF8Toisolat1')
|
---|
269 | d like(xmlCint)
|
---|
270 | d out 65535 options(*varsize) unsigned char (*)
|
---|
271 | d outlen like(xmlCint)
|
---|
272 | d in * value options(*string) const unsigned char*
|
---|
273 | d inlen like(xmlCint)
|
---|
274 |
|
---|
275 | /endif LIBXML_OUTPUT_ENABLED
|
---|
276 | * out, outlen and inlen by reference; in is a string pointer by value.
|
---|
277 | d isolat1ToUTF8 pr extproc('isolat1ToUTF8')
|
---|
278 | d like(xmlCint)
|
---|
279 | d out 65535 options(*varsize) unsigned char (*)
|
---|
280 | d outlen like(xmlCint)
|
---|
281 | d in * value options(*string) const unsigned char*
|
---|
282 | d inlen like(xmlCint)
|
---|
283 |
|
---|
284 | /endif XML_CHAR_ENCODING_H__
|
---|