/**
***     Transcoding support and wrappers.
***
***     See Copyright for the status of this software.
***
***     Author: Patrick Monnerat <[email protected]>, DATASPHERE S.A.
**/
8 |
|
---|
9 | #define IN_LIBXML
|
---|
10 | #include "libxml.h"
|
---|
11 |
|
---|
12 | #include <sys/types.h>
|
---|
13 | #include <iconv.h>
|
---|
14 | #include "libxml/xmlmemory.h"
|
---|
15 | #include "libxml/dict.h"
|
---|
16 | #include "transcode.h"
|
---|
17 |
|
---|
18 |
|
---|
19 | /**
|
---|
20 | *** Destroy a dictionary and mark as destroyed.
|
---|
21 | **/
|
---|
22 |
|
---|
23 | void
|
---|
24 | xmlZapDict(xmlDictPtr * dict)
|
---|
25 |
|
---|
26 | {
|
---|
27 | if (dict && *dict) {
|
---|
28 | xmlDictFree(*dict);
|
---|
29 | *dict = (xmlDictPtr) NULL;
|
---|
30 | }
|
---|
31 | }
|
---|
32 |
|
---|
33 |
|
---|
34 | /**
|
---|
35 | *** Support for inline conversion from/to UTF-8.
|
---|
36 | *** This is targeted to function parameter encoding conversion.
|
---|
37 | *** Method is:
|
---|
38 | *** - Convert string from/to UTF-8.
|
---|
39 | *** - Keep it in a dictionary.
|
---|
40 | *** - Free original string if a release procedure is provided.
|
---|
41 | *** Can also be called without dictionary to convert a string from/to UTF-8
|
---|
42 | *** into xmlMalloc'ed dynamic storage.
|
---|
43 | **/
|
---|
44 |
|
---|
45 | const char *
|
---|
46 | xmlTranscodeResult(const xmlChar * s, const char * encoding,
|
---|
47 | xmlDictPtr * dict, void (*freeproc)(const void *))
|
---|
48 |
|
---|
49 | {
|
---|
50 | size_t l;
|
---|
51 | iconv_t cd;
|
---|
52 | char * srcp;
|
---|
53 | char * dstp;
|
---|
54 | size_t srcc;
|
---|
55 | size_t dstc;
|
---|
56 | char * ts;
|
---|
57 | const char * ret;
|
---|
58 | int err;
|
---|
59 | static const int nullstring[] = { 0 };
|
---|
60 |
|
---|
61 | /* Convert from UTF-8. */
|
---|
62 |
|
---|
63 | if (!s)
|
---|
64 | return (const char *) NULL;
|
---|
65 |
|
---|
66 | ret = (const char *) NULL;
|
---|
67 | ts = (char *) NULL;
|
---|
68 | err = 0;
|
---|
69 | l = xmlStrlen(s);
|
---|
70 |
|
---|
71 | if (!l && dict)
|
---|
72 | ret = (const char *) nullstring;
|
---|
73 | else {
|
---|
74 | if (dict && !*dict)
|
---|
75 | err = !(*dict = xmlDictCreate());
|
---|
76 |
|
---|
77 | if (!err)
|
---|
78 | err = !(ts = xmlMalloc(4 * l + 4));
|
---|
79 |
|
---|
80 | dstp = ts;
|
---|
81 | dstc = 4 * l;
|
---|
82 |
|
---|
83 | if (!err && l) {
|
---|
84 | if (!encoding)
|
---|
85 | encoding = "ibm-0"; /* Job's encoding. */
|
---|
86 |
|
---|
87 | cd = iconv_open(encoding, "UTF-8");
|
---|
88 |
|
---|
89 | if (cd == (iconv_t) -1)
|
---|
90 | err = 1;
|
---|
91 | else {
|
---|
92 | srcp = (char *) s;
|
---|
93 | srcc = l;
|
---|
94 | srcc = iconv(cd, &srcp, &srcc, &dstp, &dstc);
|
---|
95 | iconv_close(cd);
|
---|
96 | err = srcc == (size_t) -1;
|
---|
97 | }
|
---|
98 | }
|
---|
99 |
|
---|
100 | if (!err) {
|
---|
101 | dstp[0] = dstp[1] = dstp[2] = dstp[3] = '\0';
|
---|
102 |
|
---|
103 | if (!dict) {
|
---|
104 | if (dstc)
|
---|
105 | ts = xmlRealloc(ts, (dstp - ts) + 4);
|
---|
106 |
|
---|
107 | ret = (const char *) ts;
|
---|
108 | ts = (char *) NULL;
|
---|
109 | }
|
---|
110 | else
|
---|
111 | ret = (char *) xmlDictLookup(*dict,
|
---|
112 | (xmlChar *) ts, dstp - ts + 1);
|
---|
113 | }
|
---|
114 | }
|
---|
115 |
|
---|
116 | if (ts)
|
---|
117 | xmlFree(ts);
|
---|
118 |
|
---|
119 | if (freeproc)
|
---|
120 | (*freeproc)(s);
|
---|
121 |
|
---|
122 | return ret;
|
---|
123 | }
|
---|
124 |
|
---|
125 |
|
---|
126 | /**
|
---|
127 | *** Support for inline conversion to UTF-8.
|
---|
128 | *** Method is:
|
---|
129 | *** - Convert string to UTF-8.
|
---|
130 | *** - Keep it in a dictionary.
|
---|
131 | *** Can also be called without dictionary to convert a string to UTF-8 into
|
---|
132 | *** xmlMalloc'ed dynamic storage.
|
---|
133 | **/
|
---|
134 |
|
---|
135 | static const xmlChar *
|
---|
136 | inTranscode(const char * s, size_t l, const char * encoding, xmlDictPtr * dict)
|
---|
137 |
|
---|
138 | {
|
---|
139 | iconv_t cd;
|
---|
140 | char * srcp;
|
---|
141 | char * dstp;
|
---|
142 | size_t srcc;
|
---|
143 | size_t dstc;
|
---|
144 | xmlChar * ts;
|
---|
145 | const xmlChar * ret;
|
---|
146 | static const xmlChar nullstring[] = { 0 };
|
---|
147 |
|
---|
148 | if (!l && dict)
|
---|
149 | return nullstring;
|
---|
150 |
|
---|
151 | if (dict && !*dict)
|
---|
152 | if (!(*dict = xmlDictCreate()))
|
---|
153 | return (const xmlChar *) NULL;
|
---|
154 |
|
---|
155 | ts = (xmlChar *) xmlMalloc(6 * l + 1);
|
---|
156 |
|
---|
157 | if (!ts)
|
---|
158 | return (const xmlChar *) NULL;
|
---|
159 |
|
---|
160 | dstp = (char *) ts;
|
---|
161 | dstc = 6 * l;
|
---|
162 |
|
---|
163 | if (l) {
|
---|
164 | if (!encoding)
|
---|
165 | encoding = "ibm-0"; /* Use job's encoding. */
|
---|
166 |
|
---|
167 | cd = iconv_open("UTF-8", encoding);
|
---|
168 |
|
---|
169 | if (cd == (iconv_t) -1) {
|
---|
170 | xmlFree((char *) ts);
|
---|
171 | return (const xmlChar *) NULL;
|
---|
172 | }
|
---|
173 |
|
---|
174 | srcp = (char *) s;
|
---|
175 | srcc = l;
|
---|
176 | srcc = iconv(cd, &srcp, &srcc, &dstp, &dstc);
|
---|
177 | iconv_close(cd);
|
---|
178 |
|
---|
179 | if (srcc == (size_t) -1) {
|
---|
180 | xmlFree((char *) ts);
|
---|
181 | return (const xmlChar *) NULL;
|
---|
182 | }
|
---|
183 | }
|
---|
184 |
|
---|
185 | *dstp = '\0';
|
---|
186 |
|
---|
187 | if (!dict) {
|
---|
188 | if (dstc)
|
---|
189 | ts = xmlRealloc(ts, (dstp - ts) + 1);
|
---|
190 |
|
---|
191 | return ts;
|
---|
192 | }
|
---|
193 |
|
---|
194 | ret = xmlDictLookup(*dict, ts, dstp - ts + 1);
|
---|
195 | xmlFree((char *) ts);
|
---|
196 | return ret;
|
---|
197 | }
|
---|
198 |
|
---|
199 |
|
---|
200 | /**
|
---|
201 | *** Input 8-bit character string parameter.
|
---|
202 | **/
|
---|
203 |
|
---|
204 | const xmlChar *
|
---|
205 | xmlTranscodeString(const char * s, const char * encoding, xmlDictPtr * dict)
|
---|
206 |
|
---|
207 | {
|
---|
208 | if (!s)
|
---|
209 | return (const xmlChar *) NULL;
|
---|
210 |
|
---|
211 | return inTranscode(s, xmlStrlen(s), encoding, dict);
|
---|
212 | }
|
---|
213 |
|
---|
214 |
|
---|
215 | /**
|
---|
216 | *** Input 16-bit character string parameter.
|
---|
217 | **/
|
---|
218 |
|
---|
219 | const xmlChar *
|
---|
220 | xmlTranscodeWString(const char * s, const char * encoding, xmlDictPtr * dict)
|
---|
221 |
|
---|
222 | {
|
---|
223 | size_t i;
|
---|
224 |
|
---|
225 | if (!s)
|
---|
226 | return (const xmlChar *) NULL;
|
---|
227 |
|
---|
228 | for (i = 0; s[i] && s[i + 1]; i += 2)
|
---|
229 | ;
|
---|
230 |
|
---|
231 | return inTranscode(s, i, encoding, dict);
|
---|
232 | }
|
---|
233 |
|
---|
234 |
|
---|
235 | /**
|
---|
236 | *** Input 32-bit character string parameter.
|
---|
237 | **/
|
---|
238 |
|
---|
239 | const xmlChar *
|
---|
240 | xmlTranscodeHString(const char * s, const char * encoding, xmlDictPtr * dict)
|
---|
241 |
|
---|
242 | {
|
---|
243 | size_t i;
|
---|
244 |
|
---|
245 | if (!s)
|
---|
246 | return (const xmlChar *) NULL;
|
---|
247 |
|
---|
248 | for (i = 0; s[i] && s[i + 1] && s[i + 2] && s[i + 3]; i += 4)
|
---|
249 | ;
|
---|
250 |
|
---|
251 | return inTranscode(s, i, encoding, dict);
|
---|
252 | }
|
---|
253 |
|
---|
254 |
|
---|
255 | /**
|
---|
256 | *** vasprintf() implementation with result transcoding.
|
---|
257 | **/
|
---|
258 |
|
---|
259 | const char *
|
---|
260 | xmlVasprintf(xmlDictPtr * dict, const char * encoding,
|
---|
261 | const xmlChar * fmt, va_list args)
|
---|
262 |
|
---|
263 | {
|
---|
264 | char * s = NULL;
|
---|
265 |
|
---|
266 | vasprintf(&s, fmt, args);
|
---|
267 | return xmlTranscodeResult((const xmlChar *) s, encoding, dict, free);
|
---|
268 | }
|
---|