1 | /*
|
---|
2 | * string.c : an XML string utilities module
|
---|
3 | *
|
---|
4 | * This module provides various utility functions for manipulating
|
---|
5 | * the xmlChar* type. All functions named xmlStr* have been moved here
|
---|
6 | * from the parser.c file (their original home).
|
---|
7 | *
|
---|
8 | * See Copyright for the status of this software.
|
---|
9 | *
|
---|
10 | * UTF8 string routines from:
|
---|
11 | * William Brack <[email protected]>
|
---|
12 | *
|
---|
13 | * [email protected]
|
---|
14 | */
|
---|
15 |
|
---|
16 | #define IN_LIBXML
|
---|
17 | #include "libxml.h"
|
---|
18 |
|
---|
19 | #include <stdlib.h>
|
---|
20 | #include <string.h>
|
---|
21 | #include <limits.h>
|
---|
22 | #include <libxml/xmlmemory.h>
|
---|
23 | #include <libxml/parserInternals.h>
|
---|
24 | #include <libxml/xmlstring.h>
|
---|
25 |
|
---|
26 | #include "private/parser.h"
|
---|
27 | #include "private/string.h"
|
---|
28 |
|
---|
/*
 * Provide va_copy when it is not already defined as a macro: prefer the
 * compiler's __va_copy if that exists, otherwise fall back to copying
 * sizeof(va_list) bytes with memcpy.
 *
 * NOTE(review): the memcpy fallback passes dest/src directly, which only
 * looks right when va_list is an array type - confirm on targets that
 * actually reach this branch.
 */
#if !defined(va_copy)
  #if defined(__va_copy)
    #define va_copy(dest, src) __va_copy(dest, src)
  #else
    #define va_copy(dest, src) memcpy(dest, src, sizeof(va_list))
  #endif
#endif
36 |
|
---|
37 | /************************************************************************
|
---|
38 | * *
|
---|
39 | * Commodity functions to handle xmlChars *
|
---|
40 | * *
|
---|
41 | ************************************************************************/
|
---|
42 |
|
---|
43 | /**
|
---|
44 | * xmlStrndup:
|
---|
45 | * @cur: the input xmlChar *
|
---|
46 | * @len: the len of @cur
|
---|
47 | *
|
---|
48 | * a strndup for array of xmlChar's
|
---|
49 | *
|
---|
50 | * Returns a new xmlChar * or NULL
|
---|
51 | */
|
---|
52 | xmlChar *
|
---|
53 | xmlStrndup(const xmlChar *cur, int len) {
|
---|
54 | xmlChar *ret;
|
---|
55 |
|
---|
56 | if ((cur == NULL) || (len < 0)) return(NULL);
|
---|
57 | ret = (xmlChar *) xmlMallocAtomic((size_t) len + 1);
|
---|
58 | if (ret == NULL) {
|
---|
59 | return(NULL);
|
---|
60 | }
|
---|
61 | memcpy(ret, cur, len);
|
---|
62 | ret[len] = 0;
|
---|
63 | return(ret);
|
---|
64 | }
|
---|
65 |
|
---|
66 | /**
|
---|
67 | * xmlStrdup:
|
---|
68 | * @cur: the input xmlChar *
|
---|
69 | *
|
---|
70 | * a strdup for array of xmlChar's. Since they are supposed to be
|
---|
71 | * encoded in UTF-8 or an encoding with 8bit based chars, we assume
|
---|
72 | * a termination mark of '0'.
|
---|
73 | *
|
---|
74 | * Returns a new xmlChar * or NULL
|
---|
75 | */
|
---|
76 | xmlChar *
|
---|
77 | xmlStrdup(const xmlChar *cur) {
|
---|
78 | const xmlChar *p = cur;
|
---|
79 |
|
---|
80 | if (cur == NULL) return(NULL);
|
---|
81 | while (*p != 0) p++; /* non input consuming */
|
---|
82 | return(xmlStrndup(cur, p - cur));
|
---|
83 | }
|
---|
84 |
|
---|
85 | /**
|
---|
86 | * xmlCharStrndup:
|
---|
87 | * @cur: the input char *
|
---|
88 | * @len: the len of @cur
|
---|
89 | *
|
---|
90 | * a strndup for char's to xmlChar's
|
---|
91 | *
|
---|
92 | * Returns a new xmlChar * or NULL
|
---|
93 | */
|
---|
94 |
|
---|
95 | xmlChar *
|
---|
96 | xmlCharStrndup(const char *cur, int len) {
|
---|
97 | int i;
|
---|
98 | xmlChar *ret;
|
---|
99 |
|
---|
100 | if ((cur == NULL) || (len < 0)) return(NULL);
|
---|
101 | ret = (xmlChar *) xmlMallocAtomic((size_t) len + 1);
|
---|
102 | if (ret == NULL) {
|
---|
103 | return(NULL);
|
---|
104 | }
|
---|
105 | for (i = 0;i < len;i++) {
|
---|
106 | /* Explicit sign change */
|
---|
107 | ret[i] = (xmlChar) cur[i];
|
---|
108 | if (ret[i] == 0) return(ret);
|
---|
109 | }
|
---|
110 | ret[len] = 0;
|
---|
111 | return(ret);
|
---|
112 | }
|
---|
113 |
|
---|
114 | /**
|
---|
115 | * xmlCharStrdup:
|
---|
116 | * @cur: the input char *
|
---|
117 | *
|
---|
118 | * a strdup for char's to xmlChar's
|
---|
119 | *
|
---|
120 | * Returns a new xmlChar * or NULL
|
---|
121 | */
|
---|
122 |
|
---|
123 | xmlChar *
|
---|
124 | xmlCharStrdup(const char *cur) {
|
---|
125 | const char *p = cur;
|
---|
126 |
|
---|
127 | if (cur == NULL) return(NULL);
|
---|
128 | while (*p != '\0') p++; /* non input consuming */
|
---|
129 | return(xmlCharStrndup(cur, p - cur));
|
---|
130 | }
|
---|
131 |
|
---|
132 | /**
|
---|
133 | * xmlStrcmp:
|
---|
134 | * @str1: the first xmlChar *
|
---|
135 | * @str2: the second xmlChar *
|
---|
136 | *
|
---|
137 | * a strcmp for xmlChar's
|
---|
138 | *
|
---|
139 | * Returns the integer result of the comparison
|
---|
140 | */
|
---|
141 |
|
---|
142 | int
|
---|
143 | xmlStrcmp(const xmlChar *str1, const xmlChar *str2) {
|
---|
144 | if (str1 == str2) return(0);
|
---|
145 | if (str1 == NULL) return(-1);
|
---|
146 | if (str2 == NULL) return(1);
|
---|
147 | #ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
|
---|
148 | return(strcmp((const char *)str1, (const char *)str2));
|
---|
149 | #else
|
---|
150 | do {
|
---|
151 | int tmp = *str1++ - *str2;
|
---|
152 | if (tmp != 0) return(tmp);
|
---|
153 | } while (*str2++ != 0);
|
---|
154 | return 0;
|
---|
155 | #endif
|
---|
156 | }
|
---|
157 |
|
---|
158 | /**
|
---|
159 | * xmlStrEqual:
|
---|
160 | * @str1: the first xmlChar *
|
---|
161 | * @str2: the second xmlChar *
|
---|
162 | *
|
---|
163 | * Check if both strings are equal of have same content.
|
---|
164 | * Should be a bit more readable and faster than xmlStrcmp()
|
---|
165 | *
|
---|
166 | * Returns 1 if they are equal, 0 if they are different
|
---|
167 | */
|
---|
168 |
|
---|
169 | int
|
---|
170 | xmlStrEqual(const xmlChar *str1, const xmlChar *str2) {
|
---|
171 | if (str1 == str2) return(1);
|
---|
172 | if (str1 == NULL) return(0);
|
---|
173 | if (str2 == NULL) return(0);
|
---|
174 | #ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
|
---|
175 | return(strcmp((const char *)str1, (const char *)str2) == 0);
|
---|
176 | #else
|
---|
177 | do {
|
---|
178 | if (*str1++ != *str2) return(0);
|
---|
179 | } while (*str2++);
|
---|
180 | return(1);
|
---|
181 | #endif
|
---|
182 | }
|
---|
183 |
|
---|
184 | /**
|
---|
185 | * xmlStrQEqual:
|
---|
186 | * @pref: the prefix of the QName
|
---|
187 | * @name: the localname of the QName
|
---|
188 | * @str: the second xmlChar *
|
---|
189 | *
|
---|
190 | * Check if a QName is Equal to a given string
|
---|
191 | *
|
---|
192 | * Returns 1 if they are equal, 0 if they are different
|
---|
193 | */
|
---|
194 |
|
---|
195 | int
|
---|
196 | xmlStrQEqual(const xmlChar *pref, const xmlChar *name, const xmlChar *str) {
|
---|
197 | if (pref == NULL) return(xmlStrEqual(name, str));
|
---|
198 | if (name == NULL) return(0);
|
---|
199 | if (str == NULL) return(0);
|
---|
200 |
|
---|
201 | do {
|
---|
202 | if (*pref++ != *str) return(0);
|
---|
203 | } while ((*str++) && (*pref));
|
---|
204 | if (*str++ != ':') return(0);
|
---|
205 | do {
|
---|
206 | if (*name++ != *str) return(0);
|
---|
207 | } while (*str++);
|
---|
208 | return(1);
|
---|
209 | }
|
---|
210 |
|
---|
211 | /**
|
---|
212 | * xmlStrncmp:
|
---|
213 | * @str1: the first xmlChar *
|
---|
214 | * @str2: the second xmlChar *
|
---|
215 | * @len: the max comparison length
|
---|
216 | *
|
---|
217 | * a strncmp for xmlChar's
|
---|
218 | *
|
---|
219 | * Returns the integer result of the comparison
|
---|
220 | */
|
---|
221 |
|
---|
222 | int
|
---|
223 | xmlStrncmp(const xmlChar *str1, const xmlChar *str2, int len) {
|
---|
224 | if (len <= 0) return(0);
|
---|
225 | if (str1 == str2) return(0);
|
---|
226 | if (str1 == NULL) return(-1);
|
---|
227 | if (str2 == NULL) return(1);
|
---|
228 | #ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
|
---|
229 | return(strncmp((const char *)str1, (const char *)str2, len));
|
---|
230 | #else
|
---|
231 | do {
|
---|
232 | int tmp = *str1++ - *str2;
|
---|
233 | if (tmp != 0 || --len == 0) return(tmp);
|
---|
234 | } while (*str2++ != 0);
|
---|
235 | return 0;
|
---|
236 | #endif
|
---|
237 | }
|
---|
238 |
|
---|
239 | static const xmlChar casemap[256] = {
|
---|
240 | 0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07,
|
---|
241 | 0x08,0x09,0x0A,0x0B,0x0C,0x0D,0x0E,0x0F,
|
---|
242 | 0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17,
|
---|
243 | 0x18,0x19,0x1A,0x1B,0x1C,0x1D,0x1E,0x1F,
|
---|
244 | 0x20,0x21,0x22,0x23,0x24,0x25,0x26,0x27,
|
---|
245 | 0x28,0x29,0x2A,0x2B,0x2C,0x2D,0x2E,0x2F,
|
---|
246 | 0x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37,
|
---|
247 | 0x38,0x39,0x3A,0x3B,0x3C,0x3D,0x3E,0x3F,
|
---|
248 | 0x40,0x61,0x62,0x63,0x64,0x65,0x66,0x67,
|
---|
249 | 0x68,0x69,0x6A,0x6B,0x6C,0x6D,0x6E,0x6F,
|
---|
250 | 0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77,
|
---|
251 | 0x78,0x79,0x7A,0x7B,0x5C,0x5D,0x5E,0x5F,
|
---|
252 | 0x60,0x61,0x62,0x63,0x64,0x65,0x66,0x67,
|
---|
253 | 0x68,0x69,0x6A,0x6B,0x6C,0x6D,0x6E,0x6F,
|
---|
254 | 0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77,
|
---|
255 | 0x78,0x79,0x7A,0x7B,0x7C,0x7D,0x7E,0x7F,
|
---|
256 | 0x80,0x81,0x82,0x83,0x84,0x85,0x86,0x87,
|
---|
257 | 0x88,0x89,0x8A,0x8B,0x8C,0x8D,0x8E,0x8F,
|
---|
258 | 0x90,0x91,0x92,0x93,0x94,0x95,0x96,0x97,
|
---|
259 | 0x98,0x99,0x9A,0x9B,0x9C,0x9D,0x9E,0x9F,
|
---|
260 | 0xA0,0xA1,0xA2,0xA3,0xA4,0xA5,0xA6,0xA7,
|
---|
261 | 0xA8,0xA9,0xAA,0xAB,0xAC,0xAD,0xAE,0xAF,
|
---|
262 | 0xB0,0xB1,0xB2,0xB3,0xB4,0xB5,0xB6,0xB7,
|
---|
263 | 0xB8,0xB9,0xBA,0xBB,0xBC,0xBD,0xBE,0xBF,
|
---|
264 | 0xC0,0xC1,0xC2,0xC3,0xC4,0xC5,0xC6,0xC7,
|
---|
265 | 0xC8,0xC9,0xCA,0xCB,0xCC,0xCD,0xCE,0xCF,
|
---|
266 | 0xD0,0xD1,0xD2,0xD3,0xD4,0xD5,0xD6,0xD7,
|
---|
267 | 0xD8,0xD9,0xDA,0xDB,0xDC,0xDD,0xDE,0xDF,
|
---|
268 | 0xE0,0xE1,0xE2,0xE3,0xE4,0xE5,0xE6,0xE7,
|
---|
269 | 0xE8,0xE9,0xEA,0xEB,0xEC,0xED,0xEE,0xEF,
|
---|
270 | 0xF0,0xF1,0xF2,0xF3,0xF4,0xF5,0xF6,0xF7,
|
---|
271 | 0xF8,0xF9,0xFA,0xFB,0xFC,0xFD,0xFE,0xFF
|
---|
272 | };
|
---|
273 |
|
---|
274 | /**
|
---|
275 | * xmlStrcasecmp:
|
---|
276 | * @str1: the first xmlChar *
|
---|
277 | * @str2: the second xmlChar *
|
---|
278 | *
|
---|
279 | * a strcasecmp for xmlChar's
|
---|
280 | *
|
---|
281 | * Returns the integer result of the comparison
|
---|
282 | */
|
---|
283 |
|
---|
284 | int
|
---|
285 | xmlStrcasecmp(const xmlChar *str1, const xmlChar *str2) {
|
---|
286 | register int tmp;
|
---|
287 |
|
---|
288 | if (str1 == str2) return(0);
|
---|
289 | if (str1 == NULL) return(-1);
|
---|
290 | if (str2 == NULL) return(1);
|
---|
291 | do {
|
---|
292 | tmp = casemap[*str1++] - casemap[*str2];
|
---|
293 | if (tmp != 0) return(tmp);
|
---|
294 | } while (*str2++ != 0);
|
---|
295 | return 0;
|
---|
296 | }
|
---|
297 |
|
---|
298 | /**
|
---|
299 | * xmlStrncasecmp:
|
---|
300 | * @str1: the first xmlChar *
|
---|
301 | * @str2: the second xmlChar *
|
---|
302 | * @len: the max comparison length
|
---|
303 | *
|
---|
304 | * a strncasecmp for xmlChar's
|
---|
305 | *
|
---|
306 | * Returns the integer result of the comparison
|
---|
307 | */
|
---|
308 |
|
---|
309 | int
|
---|
310 | xmlStrncasecmp(const xmlChar *str1, const xmlChar *str2, int len) {
|
---|
311 | register int tmp;
|
---|
312 |
|
---|
313 | if (len <= 0) return(0);
|
---|
314 | if (str1 == str2) return(0);
|
---|
315 | if (str1 == NULL) return(-1);
|
---|
316 | if (str2 == NULL) return(1);
|
---|
317 | do {
|
---|
318 | tmp = casemap[*str1++] - casemap[*str2];
|
---|
319 | if (tmp != 0 || --len == 0) return(tmp);
|
---|
320 | } while (*str2++ != 0);
|
---|
321 | return 0;
|
---|
322 | }
|
---|
323 |
|
---|
324 | /**
|
---|
325 | * xmlStrchr:
|
---|
326 | * @str: the xmlChar * array
|
---|
327 | * @val: the xmlChar to search
|
---|
328 | *
|
---|
329 | * a strchr for xmlChar's
|
---|
330 | *
|
---|
331 | * Returns the xmlChar * for the first occurrence or NULL.
|
---|
332 | */
|
---|
333 |
|
---|
334 | const xmlChar *
|
---|
335 | xmlStrchr(const xmlChar *str, xmlChar val) {
|
---|
336 | if (str == NULL) return(NULL);
|
---|
337 | while (*str != 0) { /* non input consuming */
|
---|
338 | if (*str == val) return((xmlChar *) str);
|
---|
339 | str++;
|
---|
340 | }
|
---|
341 | return(NULL);
|
---|
342 | }
|
---|
343 |
|
---|
344 | /**
|
---|
345 | * xmlStrstr:
|
---|
346 | * @str: the xmlChar * array (haystack)
|
---|
347 | * @val: the xmlChar to search (needle)
|
---|
348 | *
|
---|
349 | * a strstr for xmlChar's
|
---|
350 | *
|
---|
351 | * Returns the xmlChar * for the first occurrence or NULL.
|
---|
352 | */
|
---|
353 |
|
---|
354 | const xmlChar *
|
---|
355 | xmlStrstr(const xmlChar *str, const xmlChar *val) {
|
---|
356 | int n;
|
---|
357 |
|
---|
358 | if (str == NULL) return(NULL);
|
---|
359 | if (val == NULL) return(NULL);
|
---|
360 | n = xmlStrlen(val);
|
---|
361 |
|
---|
362 | if (n == 0) return(str);
|
---|
363 | while (*str != 0) { /* non input consuming */
|
---|
364 | if (*str == *val) {
|
---|
365 | if (!xmlStrncmp(str, val, n)) return((const xmlChar *) str);
|
---|
366 | }
|
---|
367 | str++;
|
---|
368 | }
|
---|
369 | return(NULL);
|
---|
370 | }
|
---|
371 |
|
---|
372 | /**
|
---|
373 | * xmlStrcasestr:
|
---|
374 | * @str: the xmlChar * array (haystack)
|
---|
375 | * @val: the xmlChar to search (needle)
|
---|
376 | *
|
---|
377 | * a case-ignoring strstr for xmlChar's
|
---|
378 | *
|
---|
379 | * Returns the xmlChar * for the first occurrence or NULL.
|
---|
380 | */
|
---|
381 |
|
---|
382 | const xmlChar *
|
---|
383 | xmlStrcasestr(const xmlChar *str, const xmlChar *val) {
|
---|
384 | int n;
|
---|
385 |
|
---|
386 | if (str == NULL) return(NULL);
|
---|
387 | if (val == NULL) return(NULL);
|
---|
388 | n = xmlStrlen(val);
|
---|
389 |
|
---|
390 | if (n == 0) return(str);
|
---|
391 | while (*str != 0) { /* non input consuming */
|
---|
392 | if (casemap[*str] == casemap[*val])
|
---|
393 | if (!xmlStrncasecmp(str, val, n)) return(str);
|
---|
394 | str++;
|
---|
395 | }
|
---|
396 | return(NULL);
|
---|
397 | }
|
---|
398 |
|
---|
399 | /**
|
---|
400 | * xmlStrsub:
|
---|
401 | * @str: the xmlChar * array (haystack)
|
---|
402 | * @start: the index of the first char (zero based)
|
---|
403 | * @len: the length of the substring
|
---|
404 | *
|
---|
405 | * Extract a substring of a given string
|
---|
406 | *
|
---|
407 | * Returns the xmlChar * for the first occurrence or NULL.
|
---|
408 | */
|
---|
409 |
|
---|
410 | xmlChar *
|
---|
411 | xmlStrsub(const xmlChar *str, int start, int len) {
|
---|
412 | int i;
|
---|
413 |
|
---|
414 | if (str == NULL) return(NULL);
|
---|
415 | if (start < 0) return(NULL);
|
---|
416 | if (len < 0) return(NULL);
|
---|
417 |
|
---|
418 | for (i = 0;i < start;i++) {
|
---|
419 | if (*str == 0) return(NULL);
|
---|
420 | str++;
|
---|
421 | }
|
---|
422 | if (*str == 0) return(NULL);
|
---|
423 | return(xmlStrndup(str, len));
|
---|
424 | }
|
---|
425 |
|
---|
426 | /**
|
---|
427 | * xmlStrlen:
|
---|
428 | * @str: the xmlChar * array
|
---|
429 | *
|
---|
430 | * length of a xmlChar's string
|
---|
431 | *
|
---|
432 | * Returns the number of xmlChar contained in the ARRAY.
|
---|
433 | */
|
---|
434 |
|
---|
435 | int
|
---|
436 | xmlStrlen(const xmlChar *str) {
|
---|
437 | size_t len = str ? strlen((const char *)str) : 0;
|
---|
438 | return(len > INT_MAX ? 0 : len);
|
---|
439 | }
|
---|
440 |
|
---|
441 | /**
|
---|
442 | * xmlStrncat:
|
---|
443 | * @cur: the original xmlChar * array
|
---|
444 | * @add: the xmlChar * array added
|
---|
445 | * @len: the length of @add
|
---|
446 | *
|
---|
447 | * a strncat for array of xmlChar's, it will extend @cur with the len
|
---|
448 | * first bytes of @add. Note that if @len < 0 then this is an API error
|
---|
449 | * and NULL will be returned.
|
---|
450 | *
|
---|
451 | * Returns a new xmlChar *, the original @cur is reallocated and should
|
---|
452 | * not be freed.
|
---|
453 | */
|
---|
454 |
|
---|
455 | xmlChar *
|
---|
456 | xmlStrncat(xmlChar *cur, const xmlChar *add, int len) {
|
---|
457 | int size;
|
---|
458 | xmlChar *ret;
|
---|
459 |
|
---|
460 | if ((add == NULL) || (len == 0))
|
---|
461 | return(cur);
|
---|
462 | if (len < 0)
|
---|
463 | return(NULL);
|
---|
464 | if (cur == NULL)
|
---|
465 | return(xmlStrndup(add, len));
|
---|
466 |
|
---|
467 | size = xmlStrlen(cur);
|
---|
468 | if ((size < 0) || (size > INT_MAX - len))
|
---|
469 | return(NULL);
|
---|
470 | ret = (xmlChar *) xmlRealloc(cur, (size_t) size + len + 1);
|
---|
471 | if (ret == NULL) {
|
---|
472 | xmlFree(cur);
|
---|
473 | return(NULL);
|
---|
474 | }
|
---|
475 | memcpy(&ret[size], add, len);
|
---|
476 | ret[size + len] = 0;
|
---|
477 | return(ret);
|
---|
478 | }
|
---|
479 |
|
---|
480 | /**
|
---|
481 | * xmlStrncatNew:
|
---|
482 | * @str1: first xmlChar string
|
---|
483 | * @str2: second xmlChar string
|
---|
484 | * @len: the len of @str2 or < 0
|
---|
485 | *
|
---|
486 | * same as xmlStrncat, but creates a new string. The original
|
---|
487 | * two strings are not freed. If @len is < 0 then the length
|
---|
488 | * will be calculated automatically.
|
---|
489 | *
|
---|
490 | * Returns a new xmlChar * or NULL
|
---|
491 | */
|
---|
492 | xmlChar *
|
---|
493 | xmlStrncatNew(const xmlChar *str1, const xmlChar *str2, int len) {
|
---|
494 | int size;
|
---|
495 | xmlChar *ret;
|
---|
496 |
|
---|
497 | if (len < 0) {
|
---|
498 | len = xmlStrlen(str2);
|
---|
499 | if (len < 0)
|
---|
500 | return(NULL);
|
---|
501 | }
|
---|
502 | if (str1 == NULL)
|
---|
503 | return(xmlStrndup(str2, len));
|
---|
504 | if ((str2 == NULL) || (len == 0))
|
---|
505 | return(xmlStrdup(str1));
|
---|
506 |
|
---|
507 | size = xmlStrlen(str1);
|
---|
508 | if ((size < 0) || (size > INT_MAX - len))
|
---|
509 | return(NULL);
|
---|
510 | ret = (xmlChar *) xmlMalloc((size_t) size + len + 1);
|
---|
511 | if (ret == NULL)
|
---|
512 | return(NULL);
|
---|
513 | memcpy(ret, str1, size);
|
---|
514 | memcpy(&ret[size], str2, len);
|
---|
515 | ret[size + len] = 0;
|
---|
516 | return(ret);
|
---|
517 | }
|
---|
518 |
|
---|
519 | /**
|
---|
520 | * xmlStrcat:
|
---|
521 | * @cur: the original xmlChar * array
|
---|
522 | * @add: the xmlChar * array added
|
---|
523 | *
|
---|
524 | * a strcat for array of xmlChar's. Since they are supposed to be
|
---|
525 | * encoded in UTF-8 or an encoding with 8bit based chars, we assume
|
---|
526 | * a termination mark of '0'.
|
---|
527 | *
|
---|
528 | * Returns a new xmlChar * containing the concatenated string. The original
|
---|
529 | * @cur is reallocated and should not be freed.
|
---|
530 | */
|
---|
531 | xmlChar *
|
---|
532 | xmlStrcat(xmlChar *cur, const xmlChar *add) {
|
---|
533 | const xmlChar *p = add;
|
---|
534 |
|
---|
535 | if (add == NULL) return(cur);
|
---|
536 | if (cur == NULL)
|
---|
537 | return(xmlStrdup(add));
|
---|
538 |
|
---|
539 | while (*p != 0) p++; /* non input consuming */
|
---|
540 | return(xmlStrncat(cur, add, p - add));
|
---|
541 | }
|
---|
542 |
|
---|
543 | /**
|
---|
544 | * xmlStrPrintf:
|
---|
545 | * @buf: the result buffer.
|
---|
546 | * @len: the result buffer length.
|
---|
547 | * @msg: the message with printf formatting.
|
---|
548 | * @...: extra parameters for the message.
|
---|
549 | *
|
---|
550 | * Formats @msg and places result into @buf.
|
---|
551 | *
|
---|
552 | * Returns the number of characters written to @buf or -1 if an error occurs.
|
---|
553 | */
|
---|
554 | int
|
---|
555 | xmlStrPrintf(xmlChar *buf, int len, const char *msg, ...) {
|
---|
556 | va_list args;
|
---|
557 | int ret;
|
---|
558 |
|
---|
559 | if((buf == NULL) || (msg == NULL)) {
|
---|
560 | return(-1);
|
---|
561 | }
|
---|
562 |
|
---|
563 | va_start(args, msg);
|
---|
564 | ret = vsnprintf((char *) buf, len, (const char *) msg, args);
|
---|
565 | va_end(args);
|
---|
566 | buf[len - 1] = 0; /* be safe ! */
|
---|
567 |
|
---|
568 | return(ret);
|
---|
569 | }
|
---|
570 |
|
---|
571 | /**
|
---|
572 | * xmlStrVPrintf:
|
---|
573 | * @buf: the result buffer.
|
---|
574 | * @len: the result buffer length.
|
---|
575 | * @msg: the message with printf formatting.
|
---|
576 | * @ap: extra parameters for the message.
|
---|
577 | *
|
---|
578 | * Formats @msg and places result into @buf.
|
---|
579 | *
|
---|
580 | * Returns the number of characters written to @buf or -1 if an error occurs.
|
---|
581 | */
|
---|
582 | int
|
---|
583 | xmlStrVPrintf(xmlChar *buf, int len, const char *msg, va_list ap) {
|
---|
584 | int ret;
|
---|
585 |
|
---|
586 | if((buf == NULL) || (msg == NULL)) {
|
---|
587 | return(-1);
|
---|
588 | }
|
---|
589 |
|
---|
590 | ret = vsnprintf((char *) buf, len, (const char *) msg, ap);
|
---|
591 | buf[len - 1] = 0; /* be safe ! */
|
---|
592 |
|
---|
593 | return(ret);
|
---|
594 | }
|
---|
595 |
|
---|
596 | /**
|
---|
597 | * xmlStrVASPrintf:
|
---|
598 | * @out: pointer to the resulting string
|
---|
599 | * @maxSize: maximum size of the output buffer
|
---|
600 | * @msg: printf format string
|
---|
601 | * @ap: arguments for format string
|
---|
602 | *
|
---|
603 | * Creates a newly allocated string according to format.
|
---|
604 | *
|
---|
605 | * Returns 0 on success, 1 if the result was truncated or on other
|
---|
606 | * errors, -1 if a memory allocation failed.
|
---|
607 | */
|
---|
608 | int
|
---|
609 | xmlStrVASPrintf(xmlChar **out, int maxSize, const char *msg, va_list ap) {
|
---|
610 | char empty[1];
|
---|
611 | va_list copy;
|
---|
612 | xmlChar *buf;
|
---|
613 | int res, size;
|
---|
614 | int truncated = 0;
|
---|
615 |
|
---|
616 | if (out == NULL)
|
---|
617 | return(1);
|
---|
618 | *out = NULL;
|
---|
619 | if (msg == NULL)
|
---|
620 | return(1);
|
---|
621 | if (maxSize < 32)
|
---|
622 | maxSize = 32;
|
---|
623 |
|
---|
624 | va_copy(copy, ap);
|
---|
625 | res = vsnprintf(empty, 1, msg, copy);
|
---|
626 | va_end(copy);
|
---|
627 |
|
---|
628 | if (res > 0) {
|
---|
629 | /* snprintf seems to work according to C99. */
|
---|
630 |
|
---|
631 | if (res < maxSize) {
|
---|
632 | size = res + 1;
|
---|
633 | } else {
|
---|
634 | size = maxSize;
|
---|
635 | truncated = 1;
|
---|
636 | }
|
---|
637 | buf = xmlMalloc(size);
|
---|
638 | if (buf == NULL)
|
---|
639 | return(-1);
|
---|
640 | if (vsnprintf((char *) buf, size, msg, ap) < 0) {
|
---|
641 | xmlFree(buf);
|
---|
642 | return(1);
|
---|
643 | }
|
---|
644 | } else {
|
---|
645 | /*
|
---|
646 | * Unfortunately, older snprintf implementations don't follow the
|
---|
647 | * C99 spec. If the output exceeds the size of the buffer, they can
|
---|
648 | * return -1, 0 or the number of characters written instead of the
|
---|
649 | * needed size. Older MSCVRT also won't write a terminating null
|
---|
650 | * byte if the buffer is too small.
|
---|
651 | *
|
---|
652 | * If the value returned is non-negative and strictly less than
|
---|
653 | * the buffer size (without terminating null), the result should
|
---|
654 | * have been written completely, so we double the buffer size
|
---|
655 | * until this condition is true. This assumes that snprintf will
|
---|
656 | * eventually return a non-negative value. Otherwise, we will
|
---|
657 | * allocate more and more memory until we run out.
|
---|
658 | *
|
---|
659 | * Note that this code path is also executed on conforming
|
---|
660 | * platforms if the output is the empty string.
|
---|
661 | */
|
---|
662 |
|
---|
663 | buf = NULL;
|
---|
664 | size = 32;
|
---|
665 | while (1) {
|
---|
666 | buf = xmlMalloc(size);
|
---|
667 | if (buf == NULL)
|
---|
668 | return(-1);
|
---|
669 |
|
---|
670 | va_copy(copy, ap);
|
---|
671 | res = vsnprintf((char *) buf, size, msg, copy);
|
---|
672 | va_end(copy);
|
---|
673 | if ((res >= 0) && (res < size - 1))
|
---|
674 | break;
|
---|
675 |
|
---|
676 | if (size >= maxSize) {
|
---|
677 | truncated = 1;
|
---|
678 | break;
|
---|
679 | }
|
---|
680 |
|
---|
681 | xmlFree(buf);
|
---|
682 |
|
---|
683 | if (size > maxSize / 2)
|
---|
684 | size = maxSize;
|
---|
685 | else
|
---|
686 | size *= 2;
|
---|
687 | }
|
---|
688 | }
|
---|
689 |
|
---|
690 | /*
|
---|
691 | * If the output was truncated, make sure that the buffer doesn't
|
---|
692 | * end with a truncated UTF-8 sequence.
|
---|
693 | */
|
---|
694 | if (truncated != 0) {
|
---|
695 | int i = size - 1;
|
---|
696 |
|
---|
697 | while (i > 0) {
|
---|
698 | /* Break after ASCII */
|
---|
699 | if (buf[i-1] < 0x80)
|
---|
700 | break;
|
---|
701 | i -= 1;
|
---|
702 | /* Break before non-ASCII */
|
---|
703 | if (buf[i] >= 0xc0)
|
---|
704 | break;
|
---|
705 | }
|
---|
706 |
|
---|
707 | buf[i] = 0;
|
---|
708 | }
|
---|
709 |
|
---|
710 | *out = (xmlChar *) buf;
|
---|
711 | return(truncated);
|
---|
712 | }
|
---|
713 |
|
---|
714 | /**
|
---|
715 | * xmlStrASPrintf:
|
---|
716 | * @out: pointer to the resulting string
|
---|
717 | * @maxSize: maximum size of the output buffer
|
---|
718 | * @msg: printf format string
|
---|
719 | * @...: arguments for format string
|
---|
720 | *
|
---|
721 | * See xmlStrVASPrintf.
|
---|
722 | *
|
---|
723 | * Returns 0 on success, 1 if the result was truncated or on other
|
---|
724 | * errors, -1 if a memory allocation failed.
|
---|
725 | */
|
---|
726 | int
|
---|
727 | xmlStrASPrintf(xmlChar **out, int maxSize, const char *msg, ...) {
|
---|
728 | va_list ap;
|
---|
729 | int ret;
|
---|
730 |
|
---|
731 | va_start(ap, msg);
|
---|
732 | ret = xmlStrVASPrintf(out, maxSize, msg, ap);
|
---|
733 | va_end(ap);
|
---|
734 |
|
---|
735 | return(ret);
|
---|
736 | }
|
---|
737 |
|
---|
/************************************************************************
 *
 *              Generic UTF8 handling routines
 *
 * From rfc2044 (since superseded by RFC 3629): encoding of the
 * Unicode values on UTF-8:
 *
 * UCS-4 range (hex.)           UTF-8 octet sequence (binary)
 * 0000 0000-0000 007F   0xxxxxxx
 * 0000 0080-0000 07FF   110xxxxx 10xxxxxx
 * 0000 0800-0000 FFFF   1110xxxx 10xxxxxx 10xxxxxx
 * 0001 0000-0010 FFFF   11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
 *
 * The original note here hoped that values > 0xFFFF would not be used
 * anytime soon; the routines in this section do handle the 4-byte form.
 *
 ************************************************************************/
752 |
|
---|
753 |
|
---|
754 | /**
|
---|
755 | * xmlUTF8Size:
|
---|
756 | * @utf: pointer to the UTF8 character
|
---|
757 | *
|
---|
758 | * calculates the internal size of a UTF8 character
|
---|
759 | *
|
---|
760 | * returns the numbers of bytes in the character, -1 on format error
|
---|
761 | */
|
---|
762 | int
|
---|
763 | xmlUTF8Size(const xmlChar *utf) {
|
---|
764 | xmlChar mask;
|
---|
765 | int len;
|
---|
766 |
|
---|
767 | if (utf == NULL)
|
---|
768 | return -1;
|
---|
769 | if (*utf < 0x80)
|
---|
770 | return 1;
|
---|
771 | /* check valid UTF8 character */
|
---|
772 | if (!(*utf & 0x40))
|
---|
773 | return -1;
|
---|
774 | /* determine number of bytes in char */
|
---|
775 | len = 2;
|
---|
776 | for (mask=0x20; mask != 0; mask>>=1) {
|
---|
777 | if (!(*utf & mask))
|
---|
778 | return len;
|
---|
779 | len++;
|
---|
780 | }
|
---|
781 | return -1;
|
---|
782 | }
|
---|
783 |
|
---|
784 | /**
|
---|
785 | * xmlUTF8Charcmp:
|
---|
786 | * @utf1: pointer to first UTF8 char
|
---|
787 | * @utf2: pointer to second UTF8 char
|
---|
788 | *
|
---|
789 | * compares the two UCS4 values
|
---|
790 | *
|
---|
791 | * returns result of the compare as with xmlStrncmp
|
---|
792 | */
|
---|
793 | int
|
---|
794 | xmlUTF8Charcmp(const xmlChar *utf1, const xmlChar *utf2) {
|
---|
795 |
|
---|
796 | if (utf1 == NULL ) {
|
---|
797 | if (utf2 == NULL)
|
---|
798 | return 0;
|
---|
799 | return -1;
|
---|
800 | }
|
---|
801 | return xmlStrncmp(utf1, utf2, xmlUTF8Size(utf1));
|
---|
802 | }
|
---|
803 |
|
---|
804 | /**
|
---|
805 | * xmlUTF8Strlen:
|
---|
806 | * @utf: a sequence of UTF-8 encoded bytes
|
---|
807 | *
|
---|
808 | * compute the length of an UTF8 string, it doesn't do a full UTF8
|
---|
809 | * checking of the content of the string.
|
---|
810 | *
|
---|
811 | * Returns the number of characters in the string or -1 in case of error
|
---|
812 | */
|
---|
813 | int
|
---|
814 | xmlUTF8Strlen(const xmlChar *utf) {
|
---|
815 | size_t ret = 0;
|
---|
816 |
|
---|
817 | if (utf == NULL)
|
---|
818 | return(-1);
|
---|
819 |
|
---|
820 | while (*utf != 0) {
|
---|
821 | if (utf[0] & 0x80) {
|
---|
822 | if ((utf[1] & 0xc0) != 0x80)
|
---|
823 | return(-1);
|
---|
824 | if ((utf[0] & 0xe0) == 0xe0) {
|
---|
825 | if ((utf[2] & 0xc0) != 0x80)
|
---|
826 | return(-1);
|
---|
827 | if ((utf[0] & 0xf0) == 0xf0) {
|
---|
828 | if ((utf[0] & 0xf8) != 0xf0 || (utf[3] & 0xc0) != 0x80)
|
---|
829 | return(-1);
|
---|
830 | utf += 4;
|
---|
831 | } else {
|
---|
832 | utf += 3;
|
---|
833 | }
|
---|
834 | } else {
|
---|
835 | utf += 2;
|
---|
836 | }
|
---|
837 | } else {
|
---|
838 | utf++;
|
---|
839 | }
|
---|
840 | ret++;
|
---|
841 | }
|
---|
842 | return(ret > INT_MAX ? 0 : ret);
|
---|
843 | }
|
---|
844 |
|
---|
/**
 * xmlGetUTF8Char:
 * @utf:  a sequence of UTF-8 encoded bytes
 * @len:  a pointer to the minimum number of bytes present in
 *        the sequence.  This is used to assure the next character
 *        is completely contained within the sequence.
 *
 * Read the first UTF8 character from @utf. Overlong encodings,
 * surrogates (0xD800-0xDFFF), values above 0x10FFFF and first bytes
 * that cannot start a sequence of at most 4 bytes are rejected. No byte
 * of @utf is read unless *@len says it is available.
 *
 * Returns the char value or -1 in case of error, and sets *len to
 *        the actual number of bytes consumed (0 in case of error)
 */
int
xmlGetUTF8Char(const unsigned char *utf, int *len) {
    unsigned int c;

    if (utf == NULL)
        goto error;
    if (len == NULL)
        goto error;
    /* Don't even look at the first byte if the caller has none */
    if (*len < 1)
        goto error;

    c = utf[0];
    if (c < 0x80) {
        /* 1-byte code */
        *len = 1;
    } else {
        if ((*len < 2) || ((utf[1] & 0xc0) != 0x80))
            goto error;
        if (c < 0xe0) {
            /* 0x80-0xBF are continuation bytes, 0xC0/0xC1 overlong */
            if (c < 0xc2)
                goto error;
            /* 2-byte code */
            *len = 2;
            c = (c & 0x1f) << 6;
            c |= utf[1] & 0x3f;
        } else {
            if ((*len < 3) || ((utf[2] & 0xc0) != 0x80))
                goto error;
            if (c < 0xf0) {
                /* 3-byte code */
                *len = 3;
                c = (c & 0xf) << 12;
                c |= (utf[1] & 0x3f) << 6;
                c |= utf[2] & 0x3f;
                /* reject overlong forms and surrogates */
                if ((c < 0x800) || ((c >= 0xd800) && (c < 0xe000)))
                    goto error;
            } else {
                /*
                 * Only 11110xxx may start a 4-byte code. Without this
                 * test the 0x7 mask below would decode 0xF9..0xFC as
                 * if they were 0xF1..0xF4.
                 */
                if (c >= 0xf8)
                    goto error;
                if ((*len < 4) || ((utf[3] & 0xc0) != 0x80))
                    goto error;
                *len = 4;
                /* 4-byte code */
                c = (c & 0x7) << 18;
                c |= (utf[1] & 0x3f) << 12;
                c |= (utf[2] & 0x3f) << 6;
                c |= utf[3] & 0x3f;
                /* reject overlong forms and values beyond Unicode */
                if ((c < 0x10000) || (c >= 0x110000))
                    goto error;
            }
        }
    }
    return(c);

error:
    if (len != NULL)
        *len = 0;
    return(-1);
}
914 |
|
---|
/**
 * xmlCheckUTF8:
 * @utf: Pointer to putative UTF-8 encoded string.
 *
 * Checks @utf for being valid UTF-8. @utf is assumed to be
 * null-terminated. This function is not super-strict, as it will
 * allow longer UTF-8 sequences than necessary. Note that Java is
 * capable of producing these sequences if provoked. Also note, this
 * routine checks for the 4-byte maximum size, but does not check for
 * 0x10ffff maximum value.
 *
 * Return value: true (1) if @utf is valid, 0 if it is NULL or malformed.
 **/
int
xmlCheckUTF8(const unsigned char *utf)
{
    unsigned char lead;
    int extra;
    int k;

    if (utf == NULL)
        return(0);

    /*
     * Each character is 1, 2, 3 or 4 bytes long. The accepted forms
     * are (in "bit format"):
     *    0xxxxxxx                                      valid 1-byte
     *    110xxxxx 10xxxxxx                             valid 2-byte
     *    1110xxxx 10xxxxxx 10xxxxxx                    valid 3-byte
     *    11110xxx 10xxxxxx 10xxxxxx 10xxxxxx           valid 4-byte
     */
    while ((lead = utf[0]) != 0) {      /* string is 0-terminated */
        /* The first byte decides how many continuation bytes follow */
        if ((lead & 0x80) == 0x00)      /* 1-byte code, starts with 0 */
            extra = 0;
        else if ((lead & 0xe0) == 0xc0) /* 2-byte code, starts with 110 */
            extra = 1;
        else if ((lead & 0xf0) == 0xe0) /* 3-byte code, starts with 1110 */
            extra = 2;
        else if ((lead & 0xf8) == 0xf0) /* 4-byte code, starts with 11110 */
            extra = 3;
        else                            /* unknown encoding */
            return(0);

        /* Every continuation byte must look like 10xxxxxx */
        for (k = 1; k <= extra; k++) {
            if ((utf[k] & 0xc0) != 0x80)
                return(0);
        }
        utf += extra + 1;
    }
    return(1);
}
969 |
|
---|
970 | /**
|
---|
971 | * xmlUTF8Strsize:
|
---|
972 | * @utf: a sequence of UTF-8 encoded bytes
|
---|
973 | * @len: the number of characters in the array
|
---|
974 | *
|
---|
975 | * storage size of an UTF8 string
|
---|
976 | * the behaviour is not guaranteed if the input string is not UTF-8
|
---|
977 | *
|
---|
978 | * Returns the storage size of
|
---|
979 | * the first 'len' characters of ARRAY
|
---|
980 | */
|
---|
981 |
|
---|
982 | int
|
---|
983 | xmlUTF8Strsize(const xmlChar *utf, int len) {
|
---|
984 | const xmlChar *ptr=utf;
|
---|
985 | int ch;
|
---|
986 | size_t ret;
|
---|
987 |
|
---|
988 | if (utf == NULL)
|
---|
989 | return(0);
|
---|
990 |
|
---|
991 | if (len <= 0)
|
---|
992 | return(0);
|
---|
993 |
|
---|
994 | while ( len-- > 0) {
|
---|
995 | if ( !*ptr )
|
---|
996 | break;
|
---|
997 | if ( (ch = *ptr++) & 0x80)
|
---|
998 | while ((ch<<=1) & 0x80 ) {
|
---|
999 | if (*ptr == 0) break;
|
---|
1000 | ptr++;
|
---|
1001 | }
|
---|
1002 | }
|
---|
1003 | ret = ptr - utf;
|
---|
1004 | return (ret > INT_MAX ? 0 : ret);
|
---|
1005 | }
|
---|
1006 |
|
---|
1007 |
|
---|
1008 | /**
|
---|
1009 | * xmlUTF8Strndup:
|
---|
1010 | * @utf: the input UTF8 *
|
---|
1011 | * @len: the len of @utf (in chars)
|
---|
1012 | *
|
---|
1013 | * a strndup for array of UTF8's
|
---|
1014 | *
|
---|
1015 | * Returns a new UTF8 * or NULL
|
---|
1016 | */
|
---|
1017 | xmlChar *
|
---|
1018 | xmlUTF8Strndup(const xmlChar *utf, int len) {
|
---|
1019 | xmlChar *ret;
|
---|
1020 | int i;
|
---|
1021 |
|
---|
1022 | if ((utf == NULL) || (len < 0)) return(NULL);
|
---|
1023 | i = xmlUTF8Strsize(utf, len);
|
---|
1024 | ret = (xmlChar *) xmlMallocAtomic((size_t) i + 1);
|
---|
1025 | if (ret == NULL) {
|
---|
1026 | return(NULL);
|
---|
1027 | }
|
---|
1028 | memcpy(ret, utf, i);
|
---|
1029 | ret[i] = 0;
|
---|
1030 | return(ret);
|
---|
1031 | }
|
---|
1032 |
|
---|
1033 | /**
|
---|
1034 | * xmlUTF8Strpos:
|
---|
1035 | * @utf: the input UTF8 *
|
---|
1036 | * @pos: the position of the desired UTF8 char (in chars)
|
---|
1037 | *
|
---|
1038 | * a function to provide the equivalent of fetching a
|
---|
1039 | * character from a string array
|
---|
1040 | *
|
---|
1041 | * Returns a pointer to the UTF8 character or NULL
|
---|
1042 | */
|
---|
1043 | const xmlChar *
|
---|
1044 | xmlUTF8Strpos(const xmlChar *utf, int pos) {
|
---|
1045 | int ch;
|
---|
1046 |
|
---|
1047 | if (utf == NULL) return(NULL);
|
---|
1048 | if (pos < 0)
|
---|
1049 | return(NULL);
|
---|
1050 | while (pos--) {
|
---|
1051 | if ((ch=*utf++) == 0) return(NULL);
|
---|
1052 | if ( ch & 0x80 ) {
|
---|
1053 | /* if not simple ascii, verify proper format */
|
---|
1054 | if ( (ch & 0xc0) != 0xc0 )
|
---|
1055 | return(NULL);
|
---|
1056 | /* then skip over remaining bytes for this char */
|
---|
1057 | while ( (ch <<= 1) & 0x80 )
|
---|
1058 | if ( (*utf++ & 0xc0) != 0x80 )
|
---|
1059 | return(NULL);
|
---|
1060 | }
|
---|
1061 | }
|
---|
1062 | return((xmlChar *)utf);
|
---|
1063 | }
|
---|
1064 |
|
---|
1065 | /**
|
---|
1066 | * xmlUTF8Strloc:
|
---|
1067 | * @utf: the input UTF8 *
|
---|
1068 | * @utfchar: the UTF8 character to be found
|
---|
1069 | *
|
---|
1070 | * a function to provide the relative location of a UTF8 char
|
---|
1071 | *
|
---|
1072 | * Returns the relative character position of the desired char
|
---|
1073 | * or -1 if not found
|
---|
1074 | */
|
---|
1075 | int
|
---|
1076 | xmlUTF8Strloc(const xmlChar *utf, const xmlChar *utfchar) {
|
---|
1077 | size_t i;
|
---|
1078 | int size;
|
---|
1079 | int ch;
|
---|
1080 |
|
---|
1081 | if (utf==NULL || utfchar==NULL) return -1;
|
---|
1082 | size = xmlUTF8Strsize(utfchar, 1);
|
---|
1083 | for(i=0; (ch=*utf) != 0; i++) {
|
---|
1084 | if (xmlStrncmp(utf, utfchar, size)==0)
|
---|
1085 | return(i > INT_MAX ? 0 : i);
|
---|
1086 | utf++;
|
---|
1087 | if ( ch & 0x80 ) {
|
---|
1088 | /* if not simple ascii, verify proper format */
|
---|
1089 | if ( (ch & 0xc0) != 0xc0 )
|
---|
1090 | return(-1);
|
---|
1091 | /* then skip over remaining bytes for this char */
|
---|
1092 | while ( (ch <<= 1) & 0x80 )
|
---|
1093 | if ( (*utf++ & 0xc0) != 0x80 )
|
---|
1094 | return(-1);
|
---|
1095 | }
|
---|
1096 | }
|
---|
1097 |
|
---|
1098 | return(-1);
|
---|
1099 | }
|
---|
1100 | /**
|
---|
1101 | * xmlUTF8Strsub:
|
---|
1102 | * @utf: a sequence of UTF-8 encoded bytes
|
---|
1103 | * @start: relative pos of first char
|
---|
1104 | * @len: total number to copy
|
---|
1105 | *
|
---|
1106 | * Create a substring from a given UTF-8 string
|
---|
1107 | * Note: positions are given in units of UTF-8 chars
|
---|
1108 | *
|
---|
1109 | * Returns a pointer to a newly created string or NULL if the
|
---|
1110 | * start index is out of bounds or a memory allocation failed.
|
---|
1111 | * If len is too large, the result is truncated.
|
---|
1112 | */
|
---|
1113 |
|
---|
1114 | xmlChar *
|
---|
1115 | xmlUTF8Strsub(const xmlChar *utf, int start, int len) {
|
---|
1116 | int i;
|
---|
1117 | int ch;
|
---|
1118 |
|
---|
1119 | if (utf == NULL) return(NULL);
|
---|
1120 | if (start < 0) return(NULL);
|
---|
1121 | if (len < 0) return(NULL);
|
---|
1122 |
|
---|
1123 | /*
|
---|
1124 | * Skip over any leading chars
|
---|
1125 | */
|
---|
1126 | for (i = 0; i < start; i++) {
|
---|
1127 | ch = *utf++;
|
---|
1128 | if (ch == 0)
|
---|
1129 | return(NULL);
|
---|
1130 | /* skip over remaining bytes for this char */
|
---|
1131 | if (ch & 0x80) {
|
---|
1132 | ch <<= 1;
|
---|
1133 | while (ch & 0x80) {
|
---|
1134 | if (*utf++ == 0)
|
---|
1135 | return(NULL);
|
---|
1136 | ch <<= 1;
|
---|
1137 | }
|
---|
1138 | }
|
---|
1139 | }
|
---|
1140 |
|
---|
1141 | return(xmlUTF8Strndup(utf, len));
|
---|
1142 | }
|
---|
1143 |
|
---|
1144 | /**
|
---|
1145 | * xmlEscapeFormatString:
|
---|
1146 | * @msg: a pointer to the string in which to escape '%' characters.
|
---|
1147 | * Must be a heap-allocated buffer created by libxml2 that may be
|
---|
1148 | * returned, or that may be freed and replaced.
|
---|
1149 | *
|
---|
1150 | * Replaces the string pointed to by 'msg' with an escaped string.
|
---|
1151 | * Returns the same string with all '%' characters escaped.
|
---|
1152 | */
|
---|
1153 | xmlChar *
|
---|
1154 | xmlEscapeFormatString(xmlChar **msg)
|
---|
1155 | {
|
---|
1156 | xmlChar *msgPtr = NULL;
|
---|
1157 | xmlChar *result = NULL;
|
---|
1158 | xmlChar *resultPtr = NULL;
|
---|
1159 | size_t count = 0;
|
---|
1160 | size_t msgLen = 0;
|
---|
1161 | size_t resultLen = 0;
|
---|
1162 |
|
---|
1163 | if (!msg || !*msg)
|
---|
1164 | return(NULL);
|
---|
1165 |
|
---|
1166 | for (msgPtr = *msg; *msgPtr != '\0'; ++msgPtr) {
|
---|
1167 | ++msgLen;
|
---|
1168 | if (*msgPtr == '%')
|
---|
1169 | ++count;
|
---|
1170 | }
|
---|
1171 |
|
---|
1172 | if (count == 0)
|
---|
1173 | return(*msg);
|
---|
1174 |
|
---|
1175 | if ((count > INT_MAX) || (msgLen > INT_MAX - count))
|
---|
1176 | return(NULL);
|
---|
1177 | resultLen = msgLen + count + 1;
|
---|
1178 | result = (xmlChar *) xmlMallocAtomic(resultLen);
|
---|
1179 | if (result == NULL) {
|
---|
1180 | /* Clear *msg to prevent format string vulnerabilities in
|
---|
1181 | out-of-memory situations. */
|
---|
1182 | xmlFree(*msg);
|
---|
1183 | *msg = NULL;
|
---|
1184 | return(NULL);
|
---|
1185 | }
|
---|
1186 |
|
---|
1187 | for (msgPtr = *msg, resultPtr = result; *msgPtr != '\0'; ++msgPtr, ++resultPtr) {
|
---|
1188 | *resultPtr = *msgPtr;
|
---|
1189 | if (*msgPtr == '%')
|
---|
1190 | *(++resultPtr) = '%';
|
---|
1191 | }
|
---|
1192 | result[resultLen - 1] = '\0';
|
---|
1193 |
|
---|
1194 | xmlFree(*msg);
|
---|
1195 | *msg = result;
|
---|
1196 |
|
---|
1197 | return *msg;
|
---|
1198 | }
|
---|
1199 |
|
---|