/*
 * xmlSeed.c: Generate the XML seed corpus for fuzzing.
 *
 * See Copyright for the status of this software.
 */
|
---|
6 |
|
---|
7 | #include <stdio.h>
|
---|
8 | #include <string.h>
|
---|
9 | #include <glob.h>
|
---|
10 | #include <libgen.h>
|
---|
11 | #include <sys/stat.h>
|
---|
12 |
|
---|
13 | #ifdef _WIN32
|
---|
14 | #include <direct.h>
|
---|
15 | #else
|
---|
16 | #include <unistd.h>
|
---|
17 | #endif
|
---|
18 |
|
---|
19 | #include <libxml/parser.h>
|
---|
20 | #include <libxml/parserInternals.h>
|
---|
21 | #include <libxml/HTMLparser.h>
|
---|
22 | #include <libxml/xinclude.h>
|
---|
23 | #include <libxml/xmlschemas.h>
|
---|
24 | #include "fuzz.h"
|
---|
25 |
|
---|
/* Capacity, in bytes, of every path buffer in this file. */
#define PATH_SIZE 500
/* Chunk size used when copying a document into a seed file. */
#define SEED_BUF_SIZE 16384
/* Capacity of the line buffer that holds one XPath expression. */
#define EXPR_SIZE 4500

/* Per-file callback: writes the seed for the file `base` to `out`. */
typedef int (*fileFunc)(const char *base, FILE *out);

/* Per-argument callback: handles one command line argument. */
typedef int (*mainFunc)(const char *arg);
|
---|
35 |
|
---|
36 | static struct {
|
---|
37 | FILE *out;
|
---|
38 | xmlHashTablePtr entities; /* Maps URLs to xmlFuzzEntityInfos */
|
---|
39 | xmlExternalEntityLoader oldLoader;
|
---|
40 | fileFunc processFile;
|
---|
41 | const char *fuzzer;
|
---|
42 | int counter;
|
---|
43 | char cwd[PATH_SIZE];
|
---|
44 | } globalData;
|
---|
45 |
|
---|
46 | /*
|
---|
47 | * A custom entity loader that writes all external DTDs or entities to a
|
---|
48 | * single file in the format expected by xmlFuzzEntityLoader.
|
---|
49 | */
|
---|
50 | static xmlParserInputPtr
|
---|
51 | fuzzEntityRecorder(const char *URL, const char *ID,
|
---|
52 | xmlParserCtxtPtr ctxt) {
|
---|
53 | xmlParserInputPtr in;
|
---|
54 | static const int chunkSize = 16384;
|
---|
55 | int len;
|
---|
56 |
|
---|
57 | in = xmlNoNetExternalEntityLoader(URL, ID, ctxt);
|
---|
58 | if (in == NULL)
|
---|
59 | return(NULL);
|
---|
60 |
|
---|
61 | if (globalData.entities == NULL) {
|
---|
62 | globalData.entities = xmlHashCreate(4);
|
---|
63 | } else if (xmlHashLookup(globalData.entities,
|
---|
64 | (const xmlChar *) URL) != NULL) {
|
---|
65 | return(in);
|
---|
66 | }
|
---|
67 |
|
---|
68 | do {
|
---|
69 | len = xmlParserInputBufferGrow(in->buf, chunkSize);
|
---|
70 | if (len < 0) {
|
---|
71 | fprintf(stderr, "Error reading %s\n", URL);
|
---|
72 | xmlFreeInputStream(in);
|
---|
73 | return(NULL);
|
---|
74 | }
|
---|
75 | } while (len > 0);
|
---|
76 |
|
---|
77 | xmlFuzzWriteString(globalData.out, URL);
|
---|
78 | xmlFuzzWriteString(globalData.out,
|
---|
79 | (char *) xmlBufContent(in->buf->buffer));
|
---|
80 |
|
---|
81 | xmlFreeInputStream(in);
|
---|
82 |
|
---|
83 | xmlHashAddEntry(globalData.entities, (const xmlChar *) URL, NULL);
|
---|
84 |
|
---|
85 | return(xmlNoNetExternalEntityLoader(URL, ID, ctxt));
|
---|
86 | }
|
---|
87 |
|
---|
88 | static void
|
---|
89 | fuzzRecorderInit(FILE *out) {
|
---|
90 | globalData.out = out;
|
---|
91 | globalData.entities = xmlHashCreate(8);
|
---|
92 | globalData.oldLoader = xmlGetExternalEntityLoader();
|
---|
93 | xmlSetExternalEntityLoader(fuzzEntityRecorder);
|
---|
94 | }
|
---|
95 |
|
---|
96 | static void
|
---|
97 | fuzzRecorderCleanup() {
|
---|
98 | xmlSetExternalEntityLoader(globalData.oldLoader);
|
---|
99 | xmlHashFree(globalData.entities, xmlHashDefaultDeallocator);
|
---|
100 | globalData.out = NULL;
|
---|
101 | globalData.entities = NULL;
|
---|
102 | globalData.oldLoader = NULL;
|
---|
103 | }
|
---|
104 |
|
---|
105 | #ifdef HAVE_XML_FUZZER
|
---|
106 | static int
|
---|
107 | processXml(const char *docFile, FILE *out) {
|
---|
108 | int opts = XML_PARSE_NOENT | XML_PARSE_DTDLOAD;
|
---|
109 | xmlDocPtr doc;
|
---|
110 |
|
---|
111 | fwrite(&opts, sizeof(opts), 1, out);
|
---|
112 |
|
---|
113 | fuzzRecorderInit(out);
|
---|
114 |
|
---|
115 | doc = xmlReadFile(docFile, NULL, opts);
|
---|
116 | xmlXIncludeProcessFlags(doc, opts);
|
---|
117 | xmlFreeDoc(doc);
|
---|
118 |
|
---|
119 | fuzzRecorderCleanup();
|
---|
120 |
|
---|
121 | return(0);
|
---|
122 | }
|
---|
123 | #endif
|
---|
124 |
|
---|
125 | #ifdef HAVE_HTML_FUZZER
|
---|
126 | static int
|
---|
127 | processHtml(const char *docFile, FILE *out) {
|
---|
128 | char buf[SEED_BUF_SIZE];
|
---|
129 | FILE *file;
|
---|
130 | size_t size;
|
---|
131 | int opts = 0;
|
---|
132 |
|
---|
133 | fwrite(&opts, sizeof(opts), 1, out);
|
---|
134 |
|
---|
135 | /* Copy file */
|
---|
136 | file = fopen(docFile, "rb");
|
---|
137 | if (file == NULL) {
|
---|
138 | fprintf(stderr, "couldn't open %s\n", docFile);
|
---|
139 | return(0);
|
---|
140 | }
|
---|
141 | do {
|
---|
142 | size = fread(buf, 1, SEED_BUF_SIZE, file);
|
---|
143 | if (size > 0)
|
---|
144 | fwrite(buf, 1, size, out);
|
---|
145 | } while (size == SEED_BUF_SIZE);
|
---|
146 | fclose(file);
|
---|
147 |
|
---|
148 | return(0);
|
---|
149 | }
|
---|
150 | #endif
|
---|
151 |
|
---|
152 | #ifdef HAVE_SCHEMA_FUZZER
|
---|
153 | static int
|
---|
154 | processSchema(const char *docFile, FILE *out) {
|
---|
155 | xmlSchemaPtr schema;
|
---|
156 | xmlSchemaParserCtxtPtr pctxt;
|
---|
157 |
|
---|
158 | fuzzRecorderInit(out);
|
---|
159 |
|
---|
160 | pctxt = xmlSchemaNewParserCtxt(docFile);
|
---|
161 | xmlSchemaSetParserErrors(pctxt, xmlFuzzErrorFunc, xmlFuzzErrorFunc, NULL);
|
---|
162 | schema = xmlSchemaParse(pctxt);
|
---|
163 | xmlSchemaFreeParserCtxt(pctxt);
|
---|
164 | xmlSchemaFree(schema);
|
---|
165 |
|
---|
166 | fuzzRecorderCleanup();
|
---|
167 |
|
---|
168 | return(0);
|
---|
169 | }
|
---|
170 | #endif
|
---|
171 |
|
---|
172 | static int
|
---|
173 | processPattern(const char *pattern) {
|
---|
174 | glob_t globbuf;
|
---|
175 | int ret = 0;
|
---|
176 | int res, i;
|
---|
177 |
|
---|
178 | res = glob(pattern, 0, NULL, &globbuf);
|
---|
179 | if (res == GLOB_NOMATCH)
|
---|
180 | return(0);
|
---|
181 | if (res != 0) {
|
---|
182 | fprintf(stderr, "couldn't match pattern %s\n", pattern);
|
---|
183 | return(-1);
|
---|
184 | }
|
---|
185 |
|
---|
186 | for (i = 0; i < globbuf.gl_pathc; i++) {
|
---|
187 | struct stat statbuf;
|
---|
188 | char outPath[PATH_SIZE];
|
---|
189 | char *dirBuf = NULL;
|
---|
190 | char *baseBuf = NULL;
|
---|
191 | const char *path, *dir, *base;
|
---|
192 | FILE *out = NULL;
|
---|
193 | int dirChanged = 0;
|
---|
194 | size_t size;
|
---|
195 |
|
---|
196 | path = globbuf.gl_pathv[i];
|
---|
197 |
|
---|
198 | if ((stat(path, &statbuf) != 0) || (!S_ISREG(statbuf.st_mode)))
|
---|
199 | continue;
|
---|
200 |
|
---|
201 | dirBuf = (char *) xmlCharStrdup(path);
|
---|
202 | baseBuf = (char *) xmlCharStrdup(path);
|
---|
203 | if ((dirBuf == NULL) || (baseBuf == NULL)) {
|
---|
204 | fprintf(stderr, "memory allocation failed\n");
|
---|
205 | ret = -1;
|
---|
206 | goto error;
|
---|
207 | }
|
---|
208 | dir = dirname(dirBuf);
|
---|
209 | base = basename(baseBuf);
|
---|
210 |
|
---|
211 | size = snprintf(outPath, sizeof(outPath), "seed/%s/%s",
|
---|
212 | globalData.fuzzer, base);
|
---|
213 | if (size >= PATH_SIZE) {
|
---|
214 | fprintf(stderr, "creating path failed\n");
|
---|
215 | ret = -1;
|
---|
216 | goto error;
|
---|
217 | }
|
---|
218 | out = fopen(outPath, "wb");
|
---|
219 | if (out == NULL) {
|
---|
220 | fprintf(stderr, "couldn't open %s for writing\n", outPath);
|
---|
221 | ret = -1;
|
---|
222 | goto error;
|
---|
223 | }
|
---|
224 | if (chdir(dir) != 0) {
|
---|
225 | fprintf(stderr, "couldn't chdir to %s\n", dir);
|
---|
226 | ret = -1;
|
---|
227 | goto error;
|
---|
228 | }
|
---|
229 | dirChanged = 1;
|
---|
230 | if (globalData.processFile(base, out) != 0)
|
---|
231 | ret = -1;
|
---|
232 |
|
---|
233 | error:
|
---|
234 | if (out != NULL)
|
---|
235 | fclose(out);
|
---|
236 | xmlFree(dirBuf);
|
---|
237 | xmlFree(baseBuf);
|
---|
238 | if ((dirChanged) && (chdir(globalData.cwd) != 0)) {
|
---|
239 | fprintf(stderr, "couldn't chdir to %s\n", globalData.cwd);
|
---|
240 | ret = -1;
|
---|
241 | break;
|
---|
242 | }
|
---|
243 | }
|
---|
244 |
|
---|
245 | globfree(&globbuf);
|
---|
246 | return(ret);
|
---|
247 | }
|
---|
248 |
|
---|
249 | #ifdef HAVE_XPATH_FUZZER
|
---|
250 | static int
|
---|
251 | processXPath(const char *testDir, const char *prefix, const char *name,
|
---|
252 | const char *data, const char *subdir, int xptr) {
|
---|
253 | char pattern[PATH_SIZE];
|
---|
254 | glob_t globbuf;
|
---|
255 | size_t i, size;
|
---|
256 | int ret = 0, res;
|
---|
257 |
|
---|
258 | size = snprintf(pattern, sizeof(pattern), "%s/%s/%s*",
|
---|
259 | testDir, subdir, prefix);
|
---|
260 | if (size >= PATH_SIZE)
|
---|
261 | return(-1);
|
---|
262 | res = glob(pattern, 0, NULL, &globbuf);
|
---|
263 | if (res == GLOB_NOMATCH)
|
---|
264 | return(0);
|
---|
265 | if (res != 0) {
|
---|
266 | fprintf(stderr, "couldn't match pattern %s\n", pattern);
|
---|
267 | return(-1);
|
---|
268 | }
|
---|
269 |
|
---|
270 | for (i = 0; i < globbuf.gl_pathc; i++) {
|
---|
271 | char *path = globbuf.gl_pathv[i];
|
---|
272 | struct stat statbuf;
|
---|
273 | FILE *in;
|
---|
274 | char expr[EXPR_SIZE];
|
---|
275 |
|
---|
276 | if ((stat(path, &statbuf) != 0) || (!S_ISREG(statbuf.st_mode)))
|
---|
277 | continue;
|
---|
278 |
|
---|
279 | in = fopen(path, "rb");
|
---|
280 | if (in == NULL) {
|
---|
281 | ret = -1;
|
---|
282 | continue;
|
---|
283 | }
|
---|
284 |
|
---|
285 | while (fgets(expr, EXPR_SIZE, in) > 0) {
|
---|
286 | char outPath[PATH_SIZE];
|
---|
287 | FILE *out;
|
---|
288 | int j;
|
---|
289 |
|
---|
290 | for (j = 0; expr[j] != 0; j++)
|
---|
291 | if (expr[j] == '\r' || expr[j] == '\n')
|
---|
292 | break;
|
---|
293 | expr[j] = 0;
|
---|
294 |
|
---|
295 | size = snprintf(outPath, sizeof(outPath), "seed/xpath/%s-%d",
|
---|
296 | name, globalData.counter);
|
---|
297 | if (size >= PATH_SIZE) {
|
---|
298 | ret = -1;
|
---|
299 | continue;
|
---|
300 | }
|
---|
301 | out = fopen(outPath, "wb");
|
---|
302 | if (out == NULL) {
|
---|
303 | ret = -1;
|
---|
304 | continue;
|
---|
305 | }
|
---|
306 |
|
---|
307 | if (xptr) {
|
---|
308 | xmlFuzzWriteString(out, expr);
|
---|
309 | } else {
|
---|
310 | char xptrExpr[EXPR_SIZE+100];
|
---|
311 |
|
---|
312 | /* Wrap XPath expressions as XPointer */
|
---|
313 | snprintf(xptrExpr, sizeof(xptrExpr), "xpointer(%s)", expr);
|
---|
314 | xmlFuzzWriteString(out, xptrExpr);
|
---|
315 | }
|
---|
316 |
|
---|
317 | xmlFuzzWriteString(out, data);
|
---|
318 |
|
---|
319 | fclose(out);
|
---|
320 | globalData.counter++;
|
---|
321 | }
|
---|
322 |
|
---|
323 | fclose(in);
|
---|
324 | }
|
---|
325 |
|
---|
326 | globfree(&globbuf);
|
---|
327 |
|
---|
328 | return(ret);
|
---|
329 | }
|
---|
330 |
|
---|
331 | int
|
---|
332 | processXPathDir(const char *testDir) {
|
---|
333 | char pattern[PATH_SIZE];
|
---|
334 | glob_t globbuf;
|
---|
335 | size_t i, size;
|
---|
336 | int ret = 0;
|
---|
337 |
|
---|
338 | globalData.counter = 1;
|
---|
339 | if (processXPath(testDir, "", "expr", "<d></d>", "expr", 0) != 0)
|
---|
340 | ret = -1;
|
---|
341 |
|
---|
342 | size = snprintf(pattern, sizeof(pattern), "%s/docs/*", testDir);
|
---|
343 | if (size >= PATH_SIZE)
|
---|
344 | return(1);
|
---|
345 | if (glob(pattern, 0, NULL, &globbuf) != 0)
|
---|
346 | return(1);
|
---|
347 |
|
---|
348 | for (i = 0; i < globbuf.gl_pathc; i++) {
|
---|
349 | char *path = globbuf.gl_pathv[i];
|
---|
350 | char *data;
|
---|
351 | const char *docFile;
|
---|
352 |
|
---|
353 | data = xmlSlurpFile(path, NULL);
|
---|
354 | if (data == NULL) {
|
---|
355 | ret = -1;
|
---|
356 | continue;
|
---|
357 | }
|
---|
358 | docFile = basename(path);
|
---|
359 |
|
---|
360 | globalData.counter = 1;
|
---|
361 | if (processXPath(testDir, docFile, docFile, data, "tests", 0) != 0)
|
---|
362 | ret = -1;
|
---|
363 | if (processXPath(testDir, docFile, docFile, data, "xptr", 1) != 0)
|
---|
364 | ret = -1;
|
---|
365 |
|
---|
366 | xmlFree(data);
|
---|
367 | }
|
---|
368 |
|
---|
369 | globfree(&globbuf);
|
---|
370 |
|
---|
371 | return(ret);
|
---|
372 | }
|
---|
373 | #endif
|
---|
374 |
|
---|
375 | int
|
---|
376 | main(int argc, const char **argv) {
|
---|
377 | mainFunc processArg = NULL;
|
---|
378 | const char *fuzzer;
|
---|
379 | int ret = 0;
|
---|
380 | int xpath = 0;
|
---|
381 | int i;
|
---|
382 |
|
---|
383 | if (argc < 3) {
|
---|
384 | fprintf(stderr, "usage: seed [FUZZER] [PATTERN...]\n");
|
---|
385 | return(1);
|
---|
386 | }
|
---|
387 |
|
---|
388 | xmlSetGenericErrorFunc(NULL, xmlFuzzErrorFunc);
|
---|
389 |
|
---|
390 | fuzzer = argv[1];
|
---|
391 | if (strcmp(fuzzer, "html") == 0) {
|
---|
392 | #ifdef HAVE_HTML_FUZZER
|
---|
393 | processArg = processPattern;
|
---|
394 | globalData.processFile = processHtml;
|
---|
395 | #endif
|
---|
396 | } else if (strcmp(fuzzer, "schema") == 0) {
|
---|
397 | #ifdef HAVE_SCHEMA_FUZZER
|
---|
398 | processArg = processPattern;
|
---|
399 | globalData.processFile = processSchema;
|
---|
400 | #endif
|
---|
401 | } else if (strcmp(fuzzer, "xml") == 0) {
|
---|
402 | #ifdef HAVE_XML_FUZZER
|
---|
403 | processArg = processPattern;
|
---|
404 | globalData.processFile = processXml;
|
---|
405 | #endif
|
---|
406 | } else if (strcmp(fuzzer, "xpath") == 0) {
|
---|
407 | #ifdef HAVE_XPATH_FUZZER
|
---|
408 | processArg = processXPathDir;
|
---|
409 | #endif
|
---|
410 | } else {
|
---|
411 | fprintf(stderr, "unknown fuzzer %s\n", fuzzer);
|
---|
412 | return(1);
|
---|
413 | }
|
---|
414 | globalData.fuzzer = fuzzer;
|
---|
415 |
|
---|
416 | if (getcwd(globalData.cwd, PATH_SIZE) == NULL) {
|
---|
417 | fprintf(stderr, "couldn't get current directory\n");
|
---|
418 | return(1);
|
---|
419 | }
|
---|
420 |
|
---|
421 | if (processArg != NULL)
|
---|
422 | for (i = 2; i < argc; i++)
|
---|
423 | processArg(argv[i]);
|
---|
424 |
|
---|
425 | return(ret);
|
---|
426 | }
|
---|
427 |
|
---|