VirtualBox

source: vbox/trunk/src/libs/libxml2-2.9.14/fuzz/genSeed.c@ 98989

Last change on this file since 98989 was 95312, checked in by vboxsync, 3 years ago

libs/{curl,libxml2}: OSE export fixes, bugref:8515

  • Property svn:eol-style set to native
File size: 10.3 KB
Line 
/*
 * genSeed.c: Generate the seed corpora for the fuzzers (html, schema,
 * xml and xpath).
 *
 * See Copyright for the status of this software.
 */
6
7#include <stdio.h>
8#include <string.h>
9#include <glob.h>
10#include <libgen.h>
11#include <sys/stat.h>
12
13#ifdef _WIN32
14#include <direct.h>
15#else
16#include <unistd.h>
17#endif
18
19#include <libxml/parser.h>
20#include <libxml/parserInternals.h>
21#include <libxml/HTMLparser.h>
22#include <libxml/xinclude.h>
23#include <libxml/xmlschemas.h>
24#include "fuzz.h"
25
26#define PATH_SIZE 500
27#define SEED_BUF_SIZE 16384
28#define EXPR_SIZE 4500
29
30typedef int
31(*fileFunc)(const char *base, FILE *out);
32
33typedef int
34(*mainFunc)(const char *arg);
35
36static struct {
37 FILE *out;
38 xmlHashTablePtr entities; /* Maps URLs to xmlFuzzEntityInfos */
39 xmlExternalEntityLoader oldLoader;
40 fileFunc processFile;
41 const char *fuzzer;
42 int counter;
43 char cwd[PATH_SIZE];
44} globalData;
45
46/*
47 * A custom entity loader that writes all external DTDs or entities to a
48 * single file in the format expected by xmlFuzzEntityLoader.
49 */
50static xmlParserInputPtr
51fuzzEntityRecorder(const char *URL, const char *ID,
52 xmlParserCtxtPtr ctxt) {
53 xmlParserInputPtr in;
54 static const int chunkSize = 16384;
55 int len;
56
57 in = xmlNoNetExternalEntityLoader(URL, ID, ctxt);
58 if (in == NULL)
59 return(NULL);
60
61 if (globalData.entities == NULL) {
62 globalData.entities = xmlHashCreate(4);
63 } else if (xmlHashLookup(globalData.entities,
64 (const xmlChar *) URL) != NULL) {
65 return(in);
66 }
67
68 do {
69 len = xmlParserInputBufferGrow(in->buf, chunkSize);
70 if (len < 0) {
71 fprintf(stderr, "Error reading %s\n", URL);
72 xmlFreeInputStream(in);
73 return(NULL);
74 }
75 } while (len > 0);
76
77 xmlFuzzWriteString(globalData.out, URL);
78 xmlFuzzWriteString(globalData.out,
79 (char *) xmlBufContent(in->buf->buffer));
80
81 xmlFreeInputStream(in);
82
83 xmlHashAddEntry(globalData.entities, (const xmlChar *) URL, NULL);
84
85 return(xmlNoNetExternalEntityLoader(URL, ID, ctxt));
86}
87
88static void
89fuzzRecorderInit(FILE *out) {
90 globalData.out = out;
91 globalData.entities = xmlHashCreate(8);
92 globalData.oldLoader = xmlGetExternalEntityLoader();
93 xmlSetExternalEntityLoader(fuzzEntityRecorder);
94}
95
96static void
97fuzzRecorderCleanup() {
98 xmlSetExternalEntityLoader(globalData.oldLoader);
99 xmlHashFree(globalData.entities, xmlHashDefaultDeallocator);
100 globalData.out = NULL;
101 globalData.entities = NULL;
102 globalData.oldLoader = NULL;
103}
104
#ifdef HAVE_XML_FUZZER
/*
 * Produce the seed for one XML document.
 *
 * The parser options are written to out as a raw int. The document is
 * then parsed and its XIncludes are processed while the entity recorder
 * is installed. This function writes nothing else itself; any further
 * data in out comes from fuzzEntityRecorder.
 *
 * docFile:  name of the document to parse
 * out:      seed file to write to
 *
 * Always returns 0.
 */
static int
processXml(const char *docFile, FILE *out) {
    xmlDocPtr parsed;
    int parseFlags;

    parseFlags = XML_PARSE_NOENT | XML_PARSE_DTDLOAD;
    fwrite(&parseFlags, sizeof(parseFlags), 1, out);

    fuzzRecorderInit(out);
    parsed = xmlReadFile(docFile, NULL, parseFlags);
    xmlXIncludeProcessFlags(parsed, parseFlags);
    xmlFreeDoc(parsed);
    fuzzRecorderCleanup();

    return(0);
}
#endif
124
#ifdef HAVE_HTML_FUZZER
/*
 * Produce the seed for one HTML document: an option word of 0 written as
 * a raw int, followed by an unmodified copy of the document.
 *
 * docFile:  name of the document to copy
 * out:      seed file to write to
 *
 * Always returns 0, also when the document cannot be opened. In that
 * case a message is printed and out holds only the option word.
 */
static int
processHtml(const char *docFile, FILE *out) {
    char chunk[SEED_BUF_SIZE];
    int htmlOpts = 0;
    size_t got;
    FILE *src;

    fwrite(&htmlOpts, sizeof(htmlOpts), 1, out);

    src = fopen(docFile, "rb");
    if (src == NULL) {
        fprintf(stderr, "couldn't open %s\n", docFile);
        return(0);
    }

    /* Copy chunk by chunk; a short read ends the copy. */
    for (;;) {
        got = fread(chunk, 1, SEED_BUF_SIZE, src);
        if (got > 0)
            fwrite(chunk, 1, got, out);
        if (got != SEED_BUF_SIZE)
            break;
    }

    fclose(src);

    return(0);
}
#endif
151
#ifdef HAVE_SCHEMA_FUZZER
/*
 * Produce the seed for one schema: the schema is parsed while the entity
 * recorder is installed. This function writes nothing to out itself; the
 * data comes from fuzzEntityRecorder. Parser errors and warnings are
 * routed to xmlFuzzErrorFunc.
 *
 * docFile:  name of the schema document
 * out:      seed file to write to
 *
 * Always returns 0.
 */
static int
processSchema(const char *docFile, FILE *out) {
    xmlSchemaParserCtxtPtr parser;
    xmlSchemaPtr compiled;

    fuzzRecorderInit(out);

    parser = xmlSchemaNewParserCtxt(docFile);
    xmlSchemaSetParserErrors(parser, xmlFuzzErrorFunc, xmlFuzzErrorFunc,
                             NULL);
    compiled = xmlSchemaParse(parser);

    /* The parsed schema is not needed, only the recorded loads. */
    xmlSchemaFreeParserCtxt(parser);
    xmlSchemaFree(compiled);

    fuzzRecorderCleanup();

    return(0);
}
#endif
171
172static int
173processPattern(const char *pattern) {
174 glob_t globbuf;
175 int ret = 0;
176 int res, i;
177
178 res = glob(pattern, 0, NULL, &globbuf);
179 if (res == GLOB_NOMATCH)
180 return(0);
181 if (res != 0) {
182 fprintf(stderr, "couldn't match pattern %s\n", pattern);
183 return(-1);
184 }
185
186 for (i = 0; i < globbuf.gl_pathc; i++) {
187 struct stat statbuf;
188 char outPath[PATH_SIZE];
189 char *dirBuf = NULL;
190 char *baseBuf = NULL;
191 const char *path, *dir, *base;
192 FILE *out = NULL;
193 int dirChanged = 0;
194 size_t size;
195
196 path = globbuf.gl_pathv[i];
197
198 if ((stat(path, &statbuf) != 0) || (!S_ISREG(statbuf.st_mode)))
199 continue;
200
201 dirBuf = (char *) xmlCharStrdup(path);
202 baseBuf = (char *) xmlCharStrdup(path);
203 if ((dirBuf == NULL) || (baseBuf == NULL)) {
204 fprintf(stderr, "memory allocation failed\n");
205 ret = -1;
206 goto error;
207 }
208 dir = dirname(dirBuf);
209 base = basename(baseBuf);
210
211 size = snprintf(outPath, sizeof(outPath), "seed/%s/%s",
212 globalData.fuzzer, base);
213 if (size >= PATH_SIZE) {
214 fprintf(stderr, "creating path failed\n");
215 ret = -1;
216 goto error;
217 }
218 out = fopen(outPath, "wb");
219 if (out == NULL) {
220 fprintf(stderr, "couldn't open %s for writing\n", outPath);
221 ret = -1;
222 goto error;
223 }
224 if (chdir(dir) != 0) {
225 fprintf(stderr, "couldn't chdir to %s\n", dir);
226 ret = -1;
227 goto error;
228 }
229 dirChanged = 1;
230 if (globalData.processFile(base, out) != 0)
231 ret = -1;
232
233error:
234 if (out != NULL)
235 fclose(out);
236 xmlFree(dirBuf);
237 xmlFree(baseBuf);
238 if ((dirChanged) && (chdir(globalData.cwd) != 0)) {
239 fprintf(stderr, "couldn't chdir to %s\n", globalData.cwd);
240 ret = -1;
241 break;
242 }
243 }
244
245 globfree(&globbuf);
246 return(ret);
247}
248
249#ifdef HAVE_XPATH_FUZZER
250static int
251processXPath(const char *testDir, const char *prefix, const char *name,
252 const char *data, const char *subdir, int xptr) {
253 char pattern[PATH_SIZE];
254 glob_t globbuf;
255 size_t i, size;
256 int ret = 0, res;
257
258 size = snprintf(pattern, sizeof(pattern), "%s/%s/%s*",
259 testDir, subdir, prefix);
260 if (size >= PATH_SIZE)
261 return(-1);
262 res = glob(pattern, 0, NULL, &globbuf);
263 if (res == GLOB_NOMATCH)
264 return(0);
265 if (res != 0) {
266 fprintf(stderr, "couldn't match pattern %s\n", pattern);
267 return(-1);
268 }
269
270 for (i = 0; i < globbuf.gl_pathc; i++) {
271 char *path = globbuf.gl_pathv[i];
272 struct stat statbuf;
273 FILE *in;
274 char expr[EXPR_SIZE];
275
276 if ((stat(path, &statbuf) != 0) || (!S_ISREG(statbuf.st_mode)))
277 continue;
278
279 in = fopen(path, "rb");
280 if (in == NULL) {
281 ret = -1;
282 continue;
283 }
284
285 while (fgets(expr, EXPR_SIZE, in) > 0) {
286 char outPath[PATH_SIZE];
287 FILE *out;
288 int j;
289
290 for (j = 0; expr[j] != 0; j++)
291 if (expr[j] == '\r' || expr[j] == '\n')
292 break;
293 expr[j] = 0;
294
295 size = snprintf(outPath, sizeof(outPath), "seed/xpath/%s-%d",
296 name, globalData.counter);
297 if (size >= PATH_SIZE) {
298 ret = -1;
299 continue;
300 }
301 out = fopen(outPath, "wb");
302 if (out == NULL) {
303 ret = -1;
304 continue;
305 }
306
307 if (xptr) {
308 xmlFuzzWriteString(out, expr);
309 } else {
310 char xptrExpr[EXPR_SIZE+100];
311
312 /* Wrap XPath expressions as XPointer */
313 snprintf(xptrExpr, sizeof(xptrExpr), "xpointer(%s)", expr);
314 xmlFuzzWriteString(out, xptrExpr);
315 }
316
317 xmlFuzzWriteString(out, data);
318
319 fclose(out);
320 globalData.counter++;
321 }
322
323 fclose(in);
324 }
325
326 globfree(&globbuf);
327
328 return(ret);
329}
330
331int
332processXPathDir(const char *testDir) {
333 char pattern[PATH_SIZE];
334 glob_t globbuf;
335 size_t i, size;
336 int ret = 0;
337
338 globalData.counter = 1;
339 if (processXPath(testDir, "", "expr", "<d></d>", "expr", 0) != 0)
340 ret = -1;
341
342 size = snprintf(pattern, sizeof(pattern), "%s/docs/*", testDir);
343 if (size >= PATH_SIZE)
344 return(1);
345 if (glob(pattern, 0, NULL, &globbuf) != 0)
346 return(1);
347
348 for (i = 0; i < globbuf.gl_pathc; i++) {
349 char *path = globbuf.gl_pathv[i];
350 char *data;
351 const char *docFile;
352
353 data = xmlSlurpFile(path, NULL);
354 if (data == NULL) {
355 ret = -1;
356 continue;
357 }
358 docFile = basename(path);
359
360 globalData.counter = 1;
361 if (processXPath(testDir, docFile, docFile, data, "tests", 0) != 0)
362 ret = -1;
363 if (processXPath(testDir, docFile, docFile, data, "xptr", 1) != 0)
364 ret = -1;
365
366 xmlFree(data);
367 }
368
369 globfree(&globbuf);
370
371 return(ret);
372}
373#endif
374
375int
376main(int argc, const char **argv) {
377 mainFunc processArg = NULL;
378 const char *fuzzer;
379 int ret = 0;
380 int xpath = 0;
381 int i;
382
383 if (argc < 3) {
384 fprintf(stderr, "usage: seed [FUZZER] [PATTERN...]\n");
385 return(1);
386 }
387
388 xmlSetGenericErrorFunc(NULL, xmlFuzzErrorFunc);
389
390 fuzzer = argv[1];
391 if (strcmp(fuzzer, "html") == 0) {
392#ifdef HAVE_HTML_FUZZER
393 processArg = processPattern;
394 globalData.processFile = processHtml;
395#endif
396 } else if (strcmp(fuzzer, "schema") == 0) {
397#ifdef HAVE_SCHEMA_FUZZER
398 processArg = processPattern;
399 globalData.processFile = processSchema;
400#endif
401 } else if (strcmp(fuzzer, "xml") == 0) {
402#ifdef HAVE_XML_FUZZER
403 processArg = processPattern;
404 globalData.processFile = processXml;
405#endif
406 } else if (strcmp(fuzzer, "xpath") == 0) {
407#ifdef HAVE_XPATH_FUZZER
408 processArg = processXPathDir;
409#endif
410 } else {
411 fprintf(stderr, "unknown fuzzer %s\n", fuzzer);
412 return(1);
413 }
414 globalData.fuzzer = fuzzer;
415
416 if (getcwd(globalData.cwd, PATH_SIZE) == NULL) {
417 fprintf(stderr, "couldn't get current directory\n");
418 return(1);
419 }
420
421 if (processArg != NULL)
422 for (i = 2; i < argc; i++)
423 processArg(argv[i]);
424
425 return(ret);
426}
427
Note: See TracBrowser for help on using the repository browser.

© 2024 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette