VirtualBox

source: vbox/trunk/src/libs/libxml2-2.9.4/doc/apibuild.py@ 82520

Last change on this file since 82520 was 65950, checked in by vboxsync, 8 years ago

libxml 2.9.4: fix export

  • Property svn:eol-style set to LF
  • Property svn:executable set to *
File size: 79.8 KB
Line 
1#!/usr/bin/python -u
2#
# This is the API builder; it parses the C sources and builds the
# formal API description in XML.
5#
6# See Copyright for the status of this software.
7#
8# [email protected]
9#
10import os, sys
11import string
12import glob
13
# When non-zero, the parser prints the preprocessor and regular tokens
# it processes.
debug=0
#debugsym='ignorableWhitespaceSAXFunc'
# Name of one symbol whose creation/update is traced on stdout;
# None turns the tracing off.
debugsym=None
17
18#
19# C parser analysis code
20#
# Source files excluded from the API scan, keyed by file name; the value
# is a human readable reason for the exclusion.
# NOTE(review): the code consuming this table is outside this section --
# confirm how keys are matched against paths (see the "/save.h" entry).
ignored_files = {
  "trio": "too many non standard macros",
  "trio.c": "too many non standard macros",
  "trionan.c": "too many non standard macros",
  "triostr.c": "too many non standard macros",
  "acconfig.h": "generated portability layer",
  "config.h": "generated portability layer",
  "libxml.h": "internal only",
  "testOOM.c": "out of memory tester",
  "testOOMlib.h": "out of memory tester",
  "testOOMlib.c": "out of memory tester",
  "rngparser.c": "not yet integrated",
  "rngparser.h": "not yet integrated",
  "elfgcchack.h": "not a normal header",
  "testHTML.c": "test tool",
  "testReader.c": "test tool",
  "testSchemas.c": "test tool",
  "testXPath.c": "test tool",
  "testAutomata.c": "test tool",
  "testModule.c": "test tool",
  "testRegexp.c": "test tool",
  "testThreads.c": "test tool",
  "testC14N.c": "test tool",
  "testRelax.c": "test tool",
  "testThreadsWin32.c": "test tool",
  "testSAX.c": "test tool",
  "testURI.c": "test tool",
  "testapi.c": "generated regression tests",
  "runtest.c": "regression tests program",
  "runsuite.c": "regression tests program",
  "tst.c": "not part of the library",
  "test.c": "not part of the library",
  "testdso.c": "test for dynamid shared libraries",
  "testrecurse.c": "test for entities recursions",
  "xzlib.h": "Internal API only 2.8.0",
  "buf.h": "Internal API only 2.9.0",
  "enc.h": "Internal API only 2.9.0",
  "/save.h": "Internal API only 2.9.0",
  "timsort.h": "Internal header only for xpath.c 2.9.0",
}
61
# Names the parser drops from the token stream.  Each value is a tuple
# (n, info): when CParser.token() meets the name it discards the name
# itself plus the n tokens that follow it; info is a free-form
# description that is not used for parsing.
ignored_words = {
  "WINAPI": (0, "Windows keyword"),
  "LIBXML_DLL_IMPORT": (0, "Special macro to flag external keywords"),
  "XMLPUBVAR": (0, "Special macro for extern vars for win32"),
  "XSLTPUBVAR": (0, "Special macro for extern vars for win32"),
  "EXSLTPUBVAR": (0, "Special macro for extern vars for win32"),
  "XMLPUBFUN": (0, "Special macro for extern funcs for win32"),
  "XSLTPUBFUN": (0, "Special macro for extern funcs for win32"),
  "EXSLTPUBFUN": (0, "Special macro for extern funcs for win32"),
  "XMLCALL": (0, "Special macro for win32 calls"),
  "XSLTCALL": (0, "Special macro for win32 calls"),
  "XMLCDECL": (0, "Special macro for win32 calls"),
  "EXSLTCALL": (0, "Special macro for win32 calls"),
  "__declspec": (3, "Windows keyword"),
  "__stdcall": (0, "Windows keyword"),
  "ATTRIBUTE_UNUSED": (0, "macro keyword"),
  "LIBEXSLT_PUBLIC": (0, "macro keyword"),
  "X_IN_Y": (5, "macro function builder"),
  "ATTRIBUTE_ALLOC_SIZE": (3, "macro for gcc checking extension"),
  "ATTRIBUTE_PRINTF": (5, "macro for gcc printf args checking extension"),
  "LIBXML_ATTR_FORMAT": (5, "macro for gcc printf args checking extension"),
  "LIBXML_ATTR_ALLOC_SIZE": (3, "macro for gcc checking extension"),
  "__XML_EXTERNC": (0, "Special macro added for os400"),
}
86
def escape(raw):
    """Return raw with the five XML special characters escaped.

    The ampersand is replaced first so that the '&' introduced by the
    other entity references is not escaped a second time.
    """
    raw = raw.replace('&', '&amp;')
    raw = raw.replace('<', '&lt;')
    raw = raw.replace('>', '&gt;')
    raw = raw.replace("'", '&apos;')
    raw = raw.replace('"', '&quot;')
    return raw
94
def uniq(items):
    """Return the distinct (hashable) values of items as a list.

    Duplicates are collapsed by using the values as dictionary keys.
    """
    return list(dict.fromkeys(items, 1))
100
class identifier:
    """Description of one symbol collected while parsing.

    Attributes: name, header and module (the files it was seen in), type
    (a kind string such as "function" or "macro"), lineno, info and extra
    (payloads whose shape depends on the kind), static (0/1 flag) and
    conditionals (a private copy of the active preprocessor conditions,
    or None when there are none).
    """
    def __init__(self, name, header=None, module=None, type=None, lineno = 0,
                 info=None, extra=None, conditionals = None):
        self.name = name
        self.header = header
        self.module = module
        self.type = type
        self.info = info
        self.extra = extra
        self.lineno = lineno
        self.static = 0
        self.set_conditionals(conditionals)
        if self.name == debugsym:
            print("=> define %s : %s" % (debugsym, (module, type, info,
                                         extra, conditionals)))

    def __repr__(self):
        r = "%s %s:" % (self.type, self.name)
        if self.static:
            r = r + " static"
        if self.module is not None:
            r = r + " from %s" % (self.module)
        if self.info is not None:
            r = r + " " + repr(self.info)
        if self.extra is not None:
            r = r + " " + repr(self.extra)
        if self.conditionals is not None:
            r = r + " " + repr(self.conditionals)
        return r


    def set_header(self, header):
        self.header = header
    def set_module(self, module):
        self.module = module
    def set_type(self, type):
        self.type = type
    def set_info(self, info):
        self.info = info
    def set_extra(self, extra):
        self.extra = extra
    def set_lineno(self, lineno):
        self.lineno = lineno
    def set_static(self, static):
        self.static = static
    def set_conditionals(self, conditionals):
        """Store a copy of conditionals; None or an empty list becomes None."""
        if conditionals is None or len(conditionals) == 0:
            self.conditionals = None
        else:
            # copy: the caller keeps mutating its own list while parsing
            self.conditionals = conditionals[:]

    def get_name(self):
        return self.name
    def get_header(self):
        # fixed: this used to return self.module
        return self.header
    def get_module(self):
        return self.module
    def get_type(self):
        return self.type
    def get_info(self):
        return self.info
    def get_lineno(self):
        return self.lineno
    def get_extra(self):
        return self.extra
    def get_static(self):
        return self.static
    def get_conditionals(self):
        return self.conditionals

    def update(self, header, module, type = None, info = None, extra=None,
               conditionals=None):
        """Merge a new sighting of the symbol into this record.

        header and type are only filled in when still unset; module is
        replaced when unset or when it merely mirrors the header; info,
        extra and conditionals are overwritten whenever a non-None value
        is supplied.
        """
        if self.name == debugsym:
            print("=> update %s : %s" % (debugsym, (module, type, info,
                                         extra, conditionals)))
        if header is not None and self.header is None:
            # fixed: this used to store `module` as the header
            self.set_header(header)
        if module is not None and (self.module is None or self.header == self.module):
            self.set_module(module)
        if type is not None and self.type is None:
            self.set_type(type)
        if info is not None:
            self.set_info(info)
        if extra is not None:
            self.set_extra(extra)
        if conditionals is not None:
            self.set_conditionals(conditionals)
191
class index:
    """Collection of identifier records, with one dictionary per kind.

    identifiers maps every name to its record; functions, variables,
    includes, structs, enums, typedefs and macros hold the same records
    grouped by kind; references holds the records created by add_ref().
    """
    def __init__(self, name = "noname"):
        self.name = name
        self.identifiers = {}
        self.functions = {}
        self.variables = {}
        self.includes = {}
        self.structs = {}
        self.enums = {}
        self.typedefs = {}
        self.macros = {}
        self.references = {}
        self.info = {}

    def _lookup_or_create(self, name, header, module, static, type, lineno,
                          info, extra, conditionals):
        """Return the record for name, updating it or creating a new one.

        The previous code passed lineno as an extra positional argument to
        identifier.update(); the resulting TypeError was swallowed by a
        bare except, so known records were silently replaced instead of
        being updated.
        """
        d = self.identifiers.get(name)
        if d is None:
            d = identifier(name, header, module, type, lineno, info, extra,
                           conditionals)
            self.identifiers[name] = d
        else:
            d.update(header, module, type, info, extra, conditionals)
            # update() has no lineno parameter; keep the latest location
            d.set_lineno(lineno)

        if static == 1:
            d.set_static(1)
        return d

    def add_ref(self, name, header, module, static, type, lineno, info=None, extra=None, conditionals = None):
        """Record a reference to name; returns the record, or None for
        names starting with '__', which are ignored."""
        if name[0:2] == '__':
            return None
        d = self._lookup_or_create(name, header, module, static, type,
                                   lineno, info, extra, conditionals)

        if type is not None:
            self.references[name] = d

        if name == debugsym:
            print("New ref: %s" % (d))

        return d

    def add(self, name, header, module, static, type, lineno, info=None, extra=None, conditionals = None):
        """Record a definition of name and file it under its kind.

        Returns the record, or None for names starting with '__', which
        are ignored.  An unknown kind is reported on stdout.
        """
        if name[0:2] == '__':
            return None
        d = self._lookup_or_create(name, header, module, static, type,
                                   lineno, info, extra, conditionals)

        if type is not None:
            tables = {
                "function": self.functions,
                "functype": self.functions,
                "variable": self.variables,
                "include": self.includes,
                "struct": self.structs,
                "enum": self.enums,
                "typedef": self.typedefs,
                "macro": self.macros,
            }
            table = tables.get(type)
            if table is None:
                print("Unable to register type ", type)
            else:
                table[name] = d

        if name == debugsym:
            print("New symbol: %s" % (d))

        return d

    def _merge_dict(self, kind, mine, theirs, overrides_macro = 0):
        """Copy the entries of theirs missing from mine, reporting clashes.

        With overrides_macro set, a macro of the same name is dropped
        first: a macro might be used to override a function or variable
        definition.
        """
        for id in list(theirs.keys()):
            if overrides_macro and id in self.macros:
                del self.macros[id]
            if id in mine:
                print("%s %s from %s redeclared in %s" % (
                    kind, id, mine[id].header, theirs[id].header))
            else:
                mine[id] = theirs[id]
                self.identifiers[id] = theirs[id]

    def merge(self, idx):
        """Merge the symbols of another index into this one.

        The order matters: macros are merged after functions and variables
        so that a macro shadowing one of them is skipped.
        """
        self._merge_dict("function", self.functions, idx.functions, 1)
        self._merge_dict("variable", self.variables, idx.variables, 1)
        self._merge_dict("struct", self.structs, idx.structs)
        self._merge_dict("typedef", self.typedefs, idx.typedefs)
        for id in list(idx.macros.keys()):
            #
            # macro might be used to override functions or variables
            # definitions
            #
            if id in self.variables or id in self.functions \
               or id in self.enums:
                continue
            if id in self.macros:
                print("macro %s from %s redeclared in %s" % (
                    id, self.macros[id].header, idx.macros[id].header))
            else:
                self.macros[id] = idx.macros[id]
                self.identifiers[id] = idx.macros[id]
        self._merge_dict("enum", self.enums, idx.enums)

    def merge_public(self, idx):
        """Fold the function records of idx into the functions already
        known here, reporting those whose conditionals disagree."""
        for id in list(idx.functions.keys()):
            if id in self.functions:
                # check that function condition agrees with header
                if idx.functions[id].conditionals != \
                   self.functions[id].conditionals:
                    print("Header condition differs from Function for %s:" \
                       % id)
                    print(" H: %s" % self.functions[id].conditionals)
                    print(" C: %s" % idx.functions[id].conditionals)
                up = idx.functions[id]
                self.functions[id].update(None, up.module, up.type, up.info, up.extra)
        # TODO: do the same for variables.

    def analyze_dict(self, type, dict):
        """Print how many entries dict has and how many are not static."""
        count = 0
        public = 0
        for name in list(dict.keys()):
            id = dict[name]
            count = count + 1
            if id.static == 0:
                public = public + 1
        if count != public:
            print(" %d %s , %d public" % (count, type, public))
        elif count != 0:
            print(" %d public %s" % (count, type))


    def analyze(self):
        """Print the summary counts for the main symbol kinds."""
        self.analyze_dict("functions", self.functions)
        self.analyze_dict("variables", self.variables)
        self.analyze_dict("structs", self.structs)
        self.analyze_dict("typedefs", self.typedefs)
        self.analyze_dict("macros", self.macros)
370
class CLexer:
    """A lexer for the C language, tokenize the input by reading and
       analyzing it line by line

       Tokens are (kind, text) tuples where kind is one of 'preproc',
       'string', 'comment', 'name', 'sep' or 'op'."""
    def __init__(self, input):
        self.input = input      # object providing readline()
        self.tokens = []        # tokens produced but not yet returned
        self.line = ""          # unprocessed remainder of the current line
        self.lineno = 0
        # last token returned; set here so that debug() can be called
        # before the first token has been read
        self.last = None

    def getline(self):
        """Return the next non-blank line, stripped, with backslash
        continuation lines joined; None at end of input."""
        line = ''
        while line == '':
            line = self.input.readline()
            if not line:
                return None
            self.lineno = self.lineno + 1
            line = line.lstrip()
            line = line.rstrip()
            if line == '':
                continue
            while line[-1] == '\\':
                line = line[:-1]
                n = self.input.readline()
                self.lineno = self.lineno + 1
                n = n.lstrip()
                n = n.rstrip()
                if not n:
                    break
                else:
                    line = line + n
        return line

    def getlineno(self):
        return self.lineno

    def push(self, token):
        """Put token back so that it is the next one returned."""
        self.tokens.insert(0, token)

    def debug(self):
        print("Last token: ", self.last)
        print("Token queue: ", self.tokens)
        print("Line %d end: " % (self.lineno), self.line)

    def token(self):
        """Return the next token, or None at end of input (also when the
        input ends inside a string or a comment)."""
        while self.tokens == []:
            if self.line == "":
                line = self.getline()
            else:
                line = self.line
                self.line = ""
            if line is None:
                return None

            # a preprocessor line is split on whitespace, one token per word
            if line[0] == '#':
                self.tokens = [('preproc', word) for word in line.split()]
                break
            l = len(line)
            if line[0] == '"' or line[0] == "'":
                # string or character literal, may run over several lines
                end = line[0]
                line = line[1:]
                found = 0
                tok = ""
                while found == 0:
                    i = 0
                    l = len(line)
                    while i < l:
                        if line[i] == end:
                            self.line = line[i+1:]
                            line = line[:i]
                            l = i
                            found = 1
                            break
                        if line[i] == '\\':
                            # skip the escaped character
                            i = i + 1
                        i = i + 1
                    tok = tok + line
                    if found == 0:
                        line = self.getline()
                        if line is None:
                            return None
                self.last = ('string', tok)
                return self.last

            if l >= 2 and line[0] == '/' and line[1] == '*':
                # block comment, lines are joined with "\n"
                line = line[2:]
                found = 0
                tok = ""
                while found == 0:
                    i = 0
                    l = len(line)
                    while i < l:
                        if line[i] == '*' and i+1 < l and line[i+1] == '/':
                            self.line = line[i+2:]
                            # NOTE(review): this also drops the character
                            # before "*/" (and, when "*/" starts the line,
                            # the last character instead); kept unchanged
                            # since the comment parsers consume this text.
                            line = line[:i-1]
                            l = i
                            found = 1
                            break
                        i = i + 1
                    if tok != "":
                        tok = tok + "\n"
                    tok = tok + line
                    if found == 0:
                        line = self.getline()
                        if line is None:
                            return None
                self.last = ('comment', tok)
                return self.last
            if l >= 2 and line[0] == '/' and line[1] == '/':
                line = line[2:]
                self.last = ('comment', line)
                return self.last
            # cut the line at the first comment or literal; the remainder
            # is kept in self.line for the next call
            i = 0
            while i < l:
                if line[i] == '/' and i+1 < l and line[i+1] == '/':
                    self.line = line[i:]
                    line = line[:i]
                    break
                if line[i] == '/' and i+1 < l and line[i+1] == '*':
                    self.line = line[i:]
                    line = line[:i]
                    break
                if line[i] == '"' or line[i] == "'":
                    self.line = line[i:]
                    line = line[:i]
                    break
                i = i + 1
            l = len(line)
            i = 0
            while i < l:
                if line[i] == ' ' or line[i] == '\t':
                    i = i + 1
                    continue
                o = ord(line[i])
                if (o >= 97 and o <= 122) or (o >= 65 and o <= 90) or \
                   (o >= 48 and o <= 57):
                    # name starting with an ASCII letter or digit, runs up
                    # to the next blank, separator or operator character
                    s = i
                    while i < l:
                        o = ord(line[i])
                        if (o >= 97 and o <= 122) or (o >= 65 and o <= 90) or \
                           (o >= 48 and o <= 57) or \
                           line[i] not in " \t(){}:;,+-*/%&!|[]=><":
                            i = i + 1
                        else:
                            break
                    self.tokens.append(('name', line[s:i]))
                    continue
                if line[i] in "(){}:;,[]":
                    self.tokens.append(('sep', line[i]))
                    i = i + 1
                    continue
                if line[i] in "+-*><=/%&!|.":
                    if line[i] == '.' and i + 2 < l and \
                       line[i+1] == '.' and line[i+2] == '.':
                        self.tokens.append(('name', '...'))
                        i = i + 3
                        continue

                    # two adjacent operator characters form a single token
                    j = i + 1
                    if j < l and line[j] in "+-*><=/%&!|":
                        self.tokens.append(('op', line[i:j+1]))
                        i = j + 1
                    else:
                        self.tokens.append(('op', line[i]))
                        i = i + 1
                    continue
                # anything else starts a name as well
                s = i
                while i < l:
                    o = ord(line[i])
                    if (o >= 97 and o <= 122) or (o >= 65 and o <= 90) or \
                       (o >= 48 and o <= 57) or \
                       line[i] not in " \t(){}:;,+-*/%&!|[]=><":
                        i = i + 1
                    else:
                        break
                self.tokens.append(('name', line[s:i]))

        tok = self.tokens[0]
        self.tokens = self.tokens[1:]
        self.last = tok
        return tok
576
577class CParser:
578 """The C module parser"""
579 def __init__(self, filename, idx = None):
580 self.filename = filename
581 if len(filename) > 2 and filename[-2:] == '.h':
582 self.is_header = 1
583 else:
584 self.is_header = 0
585 self.input = open(filename)
586 self.lexer = CLexer(self.input)
587 if idx == None:
588 self.index = index()
589 else:
590 self.index = idx
591 self.top_comment = ""
592 self.last_comment = ""
593 self.comment = None
594 self.collect_ref = 0
595 self.no_error = 0
596 self.conditionals = []
597 self.defines = []
598
    def collect_references(self):
        """Turn on the recording of symbol references done by parseBlock()."""
        self.collect_ref = 1
601
    def stop_error(self):
        """Silence warning() and error() until start_error() is called."""
        self.no_error = 1
604
    def start_error(self):
        """Re-enable the reports silenced by stop_error()."""
        self.no_error = 0
607
    def lineno(self):
        """Return the current line number as counted by the lexer."""
        return self.lexer.getlineno()
610
611 def index_add(self, name, module, static, type, info=None, extra = None):
612 if self.is_header == 1:
613 self.index.add(name, module, module, static, type, self.lineno(),
614 info, extra, self.conditionals)
615 else:
616 self.index.add(name, None, module, static, type, self.lineno(),
617 info, extra, self.conditionals)
618
619 def index_add_ref(self, name, module, static, type, info=None,
620 extra = None):
621 if self.is_header == 1:
622 self.index.add_ref(name, module, module, static, type,
623 self.lineno(), info, extra, self.conditionals)
624 else:
625 self.index.add_ref(name, None, module, static, type, self.lineno(),
626 info, extra, self.conditionals)
627
628 def warning(self, msg):
629 if self.no_error:
630 return
631 print(msg)
632
    def error(self, msg, token=-1):
        """Report a fatal parse error and terminate the program.

        Does nothing while reporting is switched off.  Otherwise prints
        msg, the offending token when one is given (-1 means none), the
        lexer state, and exits with status 1.
        """
        if self.no_error:
            return

        print("Parse Error: " + msg)
        if token != -1:
            print("Got token ", token)
        self.lexer.debug()
        sys.exit(1)
642
    def debug(self, msg, token=-1):
        """Print msg, the token when one is given (-1 means none) and the
        lexer state; unlike error() this is never silenced and never
        exits."""
        print("Debug: " + msg)
        if token != -1:
            print("Got token ", token)
        self.lexer.debug()
648
649 def parseTopComment(self, comment):
650 res = {}
651 lines = comment.split("\n")
652 item = None
653 for line in lines:
654 while line != "" and (line[0] == ' ' or line[0] == '\t'):
655 line = line[1:]
656 while line != "" and line[0] == '*':
657 line = line[1:]
658 while line != "" and (line[0] == ' ' or line[0] == '\t'):
659 line = line[1:]
660 try:
661 (it, line) = line.split(":", 1)
662 item = it
663 while line != "" and (line[0] == ' ' or line[0] == '\t'):
664 line = line[1:]
665 if item in res:
666 res[item] = res[item] + " " + line
667 else:
668 res[item] = line
669 except:
670 if item != None:
671 if item in res:
672 res[item] = res[item] + " " + line
673 else:
674 res[item] = line
675 self.index.info = res
676
677 def parseComment(self, token):
678 if self.top_comment == "":
679 self.top_comment = token[1]
680 if self.comment == None or token[1][0] == '*':
681 self.comment = token[1];
682 else:
683 self.comment = self.comment + token[1]
684 token = self.lexer.token()
685
686 if self.comment.find("DOC_DISABLE") != -1:
687 self.stop_error()
688
689 if self.comment.find("DOC_ENABLE") != -1:
690 self.start_error()
691
692 return token
693
694 #
695 # Parse a comment block associate to a typedef
696 #
697 def parseTypeComment(self, name, quiet = 0):
698 if name[0:2] == '__':
699 quiet = 1
700
701 args = []
702 desc = ""
703
704 if self.comment == None:
705 if not quiet:
706 self.warning("Missing comment for type %s" % (name))
707 return((args, desc))
708 if self.comment[0] != '*':
709 if not quiet:
710 self.warning("Missing * in type comment for %s" % (name))
711 return((args, desc))
712 lines = self.comment.split('\n')
713 if lines[0] == '*':
714 del lines[0]
715 if lines[0] != "* %s:" % (name):
716 if not quiet:
717 self.warning("Misformatted type comment for %s" % (name))
718 self.warning(" Expecting '* %s:' got '%s'" % (name, lines[0]))
719 return((args, desc))
720 del lines[0]
721 while len(lines) > 0 and lines[0] == '*':
722 del lines[0]
723 desc = ""
724 while len(lines) > 0:
725 l = lines[0]
726 while len(l) > 0 and l[0] == '*':
727 l = l[1:]
728 l = l.strip()
729 desc = desc + " " + l
730 del lines[0]
731
732 desc = desc.strip()
733
734 if quiet == 0:
735 if desc == "":
736 self.warning("Type comment for %s lack description of the macro" % (name))
737
738 return(desc)
739 #
740 # Parse a comment block associate to a macro
741 #
742 def parseMacroComment(self, name, quiet = 0):
743 if name[0:2] == '__':
744 quiet = 1
745
746 args = []
747 desc = ""
748
749 if self.comment == None:
750 if not quiet:
751 self.warning("Missing comment for macro %s" % (name))
752 return((args, desc))
753 if self.comment[0] != '*':
754 if not quiet:
755 self.warning("Missing * in macro comment for %s" % (name))
756 return((args, desc))
757 lines = self.comment.split('\n')
758 if lines[0] == '*':
759 del lines[0]
760 if lines[0] != "* %s:" % (name):
761 if not quiet:
762 self.warning("Misformatted macro comment for %s" % (name))
763 self.warning(" Expecting '* %s:' got '%s'" % (name, lines[0]))
764 return((args, desc))
765 del lines[0]
766 while lines[0] == '*':
767 del lines[0]
768 while len(lines) > 0 and lines[0][0:3] == '* @':
769 l = lines[0][3:]
770 try:
771 (arg, desc) = l.split(':', 1)
772 desc=desc.strip()
773 arg=arg.strip()
774 except:
775 if not quiet:
776 self.warning("Misformatted macro comment for %s" % (name))
777 self.warning(" problem with '%s'" % (lines[0]))
778 del lines[0]
779 continue
780 del lines[0]
781 l = lines[0].strip()
782 while len(l) > 2 and l[0:3] != '* @':
783 while l[0] == '*':
784 l = l[1:]
785 desc = desc + ' ' + l.strip()
786 del lines[0]
787 if len(lines) == 0:
788 break
789 l = lines[0]
790 args.append((arg, desc))
791 while len(lines) > 0 and lines[0] == '*':
792 del lines[0]
793 desc = ""
794 while len(lines) > 0:
795 l = lines[0]
796 while len(l) > 0 and l[0] == '*':
797 l = l[1:]
798 l = l.strip()
799 desc = desc + " " + l
800 del lines[0]
801
802 desc = desc.strip()
803
804 if quiet == 0:
805 if desc == "":
806 self.warning("Macro comment for %s lack description of the macro" % (name))
807
808 return((args, desc))
809
810 #
811 # Parse a comment block and merge the informations found in the
812 # parameters descriptions, finally returns a block as complete
813 # as possible
814 #
815 def mergeFunctionComment(self, name, description, quiet = 0):
816 if name == 'main':
817 quiet = 1
818 if name[0:2] == '__':
819 quiet = 1
820
821 (ret, args) = description
822 desc = ""
823 retdesc = ""
824
825 if self.comment == None:
826 if not quiet:
827 self.warning("Missing comment for function %s" % (name))
828 return(((ret[0], retdesc), args, desc))
829 if self.comment[0] != '*':
830 if not quiet:
831 self.warning("Missing * in function comment for %s" % (name))
832 return(((ret[0], retdesc), args, desc))
833 lines = self.comment.split('\n')
834 if lines[0] == '*':
835 del lines[0]
836 if lines[0] != "* %s:" % (name):
837 if not quiet:
838 self.warning("Misformatted function comment for %s" % (name))
839 self.warning(" Expecting '* %s:' got '%s'" % (name, lines[0]))
840 return(((ret[0], retdesc), args, desc))
841 del lines[0]
842 while lines[0] == '*':
843 del lines[0]
844 nbargs = len(args)
845 while len(lines) > 0 and lines[0][0:3] == '* @':
846 l = lines[0][3:]
847 try:
848 (arg, desc) = l.split(':', 1)
849 desc=desc.strip()
850 arg=arg.strip()
851 except:
852 if not quiet:
853 self.warning("Misformatted function comment for %s" % (name))
854 self.warning(" problem with '%s'" % (lines[0]))
855 del lines[0]
856 continue
857 del lines[0]
858 l = lines[0].strip()
859 while len(l) > 2 and l[0:3] != '* @':
860 while l[0] == '*':
861 l = l[1:]
862 desc = desc + ' ' + l.strip()
863 del lines[0]
864 if len(lines) == 0:
865 break
866 l = lines[0]
867 i = 0
868 while i < nbargs:
869 if args[i][1] == arg:
870 args[i] = (args[i][0], arg, desc)
871 break;
872 i = i + 1
873 if i >= nbargs:
874 if not quiet:
875 self.warning("Unable to find arg %s from function comment for %s" % (
876 arg, name))
877 while len(lines) > 0 and lines[0] == '*':
878 del lines[0]
879 desc = ""
880 while len(lines) > 0:
881 l = lines[0]
882 while len(l) > 0 and l[0] == '*':
883 l = l[1:]
884 l = l.strip()
885 if len(l) >= 6 and l[0:6] == "return" or l[0:6] == "Return":
886 try:
887 l = l.split(' ', 1)[1]
888 except:
889 l = ""
890 retdesc = l.strip()
891 del lines[0]
892 while len(lines) > 0:
893 l = lines[0]
894 while len(l) > 0 and l[0] == '*':
895 l = l[1:]
896 l = l.strip()
897 retdesc = retdesc + " " + l
898 del lines[0]
899 else:
900 desc = desc + " " + l
901 del lines[0]
902
903 retdesc = retdesc.strip()
904 desc = desc.strip()
905
906 if quiet == 0:
907 #
908 # report missing comments
909 #
910 i = 0
911 while i < nbargs:
912 if args[i][2] == None and args[i][0] != "void" and \
913 ((args[i][1] != None) or (args[i][1] == '')):
914 self.warning("Function comment for %s lacks description of arg %s" % (name, args[i][1]))
915 i = i + 1
916 if retdesc == "" and ret[0] != "void":
917 self.warning("Function comment for %s lacks description of return value" % (name))
918 if desc == "":
919 self.warning("Function comment for %s lacks description of the function" % (name))
920
921 return(((ret[0], retdesc), args, desc))
922
923 def parsePreproc(self, token):
924 if debug:
925 print("=> preproc ", token, self.lexer.tokens)
926 name = token[1]
927 if name == "#include":
928 token = self.lexer.token()
929 if token == None:
930 return None
931 if token[0] == 'preproc':
932 self.index_add(token[1], self.filename, not self.is_header,
933 "include")
934 return self.lexer.token()
935 return token
936 if name == "#define":
937 token = self.lexer.token()
938 if token == None:
939 return None
940 if token[0] == 'preproc':
941 # TODO macros with arguments
942 name = token[1]
943 lst = []
944 token = self.lexer.token()
945 while token != None and token[0] == 'preproc' and \
946 token[1][0] != '#':
947 lst.append(token[1])
948 token = self.lexer.token()
949 try:
950 name = name.split('(') [0]
951 except:
952 pass
953 info = self.parseMacroComment(name, not self.is_header)
954 self.index_add(name, self.filename, not self.is_header,
955 "macro", info)
956 return token
957
958 #
959 # Processing of conditionals modified by Bill 1/1/05
960 #
961 # We process conditionals (i.e. tokens from #ifdef, #ifndef,
962 # #if, #else and #endif) for headers and mainline code,
963 # store the ones from the header in libxml2-api.xml, and later
964 # (in the routine merge_public) verify that the two (header and
965 # mainline code) agree.
966 #
967 # There is a small problem with processing the headers. Some of
968 # the variables are not concerned with enabling / disabling of
969 # library functions (e.g. '__XML_PARSER_H__'), and we don't want
970 # them to be included in libxml2-api.xml, or involved in
971 # the check between the header and the mainline code. To
972 # accomplish this, we ignore any conditional which doesn't include
973 # the string 'ENABLED'
974 #
975 if name == "#ifdef":
976 apstr = self.lexer.tokens[0][1]
977 try:
978 self.defines.append(apstr)
979 if apstr.find('ENABLED') != -1:
980 self.conditionals.append("defined(%s)" % apstr)
981 except:
982 pass
983 elif name == "#ifndef":
984 apstr = self.lexer.tokens[0][1]
985 try:
986 self.defines.append(apstr)
987 if apstr.find('ENABLED') != -1:
988 self.conditionals.append("!defined(%s)" % apstr)
989 except:
990 pass
991 elif name == "#if":
992 apstr = ""
993 for tok in self.lexer.tokens:
994 if apstr != "":
995 apstr = apstr + " "
996 apstr = apstr + tok[1]
997 try:
998 self.defines.append(apstr)
999 if apstr.find('ENABLED') != -1:
1000 self.conditionals.append(apstr)
1001 except:
1002 pass
1003 elif name == "#else":
1004 if self.conditionals != [] and \
1005 self.defines[-1].find('ENABLED') != -1:
1006 self.conditionals[-1] = "!(%s)" % self.conditionals[-1]
1007 elif name == "#endif":
1008 if self.conditionals != [] and \
1009 self.defines[-1].find('ENABLED') != -1:
1010 self.conditionals = self.conditionals[:-1]
1011 self.defines = self.defines[:-1]
1012 token = self.lexer.token()
1013 while token != None and token[0] == 'preproc' and \
1014 token[1][0] != '#':
1015 token = self.lexer.token()
1016 return token
1017
1018 #
1019 # token acquisition on top of the lexer, it handle internally
1020 # preprocessor and comments since they are logically not part of
1021 # the program structure.
1022 #
1023 def token(self):
1024 global ignored_words
1025
1026 token = self.lexer.token()
1027 while token != None:
1028 if token[0] == 'comment':
1029 token = self.parseComment(token)
1030 continue
1031 elif token[0] == 'preproc':
1032 token = self.parsePreproc(token)
1033 continue
1034 elif token[0] == "name" and token[1] == "__const":
1035 token = ("name", "const")
1036 return token
1037 elif token[0] == "name" and token[1] == "__attribute":
1038 token = self.lexer.token()
1039 while token != None and token[1] != ";":
1040 token = self.lexer.token()
1041 return token
1042 elif token[0] == "name" and token[1] in ignored_words:
1043 (n, info) = ignored_words[token[1]]
1044 i = 0
1045 while i < n:
1046 token = self.lexer.token()
1047 i = i + 1
1048 token = self.lexer.token()
1049 continue
1050 else:
1051 if debug:
1052 print("=> ", token)
1053 return token
1054 return None
1055
1056 #
1057 # Parse a typedef, it records the type and its name.
1058 #
    def parseTypedef(self, token):
        """Parse what follows the 'typedef' keyword and register every name
        it declares; returns the token after the closing ';'.

        The type is obtained from parseType(), which leaves its results in
        self.type and self.signature.  A name with a signature is
        registered as "functype", a name whose base type is "struct" as
        "struct", any other name as "typedef".
        """
        if token == None:
            return None
        token = self.parseType(token)
        if token == None:
            self.error("parsing typedef")
            return None
        base_type = self.type
        type = base_type
         #self.debug("end typedef type", token)
        while token != None:
            if token[0] == "name":
                name = token[1]
                signature = self.signature
                if signature != None:
                    # function type: keep only what precedes the first '('
                    type = type.split('(')[0]
                    d = self.mergeFunctionComment(name,
                            ((type, None), signature), 1)
                    self.index_add(name, self.filename, not self.is_header,
                                    "functype", d)
                else:
                    if base_type == "struct":
                        self.index_add(name, self.filename, not self.is_header,
                                        "struct", type)
                        # further names of this typedef use the struct name
                        base_type = "struct " + name
                    else:
                        # TODO report missing or misformatted comments
                        info = self.parseTypeComment(name, 1)
                        self.index_add(name, self.filename, not self.is_header,
                                    "typedef", type, info)
                token = self.token()
            else:
                self.error("parsing typedef: expecting a name")
                return token
             #self.debug("end typedef", token)
            if token != None and token[0] == 'sep' and token[1] == ',':
                # another declarator: restart from the base type and
                # append the operator tokens that precede the name
                type = base_type
                token = self.token()
                while token != None and token[0] == "op":
                    type = type + token[1]
                    token = self.token()
            elif token != None and token[0] == 'sep' and token[1] == ';':
                break;
            elif token != None and token[0] == 'name':
                type = base_type
                continue;
            else:
                self.error("parsing typedef: expecting ';'", token)
                return token
        token = self.token()
        return token
1110
1111 #
1112 # Parse a C code block, used for functions it parse till
1113 # the balancing } included
1114 #
def parseBlock(self, token):
    """Skip a C code block up to and including its balancing '}'.

    Nested '{' blocks are handled by recursion.  When self.collect_ref
    is 1, names met on the way are recorded with index_add_ref():
    "xml..." names followed by '(' as "function", "xml..." names
    followed by another name and then ';', ',' or '=' as "type", and
    "XML_..." / "LIBXML_..." names as "typedef".

    Returns the token following the closing '}', or None at end of input.
    """
    while token is not None:
        is_sep = token[0] == "sep"
        if is_sep and token[1] == "{":
            token = self.parseBlock(self.token())
        elif is_sep and token[1] == "}":
            self.comment = None
            return self.token()
        elif self.collect_ref != 1:
            token = self.token()
        else:
            previous = token
            token = self.token()
            if previous[0] != "name":
                continue
            ident = previous[1]
            if ident[0:3] == "xml":
                if token[0] == "sep" and token[1] == "(":
                    self.index_add_ref(ident, self.filename, 0, "function")
                    token = self.token()
                elif token[0] == "name":
                    token = self.token()
                    if token[0] == "sep" and token[1] in (";", ",", "="):
                        self.index_add_ref(ident, self.filename, 0, "type")
            elif ident[0:4] == "XML_" or ident[0:7] == "LIBXML_":
                self.index_add_ref(ident, self.filename, 0, "typedef")
    return token
1149
1150 #
1151 # Parse a C struct definition till the balancing }
1152 #
def parseStruct(self, token):
    """Parse the body of a C struct up to its balancing '}'.

    Each "type name ;" member is collected as a
    (type, name, comment) tuple, the comment being whatever
    self.comment holds once the token after the ';' has been read.
    Nested brace blocks are skipped with parseTypeBlock().  The list is
    stored in self.struct_fields.

    Returns the token following the closing '}', or None at end of input.
    """
    members = []
    while token is not None:
        if token[0] == "sep" and token[1] == "{":
            token = self.parseTypeBlock(self.token())
            continue
        if token[0] == "sep" and token[1] == "}":
            self.struct_fields = members
            return self.token()

        # parseType() overwrites self.type; remember it so that it can be
        # restored once this member has been handled.
        saved_type = self.type
        token = self.parseType(token)
        if token is not None and token[0] == "name":
            member_name = token[1]
            token = self.token()
            if token[0] == "sep" and token[1] == ";":
                # Drop any pending comment, then read one token ahead.
                self.comment = None
                token = self.token()
                members.append((self.type, member_name, self.comment))
                self.comment = None
            else:
                self.error("parseStruct: expecting ;", token)
        elif token is not None and token[0] == "sep" and token[1] == "{":
            token = self.parseTypeBlock(self.token())
            if token is not None and token[0] == "name":
                token = self.token()
            if token is not None and token[0] == "sep" and token[1] == ";":
                token = self.token()
            else:
                self.error("parseStruct: expecting ;", token)
        else:
            self.error("parseStruct: name", token)
            token = self.token()
        self.type = saved_type

    self.struct_fields = members
    return token
1198
1199 #
1200 # Parse a C enum block, parse till the balancing }
1201 #
def parseEnumBlock(self, token):
    """Parse the body of a C enum up to its balancing '}'.

    Fills self.enums with one (name, value, comment) tuple per
    enumerator.  The value is the text following '=' when there is
    one, otherwise the previous value plus one; an empty string is
    recorded (and a warning emitted) when the previous value is not a
    plain decimal integer.  The comment is whatever self.comment holds
    when the next enumerator or the closing '}' is reached, stripped.

    Returns the token following the closing '}', or None at end of input.
    """
    self.enums = []
    name = None
    self.comment = None
    comment = ""
    # In C the first enumerator without an initializer is 0.  The
    # counter is incremented before use, so it starts one below.
    value = "-1"
    while token is not None:
        if token[0] == "sep" and token[1] == "{":
            token = self.token()
            token = self.parseTypeBlock(token)
        elif token[0] == "sep" and token[1] == "}":
            if name is not None:
                if self.comment is not None:
                    comment = self.comment.strip()
                    self.comment = None
                self.enums.append((name, value, comment))
            token = self.token()
            return token
        elif token[0] == "name":
            # A new enumerator starts: flush the previous one first.
            if name is not None:
                if self.comment is not None:
                    comment = self.comment.strip()
                    self.comment = None
                self.enums.append((name, value, comment))
            name = token[1]
            comment = ""
            token = self.token()
            if token is not None and token[0] == "op" and token[1][0] == "=":
                # Explicit value: anything glued to the '=' plus every
                # token up to the next ',' or '}'.
                value = ""
                if len(token[1]) > 1:
                    value = token[1][1:]
                token = self.token()
                while token is not None and (
                        token[0] != "sep" or token[1] not in (',', '}')):
                    value = value + token[1]
                    token = self.token()
            else:
                try:
                    value = "%d" % (int(value) + 1)
                except ValueError:
                    self.warning("Failed to compute value of enum %s" % (name))
                    value = ""
            if token is not None and token[0] == "sep" and token[1] == ",":
                token = self.token()
        else:
            token = self.token()
    return token
1249
1250 #
1251 # Parse a C definition block, used for structs it parse till
1252 # the balancing }
1253 #
def parseTypeBlock(self, token):
    """Skip a C definition block up to and including its balancing '}'.

    Nested '{' ... '}' pairs are skipped as well.  Returns the token
    following the balancing '}', or None at end of input.
    """
    depth = 0  # number of nested '{' still waiting for their '}'
    while token is not None:
        opens = token[0] == "sep" and token[1] == "{"
        closes = token[0] == "sep" and token[1] == "}"
        token = self.token()
        if opens:
            depth = depth + 1
        elif closes:
            if depth == 0:
                return token
            depth = depth - 1
    return token
1265
1266 #
1267 # Parse a type: the fact that the type name can either occur after
1268 # the definition or within the definition makes it a little harder
1269 # if inside, the name token is pushed back before returning
1270 #
def parseType(self, token):
    """Parse a C type starting at token and leave its text in self.type.

    Resets self.type, self.struct_fields and self.signature, then
    handles, in order: leading const/unsigned/signed qualifiers,
    long/short (with an optional following "long" and/or "int"),
    struct and enum definitions, plain type names, trailing operators
    and "const", function-pointer types (whose parameters are parsed
    with parseSignature()) and array suffixes.

    Because the declared name may appear inside the type (function
    pointers, arrays), one token of lookahead is used; the token read
    ahead is pushed back on the lexer and the name token is returned.

    Returns the next significant token (usually the declared name),
    the offending token after an error was reported, or None at end of
    input.
    """
    self.type = ""
    self.struct_fields = []
    self.signature = None
    if token is None:
        return token

    def add_word(word):
        # Append a word to self.type, blank-separated.
        if self.type == "":
            self.type = word
        else:
            self.type = self.type + " " + word

    while token is not None and token[0] == "name" and \
            token[1] in ("const", "unsigned", "signed"):
        add_word(token[1])
        token = self.token()
    if token is None:
        return token

    if token[0] == "name" and token[1] in ("long", "short"):
        first = token[1]
        add_word(first)
        # Read ahead for "long long" and for a trailing "int"; any other
        # token is pushed back so the generic code below reads it again.
        ahead = self.token()
        if first == "long" and ahead is not None and \
                ahead[0] == "name" and ahead[1] == "long":
            add_word(ahead[1])
            ahead = self.token()
        if ahead is not None and ahead[0] == "name" and ahead[1] == "int":
            add_word(ahead[1])
        elif ahead is not None:
            self.lexer.push(ahead)

    elif token[0] == "name" and token[1] == "struct":
        add_word(token[1])
        token = self.token()
        nametok = None
        if token is not None and token[0] == "name":
            nametok = token
            token = self.token()
        if token is not None and token[0] == "sep" and token[1] == "{":
            token = self.token()
            token = self.parseStruct(token)
        elif token is not None and token[0] == "op" and token[1] == "*":
            self.type = self.type + " " + nametok[1] + " *"
            token = self.token()
            while token is not None and token[0] == "op" and token[1] == "*":
                self.type = self.type + " *"
                token = self.token()
            if token is not None and token[0] == "name":
                nametok = token
                token = self.token()
            else:
                self.error("struct : expecting name", token)
                return token
        elif token is not None and token[0] == "name" and nametok is not None:
            self.type = self.type + " " + nametok[1]
            return token

        if nametok is not None:
            # Hand the name back to the caller, keeping the lookahead
            # token available on the lexer.
            self.lexer.push(token)
            token = nametok
        return token

    elif token[0] == "name" and token[1] == "enum":
        add_word(token[1])
        self.enums = []
        token = self.token()
        if token is not None and token[0] == "sep" and token[1] == "{":
            token = self.token()
            token = self.parseEnumBlock(token)
        else:
            self.error("parsing enum: expecting '{'", token)
        enum_type = None
        if token is not None and token[0] != "name":
            # Anonymous enum: return a placeholder name token.
            self.lexer.push(token)
            token = ("name", "enum")
        else:
            enum_type = token[1]
        for enum in self.enums:
            self.index_add(enum[0], self.filename,
                           not self.is_header, "enum",
                           (enum[1], enum[2], enum_type))
        return token

    elif token[0] == "name":
        add_word(token[1])
    else:
        self.error("parsing type %s: expecting a name" % (self.type),
                   token)
        return token

    token = self.token()
    while token is not None and (
            token[0] == "op" or
            token[0] == "name" and token[1] == "const"):
        self.type = self.type + " " + token[1]
        token = self.token()

    #
    # if there is a parenthesis here, this means a function type
    #
    if token is not None and token[0] == "sep" and token[1] == '(':
        self.type = self.type + token[1]
        token = self.token()
        while token is not None and token[0] == "op" and token[1] == '*':
            self.type = self.type + token[1]
            token = self.token()
        if token is None or token[0] != "name":
            self.error("parsing function type, name expected", token)
            return token
        self.type = self.type + token[1]
        nametok = token
        token = self.token()
        if token is not None and token[0] == "sep" and token[1] == ')':
            self.type = self.type + token[1]
            token = self.token()
            if token is not None and token[0] == "sep" and token[1] == '(':
                token = self.token()
                # parseSignature() calls parseType() for each parameter,
                # which overwrites self.type; save and restore it.
                saved_type = self.type
                token = self.parseSignature(token)
                self.type = saved_type
            else:
                self.error("parsing function type, '(' expected", token)
                return token
        else:
            self.error("parsing function type, ')' expected", token)
            return token
        self.lexer.push(token)
        token = nametok
        return token

    #
    # do some lookahead for arrays
    #
    if token is not None and token[0] == "name":
        nametok = token
        token = self.token()
        if token is not None and token[0] == "sep" and token[1] == '[':
            self.type = self.type + nametok[1]
            while token is not None and token[0] == "sep" and token[1] == '[':
                self.type = self.type + token[1]
                token = self.token()
                while token is not None and token[0] != 'sep' and \
                        token[1] != ']' and token[1] != ';':
                    self.type = self.type + token[1]
                    token = self.token()
            if token is not None and token[0] == 'sep' and token[1] == ']':
                self.type = self.type + token[1]
                token = self.token()
            else:
                self.error("parsing array type, ']' expected", token)
                return token
        elif token is not None and token[0] == "sep" and token[1] == ':':
            # remove :12 in case it's a limited int size
            token = self.token()
            token = self.token()
        self.lexer.push(token)
        token = nametok

    return token
1434
1435 #
1436 # Parse a signature: '(' has been parsed and we scan the type definition
1437 # up to the ')' included
def parseSignature(self, token):
    """Parse a parameter list; the opening '(' has already been consumed.

    Each parameter is read with parseType() and stored in
    self.signature as a (type, name, None) tuple.  A parameter given
    by type only gets None as its name, except that the type "..."
    also uses "..." as its name.

    Returns the token following the closing ')', or None at end of input.
    """
    if token is not None and token[0] == "sep" and token[1] == ')':
        # Empty parameter list.
        self.signature = []
        return self.token()

    params = []
    while token is not None:
        token = self.parseType(token)
        if token is None:
            break
        if token[0] == "name":
            params.append((self.type, token[1], None))
            token = self.token()
        elif token[0] == "sep" and token[1] == ',':
            token = self.token()
            continue
        elif token[0] == "sep" and token[1] == ')':
            # only the type was provided
            if self.type == "...":
                params.append((self.type, "...", None))
            else:
                params.append((self.type, None, None))

        if token is not None and token[0] == "sep":
            if token[1] == ',':
                token = self.token()
            elif token[1] == ')':
                token = self.token()
                break

    # Assigned last: parseType() resets self.signature on every call.
    self.signature = params
    return token
1467
1468 #
1469 # Parse a global definition, be it a type, variable or function
1470 # the extern "C" blocks are a bit nasty and require it to recurse.
1471 #
1472 def parseGlobal(self, token):
# Parse one top-level C construct starting at `token` and return the
# token at which parsing stopped (None at end of input).
#
# Visible steps:
#   * an 'extern' prefix is consumed; for an extern "C" { ... } block the
#     method calls itself on the tokens it finds until the closing "}";
#   * a 'static' prefix sets the local `static` flag;
#   * 'typedef' is delegated to parseTypedef();
#   * otherwise the type is read with parseType(), the following name is
#     stored in self.name, and the separators/operators after the name
#     decide what gets recorded with index_add(): a "variable" or
#     "struct" on ';', a "function" on '(' (signature read with
#     parseSignature(), a '{' body skipped with parseBlock()), and a
#     "variable" followed by a further declarator on ','.
#
# NOTE(review): the nesting of the `else:` branches cannot be recovered
# from this flattened listing; confirm against an indented copy before
# restructuring.
1473 static = 0
1474 if token[1] == 'extern':
1475 token = self.token()
1476 if token == None:
1477 return token
1478 if token[0] == 'string':
1479 if token[1] == 'C':
1480 token = self.token()
1481 if token == None:
1482 return token
1483 if token[0] == 'sep' and token[1] == "{":
1484 token = self.token()
1485# print 'Entering extern "C line ', self.lineno()
1486 while token != None and (token[0] != 'sep' or
1487 token[1] != "}"):
1488 if token[0] == 'name':
1489 token = self.parseGlobal(token)
1490 else:
1491 self.error(
1492 "token %s %s unexpected at the top level" % (
1493 token[0], token[1]))
1494 token = self.parseGlobal(token)
1495# print 'Exiting extern "C" line', self.lineno()
1496 token = self.token()
1497 return token
1498 else:
1499 return token
1500 elif token[1] == 'static':
1501 static = 1
1502 token = self.token()
1503 if token == None or token[0] != 'name':
1504 return token
1505
1506 if token[1] == 'typedef':
1507 token = self.token()
1508 return self.parseTypedef(token)
1509 else:
1510 token = self.parseType(token)
# Keep the bare type: each declarator after a ',' restarts from it.
1511 type_orig = self.type
1512 if token == None or token[0] != "name":
1513 return token
1514 type = type_orig
1515 self.name = token[1]
1516 token = self.token()
1517 while token != None and (token[0] == "sep" or token[0] == "op"):
1518 if token[0] == "sep":
1519 if token[1] == "[":
# Array suffix: every token up to the ';' is appended to the type text.
1520 type = type + token[1]
1521 token = self.token()
1522 while token != None and (token[0] != "sep" or \
1523 token[1] != ";"):
1524 type = type + token[1]
1525 token = self.token()
1526
1527 if token != None and token[0] == "op" and token[1] == "=":
1528 #
1529 # Skip the initialization of the variable
1530 #
1531 token = self.token()
1532 if token[0] == 'sep' and token[1] == '{':
1533 token = self.token()
1534 token = self.parseBlock(token)
1535 else:
1536 self.comment = None
1537 while token != None and (token[0] != "sep" or \
1538 (token[1] != ';' and token[1] != ',')):
1539 token = self.token()
1540 self.comment = None
1541 if token == None or token[0] != "sep" or (token[1] != ';' and
1542 token[1] != ','):
1543 self.error("missing ';' or ',' after value")
1544
1545 if token != None and token[0] == "sep":
1546 if token[1] == ";":
1547 self.comment = None
1548 token = self.token()
1549 if type == "struct":
1550 self.index_add(self.name, self.filename,
1551 not self.is_header, "struct", self.struct_fields)
1552 else:
1553 self.index_add(self.name, self.filename,
1554 not self.is_header, "variable", type)
1555 break
1556 elif token[1] == "(":
1557 token = self.token()
1558 token = self.parseSignature(token)
1559 if token == None:
1560 return None
# ';' after the signature: a prototype.
1561 if token[0] == "sep" and token[1] == ";":
1562 d = self.mergeFunctionComment(self.name,
1563 ((type, None), self.signature), 1)
1564 self.index_add(self.name, self.filename, static,
1565 "function", d)
1566 token = self.token()
# '{' after the signature: a definition, whose body is skipped.
1567 elif token[0] == "sep" and token[1] == "{":
1568 d = self.mergeFunctionComment(self.name,
1569 ((type, None), self.signature), static)
1570 self.index_add(self.name, self.filename, static,
1571 "function", d)
1572 token = self.token()
1573 token = self.parseBlock(token);
1574 elif token[1] == ',':
1575 self.comment = None
1576 self.index_add(self.name, self.filename, static,
1577 "variable", type)
1578 type = type_orig
1579 token = self.token()
1580 while token != None and token[0] == "sep":
1581 type = type + token[1]
1582 token = self.token()
1583 if token != None and token[0] == "name":
1584 self.name = token[1]
1585 token = self.token()
1586 else:
1587 break
1588
1589 return token
1590
def parse(self):
    """Parse the whole file and return its index.

    Every top-level token is handed to parseGlobal().  A token that is
    not a name is reported with error(); parseGlobal() is still called
    on it and the method then returns None.  Otherwise the top comment
    is processed with parseTopComment() and self.index is returned.
    """
    self.warning("Parsing %s" % (self.filename))
    token = self.token()
    while token is not None:
        if token[0] != 'name':
            self.error("token %s %s unexpected at the top level" % (
                token[0], token[1]))
            token = self.parseGlobal(token)
            return
        token = self.parseGlobal(token)
    self.parseTopComment(self.top_comment)
    return self.index
1604
1605
class docBuilder:
    """A documentation builder.

    Collects the C sources (modules) and headers found in a list of
    directories, parses each of them with CParser, merges the per-file
    indexes into one, and serializes the result as "<name>-api.xml" and
    "<name>-refs.xml" in the current directory.
    """

    # Characters turned into blanks before a description is split into
    # the words of the cross-reference index.
    _INDEX_SEPARATORS = "'\"/*[]()<>&#,.;"

    # Words never entered in the cross-reference index.
    # TODO: generalize this a bit
    _INDEX_STOPWORDS = ('and', 'the')

    # Types left out of the per-type function cross references.
    _SKIPPED_XREF_TYPES = ('', 'void', 'int', 'char *', 'const char *')

    def __init__(self, name, directories=None, excludes=None):
        """Create a builder for project `name`.

        directories -- directories to scan (default: ['.'])
        excludes    -- substrings of file paths to skip; the keys of the
                       module-level ignored_files table are always added
        """
        self.name = name
        self.directories = ['.'] if directories is None else directories
        self.excludes = list(excludes or []) + list(ignored_files.keys())
        self.modules = {}
        self.headers = {}
        self.idx = index()
        self.xref = {}
        self.index = {}
        if name == 'libxml2':
            self.basename = 'libxml'
        else:
            self.basename = name

    def indexString(self, id, str):
        """Enter the words of description `str` in self.xref under `id`.

        Only words starting with an ASCII letter, at least three
        characters long and not in the stop-word list are kept.  Does
        nothing when `str` is None.
        """
        if str is None:
            return
        text = str
        for sep in self._INDEX_SEPARATORS:
            text = text.replace(sep, ' ')
        for word in text.split():
            if word[0] not in string.ascii_letters or len(word) < 3:
                continue
            if word.lower() in self._INDEX_STOPWORDS:
                continue
            self.xref.setdefault(word, []).append(id)

    def analyze(self):
        """Print the header/module counts and analyze the merged index."""
        print("Project %s : %d headers, %d modules" % (self.name, len(self.headers), len(self.modules)))
        self.idx.analyze()

    def scanHeaders(self):
        """Parse every registered header and merge its index."""
        for header in list(self.headers.keys()):
            idx = CParser(header).parse()
            self.headers[header] = idx
            self.idx.merge(idx)

    def scanModules(self):
        """Parse every registered module and merge its public symbols."""
        for module in list(self.modules.keys()):
            idx = CParser(module).parse()
            self.modules[module] = idx
            self.idx.merge_public(idx)

    def _is_excluded(self, path):
        """Tell (and report) whether `path` contains an excluded substring."""
        for excl in self.excludes:
            if excl in path:
                print("Skipping %s" % path)
                return True
        return False

    def scan(self):
        """Register the *.c and *.h files of each directory, then parse them."""
        for directory in self.directories:
            for path in glob.glob(directory + "/*.c"):
                if not self._is_excluded(path):
                    self.modules[path] = None
            for path in glob.glob(directory + "/*.h"):
                if not self._is_excluded(path):
                    self.headers[path] = None
        self.scanHeaders()
        self.scanModules()

    def modulename_file(self, file):
        """Return the base name of `file` without a trailing .h or .c."""
        module = os.path.basename(file)
        if module[-2:] in ('.h', '.c'):
            module = module[:-2]
        return module

    def serialize_enum(self, output, name):
        """Write the <enum> element describing enumerator `name`."""
        sym = self.idx.enums[name]
        output.write(" <enum name='%s' file='%s'" % (name,
                     self.modulename_file(sym.header)))
        if sym.info is not None:
            info = sym.info
            if info[0] is not None and info[0] != '':
                # NOTE(review): eval() executes text taken from the parsed
                # C sources in order to reduce constant expressions; only
                # run this tool on trusted input.
                try:
                    val = eval(info[0])
                except Exception:
                    val = info[0]
                output.write(" value='%s'" % (val))
            if info[2] is not None and info[2] != '':
                output.write(" type='%s'" % info[2])
            if info[1] is not None and info[1] != '':
                output.write(" info='%s'" % escape(info[1]))
        output.write("/>\n")

    def serialize_macro(self, output, name):
        """Write the <macro> element describing macro `name`.

        The macro description and the descriptions of its arguments are
        cross-indexed under the macro name.
        """
        sym = self.idx.macros[name]
        output.write(" <macro name='%s' file='%s'>\n" % (name,
                     self.modulename_file(sym.header)))
        if sym.info is not None:
            # Best effort: malformed info is silently left out.
            try:
                (args, desc) = sym.info
                if desc is not None and desc != "":
                    output.write(" <info>%s</info>\n" % (escape(desc)))
                    self.indexString(name, desc)
                for (arg_name, arg_desc) in args:
                    if arg_desc is not None and arg_desc != "":
                        output.write(" <arg name='%s' info='%s'/>\n" % (
                                     arg_name, escape(arg_desc)))
                        # Index under the macro, not under its argument:
                        # xref entries must name a documented symbol.
                        self.indexString(name, arg_desc)
                    else:
                        output.write(" <arg name='%s'/>\n" % (arg_name))
            except Exception:
                pass
        output.write(" </macro>\n")

    def serialize_typedef(self, output, name):
        """Write the <struct> or <typedef> element describing `name`.

        A typedef whose type starts with "struct " is written as a
        <struct>, with one <field> per member when the struct definition
        is known.
        """
        sym = self.idx.typedefs[name]
        if sym.info[0:7] == 'struct ':
            output.write(" <struct name='%s' file='%s' type='%s'" % (
                         name, self.modulename_file(sym.header), sym.info))
            struct_name = sym.info[7:]
            if struct_name in self.idx.structs and isinstance(
                    self.idx.structs[struct_name].info, (tuple, list)):
                output.write(">\n")
                try:
                    for field in self.idx.structs[struct_name].info:
                        desc = field[2]
                        self.indexString(struct_name, desc)
                        if desc is None:
                            desc = ''
                        else:
                            desc = escape(desc)
                        output.write(" <field name='%s' type='%s' info='%s'/>\n" % (field[1], field[0], desc))
                except Exception:
                    print("Failed to serialize struct %s" % (struct_name))
                output.write(" </struct>\n")
            else:
                output.write("/>\n")
        else:
            output.write(" <typedef name='%s' file='%s' type='%s'" % (
                         name, self.modulename_file(sym.header), sym.info))
            try:
                desc = sym.extra
                if desc is not None and desc != "":
                    output.write(">\n <info>%s</info>\n" % (escape(desc)))
                    output.write(" </typedef>\n")
                else:
                    output.write("/>\n")
            except Exception:
                output.write("/>\n")

    def serialize_variable(self, output, name):
        """Write the <variable> element describing variable `name`."""
        sym = self.idx.variables[name]
        if sym.info is not None:
            output.write(" <variable name='%s' file='%s' type='%s'/>\n" % (
                         name, self.modulename_file(sym.header), sym.info))
        else:
            output.write(" <variable name='%s' file='%s'/>\n" % (
                         name, self.modulename_file(sym.header)))

    def serialize_function(self, output, name):
        """Write the element describing function (or functype) `name`.

        Descriptions of the function, its return value and its
        parameters are cross-indexed under the function name.
        """
        sym = self.idx.functions[name]
        if name == debugsym:
            print("=>", sym)

        output.write(" <%s name='%s' file='%s' module='%s'>\n" % (sym.type,
                     name, self.modulename_file(sym.header),
                     self.modulename_file(sym.module)))
        #
        # Processing of conditionals modified by Bill 1/1/05
        #
        if sym.conditionals is not None:
            apstr = ""
            for cond in sym.conditionals:
                if apstr != "":
                    apstr = apstr + " &amp;&amp; "
                apstr = apstr + cond
            output.write(" <cond>%s</cond>\n" % (apstr))
        try:
            (ret, params, desc) = sym.info
            if (desc is None or desc == '') and \
               name[0:9] != "xmlThrDef" and name != "xmlDllMain":
                print("%s %s from %s has no description" % (sym.type, name,
                      self.modulename_file(sym.module)))

            output.write(" <info>%s</info>\n" % (escape(desc)))
            self.indexString(name, desc)
            if ret[0] is not None:
                if ret[0] == "void":
                    output.write(" <return type='void'/>\n")
                else:
                    output.write(" <return type='%s' info='%s'/>\n" % (
                                 ret[0], escape(ret[1])))
                    self.indexString(name, ret[1])
            for param in params:
                if param[0] == 'void':
                    continue
                if param[2] is None:
                    output.write(" <arg name='%s' type='%s' info=''/>\n" % (param[1], param[0]))
                else:
                    output.write(" <arg name='%s' type='%s' info='%s'/>\n" % (param[1], param[0], escape(param[2])))
                    self.indexString(name, param[2])
        except Exception:
            print("Failed to save function %s info: " % name, repr(sym.info))
        output.write(" </%s>\n" % (sym.type))

    def serialize_exports(self, output, file):
        """Write the <file> element listing the symbols exported by header `file`."""
        module = self.modulename_file(file)
        output.write(" <file name='%s'>\n" % (module))
        hdr = self.headers[file]
        if hdr.info is not None:
            for data in ('Summary', 'Description', 'Author'):
                try:
                    output.write(" <%s>%s</%s>\n" % (
                                 data.lower(),
                                 escape(hdr.info[data]),
                                 data.lower()))
                except Exception:
                    print("Header %s lacks a %s description" % (module, data))
            if 'Description' in hdr.info:
                desc = hdr.info['Description']
                if "DEPRECATED" in desc:
                    output.write(" <deprecated/>\n")

        # Macros are sometime used to masquerade other types.
        shadowing = (hdr.functions, hdr.variables, hdr.typedefs,
                     hdr.structs, hdr.enums)
        for sym in uniq(sorted(hdr.macros.keys())):
            if any(sym in table for table in shadowing):
                continue
            output.write(" <exports symbol='%s' type='macro'/>\n" % (sym))
        for table, kind in ((hdr.enums, 'enum'),
                            (hdr.typedefs, 'typedef'),
                            (hdr.structs, 'struct'),
                            (hdr.variables, 'variable'),
                            (hdr.functions, 'function')):
            for sym in uniq(sorted(table.keys())):
                output.write(" <exports symbol='%s' type='%s'/>\n" % (sym, kind))
        output.write(" </file>\n")

    def serialize_xrefs_files(self, output):
        """Write, per header, the sorted list of symbols it defines."""
        for file in sorted(self.headers.keys()):
            module = self.modulename_file(file)
            output.write(" <file name='%s'>\n" % (module))
            hdr = self.headers[file]
            ids = uniq(list(hdr.functions.keys()) + list(hdr.variables.keys()) +
                       list(hdr.macros.keys()) + list(hdr.typedefs.keys()) +
                       list(hdr.structs.keys()) + list(hdr.enums.keys()))
            ids.sort()
            for sym in ids:
                output.write(" <ref name='%s'/>\n" % (sym))
            output.write(" </file>\n")

    def _write_type_refs(self, output, by_type, dedupe):
        """Write one <type> element per interesting type in `by_type`.

        by_type maps a type name to the functions using it; consecutive
        duplicates in the sorted function list are dropped when `dedupe`.
        """
        for type_name in sorted(by_type.keys()):
            if type_name in self._SKIPPED_XREF_TYPES:
                continue
            output.write(" <type name='%s'>\n" % (type_name))
            users = by_type[type_name]
            users.sort()
            previous = ''
            for user in users:
                if dedupe and user == previous:
                    continue
                output.write(" <ref name='%s'/>\n" % (user))
                previous = user
            output.write(" </type>\n")

    def serialize_xrefs_functions(self, output):
        """Write, per parameter type, the functions taking it as argument."""
        by_type = {}
        for name in list(self.idx.functions.keys()):
            sym = self.idx.functions[name]
            # Best effort: functions without usable info are skipped.
            try:
                (ret, params, desc) = sym.info
                for param in params:
                    if param[0] == 'void':
                        continue
                    by_type.setdefault(param[0], []).append(name)
            except Exception:
                pass
        # not sure why we have dups, but get rid of them!
        self._write_type_refs(output, by_type, True)

    def serialize_xrefs_constructors(self, output):
        """Write, per return type, the functions returning it."""
        by_type = {}
        for name in list(self.idx.functions.keys()):
            sym = self.idx.functions[name]
            # Best effort: functions without usable info are skipped.
            try:
                (ret, params, desc) = sym.info
                if ret[0] == "void":
                    continue
                by_type.setdefault(ret[0], []).append(name)
            except Exception:
                pass
        self._write_type_refs(output, by_type, False)

    def serialize_xrefs_alpha(self, output):
        """Write all identifiers grouped by their first character."""
        letter = None
        for sym in sorted(self.idx.identifiers.keys()):
            if sym[0] != letter:
                if letter is not None:
                    output.write(" </letter>\n")
                letter = sym[0]
                output.write(" <letter name='%s'>\n" % (letter))
            output.write(" <ref name='%s'/>\n" % (sym))
        if letter is not None:
            output.write(" </letter>\n")

    def serialize_xrefs_references(self, output):
        """Write, for each identifier, the HTML anchor documenting it."""
        for sym in sorted(self.idx.identifiers.keys()):
            module = self.idx.identifiers[sym].header
            output.write(" <reference name='%s' href='%s'/>\n" % (sym,
                         'html/' + self.basename + '-' +
                         self.modulename_file(module) + '.html#' +
                         sym))

    def serialize_xrefs_index(self, output):
        """Write the word index built by indexString(), split in chunks.

        Words referenced by more than 30 symbols are left out.  A new
        chunk is started on a change of first letter once the current
        chunk holds more than 200 references; the <chunks> summary lists
        the first and last letter of each chunk.
        """
        words = self.xref
        letter = None
        first_letter = None
        count = 0
        chunk = 0
        chunks = []
        for word in sorted(words.keys()):
            if len(words[word]) > 30:
                continue
            if word[0] != letter:
                if letter is None or count > 200:
                    if letter is not None:
                        output.write(" </letter>\n")
                        output.write(" </chunk>\n")
                        count = 0
                        chunks.append(["chunk%s" % (chunk - 1), first_letter, letter])
                    output.write(" <chunk name='chunk%s'>\n" % (chunk))
                    first_letter = word[0]
                    chunk = chunk + 1
                elif letter is not None:
                    output.write(" </letter>\n")
                letter = word[0]
                output.write(" <letter name='%s'>\n" % (letter))
            output.write(" <word name='%s'>\n" % (word))
            refs = words[word]
            refs.sort()
            previous = None
            for ref in refs:
                if previous == ref:
                    continue
                previous = ref
                output.write(" <ref name='%s'/>\n" % (ref))
                count = count + 1
            output.write(" </word>\n")
        if letter is not None:
            output.write(" </letter>\n")
            output.write(" </chunk>\n")
            if count != 0:
                chunks.append(["chunk%s" % (chunk - 1), first_letter, letter])
            output.write(" <chunks>\n")
            for ch in chunks:
                output.write(" <chunk name='%s' start='%s' end='%s'/>\n" % (
                             ch[0], ch[1], ch[2]))
            output.write(" </chunks>\n")

    def serialize_xrefs(self, output):
        """Write every cross-reference section, each in its own element."""
        sections = (
            ("references", self.serialize_xrefs_references),
            ("alpha", self.serialize_xrefs_alpha),
            ("constructors", self.serialize_xrefs_constructors),
            ("functions", self.serialize_xrefs_functions),
            ("files", self.serialize_xrefs_files),
            ("index", self.serialize_xrefs_index),
        )
        for tag, writer in sections:
            output.write(" <%s>\n" % tag)
            writer(output)
            output.write(" </%s>\n" % tag)

    def serialize(self):
        """Write "<name>-api.xml" and "<name>-refs.xml".

        Both files are closed even when serialization raises.
        """
        filename = "%s-api.xml" % self.name
        print("Saving XML description %s" % (filename))
        with open(filename, "w") as output:
            output.write('<?xml version="1.0" encoding="ISO-8859-1"?>\n')
            output.write("<api name='%s'>\n" % self.name)
            output.write(" <files>\n")
            for file in sorted(self.headers.keys()):
                self.serialize_exports(output, file)
            output.write(" </files>\n")
            output.write(" <symbols>\n")
            for macro in sorted(self.idx.macros.keys()):
                self.serialize_macro(output, macro)
            for enum in sorted(self.idx.enums.keys()):
                self.serialize_enum(output, enum)
            for typedef in sorted(self.idx.typedefs.keys()):
                self.serialize_typedef(output, typedef)
            for variable in sorted(self.idx.variables.keys()):
                self.serialize_variable(output, variable)
            for function in sorted(self.idx.functions.keys()):
                self.serialize_function(output, function)
            output.write(" </symbols>\n")
            output.write("</api>\n")

        filename = "%s-refs.xml" % self.name
        print("Saving XML Cross References %s" % (filename))
        with open(filename, "w") as output:
            output.write('<?xml version="1.0" encoding="ISO-8859-1"?>\n')
            output.write("<apirefs name='%s'>\n" % self.name)
            self.serialize_xrefs(output)
            output.write("</apirefs>\n")
2110
2111
def rebuild():
    """Guess the project from the files around and rebuild its API description.

    The first layout whose probe file exists selects the project name,
    the directories to scan and the files to exclude.  When
    ../libexslt/exslt.c exists, libexslt is rebuilt as well.

    Returns the main docBuilder, or None when no known layout is found.
    """
    layouts = (
        ("parser.c", "libxml2", [".", "."],
         ["xmlwin32version.h", "tst.c"]),
        ("../parser.c", "libxml2", ["..", "../include/libxml"],
         ["xmlwin32version.h", "tst.c"]),
        ("../libxslt/transform.c", "libxslt", ["../libxslt"],
         ["win32config.h", "libxslt.h", "tst.c"]),
    )
    builder = None
    for probe, project, directories, excludes in layouts:
        if glob.glob(probe):
            print("Rebuilding API description for %s" % project)
            builder = docBuilder(project, directories, excludes)
            break
    if builder is None:
        print("rebuild() failed, unable to guess the module")
        return None

    builder.scan()
    builder.analyze()
    builder.serialize()
    if glob.glob("../libexslt/exslt.c"):
        extra = docBuilder("libexslt", ["../libexslt"], ["libexslt.h"])
        extra.scan()
        extra.analyze()
        extra.serialize()
    return builder
2138
2139#
2140# for debugging the parser
2141#
def parse(filename):
    """Parse a single C file with CParser and return its index (debug helper)."""
    return CParser(filename).parse()
2146
if __name__ == "__main__":
    # Without argument rebuild the whole API description; with one, parse
    # only that file with debugging output turned on.
    if len(sys.argv) <= 1:
        rebuild()
    else:
        debug = 1
        parse(sys.argv[1])
Note: See TracBrowser for help on using the repository browser.

© 2024 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette