VirtualBox

source: vbox/trunk/doc/manual/htmlhelp-qthelp.py@ 87630

Last change on this file since 87630 was 87630, checked in by vboxsync, 4 years ago

Doc: Some small comment correction

  • Property svn:eol-style set to LF
  • Property svn:executable set to *
  • Property svn:keywords set to Author Date Id Revision
File size: 9.0 KB
Line 
1#!/usr/bin/python
2
3# $Id: htmlhelp-qthelp.py 87630 2021-02-05 18:00:24Z vboxsync $
4## @file
5# A Python 2.x script to create a .qhp file out of a given htmlhelp
6# folder. Many things about that folder are assumed. Please
7# read the code and the inline comments.
8
9import sys, getopt
10import os.path
11import re
12import codecs
13import logging
14from HTMLParser import HTMLParser
15
16__copyright__ = \
17"""
18Copyright (C) 2006-2020 Oracle Corporation
19
20This file is part of VirtualBox Open Source Edition (OSE), as
21available from http://www.virtualbox.org. This file is free software;
22you can redistribute it and/or modify it under the terms of the GNU
23General Public License (GPL) as published by the Free Software
24Foundation, in version 2 as it comes in the "COPYING" file of the
25VirtualBox OSE distribution. VirtualBox OSE is distributed in the
26hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
27"""
28
29
30# number of opened and not yet closed <section> tags of the toc section; incremented in parse_object_tag() and decremented in parse_non_object_tag()
31open_section_tags = 0
32
33html_files = [] # html file names read from htmlhelp.hhp; filled by create_html_list(), read by create_keywords_section()
34
class html_parser(HTMLParser):
    """Collect the values of the ``name`` attributes of ``<a>`` tags.

    After feed() has been called, ``a_tag`` holds the anchor names in the
    order in which they were encountered.
    """

    def __init__(self):
        HTMLParser.__init__(self)
        # anchor names found so far
        self.a_tag = []

    def handle_starttag(self, tag, attributes):
        """Record the name of every named anchor.

        tag        -- name of the tag that has just been opened
        attributes -- list of (name, value) pairs of that tag; value is
                      None for an attribute written without a value
                      (e.g. <a name>)
        """
        if tag != 'a':
            return
        for attr_name, attr_value in attributes:
            # Skip missing/empty names: they cannot be used as a link
            # target and a None value cannot be concatenated into a
            # string later on.
            if attr_name == 'name' and attr_value:
                self.a_tag.append(attr_value)
47
def create_keywords_section(folder):
    """Build the <keywords> section of the qhelp project file.

    Every file named in the module level html_files list is read from
    folder and fed to html_parser. Each anchor name collected by the parser
    becomes one <keyword> element whose name and id are the anchor name and
    whose ref is "<html file name>#<anchor name>".

    folder -- folder holding the html files

    Returns the section as a list of lines, including the enclosing
    <keywords> and </keywords> lines.
    """
    # local import, nothing else in this file needs it
    from xml.sax.saxutils import escape

    # escape() handles &, < and > by itself; the double quote has to be
    # added since the values end up inside double quoted attributes
    quote_entity = {'"': '&quot;'}

    keywords_section_lines = ['<keywords>']
    for html_file_name in html_files:
        full_html_path = os.path.join(folder, html_file_name)
        # 'with' closes the file even if reading it fails
        with open(full_html_path, 'r') as html_file:
            file_content = html_file.read()
        parser = html_parser()
        parser.feed(file_content)
        for anchor in parser.a_tag:
            keyword = escape(anchor, quote_entity)
            ref = escape(html_file_name + '#' + anchor, quote_entity)
            keywords_section_lines.append(
                '<keyword name="' + keyword + '" id="' + keyword + '" ref="' + ref + '"/>')
    keywords_section_lines.append('</keywords>')
    return keywords_section_lines
61
def create_image_list(folder):
    """Build the <file> entries for the content of the 'images' sub folder.

    Every directory entry of <folder>/images is listed, whatever its
    extension is.

    folder -- htmlhelp folder which is expected to contain 'images'

    Returns a list of '<file>images/NAME</file>' lines sorted by NAME. If
    there is no 'images' sub folder an error is logged and an empty list is
    returned.
    """
    image_folder_name = 'images'
    full_folder_path = os.path.join(folder, image_folder_name)
    # check the sub folder directly instead of walking the whole tree
    if not os.path.isdir(full_folder_path):
        logging.error('Image subfolder "%s" is not found under "%s".', image_folder_name, folder)
        return []
    # os.listdir() returns the entries in arbitrary order; sort them so that
    # the generated project file is reproducible
    return ['<file>' + image_folder_name + '/' + entry + '</file>'
            for entry in sorted(os.listdir(full_folder_path))]
78
def create_html_list(folder):
    """Read the html file names listed in <folder>/htmlhelp.hhp.

    Every line that comes after the line holding the [FILES] marker and
    contains '.html' is taken as a file name. Each name is appended to the
    module level html_files list as a side effect.

    folder -- folder holding htmlhelp.hhp

    Returns a list of '<file>NAME</file>' lines. If htmlhelp.hhp cannot be
    found in folder an error is logged and an empty list is returned.
    """
    global html_files
    file_name = 'htmlhelp.hhp'
    html_file_lines = []
    full_path = os.path.join(folder, file_name)
    # isfile() also copes with a folder that does not exist at all
    if not os.path.isfile(full_path):
        logging.error('Could not find the file "%s" in "%s"', file_name, folder)
        return html_file_lines
    with open(full_path, "r") as hhp_file:
        lines = hhp_file.readlines()
    # first search for the [FILES] marker then collect .html lines
    marker_found = False
    for line in lines:
        if '[FILES]' in line:
            marker_found = True
            continue
        if not marker_found:
            continue
        if '.html' in line:
            # strip() instead of strip('\n'): a '\r' left over from a CRLF
            # line end must not become part of the file name
            html_file_name = line.strip()
            html_file_lines.append('<file>' + html_file_name + '</file>')
            html_files.append(html_file_name)
    return html_file_lines
103
104
def create_files_section(folder):
    """Assemble the <files> section of the qhelp project file.

    The entries delivered by create_image_list() come first, followed by
    those of create_html_list(); both are wrapped in <files> ... </files>.

    Returns the section as a list of lines.
    """
    section = ['<files>']
    section.extend(create_image_list(folder))
    section.extend(create_html_list(folder))
    section.append('</files>')
    return section
111
def parse_param_tag(line):
    """Extract the content of the first value="..." attribute of line.

    Returns the text between the double quotes, or an empty string if line
    has no value=" part or the closing double quote is missing.
    """
    _, label, remainder = line.partition('value="')
    if not label:
        return ''
    value, closing_quote, _ = remainder.partition('"')
    if not closing_quote:
        return ''
    return value
122
def parse_object_tag(lines, index):
    """Turn the <OBJECT tag at lines[index] into an opening <section> tag.

    The two tags following it are supposed to look like
        <param name="Name" value="Oracle VM VirtualBox">
        <param name="Local" value="index.html">
    Their value fields become the title and the ref of the section:
        <section title="Oracle VM VirtualBox" ref="index.html">

    lines -- list of tags
    index -- position of the <OBJECT tag within lines

    Returns the <section> tag. Returns '' after logging a warning if the
    two param tags are not there or if one of the values is empty. The
    module level open_section_tags counter is incremented for every section
    tag returned.
    """
    global open_section_tags
    # lines[index + 2] is read below, so index + 2 must be a valid position
    if index + 2 >= len(lines):
        logging.warning('Not enough tags after this one "%s"', lines[index])
        return ''
    if not re.match(r'^\s*<param', lines[index + 1], re.IGNORECASE) or \
       not re.match(r'^\s*<param', lines[index + 2], re.IGNORECASE):
        logging.warning('Skipping the line "%s" since next two tags are supposed to be param tags', lines[index])
        return ''

    title = parse_param_tag(lines[index + 1])
    ref = parse_param_tag(lines[index + 2])
    if not (title and ref):
        logging.warning('Title or ref part is empty for the tag "%s"', lines[index])
        return ''
    open_section_tags += 1
    return '<section title="' + title + '" ref="' + ref + '">'


def parse_param_tag(line):
    """Extract the content of the first value="..." attribute of line."""
    label = 'value="'
    start = line.find(label)
    if start == -1:
        return ''
    start += len(label)
    end = line.find('"', start)
    if end == -1:
        return ''
    return line[start:end]
147
def parse_non_object_tag(lines, index):
    """Decide whether the tag at lines[index] closes a <section> tag.

    Handles every tag that does not start with <OBJECT. A section is closed
      - by a </OBJECT tag, unless the tag following it is a <UL tag
        (presumably because that <UL holds the sub sections), and
      - by a </UL tag.
    Nothing is closed while the module level open_section_tags counter is
    not positive; the counter is decremented for every section closed.

    lines -- list of tags
    index -- position of the tag to look at

    Returns '</section>' if a section is closed, '' otherwise.
    """
    global open_section_tags
    if index >= len(lines):
        return ''
    if open_section_tags <= 0:
        return ''
    current_tag = lines[index]
    # replace </OBJECT with </section only if the next tag is not <UL
    if re.match(r'^\s*</OBJECT', current_tag, re.IGNORECASE):
        # the very last tag has no successor, so no <UL can follow it
        followed_by_ul = index + 1 < len(lines) and \
            re.match(r'^\s*<UL', lines[index + 1], re.IGNORECASE)
        if not followed_by_ul:
            open_section_tags -= 1
            return '</section>'
    elif re.match(r'^\s*</UL', current_tag, re.IGNORECASE):
        open_section_tags -= 1
        return '</section>'
    return ''
165
def parse_line(lines, index):
    """Convert the tag at lines[index] into its qhelp toc counterpart.

    A tag starting with <OBJECT is handed to parse_object_tag(), any other
    tag to parse_non_object_tag().

    Returns whatever the function in charge returns.
    """
    if re.match(r'^\s*<OBJECT', lines[index], re.IGNORECASE):
        return parse_object_tag(lines, index)
    return parse_non_object_tag(lines, index)
175
def create_toc(folder):
    """Build the <toc> section of the qhelp project file from toc.hhc.

    It is assumed that all the relevant information is stored in tags and
    attributes: whatever is outside of <...> pairs is filtered out. It is
    also assumed that <...> pairs are not nested and that each < has its >.
    The file is read as iso-8859-1.

    folder -- folder holding toc.hhc

    Returns the section as a list of lines, including the enclosing <toc>
    and </toc> lines. If toc.hhc cannot be found in folder an error is
    logged and an empty list is returned, so that the result can always be
    concatenated with other lists.
    """
    toc_file = 'toc.hhc'
    full_path = os.path.join(folder, toc_file)
    # check the file directly instead of walking the whole tree
    if not os.path.isfile(full_path):
        logging.error('Could not find toc file "%s" under "%s"', toc_file, folder)
        return []
    with codecs.open(full_path, encoding='iso-8859-1') as toc:
        content = toc.read()

    # Convert the file content into a list of tags, thereby eliminating
    # whatever resides outside of tags. Each tag runs from a '<' to the
    # first '>' after it; a tag ending on the very last character of the
    # file is kept as well.
    tag_list = re.findall(r'<[^>]*>', content)

    toc_string_list = ['<toc>']
    for index, _tag in enumerate(tag_list):
        # parse_line() wants the whole list since it looks at following tags
        toc_line = parse_line(tag_list, index)
        if toc_line:
            toc_string_list.append(toc_line)
    toc_string_list.append('</toc>')
    return toc_string_list
218
def usage(arg):
    """Print the command line synopsis and exit.

    arg -- exit status handed over to sys.exit()
    """
    # print with parentheses and a single argument behaves the same as the
    # print statement on Python 2 and is valid syntax on Python 3 as well
    print('htmlhelp-qthelp.py -d <helphtmlfolder> -o <outputfilename>')
    sys.exit(arg)
222
def main(argv):
    """Create a qhelp project (.qhp) file out of an htmlhelp folder.

    argv -- command line arguments without the program name:
              -h                    print the usage and exit
              -d <helphtmlfolder>   folder holding the htmlhelp files
              -o <outputfilename>   name of the file to be written

    Leaves through usage() with exit status 2 if an option is unknown, if
    the folder is not given or does not exist, or if no output file name
    is given.
    """
    helphtmlfolder = ''
    output_filename = ''
    try:
        # parse what the caller handed over instead of reading sys.argv again
        opts, _args = getopt.getopt(argv, "hd:o:")
    except getopt.GetoptError as err:
        print(err)
        usage(2)
    for opt, arg in opts:
        if opt == '-h':
            usage(0)
        # plain comparison: ("-d") is a string, not a tuple, so 'in' would
        # be a substring test
        elif opt == '-d':
            helphtmlfolder = arg
        elif opt == '-o':
            output_filename = arg

    # check supplied helphtml folder argument
    if not helphtmlfolder:
        logging.error('No helphtml folder is provided. Exiting')
        usage(2)
    if not os.path.exists(helphtmlfolder):
        logging.error('folder "%s" does not exist. Exiting', helphtmlfolder)
        usage(2)
    helphtmlfolder = os.path.normpath(helphtmlfolder)

    # check supplied output file name
    if not output_filename:
        logging.error('No filename for output is given. Exiting')
        usage(2)

    out_xml_lines = ['<?xml version="1.0" encoding="UTF-8"?>',
                     '<QtHelpProject version="1.0">',
                     '<namespace>org.virtualbox</namespace>',
                     '<virtualFolder>doc</virtualFolder>',
                     '<filterSection>']
    # 'or []' keeps the concatenation working if no toc could be created
    out_xml_lines += create_toc(helphtmlfolder) or []
    out_xml_lines += create_files_section(helphtmlfolder)
    out_xml_lines += create_keywords_section(helphtmlfolder)
    out_xml_lines += ['</filterSection>', '</QtHelpProject>']

    # codecs.open() does the UTF-8 encoding while writing and, unlike a
    # text mode file, never translates the '\n' line ends; 'with' closes
    # the file even if writing fails
    with codecs.open(output_filename, 'w', encoding='utf-8') as out_file:
        out_file.write('\n'.join(out_xml_lines))


if __name__ == '__main__':
    main(sys.argv[1:])
Note: See TracBrowser for help on using the repository browser.

© 2024 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette