VirtualBox

source: vbox/trunk/doc/manual/htmlhelp-qthelp.py@ 88979

Last change on this file since 88979 was 87727, checked in by vboxsync, 4 years ago

doc/manual: bugref:9831. Make htmlhelp-qthelp.py compatible with older Python 2.

  • Property svn:eol-style set to LF
  • Property svn:executable set to *
  • Property svn:keywords set to Author Date Id Revision
File size: 9.2 KB
Line 
1#!/usr/bin/python3
2
3# $Id: htmlhelp-qthelp.py 87727 2021-02-11 21:47:15Z vboxsync $
4## @file
5# A python script to create a .qhp file out of a given htmlhelp
6# folder. Lots of things about the said folder are assumed. Please
7# see the code and inlined comments.
8
9import sys, getopt
10import os.path
11import re
12import codecs
13import logging
14
15if sys.version_info >= (3, 0):
16 from html.parser import HTMLParser
17else:
18 from HTMLParser import HTMLParser
19
20
21__copyright__ = \
22"""
23Copyright (C) 2006-2020 Oracle Corporation
24
25This file is part of VirtualBox Open Source Edition (OSE), as
26available from http://www.virtualbox.org. This file is free software;
27you can redistribute it and/or modify it under the terms of the GNU
28General Public License (GPL) as published by the Free Software
29Foundation, in version 2 as it comes in the "COPYING" file of the
30VirtualBox OSE distribution. VirtualBox OSE is distributed in the
31hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
32"""
33
# Number of <section> tags of the toc that have been opened and not yet
# closed. Updated by parse_object_tag() and parse_non_object_tag().
open_section_tags = 0

# Names of the html files listed in htmlhelp.hhp. Filled in by
# create_html_list() and read by create_keywords_section().
html_files = []
38
def create_keywords_section(folder):
    """Build the <keywords> section of the qhelp project file.

    Every file named in the module level html_files list is read from
    folder and scanned for <a name="..."> anchors. One <keyword> line is
    emitted per anchor, in the order the anchors are met.

    Returns the list of lines, '<keywords>' and '</keywords>' included.
    """
    # Local import: only this function needs it.
    from xml.sax.saxutils import quoteattr

    class AnchorCollector(HTMLParser):
        """Collect the value of the name attribute of every <a> tag."""

        def __init__(self):
            HTMLParser.__init__(self)
            self.names = []

        def handle_starttag(self, tag, attributes):
            if tag != 'a':
                return
            for attr_name, attr_value in attributes:
                # A bare <a name> carries no value (None) and an empty
                # name cannot serve as a keyword, so both are skipped.
                if attr_name == 'name' and attr_value:
                    self.names.append(attr_value)

    keywords_section_lines = ['<keywords>']
    for html_file_name in html_files:
        full_html_path = os.path.join(folder, html_file_name)
        with codecs.open(full_html_path, encoding='iso-8859-1') as html_file:
            file_content = html_file.read()

        parser = AnchorCollector()
        parser.feed(file_content)
        for name in parser.names:
            # The parser hands over attribute values with their character
            # references decoded, so they have to be escaped again before
            # they are written into XML attributes.
            ref = html_file_name + '#' + name
            keywords_section_lines.append(
                '<keyword name=%s id=%s ref=%s/>'
                % (quoteattr(name), quoteattr(name), quoteattr(ref)))
    keywords_section_lines.append('</keywords>')
    return keywords_section_lines
65
def create_image_list(folder):
    """Build the <file> entries for the images of the help.

    Every entry found in the 'images' sub folder of folder is listed,
    whatever its type.

    Returns a list of '<file>images/NAME</file>' lines sorted by name, or
    an empty list (after logging an error) when folder has no 'images'
    sub folder.
    """
    image_folder_name = 'images'
    full_folder_path = os.path.join(folder, image_folder_name)
    if not os.path.isdir(full_folder_path):
        logging.error('Image subfolder "%s" is not found under "%s".', image_folder_name, folder)
        return []
    # os.listdir() returns the names in arbitrary order; sort them so the
    # generated project file is the same on every run.
    return ['<file>' + image_folder_name + '/' + name + '</file>'
            for name in sorted(os.listdir(full_folder_path))]
82
def create_html_list(folder):
    """Read the list of html files out of htmlhelp.hhp found in folder.

    Only the lines of the [FILES] section that contain '.html' are used.
    Each file name is also appended to the module level html_files list.

    Returns a list of '<file>NAME</file>' lines, or an empty list (after
    logging an error) when htmlhelp.hhp is not found in folder.
    """
    file_name = 'htmlhelp.hhp'
    html_file_lines = []
    full_path = os.path.join(folder, file_name)
    if not os.path.isfile(full_path):
        logging.error('Could not find the file "%s" in "%s"', file_name, folder)
        return html_file_lines
    with codecs.open(full_path, encoding='iso-8859-1') as hhp_file:
        lines = hhp_file.readlines()

    in_files_section = False
    for line in lines:
        # strip() rather than strip('\n'): a project file with CRLF line
        # ends would otherwise leave a '\r' glued to every file name.
        entry = line.strip()
        if entry.startswith('['):
            # A section header: collecting starts at [FILES] and stops at
            # whatever section comes next.
            in_files_section = '[FILES]' in entry
            continue
        if in_files_section and '.html' in entry:
            html_file_lines.append('<file>' + entry + '</file>')
            html_files.append(entry)
    return html_file_lines
108
109
def create_files_section(folder):
    """Build the <files> section of the qhelp project file.

    The section lists the image files first and the html files second.

    Returns the list of lines, '<files>' and '</files>' included.
    """
    return (['<files>']
            + create_image_list(folder)
            + create_html_list(folder)
            + ['</files>'])
116
def parse_param_tag(line):
    """Return the text of the value="..." attribute found in line.

    An empty string is returned when line has no value=" label or when
    the closing double quote is missing.
    """
    _, label, rest = line.partition('value="')
    if not label:
        return ''
    value, closing_quote, _ = rest.partition('"')
    if not closing_quote:
        return ''
    return value
127
def parse_object_tag(lines, index):
    """Turn the <OBJECT tag at lines[index] into an opening <section> tag.

    The two tags that follow are supposed to look like this:
        <param name="Name" value="Oracle VM VirtualBox">
        <param name="Local" value="index.html">
    Their value fields become the title and the ref of the section:
        <section title="Oracle VM VirtualBox" ref="index.html">

    On success the module level open_section_tags counter is incremented.

    Returns the <section> tag, or an empty string (after logging a
    warning) when the two tags are missing, are not param tags, or have
    an empty value.
    """
    global open_section_tags
    # Both lines[index + 1] and lines[index + 2] are read below, so
    # index + 2 has to be a valid position in the list.
    if index + 2 >= len(lines):
        logging.warning('Not enough tags after this one "%s"', lines[index])
        return ''
    title_tag = lines[index + 1]
    ref_tag = lines[index + 2]
    if not (re.match(r'^\s*<param', title_tag, re.IGNORECASE)
            and re.match(r'^\s*<param', ref_tag, re.IGNORECASE)):
        logging.warning('Skipping the line "%s" since next two tags are supposed to be param tags', lines[index])
        return ''
    title = parse_param_tag(title_tag)
    ref = parse_param_tag(ref_tag)
    if not (title and ref):
        logging.warning('Title or ref part is empty for the tag "%s"', lines[index])
        return ''
    open_section_tags += 1
    return '<section title="' + title + '" ref="' + ref + '">'
151
def parse_non_object_tag(lines, index):
    """Decide whether the tag at lines[index] closes a <section> tag.

    Used for every tag that does not start with <OBJECT. A section is
    closed by a </UL tag, and by a </OBJECT tag unless the tag after it is
    a <UL tag. Nothing is closed while the module level open_section_tags
    counter is zero; the counter is decremented for every section closed.

    Returns '</section>' or an empty string.
    """
    global open_section_tags
    if index >= len(lines) or open_section_tags <= 0:
        return ''
    tag = lines[index]
    if re.match(r'^\s*</OBJECT', tag, re.IGNORECASE):
        # When </OBJECT is the very last tag there is no next tag to look
        # at, hence no <UL, and the section gets closed.
        followed_by_ul = (index + 1 < len(lines)
                          and re.match(r'^\s*<UL', lines[index + 1], re.IGNORECASE))
        if followed_by_ul:
            return ''
    elif not re.match(r'^\s*</UL', tag, re.IGNORECASE):
        return ''
    open_section_tags -= 1
    return '</section>'
169
def parse_line(lines, index):
    """Convert the tag at lines[index] into a line of the <toc> section.

    Tags starting with <OBJECT are handed to parse_object_tag(), all the
    others to parse_non_object_tag().

    Returns whatever the selected function returns.
    """
    if re.match(r'^\s*<OBJECT', lines[index], re.IGNORECASE):
        return parse_object_tag(lines, index)
    return parse_non_object_tag(lines, index)
179
def create_toc(folder):
    """Build the <toc> section out of the toc.hhc file found in folder.

    All the relevant information is assumed to be stored in tags and
    attributes: whatever is outside of < ... > pairs is filtered out. It
    is also assumed that < ... > pairs are not nested and that each <
    matches a >.

    Returns the list of lines, '<toc>' and '</toc>' included, or an empty
    list (after logging an error) when toc.hhc is not found in folder.
    """
    toc_file = 'toc.hhc'
    full_path = os.path.join(folder, toc_file)
    if not os.path.isfile(full_path):
        logging.error('Could not find toc file "%s" under "%s"', toc_file, folder)
        # An empty list rather than None, so that the result can always
        # be concatenated with the other sections.
        return []
    with codecs.open(full_path, encoding='iso-8859-1') as hhc_file:
        content = hhc_file.read()

    # Convert the file content into a list of tags, thereby eliminating
    # whatever resides outside of the tags. A tag runs from a < up to the
    # first > after it, including a tag ending on the last char of the file.
    tag_list = re.findall(r'<[^>]*>', content)

    toc_string_list = ['<toc>']
    for index in range(len(tag_list)):
        toc_line = parse_line(tag_list, index)
        if toc_line:
            toc_string_list.append(toc_line)
    toc_string_list.append('</toc>')
    return toc_string_list
222
def usage(arg):
    """Print the command line synopsis and exit with arg as exit status."""
    print('htmlhelp-qthelp.py -d <helphtmlfolder> -o <outputfilename>')
    sys.exit(arg)
226
def main(argv):
    """Create the .qhp file described by the command line arguments.

    argv holds the arguments without the program name:
        -h           print the usage and exit
        -d <folder>  the htmlhelp folder to read from
        -o <file>    the name of the .qhp file to write

    The usage is printed and the script exits when an option is not
    recognized, when the folder is missing or does not exist, or when no
    output file name is given.
    """
    helphtmlfolder = ''
    output_filename = ''
    try:
        opts, _ = getopt.getopt(argv, "hd:o:")
    except getopt.GetoptError as err:
        print(err)
        usage(2)
    for opt, arg in opts:
        if opt == '-h':
            usage(0)
        elif opt == '-d':
            helphtmlfolder = arg
        elif opt == '-o':
            output_filename = arg

    # check supplied helphtml folder argument
    if not helphtmlfolder:
        logging.error('No helphtml folder is provided. Exiting')
        usage(2)
    if not os.path.exists(helphtmlfolder):
        logging.error('folder "%s" does not exist. Exiting', helphtmlfolder)
        usage(2)
    helphtmlfolder = os.path.normpath(helphtmlfolder)

    # check supplied output file name
    if not output_filename:
        logging.error('No filename for output is given. Exiting')
        usage(2)

    out_xml_lines = ['<?xml version="1.0" encoding="UTF-8"?>',
                     '<QtHelpProject version="1.0">',
                     '<namespace>org.virtualbox</namespace>',
                     '<virtualFolder>doc</virtualFolder>',
                     '<filterSection>']
    # The order matters: the files section collects the html file names
    # that the keywords section goes through afterwards.
    # "or []": the toc may come back empty handed when toc.hhc is missing.
    out_xml_lines += create_toc(helphtmlfolder) or []
    out_xml_lines += create_files_section(helphtmlfolder)
    out_xml_lines += create_keywords_section(helphtmlfolder)
    out_xml_lines += ['</filterSection>', '</QtHelpProject>']

    with open(output_filename, 'wb') as out_file:
        out_file.write('\n'.join(out_xml_lines).encode('utf8'))


if __name__ == '__main__':
    main(sys.argv[1:])
Note: See TracBrowser for help on using the repository browser.

© 2024 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette