htmlhelp-qthelp.py@ 92052

Last change on this file since 92052 was 87727, checked in by vboxsync, 4 years ago
doc/manual: bugref:9831. Make htmlhelp-qthelp.py compatible with older Python 2.
Property svn:eol-style set to `LF` Property svn:executable set to ``* Property svn:keywords set to `Author Date Id Revision`
File size: 9.2 KB

Line
1	#!/usr/bin/python3
2
3	# $Id: htmlhelp-qthelp.py 87727 2021-02-11 21:47:15Z vboxsync $
4	## @file
5	# A python script to create a .qhp file out of a given htmlhelp
6	# folder. Lots of things about the said folder is assumed. Please
7	# see the code and inlined comments.
8
9	import sys, getopt
10	import os.path
11	import re
12	import codecs
13	import logging
14
15	if sys.version_info >= (3, 0):
16	from html.parser import HTMLParser
17	else:
18	from HTMLParser import HTMLParser
19
20
21	__copyright__ = \
22	"""
23	Copyright (C) 2006-2020 Oracle Corporation
24
25	This file is part of VirtualBox Open Source Edition (OSE), as
26	available from http://www.virtualbox.org. This file is free software;
27	you can redistribute it and/or modify it under the terms of the GNU
28	General Public License (GPL) as published by the Free Software
29	Foundation, in version 2 as it comes in the "COPYING" file of the
30	VirtualBox OSE distribution. VirtualBox OSE is distributed in the
31	hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
32	"""
33
# Running count of toc <section> elements that have been emitted but not
# yet closed with a matching </section>.
open_section_tags = 0

# Names of the html files listed in htmlhelp.hhp. Appended to by
# create_html_list() and read by create_keywords_section().
html_files = []
38
class _AnchorNameCollector(HTMLParser):
    """Collect the value of the name attribute of every <a> start tag."""

    def __init__(self):
        # Explicit base-class call (no super()) keeps this working on Python 2.
        HTMLParser.__init__(self)
        self.a_tag = []

    def handle_starttag(self, tag, attributes):
        if tag != 'a':
            return
        for attr_name, attr_value in attributes:
            # A bare "name" attribute without a value is reported as None and
            # an empty name cannot serve as a keyword, so skip both.
            if attr_name == 'name' and attr_value:
                self.a_tag.append(attr_value)


def create_keywords_section(folder):
    """Build the <keywords> section of the qhp file.

    Each file named in the module-level html_files list is read from folder
    (decoded as iso-8859-1) and every <a name="..."> anchor found in it
    becomes one <keyword> element whose name and id are the anchor name and
    whose ref is "<file>#<anchor>".

    Returns the section as a list of lines, including the enclosing
    <keywords> and </keywords> tags.
    """
    # Imported locally so this function does not depend on the import header.
    from xml.sax.saxutils import escape

    def xml_attr(value):
        # The html parser hands back attribute values with entities already
        # decoded, so they must be escaped again before going into XML.
        return escape(value, {'"': '&quot;'})

    keywords_section_lines = ['<keywords>']
    for html_file_name in html_files:
        full_html_path = os.path.join(folder, html_file_name)
        # 'with' guarantees the handle is closed even if reading fails.
        with codecs.open(full_html_path, encoding='iso-8859-1') as html_file:
            file_content = html_file.read()

        parser = _AnchorNameCollector()
        parser.feed(file_content)
        ref_base = xml_attr(html_file_name) + '#'
        for anchor in parser.a_tag:
            name = xml_attr(anchor)
            keywords_section_lines.append(
                '<keyword name="' + name + '" id="' + name
                + '" ref="' + ref_base + name + '"/>')
    keywords_section_lines.append('</keywords>')
    return keywords_section_lines
65
def create_image_list(folder):
    """Build the <file> entries for the contents of the images sub folder.

    Returns a list with one '<file>images/NAME</file>' line per directory
    entry of <folder>/images, sorted by name. Entries are not filtered by
    extension. When the sub folder does not exist an error is logged and an
    empty list is returned.
    """
    image_folder_name = 'images'
    full_folder_path = os.path.join(folder, image_folder_name)
    # Test the directory directly instead of walking the whole tree for it.
    if not os.path.isdir(full_folder_path):
        logging.error('Image subfolder "%s" is not found under "%s".', image_folder_name, folder)
        return []
    # os.listdir() order is arbitrary; sorting keeps the output reproducible.
    return ['<file>' + image_folder_name + '/' + entry + '</file>'
            for entry in sorted(os.listdir(full_folder_path))]
82
def create_html_list(folder):
    """Read the html file names listed in <folder>/htmlhelp.hhp.

    Lines containing '.html' that follow the [FILES] marker are collected
    until the next '[...]' section header or the end of the file. Each name
    is appended to the module-level html_files list and wrapped in a <file>
    element for the returned list.

    Returns the list of '<file>NAME</file>' lines; when htmlhelp.hhp is not
    found an error is logged and an empty list is returned.
    """
    global html_files
    file_name = 'htmlhelp.hhp'
    full_path = os.path.join(folder, file_name)
    if not os.path.isfile(full_path):
        logging.error('Could not find the file "%s" in "%s"', file_name, folder)
        return []
    with codecs.open(full_path, encoding='iso-8859-1') as hhp_file:
        lines = hhp_file.readlines()

    html_file_lines = []
    in_files_section = False
    for line in lines:
        # codecs.open() does no newline translation, so strip all surrounding
        # whitespace; stripping only '\n' would leave '\r' on CRLF files.
        entry = line.strip()
        if '[FILES]' in entry:
            in_files_section = True
            continue
        if not in_files_section:
            continue
        if entry.startswith('['):
            # Another section begins here; its entries are not help pages.
            break
        if '.html' in entry:
            html_file_lines.append('<file>' + entry + '</file>')
            html_files.append(entry)
    return html_file_lines
108
109
def create_files_section(folder):
    """Return the <files> section: image entries first, then html entries.

    The result is a list of lines wrapped in <files> and </files>.
    """
    image_entries = create_image_list(folder)
    html_entries = create_html_list(folder)
    return ['<files>'] + image_entries + html_entries + ['</files>']
116
def parse_param_tag(line):
    """Return the text between value=" and the next double quote in line.

    An empty string is returned when line carries no value=" label or when
    the opening quote is never closed.
    """
    _, label, remainder = line.partition('value="')
    if not label:
        return ''
    value, closing_quote, _ = remainder.partition('"')
    if not closing_quote:
        return ''
    return value
127
def parse_object_tag(lines, index):
    """Turn an <OBJECT> tag and its two <param> tags into a <section> tag.

    The two entries after lines[index] are expected to look like
        <param name="Name" value="Oracle VM VirtualBox">
        <param name="Local" value="index.html">
    and their value fields become the title and ref attributes:
        <section title="Oracle VM VirtualBox" ref="index.html">

    On success the module-level open_section_tags counter is incremented.
    Returns the opening <section> tag, or an empty string (after logging a
    warning) when the following tags are missing, are not <param> tags, or
    yield an empty title or ref.
    """
    global open_section_tags
    # lines[index + 1] and lines[index + 2] are both read below, so index + 2
    # must be a valid position.
    if index + 2 >= len(lines):
        logging.warning('Not enough tags after this one "%s"', lines[index])
        return ''
    name_tag = lines[index + 1]
    local_tag = lines[index + 2]
    if not (re.match(r'^\s*<param', name_tag, re.IGNORECASE)
            and re.match(r'^\s*<param', local_tag, re.IGNORECASE)):
        logging.warning('Skipping the line "%s" since next two tags are supposed to be param tags', lines[index])
        return ''
    title = parse_param_tag(name_tag)
    ref = parse_param_tag(local_tag)
    if not (title and ref):
        logging.warning('Title or ref part is empty for the tag "%s"', lines[index])
        return ''
    open_section_tags += 1
    return '<section title="' + title + '" ref="' + ref + '">'
151
def parse_non_object_tag(lines, index):
    """Decide whether a tag other than <OBJECT closes an open <section>.

    A </OBJECT tag closes the section unless the tag after it is <UL, in
    which case the section is presumably closed later by the matching </UL.
    A </UL tag always closes a section. Nothing is closed while the
    module-level open_section_tags counter is zero or less; the counter is
    decremented each time '</section>' is returned.

    Returns '</section>' or an empty string.
    """
    global open_section_tags
    if index >= len(lines) or open_section_tags <= 0:
        return ''
    tag = lines[index]
    if re.match(r'^\s*</OBJECT', tag, re.IGNORECASE):
        # Only look ahead when a next tag exists; a trailing </OBJECT has no
        # <UL after it and therefore closes its section.
        followed_by_list = (index + 1 < len(lines)
                            and re.match(r'^\s*<UL', lines[index + 1], re.IGNORECASE))
        if followed_by_list:
            return ''
    elif not re.match(r'^\s*</UL', tag, re.IGNORECASE):
        return ''
    open_section_tags -= 1
    return '</section>'
169
def parse_line(lines, index):
    """Convert the tag at lines[index] into a toc line.

    Tags starting with <OBJECT are handed to parse_object_tag(), every other
    tag to parse_non_object_tag(); the handler's result is returned as is.
    """
    if re.match(r'^\s*<OBJECT', lines[index], re.IGNORECASE):
        return parse_object_tag(lines, index)
    return parse_non_object_tag(lines, index)
179
def create_toc(folder):
    """Build the <toc> section of the qhp file from <folder>/toc.hhc.

    All relevant information is assumed to live in tags and attributes, so
    anything outside of <...> pairs is discarded. Tags are assumed not to be
    nested and every '<' is assumed to have a matching '>'.

    Returns the section as a list of lines wrapped in <toc> and </toc>. When
    toc.hhc is not found an error is logged and an empty list is returned,
    so the result can always be concatenated with other line lists.
    """
    toc_file = 'toc.hhc'
    full_path = os.path.join(folder, toc_file)
    if not os.path.isfile(full_path):
        logging.error('Could not find toc file "%s" under "%s"', toc_file, folder)
        return []
    with codecs.open(full_path, encoding='iso-8859-1') as toc:
        content = toc.read()

    # Each match runs from a '<' to the first '>' after it, which keeps every
    # tag, including one that ends exactly at the end of the file.
    tag_list = re.findall(r'<[^>]*>', content)

    toc_string_list = ['<toc>']
    for index in range(len(tag_list)):
        toc_line = parse_line(tag_list, index)
        if toc_line:
            toc_string_list.append(toc_line)
    toc_string_list.append('</toc>')
    return toc_string_list
222
def usage(arg):
    """Print the command line synopsis and exit with arg as the exit status."""
    print('htmlhelp-qthelp.py -d <helphtmlfolder> -o <outputfilename>')
    # Pass the status on so that error exits are visible to the caller.
    sys.exit(arg)
226
def main(argv):
    """Create a .qhp file out of an htmlhelp folder.

    argv is the option list without the program name:
        -d <helphtmlfolder>   folder holding the htmlhelp output
        -o <outputfilename>   qhp file to write
        -h                    print the usage text
    Invalid options, a missing or nonexistent folder and a missing output
    file name all end in usage(2).
    """
    helphtmlfolder = ''
    output_filename = ''
    try:
        # Parse the arguments that were passed in, not sys.argv again.
        opts, _ = getopt.getopt(argv, "hd:o:")
    except getopt.GetoptError as err:
        print(err)
        usage(2)
    for opt, arg in opts:
        if opt == '-h':
            usage(0)
        elif opt == '-d':
            helphtmlfolder = arg
        elif opt == '-o':
            output_filename = arg

    # check supplied helphtml folder argument
    if not helphtmlfolder:
        logging.error('No helphtml folder is provided. Exiting')
        usage(2)
    if not os.path.exists(helphtmlfolder):
        logging.error('folder "%s" does not exist. Exiting', helphtmlfolder)
        usage(2)
    helphtmlfolder = os.path.normpath(helphtmlfolder)

    # check supplied output file name
    if not output_filename:
        logging.error('No filename for output is given. Exiting')
        usage(2)

    out_xml_lines = [
        '<?xml version="1.0" encoding="UTF-8"?>',
        '<QtHelpProject version="1.0">',
        '<namespace>org.virtualbox</namespace>',
        '<virtualFolder>doc</virtualFolder>',
        '<filterSection>',
    ]
    # The files section must be built before the keywords section: building
    # it fills html_files, which the keywords section iterates over.
    out_xml_lines += create_toc(helphtmlfolder) + create_files_section(helphtmlfolder)
    out_xml_lines += create_keywords_section(helphtmlfolder)
    out_xml_lines += ['</filterSection>', '</QtHelpProject>']

    with open(output_filename, 'wb') as out_file:
        out_file.write('\n'.join(out_xml_lines).encode('utf8'))
269
# Script entry point: hand the command line arguments, without the program
# name, to main().
if __name__ == '__main__':
    main(sys.argv[1:])

Note: See TracBrowser for help on using the repository browser.

source: vbox/trunk/doc/manual/htmlhelp-qthelp.py@ 92052

Download in other formats: