htmlhelp-qthelp.py@ 87630

Last change on this file since 87630 was 87630, checked in by vboxsync, 4 years ago
Doc: Some small comment correction
Property svn:eol-style set to `LF` Property svn:executable set to ``* Property svn:keywords set to `Author Date Id Revision`
File size: 9.0 KB

Line
1	#!/usr/bin/python
2
3	# $Id: htmlhelp-qthelp.py 87630 2021-02-05 18:00:24Z vboxsync $
4	## @file
5	# A Python 2.x script to create a .qhp file out of a given htmlhelp
6	# folder. Lots of things about the said folder are assumed. Please
7	# read the code and inlined comments.
8
9	import sys, getopt
10	import os.path
11	import re
12	import codecs
13	import logging
14	from HTMLParser import HTMLParser
15
16	__copyright__ = \
17	"""
18	Copyright (C) 2006-2020 Oracle Corporation
19
20	This file is part of VirtualBox Open Source Edition (OSE), as
21	available from http://www.virtualbox.org. This file is free software;
22	you can redistribute it and/or modify it under the terms of the GNU
23	General Public License (GPL) as published by the Free Software
24	Foundation, in version 2 as it comes in the "COPYING" file of the
25	VirtualBox OSE distribution. VirtualBox OSE is distributed in the
26	hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
27	"""
28
29
# Number of <section> tags of the toc that have been opened and not yet
# closed. Incremented by parse_object_tag() and decremented by
# parse_non_object_tag().
open_section_tags = 0

# Names of the html files listed in htmlhelp.hhp. Filled in by
# create_html_list() and read by create_keywords_section().
html_files = []
34
class html_parser(HTMLParser):
    """Collect the value of the ``name`` attribute of every <a> tag.

    After feed() has been called the anchor names are available, in
    document order, in the ``a_tag`` list.
    """

    def __init__(self):
        HTMLParser.__init__(self)
        # anchor names found so far, in the order they were encountered
        self.a_tag = []

    def handle_starttag(self, tag, attributes):
        """Record the name of an <a name="..."> tag; ignore all other tags."""
        if tag != 'a':
            return
        for attr_name, attr_value in attributes:
            # A value-less attribute (<a name>) is reported with the value
            # None and an empty one (<a name="">) with ''. Neither can serve
            # as a keyword id, and None would break the string concatenation
            # done by the consumer of a_tag, so both are skipped.
            if attr_name == 'name' and attr_value:
                self.a_tag.append(attr_value)
47
# use html_parser to collect the <a name="..."> anchors of the html files and
# turn them into the <keywords> section of the qhelp project file
def create_keywords_section(folder):
    """Return the lines of the <keywords> section.

    Every file named in the module level list html_files is read from
    'folder' and parsed with html_parser. Each anchor name found yields one
    <keyword> line whose name and id are the anchor name and whose ref is
    'file#anchor'.

    The returned list always starts with '<keywords>' and ends with
    '</keywords>'.
    """
    keywords_section_lines = ['<keywords>']
    for html_file_name in html_files:
        full_html_path = os.path.join(folder, html_file_name)
        # 'with' closes the file as soon as it is read instead of leaving
        # the handle open until it happens to be garbage collected
        with open(full_html_path, 'r') as html_file:
            file_content = html_file.read()
        parser = html_parser()
        parser.feed(file_content)
        for k in parser.a_tag:
            # an <a name> without a value is reported as None; it cannot be
            # concatenated and would not make a usable keyword anyway
            if not k:
                continue
            line = '<keyword name="' + k + '" id="' + k + '" ref="' + html_file_name + '#' + k + '"/>'
            keywords_section_lines.append(line)
    keywords_section_lines.append('</keywords>')
    return keywords_section_lines
61
# list the content of the 'images' sub folder and create the part of the
# qhelp project file that holds the corresponding <file> tags
def create_image_list(folder):
    """Return one '<file>images/NAME</file>' line per entry of folder/images.

    Every directory entry is listed, whatever its extension. The entries are
    sorted so that the generated project file does not depend on the order in
    which the file system happens to return them.

    An error is logged and an empty list is returned when 'folder' has no
    'images' sub folder.
    """
    image_folder_name = 'images'
    full_folder_path = os.path.join(folder, image_folder_name)
    # a direct check; there is no need to walk the whole tree under 'folder'
    if not os.path.isdir(full_folder_path):
        logging.error('Image subfolder "%s" is not found under "%s".', image_folder_name, folder)
        return []
    return ['<file>' + image_folder_name + '/' + f + '</file>'
            for f in sorted(os.listdir(full_folder_path))]
78
# open the htmlhelp.hhp file and read the list of html files from there
def create_html_list(folder):
    """Return one '<file>NAME</file>' line per html file of the [FILES] section.

    The file names are read from folder/htmlhelp.hhp. As a side effect each
    name is also appended to the module level list html_files.

    An error is logged and an empty list is returned when htmlhelp.hhp does
    not exist in 'folder'.
    """
    global html_files
    file_name = 'htmlhelp.hhp'
    html_file_lines = []
    full_path = os.path.join(folder, file_name)
    if not os.path.isfile(full_path):
        logging.error('Could not find the file "%s" in "%s"', file_name, folder)
        return html_file_lines
    with open(full_path, "r") as hhp_file:
        lines = hhp_file.readlines()
    # first search for the [FILES] marker then collect .html lines
    in_files_section = False
    for line in lines:
        # strip() rather than strip('\n'): the project file may have CRLF
        # line ends and a left over '\r' would end up in the file name
        entry = line.strip()
        if '[FILES]' in entry:
            in_files_section = True
            continue
        if not in_files_section:
            continue
        if entry.startswith('['):
            # another section starts here; its lines may mention html files
            # (e.g. NAME=page.html) but they are not file entries
            in_files_section = False
            continue
        if '.html' in entry:
            html_file_lines.append('<file>' + entry + '</file>')
            html_files.append(entry)
    return html_file_lines
103
104
def create_files_section(folder):
    """Return the lines of the <files> section of the qhelp project file.

    The section lists the image files first and the html files second, both
    taken from 'folder', wrapped in a '<files>' ... '</files>' pair.
    """
    # the image list is built before the html list, as the order of the
    # entries in the section requires
    entries = create_image_list(folder) + create_html_list(folder)
    return ['<files>'] + entries + ['</files>']
111
def parse_param_tag(line):
    """Return the text between 'value="' and the next double quote in 'line'.

    An empty string is returned when 'line' has no 'value="' part or when
    the closing double quote is missing.
    """
    # split once at the label, then once at the closing quote; an empty
    # separator in the result means the searched text was not there
    _, label, remainder = line.partition('value="')
    if not label:
        return ''
    value, closing_quote, _ = remainder.partition('"')
    if not closing_quote:
        return ''
    return value
122
# look at the next two tags. they are supposed to look like the following
# <param name="Name" value="Oracle VM VirtualBox">
# <param name="Local" value="index.html">
# parse out the value fields and return
# <section title="Oracle VM VirtualBox" ref="index.html">
def parse_object_tag(lines, index):
    """Turn the <OBJECT tag at lines[index] into an opening <section> tag.

    The title and ref of the section are the values of the two <param tags
    that are expected at lines[index + 1] and lines[index + 2]. On success
    the module level counter open_section_tags is incremented.

    An empty string is returned, after logging a warning, when fewer than two
    tags follow, when the next two tags are not both <param tags or when one
    of the two values is empty.
    """
    global open_section_tags
    # both lines[index + 1] and lines[index + 2] are read below, so the
    # largest of the two indices has to be inside the list
    if index + 2 >= len(lines):
        logging.warning('Not enough tags after this one "%s"',lines[index])
        return ''
    if not re.match(r'^\s*<param', lines[index + 1], re.IGNORECASE) or \
       not re.match(r'^\s*<param', lines[index + 2], re.IGNORECASE):
        logging.warning('Skipping the line "%s" since next two tags are supposed to be param tags', lines[index])
        return ''

    title = parse_param_tag(lines[index + 1])
    ref = parse_param_tag(lines[index + 2])
    if not (title and ref):
        logging.warning('Title or ref part is empty for the tag "%s"', lines[index])
        return ''
    open_section_tags += 1
    return '<section title="' + title + '" ref="' + ref + '">'
147
# parse any tag other than one starting with <OBJECT and
# decide if a <section tag should be closed
def parse_non_object_tag(lines, index):
    """Return '</section>' when the tag at lines[index] ends a toc section.

    A section is closed by a </OBJECT tag that is not directly followed by a
    <UL tag, and by every </UL tag. Whenever '</section>' is returned the
    module level counter open_section_tags is decremented. Nothing is closed
    while that counter is zero or less.

    An empty string is returned in all other cases, including an 'index'
    that is past the end of 'lines'.
    """
    global open_section_tags
    if index >= len(lines):
        return ''
    if open_section_tags <= 0:
        return ''
    current = lines[index]
    # replace </OBJECT with </section only if the next tag is not <UL
    if re.match(r'^\s*</OBJECT', current, re.IGNORECASE):
        # a </OBJECT that is the very last tag has no successor to look at;
        # it is then treated like one that is not followed by <UL
        has_next = index + 1 < len(lines)
        if has_next and re.match(r'^\s*<UL', lines[index + 1], re.IGNORECASE):
            return ''
        open_section_tags -= 1
        return '</section>'
    if re.match(r'^\s*</UL', current, re.IGNORECASE):
        open_section_tags -= 1
        return '</section>'
    return ''
165
def parse_line(lines, index):
    """Convert the tag at lines[index] into a line of the <toc> section.

    Tags starting with <OBJECT are handed to parse_object_tag(), every other
    tag to parse_non_object_tag(). The result of the chosen function is
    returned as is.
    """
    # pick the handler depending on whether the tag starts with <OBJECT
    starts_object = re.match(r'^\s*<OBJECT', lines[index], re.IGNORECASE)
    handler = parse_object_tag if starts_object else parse_non_object_tag
    return handler(lines, index)
175
# parse the toc.hhc file. assuming all the relevant information
# is stored in tags and attributes. data, i.e. whatever is outside of
# <... > pairs, is filtered out. we also assume < ..> are not nested
# and each < matches a >
def create_toc(folder):
    """Return the lines of the <toc> section built from folder/toc.hhc.

    The file is read as iso-8859-1 and reduced to the list of its tags. Each
    tag is handed to parse_line() together with the whole list, and every
    non-empty result becomes one line between '<toc>' and '</toc>'.

    An error is logged and an empty list is returned when toc.hhc does not
    exist in 'folder', so that the result can always be concatenated with
    other lists.
    """
    toc_file = 'toc.hhc'
    full_path = os.path.join(folder, toc_file)
    if not os.path.isfile(full_path):
        logging.error('Could not find toc file "%s" under "%s"', toc_file, folder)
        return []
    with codecs.open(full_path, encoding='iso-8859-1') as toc_handle:
        content = toc_handle.read()
    # convert the file string into a list of tags, thereby eliminating
    # whatever chars reside outside of tags. each match runs from a '<' to
    # the first '>' after it (possibly across line ends); a tag that ends
    # exactly at the end of the file is kept as well.
    tag_list = re.findall(r'<[^>]*>', content)

    toc_string_list = ['<toc>']
    # parse_line() looks ahead in the list, so it gets the index, not the tag
    for index in range(len(tag_list)):
        section_line = parse_line(tag_list, index)
        if section_line:
            toc_string_list.append(section_line)
    toc_string_list.append('</toc>')
    return toc_string_list
218
def usage(arg):
    """Print the command line synopsis and exit the script with status 'arg'."""
    # a single parenthesised argument prints the same text under Python 2
    # and is also valid syntax for Python 3
    print('htmlhelp-qthelp.py -d <helphtmlfolder> -o <outputfilename>')
    sys.exit(arg)
222
def main(argv):
    """Create a qhelp project file from a htmlhelp folder.

    'argv' is the list of command line arguments without the program name.
    Recognized options:
      -h           print the usage text and exit
      -d <folder>  the htmlhelp folder to read from (mandatory)
      -o <file>    the name of the project file to write (mandatory)

    The script exits with status 2, after printing the usage text, when the
    options cannot be parsed, when a mandatory option is missing or when the
    given folder does not exist.
    """
    helphtmlfolder = ''
    output_filename = ''
    try:
        # parse the arguments handed in by the caller, not sys.argv
        opts, args = getopt.getopt(argv, "hd:o:")
    except getopt.GetoptError as err:
        print(err)
        usage(2)
    for opt, arg in opts:
        if opt == '-h':
            usage(0)
        elif opt == '-d':
            helphtmlfolder = arg
        elif opt == '-o':
            output_filename = arg

    # check supplied helphtml folder argument
    if not helphtmlfolder:
        logging.error('No helphtml folder is provided. Exiting')
        usage(2)
    if not os.path.exists(helphtmlfolder):
        logging.error('folder "%s" does not exist. Exiting', helphtmlfolder)
        usage(2)
    helphtmlfolder = os.path.normpath(helphtmlfolder)

    # check supplied output file name
    if not output_filename:
        logging.error('No filename for output is given. Exiting')
        usage(2)

    out_xml_lines = ['<?xml version="1.0" encoding="UTF-8"?>',
                     '<QtHelpProject version="1.0">',
                     '<namespace>org.virtualbox</namespace>',
                     '<virtualFolder>doc</virtualFolder>',
                     '<filterSection>']
    # create_toc() may hand back nothing when the toc file is missing; treat
    # that as an empty section instead of failing on the concatenation
    out_xml_lines += create_toc(helphtmlfolder) or []
    # the files section has to be created before the keywords section since
    # it collects the names of the html files the keywords are read from
    out_xml_lines += create_files_section(helphtmlfolder)
    out_xml_lines += create_keywords_section(helphtmlfolder)
    out_xml_lines += ['</filterSection>', '</QtHelpProject>']

    # 'with' makes sure the file is closed even if writing fails
    with open(output_filename, 'w') as out_file:
        out_file.write('\n'.join(out_xml_lines).encode('utf8'))
265
# Run the conversion only when this file is executed as a script; main()
# receives the command line arguments without the program name.
if __name__ == '__main__':
    main(sys.argv[1:])

Note: See TracBrowser for help on using the repository browser.

source: vbox/trunk/doc/manual/htmlhelp-qthelp.py@ 87630

Download in other formats: