VirtualBox

source: vbox/trunk/src/VBox/Runtime/common/time/timezoneinfo-gen.py@ 98103

Last change on this file since 98103 was 98103, checked in by vboxsync, 2 years ago

Copyright year updates by scm.

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 17.2 KB
Line 
1# -*- coding: utf-8 -*-
2# $Id: timezoneinfo-gen.py 98103 2023-01-17 14:15:46Z vboxsync $
3
4"""
5Generates timezone mapping info from public domain tz data and
6simple windows tables.
7"""
8from __future__ import print_function;
9
10__copyright__ = \
11"""
12Copyright (C) 2017-2023 Oracle and/or its affiliates.
13
14This file is part of VirtualBox base platform packages, as
15available from https://www.virtualbox.org.
16
17This program is free software; you can redistribute it and/or
18modify it under the terms of the GNU General Public License
19as published by the Free Software Foundation, in version 3 of the
20License.
21
22This program is distributed in the hope that it will be useful, but
23WITHOUT ANY WARRANTY; without even the implied warranty of
24MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
25General Public License for more details.
26
27You should have received a copy of the GNU General Public License
28along with this program; if not, see <https://www.gnu.org/licenses>.
29
30The contents of this file may alternatively be used under the terms
31of the Common Development and Distribution License Version 1.0
32(CDDL), a copy of it is provided in the "COPYING.CDDL" file included
33in the VirtualBox distribution, in which case the provisions of the
34CDDL are applicable instead of those of the GPL.
35
36You may elect to license modified versions of this file under the
37terms and conditions of either the GPL or the CDDL or both.
38
39SPDX-License-Identifier: GPL-3.0-only OR CDDL-1.0
40"""
41__version__ = "$Revision: 98103 $"
42
43import os;
44import sys;
45import xml.etree.ElementTree as ElementTree;
46
47
class TzWinZoneEntry(object):
    """
    Windows time zone mapping attributes shared by zone and link entries.

    A freshly created entry carries no mapping: the name and territory are
    None, the golden flag is clear and the windows index is zero.
    """

    def __init__(self):
        # Windows time zone name mapped to this entry, None if unmapped.
        self.sWinName = None
        # Territory code that came with the windows mapping, None if unmapped.
        self.sWinTerritory = None
        # Set by readWindowsToTzMap when the mapping is for territory '001'.
        self.fWinGolden = False
        # Windows time zone index, stays 0 until readWindowsIndexes sets it.
        self.idxWin = 0
54
class TzLinkEntry(TzWinZoneEntry):
    """
    A 'Link' line from the tz data: an alternative name for a target zone.

    Inherits the (initially empty) windows mapping attributes from
    TzWinZoneEntry.
    """

    def __init__(self, sLinkNm, sTarget):
        TzWinZoneEntry.__init__(self)
        # The alias name introduced by the link.
        self.sLinkNm = sLinkNm
        # The name the alias points to.
        self.sTarget = sTarget
60
class TzZoneOffset(object):
    """
    One offset specification belonging to a zone.

    asFields is the list of whitespace separated fields starting at the
    GMT offset: [offset, rules, format, until, ...].  Only the first field
    is mandatory; a missing field, '-' or '' is stored as None.  Only the
    first token of the 'until' part is kept.
    """

    def __init__(self, asFields):
        def optionalField(iField):
            """ Returns field iField, or None when absent or a placeholder. """
            if len(asFields) > iField and asFields[iField] not in ('-', ''):
                return asFields[iField]
            return None

        self.sOffset = asFields[0]      # GMT offset expression
        self.sRules  = optionalField(1)
        self.sFormat = optionalField(2)
        self.sUntil  = optionalField(3)
67
class TzZoneEntry(TzWinZoneEntry):
    """
    A 'Zone' from the tz data, identified by its unix name.

    Inherits the (initially empty) windows mapping attributes from
    TzWinZoneEntry.
    """

    def __init__(self, sName):
        TzWinZoneEntry.__init__(self)
        # Unix zone name.
        self.sName = sName
        # Territory code; 'ZZ' is the placeholder used until one is assigned.
        self.sTerritory = 'ZZ'
        # Offset specifications in the order they were parsed.
        self.aOffsets = []              # type: list(TzZoneOffset)
74
class TzZoneRule(object):
    """
    One 'Rule' line from the tz data.

    The constructor takes the fields in the order they appear on the line
    (NAME FROM TO TYPE IN ON AT SAVE LETTER/S).  Apart from the name, a
    field holding the '-' or '' placeholder is stored as None.
    """

    @staticmethod
    def _fieldOrNone(sValue):
        """ Maps the '-' and '' placeholders to None, passes others through. """
        return sValue if sValue not in ('-', '') else None

    def __init__(self, sName, sFrom, sTo, sType, sIn, sOn, sAt, sSave, sLetter):
        self.sName   = sName
        self.sFrom   = self._fieldOrNone(sFrom)
        # Filtered on its own value (it used to be filtered on sFrom).
        self.sTo     = self._fieldOrNone(sTo)
        self.sType   = self._fieldOrNone(sType)
        self.sIn     = self._fieldOrNone(sIn)
        # The ON field used to be accepted but never stored.
        self.sOn     = self._fieldOrNone(sOn)
        self.sAt     = self._fieldOrNone(sAt)
        self.sSave   = self._fieldOrNone(sSave)
        self.sLetter = self._fieldOrNone(sLetter)
85
def info(sMsg):
    """
    Outputs an informational message to stderr, prefixed with 'info: '.
    """
    print('info: ' + sMsg, file=sys.stderr)
91
def warning(sMsg):
    """
    Outputs a warning message to stderr, prefixed with 'warning: '.
    """
    print('warning: ' + sMsg, file=sys.stderr)
97
def error(sMsg):
    """
    Outputs an error message to stderr, prefixed with 'error: '.
    """
    print('error: ' + sMsg, file=sys.stderr)
103
def readTzDataFile(sFile):
    """
    Reads the given data file into memory, stripping comments.

    Returns a list with one string per line of the file.  Everything from
    the first '#' on a line is dropped and trailing whitespace (including
    the line terminator) is removed; leading whitespace is kept.  Lines
    that end up empty are kept too, so list indexes still correspond to
    line numbers.

    Errors from opening or reading the file propagate to the caller.
    """
    # The with-statement makes sure the file is closed even if reading fails.
    with open(sFile, 'r') as oInFile:
        asLines = oInFile.readlines()
    return [sLine.split('#', 1)[0].rstrip() for sLine in asLines]
118
#
# tzdata structures.  All three are filled in by readTzData.
#
g_dZones = {}   # Zone name -> TzZoneEntry.
g_dRules = {}   # Rule entries (TzZoneRule).
g_dLinks = {}   # Link name -> TzLinkEntry.
125
def readTzData(sTzDataDir):
    """
    Reads in the bits we want from tz data.  Assumes 2017b edition.

    Parses the Zone, Rule and Link lines of the tz data files found in
    sTzDataDir into g_dZones, g_dRules (rule name -> list of TzZoneRule) and
    g_dLinks, then uses zone.tab to assign a territory to each zone.

    Returns 0 on success.  On the first malformed line an error is reported
    and 1 is returned.  Failure to open one of the files is not handled
    here; that exception propagates to the caller.
    """

    #
    # Parse the tzdata files.  ('backzone' is not processed.)
    #
    asFiles = [ 'africa', 'antarctica', 'asia', 'australasia', 'europe', 'northamerica', 'southamerica',
                'pacificnew', 'etcetera', 'backward', 'systemv', 'factory', ]
    for sFile in asFiles:
        oZone   = None      # The zone that continuation lines get appended to.
        asLines = readTzDataFile(os.path.join(sTzDataDir, sFile))
        for iLine, sLine in enumerate(asLines):
            if not sLine.strip():
                continue
            asFields = sLine.split()
            try:
                if sLine.startswith('Zone'):    # 'Zone' NAME GMTOFF RULES FORMAT [UNTIL]
                    oZone = TzZoneEntry(asFields[1])
                    if oZone.sName in g_dZones:
                        raise Exception('duplicate: %s' % (oZone.sName,))
                    g_dZones[oZone.sName] = oZone
                    oZone.aOffsets.append(TzZoneOffset(asFields[2:]))
                elif sLine.startswith('Rule'):  # 'Rule' NAME FROM TO TYPE IN ON AT SAVE LETTER/S
                    oRule = TzZoneRule(asFields[1], asFields[2], asFields[3], asFields[4], asFields[5],
                                       asFields[6], asFields[7], asFields[8], asFields[9])
                    # Group the rules by name.  (The dictionary used to be
                    # keyed by the rule object itself, so nothing was grouped.)
                    g_dRules.setdefault(oRule.sName, []).append(oRule)
                elif sLine.startswith('Link'):  # 'Link' TARGET LINK-NAME
                    if len(asFields) != 3:
                        raise Exception("malformed link: len(asFields) = %d" % (len(asFields)))
                    oLink = TzLinkEntry(asFields[2].strip(), asFields[1].strip())
                    if oLink.sLinkNm not in g_dLinks:
                        g_dLinks[oLink.sLinkNm] = oLink
                    elif g_dLinks[oLink.sLinkNm].sTarget != oLink.sTarget:
                        warning('duplicate link for %s: new target %s, previous %s'
                                % (oLink.sLinkNm, oLink.sTarget, g_dLinks[oLink.sLinkNm].sTarget,))
                elif oZone is not None:
                    # Zone continuation line: GMTOFF RULES FORMAT [UNTIL].  The
                    # 'Zone' keyword and name are absent, so the offset fields
                    # start at index 0.
                    oZone.aOffsets.append(TzZoneOffset(asFields))
                else:
                    raise Exception('what is this?')
            except Exception as oXcpt:
                error("line %u in %s: '%s'" % (iLine + 1, sFile, type(oXcpt) if not str(oXcpt) else str(oXcpt),))
                info("'%s'" % (asLines[iLine],))
                return 1

    #
    # Process the country <-> zone mapping file.
    #
    asLines = readTzDataFile(os.path.join(sTzDataDir, 'zone.tab'))
    for iLine, sLine in enumerate(asLines):
        # Skip empty lines and lines starting with a space.
        if not sLine or sLine[0] == ' ':
            continue
        asFields = sLine.split('\t')
        try:
            sTerritory = asFields[0]
            if len(sTerritory) != 2:
                raise Exception('malformed country: %s' % (sTerritory,))
            sZone = asFields[2]
            oZone = g_dZones.get(sZone)
            if oZone:
                # 'ZZ' is the TzZoneEntry default, i.e. nothing assigned yet.
                if oZone.sTerritory and oZone.sTerritory != 'ZZ':
                    raise Exception('zone %s already have country %s associated with it (setting %s)'
                                    % (sZone, oZone.sTerritory, sTerritory))
                oZone.sTerritory = sTerritory
            elif not g_dLinks.get(sZone):
                # Country <-> link associations are ignored for now, so only
                # completely unknown names are an error.
                raise Exception('country zone not found: %s' % (sZone,))
        except Exception as oXcpt:
            error("line %u in %s: '%s'" % (iLine + 1, 'zone.tab', type(oXcpt) if not str(oXcpt) else str(oXcpt),))
            info("'%s'" % (asLines[iLine],))
            return 1
    return 0
208
209
def readWindowsToTzMap(sMapXml):
    """
    Reads the 'common/supplemental/windowsZones.xml' file from http://cldr.unicode.org/.

    Each mapZone element looks like this:
        <mapZone other="Line Islands Standard Time" territory="001" type="Pacific/Kiritimati"/>
    where 'type' may hold several space separated unix zone names.  For each
    of these names the matching entry in g_dZones (or, failing that, in
    g_dLinks) gets its sWinName and sWinTerritory set; fWinGolden is set
    when the territory is '001'.  Names found in neither table and
    conflicting mappings only produce warnings.

    Always returns 0.  XML parse errors and missing elements or attributes
    are not handled here.
    """
    oXmlDoc = ElementTree.parse(sMapXml)
    oMap    = oXmlDoc.getroot().find('windowsZones').find('mapTimezones')
    for oMapZone in oMap.findall('mapZone'):
        sTerritory = oMapZone.attrib['territory']
        sWinZone   = oMapZone.attrib['other']
        for sZone in oMapZone.attrib['type'].split():
            # Straight zones take precedence over links.
            oEntry = g_dZones.get(sZone)
            sDupFmt = 'zone "%s" have more than one windows mapping: %s (%s) and now %s (%s)'
            if not oEntry:
                oEntry = g_dLinks.get(sZone)
                sDupFmt = 'zone-link "%s" have more than one windows mapping: %s (%s) and now %s (%s)'
            if not oEntry:
                warning('could not find zone "%s" (for mapping win zone "%s" to) - got the right data sets?'
                        % (sZone, sWinZone))
                continue

            # Accept the first mapping, and also a repeat of the same windows
            # name when the one recorded so far came from territory '001'.
            if oEntry.sWinName is None or (oEntry.sWinTerritory == '001' and oEntry.sWinName == sWinZone):
                oEntry.sWinName      = sWinZone
                oEntry.sWinTerritory = sTerritory
                if sTerritory == '001':
                    oEntry.fWinGolden = True
            else:
                warning(sDupFmt % (sZone, oEntry.sWinName, oEntry.sWinTerritory, sWinZone, sTerritory))
    return 0
247
248
def readWindowsIndexes(sFile):
    """
    Reads the windows time zone index from the table in the given file and sets idxWin.

    Assumes format: index{tab}name{tab}(GMT{offset}){space}{cities}

    For instance: https://support.microsoft.com/en-gb/help/973627/microsoft-time-zone-index-values

    The first line must start with 'Index'; if it also contains 'hex' the
    index values are parsed as hexadecimal, otherwise as decimal.  The
    windows names are matched case-insensitively against the sWinName of
    the entries in g_dZones, after rewriting a few dotted prefixes.

    Returns 0 on success, 1 if the header or a table line is malformed.
    Errors opening the file propagate to the caller.

    NOTE(review): only g_dZones entries are updated, entries in g_dLinks
    keep idxWin = 0.  Confirm that this is intended.
    """
    # Read the file.
    with open(sFile, "r") as oInFile:
        asLines = oInFile.readlines()

    # Check the header.  An empty file fails this check too.
    if not asLines or not asLines[0].startswith('Index'):
        error('expected first line of "%s" to start with "Index"' % (sFile,))
        return 1
    fHexIndex = asLines[0].find('hex') > 0

    # Index the zones by lower-cased windows name once, rather than scanning
    # all of g_dZones for every table line.
    dZonesByWinName = {}
    for oZone in g_dZones.values():
        if oZone.sWinName:
            dZonesByWinName.setdefault(oZone.sWinName.lower(), []).append(oZone)

    # Dotted prefixes used by the index table and their replacements.  Only
    # the first matching prefix is applied.
    atPrefixes = (
        ('a.u.s.',        'aus'),
        ('u.s. ',         'us '),
        ('s.a. ',         'sa '),
        ('s.e. ',         'se '),
        ('pacific s.a. ', 'pacific sa '),
    )

    for iLine in range(1, len(asLines)):
        # Parse.
        asFields = asLines[iLine].split('\t')
        try:
            idxWin     = int(asFields[0].strip(), 16 if fHexIndex else 10)
            sWinName   = asFields[1].strip()
            sLocations = ' '.join(asFields[2].split())
            if '(GMT' in sWinName:
                raise Exception("oops #1")
            if not sLocations.startswith('(GMT'):
                raise Exception("oops #2")
            # Drop the leading '(GMT...)' part; what is left is only used in
            # the warning below.
            sLocations = sLocations[sLocations.find(')') + 1 : ].strip()
        except Exception as oXcpt:
            error("line %u in %s: '%s'" % (iLine + 1, sFile, type(oXcpt) if not str(oXcpt) else str(oXcpt),))
            info("'%s'" % (asLines[iLine],))
            return 1

        # Some name adjustments.
        sWinName = sWinName.lower()
        for sPrefix, sReplacement in atPrefixes:
            if sWinName.startswith(sPrefix):
                sWinName = sReplacement + sWinName[len(sPrefix):]
                break

        # Update zone entries with matching windows names.
        aoMatches = dZonesByWinName.get(sWinName, [])
        for oZone in aoMatches:
            oZone.idxWin = idxWin
        if not aoMatches:
            warning('No matching zone found for index zone "%s" (%#x, %s)' % (sWinName, idxWin, sLocations))
    return 0
311
def getPadding(sField, cchWidth):
    """
    Returns the space padding needed to bring sField up to cchWidth characters.

    The result is an empty string when the field already is that wide or wider.
    """
    cchMissing = cchWidth - len(sField)
    if cchMissing > 0:
        return ' ' * cchMissing
    return ''
317
def formatFields(sName, oZone, oWinZone):
    """
    Formats the table fields for one entry.

    sName is the unix name of the entry, oZone supplies the territory
    (sTerritory) and oWinZone supplies the windows mapping attributes
    (sWinName, sWinTerritory, idxWin and fWinGolden).

    Returns a list of eight strings holding C source for the fields, in the
    order name, windows name (or NULL), name length, windows name length,
    territory, windows territory, windows index and flags.
    """

    # RTTIMEZONEINFO:
    #    const char     *pszUnixName;
    #    const char     *pszWindowsName;
    #    uint8_t         cchUnixName;
    #    uint8_t         cchWindowsName;
    #    char            szCountry[3];
    #    char            szWindowsCountry[3];
    #    uint32_t        idxWindows;
    #    uint32_t        uReserved;

    sWinName      = oWinZone.sWinName
    sWinTerritory = oWinZone.sWinTerritory

    asFlags = []
    if oWinZone.fWinGolden:
        asFlags.append('RTTIMEZONEINFO_F_GOLDEN')

    return [
        '"%s"' % (sName,),
        '"%s"' % (sWinName,) if sWinName else 'NULL',
        '%u' % (len(sName),),
        '%u' % (len(sWinName),) if sWinName else '0',
        '"%s"' % (oZone.sTerritory,),
        '"%s"' % (sWinTerritory,) if sWinTerritory else '""',
        '%#010x' % (oWinZone.idxWin,),
        ' | '.join(asFlags) if asFlags else '0',
    ]
358
def produceCode(oDst):
    """
    Produces the tables.

    Writes two C tables to oDst (anything with a write() method):
      - g_aTimeZones: one RTTIMEZONEINFO entry for each zone in g_dZones and
        each link in g_dLinks, sorted on the formatted fields (so primarily
        on the unix name).
      - g_aidxWinTimeZones: indexes into g_aTimeZones for the entries that
        have a windows name, sorted by windows name, golden flag and
        territory.

    Returns 0.  A link whose target is not in g_dZones raises KeyError.
    """

    #
    # Produce the info table.
    #

    # The straight zones.
    aasEntries = [formatFields(sZone, oZone, oZone) for sZone, oZone in g_dZones.items()]

    # The links.  The territory comes from the target zone, the windows
    # mapping from the link itself.
    for sZone, oLink in g_dLinks.items():
        aasEntries.append(formatFields(sZone, g_dZones[oLink.sTarget], oLink))

    # Figure field lengths.
    acchFields = [ 2, 2, 2, 2, 4, 4, 10, 1 ]
    for asFields in aasEntries:
        assert len(asFields) == len(acchFields)
        for iField, sField in enumerate(asFields):
            if len(sField) > acchFields[iField]:
                acchFields[iField] = len(sField)

    # Sort the data on zone name.
    aasEntries.sort()

    # Do the formatting.
    oDst.write('/**\n'
               ' * Static time zone mapping info. Sorted by pszUnixName.\n'
               ' */\n'
               'static const RTTIMEZONEINFO g_aTimeZones[] =\n'
               '{\n')
    for iEntry, asFields in enumerate(aasEntries):
        asCells = [sField + ', ' + getPadding(sField, acchFields[iField])
                   for iField, sField in enumerate(asFields)]
        oDst.write(' { ' + ''.join(asCells) + (' }, /* %#05x */\n' % (iEntry,)))
    oDst.write('};\n'
               '\n')

    #
    # Now produce a lookup table for windows time zone names, with indexes into
    # the g_aTimeZone table.
    #
    aasLookup = []
    for iEntry, asFields in enumerate(aasEntries):
        if asFields[1] != 'NULL':
            aasLookup.append([ asFields[1], # sWinName
                               # -1 makes golden entries sort before the others.
                               -1 if 'RTTIMEZONEINFO_F_GOLDEN' in asFields[7] else 1,
                               asFields[5], # sWinTerritory
                               iEntry,
                               asFields[0]]) # sZone
    aasLookup.sort()

    oDst.write('/**\n'
               ' * Windows time zone lookup table. Sorted by name, golden flag and territory.\n'
               ' */\n'
               'static const uint16_t g_aidxWinTimeZones[] = \n'
               '{\n')
    for sQuotedWinName, iGoldenSort, sQuotedTerritory, idxEntry, sQuotedZone in aasLookup:
        # The fields are C string literals, [1:-1] strips the quotes.
        sTerritory = sQuotedTerritory[1:-1]
        sLine  = ' %#05x, /* %s' % (idxEntry, sQuotedWinName[1:-1])
        sLine += getPadding(sQuotedWinName, acchFields[1])
        sLine += ' / %s%s' % (sTerritory, '+' if iGoldenSort < 0 else ' ')
        # Pad territories shorter than three characters so the '==>' column
        # lines up.  The old check measured the quoted string and therefore
        # never matched a two letter territory.
        sLine += getPadding(sTerritory, 3)
        sLine += ' ==> %s */\n' % (sQuotedZone[1:-1],)
        oDst.write(sLine)

    oDst.write('};\n'
               '\n')

    return 0
440
441
def main(asArgs):
    """
    C-like main function.

    asArgs is the complete argument vector, program name included.  The
    three arguments are, in this order: the tz data directory, the windows
    to tz mapping XML file and the windows index table file.

    Returns 0 on success and 1 on failure, suitable for sys.exit().
    """
    if len(asArgs) != 4:
        # List the arguments in the order they are actually consumed below.
        error("Takes exactly three arguments: <tz-data-dir> <windows-zones-xml> <ms-index-file>")
        return 1
    sTzDataDir     = asArgs[1]
    sWinToTzMap    = asArgs[2]
    sWinIndexTable = asArgs[3]

    #
    # Read in the data first, stopping at the first failure.
    #
    iRc = readTzData(sTzDataDir)
    if iRc != 0:
        return iRc
    iRc = readWindowsToTzMap(sWinToTzMap)
    if iRc != 0:
        return iRc
    iRc = readWindowsIndexes(sWinIndexTable)
    if iRc != 0:
        return iRc

    #
    # Produce the C table.
    #
    return produceCode(sys.stdout)
467
if __name__ == '__main__':
    # Run as a script: the return value of main() becomes the exit code.
    sys.exit(main(sys.argv))
470
Note: See TracBrowser for help on using the repository browser.

© 2024 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette