uniread.cpp@ 51770

Last change on this file since 51770 was 51770, checked in by vboxsync, 10 years ago
Merged in iprt++ dev branch.
Property svn:eol-style set to `native` Property svn:keywords set to `Id Revision`
File size: 40.8 KB

Line
1	/* $Id: uniread.cpp 51770 2014-07-01 18:14:02Z vboxsync $ */
2	/** @file
3	* IPRT - Unicode Specification Reader.
4	*/
5
6	/*
7	* Copyright (C) 2006-2012 Oracle Corporation
8	*
9	* This file is part of VirtualBox Open Source Edition (OSE), as
10	* available from http://www.virtualbox.org. This file is free software;
11	* you can redistribute it and/or modify it under the terms of the GNU
12	* General Public License (GPL) as published by the Free Software
13	* Foundation, in version 2 as it comes in the "COPYING" file of the
14	* VirtualBox OSE distribution. VirtualBox OSE is distributed in the
15	* hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
16	*
17	* The contents of this file may alternatively be used under the terms
18	* of the Common Development and Distribution License Version 1.0
19	* (CDDL) only, as it comes in the "COPYING.CDDL" file of the
20	* VirtualBox OSE distribution, in which case the provisions of the
21	* CDDL are applicable instead of those of the GPL.
22	*
23	* You may elect to license modified versions of this file under the
24	* terms and conditions of either the GPL or the CDDL or both.
25	*/
26
27	/*******************************************************************************
28	* Header Files *
29	*******************************************************************************/
30	#include <iprt/types.h>
31	#include <iprt/stdarg.h>
32	#include <iprt/ctype.h>
33
34	#include <stdio.h>
35	#include <string.h>
36	#include <stdlib.h>
37	#ifdef _MSC_VER
38	# include <direct.h>
39	#else
40	# include <unistd.h>
41	#endif
42
43
44	/*******************************************************************************
45	* Global Variables *
46	*******************************************************************************/
/** The file we're currently parsing (set by OpenFile, reset to NULL by CloseFile).
 * Used by ParseError to prefix its messages. */
static const char *g_pszCurFile;
/** The current line number in g_pszCurFile (zeroed by OpenFile and CloseFile,
 * incremented by GetLineFromFile). */
static unsigned g_iLine;
/** The current output file (opened by Stream1Init, closed by Stream1Close,
 * written to by Stream1Printf). */
static FILE *g_pCurOutFile;
53
54
55	/**
56	* Exit the program after printing a parse error.
57	*
58	* @param pszFormat The message.
59	* @param ... Format arguments.
60	*/
61	static void ParseError(const char *pszFormat, ...)
62	{
63	va_list va;
64	va_start(va, pszFormat);
65	fprintf(stderr, "parse error: %s:%u: ", g_pszCurFile, g_iLine);
66	vfprintf(stderr, pszFormat, va);
67	va_end(va);
68	exit(1);
69	}
70
/**
 * Strips a line in place.
 *
 * Skips leading spaces and tabs, cuts the line at the first '#' (comment
 * start) if there is one, and then removes trailing spaces, tabs, newlines
 * and carriage returns by overwriting them with terminators.
 *
 * @returns Pointer to the first non-blank char of @a pszLine.
 * @param   pszLine     The line string to strip.  Modified in place.
 */
static char *StripLine(char *pszLine)
{
    /* Leading blanks. */
    while (*pszLine == ' ' || *pszLine == '\t')
        pszLine++;

    /* Chop off any trailing comment, otherwise locate the terminator. */
    char *pszEnd = strchr(pszLine, '#');
    if (pszEnd)
        *pszEnd = '\0';
    else
        pszEnd = strchr(pszLine, '\0');

    /* Trailing blanks and line endings. */
    while (pszEnd > pszLine)
    {
        switch (pszEnd[-1])
        {
            case ' ':
            case '\t':
            case '\n':
            case '\r':
                *--pszEnd = '\0';
                continue;
        }
        break;
    }

    return pszLine;
}
102
103
/**
 * Checks if the line is blank or a comment line and should be skipped.
 *
 * Leading spaces, tabs, newlines and carriage returns are skipped; the line
 * counts as skippable when the first remaining character is '#' or the
 * string terminator.
 *
 * @returns true if the line should be skipped, false if it carries data.
 * @param   pszLine     The line to consider.  Not modified.
 */
static bool IsCommentOrBlankLine(const char *pszLine)
{
    while (*pszLine == ' ' || *pszLine == '\t' || *pszLine == '\n' || *pszLine == '\r')
        pszLine++;
    return *pszLine == '#' || *pszLine == '\0';
}
115
116
/**
 * Get the first field in a semicolon separated string.
 *
 * The field is terminated in place at the first ';' and has its leading and
 * trailing blanks (space, tab, CR, LF) stripped.
 *
 * @returns Pointer to the (stripped) first field.
 * @param   ppsz        Where to store the pointer to the next field.  When
 *                      there is no ';' this is set to the string terminator.
 * @param   pszLine     The line string. (could also be *ppsz from a
 *                      previous FirstField/NextField call)  Modified in place.
 */
static char *FirstField(char **ppsz, char *pszLine)
{
    char *psz = strchr(pszLine, ';');
    if (!psz)
        *ppsz = psz = strchr(pszLine, '\0');
    else
    {
        *psz = '\0';
        *ppsz = psz + 1;
    }

    /* strip */
    while (*pszLine == ' ' || *pszLine == '\t' || *pszLine == '\r' || *pszLine == '\n')
        pszLine++;
    while (psz > pszLine)
    {
        switch (psz[-1])
        {
            case ' ':
            case '\t':
            case '\n':
            case '\r':
                *--psz = '\0';
                continue;
        }
        break;
    }
    return pszLine;
}
153
154
155	/**
156	* Get the next field in a field enumeration.
157	*
158	* @returns Pointer to the next field.
159	* @param ppsz Where to get and store the string position.
160	*/
161	static char NextField(char *ppsz)
162	{
163	return FirstField(ppsz, *ppsz);
164	}
165
166
167	/**
168	* Splits a decomposition field.
169	*
170	* This may start with a type that is enclosed in angle brackets.
171	*
172	* @returns Pointer to the mapping values following the type. @a *ppsz if empty.
173	* @param ppszType Pointer to the type field pointer. On input the type
174	* field contains the combined type and mapping string. On
175	* output this should only contain the type, no angle
176	* brackets. If no type specified, it is replaced with an
177	* empty string (const).
178	*/
179	static char SplitDecompField(char *ppszType)
180	{
181	/* Empty field? */
182	char psz = ppszType;
183	if (!*psz)
184	return psz;
185
186	/* No type? */
187	if (*psz != '<')
188	{
189	ppszType = (char )"";
190	return psz;
191	}
192
193	/* Split out the type. */
194	*ppszType = ++psz;
195	psz = strchr(psz, '>');
196	if (!psz)
197	{
198	ParseError("Bad Decomposition Type/Mappings\n");
199	return *ppszType;
200	}
201	*psz++ = '\0';
202
203	psz = StripLine(psz);
204	if (!*psz)
205	ParseError("Missing decomposition mappings\n");
206	return psz;
207	}
208
209	/**
210	* Converts a code point field to a number.
211	* @returns Code point.
212	* @param psz The field string.
213	*/
214	static RTUNICP ToNum(const char *psz)
215	{
216	char *pszEnd = NULL;
217	unsigned long ul = strtoul(psz, &pszEnd, 16);
218	if (pszEnd && *pszEnd)
219	ParseError("failed converting '%s' to a number!\n", psz);
220	return (RTUNICP)ul;
221	}
222
223
224	/**
225	* Same as ToNum except that if the field is empty the Default is returned.
226	*/
227	static RTUNICP ToNumDefault(const char *psz, RTUNICP Default)
228	{
229	if (*psz)
230	return ToNum(psz);
231	return Default;
232	}
233
234
235	/**
236	* Converts a code point range to numbers.
237	* @returns The start code point.\
238	* @returns ~(RTUNICP)0 on failure.
239	* @param psz The field string.
240	* @param pLast Where to store the last code point in the range.
241	*/
242	static RTUNICP ToRange(const char *psz, PRTUNICP pLast)
243	{
244	char *pszEnd = NULL;
245	unsigned long ulStart = strtoul(psz, &pszEnd, 16);
246	unsigned long ulLast = ulStart;
247	if (pszEnd && *pszEnd)
248	{
249	if (*pszEnd == '.')
250	{
251	while (*pszEnd == '.')
252	pszEnd++;
253	ulLast = strtoul(pszEnd, &pszEnd, 16);
254	if (pszEnd && *pszEnd)
255	{
256	ParseError("failed converting '%s' to a number!\n", psz);
257	return ~(RTUNICP)0;
258	}
259	}
260	else
261	{
262	ParseError("failed converting '%s' to a number!\n", psz);
263	return ~(RTUNICP)0;
264	}
265	}
266	*pLast = (RTUNICP)ulLast;
267	return (RTUNICP)ulStart;
268
269	}
270
271	/**
272	* For converting the decomposition mappings field and similar.
273	*
274	* @returns Mapping array or NULL if none.
275	* @param psz The string to convert. Can be empty.
276	* @param pcEntries Where to store the number of entries.
277	* @param cMax The max number of entries.
278	*/
279	static PRTUNICP ToMapping(char psz, unsigned pcEntries, unsigned cMax)
280	{
281	PRTUNICP paCps = NULL;
282	unsigned cAlloc = 0;
283	unsigned i = 0;
284
285	/* Convert the code points. */
286	while (psz)
287	{
288	/* skip leading spaces */
289	while (RT_C_IS_BLANK(*psz))
290	psz++;
291
292	/* the end? */
293	if (!*psz)
294	break;
295
296	/* room left? */
297	if (i >= cMax)
298	{
299	ParseError("Too many mappings.\n");
300	break;
301	}
302	if (i >= cAlloc)
303	{
304	cAlloc += 4;
305	paCps = (PRTUNICP)realloc(paCps, cAlloc * sizeof(paCps[0]));
306	if (!paCps)
307	{
308	fprintf(stderr, "out of memory (%u)\n", (unsigned)(cAlloc * sizeof(paCps[0])));
309	exit(1);
310	}
311	}
312
313	/* Find the end. */
314	char *pszThis = psz;
315	while (RT_C_IS_XDIGIT(*psz))
316	psz++;
317	if (psz && !RT_C_IS_BLANK(psz))
318	ParseError("Malformed mappings.\n");
319	if (*psz)
320	*psz++ = '\0';
321
322	/* Convert to number and add it. */
323	paCps[i++] = ToNum(pszThis);
324	}
325
326	*pcEntries = i;
327	return paCps;
328	}
329
330
/**
 * Duplicate a string, optimize certain strings to save memory.
 *
 * Empty strings are not allocated; a pointer to a shared constant empty
 * string is returned instead, so the result must not be written to or freed
 * unconditionally.  Running out of memory terminates the program.
 *
 * @returns Pointer to string copy.
 * @param   pszStr      The string to duplicate.
 */
static char *DupStr(const char *pszStr)
{
    if (!*pszStr)
        return (char *)"";

    /* malloc + memcpy rather than strdup, which is not ISO C before C23. */
    size_t const cb      = strlen(pszStr) + 1;
    char        *pszCopy = (char *)malloc(cb);
    if (!pszCopy)
    {
        fprintf(stderr, "out of memory!\n");
        exit(1);
    }
    memcpy(pszCopy, pszStr, cb);
    return pszCopy;
}
348
349
350	/**
351	* Array of all possible and impossible unicode code points as of 4.1
352	*/
353	struct CPINFO
354	{
355	RTUNICP CodePoint;
356	RTUNICP SimpleUpperCaseMapping;
357	RTUNICP SimpleLowerCaseMapping;
358	RTUNICP SimpleTitleCaseMapping;
359	unsigned CanonicalCombiningClass;
360	const char *pszDecompositionType;
361	unsigned cDecompositionMapping;
362	PRTUNICP paDecompositionMapping;
363	const char *pszName;
364	/** Set if this is an unused entry */
365	unsigned fNullEntry : 1;
366
367	unsigned fAlphabetic : 1;
368	unsigned fASCIIHexDigit : 1;
369	unsigned fBidiControl : 1;
370	unsigned fCaseIgnorable : 1;
371	unsigned fCased : 1;
372	unsigned fChangesWhenCasefolded : 1;
373	unsigned fChangesWhenCasemapped : 1;
374	unsigned fChangesWhenLowercased : 1;
375	unsigned fChangesWhenTitlecased : 1;
376	unsigned fChangesWhenUppercased : 1;
377	unsigned fDash : 1;
378	unsigned fDefaultIgnorableCodePoint : 1;
379	unsigned fDeprecated : 1;
380	unsigned fDiacritic : 1;
381	unsigned fExtender : 1;
382	unsigned fGraphemeBase : 1;
383	unsigned fGraphemeExtend : 1;
384	unsigned fGraphemeLink : 1;
385	unsigned fHexDigit : 1;
386	unsigned fHyphen : 1;
387	unsigned fIDContinue : 1;
388	unsigned fIdeographic : 1;
389	unsigned fIDSBinaryOperator : 1;
390	unsigned fIDStart : 1;
391	unsigned fIDSTrinaryOperator : 1;
392	unsigned fJoinControl : 1;
393	unsigned fLogicalOrderException : 1;
394	unsigned fLowercase : 1;
395	unsigned fMath : 1;
396	unsigned fNoncharacterCodePoint : 1;
397	unsigned fOtherAlphabetic : 1;
398	unsigned fOtherDefaultIgnorableCodePoint : 1;
399	unsigned fOtherGraphemeExtend : 1;
400	unsigned fOtherIDContinue : 1;
401	unsigned fOtherIDStart : 1;
402	unsigned fOtherLowercase : 1;
403	unsigned fOtherMath : 1;
404	unsigned fOtherUppercase : 1;
405	unsigned fPatternSyntax : 1;
406	unsigned fPatternWhiteSpace : 1;
407	unsigned fQuotationMark : 1;
408	unsigned fRadical : 1;
409	unsigned fSoftDotted : 1;
410	unsigned fSTerm : 1;
411	unsigned fTerminalPunctuation : 1;
412	unsigned fUnifiedIdeograph : 1;
413	unsigned fUppercase : 1;
414	unsigned fVariationSelector : 1;
415	unsigned fWhiteSpace : 1;
416	unsigned fXIDContinue : 1;
417	unsigned fXIDStart : 1;
418
419	/** @name DerivedNormalizationProps.txt
420	* @{ */
421	unsigned fFullCompositionExclusion : 1;
422	unsigned fInvNFC_QC : 2; /*< If 1 (NFC_QC == N) then code point 100% sure not part of NFC string. /
423	unsigned fInvNFD_QC : 2; /*< If 1 (NFD_QC == N) then code point 100% sure not part of NFD string. /
424	unsigned fInvNFKC_QC : 2;
425	unsigned fInvNFKD_QC : 2;
426	unsigned fExpandsOnNFC : 1;
427	unsigned fExpandsOnNFD : 1;
428	unsigned fExpandsOnNFKC : 1;
429	unsigned fExpandsOnNFKD : 1;
430	/** @} */
431
432	/* unprocessed stuff, so far. */
433	const char *pszGeneralCategory;
434	const char *pszBidiClass;
435	const char *pszNumericType;
436	const char *pszNumericValueD;
437	const char *pszNumericValueN;
438	const char *pszBidiMirrored;
439	const char *pszUnicode1Name;
440	const char *pszISOComment;
441	} g_aCPInfo[0x110000];
442
443
444	/**
445	* Creates a 'null' entry at i.
446	* @param i The entry in question.
447	*/
448	static void NullEntry(unsigned i)
449	{
450	g_aCPInfo[i].CodePoint = i;
451	g_aCPInfo[i].fNullEntry = 1;
452	g_aCPInfo[i].SimpleUpperCaseMapping = i;
453	g_aCPInfo[i].SimpleLowerCaseMapping = i;
454	g_aCPInfo[i].SimpleTitleCaseMapping = i;
455	g_aCPInfo[i].pszDecompositionType = "";
456	g_aCPInfo[i].cDecompositionMapping = 0;
457	g_aCPInfo[i].paDecompositionMapping = NULL;
458	g_aCPInfo[i].pszName = "";
459	g_aCPInfo[i].pszGeneralCategory = "";
460	g_aCPInfo[i].pszBidiClass = "";
461	g_aCPInfo[i].pszNumericType = "";
462	g_aCPInfo[i].pszNumericValueD = "";
463	g_aCPInfo[i].pszNumericValueN = "";
464	g_aCPInfo[i].pszBidiMirrored = "";
465	g_aCPInfo[i].pszUnicode1Name = "";
466	g_aCPInfo[i].pszISOComment = "";
467	}
468
469
470	/**
471	* Open a file for reading, optionally with a base path prefixed.
472	*
473	* @returns file stream on success, NULL w/ complaint on failure.
474	* @param pszBasePath The base path, can be NULL.
475	* @param pszFilename The name of the file to open.
476	*/
477	static FILE OpenFile(const char pszBasePath, const char *pszFilename)
478	{
479	FILE *pFile;
480	if ( !pszBasePath
481	\|\| *pszFilename == '/'
482	#if defined(_MSC_VER) \|\| defined(__OS2__)
483	\|\| *pszFilename == '\\'
484	\|\| (*pszFilename && pszFilename[1] == ':')
485	#endif
486	)
487	{
488	pFile = fopen(pszFilename, "r");
489	if (!pFile)
490	fprintf(stderr, "uniread: failed to open '%s' for reading\n", pszFilename);
491	}
492	else
493	{
494	size_t cchBasePath = strlen(pszBasePath);
495	size_t cchFilename = strlen(pszFilename);
496	char pszFullName = (char )malloc(cchBasePath + 1 + cchFilename + 1);
497	if (!pszFullName)
498	{
499	fprintf(stderr, "uniread: failed to allocate %d bytes\n", (int)(cchBasePath + 1 + cchFilename + 1));
500	return NULL;
501	}
502
503	memcpy(pszFullName, pszBasePath, cchBasePath);
504	pszFullName[cchBasePath] = '/';
505	memcpy(&pszFullName[cchBasePath + 1], pszFilename, cchFilename + 1);
506
507	pFile = fopen(pszFullName, "r");
508	if (!pFile)
509	fprintf(stderr, "uniread: failed to open '%s' for reading\n", pszFullName);
510	free(pszFullName);
511	}
512	g_pszCurFile = pszFilename;
513	g_iLine = 0;
514	return pFile;
515	}
516
517
518	/**
519	* Wrapper around fgets that keep track of the line number.
520	*
521	* @returns See fgets.
522	* @param pszBuf The buffer. See fgets for output definition.
523	* @param cbBuf The buffer size.
524	* @param pFile The file to read from.
525	*/
526	static char GetLineFromFile(char pszBuf, int cbBuf, FILE *pFile)
527	{
528	g_iLine++;
529	return fgets(pszBuf, cbBuf, pFile);
530	}
531
532
533	/**
534	* Closes a file opened by OpenFile
535	*
536	* @param pFile The file to close.
537	*/
538	static void CloseFile(FILE *pFile)
539	{
540	g_pszCurFile = NULL;
541	g_iLine = 0;
542	fclose(pFile);
543	}
544
545
546	/**
547	* Read the UnicodeData.txt file.
548	* @returns 0 on success.
549	* @returns !0 on failure.
550	* @param pszBasePath The base path, can be NULL.
551	* @param pszFilename The name of the file.
552	*/
553	static int ReadUnicodeData(const char pszBasePath, const char pszFilename)
554	{
555	/*
556	* Open input.
557	*/
558	FILE *pFile = OpenFile(pszBasePath, pszFilename);
559	if (!pFile)
560	return 1;
561
562	/*
563	* Parse the input and spit out the output.
564	*/
565	char szLine[4096];
566	RTUNICP i = 0;
567	while (GetLineFromFile(szLine, sizeof(szLine), pFile) != NULL)
568	{
569	if (IsCommentOrBlankLine(szLine))
570	continue;
571
572	char *pszCurField;
573	char pszCodePoint = FirstField(&pszCurField, StripLine(szLine)); / 0 */
574	char pszName = NextField(&pszCurField); / 1 */
575	char pszGeneralCategory = NextField(&pszCurField); / 2 */
576	char pszCanonicalCombiningClass = NextField(&pszCurField); / 3 */
577	char pszBidiClass = NextField(&pszCurField); / 4 */
578	char pszDecompositionType = NextField(&pszCurField); / 5 */
579	char *pszDecompositionMapping = SplitDecompField(&pszDecompositionType);
580	char pszNumericType = NextField(&pszCurField); / 6 */
581	char pszNumericValueD = NextField(&pszCurField); / 7 */
582	char pszNumericValueN = NextField(&pszCurField); / 8 */
583	char pszBidiMirrored = NextField(&pszCurField); / 9 */
584	char pszUnicode1Name = NextField(&pszCurField); / 10 */
585	char pszISOComment = NextField(&pszCurField); / 11 */
586	char pszSimpleUpperCaseMapping = NextField(&pszCurField); / 12 */
587	char pszSimpleLowerCaseMapping = NextField(&pszCurField); / 13 */
588	char pszSimpleTitleCaseMapping = NextField(&pszCurField); / 14 */
589
590	RTUNICP CodePoint = ToNum(pszCodePoint);
591	if (CodePoint >= RT_ELEMENTS(g_aCPInfo))
592	{
593	ParseError("U+05X is out of range\n", CodePoint);
594	continue;
595	}
596
597	/* catchup? */
598	while (i < CodePoint)
599	NullEntry(i++);
600	if (i != CodePoint)
601	{
602	ParseError("i=%d CodePoint=%u\n", i, CodePoint);
603	CloseFile(pFile);
604	return 1;
605	}
606
607	/* this one */
608	g_aCPInfo[i].CodePoint = i;
609	g_aCPInfo[i].fNullEntry = 0;
610	g_aCPInfo[i].pszName = DupStr(pszName);
611	g_aCPInfo[i].SimpleUpperCaseMapping = ToNumDefault(pszSimpleUpperCaseMapping, CodePoint);
612	g_aCPInfo[i].SimpleLowerCaseMapping = ToNumDefault(pszSimpleLowerCaseMapping, CodePoint);
613	g_aCPInfo[i].SimpleTitleCaseMapping = ToNumDefault(pszSimpleTitleCaseMapping, CodePoint);
614	g_aCPInfo[i].CanonicalCombiningClass = ToNum(pszCanonicalCombiningClass);
615	g_aCPInfo[i].pszDecompositionType = DupStr(pszDecompositionType);
616	g_aCPInfo[i].paDecompositionMapping = ToMapping(pszDecompositionMapping, &g_aCPInfo[i].cDecompositionMapping, 20);
617	g_aCPInfo[i].pszGeneralCategory = DupStr(pszGeneralCategory);
618	g_aCPInfo[i].pszBidiClass = DupStr(pszBidiClass);
619	g_aCPInfo[i].pszNumericType = DupStr(pszNumericType);
620	g_aCPInfo[i].pszNumericValueD = DupStr(pszNumericValueD);
621	g_aCPInfo[i].pszNumericValueN = DupStr(pszNumericValueN);
622	g_aCPInfo[i].pszBidiMirrored = DupStr(pszBidiMirrored);
623	g_aCPInfo[i].pszUnicode1Name = DupStr(pszUnicode1Name);
624	g_aCPInfo[i].pszISOComment = DupStr(pszISOComment);
625	i++;
626	}
627
628	/* catchup? */
629	while (i < RT_ELEMENTS(g_aCPInfo))
630	NullEntry(i++);
631	CloseFile(pFile);
632
633	return 0;
634	}
635
636
637	/**
638	* Generates excluded data.
639	*
640	* @returns 0 on success, exit code on failure.
641	*/
642	static int GenerateExcludedData(void)
643	{
644	/*
645	* Hangul Syllables U+AC00 to U+D7A3.
646	*/
647	for (RTUNICP i = 0xac00; i <= 0xd7a3; i++)
648	{
649	g_aCPInfo[i].fNullEntry = 0;
650	g_aCPInfo[i].fInvNFD_QC = 1;
651	/** @todo generate the decomposition: http://unicode.org/reports/tr15/#Hangul
652	* */
653	}
654
655	/** @todo
656	* CJK Ideographs Extension A (U+3400 - U+4DB5)
657	* CJK Ideographs (U+4E00 - U+9FA5)
658	* CJK Ideograph Extension B (U+20000 - U+2A6D6)
659	* CJK Ideograph Extension C (U+2A700 - U+2B734)
660	*/
661
662	return 0;
663	}
664
665
666
667	/**
668	* Worker for ApplyProperty that handles a yes, no, maybe property value.
669	*
670	* @returns 0 (NO), 1 (YES), 2 (MAYBE).
671	* @param ppszNextField The field cursor, input and output.
672	*/
673	static int YesNoMaybePropertyValue(char **ppszNextField)
674	{
675	if (!**ppszNextField)
676	{
677	ParseError("Missing Y/N/M field\n");
678	return 0;
679	}
680	char *psz = NextField(ppszNextField);
681	if (!strcmp(psz, "N"))
682	return 0;
683	if (!strcmp(psz, "Y"))
684	return 1;
685	if (!strcmp(psz, "M"))
686	return 2;
687	ParseError("Unexpected Y/N/M value: '%s'\n", psz);
688	return 0;
689	}
690
691
692	/**
693	* Inverted version of YesNoMaybePropertyValue
694	*
695	* @returns 1 (NO), 0 (YES), 2 (MAYBE).
696	* @param ppszNextField The field cursor, input and output.
697	*/
698	static int YesNoMaybePropertyValueInv(char **ppszNextField)
699	{
700	unsigned rc = YesNoMaybePropertyValue(ppszNextField);
701	switch (rc)
702	{
703	case 0: return 1;
704	case 1: return 0;
705	default: return rc;
706	}
707	}
708
709
710	/**
711	* Applies a property to a code point.
712	*
713	* @param StartCP The code point.
714	* @param pszProperty The property name.
715	*/
716	static void ApplyProperty(RTUNICP StartCP, const char pszProperty, char pszNextField)
717	{
718	if (StartCP >= RT_ELEMENTS(g_aCPInfo))
719	{
720	ParseError("U+%06X is out of the g_aCPInfo range.\n", StartCP);
721	return;
722	}
723	struct CPINFO *pCPInfo = &g_aCPInfo[StartCP];
724	/* string switch */
725	if (!strcmp(pszProperty, "ASCII_Hex_Digit")) pCPInfo->fASCIIHexDigit = 1;
726	else if (!strcmp(pszProperty, "Alphabetic")) pCPInfo->fAlphabetic = 1;
727	else if (!strcmp(pszProperty, "Bidi_Control")) pCPInfo->fBidiControl = 1;
728	else if (!strcmp(pszProperty, "Case_Ignorable")) pCPInfo->fCaseIgnorable = 1;
729	else if (!strcmp(pszProperty, "Cased")) pCPInfo->fCased = 1;
730	else if (!strcmp(pszProperty, "Changes_When_Casefolded")) pCPInfo->fChangesWhenCasefolded = 1;
731	else if (!strcmp(pszProperty, "Changes_When_Casemapped")) pCPInfo->fChangesWhenCasemapped = 1;
732	else if (!strcmp(pszProperty, "Changes_When_Lowercased")) pCPInfo->fChangesWhenLowercased = 1;
733	else if (!strcmp(pszProperty, "Changes_When_Titlecased")) pCPInfo->fChangesWhenTitlecased = 1;
734	else if (!strcmp(pszProperty, "Changes_When_Uppercased")) pCPInfo->fChangesWhenUppercased = 1;
735	else if (!strcmp(pszProperty, "Dash")) pCPInfo->fDash = 1;
736	else if (!strcmp(pszProperty, "Default_Ignorable_Code_Point")) pCPInfo->fDefaultIgnorableCodePoint = 1;
737	else if (!strcmp(pszProperty, "Deprecated")) pCPInfo->fDeprecated = 1;
738	else if (!strcmp(pszProperty, "Diacritic")) pCPInfo->fDiacritic = 1;
739	else if (!strcmp(pszProperty, "Extender")) pCPInfo->fExtender = 1;
740	else if (!strcmp(pszProperty, "Grapheme_Base")) pCPInfo->fGraphemeBase = 1;
741	else if (!strcmp(pszProperty, "Grapheme_Extend")) pCPInfo->fGraphemeExtend = 1;
742	else if (!strcmp(pszProperty, "Grapheme_Link")) pCPInfo->fGraphemeLink = 1;
743	else if (!strcmp(pszProperty, "Hex_Digit")) pCPInfo->fHexDigit = 1;
744	else if (!strcmp(pszProperty, "Hyphen")) pCPInfo->fHyphen = 1;
745	else if (!strcmp(pszProperty, "ID_Continue")) pCPInfo->fIDContinue = 1;
746	else if (!strcmp(pszProperty, "ID_Start")) pCPInfo->fIDStart = 1;
747	else if (!strcmp(pszProperty, "Ideographic")) pCPInfo->fIdeographic = 1;
748	else if (!strcmp(pszProperty, "IDS_Binary_Operator")) pCPInfo->fIDSBinaryOperator = 1;
749	else if (!strcmp(pszProperty, "IDS_Trinary_Operator")) pCPInfo->fIDSTrinaryOperator = 1;
750	else if (!strcmp(pszProperty, "Join_Control")) pCPInfo->fJoinControl = 1;
751	else if (!strcmp(pszProperty, "Logical_Order_Exception")) pCPInfo->fLogicalOrderException = 1;
752	else if (!strcmp(pszProperty, "Lowercase")) pCPInfo->fLowercase = 1;
753	else if (!strcmp(pszProperty, "Math")) pCPInfo->fMath = 1;
754	else if (!strcmp(pszProperty, "Noncharacter_Code_Point")) pCPInfo->fNoncharacterCodePoint = 1;
755	else if (!strcmp(pszProperty, "Other_Alphabetic")) pCPInfo->fOtherAlphabetic = 1;
756	else if (!strcmp(pszProperty, "Other_Default_Ignorable_Code_Point")) pCPInfo->fOtherDefaultIgnorableCodePoint = 1;
757	else if (!strcmp(pszProperty, "Other_Grapheme_Extend")) pCPInfo->fOtherGraphemeExtend = 1;
758	else if (!strcmp(pszProperty, "Other_ID_Continue")) pCPInfo->fOtherIDContinue = 1;
759	else if (!strcmp(pszProperty, "Other_ID_Start")) pCPInfo->fOtherIDStart = 1;
760	else if (!strcmp(pszProperty, "Other_Lowercase")) pCPInfo->fOtherLowercase = 1;
761	else if (!strcmp(pszProperty, "Other_Math")) pCPInfo->fOtherMath = 1;
762	else if (!strcmp(pszProperty, "Other_Uppercase")) pCPInfo->fOtherUppercase = 1;
763	else if (!strcmp(pszProperty, "Pattern_Syntax")) pCPInfo->fPatternSyntax = 1;
764	else if (!strcmp(pszProperty, "Pattern_White_Space")) pCPInfo->fPatternWhiteSpace = 1;
765	else if (!strcmp(pszProperty, "Quotation_Mark")) pCPInfo->fQuotationMark = 1;
766	else if (!strcmp(pszProperty, "Radical")) pCPInfo->fRadical = 1;
767	else if (!strcmp(pszProperty, "Soft_Dotted")) pCPInfo->fSoftDotted = 1;
768	else if (!strcmp(pszProperty, "STerm")) pCPInfo->fSTerm = 1;
769	else if (!strcmp(pszProperty, "Terminal_Punctuation")) pCPInfo->fTerminalPunctuation = 1;
770	else if (!strcmp(pszProperty, "Unified_Ideograph")) pCPInfo->fUnifiedIdeograph = 1;
771	else if (!strcmp(pszProperty, "Uppercase")) pCPInfo->fUppercase = 1;
772	else if (!strcmp(pszProperty, "Variation_Selector")) pCPInfo->fVariationSelector = 1;
773	else if (!strcmp(pszProperty, "White_Space")) pCPInfo->fWhiteSpace = 1;
774	else if (!strcmp(pszProperty, "XID_Continue")) pCPInfo->fXIDContinue = 1;
775	else if (!strcmp(pszProperty, "XID_Start")) pCPInfo->fXIDStart = 1;
776	/* DerivedNormalizationProps: */
777	else if (!strcmp(pszProperty, "FC_NFKC")) return; /* ignored */
778	else if (!strcmp(pszProperty, "Full_Composition_Exclusion")) pCPInfo->fFullCompositionExclusion = 1;
779	else if (!strcmp(pszProperty, "NFC_QC")) pCPInfo->fInvNFC_QC = YesNoMaybePropertyValueInv(&pszNextField);
780	else if (!strcmp(pszProperty, "NFD_QC")) pCPInfo->fInvNFD_QC = YesNoMaybePropertyValueInv(&pszNextField);
781	else if (!strcmp(pszProperty, "NFKC_QC")) pCPInfo->fInvNFKC_QC = YesNoMaybePropertyValueInv(&pszNextField);
782	else if (!strcmp(pszProperty, "NFKD_QC")) pCPInfo->fInvNFKD_QC = YesNoMaybePropertyValueInv(&pszNextField);
783	else if (!strcmp(pszProperty, "Expands_On_NFC")) pCPInfo->fExpandsOnNFC = 1;
784	else if (!strcmp(pszProperty, "Expands_On_NFD")) pCPInfo->fExpandsOnNFD = 1;
785	else if (!strcmp(pszProperty, "Expands_On_NFKC")) pCPInfo->fExpandsOnNFKC = 1;
786	else if (!strcmp(pszProperty, "Expands_On_NFKD")) pCPInfo->fExpandsOnNFKD = 1;
787	else if (!strcmp(pszProperty, "NFKC_CF")) return; /ignore /
788	else if (!strcmp(pszProperty, "Changes_When_NFKC_Casefolded")) return; /ignore /
789	else
790	{
791	ParseError("Unknown property '%s'\n", pszProperty);
792	return;
793	}
794
795	if (pszNextField && *pszNextField)
796	ParseError("Unexpected next field: '%s'\n", pszNextField);
797	}
798
799
800	/**
801	* Reads a property file.
802	*
803	* There are several property files, this code can read all
804	* of those but will only make use of the properties it recognizes.
805	*
806	* @returns 0 on success.
807	* @returns !0 on failure.
808	* @param pszBasePath The base path, can be NULL.
809	* @param pszFilename The name of the file.
810	*/
811	static int ReadProperties(const char pszBasePath, const char pszFilename)
812	{
813	/*
814	* Open input.
815	*/
816	FILE *pFile = OpenFile(pszBasePath, pszFilename);
817	if (!pFile)
818	return 1;
819
820	/*
821	* Parse the input and spit out the output.
822	*/
823	char szLine[4096];
824	while (GetLineFromFile(szLine, sizeof(szLine), pFile) != NULL)
825	{
826	if (IsCommentOrBlankLine(szLine))
827	continue;
828	char *pszCurField;
829	char *pszRange = FirstField(&pszCurField, StripLine(szLine));
830	char *pszProperty = NextField(&pszCurField);
831	if (!*pszProperty)
832	{
833	ParseError("no property field.\n");
834	continue;
835	}
836
837	RTUNICP LastCP;
838	RTUNICP StartCP = ToRange(pszRange, &LastCP);
839	if (StartCP == ~(RTUNICP)0)
840	continue;
841
842	while (StartCP <= LastCP)
843	ApplyProperty(StartCP++, pszProperty, pszCurField);
844	}
845
846	CloseFile(pFile);
847
848	return 0;
849	}
850
851
/**
 * Append a flag to the string.
 *
 * If the string already holds something, the flag is separated from it by
 * " | " so the result reads as a C OR-expression.  The caller must provide a
 * buffer large enough for the result; no bounds checking is done here.
 *
 * @returns @a psz.
 * @param   psz         The flag string buffer to append to.
 * @param   pszFlag     The flag name to append.
 */
static char *AppendFlag(char *psz, const char *pszFlag)
{
    char *pszEnd = strchr(psz, '\0');
    if (pszEnd != psz)
    {
        *pszEnd++ = ' ';
        *pszEnd++ = '|';
        *pszEnd++ = ' ';
    }
    strcpy(pszEnd, pszFlag);
    return psz;
}
867
868	/**
869	* Calcs the flags for a code point.
870	* @returns true if there is a flag.
871	* @returns false if the isn't.
872	*/
873	static bool CalcFlags(struct CPINFO pInfo, char pszFlags)
874	{
875	pszFlags[0] = '\0';
876	/** @todo read the specs on this other vs standard stuff, and check out the finer points */
877	if (pInfo->fAlphabetic \|\| pInfo->fOtherAlphabetic)
878	AppendFlag(pszFlags, "RTUNI_ALPHA");
879	if (pInfo->fHexDigit \|\| pInfo->fASCIIHexDigit)
880	AppendFlag(pszFlags, "RTUNI_XDIGIT");
881	if (!strcmp(pInfo->pszGeneralCategory, "Nd"))
882	AppendFlag(pszFlags, "RTUNI_DDIGIT");
883	if (pInfo->fWhiteSpace)
884	AppendFlag(pszFlags, "RTUNI_WSPACE");
885	if (pInfo->fUppercase \|\| pInfo->fOtherUppercase)
886	AppendFlag(pszFlags, "RTUNI_UPPER");
887	if (pInfo->fLowercase \|\| pInfo->fOtherLowercase)
888	AppendFlag(pszFlags, "RTUNI_LOWER");
889	//if (pInfo->???)
890	// AppendFlag(pszFlags, "RTUNI_BSPACE");
891	#if 0
892	if (pInfo->fInvNFD_QC != 0 \|\| pInfo->fInvNFC_QC != 0)
893	{
894	AppendFlag(pszFlags, "RTUNI_QC_NFX");
895	if (!pInfo->paDecompositionMapping && pInfo->fInvNFD_QC)
896	fprintf(stderr, "uniread: U+%05X is QC_NFD but has no mappings.\n", pInfo->CodePoint);
897	else if (*pInfo->pszDecompositionType && pInfo->fInvNFD_QC)
898	fprintf(stderr, "uniread: U+%05X is QC_NFD but has no canonical mappings.\n", pInfo->CodePoint);
899	}
900	else if (pInfo->paDecompositionMapping && !*pInfo->pszDecompositionType)
901	fprintf(stderr, "uniread: U+%05X is not QC_NFX but has canonical mappings.\n", pInfo->CodePoint);
902	#endif
903
904	if (!*pszFlags)
905	{
906	pszFlags[0] = '0';
907	pszFlags[1] = '\0';
908	return false;
909	}
910	return true;
911	}
912
913
914	/**
915	* Closes the primary output stream.
916	*/
917	static int Stream1Close(void)
918	{
919	if (g_pCurOutFile && g_pCurOutFile != stdout && g_pCurOutFile != stderr)
920	{
921	if (fclose(g_pCurOutFile) != 0)
922	{
923	fprintf(stderr, "Error closing output file.\n");
924	return -1;
925	}
926	}
927	g_pCurOutFile = NULL;
928	return 0;
929	}
930
931
932	/**
933	* Initializes the 1st stream to output to a given file.
934	*/
935	static int Stream1Init(const char *pszName)
936	{
937	int rc = Stream1Close();
938	if (!rc)
939	{
940	g_pCurOutFile = fopen(pszName, "w");
941	if (!g_pCurOutFile)
942	{
943	fprintf(stderr, "Error opening output file '%s'.\n", pszName);
944	rc = -1;
945	}
946	}
947	return rc;
948	}
949
950
951	/**
952	* printf wrapper for the primary output stream.
953	*
954	* @returns See vfprintf.
955	* @param pszFormat The vfprintf format string.
956	* @param ... The format arguments.
957	*/
958	static int Stream1Printf(const char *pszFormat, ...)
959	{
960	int cch;
961	va_list va;
962	va_start(va, pszFormat);
963	cch = vfprintf(g_pCurOutFile, pszFormat, va);
964	va_end(va);
965	return cch;
966	}
967
968
/** The data store for stream two: text is accumulated here by Stream2Printf
 * until Stream2Flush copies it to the primary output stream. */
static char g_szStream2[10240];
/** Offset into g_szStream2 where the next Stream2Printf output goes. */
static unsigned volatile g_offStream2 = 0;
972
973	/**
974	* Initializes the 2nd steam.
975	*/
976	static void Stream2Init(void)
977	{
978	g_szStream2[0] = '\0';
979	g_offStream2 = 0;
980	}
981
982	/**
983	* Flushes the 2nd stream to stdout.
984	*/
985	static int Stream2Flush(void)
986	{
987	g_szStream2[g_offStream2] = '\0';
988	Stream1Printf("%s", g_szStream2);
989	Stream2Init();
990	return 0;
991	}
992
993	/**
994	* printf to the 2nd stream.
995	*/
996	static int Stream2Printf(const char *pszFormat, ...)
997	{
998	unsigned offStream2 = g_offStream2;
999	va_list va;
1000	va_start(va, pszFormat);
1001	int cch = vsprintf(&g_szStream2[offStream2], pszFormat, va);
1002	va_end(va);
1003	offStream2 += cch;
1004	if (offStream2 >= sizeof(g_szStream2))
1005	{
1006	fprintf(stderr, "error: stream2 overflow!\n");
1007	exit(1);
1008	}
1009	g_offStream2 = offStream2;
1010	return cch;
1011	}
1012
1013
1014	/**
1015	* Print the unidata.cpp file header and include list.
1016	*/
1017	int PrintHeader(const char argv0, const char pszBaseDir)
1018	{
1019	char szBuf[1024];
1020	if (!pszBaseDir)
1021	{
1022	memset(szBuf, 0, sizeof(szBuf));
1023	#ifdef _MSC_VER
1024	_getcwd(szBuf, sizeof(szBuf));
1025	#else
1026	getcwd(szBuf, sizeof(szBuf));
1027	#endif
1028	pszBaseDir = szBuf;
1029	}
1030
1031	Stream1Printf("/* $" "Id" "$ */\n"
1032	"/** @file\n"
1033	" * IPRT - Unicode Tables.\n"
1034	" *\n"
1035	" * Automatically Generated from %s\n"
1036	" * by %s (" __DATE__ " " __TIME__ ")\n"
1037	" */\n"
1038	"\n"
1039	"/*\n"
1040	" * Copyright (C) 2006-2014 Oracle Corporation\n"
1041	" *\n"
1042	" * This file is part of VirtualBox Open Source Edition (OSE), as\n"
1043	" * available from http://www.virtualbox.org. This file is free software;\n"
1044	" * you can redistribute it and/or modify it under the terms of the GNU\n"
1045	" * General Public License (GPL) as published by the Free Software\n"
1046	" * Foundation, in version 2 as it comes in the \"COPYING\" file of the\n"
1047	" * VirtualBox OSE distribution. VirtualBox OSE is distributed in the\n"
1048	" * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.\n"
1049	" *\n"
1050	" * The contents of this file may alternatively be used under the terms\n"
1051	" * of the Common Development and Distribution License Version 1.0\n"
1052	" * (CDDL) only, as it comes in the \"COPYING.CDDL\" file of the\n"
1053	" * VirtualBox OSE distribution, in which case the provisions of the\n"
1054	" * CDDL are applicable instead of those of the GPL.\n"
1055	" *\n"
1056	" * You may elect to license modified versions of this file under the\n"
1057	" * terms and conditions of either the GPL or the CDDL or both.\n"
1058	" */\n"
1059	"\n"
1060	"#include <iprt/uni.h>\n"
1061	"\n",
1062	pszBaseDir, argv0);
1063	return 0;
1064	}
1065
1066
1067	/**
1068	* Print the flag tables.
1069	*/
1070	int PrintFlags(void)
1071	{
1072	/*
1073	* Print flags table.
1074	*/
1075	Stream2Init();
1076	Stream2Printf("RT_DECL_DATA_CONST(const RTUNIFLAGSRANGE) g_aRTUniFlagsRanges[] =\n"
1077	"{\n");
1078	RTUNICP i = 0;
1079	int iStart = -1;
1080	while (i < RT_ELEMENTS(g_aCPInfo))
1081	{
1082	/* figure how far off the next chunk is */
1083	char szFlags[256];
1084	unsigned iNonNull = i;
1085	while ( iNonNull < RT_ELEMENTS(g_aCPInfo)
1086	&& iNonNull >= 256
1087	&& (g_aCPInfo[iNonNull].fNullEntry \|\| !CalcFlags(&g_aCPInfo[iNonNull], szFlags)) )
1088	iNonNull++;
1089	if (iNonNull - i > 4096 \|\| iNonNull == RT_ELEMENTS(g_aCPInfo))
1090	{
1091	if (iStart >= 0)
1092	{
1093	Stream1Printf("};\n\n");
1094	Stream2Printf(" { 0x%06x, 0x%06x, &g_afRTUniFlags0x%06x[0] },\n", iStart, i, iStart);
1095	iStart = -1;
1096	}
1097	i = iNonNull;
1098	}
1099	else
1100	{
1101	if (iStart < 0)
1102	{
1103	Stream1Printf("static const uint8_t g_afRTUniFlags0x%06x[] =\n"
1104	"{\n", i);
1105	iStart = i;
1106	}
1107	CalcFlags(&g_aCPInfo[i], szFlags);
1108	Stream1Printf(" %50s, /* U+%06x: %s*/\n", szFlags, g_aCPInfo[i].CodePoint, g_aCPInfo[i].pszName);
1109	i++;
1110	}
1111	}
1112	Stream2Printf(" { ~(RTUNICP)0, ~(RTUNICP)0, NULL }\n"
1113	"};\n\n\n");
1114	Stream1Printf("\n");
1115	return Stream2Flush();
1116	}
1117
1118
1119	/**
1120	* Prints the upper case tables.
1121	*/
1122	static int PrintUpper(void)
1123	{
1124	Stream2Init();
1125	Stream2Printf("RT_DECL_DATA_CONST(const RTUNICASERANGE) g_aRTUniUpperRanges[] =\n"
1126	"{\n");
1127	RTUNICP i = 0;
1128	int iStart = -1;
1129	while (i < RT_ELEMENTS(g_aCPInfo))
1130	{
1131	/* figure how far off the next chunk is */
1132	unsigned iSameCase = i;
1133	while ( iSameCase < RT_ELEMENTS(g_aCPInfo)
1134	&& g_aCPInfo[iSameCase].SimpleUpperCaseMapping == g_aCPInfo[iSameCase].CodePoint
1135	&& iSameCase >= 256)
1136	iSameCase++;
1137	if (iSameCase - i > 4096/sizeof(RTUNICP) \|\| iSameCase == RT_ELEMENTS(g_aCPInfo))
1138	{
1139	if (iStart >= 0)
1140	{
1141	Stream1Printf("};\n\n");
1142	Stream2Printf(" { 0x%06x, 0x%06x, &g_afRTUniUpper0x%06x[0] },\n", iStart, i, iStart);
1143	iStart = -1;
1144	}
1145	i = iSameCase;
1146	}
1147	else
1148	{
1149	if (iStart < 0)
1150	{
1151	Stream1Printf("static const RTUNICP g_afRTUniUpper0x%06x[] =\n"
1152	"{\n", i);
1153	iStart = i;
1154	}
1155	Stream1Printf(" 0x%02x, /* U+%06x: %s*/\n", g_aCPInfo[i].SimpleUpperCaseMapping, g_aCPInfo[i].CodePoint, g_aCPInfo[i].pszName);
1156	i++;
1157	}
1158	}
1159	Stream2Printf(" { ~(RTUNICP)0, ~(RTUNICP)0, NULL }\n"
1160	"};\n\n\n");
1161	Stream1Printf("\n");
1162	return Stream2Flush();
1163	}
1164
1165
1166	/**
1167	* Prints the lowercase tables.
1168	*/
1169	static int PrintLower(void)
1170	{
1171	Stream2Init();
1172	Stream2Printf("RT_DECL_DATA_CONST(const RTUNICASERANGE) g_aRTUniLowerRanges[] =\n"
1173	"{\n");
1174	RTUNICP i = 0;
1175	int iStart = -1;
1176	while (i < RT_ELEMENTS(g_aCPInfo))
1177	{
1178	/* figure how far off the next chunk is */
1179	unsigned iSameCase = i;
1180	while ( iSameCase < RT_ELEMENTS(g_aCPInfo)
1181	&& g_aCPInfo[iSameCase].SimpleLowerCaseMapping == g_aCPInfo[iSameCase].CodePoint
1182	&& iSameCase >= 256)
1183	iSameCase++;
1184	if (iSameCase - i > 4096/sizeof(RTUNICP) \|\| iSameCase == RT_ELEMENTS(g_aCPInfo))
1185	{
1186	if (iStart >= 0)
1187	{
1188	Stream1Printf("};\n\n");
1189	Stream2Printf(" { 0x%06x, 0x%06x, &g_afRTUniLower0x%06x[0] },\n", iStart, i, iStart);
1190	iStart = -1;
1191	}
1192	i = iSameCase;
1193	}
1194	else
1195	{
1196	if (iStart < 0)
1197	{
1198	Stream1Printf("static const RTUNICP g_afRTUniLower0x%06x[] =\n"
1199	"{\n", i);
1200	iStart = i;
1201	}
1202	Stream1Printf(" 0x%02x, /* U+%06x: %s*/\n",
1203	g_aCPInfo[i].SimpleLowerCaseMapping, g_aCPInfo[i].CodePoint, g_aCPInfo[i].pszName);
1204	i++;
1205	}
1206	}
1207	Stream2Printf(" { ~(RTUNICP)0, ~(RTUNICP)0, NULL }\n"
1208	"};\n\n\n");
1209	Stream1Printf("\n");
1210	return Stream2Flush();
1211	}
1212
1213
1214	int main(int argc, char **argv)
1215	{
1216	/*
1217	* Parse args.
1218	*/
1219	if (argc <= 1)
1220	{
1221	printf("usage: %s [-C\|--dir <UCD-dir>] [UnicodeData.txt [DerivedCoreProperties.txt [PropList.txt] [DerivedNormalizationProps.txt]]]\n",
1222	argv[0]);
1223	return 1;
1224	}
1225
1226	const char *pszBaseDir = NULL;
1227	const char *pszUnicodeData = "UnicodeData.txt";
1228	const char *pszDerivedCoreProperties = "DerivedCoreProperties.txt";
1229	const char *pszPropList = "PropList.txt";
1230	const char *pszDerivedNormalizationProps = "DerivedNormalizationProps.txt";
1231	int iFile = 0;
1232	for (int argi = 1; argi < argc; argi++)
1233	{
1234	if (argv[argi][0] != '-')
1235	{
1236	switch (iFile++)
1237	{
1238	case 0: pszUnicodeData = argv[argi]; break;
1239	case 1: pszDerivedCoreProperties = argv[argi]; break;
1240	case 2: pszPropList = argv[argi]; break;
1241	case 3: pszDerivedNormalizationProps = argv[argi]; break;
1242	default:
1243	fprintf(stderr, "uniread: syntax error at '%s': too many filenames\n", argv[argi]);
1244	return 1;
1245	}
1246	}
1247	else if ( !strcmp(argv[argi], "--dir")
1248	\|\| !strcmp(argv[argi], "-C"))
1249	{
1250	if (argi + 1 >= argc)
1251	{
1252	fprintf(stderr, "uniread: syntax error: '%s' is missing the directory name.\n", argv[argi]);
1253	return 1;
1254	}
1255	argi++;
1256	pszBaseDir = argv[argi];
1257	}
1258	else
1259	{
1260	fprintf(stderr, "uniread: syntax error at '%s': Unknown argument\n", argv[argi]);
1261	return 1;
1262	}
1263	}
1264
1265	/*
1266	* Read the data.
1267	*/
1268	int rc = ReadUnicodeData(pszBaseDir, pszUnicodeData);
1269	if (rc)
1270	return rc;
1271	rc = GenerateExcludedData();
1272	if (rc)
1273	return rc;
1274	rc = ReadProperties(pszBaseDir, pszPropList);
1275	if (rc)
1276	return rc;
1277	rc = ReadProperties(pszBaseDir, pszDerivedCoreProperties);
1278	if (rc)
1279	return rc;
1280	rc = ReadProperties(pszBaseDir, pszDerivedNormalizationProps);
1281	if (rc)
1282	return rc;
1283
1284	/*
1285	* Produce output files.
1286	*/
1287	rc = Stream1Init("unidata-flags.cpp");
1288	if (!rc)
1289	rc = PrintHeader(argv[0], pszBaseDir);
1290	if (!rc)
1291	rc = PrintFlags();
1292
1293	rc = Stream1Init("unidata-upper.cpp");
1294	if (!rc)
1295	rc = PrintHeader(argv[0], pszBaseDir);
1296	if (!rc)
1297	rc = PrintUpper();
1298
1299	rc = Stream1Init("unidata-lower.cpp");
1300	if (!rc)
1301	rc = PrintHeader(argv[0], pszBaseDir);
1302	if (!rc)
1303	rc = PrintLower();
1304	if (!rc)
1305	rc = Stream1Close();
1306
1307	/* done */
1308	return rc;
1309	}
1310

Note: See TracBrowser for help on using the repository browser.

source: vbox/trunk/src/VBox/Runtime/common/string/uniread.cpp@ 51770

Download in other formats: