uniread.cpp@ 59706

Last change on this file since 59706 was 57944, checked in by vboxsync, 9 years ago
iprt: More doxygen corrections.
Property svn:eol-style set to `native` Property svn:keywords set to `Id Revision`
File size: 41.1 KB

Line
1	/* $Id: uniread.cpp 57944 2015-09-29 15:07:09Z vboxsync $ */
2	/** @file
3	* IPRT - Unicode Specification Reader.
4	*/
5
6	/*
7	* Copyright (C) 2006-2015 Oracle Corporation
8	*
9	* This file is part of VirtualBox Open Source Edition (OSE), as
10	* available from http://www.virtualbox.org. This file is free software;
11	* you can redistribute it and/or modify it under the terms of the GNU
12	* General Public License (GPL) as published by the Free Software
13	* Foundation, in version 2 as it comes in the "COPYING" file of the
14	* VirtualBox OSE distribution. VirtualBox OSE is distributed in the
15	* hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
16	*
17	* The contents of this file may alternatively be used under the terms
18	* of the Common Development and Distribution License Version 1.0
19	* (CDDL) only, as it comes in the "COPYING.CDDL" file of the
20	* VirtualBox OSE distribution, in which case the provisions of the
21	* CDDL are applicable instead of those of the GPL.
22	*
23	* You may elect to license modified versions of this file under the
24	* terms and conditions of either the GPL or the CDDL or both.
25	*/
26
27
28	/*********************************************************************************************************************************
29	* Header Files *
30	*********************************************************************************************************************************/
31	#include <iprt/types.h>
32	#include <iprt/stdarg.h>
33	#include <iprt/ctype.h>
34
35	#include <stdio.h>
36	#include <string.h>
37	#include <stdlib.h>
38	#ifdef _MSC_VER
39	# include <direct.h>
40	#else
41	# include <unistd.h>
42	#endif
43
44
45	/*********************************************************************************************************************************
46	* Global Variables *
47	*********************************************************************************************************************************/
/** Name of the input file currently being parsed (printed by ParseError). */
static const char  *g_pszCurFile;
/** Line number within the current input file (printed by ParseError). */
static unsigned     g_iLine;
/** The stream that generated output is currently written to. */
static FILE        *g_pCurOutFile;
54
55
56	/**
57	* Exit the program after printing a parse error.
58	*
59	* @param pszFormat The message.
60	* @param ... Format arguments.
61	*/
62	static void ParseError(const char *pszFormat, ...)
63	{
64	va_list va;
65	va_start(va, pszFormat);
66	fprintf(stderr, "parse error: %s:%u: ", g_pszCurFile, g_iLine);
67	vfprintf(stderr, pszFormat, va);
68	va_end(va);
69	exit(1);
70	}
71
/**
 * Strips a line in place.
 *
 * Leading spaces and tabs are skipped, everything from the first '#' onwards
 * is cut off, and trailing whitespace (including CR/LF) is removed.
 *
 * @returns Pointer to the first non-blank character in @a pszLine.
 * @param   pszLine     The line string to strip.  It is modified in place.
 */
static char *StripLine(char *pszLine)
{
    /* Skip leading blanks. */
    while (*pszLine == ' ' || *pszLine == '\t')
        pszLine++;

    /* Terminate at a comment if there is one, otherwise locate the end. */
    char *psz = strchr(pszLine, '#');
    if (psz)
        *psz = '\0';
    else
        psz = strchr(pszLine, '\0');

    /* Trim trailing whitespace, walking backwards from the end. */
    while (psz > pszLine)
    {
        switch (psz[-1])
        {
            case ' ':
            case '\t':
            case '\n':
            case '\r':
                *--psz = '\0';
                continue;
        }
        break;
    }

    return pszLine;
}
103
104
/**
 * Checks if the line is blank or a comment line and should be skipped.
 *
 * A line is skipped when, after any leading spaces, tabs, CRs and LFs, it is
 * either empty or starts with '#'.
 *
 * @returns true if the line should be skipped, false if it carries data.
 * @param   pszLine     The line to consider.
 */
static bool IsCommentOrBlankLine(const char *pszLine)
{
    while (*pszLine == ' ' || *pszLine == '\t' || *pszLine == '\n' || *pszLine == '\r')
        pszLine++;
    return *pszLine == '#' || *pszLine == '\0';
}
116
117
/**
 * Gets the first ';'-separated field in the string.
 *
 * The field is terminated in place and stripped of leading and trailing
 * whitespace.
 *
 * @returns Pointer to the (stripped) first field.
 * @param   ppsz        Where to store the pointer to the next field.  When
 *                      there is no further separator this points at the
 *                      string terminator.
 * @param   pszLine     The line string.  (Could also be *ppsz from a previous
 *                      FirstField/NextField call.)  Modified in place.
 */
static char *FirstField(char **ppsz, char *pszLine)
{
    char *psz = strchr(pszLine, ';');
    if (!psz)
        *ppsz = psz = strchr(pszLine, '\0');
    else
    {
        *psz = '\0';
        *ppsz = psz + 1;
    }

    /* strip */
    while (*pszLine == ' ' || *pszLine == '\t' || *pszLine == '\r' || *pszLine == '\n')
        pszLine++;
    while (psz > pszLine)
    {
        switch (psz[-1])
        {
            case ' ':
            case '\t':
            case '\n':
            case '\r':
                *--psz = '\0';
                continue;
        }
        break;
    }
    return pszLine;
}
154
155
156	/**
157	* Get the next field in a field enumeration.
158	*
159	* @returns Pointer to the next field.
160	* @param ppsz Where to get and store the string position.
161	*/
162	static char NextField(char *ppsz)
163	{
164	return FirstField(ppsz, *ppsz);
165	}
166
167
168	/**
169	* Splits a decomposition field.
170	*
171	* This may start with a type that is enclosed in angle brackets.
172	*
173	* @returns Pointer to the mapping values following the type. @a *ppsz if empty.
174	* @param ppszType Pointer to the type field pointer. On input the type
175	* field contains the combined type and mapping string. On
176	* output this should only contain the type, no angle
177	* brackets. If no type specified, it is replaced with an
178	* empty string (const).
179	*/
180	static char SplitDecompField(char *ppszType)
181	{
182	/* Empty field? */
183	char psz = ppszType;
184	if (!*psz)
185	return psz;
186
187	/* No type? */
188	if (*psz != '<')
189	{
190	ppszType = (char )"";
191	return psz;
192	}
193
194	/* Split out the type. */
195	*ppszType = ++psz;
196	psz = strchr(psz, '>');
197	if (!psz)
198	{
199	ParseError("Bad Decomposition Type/Mappings\n");
200	return *ppszType;
201	}
202	*psz++ = '\0';
203
204	psz = StripLine(psz);
205	if (!*psz)
206	ParseError("Missing decomposition mappings\n");
207	return psz;
208	}
209
210	/**
211	* Converts a code point field to a number.
212	* @returns Code point.
213	* @param psz The field string.
214	*/
215	static RTUNICP ToNum(const char *psz)
216	{
217	char *pszEnd = NULL;
218	unsigned long ul = strtoul(psz, &pszEnd, 16);
219	if (pszEnd && *pszEnd)
220	ParseError("failed converting '%s' to a number!\n", psz);
221	return (RTUNICP)ul;
222	}
223
224
225	/**
226	* Same as ToNum except that if the field is empty the Default is returned.
227	*/
228	static RTUNICP ToNumDefault(const char *psz, RTUNICP Default)
229	{
230	if (*psz)
231	return ToNum(psz);
232	return Default;
233	}
234
235
236	/**
237	* Converts a code point range to numbers.
238	* @returns The start code point.\
239	* @returns ~(RTUNICP)0 on failure.
240	* @param psz The field string.
241	* @param pLast Where to store the last code point in the range.
242	*/
243	static RTUNICP ToRange(const char *psz, PRTUNICP pLast)
244	{
245	char *pszEnd = NULL;
246	unsigned long ulStart = strtoul(psz, &pszEnd, 16);
247	unsigned long ulLast = ulStart;
248	if (pszEnd && *pszEnd)
249	{
250	if (*pszEnd == '.')
251	{
252	while (*pszEnd == '.')
253	pszEnd++;
254	ulLast = strtoul(pszEnd, &pszEnd, 16);
255	if (pszEnd && *pszEnd)
256	{
257	ParseError("failed converting '%s' to a number!\n", psz);
258	return ~(RTUNICP)0;
259	}
260	}
261	else
262	{
263	ParseError("failed converting '%s' to a number!\n", psz);
264	return ~(RTUNICP)0;
265	}
266	}
267	*pLast = (RTUNICP)ulLast;
268	return (RTUNICP)ulStart;
269
270	}
271
272	/**
273	* For converting the decomposition mappings field and similar.
274	*
275	* @returns Mapping array or NULL if none.
276	* @param psz The string to convert. Can be empty.
277	* @param pcEntries Where to store the number of entries.
278	* @param cMax The max number of entries.
279	*/
280	static PRTUNICP ToMapping(char psz, unsigned pcEntries, unsigned cMax)
281	{
282	PRTUNICP paCps = NULL;
283	unsigned cAlloc = 0;
284	unsigned i = 0;
285
286	/* Convert the code points. */
287	while (psz)
288	{
289	/* skip leading spaces */
290	while (RT_C_IS_BLANK(*psz))
291	psz++;
292
293	/* the end? */
294	if (!*psz)
295	break;
296
297	/* room left? */
298	if (i >= cMax)
299	{
300	ParseError("Too many mappings.\n");
301	break;
302	}
303	if (i >= cAlloc)
304	{
305	cAlloc += 4;
306	paCps = (PRTUNICP)realloc(paCps, cAlloc * sizeof(paCps[0]));
307	if (!paCps)
308	{
309	fprintf(stderr, "out of memory (%u)\n", (unsigned)(cAlloc * sizeof(paCps[0])));
310	exit(1);
311	}
312	}
313
314	/* Find the end. */
315	char *pszThis = psz;
316	while (RT_C_IS_XDIGIT(*psz))
317	psz++;
318	if (psz && !RT_C_IS_BLANK(psz))
319	ParseError("Malformed mappings.\n");
320	if (*psz)
321	*psz++ = '\0';
322
323	/* Convert to number and add it. */
324	paCps[i++] = ToNum(pszThis);
325	}
326
327	*pcEntries = i;
328	return paCps;
329	}
330
331
/**
 * Duplicates a string, optimizing certain strings to save memory.
 *
 * Empty strings are not allocated; a shared constant empty string is
 * returned instead, so the result must not be written to or freed unless the
 * caller knows the input was non-empty.  Running out of memory is fatal
 * (exit(1)).
 *
 * @returns Pointer to string copy.
 * @param   pszStr      The string to duplicate.
 */
static char *DupStr(const char *pszStr)
{
    if (!*pszStr)
        return (char *)"";

    /* malloc + memcpy rather than strdup: strdup is POSIX, not ISO C/C++, and
       is not declared in strict standard modes. */
    size_t const cb  = strlen(pszStr) + 1;
    char        *psz = (char *)malloc(cb);
    if (psz)
        return (char *)memcpy(psz, pszStr, cb);

    fprintf(stderr, "out of memory!\n");
    exit(1);
}
349
350
351	/**
352	* Array of all possible and impossible unicode code points as of 4.1
353	*/
354	struct CPINFO
355	{
356	RTUNICP CodePoint;
357	RTUNICP SimpleUpperCaseMapping;
358	RTUNICP SimpleLowerCaseMapping;
359	RTUNICP SimpleTitleCaseMapping;
360	unsigned CanonicalCombiningClass;
361	const char *pszDecompositionType;
362	unsigned cDecompositionMapping;
363	PRTUNICP paDecompositionMapping;
364	const char *pszName;
365	/** Set if this is an unused entry */
366	unsigned fNullEntry : 1;
367
368	unsigned fAlphabetic : 1;
369	unsigned fASCIIHexDigit : 1;
370	unsigned fBidiControl : 1;
371	unsigned fCaseIgnorable : 1;
372	unsigned fCased : 1;
373	unsigned fChangesWhenCasefolded : 1;
374	unsigned fChangesWhenCasemapped : 1;
375	unsigned fChangesWhenLowercased : 1;
376	unsigned fChangesWhenTitlecased : 1;
377	unsigned fChangesWhenUppercased : 1;
378	unsigned fDash : 1;
379	unsigned fDefaultIgnorableCodePoint : 1;
380	unsigned fDeprecated : 1;
381	unsigned fDiacritic : 1;
382	unsigned fExtender : 1;
383	unsigned fGraphemeBase : 1;
384	unsigned fGraphemeExtend : 1;
385	unsigned fGraphemeLink : 1;
386	unsigned fHexDigit : 1;
387	unsigned fHyphen : 1;
388	unsigned fIDContinue : 1;
389	unsigned fIdeographic : 1;
390	unsigned fIDSBinaryOperator : 1;
391	unsigned fIDStart : 1;
392	unsigned fIDSTrinaryOperator : 1;
393	unsigned fJoinControl : 1;
394	unsigned fLogicalOrderException : 1;
395	unsigned fLowercase : 1;
396	unsigned fMath : 1;
397	unsigned fNoncharacterCodePoint : 1;
398	unsigned fOtherAlphabetic : 1;
399	unsigned fOtherDefaultIgnorableCodePoint : 1;
400	unsigned fOtherGraphemeExtend : 1;
401	unsigned fOtherIDContinue : 1;
402	unsigned fOtherIDStart : 1;
403	unsigned fOtherLowercase : 1;
404	unsigned fOtherMath : 1;
405	unsigned fOtherUppercase : 1;
406	unsigned fPatternSyntax : 1;
407	unsigned fPatternWhiteSpace : 1;
408	unsigned fQuotationMark : 1;
409	unsigned fRadical : 1;
410	unsigned fSoftDotted : 1;
411	unsigned fSTerm : 1;
412	unsigned fTerminalPunctuation : 1;
413	unsigned fUnifiedIdeograph : 1;
414	unsigned fUppercase : 1;
415	unsigned fVariationSelector : 1;
416	unsigned fWhiteSpace : 1;
417	unsigned fXIDContinue : 1;
418	unsigned fXIDStart : 1;
419
420	/** @name DerivedNormalizationProps.txt
421	* @{ */
422	unsigned fFullCompositionExclusion : 1;
423	unsigned fInvNFC_QC : 2; /*< If 1 (NFC_QC == N) then code point 100% sure not part of NFC string. /
424	unsigned fInvNFD_QC : 2; /*< If 1 (NFD_QC == N) then code point 100% sure not part of NFD string. /
425	unsigned fInvNFKC_QC : 2;
426	unsigned fInvNFKD_QC : 2;
427	unsigned fExpandsOnNFC : 1;
428	unsigned fExpandsOnNFD : 1;
429	unsigned fExpandsOnNFKC : 1;
430	unsigned fExpandsOnNFKD : 1;
431	/** @} */
432
433	/* unprocessed stuff, so far. */
434	const char *pszGeneralCategory;
435	const char *pszBidiClass;
436	const char *pszNumericType;
437	const char *pszNumericValueD;
438	const char *pszNumericValueN;
439	const char *pszBidiMirrored;
440	const char *pszUnicode1Name;
441	const char *pszISOComment;
442	} g_aCPInfo[0x110000];
443
444
445	/**
446	* Creates a 'null' entry at i.
447	* @param i The entry in question.
448	*/
449	static void NullEntry(unsigned i)
450	{
451	g_aCPInfo[i].CodePoint = i;
452	g_aCPInfo[i].fNullEntry = 1;
453	g_aCPInfo[i].SimpleUpperCaseMapping = i;
454	g_aCPInfo[i].SimpleLowerCaseMapping = i;
455	g_aCPInfo[i].SimpleTitleCaseMapping = i;
456	g_aCPInfo[i].pszDecompositionType = "";
457	g_aCPInfo[i].cDecompositionMapping = 0;
458	g_aCPInfo[i].paDecompositionMapping = NULL;
459	g_aCPInfo[i].pszName = "";
460	g_aCPInfo[i].pszGeneralCategory = "";
461	g_aCPInfo[i].pszBidiClass = "";
462	g_aCPInfo[i].pszNumericType = "";
463	g_aCPInfo[i].pszNumericValueD = "";
464	g_aCPInfo[i].pszNumericValueN = "";
465	g_aCPInfo[i].pszBidiMirrored = "";
466	g_aCPInfo[i].pszUnicode1Name = "";
467	g_aCPInfo[i].pszISOComment = "";
468	}
469
470
471	/**
472	* Open a file for reading, optionally with a base path prefixed.
473	*
474	* @returns file stream on success, NULL w/ complaint on failure.
475	* @param pszBasePath The base path, can be NULL.
476	* @param pszFilename The name of the file to open.
477	*/
478	static FILE OpenFile(const char pszBasePath, const char *pszFilename)
479	{
480	FILE *pFile;
481	if ( !pszBasePath
482	\|\| *pszFilename == '/'
483	#if defined(_MSC_VER) \|\| defined(__OS2__)
484	\|\| *pszFilename == '\\'
485	\|\| (*pszFilename && pszFilename[1] == ':')
486	#endif
487	)
488	{
489	pFile = fopen(pszFilename, "r");
490	if (!pFile)
491	fprintf(stderr, "uniread: failed to open '%s' for reading\n", pszFilename);
492	}
493	else
494	{
495	size_t cchBasePath = strlen(pszBasePath);
496	size_t cchFilename = strlen(pszFilename);
497	char pszFullName = (char )malloc(cchBasePath + 1 + cchFilename + 1);
498	if (!pszFullName)
499	{
500	fprintf(stderr, "uniread: failed to allocate %d bytes\n", (int)(cchBasePath + 1 + cchFilename + 1));
501	return NULL;
502	}
503
504	memcpy(pszFullName, pszBasePath, cchBasePath);
505	pszFullName[cchBasePath] = '/';
506	memcpy(&pszFullName[cchBasePath + 1], pszFilename, cchFilename + 1);
507
508	pFile = fopen(pszFullName, "r");
509	if (!pFile)
510	fprintf(stderr, "uniread: failed to open '%s' for reading\n", pszFullName);
511	free(pszFullName);
512	}
513	g_pszCurFile = pszFilename;
514	g_iLine = 0;
515	return pFile;
516	}
517
518
519	/**
520	* Wrapper around fgets that keep track of the line number.
521	*
522	* @returns See fgets.
523	* @param pszBuf The buffer. See fgets for output definition.
524	* @param cbBuf The buffer size.
525	* @param pFile The file to read from.
526	*/
527	static char GetLineFromFile(char pszBuf, int cbBuf, FILE *pFile)
528	{
529	g_iLine++;
530	return fgets(pszBuf, cbBuf, pFile);
531	}
532
533
534	/**
535	* Closes a file opened by OpenFile
536	*
537	* @param pFile The file to close.
538	*/
539	static void CloseFile(FILE *pFile)
540	{
541	g_pszCurFile = NULL;
542	g_iLine = 0;
543	fclose(pFile);
544	}
545
546
547	/**
548	* Read the UnicodeData.txt file.
549	* @returns 0 on success.
550	* @returns !0 on failure.
551	* @param pszBasePath The base path, can be NULL.
552	* @param pszFilename The name of the file.
553	*/
554	static int ReadUnicodeData(const char pszBasePath, const char pszFilename)
555	{
556	/*
557	* Open input.
558	*/
559	FILE *pFile = OpenFile(pszBasePath, pszFilename);
560	if (!pFile)
561	return 1;
562
563	/*
564	* Parse the input and spit out the output.
565	*/
566	char szLine[4096];
567	RTUNICP i = 0;
568	while (GetLineFromFile(szLine, sizeof(szLine), pFile) != NULL)
569	{
570	if (IsCommentOrBlankLine(szLine))
571	continue;
572
573	char *pszCurField;
574	char pszCodePoint = FirstField(&pszCurField, StripLine(szLine)); / 0 */
575	char pszName = NextField(&pszCurField); / 1 */
576	char pszGeneralCategory = NextField(&pszCurField); / 2 */
577	char pszCanonicalCombiningClass = NextField(&pszCurField); / 3 */
578	char pszBidiClass = NextField(&pszCurField); / 4 */
579	char pszDecompositionType = NextField(&pszCurField); / 5 */
580	char *pszDecompositionMapping = SplitDecompField(&pszDecompositionType);
581	char pszNumericType = NextField(&pszCurField); / 6 */
582	char pszNumericValueD = NextField(&pszCurField); / 7 */
583	char pszNumericValueN = NextField(&pszCurField); / 8 */
584	char pszBidiMirrored = NextField(&pszCurField); / 9 */
585	char pszUnicode1Name = NextField(&pszCurField); / 10 */
586	char pszISOComment = NextField(&pszCurField); / 11 */
587	char pszSimpleUpperCaseMapping = NextField(&pszCurField); / 12 */
588	char pszSimpleLowerCaseMapping = NextField(&pszCurField); / 13 */
589	char pszSimpleTitleCaseMapping = NextField(&pszCurField); / 14 */
590
591	RTUNICP CodePoint = ToNum(pszCodePoint);
592	if (CodePoint >= RT_ELEMENTS(g_aCPInfo))
593	{
594	ParseError("U+05X is out of range\n", CodePoint);
595	continue;
596	}
597
598	/* catchup? */
599	while (i < CodePoint)
600	NullEntry(i++);
601	if (i != CodePoint)
602	{
603	ParseError("i=%d CodePoint=%u\n", i, CodePoint);
604	CloseFile(pFile);
605	return 1;
606	}
607
608	/* this one */
609	g_aCPInfo[i].CodePoint = i;
610	g_aCPInfo[i].fNullEntry = 0;
611	g_aCPInfo[i].pszName = DupStr(pszName);
612	g_aCPInfo[i].SimpleUpperCaseMapping = ToNumDefault(pszSimpleUpperCaseMapping, CodePoint);
613	g_aCPInfo[i].SimpleLowerCaseMapping = ToNumDefault(pszSimpleLowerCaseMapping, CodePoint);
614	g_aCPInfo[i].SimpleTitleCaseMapping = ToNumDefault(pszSimpleTitleCaseMapping, CodePoint);
615	g_aCPInfo[i].CanonicalCombiningClass = ToNum(pszCanonicalCombiningClass);
616	g_aCPInfo[i].pszDecompositionType = DupStr(pszDecompositionType);
617	g_aCPInfo[i].paDecompositionMapping = ToMapping(pszDecompositionMapping, &g_aCPInfo[i].cDecompositionMapping, 20);
618	g_aCPInfo[i].pszGeneralCategory = DupStr(pszGeneralCategory);
619	g_aCPInfo[i].pszBidiClass = DupStr(pszBidiClass);
620	g_aCPInfo[i].pszNumericType = DupStr(pszNumericType);
621	g_aCPInfo[i].pszNumericValueD = DupStr(pszNumericValueD);
622	g_aCPInfo[i].pszNumericValueN = DupStr(pszNumericValueN);
623	g_aCPInfo[i].pszBidiMirrored = DupStr(pszBidiMirrored);
624	g_aCPInfo[i].pszUnicode1Name = DupStr(pszUnicode1Name);
625	g_aCPInfo[i].pszISOComment = DupStr(pszISOComment);
626	i++;
627	}
628
629	/* catchup? */
630	while (i < RT_ELEMENTS(g_aCPInfo))
631	NullEntry(i++);
632	CloseFile(pFile);
633
634	return 0;
635	}
636
637
638	/**
639	* Generates excluded data.
640	*
641	* @returns 0 on success, exit code on failure.
642	*/
643	static int GenerateExcludedData(void)
644	{
645	/*
646	* Hangul Syllables U+AC00 to U+D7A3.
647	*/
648	for (RTUNICP i = 0xac00; i <= 0xd7a3; i++)
649	{
650	g_aCPInfo[i].fNullEntry = 0;
651	g_aCPInfo[i].fInvNFD_QC = 1;
652	/** @todo generate the decomposition: http://unicode.org/reports/tr15/#Hangul
653	* */
654	}
655
656	/** @todo
657	* CJK Ideographs Extension A (U+3400 - U+4DB5)
658	* CJK Ideographs (U+4E00 - U+9FA5)
659	* CJK Ideograph Extension B (U+20000 - U+2A6D6)
660	* CJK Ideograph Extension C (U+2A700 - U+2B734)
661	*/
662
663	return 0;
664	}
665
666
667
668	/**
669	* Worker for ApplyProperty that handles a yes, no, maybe property value.
670	*
671	* @returns 0 (NO), 1 (YES), 2 (MAYBE).
672	* @param ppszNextField The field cursor, input and output.
673	*/
674	static int YesNoMaybePropertyValue(char **ppszNextField)
675	{
676	if (!**ppszNextField)
677	{
678	ParseError("Missing Y/N/M field\n");
679	return 0;
680	}
681	char *psz = NextField(ppszNextField);
682	if (!strcmp(psz, "N"))
683	return 0;
684	if (!strcmp(psz, "Y"))
685	return 1;
686	if (!strcmp(psz, "M"))
687	return 2;
688	ParseError("Unexpected Y/N/M value: '%s'\n", psz);
689	return 0;
690	}
691
692
693	/**
694	* Inverted version of YesNoMaybePropertyValue
695	*
696	* @returns 1 (NO), 0 (YES), 2 (MAYBE).
697	* @param ppszNextField The field cursor, input and output.
698	*/
699	static int YesNoMaybePropertyValueInv(char **ppszNextField)
700	{
701	unsigned rc = YesNoMaybePropertyValue(ppszNextField);
702	switch (rc)
703	{
704	case 0: return 1;
705	case 1: return 0;
706	default: return rc;
707	}
708	}
709
710
711	/**
712	* Applies a property to a code point.
713	*
714	* @param StartCP The code point.
715	* @param pszProperty The property name.
716	* @param pszNextField The next field.
717	*/
718	static void ApplyProperty(RTUNICP StartCP, const char pszProperty, char pszNextField)
719	{
720	if (StartCP >= RT_ELEMENTS(g_aCPInfo))
721	{
722	ParseError("U+%06X is out of the g_aCPInfo range.\n", StartCP);
723	return;
724	}
725	struct CPINFO *pCPInfo = &g_aCPInfo[StartCP];
726	/* string switch */
727	if (!strcmp(pszProperty, "ASCII_Hex_Digit")) pCPInfo->fASCIIHexDigit = 1;
728	else if (!strcmp(pszProperty, "Alphabetic")) pCPInfo->fAlphabetic = 1;
729	else if (!strcmp(pszProperty, "Bidi_Control")) pCPInfo->fBidiControl = 1;
730	else if (!strcmp(pszProperty, "Case_Ignorable")) pCPInfo->fCaseIgnorable = 1;
731	else if (!strcmp(pszProperty, "Cased")) pCPInfo->fCased = 1;
732	else if (!strcmp(pszProperty, "Changes_When_Casefolded")) pCPInfo->fChangesWhenCasefolded = 1;
733	else if (!strcmp(pszProperty, "Changes_When_Casemapped")) pCPInfo->fChangesWhenCasemapped = 1;
734	else if (!strcmp(pszProperty, "Changes_When_Lowercased")) pCPInfo->fChangesWhenLowercased = 1;
735	else if (!strcmp(pszProperty, "Changes_When_Titlecased")) pCPInfo->fChangesWhenTitlecased = 1;
736	else if (!strcmp(pszProperty, "Changes_When_Uppercased")) pCPInfo->fChangesWhenUppercased = 1;
737	else if (!strcmp(pszProperty, "Dash")) pCPInfo->fDash = 1;
738	else if (!strcmp(pszProperty, "Default_Ignorable_Code_Point")) pCPInfo->fDefaultIgnorableCodePoint = 1;
739	else if (!strcmp(pszProperty, "Deprecated")) pCPInfo->fDeprecated = 1;
740	else if (!strcmp(pszProperty, "Diacritic")) pCPInfo->fDiacritic = 1;
741	else if (!strcmp(pszProperty, "Extender")) pCPInfo->fExtender = 1;
742	else if (!strcmp(pszProperty, "Grapheme_Base")) pCPInfo->fGraphemeBase = 1;
743	else if (!strcmp(pszProperty, "Grapheme_Extend")) pCPInfo->fGraphemeExtend = 1;
744	else if (!strcmp(pszProperty, "Grapheme_Link")) pCPInfo->fGraphemeLink = 1;
745	else if (!strcmp(pszProperty, "Hex_Digit")) pCPInfo->fHexDigit = 1;
746	else if (!strcmp(pszProperty, "Hyphen")) pCPInfo->fHyphen = 1;
747	else if (!strcmp(pszProperty, "ID_Continue")) pCPInfo->fIDContinue = 1;
748	else if (!strcmp(pszProperty, "ID_Start")) pCPInfo->fIDStart = 1;
749	else if (!strcmp(pszProperty, "Ideographic")) pCPInfo->fIdeographic = 1;
750	else if (!strcmp(pszProperty, "IDS_Binary_Operator")) pCPInfo->fIDSBinaryOperator = 1;
751	else if (!strcmp(pszProperty, "IDS_Trinary_Operator")) pCPInfo->fIDSTrinaryOperator = 1;
752	else if (!strcmp(pszProperty, "Join_Control")) pCPInfo->fJoinControl = 1;
753	else if (!strcmp(pszProperty, "Logical_Order_Exception")) pCPInfo->fLogicalOrderException = 1;
754	else if (!strcmp(pszProperty, "Lowercase")) pCPInfo->fLowercase = 1;
755	else if (!strcmp(pszProperty, "Math")) pCPInfo->fMath = 1;
756	else if (!strcmp(pszProperty, "Noncharacter_Code_Point")) pCPInfo->fNoncharacterCodePoint = 1;
757	else if (!strcmp(pszProperty, "Other_Alphabetic")) pCPInfo->fOtherAlphabetic = 1;
758	else if (!strcmp(pszProperty, "Other_Default_Ignorable_Code_Point")) pCPInfo->fOtherDefaultIgnorableCodePoint = 1;
759	else if (!strcmp(pszProperty, "Other_Grapheme_Extend")) pCPInfo->fOtherGraphemeExtend = 1;
760	else if (!strcmp(pszProperty, "Other_ID_Continue")) pCPInfo->fOtherIDContinue = 1;
761	else if (!strcmp(pszProperty, "Other_ID_Start")) pCPInfo->fOtherIDStart = 1;
762	else if (!strcmp(pszProperty, "Other_Lowercase")) pCPInfo->fOtherLowercase = 1;
763	else if (!strcmp(pszProperty, "Other_Math")) pCPInfo->fOtherMath = 1;
764	else if (!strcmp(pszProperty, "Other_Uppercase")) pCPInfo->fOtherUppercase = 1;
765	else if (!strcmp(pszProperty, "Pattern_Syntax")) pCPInfo->fPatternSyntax = 1;
766	else if (!strcmp(pszProperty, "Pattern_White_Space")) pCPInfo->fPatternWhiteSpace = 1;
767	else if (!strcmp(pszProperty, "Quotation_Mark")) pCPInfo->fQuotationMark = 1;
768	else if (!strcmp(pszProperty, "Radical")) pCPInfo->fRadical = 1;
769	else if (!strcmp(pszProperty, "Soft_Dotted")) pCPInfo->fSoftDotted = 1;
770	else if (!strcmp(pszProperty, "STerm")) pCPInfo->fSTerm = 1;
771	else if (!strcmp(pszProperty, "Terminal_Punctuation")) pCPInfo->fTerminalPunctuation = 1;
772	else if (!strcmp(pszProperty, "Unified_Ideograph")) pCPInfo->fUnifiedIdeograph = 1;
773	else if (!strcmp(pszProperty, "Uppercase")) pCPInfo->fUppercase = 1;
774	else if (!strcmp(pszProperty, "Variation_Selector")) pCPInfo->fVariationSelector = 1;
775	else if (!strcmp(pszProperty, "White_Space")) pCPInfo->fWhiteSpace = 1;
776	else if (!strcmp(pszProperty, "XID_Continue")) pCPInfo->fXIDContinue = 1;
777	else if (!strcmp(pszProperty, "XID_Start")) pCPInfo->fXIDStart = 1;
778	/* DerivedNormalizationProps: */
779	else if (!strcmp(pszProperty, "FC_NFKC")) return; /* ignored */
780	else if (!strcmp(pszProperty, "Full_Composition_Exclusion")) pCPInfo->fFullCompositionExclusion = 1;
781	else if (!strcmp(pszProperty, "NFC_QC")) pCPInfo->fInvNFC_QC = YesNoMaybePropertyValueInv(&pszNextField);
782	else if (!strcmp(pszProperty, "NFD_QC")) pCPInfo->fInvNFD_QC = YesNoMaybePropertyValueInv(&pszNextField);
783	else if (!strcmp(pszProperty, "NFKC_QC")) pCPInfo->fInvNFKC_QC = YesNoMaybePropertyValueInv(&pszNextField);
784	else if (!strcmp(pszProperty, "NFKD_QC")) pCPInfo->fInvNFKD_QC = YesNoMaybePropertyValueInv(&pszNextField);
785	else if (!strcmp(pszProperty, "Expands_On_NFC")) pCPInfo->fExpandsOnNFC = 1;
786	else if (!strcmp(pszProperty, "Expands_On_NFD")) pCPInfo->fExpandsOnNFD = 1;
787	else if (!strcmp(pszProperty, "Expands_On_NFKC")) pCPInfo->fExpandsOnNFKC = 1;
788	else if (!strcmp(pszProperty, "Expands_On_NFKD")) pCPInfo->fExpandsOnNFKD = 1;
789	else if (!strcmp(pszProperty, "NFKC_CF")) return; /ignore /
790	else if (!strcmp(pszProperty, "Changes_When_NFKC_Casefolded")) return; /ignore /
791	else
792	{
793	ParseError("Unknown property '%s'\n", pszProperty);
794	return;
795	}
796
797	if (pszNextField && *pszNextField)
798	ParseError("Unexpected next field: '%s'\n", pszNextField);
799	}
800
801
802	/**
803	* Reads a property file.
804	*
805	* There are several property files, this code can read all
806	* of those but will only make use of the properties it recognizes.
807	*
808	* @returns 0 on success.
809	* @returns !0 on failure.
810	* @param pszBasePath The base path, can be NULL.
811	* @param pszFilename The name of the file.
812	*/
813	static int ReadProperties(const char pszBasePath, const char pszFilename)
814	{
815	/*
816	* Open input.
817	*/
818	FILE *pFile = OpenFile(pszBasePath, pszFilename);
819	if (!pFile)
820	return 1;
821
822	/*
823	* Parse the input and spit out the output.
824	*/
825	char szLine[4096];
826	while (GetLineFromFile(szLine, sizeof(szLine), pFile) != NULL)
827	{
828	if (IsCommentOrBlankLine(szLine))
829	continue;
830	char *pszCurField;
831	char *pszRange = FirstField(&pszCurField, StripLine(szLine));
832	char *pszProperty = NextField(&pszCurField);
833	if (!*pszProperty)
834	{
835	ParseError("no property field.\n");
836	continue;
837	}
838
839	RTUNICP LastCP;
840	RTUNICP StartCP = ToRange(pszRange, &LastCP);
841	if (StartCP == ~(RTUNICP)0)
842	continue;
843
844	while (StartCP <= LastCP)
845	ApplyProperty(StartCP++, pszProperty, pszCurField);
846	}
847
848	CloseFile(pFile);
849
850	return 0;
851	}
852
853
/**
 * Appends a flag to the string.
 *
 * If the string is not empty the flag is separated from the existing content
 * by " | ".  The caller must make sure the buffer is large enough.
 *
 * @returns @a psz.
 * @param   psz         The string buffer to append to.
 * @param   pszFlag     The flag name to append.
 */
static char *AppendFlag(char *psz, const char *pszFlag)
{
    char *pszEnd = strchr(psz, '\0');
    if (pszEnd != psz)
    {
        *pszEnd++ = ' ';
        *pszEnd++ = '|';
        *pszEnd++ = ' ';
    }
    strcpy(pszEnd, pszFlag);
    return psz;
}
869
870	/**
871	* Calcs the flags for a code point.
872	* @returns true if there is a flag.
873	* @returns false if the isn't.
874	*/
875	static bool CalcFlags(struct CPINFO pInfo, char pszFlags)
876	{
877	pszFlags[0] = '\0';
878	/** @todo read the specs on this other vs standard stuff, and check out the finer points */
879	if (pInfo->fAlphabetic \|\| pInfo->fOtherAlphabetic)
880	AppendFlag(pszFlags, "RTUNI_ALPHA");
881	if (pInfo->fHexDigit \|\| pInfo->fASCIIHexDigit)
882	AppendFlag(pszFlags, "RTUNI_XDIGIT");
883	if (!strcmp(pInfo->pszGeneralCategory, "Nd"))
884	AppendFlag(pszFlags, "RTUNI_DDIGIT");
885	if (pInfo->fWhiteSpace)
886	AppendFlag(pszFlags, "RTUNI_WSPACE");
887	if (pInfo->fUppercase \|\| pInfo->fOtherUppercase)
888	AppendFlag(pszFlags, "RTUNI_UPPER");
889	if (pInfo->fLowercase \|\| pInfo->fOtherLowercase)
890	AppendFlag(pszFlags, "RTUNI_LOWER");
891	//if (pInfo->???)
892	// AppendFlag(pszFlags, "RTUNI_BSPACE");
893	#if 0
894	if (pInfo->fInvNFD_QC != 0 \|\| pInfo->fInvNFC_QC != 0)
895	{
896	AppendFlag(pszFlags, "RTUNI_QC_NFX");
897	if (!pInfo->paDecompositionMapping && pInfo->fInvNFD_QC)
898	fprintf(stderr, "uniread: U+%05X is QC_NFD but has no mappings.\n", pInfo->CodePoint);
899	else if (*pInfo->pszDecompositionType && pInfo->fInvNFD_QC)
900	fprintf(stderr, "uniread: U+%05X is QC_NFD but has no canonical mappings.\n", pInfo->CodePoint);
901	}
902	else if (pInfo->paDecompositionMapping && !*pInfo->pszDecompositionType)
903	fprintf(stderr, "uniread: U+%05X is not QC_NFX but has canonical mappings.\n", pInfo->CodePoint);
904	#endif
905
906	if (!*pszFlags)
907	{
908	pszFlags[0] = '0';
909	pszFlags[1] = '\0';
910	return false;
911	}
912	return true;
913	}
914
915
916	/**
917	* Closes the primary output stream.
918	*/
919	static int Stream1Close(void)
920	{
921	if (g_pCurOutFile && g_pCurOutFile != stdout && g_pCurOutFile != stderr)
922	{
923	if (fclose(g_pCurOutFile) != 0)
924	{
925	fprintf(stderr, "Error closing output file.\n");
926	return -1;
927	}
928	}
929	g_pCurOutFile = NULL;
930	return 0;
931	}
932
933
934	/**
935	* Initializes the 1st stream to output to a given file.
936	*/
937	static int Stream1Init(const char *pszName)
938	{
939	int rc = Stream1Close();
940	if (!rc)
941	{
942	g_pCurOutFile = fopen(pszName, "w");
943	if (!g_pCurOutFile)
944	{
945	fprintf(stderr, "Error opening output file '%s'.\n", pszName);
946	rc = -1;
947	}
948	}
949	return rc;
950	}
951
952
953	/**
954	* printf wrapper for the primary output stream.
955	*
956	* @returns See vfprintf.
957	* @param pszFormat The vfprintf format string.
958	* @param ... The format arguments.
959	*/
960	static int Stream1Printf(const char *pszFormat, ...)
961	{
962	int cch;
963	va_list va;
964	va_start(va, pszFormat);
965	cch = vfprintf(g_pCurOutFile, pszFormat, va);
966	va_end(va);
967	return cch;
968	}
969
970
/** The data store (text buffer) for stream two. */
static char                 g_szStream2[10240];
/** Current write offset into g_szStream2. */
static unsigned volatile    g_offStream2 = 0;
974
975	/**
976	* Initializes the 2nd steam.
977	*/
978	static void Stream2Init(void)
979	{
980	g_szStream2[0] = '\0';
981	g_offStream2 = 0;
982	}
983
984	/**
985	* Flushes the 2nd stream to stdout.
986	*/
987	static int Stream2Flush(void)
988	{
989	g_szStream2[g_offStream2] = '\0';
990	Stream1Printf("%s", g_szStream2);
991	Stream2Init();
992	return 0;
993	}
994
995	/**
996	* printf to the 2nd stream.
997	*/
998	static int Stream2Printf(const char *pszFormat, ...)
999	{
1000	unsigned offStream2 = g_offStream2;
1001	va_list va;
1002	va_start(va, pszFormat);
1003	int cch = vsprintf(&g_szStream2[offStream2], pszFormat, va);
1004	va_end(va);
1005	offStream2 += cch;
1006	if (offStream2 >= sizeof(g_szStream2))
1007	{
1008	fprintf(stderr, "error: stream2 overflow!\n");
1009	exit(1);
1010	}
1011	g_offStream2 = offStream2;
1012	return cch;
1013	}
1014
1015
1016	/**
1017	* Print the unidata.cpp file header and include list.
1018	*/
1019	int PrintHeader(const char argv0, const char pszBaseDir)
1020	{
1021	char szBuf[1024];
1022	if (!pszBaseDir)
1023	{
1024	memset(szBuf, 0, sizeof(szBuf));
1025	#ifdef _MSC_VER
1026	_getcwd(szBuf, sizeof(szBuf));
1027	#else
1028	getcwd(szBuf, sizeof(szBuf));
1029	#endif
1030	pszBaseDir = szBuf;
1031	}
1032
1033	Stream1Printf("/* $" "Id" "$ */\n"
1034	"/** @file\n"
1035	" * IPRT - Unicode Tables.\n"
1036	" *\n"
1037	" * Automatically Generated from %s\n"
1038	" * by %s (" __DATE__ " " __TIME__ ")\n"
1039	" */\n"
1040	"\n"
1041	"/*\n"
1042	" * Copyright (C) 2006-2015 Oracle Corporation \n"
1043	" *\n"
1044	" * This file is part of VirtualBox Open Source Edition (OSE), as\n"
1045	" * available from http://www.virtualbox.org. This file is free software;\n"
1046	" * you can redistribute it and/or modify it under the terms of the GNU\n"
1047	" * General Public License (GPL) as published by the Free Software\n"
1048	" * Foundation, in version 2 as it comes in the \"COPYING\" file of the\n"
1049	" * VirtualBox OSE distribution. VirtualBox OSE is distributed in the\n"
1050	" * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.\n"
1051	" *\n"
1052	" * The contents of this file may alternatively be used under the terms\n"
1053	" * of the Common Development and Distribution License Version 1.0\n"
1054	" * (CDDL) only, as it comes in the \"COPYING.CDDL\" file of the\n"
1055	" * VirtualBox OSE distribution, in which case the provisions of the\n"
1056	" * CDDL are applicable instead of those of the GPL.\n"
1057	" *\n"
1058	" * You may elect to license modified versions of this file under the\n"
1059	" * terms and conditions of either the GPL or the CDDL or both.\n"
1060	" */\n"
1061	"\n"
1062	"#include <iprt/uni.h>\n"
1063	"\n",
1064	pszBaseDir, argv0);
1065	return 0;
1066	}
1067
1068
1069	/**
1070	* Print the flag tables.
1071	*/
1072	int PrintFlags(void)
1073	{
1074	/*
1075	* Print flags table.
1076	*/
1077	Stream2Init();
1078	Stream2Printf("RT_DECL_DATA_CONST(const RTUNIFLAGSRANGE) g_aRTUniFlagsRanges[] =\n"
1079	"{\n");
1080	RTUNICP i = 0;
1081	int iStart = -1;
1082	while (i < RT_ELEMENTS(g_aCPInfo))
1083	{
1084	/* figure how far off the next chunk is */
1085	char szFlags[256];
1086	unsigned iNonNull = i;
1087	while ( iNonNull < RT_ELEMENTS(g_aCPInfo)
1088	&& iNonNull >= 256
1089	&& (g_aCPInfo[iNonNull].fNullEntry \|\| !CalcFlags(&g_aCPInfo[iNonNull], szFlags)) )
1090	iNonNull++;
1091	if (iNonNull - i > 4096 \|\| iNonNull == RT_ELEMENTS(g_aCPInfo))
1092	{
1093	if (iStart >= 0)
1094	{
1095	Stream1Printf("};\n\n");
1096	Stream2Printf(" { 0x%06x, 0x%06x, &g_afRTUniFlags0x%06x[0] },\n", iStart, i, iStart);
1097	iStart = -1;
1098	}
1099	i = iNonNull;
1100	}
1101	else
1102	{
1103	if (iStart < 0)
1104	{
1105	Stream1Printf("static const uint8_t g_afRTUniFlags0x%06x[] =\n"
1106	"{\n", i);
1107	iStart = i;
1108	}
1109	CalcFlags(&g_aCPInfo[i], szFlags);
1110	Stream1Printf(" %50s, /* U+%06x: %s*/\n", szFlags, g_aCPInfo[i].CodePoint, g_aCPInfo[i].pszName);
1111	i++;
1112	}
1113	}
1114	Stream2Printf(" { ~(RTUNICP)0, ~(RTUNICP)0, NULL }\n"
1115	"};\n\n\n");
1116	Stream1Printf("\n");
1117	return Stream2Flush();
1118	}
1119
1120
1121	/**
1122	* Prints the upper case tables.
1123	*/
1124	static int PrintUpper(void)
1125	{
1126	Stream2Init();
1127	Stream2Printf("RT_DECL_DATA_CONST(const RTUNICASERANGE) g_aRTUniUpperRanges[] =\n"
1128	"{\n");
1129	RTUNICP i = 0;
1130	int iStart = -1;
1131	while (i < RT_ELEMENTS(g_aCPInfo))
1132	{
1133	/* figure how far off the next chunk is */
1134	unsigned iSameCase = i;
1135	while ( iSameCase < RT_ELEMENTS(g_aCPInfo)
1136	&& g_aCPInfo[iSameCase].SimpleUpperCaseMapping == g_aCPInfo[iSameCase].CodePoint
1137	&& iSameCase >= 256)
1138	iSameCase++;
1139	if (iSameCase - i > 4096/sizeof(RTUNICP) \|\| iSameCase == RT_ELEMENTS(g_aCPInfo))
1140	{
1141	if (iStart >= 0)
1142	{
1143	Stream1Printf("};\n\n");
1144	Stream2Printf(" { 0x%06x, 0x%06x, &g_afRTUniUpper0x%06x[0] },\n", iStart, i, iStart);
1145	iStart = -1;
1146	}
1147	i = iSameCase;
1148	}
1149	else
1150	{
1151	if (iStart < 0)
1152	{
1153	Stream1Printf("static const RTUNICP g_afRTUniUpper0x%06x[] =\n"
1154	"{\n", i);
1155	iStart = i;
1156	}
1157	Stream1Printf(" 0x%02x, /* U+%06x: %s*/\n", g_aCPInfo[i].SimpleUpperCaseMapping, g_aCPInfo[i].CodePoint, g_aCPInfo[i].pszName);
1158	i++;
1159	}
1160	}
1161	Stream2Printf(" { ~(RTUNICP)0, ~(RTUNICP)0, NULL }\n"
1162	"};\n\n\n");
1163	Stream1Printf("\n");
1164	return Stream2Flush();
1165	}
1166
1167
1168	/**
1169	* Prints the lowercase tables.
1170	*/
1171	static int PrintLower(void)
1172	{
1173	Stream2Init();
1174	Stream2Printf("RT_DECL_DATA_CONST(const RTUNICASERANGE) g_aRTUniLowerRanges[] =\n"
1175	"{\n");
1176	RTUNICP i = 0;
1177	int iStart = -1;
1178	while (i < RT_ELEMENTS(g_aCPInfo))
1179	{
1180	/* figure how far off the next chunk is */
1181	unsigned iSameCase = i;
1182	while ( iSameCase < RT_ELEMENTS(g_aCPInfo)
1183	&& g_aCPInfo[iSameCase].SimpleLowerCaseMapping == g_aCPInfo[iSameCase].CodePoint
1184	&& iSameCase >= 256)
1185	iSameCase++;
1186	if (iSameCase - i > 4096/sizeof(RTUNICP) \|\| iSameCase == RT_ELEMENTS(g_aCPInfo))
1187	{
1188	if (iStart >= 0)
1189	{
1190	Stream1Printf("};\n\n");
1191	Stream2Printf(" { 0x%06x, 0x%06x, &g_afRTUniLower0x%06x[0] },\n", iStart, i, iStart);
1192	iStart = -1;
1193	}
1194	i = iSameCase;
1195	}
1196	else
1197	{
1198	if (iStart < 0)
1199	{
1200	Stream1Printf("static const RTUNICP g_afRTUniLower0x%06x[] =\n"
1201	"{\n", i);
1202	iStart = i;
1203	}
1204	Stream1Printf(" 0x%02x, /* U+%06x: %s*/\n",
1205	g_aCPInfo[i].SimpleLowerCaseMapping, g_aCPInfo[i].CodePoint, g_aCPInfo[i].pszName);
1206	i++;
1207	}
1208	}
1209	Stream2Printf(" { ~(RTUNICP)0, ~(RTUNICP)0, NULL }\n"
1210	"};\n\n\n");
1211	Stream1Printf("\n");
1212	return Stream2Flush();
1213	}
1214
1215
1216	int main(int argc, char **argv)
1217	{
1218	/*
1219	* Parse args.
1220	*/
1221	if (argc <= 1)
1222	{
1223	printf("usage: %s [-C\|--dir <UCD-dir>] [UnicodeData.txt [DerivedCoreProperties.txt [PropList.txt] [DerivedNormalizationProps.txt]]]\n",
1224	argv[0]);
1225	return 1;
1226	}
1227
1228	const char *pszBaseDir = NULL;
1229	const char *pszUnicodeData = "UnicodeData.txt";
1230	const char *pszDerivedCoreProperties = "DerivedCoreProperties.txt";
1231	const char *pszPropList = "PropList.txt";
1232	const char *pszDerivedNormalizationProps = "DerivedNormalizationProps.txt";
1233	int iFile = 0;
1234	for (int argi = 1; argi < argc; argi++)
1235	{
1236	if (argv[argi][0] != '-')
1237	{
1238	switch (iFile++)
1239	{
1240	case 0: pszUnicodeData = argv[argi]; break;
1241	case 1: pszDerivedCoreProperties = argv[argi]; break;
1242	case 2: pszPropList = argv[argi]; break;
1243	case 3: pszDerivedNormalizationProps = argv[argi]; break;
1244	default:
1245	fprintf(stderr, "uniread: syntax error at '%s': too many filenames\n", argv[argi]);
1246	return 1;
1247	}
1248	}
1249	else if ( !strcmp(argv[argi], "--dir")
1250	\|\| !strcmp(argv[argi], "-C"))
1251	{
1252	if (argi + 1 >= argc)
1253	{
1254	fprintf(stderr, "uniread: syntax error: '%s' is missing the directory name.\n", argv[argi]);
1255	return 1;
1256	}
1257	argi++;
1258	pszBaseDir = argv[argi];
1259	}
1260	else
1261	{
1262	fprintf(stderr, "uniread: syntax error at '%s': Unknown argument\n", argv[argi]);
1263	return 1;
1264	}
1265	}
1266
1267	/*
1268	* Read the data.
1269	*/
1270	int rc = ReadUnicodeData(pszBaseDir, pszUnicodeData);
1271	if (rc)
1272	return rc;
1273	rc = GenerateExcludedData();
1274	if (rc)
1275	return rc;
1276	rc = ReadProperties(pszBaseDir, pszPropList);
1277	if (rc)
1278	return rc;
1279	rc = ReadProperties(pszBaseDir, pszDerivedCoreProperties);
1280	if (rc)
1281	return rc;
1282	rc = ReadProperties(pszBaseDir, pszDerivedNormalizationProps);
1283	if (rc)
1284	return rc;
1285
1286	/*
1287	* Produce output files.
1288	*/
1289	rc = Stream1Init("unidata-flags.cpp");
1290	if (!rc)
1291	rc = PrintHeader(argv[0], pszBaseDir);
1292	if (!rc)
1293	rc = PrintFlags();
1294
1295	rc = Stream1Init("unidata-upper.cpp");
1296	if (!rc)
1297	rc = PrintHeader(argv[0], pszBaseDir);
1298	if (!rc)
1299	rc = PrintUpper();
1300
1301	rc = Stream1Init("unidata-lower.cpp");
1302	if (!rc)
1303	rc = PrintHeader(argv[0], pszBaseDir);
1304	if (!rc)
1305	rc = PrintLower();
1306	if (!rc)
1307	rc = Stream1Close();
1308
1309	/* done */
1310	return rc;
1311	}
1312

Note: See TracBrowser for help on using the repository browser.

source: vbox/trunk/src/VBox/Runtime/common/string/uniread.cpp@ 59706

Download in other formats: