VirtualBox

source: vbox/trunk/src/VBox/Runtime/common/string/uniread.cpp@ 78052

Last change on this file since 78052 was 76553, checked in by vboxsync, 6 years ago

scm --update-copyright-year

  • Property svn:eol-style set to native
  • Property svn:keywords set to Id Revision
File size: 41.4 KB
Line 
1/* $Id: uniread.cpp 76553 2019-01-01 01:45:53Z vboxsync $ */
2/** @file
3 * IPRT - Unicode Specification Reader.
4 */
5
6/*
7 * Copyright (C) 2006-2019 Oracle Corporation
8 *
9 * This file is part of VirtualBox Open Source Edition (OSE), as
10 * available from http://www.virtualbox.org. This file is free software;
11 * you can redistribute it and/or modify it under the terms of the GNU
12 * General Public License (GPL) as published by the Free Software
13 * Foundation, in version 2 as it comes in the "COPYING" file of the
14 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
15 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
16 *
17 * The contents of this file may alternatively be used under the terms
18 * of the Common Development and Distribution License Version 1.0
19 * (CDDL) only, as it comes in the "COPYING.CDDL" file of the
20 * VirtualBox OSE distribution, in which case the provisions of the
21 * CDDL are applicable instead of those of the GPL.
22 *
23 * You may elect to license modified versions of this file under the
24 * terms and conditions of either the GPL or the CDDL or both.
25 */
26
27
28/*********************************************************************************************************************************
29* Header Files *
30*********************************************************************************************************************************/
31#include <iprt/types.h>
32#include <iprt/stdarg.h>
33#include <iprt/ctype.h>
34
35#include <stdio.h>
36#include <string.h>
37#include <stdlib.h>
38#ifdef _MSC_VER
39# include <direct.h>
40#else
41# include <unistd.h>
42#endif
43
44
45/*********************************************************************************************************************************
46* Global Variables *
47*********************************************************************************************************************************/
/** Name of the input file currently being parsed (set by OpenFile, cleared by
 *  CloseFile); ParseError prints it as part of the error location. */
static const char *g_pszCurFile  = NULL;
/** Line number within the current input file; reset by OpenFile/CloseFile and
 *  advanced by GetLineFromFile. */
static unsigned    g_iLine       = 0;
/** The primary output stream; opened by Stream1Init and written to by
 *  Stream1Printf. */
static FILE       *g_pCurOutFile = NULL;
54
55
56/**
57 * Exit the program after printing a parse error.
58 *
59 * @param pszFormat The message.
60 * @param ... Format arguments.
61 */
62static DECL_NO_RETURN(void) ParseError(const char *pszFormat, ...)
63{
64 va_list va;
65 va_start(va, pszFormat);
66 fprintf(stderr, "parse error: %s:%u: ", g_pszCurFile, g_iLine);
67 vfprintf(stderr, pszFormat, va);
68 va_end(va);
69 exit(1);
70}
71
/**
 * Strips a line in place.
 *
 * Leading spaces and tabs are skipped, anything from the first '#' onwards is
 * cut off, and trailing spaces, tabs, newlines and carriage returns are
 * overwritten with terminators.
 *
 * @returns Pointer to the first non-blank character of @a pszLine.
 * @param   pszLine     The line buffer to strip (modified in place).
 */
static char *StripLine(char *pszLine)
{
    /* Leading blanks (space and tab only). */
    pszLine += strspn(pszLine, " \t");

    /* The effective end is either the comment marker or the terminator. */
    char *pszEnd = pszLine + strcspn(pszLine, "#");
    if (*pszEnd == '#')
        *pszEnd = '\0';

    /* Trailing whitespace. */
    for (; pszEnd > pszLine; pszEnd--)
    {
        char const ch = pszEnd[-1];
        if (ch != ' ' && ch != '\t' && ch != '\n' && ch != '\r')
            break;
        pszEnd[-1] = '\0';
    }

    return pszLine;
}
103
104
/**
 * Checks whether a line carries no data, i.e. it is blank or a comment.
 *
 * @returns true if the first character after any leading spaces, tabs,
 *          newlines and carriage returns is '#' or the string terminator,
 *          false otherwise.
 * @param   pszLine     The line to examine (not modified).
 */
static bool IsCommentOrBlankLine(const char *pszLine)
{
    char const chFirst = pszLine[strspn(pszLine, " \t\n\r")];
    return chFirst == '#' || chFirst == '\0';
}
116
117
/**
 * Extracts the first ';'-separated field of a line, in place.
 *
 * The field is terminated where the ';' was (if any) and surrounding spaces,
 * tabs, carriage returns and newlines are stripped.
 *
 * @returns Pointer to the stripped first field (possibly an empty string).
 * @param   ppsz        Where to store the position following the field:
 *                      the character after the ';', or the string terminator
 *                      if the line contains no further separator.
 * @param   pszLine     The line buffer (modified in place).  May be the
 *                      position previously stored in @a *ppsz.
 */
static char *FirstField(char **ppsz, char *pszLine)
{
    /* Locate the end of the field and publish where the next one starts. */
    char *pszEnd = strchr(pszLine, ';');
    if (pszEnd)
    {
        *pszEnd = '\0';
        *ppsz   = pszEnd + 1;
    }
    else
    {
        pszEnd = pszLine + strlen(pszLine);
        *ppsz  = pszEnd;
    }

    /* Strip leading whitespace... */
    pszLine += strspn(pszLine, " \t\r\n");

    /* ...and trailing whitespace. */
    for (; pszEnd > pszLine; pszEnd--)
    {
        char const ch = pszEnd[-1];
        if (ch != ' ' && ch != '\t' && ch != '\n' && ch != '\r')
            break;
        pszEnd[-1] = '\0';
    }
    return pszLine;
}
154
155
156/**
157 * Get the next field in a field enumeration.
158 *
159 * @returns Pointer to the next field.
160 * @param ppsz Where to get and store the string position.
161 */
162static char *NextField(char **ppsz)
163{
164 return FirstField(ppsz, *ppsz);
165}
166
167
168/**
169 * Splits a decomposition field.
170 *
171 * This may start with a type that is enclosed in angle brackets.
172 *
173 * @returns Pointer to the mapping values following the type. @a *ppsz if empty.
174 * @param ppszType Pointer to the type field pointer. On input the type
175 * field contains the combined type and mapping string. On
176 * output this should only contain the type, no angle
177 * brackets. If no type specified, it is replaced with an
178 * empty string (const).
179 */
180static char *SplitDecompField(char **ppszType)
181{
182 /* Empty field? */
183 char *psz = *ppszType;
184 if (!*psz)
185 return psz;
186
187 /* No type? */
188 if (*psz != '<')
189 {
190 *ppszType = (char *)"";
191 return psz;
192 }
193
194 /* Split out the type. */
195 *ppszType = ++psz;
196 psz = strchr(psz, '>');
197 if (!psz)
198 {
199 ParseError("Bad Decomposition Type/Mappings\n");
200 /* not reached: return *ppszType; */
201 }
202 *psz++ = '\0';
203
204 psz = StripLine(psz);
205 if (!*psz)
206 ParseError("Missing decomposition mappings\n");
207 return psz;
208}
209
210/**
211 * Converts a code point field to a number.
212 * @returns Code point.
213 * @param psz The field string.
214 */
215static RTUNICP ToNum(const char *psz)
216{
217 char *pszEnd = NULL;
218 unsigned long ul = strtoul(psz, &pszEnd, 16);
219 if (pszEnd && *pszEnd)
220 ParseError("failed converting '%s' to a number!\n", psz);
221 return (RTUNICP)ul;
222}
223
224
225/**
226 * Same as ToNum except that if the field is empty the Default is returned.
227 */
228static RTUNICP ToNumDefault(const char *psz, RTUNICP Default)
229{
230 if (*psz)
231 return ToNum(psz);
232 return Default;
233}
234
235
236/**
237 * Converts a code point range to numbers.
238 * @returns The start code point.\
239 * @returns ~(RTUNICP)0 on failure.
240 * @param psz The field string.
241 * @param pLast Where to store the last code point in the range.
242 */
243static RTUNICP ToRange(const char *psz, PRTUNICP pLast)
244{
245 char *pszEnd = NULL;
246 unsigned long ulStart = strtoul(psz, &pszEnd, 16);
247 unsigned long ulLast = ulStart;
248 if (pszEnd && *pszEnd)
249 {
250 if (*pszEnd == '.')
251 {
252 while (*pszEnd == '.')
253 pszEnd++;
254 ulLast = strtoul(pszEnd, &pszEnd, 16);
255 if (pszEnd && *pszEnd)
256 {
257 ParseError("failed converting '%s' to a number!\n", psz);
258 /* not reached: return ~(RTUNICP)0;*/
259 }
260 }
261 else
262 {
263 ParseError("failed converting '%s' to a number!\n", psz);
264 /* not reached: return ~(RTUNICP)0; */
265 }
266 }
267 *pLast = (RTUNICP)ulLast;
268 return (RTUNICP)ulStart;
269
270}
271
272/**
273 * For converting the decomposition mappings field and similar.
274 *
275 * @returns Mapping array or NULL if none.
276 * @param psz The string to convert. Can be empty.
277 * @param pcEntries Where to store the number of entries.
278 * @param cMax The max number of entries.
279 */
280static PRTUNICP ToMapping(char *psz, unsigned *pcEntries, unsigned cMax)
281{
282 PRTUNICP paCps = NULL;
283 unsigned cAlloc = 0;
284 unsigned i = 0;
285
286 /* Convert the code points. */
287 while (psz)
288 {
289 /* skip leading spaces */
290 while (RT_C_IS_BLANK(*psz))
291 psz++;
292
293 /* the end? */
294 if (!*psz)
295 break;
296
297 /* room left? */
298 if (i >= cMax)
299 {
300 ParseError("Too many mappings.\n");
301 /* not reached: break; */
302 }
303 if (i >= cAlloc)
304 {
305 cAlloc += 4;
306 paCps = (PRTUNICP)realloc(paCps, cAlloc * sizeof(paCps[0]));
307 if (!paCps)
308 {
309 fprintf(stderr, "out of memory (%u)\n", (unsigned)(cAlloc * sizeof(paCps[0])));
310 exit(1);
311 }
312 }
313
314 /* Find the end. */
315 char *pszThis = psz;
316 while (RT_C_IS_XDIGIT(*psz))
317 psz++;
318 if (*psz && !RT_C_IS_BLANK(*psz))
319 ParseError("Malformed mappings.\n");
320 if (*psz)
321 *psz++ = '\0';
322
323 /* Convert to number and add it. */
324 paCps[i++] = ToNum(pszThis);
325 }
326
327 *pcEntries = i;
328 return paCps;
329}
330
331
/**
 * Duplicates a string, sharing a constant for the empty string to save
 * memory.
 *
 * Prints a message and exits with status 1 if the allocation fails.
 *
 * @returns Pointer to a heap copy of @a pszStr, or to a constant empty string
 *          (which must not be modified or freed) if @a pszStr is empty.
 * @param   pszStr      The string to duplicate.
 */
static char *DupStr(const char *pszStr)
{
    if (*pszStr == '\0')
        return (char *)"";

    char *pszCopy = strdup(pszStr);
    if (!pszCopy)
    {
        fprintf(stderr, "out of memory!\n");
        exit(1);
    }
    return pszCopy;
}
349
350
351/**
352 * Array of all possible and impossible unicode code points as of 4.1
353 */
354struct CPINFO
355{
356 RTUNICP CodePoint;
357 RTUNICP SimpleUpperCaseMapping;
358 RTUNICP SimpleLowerCaseMapping;
359 RTUNICP SimpleTitleCaseMapping;
360 unsigned CanonicalCombiningClass;
361 const char *pszDecompositionType;
362 unsigned cDecompositionMapping;
363 PRTUNICP paDecompositionMapping;
364 const char *pszName;
365 /** Set if this is an unused entry */
366 unsigned fNullEntry : 1;
367
368 unsigned fAlphabetic : 1;
369 unsigned fASCIIHexDigit : 1;
370 unsigned fBidiControl : 1;
371 unsigned fCaseIgnorable : 1;
372 unsigned fCased : 1;
373 unsigned fChangesWhenCasefolded : 1;
374 unsigned fChangesWhenCasemapped : 1;
375 unsigned fChangesWhenLowercased : 1;
376 unsigned fChangesWhenTitlecased : 1;
377 unsigned fChangesWhenUppercased : 1;
378 unsigned fDash : 1;
379 unsigned fDefaultIgnorableCodePoint : 1;
380 unsigned fDeprecated : 1;
381 unsigned fDiacritic : 1;
382 unsigned fExtender : 1;
383 unsigned fGraphemeBase : 1;
384 unsigned fGraphemeExtend : 1;
385 unsigned fGraphemeLink : 1;
386 unsigned fHexDigit : 1;
387 unsigned fHyphen : 1;
388 unsigned fIDContinue : 1;
389 unsigned fIdeographic : 1;
390 unsigned fIDSBinaryOperator : 1;
391 unsigned fIDStart : 1;
392 unsigned fIDSTrinaryOperator : 1;
393 unsigned fJoinControl : 1;
394 unsigned fLogicalOrderException : 1;
395 unsigned fLowercase : 1;
396 unsigned fMath : 1;
397 unsigned fNoncharacterCodePoint : 1;
398 unsigned fOtherAlphabetic : 1;
399 unsigned fOtherDefaultIgnorableCodePoint : 1;
400 unsigned fOtherGraphemeExtend : 1;
401 unsigned fOtherIDContinue : 1;
402 unsigned fOtherIDStart : 1;
403 unsigned fOtherLowercase : 1;
404 unsigned fOtherMath : 1;
405 unsigned fOtherUppercase : 1;
406 unsigned fPatternSyntax : 1;
407 unsigned fPatternWhiteSpace : 1;
408 unsigned fQuotationMark : 1;
409 unsigned fRadical : 1;
410 unsigned fSoftDotted : 1;
411 unsigned fSTerm : 1;
412 unsigned fTerminalPunctuation : 1;
413 unsigned fUnifiedIdeograph : 1;
414 unsigned fUppercase : 1;
415 unsigned fVariationSelector : 1;
416 unsigned fWhiteSpace : 1;
417 unsigned fXIDContinue : 1;
418 unsigned fXIDStart : 1;
419
420 /** @name DerivedNormalizationProps.txt
421 * @{ */
422 unsigned fFullCompositionExclusion : 1;
423 unsigned fInvNFC_QC : 2; /**< If 1 (NFC_QC == N) then code point 100% sure not part of NFC string. */
424 unsigned fInvNFD_QC : 2; /**< If 1 (NFD_QC == N) then code point 100% sure not part of NFD string. */
425 unsigned fInvNFKC_QC : 2;
426 unsigned fInvNFKD_QC : 2;
427 unsigned fExpandsOnNFC : 1;
428 unsigned fExpandsOnNFD : 1;
429 unsigned fExpandsOnNFKC : 1;
430 unsigned fExpandsOnNFKD : 1;
431 /** @} */
432
433 /* unprocessed stuff, so far. */
434 const char *pszGeneralCategory;
435 const char *pszBidiClass;
436 const char *pszNumericType;
437 const char *pszNumericValueD;
438 const char *pszNumericValueN;
439 const char *pszBidiMirrored;
440 const char *pszUnicode1Name;
441 const char *pszISOComment;
442} g_aCPInfo[0x110000];
443
444
445/**
446 * Creates a 'null' entry at i.
447 * @param i The entry in question.
448 */
449static void NullEntry(unsigned i)
450{
451 g_aCPInfo[i].CodePoint = i;
452 g_aCPInfo[i].fNullEntry = 1;
453 g_aCPInfo[i].SimpleUpperCaseMapping = i;
454 g_aCPInfo[i].SimpleLowerCaseMapping = i;
455 g_aCPInfo[i].SimpleTitleCaseMapping = i;
456 g_aCPInfo[i].pszDecompositionType = "";
457 g_aCPInfo[i].cDecompositionMapping = 0;
458 g_aCPInfo[i].paDecompositionMapping = NULL;
459 g_aCPInfo[i].pszName = "";
460 g_aCPInfo[i].pszGeneralCategory = "";
461 g_aCPInfo[i].pszBidiClass = "";
462 g_aCPInfo[i].pszNumericType = "";
463 g_aCPInfo[i].pszNumericValueD = "";
464 g_aCPInfo[i].pszNumericValueN = "";
465 g_aCPInfo[i].pszBidiMirrored = "";
466 g_aCPInfo[i].pszUnicode1Name = "";
467 g_aCPInfo[i].pszISOComment = "";
468}
469
470
471/**
472 * Open a file for reading, optionally with a base path prefixed.
473 *
474 * @returns file stream on success, NULL w/ complaint on failure.
475 * @param pszBasePath The base path, can be NULL.
476 * @param pszFilename The name of the file to open.
477 */
478static FILE *OpenFile(const char *pszBasePath, const char *pszFilename)
479{
480 FILE *pFile;
481 if ( !pszBasePath
482 || *pszFilename == '/'
483#if defined(_MSC_VER) || defined(__OS2__)
484 || *pszFilename == '\\'
485 || (*pszFilename && pszFilename[1] == ':')
486#endif
487 )
488 {
489 pFile = fopen(pszFilename, "r");
490 if (!pFile)
491 fprintf(stderr, "uniread: failed to open '%s' for reading\n", pszFilename);
492 }
493 else
494 {
495 size_t cchBasePath = strlen(pszBasePath);
496 size_t cchFilename = strlen(pszFilename);
497 char *pszFullName = (char *)malloc(cchBasePath + 1 + cchFilename + 1);
498 if (!pszFullName)
499 {
500 fprintf(stderr, "uniread: failed to allocate %d bytes\n", (int)(cchBasePath + 1 + cchFilename + 1));
501 return NULL;
502 }
503
504 memcpy(pszFullName, pszBasePath, cchBasePath);
505 pszFullName[cchBasePath] = '/';
506 memcpy(&pszFullName[cchBasePath + 1], pszFilename, cchFilename + 1);
507
508 pFile = fopen(pszFullName, "r");
509 if (!pFile)
510 fprintf(stderr, "uniread: failed to open '%s' for reading\n", pszFullName);
511 free(pszFullName);
512 }
513 g_pszCurFile = pszFilename;
514 g_iLine = 0;
515 return pFile;
516}
517
518
519/**
520 * Wrapper around fgets that keep track of the line number.
521 *
522 * @returns See fgets.
523 * @param pszBuf The buffer. See fgets for output definition.
524 * @param cbBuf The buffer size.
525 * @param pFile The file to read from.
526 */
527static char *GetLineFromFile(char *pszBuf, int cbBuf, FILE *pFile)
528{
529 g_iLine++;
530 return fgets(pszBuf, cbBuf, pFile);
531}
532
533
534/**
535 * Closes a file opened by OpenFile
536 *
537 * @param pFile The file to close.
538 */
539static void CloseFile(FILE *pFile)
540{
541 g_pszCurFile = NULL;
542 g_iLine = 0;
543 fclose(pFile);
544}
545
546
547/**
548 * Read the UnicodeData.txt file.
549 * @returns 0 on success.
550 * @returns !0 on failure.
551 * @param pszBasePath The base path, can be NULL.
552 * @param pszFilename The name of the file.
553 */
554static int ReadUnicodeData(const char *pszBasePath, const char *pszFilename)
555{
556 /*
557 * Open input.
558 */
559 FILE *pFile = OpenFile(pszBasePath, pszFilename);
560 if (!pFile)
561 return 1;
562
563 /*
564 * Parse the input and spit out the output.
565 */
566 char szLine[4096];
567 RTUNICP i = 0;
568 while (GetLineFromFile(szLine, sizeof(szLine), pFile) != NULL)
569 {
570 if (IsCommentOrBlankLine(szLine))
571 continue;
572
573 char *pszCurField;
574 char *pszCodePoint = FirstField(&pszCurField, StripLine(szLine)); /* 0 */
575 char *pszName = NextField(&pszCurField); /* 1 */
576 char *pszGeneralCategory = NextField(&pszCurField); /* 2 */
577 char *pszCanonicalCombiningClass = NextField(&pszCurField); /* 3 */
578 char *pszBidiClass = NextField(&pszCurField); /* 4 */
579 char *pszDecompositionType = NextField(&pszCurField); /* 5 */
580 char *pszDecompositionMapping = SplitDecompField(&pszDecompositionType);
581 char *pszNumericType = NextField(&pszCurField); /* 6 */
582 char *pszNumericValueD = NextField(&pszCurField); /* 7 */
583 char *pszNumericValueN = NextField(&pszCurField); /* 8 */
584 char *pszBidiMirrored = NextField(&pszCurField); /* 9 */
585 char *pszUnicode1Name = NextField(&pszCurField); /* 10 */
586 char *pszISOComment = NextField(&pszCurField); /* 11 */
587 char *pszSimpleUpperCaseMapping = NextField(&pszCurField); /* 12 */
588 char *pszSimpleLowerCaseMapping = NextField(&pszCurField); /* 13 */
589 char *pszSimpleTitleCaseMapping = NextField(&pszCurField); /* 14 */
590
591 RTUNICP CodePoint = ToNum(pszCodePoint);
592 if (CodePoint >= RT_ELEMENTS(g_aCPInfo))
593 {
594 ParseError("U+05X is out of range\n", CodePoint);
595 /* not reached: continue;*/
596 }
597
598 /* catchup? */
599 while (i < CodePoint)
600 NullEntry(i++);
601 if (i != CodePoint)
602 {
603 ParseError("i=%d CodePoint=%u\n", i, CodePoint);
604 /* not reached: CloseFile(pFile);
605 return 1; */
606 }
607
608 /* this one */
609 g_aCPInfo[i].CodePoint = i;
610 g_aCPInfo[i].fNullEntry = 0;
611 g_aCPInfo[i].pszName = DupStr(pszName);
612 g_aCPInfo[i].SimpleUpperCaseMapping = ToNumDefault(pszSimpleUpperCaseMapping, CodePoint);
613 g_aCPInfo[i].SimpleLowerCaseMapping = ToNumDefault(pszSimpleLowerCaseMapping, CodePoint);
614 g_aCPInfo[i].SimpleTitleCaseMapping = ToNumDefault(pszSimpleTitleCaseMapping, CodePoint);
615 g_aCPInfo[i].CanonicalCombiningClass = ToNum(pszCanonicalCombiningClass);
616 g_aCPInfo[i].pszDecompositionType = DupStr(pszDecompositionType);
617 g_aCPInfo[i].paDecompositionMapping = ToMapping(pszDecompositionMapping, &g_aCPInfo[i].cDecompositionMapping, 20);
618 g_aCPInfo[i].pszGeneralCategory = DupStr(pszGeneralCategory);
619 g_aCPInfo[i].pszBidiClass = DupStr(pszBidiClass);
620 g_aCPInfo[i].pszNumericType = DupStr(pszNumericType);
621 g_aCPInfo[i].pszNumericValueD = DupStr(pszNumericValueD);
622 g_aCPInfo[i].pszNumericValueN = DupStr(pszNumericValueN);
623 g_aCPInfo[i].pszBidiMirrored = DupStr(pszBidiMirrored);
624 g_aCPInfo[i].pszUnicode1Name = DupStr(pszUnicode1Name);
625 g_aCPInfo[i].pszISOComment = DupStr(pszISOComment);
626 i++;
627 }
628
629 /* catchup? */
630 while (i < RT_ELEMENTS(g_aCPInfo))
631 NullEntry(i++);
632 CloseFile(pFile);
633
634 return 0;
635}
636
637
638/**
639 * Generates excluded data.
640 *
641 * @returns 0 on success, exit code on failure.
642 */
643static int GenerateExcludedData(void)
644{
645 /*
646 * Hangul Syllables U+AC00 to U+D7A3.
647 */
648 for (RTUNICP i = 0xac00; i <= 0xd7a3; i++)
649 {
650 g_aCPInfo[i].fNullEntry = 0;
651 g_aCPInfo[i].fInvNFD_QC = 1;
652 /** @todo generate the decomposition: http://unicode.org/reports/tr15/#Hangul
653 * */
654 }
655
656 /** @todo
657 * CJK Ideographs Extension A (U+3400 - U+4DB5)
658 * CJK Ideographs (U+4E00 - U+9FA5)
659 * CJK Ideograph Extension B (U+20000 - U+2A6D6)
660 * CJK Ideograph Extension C (U+2A700 - U+2B734)
661 */
662
663 return 0;
664}
665
666
667
668/**
669 * Worker for ApplyProperty that handles a yes, no, maybe property value.
670 *
671 * @returns 0 (NO), 1 (YES), 2 (MAYBE).
672 * @param ppszNextField The field cursor, input and output.
673 */
674static int YesNoMaybePropertyValue(char **ppszNextField)
675{
676 if (!**ppszNextField)
677 ParseError("Missing Y/N/M field\n");
678 else
679 {
680 char *psz = NextField(ppszNextField);
681 if (!strcmp(psz, "N"))
682 return 0;
683 if (!strcmp(psz, "Y"))
684 return 1;
685 if (!strcmp(psz, "M"))
686 return 2;
687 ParseError("Unexpected Y/N/M value: '%s'\n", psz);
688 }
689 /* not reached: return 0; */
690}
691
692
693/**
694 * Inverted version of YesNoMaybePropertyValue
695 *
696 * @returns 1 (NO), 0 (YES), 2 (MAYBE).
697 * @param ppszNextField The field cursor, input and output.
698 */
699static int YesNoMaybePropertyValueInv(char **ppszNextField)
700{
701 unsigned rc = YesNoMaybePropertyValue(ppszNextField);
702 switch (rc)
703 {
704 case 0: return 1;
705 case 1: return 0;
706 default: return rc;
707 }
708}
709
710
711/**
712 * Applies a property to a code point.
713 *
714 * @param StartCP The code point.
715 * @param pszProperty The property name.
716 * @param pszNextField The next field.
717 */
718static void ApplyProperty(RTUNICP StartCP, const char *pszProperty, char *pszNextField)
719{
720 if (StartCP >= RT_ELEMENTS(g_aCPInfo))
721 {
722 ParseError("U+%06X is out of the g_aCPInfo range.\n", StartCP);
723 /* not reached: return; */
724 }
725 struct CPINFO *pCPInfo = &g_aCPInfo[StartCP];
726 /* string switch */
727 if (!strcmp(pszProperty, "ASCII_Hex_Digit")) pCPInfo->fASCIIHexDigit = 1;
728 else if (!strcmp(pszProperty, "Alphabetic")) pCPInfo->fAlphabetic = 1;
729 else if (!strcmp(pszProperty, "Bidi_Control")) pCPInfo->fBidiControl = 1;
730 else if (!strcmp(pszProperty, "Case_Ignorable")) pCPInfo->fCaseIgnorable = 1;
731 else if (!strcmp(pszProperty, "Cased")) pCPInfo->fCased = 1;
732 else if (!strcmp(pszProperty, "Changes_When_Casefolded")) pCPInfo->fChangesWhenCasefolded = 1;
733 else if (!strcmp(pszProperty, "Changes_When_Casemapped")) pCPInfo->fChangesWhenCasemapped = 1;
734 else if (!strcmp(pszProperty, "Changes_When_Lowercased")) pCPInfo->fChangesWhenLowercased = 1;
735 else if (!strcmp(pszProperty, "Changes_When_Titlecased")) pCPInfo->fChangesWhenTitlecased = 1;
736 else if (!strcmp(pszProperty, "Changes_When_Uppercased")) pCPInfo->fChangesWhenUppercased = 1;
737 else if (!strcmp(pszProperty, "Dash")) pCPInfo->fDash = 1;
738 else if (!strcmp(pszProperty, "Default_Ignorable_Code_Point")) pCPInfo->fDefaultIgnorableCodePoint = 1;
739 else if (!strcmp(pszProperty, "Deprecated")) pCPInfo->fDeprecated = 1;
740 else if (!strcmp(pszProperty, "Diacritic")) pCPInfo->fDiacritic = 1;
741 else if (!strcmp(pszProperty, "Extender")) pCPInfo->fExtender = 1;
742 else if (!strcmp(pszProperty, "Grapheme_Base")) pCPInfo->fGraphemeBase = 1;
743 else if (!strcmp(pszProperty, "Grapheme_Extend")) pCPInfo->fGraphemeExtend = 1;
744 else if (!strcmp(pszProperty, "Grapheme_Link")) pCPInfo->fGraphemeLink = 1;
745 else if (!strcmp(pszProperty, "Hex_Digit")) pCPInfo->fHexDigit = 1;
746 else if (!strcmp(pszProperty, "Hyphen")) pCPInfo->fHyphen = 1;
747 else if (!strcmp(pszProperty, "ID_Continue")) pCPInfo->fIDContinue = 1;
748 else if (!strcmp(pszProperty, "ID_Start")) pCPInfo->fIDStart = 1;
749 else if (!strcmp(pszProperty, "Ideographic")) pCPInfo->fIdeographic = 1;
750 else if (!strcmp(pszProperty, "IDS_Binary_Operator")) pCPInfo->fIDSBinaryOperator = 1;
751 else if (!strcmp(pszProperty, "IDS_Trinary_Operator")) pCPInfo->fIDSTrinaryOperator = 1;
752 else if (!strcmp(pszProperty, "Join_Control")) pCPInfo->fJoinControl = 1;
753 else if (!strcmp(pszProperty, "Logical_Order_Exception")) pCPInfo->fLogicalOrderException = 1;
754 else if (!strcmp(pszProperty, "Lowercase")) pCPInfo->fLowercase = 1;
755 else if (!strcmp(pszProperty, "Math")) pCPInfo->fMath = 1;
756 else if (!strcmp(pszProperty, "Noncharacter_Code_Point")) pCPInfo->fNoncharacterCodePoint = 1;
757 else if (!strcmp(pszProperty, "Other_Alphabetic")) pCPInfo->fOtherAlphabetic = 1;
758 else if (!strcmp(pszProperty, "Other_Default_Ignorable_Code_Point")) pCPInfo->fOtherDefaultIgnorableCodePoint = 1;
759 else if (!strcmp(pszProperty, "Other_Grapheme_Extend")) pCPInfo->fOtherGraphemeExtend = 1;
760 else if (!strcmp(pszProperty, "Other_ID_Continue")) pCPInfo->fOtherIDContinue = 1;
761 else if (!strcmp(pszProperty, "Other_ID_Start")) pCPInfo->fOtherIDStart = 1;
762 else if (!strcmp(pszProperty, "Other_Lowercase")) pCPInfo->fOtherLowercase = 1;
763 else if (!strcmp(pszProperty, "Other_Math")) pCPInfo->fOtherMath = 1;
764 else if (!strcmp(pszProperty, "Other_Uppercase")) pCPInfo->fOtherUppercase = 1;
765 else if (!strcmp(pszProperty, "Pattern_Syntax")) pCPInfo->fPatternSyntax = 1;
766 else if (!strcmp(pszProperty, "Pattern_White_Space")) pCPInfo->fPatternWhiteSpace = 1;
767 else if (!strcmp(pszProperty, "Quotation_Mark")) pCPInfo->fQuotationMark = 1;
768 else if (!strcmp(pszProperty, "Radical")) pCPInfo->fRadical = 1;
769 else if (!strcmp(pszProperty, "Soft_Dotted")) pCPInfo->fSoftDotted = 1;
770 else if (!strcmp(pszProperty, "STerm")) pCPInfo->fSTerm = 1;
771 else if (!strcmp(pszProperty, "Terminal_Punctuation")) pCPInfo->fTerminalPunctuation = 1;
772 else if (!strcmp(pszProperty, "Unified_Ideograph")) pCPInfo->fUnifiedIdeograph = 1;
773 else if (!strcmp(pszProperty, "Uppercase")) pCPInfo->fUppercase = 1;
774 else if (!strcmp(pszProperty, "Variation_Selector")) pCPInfo->fVariationSelector = 1;
775 else if (!strcmp(pszProperty, "White_Space")) pCPInfo->fWhiteSpace = 1;
776 else if (!strcmp(pszProperty, "XID_Continue")) pCPInfo->fXIDContinue = 1;
777 else if (!strcmp(pszProperty, "XID_Start")) pCPInfo->fXIDStart = 1;
778 /* DerivedNormalizationProps: */
779 else if (!strcmp(pszProperty, "FC_NFKC")) return; /* ignored */
780 else if (!strcmp(pszProperty, "Full_Composition_Exclusion")) pCPInfo->fFullCompositionExclusion = 1;
781 else if (!strcmp(pszProperty, "NFC_QC")) pCPInfo->fInvNFC_QC = YesNoMaybePropertyValueInv(&pszNextField);
782 else if (!strcmp(pszProperty, "NFD_QC")) pCPInfo->fInvNFD_QC = YesNoMaybePropertyValueInv(&pszNextField);
783 else if (!strcmp(pszProperty, "NFKC_QC")) pCPInfo->fInvNFKC_QC = YesNoMaybePropertyValueInv(&pszNextField);
784 else if (!strcmp(pszProperty, "NFKD_QC")) pCPInfo->fInvNFKD_QC = YesNoMaybePropertyValueInv(&pszNextField);
785 else if (!strcmp(pszProperty, "Expands_On_NFC")) pCPInfo->fExpandsOnNFC = 1;
786 else if (!strcmp(pszProperty, "Expands_On_NFD")) pCPInfo->fExpandsOnNFD = 1;
787 else if (!strcmp(pszProperty, "Expands_On_NFKC")) pCPInfo->fExpandsOnNFKC = 1;
788 else if (!strcmp(pszProperty, "Expands_On_NFKD")) pCPInfo->fExpandsOnNFKD = 1;
789 else if (!strcmp(pszProperty, "NFKC_CF")) return; /*ignore */
790 else if (!strcmp(pszProperty, "Changes_When_NFKC_Casefolded")) return; /*ignore */
791 else
792 {
793 ParseError("Unknown property '%s'\n", pszProperty);
794 /* not reached: return; */
795 }
796
797 if (pszNextField && *pszNextField)
798 ParseError("Unexpected next field: '%s'\n", pszNextField);
799}
800
801
802/**
803 * Reads a property file.
804 *
805 * There are several property files, this code can read all
806 * of those but will only make use of the properties it recognizes.
807 *
808 * @returns 0 on success.
809 * @returns !0 on failure.
810 * @param pszBasePath The base path, can be NULL.
811 * @param pszFilename The name of the file.
812 */
813static int ReadProperties(const char *pszBasePath, const char *pszFilename)
814{
815 /*
816 * Open input.
817 */
818 FILE *pFile = OpenFile(pszBasePath, pszFilename);
819 if (!pFile)
820 return 1;
821
822 /*
823 * Parse the input and spit out the output.
824 */
825 char szLine[4096];
826 while (GetLineFromFile(szLine, sizeof(szLine), pFile) != NULL)
827 {
828 if (IsCommentOrBlankLine(szLine))
829 continue;
830 char *pszCurField;
831 char *pszRange = FirstField(&pszCurField, StripLine(szLine));
832 char *pszProperty = NextField(&pszCurField);
833 if (!*pszProperty)
834 {
835 ParseError("no property field.\n");
836 /* not reached: continue; */
837 }
838
839 RTUNICP LastCP;
840 RTUNICP StartCP = ToRange(pszRange, &LastCP);
841 if (StartCP == ~(RTUNICP)0)
842 continue;
843
844 while (StartCP <= LastCP)
845 ApplyProperty(StartCP++, pszProperty, pszCurField);
846 }
847
848 CloseFile(pFile);
849
850 return 0;
851}
852
853
/**
 * Appends a flag name to a flag expression string, inserting " | " between
 * flags.
 *
 * The caller must make sure the buffer is large enough; no bounds checking
 * is performed.
 *
 * @returns @a psz.
 * @param   psz         The flag expression built so far (may be empty).
 * @param   pszFlag     The flag name to append.
 */
static char *AppendFlag(char *psz, const char *pszFlag)
{
    if (*psz != '\0')
        strcat(psz, " | ");
    strcat(psz, pszFlag);
    return psz;
}
869
870/**
871 * Calcs the flags for a code point.
872 * @returns true if there is a flag.
873 * @returns false if the isn't.
874 */
875static bool CalcFlags(struct CPINFO *pInfo, char *pszFlags)
876{
877 pszFlags[0] = '\0';
878 /** @todo read the specs on this other vs standard stuff, and check out the finer points */
879 if (pInfo->fAlphabetic || pInfo->fOtherAlphabetic)
880 AppendFlag(pszFlags, "RTUNI_ALPHA");
881 if (pInfo->fHexDigit || pInfo->fASCIIHexDigit)
882 AppendFlag(pszFlags, "RTUNI_XDIGIT");
883 if (!strcmp(pInfo->pszGeneralCategory, "Nd"))
884 AppendFlag(pszFlags, "RTUNI_DDIGIT");
885 if (pInfo->fWhiteSpace)
886 AppendFlag(pszFlags, "RTUNI_WSPACE");
887 if (pInfo->fUppercase || pInfo->fOtherUppercase)
888 AppendFlag(pszFlags, "RTUNI_UPPER");
889 if (pInfo->fLowercase || pInfo->fOtherLowercase)
890 AppendFlag(pszFlags, "RTUNI_LOWER");
891 //if (pInfo->???)
892 // AppendFlag(pszFlags, "RTUNI_BSPACE");
893#if 0
894 if (pInfo->fInvNFD_QC != 0 || pInfo->fInvNFC_QC != 0)
895 {
896 AppendFlag(pszFlags, "RTUNI_QC_NFX");
897 if (!pInfo->paDecompositionMapping && pInfo->fInvNFD_QC)
898 fprintf(stderr, "uniread: U+%05X is QC_NFD but has no mappings.\n", pInfo->CodePoint);
899 else if (*pInfo->pszDecompositionType && pInfo->fInvNFD_QC)
900 fprintf(stderr, "uniread: U+%05X is QC_NFD but has no canonical mappings.\n", pInfo->CodePoint);
901 }
902 else if (pInfo->paDecompositionMapping && !*pInfo->pszDecompositionType)
903 fprintf(stderr, "uniread: U+%05X is not QC_NFX but has canonical mappings.\n", pInfo->CodePoint);
904#endif
905
906 if (!*pszFlags)
907 {
908 pszFlags[0] = '0';
909 pszFlags[1] = '\0';
910 return false;
911 }
912 return true;
913}
914
915
916/**
917 * Closes the primary output stream.
918 */
919static int Stream1Close(void)
920{
921 if (g_pCurOutFile && g_pCurOutFile != stdout && g_pCurOutFile != stderr)
922 {
923 if (fclose(g_pCurOutFile) != 0)
924 {
925 fprintf(stderr, "Error closing output file.\n");
926 return -1;
927 }
928 }
929 g_pCurOutFile = NULL;
930 return 0;
931}
932
933
934/**
935 * Initializes the 1st stream to output to a given file.
936 */
937static int Stream1Init(const char *pszName)
938{
939 int rc = Stream1Close();
940 if (!rc)
941 {
942 g_pCurOutFile = fopen(pszName, "w");
943 if (!g_pCurOutFile)
944 {
945 fprintf(stderr, "Error opening output file '%s'.\n", pszName);
946 rc = -1;
947 }
948 }
949 return rc;
950}
951
952
953/**
954 * printf wrapper for the primary output stream.
955 *
956 * @returns See vfprintf.
957 * @param pszFormat The vfprintf format string.
958 * @param ... The format arguments.
959 */
960static int Stream1Printf(const char *pszFormat, ...)
961{
962 int cch;
963 va_list va;
964 va_start(va, pszFormat);
965 cch = vfprintf(g_pCurOutFile, pszFormat, va);
966 va_end(va);
967 return cch;
968}
969
970
/** The data store for stream two: text is accumulated here by Stream2Printf
 *  until Stream2Flush passes it on to the primary output stream. */
static char              g_szStream2[10240];
/** Offset into g_szStream2 at which the next Stream2Printf output goes.
 *  NOTE(review): the reason for the volatile qualifier is not evident from
 *  this file. */
static volatile unsigned g_offStream2 = 0;
974
975/**
976 * Initializes the 2nd steam.
977 */
978static void Stream2Init(void)
979{
980 g_szStream2[0] = '\0';
981 g_offStream2 = 0;
982}
983
984/**
985 * Flushes the 2nd stream to stdout.
986 */
987static int Stream2Flush(void)
988{
989 g_szStream2[g_offStream2] = '\0';
990 Stream1Printf("%s", g_szStream2);
991 Stream2Init();
992 return 0;
993}
994
995/**
996 * printf to the 2nd stream.
997 */
998static int Stream2Printf(const char *pszFormat, ...)
999{
1000 unsigned offStream2 = g_offStream2;
1001 va_list va;
1002 va_start(va, pszFormat);
1003 int cch = vsprintf(&g_szStream2[offStream2], pszFormat, va);
1004 va_end(va);
1005 offStream2 += cch;
1006 if (offStream2 >= sizeof(g_szStream2))
1007 {
1008 fprintf(stderr, "error: stream2 overflow!\n");
1009 exit(1);
1010 }
1011 g_offStream2 = offStream2;
1012 return cch;
1013}
1014
1015
1016/**
1017 * Print the unidata.cpp file header and include list.
1018 */
1019int PrintHeader(const char *argv0, const char *pszBaseDir)
1020{
1021 char szBuf[1024];
1022 if (!pszBaseDir)
1023 {
1024 memset(szBuf, 0, sizeof(szBuf));
1025#ifdef _MSC_VER
1026 if (!_getcwd(szBuf, sizeof(szBuf)))
1027#else
1028 if (!getcwd(szBuf, sizeof(szBuf)))
1029#endif
1030 return RTEXITCODE_FAILURE;
1031 pszBaseDir = szBuf;
1032 }
1033
1034 Stream1Printf("/* $" "Id" "$ */\n"
1035 "/** @file\n"
1036 " * IPRT - Unicode Tables.\n"
1037 " *\n"
1038 " * Automatically Generated from %s\n"
1039 " * by %s (" __DATE__ " " __TIME__ ")\n"
1040 " */\n"
1041 "\n"
1042 "/*\n"
1043 " * Copyright (C) 2006-2017 Oracle Corporation \n"
1044 " *\n"
1045 " * This file is part of VirtualBox Open Source Edition (OSE), as\n"
1046 " * available from http://www.virtualbox.org. This file is free software;\n"
1047 " * you can redistribute it and/or modify it under the terms of the GNU\n"
1048 " * General Public License (GPL) as published by the Free Software\n"
1049 " * Foundation, in version 2 as it comes in the \"COPYING\" file of the\n"
1050 " * VirtualBox OSE distribution. VirtualBox OSE is distributed in the\n"
1051 " * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.\n"
1052 " *\n"
1053 " * The contents of this file may alternatively be used under the terms\n"
1054 " * of the Common Development and Distribution License Version 1.0\n"
1055 " * (CDDL) only, as it comes in the \"COPYING.CDDL\" file of the\n"
1056 " * VirtualBox OSE distribution, in which case the provisions of the\n"
1057 " * CDDL are applicable instead of those of the GPL.\n"
1058 " *\n"
1059 " * You may elect to license modified versions of this file under the\n"
1060 " * terms and conditions of either the GPL or the CDDL or both.\n"
1061 " */\n"
1062 "\n"
1063 "#include <iprt/uni.h>\n"
1064 "\n",
1065 pszBaseDir, argv0);
1066 return 0;
1067}
1068
1069
1070/**
1071 * Print the flag tables.
1072 */
1073int PrintFlags(void)
1074{
1075 /*
1076 * Print flags table.
1077 */
1078 Stream2Init();
1079 Stream2Printf("RT_DECL_DATA_CONST(const RTUNIFLAGSRANGE) g_aRTUniFlagsRanges[] =\n"
1080 "{\n");
1081 RTUNICP i = 0;
1082 int iStart = -1;
1083 while (i < RT_ELEMENTS(g_aCPInfo))
1084 {
1085 /* figure how far off the next chunk is */
1086 char szFlags[256];
1087 unsigned iNonNull = i;
1088 while ( iNonNull < RT_ELEMENTS(g_aCPInfo)
1089 && iNonNull >= 256
1090 && (g_aCPInfo[iNonNull].fNullEntry || !CalcFlags(&g_aCPInfo[iNonNull], szFlags)) )
1091 iNonNull++;
1092 if (iNonNull - i > 4096 || iNonNull == RT_ELEMENTS(g_aCPInfo))
1093 {
1094 if (iStart >= 0)
1095 {
1096 Stream1Printf("};\n\n");
1097 Stream2Printf(" { 0x%06x, 0x%06x, &g_afRTUniFlags0x%06x[0] },\n", iStart, i, iStart);
1098 iStart = -1;
1099 }
1100 i = iNonNull;
1101 }
1102 else
1103 {
1104 if (iStart < 0)
1105 {
1106 Stream1Printf("static const uint8_t g_afRTUniFlags0x%06x[] =\n"
1107 "{\n", i);
1108 iStart = i;
1109 }
1110 CalcFlags(&g_aCPInfo[i], szFlags);
1111 Stream1Printf(" %50s, /* U+%06x: %s*/\n", szFlags, g_aCPInfo[i].CodePoint, g_aCPInfo[i].pszName);
1112 i++;
1113 }
1114 }
1115 Stream2Printf(" { ~(RTUNICP)0, ~(RTUNICP)0, NULL }\n"
1116 "};\n\n\n");
1117 Stream1Printf("\n");
1118 return Stream2Flush();
1119}
1120
1121
1122/**
1123 * Prints the upper case tables.
1124 */
1125static int PrintUpper(void)
1126{
1127 Stream2Init();
1128 Stream2Printf("RT_DECL_DATA_CONST(const RTUNICASERANGE) g_aRTUniUpperRanges[] =\n"
1129 "{\n");
1130 RTUNICP i = 0;
1131 int iStart = -1;
1132 while (i < RT_ELEMENTS(g_aCPInfo))
1133 {
1134 /* figure how far off the next chunk is */
1135 unsigned iSameCase = i;
1136 while ( iSameCase < RT_ELEMENTS(g_aCPInfo)
1137 && g_aCPInfo[iSameCase].SimpleUpperCaseMapping == g_aCPInfo[iSameCase].CodePoint
1138 && iSameCase >= 256)
1139 iSameCase++;
1140 if (iSameCase - i > 4096/sizeof(RTUNICP) || iSameCase == RT_ELEMENTS(g_aCPInfo))
1141 {
1142 if (iStart >= 0)
1143 {
1144 Stream1Printf("};\n\n");
1145 Stream2Printf(" { 0x%06x, 0x%06x, &g_afRTUniUpper0x%06x[0] },\n", iStart, i, iStart);
1146 iStart = -1;
1147 }
1148 i = iSameCase;
1149 }
1150 else
1151 {
1152 if (iStart < 0)
1153 {
1154 Stream1Printf("static const RTUNICP g_afRTUniUpper0x%06x[] =\n"
1155 "{\n", i);
1156 iStart = i;
1157 }
1158 Stream1Printf(" 0x%02x, /* U+%06x: %s*/\n", g_aCPInfo[i].SimpleUpperCaseMapping, g_aCPInfo[i].CodePoint, g_aCPInfo[i].pszName);
1159 i++;
1160 }
1161 }
1162 Stream2Printf(" { ~(RTUNICP)0, ~(RTUNICP)0, NULL }\n"
1163 "};\n\n\n");
1164 Stream1Printf("\n");
1165 return Stream2Flush();
1166}
1167
1168
1169/**
1170 * Prints the lowercase tables.
1171 */
1172static int PrintLower(void)
1173{
1174 Stream2Init();
1175 Stream2Printf("RT_DECL_DATA_CONST(const RTUNICASERANGE) g_aRTUniLowerRanges[] =\n"
1176 "{\n");
1177 RTUNICP i = 0;
1178 int iStart = -1;
1179 while (i < RT_ELEMENTS(g_aCPInfo))
1180 {
1181 /* figure how far off the next chunk is */
1182 unsigned iSameCase = i;
1183 while ( iSameCase < RT_ELEMENTS(g_aCPInfo)
1184 && g_aCPInfo[iSameCase].SimpleLowerCaseMapping == g_aCPInfo[iSameCase].CodePoint
1185 && iSameCase >= 256)
1186 iSameCase++;
1187 if (iSameCase - i > 4096/sizeof(RTUNICP) || iSameCase == RT_ELEMENTS(g_aCPInfo))
1188 {
1189 if (iStart >= 0)
1190 {
1191 Stream1Printf("};\n\n");
1192 Stream2Printf(" { 0x%06x, 0x%06x, &g_afRTUniLower0x%06x[0] },\n", iStart, i, iStart);
1193 iStart = -1;
1194 }
1195 i = iSameCase;
1196 }
1197 else
1198 {
1199 if (iStart < 0)
1200 {
1201 Stream1Printf("static const RTUNICP g_afRTUniLower0x%06x[] =\n"
1202 "{\n", i);
1203 iStart = i;
1204 }
1205 Stream1Printf(" 0x%02x, /* U+%06x: %s*/\n",
1206 g_aCPInfo[i].SimpleLowerCaseMapping, g_aCPInfo[i].CodePoint, g_aCPInfo[i].pszName);
1207 i++;
1208 }
1209 }
1210 Stream2Printf(" { ~(RTUNICP)0, ~(RTUNICP)0, NULL }\n"
1211 "};\n\n\n");
1212 Stream1Printf("\n");
1213 return Stream2Flush();
1214}
1215
1216
1217int main(int argc, char **argv)
1218{
1219 /*
1220 * Parse args.
1221 */
1222 if (argc <= 1)
1223 {
1224 printf("usage: %s [-C|--dir <UCD-dir>] [UnicodeData.txt [DerivedCoreProperties.txt [PropList.txt] [DerivedNormalizationProps.txt]]]\n",
1225 argv[0]);
1226 return 1;
1227 }
1228
1229 const char *pszBaseDir = NULL;
1230 const char *pszUnicodeData = "UnicodeData.txt";
1231 const char *pszDerivedCoreProperties = "DerivedCoreProperties.txt";
1232 const char *pszPropList = "PropList.txt";
1233 const char *pszDerivedNormalizationProps = "DerivedNormalizationProps.txt";
1234 int iFile = 0;
1235 for (int argi = 1; argi < argc; argi++)
1236 {
1237 if (argv[argi][0] != '-')
1238 {
1239 switch (iFile++)
1240 {
1241 case 0: pszUnicodeData = argv[argi]; break;
1242 case 1: pszDerivedCoreProperties = argv[argi]; break;
1243 case 2: pszPropList = argv[argi]; break;
1244 case 3: pszDerivedNormalizationProps = argv[argi]; break;
1245 default:
1246 fprintf(stderr, "uniread: syntax error at '%s': too many filenames\n", argv[argi]);
1247 return 1;
1248 }
1249 }
1250 else if ( !strcmp(argv[argi], "--dir")
1251 || !strcmp(argv[argi], "-C"))
1252 {
1253 if (argi + 1 >= argc)
1254 {
1255 fprintf(stderr, "uniread: syntax error: '%s' is missing the directory name.\n", argv[argi]);
1256 return 1;
1257 }
1258 argi++;
1259 pszBaseDir = argv[argi];
1260 }
1261 else
1262 {
1263 fprintf(stderr, "uniread: syntax error at '%s': Unknown argument\n", argv[argi]);
1264 return 1;
1265 }
1266 }
1267
1268 /*
1269 * Read the data.
1270 */
1271 int rc = ReadUnicodeData(pszBaseDir, pszUnicodeData);
1272 if (rc)
1273 return rc;
1274 rc = GenerateExcludedData();
1275 if (rc)
1276 return rc;
1277 rc = ReadProperties(pszBaseDir, pszPropList);
1278 if (rc)
1279 return rc;
1280 rc = ReadProperties(pszBaseDir, pszDerivedCoreProperties);
1281 if (rc)
1282 return rc;
1283 rc = ReadProperties(pszBaseDir, pszDerivedNormalizationProps);
1284 if (rc)
1285 return rc;
1286
1287 /*
1288 * Produce output files.
1289 */
1290 rc = Stream1Init("unidata-flags.cpp");
1291 if (!rc)
1292 rc = PrintHeader(argv[0], pszBaseDir);
1293 if (!rc)
1294 rc = PrintFlags();
1295
1296 rc = Stream1Init("unidata-upper.cpp");
1297 if (!rc)
1298 rc = PrintHeader(argv[0], pszBaseDir);
1299 if (!rc)
1300 rc = PrintUpper();
1301
1302 rc = Stream1Init("unidata-lower.cpp");
1303 if (!rc)
1304 rc = PrintHeader(argv[0], pszBaseDir);
1305 if (!rc)
1306 rc = PrintLower();
1307 if (!rc)
1308 rc = Stream1Close();
1309
1310 /* done */
1311 return rc;
1312}
1313
Note: See TracBrowser for help on using the repository browser.

© 2024 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette