VirtualBox

source: vbox/trunk/src/VBox/Runtime/common/string/uniread.cpp@ 51770

Last change on this file since 51770 was 51770, checked in by vboxsync, 10 years ago

Merged in iprt++ dev branch.

  • Property svn:eol-style set to native
  • Property svn:keywords set to Id Revision
File size: 40.8 KB
Line 
1/* $Id: uniread.cpp 51770 2014-07-01 18:14:02Z vboxsync $ */
2/** @file
3 * IPRT - Unicode Specification Reader.
4 */
5
6/*
7 * Copyright (C) 2006-2012 Oracle Corporation
8 *
9 * This file is part of VirtualBox Open Source Edition (OSE), as
10 * available from http://www.virtualbox.org. This file is free software;
11 * you can redistribute it and/or modify it under the terms of the GNU
12 * General Public License (GPL) as published by the Free Software
13 * Foundation, in version 2 as it comes in the "COPYING" file of the
14 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
15 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
16 *
17 * The contents of this file may alternatively be used under the terms
18 * of the Common Development and Distribution License Version 1.0
19 * (CDDL) only, as it comes in the "COPYING.CDDL" file of the
20 * VirtualBox OSE distribution, in which case the provisions of the
21 * CDDL are applicable instead of those of the GPL.
22 *
23 * You may elect to license modified versions of this file under the
24 * terms and conditions of either the GPL or the CDDL or both.
25 */
26
27/*******************************************************************************
28* Header Files *
29*******************************************************************************/
30#include <iprt/types.h>
31#include <iprt/stdarg.h>
32#include <iprt/ctype.h>
33
34#include <stdio.h>
35#include <string.h>
36#include <stdlib.h>
37#ifdef _MSC_VER
38# include <direct.h>
39#else
40# include <unistd.h>
41#endif
42
43
44/*******************************************************************************
45* Global Variables *
46*******************************************************************************/
/** Name of the input file that is currently being parsed (NULL if none). */
static const char *g_pszCurFile = NULL;
/** Number of the line most recently read from the current input file. */
static unsigned    g_iLine = 0;
/** The output file currently being written to (NULL if none). */
static FILE       *g_pCurOutFile = NULL;
53
54
55/**
56 * Exit the program after printing a parse error.
57 *
58 * @param pszFormat The message.
59 * @param ... Format arguments.
60 */
61static void ParseError(const char *pszFormat, ...)
62{
63 va_list va;
64 va_start(va, pszFormat);
65 fprintf(stderr, "parse error: %s:%u: ", g_pszCurFile, g_iLine);
66 vfprintf(stderr, pszFormat, va);
67 va_end(va);
68 exit(1);
69}
70
/**
 * Strips a line in place.
 *
 * Leading spaces and tabs are skipped, anything from the first '#' onwards is
 * cut off, and trailing spaces, tabs, newlines and carriage returns are
 * replaced by terminators.
 *
 * @returns Pointer to the first non-blank character of the (modified) line.
 * @param   pszLine     The line to strip.  The buffer is modified.
 */
static char *StripLine(char *pszLine)
{
    /* Skip leading blanks. */
    pszLine += strspn(pszLine, " \t");

    /* Chop off a trailing comment, or else locate the end of the string. */
    char *pszEnd = strchr(pszLine, '#');
    if (pszEnd != NULL)
        *pszEnd = '\0';
    else
        pszEnd = pszLine + strlen(pszLine);

    /* Walk backwards over trailing whitespace, terminating as we go. */
    while (   pszEnd > pszLine
           && (   pszEnd[-1] == ' '
               || pszEnd[-1] == '\t'
               || pszEnd[-1] == '\n'
               || pszEnd[-1] == '\r'))
        *--pszEnd = '\0';

    return pszLine;
}
102
103
/**
 * Checks whether a line carries no data and can be skipped.
 *
 * A line is skippable when, after any leading spaces, tabs, newlines and
 * carriage returns, it is either empty or starts with '#'.
 *
 * @returns true if the line is blank or a comment, false otherwise.
 * @param   pszLine     The line to examine (not modified).
 */
static bool IsCommentOrBlankLine(const char *pszLine)
{
    const char *pszFirst = pszLine + strspn(pszLine, " \t\n\r");
    return *pszFirst == '\0' || *pszFirst == '#';
}
115
116
/**
 * Extracts the first ';'-separated field from a string, in place.
 *
 * The field is terminated at the separator (if any) and stripped of leading
 * and trailing spaces, tabs, newlines and carriage returns.
 *
 * @returns Pointer to the stripped field (may be an empty string).
 * @param   ppsz        Where to store the position at which the following
 *                      field starts: just after the ';', or the string
 *                      terminator when there was no ';'.
 * @param   pszLine     The string to split.  The buffer is modified.  This
 *                      can be the position stored by a previous call.
 */
static char *FirstField(char **ppsz, char *pszLine)
{
    /* Find where this field ends and tell the caller where the next begins. */
    char *pszEnd = strchr(pszLine, ';');
    if (pszEnd != NULL)
    {
        *pszEnd = '\0';
        *ppsz   = pszEnd + 1;
    }
    else
    {
        pszEnd = pszLine + strlen(pszLine);
        *ppsz  = pszEnd;
    }

    /* Strip leading whitespace ... */
    pszLine += strspn(pszLine, " \t\r\n");

    /* ... and trailing whitespace. */
    while (   pszEnd > pszLine
           && (   pszEnd[-1] == ' '
               || pszEnd[-1] == '\t'
               || pszEnd[-1] == '\n'
               || pszEnd[-1] == '\r'))
        *--pszEnd = '\0';

    return pszLine;
}
153
154
155/**
156 * Get the next field in a field enumeration.
157 *
158 * @returns Pointer to the next field.
159 * @param ppsz Where to get and store the string position.
160 */
161static char *NextField(char **ppsz)
162{
163 return FirstField(ppsz, *ppsz);
164}
165
166
167/**
168 * Splits a decomposition field.
169 *
170 * This may start with a type that is enclosed in angle brackets.
171 *
172 * @returns Pointer to the mapping values following the type. @a *ppsz if empty.
173 * @param ppszType Pointer to the type field pointer. On input the type
174 * field contains the combined type and mapping string. On
175 * output this should only contain the type, no angle
176 * brackets. If no type specified, it is replaced with an
177 * empty string (const).
178 */
179static char *SplitDecompField(char **ppszType)
180{
181 /* Empty field? */
182 char *psz = *ppszType;
183 if (!*psz)
184 return psz;
185
186 /* No type? */
187 if (*psz != '<')
188 {
189 *ppszType = (char *)"";
190 return psz;
191 }
192
193 /* Split out the type. */
194 *ppszType = ++psz;
195 psz = strchr(psz, '>');
196 if (!psz)
197 {
198 ParseError("Bad Decomposition Type/Mappings\n");
199 return *ppszType;
200 }
201 *psz++ = '\0';
202
203 psz = StripLine(psz);
204 if (!*psz)
205 ParseError("Missing decomposition mappings\n");
206 return psz;
207}
208
209/**
210 * Converts a code point field to a number.
211 * @returns Code point.
212 * @param psz The field string.
213 */
214static RTUNICP ToNum(const char *psz)
215{
216 char *pszEnd = NULL;
217 unsigned long ul = strtoul(psz, &pszEnd, 16);
218 if (pszEnd && *pszEnd)
219 ParseError("failed converting '%s' to a number!\n", psz);
220 return (RTUNICP)ul;
221}
222
223
224/**
225 * Same as ToNum except that if the field is empty the Default is returned.
226 */
227static RTUNICP ToNumDefault(const char *psz, RTUNICP Default)
228{
229 if (*psz)
230 return ToNum(psz);
231 return Default;
232}
233
234
235/**
236 * Converts a code point range to numbers.
237 * @returns The start code point.\
238 * @returns ~(RTUNICP)0 on failure.
239 * @param psz The field string.
240 * @param pLast Where to store the last code point in the range.
241 */
242static RTUNICP ToRange(const char *psz, PRTUNICP pLast)
243{
244 char *pszEnd = NULL;
245 unsigned long ulStart = strtoul(psz, &pszEnd, 16);
246 unsigned long ulLast = ulStart;
247 if (pszEnd && *pszEnd)
248 {
249 if (*pszEnd == '.')
250 {
251 while (*pszEnd == '.')
252 pszEnd++;
253 ulLast = strtoul(pszEnd, &pszEnd, 16);
254 if (pszEnd && *pszEnd)
255 {
256 ParseError("failed converting '%s' to a number!\n", psz);
257 return ~(RTUNICP)0;
258 }
259 }
260 else
261 {
262 ParseError("failed converting '%s' to a number!\n", psz);
263 return ~(RTUNICP)0;
264 }
265 }
266 *pLast = (RTUNICP)ulLast;
267 return (RTUNICP)ulStart;
268
269}
270
271/**
272 * For converting the decomposition mappings field and similar.
273 *
274 * @returns Mapping array or NULL if none.
275 * @param psz The string to convert. Can be empty.
276 * @param pcEntries Where to store the number of entries.
277 * @param cMax The max number of entries.
278 */
279static PRTUNICP ToMapping(char *psz, unsigned *pcEntries, unsigned cMax)
280{
281 PRTUNICP paCps = NULL;
282 unsigned cAlloc = 0;
283 unsigned i = 0;
284
285 /* Convert the code points. */
286 while (psz)
287 {
288 /* skip leading spaces */
289 while (RT_C_IS_BLANK(*psz))
290 psz++;
291
292 /* the end? */
293 if (!*psz)
294 break;
295
296 /* room left? */
297 if (i >= cMax)
298 {
299 ParseError("Too many mappings.\n");
300 break;
301 }
302 if (i >= cAlloc)
303 {
304 cAlloc += 4;
305 paCps = (PRTUNICP)realloc(paCps, cAlloc * sizeof(paCps[0]));
306 if (!paCps)
307 {
308 fprintf(stderr, "out of memory (%u)\n", (unsigned)(cAlloc * sizeof(paCps[0])));
309 exit(1);
310 }
311 }
312
313 /* Find the end. */
314 char *pszThis = psz;
315 while (RT_C_IS_XDIGIT(*psz))
316 psz++;
317 if (*psz && !RT_C_IS_BLANK(*psz))
318 ParseError("Malformed mappings.\n");
319 if (*psz)
320 *psz++ = '\0';
321
322 /* Convert to number and add it. */
323 paCps[i++] = ToNum(pszThis);
324 }
325
326 *pcEntries = i;
327 return paCps;
328}
329
330
/**
 * Duplicates a string, sharing one constant for empty strings to save memory.
 *
 * Terminates the program with exit(1) if the allocation fails.
 *
 * @returns Pointer to a heap copy of the string, or to a constant empty
 *          string when @a pszStr is empty.  The latter must not be modified
 *          or freed.
 * @param   pszStr      The string to duplicate.
 */
static char *DupStr(const char *pszStr)
{
    /* Empty strings are very common, don't allocate anything for them. */
    if (pszStr[0] == '\0')
        return (char *)"";

    size_t const cbStr   = strlen(pszStr) + 1;
    char        *pszCopy = (char *)malloc(cbStr);
    if (pszCopy == NULL)
    {
        fprintf(stderr, "out of memory!\n");
        exit(1);
    }
    memcpy(pszCopy, pszStr, cbStr);
    return pszCopy;
}
348
349
350/**
351 * Array of all possible and impossible unicode code points as of 4.1
352 */
353struct CPINFO
354{
355 RTUNICP CodePoint;
356 RTUNICP SimpleUpperCaseMapping;
357 RTUNICP SimpleLowerCaseMapping;
358 RTUNICP SimpleTitleCaseMapping;
359 unsigned CanonicalCombiningClass;
360 const char *pszDecompositionType;
361 unsigned cDecompositionMapping;
362 PRTUNICP paDecompositionMapping;
363 const char *pszName;
364 /** Set if this is an unused entry */
365 unsigned fNullEntry : 1;
366
367 unsigned fAlphabetic : 1;
368 unsigned fASCIIHexDigit : 1;
369 unsigned fBidiControl : 1;
370 unsigned fCaseIgnorable : 1;
371 unsigned fCased : 1;
372 unsigned fChangesWhenCasefolded : 1;
373 unsigned fChangesWhenCasemapped : 1;
374 unsigned fChangesWhenLowercased : 1;
375 unsigned fChangesWhenTitlecased : 1;
376 unsigned fChangesWhenUppercased : 1;
377 unsigned fDash : 1;
378 unsigned fDefaultIgnorableCodePoint : 1;
379 unsigned fDeprecated : 1;
380 unsigned fDiacritic : 1;
381 unsigned fExtender : 1;
382 unsigned fGraphemeBase : 1;
383 unsigned fGraphemeExtend : 1;
384 unsigned fGraphemeLink : 1;
385 unsigned fHexDigit : 1;
386 unsigned fHyphen : 1;
387 unsigned fIDContinue : 1;
388 unsigned fIdeographic : 1;
389 unsigned fIDSBinaryOperator : 1;
390 unsigned fIDStart : 1;
391 unsigned fIDSTrinaryOperator : 1;
392 unsigned fJoinControl : 1;
393 unsigned fLogicalOrderException : 1;
394 unsigned fLowercase : 1;
395 unsigned fMath : 1;
396 unsigned fNoncharacterCodePoint : 1;
397 unsigned fOtherAlphabetic : 1;
398 unsigned fOtherDefaultIgnorableCodePoint : 1;
399 unsigned fOtherGraphemeExtend : 1;
400 unsigned fOtherIDContinue : 1;
401 unsigned fOtherIDStart : 1;
402 unsigned fOtherLowercase : 1;
403 unsigned fOtherMath : 1;
404 unsigned fOtherUppercase : 1;
405 unsigned fPatternSyntax : 1;
406 unsigned fPatternWhiteSpace : 1;
407 unsigned fQuotationMark : 1;
408 unsigned fRadical : 1;
409 unsigned fSoftDotted : 1;
410 unsigned fSTerm : 1;
411 unsigned fTerminalPunctuation : 1;
412 unsigned fUnifiedIdeograph : 1;
413 unsigned fUppercase : 1;
414 unsigned fVariationSelector : 1;
415 unsigned fWhiteSpace : 1;
416 unsigned fXIDContinue : 1;
417 unsigned fXIDStart : 1;
418
419 /** @name DerivedNormalizationProps.txt
420 * @{ */
421 unsigned fFullCompositionExclusion : 1;
422 unsigned fInvNFC_QC : 2; /**< If 1 (NFC_QC == N) then code point 100% sure not part of NFC string. */
423 unsigned fInvNFD_QC : 2; /**< If 1 (NFD_QC == N) then code point 100% sure not part of NFD string. */
424 unsigned fInvNFKC_QC : 2;
425 unsigned fInvNFKD_QC : 2;
426 unsigned fExpandsOnNFC : 1;
427 unsigned fExpandsOnNFD : 1;
428 unsigned fExpandsOnNFKC : 1;
429 unsigned fExpandsOnNFKD : 1;
430 /** @} */
431
432 /* unprocessed stuff, so far. */
433 const char *pszGeneralCategory;
434 const char *pszBidiClass;
435 const char *pszNumericType;
436 const char *pszNumericValueD;
437 const char *pszNumericValueN;
438 const char *pszBidiMirrored;
439 const char *pszUnicode1Name;
440 const char *pszISOComment;
441} g_aCPInfo[0x110000];
442
443
444/**
445 * Creates a 'null' entry at i.
446 * @param i The entry in question.
447 */
448static void NullEntry(unsigned i)
449{
450 g_aCPInfo[i].CodePoint = i;
451 g_aCPInfo[i].fNullEntry = 1;
452 g_aCPInfo[i].SimpleUpperCaseMapping = i;
453 g_aCPInfo[i].SimpleLowerCaseMapping = i;
454 g_aCPInfo[i].SimpleTitleCaseMapping = i;
455 g_aCPInfo[i].pszDecompositionType = "";
456 g_aCPInfo[i].cDecompositionMapping = 0;
457 g_aCPInfo[i].paDecompositionMapping = NULL;
458 g_aCPInfo[i].pszName = "";
459 g_aCPInfo[i].pszGeneralCategory = "";
460 g_aCPInfo[i].pszBidiClass = "";
461 g_aCPInfo[i].pszNumericType = "";
462 g_aCPInfo[i].pszNumericValueD = "";
463 g_aCPInfo[i].pszNumericValueN = "";
464 g_aCPInfo[i].pszBidiMirrored = "";
465 g_aCPInfo[i].pszUnicode1Name = "";
466 g_aCPInfo[i].pszISOComment = "";
467}
468
469
470/**
471 * Open a file for reading, optionally with a base path prefixed.
472 *
473 * @returns file stream on success, NULL w/ complaint on failure.
474 * @param pszBasePath The base path, can be NULL.
475 * @param pszFilename The name of the file to open.
476 */
477static FILE *OpenFile(const char *pszBasePath, const char *pszFilename)
478{
479 FILE *pFile;
480 if ( !pszBasePath
481 || *pszFilename == '/'
482#if defined(_MSC_VER) || defined(__OS2__)
483 || *pszFilename == '\\'
484 || (*pszFilename && pszFilename[1] == ':')
485#endif
486 )
487 {
488 pFile = fopen(pszFilename, "r");
489 if (!pFile)
490 fprintf(stderr, "uniread: failed to open '%s' for reading\n", pszFilename);
491 }
492 else
493 {
494 size_t cchBasePath = strlen(pszBasePath);
495 size_t cchFilename = strlen(pszFilename);
496 char *pszFullName = (char *)malloc(cchBasePath + 1 + cchFilename + 1);
497 if (!pszFullName)
498 {
499 fprintf(stderr, "uniread: failed to allocate %d bytes\n", (int)(cchBasePath + 1 + cchFilename + 1));
500 return NULL;
501 }
502
503 memcpy(pszFullName, pszBasePath, cchBasePath);
504 pszFullName[cchBasePath] = '/';
505 memcpy(&pszFullName[cchBasePath + 1], pszFilename, cchFilename + 1);
506
507 pFile = fopen(pszFullName, "r");
508 if (!pFile)
509 fprintf(stderr, "uniread: failed to open '%s' for reading\n", pszFullName);
510 free(pszFullName);
511 }
512 g_pszCurFile = pszFilename;
513 g_iLine = 0;
514 return pFile;
515}
516
517
518/**
519 * Wrapper around fgets that keep track of the line number.
520 *
521 * @returns See fgets.
522 * @param pszBuf The buffer. See fgets for output definition.
523 * @param cbBuf The buffer size.
524 * @param pFile The file to read from.
525 */
526static char *GetLineFromFile(char *pszBuf, int cbBuf, FILE *pFile)
527{
528 g_iLine++;
529 return fgets(pszBuf, cbBuf, pFile);
530}
531
532
533/**
534 * Closes a file opened by OpenFile
535 *
536 * @param pFile The file to close.
537 */
538static void CloseFile(FILE *pFile)
539{
540 g_pszCurFile = NULL;
541 g_iLine = 0;
542 fclose(pFile);
543}
544
545
546/**
547 * Read the UnicodeData.txt file.
548 * @returns 0 on success.
549 * @returns !0 on failure.
550 * @param pszBasePath The base path, can be NULL.
551 * @param pszFilename The name of the file.
552 */
553static int ReadUnicodeData(const char *pszBasePath, const char *pszFilename)
554{
555 /*
556 * Open input.
557 */
558 FILE *pFile = OpenFile(pszBasePath, pszFilename);
559 if (!pFile)
560 return 1;
561
562 /*
563 * Parse the input and spit out the output.
564 */
565 char szLine[4096];
566 RTUNICP i = 0;
567 while (GetLineFromFile(szLine, sizeof(szLine), pFile) != NULL)
568 {
569 if (IsCommentOrBlankLine(szLine))
570 continue;
571
572 char *pszCurField;
573 char *pszCodePoint = FirstField(&pszCurField, StripLine(szLine)); /* 0 */
574 char *pszName = NextField(&pszCurField); /* 1 */
575 char *pszGeneralCategory = NextField(&pszCurField); /* 2 */
576 char *pszCanonicalCombiningClass = NextField(&pszCurField); /* 3 */
577 char *pszBidiClass = NextField(&pszCurField); /* 4 */
578 char *pszDecompositionType = NextField(&pszCurField); /* 5 */
579 char *pszDecompositionMapping = SplitDecompField(&pszDecompositionType);
580 char *pszNumericType = NextField(&pszCurField); /* 6 */
581 char *pszNumericValueD = NextField(&pszCurField); /* 7 */
582 char *pszNumericValueN = NextField(&pszCurField); /* 8 */
583 char *pszBidiMirrored = NextField(&pszCurField); /* 9 */
584 char *pszUnicode1Name = NextField(&pszCurField); /* 10 */
585 char *pszISOComment = NextField(&pszCurField); /* 11 */
586 char *pszSimpleUpperCaseMapping = NextField(&pszCurField); /* 12 */
587 char *pszSimpleLowerCaseMapping = NextField(&pszCurField); /* 13 */
588 char *pszSimpleTitleCaseMapping = NextField(&pszCurField); /* 14 */
589
590 RTUNICP CodePoint = ToNum(pszCodePoint);
591 if (CodePoint >= RT_ELEMENTS(g_aCPInfo))
592 {
593 ParseError("U+05X is out of range\n", CodePoint);
594 continue;
595 }
596
597 /* catchup? */
598 while (i < CodePoint)
599 NullEntry(i++);
600 if (i != CodePoint)
601 {
602 ParseError("i=%d CodePoint=%u\n", i, CodePoint);
603 CloseFile(pFile);
604 return 1;
605 }
606
607 /* this one */
608 g_aCPInfo[i].CodePoint = i;
609 g_aCPInfo[i].fNullEntry = 0;
610 g_aCPInfo[i].pszName = DupStr(pszName);
611 g_aCPInfo[i].SimpleUpperCaseMapping = ToNumDefault(pszSimpleUpperCaseMapping, CodePoint);
612 g_aCPInfo[i].SimpleLowerCaseMapping = ToNumDefault(pszSimpleLowerCaseMapping, CodePoint);
613 g_aCPInfo[i].SimpleTitleCaseMapping = ToNumDefault(pszSimpleTitleCaseMapping, CodePoint);
614 g_aCPInfo[i].CanonicalCombiningClass = ToNum(pszCanonicalCombiningClass);
615 g_aCPInfo[i].pszDecompositionType = DupStr(pszDecompositionType);
616 g_aCPInfo[i].paDecompositionMapping = ToMapping(pszDecompositionMapping, &g_aCPInfo[i].cDecompositionMapping, 20);
617 g_aCPInfo[i].pszGeneralCategory = DupStr(pszGeneralCategory);
618 g_aCPInfo[i].pszBidiClass = DupStr(pszBidiClass);
619 g_aCPInfo[i].pszNumericType = DupStr(pszNumericType);
620 g_aCPInfo[i].pszNumericValueD = DupStr(pszNumericValueD);
621 g_aCPInfo[i].pszNumericValueN = DupStr(pszNumericValueN);
622 g_aCPInfo[i].pszBidiMirrored = DupStr(pszBidiMirrored);
623 g_aCPInfo[i].pszUnicode1Name = DupStr(pszUnicode1Name);
624 g_aCPInfo[i].pszISOComment = DupStr(pszISOComment);
625 i++;
626 }
627
628 /* catchup? */
629 while (i < RT_ELEMENTS(g_aCPInfo))
630 NullEntry(i++);
631 CloseFile(pFile);
632
633 return 0;
634}
635
636
637/**
638 * Generates excluded data.
639 *
640 * @returns 0 on success, exit code on failure.
641 */
642static int GenerateExcludedData(void)
643{
644 /*
645 * Hangul Syllables U+AC00 to U+D7A3.
646 */
647 for (RTUNICP i = 0xac00; i <= 0xd7a3; i++)
648 {
649 g_aCPInfo[i].fNullEntry = 0;
650 g_aCPInfo[i].fInvNFD_QC = 1;
651 /** @todo generate the decomposition: http://unicode.org/reports/tr15/#Hangul
652 * */
653 }
654
655 /** @todo
656 * CJK Ideographs Extension A (U+3400 - U+4DB5)
657 * CJK Ideographs (U+4E00 - U+9FA5)
658 * CJK Ideograph Extension B (U+20000 - U+2A6D6)
659 * CJK Ideograph Extension C (U+2A700 - U+2B734)
660 */
661
662 return 0;
663}
664
665
666
667/**
668 * Worker for ApplyProperty that handles a yes, no, maybe property value.
669 *
670 * @returns 0 (NO), 1 (YES), 2 (MAYBE).
671 * @param ppszNextField The field cursor, input and output.
672 */
673static int YesNoMaybePropertyValue(char **ppszNextField)
674{
675 if (!**ppszNextField)
676 {
677 ParseError("Missing Y/N/M field\n");
678 return 0;
679 }
680 char *psz = NextField(ppszNextField);
681 if (!strcmp(psz, "N"))
682 return 0;
683 if (!strcmp(psz, "Y"))
684 return 1;
685 if (!strcmp(psz, "M"))
686 return 2;
687 ParseError("Unexpected Y/N/M value: '%s'\n", psz);
688 return 0;
689}
690
691
692/**
693 * Inverted version of YesNoMaybePropertyValue
694 *
695 * @returns 1 (NO), 0 (YES), 2 (MAYBE).
696 * @param ppszNextField The field cursor, input and output.
697 */
698static int YesNoMaybePropertyValueInv(char **ppszNextField)
699{
700 unsigned rc = YesNoMaybePropertyValue(ppszNextField);
701 switch (rc)
702 {
703 case 0: return 1;
704 case 1: return 0;
705 default: return rc;
706 }
707}
708
709
710/**
711 * Applies a property to a code point.
712 *
713 * @param StartCP The code point.
714 * @param pszProperty The property name.
715 */
716static void ApplyProperty(RTUNICP StartCP, const char *pszProperty, char *pszNextField)
717{
718 if (StartCP >= RT_ELEMENTS(g_aCPInfo))
719 {
720 ParseError("U+%06X is out of the g_aCPInfo range.\n", StartCP);
721 return;
722 }
723 struct CPINFO *pCPInfo = &g_aCPInfo[StartCP];
724 /* string switch */
725 if (!strcmp(pszProperty, "ASCII_Hex_Digit")) pCPInfo->fASCIIHexDigit = 1;
726 else if (!strcmp(pszProperty, "Alphabetic")) pCPInfo->fAlphabetic = 1;
727 else if (!strcmp(pszProperty, "Bidi_Control")) pCPInfo->fBidiControl = 1;
728 else if (!strcmp(pszProperty, "Case_Ignorable")) pCPInfo->fCaseIgnorable = 1;
729 else if (!strcmp(pszProperty, "Cased")) pCPInfo->fCased = 1;
730 else if (!strcmp(pszProperty, "Changes_When_Casefolded")) pCPInfo->fChangesWhenCasefolded = 1;
731 else if (!strcmp(pszProperty, "Changes_When_Casemapped")) pCPInfo->fChangesWhenCasemapped = 1;
732 else if (!strcmp(pszProperty, "Changes_When_Lowercased")) pCPInfo->fChangesWhenLowercased = 1;
733 else if (!strcmp(pszProperty, "Changes_When_Titlecased")) pCPInfo->fChangesWhenTitlecased = 1;
734 else if (!strcmp(pszProperty, "Changes_When_Uppercased")) pCPInfo->fChangesWhenUppercased = 1;
735 else if (!strcmp(pszProperty, "Dash")) pCPInfo->fDash = 1;
736 else if (!strcmp(pszProperty, "Default_Ignorable_Code_Point")) pCPInfo->fDefaultIgnorableCodePoint = 1;
737 else if (!strcmp(pszProperty, "Deprecated")) pCPInfo->fDeprecated = 1;
738 else if (!strcmp(pszProperty, "Diacritic")) pCPInfo->fDiacritic = 1;
739 else if (!strcmp(pszProperty, "Extender")) pCPInfo->fExtender = 1;
740 else if (!strcmp(pszProperty, "Grapheme_Base")) pCPInfo->fGraphemeBase = 1;
741 else if (!strcmp(pszProperty, "Grapheme_Extend")) pCPInfo->fGraphemeExtend = 1;
742 else if (!strcmp(pszProperty, "Grapheme_Link")) pCPInfo->fGraphemeLink = 1;
743 else if (!strcmp(pszProperty, "Hex_Digit")) pCPInfo->fHexDigit = 1;
744 else if (!strcmp(pszProperty, "Hyphen")) pCPInfo->fHyphen = 1;
745 else if (!strcmp(pszProperty, "ID_Continue")) pCPInfo->fIDContinue = 1;
746 else if (!strcmp(pszProperty, "ID_Start")) pCPInfo->fIDStart = 1;
747 else if (!strcmp(pszProperty, "Ideographic")) pCPInfo->fIdeographic = 1;
748 else if (!strcmp(pszProperty, "IDS_Binary_Operator")) pCPInfo->fIDSBinaryOperator = 1;
749 else if (!strcmp(pszProperty, "IDS_Trinary_Operator")) pCPInfo->fIDSTrinaryOperator = 1;
750 else if (!strcmp(pszProperty, "Join_Control")) pCPInfo->fJoinControl = 1;
751 else if (!strcmp(pszProperty, "Logical_Order_Exception")) pCPInfo->fLogicalOrderException = 1;
752 else if (!strcmp(pszProperty, "Lowercase")) pCPInfo->fLowercase = 1;
753 else if (!strcmp(pszProperty, "Math")) pCPInfo->fMath = 1;
754 else if (!strcmp(pszProperty, "Noncharacter_Code_Point")) pCPInfo->fNoncharacterCodePoint = 1;
755 else if (!strcmp(pszProperty, "Other_Alphabetic")) pCPInfo->fOtherAlphabetic = 1;
756 else if (!strcmp(pszProperty, "Other_Default_Ignorable_Code_Point")) pCPInfo->fOtherDefaultIgnorableCodePoint = 1;
757 else if (!strcmp(pszProperty, "Other_Grapheme_Extend")) pCPInfo->fOtherGraphemeExtend = 1;
758 else if (!strcmp(pszProperty, "Other_ID_Continue")) pCPInfo->fOtherIDContinue = 1;
759 else if (!strcmp(pszProperty, "Other_ID_Start")) pCPInfo->fOtherIDStart = 1;
760 else if (!strcmp(pszProperty, "Other_Lowercase")) pCPInfo->fOtherLowercase = 1;
761 else if (!strcmp(pszProperty, "Other_Math")) pCPInfo->fOtherMath = 1;
762 else if (!strcmp(pszProperty, "Other_Uppercase")) pCPInfo->fOtherUppercase = 1;
763 else if (!strcmp(pszProperty, "Pattern_Syntax")) pCPInfo->fPatternSyntax = 1;
764 else if (!strcmp(pszProperty, "Pattern_White_Space")) pCPInfo->fPatternWhiteSpace = 1;
765 else if (!strcmp(pszProperty, "Quotation_Mark")) pCPInfo->fQuotationMark = 1;
766 else if (!strcmp(pszProperty, "Radical")) pCPInfo->fRadical = 1;
767 else if (!strcmp(pszProperty, "Soft_Dotted")) pCPInfo->fSoftDotted = 1;
768 else if (!strcmp(pszProperty, "STerm")) pCPInfo->fSTerm = 1;
769 else if (!strcmp(pszProperty, "Terminal_Punctuation")) pCPInfo->fTerminalPunctuation = 1;
770 else if (!strcmp(pszProperty, "Unified_Ideograph")) pCPInfo->fUnifiedIdeograph = 1;
771 else if (!strcmp(pszProperty, "Uppercase")) pCPInfo->fUppercase = 1;
772 else if (!strcmp(pszProperty, "Variation_Selector")) pCPInfo->fVariationSelector = 1;
773 else if (!strcmp(pszProperty, "White_Space")) pCPInfo->fWhiteSpace = 1;
774 else if (!strcmp(pszProperty, "XID_Continue")) pCPInfo->fXIDContinue = 1;
775 else if (!strcmp(pszProperty, "XID_Start")) pCPInfo->fXIDStart = 1;
776 /* DerivedNormalizationProps: */
777 else if (!strcmp(pszProperty, "FC_NFKC")) return; /* ignored */
778 else if (!strcmp(pszProperty, "Full_Composition_Exclusion")) pCPInfo->fFullCompositionExclusion = 1;
779 else if (!strcmp(pszProperty, "NFC_QC")) pCPInfo->fInvNFC_QC = YesNoMaybePropertyValueInv(&pszNextField);
780 else if (!strcmp(pszProperty, "NFD_QC")) pCPInfo->fInvNFD_QC = YesNoMaybePropertyValueInv(&pszNextField);
781 else if (!strcmp(pszProperty, "NFKC_QC")) pCPInfo->fInvNFKC_QC = YesNoMaybePropertyValueInv(&pszNextField);
782 else if (!strcmp(pszProperty, "NFKD_QC")) pCPInfo->fInvNFKD_QC = YesNoMaybePropertyValueInv(&pszNextField);
783 else if (!strcmp(pszProperty, "Expands_On_NFC")) pCPInfo->fExpandsOnNFC = 1;
784 else if (!strcmp(pszProperty, "Expands_On_NFD")) pCPInfo->fExpandsOnNFD = 1;
785 else if (!strcmp(pszProperty, "Expands_On_NFKC")) pCPInfo->fExpandsOnNFKC = 1;
786 else if (!strcmp(pszProperty, "Expands_On_NFKD")) pCPInfo->fExpandsOnNFKD = 1;
787 else if (!strcmp(pszProperty, "NFKC_CF")) return; /*ignore */
788 else if (!strcmp(pszProperty, "Changes_When_NFKC_Casefolded")) return; /*ignore */
789 else
790 {
791 ParseError("Unknown property '%s'\n", pszProperty);
792 return;
793 }
794
795 if (pszNextField && *pszNextField)
796 ParseError("Unexpected next field: '%s'\n", pszNextField);
797}
798
799
800/**
801 * Reads a property file.
802 *
803 * There are several property files, this code can read all
804 * of those but will only make use of the properties it recognizes.
805 *
806 * @returns 0 on success.
807 * @returns !0 on failure.
808 * @param pszBasePath The base path, can be NULL.
809 * @param pszFilename The name of the file.
810 */
811static int ReadProperties(const char *pszBasePath, const char *pszFilename)
812{
813 /*
814 * Open input.
815 */
816 FILE *pFile = OpenFile(pszBasePath, pszFilename);
817 if (!pFile)
818 return 1;
819
820 /*
821 * Parse the input and spit out the output.
822 */
823 char szLine[4096];
824 while (GetLineFromFile(szLine, sizeof(szLine), pFile) != NULL)
825 {
826 if (IsCommentOrBlankLine(szLine))
827 continue;
828 char *pszCurField;
829 char *pszRange = FirstField(&pszCurField, StripLine(szLine));
830 char *pszProperty = NextField(&pszCurField);
831 if (!*pszProperty)
832 {
833 ParseError("no property field.\n");
834 continue;
835 }
836
837 RTUNICP LastCP;
838 RTUNICP StartCP = ToRange(pszRange, &LastCP);
839 if (StartCP == ~(RTUNICP)0)
840 continue;
841
842 while (StartCP <= LastCP)
843 ApplyProperty(StartCP++, pszProperty, pszCurField);
844 }
845
846 CloseFile(pFile);
847
848 return 0;
849}
850
851
/**
 * Appends a flag to a flag expression string.
 *
 * If the string already contains something, " | " is inserted between it and
 * the new flag.
 *
 * @returns @a psz.
 * @param   psz         The flag string to append to.  The caller must make
 *                      sure the buffer is large enough, no check is done.
 * @param   pszFlag     The flag to append.
 */
static char *AppendFlag(char *psz, const char *pszFlag)
{
    size_t off = strlen(psz);
    if (off > 0)
    {
        /* Separate it from the flags already there. */
        memcpy(&psz[off], " | ", 3);
        off += 3;
    }
    memcpy(&psz[off], pszFlag, strlen(pszFlag) + 1); /* includes the terminator */
    return psz;
}
867
868/**
869 * Calcs the flags for a code point.
870 * @returns true if there is a flag.
871 * @returns false if the isn't.
872 */
873static bool CalcFlags(struct CPINFO *pInfo, char *pszFlags)
874{
875 pszFlags[0] = '\0';
876 /** @todo read the specs on this other vs standard stuff, and check out the finer points */
877 if (pInfo->fAlphabetic || pInfo->fOtherAlphabetic)
878 AppendFlag(pszFlags, "RTUNI_ALPHA");
879 if (pInfo->fHexDigit || pInfo->fASCIIHexDigit)
880 AppendFlag(pszFlags, "RTUNI_XDIGIT");
881 if (!strcmp(pInfo->pszGeneralCategory, "Nd"))
882 AppendFlag(pszFlags, "RTUNI_DDIGIT");
883 if (pInfo->fWhiteSpace)
884 AppendFlag(pszFlags, "RTUNI_WSPACE");
885 if (pInfo->fUppercase || pInfo->fOtherUppercase)
886 AppendFlag(pszFlags, "RTUNI_UPPER");
887 if (pInfo->fLowercase || pInfo->fOtherLowercase)
888 AppendFlag(pszFlags, "RTUNI_LOWER");
889 //if (pInfo->???)
890 // AppendFlag(pszFlags, "RTUNI_BSPACE");
891#if 0
892 if (pInfo->fInvNFD_QC != 0 || pInfo->fInvNFC_QC != 0)
893 {
894 AppendFlag(pszFlags, "RTUNI_QC_NFX");
895 if (!pInfo->paDecompositionMapping && pInfo->fInvNFD_QC)
896 fprintf(stderr, "uniread: U+%05X is QC_NFD but has no mappings.\n", pInfo->CodePoint);
897 else if (*pInfo->pszDecompositionType && pInfo->fInvNFD_QC)
898 fprintf(stderr, "uniread: U+%05X is QC_NFD but has no canonical mappings.\n", pInfo->CodePoint);
899 }
900 else if (pInfo->paDecompositionMapping && !*pInfo->pszDecompositionType)
901 fprintf(stderr, "uniread: U+%05X is not QC_NFX but has canonical mappings.\n", pInfo->CodePoint);
902#endif
903
904 if (!*pszFlags)
905 {
906 pszFlags[0] = '0';
907 pszFlags[1] = '\0';
908 return false;
909 }
910 return true;
911}
912
913
914/**
915 * Closes the primary output stream.
916 */
917static int Stream1Close(void)
918{
919 if (g_pCurOutFile && g_pCurOutFile != stdout && g_pCurOutFile != stderr)
920 {
921 if (fclose(g_pCurOutFile) != 0)
922 {
923 fprintf(stderr, "Error closing output file.\n");
924 return -1;
925 }
926 }
927 g_pCurOutFile = NULL;
928 return 0;
929}
930
931
932/**
933 * Initializes the 1st stream to output to a given file.
934 */
935static int Stream1Init(const char *pszName)
936{
937 int rc = Stream1Close();
938 if (!rc)
939 {
940 g_pCurOutFile = fopen(pszName, "w");
941 if (!g_pCurOutFile)
942 {
943 fprintf(stderr, "Error opening output file '%s'.\n", pszName);
944 rc = -1;
945 }
946 }
947 return rc;
948}
949
950
951/**
952 * printf wrapper for the primary output stream.
953 *
954 * @returns See vfprintf.
955 * @param pszFormat The vfprintf format string.
956 * @param ... The format arguments.
957 */
958static int Stream1Printf(const char *pszFormat, ...)
959{
960 int cch;
961 va_list va;
962 va_start(va, pszFormat);
963 cch = vfprintf(g_pCurOutFile, pszFormat, va);
964 va_end(va);
965 return cch;
966}
967
968
/** The data store for stream two. */
static char              g_szStream2[10240];
/** The current write offset into g_szStream2. */
static unsigned volatile g_offStream2 = 0;

/**
 * Initializes the 2nd stream by emptying its buffer.
 */
static void Stream2Init(void)
{
    g_offStream2   = 0;
    g_szStream2[0] = '\0';
}
981
982/**
983 * Flushes the 2nd stream to stdout.
984 */
985static int Stream2Flush(void)
986{
987 g_szStream2[g_offStream2] = '\0';
988 Stream1Printf("%s", g_szStream2);
989 Stream2Init();
990 return 0;
991}
992
993/**
994 * printf to the 2nd stream.
995 */
996static int Stream2Printf(const char *pszFormat, ...)
997{
998 unsigned offStream2 = g_offStream2;
999 va_list va;
1000 va_start(va, pszFormat);
1001 int cch = vsprintf(&g_szStream2[offStream2], pszFormat, va);
1002 va_end(va);
1003 offStream2 += cch;
1004 if (offStream2 >= sizeof(g_szStream2))
1005 {
1006 fprintf(stderr, "error: stream2 overflow!\n");
1007 exit(1);
1008 }
1009 g_offStream2 = offStream2;
1010 return cch;
1011}
1012
1013
1014/**
1015 * Print the unidata.cpp file header and include list.
1016 */
1017int PrintHeader(const char *argv0, const char *pszBaseDir)
1018{
1019 char szBuf[1024];
1020 if (!pszBaseDir)
1021 {
1022 memset(szBuf, 0, sizeof(szBuf));
1023#ifdef _MSC_VER
1024 _getcwd(szBuf, sizeof(szBuf));
1025#else
1026 getcwd(szBuf, sizeof(szBuf));
1027#endif
1028 pszBaseDir = szBuf;
1029 }
1030
1031 Stream1Printf("/* $" "Id" "$ */\n"
1032 "/** @file\n"
1033 " * IPRT - Unicode Tables.\n"
1034 " *\n"
1035 " * Automatically Generated from %s\n"
1036 " * by %s (" __DATE__ " " __TIME__ ")\n"
1037 " */\n"
1038 "\n"
1039 "/*\n"
1040 " * Copyright (C) 2006-2014 Oracle Corporation\n"
1041 " *\n"
1042 " * This file is part of VirtualBox Open Source Edition (OSE), as\n"
1043 " * available from http://www.virtualbox.org. This file is free software;\n"
1044 " * you can redistribute it and/or modify it under the terms of the GNU\n"
1045 " * General Public License (GPL) as published by the Free Software\n"
1046 " * Foundation, in version 2 as it comes in the \"COPYING\" file of the\n"
1047 " * VirtualBox OSE distribution. VirtualBox OSE is distributed in the\n"
1048 " * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.\n"
1049 " *\n"
1050 " * The contents of this file may alternatively be used under the terms\n"
1051 " * of the Common Development and Distribution License Version 1.0\n"
1052 " * (CDDL) only, as it comes in the \"COPYING.CDDL\" file of the\n"
1053 " * VirtualBox OSE distribution, in which case the provisions of the\n"
1054 " * CDDL are applicable instead of those of the GPL.\n"
1055 " *\n"
1056 " * You may elect to license modified versions of this file under the\n"
1057 " * terms and conditions of either the GPL or the CDDL or both.\n"
1058 " */\n"
1059 "\n"
1060 "#include <iprt/uni.h>\n"
1061 "\n",
1062 pszBaseDir, argv0);
1063 return 0;
1064}
1065
1066
1067/**
1068 * Print the flag tables.
1069 */
1070int PrintFlags(void)
1071{
1072 /*
1073 * Print flags table.
1074 */
1075 Stream2Init();
1076 Stream2Printf("RT_DECL_DATA_CONST(const RTUNIFLAGSRANGE) g_aRTUniFlagsRanges[] =\n"
1077 "{\n");
1078 RTUNICP i = 0;
1079 int iStart = -1;
1080 while (i < RT_ELEMENTS(g_aCPInfo))
1081 {
1082 /* figure how far off the next chunk is */
1083 char szFlags[256];
1084 unsigned iNonNull = i;
1085 while ( iNonNull < RT_ELEMENTS(g_aCPInfo)
1086 && iNonNull >= 256
1087 && (g_aCPInfo[iNonNull].fNullEntry || !CalcFlags(&g_aCPInfo[iNonNull], szFlags)) )
1088 iNonNull++;
1089 if (iNonNull - i > 4096 || iNonNull == RT_ELEMENTS(g_aCPInfo))
1090 {
1091 if (iStart >= 0)
1092 {
1093 Stream1Printf("};\n\n");
1094 Stream2Printf(" { 0x%06x, 0x%06x, &g_afRTUniFlags0x%06x[0] },\n", iStart, i, iStart);
1095 iStart = -1;
1096 }
1097 i = iNonNull;
1098 }
1099 else
1100 {
1101 if (iStart < 0)
1102 {
1103 Stream1Printf("static const uint8_t g_afRTUniFlags0x%06x[] =\n"
1104 "{\n", i);
1105 iStart = i;
1106 }
1107 CalcFlags(&g_aCPInfo[i], szFlags);
1108 Stream1Printf(" %50s, /* U+%06x: %s*/\n", szFlags, g_aCPInfo[i].CodePoint, g_aCPInfo[i].pszName);
1109 i++;
1110 }
1111 }
1112 Stream2Printf(" { ~(RTUNICP)0, ~(RTUNICP)0, NULL }\n"
1113 "};\n\n\n");
1114 Stream1Printf("\n");
1115 return Stream2Flush();
1116}
1117
1118
1119/**
1120 * Prints the upper case tables.
1121 */
1122static int PrintUpper(void)
1123{
1124 Stream2Init();
1125 Stream2Printf("RT_DECL_DATA_CONST(const RTUNICASERANGE) g_aRTUniUpperRanges[] =\n"
1126 "{\n");
1127 RTUNICP i = 0;
1128 int iStart = -1;
1129 while (i < RT_ELEMENTS(g_aCPInfo))
1130 {
1131 /* figure how far off the next chunk is */
1132 unsigned iSameCase = i;
1133 while ( iSameCase < RT_ELEMENTS(g_aCPInfo)
1134 && g_aCPInfo[iSameCase].SimpleUpperCaseMapping == g_aCPInfo[iSameCase].CodePoint
1135 && iSameCase >= 256)
1136 iSameCase++;
1137 if (iSameCase - i > 4096/sizeof(RTUNICP) || iSameCase == RT_ELEMENTS(g_aCPInfo))
1138 {
1139 if (iStart >= 0)
1140 {
1141 Stream1Printf("};\n\n");
1142 Stream2Printf(" { 0x%06x, 0x%06x, &g_afRTUniUpper0x%06x[0] },\n", iStart, i, iStart);
1143 iStart = -1;
1144 }
1145 i = iSameCase;
1146 }
1147 else
1148 {
1149 if (iStart < 0)
1150 {
1151 Stream1Printf("static const RTUNICP g_afRTUniUpper0x%06x[] =\n"
1152 "{\n", i);
1153 iStart = i;
1154 }
1155 Stream1Printf(" 0x%02x, /* U+%06x: %s*/\n", g_aCPInfo[i].SimpleUpperCaseMapping, g_aCPInfo[i].CodePoint, g_aCPInfo[i].pszName);
1156 i++;
1157 }
1158 }
1159 Stream2Printf(" { ~(RTUNICP)0, ~(RTUNICP)0, NULL }\n"
1160 "};\n\n\n");
1161 Stream1Printf("\n");
1162 return Stream2Flush();
1163}
1164
1165
1166/**
1167 * Prints the lowercase tables.
1168 */
1169static int PrintLower(void)
1170{
1171 Stream2Init();
1172 Stream2Printf("RT_DECL_DATA_CONST(const RTUNICASERANGE) g_aRTUniLowerRanges[] =\n"
1173 "{\n");
1174 RTUNICP i = 0;
1175 int iStart = -1;
1176 while (i < RT_ELEMENTS(g_aCPInfo))
1177 {
1178 /* figure how far off the next chunk is */
1179 unsigned iSameCase = i;
1180 while ( iSameCase < RT_ELEMENTS(g_aCPInfo)
1181 && g_aCPInfo[iSameCase].SimpleLowerCaseMapping == g_aCPInfo[iSameCase].CodePoint
1182 && iSameCase >= 256)
1183 iSameCase++;
1184 if (iSameCase - i > 4096/sizeof(RTUNICP) || iSameCase == RT_ELEMENTS(g_aCPInfo))
1185 {
1186 if (iStart >= 0)
1187 {
1188 Stream1Printf("};\n\n");
1189 Stream2Printf(" { 0x%06x, 0x%06x, &g_afRTUniLower0x%06x[0] },\n", iStart, i, iStart);
1190 iStart = -1;
1191 }
1192 i = iSameCase;
1193 }
1194 else
1195 {
1196 if (iStart < 0)
1197 {
1198 Stream1Printf("static const RTUNICP g_afRTUniLower0x%06x[] =\n"
1199 "{\n", i);
1200 iStart = i;
1201 }
1202 Stream1Printf(" 0x%02x, /* U+%06x: %s*/\n",
1203 g_aCPInfo[i].SimpleLowerCaseMapping, g_aCPInfo[i].CodePoint, g_aCPInfo[i].pszName);
1204 i++;
1205 }
1206 }
1207 Stream2Printf(" { ~(RTUNICP)0, ~(RTUNICP)0, NULL }\n"
1208 "};\n\n\n");
1209 Stream1Printf("\n");
1210 return Stream2Flush();
1211}
1212
1213
1214int main(int argc, char **argv)
1215{
1216 /*
1217 * Parse args.
1218 */
1219 if (argc <= 1)
1220 {
1221 printf("usage: %s [-C|--dir <UCD-dir>] [UnicodeData.txt [DerivedCoreProperties.txt [PropList.txt] [DerivedNormalizationProps.txt]]]\n",
1222 argv[0]);
1223 return 1;
1224 }
1225
1226 const char *pszBaseDir = NULL;
1227 const char *pszUnicodeData = "UnicodeData.txt";
1228 const char *pszDerivedCoreProperties = "DerivedCoreProperties.txt";
1229 const char *pszPropList = "PropList.txt";
1230 const char *pszDerivedNormalizationProps = "DerivedNormalizationProps.txt";
1231 int iFile = 0;
1232 for (int argi = 1; argi < argc; argi++)
1233 {
1234 if (argv[argi][0] != '-')
1235 {
1236 switch (iFile++)
1237 {
1238 case 0: pszUnicodeData = argv[argi]; break;
1239 case 1: pszDerivedCoreProperties = argv[argi]; break;
1240 case 2: pszPropList = argv[argi]; break;
1241 case 3: pszDerivedNormalizationProps = argv[argi]; break;
1242 default:
1243 fprintf(stderr, "uniread: syntax error at '%s': too many filenames\n", argv[argi]);
1244 return 1;
1245 }
1246 }
1247 else if ( !strcmp(argv[argi], "--dir")
1248 || !strcmp(argv[argi], "-C"))
1249 {
1250 if (argi + 1 >= argc)
1251 {
1252 fprintf(stderr, "uniread: syntax error: '%s' is missing the directory name.\n", argv[argi]);
1253 return 1;
1254 }
1255 argi++;
1256 pszBaseDir = argv[argi];
1257 }
1258 else
1259 {
1260 fprintf(stderr, "uniread: syntax error at '%s': Unknown argument\n", argv[argi]);
1261 return 1;
1262 }
1263 }
1264
1265 /*
1266 * Read the data.
1267 */
1268 int rc = ReadUnicodeData(pszBaseDir, pszUnicodeData);
1269 if (rc)
1270 return rc;
1271 rc = GenerateExcludedData();
1272 if (rc)
1273 return rc;
1274 rc = ReadProperties(pszBaseDir, pszPropList);
1275 if (rc)
1276 return rc;
1277 rc = ReadProperties(pszBaseDir, pszDerivedCoreProperties);
1278 if (rc)
1279 return rc;
1280 rc = ReadProperties(pszBaseDir, pszDerivedNormalizationProps);
1281 if (rc)
1282 return rc;
1283
1284 /*
1285 * Produce output files.
1286 */
1287 rc = Stream1Init("unidata-flags.cpp");
1288 if (!rc)
1289 rc = PrintHeader(argv[0], pszBaseDir);
1290 if (!rc)
1291 rc = PrintFlags();
1292
1293 rc = Stream1Init("unidata-upper.cpp");
1294 if (!rc)
1295 rc = PrintHeader(argv[0], pszBaseDir);
1296 if (!rc)
1297 rc = PrintUpper();
1298
1299 rc = Stream1Init("unidata-lower.cpp");
1300 if (!rc)
1301 rc = PrintHeader(argv[0], pszBaseDir);
1302 if (!rc)
1303 rc = PrintLower();
1304 if (!rc)
1305 rc = Stream1Close();
1306
1307 /* done */
1308 return rc;
1309}
1310
Note: See TracBrowser for help on using the repository browser.

© 2024 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette