VirtualBox

source: vbox/trunk/src/VBox/Runtime/uniread.cpp@ 4725

Last change on this file since 4725 was 4715, checked in by vboxsync, 17 years ago

warning

  • Property svn:eol-style set to native
  • Property svn:keywords set to Id
File size: 25.6 KB
Line 
1/* $Id: uniread.cpp 4715 2007-09-11 19:44:45Z vboxsync $ */
2/** @file
3 * innotek Portable Runtime - Unicode Specification Reader.
4 */
5
6/*
7 * Copyright (C) 2006-2007 innotek GmbH
8 *
9 * This file is part of VirtualBox Open Source Edition (OSE), as
10 * available from http://www.virtualbox.org. This file is free software;
11 * you can redistribute it and/or modify it under the terms of the GNU
12 * General Public License as published by the Free Software Foundation,
13 * in version 2 as it comes in the "COPYING" file of the VirtualBox OSE
14 * distribution. VirtualBox OSE is distributed in the hope that it will
15 * be useful, but WITHOUT ANY WARRANTY of any kind.
16 */
17
18/*******************************************************************************
19* Header Files *
20*******************************************************************************/
21#include <iprt/types.h>
22#include <iprt/stdarg.h>
23
24#include <stdio.h>
25#include <string.h>
26#include <stdlib.h>
27
28
/**
 * Trims a line in place.
 *
 * Leading spaces and tabs are skipped, everything from the first '#' onwards
 * is cut off, and trailing blanks and line breaks are removed.
 *
 * @returns Pointer to the first non-blank character of the line.
 * @param   pszLine     The line to trim. The buffer is modified.
 */
static char *StripLine(char *pszLine)
{
    /* Skip leading spaces and tabs. */
    pszLine += strspn(pszLine, " \t");

    /* The payload ends at the first '#', or at the terminator when there is none. */
    char *pszEnd = pszLine + strcspn(pszLine, "#");
    *pszEnd = '\0';

    /* Chop off trailing white space, including the line break. */
    while (   pszEnd > pszLine
           && (   pszEnd[-1] == ' '
               || pszEnd[-1] == '\t'
               || pszEnd[-1] == '\n'
               || pszEnd[-1] == '\r'))
        *--pszEnd = '\0';

    return pszLine;
}
60
61
/**
 * Tells whether a line carries no data, i.e. it is empty, consists of white
 * space only, or its first non-blank character starts a '#' comment.
 *
 * @returns true if the line should be skipped, false if it has content.
 * @param   pszLine     The line to examine.
 */
static bool IsCommentOrBlankLine(const char *pszLine)
{
    const char *pszFirst = pszLine + strspn(pszLine, " \t\n\r");
    switch (*pszFirst)
    {
        case '#':
        case '\0':
            return true;
        default:
            return false;
    }
}
73
74
/**
 * Splits the first ';' separated field off a line.
 *
 * The field is terminated in place and stripped of surrounding white space.
 *
 * @returns Pointer to the (stripped) first field.
 * @param   ppsz        Where to store the position at which the next field
 *                      starts. Points at the string terminator when there
 *                      are no more separators.
 * @param   pszLine     The line string. (Could also be *ppsz from a previous
 *                      FirstField/NextField call.)
 */
static char *FirstField(char **ppsz, char *pszLine)
{
    /* Locate the end of this field and work out where the next one begins. */
    char *pszEnd = pszLine + strcspn(pszLine, ";");
    if (*pszEnd == ';')
    {
        *pszEnd = '\0';
        *ppsz = pszEnd + 1;
    }
    else
        *ppsz = pszEnd;

    /* Strip leading white space. */
    pszLine += strspn(pszLine, " \t\r\n");

    /* Strip trailing white space. */
    while (   pszEnd > pszLine
           && (   pszEnd[-1] == ' '
               || pszEnd[-1] == '\t'
               || pszEnd[-1] == '\n'
               || pszEnd[-1] == '\r'))
        *--pszEnd = '\0';

    return pszLine;
}
111
112
113/**
114 * Get the next field in a field enumeration.
115 *
116 * @returns Pointer to the next field.
117 * @param ppsz Where to get and store the string postition.
118 */
119static char *NextField(char **ppsz)
120{
121 return FirstField(ppsz, *ppsz);
122}
123
124
125/**
126 * Converts a code point field to a number.
127 * @returns Code point.
128 * @param psz The field string.
129 */
130static RTUNICP ToNum(const char *psz)
131{
132 char *pszEnd = NULL;
133 unsigned long ul = strtoul(psz, &pszEnd, 16);
134 if (pszEnd && *pszEnd)
135 fprintf(stderr, "warning: failed converting '%s' to a number!\n", psz);
136 return (RTUNICP)ul;
137}
138
139
140/**
141 * Same as ToNum except that if the field is empty the Default is returned.
142 */
143static RTUNICP ToNumDefault(const char *psz, RTUNICP Default)
144{
145 if (*psz)
146 return ToNum(psz);
147 return Default;
148}
149
150
151/**
152 * Converts a code point range to numbers.
153 * @returns The start code point.\
154 * @returns ~(RTUNICP)0 on failure.
155 * @param psz The field string.
156 * @param pLast Where to store the last code point in the range.
157 */
158static RTUNICP ToRange(const char *psz, PRTUNICP pLast)
159{
160 char *pszEnd = NULL;
161 unsigned long ulStart = strtoul(psz, &pszEnd, 16);
162 unsigned long ulLast = ulStart;
163 if (pszEnd && *pszEnd)
164 {
165 if (*pszEnd == '.')
166 {
167 while (*pszEnd == '.')
168 pszEnd++;
169 ulLast = strtoul(pszEnd, &pszEnd, 16);
170 if (pszEnd && *pszEnd)
171 {
172 fprintf(stderr, "warning: failed converting '%s' to a number!\n", psz);
173 return ~(RTUNICP)0;
174 }
175 }
176 else
177 {
178 fprintf(stderr, "warning: failed converting '%s' to a number!\n", psz);
179 return ~(RTUNICP)0;
180 }
181 }
182 *pLast = (RTUNICP)ulLast;
183 return (RTUNICP)ulStart;
184
185}
186
187
/**
 * Duplicates a string on the heap.
 *
 * Empty strings are not copied; a pointer to a string literal is returned
 * for them instead, which saves memory. The process is terminated with exit
 * code 1 if the allocation fails, so the function never returns NULL.
 *
 * @returns Pointer to the string copy.
 * @param   pszStr      The string to duplicate.
 */
static char *DupStr(const char *pszStr)
{
    if (*pszStr == '\0')
        return (char *)"";

    size_t const cb = strlen(pszStr) + 1; /* includes the terminator */
    char *pszCopy = (char *)malloc(cb);
    if (!pszCopy)
    {
        fprintf(stderr, "out of memory!\n");
        exit(1);
    }
    memcpy(pszCopy, pszStr, cb);
    return pszCopy;
}
205
206
207/**
208 * Array of all possible and impossible unicode code points as of 4.1
209 */
210struct CPINFO
211{
212 RTUNICP CodePoint;
213 RTUNICP SimpleUpperCaseMapping;
214 RTUNICP SimpleLowerCaseMapping;
215 RTUNICP SimpleTitleCaseMapping;
216 const char *pszName;
217 /** Set if this is an unused entry */
218 unsigned fNullEntry : 1;
219
220 unsigned fAlphabetic : 1;
221 unsigned fASCIIHexDigit : 1;
222 unsigned fBidiControl : 1;
223 unsigned fDash : 1;
224 unsigned fDefaultIgnorableCodePoint : 1;
225 unsigned fDeprecated : 1;
226 unsigned fDiacritic : 1;
227 unsigned fExtender : 1;
228 unsigned fGraphemeBase : 1;
229 unsigned fGraphemeExtend : 1;
230 unsigned fGraphemeLink : 1;
231 unsigned fHexDigit : 1;
232 unsigned fHyphen : 1;
233 unsigned fIDContinue : 1;
234 unsigned fIdeographic : 1;
235 unsigned fIDSBinaryOperator : 1;
236 unsigned fIDStart : 1;
237 unsigned fIDSTrinaryOperator : 1;
238 unsigned fJoinControl : 1;
239 unsigned fLogicalOrderException : 1;
240 unsigned fLowercase : 1;
241 unsigned fMath : 1;
242 unsigned fNoncharacterCodePoint : 1;
243 unsigned fOtherAlphabetic : 1;
244 unsigned fOtherDefaultIgnorableCodePoint : 1;
245 unsigned fOtherGraphemeExtend : 1;
246 unsigned fOtherIDContinue : 1;
247 unsigned fOtherIDStart : 1;
248 unsigned fOtherLowercase : 1;
249 unsigned fOtherMath : 1;
250 unsigned fOtherUppercase : 1;
251 unsigned fPatternSyntax : 1;
252 unsigned fPatternWhiteSpace : 1;
253 unsigned fQuotationMark : 1;
254 unsigned fRadical : 1;
255 unsigned fSoftDotted : 1;
256 unsigned fSTerm : 1;
257 unsigned fTerminalPunctuation : 1;
258 unsigned fUnifiedIdeograph : 1;
259 unsigned fUppercase : 1;
260 unsigned fVariationSelector : 1;
261 unsigned fWhiteSpace : 1;
262 unsigned fXIDContinue : 1;
263 unsigned fXIDStart : 1;
264
265 /* unprocess stuff, so far. */
266 const char *pszGeneralCategory;
267 const char *pszCanonicalCombiningClass;
268 const char *pszBidiClass;
269 const char *pszDecompositionType;
270 const char *pszDecompositionMapping;
271 const char *pszNumericType;
272 const char *pszNumericValue;
273 const char *pszBidiMirrored;
274 const char *pszUnicode1Name;
275 const char *pszISOComment;
276} g_aCPInfo[0xf0000];
277
278
279/**
280 * Creates a 'null' entry at i.
281 * @param i The entry in question.
282 */
283static void NullEntry(unsigned i)
284{
285 g_aCPInfo[i].CodePoint = i;
286 g_aCPInfo[i].fNullEntry = 1;
287 g_aCPInfo[i].pszName = "";
288 g_aCPInfo[i].SimpleUpperCaseMapping = i;
289 g_aCPInfo[i].SimpleLowerCaseMapping = i;
290 g_aCPInfo[i].SimpleTitleCaseMapping = i;
291 g_aCPInfo[i].pszGeneralCategory = "";
292 g_aCPInfo[i].pszCanonicalCombiningClass = "";
293 g_aCPInfo[i].pszBidiClass = "";
294 g_aCPInfo[i].pszDecompositionType = "";
295 g_aCPInfo[i].pszDecompositionMapping = "";
296 g_aCPInfo[i].pszNumericType = "";
297 g_aCPInfo[i].pszNumericValue = "";
298 g_aCPInfo[i].pszBidiMirrored = "";
299 g_aCPInfo[i].pszUnicode1Name = "";
300 g_aCPInfo[i].pszISOComment = "";
301}
302
303
304/**
305 * Read the UnicodeData.txt file.
306 * @returns 0 on success.
307 * @returns !0 on failure.
308 * @param pszFilename The name of the file.
309 */
310static int ReadUnicodeData(const char *pszFilename)
311{
312 /*
313 * Open input.
314 */
315 FILE *pFile = fopen(pszFilename, "r");
316 if (!pFile)
317 {
318 printf("uniread: failed to open '%s' for reading\n", pszFilename);
319 return 1;
320 }
321
322 /*
323 * Parse the input and spit out the output.
324 */
325 char szLine[4096];
326 RTUNICP i = 0;
327 while (fgets(szLine, sizeof(szLine), pFile) != NULL)
328 {
329 if (IsCommentOrBlankLine(szLine))
330 continue;
331
332 char *pszCurField;
333 char *pszCodePoint = FirstField(&pszCurField, StripLine(szLine)); /* 0 */
334 char *pszName = NextField(&pszCurField); /* 1 */
335 char *pszGeneralCategory = NextField(&pszCurField); /* 2 */
336 char *pszCanonicalCombiningClass = NextField(&pszCurField); /* 3 */
337 char *pszBidiClass = NextField(&pszCurField); /* 4 */
338 char *pszDecompositionType = NextField(&pszCurField); /* 5 */
339 char *pszDecompositionMapping = NextField(&pszCurField); /* 6 */
340 char *pszNumericType = NextField(&pszCurField); /* 7 */
341 char *pszNumericValue = NextField(&pszCurField); /* 8 */
342 char *pszBidiMirrored = NextField(&pszCurField); /* 9 */
343 char *pszUnicode1Name = NextField(&pszCurField); /* 10 */
344 char *pszISOComment = NextField(&pszCurField); /* 11 */
345 char *pszSimpleUpperCaseMapping = NextField(&pszCurField); /* 12 */
346 char *pszSimpleLowerCaseMapping = NextField(&pszCurField); /* 13 */
347 char *pszSimpleTitleCaseMapping = NextField(&pszCurField); /* 14 */
348
349 RTUNICP CodePoint = ToNum(pszCodePoint);
350 if (CodePoint >= ELEMENTS(g_aCPInfo))
351 continue;
352
353 /* catchup? */
354 while (i < CodePoint)
355 NullEntry(i++);
356 if (i != CodePoint)
357 {
358 fprintf(stderr, "unitest: error: i=%d CodePoint=%u\n", i, CodePoint);
359 fclose(pFile);
360 return 1;
361 }
362
363 /* this one */
364 g_aCPInfo[i].CodePoint = i;
365 g_aCPInfo[i].fNullEntry = 0;
366 g_aCPInfo[i].pszName = DupStr(pszName);
367 g_aCPInfo[i].SimpleUpperCaseMapping = ToNumDefault(pszSimpleUpperCaseMapping, CodePoint);
368 g_aCPInfo[i].SimpleLowerCaseMapping = ToNumDefault(pszSimpleLowerCaseMapping, CodePoint);
369 g_aCPInfo[i].SimpleTitleCaseMapping = ToNumDefault(pszSimpleTitleCaseMapping, CodePoint);
370 g_aCPInfo[i].pszGeneralCategory = DupStr(pszGeneralCategory);
371 g_aCPInfo[i].pszCanonicalCombiningClass = DupStr(pszCanonicalCombiningClass);
372 g_aCPInfo[i].pszBidiClass = DupStr(pszBidiClass);
373 g_aCPInfo[i].pszDecompositionType = DupStr(pszDecompositionType);
374 g_aCPInfo[i].pszDecompositionMapping = DupStr(pszDecompositionMapping);
375 g_aCPInfo[i].pszNumericType = DupStr(pszNumericType);
376 g_aCPInfo[i].pszNumericValue = DupStr(pszNumericValue);
377 g_aCPInfo[i].pszBidiMirrored = DupStr(pszBidiMirrored);
378 g_aCPInfo[i].pszUnicode1Name = DupStr(pszUnicode1Name);
379 g_aCPInfo[i].pszISOComment = DupStr(pszISOComment);
380 i++;
381 }
382 /* catchup? */
383 while (i < ELEMENTS(g_aCPInfo))
384 NullEntry(i++);
385 fclose(pFile);
386
387 return 0;
388}
389
390
391/**
392 * Applies a property to a code point.
393 *
394 * @param StartCP The code point.
395 * @param pszProperty The property name.
396 */
397static void ApplyProperty(RTUNICP StartCP, const char *pszProperty)
398{
399 if (StartCP >= ELEMENTS(g_aCPInfo))
400 return;
401 struct CPINFO *pCPInfo = &g_aCPInfo[StartCP];
402 /* string switch */
403 if (!strcmp(pszProperty, "ASCII_Hex_Digit")) pCPInfo->fASCIIHexDigit = 1;
404 else if (!strcmp(pszProperty, "Bidi_Control")) pCPInfo->fBidiControl = 1;
405 else if (!strcmp(pszProperty, "Dash")) pCPInfo->fDash = 1;
406 else if (!strcmp(pszProperty, "Deprecated")) pCPInfo->fDeprecated = 1;
407 else if (!strcmp(pszProperty, "Diacritic")) pCPInfo->fDiacritic = 1;
408 else if (!strcmp(pszProperty, "Extender")) pCPInfo->fExtender = 1;
409 else if (!strcmp(pszProperty, "Grapheme_Link")) pCPInfo->fGraphemeLink = 1;
410 else if (!strcmp(pszProperty, "Hex_Digit")) pCPInfo->fHexDigit = 1;
411 else if (!strcmp(pszProperty, "Hyphen")) pCPInfo->fHyphen = 1;
412 else if (!strcmp(pszProperty, "Ideographic")) pCPInfo->fIdeographic = 1;
413 else if (!strcmp(pszProperty, "IDS_Binary_Operator")) pCPInfo->fIDSBinaryOperator = 1;
414 else if (!strcmp(pszProperty, "IDS_Trinary_Operator")) pCPInfo->fIDSTrinaryOperator = 1;
415 else if (!strcmp(pszProperty, "Join_Control")) pCPInfo->fJoinControl = 1;
416 else if (!strcmp(pszProperty, "Logical_Order_Exception")) pCPInfo->fLogicalOrderException = 1;
417 else if (!strcmp(pszProperty, "Noncharacter_Code_Point")) pCPInfo->fNoncharacterCodePoint = 1;
418 else if (!strcmp(pszProperty, "Other_Alphabetic")) pCPInfo->fOtherAlphabetic = 1;
419 else if (!strcmp(pszProperty, "Other_Default_Ignorable_Code_Point")) pCPInfo->fOtherDefaultIgnorableCodePoint = 1;
420 else if (!strcmp(pszProperty, "Other_Grapheme_Extend")) pCPInfo->fOtherGraphemeExtend = 1;
421 else if (!strcmp(pszProperty, "Other_ID_Continue")) pCPInfo->fOtherIDContinue = 1;
422 else if (!strcmp(pszProperty, "Other_ID_Start")) pCPInfo->fOtherIDStart = 1;
423 else if (!strcmp(pszProperty, "Other_Lowercase")) pCPInfo->fOtherLowercase = 1;
424 else if (!strcmp(pszProperty, "Other_Math")) pCPInfo->fOtherMath = 1;
425 else if (!strcmp(pszProperty, "Other_Uppercase")) pCPInfo->fOtherUppercase = 1;
426 else if (!strcmp(pszProperty, "Alphabetic")) pCPInfo->fAlphabetic = 1;
427 else if (!strcmp(pszProperty, "Default_Ignorable_Code_Point")) pCPInfo->fDefaultIgnorableCodePoint = 1;
428 else if (!strcmp(pszProperty, "Grapheme_Base")) pCPInfo->fGraphemeBase = 1;
429 else if (!strcmp(pszProperty, "Grapheme_Extend")) pCPInfo->fGraphemeExtend = 1;
430 else if (!strcmp(pszProperty, "ID_Continue")) pCPInfo->fIDContinue = 1;
431 else if (!strcmp(pszProperty, "ID_Start")) pCPInfo->fIDStart = 1;
432 else if (!strcmp(pszProperty, "XID_Continue")) pCPInfo->fXIDContinue = 1;
433 else if (!strcmp(pszProperty, "XID_Start")) pCPInfo->fXIDStart = 1;
434 else if (!strcmp(pszProperty, "Lowercase")) pCPInfo->fLowercase = 1;
435 else if (!strcmp(pszProperty, "Math")) pCPInfo->fMath = 1;
436 else if (!strcmp(pszProperty, "Uppercase")) pCPInfo->fUppercase = 1;
437 else if (!strcmp(pszProperty, "Pattern_Syntax")) pCPInfo->fPatternSyntax = 1;
438 else if (!strcmp(pszProperty, "Pattern_White_Space")) pCPInfo->fPatternWhiteSpace = 1;
439 else if (!strcmp(pszProperty, "Quotation_Mark")) pCPInfo->fQuotationMark = 1;
440 else if (!strcmp(pszProperty, "Radical")) pCPInfo->fRadical = 1;
441 else if (!strcmp(pszProperty, "Soft_Dotted")) pCPInfo->fSoftDotted = 1;
442 else if (!strcmp(pszProperty, "STerm")) pCPInfo->fSTerm = 1;
443 else if (!strcmp(pszProperty, "Terminal_Punctuation")) pCPInfo->fTerminalPunctuation = 1;
444 else if (!strcmp(pszProperty, "Unified_Ideograph")) pCPInfo->fUnifiedIdeograph = 1;
445 else if (!strcmp(pszProperty, "Variation_Selector")) pCPInfo->fVariationSelector = 1;
446 else if (!strcmp(pszProperty, "White_Space")) pCPInfo->fWhiteSpace = 1;
447 else
448 fprintf(stderr, "uniread: Unknown property '%s'\n", pszProperty);
449}
450
451
452/**
453 * Reads a property file.
454 *
455 * There are several property files, this code can read all
456 * of those but will only make use of the properties it recognizes.
457 *
458 * @returns 0 on success.
459 * @returns !0 on failure.
460 * @param pszFilename The name of the file.
461 */
462static int ReadProperties(const char *pszFilename)
463{
464 /*
465 * Open input.
466 */
467 FILE *pFile = fopen(pszFilename, "r");
468 if (!pFile)
469 {
470 printf("uniread: failed to open '%s' for reading\n", pszFilename);
471 return 1;
472 }
473
474 /*
475 * Parse the input and spit out the output.
476 */
477 char szLine[4096];
478 while (fgets(szLine, sizeof(szLine), pFile) != NULL)
479 {
480 if (IsCommentOrBlankLine(szLine))
481 continue;
482 char *pszCurField;
483 char *pszRange = FirstField(&pszCurField, StripLine(szLine));
484 char *pszProperty = NextField(&pszCurField);
485 if (!*pszProperty)
486 continue;
487
488 RTUNICP LastCP;
489 RTUNICP StartCP = ToRange(pszRange, &LastCP);
490 if (StartCP == ~(RTUNICP)0)
491 continue;
492
493 while (StartCP <= LastCP)
494 ApplyProperty(StartCP++, pszProperty);
495 }
496
497 fclose(pFile);
498
499 return 0;
500}
501
502
/**
 * Appends a flag name to a flag expression string, inserting " | " between
 * flags when the string already holds something.
 *
 * @returns psz.
 * @param   psz         The string to append to. The caller must provide
 *                      enough room; no bounds checking is done.
 * @param   pszFlag     The flag name to append.
 */
static char *AppendFlag(char *psz, const char *pszFlag)
{
    if (*psz != '\0')
        strcat(psz, " | ");
    strcat(psz, pszFlag);
    return psz;
}
518
519/**
520 * Calcs the flags for a code point.
521 * @returns true if there is a flag.
522 * @returns false if the isn't.
523 */
524static bool CalcFlags(struct CPINFO *pInfo, char *pszFlags)
525{
526 pszFlags[0] = '\0';
527 /** @todo read the specs on this other vs standard stuff, and check out the finer points */
528 if (pInfo->fAlphabetic || pInfo->fOtherAlphabetic)
529 AppendFlag(pszFlags, "RTUNI_ALPHA");
530 if (pInfo->fHexDigit || pInfo->fASCIIHexDigit)
531 AppendFlag(pszFlags, "RTUNI_XDIGIT");
532 if (!strcmp(pInfo->pszGeneralCategory, "Nd"))
533 AppendFlag(pszFlags, "RTUNI_DDIGIT");
534 if (pInfo->fWhiteSpace)
535 AppendFlag(pszFlags, "RTUNI_WSPACE");
536 if (pInfo->fUppercase || pInfo->fOtherUppercase)
537 AppendFlag(pszFlags, "RTUNI_UPPER");
538 if (pInfo->fLowercase || pInfo->fOtherLowercase)
539 AppendFlag(pszFlags, "RTUNI_LOWER");
540 //if (pInfo->fNumeric)
541 // AppendFlag(pszFlags, "RTUNI_NUMERIC");
542 if (!*pszFlags)
543 {
544 pszFlags[0] = '0';
545 pszFlags[1] = '\0';
546 return false;
547 }
548 return true;
549}
550
/** The data store for stream two: text that is collected while the primary
 * output goes straight to stdout, and that is written out afterwards. */
static char g_szStream2[10240];
/** Number of characters currently held in g_szStream2 (terminator excluded). */
static unsigned g_offStream2 = 0;

/**
 * Initializes (empties) the 2nd stream.
 */
static void Stream2Init(void)
{
    g_szStream2[0] = '\0';
    g_offStream2 = 0;
}

/**
 * Flushes the 2nd stream to stdout.
 *
 * The buffer is not emptied; call Stream2Init() before reusing it.
 *
 * @returns 0 on success.
 * @returns !0 if not everything could be written.
 */
static int Stream2Flush(void)
{
    size_t const cbWritten = fwrite(g_szStream2, 1, g_offStream2, stdout);
    if (cbWritten != g_offStream2)
    {
        fprintf(stderr, "error: failed writing stream2 to stdout!\n");
        return 1;
    }
    return 0;
}

/**
 * printf to the 2nd stream.
 *
 * The output is formatted with a bounded write, so an oversized string can
 * no longer run past the end of g_szStream2. The process is terminated with
 * exit code 1 if the text does not fit or cannot be formatted.
 *
 * @returns Number of characters appended.
 * @param   pszFormat   The format string.
 * @param   ...         Format arguments.
 */
static int Stream2Printf(const char *pszFormat, ...)
{
    /* Space left including room for the terminator; always >= 1 here. */
    size_t const cbLeft = sizeof(g_szStream2) - g_offStream2;

    va_list va;
    va_start(va, pszFormat);
    int cch = vsnprintf(&g_szStream2[g_offStream2], cbLeft, pszFormat, va);
    va_end(va);

    if (cch < 0)
    {
        fprintf(stderr, "error: stream2 formatting failure!\n");
        exit(1);
    }
    if ((size_t)cch >= cbLeft)
    {
        fprintf(stderr, "error: stream2 overflow!\n");
        exit(1);
    }
    g_offStream2 += (unsigned)cch;
    return cch;
}
590
591
/**
 * Prints the unidata.cpp file header and include list to stdout.
 *
 * @returns 0.
 * @param   argv0       The program name, mentioned in the generated header.
 */
int PrintHeader(const char *argv0)
{
    /*
     * Print file header.
     *
     * Note: the copyright comment must be closed with a star-slash line,
     *       otherwise it swallows the #include that follows it.
     */
    printf("/** @file\n"
           " *\n"
           " * innotek Portable Runtime - Unicode Tables\n"
           " *\n"
           " * Automatically Generated by %s (" __DATE__ " " __TIME__ ")\n"
           " */\n\n"
           "/*\n"
           " * Copyright (C) 2006-2007 innotek GmbH\n"
           " *\n"
           " * This file is part of VirtualBox Open Source Edition (OSE), as\n"
           " * available from http://www.virtualbox.org. This file is free software;\n"
           " * you can redistribute it and/or modify it under the terms of the GNU\n"
           " * General Public License as published by the Free Software Foundation,\n"
           " * in version 2 as it comes in the \"COPYING\" file of the VirtualBox OSE\n"
           " * distribution. VirtualBox OSE is distributed in the hope that it will\n"
           " * be useful, but WITHOUT ANY WARRANTY of any kind.\n"
           " */\n"
           "\n"
           "#include <iprt/uni.h>\n"
           "\n",
           argv0);
    return 0;
}
623
624
625/**
626 * Print the flag tables.
627 */
628int PrintFlags(void)
629{
630 /*
631 * Print flags table.
632 */
633 Stream2Init();
634 Stream2Printf("const RTUNIFLAGSRANGE g_aRTUniFlagRanges[] =\n"
635 "{\n");
636 RTUNICP i = 0;
637 int iStart = -1;
638 while (i < ELEMENTS(g_aCPInfo))
639 {
640 /* figure how far off the next chunk is */
641 char szFlags[256];
642 unsigned iNonNull = i;
643 while ( (g_aCPInfo[iNonNull].fNullEntry || !CalcFlags(&g_aCPInfo[iNonNull], szFlags))
644 && iNonNull < ELEMENTS(g_aCPInfo)
645 && iNonNull >= 256)
646 iNonNull++;
647 if (iNonNull - i > 4096 || iNonNull == ELEMENTS(g_aCPInfo))
648 {
649 if (iStart >= 0)
650 {
651 printf("};\n\n");
652 Stream2Printf(" { 0x%06x, 0x%06x, &g_afRTUniFlags0x%06x[0] },\n", iStart, i, iStart);
653 iStart = -1;
654 }
655 i = iNonNull;
656 }
657 else
658 {
659 if (iStart < 0)
660 {
661 printf("static const uint8_t g_afRTUniFlags0x%06x[] = \n"
662 "{\n", i);
663 iStart = i;
664 }
665 CalcFlags(&g_aCPInfo[i], szFlags);
666 printf(" %50s, /* U+%06x: %s*/\n", szFlags, g_aCPInfo[i].CodePoint, g_aCPInfo[i].pszName);
667 i++;
668 }
669 }
670 Stream2Printf(" { ~(RTUNICP)0, ~(RTUNICP)0, NULL }\n"
671 "};\n\n\n");
672 printf("\n");
673 return Stream2Flush();
674}
675
676
677/**
678 * Prints the upper case tables.
679 */
680static int PrintUpper(void)
681{
682 Stream2Init();
683 Stream2Printf("const RTUNICASERANGE g_aRTUniUpperRanges[] =\n"
684 "{\n");
685 RTUNICP i = 0;
686 int iStart = -1;
687 while (i < ELEMENTS(g_aCPInfo))
688 {
689 /* figure how far off the next chunk is */
690 unsigned iSameCase = i;
691 while ( g_aCPInfo[iSameCase].SimpleUpperCaseMapping == g_aCPInfo[iSameCase].CodePoint
692 && iSameCase < ELEMENTS(g_aCPInfo)
693 && iSameCase >= 256)
694 iSameCase++;
695 if (iSameCase - i > 4096/sizeof(RTUNICP) || iSameCase == ELEMENTS(g_aCPInfo))
696 {
697 if (iStart >= 0)
698 {
699 printf("};\n\n");
700 Stream2Printf(" { 0x%06x, 0x%06x, &g_afRTUniUpper0x%06x[0] },\n", iStart, i, iStart);
701 iStart = -1;
702 }
703 i = iSameCase;
704 }
705 else
706 {
707 if (iStart < 0)
708 {
709 printf("static const RTUNICP g_afRTUniUpper0x%06x[] = \n"
710 "{\n", i);
711 iStart = i;
712 }
713 printf(" 0x%02x, /* U+%06x: %s*/\n", g_aCPInfo[i].SimpleUpperCaseMapping, g_aCPInfo[i].CodePoint, g_aCPInfo[i].pszName);
714 i++;
715 }
716 }
717 Stream2Printf(" { ~(RTUNICP)0, ~(RTUNICP)0, NULL }\n"
718 "};\n\n\n");
719 printf("\n");
720 return Stream2Flush();
721}
722
723
724/**
725 * Prints the lowercase tables.
726 */
727static int PrintLower(void)
728{
729 Stream2Init();
730 Stream2Printf("const RTUNICASERANGE g_aRTUniLowerRanges[] =\n"
731 "{\n");
732 RTUNICP i = 0;
733 int iStart = -1;
734 while (i < ELEMENTS(g_aCPInfo))
735 {
736 /* figure how far off the next chunk is */
737 unsigned iSameCase = i;
738 while ( g_aCPInfo[iSameCase].SimpleLowerCaseMapping == g_aCPInfo[iSameCase].CodePoint
739 && iSameCase < ELEMENTS(g_aCPInfo)
740 && iSameCase >= 256)
741 iSameCase++;
742 if (iSameCase - i > 4096/sizeof(RTUNICP) || iSameCase == ELEMENTS(g_aCPInfo))
743 {
744 if (iStart >= 0)
745 {
746 printf("};\n\n");
747 Stream2Printf(" { 0x%06x, 0x%06x, &g_afRTUniLower0x%06x[0] },\n", iStart, i, iStart);
748 iStart = -1;
749 }
750 i = iSameCase;
751 }
752 else
753 {
754 if (iStart < 0)
755 {
756 printf("static const RTUNICP g_afRTUniLower0x%06x[] = \n"
757 "{\n", i);
758 iStart = i;
759 }
760 printf(" 0x%02x, /* U+%06x: %s*/\n", g_aCPInfo[i].SimpleLowerCaseMapping, g_aCPInfo[i].CodePoint, g_aCPInfo[i].pszName);
761 i++;
762 }
763 }
764 Stream2Printf(" { ~(RTUNICP)0, ~(RTUNICP)0, NULL }\n"
765 "};\n\n\n");
766 printf("\n");
767 return Stream2Flush();
768}
769
770
771int main(int argc, char **argv)
772{
773 /*
774 * Parse args.
775 */
776 if (argc <= 1)
777 {
778 printf("usage: %s [UnicodeData.txt [DerivedCoreProperties.txt [PropList.txt]]]\n", argv[0]);
779 return 1;
780 }
781
782 const char *pszUnicodeData = "UnicodeData.txt";
783 const char *pszDerivedCoreProperties = "DerivedCoreProperties.txt";
784 const char *pszPropList = "PropList.txt";
785 int iFile = 0;
786 for (int argi = 1; argi < argc; argi++)
787 {
788 if (argv[argi][0] != '-')
789 {
790 switch (iFile++)
791 {
792 case 0: pszUnicodeData = argv[argi]; break;
793 case 1: pszDerivedCoreProperties = argv[argi]; break;
794 case 2: pszPropList = argv[argi]; break;
795 default:
796 printf("uniread: syntax error at '%s': too many filenames\n", argv[argi]);
797 return 1;
798 }
799 }
800 else
801 {
802 printf("uniread: syntax error at '%s': Unknown argument\n", argv[argi]);
803 return 1;
804 }
805 }
806
807 /*
808 * Read the data.
809 */
810 int rc = ReadUnicodeData(pszUnicodeData);
811 if (rc)
812 return rc;
813 rc = ReadProperties(pszPropList);
814 if (rc)
815 return rc;
816 rc = ReadProperties(pszDerivedCoreProperties);
817 if (rc)
818 return rc;
819
820 /*
821 * Print stuff.
822 */
823 rc = PrintHeader(argv[0]);
824 if (rc)
825 return rc;
826 rc = PrintFlags();
827 if (rc)
828 return rc;
829 rc = PrintUpper();
830 if (rc)
831 return rc;
832 rc = PrintLower();
833 if (rc)
834 return rc;
835
836 /* done */
837 fflush(stdout);
838
839 return rc;
840}
841
Note: See TracBrowser for help on using the repository browser.

© 2024 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette