VirtualBox

source: vbox/trunk/src/VBox/Runtime/common/string/uniread.cpp@ 7389

Last change on this file since 7389 was 6285, checked in by vboxsync, 17 years ago

(C) 2008

  • Property svn:eol-style set to native
  • Property svn:keywords set to Id
File size: 26.1 KB
Line 
1/* $Id: uniread.cpp 6285 2008-01-08 19:58:53Z vboxsync $ */
2/** @file
3 * innotek Portable Runtime - Unicode Specification Reader.
4 */
5
6/*
7 * Copyright (C) 2006-2007 innotek GmbH
8 *
9 * This file is part of VirtualBox Open Source Edition (OSE), as
10 * available from http://www.virtualbox.org. This file is free software;
11 * you can redistribute it and/or modify it under the terms of the GNU
12 * General Public License (GPL) as published by the Free Software
13 * Foundation, in version 2 as it comes in the "COPYING" file of the
14 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
15 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
16 *
17 * The contents of this file may alternatively be used under the terms
18 * of the Common Development and Distribution License Version 1.0
19 * (CDDL) only, as it comes in the "COPYING.CDDL" file of the
20 * VirtualBox OSE distribution, in which case the provisions of the
21 * CDDL are applicable instead of those of the GPL.
22 *
23 * You may elect to license modified versions of this file under the
24 * terms and conditions of either the GPL or the CDDL or both.
25 */
26
27/*******************************************************************************
28* Header Files *
29*******************************************************************************/
30#include <iprt/types.h>
31#include <iprt/stdarg.h>
32
33#include <stdio.h>
34#include <string.h>
35#include <stdlib.h>
36
37
/**
 * Trims a line in place.
 *
 * Leading spaces and tabs are skipped, everything from the first '#' on is
 * cut off, and trailing whitespace (newline characters included) is removed.
 *
 * @returns Pointer to the first non-blank char inside pszLine.
 * @param   pszLine     The line string to strip (modified in place).
 */
static char *StripLine(char *pszLine)
{
    /* Skip the leading blanks. */
    char *pszStart = pszLine;
    while (*pszStart == ' ' || *pszStart == '\t')
        pszStart++;

    /* The payload ends at the first '#' or, failing that, at the terminator. */
    char *pszEnd = pszStart;
    while (*pszEnd != '\0' && *pszEnd != '#')
        pszEnd++;
    *pszEnd = '\0';

    /* Walk backwards over the trailing whitespace, terminating as we go. */
    while (pszEnd > pszStart)
    {
        char const ch = pszEnd[-1];
        if (ch != ' ' && ch != '\t' && ch != '\n' && ch != '\r')
            break;
        *--pszEnd = '\0';
    }

    return pszStart;
}
69
70
/**
 * Checks whether a line carries no data, i.e. whether the first character
 * that is not whitespace is a '#' or the string terminator.
 *
 * @returns true if the line should be skipped, false if it has content.
 * @param   pszLine     The line to consider.
 */
static bool IsCommentOrBlankLine(const char *pszLine)
{
    for (;; pszLine++)
    {
        switch (*pszLine)
        {
            /* Whitespace: keep scanning. */
            case ' ':
            case '\t':
            case '\n':
            case '\r':
                continue;

            /* Comment start or end of string before any data. */
            case '#':
            case '\0':
                return true;

            default:
                return false;
        }
    }
}
82
83
/**
 * Splits the first ';' separated field off a line.
 *
 * The separator is overwritten with a terminator and the field is stripped
 * of surrounding whitespace, both in place.
 *
 * @returns Pointer to the stripped field.
 * @param   ppsz        Where to store the start of the remaining text: the
 *                      char after the ';', or the end of the string if there
 *                      was no separator.
 * @param   pszLine     The line string. (May be *ppsz from a previous call.)
 */
static char *FirstField(char **ppsz, char *pszLine)
{
    /* Find where this field stops and publish where the next one begins. */
    char *pszEnd = pszLine;
    while (*pszEnd != '\0' && *pszEnd != ';')
        pszEnd++;
    if (*pszEnd == ';')
    {
        *pszEnd = '\0';
        *ppsz = pszEnd + 1;
    }
    else
        *ppsz = pszEnd;

    /* Strip leading whitespace. */
    char *pszField = pszLine;
    while (   *pszField == ' '
           || *pszField == '\t'
           || *pszField == '\r'
           || *pszField == '\n')
        pszField++;

    /* Strip trailing whitespace. */
    while (pszEnd > pszField)
    {
        char const ch = pszEnd[-1];
        if (ch != ' ' && ch != '\t' && ch != '\n' && ch != '\r')
            break;
        *--pszEnd = '\0';
    }

    return pszField;
}
120
121
122/**
123 * Get the next field in a field enumeration.
124 *
125 * @returns Pointer to the next field.
126 * @param ppsz Where to get and store the string postition.
127 */
128static char *NextField(char **ppsz)
129{
130 return FirstField(ppsz, *ppsz);
131}
132
133
134/**
135 * Converts a code point field to a number.
136 * @returns Code point.
137 * @param psz The field string.
138 */
139static RTUNICP ToNum(const char *psz)
140{
141 char *pszEnd = NULL;
142 unsigned long ul = strtoul(psz, &pszEnd, 16);
143 if (pszEnd && *pszEnd)
144 fprintf(stderr, "warning: failed converting '%s' to a number!\n", psz);
145 return (RTUNICP)ul;
146}
147
148
149/**
150 * Same as ToNum except that if the field is empty the Default is returned.
151 */
152static RTUNICP ToNumDefault(const char *psz, RTUNICP Default)
153{
154 if (*psz)
155 return ToNum(psz);
156 return Default;
157}
158
159
160/**
161 * Converts a code point range to numbers.
162 * @returns The start code point.\
163 * @returns ~(RTUNICP)0 on failure.
164 * @param psz The field string.
165 * @param pLast Where to store the last code point in the range.
166 */
167static RTUNICP ToRange(const char *psz, PRTUNICP pLast)
168{
169 char *pszEnd = NULL;
170 unsigned long ulStart = strtoul(psz, &pszEnd, 16);
171 unsigned long ulLast = ulStart;
172 if (pszEnd && *pszEnd)
173 {
174 if (*pszEnd == '.')
175 {
176 while (*pszEnd == '.')
177 pszEnd++;
178 ulLast = strtoul(pszEnd, &pszEnd, 16);
179 if (pszEnd && *pszEnd)
180 {
181 fprintf(stderr, "warning: failed converting '%s' to a number!\n", psz);
182 return ~(RTUNICP)0;
183 }
184 }
185 else
186 {
187 fprintf(stderr, "warning: failed converting '%s' to a number!\n", psz);
188 return ~(RTUNICP)0;
189 }
190 }
191 *pLast = (RTUNICP)ulLast;
192 return (RTUNICP)ulStart;
193
194}
195
196
/**
 * Duplicates a string, sharing a single static "" for empty strings to save
 * memory.
 *
 * Does not return on allocation failure: prints a message to stderr and
 * exits the process with status 1.
 *
 * @returns Pointer to the string copy (heap allocated unless the input is
 *          empty).
 * @param   pszStr      The string to duplicate.
 */
static char *DupStr(const char *pszStr)
{
    if (pszStr[0] == '\0')
        return (char *)"";

    size_t const cbCopy  = strlen(pszStr) + 1; /* terminator included */
    char        *pszCopy = (char *)malloc(cbCopy);
    if (!pszCopy)
    {
        fprintf(stderr, "out of memory!\n");
        exit(1);
    }
    return (char *)memcpy(pszCopy, pszStr, cbCopy);
}
214
215
216/**
217 * Array of all possible and impossible unicode code points as of 4.1
218 */
219struct CPINFO
220{
221 RTUNICP CodePoint;
222 RTUNICP SimpleUpperCaseMapping;
223 RTUNICP SimpleLowerCaseMapping;
224 RTUNICP SimpleTitleCaseMapping;
225 const char *pszName;
226 /** Set if this is an unused entry */
227 unsigned fNullEntry : 1;
228
229 unsigned fAlphabetic : 1;
230 unsigned fASCIIHexDigit : 1;
231 unsigned fBidiControl : 1;
232 unsigned fDash : 1;
233 unsigned fDefaultIgnorableCodePoint : 1;
234 unsigned fDeprecated : 1;
235 unsigned fDiacritic : 1;
236 unsigned fExtender : 1;
237 unsigned fGraphemeBase : 1;
238 unsigned fGraphemeExtend : 1;
239 unsigned fGraphemeLink : 1;
240 unsigned fHexDigit : 1;
241 unsigned fHyphen : 1;
242 unsigned fIDContinue : 1;
243 unsigned fIdeographic : 1;
244 unsigned fIDSBinaryOperator : 1;
245 unsigned fIDStart : 1;
246 unsigned fIDSTrinaryOperator : 1;
247 unsigned fJoinControl : 1;
248 unsigned fLogicalOrderException : 1;
249 unsigned fLowercase : 1;
250 unsigned fMath : 1;
251 unsigned fNoncharacterCodePoint : 1;
252 unsigned fOtherAlphabetic : 1;
253 unsigned fOtherDefaultIgnorableCodePoint : 1;
254 unsigned fOtherGraphemeExtend : 1;
255 unsigned fOtherIDContinue : 1;
256 unsigned fOtherIDStart : 1;
257 unsigned fOtherLowercase : 1;
258 unsigned fOtherMath : 1;
259 unsigned fOtherUppercase : 1;
260 unsigned fPatternSyntax : 1;
261 unsigned fPatternWhiteSpace : 1;
262 unsigned fQuotationMark : 1;
263 unsigned fRadical : 1;
264 unsigned fSoftDotted : 1;
265 unsigned fSTerm : 1;
266 unsigned fTerminalPunctuation : 1;
267 unsigned fUnifiedIdeograph : 1;
268 unsigned fUppercase : 1;
269 unsigned fVariationSelector : 1;
270 unsigned fWhiteSpace : 1;
271 unsigned fXIDContinue : 1;
272 unsigned fXIDStart : 1;
273
274 /* unprocess stuff, so far. */
275 const char *pszGeneralCategory;
276 const char *pszCanonicalCombiningClass;
277 const char *pszBidiClass;
278 const char *pszDecompositionType;
279 const char *pszDecompositionMapping;
280 const char *pszNumericType;
281 const char *pszNumericValue;
282 const char *pszBidiMirrored;
283 const char *pszUnicode1Name;
284 const char *pszISOComment;
285} g_aCPInfo[0xf0000];
286
287
288/**
289 * Creates a 'null' entry at i.
290 * @param i The entry in question.
291 */
292static void NullEntry(unsigned i)
293{
294 g_aCPInfo[i].CodePoint = i;
295 g_aCPInfo[i].fNullEntry = 1;
296 g_aCPInfo[i].pszName = "";
297 g_aCPInfo[i].SimpleUpperCaseMapping = i;
298 g_aCPInfo[i].SimpleLowerCaseMapping = i;
299 g_aCPInfo[i].SimpleTitleCaseMapping = i;
300 g_aCPInfo[i].pszGeneralCategory = "";
301 g_aCPInfo[i].pszCanonicalCombiningClass = "";
302 g_aCPInfo[i].pszBidiClass = "";
303 g_aCPInfo[i].pszDecompositionType = "";
304 g_aCPInfo[i].pszDecompositionMapping = "";
305 g_aCPInfo[i].pszNumericType = "";
306 g_aCPInfo[i].pszNumericValue = "";
307 g_aCPInfo[i].pszBidiMirrored = "";
308 g_aCPInfo[i].pszUnicode1Name = "";
309 g_aCPInfo[i].pszISOComment = "";
310}
311
312
313/**
314 * Read the UnicodeData.txt file.
315 * @returns 0 on success.
316 * @returns !0 on failure.
317 * @param pszFilename The name of the file.
318 */
319static int ReadUnicodeData(const char *pszFilename)
320{
321 /*
322 * Open input.
323 */
324 FILE *pFile = fopen(pszFilename, "r");
325 if (!pFile)
326 {
327 printf("uniread: failed to open '%s' for reading\n", pszFilename);
328 return 1;
329 }
330
331 /*
332 * Parse the input and spit out the output.
333 */
334 char szLine[4096];
335 RTUNICP i = 0;
336 while (fgets(szLine, sizeof(szLine), pFile) != NULL)
337 {
338 if (IsCommentOrBlankLine(szLine))
339 continue;
340
341 char *pszCurField;
342 char *pszCodePoint = FirstField(&pszCurField, StripLine(szLine)); /* 0 */
343 char *pszName = NextField(&pszCurField); /* 1 */
344 char *pszGeneralCategory = NextField(&pszCurField); /* 2 */
345 char *pszCanonicalCombiningClass = NextField(&pszCurField); /* 3 */
346 char *pszBidiClass = NextField(&pszCurField); /* 4 */
347 char *pszDecompositionType = NextField(&pszCurField); /* 5 */
348 char *pszDecompositionMapping = NextField(&pszCurField); /* 6 */
349 char *pszNumericType = NextField(&pszCurField); /* 7 */
350 char *pszNumericValue = NextField(&pszCurField); /* 8 */
351 char *pszBidiMirrored = NextField(&pszCurField); /* 9 */
352 char *pszUnicode1Name = NextField(&pszCurField); /* 10 */
353 char *pszISOComment = NextField(&pszCurField); /* 11 */
354 char *pszSimpleUpperCaseMapping = NextField(&pszCurField); /* 12 */
355 char *pszSimpleLowerCaseMapping = NextField(&pszCurField); /* 13 */
356 char *pszSimpleTitleCaseMapping = NextField(&pszCurField); /* 14 */
357
358 RTUNICP CodePoint = ToNum(pszCodePoint);
359 if (CodePoint >= ELEMENTS(g_aCPInfo))
360 continue;
361
362 /* catchup? */
363 while (i < CodePoint)
364 NullEntry(i++);
365 if (i != CodePoint)
366 {
367 fprintf(stderr, "unitest: error: i=%d CodePoint=%u\n", i, CodePoint);
368 fclose(pFile);
369 return 1;
370 }
371
372 /* this one */
373 g_aCPInfo[i].CodePoint = i;
374 g_aCPInfo[i].fNullEntry = 0;
375 g_aCPInfo[i].pszName = DupStr(pszName);
376 g_aCPInfo[i].SimpleUpperCaseMapping = ToNumDefault(pszSimpleUpperCaseMapping, CodePoint);
377 g_aCPInfo[i].SimpleLowerCaseMapping = ToNumDefault(pszSimpleLowerCaseMapping, CodePoint);
378 g_aCPInfo[i].SimpleTitleCaseMapping = ToNumDefault(pszSimpleTitleCaseMapping, CodePoint);
379 g_aCPInfo[i].pszGeneralCategory = DupStr(pszGeneralCategory);
380 g_aCPInfo[i].pszCanonicalCombiningClass = DupStr(pszCanonicalCombiningClass);
381 g_aCPInfo[i].pszBidiClass = DupStr(pszBidiClass);
382 g_aCPInfo[i].pszDecompositionType = DupStr(pszDecompositionType);
383 g_aCPInfo[i].pszDecompositionMapping = DupStr(pszDecompositionMapping);
384 g_aCPInfo[i].pszNumericType = DupStr(pszNumericType);
385 g_aCPInfo[i].pszNumericValue = DupStr(pszNumericValue);
386 g_aCPInfo[i].pszBidiMirrored = DupStr(pszBidiMirrored);
387 g_aCPInfo[i].pszUnicode1Name = DupStr(pszUnicode1Name);
388 g_aCPInfo[i].pszISOComment = DupStr(pszISOComment);
389 i++;
390 }
391 /* catchup? */
392 while (i < ELEMENTS(g_aCPInfo))
393 NullEntry(i++);
394 fclose(pFile);
395
396 return 0;
397}
398
399
400/**
401 * Applies a property to a code point.
402 *
403 * @param StartCP The code point.
404 * @param pszProperty The property name.
405 */
406static void ApplyProperty(RTUNICP StartCP, const char *pszProperty)
407{
408 if (StartCP >= ELEMENTS(g_aCPInfo))
409 return;
410 struct CPINFO *pCPInfo = &g_aCPInfo[StartCP];
411 /* string switch */
412 if (!strcmp(pszProperty, "ASCII_Hex_Digit")) pCPInfo->fASCIIHexDigit = 1;
413 else if (!strcmp(pszProperty, "Bidi_Control")) pCPInfo->fBidiControl = 1;
414 else if (!strcmp(pszProperty, "Dash")) pCPInfo->fDash = 1;
415 else if (!strcmp(pszProperty, "Deprecated")) pCPInfo->fDeprecated = 1;
416 else if (!strcmp(pszProperty, "Diacritic")) pCPInfo->fDiacritic = 1;
417 else if (!strcmp(pszProperty, "Extender")) pCPInfo->fExtender = 1;
418 else if (!strcmp(pszProperty, "Grapheme_Link")) pCPInfo->fGraphemeLink = 1;
419 else if (!strcmp(pszProperty, "Hex_Digit")) pCPInfo->fHexDigit = 1;
420 else if (!strcmp(pszProperty, "Hyphen")) pCPInfo->fHyphen = 1;
421 else if (!strcmp(pszProperty, "Ideographic")) pCPInfo->fIdeographic = 1;
422 else if (!strcmp(pszProperty, "IDS_Binary_Operator")) pCPInfo->fIDSBinaryOperator = 1;
423 else if (!strcmp(pszProperty, "IDS_Trinary_Operator")) pCPInfo->fIDSTrinaryOperator = 1;
424 else if (!strcmp(pszProperty, "Join_Control")) pCPInfo->fJoinControl = 1;
425 else if (!strcmp(pszProperty, "Logical_Order_Exception")) pCPInfo->fLogicalOrderException = 1;
426 else if (!strcmp(pszProperty, "Noncharacter_Code_Point")) pCPInfo->fNoncharacterCodePoint = 1;
427 else if (!strcmp(pszProperty, "Other_Alphabetic")) pCPInfo->fOtherAlphabetic = 1;
428 else if (!strcmp(pszProperty, "Other_Default_Ignorable_Code_Point")) pCPInfo->fOtherDefaultIgnorableCodePoint = 1;
429 else if (!strcmp(pszProperty, "Other_Grapheme_Extend")) pCPInfo->fOtherGraphemeExtend = 1;
430 else if (!strcmp(pszProperty, "Other_ID_Continue")) pCPInfo->fOtherIDContinue = 1;
431 else if (!strcmp(pszProperty, "Other_ID_Start")) pCPInfo->fOtherIDStart = 1;
432 else if (!strcmp(pszProperty, "Other_Lowercase")) pCPInfo->fOtherLowercase = 1;
433 else if (!strcmp(pszProperty, "Other_Math")) pCPInfo->fOtherMath = 1;
434 else if (!strcmp(pszProperty, "Other_Uppercase")) pCPInfo->fOtherUppercase = 1;
435 else if (!strcmp(pszProperty, "Alphabetic")) pCPInfo->fAlphabetic = 1;
436 else if (!strcmp(pszProperty, "Default_Ignorable_Code_Point")) pCPInfo->fDefaultIgnorableCodePoint = 1;
437 else if (!strcmp(pszProperty, "Grapheme_Base")) pCPInfo->fGraphemeBase = 1;
438 else if (!strcmp(pszProperty, "Grapheme_Extend")) pCPInfo->fGraphemeExtend = 1;
439 else if (!strcmp(pszProperty, "ID_Continue")) pCPInfo->fIDContinue = 1;
440 else if (!strcmp(pszProperty, "ID_Start")) pCPInfo->fIDStart = 1;
441 else if (!strcmp(pszProperty, "XID_Continue")) pCPInfo->fXIDContinue = 1;
442 else if (!strcmp(pszProperty, "XID_Start")) pCPInfo->fXIDStart = 1;
443 else if (!strcmp(pszProperty, "Lowercase")) pCPInfo->fLowercase = 1;
444 else if (!strcmp(pszProperty, "Math")) pCPInfo->fMath = 1;
445 else if (!strcmp(pszProperty, "Uppercase")) pCPInfo->fUppercase = 1;
446 else if (!strcmp(pszProperty, "Pattern_Syntax")) pCPInfo->fPatternSyntax = 1;
447 else if (!strcmp(pszProperty, "Pattern_White_Space")) pCPInfo->fPatternWhiteSpace = 1;
448 else if (!strcmp(pszProperty, "Quotation_Mark")) pCPInfo->fQuotationMark = 1;
449 else if (!strcmp(pszProperty, "Radical")) pCPInfo->fRadical = 1;
450 else if (!strcmp(pszProperty, "Soft_Dotted")) pCPInfo->fSoftDotted = 1;
451 else if (!strcmp(pszProperty, "STerm")) pCPInfo->fSTerm = 1;
452 else if (!strcmp(pszProperty, "Terminal_Punctuation")) pCPInfo->fTerminalPunctuation = 1;
453 else if (!strcmp(pszProperty, "Unified_Ideograph")) pCPInfo->fUnifiedIdeograph = 1;
454 else if (!strcmp(pszProperty, "Variation_Selector")) pCPInfo->fVariationSelector = 1;
455 else if (!strcmp(pszProperty, "White_Space")) pCPInfo->fWhiteSpace = 1;
456 else
457 fprintf(stderr, "uniread: Unknown property '%s'\n", pszProperty);
458}
459
460
461/**
462 * Reads a property file.
463 *
464 * There are several property files, this code can read all
465 * of those but will only make use of the properties it recognizes.
466 *
467 * @returns 0 on success.
468 * @returns !0 on failure.
469 * @param pszFilename The name of the file.
470 */
471static int ReadProperties(const char *pszFilename)
472{
473 /*
474 * Open input.
475 */
476 FILE *pFile = fopen(pszFilename, "r");
477 if (!pFile)
478 {
479 printf("uniread: failed to open '%s' for reading\n", pszFilename);
480 return 1;
481 }
482
483 /*
484 * Parse the input and spit out the output.
485 */
486 char szLine[4096];
487 while (fgets(szLine, sizeof(szLine), pFile) != NULL)
488 {
489 if (IsCommentOrBlankLine(szLine))
490 continue;
491 char *pszCurField;
492 char *pszRange = FirstField(&pszCurField, StripLine(szLine));
493 char *pszProperty = NextField(&pszCurField);
494 if (!*pszProperty)
495 continue;
496
497 RTUNICP LastCP;
498 RTUNICP StartCP = ToRange(pszRange, &LastCP);
499 if (StartCP == ~(RTUNICP)0)
500 continue;
501
502 while (StartCP <= LastCP)
503 ApplyProperty(StartCP++, pszProperty);
504 }
505
506 fclose(pFile);
507
508 return 0;
509}
510
511
/**
 * Appends a flag name to a flag expression string, inserting " | " between
 * flags when the string is not empty.
 *
 * The caller must make sure the buffer is large enough; no bounds are
 * checked here.
 *
 * @returns psz.
 * @param   psz         The flag expression buffer (zero terminated).
 * @param   pszFlag     The flag name to append.
 */
static char *AppendFlag(char *psz, const char *pszFlag)
{
    size_t off = strlen(psz);
    if (off != 0)
    {
        /* Not the first flag, OR it onto what is already there. */
        memcpy(&psz[off], " | ", 3);
        off += 3;
    }
    strcpy(&psz[off], pszFlag);
    return psz;
}
527
528/**
529 * Calcs the flags for a code point.
530 * @returns true if there is a flag.
531 * @returns false if the isn't.
532 */
533static bool CalcFlags(struct CPINFO *pInfo, char *pszFlags)
534{
535 pszFlags[0] = '\0';
536 /** @todo read the specs on this other vs standard stuff, and check out the finer points */
537 if (pInfo->fAlphabetic || pInfo->fOtherAlphabetic)
538 AppendFlag(pszFlags, "RTUNI_ALPHA");
539 if (pInfo->fHexDigit || pInfo->fASCIIHexDigit)
540 AppendFlag(pszFlags, "RTUNI_XDIGIT");
541 if (!strcmp(pInfo->pszGeneralCategory, "Nd"))
542 AppendFlag(pszFlags, "RTUNI_DDIGIT");
543 if (pInfo->fWhiteSpace)
544 AppendFlag(pszFlags, "RTUNI_WSPACE");
545 if (pInfo->fUppercase || pInfo->fOtherUppercase)
546 AppendFlag(pszFlags, "RTUNI_UPPER");
547 if (pInfo->fLowercase || pInfo->fOtherLowercase)
548 AppendFlag(pszFlags, "RTUNI_LOWER");
549 //if (pInfo->fNumeric)
550 // AppendFlag(pszFlags, "RTUNI_NUMERIC");
551 if (!*pszFlags)
552 {
553 pszFlags[0] = '0';
554 pszFlags[1] = '\0';
555 return false;
556 }
557 return true;
558}
559
/** the data store for stream two. */
static char g_szStream2[10240];
/** Number of characters currently stored in g_szStream2 (terminator excluded). */
static unsigned g_offStream2 = 0;

/**
 * Initializes the 2nd stream, discarding anything buffered in it.
 */
static void Stream2Init(void)
{
    g_szStream2[0] = '\0';
    g_offStream2 = 0;
}

/**
 * Flushes the 2nd stream to stdout.
 *
 * @returns 0 on success.
 * @returns !0 if not everything could be written.
 */
static int Stream2Flush(void)
{
    if (fwrite(g_szStream2, 1, g_offStream2, stdout) != g_offStream2)
    {
        fprintf(stderr, "error: failed writing stream2 to stdout!\n");
        return 1;
    }
    return 0;
}

/**
 * printf to the 2nd stream.
 *
 * The output is appended to g_szStream2. The write is bounded by the space
 * left in the buffer; if the formatted text does not fit the process exits
 * with status 1 instead of writing beyond the buffer.
 *
 * @returns Number of characters appended.
 * @param   pszFormat   The printf style format string.
 * @param   ...         Format arguments.
 */
static int Stream2Printf(const char *pszFormat, ...)
{
    /* g_offStream2 is always below the buffer size, so this is at least 1. */
    size_t const cbLeft = sizeof(g_szStream2) - g_offStream2;

    va_list va;
    va_start(va, pszFormat);
    int cch = vsnprintf(&g_szStream2[g_offStream2], cbLeft, pszFormat, va);
    va_end(va);

    if (cch < 0)
    {
        fprintf(stderr, "error: stream2 formatting failed!\n");
        exit(1);
    }
    /* vsnprintf returns the length the full output needs; anything not
       leaving room for the terminator means it got truncated. */
    if ((size_t)cch >= cbLeft)
    {
        fprintf(stderr, "error: stream2 overflow!\n");
        exit(1);
    }
    g_offStream2 += (unsigned)cch;
    return cch;
}
599
600
/**
 * Print the unidata.cpp file header and include list to stdout.
 *
 * The header consists of a doxygen file comment naming the generator and its
 * build time stamp, the license comment and the include of iprt/uni.h.
 *
 * @returns 0.
 * @param   argv0       The name of the generator, put into the file comment.
 */
int PrintHeader(const char *argv0)
{
    /*
     * Print file header.
     *
     * Note: the license comment must be closed before the include, or else
     * the include ends up inside the comment in the generated file.
     */
    printf("/** @file\n"
           " *\n"
           " * innotek Portable Runtime - Unicode Tables\n"
           " *\n"
           " * Automatically Generated by %s (" __DATE__ " " __TIME__ ")\n"
           " */\n\n"
           "/*\n"
           " * Copyright (C) 2006-2008 innotek GmbH\n"
           " *\n"
           " * This file is part of VirtualBox Open Source Edition (OSE), as\n"
           " * available from http://www.virtualbox.org. This file is free software;\n"
           " * you can redistribute it and/or modify it under the terms of the GNU\n"
           " * General Public License as published by the Free Software Foundation,\n"
           " * in version 2 as it comes in the \"COPYING\" file of the VirtualBox OSE\n"
           " * distribution. VirtualBox OSE is distributed in the hope that it will\n"
           " * be useful, but WITHOUT ANY WARRANTY of any kind.\n"
           " */\n"
           "\n"
           "#include <iprt/uni.h>\n"
           "\n",
           argv0);
    return 0;
}
632
633
634/**
635 * Print the flag tables.
636 */
637int PrintFlags(void)
638{
639 /*
640 * Print flags table.
641 */
642 Stream2Init();
643 Stream2Printf("const RTUNIFLAGSRANGE g_aRTUniFlagRanges[] =\n"
644 "{\n");
645 RTUNICP i = 0;
646 int iStart = -1;
647 while (i < ELEMENTS(g_aCPInfo))
648 {
649 /* figure how far off the next chunk is */
650 char szFlags[256];
651 unsigned iNonNull = i;
652 while ( (g_aCPInfo[iNonNull].fNullEntry || !CalcFlags(&g_aCPInfo[iNonNull], szFlags))
653 && iNonNull < ELEMENTS(g_aCPInfo)
654 && iNonNull >= 256)
655 iNonNull++;
656 if (iNonNull - i > 4096 || iNonNull == ELEMENTS(g_aCPInfo))
657 {
658 if (iStart >= 0)
659 {
660 printf("};\n\n");
661 Stream2Printf(" { 0x%06x, 0x%06x, &g_afRTUniFlags0x%06x[0] },\n", iStart, i, iStart);
662 iStart = -1;
663 }
664 i = iNonNull;
665 }
666 else
667 {
668 if (iStart < 0)
669 {
670 printf("static const uint8_t g_afRTUniFlags0x%06x[] = \n"
671 "{\n", i);
672 iStart = i;
673 }
674 CalcFlags(&g_aCPInfo[i], szFlags);
675 printf(" %50s, /* U+%06x: %s*/\n", szFlags, g_aCPInfo[i].CodePoint, g_aCPInfo[i].pszName);
676 i++;
677 }
678 }
679 Stream2Printf(" { ~(RTUNICP)0, ~(RTUNICP)0, NULL }\n"
680 "};\n\n\n");
681 printf("\n");
682 return Stream2Flush();
683}
684
685
686/**
687 * Prints the upper case tables.
688 */
689static int PrintUpper(void)
690{
691 Stream2Init();
692 Stream2Printf("const RTUNICASERANGE g_aRTUniUpperRanges[] =\n"
693 "{\n");
694 RTUNICP i = 0;
695 int iStart = -1;
696 while (i < ELEMENTS(g_aCPInfo))
697 {
698 /* figure how far off the next chunk is */
699 unsigned iSameCase = i;
700 while ( g_aCPInfo[iSameCase].SimpleUpperCaseMapping == g_aCPInfo[iSameCase].CodePoint
701 && iSameCase < ELEMENTS(g_aCPInfo)
702 && iSameCase >= 256)
703 iSameCase++;
704 if (iSameCase - i > 4096/sizeof(RTUNICP) || iSameCase == ELEMENTS(g_aCPInfo))
705 {
706 if (iStart >= 0)
707 {
708 printf("};\n\n");
709 Stream2Printf(" { 0x%06x, 0x%06x, &g_afRTUniUpper0x%06x[0] },\n", iStart, i, iStart);
710 iStart = -1;
711 }
712 i = iSameCase;
713 }
714 else
715 {
716 if (iStart < 0)
717 {
718 printf("static const RTUNICP g_afRTUniUpper0x%06x[] = \n"
719 "{\n", i);
720 iStart = i;
721 }
722 printf(" 0x%02x, /* U+%06x: %s*/\n", g_aCPInfo[i].SimpleUpperCaseMapping, g_aCPInfo[i].CodePoint, g_aCPInfo[i].pszName);
723 i++;
724 }
725 }
726 Stream2Printf(" { ~(RTUNICP)0, ~(RTUNICP)0, NULL }\n"
727 "};\n\n\n");
728 printf("\n");
729 return Stream2Flush();
730}
731
732
733/**
734 * Prints the lowercase tables.
735 */
736static int PrintLower(void)
737{
738 Stream2Init();
739 Stream2Printf("const RTUNICASERANGE g_aRTUniLowerRanges[] =\n"
740 "{\n");
741 RTUNICP i = 0;
742 int iStart = -1;
743 while (i < ELEMENTS(g_aCPInfo))
744 {
745 /* figure how far off the next chunk is */
746 unsigned iSameCase = i;
747 while ( g_aCPInfo[iSameCase].SimpleLowerCaseMapping == g_aCPInfo[iSameCase].CodePoint
748 && iSameCase < ELEMENTS(g_aCPInfo)
749 && iSameCase >= 256)
750 iSameCase++;
751 if (iSameCase - i > 4096/sizeof(RTUNICP) || iSameCase == ELEMENTS(g_aCPInfo))
752 {
753 if (iStart >= 0)
754 {
755 printf("};\n\n");
756 Stream2Printf(" { 0x%06x, 0x%06x, &g_afRTUniLower0x%06x[0] },\n", iStart, i, iStart);
757 iStart = -1;
758 }
759 i = iSameCase;
760 }
761 else
762 {
763 if (iStart < 0)
764 {
765 printf("static const RTUNICP g_afRTUniLower0x%06x[] = \n"
766 "{\n", i);
767 iStart = i;
768 }
769 printf(" 0x%02x, /* U+%06x: %s*/\n", g_aCPInfo[i].SimpleLowerCaseMapping, g_aCPInfo[i].CodePoint, g_aCPInfo[i].pszName);
770 i++;
771 }
772 }
773 Stream2Printf(" { ~(RTUNICP)0, ~(RTUNICP)0, NULL }\n"
774 "};\n\n\n");
775 printf("\n");
776 return Stream2Flush();
777}
778
779
780int main(int argc, char **argv)
781{
782 /*
783 * Parse args.
784 */
785 if (argc <= 1)
786 {
787 printf("usage: %s [UnicodeData.txt [DerivedCoreProperties.txt [PropList.txt]]]\n", argv[0]);
788 return 1;
789 }
790
791 const char *pszUnicodeData = "UnicodeData.txt";
792 const char *pszDerivedCoreProperties = "DerivedCoreProperties.txt";
793 const char *pszPropList = "PropList.txt";
794 int iFile = 0;
795 for (int argi = 1; argi < argc; argi++)
796 {
797 if (argv[argi][0] != '-')
798 {
799 switch (iFile++)
800 {
801 case 0: pszUnicodeData = argv[argi]; break;
802 case 1: pszDerivedCoreProperties = argv[argi]; break;
803 case 2: pszPropList = argv[argi]; break;
804 default:
805 printf("uniread: syntax error at '%s': too many filenames\n", argv[argi]);
806 return 1;
807 }
808 }
809 else
810 {
811 printf("uniread: syntax error at '%s': Unknown argument\n", argv[argi]);
812 return 1;
813 }
814 }
815
816 /*
817 * Read the data.
818 */
819 int rc = ReadUnicodeData(pszUnicodeData);
820 if (rc)
821 return rc;
822 rc = ReadProperties(pszPropList);
823 if (rc)
824 return rc;
825 rc = ReadProperties(pszDerivedCoreProperties);
826 if (rc)
827 return rc;
828
829 /*
830 * Print stuff.
831 */
832 rc = PrintHeader(argv[0]);
833 if (rc)
834 return rc;
835 rc = PrintFlags();
836 if (rc)
837 return rc;
838 rc = PrintUpper();
839 if (rc)
840 return rc;
841 rc = PrintLower();
842 if (rc)
843 return rc;
844
845 /* done */
846 fflush(stdout);
847
848 return rc;
849}
850
Note: See TracBrowser for help on using the repository browser.

© 2024 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette