VirtualBox

source: vbox/trunk/src/VBox/Main/src-server/USBIdDatabaseGenerator.cpp@ 58018

Last change on this file since 58018 was 58018, checked in by vboxsync, 9 years ago

USBIdDatabase*: build fix attempt

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 35.9 KB
Line 
1/* $Id: USBIdDatabaseGenerator.cpp 58018 2015-10-03 18:56:48Z vboxsync $ */
2/** @file
3 * USB device vendor and product ID database - generator.
4 */
5
6/*
7 * Copyright (C) 2015 Oracle Corporation
8 *
9 * This file is part of VirtualBox Open Source Edition (OSE), as
10 * available from http://www.virtualbox.org. This file is free software;
11 * you can redistribute it and/or modify it under the terms of the GNU
12 * General Public License (GPL) as published by the Free Software
13 * Foundation, in version 2 as it comes in the "COPYING" file of the
14 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
15 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
16 */
17
18/*********************************************************************************************************************************
19* Header Files *
20*********************************************************************************************************************************/
21#include <stdio.h>
22
23#include <fstream>
24#include <iostream>
25#include <iomanip>
26#include <algorithm>
27#include <map>
28#include <string>
29#include <vector>
30
31#include <iprt/initterm.h>
32#include <iprt/message.h>
33#include <iprt/string.h>
34#include <iprt/stream.h>
35#include "../../Runtime/include/internal/strhash.h" /** @todo make this one public */
36
37#include "../include/USBIdDatabase.h"
38
39
40/** For verbose output. */
41static bool g_fVerbose = false;
42/** Output prefix for informational output. */
43#define INFO_PREF "USBIdDatabaseGenerator: Info: "
44
45
46using namespace std;
47
48static const char * const header =
49 "/** @file\n"
50 " * USB device vendor and product ID database - Autogenerated from <stupid C++ cannot do %s>\n"
51 " */\n"
52 "\n"
53 "/*\n"
54 " * Copyright (C) 2015 Oracle Corporation\n"
55 " *\n"
56 " * This file is part of VirtualBox Open Source Edition(OSE), as\n"
57 " * available from http ://www.virtualbox.org. This file is free software;\n"
58 " * you can redistribute it and / or modify it under the terms of the GNU\n"
59 " * General Public License(GPL) as published by the Free Software\n"
60 " * Foundation, in version 2 as it comes in the \"COPYING\" file of the\n"
61 " * VirtualBox OSE distribution.VirtualBox OSE is distributed in the\n"
62 " * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.\n"
63 " */"
64 "\n"
65 "\n"
66 "#include \"USBIdDatabase.h\"\n"
67 "\n";
68static const char * const product_header =
69 "/**\n"
70 " * USB devices aliases array.\n"
71 " * Format: VendorId, ProductId, Vendor Name, Product Name\n"
72 " * The source of the list is http://www.linux-usb.org/usb.ids\n"
73 " */\n"
74 "USBIDDBPROD const USBIdDatabase::s_aProducts[] =\n"
75 "{\n";
76const char *product_part2 =
77 "};\n"
78 "\n"
79 "\nconst USBIDDBSTR USBIdDatabase::s_aProductNames[] =\n"
80 "{\n";
81const char *product_footer =
82 "};\n"
83 "\n"
84 "const size_t USBIdDatabase::s_cProducts = RT_ELEMENTS(USBIdDatabase::s_aProducts);\n";
85
86const char *vendor_header =
87 "\nUSBIDDBVENDOR const USBIdDatabase::s_aVendors[] =\n"
88 "{\n";
89const char *vendor_part2 =
90 "};\n"
91 "\n"
92 "\nconst USBIDDBSTR USBIdDatabase::s_aVendorNames[] =\n"
93 "{\n";
94const char *vendor_footer =
95 "};\n"
96 "\n"
97 "const size_t USBIdDatabase::s_cVendors = RT_ELEMENTS(USBIdDatabase::s_aVendors);\n";
98
99const char *start_block = "# Vendors, devices and interfaces. Please keep sorted.";
100const char *end_block = "# List of known device classes, subclasses and protocols";
101
102
103// error codes (complements RTEXITCODE_XXX).
104#define ERROR_OPEN_FILE (12)
105#define ERROR_IN_PARSE_LINE (13)
106#define ERROR_DUPLICATE_ENTRY (14)
107#define ERROR_WRONG_FILE_FORMAT (15)
108#define ERROR_TOO_MANY_PRODUCTS (16)
109
110
111/**
112 * String that will end up in the string table.
113 */
114struct StrTabString
115{
116 /** The string. */
117 std::string str;
118 /** The string hash value. */
119 uint32_t uHash;
120 /** The string table reference. */
121 USBIDDBSTR StrRef;
122 /** Pointer to the next string reference (same string table entry). */
123 struct StrTabString *pNextRef;
124 /** Pointer to the next string with the same hash value (collision). */
125 struct StrTabString *pNextCollision;
126
127 void printRef(ostream &rStrm) const
128 {
129 rStrm << " { 0x" << setfill('0') << setw(6) << hex << StrRef.off << ", 0x"
130 << setfill('0') << setw(2) << hex << StrRef.cch << " }, ";
131 }
132
133 void printRefLine(ostream &rStrm) const
134 {
135 printRef(rStrm);
136 rStrm << endl;
137 }
138};
139typedef struct StrTabString *PSTRTABSTRING;
140
141struct VendorRecord
142{
143 size_t vendorID;
144 size_t iProduct;
145 size_t cProducts;
146 StrTabString vendor;
147};
148
149struct ProductRecord
150{
151 size_t key;
152 size_t vendorID;
153 size_t productID;
154 StrTabString product;
155};
156
157bool operator < (const ProductRecord& lh, const ProductRecord& rh)
158{
159 return lh.key < rh.key;
160}
161
162bool operator < (const VendorRecord& lh, const VendorRecord& rh)
163{
164 return lh.vendorID < rh.vendorID;
165}
166
167bool operator == (const ProductRecord& lh, const ProductRecord& rh)
168{
169 return lh.key == rh.key;
170}
171
172bool operator == (const VendorRecord& lh, const VendorRecord& rh)
173{
174 return lh.vendorID == rh.vendorID;
175}
176
177ostream& operator <<(ostream& stream, const ProductRecord product)
178{
179 stream << " { 0x" << setfill('0') << setw(4) << product.productID << " }, " << endl;
180 return stream;
181}
182
183ostream& operator <<(ostream& stream, const VendorRecord vendor)
184{
185 stream << " { 0x" << setfill('0') << setw(4) << hex << vendor.vendorID
186 << ", 0x" << setfill('0') << setw(4) << hex << vendor.iProduct
187 << ", 0x" << setfill('0') << setw(4) << hex << vendor.cProducts << " }, " << endl;
188 return stream;
189}
190
191namespace State
192{
193 typedef int Value;
194 enum
195 {
196 lookForStartBlock,
197 lookForEndBlock,
198 finished
199 };
200}
201
202typedef vector<ProductRecord> ProductsSet;
203typedef vector<VendorRecord> VendorsSet;
204ProductsSet g_products;
205VendorsSet g_vendors;
206
207
208
209/*
210 * String "compression". We replace the 127 most used words with references.
211 */
212#ifdef USB_ID_DATABASE_WITH_COMPRESSION
213
214typedef std::map<std::string, size_t> WORDFREQMAP;
215typedef WORDFREQMAP::value_type WORDFREQPAIR;
216
217/** The 127 words we've picked to be indexed by reference. */
218static StrTabString g_aCompDict[127];
219
220/**
221 * For sorting the frequency fidning in descending order.
222 *
223 * Comparison operators are put outside to make older gcc versions (like 4.1.1
224 * on lnx64-rel) happy.
225 */
226class WordFreqSortEntry
227{
228public:
229 WORDFREQPAIR const *m_pPair;
230
231public:
232 WordFreqSortEntry(WORDFREQPAIR const *pPair) : m_pPair(pPair) {}
233};
234
235bool operator == (WordFreqSortEntry const &rLeft, WordFreqSortEntry const &rRight)
236{
237 return rLeft.m_pPair->second == rRight.m_pPair->second;
238}
239
240bool operator < (WordFreqSortEntry const &rLeft, WordFreqSortEntry const &rRight)
241{
242 return rLeft.m_pPair->second > rRight.m_pPair->second;
243}
244
245
246/**
247 * Replaces the dictionary words and escapes non-ascii chars in a string.
248 *
249 * @param pString The string to fixup.
250 * @param pcchOld The old string length is added to this (stats)
251 * @param pcchNew The new string length is added to this (stats)
252 */
253static void FixupString(std::string *pString, size_t *pcchOld, size_t *pcchNew)
254{
255 char szNew[USB_ID_DATABASE_MAX_STRING * 2];
256 char *pszDst = szNew;
257 const char *pszSrc = pString->c_str();
258 const char *pszSrcEnd = strchr(pszSrc, '\0');
259
260 *pcchOld += pszSrcEnd - pszSrc;
261
262 char ch;
263 while ((ch = *pszSrc) != '\0')
264 {
265 /* Spaces. */
266 while (ch == ' ')
267 {
268 *pszDst++ = ' ';
269 ch = *++pszSrc;
270 }
271 if (!ch)
272 break;
273
274 /* Find the end of the current word. */
275 size_t cchWord = 1;
276 while ((ch = pszSrc[cchWord]) != ' ' && ch != '\0')
277 cchWord++;
278
279 /* Check for g_aWord matches. */
280 size_t cchMax = pszSrcEnd - pszSrc;
281 for (unsigned i = 0; i < RT_ELEMENTS(g_aCompDict); i++)
282 {
283 size_t cchLen = g_aCompDict[i].str.length();
284 if ( cchLen >= cchWord
285 && cchLen <= cchMax
286 && g_aCompDict[i].str.compare(0, cchLen, pszSrc, cchLen) == 0)
287 {
288 *pszDst++ = (unsigned char)(0x80 | i);
289 pszSrc += cchLen;
290 cchWord = 0;
291 break;
292 }
293 }
294
295 if (cchWord)
296 {
297 /* Copy the current word. */
298 ch = *pszSrc;
299 do
300 {
301 if (!((unsigned char)ch & 0x80))
302 {
303 *pszDst++ = ch;
304 pszSrc++;
305 }
306 else
307 {
308 RTUNICP uc;
309 int rc = RTStrGetCpEx(&pszSrc, &uc);
310 if (RT_SUCCESS(rc))
311 {
312 *pszDst++ = (unsigned char)0xff; /* escape single code point. */
313 pszDst = RTStrPutCp(pszDst, uc);
314 }
315 else
316 {
317 cerr << "Error: RTStrGetCpEx failed with rc=" << rc << endl;
318 exit(3);
319 }
320 }
321 } while ((ch = *pszSrc) != '\0' && ch != ' ');
322 }
323 }
324 *pszDst = '\0';
325 *pcchNew += pszDst - &szNew[0];
326
327 *pString = szNew;
328}
329
330
331/**
332 * Analyzes a string.
333 *
334 * @param pFrequencies The word frequency map.
335 * @param rString The string to analyze.
336 */
337static void AnalyzeString(WORDFREQMAP *pFrequencies, std::string const &rString)
338{
339 const char *psz = rString.c_str();
340
341 /*
342 * For now we just consider words.
343 */
344 char ch;
345 while ((ch = *psz) != '\0')
346 {
347 /* Skip leading spaces. */
348 while (ch == ' ')
349 ch = *++psz;
350 if (!ch)
351 return;
352
353 /* Find end of word. */
354 size_t cchWord = 1;
355 while ((ch = psz[cchWord]) != ' ' && ch != '\0')
356 cchWord++;
357 if (cchWord > 1)
358 {
359 std::string strWord(psz, cchWord);
360 WORDFREQMAP::iterator it = pFrequencies->find(strWord);
361 if (it != pFrequencies->end())
362 it->second += cchWord - 1;
363 else
364 (*pFrequencies)[strWord] = 0;
365 /** @todo could gain hits by including the space after the word, but that
366 * has the same accounting problems as the two words scenario below. */
367
368# if 0 /** @todo need better accounting for overlapping alternatives before this can be enabled. */
369 /* Two words - immediate yields calc may lie when this enabled and we may pick the wrong words. */
370 if (ch == ' ')
371 {
372 ch = psz[++cchWord];
373 if (ch != ' ' && ch != '\0')
374 {
375 size_t const cchSaved = cchWord;
376
377 do
378 cchWord++;
379 while ((ch = psz[cchWord]) != ' ' && ch != '\0');
380
381 strWord = std::string(psz, cchWord);
382 WORDFREQMAP::iterator it = pFrequencies->find(strWord);
383 if (it != pFrequencies->end())
384 it->second += cchWord - 1;
385 else
386 (*pFrequencies)[strWord] = 0;
387
388 cchWord = cchSaved;
389 }
390 }
391# endif
392 }
393
394 /* Advance. */
395 psz += cchWord;
396 }
397}
398
399
400/**
401 * Compresses the vendor and product strings.
402 *
403 * This is very very simple (a lot less work that the string table for
404 * instance).
405 */
406static void DoStringCompression(void)
407{
408 /*
409 * Analyze the strings collecting stats on potential sequences to replace.
410 */
411 WORDFREQMAP Frequencies;
412
413 uint32_t cProducts = 0;
414 for (ProductsSet::iterator it = g_products.begin(); it != g_products.end(); ++it, cProducts++)
415 AnalyzeString(&Frequencies, it->product.str);
416
417 uint32_t cVendors = 0;
418 for (VendorsSet::iterator it = g_vendors.begin(); it != g_vendors.end(); ++it, cVendors++)
419 AnalyzeString(&Frequencies, it->vendor.str);
420
421 if (g_fVerbose)
422 {
423 size_t const cbVendorEntry = sizeof(USBIdDatabase::s_aVendors[0]) + sizeof(USBIdDatabase::s_aVendorNames[0]);
424 size_t const cbVendors = cVendors * cbVendorEntry;
425 cout << INFO_PREF << cVendors << " vendors (" << cbVendors << " bytes)" << endl;
426
427 size_t const cbProductEntry = sizeof(USBIdDatabase::s_aProducts[0]) + sizeof(USBIdDatabase::s_aProductNames[0]);
428 size_t const cbProducts = cProducts * cbProductEntry;
429 cout << INFO_PREF << cProducts << " products (" << cbProducts << " bytes)" << endl;
430 }
431
432 /*
433 * Sort the result and pick the top 127 ones.
434 */
435 std::vector<WordFreqSortEntry> SortMap;
436 for (WORDFREQMAP::iterator it = Frequencies.begin(); it != Frequencies.end(); ++it)
437 {
438 WORDFREQPAIR const &rPair = *it;
439 SortMap.push_back(WordFreqSortEntry(&rPair));
440 }
441
442 sort(SortMap.begin(), SortMap.end());
443
444 size_t cb = 0;
445 unsigned i = 0;
446 for (std::vector<WordFreqSortEntry>::iterator it = SortMap.begin();
447 it != SortMap.end() && i < RT_ELEMENTS(g_aCompDict);
448 ++it, i++)
449 {
450 g_aCompDict[i].str = it->m_pPair->first;
451 cb += it->m_pPair->second;
452 }
453
454 if (g_fVerbose)
455 cout << INFO_PREF "Estimated compression saving " << cb << " bytes" << endl;
456
457 /*
458 * Rework the strings.
459 */
460 size_t cchNew = 0;
461 size_t cchOld = 0;
462 for (ProductsSet::iterator it = g_products.begin(); it != g_products.end(); ++it)
463 FixupString(&it->product.str, &cchOld, &cchNew);
464 for (VendorsSet::iterator it = g_vendors.begin(); it != g_vendors.end(); ++it)
465 FixupString(&it->vendor.str, &cchOld, &cchNew);
466
467 for (i = 0; i < RT_ELEMENTS(g_aCompDict); i++)
468 cchNew += g_aCompDict[i].str.length() + 1;
469
470 if (g_fVerbose)
471 {
472 cout << INFO_PREF "Strings: original: " << cchOld << " bytes; compressed: " << cchNew << " bytes;";
473 if (cchNew < cchOld)
474 cout << " saving " << (cchOld - cchNew) << " bytes (" << ((cchOld - cchNew) * 100 / cchOld) << "%)" << endl;
475 else
476 cout << " wasting " << (cchOld - cchNew) << " bytes!" << endl;
477 cout << INFO_PREF "Average string length is " << (cchOld / (cVendors + cProducts)) << endl;
478 }
479}
480
481
482/**
483 * Writes the compression dictionary to the output stream.
484 *
485 * @param rStrm The output stream.
486 */
487static void WriteCompressionDictionary(ostream &rStrm)
488{
489 rStrm << "const USBIDDBSTR USBIdDatabase::s_aCompDict[" << dec << RT_ELEMENTS(g_aCompDict) << "] = " << endl;
490 rStrm << "{" << endl;
491 for (unsigned i = 0; i < RT_ELEMENTS(g_aCompDict); i++)
492 {
493 g_aCompDict[i].printRef(rStrm);
494 rStrm << " // " << g_aCompDict[i].str << endl;
495 }
496 rStrm << "};" << endl << endl;
497}
498
499#endif /* USB_ID_DATABASE_WITH_COMPRESSION */
500
501
502/*
503 * Compile a string table.
504 */
505
506/** The size of g_papStrHash. */
507static size_t g_cStrHash = 0;
508/** String hash table. */
509static PSTRTABSTRING *g_papStrHash = NULL;
510/** Duplicate strings found by AddString. */
511static size_t g_cDuplicateStrings = 0;
512/** Total length of the unique strings (no terminators). */
513static size_t g_cchUniqueStrings = 0;
514/** Number of unique strings after AddString. */
515static size_t g_cUniqueStrings = 0;
516/** Number of collisions. */
517static size_t g_cCollisions = 0;
518
519/** Number of entries in g_apSortedStrings. */
520static size_t g_cSortedStrings = 0;
521/** The sorted string table. */
522static PSTRTABSTRING *g_papSortedStrings = NULL;
523
524/** The string table. */
525static char *g_pachStrTab = NULL;
526/** The actual string table size. */
527static size_t g_cchStrTab = 0;
528
529
530/**
531 * Adds a string to the hash table.
532 * @param pStr The string.
533 */
534static void AddString(PSTRTABSTRING pStr)
535{
536 pStr->pNextRef = NULL;
537 pStr->pNextCollision = NULL;
538 pStr->StrRef.off = 0;
539 pStr->StrRef.cch = pStr->str.length();
540 size_t cchIgnored;
541 pStr->uHash = sdbm(pStr->str.c_str(), &cchIgnored);
542 Assert(cchIgnored == pStr->str.length());
543
544 size_t idxHash = pStr->uHash % g_cStrHash;
545 PSTRTABSTRING pCur = g_papStrHash[idxHash];
546 if (!pCur)
547 g_papStrHash[idxHash] = pStr;
548 else
549 {
550 /* Look for matching string. */
551 do
552 {
553 if ( pCur->uHash == pStr->uHash
554 && pCur->StrRef.cch == pStr->StrRef.cch
555 && pCur->str == pStr->str)
556 {
557 pStr->pNextRef = pCur->pNextRef;
558 pCur->pNextRef = pStr;
559 g_cDuplicateStrings++;
560 return;
561 }
562 pCur = pCur->pNextCollision;
563 } while (pCur != NULL);
564
565 /* No matching string, insert. */
566 g_cCollisions++;
567 pStr->pNextCollision = g_papStrHash[idxHash];
568 g_papStrHash[idxHash] = pStr;
569 }
570
571 g_cUniqueStrings++;
572 g_cchUniqueStrings += pStr->StrRef.cch;
573}
574
575
576/**
577 * Inserts a string into g_apUniqueStrings.
578 * @param pStr The string.
579 */
580static void InsertUniqueString(PSTRTABSTRING pStr)
581{
582 size_t iIdx;
583 size_t iStart = 0;
584 size_t iEnd = g_cSortedStrings;
585 for (;;)
586 {
587 iIdx = iStart + (iEnd - iStart) / 2;
588 if (g_papSortedStrings[iIdx]->StrRef.cch < pStr->StrRef.cch)
589 {
590 if (iIdx <= iStart)
591 break;
592 iEnd = iIdx;
593 }
594 else if (g_papSortedStrings[iIdx]->StrRef.cch > pStr->StrRef.cch)
595 {
596 if (++iIdx >= g_cSortedStrings)
597 break;
598 iStart = iIdx;
599 }
600 else
601 break;
602 }
603
604 if (iIdx != g_cSortedStrings)
605 memmove(&g_papSortedStrings[iIdx + 1], &g_papSortedStrings[iIdx],
606 (g_cSortedStrings - iIdx) * sizeof(g_papSortedStrings[iIdx]));
607 g_papSortedStrings[iIdx] = pStr;
608 g_cSortedStrings++;
609}
610
611
612/**
613 * Creates a string table.
614 *
615 * This will save space by dropping string terminators, eliminating duplicates
616 * and try find strings that are sub-strings of others.
617 *
618 * Will initialize the StrRef of all StrTabString instances.
619 */
620static void CreateStringTable(void)
621{
622 /*
623 * Allocate a hash table double the size of all strings (to avoid too
624 * many collisions). Add all strings to it, finding duplicates in the
625 * process.
626 */
627 size_t cMaxStrings = g_products.size() + g_vendors.size();
628#ifdef USB_ID_DATABASE_WITH_COMPRESSION
629 cMaxStrings += RT_ELEMENTS(g_aCompDict);
630#endif
631 cMaxStrings *= 2;
632 g_papStrHash = new PSTRTABSTRING[cMaxStrings];
633 g_cStrHash = cMaxStrings;
634 memset(g_papStrHash, 0, cMaxStrings * sizeof(g_papStrHash[0]));
635
636 for (ProductsSet::iterator it = g_products.begin(); it != g_products.end(); ++it)
637 AddString(&it->product);
638 for (VendorsSet::iterator it = g_vendors.begin(); it != g_vendors.end(); ++it)
639 AddString(&it->vendor);
640#ifdef USB_ID_DATABASE_WITH_COMPRESSION
641 for (unsigned i = 0; i < RT_ELEMENTS(g_aCompDict); i++)
642 AddString(&g_aCompDict[i]);
643#endif
644 if (g_fVerbose)
645 cout << INFO_PREF "" << g_cUniqueStrings << " unique string (" << g_cchUniqueStrings << " bytes), "
646 << g_cDuplicateStrings << " duplicates, " << g_cCollisions << " collisions" << endl;
647
648 /*
649 * Create g_papSortedStrings from the hash table. The table is sorted by
650 * string length, with the longer strings first.
651 */
652 g_papSortedStrings = new PSTRTABSTRING[g_cUniqueStrings];
653 g_cSortedStrings = 0;
654 size_t idxHash = g_cStrHash;
655 while (idxHash-- > 0)
656 {
657 PSTRTABSTRING pCur = g_papStrHash[idxHash];
658 if (pCur)
659 {
660 do
661 {
662 InsertUniqueString(pCur);
663 pCur = pCur->pNextCollision;
664 } while (pCur);
665 }
666 }
667
668 /*
669 * Create the actual string table.
670 */
671 g_pachStrTab = new char [g_cchUniqueStrings + 1];
672 g_cchStrTab = 0;
673 for (size_t i = 0; i < g_cSortedStrings; i++)
674 {
675 PSTRTABSTRING pCur = g_papSortedStrings[i];
676 const char * const pszCur = pCur->str.c_str();
677 size_t const cchCur = pCur->StrRef.cch;
678 size_t offStrTab = g_cchStrTab;
679
680 /*
681 * See if the string is a substring already found in the string table.
682 * Excluding the zero terminator increases the chances for this.
683 */
684 size_t cchLeft = g_cchStrTab >= cchCur ? g_cchStrTab - cchCur : 0;
685 const char *pchLeft = g_pachStrTab;
686 char const chFirst = *pszCur;
687 while (cchLeft > 0)
688 {
689 const char *pchCandidate = (const char *)memchr(pchLeft, chFirst, cchLeft);
690 if (!pchCandidate)
691 break;
692 if (memcmp(pchCandidate, pszCur, cchCur) == 0)
693 {
694 offStrTab = pchCandidate - g_pachStrTab;
695 break;
696 }
697
698 cchLeft -= pchCandidate + 1 - pchLeft;
699 pchLeft = pchCandidate + 1;
700 }
701
702 if (offStrTab == g_cchStrTab)
703 {
704 /*
705 * See if the start of the string overlaps the end of the string table.
706 * (Currently saves 1 byte...)
707 */
708 if (g_cchStrTab && cchCur > 1)
709 {
710 cchLeft = RT_MIN(g_cchStrTab, cchCur - 1);
711 pchLeft = &g_pachStrTab[g_cchStrTab - cchLeft];
712 while (cchLeft > 0)
713 {
714 const char *pchCandidate = (const char *)memchr(pchLeft, chFirst, cchLeft);
715 if (!pchCandidate)
716 break;
717 if (memcmp(pchCandidate, pszCur, cchLeft) == 0)
718 {
719 size_t cchToCopy = cchCur - cchLeft;
720 memcpy(&g_pachStrTab[offStrTab], &pszCur[cchLeft], cchToCopy);
721 g_cchStrTab += cchToCopy;
722 offStrTab = pchCandidate - g_pachStrTab;
723 break;
724 }
725
726 cchLeft -= pchCandidate + 1 - pchLeft;
727 pchLeft = pchCandidate + 1;
728 }
729 }
730
731 /*
732 * If we didn't have any luck above, just append the string.
733 */
734 if (offStrTab == g_cchStrTab)
735 {
736 memcpy(&g_pachStrTab[offStrTab], pszCur, cchCur);
737 g_cchStrTab += cchCur;
738 }
739 }
740
741 /*
742 * Set the string table offset for all the references to this string.
743 */
744 do
745 {
746 pCur->StrRef.off = (uint32_t)offStrTab;
747 pCur = pCur->pNextRef;
748 } while (pCur != NULL);
749 }
750
751 if (g_fVerbose)
752 cout << INFO_PREF "String table: " << g_cchStrTab << " bytes" << endl;
753}
754
755
756#ifdef VBOX_STRICT
757/**
758 * Sanity checks a string table string.
759 * @param pStr The string to check.
760 */
761static void CheckStrTabString(PSTRTABSTRING pStr)
762{
763 AssertFailed();
764 Assert(pStr->StrRef.cch == pStr->str.length());
765 Assert(pStr->StrRef.off < g_cchStrTab);
766 Assert(pStr->StrRef.off + pStr->StrRef.cch <= g_cchStrTab);
767 Assert(memcmp(pStr->str.c_str(), &g_pachStrTab[pStr->StrRef.off], pStr->str.length()) == 0);
768}
769#endif
770
771
772/**
773 * Writes the string table code to the output stream.
774 *
775 * @param rStrm The output stream.
776 */
777static void WriteStringTable(ostream &rStrm)
778{
779#ifdef VBOX_STRICT
780 /*
781 * Do some quick sanity checks while we're here.
782 */
783 for (ProductsSet::iterator it = g_products.begin(); it != g_products.end(); ++it)
784 CheckStrTabString(&it->product);
785 for (VendorsSet::iterator it = g_vendors.begin(); it != g_vendors.end(); ++it)
786 CheckStrTabString(&it->vendor);
787# ifdef USB_ID_DATABASE_WITH_COMPRESSION
788 for (unsigned i = 0; i < RT_ELEMENTS(g_aCompDict); i++)
789 CheckStrTabString(&g_aCompDict[i]);
790# endif
791#endif
792
793 /*
794 * Create a table for speeding up the character categorization.
795 */
796 uint8_t abCharCat[256];
797 RT_ZERO(abCharCat);
798 abCharCat[(unsigned char)'\\'] = 1;
799 abCharCat[(unsigned char)'\''] = 1;
800 for (unsigned i = 0; i < 0x20; i++)
801 abCharCat[i] = 2;
802 for (unsigned i = 0x7f; i < 0x100; i++)
803 abCharCat[i] = 2;
804
805 /*
806 * We follow the sorted string table, one string per line.
807 */
808 rStrm << endl;
809 rStrm << "const size_t USBIdDatabase::s_cchStrTab = " << g_cchStrTab << ";" << endl;
810 rStrm << "const char USBIdDatabase::s_achStrTab[] =" << endl;
811 rStrm << "{" << endl;
812
813 uint32_t off = 0;
814 for (uint32_t i = 0; i < g_cSortedStrings; i++)
815 {
816 PSTRTABSTRING pCur = g_papSortedStrings[i];
817 uint32_t offEnd = pCur->StrRef.off + pCur->StrRef.cch;
818 if (offEnd > off)
819 {
820 /* Comment with a more readable version of the string. */
821 if (off == pCur->StrRef.off)
822 rStrm << " /* 0x";
823 else
824 rStrm << " /* 0X";
825 rStrm << hex << setfill('0') << setw(5) << off << " = \"";
826 for (uint32_t offTmp = off; offTmp < offEnd; offTmp++)
827 {
828 unsigned char uch = g_pachStrTab[offTmp];
829 if (abCharCat[uch] == 0)
830 rStrm << (char)uch;
831 else if (abCharCat[uch] != 1)
832 rStrm << "\\x" << setw(2) << hex << (size_t)uch;
833 else
834 rStrm << "\\" << (char)uch;
835 }
836 rStrm << "\" */" << endl;
837
838 /* Must use char by char here or we may trigger the max string
839 length limit in the compiler, */
840 rStrm << " ";
841 for (; off < offEnd; off++)
842 {
843 unsigned char uch = g_pachStrTab[off];
844 rStrm << "'";
845 if (abCharCat[uch] == 0)
846 rStrm << (char)uch;
847 else if (abCharCat[uch] != 1)
848 rStrm << "\\x" << setw(2) << hex << (size_t)uch;
849 else
850 rStrm << "\\" << (char)uch;
851 rStrm << "',";
852 }
853 rStrm << endl;
854 }
855 }
856
857 rStrm << "};" << endl;
858 rStrm << "AssertCompile(sizeof(USBIdDatabase::s_achStrTab) == 0x" << hex << g_cchStrTab << ");" << endl << endl;
859}
860
861
862/*
863 * Input file parsing.
864 */
865
866/** The size of all the raw strings, including terminators. */
867static size_t g_cbRawStrings = 0;
868
869int ParseAlias(const string& src, size_t& id, string& desc)
870{
871 unsigned int i = 0;
872 if (sscanf(src.c_str(), "%x", &i) != 1)
873 return ERROR_IN_PARSE_LINE;
874
875 /* skip the number and following whitespace. */
876 size_t offNext = src.find_first_of(" \t", 1);
877 offNext = src.find_first_not_of(" \t", offNext);
878 if (offNext != string::npos)
879 {
880 size_t cchLength = src.length() - offNext;
881 if (cchLength <= USB_ID_DATABASE_MAX_STRING)
882 {
883 id = i;
884 desc = src.substr(offNext);
885
886 /* Check the string encoding. */
887 int rc = RTStrValidateEncoding(desc.c_str());
888 if (RT_SUCCESS(rc))
889 {
890 g_cbRawStrings += desc.length() + 1;
891 return RTEXITCODE_SUCCESS;
892 }
893
894 cerr << "Error: Invalid encoding: '" << desc << "' (rc=" << rc << ")" << endl;
895 }
896 cerr << "Error: String to long (" << cchLength << ")" << endl;
897 }
898 else
899 cerr << "Error: Error parsing \"" << src << "\"" << endl;
900 return ERROR_IN_PARSE_LINE;
901}
902
903bool IsCommentOrEmptyLine(const string& str)
904{
905 size_t index = str.find_first_not_of(" \t");// skip left spaces
906 return index == string::npos || str[index] == '#';
907}
908
909bool getline(PRTSTREAM instream, string& resString)
910{
911 const size_t szBuf = 4096;
912 char buf[szBuf] = { 0 };
913
914 int rc = RTStrmGetLine(instream, buf, szBuf);
915 if (RT_SUCCESS(rc))
916 {
917 resString = buf;
918 return true;
919 }
920 else if (rc != VERR_EOF)
921 {
922 cerr << "Warning: Invalid line in file. Error: " << rc << endl;
923 }
924 return false;
925}
926
927int ParseUsbIds(PRTSTREAM instream)
928{
929 State::Value state = State::lookForStartBlock;
930 string line;
931 int res = 0;
932 VendorRecord vendor = { 0, 0, 0, "" };
933
934 while (state != State::finished && getline(instream, line))
935 {
936 switch (state)
937 {
938 case State::lookForStartBlock:
939 {
940 if (line.find(start_block) != string::npos)
941 state = State::lookForEndBlock;
942 break;
943 }
944 case State::lookForEndBlock:
945 {
946 if (line.find(end_block) != string::npos)
947 state = State::finished;
948 else
949 {
950 if (!IsCommentOrEmptyLine(line))
951 {
952 if (line[0] == '\t')
953 {
954 // Parse Product line
955 // first line should be vendor
956 if (vendor.vendorID == 0)
957 {
958 cerr << "Wrong file format. Product before vendor: " << line.c_str() << "'" << endl;
959 return ERROR_WRONG_FILE_FORMAT;
960 }
961 ProductRecord product = { 0, vendor.vendorID, 0, "" };
962 if (ParseAlias(line.substr(1), product.productID, product.product.str) != 0)
963 {
964 cerr << "Error in parsing product line: '" << line.c_str() << "'" << endl;
965 return ERROR_IN_PARSE_LINE;
966 }
967 product.key = RT_MAKE_U32(product.productID, product.vendorID);
968 Assert(product.vendorID != 0);
969 g_products.push_back(product);
970 }
971 else
972 {
973 // Parse vendor line
974 if (ParseAlias(line, vendor.vendorID, vendor.vendor.str) != 0)
975 {
976 cerr << "Error in parsing vendor line: '"
977 << line.c_str() << "'" << endl;
978 return ERROR_IN_PARSE_LINE;
979 }
980 g_vendors.push_back(vendor);
981 }
982 }
983 }
984 break;
985 }
986 }
987 }
988 if (state == State::lookForStartBlock)
989 {
990 cerr << "Error: wrong format of input file. Start line is not found." << endl;
991 return ERROR_WRONG_FILE_FORMAT;
992 }
993 return 0;
994}
995
996
997static int usage(ostream &rOut, const char *argv0)
998{
999 rOut << "Usage: " << argv0
1000 << " [linux.org usb list file] [custom usb list file] [-o output file]" << endl;
1001 return RTEXITCODE_SYNTAX;
1002}
1003
1004int main(int argc, char *argv[])
1005{
1006 /*
1007 * Initialize IPRT and convert argv to UTF-8.
1008 */
1009 int rc = RTR3InitExe(argc, &argv, 0);
1010 if (RT_FAILURE(rc))
1011 return RTMsgInitFailure(rc);
1012
1013 /*
1014 * Parse arguments and read input files.
1015 */
1016 if (argc < 4)
1017 {
1018 usage(cerr, argv[0]);
1019 cerr << "Error: Not enough arguments." << endl;
1020 return RTEXITCODE_SYNTAX;
1021 }
1022 ofstream fout;
1023 PRTSTREAM fin;
1024 g_products.reserve(20000);
1025 g_vendors.reserve(3500);
1026
1027 const char *outName = NULL;
1028 for (int i = 1; i < argc; i++)
1029 {
1030 if (strcmp(argv[i], "-o") == 0)
1031 {
1032 outName = argv[++i];
1033 continue;
1034 }
1035 if ( strcmp(argv[i], "-h") == 0
1036 || strcmp(argv[i], "-?") == 0
1037 || strcmp(argv[i], "--help") == 0)
1038 {
1039 usage(cout, argv[0]);
1040 return RTEXITCODE_SUCCESS;
1041 }
1042
1043 rc = RTStrmOpen(argv[i], "r", &fin);
1044 if (RT_FAILURE(rc))
1045 {
1046 cerr << "Error: Failed to open file '" << argv[i] << "' for reading. rc=" << rc << endl;
1047 return ERROR_OPEN_FILE;
1048 }
1049
1050 rc = ParseUsbIds(fin);
1051 if (rc != 0)
1052 {
1053 cerr << "Error: Failed parsing USB devices file '" << argv[i] << "'" << endl;
1054 RTStrmClose(fin);
1055 return rc;
1056 }
1057 RTStrmClose(fin);
1058 }
1059
1060 /*
1061 * Due to USBIDDBVENDOR::iProduct, there is currently a max of 64KB products.
1062 * (Not a problem as we've only have less that 54K products currently.)
1063 */
1064 if (g_products.size() > _64K)
1065 {
1066 cerr << "Error: More than 64K products is not supported (input: " << g_products.size() << ")" << endl;
1067 return ERROR_TOO_MANY_PRODUCTS;
1068 }
1069
1070 /*
1071 * Sort the IDs and fill in the iProduct and cProduct members.
1072 */
1073 sort(g_products.begin(), g_products.end());
1074 sort(g_vendors.begin(), g_vendors.end());
1075
1076 size_t iProduct = 0;
1077 for (size_t iVendor = 0; iVendor < g_vendors.size(); iVendor++)
1078 {
1079 size_t const idVendor = g_vendors[iVendor].vendorID;
1080 g_vendors[iVendor].iProduct = iProduct;
1081 if ( iProduct < g_products.size()
1082 && g_products[iProduct].vendorID <= idVendor)
1083 {
1084 if (g_products[iProduct].vendorID == idVendor)
1085 do
1086 iProduct++;
1087 while (g_products[iProduct].vendorID == idVendor);
1088 else
1089 {
1090 cerr << "Error: product without vendor after sorting. impossible!" << endl;
1091 return ERROR_IN_PARSE_LINE;
1092 }
1093 }
1094 g_vendors[iVendor].cProducts = iProduct - g_vendors[iVendor].iProduct;
1095 }
1096
1097 /*
1098 * Verify that all IDs are unique.
1099 */
1100 ProductsSet::iterator ita = adjacent_find(g_products.begin(), g_products.end());
1101 if (ita != g_products.end())
1102 {
1103 cerr << "Error: Duplicate alias detected. " << *ita << endl;
1104 return ERROR_DUPLICATE_ENTRY;
1105 }
1106
1107 /*
1108 * Do string compression and create the string table.
1109 */
1110#ifdef USB_ID_DATABASE_WITH_COMPRESSION
1111 DoStringCompression();
1112#endif
1113 CreateStringTable();
1114
1115 /*
1116 * Print stats.
1117 */
1118 size_t const cbVendorEntry = sizeof(USBIdDatabase::s_aVendors[0]) + sizeof(USBIdDatabase::s_aVendorNames[0]);
1119 size_t const cbProductEntry = sizeof(USBIdDatabase::s_aProducts[0]) + sizeof(USBIdDatabase::s_aProductNames[0]);
1120
1121 size_t cbOldRaw = (g_products.size() + g_vendors.size()) * sizeof(const char *) * 2 + g_cbRawStrings;
1122 size_t cbRaw = g_vendors.size() * cbVendorEntry + g_products.size() * cbProductEntry + g_cbRawStrings;
1123 size_t cbActual = g_vendors.size() * cbVendorEntry + g_products.size() * cbProductEntry + g_cchStrTab;
1124#ifdef USB_ID_DATABASE_WITH_COMPRESSION
1125 cbActual += sizeof(USBIdDatabase::s_aCompDict);
1126#endif
1127 cout << INFO_PREF "Total " << dec << cbActual << " bytes";
1128 if (cbActual < cbRaw)
1129 cout << " saving " << dec << ((cbRaw - cbActual) * 100 / cbRaw) << "% (" << (cbRaw - cbActual) << " bytes)";
1130 else
1131 cout << " wasting " << dec << (cbActual - cbRaw) << " bytes";
1132 cout << "; old version " << cbOldRaw << " bytes + relocs ("
1133 << ((cbOldRaw - cbActual) * 100 / cbOldRaw) << "% save)." << endl;
1134
1135
1136 /*
1137 * Produce the source file.
1138 */
1139 if (!outName)
1140 {
1141 cerr << "Error: Output file is not specified." << endl;
1142 return ERROR_OPEN_FILE;
1143 }
1144
1145 fout.open(outName);
1146 if (!fout.is_open())
1147 {
1148 cerr << "Error: Can not open file to write '" << outName << "'." << endl;
1149 return ERROR_OPEN_FILE;
1150 }
1151
1152 fout << header;
1153
1154 WriteStringTable(fout);
1155#ifdef USB_ID_DATABASE_WITH_COMPRESSION
1156 WriteCompressionDictionary(fout);
1157#endif
1158
1159 fout << product_header;
1160 for (ProductsSet::iterator itp = g_products.begin(); itp != g_products.end(); ++itp)
1161 fout << *itp;
1162 fout << product_part2;
1163 for (ProductsSet::iterator itp = g_products.begin(); itp != g_products.end(); ++itp)
1164 itp->product.printRefLine(fout);
1165 fout << product_footer;
1166
1167 fout << vendor_header;
1168 for (VendorsSet::iterator itv = g_vendors.begin(); itv != g_vendors.end(); ++itv)
1169 fout << *itv;
1170 fout << vendor_part2;
1171 for (VendorsSet::iterator itv = g_vendors.begin(); itv != g_vendors.end(); ++itv)
1172 itv->vendor.printRefLine(fout);
1173 fout << vendor_footer;
1174
1175 fout.close();
1176
1177
1178 return RTEXITCODE_SUCCESS;
1179}
1180
Note: See TracBrowser for help on using the repository browser.

© 2024 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette