USBIdDatabaseGenerator.cpp@ 58018

Last change on this file since 58018 was 58018, checked in by vboxsync, 9 years ago
USBIdDatabase*: build fix attempt
Property svn:eol-style set to `native` Property svn:keywords set to `Author Date Id Revision`
File size: 35.9 KB

Line
1	/* $Id: USBIdDatabaseGenerator.cpp 58018 2015-10-03 18:56:48Z vboxsync $ */
2	/** @file
3	* USB device vendor and product ID database - generator.
4	*/
5
6	/*
7	* Copyright (C) 2015 Oracle Corporation
8	*
9	* This file is part of VirtualBox Open Source Edition (OSE), as
10	* available from http://www.virtualbox.org. This file is free software;
11	* you can redistribute it and/or modify it under the terms of the GNU
12	* General Public License (GPL) as published by the Free Software
13	* Foundation, in version 2 as it comes in the "COPYING" file of the
14	* VirtualBox OSE distribution. VirtualBox OSE is distributed in the
15	* hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
16	*/
17
18	/*********************************************************************************************************************************
19	* Header Files *
20	*********************************************************************************************************************************/
21	#include <stdio.h>
22
23	#include <fstream>
24	#include <iostream>
25	#include <iomanip>
26	#include <algorithm>
27	#include <map>
28	#include <string>
29	#include <vector>
30
31	#include <iprt/initterm.h>
32	#include <iprt/message.h>
33	#include <iprt/string.h>
34	#include <iprt/stream.h>
35	#include "../../Runtime/include/internal/strhash.h" /** @todo make this one public */
36
37	#include "../include/USBIdDatabase.h"
38
39
40	/** For verbose output. */
41	static bool g_fVerbose = false;
42	/** Output prefix for informational output. */
43	#define INFO_PREF "USBIdDatabaseGenerator: Info: "
44
45
46	using namespace std;
47
48	static const char * const header =
49	"/** @file\n"
50	" * USB device vendor and product ID database - Autogenerated from <stupid C++ cannot do %s>\n"
51	" */\n"
52	"\n"
53	"/*\n"
54	" * Copyright (C) 2015 Oracle Corporation\n"
55	" *\n"
56	" * This file is part of VirtualBox Open Source Edition(OSE), as\n"
57	" * available from http ://www.virtualbox.org. This file is free software;\n"
58	" * you can redistribute it and / or modify it under the terms of the GNU\n"
59	" * General Public License(GPL) as published by the Free Software\n"
60	" * Foundation, in version 2 as it comes in the \"COPYING\" file of the\n"
61	" * VirtualBox OSE distribution.VirtualBox OSE is distributed in the\n"
62	" * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.\n"
63	" */"
64	"\n"
65	"\n"
66	"#include \"USBIdDatabase.h\"\n"
67	"\n";
68	static const char * const product_header =
69	"/**\n"
70	" * USB devices aliases array.\n"
71	" * Format: VendorId, ProductId, Vendor Name, Product Name\n"
72	" * The source of the list is http://www.linux-usb.org/usb.ids\n"
73	" */\n"
74	"USBIDDBPROD const USBIdDatabase::s_aProducts[] =\n"
75	"{\n";
76	const char *product_part2 =
77	"};\n"
78	"\n"
79	"\nconst USBIDDBSTR USBIdDatabase::s_aProductNames[] =\n"
80	"{\n";
81	const char *product_footer =
82	"};\n"
83	"\n"
84	"const size_t USBIdDatabase::s_cProducts = RT_ELEMENTS(USBIdDatabase::s_aProducts);\n";
85
86	const char *vendor_header =
87	"\nUSBIDDBVENDOR const USBIdDatabase::s_aVendors[] =\n"
88	"{\n";
89	const char *vendor_part2 =
90	"};\n"
91	"\n"
92	"\nconst USBIDDBSTR USBIdDatabase::s_aVendorNames[] =\n"
93	"{\n";
94	const char *vendor_footer =
95	"};\n"
96	"\n"
97	"const size_t USBIdDatabase::s_cVendors = RT_ELEMENTS(USBIdDatabase::s_aVendors);\n";
98
99	const char *start_block = "# Vendors, devices and interfaces. Please keep sorted.";
100	const char *end_block = "# List of known device classes, subclasses and protocols";
101
102
103	// error codes (complements RTEXITCODE_XXX).
104	#define ERROR_OPEN_FILE (12)
105	#define ERROR_IN_PARSE_LINE (13)
106	#define ERROR_DUPLICATE_ENTRY (14)
107	#define ERROR_WRONG_FILE_FORMAT (15)
108	#define ERROR_TOO_MANY_PRODUCTS (16)
109
110
111	/**
112	* String that will end up in the string table.
113	*/
114	struct StrTabString
115	{
116	/** The string. */
117	std::string str;
118	/** The string hash value. */
119	uint32_t uHash;
120	/** The string table reference. */
121	USBIDDBSTR StrRef;
122	/** Pointer to the next string reference (same string table entry). */
123	struct StrTabString *pNextRef;
124	/** Pointer to the next string with the same hash value (collision). */
125	struct StrTabString *pNextCollision;
126
127	void printRef(ostream &rStrm) const
128	{
129	rStrm << " { 0x" << setfill('0') << setw(6) << hex << StrRef.off << ", 0x"
130	<< setfill('0') << setw(2) << hex << StrRef.cch << " }, ";
131	}
132
133	void printRefLine(ostream &rStrm) const
134	{
135	printRef(rStrm);
136	rStrm << endl;
137	}
138	};
139	typedef struct StrTabString *PSTRTABSTRING;
140
141	struct VendorRecord
142	{
143	size_t vendorID;
144	size_t iProduct;
145	size_t cProducts;
146	StrTabString vendor;
147	};
148
149	struct ProductRecord
150	{
151	size_t key;
152	size_t vendorID;
153	size_t productID;
154	StrTabString product;
155	};
156
157	bool operator < (const ProductRecord& lh, const ProductRecord& rh)
158	{
159	return lh.key < rh.key;
160	}
161
162	bool operator < (const VendorRecord& lh, const VendorRecord& rh)
163	{
164	return lh.vendorID < rh.vendorID;
165	}
166
167	bool operator == (const ProductRecord& lh, const ProductRecord& rh)
168	{
169	return lh.key == rh.key;
170	}
171
172	bool operator == (const VendorRecord& lh, const VendorRecord& rh)
173	{
174	return lh.vendorID == rh.vendorID;
175	}
176
177	ostream& operator <<(ostream& stream, const ProductRecord product)
178	{
179	stream << " { 0x" << setfill('0') << setw(4) << product.productID << " }, " << endl;
180	return stream;
181	}
182
183	ostream& operator <<(ostream& stream, const VendorRecord vendor)
184	{
185	stream << " { 0x" << setfill('0') << setw(4) << hex << vendor.vendorID
186	<< ", 0x" << setfill('0') << setw(4) << hex << vendor.iProduct
187	<< ", 0x" << setfill('0') << setw(4) << hex << vendor.cProducts << " }, " << endl;
188	return stream;
189	}
190
191	namespace State
192	{
193	typedef int Value;
194	enum
195	{
196	lookForStartBlock,
197	lookForEndBlock,
198	finished
199	};
200	}
201
202	typedef vector<ProductRecord> ProductsSet;
203	typedef vector<VendorRecord> VendorsSet;
204	ProductsSet g_products;
205	VendorsSet g_vendors;
206
207
208
209	/*
210	* String "compression". We replace the 127 most used words with references.
211	*/
212	#ifdef USB_ID_DATABASE_WITH_COMPRESSION
213
214	typedef std::map<std::string, size_t> WORDFREQMAP;
215	typedef WORDFREQMAP::value_type WORDFREQPAIR;
216
217	/** The 127 words we've picked to be indexed by reference. */
218	static StrTabString g_aCompDict[127];
219
220	/**
221	* For sorting the frequency fidning in descending order.
222	*
223	* Comparison operators are put outside to make older gcc versions (like 4.1.1
224	* on lnx64-rel) happy.
225	*/
226	class WordFreqSortEntry
227	{
228	public:
229	WORDFREQPAIR const *m_pPair;
230
231	public:
232	WordFreqSortEntry(WORDFREQPAIR const *pPair) : m_pPair(pPair) {}
233	};
234
235	bool operator == (WordFreqSortEntry const &rLeft, WordFreqSortEntry const &rRight)
236	{
237	return rLeft.m_pPair->second == rRight.m_pPair->second;
238	}
239
240	bool operator < (WordFreqSortEntry const &rLeft, WordFreqSortEntry const &rRight)
241	{
242	return rLeft.m_pPair->second > rRight.m_pPair->second;
243	}
244
245
246	/**
247	* Replaces the dictionary words and escapes non-ascii chars in a string.
248	*
249	* @param pString The string to fixup.
250	* @param pcchOld The old string length is added to this (stats)
251	* @param pcchNew The new string length is added to this (stats)
252	*/
253	static void FixupString(std::string pString, size_t pcchOld, size_t *pcchNew)
254	{
255	char szNew[USB_ID_DATABASE_MAX_STRING * 2];
256	char *pszDst = szNew;
257	const char *pszSrc = pString->c_str();
258	const char *pszSrcEnd = strchr(pszSrc, '\0');
259
260	*pcchOld += pszSrcEnd - pszSrc;
261
262	char ch;
263	while ((ch = *pszSrc) != '\0')
264	{
265	/* Spaces. */
266	while (ch == ' ')
267	{
268	*pszDst++ = ' ';
269	ch = *++pszSrc;
270	}
271	if (!ch)
272	break;
273
274	/* Find the end of the current word. */
275	size_t cchWord = 1;
276	while ((ch = pszSrc[cchWord]) != ' ' && ch != '\0')
277	cchWord++;
278
279	/* Check for g_aWord matches. */
280	size_t cchMax = pszSrcEnd - pszSrc;
281	for (unsigned i = 0; i < RT_ELEMENTS(g_aCompDict); i++)
282	{
283	size_t cchLen = g_aCompDict[i].str.length();
284	if ( cchLen >= cchWord
285	&& cchLen <= cchMax
286	&& g_aCompDict[i].str.compare(0, cchLen, pszSrc, cchLen) == 0)
287	{
288	*pszDst++ = (unsigned char)(0x80 \| i);
289	pszSrc += cchLen;
290	cchWord = 0;
291	break;
292	}
293	}
294
295	if (cchWord)
296	{
297	/* Copy the current word. */
298	ch = *pszSrc;
299	do
300	{
301	if (!((unsigned char)ch & 0x80))
302	{
303	*pszDst++ = ch;
304	pszSrc++;
305	}
306	else
307	{
308	RTUNICP uc;
309	int rc = RTStrGetCpEx(&pszSrc, &uc);
310	if (RT_SUCCESS(rc))
311	{
312	pszDst++ = (unsigned char)0xff; / escape single code point. */
313	pszDst = RTStrPutCp(pszDst, uc);
314	}
315	else
316	{
317	cerr << "Error: RTStrGetCpEx failed with rc=" << rc << endl;
318	exit(3);
319	}
320	}
321	} while ((ch = *pszSrc) != '\0' && ch != ' ');
322	}
323	}
324	*pszDst = '\0';
325	*pcchNew += pszDst - &szNew[0];
326
327	*pString = szNew;
328	}
329
330
331	/**
332	* Analyzes a string.
333	*
334	* @param pFrequencies The word frequency map.
335	* @param rString The string to analyze.
336	*/
337	static void AnalyzeString(WORDFREQMAP *pFrequencies, std::string const &rString)
338	{
339	const char *psz = rString.c_str();
340
341	/*
342	* For now we just consider words.
343	*/
344	char ch;
345	while ((ch = *psz) != '\0')
346	{
347	/* Skip leading spaces. */
348	while (ch == ' ')
349	ch = *++psz;
350	if (!ch)
351	return;
352
353	/* Find end of word. */
354	size_t cchWord = 1;
355	while ((ch = psz[cchWord]) != ' ' && ch != '\0')
356	cchWord++;
357	if (cchWord > 1)
358	{
359	std::string strWord(psz, cchWord);
360	WORDFREQMAP::iterator it = pFrequencies->find(strWord);
361	if (it != pFrequencies->end())
362	it->second += cchWord - 1;
363	else
364	(*pFrequencies)[strWord] = 0;
365	/** @todo could gain hits by including the space after the word, but that
366	* has the same accounting problems as the two words scenario below. */
367
368	# if 0 /** @todo need better accounting for overlapping alternatives before this can be enabled. */
369	/* Two words - immediate yields calc may lie when this enabled and we may pick the wrong words. */
370	if (ch == ' ')
371	{
372	ch = psz[++cchWord];
373	if (ch != ' ' && ch != '\0')
374	{
375	size_t const cchSaved = cchWord;
376
377	do
378	cchWord++;
379	while ((ch = psz[cchWord]) != ' ' && ch != '\0');
380
381	strWord = std::string(psz, cchWord);
382	WORDFREQMAP::iterator it = pFrequencies->find(strWord);
383	if (it != pFrequencies->end())
384	it->second += cchWord - 1;
385	else
386	(*pFrequencies)[strWord] = 0;
387
388	cchWord = cchSaved;
389	}
390	}
391	# endif
392	}
393
394	/* Advance. */
395	psz += cchWord;
396	}
397	}
398
399
400	/**
401	* Compresses the vendor and product strings.
402	*
403	* This is very very simple (a lot less work that the string table for
404	* instance).
405	*/
406	static void DoStringCompression(void)
407	{
408	/*
409	* Analyze the strings collecting stats on potential sequences to replace.
410	*/
411	WORDFREQMAP Frequencies;
412
413	uint32_t cProducts = 0;
414	for (ProductsSet::iterator it = g_products.begin(); it != g_products.end(); ++it, cProducts++)
415	AnalyzeString(&Frequencies, it->product.str);
416
417	uint32_t cVendors = 0;
418	for (VendorsSet::iterator it = g_vendors.begin(); it != g_vendors.end(); ++it, cVendors++)
419	AnalyzeString(&Frequencies, it->vendor.str);
420
421	if (g_fVerbose)
422	{
423	size_t const cbVendorEntry = sizeof(USBIdDatabase::s_aVendors[0]) + sizeof(USBIdDatabase::s_aVendorNames[0]);
424	size_t const cbVendors = cVendors * cbVendorEntry;
425	cout << INFO_PREF << cVendors << " vendors (" << cbVendors << " bytes)" << endl;
426
427	size_t const cbProductEntry = sizeof(USBIdDatabase::s_aProducts[0]) + sizeof(USBIdDatabase::s_aProductNames[0]);
428	size_t const cbProducts = cProducts * cbProductEntry;
429	cout << INFO_PREF << cProducts << " products (" << cbProducts << " bytes)" << endl;
430	}
431
432	/*
433	* Sort the result and pick the top 127 ones.
434	*/
435	std::vector<WordFreqSortEntry> SortMap;
436	for (WORDFREQMAP::iterator it = Frequencies.begin(); it != Frequencies.end(); ++it)
437	{
438	WORDFREQPAIR const &rPair = *it;
439	SortMap.push_back(WordFreqSortEntry(&rPair));
440	}
441
442	sort(SortMap.begin(), SortMap.end());
443
444	size_t cb = 0;
445	unsigned i = 0;
446	for (std::vector<WordFreqSortEntry>::iterator it = SortMap.begin();
447	it != SortMap.end() && i < RT_ELEMENTS(g_aCompDict);
448	++it, i++)
449	{
450	g_aCompDict[i].str = it->m_pPair->first;
451	cb += it->m_pPair->second;
452	}
453
454	if (g_fVerbose)
455	cout << INFO_PREF "Estimated compression saving " << cb << " bytes" << endl;
456
457	/*
458	* Rework the strings.
459	*/
460	size_t cchNew = 0;
461	size_t cchOld = 0;
462	for (ProductsSet::iterator it = g_products.begin(); it != g_products.end(); ++it)
463	FixupString(&it->product.str, &cchOld, &cchNew);
464	for (VendorsSet::iterator it = g_vendors.begin(); it != g_vendors.end(); ++it)
465	FixupString(&it->vendor.str, &cchOld, &cchNew);
466
467	for (i = 0; i < RT_ELEMENTS(g_aCompDict); i++)
468	cchNew += g_aCompDict[i].str.length() + 1;
469
470	if (g_fVerbose)
471	{
472	cout << INFO_PREF "Strings: original: " << cchOld << " bytes; compressed: " << cchNew << " bytes;";
473	if (cchNew < cchOld)
474	cout << " saving " << (cchOld - cchNew) << " bytes (" << ((cchOld - cchNew) * 100 / cchOld) << "%)" << endl;
475	else
476	cout << " wasting " << (cchOld - cchNew) << " bytes!" << endl;
477	cout << INFO_PREF "Average string length is " << (cchOld / (cVendors + cProducts)) << endl;
478	}
479	}
480
481
482	/**
483	* Writes the compression dictionary to the output stream.
484	*
485	* @param rStrm The output stream.
486	*/
487	static void WriteCompressionDictionary(ostream &rStrm)
488	{
489	rStrm << "const USBIDDBSTR USBIdDatabase::s_aCompDict[" << dec << RT_ELEMENTS(g_aCompDict) << "] = " << endl;
490	rStrm << "{" << endl;
491	for (unsigned i = 0; i < RT_ELEMENTS(g_aCompDict); i++)
492	{
493	g_aCompDict[i].printRef(rStrm);
494	rStrm << " // " << g_aCompDict[i].str << endl;
495	}
496	rStrm << "};" << endl << endl;
497	}
498
499	#endif /* USB_ID_DATABASE_WITH_COMPRESSION */
500
501
502	/*
503	* Compile a string table.
504	*/
505
506	/** The size of g_papStrHash. */
507	static size_t g_cStrHash = 0;
508	/** String hash table. */
509	static PSTRTABSTRING *g_papStrHash = NULL;
510	/** Duplicate strings found by AddString. */
511	static size_t g_cDuplicateStrings = 0;
512	/** Total length of the unique strings (no terminators). */
513	static size_t g_cchUniqueStrings = 0;
514	/** Number of unique strings after AddString. */
515	static size_t g_cUniqueStrings = 0;
516	/** Number of collisions. */
517	static size_t g_cCollisions = 0;
518
519	/** Number of entries in g_apSortedStrings. */
520	static size_t g_cSortedStrings = 0;
521	/** The sorted string table. */
522	static PSTRTABSTRING *g_papSortedStrings = NULL;
523
524	/** The string table. */
525	static char *g_pachStrTab = NULL;
526	/** The actual string table size. */
527	static size_t g_cchStrTab = 0;
528
529
530	/**
531	* Adds a string to the hash table.
532	* @param pStr The string.
533	*/
534	static void AddString(PSTRTABSTRING pStr)
535	{
536	pStr->pNextRef = NULL;
537	pStr->pNextCollision = NULL;
538	pStr->StrRef.off = 0;
539	pStr->StrRef.cch = pStr->str.length();
540	size_t cchIgnored;
541	pStr->uHash = sdbm(pStr->str.c_str(), &cchIgnored);
542	Assert(cchIgnored == pStr->str.length());
543
544	size_t idxHash = pStr->uHash % g_cStrHash;
545	PSTRTABSTRING pCur = g_papStrHash[idxHash];
546	if (!pCur)
547	g_papStrHash[idxHash] = pStr;
548	else
549	{
550	/* Look for matching string. */
551	do
552	{
553	if ( pCur->uHash == pStr->uHash
554	&& pCur->StrRef.cch == pStr->StrRef.cch
555	&& pCur->str == pStr->str)
556	{
557	pStr->pNextRef = pCur->pNextRef;
558	pCur->pNextRef = pStr;
559	g_cDuplicateStrings++;
560	return;
561	}
562	pCur = pCur->pNextCollision;
563	} while (pCur != NULL);
564
565	/* No matching string, insert. */
566	g_cCollisions++;
567	pStr->pNextCollision = g_papStrHash[idxHash];
568	g_papStrHash[idxHash] = pStr;
569	}
570
571	g_cUniqueStrings++;
572	g_cchUniqueStrings += pStr->StrRef.cch;
573	}
574
575
576	/**
577	* Inserts a string into g_apUniqueStrings.
578	* @param pStr The string.
579	*/
580	static void InsertUniqueString(PSTRTABSTRING pStr)
581	{
582	size_t iIdx;
583	size_t iStart = 0;
584	size_t iEnd = g_cSortedStrings;
585	for (;;)
586	{
587	iIdx = iStart + (iEnd - iStart) / 2;
588	if (g_papSortedStrings[iIdx]->StrRef.cch < pStr->StrRef.cch)
589	{
590	if (iIdx <= iStart)
591	break;
592	iEnd = iIdx;
593	}
594	else if (g_papSortedStrings[iIdx]->StrRef.cch > pStr->StrRef.cch)
595	{
596	if (++iIdx >= g_cSortedStrings)
597	break;
598	iStart = iIdx;
599	}
600	else
601	break;
602	}
603
604	if (iIdx != g_cSortedStrings)
605	memmove(&g_papSortedStrings[iIdx + 1], &g_papSortedStrings[iIdx],
606	(g_cSortedStrings - iIdx) * sizeof(g_papSortedStrings[iIdx]));
607	g_papSortedStrings[iIdx] = pStr;
608	g_cSortedStrings++;
609	}
610
611
612	/**
613	* Creates a string table.
614	*
615	* This will save space by dropping string terminators, eliminating duplicates
616	* and try find strings that are sub-strings of others.
617	*
618	* Will initialize the StrRef of all StrTabString instances.
619	*/
620	static void CreateStringTable(void)
621	{
622	/*
623	* Allocate a hash table double the size of all strings (to avoid too
624	* many collisions). Add all strings to it, finding duplicates in the
625	* process.
626	*/
627	size_t cMaxStrings = g_products.size() + g_vendors.size();
628	#ifdef USB_ID_DATABASE_WITH_COMPRESSION
629	cMaxStrings += RT_ELEMENTS(g_aCompDict);
630	#endif
631	cMaxStrings *= 2;
632	g_papStrHash = new PSTRTABSTRING[cMaxStrings];
633	g_cStrHash = cMaxStrings;
634	memset(g_papStrHash, 0, cMaxStrings * sizeof(g_papStrHash[0]));
635
636	for (ProductsSet::iterator it = g_products.begin(); it != g_products.end(); ++it)
637	AddString(&it->product);
638	for (VendorsSet::iterator it = g_vendors.begin(); it != g_vendors.end(); ++it)
639	AddString(&it->vendor);
640	#ifdef USB_ID_DATABASE_WITH_COMPRESSION
641	for (unsigned i = 0; i < RT_ELEMENTS(g_aCompDict); i++)
642	AddString(&g_aCompDict[i]);
643	#endif
644	if (g_fVerbose)
645	cout << INFO_PREF "" << g_cUniqueStrings << " unique string (" << g_cchUniqueStrings << " bytes), "
646	<< g_cDuplicateStrings << " duplicates, " << g_cCollisions << " collisions" << endl;
647
648	/*
649	* Create g_papSortedStrings from the hash table. The table is sorted by
650	* string length, with the longer strings first.
651	*/
652	g_papSortedStrings = new PSTRTABSTRING[g_cUniqueStrings];
653	g_cSortedStrings = 0;
654	size_t idxHash = g_cStrHash;
655	while (idxHash-- > 0)
656	{
657	PSTRTABSTRING pCur = g_papStrHash[idxHash];
658	if (pCur)
659	{
660	do
661	{
662	InsertUniqueString(pCur);
663	pCur = pCur->pNextCollision;
664	} while (pCur);
665	}
666	}
667
668	/*
669	* Create the actual string table.
670	*/
671	g_pachStrTab = new char [g_cchUniqueStrings + 1];
672	g_cchStrTab = 0;
673	for (size_t i = 0; i < g_cSortedStrings; i++)
674	{
675	PSTRTABSTRING pCur = g_papSortedStrings[i];
676	const char * const pszCur = pCur->str.c_str();
677	size_t const cchCur = pCur->StrRef.cch;
678	size_t offStrTab = g_cchStrTab;
679
680	/*
681	* See if the string is a substring already found in the string table.
682	* Excluding the zero terminator increases the chances for this.
683	*/
684	size_t cchLeft = g_cchStrTab >= cchCur ? g_cchStrTab - cchCur : 0;
685	const char *pchLeft = g_pachStrTab;
686	char const chFirst = *pszCur;
687	while (cchLeft > 0)
688	{
689	const char pchCandidate = (const char )memchr(pchLeft, chFirst, cchLeft);
690	if (!pchCandidate)
691	break;
692	if (memcmp(pchCandidate, pszCur, cchCur) == 0)
693	{
694	offStrTab = pchCandidate - g_pachStrTab;
695	break;
696	}
697
698	cchLeft -= pchCandidate + 1 - pchLeft;
699	pchLeft = pchCandidate + 1;
700	}
701
702	if (offStrTab == g_cchStrTab)
703	{
704	/*
705	* See if the start of the string overlaps the end of the string table.
706	* (Currently saves 1 byte...)
707	*/
708	if (g_cchStrTab && cchCur > 1)
709	{
710	cchLeft = RT_MIN(g_cchStrTab, cchCur - 1);
711	pchLeft = &g_pachStrTab[g_cchStrTab - cchLeft];
712	while (cchLeft > 0)
713	{
714	const char pchCandidate = (const char )memchr(pchLeft, chFirst, cchLeft);
715	if (!pchCandidate)
716	break;
717	if (memcmp(pchCandidate, pszCur, cchLeft) == 0)
718	{
719	size_t cchToCopy = cchCur - cchLeft;
720	memcpy(&g_pachStrTab[offStrTab], &pszCur[cchLeft], cchToCopy);
721	g_cchStrTab += cchToCopy;
722	offStrTab = pchCandidate - g_pachStrTab;
723	break;
724	}
725
726	cchLeft -= pchCandidate + 1 - pchLeft;
727	pchLeft = pchCandidate + 1;
728	}
729	}
730
731	/*
732	* If we didn't have any luck above, just append the string.
733	*/
734	if (offStrTab == g_cchStrTab)
735	{
736	memcpy(&g_pachStrTab[offStrTab], pszCur, cchCur);
737	g_cchStrTab += cchCur;
738	}
739	}
740
741	/*
742	* Set the string table offset for all the references to this string.
743	*/
744	do
745	{
746	pCur->StrRef.off = (uint32_t)offStrTab;
747	pCur = pCur->pNextRef;
748	} while (pCur != NULL);
749	}
750
751	if (g_fVerbose)
752	cout << INFO_PREF "String table: " << g_cchStrTab << " bytes" << endl;
753	}
754
755
756	#ifdef VBOX_STRICT
757	/**
758	* Sanity checks a string table string.
759	* @param pStr The string to check.
760	*/
761	static void CheckStrTabString(PSTRTABSTRING pStr)
762	{
763	AssertFailed();
764	Assert(pStr->StrRef.cch == pStr->str.length());
765	Assert(pStr->StrRef.off < g_cchStrTab);
766	Assert(pStr->StrRef.off + pStr->StrRef.cch <= g_cchStrTab);
767	Assert(memcmp(pStr->str.c_str(), &g_pachStrTab[pStr->StrRef.off], pStr->str.length()) == 0);
768	}
769	#endif
770
771
772	/**
773	* Writes the string table code to the output stream.
774	*
775	* @param rStrm The output stream.
776	*/
777	static void WriteStringTable(ostream &rStrm)
778	{
779	#ifdef VBOX_STRICT
780	/*
781	* Do some quick sanity checks while we're here.
782	*/
783	for (ProductsSet::iterator it = g_products.begin(); it != g_products.end(); ++it)
784	CheckStrTabString(&it->product);
785	for (VendorsSet::iterator it = g_vendors.begin(); it != g_vendors.end(); ++it)
786	CheckStrTabString(&it->vendor);
787	# ifdef USB_ID_DATABASE_WITH_COMPRESSION
788	for (unsigned i = 0; i < RT_ELEMENTS(g_aCompDict); i++)
789	CheckStrTabString(&g_aCompDict[i]);
790	# endif
791	#endif
792
793	/*
794	* Create a table for speeding up the character categorization.
795	*/
796	uint8_t abCharCat[256];
797	RT_ZERO(abCharCat);
798	abCharCat[(unsigned char)'\\'] = 1;
799	abCharCat[(unsigned char)'\''] = 1;
800	for (unsigned i = 0; i < 0x20; i++)
801	abCharCat[i] = 2;
802	for (unsigned i = 0x7f; i < 0x100; i++)
803	abCharCat[i] = 2;
804
805	/*
806	* We follow the sorted string table, one string per line.
807	*/
808	rStrm << endl;
809	rStrm << "const size_t USBIdDatabase::s_cchStrTab = " << g_cchStrTab << ";" << endl;
810	rStrm << "const char USBIdDatabase::s_achStrTab[] =" << endl;
811	rStrm << "{" << endl;
812
813	uint32_t off = 0;
814	for (uint32_t i = 0; i < g_cSortedStrings; i++)
815	{
816	PSTRTABSTRING pCur = g_papSortedStrings[i];
817	uint32_t offEnd = pCur->StrRef.off + pCur->StrRef.cch;
818	if (offEnd > off)
819	{
820	/* Comment with a more readable version of the string. */
821	if (off == pCur->StrRef.off)
822	rStrm << " /* 0x";
823	else
824	rStrm << " /* 0X";
825	rStrm << hex << setfill('0') << setw(5) << off << " = \"";
826	for (uint32_t offTmp = off; offTmp < offEnd; offTmp++)
827	{
828	unsigned char uch = g_pachStrTab[offTmp];
829	if (abCharCat[uch] == 0)
830	rStrm << (char)uch;
831	else if (abCharCat[uch] != 1)
832	rStrm << "\\x" << setw(2) << hex << (size_t)uch;
833	else
834	rStrm << "\\" << (char)uch;
835	}
836	rStrm << "\" */" << endl;
837
838	/* Must use char by char here or we may trigger the max string
839	length limit in the compiler, */
840	rStrm << " ";
841	for (; off < offEnd; off++)
842	{
843	unsigned char uch = g_pachStrTab[off];
844	rStrm << "'";
845	if (abCharCat[uch] == 0)
846	rStrm << (char)uch;
847	else if (abCharCat[uch] != 1)
848	rStrm << "\\x" << setw(2) << hex << (size_t)uch;
849	else
850	rStrm << "\\" << (char)uch;
851	rStrm << "',";
852	}
853	rStrm << endl;
854	}
855	}
856
857	rStrm << "};" << endl;
858	rStrm << "AssertCompile(sizeof(USBIdDatabase::s_achStrTab) == 0x" << hex << g_cchStrTab << ");" << endl << endl;
859	}
860
861
862	/*
863	* Input file parsing.
864	*/
865
866	/** The size of all the raw strings, including terminators. */
867	static size_t g_cbRawStrings = 0;
868
869	int ParseAlias(const string& src, size_t& id, string& desc)
870	{
871	unsigned int i = 0;
872	if (sscanf(src.c_str(), "%x", &i) != 1)
873	return ERROR_IN_PARSE_LINE;
874
875	/* skip the number and following whitespace. */
876	size_t offNext = src.find_first_of(" \t", 1);
877	offNext = src.find_first_not_of(" \t", offNext);
878	if (offNext != string::npos)
879	{
880	size_t cchLength = src.length() - offNext;
881	if (cchLength <= USB_ID_DATABASE_MAX_STRING)
882	{
883	id = i;
884	desc = src.substr(offNext);
885
886	/* Check the string encoding. */
887	int rc = RTStrValidateEncoding(desc.c_str());
888	if (RT_SUCCESS(rc))
889	{
890	g_cbRawStrings += desc.length() + 1;
891	return RTEXITCODE_SUCCESS;
892	}
893
894	cerr << "Error: Invalid encoding: '" << desc << "' (rc=" << rc << ")" << endl;
895	}
896	cerr << "Error: String to long (" << cchLength << ")" << endl;
897	}
898	else
899	cerr << "Error: Error parsing \"" << src << "\"" << endl;
900	return ERROR_IN_PARSE_LINE;
901	}
902
903	bool IsCommentOrEmptyLine(const string& str)
904	{
905	size_t index = str.find_first_not_of(" \t");// skip left spaces
906	return index == string::npos \|\| str[index] == '#';
907	}
908
909	bool getline(PRTSTREAM instream, string& resString)
910	{
911	const size_t szBuf = 4096;
912	char buf[szBuf] = { 0 };
913
914	int rc = RTStrmGetLine(instream, buf, szBuf);
915	if (RT_SUCCESS(rc))
916	{
917	resString = buf;
918	return true;
919	}
920	else if (rc != VERR_EOF)
921	{
922	cerr << "Warning: Invalid line in file. Error: " << rc << endl;
923	}
924	return false;
925	}
926
927	int ParseUsbIds(PRTSTREAM instream)
928	{
929	State::Value state = State::lookForStartBlock;
930	string line;
931	int res = 0;
932	VendorRecord vendor = { 0, 0, 0, "" };
933
934	while (state != State::finished && getline(instream, line))
935	{
936	switch (state)
937	{
938	case State::lookForStartBlock:
939	{
940	if (line.find(start_block) != string::npos)
941	state = State::lookForEndBlock;
942	break;
943	}
944	case State::lookForEndBlock:
945	{
946	if (line.find(end_block) != string::npos)
947	state = State::finished;
948	else
949	{
950	if (!IsCommentOrEmptyLine(line))
951	{
952	if (line[0] == '\t')
953	{
954	// Parse Product line
955	// first line should be vendor
956	if (vendor.vendorID == 0)
957	{
958	cerr << "Wrong file format. Product before vendor: " << line.c_str() << "'" << endl;
959	return ERROR_WRONG_FILE_FORMAT;
960	}
961	ProductRecord product = { 0, vendor.vendorID, 0, "" };
962	if (ParseAlias(line.substr(1), product.productID, product.product.str) != 0)
963	{
964	cerr << "Error in parsing product line: '" << line.c_str() << "'" << endl;
965	return ERROR_IN_PARSE_LINE;
966	}
967	product.key = RT_MAKE_U32(product.productID, product.vendorID);
968	Assert(product.vendorID != 0);
969	g_products.push_back(product);
970	}
971	else
972	{
973	// Parse vendor line
974	if (ParseAlias(line, vendor.vendorID, vendor.vendor.str) != 0)
975	{
976	cerr << "Error in parsing vendor line: '"
977	<< line.c_str() << "'" << endl;
978	return ERROR_IN_PARSE_LINE;
979	}
980	g_vendors.push_back(vendor);
981	}
982	}
983	}
984	break;
985	}
986	}
987	}
988	if (state == State::lookForStartBlock)
989	{
990	cerr << "Error: wrong format of input file. Start line is not found." << endl;
991	return ERROR_WRONG_FILE_FORMAT;
992	}
993	return 0;
994	}
995
996
997	static int usage(ostream &rOut, const char *argv0)
998	{
999	rOut << "Usage: " << argv0
1000	<< " [linux.org usb list file] [custom usb list file] [-o output file]" << endl;
1001	return RTEXITCODE_SYNTAX;
1002	}
1003
1004	int main(int argc, char *argv[])
1005	{
1006	/*
1007	* Initialize IPRT and convert argv to UTF-8.
1008	*/
1009	int rc = RTR3InitExe(argc, &argv, 0);
1010	if (RT_FAILURE(rc))
1011	return RTMsgInitFailure(rc);
1012
1013	/*
1014	* Parse arguments and read input files.
1015	*/
1016	if (argc < 4)
1017	{
1018	usage(cerr, argv[0]);
1019	cerr << "Error: Not enough arguments." << endl;
1020	return RTEXITCODE_SYNTAX;
1021	}
1022	ofstream fout;
1023	PRTSTREAM fin;
1024	g_products.reserve(20000);
1025	g_vendors.reserve(3500);
1026
1027	const char *outName = NULL;
1028	for (int i = 1; i < argc; i++)
1029	{
1030	if (strcmp(argv[i], "-o") == 0)
1031	{
1032	outName = argv[++i];
1033	continue;
1034	}
1035	if ( strcmp(argv[i], "-h") == 0
1036	\|\| strcmp(argv[i], "-?") == 0
1037	\|\| strcmp(argv[i], "--help") == 0)
1038	{
1039	usage(cout, argv[0]);
1040	return RTEXITCODE_SUCCESS;
1041	}
1042
1043	rc = RTStrmOpen(argv[i], "r", &fin);
1044	if (RT_FAILURE(rc))
1045	{
1046	cerr << "Error: Failed to open file '" << argv[i] << "' for reading. rc=" << rc << endl;
1047	return ERROR_OPEN_FILE;
1048	}
1049
1050	rc = ParseUsbIds(fin);
1051	if (rc != 0)
1052	{
1053	cerr << "Error: Failed parsing USB devices file '" << argv[i] << "'" << endl;
1054	RTStrmClose(fin);
1055	return rc;
1056	}
1057	RTStrmClose(fin);
1058	}
1059
1060	/*
1061	* Due to USBIDDBVENDOR::iProduct, there is currently a max of 64KB products.
1062	* (Not a problem as we've only have less that 54K products currently.)
1063	*/
1064	if (g_products.size() > _64K)
1065	{
1066	cerr << "Error: More than 64K products is not supported (input: " << g_products.size() << ")" << endl;
1067	return ERROR_TOO_MANY_PRODUCTS;
1068	}
1069
1070	/*
1071	* Sort the IDs and fill in the iProduct and cProduct members.
1072	*/
1073	sort(g_products.begin(), g_products.end());
1074	sort(g_vendors.begin(), g_vendors.end());
1075
1076	size_t iProduct = 0;
1077	for (size_t iVendor = 0; iVendor < g_vendors.size(); iVendor++)
1078	{
1079	size_t const idVendor = g_vendors[iVendor].vendorID;
1080	g_vendors[iVendor].iProduct = iProduct;
1081	if ( iProduct < g_products.size()
1082	&& g_products[iProduct].vendorID <= idVendor)
1083	{
1084	if (g_products[iProduct].vendorID == idVendor)
1085	do
1086	iProduct++;
1087	while (g_products[iProduct].vendorID == idVendor);
1088	else
1089	{
1090	cerr << "Error: product without vendor after sorting. impossible!" << endl;
1091	return ERROR_IN_PARSE_LINE;
1092	}
1093	}
1094	g_vendors[iVendor].cProducts = iProduct - g_vendors[iVendor].iProduct;
1095	}
1096
1097	/*
1098	* Verify that all IDs are unique.
1099	*/
1100	ProductsSet::iterator ita = adjacent_find(g_products.begin(), g_products.end());
1101	if (ita != g_products.end())
1102	{
1103	cerr << "Error: Duplicate alias detected. " << *ita << endl;
1104	return ERROR_DUPLICATE_ENTRY;
1105	}
1106
1107	/*
1108	* Do string compression and create the string table.
1109	*/
1110	#ifdef USB_ID_DATABASE_WITH_COMPRESSION
1111	DoStringCompression();
1112	#endif
1113	CreateStringTable();
1114
1115	/*
1116	* Print stats.
1117	*/
1118	size_t const cbVendorEntry = sizeof(USBIdDatabase::s_aVendors[0]) + sizeof(USBIdDatabase::s_aVendorNames[0]);
1119	size_t const cbProductEntry = sizeof(USBIdDatabase::s_aProducts[0]) + sizeof(USBIdDatabase::s_aProductNames[0]);
1120
1121	size_t cbOldRaw = (g_products.size() + g_vendors.size()) * sizeof(const char ) 2 + g_cbRawStrings;
1122	size_t cbRaw = g_vendors.size() * cbVendorEntry + g_products.size() * cbProductEntry + g_cbRawStrings;
1123	size_t cbActual = g_vendors.size() * cbVendorEntry + g_products.size() * cbProductEntry + g_cchStrTab;
1124	#ifdef USB_ID_DATABASE_WITH_COMPRESSION
1125	cbActual += sizeof(USBIdDatabase::s_aCompDict);
1126	#endif
1127	cout << INFO_PREF "Total " << dec << cbActual << " bytes";
1128	if (cbActual < cbRaw)
1129	cout << " saving " << dec << ((cbRaw - cbActual) * 100 / cbRaw) << "% (" << (cbRaw - cbActual) << " bytes)";
1130	else
1131	cout << " wasting " << dec << (cbActual - cbRaw) << " bytes";
1132	cout << "; old version " << cbOldRaw << " bytes + relocs ("
1133	<< ((cbOldRaw - cbActual) * 100 / cbOldRaw) << "% save)." << endl;
1134
1135
1136	/*
1137	* Produce the source file.
1138	*/
1139	if (!outName)
1140	{
1141	cerr << "Error: Output file is not specified." << endl;
1142	return ERROR_OPEN_FILE;
1143	}
1144
1145	fout.open(outName);
1146	if (!fout.is_open())
1147	{
1148	cerr << "Error: Can not open file to write '" << outName << "'." << endl;
1149	return ERROR_OPEN_FILE;
1150	}
1151
1152	fout << header;
1153
1154	WriteStringTable(fout);
1155	#ifdef USB_ID_DATABASE_WITH_COMPRESSION
1156	WriteCompressionDictionary(fout);
1157	#endif
1158
1159	fout << product_header;
1160	for (ProductsSet::iterator itp = g_products.begin(); itp != g_products.end(); ++itp)
1161	fout << *itp;
1162	fout << product_part2;
1163	for (ProductsSet::iterator itp = g_products.begin(); itp != g_products.end(); ++itp)
1164	itp->product.printRefLine(fout);
1165	fout << product_footer;
1166
1167	fout << vendor_header;
1168	for (VendorsSet::iterator itv = g_vendors.begin(); itv != g_vendors.end(); ++itv)
1169	fout << *itv;
1170	fout << vendor_part2;
1171	for (VendorsSet::iterator itv = g_vendors.begin(); itv != g_vendors.end(); ++itv)
1172	itv->vendor.printRefLine(fout);
1173	fout << vendor_footer;
1174
1175	fout.close();
1176
1177
1178	return RTEXITCODE_SUCCESS;
1179	}
1180

Note: See TracBrowser for help on using the repository browser.

source: vbox/trunk/src/VBox/Main/src-server/USBIdDatabaseGenerator.cpp@ 58018

Download in other formats: