iprt

Timestamp:

Aug 30, 2022 1:04:57 AM (2 years ago)

Author:

vboxsync

Message:

iprt/bldprog-strtab*.cpp.h: Increased the size dictionary to make use of character codes needed for string encoding as well as slot 0xff in ASCII mode (otherwise used for UTF-8 sequence escaping), however slot zero is not yet usable. Improved the compression a bit further by consindering one separator character following a word, thus using 'VERR_' instead of 'VERR'. The the iprt defines-only header claims a 53% compression rate, up from 38%. bugref:9726

Location:

trunk/include/iprt

Files:

: 2 edited

bldprog-strtab-template.cpp.h (modified) (19 diffs)
bldprog-strtab.h (modified) (5 diffs)

Legend:

: Unmodified
: Added
: Removed

trunk/include/iprt/bldprog-strtab-template.cpp.h

-              r96407
+              r96550
 #ifdef BLDPROG_STRTAB_WITH_COMPRESSION
 # include <algorithm>
+# include <iprt/asm.h>
 # include <map>
 # include <iprt/sanitized/string>
+# include <vector>
+typedef std::map<std::string, size_t> BLDPROGWORDFREQMAP;
+typedef BLDPROGWORDFREQMAP::value_type BLDPROGWORDFREQPAIR;
+# define BLDPROG_STRTAB_WITH_WORD_SEP_ALTERNATIVE
+typedef struct BLDPROGWORDFREQSTATS
+{
+    uint32_t cWithoutSep;   /**< Number of occurances without a separator. */
+# ifdef BLDPROG_STRTAB_WITH_WORD_SEP_ALTERNATIVE
+    uint32_t cWithSep;      /**< Number of occurance with a separator. */
+    char     chSep;         /**< The separator.  First come basis. */
+# endif
+} BLDPROGWORDFREQSTATS;
+typedef std::map<std::string, BLDPROGWORDFREQSTATS> BLDPROGWORDFREQMAP;
 #endif
 …
 #ifdef BLDPROG_STRTAB_WITH_COMPRESSION
     /** The 127 words we've picked to be indexed by reference.  */
     BLDPROGSTRING       aCompDict[127];
     /** The frequency of the 127 dictionary entries.  */
     size_t              auCompDictFreq[127];
+    /** The 256 words we've picked to be indexed by reference. */
+    BLDPROGSTRING       aCompDict[256];
+    /** The frequency of the 256 dictionary entries.  */
+    size_t              auCompDictFreq[256];
     /** Incoming strings pending compression. */
     PBLDPROGSTRING     *papPendingStrings;
 …
      * @todo rewrite in plain C.  */
     BLDPROGWORDFREQMAP  Frequencies;
+    /** Map of characters used by input strings. */
+    uint64_t            bmUsedChars[256/64];
 #endif
 …
     pThis->cPendingStrings = 0;
     pThis->cMaxPendingStrings = cMaxStrings;
+    memset(pThis->bmUsedChars, 0, sizeof(pThis->bmUsedChars));
+    ASMBitSet(pThis->bmUsedChars, 0);    /* Some parts of the code still thinks zero is a terminator, so don't use it for now. */
+# ifndef BLDPROG_STRTAB_PURE_ASCII
+    ASMBitSet(pThis->bmUsedChars, 0xff); /* Reserve escape byte for codepoints above 127. */
+# endif
 #endif
 …
 static void bldProgStrTab_compressorAnalyzeString(PBLDPROGSTRTAB pThis, PBLDPROGSTRING pStr)
+{
+    /*
+     * Mark all the string characters as used.
+     */
     const char *psz = pStr->pszString;
+    char ch;
+    while ((ch = *psz++) != '\0')
+        ASMBitSet(pThis->bmUsedChars, (uint8_t)ch);
     /*
      * For now we just consider words.
      */
     char ch;
+    psz = pStr->pszString;
     while ((ch = *psz) != '\0')
+    {
 …
             BLDPROGWORDFREQMAP::iterator it = pThis->Frequencies.find(strWord);
             if (it != pThis->Frequencies.end())
+                it->second += cchWord - 1;
+            {
+# ifdef BLDPROG_STRTAB_WITH_WORD_SEP_ALTERNATIVE
+                char const chSep = psz[cchWord];
+                if (chSep != '\0' && (it->second.chSep == chSep || it->second.chSep == '\0'))
+                {
+                    it->second.chSep = chSep;
+                    it->second.cWithSep++;
+                }
+                else
+# endif
+                    it->second.cWithoutSep++;
+            }
             else
+                pThis->Frequencies[strWord] = 0;
+            /** @todo could gain hits by including the space after the word, but that
+             *        has the same accounting problems as the two words scenario below. */
+            {
+# ifdef BLDPROG_STRTAB_WITH_WORD_SEP_ALTERNATIVE
+                char const chSep = psz[cchWord];
+                if (chSep != '\0')
+                {
+                    BLDPROGWORDFREQSTATS NewWord = { 0, 0, chSep };
+                    pThis->Frequencies[strWord] = NewWord;
+                }
+                else
+# endif
+                {
+                    static BLDPROGWORDFREQSTATS s_NewWord = { 0 };
+                    pThis->Frequencies[strWord] = s_NewWord;
+                }
+            }
 # if 0 /** @todo need better accounting for overlapping alternatives before this can be enabled. */
 …
         /* Check for g_aWord matches. */
+        size_t cchMax = pszSrcEnd - pszSrc;
+        for (unsigned i = 0; i < RT_ELEMENTS(pThis->aCompDict); i++)
+        {
+            size_t cchLen = pThis->aCompDict[i].cchString;
+            if (   cchLen >= cchWord
+                && cchLen <= cchMax
+                && memcmp(pThis->aCompDict[i].pszString, pszSrc, cchLen) == 0)
+            {
+                *pszDst++ = (unsigned char)(0x80 | i);
+                pszSrc += cchLen;
+                cchWord = 0;
+                break;
+            }
+        }
+        if (cchWord)
+        if (cchWord > 1)
+        {
+            size_t cchMax = pszSrcEnd - pszSrc;
+            for (unsigned i = 0; i < RT_ELEMENTS(pThis->aCompDict); i++)
+            {
+                size_t cchLen = pThis->aCompDict[i].cchString;
+                if (   cchLen >= cchWord
+                    && cchLen <= cchMax
+                    && memcmp(pThis->aCompDict[i].pszString, pszSrc, cchLen) == 0)
+                {
+                    *pszDst++ = (unsigned char)i;
+                    pszSrc += cchLen;
+                    cchWord = 0;
+                    break;
+                }
+            }
+        }
+        if (cchWord > 0)
+        {
             /* Copy the current word. */
 …
 /**
+ * For sorting the frequency fidning in descending order.
+ *
+ * Comparison operators are put outside to make older gcc versions (like 4.1.1
+ * on lnx64-rel) happy.
+ */
+class WordFreqSortEntry
+ * Entry in SortedDictionary.
+ *
+ * Uses variable length string member, so not class and allocated via malloc.
+ */
+struct SortedDictionaryEntry
+{
+    size_t      m_cchGain;
+    size_t      m_cchString;
+    char        m_szString[RT_FLEXIBLE_ARRAY];
+    /** Allocates and initializes a new entry. */
+    static SortedDictionaryEntry *allocate(const char *a_pch, size_t a_cch, size_t a_cchGain, char a_chSep)
+    {
+        size_t cbString = a_cch + !!a_chSep + 1;
+        SortedDictionaryEntry *pNew = (SortedDictionaryEntry *)malloc(RT_UOFFSETOF(SortedDictionaryEntry, m_szString) + cbString);
+        if (pNew)
+        {
+            pNew->m_cchGain   = a_cchGain;
+            memcpy(pNew->m_szString, a_pch, a_cch);
+            if (a_chSep)
+                pNew->m_szString[a_cch++] = a_chSep;
+            pNew->m_szString[a_cch] = '\0';
+            pNew->m_cchString = a_cch;
+        }
+        return pNew;
+    }
+    /** Compares this dictionary entry with an incoming one.
+     * @retval -1 if this entry is of less worth than the new one.
+     * @retval  0 if this entry is of equal worth to the new one.
+     * @retval +1 if this entry is of more worth than the new one.
+     */
+    int compare(size_t a_cchGain, size_t a_cchString)
+    {
+        /* Higher gain is preferred of course: */
+        if (m_cchGain < a_cchGain)
+            return -1;
+        if (m_cchGain > a_cchGain)
+            return 1;
+        /* Gain is the same. Prefer the shorter string, as it will result in a shorter string table: */
+        if (m_cchString > a_cchString)
+            return -1;
+        if (m_cchString < a_cchString)
+            return 1;
+        return 0;
+    }
+};
+/**
+ * Insertion sort dictionary that keeps the 256 best words.
+ *
+ * Used by bldProgStrTab_compressorDoStringCompression to pick the dictionary
+ * words.
+ */
+class SortedDictionary
+{
 public:
+    BLDPROGWORDFREQPAIR const *m_pPair;
+public:
+    WordFreqSortEntry(BLDPROGWORDFREQPAIR const *pPair) : m_pPair(pPair) {}
+    size_t                  m_cEntries;
+    SortedDictionaryEntry  *m_apEntries[256];
+    SortedDictionary()
+        : m_cEntries(0)
+    {
+        for (size_t i = 0; i < RT_ELEMENTS(m_apEntries); i++)
+            m_apEntries[i] = NULL;
+    }
+    ~SortedDictionary()
+    {
+        while (m_cEntries > 0)
+        {
+            free(m_apEntries[--m_cEntries]);
+            m_apEntries[m_cEntries] = NULL;
+        }
+    }
+    /**
+     * Inserts a new entry, if it's worth it.
+     * @returns true on succes, false if out of memory.
+     */
+    bool insert(const char *a_pchString, size_t a_cchStringBase, size_t a_cchGain, char a_chSep = 0)
+    {
+        size_t const cchString = a_cchStringBase + (a_chSep + 1);
+        /*
+         * Drop the insert if the symbol table is full and the insert is less worth the last entry:
+         */
+        if (   m_cEntries >= RT_ELEMENTS(m_apEntries)
+            && m_apEntries[RT_ELEMENTS(m_apEntries) - 1]->compare(a_cchGain, cchString) >= 0)
+            return true;
+        /*
+         * Create a new entry to insert.
+         */
+        SortedDictionaryEntry *pNewEntry = SortedDictionaryEntry::allocate(a_pchString, a_cchStringBase, a_cchGain, a_chSep);
+        if (!pNewEntry)
+            return false;
+        /*
+         * Find the insert point.
+         */
+        if (m_cEntries == 0)
+        {
+            m_apEntries[0] = pNewEntry;
+            m_cEntries     = 1;
+        }
+        else
+        {
+            /* If table is full, drop the last entry before we start (already made
+               sure the incoming entry is preferable to the one were dropping): */
+            if (m_cEntries >= RT_ELEMENTS(m_apEntries))
+            {
+                free(m_apEntries[RT_ELEMENTS(m_apEntries) - 1]);
+                m_apEntries[RT_ELEMENTS(m_apEntries) - 1] = NULL;
+                m_cEntries = RT_ELEMENTS(m_apEntries) - 1;
+            }
+            /* Find where to insert the new entry: */
+            /** @todo use binary search. */
+            size_t i = m_cEntries;
+            while (i > 0 && m_apEntries[i - 1]->compare(a_cchGain, cchString) < 0)
+                i--;
+            /* Shift entries to make room and insert the new entry. */
+            if (i < m_cEntries)
+                memmove(&m_apEntries[i + 1], &m_apEntries[i], (m_cEntries - i) * sizeof(m_apEntries[0]));
+            m_apEntries[i] = pNewEntry;
+            m_cEntries++;
+        }
+        return true;
+    }
 };
-bool operator == (WordFreqSortEntry const &rLeft, WordFreqSortEntry const &rRight)
+{
-    return rLeft.m_pPair->second == rRight.m_pPair->second;
+}
-bool operator <  (WordFreqSortEntry const &rLeft, WordFreqSortEntry const &rRight)
+{
-    return rLeft.m_pPair->second >  rRight.m_pPair->second;
+}
 …
+{
     /*
+     * Sort the frequency analyzis result and pick the top 127 ones.
+     */
+    std::vector<WordFreqSortEntry> SortMap;
+     * Sort the frequency analyzis result and pick the top entries for any
+     * available dictionary slots.
+     */
+    SortedDictionary SortedDict;
     for (BLDPROGWORDFREQMAP::iterator it = pThis->Frequencies.begin(); it != pThis->Frequencies.end(); ++it)
+    {
+        BLDPROGWORDFREQPAIR const &rPair = *it;
+        SortMap.push_back(WordFreqSortEntry(&rPair));
+    }
+    sort(SortMap.begin(), SortMap.end());
+    size_t cb = 0;
+    size_t i  = 0;
+    for (std::vector<WordFreqSortEntry>::iterator it = SortMap.begin();
+         it != SortMap.end() && i < RT_ELEMENTS(pThis->aCompDict);
+         ++it, i++)
+    {
+        pThis->auCompDictFreq[i]      = it->m_pPair->second;
+        pThis->aCompDict[i].cchString = it->m_pPair->first.length();
+        bool fInsert;
+        size_t const cchString      = it->first.length();
+# ifndef BLDPROG_STRTAB_WITH_WORD_SEP_ALTERNATIVE
+        size_t const cchGainWithout = it->second.cWithoutSep * cchString;
+# else
+        size_t const cchGainWithout = (it->second.cWithoutSep + it->second.cWithSep) * cchString;
+        size_t const cchGainWith    = it->second.cWithSep * (cchString + 1);
+        if (cchGainWith > cchGainWithout)
+            fInsert = SortedDict.insert(it->first.c_str(), cchString, cchGainWith, it->second.chSep);
+        else
+# endif
+            fInsert = SortedDict.insert(it->first.c_str(), cchString, cchGainWithout);
+        if (!fInsert)
+            return false;
+    }
+    size_t cb     = 0;
+    size_t cWords = 0;
+    size_t iDict  = 0;
+    for (size_t i = 0; i < RT_ELEMENTS(pThis->aCompDict); i++)
+    {
+        char        szTmp[2] = { (char)i, '\0' };
+        const char *psz      = szTmp;
+        if (   ASMBitTest(pThis->bmUsedChars, (int32_t)i)
+            || iDict >= SortedDict.m_cEntries)
+        {
+            /* character entry  */
+            pThis->auCompDictFreq[i]      = 0;
+            pThis->aCompDict[i].cchString = 1;
+        }
+        else
+        {
+            /* word entry */
+            cb += SortedDict.m_apEntries[iDict]->m_cchGain;
+            pThis->auCompDictFreq[i]      = SortedDict.m_apEntries[iDict]->m_cchGain;
+            pThis->aCompDict[i].cchString = SortedDict.m_apEntries[iDict]->m_cchString;
+            psz = SortedDict.m_apEntries[iDict]->m_szString;
+            cWords++;
+            iDict++;
+        }
         pThis->aCompDict[i].pszString = (char *)malloc(pThis->aCompDict[i].cchString + 1);
         if (pThis->aCompDict[i].pszString)
             memcpy(pThis->aCompDict[i].pszString, it->m_pPair->first.c_str(), pThis->aCompDict[i].cchString + 1);
+            memcpy(pThis->aCompDict[i].pszString, psz, pThis->aCompDict[i].cchString + 1);
         else
             return false;
-        cb += it->m_pPair->second;
+    }
     if (fVerbose)
+        printf("debug: Estimated string compression saving: %u bytes\n", (unsigned)cb);
+        printf("debug: Estimated string compression saving: %u bytes\n"
+               "debug: %u words, %u characters\n"
+               , (unsigned)cb, (unsigned)cWords, (unsigned)(RT_ELEMENTS(pThis->aCompDict) - cWords));
     /*
 …
     size_t cchNewMax = 0;
     size_t cchNewMin = BLDPROG_STRTAB_MAX_STRLEN;
     i = pThis->cPendingStrings;
+    size_t i         = pThis->cPendingStrings;
     while (i-- > 0)
+    {
 …
      */
     for (unsigned i = 0; i < RT_ELEMENTS(pThis->aCompDict); i++)
+        bldProgStrTab_AddStringToHashTab(pThis, &pThis->aCompDict[i]);
+        if (pThis->aCompDict[i].cchString > 1)
+            bldProgStrTab_AddStringToHashTab(pThis, &pThis->aCompDict[i]);
+# ifdef RT_STRICT
+        else if (pThis->aCompDict[i].cchString != 1)
+            abort();
+# endif
 #endif
     if (fVerbose)
 …
     for (size_t i = 0; i < pThis->cSortedStrings; i++)
+    {
         PBLDPROGSTRING       pCur    = pThis->papSortedStrings[i];
+        PBLDPROGSTRING      pCur      = pThis->papSortedStrings[i];
         const char * const  pszCur    = pCur->pszString;
         size_t       const  cchCur    = pCur->cchString;
 …
     while ((uch = *psz++) != '\0')
+    {
+#ifdef BLDPROG_STRTAB_WITH_COMPRESSION
+        if (pThis->aCompDict[uch].cchString == 1)
+#else
         if (!(uch & 0x80))
+#endif
+        {
             if (uch != '\'' && uch != '\\')
 …
+        }
 #ifdef BLDPROG_STRTAB_WITH_COMPRESSION
+        else if (uch != 0xff)
+            fputs(pThis->aCompDict[uch & 0x7f].pszString, pOut);
+        else
+        {
+# ifdef BLDPROG_STRTAB_PURE_ASCII
+            abort();
+# else
+# ifndef BLDPROG_STRTAB_PURE_ASCII
+        else if (uch == 0xff)
+        {
             RTUNICP uc = RTStrGetCp((const char *)psz);
             psz += RTStrCpSize(uc);
             fprintf(pOut, "\\u%04x", uc);
+# endif
+        }
+        }
+# else
+        else
+            fputs(pThis->aCompDict[uch].pszString, pOut);
+# endif
 #else
         else
 …
 # ifdef BLDPROG_STRTAB_WITH_COMPRESSION
     for (unsigned i = 0; i < RT_ELEMENTS(pThis->aCompDict); i++)
+        BldProgStrTab_CheckStrTabString(pThis, &pThis->aCompDict[i]);
+    {
+        if (ASMBitTest(pThis->bmUsedChars, (int32_t)i)
+            ? pThis->aCompDict[i].cchString != 1 : pThis->aCompDict[i].cchString < 1)
+            abort();
+        if (pThis->aCompDict[i].cchString > 1)
+            BldProgStrTab_CheckStrTabString(pThis, &pThis->aCompDict[i]);
+    }
 # endif
 #endif
 …
     for (unsigned i = 0x7f; i < 0x100; i++)
         abCharCat[i] = 2;
+#ifdef BLDPROG_STRTAB_WITH_COMPRESSION
+    for (unsigned i = 0; i < 0x100; i++)
+        if (!ASMBitTest(pThis->bmUsedChars, (int32_t)i)) /* Encode table references using '\xYY'. */
+            abCharCat[i] = 2;
+#endif
     /*
 …
         if (offEnd > off)
+        {
             /* Comment with a uncompressed and more readable version of the string. */
+            /* Comment with an uncompressed and more readable version of the string. */
             if (off == pCur->offStrTab)
                 fprintf(pOut, "/* 0x%05x = \"", off);
 …
             pszBaseName, (unsigned)RT_ELEMENTS(pThis->aCompDict));
     for (unsigned i = 0; i < RT_ELEMENTS(pThis->aCompDict); i++)
+        fprintf(pOut, "    { %#08x, %#04x }, // %6lu - %s\n",
+                pThis->aCompDict[i].offStrTab, (unsigned)pThis->aCompDict[i].cchString,
+                (unsigned long)pThis->auCompDictFreq[i], pThis->aCompDict[i].pszString);
+        if (pThis->aCompDict[i].cchString > 1)
+            fprintf(pOut, "    /*[%3u]=*/ { %#08x, %#04x }, // %6lu - %s\n", i,
+                    pThis->aCompDict[i].offStrTab, (unsigned)pThis->aCompDict[i].cchString,
+                    (unsigned long)pThis->auCompDictFreq[i], pThis->aCompDict[i].pszString);
+# ifndef BLDPROG_STRTAB_PURE_ASCII
+        else if (i == 0xff)
+            fprintf(pOut, "    /*[%3u]=*/ { 0x000000, 0x00 }, // UTF-8 escape\n", i);
+# endif
+        else if (i == 0)
+            fprintf(pOut, "    /*[%3u]=*/ { 0x000000, 0x00 }, // unused, because zero terminator\n", i);
+        else if (i < 0x20)
+            fprintf(pOut, "    /*[%3u]=*/ { 0x000000, 0x00 }, // %02x\n", i, i);
+        else
+            fprintf(pOut, "    /*[%3u]=*/ { 0x000000, 0x00 }, // '%c'\n", i,  (char)i);
     fprintf(pOut, "};\n\n");
 #endif
 …
             "    /*.paCompDict = */ &g_aCompDict%s[0]\n"
             "};\n"
+            , (unsigned)RT_ELEMENTS(pThis->aCompDict), pszBaseName);
+# ifndef BLDPROG_STRTAB_PURE_ASCII /* 255 or 256 entries is how the decoder knows  */
+            , (unsigned)RT_ELEMENTS(pThis->aCompDict) - 1,
+# else
+            , (unsigned)RT_ELEMENTS(pThis->aCompDict),
+# endif
+            pszBaseName);
 #else
     fprintf(pOut,

trunk/include/iprt/bldprog-strtab.h

-              r96407
+              r96550
     const char         *pchStrTab;
     uint32_t            cchStrTab;
     uint8_t             cCompDict;
+    uint32_t            cCompDict;
     PCRTBLDPROGSTRREF   paCompDict;
 } RTBLDPROGSTRTAB;
 …
     if (pStrTab->cCompDict)
+    {
+        Assert(pStrTab->cCompDict == 256 || pStrTab->cCompDict == 255);
         /*
          * Could be compressed, decompress it.
+         * Is compressed, decompress it.
          */
         char * const pchDstStart = pszDst;
 …
+        {
             unsigned char uch = *(unsigned char *)pchSrc++;
+            if (!(uch & 0x80))
+            {
+                /*
+                 * Plain text.
+                 */
+                AssertReturn(cbDst > 1, RTBldProgStrTabQueryStringFail(VERR_BUFFER_OVERFLOW, pchDstStart, pszDst, cbDst));
+                cbDst    -= 1;
+                *pszDst++ = (char)uch;
+                Assert(uch != 0);
+            }
+            else if (uch != 0xff)
+            {
+                /*
+                 * Dictionary reference. (No UTF-8 unescaping necessary here.)
+                 */
+                PCRTBLDPROGSTRREF   pWord   = &pStrTab->paCompDict[uch & 0x7f];
+            if (uch != 0xff || pStrTab->cCompDict > 0xff)
+            {
+                /*
+                 * Look it up in the dictionary, either a single 7-bit character or a word.
+                 * Either way, no UTF-8 unescaping necessary.
+                 */
+                PCRTBLDPROGSTRREF   pWord   = &pStrTab->paCompDict[uch];
                 size_t const        cchWord = pWord->cch;
+                AssertReturn((size_t)pWord->off + cchWord <= pStrTab->cchStrTab,
+                             RTBldProgStrTabQueryStringFail(VERR_INVALID_PARAMETER, pchDstStart, pszDst, cbDst));
+                AssertReturn(cbDst > cchWord,
+                             RTBldProgStrTabQueryStringFail(VERR_BUFFER_OVERFLOW, pchDstStart, pszDst, cbDst));
+                memcpy(pszDst, &pStrTab->pchStrTab[pWord->off], cchWord);
+                pszDst += cchWord;
+                cbDst  -= cchWord;
+                if (cchWord <= 1)
+                {
+                    Assert(uch != 0);
+                    Assert(uch <= 127);
+                    AssertReturn(cbDst > 1, RTBldProgStrTabQueryStringFail(VERR_BUFFER_OVERFLOW, pchDstStart, pszDst, cbDst));
+                    cbDst    -= 1;
+                    *pszDst++ = (char)uch;
+                }
+                else
+                {
+                    Assert(cchWord > 1);
+                    AssertReturn((size_t)pWord->off + cchWord <= pStrTab->cchStrTab,
+                                 RTBldProgStrTabQueryStringFail(VERR_INVALID_PARAMETER, pchDstStart, pszDst, cbDst));
+                    AssertReturn(cbDst > cchWord,
+                                 RTBldProgStrTabQueryStringFail(VERR_BUFFER_OVERFLOW, pchDstStart, pszDst, cbDst));
+                    memcpy(pszDst, &pStrTab->pchStrTab[pWord->off], cchWord);
+                    pszDst += cchWord;
+                    cbDst  -= cchWord;
+                }
+            }
             else
 …
     if (pStrTab->cCompDict)
+    {
+        Assert(pStrTab->cCompDict == 256 || pStrTab->cCompDict == 255);
         /*
          * Could be compressed, decompress it.
 …
+        {
             unsigned char uch = *(unsigned char *)pchSrc++;
+            if (!(uch & 0x80))
+            {
+                /*
+                 * Plain text.
+                 */
+                Assert(uch != 0);
+                cchRet += pfnOutput(pvArgOutput, (const char *)&uch, 1);
+            }
+            else if (uch != 0xff)
+            {
+                /*
+                 * Dictionary reference. (No UTF-8 unescaping necessary here.)
+                 */
+                PCRTBLDPROGSTRREF   pWord   = &pStrTab->paCompDict[uch & 0x7f];
+            if (uch != 0xff || pStrTab->cCompDict > 0xff)
+            {
+                /*
+                 * Look it up in the dictionary, either a single 7-bit character or a word.
+                 * Either way, no UTF-8 unescaping necessary.
+                 */
+                PCRTBLDPROGSTRREF   pWord   = &pStrTab->paCompDict[uch];
                 size_t const        cchWord = pWord->cch;
+                AssertReturn((size_t)pWord->off + cchWord <= pStrTab->cchStrTab, cchRet);
+                cchRet += pfnOutput(pvArgOutput, &pStrTab->pchStrTab[pWord->off], cchWord);
+                if (cchWord <= 1)
+                {
+                    Assert(uch != 0);
+                    Assert(uch <= 127);
+                    cchRet += pfnOutput(pvArgOutput, (const char *)&uch, 1);
+                }
+                else
+                {
+                    Assert(cchWord > 1);
+                    AssertReturn((size_t)pWord->off + cchWord <= pStrTab->cchStrTab, cchRet);
+                    cchRet += pfnOutput(pvArgOutput, &pStrTab->pchStrTab[pWord->off], cchWord);
+                }
+            }
             else

Note: See TracChangeset for help on using the changeset viewer.

Changeset 96550 in vbox for trunk/include/iprt

Legend:

trunk/include/iprt/bldprog-strtab-template.cpp.h

trunk/include/iprt/bldprog-strtab.h

Download in other formats: