VirtualBox

source: vbox/trunk/include/iprt/bldprog-strtab.h@ 97967

Last change on this file since 97967 was 96550, checked in by vboxsync, 2 years ago

iprt/bldprog-strtab*.cpp.h: Increased the dictionary size to make use of character codes needed for string encoding as well as slot 0xff in ASCII mode (otherwise used for UTF-8 sequence escaping); however, slot zero is not yet usable. Improved the compression a bit further by considering one separator character following a word, thus using 'VERR_' instead of 'VERR'. The iprt defines-only header claims a 53% compression rate, up from 38%. bugref:9726

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 8.8 KB
Line 
1/** @file
2 * IPRT - Build Program - String Table Generator, Accessors.
3 */
4
5/*
6 * Copyright (C) 2006-2022 Oracle and/or its affiliates.
7 *
8 * This file is part of VirtualBox base platform packages, as
9 * available from https://www.virtualbox.org.
10 *
11 * This program is free software; you can redistribute it and/or
12 * modify it under the terms of the GNU General Public License
13 * as published by the Free Software Foundation, in version 3 of the
14 * License.
15 *
16 * This program is distributed in the hope that it will be useful, but
17 * WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19 * General Public License for more details.
20 *
21 * You should have received a copy of the GNU General Public License
22 * along with this program; if not, see <https://www.gnu.org/licenses>.
23 *
24 * The contents of this file may alternatively be used under the terms
25 * of the Common Development and Distribution License Version 1.0
26 * (CDDL), a copy of it is provided in the "COPYING.CDDL" file included
27 * in the VirtualBox distribution, in which case the provisions of the
28 * CDDL are applicable instead of those of the GPL.
29 *
30 * You may elect to license modified versions of this file under the
31 * terms and conditions of either the GPL or the CDDL or both.
32 *
33 * SPDX-License-Identifier: GPL-3.0-only OR CDDL-1.0
34 */
35
36#ifndef IPRT_INCLUDED_bldprog_strtab_h
37#define IPRT_INCLUDED_bldprog_strtab_h
38#ifndef RT_WITHOUT_PRAGMA_ONCE
39# pragma once
40#endif
41
42#include <iprt/assert.h>
43#include <iprt/err.h>
44#include <iprt/string.h>
45
46
47/**
48 * The default build program string table reference.
49 */
50typedef struct RTBLDPROGSTRREF
51{
52 /** Offset of the string in the string table. */
53 uint32_t off : 22;
54 /** The length of the string. */
55 uint32_t cch : 10;
56} RTBLDPROGSTRREF;
57AssertCompileSize(RTBLDPROGSTRREF, sizeof(uint32_t));
58/** Pointer to a build program string table reference. */
59typedef RTBLDPROGSTRREF const *PCRTBLDPROGSTRREF;
60
61
62typedef struct RTBLDPROGSTRTAB
63{
64 const char *pchStrTab;
65 uint32_t cchStrTab;
66 uint32_t cCompDict;
67 PCRTBLDPROGSTRREF paCompDict;
68} RTBLDPROGSTRTAB;
69typedef const RTBLDPROGSTRTAB *PCRTBLDPROGSTRTAB;
70
71
72/**
73 * Tries to ensure the buffer is terminated when failing.
74 */
75DECLINLINE(ssize_t) RTBldProgStrTabQueryStringFail(int rc, char *pszDstStart, char *pszDst, size_t cbDst)
76{
77 if (cbDst)
78 *pszDst = '\0';
79 else if (pszDstStart != pszDst)
80 pszDst[-1] = '\0';
81 return rc;
82}
83
84
85/**
86 * Retrieves the decompressed string.
87 *
88 * @returns The string size on success, IPRT status code on failure.
89 * @param pStrTab The string table.
90 * @param offString The offset of the string.
91 * @param cchString The length of the string.
92 * @param pszDst The return buffer.
93 * @param cbDst The size of the return buffer.
94 */
95DECLINLINE(ssize_t) RTBldProgStrTabQueryString(PCRTBLDPROGSTRTAB pStrTab, uint32_t offString, size_t cchString,
96 char *pszDst, size_t cbDst)
97{
98 AssertReturn(offString < pStrTab->cchStrTab, VERR_OUT_OF_RANGE);
99 AssertReturn(offString + cchString <= pStrTab->cchStrTab, VERR_OUT_OF_RANGE);
100
101 if (pStrTab->cCompDict)
102 {
103 Assert(pStrTab->cCompDict == 256 || pStrTab->cCompDict == 255);
104
105 /*
106 * Is compressed, decompress it.
107 */
108 char * const pchDstStart = pszDst;
109 const char *pchSrc = &pStrTab->pchStrTab[offString];
110 while (cchString-- > 0)
111 {
112 unsigned char uch = *(unsigned char *)pchSrc++;
113 if (uch != 0xff || pStrTab->cCompDict > 0xff)
114 {
115 /*
116 * Look it up in the dictionary, either a single 7-bit character or a word.
117 * Either way, no UTF-8 unescaping necessary.
118 */
119 PCRTBLDPROGSTRREF pWord = &pStrTab->paCompDict[uch];
120 size_t const cchWord = pWord->cch;
121 if (cchWord <= 1)
122 {
123 Assert(uch != 0);
124 Assert(uch <= 127);
125 AssertReturn(cbDst > 1, RTBldProgStrTabQueryStringFail(VERR_BUFFER_OVERFLOW, pchDstStart, pszDst, cbDst));
126 cbDst -= 1;
127 *pszDst++ = (char)uch;
128 }
129 else
130 {
131 Assert(cchWord > 1);
132 AssertReturn((size_t)pWord->off + cchWord <= pStrTab->cchStrTab,
133 RTBldProgStrTabQueryStringFail(VERR_INVALID_PARAMETER, pchDstStart, pszDst, cbDst));
134 AssertReturn(cbDst > cchWord,
135 RTBldProgStrTabQueryStringFail(VERR_BUFFER_OVERFLOW, pchDstStart, pszDst, cbDst));
136 memcpy(pszDst, &pStrTab->pchStrTab[pWord->off], cchWord);
137 pszDst += cchWord;
138 cbDst -= cchWord;
139 }
140 }
141 else
142 {
143 /*
144 * UTF-8 encoded unicode codepoint.
145 */
146 size_t cchCp;
147 RTUNICP uc = ' ';
148 int rc = RTStrGetCpNEx(&pchSrc, &cchString, &uc);
149 AssertStmt(RT_SUCCESS(rc), (uc = '?', pchSrc++, cchString--));
150
151 cchCp = RTStrCpSize(uc);
152 AssertReturn(cbDst > cchCp,
153 RTBldProgStrTabQueryStringFail(VERR_BUFFER_OVERFLOW, pchDstStart, pszDst, cbDst));
154
155 RTStrPutCp(pszDst, uc);
156 pszDst += cchCp;
157 cbDst -= cchCp;
158 }
159 }
160 AssertReturn(cbDst > 0, RTBldProgStrTabQueryStringFail(VERR_BUFFER_OVERFLOW, pchDstStart, pszDst, cbDst));
161 *pszDst = '\0';
162 return pszDst - pchDstStart;
163 }
164
165 /*
166 * Not compressed.
167 */
168 if (cbDst > cchString)
169 {
170 memcpy(pszDst, &pStrTab->pchStrTab[offString], cchString);
171 pszDst[cchString] = '\0';
172 return (ssize_t)cchString;
173 }
174 if (cbDst > 0)
175 {
176 memcpy(pszDst, &pStrTab->pchStrTab[offString], cbDst - 1);
177 pszDst[cbDst - 1] = '\0';
178 }
179 return VERR_BUFFER_OVERFLOW;
180}
181
182
183/**
184 * Outputs the decompressed string.
185 *
186 * @returns The sum of the pfnOutput return values.
187 * @param pStrTab The string table.
188 * @param offString The offset of the string.
189 * @param cchString The length of the string.
190 * @param pfnOutput The output function.
191 * @param pvArgOutput The argument to pass to the output function.
192 *
193 */
194DECLINLINE(size_t) RTBldProgStrTabQueryOutput(PCRTBLDPROGSTRTAB pStrTab, uint32_t offString, size_t cchString,
195 PFNRTSTROUTPUT pfnOutput, void *pvArgOutput)
196{
197 AssertReturn(offString < pStrTab->cchStrTab, 0);
198 AssertReturn(offString + cchString <= pStrTab->cchStrTab, 0);
199
200 if (pStrTab->cCompDict)
201 {
202 Assert(pStrTab->cCompDict == 256 || pStrTab->cCompDict == 255);
203
204 /*
205 * Could be compressed, decompress it.
206 */
207 size_t cchRet = 0;
208 const char *pchSrc = &pStrTab->pchStrTab[offString];
209 while (cchString-- > 0)
210 {
211 unsigned char uch = *(unsigned char *)pchSrc++;
212 if (uch != 0xff || pStrTab->cCompDict > 0xff)
213 {
214 /*
215 * Look it up in the dictionary, either a single 7-bit character or a word.
216 * Either way, no UTF-8 unescaping necessary.
217 */
218 PCRTBLDPROGSTRREF pWord = &pStrTab->paCompDict[uch];
219 size_t const cchWord = pWord->cch;
220 if (cchWord <= 1)
221 {
222 Assert(uch != 0);
223 Assert(uch <= 127);
224 cchRet += pfnOutput(pvArgOutput, (const char *)&uch, 1);
225 }
226 else
227 {
228 Assert(cchWord > 1);
229 AssertReturn((size_t)pWord->off + cchWord <= pStrTab->cchStrTab, cchRet);
230
231 cchRet += pfnOutput(pvArgOutput, &pStrTab->pchStrTab[pWord->off], cchWord);
232 }
233 }
234 else
235 {
236 /*
237 * UTF-8 encoded unicode codepoint.
238 */
239 const char * const pchUtf8Seq = pchSrc;
240 RTUNICP uc = ' ';
241 int rc = RTStrGetCpNEx(&pchSrc, &cchString, &uc);
242 if (RT_SUCCESS(rc))
243 cchRet += pfnOutput(pvArgOutput, pchUtf8Seq, (size_t)(pchSrc - pchUtf8Seq));
244 else
245 cchRet += pfnOutput(pvArgOutput, "?", 1);
246 }
247 }
248 return cchRet;
249 }
250
251 /*
252 * Not compressed.
253 */
254 return pfnOutput(pvArgOutput, &pStrTab->pchStrTab[offString], cchString);
255}
256
257
258#endif /* !IPRT_INCLUDED_bldprog_strtab_h */
259
Note: See TracBrowser for help on using the repository browser.

© 2024 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette