1 | // SPDX-License-Identifier: 0BSD
|
---|
2 |
|
---|
3 | ///////////////////////////////////////////////////////////////////////////////
|
---|
4 | //
|
---|
5 | /// \file lz_decoder.h
|
---|
6 | /// \brief LZ out window
|
---|
7 | ///
|
---|
8 | // Authors: Igor Pavlov
|
---|
9 | // Lasse Collin
|
---|
10 | //
|
---|
11 | ///////////////////////////////////////////////////////////////////////////////
|
---|
12 |
|
---|
13 | #ifndef LZMA_LZ_DECODER_H
|
---|
14 | #define LZMA_LZ_DECODER_H
|
---|
15 |
|
---|
16 | #include "common.h"
|
---|
17 |
|
---|
18 | #ifdef HAVE_IMMINTRIN_H
|
---|
19 | # include <immintrin.h>
|
---|
20 | #endif
|
---|
21 |
|
---|
22 |
|
---|
23 | // dict_repeat() implementation variant:
|
---|
24 | // 0 = Byte-by-byte copying only.
|
---|
25 | // 1 = Use memcpy() for non-overlapping copies.
|
---|
26 | // 2 = Use x86 SSE2 for non-overlapping copies.
|
---|
27 | #ifndef LZMA_LZ_DECODER_CONFIG
|
---|
28 | # if defined(TUKLIB_FAST_UNALIGNED_ACCESS) \
|
---|
29 | && defined(HAVE_IMMINTRIN_H) \
|
---|
30 | && (defined(__SSE2__) || defined(_M_X64) \
|
---|
31 | || (defined(_M_IX86_FP) && _M_IX86_FP >= 2))
|
---|
32 | # define LZMA_LZ_DECODER_CONFIG 2
|
---|
33 | # else
|
---|
34 | # define LZMA_LZ_DECODER_CONFIG 1
|
---|
35 | # endif
|
---|
36 | #endif
|
---|
37 |
|
---|
38 | /// Byte-by-byte and memcpy() copy exactly the amount needed. Other methods
|
---|
39 | /// can copy up to LZ_DICT_EXTRA bytes more than requested, and this amount
|
---|
40 | /// of extra space is needed at the end of the allocated dictionary buffer.
|
---|
41 | ///
|
---|
42 | /// NOTE: If this is increased, update LZMA_DICT_REPEAT_MAX too.
|
---|
43 | #if LZMA_LZ_DECODER_CONFIG >= 2
|
---|
44 | # define LZ_DICT_EXTRA 32
|
---|
45 | #else
|
---|
46 | # define LZ_DICT_EXTRA 0
|
---|
47 | #endif
|
---|
48 |
|
---|
49 | /// Maximum number of bytes that dict_repeat() may copy. The allocated
|
---|
50 | /// dictionary buffer will be 2 * LZ_DICT_REPEAT_MAX + LZMA_DICT_EXTRA bytes
|
---|
51 | /// larger than the actual dictionary size:
|
---|
52 | ///
|
---|
53 | /// (1) Every time the decoder reaches the end of the dictionary buffer,
|
---|
54 | /// the last LZ_DICT_REPEAT_MAX bytes will be copied to the beginning.
|
---|
55 | /// This way dict_repeat() will only need to copy from one place,
|
---|
56 | /// never from both the end and beginning of the buffer.
|
---|
57 | ///
|
---|
58 | /// (2) The other LZ_DICT_REPEAT_MAX bytes is kept as a buffer between
|
---|
59 | /// the oldest byte still in the dictionary and the current write
|
---|
60 | /// position. This way dict_repeat() with the maximum valid distance
|
---|
61 | /// won't need memmove() as the copying cannot overlap.
|
---|
62 | ///
|
---|
63 | /// (3) LZ_DICT_EXTRA bytes are required at the end of the dictionary buffer
|
---|
64 | /// so that extra copying done by dict_repeat() won't write or read past
|
---|
65 | /// the end of the allocated buffer. This amount is *not* counted as part
|
---|
66 | /// of lzma_dict.size.
|
---|
67 | ///
|
---|
68 | /// Note that memcpy() still cannot be used if distance < len.
|
---|
69 | ///
|
---|
70 | /// LZMA's longest match length is 273 bytes. The LZMA decoder looks at
|
---|
71 | /// the lowest four bits of the dictionary position, thus 273 must be
|
---|
72 | /// rounded up to the next multiple of 16 (288). In addition, optimized
|
---|
73 | /// dict_repeat() copies 32 bytes at a time, thus this must also be
|
---|
74 | /// a multiple of 32.
|
---|
75 | #define LZ_DICT_REPEAT_MAX 288
|
---|
76 |
|
---|
77 | /// Initial position in lzma_dict.buf when the dictionary is empty.
|
---|
78 | #define LZ_DICT_INIT_POS (2 * LZ_DICT_REPEAT_MAX)
|
---|
79 |
|
---|
80 |
|
---|
81 | typedef struct {
|
---|
82 | /// Pointer to the dictionary buffer.
|
---|
83 | uint8_t *buf;
|
---|
84 |
|
---|
85 | /// Write position in dictionary. The next byte will be written to
|
---|
86 | /// buf[pos].
|
---|
87 | size_t pos;
|
---|
88 |
|
---|
89 | /// Indicates how full the dictionary is. This is used by
|
---|
90 | /// dict_is_distance_valid() to detect corrupt files that would
|
---|
91 | /// read beyond the beginning of the dictionary.
|
---|
92 | size_t full;
|
---|
93 |
|
---|
94 | /// Write limit
|
---|
95 | size_t limit;
|
---|
96 |
|
---|
97 | /// Allocated size of buf. This is 2 * LZ_DICT_REPEAT_MAX bytes
|
---|
98 | /// larger than the actual dictionary size. This is enforced by
|
---|
99 | /// how the value for "full" is set; it can be at most
|
---|
100 | /// "size - 2 * LZ_DICT_REPEAT_MAX".
|
---|
101 | size_t size;
|
---|
102 |
|
---|
103 | /// True once the dictionary has become full and the writing position
|
---|
104 | /// has been wrapped in decode_buffer() in lz_decoder.c.
|
---|
105 | bool has_wrapped;
|
---|
106 |
|
---|
107 | /// True when dictionary should be reset before decoding more data.
|
---|
108 | bool need_reset;
|
---|
109 |
|
---|
110 | } lzma_dict;
|
---|
111 |
|
---|
112 |
|
---|
113 | typedef struct {
|
---|
114 | size_t dict_size;
|
---|
115 | const uint8_t *preset_dict;
|
---|
116 | size_t preset_dict_size;
|
---|
117 | } lzma_lz_options;
|
---|
118 |
|
---|
119 |
|
---|
120 | typedef struct {
|
---|
121 | /// Data specific to the LZ-based decoder
|
---|
122 | void *coder;
|
---|
123 |
|
---|
124 | /// Function to decode from in[] to *dict
|
---|
125 | lzma_ret (*code)(void *coder,
|
---|
126 | lzma_dict *restrict dict, const uint8_t *restrict in,
|
---|
127 | size_t *restrict in_pos, size_t in_size);
|
---|
128 |
|
---|
129 | void (*reset)(void *coder, const void *options);
|
---|
130 |
|
---|
131 | /// Set the uncompressed size. If uncompressed_size == LZMA_VLI_UNKNOWN
|
---|
132 | /// then allow_eopm will always be true.
|
---|
133 | void (*set_uncompressed)(void *coder, lzma_vli uncompressed_size,
|
---|
134 | bool allow_eopm);
|
---|
135 |
|
---|
136 | /// Free allocated resources
|
---|
137 | void (*end)(void *coder, const lzma_allocator *allocator);
|
---|
138 |
|
---|
139 | } lzma_lz_decoder;
|
---|
140 |
|
---|
141 |
|
---|
142 | #define LZMA_LZ_DECODER_INIT \
|
---|
143 | (lzma_lz_decoder){ \
|
---|
144 | .coder = NULL, \
|
---|
145 | .code = NULL, \
|
---|
146 | .reset = NULL, \
|
---|
147 | .set_uncompressed = NULL, \
|
---|
148 | .end = NULL, \
|
---|
149 | }
|
---|
150 |
|
---|
151 |
|
---|
152 | extern lzma_ret lzma_lz_decoder_init(lzma_next_coder *next,
|
---|
153 | const lzma_allocator *allocator,
|
---|
154 | const lzma_filter_info *filters,
|
---|
155 | lzma_ret (*lz_init)(lzma_lz_decoder *lz,
|
---|
156 | const lzma_allocator *allocator,
|
---|
157 | lzma_vli id, const void *options,
|
---|
158 | lzma_lz_options *lz_options));
|
---|
159 |
|
---|
160 | extern uint64_t lzma_lz_decoder_memusage(size_t dictionary_size);
|
---|
161 |
|
---|
162 |
|
---|
163 | //////////////////////
|
---|
164 | // Inline functions //
|
---|
165 | //////////////////////
|
---|
166 |
|
---|
167 | /// Get a byte from the history buffer.
|
---|
168 | static inline uint8_t
|
---|
169 | dict_get(const lzma_dict *const dict, const uint32_t distance)
|
---|
170 | {
|
---|
171 | return dict->buf[dict->pos - distance - 1
|
---|
172 | + (distance < dict->pos
|
---|
173 | ? 0 : dict->size - LZ_DICT_REPEAT_MAX)];
|
---|
174 | }
|
---|
175 |
|
---|
176 |
|
---|
177 | /// Optimized version of dict_get(dict, 0)
|
---|
178 | static inline uint8_t
|
---|
179 | dict_get0(const lzma_dict *const dict)
|
---|
180 | {
|
---|
181 | return dict->buf[dict->pos - 1];
|
---|
182 | }
|
---|
183 |
|
---|
184 |
|
---|
185 | /// Test if dictionary is empty.
|
---|
186 | static inline bool
|
---|
187 | dict_is_empty(const lzma_dict *const dict)
|
---|
188 | {
|
---|
189 | return dict->full == 0;
|
---|
190 | }
|
---|
191 |
|
---|
192 |
|
---|
193 | /// Validate the match distance
|
---|
194 | static inline bool
|
---|
195 | dict_is_distance_valid(const lzma_dict *const dict, const size_t distance)
|
---|
196 | {
|
---|
197 | return dict->full > distance;
|
---|
198 | }
|
---|
199 |
|
---|
200 |
|
---|
201 | /// Repeat *len bytes at distance.
|
---|
202 | static inline bool
|
---|
203 | dict_repeat(lzma_dict *restrict dict,
|
---|
204 | uint32_t distance, uint32_t *restrict len)
|
---|
205 | {
|
---|
206 | // Don't write past the end of the dictionary.
|
---|
207 | const size_t dict_avail = dict->limit - dict->pos;
|
---|
208 | uint32_t left = my_min(dict_avail, *len);
|
---|
209 | *len -= left;
|
---|
210 |
|
---|
211 | size_t back = dict->pos - distance - 1;
|
---|
212 | if (distance >= dict->pos)
|
---|
213 | back += dict->size - LZ_DICT_REPEAT_MAX;
|
---|
214 |
|
---|
215 | #if LZMA_LZ_DECODER_CONFIG == 0
|
---|
216 | // Minimal byte-by-byte method. This might be the least bad choice
|
---|
217 | // if memcpy() isn't fast and there's no replacement for it below.
|
---|
218 | while (left-- > 0) {
|
---|
219 | dict->buf[dict->pos++] = dict->buf[back++];
|
---|
220 | }
|
---|
221 |
|
---|
222 | #else
|
---|
223 | // Because memcpy() or a similar method can be faster than copying
|
---|
224 | // byte by byte in a loop, the copying process is split into
|
---|
225 | // two cases.
|
---|
226 | if (distance < left) {
|
---|
227 | // Source and target areas overlap, thus we can't use
|
---|
228 | // memcpy() nor even memmove() safely.
|
---|
229 | do {
|
---|
230 | dict->buf[dict->pos++] = dict->buf[back++];
|
---|
231 | } while (--left > 0);
|
---|
232 | } else {
|
---|
233 | # if LZMA_LZ_DECODER_CONFIG == 1
|
---|
234 | memcpy(dict->buf + dict->pos, dict->buf + back, left);
|
---|
235 | dict->pos += left;
|
---|
236 |
|
---|
237 | # elif LZMA_LZ_DECODER_CONFIG == 2
|
---|
238 | // This can copy up to 32 bytes more than required.
|
---|
239 | // (If left == 0, we still copy 32 bytes.)
|
---|
240 | size_t pos = dict->pos;
|
---|
241 | dict->pos += left;
|
---|
242 | do {
|
---|
243 | const __m128i x0 = _mm_loadu_si128(
|
---|
244 | (__m128i *)(dict->buf + back));
|
---|
245 | const __m128i x1 = _mm_loadu_si128(
|
---|
246 | (__m128i *)(dict->buf + back + 16));
|
---|
247 | back += 32;
|
---|
248 | _mm_storeu_si128(
|
---|
249 | (__m128i *)(dict->buf + pos), x0);
|
---|
250 | _mm_storeu_si128(
|
---|
251 | (__m128i *)(dict->buf + pos + 16), x1);
|
---|
252 | pos += 32;
|
---|
253 | } while (pos < dict->pos);
|
---|
254 |
|
---|
255 | # else
|
---|
256 | # error "Invalid LZMA_LZ_DECODER_CONFIG value"
|
---|
257 | # endif
|
---|
258 | }
|
---|
259 | #endif
|
---|
260 |
|
---|
261 | // Update how full the dictionary is.
|
---|
262 | if (!dict->has_wrapped)
|
---|
263 | dict->full = dict->pos - LZ_DICT_INIT_POS;
|
---|
264 |
|
---|
265 | return *len != 0;
|
---|
266 | }
|
---|
267 |
|
---|
268 |
|
---|
269 | static inline void
|
---|
270 | dict_put(lzma_dict *restrict dict, uint8_t byte)
|
---|
271 | {
|
---|
272 | dict->buf[dict->pos++] = byte;
|
---|
273 |
|
---|
274 | if (!dict->has_wrapped)
|
---|
275 | dict->full = dict->pos - LZ_DICT_INIT_POS;
|
---|
276 | }
|
---|
277 |
|
---|
278 |
|
---|
279 | /// Puts one byte into the dictionary. Returns true if the dictionary was
|
---|
280 | /// already full and the byte couldn't be added.
|
---|
281 | static inline bool
|
---|
282 | dict_put_safe(lzma_dict *restrict dict, uint8_t byte)
|
---|
283 | {
|
---|
284 | if (unlikely(dict->pos == dict->limit))
|
---|
285 | return true;
|
---|
286 |
|
---|
287 | dict_put(dict, byte);
|
---|
288 | return false;
|
---|
289 | }
|
---|
290 |
|
---|
291 |
|
---|
292 | /// Copies arbitrary amount of data into the dictionary.
|
---|
293 | static inline void
|
---|
294 | dict_write(lzma_dict *restrict dict, const uint8_t *restrict in,
|
---|
295 | size_t *restrict in_pos, size_t in_size,
|
---|
296 | size_t *restrict left)
|
---|
297 | {
|
---|
298 | // NOTE: If we are being given more data than the size of the
|
---|
299 | // dictionary, it could be possible to optimize the LZ decoder
|
---|
300 | // so that not everything needs to go through the dictionary.
|
---|
301 | // This shouldn't be very common thing in practice though, and
|
---|
302 | // the slowdown of one extra memcpy() isn't bad compared to how
|
---|
303 | // much time it would have taken if the data were compressed.
|
---|
304 |
|
---|
305 | if (in_size - *in_pos > *left)
|
---|
306 | in_size = *in_pos + *left;
|
---|
307 |
|
---|
308 | *left -= lzma_bufcpy(in, in_pos, in_size,
|
---|
309 | dict->buf, &dict->pos, dict->limit);
|
---|
310 |
|
---|
311 | if (!dict->has_wrapped)
|
---|
312 | dict->full = dict->pos - LZ_DICT_INIT_POS;
|
---|
313 |
|
---|
314 | return;
|
---|
315 | }
|
---|
316 |
|
---|
317 |
|
---|
318 | static inline void
|
---|
319 | dict_reset(lzma_dict *dict)
|
---|
320 | {
|
---|
321 | dict->need_reset = true;
|
---|
322 | return;
|
---|
323 | }
|
---|
324 |
|
---|
325 | #endif
|
---|