lz_decoder.h@ 108911

Last change on this file since 108911 was 108911, checked in by vboxsync, 4 weeks ago
libs/liblzma: Applied and adjusted our liblzma changes to 5.8.1 and export to OSE. jiraref:VBP-1635
Property svn:eol-style set to `native` Property svn:keywords set to `Author Date Id Revision` Property sync-process set to `export`
File size: 8.8 KB

Line
1	// SPDX-License-Identifier: 0BSD
2
3	///////////////////////////////////////////////////////////////////////////////
4	//
5	/// \file lz_decoder.h
6	/// \brief LZ out window
7	///
8	// Authors: Igor Pavlov
9	// Lasse Collin
10	//
11	///////////////////////////////////////////////////////////////////////////////
12
13	#ifndef LZMA_LZ_DECODER_H
14	#define LZMA_LZ_DECODER_H
15
16	#include "common.h"
17
18	#ifdef HAVE_IMMINTRIN_H
19	# include <immintrin.h>
20	#endif
21
22
23	// dict_repeat() implementation variant:
24	// 0 = Byte-by-byte copying only.
25	// 1 = Use memcpy() for non-overlapping copies.
26	// 2 = Use x86 SSE2 for non-overlapping copies.
27	#ifndef LZMA_LZ_DECODER_CONFIG
28	# if defined(TUKLIB_FAST_UNALIGNED_ACCESS) \
29	&& defined(HAVE_IMMINTRIN_H) \
30	&& (defined(__SSE2__) \|\| defined(_M_X64) \
31	\|\| (defined(_M_IX86_FP) && _M_IX86_FP >= 2))
32	# define LZMA_LZ_DECODER_CONFIG 2
33	# else
34	# define LZMA_LZ_DECODER_CONFIG 1
35	# endif
36	#endif
37
38	/// Byte-by-byte and memcpy() copy exactly the amount needed. Other methods
39	/// can copy up to LZ_DICT_EXTRA bytes more than requested, and this amount
40	/// of extra space is needed at the end of the allocated dictionary buffer.
41	///
42	/// NOTE: If this is increased, update LZMA_DICT_REPEAT_MAX too.
43	#if LZMA_LZ_DECODER_CONFIG >= 2
44	# define LZ_DICT_EXTRA 32
45	#else
46	# define LZ_DICT_EXTRA 0
47	#endif
48
49	/// Maximum number of bytes that dict_repeat() may copy. The allocated
50	/// dictionary buffer will be 2 * LZ_DICT_REPEAT_MAX + LZMA_DICT_EXTRA bytes
51	/// larger than the actual dictionary size:
52	///
53	/// (1) Every time the decoder reaches the end of the dictionary buffer,
54	/// the last LZ_DICT_REPEAT_MAX bytes will be copied to the beginning.
55	/// This way dict_repeat() will only need to copy from one place,
56	/// never from both the end and beginning of the buffer.
57	///
58	/// (2) The other LZ_DICT_REPEAT_MAX bytes is kept as a buffer between
59	/// the oldest byte still in the dictionary and the current write
60	/// position. This way dict_repeat() with the maximum valid distance
61	/// won't need memmove() as the copying cannot overlap.
62	///
63	/// (3) LZ_DICT_EXTRA bytes are required at the end of the dictionary buffer
64	/// so that extra copying done by dict_repeat() won't write or read past
65	/// the end of the allocated buffer. This amount is not counted as part
66	/// of lzma_dict.size.
67	///
68	/// Note that memcpy() still cannot be used if distance < len.
69	///
70	/// LZMA's longest match length is 273 bytes. The LZMA decoder looks at
71	/// the lowest four bits of the dictionary position, thus 273 must be
72	/// rounded up to the next multiple of 16 (288). In addition, optimized
73	/// dict_repeat() copies 32 bytes at a time, thus this must also be
74	/// a multiple of 32.
75	#define LZ_DICT_REPEAT_MAX 288
76
77	/// Initial position in lzma_dict.buf when the dictionary is empty.
78	#define LZ_DICT_INIT_POS (2 * LZ_DICT_REPEAT_MAX)
79
80
81	typedef struct {
82	/// Pointer to the dictionary buffer.
83	uint8_t *buf;
84
85	/// Write position in dictionary. The next byte will be written to
86	/// buf[pos].
87	size_t pos;
88
89	/// Indicates how full the dictionary is. This is used by
90	/// dict_is_distance_valid() to detect corrupt files that would
91	/// read beyond the beginning of the dictionary.
92	size_t full;
93
94	/// Write limit
95	size_t limit;
96
97	/// Allocated size of buf. This is 2 * LZ_DICT_REPEAT_MAX bytes
98	/// larger than the actual dictionary size. This is enforced by
99	/// how the value for "full" is set; it can be at most
100	/// "size - 2 * LZ_DICT_REPEAT_MAX".
101	size_t size;
102
103	/// True once the dictionary has become full and the writing position
104	/// has been wrapped in decode_buffer() in lz_decoder.c.
105	bool has_wrapped;
106
107	/// True when dictionary should be reset before decoding more data.
108	bool need_reset;
109
110	} lzma_dict;
111
112
113	typedef struct {
114	size_t dict_size;
115	const uint8_t *preset_dict;
116	size_t preset_dict_size;
117	} lzma_lz_options;
118
119
120	typedef struct {
121	/// Data specific to the LZ-based decoder
122	void *coder;
123
124	/// Function to decode from in[] to *dict
125	lzma_ret (code)(void coder,
126	lzma_dict restrict dict, const uint8_t restrict in,
127	size_t *restrict in_pos, size_t in_size);
128
129	void (reset)(void coder, const void *options);
130
131	/// Set the uncompressed size. If uncompressed_size == LZMA_VLI_UNKNOWN
132	/// then allow_eopm will always be true.
133	void (set_uncompressed)(void coder, lzma_vli uncompressed_size,
134	bool allow_eopm);
135
136	/// Free allocated resources
137	void (end)(void coder, const lzma_allocator *allocator);
138
139	} lzma_lz_decoder;
140
141
142	#define LZMA_LZ_DECODER_INIT \
143	(lzma_lz_decoder){ \
144	.coder = NULL, \
145	.code = NULL, \
146	.reset = NULL, \
147	.set_uncompressed = NULL, \
148	.end = NULL, \
149	}
150
151
152	extern lzma_ret lzma_lz_decoder_init(lzma_next_coder *next,
153	const lzma_allocator *allocator,
154	const lzma_filter_info *filters,
155	lzma_ret (lz_init)(lzma_lz_decoder lz,
156	const lzma_allocator *allocator,
157	lzma_vli id, const void *options,
158	lzma_lz_options *lz_options));
159
160	extern uint64_t lzma_lz_decoder_memusage(size_t dictionary_size);
161
162
163	//////////////////////
164	// Inline functions //
165	//////////////////////
166
167	/// Get a byte from the history buffer.
168	static inline uint8_t
169	dict_get(const lzma_dict *const dict, const uint32_t distance)
170	{
171	return dict->buf[dict->pos - distance - 1
172	+ (distance < dict->pos
173	? 0 : dict->size - LZ_DICT_REPEAT_MAX)];
174	}
175
176
177	/// Optimized version of dict_get(dict, 0)
178	static inline uint8_t
179	dict_get0(const lzma_dict *const dict)
180	{
181	return dict->buf[dict->pos - 1];
182	}
183
184
185	/// Test if dictionary is empty.
186	static inline bool
187	dict_is_empty(const lzma_dict *const dict)
188	{
189	return dict->full == 0;
190	}
191
192
193	/// Validate the match distance
194	static inline bool
195	dict_is_distance_valid(const lzma_dict *const dict, const size_t distance)
196	{
197	return dict->full > distance;
198	}
199
200
201	/// Repeat *len bytes at distance.
202	static inline bool
203	dict_repeat(lzma_dict *restrict dict,
204	uint32_t distance, uint32_t *restrict len)
205	{
206	// Don't write past the end of the dictionary.
207	const size_t dict_avail = dict->limit - dict->pos;
208	uint32_t left = my_min(dict_avail, *len);
209	*len -= left;
210
211	size_t back = dict->pos - distance - 1;
212	if (distance >= dict->pos)
213	back += dict->size - LZ_DICT_REPEAT_MAX;
214
215	#if LZMA_LZ_DECODER_CONFIG == 0
216	// Minimal byte-by-byte method. This might be the least bad choice
217	// if memcpy() isn't fast and there's no replacement for it below.
218	while (left-- > 0) {
219	dict->buf[dict->pos++] = dict->buf[back++];
220	}
221
222	#else
223	// Because memcpy() or a similar method can be faster than copying
224	// byte by byte in a loop, the copying process is split into
225	// two cases.
226	if (distance < left) {
227	// Source and target areas overlap, thus we can't use
228	// memcpy() nor even memmove() safely.
229	do {
230	dict->buf[dict->pos++] = dict->buf[back++];
231	} while (--left > 0);
232	} else {
233	# if LZMA_LZ_DECODER_CONFIG == 1
234	memcpy(dict->buf + dict->pos, dict->buf + back, left);
235	dict->pos += left;
236
237	# elif LZMA_LZ_DECODER_CONFIG == 2
238	// This can copy up to 32 bytes more than required.
239	// (If left == 0, we still copy 32 bytes.)
240	size_t pos = dict->pos;
241	dict->pos += left;
242	do {
243	const __m128i x0 = _mm_loadu_si128(
244	(__m128i *)(dict->buf + back));
245	const __m128i x1 = _mm_loadu_si128(
246	(__m128i *)(dict->buf + back + 16));
247	back += 32;
248	_mm_storeu_si128(
249	(__m128i *)(dict->buf + pos), x0);
250	_mm_storeu_si128(
251	(__m128i *)(dict->buf + pos + 16), x1);
252	pos += 32;
253	} while (pos < dict->pos);
254
255	# else
256	# error "Invalid LZMA_LZ_DECODER_CONFIG value"
257	# endif
258	}
259	#endif
260
261	// Update how full the dictionary is.
262	if (!dict->has_wrapped)
263	dict->full = dict->pos - LZ_DICT_INIT_POS;
264
265	return *len != 0;
266	}
267
268
269	static inline void
270	dict_put(lzma_dict *restrict dict, uint8_t byte)
271	{
272	dict->buf[dict->pos++] = byte;
273
274	if (!dict->has_wrapped)
275	dict->full = dict->pos - LZ_DICT_INIT_POS;
276	}
277
278
279	/// Puts one byte into the dictionary. Returns true if the dictionary was
280	/// already full and the byte couldn't be added.
281	static inline bool
282	dict_put_safe(lzma_dict *restrict dict, uint8_t byte)
283	{
284	if (unlikely(dict->pos == dict->limit))
285	return true;
286
287	dict_put(dict, byte);
288	return false;
289	}
290
291
292	/// Copies arbitrary amount of data into the dictionary.
293	static inline void
294	dict_write(lzma_dict restrict dict, const uint8_t restrict in,
295	size_t *restrict in_pos, size_t in_size,
296	size_t *restrict left)
297	{
298	// NOTE: If we are being given more data than the size of the
299	// dictionary, it could be possible to optimize the LZ decoder
300	// so that not everything needs to go through the dictionary.
301	// This shouldn't be very common thing in practice though, and
302	// the slowdown of one extra memcpy() isn't bad compared to how
303	// much time it would have taken if the data were compressed.
304
305	if (in_size - in_pos > left)
306	in_size = in_pos + left;
307
308	*left -= lzma_bufcpy(in, in_pos, in_size,
309	dict->buf, &dict->pos, dict->limit);
310
311	if (!dict->has_wrapped)
312	dict->full = dict->pos - LZ_DICT_INIT_POS;
313
314	return;
315	}
316
317
318	static inline void
319	dict_reset(lzma_dict *dict)
320	{
321	dict->need_reset = true;
322	return;
323	}
324
325	#endif

Note: See TracBrowser for help on using the repository browser.

source: vbox/trunk/src/libs/liblzma-5.8.1/lz/lz_decoder.h@ 108911

Download in other formats: