1 | ///////////////////////////////////////////////////////////////////////////////
|
---|
2 | //
|
---|
3 | /// \file lzma_decoder.c
|
---|
4 | /// \brief LZMA decoder
|
---|
5 | ///
|
---|
6 | // Authors: Igor Pavlov
|
---|
7 | // Lasse Collin
|
---|
8 | //
|
---|
9 | // This file has been put into the public domain.
|
---|
10 | // You can do whatever you want with this file.
|
---|
11 | //
|
---|
12 | ///////////////////////////////////////////////////////////////////////////////
|
---|
13 |
|
---|
14 | #include "lz_decoder.h"
|
---|
15 | #include "lzma_common.h"
|
---|
16 | #include "lzma_decoder.h"
|
---|
17 | #include "range_decoder.h"
|
---|
18 |
|
---|
19 | // The macros unroll loops with switch statements.
|
---|
20 | // Silence warnings about missing fall-through comments.
|
---|
21 | #if TUKLIB_GNUC_REQ(7, 0)
|
---|
22 | # pragma GCC diagnostic ignored "-Wimplicit-fallthrough"
|
---|
23 | #endif
|
---|
24 |
|
---|
25 |
|
---|
26 | #ifdef HAVE_SMALL
|
---|
27 |
|
---|
28 | // Macros for (somewhat) size-optimized code.
|
---|
29 | #define seq_4(seq) seq
|
---|
30 |
|
---|
31 | #define seq_6(seq) seq
|
---|
32 |
|
---|
33 | #define seq_8(seq) seq
|
---|
34 |
|
---|
35 | #define seq_len(seq) \
|
---|
36 | seq ## _CHOICE, \
|
---|
37 | seq ## _CHOICE2, \
|
---|
38 | seq ## _BITTREE
|
---|
39 |
|
---|
40 | #define len_decode(target, ld, pos_state, seq) \
|
---|
41 | do { \
|
---|
42 | case seq ## _CHOICE: \
|
---|
43 | rc_if_0(ld.choice, seq ## _CHOICE) { \
|
---|
44 | rc_update_0(ld.choice); \
|
---|
45 | probs = ld.low[pos_state];\
|
---|
46 | limit = LEN_LOW_SYMBOLS; \
|
---|
47 | target = MATCH_LEN_MIN; \
|
---|
48 | } else { \
|
---|
49 | rc_update_1(ld.choice); \
|
---|
50 | case seq ## _CHOICE2: \
|
---|
51 | rc_if_0(ld.choice2, seq ## _CHOICE2) { \
|
---|
52 | rc_update_0(ld.choice2); \
|
---|
53 | probs = ld.mid[pos_state]; \
|
---|
54 | limit = LEN_MID_SYMBOLS; \
|
---|
55 | target = MATCH_LEN_MIN + LEN_LOW_SYMBOLS; \
|
---|
56 | } else { \
|
---|
57 | rc_update_1(ld.choice2); \
|
---|
58 | probs = ld.high; \
|
---|
59 | limit = LEN_HIGH_SYMBOLS; \
|
---|
60 | target = MATCH_LEN_MIN + LEN_LOW_SYMBOLS \
|
---|
61 | + LEN_MID_SYMBOLS; \
|
---|
62 | } \
|
---|
63 | } \
|
---|
64 | symbol = 1; \
|
---|
65 | case seq ## _BITTREE: \
|
---|
66 | do { \
|
---|
67 | rc_bit(probs[symbol], , , seq ## _BITTREE); \
|
---|
68 | } while (symbol < limit); \
|
---|
69 | target += symbol - limit; \
|
---|
70 | } while (0)
|
---|
71 |
|
---|
72 | #else // HAVE_SMALL
|
---|
73 |
|
---|
74 | // Unrolled versions
|
---|
75 | #define seq_4(seq) \
|
---|
76 | seq ## 0, \
|
---|
77 | seq ## 1, \
|
---|
78 | seq ## 2, \
|
---|
79 | seq ## 3
|
---|
80 |
|
---|
81 | #define seq_6(seq) \
|
---|
82 | seq ## 0, \
|
---|
83 | seq ## 1, \
|
---|
84 | seq ## 2, \
|
---|
85 | seq ## 3, \
|
---|
86 | seq ## 4, \
|
---|
87 | seq ## 5
|
---|
88 |
|
---|
89 | #define seq_8(seq) \
|
---|
90 | seq ## 0, \
|
---|
91 | seq ## 1, \
|
---|
92 | seq ## 2, \
|
---|
93 | seq ## 3, \
|
---|
94 | seq ## 4, \
|
---|
95 | seq ## 5, \
|
---|
96 | seq ## 6, \
|
---|
97 | seq ## 7
|
---|
98 |
|
---|
99 | #define seq_len(seq) \
|
---|
100 | seq ## _CHOICE, \
|
---|
101 | seq ## _LOW0, \
|
---|
102 | seq ## _LOW1, \
|
---|
103 | seq ## _LOW2, \
|
---|
104 | seq ## _CHOICE2, \
|
---|
105 | seq ## _MID0, \
|
---|
106 | seq ## _MID1, \
|
---|
107 | seq ## _MID2, \
|
---|
108 | seq ## _HIGH0, \
|
---|
109 | seq ## _HIGH1, \
|
---|
110 | seq ## _HIGH2, \
|
---|
111 | seq ## _HIGH3, \
|
---|
112 | seq ## _HIGH4, \
|
---|
113 | seq ## _HIGH5, \
|
---|
114 | seq ## _HIGH6, \
|
---|
115 | seq ## _HIGH7
|
---|
116 |
|
---|
117 | #define len_decode(target, ld, pos_state, seq) \
|
---|
118 | do { \
|
---|
119 | symbol = 1; \
|
---|
120 | case seq ## _CHOICE: \
|
---|
121 | rc_if_0(ld.choice, seq ## _CHOICE) { \
|
---|
122 | rc_update_0(ld.choice); \
|
---|
123 | rc_bit_case(ld.low[pos_state][symbol], , , seq ## _LOW0); \
|
---|
124 | rc_bit_case(ld.low[pos_state][symbol], , , seq ## _LOW1); \
|
---|
125 | rc_bit_case(ld.low[pos_state][symbol], , , seq ## _LOW2); \
|
---|
126 | target = symbol - LEN_LOW_SYMBOLS + MATCH_LEN_MIN; \
|
---|
127 | } else { \
|
---|
128 | rc_update_1(ld.choice); \
|
---|
129 | case seq ## _CHOICE2: \
|
---|
130 | rc_if_0(ld.choice2, seq ## _CHOICE2) { \
|
---|
131 | rc_update_0(ld.choice2); \
|
---|
132 | rc_bit_case(ld.mid[pos_state][symbol], , , \
|
---|
133 | seq ## _MID0); \
|
---|
134 | rc_bit_case(ld.mid[pos_state][symbol], , , \
|
---|
135 | seq ## _MID1); \
|
---|
136 | rc_bit_case(ld.mid[pos_state][symbol], , , \
|
---|
137 | seq ## _MID2); \
|
---|
138 | target = symbol - LEN_MID_SYMBOLS \
|
---|
139 | + MATCH_LEN_MIN + LEN_LOW_SYMBOLS; \
|
---|
140 | } else { \
|
---|
141 | rc_update_1(ld.choice2); \
|
---|
142 | rc_bit_case(ld.high[symbol], , , seq ## _HIGH0); \
|
---|
143 | rc_bit_case(ld.high[symbol], , , seq ## _HIGH1); \
|
---|
144 | rc_bit_case(ld.high[symbol], , , seq ## _HIGH2); \
|
---|
145 | rc_bit_case(ld.high[symbol], , , seq ## _HIGH3); \
|
---|
146 | rc_bit_case(ld.high[symbol], , , seq ## _HIGH4); \
|
---|
147 | rc_bit_case(ld.high[symbol], , , seq ## _HIGH5); \
|
---|
148 | rc_bit_case(ld.high[symbol], , , seq ## _HIGH6); \
|
---|
149 | rc_bit_case(ld.high[symbol], , , seq ## _HIGH7); \
|
---|
150 | target = symbol - LEN_HIGH_SYMBOLS \
|
---|
151 | + MATCH_LEN_MIN \
|
---|
152 | + LEN_LOW_SYMBOLS + LEN_MID_SYMBOLS; \
|
---|
153 | } \
|
---|
154 | } \
|
---|
155 | } while (0)
|
---|
156 |
|
---|
157 | #endif // HAVE_SMALL
|
---|
158 |
|
---|
159 |
|
---|
160 | /// Length decoder probabilities; see comments in lzma_common.h.
|
---|
161 | typedef struct {
|
---|
162 | probability choice;
|
---|
163 | probability choice2;
|
---|
164 | probability low[POS_STATES_MAX][LEN_LOW_SYMBOLS];
|
---|
165 | probability mid[POS_STATES_MAX][LEN_MID_SYMBOLS];
|
---|
166 | probability high[LEN_HIGH_SYMBOLS];
|
---|
167 | } lzma_length_decoder;
|
---|
168 |
|
---|
169 |
|
---|
170 | typedef struct {
|
---|
171 | ///////////////////
|
---|
172 | // Probabilities //
|
---|
173 | ///////////////////
|
---|
174 |
|
---|
175 | /// Literals; see comments in lzma_common.h.
|
---|
176 | probability literal[LITERAL_CODERS_MAX][LITERAL_CODER_SIZE];
|
---|
177 |
|
---|
178 | /// If 1, it's a match. Otherwise it's a single 8-bit literal.
|
---|
179 | probability is_match[STATES][POS_STATES_MAX];
|
---|
180 |
|
---|
181 | /// If 1, it's a repeated match. The distance is one of rep0 .. rep3.
|
---|
182 | probability is_rep[STATES];
|
---|
183 |
|
---|
184 | /// If 0, distance of a repeated match is rep0.
|
---|
185 | /// Otherwise check is_rep1.
|
---|
186 | probability is_rep0[STATES];
|
---|
187 |
|
---|
188 | /// If 0, distance of a repeated match is rep1.
|
---|
189 | /// Otherwise check is_rep2.
|
---|
190 | probability is_rep1[STATES];
|
---|
191 |
|
---|
192 | /// If 0, distance of a repeated match is rep2. Otherwise it is rep3.
|
---|
193 | probability is_rep2[STATES];
|
---|
194 |
|
---|
195 | /// If 1, the repeated match has length of one byte. Otherwise
|
---|
196 | /// the length is decoded from rep_len_decoder.
|
---|
197 | probability is_rep0_long[STATES][POS_STATES_MAX];
|
---|
198 |
|
---|
199 | /// Probability tree for the highest two bits of the match distance.
|
---|
200 | /// There is a separate probability tree for match lengths of
|
---|
201 | /// 2 (i.e. MATCH_LEN_MIN), 3, 4, and [5, 273].
|
---|
202 | probability dist_slot[DIST_STATES][DIST_SLOTS];
|
---|
203 |
|
---|
204 | /// Probability trees for additional bits for match distance when the
|
---|
205 | /// distance is in the range [4, 127].
|
---|
206 | probability pos_special[FULL_DISTANCES - DIST_MODEL_END];
|
---|
207 |
|
---|
208 | /// Probability tree for the lowest four bits of a match distance
|
---|
209 | /// that is equal to or greater than 128.
|
---|
210 | probability pos_align[ALIGN_SIZE];
|
---|
211 |
|
---|
212 | /// Length of a normal match
|
---|
213 | lzma_length_decoder match_len_decoder;
|
---|
214 |
|
---|
215 | /// Length of a repeated match
|
---|
216 | lzma_length_decoder rep_len_decoder;
|
---|
217 |
|
---|
218 | ///////////////////
|
---|
219 | // Decoder state //
|
---|
220 | ///////////////////
|
---|
221 |
|
---|
222 | // Range coder
|
---|
223 | lzma_range_decoder rc;
|
---|
224 |
|
---|
225 | // Types of the most recently seen LZMA symbols
|
---|
226 | lzma_lzma_state state;
|
---|
227 |
|
---|
228 | uint32_t rep0; ///< Distance of the latest match
|
---|
229 | uint32_t rep1; ///< Distance of second latest match
|
---|
230 | uint32_t rep2; ///< Distance of third latest match
|
---|
231 | uint32_t rep3; ///< Distance of fourth latest match
|
---|
232 |
|
---|
233 | uint32_t pos_mask; // (1U << pb) - 1
|
---|
234 | uint32_t literal_context_bits;
|
---|
235 | uint32_t literal_pos_mask;
|
---|
236 |
|
---|
237 | /// Uncompressed size as bytes, or LZMA_VLI_UNKNOWN if end of
|
---|
238 | /// payload marker is expected.
|
---|
239 | lzma_vli uncompressed_size;
|
---|
240 |
|
---|
241 | /// True if end of payload marker (EOPM) is allowed even when
|
---|
242 | /// uncompressed_size is known; false if EOPM must not be present.
|
---|
243 | /// This is ignored if uncompressed_size == LZMA_VLI_UNKNOWN.
|
---|
244 | bool allow_eopm;
|
---|
245 |
|
---|
246 | ////////////////////////////////
|
---|
247 | // State of incomplete symbol //
|
---|
248 | ////////////////////////////////
|
---|
249 |
|
---|
250 | /// Position where to continue the decoder loop
|
---|
251 | enum {
|
---|
252 | SEQ_NORMALIZE,
|
---|
253 | SEQ_IS_MATCH,
|
---|
254 | seq_8(SEQ_LITERAL),
|
---|
255 | seq_8(SEQ_LITERAL_MATCHED),
|
---|
256 | SEQ_LITERAL_WRITE,
|
---|
257 | SEQ_IS_REP,
|
---|
258 | seq_len(SEQ_MATCH_LEN),
|
---|
259 | seq_6(SEQ_DIST_SLOT),
|
---|
260 | SEQ_DIST_MODEL,
|
---|
261 | SEQ_DIRECT,
|
---|
262 | seq_4(SEQ_ALIGN),
|
---|
263 | SEQ_EOPM,
|
---|
264 | SEQ_IS_REP0,
|
---|
265 | SEQ_SHORTREP,
|
---|
266 | SEQ_IS_REP0_LONG,
|
---|
267 | SEQ_IS_REP1,
|
---|
268 | SEQ_IS_REP2,
|
---|
269 | seq_len(SEQ_REP_LEN),
|
---|
270 | SEQ_COPY,
|
---|
271 | } sequence;
|
---|
272 |
|
---|
273 | /// Base of the current probability tree
|
---|
274 | probability *probs;
|
---|
275 |
|
---|
276 | /// Symbol being decoded. This is also used as an index variable in
|
---|
277 | /// bittree decoders: probs[symbol]
|
---|
278 | uint32_t symbol;
|
---|
279 |
|
---|
280 | /// Used as a loop termination condition on bittree decoders and
|
---|
281 | /// direct bits decoder.
|
---|
282 | uint32_t limit;
|
---|
283 |
|
---|
284 | /// Matched literal decoder: 0x100 or 0 to help avoiding branches.
|
---|
285 | /// Bittree reverse decoders: Offset of the next bit: 1 << offset
|
---|
286 | uint32_t offset;
|
---|
287 |
|
---|
288 | /// If decoding a literal: match byte.
|
---|
289 | /// If decoding a match: length of the match.
|
---|
290 | uint32_t len;
|
---|
291 | } lzma_lzma1_decoder;
|
---|
292 |
|
---|
293 |
|
---|
294 | static lzma_ret
|
---|
295 | lzma_decode(void *coder_ptr, lzma_dict *restrict dictptr,
|
---|
296 | const uint8_t *restrict in,
|
---|
297 | size_t *restrict in_pos, size_t in_size)
|
---|
298 | {
|
---|
299 | lzma_lzma1_decoder *restrict coder = coder_ptr;
|
---|
300 |
|
---|
301 | ////////////////////
|
---|
302 | // Initialization //
|
---|
303 | ////////////////////
|
---|
304 |
|
---|
305 | {
|
---|
306 | const lzma_ret ret = rc_read_init(
|
---|
307 | &coder->rc, in, in_pos, in_size);
|
---|
308 | if (ret != LZMA_STREAM_END)
|
---|
309 | return ret;
|
---|
310 | }
|
---|
311 |
|
---|
312 | ///////////////
|
---|
313 | // Variables //
|
---|
314 | ///////////////
|
---|
315 |
|
---|
316 | // Making local copies of often-used variables improves both
|
---|
317 | // speed and readability.
|
---|
318 |
|
---|
319 | lzma_dict dict = *dictptr;
|
---|
320 |
|
---|
321 | const size_t dict_start = dict.pos;
|
---|
322 |
|
---|
323 | // Range decoder
|
---|
324 | rc_to_local(coder->rc, *in_pos);
|
---|
325 |
|
---|
326 | // State
|
---|
327 | uint32_t state = coder->state;
|
---|
328 | uint32_t rep0 = coder->rep0;
|
---|
329 | uint32_t rep1 = coder->rep1;
|
---|
330 | uint32_t rep2 = coder->rep2;
|
---|
331 | uint32_t rep3 = coder->rep3;
|
---|
332 |
|
---|
333 | const uint32_t pos_mask = coder->pos_mask;
|
---|
334 |
|
---|
335 | // These variables are actually needed only if we last time ran
|
---|
336 | // out of input in the middle of the decoder loop.
|
---|
337 | probability *probs = coder->probs;
|
---|
338 | uint32_t symbol = coder->symbol;
|
---|
339 | uint32_t limit = coder->limit;
|
---|
340 | uint32_t offset = coder->offset;
|
---|
341 | uint32_t len = coder->len;
|
---|
342 |
|
---|
343 | const uint32_t literal_pos_mask = coder->literal_pos_mask;
|
---|
344 | const uint32_t literal_context_bits = coder->literal_context_bits;
|
---|
345 |
|
---|
346 | // Temporary variables
|
---|
347 | uint32_t pos_state = dict.pos & pos_mask;
|
---|
348 |
|
---|
349 | lzma_ret ret = LZMA_OK;
|
---|
350 |
|
---|
351 | // This is true when the next LZMA symbol is allowed to be EOPM.
|
---|
352 | // That is, if this is false, then EOPM is considered
|
---|
353 | // an invalid symbol and we will return LZMA_DATA_ERROR.
|
---|
354 | //
|
---|
355 | // EOPM is always required (not just allowed) when
|
---|
356 | // the uncompressed size isn't known. When uncompressed size
|
---|
357 | // is known, eopm_is_valid may be set to true later.
|
---|
358 | bool eopm_is_valid = coder->uncompressed_size == LZMA_VLI_UNKNOWN;
|
---|
359 |
|
---|
360 | // If uncompressed size is known and there is enough output space
|
---|
361 | // to decode all the data, limit the available buffer space so that
|
---|
362 | // the main loop won't try to decode past the end of the stream.
|
---|
363 | bool might_finish_without_eopm = false;
|
---|
364 | if (coder->uncompressed_size != LZMA_VLI_UNKNOWN
|
---|
365 | && coder->uncompressed_size <= dict.limit - dict.pos) {
|
---|
366 | dict.limit = dict.pos + (size_t)(coder->uncompressed_size);
|
---|
367 | might_finish_without_eopm = true;
|
---|
368 | }
|
---|
369 |
|
---|
370 | // The main decoder loop. The "switch" is used to restart the decoder at
|
---|
371 | // correct location. Once restarted, the "switch" is no longer used.
|
---|
372 | switch (coder->sequence)
|
---|
373 | while (true) {
|
---|
374 | // Calculate new pos_state. This is skipped on the first loop
|
---|
375 | // since we already calculated it when setting up the local
|
---|
376 | // variables.
|
---|
377 | pos_state = dict.pos & pos_mask;
|
---|
378 |
|
---|
379 | case SEQ_NORMALIZE:
|
---|
380 | case SEQ_IS_MATCH:
|
---|
381 | if (unlikely(might_finish_without_eopm
|
---|
382 | && dict.pos == dict.limit)) {
|
---|
383 | // In rare cases there is a useless byte that needs
|
---|
384 | // to be read anyway.
|
---|
385 | rc_normalize(SEQ_NORMALIZE);
|
---|
386 |
|
---|
387 | // If the range decoder state is such that we can
|
---|
388 | // be at the end of the LZMA stream, then the
|
---|
389 | // decoding is finished.
|
---|
390 | if (rc_is_finished(rc)) {
|
---|
391 | ret = LZMA_STREAM_END;
|
---|
392 | goto out;
|
---|
393 | }
|
---|
394 |
|
---|
395 | // If the caller hasn't allowed EOPM to be present
|
---|
396 | // together with known uncompressed size, then the
|
---|
397 | // LZMA stream is corrupt.
|
---|
398 | if (!coder->allow_eopm) {
|
---|
399 | ret = LZMA_DATA_ERROR;
|
---|
400 | goto out;
|
---|
401 | }
|
---|
402 |
|
---|
403 | // Otherwise continue decoding with the expectation
|
---|
404 | // that the next LZMA symbol is EOPM.
|
---|
405 | eopm_is_valid = true;
|
---|
406 | }
|
---|
407 |
|
---|
408 | rc_if_0(coder->is_match[state][pos_state], SEQ_IS_MATCH) {
|
---|
409 | rc_update_0(coder->is_match[state][pos_state]);
|
---|
410 |
|
---|
411 | // It's a literal i.e. a single 8-bit byte.
|
---|
412 |
|
---|
413 | probs = literal_subcoder(coder->literal,
|
---|
414 | literal_context_bits, literal_pos_mask,
|
---|
415 | dict.pos, dict_get(&dict, 0));
|
---|
416 | symbol = 1;
|
---|
417 |
|
---|
418 | if (is_literal_state(state)) {
|
---|
419 | // Decode literal without match byte.
|
---|
420 | #ifdef HAVE_SMALL
|
---|
421 | case SEQ_LITERAL:
|
---|
422 | do {
|
---|
423 | rc_bit(probs[symbol], , , SEQ_LITERAL);
|
---|
424 | } while (symbol < (1 << 8));
|
---|
425 | #else
|
---|
426 | rc_bit_case(probs[symbol], , , SEQ_LITERAL0);
|
---|
427 | rc_bit_case(probs[symbol], , , SEQ_LITERAL1);
|
---|
428 | rc_bit_case(probs[symbol], , , SEQ_LITERAL2);
|
---|
429 | rc_bit_case(probs[symbol], , , SEQ_LITERAL3);
|
---|
430 | rc_bit_case(probs[symbol], , , SEQ_LITERAL4);
|
---|
431 | rc_bit_case(probs[symbol], , , SEQ_LITERAL5);
|
---|
432 | rc_bit_case(probs[symbol], , , SEQ_LITERAL6);
|
---|
433 | rc_bit_case(probs[symbol], , , SEQ_LITERAL7);
|
---|
434 | #endif
|
---|
435 | } else {
|
---|
436 | // Decode literal with match byte.
|
---|
437 | //
|
---|
438 | // We store the byte we compare against
|
---|
439 | // ("match byte") to "len" to minimize the
|
---|
440 | // number of variables we need to store
|
---|
441 | // between decoder calls.
|
---|
442 | len = (uint32_t)(dict_get(&dict, rep0)) << 1;
|
---|
443 |
|
---|
444 | // The usage of "offset" allows omitting some
|
---|
445 | // branches, which should give tiny speed
|
---|
446 | // improvement on some CPUs. "offset" gets
|
---|
447 | // set to zero if match_bit didn't match.
|
---|
448 | offset = 0x100;
|
---|
449 |
|
---|
450 | #ifdef HAVE_SMALL
|
---|
451 | case SEQ_LITERAL_MATCHED:
|
---|
452 | do {
|
---|
453 | const uint32_t match_bit
|
---|
454 | = len & offset;
|
---|
455 | const uint32_t subcoder_index
|
---|
456 | = offset + match_bit
|
---|
457 | + symbol;
|
---|
458 |
|
---|
459 | rc_bit(probs[subcoder_index],
|
---|
460 | offset &= ~match_bit,
|
---|
461 | offset &= match_bit,
|
---|
462 | SEQ_LITERAL_MATCHED);
|
---|
463 |
|
---|
464 | // It seems to be faster to do this
|
---|
465 | // here instead of putting it to the
|
---|
466 | // beginning of the loop and then
|
---|
467 | // putting the "case" in the middle
|
---|
468 | // of the loop.
|
---|
469 | len <<= 1;
|
---|
470 |
|
---|
471 | } while (symbol < (1 << 8));
|
---|
472 | #else
|
---|
473 | // Unroll the loop.
|
---|
474 | uint32_t match_bit;
|
---|
475 | uint32_t subcoder_index;
|
---|
476 |
|
---|
477 | # define d(seq) \
|
---|
478 | case seq: \
|
---|
479 | match_bit = len & offset; \
|
---|
480 | subcoder_index = offset + match_bit + symbol; \
|
---|
481 | rc_bit(probs[subcoder_index], \
|
---|
482 | offset &= ~match_bit, \
|
---|
483 | offset &= match_bit, \
|
---|
484 | seq)
|
---|
485 |
|
---|
486 | d(SEQ_LITERAL_MATCHED0);
|
---|
487 | len <<= 1;
|
---|
488 | d(SEQ_LITERAL_MATCHED1);
|
---|
489 | len <<= 1;
|
---|
490 | d(SEQ_LITERAL_MATCHED2);
|
---|
491 | len <<= 1;
|
---|
492 | d(SEQ_LITERAL_MATCHED3);
|
---|
493 | len <<= 1;
|
---|
494 | d(SEQ_LITERAL_MATCHED4);
|
---|
495 | len <<= 1;
|
---|
496 | d(SEQ_LITERAL_MATCHED5);
|
---|
497 | len <<= 1;
|
---|
498 | d(SEQ_LITERAL_MATCHED6);
|
---|
499 | len <<= 1;
|
---|
500 | d(SEQ_LITERAL_MATCHED7);
|
---|
501 | # undef d
|
---|
502 | #endif
|
---|
503 | }
|
---|
504 |
|
---|
505 | //update_literal(state);
|
---|
506 | // Use a lookup table to update to literal state,
|
---|
507 | // since compared to other state updates, this would
|
---|
508 | // need two branches.
|
---|
509 | static const lzma_lzma_state next_state[] = {
|
---|
510 | STATE_LIT_LIT,
|
---|
511 | STATE_LIT_LIT,
|
---|
512 | STATE_LIT_LIT,
|
---|
513 | STATE_LIT_LIT,
|
---|
514 | STATE_MATCH_LIT_LIT,
|
---|
515 | STATE_REP_LIT_LIT,
|
---|
516 | STATE_SHORTREP_LIT_LIT,
|
---|
517 | STATE_MATCH_LIT,
|
---|
518 | STATE_REP_LIT,
|
---|
519 | STATE_SHORTREP_LIT,
|
---|
520 | STATE_MATCH_LIT,
|
---|
521 | STATE_REP_LIT
|
---|
522 | };
|
---|
523 | state = next_state[state];
|
---|
524 |
|
---|
525 | case SEQ_LITERAL_WRITE:
|
---|
526 | if (unlikely(dict_put(&dict, symbol))) {
|
---|
527 | coder->sequence = SEQ_LITERAL_WRITE;
|
---|
528 | goto out;
|
---|
529 | }
|
---|
530 |
|
---|
531 | continue;
|
---|
532 | }
|
---|
533 |
|
---|
534 | // Instead of a new byte we are going to get a byte range
|
---|
535 | // (distance and length) which will be repeated from our
|
---|
536 | // output history.
|
---|
537 |
|
---|
538 | rc_update_1(coder->is_match[state][pos_state]);
|
---|
539 |
|
---|
540 | case SEQ_IS_REP:
|
---|
541 | rc_if_0(coder->is_rep[state], SEQ_IS_REP) {
|
---|
542 | // Not a repeated match
|
---|
543 | rc_update_0(coder->is_rep[state]);
|
---|
544 | update_match(state);
|
---|
545 |
|
---|
546 | // The latest three match distances are kept in
|
---|
547 | // memory in case there are repeated matches.
|
---|
548 | rep3 = rep2;
|
---|
549 | rep2 = rep1;
|
---|
550 | rep1 = rep0;
|
---|
551 |
|
---|
552 | // Decode the length of the match.
|
---|
553 | len_decode(len, coder->match_len_decoder,
|
---|
554 | pos_state, SEQ_MATCH_LEN);
|
---|
555 |
|
---|
556 | // Prepare to decode the highest two bits of the
|
---|
557 | // match distance.
|
---|
558 | probs = coder->dist_slot[get_dist_state(len)];
|
---|
559 | symbol = 1;
|
---|
560 |
|
---|
561 | #ifdef HAVE_SMALL
|
---|
562 | case SEQ_DIST_SLOT:
|
---|
563 | do {
|
---|
564 | rc_bit(probs[symbol], , , SEQ_DIST_SLOT);
|
---|
565 | } while (symbol < DIST_SLOTS);
|
---|
566 | #else
|
---|
567 | rc_bit_case(probs[symbol], , , SEQ_DIST_SLOT0);
|
---|
568 | rc_bit_case(probs[symbol], , , SEQ_DIST_SLOT1);
|
---|
569 | rc_bit_case(probs[symbol], , , SEQ_DIST_SLOT2);
|
---|
570 | rc_bit_case(probs[symbol], , , SEQ_DIST_SLOT3);
|
---|
571 | rc_bit_case(probs[symbol], , , SEQ_DIST_SLOT4);
|
---|
572 | rc_bit_case(probs[symbol], , , SEQ_DIST_SLOT5);
|
---|
573 | #endif
|
---|
574 | // Get rid of the highest bit that was needed for
|
---|
575 | // indexing of the probability array.
|
---|
576 | symbol -= DIST_SLOTS;
|
---|
577 | assert(symbol <= 63);
|
---|
578 |
|
---|
579 | if (symbol < DIST_MODEL_START) {
|
---|
580 | // Match distances [0, 3] have only two bits.
|
---|
581 | rep0 = symbol;
|
---|
582 | } else {
|
---|
583 | // Decode the lowest [1, 29] bits of
|
---|
584 | // the match distance.
|
---|
585 | limit = (symbol >> 1) - 1;
|
---|
586 | assert(limit >= 1 && limit <= 30);
|
---|
587 | rep0 = 2 + (symbol & 1);
|
---|
588 |
|
---|
589 | if (symbol < DIST_MODEL_END) {
|
---|
590 | // Prepare to decode the low bits for
|
---|
591 | // a distance of [4, 127].
|
---|
592 | assert(limit <= 5);
|
---|
593 | rep0 <<= limit;
|
---|
594 | assert(rep0 <= 96);
|
---|
595 | // -1 is fine, because we start
|
---|
596 | // decoding at probs[1], not probs[0].
|
---|
597 | // NOTE: This violates the C standard,
|
---|
598 | // since we are doing pointer
|
---|
599 | // arithmetic past the beginning of
|
---|
600 | // the array.
|
---|
601 | assert((int32_t)(rep0 - symbol - 1)
|
---|
602 | >= -1);
|
---|
603 | assert((int32_t)(rep0 - symbol - 1)
|
---|
604 | <= 82);
|
---|
605 | probs = coder->pos_special + rep0
|
---|
606 | - symbol - 1;
|
---|
607 | symbol = 1;
|
---|
608 | offset = 0;
|
---|
609 | case SEQ_DIST_MODEL:
|
---|
610 | #ifdef HAVE_SMALL
|
---|
611 | do {
|
---|
612 | rc_bit(probs[symbol], ,
|
---|
613 | rep0 += 1U << offset,
|
---|
614 | SEQ_DIST_MODEL);
|
---|
615 | } while (++offset < limit);
|
---|
616 | #else
|
---|
617 | switch (limit) {
|
---|
618 | case 5:
|
---|
619 | assert(offset == 0);
|
---|
620 | rc_bit(probs[symbol], ,
|
---|
621 | rep0 += 1U,
|
---|
622 | SEQ_DIST_MODEL);
|
---|
623 | ++offset;
|
---|
624 | --limit;
|
---|
625 | case 4:
|
---|
626 | rc_bit(probs[symbol], ,
|
---|
627 | rep0 += 1U << offset,
|
---|
628 | SEQ_DIST_MODEL);
|
---|
629 | ++offset;
|
---|
630 | --limit;
|
---|
631 | case 3:
|
---|
632 | rc_bit(probs[symbol], ,
|
---|
633 | rep0 += 1U << offset,
|
---|
634 | SEQ_DIST_MODEL);
|
---|
635 | ++offset;
|
---|
636 | --limit;
|
---|
637 | case 2:
|
---|
638 | rc_bit(probs[symbol], ,
|
---|
639 | rep0 += 1U << offset,
|
---|
640 | SEQ_DIST_MODEL);
|
---|
641 | ++offset;
|
---|
642 | --limit;
|
---|
643 | case 1:
|
---|
644 | // We need "symbol" only for
|
---|
645 | // indexing the probability
|
---|
646 | // array, thus we can use
|
---|
647 | // rc_bit_last() here to omit
|
---|
648 | // the unneeded updating of
|
---|
649 | // "symbol".
|
---|
650 | rc_bit_last(probs[symbol], ,
|
---|
651 | rep0 += 1U << offset,
|
---|
652 | SEQ_DIST_MODEL);
|
---|
653 | }
|
---|
654 | #endif
|
---|
655 | } else {
|
---|
656 | // The distance is >= 128. Decode the
|
---|
657 | // lower bits without probabilities
|
---|
658 | // except the lowest four bits.
|
---|
659 | assert(symbol >= 14);
|
---|
660 | assert(limit >= 6);
|
---|
661 | limit -= ALIGN_BITS;
|
---|
662 | assert(limit >= 2);
|
---|
663 | case SEQ_DIRECT:
|
---|
664 | // Not worth manual unrolling
|
---|
665 | do {
|
---|
666 | rc_direct(rep0, SEQ_DIRECT);
|
---|
667 | } while (--limit > 0);
|
---|
668 |
|
---|
669 | // Decode the lowest four bits using
|
---|
670 | // probabilities.
|
---|
671 | rep0 <<= ALIGN_BITS;
|
---|
672 | symbol = 1;
|
---|
673 | #ifdef HAVE_SMALL
|
---|
674 | offset = 0;
|
---|
675 | case SEQ_ALIGN:
|
---|
676 | do {
|
---|
677 | rc_bit(coder->pos_align[
|
---|
678 | symbol], ,
|
---|
679 | rep0 += 1U << offset,
|
---|
680 | SEQ_ALIGN);
|
---|
681 | } while (++offset < ALIGN_BITS);
|
---|
682 | #else
|
---|
683 | case SEQ_ALIGN0:
|
---|
684 | rc_bit(coder->pos_align[symbol], ,
|
---|
685 | rep0 += 1, SEQ_ALIGN0);
|
---|
686 | case SEQ_ALIGN1:
|
---|
687 | rc_bit(coder->pos_align[symbol], ,
|
---|
688 | rep0 += 2, SEQ_ALIGN1);
|
---|
689 | case SEQ_ALIGN2:
|
---|
690 | rc_bit(coder->pos_align[symbol], ,
|
---|
691 | rep0 += 4, SEQ_ALIGN2);
|
---|
692 | case SEQ_ALIGN3:
|
---|
693 | // Like in SEQ_DIST_MODEL, we don't
|
---|
694 | // need "symbol" for anything else
|
---|
695 | // than indexing the probability array.
|
---|
696 | rc_bit_last(coder->pos_align[symbol], ,
|
---|
697 | rep0 += 8, SEQ_ALIGN3);
|
---|
698 | #endif
|
---|
699 |
|
---|
700 | if (rep0 == UINT32_MAX) {
|
---|
701 | // End of payload marker was
|
---|
702 | // found. It may only be
|
---|
703 | // present if
|
---|
704 | // - uncompressed size is
|
---|
705 | // unknown or
|
---|
706 | // - after known uncompressed
|
---|
707 | // size amount of bytes has
|
---|
708 | // been decompressed and
|
---|
709 | // caller has indicated
|
---|
710 | // that EOPM might be used
|
---|
711 | // (it's not allowed in
|
---|
712 | // LZMA2).
|
---|
713 | if (!eopm_is_valid) {
|
---|
714 | ret = LZMA_DATA_ERROR;
|
---|
715 | goto out;
|
---|
716 | }
|
---|
717 |
|
---|
718 | case SEQ_EOPM:
|
---|
719 | // LZMA1 stream with
|
---|
720 | // end-of-payload marker.
|
---|
721 | rc_normalize(SEQ_EOPM);
|
---|
722 | ret = rc_is_finished(rc)
|
---|
723 | ? LZMA_STREAM_END
|
---|
724 | : LZMA_DATA_ERROR;
|
---|
725 | goto out;
|
---|
726 | }
|
---|
727 | }
|
---|
728 | }
|
---|
729 |
|
---|
730 | // Validate the distance we just decoded.
|
---|
731 | if (unlikely(!dict_is_distance_valid(&dict, rep0))) {
|
---|
732 | ret = LZMA_DATA_ERROR;
|
---|
733 | goto out;
|
---|
734 | }
|
---|
735 |
|
---|
736 | } else {
|
---|
737 | rc_update_1(coder->is_rep[state]);
|
---|
738 |
|
---|
739 | // Repeated match
|
---|
740 | //
|
---|
741 | // The match distance is a value that we have had
|
---|
742 | // earlier. The latest four match distances are
|
---|
743 | // available as rep0, rep1, rep2 and rep3. We will
|
---|
744 | // now decode which of them is the new distance.
|
---|
745 | //
|
---|
746 | // There cannot be a match if we haven't produced
|
---|
747 | // any output, so check that first.
|
---|
748 | if (unlikely(!dict_is_distance_valid(&dict, 0))) {
|
---|
749 | ret = LZMA_DATA_ERROR;
|
---|
750 | goto out;
|
---|
751 | }
|
---|
752 |
|
---|
753 | case SEQ_IS_REP0:
|
---|
754 | rc_if_0(coder->is_rep0[state], SEQ_IS_REP0) {
|
---|
755 | rc_update_0(coder->is_rep0[state]);
|
---|
756 | // The distance is rep0.
|
---|
757 |
|
---|
758 | case SEQ_IS_REP0_LONG:
|
---|
759 | rc_if_0(coder->is_rep0_long[state][pos_state],
|
---|
760 | SEQ_IS_REP0_LONG) {
|
---|
761 | rc_update_0(coder->is_rep0_long[
|
---|
762 | state][pos_state]);
|
---|
763 |
|
---|
764 | update_short_rep(state);
|
---|
765 |
|
---|
766 | case SEQ_SHORTREP:
|
---|
767 | if (unlikely(dict_put(&dict, dict_get(
|
---|
768 | &dict, rep0)))) {
|
---|
769 | coder->sequence = SEQ_SHORTREP;
|
---|
770 | goto out;
|
---|
771 | }
|
---|
772 |
|
---|
773 | continue;
|
---|
774 | }
|
---|
775 |
|
---|
776 | // Repeating more than one byte at
|
---|
777 | // distance of rep0.
|
---|
778 | rc_update_1(coder->is_rep0_long[
|
---|
779 | state][pos_state]);
|
---|
780 |
|
---|
781 | } else {
|
---|
782 | rc_update_1(coder->is_rep0[state]);
|
---|
783 |
|
---|
784 | case SEQ_IS_REP1:
|
---|
785 | // The distance is rep1, rep2 or rep3. Once
|
---|
786 | // we find out which one of these three, it
|
---|
787 | // is stored to rep0 and rep1, rep2 and rep3
|
---|
788 | // are updated accordingly.
|
---|
789 | rc_if_0(coder->is_rep1[state], SEQ_IS_REP1) {
|
---|
790 | rc_update_0(coder->is_rep1[state]);
|
---|
791 |
|
---|
792 | const uint32_t distance = rep1;
|
---|
793 | rep1 = rep0;
|
---|
794 | rep0 = distance;
|
---|
795 |
|
---|
796 | } else {
|
---|
797 | rc_update_1(coder->is_rep1[state]);
|
---|
798 | case SEQ_IS_REP2:
|
---|
799 | rc_if_0(coder->is_rep2[state],
|
---|
800 | SEQ_IS_REP2) {
|
---|
801 | rc_update_0(coder->is_rep2[
|
---|
802 | state]);
|
---|
803 |
|
---|
804 | const uint32_t distance = rep2;
|
---|
805 | rep2 = rep1;
|
---|
806 | rep1 = rep0;
|
---|
807 | rep0 = distance;
|
---|
808 |
|
---|
809 | } else {
|
---|
810 | rc_update_1(coder->is_rep2[
|
---|
811 | state]);
|
---|
812 |
|
---|
813 | const uint32_t distance = rep3;
|
---|
814 | rep3 = rep2;
|
---|
815 | rep2 = rep1;
|
---|
816 | rep1 = rep0;
|
---|
817 | rep0 = distance;
|
---|
818 | }
|
---|
819 | }
|
---|
820 | }
|
---|
821 |
|
---|
822 | update_long_rep(state);
|
---|
823 |
|
---|
824 | // Decode the length of the repeated match.
|
---|
825 | len_decode(len, coder->rep_len_decoder,
|
---|
826 | pos_state, SEQ_REP_LEN);
|
---|
827 | }
|
---|
828 |
|
---|
829 | /////////////////////////////////
|
---|
830 | // Repeat from history buffer. //
|
---|
831 | /////////////////////////////////
|
---|
832 |
|
---|
833 | // The length is always between these limits. There is no way
|
---|
834 | // to trigger the algorithm to set len outside this range.
|
---|
835 | assert(len >= MATCH_LEN_MIN);
|
---|
836 | assert(len <= MATCH_LEN_MAX);
|
---|
837 |
|
---|
838 | case SEQ_COPY:
|
---|
839 | // Repeat len bytes from distance of rep0.
|
---|
840 | if (unlikely(dict_repeat(&dict, rep0, &len))) {
|
---|
841 | coder->sequence = SEQ_COPY;
|
---|
842 | goto out;
|
---|
843 | }
|
---|
844 | }
|
---|
845 |
|
---|
846 | out:
|
---|
847 | // Save state
|
---|
848 |
|
---|
849 | // NOTE: Must not copy dict.limit.
|
---|
850 | dictptr->pos = dict.pos;
|
---|
851 | dictptr->full = dict.full;
|
---|
852 |
|
---|
853 | rc_from_local(coder->rc, *in_pos);
|
---|
854 |
|
---|
855 | coder->state = state;
|
---|
856 | coder->rep0 = rep0;
|
---|
857 | coder->rep1 = rep1;
|
---|
858 | coder->rep2 = rep2;
|
---|
859 | coder->rep3 = rep3;
|
---|
860 |
|
---|
861 | coder->probs = probs;
|
---|
862 | coder->symbol = symbol;
|
---|
863 | coder->limit = limit;
|
---|
864 | coder->offset = offset;
|
---|
865 | coder->len = len;
|
---|
866 |
|
---|
867 | // Update the remaining amount of uncompressed data if uncompressed
|
---|
868 | // size was known.
|
---|
869 | if (coder->uncompressed_size != LZMA_VLI_UNKNOWN) {
|
---|
870 | coder->uncompressed_size -= dict.pos - dict_start;
|
---|
871 |
|
---|
872 | // If we have gotten all the output but the decoder wants
|
---|
873 | // to write more output, the file is corrupt. There are
|
---|
874 | // three SEQ values where output is produced.
|
---|
875 | if (coder->uncompressed_size == 0 && ret == LZMA_OK
|
---|
876 | && (coder->sequence == SEQ_LITERAL_WRITE
|
---|
877 | || coder->sequence == SEQ_SHORTREP
|
---|
878 | || coder->sequence == SEQ_COPY))
|
---|
879 | ret = LZMA_DATA_ERROR;
|
---|
880 | }
|
---|
881 |
|
---|
882 | if (ret == LZMA_STREAM_END) {
|
---|
883 | // Reset the range decoder so that it is ready to reinitialize
|
---|
884 | // for a new LZMA2 chunk.
|
---|
885 | rc_reset(coder->rc);
|
---|
886 | coder->sequence = SEQ_IS_MATCH;
|
---|
887 | }
|
---|
888 |
|
---|
889 | return ret;
|
---|
890 | }
|
---|
891 |
|
---|
892 |
|
---|
893 |
|
---|
894 | static void
|
---|
895 | lzma_decoder_uncompressed(void *coder_ptr, lzma_vli uncompressed_size,
|
---|
896 | bool allow_eopm)
|
---|
897 | {
|
---|
898 | lzma_lzma1_decoder *coder = coder_ptr;
|
---|
899 | coder->uncompressed_size = uncompressed_size;
|
---|
900 | coder->allow_eopm = allow_eopm;
|
---|
901 | }
|
---|
902 |
|
---|
903 |
|
---|
904 | static void
|
---|
905 | lzma_decoder_reset(void *coder_ptr, const void *opt)
|
---|
906 | {
|
---|
907 | lzma_lzma1_decoder *coder = coder_ptr;
|
---|
908 | const lzma_options_lzma *options = opt;
|
---|
909 |
|
---|
910 | // NOTE: We assume that lc/lp/pb are valid since they were
|
---|
911 | // successfully decoded with lzma_lzma_decode_properties().
|
---|
912 |
|
---|
913 | // Calculate pos_mask. We don't need pos_bits as is for anything.
|
---|
914 | coder->pos_mask = (1U << options->pb) - 1;
|
---|
915 |
|
---|
916 | // Initialize the literal decoder.
|
---|
917 | literal_init(coder->literal, options->lc, options->lp);
|
---|
918 |
|
---|
919 | coder->literal_context_bits = options->lc;
|
---|
920 | coder->literal_pos_mask = (1U << options->lp) - 1;
|
---|
921 |
|
---|
922 | // State
|
---|
923 | coder->state = STATE_LIT_LIT;
|
---|
924 | coder->rep0 = 0;
|
---|
925 | coder->rep1 = 0;
|
---|
926 | coder->rep2 = 0;
|
---|
927 | coder->rep3 = 0;
|
---|
928 | coder->pos_mask = (1U << options->pb) - 1;
|
---|
929 |
|
---|
930 | // Range decoder
|
---|
931 | rc_reset(coder->rc);
|
---|
932 |
|
---|
933 | // Bit and bittree decoders
|
---|
934 | for (uint32_t i = 0; i < STATES; ++i) {
|
---|
935 | for (uint32_t j = 0; j <= coder->pos_mask; ++j) {
|
---|
936 | bit_reset(coder->is_match[i][j]);
|
---|
937 | bit_reset(coder->is_rep0_long[i][j]);
|
---|
938 | }
|
---|
939 |
|
---|
940 | bit_reset(coder->is_rep[i]);
|
---|
941 | bit_reset(coder->is_rep0[i]);
|
---|
942 | bit_reset(coder->is_rep1[i]);
|
---|
943 | bit_reset(coder->is_rep2[i]);
|
---|
944 | }
|
---|
945 |
|
---|
946 | for (uint32_t i = 0; i < DIST_STATES; ++i)
|
---|
947 | bittree_reset(coder->dist_slot[i], DIST_SLOT_BITS);
|
---|
948 |
|
---|
949 | for (uint32_t i = 0; i < FULL_DISTANCES - DIST_MODEL_END; ++i)
|
---|
950 | bit_reset(coder->pos_special[i]);
|
---|
951 |
|
---|
952 | bittree_reset(coder->pos_align, ALIGN_BITS);
|
---|
953 |
|
---|
954 | // Len decoders (also bit/bittree)
|
---|
955 | const uint32_t num_pos_states = 1U << options->pb;
|
---|
956 | bit_reset(coder->match_len_decoder.choice);
|
---|
957 | bit_reset(coder->match_len_decoder.choice2);
|
---|
958 | bit_reset(coder->rep_len_decoder.choice);
|
---|
959 | bit_reset(coder->rep_len_decoder.choice2);
|
---|
960 |
|
---|
961 | for (uint32_t pos_state = 0; pos_state < num_pos_states; ++pos_state) {
|
---|
962 | bittree_reset(coder->match_len_decoder.low[pos_state],
|
---|
963 | LEN_LOW_BITS);
|
---|
964 | bittree_reset(coder->match_len_decoder.mid[pos_state],
|
---|
965 | LEN_MID_BITS);
|
---|
966 |
|
---|
967 | bittree_reset(coder->rep_len_decoder.low[pos_state],
|
---|
968 | LEN_LOW_BITS);
|
---|
969 | bittree_reset(coder->rep_len_decoder.mid[pos_state],
|
---|
970 | LEN_MID_BITS);
|
---|
971 | }
|
---|
972 |
|
---|
973 | bittree_reset(coder->match_len_decoder.high, LEN_HIGH_BITS);
|
---|
974 | bittree_reset(coder->rep_len_decoder.high, LEN_HIGH_BITS);
|
---|
975 |
|
---|
976 | coder->sequence = SEQ_IS_MATCH;
|
---|
977 | coder->probs = NULL;
|
---|
978 | coder->symbol = 0;
|
---|
979 | coder->limit = 0;
|
---|
980 | coder->offset = 0;
|
---|
981 | coder->len = 0;
|
---|
982 |
|
---|
983 | return;
|
---|
984 | }
|
---|
985 |
|
---|
986 |
|
---|
987 | extern lzma_ret
|
---|
988 | lzma_lzma_decoder_create(lzma_lz_decoder *lz, const lzma_allocator *allocator,
|
---|
989 | const lzma_options_lzma *options, lzma_lz_options *lz_options)
|
---|
990 | {
|
---|
991 | if (lz->coder == NULL) {
|
---|
992 | lz->coder = lzma_alloc(sizeof(lzma_lzma1_decoder), allocator);
|
---|
993 | if (lz->coder == NULL)
|
---|
994 | return LZMA_MEM_ERROR;
|
---|
995 |
|
---|
996 | lz->code = &lzma_decode;
|
---|
997 | lz->reset = &lzma_decoder_reset;
|
---|
998 | lz->set_uncompressed = &lzma_decoder_uncompressed;
|
---|
999 | }
|
---|
1000 |
|
---|
1001 | // All dictionary sizes are OK here. LZ decoder will take care of
|
---|
1002 | // the special cases.
|
---|
1003 | lz_options->dict_size = options->dict_size;
|
---|
1004 | lz_options->preset_dict = options->preset_dict;
|
---|
1005 | lz_options->preset_dict_size = options->preset_dict_size;
|
---|
1006 |
|
---|
1007 | return LZMA_OK;
|
---|
1008 | }
|
---|
1009 |
|
---|
1010 |
|
---|
1011 | /// Allocate and initialize LZMA decoder. This is used only via LZ
|
---|
1012 | /// initialization (lzma_lzma_decoder_init() passes function pointer to
|
---|
1013 | /// the LZ initialization).
|
---|
1014 | static lzma_ret
|
---|
1015 | lzma_decoder_init(lzma_lz_decoder *lz, const lzma_allocator *allocator,
|
---|
1016 | lzma_vli id, const void *options, lzma_lz_options *lz_options)
|
---|
1017 | {
|
---|
1018 | if (!is_lclppb_valid(options))
|
---|
1019 | return LZMA_PROG_ERROR;
|
---|
1020 |
|
---|
1021 | lzma_vli uncomp_size = LZMA_VLI_UNKNOWN;
|
---|
1022 | bool allow_eopm = true;
|
---|
1023 |
|
---|
1024 | if (id == LZMA_FILTER_LZMA1EXT) {
|
---|
1025 | const lzma_options_lzma *opt = options;
|
---|
1026 |
|
---|
1027 | // Only one flag is supported.
|
---|
1028 | if (opt->ext_flags & ~LZMA_LZMA1EXT_ALLOW_EOPM)
|
---|
1029 | return LZMA_OPTIONS_ERROR;
|
---|
1030 |
|
---|
1031 | // FIXME? Using lzma_vli instead of uint64_t is weird because
|
---|
1032 | // this has nothing to do with .xz headers and variable-length
|
---|
1033 | // integer encoding. On the other hand, using LZMA_VLI_UNKNOWN
|
---|
1034 | // instead of UINT64_MAX is clearer when unknown size is
|
---|
1035 | // meant. A problem with using lzma_vli is that now we
|
---|
1036 | // allow > LZMA_VLI_MAX which is fine in this file but
|
---|
1037 | // it's still confusing. Note that alone_decoder.c also
|
---|
1038 | // allows > LZMA_VLI_MAX when setting uncompressed size.
|
---|
1039 | uncomp_size = opt->ext_size_low
|
---|
1040 | + ((uint64_t)(opt->ext_size_high) << 32);
|
---|
1041 | allow_eopm = (opt->ext_flags & LZMA_LZMA1EXT_ALLOW_EOPM) != 0
|
---|
1042 | || uncomp_size == LZMA_VLI_UNKNOWN;
|
---|
1043 | }
|
---|
1044 |
|
---|
1045 | return_if_error(lzma_lzma_decoder_create(
|
---|
1046 | lz, allocator, options, lz_options));
|
---|
1047 |
|
---|
1048 | lzma_decoder_reset(lz->coder, options);
|
---|
1049 | lzma_decoder_uncompressed(lz->coder, uncomp_size, allow_eopm);
|
---|
1050 |
|
---|
1051 | return LZMA_OK;
|
---|
1052 | }
|
---|
1053 |
|
---|
1054 |
|
---|
1055 | extern lzma_ret
|
---|
1056 | lzma_lzma_decoder_init(lzma_next_coder *next, const lzma_allocator *allocator,
|
---|
1057 | const lzma_filter_info *filters)
|
---|
1058 | {
|
---|
1059 | // LZMA can only be the last filter in the chain. This is enforced
|
---|
1060 | // by the raw_decoder initialization.
|
---|
1061 | assert(filters[1].init == NULL);
|
---|
1062 |
|
---|
1063 | return lzma_lz_decoder_init(next, allocator, filters,
|
---|
1064 | &lzma_decoder_init);
|
---|
1065 | }
|
---|
1066 |
|
---|
1067 |
|
---|
1068 | extern bool
|
---|
1069 | lzma_lzma_lclppb_decode(lzma_options_lzma *options, uint8_t byte)
|
---|
1070 | {
|
---|
1071 | if (byte > (4 * 5 + 4) * 9 + 8)
|
---|
1072 | return true;
|
---|
1073 |
|
---|
1074 | // See the file format specification to understand this.
|
---|
1075 | options->pb = byte / (9 * 5);
|
---|
1076 | byte -= options->pb * 9 * 5;
|
---|
1077 | options->lp = byte / 9;
|
---|
1078 | options->lc = byte - options->lp * 9;
|
---|
1079 |
|
---|
1080 | return options->lc + options->lp > LZMA_LCLP_MAX;
|
---|
1081 | }
|
---|
1082 |
|
---|
1083 |
|
---|
1084 | extern uint64_t
|
---|
1085 | lzma_lzma_decoder_memusage_nocheck(const void *options)
|
---|
1086 | {
|
---|
1087 | const lzma_options_lzma *const opt = options;
|
---|
1088 | return sizeof(lzma_lzma1_decoder)
|
---|
1089 | + lzma_lz_decoder_memusage(opt->dict_size);
|
---|
1090 | }
|
---|
1091 |
|
---|
1092 |
|
---|
1093 | extern uint64_t
|
---|
1094 | lzma_lzma_decoder_memusage(const void *options)
|
---|
1095 | {
|
---|
1096 | if (!is_lclppb_valid(options))
|
---|
1097 | return UINT64_MAX;
|
---|
1098 |
|
---|
1099 | return lzma_lzma_decoder_memusage_nocheck(options);
|
---|
1100 | }
|
---|
1101 |
|
---|
1102 |
|
---|
1103 | extern lzma_ret
|
---|
1104 | lzma_lzma_props_decode(void **options, const lzma_allocator *allocator,
|
---|
1105 | const uint8_t *props, size_t props_size)
|
---|
1106 | {
|
---|
1107 | if (props_size != 5)
|
---|
1108 | return LZMA_OPTIONS_ERROR;
|
---|
1109 |
|
---|
1110 | lzma_options_lzma *opt
|
---|
1111 | = lzma_alloc(sizeof(lzma_options_lzma), allocator);
|
---|
1112 | if (opt == NULL)
|
---|
1113 | return LZMA_MEM_ERROR;
|
---|
1114 |
|
---|
1115 | if (lzma_lzma_lclppb_decode(opt, props[0]))
|
---|
1116 | goto error;
|
---|
1117 |
|
---|
1118 | // All dictionary sizes are accepted, including zero. LZ decoder
|
---|
1119 | // will automatically use a dictionary at least a few KiB even if
|
---|
1120 | // a smaller dictionary is requested.
|
---|
1121 | opt->dict_size = read32le(props + 1);
|
---|
1122 |
|
---|
1123 | opt->preset_dict = NULL;
|
---|
1124 | opt->preset_dict_size = 0;
|
---|
1125 |
|
---|
1126 | *options = opt;
|
---|
1127 |
|
---|
1128 | return LZMA_OK;
|
---|
1129 |
|
---|
1130 | error:
|
---|
1131 | lzma_free(opt, allocator);
|
---|
1132 | return LZMA_OPTIONS_ERROR;
|
---|
1133 | }
|
---|