VirtualBox

source: vbox/trunk/src/libs/liblzma-5.8.1/common/file_info.c@ 108911

Last change on this file since 108911 was 108911, checked in by vboxsync, 4 weeks ago

libs/liblzma: Applied and adjusted our liblzma changes to 5.8.1 and export to OSE. jiraref:VBP-1635

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
  • Property sync-process set to export
File size: 27.9 KB
Line 
// SPDX-License-Identifier: 0BSD

///////////////////////////////////////////////////////////////////////////////
//
/// \file       file_info.c
/// \brief      Decode .xz file information into a lzma_index structure
//
//  Author:     Lasse Collin
//
///////////////////////////////////////////////////////////////////////////////

#include "index_decoder.h"


15typedef struct {
16 enum {
17 SEQ_MAGIC_BYTES,
18 SEQ_PADDING_SEEK,
19 SEQ_PADDING_DECODE,
20 SEQ_FOOTER,
21 SEQ_INDEX_INIT,
22 SEQ_INDEX_DECODE,
23 SEQ_HEADER_DECODE,
24 SEQ_HEADER_COMPARE,
25 } sequence;
26
27 /// Absolute position of in[*in_pos] in the file. All code that
28 /// modifies *in_pos also updates this. seek_to_pos() needs this
29 /// to determine if we need to request the application to seek for
30 /// us or if we can do the seeking internally by adjusting *in_pos.
31 uint64_t file_cur_pos;
32
33 /// This refers to absolute positions of interesting parts of the
34 /// input file. Sometimes it points to the *beginning* of a specific
35 /// field and sometimes to the *end* of a field. The current target
36 /// position at each moment is explained in the comments.
37 uint64_t file_target_pos;
38
39 /// Size of the .xz file (from the application).
40 uint64_t file_size;
41
42 /// Index decoder
43 lzma_next_coder index_decoder;
44
45 /// Number of bytes remaining in the Index field that is currently
46 /// being decoded.
47 lzma_vli index_remaining;
48
49 /// The Index decoder will store the decoded Index in this pointer.
50 lzma_index *this_index;
51
52 /// Amount of Stream Padding in the current Stream.
53 lzma_vli stream_padding;
54
55 /// The final combined index is collected here.
56 lzma_index *combined_index;
57
58 /// Pointer from the application where to store the index information
59 /// after successful decoding.
60 lzma_index **dest_index;
61
62 /// Pointer to lzma_stream.seek_pos to be used when returning
63 /// LZMA_SEEK_NEEDED. This is set by seek_to_pos() when needed.
64 uint64_t *external_seek_pos;
65
66 /// Memory usage limit
67 uint64_t memlimit;
68
69 /// Stream Flags from the very beginning of the file.
70 lzma_stream_flags first_header_flags;
71
72 /// Stream Flags from Stream Header of the current Stream.
73 lzma_stream_flags header_flags;
74
75 /// Stream Flags from Stream Footer of the current Stream.
76 lzma_stream_flags footer_flags;
77
78 size_t temp_pos;
79 size_t temp_size;
80 uint8_t temp[8192];
81
82} lzma_file_info_coder;
83
84
85/// Copies data from in[*in_pos] into coder->temp until
86/// coder->temp_pos == coder->temp_size. This also keeps coder->file_cur_pos
87/// in sync with *in_pos. Returns true if more input is needed.
88static bool
89fill_temp(lzma_file_info_coder *coder, const uint8_t *restrict in,
90 size_t *restrict in_pos, size_t in_size)
91{
92 coder->file_cur_pos += lzma_bufcpy(in, in_pos, in_size,
93 coder->temp, &coder->temp_pos, coder->temp_size);
94 return coder->temp_pos < coder->temp_size;
95}
96
97
98/// Seeks to the absolute file position specified by target_pos.
99/// This tries to do the seeking by only modifying *in_pos, if possible.
100/// The main benefit of this is that if one passes the whole file at once
101/// to lzma_code(), the decoder will never need to return LZMA_SEEK_NEEDED
102/// as all the seeking can be done by adjusting *in_pos in this function.
103///
104/// Returns true if an external seek is needed and the caller must return
105/// LZMA_SEEK_NEEDED.
106static bool
107seek_to_pos(lzma_file_info_coder *coder, uint64_t target_pos,
108 size_t in_start, size_t *in_pos, size_t in_size)
109{
110 // The input buffer doesn't extend beyond the end of the file.
111 // This has been checked by file_info_decode() already.
112 assert(coder->file_size - coder->file_cur_pos >= in_size - *in_pos);
113
114 const uint64_t pos_min = coder->file_cur_pos - (*in_pos - in_start);
115 const uint64_t pos_max = coder->file_cur_pos + (in_size - *in_pos);
116
117 bool external_seek_needed;
118
119 if (target_pos >= pos_min && target_pos <= pos_max) {
120 // The requested position is available in the current input
121 // buffer or right after it. That is, in a corner case we
122 // end up setting *in_pos == in_size and thus will immediately
123 // need new input bytes from the application.
124 *in_pos += (size_t)(target_pos - coder->file_cur_pos);
125 external_seek_needed = false;
126 } else {
127 // Ask the application to seek the input file.
128 *coder->external_seek_pos = target_pos;
129 external_seek_needed = true;
130
131 // Mark the whole input buffer as used. This way
132 // lzma_stream.total_in will have a better estimate
133 // of the amount of data read. It still won't be perfect
134 // as the value will depend on the input buffer size that
135 // the application uses, but it should be good enough for
136 // those few who want an estimate.
137 *in_pos = in_size;
138 }
139
140 // After seeking (internal or external) the current position
141 // will match the requested target position.
142 coder->file_cur_pos = target_pos;
143
144 return external_seek_needed;
145}
146
147
148/// The caller sets coder->file_target_pos so that it points to the *end*
149/// of the desired file position. This function then determines how far
150/// backwards from that position we can seek. After seeking fill_temp()
151/// can be used to read data into coder->temp. When fill_temp() has finished,
152/// coder->temp[coder->temp_size] will match coder->file_target_pos.
153///
154/// This also validates that coder->target_file_pos is sane in sense that
155/// we aren't trying to seek too far backwards (too close or beyond the
156/// beginning of the file).
157static lzma_ret
158reverse_seek(lzma_file_info_coder *coder,
159 size_t in_start, size_t *in_pos, size_t in_size)
160{
161 // Check that there is enough data before the target position
162 // to contain at least Stream Header and Stream Footer. If there
163 // isn't, the file cannot be valid.
164 if (coder->file_target_pos < 2 * LZMA_STREAM_HEADER_SIZE)
165 return LZMA_DATA_ERROR;
166
167 coder->temp_pos = 0;
168
169 // The Stream Header at the very beginning of the file gets handled
170 // specially in SEQ_MAGIC_BYTES and thus we will never need to seek
171 // there. By not seeking to the first LZMA_STREAM_HEADER_SIZE bytes
172 // we avoid a useless external seek after SEQ_MAGIC_BYTES if the
173 // application uses an extremely small input buffer and the input
174 // file is very small.
175 if (coder->file_target_pos - LZMA_STREAM_HEADER_SIZE
176 < sizeof(coder->temp))
177 coder->temp_size = (size_t)(coder->file_target_pos
178 - LZMA_STREAM_HEADER_SIZE);
179 else
180 coder->temp_size = sizeof(coder->temp);
181
182 // The above if-statements guarantee this. This is important because
183 // the Stream Header/Footer decoders assume that there's at least
184 // LZMA_STREAM_HEADER_SIZE bytes in coder->temp.
185 assert(coder->temp_size >= LZMA_STREAM_HEADER_SIZE);
186
187 if (seek_to_pos(coder, coder->file_target_pos - coder->temp_size,
188 in_start, in_pos, in_size))
189 return LZMA_SEEK_NEEDED;
190
191 return LZMA_OK;
192}
193
194
/// Gets the number of zero-bytes at the end of the buffer.
///
/// Scans buf[0 .. buf_size - 1] backwards and stops at the first non-zero
/// byte. Returns buf_size if every byte is zero and 0 if buf_size is 0
/// (buf is not dereferenced in that case).
static size_t
get_padding_size(const uint8_t *buf, size_t buf_size)
{
	size_t padding = 0;
	while (buf_size > 0 && buf[--buf_size] == 0x00)
		++padding;

	return padding;
}


207/// With the Stream Header at the very beginning of the file, LZMA_FORMAT_ERROR
208/// is used to tell the application that Magic Bytes didn't match. In other
209/// Stream Header/Footer fields (in the middle/end of the file) it could be
210/// a bit confusing to return LZMA_FORMAT_ERROR as we already know that there
211/// is a valid Stream Header at the beginning of the file. For those cases
212/// this function is used to convert LZMA_FORMAT_ERROR to LZMA_DATA_ERROR.
213static lzma_ret
214hide_format_error(lzma_ret ret)
215{
216 if (ret == LZMA_FORMAT_ERROR)
217 ret = LZMA_DATA_ERROR;
218
219 return ret;
220}
221
222
223/// Calls the Index decoder and updates coder->index_remaining.
224/// This is a separate function because the input can be either directly
225/// from the application or from coder->temp.
226static lzma_ret
227decode_index(lzma_file_info_coder *coder, const lzma_allocator *allocator,
228 const uint8_t *restrict in, size_t *restrict in_pos,
229 size_t in_size, bool update_file_cur_pos)
230{
231 const size_t in_start = *in_pos;
232
233 const lzma_ret ret = coder->index_decoder.code(
234 coder->index_decoder.coder,
235 allocator, in, in_pos, in_size,
236 NULL, NULL, 0, LZMA_RUN);
237
238 coder->index_remaining -= *in_pos - in_start;
239
240 if (update_file_cur_pos)
241 coder->file_cur_pos += *in_pos - in_start;
242
243 return ret;
244}
245
246
247static lzma_ret
248file_info_decode(void *coder_ptr, const lzma_allocator *allocator,
249 const uint8_t *restrict in, size_t *restrict in_pos,
250 size_t in_size,
251 uint8_t *restrict out lzma_attribute((__unused__)),
252 size_t *restrict out_pos lzma_attribute((__unused__)),
253 size_t out_size lzma_attribute((__unused__)),
254 lzma_action action lzma_attribute((__unused__)))
255{
256 lzma_file_info_coder *coder = coder_ptr;
257 const size_t in_start = *in_pos;
258
259 // If the caller provides input past the end of the file, trim
260 // the extra bytes from the buffer so that we won't read too far.
261 assert(coder->file_size >= coder->file_cur_pos);
262 if (coder->file_size - coder->file_cur_pos < in_size - in_start)
263 in_size = in_start
264 + (size_t)(coder->file_size - coder->file_cur_pos);
265
266 while (true)
267 switch (coder->sequence) {
268 case SEQ_MAGIC_BYTES:
269 // Decode the Stream Header at the beginning of the file
270 // first to check if the Magic Bytes match. The flags
271 // are stored in coder->first_header_flags so that we
272 // don't need to seek to it again.
273 //
274 // Check that the file is big enough to contain at least
275 // Stream Header.
276 if (coder->file_size < LZMA_STREAM_HEADER_SIZE)
277 return LZMA_FORMAT_ERROR;
278
279 // Read the Stream Header field into coder->temp.
280 if (fill_temp(coder, in, in_pos, in_size))
281 return LZMA_OK;
282
283 // This is the only Stream Header/Footer decoding where we
284 // want to return LZMA_FORMAT_ERROR if the Magic Bytes don't
285 // match. Elsewhere it will be converted to LZMA_DATA_ERROR.
286 return_if_error(lzma_stream_header_decode(
287 &coder->first_header_flags, coder->temp));
288
289 // Now that we know that the Magic Bytes match, check the
290 // file size. It's better to do this here after checking the
291 // Magic Bytes since this way we can give LZMA_FORMAT_ERROR
292 // instead of LZMA_DATA_ERROR when the Magic Bytes don't
293 // match in a file that is too big or isn't a multiple of
294 // four bytes.
295 if (coder->file_size > LZMA_VLI_MAX || (coder->file_size & 3))
296 return LZMA_DATA_ERROR;
297
298 // Start looking for Stream Padding and Stream Footer
299 // at the end of the file.
300 coder->file_target_pos = coder->file_size;
301 FALLTHROUGH;
302
303 case SEQ_PADDING_SEEK:
304 coder->sequence = SEQ_PADDING_DECODE;
305 return_if_error(reverse_seek(
306 coder, in_start, in_pos, in_size));
307 FALLTHROUGH;
308
309 case SEQ_PADDING_DECODE: {
310 // Copy to coder->temp first. This keeps the code simpler if
311 // the application only provides input a few bytes at a time.
312 if (fill_temp(coder, in, in_pos, in_size))
313 return LZMA_OK;
314
315 // Scan the buffer backwards to get the size of the
316 // Stream Padding field (if any).
317 const size_t new_padding = get_padding_size(
318 coder->temp, coder->temp_size);
319 coder->stream_padding += new_padding;
320
321 // Set the target position to the beginning of Stream Padding
322 // that has been observed so far. If all Stream Padding has
323 // been seen, then the target position will be at the end
324 // of the Stream Footer field.
325 coder->file_target_pos -= new_padding;
326
327 if (new_padding == coder->temp_size) {
328 // The whole buffer was padding. Seek backwards in
329 // the file to get more input.
330 coder->sequence = SEQ_PADDING_SEEK;
331 break;
332 }
333
334 // Size of Stream Padding must be a multiple of 4 bytes.
335 if (coder->stream_padding & 3)
336 return LZMA_DATA_ERROR;
337
338 coder->sequence = SEQ_FOOTER;
339
340 // Calculate the amount of non-padding data in coder->temp.
341 coder->temp_size -= new_padding;
342 coder->temp_pos = coder->temp_size;
343
344 // We can avoid an external seek if the whole Stream Footer
345 // is already in coder->temp. In that case SEQ_FOOTER won't
346 // read more input and will find the Stream Footer from
347 // coder->temp[coder->temp_size - LZMA_STREAM_HEADER_SIZE].
348 //
349 // Otherwise we will need to seek. The seeking is done so
350 // that Stream Footer will be at the end of coder->temp.
351 // This way it's likely that we also get a complete Index
352 // field into coder->temp without needing a separate seek
353 // for that (unless the Index field is big).
354 if (coder->temp_size < LZMA_STREAM_HEADER_SIZE)
355 return_if_error(reverse_seek(
356 coder, in_start, in_pos, in_size));
357
358 FALLTHROUGH;
359 }
360
361 case SEQ_FOOTER:
362 // Copy the Stream Footer field into coder->temp.
363 // If Stream Footer was already available in coder->temp
364 // in SEQ_PADDING_DECODE, then this does nothing.
365 if (fill_temp(coder, in, in_pos, in_size))
366 return LZMA_OK;
367
368 // Make coder->file_target_pos and coder->temp_size point
369 // to the beginning of Stream Footer and thus to the end
370 // of the Index field. coder->temp_pos will be updated
371 // a bit later.
372 coder->file_target_pos -= LZMA_STREAM_HEADER_SIZE;
373 coder->temp_size -= LZMA_STREAM_HEADER_SIZE;
374
375 // Decode Stream Footer.
376 return_if_error(hide_format_error(lzma_stream_footer_decode(
377 &coder->footer_flags,
378 coder->temp + coder->temp_size)));
379
380 // Check that we won't seek past the beginning of the file.
381 //
382 // LZMA_STREAM_HEADER_SIZE is added because there must be
383 // space for Stream Header too even though we won't seek
384 // there before decoding the Index field.
385 //
386 // There's no risk of integer overflow here because
387 // Backward Size cannot be greater than 2^34.
388 if (coder->file_target_pos < coder->footer_flags.backward_size
389 + LZMA_STREAM_HEADER_SIZE)
390 return LZMA_DATA_ERROR;
391
392 // Set the target position to the beginning of the Index field.
393 coder->file_target_pos -= coder->footer_flags.backward_size;
394 coder->sequence = SEQ_INDEX_INIT;
395
396 // We can avoid an external seek if the whole Index field is
397 // already available in coder->temp.
398 if (coder->temp_size >= coder->footer_flags.backward_size) {
399 // Set coder->temp_pos to point to the beginning
400 // of the Index.
401 coder->temp_pos = coder->temp_size
402 - coder->footer_flags.backward_size;
403 } else {
404 // These are set to zero to indicate that there's no
405 // useful data (Index or anything else) in coder->temp.
406 coder->temp_pos = 0;
407 coder->temp_size = 0;
408
409 // Seek to the beginning of the Index field.
410 if (seek_to_pos(coder, coder->file_target_pos,
411 in_start, in_pos, in_size))
412 return LZMA_SEEK_NEEDED;
413 }
414
415 FALLTHROUGH;
416
417 case SEQ_INDEX_INIT: {
418 // Calculate the amount of memory already used by the earlier
419 // Indexes so that we know how big memory limit to pass to
420 // the Index decoder.
421 //
422 // NOTE: When there are multiple Streams, the separate
423 // lzma_index structures can use more RAM (as measured by
424 // lzma_index_memused()) than the final combined lzma_index.
425 // Thus memlimit may need to be slightly higher than the final
426 // calculated memory usage will be. This is perhaps a bit
427 // confusing to the application, but I think it shouldn't
428 // cause problems in practice.
429 uint64_t memused = 0;
430 if (coder->combined_index != NULL) {
431 memused = lzma_index_memused(coder->combined_index);
432 assert(memused <= coder->memlimit);
433 if (memused > coder->memlimit) // Extra sanity check
434 return LZMA_PROG_ERROR;
435 }
436
437 // Initialize the Index decoder.
438 return_if_error(lzma_index_decoder_init(
439 &coder->index_decoder, allocator,
440 &coder->this_index,
441 coder->memlimit - memused));
442
443 coder->index_remaining = coder->footer_flags.backward_size;
444 coder->sequence = SEQ_INDEX_DECODE;
445 FALLTHROUGH;
446 }
447
448 case SEQ_INDEX_DECODE: {
449 // Decode (a part of) the Index. If the whole Index is already
450 // in coder->temp, read it from there. Otherwise read from
451 // in[*in_pos] onwards. Note that index_decode() updates
452 // coder->index_remaining and optionally coder->file_cur_pos.
453 lzma_ret ret;
454 if (coder->temp_size != 0) {
455 assert(coder->temp_size - coder->temp_pos
456 == coder->index_remaining);
457 ret = decode_index(coder, allocator, coder->temp,
458 &coder->temp_pos, coder->temp_size,
459 false);
460 } else {
461 // Don't give the decoder more input than the known
462 // remaining size of the Index field.
463 size_t in_stop = in_size;
464 if (in_size - *in_pos > coder->index_remaining)
465 in_stop = *in_pos
466 + (size_t)(coder->index_remaining);
467
468 ret = decode_index(coder, allocator,
469 in, in_pos, in_stop, true);
470 }
471
472 switch (ret) {
473 case LZMA_OK:
474 // If the Index docoder asks for more input when we
475 // have already given it as much input as Backward Size
476 // indicated, the file is invalid.
477 if (coder->index_remaining == 0)
478 return LZMA_DATA_ERROR;
479
480 // We cannot get here if we were reading Index from
481 // coder->temp because when reading from coder->temp
482 // we give the Index decoder exactly
483 // coder->index_remaining bytes of input.
484 assert(coder->temp_size == 0);
485
486 return LZMA_OK;
487
488 case LZMA_STREAM_END:
489 // If the decoding seems to be successful, check also
490 // that the Index decoder consumed as much input as
491 // indicated by the Backward Size field.
492 if (coder->index_remaining != 0)
493 return LZMA_DATA_ERROR;
494
495 break;
496
497 default:
498 return ret;
499 }
500
501 // Calculate how much the Index tells us to seek backwards
502 // (relative to the beginning of the Index): Total size of
503 // all Blocks plus the size of the Stream Header field.
504 // No integer overflow here because lzma_index_total_size()
505 // cannot return a value greater than LZMA_VLI_MAX.
506 const uint64_t seek_amount
507 = lzma_index_total_size(coder->this_index)
508 + LZMA_STREAM_HEADER_SIZE;
509
510 // Check that Index is sane in sense that seek_amount won't
511 // make us seek past the beginning of the file when locating
512 // the Stream Header.
513 //
514 // coder->file_target_pos still points to the beginning of
515 // the Index field.
516 if (coder->file_target_pos < seek_amount)
517 return LZMA_DATA_ERROR;
518
519 // Set the target to the beginning of Stream Header.
520 coder->file_target_pos -= seek_amount;
521
522 if (coder->file_target_pos == 0) {
523 // We would seek to the beginning of the file, but
524 // since we already decoded that Stream Header in
525 // SEQ_MAGIC_BYTES, we can use the cached value from
526 // coder->first_header_flags to avoid the seek.
527 coder->header_flags = coder->first_header_flags;
528 coder->sequence = SEQ_HEADER_COMPARE;
529 break;
530 }
531
532 coder->sequence = SEQ_HEADER_DECODE;
533
534 // Make coder->file_target_pos point to the end of
535 // the Stream Header field.
536 coder->file_target_pos += LZMA_STREAM_HEADER_SIZE;
537
538 // If coder->temp_size is non-zero, it points to the end
539 // of the Index field. Then the beginning of the Index
540 // field is at coder->temp[coder->temp_size
541 // - coder->footer_flags.backward_size].
542 assert(coder->temp_size == 0 || coder->temp_size
543 >= coder->footer_flags.backward_size);
544
545 // If coder->temp contained the whole Index, see if it has
546 // enough data to contain also the Stream Header. If so,
547 // we avoid an external seek.
548 //
549 // NOTE: This can happen only with small .xz files and only
550 // for the non-first Stream as the Stream Flags of the first
551 // Stream are cached and already handled a few lines above.
552 // So this isn't as useful as the other seek-avoidance cases.
553 if (coder->temp_size != 0 && coder->temp_size
554 - coder->footer_flags.backward_size
555 >= seek_amount) {
556 // Make temp_pos and temp_size point to the *end* of
557 // Stream Header so that SEQ_HEADER_DECODE will find
558 // the start of Stream Header from coder->temp[
559 // coder->temp_size - LZMA_STREAM_HEADER_SIZE].
560 coder->temp_pos = coder->temp_size
561 - coder->footer_flags.backward_size
562 - seek_amount
563 + LZMA_STREAM_HEADER_SIZE;
564 coder->temp_size = coder->temp_pos;
565 } else {
566 // Seek so that Stream Header will be at the end of
567 // coder->temp. With typical multi-Stream files we
568 // will usually also get the Stream Footer and Index
569 // of the *previous* Stream in coder->temp and thus
570 // won't need a separate seek for them.
571 return_if_error(reverse_seek(coder,
572 in_start, in_pos, in_size));
573 }
574
575 FALLTHROUGH;
576 }
577
578 case SEQ_HEADER_DECODE:
579 // Copy the Stream Header field into coder->temp.
580 // If Stream Header was already available in coder->temp
581 // in SEQ_INDEX_DECODE, then this does nothing.
582 if (fill_temp(coder, in, in_pos, in_size))
583 return LZMA_OK;
584
585 // Make all these point to the beginning of Stream Header.
586 coder->file_target_pos -= LZMA_STREAM_HEADER_SIZE;
587 coder->temp_size -= LZMA_STREAM_HEADER_SIZE;
588 coder->temp_pos = coder->temp_size;
589
590 // Decode the Stream Header.
591 return_if_error(hide_format_error(lzma_stream_header_decode(
592 &coder->header_flags,
593 coder->temp + coder->temp_size)));
594
595 coder->sequence = SEQ_HEADER_COMPARE;
596 FALLTHROUGH;
597
598 case SEQ_HEADER_COMPARE:
599 // Compare Stream Header against Stream Footer. They must
600 // match.
601 return_if_error(lzma_stream_flags_compare(
602 &coder->header_flags, &coder->footer_flags));
603
604 // Store the decoded Stream Flags into the Index. Use the
605 // Footer Flags because it contains Backward Size, although
606 // it shouldn't matter in practice.
607 if (lzma_index_stream_flags(coder->this_index,
608 &coder->footer_flags) != LZMA_OK)
609 return LZMA_PROG_ERROR;
610
611 // Store also the size of the Stream Padding field. It is
612 // needed to calculate the offsets of the Streams correctly.
613 if (lzma_index_stream_padding(coder->this_index,
614 coder->stream_padding) != LZMA_OK)
615 return LZMA_PROG_ERROR;
616
617 // Reset it so that it's ready for the next Stream.
618 coder->stream_padding = 0;
619
620 // Append the earlier decoded Indexes after this_index.
621 if (coder->combined_index != NULL)
622 return_if_error(lzma_index_cat(coder->this_index,
623 coder->combined_index, allocator));
624
625 coder->combined_index = coder->this_index;
626 coder->this_index = NULL;
627
628 // If the whole file was decoded, tell the caller that we
629 // are finished.
630 if (coder->file_target_pos == 0) {
631 // The combined index must indicate the same file
632 // size as was told to us at initialization.
633 assert(lzma_index_file_size(coder->combined_index)
634 == coder->file_size);
635
636 // Make the combined index available to
637 // the application.
638 *coder->dest_index = coder->combined_index;
639 coder->combined_index = NULL;
640
641 // Mark the input buffer as used since we may have
642 // done internal seeking and thus don't know how
643 // many input bytes were actually used. This way
644 // lzma_stream.total_in gets a slightly better
645 // estimate of the amount of input used.
646 *in_pos = in_size;
647 return LZMA_STREAM_END;
648 }
649
650 // We didn't hit the beginning of the file yet, so continue
651 // reading backwards in the file. If we have unprocessed
652 // data in coder->temp, use it before requesting more data
653 // from the application.
654 //
655 // coder->file_target_pos, coder->temp_size, and
656 // coder->temp_pos all point to the beginning of Stream Header
657 // and thus the end of the previous Stream in the file.
658 coder->sequence = coder->temp_size > 0
659 ? SEQ_PADDING_DECODE : SEQ_PADDING_SEEK;
660 break;
661
662 default:
663 assert(0);
664 return LZMA_PROG_ERROR;
665 }
666}
667
668
669static lzma_ret
670file_info_decoder_memconfig(void *coder_ptr, uint64_t *memusage,
671 uint64_t *old_memlimit, uint64_t new_memlimit)
672{
673 lzma_file_info_coder *coder = coder_ptr;
674
675 // The memory usage calculation comes from three things:
676 //
677 // (1) The Indexes that have already been decoded and processed into
678 // coder->combined_index.
679 //
680 // (2) The latest Index in coder->this_index that has been decoded but
681 // not yet put into coder->combined_index.
682 //
683 // (3) The latest Index that we have started decoding but haven't
684 // finished and thus isn't available in coder->this_index yet.
685 // Memory usage and limit information needs to be communicated
686 // from/to coder->index_decoder.
687 //
688 // Care has to be taken to not do both (2) and (3) when calculating
689 // the memory usage.
690 uint64_t combined_index_memusage = 0;
691 uint64_t this_index_memusage = 0;
692
693 // (1) If we have already successfully decoded one or more Indexes,
694 // get their memory usage.
695 if (coder->combined_index != NULL)
696 combined_index_memusage = lzma_index_memused(
697 coder->combined_index);
698
699 // Choose between (2), (3), or neither.
700 if (coder->this_index != NULL) {
701 // (2) The latest Index is available. Use its memory usage.
702 this_index_memusage = lzma_index_memused(coder->this_index);
703
704 } else if (coder->sequence == SEQ_INDEX_DECODE) {
705 // (3) The Index decoder is activate and hasn't yet stored
706 // the new index in coder->this_index. Get the memory usage
707 // information from the Index decoder.
708 //
709 // NOTE: If the Index decoder doesn't yet know how much memory
710 // it will eventually need, it will return a tiny value here.
711 uint64_t dummy;
712 if (coder->index_decoder.memconfig(coder->index_decoder.coder,
713 &this_index_memusage, &dummy, 0)
714 != LZMA_OK) {
715 assert(0);
716 return LZMA_PROG_ERROR;
717 }
718 }
719
720 // Now we know the total memory usage/requirement. If we had neither
721 // old Indexes nor a new Index, this will be zero which isn't
722 // acceptable as lzma_memusage() has to return non-zero on success
723 // and even with an empty .xz file we will end up with a lzma_index
724 // that takes some memory.
725 *memusage = combined_index_memusage + this_index_memusage;
726 if (*memusage == 0)
727 *memusage = lzma_index_memusage(1, 0);
728
729 *old_memlimit = coder->memlimit;
730
731 // If requested, set a new memory usage limit.
732 if (new_memlimit != 0) {
733 if (new_memlimit < *memusage)
734 return LZMA_MEMLIMIT_ERROR;
735
736 // In the condition (3) we need to tell the Index decoder
737 // its new memory usage limit.
738 if (coder->this_index == NULL
739 && coder->sequence == SEQ_INDEX_DECODE) {
740 const uint64_t idec_new_memlimit = new_memlimit
741 - combined_index_memusage;
742
743 assert(this_index_memusage > 0);
744 assert(idec_new_memlimit > 0);
745
746 uint64_t dummy1;
747 uint64_t dummy2;
748
749 if (coder->index_decoder.memconfig(
750 coder->index_decoder.coder,
751 &dummy1, &dummy2, idec_new_memlimit)
752 != LZMA_OK) {
753 assert(0);
754 return LZMA_PROG_ERROR;
755 }
756 }
757
758 coder->memlimit = new_memlimit;
759 }
760
761 return LZMA_OK;
762}
763
764
765static void
766file_info_decoder_end(void *coder_ptr, const lzma_allocator *allocator)
767{
768 lzma_file_info_coder *coder = coder_ptr;
769
770 lzma_next_end(&coder->index_decoder, allocator);
771 lzma_index_end(coder->this_index, allocator);
772 lzma_index_end(coder->combined_index, allocator);
773
774 lzma_free(coder, allocator);
775 return;
776}
777
778
779static lzma_ret
780lzma_file_info_decoder_init(lzma_next_coder *next,
781 const lzma_allocator *allocator, uint64_t *seek_pos,
782 lzma_index **dest_index,
783 uint64_t memlimit, uint64_t file_size)
784{
785 lzma_next_coder_init(&lzma_file_info_decoder_init, next, allocator);
786
787 if (dest_index == NULL)
788 return LZMA_PROG_ERROR;
789
790 lzma_file_info_coder *coder = next->coder;
791 if (coder == NULL) {
792 coder = lzma_alloc(sizeof(lzma_file_info_coder), allocator);
793 if (coder == NULL)
794 return LZMA_MEM_ERROR;
795
796 next->coder = coder;
797 next->code = &file_info_decode;
798 next->end = &file_info_decoder_end;
799 next->memconfig = &file_info_decoder_memconfig;
800
801 coder->index_decoder = LZMA_NEXT_CODER_INIT;
802 coder->this_index = NULL;
803 coder->combined_index = NULL;
804 }
805
806 coder->sequence = SEQ_MAGIC_BYTES;
807 coder->file_cur_pos = 0;
808 coder->file_target_pos = 0;
809 coder->file_size = file_size;
810
811 lzma_index_end(coder->this_index, allocator);
812 coder->this_index = NULL;
813
814 lzma_index_end(coder->combined_index, allocator);
815 coder->combined_index = NULL;
816
817 coder->stream_padding = 0;
818
819 coder->dest_index = dest_index;
820 coder->external_seek_pos = seek_pos;
821
822 // If memlimit is 0, make it 1 to ensure that lzma_memlimit_get()
823 // won't return 0 (which would indicate an error).
824 coder->memlimit = my_max(1, memlimit);
825
826 // Prepare these for reading the first Stream Header into coder->temp.
827 coder->temp_pos = 0;
828 coder->temp_size = LZMA_STREAM_HEADER_SIZE;
829
830 return LZMA_OK;
831}
832
833
834extern LZMA_API(lzma_ret)
835lzma_file_info_decoder(lzma_stream *strm, lzma_index **dest_index,
836 uint64_t memlimit, uint64_t file_size)
837{
838 lzma_next_strm_init(lzma_file_info_decoder_init, strm, &strm->seek_pos,
839 dest_index, memlimit, file_size);
840
841 // We allow LZMA_FINISH in addition to LZMA_RUN for convenience.
842 // lzma_code() is able to handle the LZMA_FINISH + LZMA_SEEK_NEEDED
843 // combination in a sane way. Applications still need to be careful
844 // if they use LZMA_FINISH so that they remember to reset it back
845 // to LZMA_RUN after seeking if needed.
846 strm->internal->supported_actions[LZMA_RUN] = true;
847 strm->internal->supported_actions[LZMA_FINISH] = true;
848
849 return LZMA_OK;
850}
Note: See TracBrowser for help on using the repository browser.

© 2025 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette