file_info.c@ 108911

Last change on this file since 108911 was 108911, checked in by vboxsync, 4 weeks ago
libs/liblzma: Applied and adjusted our liblzma changes to 5.8.1 and export to OSE. jiraref:VBP-1635
Property svn:eol-style set to `native` Property svn:keywords set to `Author Date Id Revision` Property sync-process set to `export`
File size: 27.9 KB

Line
1	// SPDX-License-Identifier: 0BSD
2
3	///////////////////////////////////////////////////////////////////////////////
4	//
5	/// \file file_info.c
6	/// \brief Decode .xz file information into a lzma_index structure
7	//
8	// Author: Lasse Collin
9	//
10	///////////////////////////////////////////////////////////////////////////////
11
12	#include "index_decoder.h"
13
14
15	typedef struct {
16	enum {
17	SEQ_MAGIC_BYTES,
18	SEQ_PADDING_SEEK,
19	SEQ_PADDING_DECODE,
20	SEQ_FOOTER,
21	SEQ_INDEX_INIT,
22	SEQ_INDEX_DECODE,
23	SEQ_HEADER_DECODE,
24	SEQ_HEADER_COMPARE,
25	} sequence;
26
27	/// Absolute position of in[*in_pos] in the file. All code that
28	/// modifies *in_pos also updates this. seek_to_pos() needs this
29	/// to determine if we need to request the application to seek for
30	/// us or if we can do the seeking internally by adjusting *in_pos.
31	uint64_t file_cur_pos;
32
33	/// This refers to absolute positions of interesting parts of the
34	/// input file. Sometimes it points to the beginning of a specific
35	/// field and sometimes to the end of a field. The current target
36	/// position at each moment is explained in the comments.
37	uint64_t file_target_pos;
38
39	/// Size of the .xz file (from the application).
40	uint64_t file_size;
41
42	/// Index decoder
43	lzma_next_coder index_decoder;
44
45	/// Number of bytes remaining in the Index field that is currently
46	/// being decoded.
47	lzma_vli index_remaining;
48
49	/// The Index decoder will store the decoded Index in this pointer.
50	lzma_index *this_index;
51
52	/// Amount of Stream Padding in the current Stream.
53	lzma_vli stream_padding;
54
55	/// The final combined index is collected here.
56	lzma_index *combined_index;
57
58	/// Pointer from the application where to store the index information
59	/// after successful decoding.
60	lzma_index **dest_index;
61
62	/// Pointer to lzma_stream.seek_pos to be used when returning
63	/// LZMA_SEEK_NEEDED. This is set by seek_to_pos() when needed.
64	uint64_t *external_seek_pos;
65
66	/// Memory usage limit
67	uint64_t memlimit;
68
69	/// Stream Flags from the very beginning of the file.
70	lzma_stream_flags first_header_flags;
71
72	/// Stream Flags from Stream Header of the current Stream.
73	lzma_stream_flags header_flags;
74
75	/// Stream Flags from Stream Footer of the current Stream.
76	lzma_stream_flags footer_flags;
77
78	size_t temp_pos;
79	size_t temp_size;
80	uint8_t temp[8192];
81
82	} lzma_file_info_coder;
83
84
85	/// Copies data from in[*in_pos] into coder->temp until
86	/// coder->temp_pos == coder->temp_size. This also keeps coder->file_cur_pos
87	/// in sync with *in_pos. Returns true if more input is needed.
88	static bool
89	fill_temp(lzma_file_info_coder coder, const uint8_t restrict in,
90	size_t *restrict in_pos, size_t in_size)
91	{
92	coder->file_cur_pos += lzma_bufcpy(in, in_pos, in_size,
93	coder->temp, &coder->temp_pos, coder->temp_size);
94	return coder->temp_pos < coder->temp_size;
95	}
96
97
98	/// Seeks to the absolute file position specified by target_pos.
99	/// This tries to do the seeking by only modifying *in_pos, if possible.
100	/// The main benefit of this is that if one passes the whole file at once
101	/// to lzma_code(), the decoder will never need to return LZMA_SEEK_NEEDED
102	/// as all the seeking can be done by adjusting *in_pos in this function.
103	///
104	/// Returns true if an external seek is needed and the caller must return
105	/// LZMA_SEEK_NEEDED.
106	static bool
107	seek_to_pos(lzma_file_info_coder *coder, uint64_t target_pos,
108	size_t in_start, size_t *in_pos, size_t in_size)
109	{
110	// The input buffer doesn't extend beyond the end of the file.
111	// This has been checked by file_info_decode() already.
112	assert(coder->file_size - coder->file_cur_pos >= in_size - *in_pos);
113
114	const uint64_t pos_min = coder->file_cur_pos - (*in_pos - in_start);
115	const uint64_t pos_max = coder->file_cur_pos + (in_size - *in_pos);
116
117	bool external_seek_needed;
118
119	if (target_pos >= pos_min && target_pos <= pos_max) {
120	// The requested position is available in the current input
121	// buffer or right after it. That is, in a corner case we
122	// end up setting *in_pos == in_size and thus will immediately
123	// need new input bytes from the application.
124	*in_pos += (size_t)(target_pos - coder->file_cur_pos);
125	external_seek_needed = false;
126	} else {
127	// Ask the application to seek the input file.
128	*coder->external_seek_pos = target_pos;
129	external_seek_needed = true;
130
131	// Mark the whole input buffer as used. This way
132	// lzma_stream.total_in will have a better estimate
133	// of the amount of data read. It still won't be perfect
134	// as the value will depend on the input buffer size that
135	// the application uses, but it should be good enough for
136	// those few who want an estimate.
137	*in_pos = in_size;
138	}
139
140	// After seeking (internal or external) the current position
141	// will match the requested target position.
142	coder->file_cur_pos = target_pos;
143
144	return external_seek_needed;
145	}
146
147
148	/// The caller sets coder->file_target_pos so that it points to the end
149	/// of the desired file position. This function then determines how far
150	/// backwards from that position we can seek. After seeking fill_temp()
151	/// can be used to read data into coder->temp. When fill_temp() has finished,
152	/// coder->temp[coder->temp_size] will match coder->file_target_pos.
153	///
154	/// This also validates that coder->target_file_pos is sane in sense that
155	/// we aren't trying to seek too far backwards (too close or beyond the
156	/// beginning of the file).
157	static lzma_ret
158	reverse_seek(lzma_file_info_coder *coder,
159	size_t in_start, size_t *in_pos, size_t in_size)
160	{
161	// Check that there is enough data before the target position
162	// to contain at least Stream Header and Stream Footer. If there
163	// isn't, the file cannot be valid.
164	if (coder->file_target_pos < 2 * LZMA_STREAM_HEADER_SIZE)
165	return LZMA_DATA_ERROR;
166
167	coder->temp_pos = 0;
168
169	// The Stream Header at the very beginning of the file gets handled
170	// specially in SEQ_MAGIC_BYTES and thus we will never need to seek
171	// there. By not seeking to the first LZMA_STREAM_HEADER_SIZE bytes
172	// we avoid a useless external seek after SEQ_MAGIC_BYTES if the
173	// application uses an extremely small input buffer and the input
174	// file is very small.
175	if (coder->file_target_pos - LZMA_STREAM_HEADER_SIZE
176	< sizeof(coder->temp))
177	coder->temp_size = (size_t)(coder->file_target_pos
178	- LZMA_STREAM_HEADER_SIZE);
179	else
180	coder->temp_size = sizeof(coder->temp);
181
182	// The above if-statements guarantee this. This is important because
183	// the Stream Header/Footer decoders assume that there's at least
184	// LZMA_STREAM_HEADER_SIZE bytes in coder->temp.
185	assert(coder->temp_size >= LZMA_STREAM_HEADER_SIZE);
186
187	if (seek_to_pos(coder, coder->file_target_pos - coder->temp_size,
188	in_start, in_pos, in_size))
189	return LZMA_SEEK_NEEDED;
190
191	return LZMA_OK;
192	}
193
194
/// Gets the number of zero-bytes at the end of the buffer.
///
/// \param      buf       Buffer to scan backwards from its end
/// \param      buf_size  Number of bytes in buf
///
/// \return     Length of the run of 0x00 bytes that ends at
///             buf[buf_size - 1]. This is zero if buf_size is zero or
///             the last byte is non-zero, and buf_size if every byte
///             is zero.
static size_t
get_padding_size(const uint8_t *buf, size_t buf_size)
{
	size_t padding = 0;

	// buf_size is decremented before indexing, so the scan starts at
	// the last byte and stops at the first non-zero byte or at buf[0].
	while (buf_size > 0 && buf[--buf_size] == 0x00)
		++padding;

	return padding;
}
205
206
207	/// With the Stream Header at the very beginning of the file, LZMA_FORMAT_ERROR
208	/// is used to tell the application that Magic Bytes didn't match. In other
209	/// Stream Header/Footer fields (in the middle/end of the file) it could be
210	/// a bit confusing to return LZMA_FORMAT_ERROR as we already know that there
211	/// is a valid Stream Header at the beginning of the file. For those cases
212	/// this function is used to convert LZMA_FORMAT_ERROR to LZMA_DATA_ERROR.
213	static lzma_ret
214	hide_format_error(lzma_ret ret)
215	{
216	if (ret == LZMA_FORMAT_ERROR)
217	ret = LZMA_DATA_ERROR;
218
219	return ret;
220	}
221
222
223	/// Calls the Index decoder and updates coder->index_remaining.
224	/// This is a separate function because the input can be either directly
225	/// from the application or from coder->temp.
226	static lzma_ret
227	decode_index(lzma_file_info_coder coder, const lzma_allocator allocator,
228	const uint8_t restrict in, size_t restrict in_pos,
229	size_t in_size, bool update_file_cur_pos)
230	{
231	const size_t in_start = *in_pos;
232
233	const lzma_ret ret = coder->index_decoder.code(
234	coder->index_decoder.coder,
235	allocator, in, in_pos, in_size,
236	NULL, NULL, 0, LZMA_RUN);
237
238	coder->index_remaining -= *in_pos - in_start;
239
240	if (update_file_cur_pos)
241	coder->file_cur_pos += *in_pos - in_start;
242
243	return ret;
244	}
245
246
247	static lzma_ret
248	file_info_decode(void coder_ptr, const lzma_allocator allocator,
249	const uint8_t restrict in, size_t restrict in_pos,
250	size_t in_size,
251	uint8_t *restrict out lzma_attribute((__unused__)),
252	size_t *restrict out_pos lzma_attribute((__unused__)),
253	size_t out_size lzma_attribute((__unused__)),
254	lzma_action action lzma_attribute((__unused__)))
255	{
256	lzma_file_info_coder *coder = coder_ptr;
257	const size_t in_start = *in_pos;
258
259	// If the caller provides input past the end of the file, trim
260	// the extra bytes from the buffer so that we won't read too far.
261	assert(coder->file_size >= coder->file_cur_pos);
262	if (coder->file_size - coder->file_cur_pos < in_size - in_start)
263	in_size = in_start
264	+ (size_t)(coder->file_size - coder->file_cur_pos);
265
266	while (true)
267	switch (coder->sequence) {
268	case SEQ_MAGIC_BYTES:
269	// Decode the Stream Header at the beginning of the file
270	// first to check if the Magic Bytes match. The flags
271	// are stored in coder->first_header_flags so that we
272	// don't need to seek to it again.
273	//
274	// Check that the file is big enough to contain at least
275	// Stream Header.
276	if (coder->file_size < LZMA_STREAM_HEADER_SIZE)
277	return LZMA_FORMAT_ERROR;
278
279	// Read the Stream Header field into coder->temp.
280	if (fill_temp(coder, in, in_pos, in_size))
281	return LZMA_OK;
282
283	// This is the only Stream Header/Footer decoding where we
284	// want to return LZMA_FORMAT_ERROR if the Magic Bytes don't
285	// match. Elsewhere it will be converted to LZMA_DATA_ERROR.
286	return_if_error(lzma_stream_header_decode(
287	&coder->first_header_flags, coder->temp));
288
289	// Now that we know that the Magic Bytes match, check the
290	// file size. It's better to do this here after checking the
291	// Magic Bytes since this way we can give LZMA_FORMAT_ERROR
292	// instead of LZMA_DATA_ERROR when the Magic Bytes don't
293	// match in a file that is too big or isn't a multiple of
294	// four bytes.
295	if (coder->file_size > LZMA_VLI_MAX \|\| (coder->file_size & 3))
296	return LZMA_DATA_ERROR;
297
298	// Start looking for Stream Padding and Stream Footer
299	// at the end of the file.
300	coder->file_target_pos = coder->file_size;
301	FALLTHROUGH;
302
303	case SEQ_PADDING_SEEK:
304	coder->sequence = SEQ_PADDING_DECODE;
305	return_if_error(reverse_seek(
306	coder, in_start, in_pos, in_size));
307	FALLTHROUGH;
308
309	case SEQ_PADDING_DECODE: {
310	// Copy to coder->temp first. This keeps the code simpler if
311	// the application only provides input a few bytes at a time.
312	if (fill_temp(coder, in, in_pos, in_size))
313	return LZMA_OK;
314
315	// Scan the buffer backwards to get the size of the
316	// Stream Padding field (if any).
317	const size_t new_padding = get_padding_size(
318	coder->temp, coder->temp_size);
319	coder->stream_padding += new_padding;
320
321	// Set the target position to the beginning of Stream Padding
322	// that has been observed so far. If all Stream Padding has
323	// been seen, then the target position will be at the end
324	// of the Stream Footer field.
325	coder->file_target_pos -= new_padding;
326
327	if (new_padding == coder->temp_size) {
328	// The whole buffer was padding. Seek backwards in
329	// the file to get more input.
330	coder->sequence = SEQ_PADDING_SEEK;
331	break;
332	}
333
334	// Size of Stream Padding must be a multiple of 4 bytes.
335	if (coder->stream_padding & 3)
336	return LZMA_DATA_ERROR;
337
338	coder->sequence = SEQ_FOOTER;
339
340	// Calculate the amount of non-padding data in coder->temp.
341	coder->temp_size -= new_padding;
342	coder->temp_pos = coder->temp_size;
343
344	// We can avoid an external seek if the whole Stream Footer
345	// is already in coder->temp. In that case SEQ_FOOTER won't
346	// read more input and will find the Stream Footer from
347	// coder->temp[coder->temp_size - LZMA_STREAM_HEADER_SIZE].
348	//
349	// Otherwise we will need to seek. The seeking is done so
350	// that Stream Footer will be at the end of coder->temp.
351	// This way it's likely that we also get a complete Index
352	// field into coder->temp without needing a separate seek
353	// for that (unless the Index field is big).
354	if (coder->temp_size < LZMA_STREAM_HEADER_SIZE)
355	return_if_error(reverse_seek(
356	coder, in_start, in_pos, in_size));
357
358	FALLTHROUGH;
359	}
360
361	case SEQ_FOOTER:
362	// Copy the Stream Footer field into coder->temp.
363	// If Stream Footer was already available in coder->temp
364	// in SEQ_PADDING_DECODE, then this does nothing.
365	if (fill_temp(coder, in, in_pos, in_size))
366	return LZMA_OK;
367
368	// Make coder->file_target_pos and coder->temp_size point
369	// to the beginning of Stream Footer and thus to the end
370	// of the Index field. coder->temp_pos will be updated
371	// a bit later.
372	coder->file_target_pos -= LZMA_STREAM_HEADER_SIZE;
373	coder->temp_size -= LZMA_STREAM_HEADER_SIZE;
374
375	// Decode Stream Footer.
376	return_if_error(hide_format_error(lzma_stream_footer_decode(
377	&coder->footer_flags,
378	coder->temp + coder->temp_size)));
379
380	// Check that we won't seek past the beginning of the file.
381	//
382	// LZMA_STREAM_HEADER_SIZE is added because there must be
383	// space for Stream Header too even though we won't seek
384	// there before decoding the Index field.
385	//
386	// There's no risk of integer overflow here because
387	// Backward Size cannot be greater than 2^34.
388	if (coder->file_target_pos < coder->footer_flags.backward_size
389	+ LZMA_STREAM_HEADER_SIZE)
390	return LZMA_DATA_ERROR;
391
392	// Set the target position to the beginning of the Index field.
393	coder->file_target_pos -= coder->footer_flags.backward_size;
394	coder->sequence = SEQ_INDEX_INIT;
395
396	// We can avoid an external seek if the whole Index field is
397	// already available in coder->temp.
398	if (coder->temp_size >= coder->footer_flags.backward_size) {
399	// Set coder->temp_pos to point to the beginning
400	// of the Index.
401	coder->temp_pos = coder->temp_size
402	- coder->footer_flags.backward_size;
403	} else {
404	// These are set to zero to indicate that there's no
405	// useful data (Index or anything else) in coder->temp.
406	coder->temp_pos = 0;
407	coder->temp_size = 0;
408
409	// Seek to the beginning of the Index field.
410	if (seek_to_pos(coder, coder->file_target_pos,
411	in_start, in_pos, in_size))
412	return LZMA_SEEK_NEEDED;
413	}
414
415	FALLTHROUGH;
416
417	case SEQ_INDEX_INIT: {
418	// Calculate the amount of memory already used by the earlier
419	// Indexes so that we know how big memory limit to pass to
420	// the Index decoder.
421	//
422	// NOTE: When there are multiple Streams, the separate
423	// lzma_index structures can use more RAM (as measured by
424	// lzma_index_memused()) than the final combined lzma_index.
425	// Thus memlimit may need to be slightly higher than the final
426	// calculated memory usage will be. This is perhaps a bit
427	// confusing to the application, but I think it shouldn't
428	// cause problems in practice.
429	uint64_t memused = 0;
430	if (coder->combined_index != NULL) {
431	memused = lzma_index_memused(coder->combined_index);
432	assert(memused <= coder->memlimit);
433	if (memused > coder->memlimit) // Extra sanity check
434	return LZMA_PROG_ERROR;
435	}
436
437	// Initialize the Index decoder.
438	return_if_error(lzma_index_decoder_init(
439	&coder->index_decoder, allocator,
440	&coder->this_index,
441	coder->memlimit - memused));
442
443	coder->index_remaining = coder->footer_flags.backward_size;
444	coder->sequence = SEQ_INDEX_DECODE;
445	FALLTHROUGH;
446	}
447
448	case SEQ_INDEX_DECODE: {
449	// Decode (a part of) the Index. If the whole Index is already
450	// in coder->temp, read it from there. Otherwise read from
451	// in[*in_pos] onwards. Note that index_decode() updates
452	// coder->index_remaining and optionally coder->file_cur_pos.
453	lzma_ret ret;
454	if (coder->temp_size != 0) {
455	assert(coder->temp_size - coder->temp_pos
456	== coder->index_remaining);
457	ret = decode_index(coder, allocator, coder->temp,
458	&coder->temp_pos, coder->temp_size,
459	false);
460	} else {
461	// Don't give the decoder more input than the known
462	// remaining size of the Index field.
463	size_t in_stop = in_size;
464	if (in_size - *in_pos > coder->index_remaining)
465	in_stop = *in_pos
466	+ (size_t)(coder->index_remaining);
467
468	ret = decode_index(coder, allocator,
469	in, in_pos, in_stop, true);
470	}
471
472	switch (ret) {
473	case LZMA_OK:
474	// If the Index docoder asks for more input when we
475	// have already given it as much input as Backward Size
476	// indicated, the file is invalid.
477	if (coder->index_remaining == 0)
478	return LZMA_DATA_ERROR;
479
480	// We cannot get here if we were reading Index from
481	// coder->temp because when reading from coder->temp
482	// we give the Index decoder exactly
483	// coder->index_remaining bytes of input.
484	assert(coder->temp_size == 0);
485
486	return LZMA_OK;
487
488	case LZMA_STREAM_END:
489	// If the decoding seems to be successful, check also
490	// that the Index decoder consumed as much input as
491	// indicated by the Backward Size field.
492	if (coder->index_remaining != 0)
493	return LZMA_DATA_ERROR;
494
495	break;
496
497	default:
498	return ret;
499	}
500
501	// Calculate how much the Index tells us to seek backwards
502	// (relative to the beginning of the Index): Total size of
503	// all Blocks plus the size of the Stream Header field.
504	// No integer overflow here because lzma_index_total_size()
505	// cannot return a value greater than LZMA_VLI_MAX.
506	const uint64_t seek_amount
507	= lzma_index_total_size(coder->this_index)
508	+ LZMA_STREAM_HEADER_SIZE;
509
510	// Check that Index is sane in sense that seek_amount won't
511	// make us seek past the beginning of the file when locating
512	// the Stream Header.
513	//
514	// coder->file_target_pos still points to the beginning of
515	// the Index field.
516	if (coder->file_target_pos < seek_amount)
517	return LZMA_DATA_ERROR;
518
519	// Set the target to the beginning of Stream Header.
520	coder->file_target_pos -= seek_amount;
521
522	if (coder->file_target_pos == 0) {
523	// We would seek to the beginning of the file, but
524	// since we already decoded that Stream Header in
525	// SEQ_MAGIC_BYTES, we can use the cached value from
526	// coder->first_header_flags to avoid the seek.
527	coder->header_flags = coder->first_header_flags;
528	coder->sequence = SEQ_HEADER_COMPARE;
529	break;
530	}
531
532	coder->sequence = SEQ_HEADER_DECODE;
533
534	// Make coder->file_target_pos point to the end of
535	// the Stream Header field.
536	coder->file_target_pos += LZMA_STREAM_HEADER_SIZE;
537
538	// If coder->temp_size is non-zero, it points to the end
539	// of the Index field. Then the beginning of the Index
540	// field is at coder->temp[coder->temp_size
541	// - coder->footer_flags.backward_size].
542	assert(coder->temp_size == 0 \|\| coder->temp_size
543	>= coder->footer_flags.backward_size);
544
545	// If coder->temp contained the whole Index, see if it has
546	// enough data to contain also the Stream Header. If so,
547	// we avoid an external seek.
548	//
549	// NOTE: This can happen only with small .xz files and only
550	// for the non-first Stream as the Stream Flags of the first
551	// Stream are cached and already handled a few lines above.
552	// So this isn't as useful as the other seek-avoidance cases.
553	if (coder->temp_size != 0 && coder->temp_size
554	- coder->footer_flags.backward_size
555	>= seek_amount) {
556	// Make temp_pos and temp_size point to the end of
557	// Stream Header so that SEQ_HEADER_DECODE will find
558	// the start of Stream Header from coder->temp[
559	// coder->temp_size - LZMA_STREAM_HEADER_SIZE].
560	coder->temp_pos = coder->temp_size
561	- coder->footer_flags.backward_size
562	- seek_amount
563	+ LZMA_STREAM_HEADER_SIZE;
564	coder->temp_size = coder->temp_pos;
565	} else {
566	// Seek so that Stream Header will be at the end of
567	// coder->temp. With typical multi-Stream files we
568	// will usually also get the Stream Footer and Index
569	// of the previous Stream in coder->temp and thus
570	// won't need a separate seek for them.
571	return_if_error(reverse_seek(coder,
572	in_start, in_pos, in_size));
573	}
574
575	FALLTHROUGH;
576	}
577
578	case SEQ_HEADER_DECODE:
579	// Copy the Stream Header field into coder->temp.
580	// If Stream Header was already available in coder->temp
581	// in SEQ_INDEX_DECODE, then this does nothing.
582	if (fill_temp(coder, in, in_pos, in_size))
583	return LZMA_OK;
584
585	// Make all these point to the beginning of Stream Header.
586	coder->file_target_pos -= LZMA_STREAM_HEADER_SIZE;
587	coder->temp_size -= LZMA_STREAM_HEADER_SIZE;
588	coder->temp_pos = coder->temp_size;
589
590	// Decode the Stream Header.
591	return_if_error(hide_format_error(lzma_stream_header_decode(
592	&coder->header_flags,
593	coder->temp + coder->temp_size)));
594
595	coder->sequence = SEQ_HEADER_COMPARE;
596	FALLTHROUGH;
597
598	case SEQ_HEADER_COMPARE:
599	// Compare Stream Header against Stream Footer. They must
600	// match.
601	return_if_error(lzma_stream_flags_compare(
602	&coder->header_flags, &coder->footer_flags));
603
604	// Store the decoded Stream Flags into the Index. Use the
605	// Footer Flags because it contains Backward Size, although
606	// it shouldn't matter in practice.
607	if (lzma_index_stream_flags(coder->this_index,
608	&coder->footer_flags) != LZMA_OK)
609	return LZMA_PROG_ERROR;
610
611	// Store also the size of the Stream Padding field. It is
612	// needed to calculate the offsets of the Streams correctly.
613	if (lzma_index_stream_padding(coder->this_index,
614	coder->stream_padding) != LZMA_OK)
615	return LZMA_PROG_ERROR;
616
617	// Reset it so that it's ready for the next Stream.
618	coder->stream_padding = 0;
619
620	// Append the earlier decoded Indexes after this_index.
621	if (coder->combined_index != NULL)
622	return_if_error(lzma_index_cat(coder->this_index,
623	coder->combined_index, allocator));
624
625	coder->combined_index = coder->this_index;
626	coder->this_index = NULL;
627
628	// If the whole file was decoded, tell the caller that we
629	// are finished.
630	if (coder->file_target_pos == 0) {
631	// The combined index must indicate the same file
632	// size as was told to us at initialization.
633	assert(lzma_index_file_size(coder->combined_index)
634	== coder->file_size);
635
636	// Make the combined index available to
637	// the application.
638	*coder->dest_index = coder->combined_index;
639	coder->combined_index = NULL;
640
641	// Mark the input buffer as used since we may have
642	// done internal seeking and thus don't know how
643	// many input bytes were actually used. This way
644	// lzma_stream.total_in gets a slightly better
645	// estimate of the amount of input used.
646	*in_pos = in_size;
647	return LZMA_STREAM_END;
648	}
649
650	// We didn't hit the beginning of the file yet, so continue
651	// reading backwards in the file. If we have unprocessed
652	// data in coder->temp, use it before requesting more data
653	// from the application.
654	//
655	// coder->file_target_pos, coder->temp_size, and
656	// coder->temp_pos all point to the beginning of Stream Header
657	// and thus the end of the previous Stream in the file.
658	coder->sequence = coder->temp_size > 0
659	? SEQ_PADDING_DECODE : SEQ_PADDING_SEEK;
660	break;
661
662	default:
663	assert(0);
664	return LZMA_PROG_ERROR;
665	}
666	}
667
668
669	static lzma_ret
670	file_info_decoder_memconfig(void coder_ptr, uint64_t memusage,
671	uint64_t *old_memlimit, uint64_t new_memlimit)
672	{
673	lzma_file_info_coder *coder = coder_ptr;
674
675	// The memory usage calculation comes from three things:
676	//
677	// (1) The Indexes that have already been decoded and processed into
678	// coder->combined_index.
679	//
680	// (2) The latest Index in coder->this_index that has been decoded but
681	// not yet put into coder->combined_index.
682	//
683	// (3) The latest Index that we have started decoding but haven't
684	// finished and thus isn't available in coder->this_index yet.
685	// Memory usage and limit information needs to be communicated
686	// from/to coder->index_decoder.
687	//
688	// Care has to be taken to not do both (2) and (3) when calculating
689	// the memory usage.
690	uint64_t combined_index_memusage = 0;
691	uint64_t this_index_memusage = 0;
692
693	// (1) If we have already successfully decoded one or more Indexes,
694	// get their memory usage.
695	if (coder->combined_index != NULL)
696	combined_index_memusage = lzma_index_memused(
697	coder->combined_index);
698
699	// Choose between (2), (3), or neither.
700	if (coder->this_index != NULL) {
701	// (2) The latest Index is available. Use its memory usage.
702	this_index_memusage = lzma_index_memused(coder->this_index);
703
704	} else if (coder->sequence == SEQ_INDEX_DECODE) {
705	// (3) The Index decoder is activate and hasn't yet stored
706	// the new index in coder->this_index. Get the memory usage
707	// information from the Index decoder.
708	//
709	// NOTE: If the Index decoder doesn't yet know how much memory
710	// it will eventually need, it will return a tiny value here.
711	uint64_t dummy;
712	if (coder->index_decoder.memconfig(coder->index_decoder.coder,
713	&this_index_memusage, &dummy, 0)
714	!= LZMA_OK) {
715	assert(0);
716	return LZMA_PROG_ERROR;
717	}
718	}
719
720	// Now we know the total memory usage/requirement. If we had neither
721	// old Indexes nor a new Index, this will be zero which isn't
722	// acceptable as lzma_memusage() has to return non-zero on success
723	// and even with an empty .xz file we will end up with a lzma_index
724	// that takes some memory.
725	*memusage = combined_index_memusage + this_index_memusage;
726	if (*memusage == 0)
727	*memusage = lzma_index_memusage(1, 0);
728
729	*old_memlimit = coder->memlimit;
730
731	// If requested, set a new memory usage limit.
732	if (new_memlimit != 0) {
733	if (new_memlimit < *memusage)
734	return LZMA_MEMLIMIT_ERROR;
735
736	// In the condition (3) we need to tell the Index decoder
737	// its new memory usage limit.
738	if (coder->this_index == NULL
739	&& coder->sequence == SEQ_INDEX_DECODE) {
740	const uint64_t idec_new_memlimit = new_memlimit
741	- combined_index_memusage;
742
743	assert(this_index_memusage > 0);
744	assert(idec_new_memlimit > 0);
745
746	uint64_t dummy1;
747	uint64_t dummy2;
748
749	if (coder->index_decoder.memconfig(
750	coder->index_decoder.coder,
751	&dummy1, &dummy2, idec_new_memlimit)
752	!= LZMA_OK) {
753	assert(0);
754	return LZMA_PROG_ERROR;
755	}
756	}
757
758	coder->memlimit = new_memlimit;
759	}
760
761	return LZMA_OK;
762	}
763
764
765	static void
766	file_info_decoder_end(void coder_ptr, const lzma_allocator allocator)
767	{
768	lzma_file_info_coder *coder = coder_ptr;
769
770	lzma_next_end(&coder->index_decoder, allocator);
771	lzma_index_end(coder->this_index, allocator);
772	lzma_index_end(coder->combined_index, allocator);
773
774	lzma_free(coder, allocator);
775	return;
776	}
777
778
779	static lzma_ret
780	lzma_file_info_decoder_init(lzma_next_coder *next,
781	const lzma_allocator allocator, uint64_t seek_pos,
782	lzma_index **dest_index,
783	uint64_t memlimit, uint64_t file_size)
784	{
785	lzma_next_coder_init(&lzma_file_info_decoder_init, next, allocator);
786
787	if (dest_index == NULL)
788	return LZMA_PROG_ERROR;
789
790	lzma_file_info_coder *coder = next->coder;
791	if (coder == NULL) {
792	coder = lzma_alloc(sizeof(lzma_file_info_coder), allocator);
793	if (coder == NULL)
794	return LZMA_MEM_ERROR;
795
796	next->coder = coder;
797	next->code = &file_info_decode;
798	next->end = &file_info_decoder_end;
799	next->memconfig = &file_info_decoder_memconfig;
800
801	coder->index_decoder = LZMA_NEXT_CODER_INIT;
802	coder->this_index = NULL;
803	coder->combined_index = NULL;
804	}
805
806	coder->sequence = SEQ_MAGIC_BYTES;
807	coder->file_cur_pos = 0;
808	coder->file_target_pos = 0;
809	coder->file_size = file_size;
810
811	lzma_index_end(coder->this_index, allocator);
812	coder->this_index = NULL;
813
814	lzma_index_end(coder->combined_index, allocator);
815	coder->combined_index = NULL;
816
817	coder->stream_padding = 0;
818
819	coder->dest_index = dest_index;
820	coder->external_seek_pos = seek_pos;
821
822	// If memlimit is 0, make it 1 to ensure that lzma_memlimit_get()
823	// won't return 0 (which would indicate an error).
824	coder->memlimit = my_max(1, memlimit);
825
826	// Prepare these for reading the first Stream Header into coder->temp.
827	coder->temp_pos = 0;
828	coder->temp_size = LZMA_STREAM_HEADER_SIZE;
829
830	return LZMA_OK;
831	}
832
833
834	extern LZMA_API(lzma_ret)
835	lzma_file_info_decoder(lzma_stream strm, lzma_index *dest_index,
836	uint64_t memlimit, uint64_t file_size)
837	{
838	lzma_next_strm_init(lzma_file_info_decoder_init, strm, &strm->seek_pos,
839	dest_index, memlimit, file_size);
840
841	// We allow LZMA_FINISH in addition to LZMA_RUN for convenience.
842	// lzma_code() is able to handle the LZMA_FINISH + LZMA_SEEK_NEEDED
843	// combination in a sane way. Applications still need to be careful
844	// if they use LZMA_FINISH so that they remember to reset it back
845	// to LZMA_RUN after seeking if needed.
846	strm->internal->supported_actions[LZMA_RUN] = true;
847	strm->internal->supported_actions[LZMA_FINISH] = true;
848
849	return LZMA_OK;
850	}

Note: See TracBrowser for help on using the repository browser.

source: vbox/trunk/src/libs/liblzma-5.8.1/common/file_info.c@ 108911

Download in other formats: