VirtualBox

source: vbox/trunk/src/libs/liblzma-5.4.1/api/lzma/lzma12.h@ 102535

Last change on this file since 102535 was 98730, checked in by vboxsync, 2 years ago

libs/liblzma-5.4.1: Export to OSE, bugref:10254

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 19.4 KB
Line 
1/**
2 * \file lzma/lzma12.h
3 * \brief LZMA1 and LZMA2 filters
4 */
5
6/*
7 * Author: Lasse Collin
8 *
9 * This file has been put into the public domain.
10 * You can do whatever you want with this file.
11 *
12 * See ../lzma.h for information about liblzma as a whole.
13 */
14
15#ifndef LZMA_H_INTERNAL
16# error Never include this file directly. Use <lzma.h> instead.
17#endif
18
19
/**
 * \brief       LZMA1 Filter ID (for raw encoder/decoder only, not in .xz)
 *
 * LZMA1 is the very same thing as what was called just LZMA in LZMA Utils,
 * 7-Zip, and LZMA SDK. It's called LZMA1 here to prevent developers from
 * accidentally using LZMA when they actually want LZMA2.
 */
#define LZMA_FILTER_LZMA1       LZMA_VLI_C(0x4000000000000001)
28
/**
 * \brief       LZMA1 Filter ID with extended options (for raw encoder/decoder)
 *
 * This is like LZMA_FILTER_LZMA1 but with this ID a few extra options
 * are supported in the lzma_options_lzma structure:
 *
 *   - A flag to tell the encoder if the end of payload marker (EOPM) alias
 *     end of stream (EOS) marker must be written at the end of the stream.
 *     In contrast, LZMA_FILTER_LZMA1 always writes the end marker.
 *
 *   - Decoder needs to be told the uncompressed size of the stream
 *     or that it is unknown (using the special value UINT64_MAX).
 *     If the size is known, a flag can be set to allow the presence of
 *     the end marker anyway. In contrast, LZMA_FILTER_LZMA1 always
 *     behaves as if the uncompressed size was unknown.
 *
 * This allows handling file formats where LZMA1 streams are used but where
 * the end marker isn't allowed or where it might not (always) be present.
 * This extended LZMA1 functionality is provided as a Filter ID for raw
 * encoder and decoder instead of adding new encoder and decoder initialization
 * functions because this way it is possible to also use extra filters,
 * for example, LZMA_FILTER_X86 in a filter chain with LZMA_FILTER_LZMA1EXT,
 * which might be needed to handle some file formats.
 */
#define LZMA_FILTER_LZMA1EXT    LZMA_VLI_C(0x4000000000000002)
54
/**
 * \brief       LZMA2 Filter ID
 *
 * Usually you want this instead of LZMA1. Compared to LZMA1, LZMA2 adds
 * support for LZMA_SYNC_FLUSH, uncompressed chunks (smaller expansion
 * when trying to compress uncompressible data), possibility to change
 * lc/lp/pb in the middle of encoding, and some other internal improvements.
 */
#define LZMA_FILTER_LZMA2       LZMA_VLI_C(0x21)
64
65
/**
 * \brief       Match finders
 *
 * Match finder has major effect on both speed and compression ratio.
 * Usually hash chains are faster than binary trees.
 *
 * If you will use LZMA_SYNC_FLUSH often, the hash chains may be a better
 * choice, because binary trees get much higher compression ratio penalty
 * with LZMA_SYNC_FLUSH.
 *
 * The memory usage formulas are only rough estimates, which are closest to
 * reality when dict_size is a power of two. The formulas are more complex
 * in reality, and can also change a little between liblzma versions. Use
 * lzma_raw_encoder_memusage() to get more accurate estimate of memory usage.
 */
typedef enum {
	LZMA_MF_HC3     = 0x03,
		/**<
		 * \brief       Hash Chain with 2- and 3-byte hashing
		 *
		 * Minimum nice_len: 3
		 *
		 * Memory usage:
		 *  - dict_size <= 16 MiB: dict_size * 7.5
		 *  - dict_size > 16 MiB: dict_size * 5.5 + 64 MiB
		 */

	LZMA_MF_HC4     = 0x04,
		/**<
		 * \brief       Hash Chain with 2-, 3-, and 4-byte hashing
		 *
		 * Minimum nice_len: 4
		 *
		 * Memory usage:
		 *  - dict_size <= 32 MiB: dict_size * 7.5
		 *  - dict_size > 32 MiB: dict_size * 6.5
		 */

	LZMA_MF_BT2     = 0x12,
		/**<
		 * \brief       Binary Tree with 2-byte hashing
		 *
		 * Minimum nice_len: 2
		 *
		 * Memory usage: dict_size * 9.5
		 */

	LZMA_MF_BT3     = 0x13,
		/**<
		 * \brief       Binary Tree with 2- and 3-byte hashing
		 *
		 * Minimum nice_len: 3
		 *
		 * Memory usage:
		 *  - dict_size <= 16 MiB: dict_size * 11.5
		 *  - dict_size > 16 MiB: dict_size * 9.5 + 64 MiB
		 */

	LZMA_MF_BT4     = 0x14
		/**<
		 * \brief       Binary Tree with 2-, 3-, and 4-byte hashing
		 *
		 * Minimum nice_len: 4
		 *
		 * Memory usage:
		 *  - dict_size <= 32 MiB: dict_size * 11.5
		 *  - dict_size > 32 MiB: dict_size * 10.5
		 */
} lzma_match_finder;
135
136
137/**
138 * \brief Test if given match finder is supported
139 *
140 * Return true if the given match finder is supported by this liblzma build.
141 * Otherwise false is returned. It is safe to call this with a value that
142 * isn't listed in lzma_match_finder enumeration; the return value will be
143 * false.
144 *
145 * There is no way to list which match finders are available in this
146 * particular liblzma version and build. It would be useless, because
147 * a new match finder, which the application developer wasn't aware,
148 * could require giving additional options to the encoder that the older
149 * match finders don't need.
150 */
151extern LZMA_API(lzma_bool) lzma_mf_is_supported(lzma_match_finder match_finder)
152 lzma_nothrow lzma_attr_const;
153
154
/**
 * \brief       Compression modes
 *
 * This selects the function used to analyze the data produced by the match
 * finder.
 */
typedef enum {
	LZMA_MODE_FAST = 1,
		/**<
		 * \brief       Fast compression
		 *
		 * Fast mode is usually at its best when combined with
		 * a hash chain match finder.
		 */

	LZMA_MODE_NORMAL = 2
		/**<
		 * \brief       Normal compression
		 *
		 * This is usually notably slower than fast mode. Use this
		 * together with binary tree match finders to expose the
		 * full potential of the LZMA1 or LZMA2 encoder.
		 */
} lzma_mode;
179
180
181/**
182 * \brief Test if given compression mode is supported
183 *
184 * Return true if the given compression mode is supported by this liblzma
185 * build. Otherwise false is returned. It is safe to call this with a value
186 * that isn't listed in lzma_mode enumeration; the return value will be false.
187 *
188 * There is no way to list which modes are available in this particular
189 * liblzma version and build. It would be useless, because a new compression
190 * mode, which the application developer wasn't aware, could require giving
191 * additional options to the encoder that the older modes don't need.
192 */
193extern LZMA_API(lzma_bool) lzma_mode_is_supported(lzma_mode mode)
194 lzma_nothrow lzma_attr_const;
195
196
197/**
198 * \brief Options specific to the LZMA1 and LZMA2 filters
199 *
200 * Since LZMA1 and LZMA2 share most of the code, it's simplest to share
201 * the options structure too. For encoding, all but the reserved variables
202 * need to be initialized unless specifically mentioned otherwise.
203 * lzma_lzma_preset() can be used to get a good starting point.
204 *
205 * For raw decoding, both LZMA1 and LZMA2 need dict_size, preset_dict, and
206 * preset_dict_size (if preset_dict != NULL). LZMA1 needs also lc, lp, and pb.
207 */
208typedef struct {
209 /**
210 * \brief Dictionary size in bytes
211 *
212 * Dictionary size indicates how many bytes of the recently processed
213 * uncompressed data is kept in memory. One method to reduce size of
214 * the uncompressed data is to store distance-length pairs, which
215 * indicate what data to repeat from the dictionary buffer. Thus,
216 * the bigger the dictionary, the better the compression ratio
217 * usually is.
218 *
219 * Maximum size of the dictionary depends on multiple things:
220 * - Memory usage limit
221 * - Available address space (not a problem on 64-bit systems)
222 * - Selected match finder (encoder only)
223 *
224 * Currently the maximum dictionary size for encoding is 1.5 GiB
225 * (i.e. (UINT32_C(1) << 30) + (UINT32_C(1) << 29)) even on 64-bit
226 * systems for certain match finder implementation reasons. In the
227 * future, there may be match finders that support bigger
228 * dictionaries.
229 *
230 * Decoder already supports dictionaries up to 4 GiB - 1 B (i.e.
231 * UINT32_MAX), so increasing the maximum dictionary size of the
232 * encoder won't cause problems for old decoders.
233 *
234 * Because extremely small dictionaries sizes would have unneeded
235 * overhead in the decoder, the minimum dictionary size is 4096 bytes.
236 *
237 * \note When decoding, too big dictionary does no other harm
238 * than wasting memory.
239 */
240 uint32_t dict_size;
241# define LZMA_DICT_SIZE_MIN UINT32_C(4096)
242# define LZMA_DICT_SIZE_DEFAULT (UINT32_C(1) << 23)
243
244 /**
245 * \brief Pointer to an initial dictionary
246 *
247 * It is possible to initialize the LZ77 history window using
248 * a preset dictionary. It is useful when compressing many
249 * similar, relatively small chunks of data independently from
250 * each other. The preset dictionary should contain typical
251 * strings that occur in the files being compressed. The most
252 * probable strings should be near the end of the preset dictionary.
253 *
254 * This feature should be used only in special situations. For
255 * now, it works correctly only with raw encoding and decoding.
256 * Currently none of the container formats supported by
257 * liblzma allow preset dictionary when decoding, thus if
258 * you create a .xz or .lzma file with preset dictionary, it
259 * cannot be decoded with the regular decoder functions. In the
260 * future, the .xz format will likely get support for preset
261 * dictionary though.
262 */
263 const uint8_t *preset_dict;
264
265 /**
266 * \brief Size of the preset dictionary
267 *
268 * Specifies the size of the preset dictionary. If the size is
269 * bigger than dict_size, only the last dict_size bytes are
270 * processed.
271 *
272 * This variable is read only when preset_dict is not NULL.
273 * If preset_dict is not NULL but preset_dict_size is zero,
274 * no preset dictionary is used (identical to only setting
275 * preset_dict to NULL).
276 */
277 uint32_t preset_dict_size;
278
279 /**
280 * \brief Number of literal context bits
281 *
282 * How many of the highest bits of the previous uncompressed
283 * eight-bit byte (also known as `literal') are taken into
284 * account when predicting the bits of the next literal.
285 *
286 * E.g. in typical English text, an upper-case letter is
287 * often followed by a lower-case letter, and a lower-case
288 * letter is usually followed by another lower-case letter.
289 * In the US-ASCII character set, the highest three bits are 010
290 * for upper-case letters and 011 for lower-case letters.
291 * When lc is at least 3, the literal coding can take advantage of
292 * this property in the uncompressed data.
293 *
294 * There is a limit that applies to literal context bits and literal
295 * position bits together: lc + lp <= 4. Without this limit the
296 * decoding could become very slow, which could have security related
297 * results in some cases like email servers doing virus scanning.
298 * This limit also simplifies the internal implementation in liblzma.
299 *
300 * There may be LZMA1 streams that have lc + lp > 4 (maximum possible
301 * lc would be 8). It is not possible to decode such streams with
302 * liblzma.
303 */
304 uint32_t lc;
305# define LZMA_LCLP_MIN 0
306# define LZMA_LCLP_MAX 4
307# define LZMA_LC_DEFAULT 3
308
309 /**
310 * \brief Number of literal position bits
311 *
312 * lp affects what kind of alignment in the uncompressed data is
313 * assumed when encoding literals. A literal is a single 8-bit byte.
314 * See pb below for more information about alignment.
315 */
316 uint32_t lp;
317# define LZMA_LP_DEFAULT 0
318
319 /**
320 * \brief Number of position bits
321 *
322 * pb affects what kind of alignment in the uncompressed data is
323 * assumed in general. The default means four-byte alignment
324 * (2^ pb =2^2=4), which is often a good choice when there's
325 * no better guess.
326 *
327 * When the alignment is known, setting pb accordingly may reduce
328 * the file size a little. E.g. with text files having one-byte
329 * alignment (US-ASCII, ISO-8859-*, UTF-8), setting pb=0 can
330 * improve compression slightly. For UTF-16 text, pb=1 is a good
331 * choice. If the alignment is an odd number like 3 bytes, pb=0
332 * might be the best choice.
333 *
334 * Even though the assumed alignment can be adjusted with pb and
335 * lp, LZMA1 and LZMA2 still slightly favor 16-byte alignment.
336 * It might be worth taking into account when designing file formats
337 * that are likely to be often compressed with LZMA1 or LZMA2.
338 */
339 uint32_t pb;
340# define LZMA_PB_MIN 0
341# define LZMA_PB_MAX 4
342# define LZMA_PB_DEFAULT 2
343
344 /** Compression mode */
345 lzma_mode mode;
346
347 /**
348 * \brief Nice length of a match
349 *
350 * This determines how many bytes the encoder compares from the match
351 * candidates when looking for the best match. Once a match of at
352 * least nice_len bytes long is found, the encoder stops looking for
353 * better candidates and encodes the match. (Naturally, if the found
354 * match is actually longer than nice_len, the actual length is
355 * encoded; it's not truncated to nice_len.)
356 *
357 * Bigger values usually increase the compression ratio and
358 * compression time. For most files, 32 to 128 is a good value,
359 * which gives very good compression ratio at good speed.
360 *
361 * The exact minimum value depends on the match finder. The maximum
362 * is 273, which is the maximum length of a match that LZMA1 and
363 * LZMA2 can encode.
364 */
365 uint32_t nice_len;
366
367 /** Match finder ID */
368 lzma_match_finder mf;
369
370 /**
371 * \brief Maximum search depth in the match finder
372 *
373 * For every input byte, match finder searches through the hash chain
374 * or binary tree in a loop, each iteration going one step deeper in
375 * the chain or tree. The searching stops if
376 * - a match of at least nice_len bytes long is found;
377 * - all match candidates from the hash chain or binary tree have
378 * been checked; or
379 * - maximum search depth is reached.
380 *
381 * Maximum search depth is needed to prevent the match finder from
382 * wasting too much time in case there are lots of short match
383 * candidates. On the other hand, stopping the search before all
384 * candidates have been checked can reduce compression ratio.
385 *
386 * Setting depth to zero tells liblzma to use an automatic default
387 * value, that depends on the selected match finder and nice_len.
388 * The default is in the range [4, 200] or so (it may vary between
389 * liblzma versions).
390 *
391 * Using a bigger depth value than the default can increase
392 * compression ratio in some cases. There is no strict maximum value,
393 * but high values (thousands or millions) should be used with care:
394 * the encoder could remain fast enough with typical input, but
395 * malicious input could cause the match finder to slow down
396 * dramatically, possibly creating a denial of service attack.
397 */
398 uint32_t depth;
399
400 /**
401 * \brief For LZMA_FILTER_LZMA1EXT: Extended flags
402 *
403 * This is used only with LZMA_FILTER_LZMA1EXT.
404 *
405 * Currently only one flag is supported, LZMA_LZMA1EXT_ALLOW_EOPM:
406 *
407 * - Encoder: If the flag is set, then end marker is written just
408 * like it is with LZMA_FILTER_LZMA1. Without this flag the
409 * end marker isn't written and the application has to store
410 * the uncompressed size somewhere outside the compressed stream.
411 * To decompress streams without the end marker, the appliation
412 * has to set the correct uncompressed size in ext_size_low and
413 * ext_size_high.
414 *
415 * - Decoder: If the uncompressed size in ext_size_low and
416 * ext_size_high is set to the special value UINT64_MAX
417 * (indicating unknown uncompressed size) then this flag is
418 * ignored and the end marker must always be present, that is,
419 * the behavior is identical to LZMA_FILTER_LZMA1.
420 *
421 * Otherwise, if this flag isn't set, then the input stream
422 * must not have the end marker; if the end marker is detected
423 * then it will result in LZMA_DATA_ERROR. This is useful when
424 * it is known that the stream must not have the end marker and
425 * strict validation is wanted.
426 *
427 * If this flag is set, then it is autodetected if the end marker
428 * is present after the specified number of uncompressed bytes
429 * has been decompressed (ext_size_low and ext_size_high). The
430 * end marker isn't allowed in any other position. This behavior
431 * is useful when uncompressed size is known but the end marker
432 * may or may not be present. This is the case, for example,
433 * in .7z files (valid .7z files that have the end marker in
434 * LZMA1 streams are rare but they do exist).
435 */
436 uint32_t ext_flags;
437# define LZMA_LZMA1EXT_ALLOW_EOPM UINT32_C(0x01)
438
439 /**
440 * \brief For LZMA_FILTER_LZMA1EXT: Uncompressed size (low bits)
441 *
442 * The 64-bit uncompressed size is needed for decompression with
443 * LZMA_FILTER_LZMA1EXT. The size is ignored by the encoder.
444 *
445 * The special value UINT64_MAX indicates that the uncompressed size
446 * is unknown and that the end of payload marker (also known as
447 * end of stream marker) must be present to indicate the end of
448 * the LZMA1 stream. Any other value indicates the expected
449 * uncompressed size of the LZMA1 stream. (If LZMA1 was used together
450 * with filters that change the size of the data then the uncompressed
451 * size of the LZMA1 stream could be different than the final
452 * uncompressed size of the filtered stream.)
453 *
454 * ext_size_low holds the least significant 32 bits of the
455 * uncompressed size. The most significant 32 bits must be set
456 * in ext_size_high. The macro lzma_ext_size_set(opt_lzma, u64size)
457 * can be used to set these members.
458 *
459 * The 64-bit uncompressed size is split into two uint32_t variables
460 * because there were no reserved uint64_t members and using the
461 * same options structure for LZMA_FILTER_LZMA1, LZMA_FILTER_LZMA1EXT,
462 * and LZMA_FILTER_LZMA2 was otherwise more convenient than having
463 * a new options structure for LZMA_FILTER_LZMA1EXT. (Replacing two
464 * uint32_t members with one uint64_t changes the ABI on some systems
465 * as the alignment of this struct can increase from 4 bytes to 8.)
466 */
467 uint32_t ext_size_low;
468
469 /**
470 * \brief For LZMA_FILTER_LZMA1EXT: Uncompressed size (high bits)
471 *
472 * This holds the most significant 32 bits of the uncompressed size.
473 */
474 uint32_t ext_size_high;
475
476 /*
477 * Reserved space to allow possible future extensions without
478 * breaking the ABI. You should not touch these, because the names
479 * of these variables may change. These are and will never be used
480 * with the currently supported options, so it is safe to leave these
481 * uninitialized.
482 */
483 uint32_t reserved_int4;
484 uint32_t reserved_int5;
485 uint32_t reserved_int6;
486 uint32_t reserved_int7;
487 uint32_t reserved_int8;
488 lzma_reserved_enum reserved_enum1;
489 lzma_reserved_enum reserved_enum2;
490 lzma_reserved_enum reserved_enum3;
491 lzma_reserved_enum reserved_enum4;
492 void *reserved_ptr1;
493 void *reserved_ptr2;
494
495} lzma_options_lzma;
496
497
/**
 * \brief       Macro to set the 64-bit uncompressed size in ext_size_*
 *
 * This might be convenient when decoding using LZMA_FILTER_LZMA1EXT.
 * This isn't used with LZMA_FILTER_LZMA1 or LZMA_FILTER_LZMA2.
 *
 * \param       opt_lzma2   Options structure (the object itself, not
 *                          a pointer to it) whose ext_size_low and
 *                          ext_size_high members are assigned. This
 *                          argument is expanded twice, so it must not
 *                          have side effects.
 * \param       u64size     Uncompressed size. It is converted to uint64_t
 *                          and evaluated exactly once; the low 32 bits go
 *                          to ext_size_low and the high 32 bits to
 *                          ext_size_high.
 */
#define lzma_set_ext_size(opt_lzma2, u64size) \
do { \
	/* Evaluate the size argument once so that side effects */ \
	/* in it (e.g. i++) are not duplicated. */ \
	const uint64_t lzma_ext_size_tmp_ = (uint64_t)(u64size); \
	(opt_lzma2).ext_size_low = (uint32_t)(lzma_ext_size_tmp_); \
	(opt_lzma2).ext_size_high = (uint32_t)(lzma_ext_size_tmp_ >> 32); \
} while (0)
509
510
511/**
512 * \brief Set a compression preset to lzma_options_lzma structure
513 *
514 * 0 is the fastest and 9 is the slowest. These match the switches -0 .. -9
515 * of the xz command line tool. In addition, it is possible to bitwise-or
516 * flags to the preset. Currently only LZMA_PRESET_EXTREME is supported.
517 * The flags are defined in container.h, because the flags are used also
518 * with lzma_easy_encoder().
519 *
520 * The preset values are subject to changes between liblzma versions.
521 *
522 * This function is available only if LZMA1 or LZMA2 encoder has been enabled
523 * when building liblzma.
524 *
525 * \return On success, false is returned. If the preset is not
526 * supported, true is returned.
527 */
528extern LZMA_API(lzma_bool) lzma_lzma_preset(
529 lzma_options_lzma *options, uint32_t preset) lzma_nothrow;
Note: See TracBrowser for help on using the repository browser.

© 2024 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette