VirtualBox

source: vbox/trunk/src/libs/liblzma-5.6.4/common/string_conversion.c@ 109042

Last change on this file since 109042 was 108905, checked in by vboxsync, 4 weeks ago

liblzma-5.6.4: Applied and adjusted our liblzma changes to 5.6.4. jiraref:VBP-1613

  • Property svn:eol-style set to LF
  • Property svn:keywords set to Author Date Id Revision
File size: 36.4 KB
Line 
// SPDX-License-Identifier: 0BSD

///////////////////////////////////////////////////////////////////////////////
//
/// \file       string_conversion.c
/// \brief      Conversion of strings to filter chain and vice versa
//
//  Author:     Lasse Collin
//
///////////////////////////////////////////////////////////////////////////////
11
#include "filter_common.h"
13
14
/////////////////////
// String building //
/////////////////////
18
/// Size in bytes of the buffer allocated for each string being built.
/// For now, no realloc is used so this needs to be big enough, even
/// though there is of course still an overflow check (appending
/// truncates at the limit and a full buffer is reported as an error).
///
/// FIXME? Using a fixed size is wasteful if the application doesn't free
/// the string fairly quickly but this can be improved later if needed.
#define STR_ALLOC_SIZE 800
26
27
/// A string under construction: a heap buffer plus the write position.
typedef struct {
    /// Buffer holding the characters written so far. It is not
    /// '\0'-terminated while the string is being built.
    char *buf;

    /// Number of bytes written to buf so far
    size_t pos;
} lzma_str;
32
33
34static lzma_ret
35str_init(lzma_str *str, const lzma_allocator *allocator)
36{
37 str->buf = lzma_alloc(STR_ALLOC_SIZE, allocator);
38 if (str->buf == NULL)
39 return LZMA_MEM_ERROR;
40
41 str->pos = 0;
42 return LZMA_OK;
43}
44
45
46static void
47str_free(lzma_str *str, const lzma_allocator *allocator)
48{
49 lzma_free(str->buf, allocator);
50 return;
51}
52
53
54static bool
55str_is_full(const lzma_str *str)
56{
57 return str->pos == STR_ALLOC_SIZE - 1;
58}
59
60
61static lzma_ret
62str_finish(char **dest, lzma_str *str, const lzma_allocator *allocator)
63{
64 if (str_is_full(str)) {
65 // The preallocated buffer was too small.
66 // This shouldn't happen as STR_ALLOC_SIZE should
67 // be adjusted if new filters are added.
68 lzma_free(str->buf, allocator);
69 *dest = NULL;
70 assert(0);
71 return LZMA_PROG_ERROR;
72 }
73
74 str->buf[str->pos] = '\0';
75 *dest = str->buf;
76 return LZMA_OK;
77}
78
79
80static void
81str_append_str(lzma_str *str, const char *s)
82{
83 const size_t len = strlen(s);
84 const size_t limit = STR_ALLOC_SIZE - 1 - str->pos;
85 const size_t copy_size = my_min(len, limit);
86
87 memcpy(str->buf + str->pos, s, copy_size);
88 str->pos += copy_size;
89 return;
90}
91
92
93static void
94str_append_u32(lzma_str *str, uint32_t v, bool use_byte_suffix)
95{
96 if (v == 0) {
97 str_append_str(str, "0");
98 } else {
99 // NOTE: Don't use plain "B" because xz and the parser in this
100 // file don't support it and at glance it may look like 8
101 // (there cannot be a space before the suffix).
102 static const char suffixes[4][4] = { "", "KiB", "MiB", "GiB" };
103
104 size_t suf = 0;
105 if (use_byte_suffix) {
106 while ((v & 1023) == 0
107 && suf < ARRAY_SIZE(suffixes) - 1) {
108 v >>= 10;
109 ++suf;
110 }
111 }
112
113 // UINT32_MAX in base 10 would need 10 + 1 bytes. Remember
114 // that initializing to "" initializes all elements to
115 // zero so '\0'-termination gets handled by this.
116 char buf[16] = "";
117 size_t pos = sizeof(buf) - 1;
118
119 do {
120 buf[--pos] = '0' + (v % 10);
121 v /= 10;
122 } while (v != 0);
123
124 str_append_str(str, buf + pos);
125 str_append_str(str, suffixes[suf]);
126 }
127
128 return;
129}
130
131
//////////////////////////////////////////////
// Parsing and stringification declarations //
//////////////////////////////////////////////
135
/// Maximum length for filter and option names.
/// 11 chars + terminating '\0' + sizeof(uint32_t) = 16 bytes
#define NAME_LEN_MAX 11
139
140
/// For option_map.flags: Use .u.map to convert the input value
/// to an integer. Without this flag, .u.range.{min,max} are used
/// as the allowed range for the integer.
#define OPTMAP_USE_NAME_VALUE_MAP 0x01

/// For option_map.flags: Allow KiB/MiB/GiB in input string and use them in
/// the stringified output if the value is an exact multiple of these.
/// This is used e.g. for LZMA1/2 dictionary size.
#define OPTMAP_USE_BYTE_SUFFIX 0x02

/// For option_map.flags: If the integer value is zero then this option
/// won't be included in the stringified output. It's used e.g. for
/// BCJ filter start offset which usually is zero.
#define OPTMAP_NO_STRFY_ZERO 0x04
155
/// Possible values for option_map.type. Since OPTMAP_TYPE_UINT32 is 0,
/// it doesn't need to be specified in the initializers as it is
/// the implicit value.
enum {
    OPTMAP_TYPE_UINT32,
    OPTMAP_TYPE_LZMA_MODE,
    OPTMAP_TYPE_LZMA_MATCH_FINDER,
    OPTMAP_TYPE_LZMA_PRESET,
};
165
166
167/// This is for mapping string values in options to integers.
168/// The last element of an array must have "" as the name.
169/// It's used e.g. for match finder names in LZMA1/2.
170typedef struct {
171 const char name[NAME_LEN_MAX + 1];
172 const uint32_t value;
173} name_value_map;
174
175
176/// Each filter that has options needs an array of option_map structures.
177/// The array doesn't need to be terminated as the functions take the
178/// length of the array as an argument.
179///
180/// When converting a string to filter options structure, option values
181/// will be handled in a few different ways:
182///
183/// (1) If .type equals OPTMAP_TYPE_LZMA_PRESET then LZMA1/2 preset string
184/// is handled specially.
185///
186/// (2) If .flags has OPTMAP_USE_NAME_VALUE_MAP set then the string is
187/// converted to an integer using the name_value_map pointed by .u.map.
188/// The last element in .u.map must have .name = "" as the terminator.
189///
190/// (3) Otherwise the string is treated as a non-negative unsigned decimal
191/// integer which must be in the range set in .u.range. If .flags has
192/// OPTMAP_USE_BYTE_SUFFIX then KiB, MiB, and GiB suffixes are allowed.
193///
194/// The integer value from (2) or (3) is then stored to filter_options
195/// at the offset specified in .offset using the type specified in .type
196/// (default is uint32_t).
197///
198/// Stringifying a filter is done by processing a given number of options
199/// in order from the beginning of an option_map array. The integer is
200/// read from filter_options at .offset using the type from .type.
201///
202/// If the integer is zero and .flags has OPTMAP_NO_STRFY_ZERO then the
203/// option is skipped.
204///
205/// If .flags has OPTMAP_USE_NAME_VALUE_MAP set then .u.map will be used
206/// to convert the option to a string. If the map doesn't contain a string
207/// for the integer value then "UNKNOWN" is used.
208///
209/// If .flags doesn't have OPTMAP_USE_NAME_VALUE_MAP set then the integer is
210/// converted to a decimal value. If OPTMAP_USE_BYTE_SUFFIX is used then KiB,
211/// MiB, or GiB suffix is used if the value is an exact multiple of these.
212/// Plain "B" suffix is never used.
213typedef struct {
214 char name[NAME_LEN_MAX + 1];
215 uint8_t type;
216 uint8_t flags;
217 uint16_t offset;
218
219 union {
220 // NVHPC has problems with unions that contain pointers that
221 // are not the first members, so keep "map" at the top.
222 const name_value_map *map;
223
224 struct {
225 uint32_t min;
226 uint32_t max;
227 } range;
228 } u;
229} option_map;
230
231
232static const char *parse_options(const char **const str, const char *str_end,
233 void *filter_options,
234 const option_map *const optmap, const size_t optmap_size);
235
236
/////////
// BCJ //
/////////
240
#if defined(HAVE_ENCODER_X86) \
        || defined(HAVE_DECODER_X86) \
        || defined(HAVE_ENCODER_ARM) \
        || defined(HAVE_DECODER_ARM) \
        || defined(HAVE_ENCODER_ARMTHUMB) \
        || defined(HAVE_DECODER_ARMTHUMB) \
        || defined(HAVE_ENCODER_ARM64) \
        || defined(HAVE_DECODER_ARM64) \
        || defined(HAVE_ENCODER_POWERPC) \
        || defined(HAVE_DECODER_POWERPC) \
        || defined(HAVE_ENCODER_IA64) \
        || defined(HAVE_DECODER_IA64) \
        || defined(HAVE_ENCODER_SPARC) \
        || defined(HAVE_DECODER_SPARC) \
        || defined(HAVE_ENCODER_RISCV) \
        || defined(HAVE_DECODER_RISCV)
/// Options shared by all BCJ filters: only the start offset. It is omitted
/// from stringified output when it is zero and it may use KiB/MiB/GiB.
static const option_map bcj_optmap[] = {
    {
        .name = "start",
        .flags = OPTMAP_NO_STRFY_ZERO | OPTMAP_USE_BYTE_SUFFIX,
        .offset = offsetof(lzma_options_bcj, start_offset),
        .u.range.min = 0,
        .u.range.max = UINT32_MAX,
    }
};


/// Parses the options of a BCJ filter into filter_options, which is
/// accessed using the layout of lzma_options_bcj.
///
/// \return     NULL on success, otherwise an error message string
///             from parse_options().
static const char *
parse_bcj(const char **const str, const char *str_end, void *filter_options)
{
    // filter_options was zeroed on allocation and that is enough
    // for the default value.
    return parse_options(str, str_end, filter_options,
            bcj_optmap, ARRAY_SIZE(bcj_optmap));
}
#endif
277
278
///////////
// Delta //
///////////
282
#if defined(HAVE_ENCODER_DELTA) || defined(HAVE_DECODER_DELTA)
/// Options of the Delta filter: only the distance.
static const option_map delta_optmap[] = {
    {
        .name = "dist",
        .offset = offsetof(lzma_options_delta, dist),
        .u.range.min = LZMA_DELTA_DIST_MIN,
        .u.range.max = LZMA_DELTA_DIST_MAX,
    }
};


/// Parses the options of the Delta filter into filter_options, which is
/// accessed as lzma_options_delta.
///
/// \return     NULL on success, otherwise an error message string
///             from parse_options().
static const char *
parse_delta(const char **const str, const char *str_end, void *filter_options)
{
    // Set the defaults before parsing: byte-wise delta with
    // the smallest allowed distance.
    lzma_options_delta *opts = filter_options;
    opts->type = LZMA_DELTA_TYPE_BYTE;
    opts->dist = LZMA_DELTA_DIST_MIN;

    return parse_options(str, str_end, filter_options,
            delta_optmap, ARRAY_SIZE(delta_optmap));
}
#endif
305
306
///////////////////
// LZMA1 & LZMA2 //
///////////////////
310
/// Help string for presets
#define LZMA12_PRESET_STR "0-9[e]"
313
314
315static const char *
316parse_lzma12_preset(const char **const str, const char *str_end,
317 uint32_t *preset)
318{
319 assert(*str < str_end);
320
321 if (!(**str >= '0' && **str <= '9'))
322 return "Unsupported preset";
323
324 *preset = (uint32_t)(**str - '0');
325
326 // NOTE: Remember to update LZMA12_PRESET_STR if this is modified!
327 while (++*str < str_end) {
328 switch (**str) {
329 case 'e':
330 *preset |= LZMA_PRESET_EXTREME;
331 break;
332
333 default:
334 return "Unsupported preset flag";
335 }
336 }
337
338 return NULL;
339}
340
341
342static const char *
343set_lzma12_preset(const char **const str, const char *str_end,
344 void *filter_options)
345{
346 uint32_t preset;
347 const char *errmsg = parse_lzma12_preset(str, str_end, &preset);
348 if (errmsg != NULL)
349 return errmsg;
350
351 lzma_options_lzma *opts = filter_options;
352 if (lzma_lzma_preset(opts, preset))
353 return "Unsupported preset";
354
355 return NULL;
356}
357
358
359static const name_value_map lzma12_mode_map[] = {
360 { "fast", LZMA_MODE_FAST },
361 { "normal", LZMA_MODE_NORMAL },
362 { "", 0 }
363};
364
365
366static const name_value_map lzma12_mf_map[] = {
367 { "hc3", LZMA_MF_HC3 },
368 { "hc4", LZMA_MF_HC4 },
369 { "bt2", LZMA_MF_BT2 },
370 { "bt3", LZMA_MF_BT3 },
371 { "bt4", LZMA_MF_BT4 },
372 { "", 0 }
373};
374
375
376static const option_map lzma12_optmap[] = {
377 {
378 .name = "preset",
379 .type = OPTMAP_TYPE_LZMA_PRESET,
380 }, {
381 .name = "dict",
382 .flags = OPTMAP_USE_BYTE_SUFFIX,
383 .offset = offsetof(lzma_options_lzma, dict_size),
384 .u.range.min = LZMA_DICT_SIZE_MIN,
385 // FIXME? The max is really max for encoding but decoding
386 // would allow 4 GiB - 1 B.
387 .u.range.max = (UINT32_C(1) << 30) + (UINT32_C(1) << 29),
388 }, {
389 .name = "lc",
390 .offset = offsetof(lzma_options_lzma, lc),
391 .u.range.min = LZMA_LCLP_MIN,
392 .u.range.max = LZMA_LCLP_MAX,
393 }, {
394 .name = "lp",
395 .offset = offsetof(lzma_options_lzma, lp),
396 .u.range.min = LZMA_LCLP_MIN,
397 .u.range.max = LZMA_LCLP_MAX,
398 }, {
399 .name = "pb",
400 .offset = offsetof(lzma_options_lzma, pb),
401 .u.range.min = LZMA_PB_MIN,
402 .u.range.max = LZMA_PB_MAX,
403 }, {
404 .name = "mode",
405 .type = OPTMAP_TYPE_LZMA_MODE,
406 .flags = OPTMAP_USE_NAME_VALUE_MAP,
407 .offset = offsetof(lzma_options_lzma, mode),
408 .u.map = lzma12_mode_map,
409 }, {
410 .name = "nice",
411 .offset = offsetof(lzma_options_lzma, nice_len),
412 .u.range.min = 2,
413 .u.range.max = 273,
414 }, {
415 .name = "mf",
416 .type = OPTMAP_TYPE_LZMA_MATCH_FINDER,
417 .flags = OPTMAP_USE_NAME_VALUE_MAP,
418 .offset = offsetof(lzma_options_lzma, mf),
419 .u.map = lzma12_mf_map,
420 }, {
421 .name = "depth",
422 .offset = offsetof(lzma_options_lzma, depth),
423 .u.range.min = 0,
424 .u.range.max = UINT32_MAX,
425 }
426};
427
428
429static const char *
430parse_lzma12(const char **const str, const char *str_end, void *filter_options)
431{
432 lzma_options_lzma *opts = filter_options;
433
434 // It cannot fail.
435 const bool preset_ret = lzma_lzma_preset(opts, LZMA_PRESET_DEFAULT);
436 assert(!preset_ret);
437 (void)preset_ret;
438
439 const char *errmsg = parse_options(str, str_end, filter_options,
440 lzma12_optmap, ARRAY_SIZE(lzma12_optmap));
441 if (errmsg != NULL)
442 return errmsg;
443
444 if (opts->lc + opts->lp > LZMA_LCLP_MAX)
445 return "The sum of lc and lp must not exceed 4";
446
447 return NULL;
448}
449
450
/////////////////////////////////////////
// Generic parsing and stringification //
/////////////////////////////////////////
454
455static const struct {
456 /// Name of the filter
457 char name[NAME_LEN_MAX + 1];
458
459 /// For lzma_str_to_filters:
460 /// Size of the filter-specific options structure.
461 uint32_t opts_size;
462
463 /// Filter ID
464 lzma_vli id;
465
466 /// For lzma_str_to_filters:
467 /// Function to parse the filter-specific options. The filter_options
468 /// will already have been allocated using lzma_alloc_zero().
469 const char *(*parse)(const char **str, const char *str_end,
470 void *filter_options);
471
472 /// For lzma_str_from_filters:
473 /// If the flag LZMA_STR_ENCODER is used then the first
474 /// strfy_encoder elements of optmap are stringified.
475 /// With LZMA_STR_DECODER strfy_decoder is used.
476 /// Currently encoders use all options that decoders do but if
477 /// that changes then this needs to be changed too, for example,
478 /// add a new OPTMAP flag to skip printing some decoder-only options.
479 const option_map *optmap;
480 uint8_t strfy_encoder;
481 uint8_t strfy_decoder;
482
483 /// For lzma_str_from_filters:
484 /// If true, lzma_filter.options is allowed to be NULL. In that case,
485 /// only the filter name is printed without any options.
486 bool allow_null;
487
488} filter_name_map[] = {
489#if defined (HAVE_ENCODER_LZMA1) || defined(HAVE_DECODER_LZMA1)
490 { "lzma1", sizeof(lzma_options_lzma), LZMA_FILTER_LZMA1,
491 &parse_lzma12, lzma12_optmap, 9, 5, false },
492#endif
493
494#if defined(HAVE_ENCODER_LZMA2) || defined(HAVE_DECODER_LZMA2)
495 { "lzma2", sizeof(lzma_options_lzma), LZMA_FILTER_LZMA2,
496 &parse_lzma12, lzma12_optmap, 9, 2, false },
497#endif
498
499#if defined(HAVE_ENCODER_X86) || defined(HAVE_DECODER_X86)
500 { "x86", sizeof(lzma_options_bcj), LZMA_FILTER_X86,
501 &parse_bcj, bcj_optmap, 1, 1, true },
502#endif
503
504#if defined(HAVE_ENCODER_ARM) || defined(HAVE_DECODER_ARM)
505 { "arm", sizeof(lzma_options_bcj), LZMA_FILTER_ARM,
506 &parse_bcj, bcj_optmap, 1, 1, true },
507#endif
508
509#if defined(HAVE_ENCODER_ARMTHUMB) || defined(HAVE_DECODER_ARMTHUMB)
510 { "armthumb", sizeof(lzma_options_bcj), LZMA_FILTER_ARMTHUMB,
511 &parse_bcj, bcj_optmap, 1, 1, true },
512#endif
513
514#if defined(HAVE_ENCODER_ARM64) || defined(HAVE_DECODER_ARM64)
515 { "arm64", sizeof(lzma_options_bcj), LZMA_FILTER_ARM64,
516 &parse_bcj, bcj_optmap, 1, 1, true },
517#endif
518
519#if defined(HAVE_ENCODER_RISCV) || defined(HAVE_DECODER_RISCV)
520 { "riscv", sizeof(lzma_options_bcj), LZMA_FILTER_RISCV,
521 &parse_bcj, bcj_optmap, 1, 1, true },
522#endif
523
524#if defined(HAVE_ENCODER_POWERPC) || defined(HAVE_DECODER_POWERPC)
525 { "powerpc", sizeof(lzma_options_bcj), LZMA_FILTER_POWERPC,
526 &parse_bcj, bcj_optmap, 1, 1, true },
527#endif
528
529#if defined(HAVE_ENCODER_IA64) || defined(HAVE_DECODER_IA64)
530 { "ia64", sizeof(lzma_options_bcj), LZMA_FILTER_IA64,
531 &parse_bcj, bcj_optmap, 1, 1, true },
532#endif
533
534#if defined(HAVE_ENCODER_SPARC) || defined(HAVE_DECODER_SPARC)
535 { "sparc", sizeof(lzma_options_bcj), LZMA_FILTER_SPARC,
536 &parse_bcj, bcj_optmap, 1, 1, true },
537#endif
538
539#if defined(HAVE_ENCODER_DELTA) || defined(HAVE_DECODER_DELTA)
540 { "delta", sizeof(lzma_options_delta), LZMA_FILTER_DELTA,
541 &parse_delta, delta_optmap, 1, 1, false },
542#endif
543};
544
545
546/// Decodes options from a string for one filter (name1=value1,name2=value2).
547/// Caller must have allocated memory for filter_options already and set
548/// the initial default values. This is called from the filter-specific
549/// parse_* functions.
550///
551/// The input string starts at *str and the address in str_end is the first
552/// char that is not part of the string anymore. So no '\0' terminator is
553/// used. *str is advanced every time something has been decoded successfully.
554static const char *
555parse_options(const char **const str, const char *str_end,
556 void *filter_options,
557 const option_map *const optmap, const size_t optmap_size)
558{
559 while (*str < str_end && **str != '\0') {
560 // Each option is of the form name=value.
561 // Commas (',') separate options. Extra commas are ignored.
562 // Ignoring extra commas makes it simpler if an optional
563 // option stored in a shell variable which can be empty.
564 if (**str == ',') {
565 ++*str;
566 continue;
567 }
568
569 // Find where the next name=value ends.
570 const size_t str_len = (size_t)(str_end - *str);
571 const char *name_eq_value_end = memchr(*str, ',', str_len);
572 if (name_eq_value_end == NULL)
573 name_eq_value_end = str_end;
574
575 const char *equals_sign = memchr(*str, '=',
576 (size_t)(name_eq_value_end - *str));
577
578 // Fail if the '=' wasn't found or the option name is missing
579 // (the first char is '=').
580 if (equals_sign == NULL || **str == '=')
581 return "Options must be 'name=value' pairs separated "
582 "with commas";
583
584 // Reject a too long option name so that the memcmp()
585 // in the loop below won't read past the end of the
586 // string in optmap[i].name.
587 const size_t name_len = (size_t)(equals_sign - *str);
588 if (name_len > NAME_LEN_MAX)
589 return "Unknown option name";
590
591 // Find the option name from optmap[].
592 size_t i = 0;
593 while (true) {
594 if (i == optmap_size)
595 return "Unknown option name";
596
597 if (memcmp(*str, optmap[i].name, name_len) == 0
598 && optmap[i].name[name_len] == '\0')
599 break;
600
601 ++i;
602 }
603
604 // The input string is good at least until the start of
605 // the option value.
606 *str = equals_sign + 1;
607
608 // The code assumes that the option value isn't an empty
609 // string so check it here.
610 const size_t value_len = (size_t)(name_eq_value_end - *str);
611 if (value_len == 0)
612 return "Option value cannot be empty";
613
614 // LZMA1/2 preset has its own parsing function.
615 if (optmap[i].type == OPTMAP_TYPE_LZMA_PRESET) {
616 const char *errmsg = set_lzma12_preset(str,
617 name_eq_value_end, filter_options);
618 if (errmsg != NULL)
619 return errmsg;
620
621 continue;
622 }
623
624 // It's an integer value.
625 uint32_t v;
626 if (optmap[i].flags & OPTMAP_USE_NAME_VALUE_MAP) {
627 // The integer is picked from a string-to-integer map.
628 //
629 // Reject a too long value string so that the memcmp()
630 // in the loop below won't read past the end of the
631 // string in optmap[i].u.map[j].name.
632 if (value_len > NAME_LEN_MAX)
633 return "Invalid option value";
634
635 const name_value_map *map = optmap[i].u.map;
636 size_t j = 0;
637 while (true) {
638 // The array is terminated with an empty name.
639 if (map[j].name[0] == '\0')
640 return "Invalid option value";
641
642 if (memcmp(*str, map[j].name, value_len) == 0
643 && map[j].name[value_len]
644 == '\0') {
645 v = map[j].value;
646 break;
647 }
648
649 ++j;
650 }
651 } else if (**str < '0' || **str > '9') {
652 // Note that "max" isn't supported while it is
653 // supported in xz. It's not useful here.
654 return "Value is not a non-negative decimal integer";
655 } else {
656 // strtoul() has locale-specific behavior so it cannot
657 // be relied on to get reproducible results since we
658 // cannot change the locate in a thread-safe library.
659 // It also needs '\0'-termination.
660 //
661 // Use a temporary pointer so that *str will point
662 // to the beginning of the value string in case
663 // an error occurs.
664 const char *p = *str;
665 v = 0;
666 do {
667 if (v > UINT32_MAX / 10)
668 return "Value out of range";
669
670 v *= 10;
671
672 const uint32_t add = (uint32_t)(*p - '0');
673 if (UINT32_MAX - add < v)
674 return "Value out of range";
675
676 v += add;
677 ++p;
678 } while (p < name_eq_value_end
679 && *p >= '0' && *p <= '9');
680
681 if (p < name_eq_value_end) {
682 // Remember this position so that it can be
683 // used for error messages that are
684 // specifically about the suffix. (Out of
685 // range values are about the whole value
686 // and those error messages point to the
687 // beginning of the number part,
688 // not to the suffix.)
689 const char *multiplier_start = p;
690
691 // If multiplier suffix shouldn't be used
692 // then don't allow them even if the value
693 // would stay within limits. This is a somewhat
694 // unnecessary check but it rejects silly
695 // things like lzma2:pb=0MiB which xz allows.
696 if ((optmap[i].flags & OPTMAP_USE_BYTE_SUFFIX)
697 == 0) {
698 *str = multiplier_start;
699 return "This option does not support "
700 "any integer suffixes";
701 }
702
703 uint32_t shift;
704
705 switch (*p) {
706 case 'k':
707 case 'K':
708 shift = 10;
709 break;
710
711 case 'm':
712 case 'M':
713 shift = 20;
714 break;
715
716 case 'g':
717 case 'G':
718 shift = 30;
719 break;
720
721 default:
722 *str = multiplier_start;
723 return "Invalid multiplier suffix "
724 "(KiB, MiB, or GiB)";
725 }
726
727 ++p;
728
729 // Allow "M", "Mi", "MB", "MiB" and the same
730 // for the other five characters from the
731 // switch-statement above. All are handled
732 // as base-2 (perhaps a mistake, perhaps not).
733 // Note that 'i' and 'B' are case sensitive.
734 if (p < name_eq_value_end && *p == 'i')
735 ++p;
736
737 if (p < name_eq_value_end && *p == 'B')
738 ++p;
739
740 // Now we must have no chars remaining.
741 if (p < name_eq_value_end) {
742 *str = multiplier_start;
743 return "Invalid multiplier suffix "
744 "(KiB, MiB, or GiB)";
745 }
746
747 if (v > (UINT32_MAX >> shift))
748 return "Value out of range";
749
750 v <<= shift;
751 }
752
753 if (v < optmap[i].u.range.min
754 || v > optmap[i].u.range.max)
755 return "Value out of range";
756 }
757
758 // Set the value in filter_options. Enums are handled
759 // specially since the underlying type isn't the same
760 // as uint32_t on all systems.
761 void *ptr = (char *)filter_options + optmap[i].offset;
762 switch (optmap[i].type) {
763 case OPTMAP_TYPE_LZMA_MODE:
764 *(lzma_mode *)ptr = (lzma_mode)v;
765 break;
766
767 case OPTMAP_TYPE_LZMA_MATCH_FINDER:
768 *(lzma_match_finder *)ptr = (lzma_match_finder)v;
769 break;
770
771 default:
772 *(uint32_t *)ptr = v;
773 break;
774 }
775
776 // This option has been successfully handled.
777 *str = name_eq_value_end;
778 }
779
780 // No errors.
781 return NULL;
782}
783
784
785/// Finds the name of the filter at the beginning of the string and
786/// calls filter_name_map[i].parse() to decode the filter-specific options.
787/// The caller must have set str_end so that exactly one filter and its
788/// options are present without any trailing characters.
789static const char *
790parse_filter(const char **const str, const char *str_end, lzma_filter *filter,
791 const lzma_allocator *allocator, bool only_xz)
792{
793 // Search for a colon or equals sign that would separate the filter
794 // name from filter options. If neither is found, then the input
795 // string only contains a filter name and there are no options.
796 //
797 // First assume that a colon or equals sign won't be found:
798 const char *name_end = str_end;
799 const char *opts_start = str_end;
800
801 for (const char *p = *str; p < str_end; ++p) {
802 if (*p == ':' || *p == '=') {
803 name_end = p;
804
805 // Filter options (name1=value1,name2=value2,...)
806 // begin after the colon or equals sign.
807 opts_start = p + 1;
808 break;
809 }
810 }
811
812 // Reject a too long filter name so that the memcmp()
813 // in the loop below won't read past the end of the
814 // string in filter_name_map[i].name.
815 const size_t name_len = (size_t)(name_end - *str);
816 if (name_len > NAME_LEN_MAX)
817 return "Unknown filter name";
818
819 for (size_t i = 0; i < ARRAY_SIZE(filter_name_map); ++i) {
820 if (memcmp(*str, filter_name_map[i].name, name_len) == 0
821 && filter_name_map[i].name[name_len] == '\0') {
822 if (only_xz && filter_name_map[i].id
823 >= LZMA_FILTER_RESERVED_START)
824 return "This filter cannot be used in "
825 "the .xz format";
826
827 // Allocate the filter-specific options and
828 // initialize the memory with zeros.
829 void *options = lzma_alloc_zero(
830 filter_name_map[i].opts_size,
831 allocator);
832 if (options == NULL)
833 return "Memory allocation failed";
834
835 // Filter name was found so the input string is good
836 // at least this far.
837 *str = opts_start;
838
839 const char *errmsg = filter_name_map[i].parse(
840 str, str_end, options);
841 if (errmsg != NULL) {
842 lzma_free(options, allocator);
843 return errmsg;
844 }
845
846 // *filter is modified only when parsing is successful.
847 filter->id = filter_name_map[i].id;
848 filter->options = options;
849 return NULL;
850 }
851 }
852
853 return "Unknown filter name";
854}
855
856
857/// Converts the string to a filter chain (array of lzma_filter structures).
858///
859/// *str is advanced every time something has been decoded successfully.
860/// This way the caller knows where in the string a possible error occurred.
861static const char *
862str_to_filters(const char **const str, lzma_filter *filters, uint32_t flags,
863 const lzma_allocator *allocator)
864{
865 const char *errmsg;
866
867 // Skip leading spaces.
868 while (**str == ' ')
869 ++*str;
870
871 if (**str == '\0')
872 return "Empty string is not allowed, "
873 "try \"6\" if a default value is needed";
874
875 // Detect the type of the string.
876 //
877 // A string beginning with a digit or a string beginning with
878 // one dash and a digit are treated as presets. Trailing spaces
879 // will be ignored too (leading spaces were already ignored above).
880 //
881 // For example, "6", "7 ", "-9e", or " -3 " are treated as presets.
882 // Strings like "-" or "- " aren't preset.
883#define MY_IS_DIGIT(c) ((c) >= '0' && (c) <= '9')
884 if (MY_IS_DIGIT(**str) || (**str == '-' && MY_IS_DIGIT((*str)[1]))) {
885 if (**str == '-')
886 ++*str;
887
888 // Ignore trailing spaces.
889 const size_t str_len = strlen(*str);
890 const char *str_end = memchr(*str, ' ', str_len);
891 if (str_end != NULL) {
892 // There is at least one trailing space. Check that
893 // there are no chars other than spaces.
894 for (size_t i = 1; str_end[i] != '\0'; ++i)
895 if (str_end[i] != ' ')
896 return "Unsupported preset";
897 } else {
898 // There are no trailing spaces. Use the whole string.
899 str_end = *str + str_len;
900 }
901
902 uint32_t preset;
903 errmsg = parse_lzma12_preset(str, str_end, &preset);
904 if (errmsg != NULL)
905 return errmsg;
906
907 lzma_options_lzma *opts = lzma_alloc(sizeof(*opts), allocator);
908 if (opts == NULL)
909 return "Memory allocation failed";
910
911 if (lzma_lzma_preset(opts, preset)) {
912 lzma_free(opts, allocator);
913 return "Unsupported preset";
914 }
915
916 filters[0].id = LZMA_FILTER_LZMA2;
917 filters[0].options = opts;
918 filters[1].id = LZMA_VLI_UNKNOWN;
919 filters[1].options = NULL;
920
921 return NULL;
922 }
923
924 // Not a preset so it must be a filter chain.
925 //
926 // If LZMA_STR_ALL_FILTERS isn't used we allow only filters that
927 // can be used in .xz.
928 const bool only_xz = (flags & LZMA_STR_ALL_FILTERS) == 0;
929
930 // Use a temporary array so that we don't modify the caller-supplied
931 // one until we know that no errors occurred.
932 lzma_filter temp_filters[LZMA_FILTERS_MAX + 1];
933
934 size_t i = 0;
935 do {
936 if (i == LZMA_FILTERS_MAX) {
937 errmsg = "The maximum number of filters is four";
938 goto error;
939 }
940
941 // Skip "--" if present.
942 if ((*str)[0] == '-' && (*str)[1] == '-')
943 *str += 2;
944
945 // Locate the end of "filter:name1=value1,name2=value2",
946 // stopping at the first "--" or a single space.
947 const char *filter_end = *str;
948 while (filter_end[0] != '\0') {
949 if ((filter_end[0] == '-' && filter_end[1] == '-')
950 || filter_end[0] == ' ')
951 break;
952
953 ++filter_end;
954 }
955
956 // Inputs that have "--" at the end or "-- " in the middle
957 // will result in an empty filter name.
958 if (filter_end == *str) {
959 errmsg = "Filter name is missing";
960 goto error;
961 }
962
963 errmsg = parse_filter(str, filter_end, &temp_filters[i],
964 allocator, only_xz);
965 if (errmsg != NULL)
966 goto error;
967
968 // Skip trailing spaces.
969 while (**str == ' ')
970 ++*str;
971
972 ++i;
973 } while (**str != '\0');
974
975 // Seems to be good, terminate the array so that
976 // basic validation can be done.
977 temp_filters[i].id = LZMA_VLI_UNKNOWN;
978 temp_filters[i].options = NULL;
979
980 // Do basic validation if the application didn't prohibit it.
981 if ((flags & LZMA_STR_NO_VALIDATION) == 0) {
982 size_t dummy;
983 const lzma_ret ret = lzma_validate_chain(temp_filters, &dummy);
984 assert(ret == LZMA_OK || ret == LZMA_OPTIONS_ERROR);
985 if (ret != LZMA_OK) {
986 errmsg = "Invalid filter chain "
987 "('lzma2' missing at the end?)";
988 goto error;
989 }
990 }
991
992 // All good. Copy the filters to the application supplied array.
993 memcpy(filters, temp_filters, (i + 1) * sizeof(lzma_filter));
994 return NULL;
995
996error:
997 // Free the filter options that were successfully decoded.
998 while (i-- > 0)
999 lzma_free(temp_filters[i].options, allocator);
1000
1001 return errmsg;
1002}
1003
1004
1005extern LZMA_API(const char *)
1006lzma_str_to_filters(const char *str, int *error_pos, lzma_filter *filters,
1007 uint32_t flags, const lzma_allocator *allocator)
1008{
1009 // If error_pos isn't NULL, *error_pos must always be set.
1010 // liblzma <= 5.4.6 and <= 5.6.1 have a bug and don't do this
1011 // when str == NULL or filters == NULL or flags are unsupported.
1012 if (error_pos != NULL)
1013 *error_pos = 0;
1014
1015 if (str == NULL || filters == NULL)
1016 return "Unexpected NULL pointer argument(s) "
1017 "to lzma_str_to_filters()";
1018
1019 // Validate the flags.
1020 const uint32_t supported_flags
1021 = LZMA_STR_ALL_FILTERS
1022 | LZMA_STR_NO_VALIDATION;
1023
1024 if (flags & ~supported_flags)
1025 return "Unsupported flags to lzma_str_to_filters()";
1026
1027 const char *used = str;
1028 const char *errmsg = str_to_filters(&used, filters, flags, allocator);
1029
1030 if (error_pos != NULL) {
1031 const size_t n = (size_t)(used - str);
1032 *error_pos = n > INT_MAX ? INT_MAX : (int)n;
1033 }
1034
1035 return errmsg;
1036}
1037
1038
1039/// Converts options of one filter to a string.
1040///
1041/// The caller must have already put the filter name in the destination
1042/// string. Since it is possible that no options will be needed, the caller
1043/// won't have put a delimiter character (':' or '=') in the string yet.
1044/// We will add it if at least one option will be added to the string.
1045static void
1046strfy_filter(lzma_str *dest, const char *delimiter,
1047 const option_map *optmap, size_t optmap_count,
1048 const void *filter_options)
1049{
1050 for (size_t i = 0; i < optmap_count; ++i) {
1051 // No attempt is made to reverse LZMA1/2 preset.
1052 if (optmap[i].type == OPTMAP_TYPE_LZMA_PRESET)
1053 continue;
1054
1055 // All options have integer values, some just are mapped
1056 // to a string with a name_value_map. LZMA1/2 preset
1057 // isn't reversed back to preset=PRESET form.
1058 uint32_t v;
1059 const void *ptr
1060 = (const char *)filter_options + optmap[i].offset;
1061 switch (optmap[i].type) {
1062 case OPTMAP_TYPE_LZMA_MODE:
1063 v = *(const lzma_mode *)ptr;
1064 break;
1065
1066 case OPTMAP_TYPE_LZMA_MATCH_FINDER:
1067 v = *(const lzma_match_finder *)ptr;
1068 break;
1069
1070 default:
1071 v = *(const uint32_t *)ptr;
1072 break;
1073 }
1074
1075 // Skip this if this option should be omitted from
1076 // the string when the value is zero.
1077 if (v == 0 && (optmap[i].flags & OPTMAP_NO_STRFY_ZERO))
1078 continue;
1079
1080 // Before the first option we add whatever delimiter
1081 // the caller gave us. For later options a comma is used.
1082 str_append_str(dest, delimiter);
1083 delimiter = ",";
1084
1085 // Add the option name and equals sign.
1086 str_append_str(dest, optmap[i].name);
1087 str_append_str(dest, "=");
1088
1089 if (optmap[i].flags & OPTMAP_USE_NAME_VALUE_MAP) {
1090 const name_value_map *map = optmap[i].u.map;
1091 size_t j = 0;
1092 while (true) {
1093 if (map[j].name[0] == '\0') {
1094 str_append_str(dest, "UNKNOWN");
1095 break;
1096 }
1097
1098 if (map[j].value == v) {
1099 str_append_str(dest, map[j].name);
1100 break;
1101 }
1102
1103 ++j;
1104 }
1105 } else {
1106 str_append_u32(dest, v,
1107 optmap[i].flags & OPTMAP_USE_BYTE_SUFFIX);
1108 }
1109 }
1110
1111 return;
1112}
1113
1114
1115extern LZMA_API(lzma_ret)
1116lzma_str_from_filters(char **output_str, const lzma_filter *filters,
1117 uint32_t flags, const lzma_allocator *allocator)
1118{
1119 // On error *output_str is always set to NULL.
1120 // Do it as the very first step.
1121 if (output_str == NULL)
1122 return LZMA_PROG_ERROR;
1123
1124 *output_str = NULL;
1125
1126 if (filters == NULL)
1127 return LZMA_PROG_ERROR;
1128
1129 // Validate the flags.
1130 const uint32_t supported_flags
1131 = LZMA_STR_ENCODER
1132 | LZMA_STR_DECODER
1133 | LZMA_STR_GETOPT_LONG
1134 | LZMA_STR_NO_SPACES;
1135
1136 if (flags & ~supported_flags)
1137 return LZMA_OPTIONS_ERROR;
1138
1139 // There must be at least one filter.
1140 if (filters[0].id == LZMA_VLI_UNKNOWN)
1141 return LZMA_OPTIONS_ERROR;
1142
1143 // Allocate memory for the output string.
1144 lzma_str dest;
1145 return_if_error(str_init(&dest, allocator));
1146
1147 const bool show_opts = (flags & (LZMA_STR_ENCODER | LZMA_STR_DECODER));
1148
1149 const char *opt_delim = (flags & LZMA_STR_GETOPT_LONG) ? "=" : ":";
1150
1151 for (size_t i = 0; filters[i].id != LZMA_VLI_UNKNOWN; ++i) {
1152 // If we reach LZMA_FILTERS_MAX, then the filters array
1153 // is too large since the ID cannot be LZMA_VLI_UNKNOWN here.
1154 if (i == LZMA_FILTERS_MAX) {
1155 str_free(&dest, allocator);
1156 return LZMA_OPTIONS_ERROR;
1157 }
1158
1159 // Don't add a space between filters if the caller
1160 // doesn't want them.
1161 if (i > 0 && !(flags & LZMA_STR_NO_SPACES))
1162 str_append_str(&dest, " ");
1163
1164 // Use dashes for xz getopt_long() compatible syntax but also
1165 // use dashes to separate filters when spaces weren't wanted.
1166 if ((flags & LZMA_STR_GETOPT_LONG)
1167 || (i > 0 && (flags & LZMA_STR_NO_SPACES)))
1168 str_append_str(&dest, "--");
1169
1170 size_t j = 0;
1171 while (true) {
1172 if (j == ARRAY_SIZE(filter_name_map)) {
1173 // Filter ID in filters[i].id isn't supported.
1174 str_free(&dest, allocator);
1175 return LZMA_OPTIONS_ERROR;
1176 }
1177
1178 if (filter_name_map[j].id == filters[i].id) {
1179 // Add the filter name.
1180 str_append_str(&dest, filter_name_map[j].name);
1181
1182 // If only the filter names were wanted then
1183 // skip to the next filter. In this case
1184 // .options is ignored and may be NULL even
1185 // when the filter doesn't allow NULL options.
1186 if (!show_opts)
1187 break;
1188
1189 if (filters[i].options == NULL) {
1190 if (!filter_name_map[j].allow_null) {
1191 // Filter-specific options
1192 // are missing but with
1193 // this filter the options
1194 // structure is mandatory.
1195 str_free(&dest, allocator);
1196 return LZMA_OPTIONS_ERROR;
1197 }
1198
1199 // .options is allowed to be NULL.
1200 // There is no need to add any
1201 // options to the string.
1202 break;
1203 }
1204
1205 // Options structure is available. Add
1206 // the filter options to the string.
1207 const size_t optmap_count
1208 = (flags & LZMA_STR_ENCODER)
1209 ? filter_name_map[j].strfy_encoder
1210 : filter_name_map[j].strfy_decoder;
1211 strfy_filter(&dest, opt_delim,
1212 filter_name_map[j].optmap,
1213 optmap_count,
1214 filters[i].options);
1215 break;
1216 }
1217
1218 ++j;
1219 }
1220 }
1221
1222 return str_finish(output_str, &dest, allocator);
1223}
1224
1225
1226extern LZMA_API(lzma_ret)
1227lzma_str_list_filters(char **output_str, lzma_vli filter_id, uint32_t flags,
1228 const lzma_allocator *allocator)
1229{
1230 // On error *output_str is always set to NULL.
1231 // Do it as the very first step.
1232 if (output_str == NULL)
1233 return LZMA_PROG_ERROR;
1234
1235 *output_str = NULL;
1236
1237 // Validate the flags.
1238 const uint32_t supported_flags
1239 = LZMA_STR_ALL_FILTERS
1240 | LZMA_STR_ENCODER
1241 | LZMA_STR_DECODER
1242 | LZMA_STR_GETOPT_LONG;
1243
1244 if (flags & ~supported_flags)
1245 return LZMA_OPTIONS_ERROR;
1246
1247 // Allocate memory for the output string.
1248 lzma_str dest;
1249 return_if_error(str_init(&dest, allocator));
1250
1251 // If only listing the filter names then separate them with spaces.
1252 // Otherwise use newlines.
1253 const bool show_opts = (flags & (LZMA_STR_ENCODER | LZMA_STR_DECODER));
1254 const char *filter_delim = show_opts ? "\n" : " ";
1255
1256 const char *opt_delim = (flags & LZMA_STR_GETOPT_LONG) ? "=" : ":";
1257 bool first_filter_printed = false;
1258
1259 for (size_t i = 0; i < ARRAY_SIZE(filter_name_map); ++i) {
1260 // If we are printing only one filter then skip others.
1261 if (filter_id != LZMA_VLI_UNKNOWN
1262 && filter_id != filter_name_map[i].id)
1263 continue;
1264
1265 // If we are printing only .xz filters then skip the others.
1266 if (filter_name_map[i].id >= LZMA_FILTER_RESERVED_START
1267 && (flags & LZMA_STR_ALL_FILTERS) == 0
1268 && filter_id == LZMA_VLI_UNKNOWN)
1269 continue;
1270
1271 // Add a new line if this isn't the first filter being
1272 // written to the string.
1273 if (first_filter_printed)
1274 str_append_str(&dest, filter_delim);
1275
1276 first_filter_printed = true;
1277
1278 if (flags & LZMA_STR_GETOPT_LONG)
1279 str_append_str(&dest, "--");
1280
1281 str_append_str(&dest, filter_name_map[i].name);
1282
1283 // If only the filter names were wanted then continue
1284 // to the next filter.
1285 if (!show_opts)
1286 continue;
1287
1288 const option_map *optmap = filter_name_map[i].optmap;
1289 const char *d = opt_delim;
1290
1291 const size_t end = (flags & LZMA_STR_ENCODER)
1292 ? filter_name_map[i].strfy_encoder
1293 : filter_name_map[i].strfy_decoder;
1294
1295 for (size_t j = 0; j < end; ++j) {
1296 // The first option is delimited from the filter
1297 // name using "=" or ":" and the rest of the options
1298 // are separated with ",".
1299 str_append_str(&dest, d);
1300 d = ",";
1301
1302 // optname=<possible_values>
1303 str_append_str(&dest, optmap[j].name);
1304 str_append_str(&dest, "=<");
1305
1306 if (optmap[j].type == OPTMAP_TYPE_LZMA_PRESET) {
1307 // LZMA1/2 preset has its custom help string.
1308 str_append_str(&dest, LZMA12_PRESET_STR);
1309 } else if (optmap[j].flags
1310 & OPTMAP_USE_NAME_VALUE_MAP) {
1311 // Separate the possible option values by "|".
1312 const name_value_map *m = optmap[j].u.map;
1313 for (size_t k = 0; m[k].name[0] != '\0'; ++k) {
1314 if (k > 0)
1315 str_append_str(&dest, "|");
1316
1317 str_append_str(&dest, m[k].name);
1318 }
1319 } else {
1320 // Integer range is shown as min-max.
1321 const bool use_byte_suffix = optmap[j].flags
1322 & OPTMAP_USE_BYTE_SUFFIX;
1323 str_append_u32(&dest, optmap[j].u.range.min,
1324 use_byte_suffix);
1325 str_append_str(&dest, "-");
1326 str_append_u32(&dest, optmap[j].u.range.max,
1327 use_byte_suffix);
1328 }
1329
1330 str_append_str(&dest, ">");
1331 }
1332 }
1333
1334 // If no filters were added to the string then it must be because
1335 // the caller provided an unsupported Filter ID.
1336 if (!first_filter_printed) {
1337 str_free(&dest, allocator);
1338 return LZMA_OPTIONS_ERROR;
1339 }
1340
1341 return str_finish(output_str, &dest, allocator);
1342}
Note: See TracBrowser for help on using the repository browser.

© 2025 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette