VirtualBox

source: vbox/trunk/src/libs/liblzma-5.8.1/common/string_conversion.c@ 108911

Last change on this file since 108911 was 108911, checked in by vboxsync, 4 weeks ago

libs/liblzma: Applied and adjusted our liblzma changes to 5.8.1 and export to OSE. jiraref:VBP-1635

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
  • Property sync-process set to export
File size: 37.2 KB
Line 
1// SPDX-License-Identifier: 0BSD
2
3///////////////////////////////////////////////////////////////////////////////
4//
5/// \file string_conversion.c
6/// \brief Conversion of strings to filter chain and vice versa
7//
8// Author: Lasse Collin
9//
10///////////////////////////////////////////////////////////////////////////////
11
12#include "filter_common.h"
13
14
15// liblzma itself doesn't use gettext to translate messages.
16// Mark the strings still so that xz can translate them.
17#define N_(msgid) msgid
18
19
20/////////////////////
21// String building //
22/////////////////////
23
24/// How much memory to allocate for strings. For now, no realloc is used
25/// so this needs to be big enough even though there of course is
26/// an overflow check still.
27///
28/// FIXME? Using a fixed size is wasteful if the application doesn't free
29/// the string fairly quickly but this can be improved later if needed.
30#define STR_ALLOC_SIZE 800
31
32
/// Minimal string builder backed by one fixed-size allocation
/// of STR_ALLOC_SIZE bytes (see str_init()).
typedef struct {
	/// Start of the allocated buffer. It is not '\0'-terminated
	/// until str_finish() is called.
	char *buf;

	/// Number of bytes written to buf so far; also the index where
	/// the next appended byte will be stored.
	size_t pos;
} lzma_str;
37
38
39static lzma_ret
40str_init(lzma_str *str, const lzma_allocator *allocator)
41{
42 str->buf = lzma_alloc(STR_ALLOC_SIZE, allocator);
43 if (str->buf == NULL)
44 return LZMA_MEM_ERROR;
45
46 str->pos = 0;
47 return LZMA_OK;
48}
49
50
51static void
52str_free(lzma_str *str, const lzma_allocator *allocator)
53{
54 lzma_free(str->buf, allocator);
55 return;
56}
57
58
59static bool
60str_is_full(const lzma_str *str)
61{
62 return str->pos == STR_ALLOC_SIZE - 1;
63}
64
65
66static lzma_ret
67str_finish(char **dest, lzma_str *str, const lzma_allocator *allocator)
68{
69 if (str_is_full(str)) {
70 // The preallocated buffer was too small.
71 // This shouldn't happen as STR_ALLOC_SIZE should
72 // be adjusted if new filters are added.
73 lzma_free(str->buf, allocator);
74 *dest = NULL;
75 assert(0);
76 return LZMA_PROG_ERROR;
77 }
78
79 str->buf[str->pos] = '\0';
80 *dest = str->buf;
81 return LZMA_OK;
82}
83
84
85static void
86str_append_str(lzma_str *str, const char *s)
87{
88 const size_t len = strlen(s);
89 const size_t limit = STR_ALLOC_SIZE - 1 - str->pos;
90 const size_t copy_size = my_min(len, limit);
91
92 memcpy(str->buf + str->pos, s, copy_size);
93 str->pos += copy_size;
94 return;
95}
96
97
98static void
99str_append_u32(lzma_str *str, uint32_t v, bool use_byte_suffix)
100{
101 if (v == 0) {
102 str_append_str(str, "0");
103 } else {
104 // NOTE: Don't use plain "B" because xz and the parser in this
105 // file don't support it and at glance it may look like 8
106 // (there cannot be a space before the suffix).
107 static const char suffixes[4][4] = { "", "KiB", "MiB", "GiB" };
108
109 size_t suf = 0;
110 if (use_byte_suffix) {
111 while ((v & 1023) == 0
112 && suf < ARRAY_SIZE(suffixes) - 1) {
113 v >>= 10;
114 ++suf;
115 }
116 }
117
118 // UINT32_MAX in base 10 would need 10 + 1 bytes. Remember
119 // that initializing to "" initializes all elements to
120 // zero so '\0'-termination gets handled by this.
121 char buf[16] = "";
122 size_t pos = sizeof(buf) - 1;
123
124 do {
125 buf[--pos] = '0' + (v % 10);
126 v /= 10;
127 } while (v != 0);
128
129 str_append_str(str, buf + pos);
130 str_append_str(str, suffixes[suf]);
131 }
132
133 return;
134}
135
136
137//////////////////////////////////////////////
138// Parsing and stringification declarations //
139//////////////////////////////////////////////
140
141/// Maximum length for filter and option names.
142/// 11 chars + terminating '\0' + sizeof(uint32_t) = 16 bytes
143#define NAME_LEN_MAX 11
144
145
146/// For option_map.flags: Use .u.map to convert the input value
147/// to an integer. Without this flag, .u.range.{min,max} are used
148/// as the allowed range for the integer.
149#define OPTMAP_USE_NAME_VALUE_MAP 0x01
150
151/// For option_map.flags: Allow KiB/MiB/GiB in input string and use them in
152/// the stringified output if the value is an exact multiple of these.
153/// This is used e.g. for LZMA1/2 dictionary size.
154#define OPTMAP_USE_BYTE_SUFFIX 0x02
155
156/// For option_map.flags: If the integer value is zero then this option
157/// won't be included in the stringified output. It's used e.g. for
158/// BCJ filter start offset which usually is zero.
159#define OPTMAP_NO_STRFY_ZERO 0x04
160
161/// Possible values for option_map.type. Since OPTMAP_TYPE_UINT32 is 0,
162/// it doesn't need to be specified in the initializers as it is
163/// the implicit value.
enum {
	// Value is stored as uint32_t (the default, value 0).
	OPTMAP_TYPE_UINT32,

	// Value is stored through a pointer to lzma_mode.
	OPTMAP_TYPE_LZMA_MODE,

	// Value is stored through a pointer to lzma_match_finder.
	OPTMAP_TYPE_LZMA_MATCH_FINDER,

	// Value is a LZMA1/2 preset string, which has its own parser
	// and is never stringified back.
	OPTMAP_TYPE_LZMA_PRESET,
};
170
171
172/// This is for mapping string values in options to integers.
173/// The last element of an array must have "" as the name.
174/// It's used e.g. for match finder names in LZMA1/2.
typedef struct {
	/// Textual form of the value; "" marks the end of the array.
	const char name[NAME_LEN_MAX + 1];

	/// Integer that the name maps to.
	const uint32_t value;
} name_value_map;
179
180
181/// Each filter that has options needs an array of option_map structures.
182/// The array doesn't need to be terminated as the functions take the
183/// length of the array as an argument.
184///
185/// When converting a string to filter options structure, option values
186/// will be handled in a few different ways:
187///
188/// (1) If .type equals OPTMAP_TYPE_LZMA_PRESET then LZMA1/2 preset string
189/// is handled specially.
190///
191/// (2) If .flags has OPTMAP_USE_NAME_VALUE_MAP set then the string is
192/// converted to an integer using the name_value_map pointed by .u.map.
193/// The last element in .u.map must have .name = "" as the terminator.
194///
195/// (3) Otherwise the string is treated as a non-negative unsigned decimal
196/// integer which must be in the range set in .u.range. If .flags has
197/// OPTMAP_USE_BYTE_SUFFIX then KiB, MiB, and GiB suffixes are allowed.
198///
199/// The integer value from (2) or (3) is then stored to filter_options
200/// at the offset specified in .offset using the type specified in .type
201/// (default is uint32_t).
202///
203/// Stringifying a filter is done by processing a given number of options
204/// in order from the beginning of an option_map array. The integer is
205/// read from filter_options at .offset using the type from .type.
206///
207/// If the integer is zero and .flags has OPTMAP_NO_STRFY_ZERO then the
208/// option is skipped.
209///
210/// If .flags has OPTMAP_USE_NAME_VALUE_MAP set then .u.map will be used
211/// to convert the option to a string. If the map doesn't contain a string
212/// for the integer value then "UNKNOWN" is used.
213///
214/// If .flags doesn't have OPTMAP_USE_NAME_VALUE_MAP set then the integer is
215/// converted to a decimal value. If OPTMAP_USE_BYTE_SUFFIX is used then KiB,
216/// MiB, or GiB suffix is used if the value is an exact multiple of these.
217/// Plain "B" suffix is never used.
typedef struct {
	/// Option name as it appears before '=' in the filter string.
	char name[NAME_LEN_MAX + 1];

	/// One of the OPTMAP_TYPE_* values; selects the type that is
	/// read or written at .offset.
	uint8_t type;

	/// Bitwise-or of the OPTMAP_* flags (0 if none).
	uint8_t flags;

	/// Byte offset of the target member within the filter-specific
	/// options structure.
	uint16_t offset;

	/// Which member is valid depends on OPTMAP_USE_NAME_VALUE_MAP
	/// in .flags: .map if it is set, .range otherwise.
	union {
		// NVHPC has problems with unions that contain pointers that
		// are not the first members, so keep "map" at the top.
		const name_value_map *map;

		// Inclusive limits for a plain integer value.
		struct {
			uint32_t min;
			uint32_t max;
		} range;
	} u;
} option_map;
235
236
// Forward declaration: the filter-specific parse_*() functions below call
// parse_options() before its definition, which appears later in this file.
static const char *parse_options(const char **const str, const char *str_end,
		void *filter_options,
		const option_map *const optmap, const size_t optmap_size);
240
241
242/////////
243// BCJ //
244/////////
245
246#if defined(HAVE_ENCODER_X86) \
247 || defined(HAVE_DECODER_X86) \
248 || defined(HAVE_ENCODER_ARM) \
249 || defined(HAVE_DECODER_ARM) \
250 || defined(HAVE_ENCODER_ARMTHUMB) \
251 || defined(HAVE_DECODER_ARMTHUMB) \
252 || defined(HAVE_ENCODER_ARM64) \
253 || defined(HAVE_DECODER_ARM64) \
254 || defined(HAVE_ENCODER_POWERPC) \
255 || defined(HAVE_DECODER_POWERPC) \
256 || defined(HAVE_ENCODER_IA64) \
257 || defined(HAVE_DECODER_IA64) \
258 || defined(HAVE_ENCODER_SPARC) \
259 || defined(HAVE_DECODER_SPARC) \
260 || defined(HAVE_ENCODER_RISCV) \
261 || defined(HAVE_DECODER_RISCV)
262static const option_map bcj_optmap[] = {
263 {
264 .name = "start",
265 .flags = OPTMAP_NO_STRFY_ZERO | OPTMAP_USE_BYTE_SUFFIX,
266 .offset = offsetof(lzma_options_bcj, start_offset),
267 .u.range.min = 0,
268 .u.range.max = UINT32_MAX,
269 }
270};
271
272
273static const char *
274parse_bcj(const char **const str, const char *str_end, void *filter_options)
275{
276 // filter_options was zeroed on allocation and that is enough
277 // for the default value.
278 return parse_options(str, str_end, filter_options,
279 bcj_optmap, ARRAY_SIZE(bcj_optmap));
280}
281#endif
282
283
284///////////
285// Delta //
286///////////
287
288#if defined(HAVE_ENCODER_DELTA) || defined(HAVE_DECODER_DELTA)
289static const option_map delta_optmap[] = {
290 {
291 .name = "dist",
292 .offset = offsetof(lzma_options_delta, dist),
293 .u.range.min = LZMA_DELTA_DIST_MIN,
294 .u.range.max = LZMA_DELTA_DIST_MAX,
295 }
296};
297
298
299static const char *
300parse_delta(const char **const str, const char *str_end, void *filter_options)
301{
302 lzma_options_delta *opts = filter_options;
303 opts->type = LZMA_DELTA_TYPE_BYTE;
304 opts->dist = LZMA_DELTA_DIST_MIN;
305
306 return parse_options(str, str_end, filter_options,
307 delta_optmap, ARRAY_SIZE(delta_optmap));
308}
309#endif
310
311
312///////////////////
313// LZMA1 & LZMA2 //
314///////////////////
315
316/// Help string for presets
317#define LZMA12_PRESET_STR "0-9[e]"
318
319
320static const char *
321parse_lzma12_preset(const char **const str, const char *str_end,
322 uint32_t *preset)
323{
324 assert(*str < str_end);
325
326 if (!(**str >= '0' && **str <= '9'))
327 return N_("Unsupported preset");
328
329 *preset = (uint32_t)(**str - '0');
330
331 // NOTE: Remember to update LZMA12_PRESET_STR if this is modified!
332 while (++*str < str_end) {
333 switch (**str) {
334 case 'e':
335 *preset |= LZMA_PRESET_EXTREME;
336 break;
337
338 default:
339 return N_("Unsupported flag in the preset");
340 }
341 }
342
343 return NULL;
344}
345
346
347static const char *
348set_lzma12_preset(const char **const str, const char *str_end,
349 void *filter_options)
350{
351 uint32_t preset;
352 const char *errmsg = parse_lzma12_preset(str, str_end, &preset);
353 if (errmsg != NULL)
354 return errmsg;
355
356 lzma_options_lzma *opts = filter_options;
357 if (lzma_lzma_preset(opts, preset))
358 return N_("Unsupported preset");
359
360 return NULL;
361}
362
363
364static const name_value_map lzma12_mode_map[] = {
365 { "fast", LZMA_MODE_FAST },
366 { "normal", LZMA_MODE_NORMAL },
367 { "", 0 }
368};
369
370
371static const name_value_map lzma12_mf_map[] = {
372 { "hc3", LZMA_MF_HC3 },
373 { "hc4", LZMA_MF_HC4 },
374 { "bt2", LZMA_MF_BT2 },
375 { "bt3", LZMA_MF_BT3 },
376 { "bt4", LZMA_MF_BT4 },
377 { "", 0 }
378};
379
380
381static const option_map lzma12_optmap[] = {
382 {
383 .name = "preset",
384 .type = OPTMAP_TYPE_LZMA_PRESET,
385 }, {
386 .name = "dict",
387 .flags = OPTMAP_USE_BYTE_SUFFIX,
388 .offset = offsetof(lzma_options_lzma, dict_size),
389 .u.range.min = LZMA_DICT_SIZE_MIN,
390 // FIXME? The max is really max for encoding but decoding
391 // would allow 4 GiB - 1 B.
392 .u.range.max = (UINT32_C(1) << 30) + (UINT32_C(1) << 29),
393 }, {
394 .name = "lc",
395 .offset = offsetof(lzma_options_lzma, lc),
396 .u.range.min = LZMA_LCLP_MIN,
397 .u.range.max = LZMA_LCLP_MAX,
398 }, {
399 .name = "lp",
400 .offset = offsetof(lzma_options_lzma, lp),
401 .u.range.min = LZMA_LCLP_MIN,
402 .u.range.max = LZMA_LCLP_MAX,
403 }, {
404 .name = "pb",
405 .offset = offsetof(lzma_options_lzma, pb),
406 .u.range.min = LZMA_PB_MIN,
407 .u.range.max = LZMA_PB_MAX,
408 }, {
409 .name = "mode",
410 .type = OPTMAP_TYPE_LZMA_MODE,
411 .flags = OPTMAP_USE_NAME_VALUE_MAP,
412 .offset = offsetof(lzma_options_lzma, mode),
413 .u.map = lzma12_mode_map,
414 }, {
415 .name = "nice",
416 .offset = offsetof(lzma_options_lzma, nice_len),
417 .u.range.min = 2,
418 .u.range.max = 273,
419 }, {
420 .name = "mf",
421 .type = OPTMAP_TYPE_LZMA_MATCH_FINDER,
422 .flags = OPTMAP_USE_NAME_VALUE_MAP,
423 .offset = offsetof(lzma_options_lzma, mf),
424 .u.map = lzma12_mf_map,
425 }, {
426 .name = "depth",
427 .offset = offsetof(lzma_options_lzma, depth),
428 .u.range.min = 0,
429 .u.range.max = UINT32_MAX,
430 }
431};
432
433
434static const char *
435parse_lzma12(const char **const str, const char *str_end, void *filter_options)
436{
437 lzma_options_lzma *opts = filter_options;
438
439 // It cannot fail.
440 const bool preset_ret = lzma_lzma_preset(opts, LZMA_PRESET_DEFAULT);
441 assert(!preset_ret);
442 (void)preset_ret;
443
444 const char *errmsg = parse_options(str, str_end, filter_options,
445 lzma12_optmap, ARRAY_SIZE(lzma12_optmap));
446 if (errmsg != NULL)
447 return errmsg;
448
449 if (opts->lc + opts->lp > LZMA_LCLP_MAX)
450 return N_("The sum of lc and lp must not exceed 4");
451
452 return NULL;
453}
454
455
456/////////////////////////////////////////
457// Generic parsing and stringification //
458/////////////////////////////////////////
459
460static const struct {
461 /// Name of the filter
462 char name[NAME_LEN_MAX + 1];
463
464 /// For lzma_str_to_filters:
465 /// Size of the filter-specific options structure.
466 uint32_t opts_size;
467
468 /// Filter ID
469 lzma_vli id;
470
471 /// For lzma_str_to_filters:
472 /// Function to parse the filter-specific options. The filter_options
473 /// will already have been allocated using lzma_alloc_zero().
474 const char *(*parse)(const char **str, const char *str_end,
475 void *filter_options);
476
477 /// For lzma_str_from_filters:
478 /// If the flag LZMA_STR_ENCODER is used then the first
479 /// strfy_encoder elements of optmap are stringified.
480 /// With LZMA_STR_DECODER strfy_decoder is used.
481 /// Currently encoders use all options that decoders do but if
482 /// that changes then this needs to be changed too, for example,
483 /// add a new OPTMAP flag to skip printing some decoder-only options.
484 const option_map *optmap;
485 uint8_t strfy_encoder;
486 uint8_t strfy_decoder;
487
488 /// For lzma_str_from_filters:
489 /// If true, lzma_filter.options is allowed to be NULL. In that case,
490 /// only the filter name is printed without any options.
491 bool allow_null;
492
493} filter_name_map[] = {
494#if defined (HAVE_ENCODER_LZMA1) || defined(HAVE_DECODER_LZMA1)
495 { "lzma1", sizeof(lzma_options_lzma), LZMA_FILTER_LZMA1,
496 &parse_lzma12, lzma12_optmap, 9, 5, false },
497#endif
498
499#if defined(HAVE_ENCODER_LZMA2) || defined(HAVE_DECODER_LZMA2)
500 { "lzma2", sizeof(lzma_options_lzma), LZMA_FILTER_LZMA2,
501 &parse_lzma12, lzma12_optmap, 9, 2, false },
502#endif
503
504#if defined(HAVE_ENCODER_X86) || defined(HAVE_DECODER_X86)
505 { "x86", sizeof(lzma_options_bcj), LZMA_FILTER_X86,
506 &parse_bcj, bcj_optmap, 1, 1, true },
507#endif
508
509#if defined(HAVE_ENCODER_ARM) || defined(HAVE_DECODER_ARM)
510 { "arm", sizeof(lzma_options_bcj), LZMA_FILTER_ARM,
511 &parse_bcj, bcj_optmap, 1, 1, true },
512#endif
513
514#if defined(HAVE_ENCODER_ARMTHUMB) || defined(HAVE_DECODER_ARMTHUMB)
515 { "armthumb", sizeof(lzma_options_bcj), LZMA_FILTER_ARMTHUMB,
516 &parse_bcj, bcj_optmap, 1, 1, true },
517#endif
518
519#if defined(HAVE_ENCODER_ARM64) || defined(HAVE_DECODER_ARM64)
520 { "arm64", sizeof(lzma_options_bcj), LZMA_FILTER_ARM64,
521 &parse_bcj, bcj_optmap, 1, 1, true },
522#endif
523
524#if defined(HAVE_ENCODER_RISCV) || defined(HAVE_DECODER_RISCV)
525 { "riscv", sizeof(lzma_options_bcj), LZMA_FILTER_RISCV,
526 &parse_bcj, bcj_optmap, 1, 1, true },
527#endif
528
529#if defined(HAVE_ENCODER_POWERPC) || defined(HAVE_DECODER_POWERPC)
530 { "powerpc", sizeof(lzma_options_bcj), LZMA_FILTER_POWERPC,
531 &parse_bcj, bcj_optmap, 1, 1, true },
532#endif
533
534#if defined(HAVE_ENCODER_IA64) || defined(HAVE_DECODER_IA64)
535 { "ia64", sizeof(lzma_options_bcj), LZMA_FILTER_IA64,
536 &parse_bcj, bcj_optmap, 1, 1, true },
537#endif
538
539#if defined(HAVE_ENCODER_SPARC) || defined(HAVE_DECODER_SPARC)
540 { "sparc", sizeof(lzma_options_bcj), LZMA_FILTER_SPARC,
541 &parse_bcj, bcj_optmap, 1, 1, true },
542#endif
543
544#if defined(HAVE_ENCODER_DELTA) || defined(HAVE_DECODER_DELTA)
545 { "delta", sizeof(lzma_options_delta), LZMA_FILTER_DELTA,
546 &parse_delta, delta_optmap, 1, 1, false },
547#endif
548};
549
550
551/// Decodes options from a string for one filter (name1=value1,name2=value2).
552/// Caller must have allocated memory for filter_options already and set
553/// the initial default values. This is called from the filter-specific
554/// parse_* functions.
555///
556/// The input string starts at *str and the address in str_end is the first
557/// char that is not part of the string anymore. So no '\0' terminator is
558/// used. *str is advanced every time something has been decoded successfully.
559static const char *
560parse_options(const char **const str, const char *str_end,
561 void *filter_options,
562 const option_map *const optmap, const size_t optmap_size)
563{
564 while (*str < str_end && **str != '\0') {
565 // Each option is of the form name=value.
566 // Commas (',') separate options. Extra commas are ignored.
567 // Ignoring extra commas makes it simpler if an optional
568 // option stored in a shell variable which can be empty.
569 if (**str == ',') {
570 ++*str;
571 continue;
572 }
573
574 // Find where the next name=value ends.
575 const size_t str_len = (size_t)(str_end - *str);
576 const char *name_eq_value_end = memchr(*str, ',', str_len);
577 if (name_eq_value_end == NULL)
578 name_eq_value_end = str_end;
579
580 const char *equals_sign = memchr(*str, '=',
581 (size_t)(name_eq_value_end - *str));
582
583 // Fail if the '=' wasn't found or the option name is missing
584 // (the first char is '=').
585 if (equals_sign == NULL || **str == '=')
586 return N_("Options must be 'name=value' pairs "
587 "separated with commas");
588
589 // Reject a too long option name so that the memcmp()
590 // in the loop below won't read past the end of the
591 // string in optmap[i].name.
592 const size_t name_len = (size_t)(equals_sign - *str);
593 if (name_len > NAME_LEN_MAX)
594 return N_("Unknown option name");
595
596 // Find the option name from optmap[].
597 size_t i = 0;
598 while (true) {
599 if (i == optmap_size)
600 return N_("Unknown option name");
601
602 if (memcmp(*str, optmap[i].name, name_len) == 0
603 && optmap[i].name[name_len] == '\0')
604 break;
605
606 ++i;
607 }
608
609 // The input string is good at least until the start of
610 // the option value.
611 *str = equals_sign + 1;
612
613 // The code assumes that the option value isn't an empty
614 // string so check it here.
615 const size_t value_len = (size_t)(name_eq_value_end - *str);
616 if (value_len == 0)
617 return N_("Option value cannot be empty");
618
619 // LZMA1/2 preset has its own parsing function.
620 if (optmap[i].type == OPTMAP_TYPE_LZMA_PRESET) {
621 const char *errmsg = set_lzma12_preset(str,
622 name_eq_value_end, filter_options);
623 if (errmsg != NULL)
624 return errmsg;
625
626 continue;
627 }
628
629 // It's an integer value.
630 uint32_t v;
631 if (optmap[i].flags & OPTMAP_USE_NAME_VALUE_MAP) {
632 // The integer is picked from a string-to-integer map.
633 //
634 // Reject a too long value string so that the memcmp()
635 // in the loop below won't read past the end of the
636 // string in optmap[i].u.map[j].name.
637 if (value_len > NAME_LEN_MAX)
638 return N_("Invalid option value");
639
640 const name_value_map *map = optmap[i].u.map;
641 size_t j = 0;
642 while (true) {
643 // The array is terminated with an empty name.
644 if (map[j].name[0] == '\0')
645 return N_("Invalid option value");
646
647 if (memcmp(*str, map[j].name, value_len) == 0
648 && map[j].name[value_len]
649 == '\0') {
650 v = map[j].value;
651 break;
652 }
653
654 ++j;
655 }
656 } else if (**str < '0' || **str > '9') {
657 // Note that "max" isn't supported while it is
658 // supported in xz. It's not useful here.
659 return N_("Value is not a non-negative "
660 "decimal integer");
661 } else {
662 // strtoul() has locale-specific behavior so it cannot
663 // be relied on to get reproducible results since we
664 // cannot change the locate in a thread-safe library.
665 // It also needs '\0'-termination.
666 //
667 // Use a temporary pointer so that *str will point
668 // to the beginning of the value string in case
669 // an error occurs.
670 const char *p = *str;
671 v = 0;
672 do {
673 if (v > UINT32_MAX / 10)
674 return N_("Value out of range");
675
676 v *= 10;
677
678 const uint32_t add = (uint32_t)(*p - '0');
679 if (UINT32_MAX - add < v)
680 return N_("Value out of range");
681
682 v += add;
683 ++p;
684 } while (p < name_eq_value_end
685 && *p >= '0' && *p <= '9');
686
687 if (p < name_eq_value_end) {
688 // Remember this position so that it can be
689 // used for error messages that are
690 // specifically about the suffix. (Out of
691 // range values are about the whole value
692 // and those error messages point to the
693 // beginning of the number part,
694 // not to the suffix.)
695 const char *multiplier_start = p;
696
697 // If multiplier suffix shouldn't be used
698 // then don't allow them even if the value
699 // would stay within limits. This is a somewhat
700 // unnecessary check but it rejects silly
701 // things like lzma2:pb=0MiB which xz allows.
702 if ((optmap[i].flags & OPTMAP_USE_BYTE_SUFFIX)
703 == 0) {
704 *str = multiplier_start;
705 return N_("This option does not "
706 "support any multiplier "
707 "suffixes");
708 }
709
710 uint32_t shift;
711
712 switch (*p) {
713 case 'k':
714 case 'K':
715 shift = 10;
716 break;
717
718 case 'm':
719 case 'M':
720 shift = 20;
721 break;
722
723 case 'g':
724 case 'G':
725 shift = 30;
726 break;
727
728 default:
729 *str = multiplier_start;
730
731 // TRANSLATORS: Don't translate the
732 // suffixes "KiB", "MiB", or "GiB"
733 // because a user can only specify
734 // untranslated suffixes.
735 return N_("Invalid multiplier suffix "
736 "(KiB, MiB, or GiB)");
737 }
738
739 ++p;
740
741 // Allow "M", "Mi", "MB", "MiB" and the same
742 // for the other five characters from the
743 // switch-statement above. All are handled
744 // as base-2 (perhaps a mistake, perhaps not).
745 // Note that 'i' and 'B' are case sensitive.
746 if (p < name_eq_value_end && *p == 'i')
747 ++p;
748
749 if (p < name_eq_value_end && *p == 'B')
750 ++p;
751
752 // Now we must have no chars remaining.
753 if (p < name_eq_value_end) {
754 *str = multiplier_start;
755 return N_("Invalid multiplier suffix "
756 "(KiB, MiB, or GiB)");
757 }
758
759 if (v > (UINT32_MAX >> shift))
760 return N_("Value out of range");
761
762 v <<= shift;
763 }
764
765 if (v < optmap[i].u.range.min
766 || v > optmap[i].u.range.max)
767 return N_("Value out of range");
768 }
769
770 // Set the value in filter_options. Enums are handled
771 // specially since the underlying type isn't the same
772 // as uint32_t on all systems.
773 void *ptr = (char *)filter_options + optmap[i].offset;
774 switch (optmap[i].type) {
775 case OPTMAP_TYPE_LZMA_MODE:
776 *(lzma_mode *)ptr = (lzma_mode)v;
777 break;
778
779 case OPTMAP_TYPE_LZMA_MATCH_FINDER:
780 *(lzma_match_finder *)ptr = (lzma_match_finder)v;
781 break;
782
783 default:
784 *(uint32_t *)ptr = v;
785 break;
786 }
787
788 // This option has been successfully handled.
789 *str = name_eq_value_end;
790 }
791
792 // No errors.
793 return NULL;
794}
795
796
797/// Finds the name of the filter at the beginning of the string and
798/// calls filter_name_map[i].parse() to decode the filter-specific options.
799/// The caller must have set str_end so that exactly one filter and its
800/// options are present without any trailing characters.
801static const char *
802parse_filter(const char **const str, const char *str_end, lzma_filter *filter,
803 const lzma_allocator *allocator, bool only_xz)
804{
805 // Search for a colon or equals sign that would separate the filter
806 // name from filter options. If neither is found, then the input
807 // string only contains a filter name and there are no options.
808 //
809 // First assume that a colon or equals sign won't be found:
810 const char *name_end = str_end;
811 const char *opts_start = str_end;
812
813 for (const char *p = *str; p < str_end; ++p) {
814 if (*p == ':' || *p == '=') {
815 name_end = p;
816
817 // Filter options (name1=value1,name2=value2,...)
818 // begin after the colon or equals sign.
819 opts_start = p + 1;
820 break;
821 }
822 }
823
824 // Reject a too long filter name so that the memcmp()
825 // in the loop below won't read past the end of the
826 // string in filter_name_map[i].name.
827 const size_t name_len = (size_t)(name_end - *str);
828 if (name_len > NAME_LEN_MAX)
829 return N_("Unknown filter name");
830
831 for (size_t i = 0; i < ARRAY_SIZE(filter_name_map); ++i) {
832 if (memcmp(*str, filter_name_map[i].name, name_len) == 0
833 && filter_name_map[i].name[name_len] == '\0') {
834 if (only_xz && filter_name_map[i].id
835 >= LZMA_FILTER_RESERVED_START)
836 return N_("This filter cannot be used in "
837 "the .xz format");
838
839 // Allocate the filter-specific options and
840 // initialize the memory with zeros.
841 void *options = lzma_alloc_zero(
842 filter_name_map[i].opts_size,
843 allocator);
844 if (options == NULL)
845 return N_("Memory allocation failed");
846
847 // Filter name was found so the input string is good
848 // at least this far.
849 *str = opts_start;
850
851 const char *errmsg = filter_name_map[i].parse(
852 str, str_end, options);
853 if (errmsg != NULL) {
854 lzma_free(options, allocator);
855 return errmsg;
856 }
857
858 // *filter is modified only when parsing is successful.
859 filter->id = filter_name_map[i].id;
860 filter->options = options;
861 return NULL;
862 }
863 }
864
865 return N_("Unknown filter name");
866}
867
868
869/// Converts the string to a filter chain (array of lzma_filter structures).
870///
871/// *str is advanced every time something has been decoded successfully.
872/// This way the caller knows where in the string a possible error occurred.
873static const char *
874str_to_filters(const char **const str, lzma_filter *filters, uint32_t flags,
875 const lzma_allocator *allocator)
876{
877 const char *errmsg;
878
879 // Skip leading spaces.
880 while (**str == ' ')
881 ++*str;
882
883 if (**str == '\0')
884 return N_("Empty string is not allowed, "
885 "try '6' if a default value is needed");
886
887 // Detect the type of the string.
888 //
889 // A string beginning with a digit or a string beginning with
890 // one dash and a digit are treated as presets. Trailing spaces
891 // will be ignored too (leading spaces were already ignored above).
892 //
893 // For example, "6", "7 ", "-9e", or " -3 " are treated as presets.
894 // Strings like "-" or "- " aren't preset.
895#define MY_IS_DIGIT(c) ((c) >= '0' && (c) <= '9')
896 if (MY_IS_DIGIT(**str) || (**str == '-' && MY_IS_DIGIT((*str)[1]))) {
897 if (**str == '-')
898 ++*str;
899
900 // Ignore trailing spaces.
901 const size_t str_len = strlen(*str);
902 const char *str_end = memchr(*str, ' ', str_len);
903 if (str_end != NULL) {
904 // There is at least one trailing space. Check that
905 // there are no chars other than spaces.
906 for (size_t i = 1; str_end[i] != '\0'; ++i)
907 if (str_end[i] != ' ')
908 return N_("Unsupported preset");
909 } else {
910 // There are no trailing spaces. Use the whole string.
911 str_end = *str + str_len;
912 }
913
914 uint32_t preset;
915 errmsg = parse_lzma12_preset(str, str_end, &preset);
916 if (errmsg != NULL)
917 return errmsg;
918
919 lzma_options_lzma *opts = lzma_alloc(sizeof(*opts), allocator);
920 if (opts == NULL)
921 return N_("Memory allocation failed");
922
923 if (lzma_lzma_preset(opts, preset)) {
924 lzma_free(opts, allocator);
925 return N_("Unsupported preset");
926 }
927
928 filters[0].id = LZMA_FILTER_LZMA2;
929 filters[0].options = opts;
930 filters[1].id = LZMA_VLI_UNKNOWN;
931 filters[1].options = NULL;
932
933 return NULL;
934 }
935
936 // Not a preset so it must be a filter chain.
937 //
938 // If LZMA_STR_ALL_FILTERS isn't used we allow only filters that
939 // can be used in .xz.
940 const bool only_xz = (flags & LZMA_STR_ALL_FILTERS) == 0;
941
942 // Use a temporary array so that we don't modify the caller-supplied
943 // one until we know that no errors occurred.
944 lzma_filter temp_filters[LZMA_FILTERS_MAX + 1];
945
946 size_t i = 0;
947 do {
948 if (i == LZMA_FILTERS_MAX) {
949 errmsg = N_("The maximum number of filters is four");
950 goto error;
951 }
952
953 // Skip "--" if present.
954 if ((*str)[0] == '-' && (*str)[1] == '-')
955 *str += 2;
956
957 // Locate the end of "filter:name1=value1,name2=value2",
958 // stopping at the first "--" or a single space.
959 const char *filter_end = *str;
960 while (filter_end[0] != '\0') {
961 if ((filter_end[0] == '-' && filter_end[1] == '-')
962 || filter_end[0] == ' ')
963 break;
964
965 ++filter_end;
966 }
967
968 // Inputs that have "--" at the end or "-- " in the middle
969 // will result in an empty filter name.
970 if (filter_end == *str) {
971 errmsg = N_("Filter name is missing");
972 goto error;
973 }
974
975 errmsg = parse_filter(str, filter_end, &temp_filters[i],
976 allocator, only_xz);
977 if (errmsg != NULL)
978 goto error;
979
980 // Skip trailing spaces.
981 while (**str == ' ')
982 ++*str;
983
984 ++i;
985 } while (**str != '\0');
986
987 // Seems to be good, terminate the array so that
988 // basic validation can be done.
989 temp_filters[i].id = LZMA_VLI_UNKNOWN;
990 temp_filters[i].options = NULL;
991
992 // Do basic validation if the application didn't prohibit it.
993 if ((flags & LZMA_STR_NO_VALIDATION) == 0) {
994 size_t dummy;
995 const lzma_ret ret = lzma_validate_chain(temp_filters, &dummy);
996 assert(ret == LZMA_OK || ret == LZMA_OPTIONS_ERROR);
997 if (ret != LZMA_OK) {
998 errmsg = N_("Invalid filter chain "
999 "('lzma2' missing at the end?)");
1000 goto error;
1001 }
1002 }
1003
1004 // All good. Copy the filters to the application supplied array.
1005 memcpy(filters, temp_filters, (i + 1) * sizeof(lzma_filter));
1006 return NULL;
1007
1008error:
1009 // Free the filter options that were successfully decoded.
1010 while (i-- > 0)
1011 lzma_free(temp_filters[i].options, allocator);
1012
1013 return errmsg;
1014}
1015
1016
1017extern LZMA_API(const char *)
1018lzma_str_to_filters(const char *str, int *error_pos, lzma_filter *filters,
1019 uint32_t flags, const lzma_allocator *allocator)
1020{
1021 // If error_pos isn't NULL, *error_pos must always be set.
1022 // liblzma <= 5.4.6 and <= 5.6.1 have a bug and don't do this
1023 // when str == NULL or filters == NULL or flags are unsupported.
1024 if (error_pos != NULL)
1025 *error_pos = 0;
1026
1027 if (str == NULL || filters == NULL) {
1028 // Don't translate this because it's only shown in case of
1029 // a programming error.
1030 return "Unexpected NULL pointer argument(s) "
1031 "to lzma_str_to_filters()";
1032 }
1033
1034 // Validate the flags.
1035 const uint32_t supported_flags
1036 = LZMA_STR_ALL_FILTERS
1037 | LZMA_STR_NO_VALIDATION;
1038
1039 if (flags & ~supported_flags) {
1040 // This message is possible only if the caller uses flags
1041 // that are only supported in a newer liblzma version (or
1042 // the flags are simply buggy). Don't translate this at least
1043 // when liblzma itself doesn't use gettext; xz and liblzma
1044 // are usually upgraded at the same time.
1045 return "Unsupported flags to lzma_str_to_filters()";
1046 }
1047
1048 const char *used = str;
1049 const char *errmsg = str_to_filters(&used, filters, flags, allocator);
1050
1051 if (error_pos != NULL) {
1052 const size_t n = (size_t)(used - str);
1053 *error_pos = n > INT_MAX ? INT_MAX : (int)n;
1054 }
1055
1056 return errmsg;
1057}
1058
1059
1060/// Converts options of one filter to a string.
1061///
1062/// The caller must have already put the filter name in the destination
1063/// string. Since it is possible that no options will be needed, the caller
1064/// won't have put a delimiter character (':' or '=') in the string yet.
1065/// We will add it if at least one option will be added to the string.
1066static void
1067strfy_filter(lzma_str *dest, const char *delimiter,
1068 const option_map *optmap, size_t optmap_count,
1069 const void *filter_options)
1070{
1071 for (size_t i = 0; i < optmap_count; ++i) {
1072 // No attempt is made to reverse LZMA1/2 preset.
1073 if (optmap[i].type == OPTMAP_TYPE_LZMA_PRESET)
1074 continue;
1075
1076 // All options have integer values, some just are mapped
1077 // to a string with a name_value_map. LZMA1/2 preset
1078 // isn't reversed back to preset=PRESET form.
1079 uint32_t v;
1080 const void *ptr
1081 = (const char *)filter_options + optmap[i].offset;
1082 switch (optmap[i].type) {
1083 case OPTMAP_TYPE_LZMA_MODE:
1084 v = *(const lzma_mode *)ptr;
1085 break;
1086
1087 case OPTMAP_TYPE_LZMA_MATCH_FINDER:
1088 v = *(const lzma_match_finder *)ptr;
1089 break;
1090
1091 default:
1092 v = *(const uint32_t *)ptr;
1093 break;
1094 }
1095
1096 // Skip this if this option should be omitted from
1097 // the string when the value is zero.
1098 if (v == 0 && (optmap[i].flags & OPTMAP_NO_STRFY_ZERO))
1099 continue;
1100
1101 // Before the first option we add whatever delimiter
1102 // the caller gave us. For later options a comma is used.
1103 str_append_str(dest, delimiter);
1104 delimiter = ",";
1105
1106 // Add the option name and equals sign.
1107 str_append_str(dest, optmap[i].name);
1108 str_append_str(dest, "=");
1109
1110 if (optmap[i].flags & OPTMAP_USE_NAME_VALUE_MAP) {
1111 const name_value_map *map = optmap[i].u.map;
1112 size_t j = 0;
1113 while (true) {
1114 if (map[j].name[0] == '\0') {
1115 str_append_str(dest, "UNKNOWN");
1116 break;
1117 }
1118
1119 if (map[j].value == v) {
1120 str_append_str(dest, map[j].name);
1121 break;
1122 }
1123
1124 ++j;
1125 }
1126 } else {
1127 str_append_u32(dest, v,
1128 optmap[i].flags & OPTMAP_USE_BYTE_SUFFIX);
1129 }
1130 }
1131
1132 return;
1133}
1134
1135
1136extern LZMA_API(lzma_ret)
1137lzma_str_from_filters(char **output_str, const lzma_filter *filters,
1138 uint32_t flags, const lzma_allocator *allocator)
1139{
1140 // On error *output_str is always set to NULL.
1141 // Do it as the very first step.
1142 if (output_str == NULL)
1143 return LZMA_PROG_ERROR;
1144
1145 *output_str = NULL;
1146
1147 if (filters == NULL)
1148 return LZMA_PROG_ERROR;
1149
1150 // Validate the flags.
1151 const uint32_t supported_flags
1152 = LZMA_STR_ENCODER
1153 | LZMA_STR_DECODER
1154 | LZMA_STR_GETOPT_LONG
1155 | LZMA_STR_NO_SPACES;
1156
1157 if (flags & ~supported_flags)
1158 return LZMA_OPTIONS_ERROR;
1159
1160 // There must be at least one filter.
1161 if (filters[0].id == LZMA_VLI_UNKNOWN)
1162 return LZMA_OPTIONS_ERROR;
1163
1164 // Allocate memory for the output string.
1165 lzma_str dest;
1166 return_if_error(str_init(&dest, allocator));
1167
1168 const bool show_opts = (flags & (LZMA_STR_ENCODER | LZMA_STR_DECODER));
1169
1170 const char *opt_delim = (flags & LZMA_STR_GETOPT_LONG) ? "=" : ":";
1171
1172 for (size_t i = 0; filters[i].id != LZMA_VLI_UNKNOWN; ++i) {
1173 // If we reach LZMA_FILTERS_MAX, then the filters array
1174 // is too large since the ID cannot be LZMA_VLI_UNKNOWN here.
1175 if (i == LZMA_FILTERS_MAX) {
1176 str_free(&dest, allocator);
1177 return LZMA_OPTIONS_ERROR;
1178 }
1179
1180 // Don't add a space between filters if the caller
1181 // doesn't want them.
1182 if (i > 0 && !(flags & LZMA_STR_NO_SPACES))
1183 str_append_str(&dest, " ");
1184
1185 // Use dashes for xz getopt_long() compatible syntax but also
1186 // use dashes to separate filters when spaces weren't wanted.
1187 if ((flags & LZMA_STR_GETOPT_LONG)
1188 || (i > 0 && (flags & LZMA_STR_NO_SPACES)))
1189 str_append_str(&dest, "--");
1190
1191 size_t j = 0;
1192 while (true) {
1193 if (j == ARRAY_SIZE(filter_name_map)) {
1194 // Filter ID in filters[i].id isn't supported.
1195 str_free(&dest, allocator);
1196 return LZMA_OPTIONS_ERROR;
1197 }
1198
1199 if (filter_name_map[j].id == filters[i].id) {
1200 // Add the filter name.
1201 str_append_str(&dest, filter_name_map[j].name);
1202
1203 // If only the filter names were wanted then
1204 // skip to the next filter. In this case
1205 // .options is ignored and may be NULL even
1206 // when the filter doesn't allow NULL options.
1207 if (!show_opts)
1208 break;
1209
1210 if (filters[i].options == NULL) {
1211 if (!filter_name_map[j].allow_null) {
1212 // Filter-specific options
1213 // are missing but with
1214 // this filter the options
1215 // structure is mandatory.
1216 str_free(&dest, allocator);
1217 return LZMA_OPTIONS_ERROR;
1218 }
1219
1220 // .options is allowed to be NULL.
1221 // There is no need to add any
1222 // options to the string.
1223 break;
1224 }
1225
1226 // Options structure is available. Add
1227 // the filter options to the string.
1228 const size_t optmap_count
1229 = (flags & LZMA_STR_ENCODER)
1230 ? filter_name_map[j].strfy_encoder
1231 : filter_name_map[j].strfy_decoder;
1232 strfy_filter(&dest, opt_delim,
1233 filter_name_map[j].optmap,
1234 optmap_count,
1235 filters[i].options);
1236 break;
1237 }
1238
1239 ++j;
1240 }
1241 }
1242
1243 return str_finish(output_str, &dest, allocator);
1244}
1245
1246
1247extern LZMA_API(lzma_ret)
1248lzma_str_list_filters(char **output_str, lzma_vli filter_id, uint32_t flags,
1249 const lzma_allocator *allocator)
1250{
1251 // On error *output_str is always set to NULL.
1252 // Do it as the very first step.
1253 if (output_str == NULL)
1254 return LZMA_PROG_ERROR;
1255
1256 *output_str = NULL;
1257
1258 // Validate the flags.
1259 const uint32_t supported_flags
1260 = LZMA_STR_ALL_FILTERS
1261 | LZMA_STR_ENCODER
1262 | LZMA_STR_DECODER
1263 | LZMA_STR_GETOPT_LONG;
1264
1265 if (flags & ~supported_flags)
1266 return LZMA_OPTIONS_ERROR;
1267
1268 // Allocate memory for the output string.
1269 lzma_str dest;
1270 return_if_error(str_init(&dest, allocator));
1271
1272 // If only listing the filter names then separate them with spaces.
1273 // Otherwise use newlines.
1274 const bool show_opts = (flags & (LZMA_STR_ENCODER | LZMA_STR_DECODER));
1275 const char *filter_delim = show_opts ? "\n" : " ";
1276
1277 const char *opt_delim = (flags & LZMA_STR_GETOPT_LONG) ? "=" : ":";
1278 bool first_filter_printed = false;
1279
1280 for (size_t i = 0; i < ARRAY_SIZE(filter_name_map); ++i) {
1281 // If we are printing only one filter then skip others.
1282 if (filter_id != LZMA_VLI_UNKNOWN
1283 && filter_id != filter_name_map[i].id)
1284 continue;
1285
1286 // If we are printing only .xz filters then skip the others.
1287 if (filter_name_map[i].id >= LZMA_FILTER_RESERVED_START
1288 && (flags & LZMA_STR_ALL_FILTERS) == 0
1289 && filter_id == LZMA_VLI_UNKNOWN)
1290 continue;
1291
1292 // Add a new line if this isn't the first filter being
1293 // written to the string.
1294 if (first_filter_printed)
1295 str_append_str(&dest, filter_delim);
1296
1297 first_filter_printed = true;
1298
1299 if (flags & LZMA_STR_GETOPT_LONG)
1300 str_append_str(&dest, "--");
1301
1302 str_append_str(&dest, filter_name_map[i].name);
1303
1304 // If only the filter names were wanted then continue
1305 // to the next filter.
1306 if (!show_opts)
1307 continue;
1308
1309 const option_map *optmap = filter_name_map[i].optmap;
1310 const char *d = opt_delim;
1311
1312 const size_t end = (flags & LZMA_STR_ENCODER)
1313 ? filter_name_map[i].strfy_encoder
1314 : filter_name_map[i].strfy_decoder;
1315
1316 for (size_t j = 0; j < end; ++j) {
1317 // The first option is delimited from the filter
1318 // name using "=" or ":" and the rest of the options
1319 // are separated with ",".
1320 str_append_str(&dest, d);
1321 d = ",";
1322
1323 // optname=<possible_values>
1324 str_append_str(&dest, optmap[j].name);
1325 str_append_str(&dest, "=<");
1326
1327 if (optmap[j].type == OPTMAP_TYPE_LZMA_PRESET) {
1328 // LZMA1/2 preset has its custom help string.
1329 str_append_str(&dest, LZMA12_PRESET_STR);
1330 } else if (optmap[j].flags
1331 & OPTMAP_USE_NAME_VALUE_MAP) {
1332 // Separate the possible option values by "|".
1333 const name_value_map *m = optmap[j].u.map;
1334 for (size_t k = 0; m[k].name[0] != '\0'; ++k) {
1335 if (k > 0)
1336 str_append_str(&dest, "|");
1337
1338 str_append_str(&dest, m[k].name);
1339 }
1340 } else {
1341 // Integer range is shown as min-max.
1342 const bool use_byte_suffix = optmap[j].flags
1343 & OPTMAP_USE_BYTE_SUFFIX;
1344 str_append_u32(&dest, optmap[j].u.range.min,
1345 use_byte_suffix);
1346 str_append_str(&dest, "-");
1347 str_append_u32(&dest, optmap[j].u.range.max,
1348 use_byte_suffix);
1349 }
1350
1351 str_append_str(&dest, ">");
1352 }
1353 }
1354
1355 // If no filters were added to the string then it must be because
1356 // the caller provided an unsupported Filter ID.
1357 if (!first_filter_printed) {
1358 str_free(&dest, allocator);
1359 return LZMA_OPTIONS_ERROR;
1360 }
1361
1362 return str_finish(output_str, &dest, allocator);
1363}
Note: See TracBrowser for help on using the repository browser.

© 2025 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette