VirtualBox

source: vbox/trunk/src/libs/liblzma-5.8.1/simple/arm64.c @ 109042

Last change on this file since 109042 was 108913, checked in by vboxsync, 4 weeks ago

libs/liblzma: Liblzma ose fix. jiraref:VBP-1635

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 4.5 KB
// SPDX-License-Identifier: 0BSD

///////////////////////////////////////////////////////////////////////////////
//
/// \file       arm64.c
/// \brief      Filter for ARM64 binaries
///
/// This converts ARM64 relative addresses in the BL and ADRP immediates
/// to absolute values to increase redundancy of ARM64 code.
///
/// Converting B or ADR instructions was also tested but it's not useful.
/// A majority of the jumps for the B instruction are very small (+/- 0xFF).
/// These are typical for loops and if-statements. Encoding them to their
/// absolute address reduces redundancy since many of the small relative
/// jump values are repeated, but very few of the absolute addresses are.
//
//  Authors:    Lasse Collin
//              Jia Tan
//              Igor Pavlov
//
///////////////////////////////////////////////////////////////////////////////
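
// A hypothetical worked example of the BL conversion (the numbers are
// illustrative, not from the upstream sources): a BL at offset 0x1000 that
// jumps 0x200 bytes forward has imm26 = 0x200 / 4 = 0x80. The encoder below
// adds pc / 4 = 0x400 to it and stores 0x480, i.e. the absolute target
// 0x1200 in units of four bytes. Every BL that targets the same address
// then carries the same immediate, which is the extra redundancy the LZMA
// match finder can exploit.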

#include "simple_private.h"


static size_t
arm64_code(void *simple lzma_attribute((__unused__)),
                uint32_t now_pos, bool is_encoder,
                uint8_t *buffer, size_t size)
{
        size &= ~(size_t)3;

        size_t i;

        // Clang 14.0.6 on x86-64 makes this four times bigger and 40 % slower
        // with auto-vectorization that is enabled by default with -O2.
        // Such vectorization bloat happens with -O2 when targeting ARM64 too
        // but performance hasn't been tested.
#ifdef __clang__
#       pragma clang loop vectorize(disable)
#endif
        for (i = 0; i < size; i += 4) {
                uint32_t pc = (uint32_t)(now_pos + i);
                uint32_t instr = read32le(buffer + i);

                if ((instr >> 26) == 0x25) {
                        // BL instruction:
                        // The full 26-bit immediate is converted.
                        // The range is +/-128 MiB.
                        //
                        // Using the full range helps quite a lot with
                        // big executables. Smaller range would reduce false
                        // positives in non-code sections of the input though
                        // so this is a compromise that slightly favors big
                        // files. With the full range, only six bits of the 32
                        // need to match to trigger a conversion.
                        const uint32_t src = instr;
                        instr = 0x94000000;

                        pc >>= 2;
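                        // When decoding, negate pc so that the addition
                        // below effectively subtracts it, turning the stored
                        // absolute address back into a relative one.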
                        if (!is_encoder)
                                pc = 0U - pc;

                        instr |= (src + pc) & 0x03FFFFFF;
                        write32le(buffer + i, instr);

                } else if ((instr & 0x9F000000) == 0x90000000) {
                        // ADRP instruction:
                        // Only values in the range +/-512 MiB are converted.
                        //
                        // Using less than the full +/-4 GiB range reduces
                        // false positives on non-code sections of the input
                        // while being excellent for executables up to 512 MiB.
                        // The positive effect of ADRP conversion is smaller
                        // than that of BL but it also doesn't hurt so much in
                        // non-code sections of input because, with +/-512 MiB
                        // range, nine bits of 32 need to match to trigger a
                        // conversion (two 10-bit match choices = 9 bits).
                        const uint32_t src = ((instr >> 29) & 3)
                                        | ((instr >> 3) & 0x001FFFFC);

                        // With the addition only one branch is needed to
                        // check the +/- range. This is usually false when
                        // processing ARM64 code so branch prediction will
                        // handle it well in terms of performance.
                        //
                        //if ((src & 0x001E0000) != 0
                        //        && (src & 0x001E0000) != 0x001E0000)
                        if ((src + 0x00020000) & 0x001C0000)
                                continue;

                        instr &= 0x9000001F;

                        pc >>= 12;
                        if (!is_encoder)
                                pc = 0U - pc;

                        const uint32_t dest = src + pc;
                        instr |= (dest & 3) << 29;
                        instr |= (dest & 0x0003FFFC) << 3;
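                        // Sign-extend bit 17 of dest into immediate bits
                        // 18..20 (instruction bits 23..21, the top bits of
                        // immhi): only the low 18 bits of the absolute page
                        // value are kept, matching the +/-512 MiB range
                        // checked above.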
                        instr |= (0U - (dest & 0x00020000)) & 0x00E00000;
                        write32le(buffer + i, instr);
                }
        }

        return i;
}


static lzma_ret
arm64_coder_init(lzma_next_coder *next, const lzma_allocator *allocator,
                const lzma_filter_info *filters, bool is_encoder)
{
        return lzma_simple_coder_init(next, allocator, filters,
                        &arm64_code, 0, 4, 4, is_encoder);
}


#ifdef HAVE_ENCODER_ARM64
extern lzma_ret
lzma_simple_arm64_encoder_init(lzma_next_coder *next,
                const lzma_allocator *allocator,
                const lzma_filter_info *filters)
{
        return arm64_coder_init(next, allocator, filters, true);
}


extern LZMA_API(size_t)
lzma_bcj_arm64_encode(uint32_t start_offset, uint8_t *buf, size_t size)
{
        // start_offset must be a multiple of four.
        start_offset &= ~UINT32_C(3);
        return arm64_code(NULL, start_offset, true, buf, size);
}
#endif


#ifdef HAVE_DECODER_ARM64
extern lzma_ret
lzma_simple_arm64_decoder_init(lzma_next_coder *next,
                const lzma_allocator *allocator,
                const lzma_filter_info *filters)
{
        return arm64_coder_init(next, allocator, filters, false);
}


extern LZMA_API(size_t)
lzma_bcj_arm64_decode(uint32_t start_offset, uint8_t *buf, size_t size)
{
        // start_offset must be a multiple of four.
        start_offset &= ~UINT32_C(3);
        return arm64_code(NULL, start_offset, false, buf, size);
}
#endif
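
Below is a minimal usage sketch (not part of arm64.c) showing how the raw
filter entry points defined above can be exercised directly. It assumes the
liblzma headers bundled in this tree (5.8.1), where lzma_bcj_arm64_encode()
and lzma_bcj_arm64_decode() are declared via <lzma.h>; the instruction words,
the start offset, and the test program itself are illustrative only. The
transform works in place and is its own inverse when the same start_offset is
used for both directions, which the sketch verifies with a round trip.

// Hypothetical test program; compile with something like: cc test.c -llzma
#include <assert.h>
#include <stdint.h>
#include <string.h>

#include <lzma.h>

int main(void)
{
        // Two little-endian ARM64 instructions:
        //   0x94000002  BL  .+8             (26-bit immediate, BL branch above)
        //   0x90000021  ADRP x1, +4 pages   (21-bit page immediate, ADRP branch)
        uint8_t buf[8] = {
                0x02, 0x00, 0x00, 0x94,
                0x21, 0x00, 0x00, 0x90,
        };
        uint8_t orig[8];
        memcpy(orig, buf, sizeof(buf));

        // Pretend the buffer starts at file offset 0x1000. The return value
        // is the number of bytes actually processed (a multiple of four).
        const uint32_t start_offset = 0x1000;
        size_t done = lzma_bcj_arm64_encode(start_offset, buf, sizeof(buf));
        assert(done == sizeof(buf));

        // Decoding with the same start_offset restores the original bytes.
        done = lzma_bcj_arm64_decode(start_offset, buf, sizeof(buf));
        assert(done == sizeof(buf));
        assert(memcmp(buf, orig, sizeof(buf)) == 0);

        return 0;
}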