VirtualBox

source: vbox/trunk/src/libs/liblzma-5.6.4/simple/arm64.c@ 108905

Last change on this file since 108905 was 108905, checked in by vboxsync, 4 weeks ago

liblzma-5.6.4: Applied and adjusted our liblzma changes to 5.6.4. jiraref:VBP-1613

  • Property svn:eol-style set to LF
  • Property svn:keywords set to Author Date Id Revision
File size: 4.0 KB
Line 
// SPDX-License-Identifier: 0BSD

///////////////////////////////////////////////////////////////////////////////
//
/// \file       arm64.c
/// \brief      Filter for ARM64 binaries
///
/// This converts ARM64 relative addresses in the BL and ADRP immediates
/// to absolute values to increase redundancy of ARM64 code.
///
/// Converting B or ADR instructions was also tested but it's not useful.
/// A majority of the jumps for the B instruction are very small (+/- 0xFF).
/// These are typical for loops and if-statements. Encoding them to their
/// absolute address reduces redundancy since many of the small relative
/// jump values are repeated, but very few of the absolute addresses are.
//
//  Authors:    Lasse Collin
//              Jia Tan
//              Igor Pavlov
//
///////////////////////////////////////////////////////////////////////////////
22
23#include "simple_private.h"
24
25
26static size_t
27arm64_code(void *simple lzma_attribute((__unused__)),
28 uint32_t now_pos, bool is_encoder,
29 uint8_t *buffer, size_t size)
30{
31 size_t i;
32
33 // Clang 14.0.6 on x86-64 makes this four times bigger and 40 % slower
34 // with auto-vectorization that is enabled by default with -O2.
35 // Such vectorization bloat happens with -O2 when targeting ARM64 too
36 // but performance hasn't been tested.
37#ifdef __clang__
38# pragma clang loop vectorize(disable)
39#endif
40 for (i = 0; i + 4 <= size; i += 4) {
41 uint32_t pc = (uint32_t)(now_pos + i);
42 uint32_t instr = read32le(buffer + i);
43
44 if ((instr >> 26) == 0x25) {
45 // BL instruction:
46 // The full 26-bit immediate is converted.
47 // The range is +/-128 MiB.
48 //
49 // Using the full range helps quite a lot with
50 // big executables. Smaller range would reduce false
51 // positives in non-code sections of the input though
52 // so this is a compromise that slightly favors big
53 // files. With the full range, only six bits of the 32
54 // need to match to trigger a conversion.
55 const uint32_t src = instr;
56 instr = 0x94000000;
57
58 pc >>= 2;
59 if (!is_encoder)
60 pc = 0U - pc;
61
62 instr |= (src + pc) & 0x03FFFFFF;
63 write32le(buffer + i, instr);
64
65 } else if ((instr & 0x9F000000) == 0x90000000) {
66 // ADRP instruction:
67 // Only values in the range +/-512 MiB are converted.
68 //
69 // Using less than the full +/-4 GiB range reduces
70 // false positives on non-code sections of the input
71 // while being excellent for executables up to 512 MiB.
72 // The positive effect of ADRP conversion is smaller
73 // than that of BL but it also doesn't hurt so much in
74 // non-code sections of input because, with +/-512 MiB
75 // range, nine bits of 32 need to match to trigger a
76 // conversion (two 10-bit match choices = 9 bits).
77 const uint32_t src = ((instr >> 29) & 3)
78 | ((instr >> 3) & 0x001FFFFC);
79
80 // With the addition only one branch is needed to
81 // check the +/- range. This is usually false when
82 // processing ARM64 code so branch prediction will
83 // handle it well in terms of performance.
84 //
85 //if ((src & 0x001E0000) != 0
86 // && (src & 0x001E0000) != 0x001E0000)
87 if ((src + 0x00020000) & 0x001C0000)
88 continue;
89
90 instr &= 0x9000001F;
91
92 pc >>= 12;
93 if (!is_encoder)
94 pc = 0U - pc;
95
96 const uint32_t dest = src + pc;
97 instr |= (dest & 3) << 29;
98 instr |= (dest & 0x0003FFFC) << 3;
99 instr |= (0U - (dest & 0x00020000)) & 0x00E00000;
100 write32le(buffer + i, instr);
101 }
102 }
103
104 return i;
105}
106
107
108static lzma_ret
109arm64_coder_init(lzma_next_coder *next, const lzma_allocator *allocator,
110 const lzma_filter_info *filters, bool is_encoder)
111{
112 return lzma_simple_coder_init(next, allocator, filters,
113 &arm64_code, 0, 4, 4, is_encoder);
114}
115
116
#ifdef HAVE_ENCODER_ARM64
/// \brief      Initialize the ARM64 filter for encoding
///
/// Only compiled when HAVE_ENCODER_ARM64 is defined. Forwards the
/// arguments to arm64_coder_init() with the encoder direction selected.
extern lzma_ret
lzma_simple_arm64_encoder_init(lzma_next_coder *next,
		const lzma_allocator *allocator,
		const lzma_filter_info *filters)
{
	const bool is_encoder = true;

	return arm64_coder_init(next, allocator, filters, is_encoder);
}
#endif
126
127
#ifdef HAVE_DECODER_ARM64
/// \brief      Initialize the ARM64 filter for decoding
///
/// Only compiled when HAVE_DECODER_ARM64 is defined. Forwards the
/// arguments to arm64_coder_init() with the decoder direction selected.
extern lzma_ret
lzma_simple_arm64_decoder_init(lzma_next_coder *next,
		const lzma_allocator *allocator,
		const lzma_filter_info *filters)
{
	const bool is_encoder = false;

	return arm64_coder_init(next, allocator, filters, is_encoder);
}
#endif
Note: See TracBrowser for help on using the repository browser.

© 2025 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette