VirtualBox

source: vbox/trunk/src/libs/liblzma-5.8.1/check/crc_clmul_consts_gen.c@ 109307

Last change on this file since 109307 was 108915, checked in by vboxsync, 7 weeks ago

libs/liblzma: Liblzma ose fix. jiraref:VBP-1635

File size: 5.5 KB
Line 
1// SPDX-License-Identifier: 0BSD
2
3///////////////////////////////////////////////////////////////////////////////
4//
5/// \file crc_clmul_consts_gen.c
6/// \brief Generate constants for CLMUL CRC code
7///
8/// Compiling: gcc -std=c99 -o crc_clmul_consts_gen crc_clmul_consts_gen.c
9///
10/// This is for CRCs that use reversed bit order (bit reflection).
11/// The same CLMUL CRC code can be used with CRC64 and smaller ones like
12/// CRC32 apart from one special case: CRC64 needs an extra step in the
13/// Barrett reduction to handle the 65th bit; the smaller ones don't.
14/// Otherwise it's enough to just change the polynomial and the derived
15/// constants and use the same code.
16///
17/// See the Intel white paper "Fast CRC Computation for Generic Polynomials
18/// Using PCLMULQDQ Instruction" from 2009.
19//
20// Author: Lasse Collin
21//
22///////////////////////////////////////////////////////////////////////////////
23
24#include <inttypes.h>
25#include <stdio.h>
26
27
28/// CRC32 (Ethernet) polynomial in reversed representation
29static const uint64_t p32 = 0xedb88320;
30
31// CRC64 (ECMA-182) polynomial in reversed representation
32static const uint64_t p64 = 0xc96c5795d7870f42;
33
34
35/// Calculates floor(x^128 / p) where p is a CRC64 polynomial in
36/// reversed representation. The result is in reversed representation too.
37static uint64_t
38calc_cldiv(uint64_t p)
39{
40 // Quotient
41 uint64_t q = 0;
42
43 // Align the x^64 term with the x^128 (the implied high bits of the
44 // divisor and the dividend) and do the first step of polynomial long
45 // division, calculating the first remainder. The variable q remains
46 // zero because the highest bit of the quotient is an implied bit 1
47 // (we kind of set q = 1 << -1).
48 uint64_t r = p;
49
50 // Then process the remaining 64 terms. Note that r has no implied
51 // high bit, only q and p do. (And remember that a high bit in the
52 // polynomial is stored at a low bit in the variable due to the
53 // reversed bit order.)
54 for (unsigned i = 0; i < 64; ++i) {
55 q |= (r & 1) << i;
56 r = (r >> 1) ^ (r & 1 ? p : 0);
57 }
58
59 return q;
60}
61
62
63/// Calculate the remainder of carryless division:
64///
65/// x^(bits + n - 1) % p, where n=64 (for CRC64)
66///
67/// p must be in reversed representation which omits the bit of
68/// the highest term of the polynomial. Instead, it is an implied bit
69/// at kind of like "1 << -1" position, as if it had just been shifted out.
70///
71/// The return value is in the reversed bit order. (There are no implied bits.)
72static uint64_t
73calc_clrem(uint64_t p, unsigned bits)
74{
75 // Do the first step of polynomial long division.
76 uint64_t r = p;
77
78 // Then process the remaining terms. Start with i = 1 instead of i = 0
79 // to account for the -1 in x^(bits + n - 1). This -1 is convenient
80 // with the reversed bit order. See the "Bit-Reflection" section in
81 // the Intel white paper.
82 for (unsigned i = 1; i < bits; ++i)
83 r = (r >> 1) ^ (r & 1 ? p : 0);
84
85 return r;
86}
87
88
89extern int
90main(void)
91{
92 puts("// CRC64");
93
94 // The order of the two 64-bit constants in a vector don't matter.
95 // It feels logical to put them in this order as it matches the
96 // order in which the input bytes are read.
97 printf("const __m128i fold512 = _mm_set_epi64x("
98 "0x%016" PRIx64 ", 0x%016" PRIx64 ");\n",
99 calc_clrem(p64, 4 * 128 - 64),
100 calc_clrem(p64, 4 * 128));
101
102 printf("const __m128i fold128 = _mm_set_epi64x("
103 "0x%016" PRIx64 ", 0x%016" PRIx64 ");\n",
104 calc_clrem(p64, 128 - 64),
105 calc_clrem(p64, 128));
106
107 // When we multiply by mu, we care about the high bits of the result
108 // (in reversed bit order!). It doesn't matter that the low bit gets
109 // shifted out because the affected output bits will be ignored.
110 // Below we add the implied high bit with "| 1" after the shifting
111 // so that the high bits of the multiplication will be correct.
112 //
113 // p64 is shifted left by one so that the final multiplication
114 // in Barrett reduction won't be misaligned by one bit. We could
115 // use "(p64 << 1) | 1" instead of "p64 << 1" too but it makes
116 // no difference as that bit won't affect the relevant output bits
117 // (we only care about the lowest 64 bits of the result, that is,
118 // lowest in the reversed bit order).
119 //
120 // NOTE: The 65rd bit of p64 gets shifted out. It needs to be
121 // compensated with 64-bit shift and xor in the CRC64 code.
122 printf("const __m128i mu_p = _mm_set_epi64x("
123 "0x%016" PRIx64 ", 0x%016" PRIx64 ");\n",
124 (calc_cldiv(p64) << 1) | 1,
125 p64 << 1);
126
127 puts("");
128
129 puts("// CRC32");
130
131 printf("const __m128i fold512 = _mm_set_epi64x("
132 "0x%08" PRIx64 ", 0x%08" PRIx64 ");\n",
133 calc_clrem(p32, 4 * 128 - 64),
134 calc_clrem(p32, 4 * 128));
135
136 printf("const __m128i fold128 = _mm_set_epi64x("
137 "0x%08" PRIx64 ", 0x%08" PRIx64 ");\n",
138 calc_clrem(p32, 128 - 64),
139 calc_clrem(p32, 128));
140
141 // CRC32 calculation is done by modulus scaling it to a CRC64.
142 // Since the CRC is in reversed representation, only the mu
143 // constant changes with the modulus scaling. This method avoids
144 // one additional constant and one additional clmul in the final
145 // reduction steps, making the code both simpler and faster.
146 //
147 // p32 is shifted left by one so that the final multiplication
148 // in Barrett reduction won't be misaligned by one bit. We could
149 // use "(p32 << 1) | 1" instead of "p32 << 1" too but it makes
150 // no difference as that bit won't affect the relevant output bits.
151 //
152 // NOTE: The 33-bit value fits in 64 bits so, unlike with CRC64,
153 // there is no need to compensate for any missing bits in the code.
154 printf("const __m128i mu_p = _mm_set_epi64x("
155 "0x%016" PRIx64 ", 0x%" PRIx64 ");\n",
156 (calc_cldiv(p32) << 1) | 1,
157 p32 << 1);
158
159 return 0;
160}
Note: See TracBrowser for help on using the repository browser.

© 2025 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette