crc_clmul_consts_gen.c@ 109307

Last change on this file since 109307 was 108915, checked in by vboxsync, 7 weeks ago
libs/liblzma: Liblzma ose fix. jiraref:VBP-1635
File size: 5.5 KB

Line
1	// SPDX-License-Identifier: 0BSD
2
3	///////////////////////////////////////////////////////////////////////////////
4	//
5	/// \file crc_clmul_consts_gen.c
6	/// \brief Generate constants for CLMUL CRC code
7	///
8	/// Compiling: gcc -std=c99 -o crc_clmul_consts_gen crc_clmul_consts_gen.c
9	///
10	/// This is for CRCs that use reversed bit order (bit reflection).
11	/// The same CLMUL CRC code can be used with CRC64 and smaller ones like
12	/// CRC32 apart from one special case: CRC64 needs an extra step in the
13	/// Barrett reduction to handle the 65th bit; the smaller ones don't.
14	/// Otherwise it's enough to just change the polynomial and the derived
15	/// constants and use the same code.
16	///
17	/// See the Intel white paper "Fast CRC Computation for Generic Polynomials
18	/// Using PCLMULQDQ Instruction" from 2009.
19	//
20	// Author: Lasse Collin
21	//
22	///////////////////////////////////////////////////////////////////////////////
23
24	#include <inttypes.h>
25	#include <stdio.h>
26
27
28	/// CRC32 (Ethernet) polynomial in reversed representation
29	static const uint64_t p32 = 0xedb88320;
30
31	// CRC64 (ECMA-182) polynomial in reversed representation
32	static const uint64_t p64 = 0xc96c5795d7870f42;
33
34
35	/// Calculates floor(x^128 / p) where p is a CRC64 polynomial in
36	/// reversed representation. The result is in reversed representation too.
37	static uint64_t
38	calc_cldiv(uint64_t p)
39	{
40	// Quotient
41	uint64_t q = 0;
42
43	// Align the x^64 term with the x^128 (the implied high bits of the
44	// divisor and the dividend) and do the first step of polynomial long
45	// division, calculating the first remainder. The variable q remains
46	// zero because the highest bit of the quotient is an implied bit 1
47	// (we kind of set q = 1 << -1).
48	uint64_t r = p;
49
50	// Then process the remaining 64 terms. Note that r has no implied
51	// high bit, only q and p do. (And remember that a high bit in the
52	// polynomial is stored at a low bit in the variable due to the
53	// reversed bit order.)
54	for (unsigned i = 0; i < 64; ++i) {
55	q \|= (r & 1) << i;
56	r = (r >> 1) ^ (r & 1 ? p : 0);
57	}
58
59	return q;
60	}
61
62
63	/// Calculate the remainder of carryless division:
64	///
65	/// x^(bits + n - 1) % p, where n=64 (for CRC64)
66	///
67	/// p must be in reversed representation which omits the bit of
68	/// the highest term of the polynomial. Instead, it is an implied bit
69	/// at kind of like "1 << -1" position, as if it had just been shifted out.
70	///
71	/// The return value is in the reversed bit order. (There are no implied bits.)
72	static uint64_t
73	calc_clrem(uint64_t p, unsigned bits)
74	{
75	// Do the first step of polynomial long division.
76	uint64_t r = p;
77
78	// Then process the remaining terms. Start with i = 1 instead of i = 0
79	// to account for the -1 in x^(bits + n - 1). This -1 is convenient
80	// with the reversed bit order. See the "Bit-Reflection" section in
81	// the Intel white paper.
82	for (unsigned i = 1; i < bits; ++i)
83	r = (r >> 1) ^ (r & 1 ? p : 0);
84
85	return r;
86	}
87
88
89	extern int
90	main(void)
91	{
92	puts("// CRC64");
93
94	// The order of the two 64-bit constants in a vector don't matter.
95	// It feels logical to put them in this order as it matches the
96	// order in which the input bytes are read.
97	printf("const __m128i fold512 = _mm_set_epi64x("
98	"0x%016" PRIx64 ", 0x%016" PRIx64 ");\n",
99	calc_clrem(p64, 4 * 128 - 64),
100	calc_clrem(p64, 4 * 128));
101
102	printf("const __m128i fold128 = _mm_set_epi64x("
103	"0x%016" PRIx64 ", 0x%016" PRIx64 ");\n",
104	calc_clrem(p64, 128 - 64),
105	calc_clrem(p64, 128));
106
107	// When we multiply by mu, we care about the high bits of the result
108	// (in reversed bit order!). It doesn't matter that the low bit gets
109	// shifted out because the affected output bits will be ignored.
110	// Below we add the implied high bit with "\| 1" after the shifting
111	// so that the high bits of the multiplication will be correct.
112	//
113	// p64 is shifted left by one so that the final multiplication
114	// in Barrett reduction won't be misaligned by one bit. We could
115	// use "(p64 << 1) \| 1" instead of "p64 << 1" too but it makes
116	// no difference as that bit won't affect the relevant output bits
117	// (we only care about the lowest 64 bits of the result, that is,
118	// lowest in the reversed bit order).
119	//
120	// NOTE: The 65rd bit of p64 gets shifted out. It needs to be
121	// compensated with 64-bit shift and xor in the CRC64 code.
122	printf("const __m128i mu_p = _mm_set_epi64x("
123	"0x%016" PRIx64 ", 0x%016" PRIx64 ");\n",
124	(calc_cldiv(p64) << 1) \| 1,
125	p64 << 1);
126
127	puts("");
128
129	puts("// CRC32");
130
131	printf("const __m128i fold512 = _mm_set_epi64x("
132	"0x%08" PRIx64 ", 0x%08" PRIx64 ");\n",
133	calc_clrem(p32, 4 * 128 - 64),
134	calc_clrem(p32, 4 * 128));
135
136	printf("const __m128i fold128 = _mm_set_epi64x("
137	"0x%08" PRIx64 ", 0x%08" PRIx64 ");\n",
138	calc_clrem(p32, 128 - 64),
139	calc_clrem(p32, 128));
140
141	// CRC32 calculation is done by modulus scaling it to a CRC64.
142	// Since the CRC is in reversed representation, only the mu
143	// constant changes with the modulus scaling. This method avoids
144	// one additional constant and one additional clmul in the final
145	// reduction steps, making the code both simpler and faster.
146	//
147	// p32 is shifted left by one so that the final multiplication
148	// in Barrett reduction won't be misaligned by one bit. We could
149	// use "(p32 << 1) \| 1" instead of "p32 << 1" too but it makes
150	// no difference as that bit won't affect the relevant output bits.
151	//
152	// NOTE: The 33-bit value fits in 64 bits so, unlike with CRC64,
153	// there is no need to compensate for any missing bits in the code.
154	printf("const __m128i mu_p = _mm_set_epi64x("
155	"0x%016" PRIx64 ", 0x%" PRIx64 ");\n",
156	(calc_cldiv(p32) << 1) \| 1,
157	p32 << 1);
158
159	return 0;
160	}

Note: See TracBrowser for help on using the repository browser.

source: vbox/trunk/src/libs/liblzma-5.8.1/check/crc_clmul_consts_gen.c@ 109307

Download in other formats: