1 /**********************************************************************
2 Copyright(c) 2011-2023 Intel Corporation All rights reserved.
3
4 Redistribution and use in source and binary forms, with or without
5 modification, are permitted provided that the following conditions
6 are met:
7 * Redistributions of source code must retain the above copyright
8 notice, this list of conditions and the following disclaimer.
9 * Redistributions in binary form must reproduce the above copyright
10 notice, this list of conditions and the following disclaimer in
11 the documentation and/or other materials provided with the
12 distribution.
13 * Neither the name of Intel Corporation nor the names of its
14 contributors may be used to endorse or promote products derived
15 from this software without specific prior written permission.
16
17 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
18 "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
19 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
20 A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
21 OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
22 SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
23 LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
24 DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
25 THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27 OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 **********************************************************************/
29
30 /**
31 * @file crc_combine_example.c
32 * @brief Example of CRC combine logic.
33 *
34 * Combine functions can produce the CRC from independent pieces as though they
35 * were computed sequentially. The example includes combine functions that are
36 * split into two parts; one that depends on length only, and another with
37 * optimizations that uses the previous and individual CRCs to combine. By
38 * splitting, the length-dependent constants can be pre-computed and the
39 * remaining combine logic kept fast and simple.
40 *
41 */
42
43 // Compile as c++ for multi-function versions
44
45 #include <stdio.h>
46 #include <inttypes.h>
47 #include <immintrin.h>
48 #include <isa-l.h>
49
50 int verbose; // Global for tests
51
52 #if defined(_MSC_VER)
53 #define __builtin_parity(x) (__popcnt64(x) & 1)
54 #endif
55
56 #if defined(__GNUC__) || defined(__clang__)
57 #define ATTRIBUTE_TARGET(x) __attribute__((target(x)))
58 #else
59 #define ATTRIBUTE_TARGET(x)
60 #endif
61
62 struct crc64_desc {
63 uint64_t poly;
64 uint64_t k5;
65 uint64_t k7;
66 uint64_t k8;
67 };
68
69 void
gen_crc64_refl_consts(uint64_t poly,struct crc64_desc * c)70 gen_crc64_refl_consts(uint64_t poly, struct crc64_desc *c)
71 {
72 uint64_t quotienth = 0;
73 uint64_t div;
74 uint64_t rem = 1ull;
75 int i;
76
77 for (i = 0; i < 64; i++) {
78 div = (rem & 1ull) != 0;
79 quotienth = (quotienth >> 1) | (div ? 0x8000000000000000ull : 0);
80 rem = (div ? poly : 0) ^ (rem >> 1);
81 }
82 c->k5 = rem;
83 c->poly = poly;
84 c->k7 = quotienth;
85 c->k8 = poly << 1;
86 }
87
88 void
gen_crc64_norm_consts(uint64_t poly,struct crc64_desc * c)89 gen_crc64_norm_consts(uint64_t poly, struct crc64_desc *c)
90 {
91 uint64_t quotientl = 0;
92 uint64_t div;
93 uint64_t rem = 1ull << 63;
94 int i;
95
96 for (i = 0; i < 65; i++) {
97 div = (rem & 0x8000000000000000ull) != 0;
98 quotientl = (quotientl << 1) | div;
99 rem = (div ? poly : 0) ^ (rem << 1);
100 }
101
102 c->poly = poly;
103 c->k5 = rem;
104 c->k7 = quotientl;
105 c->k8 = poly;
106 }
107
108 uint32_t
calc_xi_mod(int n)109 calc_xi_mod(int n)
110 {
111 uint32_t rem = 0x1ul;
112 int i, j;
113
114 const uint32_t poly = 0x82f63b78;
115
116 if (n < 16)
117 return 0;
118
119 for (i = 0; i < n - 8; i++)
120 for (j = 0; j < 8; j++)
121 rem = (rem & 0x1ul) ? (rem >> 1) ^ poly : (rem >> 1);
122
123 return rem;
124 }
125
126 uint64_t
calc64_refl_xi_mod(int n,struct crc64_desc * c)127 calc64_refl_xi_mod(int n, struct crc64_desc *c)
128 {
129 uint64_t rem = 1ull;
130 int i, j;
131
132 const uint64_t poly = c->poly;
133
134 if (n < 32)
135 return 0;
136
137 for (i = 0; i < n - 16; i++)
138 for (j = 0; j < 8; j++)
139 rem = (rem & 0x1ull) ? (rem >> 1) ^ poly : (rem >> 1);
140
141 return rem;
142 }
143
144 uint64_t
calc64_norm_xi_mod(int n,struct crc64_desc * c)145 calc64_norm_xi_mod(int n, struct crc64_desc *c)
146 {
147 uint64_t rem = 1ull;
148 int i, j;
149
150 const uint64_t poly = c->poly;
151
152 if (n < 32)
153 return 0;
154
155 for (i = 0; i < n - 8; i++)
156 for (j = 0; j < 8; j++)
157 rem = (rem & 0x8000000000000000ull ? poly : 0) ^ (rem << 1);
158
159 return rem;
160 }
161
162 // Base function for crc32_iscsi_shiftx() if c++ multi-function versioning
163 #ifdef __cplusplus
164
165 static inline uint32_t
bit_reverse32(uint32_t x)166 bit_reverse32(uint32_t x)
167 {
168 x = (((x & 0xaaaaaaaa) >> 1) | ((x & 0x55555555) << 1));
169 x = (((x & 0xcccccccc) >> 2) | ((x & 0x33333333) << 2));
170 x = (((x & 0xf0f0f0f0) >> 4) | ((x & 0x0f0f0f0f) << 4));
171 x = (((x & 0xff00ff00) >> 8) | ((x & 0x00ff00ff) << 8));
172 return ((x >> 16) | (x << 16));
173 }
174
175 // Base function for crc32_iscsi_shiftx without clmul optimizations
176
177 ATTRIBUTE_TARGET("default")
178 uint32_t
crc32_iscsi_shiftx(uint32_t crc1,uint32_t x)179 crc32_iscsi_shiftx(uint32_t crc1, uint32_t x)
180 {
181 int i;
182 uint64_t xrev, q = 0;
183 union {
184 uint8_t a[8];
185 uint64_t q;
186 } qu;
187
188 xrev = bit_reverse32(x);
189 xrev <<= 32;
190
191 for (i = 0; i < 64; i++, xrev >>= 1)
192 q = (q << 1) | __builtin_parity(crc1 & xrev);
193
194 qu.q = q;
195 return crc32_iscsi(qu.a, 8, 0);
196 }
197 #endif // cplusplus
198
199 ATTRIBUTE_TARGET("pclmul,sse4.2")
200 uint32_t
crc32_iscsi_shiftx(uint32_t crc1,uint32_t x)201 crc32_iscsi_shiftx(uint32_t crc1, uint32_t x)
202 {
203 __m128i crc1x, constx;
204 uint64_t crc64;
205
206 crc1x = _mm_setr_epi32(crc1, 0, 0, 0);
207 constx = _mm_setr_epi32(x, 0, 0, 0);
208 crc1x = _mm_clmulepi64_si128(crc1x, constx, 0);
209 crc64 = _mm_cvtsi128_si64(crc1x);
210 crc64 = _mm_crc32_u64(0, crc64);
211 return crc64 & 0xffffffff;
212 }
213
214 ATTRIBUTE_TARGET("pclmul,sse4.2")
215 uint64_t
crc64_refl_shiftx(uint64_t crc1,uint64_t x,struct crc64_desc * c)216 crc64_refl_shiftx(uint64_t crc1, uint64_t x, struct crc64_desc *c)
217 {
218 __m128i crc1x, crc2x, crc3x, constx;
219 const __m128i rk5 = _mm_loadu_si64(&c->k5);
220 const __m128i rk7 = _mm_loadu_si128((__m128i *) &c->k7);
221
222 crc1x = _mm_cvtsi64_si128(crc1);
223 constx = _mm_cvtsi64_si128(x);
224 crc1x = _mm_clmulepi64_si128(crc1x, constx, 0x00);
225
226 // Fold to 64b
227 crc2x = _mm_clmulepi64_si128(crc1x, rk5, 0x00);
228 crc3x = _mm_bsrli_si128(crc1x, 8);
229 crc1x = _mm_xor_si128(crc2x, crc3x);
230
231 // Reduce
232 crc2x = _mm_clmulepi64_si128(crc1x, rk7, 0x00);
233 crc3x = _mm_clmulepi64_si128(crc2x, rk7, 0x10);
234 crc2x = _mm_bslli_si128(crc2x, 8);
235 crc1x = _mm_xor_si128(crc1x, crc2x);
236 crc1x = _mm_xor_si128(crc1x, crc3x);
237 return _mm_extract_epi64(crc1x, 1);
238 }
239
240 ATTRIBUTE_TARGET("pclmul,sse4.2")
241 uint64_t
crc64_norm_shiftx(uint64_t crc1,uint64_t x,struct crc64_desc * c)242 crc64_norm_shiftx(uint64_t crc1, uint64_t x, struct crc64_desc *c)
243 {
244 __m128i crc1x, crc2x, crc3x, constx;
245 const __m128i rk5 = _mm_loadu_si64(&c->k5);
246 const __m128i rk7 = _mm_loadu_si128((__m128i *) &c->k7);
247
248 crc1x = _mm_cvtsi64_si128(crc1);
249 constx = _mm_cvtsi64_si128(x);
250 crc1x = _mm_clmulepi64_si128(crc1x, constx, 0x00);
251
252 // Fold to 64b
253 crc2x = _mm_clmulepi64_si128(crc1x, rk5, 0x01);
254 crc3x = _mm_bslli_si128(crc1x, 8);
255 crc1x = _mm_xor_si128(crc2x, crc3x);
256
257 // Reduce
258 crc2x = _mm_clmulepi64_si128(crc1x, rk7, 0x01);
259 crc2x = _mm_xor_si128(crc1x, crc2x);
260 crc3x = _mm_clmulepi64_si128(crc2x, rk7, 0x11);
261 crc1x = _mm_xor_si128(crc1x, crc3x);
262 return _mm_extract_epi64(crc1x, 0);
263 }
264
265 uint32_t
crc32_iscsi_combine_4k(uint32_t * crc_array,int n)266 crc32_iscsi_combine_4k(uint32_t *crc_array, int n)
267 {
268 const uint32_t cn4k = 0x82f89c77; // calc_xi_mod(4*1024);
269 int i;
270
271 if (n < 1)
272 return 0;
273
274 uint32_t crc = crc_array[0];
275
276 for (i = 1; i < n; i++)
277 crc = crc32_iscsi_shiftx(crc, cn4k) ^ crc_array[i];
278
279 return crc;
280 }
281
282 // Tests
283
284 #define printv(...) \
285 { \
286 if (verbose) \
287 printf(__VA_ARGS__); \
288 else \
289 printf("."); \
290 }
291
292 uint64_t
test_combine64(uint8_t * inp,size_t len,uint64_t poly,int reflected,uint64_t (* func)(uint64_t,const uint8_t *,uint64_t))293 test_combine64(uint8_t *inp, size_t len, uint64_t poly, int reflected,
294 uint64_t (*func)(uint64_t, const uint8_t *, uint64_t))
295 {
296
297 uint64_t crc64_init, crc64, crc64a, crc64b;
298 uint64_t crc64_1, crc64_2, crc64_3, crc64_n, err = 0;
299 uint64_t xi_mod;
300 struct crc64_desc crc64_c;
301 size_t l1, l2, l3;
302
303 l1 = len / 2;
304 l2 = len - l1;
305
306 crc64_init = rand();
307 crc64 = func(crc64_init, inp, len);
308 printv("\ncrc64 all = 0x%" PRIx64 "\n", crc64);
309
310 // Do a sequential crc update
311 crc64a = func(crc64_init, &inp[0], l1);
312 crc64b = func(crc64a, &inp[l1], l2);
313 printv("crc64 seq = 0x%" PRIx64 "\n", crc64b);
314
315 // Split into 2 independent crc calc and combine
316 crc64_1 = func(crc64_init, &inp[0], l1);
317 crc64_2 = func(0, &inp[l1], l2);
318
319 if (reflected) {
320 gen_crc64_refl_consts(poly, &crc64_c);
321 xi_mod = calc64_refl_xi_mod(l1, &crc64_c);
322 crc64_1 = crc64_refl_shiftx(crc64_1, xi_mod, &crc64_c);
323 } else {
324 gen_crc64_norm_consts(poly, &crc64_c);
325 xi_mod = calc64_norm_xi_mod(l1, &crc64_c);
326 crc64_1 = crc64_norm_shiftx(crc64_1, xi_mod, &crc64_c);
327 }
328 crc64_n = crc64_1 ^ crc64_2;
329
330 printv("crc64 combined2 = 0x%" PRIx64 "\n", crc64_n);
331 err |= crc64_n ^ crc64;
332 if (err)
333 return err;
334
335 // Split into 3 uneven segments and combine
336 l1 = len / 3;
337 l2 = (len / 3) - 3;
338 l3 = len - l2 - l1;
339 crc64_1 = func(crc64_init, &inp[0], l1);
340 crc64_2 = func(0, &inp[l1], l2);
341 crc64_3 = func(0, &inp[l1 + l2], l3);
342 if (reflected) {
343 xi_mod = calc64_refl_xi_mod(l3, &crc64_c);
344 crc64_2 = crc64_refl_shiftx(crc64_2, xi_mod, &crc64_c);
345 xi_mod = calc64_refl_xi_mod(len - l1, &crc64_c);
346 crc64_1 = crc64_refl_shiftx(crc64_1, xi_mod, &crc64_c);
347 } else {
348 xi_mod = calc64_norm_xi_mod(l3, &crc64_c);
349 crc64_2 = crc64_norm_shiftx(crc64_2, xi_mod, &crc64_c);
350 xi_mod = calc64_norm_xi_mod(len - l1, &crc64_c);
351 crc64_1 = crc64_norm_shiftx(crc64_1, xi_mod, &crc64_c);
352 }
353 crc64_n = crc64_1 ^ crc64_2 ^ crc64_3;
354
355 printv("crc64 combined3 = 0x%" PRIx64 "\n", crc64_n);
356 err |= crc64_n ^ crc64;
357
358 return err;
359 }
360
361 #define N (1024)
362 #define B (2 * N)
363 #define T (3 * N)
364 #define N4k (4 * 1024)
365 #define NMAX 32
366 #define NMAX_SIZE (NMAX * N4k)
367
368 int
main(int argc,char * argv[])369 main(int argc, char *argv[])
370 {
371 int i;
372 uint32_t crc, crca, crcb, crc1, crc2, crc3, crcn;
373 uint32_t crc_init = rand();
374 uint32_t err = 0;
375 uint8_t *inp = (uint8_t *) malloc(NMAX_SIZE);
376 verbose = argc - 1;
377
378 if (NULL == inp)
379 return -1;
380
381 for (int i = 0; i < NMAX_SIZE; i++)
382 inp[i] = rand();
383
384 printf("crc_combine_test:");
385
386 // Calc crc all at once
387 crc = crc32_iscsi(inp, B, crc_init);
388 printv("\ncrcB all = 0x%" PRIx32 "\n", crc);
389
390 // Do a sequential crc update
391 crca = crc32_iscsi(&inp[0], N, crc_init);
392 crcb = crc32_iscsi(&inp[N], N, crca);
393 printv("crcB seq = 0x%" PRIx32 "\n", crcb);
394
395 // Split into 2 independent crc calc and combine
396 crc1 = crc32_iscsi(&inp[0], N, crc_init);
397 crc2 = crc32_iscsi(&inp[N], N, 0);
398 crcn = crc32_iscsi_shiftx(crc1, calc_xi_mod(N)) ^ crc2;
399 printv("crcB combined2 = 0x%" PRIx32 "\n", crcn);
400 err |= crcn ^ crc;
401
402 // Split into 3 uneven segments and combine
403 crc1 = crc32_iscsi(&inp[0], 100, crc_init);
404 crc2 = crc32_iscsi(&inp[100], 100, 0);
405 crc3 = crc32_iscsi(&inp[200], B - 200, 0);
406 crcn = crc3 ^ crc32_iscsi_shiftx(crc2, calc_xi_mod(B - 200)) ^
407 crc32_iscsi_shiftx(crc1, calc_xi_mod(B - 100));
408 printv("crcB combined3 = 0x%" PRIx32 "\n\n", crcn);
409 err |= crcn ^ crc;
410
411 // Call all size T at once
412 crc = crc32_iscsi(inp, T, crc_init);
413 printv("crcT all = 0x%" PRIx32 "\n", crc);
414
415 // Split into 3 segments and combine with 2 consts
416 crc1 = crc32_iscsi(&inp[0], N, crc_init);
417 crc2 = crc32_iscsi(&inp[N], N, 0);
418 crc3 = crc32_iscsi(&inp[2 * N], N, 0);
419 crcn = crc3 ^ crc32_iscsi_shiftx(crc2, calc_xi_mod(N)) ^
420 crc32_iscsi_shiftx(crc1, calc_xi_mod(2 * N));
421 printv("crcT combined3 = 0x%" PRIx32 "\n", crcn);
422 err |= crcn ^ crc;
423
424 // Combine 3 segments with one const by sequential shift
425 uint32_t xi_mod_n = calc_xi_mod(N);
426 crcn = crc3 ^ crc32_iscsi_shiftx(crc32_iscsi_shiftx(crc1, xi_mod_n) ^ crc2, xi_mod_n);
427 printv("crcT comb3 seq = 0x%" PRIx32 "\n\n", crcn);
428 err |= crcn ^ crc;
429
430 // Test 4k array function
431 crc = crc32_iscsi(inp, NMAX_SIZE, crc_init);
432 printv("crc 4k x n all = 0x%" PRIx32 "\n", crc);
433
434 // Test crc 4k array combine function
435 uint32_t crcs[NMAX];
436 crcs[0] = crc32_iscsi(&inp[0], N4k, crc_init);
437 for (i = 1; i < NMAX; i++)
438 crcs[i] = crc32_iscsi(&inp[i * N4k], N4k, 0);
439
440 crcn = crc32_iscsi_combine_4k(crcs, NMAX);
441 printv("crc4k_array = 0x%" PRIx32 "\n", crcn);
442 err |= crcn ^ crc;
443
444 // CRC64 generic poly tests - reflected
445 uint64_t len = NMAX_SIZE;
446 err |= test_combine64(inp, len, 0xc96c5795d7870f42ull, 1, crc64_ecma_refl);
447 err |= test_combine64(inp, len, 0xd800000000000000ull, 1, crc64_iso_refl);
448 err |= test_combine64(inp, len, 0x95ac9329ac4bc9b5ull, 1, crc64_jones_refl);
449
450 // CRC64 non-reflected polynomial tests
451 err |= test_combine64(inp, len, 0x42f0e1eba9ea3693ull, 0, crc64_ecma_norm);
452 err |= test_combine64(inp, len, 0x000000000000001bull, 0, crc64_iso_norm);
453 err |= test_combine64(inp, len, 0xad93d23594c935a9ull, 0, crc64_jones_norm);
454
455 printf(err == 0 ? "pass\n" : "fail\n");
456 free(inp);
457 return err;
458 }
459