/**********************************************************************
  Copyright(c) 2011-2023 Intel Corporation All rights reserved.

  Redistribution and use in source and binary forms, with or without
  modification, are permitted provided that the following conditions
  are met:
    * Redistributions of source code must retain the above copyright
      notice, this list of conditions and the following disclaimer.
    * Redistributions in binary form must reproduce the above copyright
      notice, this list of conditions and the following disclaimer in
      the documentation and/or other materials provided with the
      distribution.
    * Neither the name of Intel Corporation nor the names of its
      contributors may be used to endorse or promote products derived
      from this software without specific prior written permission.

  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
  OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
**********************************************************************/

/**
 *  @file crc_combine_example.c
 *  @brief Example of CRC combine logic.
 *
 *  Combine functions can produce the CRC of independent pieces as though they
 *  had been computed sequentially. The example splits the combine functions
 *  into two parts: one that depends only on the length, and an optimized part
 *  that combines the previous and individual CRCs. By splitting this way, the
 *  length-dependent constants can be pre-computed and the remaining combine
 *  logic kept fast and simple.
 *
 */
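
// Usage sketch (mirrors main() below; the names b1, b2, l1, l2 and seed are
// illustrative): to combine the CRCs of two consecutive buffers into the CRC
// of the whole stream, shift the first CRC by the length of the data that
// follows it (l2 of at least 16 bytes here) and xor:
//   crc1 = crc32_iscsi(b1, l1, seed);
//   crc2 = crc32_iscsi(b2, l2, 0);
//   combined = crc32_iscsi_shiftx(crc1, calc_xi_mod(l2)) ^ crc2;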

// Compile as C++ for multi-function versions

#include <stdio.h>
#include <stdlib.h> // malloc, free, rand
#include <inttypes.h>
#include <immintrin.h>
#include <isa-l.h>

int verbose; // Global for tests

#if defined(_MSC_VER)
#define __builtin_parity(x) (__popcnt64(x) & 1)
#endif

#if defined(__GNUC__) || defined(__clang__)
#define ATTRIBUTE_TARGET(x) __attribute__((target(x)))
#else
#define ATTRIBUTE_TARGET(x)
#endif

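// Length-independent constants for a generic CRC64 polynomial: the folding
// constant (k5) and the reduction constants (k7, k8) used by
// crc64_refl_shiftx() and crc64_norm_shiftx().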
struct crc64_desc {
        uint64_t poly;
        uint64_t k5;
        uint64_t k7;
        uint64_t k8;
};

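// Derive the folding and reduction constants for a reflected (bit-reversed)
// CRC64 polynomial.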
void
gen_crc64_refl_consts(uint64_t poly, struct crc64_desc *c)
{
        uint64_t quotienth = 0;
        uint64_t div;
        uint64_t rem = 1ull;
        int i;

        for (i = 0; i < 64; i++) {
                div = (rem & 1ull) != 0;
                quotienth = (quotienth >> 1) | (div ? 0x8000000000000000ull : 0);
                rem = (div ? poly : 0) ^ (rem >> 1);
        }
        c->k5 = rem;
        c->poly = poly;
        c->k7 = quotienth;
        c->k8 = poly << 1;
}

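// Derive the folding and reduction constants for a normal (non-reflected)
// CRC64 polynomial.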
void
gen_crc64_norm_consts(uint64_t poly, struct crc64_desc *c)
{
        uint64_t quotientl = 0;
        uint64_t div;
        uint64_t rem = 1ull << 63;
        int i;

        for (i = 0; i < 65; i++) {
                div = (rem & 0x8000000000000000ull) != 0;
                quotientl = (quotientl << 1) | div;
                rem = (div ? poly : 0) ^ (rem << 1);
        }

        c->poly = poly;
        c->k5 = rem;
        c->k7 = quotientl;
        c->k8 = poly;
}

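// Length-dependent part of the CRC32C (iSCSI) combine: compute the shift
// constant for a segment that is followed by n bytes of data. Returns 0 for
// n < 16.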
uint32_t
calc_xi_mod(int n)
{
        uint32_t rem = 0x1ul;
        int i, j;

        const uint32_t poly = 0x82f63b78;

        if (n < 16)
                return 0;

        for (i = 0; i < n - 8; i++)
                for (j = 0; j < 8; j++)
                        rem = (rem & 0x1ul) ? (rem >> 1) ^ poly : (rem >> 1);

        return rem;
}

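// Length-dependent combine constant for a reflected CRC64 polynomial; the
// 64-bit analogue of calc_xi_mod(). Returns 0 for n < 32.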
uint64_t
calc64_refl_xi_mod(int n, struct crc64_desc *c)
{
        uint64_t rem = 1ull;
        int i, j;

        const uint64_t poly = c->poly;

        if (n < 32)
                return 0;

        for (i = 0; i < n - 16; i++)
                for (j = 0; j < 8; j++)
                        rem = (rem & 0x1ull) ? (rem >> 1) ^ poly : (rem >> 1);

        return rem;
}

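// Length-dependent combine constant for a normal (non-reflected) CRC64
// polynomial. Returns 0 for n < 32.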
uint64_t
calc64_norm_xi_mod(int n, struct crc64_desc *c)
{
        uint64_t rem = 1ull;
        int i, j;

        const uint64_t poly = c->poly;

        if (n < 32)
                return 0;

        for (i = 0; i < n - 8; i++)
                for (j = 0; j < 8; j++)
                        rem = (rem & 0x8000000000000000ull ? poly : 0) ^ (rem << 1);

        return rem;
}

// Base function for crc32_iscsi_shiftx() when using C++ multi-function versioning
#ifdef __cplusplus

static inline uint32_t
bit_reverse32(uint32_t x)
{
        x = (((x & 0xaaaaaaaa) >> 1) | ((x & 0x55555555) << 1));
        x = (((x & 0xcccccccc) >> 2) | ((x & 0x33333333) << 2));
        x = (((x & 0xf0f0f0f0) >> 4) | ((x & 0x0f0f0f0f) << 4));
        x = (((x & 0xff00ff00) >> 8) | ((x & 0x00ff00ff) << 8));
        return ((x >> 16) | (x << 16));
}

// Base function for crc32_iscsi_shiftx without clmul optimizations

ATTRIBUTE_TARGET("default")
uint32_t
crc32_iscsi_shiftx(uint32_t crc1, uint32_t x)
{
        int i;
        uint64_t xrev, q = 0;
        union {
                uint8_t a[8];
                uint64_t q;
        } qu;

        xrev = bit_reverse32(x);
        xrev <<= 32;

        for (i = 0; i < 64; i++, xrev >>= 1)
                q = (q << 1) | __builtin_parity(crc1 & xrev);

        qu.q = q;
        return crc32_iscsi(qu.a, 8, 0);
}
#endif // __cplusplus

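// Combine step for CRC32C (iSCSI): carry-less multiply the previous CRC by the
// length-dependent constant x, then reduce with the crc32 instruction.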
ATTRIBUTE_TARGET("pclmul,sse4.2")
uint32_t
crc32_iscsi_shiftx(uint32_t crc1, uint32_t x)
{
        __m128i crc1x, constx;
        uint64_t crc64;

        crc1x = _mm_setr_epi32(crc1, 0, 0, 0);
        constx = _mm_setr_epi32(x, 0, 0, 0);
        crc1x = _mm_clmulepi64_si128(crc1x, constx, 0);
        crc64 = _mm_cvtsi128_si64(crc1x);
        crc64 = _mm_crc32_u64(0, crc64);
        return crc64 & 0xffffffff;
}

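// Combine step for a reflected CRC64: multiply the previous CRC by the
// length-dependent constant x, then fold and reduce with the constants in c.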
ATTRIBUTE_TARGET("pclmul,sse4.2")
uint64_t
crc64_refl_shiftx(uint64_t crc1, uint64_t x, struct crc64_desc *c)
{
        __m128i crc1x, crc2x, crc3x, constx;
        const __m128i rk5 = _mm_loadu_si64(&c->k5);
        const __m128i rk7 = _mm_loadu_si128((__m128i *) &c->k7);

        crc1x = _mm_cvtsi64_si128(crc1);
        constx = _mm_cvtsi64_si128(x);
        crc1x = _mm_clmulepi64_si128(crc1x, constx, 0x00);

        // Fold to 64b
        crc2x = _mm_clmulepi64_si128(crc1x, rk5, 0x00);
        crc3x = _mm_bsrli_si128(crc1x, 8);
        crc1x = _mm_xor_si128(crc2x, crc3x);

        // Reduce
        crc2x = _mm_clmulepi64_si128(crc1x, rk7, 0x00);
        crc3x = _mm_clmulepi64_si128(crc2x, rk7, 0x10);
        crc2x = _mm_bslli_si128(crc2x, 8);
        crc1x = _mm_xor_si128(crc1x, crc2x);
        crc1x = _mm_xor_si128(crc1x, crc3x);
        return _mm_extract_epi64(crc1x, 1);
}

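// Combine step for a normal CRC64: multiply the previous CRC by the
// length-dependent constant x, then fold and reduce with the constants in c.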
ATTRIBUTE_TARGET("pclmul,sse4.2")
uint64_t
crc64_norm_shiftx(uint64_t crc1, uint64_t x, struct crc64_desc *c)
{
        __m128i crc1x, crc2x, crc3x, constx;
        const __m128i rk5 = _mm_loadu_si64(&c->k5);
        const __m128i rk7 = _mm_loadu_si128((__m128i *) &c->k7);

        crc1x = _mm_cvtsi64_si128(crc1);
        constx = _mm_cvtsi64_si128(x);
        crc1x = _mm_clmulepi64_si128(crc1x, constx, 0x00);

        // Fold to 64b
        crc2x = _mm_clmulepi64_si128(crc1x, rk5, 0x01);
        crc3x = _mm_bslli_si128(crc1x, 8);
        crc1x = _mm_xor_si128(crc2x, crc3x);

        // Reduce
        crc2x = _mm_clmulepi64_si128(crc1x, rk7, 0x01);
        crc2x = _mm_xor_si128(crc1x, crc2x);
        crc3x = _mm_clmulepi64_si128(crc2x, rk7, 0x11);
        crc1x = _mm_xor_si128(crc1x, crc3x);
        return _mm_extract_epi64(crc1x, 0);
}

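// Combine an array of n CRC32C values, each computed over a 4 KiB block, into
// the CRC of the whole buffer using a single pre-computed 4 KiB shift constant.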
uint32_t
crc32_iscsi_combine_4k(uint32_t *crc_array, int n)
{
        const uint32_t cn4k = 0x82f89c77; // calc_xi_mod(4*1024);
        int i;

        if (n < 1)
                return 0;

        uint32_t crc = crc_array[0];

        for (i = 1; i < n; i++)
                crc = crc32_iscsi_shiftx(crc, cn4k) ^ crc_array[i];

        return crc;
}

// Tests

#define printv(...)                                                                                \
        {                                                                                          \
                if (verbose)                                                                       \
                        printf(__VA_ARGS__);                                                       \
                else                                                                               \
                        printf(".");                                                               \
        }

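// Split a buffer into segments, combine the segment CRC64s with the shift
// functions above, and compare against the CRC of the whole buffer.
// Returns 0 on success, non-zero on mismatch.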
uint64_t
test_combine64(uint8_t *inp, size_t len, uint64_t poly, int reflected,
               uint64_t (*func)(uint64_t, const uint8_t *, uint64_t))
{

        uint64_t crc64_init, crc64, crc64a, crc64b;
        uint64_t crc64_1, crc64_2, crc64_3, crc64_n, err = 0;
        uint64_t xi_mod;
        struct crc64_desc crc64_c;
        size_t l1, l2, l3;

        l1 = len / 2;
        l2 = len - l1;

        crc64_init = rand();
        crc64 = func(crc64_init, inp, len);
        printv("\ncrc64 all       = 0x%" PRIx64 "\n", crc64);

        // Do a sequential crc update
        crc64a = func(crc64_init, &inp[0], l1);
        crc64b = func(crc64a, &inp[l1], l2);
        printv("crc64 seq       = 0x%" PRIx64 "\n", crc64b);

        // Split into 2 independent crc calc and combine
        crc64_1 = func(crc64_init, &inp[0], l1);
        crc64_2 = func(0, &inp[l1], l2);

        if (reflected) {
                gen_crc64_refl_consts(poly, &crc64_c);
                // Shift crc64_1 by the length of the data that follows it (l2 bytes)
                xi_mod = calc64_refl_xi_mod(l2, &crc64_c);
                crc64_1 = crc64_refl_shiftx(crc64_1, xi_mod, &crc64_c);
        } else {
                gen_crc64_norm_consts(poly, &crc64_c);
                xi_mod = calc64_norm_xi_mod(l2, &crc64_c);
                crc64_1 = crc64_norm_shiftx(crc64_1, xi_mod, &crc64_c);
        }
        crc64_n = crc64_1 ^ crc64_2;

        printv("crc64 combined2 = 0x%" PRIx64 "\n", crc64_n);
        err |= crc64_n ^ crc64;
        if (err)
                return err;

        // Split into 3 uneven segments and combine
        l1 = len / 3;
        l2 = (len / 3) - 3;
        l3 = len - l2 - l1;
        crc64_1 = func(crc64_init, &inp[0], l1);
        crc64_2 = func(0, &inp[l1], l2);
        crc64_3 = func(0, &inp[l1 + l2], l3);
        if (reflected) {
                xi_mod = calc64_refl_xi_mod(l3, &crc64_c);
                crc64_2 = crc64_refl_shiftx(crc64_2, xi_mod, &crc64_c);
                xi_mod = calc64_refl_xi_mod(len - l1, &crc64_c);
                crc64_1 = crc64_refl_shiftx(crc64_1, xi_mod, &crc64_c);
        } else {
                xi_mod = calc64_norm_xi_mod(l3, &crc64_c);
                crc64_2 = crc64_norm_shiftx(crc64_2, xi_mod, &crc64_c);
                xi_mod = calc64_norm_xi_mod(len - l1, &crc64_c);
                crc64_1 = crc64_norm_shiftx(crc64_1, xi_mod, &crc64_c);
        }
        crc64_n = crc64_1 ^ crc64_2 ^ crc64_3;

        printv("crc64 combined3 = 0x%" PRIx64 "\n", crc64_n);
        err |= crc64_n ^ crc64;

        return err;
}

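// Test sizes: N is a 1 KiB base segment, B and T cover two and three segments,
// N4k is the 4 KiB block size and NMAX the number of 4 KiB blocks in the
// test buffer.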
#define N         (1024)
#define B         (2 * N)
#define T         (3 * N)
#define N4k       (4 * 1024)
#define NMAX      32
#define NMAX_SIZE (NMAX * N4k)

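// Exercise the CRC32C (iSCSI) combine over two and three segments and the
// 4 KiB array combine, then run the generic CRC64 combine tests for the ECMA,
// ISO and Jones polynomials in reflected and normal forms.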
int
main(int argc, char *argv[])
{
        int i;
        uint32_t crc, crca, crcb, crc1, crc2, crc3, crcn;
        uint32_t crc_init = rand();
        uint64_t err = 0; // 64-bit so the CRC64 test results are not truncated
        uint8_t *inp = (uint8_t *) malloc(NMAX_SIZE);
        verbose = argc - 1;

        if (NULL == inp)
                return -1;

        for (int i = 0; i < NMAX_SIZE; i++)
                inp[i] = rand();

        printf("crc_combine_test:");

        // Calc crc all at once
        crc = crc32_iscsi(inp, B, crc_init);
        printv("\ncrcB all       = 0x%" PRIx32 "\n", crc);

        // Do a sequential crc update
        crca = crc32_iscsi(&inp[0], N, crc_init);
        crcb = crc32_iscsi(&inp[N], N, crca);
        printv("crcB seq       = 0x%" PRIx32 "\n", crcb);

        // Split into 2 independent crc calc and combine
        crc1 = crc32_iscsi(&inp[0], N, crc_init);
        crc2 = crc32_iscsi(&inp[N], N, 0);
        crcn = crc32_iscsi_shiftx(crc1, calc_xi_mod(N)) ^ crc2;
        printv("crcB combined2 = 0x%" PRIx32 "\n", crcn);
        err |= crcn ^ crc;

        // Split into 3 uneven segments and combine
        crc1 = crc32_iscsi(&inp[0], 100, crc_init);
        crc2 = crc32_iscsi(&inp[100], 100, 0);
        crc3 = crc32_iscsi(&inp[200], B - 200, 0);
        crcn = crc3 ^ crc32_iscsi_shiftx(crc2, calc_xi_mod(B - 200)) ^
               crc32_iscsi_shiftx(crc1, calc_xi_mod(B - 100));
        printv("crcB combined3 = 0x%" PRIx32 "\n\n", crcn);
        err |= crcn ^ crc;

        // Call all size T at once
        crc = crc32_iscsi(inp, T, crc_init);
        printv("crcT all       = 0x%" PRIx32 "\n", crc);

        // Split into 3 segments and combine with 2 consts
        crc1 = crc32_iscsi(&inp[0], N, crc_init);
        crc2 = crc32_iscsi(&inp[N], N, 0);
        crc3 = crc32_iscsi(&inp[2 * N], N, 0);
        crcn = crc3 ^ crc32_iscsi_shiftx(crc2, calc_xi_mod(N)) ^
               crc32_iscsi_shiftx(crc1, calc_xi_mod(2 * N));
        printv("crcT combined3 = 0x%" PRIx32 "\n", crcn);
        err |= crcn ^ crc;

        // Combine 3 segments with one const by sequential shift
        uint32_t xi_mod_n = calc_xi_mod(N);
        crcn = crc3 ^ crc32_iscsi_shiftx(crc32_iscsi_shiftx(crc1, xi_mod_n) ^ crc2, xi_mod_n);
        printv("crcT comb3 seq = 0x%" PRIx32 "\n\n", crcn);
        err |= crcn ^ crc;

        // Test 4k array function
        crc = crc32_iscsi(inp, NMAX_SIZE, crc_init);
        printv("crc 4k x n all = 0x%" PRIx32 "\n", crc);

        // Test crc 4k array combine function
        uint32_t crcs[NMAX];
        crcs[0] = crc32_iscsi(&inp[0], N4k, crc_init);
        for (i = 1; i < NMAX; i++)
                crcs[i] = crc32_iscsi(&inp[i * N4k], N4k, 0);

        crcn = crc32_iscsi_combine_4k(crcs, NMAX);
        printv("crc4k_array    = 0x%" PRIx32 "\n", crcn);
        err |= crcn ^ crc;

        // CRC64 generic poly tests - reflected
        uint64_t len = NMAX_SIZE;
        err |= test_combine64(inp, len, 0xc96c5795d7870f42ull, 1, crc64_ecma_refl);
        err |= test_combine64(inp, len, 0xd800000000000000ull, 1, crc64_iso_refl);
        err |= test_combine64(inp, len, 0x95ac9329ac4bc9b5ull, 1, crc64_jones_refl);

        // CRC64 non-reflected polynomial tests
        err |= test_combine64(inp, len, 0x42f0e1eba9ea3693ull, 0, crc64_ecma_norm);
        err |= test_combine64(inp, len, 0x000000000000001bull, 0, crc64_iso_norm);
        err |= test_combine64(inp, len, 0xad93d23594c935a9ull, 0, crc64_jones_norm);

        printf(err == 0 ? "pass\n" : "fail\n");
        free(inp);
        return err;
}
459