175fd0b74Schristos /* crc32.c -- compute the CRC-32 of a data stream
2*e992f068Schristos * Copyright (C) 1995-2022 Mark Adler
375fd0b74Schristos * For conditions of distribution and use, see copyright notice in zlib.h
475fd0b74Schristos *
5*e992f068Schristos * This interleaved implementation of a CRC makes use of pipelined multiple
6*e992f068Schristos * arithmetic-logic units, commonly found in modern CPU cores. It is due to
7*e992f068Schristos * Kadatch and Jenkins (2010). See doc/crc-doc.1.0.pdf in this distribution.
875fd0b74Schristos */
975fd0b74Schristos
10*e992f068Schristos /* @(#) Id */
1175fd0b74Schristos
1275fd0b74Schristos /*
1375fd0b74Schristos Note on the use of DYNAMIC_CRC_TABLE: there is no mutex or semaphore
1475fd0b74Schristos protection on the static variables used to control the first-use generation
1575fd0b74Schristos of the crc tables. Therefore, if you #define DYNAMIC_CRC_TABLE, you should
1675fd0b74Schristos first call get_crc_table() to initialize the tables before allowing more than
1775fd0b74Schristos one thread to use crc32().
1875fd0b74Schristos
19*e992f068Schristos MAKECRCH can be #defined to write out crc32.h. A main() routine is also
20*e992f068Schristos produced, so that this one source file can be compiled to an executable.
2175fd0b74Schristos */
2275fd0b74Schristos
2375fd0b74Schristos #ifdef MAKECRCH
2475fd0b74Schristos # include <stdio.h>
2575fd0b74Schristos # ifndef DYNAMIC_CRC_TABLE
2675fd0b74Schristos # define DYNAMIC_CRC_TABLE
2775fd0b74Schristos # endif /* !DYNAMIC_CRC_TABLE */
2875fd0b74Schristos #endif /* MAKECRCH */
2975fd0b74Schristos
30*e992f068Schristos #include "zutil.h" /* for Z_U4, Z_U8, z_crc_t, and FAR definitions */
3175fd0b74Schristos
32*e992f068Schristos /*
33*e992f068Schristos A CRC of a message is computed on N braids of words in the message, where
34*e992f068Schristos each word consists of W bytes (4 or 8). If N is 3, for example, then three
35*e992f068Schristos running sparse CRCs are calculated respectively on each braid, at these
36*e992f068Schristos indices in the array of words: 0, 3, 6, ..., 1, 4, 7, ..., and 2, 5, 8, ...
37*e992f068Schristos This is done starting at a word boundary, and continues until as many blocks
38*e992f068Schristos of N * W bytes as are available have been processed. The results are combined
39*e992f068Schristos into a single CRC at the end. For this code, N must be in the range 1..6 and
40*e992f068Schristos W must be 4 or 8. The upper limit on N can be increased if desired by adding
41*e992f068Schristos more #if blocks, extending the patterns apparent in the code. In addition,
42*e992f068Schristos crc32.h would need to be regenerated, if the maximum N value is increased.
43*e992f068Schristos
44*e992f068Schristos N and W are chosen empirically by benchmarking the execution time on a given
45*e992f068Schristos processor. The choices for N and W below were based on testing on Intel Kaby
46*e992f068Schristos Lake i7, AMD Ryzen 7, ARM Cortex-A57, Sparc64-VII, PowerPC POWER9, and MIPS64
47*e992f068Schristos Octeon II processors. The Intel, AMD, and ARM processors were all fastest
48*e992f068Schristos with N=5, W=8. The Sparc, PowerPC, and MIPS64 were all fastest at N=5, W=4.
49*e992f068Schristos They were all tested with either gcc or clang, all using the -O3 optimization
50*e992f068Schristos level. Your mileage may vary.
51*e992f068Schristos */
52*e992f068Schristos
53*e992f068Schristos /* Define N */
54*e992f068Schristos #ifdef Z_TESTN
55*e992f068Schristos # define N Z_TESTN
5675fd0b74Schristos #else
57*e992f068Schristos # define N 5
58*e992f068Schristos #endif
59*e992f068Schristos #if N < 1 || N > 6
60*e992f068Schristos # error N must be in 1..6
61*e992f068Schristos #endif
6275fd0b74Schristos
63*e992f068Schristos /*
64*e992f068Schristos z_crc_t must be at least 32 bits. z_word_t must be at least as long as
65*e992f068Schristos z_crc_t. It is assumed here that z_word_t is either 32 bits or 64 bits, and
66*e992f068Schristos that bytes are eight bits.
67*e992f068Schristos */
6875fd0b74Schristos
69*e992f068Schristos /*
70*e992f068Schristos Define W and the associated z_word_t type. If W is not defined, then a
71*e992f068Schristos braided calculation is not used, and the associated tables and code are not
72*e992f068Schristos compiled.
73*e992f068Schristos */
74*e992f068Schristos #ifdef Z_TESTW
75*e992f068Schristos # if Z_TESTW-1 != -1
76*e992f068Schristos # define W Z_TESTW
77*e992f068Schristos # endif
78*e992f068Schristos #else
79*e992f068Schristos # ifdef MAKECRCH
80*e992f068Schristos # define W 8 /* required for MAKECRCH */
81*e992f068Schristos # else
82*e992f068Schristos # if defined(__x86_64__) || defined(__aarch64__)
83*e992f068Schristos # define W 8
84*e992f068Schristos # else
85*e992f068Schristos # define W 4
86*e992f068Schristos # endif
87*e992f068Schristos # endif
88*e992f068Schristos #endif
89*e992f068Schristos #ifdef W
90*e992f068Schristos # if W == 8 && defined(Z_U8)
91*e992f068Schristos typedef Z_U8 z_word_t;
92*e992f068Schristos # elif defined(Z_U4)
93*e992f068Schristos # undef W
94*e992f068Schristos # define W 4
95*e992f068Schristos typedef Z_U4 z_word_t;
96*e992f068Schristos # else
97*e992f068Schristos # undef W
98*e992f068Schristos # endif
99*e992f068Schristos #endif
100*e992f068Schristos
101*e992f068Schristos /* Local functions. */
102*e992f068Schristos local z_crc_t multmodp OF((z_crc_t a, z_crc_t b));
103*e992f068Schristos local z_crc_t x2nmodp OF((z_off64_t n, unsigned k));
104*e992f068Schristos
105*e992f068Schristos /* If available, use the ARM processor CRC32 instruction. */
106*e992f068Schristos #if defined(__aarch64__) && defined(__ARM_FEATURE_CRC32) && W == 8
107*e992f068Schristos # define ARMCRC32
108*e992f068Schristos #endif
109*e992f068Schristos
110*e992f068Schristos #if defined(W) && (!defined(ARMCRC32) || defined(DYNAMIC_CRC_TABLE))
111*e992f068Schristos /*
112*e992f068Schristos Swap the bytes in a z_word_t to convert between little and big endian. Any
113*e992f068Schristos self-respecting compiler will optimize this to a single machine byte-swap
114*e992f068Schristos instruction, if one is available. This assumes that word_t is either 32 bits
115*e992f068Schristos or 64 bits.
116*e992f068Schristos */
byte_swap(word)117*e992f068Schristos local z_word_t byte_swap(word)
118*e992f068Schristos z_word_t word;
119*e992f068Schristos {
120*e992f068Schristos # if W == 8
121*e992f068Schristos return
122*e992f068Schristos (word & 0xff00000000000000) >> 56 |
123*e992f068Schristos (word & 0xff000000000000) >> 40 |
124*e992f068Schristos (word & 0xff0000000000) >> 24 |
125*e992f068Schristos (word & 0xff00000000) >> 8 |
126*e992f068Schristos (word & 0xff000000) << 8 |
127*e992f068Schristos (word & 0xff0000) << 24 |
128*e992f068Schristos (word & 0xff00) << 40 |
129*e992f068Schristos (word & 0xff) << 56;
130*e992f068Schristos # else /* W == 4 */
131*e992f068Schristos return
132*e992f068Schristos (word & 0xff000000) >> 24 |
133*e992f068Schristos (word & 0xff0000) >> 8 |
134*e992f068Schristos (word & 0xff00) << 8 |
135*e992f068Schristos (word & 0xff) << 24;
136*e992f068Schristos # endif
137*e992f068Schristos }
138*e992f068Schristos #endif
139*e992f068Schristos
140*e992f068Schristos /* CRC polynomial. */
141*e992f068Schristos #define POLY 0xedb88320 /* p(x) reflected, with x^32 implied */
14275fd0b74Schristos
14375fd0b74Schristos #ifdef DYNAMIC_CRC_TABLE
14475fd0b74Schristos
145*e992f068Schristos local z_crc_t FAR crc_table[256];
146*e992f068Schristos local z_crc_t FAR x2n_table[32];
14775fd0b74Schristos local void make_crc_table OF((void));
148*e992f068Schristos #ifdef W
149*e992f068Schristos local z_word_t FAR crc_big_table[256];
150*e992f068Schristos local z_crc_t FAR crc_braid_table[W][256];
151*e992f068Schristos local z_word_t FAR crc_braid_big_table[W][256];
152*e992f068Schristos local void braid OF((z_crc_t [][256], z_word_t [][256], int, int));
153*e992f068Schristos #endif
15475fd0b74Schristos #ifdef MAKECRCH
155*e992f068Schristos local void write_table OF((FILE *, const z_crc_t FAR *, int));
156*e992f068Schristos local void write_table32hi OF((FILE *, const z_word_t FAR *, int));
157*e992f068Schristos local void write_table64 OF((FILE *, const z_word_t FAR *, int));
15875fd0b74Schristos #endif /* MAKECRCH */
159*e992f068Schristos
160*e992f068Schristos /*
161*e992f068Schristos Define a once() function depending on the availability of atomics. If this is
162*e992f068Schristos compiled with DYNAMIC_CRC_TABLE defined, and if CRCs will be computed in
163*e992f068Schristos multiple threads, and if atomics are not available, then get_crc_table() must
164*e992f068Schristos be called to initialize the tables and must return before any threads are
165*e992f068Schristos allowed to compute or combine CRCs.
166*e992f068Schristos */
167*e992f068Schristos
168*e992f068Schristos /* Definition of once functionality. */
169*e992f068Schristos typedef struct once_s once_t;
170*e992f068Schristos local void once OF((once_t *, void (*)(void)));
171*e992f068Schristos
172*e992f068Schristos /* Check for the availability of atomics. */
173*e992f068Schristos #if defined(__STDC__) && __STDC_VERSION__ >= 201112L && \
174*e992f068Schristos !defined(__STDC_NO_ATOMICS__)
175*e992f068Schristos
176*e992f068Schristos #include <stdatomic.h>
177*e992f068Schristos
178*e992f068Schristos /* Structure for once(), which must be initialized with ONCE_INIT. */
179*e992f068Schristos struct once_s {
180*e992f068Schristos atomic_flag begun;
181*e992f068Schristos atomic_int done;
182*e992f068Schristos };
183*e992f068Schristos #define ONCE_INIT {ATOMIC_FLAG_INIT, 0}
184*e992f068Schristos
185*e992f068Schristos /*
186*e992f068Schristos Run the provided init() function exactly once, even if multiple threads
187*e992f068Schristos invoke once() at the same time. The state must be a once_t initialized with
188*e992f068Schristos ONCE_INIT.
189*e992f068Schristos */
once(state,init)190*e992f068Schristos local void once(state, init)
191*e992f068Schristos once_t *state;
192*e992f068Schristos void (*init)(void);
193*e992f068Schristos {
194*e992f068Schristos if (!atomic_load(&state->done)) {
195*e992f068Schristos if (atomic_flag_test_and_set(&state->begun))
196*e992f068Schristos while (!atomic_load(&state->done))
197*e992f068Schristos ;
198*e992f068Schristos else {
199*e992f068Schristos init();
200*e992f068Schristos atomic_store(&state->done, 1);
201*e992f068Schristos }
202*e992f068Schristos }
203*e992f068Schristos }
204*e992f068Schristos
205*e992f068Schristos #else /* no atomics */
206*e992f068Schristos
207*e992f068Schristos /* Structure for once(), which must be initialized with ONCE_INIT. */
208*e992f068Schristos struct once_s {
209*e992f068Schristos volatile int begun;
210*e992f068Schristos volatile int done;
211*e992f068Schristos };
212*e992f068Schristos #define ONCE_INIT {0, 0}
213*e992f068Schristos
214*e992f068Schristos /* Test and set. Alas, not atomic, but tries to minimize the period of
215*e992f068Schristos vulnerability. */
216*e992f068Schristos local int test_and_set OF((int volatile *));
test_and_set(flag)217*e992f068Schristos local int test_and_set(flag)
218*e992f068Schristos int volatile *flag;
219*e992f068Schristos {
220*e992f068Schristos int was;
221*e992f068Schristos
222*e992f068Schristos was = *flag;
223*e992f068Schristos *flag = 1;
224*e992f068Schristos return was;
225*e992f068Schristos }
226*e992f068Schristos
227*e992f068Schristos /* Run the provided init() function once. This is not thread-safe. */
once(state,init)228*e992f068Schristos local void once(state, init)
229*e992f068Schristos once_t *state;
230*e992f068Schristos void (*init)(void);
231*e992f068Schristos {
232*e992f068Schristos if (!state->done) {
233*e992f068Schristos if (test_and_set(&state->begun))
234*e992f068Schristos while (!state->done)
235*e992f068Schristos ;
236*e992f068Schristos else {
237*e992f068Schristos init();
238*e992f068Schristos state->done = 1;
239*e992f068Schristos }
240*e992f068Schristos }
241*e992f068Schristos }
242*e992f068Schristos
243*e992f068Schristos #endif
244*e992f068Schristos
245*e992f068Schristos /* State for once(). */
246*e992f068Schristos local once_t made = ONCE_INIT;
247*e992f068Schristos
24875fd0b74Schristos /*
24975fd0b74Schristos Generate tables for a byte-wise 32-bit CRC calculation on the polynomial:
25075fd0b74Schristos x^32+x^26+x^23+x^22+x^16+x^12+x^11+x^10+x^8+x^7+x^5+x^4+x^2+x+1.
25175fd0b74Schristos
25275fd0b74Schristos Polynomials over GF(2) are represented in binary, one bit per coefficient,
25375fd0b74Schristos with the lowest powers in the most significant bit. Then adding polynomials
25475fd0b74Schristos is just exclusive-or, and multiplying a polynomial by x is a right shift by
25575fd0b74Schristos one. If we call the above polynomial p, and represent a byte as the
25675fd0b74Schristos polynomial q, also with the lowest power in the most significant bit (so the
257*e992f068Schristos byte 0xb1 is the polynomial x^7+x^3+x^2+1), then the CRC is (q*x^32) mod p,
25875fd0b74Schristos where a mod b means the remainder after dividing a by b.
25975fd0b74Schristos
26075fd0b74Schristos This calculation is done using the shift-register method of multiplying and
26175fd0b74Schristos taking the remainder. The register is initialized to zero, and for each
26275fd0b74Schristos incoming bit, x^32 is added mod p to the register if the bit is a one (where
263*e992f068Schristos x^32 mod p is p+x^32 = x^26+...+1), and the register is multiplied mod p by x
264*e992f068Schristos (which is shifting right by one and adding x^32 mod p if the bit shifted out
265*e992f068Schristos is a one). We start with the highest power (least significant bit) of q and
266*e992f068Schristos repeat for all eight bits of q.
26775fd0b74Schristos
268*e992f068Schristos The table is simply the CRC of all possible eight bit values. This is all the
269*e992f068Schristos information needed to generate CRCs on data a byte at a time for all
270*e992f068Schristos combinations of CRC register values and incoming bytes.
27175fd0b74Schristos */
272*e992f068Schristos
make_crc_table()27375fd0b74Schristos local void make_crc_table()
27475fd0b74Schristos {
275*e992f068Schristos unsigned i, j, n;
276*e992f068Schristos z_crc_t p;
27775fd0b74Schristos
278*e992f068Schristos /* initialize the CRC of bytes tables */
279*e992f068Schristos for (i = 0; i < 256; i++) {
280*e992f068Schristos p = i;
281*e992f068Schristos for (j = 0; j < 8; j++)
282*e992f068Schristos p = p & 1 ? (p >> 1) ^ POLY : p >> 1;
283*e992f068Schristos crc_table[i] = p;
284*e992f068Schristos #ifdef W
285*e992f068Schristos crc_big_table[i] = byte_swap(p);
286*e992f068Schristos #endif
28775fd0b74Schristos }
28875fd0b74Schristos
289*e992f068Schristos /* initialize the x^2^n mod p(x) table */
290*e992f068Schristos p = (z_crc_t)1 << 30; /* x^1 */
291*e992f068Schristos x2n_table[0] = p;
292*e992f068Schristos for (n = 1; n < 32; n++)
293*e992f068Schristos x2n_table[n] = p = multmodp(p, p);
29475fd0b74Schristos
295*e992f068Schristos #ifdef W
296*e992f068Schristos /* initialize the braiding tables -- needs x2n_table[] */
297*e992f068Schristos braid(crc_braid_table, crc_braid_big_table, N, W);
298*e992f068Schristos #endif
29975fd0b74Schristos
30075fd0b74Schristos #ifdef MAKECRCH
30175fd0b74Schristos {
302*e992f068Schristos /*
303*e992f068Schristos The crc32.h header file contains tables for both 32-bit and 64-bit
304*e992f068Schristos z_word_t's, and so requires a 64-bit type be available. In that case,
305*e992f068Schristos z_word_t must be defined to be 64-bits. This code then also generates
306*e992f068Schristos and writes out the tables for the case that z_word_t is 32 bits.
307*e992f068Schristos */
308*e992f068Schristos #if !defined(W) || W != 8
309*e992f068Schristos # error Need a 64-bit integer type in order to generate crc32.h.
310*e992f068Schristos #endif
31175fd0b74Schristos FILE *out;
312*e992f068Schristos int k, n;
313*e992f068Schristos z_crc_t ltl[8][256];
314*e992f068Schristos z_word_t big[8][256];
31575fd0b74Schristos
31675fd0b74Schristos out = fopen("crc32.h", "w");
31775fd0b74Schristos if (out == NULL) return;
318*e992f068Schristos
319*e992f068Schristos /* write out little-endian CRC table to crc32.h */
320*e992f068Schristos fprintf(out,
321*e992f068Schristos "/* crc32.h -- tables for rapid CRC calculation\n"
322*e992f068Schristos " * Generated automatically by crc32.c\n */\n"
323*e992f068Schristos "\n"
324*e992f068Schristos "local const z_crc_t FAR crc_table[] = {\n"
325*e992f068Schristos " ");
326*e992f068Schristos write_table(out, crc_table, 256);
327*e992f068Schristos fprintf(out,
328*e992f068Schristos "};\n");
329*e992f068Schristos
330*e992f068Schristos /* write out big-endian CRC table for 64-bit z_word_t to crc32.h */
331*e992f068Schristos fprintf(out,
332*e992f068Schristos "\n"
333*e992f068Schristos "#ifdef W\n"
334*e992f068Schristos "\n"
335*e992f068Schristos "#if W == 8\n"
336*e992f068Schristos "\n"
337*e992f068Schristos "local const z_word_t FAR crc_big_table[] = {\n"
338*e992f068Schristos " ");
339*e992f068Schristos write_table64(out, crc_big_table, 256);
340*e992f068Schristos fprintf(out,
341*e992f068Schristos "};\n");
342*e992f068Schristos
343*e992f068Schristos /* write out big-endian CRC table for 32-bit z_word_t to crc32.h */
344*e992f068Schristos fprintf(out,
345*e992f068Schristos "\n"
346*e992f068Schristos "#else /* W == 4 */\n"
347*e992f068Schristos "\n"
348*e992f068Schristos "local const z_word_t FAR crc_big_table[] = {\n"
349*e992f068Schristos " ");
350*e992f068Schristos write_table32hi(out, crc_big_table, 256);
351*e992f068Schristos fprintf(out,
352*e992f068Schristos "};\n"
353*e992f068Schristos "\n"
354*e992f068Schristos "#endif\n");
355*e992f068Schristos
356*e992f068Schristos /* write out braid tables for each value of N */
357*e992f068Schristos for (n = 1; n <= 6; n++) {
358*e992f068Schristos fprintf(out,
359*e992f068Schristos "\n"
360*e992f068Schristos "#if N == %d\n", n);
361*e992f068Schristos
362*e992f068Schristos /* compute braid tables for this N and 64-bit word_t */
363*e992f068Schristos braid(ltl, big, n, 8);
364*e992f068Schristos
365*e992f068Schristos /* write out braid tables for 64-bit z_word_t to crc32.h */
366*e992f068Schristos fprintf(out,
367*e992f068Schristos "\n"
368*e992f068Schristos "#if W == 8\n"
369*e992f068Schristos "\n"
370*e992f068Schristos "local const z_crc_t FAR crc_braid_table[][256] = {\n");
371*e992f068Schristos for (k = 0; k < 8; k++) {
372*e992f068Schristos fprintf(out, " {");
373*e992f068Schristos write_table(out, ltl[k], 256);
374*e992f068Schristos fprintf(out, "}%s", k < 7 ? ",\n" : "");
37575fd0b74Schristos }
376*e992f068Schristos fprintf(out,
377*e992f068Schristos "};\n"
378*e992f068Schristos "\n"
379*e992f068Schristos "local const z_word_t FAR crc_braid_big_table[][256] = {\n");
380*e992f068Schristos for (k = 0; k < 8; k++) {
381*e992f068Schristos fprintf(out, " {");
382*e992f068Schristos write_table64(out, big[k], 256);
383*e992f068Schristos fprintf(out, "}%s", k < 7 ? ",\n" : "");
384*e992f068Schristos }
385*e992f068Schristos fprintf(out,
386*e992f068Schristos "};\n");
387*e992f068Schristos
388*e992f068Schristos /* compute braid tables for this N and 32-bit word_t */
389*e992f068Schristos braid(ltl, big, n, 4);
390*e992f068Schristos
391*e992f068Schristos /* write out braid tables for 32-bit z_word_t to crc32.h */
392*e992f068Schristos fprintf(out,
393*e992f068Schristos "\n"
394*e992f068Schristos "#else /* W == 4 */\n"
395*e992f068Schristos "\n"
396*e992f068Schristos "local const z_crc_t FAR crc_braid_table[][256] = {\n");
397*e992f068Schristos for (k = 0; k < 4; k++) {
398*e992f068Schristos fprintf(out, " {");
399*e992f068Schristos write_table(out, ltl[k], 256);
400*e992f068Schristos fprintf(out, "}%s", k < 3 ? ",\n" : "");
401*e992f068Schristos }
402*e992f068Schristos fprintf(out,
403*e992f068Schristos "};\n"
404*e992f068Schristos "\n"
405*e992f068Schristos "local const z_word_t FAR crc_braid_big_table[][256] = {\n");
406*e992f068Schristos for (k = 0; k < 4; k++) {
407*e992f068Schristos fprintf(out, " {");
408*e992f068Schristos write_table32hi(out, big[k], 256);
409*e992f068Schristos fprintf(out, "}%s", k < 3 ? ",\n" : "");
410*e992f068Schristos }
411*e992f068Schristos fprintf(out,
412*e992f068Schristos "};\n"
413*e992f068Schristos "\n"
414*e992f068Schristos "#endif\n"
415*e992f068Schristos "\n"
416*e992f068Schristos "#endif\n");
417*e992f068Schristos }
418*e992f068Schristos fprintf(out,
419*e992f068Schristos "\n"
420*e992f068Schristos "#endif\n");
421*e992f068Schristos
422*e992f068Schristos /* write out zeros operator table to crc32.h */
423*e992f068Schristos fprintf(out,
424*e992f068Schristos "\n"
425*e992f068Schristos "local const z_crc_t FAR x2n_table[] = {\n"
426*e992f068Schristos " ");
427*e992f068Schristos write_table(out, x2n_table, 32);
428*e992f068Schristos fprintf(out,
429*e992f068Schristos "};\n");
43075fd0b74Schristos fclose(out);
43175fd0b74Schristos }
43275fd0b74Schristos #endif /* MAKECRCH */
43375fd0b74Schristos }
43475fd0b74Schristos
43575fd0b74Schristos #ifdef MAKECRCH
436*e992f068Schristos
437*e992f068Schristos /*
438*e992f068Schristos Write the 32-bit values in table[0..k-1] to out, five per line in
439*e992f068Schristos hexadecimal separated by commas.
440*e992f068Schristos */
write_table(out,table,k)441*e992f068Schristos local void write_table(out, table, k)
44275fd0b74Schristos FILE *out;
44375fd0b74Schristos const z_crc_t FAR *table;
444*e992f068Schristos int k;
44575fd0b74Schristos {
44675fd0b74Schristos int n;
44775fd0b74Schristos
448*e992f068Schristos for (n = 0; n < k; n++)
449*e992f068Schristos fprintf(out, "%s0x%08lx%s", n == 0 || n % 5 ? "" : " ",
45075fd0b74Schristos (unsigned long)(table[n]),
451*e992f068Schristos n == k - 1 ? "" : (n % 5 == 4 ? ",\n" : ", "));
45275fd0b74Schristos }
453*e992f068Schristos
454*e992f068Schristos /*
455*e992f068Schristos Write the high 32-bits of each value in table[0..k-1] to out, five per line
456*e992f068Schristos in hexadecimal separated by commas.
457*e992f068Schristos */
write_table32hi(out,table,k)458*e992f068Schristos local void write_table32hi(out, table, k)
459*e992f068Schristos FILE *out;
460*e992f068Schristos const z_word_t FAR *table;
461*e992f068Schristos int k;
462*e992f068Schristos {
463*e992f068Schristos int n;
464*e992f068Schristos
465*e992f068Schristos for (n = 0; n < k; n++)
466*e992f068Schristos fprintf(out, "%s0x%08lx%s", n == 0 || n % 5 ? "" : " ",
467*e992f068Schristos (unsigned long)(table[n] >> 32),
468*e992f068Schristos n == k - 1 ? "" : (n % 5 == 4 ? ",\n" : ", "));
469*e992f068Schristos }
470*e992f068Schristos
471*e992f068Schristos /*
472*e992f068Schristos Write the 64-bit values in table[0..k-1] to out, three per line in
473*e992f068Schristos hexadecimal separated by commas. This assumes that if there is a 64-bit
474*e992f068Schristos type, then there is also a long long integer type, and it is at least 64
475*e992f068Schristos bits. If not, then the type cast and format string can be adjusted
476*e992f068Schristos accordingly.
477*e992f068Schristos */
write_table64(out,table,k)478*e992f068Schristos local void write_table64(out, table, k)
479*e992f068Schristos FILE *out;
480*e992f068Schristos const z_word_t FAR *table;
481*e992f068Schristos int k;
482*e992f068Schristos {
483*e992f068Schristos int n;
484*e992f068Schristos
485*e992f068Schristos for (n = 0; n < k; n++)
486*e992f068Schristos fprintf(out, "%s0x%016llx%s", n == 0 || n % 3 ? "" : " ",
487*e992f068Schristos (unsigned long long)(table[n]),
488*e992f068Schristos n == k - 1 ? "" : (n % 3 == 2 ? ",\n" : ", "));
489*e992f068Schristos }
490*e992f068Schristos
491*e992f068Schristos /* Actually do the deed. */
main()492*e992f068Schristos int main()
493*e992f068Schristos {
494*e992f068Schristos make_crc_table();
495*e992f068Schristos return 0;
496*e992f068Schristos }
497*e992f068Schristos
49875fd0b74Schristos #endif /* MAKECRCH */
49975fd0b74Schristos
500*e992f068Schristos #ifdef W
501*e992f068Schristos /*
502*e992f068Schristos Generate the little and big-endian braid tables for the given n and z_word_t
503*e992f068Schristos size w. Each array must have room for w blocks of 256 elements.
504*e992f068Schristos */
braid(ltl,big,n,w)505*e992f068Schristos local void braid(ltl, big, n, w)
506*e992f068Schristos z_crc_t ltl[][256];
507*e992f068Schristos z_word_t big[][256];
508*e992f068Schristos int n;
509*e992f068Schristos int w;
510*e992f068Schristos {
511*e992f068Schristos int k;
512*e992f068Schristos z_crc_t i, p, q;
513*e992f068Schristos for (k = 0; k < w; k++) {
514*e992f068Schristos p = x2nmodp((n * w + 3 - k) << 3, 0);
515*e992f068Schristos ltl[k][0] = 0;
516*e992f068Schristos big[w - 1 - k][0] = 0;
517*e992f068Schristos for (i = 1; i < 256; i++) {
518*e992f068Schristos ltl[k][i] = q = multmodp(i << 24, p);
519*e992f068Schristos big[w - 1 - k][i] = byte_swap(q);
520*e992f068Schristos }
521*e992f068Schristos }
522*e992f068Schristos }
523*e992f068Schristos #endif
524*e992f068Schristos
52575fd0b74Schristos #else /* !DYNAMIC_CRC_TABLE */
52675fd0b74Schristos /* ========================================================================
527*e992f068Schristos * Tables for byte-wise and braided CRC-32 calculations, and a table of powers
528*e992f068Schristos * of x for combining CRC-32s, all made by make_crc_table().
52975fd0b74Schristos */
53075fd0b74Schristos #include "crc32.h"
53175fd0b74Schristos #endif /* DYNAMIC_CRC_TABLE */
53275fd0b74Schristos
533*e992f068Schristos /* ========================================================================
534*e992f068Schristos * Routines used for CRC calculation. Some are also required for the table
535*e992f068Schristos * generation above.
536*e992f068Schristos */
537*e992f068Schristos
538*e992f068Schristos /*
539*e992f068Schristos Return a(x) multiplied by b(x) modulo p(x), where p(x) is the CRC polynomial,
540*e992f068Schristos reflected. For speed, this requires that a not be zero.
541*e992f068Schristos */
multmodp(a,b)542*e992f068Schristos local z_crc_t multmodp(a, b)
543*e992f068Schristos z_crc_t a;
544*e992f068Schristos z_crc_t b;
545*e992f068Schristos {
546*e992f068Schristos z_crc_t m, p;
547*e992f068Schristos
548*e992f068Schristos m = (z_crc_t)1 << 31;
549*e992f068Schristos p = 0;
550*e992f068Schristos for (;;) {
551*e992f068Schristos if (a & m) {
552*e992f068Schristos p ^= b;
553*e992f068Schristos if ((a & (m - 1)) == 0)
554*e992f068Schristos break;
555*e992f068Schristos }
556*e992f068Schristos m >>= 1;
557*e992f068Schristos b = b & 1 ? (b >> 1) ^ POLY : b >> 1;
558*e992f068Schristos }
559*e992f068Schristos return p;
560*e992f068Schristos }
561*e992f068Schristos
562*e992f068Schristos /*
563*e992f068Schristos Return x^(n * 2^k) modulo p(x). Requires that x2n_table[] has been
564*e992f068Schristos initialized.
565*e992f068Schristos */
x2nmodp(n,k)566*e992f068Schristos local z_crc_t x2nmodp(n, k)
567*e992f068Schristos z_off64_t n;
568*e992f068Schristos unsigned k;
569*e992f068Schristos {
570*e992f068Schristos z_crc_t p;
571*e992f068Schristos
572*e992f068Schristos p = (z_crc_t)1 << 31; /* x^0 == 1 */
573*e992f068Schristos while (n) {
574*e992f068Schristos if (n & 1)
575*e992f068Schristos p = multmodp(x2n_table[k & 31], p);
576*e992f068Schristos n >>= 1;
577*e992f068Schristos k++;
578*e992f068Schristos }
579*e992f068Schristos return p;
580*e992f068Schristos }
581*e992f068Schristos
58275fd0b74Schristos /* =========================================================================
583*e992f068Schristos * This function can be used by asm versions of crc32(), and to force the
584*e992f068Schristos * generation of the CRC tables in a threaded application.
58575fd0b74Schristos */
get_crc_table()58675fd0b74Schristos const z_crc_t FAR * ZEXPORT get_crc_table()
58775fd0b74Schristos {
58875fd0b74Schristos #ifdef DYNAMIC_CRC_TABLE
589*e992f068Schristos once(&made, make_crc_table);
59075fd0b74Schristos #endif /* DYNAMIC_CRC_TABLE */
59175fd0b74Schristos return (const z_crc_t FAR *)crc_table;
59275fd0b74Schristos }
59375fd0b74Schristos
594*e992f068Schristos /* =========================================================================
595*e992f068Schristos * Use ARM machine instructions if available. This will compute the CRC about
596*e992f068Schristos * ten times faster than the braided calculation. This code does not check for
597*e992f068Schristos * the presence of the CRC instruction at run time. __ARM_FEATURE_CRC32 will
598*e992f068Schristos * only be defined if the compilation specifies an ARM processor architecture
599*e992f068Schristos * that has the instructions. For example, compiling with -march=armv8.1-a or
600*e992f068Schristos * -march=armv8-a+crc, or -march=native if the compile machine has the crc32
601*e992f068Schristos * instructions.
602*e992f068Schristos */
603*e992f068Schristos #ifdef ARMCRC32
604*e992f068Schristos
605*e992f068Schristos /*
606*e992f068Schristos Constants empirically determined to maximize speed. These values are from
607*e992f068Schristos measurements on a Cortex-A57. Your mileage may vary.
608*e992f068Schristos */
609*e992f068Schristos #define Z_BATCH 3990 /* number of words in a batch */
610*e992f068Schristos #define Z_BATCH_ZEROS 0xa10d3d0c /* computed from Z_BATCH = 3990 */
611*e992f068Schristos #define Z_BATCH_MIN 800 /* fewest words in a final batch */
612*e992f068Schristos
crc32_z(crc,buf,len)613*e992f068Schristos unsigned long ZEXPORT crc32_z(crc, buf, len)
614*e992f068Schristos unsigned long crc;
615*e992f068Schristos const unsigned char FAR *buf;
616*e992f068Schristos z_size_t len;
617*e992f068Schristos {
618*e992f068Schristos z_crc_t val;
619*e992f068Schristos z_word_t crc1, crc2;
620*e992f068Schristos const z_word_t *word;
621*e992f068Schristos z_word_t val0, val1, val2;
622*e992f068Schristos z_size_t last, last2, i;
623*e992f068Schristos z_size_t num;
624*e992f068Schristos
625*e992f068Schristos /* Return initial CRC, if requested. */
626*e992f068Schristos if (buf == Z_NULL) return 0;
627*e992f068Schristos
628*e992f068Schristos #ifdef DYNAMIC_CRC_TABLE
629*e992f068Schristos once(&made, make_crc_table);
630*e992f068Schristos #endif /* DYNAMIC_CRC_TABLE */
631*e992f068Schristos
632*e992f068Schristos /* Pre-condition the CRC */
633*e992f068Schristos crc ^= 0xffffffff;
634*e992f068Schristos
635*e992f068Schristos /* Compute the CRC up to a word boundary. */
636*e992f068Schristos while (len && ((z_size_t)buf & 7) != 0) {
637*e992f068Schristos len--;
638*e992f068Schristos val = *buf++;
639*e992f068Schristos __asm__ volatile("crc32b %w0, %w0, %w1" : "+r"(crc) : "r"(val));
640*e992f068Schristos }
641*e992f068Schristos
642*e992f068Schristos /* Prepare to compute the CRC on full 64-bit words word[0..num-1]. */
643*e992f068Schristos word = (z_word_t const *)buf;
644*e992f068Schristos num = len >> 3;
645*e992f068Schristos len &= 7;
646*e992f068Schristos
647*e992f068Schristos /* Do three interleaved CRCs to realize the throughput of one crc32x
648*e992f068Schristos instruction per cycle. Each CRC is calcuated on Z_BATCH words. The three
649*e992f068Schristos CRCs are combined into a single CRC after each set of batches. */
650*e992f068Schristos while (num >= 3 * Z_BATCH) {
651*e992f068Schristos crc1 = 0;
652*e992f068Schristos crc2 = 0;
653*e992f068Schristos for (i = 0; i < Z_BATCH; i++) {
654*e992f068Schristos val0 = word[i];
655*e992f068Schristos val1 = word[i + Z_BATCH];
656*e992f068Schristos val2 = word[i + 2 * Z_BATCH];
657*e992f068Schristos __asm__ volatile("crc32x %w0, %w0, %x1" : "+r"(crc) : "r"(val0));
658*e992f068Schristos __asm__ volatile("crc32x %w0, %w0, %x1" : "+r"(crc1) : "r"(val1));
659*e992f068Schristos __asm__ volatile("crc32x %w0, %w0, %x1" : "+r"(crc2) : "r"(val2));
660*e992f068Schristos }
661*e992f068Schristos word += 3 * Z_BATCH;
662*e992f068Schristos num -= 3 * Z_BATCH;
663*e992f068Schristos crc = multmodp(Z_BATCH_ZEROS, crc) ^ crc1;
664*e992f068Schristos crc = multmodp(Z_BATCH_ZEROS, crc) ^ crc2;
665*e992f068Schristos }
666*e992f068Schristos
667*e992f068Schristos /* Do one last smaller batch with the remaining words, if there are enough
668*e992f068Schristos to pay for the combination of CRCs. */
669*e992f068Schristos last = num / 3;
670*e992f068Schristos if (last >= Z_BATCH_MIN) {
671*e992f068Schristos last2 = last << 1;
672*e992f068Schristos crc1 = 0;
673*e992f068Schristos crc2 = 0;
674*e992f068Schristos for (i = 0; i < last; i++) {
675*e992f068Schristos val0 = word[i];
676*e992f068Schristos val1 = word[i + last];
677*e992f068Schristos val2 = word[i + last2];
678*e992f068Schristos __asm__ volatile("crc32x %w0, %w0, %x1" : "+r"(crc) : "r"(val0));
679*e992f068Schristos __asm__ volatile("crc32x %w0, %w0, %x1" : "+r"(crc1) : "r"(val1));
680*e992f068Schristos __asm__ volatile("crc32x %w0, %w0, %x1" : "+r"(crc2) : "r"(val2));
681*e992f068Schristos }
682*e992f068Schristos word += 3 * last;
683*e992f068Schristos num -= 3 * last;
684*e992f068Schristos val = x2nmodp(last, 6);
685*e992f068Schristos crc = multmodp(val, crc) ^ crc1;
686*e992f068Schristos crc = multmodp(val, crc) ^ crc2;
687*e992f068Schristos }
688*e992f068Schristos
689*e992f068Schristos /* Compute the CRC on any remaining words. */
690*e992f068Schristos for (i = 0; i < num; i++) {
691*e992f068Schristos val0 = word[i];
692*e992f068Schristos __asm__ volatile("crc32x %w0, %w0, %x1" : "+r"(crc) : "r"(val0));
693*e992f068Schristos }
694*e992f068Schristos word += num;
695*e992f068Schristos
696*e992f068Schristos /* Complete the CRC on any remaining bytes. */
697*e992f068Schristos buf = (const unsigned char FAR *)word;
698*e992f068Schristos while (len) {
699*e992f068Schristos len--;
700*e992f068Schristos val = *buf++;
701*e992f068Schristos __asm__ volatile("crc32b %w0, %w0, %w1" : "+r"(crc) : "r"(val));
702*e992f068Schristos }
703*e992f068Schristos
704*e992f068Schristos /* Return the CRC, post-conditioned. */
705*e992f068Schristos return crc ^ 0xffffffff;
706*e992f068Schristos }
707*e992f068Schristos
708*e992f068Schristos #else
709*e992f068Schristos
710*e992f068Schristos #ifdef W
711*e992f068Schristos
712*e992f068Schristos /*
713*e992f068Schristos Return the CRC of the W bytes in the word_t data, taking the
714*e992f068Schristos least-significant byte of the word as the first byte of data, without any pre
715*e992f068Schristos or post conditioning. This is used to combine the CRCs of each braid.
716*e992f068Schristos */
crc_word(data)717*e992f068Schristos local z_crc_t crc_word(data)
718*e992f068Schristos z_word_t data;
719*e992f068Schristos {
720*e992f068Schristos int k;
721*e992f068Schristos for (k = 0; k < W; k++)
722*e992f068Schristos data = (data >> 8) ^ crc_table[data & 0xff];
723*e992f068Schristos return (z_crc_t)data;
724*e992f068Schristos }
725*e992f068Schristos
crc_word_big(data)726*e992f068Schristos local z_word_t crc_word_big(data)
727*e992f068Schristos z_word_t data;
728*e992f068Schristos {
729*e992f068Schristos int k;
730*e992f068Schristos for (k = 0; k < W; k++)
731*e992f068Schristos data = (data << 8) ^
732*e992f068Schristos crc_big_table[(data >> ((W - 1) << 3)) & 0xff];
733*e992f068Schristos return data;
734*e992f068Schristos }
735*e992f068Schristos
736*e992f068Schristos #endif
73775fd0b74Schristos
73875fd0b74Schristos /* ========================================================================= */
crc32_z(crc,buf,len)739ede78133Schristos unsigned long ZEXPORT crc32_z(crc, buf, len)
74075fd0b74Schristos unsigned long crc;
74175fd0b74Schristos const unsigned char FAR *buf;
742ede78133Schristos z_size_t len;
74375fd0b74Schristos {
744*e992f068Schristos /* Return initial CRC, if requested. */
745*e992f068Schristos if (buf == Z_NULL) return 0;
74675fd0b74Schristos
74775fd0b74Schristos #ifdef DYNAMIC_CRC_TABLE
748*e992f068Schristos once(&made, make_crc_table);
74975fd0b74Schristos #endif /* DYNAMIC_CRC_TABLE */
75075fd0b74Schristos
751*e992f068Schristos /* Pre-condition the CRC */
752*e992f068Schristos crc ^= 0xffffffff;
75375fd0b74Schristos
754*e992f068Schristos #ifdef W
755*e992f068Schristos
756*e992f068Schristos /* If provided enough bytes, do a braided CRC calculation. */
757*e992f068Schristos if (len >= N * W + W - 1) {
758*e992f068Schristos z_size_t blks;
759*e992f068Schristos z_word_t const *words;
760*e992f068Schristos unsigned endian;
761*e992f068Schristos int k;
762*e992f068Schristos
763*e992f068Schristos /* Compute the CRC up to a z_word_t boundary. */
764*e992f068Schristos while (len && ((z_size_t)buf & (W - 1)) != 0) {
765*e992f068Schristos len--;
766*e992f068Schristos crc = (crc >> 8) ^ crc_table[(crc ^ *buf++) & 0xff];
767*e992f068Schristos }
768*e992f068Schristos
769*e992f068Schristos /* Compute the CRC on as many N z_word_t blocks as are available. */
770*e992f068Schristos blks = len / (N * W);
771*e992f068Schristos len -= blks * N * W;
772*e992f068Schristos words = (z_word_t const *)buf;
773*e992f068Schristos
774*e992f068Schristos /* Do endian check at execution time instead of compile time, since ARM
775*e992f068Schristos processors can change the endianess at execution time. If the
776*e992f068Schristos compiler knows what the endianess will be, it can optimize out the
777*e992f068Schristos check and the unused branch. */
77875fd0b74Schristos endian = 1;
779*e992f068Schristos if (*(unsigned char *)&endian) {
780*e992f068Schristos /* Little endian. */
781*e992f068Schristos
782*e992f068Schristos z_crc_t crc0;
783*e992f068Schristos z_word_t word0;
784*e992f068Schristos #if N > 1
785*e992f068Schristos z_crc_t crc1;
786*e992f068Schristos z_word_t word1;
787*e992f068Schristos #if N > 2
788*e992f068Schristos z_crc_t crc2;
789*e992f068Schristos z_word_t word2;
790*e992f068Schristos #if N > 3
791*e992f068Schristos z_crc_t crc3;
792*e992f068Schristos z_word_t word3;
793*e992f068Schristos #if N > 4
794*e992f068Schristos z_crc_t crc4;
795*e992f068Schristos z_word_t word4;
796*e992f068Schristos #if N > 5
797*e992f068Schristos z_crc_t crc5;
798*e992f068Schristos z_word_t word5;
799*e992f068Schristos #endif
800*e992f068Schristos #endif
801*e992f068Schristos #endif
802*e992f068Schristos #endif
803*e992f068Schristos #endif
804*e992f068Schristos
805*e992f068Schristos /* Initialize the CRC for each braid. */
806*e992f068Schristos crc0 = crc;
807*e992f068Schristos #if N > 1
808*e992f068Schristos crc1 = 0;
809*e992f068Schristos #if N > 2
810*e992f068Schristos crc2 = 0;
811*e992f068Schristos #if N > 3
812*e992f068Schristos crc3 = 0;
813*e992f068Schristos #if N > 4
814*e992f068Schristos crc4 = 0;
815*e992f068Schristos #if N > 5
816*e992f068Schristos crc5 = 0;
817*e992f068Schristos #endif
818*e992f068Schristos #endif
819*e992f068Schristos #endif
820*e992f068Schristos #endif
821*e992f068Schristos #endif
822*e992f068Schristos
823*e992f068Schristos /*
824*e992f068Schristos Process the first blks-1 blocks, computing the CRCs on each braid
825*e992f068Schristos independently.
826*e992f068Schristos */
827*e992f068Schristos while (--blks) {
828*e992f068Schristos /* Load the word for each braid into registers. */
829*e992f068Schristos word0 = crc0 ^ words[0];
830*e992f068Schristos #if N > 1
831*e992f068Schristos word1 = crc1 ^ words[1];
832*e992f068Schristos #if N > 2
833*e992f068Schristos word2 = crc2 ^ words[2];
834*e992f068Schristos #if N > 3
835*e992f068Schristos word3 = crc3 ^ words[3];
836*e992f068Schristos #if N > 4
837*e992f068Schristos word4 = crc4 ^ words[4];
838*e992f068Schristos #if N > 5
839*e992f068Schristos word5 = crc5 ^ words[5];
840*e992f068Schristos #endif
841*e992f068Schristos #endif
842*e992f068Schristos #endif
843*e992f068Schristos #endif
844*e992f068Schristos #endif
845*e992f068Schristos words += N;
846*e992f068Schristos
847*e992f068Schristos /* Compute and update the CRC for each word. The loop should
848*e992f068Schristos get unrolled. */
849*e992f068Schristos crc0 = crc_braid_table[0][word0 & 0xff];
850*e992f068Schristos #if N > 1
851*e992f068Schristos crc1 = crc_braid_table[0][word1 & 0xff];
852*e992f068Schristos #if N > 2
853*e992f068Schristos crc2 = crc_braid_table[0][word2 & 0xff];
854*e992f068Schristos #if N > 3
855*e992f068Schristos crc3 = crc_braid_table[0][word3 & 0xff];
856*e992f068Schristos #if N > 4
857*e992f068Schristos crc4 = crc_braid_table[0][word4 & 0xff];
858*e992f068Schristos #if N > 5
859*e992f068Schristos crc5 = crc_braid_table[0][word5 & 0xff];
860*e992f068Schristos #endif
861*e992f068Schristos #endif
862*e992f068Schristos #endif
863*e992f068Schristos #endif
864*e992f068Schristos #endif
865*e992f068Schristos for (k = 1; k < W; k++) {
866*e992f068Schristos crc0 ^= crc_braid_table[k][(word0 >> (k << 3)) & 0xff];
867*e992f068Schristos #if N > 1
868*e992f068Schristos crc1 ^= crc_braid_table[k][(word1 >> (k << 3)) & 0xff];
869*e992f068Schristos #if N > 2
870*e992f068Schristos crc2 ^= crc_braid_table[k][(word2 >> (k << 3)) & 0xff];
871*e992f068Schristos #if N > 3
872*e992f068Schristos crc3 ^= crc_braid_table[k][(word3 >> (k << 3)) & 0xff];
873*e992f068Schristos #if N > 4
874*e992f068Schristos crc4 ^= crc_braid_table[k][(word4 >> (k << 3)) & 0xff];
875*e992f068Schristos #if N > 5
876*e992f068Schristos crc5 ^= crc_braid_table[k][(word5 >> (k << 3)) & 0xff];
877*e992f068Schristos #endif
878*e992f068Schristos #endif
879*e992f068Schristos #endif
880*e992f068Schristos #endif
881*e992f068Schristos #endif
88275fd0b74Schristos }
883*e992f068Schristos }
884*e992f068Schristos
885*e992f068Schristos /*
886*e992f068Schristos Process the last block, combining the CRCs of the N braids at the
887*e992f068Schristos same time.
888*e992f068Schristos */
889*e992f068Schristos crc = crc_word(crc0 ^ words[0]);
890*e992f068Schristos #if N > 1
891*e992f068Schristos crc = crc_word(crc1 ^ words[1] ^ crc);
892*e992f068Schristos #if N > 2
893*e992f068Schristos crc = crc_word(crc2 ^ words[2] ^ crc);
894*e992f068Schristos #if N > 3
895*e992f068Schristos crc = crc_word(crc3 ^ words[3] ^ crc);
896*e992f068Schristos #if N > 4
897*e992f068Schristos crc = crc_word(crc4 ^ words[4] ^ crc);
898*e992f068Schristos #if N > 5
899*e992f068Schristos crc = crc_word(crc5 ^ words[5] ^ crc);
900*e992f068Schristos #endif
901*e992f068Schristos #endif
902*e992f068Schristos #endif
903*e992f068Schristos #endif
904*e992f068Schristos #endif
905*e992f068Schristos words += N;
906*e992f068Schristos }
907*e992f068Schristos else {
908*e992f068Schristos /* Big endian. */
909*e992f068Schristos
910*e992f068Schristos z_word_t crc0, word0, comb;
911*e992f068Schristos #if N > 1
912*e992f068Schristos z_word_t crc1, word1;
913*e992f068Schristos #if N > 2
914*e992f068Schristos z_word_t crc2, word2;
915*e992f068Schristos #if N > 3
916*e992f068Schristos z_word_t crc3, word3;
917*e992f068Schristos #if N > 4
918*e992f068Schristos z_word_t crc4, word4;
919*e992f068Schristos #if N > 5
920*e992f068Schristos z_word_t crc5, word5;
921*e992f068Schristos #endif
922*e992f068Schristos #endif
923*e992f068Schristos #endif
924*e992f068Schristos #endif
925*e992f068Schristos #endif
926*e992f068Schristos
927*e992f068Schristos /* Initialize the CRC for each braid. */
928*e992f068Schristos crc0 = byte_swap(crc);
929*e992f068Schristos #if N > 1
930*e992f068Schristos crc1 = 0;
931*e992f068Schristos #if N > 2
932*e992f068Schristos crc2 = 0;
933*e992f068Schristos #if N > 3
934*e992f068Schristos crc3 = 0;
935*e992f068Schristos #if N > 4
936*e992f068Schristos crc4 = 0;
937*e992f068Schristos #if N > 5
938*e992f068Schristos crc5 = 0;
939*e992f068Schristos #endif
940*e992f068Schristos #endif
941*e992f068Schristos #endif
942*e992f068Schristos #endif
943*e992f068Schristos #endif
944*e992f068Schristos
945*e992f068Schristos /*
946*e992f068Schristos Process the first blks-1 blocks, computing the CRCs on each braid
947*e992f068Schristos independently.
948*e992f068Schristos */
949*e992f068Schristos while (--blks) {
950*e992f068Schristos /* Load the word for each braid into registers. */
951*e992f068Schristos word0 = crc0 ^ words[0];
952*e992f068Schristos #if N > 1
953*e992f068Schristos word1 = crc1 ^ words[1];
954*e992f068Schristos #if N > 2
955*e992f068Schristos word2 = crc2 ^ words[2];
956*e992f068Schristos #if N > 3
957*e992f068Schristos word3 = crc3 ^ words[3];
958*e992f068Schristos #if N > 4
959*e992f068Schristos word4 = crc4 ^ words[4];
960*e992f068Schristos #if N > 5
961*e992f068Schristos word5 = crc5 ^ words[5];
962*e992f068Schristos #endif
963*e992f068Schristos #endif
964*e992f068Schristos #endif
965*e992f068Schristos #endif
966*e992f068Schristos #endif
967*e992f068Schristos words += N;
968*e992f068Schristos
969*e992f068Schristos /* Compute and update the CRC for each word. The loop should
970*e992f068Schristos get unrolled. */
971*e992f068Schristos crc0 = crc_braid_big_table[0][word0 & 0xff];
972*e992f068Schristos #if N > 1
973*e992f068Schristos crc1 = crc_braid_big_table[0][word1 & 0xff];
974*e992f068Schristos #if N > 2
975*e992f068Schristos crc2 = crc_braid_big_table[0][word2 & 0xff];
976*e992f068Schristos #if N > 3
977*e992f068Schristos crc3 = crc_braid_big_table[0][word3 & 0xff];
978*e992f068Schristos #if N > 4
979*e992f068Schristos crc4 = crc_braid_big_table[0][word4 & 0xff];
980*e992f068Schristos #if N > 5
981*e992f068Schristos crc5 = crc_braid_big_table[0][word5 & 0xff];
982*e992f068Schristos #endif
983*e992f068Schristos #endif
984*e992f068Schristos #endif
985*e992f068Schristos #endif
986*e992f068Schristos #endif
987*e992f068Schristos for (k = 1; k < W; k++) {
988*e992f068Schristos crc0 ^= crc_braid_big_table[k][(word0 >> (k << 3)) & 0xff];
989*e992f068Schristos #if N > 1
990*e992f068Schristos crc1 ^= crc_braid_big_table[k][(word1 >> (k << 3)) & 0xff];
991*e992f068Schristos #if N > 2
992*e992f068Schristos crc2 ^= crc_braid_big_table[k][(word2 >> (k << 3)) & 0xff];
993*e992f068Schristos #if N > 3
994*e992f068Schristos crc3 ^= crc_braid_big_table[k][(word3 >> (k << 3)) & 0xff];
995*e992f068Schristos #if N > 4
996*e992f068Schristos crc4 ^= crc_braid_big_table[k][(word4 >> (k << 3)) & 0xff];
997*e992f068Schristos #if N > 5
998*e992f068Schristos crc5 ^= crc_braid_big_table[k][(word5 >> (k << 3)) & 0xff];
999*e992f068Schristos #endif
1000*e992f068Schristos #endif
1001*e992f068Schristos #endif
1002*e992f068Schristos #endif
1003*e992f068Schristos #endif
1004*e992f068Schristos }
1005*e992f068Schristos }
1006*e992f068Schristos
1007*e992f068Schristos /*
1008*e992f068Schristos Process the last block, combining the CRCs of the N braids at the
1009*e992f068Schristos same time.
1010*e992f068Schristos */
1011*e992f068Schristos comb = crc_word_big(crc0 ^ words[0]);
1012*e992f068Schristos #if N > 1
1013*e992f068Schristos comb = crc_word_big(crc1 ^ words[1] ^ comb);
1014*e992f068Schristos #if N > 2
1015*e992f068Schristos comb = crc_word_big(crc2 ^ words[2] ^ comb);
1016*e992f068Schristos #if N > 3
1017*e992f068Schristos comb = crc_word_big(crc3 ^ words[3] ^ comb);
1018*e992f068Schristos #if N > 4
1019*e992f068Schristos comb = crc_word_big(crc4 ^ words[4] ^ comb);
1020*e992f068Schristos #if N > 5
1021*e992f068Schristos comb = crc_word_big(crc5 ^ words[5] ^ comb);
1022*e992f068Schristos #endif
1023*e992f068Schristos #endif
1024*e992f068Schristos #endif
1025*e992f068Schristos #endif
1026*e992f068Schristos #endif
1027*e992f068Schristos words += N;
1028*e992f068Schristos crc = byte_swap(comb);
1029*e992f068Schristos }
1030*e992f068Schristos
1031*e992f068Schristos /*
1032*e992f068Schristos Update the pointer to the remaining bytes to process.
1033*e992f068Schristos */
1034*e992f068Schristos buf = (unsigned char const *)words;
1035*e992f068Schristos }
1036*e992f068Schristos
1037*e992f068Schristos #endif /* W */
1038*e992f068Schristos
1039*e992f068Schristos /* Complete the computation of the CRC on any remaining bytes. */
104075fd0b74Schristos while (len >= 8) {
104175fd0b74Schristos len -= 8;
1042*e992f068Schristos crc = (crc >> 8) ^ crc_table[(crc ^ *buf++) & 0xff];
1043*e992f068Schristos crc = (crc >> 8) ^ crc_table[(crc ^ *buf++) & 0xff];
1044*e992f068Schristos crc = (crc >> 8) ^ crc_table[(crc ^ *buf++) & 0xff];
1045*e992f068Schristos crc = (crc >> 8) ^ crc_table[(crc ^ *buf++) & 0xff];
1046*e992f068Schristos crc = (crc >> 8) ^ crc_table[(crc ^ *buf++) & 0xff];
1047*e992f068Schristos crc = (crc >> 8) ^ crc_table[(crc ^ *buf++) & 0xff];
1048*e992f068Schristos crc = (crc >> 8) ^ crc_table[(crc ^ *buf++) & 0xff];
1049*e992f068Schristos crc = (crc >> 8) ^ crc_table[(crc ^ *buf++) & 0xff];
105075fd0b74Schristos }
1051*e992f068Schristos while (len) {
1052*e992f068Schristos len--;
1053*e992f068Schristos crc = (crc >> 8) ^ crc_table[(crc ^ *buf++) & 0xff];
105475fd0b74Schristos }
105575fd0b74Schristos
1056*e992f068Schristos /* Return the CRC, post-conditioned. */
1057*e992f068Schristos return crc ^ 0xffffffff;
1058*e992f068Schristos }
1059*e992f068Schristos
1060*e992f068Schristos #endif
1061*e992f068Schristos
1062ede78133Schristos /* ========================================================================= */
crc32(crc,buf,len)1063ede78133Schristos unsigned long ZEXPORT crc32(crc, buf, len)
1064ede78133Schristos unsigned long crc;
1065ede78133Schristos const unsigned char FAR *buf;
1066ede78133Schristos uInt len;
1067ede78133Schristos {
1068ede78133Schristos return crc32_z(crc, buf, len);
1069ede78133Schristos }
1070ede78133Schristos
107175fd0b74Schristos /* ========================================================================= */
crc32_combine64(crc1,crc2,len2)1072*e992f068Schristos uLong ZEXPORT crc32_combine64(crc1, crc2, len2)
107375fd0b74Schristos uLong crc1;
107475fd0b74Schristos uLong crc2;
107575fd0b74Schristos z_off64_t len2;
107675fd0b74Schristos {
1077*e992f068Schristos #ifdef DYNAMIC_CRC_TABLE
1078*e992f068Schristos once(&made, make_crc_table);
1079*e992f068Schristos #endif /* DYNAMIC_CRC_TABLE */
1080*e992f068Schristos return multmodp(x2nmodp(len2, 3), crc1) ^ crc2;
108175fd0b74Schristos }
108275fd0b74Schristos
108375fd0b74Schristos /* ========================================================================= */
crc32_combine(crc1,crc2,len2)108475fd0b74Schristos uLong ZEXPORT crc32_combine(crc1, crc2, len2)
108575fd0b74Schristos uLong crc1;
108675fd0b74Schristos uLong crc2;
108775fd0b74Schristos z_off_t len2;
108875fd0b74Schristos {
1089*e992f068Schristos return crc32_combine64(crc1, crc2, len2);
109075fd0b74Schristos }
109175fd0b74Schristos
1092*e992f068Schristos /* ========================================================================= */
crc32_combine_gen64(len2)1093*e992f068Schristos uLong ZEXPORT crc32_combine_gen64(len2)
109475fd0b74Schristos z_off64_t len2;
109575fd0b74Schristos {
1096*e992f068Schristos #ifdef DYNAMIC_CRC_TABLE
1097*e992f068Schristos once(&made, make_crc_table);
1098*e992f068Schristos #endif /* DYNAMIC_CRC_TABLE */
1099*e992f068Schristos return x2nmodp(len2, 3);
1100*e992f068Schristos }
1101*e992f068Schristos
1102*e992f068Schristos /* ========================================================================= */
crc32_combine_gen(len2)1103*e992f068Schristos uLong ZEXPORT crc32_combine_gen(len2)
1104*e992f068Schristos z_off_t len2;
1105*e992f068Schristos {
1106*e992f068Schristos return crc32_combine_gen64(len2);
1107*e992f068Schristos }
1108*e992f068Schristos
1109*e992f068Schristos /* ========================================================================= */
crc32_combine_op(crc1,crc2,op)1110*e992f068Schristos uLong crc32_combine_op(crc1, crc2, op)
1111*e992f068Schristos uLong crc1;
1112*e992f068Schristos uLong crc2;
1113*e992f068Schristos uLong op;
1114*e992f068Schristos {
1115*e992f068Schristos return multmodp(op, crc1) ^ crc2;
111675fd0b74Schristos }
1117