xref: /netbsd-src/external/gpl3/gdb/dist/zlib/crc32.c (revision 924795e69c8bb3f17afd8fcbb799710cc1719dc4)
1212397c6Schristos /* crc32.c -- compute the CRC-32 of a data stream
2*924795e6Schristos  * Copyright (C) 1995-2022 Mark Adler
3212397c6Schristos  * For conditions of distribution and use, see copyright notice in zlib.h
4212397c6Schristos  *
5*924795e6Schristos  * This interleaved implementation of a CRC makes use of pipelined multiple
6*924795e6Schristos  * arithmetic-logic units, commonly found in modern CPU cores. It is due to
7*924795e6Schristos  * Kadatch and Jenkins (2010). See doc/crc-doc.1.0.pdf in this distribution.
8212397c6Schristos  */
9212397c6Schristos 
10*924795e6Schristos /* @(#) Id */
11212397c6Schristos 
12212397c6Schristos /*
13212397c6Schristos   Note on the use of DYNAMIC_CRC_TABLE: there is no mutex or semaphore
14212397c6Schristos   protection on the static variables used to control the first-use generation
15212397c6Schristos   of the crc tables. Therefore, if you #define DYNAMIC_CRC_TABLE, you should
16212397c6Schristos   first call get_crc_table() to initialize the tables before allowing more than
17212397c6Schristos   one thread to use crc32().
18212397c6Schristos 
19*924795e6Schristos   MAKECRCH can be #defined to write out crc32.h. A main() routine is also
20*924795e6Schristos   produced, so that this one source file can be compiled to an executable.
21212397c6Schristos  */
22212397c6Schristos 
23212397c6Schristos #ifdef MAKECRCH
24212397c6Schristos #  include <stdio.h>
25212397c6Schristos #  ifndef DYNAMIC_CRC_TABLE
26212397c6Schristos #    define DYNAMIC_CRC_TABLE
27212397c6Schristos #  endif /* !DYNAMIC_CRC_TABLE */
28212397c6Schristos #endif /* MAKECRCH */
29212397c6Schristos 
30*924795e6Schristos #include "zutil.h"      /* for Z_U4, Z_U8, z_crc_t, and FAR definitions */
31212397c6Schristos 
32*924795e6Schristos  /*
33*924795e6Schristos   A CRC of a message is computed on N braids of words in the message, where
34*924795e6Schristos   each word consists of W bytes (4 or 8). If N is 3, for example, then three
35*924795e6Schristos   running sparse CRCs are calculated respectively on each braid, at these
36*924795e6Schristos   indices in the array of words: 0, 3, 6, ..., 1, 4, 7, ..., and 2, 5, 8, ...
37*924795e6Schristos   This is done starting at a word boundary, and continues until as many blocks
38*924795e6Schristos   of N * W bytes as are available have been processed. The results are combined
39*924795e6Schristos   into a single CRC at the end. For this code, N must be in the range 1..6 and
40*924795e6Schristos   W must be 4 or 8. The upper limit on N can be increased if desired by adding
41*924795e6Schristos   more #if blocks, extending the patterns apparent in the code. In addition,
42*924795e6Schristos   crc32.h would need to be regenerated, if the maximum N value is increased.
43*924795e6Schristos 
44*924795e6Schristos   N and W are chosen empirically by benchmarking the execution time on a given
45*924795e6Schristos   processor. The choices for N and W below were based on testing on Intel Kaby
46*924795e6Schristos   Lake i7, AMD Ryzen 7, ARM Cortex-A57, Sparc64-VII, PowerPC POWER9, and MIPS64
47*924795e6Schristos   Octeon II processors. The Intel, AMD, and ARM processors were all fastest
48*924795e6Schristos   with N=5, W=8. The Sparc, PowerPC, and MIPS64 were all fastest at N=5, W=4.
49*924795e6Schristos   They were all tested with either gcc or clang, all using the -O3 optimization
50*924795e6Schristos   level. Your mileage may vary.
51*924795e6Schristos  */
52*924795e6Schristos 
53*924795e6Schristos /* Define N */
54*924795e6Schristos #ifdef Z_TESTN
55*924795e6Schristos #  define N Z_TESTN
56212397c6Schristos #else
57*924795e6Schristos #  define N 5
58*924795e6Schristos #endif
59*924795e6Schristos #if N < 1 || N > 6
60*924795e6Schristos #  error N must be in 1..6
61*924795e6Schristos #endif
62212397c6Schristos 
63*924795e6Schristos /*
64*924795e6Schristos   z_crc_t must be at least 32 bits. z_word_t must be at least as long as
65*924795e6Schristos   z_crc_t. It is assumed here that z_word_t is either 32 bits or 64 bits, and
66*924795e6Schristos   that bytes are eight bits.
67*924795e6Schristos  */
68212397c6Schristos 
69*924795e6Schristos /*
70*924795e6Schristos   Define W and the associated z_word_t type. If W is not defined, then a
71*924795e6Schristos   braided calculation is not used, and the associated tables and code are not
72*924795e6Schristos   compiled.
73*924795e6Schristos  */
74*924795e6Schristos #ifdef Z_TESTW
75*924795e6Schristos #  if Z_TESTW-1 != -1
76*924795e6Schristos #    define W Z_TESTW
77*924795e6Schristos #  endif
78*924795e6Schristos #else
79*924795e6Schristos #  ifdef MAKECRCH
80*924795e6Schristos #    define W 8         /* required for MAKECRCH */
81*924795e6Schristos #  else
82*924795e6Schristos #    if defined(__x86_64__) || defined(__aarch64__)
83*924795e6Schristos #      define W 8
84*924795e6Schristos #    else
85*924795e6Schristos #      define W 4
86*924795e6Schristos #    endif
87*924795e6Schristos #  endif
88*924795e6Schristos #endif
89*924795e6Schristos #ifdef W
90*924795e6Schristos #  if W == 8 && defined(Z_U8)
91*924795e6Schristos      typedef Z_U8 z_word_t;
92*924795e6Schristos #  elif defined(Z_U4)
93*924795e6Schristos #    undef W
94*924795e6Schristos #    define W 4
95*924795e6Schristos      typedef Z_U4 z_word_t;
96*924795e6Schristos #  else
97*924795e6Schristos #    undef W
98*924795e6Schristos #  endif
99*924795e6Schristos #endif
100*924795e6Schristos 
101*924795e6Schristos /* Local functions. */
102*924795e6Schristos local z_crc_t multmodp OF((z_crc_t a, z_crc_t b));
103*924795e6Schristos local z_crc_t x2nmodp OF((z_off64_t n, unsigned k));
104*924795e6Schristos 
105*924795e6Schristos /* If available, use the ARM processor CRC32 instruction. */
106*924795e6Schristos #if defined(__aarch64__) && defined(__ARM_FEATURE_CRC32) && W == 8
107*924795e6Schristos #  define ARMCRC32
108*924795e6Schristos #endif
109*924795e6Schristos 
110*924795e6Schristos #if defined(W) && (!defined(ARMCRC32) || defined(DYNAMIC_CRC_TABLE))
111*924795e6Schristos /*
112*924795e6Schristos   Swap the bytes in a z_word_t to convert between little and big endian. Any
113*924795e6Schristos   self-respecting compiler will optimize this to a single machine byte-swap
114*924795e6Schristos   instruction, if one is available. This assumes that word_t is either 32 bits
115*924795e6Schristos   or 64 bits.
116*924795e6Schristos  */
byte_swap(word)117*924795e6Schristos local z_word_t byte_swap(word)
118*924795e6Schristos     z_word_t word;
119*924795e6Schristos {
120*924795e6Schristos #  if W == 8
121*924795e6Schristos     return
122*924795e6Schristos         (word & 0xff00000000000000) >> 56 |
123*924795e6Schristos         (word & 0xff000000000000) >> 40 |
124*924795e6Schristos         (word & 0xff0000000000) >> 24 |
125*924795e6Schristos         (word & 0xff00000000) >> 8 |
126*924795e6Schristos         (word & 0xff000000) << 8 |
127*924795e6Schristos         (word & 0xff0000) << 24 |
128*924795e6Schristos         (word & 0xff00) << 40 |
129*924795e6Schristos         (word & 0xff) << 56;
130*924795e6Schristos #  else   /* W == 4 */
131*924795e6Schristos     return
132*924795e6Schristos         (word & 0xff000000) >> 24 |
133*924795e6Schristos         (word & 0xff0000) >> 8 |
134*924795e6Schristos         (word & 0xff00) << 8 |
135*924795e6Schristos         (word & 0xff) << 24;
136*924795e6Schristos #  endif
137*924795e6Schristos }
138*924795e6Schristos #endif
139*924795e6Schristos 
140*924795e6Schristos /* CRC polynomial. */
141*924795e6Schristos #define POLY 0xedb88320         /* p(x) reflected, with x^32 implied */
142212397c6Schristos 
143212397c6Schristos #ifdef DYNAMIC_CRC_TABLE
144212397c6Schristos 
145*924795e6Schristos local z_crc_t FAR crc_table[256];
146*924795e6Schristos local z_crc_t FAR x2n_table[32];
147212397c6Schristos local void make_crc_table OF((void));
148*924795e6Schristos #ifdef W
149*924795e6Schristos    local z_word_t FAR crc_big_table[256];
150*924795e6Schristos    local z_crc_t FAR crc_braid_table[W][256];
151*924795e6Schristos    local z_word_t FAR crc_braid_big_table[W][256];
152*924795e6Schristos    local void braid OF((z_crc_t [][256], z_word_t [][256], int, int));
153*924795e6Schristos #endif
154212397c6Schristos #ifdef MAKECRCH
155*924795e6Schristos    local void write_table OF((FILE *, const z_crc_t FAR *, int));
156*924795e6Schristos    local void write_table32hi OF((FILE *, const z_word_t FAR *, int));
157*924795e6Schristos    local void write_table64 OF((FILE *, const z_word_t FAR *, int));
158212397c6Schristos #endif /* MAKECRCH */
159*924795e6Schristos 
160*924795e6Schristos /*
161*924795e6Schristos   Define a once() function depending on the availability of atomics. If this is
162*924795e6Schristos   compiled with DYNAMIC_CRC_TABLE defined, and if CRCs will be computed in
163*924795e6Schristos   multiple threads, and if atomics are not available, then get_crc_table() must
164*924795e6Schristos   be called to initialize the tables and must return before any threads are
165*924795e6Schristos   allowed to compute or combine CRCs.
166*924795e6Schristos  */
167*924795e6Schristos 
168*924795e6Schristos /* Definition of once functionality. */
169*924795e6Schristos typedef struct once_s once_t;
170*924795e6Schristos local void once OF((once_t *, void (*)(void)));
171*924795e6Schristos 
172*924795e6Schristos /* Check for the availability of atomics. */
173*924795e6Schristos #if defined(__STDC__) && __STDC_VERSION__ >= 201112L && \
174*924795e6Schristos     !defined(__STDC_NO_ATOMICS__)
175*924795e6Schristos 
176*924795e6Schristos #include <stdatomic.h>
177*924795e6Schristos 
178*924795e6Schristos /* Structure for once(), which must be initialized with ONCE_INIT. */
179*924795e6Schristos struct once_s {
180*924795e6Schristos     atomic_flag begun;
181*924795e6Schristos     atomic_int done;
182*924795e6Schristos };
183*924795e6Schristos #define ONCE_INIT {ATOMIC_FLAG_INIT, 0}
184*924795e6Schristos 
185*924795e6Schristos /*
186*924795e6Schristos   Run the provided init() function exactly once, even if multiple threads
187*924795e6Schristos   invoke once() at the same time. The state must be a once_t initialized with
188*924795e6Schristos   ONCE_INIT.
189*924795e6Schristos  */
once(state,init)190*924795e6Schristos local void once(state, init)
191*924795e6Schristos     once_t *state;
192*924795e6Schristos     void (*init)(void);
193*924795e6Schristos {
194*924795e6Schristos     if (!atomic_load(&state->done)) {
195*924795e6Schristos         if (atomic_flag_test_and_set(&state->begun))
196*924795e6Schristos             while (!atomic_load(&state->done))
197*924795e6Schristos                 ;
198*924795e6Schristos         else {
199*924795e6Schristos             init();
200*924795e6Schristos             atomic_store(&state->done, 1);
201*924795e6Schristos         }
202*924795e6Schristos     }
203*924795e6Schristos }
204*924795e6Schristos 
205*924795e6Schristos #else   /* no atomics */
206*924795e6Schristos 
207*924795e6Schristos /* Structure for once(), which must be initialized with ONCE_INIT. */
208*924795e6Schristos struct once_s {
209*924795e6Schristos     volatile int begun;
210*924795e6Schristos     volatile int done;
211*924795e6Schristos };
212*924795e6Schristos #define ONCE_INIT {0, 0}
213*924795e6Schristos 
214*924795e6Schristos /* Test and set. Alas, not atomic, but tries to minimize the period of
215*924795e6Schristos    vulnerability. */
216*924795e6Schristos local int test_and_set OF((int volatile *));
test_and_set(flag)217*924795e6Schristos local int test_and_set(flag)
218*924795e6Schristos     int volatile *flag;
219*924795e6Schristos {
220*924795e6Schristos     int was;
221*924795e6Schristos 
222*924795e6Schristos     was = *flag;
223*924795e6Schristos     *flag = 1;
224*924795e6Schristos     return was;
225*924795e6Schristos }
226*924795e6Schristos 
227*924795e6Schristos /* Run the provided init() function once. This is not thread-safe. */
once(state,init)228*924795e6Schristos local void once(state, init)
229*924795e6Schristos     once_t *state;
230*924795e6Schristos     void (*init)(void);
231*924795e6Schristos {
232*924795e6Schristos     if (!state->done) {
233*924795e6Schristos         if (test_and_set(&state->begun))
234*924795e6Schristos             while (!state->done)
235*924795e6Schristos                 ;
236*924795e6Schristos         else {
237*924795e6Schristos             init();
238*924795e6Schristos             state->done = 1;
239*924795e6Schristos         }
240*924795e6Schristos     }
241*924795e6Schristos }
242*924795e6Schristos 
243*924795e6Schristos #endif
244*924795e6Schristos 
245*924795e6Schristos /* State for once(). */
246*924795e6Schristos local once_t made = ONCE_INIT;
247*924795e6Schristos 
248212397c6Schristos /*
249212397c6Schristos   Generate tables for a byte-wise 32-bit CRC calculation on the polynomial:
250212397c6Schristos   x^32+x^26+x^23+x^22+x^16+x^12+x^11+x^10+x^8+x^7+x^5+x^4+x^2+x+1.
251212397c6Schristos 
252212397c6Schristos   Polynomials over GF(2) are represented in binary, one bit per coefficient,
253212397c6Schristos   with the lowest powers in the most significant bit. Then adding polynomials
254212397c6Schristos   is just exclusive-or, and multiplying a polynomial by x is a right shift by
255212397c6Schristos   one. If we call the above polynomial p, and represent a byte as the
256212397c6Schristos   polynomial q, also with the lowest power in the most significant bit (so the
257*924795e6Schristos   byte 0xb1 is the polynomial x^7+x^3+x^2+1), then the CRC is (q*x^32) mod p,
258212397c6Schristos   where a mod b means the remainder after dividing a by b.
259212397c6Schristos 
260212397c6Schristos   This calculation is done using the shift-register method of multiplying and
261212397c6Schristos   taking the remainder. The register is initialized to zero, and for each
262212397c6Schristos   incoming bit, x^32 is added mod p to the register if the bit is a one (where
263*924795e6Schristos   x^32 mod p is p+x^32 = x^26+...+1), and the register is multiplied mod p by x
264*924795e6Schristos   (which is shifting right by one and adding x^32 mod p if the bit shifted out
265*924795e6Schristos   is a one). We start with the highest power (least significant bit) of q and
266*924795e6Schristos   repeat for all eight bits of q.
267212397c6Schristos 
268*924795e6Schristos   The table is simply the CRC of all possible eight bit values. This is all the
269*924795e6Schristos   information needed to generate CRCs on data a byte at a time for all
270*924795e6Schristos   combinations of CRC register values and incoming bytes.
271212397c6Schristos  */
272*924795e6Schristos 
make_crc_table()273212397c6Schristos local void make_crc_table()
274212397c6Schristos {
275*924795e6Schristos     unsigned i, j, n;
276*924795e6Schristos     z_crc_t p;
277212397c6Schristos 
278*924795e6Schristos     /* initialize the CRC of bytes tables */
279*924795e6Schristos     for (i = 0; i < 256; i++) {
280*924795e6Schristos         p = i;
281*924795e6Schristos         for (j = 0; j < 8; j++)
282*924795e6Schristos             p = p & 1 ? (p >> 1) ^ POLY : p >> 1;
283*924795e6Schristos         crc_table[i] = p;
284*924795e6Schristos #ifdef W
285*924795e6Schristos         crc_big_table[i] = byte_swap(p);
286*924795e6Schristos #endif
287212397c6Schristos     }
288212397c6Schristos 
289*924795e6Schristos     /* initialize the x^2^n mod p(x) table */
290*924795e6Schristos     p = (z_crc_t)1 << 30;         /* x^1 */
291*924795e6Schristos     x2n_table[0] = p;
292*924795e6Schristos     for (n = 1; n < 32; n++)
293*924795e6Schristos         x2n_table[n] = p = multmodp(p, p);
294212397c6Schristos 
295*924795e6Schristos #ifdef W
296*924795e6Schristos     /* initialize the braiding tables -- needs x2n_table[] */
297*924795e6Schristos     braid(crc_braid_table, crc_braid_big_table, N, W);
298*924795e6Schristos #endif
299212397c6Schristos 
300212397c6Schristos #ifdef MAKECRCH
301212397c6Schristos     {
302*924795e6Schristos         /*
303*924795e6Schristos           The crc32.h header file contains tables for both 32-bit and 64-bit
304*924795e6Schristos           z_word_t's, and so requires a 64-bit type be available. In that case,
305*924795e6Schristos           z_word_t must be defined to be 64-bits. This code then also generates
306*924795e6Schristos           and writes out the tables for the case that z_word_t is 32 bits.
307*924795e6Schristos          */
308*924795e6Schristos #if !defined(W) || W != 8
309*924795e6Schristos #  error Need a 64-bit integer type in order to generate crc32.h.
310*924795e6Schristos #endif
311212397c6Schristos         FILE *out;
312*924795e6Schristos         int k, n;
313*924795e6Schristos         z_crc_t ltl[8][256];
314*924795e6Schristos         z_word_t big[8][256];
315212397c6Schristos 
316212397c6Schristos         out = fopen("crc32.h", "w");
317212397c6Schristos         if (out == NULL) return;
318*924795e6Schristos 
319*924795e6Schristos         /* write out little-endian CRC table to crc32.h */
320*924795e6Schristos         fprintf(out,
321*924795e6Schristos             "/* crc32.h -- tables for rapid CRC calculation\n"
322*924795e6Schristos             " * Generated automatically by crc32.c\n */\n"
323*924795e6Schristos             "\n"
324*924795e6Schristos             "local const z_crc_t FAR crc_table[] = {\n"
325*924795e6Schristos             "    ");
326*924795e6Schristos         write_table(out, crc_table, 256);
327*924795e6Schristos         fprintf(out,
328*924795e6Schristos             "};\n");
329*924795e6Schristos 
330*924795e6Schristos         /* write out big-endian CRC table for 64-bit z_word_t to crc32.h */
331*924795e6Schristos         fprintf(out,
332*924795e6Schristos             "\n"
333*924795e6Schristos             "#ifdef W\n"
334*924795e6Schristos             "\n"
335*924795e6Schristos             "#if W == 8\n"
336*924795e6Schristos             "\n"
337*924795e6Schristos             "local const z_word_t FAR crc_big_table[] = {\n"
338*924795e6Schristos             "    ");
339*924795e6Schristos         write_table64(out, crc_big_table, 256);
340*924795e6Schristos         fprintf(out,
341*924795e6Schristos             "};\n");
342*924795e6Schristos 
343*924795e6Schristos         /* write out big-endian CRC table for 32-bit z_word_t to crc32.h */
344*924795e6Schristos         fprintf(out,
345*924795e6Schristos             "\n"
346*924795e6Schristos             "#else /* W == 4 */\n"
347*924795e6Schristos             "\n"
348*924795e6Schristos             "local const z_word_t FAR crc_big_table[] = {\n"
349*924795e6Schristos             "    ");
350*924795e6Schristos         write_table32hi(out, crc_big_table, 256);
351*924795e6Schristos         fprintf(out,
352*924795e6Schristos             "};\n"
353*924795e6Schristos             "\n"
354*924795e6Schristos             "#endif\n");
355*924795e6Schristos 
356*924795e6Schristos         /* write out braid tables for each value of N */
357*924795e6Schristos         for (n = 1; n <= 6; n++) {
358*924795e6Schristos             fprintf(out,
359*924795e6Schristos             "\n"
360*924795e6Schristos             "#if N == %d\n", n);
361*924795e6Schristos 
362*924795e6Schristos             /* compute braid tables for this N and 64-bit word_t */
363*924795e6Schristos             braid(ltl, big, n, 8);
364*924795e6Schristos 
365*924795e6Schristos             /* write out braid tables for 64-bit z_word_t to crc32.h */
366*924795e6Schristos             fprintf(out,
367*924795e6Schristos             "\n"
368*924795e6Schristos             "#if W == 8\n"
369*924795e6Schristos             "\n"
370*924795e6Schristos             "local const z_crc_t FAR crc_braid_table[][256] = {\n");
371*924795e6Schristos             for (k = 0; k < 8; k++) {
372*924795e6Schristos                 fprintf(out, "   {");
373*924795e6Schristos                 write_table(out, ltl[k], 256);
374*924795e6Schristos                 fprintf(out, "}%s", k < 7 ? ",\n" : "");
375212397c6Schristos             }
376*924795e6Schristos             fprintf(out,
377*924795e6Schristos             "};\n"
378*924795e6Schristos             "\n"
379*924795e6Schristos             "local const z_word_t FAR crc_braid_big_table[][256] = {\n");
380*924795e6Schristos             for (k = 0; k < 8; k++) {
381*924795e6Schristos                 fprintf(out, "   {");
382*924795e6Schristos                 write_table64(out, big[k], 256);
383*924795e6Schristos                 fprintf(out, "}%s", k < 7 ? ",\n" : "");
384*924795e6Schristos             }
385*924795e6Schristos             fprintf(out,
386*924795e6Schristos             "};\n");
387*924795e6Schristos 
388*924795e6Schristos             /* compute braid tables for this N and 32-bit word_t */
389*924795e6Schristos             braid(ltl, big, n, 4);
390*924795e6Schristos 
391*924795e6Schristos             /* write out braid tables for 32-bit z_word_t to crc32.h */
392*924795e6Schristos             fprintf(out,
393*924795e6Schristos             "\n"
394*924795e6Schristos             "#else /* W == 4 */\n"
395*924795e6Schristos             "\n"
396*924795e6Schristos             "local const z_crc_t FAR crc_braid_table[][256] = {\n");
397*924795e6Schristos             for (k = 0; k < 4; k++) {
398*924795e6Schristos                 fprintf(out, "   {");
399*924795e6Schristos                 write_table(out, ltl[k], 256);
400*924795e6Schristos                 fprintf(out, "}%s", k < 3 ? ",\n" : "");
401*924795e6Schristos             }
402*924795e6Schristos             fprintf(out,
403*924795e6Schristos             "};\n"
404*924795e6Schristos             "\n"
405*924795e6Schristos             "local const z_word_t FAR crc_braid_big_table[][256] = {\n");
406*924795e6Schristos             for (k = 0; k < 4; k++) {
407*924795e6Schristos                 fprintf(out, "   {");
408*924795e6Schristos                 write_table32hi(out, big[k], 256);
409*924795e6Schristos                 fprintf(out, "}%s", k < 3 ? ",\n" : "");
410*924795e6Schristos             }
411*924795e6Schristos             fprintf(out,
412*924795e6Schristos             "};\n"
413*924795e6Schristos             "\n"
414*924795e6Schristos             "#endif\n"
415*924795e6Schristos             "\n"
416*924795e6Schristos             "#endif\n");
417*924795e6Schristos         }
418*924795e6Schristos         fprintf(out,
419*924795e6Schristos             "\n"
420*924795e6Schristos             "#endif\n");
421*924795e6Schristos 
422*924795e6Schristos         /* write out zeros operator table to crc32.h */
423*924795e6Schristos         fprintf(out,
424*924795e6Schristos             "\n"
425*924795e6Schristos             "local const z_crc_t FAR x2n_table[] = {\n"
426*924795e6Schristos             "    ");
427*924795e6Schristos         write_table(out, x2n_table, 32);
428*924795e6Schristos         fprintf(out,
429*924795e6Schristos             "};\n");
430212397c6Schristos         fclose(out);
431212397c6Schristos     }
432212397c6Schristos #endif /* MAKECRCH */
433212397c6Schristos }
434212397c6Schristos 
435212397c6Schristos #ifdef MAKECRCH
436*924795e6Schristos 
437*924795e6Schristos /*
438*924795e6Schristos    Write the 32-bit values in table[0..k-1] to out, five per line in
439*924795e6Schristos    hexadecimal separated by commas.
440*924795e6Schristos  */
write_table(out,table,k)441*924795e6Schristos local void write_table(out, table, k)
442212397c6Schristos     FILE *out;
443212397c6Schristos     const z_crc_t FAR *table;
444*924795e6Schristos     int k;
445212397c6Schristos {
446212397c6Schristos     int n;
447212397c6Schristos 
448*924795e6Schristos     for (n = 0; n < k; n++)
449*924795e6Schristos         fprintf(out, "%s0x%08lx%s", n == 0 || n % 5 ? "" : "    ",
450212397c6Schristos                 (unsigned long)(table[n]),
451*924795e6Schristos                 n == k - 1 ? "" : (n % 5 == 4 ? ",\n" : ", "));
452212397c6Schristos }
453*924795e6Schristos 
454*924795e6Schristos /*
455*924795e6Schristos    Write the high 32-bits of each value in table[0..k-1] to out, five per line
456*924795e6Schristos    in hexadecimal separated by commas.
457*924795e6Schristos  */
write_table32hi(out,table,k)458*924795e6Schristos local void write_table32hi(out, table, k)
459*924795e6Schristos FILE *out;
460*924795e6Schristos const z_word_t FAR *table;
461*924795e6Schristos int k;
462*924795e6Schristos {
463*924795e6Schristos     int n;
464*924795e6Schristos 
465*924795e6Schristos     for (n = 0; n < k; n++)
466*924795e6Schristos         fprintf(out, "%s0x%08lx%s", n == 0 || n % 5 ? "" : "    ",
467*924795e6Schristos                 (unsigned long)(table[n] >> 32),
468*924795e6Schristos                 n == k - 1 ? "" : (n % 5 == 4 ? ",\n" : ", "));
469*924795e6Schristos }
470*924795e6Schristos 
471*924795e6Schristos /*
472*924795e6Schristos   Write the 64-bit values in table[0..k-1] to out, three per line in
473*924795e6Schristos   hexadecimal separated by commas. This assumes that if there is a 64-bit
474*924795e6Schristos   type, then there is also a long long integer type, and it is at least 64
475*924795e6Schristos   bits. If not, then the type cast and format string can be adjusted
476*924795e6Schristos   accordingly.
477*924795e6Schristos  */
write_table64(out,table,k)478*924795e6Schristos local void write_table64(out, table, k)
479*924795e6Schristos     FILE *out;
480*924795e6Schristos     const z_word_t FAR *table;
481*924795e6Schristos     int k;
482*924795e6Schristos {
483*924795e6Schristos     int n;
484*924795e6Schristos 
485*924795e6Schristos     for (n = 0; n < k; n++)
486*924795e6Schristos         fprintf(out, "%s0x%016llx%s", n == 0 || n % 3 ? "" : "    ",
487*924795e6Schristos                 (unsigned long long)(table[n]),
488*924795e6Schristos                 n == k - 1 ? "" : (n % 3 == 2 ? ",\n" : ", "));
489*924795e6Schristos }
490*924795e6Schristos 
491*924795e6Schristos /* Actually do the deed. */
main()492*924795e6Schristos int main()
493*924795e6Schristos {
494*924795e6Schristos     make_crc_table();
495*924795e6Schristos     return 0;
496*924795e6Schristos }
497*924795e6Schristos 
498212397c6Schristos #endif /* MAKECRCH */
499212397c6Schristos 
500*924795e6Schristos #ifdef W
501*924795e6Schristos /*
502*924795e6Schristos   Generate the little and big-endian braid tables for the given n and z_word_t
503*924795e6Schristos   size w. Each array must have room for w blocks of 256 elements.
504*924795e6Schristos  */
braid(ltl,big,n,w)505*924795e6Schristos local void braid(ltl, big, n, w)
506*924795e6Schristos     z_crc_t ltl[][256];
507*924795e6Schristos     z_word_t big[][256];
508*924795e6Schristos     int n;
509*924795e6Schristos     int w;
510*924795e6Schristos {
511*924795e6Schristos     int k;
512*924795e6Schristos     z_crc_t i, p, q;
513*924795e6Schristos     for (k = 0; k < w; k++) {
514*924795e6Schristos         p = x2nmodp((n * w + 3 - k) << 3, 0);
515*924795e6Schristos         ltl[k][0] = 0;
516*924795e6Schristos         big[w - 1 - k][0] = 0;
517*924795e6Schristos         for (i = 1; i < 256; i++) {
518*924795e6Schristos             ltl[k][i] = q = multmodp(i << 24, p);
519*924795e6Schristos             big[w - 1 - k][i] = byte_swap(q);
520*924795e6Schristos         }
521*924795e6Schristos     }
522*924795e6Schristos }
523*924795e6Schristos #endif
524*924795e6Schristos 
525212397c6Schristos #else /* !DYNAMIC_CRC_TABLE */
526212397c6Schristos /* ========================================================================
527*924795e6Schristos  * Tables for byte-wise and braided CRC-32 calculations, and a table of powers
528*924795e6Schristos  * of x for combining CRC-32s, all made by make_crc_table().
529212397c6Schristos  */
530212397c6Schristos #include "crc32.h"
531212397c6Schristos #endif /* DYNAMIC_CRC_TABLE */
532212397c6Schristos 
533*924795e6Schristos /* ========================================================================
534*924795e6Schristos  * Routines used for CRC calculation. Some are also required for the table
535*924795e6Schristos  * generation above.
536*924795e6Schristos  */
537*924795e6Schristos 
538*924795e6Schristos /*
539*924795e6Schristos   Return a(x) multiplied by b(x) modulo p(x), where p(x) is the CRC polynomial,
540*924795e6Schristos   reflected. For speed, this requires that a not be zero.
541*924795e6Schristos  */
multmodp(a,b)542*924795e6Schristos local z_crc_t multmodp(a, b)
543*924795e6Schristos     z_crc_t a;
544*924795e6Schristos     z_crc_t b;
545*924795e6Schristos {
546*924795e6Schristos     z_crc_t m, p;
547*924795e6Schristos 
548*924795e6Schristos     m = (z_crc_t)1 << 31;
549*924795e6Schristos     p = 0;
550*924795e6Schristos     for (;;) {
551*924795e6Schristos         if (a & m) {
552*924795e6Schristos             p ^= b;
553*924795e6Schristos             if ((a & (m - 1)) == 0)
554*924795e6Schristos                 break;
555*924795e6Schristos         }
556*924795e6Schristos         m >>= 1;
557*924795e6Schristos         b = b & 1 ? (b >> 1) ^ POLY : b >> 1;
558*924795e6Schristos     }
559*924795e6Schristos     return p;
560*924795e6Schristos }
561*924795e6Schristos 
562*924795e6Schristos /*
563*924795e6Schristos   Return x^(n * 2^k) modulo p(x). Requires that x2n_table[] has been
564*924795e6Schristos   initialized.
565*924795e6Schristos  */
x2nmodp(n,k)566*924795e6Schristos local z_crc_t x2nmodp(n, k)
567*924795e6Schristos     z_off64_t n;
568*924795e6Schristos     unsigned k;
569*924795e6Schristos {
570*924795e6Schristos     z_crc_t p;
571*924795e6Schristos 
572*924795e6Schristos     p = (z_crc_t)1 << 31;           /* x^0 == 1 */
573*924795e6Schristos     while (n) {
574*924795e6Schristos         if (n & 1)
575*924795e6Schristos             p = multmodp(x2n_table[k & 31], p);
576*924795e6Schristos         n >>= 1;
577*924795e6Schristos         k++;
578*924795e6Schristos     }
579*924795e6Schristos     return p;
580*924795e6Schristos }
581*924795e6Schristos 
582212397c6Schristos /* =========================================================================
583*924795e6Schristos  * This function can be used by asm versions of crc32(), and to force the
584*924795e6Schristos  * generation of the CRC tables in a threaded application.
585212397c6Schristos  */
get_crc_table()586212397c6Schristos const z_crc_t FAR * ZEXPORT get_crc_table()
587212397c6Schristos {
588212397c6Schristos #ifdef DYNAMIC_CRC_TABLE
589*924795e6Schristos     once(&made, make_crc_table);
590212397c6Schristos #endif /* DYNAMIC_CRC_TABLE */
591212397c6Schristos     return (const z_crc_t FAR *)crc_table;
592212397c6Schristos }
593212397c6Schristos 
594*924795e6Schristos /* =========================================================================
595*924795e6Schristos  * Use ARM machine instructions if available. This will compute the CRC about
596*924795e6Schristos  * ten times faster than the braided calculation. This code does not check for
597*924795e6Schristos  * the presence of the CRC instruction at run time. __ARM_FEATURE_CRC32 will
598*924795e6Schristos  * only be defined if the compilation specifies an ARM processor architecture
599*924795e6Schristos  * that has the instructions. For example, compiling with -march=armv8.1-a or
600*924795e6Schristos  * -march=armv8-a+crc, or -march=native if the compile machine has the crc32
601*924795e6Schristos  * instructions.
602*924795e6Schristos  */
603*924795e6Schristos #ifdef ARMCRC32
604*924795e6Schristos 
605*924795e6Schristos /*
606*924795e6Schristos    Constants empirically determined to maximize speed. These values are from
607*924795e6Schristos    measurements on a Cortex-A57. Your mileage may vary.
608*924795e6Schristos  */
609*924795e6Schristos #define Z_BATCH 3990                /* number of words in a batch */
610*924795e6Schristos #define Z_BATCH_ZEROS 0xa10d3d0c    /* computed from Z_BATCH = 3990 */
611*924795e6Schristos #define Z_BATCH_MIN 800             /* fewest words in a final batch */
612*924795e6Schristos 
crc32_z(crc,buf,len)613*924795e6Schristos unsigned long ZEXPORT crc32_z(crc, buf, len)
614*924795e6Schristos     unsigned long crc;
615*924795e6Schristos     const unsigned char FAR *buf;
616*924795e6Schristos     z_size_t len;
617*924795e6Schristos {
618*924795e6Schristos     z_crc_t val;
619*924795e6Schristos     z_word_t crc1, crc2;
620*924795e6Schristos     const z_word_t *word;
621*924795e6Schristos     z_word_t val0, val1, val2;
622*924795e6Schristos     z_size_t last, last2, i;
623*924795e6Schristos     z_size_t num;
624*924795e6Schristos 
625*924795e6Schristos     /* Return initial CRC, if requested. */
626*924795e6Schristos     if (buf == Z_NULL) return 0;
627*924795e6Schristos 
628*924795e6Schristos #ifdef DYNAMIC_CRC_TABLE
629*924795e6Schristos     once(&made, make_crc_table);
630*924795e6Schristos #endif /* DYNAMIC_CRC_TABLE */
631*924795e6Schristos 
632*924795e6Schristos     /* Pre-condition the CRC */
633*924795e6Schristos     crc ^= 0xffffffff;
634*924795e6Schristos 
635*924795e6Schristos     /* Compute the CRC up to a word boundary. */
636*924795e6Schristos     while (len && ((z_size_t)buf & 7) != 0) {
637*924795e6Schristos         len--;
638*924795e6Schristos         val = *buf++;
639*924795e6Schristos         __asm__ volatile("crc32b %w0, %w0, %w1" : "+r"(crc) : "r"(val));
640*924795e6Schristos     }
641*924795e6Schristos 
642*924795e6Schristos     /* Prepare to compute the CRC on full 64-bit words word[0..num-1]. */
643*924795e6Schristos     word = (z_word_t const *)buf;
644*924795e6Schristos     num = len >> 3;
645*924795e6Schristos     len &= 7;
646*924795e6Schristos 
647*924795e6Schristos     /* Do three interleaved CRCs to realize the throughput of one crc32x
648*924795e6Schristos        instruction per cycle. Each CRC is calcuated on Z_BATCH words. The three
649*924795e6Schristos        CRCs are combined into a single CRC after each set of batches. */
650*924795e6Schristos     while (num >= 3 * Z_BATCH) {
651*924795e6Schristos         crc1 = 0;
652*924795e6Schristos         crc2 = 0;
653*924795e6Schristos         for (i = 0; i < Z_BATCH; i++) {
654*924795e6Schristos             val0 = word[i];
655*924795e6Schristos             val1 = word[i + Z_BATCH];
656*924795e6Schristos             val2 = word[i + 2 * Z_BATCH];
657*924795e6Schristos             __asm__ volatile("crc32x %w0, %w0, %x1" : "+r"(crc) : "r"(val0));
658*924795e6Schristos             __asm__ volatile("crc32x %w0, %w0, %x1" : "+r"(crc1) : "r"(val1));
659*924795e6Schristos             __asm__ volatile("crc32x %w0, %w0, %x1" : "+r"(crc2) : "r"(val2));
660*924795e6Schristos         }
661*924795e6Schristos         word += 3 * Z_BATCH;
662*924795e6Schristos         num -= 3 * Z_BATCH;
663*924795e6Schristos         crc = multmodp(Z_BATCH_ZEROS, crc) ^ crc1;
664*924795e6Schristos         crc = multmodp(Z_BATCH_ZEROS, crc) ^ crc2;
665*924795e6Schristos     }
666*924795e6Schristos 
667*924795e6Schristos     /* Do one last smaller batch with the remaining words, if there are enough
668*924795e6Schristos        to pay for the combination of CRCs. */
669*924795e6Schristos     last = num / 3;
670*924795e6Schristos     if (last >= Z_BATCH_MIN) {
671*924795e6Schristos         last2 = last << 1;
672*924795e6Schristos         crc1 = 0;
673*924795e6Schristos         crc2 = 0;
674*924795e6Schristos         for (i = 0; i < last; i++) {
675*924795e6Schristos             val0 = word[i];
676*924795e6Schristos             val1 = word[i + last];
677*924795e6Schristos             val2 = word[i + last2];
678*924795e6Schristos             __asm__ volatile("crc32x %w0, %w0, %x1" : "+r"(crc) : "r"(val0));
679*924795e6Schristos             __asm__ volatile("crc32x %w0, %w0, %x1" : "+r"(crc1) : "r"(val1));
680*924795e6Schristos             __asm__ volatile("crc32x %w0, %w0, %x1" : "+r"(crc2) : "r"(val2));
681*924795e6Schristos         }
682*924795e6Schristos         word += 3 * last;
683*924795e6Schristos         num -= 3 * last;
684*924795e6Schristos         val = x2nmodp(last, 6);
685*924795e6Schristos         crc = multmodp(val, crc) ^ crc1;
686*924795e6Schristos         crc = multmodp(val, crc) ^ crc2;
687*924795e6Schristos     }
688*924795e6Schristos 
689*924795e6Schristos     /* Compute the CRC on any remaining words. */
690*924795e6Schristos     for (i = 0; i < num; i++) {
691*924795e6Schristos         val0 = word[i];
692*924795e6Schristos         __asm__ volatile("crc32x %w0, %w0, %x1" : "+r"(crc) : "r"(val0));
693*924795e6Schristos     }
694*924795e6Schristos     word += num;
695*924795e6Schristos 
696*924795e6Schristos     /* Complete the CRC on any remaining bytes. */
697*924795e6Schristos     buf = (const unsigned char FAR *)word;
698*924795e6Schristos     while (len) {
699*924795e6Schristos         len--;
700*924795e6Schristos         val = *buf++;
701*924795e6Schristos         __asm__ volatile("crc32b %w0, %w0, %w1" : "+r"(crc) : "r"(val));
702*924795e6Schristos     }
703*924795e6Schristos 
704*924795e6Schristos     /* Return the CRC, post-conditioned. */
705*924795e6Schristos     return crc ^ 0xffffffff;
706*924795e6Schristos }
707*924795e6Schristos 
708*924795e6Schristos #else
709*924795e6Schristos 
710*924795e6Schristos #ifdef W
711*924795e6Schristos 
712*924795e6Schristos /*
713*924795e6Schristos   Return the CRC of the W bytes in the word_t data, taking the
714*924795e6Schristos   least-significant byte of the word as the first byte of data, without any pre
715*924795e6Schristos   or post conditioning. This is used to combine the CRCs of each braid.
716*924795e6Schristos  */
crc_word(data)717*924795e6Schristos local z_crc_t crc_word(data)
718*924795e6Schristos     z_word_t data;
719*924795e6Schristos {
720*924795e6Schristos     int k;
721*924795e6Schristos     for (k = 0; k < W; k++)
722*924795e6Schristos         data = (data >> 8) ^ crc_table[data & 0xff];
723*924795e6Schristos     return (z_crc_t)data;
724*924795e6Schristos }
725*924795e6Schristos 
crc_word_big(data)726*924795e6Schristos local z_word_t crc_word_big(data)
727*924795e6Schristos     z_word_t data;
728*924795e6Schristos {
729*924795e6Schristos     int k;
730*924795e6Schristos     for (k = 0; k < W; k++)
731*924795e6Schristos         data = (data << 8) ^
732*924795e6Schristos             crc_big_table[(data >> ((W - 1) << 3)) & 0xff];
733*924795e6Schristos     return data;
734*924795e6Schristos }
735*924795e6Schristos 
736*924795e6Schristos #endif
737212397c6Schristos 
738212397c6Schristos /* ========================================================================= */
crc32_z(crc,buf,len)739796c32c9Schristos unsigned long ZEXPORT crc32_z(crc, buf, len)
740212397c6Schristos     unsigned long crc;
741212397c6Schristos     const unsigned char FAR *buf;
742796c32c9Schristos     z_size_t len;
743212397c6Schristos {
744*924795e6Schristos     /* Return initial CRC, if requested. */
745*924795e6Schristos     if (buf == Z_NULL) return 0;
746212397c6Schristos 
747212397c6Schristos #ifdef DYNAMIC_CRC_TABLE
748*924795e6Schristos     once(&made, make_crc_table);
749212397c6Schristos #endif /* DYNAMIC_CRC_TABLE */
750212397c6Schristos 
751*924795e6Schristos     /* Pre-condition the CRC */
752*924795e6Schristos     crc ^= 0xffffffff;
753212397c6Schristos 
754*924795e6Schristos #ifdef W
755*924795e6Schristos 
756*924795e6Schristos     /* If provided enough bytes, do a braided CRC calculation. */
757*924795e6Schristos     if (len >= N * W + W - 1) {
758*924795e6Schristos         z_size_t blks;
759*924795e6Schristos         z_word_t const *words;
760*924795e6Schristos         unsigned endian;
761*924795e6Schristos         int k;
762*924795e6Schristos 
763*924795e6Schristos         /* Compute the CRC up to a z_word_t boundary. */
764*924795e6Schristos         while (len && ((z_size_t)buf & (W - 1)) != 0) {
765*924795e6Schristos             len--;
766*924795e6Schristos             crc = (crc >> 8) ^ crc_table[(crc ^ *buf++) & 0xff];
767*924795e6Schristos         }
768*924795e6Schristos 
769*924795e6Schristos         /* Compute the CRC on as many N z_word_t blocks as are available. */
770*924795e6Schristos         blks = len / (N * W);
771*924795e6Schristos         len -= blks * N * W;
772*924795e6Schristos         words = (z_word_t const *)buf;
773*924795e6Schristos 
774*924795e6Schristos         /* Do endian check at execution time instead of compile time, since ARM
775*924795e6Schristos            processors can change the endianess at execution time. If the
776*924795e6Schristos            compiler knows what the endianess will be, it can optimize out the
777*924795e6Schristos            check and the unused branch. */
778212397c6Schristos         endian = 1;
779*924795e6Schristos         if (*(unsigned char *)&endian) {
780*924795e6Schristos             /* Little endian. */
781*924795e6Schristos 
782*924795e6Schristos             z_crc_t crc0;
783*924795e6Schristos             z_word_t word0;
784*924795e6Schristos #if N > 1
785*924795e6Schristos             z_crc_t crc1;
786*924795e6Schristos             z_word_t word1;
787*924795e6Schristos #if N > 2
788*924795e6Schristos             z_crc_t crc2;
789*924795e6Schristos             z_word_t word2;
790*924795e6Schristos #if N > 3
791*924795e6Schristos             z_crc_t crc3;
792*924795e6Schristos             z_word_t word3;
793*924795e6Schristos #if N > 4
794*924795e6Schristos             z_crc_t crc4;
795*924795e6Schristos             z_word_t word4;
796*924795e6Schristos #if N > 5
797*924795e6Schristos             z_crc_t crc5;
798*924795e6Schristos             z_word_t word5;
799*924795e6Schristos #endif
800*924795e6Schristos #endif
801*924795e6Schristos #endif
802*924795e6Schristos #endif
803*924795e6Schristos #endif
804*924795e6Schristos 
805*924795e6Schristos             /* Initialize the CRC for each braid. */
806*924795e6Schristos             crc0 = crc;
807*924795e6Schristos #if N > 1
808*924795e6Schristos             crc1 = 0;
809*924795e6Schristos #if N > 2
810*924795e6Schristos             crc2 = 0;
811*924795e6Schristos #if N > 3
812*924795e6Schristos             crc3 = 0;
813*924795e6Schristos #if N > 4
814*924795e6Schristos             crc4 = 0;
815*924795e6Schristos #if N > 5
816*924795e6Schristos             crc5 = 0;
817*924795e6Schristos #endif
818*924795e6Schristos #endif
819*924795e6Schristos #endif
820*924795e6Schristos #endif
821*924795e6Schristos #endif
822*924795e6Schristos 
823*924795e6Schristos             /*
824*924795e6Schristos               Process the first blks-1 blocks, computing the CRCs on each braid
825*924795e6Schristos               independently.
826*924795e6Schristos              */
827*924795e6Schristos             while (--blks) {
828*924795e6Schristos                 /* Load the word for each braid into registers. */
829*924795e6Schristos                 word0 = crc0 ^ words[0];
830*924795e6Schristos #if N > 1
831*924795e6Schristos                 word1 = crc1 ^ words[1];
832*924795e6Schristos #if N > 2
833*924795e6Schristos                 word2 = crc2 ^ words[2];
834*924795e6Schristos #if N > 3
835*924795e6Schristos                 word3 = crc3 ^ words[3];
836*924795e6Schristos #if N > 4
837*924795e6Schristos                 word4 = crc4 ^ words[4];
838*924795e6Schristos #if N > 5
839*924795e6Schristos                 word5 = crc5 ^ words[5];
840*924795e6Schristos #endif
841*924795e6Schristos #endif
842*924795e6Schristos #endif
843*924795e6Schristos #endif
844*924795e6Schristos #endif
845*924795e6Schristos                 words += N;
846*924795e6Schristos 
847*924795e6Schristos                 /* Compute and update the CRC for each word. The loop should
848*924795e6Schristos                    get unrolled. */
849*924795e6Schristos                 crc0 = crc_braid_table[0][word0 & 0xff];
850*924795e6Schristos #if N > 1
851*924795e6Schristos                 crc1 = crc_braid_table[0][word1 & 0xff];
852*924795e6Schristos #if N > 2
853*924795e6Schristos                 crc2 = crc_braid_table[0][word2 & 0xff];
854*924795e6Schristos #if N > 3
855*924795e6Schristos                 crc3 = crc_braid_table[0][word3 & 0xff];
856*924795e6Schristos #if N > 4
857*924795e6Schristos                 crc4 = crc_braid_table[0][word4 & 0xff];
858*924795e6Schristos #if N > 5
859*924795e6Schristos                 crc5 = crc_braid_table[0][word5 & 0xff];
860*924795e6Schristos #endif
861*924795e6Schristos #endif
862*924795e6Schristos #endif
863*924795e6Schristos #endif
864*924795e6Schristos #endif
865*924795e6Schristos                 for (k = 1; k < W; k++) {
866*924795e6Schristos                     crc0 ^= crc_braid_table[k][(word0 >> (k << 3)) & 0xff];
867*924795e6Schristos #if N > 1
868*924795e6Schristos                     crc1 ^= crc_braid_table[k][(word1 >> (k << 3)) & 0xff];
869*924795e6Schristos #if N > 2
870*924795e6Schristos                     crc2 ^= crc_braid_table[k][(word2 >> (k << 3)) & 0xff];
871*924795e6Schristos #if N > 3
872*924795e6Schristos                     crc3 ^= crc_braid_table[k][(word3 >> (k << 3)) & 0xff];
873*924795e6Schristos #if N > 4
874*924795e6Schristos                     crc4 ^= crc_braid_table[k][(word4 >> (k << 3)) & 0xff];
875*924795e6Schristos #if N > 5
876*924795e6Schristos                     crc5 ^= crc_braid_table[k][(word5 >> (k << 3)) & 0xff];
877*924795e6Schristos #endif
878*924795e6Schristos #endif
879*924795e6Schristos #endif
880*924795e6Schristos #endif
881*924795e6Schristos #endif
882212397c6Schristos                 }
883*924795e6Schristos             }
884*924795e6Schristos 
885*924795e6Schristos             /*
886*924795e6Schristos               Process the last block, combining the CRCs of the N braids at the
887*924795e6Schristos               same time.
888*924795e6Schristos              */
889*924795e6Schristos             crc = crc_word(crc0 ^ words[0]);
890*924795e6Schristos #if N > 1
891*924795e6Schristos             crc = crc_word(crc1 ^ words[1] ^ crc);
892*924795e6Schristos #if N > 2
893*924795e6Schristos             crc = crc_word(crc2 ^ words[2] ^ crc);
894*924795e6Schristos #if N > 3
895*924795e6Schristos             crc = crc_word(crc3 ^ words[3] ^ crc);
896*924795e6Schristos #if N > 4
897*924795e6Schristos             crc = crc_word(crc4 ^ words[4] ^ crc);
898*924795e6Schristos #if N > 5
899*924795e6Schristos             crc = crc_word(crc5 ^ words[5] ^ crc);
900*924795e6Schristos #endif
901*924795e6Schristos #endif
902*924795e6Schristos #endif
903*924795e6Schristos #endif
904*924795e6Schristos #endif
905*924795e6Schristos             words += N;
906*924795e6Schristos         }
907*924795e6Schristos         else {
908*924795e6Schristos             /* Big endian. */
909*924795e6Schristos 
910*924795e6Schristos             z_word_t crc0, word0, comb;
911*924795e6Schristos #if N > 1
912*924795e6Schristos             z_word_t crc1, word1;
913*924795e6Schristos #if N > 2
914*924795e6Schristos             z_word_t crc2, word2;
915*924795e6Schristos #if N > 3
916*924795e6Schristos             z_word_t crc3, word3;
917*924795e6Schristos #if N > 4
918*924795e6Schristos             z_word_t crc4, word4;
919*924795e6Schristos #if N > 5
920*924795e6Schristos             z_word_t crc5, word5;
921*924795e6Schristos #endif
922*924795e6Schristos #endif
923*924795e6Schristos #endif
924*924795e6Schristos #endif
925*924795e6Schristos #endif
926*924795e6Schristos 
927*924795e6Schristos             /* Initialize the CRC for each braid. */
928*924795e6Schristos             crc0 = byte_swap(crc);
929*924795e6Schristos #if N > 1
930*924795e6Schristos             crc1 = 0;
931*924795e6Schristos #if N > 2
932*924795e6Schristos             crc2 = 0;
933*924795e6Schristos #if N > 3
934*924795e6Schristos             crc3 = 0;
935*924795e6Schristos #if N > 4
936*924795e6Schristos             crc4 = 0;
937*924795e6Schristos #if N > 5
938*924795e6Schristos             crc5 = 0;
939*924795e6Schristos #endif
940*924795e6Schristos #endif
941*924795e6Schristos #endif
942*924795e6Schristos #endif
943*924795e6Schristos #endif
944*924795e6Schristos 
945*924795e6Schristos             /*
946*924795e6Schristos               Process the first blks-1 blocks, computing the CRCs on each braid
947*924795e6Schristos               independently.
948*924795e6Schristos              */
949*924795e6Schristos             while (--blks) {
950*924795e6Schristos                 /* Load the word for each braid into registers. */
951*924795e6Schristos                 word0 = crc0 ^ words[0];
952*924795e6Schristos #if N > 1
953*924795e6Schristos                 word1 = crc1 ^ words[1];
954*924795e6Schristos #if N > 2
955*924795e6Schristos                 word2 = crc2 ^ words[2];
956*924795e6Schristos #if N > 3
957*924795e6Schristos                 word3 = crc3 ^ words[3];
958*924795e6Schristos #if N > 4
959*924795e6Schristos                 word4 = crc4 ^ words[4];
960*924795e6Schristos #if N > 5
961*924795e6Schristos                 word5 = crc5 ^ words[5];
962*924795e6Schristos #endif
963*924795e6Schristos #endif
964*924795e6Schristos #endif
965*924795e6Schristos #endif
966*924795e6Schristos #endif
967*924795e6Schristos                 words += N;
968*924795e6Schristos 
969*924795e6Schristos                 /* Compute and update the CRC for each word. The loop should
970*924795e6Schristos                    get unrolled. */
971*924795e6Schristos                 crc0 = crc_braid_big_table[0][word0 & 0xff];
972*924795e6Schristos #if N > 1
973*924795e6Schristos                 crc1 = crc_braid_big_table[0][word1 & 0xff];
974*924795e6Schristos #if N > 2
975*924795e6Schristos                 crc2 = crc_braid_big_table[0][word2 & 0xff];
976*924795e6Schristos #if N > 3
977*924795e6Schristos                 crc3 = crc_braid_big_table[0][word3 & 0xff];
978*924795e6Schristos #if N > 4
979*924795e6Schristos                 crc4 = crc_braid_big_table[0][word4 & 0xff];
980*924795e6Schristos #if N > 5
981*924795e6Schristos                 crc5 = crc_braid_big_table[0][word5 & 0xff];
982*924795e6Schristos #endif
983*924795e6Schristos #endif
984*924795e6Schristos #endif
985*924795e6Schristos #endif
986*924795e6Schristos #endif
987*924795e6Schristos                 for (k = 1; k < W; k++) {
988*924795e6Schristos                     crc0 ^= crc_braid_big_table[k][(word0 >> (k << 3)) & 0xff];
989*924795e6Schristos #if N > 1
990*924795e6Schristos                     crc1 ^= crc_braid_big_table[k][(word1 >> (k << 3)) & 0xff];
991*924795e6Schristos #if N > 2
992*924795e6Schristos                     crc2 ^= crc_braid_big_table[k][(word2 >> (k << 3)) & 0xff];
993*924795e6Schristos #if N > 3
994*924795e6Schristos                     crc3 ^= crc_braid_big_table[k][(word3 >> (k << 3)) & 0xff];
995*924795e6Schristos #if N > 4
996*924795e6Schristos                     crc4 ^= crc_braid_big_table[k][(word4 >> (k << 3)) & 0xff];
997*924795e6Schristos #if N > 5
998*924795e6Schristos                     crc5 ^= crc_braid_big_table[k][(word5 >> (k << 3)) & 0xff];
999*924795e6Schristos #endif
1000*924795e6Schristos #endif
1001*924795e6Schristos #endif
1002*924795e6Schristos #endif
1003*924795e6Schristos #endif
1004*924795e6Schristos                 }
1005*924795e6Schristos             }
1006*924795e6Schristos 
1007*924795e6Schristos             /*
1008*924795e6Schristos               Process the last block, combining the CRCs of the N braids at the
1009*924795e6Schristos               same time.
1010*924795e6Schristos              */
1011*924795e6Schristos             comb = crc_word_big(crc0 ^ words[0]);
1012*924795e6Schristos #if N > 1
1013*924795e6Schristos             comb = crc_word_big(crc1 ^ words[1] ^ comb);
1014*924795e6Schristos #if N > 2
1015*924795e6Schristos             comb = crc_word_big(crc2 ^ words[2] ^ comb);
1016*924795e6Schristos #if N > 3
1017*924795e6Schristos             comb = crc_word_big(crc3 ^ words[3] ^ comb);
1018*924795e6Schristos #if N > 4
1019*924795e6Schristos             comb = crc_word_big(crc4 ^ words[4] ^ comb);
1020*924795e6Schristos #if N > 5
1021*924795e6Schristos             comb = crc_word_big(crc5 ^ words[5] ^ comb);
1022*924795e6Schristos #endif
1023*924795e6Schristos #endif
1024*924795e6Schristos #endif
1025*924795e6Schristos #endif
1026*924795e6Schristos #endif
1027*924795e6Schristos             words += N;
1028*924795e6Schristos             crc = byte_swap(comb);
1029*924795e6Schristos         }
1030*924795e6Schristos 
1031*924795e6Schristos         /*
1032*924795e6Schristos           Update the pointer to the remaining bytes to process.
1033*924795e6Schristos          */
1034*924795e6Schristos         buf = (unsigned char const *)words;
1035*924795e6Schristos     }
1036*924795e6Schristos 
1037*924795e6Schristos #endif /* W */
1038*924795e6Schristos 
1039*924795e6Schristos     /* Complete the computation of the CRC on any remaining bytes. */
1040212397c6Schristos     while (len >= 8) {
1041212397c6Schristos         len -= 8;
1042*924795e6Schristos         crc = (crc >> 8) ^ crc_table[(crc ^ *buf++) & 0xff];
1043*924795e6Schristos         crc = (crc >> 8) ^ crc_table[(crc ^ *buf++) & 0xff];
1044*924795e6Schristos         crc = (crc >> 8) ^ crc_table[(crc ^ *buf++) & 0xff];
1045*924795e6Schristos         crc = (crc >> 8) ^ crc_table[(crc ^ *buf++) & 0xff];
1046*924795e6Schristos         crc = (crc >> 8) ^ crc_table[(crc ^ *buf++) & 0xff];
1047*924795e6Schristos         crc = (crc >> 8) ^ crc_table[(crc ^ *buf++) & 0xff];
1048*924795e6Schristos         crc = (crc >> 8) ^ crc_table[(crc ^ *buf++) & 0xff];
1049*924795e6Schristos         crc = (crc >> 8) ^ crc_table[(crc ^ *buf++) & 0xff];
1050212397c6Schristos     }
1051*924795e6Schristos     while (len) {
1052*924795e6Schristos         len--;
1053*924795e6Schristos         crc = (crc >> 8) ^ crc_table[(crc ^ *buf++) & 0xff];
1054212397c6Schristos     }
1055212397c6Schristos 
1056*924795e6Schristos     /* Return the CRC, post-conditioned. */
1057*924795e6Schristos     return crc ^ 0xffffffff;
1058*924795e6Schristos }
1059*924795e6Schristos 
1060*924795e6Schristos #endif
1061*924795e6Schristos 
1062796c32c9Schristos /* ========================================================================= */
crc32(crc,buf,len)1063796c32c9Schristos unsigned long ZEXPORT crc32(crc, buf, len)
1064796c32c9Schristos     unsigned long crc;
1065796c32c9Schristos     const unsigned char FAR *buf;
1066796c32c9Schristos     uInt len;
1067796c32c9Schristos {
1068796c32c9Schristos     return crc32_z(crc, buf, len);
1069796c32c9Schristos }
1070796c32c9Schristos 
1071212397c6Schristos /* ========================================================================= */
crc32_combine64(crc1,crc2,len2)1072*924795e6Schristos uLong ZEXPORT crc32_combine64(crc1, crc2, len2)
1073212397c6Schristos     uLong crc1;
1074212397c6Schristos     uLong crc2;
1075212397c6Schristos     z_off64_t len2;
1076212397c6Schristos {
1077*924795e6Schristos #ifdef DYNAMIC_CRC_TABLE
1078*924795e6Schristos     once(&made, make_crc_table);
1079*924795e6Schristos #endif /* DYNAMIC_CRC_TABLE */
1080*924795e6Schristos     return multmodp(x2nmodp(len2, 3), crc1) ^ crc2;
1081212397c6Schristos }
1082212397c6Schristos 
1083212397c6Schristos /* ========================================================================= */
crc32_combine(crc1,crc2,len2)1084212397c6Schristos uLong ZEXPORT crc32_combine(crc1, crc2, len2)
1085212397c6Schristos     uLong crc1;
1086212397c6Schristos     uLong crc2;
1087212397c6Schristos     z_off_t len2;
1088212397c6Schristos {
1089*924795e6Schristos     return crc32_combine64(crc1, crc2, len2);
1090212397c6Schristos }
1091212397c6Schristos 
1092*924795e6Schristos /* ========================================================================= */
crc32_combine_gen64(len2)1093*924795e6Schristos uLong ZEXPORT crc32_combine_gen64(len2)
1094212397c6Schristos     z_off64_t len2;
1095212397c6Schristos {
1096*924795e6Schristos #ifdef DYNAMIC_CRC_TABLE
1097*924795e6Schristos     once(&made, make_crc_table);
1098*924795e6Schristos #endif /* DYNAMIC_CRC_TABLE */
1099*924795e6Schristos     return x2nmodp(len2, 3);
1100*924795e6Schristos }
1101*924795e6Schristos 
1102*924795e6Schristos /* ========================================================================= */
crc32_combine_gen(len2)1103*924795e6Schristos uLong ZEXPORT crc32_combine_gen(len2)
1104*924795e6Schristos     z_off_t len2;
1105*924795e6Schristos {
1106*924795e6Schristos     return crc32_combine_gen64(len2);
1107*924795e6Schristos }
1108*924795e6Schristos 
1109*924795e6Schristos /* ========================================================================= */
crc32_combine_op(crc1,crc2,op)1110*924795e6Schristos uLong crc32_combine_op(crc1, crc2, op)
1111*924795e6Schristos     uLong crc1;
1112*924795e6Schristos     uLong crc2;
1113*924795e6Schristos     uLong op;
1114*924795e6Schristos {
1115*924795e6Schristos     return multmodp(op, crc1) ^ crc2;
1116212397c6Schristos }
1117