xref: /netbsd-src/common/dist/zlib/crc32.c (revision 96c3282121aac2037abbd5952fd638784deb5ab1)
1*96c32821Schristos /*	$NetBSD: crc32.c,v 1.7 2024/09/22 19:12:27 christos Exp $	*/
2aaf4ece6Schristos 
3aaf4ece6Schristos /* crc32.c -- compute the CRC-32 of a data stream
43b1253e8Schristos  * Copyright (C) 1995-2022 Mark Adler
5aaf4ece6Schristos  * For conditions of distribution and use, see copyright notice in zlib.h
6aaf4ece6Schristos  *
73b1253e8Schristos  * This interleaved implementation of a CRC makes use of pipelined multiple
83b1253e8Schristos  * arithmetic-logic units, commonly found in modern CPU cores. It is due to
93b1253e8Schristos  * Kadatch and Jenkins (2010). See doc/crc-doc.1.0.pdf in this distribution.
10aaf4ece6Schristos  */
11aaf4ece6Schristos 
123b1253e8Schristos /* @(#) Id */
13aaf4ece6Schristos 
14aaf4ece6Schristos /*
15aaf4ece6Schristos   Note on the use of DYNAMIC_CRC_TABLE: there is no mutex or semaphore
16aaf4ece6Schristos   protection on the static variables used to control the first-use generation
17aaf4ece6Schristos   of the crc tables. Therefore, if you #define DYNAMIC_CRC_TABLE, you should
18aaf4ece6Schristos   first call get_crc_table() to initialize the tables before allowing more than
19aaf4ece6Schristos   one thread to use crc32().
206db8c6e9Schristos 
213b1253e8Schristos   MAKECRCH can be #defined to write out crc32.h. A main() routine is also
223b1253e8Schristos   produced, so that this one source file can be compiled to an executable.
23aaf4ece6Schristos  */
24aaf4ece6Schristos 
25aaf4ece6Schristos #ifdef MAKECRCH
26aaf4ece6Schristos #  include <stdio.h>
27aaf4ece6Schristos #  ifndef DYNAMIC_CRC_TABLE
28aaf4ece6Schristos #    define DYNAMIC_CRC_TABLE
29aaf4ece6Schristos #  endif /* !DYNAMIC_CRC_TABLE */
30aaf4ece6Schristos #endif /* MAKECRCH */
31aaf4ece6Schristos 
323b1253e8Schristos #include "zutil.h"      /* for Z_U4, Z_U8, z_crc_t, and FAR definitions */
33aaf4ece6Schristos 
343b1253e8Schristos  /*
353b1253e8Schristos   A CRC of a message is computed on N braids of words in the message, where
363b1253e8Schristos   each word consists of W bytes (4 or 8). If N is 3, for example, then three
373b1253e8Schristos   running sparse CRCs are calculated respectively on each braid, at these
383b1253e8Schristos   indices in the array of words: 0, 3, 6, ..., 1, 4, 7, ..., and 2, 5, 8, ...
393b1253e8Schristos   This is done starting at a word boundary, and continues until as many blocks
403b1253e8Schristos   of N * W bytes as are available have been processed. The results are combined
413b1253e8Schristos   into a single CRC at the end. For this code, N must be in the range 1..6 and
423b1253e8Schristos   W must be 4 or 8. The upper limit on N can be increased if desired by adding
433b1253e8Schristos   more #if blocks, extending the patterns apparent in the code. In addition,
443b1253e8Schristos   crc32.h would need to be regenerated, if the maximum N value is increased.
45aaf4ece6Schristos 
463b1253e8Schristos   N and W are chosen empirically by benchmarking the execution time on a given
473b1253e8Schristos   processor. The choices for N and W below were based on testing on Intel Kaby
483b1253e8Schristos   Lake i7, AMD Ryzen 7, ARM Cortex-A57, Sparc64-VII, PowerPC POWER9, and MIPS64
493b1253e8Schristos   Octeon II processors. The Intel, AMD, and ARM processors were all fastest
503b1253e8Schristos   with N=5, W=8. The Sparc, PowerPC, and MIPS64 were all fastest at N=5, W=4.
513b1253e8Schristos   They were all tested with either gcc or clang, all using the -O3 optimization
523b1253e8Schristos   level. Your mileage may vary.
533b1253e8Schristos  */
542fd2e12bSdsl 
553b1253e8Schristos /* Define N */
563b1253e8Schristos #ifdef Z_TESTN
573b1253e8Schristos #  define N Z_TESTN
58aaf4ece6Schristos #else
593b1253e8Schristos #  define N 5
603b1253e8Schristos #endif
613b1253e8Schristos #if N < 1 || N > 6
623b1253e8Schristos #  error N must be in 1..6
633b1253e8Schristos #endif
64aaf4ece6Schristos 
653b1253e8Schristos /*
663b1253e8Schristos   z_crc_t must be at least 32 bits. z_word_t must be at least as long as
673b1253e8Schristos   z_crc_t. It is assumed here that z_word_t is either 32 bits or 64 bits, and
683b1253e8Schristos   that bytes are eight bits.
693b1253e8Schristos  */
706db8c6e9Schristos 
713b1253e8Schristos /*
723b1253e8Schristos   Define W and the associated z_word_t type. If W is not defined, then a
733b1253e8Schristos   braided calculation is not used, and the associated tables and code are not
743b1253e8Schristos   compiled.
753b1253e8Schristos  */
763b1253e8Schristos #ifdef Z_TESTW
773b1253e8Schristos #  if Z_TESTW-1 != -1
783b1253e8Schristos #    define W Z_TESTW
793b1253e8Schristos #  endif
803b1253e8Schristos #else
813b1253e8Schristos #  ifdef MAKECRCH
823b1253e8Schristos #    define W 8         /* required for MAKECRCH */
833b1253e8Schristos #  else
843b1253e8Schristos #    if defined(__x86_64__) || defined(__aarch64__)
853b1253e8Schristos #      define W 8
863b1253e8Schristos #    else
873b1253e8Schristos #      define W 4
883b1253e8Schristos #    endif
893b1253e8Schristos #  endif
903b1253e8Schristos #endif
913b1253e8Schristos #ifdef W
923b1253e8Schristos #  if W == 8 && defined(Z_U8)
933b1253e8Schristos      typedef Z_U8 z_word_t;
943b1253e8Schristos #  elif defined(Z_U4)
953b1253e8Schristos #    undef W
963b1253e8Schristos #    define W 4
973b1253e8Schristos      typedef Z_U4 z_word_t;
983b1253e8Schristos #  else
993b1253e8Schristos #    undef W
1003b1253e8Schristos #  endif
1013b1253e8Schristos #endif
1023b1253e8Schristos 
1033b1253e8Schristos /* If available, use the ARM processor CRC32 instruction. */
1043b1253e8Schristos #if defined(__aarch64__) && defined(__ARM_FEATURE_CRC32) && W == 8
1053b1253e8Schristos #  define ARMCRC32
1063b1253e8Schristos #endif
1073b1253e8Schristos 
1083b1253e8Schristos #if defined(W) && (!defined(ARMCRC32) || defined(DYNAMIC_CRC_TABLE))
1093b1253e8Schristos /*
1103b1253e8Schristos   Swap the bytes in a z_word_t to convert between little and big endian. Any
1113b1253e8Schristos   self-respecting compiler will optimize this to a single machine byte-swap
1123b1253e8Schristos   instruction, if one is available. This assumes that word_t is either 32 bits
1133b1253e8Schristos   or 64 bits.
1143b1253e8Schristos  */
115*96c32821Schristos local z_word_t byte_swap(z_word_t word) {
1163b1253e8Schristos #  if W == 8
1173b1253e8Schristos     return
1183b1253e8Schristos         (word & 0xff00000000000000) >> 56 |
1193b1253e8Schristos         (word & 0xff000000000000) >> 40 |
1203b1253e8Schristos         (word & 0xff0000000000) >> 24 |
1213b1253e8Schristos         (word & 0xff00000000) >> 8 |
1223b1253e8Schristos         (word & 0xff000000) << 8 |
1233b1253e8Schristos         (word & 0xff0000) << 24 |
1243b1253e8Schristos         (word & 0xff00) << 40 |
1253b1253e8Schristos         (word & 0xff) << 56;
1263b1253e8Schristos #  else   /* W == 4 */
1273b1253e8Schristos     return
1283b1253e8Schristos         (word & 0xff000000) >> 24 |
1293b1253e8Schristos         (word & 0xff0000) >> 8 |
1303b1253e8Schristos         (word & 0xff00) << 8 |
1313b1253e8Schristos         (word & 0xff) << 24;
1323b1253e8Schristos #  endif
1333b1253e8Schristos }
1343b1253e8Schristos #endif
1353b1253e8Schristos 
136*96c32821Schristos #ifdef DYNAMIC_CRC_TABLE
137*96c32821Schristos /* =========================================================================
138*96c32821Schristos  * Table of powers of x for combining CRC-32s, filled in by make_crc_table()
139*96c32821Schristos  * below.
140*96c32821Schristos  */
141*96c32821Schristos    local z_crc_t FAR x2n_table[32];
142*96c32821Schristos #else
143*96c32821Schristos /* =========================================================================
144*96c32821Schristos  * Tables for byte-wise and braided CRC-32 calculations, and a table of powers
145*96c32821Schristos  * of x for combining CRC-32s, all made by make_crc_table().
146*96c32821Schristos  */
147*96c32821Schristos #  include "crc32.h"
148*96c32821Schristos #endif
149*96c32821Schristos 
1503b1253e8Schristos /* CRC polynomial. */
1513b1253e8Schristos #define POLY 0xedb88320         /* p(x) reflected, with x^32 implied */
152aaf4ece6Schristos 
153*96c32821Schristos /*
154*96c32821Schristos   Return a(x) multiplied by b(x) modulo p(x), where p(x) is the CRC polynomial,
155*96c32821Schristos   reflected. For speed, this requires that a not be zero.
156*96c32821Schristos  */
157*96c32821Schristos local z_crc_t multmodp(z_crc_t a, z_crc_t b) {
158*96c32821Schristos     z_crc_t m, p;
159aaf4ece6Schristos 
160*96c32821Schristos     m = (z_crc_t)1 << 31;
161*96c32821Schristos     p = 0;
162*96c32821Schristos     for (;;) {
163*96c32821Schristos         if (a & m) {
164*96c32821Schristos             p ^= b;
165*96c32821Schristos             if ((a & (m - 1)) == 0)
166*96c32821Schristos                 break;
167*96c32821Schristos         }
168*96c32821Schristos         m >>= 1;
169*96c32821Schristos         b = b & 1 ? (b >> 1) ^ POLY : b >> 1;
170*96c32821Schristos     }
171*96c32821Schristos     return p;
172*96c32821Schristos }
173*96c32821Schristos 
174*96c32821Schristos /*
175*96c32821Schristos   Return x^(n * 2^k) modulo p(x). Requires that x2n_table[] has been
176*96c32821Schristos   initialized.
177*96c32821Schristos  */
178*96c32821Schristos local z_crc_t x2nmodp(z_off64_t n, unsigned k) {
179*96c32821Schristos     z_crc_t p;
180*96c32821Schristos 
181*96c32821Schristos     p = (z_crc_t)1 << 31;           /* x^0 == 1 */
182*96c32821Schristos     while (n) {
183*96c32821Schristos         if (n & 1)
184*96c32821Schristos             p = multmodp(x2n_table[k & 31], p);
185*96c32821Schristos         n >>= 1;
186*96c32821Schristos         k++;
187*96c32821Schristos     }
188*96c32821Schristos     return p;
189*96c32821Schristos }
190*96c32821Schristos 
191*96c32821Schristos #ifdef DYNAMIC_CRC_TABLE
192*96c32821Schristos /* =========================================================================
193*96c32821Schristos  * Build the tables for byte-wise and braided CRC-32 calculations, and a table
194*96c32821Schristos  * of powers of x for combining CRC-32s.
195*96c32821Schristos  */
1963b1253e8Schristos local z_crc_t FAR crc_table[256];
1973b1253e8Schristos #ifdef W
1983b1253e8Schristos    local z_word_t FAR crc_big_table[256];
1993b1253e8Schristos    local z_crc_t FAR crc_braid_table[W][256];
2003b1253e8Schristos    local z_word_t FAR crc_braid_big_table[W][256];
201*96c32821Schristos    local void braid(z_crc_t [][256], z_word_t [][256], int, int);
2023b1253e8Schristos #endif
203aaf4ece6Schristos #ifdef MAKECRCH
204*96c32821Schristos    local void write_table(FILE *, const z_crc_t FAR *, int);
205*96c32821Schristos    local void write_table32hi(FILE *, const z_word_t FAR *, int);
206*96c32821Schristos    local void write_table64(FILE *, const z_word_t FAR *, int);
207aaf4ece6Schristos #endif /* MAKECRCH */
2083b1253e8Schristos 
2093b1253e8Schristos /*
2103b1253e8Schristos   Define a once() function depending on the availability of atomics. If this is
2113b1253e8Schristos   compiled with DYNAMIC_CRC_TABLE defined, and if CRCs will be computed in
2123b1253e8Schristos   multiple threads, and if atomics are not available, then get_crc_table() must
2133b1253e8Schristos   be called to initialize the tables and must return before any threads are
2143b1253e8Schristos   allowed to compute or combine CRCs.
2153b1253e8Schristos  */
2163b1253e8Schristos 
2173b1253e8Schristos /* Definition of once functionality. */
2183b1253e8Schristos typedef struct once_s once_t;
2193b1253e8Schristos 
2203b1253e8Schristos /* Check for the availability of atomics. */
2213b1253e8Schristos #if defined(__STDC__) && __STDC_VERSION__ >= 201112L && \
2223b1253e8Schristos     !defined(__STDC_NO_ATOMICS__)
2233b1253e8Schristos 
2243b1253e8Schristos #include <stdatomic.h>
2253b1253e8Schristos 
2263b1253e8Schristos /* Structure for once(), which must be initialized with ONCE_INIT. */
2273b1253e8Schristos struct once_s {
2283b1253e8Schristos     atomic_flag begun;
2293b1253e8Schristos     atomic_int done;
2303b1253e8Schristos };
2313b1253e8Schristos #define ONCE_INIT {ATOMIC_FLAG_INIT, 0}
2323b1253e8Schristos 
2333b1253e8Schristos /*
2343b1253e8Schristos   Run the provided init() function exactly once, even if multiple threads
2353b1253e8Schristos   invoke once() at the same time. The state must be a once_t initialized with
2363b1253e8Schristos   ONCE_INIT.
2373b1253e8Schristos  */
238*96c32821Schristos local void once(once_t *state, void (*init)(void)) {
2393b1253e8Schristos     if (!atomic_load(&state->done)) {
2403b1253e8Schristos         if (atomic_flag_test_and_set(&state->begun))
2413b1253e8Schristos             while (!atomic_load(&state->done))
2423b1253e8Schristos                 ;
2433b1253e8Schristos         else {
2443b1253e8Schristos             init();
2453b1253e8Schristos             atomic_store(&state->done, 1);
2463b1253e8Schristos         }
2473b1253e8Schristos     }
2483b1253e8Schristos }
2493b1253e8Schristos 
2503b1253e8Schristos #else   /* no atomics */
2513b1253e8Schristos 
2523b1253e8Schristos /* Structure for once(), which must be initialized with ONCE_INIT. */
2533b1253e8Schristos struct once_s {
2543b1253e8Schristos     volatile int begun;
2553b1253e8Schristos     volatile int done;
2563b1253e8Schristos };
2573b1253e8Schristos #define ONCE_INIT {0, 0}
2583b1253e8Schristos 
2593b1253e8Schristos /* Test and set. Alas, not atomic, but tries to minimize the period of
2603b1253e8Schristos    vulnerability. */
261*96c32821Schristos local int test_and_set(int volatile *flag) {
2623b1253e8Schristos     int was;
2633b1253e8Schristos 
2643b1253e8Schristos     was = *flag;
2653b1253e8Schristos     *flag = 1;
2663b1253e8Schristos     return was;
2673b1253e8Schristos }
2683b1253e8Schristos 
2693b1253e8Schristos /* Run the provided init() function once. This is not thread-safe. */
270*96c32821Schristos local void once(once_t *state, void (*init)(void)) {
2713b1253e8Schristos     if (!state->done) {
2723b1253e8Schristos         if (test_and_set(&state->begun))
2733b1253e8Schristos             while (!state->done)
2743b1253e8Schristos                 ;
2753b1253e8Schristos         else {
2763b1253e8Schristos             init();
2773b1253e8Schristos             state->done = 1;
2783b1253e8Schristos         }
2793b1253e8Schristos     }
2803b1253e8Schristos }
2813b1253e8Schristos 
2823b1253e8Schristos #endif
2833b1253e8Schristos 
2843b1253e8Schristos /* State for once(). */
2853b1253e8Schristos local once_t made = ONCE_INIT;
2863b1253e8Schristos 
287aaf4ece6Schristos /*
288aaf4ece6Schristos   Generate tables for a byte-wise 32-bit CRC calculation on the polynomial:
289aaf4ece6Schristos   x^32+x^26+x^23+x^22+x^16+x^12+x^11+x^10+x^8+x^7+x^5+x^4+x^2+x+1.
290aaf4ece6Schristos 
291aaf4ece6Schristos   Polynomials over GF(2) are represented in binary, one bit per coefficient,
292aaf4ece6Schristos   with the lowest powers in the most significant bit. Then adding polynomials
293aaf4ece6Schristos   is just exclusive-or, and multiplying a polynomial by x is a right shift by
294aaf4ece6Schristos   one. If we call the above polynomial p, and represent a byte as the
295aaf4ece6Schristos   polynomial q, also with the lowest power in the most significant bit (so the
2963b1253e8Schristos   byte 0xb1 is the polynomial x^7+x^3+x^2+1), then the CRC is (q*x^32) mod p,
297aaf4ece6Schristos   where a mod b means the remainder after dividing a by b.
298aaf4ece6Schristos 
299aaf4ece6Schristos   This calculation is done using the shift-register method of multiplying and
300aaf4ece6Schristos   taking the remainder. The register is initialized to zero, and for each
301aaf4ece6Schristos   incoming bit, x^32 is added mod p to the register if the bit is a one (where
3023b1253e8Schristos   x^32 mod p is p+x^32 = x^26+...+1), and the register is multiplied mod p by x
3033b1253e8Schristos   (which is shifting right by one and adding x^32 mod p if the bit shifted out
3043b1253e8Schristos   is a one). We start with the highest power (least significant bit) of q and
3053b1253e8Schristos   repeat for all eight bits of q.
306aaf4ece6Schristos 
3073b1253e8Schristos   The table is simply the CRC of all possible eight bit values. This is all the
3083b1253e8Schristos   information needed to generate CRCs on data a byte at a time for all
3093b1253e8Schristos   combinations of CRC register values and incoming bytes.
310aaf4ece6Schristos  */
3113b1253e8Schristos 
312*96c32821Schristos local void make_crc_table(void) {
3133b1253e8Schristos     unsigned i, j, n;
3143b1253e8Schristos     z_crc_t p;
315aaf4ece6Schristos 
3163b1253e8Schristos     /* initialize the CRC of bytes tables */
3173b1253e8Schristos     for (i = 0; i < 256; i++) {
3183b1253e8Schristos         p = i;
3193b1253e8Schristos         for (j = 0; j < 8; j++)
3203b1253e8Schristos             p = p & 1 ? (p >> 1) ^ POLY : p >> 1;
3213b1253e8Schristos         crc_table[i] = p;
3223b1253e8Schristos #ifdef W
3233b1253e8Schristos         crc_big_table[i] = byte_swap(p);
3243b1253e8Schristos #endif
325aaf4ece6Schristos     }
326aaf4ece6Schristos 
3273b1253e8Schristos     /* initialize the x^2^n mod p(x) table */
3283b1253e8Schristos     p = (z_crc_t)1 << 30;         /* x^1 */
3293b1253e8Schristos     x2n_table[0] = p;
3303b1253e8Schristos     for (n = 1; n < 32; n++)
3313b1253e8Schristos         x2n_table[n] = p = multmodp(p, p);
332aaf4ece6Schristos 
3333b1253e8Schristos #ifdef W
3343b1253e8Schristos     /* initialize the braiding tables -- needs x2n_table[] */
3353b1253e8Schristos     braid(crc_braid_table, crc_braid_big_table, N, W);
3363b1253e8Schristos #endif
337aaf4ece6Schristos 
338aaf4ece6Schristos #ifdef MAKECRCH
339aaf4ece6Schristos     {
3403b1253e8Schristos         /*
3413b1253e8Schristos           The crc32.h header file contains tables for both 32-bit and 64-bit
3423b1253e8Schristos           z_word_t's, and so requires a 64-bit type be available. In that case,
3433b1253e8Schristos           z_word_t must be defined to be 64-bits. This code then also generates
3443b1253e8Schristos           and writes out the tables for the case that z_word_t is 32 bits.
3453b1253e8Schristos          */
3463b1253e8Schristos #if !defined(W) || W != 8
3473b1253e8Schristos #  error Need a 64-bit integer type in order to generate crc32.h.
3483b1253e8Schristos #endif
349aaf4ece6Schristos         FILE *out;
3503b1253e8Schristos         int k, n;
3513b1253e8Schristos         z_crc_t ltl[8][256];
3523b1253e8Schristos         z_word_t big[8][256];
353aaf4ece6Schristos 
354aaf4ece6Schristos         out = fopen("crc32.h", "w");
355aaf4ece6Schristos         if (out == NULL) return;
3563b1253e8Schristos 
3573b1253e8Schristos         /* write out little-endian CRC table to crc32.h */
3583b1253e8Schristos         fprintf(out,
3593b1253e8Schristos             "/* crc32.h -- tables for rapid CRC calculation\n"
3603b1253e8Schristos             " * Generated automatically by crc32.c\n */\n"
3613b1253e8Schristos             "\n"
3623b1253e8Schristos             "local const z_crc_t FAR crc_table[] = {\n"
3633b1253e8Schristos             "    ");
3643b1253e8Schristos         write_table(out, crc_table, 256);
3653b1253e8Schristos         fprintf(out,
3663b1253e8Schristos             "};\n");
3673b1253e8Schristos 
3683b1253e8Schristos         /* write out big-endian CRC table for 64-bit z_word_t to crc32.h */
3693b1253e8Schristos         fprintf(out,
3703b1253e8Schristos             "\n"
3713b1253e8Schristos             "#ifdef W\n"
3723b1253e8Schristos             "\n"
3733b1253e8Schristos             "#if W == 8\n"
3743b1253e8Schristos             "\n"
3753b1253e8Schristos             "local const z_word_t FAR crc_big_table[] = {\n"
3763b1253e8Schristos             "    ");
3773b1253e8Schristos         write_table64(out, crc_big_table, 256);
3783b1253e8Schristos         fprintf(out,
3793b1253e8Schristos             "};\n");
3803b1253e8Schristos 
3813b1253e8Schristos         /* write out big-endian CRC table for 32-bit z_word_t to crc32.h */
3823b1253e8Schristos         fprintf(out,
3833b1253e8Schristos             "\n"
3843b1253e8Schristos             "#else /* W == 4 */\n"
3853b1253e8Schristos             "\n"
3863b1253e8Schristos             "local const z_word_t FAR crc_big_table[] = {\n"
3873b1253e8Schristos             "    ");
3883b1253e8Schristos         write_table32hi(out, crc_big_table, 256);
3893b1253e8Schristos         fprintf(out,
3903b1253e8Schristos             "};\n"
3913b1253e8Schristos             "\n"
3923b1253e8Schristos             "#endif\n");
3933b1253e8Schristos 
3943b1253e8Schristos         /* write out braid tables for each value of N */
3953b1253e8Schristos         for (n = 1; n <= 6; n++) {
3963b1253e8Schristos             fprintf(out,
3973b1253e8Schristos             "\n"
3983b1253e8Schristos             "#if N == %d\n", n);
3993b1253e8Schristos 
4003b1253e8Schristos             /* compute braid tables for this N and 64-bit word_t */
4013b1253e8Schristos             braid(ltl, big, n, 8);
4023b1253e8Schristos 
4033b1253e8Schristos             /* write out braid tables for 64-bit z_word_t to crc32.h */
4043b1253e8Schristos             fprintf(out,
4053b1253e8Schristos             "\n"
4063b1253e8Schristos             "#if W == 8\n"
4073b1253e8Schristos             "\n"
4083b1253e8Schristos             "local const z_crc_t FAR crc_braid_table[][256] = {\n");
4093b1253e8Schristos             for (k = 0; k < 8; k++) {
4103b1253e8Schristos                 fprintf(out, "   {");
4113b1253e8Schristos                 write_table(out, ltl[k], 256);
4123b1253e8Schristos                 fprintf(out, "}%s", k < 7 ? ",\n" : "");
413aaf4ece6Schristos             }
4143b1253e8Schristos             fprintf(out,
4153b1253e8Schristos             "};\n"
4163b1253e8Schristos             "\n"
4173b1253e8Schristos             "local const z_word_t FAR crc_braid_big_table[][256] = {\n");
4183b1253e8Schristos             for (k = 0; k < 8; k++) {
4193b1253e8Schristos                 fprintf(out, "   {");
4203b1253e8Schristos                 write_table64(out, big[k], 256);
4213b1253e8Schristos                 fprintf(out, "}%s", k < 7 ? ",\n" : "");
4223b1253e8Schristos             }
4233b1253e8Schristos             fprintf(out,
4243b1253e8Schristos             "};\n");
4253b1253e8Schristos 
4263b1253e8Schristos             /* compute braid tables for this N and 32-bit word_t */
4273b1253e8Schristos             braid(ltl, big, n, 4);
4283b1253e8Schristos 
4293b1253e8Schristos             /* write out braid tables for 32-bit z_word_t to crc32.h */
4303b1253e8Schristos             fprintf(out,
4313b1253e8Schristos             "\n"
4323b1253e8Schristos             "#else /* W == 4 */\n"
4333b1253e8Schristos             "\n"
4343b1253e8Schristos             "local const z_crc_t FAR crc_braid_table[][256] = {\n");
4353b1253e8Schristos             for (k = 0; k < 4; k++) {
4363b1253e8Schristos                 fprintf(out, "   {");
4373b1253e8Schristos                 write_table(out, ltl[k], 256);
4383b1253e8Schristos                 fprintf(out, "}%s", k < 3 ? ",\n" : "");
4393b1253e8Schristos             }
4403b1253e8Schristos             fprintf(out,
4413b1253e8Schristos             "};\n"
4423b1253e8Schristos             "\n"
4433b1253e8Schristos             "local const z_word_t FAR crc_braid_big_table[][256] = {\n");
4443b1253e8Schristos             for (k = 0; k < 4; k++) {
4453b1253e8Schristos                 fprintf(out, "   {");
4463b1253e8Schristos                 write_table32hi(out, big[k], 256);
4473b1253e8Schristos                 fprintf(out, "}%s", k < 3 ? ",\n" : "");
4483b1253e8Schristos             }
4493b1253e8Schristos             fprintf(out,
4503b1253e8Schristos             "};\n"
4513b1253e8Schristos             "\n"
4523b1253e8Schristos             "#endif\n"
4533b1253e8Schristos             "\n"
4543b1253e8Schristos             "#endif\n");
4553b1253e8Schristos         }
4563b1253e8Schristos         fprintf(out,
4573b1253e8Schristos             "\n"
4583b1253e8Schristos             "#endif\n");
4593b1253e8Schristos 
4603b1253e8Schristos         /* write out zeros operator table to crc32.h */
4613b1253e8Schristos         fprintf(out,
4623b1253e8Schristos             "\n"
4633b1253e8Schristos             "local const z_crc_t FAR x2n_table[] = {\n"
4643b1253e8Schristos             "    ");
4653b1253e8Schristos         write_table(out, x2n_table, 32);
4663b1253e8Schristos         fprintf(out,
4673b1253e8Schristos             "};\n");
468aaf4ece6Schristos         fclose(out);
469aaf4ece6Schristos     }
470aaf4ece6Schristos #endif /* MAKECRCH */
471aaf4ece6Schristos }
472aaf4ece6Schristos 
473aaf4ece6Schristos #ifdef MAKECRCH
4743b1253e8Schristos 
4753b1253e8Schristos /*
4763b1253e8Schristos    Write the 32-bit values in table[0..k-1] to out, five per line in
4773b1253e8Schristos    hexadecimal separated by commas.
4783b1253e8Schristos  */
479*96c32821Schristos local void write_table(FILE *out, const z_crc_t FAR *table, int k) {
480aaf4ece6Schristos     int n;
481aaf4ece6Schristos 
4823b1253e8Schristos     for (n = 0; n < k; n++)
4833b1253e8Schristos         fprintf(out, "%s0x%08lx%s", n == 0 || n % 5 ? "" : "    ",
4846db8c6e9Schristos                 (unsigned long)(table[n]),
4853b1253e8Schristos                 n == k - 1 ? "" : (n % 5 == 4 ? ",\n" : ", "));
486aaf4ece6Schristos }
4873b1253e8Schristos 
4883b1253e8Schristos /*
4893b1253e8Schristos    Write the high 32-bits of each value in table[0..k-1] to out, five per line
4903b1253e8Schristos    in hexadecimal separated by commas.
4913b1253e8Schristos  */
492*96c32821Schristos local void write_table32hi(FILE *out, const z_word_t FAR *table, int k) {
4933b1253e8Schristos     int n;
4943b1253e8Schristos 
4953b1253e8Schristos     for (n = 0; n < k; n++)
4963b1253e8Schristos         fprintf(out, "%s0x%08lx%s", n == 0 || n % 5 ? "" : "    ",
4973b1253e8Schristos                 (unsigned long)(table[n] >> 32),
4983b1253e8Schristos                 n == k - 1 ? "" : (n % 5 == 4 ? ",\n" : ", "));
4993b1253e8Schristos }
5003b1253e8Schristos 
5013b1253e8Schristos /*
5023b1253e8Schristos   Write the 64-bit values in table[0..k-1] to out, three per line in
5033b1253e8Schristos   hexadecimal separated by commas. This assumes that if there is a 64-bit
5043b1253e8Schristos   type, then there is also a long long integer type, and it is at least 64
5053b1253e8Schristos   bits. If not, then the type cast and format string can be adjusted
5063b1253e8Schristos   accordingly.
5073b1253e8Schristos  */
508*96c32821Schristos local void write_table64(FILE *out, const z_word_t FAR *table, int k) {
5093b1253e8Schristos     int n;
5103b1253e8Schristos 
5113b1253e8Schristos     for (n = 0; n < k; n++)
5123b1253e8Schristos         fprintf(out, "%s0x%016llx%s", n == 0 || n % 3 ? "" : "    ",
5133b1253e8Schristos                 (unsigned long long)(table[n]),
5143b1253e8Schristos                 n == k - 1 ? "" : (n % 3 == 2 ? ",\n" : ", "));
5153b1253e8Schristos }
5163b1253e8Schristos 
5173b1253e8Schristos /* Actually do the deed. */
518*96c32821Schristos int main(void) {
5193b1253e8Schristos     make_crc_table();
5203b1253e8Schristos     return 0;
5213b1253e8Schristos }
5223b1253e8Schristos 
523aaf4ece6Schristos #endif /* MAKECRCH */
524aaf4ece6Schristos 
5253b1253e8Schristos #ifdef W
5263b1253e8Schristos /*
5273b1253e8Schristos   Generate the little and big-endian braid tables for the given n and z_word_t
5283b1253e8Schristos   size w. Each array must have room for w blocks of 256 elements.
5293b1253e8Schristos  */
530*96c32821Schristos local void braid(z_crc_t ltl[][256], z_word_t big[][256], int n, int w) {
5313b1253e8Schristos     int k;
5323b1253e8Schristos     z_crc_t i, p, q;
5333b1253e8Schristos     for (k = 0; k < w; k++) {
5343b1253e8Schristos         p = x2nmodp((n * w + 3 - k) << 3, 0);
5353b1253e8Schristos         ltl[k][0] = 0;
5363b1253e8Schristos         big[w - 1 - k][0] = 0;
5373b1253e8Schristos         for (i = 1; i < 256; i++) {
5383b1253e8Schristos             ltl[k][i] = q = multmodp(i << 24, p);
5393b1253e8Schristos             big[w - 1 - k][i] = byte_swap(q);
5403b1253e8Schristos         }
5413b1253e8Schristos     }
5423b1253e8Schristos }
5433b1253e8Schristos #endif
5443b1253e8Schristos 
545aaf4ece6Schristos #endif /* DYNAMIC_CRC_TABLE */
546aaf4ece6Schristos 
547aaf4ece6Schristos /* =========================================================================
5483b1253e8Schristos  * This function can be used by asm versions of crc32(), and to force the
5493b1253e8Schristos  * generation of the CRC tables in a threaded application.
550aaf4ece6Schristos  */
551*96c32821Schristos const z_crc_t FAR * ZEXPORT get_crc_table(void) {
552aaf4ece6Schristos #ifdef DYNAMIC_CRC_TABLE
5533b1253e8Schristos     once(&made, make_crc_table);
554aaf4ece6Schristos #endif /* DYNAMIC_CRC_TABLE */
5556db8c6e9Schristos     return (const z_crc_t FAR *)crc_table;
556aaf4ece6Schristos }
557aaf4ece6Schristos 
5583b1253e8Schristos /* =========================================================================
5593b1253e8Schristos  * Use ARM machine instructions if available. This will compute the CRC about
5603b1253e8Schristos  * ten times faster than the braided calculation. This code does not check for
5613b1253e8Schristos  * the presence of the CRC instruction at run time. __ARM_FEATURE_CRC32 will
5623b1253e8Schristos  * only be defined if the compilation specifies an ARM processor architecture
5633b1253e8Schristos  * that has the instructions. For example, compiling with -march=armv8.1-a or
5643b1253e8Schristos  * -march=armv8-a+crc, or -march=native if the compile machine has the crc32
5653b1253e8Schristos  * instructions.
5663b1253e8Schristos  */
5673b1253e8Schristos #ifdef ARMCRC32
5683b1253e8Schristos 
5693b1253e8Schristos /*
5703b1253e8Schristos    Constants empirically determined to maximize speed. These values are from
5713b1253e8Schristos    measurements on a Cortex-A57. Your mileage may vary.
5723b1253e8Schristos  */
5733b1253e8Schristos #define Z_BATCH 3990                /* number of words in a batch */
5743b1253e8Schristos #define Z_BATCH_ZEROS 0xa10d3d0c    /* computed from Z_BATCH = 3990 */
5753b1253e8Schristos #define Z_BATCH_MIN 800             /* fewest words in a final batch */
5763b1253e8Schristos 
577*96c32821Schristos unsigned long ZEXPORT crc32_z(unsigned long crc, const unsigned char FAR *buf,
578*96c32821Schristos                               z_size_t len) {
5793b1253e8Schristos     z_crc_t val;
5803b1253e8Schristos     z_word_t crc1, crc2;
5813b1253e8Schristos     const z_word_t *word;
5823b1253e8Schristos     z_word_t val0, val1, val2;
5833b1253e8Schristos     z_size_t last, last2, i;
5843b1253e8Schristos     z_size_t num;
5853b1253e8Schristos 
5863b1253e8Schristos     /* Return initial CRC, if requested. */
5873b1253e8Schristos     if (buf == Z_NULL) return 0;
5883b1253e8Schristos 
5893b1253e8Schristos #ifdef DYNAMIC_CRC_TABLE
5903b1253e8Schristos     once(&made, make_crc_table);
5913b1253e8Schristos #endif /* DYNAMIC_CRC_TABLE */
5923b1253e8Schristos 
5933b1253e8Schristos     /* Pre-condition the CRC */
5943b1253e8Schristos     crc = (~crc) & 0xffffffff;
5953b1253e8Schristos 
5963b1253e8Schristos     /* Compute the CRC up to a word boundary. */
5973b1253e8Schristos     while (len && ((z_size_t)buf & 7) != 0) {
5983b1253e8Schristos         len--;
5993b1253e8Schristos         val = *buf++;
6003b1253e8Schristos         __asm__ volatile("crc32b %w0, %w0, %w1" : "+r"(crc) : "r"(val));
6013b1253e8Schristos     }
6023b1253e8Schristos 
6033b1253e8Schristos     /* Prepare to compute the CRC on full 64-bit words word[0..num-1]. */
6043b1253e8Schristos     word = (z_word_t const *)buf;
6053b1253e8Schristos     num = len >> 3;
6063b1253e8Schristos     len &= 7;
6073b1253e8Schristos 
6083b1253e8Schristos     /* Do three interleaved CRCs to realize the throughput of one crc32x
6093b1253e8Schristos        instruction per cycle. Each CRC is calculated on Z_BATCH words. The
6103b1253e8Schristos        three CRCs are combined into a single CRC after each set of batches. */
6113b1253e8Schristos     while (num >= 3 * Z_BATCH) {
6123b1253e8Schristos         crc1 = 0;
6133b1253e8Schristos         crc2 = 0;
6143b1253e8Schristos         for (i = 0; i < Z_BATCH; i++) {
6153b1253e8Schristos             val0 = word[i];
6163b1253e8Schristos             val1 = word[i + Z_BATCH];
6173b1253e8Schristos             val2 = word[i + 2 * Z_BATCH];
6183b1253e8Schristos             __asm__ volatile("crc32x %w0, %w0, %x1" : "+r"(crc) : "r"(val0));
6193b1253e8Schristos             __asm__ volatile("crc32x %w0, %w0, %x1" : "+r"(crc1) : "r"(val1));
6203b1253e8Schristos             __asm__ volatile("crc32x %w0, %w0, %x1" : "+r"(crc2) : "r"(val2));
6213b1253e8Schristos         }
6223b1253e8Schristos         word += 3 * Z_BATCH;
6233b1253e8Schristos         num -= 3 * Z_BATCH;
6243b1253e8Schristos         crc = multmodp(Z_BATCH_ZEROS, crc) ^ crc1;
6253b1253e8Schristos         crc = multmodp(Z_BATCH_ZEROS, crc) ^ crc2;
6263b1253e8Schristos     }
6273b1253e8Schristos 
6283b1253e8Schristos     /* Do one last smaller batch with the remaining words, if there are enough
6293b1253e8Schristos        to pay for the combination of CRCs. */
6303b1253e8Schristos     last = num / 3;
6313b1253e8Schristos     if (last >= Z_BATCH_MIN) {
6323b1253e8Schristos         last2 = last << 1;
6333b1253e8Schristos         crc1 = 0;
6343b1253e8Schristos         crc2 = 0;
6353b1253e8Schristos         for (i = 0; i < last; i++) {
6363b1253e8Schristos             val0 = word[i];
6373b1253e8Schristos             val1 = word[i + last];
6383b1253e8Schristos             val2 = word[i + last2];
6393b1253e8Schristos             __asm__ volatile("crc32x %w0, %w0, %x1" : "+r"(crc) : "r"(val0));
6403b1253e8Schristos             __asm__ volatile("crc32x %w0, %w0, %x1" : "+r"(crc1) : "r"(val1));
6413b1253e8Schristos             __asm__ volatile("crc32x %w0, %w0, %x1" : "+r"(crc2) : "r"(val2));
6423b1253e8Schristos         }
6433b1253e8Schristos         word += 3 * last;
6443b1253e8Schristos         num -= 3 * last;
6453b1253e8Schristos         val = x2nmodp(last, 6);
6463b1253e8Schristos         crc = multmodp(val, crc) ^ crc1;
6473b1253e8Schristos         crc = multmodp(val, crc) ^ crc2;
6483b1253e8Schristos     }
6493b1253e8Schristos 
6503b1253e8Schristos     /* Compute the CRC on any remaining words. */
6513b1253e8Schristos     for (i = 0; i < num; i++) {
6523b1253e8Schristos         val0 = word[i];
6533b1253e8Schristos         __asm__ volatile("crc32x %w0, %w0, %x1" : "+r"(crc) : "r"(val0));
6543b1253e8Schristos     }
6553b1253e8Schristos     word += num;
6563b1253e8Schristos 
6573b1253e8Schristos     /* Complete the CRC on any remaining bytes. */
6583b1253e8Schristos     buf = (const unsigned char FAR *)word;
6593b1253e8Schristos     while (len) {
6603b1253e8Schristos         len--;
6613b1253e8Schristos         val = *buf++;
6623b1253e8Schristos         __asm__ volatile("crc32b %w0, %w0, %w1" : "+r"(crc) : "r"(val));
6633b1253e8Schristos     }
6643b1253e8Schristos 
6653b1253e8Schristos     /* Return the CRC, post-conditioned. */
6663b1253e8Schristos     return crc ^ 0xffffffff;
6673b1253e8Schristos }
6683b1253e8Schristos 
6693b1253e8Schristos #else
6703b1253e8Schristos 
6713b1253e8Schristos #ifdef W
6723b1253e8Schristos 
6733b1253e8Schristos /*
6743b1253e8Schristos   Return the CRC of the W bytes in the word_t data, taking the
6753b1253e8Schristos   least-significant byte of the word as the first byte of data, without any pre
6763b1253e8Schristos   or post conditioning. This is used to combine the CRCs of each braid.
6773b1253e8Schristos  */
678*96c32821Schristos local z_crc_t crc_word(z_word_t data) {
6793b1253e8Schristos     int k;
6803b1253e8Schristos     for (k = 0; k < W; k++)
6813b1253e8Schristos         data = (data >> 8) ^ crc_table[data & 0xff];
6823b1253e8Schristos     return (z_crc_t)data;
6833b1253e8Schristos }
6843b1253e8Schristos 
685*96c32821Schristos local z_word_t crc_word_big(z_word_t data) {
6863b1253e8Schristos     int k;
6873b1253e8Schristos     for (k = 0; k < W; k++)
6883b1253e8Schristos         data = (data << 8) ^
6893b1253e8Schristos             crc_big_table[(data >> ((W - 1) << 3)) & 0xff];
6903b1253e8Schristos     return data;
6913b1253e8Schristos }
6923b1253e8Schristos 
6933b1253e8Schristos #endif
694aaf4ece6Schristos 
695aaf4ece6Schristos /* ========================================================================= */
696*96c32821Schristos unsigned long ZEXPORT crc32_z(unsigned long crc, const unsigned char FAR *buf,
697*96c32821Schristos                               z_size_t len) {
6983b1253e8Schristos     /* Return initial CRC, if requested. */
6993b1253e8Schristos     if (buf == Z_NULL) return 0;
700aaf4ece6Schristos 
701aaf4ece6Schristos #ifdef DYNAMIC_CRC_TABLE
7023b1253e8Schristos     once(&made, make_crc_table);
703aaf4ece6Schristos #endif /* DYNAMIC_CRC_TABLE */
704aaf4ece6Schristos 
7053b1253e8Schristos     /* Pre-condition the CRC */
7063b1253e8Schristos     crc = (~crc) & 0xffffffff;
707aaf4ece6Schristos 
7083b1253e8Schristos #ifdef W
7093b1253e8Schristos 
7103b1253e8Schristos     /* If provided enough bytes, do a braided CRC calculation. */
7113b1253e8Schristos     if (len >= N * W + W - 1) {
7123b1253e8Schristos         z_size_t blks;
7133b1253e8Schristos         z_word_t const *words;
7143b1253e8Schristos         unsigned endian;
7153b1253e8Schristos         int k;
7163b1253e8Schristos 
7173b1253e8Schristos         /* Compute the CRC up to a z_word_t boundary. */
7183b1253e8Schristos         while (len && ((z_size_t)buf & (W - 1)) != 0) {
7193b1253e8Schristos             len--;
7203b1253e8Schristos             crc = (crc >> 8) ^ crc_table[(crc ^ *buf++) & 0xff];
7213b1253e8Schristos         }
7223b1253e8Schristos 
7233b1253e8Schristos         /* Compute the CRC on as many N z_word_t blocks as are available. */
7243b1253e8Schristos         blks = len / (N * W);
7253b1253e8Schristos         len -= blks * N * W;
7263b1253e8Schristos         words = (z_word_t const *)buf;
7273b1253e8Schristos 
7283b1253e8Schristos         /* Do endian check at execution time instead of compile time, since ARM
729*96c32821Schristos            processors can change the endianness at execution time. If the
730*96c32821Schristos            compiler knows what the endianness will be, it can optimize out the
7313b1253e8Schristos            check and the unused branch. */
732aaf4ece6Schristos         endian = 1;
7333b1253e8Schristos         if (*(unsigned char *)&endian) {
7343b1253e8Schristos             /* Little endian. */
7353b1253e8Schristos 
7363b1253e8Schristos             z_crc_t crc0;
7373b1253e8Schristos             z_word_t word0;
7383b1253e8Schristos #if N > 1
7393b1253e8Schristos             z_crc_t crc1;
7403b1253e8Schristos             z_word_t word1;
7413b1253e8Schristos #if N > 2
7423b1253e8Schristos             z_crc_t crc2;
7433b1253e8Schristos             z_word_t word2;
7443b1253e8Schristos #if N > 3
7453b1253e8Schristos             z_crc_t crc3;
7463b1253e8Schristos             z_word_t word3;
7473b1253e8Schristos #if N > 4
7483b1253e8Schristos             z_crc_t crc4;
7493b1253e8Schristos             z_word_t word4;
7503b1253e8Schristos #if N > 5
7513b1253e8Schristos             z_crc_t crc5;
7523b1253e8Schristos             z_word_t word5;
7533b1253e8Schristos #endif
7543b1253e8Schristos #endif
7553b1253e8Schristos #endif
7563b1253e8Schristos #endif
7573b1253e8Schristos #endif
7583b1253e8Schristos 
7593b1253e8Schristos             /* Initialize the CRC for each braid. */
7603b1253e8Schristos             crc0 = crc;
7613b1253e8Schristos #if N > 1
7623b1253e8Schristos             crc1 = 0;
7633b1253e8Schristos #if N > 2
7643b1253e8Schristos             crc2 = 0;
7653b1253e8Schristos #if N > 3
7663b1253e8Schristos             crc3 = 0;
7673b1253e8Schristos #if N > 4
7683b1253e8Schristos             crc4 = 0;
7693b1253e8Schristos #if N > 5
7703b1253e8Schristos             crc5 = 0;
7713b1253e8Schristos #endif
7723b1253e8Schristos #endif
7733b1253e8Schristos #endif
7743b1253e8Schristos #endif
7753b1253e8Schristos #endif
7763b1253e8Schristos 
7773b1253e8Schristos             /*
7783b1253e8Schristos               Process the first blks-1 blocks, computing the CRCs on each braid
7793b1253e8Schristos               independently.
7803b1253e8Schristos              */
7813b1253e8Schristos             while (--blks) {
7823b1253e8Schristos                 /* Load the word for each braid into registers. */
7833b1253e8Schristos                 word0 = crc0 ^ words[0];
7843b1253e8Schristos #if N > 1
7853b1253e8Schristos                 word1 = crc1 ^ words[1];
7863b1253e8Schristos #if N > 2
7873b1253e8Schristos                 word2 = crc2 ^ words[2];
7883b1253e8Schristos #if N > 3
7893b1253e8Schristos                 word3 = crc3 ^ words[3];
7903b1253e8Schristos #if N > 4
7913b1253e8Schristos                 word4 = crc4 ^ words[4];
7923b1253e8Schristos #if N > 5
7933b1253e8Schristos                 word5 = crc5 ^ words[5];
7943b1253e8Schristos #endif
7953b1253e8Schristos #endif
7963b1253e8Schristos #endif
7973b1253e8Schristos #endif
7983b1253e8Schristos #endif
7993b1253e8Schristos                 words += N;
8003b1253e8Schristos 
8013b1253e8Schristos                 /* Compute and update the CRC for each word. The loop should
8023b1253e8Schristos                    get unrolled. */
8033b1253e8Schristos                 crc0 = crc_braid_table[0][word0 & 0xff];
8043b1253e8Schristos #if N > 1
8053b1253e8Schristos                 crc1 = crc_braid_table[0][word1 & 0xff];
8063b1253e8Schristos #if N > 2
8073b1253e8Schristos                 crc2 = crc_braid_table[0][word2 & 0xff];
8083b1253e8Schristos #if N > 3
8093b1253e8Schristos                 crc3 = crc_braid_table[0][word3 & 0xff];
8103b1253e8Schristos #if N > 4
8113b1253e8Schristos                 crc4 = crc_braid_table[0][word4 & 0xff];
8123b1253e8Schristos #if N > 5
8133b1253e8Schristos                 crc5 = crc_braid_table[0][word5 & 0xff];
8143b1253e8Schristos #endif
8153b1253e8Schristos #endif
8163b1253e8Schristos #endif
8173b1253e8Schristos #endif
8183b1253e8Schristos #endif
8193b1253e8Schristos                 for (k = 1; k < W; k++) {
8203b1253e8Schristos                     crc0 ^= crc_braid_table[k][(word0 >> (k << 3)) & 0xff];
8213b1253e8Schristos #if N > 1
8223b1253e8Schristos                     crc1 ^= crc_braid_table[k][(word1 >> (k << 3)) & 0xff];
8233b1253e8Schristos #if N > 2
8243b1253e8Schristos                     crc2 ^= crc_braid_table[k][(word2 >> (k << 3)) & 0xff];
8253b1253e8Schristos #if N > 3
8263b1253e8Schristos                     crc3 ^= crc_braid_table[k][(word3 >> (k << 3)) & 0xff];
8273b1253e8Schristos #if N > 4
8283b1253e8Schristos                     crc4 ^= crc_braid_table[k][(word4 >> (k << 3)) & 0xff];
8293b1253e8Schristos #if N > 5
8303b1253e8Schristos                     crc5 ^= crc_braid_table[k][(word5 >> (k << 3)) & 0xff];
8313b1253e8Schristos #endif
8323b1253e8Schristos #endif
8333b1253e8Schristos #endif
8343b1253e8Schristos #endif
8353b1253e8Schristos #endif
836aaf4ece6Schristos                 }
8373b1253e8Schristos             }
8383b1253e8Schristos 
8393b1253e8Schristos             /*
8403b1253e8Schristos               Process the last block, combining the CRCs of the N braids at the
8413b1253e8Schristos               same time.
8423b1253e8Schristos              */
8433b1253e8Schristos             crc = crc_word(crc0 ^ words[0]);
8443b1253e8Schristos #if N > 1
8453b1253e8Schristos             crc = crc_word(crc1 ^ words[1] ^ crc);
8463b1253e8Schristos #if N > 2
8473b1253e8Schristos             crc = crc_word(crc2 ^ words[2] ^ crc);
8483b1253e8Schristos #if N > 3
8493b1253e8Schristos             crc = crc_word(crc3 ^ words[3] ^ crc);
8503b1253e8Schristos #if N > 4
8513b1253e8Schristos             crc = crc_word(crc4 ^ words[4] ^ crc);
8523b1253e8Schristos #if N > 5
8533b1253e8Schristos             crc = crc_word(crc5 ^ words[5] ^ crc);
8543b1253e8Schristos #endif
8553b1253e8Schristos #endif
8563b1253e8Schristos #endif
8573b1253e8Schristos #endif
8583b1253e8Schristos #endif
8593b1253e8Schristos             words += N;
8603b1253e8Schristos         }
8613b1253e8Schristos         else {
8623b1253e8Schristos             /* Big endian. */
8633b1253e8Schristos 
8643b1253e8Schristos             z_word_t crc0, word0, comb;
8653b1253e8Schristos #if N > 1
8663b1253e8Schristos             z_word_t crc1, word1;
8673b1253e8Schristos #if N > 2
8683b1253e8Schristos             z_word_t crc2, word2;
8693b1253e8Schristos #if N > 3
8703b1253e8Schristos             z_word_t crc3, word3;
8713b1253e8Schristos #if N > 4
8723b1253e8Schristos             z_word_t crc4, word4;
8733b1253e8Schristos #if N > 5
8743b1253e8Schristos             z_word_t crc5, word5;
8753b1253e8Schristos #endif
8763b1253e8Schristos #endif
8773b1253e8Schristos #endif
8783b1253e8Schristos #endif
8793b1253e8Schristos #endif
8803b1253e8Schristos 
8813b1253e8Schristos             /* Initialize the CRC for each braid. */
8823b1253e8Schristos             crc0 = byte_swap(crc);
8833b1253e8Schristos #if N > 1
8843b1253e8Schristos             crc1 = 0;
8853b1253e8Schristos #if N > 2
8863b1253e8Schristos             crc2 = 0;
8873b1253e8Schristos #if N > 3
8883b1253e8Schristos             crc3 = 0;
8893b1253e8Schristos #if N > 4
8903b1253e8Schristos             crc4 = 0;
8913b1253e8Schristos #if N > 5
8923b1253e8Schristos             crc5 = 0;
8933b1253e8Schristos #endif
8943b1253e8Schristos #endif
8953b1253e8Schristos #endif
8963b1253e8Schristos #endif
8973b1253e8Schristos #endif
8983b1253e8Schristos 
8993b1253e8Schristos             /*
9003b1253e8Schristos               Process the first blks-1 blocks, computing the CRCs on each braid
9013b1253e8Schristos               independently.
9023b1253e8Schristos              */
9033b1253e8Schristos             while (--blks) {
9043b1253e8Schristos                 /* Load the word for each braid into registers. */
9053b1253e8Schristos                 word0 = crc0 ^ words[0];
9063b1253e8Schristos #if N > 1
9073b1253e8Schristos                 word1 = crc1 ^ words[1];
9083b1253e8Schristos #if N > 2
9093b1253e8Schristos                 word2 = crc2 ^ words[2];
9103b1253e8Schristos #if N > 3
9113b1253e8Schristos                 word3 = crc3 ^ words[3];
9123b1253e8Schristos #if N > 4
9133b1253e8Schristos                 word4 = crc4 ^ words[4];
9143b1253e8Schristos #if N > 5
9153b1253e8Schristos                 word5 = crc5 ^ words[5];
9163b1253e8Schristos #endif
9173b1253e8Schristos #endif
9183b1253e8Schristos #endif
9193b1253e8Schristos #endif
9203b1253e8Schristos #endif
9213b1253e8Schristos                 words += N;
9223b1253e8Schristos 
9233b1253e8Schristos                 /* Compute and update the CRC for each word. The loop should
9243b1253e8Schristos                    get unrolled. */
9253b1253e8Schristos                 crc0 = crc_braid_big_table[0][word0 & 0xff];
9263b1253e8Schristos #if N > 1
9273b1253e8Schristos                 crc1 = crc_braid_big_table[0][word1 & 0xff];
9283b1253e8Schristos #if N > 2
9293b1253e8Schristos                 crc2 = crc_braid_big_table[0][word2 & 0xff];
9303b1253e8Schristos #if N > 3
9313b1253e8Schristos                 crc3 = crc_braid_big_table[0][word3 & 0xff];
9323b1253e8Schristos #if N > 4
9333b1253e8Schristos                 crc4 = crc_braid_big_table[0][word4 & 0xff];
9343b1253e8Schristos #if N > 5
9353b1253e8Schristos                 crc5 = crc_braid_big_table[0][word5 & 0xff];
9363b1253e8Schristos #endif
9373b1253e8Schristos #endif
9383b1253e8Schristos #endif
9393b1253e8Schristos #endif
9403b1253e8Schristos #endif
9413b1253e8Schristos                 for (k = 1; k < W; k++) {
9423b1253e8Schristos                     crc0 ^= crc_braid_big_table[k][(word0 >> (k << 3)) & 0xff];
9433b1253e8Schristos #if N > 1
9443b1253e8Schristos                     crc1 ^= crc_braid_big_table[k][(word1 >> (k << 3)) & 0xff];
9453b1253e8Schristos #if N > 2
9463b1253e8Schristos                     crc2 ^= crc_braid_big_table[k][(word2 >> (k << 3)) & 0xff];
9473b1253e8Schristos #if N > 3
9483b1253e8Schristos                     crc3 ^= crc_braid_big_table[k][(word3 >> (k << 3)) & 0xff];
9493b1253e8Schristos #if N > 4
9503b1253e8Schristos                     crc4 ^= crc_braid_big_table[k][(word4 >> (k << 3)) & 0xff];
9513b1253e8Schristos #if N > 5
9523b1253e8Schristos                     crc5 ^= crc_braid_big_table[k][(word5 >> (k << 3)) & 0xff];
9533b1253e8Schristos #endif
9543b1253e8Schristos #endif
9553b1253e8Schristos #endif
9563b1253e8Schristos #endif
9573b1253e8Schristos #endif
9583b1253e8Schristos                 }
9593b1253e8Schristos             }
9603b1253e8Schristos 
9613b1253e8Schristos             /*
9623b1253e8Schristos               Process the last block, combining the CRCs of the N braids at the
9633b1253e8Schristos               same time.
9643b1253e8Schristos              */
9653b1253e8Schristos             comb = crc_word_big(crc0 ^ words[0]);
9663b1253e8Schristos #if N > 1
9673b1253e8Schristos             comb = crc_word_big(crc1 ^ words[1] ^ comb);
9683b1253e8Schristos #if N > 2
9693b1253e8Schristos             comb = crc_word_big(crc2 ^ words[2] ^ comb);
9703b1253e8Schristos #if N > 3
9713b1253e8Schristos             comb = crc_word_big(crc3 ^ words[3] ^ comb);
9723b1253e8Schristos #if N > 4
9733b1253e8Schristos             comb = crc_word_big(crc4 ^ words[4] ^ comb);
9743b1253e8Schristos #if N > 5
9753b1253e8Schristos             comb = crc_word_big(crc5 ^ words[5] ^ comb);
9763b1253e8Schristos #endif
9773b1253e8Schristos #endif
9783b1253e8Schristos #endif
9793b1253e8Schristos #endif
9803b1253e8Schristos #endif
9813b1253e8Schristos             words += N;
9823b1253e8Schristos             crc = byte_swap(comb);
9833b1253e8Schristos         }
9843b1253e8Schristos 
9853b1253e8Schristos         /*
9863b1253e8Schristos           Update the pointer to the remaining bytes to process.
9873b1253e8Schristos          */
9883b1253e8Schristos         buf = (unsigned char const *)words;
9893b1253e8Schristos     }
9903b1253e8Schristos 
9913b1253e8Schristos #endif /* W */
9923b1253e8Schristos 
9933b1253e8Schristos     /* Complete the computation of the CRC on any remaining bytes. */
994aaf4ece6Schristos     while (len >= 8) {
995aaf4ece6Schristos         len -= 8;
9963b1253e8Schristos         crc = (crc >> 8) ^ crc_table[(crc ^ *buf++) & 0xff];
9973b1253e8Schristos         crc = (crc >> 8) ^ crc_table[(crc ^ *buf++) & 0xff];
9983b1253e8Schristos         crc = (crc >> 8) ^ crc_table[(crc ^ *buf++) & 0xff];
9993b1253e8Schristos         crc = (crc >> 8) ^ crc_table[(crc ^ *buf++) & 0xff];
10003b1253e8Schristos         crc = (crc >> 8) ^ crc_table[(crc ^ *buf++) & 0xff];
10013b1253e8Schristos         crc = (crc >> 8) ^ crc_table[(crc ^ *buf++) & 0xff];
10023b1253e8Schristos         crc = (crc >> 8) ^ crc_table[(crc ^ *buf++) & 0xff];
10033b1253e8Schristos         crc = (crc >> 8) ^ crc_table[(crc ^ *buf++) & 0xff];
1004aaf4ece6Schristos     }
10053b1253e8Schristos     while (len) {
10063b1253e8Schristos         len--;
10073b1253e8Schristos         crc = (crc >> 8) ^ crc_table[(crc ^ *buf++) & 0xff];
1008aaf4ece6Schristos     }
1009aaf4ece6Schristos 
10103b1253e8Schristos     /* Return the CRC, post-conditioned. */
10113b1253e8Schristos     return crc ^ 0xffffffff;
10123b1253e8Schristos }
10133b1253e8Schristos 
10143b1253e8Schristos #endif
10153b1253e8Schristos 
10166db8c6e9Schristos /* ========================================================================= */
1017*96c32821Schristos unsigned long ZEXPORT crc32(unsigned long crc, const unsigned char FAR *buf,
1018*96c32821Schristos                             uInt len) {
10196db8c6e9Schristos     return crc32_z(crc, buf, len);
10206db8c6e9Schristos }
10216db8c6e9Schristos 
1022aaf4ece6Schristos /* ========================================================================= */
1023*96c32821Schristos uLong ZEXPORT crc32_combine64(uLong crc1, uLong crc2, z_off64_t len2) {
10243b1253e8Schristos #ifdef DYNAMIC_CRC_TABLE
10253b1253e8Schristos     once(&made, make_crc_table);
10263b1253e8Schristos #endif /* DYNAMIC_CRC_TABLE */
10273b1253e8Schristos     return multmodp(x2nmodp(len2, 3), crc1) ^ (crc2 & 0xffffffff);
1028aaf4ece6Schristos }
10296db8c6e9Schristos 
10306db8c6e9Schristos /* ========================================================================= */
1031*96c32821Schristos uLong ZEXPORT crc32_combine(uLong crc1, uLong crc2, z_off_t len2) {
10323b1253e8Schristos     return crc32_combine64(crc1, crc2, (z_off64_t)len2);
10336db8c6e9Schristos }
10346db8c6e9Schristos 
10353b1253e8Schristos /* ========================================================================= */
1036*96c32821Schristos uLong ZEXPORT crc32_combine_gen64(z_off64_t len2) {
10373b1253e8Schristos #ifdef DYNAMIC_CRC_TABLE
10383b1253e8Schristos     once(&made, make_crc_table);
10393b1253e8Schristos #endif /* DYNAMIC_CRC_TABLE */
10403b1253e8Schristos     return x2nmodp(len2, 3);
10413b1253e8Schristos }
10423b1253e8Schristos 
10433b1253e8Schristos /* ========================================================================= */
1044*96c32821Schristos uLong ZEXPORT crc32_combine_gen(z_off_t len2) {
10453b1253e8Schristos     return crc32_combine_gen64((z_off64_t)len2);
10463b1253e8Schristos }
10473b1253e8Schristos 
10483b1253e8Schristos /* ========================================================================= */
1049*96c32821Schristos uLong ZEXPORT crc32_combine_op(uLong crc1, uLong crc2, uLong op) {
10503b1253e8Schristos     return multmodp(op, crc1) ^ (crc2 & 0xffffffff);
10516db8c6e9Schristos }
1052