1*96c32821Schristos /* $NetBSD: crc32.c,v 1.7 2024/09/22 19:12:27 christos Exp $ */ 2aaf4ece6Schristos 3aaf4ece6Schristos /* crc32.c -- compute the CRC-32 of a data stream 43b1253e8Schristos * Copyright (C) 1995-2022 Mark Adler 5aaf4ece6Schristos * For conditions of distribution and use, see copyright notice in zlib.h 6aaf4ece6Schristos * 73b1253e8Schristos * This interleaved implementation of a CRC makes use of pipelined multiple 83b1253e8Schristos * arithmetic-logic units, commonly found in modern CPU cores. It is due to 93b1253e8Schristos * Kadatch and Jenkins (2010). See doc/crc-doc.1.0.pdf in this distribution. 10aaf4ece6Schristos */ 11aaf4ece6Schristos 123b1253e8Schristos /* @(#) Id */ 13aaf4ece6Schristos 14aaf4ece6Schristos /* 15aaf4ece6Schristos Note on the use of DYNAMIC_CRC_TABLE: there is no mutex or semaphore 16aaf4ece6Schristos protection on the static variables used to control the first-use generation 17aaf4ece6Schristos of the crc tables. Therefore, if you #define DYNAMIC_CRC_TABLE, you should 18aaf4ece6Schristos first call get_crc_table() to initialize the tables before allowing more than 19aaf4ece6Schristos one thread to use crc32(). 206db8c6e9Schristos 213b1253e8Schristos MAKECRCH can be #defined to write out crc32.h. A main() routine is also 223b1253e8Schristos produced, so that this one source file can be compiled to an executable. 23aaf4ece6Schristos */ 24aaf4ece6Schristos 25aaf4ece6Schristos #ifdef MAKECRCH 26aaf4ece6Schristos # include <stdio.h> 27aaf4ece6Schristos # ifndef DYNAMIC_CRC_TABLE 28aaf4ece6Schristos # define DYNAMIC_CRC_TABLE 29aaf4ece6Schristos # endif /* !DYNAMIC_CRC_TABLE */ 30aaf4ece6Schristos #endif /* MAKECRCH */ 31aaf4ece6Schristos 323b1253e8Schristos #include "zutil.h" /* for Z_U4, Z_U8, z_crc_t, and FAR definitions */ 33aaf4ece6Schristos 343b1253e8Schristos /* 353b1253e8Schristos A CRC of a message is computed on N braids of words in the message, where 363b1253e8Schristos each word consists of W bytes (4 or 8). If N is 3, for example, then three 373b1253e8Schristos running sparse CRCs are calculated respectively on each braid, at these 383b1253e8Schristos indices in the array of words: 0, 3, 6, ..., 1, 4, 7, ..., and 2, 5, 8, ... 393b1253e8Schristos This is done starting at a word boundary, and continues until as many blocks 403b1253e8Schristos of N * W bytes as are available have been processed. The results are combined 413b1253e8Schristos into a single CRC at the end. For this code, N must be in the range 1..6 and 423b1253e8Schristos W must be 4 or 8. The upper limit on N can be increased if desired by adding 433b1253e8Schristos more #if blocks, extending the patterns apparent in the code. In addition, 443b1253e8Schristos crc32.h would need to be regenerated, if the maximum N value is increased. 45aaf4ece6Schristos 463b1253e8Schristos N and W are chosen empirically by benchmarking the execution time on a given 473b1253e8Schristos processor. The choices for N and W below were based on testing on Intel Kaby 483b1253e8Schristos Lake i7, AMD Ryzen 7, ARM Cortex-A57, Sparc64-VII, PowerPC POWER9, and MIPS64 493b1253e8Schristos Octeon II processors. The Intel, AMD, and ARM processors were all fastest 503b1253e8Schristos with N=5, W=8. The Sparc, PowerPC, and MIPS64 were all fastest at N=5, W=4. 513b1253e8Schristos They were all tested with either gcc or clang, all using the -O3 optimization 523b1253e8Schristos level. Your mileage may vary. 533b1253e8Schristos */ 542fd2e12bSdsl 553b1253e8Schristos /* Define N */ 563b1253e8Schristos #ifdef Z_TESTN 573b1253e8Schristos # define N Z_TESTN 58aaf4ece6Schristos #else 593b1253e8Schristos # define N 5 603b1253e8Schristos #endif 613b1253e8Schristos #if N < 1 || N > 6 623b1253e8Schristos # error N must be in 1..6 633b1253e8Schristos #endif 64aaf4ece6Schristos 653b1253e8Schristos /* 663b1253e8Schristos z_crc_t must be at least 32 bits. z_word_t must be at least as long as 673b1253e8Schristos z_crc_t. It is assumed here that z_word_t is either 32 bits or 64 bits, and 683b1253e8Schristos that bytes are eight bits. 693b1253e8Schristos */ 706db8c6e9Schristos 713b1253e8Schristos /* 723b1253e8Schristos Define W and the associated z_word_t type. If W is not defined, then a 733b1253e8Schristos braided calculation is not used, and the associated tables and code are not 743b1253e8Schristos compiled. 753b1253e8Schristos */ 763b1253e8Schristos #ifdef Z_TESTW 773b1253e8Schristos # if Z_TESTW-1 != -1 783b1253e8Schristos # define W Z_TESTW 793b1253e8Schristos # endif 803b1253e8Schristos #else 813b1253e8Schristos # ifdef MAKECRCH 823b1253e8Schristos # define W 8 /* required for MAKECRCH */ 833b1253e8Schristos # else 843b1253e8Schristos # if defined(__x86_64__) || defined(__aarch64__) 853b1253e8Schristos # define W 8 863b1253e8Schristos # else 873b1253e8Schristos # define W 4 883b1253e8Schristos # endif 893b1253e8Schristos # endif 903b1253e8Schristos #endif 913b1253e8Schristos #ifdef W 923b1253e8Schristos # if W == 8 && defined(Z_U8) 933b1253e8Schristos typedef Z_U8 z_word_t; 943b1253e8Schristos # elif defined(Z_U4) 953b1253e8Schristos # undef W 963b1253e8Schristos # define W 4 973b1253e8Schristos typedef Z_U4 z_word_t; 983b1253e8Schristos # else 993b1253e8Schristos # undef W 1003b1253e8Schristos # endif 1013b1253e8Schristos #endif 1023b1253e8Schristos 1033b1253e8Schristos /* If available, use the ARM processor CRC32 instruction. */ 1043b1253e8Schristos #if defined(__aarch64__) && defined(__ARM_FEATURE_CRC32) && W == 8 1053b1253e8Schristos # define ARMCRC32 1063b1253e8Schristos #endif 1073b1253e8Schristos 1083b1253e8Schristos #if defined(W) && (!defined(ARMCRC32) || defined(DYNAMIC_CRC_TABLE)) 1093b1253e8Schristos /* 1103b1253e8Schristos Swap the bytes in a z_word_t to convert between little and big endian. Any 1113b1253e8Schristos self-respecting compiler will optimize this to a single machine byte-swap 1123b1253e8Schristos instruction, if one is available. This assumes that word_t is either 32 bits 1133b1253e8Schristos or 64 bits. 1143b1253e8Schristos */ 115*96c32821Schristos local z_word_t byte_swap(z_word_t word) { 1163b1253e8Schristos # if W == 8 1173b1253e8Schristos return 1183b1253e8Schristos (word & 0xff00000000000000) >> 56 | 1193b1253e8Schristos (word & 0xff000000000000) >> 40 | 1203b1253e8Schristos (word & 0xff0000000000) >> 24 | 1213b1253e8Schristos (word & 0xff00000000) >> 8 | 1223b1253e8Schristos (word & 0xff000000) << 8 | 1233b1253e8Schristos (word & 0xff0000) << 24 | 1243b1253e8Schristos (word & 0xff00) << 40 | 1253b1253e8Schristos (word & 0xff) << 56; 1263b1253e8Schristos # else /* W == 4 */ 1273b1253e8Schristos return 1283b1253e8Schristos (word & 0xff000000) >> 24 | 1293b1253e8Schristos (word & 0xff0000) >> 8 | 1303b1253e8Schristos (word & 0xff00) << 8 | 1313b1253e8Schristos (word & 0xff) << 24; 1323b1253e8Schristos # endif 1333b1253e8Schristos } 1343b1253e8Schristos #endif 1353b1253e8Schristos 136*96c32821Schristos #ifdef DYNAMIC_CRC_TABLE 137*96c32821Schristos /* ========================================================================= 138*96c32821Schristos * Table of powers of x for combining CRC-32s, filled in by make_crc_table() 139*96c32821Schristos * below. 140*96c32821Schristos */ 141*96c32821Schristos local z_crc_t FAR x2n_table[32]; 142*96c32821Schristos #else 143*96c32821Schristos /* ========================================================================= 144*96c32821Schristos * Tables for byte-wise and braided CRC-32 calculations, and a table of powers 145*96c32821Schristos * of x for combining CRC-32s, all made by make_crc_table(). 146*96c32821Schristos */ 147*96c32821Schristos # include "crc32.h" 148*96c32821Schristos #endif 149*96c32821Schristos 1503b1253e8Schristos /* CRC polynomial. */ 1513b1253e8Schristos #define POLY 0xedb88320 /* p(x) reflected, with x^32 implied */ 152aaf4ece6Schristos 153*96c32821Schristos /* 154*96c32821Schristos Return a(x) multiplied by b(x) modulo p(x), where p(x) is the CRC polynomial, 155*96c32821Schristos reflected. For speed, this requires that a not be zero. 156*96c32821Schristos */ 157*96c32821Schristos local z_crc_t multmodp(z_crc_t a, z_crc_t b) { 158*96c32821Schristos z_crc_t m, p; 159aaf4ece6Schristos 160*96c32821Schristos m = (z_crc_t)1 << 31; 161*96c32821Schristos p = 0; 162*96c32821Schristos for (;;) { 163*96c32821Schristos if (a & m) { 164*96c32821Schristos p ^= b; 165*96c32821Schristos if ((a & (m - 1)) == 0) 166*96c32821Schristos break; 167*96c32821Schristos } 168*96c32821Schristos m >>= 1; 169*96c32821Schristos b = b & 1 ? (b >> 1) ^ POLY : b >> 1; 170*96c32821Schristos } 171*96c32821Schristos return p; 172*96c32821Schristos } 173*96c32821Schristos 174*96c32821Schristos /* 175*96c32821Schristos Return x^(n * 2^k) modulo p(x). Requires that x2n_table[] has been 176*96c32821Schristos initialized. 177*96c32821Schristos */ 178*96c32821Schristos local z_crc_t x2nmodp(z_off64_t n, unsigned k) { 179*96c32821Schristos z_crc_t p; 180*96c32821Schristos 181*96c32821Schristos p = (z_crc_t)1 << 31; /* x^0 == 1 */ 182*96c32821Schristos while (n) { 183*96c32821Schristos if (n & 1) 184*96c32821Schristos p = multmodp(x2n_table[k & 31], p); 185*96c32821Schristos n >>= 1; 186*96c32821Schristos k++; 187*96c32821Schristos } 188*96c32821Schristos return p; 189*96c32821Schristos } 190*96c32821Schristos 191*96c32821Schristos #ifdef DYNAMIC_CRC_TABLE 192*96c32821Schristos /* ========================================================================= 193*96c32821Schristos * Build the tables for byte-wise and braided CRC-32 calculations, and a table 194*96c32821Schristos * of powers of x for combining CRC-32s. 195*96c32821Schristos */ 1963b1253e8Schristos local z_crc_t FAR crc_table[256]; 1973b1253e8Schristos #ifdef W 1983b1253e8Schristos local z_word_t FAR crc_big_table[256]; 1993b1253e8Schristos local z_crc_t FAR crc_braid_table[W][256]; 2003b1253e8Schristos local z_word_t FAR crc_braid_big_table[W][256]; 201*96c32821Schristos local void braid(z_crc_t [][256], z_word_t [][256], int, int); 2023b1253e8Schristos #endif 203aaf4ece6Schristos #ifdef MAKECRCH 204*96c32821Schristos local void write_table(FILE *, const z_crc_t FAR *, int); 205*96c32821Schristos local void write_table32hi(FILE *, const z_word_t FAR *, int); 206*96c32821Schristos local void write_table64(FILE *, const z_word_t FAR *, int); 207aaf4ece6Schristos #endif /* MAKECRCH */ 2083b1253e8Schristos 2093b1253e8Schristos /* 2103b1253e8Schristos Define a once() function depending on the availability of atomics. If this is 2113b1253e8Schristos compiled with DYNAMIC_CRC_TABLE defined, and if CRCs will be computed in 2123b1253e8Schristos multiple threads, and if atomics are not available, then get_crc_table() must 2133b1253e8Schristos be called to initialize the tables and must return before any threads are 2143b1253e8Schristos allowed to compute or combine CRCs. 2153b1253e8Schristos */ 2163b1253e8Schristos 2173b1253e8Schristos /* Definition of once functionality. */ 2183b1253e8Schristos typedef struct once_s once_t; 2193b1253e8Schristos 2203b1253e8Schristos /* Check for the availability of atomics. */ 2213b1253e8Schristos #if defined(__STDC__) && __STDC_VERSION__ >= 201112L && \ 2223b1253e8Schristos !defined(__STDC_NO_ATOMICS__) 2233b1253e8Schristos 2243b1253e8Schristos #include <stdatomic.h> 2253b1253e8Schristos 2263b1253e8Schristos /* Structure for once(), which must be initialized with ONCE_INIT. */ 2273b1253e8Schristos struct once_s { 2283b1253e8Schristos atomic_flag begun; 2293b1253e8Schristos atomic_int done; 2303b1253e8Schristos }; 2313b1253e8Schristos #define ONCE_INIT {ATOMIC_FLAG_INIT, 0} 2323b1253e8Schristos 2333b1253e8Schristos /* 2343b1253e8Schristos Run the provided init() function exactly once, even if multiple threads 2353b1253e8Schristos invoke once() at the same time. The state must be a once_t initialized with 2363b1253e8Schristos ONCE_INIT. 2373b1253e8Schristos */ 238*96c32821Schristos local void once(once_t *state, void (*init)(void)) { 2393b1253e8Schristos if (!atomic_load(&state->done)) { 2403b1253e8Schristos if (atomic_flag_test_and_set(&state->begun)) 2413b1253e8Schristos while (!atomic_load(&state->done)) 2423b1253e8Schristos ; 2433b1253e8Schristos else { 2443b1253e8Schristos init(); 2453b1253e8Schristos atomic_store(&state->done, 1); 2463b1253e8Schristos } 2473b1253e8Schristos } 2483b1253e8Schristos } 2493b1253e8Schristos 2503b1253e8Schristos #else /* no atomics */ 2513b1253e8Schristos 2523b1253e8Schristos /* Structure for once(), which must be initialized with ONCE_INIT. */ 2533b1253e8Schristos struct once_s { 2543b1253e8Schristos volatile int begun; 2553b1253e8Schristos volatile int done; 2563b1253e8Schristos }; 2573b1253e8Schristos #define ONCE_INIT {0, 0} 2583b1253e8Schristos 2593b1253e8Schristos /* Test and set. Alas, not atomic, but tries to minimize the period of 2603b1253e8Schristos vulnerability. */ 261*96c32821Schristos local int test_and_set(int volatile *flag) { 2623b1253e8Schristos int was; 2633b1253e8Schristos 2643b1253e8Schristos was = *flag; 2653b1253e8Schristos *flag = 1; 2663b1253e8Schristos return was; 2673b1253e8Schristos } 2683b1253e8Schristos 2693b1253e8Schristos /* Run the provided init() function once. This is not thread-safe. */ 270*96c32821Schristos local void once(once_t *state, void (*init)(void)) { 2713b1253e8Schristos if (!state->done) { 2723b1253e8Schristos if (test_and_set(&state->begun)) 2733b1253e8Schristos while (!state->done) 2743b1253e8Schristos ; 2753b1253e8Schristos else { 2763b1253e8Schristos init(); 2773b1253e8Schristos state->done = 1; 2783b1253e8Schristos } 2793b1253e8Schristos } 2803b1253e8Schristos } 2813b1253e8Schristos 2823b1253e8Schristos #endif 2833b1253e8Schristos 2843b1253e8Schristos /* State for once(). */ 2853b1253e8Schristos local once_t made = ONCE_INIT; 2863b1253e8Schristos 287aaf4ece6Schristos /* 288aaf4ece6Schristos Generate tables for a byte-wise 32-bit CRC calculation on the polynomial: 289aaf4ece6Schristos x^32+x^26+x^23+x^22+x^16+x^12+x^11+x^10+x^8+x^7+x^5+x^4+x^2+x+1. 290aaf4ece6Schristos 291aaf4ece6Schristos Polynomials over GF(2) are represented in binary, one bit per coefficient, 292aaf4ece6Schristos with the lowest powers in the most significant bit. Then adding polynomials 293aaf4ece6Schristos is just exclusive-or, and multiplying a polynomial by x is a right shift by 294aaf4ece6Schristos one. If we call the above polynomial p, and represent a byte as the 295aaf4ece6Schristos polynomial q, also with the lowest power in the most significant bit (so the 2963b1253e8Schristos byte 0xb1 is the polynomial x^7+x^3+x^2+1), then the CRC is (q*x^32) mod p, 297aaf4ece6Schristos where a mod b means the remainder after dividing a by b. 298aaf4ece6Schristos 299aaf4ece6Schristos This calculation is done using the shift-register method of multiplying and 300aaf4ece6Schristos taking the remainder. The register is initialized to zero, and for each 301aaf4ece6Schristos incoming bit, x^32 is added mod p to the register if the bit is a one (where 3023b1253e8Schristos x^32 mod p is p+x^32 = x^26+...+1), and the register is multiplied mod p by x 3033b1253e8Schristos (which is shifting right by one and adding x^32 mod p if the bit shifted out 3043b1253e8Schristos is a one). We start with the highest power (least significant bit) of q and 3053b1253e8Schristos repeat for all eight bits of q. 306aaf4ece6Schristos 3073b1253e8Schristos The table is simply the CRC of all possible eight bit values. This is all the 3083b1253e8Schristos information needed to generate CRCs on data a byte at a time for all 3093b1253e8Schristos combinations of CRC register values and incoming bytes. 310aaf4ece6Schristos */ 3113b1253e8Schristos 312*96c32821Schristos local void make_crc_table(void) { 3133b1253e8Schristos unsigned i, j, n; 3143b1253e8Schristos z_crc_t p; 315aaf4ece6Schristos 3163b1253e8Schristos /* initialize the CRC of bytes tables */ 3173b1253e8Schristos for (i = 0; i < 256; i++) { 3183b1253e8Schristos p = i; 3193b1253e8Schristos for (j = 0; j < 8; j++) 3203b1253e8Schristos p = p & 1 ? (p >> 1) ^ POLY : p >> 1; 3213b1253e8Schristos crc_table[i] = p; 3223b1253e8Schristos #ifdef W 3233b1253e8Schristos crc_big_table[i] = byte_swap(p); 3243b1253e8Schristos #endif 325aaf4ece6Schristos } 326aaf4ece6Schristos 3273b1253e8Schristos /* initialize the x^2^n mod p(x) table */ 3283b1253e8Schristos p = (z_crc_t)1 << 30; /* x^1 */ 3293b1253e8Schristos x2n_table[0] = p; 3303b1253e8Schristos for (n = 1; n < 32; n++) 3313b1253e8Schristos x2n_table[n] = p = multmodp(p, p); 332aaf4ece6Schristos 3333b1253e8Schristos #ifdef W 3343b1253e8Schristos /* initialize the braiding tables -- needs x2n_table[] */ 3353b1253e8Schristos braid(crc_braid_table, crc_braid_big_table, N, W); 3363b1253e8Schristos #endif 337aaf4ece6Schristos 338aaf4ece6Schristos #ifdef MAKECRCH 339aaf4ece6Schristos { 3403b1253e8Schristos /* 3413b1253e8Schristos The crc32.h header file contains tables for both 32-bit and 64-bit 3423b1253e8Schristos z_word_t's, and so requires a 64-bit type be available. In that case, 3433b1253e8Schristos z_word_t must be defined to be 64-bits. This code then also generates 3443b1253e8Schristos and writes out the tables for the case that z_word_t is 32 bits. 3453b1253e8Schristos */ 3463b1253e8Schristos #if !defined(W) || W != 8 3473b1253e8Schristos # error Need a 64-bit integer type in order to generate crc32.h. 3483b1253e8Schristos #endif 349aaf4ece6Schristos FILE *out; 3503b1253e8Schristos int k, n; 3513b1253e8Schristos z_crc_t ltl[8][256]; 3523b1253e8Schristos z_word_t big[8][256]; 353aaf4ece6Schristos 354aaf4ece6Schristos out = fopen("crc32.h", "w"); 355aaf4ece6Schristos if (out == NULL) return; 3563b1253e8Schristos 3573b1253e8Schristos /* write out little-endian CRC table to crc32.h */ 3583b1253e8Schristos fprintf(out, 3593b1253e8Schristos "/* crc32.h -- tables for rapid CRC calculation\n" 3603b1253e8Schristos " * Generated automatically by crc32.c\n */\n" 3613b1253e8Schristos "\n" 3623b1253e8Schristos "local const z_crc_t FAR crc_table[] = {\n" 3633b1253e8Schristos " "); 3643b1253e8Schristos write_table(out, crc_table, 256); 3653b1253e8Schristos fprintf(out, 3663b1253e8Schristos "};\n"); 3673b1253e8Schristos 3683b1253e8Schristos /* write out big-endian CRC table for 64-bit z_word_t to crc32.h */ 3693b1253e8Schristos fprintf(out, 3703b1253e8Schristos "\n" 3713b1253e8Schristos "#ifdef W\n" 3723b1253e8Schristos "\n" 3733b1253e8Schristos "#if W == 8\n" 3743b1253e8Schristos "\n" 3753b1253e8Schristos "local const z_word_t FAR crc_big_table[] = {\n" 3763b1253e8Schristos " "); 3773b1253e8Schristos write_table64(out, crc_big_table, 256); 3783b1253e8Schristos fprintf(out, 3793b1253e8Schristos "};\n"); 3803b1253e8Schristos 3813b1253e8Schristos /* write out big-endian CRC table for 32-bit z_word_t to crc32.h */ 3823b1253e8Schristos fprintf(out, 3833b1253e8Schristos "\n" 3843b1253e8Schristos "#else /* W == 4 */\n" 3853b1253e8Schristos "\n" 3863b1253e8Schristos "local const z_word_t FAR crc_big_table[] = {\n" 3873b1253e8Schristos " "); 3883b1253e8Schristos write_table32hi(out, crc_big_table, 256); 3893b1253e8Schristos fprintf(out, 3903b1253e8Schristos "};\n" 3913b1253e8Schristos "\n" 3923b1253e8Schristos "#endif\n"); 3933b1253e8Schristos 3943b1253e8Schristos /* write out braid tables for each value of N */ 3953b1253e8Schristos for (n = 1; n <= 6; n++) { 3963b1253e8Schristos fprintf(out, 3973b1253e8Schristos "\n" 3983b1253e8Schristos "#if N == %d\n", n); 3993b1253e8Schristos 4003b1253e8Schristos /* compute braid tables for this N and 64-bit word_t */ 4013b1253e8Schristos braid(ltl, big, n, 8); 4023b1253e8Schristos 4033b1253e8Schristos /* write out braid tables for 64-bit z_word_t to crc32.h */ 4043b1253e8Schristos fprintf(out, 4053b1253e8Schristos "\n" 4063b1253e8Schristos "#if W == 8\n" 4073b1253e8Schristos "\n" 4083b1253e8Schristos "local const z_crc_t FAR crc_braid_table[][256] = {\n"); 4093b1253e8Schristos for (k = 0; k < 8; k++) { 4103b1253e8Schristos fprintf(out, " {"); 4113b1253e8Schristos write_table(out, ltl[k], 256); 4123b1253e8Schristos fprintf(out, "}%s", k < 7 ? ",\n" : ""); 413aaf4ece6Schristos } 4143b1253e8Schristos fprintf(out, 4153b1253e8Schristos "};\n" 4163b1253e8Schristos "\n" 4173b1253e8Schristos "local const z_word_t FAR crc_braid_big_table[][256] = {\n"); 4183b1253e8Schristos for (k = 0; k < 8; k++) { 4193b1253e8Schristos fprintf(out, " {"); 4203b1253e8Schristos write_table64(out, big[k], 256); 4213b1253e8Schristos fprintf(out, "}%s", k < 7 ? ",\n" : ""); 4223b1253e8Schristos } 4233b1253e8Schristos fprintf(out, 4243b1253e8Schristos "};\n"); 4253b1253e8Schristos 4263b1253e8Schristos /* compute braid tables for this N and 32-bit word_t */ 4273b1253e8Schristos braid(ltl, big, n, 4); 4283b1253e8Schristos 4293b1253e8Schristos /* write out braid tables for 32-bit z_word_t to crc32.h */ 4303b1253e8Schristos fprintf(out, 4313b1253e8Schristos "\n" 4323b1253e8Schristos "#else /* W == 4 */\n" 4333b1253e8Schristos "\n" 4343b1253e8Schristos "local const z_crc_t FAR crc_braid_table[][256] = {\n"); 4353b1253e8Schristos for (k = 0; k < 4; k++) { 4363b1253e8Schristos fprintf(out, " {"); 4373b1253e8Schristos write_table(out, ltl[k], 256); 4383b1253e8Schristos fprintf(out, "}%s", k < 3 ? ",\n" : ""); 4393b1253e8Schristos } 4403b1253e8Schristos fprintf(out, 4413b1253e8Schristos "};\n" 4423b1253e8Schristos "\n" 4433b1253e8Schristos "local const z_word_t FAR crc_braid_big_table[][256] = {\n"); 4443b1253e8Schristos for (k = 0; k < 4; k++) { 4453b1253e8Schristos fprintf(out, " {"); 4463b1253e8Schristos write_table32hi(out, big[k], 256); 4473b1253e8Schristos fprintf(out, "}%s", k < 3 ? ",\n" : ""); 4483b1253e8Schristos } 4493b1253e8Schristos fprintf(out, 4503b1253e8Schristos "};\n" 4513b1253e8Schristos "\n" 4523b1253e8Schristos "#endif\n" 4533b1253e8Schristos "\n" 4543b1253e8Schristos "#endif\n"); 4553b1253e8Schristos } 4563b1253e8Schristos fprintf(out, 4573b1253e8Schristos "\n" 4583b1253e8Schristos "#endif\n"); 4593b1253e8Schristos 4603b1253e8Schristos /* write out zeros operator table to crc32.h */ 4613b1253e8Schristos fprintf(out, 4623b1253e8Schristos "\n" 4633b1253e8Schristos "local const z_crc_t FAR x2n_table[] = {\n" 4643b1253e8Schristos " "); 4653b1253e8Schristos write_table(out, x2n_table, 32); 4663b1253e8Schristos fprintf(out, 4673b1253e8Schristos "};\n"); 468aaf4ece6Schristos fclose(out); 469aaf4ece6Schristos } 470aaf4ece6Schristos #endif /* MAKECRCH */ 471aaf4ece6Schristos } 472aaf4ece6Schristos 473aaf4ece6Schristos #ifdef MAKECRCH 4743b1253e8Schristos 4753b1253e8Schristos /* 4763b1253e8Schristos Write the 32-bit values in table[0..k-1] to out, five per line in 4773b1253e8Schristos hexadecimal separated by commas. 4783b1253e8Schristos */ 479*96c32821Schristos local void write_table(FILE *out, const z_crc_t FAR *table, int k) { 480aaf4ece6Schristos int n; 481aaf4ece6Schristos 4823b1253e8Schristos for (n = 0; n < k; n++) 4833b1253e8Schristos fprintf(out, "%s0x%08lx%s", n == 0 || n % 5 ? "" : " ", 4846db8c6e9Schristos (unsigned long)(table[n]), 4853b1253e8Schristos n == k - 1 ? "" : (n % 5 == 4 ? ",\n" : ", ")); 486aaf4ece6Schristos } 4873b1253e8Schristos 4883b1253e8Schristos /* 4893b1253e8Schristos Write the high 32-bits of each value in table[0..k-1] to out, five per line 4903b1253e8Schristos in hexadecimal separated by commas. 4913b1253e8Schristos */ 492*96c32821Schristos local void write_table32hi(FILE *out, const z_word_t FAR *table, int k) { 4933b1253e8Schristos int n; 4943b1253e8Schristos 4953b1253e8Schristos for (n = 0; n < k; n++) 4963b1253e8Schristos fprintf(out, "%s0x%08lx%s", n == 0 || n % 5 ? "" : " ", 4973b1253e8Schristos (unsigned long)(table[n] >> 32), 4983b1253e8Schristos n == k - 1 ? "" : (n % 5 == 4 ? ",\n" : ", ")); 4993b1253e8Schristos } 5003b1253e8Schristos 5013b1253e8Schristos /* 5023b1253e8Schristos Write the 64-bit values in table[0..k-1] to out, three per line in 5033b1253e8Schristos hexadecimal separated by commas. This assumes that if there is a 64-bit 5043b1253e8Schristos type, then there is also a long long integer type, and it is at least 64 5053b1253e8Schristos bits. If not, then the type cast and format string can be adjusted 5063b1253e8Schristos accordingly. 5073b1253e8Schristos */ 508*96c32821Schristos local void write_table64(FILE *out, const z_word_t FAR *table, int k) { 5093b1253e8Schristos int n; 5103b1253e8Schristos 5113b1253e8Schristos for (n = 0; n < k; n++) 5123b1253e8Schristos fprintf(out, "%s0x%016llx%s", n == 0 || n % 3 ? "" : " ", 5133b1253e8Schristos (unsigned long long)(table[n]), 5143b1253e8Schristos n == k - 1 ? "" : (n % 3 == 2 ? ",\n" : ", ")); 5153b1253e8Schristos } 5163b1253e8Schristos 5173b1253e8Schristos /* Actually do the deed. */ 518*96c32821Schristos int main(void) { 5193b1253e8Schristos make_crc_table(); 5203b1253e8Schristos return 0; 5213b1253e8Schristos } 5223b1253e8Schristos 523aaf4ece6Schristos #endif /* MAKECRCH */ 524aaf4ece6Schristos 5253b1253e8Schristos #ifdef W 5263b1253e8Schristos /* 5273b1253e8Schristos Generate the little and big-endian braid tables for the given n and z_word_t 5283b1253e8Schristos size w. Each array must have room for w blocks of 256 elements. 5293b1253e8Schristos */ 530*96c32821Schristos local void braid(z_crc_t ltl[][256], z_word_t big[][256], int n, int w) { 5313b1253e8Schristos int k; 5323b1253e8Schristos z_crc_t i, p, q; 5333b1253e8Schristos for (k = 0; k < w; k++) { 5343b1253e8Schristos p = x2nmodp((n * w + 3 - k) << 3, 0); 5353b1253e8Schristos ltl[k][0] = 0; 5363b1253e8Schristos big[w - 1 - k][0] = 0; 5373b1253e8Schristos for (i = 1; i < 256; i++) { 5383b1253e8Schristos ltl[k][i] = q = multmodp(i << 24, p); 5393b1253e8Schristos big[w - 1 - k][i] = byte_swap(q); 5403b1253e8Schristos } 5413b1253e8Schristos } 5423b1253e8Schristos } 5433b1253e8Schristos #endif 5443b1253e8Schristos 545aaf4ece6Schristos #endif /* DYNAMIC_CRC_TABLE */ 546aaf4ece6Schristos 547aaf4ece6Schristos /* ========================================================================= 5483b1253e8Schristos * This function can be used by asm versions of crc32(), and to force the 5493b1253e8Schristos * generation of the CRC tables in a threaded application. 550aaf4ece6Schristos */ 551*96c32821Schristos const z_crc_t FAR * ZEXPORT get_crc_table(void) { 552aaf4ece6Schristos #ifdef DYNAMIC_CRC_TABLE 5533b1253e8Schristos once(&made, make_crc_table); 554aaf4ece6Schristos #endif /* DYNAMIC_CRC_TABLE */ 5556db8c6e9Schristos return (const z_crc_t FAR *)crc_table; 556aaf4ece6Schristos } 557aaf4ece6Schristos 5583b1253e8Schristos /* ========================================================================= 5593b1253e8Schristos * Use ARM machine instructions if available. This will compute the CRC about 5603b1253e8Schristos * ten times faster than the braided calculation. This code does not check for 5613b1253e8Schristos * the presence of the CRC instruction at run time. __ARM_FEATURE_CRC32 will 5623b1253e8Schristos * only be defined if the compilation specifies an ARM processor architecture 5633b1253e8Schristos * that has the instructions. For example, compiling with -march=armv8.1-a or 5643b1253e8Schristos * -march=armv8-a+crc, or -march=native if the compile machine has the crc32 5653b1253e8Schristos * instructions. 5663b1253e8Schristos */ 5673b1253e8Schristos #ifdef ARMCRC32 5683b1253e8Schristos 5693b1253e8Schristos /* 5703b1253e8Schristos Constants empirically determined to maximize speed. These values are from 5713b1253e8Schristos measurements on a Cortex-A57. Your mileage may vary. 5723b1253e8Schristos */ 5733b1253e8Schristos #define Z_BATCH 3990 /* number of words in a batch */ 5743b1253e8Schristos #define Z_BATCH_ZEROS 0xa10d3d0c /* computed from Z_BATCH = 3990 */ 5753b1253e8Schristos #define Z_BATCH_MIN 800 /* fewest words in a final batch */ 5763b1253e8Schristos 577*96c32821Schristos unsigned long ZEXPORT crc32_z(unsigned long crc, const unsigned char FAR *buf, 578*96c32821Schristos z_size_t len) { 5793b1253e8Schristos z_crc_t val; 5803b1253e8Schristos z_word_t crc1, crc2; 5813b1253e8Schristos const z_word_t *word; 5823b1253e8Schristos z_word_t val0, val1, val2; 5833b1253e8Schristos z_size_t last, last2, i; 5843b1253e8Schristos z_size_t num; 5853b1253e8Schristos 5863b1253e8Schristos /* Return initial CRC, if requested. */ 5873b1253e8Schristos if (buf == Z_NULL) return 0; 5883b1253e8Schristos 5893b1253e8Schristos #ifdef DYNAMIC_CRC_TABLE 5903b1253e8Schristos once(&made, make_crc_table); 5913b1253e8Schristos #endif /* DYNAMIC_CRC_TABLE */ 5923b1253e8Schristos 5933b1253e8Schristos /* Pre-condition the CRC */ 5943b1253e8Schristos crc = (~crc) & 0xffffffff; 5953b1253e8Schristos 5963b1253e8Schristos /* Compute the CRC up to a word boundary. */ 5973b1253e8Schristos while (len && ((z_size_t)buf & 7) != 0) { 5983b1253e8Schristos len--; 5993b1253e8Schristos val = *buf++; 6003b1253e8Schristos __asm__ volatile("crc32b %w0, %w0, %w1" : "+r"(crc) : "r"(val)); 6013b1253e8Schristos } 6023b1253e8Schristos 6033b1253e8Schristos /* Prepare to compute the CRC on full 64-bit words word[0..num-1]. */ 6043b1253e8Schristos word = (z_word_t const *)buf; 6053b1253e8Schristos num = len >> 3; 6063b1253e8Schristos len &= 7; 6073b1253e8Schristos 6083b1253e8Schristos /* Do three interleaved CRCs to realize the throughput of one crc32x 6093b1253e8Schristos instruction per cycle. Each CRC is calculated on Z_BATCH words. The 6103b1253e8Schristos three CRCs are combined into a single CRC after each set of batches. */ 6113b1253e8Schristos while (num >= 3 * Z_BATCH) { 6123b1253e8Schristos crc1 = 0; 6133b1253e8Schristos crc2 = 0; 6143b1253e8Schristos for (i = 0; i < Z_BATCH; i++) { 6153b1253e8Schristos val0 = word[i]; 6163b1253e8Schristos val1 = word[i + Z_BATCH]; 6173b1253e8Schristos val2 = word[i + 2 * Z_BATCH]; 6183b1253e8Schristos __asm__ volatile("crc32x %w0, %w0, %x1" : "+r"(crc) : "r"(val0)); 6193b1253e8Schristos __asm__ volatile("crc32x %w0, %w0, %x1" : "+r"(crc1) : "r"(val1)); 6203b1253e8Schristos __asm__ volatile("crc32x %w0, %w0, %x1" : "+r"(crc2) : "r"(val2)); 6213b1253e8Schristos } 6223b1253e8Schristos word += 3 * Z_BATCH; 6233b1253e8Schristos num -= 3 * Z_BATCH; 6243b1253e8Schristos crc = multmodp(Z_BATCH_ZEROS, crc) ^ crc1; 6253b1253e8Schristos crc = multmodp(Z_BATCH_ZEROS, crc) ^ crc2; 6263b1253e8Schristos } 6273b1253e8Schristos 6283b1253e8Schristos /* Do one last smaller batch with the remaining words, if there are enough 6293b1253e8Schristos to pay for the combination of CRCs. */ 6303b1253e8Schristos last = num / 3; 6313b1253e8Schristos if (last >= Z_BATCH_MIN) { 6323b1253e8Schristos last2 = last << 1; 6333b1253e8Schristos crc1 = 0; 6343b1253e8Schristos crc2 = 0; 6353b1253e8Schristos for (i = 0; i < last; i++) { 6363b1253e8Schristos val0 = word[i]; 6373b1253e8Schristos val1 = word[i + last]; 6383b1253e8Schristos val2 = word[i + last2]; 6393b1253e8Schristos __asm__ volatile("crc32x %w0, %w0, %x1" : "+r"(crc) : "r"(val0)); 6403b1253e8Schristos __asm__ volatile("crc32x %w0, %w0, %x1" : "+r"(crc1) : "r"(val1)); 6413b1253e8Schristos __asm__ volatile("crc32x %w0, %w0, %x1" : "+r"(crc2) : "r"(val2)); 6423b1253e8Schristos } 6433b1253e8Schristos word += 3 * last; 6443b1253e8Schristos num -= 3 * last; 6453b1253e8Schristos val = x2nmodp(last, 6); 6463b1253e8Schristos crc = multmodp(val, crc) ^ crc1; 6473b1253e8Schristos crc = multmodp(val, crc) ^ crc2; 6483b1253e8Schristos } 6493b1253e8Schristos 6503b1253e8Schristos /* Compute the CRC on any remaining words. */ 6513b1253e8Schristos for (i = 0; i < num; i++) { 6523b1253e8Schristos val0 = word[i]; 6533b1253e8Schristos __asm__ volatile("crc32x %w0, %w0, %x1" : "+r"(crc) : "r"(val0)); 6543b1253e8Schristos } 6553b1253e8Schristos word += num; 6563b1253e8Schristos 6573b1253e8Schristos /* Complete the CRC on any remaining bytes. */ 6583b1253e8Schristos buf = (const unsigned char FAR *)word; 6593b1253e8Schristos while (len) { 6603b1253e8Schristos len--; 6613b1253e8Schristos val = *buf++; 6623b1253e8Schristos __asm__ volatile("crc32b %w0, %w0, %w1" : "+r"(crc) : "r"(val)); 6633b1253e8Schristos } 6643b1253e8Schristos 6653b1253e8Schristos /* Return the CRC, post-conditioned. */ 6663b1253e8Schristos return crc ^ 0xffffffff; 6673b1253e8Schristos } 6683b1253e8Schristos 6693b1253e8Schristos #else 6703b1253e8Schristos 6713b1253e8Schristos #ifdef W 6723b1253e8Schristos 6733b1253e8Schristos /* 6743b1253e8Schristos Return the CRC of the W bytes in the word_t data, taking the 6753b1253e8Schristos least-significant byte of the word as the first byte of data, without any pre 6763b1253e8Schristos or post conditioning. This is used to combine the CRCs of each braid. 6773b1253e8Schristos */ 678*96c32821Schristos local z_crc_t crc_word(z_word_t data) { 6793b1253e8Schristos int k; 6803b1253e8Schristos for (k = 0; k < W; k++) 6813b1253e8Schristos data = (data >> 8) ^ crc_table[data & 0xff]; 6823b1253e8Schristos return (z_crc_t)data; 6833b1253e8Schristos } 6843b1253e8Schristos 685*96c32821Schristos local z_word_t crc_word_big(z_word_t data) { 6863b1253e8Schristos int k; 6873b1253e8Schristos for (k = 0; k < W; k++) 6883b1253e8Schristos data = (data << 8) ^ 6893b1253e8Schristos crc_big_table[(data >> ((W - 1) << 3)) & 0xff]; 6903b1253e8Schristos return data; 6913b1253e8Schristos } 6923b1253e8Schristos 6933b1253e8Schristos #endif 694aaf4ece6Schristos 695aaf4ece6Schristos /* ========================================================================= */ 696*96c32821Schristos unsigned long ZEXPORT crc32_z(unsigned long crc, const unsigned char FAR *buf, 697*96c32821Schristos z_size_t len) { 6983b1253e8Schristos /* Return initial CRC, if requested. */ 6993b1253e8Schristos if (buf == Z_NULL) return 0; 700aaf4ece6Schristos 701aaf4ece6Schristos #ifdef DYNAMIC_CRC_TABLE 7023b1253e8Schristos once(&made, make_crc_table); 703aaf4ece6Schristos #endif /* DYNAMIC_CRC_TABLE */ 704aaf4ece6Schristos 7053b1253e8Schristos /* Pre-condition the CRC */ 7063b1253e8Schristos crc = (~crc) & 0xffffffff; 707aaf4ece6Schristos 7083b1253e8Schristos #ifdef W 7093b1253e8Schristos 7103b1253e8Schristos /* If provided enough bytes, do a braided CRC calculation. */ 7113b1253e8Schristos if (len >= N * W + W - 1) { 7123b1253e8Schristos z_size_t blks; 7133b1253e8Schristos z_word_t const *words; 7143b1253e8Schristos unsigned endian; 7153b1253e8Schristos int k; 7163b1253e8Schristos 7173b1253e8Schristos /* Compute the CRC up to a z_word_t boundary. */ 7183b1253e8Schristos while (len && ((z_size_t)buf & (W - 1)) != 0) { 7193b1253e8Schristos len--; 7203b1253e8Schristos crc = (crc >> 8) ^ crc_table[(crc ^ *buf++) & 0xff]; 7213b1253e8Schristos } 7223b1253e8Schristos 7233b1253e8Schristos /* Compute the CRC on as many N z_word_t blocks as are available. */ 7243b1253e8Schristos blks = len / (N * W); 7253b1253e8Schristos len -= blks * N * W; 7263b1253e8Schristos words = (z_word_t const *)buf; 7273b1253e8Schristos 7283b1253e8Schristos /* Do endian check at execution time instead of compile time, since ARM 729*96c32821Schristos processors can change the endianness at execution time. If the 730*96c32821Schristos compiler knows what the endianness will be, it can optimize out the 7313b1253e8Schristos check and the unused branch. */ 732aaf4ece6Schristos endian = 1; 7333b1253e8Schristos if (*(unsigned char *)&endian) { 7343b1253e8Schristos /* Little endian. */ 7353b1253e8Schristos 7363b1253e8Schristos z_crc_t crc0; 7373b1253e8Schristos z_word_t word0; 7383b1253e8Schristos #if N > 1 7393b1253e8Schristos z_crc_t crc1; 7403b1253e8Schristos z_word_t word1; 7413b1253e8Schristos #if N > 2 7423b1253e8Schristos z_crc_t crc2; 7433b1253e8Schristos z_word_t word2; 7443b1253e8Schristos #if N > 3 7453b1253e8Schristos z_crc_t crc3; 7463b1253e8Schristos z_word_t word3; 7473b1253e8Schristos #if N > 4 7483b1253e8Schristos z_crc_t crc4; 7493b1253e8Schristos z_word_t word4; 7503b1253e8Schristos #if N > 5 7513b1253e8Schristos z_crc_t crc5; 7523b1253e8Schristos z_word_t word5; 7533b1253e8Schristos #endif 7543b1253e8Schristos #endif 7553b1253e8Schristos #endif 7563b1253e8Schristos #endif 7573b1253e8Schristos #endif 7583b1253e8Schristos 7593b1253e8Schristos /* Initialize the CRC for each braid. */ 7603b1253e8Schristos crc0 = crc; 7613b1253e8Schristos #if N > 1 7623b1253e8Schristos crc1 = 0; 7633b1253e8Schristos #if N > 2 7643b1253e8Schristos crc2 = 0; 7653b1253e8Schristos #if N > 3 7663b1253e8Schristos crc3 = 0; 7673b1253e8Schristos #if N > 4 7683b1253e8Schristos crc4 = 0; 7693b1253e8Schristos #if N > 5 7703b1253e8Schristos crc5 = 0; 7713b1253e8Schristos #endif 7723b1253e8Schristos #endif 7733b1253e8Schristos #endif 7743b1253e8Schristos #endif 7753b1253e8Schristos #endif 7763b1253e8Schristos 7773b1253e8Schristos /* 7783b1253e8Schristos Process the first blks-1 blocks, computing the CRCs on each braid 7793b1253e8Schristos independently. 7803b1253e8Schristos */ 7813b1253e8Schristos while (--blks) { 7823b1253e8Schristos /* Load the word for each braid into registers. */ 7833b1253e8Schristos word0 = crc0 ^ words[0]; 7843b1253e8Schristos #if N > 1 7853b1253e8Schristos word1 = crc1 ^ words[1]; 7863b1253e8Schristos #if N > 2 7873b1253e8Schristos word2 = crc2 ^ words[2]; 7883b1253e8Schristos #if N > 3 7893b1253e8Schristos word3 = crc3 ^ words[3]; 7903b1253e8Schristos #if N > 4 7913b1253e8Schristos word4 = crc4 ^ words[4]; 7923b1253e8Schristos #if N > 5 7933b1253e8Schristos word5 = crc5 ^ words[5]; 7943b1253e8Schristos #endif 7953b1253e8Schristos #endif 7963b1253e8Schristos #endif 7973b1253e8Schristos #endif 7983b1253e8Schristos #endif 7993b1253e8Schristos words += N; 8003b1253e8Schristos 8013b1253e8Schristos /* Compute and update the CRC for each word. The loop should 8023b1253e8Schristos get unrolled. */ 8033b1253e8Schristos crc0 = crc_braid_table[0][word0 & 0xff]; 8043b1253e8Schristos #if N > 1 8053b1253e8Schristos crc1 = crc_braid_table[0][word1 & 0xff]; 8063b1253e8Schristos #if N > 2 8073b1253e8Schristos crc2 = crc_braid_table[0][word2 & 0xff]; 8083b1253e8Schristos #if N > 3 8093b1253e8Schristos crc3 = crc_braid_table[0][word3 & 0xff]; 8103b1253e8Schristos #if N > 4 8113b1253e8Schristos crc4 = crc_braid_table[0][word4 & 0xff]; 8123b1253e8Schristos #if N > 5 8133b1253e8Schristos crc5 = crc_braid_table[0][word5 & 0xff]; 8143b1253e8Schristos #endif 8153b1253e8Schristos #endif 8163b1253e8Schristos #endif 8173b1253e8Schristos #endif 8183b1253e8Schristos #endif 8193b1253e8Schristos for (k = 1; k < W; k++) { 8203b1253e8Schristos crc0 ^= crc_braid_table[k][(word0 >> (k << 3)) & 0xff]; 8213b1253e8Schristos #if N > 1 8223b1253e8Schristos crc1 ^= crc_braid_table[k][(word1 >> (k << 3)) & 0xff]; 8233b1253e8Schristos #if N > 2 8243b1253e8Schristos crc2 ^= crc_braid_table[k][(word2 >> (k << 3)) & 0xff]; 8253b1253e8Schristos #if N > 3 8263b1253e8Schristos crc3 ^= crc_braid_table[k][(word3 >> (k << 3)) & 0xff]; 8273b1253e8Schristos #if N > 4 8283b1253e8Schristos crc4 ^= crc_braid_table[k][(word4 >> (k << 3)) & 0xff]; 8293b1253e8Schristos #if N > 5 8303b1253e8Schristos crc5 ^= crc_braid_table[k][(word5 >> (k << 3)) & 0xff]; 8313b1253e8Schristos #endif 8323b1253e8Schristos #endif 8333b1253e8Schristos #endif 8343b1253e8Schristos #endif 8353b1253e8Schristos #endif 836aaf4ece6Schristos } 8373b1253e8Schristos } 8383b1253e8Schristos 8393b1253e8Schristos /* 8403b1253e8Schristos Process the last block, combining the CRCs of the N braids at the 8413b1253e8Schristos same time. 8423b1253e8Schristos */ 8433b1253e8Schristos crc = crc_word(crc0 ^ words[0]); 8443b1253e8Schristos #if N > 1 8453b1253e8Schristos crc = crc_word(crc1 ^ words[1] ^ crc); 8463b1253e8Schristos #if N > 2 8473b1253e8Schristos crc = crc_word(crc2 ^ words[2] ^ crc); 8483b1253e8Schristos #if N > 3 8493b1253e8Schristos crc = crc_word(crc3 ^ words[3] ^ crc); 8503b1253e8Schristos #if N > 4 8513b1253e8Schristos crc = crc_word(crc4 ^ words[4] ^ crc); 8523b1253e8Schristos #if N > 5 8533b1253e8Schristos crc = crc_word(crc5 ^ words[5] ^ crc); 8543b1253e8Schristos #endif 8553b1253e8Schristos #endif 8563b1253e8Schristos #endif 8573b1253e8Schristos #endif 8583b1253e8Schristos #endif 8593b1253e8Schristos words += N; 8603b1253e8Schristos } 8613b1253e8Schristos else { 8623b1253e8Schristos /* Big endian. */ 8633b1253e8Schristos 8643b1253e8Schristos z_word_t crc0, word0, comb; 8653b1253e8Schristos #if N > 1 8663b1253e8Schristos z_word_t crc1, word1; 8673b1253e8Schristos #if N > 2 8683b1253e8Schristos z_word_t crc2, word2; 8693b1253e8Schristos #if N > 3 8703b1253e8Schristos z_word_t crc3, word3; 8713b1253e8Schristos #if N > 4 8723b1253e8Schristos z_word_t crc4, word4; 8733b1253e8Schristos #if N > 5 8743b1253e8Schristos z_word_t crc5, word5; 8753b1253e8Schristos #endif 8763b1253e8Schristos #endif 8773b1253e8Schristos #endif 8783b1253e8Schristos #endif 8793b1253e8Schristos #endif 8803b1253e8Schristos 8813b1253e8Schristos /* Initialize the CRC for each braid. */ 8823b1253e8Schristos crc0 = byte_swap(crc); 8833b1253e8Schristos #if N > 1 8843b1253e8Schristos crc1 = 0; 8853b1253e8Schristos #if N > 2 8863b1253e8Schristos crc2 = 0; 8873b1253e8Schristos #if N > 3 8883b1253e8Schristos crc3 = 0; 8893b1253e8Schristos #if N > 4 8903b1253e8Schristos crc4 = 0; 8913b1253e8Schristos #if N > 5 8923b1253e8Schristos crc5 = 0; 8933b1253e8Schristos #endif 8943b1253e8Schristos #endif 8953b1253e8Schristos #endif 8963b1253e8Schristos #endif 8973b1253e8Schristos #endif 8983b1253e8Schristos 8993b1253e8Schristos /* 9003b1253e8Schristos Process the first blks-1 blocks, computing the CRCs on each braid 9013b1253e8Schristos independently. 9023b1253e8Schristos */ 9033b1253e8Schristos while (--blks) { 9043b1253e8Schristos /* Load the word for each braid into registers. */ 9053b1253e8Schristos word0 = crc0 ^ words[0]; 9063b1253e8Schristos #if N > 1 9073b1253e8Schristos word1 = crc1 ^ words[1]; 9083b1253e8Schristos #if N > 2 9093b1253e8Schristos word2 = crc2 ^ words[2]; 9103b1253e8Schristos #if N > 3 9113b1253e8Schristos word3 = crc3 ^ words[3]; 9123b1253e8Schristos #if N > 4 9133b1253e8Schristos word4 = crc4 ^ words[4]; 9143b1253e8Schristos #if N > 5 9153b1253e8Schristos word5 = crc5 ^ words[5]; 9163b1253e8Schristos #endif 9173b1253e8Schristos #endif 9183b1253e8Schristos #endif 9193b1253e8Schristos #endif 9203b1253e8Schristos #endif 9213b1253e8Schristos words += N; 9223b1253e8Schristos 9233b1253e8Schristos /* Compute and update the CRC for each word. The loop should 9243b1253e8Schristos get unrolled. */ 9253b1253e8Schristos crc0 = crc_braid_big_table[0][word0 & 0xff]; 9263b1253e8Schristos #if N > 1 9273b1253e8Schristos crc1 = crc_braid_big_table[0][word1 & 0xff]; 9283b1253e8Schristos #if N > 2 9293b1253e8Schristos crc2 = crc_braid_big_table[0][word2 & 0xff]; 9303b1253e8Schristos #if N > 3 9313b1253e8Schristos crc3 = crc_braid_big_table[0][word3 & 0xff]; 9323b1253e8Schristos #if N > 4 9333b1253e8Schristos crc4 = crc_braid_big_table[0][word4 & 0xff]; 9343b1253e8Schristos #if N > 5 9353b1253e8Schristos crc5 = crc_braid_big_table[0][word5 & 0xff]; 9363b1253e8Schristos #endif 9373b1253e8Schristos #endif 9383b1253e8Schristos #endif 9393b1253e8Schristos #endif 9403b1253e8Schristos #endif 9413b1253e8Schristos for (k = 1; k < W; k++) { 9423b1253e8Schristos crc0 ^= crc_braid_big_table[k][(word0 >> (k << 3)) & 0xff]; 9433b1253e8Schristos #if N > 1 9443b1253e8Schristos crc1 ^= crc_braid_big_table[k][(word1 >> (k << 3)) & 0xff]; 9453b1253e8Schristos #if N > 2 9463b1253e8Schristos crc2 ^= crc_braid_big_table[k][(word2 >> (k << 3)) & 0xff]; 9473b1253e8Schristos #if N > 3 9483b1253e8Schristos crc3 ^= crc_braid_big_table[k][(word3 >> (k << 3)) & 0xff]; 9493b1253e8Schristos #if N > 4 9503b1253e8Schristos crc4 ^= crc_braid_big_table[k][(word4 >> (k << 3)) & 0xff]; 9513b1253e8Schristos #if N > 5 9523b1253e8Schristos crc5 ^= crc_braid_big_table[k][(word5 >> (k << 3)) & 0xff]; 9533b1253e8Schristos #endif 9543b1253e8Schristos #endif 9553b1253e8Schristos #endif 9563b1253e8Schristos #endif 9573b1253e8Schristos #endif 9583b1253e8Schristos } 9593b1253e8Schristos } 9603b1253e8Schristos 9613b1253e8Schristos /* 9623b1253e8Schristos Process the last block, combining the CRCs of the N braids at the 9633b1253e8Schristos same time. 9643b1253e8Schristos */ 9653b1253e8Schristos comb = crc_word_big(crc0 ^ words[0]); 9663b1253e8Schristos #if N > 1 9673b1253e8Schristos comb = crc_word_big(crc1 ^ words[1] ^ comb); 9683b1253e8Schristos #if N > 2 9693b1253e8Schristos comb = crc_word_big(crc2 ^ words[2] ^ comb); 9703b1253e8Schristos #if N > 3 9713b1253e8Schristos comb = crc_word_big(crc3 ^ words[3] ^ comb); 9723b1253e8Schristos #if N > 4 9733b1253e8Schristos comb = crc_word_big(crc4 ^ words[4] ^ comb); 9743b1253e8Schristos #if N > 5 9753b1253e8Schristos comb = crc_word_big(crc5 ^ words[5] ^ comb); 9763b1253e8Schristos #endif 9773b1253e8Schristos #endif 9783b1253e8Schristos #endif 9793b1253e8Schristos #endif 9803b1253e8Schristos #endif 9813b1253e8Schristos words += N; 9823b1253e8Schristos crc = byte_swap(comb); 9833b1253e8Schristos } 9843b1253e8Schristos 9853b1253e8Schristos /* 9863b1253e8Schristos Update the pointer to the remaining bytes to process. 9873b1253e8Schristos */ 9883b1253e8Schristos buf = (unsigned char const *)words; 9893b1253e8Schristos } 9903b1253e8Schristos 9913b1253e8Schristos #endif /* W */ 9923b1253e8Schristos 9933b1253e8Schristos /* Complete the computation of the CRC on any remaining bytes. */ 994aaf4ece6Schristos while (len >= 8) { 995aaf4ece6Schristos len -= 8; 9963b1253e8Schristos crc = (crc >> 8) ^ crc_table[(crc ^ *buf++) & 0xff]; 9973b1253e8Schristos crc = (crc >> 8) ^ crc_table[(crc ^ *buf++) & 0xff]; 9983b1253e8Schristos crc = (crc >> 8) ^ crc_table[(crc ^ *buf++) & 0xff]; 9993b1253e8Schristos crc = (crc >> 8) ^ crc_table[(crc ^ *buf++) & 0xff]; 10003b1253e8Schristos crc = (crc >> 8) ^ crc_table[(crc ^ *buf++) & 0xff]; 10013b1253e8Schristos crc = (crc >> 8) ^ crc_table[(crc ^ *buf++) & 0xff]; 10023b1253e8Schristos crc = (crc >> 8) ^ crc_table[(crc ^ *buf++) & 0xff]; 10033b1253e8Schristos crc = (crc >> 8) ^ crc_table[(crc ^ *buf++) & 0xff]; 1004aaf4ece6Schristos } 10053b1253e8Schristos while (len) { 10063b1253e8Schristos len--; 10073b1253e8Schristos crc = (crc >> 8) ^ crc_table[(crc ^ *buf++) & 0xff]; 1008aaf4ece6Schristos } 1009aaf4ece6Schristos 10103b1253e8Schristos /* Return the CRC, post-conditioned. */ 10113b1253e8Schristos return crc ^ 0xffffffff; 10123b1253e8Schristos } 10133b1253e8Schristos 10143b1253e8Schristos #endif 10153b1253e8Schristos 10166db8c6e9Schristos /* ========================================================================= */ 1017*96c32821Schristos unsigned long ZEXPORT crc32(unsigned long crc, const unsigned char FAR *buf, 1018*96c32821Schristos uInt len) { 10196db8c6e9Schristos return crc32_z(crc, buf, len); 10206db8c6e9Schristos } 10216db8c6e9Schristos 1022aaf4ece6Schristos /* ========================================================================= */ 1023*96c32821Schristos uLong ZEXPORT crc32_combine64(uLong crc1, uLong crc2, z_off64_t len2) { 10243b1253e8Schristos #ifdef DYNAMIC_CRC_TABLE 10253b1253e8Schristos once(&made, make_crc_table); 10263b1253e8Schristos #endif /* DYNAMIC_CRC_TABLE */ 10273b1253e8Schristos return multmodp(x2nmodp(len2, 3), crc1) ^ (crc2 & 0xffffffff); 1028aaf4ece6Schristos } 10296db8c6e9Schristos 10306db8c6e9Schristos /* ========================================================================= */ 1031*96c32821Schristos uLong ZEXPORT crc32_combine(uLong crc1, uLong crc2, z_off_t len2) { 10323b1253e8Schristos return crc32_combine64(crc1, crc2, (z_off64_t)len2); 10336db8c6e9Schristos } 10346db8c6e9Schristos 10353b1253e8Schristos /* ========================================================================= */ 1036*96c32821Schristos uLong ZEXPORT crc32_combine_gen64(z_off64_t len2) { 10373b1253e8Schristos #ifdef DYNAMIC_CRC_TABLE 10383b1253e8Schristos once(&made, make_crc_table); 10393b1253e8Schristos #endif /* DYNAMIC_CRC_TABLE */ 10403b1253e8Schristos return x2nmodp(len2, 3); 10413b1253e8Schristos } 10423b1253e8Schristos 10433b1253e8Schristos /* ========================================================================= */ 1044*96c32821Schristos uLong ZEXPORT crc32_combine_gen(z_off_t len2) { 10453b1253e8Schristos return crc32_combine_gen64((z_off64_t)len2); 10463b1253e8Schristos } 10473b1253e8Schristos 10483b1253e8Schristos /* ========================================================================= */ 1049*96c32821Schristos uLong ZEXPORT crc32_combine_op(uLong crc1, uLong crc2, uLong op) { 10503b1253e8Schristos return multmodp(op, crc1) ^ (crc2 & 0xffffffff); 10516db8c6e9Schristos } 1052