1 /* $NetBSD: crc32.c,v 1.7 2024/09/22 19:12:27 christos Exp $ */ 2 3 /* crc32.c -- compute the CRC-32 of a data stream 4 * Copyright (C) 1995-2022 Mark Adler 5 * For conditions of distribution and use, see copyright notice in zlib.h 6 * 7 * This interleaved implementation of a CRC makes use of pipelined multiple 8 * arithmetic-logic units, commonly found in modern CPU cores. It is due to 9 * Kadatch and Jenkins (2010). See doc/crc-doc.1.0.pdf in this distribution. 10 */ 11 12 /* @(#) Id */ 13 14 /* 15 Note on the use of DYNAMIC_CRC_TABLE: there is no mutex or semaphore 16 protection on the static variables used to control the first-use generation 17 of the crc tables. Therefore, if you #define DYNAMIC_CRC_TABLE, you should 18 first call get_crc_table() to initialize the tables before allowing more than 19 one thread to use crc32(). 20 21 MAKECRCH can be #defined to write out crc32.h. A main() routine is also 22 produced, so that this one source file can be compiled to an executable. 23 */ 24 25 #ifdef MAKECRCH 26 # include <stdio.h> 27 # ifndef DYNAMIC_CRC_TABLE 28 # define DYNAMIC_CRC_TABLE 29 # endif /* !DYNAMIC_CRC_TABLE */ 30 #endif /* MAKECRCH */ 31 32 #include "zutil.h" /* for Z_U4, Z_U8, z_crc_t, and FAR definitions */ 33 34 /* 35 A CRC of a message is computed on N braids of words in the message, where 36 each word consists of W bytes (4 or 8). If N is 3, for example, then three 37 running sparse CRCs are calculated respectively on each braid, at these 38 indices in the array of words: 0, 3, 6, ..., 1, 4, 7, ..., and 2, 5, 8, ... 39 This is done starting at a word boundary, and continues until as many blocks 40 of N * W bytes as are available have been processed. The results are combined 41 into a single CRC at the end. For this code, N must be in the range 1..6 and 42 W must be 4 or 8. The upper limit on N can be increased if desired by adding 43 more #if blocks, extending the patterns apparent in the code. In addition, 44 crc32.h would need to be regenerated, if the maximum N value is increased. 45 46 N and W are chosen empirically by benchmarking the execution time on a given 47 processor. The choices for N and W below were based on testing on Intel Kaby 48 Lake i7, AMD Ryzen 7, ARM Cortex-A57, Sparc64-VII, PowerPC POWER9, and MIPS64 49 Octeon II processors. The Intel, AMD, and ARM processors were all fastest 50 with N=5, W=8. The Sparc, PowerPC, and MIPS64 were all fastest at N=5, W=4. 51 They were all tested with either gcc or clang, all using the -O3 optimization 52 level. Your mileage may vary. 53 */ 54 55 /* Define N */ 56 #ifdef Z_TESTN 57 # define N Z_TESTN 58 #else 59 # define N 5 60 #endif 61 #if N < 1 || N > 6 62 # error N must be in 1..6 63 #endif 64 65 /* 66 z_crc_t must be at least 32 bits. z_word_t must be at least as long as 67 z_crc_t. It is assumed here that z_word_t is either 32 bits or 64 bits, and 68 that bytes are eight bits. 69 */ 70 71 /* 72 Define W and the associated z_word_t type. If W is not defined, then a 73 braided calculation is not used, and the associated tables and code are not 74 compiled. 75 */ 76 #ifdef Z_TESTW 77 # if Z_TESTW-1 != -1 78 # define W Z_TESTW 79 # endif 80 #else 81 # ifdef MAKECRCH 82 # define W 8 /* required for MAKECRCH */ 83 # else 84 # if defined(__x86_64__) || defined(__aarch64__) 85 # define W 8 86 # else 87 # define W 4 88 # endif 89 # endif 90 #endif 91 #ifdef W 92 # if W == 8 && defined(Z_U8) 93 typedef Z_U8 z_word_t; 94 # elif defined(Z_U4) 95 # undef W 96 # define W 4 97 typedef Z_U4 z_word_t; 98 # else 99 # undef W 100 # endif 101 #endif 102 103 /* If available, use the ARM processor CRC32 instruction. */ 104 #if defined(__aarch64__) && defined(__ARM_FEATURE_CRC32) && W == 8 105 # define ARMCRC32 106 #endif 107 108 #if defined(W) && (!defined(ARMCRC32) || defined(DYNAMIC_CRC_TABLE)) 109 /* 110 Swap the bytes in a z_word_t to convert between little and big endian. Any 111 self-respecting compiler will optimize this to a single machine byte-swap 112 instruction, if one is available. This assumes that word_t is either 32 bits 113 or 64 bits. 114 */ 115 local z_word_t byte_swap(z_word_t word) { 116 # if W == 8 117 return 118 (word & 0xff00000000000000) >> 56 | 119 (word & 0xff000000000000) >> 40 | 120 (word & 0xff0000000000) >> 24 | 121 (word & 0xff00000000) >> 8 | 122 (word & 0xff000000) << 8 | 123 (word & 0xff0000) << 24 | 124 (word & 0xff00) << 40 | 125 (word & 0xff) << 56; 126 # else /* W == 4 */ 127 return 128 (word & 0xff000000) >> 24 | 129 (word & 0xff0000) >> 8 | 130 (word & 0xff00) << 8 | 131 (word & 0xff) << 24; 132 # endif 133 } 134 #endif 135 136 #ifdef DYNAMIC_CRC_TABLE 137 /* ========================================================================= 138 * Table of powers of x for combining CRC-32s, filled in by make_crc_table() 139 * below. 140 */ 141 local z_crc_t FAR x2n_table[32]; 142 #else 143 /* ========================================================================= 144 * Tables for byte-wise and braided CRC-32 calculations, and a table of powers 145 * of x for combining CRC-32s, all made by make_crc_table(). 146 */ 147 # include "crc32.h" 148 #endif 149 150 /* CRC polynomial. */ 151 #define POLY 0xedb88320 /* p(x) reflected, with x^32 implied */ 152 153 /* 154 Return a(x) multiplied by b(x) modulo p(x), where p(x) is the CRC polynomial, 155 reflected. For speed, this requires that a not be zero. 156 */ 157 local z_crc_t multmodp(z_crc_t a, z_crc_t b) { 158 z_crc_t m, p; 159 160 m = (z_crc_t)1 << 31; 161 p = 0; 162 for (;;) { 163 if (a & m) { 164 p ^= b; 165 if ((a & (m - 1)) == 0) 166 break; 167 } 168 m >>= 1; 169 b = b & 1 ? (b >> 1) ^ POLY : b >> 1; 170 } 171 return p; 172 } 173 174 /* 175 Return x^(n * 2^k) modulo p(x). Requires that x2n_table[] has been 176 initialized. 177 */ 178 local z_crc_t x2nmodp(z_off64_t n, unsigned k) { 179 z_crc_t p; 180 181 p = (z_crc_t)1 << 31; /* x^0 == 1 */ 182 while (n) { 183 if (n & 1) 184 p = multmodp(x2n_table[k & 31], p); 185 n >>= 1; 186 k++; 187 } 188 return p; 189 } 190 191 #ifdef DYNAMIC_CRC_TABLE 192 /* ========================================================================= 193 * Build the tables for byte-wise and braided CRC-32 calculations, and a table 194 * of powers of x for combining CRC-32s. 195 */ 196 local z_crc_t FAR crc_table[256]; 197 #ifdef W 198 local z_word_t FAR crc_big_table[256]; 199 local z_crc_t FAR crc_braid_table[W][256]; 200 local z_word_t FAR crc_braid_big_table[W][256]; 201 local void braid(z_crc_t [][256], z_word_t [][256], int, int); 202 #endif 203 #ifdef MAKECRCH 204 local void write_table(FILE *, const z_crc_t FAR *, int); 205 local void write_table32hi(FILE *, const z_word_t FAR *, int); 206 local void write_table64(FILE *, const z_word_t FAR *, int); 207 #endif /* MAKECRCH */ 208 209 /* 210 Define a once() function depending on the availability of atomics. If this is 211 compiled with DYNAMIC_CRC_TABLE defined, and if CRCs will be computed in 212 multiple threads, and if atomics are not available, then get_crc_table() must 213 be called to initialize the tables and must return before any threads are 214 allowed to compute or combine CRCs. 215 */ 216 217 /* Definition of once functionality. */ 218 typedef struct once_s once_t; 219 220 /* Check for the availability of atomics. */ 221 #if defined(__STDC__) && __STDC_VERSION__ >= 201112L && \ 222 !defined(__STDC_NO_ATOMICS__) 223 224 #include <stdatomic.h> 225 226 /* Structure for once(), which must be initialized with ONCE_INIT. */ 227 struct once_s { 228 atomic_flag begun; 229 atomic_int done; 230 }; 231 #define ONCE_INIT {ATOMIC_FLAG_INIT, 0} 232 233 /* 234 Run the provided init() function exactly once, even if multiple threads 235 invoke once() at the same time. The state must be a once_t initialized with 236 ONCE_INIT. 237 */ 238 local void once(once_t *state, void (*init)(void)) { 239 if (!atomic_load(&state->done)) { 240 if (atomic_flag_test_and_set(&state->begun)) 241 while (!atomic_load(&state->done)) 242 ; 243 else { 244 init(); 245 atomic_store(&state->done, 1); 246 } 247 } 248 } 249 250 #else /* no atomics */ 251 252 /* Structure for once(), which must be initialized with ONCE_INIT. */ 253 struct once_s { 254 volatile int begun; 255 volatile int done; 256 }; 257 #define ONCE_INIT {0, 0} 258 259 /* Test and set. Alas, not atomic, but tries to minimize the period of 260 vulnerability. */ 261 local int test_and_set(int volatile *flag) { 262 int was; 263 264 was = *flag; 265 *flag = 1; 266 return was; 267 } 268 269 /* Run the provided init() function once. This is not thread-safe. */ 270 local void once(once_t *state, void (*init)(void)) { 271 if (!state->done) { 272 if (test_and_set(&state->begun)) 273 while (!state->done) 274 ; 275 else { 276 init(); 277 state->done = 1; 278 } 279 } 280 } 281 282 #endif 283 284 /* State for once(). */ 285 local once_t made = ONCE_INIT; 286 287 /* 288 Generate tables for a byte-wise 32-bit CRC calculation on the polynomial: 289 x^32+x^26+x^23+x^22+x^16+x^12+x^11+x^10+x^8+x^7+x^5+x^4+x^2+x+1. 290 291 Polynomials over GF(2) are represented in binary, one bit per coefficient, 292 with the lowest powers in the most significant bit. Then adding polynomials 293 is just exclusive-or, and multiplying a polynomial by x is a right shift by 294 one. If we call the above polynomial p, and represent a byte as the 295 polynomial q, also with the lowest power in the most significant bit (so the 296 byte 0xb1 is the polynomial x^7+x^3+x^2+1), then the CRC is (q*x^32) mod p, 297 where a mod b means the remainder after dividing a by b. 298 299 This calculation is done using the shift-register method of multiplying and 300 taking the remainder. The register is initialized to zero, and for each 301 incoming bit, x^32 is added mod p to the register if the bit is a one (where 302 x^32 mod p is p+x^32 = x^26+...+1), and the register is multiplied mod p by x 303 (which is shifting right by one and adding x^32 mod p if the bit shifted out 304 is a one). We start with the highest power (least significant bit) of q and 305 repeat for all eight bits of q. 306 307 The table is simply the CRC of all possible eight bit values. This is all the 308 information needed to generate CRCs on data a byte at a time for all 309 combinations of CRC register values and incoming bytes. 310 */ 311 312 local void make_crc_table(void) { 313 unsigned i, j, n; 314 z_crc_t p; 315 316 /* initialize the CRC of bytes tables */ 317 for (i = 0; i < 256; i++) { 318 p = i; 319 for (j = 0; j < 8; j++) 320 p = p & 1 ? (p >> 1) ^ POLY : p >> 1; 321 crc_table[i] = p; 322 #ifdef W 323 crc_big_table[i] = byte_swap(p); 324 #endif 325 } 326 327 /* initialize the x^2^n mod p(x) table */ 328 p = (z_crc_t)1 << 30; /* x^1 */ 329 x2n_table[0] = p; 330 for (n = 1; n < 32; n++) 331 x2n_table[n] = p = multmodp(p, p); 332 333 #ifdef W 334 /* initialize the braiding tables -- needs x2n_table[] */ 335 braid(crc_braid_table, crc_braid_big_table, N, W); 336 #endif 337 338 #ifdef MAKECRCH 339 { 340 /* 341 The crc32.h header file contains tables for both 32-bit and 64-bit 342 z_word_t's, and so requires a 64-bit type be available. In that case, 343 z_word_t must be defined to be 64-bits. This code then also generates 344 and writes out the tables for the case that z_word_t is 32 bits. 345 */ 346 #if !defined(W) || W != 8 347 # error Need a 64-bit integer type in order to generate crc32.h. 348 #endif 349 FILE *out; 350 int k, n; 351 z_crc_t ltl[8][256]; 352 z_word_t big[8][256]; 353 354 out = fopen("crc32.h", "w"); 355 if (out == NULL) return; 356 357 /* write out little-endian CRC table to crc32.h */ 358 fprintf(out, 359 "/* crc32.h -- tables for rapid CRC calculation\n" 360 " * Generated automatically by crc32.c\n */\n" 361 "\n" 362 "local const z_crc_t FAR crc_table[] = {\n" 363 " "); 364 write_table(out, crc_table, 256); 365 fprintf(out, 366 "};\n"); 367 368 /* write out big-endian CRC table for 64-bit z_word_t to crc32.h */ 369 fprintf(out, 370 "\n" 371 "#ifdef W\n" 372 "\n" 373 "#if W == 8\n" 374 "\n" 375 "local const z_word_t FAR crc_big_table[] = {\n" 376 " "); 377 write_table64(out, crc_big_table, 256); 378 fprintf(out, 379 "};\n"); 380 381 /* write out big-endian CRC table for 32-bit z_word_t to crc32.h */ 382 fprintf(out, 383 "\n" 384 "#else /* W == 4 */\n" 385 "\n" 386 "local const z_word_t FAR crc_big_table[] = {\n" 387 " "); 388 write_table32hi(out, crc_big_table, 256); 389 fprintf(out, 390 "};\n" 391 "\n" 392 "#endif\n"); 393 394 /* write out braid tables for each value of N */ 395 for (n = 1; n <= 6; n++) { 396 fprintf(out, 397 "\n" 398 "#if N == %d\n", n); 399 400 /* compute braid tables for this N and 64-bit word_t */ 401 braid(ltl, big, n, 8); 402 403 /* write out braid tables for 64-bit z_word_t to crc32.h */ 404 fprintf(out, 405 "\n" 406 "#if W == 8\n" 407 "\n" 408 "local const z_crc_t FAR crc_braid_table[][256] = {\n"); 409 for (k = 0; k < 8; k++) { 410 fprintf(out, " {"); 411 write_table(out, ltl[k], 256); 412 fprintf(out, "}%s", k < 7 ? ",\n" : ""); 413 } 414 fprintf(out, 415 "};\n" 416 "\n" 417 "local const z_word_t FAR crc_braid_big_table[][256] = {\n"); 418 for (k = 0; k < 8; k++) { 419 fprintf(out, " {"); 420 write_table64(out, big[k], 256); 421 fprintf(out, "}%s", k < 7 ? ",\n" : ""); 422 } 423 fprintf(out, 424 "};\n"); 425 426 /* compute braid tables for this N and 32-bit word_t */ 427 braid(ltl, big, n, 4); 428 429 /* write out braid tables for 32-bit z_word_t to crc32.h */ 430 fprintf(out, 431 "\n" 432 "#else /* W == 4 */\n" 433 "\n" 434 "local const z_crc_t FAR crc_braid_table[][256] = {\n"); 435 for (k = 0; k < 4; k++) { 436 fprintf(out, " {"); 437 write_table(out, ltl[k], 256); 438 fprintf(out, "}%s", k < 3 ? ",\n" : ""); 439 } 440 fprintf(out, 441 "};\n" 442 "\n" 443 "local const z_word_t FAR crc_braid_big_table[][256] = {\n"); 444 for (k = 0; k < 4; k++) { 445 fprintf(out, " {"); 446 write_table32hi(out, big[k], 256); 447 fprintf(out, "}%s", k < 3 ? ",\n" : ""); 448 } 449 fprintf(out, 450 "};\n" 451 "\n" 452 "#endif\n" 453 "\n" 454 "#endif\n"); 455 } 456 fprintf(out, 457 "\n" 458 "#endif\n"); 459 460 /* write out zeros operator table to crc32.h */ 461 fprintf(out, 462 "\n" 463 "local const z_crc_t FAR x2n_table[] = {\n" 464 " "); 465 write_table(out, x2n_table, 32); 466 fprintf(out, 467 "};\n"); 468 fclose(out); 469 } 470 #endif /* MAKECRCH */ 471 } 472 473 #ifdef MAKECRCH 474 475 /* 476 Write the 32-bit values in table[0..k-1] to out, five per line in 477 hexadecimal separated by commas. 478 */ 479 local void write_table(FILE *out, const z_crc_t FAR *table, int k) { 480 int n; 481 482 for (n = 0; n < k; n++) 483 fprintf(out, "%s0x%08lx%s", n == 0 || n % 5 ? "" : " ", 484 (unsigned long)(table[n]), 485 n == k - 1 ? "" : (n % 5 == 4 ? ",\n" : ", ")); 486 } 487 488 /* 489 Write the high 32-bits of each value in table[0..k-1] to out, five per line 490 in hexadecimal separated by commas. 491 */ 492 local void write_table32hi(FILE *out, const z_word_t FAR *table, int k) { 493 int n; 494 495 for (n = 0; n < k; n++) 496 fprintf(out, "%s0x%08lx%s", n == 0 || n % 5 ? "" : " ", 497 (unsigned long)(table[n] >> 32), 498 n == k - 1 ? "" : (n % 5 == 4 ? ",\n" : ", ")); 499 } 500 501 /* 502 Write the 64-bit values in table[0..k-1] to out, three per line in 503 hexadecimal separated by commas. This assumes that if there is a 64-bit 504 type, then there is also a long long integer type, and it is at least 64 505 bits. If not, then the type cast and format string can be adjusted 506 accordingly. 507 */ 508 local void write_table64(FILE *out, const z_word_t FAR *table, int k) { 509 int n; 510 511 for (n = 0; n < k; n++) 512 fprintf(out, "%s0x%016llx%s", n == 0 || n % 3 ? "" : " ", 513 (unsigned long long)(table[n]), 514 n == k - 1 ? "" : (n % 3 == 2 ? ",\n" : ", ")); 515 } 516 517 /* Actually do the deed. */ 518 int main(void) { 519 make_crc_table(); 520 return 0; 521 } 522 523 #endif /* MAKECRCH */ 524 525 #ifdef W 526 /* 527 Generate the little and big-endian braid tables for the given n and z_word_t 528 size w. Each array must have room for w blocks of 256 elements. 529 */ 530 local void braid(z_crc_t ltl[][256], z_word_t big[][256], int n, int w) { 531 int k; 532 z_crc_t i, p, q; 533 for (k = 0; k < w; k++) { 534 p = x2nmodp((n * w + 3 - k) << 3, 0); 535 ltl[k][0] = 0; 536 big[w - 1 - k][0] = 0; 537 for (i = 1; i < 256; i++) { 538 ltl[k][i] = q = multmodp(i << 24, p); 539 big[w - 1 - k][i] = byte_swap(q); 540 } 541 } 542 } 543 #endif 544 545 #endif /* DYNAMIC_CRC_TABLE */ 546 547 /* ========================================================================= 548 * This function can be used by asm versions of crc32(), and to force the 549 * generation of the CRC tables in a threaded application. 550 */ 551 const z_crc_t FAR * ZEXPORT get_crc_table(void) { 552 #ifdef DYNAMIC_CRC_TABLE 553 once(&made, make_crc_table); 554 #endif /* DYNAMIC_CRC_TABLE */ 555 return (const z_crc_t FAR *)crc_table; 556 } 557 558 /* ========================================================================= 559 * Use ARM machine instructions if available. This will compute the CRC about 560 * ten times faster than the braided calculation. This code does not check for 561 * the presence of the CRC instruction at run time. __ARM_FEATURE_CRC32 will 562 * only be defined if the compilation specifies an ARM processor architecture 563 * that has the instructions. For example, compiling with -march=armv8.1-a or 564 * -march=armv8-a+crc, or -march=native if the compile machine has the crc32 565 * instructions. 566 */ 567 #ifdef ARMCRC32 568 569 /* 570 Constants empirically determined to maximize speed. These values are from 571 measurements on a Cortex-A57. Your mileage may vary. 572 */ 573 #define Z_BATCH 3990 /* number of words in a batch */ 574 #define Z_BATCH_ZEROS 0xa10d3d0c /* computed from Z_BATCH = 3990 */ 575 #define Z_BATCH_MIN 800 /* fewest words in a final batch */ 576 577 unsigned long ZEXPORT crc32_z(unsigned long crc, const unsigned char FAR *buf, 578 z_size_t len) { 579 z_crc_t val; 580 z_word_t crc1, crc2; 581 const z_word_t *word; 582 z_word_t val0, val1, val2; 583 z_size_t last, last2, i; 584 z_size_t num; 585 586 /* Return initial CRC, if requested. */ 587 if (buf == Z_NULL) return 0; 588 589 #ifdef DYNAMIC_CRC_TABLE 590 once(&made, make_crc_table); 591 #endif /* DYNAMIC_CRC_TABLE */ 592 593 /* Pre-condition the CRC */ 594 crc = (~crc) & 0xffffffff; 595 596 /* Compute the CRC up to a word boundary. */ 597 while (len && ((z_size_t)buf & 7) != 0) { 598 len--; 599 val = *buf++; 600 __asm__ volatile("crc32b %w0, %w0, %w1" : "+r"(crc) : "r"(val)); 601 } 602 603 /* Prepare to compute the CRC on full 64-bit words word[0..num-1]. */ 604 word = (z_word_t const *)buf; 605 num = len >> 3; 606 len &= 7; 607 608 /* Do three interleaved CRCs to realize the throughput of one crc32x 609 instruction per cycle. Each CRC is calculated on Z_BATCH words. The 610 three CRCs are combined into a single CRC after each set of batches. */ 611 while (num >= 3 * Z_BATCH) { 612 crc1 = 0; 613 crc2 = 0; 614 for (i = 0; i < Z_BATCH; i++) { 615 val0 = word[i]; 616 val1 = word[i + Z_BATCH]; 617 val2 = word[i + 2 * Z_BATCH]; 618 __asm__ volatile("crc32x %w0, %w0, %x1" : "+r"(crc) : "r"(val0)); 619 __asm__ volatile("crc32x %w0, %w0, %x1" : "+r"(crc1) : "r"(val1)); 620 __asm__ volatile("crc32x %w0, %w0, %x1" : "+r"(crc2) : "r"(val2)); 621 } 622 word += 3 * Z_BATCH; 623 num -= 3 * Z_BATCH; 624 crc = multmodp(Z_BATCH_ZEROS, crc) ^ crc1; 625 crc = multmodp(Z_BATCH_ZEROS, crc) ^ crc2; 626 } 627 628 /* Do one last smaller batch with the remaining words, if there are enough 629 to pay for the combination of CRCs. */ 630 last = num / 3; 631 if (last >= Z_BATCH_MIN) { 632 last2 = last << 1; 633 crc1 = 0; 634 crc2 = 0; 635 for (i = 0; i < last; i++) { 636 val0 = word[i]; 637 val1 = word[i + last]; 638 val2 = word[i + last2]; 639 __asm__ volatile("crc32x %w0, %w0, %x1" : "+r"(crc) : "r"(val0)); 640 __asm__ volatile("crc32x %w0, %w0, %x1" : "+r"(crc1) : "r"(val1)); 641 __asm__ volatile("crc32x %w0, %w0, %x1" : "+r"(crc2) : "r"(val2)); 642 } 643 word += 3 * last; 644 num -= 3 * last; 645 val = x2nmodp(last, 6); 646 crc = multmodp(val, crc) ^ crc1; 647 crc = multmodp(val, crc) ^ crc2; 648 } 649 650 /* Compute the CRC on any remaining words. */ 651 for (i = 0; i < num; i++) { 652 val0 = word[i]; 653 __asm__ volatile("crc32x %w0, %w0, %x1" : "+r"(crc) : "r"(val0)); 654 } 655 word += num; 656 657 /* Complete the CRC on any remaining bytes. */ 658 buf = (const unsigned char FAR *)word; 659 while (len) { 660 len--; 661 val = *buf++; 662 __asm__ volatile("crc32b %w0, %w0, %w1" : "+r"(crc) : "r"(val)); 663 } 664 665 /* Return the CRC, post-conditioned. */ 666 return crc ^ 0xffffffff; 667 } 668 669 #else 670 671 #ifdef W 672 673 /* 674 Return the CRC of the W bytes in the word_t data, taking the 675 least-significant byte of the word as the first byte of data, without any pre 676 or post conditioning. This is used to combine the CRCs of each braid. 677 */ 678 local z_crc_t crc_word(z_word_t data) { 679 int k; 680 for (k = 0; k < W; k++) 681 data = (data >> 8) ^ crc_table[data & 0xff]; 682 return (z_crc_t)data; 683 } 684 685 local z_word_t crc_word_big(z_word_t data) { 686 int k; 687 for (k = 0; k < W; k++) 688 data = (data << 8) ^ 689 crc_big_table[(data >> ((W - 1) << 3)) & 0xff]; 690 return data; 691 } 692 693 #endif 694 695 /* ========================================================================= */ 696 unsigned long ZEXPORT crc32_z(unsigned long crc, const unsigned char FAR *buf, 697 z_size_t len) { 698 /* Return initial CRC, if requested. */ 699 if (buf == Z_NULL) return 0; 700 701 #ifdef DYNAMIC_CRC_TABLE 702 once(&made, make_crc_table); 703 #endif /* DYNAMIC_CRC_TABLE */ 704 705 /* Pre-condition the CRC */ 706 crc = (~crc) & 0xffffffff; 707 708 #ifdef W 709 710 /* If provided enough bytes, do a braided CRC calculation. */ 711 if (len >= N * W + W - 1) { 712 z_size_t blks; 713 z_word_t const *words; 714 unsigned endian; 715 int k; 716 717 /* Compute the CRC up to a z_word_t boundary. */ 718 while (len && ((z_size_t)buf & (W - 1)) != 0) { 719 len--; 720 crc = (crc >> 8) ^ crc_table[(crc ^ *buf++) & 0xff]; 721 } 722 723 /* Compute the CRC on as many N z_word_t blocks as are available. */ 724 blks = len / (N * W); 725 len -= blks * N * W; 726 words = (z_word_t const *)buf; 727 728 /* Do endian check at execution time instead of compile time, since ARM 729 processors can change the endianness at execution time. If the 730 compiler knows what the endianness will be, it can optimize out the 731 check and the unused branch. */ 732 endian = 1; 733 if (*(unsigned char *)&endian) { 734 /* Little endian. */ 735 736 z_crc_t crc0; 737 z_word_t word0; 738 #if N > 1 739 z_crc_t crc1; 740 z_word_t word1; 741 #if N > 2 742 z_crc_t crc2; 743 z_word_t word2; 744 #if N > 3 745 z_crc_t crc3; 746 z_word_t word3; 747 #if N > 4 748 z_crc_t crc4; 749 z_word_t word4; 750 #if N > 5 751 z_crc_t crc5; 752 z_word_t word5; 753 #endif 754 #endif 755 #endif 756 #endif 757 #endif 758 759 /* Initialize the CRC for each braid. */ 760 crc0 = crc; 761 #if N > 1 762 crc1 = 0; 763 #if N > 2 764 crc2 = 0; 765 #if N > 3 766 crc3 = 0; 767 #if N > 4 768 crc4 = 0; 769 #if N > 5 770 crc5 = 0; 771 #endif 772 #endif 773 #endif 774 #endif 775 #endif 776 777 /* 778 Process the first blks-1 blocks, computing the CRCs on each braid 779 independently. 780 */ 781 while (--blks) { 782 /* Load the word for each braid into registers. */ 783 word0 = crc0 ^ words[0]; 784 #if N > 1 785 word1 = crc1 ^ words[1]; 786 #if N > 2 787 word2 = crc2 ^ words[2]; 788 #if N > 3 789 word3 = crc3 ^ words[3]; 790 #if N > 4 791 word4 = crc4 ^ words[4]; 792 #if N > 5 793 word5 = crc5 ^ words[5]; 794 #endif 795 #endif 796 #endif 797 #endif 798 #endif 799 words += N; 800 801 /* Compute and update the CRC for each word. The loop should 802 get unrolled. */ 803 crc0 = crc_braid_table[0][word0 & 0xff]; 804 #if N > 1 805 crc1 = crc_braid_table[0][word1 & 0xff]; 806 #if N > 2 807 crc2 = crc_braid_table[0][word2 & 0xff]; 808 #if N > 3 809 crc3 = crc_braid_table[0][word3 & 0xff]; 810 #if N > 4 811 crc4 = crc_braid_table[0][word4 & 0xff]; 812 #if N > 5 813 crc5 = crc_braid_table[0][word5 & 0xff]; 814 #endif 815 #endif 816 #endif 817 #endif 818 #endif 819 for (k = 1; k < W; k++) { 820 crc0 ^= crc_braid_table[k][(word0 >> (k << 3)) & 0xff]; 821 #if N > 1 822 crc1 ^= crc_braid_table[k][(word1 >> (k << 3)) & 0xff]; 823 #if N > 2 824 crc2 ^= crc_braid_table[k][(word2 >> (k << 3)) & 0xff]; 825 #if N > 3 826 crc3 ^= crc_braid_table[k][(word3 >> (k << 3)) & 0xff]; 827 #if N > 4 828 crc4 ^= crc_braid_table[k][(word4 >> (k << 3)) & 0xff]; 829 #if N > 5 830 crc5 ^= crc_braid_table[k][(word5 >> (k << 3)) & 0xff]; 831 #endif 832 #endif 833 #endif 834 #endif 835 #endif 836 } 837 } 838 839 /* 840 Process the last block, combining the CRCs of the N braids at the 841 same time. 842 */ 843 crc = crc_word(crc0 ^ words[0]); 844 #if N > 1 845 crc = crc_word(crc1 ^ words[1] ^ crc); 846 #if N > 2 847 crc = crc_word(crc2 ^ words[2] ^ crc); 848 #if N > 3 849 crc = crc_word(crc3 ^ words[3] ^ crc); 850 #if N > 4 851 crc = crc_word(crc4 ^ words[4] ^ crc); 852 #if N > 5 853 crc = crc_word(crc5 ^ words[5] ^ crc); 854 #endif 855 #endif 856 #endif 857 #endif 858 #endif 859 words += N; 860 } 861 else { 862 /* Big endian. */ 863 864 z_word_t crc0, word0, comb; 865 #if N > 1 866 z_word_t crc1, word1; 867 #if N > 2 868 z_word_t crc2, word2; 869 #if N > 3 870 z_word_t crc3, word3; 871 #if N > 4 872 z_word_t crc4, word4; 873 #if N > 5 874 z_word_t crc5, word5; 875 #endif 876 #endif 877 #endif 878 #endif 879 #endif 880 881 /* Initialize the CRC for each braid. */ 882 crc0 = byte_swap(crc); 883 #if N > 1 884 crc1 = 0; 885 #if N > 2 886 crc2 = 0; 887 #if N > 3 888 crc3 = 0; 889 #if N > 4 890 crc4 = 0; 891 #if N > 5 892 crc5 = 0; 893 #endif 894 #endif 895 #endif 896 #endif 897 #endif 898 899 /* 900 Process the first blks-1 blocks, computing the CRCs on each braid 901 independently. 902 */ 903 while (--blks) { 904 /* Load the word for each braid into registers. */ 905 word0 = crc0 ^ words[0]; 906 #if N > 1 907 word1 = crc1 ^ words[1]; 908 #if N > 2 909 word2 = crc2 ^ words[2]; 910 #if N > 3 911 word3 = crc3 ^ words[3]; 912 #if N > 4 913 word4 = crc4 ^ words[4]; 914 #if N > 5 915 word5 = crc5 ^ words[5]; 916 #endif 917 #endif 918 #endif 919 #endif 920 #endif 921 words += N; 922 923 /* Compute and update the CRC for each word. The loop should 924 get unrolled. */ 925 crc0 = crc_braid_big_table[0][word0 & 0xff]; 926 #if N > 1 927 crc1 = crc_braid_big_table[0][word1 & 0xff]; 928 #if N > 2 929 crc2 = crc_braid_big_table[0][word2 & 0xff]; 930 #if N > 3 931 crc3 = crc_braid_big_table[0][word3 & 0xff]; 932 #if N > 4 933 crc4 = crc_braid_big_table[0][word4 & 0xff]; 934 #if N > 5 935 crc5 = crc_braid_big_table[0][word5 & 0xff]; 936 #endif 937 #endif 938 #endif 939 #endif 940 #endif 941 for (k = 1; k < W; k++) { 942 crc0 ^= crc_braid_big_table[k][(word0 >> (k << 3)) & 0xff]; 943 #if N > 1 944 crc1 ^= crc_braid_big_table[k][(word1 >> (k << 3)) & 0xff]; 945 #if N > 2 946 crc2 ^= crc_braid_big_table[k][(word2 >> (k << 3)) & 0xff]; 947 #if N > 3 948 crc3 ^= crc_braid_big_table[k][(word3 >> (k << 3)) & 0xff]; 949 #if N > 4 950 crc4 ^= crc_braid_big_table[k][(word4 >> (k << 3)) & 0xff]; 951 #if N > 5 952 crc5 ^= crc_braid_big_table[k][(word5 >> (k << 3)) & 0xff]; 953 #endif 954 #endif 955 #endif 956 #endif 957 #endif 958 } 959 } 960 961 /* 962 Process the last block, combining the CRCs of the N braids at the 963 same time. 964 */ 965 comb = crc_word_big(crc0 ^ words[0]); 966 #if N > 1 967 comb = crc_word_big(crc1 ^ words[1] ^ comb); 968 #if N > 2 969 comb = crc_word_big(crc2 ^ words[2] ^ comb); 970 #if N > 3 971 comb = crc_word_big(crc3 ^ words[3] ^ comb); 972 #if N > 4 973 comb = crc_word_big(crc4 ^ words[4] ^ comb); 974 #if N > 5 975 comb = crc_word_big(crc5 ^ words[5] ^ comb); 976 #endif 977 #endif 978 #endif 979 #endif 980 #endif 981 words += N; 982 crc = byte_swap(comb); 983 } 984 985 /* 986 Update the pointer to the remaining bytes to process. 987 */ 988 buf = (unsigned char const *)words; 989 } 990 991 #endif /* W */ 992 993 /* Complete the computation of the CRC on any remaining bytes. */ 994 while (len >= 8) { 995 len -= 8; 996 crc = (crc >> 8) ^ crc_table[(crc ^ *buf++) & 0xff]; 997 crc = (crc >> 8) ^ crc_table[(crc ^ *buf++) & 0xff]; 998 crc = (crc >> 8) ^ crc_table[(crc ^ *buf++) & 0xff]; 999 crc = (crc >> 8) ^ crc_table[(crc ^ *buf++) & 0xff]; 1000 crc = (crc >> 8) ^ crc_table[(crc ^ *buf++) & 0xff]; 1001 crc = (crc >> 8) ^ crc_table[(crc ^ *buf++) & 0xff]; 1002 crc = (crc >> 8) ^ crc_table[(crc ^ *buf++) & 0xff]; 1003 crc = (crc >> 8) ^ crc_table[(crc ^ *buf++) & 0xff]; 1004 } 1005 while (len) { 1006 len--; 1007 crc = (crc >> 8) ^ crc_table[(crc ^ *buf++) & 0xff]; 1008 } 1009 1010 /* Return the CRC, post-conditioned. */ 1011 return crc ^ 0xffffffff; 1012 } 1013 1014 #endif 1015 1016 /* ========================================================================= */ 1017 unsigned long ZEXPORT crc32(unsigned long crc, const unsigned char FAR *buf, 1018 uInt len) { 1019 return crc32_z(crc, buf, len); 1020 } 1021 1022 /* ========================================================================= */ 1023 uLong ZEXPORT crc32_combine64(uLong crc1, uLong crc2, z_off64_t len2) { 1024 #ifdef DYNAMIC_CRC_TABLE 1025 once(&made, make_crc_table); 1026 #endif /* DYNAMIC_CRC_TABLE */ 1027 return multmodp(x2nmodp(len2, 3), crc1) ^ (crc2 & 0xffffffff); 1028 } 1029 1030 /* ========================================================================= */ 1031 uLong ZEXPORT crc32_combine(uLong crc1, uLong crc2, z_off_t len2) { 1032 return crc32_combine64(crc1, crc2, (z_off64_t)len2); 1033 } 1034 1035 /* ========================================================================= */ 1036 uLong ZEXPORT crc32_combine_gen64(z_off64_t len2) { 1037 #ifdef DYNAMIC_CRC_TABLE 1038 once(&made, make_crc_table); 1039 #endif /* DYNAMIC_CRC_TABLE */ 1040 return x2nmodp(len2, 3); 1041 } 1042 1043 /* ========================================================================= */ 1044 uLong ZEXPORT crc32_combine_gen(z_off_t len2) { 1045 return crc32_combine_gen64((z_off64_t)len2); 1046 } 1047 1048 /* ========================================================================= */ 1049 uLong ZEXPORT crc32_combine_op(uLong crc1, uLong crc2, uLong op) { 1050 return multmodp(op, crc1) ^ (crc2 & 0xffffffff); 1051 } 1052