1 /* crc32.c -- compute the CRC-32 of a data stream 2 * Copyright (C) 1995-2022 Mark Adler 3 * For conditions of distribution and use, see copyright notice in zlib.h 4 * 5 * This interleaved implementation of a CRC makes use of pipelined multiple 6 * arithmetic-logic units, commonly found in modern CPU cores. It is due to 7 * Kadatch and Jenkins (2010). See doc/crc-doc.1.0.pdf in this distribution. 8 */ 9 10 /* 11 Note on the use of DYNAMIC_CRC_TABLE: there is no mutex or semaphore 12 protection on the static variables used to control the first-use generation 13 of the crc tables. Therefore, if you #define DYNAMIC_CRC_TABLE, you should 14 first call get_crc_table() to initialize the tables before allowing more than 15 one thread to use crc32(). 16 17 MAKECRCH can be #defined to write out crc32.h. A main() routine is also 18 produced, so that this one source file can be compiled to an executable. 19 */ 20 21 #ifdef MAKECRCH 22 # include <stdio.h> 23 # ifndef DYNAMIC_CRC_TABLE 24 # define DYNAMIC_CRC_TABLE 25 # endif /* !DYNAMIC_CRC_TABLE */ 26 #endif /* MAKECRCH */ 27 28 #include "zutil.h" /* for Z_U4, Z_U8, z_crc_t, and FAR definitions */ 29 30 /* 31 A CRC of a message is computed on N braids of words in the message, where 32 each word consists of W bytes (4 or 8). If N is 3, for example, then three 33 running sparse CRCs are calculated respectively on each braid, at these 34 indices in the array of words: 0, 3, 6, ..., 1, 4, 7, ..., and 2, 5, 8, ... 35 This is done starting at a word boundary, and continues until as many blocks 36 of N * W bytes as are available have been processed. The results are combined 37 into a single CRC at the end. For this code, N must be in the range 1..6 and 38 W must be 4 or 8. The upper limit on N can be increased if desired by adding 39 more #if blocks, extending the patterns apparent in the code. In addition, 40 crc32.h would need to be regenerated, if the maximum N value is increased. 41 42 N and W are chosen empirically by benchmarking the execution time on a given 43 processor. The choices for N and W below were based on testing on Intel Kaby 44 Lake i7, AMD Ryzen 7, ARM Cortex-A57, Sparc64-VII, PowerPC POWER9, and MIPS64 45 Octeon II processors. The Intel, AMD, and ARM processors were all fastest 46 with N=5, W=8. The Sparc, PowerPC, and MIPS64 were all fastest at N=5, W=4. 47 They were all tested with either gcc or clang, all using the -O3 optimization 48 level. Your mileage may vary. 49 */ 50 51 /* Define N */ 52 #ifdef Z_TESTN 53 # define N Z_TESTN 54 #else 55 # define N 5 56 #endif 57 #if N < 1 || N > 6 58 # error N must be in 1..6 59 #endif 60 61 /* 62 z_crc_t must be at least 32 bits. z_word_t must be at least as long as 63 z_crc_t. It is assumed here that z_word_t is either 32 bits or 64 bits, and 64 that bytes are eight bits. 65 */ 66 67 /* 68 Define W and the associated z_word_t type. If W is not defined, then a 69 braided calculation is not used, and the associated tables and code are not 70 compiled. 71 */ 72 #ifdef Z_TESTW 73 # if Z_TESTW-1 != -1 74 # define W Z_TESTW 75 # endif 76 #else 77 # ifdef MAKECRCH 78 # define W 8 /* required for MAKECRCH */ 79 # else 80 # if defined(__x86_64__) || defined(__aarch64__) 81 # define W 8 82 # else 83 # define W 4 84 # endif 85 # endif 86 #endif 87 #ifdef W 88 # if W == 8 && defined(Z_U8) 89 typedef Z_U8 z_word_t; 90 # elif defined(Z_U4) 91 # undef W 92 # define W 4 93 typedef Z_U4 z_word_t; 94 # else 95 # undef W 96 # endif 97 #endif 98 99 /* If available, use the ARM processor CRC32 instruction. */ 100 #if defined(__aarch64__) && defined(__ARM_FEATURE_CRC32) && W == 8 101 # define ARMCRC32 102 #endif 103 104 /* Local functions. */ 105 local z_crc_t multmodp OF((z_crc_t a, z_crc_t b)); 106 local z_crc_t x2nmodp OF((z_off64_t n, unsigned k)); 107 108 #if defined(W) && (!defined(ARMCRC32) || defined(DYNAMIC_CRC_TABLE)) 109 local z_word_t byte_swap OF((z_word_t word)); 110 #endif 111 112 #if defined(W) && !defined(ARMCRC32) 113 local z_crc_t crc_word OF((z_word_t data)); 114 local z_word_t crc_word_big OF((z_word_t data)); 115 #endif 116 117 #if defined(W) && (!defined(ARMCRC32) || defined(DYNAMIC_CRC_TABLE)) 118 /* 119 Swap the bytes in a z_word_t to convert between little and big endian. Any 120 self-respecting compiler will optimize this to a single machine byte-swap 121 instruction, if one is available. This assumes that word_t is either 32 bits 122 or 64 bits. 123 */ 124 local z_word_t byte_swap(word) 125 z_word_t word; 126 { 127 # if W == 8 128 return 129 (word & 0xff00000000000000) >> 56 | 130 (word & 0xff000000000000) >> 40 | 131 (word & 0xff0000000000) >> 24 | 132 (word & 0xff00000000) >> 8 | 133 (word & 0xff000000) << 8 | 134 (word & 0xff0000) << 24 | 135 (word & 0xff00) << 40 | 136 (word & 0xff) << 56; 137 # else /* W == 4 */ 138 return 139 (word & 0xff000000) >> 24 | 140 (word & 0xff0000) >> 8 | 141 (word & 0xff00) << 8 | 142 (word & 0xff) << 24; 143 # endif 144 } 145 #endif 146 147 /* CRC polynomial. */ 148 #define POLY 0xedb88320 /* p(x) reflected, with x^32 implied */ 149 150 #ifdef DYNAMIC_CRC_TABLE 151 152 local z_crc_t FAR crc_table[256]; 153 local z_crc_t FAR x2n_table[32]; 154 local void make_crc_table OF((void)); 155 #ifdef W 156 local z_word_t FAR crc_big_table[256]; 157 local z_crc_t FAR crc_braid_table[W][256]; 158 local z_word_t FAR crc_braid_big_table[W][256]; 159 local void braid OF((z_crc_t [][256], z_word_t [][256], int, int)); 160 #endif 161 #ifdef MAKECRCH 162 local void write_table OF((FILE *, const z_crc_t FAR *, int)); 163 local void write_table32hi OF((FILE *, const z_word_t FAR *, int)); 164 local void write_table64 OF((FILE *, const z_word_t FAR *, int)); 165 #endif /* MAKECRCH */ 166 167 /* 168 Define a once() function depending on the availability of atomics. If this is 169 compiled with DYNAMIC_CRC_TABLE defined, and if CRCs will be computed in 170 multiple threads, and if atomics are not available, then get_crc_table() must 171 be called to initialize the tables and must return before any threads are 172 allowed to compute or combine CRCs. 173 */ 174 175 /* Definition of once functionality. */ 176 typedef struct once_s once_t; 177 local void once OF((once_t *, void (*)(void))); 178 179 /* Check for the availability of atomics. */ 180 #if defined(__STDC__) && __STDC_VERSION__ >= 201112L && \ 181 !defined(__STDC_NO_ATOMICS__) && !defined(SMALL) 182 183 #include <stdatomic.h> 184 185 /* Structure for once(), which must be initialized with ONCE_INIT. */ 186 struct once_s { 187 atomic_flag begun; 188 atomic_int done; 189 }; 190 #define ONCE_INIT {ATOMIC_FLAG_INIT, 0} 191 192 /* 193 Run the provided init() function exactly once, even if multiple threads 194 invoke once() at the same time. The state must be a once_t initialized with 195 ONCE_INIT. 196 */ 197 local void once(state, init) 198 once_t *state; 199 void (*init)(void); 200 { 201 if (!atomic_load(&state->done)) { 202 if (atomic_flag_test_and_set(&state->begun)) 203 while (!atomic_load(&state->done)) 204 ; 205 else { 206 init(); 207 atomic_store(&state->done, 1); 208 } 209 } 210 } 211 212 #else /* no atomics */ 213 214 /* Structure for once(), which must be initialized with ONCE_INIT. */ 215 struct once_s { 216 volatile int begun; 217 volatile int done; 218 }; 219 #define ONCE_INIT {0, 0} 220 221 /* Test and set. Alas, not atomic, but tries to minimize the period of 222 vulnerability. */ 223 local int test_and_set OF((int volatile *)); 224 local int test_and_set(flag) 225 int volatile *flag; 226 { 227 int was; 228 229 was = *flag; 230 *flag = 1; 231 return was; 232 } 233 234 /* Run the provided init() function once. This is not thread-safe. */ 235 local void once(state, init) 236 once_t *state; 237 void (*init)(void); 238 { 239 if (!state->done) { 240 if (test_and_set(&state->begun)) 241 while (!state->done) 242 ; 243 else { 244 init(); 245 state->done = 1; 246 } 247 } 248 } 249 250 #endif 251 252 /* State for once(). */ 253 local once_t made = ONCE_INIT; 254 255 /* 256 Generate tables for a byte-wise 32-bit CRC calculation on the polynomial: 257 x^32+x^26+x^23+x^22+x^16+x^12+x^11+x^10+x^8+x^7+x^5+x^4+x^2+x+1. 258 259 Polynomials over GF(2) are represented in binary, one bit per coefficient, 260 with the lowest powers in the most significant bit. Then adding polynomials 261 is just exclusive-or, and multiplying a polynomial by x is a right shift by 262 one. If we call the above polynomial p, and represent a byte as the 263 polynomial q, also with the lowest power in the most significant bit (so the 264 byte 0xb1 is the polynomial x^7+x^3+x^2+1), then the CRC is (q*x^32) mod p, 265 where a mod b means the remainder after dividing a by b. 266 267 This calculation is done using the shift-register method of multiplying and 268 taking the remainder. The register is initialized to zero, and for each 269 incoming bit, x^32 is added mod p to the register if the bit is a one (where 270 x^32 mod p is p+x^32 = x^26+...+1), and the register is multiplied mod p by x 271 (which is shifting right by one and adding x^32 mod p if the bit shifted out 272 is a one). We start with the highest power (least significant bit) of q and 273 repeat for all eight bits of q. 274 275 The table is simply the CRC of all possible eight bit values. This is all the 276 information needed to generate CRCs on data a byte at a time for all 277 combinations of CRC register values and incoming bytes. 278 */ 279 280 local void make_crc_table() 281 { 282 unsigned i, j, n; 283 z_crc_t p; 284 285 /* initialize the CRC of bytes tables */ 286 for (i = 0; i < 256; i++) { 287 p = i; 288 for (j = 0; j < 8; j++) 289 p = p & 1 ? (p >> 1) ^ POLY : p >> 1; 290 crc_table[i] = p; 291 #ifdef W 292 crc_big_table[i] = byte_swap(p); 293 #endif 294 } 295 296 /* initialize the x^2^n mod p(x) table */ 297 p = (z_crc_t)1 << 30; /* x^1 */ 298 x2n_table[0] = p; 299 for (n = 1; n < 32; n++) 300 x2n_table[n] = p = multmodp(p, p); 301 302 #ifdef W 303 /* initialize the braiding tables -- needs x2n_table[] */ 304 braid(crc_braid_table, crc_braid_big_table, N, W); 305 #endif 306 307 #ifdef MAKECRCH 308 { 309 /* 310 The crc32.h header file contains tables for both 32-bit and 64-bit 311 z_word_t's, and so requires a 64-bit type be available. In that case, 312 z_word_t must be defined to be 64-bits. This code then also generates 313 and writes out the tables for the case that z_word_t is 32 bits. 314 */ 315 #if !defined(W) || W != 8 316 # error Need a 64-bit integer type in order to generate crc32.h. 317 #endif 318 FILE *out; 319 int k, n; 320 z_crc_t ltl[8][256]; 321 z_word_t big[8][256]; 322 323 out = fopen("crc32.h", "w"); 324 if (out == NULL) return; 325 326 /* write out little-endian CRC table to crc32.h */ 327 fprintf(out, 328 "/* crc32.h -- tables for rapid CRC calculation\n" 329 " * Generated automatically by crc32.c\n */\n" 330 "\n" 331 "local const z_crc_t FAR crc_table[] = {\n" 332 " "); 333 write_table(out, crc_table, 256); 334 fprintf(out, 335 "};\n"); 336 337 /* write out big-endian CRC table for 64-bit z_word_t to crc32.h */ 338 fprintf(out, 339 "\n" 340 "#ifdef W\n" 341 "\n" 342 "#if W == 8\n" 343 "\n" 344 "local const z_word_t FAR crc_big_table[] = {\n" 345 " "); 346 write_table64(out, crc_big_table, 256); 347 fprintf(out, 348 "};\n"); 349 350 /* write out big-endian CRC table for 32-bit z_word_t to crc32.h */ 351 fprintf(out, 352 "\n" 353 "#else /* W == 4 */\n" 354 "\n" 355 "local const z_word_t FAR crc_big_table[] = {\n" 356 " "); 357 write_table32hi(out, crc_big_table, 256); 358 fprintf(out, 359 "};\n" 360 "\n" 361 "#endif\n"); 362 363 /* write out braid tables for each value of N */ 364 for (n = 1; n <= 6; n++) { 365 fprintf(out, 366 "\n" 367 "#if N == %d\n", n); 368 369 /* compute braid tables for this N and 64-bit word_t */ 370 braid(ltl, big, n, 8); 371 372 /* write out braid tables for 64-bit z_word_t to crc32.h */ 373 fprintf(out, 374 "\n" 375 "#if W == 8\n" 376 "\n" 377 "local const z_crc_t FAR crc_braid_table[][256] = {\n"); 378 for (k = 0; k < 8; k++) { 379 fprintf(out, " {"); 380 write_table(out, ltl[k], 256); 381 fprintf(out, "}%s", k < 7 ? ",\n" : ""); 382 } 383 fprintf(out, 384 "};\n" 385 "\n" 386 "local const z_word_t FAR crc_braid_big_table[][256] = {\n"); 387 for (k = 0; k < 8; k++) { 388 fprintf(out, " {"); 389 write_table64(out, big[k], 256); 390 fprintf(out, "}%s", k < 7 ? ",\n" : ""); 391 } 392 fprintf(out, 393 "};\n"); 394 395 /* compute braid tables for this N and 32-bit word_t */ 396 braid(ltl, big, n, 4); 397 398 /* write out braid tables for 32-bit z_word_t to crc32.h */ 399 fprintf(out, 400 "\n" 401 "#else /* W == 4 */\n" 402 "\n" 403 "local const z_crc_t FAR crc_braid_table[][256] = {\n"); 404 for (k = 0; k < 4; k++) { 405 fprintf(out, " {"); 406 write_table(out, ltl[k], 256); 407 fprintf(out, "}%s", k < 3 ? ",\n" : ""); 408 } 409 fprintf(out, 410 "};\n" 411 "\n" 412 "local const z_word_t FAR crc_braid_big_table[][256] = {\n"); 413 for (k = 0; k < 4; k++) { 414 fprintf(out, " {"); 415 write_table32hi(out, big[k], 256); 416 fprintf(out, "}%s", k < 3 ? ",\n" : ""); 417 } 418 fprintf(out, 419 "};\n" 420 "\n" 421 "#endif\n" 422 "\n" 423 "#endif\n"); 424 } 425 fprintf(out, 426 "\n" 427 "#endif\n"); 428 429 /* write out zeros operator table to crc32.h */ 430 fprintf(out, 431 "\n" 432 "local const z_crc_t FAR x2n_table[] = {\n" 433 " "); 434 write_table(out, x2n_table, 32); 435 fprintf(out, 436 "};\n"); 437 fclose(out); 438 } 439 #endif /* MAKECRCH */ 440 } 441 442 #ifdef MAKECRCH 443 444 /* 445 Write the 32-bit values in table[0..k-1] to out, five per line in 446 hexadecimal separated by commas. 447 */ 448 local void write_table(out, table, k) 449 FILE *out; 450 const z_crc_t FAR *table; 451 int k; 452 { 453 int n; 454 455 for (n = 0; n < k; n++) 456 fprintf(out, "%s0x%08lx%s", n == 0 || n % 5 ? "" : " ", 457 (unsigned long)(table[n]), 458 n == k - 1 ? "" : (n % 5 == 4 ? ",\n" : ", ")); 459 } 460 461 /* 462 Write the high 32-bits of each value in table[0..k-1] to out, five per line 463 in hexadecimal separated by commas. 464 */ 465 local void write_table32hi(out, table, k) 466 FILE *out; 467 const z_word_t FAR *table; 468 int k; 469 { 470 int n; 471 472 for (n = 0; n < k; n++) 473 fprintf(out, "%s0x%08lx%s", n == 0 || n % 5 ? "" : " ", 474 (unsigned long)(table[n] >> 32), 475 n == k - 1 ? "" : (n % 5 == 4 ? ",\n" : ", ")); 476 } 477 478 /* 479 Write the 64-bit values in table[0..k-1] to out, three per line in 480 hexadecimal separated by commas. This assumes that if there is a 64-bit 481 type, then there is also a long long integer type, and it is at least 64 482 bits. If not, then the type cast and format string can be adjusted 483 accordingly. 484 */ 485 local void write_table64(out, table, k) 486 FILE *out; 487 const z_word_t FAR *table; 488 int k; 489 { 490 int n; 491 492 for (n = 0; n < k; n++) 493 fprintf(out, "%s0x%016llx%s", n == 0 || n % 3 ? "" : " ", 494 (unsigned long long)(table[n]), 495 n == k - 1 ? "" : (n % 3 == 2 ? ",\n" : ", ")); 496 } 497 498 /* Actually do the deed. */ 499 int main() 500 { 501 make_crc_table(); 502 return 0; 503 } 504 505 #endif /* MAKECRCH */ 506 507 #ifdef W 508 /* 509 Generate the little and big-endian braid tables for the given n and z_word_t 510 size w. Each array must have room for w blocks of 256 elements. 511 */ 512 local void braid(ltl, big, n, w) 513 z_crc_t ltl[][256]; 514 z_word_t big[][256]; 515 int n; 516 int w; 517 { 518 int k; 519 z_crc_t i, p, q; 520 for (k = 0; k < w; k++) { 521 p = x2nmodp((n * w + 3 - k) << 3, 0); 522 ltl[k][0] = 0; 523 big[w - 1 - k][0] = 0; 524 for (i = 1; i < 256; i++) { 525 ltl[k][i] = q = multmodp(i << 24, p); 526 big[w - 1 - k][i] = byte_swap(q); 527 } 528 } 529 } 530 #endif 531 532 #else /* !DYNAMIC_CRC_TABLE */ 533 /* ======================================================================== 534 * Tables for byte-wise and braided CRC-32 calculations, and a table of powers 535 * of x for combining CRC-32s, all made by make_crc_table(). 536 */ 537 #include "crc32.h" 538 #endif /* DYNAMIC_CRC_TABLE */ 539 540 /* ======================================================================== 541 * Routines used for CRC calculation. Some are also required for the table 542 * generation above. 543 */ 544 545 /* 546 Return a(x) multiplied by b(x) modulo p(x), where p(x) is the CRC polynomial, 547 reflected. For speed, this requires that a not be zero. 548 */ 549 local z_crc_t multmodp(a, b) 550 z_crc_t a; 551 z_crc_t b; 552 { 553 z_crc_t m, p; 554 555 m = (z_crc_t)1 << 31; 556 p = 0; 557 for (;;) { 558 if (a & m) { 559 p ^= b; 560 if ((a & (m - 1)) == 0) 561 break; 562 } 563 m >>= 1; 564 b = b & 1 ? (b >> 1) ^ POLY : b >> 1; 565 } 566 return p; 567 } 568 569 /* 570 Return x^(n * 2^k) modulo p(x). Requires that x2n_table[] has been 571 initialized. 572 */ 573 local z_crc_t x2nmodp(n, k) 574 z_off64_t n; 575 unsigned k; 576 { 577 z_crc_t p; 578 579 p = (z_crc_t)1 << 31; /* x^0 == 1 */ 580 while (n) { 581 if (n & 1) 582 p = multmodp(x2n_table[k & 31], p); 583 n >>= 1; 584 k++; 585 } 586 return p; 587 } 588 589 /* ========================================================================= 590 * This function can be used by asm versions of crc32(), and to force the 591 * generation of the CRC tables in a threaded application. 592 */ 593 const z_crc_t FAR * ZEXPORT get_crc_table() 594 { 595 #ifdef DYNAMIC_CRC_TABLE 596 once(&made, make_crc_table); 597 #endif /* DYNAMIC_CRC_TABLE */ 598 return (const z_crc_t FAR *)crc_table; 599 } 600 601 /* ========================================================================= 602 * Use ARM machine instructions if available. This will compute the CRC about 603 * ten times faster than the braided calculation. This code does not check for 604 * the presence of the CRC instruction at run time. __ARM_FEATURE_CRC32 will 605 * only be defined if the compilation specifies an ARM processor architecture 606 * that has the instructions. For example, compiling with -march=armv8.1-a or 607 * -march=armv8-a+crc, or -march=native if the compile machine has the crc32 608 * instructions. 609 */ 610 #ifdef ARMCRC32 611 612 /* 613 Constants empirically determined to maximize speed. These values are from 614 measurements on a Cortex-A57. Your mileage may vary. 615 */ 616 #define Z_BATCH 3990 /* number of words in a batch */ 617 #define Z_BATCH_ZEROS 0xa10d3d0c /* computed from Z_BATCH = 3990 */ 618 #define Z_BATCH_MIN 800 /* fewest words in a final batch */ 619 620 unsigned long ZEXPORT crc32_z(crc, buf, len) 621 unsigned long crc; 622 const unsigned char FAR *buf; 623 z_size_t len; 624 { 625 z_crc_t val; 626 z_word_t crc1, crc2; 627 const z_word_t *word; 628 z_word_t val0, val1, val2; 629 z_size_t last, last2, i; 630 z_size_t num; 631 632 /* Return initial CRC, if requested. */ 633 if (buf == Z_NULL) return 0; 634 635 #ifdef DYNAMIC_CRC_TABLE 636 once(&made, make_crc_table); 637 #endif /* DYNAMIC_CRC_TABLE */ 638 639 /* Pre-condition the CRC */ 640 crc = (~crc) & 0xffffffff; 641 642 /* Compute the CRC up to a word boundary. */ 643 while (len && ((z_size_t)buf & 7) != 0) { 644 len--; 645 val = *buf++; 646 __asm__ volatile("crc32b %w0, %w0, %w1" : "+r"(crc) : "r"(val)); 647 } 648 649 /* Prepare to compute the CRC on full 64-bit words word[0..num-1]. */ 650 word = (z_word_t const *)buf; 651 num = len >> 3; 652 len &= 7; 653 654 /* Do three interleaved CRCs to realize the throughput of one crc32x 655 instruction per cycle. Each CRC is calculated on Z_BATCH words. The 656 three CRCs are combined into a single CRC after each set of batches. */ 657 while (num >= 3 * Z_BATCH) { 658 crc1 = 0; 659 crc2 = 0; 660 for (i = 0; i < Z_BATCH; i++) { 661 val0 = word[i]; 662 val1 = word[i + Z_BATCH]; 663 val2 = word[i + 2 * Z_BATCH]; 664 __asm__ volatile("crc32x %w0, %w0, %x1" : "+r"(crc) : "r"(val0)); 665 __asm__ volatile("crc32x %w0, %w0, %x1" : "+r"(crc1) : "r"(val1)); 666 __asm__ volatile("crc32x %w0, %w0, %x1" : "+r"(crc2) : "r"(val2)); 667 } 668 word += 3 * Z_BATCH; 669 num -= 3 * Z_BATCH; 670 crc = multmodp(Z_BATCH_ZEROS, crc) ^ crc1; 671 crc = multmodp(Z_BATCH_ZEROS, crc) ^ crc2; 672 } 673 674 /* Do one last smaller batch with the remaining words, if there are enough 675 to pay for the combination of CRCs. */ 676 last = num / 3; 677 if (last >= Z_BATCH_MIN) { 678 last2 = last << 1; 679 crc1 = 0; 680 crc2 = 0; 681 for (i = 0; i < last; i++) { 682 val0 = word[i]; 683 val1 = word[i + last]; 684 val2 = word[i + last2]; 685 __asm__ volatile("crc32x %w0, %w0, %x1" : "+r"(crc) : "r"(val0)); 686 __asm__ volatile("crc32x %w0, %w0, %x1" : "+r"(crc1) : "r"(val1)); 687 __asm__ volatile("crc32x %w0, %w0, %x1" : "+r"(crc2) : "r"(val2)); 688 } 689 word += 3 * last; 690 num -= 3 * last; 691 val = x2nmodp(last, 6); 692 crc = multmodp(val, crc) ^ crc1; 693 crc = multmodp(val, crc) ^ crc2; 694 } 695 696 /* Compute the CRC on any remaining words. */ 697 for (i = 0; i < num; i++) { 698 val0 = word[i]; 699 __asm__ volatile("crc32x %w0, %w0, %x1" : "+r"(crc) : "r"(val0)); 700 } 701 word += num; 702 703 /* Complete the CRC on any remaining bytes. */ 704 buf = (const unsigned char FAR *)word; 705 while (len) { 706 len--; 707 val = *buf++; 708 __asm__ volatile("crc32b %w0, %w0, %w1" : "+r"(crc) : "r"(val)); 709 } 710 711 /* Return the CRC, post-conditioned. */ 712 return crc ^ 0xffffffff; 713 } 714 715 #else 716 717 #ifdef W 718 719 /* 720 Return the CRC of the W bytes in the word_t data, taking the 721 least-significant byte of the word as the first byte of data, without any pre 722 or post conditioning. This is used to combine the CRCs of each braid. 723 */ 724 local z_crc_t crc_word(data) 725 z_word_t data; 726 { 727 int k; 728 for (k = 0; k < W; k++) 729 data = (data >> 8) ^ crc_table[data & 0xff]; 730 return (z_crc_t)data; 731 } 732 733 local z_word_t crc_word_big(data) 734 z_word_t data; 735 { 736 int k; 737 for (k = 0; k < W; k++) 738 data = (data << 8) ^ 739 crc_big_table[(data >> ((W - 1) << 3)) & 0xff]; 740 return data; 741 } 742 743 #endif 744 745 /* ========================================================================= */ 746 unsigned long ZEXPORT crc32_z(crc, buf, len) 747 unsigned long crc; 748 const unsigned char FAR *buf; 749 z_size_t len; 750 { 751 /* Return initial CRC, if requested. */ 752 if (buf == Z_NULL) return 0; 753 754 #ifdef DYNAMIC_CRC_TABLE 755 once(&made, make_crc_table); 756 #endif /* DYNAMIC_CRC_TABLE */ 757 758 /* Pre-condition the CRC */ 759 crc = (~crc) & 0xffffffff; 760 761 #ifdef W 762 763 /* If provided enough bytes, do a braided CRC calculation. */ 764 if (len >= N * W + W - 1) { 765 z_size_t blks; 766 z_word_t const *words; 767 unsigned endian; 768 int k; 769 770 /* Compute the CRC up to a z_word_t boundary. */ 771 while (len && ((z_size_t)buf & (W - 1)) != 0) { 772 len--; 773 crc = (crc >> 8) ^ crc_table[(crc ^ *buf++) & 0xff]; 774 } 775 776 /* Compute the CRC on as many N z_word_t blocks as are available. */ 777 blks = len / (N * W); 778 len -= blks * N * W; 779 words = (z_word_t const *)buf; 780 781 /* Do endian check at execution time instead of compile time, since ARM 782 processors can change the endianess at execution time. If the 783 compiler knows what the endianess will be, it can optimize out the 784 check and the unused branch. */ 785 endian = 1; 786 if (*(unsigned char *)&endian) { 787 /* Little endian. */ 788 789 z_crc_t crc0; 790 z_word_t word0; 791 #if N > 1 792 z_crc_t crc1; 793 z_word_t word1; 794 #if N > 2 795 z_crc_t crc2; 796 z_word_t word2; 797 #if N > 3 798 z_crc_t crc3; 799 z_word_t word3; 800 #if N > 4 801 z_crc_t crc4; 802 z_word_t word4; 803 #if N > 5 804 z_crc_t crc5; 805 z_word_t word5; 806 #endif 807 #endif 808 #endif 809 #endif 810 #endif 811 812 /* Initialize the CRC for each braid. */ 813 crc0 = crc; 814 #if N > 1 815 crc1 = 0; 816 #if N > 2 817 crc2 = 0; 818 #if N > 3 819 crc3 = 0; 820 #if N > 4 821 crc4 = 0; 822 #if N > 5 823 crc5 = 0; 824 #endif 825 #endif 826 #endif 827 #endif 828 #endif 829 830 /* 831 Process the first blks-1 blocks, computing the CRCs on each braid 832 independently. 833 */ 834 while (--blks) { 835 /* Load the word for each braid into registers. */ 836 word0 = crc0 ^ words[0]; 837 #if N > 1 838 word1 = crc1 ^ words[1]; 839 #if N > 2 840 word2 = crc2 ^ words[2]; 841 #if N > 3 842 word3 = crc3 ^ words[3]; 843 #if N > 4 844 word4 = crc4 ^ words[4]; 845 #if N > 5 846 word5 = crc5 ^ words[5]; 847 #endif 848 #endif 849 #endif 850 #endif 851 #endif 852 words += N; 853 854 /* Compute and update the CRC for each word. The loop should 855 get unrolled. */ 856 crc0 = crc_braid_table[0][word0 & 0xff]; 857 #if N > 1 858 crc1 = crc_braid_table[0][word1 & 0xff]; 859 #if N > 2 860 crc2 = crc_braid_table[0][word2 & 0xff]; 861 #if N > 3 862 crc3 = crc_braid_table[0][word3 & 0xff]; 863 #if N > 4 864 crc4 = crc_braid_table[0][word4 & 0xff]; 865 #if N > 5 866 crc5 = crc_braid_table[0][word5 & 0xff]; 867 #endif 868 #endif 869 #endif 870 #endif 871 #endif 872 for (k = 1; k < W; k++) { 873 crc0 ^= crc_braid_table[k][(word0 >> (k << 3)) & 0xff]; 874 #if N > 1 875 crc1 ^= crc_braid_table[k][(word1 >> (k << 3)) & 0xff]; 876 #if N > 2 877 crc2 ^= crc_braid_table[k][(word2 >> (k << 3)) & 0xff]; 878 #if N > 3 879 crc3 ^= crc_braid_table[k][(word3 >> (k << 3)) & 0xff]; 880 #if N > 4 881 crc4 ^= crc_braid_table[k][(word4 >> (k << 3)) & 0xff]; 882 #if N > 5 883 crc5 ^= crc_braid_table[k][(word5 >> (k << 3)) & 0xff]; 884 #endif 885 #endif 886 #endif 887 #endif 888 #endif 889 } 890 } 891 892 /* 893 Process the last block, combining the CRCs of the N braids at the 894 same time. 895 */ 896 crc = crc_word(crc0 ^ words[0]); 897 #if N > 1 898 crc = crc_word(crc1 ^ words[1] ^ crc); 899 #if N > 2 900 crc = crc_word(crc2 ^ words[2] ^ crc); 901 #if N > 3 902 crc = crc_word(crc3 ^ words[3] ^ crc); 903 #if N > 4 904 crc = crc_word(crc4 ^ words[4] ^ crc); 905 #if N > 5 906 crc = crc_word(crc5 ^ words[5] ^ crc); 907 #endif 908 #endif 909 #endif 910 #endif 911 #endif 912 words += N; 913 } 914 else { 915 /* Big endian. */ 916 917 z_word_t crc0, word0, comb; 918 #if N > 1 919 z_word_t crc1, word1; 920 #if N > 2 921 z_word_t crc2, word2; 922 #if N > 3 923 z_word_t crc3, word3; 924 #if N > 4 925 z_word_t crc4, word4; 926 #if N > 5 927 z_word_t crc5, word5; 928 #endif 929 #endif 930 #endif 931 #endif 932 #endif 933 934 /* Initialize the CRC for each braid. */ 935 crc0 = byte_swap(crc); 936 #if N > 1 937 crc1 = 0; 938 #if N > 2 939 crc2 = 0; 940 #if N > 3 941 crc3 = 0; 942 #if N > 4 943 crc4 = 0; 944 #if N > 5 945 crc5 = 0; 946 #endif 947 #endif 948 #endif 949 #endif 950 #endif 951 952 /* 953 Process the first blks-1 blocks, computing the CRCs on each braid 954 independently. 955 */ 956 while (--blks) { 957 /* Load the word for each braid into registers. */ 958 word0 = crc0 ^ words[0]; 959 #if N > 1 960 word1 = crc1 ^ words[1]; 961 #if N > 2 962 word2 = crc2 ^ words[2]; 963 #if N > 3 964 word3 = crc3 ^ words[3]; 965 #if N > 4 966 word4 = crc4 ^ words[4]; 967 #if N > 5 968 word5 = crc5 ^ words[5]; 969 #endif 970 #endif 971 #endif 972 #endif 973 #endif 974 words += N; 975 976 /* Compute and update the CRC for each word. The loop should 977 get unrolled. */ 978 crc0 = crc_braid_big_table[0][word0 & 0xff]; 979 #if N > 1 980 crc1 = crc_braid_big_table[0][word1 & 0xff]; 981 #if N > 2 982 crc2 = crc_braid_big_table[0][word2 & 0xff]; 983 #if N > 3 984 crc3 = crc_braid_big_table[0][word3 & 0xff]; 985 #if N > 4 986 crc4 = crc_braid_big_table[0][word4 & 0xff]; 987 #if N > 5 988 crc5 = crc_braid_big_table[0][word5 & 0xff]; 989 #endif 990 #endif 991 #endif 992 #endif 993 #endif 994 for (k = 1; k < W; k++) { 995 crc0 ^= crc_braid_big_table[k][(word0 >> (k << 3)) & 0xff]; 996 #if N > 1 997 crc1 ^= crc_braid_big_table[k][(word1 >> (k << 3)) & 0xff]; 998 #if N > 2 999 crc2 ^= crc_braid_big_table[k][(word2 >> (k << 3)) & 0xff]; 1000 #if N > 3 1001 crc3 ^= crc_braid_big_table[k][(word3 >> (k << 3)) & 0xff]; 1002 #if N > 4 1003 crc4 ^= crc_braid_big_table[k][(word4 >> (k << 3)) & 0xff]; 1004 #if N > 5 1005 crc5 ^= crc_braid_big_table[k][(word5 >> (k << 3)) & 0xff]; 1006 #endif 1007 #endif 1008 #endif 1009 #endif 1010 #endif 1011 } 1012 } 1013 1014 /* 1015 Process the last block, combining the CRCs of the N braids at the 1016 same time. 1017 */ 1018 comb = crc_word_big(crc0 ^ words[0]); 1019 #if N > 1 1020 comb = crc_word_big(crc1 ^ words[1] ^ comb); 1021 #if N > 2 1022 comb = crc_word_big(crc2 ^ words[2] ^ comb); 1023 #if N > 3 1024 comb = crc_word_big(crc3 ^ words[3] ^ comb); 1025 #if N > 4 1026 comb = crc_word_big(crc4 ^ words[4] ^ comb); 1027 #if N > 5 1028 comb = crc_word_big(crc5 ^ words[5] ^ comb); 1029 #endif 1030 #endif 1031 #endif 1032 #endif 1033 #endif 1034 words += N; 1035 crc = byte_swap(comb); 1036 } 1037 1038 /* 1039 Update the pointer to the remaining bytes to process. 1040 */ 1041 buf = (unsigned char const *)words; 1042 } 1043 1044 #endif /* W */ 1045 1046 /* Complete the computation of the CRC on any remaining bytes. */ 1047 while (len >= 8) { 1048 len -= 8; 1049 crc = (crc >> 8) ^ crc_table[(crc ^ *buf++) & 0xff]; 1050 crc = (crc >> 8) ^ crc_table[(crc ^ *buf++) & 0xff]; 1051 crc = (crc >> 8) ^ crc_table[(crc ^ *buf++) & 0xff]; 1052 crc = (crc >> 8) ^ crc_table[(crc ^ *buf++) & 0xff]; 1053 crc = (crc >> 8) ^ crc_table[(crc ^ *buf++) & 0xff]; 1054 crc = (crc >> 8) ^ crc_table[(crc ^ *buf++) & 0xff]; 1055 crc = (crc >> 8) ^ crc_table[(crc ^ *buf++) & 0xff]; 1056 crc = (crc >> 8) ^ crc_table[(crc ^ *buf++) & 0xff]; 1057 } 1058 while (len) { 1059 len--; 1060 crc = (crc >> 8) ^ crc_table[(crc ^ *buf++) & 0xff]; 1061 } 1062 1063 /* Return the CRC, post-conditioned. */ 1064 return crc ^ 0xffffffff; 1065 } 1066 1067 #endif 1068 1069 /* ========================================================================= */ 1070 unsigned long ZEXPORT crc32(crc, buf, len) 1071 unsigned long crc; 1072 const unsigned char FAR *buf; 1073 uInt len; 1074 { 1075 return crc32_z(crc, buf, len); 1076 } 1077 1078 /* ========================================================================= */ 1079 uLong ZEXPORT crc32_combine64(crc1, crc2, len2) 1080 uLong crc1; 1081 uLong crc2; 1082 z_off64_t len2; 1083 { 1084 #ifdef DYNAMIC_CRC_TABLE 1085 once(&made, make_crc_table); 1086 #endif /* DYNAMIC_CRC_TABLE */ 1087 return multmodp(x2nmodp(len2, 3), crc1) ^ (crc2 & 0xffffffff); 1088 } 1089 1090 /* ========================================================================= */ 1091 uLong ZEXPORT crc32_combine(crc1, crc2, len2) 1092 uLong crc1; 1093 uLong crc2; 1094 z_off_t len2; 1095 { 1096 return crc32_combine64(crc1, crc2, (z_off64_t)len2); 1097 } 1098 1099 /* ========================================================================= */ 1100 uLong ZEXPORT crc32_combine_gen64(len2) 1101 z_off64_t len2; 1102 { 1103 #ifdef DYNAMIC_CRC_TABLE 1104 once(&made, make_crc_table); 1105 #endif /* DYNAMIC_CRC_TABLE */ 1106 return x2nmodp(len2, 3); 1107 } 1108 1109 /* ========================================================================= */ 1110 uLong ZEXPORT crc32_combine_gen(len2) 1111 z_off_t len2; 1112 { 1113 return crc32_combine_gen64((z_off64_t)len2); 1114 } 1115 1116 /* ========================================================================= */ 1117 uLong ZEXPORT crc32_combine_op(crc1, crc2, op) 1118 uLong crc1; 1119 uLong crc2; 1120 uLong op; 1121 { 1122 return multmodp(op, crc1) ^ (crc2 & 0xffffffff); 1123 } 1124