1 /* crc32.c -- compute the CRC-32 of a data stream 2 * Copyright (C) 1995-2022 Mark Adler 3 * For conditions of distribution and use, see copyright notice in zlib.h 4 * 5 * This interleaved implementation of a CRC makes use of pipelined multiple 6 * arithmetic-logic units, commonly found in modern CPU cores. It is due to 7 * Kadatch and Jenkins (2010). See doc/crc-doc.1.0.pdf in this distribution. 8 */ 9 10 /* 11 Note on the use of DYNAMIC_CRC_TABLE: there is no mutex or semaphore 12 protection on the static variables used to control the first-use generation 13 of the crc tables. Therefore, if you #define DYNAMIC_CRC_TABLE, you should 14 first call get_crc_table() to initialize the tables before allowing more than 15 one thread to use crc32(). 16 17 MAKECRCH can be #defined to write out crc32.h. A main() routine is also 18 produced, so that this one source file can be compiled to an executable. 19 */ 20 21 #ifdef MAKECRCH 22 # include <stdio.h> 23 # ifndef DYNAMIC_CRC_TABLE 24 # define DYNAMIC_CRC_TABLE 25 # endif /* !DYNAMIC_CRC_TABLE */ 26 #endif /* MAKECRCH */ 27 28 #include "zutil.h" /* for Z_U4, Z_U8, z_crc_t, and FAR definitions */ 29 30 /* 31 A CRC of a message is computed on N braids of words in the message, where 32 each word consists of W bytes (4 or 8). If N is 3, for example, then three 33 running sparse CRCs are calculated respectively on each braid, at these 34 indices in the array of words: 0, 3, 6, ..., 1, 4, 7, ..., and 2, 5, 8, ... 35 This is done starting at a word boundary, and continues until as many blocks 36 of N * W bytes as are available have been processed. The results are combined 37 into a single CRC at the end. For this code, N must be in the range 1..6 and 38 W must be 4 or 8. The upper limit on N can be increased if desired by adding 39 more #if blocks, extending the patterns apparent in the code. In addition, 40 crc32.h would need to be regenerated, if the maximum N value is increased. 41 42 N and W are chosen empirically by benchmarking the execution time on a given 43 processor. The choices for N and W below were based on testing on Intel Kaby 44 Lake i7, AMD Ryzen 7, ARM Cortex-A57, Sparc64-VII, PowerPC POWER9, and MIPS64 45 Octeon II processors. The Intel, AMD, and ARM processors were all fastest 46 with N=5, W=8. The Sparc, PowerPC, and MIPS64 were all fastest at N=5, W=4. 47 They were all tested with either gcc or clang, all using the -O3 optimization 48 level. Your mileage may vary. 49 */ 50 51 /* Define N */ 52 #ifdef Z_TESTN 53 # define N Z_TESTN 54 #else 55 # define N 5 56 #endif 57 #if N < 1 || N > 6 58 # error N must be in 1..6 59 #endif 60 61 /* 62 z_crc_t must be at least 32 bits. z_word_t must be at least as long as 63 z_crc_t. It is assumed here that z_word_t is either 32 bits or 64 bits, and 64 that bytes are eight bits. 65 */ 66 67 /* 68 Define W and the associated z_word_t type. If W is not defined, then a 69 braided calculation is not used, and the associated tables and code are not 70 compiled. 71 */ 72 #ifdef Z_TESTW 73 # if Z_TESTW-1 != -1 74 # define W Z_TESTW 75 # endif 76 #else 77 # ifdef MAKECRCH 78 # define W 8 /* required for MAKECRCH */ 79 # else 80 # if defined(__x86_64__) || defined(__aarch64__) 81 # define W 8 82 # else 83 # define W 4 84 # endif 85 # endif 86 #endif 87 #ifdef W 88 # if W == 8 && defined(Z_U8) 89 typedef Z_U8 z_word_t; 90 # elif defined(Z_U4) 91 # undef W 92 # define W 4 93 typedef Z_U4 z_word_t; 94 # else 95 # undef W 96 # endif 97 #endif 98 99 /* If available, use the ARM processor CRC32 instruction. */ 100 #if defined(__aarch64__) && defined(__ARM_FEATURE_CRC32) && W == 8 101 # define ARMCRC32 102 #endif 103 104 #if defined(W) && (!defined(ARMCRC32) || defined(DYNAMIC_CRC_TABLE)) 105 /* 106 Swap the bytes in a z_word_t to convert between little and big endian. Any 107 self-respecting compiler will optimize this to a single machine byte-swap 108 instruction, if one is available. This assumes that word_t is either 32 bits 109 or 64 bits. 110 */ 111 local z_word_t byte_swap(z_word_t word) { 112 # if W == 8 113 return 114 (word & 0xff00000000000000) >> 56 | 115 (word & 0xff000000000000) >> 40 | 116 (word & 0xff0000000000) >> 24 | 117 (word & 0xff00000000) >> 8 | 118 (word & 0xff000000) << 8 | 119 (word & 0xff0000) << 24 | 120 (word & 0xff00) << 40 | 121 (word & 0xff) << 56; 122 # else /* W == 4 */ 123 return 124 (word & 0xff000000) >> 24 | 125 (word & 0xff0000) >> 8 | 126 (word & 0xff00) << 8 | 127 (word & 0xff) << 24; 128 # endif 129 } 130 #endif 131 132 #ifdef DYNAMIC_CRC_TABLE 133 /* ========================================================================= 134 * Table of powers of x for combining CRC-32s, filled in by make_crc_table() 135 * below. 136 */ 137 local z_crc_t FAR x2n_table[32]; 138 #else 139 /* ========================================================================= 140 * Tables for byte-wise and braided CRC-32 calculations, and a table of powers 141 * of x for combining CRC-32s, all made by make_crc_table(). 142 */ 143 # include "crc32.h" 144 #endif 145 146 /* CRC polynomial. */ 147 #define POLY 0xedb88320 /* p(x) reflected, with x^32 implied */ 148 149 /* 150 Return a(x) multiplied by b(x) modulo p(x), where p(x) is the CRC polynomial, 151 reflected. For speed, this requires that a not be zero. 152 */ 153 local z_crc_t multmodp(z_crc_t a, z_crc_t b) { 154 z_crc_t m, p; 155 156 m = (z_crc_t)1 << 31; 157 p = 0; 158 for (;;) { 159 if (a & m) { 160 p ^= b; 161 if ((a & (m - 1)) == 0) 162 break; 163 } 164 m >>= 1; 165 b = b & 1 ? (b >> 1) ^ POLY : b >> 1; 166 } 167 return p; 168 } 169 170 /* 171 Return x^(n * 2^k) modulo p(x). Requires that x2n_table[] has been 172 initialized. 173 */ 174 local z_crc_t x2nmodp(z_off64_t n, unsigned k) { 175 z_crc_t p; 176 177 p = (z_crc_t)1 << 31; /* x^0 == 1 */ 178 while (n) { 179 if (n & 1) 180 p = multmodp(x2n_table[k & 31], p); 181 n >>= 1; 182 k++; 183 } 184 return p; 185 } 186 187 #ifdef DYNAMIC_CRC_TABLE 188 /* ========================================================================= 189 * Build the tables for byte-wise and braided CRC-32 calculations, and a table 190 * of powers of x for combining CRC-32s. 191 */ 192 local z_crc_t FAR crc_table[256]; 193 #ifdef W 194 local z_word_t FAR crc_big_table[256]; 195 local z_crc_t FAR crc_braid_table[W][256]; 196 local z_word_t FAR crc_braid_big_table[W][256]; 197 local void braid(z_crc_t [][256], z_word_t [][256], int, int); 198 #endif 199 #ifdef MAKECRCH 200 local void write_table(FILE *, const z_crc_t FAR *, int); 201 local void write_table32hi(FILE *, const z_word_t FAR *, int); 202 local void write_table64(FILE *, const z_word_t FAR *, int); 203 #endif /* MAKECRCH */ 204 205 /* 206 Define a once() function depending on the availability of atomics. If this is 207 compiled with DYNAMIC_CRC_TABLE defined, and if CRCs will be computed in 208 multiple threads, and if atomics are not available, then get_crc_table() must 209 be called to initialize the tables and must return before any threads are 210 allowed to compute or combine CRCs. 211 */ 212 213 /* Definition of once functionality. */ 214 typedef struct once_s once_t; 215 216 /* Check for the availability of atomics. */ 217 #if defined(__STDC__) && __STDC_VERSION__ >= 201112L && \ 218 !defined(__STDC_NO_ATOMICS__) && !defined(SMALL) 219 220 #include <stdatomic.h> 221 222 /* Structure for once(), which must be initialized with ONCE_INIT. */ 223 struct once_s { 224 atomic_flag begun; 225 atomic_int done; 226 }; 227 #define ONCE_INIT {ATOMIC_FLAG_INIT, 0} 228 229 /* 230 Run the provided init() function exactly once, even if multiple threads 231 invoke once() at the same time. The state must be a once_t initialized with 232 ONCE_INIT. 233 */ 234 local void once(once_t *state, void (*init)(void)) { 235 if (!atomic_load(&state->done)) { 236 if (atomic_flag_test_and_set(&state->begun)) 237 while (!atomic_load(&state->done)) 238 ; 239 else { 240 init(); 241 atomic_store(&state->done, 1); 242 } 243 } 244 } 245 246 #else /* no atomics */ 247 248 /* Structure for once(), which must be initialized with ONCE_INIT. */ 249 struct once_s { 250 volatile int begun; 251 volatile int done; 252 }; 253 #define ONCE_INIT {0, 0} 254 255 /* Test and set. Alas, not atomic, but tries to minimize the period of 256 vulnerability. */ 257 local int test_and_set(int volatile *flag) { 258 int was; 259 260 was = *flag; 261 *flag = 1; 262 return was; 263 } 264 265 /* Run the provided init() function once. This is not thread-safe. */ 266 local void once(once_t *state, void (*init)(void)) { 267 if (!state->done) { 268 if (test_and_set(&state->begun)) 269 while (!state->done) 270 ; 271 else { 272 init(); 273 state->done = 1; 274 } 275 } 276 } 277 278 #endif 279 280 /* State for once(). */ 281 local once_t made = ONCE_INIT; 282 283 /* 284 Generate tables for a byte-wise 32-bit CRC calculation on the polynomial: 285 x^32+x^26+x^23+x^22+x^16+x^12+x^11+x^10+x^8+x^7+x^5+x^4+x^2+x+1. 286 287 Polynomials over GF(2) are represented in binary, one bit per coefficient, 288 with the lowest powers in the most significant bit. Then adding polynomials 289 is just exclusive-or, and multiplying a polynomial by x is a right shift by 290 one. If we call the above polynomial p, and represent a byte as the 291 polynomial q, also with the lowest power in the most significant bit (so the 292 byte 0xb1 is the polynomial x^7+x^3+x^2+1), then the CRC is (q*x^32) mod p, 293 where a mod b means the remainder after dividing a by b. 294 295 This calculation is done using the shift-register method of multiplying and 296 taking the remainder. The register is initialized to zero, and for each 297 incoming bit, x^32 is added mod p to the register if the bit is a one (where 298 x^32 mod p is p+x^32 = x^26+...+1), and the register is multiplied mod p by x 299 (which is shifting right by one and adding x^32 mod p if the bit shifted out 300 is a one). We start with the highest power (least significant bit) of q and 301 repeat for all eight bits of q. 302 303 The table is simply the CRC of all possible eight bit values. This is all the 304 information needed to generate CRCs on data a byte at a time for all 305 combinations of CRC register values and incoming bytes. 306 */ 307 308 local void make_crc_table(void) { 309 unsigned i, j, n; 310 z_crc_t p; 311 312 /* initialize the CRC of bytes tables */ 313 for (i = 0; i < 256; i++) { 314 p = i; 315 for (j = 0; j < 8; j++) 316 p = p & 1 ? (p >> 1) ^ POLY : p >> 1; 317 crc_table[i] = p; 318 #ifdef W 319 crc_big_table[i] = byte_swap(p); 320 #endif 321 } 322 323 /* initialize the x^2^n mod p(x) table */ 324 p = (z_crc_t)1 << 30; /* x^1 */ 325 x2n_table[0] = p; 326 for (n = 1; n < 32; n++) 327 x2n_table[n] = p = multmodp(p, p); 328 329 #ifdef W 330 /* initialize the braiding tables -- needs x2n_table[] */ 331 braid(crc_braid_table, crc_braid_big_table, N, W); 332 #endif 333 334 #ifdef MAKECRCH 335 { 336 /* 337 The crc32.h header file contains tables for both 32-bit and 64-bit 338 z_word_t's, and so requires a 64-bit type be available. In that case, 339 z_word_t must be defined to be 64-bits. This code then also generates 340 and writes out the tables for the case that z_word_t is 32 bits. 341 */ 342 #if !defined(W) || W != 8 343 # error Need a 64-bit integer type in order to generate crc32.h. 344 #endif 345 FILE *out; 346 int k, n; 347 z_crc_t ltl[8][256]; 348 z_word_t big[8][256]; 349 350 out = fopen("crc32.h", "w"); 351 if (out == NULL) return; 352 353 /* write out little-endian CRC table to crc32.h */ 354 fprintf(out, 355 "/* crc32.h -- tables for rapid CRC calculation\n" 356 " * Generated automatically by crc32.c\n */\n" 357 "\n" 358 "local const z_crc_t FAR crc_table[] = {\n" 359 " "); 360 write_table(out, crc_table, 256); 361 fprintf(out, 362 "};\n"); 363 364 /* write out big-endian CRC table for 64-bit z_word_t to crc32.h */ 365 fprintf(out, 366 "\n" 367 "#ifdef W\n" 368 "\n" 369 "#if W == 8\n" 370 "\n" 371 "local const z_word_t FAR crc_big_table[] = {\n" 372 " "); 373 write_table64(out, crc_big_table, 256); 374 fprintf(out, 375 "};\n"); 376 377 /* write out big-endian CRC table for 32-bit z_word_t to crc32.h */ 378 fprintf(out, 379 "\n" 380 "#else /* W == 4 */\n" 381 "\n" 382 "local const z_word_t FAR crc_big_table[] = {\n" 383 " "); 384 write_table32hi(out, crc_big_table, 256); 385 fprintf(out, 386 "};\n" 387 "\n" 388 "#endif\n"); 389 390 /* write out braid tables for each value of N */ 391 for (n = 1; n <= 6; n++) { 392 fprintf(out, 393 "\n" 394 "#if N == %d\n", n); 395 396 /* compute braid tables for this N and 64-bit word_t */ 397 braid(ltl, big, n, 8); 398 399 /* write out braid tables for 64-bit z_word_t to crc32.h */ 400 fprintf(out, 401 "\n" 402 "#if W == 8\n" 403 "\n" 404 "local const z_crc_t FAR crc_braid_table[][256] = {\n"); 405 for (k = 0; k < 8; k++) { 406 fprintf(out, " {"); 407 write_table(out, ltl[k], 256); 408 fprintf(out, "}%s", k < 7 ? ",\n" : ""); 409 } 410 fprintf(out, 411 "};\n" 412 "\n" 413 "local const z_word_t FAR crc_braid_big_table[][256] = {\n"); 414 for (k = 0; k < 8; k++) { 415 fprintf(out, " {"); 416 write_table64(out, big[k], 256); 417 fprintf(out, "}%s", k < 7 ? ",\n" : ""); 418 } 419 fprintf(out, 420 "};\n"); 421 422 /* compute braid tables for this N and 32-bit word_t */ 423 braid(ltl, big, n, 4); 424 425 /* write out braid tables for 32-bit z_word_t to crc32.h */ 426 fprintf(out, 427 "\n" 428 "#else /* W == 4 */\n" 429 "\n" 430 "local const z_crc_t FAR crc_braid_table[][256] = {\n"); 431 for (k = 0; k < 4; k++) { 432 fprintf(out, " {"); 433 write_table(out, ltl[k], 256); 434 fprintf(out, "}%s", k < 3 ? ",\n" : ""); 435 } 436 fprintf(out, 437 "};\n" 438 "\n" 439 "local const z_word_t FAR crc_braid_big_table[][256] = {\n"); 440 for (k = 0; k < 4; k++) { 441 fprintf(out, " {"); 442 write_table32hi(out, big[k], 256); 443 fprintf(out, "}%s", k < 3 ? ",\n" : ""); 444 } 445 fprintf(out, 446 "};\n" 447 "\n" 448 "#endif\n" 449 "\n" 450 "#endif\n"); 451 } 452 fprintf(out, 453 "\n" 454 "#endif\n"); 455 456 /* write out zeros operator table to crc32.h */ 457 fprintf(out, 458 "\n" 459 "local const z_crc_t FAR x2n_table[] = {\n" 460 " "); 461 write_table(out, x2n_table, 32); 462 fprintf(out, 463 "};\n"); 464 fclose(out); 465 } 466 #endif /* MAKECRCH */ 467 } 468 469 #ifdef MAKECRCH 470 471 /* 472 Write the 32-bit values in table[0..k-1] to out, five per line in 473 hexadecimal separated by commas. 474 */ 475 local void write_table(FILE *out, const z_crc_t FAR *table, int k) { 476 int n; 477 478 for (n = 0; n < k; n++) 479 fprintf(out, "%s0x%08lx%s", n == 0 || n % 5 ? "" : " ", 480 (unsigned long)(table[n]), 481 n == k - 1 ? "" : (n % 5 == 4 ? ",\n" : ", ")); 482 } 483 484 /* 485 Write the high 32-bits of each value in table[0..k-1] to out, five per line 486 in hexadecimal separated by commas. 487 */ 488 local void write_table32hi(FILE *out, const z_word_t FAR *table, int k) { 489 int n; 490 491 for (n = 0; n < k; n++) 492 fprintf(out, "%s0x%08lx%s", n == 0 || n % 5 ? "" : " ", 493 (unsigned long)(table[n] >> 32), 494 n == k - 1 ? "" : (n % 5 == 4 ? ",\n" : ", ")); 495 } 496 497 /* 498 Write the 64-bit values in table[0..k-1] to out, three per line in 499 hexadecimal separated by commas. This assumes that if there is a 64-bit 500 type, then there is also a long long integer type, and it is at least 64 501 bits. If not, then the type cast and format string can be adjusted 502 accordingly. 503 */ 504 local void write_table64(FILE *out, const z_word_t FAR *table, int k) { 505 int n; 506 507 for (n = 0; n < k; n++) 508 fprintf(out, "%s0x%016llx%s", n == 0 || n % 3 ? "" : " ", 509 (unsigned long long)(table[n]), 510 n == k - 1 ? "" : (n % 3 == 2 ? ",\n" : ", ")); 511 } 512 513 /* Actually do the deed. */ 514 int main(void) { 515 make_crc_table(); 516 return 0; 517 } 518 519 #endif /* MAKECRCH */ 520 521 #ifdef W 522 /* 523 Generate the little and big-endian braid tables for the given n and z_word_t 524 size w. Each array must have room for w blocks of 256 elements. 525 */ 526 local void braid(z_crc_t ltl[][256], z_word_t big[][256], int n, int w) { 527 int k; 528 z_crc_t i, p, q; 529 for (k = 0; k < w; k++) { 530 p = x2nmodp((n * w + 3 - k) << 3, 0); 531 ltl[k][0] = 0; 532 big[w - 1 - k][0] = 0; 533 for (i = 1; i < 256; i++) { 534 ltl[k][i] = q = multmodp(i << 24, p); 535 big[w - 1 - k][i] = byte_swap(q); 536 } 537 } 538 } 539 #endif 540 541 #endif /* DYNAMIC_CRC_TABLE */ 542 543 /* ========================================================================= 544 * This function can be used by asm versions of crc32(), and to force the 545 * generation of the CRC tables in a threaded application. 546 */ 547 const z_crc_t FAR * ZEXPORT get_crc_table(void) { 548 #ifdef DYNAMIC_CRC_TABLE 549 once(&made, make_crc_table); 550 #endif /* DYNAMIC_CRC_TABLE */ 551 return (const z_crc_t FAR *)crc_table; 552 } 553 554 /* ========================================================================= 555 * Use ARM machine instructions if available. This will compute the CRC about 556 * ten times faster than the braided calculation. This code does not check for 557 * the presence of the CRC instruction at run time. __ARM_FEATURE_CRC32 will 558 * only be defined if the compilation specifies an ARM processor architecture 559 * that has the instructions. For example, compiling with -march=armv8.1-a or 560 * -march=armv8-a+crc, or -march=native if the compile machine has the crc32 561 * instructions. 562 */ 563 #ifdef ARMCRC32 564 565 /* 566 Constants empirically determined to maximize speed. These values are from 567 measurements on a Cortex-A57. Your mileage may vary. 568 */ 569 #define Z_BATCH 3990 /* number of words in a batch */ 570 #define Z_BATCH_ZEROS 0xa10d3d0c /* computed from Z_BATCH = 3990 */ 571 #define Z_BATCH_MIN 800 /* fewest words in a final batch */ 572 573 unsigned long ZEXPORT crc32_z(unsigned long crc, const unsigned char FAR *buf, 574 z_size_t len) { 575 z_crc_t val; 576 z_word_t crc1, crc2; 577 const z_word_t *word; 578 z_word_t val0, val1, val2; 579 z_size_t last, last2, i; 580 z_size_t num; 581 582 /* Return initial CRC, if requested. */ 583 if (buf == Z_NULL) return 0; 584 585 #ifdef DYNAMIC_CRC_TABLE 586 once(&made, make_crc_table); 587 #endif /* DYNAMIC_CRC_TABLE */ 588 589 /* Pre-condition the CRC */ 590 crc = (~crc) & 0xffffffff; 591 592 /* Compute the CRC up to a word boundary. */ 593 while (len && ((z_size_t)buf & 7) != 0) { 594 len--; 595 val = *buf++; 596 __asm__ volatile("crc32b %w0, %w0, %w1" : "+r"(crc) : "r"(val)); 597 } 598 599 /* Prepare to compute the CRC on full 64-bit words word[0..num-1]. */ 600 word = (z_word_t const *)buf; 601 num = len >> 3; 602 len &= 7; 603 604 /* Do three interleaved CRCs to realize the throughput of one crc32x 605 instruction per cycle. Each CRC is calculated on Z_BATCH words. The 606 three CRCs are combined into a single CRC after each set of batches. */ 607 while (num >= 3 * Z_BATCH) { 608 crc1 = 0; 609 crc2 = 0; 610 for (i = 0; i < Z_BATCH; i++) { 611 val0 = word[i]; 612 val1 = word[i + Z_BATCH]; 613 val2 = word[i + 2 * Z_BATCH]; 614 __asm__ volatile("crc32x %w0, %w0, %x1" : "+r"(crc) : "r"(val0)); 615 __asm__ volatile("crc32x %w0, %w0, %x1" : "+r"(crc1) : "r"(val1)); 616 __asm__ volatile("crc32x %w0, %w0, %x1" : "+r"(crc2) : "r"(val2)); 617 } 618 word += 3 * Z_BATCH; 619 num -= 3 * Z_BATCH; 620 crc = multmodp(Z_BATCH_ZEROS, crc) ^ crc1; 621 crc = multmodp(Z_BATCH_ZEROS, crc) ^ crc2; 622 } 623 624 /* Do one last smaller batch with the remaining words, if there are enough 625 to pay for the combination of CRCs. */ 626 last = num / 3; 627 if (last >= Z_BATCH_MIN) { 628 last2 = last << 1; 629 crc1 = 0; 630 crc2 = 0; 631 for (i = 0; i < last; i++) { 632 val0 = word[i]; 633 val1 = word[i + last]; 634 val2 = word[i + last2]; 635 __asm__ volatile("crc32x %w0, %w0, %x1" : "+r"(crc) : "r"(val0)); 636 __asm__ volatile("crc32x %w0, %w0, %x1" : "+r"(crc1) : "r"(val1)); 637 __asm__ volatile("crc32x %w0, %w0, %x1" : "+r"(crc2) : "r"(val2)); 638 } 639 word += 3 * last; 640 num -= 3 * last; 641 val = x2nmodp(last, 6); 642 crc = multmodp(val, crc) ^ crc1; 643 crc = multmodp(val, crc) ^ crc2; 644 } 645 646 /* Compute the CRC on any remaining words. */ 647 for (i = 0; i < num; i++) { 648 val0 = word[i]; 649 __asm__ volatile("crc32x %w0, %w0, %x1" : "+r"(crc) : "r"(val0)); 650 } 651 word += num; 652 653 /* Complete the CRC on any remaining bytes. */ 654 buf = (const unsigned char FAR *)word; 655 while (len) { 656 len--; 657 val = *buf++; 658 __asm__ volatile("crc32b %w0, %w0, %w1" : "+r"(crc) : "r"(val)); 659 } 660 661 /* Return the CRC, post-conditioned. */ 662 return crc ^ 0xffffffff; 663 } 664 665 #else 666 667 #ifdef W 668 669 /* 670 Return the CRC of the W bytes in the word_t data, taking the 671 least-significant byte of the word as the first byte of data, without any pre 672 or post conditioning. This is used to combine the CRCs of each braid. 673 */ 674 local z_crc_t crc_word(z_word_t data) { 675 int k; 676 for (k = 0; k < W; k++) 677 data = (data >> 8) ^ crc_table[data & 0xff]; 678 return (z_crc_t)data; 679 } 680 681 local z_word_t crc_word_big(z_word_t data) { 682 int k; 683 for (k = 0; k < W; k++) 684 data = (data << 8) ^ 685 crc_big_table[(data >> ((W - 1) << 3)) & 0xff]; 686 return data; 687 } 688 689 #endif 690 691 /* ========================================================================= */ 692 unsigned long ZEXPORT crc32_z(unsigned long crc, const unsigned char FAR *buf, 693 z_size_t len) { 694 /* Return initial CRC, if requested. */ 695 if (buf == Z_NULL) return 0; 696 697 #ifdef DYNAMIC_CRC_TABLE 698 once(&made, make_crc_table); 699 #endif /* DYNAMIC_CRC_TABLE */ 700 701 /* Pre-condition the CRC */ 702 crc = (~crc) & 0xffffffff; 703 704 #ifdef W 705 706 /* If provided enough bytes, do a braided CRC calculation. */ 707 if (len >= N * W + W - 1) { 708 z_size_t blks; 709 z_word_t const *words; 710 unsigned endian; 711 int k; 712 713 /* Compute the CRC up to a z_word_t boundary. */ 714 while (len && ((z_size_t)buf & (W - 1)) != 0) { 715 len--; 716 crc = (crc >> 8) ^ crc_table[(crc ^ *buf++) & 0xff]; 717 } 718 719 /* Compute the CRC on as many N z_word_t blocks as are available. */ 720 blks = len / (N * W); 721 len -= blks * N * W; 722 words = (z_word_t const *)buf; 723 724 /* Do endian check at execution time instead of compile time, since ARM 725 processors can change the endianness at execution time. If the 726 compiler knows what the endianness will be, it can optimize out the 727 check and the unused branch. */ 728 endian = 1; 729 if (*(unsigned char *)&endian) { 730 /* Little endian. */ 731 732 z_crc_t crc0; 733 z_word_t word0; 734 #if N > 1 735 z_crc_t crc1; 736 z_word_t word1; 737 #if N > 2 738 z_crc_t crc2; 739 z_word_t word2; 740 #if N > 3 741 z_crc_t crc3; 742 z_word_t word3; 743 #if N > 4 744 z_crc_t crc4; 745 z_word_t word4; 746 #if N > 5 747 z_crc_t crc5; 748 z_word_t word5; 749 #endif 750 #endif 751 #endif 752 #endif 753 #endif 754 755 /* Initialize the CRC for each braid. */ 756 crc0 = crc; 757 #if N > 1 758 crc1 = 0; 759 #if N > 2 760 crc2 = 0; 761 #if N > 3 762 crc3 = 0; 763 #if N > 4 764 crc4 = 0; 765 #if N > 5 766 crc5 = 0; 767 #endif 768 #endif 769 #endif 770 #endif 771 #endif 772 773 /* 774 Process the first blks-1 blocks, computing the CRCs on each braid 775 independently. 776 */ 777 while (--blks) { 778 /* Load the word for each braid into registers. */ 779 word0 = crc0 ^ words[0]; 780 #if N > 1 781 word1 = crc1 ^ words[1]; 782 #if N > 2 783 word2 = crc2 ^ words[2]; 784 #if N > 3 785 word3 = crc3 ^ words[3]; 786 #if N > 4 787 word4 = crc4 ^ words[4]; 788 #if N > 5 789 word5 = crc5 ^ words[5]; 790 #endif 791 #endif 792 #endif 793 #endif 794 #endif 795 words += N; 796 797 /* Compute and update the CRC for each word. The loop should 798 get unrolled. */ 799 crc0 = crc_braid_table[0][word0 & 0xff]; 800 #if N > 1 801 crc1 = crc_braid_table[0][word1 & 0xff]; 802 #if N > 2 803 crc2 = crc_braid_table[0][word2 & 0xff]; 804 #if N > 3 805 crc3 = crc_braid_table[0][word3 & 0xff]; 806 #if N > 4 807 crc4 = crc_braid_table[0][word4 & 0xff]; 808 #if N > 5 809 crc5 = crc_braid_table[0][word5 & 0xff]; 810 #endif 811 #endif 812 #endif 813 #endif 814 #endif 815 for (k = 1; k < W; k++) { 816 crc0 ^= crc_braid_table[k][(word0 >> (k << 3)) & 0xff]; 817 #if N > 1 818 crc1 ^= crc_braid_table[k][(word1 >> (k << 3)) & 0xff]; 819 #if N > 2 820 crc2 ^= crc_braid_table[k][(word2 >> (k << 3)) & 0xff]; 821 #if N > 3 822 crc3 ^= crc_braid_table[k][(word3 >> (k << 3)) & 0xff]; 823 #if N > 4 824 crc4 ^= crc_braid_table[k][(word4 >> (k << 3)) & 0xff]; 825 #if N > 5 826 crc5 ^= crc_braid_table[k][(word5 >> (k << 3)) & 0xff]; 827 #endif 828 #endif 829 #endif 830 #endif 831 #endif 832 } 833 } 834 835 /* 836 Process the last block, combining the CRCs of the N braids at the 837 same time. 838 */ 839 crc = crc_word(crc0 ^ words[0]); 840 #if N > 1 841 crc = crc_word(crc1 ^ words[1] ^ crc); 842 #if N > 2 843 crc = crc_word(crc2 ^ words[2] ^ crc); 844 #if N > 3 845 crc = crc_word(crc3 ^ words[3] ^ crc); 846 #if N > 4 847 crc = crc_word(crc4 ^ words[4] ^ crc); 848 #if N > 5 849 crc = crc_word(crc5 ^ words[5] ^ crc); 850 #endif 851 #endif 852 #endif 853 #endif 854 #endif 855 words += N; 856 } 857 else { 858 /* Big endian. */ 859 860 z_word_t crc0, word0, comb; 861 #if N > 1 862 z_word_t crc1, word1; 863 #if N > 2 864 z_word_t crc2, word2; 865 #if N > 3 866 z_word_t crc3, word3; 867 #if N > 4 868 z_word_t crc4, word4; 869 #if N > 5 870 z_word_t crc5, word5; 871 #endif 872 #endif 873 #endif 874 #endif 875 #endif 876 877 /* Initialize the CRC for each braid. */ 878 crc0 = byte_swap(crc); 879 #if N > 1 880 crc1 = 0; 881 #if N > 2 882 crc2 = 0; 883 #if N > 3 884 crc3 = 0; 885 #if N > 4 886 crc4 = 0; 887 #if N > 5 888 crc5 = 0; 889 #endif 890 #endif 891 #endif 892 #endif 893 #endif 894 895 /* 896 Process the first blks-1 blocks, computing the CRCs on each braid 897 independently. 898 */ 899 while (--blks) { 900 /* Load the word for each braid into registers. */ 901 word0 = crc0 ^ words[0]; 902 #if N > 1 903 word1 = crc1 ^ words[1]; 904 #if N > 2 905 word2 = crc2 ^ words[2]; 906 #if N > 3 907 word3 = crc3 ^ words[3]; 908 #if N > 4 909 word4 = crc4 ^ words[4]; 910 #if N > 5 911 word5 = crc5 ^ words[5]; 912 #endif 913 #endif 914 #endif 915 #endif 916 #endif 917 words += N; 918 919 /* Compute and update the CRC for each word. The loop should 920 get unrolled. */ 921 crc0 = crc_braid_big_table[0][word0 & 0xff]; 922 #if N > 1 923 crc1 = crc_braid_big_table[0][word1 & 0xff]; 924 #if N > 2 925 crc2 = crc_braid_big_table[0][word2 & 0xff]; 926 #if N > 3 927 crc3 = crc_braid_big_table[0][word3 & 0xff]; 928 #if N > 4 929 crc4 = crc_braid_big_table[0][word4 & 0xff]; 930 #if N > 5 931 crc5 = crc_braid_big_table[0][word5 & 0xff]; 932 #endif 933 #endif 934 #endif 935 #endif 936 #endif 937 for (k = 1; k < W; k++) { 938 crc0 ^= crc_braid_big_table[k][(word0 >> (k << 3)) & 0xff]; 939 #if N > 1 940 crc1 ^= crc_braid_big_table[k][(word1 >> (k << 3)) & 0xff]; 941 #if N > 2 942 crc2 ^= crc_braid_big_table[k][(word2 >> (k << 3)) & 0xff]; 943 #if N > 3 944 crc3 ^= crc_braid_big_table[k][(word3 >> (k << 3)) & 0xff]; 945 #if N > 4 946 crc4 ^= crc_braid_big_table[k][(word4 >> (k << 3)) & 0xff]; 947 #if N > 5 948 crc5 ^= crc_braid_big_table[k][(word5 >> (k << 3)) & 0xff]; 949 #endif 950 #endif 951 #endif 952 #endif 953 #endif 954 } 955 } 956 957 /* 958 Process the last block, combining the CRCs of the N braids at the 959 same time. 960 */ 961 comb = crc_word_big(crc0 ^ words[0]); 962 #if N > 1 963 comb = crc_word_big(crc1 ^ words[1] ^ comb); 964 #if N > 2 965 comb = crc_word_big(crc2 ^ words[2] ^ comb); 966 #if N > 3 967 comb = crc_word_big(crc3 ^ words[3] ^ comb); 968 #if N > 4 969 comb = crc_word_big(crc4 ^ words[4] ^ comb); 970 #if N > 5 971 comb = crc_word_big(crc5 ^ words[5] ^ comb); 972 #endif 973 #endif 974 #endif 975 #endif 976 #endif 977 words += N; 978 crc = byte_swap(comb); 979 } 980 981 /* 982 Update the pointer to the remaining bytes to process. 983 */ 984 buf = (unsigned char const *)words; 985 } 986 987 #endif /* W */ 988 989 /* Complete the computation of the CRC on any remaining bytes. */ 990 while (len >= 8) { 991 len -= 8; 992 crc = (crc >> 8) ^ crc_table[(crc ^ *buf++) & 0xff]; 993 crc = (crc >> 8) ^ crc_table[(crc ^ *buf++) & 0xff]; 994 crc = (crc >> 8) ^ crc_table[(crc ^ *buf++) & 0xff]; 995 crc = (crc >> 8) ^ crc_table[(crc ^ *buf++) & 0xff]; 996 crc = (crc >> 8) ^ crc_table[(crc ^ *buf++) & 0xff]; 997 crc = (crc >> 8) ^ crc_table[(crc ^ *buf++) & 0xff]; 998 crc = (crc >> 8) ^ crc_table[(crc ^ *buf++) & 0xff]; 999 crc = (crc >> 8) ^ crc_table[(crc ^ *buf++) & 0xff]; 1000 } 1001 while (len) { 1002 len--; 1003 crc = (crc >> 8) ^ crc_table[(crc ^ *buf++) & 0xff]; 1004 } 1005 1006 /* Return the CRC, post-conditioned. */ 1007 return crc ^ 0xffffffff; 1008 } 1009 1010 #endif 1011 1012 /* ========================================================================= */ 1013 unsigned long ZEXPORT crc32(unsigned long crc, const unsigned char FAR *buf, 1014 uInt len) { 1015 return crc32_z(crc, buf, len); 1016 } 1017 1018 /* ========================================================================= */ 1019 uLong ZEXPORT crc32_combine64(uLong crc1, uLong crc2, z_off64_t len2) { 1020 #ifdef DYNAMIC_CRC_TABLE 1021 once(&made, make_crc_table); 1022 #endif /* DYNAMIC_CRC_TABLE */ 1023 return multmodp(x2nmodp(len2, 3), crc1) ^ (crc2 & 0xffffffff); 1024 } 1025 1026 /* ========================================================================= */ 1027 uLong ZEXPORT crc32_combine(uLong crc1, uLong crc2, z_off_t len2) { 1028 return crc32_combine64(crc1, crc2, (z_off64_t)len2); 1029 } 1030 1031 /* ========================================================================= */ 1032 uLong ZEXPORT crc32_combine_gen64(z_off64_t len2) { 1033 #ifdef DYNAMIC_CRC_TABLE 1034 once(&made, make_crc_table); 1035 #endif /* DYNAMIC_CRC_TABLE */ 1036 return x2nmodp(len2, 3); 1037 } 1038 1039 /* ========================================================================= */ 1040 uLong ZEXPORT crc32_combine_gen(z_off_t len2) { 1041 return crc32_combine_gen64((z_off64_t)len2); 1042 } 1043 1044 /* ========================================================================= */ 1045 uLong ZEXPORT crc32_combine_op(uLong crc1, uLong crc2, uLong op) { 1046 return multmodp(op, crc1) ^ (crc2 & 0xffffffff); 1047 } 1048