;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;  Copyright(c) 2011-2015 Intel Corporation All rights reserved.
;
;  Redistribution and use in source and binary forms, with or without
;  modification, are permitted provided that the following conditions
;  are met:
;    * Redistributions of source code must retain the above copyright
;      notice, this list of conditions and the following disclaimer.
;    * Redistributions in binary form must reproduce the above copyright
;      notice, this list of conditions and the following disclaimer in
;      the documentation and/or other materials provided with the
;      distribution.
;    * Neither the name of Intel Corporation nor the names of its
;      contributors may be used to endorse or promote products derived
;      from this software without specific prior written permission.
;
;  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
;  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
;  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
;  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
;  OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
;  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
;  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
;  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
;  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
;  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
;  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

;       Function API:
;       UINT16 crc16_t10dif_01(
;               UINT16 init_crc,          // initial CRC value, 16 bits
;               const unsigned char *buf, // buffer pointer to calculate CRC on
;               UINT64 len                // buffer length in bytes (64-bit data)
;       );
;
;       Authors:
;               Erdinc Ozturk
;               Vinodh Gopal
;               James Guilford
;
;       Reference paper titled "Fast CRC Computation for Generic Polynomials Using PCLMULQDQ Instruction"
;       URL: http://www.intel.com/content/dam/www/public/us/en/documents/white-papers/fast-crc-computation-generic-polynomials-pclmulqdq-paper.pdf

%include "reg_sizes.asm"

; distance (in bytes) ahead of the current buffer pointer that the main loop prefetches
%define fetch_dist      1024

[bits 64]
default rel

section .text

; Argument registers: Win64 passes (rcx, rdx, r8); every other output format
; is treated as System V (rdi, rsi, rdx).
%ifidn __OUTPUT_FORMAT__, win64
        %xdefine        arg1 rcx
        %xdefine        arg2 rdx
        %xdefine        arg3 r8

        %xdefine        arg1_low32 ecx
%else
        %xdefine        arg1 rdi
        %xdefine        arg2 rsi
        %xdefine        arg3 rdx

        %xdefine        arg1_low32 edi
%endif

; Stack frame layout (relative to rsp after the "sub rsp, VARIABLE_OFFSET"):
;   [rsp + 0 .. 15]          16-byte scratch used to assemble inputs shorter than 16 bytes
;   [rsp + XMM_SAVE ...]     (win64 only) save area for xmm6-xmm13
; VARIABLE_OFFSET is 8 mod 16, so rsp becomes 16-byte aligned when it was
; 8 mod 16 on entry (i.e. right after a call from a 16-byte aligned caller).
%ifidn __OUTPUT_FORMAT__, win64
        %define XMM_SAVE        16*2
        %define VARIABLE_OFFSET 16*10+8
%else
        %define VARIABLE_OFFSET 16*2+8
%endif

;-----------------------------------------------------------------------
; UINT16 crc16_t10dif_01(UINT16 init_crc, const unsigned char *buf, UINT64 len)
;
; In:    arg1 = init_crc (shifted left by 16 in the 32-bit register, so only
;               its low 16 bits contribute)
;        arg2 = buf
;        arg3 = len in bytes
; Out:   eax  = CRC in the low 16 bits (result is shifted right by 16 before return);
;               for len == 0 this is init_crc
; Clobb: rax, r9, r11, arg1, arg2, arg3, flags, xmm0-xmm13
;        (on win64, xmm6-xmm13 are saved on entry and restored before return)
; Stack: VARIABLE_OFFSET bytes; [rsp] is accessed with movdqa and therefore
;        must be 16-byte aligned after the adjustment (see layout above)
; ISA:   uses pshufb, pblendvb, pextrd and pclmulqdq
;
; Register roles in the folding code:
;        xmm10 = current pair of folding constants (low/high qword selected
;                by the pclmulqdq immediate)
;        xmm11 = SHUF_MASK (byte-reflection mask)
;        xmm0-xmm7 = eight 16-byte accumulators in the 128B loop; everything
;                is finally folded into xmm7
;-----------------------------------------------------------------------
align 16
global crc16_t10dif_01:function
crc16_t10dif_01:

        ; adjust the 16-bit initial_crc value, scale it to 32 bits
        shl     arg1_low32, 16

        ; After this point, code flow is exactly same as a 32-bit CRC.
        ; The only difference is before returning eax, we will shift it right 16 bits, to scale back to 16 bits.

        sub     rsp, VARIABLE_OFFSET
%ifidn __OUTPUT_FORMAT__, win64
        ; save xmm6-xmm13 on the stack; they are callee-saved in the
        ; Microsoft x64 calling convention and are restored in _cleanup
        movdqa  [rsp + XMM_SAVE + 16*0], xmm6
        movdqa  [rsp + XMM_SAVE + 16*1], xmm7
        movdqa  [rsp + XMM_SAVE + 16*2], xmm8
        movdqa  [rsp + XMM_SAVE + 16*3], xmm9
        movdqa  [rsp + XMM_SAVE + 16*4], xmm10
        movdqa  [rsp + XMM_SAVE + 16*5], xmm11
        movdqa  [rsp + XMM_SAVE + 16*6], xmm12
        movdqa  [rsp + XMM_SAVE + 16*7], xmm13
%endif

        ; check if smaller than 256
        cmp     arg3, 256

        ; for sizes less than 256, we can't fold 128B at a time...
        ; NOTE(review): jl is a signed comparison, so a len >= 2^63 would be
        ; treated as negative and take the short-buffer path - confirm callers
        ; never pass such lengths.
        jl      _less_than_256


        ; load the initial crc value
        movd    xmm10, arg1_low32       ; initial crc

        ; crc value does not need to be byte-reflected, but it needs to be moved to the high part of the register.
        ; because data will be byte-reflected and will align with initial crc at correct place.
        pslldq  xmm10, 12

        movdqa  xmm11, [SHUF_MASK]
        ; receive the initial 128B data, xor the initial crc value
        movdqu  xmm0, [arg2+16*0]
        movdqu  xmm1, [arg2+16*1]
        movdqu  xmm2, [arg2+16*2]
        movdqu  xmm3, [arg2+16*3]
        movdqu  xmm4, [arg2+16*4]
        movdqu  xmm5, [arg2+16*5]
        movdqu  xmm6, [arg2+16*6]
        movdqu  xmm7, [arg2+16*7]

        pshufb  xmm0, xmm11
        ; XOR the initial_crc value
        pxor    xmm0, xmm10
        pshufb  xmm1, xmm11
        pshufb  xmm2, xmm11
        pshufb  xmm3, xmm11
        pshufb  xmm4, xmm11
        pshufb  xmm5, xmm11
        pshufb  xmm6, xmm11
        pshufb  xmm7, xmm11

        movdqa  xmm10, [rk3]            ; xmm10 has rk3 and rk4
                                        ; imm value of pclmulqdq instruction will determine which constant to use
        ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
        ; we subtract 256 instead of 128 to save one instruction from the loop
        sub     arg3, 256

        ; at this section of the code, there is 128*x+y (0<=y<128) bytes of buffer. The _fold_128_B_loop
        ; loop will fold 128B at a time until we have 128+y Bytes of buffer


        ; fold 128B at a time. This section of the code folds 8 xmm registers in parallel
_fold_128_B_loop:

        ; update the buffer pointer
        add     arg2, 128               ; buf += 128;

        ; fold xmm0/xmm1 with the next 32 bytes of input
        prefetchnta [arg2+fetch_dist+0]
        movdqu  xmm9, [arg2+16*0]
        movdqu  xmm12, [arg2+16*1]
        pshufb  xmm9, xmm11
        pshufb  xmm12, xmm11
        movdqa  xmm8, xmm0
        movdqa  xmm13, xmm1
        pclmulqdq       xmm0, xmm10, 0x0
        pclmulqdq       xmm8, xmm10, 0x11
        pclmulqdq       xmm1, xmm10, 0x0
        pclmulqdq       xmm13, xmm10, 0x11
        pxor    xmm0, xmm9
        xorps   xmm0, xmm8
        pxor    xmm1, xmm12
        xorps   xmm1, xmm13

        ; fold xmm2/xmm3 with the next 32 bytes of input
        prefetchnta [arg2+fetch_dist+32]
        movdqu  xmm9, [arg2+16*2]
        movdqu  xmm12, [arg2+16*3]
        pshufb  xmm9, xmm11
        pshufb  xmm12, xmm11
        movdqa  xmm8, xmm2
        movdqa  xmm13, xmm3
        pclmulqdq       xmm2, xmm10, 0x0
        pclmulqdq       xmm8, xmm10, 0x11
        pclmulqdq       xmm3, xmm10, 0x0
        pclmulqdq       xmm13, xmm10, 0x11
        pxor    xmm2, xmm9
        xorps   xmm2, xmm8
        pxor    xmm3, xmm12
        xorps   xmm3, xmm13

        ; fold xmm4/xmm5 with the next 32 bytes of input
        prefetchnta [arg2+fetch_dist+64]
        movdqu  xmm9, [arg2+16*4]
        movdqu  xmm12, [arg2+16*5]
        pshufb  xmm9, xmm11
        pshufb  xmm12, xmm11
        movdqa  xmm8, xmm4
        movdqa  xmm13, xmm5
        pclmulqdq       xmm4, xmm10, 0x0
        pclmulqdq       xmm8, xmm10, 0x11
        pclmulqdq       xmm5, xmm10, 0x0
        pclmulqdq       xmm13, xmm10, 0x11
        pxor    xmm4, xmm9
        xorps   xmm4, xmm8
        pxor    xmm5, xmm12
        xorps   xmm5, xmm13

        ; fold xmm6/xmm7 with the next 32 bytes of input
        prefetchnta [arg2+fetch_dist+96]
        movdqu  xmm9, [arg2+16*6]
        movdqu  xmm12, [arg2+16*7]
        pshufb  xmm9, xmm11
        pshufb  xmm12, xmm11
        movdqa  xmm8, xmm6
        movdqa  xmm13, xmm7
        pclmulqdq       xmm6, xmm10, 0x0
        pclmulqdq       xmm8, xmm10, 0x11
        pclmulqdq       xmm7, xmm10, 0x0
        pclmulqdq       xmm13, xmm10, 0x11
        pxor    xmm6, xmm9
        xorps   xmm6, xmm8
        pxor    xmm7, xmm12
        xorps   xmm7, xmm13

        sub     arg3, 128

        ; check if there is another 128B in the buffer to be able to fold
        jge     _fold_128_B_loop
        ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;


        add     arg2, 128
        ; at this point, the buffer pointer is pointing at the last y Bytes of the buffer
        ; fold the 8 xmm registers to 1 xmm register with different constants

        movdqa  xmm10, [rk9]            ; rk9 and rk10: fold xmm0 into xmm7
        movdqa  xmm8, xmm0
        pclmulqdq       xmm0, xmm10, 0x11
        pclmulqdq       xmm8, xmm10, 0x0
        pxor    xmm7, xmm8
        xorps   xmm7, xmm0

        movdqa  xmm10, [rk11]           ; rk11 and rk12: fold xmm1 into xmm7
        movdqa  xmm8, xmm1
        pclmulqdq       xmm1, xmm10, 0x11
        pclmulqdq       xmm8, xmm10, 0x0
        pxor    xmm7, xmm8
        xorps   xmm7, xmm1

        movdqa  xmm10, [rk13]           ; rk13 and rk14: fold xmm2 into xmm7
        movdqa  xmm8, xmm2
        pclmulqdq       xmm2, xmm10, 0x11
        pclmulqdq       xmm8, xmm10, 0x0
        pxor    xmm7, xmm8
        pxor    xmm7, xmm2

        movdqa  xmm10, [rk15]           ; rk15 and rk16: fold xmm3 into xmm7
        movdqa  xmm8, xmm3
        pclmulqdq       xmm3, xmm10, 0x11
        pclmulqdq       xmm8, xmm10, 0x0
        pxor    xmm7, xmm8
        xorps   xmm7, xmm3

        movdqa  xmm10, [rk17]           ; rk17 and rk18: fold xmm4 into xmm7
        movdqa  xmm8, xmm4
        pclmulqdq       xmm4, xmm10, 0x11
        pclmulqdq       xmm8, xmm10, 0x0
        pxor    xmm7, xmm8
        pxor    xmm7, xmm4

        movdqa  xmm10, [rk19]           ; rk19 and rk20: fold xmm5 into xmm7
        movdqa  xmm8, xmm5
        pclmulqdq       xmm5, xmm10, 0x11
        pclmulqdq       xmm8, xmm10, 0x0
        pxor    xmm7, xmm8
        xorps   xmm7, xmm5

        movdqa  xmm10, [rk1]            ; xmm10 has rk1 and rk2
                                        ; imm value of pclmulqdq instruction will determine which constant to use
        movdqa  xmm8, xmm6
        pclmulqdq       xmm6, xmm10, 0x11
        pclmulqdq       xmm8, xmm10, 0x0
        pxor    xmm7, xmm8
        pxor    xmm7, xmm6


        ; instead of 128, we add 112 to the loop counter to save 1 instruction from the loop
        ; instead of a cmp instruction, we use the negative flag with the jl instruction
        add     arg3, 128-16
        jl      _final_reduction_for_128

        ; now we have 16+y bytes left to reduce. 16 Bytes is in register xmm7 and the rest is in memory
        ; we can fold 16 bytes at a time if y>=16
        ; continue folding 16B at a time

_16B_reduction_loop:
        movdqa  xmm8, xmm7
        pclmulqdq       xmm7, xmm10, 0x11
        pclmulqdq       xmm8, xmm10, 0x0
        pxor    xmm7, xmm8
        movdqu  xmm0, [arg2]
        pshufb  xmm0, xmm11
        pxor    xmm7, xmm0
        add     arg2, 16
        sub     arg3, 16
        ; instead of a cmp instruction, we utilize the flags with the jge instruction
        ; equivalent of: cmp arg3, 16-16
        ; check if there is any more 16B in the buffer to be able to fold
        jge     _16B_reduction_loop

        ; now we have 16+z bytes left to reduce, where 0<= z < 16.
        ; first, we reduce the data in the xmm7 register


_final_reduction_for_128:
        ; check if any more data to fold. If not, compute the CRC of the final 128 bits
        add     arg3, 16
        je      _128_done

        ; here we are getting data that is less than 16 bytes.
        ; since we know that there was data before the pointer, we can offset the input pointer before the actual point, to receive exactly 16 bytes.
        ; after that the registers need to be adjusted.
_get_last_two_xmms:
        movdqa  xmm2, xmm7

        movdqu  xmm1, [arg2 - 16 + arg3]        ; last 16 bytes of the buffer (overlaps data already folded)
        pshufb  xmm1, xmm11

        ; get rid of the extra data that was loaded before
        ; load the shift constant
        lea     rax, [pshufb_shf_table + 16]
        sub     rax, arg3
        movdqu  xmm0, [rax]

        ; shift xmm2 to the left by arg3 bytes
        pshufb  xmm2, xmm0

        ; shift xmm7 to the right by 16-arg3 bytes
        pxor    xmm0, [mask1]
        pshufb  xmm7, xmm0
        pblendvb        xmm1, xmm2      ; xmm0 is implicit

        ; fold 16 Bytes
        movdqa  xmm2, xmm1
        movdqa  xmm8, xmm7
        pclmulqdq       xmm7, xmm10, 0x11
        pclmulqdq       xmm8, xmm10, 0x0
        pxor    xmm7, xmm8
        pxor    xmm7, xmm2

_128_done:
        ; compute crc of a 128-bit value
        movdqa  xmm10, [rk5]            ; rk5 and rk6 in xmm10
        movdqa  xmm0, xmm7

        ; 64b fold
        pclmulqdq       xmm7, xmm10, 0x1
        pslldq  xmm0, 8
        pxor    xmm7, xmm0

        ; 32b fold
        movdqa  xmm0, xmm7

        pand    xmm0, [mask2]

        psrldq  xmm7, 12
        pclmulqdq       xmm7, xmm10, 0x10
        pxor    xmm7, xmm0

        ; barrett reduction
_barrett:
        movdqa  xmm10, [rk7]            ; rk7 and rk8 in xmm10
        movdqa  xmm0, xmm7
        pclmulqdq       xmm7, xmm10, 0x01
        pslldq  xmm7, 4
        pclmulqdq       xmm7, xmm10, 0x11

        pslldq  xmm7, 4
        pxor    xmm7, xmm0
        pextrd  eax, xmm7, 1

_cleanup:
        ; scale the result back to 16 bits
        shr     eax, 16
%ifidn __OUTPUT_FORMAT__, win64
        ; restore the callee-saved xmm registers saved in the prologue
        movdqa  xmm6, [rsp + XMM_SAVE + 16*0]
        movdqa  xmm7, [rsp + XMM_SAVE + 16*1]
        movdqa  xmm8, [rsp + XMM_SAVE + 16*2]
        movdqa  xmm9, [rsp + XMM_SAVE + 16*3]
        movdqa  xmm10, [rsp + XMM_SAVE + 16*4]
        movdqa  xmm11, [rsp + XMM_SAVE + 16*5]
        movdqa  xmm12, [rsp + XMM_SAVE + 16*6]
        movdqa  xmm13, [rsp + XMM_SAVE + 16*7]
%endif
        add     rsp, VARIABLE_OFFSET
        ret


;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

align 16
_less_than_256:

        ; check if there is enough buffer to be able to fold 16B at a time
        cmp     arg3, 32
        jl      _less_than_32
        movdqa  xmm11, [SHUF_MASK]

        ; if there is, load the constants
        movdqa  xmm10, [rk1]            ; rk1 and rk2 in xmm10

        movd    xmm0, arg1_low32        ; get the initial crc value
        pslldq  xmm0, 12                ; align it to its correct place
        movdqu  xmm7, [arg2]            ; load the plaintext
        pshufb  xmm7, xmm11             ; byte-reflect the plaintext
        pxor    xmm7, xmm0


        ; update the buffer pointer
        add     arg2, 16

        ; update the counter. subtract 32 instead of 16 to save one instruction from the loop
        sub     arg3, 32

        jmp     _16B_reduction_loop


align 16
_less_than_32:
        ; mov initial crc to the return value. this is necessary for zero-length buffers.
        ; (eax holds init_crc << 16 here; _cleanup shifts it back down)
        mov     eax, arg1_low32
        test    arg3, arg3
        je      _cleanup

        movdqa  xmm11, [SHUF_MASK]

        movd    xmm0, arg1_low32        ; get the initial crc value
        pslldq  xmm0, 12                ; align it to its correct place

        cmp     arg3, 16
        je      _exact_16_left
        jl      _less_than_16_left

        ; 16 < len < 32: fold the first 16 bytes, then handle the tail
        movdqu  xmm7, [arg2]            ; load the plaintext
        pshufb  xmm7, xmm11             ; byte-reflect the plaintext
        pxor    xmm7, xmm0              ; xor the initial crc value
        add     arg2, 16
        sub     arg3, 16
        movdqa  xmm10, [rk1]            ; rk1 and rk2 in xmm10
        jmp     _get_last_two_xmms


align 16
_less_than_16_left:
        ; use stack space to load data less than 16 bytes, zero-out the 16B in memory first.

        pxor    xmm1, xmm1
        mov     r11, rsp                ; r11 = write cursor into the scratch slot
        movdqa  [r11], xmm1

        cmp     arg3, 4
        jl      _only_less_than_4

        ; backup the counter value (needed after the copy to pick the shift constant)
        mov     r9, arg3
        cmp     arg3, 8
        jl      _less_than_8_left

        ; load 8 Bytes
        mov     rax, [arg2]
        mov     [r11], rax
        add     r11, 8
        sub     arg3, 8
        add     arg2, 8
_less_than_8_left:

        cmp     arg3, 4
        jl      _less_than_4_left

        ; load 4 Bytes
        mov     eax, [arg2]
        mov     [r11], eax
        add     r11, 4
        sub     arg3, 4
        add     arg2, 4
_less_than_4_left:

        cmp     arg3, 2
        jl      _less_than_2_left

        ; load 2 Bytes
        mov     ax, [arg2]
        mov     [r11], ax
        add     r11, 2
        sub     arg3, 2
        add     arg2, 2
_less_than_2_left:
        cmp     arg3, 1
        jl      _zero_left

        ; load 1 Byte
        mov     al, [arg2]
        mov     [r11], al
_zero_left:
        movdqa  xmm7, [rsp]
        pshufb  xmm7, xmm11
        pxor    xmm7, xmm0              ; xor the initial crc value

        ; shift xmm7 right by (16 - original length) bytes; r9 = original length
        lea     rax, [pshufb_shf_table + 16]
        sub     rax, r9
        movdqu  xmm0, [rax]
        pxor    xmm0, [mask1]

        pshufb  xmm7, xmm0
        jmp     _128_done

align 16
_exact_16_left:
        movdqu  xmm7, [arg2]
        pshufb  xmm7, xmm11
        pxor    xmm7, xmm0              ; xor the initial crc value

        jmp     _128_done

        ; 1 to 3 bytes of input: copy byte by byte into the zeroed scratch slot,
        ; shift into place with psrldq and jump directly to the barrett step
_only_less_than_4:
        cmp     arg3, 3
        jl      _only_less_than_3

        ; load 3 Bytes
        mov     al, [arg2]
        mov     [r11], al

        mov     al, [arg2+1]
        mov     [r11+1], al

        mov     al, [arg2+2]
        mov     [r11+2], al

        movdqa  xmm7, [rsp]
        pshufb  xmm7, xmm11
        pxor    xmm7, xmm0              ; xor the initial crc value

        psrldq  xmm7, 5

        jmp     _barrett
_only_less_than_3:
        cmp     arg3, 2
        jl      _only_less_than_2

        ; load 2 Bytes
        mov     al, [arg2]
        mov     [r11], al

        mov     al, [arg2+1]
        mov     [r11+1], al

        movdqa  xmm7, [rsp]
        pshufb  xmm7, xmm11
        pxor    xmm7, xmm0              ; xor the initial crc value

        psrldq  xmm7, 6

        jmp     _barrett
_only_less_than_2:

        ; load 1 Byte
        mov     al, [arg2]
        mov     [r11], al

        movdqa  xmm7, [rsp]
        pshufb  xmm7, xmm11
        pxor    xmm7, xmm0              ; xor the initial crc value

        psrldq  xmm7, 7

        jmp     _barrett

; NOTE(review): the code visible in this file only reads these tables; a
; read-only section may be preferable to .data - confirm the section name
; works for every supported output format before changing it.
section .data

; precomputed constants
; these constants are precomputed from the poly: 0x8bb70000 (0x8bb7 scaled to 32 bits)
; The tables are loaded with movdqa / used as memory operands of pxor and pand,
; so every 16-byte pair below must stay 16-byte aligned: keep the "align 16"
; and keep each entry at its current size and position.
align 16
; Q = 0x18BB70000
; rk1 = 2^(32*3) mod Q << 32
; rk2 = 2^(32*5) mod Q << 32
; rk3 = 2^(32*15) mod Q << 32
; rk4 = 2^(32*17) mod Q << 32
; rk5 = 2^(32*3) mod Q << 32
; rk6 = 2^(32*2) mod Q << 32
; rk7 = floor(2^64/Q)
; rk8 = Q
rk1:
DQ 0x2d56000000000000
rk2:
DQ 0x06df000000000000
rk3:
DQ 0x9d9d000000000000
rk4:
DQ 0x7cf5000000000000
rk5:
DQ 0x2d56000000000000
rk6:
DQ 0x1368000000000000
rk7:
DQ 0x00000001f65a57f8
rk8:
DQ 0x000000018bb70000

rk9:
DQ 0xceae000000000000
rk10:
DQ 0xbfd6000000000000
rk11:
DQ 0x1e16000000000000
rk12:
DQ 0x713c000000000000
rk13:
DQ 0xf7f9000000000000
rk14:
DQ 0x80a6000000000000
rk15:
DQ 0x044c000000000000
rk16:
DQ 0xe658000000000000
rk17:
DQ 0xad18000000000000
rk18:
DQ 0xa497000000000000
rk19:
DQ 0x6ee3000000000000
rk20:
DQ 0xe7b5000000000000



; XORed into a shift-table entry to flip bit 7 of every control byte
mask1:
dq 0x8080808080808080, 0x8080808080808080
; keeps the low 96 bits of a register (used by the 32b fold)
mask2:
dq 0xFFFFFFFFFFFFFFFF, 0x00000000FFFFFFFF

; pshufb control that reverses the byte order of a 16-byte register
SHUF_MASK:
dq 0x08090A0B0C0D0E0F, 0x0001020304050607

pshufb_shf_table:
; use these values for shift constants for the pshufb instruction
; the code reads 16 bytes at (pshufb_shf_table + 16 - n) for a shift count n;
; different alignments result in values as shown:
;       dq 0x8887868584838281, 0x008f8e8d8c8b8a89 ; shl 15 (16-1) / shr1
;       dq 0x8988878685848382, 0x01008f8e8d8c8b8a ; shl 14 (16-2) / shr2
;       dq 0x8a89888786858483, 0x0201008f8e8d8c8b ; shl 13 (16-3) / shr3
;       dq 0x8b8a898887868584, 0x030201008f8e8d8c ; shl 12 (16-4) / shr4
;       dq 0x8c8b8a8988878685, 0x04030201008f8e8d ; shl 11 (16-5) / shr5
;       dq 0x8d8c8b8a89888786, 0x0504030201008f8e ; shl 10 (16-6) / shr6
;       dq 0x8e8d8c8b8a898887, 0x060504030201008f ; shl 9  (16-7) / shr7
;       dq 0x8f8e8d8c8b8a8988, 0x0706050403020100 ; shl 8  (16-8) / shr8
;       dq 0x008f8e8d8c8b8a89, 0x0807060504030201 ; shl 7  (16-9) / shr9
;       dq 0x01008f8e8d8c8b8a, 0x0908070605040302 ; shl 6  (16-10) / shr10
;       dq 0x0201008f8e8d8c8b, 0x0a09080706050403 ; shl 5  (16-11) / shr11
;       dq 0x030201008f8e8d8c, 0x0b0a090807060504 ; shl 4  (16-12) / shr12
;       dq 0x04030201008f8e8d, 0x0c0b0a0908070605 ; shl 3  (16-13) / shr13
;       dq 0x0504030201008f8e, 0x0d0c0b0a09080706 ; shl 2  (16-14) / shr14
;       dq 0x060504030201008f, 0x0e0d0c0b0a090807 ; shl 1  (16-15) / shr15
dq 0x8786858483828100, 0x8f8e8d8c8b8a8988
dq 0x0706050403020100, 0x000e0d0c0b0a0908

;;;       func            core, ver, snum
slversion crc16_t10dif_01, 01,   06,  0010