;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;  Copyright(c) 2011-2016 Intel Corporation All rights reserved.
;
;  Redistribution and use in source and binary forms, with or without
;  modification, are permitted provided that the following conditions
;  are met:
;    * Redistributions of source code must retain the above copyright
;      notice, this list of conditions and the following disclaimer.
;    * Redistributions in binary form must reproduce the above copyright
;      notice, this list of conditions and the following disclaimer in
;      the documentation and/or other materials provided with the
;      distribution.
;    * Neither the name of Intel Corporation nor the names of its
;      contributors may be used to endorse or promote products derived
;      from this software without specific prior written permission.
;
;  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
;  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
;  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
;  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
;  OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
;  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
;  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
;  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
;  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
;  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
;  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

%include "options.asm"
%include "stdmac.asm"

%ifndef UTILS_ASM
%define UTILS_ASM
; Buffer comparison macros.
;
; Common contract of compare_r / compare_x / compare_y / compare_z:
;   src1, src2  : base addresses of the two buffers (only read).
;   result      : in  - offset at which the comparison starts
;                 out - offset of the first byte that differs, or
;                       result_max if no difference was found
;                       (the match length is accumulated onto it).
;   result_max  : offset one past the last byte to compare.
;                 Used as scratch: it is adjusted while the macro runs and
;                 does not hold its original value on every exit path.
;   tmp         : scratch general purpose register (clobbered).
;   Flags are clobbered.

;; sttni2 is faster, but it can't be debugged
;; so following code is based on "mine5"

;; compares 8 bytes at a time, using xor
;; assumes the input buffer has size at least 8
;; compare_r src1, src2, result, result_max, tmp
%macro compare_r 5
%define %%src1		%1
%define %%src2		%2
%define %%result	%3
%define %%result_max	%4
%define %%tmp		%5

	; result_max = end - 16: the unrolled loop needs 16 readable bytes
	sub	%%result_max, 16
	cmp	%%result, %%result_max
	jg	%%_by_8

	; main loop, unrolled twice: two 8-byte words per iteration
%%loop1:
	mov	%%tmp, [%%src1 + %%result]
	xor	%%tmp, [%%src2 + %%result]	; non-zero bits mark differing bytes
	jnz	%%miscompare_reg
	add	%%result, 8

	mov	%%tmp, [%%src1 + %%result]
	xor	%%tmp, [%%src2 + %%result]
	jnz	%%miscompare_reg
	add	%%result, 8
	cmp	%%result, %%result_max
	jle	%%loop1

%%_by_8:
	; result_max = end - 8: is one more full 8-byte word available?
	add	%%result_max, 8
	cmp	%%result, %%result_max
	jg	%%_cmp_last

	; compare one more full 8-byte word
	mov	%%tmp, [%%src1 + %%result]
	xor	%%tmp, [%%src2 + %%result]
	jnz	%%miscompare_reg
	add	%%result, 8

%%_cmp_last:
	; result_max = end again; done if everything has been compared
	add	%%result_max, 8
	cmp	%%result, %%result_max
	je	%%end

	; Fewer than 8 bytes are left: step back so that the last 8 bytes of
	; the range are compared. The overlapped bytes are already known to be
	; equal. This is why the buffer must be at least 8 bytes long.
	lea	%%result, [%%result_max - 8]

	mov	%%tmp, [%%src1 + %%result]
	xor	%%tmp, [%%src2 + %%result]
	jnz	%%miscompare_reg
	add	%%result, 8
	jmp	%%end

%%miscompare_reg:
	; lowest set bit of the xor difference / 8 = index of first differing byte
	bsf	%%tmp, %%tmp
	shr	%%tmp, 3
	add	%%result, %%tmp
%%end:
%endm

;; compares 16 bytes at a time, using pcmpeqb/pmovmskb
;; assumes the input buffer has size at least 8
;; compare_x src1, src2, result, result_max, tmp, xtmp1, xtmp2
;; xtmp1, xtmp2 are scratch vector registers (clobbered)
%macro compare_x 7
%define %%src1		%1
%define %%src2		%2
%define %%result	%3	; Accumulator for match_length
%define %%result_max	%4
%define %%tmp		%5
%define %%tmp32		%5d	; tmp as a 32-bit register
%define %%xtmp		%6
%define %%xtmp2		%7

	; result_max = end - 32: the unrolled loop needs 32 readable bytes
	sub	%%result_max, 32
	cmp	%%result, %%result_max
	jg	%%_by_16

	; main loop, unrolled twice: two 16-byte blocks per iteration
%%loop1:
	MOVDQU		%%xtmp, [%%src1 + %%result]
	MOVDQU		%%xtmp2, [%%src2 + %%result]
	PCMPEQB		%%xtmp, %%xtmp, %%xtmp2
	PMOVMSKB	%%tmp32, %%xtmp		; one mask bit per equal byte
	xor	%%tmp, 0xFFFF			; invert: set bits mark differing bytes
	jnz	%%miscompare_vect
	add	%%result, 16

	MOVDQU		%%xtmp, [%%src1 + %%result]
	MOVDQU		%%xtmp2, [%%src2 + %%result]
	PCMPEQB		%%xtmp, %%xtmp, %%xtmp2
	PMOVMSKB	%%tmp32, %%xtmp
	xor	%%tmp, 0xFFFF
	jnz	%%miscompare_vect
	add	%%result, 16

	cmp	%%result, %%result_max
	jle	%%loop1

%%_by_16:
	; result_max = end - 16: is one more full 16-byte block available?
	add	%%result_max, 16
	cmp	%%result, %%result_max
	jg	%%_by_8

	MOVDQU		%%xtmp, [%%src1 + %%result]
	MOVDQU		%%xtmp2, [%%src2 + %%result]
	PCMPEQB		%%xtmp, %%xtmp, %%xtmp2
	PMOVMSKB	%%tmp32, %%xtmp
	xor	%%tmp, 0xFFFF
	jnz	%%miscompare_vect
	add	%%result, 16

%%_by_8:
	; result_max = end - 8: is one more full 8-byte word available?
	add	%%result_max, 8
	cmp	%%result, %%result_max
	jg	%%_cmp_last

	; compare one more full 8-byte word
	mov	%%tmp, [%%src1 + %%result]
	xor	%%tmp, [%%src2 + %%result]
	jnz	%%miscompare_reg
	add	%%result, 8

%%_cmp_last:
	; result_max = end again; done if everything has been compared
	add	%%result_max, 8
	cmp	%%result, %%result_max
	je	%%end

	; Fewer than 8 bytes are left: compare the last 8 bytes of the range,
	; overlapping bytes that are already known to be equal.
	lea	%%result, [%%result_max - 8]

	mov	%%tmp, [%%src1 + %%result]
	xor	%%tmp, [%%src2 + %%result]
	jnz	%%miscompare_reg
	add	%%result, 8
	jmp	%%end

%%miscompare_reg:
	; tmp holds a bitwise xor difference: bit index / 8 = byte index
	bsf	%%tmp, %%tmp
	shr	%%tmp, 3
	add	%%result, %%tmp
	jmp	%%end

%%miscompare_vect:
	; tmp holds a per-byte mask: bit index = byte index
	bsf	%%tmp, %%tmp
	add	%%result, %%tmp
%%end:
%endm

;; compares 32 bytes at a time, using pcmpeqb/pmovmskb
;; assumes the input buffer has size at least 8
;; compare_y src1, src2, result, result_max, tmp, ytmp1, ytmp2
;; ytmp1, ytmp2 are scratch vector registers (clobbered)
%macro compare_y 7
%define %%src1		%1
%define %%src2		%2
%define %%result	%3	; Accumulator for match_length
%define %%result_max	%4
%define %%tmp		%5
%define %%tmp32		%5d	; tmp as a 32-bit register
%define %%ytmp		%6
%define %%ytmp2		%7

	; result_max = end - 64: the unrolled loop needs 64 readable bytes
	sub	%%result_max, 64
	cmp	%%result, %%result_max
	jg	%%_by_32

	; main loop, unrolled twice: two 32-byte blocks per iteration
%%loop1:
	vmovdqu		%%ytmp, [%%src1 + %%result]
	vmovdqu		%%ytmp2, [%%src2 + %%result]
	vpcmpeqb	%%ytmp, %%ytmp, %%ytmp2
	vpmovmskb	%%tmp, %%ytmp		; one mask bit per equal byte
	xor	%%tmp32, 0xFFFFFFFF		; invert: set bits mark differing bytes
	jnz	%%miscompare_vect
	add	%%result, 32

	vmovdqu		%%ytmp, [%%src1 + %%result]
	vmovdqu		%%ytmp2, [%%src2 + %%result]
	vpcmpeqb	%%ytmp, %%ytmp, %%ytmp2
	vpmovmskb	%%tmp, %%ytmp
	xor	%%tmp32, 0xFFFFFFFF
	jnz	%%miscompare_vect
	add	%%result, 32

	cmp	%%result, %%result_max
	jle	%%loop1

%%_by_32:
	; result_max = end - 32: is one more full 32-byte block available?
	add	%%result_max, 32
	cmp	%%result, %%result_max
	jg	%%_by_16

	vmovdqu		%%ytmp, [%%src1 + %%result]
	vmovdqu		%%ytmp2, [%%src2 + %%result]
	vpcmpeqb	%%ytmp, %%ytmp, %%ytmp2
	vpmovmskb	%%tmp, %%ytmp
	xor	%%tmp32, 0xFFFFFFFF
	jnz	%%miscompare_vect
	add	%%result, 32

%%_by_16:
	; result_max = end - 16: is one more full 16-byte block available?
	add	%%result_max, 16
	cmp	%%result, %%result_max
	jg	%%_by_8

	; 16-byte block, using the register names formed by appending "x"
	vmovdqu		%%ytmp %+ x, [%%src1 + %%result]
	vmovdqu		%%ytmp2 %+ x, [%%src2 + %%result]
	vpcmpeqb	%%ytmp %+ x, %%ytmp %+ x, %%ytmp2 %+ x
	vpmovmskb	%%tmp, %%ytmp %+ x
	xor	%%tmp32, 0xFFFF
	jnz	%%miscompare_vect
	add	%%result, 16

%%_by_8:
	; result_max = end - 8: is one more full 8-byte word available?
	add	%%result_max, 8
	cmp	%%result, %%result_max
	jg	%%_cmp_last

	mov	%%tmp, [%%src1 + %%result]
	xor	%%tmp, [%%src2 + %%result]
	jnz	%%miscompare_reg
	add	%%result, 8

%%_cmp_last:
	; result_max = end again; done if everything has been compared
	add	%%result_max, 8
	cmp	%%result, %%result_max
	je	%%end

	lea	%%result, [%%result_max - 8]

	; Fewer than 8 bytes are left: compare the last 8 bytes of the range,
	; overlapping bytes that are already known to be equal.
	mov	%%tmp, [%%src1 + %%result]
	xor	%%tmp, [%%src2 + %%result]
	jnz	%%miscompare_reg
	add	%%result, 8
	jmp	%%end

%%miscompare_reg:
	; tmp holds a bitwise xor difference: bit index / 8 = byte index
	bsf	%%tmp, %%tmp
	shr	%%tmp, 3
	add	%%result, %%tmp
	jmp	%%end

%%miscompare_vect:
	; tmp holds a per-byte mask: bit index = byte index
	tzcnt	%%tmp, %%tmp
	add	%%result, %%tmp
%%end:
%endm

;; compares 64 bytes at a time
;; compare_z src1, src2, result, result_max, tmp, ktmp, ztmp1, ztmp2
;; Clobbers result_max
;; ktmp is a scratch mask register, ztmp1/ztmp2 are scratch vector
;; registers (all clobbered). The tail is read with a masked load, so
;; only bytes below result_max are selected by the load mask.
%macro compare_z 8
%define %%src1		%1
%define %%src2		%2
%define %%result	%3	; Accumulator for match_length
%define %%result_max	%4
%define %%tmp		%5	; scratch GPR, receives a 64-bit byte mask
%define %%ktmp		%6
%define %%ztmp		%7
%define %%ztmp2		%8

	; result_max = end - 128: the unrolled loop needs 128 readable bytes
	sub	%%result_max, 128
	cmp	%%result, %%result_max
	jg	%%_by_64

	; main loop, unrolled twice: two 64-byte blocks per iteration
%%loop1:
	vmovdqu8	%%ztmp, [%%src1 + %%result]
	vmovdqu8	%%ztmp2, [%%src2 + %%result]
	vpcmpb		%%ktmp, %%ztmp, %%ztmp2, NEQ	; mask bit set per differing byte
	ktestq		%%ktmp, %%ktmp
	jnz	%%miscompare
	add	%%result, 64

	vmovdqu8	%%ztmp, [%%src1 + %%result]
	vmovdqu8	%%ztmp2, [%%src2 + %%result]
	vpcmpb		%%ktmp, %%ztmp, %%ztmp2, NEQ
	ktestq		%%ktmp, %%ktmp
	jnz	%%miscompare
	add	%%result, 64

	cmp	%%result, %%result_max
	jle	%%loop1

%%_by_64:
	; result_max = end - 64: is one more full 64-byte block available?
	add	%%result_max, 64
	cmp	%%result, %%result_max
	jg	%%_less_than_64

	vmovdqu8	%%ztmp, [%%src1 + %%result]
	vmovdqu8	%%ztmp2, [%%src2 + %%result]
	vpcmpb		%%ktmp, %%ztmp, %%ztmp2, NEQ
	ktestq		%%ktmp, %%ktmp
	jnz	%%miscompare
	add	%%result, 64

%%_less_than_64:
	; result_max = end - result = number of bytes still to compare
	add	%%result_max, 64
	sub	%%result_max, %%result
	jle	%%end

	; build a mask with the low result_max bits set
	mov	%%tmp, -1
	bzhi	%%tmp, %%tmp, %%result_max
	kmovq	%%ktmp, %%tmp

	; masked, zeroing loads: unselected bytes are zero in both vectors
	; and therefore compare equal
	vmovdqu8	%%ztmp {%%ktmp}{z}, [%%src1 + %%result]
	vmovdqu8	%%ztmp2 {%%ktmp}{z}, [%%src2 + %%result]
	vpcmpb		%%ktmp, %%ztmp, %%ztmp2, NEQ
	ktestq		%%ktmp, %%ktmp
	jnz	%%miscompare
	add	%%result, %%result_max

	jmp	%%end
%%miscompare:
	; index of the lowest set mask bit = index of the first differing byte
	kmovq	%%tmp, %%ktmp
	tzcnt	%%tmp, %%tmp
	add	%%result, %%tmp
%%end:
%endm

;; compare250 src1, src2, result, result_max, tmp, ytmp0, ytmp1
;; Limits result_max to at most 250 and then runs the comparison macro
;; selected by COMPARE_TYPE (1: compare_r, 2: compare_x, 3: compare_y).
;; The vector arguments are passed as the names used for the 32-byte
;; variant; the 16-byte variant uses the same names with "x" appended.
;; Clobbers result_max, tmp, the vector temporaries and flags.
%macro compare250 7
%define %%src1		%1
%define %%src2		%2
%define %%result	%3
%define %%result_max	%4
%define %%tmp		%5
%define %%xtmp0		%6x
%define %%xtmp1		%7x
%define %%ytmp0		%6
%define %%ytmp1		%7

	; result_max = min(result_max, 250), signed comparison
	mov	%%tmp, 250
	cmp	%%result_max, 250
	cmovg	%%result_max, %%tmp

%if (COMPARE_TYPE == 1)
	compare_r	%%src1, %%src2, %%result, %%result_max, %%tmp
%elif (COMPARE_TYPE == 2)
	compare_x	%%src1, %%src2, %%result, %%result_max, %%tmp, %%xtmp0, %%xtmp1
%elif (COMPARE_TYPE == 3)
	compare_y	%%src1, %%src2, %%result, %%result_max, %%tmp, %%ytmp0, %%ytmp1
%else
	; NOTE(review): the "% error" line below is kept exactly as found;
	; presumably it forces a failure on assemblers that do not stop on
	; %error - confirm before removing.
	%error Unknown Compare type COMPARE_TYPE
	 % error
%endif
%endmacro

; Assumes the buffer has at least 8 bytes
; Accumulates match length onto result
; compare_large src1, src2, result, result_max, tmp, ytmp0, ytmp1
; Same dispatch on COMPARE_TYPE as compare250, but result_max is not
; limited before the comparison.
%macro compare_large 7
%define %%src1		%1
%define %%src2		%2
%define %%result	%3
%define %%result_max	%4
%define %%tmp		%5
%define %%xtmp0		%6x
%define %%xtmp1		%7x
%define %%ytmp0		%6
%define %%ytmp1		%7

%if (COMPARE_TYPE == 1)
	compare_r	%%src1, %%src2, %%result, %%result_max, %%tmp
%elif (COMPARE_TYPE == 2)
	compare_x	%%src1, %%src2, %%result, %%result_max, %%tmp, %%xtmp0, %%xtmp1
%elif (COMPARE_TYPE == 3)
	compare_y	%%src1, %%src2, %%result, %%result_max, %%tmp, %%ytmp0, %%ytmp1
%else
	; NOTE(review): the "% error" line below is kept exactly as found;
	; presumably it forces a failure on assemblers that do not stop on
	; %error - confirm before removing.
	%error Unknown Compare type COMPARE_TYPE
	 % error
%endif
%endmacro

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

;; compare size, src1, src2, result, tmp
;; Counts how many leading bytes of src1 and src2 are equal, looking at
;; no more than size bytes.
;;   size   : number of bytes to compare; clobbered
;;   result : out - number of matching bytes (set to 0 on entry)
;;   tmp    : scratch register; clobbered
;; 8-byte words are read only while at least 8 bytes remain; the rest is
;; compared one byte at a time.
%macro compare 5
%define %%size		%1
%define %%src1		%2
%define %%src2		%3
%define %%result	%4
%define %%tmp		%5
%define %%tmp8		%5b	; tmp as a 8-bit register

	xor	%%result, %%result
	; bias size by 7 so that "size > 0" means "at least 8 bytes remain"
	sub	%%size, 7
	jle	%%lab2
%%loop1:
	mov	%%tmp, [%%src1 + %%result]
	xor	%%tmp, [%%src2 + %%result]	; non-zero bits mark differing bytes
	jnz	%%miscompare
	add	%%result, 8
	sub	%%size, 8
	jg	%%loop1
%%lab2:
	;; if we fall through from above, we have found no mismatches,
	;; %%size+7 is the number of bytes left to look at, and %%result is the
	;; number of bytes that have matched
	add	%%size, 7
	jle	%%end
%%loop3:
	; byte-wise comparison of the remaining (fewer than 8) bytes
	mov	%%tmp8, [%%src1 + %%result]
	cmp	%%tmp8, [%%src2 + %%result]
	jne	%%end
	inc	%%result
	dec	%%size
	jg	%%loop3
	jmp	%%end
%%miscompare:
	; lowest set bit of the xor difference / 8 = index of first differing byte
	bsf	%%tmp, %%tmp
	shr	%%tmp, 3
	add	%%result, %%tmp
%%end:
%endm

%endif	;UTILS_ASM