/*-
 * Copyright (c) 2018, 2023 The FreeBSD Foundation
 *
 * This software was developed by Mateusz Guzik <mjg@FreeBSD.org>
 * under sponsorship from the FreeBSD Foundation.
 *
 * Portions of this software were developed by Robert Clausecker
 * <fuz@FreeBSD.org> under sponsorship from the FreeBSD Foundation.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <machine/asm.h>
#include <machine/param.h>

#include "amd64_archlevel.h"

/*
 * int memcmp(const void *s1 (%rdi), const void *s2 (%rsi), size_t n (%rdx))
 *
 * When built with -DBCMP this file instead provides bcmp(), which only
 * needs to report equal/unequal, allowing the mismatch paths to be
 * collapsed (see the #ifdef BCMP sections below).
 *
 * Note: this routine was written with kernel use in mind (read: no simd),
 * it is only present in userspace as a temporary measure until something
 * better gets imported.
 */

#define ALIGN_TEXT	.p2align 4,0x90 /* 16-byte alignment, nop filled */

#ifdef BCMP
#define memcmp bcmp
#endif

/* archlevel dispatch: pick the SSE2 baseline kernel when available */
ARCHFUNCS(memcmp)
	ARCHFUNC(memcmp, scalar)
	ARCHFUNC(memcmp, baseline)
ENDARCHFUNCS(memcmp)

/*
 * Scalar (no SIMD) kernel.  Numeric labels encode the length range a
 * path handles: 100204 covers 2--4 bytes, 100408 covers 5--8, 100816
 * covers 9--16, 101632 covers 17--32, 103200 is the 32-byte loop.
 * Labels such as 10081608 name the mismatch-recovery stub for the
 * corresponding path (here: second load of the 9--16 case, offset 8).
 * Short lengths are compared with two overlapping loads (head and
 * tail), so no byte loop is needed.
 */
ARCHENTRY(memcmp, scalar)
	xorl	%eax,%eax		# return 0 unless a mismatch is found
10:
	cmpq	$16,%rdx
	ja	101632f

	cmpb	$8,%dl
	jg	100816f

	cmpb	$4,%dl
	jg	100408f

	cmpb	$2,%dl
	jge	100204f

	cmpb	$1,%dl
	jl	100000f			# zero length: result already 0
	movzbl	(%rdi),%eax
	movzbl	(%rsi),%r8d
	subl	%r8d,%eax
100000:
	ret

	ALIGN_TEXT
100816:
	movq	(%rdi),%r8
	movq	(%rsi),%r9
	cmpq	%r8,%r9
	jne	80f
	movq	-8(%rdi,%rdx),%r8	# overlapping tail load
	movq	-8(%rsi,%rdx),%r9
	cmpq	%r8,%r9
	jne	10081608f
	ret
	ALIGN_TEXT
100408:
	movl	(%rdi),%r8d
	movl	(%rsi),%r9d
	cmpl	%r8d,%r9d
	jne	80f
	movl	-4(%rdi,%rdx),%r8d	# overlapping tail load
	movl	-4(%rsi,%rdx),%r9d
	cmpl	%r8d,%r9d
	jne	10040804f
	ret
	ALIGN_TEXT
100204:
	movzwl	(%rdi),%r8d
	movzwl	(%rsi),%r9d
	cmpl	%r8d,%r9d
	jne	1f
	movzwl	-2(%rdi,%rdx),%r8d	# overlapping tail load
	movzwl	-2(%rsi,%rdx),%r9d
	cmpl	%r8d,%r9d
	jne	1f
	ret
	ALIGN_TEXT
101632:
	cmpq	$32,%rdx
	ja	103200f
	movq	(%rdi),%r8
	movq	(%rsi),%r9
	cmpq	%r8,%r9
	jne	80f
	movq	8(%rdi),%r8
	movq	8(%rsi),%r9
	cmpq	%r8,%r9
	jne	10163208f
	movq	-16(%rdi,%rdx),%r8	# overlapping tail loads
	movq	-16(%rsi,%rdx),%r9
	cmpq	%r8,%r9
	jne	10163216f
	movq	-8(%rdi,%rdx),%r8
	movq	-8(%rsi,%rdx),%r9
	cmpq	%r8,%r9
	jne	10163224f
	ret
	ALIGN_TEXT
103200:
	/* main loop: compare 32 bytes per iteration, 16 at a time */
	movq	(%rdi),%r8
	movq	8(%rdi),%r9
	subq	(%rsi),%r8
	subq	8(%rsi),%r9
	orq	%r8,%r9			# nonzero iff either quadword differed
	jnz	10320000f

	movq	16(%rdi),%r8
	movq	24(%rdi),%r9
	subq	16(%rsi),%r8
	subq	24(%rsi),%r9
	orq	%r8,%r9
	jnz	10320016f

	leaq	32(%rdi),%rdi
	leaq	32(%rsi),%rsi
	subq	$32,%rdx
	cmpq	$32,%rdx
	jae	103200b
	cmpb	$0,%dl
	jne	10b			# redo size dispatch for the tail
	ret

/*
 * Mismatch was found.
 */
#ifdef BCMP
	/* bcmp() only needs a nonzero result; %eax is still 0 here */
	ALIGN_TEXT
10320016:
10320000:
10081608:
10163224:
10163216:
10163208:
10040804:
80:
1:
	leal	1(%eax),%eax
	ret
#else
/*
 * We need to compute the difference between strings.
 * Start with narrowing the range down (16 -> 8 -> 4 bytes).
 */
	ALIGN_TEXT
10320016:
	leaq	16(%rdi),%rdi
	leaq	16(%rsi),%rsi
10320000:
	movq	(%rdi),%r8
	movq	(%rsi),%r9
	cmpq	%r8,%r9
	jne	80f
	leaq	8(%rdi),%rdi		# mismatch is in the second quadword
	leaq	8(%rsi),%rsi
	jmp	80f
	ALIGN_TEXT
10081608:
10163224:
	leaq	-8(%rdi,%rdx),%rdi
	leaq	-8(%rsi,%rdx),%rsi
	jmp	80f
	ALIGN_TEXT
10163216:
	leaq	-16(%rdi,%rdx),%rdi
	leaq	-16(%rsi,%rdx),%rsi
	jmp	80f
	ALIGN_TEXT
10163208:
	leaq	8(%rdi),%rdi
	leaq	8(%rsi),%rsi
	jmp	80f
	ALIGN_TEXT
10040804:
	leaq	-4(%rdi,%rdx),%rdi
	leaq	-4(%rsi,%rdx),%rsi
	jmp	1f

	ALIGN_TEXT
80:
	/* mismatch within 8 bytes: narrow down to 4 */
	movl	(%rdi),%r8d
	movl	(%rsi),%r9d
	cmpl	%r8d,%r9d
	jne	1f
	leaq	4(%rdi),%rdi
	leaq	4(%rsi),%rsi

/*
 * We have up to 4 bytes to inspect.
 */
1:
	movzbl	(%rdi),%eax
	movzbl	(%rsi),%r8d
	cmpb	%r8b,%al
	jne	2f

	movzbl	1(%rdi),%eax
	movzbl	1(%rsi),%r8d
	cmpb	%r8b,%al
	jne	2f

	movzbl	2(%rdi),%eax
	movzbl	2(%rsi),%r8d
	cmpb	%r8b,%al
	jne	2f

	movzbl	3(%rdi),%eax
	movzbl	3(%rsi),%r8d
2:
	subl	%r8d,%eax		# difference of mismatching bytes
	ret
#endif
ARCHEND(memcmp, scalar)

ARCHENTRY(memcmp, baseline)
	cmp	$32, %rdx		# enough to permit use of the long kernel?
	ja	.Llong

	test	%rdx, %rdx		# zero bytes buffer?
	je	.L0

	/*
	 * Compare strings of 1--32 bytes.  We want to do this by
	 * loading into two xmm registers and then comparing.  To avoid
	 * crossing into unmapped pages, we either load 32 bytes from
	 * the start of the buffer or 32 bytes before its end, depending
	 * on whether there is a page boundary between the overread area
	 * or not.
	 */

	/* check for page boundaries overreads */
	lea	31(%rdi), %eax		# end of overread
	lea	31(%rsi), %r8d
	lea	-1(%rdi, %rdx, 1), %ecx	# last character in buffer
	lea	-1(%rsi, %rdx, 1), %r9d
	xor	%ecx, %eax
	xor	%r9d, %r8d
	test	$PAGE_SIZE, %eax	# are they on different pages?
	jz	0f

	/* fix up rdi */
	movdqu	-32(%rdi, %rdx, 1), %xmm0
	movdqu	-16(%rdi, %rdx, 1), %xmm1
	lea	-8(%rsp), %rdi		# end of replacement buffer
	sub	%rdx, %rdi		# start of replacement buffer
	movdqa	%xmm0, -40(%rsp)	# copy to replacement buffer
	movdqa	%xmm1, -24(%rsp)

0:	test	$PAGE_SIZE, %r8d
	jz	0f

	/* fix up rsi */
	movdqu	-32(%rsi, %rdx, 1), %xmm0
	movdqu	-16(%rsi, %rdx, 1), %xmm1
	lea	-40(%rsp), %rsi		# end of replacement buffer
	sub	%rdx, %rsi		# start of replacement buffer
	movdqa	%xmm0, -72(%rsp)	# copy to replacement buffer
	movdqa	%xmm1, -56(%rsp)

	/* load data and compare properly */
0:	movdqu	16(%rdi), %xmm1
	movdqu	16(%rsi), %xmm3
	movdqu	(%rdi), %xmm0
	movdqu	(%rsi), %xmm2
	mov	%edx, %ecx
	mov	$-1, %edx
	shl	%cl, %rdx		# ones where the buffer is not
	pcmpeqb	%xmm3, %xmm1
	pcmpeqb	%xmm2, %xmm0
	pmovmskb %xmm1, %ecx
	pmovmskb %xmm0, %eax
	shl	$16, %ecx
	or	%ecx, %eax		# ones where the buffers match
	or	%edx, %eax		# including where the buffer is not
	not	%eax			# ones where there is a mismatch
#ifndef BCMP
	bsf	%eax, %edx		# location of the first mismatch
	cmovz	%eax, %edx		# including if there is no mismatch
	movzbl	(%rdi, %rdx, 1), %eax	# mismatching bytes
	movzbl	(%rsi, %rdx, 1), %edx
	sub	%edx, %eax
#endif
	ret

	/* empty input */
.L0:	xor	%eax, %eax
	ret

	/* compare 33+ bytes */
	ALIGN_TEXT
.Llong:	movdqu	(%rdi), %xmm0		# load head
	movdqu	(%rsi), %xmm2
	mov	%rdi, %rcx
	sub	%rdi, %rsi		# express rsi as distance from rdi
	and	$~0xf, %rdi		# align rdi to 16 bytes
	movdqu	16(%rsi, %rdi, 1), %xmm1
	pcmpeqb	16(%rdi), %xmm1		# compare second half of this iteration
	add	%rcx, %rdx		# pointer to last byte in buffer
	jc	.Loverflow		# did this overflow?
0:	pcmpeqb	%xmm2, %xmm0
	pmovmskb %xmm0, %eax
	xor	$0xffff, %eax		# any mismatch?
	jne	.Lmismatch_head
	add	$64, %rdi		# advance to next iteration
	jmp	1f			# and get going with the loop

	/*
	 * If we got here, a buffer length was passed to memcmp(a, b, len)
	 * such that a + len < a.  While this sort of usage is illegal,
	 * it is plausible that a caller tries to do something like
	 * memcmp(a, b, SIZE_MAX) if a and b are known to differ, intending
	 * for memcmp() to stop comparing at the first mismatch.  This
	 * behaviour is not guaranteed by any version of ISO/IEC 9899,
	 * but usually works out in practice.  Let's try to make this
	 * case work by comparing until the end of the address space.
	 */
.Loverflow:
	mov	$-1, %rdx		# compare until the end of memory
	jmp	0b

	/* process buffer 32 bytes at a time */
	ALIGN_TEXT
0:	movdqu	-32(%rsi, %rdi, 1), %xmm0
	movdqu	-16(%rsi, %rdi, 1), %xmm1
	pcmpeqb	-32(%rdi), %xmm0
	pcmpeqb	-16(%rdi), %xmm1
	add	$32, %rdi		# advance to next iteration
1:	pand	%xmm0, %xmm1		# 0xff where both halves matched
	pmovmskb %xmm1, %eax
	cmp	$0xffff, %eax		# all bytes matched?
	jne	.Lmismatch
	cmp	%rdx, %rdi		# end of buffer reached?
	jb	0b

	/* less than 32 bytes left to compare */
	movdqu	-16(%rdx), %xmm1	# load 32 byte tail through end pointer
	movdqu	-16(%rdx, %rsi, 1), %xmm3
	movdqu	-32(%rdx), %xmm0
	movdqu	-32(%rdx, %rsi, 1), %xmm2
	pcmpeqb	%xmm3, %xmm1
	pcmpeqb	%xmm2, %xmm0
	pmovmskb %xmm1, %ecx
	pmovmskb %xmm0, %eax
	shl	$16, %ecx
	or	%ecx, %eax		# ones where the buffers match
	not	%eax			# ones where there is a mismatch
#ifndef BCMP
	bsf	%eax, %ecx		# location of the first mismatch
	cmovz	%eax, %ecx		# including if there is no mismatch
	add	%rcx, %rdx		# pointer to potential mismatch
	movzbl	-32(%rdx), %eax		# mismatching bytes
	movzbl	-32(%rdx, %rsi, 1), %edx
	sub	%edx, %eax
#endif
	ret

#ifdef BCMP
.Lmismatch:
	mov	$1, %eax
.Lmismatch_head:
	ret
#else /* memcmp */
.Lmismatch_head:
	tzcnt	%eax, %eax		# location of mismatch
	add	%rax, %rcx		# pointer to mismatch
	movzbl	(%rcx), %eax		# mismatching bytes
	movzbl	(%rcx, %rsi, 1), %ecx
	sub	%ecx, %eax
	ret

.Lmismatch:
	movdqu	-48(%rsi, %rdi, 1), %xmm1
	pcmpeqb	-48(%rdi), %xmm1	# reconstruct xmm1 before PAND
	pmovmskb %xmm0, %eax		# mismatches in first 16 bytes
	pmovmskb %xmm1, %edx		# mismatches in second 16 bytes
	shl	$16, %edx
	or	%edx, %eax		# mismatches in both
	not	%eax			# matches in both
	tzcnt	%eax, %eax		# location of mismatch
	add	%rax, %rdi		# pointer to mismatch
	movzbl	-64(%rdi), %eax		# mismatching bytes
	movzbl	-64(%rdi, %rsi, 1), %ecx
	sub	%ecx, %eax
	ret
#endif
ARCHEND(memcmp, baseline)

	.section	.note.GNU-stack,"",%progbits