/*-
 * Copyright (c) 2018-2019 The FreeBSD Foundation
 * Copyright (c) 2003 Peter Wemm.
 * Copyright (c) 1993 The Regents of the University of California.
 * All rights reserved.
 *
 * Portions of this software were developed by
 * Konstantin Belousov <kib@FreeBSD.org> under sponsorship from
 * the FreeBSD Foundation.
 *
 * Primarily rewritten and redeveloped by Mateusz Guzik
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $FreeBSD$
 */

/*
 * Macros to help implement memcmp(), bcmp(),
 * bzero(), memset(),
 * memcpy(), bcopy(), memmove()
 */

/*
 * memcmp(b1, b2, len)
 *         rdi,rsi,rdx
 */
.macro MEMCMP end
	xorl	%eax,%eax
10:
	cmpq	$16,%rdx
	ja	101632f

100816:
	cmpb	$8,%dl
	jl	100408f
	movq	(%rdi),%r8
	movq	(%rsi),%r9
	cmpq	%r8,%r9
	jne	80f
	movq	-8(%rdi,%rdx),%r8
	movq	-8(%rsi,%rdx),%r9
	cmpq	%r8,%r9
	jne	10081608f
	\end
100408:
	cmpb	$4,%dl
	jl	100204f
	movl	(%rdi),%r8d
	movl	(%rsi),%r9d
	cmpl	%r8d,%r9d
	jne	80f
	movl	-4(%rdi,%rdx),%r8d
	movl	-4(%rsi,%rdx),%r9d
	cmpl	%r8d,%r9d
	jne	10040804f
	\end
100204:
	cmpb	$2,%dl
	jl	100001f
	movzwl	(%rdi),%r8d
	movzwl	(%rsi),%r9d
	cmpl	%r8d,%r9d
	jne	1f
	movzwl	-2(%rdi,%rdx),%r8d
	movzwl	-2(%rsi,%rdx),%r9d
	cmpl	%r8d,%r9d
	jne	1f
	\end
100001:
	cmpb	$1,%dl
	jl	100000f
	movzbl	(%rdi),%eax
	movzbl	(%rsi),%r8d
	subl	%r8d,%eax
100000:
	\end
	ALIGN_TEXT
101632:
	cmpq	$32,%rdx
	ja	103200f
	movq	(%rdi),%r8
	movq	(%rsi),%r9
	cmpq	%r8,%r9
	jne	80f
	movq	8(%rdi),%r8
	movq	8(%rsi),%r9
	cmpq	%r8,%r9
	jne	10163208f
	movq	-16(%rdi,%rdx),%r8
	movq	-16(%rsi,%rdx),%r9
	cmpq	%r8,%r9
	jne	10163216f
	movq	-8(%rdi,%rdx),%r8
	movq	-8(%rsi,%rdx),%r9
	cmpq	%r8,%r9
	jne	10163224f
	\end
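
	/*
	 * More than 32 bytes: compare 32 bytes per iteration.  Each 16-byte
	 * half is checked with subq/orq so a single branch catches any
	 * difference; the small-size code above handles the tail.
	 */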
	ALIGN_TEXT
103200:
	movq	(%rdi),%r8
	movq	8(%rdi),%r9
	subq	(%rsi),%r8
	subq	8(%rsi),%r9
	orq	%r8,%r9
	jnz	10320000f

	movq	16(%rdi),%r8
	movq	24(%rdi),%r9
	subq	16(%rsi),%r8
	subq	24(%rsi),%r9
	orq	%r8,%r9
	jnz	10320016f

	leaq	32(%rdi),%rdi
	leaq	32(%rsi),%rsi
	subq	$32,%rdx
	cmpq	$32,%rdx
	jae	103200b
	cmpb	$0,%dl
	jne	10b
	\end

	/*
	 * Mismatch was found.
	 *
	 * Before we compute it we narrow down the range (16 -> 8 -> 4 bytes).
	 */
	ALIGN_TEXT
10320016:
	leaq	16(%rdi),%rdi
	leaq	16(%rsi),%rsi
10320000:
	movq	(%rdi),%r8
	movq	(%rsi),%r9
	cmpq	%r8,%r9
	jne	80f
	leaq	8(%rdi),%rdi
	leaq	8(%rsi),%rsi
	jmp	80f
	ALIGN_TEXT
10081608:
10163224:
	leaq	-8(%rdi,%rdx),%rdi
	leaq	-8(%rsi,%rdx),%rsi
	jmp	80f
	ALIGN_TEXT
10163216:
	leaq	-16(%rdi,%rdx),%rdi
	leaq	-16(%rsi,%rdx),%rsi
	jmp	80f
	ALIGN_TEXT
10163208:
	leaq	8(%rdi),%rdi
	leaq	8(%rsi),%rsi
	jmp	80f
	ALIGN_TEXT
10040804:
	leaq	-4(%rdi,%rdx),%rdi
	leaq	-4(%rsi,%rdx),%rsi
	jmp	1f

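	/*
	 * The differing data has been narrowed down to the bytes at
	 * (%rdi)/(%rsi).  Compare one doubleword; if it still matches,
	 * advance by 4 and fall into the byte-wise loop below to compute
	 * the return value.
	 */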
	ALIGN_TEXT
80:
	movl	(%rdi),%r8d
	movl	(%rsi),%r9d
	cmpl	%r8d,%r9d
	jne	1f
	leaq	4(%rdi),%rdi
	leaq	4(%rsi),%rsi

	/*
	 * We have up to 4 bytes to inspect.
	 */
1:
	movzbl	(%rdi),%eax
	movzbl	(%rsi),%r8d
	cmpb	%r8b,%al
	jne	2f

	movzbl	1(%rdi),%eax
	movzbl	1(%rsi),%r8d
	cmpb	%r8b,%al
	jne	2f

	movzbl	2(%rdi),%eax
	movzbl	2(%rsi),%r8d
	cmpb	%r8b,%al
	jne	2f

	movzbl	3(%rdi),%eax
	movzbl	3(%rsi),%r8d
2:
	subl	%r8d,%eax
	\end
.endm

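/*
 * Every exit path of MEMCMP leaves the result in %eax: zero when the
 * buffers are equal, otherwise the difference of the first mismatching
 * bytes, each zero-extended before the subtraction.
 */
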
/*
 * memmove(dst, src, cnt)
 *         rdi, rsi, rdx
 */

/*
 * Register state at entry is supposed to be as follows:
 * rdi - destination
 * rsi - source
 * rdx - count
 *
 * The macro possibly clobbers the above and: rcx, r8, r9, r10
 * It does not clobber rax nor r11.
 */
.macro MEMMOVE erms overlap end
	/*
	 * For sizes 0..32 all data is read before it is written, so there
	 * is no correctness issue with direction of copying.
	 */
	movq	%rdx,%rcx
	cmpq	$32,%rdx
	jbe	101632f

	.if \overlap == 1
	movq	%rdi,%r8
	subq	%rsi,%r8
	cmpq	%rcx,%r8	/* overlapping && src < dst? */
	jb	2f
	.endif

	/*
	 * AMD's movsq gets better at around 1024 bytes, Intel's gets
	 * better at around 256 bytes (Zen 2, 9900K era)
	 */
	cmpq	$1024,%rcx
	ja	1256f

103200:
	movq	(%rsi),%rdx
	movq	%rdx,(%rdi)
	movq	8(%rsi),%rdx
	movq	%rdx,8(%rdi)
	movq	16(%rsi),%rdx
	movq	%rdx,16(%rdi)
	movq	24(%rsi),%rdx
	movq	%rdx,24(%rdi)
	leaq	32(%rsi),%rsi
	leaq	32(%rdi),%rdi
	subq	$32,%rcx
	cmpq	$32,%rcx
	jae	103200b
	cmpb	$0,%cl
	jne	101632f
	\end
	ALIGN_TEXT
101632:
	cmpb	$16,%cl
	jl	100816f
	movq	(%rsi),%rdx
	movq	8(%rsi),%r8
	movq	-16(%rsi,%rcx),%r9
	movq	-8(%rsi,%rcx),%r10
	movq	%rdx,(%rdi)
	movq	%r8,8(%rdi)
	movq	%r9,-16(%rdi,%rcx)
	movq	%r10,-8(%rdi,%rcx)
	\end
	ALIGN_TEXT
100816:
	cmpb	$8,%cl
	jl	100408f
	movq	(%rsi),%rdx
	movq	-8(%rsi,%rcx),%r8
	movq	%rdx,(%rdi)
	movq	%r8,-8(%rdi,%rcx)
	\end
	ALIGN_TEXT
100408:
	cmpb	$4,%cl
	jl	100204f
	movl	(%rsi),%edx
	movl	-4(%rsi,%rcx),%r8d
	movl	%edx,(%rdi)
	movl	%r8d,-4(%rdi,%rcx)
	\end
	ALIGN_TEXT
100204:
	cmpb	$2,%cl
	jl	100001f
	movzwl	(%rsi),%edx
	movzwl	-2(%rsi,%rcx),%r8d
	movw	%dx,(%rdi)
	movw	%r8w,-2(%rdi,%rcx)
	\end
	ALIGN_TEXT
100001:
	cmpb	$1,%cl
	jl	100000f
	movb	(%rsi),%dl
	movb	%dl,(%rdi)
100000:
	\end

	/*
	 * More than 1024 bytes (the threshold chosen above): bulk copy.
	 */
	ALIGN_TEXT
1256:
	testb	$15,%dil
	jnz	100f
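	/*
	 * The destination is 16-byte aligned here: copy the bulk with
	 * rep movsb (ERMS) or rep movsq, letting the small-size code above
	 * finish any sub-quadword tail in the non-ERMS case.
	 */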
	.if \erms == 1
	rep
	movsb
	.else
	shrq	$3,%rcx			/* copy by 64-bit words */
	rep
	movsq
	movq	%rdx,%rcx
	andl	$7,%ecx			/* any bytes left? */
	jne	100408b
	.endif
	\end
100:
	movq	(%rsi),%r8
	movq	8(%rsi),%r9
	movq	%rdi,%r10
	movq	%rdi,%rcx
	andq	$15,%rcx
	leaq	-16(%rdx,%rcx),%rdx
	neg	%rcx
	leaq	16(%rdi,%rcx),%rdi
	leaq	16(%rsi,%rcx),%rsi
	movq	%rdx,%rcx
	.if \erms == 1
	rep
	movsb
	movq	%r8,(%r10)
	movq	%r9,8(%r10)
	.else
	shrq	$3,%rcx			/* copy by 64-bit words */
	rep
	movsq
	movq	%r8,(%r10)
	movq	%r9,8(%r10)
	movq	%rdx,%rcx
	andl	$7,%ecx			/* any bytes left? */
	jne	100408b
	.endif
	\end

	.if \overlap == 1
	/*
	 * Copy backwards.
	 */
	ALIGN_TEXT
2:
	cmpq	$256,%rcx
	ja	2256f

	leaq	-8(%rdi,%rcx),%rdi
	leaq	-8(%rsi,%rcx),%rsi

	cmpq	$32,%rcx
	jb	2016f

2032:
	movq	(%rsi),%rdx
	movq	%rdx,(%rdi)
	movq	-8(%rsi),%rdx
	movq	%rdx,-8(%rdi)
	movq	-16(%rsi),%rdx
	movq	%rdx,-16(%rdi)
	movq	-24(%rsi),%rdx
	movq	%rdx,-24(%rdi)
	leaq	-32(%rsi),%rsi
	leaq	-32(%rdi),%rdi
	subq	$32,%rcx
	cmpq	$32,%rcx
	jae	2032b
	cmpb	$0,%cl
	jne	2016f
	\end
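
	/*
	 * Backward-copy tail: at most 31 bytes remain and %rsi/%rdi point
	 * 8 bytes before the end of the region still to be copied.
	 */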
	ALIGN_TEXT
2016:
	cmpb	$16,%cl
	jl	2008f
	movq	(%rsi),%rdx
	movq	%rdx,(%rdi)
	movq	-8(%rsi),%rdx
	movq	%rdx,-8(%rdi)
	subb	$16,%cl
	jz	2000f
	leaq	-16(%rsi),%rsi
	leaq	-16(%rdi),%rdi
2008:
	cmpb	$8,%cl
	jl	2004f
	movq	(%rsi),%rdx
	movq	%rdx,(%rdi)
	subb	$8,%cl
	jz	2000f
	leaq	-8(%rsi),%rsi
	leaq	-8(%rdi),%rdi
2004:
	cmpb	$4,%cl
	jl	2002f
	movl	4(%rsi),%edx
	movl	%edx,4(%rdi)
	subb	$4,%cl
	jz	2000f
	leaq	-4(%rsi),%rsi
	leaq	-4(%rdi),%rdi
2002:
	cmpb	$2,%cl
	jl	2001f
	movw	6(%rsi),%dx
	movw	%dx,6(%rdi)
	subb	$2,%cl
	jz	2000f
	leaq	-2(%rsi),%rsi
	leaq	-2(%rdi),%rdi
2001:
	cmpb	$1,%cl
	jl	2000f
	movb	7(%rsi),%dl
	movb	%dl,7(%rdi)
2000:
	\end
	ALIGN_TEXT
2256:
	std
	.if \erms == 1
	leaq	-1(%rdi,%rcx),%rdi
	leaq	-1(%rsi,%rcx),%rsi
	rep
	movsb
	cld
	.else
	leaq	-8(%rdi,%rcx),%rdi
	leaq	-8(%rsi,%rcx),%rsi
	shrq	$3,%rcx
	rep
	movsq
	cld
	movq	%rdx,%rcx
	andb	$7,%cl
	jne	2004b
	.endif
	\end
	.endif
.endm

/*
 * memset(dst, c, len)
 *	  rdi, r10, rdx
 */
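/*
 * The caller is expected to have replicated the fill byte into all eight
 * bytes of %r10; the quadword stores below rely on that.  %rax is loaded
 * with the original destination so it is available as the memset(3)
 * return value on every exit.
 */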
.macro MEMSET erms end
	movq	%rdi,%rax
	movq	%rdx,%rcx

	cmpq	$32,%rcx
	jbe	101632f

	cmpq	$256,%rcx
	ja	1256f

103200:
	movq	%r10,(%rdi)
	movq	%r10,8(%rdi)
	movq	%r10,16(%rdi)
	movq	%r10,24(%rdi)
	leaq	32(%rdi),%rdi
	subq	$32,%rcx
	cmpq	$32,%rcx
	ja	103200b
	cmpb	$16,%cl
	ja	201632f
	movq	%r10,-16(%rdi,%rcx)
	movq	%r10,-8(%rdi,%rcx)
	\end
	ALIGN_TEXT
101632:
	cmpb	$16,%cl
	jl	100816f
201632:
	movq	%r10,(%rdi)
	movq	%r10,8(%rdi)
	movq	%r10,-16(%rdi,%rcx)
	movq	%r10,-8(%rdi,%rcx)
	\end
	ALIGN_TEXT
100816:
	cmpb	$8,%cl
	jl	100408f
	movq	%r10,(%rdi)
	movq	%r10,-8(%rdi,%rcx)
	\end
	ALIGN_TEXT
100408:
	cmpb	$4,%cl
	jl	100204f
	movl	%r10d,(%rdi)
	movl	%r10d,-4(%rdi,%rcx)
	\end
	ALIGN_TEXT
100204:
	cmpb	$2,%cl
	jl	100001f
	movw	%r10w,(%rdi)
	movw	%r10w,-2(%rdi,%rcx)
	\end
	ALIGN_TEXT
100001:
	cmpb	$0,%cl
	je	100000f
	movb	%r10b,(%rdi)
100000:
	\end
	ALIGN_TEXT
1256:
	movq	%rdi,%r9
	movq	%r10,%rax
	testl	$15,%edi
	jnz	3f
1:
	.if \erms == 1
	rep
	stosb
	movq	%r9,%rax
	.else
	movq	%rcx,%rdx
	shrq	$3,%rcx
	rep
	stosq
	movq	%r9,%rax
	andl	$7,%edx
	jnz	2f
	\end
2:
	movq	%r10,-8(%rdi,%rdx)
	.endif
	\end
	ALIGN_TEXT
3:
	movq	%r10,(%rdi)
	movq	%r10,8(%rdi)
	movq	%rdi,%r8
	andq	$15,%r8
	leaq	-16(%rcx,%r8),%rcx
	neg	%r8
	leaq	16(%rdi,%r8),%rdi
	jmp	1b
.endm

.macro DUMMYARG
.endm
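
/*
 * Usage sketch (illustrative only; the real instantiation sites live in
 * the callers, not in this file).  Each macro takes an `end' argument
 * naming the code to emit on every exit path, and DUMMYARG above can be
 * passed when nothing extra is needed.  A hypothetical wrapper might look
 * roughly like:
 *
 *	ENTRY(memmove)
 *		MEMMOVE erms=0 overlap=1 end=ret
 *	END(memmove)
 *
 * Whether the `erms' (rep movsb) variant is selected, and what the
 * entry/exit glue looks like, is platform- and build-specific.
 */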