10Sstevel@tonic-gate/* 2*10583SEdward.Gillett@Sun.COM * CDDL HEADER START 3*10583SEdward.Gillett@Sun.COM * 4*10583SEdward.Gillett@Sun.COM * The contents of this file are subject to the terms of the 5*10583SEdward.Gillett@Sun.COM * Common Development and Distribution License (the "License"). 6*10583SEdward.Gillett@Sun.COM * You may not use this file except in compliance with the License. 7*10583SEdward.Gillett@Sun.COM * 8*10583SEdward.Gillett@Sun.COM * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9*10583SEdward.Gillett@Sun.COM * or http://www.opensolaris.org/os/licensing. 10*10583SEdward.Gillett@Sun.COM * See the License for the specific language governing permissions 11*10583SEdward.Gillett@Sun.COM * and limitations under the License. 12*10583SEdward.Gillett@Sun.COM * 13*10583SEdward.Gillett@Sun.COM * When distributing Covered Code, include this CDDL HEADER in each 14*10583SEdward.Gillett@Sun.COM * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15*10583SEdward.Gillett@Sun.COM * If applicable, add the following below this CDDL HEADER, with the 16*10583SEdward.Gillett@Sun.COM * fields enclosed by brackets "[]" replaced with your own identifying 17*10583SEdward.Gillett@Sun.COM * information: Portions Copyright [yyyy] [name of copyright owner] 18*10583SEdward.Gillett@Sun.COM * 19*10583SEdward.Gillett@Sun.COM * CDDL HEADER END 200Sstevel@tonic-gate */ 210Sstevel@tonic-gate 220Sstevel@tonic-gate/* 23*10583SEdward.Gillett@Sun.COM * Copyright (c) 2009, Intel Corporation 240Sstevel@tonic-gate * All rights reserved. 
*/

/*
 * str[n]cpy - copy [n] chars from second operand into first operand
 *
 *	char *strcpy(char *dst, const char *src);
 *	char *strncpy(char *dst, const char *src, size_t n);
 *
 * Assembled as strncpy when USE_AS_STRNCPY is defined, as strcpy otherwise.
 *
 * In:	%rdi = dst, %rsi = src, %rdx = n (strncpy only)
 * Out:	%rax = original dst
 *
 * Register roles in the code below:
 *	%rsi	src, rounded down to a 16-byte boundary
 *	%rdi	dst, rounded to a 16-byte boundary before the aligned stores
 *	%rcx	src alignment offset, then index into the current chunk
 *	%r8	strncpy only: count that still has to be satisfied
 *	%r9	src alignment offset, then src offset relative to dst
 *	%r10	max src bytes remaining in current dqword
 *	%edx	byte mask of the nulls found by the most recent check
 *	%xmm0	null-check register; it is all zero whenever a null check
 *		falls through, which the following pcmpeqb relies on
 */
#include "SYS.h"
#include "proc64_id.h"

#define	LABEL(s) .strcpy/**/s

#ifdef USE_AS_STRNCPY
	ENTRY(strncpy)
	/*
	 * The count is a 64-bit size_t, so look at all of %rdx.  Testing
	 * only %edx would treat a non-zero count whose low 32 bits are zero
	 * (0x100000000 for example) as "nothing to copy".
	 */
	test	%rdx, %rdx
	jz	LABEL(strncpy_exitz)
	mov	%rdx, %r8			/* r8 = count */
#else
	ENTRY(strcpy)				/* (char *, const char *) */
	xor	%edx, %edx			/* 32-bit form still clears all of rdx */
#endif
	mov	%esi, %ecx
	and	$0xfffffffffffffff0, %rsi	/* force rsi 16 byte align */
	and	$0xf, %rcx			/* rcx = src offset within its dqword */
	mov	%rdi, %rax			/* save destination address for return value */

	pxor	%xmm0, %xmm0			/* clear %xmm0 for null char checks */
	pcmpeqb	(%rsi), %xmm0			/* check 16 bytes in src for null */
	pmovmskb %xmm0, %edx
	shr	%cl, %edx			/* drop mask bits for bytes before src */
	test	%edx, %edx			/* edx will be 0 if chars are non-null */
	jnz	LABEL(less16bytes)		/* null char found in first 16 bytes examined */
#ifdef USE_AS_STRNCPY
	/*
	 * Check if the count is satisfied in first 16 bytes examined,
	 * i.e. count + src offset <= 16.
	 *
	 * NOTE(review): the test is signed, so very large counts (roughly
	 * 2^63 and above) also take this branch - confirm that less16bytes
	 * handles that case as intended.
	 */
	lea	-16(%r8, %rcx), %r11
	test	%r11, %r11
	jle	LABEL(less16bytes)
#endif
	mov	%rcx, %r9			/* rsi alignment offset */
	or	%edi, %ecx
	and	$0xf, %ecx			/* ZF set if src and dest offsets are both 0 */
	lea	-16(%r9), %r10			/* lea leaves the flags from "and" intact */
	jz	LABEL(ashr_0)			/* src and dest are both 16 byte aligned */

	neg	%r10				/* max src bytes remaining in current dqword */

	pxor	%xmm0, %xmm0			/* clear %xmm0, may be polluted by unaligned operation */
	pcmpeqb	16(%rsi), %xmm0			/* check next 16 bytes in src for a null */
	pmovmskb %xmm0, %edx
	test	%edx, %edx
	jnz	LABEL(less32bytes)		/* null char found in first 32 bytes examined */

#ifdef USE_AS_STRNCPY
	/*
	 * If strncpy count <= 16 go to exit case
	 */
	sub	$16, %r8
	jbe	LABEL(less32bytes_strncpy_truncation)
#endif
	/*
	 * At least 16 bytes to copy to destination string. Move them now.
	 * Don't worry about alignment.
	 */
	mov	(%rsi, %r9), %rdx
	mov	%rdx, (%rdi)
	mov	8(%rsi, %r9), %rdx
	mov	%rdx, 8(%rdi)

	/*
	 * so far destination rdi may be aligned by 16, re-calculate rsi and
	 * jump to corresponding src/dest relative offset case.
	 *	rcx is offset of rsi
	 *	rdx is offset of rdi
	 */
	and	$0xfffffffffffffff0, %rdi	/* force rdi 16 byte align */
	mov	%rax, %rdx			/* rax contains original rdi */
	xor	%rdi, %rdx			/* same effect as "and $0xf, %rdx" */
#ifdef USE_AS_STRNCPY
	/*
	 * Will now do 16 byte aligned stores. Stores may overlap some bytes
	 * (ie store twice) if destination was unaligned. Compensate here.
	 */
	add	%rdx, %r8			/* compensate for overlap */
#endif

	add	$16, %rdi			/* next 16 bytes for dest */

	/*
	 * align src to 16-byte boundary. Could be up or down depending on
	 * whether src offset - dest offset > 0 (up) or
	 *  src offset - dest offset < 0 (down).
	 */
	sub	%rdx, %r9			/* src offset - dest offset */

	lea	16(%r9, %rsi), %rsi
	mov	%esi, %ecx			/* for new src offset */
	and	$0xfffffffffffffff0, %rsi	/* force rsi 16 byte align */

	and	$0xf, %ecx			/* new src offset is 0 if rsi/rdi have same alignment */
	jz	LABEL(ashr_0)

#ifdef USE_AS_STRNCPY
	xor	%edx, %edx			/* In case unaligned_exit is taken */
#endif
	/*
	 * Jump to case corresponding to source/dest string relative offsets
	 *	Index = (16 + (src offset - dest offset)) % 16
	 *
	 * The table holds 32-bit entries that are added to the table's own
	 * address to form the jump target.
	 */
	lea	-16(%rcx), %r10
	mov	%rcx, %r9
	neg	%r10				/* max src bytes remaining in current dqword */
	lea	LABEL(unaligned_table)(%rip), %r11
	movslq	(%r11, %rcx, 4), %rcx
	lea	(%r11, %rcx), %rcx
	jmp	*%rcx

/*
 * ashr_0 handles the following cases:
 *	src alignment offset = dest alignment offset
 *
 * Both pointers are 16-byte aligned here, so whole dqwords are moved with
 * movdqa.  %rcx is 0 on entry (both branches to ashr_0 are taken on the
 * zero result of "and $0xf, %ecx") and is used as the loop index.
 */
	.p2align 5
LABEL(ashr_0):
#ifdef USE_AS_STRNCPY
	sub	$16, %r8
	jbe	LABEL(strncpy_truncation_aligned)
#endif
	movdqa	(%rsi), %xmm1			/* fetch 16 bytes from src string */
	movdqa	%xmm1, (%rdi)			/* store 16 bytes into dest string */
	add	$16, %rsi
	add	$16, %rdi
	pcmpeqb	(%rsi), %xmm0			/* check 16 bytes in src for a null */
	pmovmskb %xmm0, %edx

	test	%edx, %edx			/* edx will be 0 if chars are non-null */
	jnz	LABEL(aligned_16bytes)		/* exit tail */

	/*
	 * Main aligned loop, unrolled four times.  Each step copies the
	 * dqword that was just checked and then checks the next one.
	 */
LABEL(ashr_0_loop):
#ifdef USE_AS_STRNCPY
	sub	$16, %r8
	jbe	LABEL(strncpy_truncation_aligned)
#endif
	movdqa	(%rsi, %rcx), %xmm1
	movdqa	%xmm1, (%rdi, %rcx)
	add	$16, %rcx
	pcmpeqb	(%rsi, %rcx), %xmm0
	pmovmskb %xmm0, %edx
	test	%edx, %edx
	jnz	LABEL(aligned_exit)

#ifdef USE_AS_STRNCPY
	sub	$16, %r8
	jbe	LABEL(strncpy_truncation_aligned)
#endif
	movdqa	(%rsi, %rcx), %xmm1
	movdqa	%xmm1, (%rdi, %rcx)
	add	$16, %rcx
	pcmpeqb	(%rsi, %rcx), %xmm0
	pmovmskb %xmm0, %edx
	test	%edx, %edx
	jnz	LABEL(aligned_exit)

#ifdef USE_AS_STRNCPY
	sub	$16, %r8
	jbe	LABEL(strncpy_truncation_aligned)
#endif
	movdqa	(%rsi, %rcx), %xmm1
	movdqa	%xmm1, (%rdi, %rcx)

	add	$16, %rcx
	pcmpeqb	(%rsi, %rcx), %xmm0
	pmovmskb %xmm0, %edx
	test	%edx, %edx
	jnz	LABEL(aligned_exit)

#ifdef USE_AS_STRNCPY
	sub	$16, %r8
	jbe	LABEL(strncpy_truncation_aligned)
#endif
	movdqa	(%rsi, %rcx), %xmm1
	movdqa	%xmm1, (%rdi, %rcx)
	add	$16, %rcx
	pcmpeqb	(%rsi, %rcx), %xmm0
	pmovmskb %xmm0, %edx
	test	%edx, %edx
	jz	LABEL(ashr_0_loop)
	jmp	LABEL(aligned_exit)


/*
 * ashr_15 handles the following cases:
 *	(16 + (src offset - dest offset)) % 16 = 15
 *
 * Based on above operation, start from (%r9 + rsi) to the left of this cache
 * bank, there is no null byte.
*/
/*
 * State on entry, as set up by the dispatch code earlier in this file:
 *	%rsi, %rdi	16-byte aligned src and dst chunk pointers
 *	%r10		max src bytes remaining in current dqword
 *	%r8		strncpy only: count still to be satisfied
 *	%xmm0		all zero (the last null check found nothing)
 *
 * Every 16-byte store is built from the top byte of the current src dqword
 * followed by the low 15 bytes of the next one.  Each loop body is unrolled
 * twice.  The exit labels are defined elsewhere in this file.
 */
	.p2align 4
LABEL(ashr_15):
	xor	%ecx, %ecx			/* index into src/dst = 0 */
#ifdef USE_AS_STRNCPY
	cmp	%r10, %r8			/* count <= bytes left in this src dqword? */
	jbe	LABEL(unaligned_exit)
#endif
	testl	$USE_SSSE3, .memops_method(%rip)	/* palignr available? */
	jz	LABEL(ashr_15_use_sse2)

	.p2align 4
LABEL(ashr_15_use_ssse3):
	/* --- first half of the unrolled SSSE3 loop --- */
	movdqa	16(%rsi, %rcx), %xmm3		/* next src dqword */
	pcmpeqb	%xmm3, %xmm0			/* any null in it? */
	pmovmskb %xmm0, %edx
	test	%edx, %edx
	jnz	LABEL(unaligned_exit)
#ifdef USE_AS_STRNCPY
	sub	$16, %r8
	jbe	LABEL(strncpy_truncation_unaligned)
#endif

	/*
	 * palignr $15, (%rsi, %rcx), %xmm3
	 * Emitted as raw bytes rather than by mnemonic.
	 */
	.byte	0x66, 0x0f, 0x3a, 0x0f, 0x1c, 0x0e, 0x0f

	movdqa	%xmm3, (%rdi, %rcx)
	add	$16, %rcx

#ifdef USE_AS_STRNCPY
	cmp	%r10, %r8
	jbe	LABEL(unaligned_exit)
#endif
	/* --- second half of the unrolled SSSE3 loop --- */
	movdqa	16(%rsi, %rcx), %xmm3
	pcmpeqb	%xmm3, %xmm0
	pmovmskb %xmm0, %edx
	test	%edx, %edx
	jnz	LABEL(unaligned_exit)
#ifdef USE_AS_STRNCPY
	sub	$16, %r8
	jbe	LABEL(strncpy_truncation_unaligned)
#endif

	/* palignr $15, (%rsi, %rcx), %xmm3 */
	.byte	0x66, 0x0f, 0x3a, 0x0f, 0x1c, 0x0e, 0x0f

	movdqa	%xmm3, (%rdi, %rcx)
	add	$16, %rcx

#ifdef USE_AS_STRNCPY
	cmp	%r10, %r8
	jbe	LABEL(unaligned_exit)
#endif
	jmp	LABEL(ashr_15_use_ssse3)

	.p2align 4
LABEL(ashr_15_use_sse2):
	/* --- first half of the unrolled SSE2 loop --- */
	pcmpeqb	16(%rsi, %rcx), %xmm0		/* any null in the next src dqword? */
	pmovmskb %xmm0, %edx
	test	%edx, %edx
	jnz	LABEL(unaligned_exit)
#ifdef USE_AS_STRNCPY
	sub	$16, %r8
	jbe	LABEL(strncpy_truncation_unaligned)
#endif

	movdqa	16(%rsi, %rcx), %xmm3		/* next src dqword */
	movdqa	(%rsi, %rcx), %xmm2		/* current src dqword */

	psrldq	$15, %xmm2			/* current: byte 15 moves to byte 0 */
	pslldq	$1, %xmm3			/* next: bytes 0-14 move to bytes 1-15 */
	por	%xmm2, %xmm3			/* merge into one 16-byte chunk */

	movdqa	%xmm3, (%rdi, %rcx)
	add	$16, %rcx
#ifdef USE_AS_STRNCPY
	cmp	%r10, %r8
	jbe	LABEL(unaligned_exit)
#endif
	/* --- second half of the unrolled SSE2 loop --- */
	pcmpeqb	16(%rsi, %rcx), %xmm0
	pmovmskb %xmm0, %edx
	test	%edx, %edx
	jnz	LABEL(unaligned_exit)
#ifdef USE_AS_STRNCPY
	sub	$16, %r8
	jbe	LABEL(strncpy_truncation_unaligned)
#endif

	movdqa	16(%rsi, %rcx), %xmm3
	movdqa	(%rsi, %rcx), %xmm2

	psrldq	$15, %xmm2
	pslldq	$1, %xmm3
	por	%xmm2, %xmm3

	movdqa	%xmm3, (%rdi, %rcx)
	add	$16, %rcx
#ifdef USE_AS_STRNCPY
	cmp	%r10, %r8
	jbe	LABEL(unaligned_exit)
#endif
	jmp	LABEL(ashr_15_use_sse2)


/*
 * ashr_14 handles the following cases:
 *	(16 + (src offset - dest offset)) % 16 = 14
 *
 * Based on above operation, start from (%r9 + rsi) to the left of this cache
 * bank, there is no null byte.
*/
/*
 * State on entry, as set up by the dispatch code earlier in this file:
 *	%rsi, %rdi	16-byte aligned src and dst chunk pointers
 *	%r10		max src bytes remaining in current dqword
 *	%r8		strncpy only: count still to be satisfied
 *	%xmm0		all zero (the last null check found nothing)
 *
 * Every 16-byte store is built from the top 2 bytes of the current src
 * dqword followed by the low 14 bytes of the next one.  Each loop body is
 * unrolled twice.  The exit labels are defined elsewhere in this file.
 */
	.p2align 4
LABEL(ashr_14):
	xor	%ecx, %ecx			/* index into src/dst = 0 */
#ifdef USE_AS_STRNCPY
	cmp	%r10, %r8			/* count <= bytes left in this src dqword? */
	jbe	LABEL(unaligned_exit)
#endif
	testl	$USE_SSSE3, .memops_method(%rip)	/* palignr available? */
	jz	LABEL(ashr_14_use_sse2)

	.p2align 4
LABEL(ashr_14_use_ssse3):
	/* --- first half of the unrolled SSSE3 loop --- */
	movdqa	16(%rsi, %rcx), %xmm3		/* next src dqword */
	pcmpeqb	%xmm3, %xmm0			/* any null in it? */
	pmovmskb %xmm0, %edx
	test	%edx, %edx
	jnz	LABEL(unaligned_exit)
#ifdef USE_AS_STRNCPY
	sub	$16, %r8
	jbe	LABEL(strncpy_truncation_unaligned)
#endif

	/*
	 * palignr $14, (%rsi, %rcx), %xmm3
	 * Emitted as raw bytes rather than by mnemonic.
	 */
	.byte	0x66, 0x0f, 0x3a, 0x0f, 0x1c, 0x0e, 0x0e

	movdqa	%xmm3, (%rdi, %rcx)
	add	$16, %rcx

#ifdef USE_AS_STRNCPY
	cmp	%r10, %r8
	jbe	LABEL(unaligned_exit)
#endif
	/* --- second half of the unrolled SSSE3 loop --- */
	movdqa	16(%rsi, %rcx), %xmm3
	pcmpeqb	%xmm3, %xmm0
	pmovmskb %xmm0, %edx
	test	%edx, %edx
	jnz	LABEL(unaligned_exit)
#ifdef USE_AS_STRNCPY
	sub	$16, %r8
	jbe	LABEL(strncpy_truncation_unaligned)
#endif

	/* palignr $14, (%rsi, %rcx), %xmm3 */
	.byte	0x66, 0x0f, 0x3a, 0x0f, 0x1c, 0x0e, 0x0e

	movdqa	%xmm3, (%rdi, %rcx)
	add	$16, %rcx
#ifdef USE_AS_STRNCPY
	cmp	%r10, %r8
	jbe	LABEL(unaligned_exit)
#endif
	jmp	LABEL(ashr_14_use_ssse3)

	.p2align 4
LABEL(ashr_14_use_sse2):
	/* --- first half of the unrolled SSE2 loop --- */
	pcmpeqb	16(%rsi, %rcx), %xmm0		/* any null in the next src dqword? */
	pmovmskb %xmm0, %edx
	test	%edx, %edx
	jnz	LABEL(unaligned_exit)
#ifdef USE_AS_STRNCPY
	sub	$16, %r8
	jbe	LABEL(strncpy_truncation_unaligned)
#endif

	movdqa	16(%rsi, %rcx), %xmm3		/* next src dqword */
	movdqa	(%rsi, %rcx), %xmm2		/* current src dqword */

	psrldq	$14, %xmm2			/* current: bytes 14-15 move to bytes 0-1 */
	pslldq	$2, %xmm3			/* next: bytes 0-13 move to bytes 2-15 */
	por	%xmm2, %xmm3			/* merge into one 16-byte chunk */

	movdqa	%xmm3, (%rdi, %rcx)
	add	$16, %rcx

#ifdef USE_AS_STRNCPY
	cmp	%r10, %r8
	jbe	LABEL(unaligned_exit)
#endif
	/* --- second half of the unrolled SSE2 loop --- */
	pcmpeqb	16(%rsi, %rcx), %xmm0
	pmovmskb %xmm0, %edx
	test	%edx, %edx
	jnz	LABEL(unaligned_exit)
#ifdef USE_AS_STRNCPY
	sub	$16, %r8
	jbe	LABEL(strncpy_truncation_unaligned)
#endif

	movdqa	16(%rsi, %rcx), %xmm3
	movdqa	(%rsi, %rcx), %xmm2

	psrldq	$14, %xmm2
	pslldq	$2, %xmm3
	por	%xmm2, %xmm3

	movdqa	%xmm3, (%rdi, %rcx)
	add	$16, %rcx
#ifdef USE_AS_STRNCPY
	cmp	%r10, %r8
	jbe	LABEL(unaligned_exit)
#endif
	jmp	LABEL(ashr_14_use_sse2)


/*
 * ashr_13 handles the following cases:
 *	(16 + (src offset - dest offset)) % 16 = 13
 *
 * Based on above operation, start from (%r9 + rsi) to the left of this cache
 * bank, there is no null byte.
*/
/*
 * State on entry, as set up by the dispatch code earlier in this file:
 *	%rsi, %rdi	16-byte aligned src and dst chunk pointers
 *	%r10		max src bytes remaining in current dqword
 *	%r8		strncpy only: count still to be satisfied
 *	%xmm0		all zero (the last null check found nothing)
 *
 * Every 16-byte store is built from the top 3 bytes of the current src
 * dqword followed by the low 13 bytes of the next one.  Each loop body is
 * unrolled twice.  The exit labels are defined elsewhere in this file.
 */
	.p2align 4
LABEL(ashr_13):
	xor	%ecx, %ecx			/* index into src/dst = 0 */
#ifdef USE_AS_STRNCPY
	cmp	%r10, %r8			/* count <= bytes left in this src dqword? */
	jbe	LABEL(unaligned_exit)
#endif
	testl	$USE_SSSE3, .memops_method(%rip)	/* palignr available? */
	jz	LABEL(ashr_13_use_sse2)

	.p2align 4
LABEL(ashr_13_use_ssse3):
	/* --- first half of the unrolled SSSE3 loop --- */
	movdqa	16(%rsi, %rcx), %xmm3		/* next src dqword */
	pcmpeqb	%xmm3, %xmm0			/* any null in it? */
	pmovmskb %xmm0, %edx
	test	%edx, %edx
	jnz	LABEL(unaligned_exit)
#ifdef USE_AS_STRNCPY
	sub	$16, %r8
	jbe	LABEL(strncpy_truncation_unaligned)
#endif

	/*
	 * palignr $13, (%rsi, %rcx), %xmm3
	 * Emitted as raw bytes rather than by mnemonic.
	 */
	.byte	0x66, 0x0f, 0x3a, 0x0f, 0x1c, 0x0e, 0x0d

	movdqa	%xmm3, (%rdi, %rcx)
	add	$16, %rcx

#ifdef USE_AS_STRNCPY
	cmp	%r10, %r8
	jbe	LABEL(unaligned_exit)
#endif
	/* --- second half of the unrolled SSSE3 loop --- */
	movdqa	16(%rsi, %rcx), %xmm3
	pcmpeqb	%xmm3, %xmm0
	pmovmskb %xmm0, %edx
	test	%edx, %edx
	jnz	LABEL(unaligned_exit)
#ifdef USE_AS_STRNCPY
	sub	$16, %r8
	jbe	LABEL(strncpy_truncation_unaligned)
#endif

	/* palignr $13, (%rsi, %rcx), %xmm3 */
	.byte	0x66, 0x0f, 0x3a, 0x0f, 0x1c, 0x0e, 0x0d

	movdqa	%xmm3, (%rdi, %rcx)
	add	$16, %rcx
#ifdef USE_AS_STRNCPY
	cmp	%r10, %r8
	jbe	LABEL(unaligned_exit)
#endif
	jmp	LABEL(ashr_13_use_ssse3)

	.p2align 4
LABEL(ashr_13_use_sse2):
	/* --- first half of the unrolled SSE2 loop --- */
	pcmpeqb	16(%rsi, %rcx), %xmm0		/* any null in the next src dqword? */
	pmovmskb %xmm0, %edx
	test	%edx, %edx
	jnz	LABEL(unaligned_exit)
#ifdef USE_AS_STRNCPY
	sub	$16, %r8
	jbe	LABEL(strncpy_truncation_unaligned)
#endif

	movdqa	16(%rsi, %rcx), %xmm3		/* next src dqword */
	movdqa	(%rsi, %rcx), %xmm2		/* current src dqword */

	psrldq	$13, %xmm2			/* current: bytes 13-15 move to bytes 0-2 */
	pslldq	$3, %xmm3			/* next: bytes 0-12 move to bytes 3-15 */
	por	%xmm2, %xmm3			/* merge into one 16-byte chunk */

	movdqa	%xmm3, (%rdi, %rcx)
	add	$16, %rcx

#ifdef USE_AS_STRNCPY
	cmp	%r10, %r8
	jbe	LABEL(unaligned_exit)
#endif
	/* --- second half of the unrolled SSE2 loop --- */
	pcmpeqb	16(%rsi, %rcx), %xmm0
	pmovmskb %xmm0, %edx
	test	%edx, %edx
	jnz	LABEL(unaligned_exit)
#ifdef USE_AS_STRNCPY
	sub	$16, %r8
	jbe	LABEL(strncpy_truncation_unaligned)
#endif

	movdqa	16(%rsi, %rcx), %xmm3
	movdqa	(%rsi, %rcx), %xmm2

	psrldq	$13, %xmm2
	pslldq	$3, %xmm3
	por	%xmm2, %xmm3

	movdqa	%xmm3, (%rdi, %rcx)
	add	$16, %rcx
#ifdef USE_AS_STRNCPY
	cmp	%r10, %r8
	jbe	LABEL(unaligned_exit)
#endif
	jmp	LABEL(ashr_13_use_sse2)


/*
 * ashr_12 handles the following cases:
 *	(16 + (src offset - dest offset)) % 16 = 12
 *
 * Based on above operation, start from (%r9 + rsi) to the left of this cache
 * bank, there is no null byte.
*/

/*
 * Each pass builds one 16-byte destination block out of the last 4 bytes
 * of the source block at (%rsi, %rcx) followed by the first 12 bytes of
 * the block at 16(%rsi, %rcx), and stores it at (%rdi, %rcx).  The SSSE3
 * path merges with palignr; the SSE2 path merges with psrldq/pslldq/por.
 * Both loop bodies are unrolled twice.
 *
 * Register use visible in this section:
 *	%rcx	running byte index, cleared on entry and advanced by 16
 *		after every store
 *	%rsi	source base; only aligned 16-byte loads are issued
 *	%rdi	destination base; only aligned 16-byte stores are issued
 *	%xmm0	result of the null-byte compare; the loop relies on it being
 *		all zero before each compare (NOTE(review): confirm that the
 *		code branching here leaves it cleared)
 *	%edx	byte mask of the compare; nonzero means a null byte was seen
 *	%r8	strncpy only: count taken from the length argument at entry,
 *		reduced by 16 per block here
 *	%r10	strncpy only: limit compared against %r8; set outside this
 *		section (NOTE(review): confirm its meaning at the caller)
 *
 * Leaves through unaligned_exit when the next source block holds a null
 * byte or when %r8 <= %r10 (unsigned), and through
 * strncpy_truncation_unaligned when %r8 was 16 or less before the
 * subtraction.
 */
	.p2align 4
LABEL(ashr_12):
	xor	%ecx, %ecx				/* clear index */
#ifdef USE_AS_STRNCPY
	cmp	%r10, %r8
	jbe	LABEL(unaligned_exit)
#endif
	testl	$USE_SSSE3, .memops_method(%rip)	/* use sse2 or ssse3? */
	jz	LABEL(ashr_12_use_sse2)

	.p2align 4
LABEL(ashr_12_use_ssse3):
	movdqa	16(%rsi, %rcx), %xmm3
	pcmpeqb	%xmm3, %xmm0
	pmovmskb %xmm0, %edx
	test	%edx, %edx
	jnz	LABEL(unaligned_exit)
#ifdef USE_AS_STRNCPY
	sub	$16, %r8
	jbe	LABEL(strncpy_truncation_unaligned)
#endif

	/* palignr $12, (%rsi, %rcx), %xmm3 -- hand-encoded below */
	.byte	0x66, 0x0F, 0x3A, 0x0F
	.byte	0x1c, 0x0e, 0x0c

	movdqa	%xmm3, (%rdi, %rcx)
	add	$16, %rcx

#ifdef USE_AS_STRNCPY
	cmp	%r10, %r8
	jbe	LABEL(unaligned_exit)
#endif
	/* second copy of the unrolled body */
	movdqa	16(%rsi, %rcx), %xmm3
	pcmpeqb	%xmm3, %xmm0
	pmovmskb %xmm0, %edx
	test	%edx, %edx
	jnz	LABEL(unaligned_exit)
#ifdef USE_AS_STRNCPY
	sub	$16, %r8
	jbe	LABEL(strncpy_truncation_unaligned)
#endif

	/* palignr $12, (%rsi, %rcx), %xmm3 -- hand-encoded below */
	.byte	0x66, 0x0F, 0x3A, 0x0F
	.byte	0x1c, 0x0e, 0x0c

	movdqa	%xmm3, (%rdi, %rcx)
	add	$16, %rcx
#ifdef USE_AS_STRNCPY
	cmp	%r10, %r8
	jbe	LABEL(unaligned_exit)
#endif
	jmp	LABEL(ashr_12_use_ssse3)

	.p2align 4
LABEL(ashr_12_use_sse2):
	pcmpeqb	16(%rsi, %rcx), %xmm0
	pmovmskb %xmm0, %edx
	test	%edx, %edx
	jnz	LABEL(unaligned_exit)
#ifdef USE_AS_STRNCPY
	sub	$16, %r8
	jbe	LABEL(strncpy_truncation_unaligned)
#endif

	movdqa	16(%rsi, %rcx), %xmm3
	movdqa	(%rsi, %rcx), %xmm2

	psrldq	$12, %xmm2		/* keep the last 4 bytes of the lower block */
	pslldq	$4, %xmm3		/* make room for them below the upper block */
	por	%xmm2, %xmm3

	movdqa	%xmm3, (%rdi, %rcx)
	add	$16, %rcx

#ifdef USE_AS_STRNCPY
	cmp	%r10, %r8
	jbe	LABEL(unaligned_exit)
#endif
	/* second copy of the unrolled body */
	pcmpeqb	16(%rsi, %rcx), %xmm0
	pmovmskb %xmm0, %edx
	test	%edx, %edx
	jnz	LABEL(unaligned_exit)
#ifdef USE_AS_STRNCPY
	sub	$16, %r8
	jbe	LABEL(strncpy_truncation_unaligned)
#endif

	movdqa	16(%rsi, %rcx), %xmm3
	movdqa	(%rsi, %rcx), %xmm2

	psrldq	$12, %xmm2
	pslldq	$4, %xmm3
	por	%xmm2, %xmm3

	movdqa	%xmm3, (%rdi, %rcx)
	add	$16, %rcx
#ifdef USE_AS_STRNCPY
	cmp	%r10, %r8
	jbe	LABEL(unaligned_exit)
#endif
	jmp	LABEL(ashr_12_use_sse2)


/*
 * ashr_11 handles the following cases:
 *	(16 + (src offset - dest offset)) % 16 = 11
 *
 * Based on above operation, start from (%r9 + rsi) to the left of this cache
 * bank, there is no null byte.
*/

/*
 * Each pass builds one 16-byte destination block out of the last 5 bytes
 * of the source block at (%rsi, %rcx) followed by the first 11 bytes of
 * the block at 16(%rsi, %rcx), and stores it at (%rdi, %rcx).  The SSSE3
 * path merges with palignr; the SSE2 path merges with psrldq/pslldq/por.
 * Both loop bodies are unrolled twice.
 *
 * Register use visible in this section:
 *	%rcx	running byte index, cleared on entry and advanced by 16
 *		after every store
 *	%rsi	source base; only aligned 16-byte loads are issued
 *	%rdi	destination base; only aligned 16-byte stores are issued
 *	%xmm0	result of the null-byte compare; the loop relies on it being
 *		all zero before each compare (NOTE(review): confirm that the
 *		code branching here leaves it cleared)
 *	%edx	byte mask of the compare; nonzero means a null byte was seen
 *	%r8	strncpy only: count taken from the length argument at entry,
 *		reduced by 16 per block here
 *	%r10	strncpy only: limit compared against %r8; set outside this
 *		section (NOTE(review): confirm its meaning at the caller)
 *
 * Leaves through unaligned_exit when the next source block holds a null
 * byte or when %r8 <= %r10 (unsigned), and through
 * strncpy_truncation_unaligned when %r8 was 16 or less before the
 * subtraction.
 */
	.p2align 4
LABEL(ashr_11):
	xor	%ecx, %ecx				/* clear index */
#ifdef USE_AS_STRNCPY
	cmp	%r10, %r8
	jbe	LABEL(unaligned_exit)
#endif
	testl	$USE_SSSE3, .memops_method(%rip)	/* use sse2 or ssse3? */
	jz	LABEL(ashr_11_use_sse2)

	.p2align 4
LABEL(ashr_11_use_ssse3):
	movdqa	16(%rsi, %rcx), %xmm3
	pcmpeqb	%xmm3, %xmm0
	pmovmskb %xmm0, %edx
	test	%edx, %edx
	jnz	LABEL(unaligned_exit)
#ifdef USE_AS_STRNCPY
	sub	$16, %r8
	jbe	LABEL(strncpy_truncation_unaligned)
#endif

	/* palignr $11, (%rsi, %rcx), %xmm3 -- hand-encoded below */
	.byte	0x66, 0x0F, 0x3A, 0x0F
	.byte	0x1c, 0x0e, 0x0b

	movdqa	%xmm3, (%rdi, %rcx)
	add	$16, %rcx

#ifdef USE_AS_STRNCPY
	cmp	%r10, %r8
	jbe	LABEL(unaligned_exit)
#endif
	/* second copy of the unrolled body */
	movdqa	16(%rsi, %rcx), %xmm3
	pcmpeqb	%xmm3, %xmm0
	pmovmskb %xmm0, %edx
	test	%edx, %edx
	jnz	LABEL(unaligned_exit)
#ifdef USE_AS_STRNCPY
	sub	$16, %r8
	jbe	LABEL(strncpy_truncation_unaligned)
#endif

	/* palignr $11, (%rsi, %rcx), %xmm3 -- hand-encoded below */
	.byte	0x66, 0x0F, 0x3A, 0x0F
	.byte	0x1c, 0x0e, 0x0b

	movdqa	%xmm3, (%rdi, %rcx)
	add	$16, %rcx
#ifdef USE_AS_STRNCPY
	cmp	%r10, %r8
	jbe	LABEL(unaligned_exit)
#endif
	jmp	LABEL(ashr_11_use_ssse3)

	.p2align 4
LABEL(ashr_11_use_sse2):
	pcmpeqb	16(%rsi, %rcx), %xmm0
	pmovmskb %xmm0, %edx
	test	%edx, %edx
	jnz	LABEL(unaligned_exit)
#ifdef USE_AS_STRNCPY
	sub	$16, %r8
	jbe	LABEL(strncpy_truncation_unaligned)
#endif

	movdqa	16(%rsi, %rcx), %xmm3
	movdqa	(%rsi, %rcx), %xmm2

	psrldq	$11, %xmm2		/* keep the last 5 bytes of the lower block */
	pslldq	$5, %xmm3		/* make room for them below the upper block */
	por	%xmm2, %xmm3

	movdqa	%xmm3, (%rdi, %rcx)
	add	$16, %rcx

#ifdef USE_AS_STRNCPY
	cmp	%r10, %r8
	jbe	LABEL(unaligned_exit)
#endif
	/* second copy of the unrolled body */
	pcmpeqb	16(%rsi, %rcx), %xmm0
	pmovmskb %xmm0, %edx
	test	%edx, %edx
	jnz	LABEL(unaligned_exit)
#ifdef USE_AS_STRNCPY
	sub	$16, %r8
	jbe	LABEL(strncpy_truncation_unaligned)
#endif

	movdqa	16(%rsi, %rcx), %xmm3
	movdqa	(%rsi, %rcx), %xmm2

	psrldq	$11, %xmm2
	pslldq	$5, %xmm3
	por	%xmm2, %xmm3

	movdqa	%xmm3, (%rdi, %rcx)
	add	$16, %rcx
#ifdef USE_AS_STRNCPY
	cmp	%r10, %r8
	jbe	LABEL(unaligned_exit)
#endif
	jmp	LABEL(ashr_11_use_sse2)


/*
 * ashr_10 handles the following cases:
 *	(16 + (src offset - dest offset)) % 16 = 10
 *
 * Based on above operation, start from (%r9 + rsi) to the left of this cache
 * bank, there is no null byte.
*/

/*
 * Each pass builds one 16-byte destination block out of the last 6 bytes
 * of the source block at (%rsi, %rcx) followed by the first 10 bytes of
 * the block at 16(%rsi, %rcx), and stores it at (%rdi, %rcx).  The SSSE3
 * path merges with palignr; the SSE2 path merges with psrldq/pslldq/por.
 * Both loop bodies are unrolled twice.
 *
 * Register use visible in this section:
 *	%rcx	running byte index, cleared on entry and advanced by 16
 *		after every store
 *	%rsi	source base; only aligned 16-byte loads are issued
 *	%rdi	destination base; only aligned 16-byte stores are issued
 *	%xmm0	result of the null-byte compare; the loop relies on it being
 *		all zero before each compare (NOTE(review): confirm that the
 *		code branching here leaves it cleared)
 *	%edx	byte mask of the compare; nonzero means a null byte was seen
 *	%r8	strncpy only: count taken from the length argument at entry,
 *		reduced by 16 per block here
 *	%r10	strncpy only: limit compared against %r8; set outside this
 *		section (NOTE(review): confirm its meaning at the caller)
 *
 * Leaves through unaligned_exit when the next source block holds a null
 * byte or when %r8 <= %r10 (unsigned), and through
 * strncpy_truncation_unaligned when %r8 was 16 or less before the
 * subtraction.
 */
	.p2align 4
LABEL(ashr_10):
	xor	%ecx, %ecx				/* clear index */
#ifdef USE_AS_STRNCPY
	cmp	%r10, %r8
	jbe	LABEL(unaligned_exit)
#endif
	testl	$USE_SSSE3, .memops_method(%rip)	/* use sse2 or ssse3? */
	jz	LABEL(ashr_10_use_sse2)

	.p2align 4
LABEL(ashr_10_use_ssse3):
	movdqa	16(%rsi, %rcx), %xmm3
	pcmpeqb	%xmm3, %xmm0
	pmovmskb %xmm0, %edx
	test	%edx, %edx
	jnz	LABEL(unaligned_exit)
#ifdef USE_AS_STRNCPY
	sub	$16, %r8
	jbe	LABEL(strncpy_truncation_unaligned)
#endif

	/* palignr $10, (%rsi, %rcx), %xmm3 -- hand-encoded below */
	.byte	0x66, 0x0F, 0x3A, 0x0F
	.byte	0x1c, 0x0e, 0x0a

	movdqa	%xmm3, (%rdi, %rcx)
	add	$16, %rcx

#ifdef USE_AS_STRNCPY
	cmp	%r10, %r8
	jbe	LABEL(unaligned_exit)
#endif
	/* second copy of the unrolled body */
	movdqa	16(%rsi, %rcx), %xmm3
	pcmpeqb	%xmm3, %xmm0
	pmovmskb %xmm0, %edx
	test	%edx, %edx
	jnz	LABEL(unaligned_exit)
#ifdef USE_AS_STRNCPY
	sub	$16, %r8
	jbe	LABEL(strncpy_truncation_unaligned)
#endif

	/* palignr $10, (%rsi, %rcx), %xmm3 -- hand-encoded below */
	.byte	0x66, 0x0F, 0x3A, 0x0F
	.byte	0x1c, 0x0e, 0x0a

	movdqa	%xmm3, (%rdi, %rcx)
	add	$16, %rcx
#ifdef USE_AS_STRNCPY
	cmp	%r10, %r8
	jbe	LABEL(unaligned_exit)
#endif
	jmp	LABEL(ashr_10_use_ssse3)

	.p2align 4
LABEL(ashr_10_use_sse2):
	pcmpeqb	16(%rsi, %rcx), %xmm0
	pmovmskb %xmm0, %edx
	test	%edx, %edx
	jnz	LABEL(unaligned_exit)
#ifdef USE_AS_STRNCPY
	sub	$16, %r8
	jbe	LABEL(strncpy_truncation_unaligned)
#endif

	movdqa	16(%rsi, %rcx), %xmm3
	movdqa	(%rsi, %rcx), %xmm2

	psrldq	$10, %xmm2		/* keep the last 6 bytes of the lower block */
	pslldq	$6, %xmm3		/* make room for them below the upper block */
	por	%xmm2, %xmm3

	movdqa	%xmm3, (%rdi, %rcx)
	add	$16, %rcx

#ifdef USE_AS_STRNCPY
	cmp	%r10, %r8
	jbe	LABEL(unaligned_exit)
#endif
	/* second copy of the unrolled body */
	pcmpeqb	16(%rsi, %rcx), %xmm0
	pmovmskb %xmm0, %edx
	test	%edx, %edx
	jnz	LABEL(unaligned_exit)
#ifdef USE_AS_STRNCPY
	sub	$16, %r8
	jbe	LABEL(strncpy_truncation_unaligned)
#endif

	movdqa	16(%rsi, %rcx), %xmm3
	movdqa	(%rsi, %rcx), %xmm2

	psrldq	$10, %xmm2
	pslldq	$6, %xmm3
	por	%xmm2, %xmm3

	movdqa	%xmm3, (%rdi, %rcx)
	add	$16, %rcx
#ifdef USE_AS_STRNCPY
	cmp	%r10, %r8
	jbe	LABEL(unaligned_exit)
#endif
	jmp	LABEL(ashr_10_use_sse2)


/*
 * ashr_9 handles the following cases:
 *	(16 + (src offset - dest offset)) % 16 = 9
 *
 * Based on above operation, start from (%r9 + rsi) to the left of this cache
 * bank, there is no null byte.
*/

/*
 * Each pass builds one 16-byte destination block out of the last 7 bytes
 * of the source block at (%rsi, %rcx) followed by the first 9 bytes of
 * the block at 16(%rsi, %rcx), and stores it at (%rdi, %rcx).  The SSSE3
 * path merges with palignr; the SSE2 path merges with psrldq/pslldq/por.
 * Both loop bodies are unrolled twice.
 *
 * Register use visible in this section:
 *	%rcx	running byte index, cleared on entry and advanced by 16
 *		after every store
 *	%rsi	source base; only aligned 16-byte loads are issued
 *	%rdi	destination base; only aligned 16-byte stores are issued
 *	%xmm0	result of the null-byte compare; the loop relies on it being
 *		all zero before each compare (NOTE(review): confirm that the
 *		code branching here leaves it cleared)
 *	%edx	byte mask of the compare; nonzero means a null byte was seen
 *	%r8	strncpy only: count taken from the length argument at entry,
 *		reduced by 16 per block here
 *	%r10	strncpy only: limit compared against %r8; set outside this
 *		section (NOTE(review): confirm its meaning at the caller)
 *
 * Leaves through unaligned_exit when the next source block holds a null
 * byte or when %r8 <= %r10 (unsigned), and through
 * strncpy_truncation_unaligned when %r8 was 16 or less before the
 * subtraction.
 */
	.p2align 4
LABEL(ashr_9):
	xor	%ecx, %ecx				/* clear index */
#ifdef USE_AS_STRNCPY
	cmp	%r10, %r8
	jbe	LABEL(unaligned_exit)
#endif
	testl	$USE_SSSE3, .memops_method(%rip)	/* use sse2 or ssse3? */
	jz	LABEL(ashr_9_use_sse2)

	.p2align 4
LABEL(ashr_9_use_ssse3):
	movdqa	16(%rsi, %rcx), %xmm3
	pcmpeqb	%xmm3, %xmm0
	pmovmskb %xmm0, %edx
	test	%edx, %edx
	jnz	LABEL(unaligned_exit)
#ifdef USE_AS_STRNCPY
	sub	$16, %r8
	jbe	LABEL(strncpy_truncation_unaligned)
#endif

	/* palignr $9, (%rsi, %rcx), %xmm3 -- hand-encoded below */
	.byte	0x66, 0x0F, 0x3A, 0x0F
	.byte	0x1c, 0x0e, 0x09

	movdqa	%xmm3, (%rdi, %rcx)
	add	$16, %rcx

#ifdef USE_AS_STRNCPY
	cmp	%r10, %r8
	jbe	LABEL(unaligned_exit)
#endif
	/* second copy of the unrolled body */
	movdqa	16(%rsi, %rcx), %xmm3
	pcmpeqb	%xmm3, %xmm0
	pmovmskb %xmm0, %edx
	test	%edx, %edx
	jnz	LABEL(unaligned_exit)
#ifdef USE_AS_STRNCPY
	sub	$16, %r8
	jbe	LABEL(strncpy_truncation_unaligned)
#endif

	/* palignr $9, (%rsi, %rcx), %xmm3 -- hand-encoded below */
	.byte	0x66, 0x0F, 0x3A, 0x0F
	.byte	0x1c, 0x0e, 0x09

	movdqa	%xmm3, (%rdi, %rcx)
	add	$16, %rcx
#ifdef USE_AS_STRNCPY
	cmp	%r10, %r8
	jbe	LABEL(unaligned_exit)
#endif
	jmp	LABEL(ashr_9_use_ssse3)

	.p2align 4
LABEL(ashr_9_use_sse2):
	pcmpeqb	16(%rsi, %rcx), %xmm0
	pmovmskb %xmm0, %edx
	test	%edx, %edx
	jnz	LABEL(unaligned_exit)
#ifdef USE_AS_STRNCPY
	sub	$16, %r8
	jbe	LABEL(strncpy_truncation_unaligned)
#endif

	movdqa	16(%rsi, %rcx), %xmm3
	movdqa	(%rsi, %rcx), %xmm2

	psrldq	$9, %xmm2		/* keep the last 7 bytes of the lower block */
	pslldq	$7, %xmm3		/* make room for them below the upper block */
	por	%xmm2, %xmm3

	movdqa	%xmm3, (%rdi, %rcx)
	add	$16, %rcx

#ifdef USE_AS_STRNCPY
	cmp	%r10, %r8
	jbe	LABEL(unaligned_exit)
#endif
	/* second copy of the unrolled body */
	pcmpeqb	16(%rsi, %rcx), %xmm0
	pmovmskb %xmm0, %edx
	test	%edx, %edx
	jnz	LABEL(unaligned_exit)
#ifdef USE_AS_STRNCPY
	sub	$16, %r8
	jbe	LABEL(strncpy_truncation_unaligned)
#endif

	movdqa	16(%rsi, %rcx), %xmm3
	movdqa	(%rsi, %rcx), %xmm2

	psrldq	$9, %xmm2
	pslldq	$7, %xmm3
	por	%xmm2, %xmm3

	movdqa	%xmm3, (%rdi, %rcx)
	add	$16, %rcx
#ifdef USE_AS_STRNCPY
	cmp	%r10, %r8
	jbe	LABEL(unaligned_exit)
#endif
	jmp	LABEL(ashr_9_use_sse2)


/*
 * ashr_8 handles the following cases:
 *	(16 + (src offset - dest offset)) % 16 = 8
 *
 * Based on above operation, start from (%r9 + rsi) to the left of this cache
 * bank, there is no null byte.
*/

/*
 * Each pass builds one 16-byte destination block out of the last 8 bytes
 * of the source block at (%rsi, %rcx) followed by the first 8 bytes of
 * the block at 16(%rsi, %rcx), and stores it at (%rdi, %rcx).  The SSSE3
 * path merges with palignr; the SSE2 path merges with psrldq/pslldq/por.
 * Both loop bodies are unrolled twice.
 *
 * Register use visible in this section:
 *	%rcx	running byte index, cleared on entry and advanced by 16
 *		after every store
 *	%rsi	source base; only aligned 16-byte loads are issued
 *	%rdi	destination base; only aligned 16-byte stores are issued
 *	%xmm0	result of the null-byte compare; the loop relies on it being
 *		all zero before each compare (NOTE(review): confirm that the
 *		code branching here leaves it cleared)
 *	%edx	byte mask of the compare; nonzero means a null byte was seen
 *	%r8	strncpy only: count taken from the length argument at entry,
 *		reduced by 16 per block here
 *	%r10	strncpy only: limit compared against %r8; set outside this
 *		section (NOTE(review): confirm its meaning at the caller)
 *
 * Leaves through unaligned_exit when the next source block holds a null
 * byte or when %r8 <= %r10 (unsigned), and through
 * strncpy_truncation_unaligned when %r8 was 16 or less before the
 * subtraction.
 */
	.p2align 4
LABEL(ashr_8):
	xor	%ecx, %ecx				/* clear index */
#ifdef USE_AS_STRNCPY
	cmp	%r10, %r8
	jbe	LABEL(unaligned_exit)
#endif
	testl	$USE_SSSE3, .memops_method(%rip)	/* use sse2 or ssse3? */
	jz	LABEL(ashr_8_use_sse2)

	.p2align 4
LABEL(ashr_8_use_ssse3):
	movdqa	16(%rsi, %rcx), %xmm3
	pcmpeqb	%xmm3, %xmm0
	pmovmskb %xmm0, %edx
	test	%edx, %edx
	jnz	LABEL(unaligned_exit)
#ifdef USE_AS_STRNCPY
	sub	$16, %r8
	jbe	LABEL(strncpy_truncation_unaligned)
#endif

	/* palignr $8, (%rsi, %rcx), %xmm3 -- hand-encoded below */
	.byte	0x66, 0x0F, 0x3A, 0x0F
	.byte	0x1c, 0x0e, 0x08

	movdqa	%xmm3, (%rdi, %rcx)
	add	$16, %rcx

#ifdef USE_AS_STRNCPY
	cmp	%r10, %r8
	jbe	LABEL(unaligned_exit)
#endif
	/* second copy of the unrolled body */
	movdqa	16(%rsi, %rcx), %xmm3
	pcmpeqb	%xmm3, %xmm0
	pmovmskb %xmm0, %edx
	test	%edx, %edx
	jnz	LABEL(unaligned_exit)
#ifdef USE_AS_STRNCPY
	sub	$16, %r8
	jbe	LABEL(strncpy_truncation_unaligned)
#endif

	/* palignr $8, (%rsi, %rcx), %xmm3 -- hand-encoded below */
	.byte	0x66, 0x0F, 0x3A, 0x0F
	.byte	0x1c, 0x0e, 0x08

	movdqa	%xmm3, (%rdi, %rcx)
	add	$16, %rcx
#ifdef USE_AS_STRNCPY
	cmp	%r10, %r8
	jbe	LABEL(unaligned_exit)
#endif
	jmp	LABEL(ashr_8_use_ssse3)

	.p2align 4
LABEL(ashr_8_use_sse2):
	pcmpeqb	16(%rsi, %rcx), %xmm0
	pmovmskb %xmm0, %edx
	test	%edx, %edx
	jnz	LABEL(unaligned_exit)
#ifdef USE_AS_STRNCPY
	sub	$16, %r8
	jbe	LABEL(strncpy_truncation_unaligned)
#endif

	movdqa	16(%rsi, %rcx), %xmm3
	movdqa	(%rsi, %rcx), %xmm2

	psrldq	$8, %xmm2		/* keep the last 8 bytes of the lower block */
	pslldq	$8, %xmm3		/* make room for them below the upper block */
	por	%xmm2, %xmm3

	movdqa	%xmm3, (%rdi, %rcx)
	add	$16, %rcx

#ifdef USE_AS_STRNCPY
	cmp	%r10, %r8
	jbe	LABEL(unaligned_exit)
#endif
	/* second copy of the unrolled body */
	pcmpeqb	16(%rsi, %rcx), %xmm0
	pmovmskb %xmm0, %edx
	test	%edx, %edx
	jnz	LABEL(unaligned_exit)
#ifdef USE_AS_STRNCPY
	sub	$16, %r8
	jbe	LABEL(strncpy_truncation_unaligned)
#endif

	movdqa	16(%rsi, %rcx), %xmm3
	movdqa	(%rsi, %rcx), %xmm2

	psrldq	$8, %xmm2
	pslldq	$8, %xmm3
	por	%xmm2, %xmm3

	movdqa	%xmm3, (%rdi, %rcx)
	add	$16, %rcx
#ifdef USE_AS_STRNCPY
	cmp	%r10, %r8
	jbe	LABEL(unaligned_exit)
#endif
	jmp	LABEL(ashr_8_use_sse2)


/*
 * ashr_7 handles the following cases:
 *	(16 + (src offset - dest offset)) % 16 = 7
 *
 * Based on above operation, start from (%r9 + rsi) to the left of this cache
 * bank, there is no null byte.
*/

/*
 * Each pass builds one 16-byte destination block out of the last 9 bytes
 * of the source block at (%rsi, %rcx) followed by the first 7 bytes of
 * the block at 16(%rsi, %rcx), and stores it at (%rdi, %rcx).  The SSSE3
 * path merges with palignr; the SSE2 path merges with psrldq/pslldq/por.
 * Both loop bodies are unrolled twice.
 *
 * Register use visible in this section:
 *	%rcx	running byte index, cleared on entry and advanced by 16
 *		after every store
 *	%rsi	source base; only aligned 16-byte loads are issued
 *	%rdi	destination base; only aligned 16-byte stores are issued
 *	%xmm0	result of the null-byte compare; the loop relies on it being
 *		all zero before each compare (NOTE(review): confirm that the
 *		code branching here leaves it cleared)
 *	%edx	byte mask of the compare; nonzero means a null byte was seen
 *	%r8	strncpy only: count taken from the length argument at entry,
 *		reduced by 16 per block here
 *	%r10	strncpy only: limit compared against %r8; set outside this
 *		section (NOTE(review): confirm its meaning at the caller)
 *
 * Leaves through unaligned_exit when the next source block holds a null
 * byte or when %r8 <= %r10 (unsigned), and through
 * strncpy_truncation_unaligned when %r8 was 16 or less before the
 * subtraction.
 */
	.p2align 4
LABEL(ashr_7):
	xor	%ecx, %ecx				/* clear index */
#ifdef USE_AS_STRNCPY
	cmp	%r10, %r8
	jbe	LABEL(unaligned_exit)
#endif
	testl	$USE_SSSE3, .memops_method(%rip)	/* use sse2 or ssse3? */
	jz	LABEL(ashr_7_use_sse2)

	.p2align 4
LABEL(ashr_7_use_ssse3):
	movdqa	16(%rsi, %rcx), %xmm3
	pcmpeqb	%xmm3, %xmm0
	pmovmskb %xmm0, %edx
	test	%edx, %edx
	jnz	LABEL(unaligned_exit)
#ifdef USE_AS_STRNCPY
	sub	$16, %r8
	jbe	LABEL(strncpy_truncation_unaligned)
#endif

	/* palignr $7, (%rsi, %rcx), %xmm3 -- hand-encoded below */
	.byte	0x66, 0x0F, 0x3A, 0x0F
	.byte	0x1c, 0x0e, 0x07

	movdqa	%xmm3, (%rdi, %rcx)
	add	$16, %rcx

#ifdef USE_AS_STRNCPY
	cmp	%r10, %r8
	jbe	LABEL(unaligned_exit)
#endif
	/* second copy of the unrolled body */
	movdqa	16(%rsi, %rcx), %xmm3
	pcmpeqb	%xmm3, %xmm0
	pmovmskb %xmm0, %edx
	test	%edx, %edx
	jnz	LABEL(unaligned_exit)
#ifdef USE_AS_STRNCPY
	sub	$16, %r8
	jbe	LABEL(strncpy_truncation_unaligned)
#endif

	/* palignr $7, (%rsi, %rcx), %xmm3 -- hand-encoded below */
	.byte	0x66, 0x0F, 0x3A, 0x0F
	.byte	0x1c, 0x0e, 0x07

	movdqa	%xmm3, (%rdi, %rcx)
	add	$16, %rcx
#ifdef USE_AS_STRNCPY
	cmp	%r10, %r8
	jbe	LABEL(unaligned_exit)
#endif
	jmp	LABEL(ashr_7_use_ssse3)

	.p2align 4
LABEL(ashr_7_use_sse2):
	pcmpeqb	16(%rsi, %rcx), %xmm0
	pmovmskb %xmm0, %edx
	test	%edx, %edx
	jnz	LABEL(unaligned_exit)
#ifdef USE_AS_STRNCPY
	sub	$16, %r8
	jbe	LABEL(strncpy_truncation_unaligned)
#endif

	movdqa	16(%rsi, %rcx), %xmm3
	movdqa	(%rsi, %rcx), %xmm2

	psrldq	$7, %xmm2		/* keep the last 9 bytes of the lower block */
	pslldq	$9, %xmm3		/* make room for them below the upper block */
	por	%xmm2, %xmm3

	movdqa	%xmm3, (%rdi, %rcx)
	add	$16, %rcx

#ifdef USE_AS_STRNCPY
	cmp	%r10, %r8
	jbe	LABEL(unaligned_exit)
#endif
	/* second copy of the unrolled body */
	pcmpeqb	16(%rsi, %rcx), %xmm0
	pmovmskb %xmm0, %edx
	test	%edx, %edx
	jnz	LABEL(unaligned_exit)
#ifdef USE_AS_STRNCPY
	sub	$16, %r8
	jbe	LABEL(strncpy_truncation_unaligned)
#endif

	movdqa	16(%rsi, %rcx), %xmm3
	movdqa	(%rsi, %rcx), %xmm2

	psrldq	$7, %xmm2
	pslldq	$9, %xmm3
	por	%xmm2, %xmm3

	movdqa	%xmm3, (%rdi, %rcx)
	add	$16, %rcx
#ifdef USE_AS_STRNCPY
	cmp	%r10, %r8
	jbe	LABEL(unaligned_exit)
#endif
	jmp	LABEL(ashr_7_use_sse2)


/*
 * ashr_6 handles the following cases:
 *	(16 + (src offset - dest offset)) % 16 = 6
 *
 * Based on above operation, start from (%r9 + rsi) to the left of this cache
 * bank, there is no null byte.
*/
	.p2align 4
LABEL(ashr_6):
	xor	%ecx, %ecx			/* rcx = 0: byte index shared by src and dst */
#ifdef USE_AS_STRNCPY
	/* r8 is set from the count argument at entry; exit when r8 <= r10 (unsigned). */
	/* NOTE(review): r10 is later used as a shift count at unaligned_exit -- confirm its meaning. */
	cmp	%r10, %r8
	jbe	LABEL(unaligned_exit)
#endif
	/* USE_SSSE3 bit clear in .memops_method => take the SSE2 loop. */
	testl	$USE_SSSE3, .memops_method(%rip)
	jz	LABEL(ashr_6_use_sse2)

	/*
	 * SSSE3 loop, unrolled twice.  Each step loads the next aligned 16
	 * source bytes, leaves through unaligned_exit if the compare with
	 * %xmm0 matches any byte (this relies on %xmm0 being zero when the
	 * step starts; a compare that finds no null leaves it zero again),
	 * and otherwise stores source bytes 6..21 relative to (%rsi, %rcx)
	 * at (%rdi, %rcx).  movdqa requires both addresses to be 16-byte
	 * aligned.
	 */
	.p2align 4
LABEL(ashr_6_use_ssse3):
	/* step 1 */
	movdqa	16(%rsi, %rcx), %xmm3		/* next aligned source block */
	pcmpeqb	%xmm3, %xmm0
	pmovmskb %xmm0, %edx			/* one mask bit per matching byte */
	test	%edx, %edx
	jnz	LABEL(unaligned_exit)
#ifdef USE_AS_STRNCPY
	sub	$16, %r8
	jbe	LABEL(strncpy_truncation_unaligned)
#endif
	/* palignr $6, (%rsi, %rcx), %xmm3 -- emitted as raw opcode bytes */
	.byte	0x66, 0x0f, 0x3a, 0x0f, 0x1c, 0x0e, 0x06
	movdqa	%xmm3, (%rdi, %rcx)
	add	$16, %rcx
#ifdef USE_AS_STRNCPY
	cmp	%r10, %r8
	jbe	LABEL(unaligned_exit)
#endif

	/* step 2: same sequence as step 1 */
	movdqa	16(%rsi, %rcx), %xmm3
	pcmpeqb	%xmm3, %xmm0
	pmovmskb %xmm0, %edx
	test	%edx, %edx
	jnz	LABEL(unaligned_exit)
#ifdef USE_AS_STRNCPY
	sub	$16, %r8
	jbe	LABEL(strncpy_truncation_unaligned)
#endif
	/* palignr $6, (%rsi, %rcx), %xmm3 -- emitted as raw opcode bytes */
	.byte	0x66, 0x0f, 0x3a, 0x0f, 0x1c, 0x0e, 0x06
	movdqa	%xmm3, (%rdi, %rcx)
	add	$16, %rcx
#ifdef USE_AS_STRNCPY
	cmp	%r10, %r8
	jbe	LABEL(unaligned_exit)
#endif
	jmp	LABEL(ashr_6_use_ssse3)

	/*
	 * SSE2 loop, unrolled twice.  Same checks and same stored bytes as
	 * the SSSE3 loop, but the 16 output bytes are assembled with
	 * psrldq/pslldq/por instead of palignr.
	 */
	.p2align 4
LABEL(ashr_6_use_sse2):
	/* step 1 */
	pcmpeqb	16(%rsi, %rcx), %xmm0
	pmovmskb %xmm0, %edx
	test	%edx, %edx
	jnz	LABEL(unaligned_exit)
#ifdef USE_AS_STRNCPY
	sub	$16, %r8
	jbe	LABEL(strncpy_truncation_unaligned)
#endif
	movdqa	(%rsi, %rcx), %xmm2		/* current block ... */
	psrldq	$6, %xmm2			/* ... bytes 6..15 moved down to 0..9 */
	movdqa	16(%rsi, %rcx), %xmm3		/* next block ... */
	pslldq	$10, %xmm3			/* ... bytes 0..5 moved up to 10..15 */
	por	%xmm2, %xmm3
	movdqa	%xmm3, (%rdi, %rcx)
	add	$16, %rcx
#ifdef USE_AS_STRNCPY
	cmp	%r10, %r8
	jbe	LABEL(unaligned_exit)
#endif

	/* step 2: same sequence as step 1 */
	pcmpeqb	16(%rsi, %rcx), %xmm0
	pmovmskb %xmm0, %edx
	test	%edx, %edx
	jnz	LABEL(unaligned_exit)
#ifdef USE_AS_STRNCPY
	sub	$16, %r8
	jbe	LABEL(strncpy_truncation_unaligned)
#endif
	movdqa	(%rsi, %rcx), %xmm2
	psrldq	$6, %xmm2
	movdqa	16(%rsi, %rcx), %xmm3
	pslldq	$10, %xmm3
	por	%xmm2, %xmm3
	movdqa	%xmm3, (%rdi, %rcx)
	add	$16, %rcx
#ifdef USE_AS_STRNCPY
	cmp	%r10, %r8
	jbe	LABEL(unaligned_exit)
#endif
	jmp	LABEL(ashr_6_use_sse2)


/*
 * ashr_5 handles the following cases:
 *	(16 + (src offset - dest offset)) % 16 = 5
 *
 * Based on the above operation, starting from (%r9 + rsi) to the left of this
 * cache bank, there is no null byte.
*/
	.p2align 4
LABEL(ashr_5):
	xor	%ecx, %ecx			/* rcx = 0: byte index shared by src and dst */
#ifdef USE_AS_STRNCPY
	/* r8 is set from the count argument at entry; exit when r8 <= r10 (unsigned). */
	/* NOTE(review): r10 is later used as a shift count at unaligned_exit -- confirm its meaning. */
	cmp	%r10, %r8
	jbe	LABEL(unaligned_exit)
#endif
	/* USE_SSSE3 bit clear in .memops_method => take the SSE2 loop. */
	testl	$USE_SSSE3, .memops_method(%rip)
	jz	LABEL(ashr_5_use_sse2)

	/*
	 * SSSE3 loop, unrolled twice.  Each step loads the next aligned 16
	 * source bytes, leaves through unaligned_exit if the compare with
	 * %xmm0 matches any byte (this relies on %xmm0 being zero when the
	 * step starts; a compare that finds no null leaves it zero again),
	 * and otherwise stores source bytes 5..20 relative to (%rsi, %rcx)
	 * at (%rdi, %rcx).  movdqa requires both addresses to be 16-byte
	 * aligned.
	 */
	.p2align 4
LABEL(ashr_5_use_ssse3):
	/* step 1 */
	movdqa	16(%rsi, %rcx), %xmm3		/* next aligned source block */
	pcmpeqb	%xmm3, %xmm0
	pmovmskb %xmm0, %edx			/* one mask bit per matching byte */
	test	%edx, %edx
	jnz	LABEL(unaligned_exit)
#ifdef USE_AS_STRNCPY
	sub	$16, %r8
	jbe	LABEL(strncpy_truncation_unaligned)
#endif
	/* palignr $5, (%rsi, %rcx), %xmm3 -- emitted as raw opcode bytes */
	.byte	0x66, 0x0f, 0x3a, 0x0f, 0x1c, 0x0e, 0x05
	movdqa	%xmm3, (%rdi, %rcx)
	add	$16, %rcx
#ifdef USE_AS_STRNCPY
	cmp	%r10, %r8
	jbe	LABEL(unaligned_exit)
#endif

	/* step 2: same sequence as step 1 */
	movdqa	16(%rsi, %rcx), %xmm3
	pcmpeqb	%xmm3, %xmm0
	pmovmskb %xmm0, %edx
	test	%edx, %edx
	jnz	LABEL(unaligned_exit)
#ifdef USE_AS_STRNCPY
	sub	$16, %r8
	jbe	LABEL(strncpy_truncation_unaligned)
#endif
	/* palignr $5, (%rsi, %rcx), %xmm3 -- emitted as raw opcode bytes */
	.byte	0x66, 0x0f, 0x3a, 0x0f, 0x1c, 0x0e, 0x05
	movdqa	%xmm3, (%rdi, %rcx)
	add	$16, %rcx
#ifdef USE_AS_STRNCPY
	cmp	%r10, %r8
	jbe	LABEL(unaligned_exit)
#endif
	jmp	LABEL(ashr_5_use_ssse3)

	/*
	 * SSE2 loop, unrolled twice.  Same checks and same stored bytes as
	 * the SSSE3 loop, but the 16 output bytes are assembled with
	 * psrldq/pslldq/por instead of palignr.
	 */
	.p2align 4
LABEL(ashr_5_use_sse2):
	/* step 1 */
	pcmpeqb	16(%rsi, %rcx), %xmm0
	pmovmskb %xmm0, %edx
	test	%edx, %edx
	jnz	LABEL(unaligned_exit)
#ifdef USE_AS_STRNCPY
	sub	$16, %r8
	jbe	LABEL(strncpy_truncation_unaligned)
#endif
	movdqa	(%rsi, %rcx), %xmm2		/* current block ... */
	psrldq	$5, %xmm2			/* ... bytes 5..15 moved down to 0..10 */
	movdqa	16(%rsi, %rcx), %xmm3		/* next block ... */
	pslldq	$11, %xmm3			/* ... bytes 0..4 moved up to 11..15 */
	por	%xmm2, %xmm3
	movdqa	%xmm3, (%rdi, %rcx)
	add	$16, %rcx
#ifdef USE_AS_STRNCPY
	cmp	%r10, %r8
	jbe	LABEL(unaligned_exit)
#endif

	/* step 2: same sequence as step 1 */
	pcmpeqb	16(%rsi, %rcx), %xmm0
	pmovmskb %xmm0, %edx
	test	%edx, %edx
	jnz	LABEL(unaligned_exit)
#ifdef USE_AS_STRNCPY
	sub	$16, %r8
	jbe	LABEL(strncpy_truncation_unaligned)
#endif
	movdqa	(%rsi, %rcx), %xmm2
	psrldq	$5, %xmm2
	movdqa	16(%rsi, %rcx), %xmm3
	pslldq	$11, %xmm3
	por	%xmm2, %xmm3
	movdqa	%xmm3, (%rdi, %rcx)
	add	$16, %rcx
#ifdef USE_AS_STRNCPY
	cmp	%r10, %r8
	jbe	LABEL(unaligned_exit)
#endif
	jmp	LABEL(ashr_5_use_sse2)


/*
 * ashr_4 handles the following cases:
 *	(16 + (src offset - dest offset)) % 16 = 4
 *
 * Based on the above operation, starting from (%r9 + rsi) to the left of this
 * cache bank, there is no null byte.
*/
	.p2align 4
LABEL(ashr_4):
	xor	%ecx, %ecx			/* rcx = 0: byte index shared by src and dst */
#ifdef USE_AS_STRNCPY
	/* r8 is set from the count argument at entry; exit when r8 <= r10 (unsigned). */
	/* NOTE(review): r10 is later used as a shift count at unaligned_exit -- confirm its meaning. */
	cmp	%r10, %r8
	jbe	LABEL(unaligned_exit)
#endif
	/* USE_SSSE3 bit clear in .memops_method => take the SSE2 loop. */
	testl	$USE_SSSE3, .memops_method(%rip)
	jz	LABEL(ashr_4_use_sse2)

	/*
	 * SSSE3 loop, unrolled twice.  Each step loads the next aligned 16
	 * source bytes, leaves through unaligned_exit if the compare with
	 * %xmm0 matches any byte (this relies on %xmm0 being zero when the
	 * step starts; a compare that finds no null leaves it zero again),
	 * and otherwise stores source bytes 4..19 relative to (%rsi, %rcx)
	 * at (%rdi, %rcx).  movdqa requires both addresses to be 16-byte
	 * aligned.
	 */
	.p2align 4
LABEL(ashr_4_use_ssse3):
	/* step 1 */
	movdqa	16(%rsi, %rcx), %xmm3		/* next aligned source block */
	pcmpeqb	%xmm3, %xmm0
	pmovmskb %xmm0, %edx			/* one mask bit per matching byte */
	test	%edx, %edx
	jnz	LABEL(unaligned_exit)
#ifdef USE_AS_STRNCPY
	sub	$16, %r8
	jbe	LABEL(strncpy_truncation_unaligned)
#endif
	/* palignr $4, (%rsi, %rcx), %xmm3 -- emitted as raw opcode bytes */
	.byte	0x66, 0x0f, 0x3a, 0x0f, 0x1c, 0x0e, 0x04
	movdqa	%xmm3, (%rdi, %rcx)
	add	$16, %rcx
#ifdef USE_AS_STRNCPY
	cmp	%r10, %r8
	jbe	LABEL(unaligned_exit)
#endif

	/* step 2: same sequence as step 1 */
	movdqa	16(%rsi, %rcx), %xmm3
	pcmpeqb	%xmm3, %xmm0
	pmovmskb %xmm0, %edx
	test	%edx, %edx
	jnz	LABEL(unaligned_exit)
#ifdef USE_AS_STRNCPY
	sub	$16, %r8
	jbe	LABEL(strncpy_truncation_unaligned)
#endif
	/* palignr $4, (%rsi, %rcx), %xmm3 -- emitted as raw opcode bytes */
	.byte	0x66, 0x0f, 0x3a, 0x0f, 0x1c, 0x0e, 0x04
	movdqa	%xmm3, (%rdi, %rcx)
	add	$16, %rcx
#ifdef USE_AS_STRNCPY
	cmp	%r10, %r8
	jbe	LABEL(unaligned_exit)
#endif
	jmp	LABEL(ashr_4_use_ssse3)

	/*
	 * SSE2 loop, unrolled twice.  Same checks and same stored bytes as
	 * the SSSE3 loop, but the 16 output bytes are assembled with
	 * psrldq/pslldq/por instead of palignr.
	 */
	.p2align 4
LABEL(ashr_4_use_sse2):
	/* step 1 */
	pcmpeqb	16(%rsi, %rcx), %xmm0
	pmovmskb %xmm0, %edx
	test	%edx, %edx
	jnz	LABEL(unaligned_exit)
#ifdef USE_AS_STRNCPY
	sub	$16, %r8
	jbe	LABEL(strncpy_truncation_unaligned)
#endif
	movdqa	(%rsi, %rcx), %xmm2		/* current block ... */
	psrldq	$4, %xmm2			/* ... bytes 4..15 moved down to 0..11 */
	movdqa	16(%rsi, %rcx), %xmm3		/* next block ... */
	pslldq	$12, %xmm3			/* ... bytes 0..3 moved up to 12..15 */
	por	%xmm2, %xmm3
	movdqa	%xmm3, (%rdi, %rcx)
	add	$16, %rcx
#ifdef USE_AS_STRNCPY
	cmp	%r10, %r8
	jbe	LABEL(unaligned_exit)
#endif

	/* step 2: same sequence as step 1 */
	pcmpeqb	16(%rsi, %rcx), %xmm0
	pmovmskb %xmm0, %edx
	test	%edx, %edx
	jnz	LABEL(unaligned_exit)
#ifdef USE_AS_STRNCPY
	sub	$16, %r8
	jbe	LABEL(strncpy_truncation_unaligned)
#endif
	movdqa	(%rsi, %rcx), %xmm2
	psrldq	$4, %xmm2
	movdqa	16(%rsi, %rcx), %xmm3
	pslldq	$12, %xmm3
	por	%xmm2, %xmm3
	movdqa	%xmm3, (%rdi, %rcx)
	add	$16, %rcx
#ifdef USE_AS_STRNCPY
	cmp	%r10, %r8
	jbe	LABEL(unaligned_exit)
#endif
	jmp	LABEL(ashr_4_use_sse2)


/*
 * ashr_3 handles the following cases:
 *	(16 + (src offset - dest offset)) % 16 = 3
 *
 * Based on the above operation, starting from (%r9 + rsi) to the left of this
 * cache bank, there is no null byte.
*/
	.p2align 4
LABEL(ashr_3):
	xor	%ecx, %ecx			/* rcx = 0: byte index shared by src and dst */
#ifdef USE_AS_STRNCPY
	/* r8 is set from the count argument at entry; exit when r8 <= r10 (unsigned). */
	/* NOTE(review): r10 is later used as a shift count at unaligned_exit -- confirm its meaning. */
	cmp	%r10, %r8
	jbe	LABEL(unaligned_exit)
#endif
	/* USE_SSSE3 bit clear in .memops_method => take the SSE2 loop. */
	testl	$USE_SSSE3, .memops_method(%rip)
	jz	LABEL(ashr_3_use_sse2)

	/*
	 * SSSE3 loop, unrolled twice.  Each step loads the next aligned 16
	 * source bytes, leaves through unaligned_exit if the compare with
	 * %xmm0 matches any byte (this relies on %xmm0 being zero when the
	 * step starts; a compare that finds no null leaves it zero again),
	 * and otherwise stores source bytes 3..18 relative to (%rsi, %rcx)
	 * at (%rdi, %rcx).  movdqa requires both addresses to be 16-byte
	 * aligned.
	 */
	.p2align 4
LABEL(ashr_3_use_ssse3):
	/* step 1 */
	movdqa	16(%rsi, %rcx), %xmm3		/* next aligned source block */
	pcmpeqb	%xmm3, %xmm0
	pmovmskb %xmm0, %edx			/* one mask bit per matching byte */
	test	%edx, %edx
	jnz	LABEL(unaligned_exit)
#ifdef USE_AS_STRNCPY
	sub	$16, %r8
	jbe	LABEL(strncpy_truncation_unaligned)
#endif
	/* palignr $3, (%rsi, %rcx), %xmm3 -- emitted as raw opcode bytes */
	.byte	0x66, 0x0f, 0x3a, 0x0f, 0x1c, 0x0e, 0x03
	movdqa	%xmm3, (%rdi, %rcx)
	add	$16, %rcx
#ifdef USE_AS_STRNCPY
	cmp	%r10, %r8
	jbe	LABEL(unaligned_exit)
#endif

	/* step 2: same sequence as step 1 */
	movdqa	16(%rsi, %rcx), %xmm3
	pcmpeqb	%xmm3, %xmm0
	pmovmskb %xmm0, %edx
	test	%edx, %edx
	jnz	LABEL(unaligned_exit)
#ifdef USE_AS_STRNCPY
	sub	$16, %r8
	jbe	LABEL(strncpy_truncation_unaligned)
#endif
	/* palignr $3, (%rsi, %rcx), %xmm3 -- emitted as raw opcode bytes */
	.byte	0x66, 0x0f, 0x3a, 0x0f, 0x1c, 0x0e, 0x03
	movdqa	%xmm3, (%rdi, %rcx)
	add	$16, %rcx
#ifdef USE_AS_STRNCPY
	cmp	%r10, %r8
	jbe	LABEL(unaligned_exit)
#endif
	jmp	LABEL(ashr_3_use_ssse3)

	/*
	 * SSE2 loop, unrolled twice.  Same checks and same stored bytes as
	 * the SSSE3 loop, but the 16 output bytes are assembled with
	 * psrldq/pslldq/por instead of palignr.
	 */
	.p2align 4
LABEL(ashr_3_use_sse2):
	/* step 1 */
	pcmpeqb	16(%rsi, %rcx), %xmm0
	pmovmskb %xmm0, %edx
	test	%edx, %edx
	jnz	LABEL(unaligned_exit)
#ifdef USE_AS_STRNCPY
	sub	$16, %r8
	jbe	LABEL(strncpy_truncation_unaligned)
#endif
	movdqa	(%rsi, %rcx), %xmm2		/* current block ... */
	psrldq	$3, %xmm2			/* ... bytes 3..15 moved down to 0..12 */
	movdqa	16(%rsi, %rcx), %xmm3		/* next block ... */
	pslldq	$13, %xmm3			/* ... bytes 0..2 moved up to 13..15 */
	por	%xmm2, %xmm3
	movdqa	%xmm3, (%rdi, %rcx)
	add	$16, %rcx
#ifdef USE_AS_STRNCPY
	cmp	%r10, %r8
	jbe	LABEL(unaligned_exit)
#endif

	/* step 2: same sequence as step 1 */
	pcmpeqb	16(%rsi, %rcx), %xmm0
	pmovmskb %xmm0, %edx
	test	%edx, %edx
	jnz	LABEL(unaligned_exit)
#ifdef USE_AS_STRNCPY
	sub	$16, %r8
	jbe	LABEL(strncpy_truncation_unaligned)
#endif
	movdqa	(%rsi, %rcx), %xmm2
	psrldq	$3, %xmm2
	movdqa	16(%rsi, %rcx), %xmm3
	pslldq	$13, %xmm3
	por	%xmm2, %xmm3
	movdqa	%xmm3, (%rdi, %rcx)
	add	$16, %rcx
#ifdef USE_AS_STRNCPY
	cmp	%r10, %r8
	jbe	LABEL(unaligned_exit)
#endif
	jmp	LABEL(ashr_3_use_sse2)


/*
 * ashr_2 handles the following cases:
 *	(16 + (src offset - dest offset)) % 16 = 2
 *
 * Based on the above operation, starting from (%r9 + rsi) to the left of this
 * cache bank, there is no null byte.
*/
	.p2align 4
LABEL(ashr_2):
	xor	%ecx, %ecx			/* rcx = 0: byte index shared by src and dst */
#ifdef USE_AS_STRNCPY
	/* r8 is set from the count argument at entry; exit when r8 <= r10 (unsigned). */
	/* NOTE(review): r10 is later used as a shift count at unaligned_exit -- confirm its meaning. */
	cmp	%r10, %r8
	jbe	LABEL(unaligned_exit)
#endif
	/* USE_SSSE3 bit clear in .memops_method => take the SSE2 loop. */
	testl	$USE_SSSE3, .memops_method(%rip)
	jz	LABEL(ashr_2_use_sse2)

	/*
	 * SSSE3 loop, unrolled twice.  Each step loads the next aligned 16
	 * source bytes, leaves through unaligned_exit if the compare with
	 * %xmm0 matches any byte (this relies on %xmm0 being zero when the
	 * step starts; a compare that finds no null leaves it zero again),
	 * and otherwise stores source bytes 2..17 relative to (%rsi, %rcx)
	 * at (%rdi, %rcx).  movdqa requires both addresses to be 16-byte
	 * aligned.
	 */
	.p2align 4
LABEL(ashr_2_use_ssse3):
	/* step 1 */
	movdqa	16(%rsi, %rcx), %xmm3		/* next aligned source block */
	pcmpeqb	%xmm3, %xmm0
	pmovmskb %xmm0, %edx			/* one mask bit per matching byte */
	test	%edx, %edx
	jnz	LABEL(unaligned_exit)
#ifdef USE_AS_STRNCPY
	sub	$16, %r8
	jbe	LABEL(strncpy_truncation_unaligned)
#endif
	/* palignr $2, (%rsi, %rcx), %xmm3 -- emitted as raw opcode bytes */
	.byte	0x66, 0x0f, 0x3a, 0x0f, 0x1c, 0x0e, 0x02
	movdqa	%xmm3, (%rdi, %rcx)
	add	$16, %rcx
#ifdef USE_AS_STRNCPY
	cmp	%r10, %r8
	jbe	LABEL(unaligned_exit)
#endif

	/* step 2: same sequence as step 1 */
	movdqa	16(%rsi, %rcx), %xmm3
	pcmpeqb	%xmm3, %xmm0
	pmovmskb %xmm0, %edx
	test	%edx, %edx
	jnz	LABEL(unaligned_exit)
#ifdef USE_AS_STRNCPY
	sub	$16, %r8
	jbe	LABEL(strncpy_truncation_unaligned)
#endif
	/* palignr $2, (%rsi, %rcx), %xmm3 -- emitted as raw opcode bytes */
	.byte	0x66, 0x0f, 0x3a, 0x0f, 0x1c, 0x0e, 0x02
	movdqa	%xmm3, (%rdi, %rcx)
	add	$16, %rcx
#ifdef USE_AS_STRNCPY
	cmp	%r10, %r8
	jbe	LABEL(unaligned_exit)
#endif
	jmp	LABEL(ashr_2_use_ssse3)

	/*
	 * SSE2 loop, unrolled twice.  Same checks and same stored bytes as
	 * the SSSE3 loop, but the 16 output bytes are assembled with
	 * psrldq/pslldq/por instead of palignr.
	 */
	.p2align 4
LABEL(ashr_2_use_sse2):
	/* step 1 */
	pcmpeqb	16(%rsi, %rcx), %xmm0
	pmovmskb %xmm0, %edx
	test	%edx, %edx
	jnz	LABEL(unaligned_exit)
#ifdef USE_AS_STRNCPY
	sub	$16, %r8
	jbe	LABEL(strncpy_truncation_unaligned)
#endif
	movdqa	(%rsi, %rcx), %xmm2		/* current block ... */
	psrldq	$2, %xmm2			/* ... bytes 2..15 moved down to 0..13 */
	movdqa	16(%rsi, %rcx), %xmm3		/* next block ... */
	pslldq	$14, %xmm3			/* ... bytes 0..1 moved up to 14..15 */
	por	%xmm2, %xmm3
	movdqa	%xmm3, (%rdi, %rcx)
	add	$16, %rcx
#ifdef USE_AS_STRNCPY
	cmp	%r10, %r8
	jbe	LABEL(unaligned_exit)
#endif

	/* step 2: same sequence as step 1 */
	pcmpeqb	16(%rsi, %rcx), %xmm0
	pmovmskb %xmm0, %edx
	test	%edx, %edx
	jnz	LABEL(unaligned_exit)
#ifdef USE_AS_STRNCPY
	sub	$16, %r8
	jbe	LABEL(strncpy_truncation_unaligned)
#endif
	movdqa	(%rsi, %rcx), %xmm2
	psrldq	$2, %xmm2
	movdqa	16(%rsi, %rcx), %xmm3
	pslldq	$14, %xmm3
	por	%xmm2, %xmm3
	movdqa	%xmm3, (%rdi, %rcx)
	add	$16, %rcx
#ifdef USE_AS_STRNCPY
	cmp	%r10, %r8
	jbe	LABEL(unaligned_exit)
#endif
	jmp	LABEL(ashr_2_use_sse2)


/*
 * ashr_1 handles the following cases:
 *	(16 + (src offset - dest offset)) % 16 = 1
 *
 * Based on the above operation, starting from (%r9 + rsi) to the left of this
 * cache bank, there is no null byte.
*/
	.p2align 4
LABEL(ashr_1):
	xor	%ecx, %ecx			/* rcx = 0: byte index shared by src and dst */
#ifdef USE_AS_STRNCPY
	/* r8 is set from the count argument at entry; exit when r8 <= r10 (unsigned). */
	/* NOTE(review): r10 is later used as a shift count at unaligned_exit -- confirm its meaning. */
	cmp	%r10, %r8
	jbe	LABEL(unaligned_exit)
#endif
	/* USE_SSSE3 bit clear in .memops_method => take the SSE2 loop. */
	testl	$USE_SSSE3, .memops_method(%rip)
	jz	LABEL(ashr_1_use_sse2)

	/*
	 * SSSE3 loop, unrolled twice.  Each step loads the next aligned 16
	 * source bytes, leaves through unaligned_exit if the compare with
	 * %xmm0 matches any byte (this relies on %xmm0 being zero when the
	 * step starts; a compare that finds no null leaves it zero again),
	 * and otherwise stores source bytes 1..16 relative to (%rsi, %rcx)
	 * at (%rdi, %rcx).  movdqa requires both addresses to be 16-byte
	 * aligned.
	 */
	.p2align 4
LABEL(ashr_1_use_ssse3):
	/* step 1 */
	movdqa	16(%rsi, %rcx), %xmm3		/* next aligned source block */
	pcmpeqb	%xmm3, %xmm0
	pmovmskb %xmm0, %edx			/* one mask bit per matching byte */
	test	%edx, %edx
	jnz	LABEL(unaligned_exit)
#ifdef USE_AS_STRNCPY
	sub	$16, %r8
	jbe	LABEL(strncpy_truncation_unaligned)
#endif
	/* palignr $1, (%rsi, %rcx), %xmm3 -- emitted as raw opcode bytes */
	.byte	0x66, 0x0f, 0x3a, 0x0f, 0x1c, 0x0e, 0x01
	movdqa	%xmm3, (%rdi, %rcx)
	add	$16, %rcx
#ifdef USE_AS_STRNCPY
	cmp	%r10, %r8
	jbe	LABEL(unaligned_exit)
#endif

	/* step 2: same sequence as step 1 */
	movdqa	16(%rsi, %rcx), %xmm3
	pcmpeqb	%xmm3, %xmm0
	pmovmskb %xmm0, %edx
	test	%edx, %edx
	jnz	LABEL(unaligned_exit)
#ifdef USE_AS_STRNCPY
	sub	$16, %r8
	jbe	LABEL(strncpy_truncation_unaligned)
#endif
	/* palignr $1, (%rsi, %rcx), %xmm3 -- emitted as raw opcode bytes */
	.byte	0x66, 0x0f, 0x3a, 0x0f, 0x1c, 0x0e, 0x01
	movdqa	%xmm3, (%rdi, %rcx)
	add	$16, %rcx
#ifdef USE_AS_STRNCPY
	cmp	%r10, %r8
	jbe	LABEL(unaligned_exit)
#endif
	jmp	LABEL(ashr_1_use_ssse3)

	/*
	 * SSE2 loop, unrolled twice.  Same checks and same stored bytes as
	 * the SSSE3 loop, but the 16 output bytes are assembled with
	 * psrldq/pslldq/por instead of palignr.
	 */
	.p2align 4
LABEL(ashr_1_use_sse2):
	/* step 1 */
	pcmpeqb	16(%rsi, %rcx), %xmm0
	pmovmskb %xmm0, %edx
	test	%edx, %edx
	jnz	LABEL(unaligned_exit)
#ifdef USE_AS_STRNCPY
	sub	$16, %r8
	jbe	LABEL(strncpy_truncation_unaligned)
#endif
	movdqa	(%rsi, %rcx), %xmm2		/* current block ... */
	psrldq	$1, %xmm2			/* ... bytes 1..15 moved down to 0..14 */
	movdqa	16(%rsi, %rcx), %xmm3		/* next block ... */
	pslldq	$15, %xmm3			/* ... byte 0 moved up to 15 */
	por	%xmm2, %xmm3
	movdqa	%xmm3, (%rdi, %rcx)
	add	$16, %rcx
#ifdef USE_AS_STRNCPY
	cmp	%r10, %r8
	jbe	LABEL(unaligned_exit)
#endif

	/* step 2: same sequence as step 1 */
	pcmpeqb	16(%rsi, %rcx), %xmm0
	pmovmskb %xmm0, %edx
	test	%edx, %edx
	jnz	LABEL(unaligned_exit)
#ifdef USE_AS_STRNCPY
	sub	$16, %r8
	jbe	LABEL(strncpy_truncation_unaligned)
#endif
	movdqa	(%rsi, %rcx), %xmm2
	psrldq	$1, %xmm2
	movdqa	16(%rsi, %rcx), %xmm3
	pslldq	$15, %xmm3
	por	%xmm2, %xmm3
	movdqa	%xmm3, (%rdi, %rcx)
	add	$16, %rcx
#ifdef USE_AS_STRNCPY
	cmp	%r10, %r8
	jbe	LABEL(unaligned_exit)
#endif
	jmp	LABEL(ashr_1_use_sse2)


	/*
	 * Exit tail code:
	 * Up to 32 bytes are copied in the case of strcpy.
1895*10583SEdward.Gillett@Sun.COM */ 1896*10583SEdward.Gillett@Sun.COM .p2align 4 1897*10583SEdward.Gillett@Sun.COMLABEL(less32bytes): 1898*10583SEdward.Gillett@Sun.COM xor %ecx, %ecx 1899*10583SEdward.Gillett@Sun.COMLABEL(unaligned_exit): 1900*10583SEdward.Gillett@Sun.COM add %r9, %rsi /* r9 holds offset of rsi */ 1901*10583SEdward.Gillett@Sun.COM mov %rcx, %r9 1902*10583SEdward.Gillett@Sun.COM mov %r10, %rcx 1903*10583SEdward.Gillett@Sun.COM shl %cl, %edx /* after shl, calculate the exact number to be filled */ 1904*10583SEdward.Gillett@Sun.COM mov %r9, %rcx 1905*10583SEdward.Gillett@Sun.COM .p2align 4 1906*10583SEdward.Gillett@Sun.COMLABEL(aligned_exit): 1907*10583SEdward.Gillett@Sun.COM add %rcx, %rdi /* locate exact address for rdi */ 1908*10583SEdward.Gillett@Sun.COMLABEL(less16bytes): 1909*10583SEdward.Gillett@Sun.COM add %rcx, %rsi /* locate exact address for rsi */ 1910*10583SEdward.Gillett@Sun.COMLABEL(aligned_16bytes): 1911*10583SEdward.Gillett@Sun.COM#ifdef USE_AS_STRNCPY 1912*10583SEdward.Gillett@Sun.COM /* 1913*10583SEdward.Gillett@Sun.COM * Null found in 16bytes checked. Set bit in bitmask corresponding to 1914*10583SEdward.Gillett@Sun.COM * the strncpy count argument. We will copy to the null (inclusive) 1915*10583SEdward.Gillett@Sun.COM * or count whichever comes first. 1916*10583SEdward.Gillett@Sun.COM */ 1917*10583SEdward.Gillett@Sun.COM mov $1, %r9d 1918*10583SEdward.Gillett@Sun.COM lea -1(%r8), %rcx 1919*10583SEdward.Gillett@Sun.COM shl %cl, %r9d 1920*10583SEdward.Gillett@Sun.COM cmp $32, %r8 1921*10583SEdward.Gillett@Sun.COM ja LABEL(strncpy_tail) 1922*10583SEdward.Gillett@Sun.COM or %r9d, %edx 1923*10583SEdward.Gillett@Sun.COMLABEL(strncpy_tail): 1924*10583SEdward.Gillett@Sun.COM#endif 1925*10583SEdward.Gillett@Sun.COM /* 1926*10583SEdward.Gillett@Sun.COM * Check to see if BSF is fast on this processor. If not, use a 1927*10583SEdward.Gillett@Sun.COM * different exit tail. 
1928*10583SEdward.Gillett@Sun.COM */ 1929*10583SEdward.Gillett@Sun.COM testb $USE_BSF, .memops_method(%rip) 1930*10583SEdward.Gillett@Sun.COM jz LABEL(AMD_exit) 1931*10583SEdward.Gillett@Sun.COM bsf %rdx, %rcx /* Find byte with null char */ 1932*10583SEdward.Gillett@Sun.COM lea LABEL(tail_table)(%rip), %r11 1933*10583SEdward.Gillett@Sun.COM movslq (%r11, %rcx, 4), %rcx 1934*10583SEdward.Gillett@Sun.COM lea (%r11, %rcx), %rcx 1935*10583SEdward.Gillett@Sun.COM jmp *%rcx 1936*10583SEdward.Gillett@Sun.COM 1937*10583SEdward.Gillett@Sun.COM#ifdef USE_AS_STRNCPY 1938*10583SEdward.Gillett@Sun.COM /* 1939*10583SEdward.Gillett@Sun.COM * Count reached before null found. 1940*10583SEdward.Gillett@Sun.COM */ 1941*10583SEdward.Gillett@Sun.COM .p2align 4 1942*10583SEdward.Gillett@Sun.COMLABEL(less32bytes_strncpy_truncation): 1943*10583SEdward.Gillett@Sun.COM xor %ecx, %ecx 1944*10583SEdward.Gillett@Sun.COMLABEL(strncpy_truncation_unaligned): 1945*10583SEdward.Gillett@Sun.COM add %r9, %rsi /* next src char to copy */ 1946*10583SEdward.Gillett@Sun.COMLABEL(strncpy_truncation_aligned): 1947*10583SEdward.Gillett@Sun.COM add %rcx, %rdi 1948*10583SEdward.Gillett@Sun.COM add %rcx, %rsi 1949*10583SEdward.Gillett@Sun.COM add $16, %r8 /* compensation */ 1950*10583SEdward.Gillett@Sun.COM lea -1(%r8), %rcx 1951*10583SEdward.Gillett@Sun.COM lea LABEL(tail_table)(%rip), %r11 1952*10583SEdward.Gillett@Sun.COM movslq (%r11, %rcx, 4), %rcx 1953*10583SEdward.Gillett@Sun.COM lea (%r11, %rcx), %rcx 1954*10583SEdward.Gillett@Sun.COM jmp *%rcx 1955*10583SEdward.Gillett@Sun.COM 1956*10583SEdward.Gillett@Sun.COM .p2align 4 1957*10583SEdward.Gillett@Sun.COMLABEL(strncpy_exitz): 1958*10583SEdward.Gillett@Sun.COM mov %rdi, %rax 1959*10583SEdward.Gillett@Sun.COM ret 19607953SNobutomo.Nakano@Sun.COM#endif 19617953SNobutomo.Nakano@Sun.COM 19620Sstevel@tonic-gate .p2align 4 1963*10583SEdward.Gillett@Sun.COMLABEL(AMD_exit): 1964*10583SEdward.Gillett@Sun.COM test %dl, %dl 1965*10583SEdward.Gillett@Sun.COM jz 
LABEL(AMD_exit_more_8) 1966*10583SEdward.Gillett@Sun.COM test $0x01, %dl 1967*10583SEdward.Gillett@Sun.COM jnz LABEL(tail_0) 1968*10583SEdward.Gillett@Sun.COM test $0x02, %dl 1969*10583SEdward.Gillett@Sun.COM jnz LABEL(tail_1) 1970*10583SEdward.Gillett@Sun.COM test $0x04, %dl 1971*10583SEdward.Gillett@Sun.COM jnz LABEL(tail_2) 1972*10583SEdward.Gillett@Sun.COM test $0x08, %dl 1973*10583SEdward.Gillett@Sun.COM jnz LABEL(tail_3) 1974*10583SEdward.Gillett@Sun.COM test $0x10, %dl 1975*10583SEdward.Gillett@Sun.COM jnz LABEL(tail_4) 1976*10583SEdward.Gillett@Sun.COM test $0x20, %dl 1977*10583SEdward.Gillett@Sun.COM jnz LABEL(tail_5) 1978*10583SEdward.Gillett@Sun.COM test $0x40, %dl 1979*10583SEdward.Gillett@Sun.COM jnz LABEL(tail_6) 19800Sstevel@tonic-gate 1981*10583SEdward.Gillett@Sun.COM .p2align 4 1982*10583SEdward.Gillett@Sun.COMLABEL(tail_7): /* 8 bytes */ 1983*10583SEdward.Gillett@Sun.COM mov (%rsi), %rcx 1984*10583SEdward.Gillett@Sun.COM mov %rcx, (%rdi) 19850Sstevel@tonic-gate#ifdef USE_AS_STRNCPY 1986*10583SEdward.Gillett@Sun.COM mov $8, %cl 1987*10583SEdward.Gillett@Sun.COM sub $8, %r8 1988*10583SEdward.Gillett@Sun.COM jnz LABEL(strncpy_fill_tail) 19890Sstevel@tonic-gate#endif 1990*10583SEdward.Gillett@Sun.COM ret 19910Sstevel@tonic-gate 19920Sstevel@tonic-gate#ifdef USE_AS_STRNCPY 1993*10583SEdward.Gillett@Sun.COM /* 1994*10583SEdward.Gillett@Sun.COM * Null terminated src string shorter than count. Fill the rest of the 1995*10583SEdward.Gillett@Sun.COM * destination with null chars. 
1996*10583SEdward.Gillett@Sun.COM */ 1997*10583SEdward.Gillett@Sun.COM .p2align 4 1998*10583SEdward.Gillett@Sun.COMLABEL(strncpy_fill_tail): 1999*10583SEdward.Gillett@Sun.COM mov %rax, %rdx 2000*10583SEdward.Gillett@Sun.COM movzx %cl, %rax 2001*10583SEdward.Gillett@Sun.COM mov %r8, %rcx 2002*10583SEdward.Gillett@Sun.COM add %rax, %rdi 2003*10583SEdward.Gillett@Sun.COM xor %eax, %eax 2004*10583SEdward.Gillett@Sun.COM shr $3, %ecx 2005*10583SEdward.Gillett@Sun.COM jz LABEL(strncpy_fill_less_8) 20060Sstevel@tonic-gate 2007*10583SEdward.Gillett@Sun.COM rep stosq 2008*10583SEdward.Gillett@Sun.COMLABEL(strncpy_fill_less_8): 2009*10583SEdward.Gillett@Sun.COM mov %r8, %rcx 2010*10583SEdward.Gillett@Sun.COM and $7, %rcx 2011*10583SEdward.Gillett@Sun.COM jz LABEL(strncpy_fill_return) 2012*10583SEdward.Gillett@Sun.COMLABEL(strncpy_fill_less_7): 2013*10583SEdward.Gillett@Sun.COM sub $1, %ecx 2014*10583SEdward.Gillett@Sun.COM mov %al, (%rdi, %rcx) 2015*10583SEdward.Gillett@Sun.COM jnz LABEL(strncpy_fill_less_7) 2016*10583SEdward.Gillett@Sun.COMLABEL(strncpy_fill_return): 2017*10583SEdward.Gillett@Sun.COM mov %rdx, %rax 2018*10583SEdward.Gillett@Sun.COM ret 20190Sstevel@tonic-gate#endif 20200Sstevel@tonic-gate 2021*10583SEdward.Gillett@Sun.COM .p2align 4 2022*10583SEdward.Gillett@Sun.COMLABEL(tail_0): /* 1 byte */ 2023*10583SEdward.Gillett@Sun.COM mov (%rsi), %cl 2024*10583SEdward.Gillett@Sun.COM mov %cl, (%rdi) 2025*10583SEdward.Gillett@Sun.COM#ifdef USE_AS_STRNCPY 2026*10583SEdward.Gillett@Sun.COM mov $1, %cl 2027*10583SEdward.Gillett@Sun.COM sub $1, %r8 2028*10583SEdward.Gillett@Sun.COM jnz LABEL(strncpy_fill_tail) 2029*10583SEdward.Gillett@Sun.COM#endif 2030*10583SEdward.Gillett@Sun.COM ret 20310Sstevel@tonic-gate 2032*10583SEdward.Gillett@Sun.COM .p2align 4 2033*10583SEdward.Gillett@Sun.COMLABEL(tail_1): /* 2 bytes */ 2034*10583SEdward.Gillett@Sun.COM mov (%rsi), %cx 2035*10583SEdward.Gillett@Sun.COM mov %cx, (%rdi) 2036*10583SEdward.Gillett@Sun.COM#ifdef USE_AS_STRNCPY 
2037*10583SEdward.Gillett@Sun.COM mov $2, %cl 2038*10583SEdward.Gillett@Sun.COM sub $2, %r8 2039*10583SEdward.Gillett@Sun.COM jnz LABEL(strncpy_fill_tail) 2040*10583SEdward.Gillett@Sun.COM#endif 2041*10583SEdward.Gillett@Sun.COM ret 20420Sstevel@tonic-gate 2043*10583SEdward.Gillett@Sun.COM .p2align 4 2044*10583SEdward.Gillett@Sun.COMLABEL(tail_2): /* 3 bytes */ 2045*10583SEdward.Gillett@Sun.COM mov (%rsi), %cx 2046*10583SEdward.Gillett@Sun.COM mov %cx, (%rdi) 2047*10583SEdward.Gillett@Sun.COM mov 1(%rsi), %cx 2048*10583SEdward.Gillett@Sun.COM mov %cx, 1(%rdi) 2049*10583SEdward.Gillett@Sun.COM#ifdef USE_AS_STRNCPY 2050*10583SEdward.Gillett@Sun.COM mov $3, %cl 2051*10583SEdward.Gillett@Sun.COM sub $3, %r8 2052*10583SEdward.Gillett@Sun.COM jnz LABEL(strncpy_fill_tail) 2053*10583SEdward.Gillett@Sun.COM#endif 2054*10583SEdward.Gillett@Sun.COM ret 20550Sstevel@tonic-gate 2056*10583SEdward.Gillett@Sun.COM .p2align 4 2057*10583SEdward.Gillett@Sun.COMLABEL(tail_3): /* 4 bytes */ 2058*10583SEdward.Gillett@Sun.COM mov (%rsi), %ecx 2059*10583SEdward.Gillett@Sun.COM mov %ecx, (%rdi) 20600Sstevel@tonic-gate#ifdef USE_AS_STRNCPY 2061*10583SEdward.Gillett@Sun.COM mov $4, %cl 2062*10583SEdward.Gillett@Sun.COM sub $4, %r8 2063*10583SEdward.Gillett@Sun.COM jnz LABEL(strncpy_fill_tail) 20640Sstevel@tonic-gate#endif 2065*10583SEdward.Gillett@Sun.COM ret 20660Sstevel@tonic-gate 2067*10583SEdward.Gillett@Sun.COM .p2align 4 2068*10583SEdward.Gillett@Sun.COMLABEL(tail_4): /* 5 bytes */ 2069*10583SEdward.Gillett@Sun.COM mov (%rsi), %ecx 2070*10583SEdward.Gillett@Sun.COM mov %ecx, (%rdi) 2071*10583SEdward.Gillett@Sun.COM mov 1(%rsi), %edx 2072*10583SEdward.Gillett@Sun.COM mov %edx, 1(%rdi) 2073*10583SEdward.Gillett@Sun.COM#ifdef USE_AS_STRNCPY 2074*10583SEdward.Gillett@Sun.COM mov $5, %cl 2075*10583SEdward.Gillett@Sun.COM sub $5, %r8 2076*10583SEdward.Gillett@Sun.COM jnz LABEL(strncpy_fill_tail) 2077*10583SEdward.Gillett@Sun.COM#endif 2078*10583SEdward.Gillett@Sun.COM ret 
/*
 * Tail routines for 6, 7 and 9-16 byte copies, together with the second
 * rung of the non-BSF exit dispatch (AMD_exit_more_8).
 *
 * tail_N copies N + 1 bytes from (%rsi) to (%rdi); lengths that are not a
 * single register width are handled with two overlapping load/store pairs
 * (the byte ranges are noted on each label). In the strncpy build the
 * routine then loads the number of bytes stored into %cl, subtracts it from
 * the remaining count in %r8, and branches to strncpy_fill_tail when count
 * is left over.
 */

	.p2align 4
LABEL(tail_5):				/* 6 bytes: 0-3, 2-5 */
	mov	(%rsi), %ecx
	mov	%ecx, (%rdi)
	mov	2(%rsi), %edx
	mov	%edx, 2(%rdi)
#ifdef USE_AS_STRNCPY
	mov	$6, %cl
	sub	$6, %r8
	jnz	LABEL(strncpy_fill_tail)
#endif
	ret

	.p2align 4
LABEL(tail_6):				/* 7 bytes: 0-3, 3-6 */
	mov	(%rsi), %ecx
	mov	%ecx, (%rdi)
	mov	3(%rsi), %edx
	mov	%edx, 3(%rdi)
#ifdef USE_AS_STRNCPY
	mov	$7, %cl
	sub	$7, %r8
	jnz	LABEL(strncpy_fill_tail)
#endif
	ret

	.p2align 4
LABEL(tail_8):				/* 9 bytes: 0-7, 5-8 */
	mov	(%rsi), %rcx
	mov	%rcx, (%rdi)
	mov	5(%rsi), %edx
	mov	%edx, 5(%rdi)
#ifdef USE_AS_STRNCPY
	mov	$9, %cl
	sub	$9, %r8
	jnz	LABEL(strncpy_fill_tail)
#endif
	ret

	/*
	 * No bit set in mask bits 0-7: examine bits 8-15 (%dh). The lowest
	 * set bit selects tail_8 .. tail_14; bit 15 is not tested, so when
	 * only it is set the code falls through into tail_15.
	 */
	.p2align 4
LABEL(AMD_exit_more_8):
	test	%dh, %dh
	jz	LABEL(AMD_exit_more_16)
	test	$0x01, %dh
	jnz	LABEL(tail_8)
	test	$0x02, %dh
	jnz	LABEL(tail_9)
	test	$0x04, %dh
	jnz	LABEL(tail_10)
	test	$0x08, %dh
	jnz	LABEL(tail_11)
	test	$0x10, %dh
	jnz	LABEL(tail_12)
	test	$0x20, %dh
	jnz	LABEL(tail_13)
	test	$0x40, %dh
	jnz	LABEL(tail_14)

	.p2align 4
LABEL(tail_15):				/* 16 bytes: 0-7, 8-15 */
	mov	(%rsi), %rcx
	mov	%rcx, (%rdi)
	mov	8(%rsi), %rdx
	mov	%rdx, 8(%rdi)
#ifdef USE_AS_STRNCPY
	mov	$16, %cl
	sub	$16, %r8
	jnz	LABEL(strncpy_fill_tail)
#endif
	ret

	.p2align 4
LABEL(tail_9):				/* 10 bytes: 0-7, 6-9 */
	mov	(%rsi), %rcx
	mov	%rcx, (%rdi)
	mov	6(%rsi), %edx
	mov	%edx, 6(%rdi)
#ifdef USE_AS_STRNCPY
	mov	$10, %cl
	sub	$10, %r8
	jnz	LABEL(strncpy_fill_tail)
#endif
	ret

	.p2align 4
LABEL(tail_10):				/* 11 bytes: 0-7, 7-10 */
	mov	(%rsi), %rcx
	mov	%rcx, (%rdi)
	mov	7(%rsi), %edx
	mov	%edx, 7(%rdi)
#ifdef USE_AS_STRNCPY
	mov	$11, %cl
	sub	$11, %r8
	jnz	LABEL(strncpy_fill_tail)
#endif
	ret

	.p2align 4
LABEL(tail_11):				/* 12 bytes: 0-7, 8-11 */
	mov	(%rsi), %rcx
	mov	%rcx, (%rdi)
	mov	8(%rsi), %edx
	mov	%edx, 8(%rdi)
#ifdef USE_AS_STRNCPY
	mov	$12, %cl
	sub	$12, %r8
	jnz	LABEL(strncpy_fill_tail)
#endif
	ret

	.p2align 4
LABEL(tail_12):				/* 13 bytes: 0-7, 5-12 */
	mov	(%rsi), %rcx
	mov	%rcx, (%rdi)
	mov	5(%rsi), %rcx
	mov	%rcx, 5(%rdi)
#ifdef USE_AS_STRNCPY
	mov	$13, %cl
	sub	$13, %r8
	jnz	LABEL(strncpy_fill_tail)
#endif
	ret

/*
 * Tail routines for 14, 15 and 17-29 byte copies, together with the third
 * and fourth rungs of the non-BSF exit dispatch (AMD_exit_more_16 and
 * AMD_exit_more_24).
 *
 * tail_N copies N + 1 bytes from (%rsi) to (%rdi). Whole quadwords are moved
 * first; the remainder is covered by one final load/store that may overlap
 * bytes already written (the range of that last store is noted on each
 * label). In the strncpy build the routine then loads the number of bytes
 * stored into %cl, subtracts it from the remaining count in %r8, and
 * branches to strncpy_fill_tail when count is left over.
 */
	.p2align 4
LABEL(tail_13):				/* 14 bytes: 0-7, 6-13 */
	mov	(%rsi), %rcx
	mov	%rcx, (%rdi)
	mov	6(%rsi), %rcx
	mov	%rcx, 6(%rdi)
#ifdef USE_AS_STRNCPY
	mov	$14, %cl
	sub	$14, %r8
	jnz	LABEL(strncpy_fill_tail)
#endif
	ret

	.p2align 4
LABEL(tail_14):				/* 15 bytes: 0-7, 7-14 */
	mov	(%rsi), %rcx
	mov	%rcx, (%rdi)
	mov	7(%rsi), %rcx
	mov	%rcx, 7(%rdi)
#ifdef USE_AS_STRNCPY
	mov	$15, %cl
	sub	$15, %r8
	jnz	LABEL(strncpy_fill_tail)
#endif
	ret

	/*
	 * No bit set in mask bits 0-15: shift the upper half of the mask
	 * down so that bits 16-23 can be tested through %dl (and bits 24-31
	 * through %dh in AMD_exit_more_24). The lowest set bit selects
	 * tail_16 .. tail_22; bit 23 is not tested, so when only it is set
	 * the code falls through into tail_23.
	 */
	.p2align 4
LABEL(AMD_exit_more_16):
	shr	$16, %edx
	test	%dl, %dl
	jz	LABEL(AMD_exit_more_24)
	test	$0x01, %dl
	jnz	LABEL(tail_16)
	test	$0x02, %dl
	jnz	LABEL(tail_17)
	test	$0x04, %dl
	jnz	LABEL(tail_18)
	test	$0x08, %dl
	jnz	LABEL(tail_19)
	test	$0x10, %dl
	jnz	LABEL(tail_20)
	test	$0x20, %dl
	jnz	LABEL(tail_21)
	test	$0x40, %dl
	jnz	LABEL(tail_22)

	.p2align 4
LABEL(tail_23):				/* 24 bytes: three quadwords */
	mov	(%rsi), %rcx
	mov	%rcx, (%rdi)
	mov	8(%rsi), %rdx
	mov	%rdx, 8(%rdi)
	mov	16(%rsi), %rcx
	mov	%rcx, 16(%rdi)
#ifdef USE_AS_STRNCPY
	mov	$24, %cl
	sub	$24, %r8
	jnz	LABEL(strncpy_fill_tail)
#endif
	ret

	.p2align 4
LABEL(tail_16):				/* 17 bytes: 0-15, then byte 16 */
	mov	(%rsi), %rcx
	mov	%rcx, (%rdi)
	mov	8(%rsi), %rdx
	mov	%rdx, 8(%rdi)
	mov	16(%rsi), %cl
	mov	%cl, 16(%rdi)
#ifdef USE_AS_STRNCPY
	mov	$17, %cl
	sub	$17, %r8
	jnz	LABEL(strncpy_fill_tail)
#endif
	ret

	.p2align 4
LABEL(tail_17):				/* 18 bytes: 0-15, then 16-17 */
	mov	(%rsi), %rcx
	mov	%rcx, (%rdi)
	mov	8(%rsi), %rdx
	mov	%rdx, 8(%rdi)
	mov	16(%rsi), %cx
	mov	%cx, 16(%rdi)
#ifdef USE_AS_STRNCPY
	mov	$18, %cl
	sub	$18, %r8
	jnz	LABEL(strncpy_fill_tail)
#endif
	ret

	.p2align 4
LABEL(tail_18):				/* 19 bytes: 0-15, then 15-18 */
	mov	(%rsi), %rcx
	mov	%rcx, (%rdi)
	mov	8(%rsi), %rdx
	mov	%rdx, 8(%rdi)
	mov	15(%rsi), %ecx
	mov	%ecx, 15(%rdi)
#ifdef USE_AS_STRNCPY
	mov	$19, %cl
	sub	$19, %r8
	jnz	LABEL(strncpy_fill_tail)
#endif
	ret

	.p2align 4
LABEL(tail_19):				/* 20 bytes: 0-15, then 16-19 */
	mov	(%rsi), %rcx
	mov	%rcx, (%rdi)
	mov	8(%rsi), %rdx
	mov	%rdx, 8(%rdi)
	mov	16(%rsi), %ecx
	mov	%ecx, 16(%rdi)
#ifdef USE_AS_STRNCPY
	mov	$20, %cl
	sub	$20, %r8
	jnz	LABEL(strncpy_fill_tail)
#endif
	ret

	.p2align 4
LABEL(tail_20):				/* 21 bytes: 0-15, then 13-20 */
	mov	(%rsi), %rcx
	mov	%rcx, (%rdi)
	mov	8(%rsi), %rdx
	mov	%rdx, 8(%rdi)
	mov	13(%rsi), %rcx
	mov	%rcx, 13(%rdi)
#ifdef USE_AS_STRNCPY
	mov	$21, %cl
	sub	$21, %r8
	jnz	LABEL(strncpy_fill_tail)
#endif
	ret

	.p2align 4
LABEL(tail_21):				/* 22 bytes: 0-15, then 14-21 */
	mov	(%rsi), %rcx
	mov	%rcx, (%rdi)
	mov	8(%rsi), %rdx
	mov	%rdx, 8(%rdi)
	mov	14(%rsi), %rcx
	mov	%rcx, 14(%rdi)
#ifdef USE_AS_STRNCPY
	mov	$22, %cl
	sub	$22, %r8
	jnz	LABEL(strncpy_fill_tail)
#endif
	ret

	.p2align 4
LABEL(tail_22):				/* 23 bytes: 0-15, then 15-22 */
	mov	(%rsi), %rcx
	mov	%rcx, (%rdi)
	mov	8(%rsi), %rdx
	mov	%rdx, 8(%rdi)
	mov	15(%rsi), %rcx
	mov	%rcx, 15(%rdi)
#ifdef USE_AS_STRNCPY
	mov	$23, %cl
	sub	$23, %r8
	jnz	LABEL(strncpy_fill_tail)
#endif
	ret

	/*
	 * Last rung: mask bits 24-31, now in %dh after the shift performed
	 * in AMD_exit_more_16. The lowest set bit selects tail_24 .. tail_30;
	 * if none of those seven bits is set the code falls through into
	 * tail_31.
	 */
	.p2align 4
LABEL(AMD_exit_more_24):
	test	$0x01, %dh
	jnz	LABEL(tail_24)
	test	$0x02, %dh
	jnz	LABEL(tail_25)
	test	$0x04, %dh
	jnz	LABEL(tail_26)
	test	$0x08, %dh
	jnz	LABEL(tail_27)
	test	$0x10, %dh
	jnz	LABEL(tail_28)
	test	$0x20, %dh
	jnz	LABEL(tail_29)
	test	$0x40, %dh
	jnz	LABEL(tail_30)

	.p2align 4
LABEL(tail_31):				/* 32 bytes: four quadwords */
	mov	(%rsi), %rcx
	mov	%rcx, (%rdi)
	mov	8(%rsi), %rdx
	mov	%rdx, 8(%rdi)
	mov	16(%rsi), %rcx
	mov	%rcx, 16(%rdi)
	mov	24(%rsi), %rdx
	mov	%rdx, 24(%rdi)
#ifdef USE_AS_STRNCPY
	mov	$32, %cl
	sub	$32, %r8
	jnz	LABEL(strncpy_fill_tail)
#endif
	ret

	.p2align 4
LABEL(tail_24):				/* 25 bytes: 0-23, then 21-24 */
	mov	(%rsi), %rcx
	mov	%rcx, (%rdi)
	mov	8(%rsi), %rdx
	mov	%rdx, 8(%rdi)
	mov	16(%rsi), %rcx
	mov	%rcx, 16(%rdi)
	mov	21(%rsi), %edx
	mov	%edx, 21(%rdi)
#ifdef USE_AS_STRNCPY
	mov	$25, %cl
	sub	$25, %r8
	jnz	LABEL(strncpy_fill_tail)
#endif
	ret

	.p2align 4
LABEL(tail_25):				/* 26 bytes: 0-23, then 22-25 */
	mov	(%rsi), %rcx
	mov	%rcx, (%rdi)
	mov	8(%rsi), %rdx
	mov	%rdx, 8(%rdi)
	mov	16(%rsi), %rcx
	mov	%rcx, 16(%rdi)
	mov	22(%rsi), %edx
	mov	%edx, 22(%rdi)
#ifdef USE_AS_STRNCPY
	mov	$26, %cl
	sub	$26, %r8
	jnz	LABEL(strncpy_fill_tail)
#endif
	ret

	.p2align 4
LABEL(tail_26):				/* 27 bytes: 0-23, then 23-26 */
	mov	(%rsi), %rcx
	mov	%rcx, (%rdi)
	mov	8(%rsi), %rdx
	mov	%rdx, 8(%rdi)
	mov	16(%rsi), %rcx
	mov	%rcx, 16(%rdi)
	mov	23(%rsi), %edx
	mov	%edx, 23(%rdi)
#ifdef USE_AS_STRNCPY
	mov	$27, %cl
	sub	$27, %r8
	jnz	LABEL(strncpy_fill_tail)
#endif
	ret

	.p2align 4
LABEL(tail_27):				/* 28 bytes: 0-23, then 24-27 */
	mov	(%rsi), %rcx
	mov	%rcx, (%rdi)
	mov	8(%rsi), %rdx
	mov	%rdx, 8(%rdi)
	mov	16(%rsi), %rcx
	mov	%rcx, 16(%rdi)
	mov	24(%rsi), %edx
	mov	%edx, 24(%rdi)
#ifdef USE_AS_STRNCPY
	mov	$28, %cl
	sub	$28, %r8
	jnz	LABEL(strncpy_fill_tail)
#endif
	ret

	.p2align 4
LABEL(tail_28):				/* 29 bytes: 0-23, then 21-28 */
	mov	(%rsi), %rcx
	mov	%rcx, (%rdi)
	mov	8(%rsi), %rdx
	mov	%rdx, 8(%rdi)
	mov	16(%rsi), %rcx
	mov	%rcx, 16(%rdi)
	mov	21(%rsi), %rdx
	mov	%rdx, 21(%rdi)
#ifdef USE_AS_STRNCPY
	mov	$29, %cl
	sub	$29, %r8
	jnz	LABEL(strncpy_fill_tail)
#endif
	ret

/*
 * Last two tail routines (30 and 31 bytes), the read-only dispatch tables,
 * and the function size directive.
 *
 * tail_N copies N + 1 bytes from (%rsi) to (%rdi): three quadwords followed
 * by one overlapping quadword. In the strncpy build the routine then loads
 * the number of bytes stored into %cl, subtracts it from the remaining count
 * in %r8, and branches to strncpy_fill_tail when count is left over.
 */
	.p2align 4
LABEL(tail_29):				/* 30 bytes: 0-23, then 22-29 */
	mov	(%rsi), %rcx
	mov	%rcx, (%rdi)
	mov	8(%rsi), %rdx
	mov	%rdx, 8(%rdi)
	mov	16(%rsi), %rcx
	mov	%rcx, 16(%rdi)
	mov	22(%rsi), %rdx
	mov	%rdx, 22(%rdi)
#ifdef USE_AS_STRNCPY
	mov	$30, %cl
	sub	$30, %r8
	jnz	LABEL(strncpy_fill_tail)
#endif
	ret

	.p2align 4
LABEL(tail_30):				/* 31 bytes: 0-23, then 23-30 */
	mov	(%rsi), %rcx
	mov	%rcx, (%rdi)
	mov	8(%rsi), %rdx
	mov	%rdx, 8(%rdi)
	mov	16(%rsi), %rcx
	mov	%rcx, 16(%rdi)
	mov	23(%rsi), %rdx
	mov	%rdx, 23(%rdi)
#ifdef USE_AS_STRNCPY
	mov	$31, %cl
	sub	$31, %r8
	jnz	LABEL(strncpy_fill_tail)
#endif
	ret

	.pushsection .rodata
	/*
	 * tail_table[i] is the 32-bit offset of tail_i from the start of the
	 * table, i.e. the routine that copies i + 1 bytes. The exit code
	 * loads an entry with movslq and adds the table address back to form
	 * the jump target, so the entries must remain table-relative.
	 */
	.p2align 4
LABEL(tail_table):
	.int	LABEL(tail_0) - LABEL(tail_table)	/* 1 byte */
	.int	LABEL(tail_1) - LABEL(tail_table)
	.int	LABEL(tail_2) - LABEL(tail_table)
	.int	LABEL(tail_3) - LABEL(tail_table)
	.int	LABEL(tail_4) - LABEL(tail_table)
	.int	LABEL(tail_5) - LABEL(tail_table)
	.int	LABEL(tail_6) - LABEL(tail_table)
	.int	LABEL(tail_7) - LABEL(tail_table)
	.int	LABEL(tail_8) - LABEL(tail_table)
	.int	LABEL(tail_9) - LABEL(tail_table)
	.int	LABEL(tail_10) - LABEL(tail_table)
	.int	LABEL(tail_11) - LABEL(tail_table)
	.int	LABEL(tail_12) - LABEL(tail_table)
	.int	LABEL(tail_13) - LABEL(tail_table)
	.int	LABEL(tail_14) - LABEL(tail_table)
	.int	LABEL(tail_15) - LABEL(tail_table)
	.int	LABEL(tail_16) - LABEL(tail_table)
	.int	LABEL(tail_17) - LABEL(tail_table)
	.int	LABEL(tail_18) - LABEL(tail_table)
	.int	LABEL(tail_19) - LABEL(tail_table)
	.int	LABEL(tail_20) - LABEL(tail_table)
	.int	LABEL(tail_21) - LABEL(tail_table)
	.int	LABEL(tail_22) - LABEL(tail_table)
	.int	LABEL(tail_23) - LABEL(tail_table)
	.int	LABEL(tail_24) - LABEL(tail_table)
	.int	LABEL(tail_25) - LABEL(tail_table)
	.int	LABEL(tail_26) - LABEL(tail_table)
	.int	LABEL(tail_27) - LABEL(tail_table)
	.int	LABEL(tail_28) - LABEL(tail_table)
	.int	LABEL(tail_29) - LABEL(tail_table)
	.int	LABEL(tail_30) - LABEL(tail_table)
	.int	LABEL(tail_31) - LABEL(tail_table)	/* 32 bytes */

	/*
	 * unaligned_table[i] is the table-relative offset of ashr_i. The
	 * code that indexes it lies outside this section; presumably i is
	 * the relative src/dst misalignment - verify against the dispatch
	 * code earlier in the file.
	 */
	.p2align 4
LABEL(unaligned_table):
	.int	LABEL(ashr_0) - LABEL(unaligned_table)
	.int	LABEL(ashr_1) - LABEL(unaligned_table)
	.int	LABEL(ashr_2) - LABEL(unaligned_table)
	.int	LABEL(ashr_3) - LABEL(unaligned_table)
	.int	LABEL(ashr_4) - LABEL(unaligned_table)
	.int	LABEL(ashr_5) - LABEL(unaligned_table)
	.int	LABEL(ashr_6) - LABEL(unaligned_table)
	.int	LABEL(ashr_7) - LABEL(unaligned_table)
	.int	LABEL(ashr_8) - LABEL(unaligned_table)
	.int	LABEL(ashr_9) - LABEL(unaligned_table)
	.int	LABEL(ashr_10) - LABEL(unaligned_table)
	.int	LABEL(ashr_11) - LABEL(unaligned_table)
	.int	LABEL(ashr_12) - LABEL(unaligned_table)
	.int	LABEL(ashr_13) - LABEL(unaligned_table)
	.int	LABEL(ashr_14) - LABEL(unaligned_table)
	.int	LABEL(ashr_15) - LABEL(unaligned_table)
	.popsection

#ifdef USE_AS_STRNCPY
	SET_SIZE(strncpy)
#else
	SET_SIZE(strcpy)			/* (char *, const char *) */
#endif