/*
Copyright (c) 2014, Intel Corporation
All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

 * Redistributions of source code must retain the above copyright notice,
 * this list of conditions and the following disclaimer.

 * Redistributions in binary form must reproduce the above copyright notice,
 * this list of conditions and the following disclaimer in the documentation
 * and/or other materials provided with the distribution.

 * Neither the name of Intel Corporation nor the names of its contributors
 * may be used to endorse or promote products derived from this software
 * without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/

#include <private/bionic_asm.h>

#include "cache.h"

#ifndef L
# define L(label) .L##label
#endif

#ifndef ALIGN
# define ALIGN(n) .p2align n
#endif

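/*
 * __memset_chk_generic is the FORTIFY entry point: the compiler passes the
 * known size of the destination object in %rcx, and the routine diverts to
 * __memset_chk_fail (which is expected to abort) when the requested length
 * exceeds it, then falls through into the plain memset below. Roughly
 * equivalent C (an illustrative sketch only, not part of this file):
 *
 *   void* __memset_chk(void* dst, int byte, size_t n, size_t dst_len) {
 *     if (n > dst_len) __memset_chk_fail();
 *     return memset(dst, byte, n);
 *   }
 */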
ENTRY(__memset_chk_generic)
    # %rdi = dst, %rsi = byte, %rdx = n, %rcx = dst_len
    cmp %rcx, %rdx
    ja __memset_chk_fail
    // Fall through to memset...
END(__memset_chk_generic)


/*
 * memset_generic(dst=%rdi, byte=%rsi, n=%rdx): an SSE2 implementation.
 * The fill byte is replicated across a 64-bit pattern by multiplying with
 * 0x0101010101010101. Sizes below 16 are dispatched on the individual bits
 * of n; sizes of 16 bytes or more use unaligned 16-byte stores whose tail
 * copies overlap the head, so no scalar cleanup pass is needed. Anything
 * over 128 bytes falls into the 64-byte-aligned loops further down.
 */
    .section .text.sse2,"ax",@progbits
ENTRY(memset_generic)
    movq %rdi, %rax          // memset returns the destination pointer.
    and $0xff, %rsi
    mov $0x0101010101010101, %rcx
    imul %rsi, %rcx          // %rcx = fill byte replicated into all 8 bytes.
    cmpq $16, %rdx
    jae L(16bytesormore)
    // n < 16: each case writes a head chunk and an overlapping tail chunk.
    testb $8, %dl
    jnz L(8_15bytes)
    testb $4, %dl
    jnz L(4_7bytes)
    testb $2, %dl
    jnz L(2_3bytes)
    testb $1, %dl
    jz L(return)
    movb %cl, (%rdi)
L(return):
    ret

L(8_15bytes):
    movq %rcx, (%rdi)
    movq %rcx, -8(%rdi, %rdx)
    ret

L(4_7bytes):
    movl %ecx, (%rdi)
    movl %ecx, -4(%rdi, %rdx)
    ret

L(2_3bytes):
    movw %cx, (%rdi)
    movw %cx, -2(%rdi, %rdx)
    ret

    ALIGN (4)
L(16bytesormore):
    movd %rcx, %xmm0         // With a 64-bit register this assembles as movq.
    pshufd $0, %xmm0, %xmm0  // Broadcast the pattern to all 16 bytes of %xmm0.
    movdqu %xmm0, (%rdi)
    movdqu %xmm0, -16(%rdi, %rdx)
    cmpq $32, %rdx
    jbe L(32bytesless)
    movdqu %xmm0, 16(%rdi)
    movdqu %xmm0, -32(%rdi, %rdx)
    cmpq $64, %rdx
    jbe L(64bytesless)
    movdqu %xmm0, 32(%rdi)
    movdqu %xmm0, 48(%rdi)
    movdqu %xmm0, -64(%rdi, %rdx)
    movdqu %xmm0, -48(%rdi, %rdx)
    cmpq $128, %rdx
    ja L(128bytesmore)
L(32bytesless):
L(64bytesless):
    ret

    ALIGN (4)
L(128bytesmore):
    // n > 128: the unaligned stores above already covered the first and last
    // 64 bytes, so only the 64-byte-aligned middle region [%rcx, %rdx)
    // remains to be filled.
    leaq 64(%rdi), %rcx
    andq $-64, %rcx          // First 64-byte-aligned address in the buffer.
    movq %rdx, %r8           // Preserve n for the cache-size check below.
    addq %rdi, %rdx
    andq $-64, %rdx          // Aligned end of the middle region.
    cmpq %rcx, %rdx
    je L(return)

#ifdef SHARED_CACHE_SIZE
    cmp $SHARED_CACHE_SIZE, %r8
#else
    cmp __x86_64_shared_cache_size(%rip), %r8
#endif
    ja L(128bytesmore_nt)

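/*
 * Two 64-byte-per-iteration loops fill the aligned middle region. When n
 * fits within the shared cache size, the ordinary cached stores below are
 * used; for larger buffers, non-temporal stores (movntdq) bypass the cache
 * so the fill does not evict other useful data, and an sfence at the end
 * orders those weakly-ordered stores before returning.
 */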
    ALIGN (4)
L(128bytesmore_normal):
    movdqa %xmm0, (%rcx)
    movaps %xmm0, 0x10(%rcx)
    movaps %xmm0, 0x20(%rcx)
    movaps %xmm0, 0x30(%rcx)
    addq $64, %rcx
    cmpq %rcx, %rdx
    jne L(128bytesmore_normal)
    ret

    ALIGN (4)
L(128bytesmore_nt):
    movntdq %xmm0, (%rcx)
    movntdq %xmm0, 0x10(%rcx)
    movntdq %xmm0, 0x20(%rcx)
    movntdq %xmm0, 0x30(%rcx)
    leaq 64(%rcx), %rcx
    cmpq %rcx, %rdx
    jne L(128bytesmore_nt)
    sfence                   // Make the non-temporal stores globally visible.
    ret

END(memset_generic)
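/*
 * For reference, a C model of the small-size dispatch above (illustrative
 * only: the name memset_model and the use of memcpy for the unaligned
 * head/tail stores are assumptions for the sketch, not part of bionic):
 *
 *   #include <stdint.h>
 *   #include <string.h>
 *
 *   void* memset_model(void* dst, int c, size_t n) {
 *     uint64_t v = 0x0101010101010101ull * (uint8_t)c;
 *     char* d = dst;
 *     if (n >= 16) {
 *       // Overlapping 16-byte head/tail stores plus the aligned
 *       // 64-byte loops; see the assembly above.
 *     } else if (n & 8) {
 *       memcpy(d, &v, 8); memcpy(d + n - 8, &v, 8);
 *     } else if (n & 4) {
 *       memcpy(d, &v, 4); memcpy(d + n - 4, &v, 4);
 *     } else if (n & 2) {
 *       memcpy(d, &v, 2); memcpy(d + n - 2, &v, 2);
 *     } else if (n & 1) {
 *       *d = (char)v;
 *     }
 *     return dst;
 *   }
 */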