/*
Copyright (c) 2014, Intel Corporation
All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

    * Redistributions of source code must retain the above copyright notice,
    * this list of conditions and the following disclaimer.

    * Redistributions in binary form must reproduce the above copyright notice,
    * this list of conditions and the following disclaimer in the documentation
    * and/or other materials provided with the distribution.

    * Neither the name of Intel Corporation nor the names of its contributors
    * may be used to endorse or promote products derived from this software
    * without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/

#include <private/bionic_asm.h>

#include "cache.h"

#ifndef L
# define L(label)   .L##label
#endif

#ifndef ALIGN
# define ALIGN(n)   .p2align n
#endif

    .section .text.avx2,"ax",@progbits

ENTRY(__memset_chk_avx2)
    # %rdi = dst, %rsi = byte, %rdx = n, %rcx = dst_len
    cmp     %rcx, %rdx
    ja      __memset_chk_fail
    // Fall through to memset...
END(__memset_chk_avx2)
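
// memset_avx2: set n (%rdx) bytes at dst (%rdi) to the byte in %sil and
// return dst in %rax. Small sizes use overlapping scalar stores, medium
// sizes use overlapping unaligned vector stores from both ends, and large
// sizes run a 128-byte-aligned store loop, switching to non-temporal
// stores once the buffer exceeds the shared cache size.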
ENTRY(memset_avx2)
    movq    %rdi, %rax                  // Return value is the destination pointer.
    and     $0xff, %rsi
    mov     $0x0101010101010101, %rcx
    imul    %rsi, %rcx                  // Replicate the fill byte into all 8 bytes of %rcx.
    cmpq    $16, %rdx
    jae     L(16bytesormore)
    // n < 16: branch on the size bits of n and use stores that together
    // cover exactly n bytes.
    testb   $8, %dl
    jnz     L(8_15bytes)
    testb   $4, %dl
    jnz     L(4_7bytes)
    testb   $2, %dl
    jnz     L(2_3bytes)
    testb   $1, %dl
    jz      L(return)
    movb    %cl, (%rdi)
L(return):
    ret

L(8_15bytes):
    movq    %rcx, (%rdi)
    movq    %rcx, -8(%rdi, %rdx)
    ret

L(4_7bytes):
    movl    %ecx, (%rdi)
    movl    %ecx, -4(%rdi, %rdx)
    ret

L(2_3bytes):
    movw    %cx, (%rdi)
    movw    %cx, -2(%rdi, %rdx)
    ret

    ALIGN (4)
L(16bytesormore):
    // Broadcast the pattern into %xmm0, then cover the buffer with
    // overlapping unaligned stores from both ends, widening as n grows.
    movd    %rcx, %xmm0
    pshufd  $0, %xmm0, %xmm0
    movdqu  %xmm0, (%rdi)
    movdqu  %xmm0, -16(%rdi, %rdx)
    cmpq    $32, %rdx
    jbe     L(32bytesless)
    movdqu  %xmm0, 16(%rdi)
    movdqu  %xmm0, -32(%rdi, %rdx)
    cmpq    $64, %rdx
    jbe     L(64bytesless)
    movdqu  %xmm0, 32(%rdi)
    movdqu  %xmm0, 48(%rdi)
    movdqu  %xmm0, -64(%rdi, %rdx)
    movdqu  %xmm0, -48(%rdi, %rdx)
    cmpq    $128, %rdx
    jbe     L(128bytesless)
    vpbroadcastb %xmm0, %ymm0
    vmovdqu %ymm0, 64(%rdi)
    vmovdqu %ymm0, 96(%rdi)
    vmovdqu %ymm0, -128(%rdi, %rdx)
    vmovdqu %ymm0, -96(%rdi, %rdx)
    cmpq    $256, %rdx
    ja      L(256bytesmore)
L(32bytesless):
L(64bytesless):
L(128bytesless):
    ret

    ALIGN (4)
L(256bytesmore):
    // The first and last 128 bytes are already set above; round the start
    // up and the end down to 128-byte boundaries and fill the middle with
    // aligned stores. %r8 keeps the original length for the cache check.
    leaq    128(%rdi), %rcx
    andq    $-128, %rcx
    movq    %rdx, %r8
    addq    %rdi, %rdx
    andq    $-128, %rdx
    cmpq    %rcx, %rdx
    je      L(return)

#ifdef SHARED_CACHE_SIZE
    cmp     $SHARED_CACHE_SIZE, %r8
#else
    cmp     __x86_64_shared_cache_size(%rip), %r8
#endif
    ja      L(256bytesmore_nt)

    ALIGN (4)
L(256bytesmore_normal):
    vmovdqa %ymm0, (%rcx)
    vmovdqa %ymm0, 32(%rcx)
    vmovdqa %ymm0, 64(%rcx)
    vmovdqa %ymm0, 96(%rcx)
    addq    $128, %rcx
    cmpq    %rcx, %rdx
    jne     L(256bytesmore_normal)
    ret
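
    // For buffers larger than the shared cache, use non-temporal stores
    // so the fill data does not displace useful cache lines; the sfence
    // below orders the weakly-ordered stores before returning.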

    ALIGN (4)
L(256bytesmore_nt):
    movntdq %xmm0, (%rcx)
    movntdq %xmm0, 16(%rcx)
    movntdq %xmm0, 32(%rcx)
    movntdq %xmm0, 48(%rcx)
    movntdq %xmm0, 64(%rcx)
    movntdq %xmm0, 80(%rcx)
    movntdq %xmm0, 96(%rcx)
    movntdq %xmm0, 112(%rcx)
    leaq    128(%rcx), %rcx
    cmpq    %rcx, %rdx
    jne     L(256bytesmore_nt)
    sfence
    ret

END(memset_avx2)