xref: /freebsd-src/contrib/bionic-x86_64-string/sse2-memset-slm.S (revision 8ddb146abcdf061be9f2c0db7e391697dafad85c)
1*8ddb146aSEd Maste/*
2*8ddb146aSEd MasteCopyright (c) 2014, Intel Corporation
3*8ddb146aSEd MasteAll rights reserved.
4*8ddb146aSEd Maste
5*8ddb146aSEd MasteRedistribution and use in source and binary forms, with or without
6*8ddb146aSEd Mastemodification, are permitted provided that the following conditions are met:
7*8ddb146aSEd Maste
8*8ddb146aSEd Maste    * Redistributions of source code must retain the above copyright notice,
9*8ddb146aSEd Maste    * this list of conditions and the following disclaimer.
10*8ddb146aSEd Maste
11*8ddb146aSEd Maste    * Redistributions in binary form must reproduce the above copyright notice,
12*8ddb146aSEd Maste    * this list of conditions and the following disclaimer in the documentation
13*8ddb146aSEd Maste    * and/or other materials provided with the distribution.
14*8ddb146aSEd Maste
15*8ddb146aSEd Maste    * Neither the name of Intel Corporation nor the names of its contributors
16*8ddb146aSEd Maste    * may be used to endorse or promote products derived from this software
17*8ddb146aSEd Maste    * without specific prior written permission.
18*8ddb146aSEd Maste
19*8ddb146aSEd MasteTHIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
20*8ddb146aSEd MasteANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
21*8ddb146aSEd MasteWARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
22*8ddb146aSEd MasteDISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
23*8ddb146aSEd MasteANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
24*8ddb146aSEd Maste(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
25*8ddb146aSEd MasteLOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
26*8ddb146aSEd MasteANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27*8ddb146aSEd Maste(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
28*8ddb146aSEd MasteSOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29*8ddb146aSEd Maste*/
30*8ddb146aSEd Maste
31*8ddb146aSEd Maste#include <private/bionic_asm.h>
32*8ddb146aSEd Maste
33*8ddb146aSEd Maste#include "cache.h"
34*8ddb146aSEd Maste
35*8ddb146aSEd Maste#ifndef L
36*8ddb146aSEd Maste# define L(label)	.L##label
37*8ddb146aSEd Maste#endif
38*8ddb146aSEd Maste
39*8ddb146aSEd Maste#ifndef ALIGN
40*8ddb146aSEd Maste# define ALIGN(n)	.p2align n
41*8ddb146aSEd Maste#endif
42*8ddb146aSEd Maste
43*8ddb146aSEd Maste
/*
 * __memset_chk_generic(dst=%rdi, byte=%rsi, n=%rdx, dst_len=%rcx)
 *
 * _FORTIFY_SOURCE checked entry point.  If the requested fill length n
 * is larger than the compiler-known destination size dst_len, diverts to
 * __memset_chk_fail (defined elsewhere in bionic; presumably aborts --
 * confirm against bionic's fortify support code).  `ja` is the unsigned
 * comparison, correct for size_t operands.  On success it falls straight
 * through into memset_generic below, so this function and memset_generic
 * must remain adjacent in the emitted section -- do not reorder.
 */
44*8ddb146aSEd MasteENTRY(__memset_chk_generic)
45*8ddb146aSEd Maste  # %rdi = dst, %rsi = byte, %rdx = n, %rcx = dst_len
46*8ddb146aSEd Maste  cmp %rcx, %rdx
47*8ddb146aSEd Maste  ja __memset_chk_fail
48*8ddb146aSEd Maste  // Fall through to memset...
49*8ddb146aSEd MasteEND(__memset_chk_generic)
50*8ddb146aSEd Maste
51*8ddb146aSEd Maste
/*
 * void *memset_generic(void *dst (%rdi), int byte (%rsi), size_t n (%rdx))
 * ABI:   SysV AMD64.  Returns dst in %rax (copied from %rdi on entry).
 * Strategy (Silvermont-tuned SSE2):
 *   n <  16   : branch tree on the low bits of n, overlapping scalar
 *               stores from both ends of the buffer.
 *   16..128   : overlapping unaligned 16-byte stores from both ends.
 *   n > 128   : 64-byte-aligned store loop; non-temporal stores when n
 *               exceeds the shared cache size.
 * Clobbers: %rcx, %rsi, %rdx, %r8, %xmm0, flags.
 * NOTE(review): ENTRY() from bionic_asm.h appears to emit .text, which
 * would override the .text.sse2 section request below -- confirm.
 */
52*8ddb146aSEd Maste	.section .text.sse2,"ax",@progbits
53*8ddb146aSEd MasteENTRY(memset_generic)
54*8ddb146aSEd Maste	movq	%rdi, %rax
/* Replicate the fill byte into all 8 bytes of %rcx: (byte & 0xff) *
 * 0x0101010101010101 broadcasts it lane-wise. */
55*8ddb146aSEd Maste	and	$0xff, %rsi
56*8ddb146aSEd Maste	mov	$0x0101010101010101, %rcx
57*8ddb146aSEd Maste	imul	%rsi, %rcx
58*8ddb146aSEd Maste	cmpq	$16, %rdx
59*8ddb146aSEd Maste	jae	L(16bytesormore)
/* n < 16: test each bit of n (in %dl) and store 8/4/2/1 bytes.  Each
 * two-store case writes the head and the tail of the region; the stores
 * may overlap, which is harmless since they write the same pattern. */
60*8ddb146aSEd Maste	testb	$8, %dl
61*8ddb146aSEd Maste	jnz	L(8_15bytes)
62*8ddb146aSEd Maste	testb	$4, %dl
63*8ddb146aSEd Maste	jnz	L(4_7bytes)
64*8ddb146aSEd Maste	testb	$2, %dl
65*8ddb146aSEd Maste	jnz	L(2_3bytes)
66*8ddb146aSEd Maste	testb	$1, %dl
67*8ddb146aSEd Maste	jz	L(return)
68*8ddb146aSEd Maste	movb	%cl, (%rdi)
69*8ddb146aSEd MasteL(return):
70*8ddb146aSEd Maste	ret
71*8ddb146aSEd Maste
/* 8 <= n <= 15: two possibly-overlapping 8-byte stores cover [0,n). */
72*8ddb146aSEd MasteL(8_15bytes):
73*8ddb146aSEd Maste	movq	%rcx, (%rdi)
74*8ddb146aSEd Maste	movq	%rcx, -8(%rdi, %rdx)
75*8ddb146aSEd Maste	ret
76*8ddb146aSEd Maste
/* 4 <= n <= 7: two possibly-overlapping 4-byte stores. */
77*8ddb146aSEd MasteL(4_7bytes):
78*8ddb146aSEd Maste	movl	%ecx, (%rdi)
79*8ddb146aSEd Maste	movl	%ecx, -4(%rdi, %rdx)
80*8ddb146aSEd Maste	ret
81*8ddb146aSEd Maste
/* 2 <= n <= 3: two possibly-overlapping 2-byte stores. */
82*8ddb146aSEd MasteL(2_3bytes):
83*8ddb146aSEd Maste	movw	%cx, (%rdi)
84*8ddb146aSEd Maste	movw	%cx, -2(%rdi, %rdx)
85*8ddb146aSEd Maste	ret
86*8ddb146aSEd Maste
87*8ddb146aSEd Maste	ALIGN (4)
88*8ddb146aSEd MasteL(16bytesormore):
/* Broadcast the 8-byte pattern to all of %xmm0.  `movd` with a 64-bit
 * GPR is assembled by GAS as movq (64-bit move); pshufd $0 then
 * replicates the low dword, which is valid because all 8 bytes of %rcx
 * are already equal. */
89*8ddb146aSEd Maste	movd	%rcx, %xmm0
90*8ddb146aSEd Maste	pshufd	$0, %xmm0, %xmm0
/* Progressively cover up to 128 bytes with overlapping unaligned
 * 16-byte stores issued from both ends of the buffer. */
91*8ddb146aSEd Maste	movdqu	%xmm0, (%rdi)
92*8ddb146aSEd Maste	movdqu	%xmm0, -16(%rdi, %rdx)
93*8ddb146aSEd Maste	cmpq	$32, %rdx
94*8ddb146aSEd Maste	jbe	L(32bytesless)
95*8ddb146aSEd Maste	movdqu	%xmm0, 16(%rdi)
96*8ddb146aSEd Maste	movdqu	%xmm0, -32(%rdi, %rdx)
97*8ddb146aSEd Maste	cmpq	$64, %rdx
98*8ddb146aSEd Maste	jbe	L(64bytesless)
99*8ddb146aSEd Maste	movdqu	%xmm0, 32(%rdi)
100*8ddb146aSEd Maste	movdqu	%xmm0, 48(%rdi)
101*8ddb146aSEd Maste	movdqu	%xmm0, -64(%rdi, %rdx)
102*8ddb146aSEd Maste	movdqu	%xmm0, -48(%rdi, %rdx)
103*8ddb146aSEd Maste	cmpq	$128, %rdx
104*8ddb146aSEd Maste	ja	L(128bytesmore)
105*8ddb146aSEd MasteL(32bytesless):
106*8ddb146aSEd MasteL(64bytesless):
107*8ddb146aSEd Maste	ret
108*8ddb146aSEd Maste
109*8ddb146aSEd Maste	ALIGN (4)
/* n > 128.  Compute the 64-byte-aligned interior [%rcx, %rdx):
 *   %rcx = dst + 64 rounded down to 64  (aligned start, past the head
 *          bytes already written above)
 *   %rdx = dst + n rounded down to 64   (aligned end; tail bytes past it
 *          were already written above)
 *   %r8  = original n, preserved for the cache-size comparison.
 * If start == end the interior is empty and everything was covered by
 * the head/tail stores, so just return. */
110*8ddb146aSEd MasteL(128bytesmore):
111*8ddb146aSEd Maste	leaq	64(%rdi), %rcx
112*8ddb146aSEd Maste	andq	$-64, %rcx
113*8ddb146aSEd Maste	movq	%rdx, %r8
114*8ddb146aSEd Maste	addq	%rdi, %rdx
115*8ddb146aSEd Maste	andq	$-64, %rdx
116*8ddb146aSEd Maste	cmpq	%rcx, %rdx
117*8ddb146aSEd Maste	je	L(return)
118*8ddb146aSEd Maste
/* Fills larger than the shared cache take the non-temporal path to
 * avoid evicting useful cache lines.  SHARED_CACHE_SIZE /
 * __x86_64_shared_cache_size presumably come from cache.h -- confirm. */
119*8ddb146aSEd Maste#ifdef SHARED_CACHE_SIZE
120*8ddb146aSEd Maste	cmp	$SHARED_CACHE_SIZE, %r8
121*8ddb146aSEd Maste#else
122*8ddb146aSEd Maste	cmp	__x86_64_shared_cache_size(%rip), %r8
123*8ddb146aSEd Maste#endif
124*8ddb146aSEd Maste	ja	L(128bytesmore_nt)
125*8ddb146aSEd Maste
126*8ddb146aSEd Maste	ALIGN (4)
/* Cacheable path: one 64-byte cache line per iteration with aligned
 * stores (%rcx is 64-byte aligned here; movdqa/movaps are equivalent
 * aligned 16-byte stores). */
127*8ddb146aSEd MasteL(128bytesmore_normal):
128*8ddb146aSEd Maste	movdqa	%xmm0, (%rcx)
129*8ddb146aSEd Maste	movaps	%xmm0, 0x10(%rcx)
130*8ddb146aSEd Maste	movaps	%xmm0, 0x20(%rcx)
131*8ddb146aSEd Maste	movaps	%xmm0, 0x30(%rcx)
132*8ddb146aSEd Maste	addq	$64, %rcx
133*8ddb146aSEd Maste	cmpq	%rcx, %rdx
134*8ddb146aSEd Maste	jne	L(128bytesmore_normal)
135*8ddb146aSEd Maste	ret
136*8ddb146aSEd Maste
137*8ddb146aSEd Maste	ALIGN (4)
/* Non-temporal path: same loop shape with movntdq streaming stores;
 * sfence afterwards orders the weakly-ordered NT stores before return. */
138*8ddb146aSEd MasteL(128bytesmore_nt):
139*8ddb146aSEd Maste	movntdq	%xmm0, (%rcx)
140*8ddb146aSEd Maste	movntdq	%xmm0, 0x10(%rcx)
141*8ddb146aSEd Maste	movntdq	%xmm0, 0x20(%rcx)
142*8ddb146aSEd Maste	movntdq	%xmm0, 0x30(%rcx)
143*8ddb146aSEd Maste	leaq	64(%rcx), %rcx
144*8ddb146aSEd Maste	cmpq	%rcx, %rdx
145*8ddb146aSEd Maste	jne	L(128bytesmore_nt)
146*8ddb146aSEd Maste	sfence
147*8ddb146aSEd Maste	ret
148*8ddb146aSEd Maste
149*8ddb146aSEd MasteEND(memset_generic)
150