xref: /minix3/common/lib/libc/arch/arm/string/memset_arm.S (revision 84d9c625bfea59e274550651111ae9edfdc40fbd)
1*84d9c625SLionel Sambuc/*	$NetBSD: memset_arm.S,v 1.2 2013/01/14 19:15:13 matt Exp $	*/
2*84d9c625SLionel Sambuc
3*84d9c625SLionel Sambuc/*-
4*84d9c625SLionel Sambuc * Copyright (c) 2012 The NetBSD Foundation, Inc.
5*84d9c625SLionel Sambuc * All rights reserved.
6*84d9c625SLionel Sambuc *
7*84d9c625SLionel Sambuc * This code is derived from software contributed to The NetBSD Foundation
8*84d9c625SLionel Sambuc * by Matt Thomas of 3am Software Foundry.
9*84d9c625SLionel Sambuc *
10*84d9c625SLionel Sambuc * Redistribution and use in source and binary forms, with or without
11*84d9c625SLionel Sambuc * modification, are permitted provided that the following conditions
12*84d9c625SLionel Sambuc * are met:
13*84d9c625SLionel Sambuc * 1. Redistributions of source code must retain the above copyright
14*84d9c625SLionel Sambuc *    notice, this list of conditions and the following disclaimer.
15*84d9c625SLionel Sambuc * 2. Redistributions in binary form must reproduce the above copyright
16*84d9c625SLionel Sambuc *    notice, this list of conditions and the following disclaimer in the
17*84d9c625SLionel Sambuc *    documentation and/or other materials provided with the distribution.
18*84d9c625SLionel Sambuc *
19*84d9c625SLionel Sambuc * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
20*84d9c625SLionel Sambuc * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
21*84d9c625SLionel Sambuc * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22*84d9c625SLionel Sambuc * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
23*84d9c625SLionel Sambuc * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24*84d9c625SLionel Sambuc * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25*84d9c625SLionel Sambuc * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26*84d9c625SLionel Sambuc * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27*84d9c625SLionel Sambuc * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28*84d9c625SLionel Sambuc * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29*84d9c625SLionel Sambuc * POSSIBILITY OF SUCH DAMAGE.
30*84d9c625SLionel Sambuc */
31*84d9c625SLionel Sambuc#include <machine/asm.h>
32*84d9c625SLionel Sambuc
/*
 * Bulk-store primitives used by memset.  STOREn writes n bytes of fill
 * at [ip] and post-increments ip by n.  The fill comes from d0-d3 in the
 * NEON and VFP builds and from the r2/r3 register pair otherwise.
 */
#if defined(NEON)
#define	STORE8		vst1.32		{d0}, [ip:64]!
#define	STORE16		vst1.32		{d0-d1}, [ip:64]!
#define	STORE32		vst1.32		{d0-d3}, [ip:64]!
#elif defined(VFP)
#define	STORE8		vstmia		ip!, {d0}
#define	STORE16		vstmia		ip!, {d0-d1}
#define	STORE32		vstmia		ip!, {d0-d3}
#else
/* Integer-register builds: only the 8-byte store differs. */
#if defined(_ARM_ARCH_DWORD_OK)
#define	STORE8		strd		r2, [ip], #8
#else
#define	STORE8		stmia		ip!, {r2,r3}
#endif
/* Larger stores are built by repeating the 8-byte store. */
#define	STORE16		STORE8; STORE8
#define	STORE32		STORE16; STORE16
#endif
/*
 * memset: Sets a block of memory to the specified value.
 *
 * Bulk stores go through the STORE8/STORE16/STORE32 macros, which expand
 * to NEON, VFP, strd or stmia instructions depending on the build.
 *
 * On entry:
 *   r0 - dest address
 *   r1 - byte to write (only the low 8 bits are used)
 *   r2 - number of bytes to write
 *
 * On exit:
 *   r0 - dest address (never modified)
 *
 * Register usage:
 *   ip    - current store address
 *   r1    - bytes remaining (biased by -64 while in the main loop)
 *   r2    - scratch; second copy of the fill word for strd/stmia stores
 *   r3    - fill byte replicated into all four bytes; never clobbered
 *   d0-d3 - fill pattern (NEON and VFP builds only)
 *
 * All length comparisons are unsigned because the length is a size_t.
 */
/* LINTSTUB: Func: void *memset(void *, int, size_t) */
ENTRY(memset)
	ands		r3, r1, #0xff	/* We deal with bytes */
	orrne		r3, r3, r3, lsl #8	/* replicate to all bytes */
	orrne		r3, r3, r3, lsl #16	/* replicate to all bytes */
	movs		r1, r2		/* we need r2 & r3 */
	RETc(eq)			/* return if length is 0 */
	mov		ip, r0		/* r0 needs to stay the same */

	/*
	 * Short fills are done a byte at a time.  This also guarantees that
	 * the length covers the (at most 7) alignment bytes stored below.
	 */
	cmp		r1, #12		/* is this a small memset? */
	blo		.Lbyte_by_byte	/*   then do it byte by byte */

	/* Ok first we will dword align the address */
	ands		r2, ip, #7	/* grab the bottom three bits */
	beq		.Lmemset_dwordaligned	/* The addr is dword aligned */

	rsb		r2, r2, #8	/* how far until dword aligned? */
	sub		r1, r1, r2	/* subtract it from remaining length */
	mov		r2, r3		/* duplicate fill value */

	tst		ip, #1		/* halfword aligned? */
	strneb		r3, [ip], #1	/*   no, write a byte */
	tst		ip, #2		/* word aligned? */
	strneh		r3, [ip], #2	/*   no, write a halfword */
	tst		ip, #4		/* dword aligned? */
	strne		r3, [ip], #4	/*   no, write a word */

	/* We are now doubleword aligned */
.Lmemset_dwordaligned:
#if defined(NEON)
	vdup.8		q0, r3		/* move fill to SIMD */
	vmov		q1, q0		/* put fill in q1 (d2-d3) */
#elif defined(VFP)
	mov		r2, r3		/* duplicate fill value */
	vmov		d0, r2, r3	/* move to VFP */
	vmov		d1, r2, r3
	vmov		d2, r2, r3
	vmov		d3, r2, r3
#endif

#if 1
	cmp		r1, #128
	blo		.Lmemset_mainloop
	ands		r2, ip, #63	/* check for 64-byte alignment */
	beq		.Lmemset_mainloop
	/*
	 * Let's align to a 64-byte boundary so that stores don't cross
	 * cacheline boundaries.  We also know we have at least 128-bytes to
	 * set so we don't have to worry about the length at the moment.
	 */
	rsb		r2, r2, #64	/* how many bytes until 64 bytes */
	sub		r1, r1, r2	/* subtract from remaining length */
#if !defined(NEON) && !defined(VFP)
	mov		r2, r3		/* put fill back in r2 */
#endif

	/*
	 * ip is 8-byte aligned here; store 8, 16 and 32 bytes as needed so
	 * that exactly the byte count subtracted above is written.
	 */
	tst		ip, #8		/* 16-byte aligned? */
	beq		1f		/*   yes */
	STORE8				/*   no, store 8 bytes */
1:	tst		ip, #16		/* 32-byte aligned? */
	beq		2f		/*   yes */
	STORE16				/*   no, store 16 bytes */
2:	tst		ip, #32		/* 64-byte aligned? */
	beq		.Lmemset_mainloop		/*   yes */
	STORE32				/*   no, make 64-byte aligned */
#endif

.Lmemset_mainloop:
#if !defined(NEON) && !defined(VFP)
	mov		r2, r3		/* put fill back in r2 */
#endif
	subs		r1, r1, #64	/* subtract an initial 64 */
	blo		.Lmemset_lessthan_64bytes

	/*
	 * The STORE macros do not touch the flags, so RETc(eq) below still
	 * sees the result of the most recent subs.
	 */
3:	STORE32				/* store first 32 bytes */
	STORE32				/* store second 32 bytes */
	RETc(eq)			/* return if done */
	subs		r1, r1, #64	/* subtract another 64 */
	bhs		3b		/* and do another if >= 64 were left */
.Lmemset_lessthan_64bytes:
	/*
	 * r1 is (bytes left - 64) here.  Subtracting 64 leaves the low six
	 * bits alone, so masking recovers the true remainder (0..63) and
	 * lets the early returns below actually trigger.
	 */
	and		r1, r1, #63
	tst		r1, #32		/* do we have 32 bytes left? */
	beq		.Lmemset_lessthan_32bytes
	STORE32				/*    yes, store 32 bytes */
	bics		r1, r1, #32	/* subtract 32 */
	RETc(eq)			/* return if length is 0 */
.Lmemset_lessthan_32bytes:
	tst		r1, #16		/* do we have 16 bytes left? */
	beq		.Lmemset_lessthan_16bytes
	STORE16				/*   yes, store 16 bytes */
	bics		r1, r1, #16	/* subtract 16 */
	RETc(eq)			/* return if length is 0 */
.Lmemset_lessthan_16bytes:
	tst		r1, #8		/* do we have 8 bytes left? */
	beq		.Lmemset_lessthan_8bytes/*   no */
	STORE8				/*   yes, store 8 bytes */
	bics		r1, r1, #8	/* subtract 8 */
	RETc(eq)			/* return if length is 0 */
.Lmemset_lessthan_8bytes:
	/*
	 * Use r3 for the tail: r2 only holds the fill in the strd/stmia
	 * builds and may contain an alignment count in NEON/VFP builds.
	 */
	tst		r1, #4		/* do we have a word left? */
	strne		r3, [ip], #4	/*   yes, so write one */
	tst		r1, #2		/* do we have a halfword left? */
	strneh		r3, [ip], #2	/*   yes, so write one */
	tst		r1, #1		/* do we have a byte left? */
	strneb		r3, [ip], #1	/*   yes, so write one */
	RET				/* return */

.Lbyte_by_byte:
	subs		r1, r1, #1	/* can we write a byte? */
	RETc(lo)			/*   no (count was 0), we're done */
	strb		r3, [ip], #1	/*   yes, so do it */
	b		.Lbyte_by_byte	/* try next byte */
END(memset)
174