xref: /minix3/common/lib/libc/arch/arm/string/memset_arm.S (revision 84d9c625bfea59e274550651111ae9edfdc40fbd)
1/*	$NetBSD: memset_arm.S,v 1.2 2013/01/14 19:15:13 matt Exp $	*/
2
3/*-
4 * Copyright (c) 2012 The NetBSD Foundation, Inc.
5 * All rights reserved.
6 *
7 * This code is derived from software contributed to The NetBSD Foundation
8 * by Matt Thomas of 3am Software Foundry.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 *    notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 *    notice, this list of conditions and the following disclaimer in the
17 *    documentation and/or other materials provided with the distribution.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29 * POSSIBILITY OF SUCH DAMAGE.
30 */
31#include <machine/asm.h>
32
/*
 * STORE8, STORE16 and STORE32 each store 8, 16 or 32 bytes of fill data
 * at the address in ip and advance ip past the bytes written.
 *
 * The NEON and VFP variants take the fill from d0-d3; the integer
 * variants take it from the r2/r3 register pair, so both registers must
 * hold the replicated fill value whenever one of those is expanded.
 */
#if defined(NEON)
/* The :64 qualifier tells the core that ip is 64-bit (8-byte) aligned. */
#define	STORE8		vst1.32		{d0}, [ip:64]!
#define	STORE16		vst1.32		{d0-d1}, [ip:64]!
#define	STORE32		vst1.32		{d0-d3}, [ip:64]!
#elif defined(VFP)
#define	STORE8		vstmia		ip!, {d0}
#define	STORE16		vstmia		ip!, {d0-d1}
#define	STORE32		vstmia		ip!, {d0-d3}
#else
/* Integer-register variants: only the 8-byte primitive differs. */
#if defined(_ARM_ARCH_DWORD_OK)
#define	STORE8		strd		r2, [ip], #8
#else
#define	STORE8		stmia		ip!, {r2,r3}
#endif
/* Larger stores are built by repeating the 8-byte primitive. */
#define	STORE16		STORE8; STORE8
#define	STORE32		STORE16; STORE16
#endif
/*
 * memset: Sets a block of memory to the specified value.
 *
 * The bulk of the fill is done with the STORE8/STORE16/STORE32 macros,
 * which expand to NEON, VFP, strd or stmia stores depending on the
 * build configuration.
 *
 * On entry:
 *   r0 - dest address
 *   r1 - byte to write
 *   r2 - number of bytes to write
 *
 * On exit:
 *   r0 - dest address
 *
 * Register usage inside the routine:
 *   ip - current store address
 *   r1 - remaining length (biased by -64 once the main loop is entered)
 *   r3 - fill byte replicated into all four bytes of the word
 *   r2 - scratch; mirrors r3 when the integer STOREn variants are used
 *   d0-d3 - fill pattern (NEON and VFP builds only)
 */
/* LINTSTUB: Func: void *memset(void *, int, size_t) */
ENTRY(memset)
	ands		r3, r1, #0xff	/* We deal with bytes */
	orrne		r3, r3, r3, lsl #8	/* replicate to all bytes */
	orrne		r3, r3, r3, lsl #16	/* replicate to all bytes */
	movs		r1, r2		/* we need r2 & r3 */
	RETc(eq)			/* return if length is 0 */
	mov		ip, r0		/* r0 needs to stay the same */

	/*
	 * Short fills must not reach the alignment code below: it writes up
	 * to 7 bytes without checking the length, which is only safe when
	 * at least 12 bytes are requested.
	 */
	cmp		r1, #12		/* is this a small memset? */
	blt		.Lbyte_by_byte	/*   then do it byte by byte */

	/* Ok first we will dword align the address */
	ands		r2, ip, #7	/* grab the bottom three bits */
	beq		.Lmemset_dwordaligned	/* The addr is dword aligned */

	rsb		r2, r2, #8	/* how far until dword aligned? */
	sub		r1, r1, r2	/* subtract it from remaining length */
	mov		r2, r3		/* duplicate fill value */

	tst		ip, #1		/* halfword aligned? */
	strneb		r3, [ip], #1	/*   no, write a byte */
	tst		ip, #2		/* word aligned? */
	strneh		r3, [ip], #2	/*   no, write a halfword */
	tst		ip, #4		/* dword aligned? */
	strne		r3, [ip], #4	/*   no, write a word */

	/* We are now doubleword aligned */
.Lmemset_dwordaligned:
#if defined(NEON)
	vdup.8		q0, r3		/* move fill to SIMD */
	vmov		q1, q0		/* put fill in q1 (d2-d3) */
#elif defined(VFP)
	mov		r2, r3		/* duplicate fill value */
	vmov		d0, r2, r3	/* move to VFP */
	vmov		d1, r2, r3
	vmov		d2, r2, r3
	vmov		d3, r2, r3
#endif

#if 1
	cmp		r1, #128
	blt		.Lmemset_mainloop
	ands		r2, ip, #63	/* check for 64-byte alignment */
	beq		.Lmemset_mainloop
	/*
	 * Let's align to a 64-byte boundary so that stores don't cross
	 * cacheline boundaries.  We also know we have at least 128 bytes to
	 * set so we don't have to worry about the length at the moment.
	 */
	rsb		r2, r2, #64	/* how many bytes until 64 bytes */
	sub		r1, r1, r2	/* subtract from remaining length */
#if !defined(NEON) && !defined(VFP)
	mov		r2, r3		/* put fill back in r2 */
#endif

	/*
	 * Each step stores only if the matching address bit is set, so that
	 * exactly the number of bytes subtracted from r1 above is written.
	 */
	tst		ip, #8		/* quadword aligned? */
	beq		1f		/*   yes */
	STORE8				/*   no, store a dword */
1:	tst		ip, #16		/* octaword aligned? */
	beq		2f		/*   yes */
	STORE16				/*   no, store a quadword */
2:	tst		ip, #32		/* 64-byte aligned? */
	beq		.Lmemset_mainloop		/*   yes */
	STORE32				/*   no, make 64-byte aligned */
#endif

.Lmemset_mainloop:
#if !defined(NEON) && !defined(VFP)
	mov		r2, r3		/* put fill back in r2 */
#endif
	subs		r1, r1, #64	/* subtract an initial 64 */
	blt		.Lmemset_lessthan_64bytes

3:	STORE32				/* store first octaword */
	STORE32				/* store second octaword */
	RETc(eq)			/* return if done */
	subs		r1, r1, #64	/* subtract another 64 */
	bge		3b		/* and do other if still >= 0 */
.Lmemset_lessthan_64bytes:
	/*
	 * r1 is negative here (remaining length minus 64); only its low six
	 * bits, which equal the number of bytes still to write, are tested.
	 * NOTE(review): since the upper bits of r1 stay set, the bics results
	 * below are never zero, so the RETc(eq) shortcuts do not fire and the
	 * routine always leaves through the final RET.
	 */
	tst		r1, #32		/* do we have 32 bytes left? */
	beq		.Lmemset_lessthan_32bytes
	STORE32				/*    yes, store an octaword */
	bics		r1, r1, #32	/* subtract 32 */
	RETc(eq)			/* return if length is 0 */
.Lmemset_lessthan_32bytes:
	tst		r1, #16		/* do we have 16 bytes left? */
	beq		.Lmemset_lessthan_16bytes
	STORE16				/*   yes, store a quadword */
	bics		r1, r1, #16	/* subtract 16 */
	RETc(eq)			/* return if length is 0 */
.Lmemset_lessthan_16bytes:
	tst		r1, #8		/* do we have 8 bytes left? */
	beq		.Lmemset_lessthan_8bytes/*   no */
	STORE8				/*   yes, store a dword */
	bics		r1, r1, #8	/* subtract 8 */
	RETc(eq)			/* return if length is 0 */
.Lmemset_lessthan_8bytes:
	/*
	 * Store from r3, not r2: r3 always holds the replicated fill, while
	 * r2 is used as scratch above and is only reloaded with the fill for
	 * the integer STOREn variants.
	 */
	tst		r1, #4		/* do we have a word left? */
	strne		r3, [ip], #4	/*   yes, so write one */
	tst		r1, #2		/* do we have a halfword left? */
	strneh		r3, [ip], #2	/*   yes, so write one */
	tst		r1, #1		/* do we have a byte left? */
	strneb		r3, [ip], #1	/*   yes, so write one */
	RET				/* return */

.Lbyte_by_byte:
	subs		r1, r1, #1	/* can we write a byte? */
	RETc(lt)			/*   no, we're done */
	strb		r3, [ip], #1	/*   yes, so do it */
	b		.Lbyte_by_byte	/* try next byte */
END(memset)
174