/* xref: /minix3/common/lib/libc/arch/arm/string/memcpy_arm.S (revision 0a6a1f1d05b60e214de2f05a7310ddd1f0e590e7) */
/*	$NetBSD: memcpy_arm.S,v 1.5 2013/12/02 21:21:33 joerg Exp $	*/

/*-
 * Copyright (c) 1997 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Neil A. Carson and Mark Brinicombe
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#include <machine/asm.h>

/*
 * memcpy() for 32-bit ARM.
 *
 * This is one fun bit of code ...
 * Some easy listening music is suggested while trying to understand this
 * code e.g. Iron Maiden
 *
 * For anyone attempting to understand it :
 *
 * The core code is implemented here with simple stubs for memcpy().
 *
 * All local labels are prefixed with .Lmemcpy_
 * Only a forward copy is implemented in this file: every load and store
 * below post-increments its pointer, and there is no backward path.
 * Separate bits of code are used to deal with the following situations:
 *   unaligned destination address	(.Lmemcpy_destul)
 *   unaligned source address		(.Lmemcpy_srcul, one variant per offset)
 * Separate copy routines are used to produce an optimised result for each
 * of these cases.
 * The copy code will use LDM/STM instructions to copy up to 32 bytes at
 * a time where possible.
 *
 * Register usage:
 *   r0		dst cursor; the caller's dst is saved on the stack at entry
 *		and popped back into r0 as the return value
 *   r1		src cursor
 *   r2		bytes still to copy, kept biased downwards by the size of the
 *		next chunk so that the N flag doubles as "not enough left"
 *   r3, r12 (aka ip), lr
 *		scratch data registers (lr is saved at entry for this reason)
 *   r4, r5	borrowed as extra data registers; pushed before use and
 *		popped afterwards
 *
 * Note: all length tests use signed conditions (blt/bge), so len is
 * effectively treated as a signed 32-bit quantity.
 *
 * Apologies for the state of the comments ;-)
 */
/* LINTSTUB: Func: void *memcpy(void *dst, const void *src, size_t len) */
ENTRY(memcpy)
	/*
	 * Save the original dst (the return value) and lr.  lr is used as a
	 * data register below, so it is stored once here rather than in
	 * each copy loop.
	 */
	push	{r0, lr}		/* memcpy() returns dest addr */

	subs	r2, r2, #4		/* r2 = len - 4 */
	blt	.Lmemcpy_l4		/* less than 4 bytes */
	ands	r12, r0, #3		/* r12 = dst & 3 */
	bne	.Lmemcpy_destul		/* oh unaligned destination addr */
	ands	r12, r1, #3		/* r12 = src & 3 */
	bne	.Lmemcpy_srcul		/* oh unaligned source addr */

.Lmemcpy_t8:
	/* We have aligned source and destination; r2 = remaining - 4 */
	subs	r2, r2, #8
	blt	.Lmemcpy_l12		/* less than 12 bytes (4 from above) */
	subs	r2, r2, #0x14
	blt	.Lmemcpy_l32		/* less than 32 bytes (12 from above) */
	push	{r4}		/* borrow r4 */

	/* blat 32 bytes at a time; r2 = remaining - 32 */
	/* XXX for really big copies perhaps we should use more registers */
.Lmemcpy_loop32:
	ldmia	r1!, {r3, r4, r12, lr}
	stmia	r0!, {r3, r4, r12, lr}
	ldmia	r1!, {r3, r4, r12, lr}
	stmia	r0!, {r3, r4, r12, lr}
	subs	r2, r2, #0x20
	bge	.Lmemcpy_loop32

	cmn	r2, #0x10		/* ge if r2 + 16 >= 0, i.e. 16+ bytes left */
	ldmiage	r1!, {r3, r4, r12, lr}	/* blat a remaining 16 bytes */
	stmiage	r0!, {r3, r4, r12, lr}
	subge	r2, r2, #0x10
	pop	{r4}		/* return r4 */

.Lmemcpy_l32:
	adds	r2, r2, #0x14		/* r2 = remaining - 12; sets flags for loop12 */

	/* blat 12 bytes at a time */
.Lmemcpy_loop12:
	/* All four instructions are conditional on the flags from the
	 * preceding adds/subsge, so the loop body is skipped entirely when
	 * fewer than 12 bytes remain. */
	ldmiage	r1!, {r3, r12, lr}
	stmiage	r0!, {r3, r12, lr}
	subsge	r2, r2, #0x0c
	bge	.Lmemcpy_loop12

.Lmemcpy_l12:
	adds	r2, r2, #8		/* r2 = remaining - 4 */
	blt	.Lmemcpy_l4

	/* 4..11 bytes left: copy one word (lt) or two words (ge) */
	subs	r2, r2, #4
	ldrlt	r3, [r1], #4
	strlt	r3, [r0], #4
	ldmiage	r1!, {r3, r12}
	stmiage	r0!, {r3, r12}
	subge	r2, r2, #4

.Lmemcpy_l4:
	/* less than 4 bytes to go */
	adds	r2, r2, #4		/* undo the bias: r2 = remaining (0..3) */
	/*
	 * NOTE(review): the macro tested here is spelled __APCS_26_ (single
	 * trailing underscore).  Presumably the compiler predefine is
	 * __APCS_26__, in which case this branch is never selected - confirm
	 * before changing.  The final exit below has no 26-bit variant.
	 */
#ifdef __APCS_26_
	ldmiaeq sp!, {r0, pc}^		/* done */
#else
	popeq	{r0, pc}		/* done */
#endif
	/* copy the crud byte at a time: 1 byte always, 2nd if >= 2, 3rd if > 2 */
	cmp	r2, #2
	ldrb	r3, [r1], #1
	strb	r3, [r0], #1
	ldrbge	r3, [r1], #1
	strbge	r3, [r0], #1
	ldrbgt	r3, [r1], #1
	strbgt	r3, [r0], #1
	pop	{r0, pc}

	/* erg - unaligned destination */
.Lmemcpy_destul:
	rsb	r12, r12, #4		/* r12 = 4 - (dst & 3) = bytes to alignment */
	cmp	r12, #2

	/* align destination with byte copies (1, 2 or 3 of them) */
	ldrb	r3, [r1], #1
	strb	r3, [r0], #1
	ldrbge	r3, [r1], #1
	strbge	r3, [r0], #1
	ldrbgt	r3, [r1], #1
	strbgt	r3, [r0], #1
	subs	r2, r2, r12
	blt	.Lmemcpy_l4		/* less than 4 bytes */

	ands	r12, r1, #3		/* r12 = src & 3 */
	beq	.Lmemcpy_t8		/* we have an aligned source */

	/* erg - unaligned source */
	/* This is where it gets nasty ... */
	/*
	 * dst is word aligned, src is not.  r12 = src & 3 (1, 2 or 3) and
	 * r2 = remaining - 4 (>= 0).  Round src down to a word boundary and
	 * preload the word holding the first source bytes into lr.  Each
	 * output word is then built by shifting the previous input word and
	 * merging in part of the next one.
	 */
.Lmemcpy_srcul:
	bic	r1, r1, #3
	ldr	lr, [r1], #4
	cmp	r12, #2
	bgt	.Lmemcpy_srcul3		/* src & 3 == 3 */
	beq	.Lmemcpy_srcul2		/* src & 3 == 2 */
	/* src & 3 == 1: lr holds 3 unconsumed source bytes */
	cmp	r2, #0x0c
	blt	.Lmemcpy_srcul1loop4	/* fewer than 16 bytes left */
	sub	r2, r2, #0x0c		/* r2 = remaining - 16 */
	push	{r4, r5}

.Lmemcpy_srcul1loop16:
#ifdef __ARMEB__
	mov	r3, lr, lsl #8
#else
	mov	r3, lr, lsr #8
#endif
	ldmia	r1!, {r4, r5, r12, lr}
#ifdef __ARMEB__
	orr	r3, r3, r4, lsr #24
	mov	r4, r4, lsl #8
	orr	r4, r4, r5, lsr #24
	mov	r5, r5, lsl #8
	orr	r5, r5, r12, lsr #24
	mov	r12, r12, lsl #8
	orr	r12, r12, lr, lsr #24
#else
	orr	r3, r3, r4, lsl #24
	mov	r4, r4, lsr #8
	orr	r4, r4, r5, lsl #24
	mov	r5, r5, lsr #8
	orr	r5, r5, r12, lsl #24
	mov	r12, r12, lsr #8
	orr	r12, r12, lr, lsl #24
#endif
	stmia	r0!, {r3-r5, r12}
	subs	r2, r2, #0x10
	bge	.Lmemcpy_srcul1loop16
	pop	{r4, r5}
	adds	r2, r2, #0x0c		/* r2 = remaining - 4 */
	blt	.Lmemcpy_srcul1l4

.Lmemcpy_srcul1loop4:
#ifdef __ARMEB__
	mov	r12, lr, lsl #8
#else
	mov	r12, lr, lsr #8
#endif
	ldr	lr, [r1], #4
#ifdef __ARMEB__
	orr	r12, r12, lr, lsr #24
#else
	orr	r12, r12, lr, lsl #24
#endif
	str	r12, [r0], #4
	subs	r2, r2, #4
	bge	.Lmemcpy_srcul1loop4

.Lmemcpy_srcul1l4:
	/* r1 is one word ahead; 3 bytes of lr are unconsumed, so back up 3 */
	sub	r1, r1, #3
	b	.Lmemcpy_l4

	/* src & 3 == 2: lr holds 2 unconsumed source bytes */
.Lmemcpy_srcul2:
	cmp	r2, #0x0c
	blt	.Lmemcpy_srcul2loop4	/* fewer than 16 bytes left */
	sub	r2, r2, #0x0c		/* r2 = remaining - 16 */
	push	{r4, r5}

.Lmemcpy_srcul2loop16:
#ifdef __ARMEB__
	mov	r3, lr, lsl #16
#else
	mov	r3, lr, lsr #16
#endif
	ldmia	r1!, {r4, r5, r12, lr}
#ifdef __ARMEB__
	orr	r3, r3, r4, lsr #16
	mov	r4, r4, lsl #16
	orr	r4, r4, r5, lsr #16
	mov	r5, r5, lsl #16
	orr	r5, r5, r12, lsr #16
	mov	r12, r12, lsl #16
	orr	r12, r12, lr, lsr #16
#else
	orr	r3, r3, r4, lsl #16
	mov	r4, r4, lsr #16
	orr	r4, r4, r5, lsl #16
	mov	r5, r5, lsr #16
	orr	r5, r5, r12, lsl #16
	mov	r12, r12, lsr #16
	orr	r12, r12, lr, lsl #16
#endif
	stmia	r0!, {r3-r5, r12}
	subs	r2, r2, #0x10
	bge	.Lmemcpy_srcul2loop16
	pop	{r4, r5}
	adds	r2, r2, #0x0c		/* r2 = remaining - 4 */
	blt	.Lmemcpy_srcul2l4

.Lmemcpy_srcul2loop4:
#ifdef __ARMEB__
	mov	r12, lr, lsl #16
#else
	mov	r12, lr, lsr #16
#endif
	ldr	lr, [r1], #4
#ifdef __ARMEB__
	orr	r12, r12, lr, lsr #16
#else
	orr	r12, r12, lr, lsl #16
#endif
	str	r12, [r0], #4
	subs	r2, r2, #4
	bge	.Lmemcpy_srcul2loop4

.Lmemcpy_srcul2l4:
	/* r1 is one word ahead; 2 bytes of lr are unconsumed, so back up 2 */
	sub	r1, r1, #2
	b	.Lmemcpy_l4

	/* src & 3 == 3: lr holds 1 unconsumed source byte */
.Lmemcpy_srcul3:
	cmp	r2, #0x0c
	blt	.Lmemcpy_srcul3loop4	/* fewer than 16 bytes left */
	sub	r2, r2, #0x0c		/* r2 = remaining - 16 */
	push	{r4, r5}

.Lmemcpy_srcul3loop16:
#ifdef __ARMEB__
	mov	r3, lr, lsl #24
#else
	mov	r3, lr, lsr #24
#endif
	ldmia	r1!, {r4, r5, r12, lr}
#ifdef __ARMEB__
	orr	r3, r3, r4, lsr #8
	mov	r4, r4, lsl #24
	orr	r4, r4, r5, lsr #8
	mov	r5, r5, lsl #24
	orr	r5, r5, r12, lsr #8
	mov	r12, r12, lsl #24
	orr	r12, r12, lr, lsr #8
#else
	orr	r3, r3, r4, lsl #8
	mov	r4, r4, lsr #24
	orr	r4, r4, r5, lsl #8
	mov	r5, r5, lsr #24
	orr	r5, r5, r12, lsl #8
	mov	r12, r12, lsr #24
	orr	r12, r12, lr, lsl #8
#endif
	stmia	r0!, {r3-r5, r12}
	subs	r2, r2, #0x10
	bge	.Lmemcpy_srcul3loop16
	pop	{r4, r5}
	adds	r2, r2, #0x0c		/* r2 = remaining - 4 */
	blt	.Lmemcpy_srcul3l4

.Lmemcpy_srcul3loop4:
#ifdef __ARMEB__
	mov	r12, lr, lsl #24
#else
	mov	r12, lr, lsr #24
#endif
	ldr	lr, [r1], #4
#ifdef __ARMEB__
	orr	r12, r12, lr, lsr #8
#else
	orr	r12, r12, lr, lsl #8
#endif
	str	r12, [r0], #4
	subs	r2, r2, #4
	bge	.Lmemcpy_srcul3loop4

.Lmemcpy_srcul3l4:
	/* r1 is one word ahead; 1 byte of lr is unconsumed, so back up 1 */
	sub	r1, r1, #1
	b	.Lmemcpy_l4
END(memcpy)