/*	$NetBSD: memcpy_arm.S,v 1.4 2013/08/11 04:56:32 matt Exp $	*/

/*-
 * Copyright (c) 1997 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Neil A. Carson and Mark Brinicombe
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#include <machine/asm.h>

#if !defined(__minix)
#if defined(__ARM_EABI__)
STRONG_ALIAS(__aeabi_memcpy, memcpy)
#endif
#endif /* !defined(__minix) */

/*
 * This is one fun bit of code ...
 * Some easy listening music is suggested while trying to understand this
 * code, e.g. Iron Maiden.
 *
 * For anyone attempting to understand it:
 *
 * The core code is implemented here with simple stubs for memcpy().
 *
 * All local labels are prefixed with Lmemcpy_.
 * Following the prefix, a label starting with f is used in the forward
 * copy code while a label using b is used in the backwards copy code.
 * The source and destination addresses determine whether a forward or
 * backward copy is performed.
 * Separate bits of code are used to deal with the following situations
 * for both the forward and backwards copy:
 * - unaligned source address
 * - unaligned destination address
 * Separate copy routines are used to produce an optimised result for
 * each of these cases.
 * The copy code will use LDM/STM instructions to copy up to 32 bytes at
 * a time where possible.
 *
 * Note: r12 (aka ip) can be trashed during the function along with
 * r0-r3, although r0-r2 have defined uses (dst, src, len) throughout.
 * Additional registers (r4, r5 and lr) are preserved prior to use.
 *
 * Apologies for the state of the comments ;-)
 */
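
/* A rough C-like sketch of the control flow below (illustrative only,
 * not part of the build):
 *
 *	if (len < 4)		goto byte_tail;
 *	if (dst & 3)		copy 1-3 bytes to word-align dst;
 *	if (src & 3)		goto shifted-word loops (srcul1/2/3);
 *	while (len >= 32)	copy 32 bytes via ldm/stm;
 *	then 16-, 12-, 8- and 4-byte steps, and byte_tail for 0-3 bytes.
 */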

/* For MINIX, we always spill r0, r4, r5, and lr, so we can easily
 * clean up the stack after a phys_copy fault. NetBSD, in contrast,
 * spills the minimum number of registers for each path.
 */
#if defined(__minix)
/* LINTSTUB: Func: void *phys_copy(void *src, void *dst, size_t len) */
ENTRY(phys_copy)
	/* switch the source and destination registers */
	eor     r0, r1, r0
	eor     r1, r0, r1
	eor     r0, r1, r0
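	/* The three eors above are the classic XOR swap (a ^= b;
	 * b ^= a; a ^= b), exchanging r0 and r1 without a scratch
	 * register so that r0 = dst and r1 = src, matching the
	 * memcpy register convention used by the shared code below.
	 */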
#else
/* LINTSTUB: Func: void *memcpy(void *dst, const void *src, size_t len) */
ENTRY(memcpy)
#endif
	/* saves leaf functions from having to store this away */
#if defined(__minix)
	push	{r0, r4, r5, lr}	/* memcpy() returns dest addr */
#else
	push	{r0, lr}		/* memcpy() returns dest addr */
#endif

	subs	r2, r2, #4
	blt	.Lmemcpy_l4		/* less than 4 bytes */
	ands	r12, r0, #3
	bne	.Lmemcpy_destul		/* oh unaligned destination addr */
	ands	r12, r1, #3
	bne	.Lmemcpy_srcul		/* oh unaligned source addr */

.Lmemcpy_t8:
	/* We have aligned source and destination */
	subs	r2, r2, #8
	blt	.Lmemcpy_l12		/* less than 12 bytes (4 from above) */
	subs	r2, r2, #0x14
	blt	.Lmemcpy_l32		/* less than 32 bytes (12 from above) */
#if !defined(__minix)
	push	{r4}		/* borrow r4 */
#endif

	/* blat 32 bytes at a time */
	/* XXX for really big copies perhaps we should use more registers */
.Lmemcpy_loop32:
	ldmia	r1!, {r3, r4, r12, lr}
	stmia	r0!, {r3, r4, r12, lr}
	ldmia	r1!, {r3, r4, r12, lr}
	stmia	r0!, {r3, r4, r12, lr}
	subs	r2, r2, #0x20
	bge	.Lmemcpy_loop32

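	/* On loop exit r2 is in [-0x20, -1]; the true remaining count
	 * is r2 + 0x20.  cmn adds 0x10 and sets the flags, so the
	 * "ge" forms below fire only when at least 16 bytes remain.
	 */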
	cmn	r2, #0x10
	ldmiage	r1!, {r3, r4, r12, lr}	/* blat a remaining 16 bytes */
	stmiage	r0!, {r3, r4, r12, lr}
	subge	r2, r2, #0x10
#if !defined(__minix)
	pop	{r4}		/* return r4 */
#endif

.Lmemcpy_l32:
	adds	r2, r2, #0x14
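	/* The adds re-biases the count: r2 is now (remaining - 12),
	 * and its flags feed the "ge" forms below, so the 12-byte
	 * loop runs only while at least 12 bytes are left.
	 */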

	/* blat 12 bytes at a time */
.Lmemcpy_loop12:
	ldmiage	r1!, {r3, r12, lr}
	stmiage	r0!, {r3, r12, lr}
	subsge	r2, r2, #0x0c
	bge	.Lmemcpy_loop12

.Lmemcpy_l12:
	adds	r2, r2, #8
	blt	.Lmemcpy_l4

	subs	r2, r2, #4
	ldrlt	r3, [r1], #4
	strlt	r3, [r0], #4
	ldmiage	r1!, {r3, r12}
	stmiage	r0!, {r3, r12}
	subge	r2, r2, #4

.Lmemcpy_l4:
	/* less than 4 bytes to go */
	adds	r2, r2, #4
#if defined(__minix)
	popeq	{r0, r4, r5}
	moveq	r0, #0
	popeq	{pc}
#else
#ifdef __APCS_26__
	ldmiaeq sp!, {r0, pc}^		/* done */
#else
	popeq	{r0, pc}		/* done */
#endif
#endif
	/* copy the crud byte at a time */
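	/* Here r2 holds the final 1-3 byte count.  The cmp r2, #2
	 * below drives the conditional pairs: the first byte copies
	 * unconditionally, the second only if r2 >= 2 (ge) and the
	 * third only if r2 > 2 (gt).
	 */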
	cmp	r2, #2
	ldrb	r3, [r1], #1
	strb	r3, [r0], #1
	ldrbge	r3, [r1], #1
	strbge	r3, [r0], #1
	ldrbgt	r3, [r1], #1
	strbgt	r3, [r0], #1
#if defined(__minix)
	pop	{r0, r4, r5}
	mov	r0, #0
	pop	{pc}
#else
	pop	{r0, pc}
#endif

	/* erg - unaligned destination */
.Lmemcpy_destul:
	rsb	r12, r12, #4
	cmp	r12, #2
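	/* r12 = 4 - (dst & 3) is the number of bytes needed to reach
	 * a word boundary; cmp r12, #2 sets up the same ge/gt
	 * conditional byte copies as the tail loop above.
	 */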

	/* align destination with byte copies */
	ldrb	r3, [r1], #1
	strb	r3, [r0], #1
	ldrbge	r3, [r1], #1
	strbge	r3, [r0], #1
	ldrbgt	r3, [r1], #1
	strbgt	r3, [r0], #1
	subs	r2, r2, r12
	blt	.Lmemcpy_l4		/* less than 4 bytes */

	ands	r12, r1, #3
	beq	.Lmemcpy_t8		/* we have an aligned source */

	/* erg - unaligned source */
	/* This is where it gets nasty ... */
.Lmemcpy_srcul:
	bic	r1, r1, #3
	ldr	lr, [r1], #4
	cmp	r12, #2
	bgt	.Lmemcpy_srcul3
	beq	.Lmemcpy_srcul2
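	/* r12 = src & 3 picks one of three shifted-merge paths.  Each
	 * keeps the last aligned word loaded in lr and builds output
	 * words by pairing shifts; a little-endian sketch for offset 1
	 * (this srcul1 path):
	 *
	 *	out = (prev >> 8) | (next << 24);
	 *
	 * offsets 2 and 3 use 16/16 and 24/8 shifts respectively
	 * (mirrored on big-endian).
	 */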
	cmp	r2, #0x0c
	blt	.Lmemcpy_srcul1loop4
	sub	r2, r2, #0x0c
#if !defined(__minix)
	push	{r4, r5}
#endif

.Lmemcpy_srcul1loop16:
#ifdef __ARMEB__
	mov	r3, lr, lsl #8
#else
	mov	r3, lr, lsr #8
#endif
	ldmia	r1!, {r4, r5, r12, lr}
#ifdef __ARMEB__
	orr	r3, r3, r4, lsr #24
	mov	r4, r4, lsl #8
	orr	r4, r4, r5, lsr #24
	mov	r5, r5, lsl #8
	orr	r5, r5, r12, lsr #24
	mov	r12, r12, lsl #8
	orr	r12, r12, lr, lsr #24
#else
	orr	r3, r3, r4, lsl #24
	mov	r4, r4, lsr #8
	orr	r4, r4, r5, lsl #24
	mov	r5, r5, lsr #8
	orr	r5, r5, r12, lsl #24
	mov	r12, r12, lsr #8
	orr	r12, r12, lr, lsl #24
#endif
	stmia	r0!, {r3-r5, r12}
	subs	r2, r2, #0x10
	bge	.Lmemcpy_srcul1loop16
#if !defined(__minix)
	pop	{r4, r5}
#endif
	adds	r2, r2, #0x0c
	blt	.Lmemcpy_srcul1l4

.Lmemcpy_srcul1loop4:
#ifdef __ARMEB__
	mov	r12, lr, lsl #8
#else
	mov	r12, lr, lsr #8
#endif
	ldr	lr, [r1], #4
#ifdef __ARMEB__
	orr	r12, r12, lr, lsr #24
#else
	orr	r12, r12, lr, lsl #24
#endif
	str	r12, [r0], #4
	subs	r2, r2, #4
	bge	.Lmemcpy_srcul1loop4

.Lmemcpy_srcul1l4:
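	/* r1 sits one aligned word past the bytes actually consumed;
	 * the next unread byte is at r1 - (4 - offset).  The offset
	 * here is 1, hence the 3-byte step back (srcul2l4/srcul3l4
	 * below subtract 2 and 1 for the same reason).
	 */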
	sub	r1, r1, #3
	b	.Lmemcpy_l4

.Lmemcpy_srcul2:
	cmp	r2, #0x0c
	blt	.Lmemcpy_srcul2loop4
	sub	r2, r2, #0x0c
#if !defined(__minix)
	push	{r4, r5}
#endif

.Lmemcpy_srcul2loop16:
#ifdef __ARMEB__
	mov	r3, lr, lsl #16
#else
	mov	r3, lr, lsr #16
#endif
	ldmia	r1!, {r4, r5, r12, lr}
#ifdef __ARMEB__
	orr	r3, r3, r4, lsr #16
	mov	r4, r4, lsl #16
	orr	r4, r4, r5, lsr #16
	mov	r5, r5, lsl #16
	orr	r5, r5, r12, lsr #16
	mov	r12, r12, lsl #16
	orr	r12, r12, lr, lsr #16
#else
	orr	r3, r3, r4, lsl #16
	mov	r4, r4, lsr #16
	orr	r4, r4, r5, lsl #16
	mov	r5, r5, lsr #16
	orr	r5, r5, r12, lsl #16
	mov	r12, r12, lsr #16
	orr	r12, r12, lr, lsl #16
#endif
	stmia	r0!, {r3-r5, r12}
	subs	r2, r2, #0x10
	bge	.Lmemcpy_srcul2loop16
#if !defined(__minix)
	pop	{r4, r5}
#endif
	adds	r2, r2, #0x0c
	blt	.Lmemcpy_srcul2l4

.Lmemcpy_srcul2loop4:
#ifdef __ARMEB__
	mov	r12, lr, lsl #16
#else
	mov	r12, lr, lsr #16
#endif
	ldr	lr, [r1], #4
#ifdef __ARMEB__
	orr	r12, r12, lr, lsr #16
#else
	orr	r12, r12, lr, lsl #16
#endif
	str	r12, [r0], #4
	subs	r2, r2, #4
	bge	.Lmemcpy_srcul2loop4

.Lmemcpy_srcul2l4:
	sub	r1, r1, #2
	b	.Lmemcpy_l4

.Lmemcpy_srcul3:
	cmp	r2, #0x0c
	blt	.Lmemcpy_srcul3loop4
	sub	r2, r2, #0x0c
#if !defined(__minix)
	push	{r4, r5}
#endif

.Lmemcpy_srcul3loop16:
#ifdef __ARMEB__
	mov	r3, lr, lsl #24
#else
	mov	r3, lr, lsr #24
#endif
	ldmia	r1!, {r4, r5, r12, lr}
#ifdef __ARMEB__
	orr	r3, r3, r4, lsr #8
	mov	r4, r4, lsl #24
	orr	r4, r4, r5, lsr #8
	mov	r5, r5, lsl #24
	orr	r5, r5, r12, lsr #8
	mov	r12, r12, lsl #24
	orr	r12, r12, lr, lsr #8
#else
	orr	r3, r3, r4, lsl #8
	mov	r4, r4, lsr #24
	orr	r4, r4, r5, lsl #8
	mov	r5, r5, lsr #24
	orr	r5, r5, r12, lsl #8
	mov	r12, r12, lsr #24
	orr	r12, r12, lr, lsl #8
#endif
	stmia	r0!, {r3-r5, r12}
	subs	r2, r2, #0x10
	bge	.Lmemcpy_srcul3loop16
#if !defined(__minix)
	pop	{r4, r5}
#endif
	adds	r2, r2, #0x0c
	blt	.Lmemcpy_srcul3l4

.Lmemcpy_srcul3loop4:
#ifdef __ARMEB__
	mov	r12, lr, lsl #24
#else
	mov	r12, lr, lsr #24
#endif
	ldr	lr, [r1], #4
#ifdef __ARMEB__
	orr	r12, r12, lr, lsr #8
#else
	orr	r12, r12, lr, lsl #8
#endif
	str	r12, [r0], #4
	subs	r2, r2, #4
	bge	.Lmemcpy_srcul3loop4

.Lmemcpy_srcul3l4:
	sub	r1, r1, #1
	b	.Lmemcpy_l4

#if defined(__minix)
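/* If phys_copy faults, the kernel resumes execution at one of the
 * labels below ("kernel can send us here") instead of the faulting
 * instruction.  Both paths unwind the {r0, r4, r5, lr} frame pushed
 * at entry; phys_copy_fault returns the saved r0 (the destination
 * address, non-zero) to signal failure, while the in-kernel variant
 * instead returns the faulting address read from the DFAR (Data
 * Fault Address Register).
 */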
LABEL(phys_copy_fault)		/* kernel can send us here */
	pop	{r0, r4, r5}
	pop	{pc}

LABEL(phys_copy_fault_in_kernel)	/* kernel can send us here */
	pop	{r0, r4, r5}
	mrc	p15, 0, r0, c6, c0, 0	/* Read DFAR */
	pop	{pc}
#else
END(memcpy)
#endif