xref: /minix3/common/lib/libc/arch/arm/string/memcpy_xscale.S (revision b6cbf7203b080219de306404f8022a65b7884f33)
1*b6cbf720SGianluca Guida/*	$NetBSD: memcpy_xscale.S,v 1.2 2007/06/21 21:37:04 scw Exp $	*/
2*b6cbf720SGianluca Guida
3*b6cbf720SGianluca Guida/*
4*b6cbf720SGianluca Guida * Copyright 2003 Wasabi Systems, Inc.
5*b6cbf720SGianluca Guida * All rights reserved.
6*b6cbf720SGianluca Guida *
7*b6cbf720SGianluca Guida * Written by Steve C. Woodford for Wasabi Systems, Inc.
8*b6cbf720SGianluca Guida *
9*b6cbf720SGianluca Guida * Redistribution and use in source and binary forms, with or without
10*b6cbf720SGianluca Guida * modification, are permitted provided that the following conditions
11*b6cbf720SGianluca Guida * are met:
12*b6cbf720SGianluca Guida * 1. Redistributions of source code must retain the above copyright
13*b6cbf720SGianluca Guida *    notice, this list of conditions and the following disclaimer.
14*b6cbf720SGianluca Guida * 2. Redistributions in binary form must reproduce the above copyright
15*b6cbf720SGianluca Guida *    notice, this list of conditions and the following disclaimer in the
16*b6cbf720SGianluca Guida *    documentation and/or other materials provided with the distribution.
17*b6cbf720SGianluca Guida * 3. All advertising materials mentioning features or use of this software
18*b6cbf720SGianluca Guida *    must display the following acknowledgement:
19*b6cbf720SGianluca Guida *      This product includes software developed for the NetBSD Project by
20*b6cbf720SGianluca Guida *      Wasabi Systems, Inc.
21*b6cbf720SGianluca Guida * 4. The name of Wasabi Systems, Inc. may not be used to endorse
22*b6cbf720SGianluca Guida *    or promote products derived from this software without specific prior
23*b6cbf720SGianluca Guida *    written permission.
24*b6cbf720SGianluca Guida *
25*b6cbf720SGianluca Guida * THIS SOFTWARE IS PROVIDED BY WASABI SYSTEMS, INC. ``AS IS'' AND
26*b6cbf720SGianluca Guida * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
27*b6cbf720SGianluca Guida * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28*b6cbf720SGianluca Guida * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL WASABI SYSTEMS, INC
29*b6cbf720SGianluca Guida * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
30*b6cbf720SGianluca Guida * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
31*b6cbf720SGianluca Guida * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
32*b6cbf720SGianluca Guida * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
33*b6cbf720SGianluca Guida * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
34*b6cbf720SGianluca Guida * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
35*b6cbf720SGianluca Guida * POSSIBILITY OF SUCH DAMAGE.
36*b6cbf720SGianluca Guida */
37*b6cbf720SGianluca Guida
38*b6cbf720SGianluca Guida#include <machine/asm.h>
39*b6cbf720SGianluca Guida
40*b6cbf720SGianluca Guida/* LINTSTUB: Func: void *memcpy(void *dst, const void *src, size_t len) */
41*b6cbf720SGianluca GuidaENTRY(memcpy)
42*b6cbf720SGianluca Guida	pld	[r1]			/* Start preloading the source (r1 = src) */
43*b6cbf720SGianluca Guida	cmp	r2, #0x0c		/* r2 = len */
44*b6cbf720SGianluca Guida	ble	.Lmemcpy_short		/* <= 12 bytes */
45*b6cbf720SGianluca Guida	mov	r3, r0			/* We must not clobber r0 */
46*b6cbf720SGianluca Guida
47*b6cbf720SGianluca Guida	/* Word-align the destination buffer: copy 4 - (dst & 3) leading bytes */
48*b6cbf720SGianluca Guida	ands	ip, r3, #0x03		/* Already word aligned? */
49*b6cbf720SGianluca Guida	beq	.Lmemcpy_wordaligned	/* Yup */
50*b6cbf720SGianluca Guida	cmp	ip, #0x02		/* dst & 3: 1 -> LT, 2 -> EQ, 3 -> GT; flags stay live below */
51*b6cbf720SGianluca Guida	ldrb	ip, [r1], #0x01		/* First byte is always copied */
52*b6cbf720SGianluca Guida	sub	r2, r2, #0x01
53*b6cbf720SGianluca Guida	strb	ip, [r3], #0x01
54*b6cbf720SGianluca Guida	ldrleb	ip, [r1], #0x01		/* Second byte when dst & 3 was 1 or 2 */
55*b6cbf720SGianluca Guida	suble	r2, r2, #0x01
56*b6cbf720SGianluca Guida	strleb	ip, [r3], #0x01
57*b6cbf720SGianluca Guida	ldrltb	ip, [r1], #0x01		/* Third byte when dst & 3 was 1 */
58*b6cbf720SGianluca Guida	sublt	r2, r2, #0x01
59*b6cbf720SGianluca Guida	strltb	ip, [r3], #0x01
60*b6cbf720SGianluca Guida
61*b6cbf720SGianluca Guida	/* Destination buffer is now word aligned */
62*b6cbf720SGianluca Guida.Lmemcpy_wordaligned:
63*b6cbf720SGianluca Guida	ands	ip, r1, #0x03		/* Is src also word-aligned? */
64*b6cbf720SGianluca Guida	bne	.Lmemcpy_bad_align	/* Nope. Things just got bad (ip = src & 3) */
65*b6cbf720SGianluca Guida
66*b6cbf720SGianluca Guida	/* Quad-align the destination buffer (8-byte boundary, as needed by strd) */
67*b6cbf720SGianluca Guida	tst	r3, #0x07		/* Already quad aligned? */
68*b6cbf720SGianluca Guida	ldrne	ip, [r1], #0x04		/* No: copy one word to get there */
69*b6cbf720SGianluca Guida	stmfd	sp!, {r4-r9}		/* Free up some registers */
70*b6cbf720SGianluca Guida	subne	r2, r2, #0x04
71*b6cbf720SGianluca Guida	strne	ip, [r3], #0x04
72*b6cbf720SGianluca Guida
73*b6cbf720SGianluca Guida	/* Destination buffer quad aligned, source is at least word aligned */
74*b6cbf720SGianluca Guida	subs	r2, r2, #0x80		/* Keep len biased by -128 while looping */
75*b6cbf720SGianluca Guida	blt	.Lmemcpy_w_lessthan128
76*b6cbf720SGianluca Guida
77*b6cbf720SGianluca Guida	/* Copy 128 bytes at a time: word loads interleaved with 8-byte stores */
78*b6cbf720SGianluca Guida.Lmemcpy_w_loop128:
79*b6cbf720SGianluca Guida	ldr	r4, [r1], #0x04		/* LD:00-03 */
80*b6cbf720SGianluca Guida	ldr	r5, [r1], #0x04		/* LD:04-07 */
81*b6cbf720SGianluca Guida	pld	[r1, #0x18]		/* Prefetch 0x20 */
82*b6cbf720SGianluca Guida	ldr	r6, [r1], #0x04		/* LD:08-0b */
83*b6cbf720SGianluca Guida	ldr	r7, [r1], #0x04		/* LD:0c-0f */
84*b6cbf720SGianluca Guida	ldr	r8, [r1], #0x04		/* LD:10-13 */
85*b6cbf720SGianluca Guida	ldr	r9, [r1], #0x04		/* LD:14-17 */
86*b6cbf720SGianluca Guida	strd	r4, [r3], #0x08		/* ST:00-07 */
87*b6cbf720SGianluca Guida	ldr	r4, [r1], #0x04		/* LD:18-1b */
88*b6cbf720SGianluca Guida	ldr	r5, [r1], #0x04		/* LD:1c-1f */
89*b6cbf720SGianluca Guida	strd	r6, [r3], #0x08		/* ST:08-0f */
90*b6cbf720SGianluca Guida	ldr	r6, [r1], #0x04		/* LD:20-23 */
91*b6cbf720SGianluca Guida	ldr	r7, [r1], #0x04		/* LD:24-27 */
92*b6cbf720SGianluca Guida	pld	[r1, #0x18]		/* Prefetch 0x40 */
93*b6cbf720SGianluca Guida	strd	r8, [r3], #0x08		/* ST:10-17 */
94*b6cbf720SGianluca Guida	ldr	r8, [r1], #0x04		/* LD:28-2b */
95*b6cbf720SGianluca Guida	ldr	r9, [r1], #0x04		/* LD:2c-2f */
96*b6cbf720SGianluca Guida	strd	r4, [r3], #0x08		/* ST:18-1f */
97*b6cbf720SGianluca Guida	ldr	r4, [r1], #0x04		/* LD:30-33 */
98*b6cbf720SGianluca Guida	ldr	r5, [r1], #0x04		/* LD:34-37 */
99*b6cbf720SGianluca Guida	strd	r6, [r3], #0x08		/* ST:20-27 */
100*b6cbf720SGianluca Guida	ldr	r6, [r1], #0x04		/* LD:38-3b */
101*b6cbf720SGianluca Guida	ldr	r7, [r1], #0x04		/* LD:3c-3f */
102*b6cbf720SGianluca Guida	strd	r8, [r3], #0x08		/* ST:28-2f */
103*b6cbf720SGianluca Guida	ldr	r8, [r1], #0x04		/* LD:40-43 */
104*b6cbf720SGianluca Guida	ldr	r9, [r1], #0x04		/* LD:44-47 */
105*b6cbf720SGianluca Guida	pld	[r1, #0x18]		/* Prefetch 0x60 */
106*b6cbf720SGianluca Guida	strd	r4, [r3], #0x08		/* ST:30-37 */
107*b6cbf720SGianluca Guida	ldr	r4, [r1], #0x04		/* LD:48-4b */
108*b6cbf720SGianluca Guida	ldr	r5, [r1], #0x04		/* LD:4c-4f */
109*b6cbf720SGianluca Guida	strd	r6, [r3], #0x08		/* ST:38-3f */
110*b6cbf720SGianluca Guida	ldr	r6, [r1], #0x04		/* LD:50-53 */
111*b6cbf720SGianluca Guida	ldr	r7, [r1], #0x04		/* LD:54-57 */
112*b6cbf720SGianluca Guida	strd	r8, [r3], #0x08		/* ST:40-47 */
113*b6cbf720SGianluca Guida	ldr	r8, [r1], #0x04		/* LD:58-5b */
114*b6cbf720SGianluca Guida	ldr	r9, [r1], #0x04		/* LD:5c-5f */
115*b6cbf720SGianluca Guida	strd	r4, [r3], #0x08		/* ST:48-4f */
116*b6cbf720SGianluca Guida	ldr	r4, [r1], #0x04		/* LD:60-63 */
117*b6cbf720SGianluca Guida	ldr	r5, [r1], #0x04		/* LD:64-67 */
118*b6cbf720SGianluca Guida	pld	[r1, #0x18]		/* Prefetch 0x80 */
119*b6cbf720SGianluca Guida	strd	r6, [r3], #0x08		/* ST:50-57 */
120*b6cbf720SGianluca Guida	ldr	r6, [r1], #0x04		/* LD:68-6b */
121*b6cbf720SGianluca Guida	ldr	r7, [r1], #0x04		/* LD:6c-6f */
122*b6cbf720SGianluca Guida	strd	r8, [r3], #0x08		/* ST:58-5f */
123*b6cbf720SGianluca Guida	ldr	r8, [r1], #0x04		/* LD:70-73 */
124*b6cbf720SGianluca Guida	ldr	r9, [r1], #0x04		/* LD:74-77 */
125*b6cbf720SGianluca Guida	strd	r4, [r3], #0x08		/* ST:60-67 */
126*b6cbf720SGianluca Guida	ldr	r4, [r1], #0x04		/* LD:78-7b */
127*b6cbf720SGianluca Guida	ldr	r5, [r1], #0x04		/* LD:7c-7f */
128*b6cbf720SGianluca Guida	strd	r6, [r3], #0x08		/* ST:68-6f */
129*b6cbf720SGianluca Guida	strd	r8, [r3], #0x08		/* ST:70-77 */
130*b6cbf720SGianluca Guida	subs	r2, r2, #0x80		/* Another full 128 bytes left? (flags survive the strd) */
131*b6cbf720SGianluca Guida	strd	r4, [r3], #0x08		/* ST:78-7f */
132*b6cbf720SGianluca Guida	bge	.Lmemcpy_w_loop128
133*b6cbf720SGianluca Guida
134*b6cbf720SGianluca Guida.Lmemcpy_w_lessthan128:
135*b6cbf720SGianluca Guida	adds	r2, r2, #0x80		/* Adjust for extra sub */
136*b6cbf720SGianluca Guida	ldmeqfd	sp!, {r4-r9}
137*b6cbf720SGianluca Guida	bxeq	lr			/* Return now if done */
138*b6cbf720SGianluca Guida	subs	r2, r2, #0x20		/* Keep len biased by -32 while looping */
139*b6cbf720SGianluca Guida	blt	.Lmemcpy_w_lessthan32
140*b6cbf720SGianluca Guida
141*b6cbf720SGianluca Guida	/* Copy 32 bytes at a time */
142*b6cbf720SGianluca Guida.Lmemcpy_w_loop32:
143*b6cbf720SGianluca Guida	ldr	r4, [r1], #0x04
144*b6cbf720SGianluca Guida	ldr	r5, [r1], #0x04
145*b6cbf720SGianluca Guida	pld	[r1, #0x18]		/* Prefetch 0x20 bytes past the start of this chunk */
146*b6cbf720SGianluca Guida	ldr	r6, [r1], #0x04
147*b6cbf720SGianluca Guida	ldr	r7, [r1], #0x04
148*b6cbf720SGianluca Guida	ldr	r8, [r1], #0x04
149*b6cbf720SGianluca Guida	ldr	r9, [r1], #0x04
150*b6cbf720SGianluca Guida	strd	r4, [r3], #0x08
151*b6cbf720SGianluca Guida	ldr	r4, [r1], #0x04
152*b6cbf720SGianluca Guida	ldr	r5, [r1], #0x04
153*b6cbf720SGianluca Guida	strd	r6, [r3], #0x08
154*b6cbf720SGianluca Guida	strd	r8, [r3], #0x08
155*b6cbf720SGianluca Guida	subs	r2, r2, #0x20		/* Another full 32 bytes left? */
156*b6cbf720SGianluca Guida	strd	r4, [r3], #0x08
157*b6cbf720SGianluca Guida	bge	.Lmemcpy_w_loop32
158*b6cbf720SGianluca Guida
159*b6cbf720SGianluca Guida.Lmemcpy_w_lessthan32:
160*b6cbf720SGianluca Guida	adds	r2, r2, #0x20		/* Adjust for extra sub */
161*b6cbf720SGianluca Guida	ldmeqfd	sp!, {r4-r9}
162*b6cbf720SGianluca Guida	bxeq	lr			/* Return now if done */
163*b6cbf720SGianluca Guida
164*b6cbf720SGianluca Guida	and	r4, r2, #0x18		/* r4 = 0, 8, 16 or 24: bytes to copy in 8-byte groups */
165*b6cbf720SGianluca Guida	rsbs	r4, r4, #0x18		/* r4 = 8 * (number of groups below to skip) */
166*b6cbf720SGianluca Guida	addne	pc, pc, r4, lsl #1	/* Each group is 16 bytes of code; skip r4 * 2 bytes */
167*b6cbf720SGianluca Guida	nop				/* Padding: pc above reads as the instruction after this */
168*b6cbf720SGianluca Guida
169*b6cbf720SGianluca Guida	/* At least 24 bytes remaining */
170*b6cbf720SGianluca Guida	ldr	r4, [r1], #0x04
171*b6cbf720SGianluca Guida	ldr	r5, [r1], #0x04
172*b6cbf720SGianluca Guida	sub	r2, r2, #0x08
173*b6cbf720SGianluca Guida	strd	r4, [r3], #0x08
174*b6cbf720SGianluca Guida
175*b6cbf720SGianluca Guida	/* At least 16 bytes remaining */
176*b6cbf720SGianluca Guida	ldr	r4, [r1], #0x04
177*b6cbf720SGianluca Guida	ldr	r5, [r1], #0x04
178*b6cbf720SGianluca Guida	sub	r2, r2, #0x08
179*b6cbf720SGianluca Guida	strd	r4, [r3], #0x08
180*b6cbf720SGianluca Guida
181*b6cbf720SGianluca Guida	/* At least 8 bytes remaining */
182*b6cbf720SGianluca Guida	ldr	r4, [r1], #0x04
183*b6cbf720SGianluca Guida	ldr	r5, [r1], #0x04
184*b6cbf720SGianluca Guida	subs	r2, r2, #0x08		/* Sets Z for the return test below when nothing is left */
185*b6cbf720SGianluca Guida	strd	r4, [r3], #0x08
186*b6cbf720SGianluca Guida
187*b6cbf720SGianluca Guida	/* Less than 8 bytes remaining (also the target when all three groups are skipped) */
188*b6cbf720SGianluca Guida	ldmfd	sp!, {r4-r9}
189*b6cbf720SGianluca Guida	bxeq	lr			/* Return now if done */
190*b6cbf720SGianluca Guida	subs	r2, r2, #0x04		/* A whole word left? */
191*b6cbf720SGianluca Guida	ldrge	ip, [r1], #0x04
192*b6cbf720SGianluca Guida	strge	ip, [r3], #0x04
193*b6cbf720SGianluca Guida	bxeq	lr			/* Return now if done */
194*b6cbf720SGianluca Guida	addlt	r2, r2, #0x04		/* No word was copied: undo the sub */
195*b6cbf720SGianluca Guida	ldrb	ip, [r1], #0x01		/* 1 to 3 bytes remain: first byte */
196*b6cbf720SGianluca Guida	cmp	r2, #0x02
197*b6cbf720SGianluca Guida	ldrgeb	r2, [r1], #0x01		/* Second byte if >= 2 remain (len is dead from here) */
198*b6cbf720SGianluca Guida	strb	ip, [r3], #0x01
199*b6cbf720SGianluca Guida	ldrgtb	ip, [r1]		/* Third byte if 3 remain */
200*b6cbf720SGianluca Guida	strgeb	r2, [r3], #0x01
201*b6cbf720SGianluca Guida	strgtb	ip, [r3]
202*b6cbf720SGianluca Guida	bx	lr
203*b6cbf720SGianluca Guida
204*b6cbf720SGianluca Guida
205*b6cbf720SGianluca Guida/*
206*b6cbf720SGianluca Guida * At this point, it has not been possible to word align both buffers.
207*b6cbf720SGianluca Guida * The destination buffer is word aligned, but the source buffer is not.
208*b6cbf720SGianluca Guida */
209*b6cbf720SGianluca Guida.Lmemcpy_bad_align:
210*b6cbf720SGianluca Guida	stmfd	sp!, {r4-r7}		/* Free up some registers */
211*b6cbf720SGianluca Guida	bic	r1, r1, #0x03		/* Round src down to a word boundary */
212*b6cbf720SGianluca Guida	cmp	ip, #2			/* ip = src & 3 (1, 2 or 3) from the alignment test */
213*b6cbf720SGianluca Guida	ldr	ip, [r1], #0x04		/* ip = aligned word holding the first source bytes */
214*b6cbf720SGianluca Guida	bgt	.Lmemcpy_bad3		/* src & 3 == 3 */
215*b6cbf720SGianluca Guida	beq	.Lmemcpy_bad2		/* src & 3 == 2 */
216*b6cbf720SGianluca Guida	b	.Lmemcpy_bad1		/* src & 3 == 1 */
217*b6cbf720SGianluca Guida
218*b6cbf720SGianluca Guida.Lmemcpy_bad1_loop16:		/* src & 3 == 1: 16 bytes per pass, with preload */
219*b6cbf720SGianluca Guida#ifdef __ARMEB__
220*b6cbf720SGianluca Guida	mov	r4, ip, lsl #8
221*b6cbf720SGianluca Guida#else
222*b6cbf720SGianluca Guida	mov	r4, ip, lsr #8
223*b6cbf720SGianluca Guida#endif
224*b6cbf720SGianluca Guida	ldr	r5, [r1], #0x04
225*b6cbf720SGianluca Guida	pld	[r1, #0x018]
226*b6cbf720SGianluca Guida	ldr	r6, [r1], #0x04
227*b6cbf720SGianluca Guida	ldr	r7, [r1], #0x04
228*b6cbf720SGianluca Guida	ldr	ip, [r1], #0x04		/* ip carries its 3 unused bytes into the next pass */
229*b6cbf720SGianluca Guida#ifdef __ARMEB__
230*b6cbf720SGianluca Guida	orr	r4, r4, r5, lsr #24
231*b6cbf720SGianluca Guida	mov	r5, r5, lsl #8
232*b6cbf720SGianluca Guida	orr	r5, r5, r6, lsr #24
233*b6cbf720SGianluca Guida	mov	r6, r6, lsl #8
234*b6cbf720SGianluca Guida	orr	r6, r6, r7, lsr #24
235*b6cbf720SGianluca Guida	mov	r7, r7, lsl #8
236*b6cbf720SGianluca Guida	orr	r7, r7, ip, lsr #24
237*b6cbf720SGianluca Guida#else
238*b6cbf720SGianluca Guida	orr	r4, r4, r5, lsl #24
239*b6cbf720SGianluca Guida	mov	r5, r5, lsr #8
240*b6cbf720SGianluca Guida	orr	r5, r5, r6, lsl #24
241*b6cbf720SGianluca Guida	mov	r6, r6, lsr #8
242*b6cbf720SGianluca Guida	orr	r6, r6, r7, lsl #24
243*b6cbf720SGianluca Guida	mov	r7, r7, lsr #8
244*b6cbf720SGianluca Guida	orr	r7, r7, ip, lsl #24
245*b6cbf720SGianluca Guida#endif
246*b6cbf720SGianluca Guida	str	r4, [r3], #0x04
247*b6cbf720SGianluca Guida	str	r5, [r3], #0x04
248*b6cbf720SGianluca Guida	str	r6, [r3], #0x04
249*b6cbf720SGianluca Guida	str	r7, [r3], #0x04
250*b6cbf720SGianluca Guida	sub	r2, r2, #0x10
251*b6cbf720SGianluca Guida
252*b6cbf720SGianluca Guida.Lmemcpy_bad1:			/* Entry point: ip = first aligned source word */
253*b6cbf720SGianluca Guida	cmp	r2, #0x20
254*b6cbf720SGianluca Guida	bge	.Lmemcpy_bad1_loop16	/* >= 32 bytes left: use the preloading loop */
255*b6cbf720SGianluca Guida	cmp	r2, #0x10
256*b6cbf720SGianluca Guida	blt	.Lmemcpy_bad1_loop16_short	/* < 16 bytes left: go word by word */
257*b6cbf720SGianluca Guida
258*b6cbf720SGianluca Guida	/* copy last 16 bytes (without preload) */
259*b6cbf720SGianluca Guida#ifdef __ARMEB__
260*b6cbf720SGianluca Guida	mov	r4, ip, lsl #8
261*b6cbf720SGianluca Guida#else
262*b6cbf720SGianluca Guida	mov	r4, ip, lsr #8
263*b6cbf720SGianluca Guida#endif
264*b6cbf720SGianluca Guida	ldr	r5, [r1], #0x04
265*b6cbf720SGianluca Guida	ldr	r6, [r1], #0x04
266*b6cbf720SGianluca Guida	ldr	r7, [r1], #0x04
267*b6cbf720SGianluca Guida	ldr	ip, [r1], #0x04
268*b6cbf720SGianluca Guida#ifdef __ARMEB__
269*b6cbf720SGianluca Guida	orr	r4, r4, r5, lsr #24
270*b6cbf720SGianluca Guida	mov	r5, r5, lsl #8
271*b6cbf720SGianluca Guida	orr	r5, r5, r6, lsr #24
272*b6cbf720SGianluca Guida	mov	r6, r6, lsl #8
273*b6cbf720SGianluca Guida	orr	r6, r6, r7, lsr #24
274*b6cbf720SGianluca Guida	mov	r7, r7, lsl #8
275*b6cbf720SGianluca Guida	orr	r7, r7, ip, lsr #24
276*b6cbf720SGianluca Guida#else
277*b6cbf720SGianluca Guida	orr	r4, r4, r5, lsl #24
278*b6cbf720SGianluca Guida	mov	r5, r5, lsr #8
279*b6cbf720SGianluca Guida	orr	r5, r5, r6, lsl #24
280*b6cbf720SGianluca Guida	mov	r6, r6, lsr #8
281*b6cbf720SGianluca Guida	orr	r6, r6, r7, lsl #24
282*b6cbf720SGianluca Guida	mov	r7, r7, lsr #8
283*b6cbf720SGianluca Guida	orr	r7, r7, ip, lsl #24
284*b6cbf720SGianluca Guida#endif
285*b6cbf720SGianluca Guida	str	r4, [r3], #0x04
286*b6cbf720SGianluca Guida	str	r5, [r3], #0x04
287*b6cbf720SGianluca Guida	str	r6, [r3], #0x04
288*b6cbf720SGianluca Guida	str	r7, [r3], #0x04
289*b6cbf720SGianluca Guida	subs	r2, r2, #0x10
290*b6cbf720SGianluca Guida	ldmeqfd	sp!, {r4-r7}
291*b6cbf720SGianluca Guida	bxeq	lr			/* Return now if done */
292*b6cbf720SGianluca Guida
293*b6cbf720SGianluca Guida.Lmemcpy_bad1_loop16_short:
294*b6cbf720SGianluca Guida	subs	r2, r2, #0x04		/* Bias len by -4: is a whole word left? */
295*b6cbf720SGianluca Guida	sublt	r1, r1, #0x03		/* No: rewind src to the first uncopied byte */
296*b6cbf720SGianluca Guida	blt	.Lmemcpy_bad_done
297*b6cbf720SGianluca Guida
298*b6cbf720SGianluca Guida.Lmemcpy_bad1_loop4:		/* One word per pass while len (biased by -4) >= 0 */
299*b6cbf720SGianluca Guida#ifdef __ARMEB__
300*b6cbf720SGianluca Guida	mov	r4, ip, lsl #8
301*b6cbf720SGianluca Guida#else
302*b6cbf720SGianluca Guida	mov	r4, ip, lsr #8
303*b6cbf720SGianluca Guida#endif
304*b6cbf720SGianluca Guida	ldr	ip, [r1], #0x04
305*b6cbf720SGianluca Guida	subs	r2, r2, #0x04
306*b6cbf720SGianluca Guida#ifdef __ARMEB__
307*b6cbf720SGianluca Guida	orr	r4, r4, ip, lsr #24
308*b6cbf720SGianluca Guida#else
309*b6cbf720SGianluca Guida	orr	r4, r4, ip, lsl #24
310*b6cbf720SGianluca Guida#endif
311*b6cbf720SGianluca Guida	str	r4, [r3], #0x04
312*b6cbf720SGianluca Guida	bge	.Lmemcpy_bad1_loop4
313*b6cbf720SGianluca Guida	sub	r1, r1, #0x03		/* Rewind src to the first uncopied byte */
314*b6cbf720SGianluca Guida	b	.Lmemcpy_bad_done
315*b6cbf720SGianluca Guida
316*b6cbf720SGianluca Guida.Lmemcpy_bad2_loop16:		/* src & 3 == 2: 16 bytes per pass, with preload */
317*b6cbf720SGianluca Guida#ifdef __ARMEB__
318*b6cbf720SGianluca Guida	mov	r4, ip, lsl #16
319*b6cbf720SGianluca Guida#else
320*b6cbf720SGianluca Guida	mov	r4, ip, lsr #16
321*b6cbf720SGianluca Guida#endif
322*b6cbf720SGianluca Guida	ldr	r5, [r1], #0x04
323*b6cbf720SGianluca Guida	pld	[r1, #0x018]
324*b6cbf720SGianluca Guida	ldr	r6, [r1], #0x04
325*b6cbf720SGianluca Guida	ldr	r7, [r1], #0x04
326*b6cbf720SGianluca Guida	ldr	ip, [r1], #0x04		/* ip carries its 2 unused bytes into the next pass */
327*b6cbf720SGianluca Guida#ifdef __ARMEB__
328*b6cbf720SGianluca Guida	orr	r4, r4, r5, lsr #16
329*b6cbf720SGianluca Guida	mov	r5, r5, lsl #16
330*b6cbf720SGianluca Guida	orr	r5, r5, r6, lsr #16
331*b6cbf720SGianluca Guida	mov	r6, r6, lsl #16
332*b6cbf720SGianluca Guida	orr	r6, r6, r7, lsr #16
333*b6cbf720SGianluca Guida	mov	r7, r7, lsl #16
334*b6cbf720SGianluca Guida	orr	r7, r7, ip, lsr #16
335*b6cbf720SGianluca Guida#else
336*b6cbf720SGianluca Guida	orr	r4, r4, r5, lsl #16
337*b6cbf720SGianluca Guida	mov	r5, r5, lsr #16
338*b6cbf720SGianluca Guida	orr	r5, r5, r6, lsl #16
339*b6cbf720SGianluca Guida	mov	r6, r6, lsr #16
340*b6cbf720SGianluca Guida	orr	r6, r6, r7, lsl #16
341*b6cbf720SGianluca Guida	mov	r7, r7, lsr #16
342*b6cbf720SGianluca Guida	orr	r7, r7, ip, lsl #16
343*b6cbf720SGianluca Guida#endif
344*b6cbf720SGianluca Guida	str	r4, [r3], #0x04
345*b6cbf720SGianluca Guida	str	r5, [r3], #0x04
346*b6cbf720SGianluca Guida	str	r6, [r3], #0x04
347*b6cbf720SGianluca Guida	str	r7, [r3], #0x04
348*b6cbf720SGianluca Guida	sub	r2, r2, #0x10
349*b6cbf720SGianluca Guida
350*b6cbf720SGianluca Guida.Lmemcpy_bad2:			/* Entry point: ip = first aligned source word */
351*b6cbf720SGianluca Guida	cmp	r2, #0x20
352*b6cbf720SGianluca Guida	bge	.Lmemcpy_bad2_loop16	/* >= 32 bytes left: use the preloading loop */
353*b6cbf720SGianluca Guida	cmp	r2, #0x10
354*b6cbf720SGianluca Guida	blt	.Lmemcpy_bad2_loop16_short	/* < 16 bytes left: go word by word */
355*b6cbf720SGianluca Guida
356*b6cbf720SGianluca Guida	/* copy last 16 bytes (without preload) */
357*b6cbf720SGianluca Guida#ifdef __ARMEB__
358*b6cbf720SGianluca Guida	mov	r4, ip, lsl #16
359*b6cbf720SGianluca Guida#else
360*b6cbf720SGianluca Guida	mov	r4, ip, lsr #16
361*b6cbf720SGianluca Guida#endif
362*b6cbf720SGianluca Guida	ldr	r5, [r1], #0x04
363*b6cbf720SGianluca Guida	ldr	r6, [r1], #0x04
364*b6cbf720SGianluca Guida	ldr	r7, [r1], #0x04
365*b6cbf720SGianluca Guida	ldr	ip, [r1], #0x04
366*b6cbf720SGianluca Guida#ifdef __ARMEB__
367*b6cbf720SGianluca Guida	orr	r4, r4, r5, lsr #16
368*b6cbf720SGianluca Guida	mov	r5, r5, lsl #16
369*b6cbf720SGianluca Guida	orr	r5, r5, r6, lsr #16
370*b6cbf720SGianluca Guida	mov	r6, r6, lsl #16
371*b6cbf720SGianluca Guida	orr	r6, r6, r7, lsr #16
372*b6cbf720SGianluca Guida	mov	r7, r7, lsl #16
373*b6cbf720SGianluca Guida	orr	r7, r7, ip, lsr #16
374*b6cbf720SGianluca Guida#else
375*b6cbf720SGianluca Guida	orr	r4, r4, r5, lsl #16
376*b6cbf720SGianluca Guida	mov	r5, r5, lsr #16
377*b6cbf720SGianluca Guida	orr	r5, r5, r6, lsl #16
378*b6cbf720SGianluca Guida	mov	r6, r6, lsr #16
379*b6cbf720SGianluca Guida	orr	r6, r6, r7, lsl #16
380*b6cbf720SGianluca Guida	mov	r7, r7, lsr #16
381*b6cbf720SGianluca Guida	orr	r7, r7, ip, lsl #16
382*b6cbf720SGianluca Guida#endif
383*b6cbf720SGianluca Guida	str	r4, [r3], #0x04
384*b6cbf720SGianluca Guida	str	r5, [r3], #0x04
385*b6cbf720SGianluca Guida	str	r6, [r3], #0x04
386*b6cbf720SGianluca Guida	str	r7, [r3], #0x04
387*b6cbf720SGianluca Guida	subs	r2, r2, #0x10
388*b6cbf720SGianluca Guida	ldmeqfd	sp!, {r4-r7}
389*b6cbf720SGianluca Guida	bxeq	lr			/* Return now if done */
390*b6cbf720SGianluca Guida
391*b6cbf720SGianluca Guida.Lmemcpy_bad2_loop16_short:
392*b6cbf720SGianluca Guida	subs	r2, r2, #0x04		/* Bias len by -4: is a whole word left? */
393*b6cbf720SGianluca Guida	sublt	r1, r1, #0x02		/* No: rewind src to the first uncopied byte */
394*b6cbf720SGianluca Guida	blt	.Lmemcpy_bad_done
395*b6cbf720SGianluca Guida
396*b6cbf720SGianluca Guida.Lmemcpy_bad2_loop4:		/* One word per pass while len (biased by -4) >= 0 */
397*b6cbf720SGianluca Guida#ifdef __ARMEB__
398*b6cbf720SGianluca Guida	mov	r4, ip, lsl #16
399*b6cbf720SGianluca Guida#else
400*b6cbf720SGianluca Guida	mov	r4, ip, lsr #16
401*b6cbf720SGianluca Guida#endif
402*b6cbf720SGianluca Guida	ldr	ip, [r1], #0x04
403*b6cbf720SGianluca Guida	subs	r2, r2, #0x04
404*b6cbf720SGianluca Guida#ifdef __ARMEB__
405*b6cbf720SGianluca Guida	orr	r4, r4, ip, lsr #16
406*b6cbf720SGianluca Guida#else
407*b6cbf720SGianluca Guida	orr	r4, r4, ip, lsl #16
408*b6cbf720SGianluca Guida#endif
409*b6cbf720SGianluca Guida	str	r4, [r3], #0x04
410*b6cbf720SGianluca Guida	bge	.Lmemcpy_bad2_loop4
411*b6cbf720SGianluca Guida	sub	r1, r1, #0x02		/* Rewind src to the first uncopied byte */
412*b6cbf720SGianluca Guida	b	.Lmemcpy_bad_done
413*b6cbf720SGianluca Guida
414*b6cbf720SGianluca Guida.Lmemcpy_bad3_loop16:		/* src & 3 == 3: 16 bytes per pass, with preload */
415*b6cbf720SGianluca Guida#ifdef __ARMEB__
416*b6cbf720SGianluca Guida	mov	r4, ip, lsl #24
417*b6cbf720SGianluca Guida#else
418*b6cbf720SGianluca Guida	mov	r4, ip, lsr #24
419*b6cbf720SGianluca Guida#endif
420*b6cbf720SGianluca Guida	ldr	r5, [r1], #0x04
421*b6cbf720SGianluca Guida	pld	[r1, #0x018]
422*b6cbf720SGianluca Guida	ldr	r6, [r1], #0x04
423*b6cbf720SGianluca Guida	ldr	r7, [r1], #0x04
424*b6cbf720SGianluca Guida	ldr	ip, [r1], #0x04		/* ip carries its 1 unused byte into the next pass */
425*b6cbf720SGianluca Guida#ifdef __ARMEB__
426*b6cbf720SGianluca Guida	orr	r4, r4, r5, lsr #8
427*b6cbf720SGianluca Guida	mov	r5, r5, lsl #24
428*b6cbf720SGianluca Guida	orr	r5, r5, r6, lsr #8
429*b6cbf720SGianluca Guida	mov	r6, r6, lsl #24
430*b6cbf720SGianluca Guida	orr	r6, r6, r7, lsr #8
431*b6cbf720SGianluca Guida	mov	r7, r7, lsl #24
432*b6cbf720SGianluca Guida	orr	r7, r7, ip, lsr #8
433*b6cbf720SGianluca Guida#else
434*b6cbf720SGianluca Guida	orr	r4, r4, r5, lsl #8
435*b6cbf720SGianluca Guida	mov	r5, r5, lsr #24
436*b6cbf720SGianluca Guida	orr	r5, r5, r6, lsl #8
437*b6cbf720SGianluca Guida	mov	r6, r6, lsr #24
438*b6cbf720SGianluca Guida	orr	r6, r6, r7, lsl #8
439*b6cbf720SGianluca Guida	mov	r7, r7, lsr #24
440*b6cbf720SGianluca Guida	orr	r7, r7, ip, lsl #8
441*b6cbf720SGianluca Guida#endif
442*b6cbf720SGianluca Guida	str	r4, [r3], #0x04
443*b6cbf720SGianluca Guida	str	r5, [r3], #0x04
444*b6cbf720SGianluca Guida	str	r6, [r3], #0x04
445*b6cbf720SGianluca Guida	str	r7, [r3], #0x04
446*b6cbf720SGianluca Guida	sub	r2, r2, #0x10
447*b6cbf720SGianluca Guida
448*b6cbf720SGianluca Guida.Lmemcpy_bad3:			/* Entry point: ip = first aligned source word */
449*b6cbf720SGianluca Guida	cmp	r2, #0x20
450*b6cbf720SGianluca Guida	bge	.Lmemcpy_bad3_loop16	/* >= 32 bytes left: use the preloading loop */
451*b6cbf720SGianluca Guida	cmp	r2, #0x10
452*b6cbf720SGianluca Guida	blt	.Lmemcpy_bad3_loop16_short	/* < 16 bytes left: go word by word */
453*b6cbf720SGianluca Guida
454*b6cbf720SGianluca Guida	/* copy last 16 bytes (without preload) */
455*b6cbf720SGianluca Guida#ifdef __ARMEB__
456*b6cbf720SGianluca Guida	mov	r4, ip, lsl #24
457*b6cbf720SGianluca Guida#else
458*b6cbf720SGianluca Guida	mov	r4, ip, lsr #24
459*b6cbf720SGianluca Guida#endif
460*b6cbf720SGianluca Guida	ldr	r5, [r1], #0x04
461*b6cbf720SGianluca Guida	ldr	r6, [r1], #0x04
462*b6cbf720SGianluca Guida	ldr	r7, [r1], #0x04
463*b6cbf720SGianluca Guida	ldr	ip, [r1], #0x04
464*b6cbf720SGianluca Guida#ifdef __ARMEB__
465*b6cbf720SGianluca Guida	orr	r4, r4, r5, lsr #8
466*b6cbf720SGianluca Guida	mov	r5, r5, lsl #24
467*b6cbf720SGianluca Guida	orr	r5, r5, r6, lsr #8
468*b6cbf720SGianluca Guida	mov	r6, r6, lsl #24
469*b6cbf720SGianluca Guida	orr	r6, r6, r7, lsr #8
470*b6cbf720SGianluca Guida	mov	r7, r7, lsl #24
471*b6cbf720SGianluca Guida	orr	r7, r7, ip, lsr #8
472*b6cbf720SGianluca Guida#else
473*b6cbf720SGianluca Guida	orr	r4, r4, r5, lsl #8
474*b6cbf720SGianluca Guida	mov	r5, r5, lsr #24
475*b6cbf720SGianluca Guida	orr	r5, r5, r6, lsl #8
476*b6cbf720SGianluca Guida	mov	r6, r6, lsr #24
477*b6cbf720SGianluca Guida	orr	r6, r6, r7, lsl #8
478*b6cbf720SGianluca Guida	mov	r7, r7, lsr #24
479*b6cbf720SGianluca Guida	orr	r7, r7, ip, lsl #8
480*b6cbf720SGianluca Guida#endif
481*b6cbf720SGianluca Guida	str	r4, [r3], #0x04
482*b6cbf720SGianluca Guida	str	r5, [r3], #0x04
483*b6cbf720SGianluca Guida	str	r6, [r3], #0x04
484*b6cbf720SGianluca Guida	str	r7, [r3], #0x04
485*b6cbf720SGianluca Guida	subs	r2, r2, #0x10
486*b6cbf720SGianluca Guida	ldmeqfd	sp!, {r4-r7}
487*b6cbf720SGianluca Guida	bxeq	lr			/* Return now if done */
488*b6cbf720SGianluca Guida
489*b6cbf720SGianluca Guida.Lmemcpy_bad3_loop16_short:
490*b6cbf720SGianluca Guida	subs	r2, r2, #0x04		/* Bias len by -4: is a whole word left? */
491*b6cbf720SGianluca Guida	sublt	r1, r1, #0x01		/* No: rewind src to the first uncopied byte */
492*b6cbf720SGianluca Guida	blt	.Lmemcpy_bad_done
493*b6cbf720SGianluca Guida
494*b6cbf720SGianluca Guida.Lmemcpy_bad3_loop4:		/* One word per pass while len (biased by -4) >= 0 */
495*b6cbf720SGianluca Guida#ifdef __ARMEB__
496*b6cbf720SGianluca Guida	mov	r4, ip, lsl #24
497*b6cbf720SGianluca Guida#else
498*b6cbf720SGianluca Guida	mov	r4, ip, lsr #24
499*b6cbf720SGianluca Guida#endif
500*b6cbf720SGianluca Guida	ldr	ip, [r1], #0x04
501*b6cbf720SGianluca Guida	subs	r2, r2, #0x04
502*b6cbf720SGianluca Guida#ifdef __ARMEB__
503*b6cbf720SGianluca Guida	orr	r4, r4, ip, lsr #8
504*b6cbf720SGianluca Guida#else
505*b6cbf720SGianluca Guida	orr	r4, r4, ip, lsl #8
506*b6cbf720SGianluca Guida#endif
507*b6cbf720SGianluca Guida	str	r4, [r3], #0x04
508*b6cbf720SGianluca Guida	bge	.Lmemcpy_bad3_loop4
509*b6cbf720SGianluca Guida	sub	r1, r1, #0x01		/* Rewind src to the first uncopied byte; falls into bad_done */
510*b6cbf720SGianluca Guida
511*b6cbf720SGianluca Guida.Lmemcpy_bad_done:		/* Common tail of the misaligned-source paths */
512*b6cbf720SGianluca Guida	ldmfd	sp!, {r4-r7}
513*b6cbf720SGianluca Guida	adds	r2, r2, #0x04		/* Undo the -4 bias; r2 = bytes still to copy (0-3) */
514*b6cbf720SGianluca Guida	bxeq	lr			/* Return now if done */
515*b6cbf720SGianluca Guida	ldrb	ip, [r1], #0x01		/* 1 to 3 bytes remain: first byte */
516*b6cbf720SGianluca Guida	cmp	r2, #0x02
517*b6cbf720SGianluca Guida	ldrgeb	r2, [r1], #0x01		/* Second byte if >= 2 remain (len is dead from here) */
518*b6cbf720SGianluca Guida	strb	ip, [r3], #0x01
519*b6cbf720SGianluca Guida	ldrgtb	ip, [r1]		/* Third byte if 3 remain */
520*b6cbf720SGianluca Guida	strgeb	r2, [r3], #0x01
521*b6cbf720SGianluca Guida	strgtb	ip, [r3]
522*b6cbf720SGianluca Guida	bx	lr
523*b6cbf720SGianluca Guida
524*b6cbf720SGianluca Guida
525*b6cbf720SGianluca Guida/*
526*b6cbf720SGianluca Guida * Handle short copies (12 bytes or fewer), possibly misaligned.
527*b6cbf720SGianluca Guida * Some of these are *very* common, thanks to the network stack,
528*b6cbf720SGianluca Guida * and so are handled specially.
529*b6cbf720SGianluca Guida */
530*b6cbf720SGianluca Guida.Lmemcpy_short:
531*b6cbf720SGianluca Guida#ifndef _STANDALONE
532*b6cbf720SGianluca Guida	add	pc, pc, r2, lsl #2	/* Jump to table entry r2 (len); pc reads as the 0x00 entry */
533*b6cbf720SGianluca Guida	nop
534*b6cbf720SGianluca Guida	bx	lr			/* 0x00 */
535*b6cbf720SGianluca Guida	b	.Lmemcpy_bytewise	/* 0x01 */
536*b6cbf720SGianluca Guida	b	.Lmemcpy_bytewise	/* 0x02 */
537*b6cbf720SGianluca Guida	b	.Lmemcpy_bytewise	/* 0x03 */
538*b6cbf720SGianluca Guida	b	.Lmemcpy_4		/* 0x04 */
539*b6cbf720SGianluca Guida	b	.Lmemcpy_bytewise	/* 0x05 */
540*b6cbf720SGianluca Guida	b	.Lmemcpy_6		/* 0x06 */
541*b6cbf720SGianluca Guida	b	.Lmemcpy_bytewise	/* 0x07 */
542*b6cbf720SGianluca Guida	b	.Lmemcpy_8		/* 0x08 */
543*b6cbf720SGianluca Guida	b	.Lmemcpy_bytewise	/* 0x09 */
544*b6cbf720SGianluca Guida	b	.Lmemcpy_bytewise	/* 0x0a */
545*b6cbf720SGianluca Guida	b	.Lmemcpy_bytewise	/* 0x0b */
546*b6cbf720SGianluca Guida	b	.Lmemcpy_c		/* 0x0c */
#else
	/*
	 * There is no jump table in _STANDALONE builds, so a zero-length
	 * copy would fall straight into the byte loop below, which moves
	 * one byte before it tests the count and would then wrap r2.
	 */
	cmp	r2, #0x00
	bxeq	lr			/* Nothing to copy */
547*b6cbf720SGianluca Guida#endif
548*b6cbf720SGianluca Guida.Lmemcpy_bytewise:		/* Byte-at-a-time copy; requires r2 >= 1 on entry */
549*b6cbf720SGianluca Guida	mov	r3, r0			/* We must not clobber r0 */
550*b6cbf720SGianluca Guida	ldrb	ip, [r1], #0x01
551*b6cbf720SGianluca Guida1:	subs	r2, r2, #0x01
552*b6cbf720SGianluca Guida	strb	ip, [r3], #0x01
553*b6cbf720SGianluca Guida	ldrneb	ip, [r1], #0x01		/* Fetch the next byte only if one is still due */
554*b6cbf720SGianluca Guida	bne	1b
555*b6cbf720SGianluca Guida	bx	lr
556*b6cbf720SGianluca Guida
557*b6cbf720SGianluca Guida#ifndef _STANDALONE
558*b6cbf720SGianluca Guida/******************************************************************************
559*b6cbf720SGianluca Guida * Special case for 4 byte copies
560*b6cbf720SGianluca Guida */
561*b6cbf720SGianluca Guida#define	LMEMCPY_4_LOG2	6	/* 64 bytes */
562*b6cbf720SGianluca Guida#define	LMEMCPY_4_PAD	.align LMEMCPY_4_LOG2
563*b6cbf720SGianluca Guida	LMEMCPY_4_PAD
564*b6cbf720SGianluca Guida.Lmemcpy_4:
565*b6cbf720SGianluca Guida	and	r2, r1, #0x03
566*b6cbf720SGianluca Guida	orr	r2, r2, r0, lsl #2
567*b6cbf720SGianluca Guida	ands	r2, r2, #0x0f
568*b6cbf720SGianluca Guida	sub	r3, pc, #0x14
569*b6cbf720SGianluca Guida	addne	pc, r3, r2, lsl #LMEMCPY_4_LOG2
570*b6cbf720SGianluca Guida
571*b6cbf720SGianluca Guida/*
572*b6cbf720SGianluca Guida * 0000: dst is 32-bit aligned, src is 32-bit aligned
573*b6cbf720SGianluca Guida */
574*b6cbf720SGianluca Guida	ldr	r2, [r1]
575*b6cbf720SGianluca Guida	str	r2, [r0]
576*b6cbf720SGianluca Guida	bx	lr
577*b6cbf720SGianluca Guida	LMEMCPY_4_PAD
578*b6cbf720SGianluca Guida
579*b6cbf720SGianluca Guida/*
580*b6cbf720SGianluca Guida * 0001: dst is 32-bit aligned, src is 8-bit aligned
581*b6cbf720SGianluca Guida */
582*b6cbf720SGianluca Guida	ldr	r3, [r1, #-1]		/* BE:r3 = x012  LE:r3 = 210x */
583*b6cbf720SGianluca Guida	ldr	r2, [r1, #3]		/* BE:r2 = 3xxx  LE:r2 = xxx3 */
584*b6cbf720SGianluca Guida#ifdef __ARMEB__
585*b6cbf720SGianluca Guida	mov	r3, r3, lsl #8		/* r3 = 012. */
586*b6cbf720SGianluca Guida	orr	r3, r3, r2, lsr #24	/* r3 = 0123 */
587*b6cbf720SGianluca Guida#else
588*b6cbf720SGianluca Guida	mov	r3, r3, lsr #8		/* r3 = .210 */
589*b6cbf720SGianluca Guida	orr	r3, r3, r2, lsl #24	/* r3 = 3210 */
590*b6cbf720SGianluca Guida#endif
591*b6cbf720SGianluca Guida	str	r3, [r0]
592*b6cbf720SGianluca Guida	bx	lr
593*b6cbf720SGianluca Guida	LMEMCPY_4_PAD
594*b6cbf720SGianluca Guida
595*b6cbf720SGianluca Guida/*
596*b6cbf720SGianluca Guida * 0010: dst is 32-bit aligned, src is 16-bit aligned
597*b6cbf720SGianluca Guida */
598*b6cbf720SGianluca Guida#ifdef __ARMEB__
599*b6cbf720SGianluca Guida	ldrh	r3, [r1]
600*b6cbf720SGianluca Guida	ldrh	r2, [r1, #0x02]
601*b6cbf720SGianluca Guida#else
602*b6cbf720SGianluca Guida	ldrh	r3, [r1, #0x02]
603*b6cbf720SGianluca Guida	ldrh	r2, [r1]
604*b6cbf720SGianluca Guida#endif
605*b6cbf720SGianluca Guida	orr	r3, r2, r3, lsl #16
606*b6cbf720SGianluca Guida	str	r3, [r0]
607*b6cbf720SGianluca Guida	bx	lr
608*b6cbf720SGianluca Guida	LMEMCPY_4_PAD
609*b6cbf720SGianluca Guida
610*b6cbf720SGianluca Guida/*
611*b6cbf720SGianluca Guida * 0011: dst is 32-bit aligned, src is 8-bit aligned
612*b6cbf720SGianluca Guida */
613*b6cbf720SGianluca Guida	ldr	r3, [r1, #-3]		/* BE:r3 = xxx0  LE:r3 = 0xxx */
614*b6cbf720SGianluca Guida	ldr	r2, [r1, #1]		/* BE:r2 = 123x  LE:r2 = x321 */
615*b6cbf720SGianluca Guida#ifdef __ARMEB__
616*b6cbf720SGianluca Guida	mov	r3, r3, lsl #24		/* r3 = 0... */
617*b6cbf720SGianluca Guida	orr	r3, r3, r2, lsr #8	/* r3 = 0123 */
618*b6cbf720SGianluca Guida#else
619*b6cbf720SGianluca Guida	mov	r3, r3, lsr #24		/* r3 = ...0 */
620*b6cbf720SGianluca Guida	orr	r3, r3, r2, lsl #8	/* r3 = 3210 */
621*b6cbf720SGianluca Guida#endif
622*b6cbf720SGianluca Guida	str	r3, [r0]
623*b6cbf720SGianluca Guida	bx	lr
624*b6cbf720SGianluca Guida	LMEMCPY_4_PAD
625*b6cbf720SGianluca Guida
626*b6cbf720SGianluca Guida/*
627*b6cbf720SGianluca Guida * 0100: dst is 8-bit aligned, src is 32-bit aligned
 *
 * One word load, then the destination is written as byte, halfword, byte
 * (offsets 0, 1 and 3).
628*b6cbf720SGianluca Guida */
629*b6cbf720SGianluca Guida	ldr	r2, [r1]		/* BE:r2 = 0123  LE:r2 = 3210 */
630*b6cbf720SGianluca Guida#ifdef __ARMEB__
631*b6cbf720SGianluca Guida	strb	r2, [r0, #0x03]		/* byte 3 */
632*b6cbf720SGianluca Guida	mov	r3, r2, lsr #8		/* r3 = .012 */
633*b6cbf720SGianluca Guida	mov	r1, r2, lsr #24		/* r1 = ...0 */
634*b6cbf720SGianluca Guida	strb	r1, [r0]		/* byte 0 */
635*b6cbf720SGianluca Guida#else
636*b6cbf720SGianluca Guida	strb	r2, [r0]		/* byte 0 */
637*b6cbf720SGianluca Guida	mov	r3, r2, lsr #8		/* r3 = .321 */
638*b6cbf720SGianluca Guida	mov	r1, r2, lsr #24		/* r1 = ...3 */
639*b6cbf720SGianluca Guida	strb	r1, [r0, #0x03]		/* byte 3 */
640*b6cbf720SGianluca Guida#endif
641*b6cbf720SGianluca Guida	strh	r3, [r0, #0x01]		/* low halfword of r3 = bytes 1,2 */
642*b6cbf720SGianluca Guida	bx	lr
643*b6cbf720SGianluca Guida	LMEMCPY_4_PAD
644*b6cbf720SGianluca Guida
645*b6cbf720SGianluca Guida/*
646*b6cbf720SGianluca Guida * 0101: dst is 8-bit aligned, src is 8-bit aligned
 *
 * Copied as byte, halfword, byte.  With low address bits 01 on both
 * pointers (per the slot index), offset 1 is halfword-aligned.
647*b6cbf720SGianluca Guida */
648*b6cbf720SGianluca Guida	ldrb	r2, [r1]		/* byte 0 */
649*b6cbf720SGianluca Guida	ldrh	r3, [r1, #0x01]		/* bytes 1,2 */
650*b6cbf720SGianluca Guida	ldrb	r1, [r1, #0x03]		/* byte 3 (src pointer no longer needed) */
651*b6cbf720SGianluca Guida	strb	r2, [r0]
652*b6cbf720SGianluca Guida	strh	r3, [r0, #0x01]
653*b6cbf720SGianluca Guida	strb	r1, [r0, #0x03]
654*b6cbf720SGianluca Guida	bx	lr
655*b6cbf720SGianluca Guida	LMEMCPY_4_PAD
656*b6cbf720SGianluca Guida
657*b6cbf720SGianluca Guida/*
658*b6cbf720SGianluca Guida * 0110: dst is 8-bit aligned, src is 16-bit aligned
 *
 * Two halfword loads; the destination is written as byte, halfword, byte
 * (offsets 0, 1 and 3).
659*b6cbf720SGianluca Guida */
660*b6cbf720SGianluca Guida	ldrh	r2, [r1]		/* BE:r2 = ..01  LE:r2 = ..10 */
661*b6cbf720SGianluca Guida	ldrh	r3, [r1, #0x02]		/* BE:r3 = ..23  LE:r3 = ..32 */
662*b6cbf720SGianluca Guida#ifdef __ARMEB__
663*b6cbf720SGianluca Guida	mov	r1, r2, lsr #8		/* r1 = ...0 */
664*b6cbf720SGianluca Guida	strb	r1, [r0]
665*b6cbf720SGianluca Guida	mov	r2, r2, lsl #8		/* r2 = .01. */
666*b6cbf720SGianluca Guida	orr	r2, r2, r3, lsr #8	/* r2 = .012 */
667*b6cbf720SGianluca Guida#else
668*b6cbf720SGianluca Guida	strb	r2, [r0]
669*b6cbf720SGianluca Guida	mov	r2, r2, lsr #8		/* r2 = ...1 */
670*b6cbf720SGianluca Guida	orr	r2, r2, r3, lsl #8	/* r2 = .321 */
671*b6cbf720SGianluca Guida	mov	r3, r3, lsr #8		/* r3 = ...3 */
672*b6cbf720SGianluca Guida#endif
673*b6cbf720SGianluca Guida	strh	r2, [r0, #0x01]		/* low halfword of r2 = bytes 1,2 */
674*b6cbf720SGianluca Guida	strb	r3, [r0, #0x03]		/* low byte of r3 = byte 3 */
675*b6cbf720SGianluca Guida	bx	lr
676*b6cbf720SGianluca Guida	LMEMCPY_4_PAD
677*b6cbf720SGianluca Guida
678*b6cbf720SGianluca Guida/*
679*b6cbf720SGianluca Guida * 0111: dst is 8-bit aligned, src is 8-bit aligned
 *
 * Copied as byte, halfword, byte.  With (dst & 3) == 1 and (src & 3) == 3
 * (per the slot index), offset 1 is at least halfword-aligned on both.
680*b6cbf720SGianluca Guida */
681*b6cbf720SGianluca Guida	ldrb	r2, [r1]		/* byte 0 */
682*b6cbf720SGianluca Guida	ldrh	r3, [r1, #0x01]		/* bytes 1,2 */
683*b6cbf720SGianluca Guida	ldrb	r1, [r1, #0x03]		/* byte 3 (src pointer no longer needed) */
684*b6cbf720SGianluca Guida	strb	r2, [r0]
685*b6cbf720SGianluca Guida	strh	r3, [r0, #0x01]
686*b6cbf720SGianluca Guida	strb	r1, [r0, #0x03]
687*b6cbf720SGianluca Guida	bx	lr
688*b6cbf720SGianluca Guida	LMEMCPY_4_PAD
689*b6cbf720SGianluca Guida
690*b6cbf720SGianluca Guida/*
691*b6cbf720SGianluca Guida * 1000: dst is 16-bit aligned, src is 32-bit aligned
 *
 * One word load, split into two halfword stores.
692*b6cbf720SGianluca Guida */
693*b6cbf720SGianluca Guida	ldr	r2, [r1]		/* BE:r2 = 0123  LE:r2 = 3210 */
694*b6cbf720SGianluca Guida#ifdef __ARMEB__
695*b6cbf720SGianluca Guida	strh	r2, [r0, #0x02]		/* bytes 2,3 */
696*b6cbf720SGianluca Guida	mov	r3, r2, lsr #16		/* r3 = ..01 */
697*b6cbf720SGianluca Guida	strh	r3, [r0]		/* bytes 0,1 */
698*b6cbf720SGianluca Guida#else
699*b6cbf720SGianluca Guida	strh	r2, [r0]		/* bytes 0,1 */
700*b6cbf720SGianluca Guida	mov	r3, r2, lsr #16		/* r3 = ..32 */
701*b6cbf720SGianluca Guida	strh	r3, [r0, #0x02]		/* bytes 2,3 */
702*b6cbf720SGianluca Guida#endif
703*b6cbf720SGianluca Guida	bx	lr
704*b6cbf720SGianluca Guida	LMEMCPY_4_PAD
705*b6cbf720SGianluca Guida
706*b6cbf720SGianluca Guida/*
707*b6cbf720SGianluca Guida * 1001: dst is 16-bit aligned, src is 8-bit aligned
 *
 * Assuming (src & 3) == 1 as the slot index indicates, src-1 and src+3
 * are both word-aligned.  Only the low halfword of each register is
 * written by strh, so 'x' bytes in the upper half are harmless.
708*b6cbf720SGianluca Guida */
709*b6cbf720SGianluca Guida	ldr	r2, [r1, #-1]		/* BE:r2 = x012  LE:r2 = 210x */
710*b6cbf720SGianluca Guida	ldr	r3, [r1, #3]		/* BE:r3 = 3xxx  LE:r3 = xxx3 */
711*b6cbf720SGianluca Guida	mov	r1, r2, lsr #8		/* BE:r1 = .x01  LE:r1 = .210 */
712*b6cbf720SGianluca Guida	strh	r1, [r0]
713*b6cbf720SGianluca Guida#ifdef __ARMEB__
714*b6cbf720SGianluca Guida	mov	r2, r2, lsl #8		/* r2 = 012. */
715*b6cbf720SGianluca Guida	orr	r2, r2, r3, lsr #24	/* r2 = 0123 */
716*b6cbf720SGianluca Guida#else
717*b6cbf720SGianluca Guida	mov	r2, r2, lsr #24		/* r2 = ...2 */
718*b6cbf720SGianluca Guida	orr	r2, r2, r3, lsl #8	/* r2 = xx32 */
719*b6cbf720SGianluca Guida#endif
720*b6cbf720SGianluca Guida	strh	r2, [r0, #0x02]
721*b6cbf720SGianluca Guida	bx	lr
722*b6cbf720SGianluca Guida	LMEMCPY_4_PAD
723*b6cbf720SGianluca Guida
724*b6cbf720SGianluca Guida/*
725*b6cbf720SGianluca Guida * 1010: dst is 16-bit aligned, src is 16-bit aligned
 *
 * Straight copy as two halfwords; no shifting is needed.
726*b6cbf720SGianluca Guida */
727*b6cbf720SGianluca Guida	ldrh	r2, [r1]		/* bytes 0,1 */
728*b6cbf720SGianluca Guida	ldrh	r3, [r1, #0x02]		/* bytes 2,3 */
729*b6cbf720SGianluca Guida	strh	r2, [r0]
730*b6cbf720SGianluca Guida	strh	r3, [r0, #0x02]
731*b6cbf720SGianluca Guida	bx	lr
732*b6cbf720SGianluca Guida	LMEMCPY_4_PAD
733*b6cbf720SGianluca Guida
734*b6cbf720SGianluca Guida/*
735*b6cbf720SGianluca Guida * 1011: dst is 16-bit aligned, src is 8-bit aligned
 *
 * Assuming (src & 3) == 3 as the slot index indicates, src+1 and src-3
 * are both word-aligned.  Only the low halfword of each register is
 * written by strh, so 'x' bytes in the upper half are harmless.
736*b6cbf720SGianluca Guida */
737*b6cbf720SGianluca Guida	ldr	r3, [r1, #1]		/* BE:r3 = 123x  LE:r3 = x321 */
738*b6cbf720SGianluca Guida	ldr	r2, [r1, #-3]		/* BE:r2 = xxx0  LE:r2 = 0xxx */
739*b6cbf720SGianluca Guida	mov	r1, r3, lsr #8		/* BE:r1 = .123  LE:r1 = .x32 */
740*b6cbf720SGianluca Guida	strh	r1, [r0, #0x02]
741*b6cbf720SGianluca Guida#ifdef __ARMEB__
742*b6cbf720SGianluca Guida	mov	r3, r3, lsr #24		/* r3 = ...1 */
743*b6cbf720SGianluca Guida	orr	r3, r3, r2, lsl #8	/* r3 = xx01 */
744*b6cbf720SGianluca Guida#else
745*b6cbf720SGianluca Guida	mov	r3, r3, lsl #8		/* r3 = 321. */
746*b6cbf720SGianluca Guida	orr	r3, r3, r2, lsr #24	/* r3 = 3210 */
747*b6cbf720SGianluca Guida#endif
748*b6cbf720SGianluca Guida	strh	r3, [r0]
749*b6cbf720SGianluca Guida	bx	lr
750*b6cbf720SGianluca Guida	LMEMCPY_4_PAD
751*b6cbf720SGianluca Guida
752*b6cbf720SGianluca Guida/*
753*b6cbf720SGianluca Guida * 1100: dst is 8-bit aligned, src is 32-bit aligned
 *
 * One word load, then the destination is written as byte, halfword, byte
 * (offsets 0, 1 and 3).
754*b6cbf720SGianluca Guida */
755*b6cbf720SGianluca Guida	ldr	r2, [r1]		/* BE:r2 = 0123  LE:r2 = 3210 */
756*b6cbf720SGianluca Guida#ifdef __ARMEB__
757*b6cbf720SGianluca Guida	strb	r2, [r0, #0x03]		/* byte 3 */
758*b6cbf720SGianluca Guida	mov	r3, r2, lsr #8		/* r3 = .012 */
759*b6cbf720SGianluca Guida	mov	r1, r2, lsr #24		/* r1 = ...0 */
760*b6cbf720SGianluca Guida	strh	r3, [r0, #0x01]		/* bytes 1,2 */
761*b6cbf720SGianluca Guida	strb	r1, [r0]		/* byte 0 */
762*b6cbf720SGianluca Guida#else
763*b6cbf720SGianluca Guida	strb	r2, [r0]		/* byte 0 */
764*b6cbf720SGianluca Guida	mov	r3, r2, lsr #8		/* r3 = .321 */
765*b6cbf720SGianluca Guida	mov	r1, r2, lsr #24		/* r1 = ...3 */
766*b6cbf720SGianluca Guida	strh	r3, [r0, #0x01]		/* bytes 1,2 */
767*b6cbf720SGianluca Guida	strb	r1, [r0, #0x03]		/* byte 3 */
768*b6cbf720SGianluca Guida#endif
769*b6cbf720SGianluca Guida	bx	lr
770*b6cbf720SGianluca Guida	LMEMCPY_4_PAD
771*b6cbf720SGianluca Guida
772*b6cbf720SGianluca Guida/*
773*b6cbf720SGianluca Guida * 1101: dst is 8-bit aligned, src is 8-bit aligned
 *
 * Copied as byte, halfword, byte.  With (dst & 3) == 3 and (src & 3) == 1
 * (per the slot index), offset 1 is at least halfword-aligned on both.
774*b6cbf720SGianluca Guida */
775*b6cbf720SGianluca Guida	ldrb	r2, [r1]		/* byte 0 */
776*b6cbf720SGianluca Guida	ldrh	r3, [r1, #0x01]		/* bytes 1,2 */
777*b6cbf720SGianluca Guida	ldrb	r1, [r1, #0x03]		/* byte 3 (src pointer no longer needed) */
778*b6cbf720SGianluca Guida	strb	r2, [r0]
779*b6cbf720SGianluca Guida	strh	r3, [r0, #0x01]
780*b6cbf720SGianluca Guida	strb	r1, [r0, #0x03]
781*b6cbf720SGianluca Guida	bx	lr
782*b6cbf720SGianluca Guida	LMEMCPY_4_PAD
783*b6cbf720SGianluca Guida
784*b6cbf720SGianluca Guida/*
785*b6cbf720SGianluca Guida * 1110: dst is 8-bit aligned, src is 16-bit aligned
 *
 * Two halfword loads; the destination is written as byte, halfword, byte
 * (offsets 0, 1 and 3).  The two endian variants are fully separate.
786*b6cbf720SGianluca Guida */
787*b6cbf720SGianluca Guida#ifdef __ARMEB__
788*b6cbf720SGianluca Guida	ldrh	r3, [r1, #0x02]		/* BE:r3 = ..23  LE:r3 = ..32 */
789*b6cbf720SGianluca Guida	ldrh	r2, [r1]		/* BE:r2 = ..01  LE:r2 = ..10 */
790*b6cbf720SGianluca Guida	strb	r3, [r0, #0x03]
791*b6cbf720SGianluca Guida	mov	r3, r3, lsr #8		/* r3 = ...2 */
792*b6cbf720SGianluca Guida	orr	r3, r3, r2, lsl #8	/* r3 = .012 */
793*b6cbf720SGianluca Guida	strh	r3, [r0, #0x01]
794*b6cbf720SGianluca Guida	mov	r2, r2, lsr #8		/* r2 = ...0 */
795*b6cbf720SGianluca Guida	strb	r2, [r0]
796*b6cbf720SGianluca Guida#else
797*b6cbf720SGianluca Guida	ldrh	r2, [r1]		/* BE:r2 = ..01  LE:r2 = ..10 */
798*b6cbf720SGianluca Guida	ldrh	r3, [r1, #0x02]		/* BE:r3 = ..23  LE:r3 = ..32 */
799*b6cbf720SGianluca Guida	strb	r2, [r0]
800*b6cbf720SGianluca Guida	mov	r2, r2, lsr #8		/* r2 = ...1 */
801*b6cbf720SGianluca Guida	orr	r2, r2, r3, lsl #8	/* r2 = .321 */
802*b6cbf720SGianluca Guida	strh	r2, [r0, #0x01]
803*b6cbf720SGianluca Guida	mov	r3, r3, lsr #8		/* r3 = ...3 */
804*b6cbf720SGianluca Guida	strb	r3, [r0, #0x03]
805*b6cbf720SGianluca Guida#endif
806*b6cbf720SGianluca Guida	bx	lr
807*b6cbf720SGianluca Guida	LMEMCPY_4_PAD
808*b6cbf720SGianluca Guida
809*b6cbf720SGianluca Guida/*
810*b6cbf720SGianluca Guida * 1111: dst is 8-bit aligned, src is 8-bit aligned
 *
 * Copied as byte, halfword, byte.  With low address bits 11 on both
 * pointers (per the slot index), offset 1 is aligned on both.
811*b6cbf720SGianluca Guida */
812*b6cbf720SGianluca Guida	ldrb	r2, [r1]		/* byte 0 */
813*b6cbf720SGianluca Guida	ldrh	r3, [r1, #0x01]		/* bytes 1,2 */
814*b6cbf720SGianluca Guida	ldrb	r1, [r1, #0x03]		/* byte 3 (src pointer no longer needed) */
815*b6cbf720SGianluca Guida	strb	r2, [r0]
816*b6cbf720SGianluca Guida	strh	r3, [r0, #0x01]
817*b6cbf720SGianluca Guida	strb	r1, [r0, #0x03]
818*b6cbf720SGianluca Guida	bx	lr
819*b6cbf720SGianluca Guida	LMEMCPY_4_PAD
820*b6cbf720SGianluca Guida
821*b6cbf720SGianluca Guida
822*b6cbf720SGianluca Guida/******************************************************************************
823*b6cbf720SGianluca Guida * Special case for 6 byte copies
 *
 * Computed jump into a table of 16 alignment cases.  The table index is
 * ((dst & 3) << 2) | (src & 3); each case is padded with LMEMCPY_6_PAD to
 * a 64-byte slot, so the slot address is .Lmemcpy_6 + (index << 6).
 * On entry r0 = dst and r1 = src; r2, r3 are used as scratch.
824*b6cbf720SGianluca Guida */
825*b6cbf720SGianluca Guida#define	LMEMCPY_6_LOG2	6	/* 64 bytes */
826*b6cbf720SGianluca Guida#define	LMEMCPY_6_PAD	.align LMEMCPY_6_LOG2
827*b6cbf720SGianluca Guida	LMEMCPY_6_PAD
828*b6cbf720SGianluca Guida.Lmemcpy_6:
829*b6cbf720SGianluca Guida	and	r2, r1, #0x03		/* r2 = src & 3 */
830*b6cbf720SGianluca Guida	orr	r2, r2, r0, lsl #2	/* merge in dst << 2 */
831*b6cbf720SGianluca Guida	ands	r2, r2, #0x0f		/* keep 4 index bits; Z set when index is 0 */
832*b6cbf720SGianluca Guida	sub	r3, pc, #0x14		/* r3 = .Lmemcpy_6 (pc reads as this insn + 8) */
833*b6cbf720SGianluca Guida	addne	pc, r3, r2, lsl #LMEMCPY_6_LOG2	/* index != 0: jump to its slot; else fall through */
834*b6cbf720SGianluca Guida
835*b6cbf720SGianluca Guida/*
836*b6cbf720SGianluca Guida * 0000: dst is 32-bit aligned, src is 32-bit aligned
 *
 * Index 0: the conditional jump above is not taken and execution falls
 * through to here.  Copied as one word plus one halfword.
837*b6cbf720SGianluca Guida */
838*b6cbf720SGianluca Guida	ldr	r2, [r1]		/* bytes 0-3 */
839*b6cbf720SGianluca Guida	ldrh	r3, [r1, #0x04]		/* bytes 4,5 */
840*b6cbf720SGianluca Guida	str	r2, [r0]
841*b6cbf720SGianluca Guida	strh	r3, [r0, #0x04]
842*b6cbf720SGianluca Guida	bx	lr
843*b6cbf720SGianluca Guida	LMEMCPY_6_PAD
844*b6cbf720SGianluca Guida
845*b6cbf720SGianluca Guida/*
846*b6cbf720SGianluca Guida * 0001: dst is 32-bit aligned, src is 8-bit aligned
 *
 * (src & 3) == 1, so src-1 and src+3 are word-aligned.  'x' marks bytes
 * outside the 6 being copied; they are shifted out or fall in the upper
 * half that strh does not write.
847*b6cbf720SGianluca Guida */
848*b6cbf720SGianluca Guida	ldr	r2, [r1, #-1]		/* BE:r2 = x012  LE:r2 = 210x */
849*b6cbf720SGianluca Guida	ldr	r3, [r1, #0x03]		/* BE:r3 = 345x  LE:r3 = x543 */
850*b6cbf720SGianluca Guida#ifdef __ARMEB__
851*b6cbf720SGianluca Guida	mov	r2, r2, lsl #8		/* r2 = 012. */
852*b6cbf720SGianluca Guida	orr	r2, r2, r3, lsr #24	/* r2 = 0123 */
853*b6cbf720SGianluca Guida#else
854*b6cbf720SGianluca Guida	mov	r2, r2, lsr #8		/* r2 = .210 */
855*b6cbf720SGianluca Guida	orr	r2, r2, r3, lsl #24	/* r2 = 3210 */
856*b6cbf720SGianluca Guida#endif
857*b6cbf720SGianluca Guida	mov	r3, r3, lsr #8		/* BE:r3 = .345  LE:r3 = .x54 */
858*b6cbf720SGianluca Guida	str	r2, [r0]
859*b6cbf720SGianluca Guida	strh	r3, [r0, #0x04]
860*b6cbf720SGianluca Guida	bx	lr
861*b6cbf720SGianluca Guida	LMEMCPY_6_PAD
862*b6cbf720SGianluca Guida
863*b6cbf720SGianluca Guida/*
864*b6cbf720SGianluca Guida * 0010: dst is 32-bit aligned, src is 16-bit aligned
 *
 * (src & 3) == 2, so src+2 is word-aligned: load halfword + word, then
 * regroup into a word store followed by a halfword store.
865*b6cbf720SGianluca Guida */
866*b6cbf720SGianluca Guida	ldr	r3, [r1, #0x02]		/* BE:r3 = 2345  LE:r3 = 5432 */
867*b6cbf720SGianluca Guida	ldrh	r2, [r1]		/* BE:r2 = ..01  LE:r2 = ..10 */
868*b6cbf720SGianluca Guida#ifdef __ARMEB__
869*b6cbf720SGianluca Guida	mov	r1, r3, lsr #16		/* r1 = ..23 */
870*b6cbf720SGianluca Guida	orr	r1, r1, r2, lsl #16	/* r1 = 0123 */
871*b6cbf720SGianluca Guida	str	r1, [r0]
872*b6cbf720SGianluca Guida	strh	r3, [r0, #0x04]		/* low halfword of r3 = bytes 4,5 */
873*b6cbf720SGianluca Guida#else
874*b6cbf720SGianluca Guida	mov	r1, r3, lsr #16		/* r1 = ..54 */
875*b6cbf720SGianluca Guida	orr	r2, r2, r3, lsl #16	/* r2 = 3210 */
876*b6cbf720SGianluca Guida	str	r2, [r0]
877*b6cbf720SGianluca Guida	strh	r1, [r0, #0x04]
878*b6cbf720SGianluca Guida#endif
879*b6cbf720SGianluca Guida	bx	lr
880*b6cbf720SGianluca Guida	LMEMCPY_6_PAD
881*b6cbf720SGianluca Guida
882*b6cbf720SGianluca Guida/*
883*b6cbf720SGianluca Guida * 0011: dst is 32-bit aligned, src is 8-bit aligned
 *
 * (src & 3) == 3, so src-3, src+1 and src+5 are word-aligned.  'x' marks
 * bytes outside the 6 being copied.
884*b6cbf720SGianluca Guida */
885*b6cbf720SGianluca Guida	ldr	r2, [r1, #-3]		/* BE:r2 = xxx0  LE:r2 = 0xxx */
886*b6cbf720SGianluca Guida	ldr	r3, [r1, #1]		/* BE:r3 = 1234  LE:r3 = 4321 */
887*b6cbf720SGianluca Guida	ldr	r1, [r1, #5]		/* BE:r1 = 5xxx  LE:r1 = xxx5 */
888*b6cbf720SGianluca Guida#ifdef __ARMEB__
889*b6cbf720SGianluca Guida	mov	r2, r2, lsl #24		/* r2 = 0... */
890*b6cbf720SGianluca Guida	orr	r2, r2, r3, lsr #8	/* r2 = 0123 */
891*b6cbf720SGianluca Guida	mov	r3, r3, lsl #8		/* r3 = 234. */
892*b6cbf720SGianluca Guida	orr	r1, r3, r1, lsr #24	/* r1 = 2345 */
893*b6cbf720SGianluca Guida#else
894*b6cbf720SGianluca Guida	mov	r2, r2, lsr #24		/* r2 = ...0 */
895*b6cbf720SGianluca Guida	orr	r2, r2, r3, lsl #8	/* r2 = 3210 */
896*b6cbf720SGianluca Guida	mov	r1, r1, lsl #8		/* r1 = xx5. */
897*b6cbf720SGianluca Guida	orr	r1, r1, r3, lsr #24	/* r1 = xx54 */
898*b6cbf720SGianluca Guida#endif
899*b6cbf720SGianluca Guida	str	r2, [r0]
900*b6cbf720SGianluca Guida	strh	r1, [r0, #0x04]		/* only the low halfword of r1 is stored */
901*b6cbf720SGianluca Guida	bx	lr
902*b6cbf720SGianluca Guida	LMEMCPY_6_PAD
903*b6cbf720SGianluca Guida
904*b6cbf720SGianluca Guida/*
905*b6cbf720SGianluca Guida * 0100: dst is 8-bit aligned, src is 32-bit aligned
 *
 * (dst & 3) == 1: the destination is written as byte, halfword, halfword,
 * byte at offsets 0, 1, 3 and 5.
906*b6cbf720SGianluca Guida */
907*b6cbf720SGianluca Guida	ldr	r3, [r1]		/* BE:r3 = 0123  LE:r3 = 3210 */
908*b6cbf720SGianluca Guida	ldrh	r2, [r1, #0x04]		/* BE:r2 = ..45  LE:r2 = ..54 */
909*b6cbf720SGianluca Guida	mov	r1, r3, lsr #8		/* BE:r1 = .012  LE:r1 = .321 */
910*b6cbf720SGianluca Guida	strh	r1, [r0, #0x01]
911*b6cbf720SGianluca Guida#ifdef __ARMEB__
912*b6cbf720SGianluca Guida	mov	r1, r3, lsr #24		/* r1 = ...0 */
913*b6cbf720SGianluca Guida	strb	r1, [r0]
914*b6cbf720SGianluca Guida	mov	r3, r3, lsl #8		/* r3 = 123. */
915*b6cbf720SGianluca Guida	orr	r3, r3, r2, lsr #8	/* r3 = 1234 */
916*b6cbf720SGianluca Guida#else
917*b6cbf720SGianluca Guida	strb	r3, [r0]
918*b6cbf720SGianluca Guida	mov	r3, r3, lsr #24		/* r3 = ...3 */
919*b6cbf720SGianluca Guida	orr	r3, r3, r2, lsl #8	/* r3 = .543 */
920*b6cbf720SGianluca Guida	mov	r2, r2, lsr #8		/* r2 = ...5 */
921*b6cbf720SGianluca Guida#endif
922*b6cbf720SGianluca Guida	strh	r3, [r0, #0x03]		/* low halfword of r3 = bytes 3,4 */
923*b6cbf720SGianluca Guida	strb	r2, [r0, #0x05]		/* low byte of r2 = byte 5 */
924*b6cbf720SGianluca Guida	bx	lr
925*b6cbf720SGianluca Guida	LMEMCPY_6_PAD
926*b6cbf720SGianluca Guida
927*b6cbf720SGianluca Guida/*
928*b6cbf720SGianluca Guida * 0101: dst is 8-bit aligned, src is 8-bit aligned
 *
 * Both pointers have low bits 01, so offsets 1 and 3 are halfword-aligned
 * on both sides: copy as byte, halfword, halfword, byte with no shifting.
 * ip is used as an extra scratch register.
929*b6cbf720SGianluca Guida */
930*b6cbf720SGianluca Guida	ldrb	r2, [r1]		/* byte 0 */
931*b6cbf720SGianluca Guida	ldrh	r3, [r1, #0x01]		/* bytes 1,2 */
932*b6cbf720SGianluca Guida	ldrh	ip, [r1, #0x03]		/* bytes 3,4 */
933*b6cbf720SGianluca Guida	ldrb	r1, [r1, #0x05]		/* byte 5 (src pointer no longer needed) */
934*b6cbf720SGianluca Guida	strb	r2, [r0]
935*b6cbf720SGianluca Guida	strh	r3, [r0, #0x01]
936*b6cbf720SGianluca Guida	strh	ip, [r0, #0x03]
937*b6cbf720SGianluca Guida	strb	r1, [r0, #0x05]
938*b6cbf720SGianluca Guida	bx	lr
939*b6cbf720SGianluca Guida	LMEMCPY_6_PAD
940*b6cbf720SGianluca Guida
941*b6cbf720SGianluca Guida/*
942*b6cbf720SGianluca Guida * 0110: dst is 8-bit aligned, src is 16-bit aligned
 *
 * (src & 3) == 2, so src+2 is word-aligned.  The destination is written
 * as byte, halfword, halfword, byte at offsets 0, 1, 3 and 5; each strh
 * stores only the low halfword of r3.
943*b6cbf720SGianluca Guida */
944*b6cbf720SGianluca Guida	ldrh	r2, [r1]		/* BE:r2 = ..01  LE:r2 = ..10 */
945*b6cbf720SGianluca Guida	ldr	r1, [r1, #0x02]		/* BE:r1 = 2345  LE:r1 = 5432 */
946*b6cbf720SGianluca Guida#ifdef __ARMEB__
947*b6cbf720SGianluca Guida	mov	r3, r2, lsr #8		/* r3 = ...0 */
948*b6cbf720SGianluca Guida	strb	r3, [r0]
949*b6cbf720SGianluca Guida	strb	r1, [r0, #0x05]
950*b6cbf720SGianluca Guida	mov	r3, r1, lsr #8		/* r3 = .234 */
951*b6cbf720SGianluca Guida	strh	r3, [r0, #0x03]
952*b6cbf720SGianluca Guida	mov	r3, r2, lsl #8		/* r3 = .01. */
953*b6cbf720SGianluca Guida	orr	r3, r3, r1, lsr #24	/* r3 = .012 */
954*b6cbf720SGianluca Guida	strh	r3, [r0, #0x01]
955*b6cbf720SGianluca Guida#else
956*b6cbf720SGianluca Guida	strb	r2, [r0]
957*b6cbf720SGianluca Guida	mov	r3, r1, lsr #24		/* r3 = ...5 */
958*b6cbf720SGianluca Guida	strb	r3, [r0, #0x05]
959*b6cbf720SGianluca Guida	mov	r3, r1, lsr #8		/* r3 = .543 */
960*b6cbf720SGianluca Guida	strh	r3, [r0, #0x03]
961*b6cbf720SGianluca Guida	mov	r3, r2, lsr #8		/* r3 = ...1 */
962*b6cbf720SGianluca Guida	orr	r3, r3, r1, lsl #8	/* r3 = 4321 */
963*b6cbf720SGianluca Guida	strh	r3, [r0, #0x01]
964*b6cbf720SGianluca Guida#endif
965*b6cbf720SGianluca Guida	bx	lr
966*b6cbf720SGianluca Guida	LMEMCPY_6_PAD
967*b6cbf720SGianluca Guida
968*b6cbf720SGianluca Guida/*
969*b6cbf720SGianluca Guida * 0111: dst is 8-bit aligned, src is 8-bit aligned
 *
 * (dst & 3) == 1 and (src & 3) == 3, so offsets 1 and 3 are at least
 * halfword-aligned on both sides: copy as byte, halfword, halfword, byte.
 * ip is used as an extra scratch register.
970*b6cbf720SGianluca Guida */
971*b6cbf720SGianluca Guida	ldrb	r2, [r1]		/* byte 0 */
972*b6cbf720SGianluca Guida	ldrh	r3, [r1, #0x01]		/* bytes 1,2 */
973*b6cbf720SGianluca Guida	ldrh	ip, [r1, #0x03]		/* bytes 3,4 */
974*b6cbf720SGianluca Guida	ldrb	r1, [r1, #0x05]		/* byte 5 (src pointer no longer needed) */
975*b6cbf720SGianluca Guida	strb	r2, [r0]
976*b6cbf720SGianluca Guida	strh	r3, [r0, #0x01]
977*b6cbf720SGianluca Guida	strh	ip, [r0, #0x03]
978*b6cbf720SGianluca Guida	strb	r1, [r0, #0x05]
979*b6cbf720SGianluca Guida	bx	lr
980*b6cbf720SGianluca Guida	LMEMCPY_6_PAD
981*b6cbf720SGianluca Guida
982*b6cbf720SGianluca Guida/*
983*b6cbf720SGianluca Guida * 1000: dst is 16-bit aligned, src is 32-bit aligned
 *
 * (dst & 3) == 2, so dst+2 is word-aligned: load word + halfword, then
 * regroup into a halfword store followed by a word store.
984*b6cbf720SGianluca Guida */
985*b6cbf720SGianluca Guida#ifdef __ARMEB__
986*b6cbf720SGianluca Guida	ldr	r2, [r1]		/* r2 = 0123 */
987*b6cbf720SGianluca Guida	ldrh	r3, [r1, #0x04]		/* r3 = ..45 */
988*b6cbf720SGianluca Guida	mov	r1, r2, lsr #16		/* r1 = ..01 */
989*b6cbf720SGianluca Guida	orr	r3, r3, r2, lsl#16	/* r3 = 2345 */
990*b6cbf720SGianluca Guida	strh	r1, [r0]
991*b6cbf720SGianluca Guida	str	r3, [r0, #0x02]
992*b6cbf720SGianluca Guida#else
993*b6cbf720SGianluca Guida	ldrh	r2, [r1, #0x04]		/* r2 = ..54 */
994*b6cbf720SGianluca Guida	ldr	r3, [r1]		/* r3 = 3210 */
995*b6cbf720SGianluca Guida	mov	r2, r2, lsl #16		/* r2 = 54.. */
996*b6cbf720SGianluca Guida	orr	r2, r2, r3, lsr #16	/* r2 = 5432 */
997*b6cbf720SGianluca Guida	strh	r3, [r0]		/* low halfword of r3 = bytes 0,1 */
998*b6cbf720SGianluca Guida	str	r2, [r0, #0x02]
999*b6cbf720SGianluca Guida#endif
1000*b6cbf720SGianluca Guida	bx	lr
1001*b6cbf720SGianluca Guida	LMEMCPY_6_PAD
1002*b6cbf720SGianluca Guida
1003*b6cbf720SGianluca Guida/*
1004*b6cbf720SGianluca Guida * 1001: dst is 16-bit aligned, src is 8-bit aligned
 *
 * (src & 3) == 1, so src-1 and src+3 are word-aligned; (dst & 3) == 2, so
 * dst+2 is word-aligned.  'x' marks bytes outside the 6 being copied.
1005*b6cbf720SGianluca Guida */
1006*b6cbf720SGianluca Guida	ldr	r3, [r1, #-1]		/* BE:r3 = x012  LE:r3 = 210x */
1007*b6cbf720SGianluca Guida	ldr	r2, [r1, #3]		/* BE:r2 = 345x  LE:r2 = x543 */
1008*b6cbf720SGianluca Guida	mov	r1, r3, lsr #8		/* BE:r1 = .x01  LE:r1 = .210 */
1009*b6cbf720SGianluca Guida#ifdef __ARMEB__
1010*b6cbf720SGianluca Guida	mov	r2, r2, lsr #8		/* r2 = .345 */
1011*b6cbf720SGianluca Guida	orr	r2, r2, r3, lsl #24	/* r2 = 2345 */
1012*b6cbf720SGianluca Guida#else
1013*b6cbf720SGianluca Guida	mov	r2, r2, lsl #8		/* r2 = 543. */
1014*b6cbf720SGianluca Guida	orr	r2, r2, r3, lsr #24	/* r2 = 5432 */
1015*b6cbf720SGianluca Guida#endif
1016*b6cbf720SGianluca Guida	strh	r1, [r0]		/* only the low halfword of r1 is stored */
1017*b6cbf720SGianluca Guida	str	r2, [r0, #0x02]
1018*b6cbf720SGianluca Guida	bx	lr
1019*b6cbf720SGianluca Guida	LMEMCPY_6_PAD
1020*b6cbf720SGianluca Guida
1021*b6cbf720SGianluca Guida/*
1022*b6cbf720SGianluca Guida * 1010: dst is 16-bit aligned, src is 16-bit aligned
 *
 * Both pointers have low bits 10, so offset 2 is word-aligned on both
 * sides: copy as halfword + word with no shifting.
1023*b6cbf720SGianluca Guida */
1024*b6cbf720SGianluca Guida	ldrh	r2, [r1]		/* bytes 0,1 */
1025*b6cbf720SGianluca Guida	ldr	r3, [r1, #0x02]		/* bytes 2-5 */
1026*b6cbf720SGianluca Guida	strh	r2, [r0]
1027*b6cbf720SGianluca Guida	str	r3, [r0, #0x02]
1028*b6cbf720SGianluca Guida	bx	lr
1029*b6cbf720SGianluca Guida	LMEMCPY_6_PAD
1030*b6cbf720SGianluca Guida
1031*b6cbf720SGianluca Guida/*
1032*b6cbf720SGianluca Guida * 1011: dst is 16-bit aligned, src is 8-bit aligned
 *
 * (src & 3) == 3, so src+1 is word-aligned: load byte, word, byte and
 * regroup into a halfword store plus a word store at dst+2.
1033*b6cbf720SGianluca Guida */
1034*b6cbf720SGianluca Guida	ldrb	r3, [r1]		/* r3 = ...0 */
1035*b6cbf720SGianluca Guida	ldr	r2, [r1, #0x01]		/* BE:r2 = 1234  LE:r2 = 4321 */
1036*b6cbf720SGianluca Guida	ldrb	r1, [r1, #0x05]		/* r1 = ...5 */
1037*b6cbf720SGianluca Guida#ifdef __ARMEB__
1038*b6cbf720SGianluca Guida	mov	r3, r3, lsl #8		/* r3 = ..0. */
1039*b6cbf720SGianluca Guida	orr	r3, r3, r2, lsr #24	/* r3 = ..01 */
1040*b6cbf720SGianluca Guida	orr	r1, r1, r2, lsl #8	/* r1 = 2345 */
1041*b6cbf720SGianluca Guida#else
1042*b6cbf720SGianluca Guida	orr	r3, r3, r2, lsl #8	/* r3 = 3210 */
1043*b6cbf720SGianluca Guida	mov	r1, r1, lsl #24		/* r1 = 5... */
1044*b6cbf720SGianluca Guida	orr	r1, r1, r2, lsr #8	/* r1 = 5432 */
1045*b6cbf720SGianluca Guida#endif
1046*b6cbf720SGianluca Guida	strh	r3, [r0]		/* low halfword of r3 = bytes 0,1 */
1047*b6cbf720SGianluca Guida	str	r1, [r0, #0x02]
1048*b6cbf720SGianluca Guida	bx	lr
1049*b6cbf720SGianluca Guida	LMEMCPY_6_PAD
1050*b6cbf720SGianluca Guida
1051*b6cbf720SGianluca Guida/*
1052*b6cbf720SGianluca Guida * 1100: dst is 8-bit aligned, src is 32-bit aligned
 *
 * (dst & 3) == 3, so dst+1 is word-aligned: the destination is written
 * as byte, word, byte at offsets 0, 1 and 5.
1053*b6cbf720SGianluca Guida */
1054*b6cbf720SGianluca Guida	ldr	r2, [r1]		/* BE:r2 = 0123  LE:r2 = 3210 */
1055*b6cbf720SGianluca Guida	ldrh	r1, [r1, #0x04]		/* BE:r1 = ..45  LE:r1 = ..54 */
1056*b6cbf720SGianluca Guida#ifdef __ARMEB__
1057*b6cbf720SGianluca Guida	mov	r3, r2, lsr #24		/* r3 = ...0 */
1058*b6cbf720SGianluca Guida	strb	r3, [r0]
1059*b6cbf720SGianluca Guida	mov	r2, r2, lsl #8		/* r2 = 123. */
1060*b6cbf720SGianluca Guida	orr	r2, r2, r1, lsr #8	/* r2 = 1234 */
1061*b6cbf720SGianluca Guida#else
1062*b6cbf720SGianluca Guida	strb	r2, [r0]
1063*b6cbf720SGianluca Guida	mov	r2, r2, lsr #8		/* r2 = .321 */
1064*b6cbf720SGianluca Guida	orr	r2, r2, r1, lsl #24	/* r2 = 4321 */
1065*b6cbf720SGianluca Guida	mov	r1, r1, lsr #8		/* r1 = ...5 */
1066*b6cbf720SGianluca Guida#endif
1067*b6cbf720SGianluca Guida	str	r2, [r0, #0x01]
1068*b6cbf720SGianluca Guida	strb	r1, [r0, #0x05]		/* low byte of r1 = byte 5 */
1069*b6cbf720SGianluca Guida	bx	lr
1070*b6cbf720SGianluca Guida	LMEMCPY_6_PAD
1071*b6cbf720SGianluca Guida
1072*b6cbf720SGianluca Guida/*
1073*b6cbf720SGianluca Guida * 1101: dst is 8-bit aligned, src is 8-bit aligned
 *
 * (dst & 3) == 3 and (src & 3) == 1, so offsets 1 and 3 are at least
 * halfword-aligned on both sides: copy as byte, halfword, halfword, byte.
 * ip is used as an extra scratch register.
1074*b6cbf720SGianluca Guida */
1075*b6cbf720SGianluca Guida	ldrb	r2, [r1]		/* byte 0 */
1076*b6cbf720SGianluca Guida	ldrh	r3, [r1, #0x01]		/* bytes 1,2 */
1077*b6cbf720SGianluca Guida	ldrh	ip, [r1, #0x03]		/* bytes 3,4 */
1078*b6cbf720SGianluca Guida	ldrb	r1, [r1, #0x05]		/* byte 5 (src pointer no longer needed) */
1079*b6cbf720SGianluca Guida	strb	r2, [r0]
1080*b6cbf720SGianluca Guida	strh	r3, [r0, #0x01]
1081*b6cbf720SGianluca Guida	strh	ip, [r0, #0x03]
1082*b6cbf720SGianluca Guida	strb	r1, [r0, #0x05]
1083*b6cbf720SGianluca Guida	bx	lr
1084*b6cbf720SGianluca Guida	LMEMCPY_6_PAD
1085*b6cbf720SGianluca Guida
1086*b6cbf720SGianluca Guida/*
1087*b6cbf720SGianluca Guida * 1110: dst is 8-bit aligned, src is 16-bit aligned
 *
 * (src & 3) == 2, so src+2 is word-aligned; (dst & 3) == 3, so dst+1 is
 * word-aligned.  The destination is written as byte, word, byte.
1088*b6cbf720SGianluca Guida */
1089*b6cbf720SGianluca Guida	ldrh	r2, [r1]		/* BE:r2 = ..01  LE:r2 = ..10 */
1090*b6cbf720SGianluca Guida	ldr	r1, [r1, #0x02]		/* BE:r1 = 2345  LE:r1 = 5432 */
1091*b6cbf720SGianluca Guida#ifdef __ARMEB__
1092*b6cbf720SGianluca Guida	mov	r3, r2, lsr #8		/* r3 = ...0 */
1093*b6cbf720SGianluca Guida	strb	r3, [r0]
1094*b6cbf720SGianluca Guida	mov	r2, r2, lsl #24		/* r2 = 1... */
1095*b6cbf720SGianluca Guida	orr	r2, r2, r1, lsr #8	/* r2 = 1234 */
1096*b6cbf720SGianluca Guida#else
1097*b6cbf720SGianluca Guida	strb	r2, [r0]
1098*b6cbf720SGianluca Guida	mov	r2, r2, lsr #8		/* r2 = ...1 */
1099*b6cbf720SGianluca Guida	orr	r2, r2, r1, lsl #8	/* r2 = 4321 */
1100*b6cbf720SGianluca Guida	mov	r1, r1, lsr #24		/* r1 = ...5 */
1101*b6cbf720SGianluca Guida#endif
1102*b6cbf720SGianluca Guida	str	r2, [r0, #0x01]
1103*b6cbf720SGianluca Guida	strb	r1, [r0, #0x05]		/* low byte of r1 = byte 5 */
1104*b6cbf720SGianluca Guida	bx	lr
1105*b6cbf720SGianluca Guida	LMEMCPY_6_PAD
1106*b6cbf720SGianluca Guida
1107*b6cbf720SGianluca Guida/*
1108*b6cbf720SGianluca Guida * 1111: dst is 8-bit aligned, src is 8-bit aligned
 *
 * Both pointers have low bits 11, so offset 1 is word-aligned on both
 * sides: copy as byte, word, byte with no shifting.
1109*b6cbf720SGianluca Guida */
1110*b6cbf720SGianluca Guida	ldrb	r2, [r1]		/* byte 0 */
1111*b6cbf720SGianluca Guida	ldr	r3, [r1, #0x01]		/* bytes 1-4 */
1112*b6cbf720SGianluca Guida	ldrb	r1, [r1, #0x05]		/* byte 5 (src pointer no longer needed) */
1113*b6cbf720SGianluca Guida	strb	r2, [r0]
1114*b6cbf720SGianluca Guida	str	r3, [r0, #0x01]
1115*b6cbf720SGianluca Guida	strb	r1, [r0, #0x05]
1116*b6cbf720SGianluca Guida	bx	lr
1117*b6cbf720SGianluca Guida	LMEMCPY_6_PAD
1118*b6cbf720SGianluca Guida
1119*b6cbf720SGianluca Guida
1120*b6cbf720SGianluca Guida/******************************************************************************
1121*b6cbf720SGianluca Guida * Special case for 8 byte copies
 *
 * Computed jump into a table of 16 alignment cases.  The table index is
 * ((dst & 3) << 2) | (src & 3); each case is padded with LMEMCPY_8_PAD to
 * a 64-byte slot, so the slot address is .Lmemcpy_8 + (index << 6).
 * On entry r0 = dst and r1 = src; r2, r3 are used as scratch.
1122*b6cbf720SGianluca Guida */
1123*b6cbf720SGianluca Guida#define	LMEMCPY_8_LOG2	6	/* 64 bytes */
1124*b6cbf720SGianluca Guida#define	LMEMCPY_8_PAD	.align LMEMCPY_8_LOG2
1125*b6cbf720SGianluca Guida	LMEMCPY_8_PAD
1126*b6cbf720SGianluca Guida.Lmemcpy_8:
1127*b6cbf720SGianluca Guida	and	r2, r1, #0x03		/* r2 = src & 3 */
1128*b6cbf720SGianluca Guida	orr	r2, r2, r0, lsl #2	/* merge in dst << 2 */
1129*b6cbf720SGianluca Guida	ands	r2, r2, #0x0f		/* keep 4 index bits; Z set when index is 0 */
1130*b6cbf720SGianluca Guida	sub	r3, pc, #0x14		/* r3 = .Lmemcpy_8 (pc reads as this insn + 8) */
1131*b6cbf720SGianluca Guida	addne	pc, r3, r2, lsl #LMEMCPY_8_LOG2	/* index != 0: jump to its slot; else fall through */
1132*b6cbf720SGianluca Guida
1133*b6cbf720SGianluca Guida/*
1134*b6cbf720SGianluca Guida * 0000: dst is 32-bit aligned, src is 32-bit aligned
 *
 * Index 0: the conditional jump above is not taken and execution falls
 * through to here.  Copied as two words.
1135*b6cbf720SGianluca Guida */
1136*b6cbf720SGianluca Guida	ldr	r2, [r1]		/* bytes 0-3 */
1137*b6cbf720SGianluca Guida	ldr	r3, [r1, #0x04]		/* bytes 4-7 */
1138*b6cbf720SGianluca Guida	str	r2, [r0]
1139*b6cbf720SGianluca Guida	str	r3, [r0, #0x04]
1140*b6cbf720SGianluca Guida	bx	lr
1141*b6cbf720SGianluca Guida	LMEMCPY_8_PAD
1142*b6cbf720SGianluca Guida
1143*b6cbf720SGianluca Guida/*
1144*b6cbf720SGianluca Guida * 0001: dst is 32-bit aligned, src is 8-bit aligned
 *
 * (src & 3) == 1, so src-1 and src+3 are word-aligned; the last byte is
 * fetched separately.  'x' marks a byte outside the 8 being copied; it
 * is shifted out.
1145*b6cbf720SGianluca Guida */
1146*b6cbf720SGianluca Guida	ldr	r3, [r1, #-1]		/* BE:r3 = x012  LE:r3 = 210x */
1147*b6cbf720SGianluca Guida	ldr	r2, [r1, #0x03]		/* BE:r2 = 3456  LE:r2 = 6543 */
1148*b6cbf720SGianluca Guida	ldrb	r1, [r1, #0x07]		/* r1 = ...7 */
1149*b6cbf720SGianluca Guida#ifdef __ARMEB__
1150*b6cbf720SGianluca Guida	mov	r3, r3, lsl #8		/* r3 = 012. */
1151*b6cbf720SGianluca Guida	orr	r3, r3, r2, lsr #24	/* r3 = 0123 */
1152*b6cbf720SGianluca Guida	orr	r2, r1, r2, lsl #8	/* r2 = 4567 */
1153*b6cbf720SGianluca Guida#else
1154*b6cbf720SGianluca Guida	mov	r3, r3, lsr #8		/* r3 = .210 */
1155*b6cbf720SGianluca Guida	orr	r3, r3, r2, lsl #24	/* r3 = 3210 */
1156*b6cbf720SGianluca Guida	mov	r1, r1, lsl #24		/* r1 = 7... */
1157*b6cbf720SGianluca Guida	orr	r2, r1, r2, lsr #8	/* r2 = 7654 */
1158*b6cbf720SGianluca Guida#endif
1159*b6cbf720SGianluca Guida	str	r3, [r0]
1160*b6cbf720SGianluca Guida	str	r2, [r0, #0x04]
1161*b6cbf720SGianluca Guida	bx	lr
1162*b6cbf720SGianluca Guida	LMEMCPY_8_PAD
1163*b6cbf720SGianluca Guida
1164*b6cbf720SGianluca Guida/*
1165*b6cbf720SGianluca Guida * 0010: dst is 32-bit aligned, src is 16-bit aligned
 *
 * (src & 3) == 2, so src+2 is word-aligned: load halfword, word,
 * halfword and regroup into two word stores.
1166*b6cbf720SGianluca Guida */
1167*b6cbf720SGianluca Guida	ldrh	r2, [r1]		/* BE:r2 = ..01  LE:r2 = ..10 */
1168*b6cbf720SGianluca Guida	ldr	r3, [r1, #0x02]		/* BE:r3 = 2345  LE:r3 = 5432 */
1169*b6cbf720SGianluca Guida	ldrh	r1, [r1, #0x06]		/* BE:r1 = ..67  LE:r1 = ..76 */
1170*b6cbf720SGianluca Guida#ifdef __ARMEB__
1171*b6cbf720SGianluca Guida	mov	r2, r2, lsl #16		/* r2 = 01.. */
1172*b6cbf720SGianluca Guida	orr	r2, r2, r3, lsr #16	/* r2 = 0123 */
1173*b6cbf720SGianluca Guida	orr	r3, r1, r3, lsl #16	/* r3 = 4567 */
1174*b6cbf720SGianluca Guida#else
1175*b6cbf720SGianluca Guida	orr	r2, r2, r3, lsl #16	/* r2 = 3210 */
1176*b6cbf720SGianluca Guida	mov	r3, r3, lsr #16		/* r3 = ..54 */
1177*b6cbf720SGianluca Guida	orr	r3, r3, r1, lsl #16	/* r3 = 7654 */
1178*b6cbf720SGianluca Guida#endif
1179*b6cbf720SGianluca Guida	str	r2, [r0]
1180*b6cbf720SGianluca Guida	str	r3, [r0, #0x04]
1181*b6cbf720SGianluca Guida	bx	lr
1182*b6cbf720SGianluca Guida	LMEMCPY_8_PAD
1183*b6cbf720SGianluca Guida
1184*b6cbf720SGianluca Guida/*
1185*b6cbf720SGianluca Guida * 0011: dst is 32-bit aligned, src is 8-bit aligned
 *
 * (src & 3) == 3, so src+1 and src+5 are word-aligned; the first byte is
 * fetched separately.  'x' marks a byte outside the 8 being copied; it
 * is shifted out.
1186*b6cbf720SGianluca Guida */
1187*b6cbf720SGianluca Guida	ldrb	r3, [r1]		/* r3 = ...0 */
1188*b6cbf720SGianluca Guida	ldr	r2, [r1, #0x01]		/* BE:r2 = 1234  LE:r2 = 4321 */
1189*b6cbf720SGianluca Guida	ldr	r1, [r1, #0x05]		/* BE:r1 = 567x  LE:r1 = x765 */
1190*b6cbf720SGianluca Guida#ifdef __ARMEB__
1191*b6cbf720SGianluca Guida	mov	r3, r3, lsl #24		/* r3 = 0... */
1192*b6cbf720SGianluca Guida	orr	r3, r3, r2, lsr #8	/* r3 = 0123 */
1193*b6cbf720SGianluca Guida	mov	r2, r2, lsl #24		/* r2 = 4... */
1194*b6cbf720SGianluca Guida	orr	r2, r2, r1, lsr #8	/* r2 = 4567 */
1195*b6cbf720SGianluca Guida#else
1196*b6cbf720SGianluca Guida	orr	r3, r3, r2, lsl #8	/* r3 = 3210 */
1197*b6cbf720SGianluca Guida	mov	r2, r2, lsr #24		/* r2 = ...4 */
1198*b6cbf720SGianluca Guida	orr	r2, r2, r1, lsl #8	/* r2 = 7654 */
1199*b6cbf720SGianluca Guida#endif
1200*b6cbf720SGianluca Guida	str	r3, [r0]
1201*b6cbf720SGianluca Guida	str	r2, [r0, #0x04]
1202*b6cbf720SGianluca Guida	bx	lr
1203*b6cbf720SGianluca Guida	LMEMCPY_8_PAD
1204*b6cbf720SGianluca Guida
1205*b6cbf720SGianluca Guida/*
1206*b6cbf720SGianluca Guida * 0100: dst is 8-bit aligned, src is 32-bit aligned
 *
 * (dst & 3) == 1, so dst+1 is halfword-aligned and dst+3 is word-aligned:
 * the destination is written as byte, halfword, word, byte at offsets
 * 0, 1, 3 and 7.
1207*b6cbf720SGianluca Guida */
1208*b6cbf720SGianluca Guida	ldr	r3, [r1]		/* BE:r3 = 0123  LE:r3 = 3210 */
1209*b6cbf720SGianluca Guida	ldr	r2, [r1, #0x04]		/* BE:r2 = 4567  LE:r2 = 7654 */
1210*b6cbf720SGianluca Guida#ifdef __ARMEB__
1211*b6cbf720SGianluca Guida	mov	r1, r3, lsr #24		/* r1 = ...0 */
1212*b6cbf720SGianluca Guida	strb	r1, [r0]
1213*b6cbf720SGianluca Guida	mov	r1, r3, lsr #8		/* r1 = .012 */
1214*b6cbf720SGianluca Guida	strb	r2, [r0, #0x07]
1215*b6cbf720SGianluca Guida	mov	r3, r3, lsl #24		/* r3 = 3... */
1216*b6cbf720SGianluca Guida	orr	r3, r3, r2, lsr #8	/* r3 = 3456 */
1217*b6cbf720SGianluca Guida#else
1218*b6cbf720SGianluca Guida	strb	r3, [r0]
1219*b6cbf720SGianluca Guida	mov	r1, r2, lsr #24		/* r1 = ...7 */
1220*b6cbf720SGianluca Guida	strb	r1, [r0, #0x07]
1221*b6cbf720SGianluca Guida	mov	r1, r3, lsr #8		/* r1 = .321 */
1222*b6cbf720SGianluca Guida	mov	r3, r3, lsr #24		/* r3 = ...3 */
1223*b6cbf720SGianluca Guida	orr	r3, r3, r2, lsl #8	/* r3 = 6543 */
1224*b6cbf720SGianluca Guida#endif
1225*b6cbf720SGianluca Guida	strh	r1, [r0, #0x01]		/* low halfword of r1 = bytes 1,2 */
1226*b6cbf720SGianluca Guida	str	r3, [r0, #0x03]
1227*b6cbf720SGianluca Guida	bx	lr
1228*b6cbf720SGianluca Guida	LMEMCPY_8_PAD
1229*b6cbf720SGianluca Guida
1230*b6cbf720SGianluca Guida/*
1231*b6cbf720SGianluca Guida * 0101: dst is 8-bit aligned, src is 8-bit aligned
 *
 * Both pointers have low bits 01, so offset 1 is halfword-aligned and
 * offset 3 is word-aligned on both sides: copy as byte, halfword, word,
 * byte with no shifting.  ip is used as an extra scratch register.
1232*b6cbf720SGianluca Guida */
1233*b6cbf720SGianluca Guida	ldrb	r2, [r1]		/* byte 0 */
1234*b6cbf720SGianluca Guida	ldrh	r3, [r1, #0x01]		/* bytes 1,2 */
1235*b6cbf720SGianluca Guida	ldr	ip, [r1, #0x03]		/* bytes 3-6 */
1236*b6cbf720SGianluca Guida	ldrb	r1, [r1, #0x07]		/* byte 7 (src pointer no longer needed) */
1237*b6cbf720SGianluca Guida	strb	r2, [r0]
1238*b6cbf720SGianluca Guida	strh	r3, [r0, #0x01]
1239*b6cbf720SGianluca Guida	str	ip, [r0, #0x03]
1240*b6cbf720SGianluca Guida	strb	r1, [r0, #0x07]
1241*b6cbf720SGianluca Guida	bx	lr
1242*b6cbf720SGianluca Guida	LMEMCPY_8_PAD
1243*b6cbf720SGianluca Guida
1244*b6cbf720SGianluca Guida/*
1245*b6cbf720SGianluca Guida * 0110: dst is 8-bit aligned, src is 16-bit aligned
 *
 * (src & 3) == 2, so src+2 is word-aligned; (dst & 3) == 1, so dst+3 is
 * word-aligned.  The destination is written as byte, halfword, word,
 * byte at offsets 0, 1, 3 and 7.  ip is used as an extra scratch register.
1246*b6cbf720SGianluca Guida */
1247*b6cbf720SGianluca Guida	ldrh	r2, [r1]		/* BE:r2 = ..01  LE:r2 = ..10 */
1248*b6cbf720SGianluca Guida	ldr	r3, [r1, #0x02]		/* BE:r3 = 2345  LE:r3 = 5432 */
1249*b6cbf720SGianluca Guida	ldrh	r1, [r1, #0x06]		/* BE:r1 = ..67  LE:r1 = ..76 */
1250*b6cbf720SGianluca Guida#ifdef __ARMEB__
1251*b6cbf720SGianluca Guida	mov	ip, r2, lsr #8		/* ip = ...0 */
1252*b6cbf720SGianluca Guida	strb	ip, [r0]
1253*b6cbf720SGianluca Guida	mov	ip, r2, lsl #8		/* ip = .01. */
1254*b6cbf720SGianluca Guida	orr	ip, ip, r3, lsr #24	/* ip = .012 */
1255*b6cbf720SGianluca Guida	strb	r1, [r0, #0x07]
1256*b6cbf720SGianluca Guida	mov	r3, r3, lsl #8		/* r3 = 345. */
1257*b6cbf720SGianluca Guida	orr	r3, r3, r1, lsr #8	/* r3 = 3456 */
1258*b6cbf720SGianluca Guida#else
1259*b6cbf720SGianluca Guida	strb	r2, [r0]		/* 0 */
1260*b6cbf720SGianluca Guida	mov	ip, r1, lsr #8		/* ip = ...7 */
1261*b6cbf720SGianluca Guida	strb	ip, [r0, #0x07]		/* 7 */
1262*b6cbf720SGianluca Guida	mov	ip, r2, lsr #8		/* ip = ...1 */
1263*b6cbf720SGianluca Guida	orr	ip, ip, r3, lsl #8	/* ip = 4321 */
1264*b6cbf720SGianluca Guida	mov	r3, r3, lsr #8		/* r3 = .543 */
1265*b6cbf720SGianluca Guida	orr	r3, r3, r1, lsl #24	/* r3 = 6543 */
1266*b6cbf720SGianluca Guida#endif
1267*b6cbf720SGianluca Guida	strh	ip, [r0, #0x01]		/* low halfword of ip = bytes 1,2 */
1268*b6cbf720SGianluca Guida	str	r3, [r0, #0x03]
1269*b6cbf720SGianluca Guida	bx	lr
1270*b6cbf720SGianluca Guida	LMEMCPY_8_PAD
1271*b6cbf720SGianluca Guida
1272*b6cbf720SGianluca Guida/*
1273*b6cbf720SGianluca Guida * 0111: dst is 8-bit aligned, src is 8-bit aligned
 *
 * Loads:  byte, word, halfword, byte at source offsets 0, 1, 5 and 7.
 * Stores: byte, halfword, word, byte at destination offsets 0, 1, 3, 7.
 * Bytes 0 and 7 are copied unchanged.  The loaded word and halfword are
 * re-split with 16-bit shifts into a halfword (bytes 1-2) and a word
 * (bytes 3-6).
1274*b6cbf720SGianluca Guida */
1275*b6cbf720SGianluca Guida	ldrb	r3, [r1]		/* r3 = ...0 */
1276*b6cbf720SGianluca Guida	ldr	ip, [r1, #0x01]		/* BE:ip = 1234  LE:ip = 4321 */
1277*b6cbf720SGianluca Guida	ldrh	r2, [r1, #0x05]		/* BE:r2 = ..56  LE:r2 = ..65 */
1278*b6cbf720SGianluca Guida	ldrb	r1, [r1, #0x07]		/* r1 = ...7 */
1279*b6cbf720SGianluca Guida	strb	r3, [r0]
1280*b6cbf720SGianluca Guida	mov	r3, ip, lsr #16		/* BE:r3 = ..12  LE:r3 = ..43 */
1281*b6cbf720SGianluca Guida#ifdef __ARMEB__
1282*b6cbf720SGianluca Guida	strh	r3, [r0, #0x01]
1283*b6cbf720SGianluca Guida	orr	r2, r2, ip, lsl #16	/* r2 = 3456 */
1284*b6cbf720SGianluca Guida#else
/* low 16 bits of ip already hold bytes 1 and 2 */
1285*b6cbf720SGianluca Guida	strh	ip, [r0, #0x01]
1286*b6cbf720SGianluca Guida	orr	r2, r3, r2, lsl #16	/* r2 = 6543 */
1287*b6cbf720SGianluca Guida#endif
1288*b6cbf720SGianluca Guida	str	r2, [r0, #0x03]
1289*b6cbf720SGianluca Guida	strb	r1, [r0, #0x07]
1290*b6cbf720SGianluca Guida	bx	lr
1291*b6cbf720SGianluca Guida	LMEMCPY_8_PAD
1292*b6cbf720SGianluca Guida
1293*b6cbf720SGianluca Guida/*
1294*b6cbf720SGianluca Guida * 1000: dst is 16-bit aligned, src is 32-bit aligned
 *
 * Loads:  two words at source offsets 0 and 4.
 * Stores: halfword, word, halfword at destination offsets 0, 2 and 6.
 * r1 becomes scratch once both source words have been loaded.
1295*b6cbf720SGianluca Guida */
1296*b6cbf720SGianluca Guida	ldr	r2, [r1]		/* BE:r2 = 0123  LE:r2 = 3210 */
1297*b6cbf720SGianluca Guida	ldr	r3, [r1, #0x04]		/* BE:r3 = 4567  LE:r3 = 7654 */
1298*b6cbf720SGianluca Guida	mov	r1, r2, lsr #16		/* BE:r1 = ..01  LE:r1 = ..32 */
1299*b6cbf720SGianluca Guida#ifdef __ARMEB__
1300*b6cbf720SGianluca Guida	strh	r1, [r0]
1301*b6cbf720SGianluca Guida	mov	r1, r3, lsr #16		/* r1 = ..45 */
1302*b6cbf720SGianluca Guida	orr	r2, r1 ,r2, lsl #16	/* r2 = 2345 */
1303*b6cbf720SGianluca Guida#else
1304*b6cbf720SGianluca Guida	strh	r2, [r0]
1305*b6cbf720SGianluca Guida	orr	r2, r1, r3, lsl #16	/* r2 = 5432 */
1306*b6cbf720SGianluca Guida	mov	r3, r3, lsr #16		/* r3 = ..76 */
1307*b6cbf720SGianluca Guida#endif
1308*b6cbf720SGianluca Guida	str	r2, [r0, #0x02]
/* big-endian: r3 is unshifted and strh writes its low half, bytes 6 and 7 */
1309*b6cbf720SGianluca Guida	strh	r3, [r0, #0x06]
1310*b6cbf720SGianluca Guida	bx	lr
1311*b6cbf720SGianluca Guida	LMEMCPY_8_PAD
1312*b6cbf720SGianluca Guida
1313*b6cbf720SGianluca Guida/*
1314*b6cbf720SGianluca Guida * 1001: dst is 16-bit aligned, src is 8-bit aligned
 *
 * Loads:  word at source offset -1, word at offset 3, byte at offset 7.
 * Stores: halfword, word, halfword at destination offsets 0, 2 and 6.
 * The first load starts one byte before the source buffer.  That extra
 * byte is shown as 'x' in the diagrams and is shifted out or left above
 * the stored halfword, so it never reaches the destination.
1315*b6cbf720SGianluca Guida */
1316*b6cbf720SGianluca Guida	ldr	r2, [r1, #-1]		/* BE:r2 = x012  LE:r2 = 210x */
1317*b6cbf720SGianluca Guida	ldr	r3, [r1, #0x03]		/* BE:r3 = 3456  LE:r3 = 6543 */
1318*b6cbf720SGianluca Guida	ldrb	ip, [r1, #0x07]		/* ip = ...7 */
1319*b6cbf720SGianluca Guida	mov	r1, r2, lsr #8		/* BE:r1 = .x01  LE:r1 = .210 */
1320*b6cbf720SGianluca Guida	strh	r1, [r0]
1321*b6cbf720SGianluca Guida#ifdef __ARMEB__
1322*b6cbf720SGianluca Guida	mov	r1, r2, lsl #24		/* r1 = 2... */
1323*b6cbf720SGianluca Guida	orr	r1, r1, r3, lsr #8	/* r1 = 2345 */
1324*b6cbf720SGianluca Guida	orr	r3, ip, r3, lsl #8	/* r3 = 4567 */
1325*b6cbf720SGianluca Guida#else
1326*b6cbf720SGianluca Guida	mov	r1, r2, lsr #24		/* r1 = ...2 */
1327*b6cbf720SGianluca Guida	orr	r1, r1, r3, lsl #8	/* r1 = 5432 */
1328*b6cbf720SGianluca Guida	mov	r3, r3, lsr #24		/* r3 = ...6 */
1329*b6cbf720SGianluca Guida	orr	r3, r3, ip, lsl #8	/* r3 = ..76 */
1330*b6cbf720SGianluca Guida#endif
1331*b6cbf720SGianluca Guida	str	r1, [r0, #0x02]
1332*b6cbf720SGianluca Guida	strh	r3, [r0, #0x06]
1333*b6cbf720SGianluca Guida	bx	lr
1334*b6cbf720SGianluca Guida	LMEMCPY_8_PAD
1335*b6cbf720SGianluca Guida
1336*b6cbf720SGianluca Guida/*
1337*b6cbf720SGianluca Guida * 1010: dst is 16-bit aligned, src is 16-bit aligned
 *
 * Halfword, word, halfword at offsets 0, 2 and 6 on both sides, so the
 * registers are stored exactly as loaded.  All loads precede the stores.
1338*b6cbf720SGianluca Guida */
1339*b6cbf720SGianluca Guida	ldrh	r2, [r1]
1340*b6cbf720SGianluca Guida	ldr	ip, [r1, #0x02]
1341*b6cbf720SGianluca Guida	ldrh	r3, [r1, #0x06]
1342*b6cbf720SGianluca Guida	strh	r2, [r0]
1343*b6cbf720SGianluca Guida	str	ip, [r0, #0x02]
1344*b6cbf720SGianluca Guida	strh	r3, [r0, #0x06]
1345*b6cbf720SGianluca Guida	bx	lr
1346*b6cbf720SGianluca Guida	LMEMCPY_8_PAD
1347*b6cbf720SGianluca Guida
1348*b6cbf720SGianluca Guida/*
1349*b6cbf720SGianluca Guida * 1011: dst is 16-bit aligned, src is 8-bit aligned
 *
 * Loads:  word at source offset 5, word at offset 1, byte at offset 0.
 * Stores: halfword, word, halfword at destination offsets 6, 2 and 0.
 * The copy is assembled from the high end downwards.  The first load
 * extends one byte past the 8 source bytes.  That extra byte is shown as
 * 'x' in the diagrams and never reaches the destination.
1350*b6cbf720SGianluca Guida */
1351*b6cbf720SGianluca Guida	ldr	r3, [r1, #0x05]		/* BE:r3 = 567x  LE:r3 = x765 */
1352*b6cbf720SGianluca Guida	ldr	r2, [r1, #0x01]		/* BE:r2 = 1234  LE:r2 = 4321 */
1353*b6cbf720SGianluca Guida	ldrb	ip, [r1]		/* ip = ...0 */
1354*b6cbf720SGianluca Guida	mov	r1, r3, lsr #8		/* BE:r1 = .567  LE:r1 = .x76 */
1355*b6cbf720SGianluca Guida	strh	r1, [r0, #0x06]
1356*b6cbf720SGianluca Guida#ifdef __ARMEB__
1357*b6cbf720SGianluca Guida	mov	r3, r3, lsr #24		/* r3 = ...5 */
1358*b6cbf720SGianluca Guida	orr	r3, r3, r2, lsl #8	/* r3 = 2345 */
1359*b6cbf720SGianluca Guida	mov	r2, r2, lsr #24		/* r2 = ...1 */
1360*b6cbf720SGianluca Guida	orr	r2, r2, ip, lsl #8	/* r2 = ..01 */
1361*b6cbf720SGianluca Guida#else
1362*b6cbf720SGianluca Guida	mov	r3, r3, lsl #24		/* r3 = 5... */
1363*b6cbf720SGianluca Guida	orr	r3, r3, r2, lsr #8	/* r3 = 5432 */
1364*b6cbf720SGianluca Guida	orr	r2, ip, r2, lsl #8	/* r2 = 3210 */
1365*b6cbf720SGianluca Guida#endif
1366*b6cbf720SGianluca Guida	str	r3, [r0, #0x02]
/* only the low halfword of r2 (bytes 0 and 1) is written */
1367*b6cbf720SGianluca Guida	strh	r2, [r0]
1368*b6cbf720SGianluca Guida	bx	lr
1369*b6cbf720SGianluca Guida	LMEMCPY_8_PAD
1370*b6cbf720SGianluca Guida
1371*b6cbf720SGianluca Guida/*
1372*b6cbf720SGianluca Guida * 1100: dst is 8-bit aligned, src is 32-bit aligned
 *
 * Loads:  two words at source offsets 4 and 0.
 * Stores: byte, word, halfword, byte at destination offsets 0, 1, 5, 7
 *         (naturally aligned when dst sits at byte 3 of a word).
 * The halfword for offset 5 is stored first, before the endian-specific
 * code splits off the remaining bytes.
1373*b6cbf720SGianluca Guida */
1374*b6cbf720SGianluca Guida	ldr	r3, [r1, #0x04]		/* BE:r3 = 4567  LE:r3 = 7654 */
1375*b6cbf720SGianluca Guida	ldr	r2, [r1]		/* BE:r2 = 0123  LE:r2 = 3210 */
1376*b6cbf720SGianluca Guida	mov	r1, r3, lsr #8		/* BE:r1 = .456  LE:r1 = .765 */
1377*b6cbf720SGianluca Guida	strh	r1, [r0, #0x05]
1378*b6cbf720SGianluca Guida#ifdef __ARMEB__
1379*b6cbf720SGianluca Guida	strb	r3, [r0, #0x07]
1380*b6cbf720SGianluca Guida	mov	r1, r2, lsr #24		/* r1 = ...0 */
1381*b6cbf720SGianluca Guida	strb	r1, [r0]
1382*b6cbf720SGianluca Guida	mov	r2, r2, lsl #8		/* r2 = 123. */
1383*b6cbf720SGianluca Guida	orr	r2, r2, r3, lsr #24	/* r2 = 1234 */
1384*b6cbf720SGianluca Guida	str	r2, [r0, #0x01]
1385*b6cbf720SGianluca Guida#else
1386*b6cbf720SGianluca Guida	strb	r2, [r0]
1387*b6cbf720SGianluca Guida	mov	r1, r3, lsr #24		/* r1 = ...7 */
1388*b6cbf720SGianluca Guida	strb	r1, [r0, #0x07]
1389*b6cbf720SGianluca Guida	mov	r2, r2, lsr #8		/* r2 = .321 */
1390*b6cbf720SGianluca Guida	orr	r2, r2, r3, lsl #24	/* r2 = 4321 */
1391*b6cbf720SGianluca Guida	str	r2, [r0, #0x01]
1392*b6cbf720SGianluca Guida#endif
1393*b6cbf720SGianluca Guida	bx	lr
1394*b6cbf720SGianluca Guida	LMEMCPY_8_PAD
1395*b6cbf720SGianluca Guida
1396*b6cbf720SGianluca Guida/*
1397*b6cbf720SGianluca Guida * 1101: dst is 8-bit aligned, src is 8-bit aligned
 *
 * Loads:  byte, halfword, word, byte at source offsets 0, 1, 3 and 7.
 * Stores: byte, word, halfword, byte at destination offsets 0, 1, 5, 7.
 * Bytes 0 and 7 are copied unchanged.  The loaded halfword and word are
 * re-split with 16-bit shifts into a word (bytes 1-4) and a halfword
 * (bytes 5-6).
1398*b6cbf720SGianluca Guida */
1399*b6cbf720SGianluca Guida	ldrb	r3, [r1]		/* r3 = ...0 */
1400*b6cbf720SGianluca Guida	ldrh	r2, [r1, #0x01]		/* BE:r2 = ..12  LE:r2 = ..21 */
1401*b6cbf720SGianluca Guida	ldr	ip, [r1, #0x03]		/* BE:ip = 3456  LE:ip = 6543 */
1402*b6cbf720SGianluca Guida	ldrb	r1, [r1, #0x07]		/* r1 = ...7 */
1403*b6cbf720SGianluca Guida	strb	r3, [r0]
1404*b6cbf720SGianluca Guida	mov	r3, ip, lsr #16		/* BE:r3 = ..34  LE:r3 = ..65 */
1405*b6cbf720SGianluca Guida#ifdef __ARMEB__
/* low 16 bits of ip already hold bytes 5 and 6 */
1406*b6cbf720SGianluca Guida	strh	ip, [r0, #0x05]
1407*b6cbf720SGianluca Guida	orr	r2, r3, r2, lsl #16	/* r2 = 1234 */
1408*b6cbf720SGianluca Guida#else
1409*b6cbf720SGianluca Guida	strh	r3, [r0, #0x05]
1410*b6cbf720SGianluca Guida	orr	r2, r2, ip, lsl #16	/* r2 = 4321 */
1411*b6cbf720SGianluca Guida#endif
1412*b6cbf720SGianluca Guida	str	r2, [r0, #0x01]
1413*b6cbf720SGianluca Guida	strb	r1, [r0, #0x07]
1414*b6cbf720SGianluca Guida	bx	lr
1415*b6cbf720SGianluca Guida	LMEMCPY_8_PAD
1416*b6cbf720SGianluca Guida
1417*b6cbf720SGianluca Guida/*
1418*b6cbf720SGianluca Guida * 1110: dst is 8-bit aligned, src is 16-bit aligned
 *
 * Loads:  halfword, word, halfword at source offsets 0, 2 and 6.
 * Stores: byte, word, halfword, byte at destination offsets 0, 1, 5, 7.
 * The final halfword store writes only the low 16 bits of r1, so any
 * byte left above them is ignored.
1419*b6cbf720SGianluca Guida */
1420*b6cbf720SGianluca Guida	ldrh	r2, [r1]		/* BE:r2 = ..01  LE:r2 = ..10 */
1421*b6cbf720SGianluca Guida	ldr	r3, [r1, #0x02]		/* BE:r3 = 2345  LE:r3 = 5432 */
1422*b6cbf720SGianluca Guida	ldrh	r1, [r1, #0x06]		/* BE:r1 = ..67  LE:r1 = ..76 */
1423*b6cbf720SGianluca Guida#ifdef __ARMEB__
1424*b6cbf720SGianluca Guida	mov	ip, r2, lsr #8		/* ip = ...0 */
1425*b6cbf720SGianluca Guida	strb	ip, [r0]
1426*b6cbf720SGianluca Guida	mov	ip, r2, lsl #24		/* ip = 1... */
1427*b6cbf720SGianluca Guida	orr	ip, ip, r3, lsr #8	/* ip = 1234 */
1428*b6cbf720SGianluca Guida	strb	r1, [r0, #0x07]
1429*b6cbf720SGianluca Guida	mov	r1, r1, lsr #8		/* r1 = ...6 */
1430*b6cbf720SGianluca Guida	orr	r1, r1, r3, lsl #8	/* r1 = 3456 */
1431*b6cbf720SGianluca Guida#else
1432*b6cbf720SGianluca Guida	strb	r2, [r0]
1433*b6cbf720SGianluca Guida	mov	ip, r2, lsr #8		/* ip = ...1 */
1434*b6cbf720SGianluca Guida	orr	ip, ip, r3, lsl #8	/* ip = 4321 */
1435*b6cbf720SGianluca Guida	mov	r2, r1, lsr #8		/* r2 = ...7 */
1436*b6cbf720SGianluca Guida	strb	r2, [r0, #0x07]
1437*b6cbf720SGianluca Guida	mov	r1, r1, lsl #8		/* r1 = .76. */
1438*b6cbf720SGianluca Guida	orr	r1, r1, r3, lsr #24	/* r1 = .765 */
1439*b6cbf720SGianluca Guida#endif
1440*b6cbf720SGianluca Guida	str	ip, [r0, #0x01]
1441*b6cbf720SGianluca Guida	strh	r1, [r0, #0x05]
1442*b6cbf720SGianluca Guida	bx	lr
1443*b6cbf720SGianluca Guida	LMEMCPY_8_PAD
1444*b6cbf720SGianluca Guida
1445*b6cbf720SGianluca Guida/*
1446*b6cbf720SGianluca Guida * 1111: dst is 8-bit aligned, src is 8-bit aligned
 *
 * Byte, word, halfword, byte at offsets 0, 1, 5 and 7 on both sides, so
 * the registers are stored exactly as loaded.  All loads precede the
 * stores; r1 is overwritten by the last load.
1447*b6cbf720SGianluca Guida */
1448*b6cbf720SGianluca Guida	ldrb	r2, [r1]
1449*b6cbf720SGianluca Guida	ldr	ip, [r1, #0x01]
1450*b6cbf720SGianluca Guida	ldrh	r3, [r1, #0x05]
1451*b6cbf720SGianluca Guida	ldrb	r1, [r1, #0x07]
1452*b6cbf720SGianluca Guida	strb	r2, [r0]
1453*b6cbf720SGianluca Guida	str	ip, [r0, #0x01]
1454*b6cbf720SGianluca Guida	strh	r3, [r0, #0x05]
1455*b6cbf720SGianluca Guida	strb	r1, [r0, #0x07]
1456*b6cbf720SGianluca Guida	bx	lr
1457*b6cbf720SGianluca Guida	LMEMCPY_8_PAD
1458*b6cbf720SGianluca Guida
1459*b6cbf720SGianluca Guida/******************************************************************************
1460*b6cbf720SGianluca Guida * Special case for 12 byte copies
 *
 * On entry r0 is the destination and r1 the source.  The low two bits
 * of each pointer form a 4-bit index, (dst & 3) * 4 + (src & 3), that
 * selects one of the 128-byte slots that follow.  Index 0, where both
 * pointers are word aligned, is not dispatched: the conditional jump is
 * skipped and execution falls through into the code placed directly
 * after the dispatch sequence.
1461*b6cbf720SGianluca Guida */
1462*b6cbf720SGianluca Guida#define	LMEMCPY_C_LOG2	7	/* 128 bytes */
1463*b6cbf720SGianluca Guida#define	LMEMCPY_C_PAD	.align LMEMCPY_C_LOG2
1464*b6cbf720SGianluca Guida	LMEMCPY_C_PAD
1465*b6cbf720SGianluca Guida.Lmemcpy_c:
/* r2 = src & 3 */
1466*b6cbf720SGianluca Guida	and	r2, r1, #0x03
/* r2 |= dst << 2, which puts the dst offset in bits 3..2 */
1467*b6cbf720SGianluca Guida	orr	r2, r2, r0, lsl #2
/* keep the 4-bit index and set Z when it is zero */
1468*b6cbf720SGianluca Guida	ands	r2, r2, #0x0f
/*
 * r3 = address of .Lmemcpy_c.  This instruction sits 0x0c bytes after
 * the label and pc reads as the current instruction plus 8, hence 0x14.
 */
1469*b6cbf720SGianluca Guida	sub	r3, pc, #0x14
/* index != 0: jump to .Lmemcpy_c + index * 128 */
1470*b6cbf720SGianluca Guida	addne	pc, r3, r2, lsl #LMEMCPY_C_LOG2
1471*b6cbf720SGianluca Guida
1472*b6cbf720SGianluca Guida/*
1473*b6cbf720SGianluca Guida * 0000: dst is 32-bit aligned, src is 32-bit aligned
 *
 * Three word loads followed by three word stores.  r1 is overwritten by
 * the last load because the source pointer is no longer needed.
1474*b6cbf720SGianluca Guida */
1475*b6cbf720SGianluca Guida	ldr	r2, [r1]
1476*b6cbf720SGianluca Guida	ldr	r3, [r1, #0x04]
1477*b6cbf720SGianluca Guida	ldr	r1, [r1, #0x08]
1478*b6cbf720SGianluca Guida	str	r2, [r0]
1479*b6cbf720SGianluca Guida	str	r3, [r0, #0x04]
1480*b6cbf720SGianluca Guida	str	r1, [r0, #0x08]
1481*b6cbf720SGianluca Guida	bx	lr
1482*b6cbf720SGianluca Guida	LMEMCPY_C_PAD
1483*b6cbf720SGianluca Guida
1484*b6cbf720SGianluca Guida/*
1485*b6cbf720SGianluca Guida * 0001: dst is 32-bit aligned, src is 8-bit aligned
 *
 * Loads:  byte at source offset 0xb, words at offsets 7, 3 and -1.
 * Stores: three words at destination offsets 8, 4 and 0.
 * The last load starts one byte before the source buffer.  That extra
 * byte is shown as 'x' in the diagrams and is shifted out before the
 * store to offset 0.
 *
 * In the register diagrams each character is one byte, most significant
 * first: a hex digit names that source byte and '.' is a zero byte.
1486*b6cbf720SGianluca Guida */
1487*b6cbf720SGianluca Guida	ldrb	r2, [r1, #0xb]		/* r2 = ...B */
1488*b6cbf720SGianluca Guida	ldr	ip, [r1, #0x07]		/* BE:ip = 789A  LE:ip = A987 */
1489*b6cbf720SGianluca Guida	ldr	r3, [r1, #0x03]		/* BE:r3 = 3456  LE:r3 = 6543 */
1490*b6cbf720SGianluca Guida	ldr	r1, [r1, #-1]		/* BE:r1 = x012  LE:r1 = 210x */
1491*b6cbf720SGianluca Guida#ifdef __ARMEB__
1492*b6cbf720SGianluca Guida	orr	r2, r2, ip, lsl #8	/* r2 = 89AB */
1493*b6cbf720SGianluca Guida	str	r2, [r0, #0x08]
1494*b6cbf720SGianluca Guida	mov	r2, ip, lsr #24		/* r2 = ...7 */
1495*b6cbf720SGianluca Guida	orr	r2, r2, r3, lsl #8	/* r2 = 4567 */
1496*b6cbf720SGianluca Guida	mov	r1, r1, lsl #8		/* r1 = 012. */
1497*b6cbf720SGianluca Guida	orr	r1, r1, r3, lsr #24	/* r1 = 0123 */
1498*b6cbf720SGianluca Guida#else
1499*b6cbf720SGianluca Guida	mov	r2, r2, lsl #24		/* r2 = B... */
1500*b6cbf720SGianluca Guida	orr	r2, r2, ip, lsr #8	/* r2 = BA98 */
1501*b6cbf720SGianluca Guida	str	r2, [r0, #0x08]
1502*b6cbf720SGianluca Guida	mov	r2, ip, lsl #24		/* r2 = 7... */
1503*b6cbf720SGianluca Guida	orr	r2, r2, r3, lsr #8	/* r2 = 7654 */
1504*b6cbf720SGianluca Guida	mov	r1, r1, lsr #8		/* r1 = .210 */
1505*b6cbf720SGianluca Guida	orr	r1, r1, r3, lsl #24	/* r1 = 3210 */
1506*b6cbf720SGianluca Guida#endif
1507*b6cbf720SGianluca Guida	str	r2, [r0, #0x04]
1508*b6cbf720SGianluca Guida	str	r1, [r0]
1509*b6cbf720SGianluca Guida	bx	lr
1510*b6cbf720SGianluca Guida	LMEMCPY_C_PAD
1511*b6cbf720SGianluca Guida
1512*b6cbf720SGianluca Guida/*
1513*b6cbf720SGianluca Guida * 0010: dst is 32-bit aligned, src is 16-bit aligned
 *
 * Loads:  halfword, word, word, halfword at source offsets 0, 2, 6, 0xa.
 * Stores: three words at destination offsets 0, 4 and 8.
 * Each stored word combines halves of two neighbouring loads using
 * 16-bit shifts.
1514*b6cbf720SGianluca Guida */
1515*b6cbf720SGianluca Guida	ldrh	r2, [r1]		/* BE:r2 = ..01  LE:r2 = ..10 */
1516*b6cbf720SGianluca Guida	ldr	r3, [r1, #0x02]		/* BE:r3 = 2345  LE:r3 = 5432 */
1517*b6cbf720SGianluca Guida	ldr	ip, [r1, #0x06]		/* BE:ip = 6789  LE:ip = 9876 */
1518*b6cbf720SGianluca Guida	ldrh	r1, [r1, #0x0a]		/* BE:r1 = ..AB  LE:r1 = ..BA */
1519*b6cbf720SGianluca Guida#ifdef __ARMEB__
1520*b6cbf720SGianluca Guida	mov	r2, r2, lsl #16		/* r2 = 01.. */
1521*b6cbf720SGianluca Guida	orr	r2, r2, r3, lsr #16	/* r2 = 0123 */
1522*b6cbf720SGianluca Guida	str	r2, [r0]
1523*b6cbf720SGianluca Guida	mov	r3, r3, lsl #16		/* r3 = 45.. */
1524*b6cbf720SGianluca Guida	orr	r3, r3, ip, lsr #16	/* r3 = 4567 */
1525*b6cbf720SGianluca Guida	orr	r1, r1, ip, lsl #16	/* r1 = 89AB */
1526*b6cbf720SGianluca Guida#else
1527*b6cbf720SGianluca Guida	orr	r2, r2, r3, lsl #16	/* r2 = 3210 */
1528*b6cbf720SGianluca Guida	str	r2, [r0]
1529*b6cbf720SGianluca Guida	mov	r3, r3, lsr #16		/* r3 = ..54 */
1530*b6cbf720SGianluca Guida	orr	r3, r3, ip, lsl #16	/* r3 = 7654 */
1531*b6cbf720SGianluca Guida	mov	r1, r1, lsl #16		/* r1 = BA.. */
1532*b6cbf720SGianluca Guida	orr	r1, r1, ip, lsr #16	/* r1 = BA98 */
1533*b6cbf720SGianluca Guida#endif
1534*b6cbf720SGianluca Guida	str	r3, [r0, #0x04]
1535*b6cbf720SGianluca Guida	str	r1, [r0, #0x08]
1536*b6cbf720SGianluca Guida	bx	lr
1537*b6cbf720SGianluca Guida	LMEMCPY_C_PAD
1538*b6cbf720SGianluca Guida
1539*b6cbf720SGianluca Guida/*
1540*b6cbf720SGianluca Guida * 0011: dst is 32-bit aligned, src is 8-bit aligned
 *
 * Loads:  byte at source offset 0, words at offsets 1, 5 and 9.
 * Stores: three words at destination offsets 0, 4 and 8.
 * The last load extends one byte past the 12 source bytes.  That extra
 * byte is shown as 'x' in the diagrams and is shifted out before the
 * store to offset 8.
1541*b6cbf720SGianluca Guida */
1542*b6cbf720SGianluca Guida	ldrb	r2, [r1]		/* r2 = ...0 */
1543*b6cbf720SGianluca Guida	ldr	r3, [r1, #0x01]		/* BE:r3 = 1234  LE:r3 = 4321 */
1544*b6cbf720SGianluca Guida	ldr	ip, [r1, #0x05]		/* BE:ip = 5678  LE:ip = 8765 */
1545*b6cbf720SGianluca Guida	ldr	r1, [r1, #0x09]		/* BE:r1 = 9ABx  LE:r1 = xBA9 */
1546*b6cbf720SGianluca Guida#ifdef __ARMEB__
1547*b6cbf720SGianluca Guida	mov	r2, r2, lsl #24		/* r2 = 0... */
1548*b6cbf720SGianluca Guida	orr	r2, r2, r3, lsr #8	/* r2 = 0123 */
1549*b6cbf720SGianluca Guida	str	r2, [r0]
1550*b6cbf720SGianluca Guida	mov	r3, r3, lsl #24		/* r3 = 4... */
1551*b6cbf720SGianluca Guida	orr	r3, r3, ip, lsr #8	/* r3 = 4567 */
1552*b6cbf720SGianluca Guida	mov	r1, r1, lsr #8		/* r1 = .9AB */
1553*b6cbf720SGianluca Guida	orr	r1, r1, ip, lsl #24	/* r1 = 89AB */
1554*b6cbf720SGianluca Guida#else
1555*b6cbf720SGianluca Guida	orr	r2, r2, r3, lsl #8	/* r2 = 3210 */
1556*b6cbf720SGianluca Guida	str	r2, [r0]
1557*b6cbf720SGianluca Guida	mov	r3, r3, lsr #24		/* r3 = ...4 */
1558*b6cbf720SGianluca Guida	orr	r3, r3, ip, lsl #8	/* r3 = 7654 */
1559*b6cbf720SGianluca Guida	mov	r1, r1, lsl #8		/* r1 = BA9. */
1560*b6cbf720SGianluca Guida	orr	r1, r1, ip, lsr #24	/* r1 = BA98 */
1561*b6cbf720SGianluca Guida#endif
1562*b6cbf720SGianluca Guida	str	r3, [r0, #0x04]
1563*b6cbf720SGianluca Guida	str	r1, [r0, #0x08]
1564*b6cbf720SGianluca Guida	bx	lr
1565*b6cbf720SGianluca Guida	LMEMCPY_C_PAD
1566*b6cbf720SGianluca Guida
1567*b6cbf720SGianluca Guida/*
1568*b6cbf720SGianluca Guida * 0100: dst is 8-bit aligned (byte 1), src is 32-bit aligned
 *
 * Loads:  three words at source offsets 0, 4 and 8.
 * Stores: byte, halfword, word, word, byte at destination offsets
 *         0, 1, 3, 7 and 0xb.
 * On big-endian the final strb uses ip unshifted, because the low byte
 * of the third word is already source byte B.
1569*b6cbf720SGianluca Guida */
1570*b6cbf720SGianluca Guida	ldr	r2, [r1]		/* BE:r2 = 0123  LE:r2 = 3210 */
1571*b6cbf720SGianluca Guida	ldr	r3, [r1, #0x04]		/* BE:r3 = 4567  LE:r3 = 7654 */
1572*b6cbf720SGianluca Guida	ldr	ip, [r1, #0x08]		/* BE:ip = 89AB  LE:ip = BA98 */
1573*b6cbf720SGianluca Guida	mov	r1, r2, lsr #8		/* BE:r1 = .012  LE:r1 = .321 */
1574*b6cbf720SGianluca Guida	strh	r1, [r0, #0x01]
1575*b6cbf720SGianluca Guida#ifdef __ARMEB__
1576*b6cbf720SGianluca Guida	mov	r1, r2, lsr #24		/* r1 = ...0 */
1577*b6cbf720SGianluca Guida	strb	r1, [r0]
1578*b6cbf720SGianluca Guida	mov	r1, r2, lsl #24		/* r1 = 3... */
1579*b6cbf720SGianluca Guida	orr	r2, r1, r3, lsr #8	/* r2 = 3456 */
1580*b6cbf720SGianluca Guida	mov	r1, r3, lsl #24		/* r1 = 7... */
1581*b6cbf720SGianluca Guida	orr	r1, r1, ip, lsr #8	/* r1 = 789A */
1582*b6cbf720SGianluca Guida#else
1583*b6cbf720SGianluca Guida	strb	r2, [r0]
1584*b6cbf720SGianluca Guida	mov	r1, r2, lsr #24		/* r1 = ...3 */
1585*b6cbf720SGianluca Guida	orr	r2, r1, r3, lsl #8	/* r2 = 6543 */
1586*b6cbf720SGianluca Guida	mov	r1, r3, lsr #24		/* r1 = ...7 */
1587*b6cbf720SGianluca Guida	orr	r1, r1, ip, lsl #8	/* r1 = A987 */
1588*b6cbf720SGianluca Guida	mov	ip, ip, lsr #24		/* ip = ...B */
1589*b6cbf720SGianluca Guida#endif
1590*b6cbf720SGianluca Guida	str	r2, [r0, #0x03]
1591*b6cbf720SGianluca Guida	str	r1, [r0, #0x07]
1592*b6cbf720SGianluca Guida	strb	ip, [r0, #0x0b]
1593*b6cbf720SGianluca Guida	bx	lr
1594*b6cbf720SGianluca Guida	LMEMCPY_C_PAD
1595*b6cbf720SGianluca Guida
1596*b6cbf720SGianluca Guida/*
1597*b6cbf720SGianluca Guida * 0101: dst is 8-bit aligned (byte 1), src is 8-bit aligned (byte 1)
 *
 * Byte, halfword, word, word, byte at offsets 0, 1, 3, 7 and 0xb on both
 * sides, so no shifting is needed.  r2 is stored early and then reused
 * for the word at offset 7, which keeps the slot to r1-r3 and ip.
1598*b6cbf720SGianluca Guida */
1599*b6cbf720SGianluca Guida	ldrb	r2, [r1]
1600*b6cbf720SGianluca Guida	ldrh	r3, [r1, #0x01]
1601*b6cbf720SGianluca Guida	ldr	ip, [r1, #0x03]
1602*b6cbf720SGianluca Guida	strb	r2, [r0]
1603*b6cbf720SGianluca Guida	ldr	r2, [r1, #0x07]
1604*b6cbf720SGianluca Guida	ldrb	r1, [r1, #0x0b]
1605*b6cbf720SGianluca Guida	strh	r3, [r0, #0x01]
1606*b6cbf720SGianluca Guida	str	ip, [r0, #0x03]
1607*b6cbf720SGianluca Guida	str	r2, [r0, #0x07]
1608*b6cbf720SGianluca Guida	strb	r1, [r0, #0x0b]
1609*b6cbf720SGianluca Guida	bx	lr
1610*b6cbf720SGianluca Guida	LMEMCPY_C_PAD
1611*b6cbf720SGianluca Guida
1612*b6cbf720SGianluca Guida/*
1613*b6cbf720SGianluca Guida * 0110: dst is 8-bit aligned (byte 1), src is 16-bit aligned
 *
 * Loads:  halfword, word, word, halfword at source offsets 0, 2, 6, 0xa.
 * Stores: byte, halfword, word, word, byte at destination offsets
 *         0, 1, 3, 7 and 0xb.
 * On big-endian the rotate puts byte 0 in the low byte for the strb and
 * byte 1 in the top byte, from where it is shifted back down to pair
 * with byte 2.
1614*b6cbf720SGianluca Guida */
1615*b6cbf720SGianluca Guida	ldrh	r2, [r1]		/* BE:r2 = ..01  LE:r2 = ..10 */
1616*b6cbf720SGianluca Guida	ldr	r3, [r1, #0x02]		/* BE:r3 = 2345  LE:r3 = 5432 */
1617*b6cbf720SGianluca Guida	ldr	ip, [r1, #0x06]		/* BE:ip = 6789  LE:ip = 9876 */
1618*b6cbf720SGianluca Guida	ldrh	r1, [r1, #0x0a]		/* BE:r1 = ..AB  LE:r1 = ..BA */
1619*b6cbf720SGianluca Guida#ifdef __ARMEB__
1620*b6cbf720SGianluca Guida	mov	r2, r2, ror #8		/* r2 = 1..0 */
1621*b6cbf720SGianluca Guida	strb	r2, [r0]
1622*b6cbf720SGianluca Guida	mov	r2, r2, lsr #16		/* r2 = ..1. */
1623*b6cbf720SGianluca Guida	orr	r2, r2, r3, lsr #24	/* r2 = ..12 */
1624*b6cbf720SGianluca Guida	strh	r2, [r0, #0x01]
1625*b6cbf720SGianluca Guida	mov	r2, r3, lsl #8		/* r2 = 345. */
1626*b6cbf720SGianluca Guida	orr	r3, r2, ip, lsr #24	/* r3 = 3456 */
1627*b6cbf720SGianluca Guida	mov	r2, ip, lsl #8		/* r2 = 789. */
1628*b6cbf720SGianluca Guida	orr	r2, r2, r1, lsr #8	/* r2 = 789A */
1629*b6cbf720SGianluca Guida#else
1630*b6cbf720SGianluca Guida	strb	r2, [r0]
1631*b6cbf720SGianluca Guida	mov	r2, r2, lsr #8		/* r2 = ...1 */
1632*b6cbf720SGianluca Guida	orr	r2, r2, r3, lsl #8	/* r2 = 4321 */
1633*b6cbf720SGianluca Guida	strh	r2, [r0, #0x01]
1634*b6cbf720SGianluca Guida	mov	r2, r3, lsr #8		/* r2 = .543 */
1635*b6cbf720SGianluca Guida	orr	r3, r2, ip, lsl #24	/* r3 = 6543 */
1636*b6cbf720SGianluca Guida	mov	r2, ip, lsr #8		/* r2 = .987 */
1637*b6cbf720SGianluca Guida	orr	r2, r2, r1, lsl #24	/* r2 = A987 */
1638*b6cbf720SGianluca Guida	mov	r1, r1, lsr #8		/* r1 = ...B */
1639*b6cbf720SGianluca Guida#endif
1640*b6cbf720SGianluca Guida	str	r3, [r0, #0x03]
1641*b6cbf720SGianluca Guida	str	r2, [r0, #0x07]
/* big-endian: r1 is unshifted and its low byte is already source byte B */
1642*b6cbf720SGianluca Guida	strb	r1, [r0, #0x0b]
1643*b6cbf720SGianluca Guida	bx	lr
1644*b6cbf720SGianluca Guida	LMEMCPY_C_PAD
1645*b6cbf720SGianluca Guida
1646*b6cbf720SGianluca Guida/*
1647*b6cbf720SGianluca Guida * 0111: dst is 8-bit aligned (byte 1), src is 8-bit aligned (byte 3)
 *
 * Loads:  byte at source offset 0, words at offsets 1, 5 and 9.
 * Stores: byte, halfword, word, word, byte at destination offsets
 *         0, 1, 3, 7 and 0xb.
 * The last load extends one byte past the 12 source bytes, shown as
 * 'x'.  The final strb writes only the low byte of r1 (byte B), so 'x'
 * is never stored.
1648*b6cbf720SGianluca Guida */
1649*b6cbf720SGianluca Guida	ldrb	r2, [r1]
1650*b6cbf720SGianluca Guida	ldr	r3, [r1, #0x01]		/* BE:r3 = 1234  LE:r3 = 4321 */
1651*b6cbf720SGianluca Guida	ldr	ip, [r1, #0x05]		/* BE:ip = 5678  LE:ip = 8765 */
1652*b6cbf720SGianluca Guida	ldr	r1, [r1, #0x09]		/* BE:r1 = 9ABx  LE:r1 = xBA9 */
1653*b6cbf720SGianluca Guida	strb	r2, [r0]
1654*b6cbf720SGianluca Guida#ifdef __ARMEB__
1655*b6cbf720SGianluca Guida	mov	r2, r3, lsr #16		/* r2 = ..12 */
1656*b6cbf720SGianluca Guida	strh	r2, [r0, #0x01]
1657*b6cbf720SGianluca Guida	mov	r3, r3, lsl #16		/* r3 = 34.. */
1658*b6cbf720SGianluca Guida	orr	r3, r3, ip, lsr #16	/* r3 = 3456 */
1659*b6cbf720SGianluca Guida	mov	ip, ip, lsl #16		/* ip = 78.. */
1660*b6cbf720SGianluca Guida	orr	ip, ip, r1, lsr #16	/* ip = 789A */
1661*b6cbf720SGianluca Guida	mov	r1, r1, lsr #8		/* r1 = .9AB */
1662*b6cbf720SGianluca Guida#else
1663*b6cbf720SGianluca Guida	strh	r3, [r0, #0x01]
1664*b6cbf720SGianluca Guida	mov	r3, r3, lsr #16		/* r3 = ..43 */
1665*b6cbf720SGianluca Guida	orr	r3, r3, ip, lsl #16	/* r3 = 6543 */
1666*b6cbf720SGianluca Guida	mov	ip, ip, lsr #16		/* ip = ..87 */
1667*b6cbf720SGianluca Guida	orr	ip, ip, r1, lsl #16	/* ip = A987 */
1668*b6cbf720SGianluca Guida	mov	r1, r1, lsr #16		/* r1 = ..xB */
1669*b6cbf720SGianluca Guida#endif
1670*b6cbf720SGianluca Guida	str	r3, [r0, #0x03]
1671*b6cbf720SGianluca Guida	str	ip, [r0, #0x07]
1672*b6cbf720SGianluca Guida	strb	r1, [r0, #0x0b]
1673*b6cbf720SGianluca Guida	bx	lr
1674*b6cbf720SGianluca Guida	LMEMCPY_C_PAD
1675*b6cbf720SGianluca Guida
1676*b6cbf720SGianluca Guida/*
1677*b6cbf720SGianluca Guida * 1000: dst is 16-bit aligned, src is 32-bit aligned
 *
 * Loads:  three words at source offsets 0, 4 and 8.
 * Stores: halfword, word, word, halfword at destination offsets
 *         0, 2, 6 and 0xa.
 * On big-endian the final strh uses r2 unshifted, because its low
 * halfword already holds source bytes A and B.
1678*b6cbf720SGianluca Guida */
1679*b6cbf720SGianluca Guida	ldr	ip, [r1]		/* BE:ip = 0123  LE:ip = 3210 */
1680*b6cbf720SGianluca Guida	ldr	r3, [r1, #0x04]		/* BE:r3 = 4567  LE:r3 = 7654 */
1681*b6cbf720SGianluca Guida	ldr	r2, [r1, #0x08]		/* BE:r2 = 89AB  LE:r2 = BA98 */
1682*b6cbf720SGianluca Guida	mov	r1, ip, lsr #16		/* BE:r1 = ..01  LE:r1 = ..32 */
1683*b6cbf720SGianluca Guida#ifdef __ARMEB__
1684*b6cbf720SGianluca Guida	strh	r1, [r0]
1685*b6cbf720SGianluca Guida	mov	r1, ip, lsl #16		/* r1 = 23.. */
1686*b6cbf720SGianluca Guida	orr	r1, r1, r3, lsr #16	/* r1 = 2345 */
1687*b6cbf720SGianluca Guida	mov	r3, r3, lsl #16		/* r3 = 67.. */
1688*b6cbf720SGianluca Guida	orr	r3, r3, r2, lsr #16	/* r3 = 6789 */
1689*b6cbf720SGianluca Guida#else
1690*b6cbf720SGianluca Guida	strh	ip, [r0]
1691*b6cbf720SGianluca Guida	orr	r1, r1, r3, lsl #16	/* r1 = 5432 */
1692*b6cbf720SGianluca Guida	mov	r3, r3, lsr #16		/* r3 = ..76 */
1693*b6cbf720SGianluca Guida	orr	r3, r3, r2, lsl #16	/* r3 = 9876 */
1694*b6cbf720SGianluca Guida	mov	r2, r2, lsr #16		/* r2 = ..BA */
1695*b6cbf720SGianluca Guida#endif
1696*b6cbf720SGianluca Guida	str	r1, [r0, #0x02]
1697*b6cbf720SGianluca Guida	str	r3, [r0, #0x06]
1698*b6cbf720SGianluca Guida	strh	r2, [r0, #0x0a]
1699*b6cbf720SGianluca Guida	bx	lr
1700*b6cbf720SGianluca Guida	LMEMCPY_C_PAD
1701*b6cbf720SGianluca Guida
1702*b6cbf720SGianluca Guida/*
1703*b6cbf720SGianluca Guida * 1001: dst is 16-bit aligned, src is 8-bit aligned (byte 1)
 *
 * Loads:  words at source offsets -1, 3 and 7, byte at offset 0xb.
 * Stores: halfword, word, word, halfword at destination offsets
 *         0, 2, 6 and 0xa.
 * The first load starts one byte before the source buffer, shown as
 * 'x', and that byte never reaches the destination.  ip is used as
 * scratch for the first halfword and is then reloaded with the word at
 * offset 7.
1704*b6cbf720SGianluca Guida */
1705*b6cbf720SGianluca Guida	ldr	r2, [r1, #-1]		/* BE:r2 = x012  LE:r2 = 210x */
1706*b6cbf720SGianluca Guida	ldr	r3, [r1, #0x03]		/* BE:r3 = 3456  LE:r3 = 6543 */
1707*b6cbf720SGianluca Guida	mov	ip, r2, lsr #8		/* BE:ip = .x01  LE:ip = .210 */
1708*b6cbf720SGianluca Guida	strh	ip, [r0]
1709*b6cbf720SGianluca Guida	ldr	ip, [r1, #0x07]		/* BE:ip = 789A  LE:ip = A987 */
1710*b6cbf720SGianluca Guida	ldrb	r1, [r1, #0x0b]		/* r1 = ...B */
1711*b6cbf720SGianluca Guida#ifdef __ARMEB__
1712*b6cbf720SGianluca Guida	mov	r2, r2, lsl #24		/* r2 = 2... */
1713*b6cbf720SGianluca Guida	orr	r2, r2, r3, lsr #8	/* r2 = 2345 */
1714*b6cbf720SGianluca Guida	mov	r3, r3, lsl #24		/* r3 = 6... */
1715*b6cbf720SGianluca Guida	orr	r3, r3, ip, lsr #8	/* r3 = 6789 */
1716*b6cbf720SGianluca Guida	orr	r1, r1, ip, lsl #8	/* r1 = 89AB */
1717*b6cbf720SGianluca Guida#else
1718*b6cbf720SGianluca Guida	mov	r2, r2, lsr #24		/* r2 = ...2 */
1719*b6cbf720SGianluca Guida	orr	r2, r2, r3, lsl #8	/* r2 = 5432 */
1720*b6cbf720SGianluca Guida	mov	r3, r3, lsr #24		/* r3 = ...6 */
1721*b6cbf720SGianluca Guida	orr	r3, r3, ip, lsl #8	/* r3 = 9876 */
1722*b6cbf720SGianluca Guida	mov	r1, r1, lsl #8		/* r1 = ..B. */
1723*b6cbf720SGianluca Guida	orr	r1, r1, ip, lsr #24	/* r1 = ..BA */
1724*b6cbf720SGianluca Guida#endif
1725*b6cbf720SGianluca Guida	str	r2, [r0, #0x02]
1726*b6cbf720SGianluca Guida	str	r3, [r0, #0x06]
1727*b6cbf720SGianluca Guida	strh	r1, [r0, #0x0a]
1728*b6cbf720SGianluca Guida	bx	lr
1729*b6cbf720SGianluca Guida	LMEMCPY_C_PAD
1730*b6cbf720SGianluca Guida
1731*b6cbf720SGianluca Guida/*
1732*b6cbf720SGianluca Guida * 1010: dst is 16-bit aligned, src is 16-bit aligned
 *
 * Halfword, word, word, halfword at offsets 0, 2, 6 and 0xa on both
 * sides, so the registers are stored exactly as loaded.  All loads
 * precede the stores; r1 is overwritten by the last load.
1733*b6cbf720SGianluca Guida */
1734*b6cbf720SGianluca Guida	ldrh	r2, [r1]
1735*b6cbf720SGianluca Guida	ldr	r3, [r1, #0x02]
1736*b6cbf720SGianluca Guida	ldr	ip, [r1, #0x06]
1737*b6cbf720SGianluca Guida	ldrh	r1, [r1, #0x0a]
1738*b6cbf720SGianluca Guida	strh	r2, [r0]
1739*b6cbf720SGianluca Guida	str	r3, [r0, #0x02]
1740*b6cbf720SGianluca Guida	str	ip, [r0, #0x06]
1741*b6cbf720SGianluca Guida	strh	r1, [r0, #0x0a]
1742*b6cbf720SGianluca Guida	bx	lr
1743*b6cbf720SGianluca Guida	LMEMCPY_C_PAD
1744*b6cbf720SGianluca Guida
1745*b6cbf720SGianluca Guida/*
1746*b6cbf720SGianluca Guida * 1011: dst is 16-bit aligned, src is 8-bit aligned (byte 3)
 *
 * Loads:  words at source offsets 9, 5 and 1, byte at offset 0.
 * Stores: halfword, word, word, halfword at destination offsets
 *         0xa, 6, 2 and 0.
 * The copy is assembled from the high end downwards.  The first load
 * extends one byte past the 12 source bytes, shown as 'x', and that
 * byte never reaches the destination.  ip is used as scratch for the
 * last halfword and is then reloaded with the word at offset 1.
1747*b6cbf720SGianluca Guida */
1748*b6cbf720SGianluca Guida	ldr	r2, [r1, #0x09]		/* BE:r2 = 9ABx  LE:r2 = xBA9 */
1749*b6cbf720SGianluca Guida	ldr	r3, [r1, #0x05]		/* BE:r3 = 5678  LE:r3 = 8765 */
1750*b6cbf720SGianluca Guida	mov	ip, r2, lsr #8		/* BE:ip = .9AB  LE:ip = .xBA */
1751*b6cbf720SGianluca Guida	strh	ip, [r0, #0x0a]
1752*b6cbf720SGianluca Guida	ldr	ip, [r1, #0x01]		/* BE:ip = 1234  LE:ip = 4321 */
1753*b6cbf720SGianluca Guida	ldrb	r1, [r1]		/* r1 = ...0 */
1754*b6cbf720SGianluca Guida#ifdef __ARMEB__
1755*b6cbf720SGianluca Guida	mov	r2, r2, lsr #24		/* r2 = ...9 */
1756*b6cbf720SGianluca Guida	orr	r2, r2, r3, lsl #8	/* r2 = 6789 */
1757*b6cbf720SGianluca Guida	mov	r3, r3, lsr #24		/* r3 = ...5 */
1758*b6cbf720SGianluca Guida	orr	r3, r3, ip, lsl #8	/* r3 = 2345 */
1759*b6cbf720SGianluca Guida	mov	r1, r1, lsl #8		/* r1 = ..0. */
1760*b6cbf720SGianluca Guida	orr	r1, r1, ip, lsr #24	/* r1 = ..01 */
1761*b6cbf720SGianluca Guida#else
1762*b6cbf720SGianluca Guida	mov	r2, r2, lsl #24		/* r2 = 9... */
1763*b6cbf720SGianluca Guida	orr	r2, r2, r3, lsr #8	/* r2 = 9876 */
1764*b6cbf720SGianluca Guida	mov	r3, r3, lsl #24		/* r3 = 5... */
1765*b6cbf720SGianluca Guida	orr	r3, r3, ip, lsr #8	/* r3 = 5432 */
1766*b6cbf720SGianluca Guida	orr	r1, r1, ip, lsl #8	/* r1 = 3210 */
1767*b6cbf720SGianluca Guida#endif
1768*b6cbf720SGianluca Guida	str	r2, [r0, #0x06]
1769*b6cbf720SGianluca Guida	str	r3, [r0, #0x02]
/* only the low halfword of r1 (bytes 0 and 1) is written */
1770*b6cbf720SGianluca Guida	strh	r1, [r0]
1771*b6cbf720SGianluca Guida	bx	lr
1772*b6cbf720SGianluca Guida	LMEMCPY_C_PAD
1773*b6cbf720SGianluca Guida
1774*b6cbf720SGianluca Guida/*
1775*b6cbf720SGianluca Guida * 1100: dst is 8-bit aligned (byte 3), src is 32-bit aligned
 *
 * Loads:  three words at source offsets 0, 4 and 8.
 * Stores: byte, word, word, halfword, byte at destination offsets
 *         0, 1, 5, 9 and 0xb.
 * Both endian variants perform all of their stores inside the
 * conditional block, so only the return is shared.
1776*b6cbf720SGianluca Guida */
1777*b6cbf720SGianluca Guida	ldr	r2, [r1]		/* BE:r2 = 0123  LE:r2 = 3210 */
1778*b6cbf720SGianluca Guida	ldr	ip, [r1, #0x04]		/* BE:ip = 4567  LE:ip = 7654 */
1779*b6cbf720SGianluca Guida	ldr	r1, [r1, #0x08]		/* BE:r1 = 89AB  LE:r1 = BA98 */
1780*b6cbf720SGianluca Guida#ifdef __ARMEB__
1781*b6cbf720SGianluca Guida	mov	r3, r2, lsr #24		/* r3 = ...0 */
1782*b6cbf720SGianluca Guida	strb	r3, [r0]
1783*b6cbf720SGianluca Guida	mov	r2, r2, lsl #8		/* r2 = 123. */
1784*b6cbf720SGianluca Guida	orr	r2, r2, ip, lsr #24	/* r2 = 1234 */
1785*b6cbf720SGianluca Guida	str	r2, [r0, #0x01]
1786*b6cbf720SGianluca Guida	mov	r2, ip, lsl #8		/* r2 = 567. */
1787*b6cbf720SGianluca Guida	orr	r2, r2, r1, lsr #24	/* r2 = 5678 */
1788*b6cbf720SGianluca Guida	str	r2, [r0, #0x05]
1789*b6cbf720SGianluca Guida	mov	r2, r1, lsr #8		/* r2 = .89A */
1790*b6cbf720SGianluca Guida	strh	r2, [r0, #0x09]
1791*b6cbf720SGianluca Guida	strb	r1, [r0, #0x0b]
1792*b6cbf720SGianluca Guida#else
1793*b6cbf720SGianluca Guida	strb	r2, [r0]
1794*b6cbf720SGianluca Guida	mov	r3, r2, lsr #8		/* r3 = .321 */
1795*b6cbf720SGianluca Guida	orr	r3, r3, ip, lsl #24	/* r3 = 4321 */
1796*b6cbf720SGianluca Guida	str	r3, [r0, #0x01]
1797*b6cbf720SGianluca Guida	mov	r3, ip, lsr #8		/* r3 = .765 */
1798*b6cbf720SGianluca Guida	orr	r3, r3, r1, lsl #24	/* r3 = 8765 */
1799*b6cbf720SGianluca Guida	str	r3, [r0, #0x05]
1800*b6cbf720SGianluca Guida	mov	r1, r1, lsr #8		/* r1 = .BA9 */
1801*b6cbf720SGianluca Guida	strh	r1, [r0, #0x09]
1802*b6cbf720SGianluca Guida	mov	r1, r1, lsr #16		/* r1 = ...B */
1803*b6cbf720SGianluca Guida	strb	r1, [r0, #0x0b]
1804*b6cbf720SGianluca Guida#endif
1805*b6cbf720SGianluca Guida	bx	lr
1806*b6cbf720SGianluca Guida	LMEMCPY_C_PAD
1807*b6cbf720SGianluca Guida
1808*b6cbf720SGianluca Guida/*
1809*b6cbf720SGianluca Guida * 1101: dst is 8-bit aligned (byte 3), src is 8-bit aligned (byte 1)
 *
 * Loads:  byte at source offset 0xb, words at offsets 7, 3 and -1.
 * Stores: byte, halfword, word, word, byte at destination offsets
 *         0xb, 9, 5, 1 and 0.
 * The copy is assembled from the high end downwards.  The last load
 * starts one byte before the source buffer, shown as 'x'.  The final
 * strb writes only the low byte of r1 (byte 0), so 'x' is never stored.
1810*b6cbf720SGianluca Guida */
1811*b6cbf720SGianluca Guida	ldrb	r2, [r1, #0x0b]		/* r2 = ...B */
1812*b6cbf720SGianluca Guida	ldr	r3, [r1, #0x07]		/* BE:r3 = 789A  LE:r3 = A987 */
1813*b6cbf720SGianluca Guida	ldr	ip, [r1, #0x03]		/* BE:ip = 3456  LE:ip = 6543 */
1814*b6cbf720SGianluca Guida	ldr	r1, [r1, #-1]		/* BE:r1 = x012  LE:r1 = 210x */
1815*b6cbf720SGianluca Guida	strb	r2, [r0, #0x0b]
1816*b6cbf720SGianluca Guida#ifdef __ARMEB__
/* low 16 bits of r3 already hold bytes 9 and A */
1817*b6cbf720SGianluca Guida	strh	r3, [r0, #0x09]
1818*b6cbf720SGianluca Guida	mov	r3, r3, lsr #16		/* r3 = ..78 */
1819*b6cbf720SGianluca Guida	orr	r3, r3, ip, lsl #16	/* r3 = 5678 */
1820*b6cbf720SGianluca Guida	mov	ip, ip, lsr #16		/* ip = ..34 */
1821*b6cbf720SGianluca Guida	orr	ip, ip, r1, lsl #16	/* ip = 1234 */
1822*b6cbf720SGianluca Guida	mov	r1, r1, lsr #16		/* r1 = ..x0 */
1823*b6cbf720SGianluca Guida#else
1824*b6cbf720SGianluca Guida	mov	r2, r3, lsr #16		/* r2 = ..A9 */
1825*b6cbf720SGianluca Guida	strh	r2, [r0, #0x09]
1826*b6cbf720SGianluca Guida	mov	r3, r3, lsl #16		/* r3 = 87.. */
1827*b6cbf720SGianluca Guida	orr	r3, r3, ip, lsr #16	/* r3 = 8765 */
1828*b6cbf720SGianluca Guida	mov	ip, ip, lsl #16		/* ip = 43.. */
1829*b6cbf720SGianluca Guida	orr	ip, ip, r1, lsr #16	/* ip = 4321 */
1830*b6cbf720SGianluca Guida	mov	r1, r1, lsr #8		/* r1 = .210 */
1831*b6cbf720SGianluca Guida#endif
1832*b6cbf720SGianluca Guida	str	r3, [r0, #0x05]
1833*b6cbf720SGianluca Guida	str	ip, [r0, #0x01]
1834*b6cbf720SGianluca Guida	strb	r1, [r0]
1835*b6cbf720SGianluca Guida	bx	lr
1836*b6cbf720SGianluca Guida	LMEMCPY_C_PAD
1837*b6cbf720SGianluca Guida
1838*b6cbf720SGianluca Guida/*
1839*b6cbf720SGianluca Guida * 1110: dst is 8-bit aligned (byte 3), src is 16-bit aligned
 *
 * Loads:  halfword, word, word, halfword at source offsets 0, 2, 6, 0xa.
 * Stores: byte, word, word, halfword, byte at destination offsets
 *         0, 1, 5, 9 and 0xb.
 * The two endian variants are entirely separate.  Big-endian loads from
 * the high end and works downwards; little-endian loads from the low
 * end and works upwards.  Each variant builds its words with one-byte
 * shifts and issues the bulk of its stores at the end.
1840*b6cbf720SGianluca Guida */
1841*b6cbf720SGianluca Guida#ifdef __ARMEB__
1842*b6cbf720SGianluca Guida	ldrh	r2, [r1, #0x0a]		/* r2 = ..AB */
1843*b6cbf720SGianluca Guida	ldr	ip, [r1, #0x06]		/* ip = 6789 */
1844*b6cbf720SGianluca Guida	ldr	r3, [r1, #0x02]		/* r3 = 2345 */
1845*b6cbf720SGianluca Guida	ldrh	r1, [r1]		/* r1 = ..01 */
1846*b6cbf720SGianluca Guida	strb	r2, [r0, #0x0b]
1847*b6cbf720SGianluca Guida	mov	r2, r2, lsr #8		/* r2 = ...A */
1848*b6cbf720SGianluca Guida	orr	r2, r2, ip, lsl #8	/* r2 = 789A */
1849*b6cbf720SGianluca Guida	mov	ip, ip, lsr #8		/* ip = .678 */
1850*b6cbf720SGianluca Guida	orr	ip, ip, r3, lsl #24	/* ip = 5678 */
1851*b6cbf720SGianluca Guida	mov	r3, r3, lsr #8		/* r3 = .234 */
1852*b6cbf720SGianluca Guida	orr	r3, r3, r1, lsl #24	/* r3 = 1234 */
1853*b6cbf720SGianluca Guida	mov	r1, r1, lsr #8		/* r1 = ...0 */
1854*b6cbf720SGianluca Guida	strb	r1, [r0]
1855*b6cbf720SGianluca Guida	str	r3, [r0, #0x01]
1856*b6cbf720SGianluca Guida	str	ip, [r0, #0x05]
/* only the low halfword of r2 (bytes 9 and A) is written */
1857*b6cbf720SGianluca Guida	strh	r2, [r0, #0x09]
1858*b6cbf720SGianluca Guida#else
1859*b6cbf720SGianluca Guida	ldrh	r2, [r1]		/* r2 = ..10 */
1860*b6cbf720SGianluca Guida	ldr	r3, [r1, #0x02]		/* r3 = 5432 */
1861*b6cbf720SGianluca Guida	ldr	ip, [r1, #0x06]		/* ip = 9876 */
1862*b6cbf720SGianluca Guida	ldrh	r1, [r1, #0x0a]		/* r1 = ..BA */
1863*b6cbf720SGianluca Guida	strb	r2, [r0]
1864*b6cbf720SGianluca Guida	mov	r2, r2, lsr #8		/* r2 = ...1 */
1865*b6cbf720SGianluca Guida	orr	r2, r2, r3, lsl #8	/* r2 = 4321 */
1866*b6cbf720SGianluca Guida	mov	r3, r3, lsr #24		/* r3 = ...5 */
1867*b6cbf720SGianluca Guida	orr	r3, r3, ip, lsl #8	/* r3 = 8765 */
1868*b6cbf720SGianluca Guida	mov	ip, ip, lsr #24		/* ip = ...9 */
1869*b6cbf720SGianluca Guida	orr	ip, ip, r1, lsl #8	/* ip = .BA9 */
1870*b6cbf720SGianluca Guida	mov	r1, r1, lsr #8		/* r1 = ...B */
1871*b6cbf720SGianluca Guida	str	r2, [r0, #0x01]
1872*b6cbf720SGianluca Guida	str	r3, [r0, #0x05]
/* only the low halfword of ip (bytes 9 and A) is written */
1873*b6cbf720SGianluca Guida	strh	ip, [r0, #0x09]
1874*b6cbf720SGianluca Guida	strb	r1, [r0, #0x0b]
1875*b6cbf720SGianluca Guida#endif
1876*b6cbf720SGianluca Guida	bx	lr
1877*b6cbf720SGianluca Guida	LMEMCPY_C_PAD
1878*b6cbf720SGianluca Guida
1879*b6cbf720SGianluca Guida/*
1880*b6cbf720SGianluca Guida * 1111: dst is 8-bit aligned (byte 3), src is 8-bit aligned (byte 3)
 *
 * Copies the 12 bytes at src (r1) to dst (r0) and returns with "bx lr".
 * r0 is never written here; r1, r2, r3 and ip are used as scratch, and
 * r1 itself is overwritten by the last load.
 *
 * Both sides are accessed with the same widths at the same offsets
 * (byte at 0, words at 1 and 5, halfword at 9, byte at 0x0b), so every
 * value is stored exactly as it was loaded: there is no shifting and no
 * __ARMEB__-specific sequence in this case.
1881*b6cbf720SGianluca Guida */
1882*b6cbf720SGianluca Guida	ldrb	r2, [r1]		/* r2 = source byte 0 */
1883*b6cbf720SGianluca Guida	ldr	r3, [r1, #0x01]		/* r3 = source bytes 1-4 */
1884*b6cbf720SGianluca Guida	ldr	ip, [r1, #0x05]		/* ip = source bytes 5-8 */
1885*b6cbf720SGianluca Guida	strb	r2, [r0]		/* byte 0 is stored before r2 is reloaded below */
1886*b6cbf720SGianluca Guida	ldrh	r2, [r1, #0x09]		/* r2 = source bytes 9-A */
1887*b6cbf720SGianluca Guida	ldrb	r1, [r1, #0x0b]		/* r1 = source byte B (src pointer overwritten) */
1888*b6cbf720SGianluca Guida	str	r3, [r0, #0x01]		/* dst[0x1..0x4] = bytes 1-4 */
1889*b6cbf720SGianluca Guida	str	ip, [r0, #0x05]		/* dst[0x5..0x8] = bytes 5-8 */
1890*b6cbf720SGianluca Guida	strh	r2, [r0, #0x09]		/* dst[0x9..0xa] = bytes 9-A */
1891*b6cbf720SGianluca Guida	strb	r1, [r0, #0x0b]		/* dst[0xb] = byte B */
1892*b6cbf720SGianluca Guida	bx	lr
1893*b6cbf720SGianluca Guida#endif	/* !_STANDALONE */
1894