xref: /netbsd-src/sys/arch/arm/arm/bcopyinout_xscale.S (revision beb9bdb00e5421761976d5c277c0da84fd703f9b)
1/*	$NetBSD: bcopyinout_xscale.S,v 1.12 2022/10/20 06:58:38 skrll Exp $	*/
2
3/*
4 * Copyright 2003 Wasabi Systems, Inc.
5 * All rights reserved.
6 *
7 * Written by Steve C. Woodford for Wasabi Systems, Inc.
8 *
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
11 * are met:
12 * 1. Redistributions of source code must retain the above copyright
13 *    notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 *    notice, this list of conditions and the following disclaimer in the
16 *    documentation and/or other materials provided with the distribution.
17 * 3. All advertising materials mentioning features or use of this software
18 *    must display the following acknowledgement:
19 *      This product includes software developed for the NetBSD Project by
20 *      Wasabi Systems, Inc.
21 * 4. The name of Wasabi Systems, Inc. may not be used to endorse
22 *    or promote products derived from this software without specific prior
23 *    written permission.
24 *
25 * THIS SOFTWARE IS PROVIDED BY WASABI SYSTEMS, INC. ``AS IS'' AND
26 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
27 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL WASABI SYSTEMS, INC
29 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
30 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
31 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
32 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
33 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
34 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
35 * POSSIBILITY OF SUCH DAMAGE.
36 */
37
38RCSID("$NetBSD: bcopyinout_xscale.S,v 1.12 2022/10/20 06:58:38 skrll Exp $")
39
40	.text
41	.align	0
42
43/*
44 * r0 = user space address
45 * r1 = kernel space address
46 * r2 = length
47 *
48 * Copies bytes from user space to kernel space
49 */
50ENTRY(copyin)
	/*
	 * Entry stub.  Rejects non-positive lengths, installs
	 * .Lcopyin_fault in the PCB_ONFAULT slot, runs .Lcopyin_guts and,
	 * when that returns, restores the previous PCB_ONFAULT value and
	 * returns 0 in r0.
	 *
	 * Register roles shared with .Lcopyin_guts / .Lcopyin_fault:
	 *   r10 = value produced by GET_CURPCB (base for PCB_ONFAULT)
	 *   r11 = previous PCB_ONFAULT value, restored on every exit
	 *   r3  = which extra registers the copy code currently has pushed:
	 *         0 none, > 0 r4-r7, < 0 r4-r9
	 *
	 * NOTE(review): "le" is a signed condition, so a length with bit 31
	 * set also takes the early return-0 path -- confirm callers never
	 * pass such a length.
	 */
51	cmp	r2, #0x00
52	movle	r0, #0x00
53	RETc(le)			/* Bail early if length is <= 0 */
54
55	push	{r10-r11, lr}
56
57	GET_CURPCB(r10)
58
59	mov	r3, #0x00		/* Nothing extra on the stack yet */
60	adr	ip, .Lcopyin_fault
61	ldr	r11, [r10, #PCB_ONFAULT]	/* Remember the old handler */
62	str	ip, [r10, #PCB_ONFAULT]	/* Install ours */
63	bl	.Lcopyin_guts
	/* The copy code returns here once everything has been copied */
64	str	r11, [r10, #PCB_ONFAULT]
65	mov	r0, #0x00		/* Success */
66	pop	{r10-r11, pc}
67
	/*
	 * Target stored in PCB_ONFAULT above.  Puts the old handler back,
	 * drops whatever the copy code had pushed (as recorded in r3) and
	 * returns to copyin's caller through the lr saved at entry.
	 * r0 is not written here; presumably the code that transfers control
	 * to this label has already placed the error value in r0 -- TODO
	 * confirm against the abort handler.
	 */
68.Lcopyin_fault:
69	str	r11, [r10, #PCB_ONFAULT]
70	cmp	r3, #0x00
71	popgt	{r4-r7}		/* r3 > 0 Restore r4-r7 */
72	poplt	{r4-r9}		/* r3 < 0 Restore r4-r9 */
73	pop	{r10-r11, pc}
74
75.Lcopyin_guts:
	/*
	 * Copy code proper, entered with bl from the stub above:
	 *   r0 = source, r1 = destination, r2 = byte count (> 0), r3 = 0.
	 * ip is scratch.  r4-r9 are pushed only around the bulk loops and
	 * r3 is set to -1 while they are on the stack.  Every load from the
	 * source uses the ldrt/ldrbt ("T") form; stores are plain.
	 */
76	pld	[r0]
77	/* Word-align the destination buffer */
78	ands	ip, r1, #0x03		/* Already word aligned? */
79	beq	.Lcopyin_wordaligned	/* Yup */
80	rsb	ip, ip, #0x04		/* ip = 1..3 bytes needed to align r1 */
81	cmp	r2, ip			/* Enough bytes left to align it? */
82	blt	.Lcopyin_l4_2		/* Nope. Just copy bytewise */
83	sub	r2, r2, ip
	/*
	 * Computed jump into the three byte copies below.  ip becomes
	 * 3 - (bytes to copy), i.e. 2, 1 or 0.  In the addne, pc reads as
	 * the address of the first ldrbt (this instruction + 8) and each
	 * ldrbt/strb pair is 8 bytes, so "ip, lsl #3" skips ip pairs.
	 * When ip is 0 the addne is not executed and all three pairs run.
	 */
84	rsbs	ip, ip, #0x03
85	addne	pc, pc, ip, lsl #3
86	nop
87	ldrbt	ip, [r0], #0x01
88	strb	ip, [r1], #0x01
89	ldrbt	ip, [r0], #0x01
90	strb	ip, [r1], #0x01
91	ldrbt	ip, [r0], #0x01
92	strb	ip, [r1], #0x01
93	cmp	r2, #0x00		/* All done? */
94	RETc(eq)
95
96	/* Destination buffer is now word aligned */
97.Lcopyin_wordaligned:
98	ands	ip, r0, #0x03		/* Is src also word-aligned? */
99	bne	.Lcopyin_bad_align	/* Nope. Things just got bad */
100	cmp	r2, #0x08		/* Less than 8 bytes remaining? */
101	blt	.Lcopyin_w_less_than8
102
103	/* Quad-align the destination buffer */
	/*
	 * If r1 is only word aligned, move one word so that it becomes
	 * 8-byte aligned.  The load is issued before the push, while r3 is
	 * still 0, so a fault on it unwinds with nothing extra on the stack.
	 */
104	tst	r1, #0x07		/* Already quad aligned? */
105	ldrtne	ip, [r0], #0x04
106	push	{r4-r9}		/* Free up some registers */
107	mov	r3, #-1			/* Signal restore r4-r9 */
108	tst	r1, #0x07		/* XXX: bug work-around */
109	subne	r2, r2, #0x04
110	strne	ip, [r1], #0x04
111
112	/* Destination buffer quad aligned, source is word aligned */
	/*
	 * r2 is biased by -0x80 while in the 128-byte loop so that the loop
	 * test is a plain sign check; the bias is removed again at
	 * .Lcopyin_w_lessthan128.
	 */
113	subs	r2, r2, #0x80
114	blt	.Lcopyin_w_lessthan128
115
116	/* Copy 128 bytes at a time */
	/*
	 * Fully unrolled: word loads rotate through r4-r9 and are interleaved
	 * with 8-byte strd stores of pairs loaded earlier.  The LD:/ST: tags
	 * give the byte offsets within the 128-byte chunk.  The subs near the
	 * end sets the flags for the closing bge; the strd after it does not
	 * change them.
	 */
117.Lcopyin_w_loop128:
118	ldrt	r4, [r0], #0x04		/* LD:00-03 */
119	ldrt	r5, [r0], #0x04		/* LD:04-07 */
120	pld	[r0, #0x18]		/* Prefetch 0x20 */
121	ldrt	r6, [r0], #0x04		/* LD:08-0b */
122	ldrt	r7, [r0], #0x04		/* LD:0c-0f */
123	ldrt	r8, [r0], #0x04		/* LD:10-13 */
124	ldrt	r9, [r0], #0x04		/* LD:14-17 */
125	strd	r4, r5, [r1], #0x08	/* ST:00-07 */
126	ldrt	r4, [r0], #0x04		/* LD:18-1b */
127	ldrt	r5, [r0], #0x04		/* LD:1c-1f */
128	strd	r6, r7, [r1], #0x08	/* ST:08-0f */
129	ldrt	r6, [r0], #0x04		/* LD:20-23 */
130	ldrt	r7, [r0], #0x04		/* LD:24-27 */
131	pld	[r0, #0x18]		/* Prefetch 0x40 */
132	strd	r8, r9, [r1], #0x08	/* ST:10-17 */
133	ldrt	r8, [r0], #0x04		/* LD:28-2b */
134	ldrt	r9, [r0], #0x04		/* LD:2c-2f */
135	strd	r4, r5, [r1], #0x08	/* ST:18-1f */
136	ldrt	r4, [r0], #0x04		/* LD:30-33 */
137	ldrt	r5, [r0], #0x04		/* LD:34-37 */
138	strd	r6, r7, [r1], #0x08		/* ST:20-27 */
139	ldrt	r6, [r0], #0x04		/* LD:38-3b */
140	ldrt	r7, [r0], #0x04		/* LD:3c-3f */
141	strd	r8, r9, [r1], #0x08	/* ST:28-2f */
142	ldrt	r8, [r0], #0x04		/* LD:40-43 */
143	ldrt	r9, [r0], #0x04		/* LD:44-47 */
144	pld	[r0, #0x18]		/* Prefetch 0x60 */
145	strd	r4, r5, [r1], #0x08	/* ST:30-37 */
146	ldrt	r4, [r0], #0x04		/* LD:48-4b */
147	ldrt	r5, [r0], #0x04		/* LD:4c-4f */
148	strd	r6, r7, [r1], #0x08	/* ST:38-3f */
149	ldrt	r6, [r0], #0x04		/* LD:50-53 */
150	ldrt	r7, [r0], #0x04		/* LD:54-57 */
151	strd	r8, r9, [r1], #0x08	/* ST:40-47 */
152	ldrt	r8, [r0], #0x04		/* LD:58-5b */
153	ldrt	r9, [r0], #0x04		/* LD:5c-5f */
154	strd	r4, r5, [r1], #0x08	/* ST:48-4f */
155	ldrt	r4, [r0], #0x04		/* LD:60-63 */
156	ldrt	r5, [r0], #0x04		/* LD:64-67 */
157	pld	[r0, #0x18]		/* Prefetch 0x80 */
158	strd	r6, r7, [r1], #0x08	/* ST:50-57 */
159	ldrt	r6, [r0], #0x04		/* LD:68-6b */
160	ldrt	r7, [r0], #0x04		/* LD:6c-6f */
161	strd	r8, r9, [r1], #0x08	/* ST:58-5f */
162	ldrt	r8, [r0], #0x04		/* LD:70-73 */
163	ldrt	r9, [r0], #0x04		/* LD:74-77 */
164	strd	r4, r5, [r1], #0x08	/* ST:60-67 */
165	ldrt	r4, [r0], #0x04		/* LD:78-7b */
166	ldrt	r5, [r0], #0x04		/* LD:7c-7f */
167	strd	r6, r7, [r1], #0x08	/* ST:68-6f */
168	strd	r8, r9, [r1], #0x08	/* ST:70-77 */
169	subs	r2, r2, #0x80
170	strd	r4, r5, [r1], #0x08	/* ST:78-7f */
171	bge	.Lcopyin_w_loop128
172
173.Lcopyin_w_lessthan128:
174	adds	r2, r2, #0x80		/* Adjust for extra sub */
175	popeq	{r4-r9}
176	RETc(eq)			/* Return now if done */
177	subs	r2, r2, #0x20		/* Same -bias trick, now for 32 bytes */
178	blt	.Lcopyin_w_lessthan32
179
180	/* Copy 32 bytes at a time */
181.Lcopyin_w_loop32:
182	ldrt	r4, [r0], #0x04
183	ldrt	r5, [r0], #0x04
184	pld	[r0, #0x18]
185	ldrt	r6, [r0], #0x04
186	ldrt	r7, [r0], #0x04
187	ldrt	r8, [r0], #0x04
188	ldrt	r9, [r0], #0x04
189	strd	r4, r5, [r1], #0x08
190	ldrt	r4, [r0], #0x04
191	ldrt	r5, [r0], #0x04
192	strd	r6, r7, [r1], #0x08
193	strd	r8, r9, [r1], #0x08
194	subs	r2, r2, #0x20
195	strd	r4, r5, [r1], #0x08
196	bge	.Lcopyin_w_loop32
197
198.Lcopyin_w_lessthan32:
199	adds	r2, r2, #0x20		/* Adjust for extra sub */
200	popeq	{r4-r9}
201	RETc(eq)			/* Return now if done */
202
	/*
	 * 1..31 bytes remain.  r4 = the part to move as whole 8-byte groups
	 * (0, 8, 16 or 24); r2 keeps the 0..7 byte remainder and the subs
	 * leaves Z set when that remainder is zero.  Each group below is
	 * four instructions (16 bytes; the nop is padding) and pc reads as
	 * the address of the first group, so adding (0x18 - r4) * 2 skips
	 * exactly the groups that are not needed.  Nothing between the subs
	 * and the RETc(eq) after the pop modifies the flags.
	 */
203	and	r4, r2, #0x18
204	rsb	r5, r4, #0x18
205	subs	r2, r2, r4
206	add	pc, pc, r5, lsl #1
207	nop
208
209	/* At least 24 bytes remaining */
210	ldrt	r4, [r0], #0x04
211	ldrt	r5, [r0], #0x04
212	nop
213	strd	r4, r5, [r1], #0x08
214
215	/* At least 16 bytes remaining */
216	ldrt	r4, [r0], #0x04
217	ldrt	r5, [r0], #0x04
218	nop
219	strd	r4, r5, [r1], #0x08
220
221	/* At least 8 bytes remaining */
222	ldrt	r4, [r0], #0x04
223	ldrt	r5, [r0], #0x04
224	nop
225	strd	r4, r5, [r1], #0x08
226
227	/* Less than 8 bytes remaining */
228	pop	{r4-r9}
229	RETc(eq)			/* Return now if done */
230	mov	r3, #0x00		/* r4-r9 are off the stack again */
231
	/*
	 * 1..7 bytes remain, both pointers word aligned, nothing extra on
	 * the stack.  Copy one word if at least 4 bytes are left, then up
	 * to three single bytes.
	 */
232.Lcopyin_w_less_than8:
233	subs	r2, r2, #0x04
234	ldrtge	ip, [r0], #0x04
235	strge	ip, [r1], #0x04
236	RETc(eq)			/* Return now if done */
237	addlt	r2, r2, #0x04		/* No word copied: undo the sub */
	/* r2 is now 1..3 */
238	ldrbt	ip, [r0], #0x01		/* Byte 1, always present */
239	cmp	r2, #0x02
240	ldrbtge	r2, [r0], #0x01		/* Byte 2; r2 is reused to hold data */
241	strb	ip, [r1], #0x01
242	ldrbtgt	ip, [r0]		/* Byte 3 */
243	strbge	r2, [r1], #0x01
244	strbgt	ip, [r1]
245	RET
246
247/*
248 * At this point, it has not been possible to word align both buffers.
249 * The destination buffer (r1) is word aligned, but the source buffer
250 * (r0) is not.
251 */
252.Lcopyin_bad_align:
	/*
	 * Entered with ip = source address & 3 (1, 2 or 3), from the ands
	 * at .Lcopyin_wordaligned.  r4-r7 are pushed and r3 is set to 1 to
	 * record that.  r0 is rounded down to a word boundary and the first
	 * source word is pre-loaded into ip; the cmp is done first because
	 * the load overwrites ip but leaves the flags alone.
	 *
	 * Each .Lcopyin_badN loop builds a destination word from the unused
	 * bytes of the previous source word (kept in ip) and the leading
	 * bytes of the next one; the shift directions are swapped when
	 * __ARMEB__ is defined.
	 */
253	push	{r4-r7}
254	mov	r3, #0x01
255	bic	r0, r0, #0x03
256	cmp	ip, #2
257	ldrt	ip, [r0], #0x04
258	bgt	.Lcopyin_bad3
259	beq	.Lcopyin_bad2
260	b	.Lcopyin_bad1
261
	/* Source is 1 byte past a word boundary: 16 bytes per iteration */
262.Lcopyin_bad1_loop16:
263#ifdef __ARMEB__
264	mov	r4, ip, lsl #8
265#else
266	mov	r4, ip, lsr #8
267#endif
268	ldrt	r5, [r0], #0x04
269	pld	[r0, #0x018]
270	ldrt	r6, [r0], #0x04
271	ldrt	r7, [r0], #0x04
272	ldrt	ip, [r0], #0x04
273#ifdef __ARMEB__
274	orr	r4, r4, r5, lsr #24
275	mov	r5, r5, lsl #8
276	orr	r5, r5, r6, lsr #24
277	mov	r6, r6, lsl #8
278	orr	r6, r6, r7, lsr #24
279	mov	r7, r7, lsl #8
280	orr	r7, r7, ip, lsr #24
281#else
282	orr	r4, r4, r5, lsl #24
283	mov	r5, r5, lsr #8
284	orr	r5, r5, r6, lsl #24
285	mov	r6, r6, lsr #8
286	orr	r6, r6, r7, lsl #24
287	mov	r7, r7, lsr #8
288	orr	r7, r7, ip, lsl #24
289#endif
290	str	r4, [r1], #0x04
291	str	r5, [r1], #0x04
292	str	r6, [r1], #0x04
293	str	r7, [r1], #0x04
294.Lcopyin_bad1:
295	subs	r2, r2, #0x10
296	bge	.Lcopyin_bad1_loop16
297
298	adds	r2, r2, #0x10
299	popeq	{r4-r7}
300	RETc(eq)			/* Return now if done */
301	subs	r2, r2, #0x04
	/*
	 * Fewer than 4 bytes left: r0 is one word past the word held in ip,
	 * of which 3 bytes are still unused, so step r0 back by 3 to the
	 * first uncopied source byte before finishing bytewise.
	 */
302	sublt	r0, r0, #0x03
303	blt	.Lcopyin_l4
304
	/* One word per iteration while at least 4 bytes remain */
305.Lcopyin_bad1_loop4:
306#ifdef __ARMEB__
307	mov	r4, ip, lsl #8
308#else
309	mov	r4, ip, lsr #8
310#endif
311	ldrt	ip, [r0], #0x04
312	subs	r2, r2, #0x04
313#ifdef __ARMEB__
314	orr	r4, r4, ip, lsr #24
315#else
316	orr	r4, r4, ip, lsl #24
317#endif
318	str	r4, [r1], #0x04
319	bge	.Lcopyin_bad1_loop4
320	sub	r0, r0, #0x03		/* Back to the first uncopied byte */
321	b	.Lcopyin_l4
322
	/* Source is 2 bytes past a word boundary: 16 bytes per iteration */
323.Lcopyin_bad2_loop16:
324#ifdef __ARMEB__
325	mov	r4, ip, lsl #16
326#else
327	mov	r4, ip, lsr #16
328#endif
329	ldrt	r5, [r0], #0x04
330	pld	[r0, #0x018]
331	ldrt	r6, [r0], #0x04
332	ldrt	r7, [r0], #0x04
333	ldrt	ip, [r0], #0x04
334#ifdef __ARMEB__
335	orr	r4, r4, r5, lsr #16
336	mov	r5, r5, lsl #16
337	orr	r5, r5, r6, lsr #16
338	mov	r6, r6, lsl #16
339	orr	r6, r6, r7, lsr #16
340	mov	r7, r7, lsl #16
341	orr	r7, r7, ip, lsr #16
342#else
343	orr	r4, r4, r5, lsl #16
344	mov	r5, r5, lsr #16
345	orr	r5, r5, r6, lsl #16
346	mov	r6, r6, lsr #16
347	orr	r6, r6, r7, lsl #16
348	mov	r7, r7, lsr #16
349	orr	r7, r7, ip, lsl #16
350#endif
351	str	r4, [r1], #0x04
352	str	r5, [r1], #0x04
353	str	r6, [r1], #0x04
354	str	r7, [r1], #0x04
355.Lcopyin_bad2:
356	subs	r2, r2, #0x10
357	bge	.Lcopyin_bad2_loop16
358
359	adds	r2, r2, #0x10
360	popeq	{r4-r7}
361	RETc(eq)			/* Return now if done */
362	subs	r2, r2, #0x04
363	sublt	r0, r0, #0x02		/* 2 bytes of ip unused: back up by 2 */
364	blt	.Lcopyin_l4
365
366.Lcopyin_bad2_loop4:
367#ifdef __ARMEB__
368	mov	r4, ip, lsl #16
369#else
370	mov	r4, ip, lsr #16
371#endif
372	ldrt	ip, [r0], #0x04
373	subs	r2, r2, #0x04
374#ifdef __ARMEB__
375	orr	r4, r4, ip, lsr #16
376#else
377	orr	r4, r4, ip, lsl #16
378#endif
379	str	r4, [r1], #0x04
380	bge	.Lcopyin_bad2_loop4
381	sub	r0, r0, #0x02		/* Back to the first uncopied byte */
382	b	.Lcopyin_l4
383
	/* Source is 3 bytes past a word boundary: 16 bytes per iteration */
384.Lcopyin_bad3_loop16:
385#ifdef __ARMEB__
386	mov	r4, ip, lsl #24
387#else
388	mov	r4, ip, lsr #24
389#endif
390	ldrt	r5, [r0], #0x04
391	pld	[r0, #0x018]
392	ldrt	r6, [r0], #0x04
393	ldrt	r7, [r0], #0x04
394	ldrt	ip, [r0], #0x04
395#ifdef __ARMEB__
396	orr	r4, r4, r5, lsr #8
397	mov	r5, r5, lsl #24
398	orr	r5, r5, r6, lsr #8
399	mov	r6, r6, lsl #24
400	orr	r6, r6, r7, lsr #8
401	mov	r7, r7, lsl #24
402	orr	r7, r7, ip, lsr #8
403#else
404	orr	r4, r4, r5, lsl #8
405	mov	r5, r5, lsr #24
406	orr	r5, r5, r6, lsl #8
407	mov	r6, r6, lsr #24
408	orr	r6, r6, r7, lsl #8
409	mov	r7, r7, lsr #24
410	orr	r7, r7, ip, lsl #8
411#endif
412	str	r4, [r1], #0x04
413	str	r5, [r1], #0x04
414	str	r6, [r1], #0x04
415	str	r7, [r1], #0x04
416.Lcopyin_bad3:
417	subs	r2, r2, #0x10
418	bge	.Lcopyin_bad3_loop16
419
420	adds	r2, r2, #0x10
421	popeq	{r4-r7}
422	RETc(eq)			/* Return now if done */
423	subs	r2, r2, #0x04
424	sublt	r0, r0, #0x01		/* 1 byte of ip unused: back up by 1 */
425	blt	.Lcopyin_l4
426
427.Lcopyin_bad3_loop4:
428#ifdef __ARMEB__
429	mov	r4, ip, lsl #24
430#else
431	mov	r4, ip, lsr #24
432#endif
433	ldrt	ip, [r0], #0x04
434	subs	r2, r2, #0x04
435#ifdef __ARMEB__
436	orr	r4, r4, ip, lsr #8
437#else
438	orr	r4, r4, ip, lsl #8
439#endif
440	str	r4, [r1], #0x04
441	bge	.Lcopyin_bad3_loop4
442	sub	r0, r0, #0x01		/* Back to the first uncopied byte */
443
	/*
	 * Common exit of the misaligned paths.  r2 arrives biased by -4;
	 * r4-r7 are popped and r3 cleared before the bias is removed, so the
	 * byte copies below run with nothing extra on the stack.
	 */
444.Lcopyin_l4:
445	pop	{r4-r7}
446	mov	r3, #0x00
447	adds	r2, r2, #0x04
448	RETc(eq)
	/*
	 * Bytewise tail, also reached directly from .Lcopyin_guts when too
	 * few bytes are left to word-align the destination.  r2 is 1..3
	 * here.  Same computed jump as the alignment code: r2 becomes
	 * 3 - count and that many 8-byte ldrbt/strb pairs are skipped.
	 */
449.Lcopyin_l4_2:
450	rsbs	r2, r2, #0x03
451	addne	pc, pc, r2, lsl #3
452	nop
453	ldrbt	ip, [r0], #0x01
454	strb	ip, [r1], #0x01
455	ldrbt	ip, [r0], #0x01
456	strb	ip, [r1], #0x01
457	ldrbt	ip, [r0]
458	strb	ip, [r1]
459	RET
460END(copyin)
461
462
463/*
464 * r0 = kernel space address
465 * r1 = user space address
466 * r2 = length
467 *
468 * Copies bytes from kernel space to user space
469 */
470ENTRY(copyout)
	/*
	 * Entry stub.  Rejects non-positive lengths, installs
	 * .Lcopyout_fault in the PCB_ONFAULT slot, runs .Lcopyout_guts and,
	 * when that returns, restores the previous PCB_ONFAULT value and
	 * returns 0 in r0.
	 *
	 * Register roles shared with .Lcopyout_guts / .Lcopyout_fault:
	 *   r10 = value produced by GET_CURPCB (base for PCB_ONFAULT)
	 *   r11 = previous PCB_ONFAULT value, restored on every exit
	 *   r3  = which extra registers the copy code currently has pushed:
	 *         0 none, > 0 r4-r7, < 0 r4-r9
	 *
	 * NOTE(review): "le" is a signed condition, so a length with bit 31
	 * set also takes the early return-0 path -- confirm callers never
	 * pass such a length.
	 */
471	cmp	r2, #0x00
472	movle	r0, #0x00
473	RETc(le)			/* Bail early if length is <= 0 */
474
475	push	{r10-r11, lr}
476
477	GET_CURPCB(r10)
478
479	mov	r3, #0x00		/* Nothing extra on the stack yet */
480	adr	ip, .Lcopyout_fault
481	ldr	r11, [r10, #PCB_ONFAULT]	/* Remember the old handler */
482	str	ip, [r10, #PCB_ONFAULT]	/* Install ours */
483	bl	.Lcopyout_guts
	/* The copy code returns here once everything has been copied */
484	str	r11, [r10, #PCB_ONFAULT]
485	mov	r0, #0x00		/* Success */
486	pop	{r10-r11, pc}
487
	/*
	 * Target stored in PCB_ONFAULT above.  Puts the old handler back,
	 * drops whatever the copy code had pushed (as recorded in r3) and
	 * returns to copyout's caller through the lr saved at entry.
	 * r0 is not written here; presumably the code that transfers control
	 * to this label has already placed the error value in r0 -- TODO
	 * confirm against the abort handler.
	 */
488.Lcopyout_fault:
489	str	r11, [r10, #PCB_ONFAULT]
490	cmp	r3, #0x00
491	popgt	{r4-r7}		/* r3 > 0 Restore r4-r7 */
492	poplt	{r4-r9}		/* r3 < 0 Restore r4-r9 */
493	pop	{r10-r11, pc}
494
495.Lcopyout_guts:
	/*
	 * Copy code proper, entered with bl from the stub above:
	 *   r0 = source, r1 = destination, r2 = byte count (> 0), r3 = 0.
	 * ip is scratch.  r4-r9 are pushed only around the bulk loops and
	 * r3 is set to -1 while they are on the stack.  Loads from the
	 * source are plain; every store to the destination uses the
	 * strt/strbt ("T") form, one word or byte at a time.
	 */
496	pld	[r0]
497	/* Word-align the destination buffer */
498	ands	ip, r1, #0x03		/* Already word aligned? */
499	beq	.Lcopyout_wordaligned	/* Yup */
500	rsb	ip, ip, #0x04		/* ip = 1..3 bytes needed to align r1 */
501	cmp	r2, ip			/* Enough bytes left to align it? */
502	blt	.Lcopyout_l4_2		/* Nope. Just copy bytewise */
503	sub	r2, r2, ip
	/*
	 * Computed jump into the three byte copies below.  ip becomes
	 * 3 - (bytes to copy), i.e. 2, 1 or 0.  In the addne, pc reads as
	 * the address of the first ldrb (this instruction + 8) and each
	 * ldrb/strbt pair is 8 bytes, so "ip, lsl #3" skips ip pairs.
	 * When ip is 0 the addne is not executed and all three pairs run.
	 */
504	rsbs	ip, ip, #0x03
505	addne	pc, pc, ip, lsl #3
506	nop
507	ldrb	ip, [r0], #0x01
508	strbt	ip, [r1], #0x01
509	ldrb	ip, [r0], #0x01
510	strbt	ip, [r1], #0x01
511	ldrb	ip, [r0], #0x01
512	strbt	ip, [r1], #0x01
513	cmp	r2, #0x00		/* All done? */
514	RETc(eq)
515
516	/* Destination buffer is now word aligned */
517.Lcopyout_wordaligned:
518	ands	ip, r0, #0x03		/* Is src also word-aligned? */
519	bne	.Lcopyout_bad_align	/* Nope. Things just got bad */
520	cmp	r2, #0x08		/* Less than 8 bytes remaining? */
521	blt	.Lcopyout_w_less_than8
522
523	/* Quad-align the destination buffer */
	/*
	 * If r1 is only word aligned, move one word so that it becomes
	 * 8-byte aligned.  The strtne runs after the push with r3 already
	 * -1, so a fault on it unwinds r4-r9.
	 */
524	tst	r1, #0x07		/* Already quad aligned? */
525	ldrne	ip, [r0], #0x04
526	push	{r4-r9}		/* Free up some registers */
527	mov	r3, #-1			/* Signal restore r4-r9 */
528	tst	r1, #0x07		/* XXX: bug work-around */
529	subne	r2, r2, #0x04
530	strtne	ip, [r1], #0x04
531
532	/* Destination buffer quad aligned, source is word aligned */
	/*
	 * r2 is biased by -0x80 while in the 128-byte loop so that the loop
	 * test is a plain sign check; the bias is removed again at
	 * .Lcopyout_w_lessthan128.
	 */
533	subs	r2, r2, #0x80
534	blt	.Lcopyout_w_lessthan128
535
536	/* Copy 128 bytes at a time */
	/*
	 * Fully unrolled: word loads rotate through r4-r9 and are interleaved
	 * with pairs of single-word strt stores of values loaded earlier.
	 * The LD:/ST: tags give the byte offsets within the 128-byte chunk.
	 * The subs near the end sets the flags for the closing bge; the two
	 * strt after it do not change them.
	 */
537.Lcopyout_w_loop128:
538	ldr	r4, [r0], #0x04		/* LD:00-03 */
539	ldr	r5, [r0], #0x04		/* LD:04-07 */
540	pld	[r0, #0x18]		/* Prefetch 0x20 */
541	ldr	r6, [r0], #0x04		/* LD:08-0b */
542	ldr	r7, [r0], #0x04		/* LD:0c-0f */
543	ldr	r8, [r0], #0x04		/* LD:10-13 */
544	ldr	r9, [r0], #0x04		/* LD:14-17 */
545	strt	r4, [r1], #0x04		/* ST:00-03 */
546	strt	r5, [r1], #0x04		/* ST:04-07 */
547	ldr	r4, [r0], #0x04		/* LD:18-1b */
548	ldr	r5, [r0], #0x04		/* LD:1c-1f */
549	strt	r6, [r1], #0x04		/* ST:08-0b */
550	strt	r7, [r1], #0x04		/* ST:0c-0f */
551	ldr	r6, [r0], #0x04		/* LD:20-23 */
552	ldr	r7, [r0], #0x04		/* LD:24-27 */
553	pld	[r0, #0x18]		/* Prefetch 0x40 */
554	strt	r8, [r1], #0x04		/* ST:10-13 */
555	strt	r9, [r1], #0x04		/* ST:14-17 */
556	ldr	r8, [r0], #0x04		/* LD:28-2b */
557	ldr	r9, [r0], #0x04		/* LD:2c-2f */
558	strt	r4, [r1], #0x04		/* ST:18-1b */
559	strt	r5, [r1], #0x04		/* ST:1c-1f */
560	ldr	r4, [r0], #0x04		/* LD:30-33 */
561	ldr	r5, [r0], #0x04		/* LD:34-37 */
562	strt	r6, [r1], #0x04		/* ST:20-23 */
563	strt	r7, [r1], #0x04		/* ST:24-27 */
564	ldr	r6, [r0], #0x04		/* LD:38-3b */
565	ldr	r7, [r0], #0x04		/* LD:3c-3f */
566	strt	r8, [r1], #0x04		/* ST:28-2b */
567	strt	r9, [r1], #0x04		/* ST:2c-2f */
568	ldr	r8, [r0], #0x04		/* LD:40-43 */
569	ldr	r9, [r0], #0x04		/* LD:44-47 */
570	pld	[r0, #0x18]		/* Prefetch 0x60 */
571	strt	r4, [r1], #0x04		/* ST:30-33 */
572	strt	r5, [r1], #0x04		/* ST:34-37 */
573	ldr	r4, [r0], #0x04		/* LD:48-4b */
574	ldr	r5, [r0], #0x04		/* LD:4c-4f */
575	strt	r6, [r1], #0x04		/* ST:38-3b */
576	strt	r7, [r1], #0x04		/* ST:3c-3f */
577	ldr	r6, [r0], #0x04		/* LD:50-53 */
578	ldr	r7, [r0], #0x04		/* LD:54-57 */
579	strt	r8, [r1], #0x04		/* ST:40-43 */
580	strt	r9, [r1], #0x04		/* ST:44-47 */
581	ldr	r8, [r0], #0x04		/* LD:58-5b */
582	ldr	r9, [r0], #0x04		/* LD:5c-5f */
583	strt	r4, [r1], #0x04		/* ST:48-4b */
584	strt	r5, [r1], #0x04		/* ST:4c-4f */
585	ldr	r4, [r0], #0x04		/* LD:60-63 */
586	ldr	r5, [r0], #0x04		/* LD:64-67 */
587	pld	[r0, #0x18]		/* Prefetch 0x80 */
588	strt	r6, [r1], #0x04		/* ST:50-53 */
589	strt	r7, [r1], #0x04		/* ST:54-57 */
590	ldr	r6, [r0], #0x04		/* LD:68-6b */
591	ldr	r7, [r0], #0x04		/* LD:6c-6f */
592	strt	r8, [r1], #0x04		/* ST:58-5b */
593	strt	r9, [r1], #0x04		/* ST:5c-5f */
594	ldr	r8, [r0], #0x04		/* LD:70-73 */
595	ldr	r9, [r0], #0x04		/* LD:74-77 */
596	strt	r4, [r1], #0x04		/* ST:60-63 */
597	strt	r5, [r1], #0x04		/* ST:64-67 */
598	ldr	r4, [r0], #0x04		/* LD:78-7b */
599	ldr	r5, [r0], #0x04		/* LD:7c-7f */
600	strt	r6, [r1], #0x04		/* ST:68-6b */
601	strt	r7, [r1], #0x04		/* ST:6c-6f */
602	strt	r8, [r1], #0x04		/* ST:70-73 */
603	strt	r9, [r1], #0x04		/* ST:74-77 */
604	subs	r2, r2, #0x80
605	strt	r4, [r1], #0x04		/* ST:78-7b */
606	strt	r5, [r1], #0x04		/* ST:7c-7f */
607	bge	.Lcopyout_w_loop128
608
609.Lcopyout_w_lessthan128:
610	adds	r2, r2, #0x80		/* Adjust for extra sub */
611	popeq	{r4-r9}
612	RETc(eq)			/* Return now if done */
613	subs	r2, r2, #0x20		/* Same -bias trick, now for 32 bytes */
614	blt	.Lcopyout_w_lessthan32
615
616	/* Copy 32 bytes at a time */
617.Lcopyout_w_loop32:
618	ldr	r4, [r0], #0x04
619	ldr	r5, [r0], #0x04
620	pld	[r0, #0x18]
621	ldr	r6, [r0], #0x04
622	ldr	r7, [r0], #0x04
623	ldr	r8, [r0], #0x04
624	ldr	r9, [r0], #0x04
625	strt	r4, [r1], #0x04
626	strt	r5, [r1], #0x04
627	ldr	r4, [r0], #0x04
628	ldr	r5, [r0], #0x04
629	strt	r6, [r1], #0x04
630	strt	r7, [r1], #0x04
631	strt	r8, [r1], #0x04
632	strt	r9, [r1], #0x04
633	subs	r2, r2, #0x20
634	strt	r4, [r1], #0x04
635	strt	r5, [r1], #0x04
636	bge	.Lcopyout_w_loop32
637
638.Lcopyout_w_lessthan32:
639	adds	r2, r2, #0x20		/* Adjust for extra sub */
640	popeq	{r4-r9}
641	RETc(eq)			/* Return now if done */
642
	/*
	 * 1..31 bytes remain.  r4 = the part to move as whole 8-byte groups
	 * (0, 8, 16 or 24); r2 keeps the 0..7 byte remainder and the subs
	 * leaves Z set when that remainder is zero.  Each group below is
	 * four instructions (16 bytes) and pc reads as the address of the
	 * first group, so adding (0x18 - r4) * 2 skips exactly the groups
	 * that are not needed.  Nothing between the subs and the RETc(eq)
	 * after the pop modifies the flags.
	 */
643	and	r4, r2, #0x18
644	rsb	r5, r4, #0x18
645	subs	r2, r2, r4
646	add	pc, pc, r5, lsl #1
647	nop
648
649	/* At least 24 bytes remaining */
650	ldr	r4, [r0], #0x04
651	ldr	r5, [r0], #0x04
652	strt	r4, [r1], #0x04
653	strt	r5, [r1], #0x04
654
655	/* At least 16 bytes remaining */
656	ldr	r4, [r0], #0x04
657	ldr	r5, [r0], #0x04
658	strt	r4, [r1], #0x04
659	strt	r5, [r1], #0x04
660
661	/* At least 8 bytes remaining */
662	ldr	r4, [r0], #0x04
663	ldr	r5, [r0], #0x04
664	strt	r4, [r1], #0x04
665	strt	r5, [r1], #0x04
666
667	/* Less than 8 bytes remaining */
668	pop	{r4-r9}
669	RETc(eq)			/* Return now if done */
670	mov	r3, #0x00		/* r4-r9 are off the stack again */
671
	/*
	 * 1..7 bytes remain, both pointers word aligned, nothing extra on
	 * the stack.  Copy one word if at least 4 bytes are left, then up
	 * to three single bytes.
	 */
672.Lcopyout_w_less_than8:
673	subs	r2, r2, #0x04
674	ldrge	ip, [r0], #0x04
675	strtge	ip, [r1], #0x04
676	RETc(eq)			/* Return now if done */
677	addlt	r2, r2, #0x04		/* No word copied: undo the sub */
	/* r2 is now 1..3 */
678	ldrb	ip, [r0], #0x01		/* Byte 1, always present */
679	cmp	r2, #0x02
680	ldrbge	r2, [r0], #0x01		/* Byte 2; r2 is reused to hold data */
681	strbt	ip, [r1], #0x01
682	ldrbgt	ip, [r0]		/* Byte 3 */
683	strbtge	r2, [r1], #0x01
684	strbtgt	ip, [r1]
685	RET
686
687/*
688 * At this point, it has not been possible to word align both buffers.
689 * The destination buffer (r1) is word aligned, but the source buffer
690 * (r0) is not.
691 */
692.Lcopyout_bad_align:
	/*
	 * Entered with ip = source address & 3 (1, 2 or 3), from the ands
	 * at .Lcopyout_wordaligned.  r4-r7 are pushed and r3 is set to 1 to
	 * record that.  r0 is rounded down to a word boundary and the first
	 * source word is pre-loaded into ip; the cmp is done first because
	 * the load overwrites ip but leaves the flags alone.
	 *
	 * Each .Lcopyout_badN loop builds a destination word from the unused
	 * bytes of the previous source word (kept in ip) and the leading
	 * bytes of the next one; the shift directions are swapped when
	 * __ARMEB__ is defined.
	 */
693	push	{r4-r7}
694	mov	r3, #0x01
695	bic	r0, r0, #0x03
696	cmp	ip, #2
697	ldr	ip, [r0], #0x04
698	bgt	.Lcopyout_bad3
699	beq	.Lcopyout_bad2
700	b	.Lcopyout_bad1
701
	/* Source is 1 byte past a word boundary: 16 bytes per iteration */
702.Lcopyout_bad1_loop16:
703#ifdef	__ARMEB__
704	mov	r4, ip, lsl #8
705#else
706	mov	r4, ip, lsr #8
707#endif
708	ldr	r5, [r0], #0x04
709	pld	[r0, #0x018]
710	ldr	r6, [r0], #0x04
711	ldr	r7, [r0], #0x04
712	ldr	ip, [r0], #0x04
713#ifdef	__ARMEB__
714	orr	r4, r4, r5, lsr #24
715	mov	r5, r5, lsl #8
716	orr	r5, r5, r6, lsr #24
717	mov	r6, r6, lsl #8
718	orr	r6, r6, r7, lsr #24
719	mov	r7, r7, lsl #8
720	orr	r7, r7, ip, lsr #24
721#else
722	orr	r4, r4, r5, lsl #24
723	mov	r5, r5, lsr #8
724	orr	r5, r5, r6, lsl #24
725	mov	r6, r6, lsr #8
726	orr	r6, r6, r7, lsl #24
727	mov	r7, r7, lsr #8
728	orr	r7, r7, ip, lsl #24
729#endif
730	strt	r4, [r1], #0x04
731	strt	r5, [r1], #0x04
732	strt	r6, [r1], #0x04
733	strt	r7, [r1], #0x04
734.Lcopyout_bad1:
735	subs	r2, r2, #0x10
736	bge	.Lcopyout_bad1_loop16
737
738	adds	r2, r2, #0x10
739	popeq	{r4-r7}
740	RETc(eq)			/* Return now if done */
741	subs	r2, r2, #0x04
	/*
	 * Fewer than 4 bytes left: r0 is one word past the word held in ip,
	 * of which 3 bytes are still unused, so step r0 back by 3 to the
	 * first uncopied source byte before finishing bytewise.
	 */
742	sublt	r0, r0, #0x03
743	blt	.Lcopyout_l4
744
	/* One word per iteration while at least 4 bytes remain */
745.Lcopyout_bad1_loop4:
746#ifdef __ARMEB__
747	mov	r4, ip, lsl #8
748#else
749	mov	r4, ip, lsr #8
750#endif
751	ldr	ip, [r0], #0x04
752	subs	r2, r2, #0x04
753#ifdef __ARMEB__
754	orr	r4, r4, ip, lsr #24
755#else
756	orr	r4, r4, ip, lsl #24
757#endif
758	strt	r4, [r1], #0x04
759	bge	.Lcopyout_bad1_loop4
760	sub	r0, r0, #0x03		/* Back to the first uncopied byte */
761	b	.Lcopyout_l4
762
	/* Source is 2 bytes past a word boundary: 16 bytes per iteration */
763.Lcopyout_bad2_loop16:
764#ifdef __ARMEB__
765	mov	r4, ip, lsl #16
766#else
767	mov	r4, ip, lsr #16
768#endif
769	ldr	r5, [r0], #0x04
770	pld	[r0, #0x018]
771	ldr	r6, [r0], #0x04
772	ldr	r7, [r0], #0x04
773	ldr	ip, [r0], #0x04
774#ifdef __ARMEB__
775	orr	r4, r4, r5, lsr #16
776	mov	r5, r5, lsl #16
777	orr	r5, r5, r6, lsr #16
778	mov	r6, r6, lsl #16
779	orr	r6, r6, r7, lsr #16
780	mov	r7, r7, lsl #16
781	orr	r7, r7, ip, lsr #16
782#else
783	orr	r4, r4, r5, lsl #16
784	mov	r5, r5, lsr #16
785	orr	r5, r5, r6, lsl #16
786	mov	r6, r6, lsr #16
787	orr	r6, r6, r7, lsl #16
788	mov	r7, r7, lsr #16
789	orr	r7, r7, ip, lsl #16
790#endif
791	strt	r4, [r1], #0x04
792	strt	r5, [r1], #0x04
793	strt	r6, [r1], #0x04
794	strt	r7, [r1], #0x04
795.Lcopyout_bad2:
796	subs	r2, r2, #0x10
797	bge	.Lcopyout_bad2_loop16
798
799	adds	r2, r2, #0x10
800	popeq	{r4-r7}
801	RETc(eq)			/* Return now if done */
802	subs	r2, r2, #0x04
803	sublt	r0, r0, #0x02		/* 2 bytes of ip unused: back up by 2 */
804	blt	.Lcopyout_l4
805
806.Lcopyout_bad2_loop4:
807#ifdef __ARMEB__
808	mov	r4, ip, lsl #16
809#else
810	mov	r4, ip, lsr #16
811#endif
812	ldr	ip, [r0], #0x04
813	subs	r2, r2, #0x04
814#ifdef __ARMEB__
815	orr	r4, r4, ip, lsr #16
816#else
817	orr	r4, r4, ip, lsl #16
818#endif
819	strt	r4, [r1], #0x04
820	bge	.Lcopyout_bad2_loop4
821	sub	r0, r0, #0x02		/* Back to the first uncopied byte */
822	b	.Lcopyout_l4
823
	/* Source is 3 bytes past a word boundary: 16 bytes per iteration */
824.Lcopyout_bad3_loop16:
825#ifdef __ARMEB__
826	mov	r4, ip, lsl #24
827#else
828	mov	r4, ip, lsr #24
829#endif
830	ldr	r5, [r0], #0x04
831	pld	[r0, #0x018]
832	ldr	r6, [r0], #0x04
833	ldr	r7, [r0], #0x04
834	ldr	ip, [r0], #0x04
835#ifdef __ARMEB__
836	orr	r4, r4, r5, lsr #8
837	mov	r5, r5, lsl #24
838	orr	r5, r5, r6, lsr #8
839	mov	r6, r6, lsl #24
840	orr	r6, r6, r7, lsr #8
841	mov	r7, r7, lsl #24
842	orr	r7, r7, ip, lsr #8
843#else
844	orr	r4, r4, r5, lsl #8
845	mov	r5, r5, lsr #24
846	orr	r5, r5, r6, lsl #8
847	mov	r6, r6, lsr #24
848	orr	r6, r6, r7, lsl #8
849	mov	r7, r7, lsr #24
850	orr	r7, r7, ip, lsl #8
851#endif
852	strt	r4, [r1], #0x04
853	strt	r5, [r1], #0x04
854	strt	r6, [r1], #0x04
855	strt	r7, [r1], #0x04
856.Lcopyout_bad3:
857	subs	r2, r2, #0x10
858	bge	.Lcopyout_bad3_loop16
859
860	adds	r2, r2, #0x10
861	popeq	{r4-r7}
862	RETc(eq)			/* Return now if done */
863	subs	r2, r2, #0x04
864	sublt	r0, r0, #0x01		/* 1 byte of ip unused: back up by 1 */
865	blt	.Lcopyout_l4
866
867.Lcopyout_bad3_loop4:
868#ifdef __ARMEB__
869	mov	r4, ip, lsl #24
870#else
871	mov	r4, ip, lsr #24
872#endif
873	ldr	ip, [r0], #0x04
874	subs	r2, r2, #0x04
875#ifdef __ARMEB__
876	orr	r4, r4, ip, lsr #8
877#else
878	orr	r4, r4, ip, lsl #8
879#endif
880	strt	r4, [r1], #0x04
881	bge	.Lcopyout_bad3_loop4
882	sub	r0, r0, #0x01		/* Back to the first uncopied byte */
883
	/*
	 * Common exit of the misaligned paths.  r2 arrives biased by -4;
	 * r4-r7 are popped and r3 cleared before the bias is removed, so the
	 * byte copies below run with nothing extra on the stack.
	 */
884.Lcopyout_l4:
885	pop	{r4-r7}
886	mov	r3, #0x00
887	adds	r2, r2, #0x04
888	RETc(eq)
	/*
	 * Bytewise tail, also reached directly from .Lcopyout_guts when too
	 * few bytes are left to word-align the destination.  r2 is 1..3
	 * here.  Same computed jump as the alignment code: r2 becomes
	 * 3 - count and that many 8-byte ldrb/strbt pairs are skipped.
	 */
889.Lcopyout_l4_2:
890	rsbs	r2, r2, #0x03
891	addne	pc, pc, r2, lsl #3
892	nop
893	ldrb	ip, [r0], #0x01
894	strbt	ip, [r1], #0x01
895	ldrb	ip, [r0], #0x01
896	strbt	ip, [r1], #0x01
897	ldrb	ip, [r0]
898	strbt	ip, [r1]
899	RET
900END(copyout)
901
902/*
903 * r0 = kernel space source address
904 * r1 = kernel space destination address
905 * r2 = length
906 *
907 * Copies bytes from kernel space to kernel space, aborting on page fault
908 */
909ENTRY(kcopy)
	/*
	 * Entry stub.  Rejects non-positive lengths, installs .Lkcopy_fault
	 * in the PCB_ONFAULT slot, runs .Lkcopy_guts and, when that returns,
	 * restores the previous PCB_ONFAULT value and returns 0 in r0.
	 *
	 * Register roles shared with .Lkcopy_guts / .Lkcopy_fault:
	 *   r10 = value produced by GET_CURPCB (base for PCB_ONFAULT)
	 *   r11 = previous PCB_ONFAULT value, restored on every exit
	 *   r3  = which extra registers the copy code currently has pushed:
	 *         0 none, > 0 r4-r7, < 0 r4-r9
	 *
	 * NOTE(review): "le" is a signed condition, so a length with bit 31
	 * set also takes the early return-0 path -- confirm callers never
	 * pass such a length.
	 */
910	cmp	r2, #0x00
911	movle	r0, #0x00
912	RETc(le)			/* Bail early if length is <= 0 */
913
914	push	{r10-r11, lr}
915
916	GET_CURPCB(r10)
917
918	mov	r3, #0x00		/* Nothing extra on the stack yet */
919	adr	ip, .Lkcopy_fault
920	ldr	r11, [r10, #PCB_ONFAULT]	/* Remember the old handler */
921	str	ip, [r10, #PCB_ONFAULT]	/* Install ours */
922	bl	.Lkcopy_guts
	/* The copy code returns here once everything has been copied */
923	str	r11, [r10, #PCB_ONFAULT]
924	mov	r0, #0x00		/* Success */
925	pop	{r10-r11, pc}
926
	/*
	 * Target stored in PCB_ONFAULT above.  Puts the old handler back,
	 * drops whatever the copy code had pushed (as recorded in r3) and
	 * returns to kcopy's caller through the lr saved at entry.
	 * r0 is not written here; presumably the code that transfers control
	 * to this label has already placed the error value in r0 -- TODO
	 * confirm against the abort handler.
	 */
927.Lkcopy_fault:
928	str	r11, [r10, #PCB_ONFAULT]
929	cmp	r3, #0x00
930	popgt	{r4-r7}		/* r3 > 0 Restore r4-r7 */
931	poplt	{r4-r9}		/* r3 < 0 Restore r4-r9 */
932	pop	{r10-r11, pc}
933
934.Lkcopy_guts:
	/*
	 * Copy code proper, entered with bl from the stub above:
	 *   r0 = source, r1 = destination, r2 = byte count (> 0), r3 = 0.
	 * ip is scratch.  r4-r9 are pushed only around the bulk loops and
	 * r3 is set to -1 while they are on the stack.  All loads and
	 * stores are the plain (non-"T") forms.
	 */
935	pld	[r0]
936	/* Word-align the destination buffer */
937	ands	ip, r1, #0x03		/* Already word aligned? */
938	beq	.Lkcopy_wordaligned	/* Yup */
939	rsb	ip, ip, #0x04		/* ip = 1..3 bytes needed to align r1 */
940	cmp	r2, ip			/* Enough bytes left to align it? */
941	blt	.Lkcopy_bad_endgame2	/* Nope. Just copy bytewise */
942	sub	r2, r2, ip
	/*
	 * Computed jump into the three byte copies below.  ip becomes
	 * 3 - (bytes to copy), i.e. 2, 1 or 0.  In the addne, pc reads as
	 * the address of the first ldrb (this instruction + 8) and each
	 * ldrb/strb pair is 8 bytes, so "ip, lsl #3" skips ip pairs.
	 * When ip is 0 the addne is not executed and all three pairs run.
	 */
943	rsbs	ip, ip, #0x03
944	addne	pc, pc, ip, lsl #3
945	nop
946	ldrb	ip, [r0], #0x01
947	strb	ip, [r1], #0x01
948	ldrb	ip, [r0], #0x01
949	strb	ip, [r1], #0x01
950	ldrb	ip, [r0], #0x01
951	strb	ip, [r1], #0x01
952	cmp	r2, #0x00		/* All done? */
953	RETc(eq)
954
955	/* Destination buffer is now word aligned */
956.Lkcopy_wordaligned:
957	ands	ip, r0, #0x03		/* Is src also word-aligned? */
958	bne	.Lkcopy_bad_align	/* Nope. Things just got bad */
959	cmp	r2, #0x08		/* Less than 8 bytes remaining? */
960	blt	.Lkcopy_w_less_than8
961
962	/* Quad-align the destination buffer */
	/*
	 * If r1 is only word aligned, move one word so that it becomes
	 * 8-byte aligned.  The subne/strne still act on the flags from the
	 * tst: the ldrne, push and mov in between do not set flags.
	 */
963	tst	r1, #0x07		/* Already quad aligned? */
964	ldrne	ip, [r0], #0x04
965	push	{r4-r9}		/* Free up some registers */
966	mov	r3, #-1			/* Signal restore r4-r9 */
967	subne	r2, r2, #0x04
968	strne	ip, [r1], #0x04
969
970	/* Destination buffer quad aligned, source is word aligned */
	/*
	 * r2 is biased by -0x80 while in the 128-byte loop so that the loop
	 * test is a plain sign check; the bias is removed again at
	 * .Lkcopy_w_lessthan128.
	 */
971	subs	r2, r2, #0x80
972	blt	.Lkcopy_w_lessthan128
973
974	/* Copy 128 bytes at a time */
	/*
	 * Fully unrolled: word loads rotate through r4-r9 and are interleaved
	 * with 8-byte strd stores of pairs loaded earlier.  The LD:/ST: tags
	 * give the byte offsets within the 128-byte chunk.  The subs near the
	 * end sets the flags for the closing bge; the strd after it does not
	 * change them.
	 */
975.Lkcopy_w_loop128:
976	ldr	r4, [r0], #0x04		/* LD:00-03 */
977	ldr	r5, [r0], #0x04		/* LD:04-07 */
978	pld	[r0, #0x18]		/* Prefetch 0x20 */
979	ldr	r6, [r0], #0x04		/* LD:08-0b */
980	ldr	r7, [r0], #0x04		/* LD:0c-0f */
981	ldr	r8, [r0], #0x04		/* LD:10-13 */
982	ldr	r9, [r0], #0x04		/* LD:14-17 */
983	strd	r4, r5, [r1], #0x08	/* ST:00-07 */
984	ldr	r4, [r0], #0x04		/* LD:18-1b */
985	ldr	r5, [r0], #0x04		/* LD:1c-1f */
986	strd	r6, r7, [r1], #0x08	/* ST:08-0f */
987	ldr	r6, [r0], #0x04		/* LD:20-23 */
988	ldr	r7, [r0], #0x04		/* LD:24-27 */
989	pld	[r0, #0x18]		/* Prefetch 0x40 */
990	strd	r8, r9, [r1], #0x08	/* ST:10-17 */
991	ldr	r8, [r0], #0x04		/* LD:28-2b */
992	ldr	r9, [r0], #0x04		/* LD:2c-2f */
993	strd	r4, r5, [r1], #0x08	/* ST:18-1f */
994	ldr	r4, [r0], #0x04		/* LD:30-33 */
995	ldr	r5, [r0], #0x04		/* LD:34-37 */
996	strd	r6, r7, [r1], #0x08	/* ST:20-27 */
997	ldr	r6, [r0], #0x04		/* LD:38-3b */
998	ldr	r7, [r0], #0x04		/* LD:3c-3f */
999	strd	r8, r9, [r1], #0x08	/* ST:28-2f */
1000	ldr	r8, [r0], #0x04		/* LD:40-43 */
1001	ldr	r9, [r0], #0x04		/* LD:44-47 */
1002	pld	[r0, #0x18]		/* Prefetch 0x60 */
1003	strd	r4, r5, [r1], #0x08	/* ST:30-37 */
1004	ldr	r4, [r0], #0x04		/* LD:48-4b */
1005	ldr	r5, [r0], #0x04		/* LD:4c-4f */
1006	strd	r6, r7, [r1], #0x08	/* ST:38-3f */
1007	ldr	r6, [r0], #0x04		/* LD:50-53 */
1008	ldr	r7, [r0], #0x04		/* LD:54-57 */
1009	strd	r8, r9, [r1], #0x08	/* ST:40-47 */
1010	ldr	r8, [r0], #0x04		/* LD:58-5b */
1011	ldr	r9, [r0], #0x04		/* LD:5c-5f */
1012	strd	r4, r5, [r1], #0x08	/* ST:48-4f */
1013	ldr	r4, [r0], #0x04		/* LD:60-63 */
1014	ldr	r5, [r0], #0x04		/* LD:64-67 */
1015	pld	[r0, #0x18]		/* Prefetch 0x80 */
1016	strd	r6, r7, [r1], #0x08	/* ST:50-57 */
1017	ldr	r6, [r0], #0x04		/* LD:68-6b */
1018	ldr	r7, [r0], #0x04		/* LD:6c-6f */
1019	strd	r8, r9, [r1], #0x08	/* ST:58-5f */
1020	ldr	r8, [r0], #0x04		/* LD:70-73 */
1021	ldr	r9, [r0], #0x04		/* LD:74-77 */
1022	strd	r4, r5, [r1], #0x08	/* ST:60-67 */
1023	ldr	r4, [r0], #0x04		/* LD:78-7b */
1024	ldr	r5, [r0], #0x04		/* LD:7c-7f */
1025	strd	r6, r7, [r1], #0x08	/* ST:68-6f */
1026	strd	r8, r9, [r1], #0x08	/* ST:70-77 */
1027	subs	r2, r2, #0x80
1028	strd	r4, r5, [r1], #0x08	/* ST:78-7f */
1029	bge	.Lkcopy_w_loop128
1030
1031.Lkcopy_w_lessthan128:
1032	adds	r2, r2, #0x80		/* Adjust for extra sub */
1033	popeq	{r4-r9}
1034	RETc(eq)			/* Return now if done */
1035	subs	r2, r2, #0x20		/* Same -bias trick, now for 32 bytes */
1036	blt	.Lkcopy_w_lessthan32
1037
1038	/* Copy 32 bytes at a time */
1039.Lkcopy_w_loop32:
1040	ldr	r4, [r0], #0x04
1041	ldr	r5, [r0], #0x04
1042	pld	[r0, #0x18]
1043	ldr	r6, [r0], #0x04
1044	ldr	r7, [r0], #0x04
1045	ldr	r8, [r0], #0x04
1046	ldr	r9, [r0], #0x04
1047	strd	r4, r5, [r1], #0x08
1048	ldr	r4, [r0], #0x04
1049	ldr	r5, [r0], #0x04
1050	strd	r6, r7, [r1], #0x08
1051	strd	r8, r9, [r1], #0x08
1052	subs	r2, r2, #0x20
1053	strd	r4, r5, [r1], #0x08
1054	bge	.Lkcopy_w_loop32
1055
1056.Lkcopy_w_lessthan32:
1057	adds	r2, r2, #0x20		/* Adjust for extra sub */
1058	popeq	{r4-r9}
1059	RETc(eq)			/* Return now if done */
1060
	/*
	 * 1..31 bytes remain.  r4 = the part to move as whole 8-byte groups
	 * (0, 8, 16 or 24); r2 keeps the 0..7 byte remainder and the subs
	 * leaves Z set when that remainder is zero.  Each group below is
	 * four instructions (16 bytes; the nop is padding) and pc reads as
	 * the address of the first group, so adding (0x18 - r4) * 2 skips
	 * exactly the groups that are not needed.  Nothing between the subs
	 * and the RETc(eq) after the pop modifies the flags.
	 */
1061	and	r4, r2, #0x18
1062	rsb	r5, r4, #0x18
1063	subs	r2, r2, r4
1064	add	pc, pc, r5, lsl #1
1065	nop
1066
1067	/* At least 24 bytes remaining */
1068	ldr	r4, [r0], #0x04
1069	ldr	r5, [r0], #0x04
1070	nop
1071	strd	r4, r5, [r1], #0x08
1072
1073	/* At least 16 bytes remaining */
1074	ldr	r4, [r0], #0x04
1075	ldr	r5, [r0], #0x04
1076	nop
1077	strd	r4, r5, [r1], #0x08
1078
1079	/* At least 8 bytes remaining */
1080	ldr	r4, [r0], #0x04
1081	ldr	r5, [r0], #0x04
1082	nop
1083	strd	r4, r5, [r1], #0x08
1084
1085	/* Less than 8 bytes remaining */
1086	pop	{r4-r9}
1087	RETc(eq)			/* Return now if done */
1088	mov	r3, #0x00		/* r4-r9 are off the stack again */
1089
	/*
	 * 1..7 bytes remain, both pointers word aligned, nothing extra on
	 * the stack.  Copy one word if at least 4 bytes are left, then up
	 * to three single bytes.
	 */
1090.Lkcopy_w_less_than8:
1091	subs	r2, r2, #0x04
1092	ldrge	ip, [r0], #0x04
1093	strge	ip, [r1], #0x04
1094	RETc(eq)			/* Return now if done */
1095	addlt	r2, r2, #0x04		/* No word copied: undo the sub */
	/* r2 is now 1..3 */
1096	ldrb	ip, [r0], #0x01		/* Byte 1, always present */
1097	cmp	r2, #0x02
1098	ldrbge	r2, [r0], #0x01		/* Byte 2; r2 is reused to hold data */
1099	strb	ip, [r1], #0x01
1100	ldrbgt	ip, [r0]		/* Byte 3 */
1101	strbge	r2, [r1], #0x01
1102	strbgt	ip, [r1]
1103	RET
1104
1105/*
1106 * At this point, it has not been possible to word align both buffers.
1107 * The destination buffer (r1) is word aligned, but the source buffer
1108 * (r0) is not.
1109 */
1110.Lkcopy_bad_align:
	/*
	 * Entered with ip = source address & 3 (1, 2 or 3), from the ands
	 * at .Lkcopy_wordaligned.  r4-r7 are pushed and r3 is set to 1 to
	 * record that.  r0 is rounded down to a word boundary and the first
	 * source word is pre-loaded into ip; the cmp is done first because
	 * the load overwrites ip but leaves the flags alone.
	 *
	 * Each .Lkcopy_badN loop builds a destination word from the unused
	 * bytes of the previous source word (kept in ip) and the leading
	 * bytes of the next one; the shift directions are swapped when
	 * __ARMEB__ is defined.
	 */
1111	push	{r4-r7}
1112	mov	r3, #0x01
1113	bic	r0, r0, #0x03
1114	cmp	ip, #2
1115	ldr	ip, [r0], #0x04
1116	bgt	.Lkcopy_bad3
1117	beq	.Lkcopy_bad2
1118	b	.Lkcopy_bad1
1119
	/* Source is 1 byte past a word boundary: 16 bytes per iteration */
1120.Lkcopy_bad1_loop16:
1121#ifdef __ARMEB__
1122	mov	r4, ip, lsl #8
1123#else
1124	mov	r4, ip, lsr #8
1125#endif
1126	ldr	r5, [r0], #0x04
1127	pld	[r0, #0x018]
1128	ldr	r6, [r0], #0x04
1129	ldr	r7, [r0], #0x04
1130	ldr	ip, [r0], #0x04
1131#ifdef __ARMEB__
1132	orr	r4, r4, r5, lsr #24
1133	mov	r5, r5, lsl #8
1134	orr	r5, r5, r6, lsr #24
1135	mov	r6, r6, lsl #8
1136	orr	r6, r6, r7, lsr #24
1137	mov	r7, r7, lsl #8
1138	orr	r7, r7, ip, lsr #24
1139#else
1140	orr	r4, r4, r5, lsl #24
1141	mov	r5, r5, lsr #8
1142	orr	r5, r5, r6, lsl #24
1143	mov	r6, r6, lsr #8
1144	orr	r6, r6, r7, lsl #24
1145	mov	r7, r7, lsr #8
1146	orr	r7, r7, ip, lsl #24
1147#endif
1148	str	r4, [r1], #0x04
1149	str	r5, [r1], #0x04
1150	str	r6, [r1], #0x04
1151	str	r7, [r1], #0x04
1152.Lkcopy_bad1:
1153	subs	r2, r2, #0x10
1154	bge	.Lkcopy_bad1_loop16
1155
1156	adds	r2, r2, #0x10
1157	popeq	{r4-r7}
1158	RETc(eq)			/* Return now if done */
1159	subs	r2, r2, #0x04
	/*
	 * Fewer than 4 bytes left: r0 is one word past the word held in ip,
	 * of which 3 bytes are still unused, so step r0 back by 3 to the
	 * first uncopied source byte before finishing bytewise.
	 */
1160	sublt	r0, r0, #0x03
1161	blt	.Lkcopy_bad_endgame
1162
	/* One word per iteration while at least 4 bytes remain */
1163.Lkcopy_bad1_loop4:
1164#ifdef __ARMEB__
1165	mov	r4, ip, lsl #8
1166#else
1167	mov	r4, ip, lsr #8
1168#endif
1169	ldr	ip, [r0], #0x04
1170	subs	r2, r2, #0x04
1171#ifdef __ARMEB__
1172	orr	r4, r4, ip, lsr #24
1173#else
1174	orr	r4, r4, ip, lsl #24
1175#endif
1176	str	r4, [r1], #0x04
1177	bge	.Lkcopy_bad1_loop4
1178	sub	r0, r0, #0x03		/* Back to the first uncopied byte */
1179	b	.Lkcopy_bad_endgame
1180
	/* Source is 2 bytes past a word boundary: 16 bytes per iteration */
1181.Lkcopy_bad2_loop16:
1182#ifdef __ARMEB__
1183	mov	r4, ip, lsl #16
1184#else
1185	mov	r4, ip, lsr #16
1186#endif
1187	ldr	r5, [r0], #0x04
1188	pld	[r0, #0x018]
1189	ldr	r6, [r0], #0x04
1190	ldr	r7, [r0], #0x04
1191	ldr	ip, [r0], #0x04
1192#ifdef __ARMEB__
1193	orr	r4, r4, r5, lsr #16
1194	mov	r5, r5, lsl #16
1195	orr	r5, r5, r6, lsr #16
1196	mov	r6, r6, lsl #16
1197	orr	r6, r6, r7, lsr #16
1198	mov	r7, r7, lsl #16
1199	orr	r7, r7, ip, lsr #16
1200#else
1201	orr	r4, r4, r5, lsl #16
1202	mov	r5, r5, lsr #16
1203	orr	r5, r5, r6, lsl #16
1204	mov	r6, r6, lsr #16
1205	orr	r6, r6, r7, lsl #16
1206	mov	r7, r7, lsr #16
1207	orr	r7, r7, ip, lsl #16
1208#endif
1209	str	r4, [r1], #0x04
1210	str	r5, [r1], #0x04
1211	str	r6, [r1], #0x04
1212	str	r7, [r1], #0x04
1213.Lkcopy_bad2:
1214	subs	r2, r2, #0x10
1215	bge	.Lkcopy_bad2_loop16
1216
1217	adds	r2, r2, #0x10
1218	popeq	{r4-r7}
1219	RETc(eq)			/* Return now if done */
1220	subs	r2, r2, #0x04
1221	sublt	r0, r0, #0x02		/* 2 bytes of ip unused: back up by 2 */
1222	blt	.Lkcopy_bad_endgame
1223
1224.Lkcopy_bad2_loop4:
1225#ifdef __ARMEB__
1226	mov	r4, ip, lsl #16
1227#else
1228	mov	r4, ip, lsr #16
1229#endif
1230	ldr	ip, [r0], #0x04
1231	subs	r2, r2, #0x04
1232#ifdef __ARMEB__
1233	orr	r4, r4, ip, lsr #16
1234#else
1235	orr	r4, r4, ip, lsl #16
1236#endif
1237	str	r4, [r1], #0x04
1238	bge	.Lkcopy_bad2_loop4
1239	sub	r0, r0, #0x02		/* Back to the first uncopied byte */
1240	b	.Lkcopy_bad_endgame
1241
	/* Source is 3 bytes past a word boundary: 16 bytes per iteration */
1242.Lkcopy_bad3_loop16:
1243#ifdef __ARMEB__
1244	mov	r4, ip, lsl #24
1245#else
1246	mov	r4, ip, lsr #24
1247#endif
1248	ldr	r5, [r0], #0x04
1249	pld	[r0, #0x018]
1250	ldr	r6, [r0], #0x04
1251	ldr	r7, [r0], #0x04
1252	ldr	ip, [r0], #0x04
1253#ifdef __ARMEB__
1254	orr	r4, r4, r5, lsr #8
1255	mov	r5, r5, lsl #24
1256	orr	r5, r5, r6, lsr #8
1257	mov	r6, r6, lsl #24
1258	orr	r6, r6, r7, lsr #8
1259	mov	r7, r7, lsl #24
1260	orr	r7, r7, ip, lsr #8
1261#else
1262	orr	r4, r4, r5, lsl #8
1263	mov	r5, r5, lsr #24
1264	orr	r5, r5, r6, lsl #8
1265	mov	r6, r6, lsr #24
1266	orr	r6, r6, r7, lsl #8
1267	mov	r7, r7, lsr #24
1268	orr	r7, r7, ip, lsl #8
1269#endif
1270	str	r4, [r1], #0x04
1271	str	r5, [r1], #0x04
1272	str	r6, [r1], #0x04
1273	str	r7, [r1], #0x04
1274.Lkcopy_bad3:
1275	subs	r2, r2, #0x10
1276	bge	.Lkcopy_bad3_loop16
1277
1278	adds	r2, r2, #0x10
1279	popeq	{r4-r7}
1280	RETc(eq)			/* Return now if done */
1281	subs	r2, r2, #0x04
1282	sublt	r0, r0, #0x01		/* 1 byte of ip unused: back up by 1 */
1283	blt	.Lkcopy_bad_endgame
1284
1285.Lkcopy_bad3_loop4:
1286#ifdef __ARMEB__
1287	mov	r4, ip, lsl #24
1288#else
1289	mov	r4, ip, lsr #24
1290#endif
1291	ldr	ip, [r0], #0x04
1292	subs	r2, r2, #0x04
1293#ifdef __ARMEB__
1294	orr	r4, r4, ip, lsr #8
1295#else
1296	orr	r4, r4, ip, lsl #8
1297#endif
1298	str	r4, [r1], #0x04
1299	bge	.Lkcopy_bad3_loop4
1300	sub	r0, r0, #0x01		/* Back to the first uncopied byte */
1301
	/*
	 * Common exit of the misaligned paths.  r2 arrives biased by -4;
	 * r4-r7 are popped and r3 cleared before the bias is removed, so the
	 * byte copies below run with nothing extra on the stack.
	 */
1302.Lkcopy_bad_endgame:
1303	pop	{r4-r7}
1304	mov	r3, #0x00
1305	adds	r2, r2, #0x04
1306	RETc(eq)
	/*
	 * Bytewise tail, also reached directly from .Lkcopy_guts when too
	 * few bytes are left to word-align the destination.  r2 is 1..3
	 * here.  Same computed jump as the alignment code: r2 becomes
	 * 3 - count and that many 8-byte ldrb/strb pairs are skipped.
	 */
1307.Lkcopy_bad_endgame2:
1308	rsbs	r2, r2, #0x03
1309	addne	pc, pc, r2, lsl #3
1310	nop
1311	ldrb	ip, [r0], #0x01
1312	strb	ip, [r1], #0x01
1313	ldrb	ip, [r0], #0x01
1314	strb	ip, [r1], #0x01
1315	ldrb	ip, [r0]
1316	strb	ip, [r1]
1317	RET
1318END(kcopy)
1319