/* xref: /netbsd-src/common/lib/libc/arch/arm/string/memmove.S (revision 8b0f9554ff8762542c4defc4f70e1eb76fb508fa) */
/*	$NetBSD: memmove.S,v 1.2 2007/06/20 22:31:21 scw Exp $	*/

/*-
 * Copyright (c) 1997 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Neil A. Carson and Mark Brinicombe
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the NetBSD
 *	Foundation, Inc. and its contributors.
 * 4. Neither the name of The NetBSD Foundation nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#include <machine/asm.h>

/*
 * void *memmove(void *dst, const void *src, size_t len)
 *	(compiled with -D_BCOPY: void bcopy(src, dst, len) -- args swapped)
 *
 * In:	r0 = dst, r1 = src, r2 = len (bytes)
 * Out:	r0 = original dst
 * Scratch:	r3, r12 (ip), lr; r4/r5 are pushed/popped around the
 *	wide ldm/stm loops so no callee-saved state leaks.
 *
 * If the buffers do not actually overlap, tail-call memcpy().
 * Otherwise copy forwards (ascending addresses) when dst < src and
 * backwards (descending) when dst > src, so each overlapping byte is
 * read before it is overwritten.  Label naming: "f*" = forward path,
 * "b*" = backward path; "srcul<N>" = source misaligned by N bytes
 * relative to the (already aligned) destination, handled by loading
 * aligned words and splicing adjacent words together with shifts.
 * The splice-shift directions flip under __ARMEB__ because the byte
 * order within a loaded word is reversed.
 */
#ifndef _BCOPY
/* LINTSTUB: Func: void *memmove(void *, const void *, size_t) */
ENTRY(memmove)
#else
/* bcopy = memcpy/memmove with arguments reversed. */
/* LINTSTUB: Func: void bcopy(void *, void *, size_t) */
ENTRY(bcopy)
	/* switch the source and destination registers (XOR swap, no scratch) */
	eor     r0, r1, r0
	eor     r1, r0, r1
	eor     r0, r1, r0
#endif
	/* Do the buffers overlap? */
	cmp	r0, r1
	RETc(eq)		/* Bail now if src/dst are the same */
	subhs	r3, r0, r1	/* if (dst > src) r3 = dst - src */
	sublo	r3, r1, r0	/* if (src > dst) r3 = src - dst */
	cmp	r3, r2		/* if (r3 >= len) we have an overlap */
	bhs	PIC_SYM(_C_LABEL(memcpy), PLT)	/* disjoint: let memcpy do it */

	/* Determine copy direction: dst > src (carry clear) must go backwards */
	cmp	r1, r0
	bcc	.Lmemmove_backwards

	moveq	r0, #0			/* Quick abort for len=0 */
	RETc(eq)

	/* ---- forward (ascending) copy: dst < src ---- */
	stmdb	sp!, {r0, lr}		/* memmove() returns dest addr */
	subs	r2, r2, #4
	blt	.Lmemmove_fl4		/* less than 4 bytes */
	ands	r12, r0, #3
	bne	.Lmemmove_fdestul	/* oh unaligned destination addr */
	ands	r12, r1, #3
	bne	.Lmemmove_fsrcul		/* oh unaligned source addr */

.Lmemmove_ft8:
	/* We have aligned source and destination */
	subs	r2, r2, #8
	blt	.Lmemmove_fl12		/* less than 12 bytes (4 from above) */
	subs	r2, r2, #0x14
	blt	.Lmemmove_fl32		/* less than 32 bytes (12 from above) */
	stmdb	sp!, {r4}		/* borrow r4 */

	/* blat 32 bytes at a time */
	/* XXX for really big copies perhaps we should use more registers */
.Lmemmove_floop32:
	ldmia	r1!, {r3, r4, r12, lr}
	stmia	r0!, {r3, r4, r12, lr}
	ldmia	r1!, {r3, r4, r12, lr}
	stmia	r0!, {r3, r4, r12, lr}
	subs	r2, r2, #0x20
	bge	.Lmemmove_floop32

	cmn	r2, #0x10		/* >= 16 bytes still pending? */
	ldmgeia	r1!, {r3, r4, r12, lr}	/* blat a remaining 16 bytes */
	stmgeia	r0!, {r3, r4, r12, lr}
	subge	r2, r2, #0x10
	ldmia	sp!, {r4}		/* return r4 */

.Lmemmove_fl32:
	adds	r2, r2, #0x14		/* undo the 0x14 bias; ge => >= 12 left */

	/* blat 12 bytes at a time */
.Lmemmove_floop12:
	ldmgeia	r1!, {r3, r12, lr}
	stmgeia	r0!, {r3, r12, lr}
	subges	r2, r2, #0x0c
	bge	.Lmemmove_floop12

.Lmemmove_fl12:
	adds	r2, r2, #8		/* undo the 8 bias from .Lmemmove_ft8 */
	blt	.Lmemmove_fl4

	subs	r2, r2, #4		/* lt: exactly one word; ge: two words */
	ldrlt	r3, [r1], #4
	strlt	r3, [r0], #4
	ldmgeia	r1!, {r3, r12}
	stmgeia	r0!, {r3, r12}
	subge	r2, r2, #4

.Lmemmove_fl4:
	/* less than 4 bytes to go */
	adds	r2, r2, #4
	ldmeqia	sp!, {r0, pc}		/* done */

	/* copy the crud byte at a time: 1, 2 or 3 bytes left (flags from adds) */
	cmp	r2, #2
	ldrb	r3, [r1], #1		/* always at least one byte */
	strb	r3, [r0], #1
	ldrgeb	r3, [r1], #1		/* second byte if r2 >= 2 */
	strgeb	r3, [r0], #1
	ldrgtb	r3, [r1], #1		/* third byte if r2 == 3 */
	strgtb	r3, [r0], #1
	ldmia	sp!, {r0, pc}		/* restore saved dst into r0 and return */

	/* erg - unaligned destination */
.Lmemmove_fdestul:
	rsb	r12, r12, #4		/* r12 = 4 - (dst & 3) = bytes to align dst */
	cmp	r12, #2

	/* align destination with byte copies */
	ldrb	r3, [r1], #1
	strb	r3, [r0], #1
	ldrgeb	r3, [r1], #1
	strgeb	r3, [r0], #1
	ldrgtb	r3, [r1], #1
	strgtb	r3, [r0], #1
	subs	r2, r2, r12
	blt	.Lmemmove_fl4		/* less than 4 bytes */

	ands	r12, r1, #3
	beq	.Lmemmove_ft8		/* we have an aligned source */

	/* erg - unaligned source */
	/* This is where it gets nasty ... */
.Lmemmove_fsrcul:
	/*
	 * dst is word aligned, src is offset by r12 (1..3) bytes.  Round
	 * src down to a word boundary and prime lr with that first aligned
	 * word; each loop below shifts the bytes already in hand and ORs
	 * in the complementary bytes of the next aligned word, carrying
	 * the partial word forward in lr between iterations.
	 */
	bic	r1, r1, #3
	ldr	lr, [r1], #4
	cmp	r12, #2
	bgt	.Lmemmove_fsrcul3	/* src offset 3 */
	beq	.Lmemmove_fsrcul2	/* src offset 2 */
	cmp	r2, #0x0c		/* src offset 1 falls through */
	blt	.Lmemmove_fsrcul1loop4
	sub	r2, r2, #0x0c
	stmdb	sp!, {r4, r5}

	/* src offset 1: keep 3 bytes, take 1 from the next word (16 B/pass) */
.Lmemmove_fsrcul1loop16:
#ifdef __ARMEB__
	mov	r3, lr, lsl #8
#else
	mov	r3, lr, lsr #8
#endif
	ldmia	r1!, {r4, r5, r12, lr}
#ifdef __ARMEB__
	orr	r3, r3, r4, lsr #24
	mov	r4, r4, lsl #8
	orr	r4, r4, r5, lsr #24
	mov	r5, r5, lsl #8
	orr	r5, r5, r12, lsr #24
	mov	r12, r12, lsl #8
	orr	r12, r12, lr, lsr #24
#else
	orr	r3, r3, r4, lsl #24
	mov	r4, r4, lsr #8
	orr	r4, r4, r5, lsl #24
	mov	r5, r5, lsr #8
	orr	r5, r5, r12, lsl #24
	mov	r12, r12, lsr #8
	orr	r12, r12, lr, lsl #24
#endif
	stmia	r0!, {r3-r5, r12}
	subs	r2, r2, #0x10
	bge	.Lmemmove_fsrcul1loop16
	ldmia	sp!, {r4, r5}
	adds	r2, r2, #0x0c
	blt	.Lmemmove_fsrcul1l4

.Lmemmove_fsrcul1loop4:
#ifdef __ARMEB__
	mov	r12, lr, lsl #8
#else
	mov	r12, lr, lsr #8
#endif
	ldr	lr, [r1], #4
#ifdef __ARMEB__
	orr	r12, r12, lr, lsr #24
#else
	orr	r12, r12, lr, lsl #24
#endif
	str	r12, [r0], #4
	subs	r2, r2, #4
	bge	.Lmemmove_fsrcul1loop4

.Lmemmove_fsrcul1l4:
	sub	r1, r1, #3		/* back up to true (unaligned) src */
	b	.Lmemmove_fl4

	/* src offset 2: keep 2 bytes, take 2 from the next word */
.Lmemmove_fsrcul2:
	cmp	r2, #0x0c
	blt	.Lmemmove_fsrcul2loop4
	sub	r2, r2, #0x0c
	stmdb	sp!, {r4, r5}

.Lmemmove_fsrcul2loop16:
#ifdef __ARMEB__
	mov	r3, lr, lsl #16
#else
	mov	r3, lr, lsr #16
#endif
	ldmia	r1!, {r4, r5, r12, lr}
#ifdef __ARMEB__
	orr	r3, r3, r4, lsr #16
	mov	r4, r4, lsl #16
	orr	r4, r4, r5, lsr #16
	mov	r5, r5, lsl #16
	orr	r5, r5, r12, lsr #16
	mov	r12, r12, lsl #16
	orr	r12, r12, lr, lsr #16
#else
	orr	r3, r3, r4, lsl #16
	mov	r4, r4, lsr #16
	orr	r4, r4, r5, lsl #16
	mov	r5, r5, lsr #16
	orr	r5, r5, r12, lsl #16
	mov	r12, r12, lsr #16
	orr	r12, r12, lr, lsl #16
#endif
	stmia	r0!, {r3-r5, r12}
	subs	r2, r2, #0x10
	bge	.Lmemmove_fsrcul2loop16
	ldmia	sp!, {r4, r5}
	adds	r2, r2, #0x0c
	blt	.Lmemmove_fsrcul2l4

.Lmemmove_fsrcul2loop4:
#ifdef __ARMEB__
	mov	r12, lr, lsl #16
#else
	mov	r12, lr, lsr #16
#endif
	ldr	lr, [r1], #4
#ifdef __ARMEB__
	orr	r12, r12, lr, lsr #16
#else
	orr	r12, r12, lr, lsl #16
#endif
	str	r12, [r0], #4
	subs	r2, r2, #4
	bge	.Lmemmove_fsrcul2loop4

.Lmemmove_fsrcul2l4:
	sub	r1, r1, #2		/* back up to true (unaligned) src */
	b	.Lmemmove_fl4

	/* src offset 3: keep 1 byte, take 3 from the next word */
.Lmemmove_fsrcul3:
	cmp	r2, #0x0c
	blt	.Lmemmove_fsrcul3loop4
	sub	r2, r2, #0x0c
	stmdb	sp!, {r4, r5}

.Lmemmove_fsrcul3loop16:
#ifdef __ARMEB__
	mov	r3, lr, lsl #24
#else
	mov	r3, lr, lsr #24
#endif
	ldmia	r1!, {r4, r5, r12, lr}
#ifdef __ARMEB__
	orr	r3, r3, r4, lsr #8
	mov	r4, r4, lsl #24
	orr	r4, r4, r5, lsr #8
	mov	r5, r5, lsl #24
	orr	r5, r5, r12, lsr #8
	mov	r12, r12, lsl #24
	orr	r12, r12, lr, lsr #8
#else
	orr	r3, r3, r4, lsl #8
	mov	r4, r4, lsr #24
	orr	r4, r4, r5, lsl #8
	mov	r5, r5, lsr #24
	orr	r5, r5, r12, lsl #8
	mov	r12, r12, lsr #24
	orr	r12, r12, lr, lsl #8
#endif
	stmia	r0!, {r3-r5, r12}
	subs	r2, r2, #0x10
	bge	.Lmemmove_fsrcul3loop16
	ldmia	sp!, {r4, r5}
	adds	r2, r2, #0x0c
	blt	.Lmemmove_fsrcul3l4

.Lmemmove_fsrcul3loop4:
#ifdef __ARMEB__
	mov	r12, lr, lsl #24
#else
	mov	r12, lr, lsr #24
#endif
	ldr	lr, [r1], #4
#ifdef __ARMEB__
	orr	r12, r12, lr, lsr #8
#else
	orr	r12, r12, lr, lsl #8
#endif
	str	r12, [r0], #4
	subs	r2, r2, #4
	bge	.Lmemmove_fsrcul3loop4

.Lmemmove_fsrcul3l4:
	sub	r1, r1, #1		/* back up to true (unaligned) src */
	b	.Lmemmove_fl4

	/*
	 * ---- backward (descending) copy: dst > src ----
	 * Start both pointers one past the end; the pre-decrement stores
	 * walk r0 back down, so when the copy finishes r0 is the original
	 * dst again, which is also the return value (no need to save it).
	 */
.Lmemmove_backwards:
	add	r1, r1, r2
	add	r0, r0, r2
	subs	r2, r2, #4
	blt	.Lmemmove_bl4		/* less than 4 bytes */
	ands	r12, r0, #3
	bne	.Lmemmove_bdestul	/* oh unaligned destination addr */
	ands	r12, r1, #3
	bne	.Lmemmove_bsrcul		/* oh unaligned source addr */

.Lmemmove_bt8:
	/* We have aligned source and destination */
	subs	r2, r2, #8
	blt	.Lmemmove_bl12		/* less than 12 bytes (4 from above) */
	stmdb	sp!, {r4, lr}
	subs	r2, r2, #0x14		/* less than 32 bytes (12 from above) */
	blt	.Lmemmove_bl32

	/* blat 32 bytes at a time */
	/* XXX for really big copies perhaps we should use more registers */
.Lmemmove_bloop32:
	ldmdb	r1!, {r3, r4, r12, lr}
	stmdb	r0!, {r3, r4, r12, lr}
	ldmdb	r1!, {r3, r4, r12, lr}
	stmdb	r0!, {r3, r4, r12, lr}
	subs	r2, r2, #0x20
	bge	.Lmemmove_bloop32

.Lmemmove_bl32:
	cmn	r2, #0x10		/* >= 16 bytes still pending? */
	ldmgedb	r1!, {r3, r4, r12, lr}	/* blat a remaining 16 bytes */
	stmgedb	r0!, {r3, r4, r12, lr}
	subge	r2, r2, #0x10
	adds	r2, r2, #0x14		/* undo bias; ge => >= 12 left */
	ldmgedb	r1!, {r3, r12, lr}	/* blat a remaining 12 bytes */
	stmgedb	r0!, {r3, r12, lr}
	subge	r2, r2, #0x0c
	ldmia	sp!, {r4, lr}

.Lmemmove_bl12:
	adds	r2, r2, #8		/* undo the 8 bias from .Lmemmove_bt8 */
	blt	.Lmemmove_bl4
	subs	r2, r2, #4		/* lt: one word; ge: two words */
	ldrlt	r3, [r1, #-4]!
	strlt	r3, [r0, #-4]!
	ldmgedb	r1!, {r3, r12}
	stmgedb	r0!, {r3, r12}
	subge	r2, r2, #4

.Lmemmove_bl4:
	/* less than 4 bytes to go */
	adds	r2, r2, #4
	RETc(eq)			/* r0 is back at the original dst */

	/* copy the crud byte at a time: 1, 2 or 3 bytes left */
	cmp	r2, #2
	ldrb	r3, [r1, #-1]!		/* always at least one byte */
	strb	r3, [r0, #-1]!
	ldrgeb	r3, [r1, #-1]!		/* second byte if r2 >= 2 */
	strgeb	r3, [r0, #-1]!
	ldrgtb	r3, [r1, #-1]!		/* third byte if r2 == 3 */
	strgtb	r3, [r0, #-1]!
	RET

	/* erg - unaligned destination */
.Lmemmove_bdestul:
	cmp	r12, #2			/* r12 = dst & 3 = bytes to align dst */

	/* align destination with byte copies */
	ldrb	r3, [r1, #-1]!
	strb	r3, [r0, #-1]!
	ldrgeb	r3, [r1, #-1]!
	strgeb	r3, [r0, #-1]!
	ldrgtb	r3, [r1, #-1]!
	strgtb	r3, [r0, #-1]!
	subs	r2, r2, r12
	blt	.Lmemmove_bl4		/* less than 4 bytes to go */
	ands	r12, r1, #3
	beq	.Lmemmove_bt8		/* we have an aligned source */

	/* erg - unaligned source */
	/* This is where it gets nasty ... */
.Lmemmove_bsrcul:
	/*
	 * Descending copy with src misaligned by r12 (1..3) bytes.
	 * Round src down to a word boundary and prime r3 with that
	 * aligned word; the loops mirror the forward srcul code but
	 * splice each word with its *lower* neighbour.
	 */
	bic	r1, r1, #3
	ldr	r3, [r1, #0]
	cmp	r12, #2
	blt	.Lmemmove_bsrcul1	/* src offset 1 */
	beq	.Lmemmove_bsrcul2	/* src offset 2 */
	cmp	r2, #0x0c		/* src offset 3 falls through */
	blt	.Lmemmove_bsrcul3loop4
	sub	r2, r2, #0x0c
	stmdb	sp!, {r4, r5, lr}

.Lmemmove_bsrcul3loop16:
#ifdef __ARMEB__
	mov	lr, r3, lsr #8
#else
	mov	lr, r3, lsl #8
#endif
	ldmdb	r1!, {r3-r5, r12}
#ifdef __ARMEB__
	orr	lr, lr, r12, lsl #24
	mov	r12, r12, lsr #8
	orr	r12, r12, r5, lsl #24
	mov	r5, r5, lsr #8
	orr	r5, r5, r4, lsl #24
	mov	r4, r4, lsr #8
	orr	r4, r4, r3, lsl #24
#else
	orr	lr, lr, r12, lsr #24
	mov	r12, r12, lsl #8
	orr	r12, r12, r5, lsr #24
	mov	r5, r5, lsl #8
	orr	r5, r5, r4, lsr #24
	mov	r4, r4, lsl #8
	orr	r4, r4, r3, lsr #24
#endif
	stmdb	r0!, {r4, r5, r12, lr}
	subs	r2, r2, #0x10
	bge	.Lmemmove_bsrcul3loop16
	ldmia	sp!, {r4, r5, lr}
	adds	r2, r2, #0x0c
	blt	.Lmemmove_bsrcul3l4

.Lmemmove_bsrcul3loop4:
#ifdef __ARMEB__
	mov	r12, r3, lsr #8
#else
	mov	r12, r3, lsl #8
#endif
	ldr	r3, [r1, #-4]!
#ifdef __ARMEB__
	orr	r12, r12, r3, lsl #24
#else
	orr	r12, r12, r3, lsr #24
#endif
	str	r12, [r0, #-4]!
	subs	r2, r2, #4
	bge	.Lmemmove_bsrcul3loop4

.Lmemmove_bsrcul3l4:
	add	r1, r1, #3		/* restore true (unaligned) src */
	b	.Lmemmove_bl4

.Lmemmove_bsrcul2:
	cmp	r2, #0x0c
	blt	.Lmemmove_bsrcul2loop4
	sub	r2, r2, #0x0c
	stmdb	sp!, {r4, r5, lr}

.Lmemmove_bsrcul2loop16:
#ifdef __ARMEB__
	mov	lr, r3, lsr #16
#else
	mov	lr, r3, lsl #16
#endif
	ldmdb	r1!, {r3-r5, r12}
#ifdef __ARMEB__
	orr	lr, lr, r12, lsl #16
	mov	r12, r12, lsr #16
	orr	r12, r12, r5, lsl #16
	mov	r5, r5, lsr #16
	orr	r5, r5, r4, lsl #16
	mov	r4, r4, lsr #16
	orr	r4, r4, r3, lsl #16
#else
	orr	lr, lr, r12, lsr #16
	mov	r12, r12, lsl #16
	orr	r12, r12, r5, lsr #16
	mov	r5, r5, lsl #16
	orr	r5, r5, r4, lsr #16
	mov	r4, r4, lsl #16
	orr	r4, r4, r3, lsr #16
#endif
	stmdb	r0!, {r4, r5, r12, lr}
	subs	r2, r2, #0x10
	bge	.Lmemmove_bsrcul2loop16
	ldmia	sp!, {r4, r5, lr}
	adds	r2, r2, #0x0c
	blt	.Lmemmove_bsrcul2l4

.Lmemmove_bsrcul2loop4:
#ifdef __ARMEB__
	mov	r12, r3, lsr #16
#else
	mov	r12, r3, lsl #16
#endif
	ldr	r3, [r1, #-4]!
#ifdef __ARMEB__
	orr	r12, r12, r3, lsl #16
#else
	orr	r12, r12, r3, lsr #16
#endif
	str	r12, [r0, #-4]!
	subs	r2, r2, #4
	bge	.Lmemmove_bsrcul2loop4

.Lmemmove_bsrcul2l4:
	add	r1, r1, #2		/* restore true (unaligned) src */
	b	.Lmemmove_bl4

.Lmemmove_bsrcul1:
	cmp	r2, #0x0c
	blt	.Lmemmove_bsrcul1loop4
	sub	r2, r2, #0x0c
	stmdb	sp!, {r4, r5, lr}

	/* NB: despite the "32" in the name, this moves 16 bytes per pass */
.Lmemmove_bsrcul1loop32:
#ifdef __ARMEB__
	mov	lr, r3, lsr #24
#else
	mov	lr, r3, lsl #24
#endif
	ldmdb	r1!, {r3-r5, r12}
#ifdef __ARMEB__
	orr	lr, lr, r12, lsl #8
	mov	r12, r12, lsr #24
	orr	r12, r12, r5, lsl #8
	mov	r5, r5, lsr #24
	orr	r5, r5, r4, lsl #8
	mov	r4, r4, lsr #24
	orr	r4, r4, r3, lsl #8
#else
	orr	lr, lr, r12, lsr #8
	mov	r12, r12, lsl #24
	orr	r12, r12, r5, lsr #8
	mov	r5, r5, lsl #24
	orr	r5, r5, r4, lsr #8
	mov	r4, r4, lsl #24
	orr	r4, r4, r3, lsr #8
#endif
	stmdb	r0!, {r4, r5, r12, lr}
	subs	r2, r2, #0x10
	bge	.Lmemmove_bsrcul1loop32
	ldmia	sp!, {r4, r5, lr}
	adds	r2, r2, #0x0c
	blt	.Lmemmove_bsrcul1l4

.Lmemmove_bsrcul1loop4:
#ifdef __ARMEB__
	mov	r12, r3, lsr #24
#else
	mov	r12, r3, lsl #24
#endif
	ldr	r3, [r1, #-4]!
#ifdef __ARMEB__
	orr	r12, r12, r3, lsl #8
#else
	orr	r12, r12, r3, lsr #8
#endif
	str	r12, [r0, #-4]!
	subs	r2, r2, #4
	bge	.Lmemmove_bsrcul1loop4

.Lmemmove_bsrcul1l4:
	add	r1, r1, #1		/* restore true (unaligned) src */
	b	.Lmemmove_bl4
589