xref: /netbsd-src/common/lib/libc/arch/arm/string/memmove.S (revision ba65fde2d7fefa7d39838fa5fa855e62bd606b5e)
1/*	$NetBSD: memmove.S,v 1.4 2013/01/28 06:23:44 matt Exp $	*/
2
3/*-
4 * Copyright (c) 1997 The NetBSD Foundation, Inc.
5 * All rights reserved.
6 *
7 * This code is derived from software contributed to The NetBSD Foundation
8 * by Neil A. Carson and Mark Brinicombe
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 *    notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 *    notice, this list of conditions and the following disclaimer in the
17 *    documentation and/or other materials provided with the distribution.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29 * POSSIBILITY OF SUCH DAMAGE.
30 */
31
32#include <machine/asm.h>
33
/*
 * For the EABI, export memmove under its run-time ABI name as well.
 * The guard must test _BCOPY (the macro defined when this file is
 * included to build bcopy -- see the "#ifndef _BCOPY" below), not the
 * unprefixed BCOPY: otherwise the bcopy build would also emit an
 * __aeabi_memmove alias and clash with the real memmove object.
 */
#if defined(__ARM_EABI__) && !defined(_BCOPY)
STRONG_ALIAS(__aeabi_memmove, memmove)
#endif
37
/*
 * void *memmove(void *dst, const void *src, size_t len)
 *	(built instead as bcopy(src, dst, len) when _BCOPY is defined)
 *
 * In:	r0 = dst, r1 = src, r2 = len (bcopy swaps r0/r1 on entry)
 * Out:	r0 = original dst (the memmove return value)
 * Uses: r3, r12 (ip), lr, flags; r4/r5 are saved on the stack around
 *	the wide-copy loops.
 *
 * Strategy: if the regions do not actually overlap, tail-call memcpy.
 * Otherwise copy forwards (ascending) when dst < src and backwards
 * (descending) when dst > src, so overlapping bytes are read before
 * they are overwritten.  Aligned ldm/stm moves 32/16/12/8/4 bytes at
 * a time; a word-misaligned source is handled by loading aligned
 * words and merging neighbours with endian-dependent shifts (the
 * __ARMEB__ / little-endian pairs below).
 * Note: pre-UAL mnemonics throughout (ldmgeia == UAL ldmiage, etc.).
 */
#ifndef _BCOPY
/* LINTSTUB: Func: void *memmove(void *, const void *, size_t) */
ENTRY(memmove)
#else
/* bcopy = memcpy/memmove with arguments reversed. */
/* LINTSTUB: Func: void bcopy(void *, void *, size_t) */
ENTRY(bcopy)
	/* switch the source and destination registers */
	/* (three-eor swap: exchanges r0 and r1 without a scratch reg) */
	eor     r0, r1, r0
	eor     r1, r0, r1
	eor     r0, r1, r0
#endif
	/* Do the buffers overlap? */
	cmp	r0, r1
	RETc(eq)		/* Bail now if src/dst are the same */
	subhs	r3, r0, r1	/* if (dst > src) r3 = dst - src */
	sublo	r3, r1, r0	/* if (src > dst) r3 = src - dst */
	cmp	r3, r2		/* if (r3 >= len) we have an overlap */
	bhs	PIC_SYM(_C_LABEL(memcpy), PLT)	/* disjoint: memcpy is safe */

	/* Determine copy direction */
	cmp	r1, r0
	bcc	.Lmemmove_backwards	/* src < dst: must copy descending */

	/*
	 * NOTE(review): eq cannot hold here -- the src == dst case
	 * already returned above -- so this pair looks unreachable
	 * (kept byte-for-byte; appears to be a historical leftover).
	 */
	moveq	r0, #0			/* Quick abort for len=0 */
	RETc(eq)

	/* ---------- forward (ascending) copy: dst < src ---------- */
	stmdb	sp!, {r0, lr}		/* memmove() returns dest addr */
	subs	r2, r2, #4
	blt	.Lmemmove_fl4		/* less than 4 bytes */
	ands	r12, r0, #3
	bne	.Lmemmove_fdestul	/* oh unaligned destination addr */
	ands	r12, r1, #3
	bne	.Lmemmove_fsrcul		/* oh unaligned source addr */

.Lmemmove_ft8:
	/* We have aligned source and destination */
	subs	r2, r2, #8
	blt	.Lmemmove_fl12		/* less than 12 bytes (4 from above) */
	subs	r2, r2, #0x14
	blt	.Lmemmove_fl32		/* less than 32 bytes (12 from above) */
	stmdb	sp!, {r4}		/* borrow r4 */

	/* blat 32 bytes at a time */
	/* XXX for really big copies perhaps we should use more registers */
.Lmemmove_floop32:
	ldmia	r1!, {r3, r4, r12, lr}
	stmia	r0!, {r3, r4, r12, lr}
	ldmia	r1!, {r3, r4, r12, lr}
	stmia	r0!, {r3, r4, r12, lr}
	subs	r2, r2, #0x20
	bge	.Lmemmove_floop32

	cmn	r2, #0x10		/* at least 16 bytes still to go? */
	ldmgeia	r1!, {r3, r4, r12, lr}	/* blat a remaining 16 bytes */
	stmgeia	r0!, {r3, r4, r12, lr}
	subge	r2, r2, #0x10
	ldmia	sp!, {r4}		/* return r4 */

.Lmemmove_fl32:
	adds	r2, r2, #0x14		/* undo bias; ge => >= 12 bytes left */

	/* blat 12 bytes at a time */
.Lmemmove_floop12:
	ldmgeia	r1!, {r3, r12, lr}
	stmgeia	r0!, {r3, r12, lr}
	subges	r2, r2, #0x0c
	bge	.Lmemmove_floop12

.Lmemmove_fl12:
	adds	r2, r2, #8
	blt	.Lmemmove_fl4

	subs	r2, r2, #4
	ldrlt	r3, [r1], #4		/* 4-7 bytes left: one word */
	strlt	r3, [r0], #4
	ldmgeia	r1!, {r3, r12}		/* 8-11 bytes left: two words */
	stmgeia	r0!, {r3, r12}
	subge	r2, r2, #4

.Lmemmove_fl4:
	/* less than 4 bytes to go */
	adds	r2, r2, #4
	ldmeqia	sp!, {r0, pc}		/* done */

	/* copy the crud byte at a time */
	cmp	r2, #2			/* flags reused below: ge => >=2, gt => 3 */
	ldrb	r3, [r1], #1
	strb	r3, [r0], #1
	ldrgeb	r3, [r1], #1
	strgeb	r3, [r0], #1
	ldrgtb	r3, [r1], #1
	strgtb	r3, [r0], #1
	ldmia	sp!, {r0, pc}		/* restore dst into r0 and return */

	/* erg - unaligned destination */
.Lmemmove_fdestul:
	rsb	r12, r12, #4		/* r12 = 1..3 bytes needed to align dst */
	cmp	r12, #2

	/* align destination with byte copies */
	ldrb	r3, [r1], #1
	strb	r3, [r0], #1
	ldrgeb	r3, [r1], #1
	strgeb	r3, [r0], #1
	ldrgtb	r3, [r1], #1
	strgtb	r3, [r0], #1
	subs	r2, r2, r12
	blt	.Lmemmove_fl4		/* less the 4 bytes */

	ands	r12, r1, #3
	beq	.Lmemmove_ft8		/* we have an aligned source */

	/* erg - unaligned source */
	/* This is where it gets nasty ... */
	/*
	 * r12 = src misalignment (1, 2 or 3).  Round src down to a word
	 * boundary, prime lr with the first aligned word, then build each
	 * output word from two adjacent source words with shifts.
	 */
.Lmemmove_fsrcul:
	bic	r1, r1, #3
	ldr	lr, [r1], #4
	cmp	r12, #2
	bgt	.Lmemmove_fsrcul3
	beq	.Lmemmove_fsrcul2
	cmp	r2, #0x0c
	blt	.Lmemmove_fsrcul1loop4
	sub	r2, r2, #0x0c
	stmdb	sp!, {r4, r5}

	/* src off by 1: out word = 3 bytes of one src word + 1 of the next */
.Lmemmove_fsrcul1loop16:
#ifdef __ARMEB__
	mov	r3, lr, lsl #8
#else
	mov	r3, lr, lsr #8
#endif
	ldmia	r1!, {r4, r5, r12, lr}
#ifdef __ARMEB__
	orr	r3, r3, r4, lsr #24
	mov	r4, r4, lsl #8
	orr	r4, r4, r5, lsr #24
	mov	r5, r5, lsl #8
	orr	r5, r5, r12, lsr #24
	mov	r12, r12, lsl #8
	orr	r12, r12, lr, lsr #24
#else
	orr	r3, r3, r4, lsl #24
	mov	r4, r4, lsr #8
	orr	r4, r4, r5, lsl #24
	mov	r5, r5, lsr #8
	orr	r5, r5, r12, lsl #24
	mov	r12, r12, lsr #8
	orr	r12, r12, lr, lsl #24
#endif
	stmia	r0!, {r3-r5, r12}
	subs	r2, r2, #0x10
	bge	.Lmemmove_fsrcul1loop16
	ldmia	sp!, {r4, r5}
	adds	r2, r2, #0x0c
	blt	.Lmemmove_fsrcul1l4

.Lmemmove_fsrcul1loop4:
#ifdef __ARMEB__
	mov	r12, lr, lsl #8
#else
	mov	r12, lr, lsr #8
#endif
	ldr	lr, [r1], #4
#ifdef __ARMEB__
	orr	r12, r12, lr, lsr #24
#else
	orr	r12, r12, lr, lsl #24
#endif
	str	r12, [r0], #4
	subs	r2, r2, #4
	bge	.Lmemmove_fsrcul1loop4

.Lmemmove_fsrcul1l4:
	sub	r1, r1, #3		/* rewind src to its true unaligned address */
	b	.Lmemmove_fl4

	/* src off by 2: out word = one halfword from each of two src words */
.Lmemmove_fsrcul2:
	cmp	r2, #0x0c
	blt	.Lmemmove_fsrcul2loop4
	sub	r2, r2, #0x0c
	stmdb	sp!, {r4, r5}

.Lmemmove_fsrcul2loop16:
#ifdef __ARMEB__
	mov	r3, lr, lsl #16
#else
	mov	r3, lr, lsr #16
#endif
	ldmia	r1!, {r4, r5, r12, lr}
#ifdef __ARMEB__
	orr	r3, r3, r4, lsr #16
	mov	r4, r4, lsl #16
	orr	r4, r4, r5, lsr #16
	mov	r5, r5, lsl #16
	orr	r5, r5, r12, lsr #16
	mov	r12, r12, lsl #16
	orr	r12, r12, lr, lsr #16
#else
	orr	r3, r3, r4, lsl #16
	mov	r4, r4, lsr #16
	orr	r4, r4, r5, lsl #16
	mov	r5, r5, lsr #16
	orr	r5, r5, r12, lsl #16
	mov	r12, r12, lsr #16
	orr	r12, r12, lr, lsl #16
#endif
	stmia	r0!, {r3-r5, r12}
	subs	r2, r2, #0x10
	bge	.Lmemmove_fsrcul2loop16
	ldmia	sp!, {r4, r5}
	adds	r2, r2, #0x0c
	blt	.Lmemmove_fsrcul2l4

.Lmemmove_fsrcul2loop4:
#ifdef __ARMEB__
	mov	r12, lr, lsl #16
#else
	mov	r12, lr, lsr #16
#endif
	ldr	lr, [r1], #4
#ifdef __ARMEB__
	orr	r12, r12, lr, lsr #16
#else
	orr	r12, r12, lr, lsl #16
#endif
	str	r12, [r0], #4
	subs	r2, r2, #4
	bge	.Lmemmove_fsrcul2loop4

.Lmemmove_fsrcul2l4:
	sub	r1, r1, #2		/* rewind src to its true unaligned address */
	b	.Lmemmove_fl4

	/* src off by 3: out word = 1 byte of one src word + 3 of the next */
.Lmemmove_fsrcul3:
	cmp	r2, #0x0c
	blt	.Lmemmove_fsrcul3loop4
	sub	r2, r2, #0x0c
	stmdb	sp!, {r4, r5}

.Lmemmove_fsrcul3loop16:
#ifdef __ARMEB__
	mov	r3, lr, lsl #24
#else
	mov	r3, lr, lsr #24
#endif
	ldmia	r1!, {r4, r5, r12, lr}
#ifdef __ARMEB__
	orr	r3, r3, r4, lsr #8
	mov	r4, r4, lsl #24
	orr	r4, r4, r5, lsr #8
	mov	r5, r5, lsl #24
	orr	r5, r5, r12, lsr #8
	mov	r12, r12, lsl #24
	orr	r12, r12, lr, lsr #8
#else
	orr	r3, r3, r4, lsl #8
	mov	r4, r4, lsr #24
	orr	r4, r4, r5, lsl #8
	mov	r5, r5, lsr #24
	orr	r5, r5, r12, lsl #8
	mov	r12, r12, lsr #24
	orr	r12, r12, lr, lsl #8
#endif
	stmia	r0!, {r3-r5, r12}
	subs	r2, r2, #0x10
	bge	.Lmemmove_fsrcul3loop16
	ldmia	sp!, {r4, r5}
	adds	r2, r2, #0x0c
	blt	.Lmemmove_fsrcul3l4

.Lmemmove_fsrcul3loop4:
#ifdef __ARMEB__
	mov	r12, lr, lsl #24
#else
	mov	r12, lr, lsr #24
#endif
	ldr	lr, [r1], #4
#ifdef __ARMEB__
	orr	r12, r12, lr, lsr #8
#else
	orr	r12, r12, lr, lsl #8
#endif
	str	r12, [r0], #4
	subs	r2, r2, #4
	bge	.Lmemmove_fsrcul3loop4

.Lmemmove_fsrcul3l4:
	sub	r1, r1, #1		/* rewind src to its true unaligned address */
	b	.Lmemmove_fl4

	/* ---------- backward (descending) copy: dst > src ---------- */
	/*
	 * Point both registers one past the end and walk down.  No need
	 * to save r0: by the time the copy completes it has been
	 * decremented back to the original dst, the return value.
	 */
.Lmemmove_backwards:
	add	r1, r1, r2
	add	r0, r0, r2
	subs	r2, r2, #4
	blt	.Lmemmove_bl4		/* less than 4 bytes */
	ands	r12, r0, #3
	bne	.Lmemmove_bdestul	/* oh unaligned destination addr */
	ands	r12, r1, #3
	bne	.Lmemmove_bsrcul		/* oh unaligned source addr */

.Lmemmove_bt8:
	/* We have aligned source and destination */
	subs	r2, r2, #8
	blt	.Lmemmove_bl12		/* less than 12 bytes (4 from above) */
	stmdb	sp!, {r4, lr}		/* lr is not yet saved on this path */
	subs	r2, r2, #0x14		/* less than 32 bytes (12 from above) */
	blt	.Lmemmove_bl32

	/* blat 32 bytes at a time */
	/* XXX for really big copies perhaps we should use more registers */
.Lmemmove_bloop32:
	ldmdb	r1!, {r3, r4, r12, lr}
	stmdb	r0!, {r3, r4, r12, lr}
	ldmdb	r1!, {r3, r4, r12, lr}
	stmdb	r0!, {r3, r4, r12, lr}
	subs	r2, r2, #0x20
	bge	.Lmemmove_bloop32

.Lmemmove_bl32:
	cmn	r2, #0x10		/* at least 16 bytes still to go? */
	ldmgedb	r1!, {r3, r4, r12, lr}	/* blat a remaining 16 bytes */
	stmgedb	r0!, {r3, r4, r12, lr}
	subge	r2, r2, #0x10
	adds	r2, r2, #0x14
	ldmgedb	r1!, {r3, r12, lr}	/* blat a remaining 12 bytes */
	stmgedb	r0!, {r3, r12, lr}
	subge	r2, r2, #0x0c
	ldmia	sp!, {r4, lr}

.Lmemmove_bl12:
	adds	r2, r2, #8
	blt	.Lmemmove_bl4
	subs	r2, r2, #4
	ldrlt	r3, [r1, #-4]!		/* 4-7 bytes left: one word */
	strlt	r3, [r0, #-4]!
	ldmgedb	r1!, {r3, r12}		/* 8-11 bytes left: two words */
	stmgedb	r0!, {r3, r12}
	subge	r2, r2, #4

.Lmemmove_bl4:
	/* less than 4 bytes to go */
	adds	r2, r2, #4
	RETc(eq)			/* r0 is back at the original dst */

	/* copy the crud byte at a time */
	cmp	r2, #2			/* flags reused below: ge => >=2, gt => 3 */
	ldrb	r3, [r1, #-1]!
	strb	r3, [r0, #-1]!
	ldrgeb	r3, [r1, #-1]!
	strgeb	r3, [r0, #-1]!
	ldrgtb	r3, [r1, #-1]!
	strgtb	r3, [r0, #-1]!
	RET				/* r0 == original dst again */

	/* erg - unaligned destination */
.Lmemmove_bdestul:
	cmp	r12, #2			/* r12 = dst & 3 = bytes to copy to align */

	/* align destination with byte copies */
	ldrb	r3, [r1, #-1]!
	strb	r3, [r0, #-1]!
	ldrgeb	r3, [r1, #-1]!
	strgeb	r3, [r0, #-1]!
	ldrgtb	r3, [r1, #-1]!
	strgtb	r3, [r0, #-1]!
	subs	r2, r2, r12
	blt	.Lmemmove_bl4		/* less than 4 bytes to go */
	ands	r12, r1, #3
	beq	.Lmemmove_bt8		/* we have an aligned source */

	/* erg - unaligned source */
	/* This is where it gets nasty ... */
	/*
	 * r12 = src misalignment (1, 2 or 3).  Round src down to a word
	 * boundary, prime r3 with that aligned word, then merge adjacent
	 * source words with shifts while walking downwards.
	 */
.Lmemmove_bsrcul:
	bic	r1, r1, #3
	ldr	r3, [r1, #0]
	cmp	r12, #2
	blt	.Lmemmove_bsrcul1
	beq	.Lmemmove_bsrcul2
	/* fall through: src off by 3 */
	cmp	r2, #0x0c
	blt	.Lmemmove_bsrcul3loop4
	sub	r2, r2, #0x0c
	stmdb	sp!, {r4, r5, lr}

.Lmemmove_bsrcul3loop16:
#ifdef __ARMEB__
	mov	lr, r3, lsr #8
#else
	mov	lr, r3, lsl #8
#endif
	ldmdb	r1!, {r3-r5, r12}
#ifdef __ARMEB__
	orr	lr, lr, r12, lsl #24
	mov	r12, r12, lsr #8
	orr	r12, r12, r5, lsl #24
	mov	r5, r5, lsr #8
	orr	r5, r5, r4, lsl #24
	mov	r4, r4, lsr #8
	orr	r4, r4, r3, lsl #24
#else
	orr	lr, lr, r12, lsr #24
	mov	r12, r12, lsl #8
	orr	r12, r12, r5, lsr #24
	mov	r5, r5, lsl #8
	orr	r5, r5, r4, lsr #24
	mov	r4, r4, lsl #8
	orr	r4, r4, r3, lsr #24
#endif
	stmdb	r0!, {r4, r5, r12, lr}
	subs	r2, r2, #0x10
	bge	.Lmemmove_bsrcul3loop16
	ldmia	sp!, {r4, r5, lr}
	adds	r2, r2, #0x0c
	blt	.Lmemmove_bsrcul3l4

.Lmemmove_bsrcul3loop4:
#ifdef __ARMEB__
	mov	r12, r3, lsr #8
#else
	mov	r12, r3, lsl #8
#endif
	ldr	r3, [r1, #-4]!
#ifdef __ARMEB__
	orr	r12, r12, r3, lsl #24
#else
	orr	r12, r12, r3, lsr #24
#endif
	str	r12, [r0, #-4]!
	subs	r2, r2, #4
	bge	.Lmemmove_bsrcul3loop4

.Lmemmove_bsrcul3l4:
	add	r1, r1, #3		/* restore src to its true unaligned address */
	b	.Lmemmove_bl4

	/* src off by 2 */
.Lmemmove_bsrcul2:
	cmp	r2, #0x0c
	blt	.Lmemmove_bsrcul2loop4
	sub	r2, r2, #0x0c
	stmdb	sp!, {r4, r5, lr}

.Lmemmove_bsrcul2loop16:
#ifdef __ARMEB__
	mov	lr, r3, lsr #16
#else
	mov	lr, r3, lsl #16
#endif
	ldmdb	r1!, {r3-r5, r12}
#ifdef __ARMEB__
	orr	lr, lr, r12, lsl #16
	mov	r12, r12, lsr #16
	orr	r12, r12, r5, lsl #16
	mov	r5, r5, lsr #16
	orr	r5, r5, r4, lsl #16
	mov	r4, r4, lsr #16
	orr	r4, r4, r3, lsl #16
#else
	orr	lr, lr, r12, lsr #16
	mov	r12, r12, lsl #16
	orr	r12, r12, r5, lsr #16
	mov	r5, r5, lsl #16
	orr	r5, r5, r4, lsr #16
	mov	r4, r4, lsl #16
	orr	r4, r4, r3, lsr #16
#endif
	stmdb	r0!, {r4, r5, r12, lr}
	subs	r2, r2, #0x10
	bge	.Lmemmove_bsrcul2loop16
	ldmia	sp!, {r4, r5, lr}
	adds	r2, r2, #0x0c
	blt	.Lmemmove_bsrcul2l4

.Lmemmove_bsrcul2loop4:
#ifdef __ARMEB__
	mov	r12, r3, lsr #16
#else
	mov	r12, r3, lsl #16
#endif
	ldr	r3, [r1, #-4]!
#ifdef __ARMEB__
	orr	r12, r12, r3, lsl #16
#else
	orr	r12, r12, r3, lsr #16
#endif
	str	r12, [r0, #-4]!
	subs	r2, r2, #4
	bge	.Lmemmove_bsrcul2loop4

.Lmemmove_bsrcul2l4:
	add	r1, r1, #2		/* restore src to its true unaligned address */
	b	.Lmemmove_bl4

	/* src off by 1 */
.Lmemmove_bsrcul1:
	cmp	r2, #0x0c
	blt	.Lmemmove_bsrcul1loop4
	sub	r2, r2, #0x0c
	stmdb	sp!, {r4, r5, lr}

	/* NOTE(review): despite the name, this loop moves 16 bytes/pass */
.Lmemmove_bsrcul1loop32:
#ifdef __ARMEB__
	mov	lr, r3, lsr #24
#else
	mov	lr, r3, lsl #24
#endif
	ldmdb	r1!, {r3-r5, r12}
#ifdef __ARMEB__
	orr	lr, lr, r12, lsl #8
	mov	r12, r12, lsr #24
	orr	r12, r12, r5, lsl #8
	mov	r5, r5, lsr #24
	orr	r5, r5, r4, lsl #8
	mov	r4, r4, lsr #24
	orr	r4, r4, r3, lsl #8
#else
	orr	lr, lr, r12, lsr #8
	mov	r12, r12, lsl #24
	orr	r12, r12, r5, lsr #8
	mov	r5, r5, lsl #24
	orr	r5, r5, r4, lsr #8
	mov	r4, r4, lsl #24
	orr	r4, r4, r3, lsr #8
#endif
	stmdb	r0!, {r4, r5, r12, lr}
	subs	r2, r2, #0x10
	bge	.Lmemmove_bsrcul1loop32
	ldmia	sp!, {r4, r5, lr}
	adds	r2, r2, #0x0c
	blt	.Lmemmove_bsrcul1l4

.Lmemmove_bsrcul1loop4:
#ifdef __ARMEB__
	mov	r12, r3, lsr #24
#else
	mov	r12, r3, lsl #24
#endif
	ldr	r3, [r1, #-4]!
#ifdef __ARMEB__
	orr	r12, r12, r3, lsl #8
#else
	orr	r12, r12, r3, lsr #8
#endif
	str	r12, [r0, #-4]!
	subs	r2, r2, #4
	bge	.Lmemmove_bsrcul1loop4

.Lmemmove_bsrcul1l4:
	add	r1, r1, #1		/* restore src to its true unaligned address */
	b	.Lmemmove_bl4
586