xref: /netbsd-src/common/lib/libc/arch/sh3/string/memcpy.S (revision 90313c06e62e910bf0d1bb24faa9d17dcefd0ab6)
1/*	$NetBSD: memcpy.S,v 1.3 2024/02/07 04:20:25 msaitoh Exp $	*/
2
3/*
4 * Copyright (c) 2000 SHIMIZU Ryo
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 *    notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 *    notice, this list of conditions and the following disclaimer in the
14 *    documentation and/or other materials provided with the distribution.
15 * 3. The name of the author may not be used to endorse or promote products
16 *    derived from this software without specific prior written permission.
17 *
18 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
19 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
20 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
21 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
22 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
23 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
24 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
25 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 */
29
30#include <machine/asm.h>
31
32#if defined(LIBC_SCCS) && !defined(lint)
33	RCSID("$NetBSD: memcpy.S,v 1.3 2024/02/07 04:20:25 msaitoh Exp $")
34#endif
35
/*
 * Variant selection: this one source provides memcpy, memmove or bcopy,
 * chosen by defining MEMCOPY, MEMMOVE or BCOPY.  When none of the three
 * is defined, MEMCOPY is the default.
 */
36#if !defined(MEMCOPY) && !defined(MEMMOVE) && !defined(BCOPY)
37#define MEMCOPY
38#endif
39
/*
 * Register roles.  memcpy/memmove take (dst, src, len) while bcopy takes
 * (src, dst, len), so REG_SRC and REG_DST swap between r4 and r5.
 * NOTE(review): this assumes the calling convention passes the first
 * three arguments in r4, r5 and r6 -- confirm against the SH ABI.
 *
 * REG_DST0 exists only for memcpy/memmove: the routine stores the
 * incoming dst there on entry and moves it to r0 when it returns.
 */
40#if defined(MEMCOPY) || defined(MEMMOVE)
41#define	REG_DST0	r3
42#define	REG_SRC		r5
43#define	REG_DST		r4
44#else
45#define	REG_SRC		r4
46#define	REG_DST		r5
47#endif
48
/* Number of bytes still to copy; counted down to zero by the routine. */
49#define	REG_LEN		r6
50
/*
 * Copy REG_LEN bytes from REG_SRC to REG_DST.  The entry symbol is
 * memcpy, memmove or bcopy depending on which of MEMCOPY / MEMMOVE /
 * BCOPY is defined; all three share this body.
 *
 * Direction:
 *   src == dst : return immediately.
 *   src >  dst : (unsigned compare) copy forward, handled below.
 *   src <  dst : branch to bcopy_overlap, which copies backward.
 *
 * Width, chosen from (src ^ dst) & 3:
 *   0          : longword_align - byte/halfword lead-in, then 4 bytes
 *                per iteration.
 *   2          : word_align     - byte lead-in, then 2 bytes per
 *                iteration.
 *   bit 0 set  : byte loop only.
 * Whatever is left after the wide loop is finished by the byte loop.
 *
 * Registers written: r0, r1, the T bit, REG_SRC, REG_DST, REG_LEN, and
 * REG_DST0 when it is defined.  With REG_DST0 defined, r0 holds the
 * original dst on return.
 *
 * Reading aid: bt/s, bf/s, bra and rts are delayed branches; the
 * instruction written after them is executed in the delay slot before
 * the branch takes effect (and simply executes next when a conditional
 * branch is not taken).  In several places below a blank line or a label
 * sits between a branch and its delay slot.
 */
51#if defined(MEMCOPY)
52ENTRY(memcpy)
53#elif defined(MEMMOVE)
54ENTRY(memmove)
55#elif defined(BCOPY)
56ENTRY(bcopy)
57#endif
58#ifdef REG_DST0
59	mov	REG_DST,REG_DST0	/* keep the original dst for the return value */
60#endif
61	cmp/eq	REG_DST,REG_SRC	/* if ( src == dst ) return; */
62	bt/s	bcopy_return
63	cmp/hi	REG_DST,REG_SRC	/* (delay slot) T = ( src > dst ), unsigned */
64	bf/s	bcopy_overlap	/* src < dst: copy backward from the end */
65
66	mov	REG_SRC,r0	/* (delay slot of the bf/s above) r0 = src */
67	xor	REG_DST,r0
68	and	#3,r0
69	mov	r0,r1		/* r1 = (src ^ dst) & 3, read again at word_align */
70	tst	r0,r0		/* (src ^ dst) & 3         */
71	bf/s	word_align	/* low two bits differ: no 4-byte copies */
72
	/*
	 * src and dst have the same low two bits here, so every step
	 * that aligns src aligns dst as well.  The tst below is also the
	 * delay slot of the bf/s above, so it runs on both paths.
	 */
73longword_align:
74	tst	REG_LEN,REG_LEN	/* if ( len==0 ) return;   */
75	bt/s	bcopy_return
76
77
78	mov	REG_SRC,r0	/* (delay slot of the bt/s above) r0 = src */
79	tst	#1,r0		/* if ( src & 1 )          */
80	bt	1f
81	mov.b	@REG_SRC+,r0	/*    *dst++ = *src++;     */
82	add	#-1,REG_LEN
83	mov.b	r0,@REG_DST
84	add	#1,REG_DST
851:
86
87
	/* src is even here; copy one halfword if that makes it 4-aligned */
88	mov	#1,r0
89	cmp/hi	r0,REG_LEN	/* if ( (len > 1) &&       */
90	bf/s	1f
91	mov	REG_SRC,r0	/* (delay slot) r0 = src */
92	tst	#2,r0		/*      (src & 2) {        */
93	bt	1f
94	mov.w	@REG_SRC+,r0	/*        *((unsigned short*)dst)++ = *((unsigned short*)src)++; */
95	add	#-2,REG_LEN	/*        len -= 2;                                              */
96	mov.w	r0,@REG_DST
97	add	#2,REG_DST	/* }                       */
981:
99
100
	/* r1 = 3 stays constant for the loop test below */
101	mov	#3,r1
102	cmp/hi	r1,REG_LEN	/* while ( len > 3 ) {     */
103	bf/s	no_align_delay
104	tst	REG_LEN,REG_LEN	/* (delay slot) T = ( len == 0 ) for no_align_delay */
1052:
106	mov.l	@REG_SRC+,r0	/*   *((unsigned long*)dst)++ = *((unsigned long*)src)++;        */
107	add	#-4,REG_LEN	/*   len -= 4;                                                   */
108	mov.l	r0,@REG_DST
109	cmp/hi	r1,REG_LEN
110	bt/s	2b
111	add	#4,REG_DST	/* }                       */
112
	/* 0..3 bytes remain; finish them in the byte loop */
113	bra	no_align_delay
114	tst	REG_LEN,REG_LEN	/* (delay slot) T = ( len == 0 ) */
115
116
	/*
	 * Reached when (src ^ dst) & 3 != 0; r1 still holds that value.
	 * If bit 0 is set, src and dst can never both be halfword
	 * aligned, so go straight to the byte loop.  Otherwise the value
	 * is 2 and halfword copies are possible.
	 */
117word_align:
118	mov	r1,r0
119	tst	#1,r0
120	bf/s	no_align_delay
121	tst	REG_LEN,REG_LEN	/* if ( len == 0 ) return; */
122	bt	bcopy_return
123
124
125	mov	REG_SRC,r0	/* if ( src & 1 )          */
126	tst	#1,r0
127	bt	1f
128	mov.b	@REG_SRC+,r0	/*    *dst++ = *src++;     */
129	add	#-1,REG_LEN
130	mov.b	r0,@REG_DST
131	add	#1,REG_DST
1321:
133
134
	/* r1 = 1 stays constant for the loop test below */
135	mov	#1,r1
136	cmp/hi	r1,REG_LEN	/* while ( len > 1 ) {     */
137	bf/s	no_align_delay
138	tst	REG_LEN,REG_LEN	/* (delay slot) T = ( len == 0 ) for no_align_delay */
1392:
140	mov.w	@REG_SRC+,r0	/*   *((unsigned short*)dst)++ = *((unsigned short*)src)++;      */
141	add	#-2,REG_LEN	/*   len -= 2;                                                   */
142	mov.w	r0,@REG_DST
143	cmp/hi	r1,REG_LEN
144	bt/s	2b
145	add	#2,REG_DST	/* }                       */
146
147
	/*
	 * Byte loop.  Falling in through no_align computes T here;
	 * branches to no_align_delay arrive with T = ( len == 0 ) already
	 * set by the delay slot of the branch that came here.
	 */
148no_align:
149	tst	REG_LEN,REG_LEN	/* while ( len != 0 ) {    */
150no_align_delay:
151	bt	bcopy_return
1521:
153	mov.b	@REG_SRC+,r0	/*    *dst++ = *src++;     */
154	add	#-1,REG_LEN	/*    len--;               */
155	mov.b	r0,@REG_DST
156	tst	REG_LEN,REG_LEN
157	bf/s	1b
158	add	#1,REG_DST	/* }                       */
	/* common exit; the instruction after rts is its delay slot */
159bcopy_return:
160	rts
161#ifdef REG_DST0
162	mov	REG_DST0,r0	/* (delay slot) return the original dst */
163#else
164	nop			/* (delay slot) nothing is loaded into r0 here */
165#endif
166
167
	/*
	 * Backward copy, entered from the function entry when src < dst
	 * (unsigned).  REG_SRC and REG_DST are first moved to one past
	 * the end of their buffers and the data is then copied from the
	 * last byte toward the first.
	 *
	 * The structure mirrors the forward path: (src ^ dst) & 3,
	 * computed on the end pointers, selects 4-byte, 2-byte or
	 * byte-only copying, and the byte loop finishes what is left.
	 * src is stepped back with an explicit add before each load,
	 * while the store uses the @-REG_DST pre-decrement form.
	 *
	 * Registers written: r0, r1, the T bit, REG_SRC, REG_DST,
	 * REG_LEN.  With REG_DST0 defined, r0 holds its value on return.
	 *
	 * Reading aid: bt/s, bf/s, bra and rts are delayed branches; the
	 * instruction written after them is executed in the delay slot
	 * before the branch takes effect.  In several places a blank line
	 * or a label sits between a branch and its delay slot.
	 */
168bcopy_overlap:
169	add	REG_LEN,REG_SRC	/* src += len */
170	add	REG_LEN,REG_DST	/* dst += len */
171
172	mov	REG_SRC,r0
173	xor	REG_DST,r0
174	and	#3,r0
175	mov	r0,r1		/* r1 = (src ^ dst) & 3, read again at ov_word_align */
176	tst	r0,r0		/* (src ^ dst) & 3         */
177	bf/s	ov_word_align	/* low two bits differ: no 4-byte copies */
178
	/*
	 * The end pointers have the same low two bits here, so every step
	 * that aligns src aligns dst as well.  The tst below is also the
	 * delay slot of the bf/s above, so it runs on both paths.
	 */
179ov_longword_align:
180	tst	REG_LEN,REG_LEN	/* if ( len==0 ) return;   */
181	bt/s	bcopy_return
182
183
184	mov	REG_SRC,r0	/* (delay slot of the bt/s above) r0 = src */
185	tst	#1,r0		/* if ( src & 1 )          */
186	bt	1f
187	add	#-1,REG_SRC	/*    *--dst = *--src;     */
188	mov.b	@REG_SRC,r0
189	mov.b	r0,@-REG_DST
190	add	#-1,REG_LEN
1911:
192
193
	/* src is even here; copy one halfword if that makes it 4-aligned */
194	mov	#1,r0
195	cmp/hi	r0,REG_LEN	/* if ( (len > 1) &&       */
196	bf/s	1f
197	mov	REG_SRC,r0	/* (delay slot) r0 = src */
198	tst	#2,r0		/*      (src & 2) {        */
199	bt	1f
200	add	#-2,REG_SRC	/*        *--((unsigned short*)dst) = *--((unsigned short*)src); */
201	mov.w	@REG_SRC,r0
202	add	#-2,REG_LEN	/*        len -= 2;                                              */
203	mov.w	r0,@-REG_DST	/* }                       */
2041:
205
206
	/* r1 = 3 stays constant for the loop test below */
207	mov	#3,r1
208	cmp/hi	r1,REG_LEN	/* while ( len > 3 ) {     */
209	bf/s	ov_no_align_delay
210	tst	REG_LEN,REG_LEN	/* (delay slot) T = ( len == 0 ) for ov_no_align_delay */
2112:
212	add	#-4,REG_SRC
213	mov.l	@REG_SRC,r0	/*   *--((unsigned long*)dst) = *--((unsigned long*)src);        */
214	add	#-4,REG_LEN	/*   len -= 4;                                                   */
215	cmp/hi	r1,REG_LEN
216	bt/s	2b
217	mov.l	r0,@-REG_DST	/* }                       */
218
	/* 0..3 bytes remain; finish them in the byte loop */
219	bra	ov_no_align_delay
220	tst	REG_LEN,REG_LEN	/* (delay slot) T = ( len == 0 ) */
221
222
	/*
	 * Reached when (src ^ dst) & 3 != 0; r1 still holds that value.
	 * If bit 0 is set, src and dst can never both be halfword
	 * aligned, so go straight to the byte loop.  Otherwise the value
	 * is 2 and halfword copies are possible.
	 */
223ov_word_align:
224	mov	r1,r0
225	tst	#1,r0
226	bf/s	ov_no_align_delay
227	tst	REG_LEN,REG_LEN	/* if ( len == 0 ) return; */
228	bt	bcopy_return
229
230
231	mov	REG_SRC,r0	/* if ( src & 1 )          */
232	tst	#1,r0
233	bt	1f
234	add	#-1,REG_SRC
235	mov.b	@REG_SRC,r0	/*    *--dst = *--src;     */
236	add	#-1,REG_LEN
237	mov.b	r0,@-REG_DST
2381:
239
240
	/* r1 = 1 stays constant for the loop test below */
241	mov	#1,r1
242	cmp/hi	r1,REG_LEN	/* while ( len > 1 ) {     */
243	bf/s	ov_no_align_delay
244	tst	REG_LEN,REG_LEN	/* (delay slot) T = ( len == 0 ) for ov_no_align_delay */
2452:
246	add	#-2,REG_SRC
247	mov.w	@REG_SRC,r0	/*   *--((unsigned short*)dst) = *--((unsigned short*)src);      */
248	add	#-2,REG_LEN	/*   len -= 2;                                                   */
249	cmp/hi	r1,REG_LEN
250	bt/s	2b
251	mov.w	r0,@-REG_DST	/* }                       */
252
253
	/*
	 * Byte loop.  Falling in through ov_no_align computes T here;
	 * branches to ov_no_align_delay arrive with T = ( len == 0 )
	 * already set by the delay slot of the branch that came here.
	 */
254ov_no_align:
255	tst	REG_LEN,REG_LEN	/* while ( len != 0 ) {    */
256ov_no_align_delay:
257	bt	9f
2581:
259	add	#-1,REG_SRC
260	mov.b	@REG_SRC,r0	/*    *--dst = *--src;     */
261	add	#-1,REG_LEN	/*    len--;               */
262	tst	REG_LEN,REG_LEN
263	bf/s	1b
264	mov.b	r0,@-REG_DST	/* }                       */
	/* exit of the backward path; the instruction after rts is its delay slot */
2659:
266	rts
267#ifdef REG_DST0
268	mov	REG_DST0,r0	/* (delay slot) return the original dst */
269#else
270	nop			/* (delay slot) nothing is loaded into r0 here */
271#endif
272