/* xref: /netbsd-src/common/lib/libc/arch/sh3/string/memcpy.S (revision 90313c06e62e910bf0d1bb24faa9d17dcefd0ab6) */
/*	$NetBSD: memcpy.S,v 1.3 2024/02/07 04:20:25 msaitoh Exp $	*/

/*
 * Copyright (c) 2000 SHIMIZU Ryo
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. The name of the author may not be used to endorse or promote products
 *    derived from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include <machine/asm.h>

#if defined(LIBC_SCCS) && !defined(lint)
	RCSID("$NetBSD: memcpy.S,v 1.3 2024/02/07 04:20:25 msaitoh Exp $")
#endif

/*
 * This one source provides three entry points, selected by defining
 * exactly one of MEMCOPY, MEMMOVE or BCOPY before it is assembled.
 * When none of them is defined the file builds memcpy.
 */
#if !defined(MEMCOPY) && !defined(MEMMOVE) && !defined(BCOPY)
#define MEMCOPY
#endif

/*
 * Register aliases used by the copy routine.
 *
 *   memcpy/memmove (dst, src, len):  REG_DST = r4, REG_SRC = r5
 *   bcopy          (src, dst, len):  REG_SRC = r4, REG_DST = r5
 *
 * REG_DST0 exists only for memcpy/memmove.  The routine copies the
 * entry value of REG_DST into it and moves it to r0 just before
 * returning; the bcopy build has no REG_DST0 and executes a nop there
 * instead.
 */
#if defined(MEMCOPY) || defined(MEMMOVE)
#define	REG_DST0	r3
#define	REG_SRC		r5
#define	REG_DST		r4
#else
#define	REG_SRC		r4
#define	REG_DST		r5
#endif

/* Byte count; the same register for all three entry points. */
#define	REG_LEN		r6

/*
 * memcpy / memmove / bcopy
 *
 * Copies REG_LEN bytes from REG_SRC to REG_DST.  All three entry points
 * share this body; they differ only in the symbol name and in how the
 * REG_SRC / REG_DST aliases map onto registers.
 *
 * In:     REG_SRC, REG_DST, REG_LEN (register aliases #defined earlier
 *         in this file)
 * Out:    r0 = entry value of REG_DST when REG_DST0 is defined
 *         (memcpy/memmove); otherwise nothing is loaded into r0 for
 *         the caller
 * Scratch: r0, r1, REG_DST0 (when defined), T bit
 *
 * Direction:
 *   src == dst  -> nothing to do
 *   src >  dst  -> copy forward, lowest address first
 *   src <  dst  -> copy backward from the end (bcopy_overlap), so a
 *                  destination that overlaps the tail of the source is
 *                  still copied correctly
 *
 * Width, chosen from (src ^ dst) & 3 in both directions:
 *   0          -> both pointers can reach 4-byte alignment together:
 *                 optional byte, optional halfword, then a longword loop
 *   2          -> they can only reach 2-byte alignment together:
 *                 optional byte, then a halfword loop
 *   1 or 3     -> byte loop only
 * Whatever the wider loops leave over is finished by the byte loop.
 *
 * NOTE: bt/s, bf/s, bra and rts are delayed branches.  The instruction
 * written after each of them is its delay slot and is executed before
 * control reaches the branch target, also when a blank line or a label
 * separates the two.  Several length tests below live in delay slots;
 * do not reorder instructions around these branches.
 */
#if defined(MEMCOPY)
ENTRY(memcpy)
#elif defined(MEMMOVE)
ENTRY(memmove)
#elif defined(BCOPY)
ENTRY(bcopy)
#endif
#ifdef REG_DST0
	mov	REG_DST,REG_DST0	/* keep the original dst for the return value */
#endif
	cmp/eq	REG_DST,REG_SRC	/* if ( src == dst ) return; */
	bt/s	bcopy_return
	cmp/hi	REG_DST,REG_SRC	/* [delay slot] T = ( src > dst ), unsigned */
	bf/s	bcopy_overlap	/* src < dst: copy backward */

	mov	REG_SRC,r0	/* [delay slot] r0 = src */
	xor	REG_DST,r0
	and	#3,r0
	mov	r0,r1		/* r1 = (src ^ dst) & 3, read again at word_align */
	tst	r0,r0		/* (src ^ dst) & 3         */
	bf/s	word_align	/* low two bits differ: no longword copy */

	/*
	 * Forward copy, src and dst congruent modulo 4.
	 * The tst below is also the delay slot of the bf/s above.
	 */
longword_align:
	tst	REG_LEN,REG_LEN	/* if ( len==0 ) return;   */
	bt/s	bcopy_return


	mov	REG_SRC,r0	/* [delay slot] r0 = src for the bit tests */
	tst	#1,r0		/* if ( src & 1 ) {        */
	bt	1f
	mov.b	@REG_SRC+,r0	/*    *dst++ = *src++;     */
	add	#-1,REG_LEN	/*    len--;               */
	mov.b	r0,@REG_DST
	add	#1,REG_DST	/* }                       */
1:


	mov	#1,r0
	cmp/hi	r0,REG_LEN	/* if ( (len > 1) &&       */
	bf/s	1f
	mov	REG_SRC,r0	/* [delay slot] r0 = src   */
	tst	#2,r0		/*      (src & 2) {        */
	bt	1f
	mov.w	@REG_SRC+,r0	/*        *((unsigned short*)dst)++ = *((unsigned short*)src)++; */
	add	#-2,REG_LEN	/*        len -= 2;                                              */
	mov.w	r0,@REG_DST
	add	#2,REG_DST	/* }                       */
1:


	mov	#3,r1
	cmp/hi	r1,REG_LEN	/* while ( len > 3 ) {     */
	bf/s	no_align_delay
	tst	REG_LEN,REG_LEN	/* [delay slot] T = ( len == 0 ) for no_align_delay */
2:
	mov.l	@REG_SRC+,r0	/*   *((unsigned long*)dst)++ = *((unsigned long*)src)++;        */
	add	#-4,REG_LEN	/*   len -= 4;                                                   */
	mov.l	r0,@REG_DST
	cmp/hi	r1,REG_LEN
	bt/s	2b
	add	#4,REG_DST	/* } [delay slot] dst advances on every pass */

	bra	no_align_delay	/* finish the last 0..3 bytes */
	tst	REG_LEN,REG_LEN	/* [delay slot] T = ( len == 0 ) */


	/*
	 * Forward copy, src and dst not congruent modulo 4.
	 * r1 still holds (src ^ dst) & 3, which is 1, 2 or 3 here.
	 */
word_align:
	mov	r1,r0
	tst	#1,r0		/* bit 0 set: only byte copies are possible */
	bf/s	no_align_delay
	tst	REG_LEN,REG_LEN	/* [delay slot] if ( len == 0 ) return; */
	bt	bcopy_return


	mov	REG_SRC,r0	/* if ( src & 1 ) {        */
	tst	#1,r0
	bt	1f
	mov.b	@REG_SRC+,r0	/*    *dst++ = *src++;     */
	add	#-1,REG_LEN	/*    len--;               */
	mov.b	r0,@REG_DST
	add	#1,REG_DST	/* }                       */
1:


	mov	#1,r1
	cmp/hi	r1,REG_LEN	/* while ( len > 1 ) {     */
	bf/s	no_align_delay
	tst	REG_LEN,REG_LEN	/* [delay slot] T = ( len == 0 ) for no_align_delay */
2:
	mov.w	@REG_SRC+,r0	/*   *((unsigned short*)dst)++ = *((unsigned short*)src)++;      */
	add	#-2,REG_LEN	/*   len -= 2;                                                   */
	mov.w	r0,@REG_DST
	cmp/hi	r1,REG_LEN
	bt/s	2b
	add	#2,REG_DST	/* } [delay slot] dst advances on every pass */


	/*
	 * Forward byte loop.  Branches to no_align_delay arrive with
	 * T = ( len == 0 ) already set by a tst in their delay slot;
	 * the fall-through from above sets it at no_align.
	 */
no_align:
	tst	REG_LEN,REG_LEN	/* while ( len != 0 ) {    */
no_align_delay:
	bt	bcopy_return
1:
	mov.b	@REG_SRC+,r0	/*    *dst++ = *src++;     */
	add	#-1,REG_LEN	/*    len--;               */
	mov.b	r0,@REG_DST
	tst	REG_LEN,REG_LEN
	bf/s	1b
	add	#1,REG_DST	/* } [delay slot] dst advances on every pass */
bcopy_return:
	rts
#ifdef REG_DST0
	mov	REG_DST0,r0	/* [delay slot] r0 = original dst */
#else
	nop			/* [delay slot] */
#endif


	/*
	 * Backward copy (src < dst).  Both pointers are first moved one
	 * past the end of their buffers, then walk down.  Source loads
	 * are preceded by an explicit add of the negative size; stores
	 * use the pre-decrement form @-REG_DST.
	 */
bcopy_overlap:
	add	REG_LEN,REG_SRC	/* src += len; */
	add	REG_LEN,REG_DST	/* dst += len; */

	mov	REG_SRC,r0
	xor	REG_DST,r0
	and	#3,r0
	mov	r0,r1		/* r1 = (src ^ dst) & 3, read again at ov_word_align */
	tst	r0,r0		/* (src ^ dst) & 3         */
	bf/s	ov_word_align	/* low two bits differ: no longword copy */

	/*
	 * Backward copy, src and dst congruent modulo 4.
	 * The tst below is also the delay slot of the bf/s above.
	 */
ov_longword_align:
	tst	REG_LEN,REG_LEN	/* if ( len==0 ) return;   */
	bt/s	bcopy_return


	mov	REG_SRC,r0	/* [delay slot] r0 = src (end pointer) */
	tst	#1,r0		/* if ( src & 1 ) {        */
	bt	1f
	add	#-1,REG_SRC	/*    *--dst = *--src;     */
	mov.b	@REG_SRC,r0
	mov.b	r0,@-REG_DST
	add	#-1,REG_LEN	/*    len--;               */
1:				/* }                       */


	mov	#1,r0
	cmp/hi	r0,REG_LEN	/* if ( (len > 1) &&       */
	bf/s	1f
	mov	REG_SRC,r0	/* [delay slot] r0 = src   */
	tst	#2,r0		/*      (src & 2) {        */
	bt	1f
	add	#-2,REG_SRC	/*        *--((unsigned short*)dst) = *--((unsigned short*)src); */
	mov.w	@REG_SRC,r0
	add	#-2,REG_LEN	/*        len -= 2;                                              */
	mov.w	r0,@-REG_DST	/* }                       */
1:


	mov	#3,r1
	cmp/hi	r1,REG_LEN	/* while ( len > 3 ) {     */
	bf/s	ov_no_align_delay
	tst	REG_LEN,REG_LEN	/* [delay slot] T = ( len == 0 ) for ov_no_align_delay */
2:
	add	#-4,REG_SRC
	mov.l	@REG_SRC,r0	/*   *--((unsigned long*)dst) = *--((unsigned long*)src);        */
	add	#-4,REG_LEN	/*   len -= 4;                                                   */
	cmp/hi	r1,REG_LEN
	bt/s	2b
	mov.l	r0,@-REG_DST	/* } [delay slot] store happens on every pass */

	bra	ov_no_align_delay	/* finish the last 0..3 bytes */
	tst	REG_LEN,REG_LEN	/* [delay slot] T = ( len == 0 ) */


	/*
	 * Backward copy, src and dst not congruent modulo 4.
	 * r1 still holds (src ^ dst) & 3, which is 1, 2 or 3 here.
	 */
ov_word_align:
	mov	r1,r0
	tst	#1,r0		/* bit 0 set: only byte copies are possible */
	bf/s	ov_no_align_delay
	tst	REG_LEN,REG_LEN	/* [delay slot] if ( len == 0 ) return; */
	bt	bcopy_return


	mov	REG_SRC,r0	/* if ( src & 1 ) {        */
	tst	#1,r0
	bt	1f
	add	#-1,REG_SRC
	mov.b	@REG_SRC,r0	/*    *--dst = *--src;     */
	add	#-1,REG_LEN	/*    len--;               */
	mov.b	r0,@-REG_DST	/* }                       */
1:


	mov	#1,r1
	cmp/hi	r1,REG_LEN	/* while ( len > 1 ) {     */
	bf/s	ov_no_align_delay
	tst	REG_LEN,REG_LEN	/* [delay slot] T = ( len == 0 ) for ov_no_align_delay */
2:
	add	#-2,REG_SRC
	mov.w	@REG_SRC,r0	/*   *--((unsigned short*)dst) = *--((unsigned short*)src);      */
	add	#-2,REG_LEN	/*   len -= 2;                                                   */
	cmp/hi	r1,REG_LEN
	bt/s	2b
	mov.w	r0,@-REG_DST	/* } [delay slot] store happens on every pass */


	/*
	 * Backward byte loop.  Branches to ov_no_align_delay arrive with
	 * T = ( len == 0 ) already set by a tst in their delay slot;
	 * the fall-through from above sets it at ov_no_align.
	 */
ov_no_align:
	tst	REG_LEN,REG_LEN	/* while ( len != 0 ) {    */
ov_no_align_delay:
	bt	9f
1:
	add	#-1,REG_SRC
	mov.b	@REG_SRC,r0	/*    *--dst = *--src;     */
	add	#-1,REG_LEN	/*    len--;               */
	tst	REG_LEN,REG_LEN
	bf/s	1b
	mov.b	r0,@-REG_DST	/* } [delay slot] store happens on every pass */
9:
	rts
#ifdef REG_DST0
	mov	REG_DST0,r0	/* [delay slot] r0 = original dst */
#else
	nop			/* [delay slot] */
#endif