xref: /openbsd-src/sys/lib/libkern/arch/sh/memcpy.S (revision 799f675f6700f14e59124f9825c723e9f2ce19dc)
1/*	$NetBSD: memcpy.S,v 1.2 2006/04/22 23:53:47 uwe Exp $	*/
2
3/*
4 * Copyright (c) 2000 SHIMIZU Ryo <ryo@misakimix.org>
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 *    notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 *    notice, this list of conditions and the following disclaimer in the
14 *    documentation and/or other materials provided with the distribution.
15 * 3. The name of the author may not be used to endorse or promote products
16 *    derived from this software without specific prior written permission.
17 *
18 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
19 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
20 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
21 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
22 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
23 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
24 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
25 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 */
29
30#include <machine/asm.h>
31
32#if defined(LIBC_SCCS) && !defined(lint)
33	RCSID("$NetBSD: memcpy.S,v 1.2 2006/04/22 23:53:47 uwe Exp $")
34#endif
35
/*
 * Build-variant selection.  This one source is assembled as memcpy,
 * memmove or bcopy depending on which of MEMCOPY, MEMMOVE or BCOPY is
 * defined; when none of them is defined, MEMCOPY is selected.
 */
36#if !defined(MEMCOPY) && !defined(MEMMOVE) && !defined(BCOPY)
37#define MEMCOPY
38#endif
39
/*
 * Register roles.  The memcpy/memmove builds use r4 as the destination
 * pointer and r5 as the source pointer; the bcopy build swaps the two.
 * REG_DST0 is defined only for memcpy/memmove: the routine copies the
 * incoming destination pointer into it on entry and moves it to r0 when
 * returning.  Code further down tests whether REG_DST0 is defined to
 * decide if that save/restore is assembled at all.
 */
40#if defined(MEMCOPY) || defined(MEMMOVE)
41#define	REG_DST0	r3	/* copy of the original dst, returned in r0 */
42#define	REG_SRC		r5	/* source pointer, advanced while copying */
43#define	REG_DST		r4	/* destination pointer, advanced while copying */
44#else
45#define	REG_SRC		r4	/* source pointer, advanced while copying */
46#define	REG_DST		r5	/* destination pointer, advanced while copying */
47#endif
48
49#define	REG_LEN		r6	/* bytes still to copy, counted down to 0 */
50
/*
 * memcpy / memmove / bcopy (plus the ovbcopy alias in the bcopy build):
 * entry point and forward, ascending-address copy.
 *
 * Copies REG_LEN bytes from REG_SRC to REG_DST.  Nothing is copied when
 * src == dst.  When src is above dst (unsigned compare) the copy runs
 * forward here; otherwise control goes to bcopy_overlap, which copies
 * backward starting from the end of both buffers.  This direction check
 * is assembled into every build variant, memcpy included.
 *
 * The forward path picks its transfer width from the low two bits of
 * (src ^ dst), kept in r1:
 *   zero         - longword_align: copy a leading byte and/or 16-bit word
 *                  while src has bit 0 / bit 1 set, then 32-bit words
 *   bit 0 clear  - word_align: copy a leading byte if src is odd, then
 *                  16-bit words
 *   bit 0 set    - byte loop only
 * Whatever is left over is finished by the byte loop at no_align.
 *
 * bt/s, bf/s, bra and rts are delayed branches: the instruction that
 * follows them is executed before the branch takes effect.  Those
 * instructions are marked "delay slot" below; several of them set the
 * T bit that is tested at the branch target, so their position matters.
 *
 * Modifies r0, r1, REG_SRC, REG_DST, REG_LEN and the T bit (and r3 in
 * the memcpy/memmove builds).  In the memcpy/memmove builds r0 holds the
 * original dst on return.
 */
51#if defined(MEMCOPY)
52ENTRY(memcpy)
53#elif defined(MEMMOVE)
54ENTRY(memmove)
55#elif defined(BCOPY)
56ENTRY(bcopy)
57ALTENTRY(ovbcopy)
58#endif
59#ifdef REG_DST0
60	mov	REG_DST,REG_DST0	/* keep the original dst for the return value */
61#endif
62	cmp/eq	REG_DST,REG_SRC	/* if ( src == dst ) return; */
63	bt/s	bcopy_return
64	cmp/hi	REG_DST,REG_SRC	/* delay slot: T = ( src > dst ), unsigned */
65	bf/s	bcopy_overlap	/* if ( !(src > dst) ) copy backward */
66
67	mov	REG_SRC,r0	/* delay slot: r0 = src */
68	xor	REG_DST,r0
69	and	#3,r0
70	mov	r0,r1		/* r1 = (src ^ dst) & 3, read again at word_align */
71	tst	r0,r0		/* (src ^ dst) & 3         */
72	bf/s	word_align	/* nonzero: no common 4-byte alignment */
73
74longword_align:
75	tst	REG_LEN,REG_LEN	/* if ( len == 0 ) return; (also the delay slot of the bf/s above) */
76	bt/s	bcopy_return
77
78
79	mov	REG_SRC,r0	/* delay slot: r0 = src */
80	tst	#1,r0		/* if ( src & 1 )          */
81	bt	1f		/* src is even: no leading byte */
82	mov.b	@REG_SRC+,r0	/*    *dst++ = *src++;     */
83	add	#-1,REG_LEN	/*    len--;               */
84	mov.b	r0,@REG_DST
85	add	#1,REG_DST
861:
87
88
89	mov	#1,r0
90	cmp/hi	r0,REG_LEN	/* if ( (len > 1) &&       */
91	bf/s	1f
92	mov	REG_SRC,r0	/* delay slot: r0 = src */
93	tst	#2,r0		/*      (src & 2) {        */
94	bt	1f
95	mov.w	@REG_SRC+,r0	/*        *((unsigned short*)dst)++ = *((unsigned short*)src)++; */
96	add	#-2,REG_LEN	/*        len -= 2;                                              */
97	mov.w	r0,@REG_DST
98	add	#2,REG_DST	/* }                       */
991:
100
101
102	mov	#3,r1
103	cmp/hi	r1,REG_LEN	/* while ( len > 3 ) {     */
104	bf/s	no_align_delay
105	tst	REG_LEN,REG_LEN	/* delay slot: T = ( len == 0 ), tested at no_align_delay */
1062:
107	mov.l	@REG_SRC+,r0	/*   *((unsigned long*)dst)++ = *((unsigned long*)src)++;        */
108	add	#-4,REG_LEN	/*   len -= 4;                                                   */
109	mov.l	r0,@REG_DST
110	cmp/hi	r1,REG_LEN
111	bt/s	2b
112	add	#4,REG_DST	/* }  (delay slot: runs on every pass, including the last) */
113
114	bra	no_align_delay	/* finish the remaining 0..3 bytes */
115	tst	REG_LEN,REG_LEN	/* delay slot: T = ( len == 0 ) */
116
117
118word_align:
119	mov	r1,r0		/* r0 = (src ^ dst) & 3 */
120	tst	#1,r0
121	bf/s	no_align_delay	/* bit 0 differs: only byte copies are possible */
122	tst	REG_LEN,REG_LEN	/* if ( len == 0 ) return; (delay slot of the bf/s above) */
123	bt	bcopy_return
124
125
126	mov	REG_SRC,r0	/* if ( src & 1 )          */
127	tst	#1,r0
128	bt	1f		/* src is even: no leading byte */
129	mov.b	@REG_SRC+,r0	/*    *dst++ = *src++;     */
130	add	#-1,REG_LEN	/*    len--;               */
131	mov.b	r0,@REG_DST
132	add	#1,REG_DST
1331:
134
135
136	mov	#1,r1
137	cmp/hi	r1,REG_LEN	/* while ( len > 1 ) {     */
138	bf/s	no_align_delay
139	tst	REG_LEN,REG_LEN	/* delay slot: T = ( len == 0 ), tested at no_align_delay */
1402:
141	mov.w	@REG_SRC+,r0	/*   *((unsigned short*)dst)++ = *((unsigned short*)src)++;      */
142	add	#-2,REG_LEN	/*   len -= 2;                                                   */
143	mov.w	r0,@REG_DST
144	cmp/hi	r1,REG_LEN
145	bt/s	2b
146	add	#2,REG_DST	/* }  (delay slot: runs on every pass, including the last) */
147
148
149no_align:
150	tst	REG_LEN,REG_LEN	/* while ( len != 0 ) {    */
151no_align_delay:			/* entered with T already holding ( len == 0 ) */
152	bt	bcopy_return
1531:
154	mov.b	@REG_SRC+,r0	/*    *dst++ = *src++;     */
155	add	#-1,REG_LEN	/*    len--;               */
156	mov.b	r0,@REG_DST
157	tst	REG_LEN,REG_LEN
158	bf/s	1b
159	add	#1,REG_DST	/* }  (delay slot: runs on every pass, including the last) */
160bcopy_return:
161	rts
162#ifdef REG_DST0
163	mov	REG_DST0,r0	/* delay slot: return the original dst */
164#else
165	nop			/* delay slot: the bcopy build sets no return value */
166#endif
167
168
/*
 * bcopy_overlap: backward, descending-address copy.
 *
 * Reached from the entry sequence when src is not above dst.  Both
 * pointers are first advanced by len so that they point one past the end
 * of their buffers; every transfer then steps the pointers down before
 * moving data (src by an explicit add, dst by the pre-decrement store).
 *
 * The transfer width is chosen the same way as in the forward path, from
 * the low two bits of (src ^ dst) computed on the end pointers and kept
 * in r1: zero allows 32-bit words (after copying a trailing byte and/or
 * 16-bit word while the end of src has bit 0 / bit 1 set), bit 0 clear
 * allows 16-bit words, otherwise bytes only.  The byte loop at
 * ov_no_align finishes whatever is left.
 *
 * In the comments below "src" and "dst" mean the current, moving values
 * of REG_SRC and REG_DST.  Instructions marked "delay slot" follow a
 * delayed branch (bt/s, bf/s, bra, rts) and execute before that branch
 * takes effect.
 *
 * The len == 0 exits go to bcopy_return, the return sequence that ends
 * the forward path; the normal exit uses the equivalent sequence at
 * local label 9 below.
 */
169bcopy_overlap:
170	add	REG_LEN,REG_SRC	/* src += len; */
171	add	REG_LEN,REG_DST	/* dst += len; */
172
173	mov	REG_SRC,r0
174	xor	REG_DST,r0
175	and	#3,r0
176	mov	r0,r1		/* r1 = (src ^ dst) & 3, read again at ov_word_align */
177	tst	r0,r0		/* (src ^ dst) & 3         */
178	bf/s	ov_word_align	/* nonzero: no common 4-byte alignment */
179
180ov_longword_align:
181	tst	REG_LEN,REG_LEN	/* if ( len == 0 ) return; (also the delay slot of the bf/s above) */
182	bt/s	bcopy_return
183
184
185	mov	REG_SRC,r0	/* delay slot: r0 = src */
186	tst	#1,r0		/* if ( src & 1 )          */
187	bt	1f		/* end of src is even: no trailing byte */
188	add	#-1,REG_SRC	/*    *--dst = *--src;     */
189	mov.b	@REG_SRC,r0
190	mov.b	r0,@-REG_DST
191	add	#-1,REG_LEN	/*    len--;               */
1921:
193
194
195	mov	#1,r0
196	cmp/hi	r0,REG_LEN	/* if ( (len > 1) &&       */
197	bf/s	1f
198	mov	REG_SRC,r0	/* delay slot: r0 = src */
199	tst	#2,r0		/*      (src & 2) {        */
200	bt	1f
201	add	#-2,REG_SRC	/*        *--((unsigned short*)dst) = *--((unsigned short*)src); */
202	mov.w	@REG_SRC,r0
203	add	#-2,REG_LEN	/*        len -= 2;                                              */
204	mov.w	r0,@-REG_DST	/* }                       */
2051:
206
207
208	mov	#3,r1
209	cmp/hi	r1,REG_LEN	/* while ( len > 3 ) {     */
210	bf/s	ov_no_align_delay
211	tst	REG_LEN,REG_LEN	/* delay slot: T = ( len == 0 ), tested at ov_no_align_delay */
2122:
213	add	#-4,REG_SRC
214	mov.l	@REG_SRC,r0	/*   *--((unsigned long*)dst) = *--((unsigned long*)src);        */
215	add	#-4,REG_LEN	/*   len -= 4;                                                   */
216	cmp/hi	r1,REG_LEN
217	bt/s	2b
218	mov.l	r0,@-REG_DST	/* }  (delay slot: the store runs on every pass, including the last) */
219
220	bra	ov_no_align_delay	/* finish the remaining 0..3 bytes */
221	tst	REG_LEN,REG_LEN	/* delay slot: T = ( len == 0 ) */
222
223
224ov_word_align:
225	mov	r1,r0		/* r0 = (src ^ dst) & 3 */
226	tst	#1,r0
227	bf/s	ov_no_align_delay	/* bit 0 differs: only byte copies are possible */
228	tst	REG_LEN,REG_LEN	/* if ( len == 0 ) return; (delay slot of the bf/s above) */
229	bt	bcopy_return
230
231
232	mov	REG_SRC,r0	/* if ( src & 1 )          */
233	tst	#1,r0
234	bt	1f		/* end of src is even: no trailing byte */
235	add	#-1,REG_SRC
236	mov.b	@REG_SRC,r0	/*    *--dst = *--src;     */
237	add	#-1,REG_LEN	/*    len--;               */
238	mov.b	r0,@-REG_DST
2391:
240
241
242	mov	#1,r1
243	cmp/hi	r1,REG_LEN	/* while ( len > 1 ) {     */
244	bf/s	ov_no_align_delay
245	tst	REG_LEN,REG_LEN	/* delay slot: T = ( len == 0 ), tested at ov_no_align_delay */
2462:
247	add	#-2,REG_SRC
248	mov.w	@REG_SRC,r0	/*   *--((unsigned short*)dst) = *--((unsigned short*)src);      */
249	add	#-2,REG_LEN	/*   len -= 2;                                                   */
250	cmp/hi	r1,REG_LEN
251	bt/s	2b
252	mov.w	r0,@-REG_DST	/* }  (delay slot: the store runs on every pass, including the last) */
253
254
255ov_no_align:
256	tst	REG_LEN,REG_LEN	/* while ( len != 0 ) {    */
257ov_no_align_delay:		/* entered with T already holding ( len == 0 ) */
258	bt	9f
2591:
260	add	#-1,REG_SRC
261	mov.b	@REG_SRC,r0	/*    *--dst = *--src;     */
262	add	#-1,REG_LEN	/*    len--;               */
263	tst	REG_LEN,REG_LEN
264	bf/s	1b
265	mov.b	r0,@-REG_DST	/* }  (delay slot: the store runs on every pass, including the last) */
2669:
267	rts
268#ifdef REG_DST0
269	mov	REG_DST0,r0	/* delay slot: return the original dst */
270#else
271	nop			/* delay slot: the bcopy build sets no return value */
272#endif
273