xref: /netbsd-src/common/lib/libc/arch/mips/string/bcopy.S (revision 460818ad029ce5f2efdb4feff19eb4e78f092f32)
1/*	$NetBSD: bcopy.S,v 1.1 2005/12/21 00:25:56 christos Exp $	*/
2
3/*
4 * Mach Operating System
5 * Copyright (c) 1993 Carnegie Mellon University
6 * All Rights Reserved.
7 *
8 * Permission to use, copy, modify and distribute this software and its
9 * documentation is hereby granted, provided that both the copyright
10 * notice and this permission notice appear in all copies of the
11 * software, derivative works or modified versions, and any portions
12 * thereof, and that both notices appear in supporting documentation.
13 *
14 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
15 * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
16 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
17 *
18 * Carnegie Mellon requests users of this software to return to
19 *
20 *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
21 *  School of Computer Science
22 *  Carnegie Mellon University
23 *  Pittsburgh PA 15213-3890
24 *
25 * any improvements or extensions that they make and grant Carnegie Mellon
26 * the rights to redistribute these changes.
27 */
28
29/*
30 *	File:	mips_bcopy.s
31 *	Author:	Chris Maeda
32 *	Date:	June 1993
33 *
34 *	Fast copy routine.  Derived from aligned_block_copy.
35 */
36
37
#include <mips/asm.h>
/*
 * _LOCORE must be defined before <machine/endian.h> is included.
 * NOTE(review): presumably this keeps the header from emitting C-only
 * declarations into assembly source -- confirm against the header.
 */
#define _LOCORE		/* XXX not really, just assembly-code source */
#include <machine/endian.h>


/* Version identification strings, emitted only for SCCS-tagged libc builds. */
#if defined(LIBC_SCCS) && !defined(lint)
	ASMSTR("from: @(#)mips_bcopy.s	2.2 CMU 18/06/93")
	ASMSTR("$NetBSD: bcopy.S,v 1.1 2005/12/21 00:25:56 christos Exp $")
#endif /* LIBC_SCCS and not lint */

/* Tell the assembler when the ABI-calls (PIC) convention is in effect. */
#ifdef __ABICALLS__
	.abicalls
#endif
51
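/*
 *	bcopy(caddr_t src, caddr_t dst, unsigned int len)
 *
 *	a0 	src address
 *	a1	dst address
 *	a2	length
 *
 *	When this file is built with MEMCOPY or MEMMOVE defined, the same
 *	code is assembled as memcpy or memmove instead.  The pointer
 *	arguments are then swapped (a0 = dst, a1 = src) and the original
 *	dst is returned in v0.
 */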
59
/*
 * Select the entry-point name and the argument registers for this build.
 *
 *	MEMCOPY defined		FUNCTION = memcpy,  dst in a0, src in a1
 *	MEMMOVE defined		FUNCTION = memmove, dst in a0, src in a1
 *	neither			FUNCTION = bcopy,   src in a0, dst in a1
 *
 * If both MEMCOPY and MEMMOVE are defined, MEMCOPY wins.
 * The byte count is in a2 for every variant.
 */
#if defined(MEMCOPY) || defined(MEMMOVE)
#ifdef MEMCOPY
#define	FUNCTION	memcpy
#else
#define FUNCTION	memmove
#endif
#define	SRCREG		a1
#define	DSTREG		a0
#else
#define	FUNCTION	bcopy
#define	SRCREG		a0
#define	DSTREG		a1
#endif

#define	SIZEREG		a2
75
/*
 * FUNCTION -- copy SIZEREG bytes from SRCREG to DSTREG.
 *
 * The copy direction is chosen from the pointer order, so overlapping
 * regions are handled: if src < dst (unsigned) the copy runs backwards
 * from the end of both regions, otherwise it runs forwards.
 *
 * In:	SRCREG	source address
 *	DSTREG	destination address
 *	SIZEREG	byte count (unsigned)
 * Out:	v0	original DSTREG, only in the MEMCOPY/MEMMOVE builds
 * Clobbers: SRCREG, DSTREG, SIZEREG, a3, t0-t3, v1, AT
 *
 * Strategy, applied to the start pointers (forward) or the end pointers
 * (backward):
 *	dst not word aligned		byte loop for the whole length
 *	dst aligned, src aligned	32-byte unrolled loop, then words,
 *					then trailing bytes
 *	dst aligned, src unaligned	LWHI/LWLO word loop, then trailing
 *					bytes
 *
 * The code runs under ".set noreorder": the instruction that follows
 * each branch or jump is its delay slot and executes whether or not the
 * branch is taken.  Do not reorder instructions casually.
 */
LEAF(FUNCTION)
	.set	noat
	.set	noreorder

#if defined(MEMCOPY) || defined(MEMMOVE)
	/* set up return value, while we still can */
	move	v0,DSTREG
#endif
	/*
	 *	Make sure we can copy forwards.
	 */
	sltu	t0,SRCREG,DSTREG	# t0 = (src < dst), unsigned
	bne	t0,zero,6f		# src below dst: copy backwards

	/*
	 * 	There are three alignment cases (with frequency)
	 *	(Based on measurements taken with a DECstation 5000/200
	 *	 inside a Mach kernel.)
	 *
	 * 	aligned   -> aligned		(mostly)
	 * 	unaligned -> aligned		(sometimes)
	 * 	aligned,unaligned -> unaligned	(almost never)
	 *
	 *	Note that we could add another case that checks if
	 *	the destination and source are unaligned but the
	 *	copy is alignable.  eg if src and dest are both
	 *	on a halfword boundary.
	 *
	 *	Each instruction after a branch below is that branch's
	 *	delay slot; the values computed there are either used on
	 *	the fall-through path or recomputed at the branch target.
	 */
	andi	t1,DSTREG,3		# low 2 bits of dest (delay slot)
	bne	t1,zero,3f		# dest unaligned: byte copy everything
	andi	t0,SRCREG,3		# low 2 bits of src (delay slot)
	bne	t0,zero,5f		# src unaligned, dest aligned

	/*
	 *	Forward aligned->aligned copy, 8*4 bytes at a time.
	 */
	li	AT,-32			# mask for multiples of 32 (delay slot)
	and	t0,SIZEREG,AT		# count truncated to multiple of 32
	addu	a3,SRCREG,t0		# run fast loop up to this address
	sltu	AT,SRCREG,a3		# any work to do?
	beq	AT,zero,2f
	subu	SIZEREG,t0		# bytes left after fast loop (delay slot)

	/*
	 *	loop body
	 *
	 *	Each pass moves 32 bytes as two 16-byte halves.  SRCREG and
	 *	DSTREG are advanced part way through, so the second half is
	 *	addressed with negative offsets from the new pointers.
	 */
1:	# cp
	lw	t3,0(SRCREG)
	lw	v1,4(SRCREG)
	lw	t0,8(SRCREG)
	lw	t1,12(SRCREG)
	addu	SRCREG,32
	sw	t3,0(DSTREG)
	sw	v1,4(DSTREG)
	sw	t0,8(DSTREG)
	sw	t1,12(DSTREG)
	lw	t1,-4(SRCREG)
	lw	t0,-8(SRCREG)
	lw	v1,-12(SRCREG)
	lw	t3,-16(SRCREG)
	addu	DSTREG,32
	sw	t1,-4(DSTREG)
	sw	t0,-8(DSTREG)
	sw	v1,-12(DSTREG)
	bne	SRCREG,a3,1b
	sw	t3,-16(DSTREG)		# last store of the pass (delay slot)

	/*
	 *	Copy a word at a time, no loop unrolling.
	 */
2:	# wordcopy
	andi	t2,SIZEREG,3		# t2 = byte count mod 4
	subu	t2,SIZEREG,t2		# t2 = number of words to copy * 4
	beq	t2,zero,3f
	addu	t0,SRCREG,t2		# stop at t0 (delay slot)
	subu	SIZEREG,SIZEREG,t2	# trailing bytes for the byte loop
1:
	lw	t3,0(SRCREG)
	addu	SRCREG,4
	sw	t3,0(DSTREG)
	bne	SRCREG,t0,1b
	addu	DSTREG,4		# (delay slot)

	/*
	 *	Copy a byte at a time.  The count is known to be non-zero
	 *	when the loop is entered and drops by exactly one per pass,
	 *	so the loop ends on an equality test with zero.  A signed
	 *	greater-than-zero test here would stop after one byte for
	 *	counts with the top bit set.
	 */
3:	# bytecopy
	beq	SIZEREG,zero,4f	# nothing left to do?
	nop
1:
	lb	t3,0(SRCREG)		# sb stores only the low byte
	addu	SRCREG,1
	sb	t3,0(DSTREG)
	subu	SIZEREG,1
	bne	SIZEREG,zero,1b
	addu	DSTREG,1		# (delay slot)

4:	# copydone
	j	ra
	nop

	/*
	 *	Copy from unaligned source to aligned dest.
	 *
	 *	LWHI/LWLO are not defined in this file.
	 *	NOTE(review): presumably they come from <mips/asm.h> and
	 *	expand to the lwl/lwr pair matching the byte order -- confirm.
	 *
	 *	Pointer updates use addu, not addi: addi raises an Integer
	 *	Overflow exception on signed overflow, which is wrong for
	 *	address arithmetic.
	 */
5:	# destaligned
	andi	t0,SIZEREG,3		# t0 = bytecount mod 4
	subu	a3,SIZEREG,t0		# number of words to transfer * 4
	beq	a3,zero,3b		# less than one word: byte copy
	nop
	move	SIZEREG,t0		# this many to do after we are done
	addu	a3,SRCREG,a3		# stop point

1:
	LWHI	t3,0(SRCREG)
	LWLO	t3,3(SRCREG)
	addu	SRCREG,4
	sw	t3,0(DSTREG)
	bne	SRCREG,a3,1b
	addu	DSTREG,4		# (delay slot)

	j	3b			# finish trailing bytes
	nop

	/*
	 *	Backward copy.  Both pointers are first moved to one past
	 *	the end of their regions; every access below then uses
	 *	negative offsets (or positive offsets from an already
	 *	decremented pointer).  The alignment tests are applied to
	 *	the end pointers.
	 */
6:	# backcopy -- based on above
	addu	SRCREG,SIZEREG
	addu	DSTREG,SIZEREG
	andi	t1,DSTREG,3		# low 2 bits of dest end
	bne	t1,zero,3f		# dest end unaligned: byte copy everything
	andi	t0,SRCREG,3		# low 2 bits of src end (delay slot)
	bne	t0,zero,5f		# src end unaligned, dest end aligned

	/*
	 *	Backward aligned->aligned copy, 8*4 bytes at a time.
	 */
	li	AT,-32			# mask for multiples of 32 (delay slot)
	and	t0,SIZEREG,AT		# count truncated to multiple of 32
	beq	t0,zero,2f		# any work to do?
	subu	SIZEREG,t0		# bytes left after fast loop (delay slot)
	subu	a3,SRCREG,t0		# run fast loop down to this address

	/*
	 *	loop body
	 *
	 *	Mirror image of the forward loop: the upper 16 bytes are
	 *	moved first, the pointers are decremented part way through,
	 *	and the lower 16 bytes use non-negative offsets from the
	 *	new pointers.
	 */
1:	# cp
	lw	t3,-16(SRCREG)
	lw	v1,-12(SRCREG)
	lw	t0,-8(SRCREG)
	lw	t1,-4(SRCREG)
	subu	SRCREG,32
	sw	t3,-16(DSTREG)
	sw	v1,-12(DSTREG)
	sw	t0,-8(DSTREG)
	sw	t1,-4(DSTREG)
	lw	t1,12(SRCREG)
	lw	t0,8(SRCREG)
	lw	v1,4(SRCREG)
	lw	t3,0(SRCREG)
	subu	DSTREG,32
	sw	t1,12(DSTREG)
	sw	t0,8(DSTREG)
	sw	v1,4(DSTREG)
	bne	SRCREG,a3,1b
	sw	t3,0(DSTREG)		# last store of the pass (delay slot)

	/*
	 *	Copy a word at a time, no loop unrolling.
	 */
2:	# wordcopy
	andi	t2,SIZEREG,3		# t2 = byte count mod 4
	subu	t2,SIZEREG,t2		# t2 = number of words to copy * 4
	beq	t2,zero,3f
	subu	t0,SRCREG,t2		# stop at t0 (delay slot)
	subu	SIZEREG,SIZEREG,t2	# trailing bytes for the byte loop
1:
	lw	t3,-4(SRCREG)
	subu	SRCREG,4
	sw	t3,-4(DSTREG)
	bne	SRCREG,t0,1b
	subu	DSTREG,4		# (delay slot)

	/*
	 *	Copy a byte at a time; see the forward byte loop for why
	 *	the loop ends on an equality test with zero.
	 */
3:	# bytecopy
	beq	SIZEREG,zero,4f		# nothing left to do?
	nop
1:
	lb	t3,-1(SRCREG)		# sb stores only the low byte
	subu	SRCREG,1
	sb	t3,-1(DSTREG)
	subu	SIZEREG,1
	bne	SIZEREG,zero,1b
	subu	DSTREG,1		# (delay slot)

4:	# copydone
	j	ra
	nop

	/*
	 *	Copy from unaligned source to aligned dest.
	 */
5:	# destaligned
	andi	t0,SIZEREG,3		# t0 = bytecount mod 4
	subu	a3,SIZEREG,t0		# number of words to transfer * 4
	beq	a3,zero,3b		# less than one word: byte copy
	nop
	move	SIZEREG,t0		# this many to do after we are done
	subu	a3,SRCREG,a3		# stop point

1:
	LWHI	t3,-4(SRCREG)
	LWLO	t3,-1(SRCREG)
	subu	SRCREG,4
	sw	t3,-4(DSTREG)
	bne	SRCREG,a3,1b
	subu	DSTREG,4		# (delay slot)

	j	3b			# finish trailing bytes
	nop

	.set	reorder
	.set	at
	END(FUNCTION)
293