xref: /netbsd-src/common/lib/libc/arch/mips/string/bcopy.S (revision b5677b36047b601b9addaaa494a58ceae82c2a6c)
1/*	$NetBSD: bcopy.S,v 1.2 2005/12/27 11:23:53 tsutsui Exp $	*/
2
3/*
4 * Mach Operating System
5 * Copyright (c) 1993 Carnegie Mellon University
6 * All Rights Reserved.
7 *
8 * Permission to use, copy, modify and distribute this software and its
9 * documentation is hereby granted, provided that both the copyright
10 * notice and this permission notice appear in all copies of the
11 * software, derivative works or modified versions, and any portions
12 * thereof, and that both notices appear in supporting documentation.
13 *
14 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
15 * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
16 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
17 *
18 * Carnegie Mellon requests users of this software to return to
19 *
20 *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
21 *  School of Computer Science
22 *  Carnegie Mellon University
23 *  Pittsburgh PA 15213-3890
24 *
25 * any improvements or extensions that they make and grant Carnegie Mellon
26 * the rights to redistribute these changes.
27 */
28
29/*
30 *	File:	mips_bcopy.s
31 *	Author:	Chris Maeda
32 *	Date:	June 1993
33 *
34 *	Fast copy routine.  Derived from aligned_block_copy.
35 */
36
37
38#include <mips/asm.h>
39#ifndef _LOCORE
40#define _LOCORE		/* XXX not really, just assembly-code source */
41#endif
42#include <machine/endian.h>
43
44
45#if defined(LIBC_SCCS) && !defined(lint)
46	ASMSTR("from: @(#)mips_bcopy.s	2.2 CMU 18/06/93")
47	ASMSTR("$NetBSD: bcopy.S,v 1.2 2005/12/27 11:23:53 tsutsui Exp $")
48#endif /* LIBC_SCCS and not lint */
49
50#ifdef __ABICALLS__
51	.abicalls
52#endif
53
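/*
 *	bcopy(caddr_t src, caddr_t dst, unsigned int len)
 *
 *	a0 	src address
 *	a1	dst address
 *	a2	length
 *
 *	When MEMCOPY or MEMMOVE is defined the same code is assembled as
 *	memcpy or memmove instead.  In that case the first two arguments
 *	are swapped (a0 is the dst address, a1 is the src address) and
 *	the original dst address is returned in v0.
 */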
61
/*
 * Entry-point name.  MEMCOPY takes precedence over MEMMOVE; with neither
 * defined the routine is assembled as bcopy.
 */
#if defined(MEMCOPY)
#define	FUNCTION	memcpy
#elif defined(MEMMOVE)
#define	FUNCTION	memmove
#else
#define	FUNCTION	bcopy
#endif

/*
 * Argument registers.  memcpy and memmove take (dst, src, len) while
 * bcopy takes (src, dst, len), so only the first two registers trade
 * places; the length is always in a2.
 */
#if defined(MEMCOPY) || defined(MEMMOVE)
#define	DSTREG		a0
#define	SRCREG		a1
#else
#define	SRCREG		a0
#define	DSTREG		a1
#endif

#define	SIZEREG		a2
77
/*
 * FUNCTION: block copy shared by the bcopy, memcpy and memmove builds.
 *
 * SRCREG, DSTREG and SIZEREG are the argument registers holding the
 * source address, the destination address and the byte count.  The
 * memcpy/memmove builds also return the original destination in v0.
 *
 * The copy runs forward when the source is not below the destination
 * and backward (starting from the end of both buffers) otherwise.  In
 * both directions every group of loads is finished before the matching
 * stores are issued.
 *
 * Registers written: AT, v0 (memcpy/memmove builds only), v1, a3, t0-t3,
 * plus SRCREG, DSTREG and SIZEREG themselves.
 *
 * The whole body is assembled under ".set noreorder": the instruction
 * that follows each branch or jump is its delay slot and is executed
 * whether or not the branch is taken.  Do not reorder instructions
 * without re-checking every delay slot.
 *
 * Numeric local labels are reused between the forward and the backward
 * half; "Nf" / "Nb" always bind to the nearest definition of N in that
 * direction.
 */
LEAF(FUNCTION)
	.set	noat
	.set	noreorder

#if defined(MEMCOPY) || defined(MEMMOVE)
	/* set up return value, while we still can */
	move	v0,DSTREG
#endif
	/*
	 *	Make sure we can copy forwards.
	 *	The delay slot of this branch is the first andi below; it
	 *	is harmless on the backward path, which recomputes t1.
	 */
	sltu	t0,SRCREG,DSTREG	# t0 = (SRCREG < DSTREG)
	bne	t0,zero,6f		# src below dst: copy backwards

	/*
	 * 	There are four alignment cases (with frequency)
	 *	(Based on measurements taken with a DECstation 5000/200
	 *	 inside a Mach kernel.)
	 *
	 * 	aligned   -> aligned		(mostly)
	 * 	unaligned -> aligned		(sometimes)
	 * 	aligned,unaligned -> unaligned	(almost never)
	 *
	 *	Note that we could add another case that checks if
	 *	the destination and source are unaligned but the
	 *	copy is alignable.  eg if src and dest are both
	 *	on a halfword boundary.
	 *
	 *	An unaligned destination is always copied byte by byte.
	 */
	andi	t1,DSTREG,3		# t1 = low 2 bits of dest
	bne	t1,zero,3f		# dest unaligned: byte copy it all
	andi	t0,SRCREG,3		# (delay slot) t0 = low 2 bits of src
	bne	t0,zero,5f		# src unaligned, dest aligned

	/*
	 *	Forward aligned->aligned copy, 8*4 bytes at a time.
	 *	(The li is also the delay slot of the branch above.)
	 */
	li	AT,-32
	and	t0,SIZEREG,AT		# count truncated to multiple of 32
	addu	a3,SRCREG,t0		# run fast loop up to this address
	sltu	AT,SRCREG,a3		# any work to do?
	beq	AT,zero,2f
	subu	SIZEREG,t0		# (delay slot) bytes left after fast loop

	/*
	 *	loop body: two groups of four words; each group is loaded
	 *	completely before it is stored.  The second group is
	 *	addressed with negative offsets from the already advanced
	 *	pointers.
	 */
1:	# cp
	lw	t3,0(SRCREG)
	lw	v1,4(SRCREG)
	lw	t0,8(SRCREG)
	lw	t1,12(SRCREG)
	addu	SRCREG,32
	sw	t3,0(DSTREG)
	sw	v1,4(DSTREG)
	sw	t0,8(DSTREG)
	sw	t1,12(DSTREG)
	lw	t1,-4(SRCREG)
	lw	t0,-8(SRCREG)
	lw	v1,-12(SRCREG)
	lw	t3,-16(SRCREG)
	addu	DSTREG,32
	sw	t1,-4(DSTREG)
	sw	t0,-8(DSTREG)
	sw	v1,-12(DSTREG)
	bne	SRCREG,a3,1b
	sw	t3,-16(DSTREG)		# (delay slot) last store of the group

	/*
	 *	Copy a word at a time, no loop unrolling.
	 */
2:	# wordcopy
	andi	t2,SIZEREG,3		# t2 = byte count mod 4
	subu	t2,SIZEREG,t2		# t2 = number of words to copy * 4
	beq	t2,zero,3f
	addu	t0,SRCREG,t2		# (delay slot) stop at t0
	subu	SIZEREG,SIZEREG,t2	# bytes left for the byte loop
1:
	lw	t3,0(SRCREG)
	addu	SRCREG,4
	sw	t3,0(DSTREG)
	bne	SRCREG,t0,1b
	addu	DSTREG,4		# (delay slot)

3:	# bytecopy
	beq	SIZEREG,zero,4f	# nothing left to do?
	nop
1:
	lb	t3,0(SRCREG)
	addu	SRCREG,1
	sb	t3,0(DSTREG)
	subu	SIZEREG,1
	bgtz	SIZEREG,1b		# signed test of the remaining count
	addu	DSTREG,1		# (delay slot)

4:	# copydone
	j	ra
	nop

	/*
	 *	Copy from unaligned source to aligned dest.
	 *
	 *	LWHI/LWLO are not defined in this file; presumably the
	 *	included headers map them to lwl/lwr according to byte
	 *	order so that the pair assembles one unaligned word
	 *	(confirm in <mips/asm.h>).
	 *
	 *	The pointers are advanced with addu, not addi: addi raises
	 *	an integer overflow exception when the signed 32-bit result
	 *	overflows, which stepping an address from 0x7ffffffc to
	 *	0x80000000 would do.  addu never traps.
	 */
5:	# destaligned
	andi	t0,SIZEREG,3		# t0 = bytecount mod 4
	subu	a3,SIZEREG,t0		# number of bytes to move as words
	beq	a3,zero,3b		# less than one word: byte copy
	nop
	move	SIZEREG,t0		# this many to do after we are done
	addu	a3,SRCREG,a3		# stop point

1:
	LWHI	t3,0(SRCREG)
	LWLO	t3,3(SRCREG)
	addu	SRCREG,4
	sw	t3,0(DSTREG)
	bne	SRCREG,a3,1b
	addu	DSTREG,4		# (delay slot)

	j	3b			# finish the tail in the byte loop
	nop

	/*
	 *	Backward copy, mirroring the forward code above.  Both
	 *	pointers are first moved one past the end of their buffers
	 *	and everything is then addressed with negative offsets.
	 */
6:	# backcopy -- based on above
	addu	SRCREG,SIZEREG
	addu	DSTREG,SIZEREG
	andi	t1,DSTREG,3		# t1 = low 2 bits of dest end
	bne	t1,zero,3f		# dest end unaligned: byte copy it all
	andi	t0,SRCREG,3		# (delay slot) t0 = low 2 bits of src end
	bne	t0,zero,5f		# src unaligned, dest aligned

	/*
	 *	Backward aligned->aligned copy, 8*4 bytes at a time.
	 *	(The li is also the delay slot of the branch above.)
	 */
	li	AT,-32
	and	t0,SIZEREG,AT		# count truncated to multiple of 32
	beq	t0,zero,2f		# any work to do?
	subu	SIZEREG,t0		# (delay slot) bytes left after fast loop
	subu	a3,SRCREG,t0		# run fast loop down to this address

	/*
	 *	loop body: the upper four words of the 32-byte chunk are
	 *	moved first, then the lower four, so the copy proceeds
	 *	from high addresses to low ones.
	 */
1:	# cp
	lw	t3,-16(SRCREG)
	lw	v1,-12(SRCREG)
	lw	t0,-8(SRCREG)
	lw	t1,-4(SRCREG)
	subu	SRCREG,32
	sw	t3,-16(DSTREG)
	sw	v1,-12(DSTREG)
	sw	t0,-8(DSTREG)
	sw	t1,-4(DSTREG)
	lw	t1,12(SRCREG)
	lw	t0,8(SRCREG)
	lw	v1,4(SRCREG)
	lw	t3,0(SRCREG)
	subu	DSTREG,32
	sw	t1,12(DSTREG)
	sw	t0,8(DSTREG)
	sw	v1,4(DSTREG)
	bne	SRCREG,a3,1b
	sw	t3,0(DSTREG)		# (delay slot) last store of the group

	/*
	 *	Copy a word at a time, no loop unrolling.
	 */
2:	# wordcopy
	andi	t2,SIZEREG,3		# t2 = byte count mod 4
	subu	t2,SIZEREG,t2		# t2 = number of words to copy * 4
	beq	t2,zero,3f
	subu	t0,SRCREG,t2		# (delay slot) stop at t0
	subu	SIZEREG,SIZEREG,t2	# bytes left for the byte loop
1:
	lw	t3,-4(SRCREG)
	subu	SRCREG,4
	sw	t3,-4(DSTREG)
	bne	SRCREG,t0,1b
	subu	DSTREG,4		# (delay slot)

3:	# bytecopy
	beq	SIZEREG,zero,4f		# nothing left to do?
	nop
1:
	lb	t3,-1(SRCREG)
	subu	SRCREG,1
	sb	t3,-1(DSTREG)
	subu	SIZEREG,1
	bgtz	SIZEREG,1b		# signed test of the remaining count
	subu	DSTREG,1		# (delay slot)

4:	# copydone
	j	ra
	nop

	/*
	 *	Copy from unaligned source to aligned dest, backwards.
	 *	Same LWHI/LWLO pairing as the forward case, applied to the
	 *	word that ends just below SRCREG.
	 */
5:	# destaligned
	andi	t0,SIZEREG,3		# t0 = bytecount mod 4
	subu	a3,SIZEREG,t0		# number of bytes to move as words
	beq	a3,zero,3b		# less than one word: byte copy
	nop
	move	SIZEREG,t0		# this many to do after we are done
	subu	a3,SRCREG,a3		# stop point

1:
	LWHI	t3,-4(SRCREG)
	LWLO	t3,-1(SRCREG)
	subu	SRCREG,4
	sw	t3,-4(DSTREG)
	bne	SRCREG,a3,1b
	subu	DSTREG,4		# (delay slot)

	j	3b			# finish the tail in the byte loop
	nop

	.set	reorder
	.set	at
	END(FUNCTION)
295