xref: /netbsd-src/common/lib/libc/arch/mips/string/bcopy.S (revision 4c44c335c5f843ea8f2463e3c9aa46eeb8162a1d)
1*4c44c335Sbouyer/*	$NetBSD: bcopy.S,v 1.4 2011/08/27 13:23:52 bouyer Exp $	*/
2734fc1f9Schristos
3734fc1f9Schristos/*
4734fc1f9Schristos * Mach Operating System
5734fc1f9Schristos * Copyright (c) 1993 Carnegie Mellon University
6734fc1f9Schristos * All Rights Reserved.
7734fc1f9Schristos *
8734fc1f9Schristos * Permission to use, copy, modify and distribute this software and its
9734fc1f9Schristos * documentation is hereby granted, provided that both the copyright
10734fc1f9Schristos * notice and this permission notice appear in all copies of the
11734fc1f9Schristos * software, derivative works or modified versions, and any portions
12734fc1f9Schristos * thereof, and that both notices appear in supporting documentation.
13734fc1f9Schristos *
14734fc1f9Schristos * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
15734fc1f9Schristos * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
16734fc1f9Schristos * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
17734fc1f9Schristos *
18734fc1f9Schristos * Carnegie Mellon requests users of this software to return to
19734fc1f9Schristos *
20734fc1f9Schristos *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
21734fc1f9Schristos *  School of Computer Science
22734fc1f9Schristos *  Carnegie Mellon University
23734fc1f9Schristos *  Pittsburgh PA 15213-3890
24734fc1f9Schristos *
25734fc1f9Schristos * any improvements or extensions that they make and grant Carnegie Mellon
26734fc1f9Schristos * the rights to redistribute these changes.
27734fc1f9Schristos */
28734fc1f9Schristos
29734fc1f9Schristos/*
30734fc1f9Schristos *	File:	mips_bcopy.s
31734fc1f9Schristos *	Author:	Chris Maeda
32734fc1f9Schristos *	Date:	June 1993
33734fc1f9Schristos *
34734fc1f9Schristos *	Fast copy routine.  Derived from aligned_block_copy.
35734fc1f9Schristos */
36734fc1f9Schristos
37734fc1f9Schristos
38734fc1f9Schristos#include <mips/asm.h>
39d5f01723Stsutsui#ifndef _LOCORE
40734fc1f9Schristos#define _LOCORE		/* XXX not really, just assembly-code source */
41d5f01723Stsutsui#endif
42734fc1f9Schristos#include <machine/endian.h>
43734fc1f9Schristos
44734fc1f9Schristos
45734fc1f9Schristos#if defined(LIBC_SCCS) && !defined(lint)
468daf714eSmatt#if 0
478daf714eSmatt	RCSID("from: @(#)mips_bcopy.s	2.2 CMU 18/06/93")
488daf714eSmatt#else
49*4c44c335Sbouyer	RCSID("$NetBSD: bcopy.S,v 1.4 2011/08/27 13:23:52 bouyer Exp $")
50734fc1f9Schristos#endif
518daf714eSmatt#endif /* LIBC_SCCS and not lint */
52734fc1f9Schristos
53734fc1f9Schristos/*
54734fc1f9Schristos *	bcopy(caddr_t src, caddr_t dst, unsigned int len)
55734fc1f9Schristos *
56734fc1f9Schristos *	a0 	src address
57734fc1f9Schristos *	a1	dst address
58734fc1f9Schristos *	a2	length
59734fc1f9Schristos */
60734fc1f9Schristos
/*
 * Select the entry point this translation unit assembles to, and bind the
 * symbolic register names used by the copy code to its argument order:
 *
 *	MEMCOPY defined		memcpy:  destination in a0, source in a1
 *	MEMMOVE defined		memmove: destination in a0, source in a1
 *	neither defined		bcopy:   source in a0, destination in a1
 *
 * MEMCOPY takes precedence when both are defined.  The length is always
 * taken from a2.
 */
#if defined(MEMCOPY)
#define	FUNCTION	memcpy
#define	SRCREG		a1
#define	DSTREG		a0
#elif defined(MEMMOVE)
#define	FUNCTION	memmove
#define	SRCREG		a1
#define	DSTREG		a0
#else
#define	FUNCTION	bcopy
#define	SRCREG		a0
#define	DSTREG		a1
#endif

#define	SIZEREG		a2
76734fc1f9Schristos
/*
 * FUNCTION (bcopy, memcpy or memmove, depending on MEMCOPY/MEMMOVE)
 *
 * Copies SIZEREG bytes from the address in SRCREG to the address in DSTREG.
 * The copy runs forwards when SRCREG >= DSTREG and backwards otherwise, so
 * overlapping regions are handled in either direction.
 *
 *	In:	SRCREG	source address
 *		DSTREG	destination address
 *		SIZEREG	length in bytes (treated as unsigned)
 *	Out:	v0	original DSTREG (memcpy/memmove builds only)
 *	Uses:	AT, v1, a3, t0-t3; SRCREG, DSTREG and SIZEREG are consumed.
 *
 * The whole routine is assembled under ".set noreorder": the instruction
 * that textually follows every branch is its delay slot and executes
 * whether or not the branch is taken.  Delay slots are marked "(delay slot)".
 *
 * Numeric local labels are reused by the forward and backward halves:
 *	1 = loop head, 2 = word copy, 3 = byte copy, 4 = done,
 *	5 = unaligned source / aligned destination, 6 = backward copy.
 */
LEAF(FUNCTION)
	.set	noat
	.set	noreorder

#if defined(MEMCOPY) || defined(MEMMOVE)
	/* return value is the destination; grab it before DSTREG is advanced */
	move	v0,DSTREG
#endif
	/*
	 *	Make sure we can copy forwards: if the source is below the
	 *	destination, go copy backwards instead.
	 */
	sltu	t0,SRCREG,DSTREG	# t0 = (SRCREG < DSTREG)
	bne	t0,zero,6f		# copy backwards

	/*
	 * 	Alignment cases (with frequency)
	 *	(Based on measurements taken with a DECstation 5000/200
	 *	 inside a Mach kernel.)
	 *
	 * 	aligned   -> aligned		(mostly)
	 * 	unaligned -> aligned		(sometimes)
	 * 	aligned,unaligned -> unaligned	(almost never)
	 *
	 *	An unaligned destination is always copied a byte at a time.
	 *
	 *	Note that we could add another case that checks if
	 *	the destination and source are unaligned but the
	 *	copy is alignable.  eg if src and dest are both
	 *	on a halfword boundary.
	 */
	andi		t1,DSTREG,(SZREG-1)	# (delay slot of the bne above; harmless
						#  when taken, 6: recomputes t1)
						# t1 = low bits of dest
	bne		t1,zero,3f		# dest unaligned -> byte copy
	andi		t0,SRCREG,(SZREG-1)	# (delay slot) t0 = low bits of src
	bne		t0,zero,5f		# src unaligned, dest aligned

	/*
	 *	Forward aligned->aligned copy, 8 registers (8*SZREG bytes)
	 *	at a time.
	 */
98:
	li		AT,-(SZREG*8)		# (delay slot of the bne above; AT is
						#  not used on the 5: path)
	and		t0,SIZEREG,AT		# count truncated to multiple of 8*SZREG
	PTR_ADDU	a3,SRCREG,t0		# run fast loop up to this addr
	sltu		AT,SRCREG,a3		# any work to do?
	beq		AT,zero,2f
	PTR_SUBU	SIZEREG,t0		# (delay slot) bytes left after fast loop

	/*
	 *	loop body: two groups of four loads followed by four stores;
	 *	the pointers are bumped between the groups, hence the
	 *	negative offsets in the second half.
	 */
1:	# cp
	REG_L		t3,(0*SZREG)(SRCREG)
	REG_L		v1,(1*SZREG)(SRCREG)
	REG_L		t0,(2*SZREG)(SRCREG)
	REG_L		t1,(3*SZREG)(SRCREG)
	PTR_ADDU	SRCREG,SZREG*8
	REG_S		t3,(0*SZREG)(DSTREG)
	REG_S		v1,(1*SZREG)(DSTREG)
	REG_S		t0,(2*SZREG)(DSTREG)
	REG_S		t1,(3*SZREG)(DSTREG)
	REG_L		t1,(-1*SZREG)(SRCREG)
	REG_L		t0,(-2*SZREG)(SRCREG)
	REG_L		v1,(-3*SZREG)(SRCREG)
	REG_L		t3,(-4*SZREG)(SRCREG)
	PTR_ADDU	DSTREG,SZREG*8
	REG_S		t1,(-1*SZREG)(DSTREG)
	REG_S		t0,(-2*SZREG)(DSTREG)
	REG_S		v1,(-3*SZREG)(DSTREG)
	bne		SRCREG,a3,1b
	REG_S		t3,(-4*SZREG)(DSTREG)	# (delay slot) last store of the group

	/*
	 *	Copy one register (SZREG bytes) at a time, no loop unrolling.
	 */
2:	# wordcopy
	andi		t2,SIZEREG,(SZREG-1)	# t2 = byte count mod SZREG
	PTR_SUBU	t2,SIZEREG,t2		# t2 = whole-register bytes to copy
	beq		t2,zero,3f
	PTR_ADDU	t0,SRCREG,t2		# (delay slot) stop at t0
	PTR_SUBU	SIZEREG,SIZEREG,t2	# bytes left for the byte loop
1:
	REG_L		t3,0(SRCREG)
	PTR_ADDU	SRCREG,SZREG
	REG_S		t3,0(DSTREG)
	bne		SRCREG,t0,1b
	PTR_ADDU	DSTREG,SZREG		# (delay slot)

3:	# bytecopy
	beq		SIZEREG,zero,4f		# nothing left to do?
	nop
1:
	lb		t3,0(SRCREG)
	PTR_ADDU	SRCREG,1
	sb		t3,0(DSTREG)
	PTR_SUBU	SIZEREG,1
	/*
	 * The length is unsigned and counts down by exactly one from a
	 * non-zero value, so test for zero rather than for "> 0": a signed
	 * test would stop after one byte for lengths with the top bit set.
	 */
	bne		SIZEREG,zero,1b
	PTR_ADDU	DSTREG,1		# (delay slot)

4:	# copydone
	.set at		#-mfix-loongson2f-btb
	j	ra
	nop
	.set noat

	/*
	 *	Copy from unaligned source to aligned dest (forwards).
	 *	Each register is assembled with the REG_LHI/REG_LLO pair
	 *	(macros from <mips/asm.h>; presumably the endian-appropriate
	 *	unaligned load-left/load-right instructions) and stored
	 *	with one aligned REG_S.
	 */
5:	# destaligned
	andi		t0,SIZEREG,(SZREG-1)	# t0 = bytecount mod SZREG
	PTR_SUBU	a3,SIZEREG,t0		# whole-register bytes to transfer
	beq		a3,zero,3b
	nop
	move		SIZEREG,t0		# this many to do after we are done
	PTR_ADDU	a3,SRCREG,a3		# stop point

1:
	REG_LHI		t3,0(SRCREG)
	REG_LLO		t3,SZREG-1(SRCREG)
	/*
	 * PTR_ADDU, not PTR_ADDI: addresses must be advanced with the
	 * non-trapping add, as every other pointer update here does.
	 */
	PTR_ADDU	SRCREG,SZREG
	REG_S		t3,0(DSTREG)
	bne		SRCREG,a3,1b
	PTR_ADDU	DSTREG,SZREG		# (delay slot)

	b		3b			# finish the tail a byte at a time
	nop

	/*
	 *	Backward copy -- mirrors the forward code above.  Both
	 *	pointers are first moved one past the end of their buffers
	 *	and then walk downwards.
	 */
6:	# backcopy -- based on above
	PTR_ADDU	SRCREG,SIZEREG
	PTR_ADDU	DSTREG,SIZEREG
	andi		t1,DSTREG,SZREG-1	# t1 = low bits of dest (end)
	bne		t1,zero,3f		# dest unaligned -> byte copy
	andi		t0,SRCREG,SZREG-1	# (delay slot) t0 = low bits of src (end)
	bne		t0,zero,5f		# src unaligned, dest aligned

	/*
	 *	Backward aligned->aligned copy, 8 registers (8*SZREG bytes)
	 *	at a time.
	 */
	li		AT,(-8*SZREG)		# (delay slot of the bne above; AT is
						#  not used on the 5: path)
	and		t0,SIZEREG,AT		# count truncated to multiple of 8*SZREG
	beq		t0,zero,2f		# any work to do?
	PTR_SUBU	SIZEREG,t0		# (delay slot) bytes left after fast loop
	PTR_SUBU	a3,SRCREG,t0		# run fast loop down to this addr

	/*
	 *	loop body: the top four registers are moved first, then the
	 *	pointers are lowered and the next four are moved.
	 */
1:	# cp
	REG_L		t3,(-4*SZREG)(SRCREG)
	REG_L		v1,(-3*SZREG)(SRCREG)
	REG_L		t0,(-2*SZREG)(SRCREG)
	REG_L		t1,(-1*SZREG)(SRCREG)
	PTR_SUBU	SRCREG,8*SZREG
	REG_S		t3,(-4*SZREG)(DSTREG)
	REG_S		v1,(-3*SZREG)(DSTREG)
	REG_S		t0,(-2*SZREG)(DSTREG)
	REG_S		t1,(-1*SZREG)(DSTREG)
	REG_L		t1,(3*SZREG)(SRCREG)
	REG_L		t0,(2*SZREG)(SRCREG)
	REG_L		v1,(1*SZREG)(SRCREG)
	REG_L		t3,(0*SZREG)(SRCREG)
	PTR_SUBU	DSTREG,8*SZREG
	REG_S		t1,(3*SZREG)(DSTREG)
	REG_S		t0,(2*SZREG)(DSTREG)
	REG_S		v1,(1*SZREG)(DSTREG)
	bne		SRCREG,a3,1b
	REG_S		t3,(0*SZREG)(DSTREG)	# (delay slot) last store of the group

	/*
	 *	Copy one register (SZREG bytes) at a time, no loop unrolling.
	 */
2:	# wordcopy
	andi		t2,SIZEREG,SZREG-1	# t2 = byte count mod SZREG
	PTR_SUBU	t2,SIZEREG,t2		# t2 = whole-register bytes to copy
	beq		t2,zero,3f
	PTR_SUBU	t0,SRCREG,t2		# (delay slot) stop at t0
	PTR_SUBU	SIZEREG,SIZEREG,t2	# bytes left for the byte loop
1:
	REG_L		t3,-SZREG(SRCREG)
	PTR_SUBU	SRCREG,SZREG
	REG_S		t3,-SZREG(DSTREG)
	bne		SRCREG,t0,1b
	PTR_SUBU	DSTREG,SZREG		# (delay slot)

3:	# bytecopy
	beq		SIZEREG,zero,4f		# nothing left to do?
	nop
1:
	lb		t3,-1(SRCREG)
	PTR_SUBU	SRCREG,1
	sb		t3,-1(DSTREG)
	PTR_SUBU	SIZEREG,1
	/* unsigned length counting down by one: test for zero, not "> 0" */
	bne		SIZEREG,zero,1b
	PTR_SUBU	DSTREG,1		# (delay slot)

4:	# copydone
	.set at		#-mfix-loongson2f-btb
	j	ra
	nop
	.set noat

	/*
	 *	Copy from unaligned source to aligned dest (backwards).
	 */
5:	# destaligned
	andi		t0,SIZEREG,SZREG-1	# t0 = bytecount mod SZREG
	PTR_SUBU	a3,SIZEREG,t0		# whole-register bytes to transfer
	beq		a3,zero,3b
	nop
	move		SIZEREG,t0		# this many to do after we are done
	PTR_SUBU	a3,SRCREG,a3		# stop point

1:
	REG_LHI		t3,-SZREG(SRCREG)
	REG_LLO		t3,-1(SRCREG)
	PTR_SUBU	SRCREG,SZREG
	REG_S		t3,-SZREG(DSTREG)
	bne		SRCREG,a3,1b
	PTR_SUBU	DSTREG,SZREG		# (delay slot)

	b		3b			# finish the tail a byte at a time
	nop

	.set	reorder
	.set	at
	END(FUNCTION)
299