xref: /netbsd-src/common/lib/libc/arch/alpha/string/bcopy.S (revision 37c9f0a654f1f14465cded15c4577522438c5585)
1*37c9f0a6Schristos/* $NetBSD: bcopy.S,v 1.1 2005/12/20 19:28:49 christos Exp $ */
2*37c9f0a6Schristos
3*37c9f0a6Schristos/*
4*37c9f0a6Schristos * Copyright (c) 1995 Carnegie-Mellon University.
5*37c9f0a6Schristos * All rights reserved.
6*37c9f0a6Schristos *
7*37c9f0a6Schristos * Author: Trevor Blackwell.  Support for use as memcpy() and memmove()
8*37c9f0a6Schristos *	   added by Chris Demetriou.
9*37c9f0a6Schristos *
10*37c9f0a6Schristos * Permission to use, copy, modify and distribute this software and
11*37c9f0a6Schristos * its documentation is hereby granted, provided that both the copyright
12*37c9f0a6Schristos * notice and this permission notice appear in all copies of the
13*37c9f0a6Schristos * software, derivative works or modified versions, and any portions
14*37c9f0a6Schristos * thereof, and that both notices appear in supporting documentation.
15*37c9f0a6Schristos *
16*37c9f0a6Schristos * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
17*37c9f0a6Schristos * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
18*37c9f0a6Schristos * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
19*37c9f0a6Schristos *
20*37c9f0a6Schristos * Carnegie Mellon requests users of this software to return to
21*37c9f0a6Schristos *
22*37c9f0a6Schristos *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
23*37c9f0a6Schristos *  School of Computer Science
24*37c9f0a6Schristos *  Carnegie Mellon University
25*37c9f0a6Schristos *  Pittsburgh PA 15213-3890
26*37c9f0a6Schristos *
27*37c9f0a6Schristos * any improvements or extensions that they make and grant Carnegie the
28*37c9f0a6Schristos * rights to redistribute these changes.
29*37c9f0a6Schristos */
30*37c9f0a6Schristos
31*37c9f0a6Schristos#include <machine/asm.h>
32*37c9f0a6Schristos
/*
 * Build-time selection of the entry point name and of the registers
 * that hold the two pointers.  With MEMCOPY or MEMMOVE defined the
 * routine is assembled as memcpy or memmove, taking the destination in
 * a0 and the source in a1.  With neither defined it is assembled as
 * bcopy, taking the source in a0 and the destination in a1.  The
 * length is in a2 in every case.
 */
33*37c9f0a6Schristos#if defined(MEMCOPY) || defined(MEMMOVE)
34*37c9f0a6Schristos#ifdef MEMCOPY
35*37c9f0a6Schristos#define	FUNCTION	memcpy
36*37c9f0a6Schristos#else
37*37c9f0a6Schristos#define FUNCTION	memmove
38*37c9f0a6Schristos#endif
39*37c9f0a6Schristos#define	SRCREG		a1
40*37c9f0a6Schristos#define	DSTREG		a0
41*37c9f0a6Schristos#else /* !(defined(MEMCOPY) || defined(MEMMOVE)) */
42*37c9f0a6Schristos#define	FUNCTION	bcopy
43*37c9f0a6Schristos#define	SRCREG		a0
44*37c9f0a6Schristos#define	DSTREG		a1
45*37c9f0a6Schristos#endif /* !(defined(MEMCOPY) || defined(MEMMOVE)) */
46*37c9f0a6Schristos
47*37c9f0a6Schristos#define	SIZEREG		a2
48*37c9f0a6Schristos
49*37c9f0a6Schristos/*
50*37c9f0a6Schristos * Copy bytes.
51*37c9f0a6Schristos *
52*37c9f0a6Schristos * void bcopy(char *from, char *to, size_t len);
53*37c9f0a6Schristos * char *memcpy(void *to, const void *from, size_t len);
54*37c9f0a6Schristos * char *memmove(void *to, const void *from, size_t len);
55*37c9f0a6Schristos *
56*37c9f0a6Schristos * No matter how invoked, the source and destination registers are used
57*37c9f0a6Schristos * directly for calculation.  There's no point in copying them to "working"
58*37c9f0a6Schristos * registers, since the code uses their values "in place," and
59*37c9f0a6Schristos * copying them would be slower.
60*37c9f0a6Schristos */
61*37c9f0a6Schristos
/*
 * FUNCTION -- copy SIZEREG bytes from the buffer at SRCREG to the buffer
 * at DSTREG.
 *
 * The same body is assembled as bcopy, memcpy or memmove.  FUNCTION,
 * SRCREG and DSTREG are macros defined earlier in this file so that the
 * pointer registers match the argument order of the entry point being
 * built.  The length is always in SIZEREG.  The memcpy and memmove
 * builds also return the original destination pointer in v0.
 *
 * The overlap test below is unconditional, so every build copies
 * correctly when the destination starts inside the source range.  In
 * that case the copy runs from the high addresses downward.
 *
 * Method: data moves a quadword at a time.  ldq_u and stq_u access the
 * aligned quadword containing the given address, and the partial
 * quadwords at either end of the destination are merged with what is
 * already there using the byte mask, extract and insert instructions.
 *
 * Registers written: a0-a4, t0-t7, and v0 in the memcpy/memmove builds.
 */
62*37c9f0a6SchristosLEAF(FUNCTION,3)
63*37c9f0a6Schristos
64*37c9f0a6Schristos#if defined(MEMCOPY) || defined(MEMMOVE)
65*37c9f0a6Schristos	/* set up return value, while we still can */
	/* (the copy paths below advance DSTREG in place) */
66*37c9f0a6Schristos	mov	DSTREG,v0
67*37c9f0a6Schristos#endif
68*37c9f0a6Schristos
69*37c9f0a6Schristos	/* Nothing to copy if the length is zero or negative (signed test) */
70*37c9f0a6Schristos	ble	SIZEREG,bcopy_done
71*37c9f0a6Schristos
72*37c9f0a6Schristos	/* Check for overlap */
	/*
	 * t5 = dst - src.  Compared unsigned, it is below the length exactly
	 * when dst lies in [src, src + len), where a forward copy would
	 * overwrite source bytes before they have been read.
	 */
73*37c9f0a6Schristos	subq	DSTREG,SRCREG,t5
74*37c9f0a6Schristos	cmpult	t5,SIZEREG,t5
75*37c9f0a6Schristos	bne	t5,bcopy_overlap
76*37c9f0a6Schristos
77*37c9f0a6Schristos	/* a3 = end address (one past the last source byte) */
78*37c9f0a6Schristos	addq	SRCREG,SIZEREG,a3
79*37c9f0a6Schristos
80*37c9f0a6Schristos	/* Get the first word */
	/* (t2 = the aligned quadword that contains the first source byte) */
81*37c9f0a6Schristos	ldq_u	t2,0(SRCREG)
82*37c9f0a6Schristos
83*37c9f0a6Schristos	/* Do they have the same alignment? */
	/*
	 * t0 = (src ^ dst) & 7 is zero when both pointers have the same
	 * offset within a quadword.  t1 = dst & 7 is computed before the
	 * branch because both paths test it.
	 */
84*37c9f0a6Schristos	xor	SRCREG,DSTREG,t0
85*37c9f0a6Schristos	and	t0,7,t0
86*37c9f0a6Schristos	and	DSTREG,7,t1
87*37c9f0a6Schristos	bne	t0,bcopy_different_alignment
88*37c9f0a6Schristos
89*37c9f0a6Schristos	/* src & dst have same alignment */
	/* (an offset of zero means both are already quadword aligned) */
90*37c9f0a6Schristos	beq	t1,bcopy_all_aligned
91*37c9f0a6Schristos
	/*
	 * Unaligned start.  Count the length from the beginning of the first
	 * quadword (len += offset), then build that quadword in t2 from the
	 * source bytes at the start offset and above (mskqh) and the existing
	 * destination bytes below the offset (mskql).
	 */
92*37c9f0a6Schristos	ldq_u	t3,0(DSTREG)
93*37c9f0a6Schristos	addq	SIZEREG,t1,SIZEREG
94*37c9f0a6Schristos	mskqh	t2,SRCREG,t2
95*37c9f0a6Schristos	mskql	t3,SRCREG,t3
96*37c9f0a6Schristos	or	t2,t3,t2
97*37c9f0a6Schristos
98*37c9f0a6Schristos	/* From here the copy works on whole quadwords, next one to store in t2 */
99*37c9f0a6Schristos
100*37c9f0a6Schristosbcopy_all_aligned:
101*37c9f0a6Schristos	/* If at most one quadword is involved, skip the loop */
	/*
	 * t0 = (len - 1) & ~7 is the byte count moved by the loop, that is
	 * every quadword except the last.  SIZEREG = len & 7 is the number
	 * of bytes of the last quadword that belong to the copy, with 0
	 * meaning all 8.
	 */
102*37c9f0a6Schristos	subq	SIZEREG,1,t0
103*37c9f0a6Schristos	and	SIZEREG,7,SIZEREG
104*37c9f0a6Schristos	bic	t0,7,t0
105*37c9f0a6Schristos	beq	t0,bcopy_samealign_lp_end
106*37c9f0a6Schristos
107*37c9f0a6Schristosbcopy_samealign_lp:
	/* Store the quadword in hand, then fetch the next source quadword */
108*37c9f0a6Schristos	stq_u	t2,0(DSTREG)
109*37c9f0a6Schristos	addq	DSTREG,8,DSTREG
110*37c9f0a6Schristos	ldq_u	t2,8(SRCREG)
111*37c9f0a6Schristos	subq	t0,8,t0
112*37c9f0a6Schristos	addq	SRCREG,8,SRCREG
113*37c9f0a6Schristos	bne	t0,bcopy_samealign_lp
114*37c9f0a6Schristos
115*37c9f0a6Schristosbcopy_samealign_lp_end:
116*37c9f0a6Schristos	/* If the last quadword is full, store it whole and return */
	/* (t2 holds the last quadword, also when entered from bcopy_da_finish1) */
117*37c9f0a6Schristos	bne	SIZEREG,bcopy_small_left
118*37c9f0a6Schristos	stq_u	t2,0(DSTREG)
119*37c9f0a6Schristos	RET
120*37c9f0a6Schristos
121*37c9f0a6Schristosbcopy_small_left:
	/*
	 * Partial last quadword: keep the low SIZEREG bytes of the data
	 * (mskql), keep the destination bytes from SIZEREG upward (mskqh),
	 * merge the two and store the result.
	 */
122*37c9f0a6Schristos	mskql	t2,SIZEREG,t4
123*37c9f0a6Schristos	ldq_u	t3,0(DSTREG)
124*37c9f0a6Schristos	mskqh	t3,SIZEREG,t3
125*37c9f0a6Schristos	or	t4,t3,t4
126*37c9f0a6Schristos	stq_u	t4,0(DSTREG)
127*37c9f0a6Schristos	RET
128*37c9f0a6Schristos
129*37c9f0a6Schristosbcopy_different_alignment:
130*37c9f0a6Schristos	/*
131*37c9f0a6Schristos	 * this is the fun part
132*37c9f0a6Schristos	 */
	/*
	 * Source and destination have different offsets within a quadword,
	 * so every destination quadword is assembled from two source
	 * quadwords.  a3 = end of source (the same value computed before the
	 * branch here).  Copies of 8 bytes or fewer go to bcopy_da_finish.
	 */
133*37c9f0a6Schristos	addq	SRCREG,SIZEREG,a3
134*37c9f0a6Schristos	cmpule	SIZEREG,8,t0
135*37c9f0a6Schristos	bne	t0,bcopy_da_finish
136*37c9f0a6Schristos
	/* t1 = dst & 7, zero means the destination is already aligned */
137*37c9f0a6Schristos	beq	t1,bcopy_da_noentry
138*37c9f0a6Schristos
139*37c9f0a6Schristos	/* Do the initial partial word */
	/*
	 * t0 = (-dst) & 7 is the distance to the next destination quadword
	 * boundary.  Fetch 8 source bytes starting at src into t5 (extql on
	 * the first quadword, extqh on the one at src + 7), shift them up to
	 * the destination offset (insql), merge with the destination bytes
	 * below that offset (mskql) and store.  Then step src and dst
	 * forward by t0, shrink the length, and reload t2 for the loop.
	 */
140*37c9f0a6Schristos	subq	zero,DSTREG,t0
141*37c9f0a6Schristos	and	t0,7,t0
142*37c9f0a6Schristos	ldq_u	t3,7(SRCREG)
143*37c9f0a6Schristos	extql	t2,SRCREG,t2
144*37c9f0a6Schristos	extqh	t3,SRCREG,t3
145*37c9f0a6Schristos	or	t2,t3,t5
146*37c9f0a6Schristos	insql	t5,DSTREG,t5
147*37c9f0a6Schristos	ldq_u	t6,0(DSTREG)
148*37c9f0a6Schristos	mskql	t6,DSTREG,t6
149*37c9f0a6Schristos	or	t5,t6,t5
150*37c9f0a6Schristos	stq_u	t5,0(DSTREG)
151*37c9f0a6Schristos	addq	SRCREG,t0,SRCREG
152*37c9f0a6Schristos	addq	DSTREG,t0,DSTREG
153*37c9f0a6Schristos	subq	SIZEREG,t0,SIZEREG
154*37c9f0a6Schristos	ldq_u	t2,0(SRCREG)
155*37c9f0a6Schristos
156*37c9f0a6Schristosbcopy_da_noentry:
	/*
	 * DSTREG is quadword aligned here.  Split the length as in
	 * bcopy_all_aligned: t0 = bytes handled by the loop (all quadwords
	 * but the last), SIZEREG = bytes used in the last quadword (0 means
	 * all 8).
	 */
157*37c9f0a6Schristos	subq	SIZEREG,1,t0
158*37c9f0a6Schristos	bic	t0,7,t0
159*37c9f0a6Schristos	and	SIZEREG,7,SIZEREG
160*37c9f0a6Schristos	beq	t0,bcopy_da_finish2
161*37c9f0a6Schristos
162*37c9f0a6Schristosbcopy_da_lp:
	/*
	 * Unrolled twice.  t2 and t3 take turns holding the previous and the
	 * newly loaded source quadword, so each source quadword is loaded
	 * once.  extql takes the tail of the previous quadword and extqh the
	 * head of the new one.  Both use only the low three bits of SRCREG,
	 * which adding 8 does not change.
	 */
	/* first half: previous in t2, new in t3 */
163*37c9f0a6Schristos	ldq_u	t3,7(SRCREG)
164*37c9f0a6Schristos	addq	SRCREG,8,SRCREG
165*37c9f0a6Schristos	extql	t2,SRCREG,t4
166*37c9f0a6Schristos	extqh	t3,SRCREG,t5
167*37c9f0a6Schristos	subq	t0,8,t0
168*37c9f0a6Schristos	or	t4,t5,t5
169*37c9f0a6Schristos	stq	t5,0(DSTREG)
170*37c9f0a6Schristos	addq	DSTREG,8,DSTREG
	/* leaving here, the most recent source quadword is in t3 */
171*37c9f0a6Schristos	beq	t0,bcopy_da_finish1
	/* second half: previous in t3, new in t2 */
172*37c9f0a6Schristos	ldq_u	t2,7(SRCREG)
173*37c9f0a6Schristos	addq	SRCREG,8,SRCREG
174*37c9f0a6Schristos	extql	t3,SRCREG,t4
175*37c9f0a6Schristos	extqh	t2,SRCREG,t5
176*37c9f0a6Schristos	subq	t0,8,t0
177*37c9f0a6Schristos	or	t4,t5,t5
178*37c9f0a6Schristos	stq	t5,0(DSTREG)
179*37c9f0a6Schristos	addq	DSTREG,8,DSTREG
180*37c9f0a6Schristos	bne	t0,bcopy_da_lp
181*37c9f0a6Schristos
182*37c9f0a6Schristosbcopy_da_finish2:
183*37c9f0a6Schristos	/* Do the last new word */
	/* (the most recent source quadword is in t2, bcopy_da_finish1 wants it in t3) */
184*37c9f0a6Schristos	mov	t2,t3
185*37c9f0a6Schristos
186*37c9f0a6Schristosbcopy_da_finish1:
187*37c9f0a6Schristos	/* Do the last partial word */
	/*
	 * Load the quadword containing the last source byte, combine it with
	 * the previous one into t2, and let the same-alignment tail store it
	 * whole or partially depending on SIZEREG.
	 */
188*37c9f0a6Schristos	ldq_u	t2,-1(a3)
189*37c9f0a6Schristos	extql	t3,SRCREG,t3
190*37c9f0a6Schristos	extqh	t2,SRCREG,t2
191*37c9f0a6Schristos	or	t2,t3,t2
192*37c9f0a6Schristos	br	zero,bcopy_samealign_lp_end
193*37c9f0a6Schristos
194*37c9f0a6Schristosbcopy_da_finish:
195*37c9f0a6Schristos	/* Do the last word in the next source word */
	/*
	 * Copies of 1 to 8 bytes, also entered from bcopy_ov_short.  On
	 * entry t2 is the quadword containing the first source byte and a3
	 * is the end of the source.
	 *
	 * The source bytes are gathered into t2 and then split with insql
	 * and insqh into the parts that land in the low and the high
	 * destination quadword.  t4 becomes a byte mask covering the low
	 * SIZEREG bytes: mskql yields zero when the length is 8, and then
	 * cmovne leaves t4 at all ones.  The mask is positioned the same way
	 * as the data (t4 low part, t5 high part).
	 *
	 * Every load is done before the first store, so this path is safe
	 * for overlapping buffers.  The high quadword is stored first.  When
	 * both addresses fall in the same quadword, the low store carries
	 * the data and has to be the one that lands last.
	 */
196*37c9f0a6Schristos	ldq_u	t3,-1(a3)
197*37c9f0a6Schristos	extql	t2,SRCREG,t2
198*37c9f0a6Schristos	extqh	t3,SRCREG,t3
199*37c9f0a6Schristos	or	t2,t3,t2
200*37c9f0a6Schristos	insqh	t2,DSTREG,t3
201*37c9f0a6Schristos	insql	t2,DSTREG,t2
202*37c9f0a6Schristos	lda	t4,-1(zero)
203*37c9f0a6Schristos	mskql	t4,SIZEREG,t5
204*37c9f0a6Schristos	cmovne	t5,t5,t4
205*37c9f0a6Schristos	insqh	t4,DSTREG,t5
206*37c9f0a6Schristos	insql	t4,DSTREG,t4
	/* a4 = end of destination, t6 and t7 = current low and high destination quadwords */
207*37c9f0a6Schristos	addq	DSTREG,SIZEREG,a4
208*37c9f0a6Schristos	ldq_u	t6,0(DSTREG)
209*37c9f0a6Schristos	ldq_u	t7,-1(a4)
	/* clear the bytes being replaced, keep only the wanted data bytes, merge */
210*37c9f0a6Schristos	bic	t6,t4,t6
211*37c9f0a6Schristos	bic	t7,t5,t7
212*37c9f0a6Schristos	and	t2,t4,t2
213*37c9f0a6Schristos	and	t3,t5,t3
214*37c9f0a6Schristos	or	t2,t6,t2
215*37c9f0a6Schristos	or	t3,t7,t3
216*37c9f0a6Schristos	stq_u	t3,-1(a4)
217*37c9f0a6Schristos	stq_u	t2,0(DSTREG)
218*37c9f0a6Schristos	RET
219*37c9f0a6Schristos
220*37c9f0a6Schristosbcopy_overlap:
221*37c9f0a6Schristos	/*
222*37c9f0a6Schristos	 * Basically equivalent to previous case, only backwards.
223*37c9f0a6Schristos	 * Not quite as highly optimized
224*37c9f0a6Schristos	 */
	/* a3 = end of source, a4 = end of destination, both move downward */
225*37c9f0a6Schristos	addq	SRCREG,SIZEREG,a3
226*37c9f0a6Schristos	addq	DSTREG,SIZEREG,a4
227*37c9f0a6Schristos
228*37c9f0a6Schristos	/* 8 bytes or fewer - the short path loads everything before storing */
229*37c9f0a6Schristos	cmpule	SIZEREG,8,t0
230*37c9f0a6Schristos	bne	t0,bcopy_ov_short
231*37c9f0a6Schristos
232*37c9f0a6Schristos	/* Possibly do a partial first word */
	/*
	 * t4 = (end of destination) & 7 is the number of bytes of the copy
	 * that sit in the last, partial destination quadword.  Back both end
	 * pointers up by t4 so that a4 is quadword aligned, fetch 8 source
	 * bytes starting at a3, and store them merged with the destination:
	 * source bytes below t4 (mskql), destination bytes from t4 upward
	 * (mskqh).
	 */
233*37c9f0a6Schristos	and	a4,7,t4
234*37c9f0a6Schristos	beq	t4,bcopy_ov_nostart2
235*37c9f0a6Schristos	subq	a3,t4,a3
236*37c9f0a6Schristos	subq	a4,t4,a4
237*37c9f0a6Schristos	ldq_u	t1,0(a3)
238*37c9f0a6Schristos	subq	SIZEREG,t4,SIZEREG
239*37c9f0a6Schristos	ldq_u	t2,7(a3)
240*37c9f0a6Schristos	ldq	t3,0(a4)
241*37c9f0a6Schristos	extql	t1,a3,t1
242*37c9f0a6Schristos	extqh	t2,a3,t2
243*37c9f0a6Schristos	or	t1,t2,t1
244*37c9f0a6Schristos	mskqh	t3,t4,t3
245*37c9f0a6Schristos	mskql	t1,t4,t1
246*37c9f0a6Schristos	or	t1,t3,t1
247*37c9f0a6Schristos	stq	t1,0(a4)
248*37c9f0a6Schristos
249*37c9f0a6Schristosbcopy_ov_nostart2:
	/*
	 * t4 = len & ~7 is the byte count moved by the loop.  SIZEREG =
	 * len & 7 is what will be left at the very start of the buffers.
	 */
250*37c9f0a6Schristos	bic	SIZEREG,7,t4
251*37c9f0a6Schristos	and	SIZEREG,7,SIZEREG
252*37c9f0a6Schristos	beq	t4,bcopy_ov_lp_end
253*37c9f0a6Schristos
254*37c9f0a6Schristosbcopy_ov_lp:
255*37c9f0a6Schristos	/* This could be more pipelined, but it doesn't seem worth it */
	/*
	 * One quadword per iteration, moving down.  The two loads fetch the
	 * aligned quadwords covering the 8 source bytes just below a3, and
	 * extql and extqh join them into the value stored at the aligned
	 * address a4 - 8.
	 */
256*37c9f0a6Schristos	ldq_u	t0,-8(a3)
257*37c9f0a6Schristos	subq	a4,8,a4
258*37c9f0a6Schristos	ldq_u	t1,-1(a3)
259*37c9f0a6Schristos	subq	a3,8,a3
260*37c9f0a6Schristos	extql	t0,a3,t0
261*37c9f0a6Schristos	extqh	t1,a3,t1
262*37c9f0a6Schristos	subq	t4,8,t4
263*37c9f0a6Schristos	or	t0,t1,t0
264*37c9f0a6Schristos	stq	t0,0(a4)
265*37c9f0a6Schristos	bne	t4,bcopy_ov_lp
266*37c9f0a6Schristos
267*37c9f0a6Schristosbcopy_ov_lp_end:
	/* Finished unless some bytes remain at the start of the buffers */
268*37c9f0a6Schristos	beq	SIZEREG,bcopy_done
269*37c9f0a6Schristos
	/*
	 * The remaining SIZEREG bytes begin at the original SRCREG and
	 * DSTREG, which this path never modified.  a4 is quadword aligned
	 * and now equals DSTREG + SIZEREG, so the leftover bytes run from
	 * DSTREG to the end of its quadword.  Fetch 8 source bytes from
	 * SRCREG, shift them up to the destination offset (insql), keep the
	 * destination bytes below that offset (mskql), merge and store.
	 */
270*37c9f0a6Schristos	ldq_u	t0,0(SRCREG)
271*37c9f0a6Schristos	ldq_u	t1,7(SRCREG)
272*37c9f0a6Schristos	ldq_u	t2,0(DSTREG)
273*37c9f0a6Schristos	extql	t0,SRCREG,t0
274*37c9f0a6Schristos	extqh	t1,SRCREG,t1
275*37c9f0a6Schristos	or	t0,t1,t0
276*37c9f0a6Schristos	insql	t0,DSTREG,t0
277*37c9f0a6Schristos	mskql	t2,DSTREG,t2
278*37c9f0a6Schristos	or	t2,t0,t2
279*37c9f0a6Schristos	stq_u	t2,0(DSTREG)
280*37c9f0a6Schristos
281*37c9f0a6Schristosbcopy_done:
	/* Common exit, also the target of the zero length check at the top */
282*37c9f0a6Schristos	RET
283*37c9f0a6Schristos
284*37c9f0a6Schristosbcopy_ov_short:
	/* Overlapping copy of 8 bytes or fewer: preload t2 as bcopy_da_finish expects */
285*37c9f0a6Schristos	ldq_u	t2,0(SRCREG)
286*37c9f0a6Schristos	br	zero,bcopy_da_finish
287*37c9f0a6Schristos
288*37c9f0a6Schristos	END(FUNCTION)
289