xref: /minix3/common/lib/libc/arch/alpha/string/bcopy.S (revision b6cbf7203b080219de306404f8022a65b7884f33)
1*b6cbf720SGianluca Guida/* $NetBSD: bcopy.S,v 1.1 2005/12/20 19:28:49 christos Exp $ */
2*b6cbf720SGianluca Guida
3*b6cbf720SGianluca Guida/*
4*b6cbf720SGianluca Guida * Copyright (c) 1995 Carnegie-Mellon University.
5*b6cbf720SGianluca Guida * All rights reserved.
6*b6cbf720SGianluca Guida *
7*b6cbf720SGianluca Guida * Author: Trevor Blackwell.  Support for use as memcpy() and memmove()
8*b6cbf720SGianluca Guida *	   added by Chris Demetriou.
9*b6cbf720SGianluca Guida *
10*b6cbf720SGianluca Guida * Permission to use, copy, modify and distribute this software and
11*b6cbf720SGianluca Guida * its documentation is hereby granted, provided that both the copyright
12*b6cbf720SGianluca Guida * notice and this permission notice appear in all copies of the
13*b6cbf720SGianluca Guida * software, derivative works or modified versions, and any portions
14*b6cbf720SGianluca Guida * thereof, and that both notices appear in supporting documentation.
15*b6cbf720SGianluca Guida *
16*b6cbf720SGianluca Guida * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
17*b6cbf720SGianluca Guida * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
18*b6cbf720SGianluca Guida * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
19*b6cbf720SGianluca Guida *
20*b6cbf720SGianluca Guida * Carnegie Mellon requests users of this software to return to
21*b6cbf720SGianluca Guida *
22*b6cbf720SGianluca Guida *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
23*b6cbf720SGianluca Guida *  School of Computer Science
24*b6cbf720SGianluca Guida *  Carnegie Mellon University
25*b6cbf720SGianluca Guida *  Pittsburgh PA 15213-3890
26*b6cbf720SGianluca Guida *
27*b6cbf720SGianluca Guida * any improvements or extensions that they make and grant Carnegie the
28*b6cbf720SGianluca Guida * rights to redistribute these changes.
29*b6cbf720SGianluca Guida */
30*b6cbf720SGianluca Guida
31*b6cbf720SGianluca Guida#include <machine/asm.h>
32*b6cbf720SGianluca Guida
/*
 * Select the exported function name and the argument-register mapping.
 *
 * memcpy()/memmove() are called as (to, from, len) while bcopy() is
 * called as (from, to, len), so the source and destination registers
 * swap between the two families.  Building with neither MEMCOPY nor
 * MEMMOVE defined produces bcopy().
 *
 * NOTE(review): this assumes a0/a1/a2 name the first three argument
 * registers, as provided by <machine/asm.h> -- confirm there.
 */
33*b6cbf720SGianluca Guida#if defined(MEMCOPY) || defined(MEMMOVE)
34*b6cbf720SGianluca Guida#ifdef MEMCOPY
35*b6cbf720SGianluca Guida#define	FUNCTION	memcpy
36*b6cbf720SGianluca Guida#else
37*b6cbf720SGianluca Guida#define FUNCTION	memmove
38*b6cbf720SGianluca Guida#endif
/* destination is the first argument, source the second */
39*b6cbf720SGianluca Guida#define	SRCREG		a1
40*b6cbf720SGianluca Guida#define	DSTREG		a0
41*b6cbf720SGianluca Guida#else /* !(defined(MEMCOPY) || defined(MEMMOVE)) */
42*b6cbf720SGianluca Guida#define	FUNCTION	bcopy
/* source is the first argument, destination the second */
43*b6cbf720SGianluca Guida#define	SRCREG		a0
44*b6cbf720SGianluca Guida#define	DSTREG		a1
45*b6cbf720SGianluca Guida#endif /* !(defined(MEMCOPY) || defined(MEMMOVE)) */
46*b6cbf720SGianluca Guida
/* the byte count is the third argument in all three variants */
47*b6cbf720SGianluca Guida#define	SIZEREG		a2
48*b6cbf720SGianluca Guida
49*b6cbf720SGianluca Guida/*
50*b6cbf720SGianluca Guida * Copy bytes.
51*b6cbf720SGianluca Guida *
52*b6cbf720SGianluca Guida * void bcopy(char *from, char *to, size_t len);
53*b6cbf720SGianluca Guida * char *memcpy(void *to, const void *from, size_t len);
54*b6cbf720SGianluca Guida * char *memmove(void *to, const void *from, size_t len);
55*b6cbf720SGianluca Guida *
56*b6cbf720SGianluca Guida * No matter how invoked, the source and destination registers are used
57*b6cbf720SGianluca Guida * directly for calculation.  There's no point in copying them to "working"
58*b6cbf720SGianluca Guida * registers, since the code uses their values "in place," and
59*b6cbf720SGianluca Guida * copying them would be slower.
60*b6cbf720SGianluca Guida */
61*b6cbf720SGianluca Guida
/*
 * FUNCTION is bcopy, memcpy or memmove, as selected by the preprocessor.
 *
 * Inputs:   SRCREG  = source address
 *           DSTREG  = destination address
 *           SIZEREG = byte count (tested as a signed 64-bit value)
 * Output:   v0 = original destination (memcpy/memmove builds only)
 * Scratch:  t0-t7, a3, a4; SRCREG, DSTREG and SIZEREG are modified.
 *
 * The copy moves one quadword (8 bytes) at a time.  ldq_u/stq_u access
 * the aligned quadword containing an address (the low three address
 * bits are ignored); ext*, ins* and msk* shift and mask bytes so that
 * the partial first and last quadwords can be merged with the existing
 * destination contents.  When the destination starts inside the source
 * range the copy runs backwards (bcopy_overlap).  The same code is
 * assembled for all three entry points, so memcpy gets the overlap
 * handling as well.
 */
62*b6cbf720SGianluca GuidaLEAF(FUNCTION,3)
63*b6cbf720SGianluca Guida
64*b6cbf720SGianluca Guida#if defined(MEMCOPY) || defined(MEMMOVE)
65*b6cbf720SGianluca Guida	/* set up return value, while we still can (DSTREG is advanced below) */
66*b6cbf720SGianluca Guida	mov	DSTREG,v0
67*b6cbf720SGianluca Guida#endif
68*b6cbf720SGianluca Guida
69*b6cbf720SGianluca Guida	/* Nothing to copy if the length is zero or negative (signed test) */
70*b6cbf720SGianluca Guida	ble	SIZEREG,bcopy_done
71*b6cbf720SGianluca Guida
72*b6cbf720SGianluca Guida	/*
	 * Check for overlap: (dst - src) <u len means the destination
	 * starts inside [src, src+len), so a forward copy would overwrite
	 * source bytes before they are read.  dst == src also takes this
	 * branch.
	 */
73*b6cbf720SGianluca Guida	subq	DSTREG,SRCREG,t5
74*b6cbf720SGianluca Guida	cmpult	t5,SIZEREG,t5
75*b6cbf720SGianluca Guida	bne	t5,bcopy_overlap
76*b6cbf720SGianluca Guida
77*b6cbf720SGianluca Guida	/* a3 = source end address (one past the last source byte) */
78*b6cbf720SGianluca Guida	addq	SRCREG,SIZEREG,a3
79*b6cbf720SGianluca Guida
80*b6cbf720SGianluca Guida	/* Get the first word: t2 = aligned quadword holding the first source byte */
81*b6cbf720SGianluca Guida	ldq_u	t2,0(SRCREG)
82*b6cbf720SGianluca Guida
83*b6cbf720SGianluca Guida	/*
	 * Do they have the same alignment?
	 * t0 = (src ^ dst) & 7 is nonzero when the byte offsets differ;
	 * t1 = dst & 7 is the destination's offset within its quadword.
	 */
84*b6cbf720SGianluca Guida	xor	SRCREG,DSTREG,t0
85*b6cbf720SGianluca Guida	and	t0,7,t0
86*b6cbf720SGianluca Guida	and	DSTREG,7,t1
87*b6cbf720SGianluca Guida	bne	t0,bcopy_different_alignment
88*b6cbf720SGianluca Guida
89*b6cbf720SGianluca Guida	/* src & dst have same alignment */
90*b6cbf720SGianluca Guida	beq	t1,bcopy_all_aligned
91*b6cbf720SGianluca Guida
	/*
	 * Same nonzero offset: build the first quadword from the source
	 * bytes at and above the offset (mskqh) and the existing
	 * destination bytes below it (mskql), and grow the length by the
	 * offset so the count is measured from the quadword boundary.
	 */
92*b6cbf720SGianluca Guida	ldq_u	t3,0(DSTREG)
93*b6cbf720SGianluca Guida	addq	SIZEREG,t1,SIZEREG
94*b6cbf720SGianluca Guida	mskqh	t2,SRCREG,t2
95*b6cbf720SGianluca Guida	mskql	t3,SRCREG,t3
96*b6cbf720SGianluca Guida	or	t2,t3,t2
97*b6cbf720SGianluca Guida
98*b6cbf720SGianluca Guida	/*
	 * From here on the copy behaves as if both pointers were 8-byte
	 * aligned: the pointers themselves are not rounded down, but
	 * ldq_u/stq_u ignore the low three address bits.
	 */
99*b6cbf720SGianluca Guida
100*b6cbf720SGianluca Guidabcopy_all_aligned:
101*b6cbf720SGianluca Guida	/*
	 * If 8 bytes or fewer remain, skip the loop.
	 * t0 = (len - 1) & ~7: bytes stored by the loop, i.e. every
	 *      quadword except the last one.
	 * SIZEREG = len & 7: valid bytes in the last quadword, where 0
	 *      means the last quadword is complete.
	 */
102*b6cbf720SGianluca Guida	subq	SIZEREG,1,t0
103*b6cbf720SGianluca Guida	and	SIZEREG,7,SIZEREG
104*b6cbf720SGianluca Guida	bic	t0,7,t0
105*b6cbf720SGianluca Guida	beq	t0,bcopy_samealign_lp_end
106*b6cbf720SGianluca Guida
107*b6cbf720SGianluca Guidabcopy_samealign_lp:
	/* store the pending quadword in t2, then fetch the next one */
108*b6cbf720SGianluca Guida	stq_u	t2,0(DSTREG)
109*b6cbf720SGianluca Guida	addq	DSTREG,8,DSTREG
110*b6cbf720SGianluca Guida	ldq_u	t2,8(SRCREG)
111*b6cbf720SGianluca Guida	subq	t0,8,t0
112*b6cbf720SGianluca Guida	addq	SRCREG,8,SRCREG
113*b6cbf720SGianluca Guida	bne	t0,bcopy_samealign_lp
114*b6cbf720SGianluca Guida
115*b6cbf720SGianluca Guidabcopy_samealign_lp_end:
116*b6cbf720SGianluca Guida	/*
	 * t2 holds the final quadword.  If it is complete (SIZEREG == 0)
	 * store it whole and exit; otherwise merge it below.  The
	 * different-alignment path also finishes here.
	 */
117*b6cbf720SGianluca Guida	bne	SIZEREG,bcopy_small_left
118*b6cbf720SGianluca Guida	stq_u	t2,0(DSTREG)
119*b6cbf720SGianluca Guida	RET
120*b6cbf720SGianluca Guida
121*b6cbf720SGianluca Guidabcopy_small_left:
	/*
	 * Partial last quadword: keep the low SIZEREG bytes of the new
	 * data (mskql) and the remaining bytes of the existing destination
	 * quadword (mskqh), then store the merged result.
	 */
122*b6cbf720SGianluca Guida	mskql	t2,SIZEREG,t4
123*b6cbf720SGianluca Guida	ldq_u	t3,0(DSTREG)
124*b6cbf720SGianluca Guida	mskqh	t3,SIZEREG,t3
125*b6cbf720SGianluca Guida	or	t4,t3,t4
126*b6cbf720SGianluca Guida	stq_u	t4,0(DSTREG)
127*b6cbf720SGianluca Guida	RET
128*b6cbf720SGianluca Guida
129*b6cbf720SGianluca Guidabcopy_different_alignment:
130*b6cbf720SGianluca Guida	/*
131*b6cbf720SGianluca Guida	 * this is the fun part
	 *
	 * Source and destination have different byte offsets, so every
	 * destination quadword is assembled from two adjacent source
	 * quadwords with extql/extqh.  On entry t2 is the first source
	 * quadword and t1 = dst & 7.
132*b6cbf720SGianluca Guida	 */
	/* a3 = source end address (same value as computed before the alignment test) */
133*b6cbf720SGianluca Guida	addq	SRCREG,SIZEREG,a3
	/* 8 bytes or fewer: handle everything in one go */
134*b6cbf720SGianluca Guida	cmpule	SIZEREG,8,t0
135*b6cbf720SGianluca Guida	bne	t0,bcopy_da_finish
136*b6cbf720SGianluca Guida
	/* destination already quadword aligned: no leading partial word */
137*b6cbf720SGianluca Guida	beq	t1,bcopy_da_noentry
138*b6cbf720SGianluca Guida
139*b6cbf720SGianluca Guida	/*
	 * Do the initial partial word.
	 * t0 = (-dst) & 7 = bytes needed to reach destination alignment.
	 * t5 = the 8 unaligned source bytes starting at SRCREG, shifted
	 *      into position by insql and merged with the destination
	 *      bytes below the offset (mskql).
	 * Both pointers then advance by t0 and t2 is reloaded, leaving
	 * DSTREG quadword aligned.
	 */
140*b6cbf720SGianluca Guida	subq	zero,DSTREG,t0
141*b6cbf720SGianluca Guida	and	t0,7,t0
142*b6cbf720SGianluca Guida	ldq_u	t3,7(SRCREG)
143*b6cbf720SGianluca Guida	extql	t2,SRCREG,t2
144*b6cbf720SGianluca Guida	extqh	t3,SRCREG,t3
145*b6cbf720SGianluca Guida	or	t2,t3,t5
146*b6cbf720SGianluca Guida	insql	t5,DSTREG,t5
147*b6cbf720SGianluca Guida	ldq_u	t6,0(DSTREG)
148*b6cbf720SGianluca Guida	mskql	t6,DSTREG,t6
149*b6cbf720SGianluca Guida	or	t5,t6,t5
150*b6cbf720SGianluca Guida	stq_u	t5,0(DSTREG)
151*b6cbf720SGianluca Guida	addq	SRCREG,t0,SRCREG
152*b6cbf720SGianluca Guida	addq	DSTREG,t0,DSTREG
153*b6cbf720SGianluca Guida	subq	SIZEREG,t0,SIZEREG
154*b6cbf720SGianluca Guida	ldq_u	t2,0(SRCREG)
155*b6cbf720SGianluca Guida
156*b6cbf720SGianluca Guidabcopy_da_noentry:
	/*
	 * Same split as the aligned case: t0 = bytes for the loop (all
	 * quadwords but the last), SIZEREG = valid bytes in the last
	 * quadword (0 means complete).
	 */
157*b6cbf720SGianluca Guida	subq	SIZEREG,1,t0
158*b6cbf720SGianluca Guida	bic	t0,7,t0
159*b6cbf720SGianluca Guida	and	SIZEREG,7,SIZEREG
160*b6cbf720SGianluca Guida	beq	t0,bcopy_da_finish2
161*b6cbf720SGianluca Guida
162*b6cbf720SGianluca Guidabcopy_da_lp:
	/*
	 * Loop unrolled twice; t2 and t3 alternate as the "previous" and
	 * "next" source quadword.  SRCREG is advanced before the ext*
	 * instructions, which is harmless: adding 8 does not change the
	 * low three address bits that select the byte shift.  The
	 * destination is aligned here, so plain stq is used.
	 */
	/* first half: previous = t2, next = t3 */
163*b6cbf720SGianluca Guida	ldq_u	t3,7(SRCREG)
164*b6cbf720SGianluca Guida	addq	SRCREG,8,SRCREG
165*b6cbf720SGianluca Guida	extql	t2,SRCREG,t4
166*b6cbf720SGianluca Guida	extqh	t3,SRCREG,t5
167*b6cbf720SGianluca Guida	subq	t0,8,t0
168*b6cbf720SGianluca Guida	or	t4,t5,t5
169*b6cbf720SGianluca Guida	stq	t5,0(DSTREG)
170*b6cbf720SGianluca Guida	addq	DSTREG,8,DSTREG
	/* exit with the most recent source quadword already in t3 */
171*b6cbf720SGianluca Guida	beq	t0,bcopy_da_finish1
	/* second half: previous = t3, next = t2 */
172*b6cbf720SGianluca Guida	ldq_u	t2,7(SRCREG)
173*b6cbf720SGianluca Guida	addq	SRCREG,8,SRCREG
174*b6cbf720SGianluca Guida	extql	t3,SRCREG,t4
175*b6cbf720SGianluca Guida	extqh	t2,SRCREG,t5
176*b6cbf720SGianluca Guida	subq	t0,8,t0
177*b6cbf720SGianluca Guida	or	t4,t5,t5
178*b6cbf720SGianluca Guida	stq	t5,0(DSTREG)
179*b6cbf720SGianluca Guida	addq	DSTREG,8,DSTREG
180*b6cbf720SGianluca Guida	bne	t0,bcopy_da_lp
181*b6cbf720SGianluca Guida
182*b6cbf720SGianluca Guidabcopy_da_finish2:
183*b6cbf720SGianluca Guida	/* The most recent source quadword is in t2 here; finish1 expects it in t3 */
184*b6cbf720SGianluca Guida	mov	t2,t3
185*b6cbf720SGianluca Guida
186*b6cbf720SGianluca Guidabcopy_da_finish1:
187*b6cbf720SGianluca Guida	/*
	 * Do the last partial word: combine t3 with the quadword holding
	 * the last source byte (a3 - 1) to form the final destination
	 * quadword in t2, then let the shared tail store it whole or
	 * merged according to SIZEREG.
	 */
188*b6cbf720SGianluca Guida	ldq_u	t2,-1(a3)
189*b6cbf720SGianluca Guida	extql	t3,SRCREG,t3
190*b6cbf720SGianluca Guida	extqh	t2,SRCREG,t2
191*b6cbf720SGianluca Guida	or	t2,t3,t2
192*b6cbf720SGianluca Guida	br	zero,bcopy_samealign_lp_end
193*b6cbf720SGianluca Guida
194*b6cbf720SGianluca Guidabcopy_da_finish:
195*b6cbf720SGianluca Guida	/*
	 * Copy of 1..8 bytes with arbitrary alignment (also entered from
	 * bcopy_ov_short).  Expects t2 = ldq_u 0(SRCREG) and a3 = source
	 * end.  All source bytes are loaded before anything is stored.
	 */
	/* t2 = the 8 unaligned bytes starting at SRCREG */
196*b6cbf720SGianluca Guida	ldq_u	t3,-1(a3)
197*b6cbf720SGianluca Guida	extql	t2,SRCREG,t2
198*b6cbf720SGianluca Guida	extqh	t3,SRCREG,t3
199*b6cbf720SGianluca Guida	or	t2,t3,t2
	/* split the data into the parts for the high (t3) and low (t2) destination quadwords */
200*b6cbf720SGianluca Guida	insqh	t2,DSTREG,t3
201*b6cbf720SGianluca Guida	insql	t2,DSTREG,t2
	/*
	 * t4 = byte mask covering the bytes to copy: all ones with the
	 * bytes at and above (len & 7) cleared.  For len == 8 that mask
	 * is zero, so cmovne leaves t4 as all ones.
	 */
202*b6cbf720SGianluca Guida	lda	t4,-1(zero)
203*b6cbf720SGianluca Guida	mskql	t4,SIZEREG,t5
204*b6cbf720SGianluca Guida	cmovne	t5,t5,t4
	/* position the mask the same way as the data: t5 = high part, t4 = low part */
205*b6cbf720SGianluca Guida	insqh	t4,DSTREG,t5
206*b6cbf720SGianluca Guida	insql	t4,DSTREG,t4
	/* a4 = destination end; t6/t7 = quadwords holding the first/last destination byte */
207*b6cbf720SGianluca Guida	addq	DSTREG,SIZEREG,a4
208*b6cbf720SGianluca Guida	ldq_u	t6,0(DSTREG)
209*b6cbf720SGianluca Guida	ldq_u	t7,-1(a4)
	/* clear the bytes being replaced, keep only the wanted data bytes, and merge */
210*b6cbf720SGianluca Guida	bic	t6,t4,t6
211*b6cbf720SGianluca Guida	bic	t7,t5,t7
212*b6cbf720SGianluca Guida	and	t2,t4,t2
213*b6cbf720SGianluca Guida	and	t3,t5,t3
214*b6cbf720SGianluca Guida	or	t2,t6,t2
215*b6cbf720SGianluca Guida	or	t3,t7,t3
	/*
	 * Store the high part first.  When the copy does not cross a
	 * quadword boundary both stores hit the same quadword and the
	 * high mask is zero, so the low-part store must land last.
	 */
216*b6cbf720SGianluca Guida	stq_u	t3,-1(a4)
217*b6cbf720SGianluca Guida	stq_u	t2,0(DSTREG)
218*b6cbf720SGianluca Guida	RET
219*b6cbf720SGianluca Guida
220*b6cbf720SGianluca Guidabcopy_overlap:
221*b6cbf720SGianluca Guida	/*
222*b6cbf720SGianluca Guida	 * Basically equivalent to previous case, only backwards.
223*b6cbf720SGianluca Guida	 * Not quite as highly optimized
	 *
	 * a3 and a4 are the source and destination end addresses and walk
	 * downwards; SRCREG and DSTREG keep their original values.
224*b6cbf720SGianluca Guida	 */
225*b6cbf720SGianluca Guida	addq	SRCREG,SIZEREG,a3
226*b6cbf720SGianluca Guida	addq	DSTREG,SIZEREG,a4
227*b6cbf720SGianluca Guida
228*b6cbf720SGianluca Guida	/* 8 bytes or fewer - don't worry about overlap */
229*b6cbf720SGianluca Guida	cmpule	SIZEREG,8,t0
230*b6cbf720SGianluca Guida	bne	t0,bcopy_ov_short
231*b6cbf720SGianluca Guida
232*b6cbf720SGianluca Guida	/*
	 * Possibly do a partial first word (at the high end of the buffer).
	 * t4 = dst_end & 7 = bytes lying above the last quadword boundary.
	 * Both end pointers step back by t4, which aligns a4; the low t4
	 * bytes of the unaligned source data are merged with the upper
	 * bytes of the existing destination quadword.
	 */
233*b6cbf720SGianluca Guida	and	a4,7,t4
234*b6cbf720SGianluca Guida	beq	t4,bcopy_ov_nostart2
235*b6cbf720SGianluca Guida	subq	a3,t4,a3
236*b6cbf720SGianluca Guida	subq	a4,t4,a4
237*b6cbf720SGianluca Guida	ldq_u	t1,0(a3)
238*b6cbf720SGianluca Guida	subq	SIZEREG,t4,SIZEREG
239*b6cbf720SGianluca Guida	ldq_u	t2,7(a3)
240*b6cbf720SGianluca Guida	ldq	t3,0(a4)
241*b6cbf720SGianluca Guida	extql	t1,a3,t1
242*b6cbf720SGianluca Guida	extqh	t2,a3,t2
243*b6cbf720SGianluca Guida	or	t1,t2,t1
244*b6cbf720SGianluca Guida	mskqh	t3,t4,t3
245*b6cbf720SGianluca Guida	mskql	t1,t4,t1
246*b6cbf720SGianluca Guida	or	t1,t3,t1
247*b6cbf720SGianluca Guida	stq	t1,0(a4)
248*b6cbf720SGianluca Guida
249*b6cbf720SGianluca Guidabcopy_ov_nostart2:
	/* t4 = bytes to copy as whole quadwords; SIZEREG = leftover bytes at the start */
250*b6cbf720SGianluca Guida	bic	SIZEREG,7,t4
251*b6cbf720SGianluca Guida	and	SIZEREG,7,SIZEREG
252*b6cbf720SGianluca Guida	beq	t4,bcopy_ov_lp_end
253*b6cbf720SGianluca Guida
254*b6cbf720SGianluca Guidabcopy_ov_lp:
255*b6cbf720SGianluca Guida	/* This could be more pipelined, but it doesn't seem worth it */
	/*
	 * Each pass copies the 8 bytes below a3 to the aligned quadword
	 * below a4: load both source quadwords, step the pointers back,
	 * extract the unaligned quadword and store it.
	 */
256*b6cbf720SGianluca Guida	ldq_u	t0,-8(a3)
257*b6cbf720SGianluca Guida	subq	a4,8,a4
258*b6cbf720SGianluca Guida	ldq_u	t1,-1(a3)
259*b6cbf720SGianluca Guida	subq	a3,8,a3
260*b6cbf720SGianluca Guida	extql	t0,a3,t0
261*b6cbf720SGianluca Guida	extqh	t1,a3,t1
262*b6cbf720SGianluca Guida	subq	t4,8,t4
263*b6cbf720SGianluca Guida	or	t0,t1,t0
264*b6cbf720SGianluca Guida	stq	t0,0(a4)
265*b6cbf720SGianluca Guida	bne	t4,bcopy_ov_lp
266*b6cbf720SGianluca Guida
267*b6cbf720SGianluca Guidabcopy_ov_lp_end:
	/* done unless 1..7 bytes remain at the very start of the buffer */
268*b6cbf720SGianluca Guida	beq	SIZEREG,bcopy_done
269*b6cbf720SGianluca Guida
	/*
	 * Remaining bytes run from DSTREG up to the next quadword
	 * boundary.  Fetch the unaligned source data at SRCREG, shift it
	 * to the destination offset (insql) and merge it with the
	 * destination bytes below that offset (mskql).
	 */
270*b6cbf720SGianluca Guida	ldq_u	t0,0(SRCREG)
271*b6cbf720SGianluca Guida	ldq_u	t1,7(SRCREG)
272*b6cbf720SGianluca Guida	ldq_u	t2,0(DSTREG)
273*b6cbf720SGianluca Guida	extql	t0,SRCREG,t0
274*b6cbf720SGianluca Guida	extqh	t1,SRCREG,t1
275*b6cbf720SGianluca Guida	or	t0,t1,t0
276*b6cbf720SGianluca Guida	insql	t0,DSTREG,t0
277*b6cbf720SGianluca Guida	mskql	t2,DSTREG,t2
278*b6cbf720SGianluca Guida	or	t2,t0,t2
279*b6cbf720SGianluca Guida	stq_u	t2,0(DSTREG)
280*b6cbf720SGianluca Guida
281*b6cbf720SGianluca Guidabcopy_done:
282*b6cbf720SGianluca Guida	RET
283*b6cbf720SGianluca Guida
284*b6cbf720SGianluca Guidabcopy_ov_short:
	/*
	 * Overlapping copy of 8 bytes or fewer: load the first source
	 * quadword as bcopy_da_finish expects (a3 is already the source
	 * end) and reuse that path, which reads all source bytes before
	 * storing.
	 */
285*b6cbf720SGianluca Guida	ldq_u	t2,0(SRCREG)
286*b6cbf720SGianluca Guida	br	zero,bcopy_da_finish
287*b6cbf720SGianluca Guida
288*b6cbf720SGianluca Guida	END(FUNCTION)
289