xref: /openbsd-src/lib/libc/arch/alpha/string/bcopy.S (revision 9b9d2a55a62c8e82206c25f94fcc7f4e2765250e)
1*9b9d2a55Sguenther/*	$OpenBSD: bcopy.S,v 1.8 2015/08/31 02:53:56 guenther Exp $	*/
239bae441Sniklas/*	$NetBSD: bcopy.S,v 1.3 1996/10/17 03:08:11 cgd Exp $	*/
339bae441Sniklas
4df930be7Sderaadt/*
5df930be7Sderaadt * Copyright (c) 1995 Carnegie-Mellon University.
6df930be7Sderaadt * All rights reserved.
7df930be7Sderaadt *
8df930be7Sderaadt * Author: Trevor Blackwell.  Support for use as memcpy() and memmove()
9df930be7Sderaadt *	   added by Chris Demetriou.
10df930be7Sderaadt *
11df930be7Sderaadt * Permission to use, copy, modify and distribute this software and
12df930be7Sderaadt * its documentation is hereby granted, provided that both the copyright
13df930be7Sderaadt * notice and this permission notice appear in all copies of the
14df930be7Sderaadt * software, derivative works or modified versions, and any portions
15df930be7Sderaadt * thereof, and that both notices appear in supporting documentation.
16df930be7Sderaadt *
17df930be7Sderaadt * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
18df930be7Sderaadt * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
19df930be7Sderaadt * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
20df930be7Sderaadt *
21df930be7Sderaadt * Carnegie Mellon requests users of this software to return to
22df930be7Sderaadt *
23df930be7Sderaadt *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
24df930be7Sderaadt *  School of Computer Science
25df930be7Sderaadt *  Carnegie Mellon University
26df930be7Sderaadt *  Pittsburgh PA 15213-3890
27df930be7Sderaadt *
28df930be7Sderaadt * any improvements or extensions that they make and grant Carnegie the
29df930be7Sderaadt * rights to redistribute these changes.
30df930be7Sderaadt */
31df930be7Sderaadt
32*9b9d2a55Sguenther#include "SYS.h"
33df930be7Sderaadt
34df930be7Sderaadt#define	SRCREG		a0	/* first argument: copy source ("from") */
35df930be7Sderaadt#define	DSTREG		a1	/* second argument: copy destination ("to") */
36df930be7Sderaadt#define	SIZEREG		a2	/* third argument: byte count ("len") */
37df930be7Sderaadt
38df930be7Sderaadt/*
39df930be7Sderaadt * Copy bytes.
40df930be7Sderaadt *
41df930be7Sderaadt * void bcopy(char *from, char *to, size_t len);
42df930be7Sderaadt *
43df930be7Sderaadt * No matter how invoked, the source and destination registers
44df930be7Sderaadt * are used for calculation.  There's no point in copying them to "working"
45df930be7Sderaadt * registers, since the code uses their values "in place," and
46df930be7Sderaadt * copying them would be slower.
47df930be7Sderaadt */
48df930be7Sderaadt
495b859c19SderaadtLEAF(bcopy,3)
	/*
	 * Copies SIZEREG bytes from SRCREG to DSTREG; the regions may
	 * overlap.
	 *
	 * Register use in this routine:
	 *   SRCREG/DSTREG/SIZEREG	arguments, modified in place
	 *   a3, a4			source / destination end addresses
	 *   t0-t7			scratch
	 *
	 * ldq_u/stq_u access the aligned quadword that contains the given
	 * address (the low three address bits are ignored), so they are
	 * used wherever a pointer may be unaligned.  Plain ldq/stq are
	 * only used where the address is known to be 8-byte aligned.
	 *
	 * Paths:
	 *   - dst inside [src, src+len): bcopy_overlap, copies backwards
	 *   - src and dst share alignment: whole-quadword forward copy
	 *   - different alignment: bcopy_different_alignment, forward copy
	 *     that reassembles each quadword from two source quadwords
	 *
	 * LEAF, RET and END_WEAK are macros defined outside this file
	 * (presumably reached through SYS.h).
	 */
50c55068e2Sderaadt	/* Check for zero length */
51c55068e2Sderaadt	beq	SIZEREG,bcopy_done
52df930be7Sderaadt
53df930be7Sderaadt	/* Check for overlap */
54df930be7Sderaadt	subq	DSTREG,SRCREG,t5	/* t5 = dst - src */
55df930be7Sderaadt	cmpult	t5,SIZEREG,t5	/* unsigned: true iff src <= dst < src+len */
56df930be7Sderaadt	bne	t5,bcopy_overlap
57df930be7Sderaadt
58df930be7Sderaadt	/* a3 = end address */
59df930be7Sderaadt	addq	SRCREG,SIZEREG,a3
60df930be7Sderaadt
61df930be7Sderaadt	/* Get the first word */
62df930be7Sderaadt	ldq_u	t2,0(SRCREG)
63df930be7Sderaadt
64df930be7Sderaadt	/* Do they have the same alignment? */
65df930be7Sderaadt	xor	SRCREG,DSTREG,t0
66df930be7Sderaadt	and	t0,7,t0	/* t0 != 0 iff the low 3 address bits differ */
67df930be7Sderaadt	and	DSTREG,7,t1	/* t1 = byte offset of dst within its quadword */
68df930be7Sderaadt	bne	t0,bcopy_different_alignment
69df930be7Sderaadt
70df930be7Sderaadt	/* src & dst have same alignment */
71df930be7Sderaadt	beq	t1,bcopy_all_aligned
72df930be7Sderaadt
	/*
	 * Unaligned start: build the first quadword from the existing
	 * destination bytes below the offset and the source bytes from
	 * the offset upward.  The length is then counted from the start
	 * of that quadword.
	 */
73df930be7Sderaadt	ldq_u	t3,0(DSTREG)
74df930be7Sderaadt	addq	SIZEREG,t1,SIZEREG	/* count from the quadword boundary */
75df930be7Sderaadt	mskqh	t2,SRCREG,t2	/* keep source bytes at/above the offset */
76df930be7Sderaadt	mskql	t3,SRCREG,t3	/* keep dest bytes below the offset */
77df930be7Sderaadt	or	t2,t3,t2
78df930be7Sderaadt
79df930be7Sderaadt	/* Dst is 8-byte aligned */
80df930be7Sderaadt
81df930be7Sderaadtbcopy_all_aligned:
82df930be7Sderaadt	/* If less than 8 bytes,skip loop */
83df930be7Sderaadt	subq	SIZEREG,1,t0
84df930be7Sderaadt	and	SIZEREG,7,SIZEREG	/* bytes in the last quadword; 0 means all 8 */
85df930be7Sderaadt	bic	t0,7,t0	/* t0 = (len-1) & ~7: bytes stored by the loop */
86df930be7Sderaadt	beq	t0,bcopy_samealign_lp_end
87df930be7Sderaadt
	/* Each pass stores the quadword held in t2 and loads the next one. */
88df930be7Sderaadtbcopy_samealign_lp:
89df930be7Sderaadt	stq_u	t2,0(DSTREG)
90df930be7Sderaadt	addq	DSTREG,8,DSTREG
91df930be7Sderaadt	ldq_u	t2,8(SRCREG)
92df930be7Sderaadt	subq	t0,8,t0
93df930be7Sderaadt	addq	SRCREG,8,SRCREG
94df930be7Sderaadt	bne	t0,bcopy_samealign_lp
95df930be7Sderaadt
	/*
	 * Also entered from bcopy_da_finish1.  On entry t2 holds the final
	 * quadword's data and SIZEREG the number of valid bytes in it
	 * (0 = all eight).
	 */
96df930be7Sderaadtbcopy_samealign_lp_end:
97df930be7Sderaadt	/* If we're done, exit */
98df930be7Sderaadt	bne	SIZEREG,bcopy_small_left
99df930be7Sderaadt	stq_u	t2,0(DSTREG)
100df930be7Sderaadt	RET
101df930be7Sderaadt
	/* Partial last quadword: merge low SIZEREG bytes of t2 into dest. */
102df930be7Sderaadtbcopy_small_left:
103df930be7Sderaadt	mskql	t2,SIZEREG,t4	/* keep the low SIZEREG bytes of new data */
104df930be7Sderaadt	ldq_u	t3,0(DSTREG)
105df930be7Sderaadt	mskqh	t3,SIZEREG,t3	/* keep dest bytes past the end of the copy */
106df930be7Sderaadt	or	t4,t3,t4
107df930be7Sderaadt	stq_u	t4,0(DSTREG)
108df930be7Sderaadt	RET
109df930be7Sderaadt
110df930be7Sderaadtbcopy_different_alignment:
111df930be7Sderaadt	/*
112df930be7Sderaadt	 * this is the fun part
113df930be7Sderaadt	 */
114df930be7Sderaadt	addq	SRCREG,SIZEREG,a3	/* a3 = src end (same value as computed above) */
115df930be7Sderaadt	cmpule	SIZEREG,8,t0	/* 8 bytes or fewer: single merged store */
116df930be7Sderaadt	bne	t0,bcopy_da_finish
117df930be7Sderaadt
118df930be7Sderaadt	beq	t1,bcopy_da_noentry	/* dst already 8-byte aligned */
119df930be7Sderaadt
120df930be7Sderaadt	/* Do the initial partial word */
121df930be7Sderaadt	subq	zero,DSTREG,t0
122df930be7Sderaadt	and	t0,7,t0	/* t0 = bytes up to the next dst quadword boundary */
123df930be7Sderaadt	ldq_u	t3,7(SRCREG)
124df930be7Sderaadt	extql	t2,SRCREG,t2
125df930be7Sderaadt	extqh	t3,SRCREG,t3
126df930be7Sderaadt	or	t2,t3,t5	/* t5 = the 8 source bytes starting at src */
127df930be7Sderaadt	insql	t5,DSTREG,t5	/* shift them up to dst's byte offset */
128df930be7Sderaadt	ldq_u	t6,0(DSTREG)
129df930be7Sderaadt	mskql	t6,DSTREG,t6	/* keep dest bytes below the offset */
130df930be7Sderaadt	or	t5,t6,t5
131df930be7Sderaadt	stq_u	t5,0(DSTREG)
132df930be7Sderaadt	addq	SRCREG,t0,SRCREG
133df930be7Sderaadt	addq	DSTREG,t0,DSTREG	/* dst is now 8-byte aligned */
134df930be7Sderaadt	subq	SIZEREG,t0,SIZEREG
135df930be7Sderaadt	ldq_u	t2,0(SRCREG)	/* t2 = quadword containing the new src */
136df930be7Sderaadt
137df930be7Sderaadtbcopy_da_noentry:
138df930be7Sderaadt	subq	SIZEREG,1,t0
139df930be7Sderaadt	bic	t0,7,t0	/* t0 = (len-1) & ~7: bytes stored by the loop */
140df930be7Sderaadt	and	SIZEREG,7,SIZEREG	/* bytes in the last quadword; 0 means all 8 */
141df930be7Sderaadt	beq	t0,bcopy_da_finish2
142df930be7Sderaadt
	/*
	 * Loop body is unrolled twice; t2 and t3 alternate as "previous"
	 * and "next" source quadword.  SRCREG is advanced before the
	 * extql/extqh pair, which only uses its low three bits, and those
	 * do not change when 8 is added.
	 */
143df930be7Sderaadtbcopy_da_lp:
144df930be7Sderaadt	ldq_u	t3,7(SRCREG)
145df930be7Sderaadt	addq	SRCREG,8,SRCREG
146df930be7Sderaadt	extql	t2,SRCREG,t4
147df930be7Sderaadt	extqh	t3,SRCREG,t5
148df930be7Sderaadt	subq	t0,8,t0
149df930be7Sderaadt	or	t4,t5,t5
150df930be7Sderaadt	stq	t5,0(DSTREG)
151df930be7Sderaadt	addq	DSTREG,8,DSTREG
152df930be7Sderaadt	beq	t0,bcopy_da_finish1	/* t3 holds the most recent source quadword */
153df930be7Sderaadt	ldq_u	t2,7(SRCREG)
154df930be7Sderaadt	addq	SRCREG,8,SRCREG
155df930be7Sderaadt	extql	t3,SRCREG,t4
156df930be7Sderaadt	extqh	t2,SRCREG,t5
157df930be7Sderaadt	subq	t0,8,t0
158df930be7Sderaadt	or	t4,t5,t5
159df930be7Sderaadt	stq	t5,0(DSTREG)
160df930be7Sderaadt	addq	DSTREG,8,DSTREG
161df930be7Sderaadt	bne	t0,bcopy_da_lp
162df930be7Sderaadt
163df930be7Sderaadtbcopy_da_finish2:
164df930be7Sderaadt	/* Do the last new word */
165df930be7Sderaadt	mov	t2,t3	/* finish1 expects the most recent quadword in t3 */
166df930be7Sderaadt
167df930be7Sderaadtbcopy_da_finish1:
168df930be7Sderaadt	/* Do the last partial word */
169df930be7Sderaadt	ldq_u	t2,-1(a3)	/* quadword containing the last source byte */
170df930be7Sderaadt	extql	t3,SRCREG,t3
171df930be7Sderaadt	extqh	t2,SRCREG,t2
172df930be7Sderaadt	or	t2,t3,t2	/* t2 = final quadword of data */
173df930be7Sderaadt	br	zero,bcopy_samealign_lp_end	/* shared full/partial final store */
174df930be7Sderaadt
	/*
	 * Short copy (1..8 bytes), also used by bcopy_ov_short.  Expects
	 * t2 = quadword containing the first source byte and a3 = source
	 * end.  The source bytes are loaded before any store is issued,
	 * and the destination (which may straddle two quadwords) is
	 * updated by read-modify-write under a byte mask.
	 */
175df930be7Sderaadtbcopy_da_finish:
176df930be7Sderaadt	/* Do the last word in the next source word */
177df930be7Sderaadt	ldq_u	t3,-1(a3)
178df930be7Sderaadt	extql	t2,SRCREG,t2
179df930be7Sderaadt	extqh	t3,SRCREG,t3
180df930be7Sderaadt	or	t2,t3,t2	/* t2 = source bytes, starting at byte 0 */
181df930be7Sderaadt	insqh	t2,DSTREG,t3	/* t3 = data spilling into the next dst quadword */
182df930be7Sderaadt	insql	t2,DSTREG,t2	/* t2 = data for the first dst quadword */
183df930be7Sderaadt	lda	t4,-1(zero)	/* t4 = all ones */
184df930be7Sderaadt	mskql	t4,SIZEREG,t5	/* t5 = ones in the low (len & 7) bytes */
185df930be7Sderaadt	cmovne	t5,t5,t4	/* t4 = byte mask for len bytes (all ones if len == 8) */
186df930be7Sderaadt	insqh	t4,DSTREG,t5	/* t5 = mask for the next dst quadword */
187df930be7Sderaadt	insql	t4,DSTREG,t4	/* t4 = mask for the first dst quadword */
188df930be7Sderaadt	addq	DSTREG,SIZEREG,a4	/* a4 = dst end */
189df930be7Sderaadt	ldq_u	t6,0(DSTREG)
190df930be7Sderaadt	ldq_u	t7,-1(a4)	/* quadword containing the last dst byte */
191df930be7Sderaadt	bic	t6,t4,t6	/* clear the dst bytes about to be replaced */
192df930be7Sderaadt	bic	t7,t5,t7
193df930be7Sderaadt	and	t2,t4,t2	/* drop data bytes outside the mask */
194df930be7Sderaadt	and	t3,t5,t3
195df930be7Sderaadt	or	t2,t6,t2
196df930be7Sderaadt	or	t3,t7,t3
	/*
	 * The high quadword is stored first: when both stores address the
	 * same quadword the high mask is zero, so t3 is just the old
	 * contents and the merged t2 must be the store that lands last.
	 */
197df930be7Sderaadt	stq_u	t3,-1(a4)
198df930be7Sderaadt	stq_u	t2,0(DSTREG)
199df930be7Sderaadt	RET
200df930be7Sderaadt
201df930be7Sderaadtbcopy_overlap:
202df930be7Sderaadt	/*
203df930be7Sderaadt	 * Basically equivalent to previous case, only backwards.
204df930be7Sderaadt	 * Not quite as highly optimized
205df930be7Sderaadt	 */
206df930be7Sderaadt	addq	SRCREG,SIZEREG,a3	/* a3 = src end */
207df930be7Sderaadt	addq	DSTREG,SIZEREG,a4	/* a4 = dst end */
208df930be7Sderaadt
209df930be7Sderaadt	/* 8 bytes or fewer - don't worry about overlap */
210df930be7Sderaadt	cmpule	SIZEREG,8,t0
211df930be7Sderaadt	bne	t0,bcopy_ov_short
212df930be7Sderaadt
213df930be7Sderaadt	/* Possibly do a partial first word */
214df930be7Sderaadt	and	a4,7,t4	/* t4 = bytes in the partial quadword at the dst end */
215df930be7Sderaadt	beq	t4,bcopy_ov_nostart2
216df930be7Sderaadt	subq	a3,t4,a3
217df930be7Sderaadt	subq	a4,t4,a4	/* a4 is now 8-byte aligned */
218df930be7Sderaadt	ldq_u	t1,0(a3)
219df930be7Sderaadt	subq	SIZEREG,t4,SIZEREG
220df930be7Sderaadt	ldq_u	t2,7(a3)
221df930be7Sderaadt	ldq	t3,0(a4)
222df930be7Sderaadt	extql	t1,a3,t1
223df930be7Sderaadt	extqh	t2,a3,t2
224df930be7Sderaadt	or	t1,t2,t1	/* t1 = the 8 source bytes starting at a3 */
225df930be7Sderaadt	mskqh	t3,t4,t3	/* keep dest bytes past the end of the copy */
226df930be7Sderaadt	mskql	t1,t4,t1	/* keep the low t4 bytes of source data */
227df930be7Sderaadt	or	t1,t3,t1
228df930be7Sderaadt	stq	t1,0(a4)
229df930be7Sderaadt
230df930be7Sderaadtbcopy_ov_nostart2:
231df930be7Sderaadt	bic	SIZEREG,7,t4	/* t4 = bytes to copy as whole quadwords */
232df930be7Sderaadt	and	SIZEREG,7,SIZEREG	/* leftover bytes at the start of the buffers */
233df930be7Sderaadt	beq	t4,bcopy_ov_lp_end
234df930be7Sderaadt
235df930be7Sderaadtbcopy_ov_lp:
236df930be7Sderaadt	/* This could be more pipelined, but it doesn't seem worth it */
237df930be7Sderaadt	ldq_u	t0,-8(a3)
238df930be7Sderaadt	subq	a4,8,a4
239df930be7Sderaadt	ldq_u	t1,-1(a3)
240df930be7Sderaadt	subq	a3,8,a3
241df930be7Sderaadt	extql	t0,a3,t0
242df930be7Sderaadt	extqh	t1,a3,t1
243df930be7Sderaadt	subq	t4,8,t4
244df930be7Sderaadt	or	t0,t1,t0	/* t0 = the 8 source bytes starting at the new a3 */
245df930be7Sderaadt	stq	t0,0(a4)
246df930be7Sderaadt	bne	t4,bcopy_ov_lp
247df930be7Sderaadt
248df930be7Sderaadtbcopy_ov_lp_end:
249df930be7Sderaadt	beq	SIZEREG,bcopy_done
250df930be7Sderaadt
	/*
	 * Leftover head bytes.  SRCREG and DSTREG still hold the original
	 * start addresses (the backward copy only moved a3/a4), and the
	 * remaining bytes run from DSTREG to the end of its quadword.
	 */
251df930be7Sderaadt	ldq_u	t0,0(SRCREG)
252df930be7Sderaadt	ldq_u	t1,7(SRCREG)
253df930be7Sderaadt	ldq_u	t2,0(DSTREG)
254df930be7Sderaadt	extql	t0,SRCREG,t0
255df930be7Sderaadt	extqh	t1,SRCREG,t1
256df930be7Sderaadt	or	t0,t1,t0	/* t0 = the 8 source bytes starting at src */
257df930be7Sderaadt	insql	t0,DSTREG,t0	/* shift them up to dst's byte offset */
258df930be7Sderaadt	mskql	t2,DSTREG,t2	/* keep dest bytes below the offset */
259df930be7Sderaadt	or	t2,t0,t2
260df930be7Sderaadt	stq_u	t2,0(DSTREG)
261df930be7Sderaadt
262df930be7Sderaadtbcopy_done:
263df930be7Sderaadt	RET
264df930be7Sderaadt
	/*
	 * Overlapping copy of 8 bytes or fewer: reuse bcopy_da_finish,
	 * which needs t2 loaded here and a3 = src end (set at the top of
	 * bcopy_overlap).
	 */
265df930be7Sderaadtbcopy_ov_short:
266df930be7Sderaadt	ldq_u	t2,0(SRCREG)
267df930be7Sderaadt	br	zero,bcopy_da_finish
268df930be7Sderaadt
269*9b9d2a55Sguenther	END_WEAK(bcopy)
270