/* xref: /openbsd-src/lib/libc/arch/alpha/string/bcopy.S (revision b2ea75c1b17e1a9a339660e7ed45cd24946b230e) */
/*	$OpenBSD: bcopy.S,v 1.4 1996/11/13 23:24:27 etheisen Exp $	*/
/*	$NetBSD: bcopy.S,v 1.3 1996/10/17 03:08:11 cgd Exp $	*/

/*
 * Copyright (c) 1995 Carnegie-Mellon University.
 * All rights reserved.
 *
 * Author: Trevor Blackwell.  Support for use as memcpy() and memmove()
 *	   added by Chris Demetriou.
 *
 * Permission to use, copy, modify and distribute this software and
 * its documentation is hereby granted, provided that both the copyright
 * notice and this permission notice appear in all copies of the
 * software, derivative works or modified versions, and any portions
 * thereof, and that both notices appear in supporting documentation.
 *
 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
 * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
 *
 * Carnegie Mellon requests users of this software to return to
 *
 *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
 *  School of Computer Science
 *  Carnegie Mellon University
 *  Pittsburgh PA 15213-3890
 *
 * any improvements or extensions that they make and grant Carnegie the
 * rights to redistribute these changes.
 */

#if defined(LIBC_SCCS)
	.text
	.asciz "$OpenBSD: bcopy.S,v 1.4 1996/11/13 23:24:27 etheisen Exp $"
#endif

#include <machine/asm.h>

/*
 * Entry-point selection.
 *
 * This one source is assembled as memcpy when MEMCOPY is defined, as
 * memmove when MEMMOVE is defined, and as bcopy otherwise.  bcopy takes
 * its pointers as (from, to) while memcpy and memmove take (to, from),
 * so the source and destination register names are swapped between the
 * two families; the copy code itself only ever refers to SRCREG, DSTREG
 * and SIZEREG.
 */
#if defined(MEMCOPY) || defined(MEMMOVE)
#ifdef MEMCOPY
#define	FUNCTION	memcpy
#else
#define	FUNCTION	memmove
#endif
#define	SRCREG		a1
#define	DSTREG		a0
#else /* !(defined(MEMCOPY) || defined(MEMMOVE)) */
#define	FUNCTION	bcopy
#define	SRCREG		a0
#define	DSTREG		a1
#endif /* !(defined(MEMCOPY) || defined(MEMMOVE)) */

/* The length is passed in the same register for every variant. */
#define	SIZEREG		a2

/*
 * Copy bytes.
 *
 * void bcopy(char *from, char *to, size_t len);
 * char *memcpy(void *to, const void *from, size_t len);
 * char *memmove(void *to, const void *from, size_t len);
 *
 * One body serves all three entry points; FUNCTION, SRCREG and DSTREG
 * select the symbol name and which argument register holds the source
 * and which the destination.  The memcpy/memmove builds also return the
 * original destination pointer in v0.
 *
 * No matter how invoked, the source and destination argument registers
 * are used directly for calculation.  There's no point in copying them
 * to "working" registers, since the code uses their values "in place,"
 * and copying them would be slower.
 *
 * Every variant checks for overlap: when the destination starts inside
 * the source region the copy runs backward from the end, otherwise it
 * runs forward.  A length that is zero, or negative when read as a
 * signed quadword, copies nothing.
 *
 * Register usage:
 *	SRCREG, DSTREG, SIZEREG	arguments, modified in place
 *	a3			one past the last source byte
 *	a4			one past the last destination byte
 *	t0-t7			scratch
 *	v0			return value (memcpy/memmove only)
 *
 * All unaligned accesses are done with ldq_u/stq_u, which ignore the
 * low three address bits and transfer the enclosing aligned quadword;
 * the ext/ins/msk byte instructions then pick out or merge the bytes
 * that actually belong to the copy.
 */

LEAF(FUNCTION,3)

#if defined(MEMCOPY) || defined(MEMMOVE)
	/* set up return value, while we still can */
	mov	DSTREG,v0
#endif

	/* Nothing to do for a zero (or, as a signed value, negative) length */
	ble	SIZEREG,bcopy_done

	/*
	 * Check for overlap: if (dst - src), taken as unsigned, is less
	 * than the length, dst starts inside the source region and the
	 * copy has to run backward.
	 */
	subq	DSTREG,SRCREG,t5
	cmpult	t5,SIZEREG,t5
	bne	t5,bcopy_overlap

	/* a3 = end address (one past the last source byte) */
	addq	SRCREG,SIZEREG,a3

	/* Get the first source quadword */
	ldq_u	t2,0(SRCREG)

	/*
	 * Do they have the same alignment?
	 * t0 = (src ^ dst) & 7, t1 = dst offset within its quadword.
	 */
	xor	SRCREG,DSTREG,t0
	and	t0,7,t0
	and	DSTREG,7,t1
	bne	t0,bcopy_different_alignment

	/* src & dst have same alignment */
	beq	t1,bcopy_all_aligned

	/*
	 * Unaligned start: count the length from the enclosing quadword
	 * boundary, and fill the bytes below the start offset from the
	 * destination so that storing the whole quadword preserves them.
	 */
	ldq_u	t3,0(DSTREG)
	addq	SIZEREG,t1,SIZEREG
	mskqh	t2,SRCREG,t2
	mskql	t3,SRCREG,t3
	or	t2,t3,t2

	/*
	 * From here on the copy proceeds in whole quadwords; stq_u and
	 * ldq_u ignore the low address bits, so DSTREG and SRCREG need
	 * not actually be rounded down.
	 */

bcopy_all_aligned:
	/*
	 * t0 = byte count of the quadwords that precede the final one,
	 * SIZEREG = length mod 8 (0 means the final quadword is full).
	 * If everything fits in a single quadword, skip the loop.
	 */
	subq	SIZEREG,1,t0
	and	SIZEREG,7,SIZEREG
	bic	t0,7,t0
	beq	t0,bcopy_samealign_lp_end

bcopy_samealign_lp:
	/* Store the quadword in hand, then fetch the next one */
	stq_u	t2,0(DSTREG)
	addq	DSTREG,8,DSTREG
	ldq_u	t2,8(SRCREG)
	subq	t0,8,t0
	addq	SRCREG,8,SRCREG
	bne	t0,bcopy_samealign_lp

bcopy_samealign_lp_end:
	/* t2 holds the final quadword; if it is full, store it and exit */
	bne	SIZEREG,bcopy_small_left
	stq_u	t2,0(DSTREG)
	RET

bcopy_small_left:
	/*
	 * Final partial quadword: take the low SIZEREG bytes from t2 and
	 * the remaining high bytes from the destination.
	 */
	mskql	t2,SIZEREG,t4
	ldq_u	t3,0(DSTREG)
	mskqh	t3,SIZEREG,t3
	or	t4,t3,t4
	stq_u	t4,0(DSTREG)
	RET

bcopy_different_alignment:
	/*
	 * this is the fun part
	 *
	 * On entry t1 = dst & 7, t2 = first source quadword, and a3 is
	 * still the source end address computed above (neither SRCREG
	 * nor SIZEREG has been modified since).
	 */

	/* 8 bytes or fewer: handled by a single masked pair of stores */
	cmpule	SIZEREG,8,t0
	bne	t0,bcopy_da_finish

	/* dst already quadword aligned: no leading partial word */
	beq	t1,bcopy_da_noentry

	/*
	 * Do the initial partial word.  t0 = number of bytes needed to
	 * bring dst up to a quadword boundary.  Assemble eight unaligned
	 * source bytes, shift them up to dst's byte offset, and merge
	 * them with the destination bytes below that offset.
	 */
	subq	zero,DSTREG,t0
	and	t0,7,t0
	ldq_u	t3,7(SRCREG)
	extql	t2,SRCREG,t2
	extqh	t3,SRCREG,t3
	or	t2,t3,t5
	insql	t5,DSTREG,t5
	ldq_u	t6,0(DSTREG)
	mskql	t6,DSTREG,t6
	or	t5,t6,t5
	stq_u	t5,0(DSTREG)
	addq	SRCREG,t0,SRCREG
	addq	DSTREG,t0,DSTREG
	subq	SIZEREG,t0,SIZEREG
	ldq_u	t2,0(SRCREG)

bcopy_da_noentry:
	/*
	 * dst is now quadword aligned and t2 holds the source quadword
	 * containing SRCREG.  t0 = byte count of the quadwords that
	 * precede the final one, SIZEREG = remaining length mod 8.
	 */
	subq	SIZEREG,1,t0
	bic	t0,7,t0
	and	SIZEREG,7,SIZEREG
	beq	t0,bcopy_da_finish2

bcopy_da_lp:
	/*
	 * Unrolled twice.  Each half loads the next source quadword,
	 * combines it with the previous one into eight unaligned source
	 * bytes, and stores those to the aligned destination.  The two
	 * halves alternate between t3 and t2 as the newest quadword.
	 */
	ldq_u	t3,7(SRCREG)
	addq	SRCREG,8,SRCREG
	extql	t2,SRCREG,t4
	extqh	t3,SRCREG,t5
	subq	t0,8,t0
	or	t4,t5,t5
	stq	t5,0(DSTREG)
	addq	DSTREG,8,DSTREG
	/* leaving after the first half: newest quadword is already in t3 */
	beq	t0,bcopy_da_finish1
	ldq_u	t2,7(SRCREG)
	addq	SRCREG,8,SRCREG
	extql	t3,SRCREG,t4
	extqh	t2,SRCREG,t5
	subq	t0,8,t0
	or	t4,t5,t5
	stq	t5,0(DSTREG)
	addq	DSTREG,8,DSTREG
	bne	t0,bcopy_da_lp

bcopy_da_finish2:
	/* Newest quadword is in t2; move it to where finish1 expects it */
	mov	t2,t3

bcopy_da_finish1:
	/*
	 * Do the last word: combine t3 with the quadword that holds the
	 * final source byte, then let the same-alignment tail store
	 * either all eight bytes or only the low SIZEREG of them.
	 */
	ldq_u	t2,-1(a3)
	extql	t3,SRCREG,t3
	extqh	t2,SRCREG,t2
	or	t2,t3,t2
	br	zero,bcopy_samealign_lp_end

bcopy_da_finish:
	/*
	 * Copy 1 to 8 bytes with arbitrary alignment; also the tail of
	 * bcopy_ov_short.  Expects t2 = quadword at SRCREG and a3 =
	 * source end.  Every load is issued before either store, so an
	 * overlapping source and destination are safe here.
	 */
	ldq_u	t3,-1(a3)
	extql	t2,SRCREG,t2
	extqh	t3,SRCREG,t3
	or	t2,t3,t2

	/* Split the data at dst's byte offset: t2 = low part, t3 = spill */
	insqh	t2,DSTREG,t3
	insql	t2,DSTREG,t2

	/*
	 * Build a byte mask covering the low SIZEREG bytes (all ones
	 * when SIZEREG is 8, where mskql yields zero), then split it the
	 * same way: t4 = mask for the low quadword, t5 = mask for the
	 * spill quadword.
	 */
	lda	t4,-1(zero)
	mskql	t4,SIZEREG,t5
	cmovne	t5,t5,t4
	insqh	t4,DSTREG,t5
	insql	t4,DSTREG,t4

	/* Merge with the existing destination quadwords under the masks */
	addq	DSTREG,SIZEREG,a4
	ldq_u	t6,0(DSTREG)
	ldq_u	t7,-1(a4)
	bic	t6,t4,t6
	bic	t7,t5,t7
	and	t2,t4,t2
	and	t3,t5,t3
	or	t2,t6,t2
	or	t3,t7,t3

	/*
	 * Store the upper quadword first.  When the copy does not cross
	 * a quadword boundary both stores hit the same quadword and the
	 * spill mask is zero, so the store carrying the data must be the
	 * one that lands last.
	 */
	stq_u	t3,-1(a4)
	stq_u	t2,0(DSTREG)
	RET

bcopy_overlap:
	/*
	 * Basically equivalent to previous case, only backwards.
	 * Not quite as highly optimized
	 *
	 * a3 and a4 are one past the last source and destination byte
	 * respectively; both walk downward.
	 */
	addq	SRCREG,SIZEREG,a3
	addq	DSTREG,SIZEREG,a4

	/* 8 bytes or fewer - don't worry about overlap */
	cmpule	SIZEREG,8,t0
	bne	t0,bcopy_ov_short

	/*
	 * Possibly do a partial first word.  t4 = number of bytes by
	 * which the destination end overhangs a quadword boundary.  Back
	 * both ends up by t4 (which aligns a4), fetch eight unaligned
	 * source bytes at a3, and replace only the low t4 bytes of the
	 * destination quadword.
	 */
	and	a4,7,t4
	beq	t4,bcopy_ov_nostart2
	subq	a3,t4,a3
	subq	a4,t4,a4
	ldq_u	t1,0(a3)
	subq	SIZEREG,t4,SIZEREG
	ldq_u	t2,7(a3)
	ldq	t3,0(a4)
	extql	t1,a3,t1
	extqh	t2,a3,t2
	or	t1,t2,t1
	mskqh	t3,t4,t3
	mskql	t1,t4,t1
	or	t1,t3,t1
	stq	t1,0(a4)

bcopy_ov_nostart2:
	/* t4 = bytes to move as whole quadwords, SIZEREG = leftover (0-7) */
	bic	SIZEREG,7,t4
	and	SIZEREG,7,SIZEREG
	beq	t4,bcopy_ov_lp_end

bcopy_ov_lp:
	/* This could be more pipelined, but it doesn't seem worth it */
	/* Move the eight bytes just below a3 to the quadword below a4 */
	ldq_u	t0,-8(a3)
	subq	a4,8,a4
	ldq_u	t1,-1(a3)
	subq	a3,8,a3
	extql	t0,a3,t0
	extqh	t1,a3,t1
	subq	t4,8,t4
	or	t0,t1,t0
	stq	t0,0(a4)
	bne	t4,bcopy_ov_lp

bcopy_ov_lp_end:
	beq	SIZEREG,bcopy_done

	/*
	 * SIZEREG bytes remain at the very start of the buffers.  a4 is
	 * quadword aligned and equals DSTREG + SIZEREG here, so those
	 * bytes run from dst's byte offset to the end of its quadword.
	 * Fetch eight unaligned source bytes at SRCREG, shift them up to
	 * dst's byte offset, and keep the destination bytes below it.
	 */
	ldq_u	t0,0(SRCREG)
	ldq_u	t1,7(SRCREG)
	ldq_u	t2,0(DSTREG)
	extql	t0,SRCREG,t0
	extqh	t1,SRCREG,t1
	or	t0,t1,t0
	insql	t0,DSTREG,t0
	mskql	t2,DSTREG,t2
	or	t2,t0,t2
	stq_u	t2,0(DSTREG)

bcopy_done:
	RET

bcopy_ov_short:
	/*
	 * Short overlapping copy: load the first source quadword into t2
	 * as bcopy_da_finish expects (a3 has already been set above).
	 */
	ldq_u	t2,0(SRCREG)
	br	zero,bcopy_da_finish

	END(FUNCTION)