xref: /netbsd-src/sys/lib/libkern/arch/hppa/bcopy.S (revision 181254a7b1bdde6873432bffef2d2decc4b5c22f)
1/*	$NetBSD: bcopy.S,v 1.15 2015/08/30 07:55:45 uebayasi Exp $	*/
2
3/*
4 * Copyright (c) 2002 The NetBSD Foundation, Inc.
5 * All rights reserved.
6 *
7 * This code is derived from software contributed to The NetBSD Foundation
8 * by Matthew Fredette.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 *    notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 *    notice, this list of conditions and the following disclaimer in the
17 *    documentation and/or other materials provided with the distribution.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29 * POSSIBILITY OF SUCH DAMAGE.
30 */
31
32/*
33 * Copy routines for NetBSD/hppa.
34 */
35
36#undef _LOCORE
37#define _LOCORE	/* XXX fredette - unfortunate */
38
39#if defined(SPCOPY) && !defined(_STANDALONE)
40
41#include "opt_diagnostic.h"
42#include "opt_multiprocessor.h"
43
44#include <machine/cpu.h>
45
46#endif
47
48#include <machine/asm.h>
49#include <machine/frame.h>
50#include <machine/reg.h>
51
52#if defined(LIBC_SCCS) && !defined(lint)
53RCSID("$NetBSD: bcopy.S,v 1.15 2015/08/30 07:55:45 uebayasi Exp $")
54#endif /* LIBC_SCCS and not lint */
55
56/*
57 * The stbys instruction is a little asymmetric.  When (%r2 & 3)
58 * is zero, stbys,b,m %r1, 4(%r2) works like stws,ma.  You
59 * might then wish that when (%r2 & 3) == 0, stbys,e,m %r1, -4(%r2)
60 * worked like stws,mb.  But it doesn't.
61 *
62 * This macro works around this problem.  It requires that %t2
63 * hold the number of bytes that will be written by this store
64 * (meaning that it ranges from one to four).
65 *
66 * Watch the delay-slot trickery here.  The comib is used to set
67 * up which instruction, either the stws or the stbys, is run
68 * in the delay slot of the b instruction.
69 */
/*
 * _STBYS_E_M(r, dst_spc, dst_off)
 *	r	 register holding the word to store
 *	dst_spc	 space register for the destination
 *	dst_off	 destination offset register (modified by the store)
 * %t2 must hold the number of bytes this store writes (1..4).
 *
 * The expansion is exactly four instructions and the literal branch
 * displacements (4) depend on that layout (a displacement is taken
 * relative to the branch's own address plus 8):
 *  - %t2 == 4: the comib falls through and the stws,mb runs in the
 *    delay slot of the b, which then skips the stbys.
 *  - %t2 != 4: the comib branches to the stbys with the b in its
 *    delay slot, so the stbys ends up in the b's delay slot instead.
 * Either way exactly one of the two stores runs and execution
 * continues after the stbys.  Do not insert anything between these
 * four instructions.
 */
70#define _STBYS_E_M(r, dst_spc, dst_off)				  \
71	comib,<>	4, %t2, 4				! \
72	b		4					! \
73	stws,mb		r, -4(dst_spc, dst_off)			! \
74	stbys,e,m	r, 0(dst_spc, dst_off)
75
76/*
77 * This macro does a bulk copy with no shifting.  cmplt and m are
78 * the completer and displacement multiplier, respectively, for
79 * the load and store instructions.
80 */
/*
 * _COPY(src_spc, src_off, dst_spc, dst_off, count, cmplt, m)
 *	src_spc, src_off  source space register and offset register
 *	dst_spc, dst_off  destination space register and offset register
 *	count		  register holding the number of bytes left
 *	cmplt		  load/store completer (callers pass ma or mb)
 *	m		  displacement multiplier (callers pass 1 or -1)
 *
 * Uses %t1-%t4 as scratch.  Defines _LABEL(_loop16), _LABEL(_skip16),
 * _LABEL(_loop4), _LABEL(_skip4), _LABEL(_do1), _LABEL(_loop1) and
 * _LABEL(_skip1), so it can be expanded only once per _LABEL
 * definition.  It always leaves by branching to _LABEL(_done), which
 * the user of the macro must define.  _LABEL(_do1) doubles as the
 * entry point that other macros branch to for a pure byte-at-a-time
 * copy.
 */
81#define _COPY(src_spc, src_off, dst_spc, dst_off, count, cmplt, m) \
82								! \
83	/*							! \
84	 * Loop storing 16 bytes at a time.  Since count 	! \
85	 * may be > INT_MAX, we have to be careful and		! \
86	 * avoid comparisons that treat it as a signed 		! \
87	 * quantity, until after this loop, when count		! \
88	 * is guaranteed to be less than 16.			! \
89	 */							! \
90	comib,>>=,n	15, count, _LABEL(_skip16)		! \
91.label _LABEL(_loop16)						! \
92	addi		-16, count, count			! \
93	ldws,cmplt	m*4(src_spc, src_off), %t1		! \
94	ldws,cmplt	m*4(src_spc, src_off), %t2		! \
95	ldws,cmplt	m*4(src_spc, src_off), %t3		! \
96	ldws,cmplt	m*4(src_spc, src_off), %t4		! \
97	stws,cmplt	%t1, m*4(dst_spc, dst_off)		! \
98	stws,cmplt	%t2, m*4(dst_spc, dst_off)		! \
99	stws,cmplt	%t3, m*4(dst_spc, dst_off)		! \
100	comib,<<	15, count, _LABEL(_loop16)		! \
101	stws,cmplt	%t4, m*4(dst_spc, dst_off)		! \
102.label _LABEL(_skip16)						! \
103								! \
104	/* Loop storing 4 bytes at a time. */			! \
105	addib,<,n	-4, count, _LABEL(_skip4)		! \
106.label _LABEL(_loop4)						! \
107	ldws,cmplt	m*4(src_spc, src_off), %t1		! \
108	addib,>=	-4, count, _LABEL(_loop4)		! \
109	stws,cmplt	%t1, m*4(dst_spc, dst_off)		! \
110.label _LABEL(_skip4)						! \
111	/* Restore the correct count. */			! \
112	addi		4, count, count				! \
113								! \
114.label _LABEL(_do1)						! \
115								! \
116	/* Loop storing 1 byte at a time. */			! \
117	addib,<,n	-1, count, _LABEL(_skip1)		! \
118.label _LABEL(_loop1)						! \
119	ldbs,cmplt	m*1(src_spc, src_off), %t1		! \
120	addib,>=	-1, count, _LABEL(_loop1)		! \
121	stbs,cmplt	%t1, m*1(dst_spc, dst_off)		! \
122.label _LABEL(_skip1)						! \
123	/* Restore the correct count (in the b's delay slot). */ ! \
124	b		_LABEL(_done)				! \
125	addi		1, count, count
126
127/*
128 * This macro is definitely strange.  It exists purely to
129 * allow the _COPYS macro to be reused, but because it
130 * requires this long attempt to explain it, I'm starting
131 * to doubt the value of that.
132 *
133 * Part of the expansion of the _COPYS macro below are loops
134 * that copy four words or one word at a time, performing shifts
135 * to get data to line up correctly in the destination buffer.
136 *
137 * The _COPYS macro is used when copying backwards, as well
138 * as forwards.  The 4-word loop always loads into %t1, %t2, %t3,
139 * and %t4 in that order.  This means that when copying forward,
140 * %t1 will have the word from the lowest address, and %t4 will
141 * have the word from the highest address.  When copying
142 * backwards, the opposite is true.
143 *
144 * The shift instructions need pairs of registers with adjacent
145 * words, with the register containing the word from the lowest
146 * address *always* coming first.  It is this asymmetry that
147 * gives rise to this macro - depending on which direction
148 * we're copying in, these ordered pairs are different.
149 *
150 * Fortunately, we can compute those register numbers at compile
151 * time, and assemble them manually into a shift instruction.
152 * That's what this macro does.
153 *
154 * This macro takes two arguments.  n ranges from 0 to 3 and
155 * is the "shift number", i.e., n = 0 means we're doing the
156 * shift for what will be the first store.
157 *
158 * m is the displacement multiplier from the _COPYS macro call.
159 * This is 1 for a forward copy and -1 for a backwards copy.
160 * So, the ((m + 1) / 2) term yields 0 for a backwards copy and
161 * 1 for a forward copy, and the ((m - 1) / 2) term yields
162 * 0 for a forward copy, and -1 for a backwards copy.
163 * These terms are used to discriminate the register computations
164 * below.
165 *
166 * When copying forward, then, the first register used with
167 * the first vshd will be 19 + (3 - ((0 - 1) & 3)), or %t4,
168 * which matches _COPYS' requirement that the word last loaded
169 * be in %t4.  The first register used for the second vshd
170 * will then "wrap" around to 19 + (3 - ((1 - 1) & 3)), or %t1.
171 * And so on to %t2 and %t3.
172 *
173 * When copying forward, the second register used with the first
174 * vshd will be 19 + (3 - ((0 + 0) & 3)), or %t1.  It will
175 * continue to be %t2, then %t3, and finally %t4.
176 *
177 * When copying backwards, the values for the first and second
178 * register for each vshd are reversed from the forwards case.
179 * (Symmetry reclaimed!)  Proving this is "left as an exercise
180 * for the reader" (remember the different discriminating values!)
181 */
/*
 * _VSHD(n, m, t): emit one hand-assembled vshd as a .word.
 *	n  shift number, 0..3 (0 is the shift for the first store)
 *	m  displacement multiplier: 1 for a forward copy, -1 for a
 *	   backwards copy
 *	t  number of the target general register, ORed into the low
 *	   bits of the word (the callers pass 1 for %r1, 22 for %t1,
 *	   21 for %t2 and 20 for %t3)
 * The two source register numbers are computed from n and m and
 * placed at bit 16 (first vshd operand) and bit 21 (second operand);
 * register numbers 19..22 are %t4..%t1.  0xd0000000 supplies the
 * remaining bits of the instruction (presumably the vshd opcode).
 */
182#define _VSHD(n, m, t)						  \
183	.word (0xd0000000					| \
184	((19 + (3 - ((n - 1 * ((m + 1) / 2)) & 3))) << 16)	| \
185	((19 + (3 - ((n + 1 * ((m - 1) / 2)) & 3))) << 21)	| \
186	(t))
187
188/*
189 * This macro does a bulk copy with shifting.  cmplt and m are
190 * the completer and displacement multiplier, respectively, for
191 * the load and store instructions.  It is assumed that the
192 * word last loaded is already in %t4.
193 */
/*
 * _COPYS(src_spc, src_off, dst_spc, dst_off, count, cmplt, m)
 *	Parameters are the same as for _COPY.
 *
 * On entry %t4 must hold the word last loaded and %cr11 (the Shift
 * Amount Register) must already be set by the caller.  Uses %r1 and
 * %t1-%t4 as scratch.  Defines _LABEL(S_loop16), _LABEL(S_skip16),
 * _LABEL(S_loop4) and _LABEL(S_skip4).  It never falls through: it
 * finishes by branching to _LABEL(_do1), so a _COPY expansion using
 * the same _LABEL must be present to provide that label.
 */
194#define _COPYS(src_spc, src_off, dst_spc, dst_off, count, cmplt, m) \
195								! \
196	/*							! \
197	 * Loop storing 16 bytes at a time.  Since count 	! \
198	 * may be > INT_MAX, we have to be careful and		! \
199	 * avoid comparisons that treat it as a signed 		! \
200	 * quantity, until after this loop, when count		! \
201	 * is guaranteed to be less than 16.			! \
202	 */							! \
203	comib,>>=,n	15, count, _LABEL(S_skip16)		! \
204.label _LABEL(S_loop16)						! \
205	addi		-16, count, count			! \
206	ldws,cmplt	m*4(src_spc, src_off), %t1		! \
207	ldws,cmplt	m*4(src_spc, src_off), %t2		! \
208	ldws,cmplt	m*4(src_spc, src_off), %t3		! \
209	_VSHD(0, m, 1)	/* vshd %t4, %t1, %r1 */		! \
210	ldws,cmplt	m*4(src_spc, src_off), %t4		! \
211	_VSHD(1, m, 22)	/* vshd %t1, %t2, %t1 */		! \
212	_VSHD(2, m, 21)	/* vshd %t2, %t3, %t2 */		! \
213	_VSHD(3, m, 20)	/* vshd %t3, %t4, %t3 */		! \
214	stws,cmplt	%r1, m*4(dst_spc, dst_off)		! \
215	stws,cmplt	%t1, m*4(dst_spc, dst_off)		! \
216	stws,cmplt	%t2, m*4(dst_spc, dst_off)		! \
217	comib,<<	15, count, _LABEL(S_loop16)		! \
218	stws,cmplt	%t3, m*4(dst_spc, dst_off)		! \
219.label _LABEL(S_skip16)						! \
220								! \
221	/* Loop storing 4 bytes at a time. */			! \
222	addib,<,n	-4, count, _LABEL(S_skip4)		! \
223.label _LABEL(S_loop4)						! \
224	ldws,cmplt	m*4(src_spc, src_off), %t1		! \
225	_VSHD(0, m, 1)	/* into %r1 (1) */			! \
226	copy		%t1, %t4				! \
227	addib,>=	-4, count, _LABEL(S_loop4)		! \
228	stws,cmplt	%r1, m*4(dst_spc, dst_off)		! \
229.label _LABEL(S_skip4)						! \
230								! \
231	/*							! \
232 	 * We now need to "back up" src_off by the		! \
233	 * number of bytes remaining in the FIFO		! \
234	 * (i.e., the number of bytes remaining in %t4),	! \
235	 * because (the correct) count still includes		! \
236	 * these bytes, and we intend to keep it that		! \
237	 * way, and finish with the single-byte copier.		! \
238	 *							! \
239	 * The number of bytes remaining in the FIFO is		! \
240	 * related to the shift count, so recover it,		! \
241	 * restoring the correct count at the same time.	! \
	 * %cr11 holds the shift as a bit count; the shd	! \
	 * by 3 below turns it back into a byte count.		! \
242	 */							! \
243	mfctl	%cr11, %t1					! \
244	addi	4, count, count					! \
245	shd	%r0, %t1, 3, %t1				! \
246								! \
247	/*							! \
248	 * If we're copying forward, the shift count		! \
249	 * is the number of bytes remaining in the		! \
250	 * FIFO, and we want to subtract it from src_off.	! \
251	 * If we're copying backwards, (4 - shift count)	! \
252	 * is the number of bytes remaining in the FIFO,	! \
253	 * and we want to add it to src_off.			! \
254	 *							! \
255	 * We observe that x + (4 - y) = x - (y - 4),		! \
256	 * and introduce this instruction to add -4 when	! \
257	 * m is -1, although this does mean one extra		! \
258	 * instruction in the forward case.			! \
259	 */							! \
260	addi	4*((m - 1) / 2), %t1, %t1			! \
261								! \
262	/* Now branch to the byte-at-a-time loop. */		! \
263	b	_LABEL(_do1)					! \
264	sub	src_off, %t1, src_off
265
266/*
267 * This macro copies a region in the forward direction.
268 */
/*
 * _COPY_FORWARD(src_spc, src_off, dst_spc, dst_off, count)
 *	src_spc, src_off  source space register and offset register
 *	dst_spc, dst_off  destination space register and offset register
 *	count		  register holding the number of bytes to copy
 *
 * Expands both _COPY and _COPYS, so it defines all of their labels
 * plus _LABEL(_shifting) and _LABEL(_shiftingrt), and it needs
 * _LABEL(_done) to be defined by the user.  Uses %r1, %t1-%t4 and
 * %cr11 as scratch, and modifies src_off, dst_off and count.
 *
 * NOTE: while the "#if 1" block further down in this file is
 * enabled, this definition is #undef'd there and replaced by a
 * byte-at-a-time version.
 */
269#define _COPY_FORWARD(src_spc, src_off, dst_spc, dst_off, count)  \
270								! \
271	/*							! \
272	 * Since in the shifting-left case we will		! \
273	 * load 8 bytes before checking count, to		! \
274	 * keep things simple, branch to the byte 		! \
275	 * copier unless we're copying at least 8.		! \
276	 */							! \
277	comib,>>,n	8, count, _LABEL(_do1)			! \
278								! \
279	/*							! \
280	 * Once we 4-byte align the source offset, 		! \
281	 * figure out how many bytes from the region		! \
282	 * will be in the first 4-byte word we read.		! \
283	 * Ditto for writing the destination offset.		! \
284	 */							! \
285	extru		src_off, 31, 2, %t1			! \
286	extru		dst_off, 31, 2, %t2			! \
287	subi		4, %t1, %t1				! \
288	subi		4, %t2, %t2				! \
289								! \
290	/*							! \
291	 * Calculate the byte shift required.  A 		! \
292	 * positive value means a source 4-byte word 		! \
293	 * has to be shifted to the right to line up 		! \
294	 * as a destination 4-byte word.			! \
295	 */							! \
296	sub		%t1, %t2, %t1				! \
297								! \
298	/* 4-byte align src_off. */				! \
299	depi		0, 31, 2, src_off			! \
300								! \
301	/*							! \
302	 * It's somewhat important to note that this		! \
303	 * code thinks of count as "the number of bytes		! \
304	 * that haven't been stored yet", as opposed to		! \
305	 * "the number of bytes that haven't been copied	! \
306	 * yet".  The distinction is subtle, but becomes	! \
307	 * apparent at the end of the shifting code, where	! \
308	 * we "back up" src_off to correspond to count,		! \
309	 * as opposed to flushing the FIFO.			! \
310	 *							! \
311	 * We calculated above how many bytes our first		! \
312	 * store will store, so update count now.		! \
313	 *							! \
314	 * If the shift is zero, strictly as an optimization	! \
315	 * we use a copy loop that does no shifting.		! \
316	 */							! \
317	comb,<>		%r0, %t1, _LABEL(_shifting)		! \
318	sub		count, %t2, count			! \
319								! \
320	/* Load and store the first word. */			! \
321	ldws,ma		4(src_spc, src_off), %t4		! \
322	stbys,b,m	%t4, 4(dst_spc, dst_off)		! \
323								! \
324	/* Do the rest of the copy. */				! \
325	_COPY(src_spc,src_off,dst_spc,dst_off,count,ma,1)	! \
326								! \
327.label _LABEL(_shifting)					! \
328								! \
329	/*							! \
330	 * If shift < 0, we need to shift words to the		! \
331	 * left.  Since we can't do this directly, we		! \
332	 * adjust the shift so it's a shift to the right	! \
333	 * and load the first word into the high word of	! \
334	 * the FIFO.  Otherwise, we load a zero into the	! \
335	 * high word of the FIFO.				! \
336	 */							! \
337	comb,<=		%r0, %t1, _LABEL(_shiftingrt)		! \
338	copy		%r0, %t3				! \
339	addi		4, %t1, %t1				! \
340	ldws,ma		4(src_spc, src_off), %t3		! \
341.label _LABEL(_shiftingrt)					! \
342								! \
343	/*							! \
344	 * Turn the shift byte count into a bit count,		! \
345	 * load the next word, set the Shift Amount 		! \
346	 * Register, and form and store the first word.		! \
347	 */							! \
348	sh3add		%t1, %r0, %t1				! \
349	ldws,ma		4(src_spc, src_off), %t4		! \
350	mtctl		%t1, %cr11				! \
351	vshd		%t3, %t4, %r1				! \
352	stbys,b,m	%r1, 4(dst_spc, dst_off)		! \
353								! \
354	/* Do the rest of the copy. */				! \
355	_COPYS(src_spc,src_off,dst_spc,dst_off,count,ma,1)
356
357/* This macro copies a region in the reverse direction. */
/*
 * _COPY_REVERSE(src_spc, src_off, dst_spc, dst_off, count)
 *	src_spc, src_off  source space register and offset register
 *	dst_spc, dst_off  destination space register and offset register
 *	count		  register holding the number of bytes to copy
 *
 * src_off and dst_off are passed pointing at the start of the
 * regions; the macro first advances both by count and then copies
 * downwards.  Expands both _COPY and _COPYS, so it defines all of
 * their labels plus _LABEL(_shifting), and it needs _LABEL(_done) to
 * be defined by the user.  Uses %r1, %t1-%t4 and %cr11 as scratch.
 *
 * NOTE: while the "#if 1" block further down in this file is
 * enabled, this definition is #undef'd there and replaced by a
 * byte-at-a-time version.
 */
358#define _COPY_REVERSE(src_spc, src_off, dst_spc, dst_off, count)  \
359								! \
360	/* Immediately add count to both offsets. */		! \
361	add	src_off, count, src_off				! \
362	add	dst_off, count, dst_off				! \
363								! \
364	/*							! \
365	 * Since in the shifting-right case we 			! \
366	 * will load 8 bytes before checking 			! \
367	 * count, to keep things simple, branch 		! \
368	 * to the byte copier unless we're 			! \
369	 * copying at least 8 bytes.				! \
370	 */							! \
371	comib,>>,n	8, count, _LABEL(_do1)			! \
372								! \
373	/*							! \
374	 * Once we 4-byte align the source offset, 		! \
375	 * figure out how many bytes from the region		! \
376	 * will be in the first 4-byte word we read.		! \
377	 * Ditto for writing the destination offset.		! \
378	 */							! \
379	extru,<>	src_off, 31, 2, %t1			! \
380	ldi		4, %t1					! \
381	extru,<>	dst_off, 31, 2, %t2			! \
382	ldi		4, %t2					! \
383								! \
384	/*							! \
385	 * Calculate the byte shift required.  A 		! \
386	 * positive value means a source 4-byte 		! \
387	 * word has to be shifted to the right to 		! \
388	 * line up as a destination 4-byte word.		! \
389	 */							! \
390	sub		%t2, %t1, %t1				! \
391								! \
392	/*							! \
393	 * 4-byte align src_off, leaving it pointing 		! \
394	 * to the 4-byte word *after* the next word 		! \
395	 * we intend to load.					! \
396	 *							! \
397	 * It's somewhat important to note that this		! \
398	 * code thinks of count as "the number of bytes		! \
399	 * that haven't been stored yet", as opposed to		! \
400	 * "the number of bytes that haven't been copied	! \
401	 * yet".  The distinction is subtle, but becomes	! \
402	 * apparent at the end of the shifting code, where	! \
403	 * we "back up" src_off to correspond to count,		! \
404	 * as opposed to flushing the FIFO.			! \
405	 *							! \
406	 * We calculated above how many bytes our first		! \
407	 * store will store, so update count now.		! \
408	 *							! \
409	 * If the shift is zero, we use a copy loop that	! \
410	 * does no shifting.  NB: unlike the forward case,	! \
411	 * this is NOT strictly an optimization.  If the	! \
412	 * SAR is zero the vshds do NOT do the right thing.	! \
413	 * This is another asymmetry more or less the "fault"	! \
414	 * of vshd.						! \
415	 */							! \
416	addi		3, src_off, src_off			! \
417	sub		count, %t2, count			! \
418	comb,<>		%r0, %t1, _LABEL(_shifting)		! \
419	depi		0, 31, 2, src_off			! \
420								! \
421	/* Load and store the first word. */			! \
422	ldws,mb		-4(src_spc, src_off), %t4		! \
423	_STBYS_E_M(%t4, dst_spc, dst_off)			! \
424								! \
425	/* Do the rest of the copy. */				! \
426	_COPY(src_spc,src_off,dst_spc,dst_off,count,mb,-1)	! \
427								! \
428.label _LABEL(_shifting)					! \
429								! \
430	/*							! \
431	 * If shift < 0, we need to shift words to the		! \
432	 * left.  Since we can't do this directly, we		! \
433	 * adjust the shift so it's a shift to the right	! \
434	 * and load a zero in to the low word of the FIFO.	! \
435	 * Otherwise, we load the first word into the		! \
436	 * low word of the FIFO.				! \
437	 *							! \
438	 * Note the nullification trickery here.  We 		! \
439	 * assume that we're shifting to the left, and		! \
440	 * load zero into the low word of the FIFO.  Then	! \
441	 * we nullify the addi if we're shifting to the		! \
442	 * right.  If the addi is not nullified, we are		! \
443 	 * shifting to the left, so we nullify the load	! \
	 * (via the ,tr completer on the addi).  The comb	! \
444	 * branches only if we're shifting to the right.	! \
445	 */							! \
446	copy		%r0, %t3				! \
447	comb,<=,n	%r0, %t1, 0				! \
448	addi,tr		4, %t1, %t1				! \
449	ldws,mb		-4(src_spc, src_off), %t3		! \
450								! \
451	/*							! \
452	 * Turn the shift byte count into a bit count,		! \
453	 * load the next word, set the Shift Amount 		! \
454	 * Register, and form and store the first word.		! \
455	 */							! \
456	sh3add		%t1, %r0, %t1				! \
457	ldws,mb		-4(src_spc, src_off), %t4		! \
458	mtctl		%t1, %cr11				! \
459	vshd		%t4, %t3, %r1				! \
460	_STBYS_E_M(%r1, dst_spc, dst_off)			! \
461								! \
462	/* Do the rest of the copy. */				! \
463	_COPYS(src_spc,src_off,dst_spc,dst_off,count,mb,-1)
464
465/*
466 * For paranoia, when things aren't going well, enable this
467 * code to assemble byte-at-a-time-only copying.
468 */
469#if 1
/*
 * Byte-at-a-time forward copy.  Branches straight to _LABEL(_done)
 * when count is zero.  Otherwise it loops until count reaches zero:
 * the addib displacement of -12 is relative to the addib's address
 * plus 8, so it targets the ldbs just before the addib, and the stbs
 * runs in the addib's delay slot on every iteration.  Uses %r1 as
 * scratch.  This replaces the word-copying _COPY_FORWARD defined
 * above.
 */
470#undef _COPY_FORWARD
471#define _COPY_FORWARD(src_spc, src_off, dst_spc, dst_off, count)  \
472	comb,=,n	%r0, count, _LABEL(_done)		! \
473	ldbs,ma		1(src_spc, src_off), %r1		! \
474	addib,<>	-1, count, -12				! \
475	stbs,ma		%r1, 1(dst_spc, dst_off)		! \
476	b,n		_LABEL(_done)
/*
 * Byte-at-a-time reverse copy.  The leading comb has no ,n
 * completer, so the first add sits in its delay slot and is executed
 * even when count is zero (where it adds zero).  Both offsets are
 * advanced by count and the loop then copies downwards with
 * pre-decrementing (,mb) accesses; the -12 displacement again
 * targets the ldbs.  Uses %r1 as scratch.  This replaces the
 * word-copying _COPY_REVERSE defined above.
 */
477#undef _COPY_REVERSE
478#define _COPY_REVERSE(src_spc, src_off, dst_spc, dst_off, count)  \
479	comb,=		%r0, count, _LABEL(_done)		! \
480	add		src_off, count, src_off			! \
481	add		dst_off, count, dst_off			! \
482	ldbs,mb		-1(src_spc, src_off), %r1		! \
483	addib,<>	-1, count, -12				! \
484	stbs,mb		%r1, -1(dst_spc, dst_off)		! \
485	b,n		_LABEL(_done)
486#endif
487
488/*
489 * If none of the following are defined, define BCOPY.
490 */
491#if !(defined(SPCOPY) || defined(MEMCPY) || defined(MEMMOVE))
492#define BCOPY	/* nothing else was requested: assemble bcopy() */
493#endif /* !(SPCOPY || MEMCPY || MEMMOVE) */
494
495#if defined(SPCOPY) && !defined(_STANDALONE)
496
497#include <sys/errno.h>
498#include "assym.h"
499
500/*
501 * int spcopy(pa_space_t ssp, const void *src, pa_space_t dsp, void *dst,
502 * 	size_t len)
503 *
504 * We assume that the regions do not overlap.
505 */
/*
 * Register use, as established by the code below:
 *	%arg0	ssp on entry (moved into %sr1), then reloaded with len
 *	%arg1	src offset
 *	%arg2	dsp on entry (moved into %sr2)
 *	%arg3	dst offset
 *	%r31	curlwp from GET_CURLWP, then the value loaded from
 *		L_PCB(curlwp); it must survive the copy because
 *		PCB_ONFAULT is cleared through it on the way out
 *	%ret1	caller's %sr2, restored in the delay slot of the return
 *	%ret0	0 on the normal path; on the fault path it is whatever
 *		the fault handling code left there (not visible here)
 */
506LEAF_ENTRY(spcopy)
507
508        /*
509	 * Setup the fault handler, which will fill in %ret0 if triggered.
510	 */
511	GET_CURLWP(%r31)
512#ifdef	DIAGNOSTIC
	/* Panic if curlwp is NULL; otherwise skip over the panic call. */
513	comb,<>,n %r0, %r31, Lspcopy_curlwp_ok
514	ldil	L%panic, %r1
515	ldil	L%Lspcopy_curlwp_bad, %arg0
516	ldo	R%panic(%r1), %r1
517	ldo	R%Lspcopy_curlwp_bad(%arg0), %arg0
518	.call
519	bv,n    %r0(%r1)
520	nop
521Lspcopy_curlwp_bad:
522	.asciz	"spcopy: curlwp == NULL\n"
523	.align	8
524Lspcopy_curlwp_ok:
525#endif /* DIAGNOSTIC */
	/* Store the address of spcopy_fault at PCB_ONFAULT. */
526	ldil    L%spcopy_fault, %r1
527	ldw     L_PCB(%r31), %r31
528	ldo     R%spcopy_fault(%r1), %r1
529	stw     %r1, PCB_ONFAULT(%r31)
530
531	/* Setup the space registers. */
	/* %sr2 is saved in %ret1 first so it can be restored on return. */
532	mfsp	%sr2, %ret1
533	mtsp	%arg0, %sr1
534	mtsp	%arg2, %sr2
535
536	/* Get the len argument and do the copy. */
	/* len is the fifth argument, so it is fetched from the frame. */
537	ldw	HPPA_FRAME_ARG(4)(%sp), %arg0
538#define	_LABEL(l) __CONCAT(spcopy,l)
539	_COPY_FORWARD(%sr1,%arg1,%sr2,%arg3,%arg0)
540_LABEL(_done):
541
542	/* Return. */
543	copy	%r0, %ret0
	/*
	 * The success path falls through into spcopy_fault: both paths
	 * clear PCB_ONFAULT and restore %sr2 (in the delay slot of the
	 * bv) before returning.
	 */
544ALTENTRY(spcopy_fault)
545	stw     %r0, PCB_ONFAULT(%r31)
546	bv	%r0(%rp)
547	mtsp	%ret1, %sr2
548EXIT(spcopy)
549#endif /* SPCOPY && !_STANDALONE */
550
551#ifdef MEMCPY
552/*
553 * void *memcpy(void *restrict dst, const void *restrict src, size_t len);
554 *
555 * memcpy is specifically restricted to working on
556 * non-overlapping regions, so we can just copy forward.
557 */
/*
 * Register use: %arg0 = dst, %arg1 = src, %arg2 = len.  Both regions
 * are accessed through %sr0.  The original dst is returned in %ret0.
 */
558LEAF_ENTRY(memcpy)
	/* Set the return value now; the copy macro modifies %arg0. */
559	copy	%arg0, %ret0
560#define	_LABEL(l) __CONCAT(memcpy,l)
561	_COPY_FORWARD(%sr0,%arg1,%sr0,%arg0,%arg2)
562_LABEL(_done):
563	bv,n	%r0(%rp)
564	nop
565EXIT(memcpy)
566#endif /* MEMCPY */
567
568#ifdef BCOPY
569/*
570 * void bcopy(const void *src, void *dst, size_t len);
571 */
572LEAF_ENTRY(bcopy)
	/*
	 * bcopy takes (src, dst) while the shared code below expects
	 * the memmove layout, %arg0 = dst and %arg1 = src, so swap the
	 * two through %r1.
	 */
573	copy	%arg0, %r1
574	copy	%arg1, %arg0
575	copy	%r1, %arg1
576	/* FALLTHROUGH */
577#define _LABEL_F(l) __CONCAT(bcopy_F,l)
578#define _LABEL_R(l) __CONCAT(bcopy_R,l)
579#endif
580
581#ifdef MEMMOVE
582/*
583 * void *memmove(void *dst, const void *src, size_t len);
584 */
585LEAF_ENTRY(memmove)
586#define _LABEL_F(l) __CONCAT(memmove_F,l)
587#define _LABEL_R(l) __CONCAT(memmove_R,l)
	/* Set the return value now; the copy macros modify %arg0. */
588	copy	%arg0, %ret0
589#endif /* MEMMOVE */
590
591#if defined(BCOPY) || defined(MEMMOVE)
592
593	/*
594	 * If src >= dst or src + len <= dst, we copy
595	 * forward, else we copy in reverse.
	 *
	 * Here %arg0 = dst, %arg1 = src, %arg2 = len and %r1 is
	 * src + len.  The first comb has a zero displacement: when
	 * src >= dst it branches over the second comb (nullifying
	 * it) into the forward copy.  Otherwise the second comb
	 * goes to the reverse copy when src + len > dst, and falls
	 * through to the forward copy when it is not.  Both
	 * comparisons are unsigned.
596	 */
597	add		%arg1, %arg2, %r1
598	comb,>>=,n	%arg1, %arg0, 0
599	comb,>>,n	%r1, %arg0, _LABEL_R(_go)
600
	/*
	 * _LABEL is pointed at a different prefix for each expansion
	 * so the labels of the forward and the reverse copy do not
	 * collide.
	 */
601#define _LABEL _LABEL_F
602	_COPY_FORWARD(%sr0,%arg1,%sr0,%arg0,%arg2)
603#undef _LABEL
604
605_LABEL_R(_go):
606#define _LABEL _LABEL_R
607	_COPY_REVERSE(%sr0,%arg1,%sr0,%arg0,%arg2)
608#undef _LABEL
609
	/* Both directions finish at the same return sequence. */
610_LABEL_F(_done):
611_LABEL_R(_done):
612	bv,n	%r0(%rp)
613	nop
614#ifdef BCOPY
615EXIT(bcopy)
616#else
617EXIT(memmove)
618#endif
619#endif /* BCOPY || MEMMOVE */
620