xref: /netbsd-src/sys/arch/sparc64/sparc64/copy.S (revision ca08b3e761e6fc5454aa5d028935f5d82d92ea3d)
1/*	$NetBSD: copy.S,v 1.9 2020/06/30 16:20:02 maxv Exp $	*/
2
3/*
4 * Copyright (c) 2006-2010 Matthew R. Green
5 * Copyright (c) 1996-2002 Eduardo Horvath
6 * Copyright (c) 1996 Paul Kranenburg
7 * Copyright (c) 1996
8 * 	The President and Fellows of Harvard College.
9 *	All rights reserved.
10 * Copyright (c) 1992, 1993
11 *	The Regents of the University of California.
12 *	All rights reserved.
13 *
14 * This software was developed by the Computer Systems Engineering group
15 * at Lawrence Berkeley Laboratory under DARPA contract BG 91-66 and
16 * contributed to Berkeley.
17 *
18 * All advertising materials mentioning features or use of this software
19 * must display the following acknowledgement:
20 *	This product includes software developed by the University of
21 *	California, Lawrence Berkeley Laboratory.
22 *	This product includes software developed by Harvard University.
23 *
24 * Redistribution and use in source and binary forms, with or without
25 * modification, are permitted provided that the following conditions
26 * are met:
27
28 * 1. Redistributions of source code must retain the above copyright
29 *    notice, this list of conditions and the following disclaimer.
30 * 2. Redistributions in binary form must reproduce the above copyright
31 *    notice, this list of conditions and the following disclaimer in the
32 *    documentation and/or other materials provided with the
33 *    distribution.
34 * 3. All advertising materials mentioning features or use of this
35 *    software must display the following acknowledgement:
36 *	This product includes software developed by the University of
37 *	California, Berkeley and its contributors.
38 *	This product includes software developed by Harvard University.
39 *	This product includes software developed by Paul Kranenburg.
40 * 4. Neither the name of the University nor the names of its
41 *    contributors may be used to endorse or promote products derived
42 *    from this software without specific prior written permission.
43 *
44 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS''
45 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
46 * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
47 * PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR
48 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
49 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
50 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
51 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
52 * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR
53 * TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF
54 * THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
55 * DAMAGE.
56 *
57 *	@(#)locore.s	8.4 (Berkeley) 12/10/93
58 */
59
60
61#include "opt_ddb.h"
62#include "opt_kgdb.h"
63#include "opt_multiprocessor.h"
64#include "opt_compat_netbsd.h"
65#include "opt_compat_netbsd32.h"
66#include "opt_lockdebug.h"
67
68#include "assym.h"
69#include <machine/param.h>
70#include <machine/ctlreg.h>
71#include <machine/asm.h>
72#include <machine/locore.h>
73#include <machine/psl.h>
74
75#include "ksyms.h"
76
77	.register	%g2,#scratch
78	.register	%g3,#scratch
79
80/*
81 * copyinstr(fromaddr, toaddr, maxlength, &lencopied)
82 *
83 * Copy a null terminated string from the user address space into
84 * the kernel address space.
 *
 * Returns (in %o0) 0 once the terminating NUL byte has been copied, or
 * ENAMETOOLONG if maxlength is not positive or runs out before a NUL is
 * seen.  Unless maxlength was rejected up front, the number of bytes
 * stored (the NUL included) is written to *lencopied when lencopied is
 * non-NULL.
 *
 * The user-space load is covered by cpcb->pcb_onfault, which is pointed
 * at Lcsdone (the exit path shared with copyoutstr below).  Nothing here
 * sets %o0 on that path, so the error value returned after a fault is
 * presumably supplied by the trap code -- TODO confirm.
85 */
86ENTRY(copyinstr)
87	! %o0 = fromaddr, %o1 = toaddr, %o2 = maxlen, %o3 = &lencopied
	! Scratch: %o4 = cpcb, %o5 = handler address, then saved toaddr, %g1 = c
88#ifdef NOTDEF_DEBUG
	! Debug trace: print the arguments from a fresh register window so
	! that the incoming %o registers are untouched after the restore.
89	save	%sp, -CC64FSZ, %sp
90	set	8f, %o0
91	mov	%i0, %o1
92	mov	%i1, %o2
93	mov	%i2, %o3
94	call	printf
95	 mov	%i3, %o4
96	restore
97	.data
988:	.asciz	"copyinstr: from=%x to=%x max=%x &len=%x\n"
99	_ALIGN
100	.text
101#endif
102	brgz,pt	%o2, 1f					! Make sure len is valid
103	 sethi	%hi(CPCB), %o4		! (first instr of copy)
	! maxlen <= 0: fail at once; pcb_onfault and *lencopied are untouched
104	retl
105	 mov	ENAMETOOLONG, %o0
1061:
107	LDPTR	[%o4 + %lo(CPCB)], %o4	! catch faults
108	set	Lcsdone, %o5
109	membar	#Sync
110	STPTR	%o5, [%o4 + PCB_ONFAULT]
111
112	mov	%o1, %o5		!	save = toaddr;
113! XXX should do this in bigger chunks when possible
! NOTE(review): the loop test below uses %icc (the 32-bit condition codes)
! while the entry check (brgz) looks at the whole register -- confirm that
! maxlen never needs more than 31 bits.
1140:					! loop:
115	ldsba	[%o0] ASI_AIUS, %g1	!	c = *fromaddr;	(user load)
116	stb	%g1, [%o1]		!	*toaddr++ = c;
117	inc	%o1
118	brz,a,pn	%g1, Lcsdone	!	if (c == 0)	(NUL already counted)
119	 clr	%o0			!		{ error = 0; done; }
120	deccc	%o2			!	if (--len > 0) {
121	bg,pt	%icc, 0b		!		fromaddr++;
122	 inc	%o0			!		goto loop;
123	ba,pt	%xcc, Lcsdone		!	}
124	 mov	ENAMETOOLONG, %o0	!	error = ENAMETOOLONG;
125	NOTREACHED
126
127/*
128 * copyoutstr(fromaddr, toaddr, maxlength, &lencopied)
129 *
130 * Copy a null terminated string from the kernel
131 * address space to the user address space.
 *
 * Returns (in %o0) 0 once the terminating NUL byte has been copied, or
 * ENAMETOOLONG if maxlength is not positive or runs out before a NUL is
 * seen.  Unless maxlength was rejected up front, the number of bytes
 * stored (the NUL included) is written to *lencopied when lencopied is
 * non-NULL.
 *
 * The user-space store is covered by cpcb->pcb_onfault, which is pointed
 * at Lcsdone.  Lcsdone is also the common exit of copyinstr above; it
 * expects %o1 = current toaddr, %o5 = original toaddr, %o3 = lencopied,
 * %o4 = cpcb and the result already in %o0.  Nothing here sets %o0 on the
 * fault path, so that value presumably comes from the trap code -- TODO
 * confirm.
132 */
133ENTRY(copyoutstr)
134	! %o0 = fromaddr, %o1 = toaddr, %o2 = maxlen, %o3 = &lencopied
	! Scratch: %o4 = cpcb, %o5 = handler address, then saved toaddr, %g1 = c
135#ifdef NOTDEF_DEBUG
	! Debug trace: print the arguments from a fresh register window so
	! that the incoming %o registers are untouched after the restore.
136	save	%sp, -CC64FSZ, %sp
137	set	8f, %o0
138	mov	%i0, %o1
139	mov	%i1, %o2
140	mov	%i2, %o3
141	call	printf
142	 mov	%i3, %o4
143	restore
144	.data
1458:	.asciz	"copyoutstr: from=%x to=%x max=%x &len=%x\n"
146	_ALIGN
147	.text
148#endif
149	brgz,pt	%o2, 1f					! Make sure len is valid
150	 sethi	%hi(CPCB), %o4		! (first instr of copy)
	! maxlen <= 0: fail at once; pcb_onfault and *lencopied are untouched
151	retl
152	 mov	ENAMETOOLONG, %o0
1531:
154	LDPTR	[%o4 + %lo(CPCB)], %o4	! catch faults
155	set	Lcsdone, %o5
156	membar	#Sync
157	STPTR	%o5, [%o4 + PCB_ONFAULT]
158
159	mov	%o1, %o5		!	save = toaddr;
160! XXX should do this in bigger chunks when possible
! NOTE(review): the loop test below uses %icc (the 32-bit condition codes)
! while the entry check (brgz) looks at the whole register -- confirm that
! maxlen never needs more than 31 bits.
1610:					! loop:
162	ldsb	[%o0], %g1		!	c = *fromaddr;
163	stba	%g1, [%o1] ASI_AIUS	!	*toaddr++ = c;	(user store)
164	inc	%o1
165	brz,a,pn	%g1, Lcsdone	!	if (c == 0)	(NUL already counted)
166	 clr	%o0			!		{ error = 0; done; }
167	deccc	%o2			!	if (--len > 0) {
168	bg,pt	%icc, 0b		!		fromaddr++;
169	 inc	%o0			!		goto loop;
170					!	}
171	mov	ENAMETOOLONG, %o0	!	error = ENAMETOOLONG;
172Lcsdone:				! done:
173	sub	%o1, %o5, %o1		!	len = to - save;
174	brnz,a	%o3, 1f			!	if (lencopied)
175	 STPTR	%o1, [%o3]		!		*lencopied = len;
1761:
177	retl				! cpcb->pcb_onfault = 0;
178	 STPTR	%g0, [%o4 + PCB_ONFAULT]! return (error);
179
180
181/*
182 * copyin(src, dst, len)
183 *
184 * Copy specified amount of data from user space into the kernel.
185 *
186 * This is a modified version of memcpy that uses ASI_AIUS.  When
187 * memcpy is optimized to use block copy ASIs, this should be also.
 *
 * Register use: %o0 = src (user address, always read through %asi, which
 * is set to ASI_AIUS here), %o1 = dst (kernel address), %o2 = len;
 * %o3, %o4 and %g1 are scratch.  Returns 0 in %o0 on success and leaves
 * %asi set to ASI_PRIMARY_NOFAULT.  While the copy runs,
 * cpcb->pcb_onfault points at Lcopyfault, so a faulting access resumes
 * there rather than in this routine.
 *
 * Copies shorter than BCOPY_SMALL go a byte at a time.  Longer ones use
 * the widest unit (2, 4 or 8 bytes) that the low bits of src ^ dst allow,
 * after first aligning both pointers.
188 */
189
190ENTRY(copyin)
191!	flushw			! Make sure we don't have stack probs & lose hibits of %o
192#ifdef NOTDEF_DEBUG
	! Debug trace: print the arguments from a fresh register window.
193	save	%sp, -CC64FSZ, %sp
194	set	1f, %o0
195	mov	%i0, %o1
196	mov	%i1, %o2
197	call	printf
198	 mov	%i2, %o3
199	restore
200	.data
2011:	.asciz	"copyin: src=%x dest=%x len=%x\n"
202	_ALIGN
203	.text
204#endif
205	sethi	%hi(CPCB), %o3
206	wr	%g0, ASI_AIUS, %asi	! %asi loads below hit user space
207	LDPTR	[%o3 + %lo(CPCB)], %o3
208	set	Lcopyfault, %o4
209!	mov	%o7, %g7		! save return address
210	membar	#Sync
211	STPTR	%o4, [%o3 + PCB_ONFAULT]	! cpcb->pcb_onfault = Lcopyfault
212	cmp	%o2, BCOPY_SMALL
213Lcopyin_start:
214	bge,a	Lcopyin_fancy	! if >= this many, go be fancy.
215	 btst	7, %o0		! (part of being fancy)
216
217	/*
218	 * Not much to copy, just do it a byte at a time.
	 *
	 * The 0: label sits on the delay slot of the bl, so the same
	 * inc is both that delay slot and the first instruction of the
	 * loop.  When len is 0 the inc still runs once, which is harmless
	 * because %o0 is overwritten with the return value afterwards.
219	 */
220	deccc	%o2		! while (--len >= 0)
221	bl	1f
2220:
223	 inc	%o0
224	ldsba	[%o0 - 1] %asi, %o4!	*dst++ = (++src)[-1];
225	stb	%o4, [%o1]
226	deccc	%o2
227	bge	0b
228	 inc	%o1
2291:
230	ba	Lcopyin_done
231	 clr	%o0
232	NOTREACHED
233
234	/*
235	 * Plenty of data to copy, so try to do it optimally.
	 * Entered with the condition codes of "btst 7, %o0" (src & 7).
236	 */
237Lcopyin_fancy:
238	! check for common case first: everything lines up.
239!	btst	7, %o0		! done already
240	bne	1f
241	 .empty
242	btst	7, %o1
243	be,a	Lcopyin_doubles
244	 dec	8, %o2		! if all lined up, len -= 8, goto copyin_doubles
245
246	! If the low bits match, we can make these line up.
2471:
248	xor	%o0, %o1, %o3	! t = src ^ dst;
249	btst	1, %o3		! if (t & 1) {
250	be,a	1f
251	 btst	1, %o0		! [delay slot: if (src & 1)]
252
253	! low bits do not match, must copy by bytes.
2540:
255	ldsba	[%o0] %asi, %o4	!	do {
256	inc	%o0		!		(++dst)[-1] = *src++;
257	inc	%o1
258	deccc	%o2
259	bnz	0b		!	} while (--len != 0);
260	 stb	%o4, [%o1 - 1]
261	ba	Lcopyin_done
262	 clr	%o0
263	NOTREACHED
264
265	! lowest bit matches, so we can copy by words, if nothing else
2661:
267	be,a	1f		! if (src & 1) {
268	 btst	2, %o3		! [delay slot: if (t & 2)]
269
270	! although low bits match, both are 1: must copy 1 byte to align
271	ldsba	[%o0] %asi, %o4	!	*dst++ = *src++;
272	stb	%o4, [%o1]
273	inc	%o0
274	inc	%o1
275	dec	%o2		!	len--;
276	btst	2, %o3		! } [if (t & 2)]
2771:
278	be,a	1f		! if (t & 2) {
279	 btst	2, %o0		! [delay slot: if (src & 2)]
280	dec	2, %o2		!	len -= 2;	(bias for the loop test)
2810:
282	ldsha	[%o0] %asi, %o4	!	do {
283	sth	%o4, [%o1]	!		*(short *)dst = *(short *)src;
284	inc	2, %o0		!		dst += 2, src += 2;
285	deccc	2, %o2		!	} while ((len -= 2) >= 0);
286	bge	0b
287	 inc	2, %o1
288	b	Lcopyin_mopb	!	goto mop_up_byte;
289	 btst	1, %o2		! } [delay slot: if (len & 1)]
290	NOTREACHED
291
292	! low two bits match, so we can copy by longwords
2931:
294	be,a	1f		! if (src & 2) {
295	 btst	4, %o3		! [delay slot: if (t & 4)]
296
297	! although low 2 bits match, they are 10: must copy one short to align
298	ldsha	[%o0] %asi, %o4	!	*(short *)dst = *(short *)src;
299	sth	%o4, [%o1]
300	inc	2, %o0		!	src += 2;
301	inc	2, %o1		!	dst += 2;
302	dec	2, %o2		!	len -= 2;
303	btst	4, %o3		! } [if (t & 4)]
3041:
305	be,a	1f		! if (t & 4) {
306	 btst	4, %o0		! [delay slot: if (src & 4)]
307	dec	4, %o2		!	len -= 4;	(bias for the loop test)
3080:
309	lduwa	[%o0] %asi, %o4	!	do {
310	st	%o4, [%o1]	!		*(int *)dst = *(int *)src;
311	inc	4, %o0		!		dst += 4, src += 4;
312	deccc	4, %o2		!	} while ((len -= 4) >= 0);
313	bge	0b
314	 inc	4, %o1
315	b	Lcopyin_mopw	!	goto mop_up_word_and_byte;
316	 btst	2, %o2		! } [delay slot: if (len & 2)]
317	NOTREACHED
318
319	! low three bits match, so we can copy by doublewords
3201:
321	be	1f		! if (src & 4) {
322	 dec	8, %o2		! [delay slot: len -= 8]
323	lduwa	[%o0] %asi, %o4	!	*(int *)dst = *(int *)src;
324	st	%o4, [%o1]
325	inc	4, %o0		!	dst += 4, src += 4, len -= 4;
326	inc	4, %o1
327	dec	4, %o2		! }
3281:
	! Here len is biased by -8.  Subtracting multiples of 8 keeps the
	! low three bits of %o2 equal to the bytes still to copy once the
	! loop exits, which is what the btst tests below rely on.
329Lcopyin_doubles:
330	ldxa	[%o0] %asi, %g1	! do {
331	stx	%g1, [%o1]	!	*(double *)dst = *(double *)src;
332	inc	8, %o0		!	dst += 8, src += 8;
333	deccc	8, %o2		! } while ((len -= 8) >= 0);
334	bge	Lcopyin_doubles
335	 inc	8, %o1
336
337	! check for a usual case again (save work)
338	btst	7, %o2		! if ((len & 7) == 0)
339	be	Lcopyin_done	!	goto copyin_done;
340
341	 btst	4, %o2		! if ((len & 4) == 0)
342	be,a	Lcopyin_mopw	!	goto mop_up_word_and_byte;
343	 btst	2, %o2		! [delay slot: if (len & 2)]
344	lduwa	[%o0] %asi, %o4	!	*(int *)dst = *(int *)src;
345	st	%o4, [%o1]
346	inc	4, %o0		!	src += 4;
347	inc	4, %o1		!	dst += 4;
348	btst	2, %o2		! } [if (len & 2)]
349
3501:
351	! mop up trailing word (if present) and byte (if present).
	! Entered with the condition codes of "btst 2, len".
352Lcopyin_mopw:
353	be	Lcopyin_mopb	! no word, go mop up byte
354	 btst	1, %o2		! [delay slot: if (len & 1)]
355	ldsha	[%o0] %asi, %o4	! *(short *)dst = *(short *)src;
356	be	Lcopyin_done	! if ((len & 1) == 0) goto done;
357	 sth	%o4, [%o1]
358	ldsba	[%o0 + 2] %asi, %o4	! dst[2] = src[2];
359	stb	%o4, [%o1 + 2]
360	ba	Lcopyin_done
361	 clr	%o0
362	NOTREACHED
363
364	! mop up trailing byte (if present).
	! Entered with the condition codes of "btst 1, len".
365Lcopyin_mopb:
366	be,a	Lcopyin_done
367	 nop
368	ldsba	[%o0] %asi, %o4
369	stb	%o4, [%o1]
370
	! Common success exit: clear pcb_onfault, restore %asi, return 0.
371Lcopyin_done:
372	sethi	%hi(CPCB), %o3
373!	stb	%o4,[%o1]	! Store last byte -- should not be needed
374	LDPTR	[%o3 + %lo(CPCB)], %o3
375	membar	#Sync
376	STPTR	%g0, [%o3 + PCB_ONFAULT]
377	wr	%g0, ASI_PRIMARY_NOFAULT, %asi		! Restore ASI
378	retl
379	 clr	%o0			! return 0
380
381/*
382 * copyout(src, dst, len)
383 *
384 * Copy specified amount of data from kernel to user space.
385 * Just like copyin, except that the `dst' addresses are user space
386 * rather than the `src' addresses.
387 *
388 * This is a modified version of memcpy that uses ASI_AIUS.  When
389 * memcpy is optimized to use block copy ASIs, this should be also.
 *
 * Register use: %o0 = src (kernel address), %o1 = dst (user address,
 * always written through %asi, which is set to ASI_AIUS here),
 * %o2 = len; %o3, %o4 and %g1 are scratch.  Returns 0 in %o0 on success
 * and leaves %asi set to ASI_PRIMARY_NOFAULT.  While the copy runs,
 * cpcb->pcb_onfault points at Lcopyfault, so a faulting access resumes
 * there rather than in this routine.
390 */
391 /*
392  * This needs to be reimplemented to really do the copy.
  *
  * NOTE(review): the remark above looks stale -- the code below performs
  * the whole copy itself and does not call memcpy.
393  */
394ENTRY(copyout)
395	/*
396	 * ******NOTE****** this depends on memcpy() not using %g7
	 *
	 * NOTE(review): probably stale as well; the only use of %g7 left
	 * in this routine is commented out.
397	 */
398#ifdef NOTDEF_DEBUG
	! Debug trace: print the arguments and the secondary context
	! register from a fresh register window.
399	save	%sp, -CC64FSZ, %sp
400	set	1f, %o0
401	mov	%i0, %o1
402	set	CTX_SECONDARY, %o4
403	mov	%i1, %o2
404	ldxa	[%o4] ASI_DMMU, %o4
405	call	printf
406	 mov	%i2, %o3
407	restore
408	.data
4091:	.asciz	"copyout: src=%x dest=%x len=%x ctx=%d\n"
410	_ALIGN
411	.text
412#endif
413Ldocopy:
414	sethi	%hi(CPCB), %o3
415	wr	%g0, ASI_AIUS, %asi	! %asi stores below hit user space
416	LDPTR	[%o3 + %lo(CPCB)], %o3
417	set	Lcopyfault, %o4
418!	mov	%o7, %g7		! save return address
419	membar	#Sync
420	STPTR	%o4, [%o3 + PCB_ONFAULT]	! cpcb->pcb_onfault = Lcopyfault
421	cmp	%o2, BCOPY_SMALL
422Lcopyout_start:
423	membar	#StoreStore	! sits between the cmp and the bge that uses it
424	bge,a	Lcopyout_fancy	! if >= this many, go be fancy.
425	 btst	7, %o0		! (part of being fancy)
426
427	/*
428	 * Not much to copy, just do it a byte at a time.
429	 */
430	deccc	%o2		! while (--len >= 0)
431	bl	1f
432	 .empty
4330:
434	inc	%o0
435	ldsb	[%o0 - 1], %o4	! *dst++ = (++src)[-1];
436	stba	%o4, [%o1] %asi
437	deccc	%o2
438	bge	0b
439	 inc	%o1
4401:
441	ba	Lcopyout_done
442	 clr	%o0
443	NOTREACHED
444
445	/*
446	 * Plenty of data to copy, so try to do it optimally.
	 * Entered with the condition codes of "btst 7, %o0" (src & 7).
447	 */
448Lcopyout_fancy:
449	! check for common case first: everything lines up.
450!	btst	7, %o0		! done already
451	bne	1f
452	 .empty
453	btst	7, %o1
454	be,a	Lcopyout_doubles
455	 dec	8, %o2		! if all lined up, len -= 8, goto copyout_doubles
456
457	! If the low bits match, we can make these line up.
4581:
459	xor	%o0, %o1, %o3	! t = src ^ dst;
460	btst	1, %o3		! if (t & 1) {
461	be,a	1f
462	 btst	1, %o0		! [delay slot: if (src & 1)]
463
464	! low bits do not match, must copy by bytes.
4650:
466	ldsb	[%o0], %o4	!	do {
467	inc	%o0		!		(++dst)[-1] = *src++;
468	inc	%o1
469	deccc	%o2
470	bnz	0b		!	} while (--len != 0);
471	 stba	%o4, [%o1 - 1] %asi
472	ba	Lcopyout_done
473	 clr	%o0
474	NOTREACHED
475
476	! lowest bit matches, so we can copy by words, if nothing else
4771:
478	be,a	1f		! if (src & 1) {
479	 btst	2, %o3		! [delay slot: if (t & 2)]
480
481	! although low bits match, both are 1: must copy 1 byte to align
482	ldsb	[%o0], %o4	!	*dst++ = *src++;
483	stba	%o4, [%o1] %asi
484	inc	%o0
485	inc	%o1
486	dec	%o2		!	len--;
487	btst	2, %o3		! } [if (t & 2)]
4881:
489	be,a	1f		! if (t & 2) {
490	 btst	2, %o0		! [delay slot: if (src & 2)]
491	dec	2, %o2		!	len -= 2;	(bias for the loop test)
4920:
493	ldsh	[%o0], %o4	!	do {
494	stha	%o4, [%o1] %asi	!		*(short *)dst = *(short *)src;
495	inc	2, %o0		!		dst += 2, src += 2;
496	deccc	2, %o2		!	} while ((len -= 2) >= 0);
497	bge	0b
498	 inc	2, %o1
499	b	Lcopyout_mopb	!	goto mop_up_byte;
500	 btst	1, %o2		! } [delay slot: if (len & 1)]
501	NOTREACHED
502
503	! low two bits match, so we can copy by longwords
5041:
505	be,a	1f		! if (src & 2) {
506	 btst	4, %o3		! [delay slot: if (t & 4)]
507
508	! although low 2 bits match, they are 10: must copy one short to align
509	ldsh	[%o0], %o4	!	*(short *)dst = *(short *)src;
510	stha	%o4, [%o1] %asi
511	inc	2, %o0		!	src += 2;
512	inc	2, %o1		!	dst += 2;
513	dec	2, %o2		!	len -= 2;
514	btst	4, %o3		! } [if (t & 4)]
5151:
516	be,a	1f		! if (t & 4) {
517	 btst	4, %o0		! [delay slot: if (src & 4)]
518	dec	4, %o2		!	len -= 4;	(bias for the loop test)
5190:
520	lduw	[%o0], %o4	!	do {
521	sta	%o4, [%o1] %asi	!		*(int *)dst = *(int *)src;
522	inc	4, %o0		!		dst += 4, src += 4;
523	deccc	4, %o2		!	} while ((len -= 4) >= 0);
524	bge	0b
525	 inc	4, %o1
526	b	Lcopyout_mopw	!	goto mop_up_word_and_byte;
527	 btst	2, %o2		! } [delay slot: if (len & 2)]
528	NOTREACHED
529
530	! low three bits match, so we can copy by doublewords
5311:
532	be	1f		! if (src & 4) {
533	 dec	8, %o2		! [delay slot: len -= 8]
534	lduw	[%o0], %o4	!	*(int *)dst = *(int *)src;
535	sta	%o4, [%o1] %asi
536	inc	4, %o0		!	dst += 4, src += 4, len -= 4;
537	inc	4, %o1
538	dec	4, %o2		! }
5391:
	! Here len is biased by -8.  Subtracting multiples of 8 keeps the
	! low three bits of %o2 equal to the bytes still to copy once the
	! loop exits, which is what the btst tests below rely on.
540Lcopyout_doubles:
541	ldx	[%o0], %g1	! do {
542	stxa	%g1, [%o1] %asi	!	*(double *)dst = *(double *)src;
543	inc	8, %o0		!	dst += 8, src += 8;
544	deccc	8, %o2		! } while ((len -= 8) >= 0);
545	bge	Lcopyout_doubles
546	 inc	8, %o1
547
548	! check for a usual case again (save work)
549	btst	7, %o2		! if ((len & 7) == 0)
550	be	Lcopyout_done	!	goto copyout_done;
551
552	 btst	4, %o2		! if ((len & 4) == 0)
553	be,a	Lcopyout_mopw	!	goto mop_up_word_and_byte;
554	 btst	2, %o2		! [delay slot: if (len & 2)]
555	lduw	[%o0], %o4	!	*(int *)dst = *(int *)src;
556	sta	%o4, [%o1] %asi
557	inc	4, %o0		!	src += 4;
558	inc	4, %o1		!	dst += 4;
559	btst	2, %o2		! } [if (len & 2)]
560
5611:
562	! mop up trailing word (if present) and byte (if present).
	! Entered with the condition codes of "btst 2, len".
563Lcopyout_mopw:
564	be	Lcopyout_mopb	! no word, go mop up byte
565	 btst	1, %o2		! [delay slot: if (len & 1)]
566	ldsh	[%o0], %o4	! *(short *)dst = *(short *)src;
567	be	Lcopyout_done	! if ((len & 1) == 0) goto done;
568	 stha	%o4, [%o1] %asi
569	ldsb	[%o0 + 2], %o4	! dst[2] = src[2];
570	stba	%o4, [%o1 + 2] %asi
571	ba	Lcopyout_done
572	 clr	%o0
573	NOTREACHED
574
575	! mop up trailing byte (if present).
	! Entered with the condition codes of "btst 1, len".
576Lcopyout_mopb:
577	be,a	Lcopyout_done
578	 nop
579	ldsb	[%o0], %o4
580	stba	%o4, [%o1] %asi
581
	! Common success exit: clear pcb_onfault, restore %asi, return 0.
582Lcopyout_done:
583	sethi	%hi(CPCB), %o3
584	LDPTR	[%o3 + %lo(CPCB)], %o3
585	membar	#Sync
586	STPTR	%g0, [%o3 + PCB_ONFAULT]
587!	jmp	%g7 + 8		! Original instr
588	wr	%g0, ASI_PRIMARY_NOFAULT, %asi		! Restore ASI
589	membar	#StoreStore|#StoreLoad
590	retl			! New instr
591	 clr	%o0			! return 0
592
593! Copyin or copyout fault (also installed by _ucas_32 and _ucas_64).
594! Clear cpcb->pcb_onfault, restore the default %asi and return to the
595! caller of the faulting routine with retl, i.e. to [o7 + 8].  The
596! return through [g7 + 8] that this comment used to describe is not used.
! %o0 is not written here; presumably the trap code has already loaded
! the error value into it -- verify against the trap handler.
597Lcopyfault:
598	sethi	%hi(CPCB), %o3
599	LDPTR	[%o3 + %lo(CPCB)], %o3
600	STPTR	%g0, [%o3 + PCB_ONFAULT]	! cpcb->pcb_onfault = NULL
601	membar	#StoreStore|#StoreLoad
602#ifdef NOTDEF_DEBUG
	! Debug trace, printed from a fresh register window.
603	save	%sp, -CC64FSZ, %sp
604	set	1f, %o0
605	call	printf
606	 nop
607	restore
608	.data
6091:	.asciz	"copyfault: fault occurred\n"
610	_ALIGN
611	.text
612#endif
613	retl
614	 wr	%g0, ASI_PRIMARY_NOFAULT, %asi		! Restore ASI
615
616/*
617 * Compare-and-swap the pointer in the user-space.
618 *
619 * int	_ucas_32(volatile uint32_t *uptr, uint32_t old, uint32_t new,
620 *		 uint32_t *ret);
 *
 * %o0 = uptr (user address), %o1 = old, %o2 = new, %o3 = ret.
 * The 32-bit word at uptr is compared with old and, if equal, replaced
 * by new; the value found in memory comes back in %o2 and is stored to
 * *ret.  Returns 0 in %o0.  A fault on the user access is handled by
 * Lcopyfault (shared with copyin/copyout); *ret is not written then.
621 */
622ENTRY(_ucas_32)
623	sethi	%hi(CPCB), %o4
624	wr	%g0, ASI_AIUS, %asi			! casa below hits user space
625	LDPTR	[%o4 + %lo(CPCB)], %o4
626	set	Lcopyfault, %o5				! reusing copyin/copyout
627	membar	#Sync					! fault handler
628	STPTR	%o5, [%o4 + PCB_ONFAULT]
629
630	casa	[%o0] %asi, %o1, %o2			! cas, result in %o2
631
	! Success: drop the fault handler and restore %asi before *ret,
	! which is accessed with an ordinary store, is written.
632	sethi	%hi(CPCB), %o4
633	LDPTR	[%o4 + %lo(CPCB)], %o4
634	membar	#Sync
635	STPTR	%g0, [%o4 + PCB_ONFAULT]
636	wr	%g0, ASI_PRIMARY_NOFAULT, %asi		! Restore ASI
637	membar	#StoreStore|#StoreLoad
638	stw	%o2, [%o3]		! store the cas result in *result
639	retl
640	 clr	%o0			! return 0
641
642#ifdef _LP64
643/*
644 * Compare-and-swap the 64-bit integer in the user-space.
645 *
646 * int	_ucas_64(volatile uint64_t *uptr, uint64_t old, uint64_t new,
647 *		 uint64_t *ret);
 *
 * %o0 = uptr (user address), %o1 = old, %o2 = new, %o3 = ret.
 * The 64-bit word at uptr is compared with old and, if equal, replaced
 * by new; the value found in memory comes back in %o2 and is stored to
 * *ret.  Returns 0 in %o0.  A fault on the user access is handled by
 * Lcopyfault (shared with copyin/copyout); *ret is not written then.
 * Only built for _LP64 kernels.
648 */
649ENTRY(_ucas_64)
650	sethi	%hi(CPCB), %o4
651	wr	%g0, ASI_AIUS, %asi			! casxa below hits user space
652	LDPTR	[%o4 + %lo(CPCB)], %o4
653	set	Lcopyfault, %o5				! reusing copyin/copyout
654	membar	#Sync					! fault handler
655	STPTR	%o5, [%o4 + PCB_ONFAULT]
656
657	casxa	[%o0] %asi, %o1, %o2			! cas, result in %o2
658
	! Success: drop the fault handler and restore %asi before *ret,
	! which is accessed with an ordinary store, is written.
659	sethi	%hi(CPCB), %o4
660	LDPTR	[%o4 + %lo(CPCB)], %o4
661	membar	#Sync
662	STPTR	%g0, [%o4 + PCB_ONFAULT]
663	wr	%g0, ASI_PRIMARY_NOFAULT, %asi		! Restore ASI
664	membar	#StoreStore|#StoreLoad
665	stx	%o2, [%o3]		! store the cas result in *result
666	retl
667	 clr	%o0			! return 0
668#endif /* _LP64 */
669
670/**************************************************************************/
671
/*
 * Helper macros shared by the _ufetch_N and _ustore_N routines below.
 *
 * UFETCHSTORE_PROLOGUE loads cpcb into %o2 and points cpcb->pcb_onfault
 * at Lufetchstore_fault; it clobbers %o2 and %o3.  %o2 must stay intact
 * until the matching epilogue, which uses it to find pcb_onfault again.
 */
672#define	UFETCHSTORE_PROLOGUE						 \
673	sethi	%hi(CPCB), %o2		/* cpcb->pcb_onfault =   */ 	;\
674	LDPTR	[%o2 + %lo(CPCB)], %o2	/*    Lufetchstore_fault */ 	;\
675	set	Lufetchstore_fault, %o3				 	;\
676	STPTR	%o3, [%o2 + PCB_ONFAULT]			 	;\
677	membar	#Sync
678
/*
 * _UFETCHSTORE_EPILOGUE clears cpcb->pcb_onfault (cpcb expected in %o2).
 * This bare form, without the leading membar #Sync, is what the fault
 * handler Lufetchstore_fault uses.
 */
679#define	_UFETCHSTORE_EPILOGUE						 \
680	STPTR	%g0, [%o2 + PCB_ONFAULT] /* cpcb->pcb_onfault = NULL */	;\
681	membar	#StoreStore|#StoreLoad
682
/*
 * UFETCHSTORE_EPILOGUE is the normal-path form: a membar #Sync first,
 * presumably so that a pending fault from the user access is taken while
 * the handler is still installed, then the bare epilogue.
 */
683#define	UFETCHSTORE_EPILOGUE						 \
684	membar	#Sync							;\
685	_UFETCHSTORE_EPILOGUE
686
/* UFETCHSTORE_RETURN_SUCCESS returns 0 to the caller (clr in the delay slot). */
687#define	UFETCHSTORE_RETURN_SUCCESS					 \
688	retl								;\
689	 clr	%o0
690
691/* LINTSTUB: int _ufetch_8(const uint8_t *uaddr, uint8_t *valp); */
/*
 * Load one byte (zero-extended) from user address %o0 via ASI_AIUS and
 * store it through the kernel pointer in %o1.  Returns 0.  A fault on
 * the user load goes to Lufetchstore_fault and *valp is left unwritten.
 */
692ENTRY(_ufetch_8)
693	UFETCHSTORE_PROLOGUE
694	lduba	[%o0] ASI_AIUS, %o0	! %o0 = *uaddr
695	UFETCHSTORE_EPILOGUE
696	stb	%o0, [%o1]		! *valp = %o0	(handler already cleared)
697	UFETCHSTORE_RETURN_SUCCESS
698
699/* LINTSTUB: int _ufetch_16(const uint16_t *uaddr, uint16_t *valp); */
/*
 * Load one halfword (zero-extended) from user address %o0 via ASI_AIUS
 * and store it through the kernel pointer in %o1.  Returns 0.  A fault on
 * the user load goes to Lufetchstore_fault and *valp is left unwritten.
 */
700ENTRY(_ufetch_16)
701	UFETCHSTORE_PROLOGUE
702	lduha	[%o0] ASI_AIUS, %o0	! %o0 = *uaddr
703	UFETCHSTORE_EPILOGUE
704	sth	%o0, [%o1]		! *valp = %o0	(handler already cleared)
705	UFETCHSTORE_RETURN_SUCCESS
706
707/* LINTSTUB: int _ufetch_32(const uint32_t *uaddr, uint32_t *valp); */
/*
 * Load one 32-bit word (zero-extended) from user address %o0 via ASI_AIUS
 * and store it through the kernel pointer in %o1.  Returns 0.  A fault on
 * the user load goes to Lufetchstore_fault and *valp is left unwritten.
 */
708ENTRY(_ufetch_32)
709	UFETCHSTORE_PROLOGUE
710	lduwa	[%o0] ASI_AIUS, %o0	! %o0 = *uaddr
711	UFETCHSTORE_EPILOGUE
712	stw	%o0, [%o1]		! *valp = %o0	(handler already cleared)
713	UFETCHSTORE_RETURN_SUCCESS
714
715#ifdef _LP64
716/* LINTSTUB: int _ufetch_64(const uint64_t *uaddr, uint64_t *valp); */
/*
 * Load one 64-bit word from user address %o0 via ASI_AIUS and store it
 * through the kernel pointer in %o1.  Returns 0.  A fault on the user
 * load goes to Lufetchstore_fault and *valp is left unwritten.
 * Only built for _LP64 kernels.
 */
717ENTRY(_ufetch_64)
718	UFETCHSTORE_PROLOGUE
719	ldxa	[%o0] ASI_AIUS, %o0	! %o0 = *uaddr
720	UFETCHSTORE_EPILOGUE
721	stx	%o0, [%o1]		! *valp = %o0	(handler already cleared)
722	UFETCHSTORE_RETURN_SUCCESS
723#endif /* _LP64 */
724
725/* LINTSTUB: int _ustore_8(uint8_t *uaddr, uint8_t val); */
/*
 * Store the low byte of %o1 to user address %o0 via ASI_AIUS.
 * Returns 0.  A fault on the user store goes to Lufetchstore_fault.
 */
726ENTRY(_ustore_8)
727	UFETCHSTORE_PROLOGUE
728	stba	%o1, [%o0] ASI_AIUS 	! *uaddr = val
729	UFETCHSTORE_EPILOGUE
730	UFETCHSTORE_RETURN_SUCCESS
731
732/* LINTSTUB: int _ustore_16(uint16_t *uaddr, uint16_t val); */
/*
 * Store the low halfword of %o1 to user address %o0 via ASI_AIUS.
 * Returns 0.  A fault on the user store goes to Lufetchstore_fault.
 */
733ENTRY(_ustore_16)
734	UFETCHSTORE_PROLOGUE
735	stha	%o1, [%o0] ASI_AIUS	! *uaddr = val
736	UFETCHSTORE_EPILOGUE
737	UFETCHSTORE_RETURN_SUCCESS
738
739/* LINTSTUB: int _ustore_32(uint32_t *uaddr, uint32_t val); */
/*
 * Store the low 32 bits of %o1 to user address %o0 via ASI_AIUS.
 * Returns 0.  A fault on the user store goes to Lufetchstore_fault.
 */
740ENTRY(_ustore_32)
741	UFETCHSTORE_PROLOGUE
742	stwa	%o1, [%o0] ASI_AIUS	! *uaddr = val
743	UFETCHSTORE_EPILOGUE
744	UFETCHSTORE_RETURN_SUCCESS
745
746#ifdef _LP64
747/* LINTSTUB: int _ustore_64(uint64_t *uaddr, uint64_t val); */
/*
 * Store the 64-bit value in %o1 to user address %o0 via ASI_AIUS.
 * Returns 0.  A fault on the user store goes to Lufetchstore_fault.
 * Only built for _LP64 kernels.
 */
748ENTRY(_ustore_64)
749	UFETCHSTORE_PROLOGUE
750	stxa	%o1, [%o0] ASI_AIUS	! *uaddr = val
751	UFETCHSTORE_EPILOGUE
752	UFETCHSTORE_RETURN_SUCCESS
753#endif /* _LP64 */
754
! Fault handler installed by UFETCHSTORE_PROLOGUE.  Expects cpcb still in
! %o2; clears cpcb->pcb_onfault and returns to the caller of the
! _ufetch_N / _ustore_N routine.  %o0 is not written here, so the error
! value is presumably loaded by the trap code -- verify there.
755Lufetchstore_fault:
756	_UFETCHSTORE_EPILOGUE		! (won't fit in delay slot)
757	retl
758	 nop				! error already in %o0
759
760/**************************************************************************/
761
762/* probeget and probeset are meant to be used during autoconfiguration */
763/*
764 * The following probably need to be changed, but to what I don't know.
765 */
766
767/*
768 * uint64_t
769 * probeget(addr, asi, size)
770 *	paddr_t addr;
771 *	int asi;
772 *	int size;
773 *
774 * Read or write a (byte,word,longword) from the given address.
775 * Like {fu,su}{byte,halfword,word} but our caller is supposed
776 * to know what he is doing... the address can be anywhere.
777 *
778 * We optimize for space, rather than time, here.
 *
 * This routine only reads: it loads size bytes (zero-extended) from addr
 * using the given ASI and returns the value.  size is tested bit by bit
 * (1, 2, 4); anything else falls through to the 8-byte load.  If the
 * access faults, the handler below (Lfsbail on _LP64, Lfsprobe otherwise)
 * returns -1 instead.  %asi is ASI_PRIMARY_NOFAULT again on the normal
 * return path.
 *
 * On non-_LP64 kernels the 64-bit addr arrives as a register pair and the
 * 64-bit result is returned as a pair (%o0, %o1).
779 */
780ENTRY(probeget)
781#ifndef _LP64
782	!! Shuffle the args around into LP64 format
783	COMBINE(%o0, %o1, %o0)
784	mov	%o2, %o1
785	mov	%o3, %o2
786#endif
787	mov	%o2, %o4
788	! %o0 = addr, %o1 = asi, %o4 = (1,2,4)
789	sethi	%hi(CPCB), %o2
790	LDPTR	[%o2 + %lo(CPCB)], %o2	! cpcb->pcb_onfault = Lfsbail or Lfsprobe;
791#ifdef _LP64
792	set	_C_LABEL(Lfsbail), %o5
793#else
794	set	_C_LABEL(Lfsprobe), %o5
795#endif
796	STPTR	%o5, [%o2 + PCB_ONFAULT]
	! NOTE(review): the test below reads %o0 (addr) although the comment
	! talks about asi (%o1).  As written, a zero result skips DLFLUSH and
	! leaves %o5 = 0, which is the opposite sense from the comment.
	! Confirm the intent before changing anything here.
797	or	%o0, 0x9, %o3		! if (PHYS_ASI(asi)) {
798	sub	%o3, 0x1d, %o3
799	brz,a	%o3, 0f
800	 mov	%g0, %o5		! (annulled: runs only when branching)
801	DLFLUSH(%o0,%o5)		!	flush cache line
802					! }
8030:
804#ifndef _LP64
	! Save %pstate in %g1 and flip PSTATE_AM (wrpr XORs its operands).
805	rdpr	%pstate, %g1
806	wrpr	%g1, PSTATE_AM, %pstate
807#endif
808	btst	1, %o4
809	wr	%o1, 0, %asi		! use the caller-supplied ASI
810	membar	#Sync
811	bz	0f			! if (len & 1)
812	 btst	2, %o4
813	ba,pt	%icc, 1f
814	 lduba	[%o0] %asi, %o0		!	value = *(char *)addr;
8150:
816	bz	0f			! if (len & 2)
817	 btst	4, %o4
818	ba,pt	%icc, 1f
819	 lduha	[%o0] %asi, %o0		!	value = *(short *)addr;
8200:
821	bz	0f			! if (len & 4)
822	 btst	8, %o4
823	ba,pt	%icc, 1f
824	 lda	[%o0] %asi, %o0		!	value = *(int *)addr;
8250:
	! no check of (len & 8): every other size ends up here
826	ldxa	[%o0] %asi, %o0		!	value = *(long *)addr;
8271:
828#ifndef _LP64
829	SPLIT(%o0, %o0, %o1)
830#endif
831	membar	#Sync
832#ifndef _LP64
833	wrpr	%g1, 0, %pstate		! put the saved %pstate back
834#endif
835	brz	%o5, 1f			! if (cache flush addr != 0)
836	 nop
837	DLFLUSH2(%o5)			!	flush cache line again
8381:
839	wr	%g0, ASI_PRIMARY_NOFAULT, %asi		! Restore default ASI
840	STPTR	%g0, [%o2 + PCB_ONFAULT]
841	retl				! made it, clear onfault and return
842	 membar	#StoreStore|#StoreLoad
843
844	/*
845	 * Fault handlers for probeget
	 *
	 * Lfsprobe is the one installed by probeget on non-_LP64 kernels.
	 * It expects cpcb in %o2 and, on non-_LP64, the saved %pstate in
	 * %g1.  It puts %pstate back, clears cpcb->pcb_onfault, restores
	 * the default %asi and returns -1 in both %o0 and %o1.
846	 */
847_C_LABEL(Lfsprobe):
848#ifndef _LP64
849	wrpr	%g1, 0, %pstate
850#endif
851	STPTR	%g0, [%o2 + PCB_ONFAULT]! error in r/w, clear pcb_onfault
852	mov	-1, %o1
853	wr	%g0, ASI_PRIMARY_NOFAULT, %asi		! Restore default ASI
854	membar	#StoreStore|#StoreLoad
855	retl				! and return error indicator
856	 mov	-1, %o0
857
	! Lfsbail: generic "access failed" exit, exported with .globl.
	! Installed as pcb_onfault by probeset and, on _LP64, by probeget.
	! Expects cpcb in %o2; clears cpcb->pcb_onfault and returns -1
	! (in %o1 as well on non-_LP64 kernels).
	! NOTE(review): unlike Lfsprobe this path does not restore %asi,
	! although both probe routines change it -- confirm that callers
	! do not depend on %asi afterwards.
858	.globl	_C_LABEL(Lfsbail)
859_C_LABEL(Lfsbail):
860	STPTR	%g0, [%o2 + PCB_ONFAULT]! error in r/w, clear pcb_onfault
861	membar	#StoreStore|#StoreLoad
862#ifndef _LP64
863	mov	-1, %o1
864#endif
865	retl				! and return error indicator
866	 mov	-1, %o0
867
868/*
869 * probeset(addr, asi, size, val)
870 *	paddr_t addr;
871 *	int asi;
872 *	int size;
873 *	long val;
874 *
875 * As above, but we return 0 on success.
 *
 * Store the low size bytes (1, 2, 4 or 8) of val to addr using the given
 * ASI.  Returns 0 on success and -1, via Lfsbail, if the access faults
 * or size is none of the supported values.  On non-_LP64 kernels the
 * 64-bit addr and val arrive as register pairs and are combined first.
 * %asi is ASI_PRIMARY_NOFAULT again on the success path.
876 */
877ENTRY(probeset)
878#ifndef _LP64
879	!! Shuffle the args around into LP64 format
880	COMBINE(%o0, %o1, %o0)
881	mov	%o2, %o1
882	mov	%o3, %o2
883	COMBINE(%o4, %o5, %o3)
884#endif
885	mov	%o2, %o4
886	! %o0 = addr, %o1 = asi, %o4 = (1,2,4,8), %o3 = val
887	sethi	%hi(CPCB), %o2		! Lfsbail requires CPCB in %o2
888	LDPTR	[%o2 + %lo(CPCB)], %o2	! cpcb->pcb_onfault = Lfsbail;
889	set	_C_LABEL(Lfsbail), %o5
890	STPTR	%o5, [%o2 + PCB_ONFAULT]
891	btst	1, %o4
892	wr	%o1, 0, %asi		! use the caller-supplied ASI
893	membar	#Sync
894	bz	0f			! if (len & 1)
895	 btst	2, %o4
896	ba,pt	%icc, 1f
897	 stba	%o3, [%o0] %asi		!	*(char *)addr = value;
8980:
899	bz	0f			! if (len & 2)
900	 btst	4, %o4
901	ba,pt	%icc, 1f
902	 stha	%o3, [%o0] %asi		!	*(short *)addr = value;
9030:
904	bz	0f			! if (len & 4)
905	 btst	8, %o4
906	ba,pt	%icc, 1f
907	 sta	%o3, [%o0] %asi		!	*(int *)addr = value;
9080:
	! The delay slot of the bz must not hold another branch: as a
	! branch couple, a taken bz would run a single instruction at
	! Lfsbail and then continue at 1:, reporting success for a bad size.
909	bz	Lfsbail			! if (!(len & 8)) fail: unsupported size
	 nop
910	ba,pt	%icc, 1f
911	 stxa	%o3, [%o0] %asi		!	*(long *)addr = value;  (all 64 bits)
9121:	membar	#Sync
913	clr	%o0			! made it, clear onfault and return 0
914	wr	%g0, ASI_PRIMARY_NOFAULT, %asi		! Restore default ASI
915	STPTR	%g0, [%o2 + PCB_ONFAULT]
916	retl
917	 membar	#StoreStore|#StoreLoad
918
919
920/*
921 * kcopy() is exactly like bcopy except that it set pcb_onfault such that
922 * when a fault occurs, it is able to return -1 to indicate this to the
923 * caller.
 *
 * Register use: %o0 = src, %o1 = dst, %o2 = len (all plain kernel
 * accesses, no alternate ASI); %o3, %o4 and %g5 are scratch; %o5 = cpcb
 * and %g1 = the previous pcb_onfault value for the whole routine.
 * Returns 0 in %o0 on success.  The previous handler is put back on
 * every exit path, including Lkcerr.
 *
 * NOTE(review): Lkcerr does not load %o0, so the error value
 * seen by the caller is presumably provided by the trap code rather
 * than being a literal -1 set here -- verify.
924 */
925ENTRY(kcopy)
926#ifdef DEBUG
	! Optional trace, printed only when the 0x80 bit of pmapdebug is set.
927	set	pmapdebug, %o4
928	ld	[%o4], %o4
929	btst	0x80, %o4	! PDB_COPY
930	bz,pt	%icc, 3f
931	 nop
932	save	%sp, -CC64FSZ, %sp
933	mov	%i0, %o1
934	set	2f, %o0
935	mov	%i1, %o2
936	call	printf
937	 mov	%i2, %o3
938!	ta	1; nop
939	restore
940	.data
9412:	.asciz	"kcopy(%p->%p,%x)\n"
942	_ALIGN
943	.text
9443:
945#endif
946	sethi	%hi(CPCB), %o5		! cpcb->pcb_onfault = Lkcerr;
947	LDPTR	[%o5 + %lo(CPCB)], %o5
948	set	Lkcerr, %o3
949	LDPTR	[%o5 + PCB_ONFAULT], %g1! save current onfault handler
950	membar	#LoadStore
951	STPTR	%o3, [%o5 + PCB_ONFAULT]
952	membar	#StoreStore|#StoreLoad
953
954	cmp	%o2, BCOPY_SMALL
955Lkcopy_start:
956	bge,a	Lkcopy_fancy	! if >= this many, go be fancy.
957	 btst	7, %o0		! (part of being fancy)
958
959	/*
960	 * Not much to copy, just do it a byte at a time.
961	 */
962	deccc	%o2		! while (--len >= 0)
963	bl	1f
964	 .empty
9650:
966	ldsb	[%o0], %o4	!	*dst++ = *src++;
967	inc	%o0
968	stb	%o4, [%o1]
969	deccc	%o2
970	bge	0b
971	 inc	%o1
9721:
973	membar	#Sync		! Make sure all faults are processed
974	STPTR	%g1, [%o5 + PCB_ONFAULT]! restore fault handler
975	membar	#StoreStore|#StoreLoad
976	retl
977	 clr	%o0
978	NOTREACHED
979
980	/*
981	 * Plenty of data to copy, so try to do it optimally.
	 * Entered with the condition codes of "btst 7, %o0" (src & 7).
982	 */
983Lkcopy_fancy:
984	! check for common case first: everything lines up.
985!	btst	7, %o0		! done already
986	bne	1f
987	 .empty
988	btst	7, %o1
989	be,a	Lkcopy_doubles
990	 dec	8, %o2		! if all lined up, len -= 8, goto kcopy_doubles
991
992	! If the low bits match, we can make these line up.
9931:
994	xor	%o0, %o1, %o3	! t = src ^ dst;
995	btst	1, %o3		! if (t & 1) {
996	be,a	1f
997	 btst	1, %o0		! [delay slot: if (src & 1)]
998
999	! low bits do not match, must copy by bytes.
10000:
1001	ldsb	[%o0], %o4	!	do {
1002	inc	%o0		!		*dst++ = *src++;
1003	stb	%o4, [%o1]
1004	deccc	%o2
1005	bnz	0b		!	} while (--len != 0);
1006	 inc	%o1
1007	membar	#Sync		! Make sure all traps are taken
1008	STPTR	%g1, [%o5 + PCB_ONFAULT]! restore fault handler
1009	membar	#StoreStore|#StoreLoad
1010	retl
1011	 clr	%o0
1012	NOTREACHED
1013
1014	! lowest bit matches, so we can copy by words, if nothing else
10151:
1016	be,a	1f		! if (src & 1) {
1017	 btst	2, %o3		! [delay slot: if (t & 2)]
1018
1019	! although low bits match, both are 1: must copy 1 byte to align
1020	ldsb	[%o0], %o4	!	*dst++ = *src++;
1021	inc	%o0
1022	stb	%o4, [%o1]
1023	dec	%o2		!	len--;
1024	inc	%o1
1025	btst	2, %o3		! } [if (t & 2)]
10261:
1027	be,a	1f		! if (t & 2) {
1028	 btst	2, %o0		! [delay slot: if (src & 2)]
1029	dec	2, %o2		!	len -= 2;	(bias for the loop test)
10300:
1031	ldsh	[%o0], %o4	!	do {
1032	inc	2, %o0		!		dst += 2, src += 2;
1033	sth	%o4, [%o1]	!		*(short *)dst = *(short *)src;
1034	deccc	2, %o2		!	} while ((len -= 2) >= 0);
1035	bge	0b
1036	 inc	2, %o1
1037	b	Lkcopy_mopb	!	goto mop_up_byte;
1038	 btst	1, %o2		! } [delay slot: if (len & 1)]
1039	NOTREACHED
1040
1041	! low two bits match, so we can copy by longwords
10421:
1043	be,a	1f		! if (src & 2) {
1044	 btst	4, %o3		! [delay slot: if (t & 4)]
1045
1046	! although low 2 bits match, they are 10: must copy one short to align
1047	ldsh	[%o0], %o4	!	*(short *)dst = *(short *)src;
1048	inc	2, %o0		!	src += 2;
1049	sth	%o4, [%o1]
1050	dec	2, %o2		!	len -= 2;
1051	inc	2, %o1		!	dst += 2;
1052	btst	4, %o3		! } [if (t & 4)]
10531:
1054	be,a	1f		! if (t & 4) {
1055	 btst	4, %o0		! [delay slot: if (src & 4)]
1056	dec	4, %o2		!	len -= 4;	(bias for the loop test)
10570:
1058	ld	[%o0], %o4	!	do {
1059	inc	4, %o0		!		dst += 4, src += 4;
1060	st	%o4, [%o1]	!		*(int *)dst = *(int *)src;
1061	deccc	4, %o2		!	} while ((len -= 4) >= 0);
1062	bge	0b
1063	 inc	4, %o1
1064	b	Lkcopy_mopw	!	goto mop_up_word_and_byte;
1065	 btst	2, %o2		! } [delay slot: if (len & 2)]
1066	NOTREACHED
1067
1068	! low three bits match, so we can copy by doublewords
10691:
1070	be	1f		! if (src & 4) {
1071	 dec	8, %o2		! [delay slot: len -= 8]
1072	ld	[%o0], %o4	!	*(int *)dst = *(int *)src;
1073	inc	4, %o0		!	dst += 4, src += 4, len -= 4;
1074	st	%o4, [%o1]
1075	dec	4, %o2		! }
1076	inc	4, %o1
10771:
	! Here len is biased by -8.  Subtracting multiples of 8 keeps the
	! low three bits of %o2 equal to the bytes still to copy once the
	! loop exits, which is what the btst tests below rely on.
1078Lkcopy_doubles:
1079	ldx	[%o0], %g5	! do {
1080	inc	8, %o0		!	dst += 8, src += 8;
1081	stx	%g5, [%o1]	!	*(double *)dst = *(double *)src;
1082	deccc	8, %o2		! } while ((len -= 8) >= 0);
1083	bge	Lkcopy_doubles
1084	 inc	8, %o1
1085
1086	! check for a usual case again (save work)
1087	btst	7, %o2		! if ((len & 7) == 0)
1088	be	Lkcopy_done	!	goto kcopy_done;
1089
1090	 btst	4, %o2		! if ((len & 4) == 0)
1091	be,a	Lkcopy_mopw	!	goto mop_up_word_and_byte;
1092	 btst	2, %o2		! [delay slot: if (len & 2)]
1093	ld	[%o0], %o4	!	*(int *)dst = *(int *)src;
1094	inc	4, %o0		!	src += 4;
1095	st	%o4, [%o1]
1096	inc	4, %o1		!	dst += 4;
1097	btst	2, %o2		! } [if (len & 2)]
1098
10991:
1100	! mop up trailing word (if present) and byte (if present).
	! Entered with the condition codes of "btst 2, len".
1101Lkcopy_mopw:
1102	be	Lkcopy_mopb	! no word, go mop up byte
1103	 btst	1, %o2		! [delay slot: if (len & 1)]
1104	ldsh	[%o0], %o4	! *(short *)dst = *(short *)src;
1105	be	Lkcopy_done	! if ((len & 1) == 0) goto done;
1106	 sth	%o4, [%o1]
1107	ldsb	[%o0 + 2], %o4	! dst[2] = src[2];
1108	stb	%o4, [%o1 + 2]
1109	membar	#Sync		! Make sure all traps are taken
1110	STPTR	%g1, [%o5 + PCB_ONFAULT]! restore fault handler
1111	membar	#StoreStore|#StoreLoad
1112	retl
1113	 clr	%o0
1114	NOTREACHED
1115
1116	! mop up trailing byte (if present).
	! Entered with the condition codes of "btst 1, len"; the annulled
	! load runs only when a byte is left, and 1: below stores it.
1117Lkcopy_mopb:
1118	bne,a	1f
1119	 ldsb	[%o0], %o4
1120
1121Lkcopy_done:
1122	membar	#Sync		! Make sure all traps are taken
1123	STPTR	%g1, [%o5 + PCB_ONFAULT]! restore fault handler
1124	membar	#StoreStore|#StoreLoad
1125	retl
1126	 clr	%o0
1127	NOTREACHED
1128
11291:
1130	stb	%o4, [%o1]
1131	membar	#Sync		! Make sure all traps are taken
1132	STPTR	%g1, [%o5 + PCB_ONFAULT]! restore fault handler
1133	membar	#StoreStore|#StoreLoad
1134	retl
1135	 clr	%o0
1136	NOTREACHED
1137
! Fault handler installed by kcopy.  Expects %o5 = cpcb and %g1 = the
! pcb_onfault value that was in place before kcopy ran; it puts that
! value back and returns to the caller of kcopy.  %o0 is not written
! here, so the error value is presumably loaded by the trap code --
! verify there.
1138Lkcerr:
1139#ifdef DEBUG
	! Optional trace, printed only when the 0x80 bit of pmapdebug is set.
1140	set	pmapdebug, %o4
1141	ld	[%o4], %o4
1142	btst	0x80, %o4	! PDB_COPY
1143	bz,pt	%icc, 3f
1144	 nop
1145	save	%sp, -CC64FSZ, %sp
1146	set	2f, %o0
1147	call	printf
1148	 nop
1149!	ta	1; nop
1150	restore
1151	.data
11522:	.asciz	"kcopy error\n"
1153	_ALIGN
1154	.text
11553:
1156#endif
1157	STPTR	%g1, [%o5 + PCB_ONFAULT]! restore fault handler
1158	retl				! and return error indicator
1159	 membar	#StoreStore|#StoreLoad
1160	NOTREACHED
1161