xref: /onnv-gate/usr/src/uts/sun4u/cpu/spitfire_copy.s (revision 0:68f95e015346)
1*0Sstevel@tonic-gate/*
2*0Sstevel@tonic-gate * CDDL HEADER START
3*0Sstevel@tonic-gate *
4*0Sstevel@tonic-gate * The contents of this file are subject to the terms of the
5*0Sstevel@tonic-gate * Common Development and Distribution License, Version 1.0 only
6*0Sstevel@tonic-gate * (the "License").  You may not use this file except in compliance
7*0Sstevel@tonic-gate * with the License.
8*0Sstevel@tonic-gate *
9*0Sstevel@tonic-gate * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10*0Sstevel@tonic-gate * or http://www.opensolaris.org/os/licensing.
11*0Sstevel@tonic-gate * See the License for the specific language governing permissions
12*0Sstevel@tonic-gate * and limitations under the License.
13*0Sstevel@tonic-gate *
14*0Sstevel@tonic-gate * When distributing Covered Code, include this CDDL HEADER in each
15*0Sstevel@tonic-gate * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16*0Sstevel@tonic-gate * If applicable, add the following below this CDDL HEADER, with the
17*0Sstevel@tonic-gate * fields enclosed by brackets "[]" replaced with your own identifying
18*0Sstevel@tonic-gate * information: Portions Copyright [yyyy] [name of copyright owner]
19*0Sstevel@tonic-gate *
20*0Sstevel@tonic-gate * CDDL HEADER END
21*0Sstevel@tonic-gate */
22*0Sstevel@tonic-gate/*
23*0Sstevel@tonic-gate * Copyright 2004 Sun Microsystems, Inc.  All rights reserved.
24*0Sstevel@tonic-gate * Use is subject to license terms.
25*0Sstevel@tonic-gate */
26*0Sstevel@tonic-gate
27*0Sstevel@tonic-gate#pragma ident	"%Z%%M%	%I%	%E% SMI"
28*0Sstevel@tonic-gate
29*0Sstevel@tonic-gate#include <sys/param.h>
30*0Sstevel@tonic-gate#include <sys/errno.h>
31*0Sstevel@tonic-gate#include <sys/asm_linkage.h>
32*0Sstevel@tonic-gate#include <sys/vtrace.h>
33*0Sstevel@tonic-gate#include <sys/machthread.h>
34*0Sstevel@tonic-gate#include <sys/clock.h>
35*0Sstevel@tonic-gate#include <sys/asi.h>
36*0Sstevel@tonic-gate#include <sys/fsr.h>
37*0Sstevel@tonic-gate#include <sys/privregs.h>
38*0Sstevel@tonic-gate
39*0Sstevel@tonic-gate#if !defined(lint)
40*0Sstevel@tonic-gate#include "assym.h"
41*0Sstevel@tonic-gate#endif	/* lint */
42*0Sstevel@tonic-gate
43*0Sstevel@tonic-gate
44*0Sstevel@tonic-gate/*
45*0Sstevel@tonic-gate * Pseudo-code to aid in understanding the control flow of the
46*0Sstevel@tonic-gate * bcopy routine.
47*0Sstevel@tonic-gate *
48*0Sstevel@tonic-gate * On entry to bcopy:
49*0Sstevel@tonic-gate *
50*0Sstevel@tonic-gate *	%l6 = curthread->t_lofault;
51*0Sstevel@tonic-gate *	used_block_copy = FALSE;			! %l6 &= ~1
52*0Sstevel@tonic-gate *	if (%l6 != NULL) {
53*0Sstevel@tonic-gate *		curthread->t_lofault = .copyerr;
54*0Sstevel@tonic-gate *		caller_error_handler = TRUE		! %l6 |= 2
55*0Sstevel@tonic-gate *	}
56*0Sstevel@tonic-gate *
57*0Sstevel@tonic-gate * 	if (length < VIS_COPY)
58*0Sstevel@tonic-gate * 		goto regular_copy;
59*0Sstevel@tonic-gate *
60*0Sstevel@tonic-gate * 	if (!use_vis)
61*0Sstevel@tonic-gate * 		goto regular_copy;
62*0Sstevel@tonic-gate *
63*0Sstevel@tonic-gate * 	if (curthread->t_lwp == NULL) {
64*0Sstevel@tonic-gate *		! Kernel threads do not have pcb's in which to store
65*0Sstevel@tonic-gate *		! the floating point state, disallow preemption during
66*0Sstevel@tonic-gate *		! the copy.
67*0Sstevel@tonic-gate * 		kpreempt_disable(curthread);
68*0Sstevel@tonic-gate *	}
69*0Sstevel@tonic-gate *
70*0Sstevel@tonic-gate * 	old_fprs = %fprs;
71*0Sstevel@tonic-gate * 	old_gsr = %gsr;
72*0Sstevel@tonic-gate * 	if (%fprs.fef) {
73*0Sstevel@tonic-gate *              ! If we need to save 4 blocks of fpregs then make sure
74*0Sstevel@tonic-gate *		! the length is still appropriate for that extra overhead.
75*0Sstevel@tonic-gate * 		if (length < (large_length + (64 * 4))) {
76*0Sstevel@tonic-gate * 			if (curthread->t_lwp == NULL)
77*0Sstevel@tonic-gate * 				kpreempt_enable(curthread);
78*0Sstevel@tonic-gate * 			goto regular_copy;
79*0Sstevel@tonic-gate * 		}
80*0Sstevel@tonic-gate * 		%fprs.fef = 1;
81*0Sstevel@tonic-gate * 		save current fpregs on stack using blockstore
82*0Sstevel@tonic-gate * 	} else {
83*0Sstevel@tonic-gate * 		%fprs.fef = 1;
84*0Sstevel@tonic-gate * 	}
85*0Sstevel@tonic-gate *
86*0Sstevel@tonic-gate * 	used_block_copy = 1;				! %l6 |= 1
87*0Sstevel@tonic-gate * 	do_blockcopy_here;
88*0Sstevel@tonic-gate *
89*0Sstevel@tonic-gate * In lofault handler:
90*0Sstevel@tonic-gate *	curthread->t_lofault = .copyerr2;
91*0Sstevel@tonic-gate *	Continue on with the normal exit handler
92*0Sstevel@tonic-gate *
93*0Sstevel@tonic-gate * On exit:
94*0Sstevel@tonic-gate *	call_kpreempt = 0;
95*0Sstevel@tonic-gate * 	if (used_block_copy) {				! %l6 & 1
96*0Sstevel@tonic-gate * 		%gsr = old_gsr;
97*0Sstevel@tonic-gate * 		if (old_fprs & FPRS_FEF)
98*0Sstevel@tonic-gate * 			restore fpregs from stack using blockload
99*0Sstevel@tonic-gate *		else
100*0Sstevel@tonic-gate *			zero fpregs
101*0Sstevel@tonic-gate * 		%fprs = old_fprs;
102*0Sstevel@tonic-gate * 		if (curthread->t_lwp == NULL) {
103*0Sstevel@tonic-gate *			kpreempt_enable(curthread);
104*0Sstevel@tonic-gate *			call_kpreempt = 1;
105*0Sstevel@tonic-gate *		}
106*0Sstevel@tonic-gate * 	}
107*0Sstevel@tonic-gate * 	curthread->t_lofault = (%l6 & ~3);
108*0Sstevel@tonic-gate *	if (call_kpreempt)
109*0Sstevel@tonic-gate *		kpreempt(%pil);
110*0Sstevel@tonic-gate * 	return (0)
111*0Sstevel@tonic-gate *
112*0Sstevel@tonic-gate * In second lofault handler (.copyerr2):
113*0Sstevel@tonic-gate *	We've tried to restore fp state from the stack and failed.  To
114*0Sstevel@tonic-gate *	avoid returning with a corrupted fp state, we panic.
115*0Sstevel@tonic-gate */
116*0Sstevel@tonic-gate
117*0Sstevel@tonic-gate/*
118*0Sstevel@tonic-gate * Notes on preserving existing fp state:
119*0Sstevel@tonic-gate *
120*0Sstevel@tonic-gate * When a copyOP decides to use fp we may have to preserve existing
121*0Sstevel@tonic-gate * floating point state.  It is not the caller's state that we need to
122*0Sstevel@tonic-gate * preserve - the rest of the kernel does not use fp and, anyway, fp
123*0Sstevel@tonic-gate * registers are volatile across a call.  Some examples:
124*0Sstevel@tonic-gate *
125*0Sstevel@tonic-gate *	- userland has fp state and is interrupted (device interrupt
126*0Sstevel@tonic-gate *	  or trap) and within the interrupt/trap handling we use
127*0Sstevel@tonic-gate *	  bcopy()
128*0Sstevel@tonic-gate *	- another (higher level) interrupt or trap handler uses bcopy
129*0Sstevel@tonic-gate *	  while a bcopy from an earlier interrupt is still active
130*0Sstevel@tonic-gate *	- an asynchronous error trap occurs while fp state exists (in
131*0Sstevel@tonic-gate *	  userland or in kernel copy) and the tl0 component of the handling
132*0Sstevel@tonic-gate *	  uses bcopy
133*0Sstevel@tonic-gate *	- a user process with fp state incurs a copy-on-write fault and
134*0Sstevel@tonic-gate *	  hwblkpagecopy always uses fp
135*0Sstevel@tonic-gate *
136*0Sstevel@tonic-gate * We therefore need a per-call place in which to preserve fp state -
137*0Sstevel@tonic-gate * using our stack is ideal (and since fp copy cannot be leaf optimized
138*0Sstevel@tonic-gate * because of calls it makes, this is no hardship).
139*0Sstevel@tonic-gate *
140*0Sstevel@tonic-gate * To make sure that floating point state is always saved and restored
141*0Sstevel@tonic-gate * correctly, the following "big rules" must be followed when the floating
142*0Sstevel@tonic-gate * point registers will be used:
143*0Sstevel@tonic-gate *
144*0Sstevel@tonic-gate * 1. %l6 always holds the caller's lofault handler.  Also in this register,
145*0Sstevel@tonic-gate *    the low-order bit (FPUSED_FLAG, value 1) indicates that the floating
146*0Sstevel@tonic-gate *    point registers are in use.  The next bit (BCOPY_FLAG, value 2)
 *    indicates that the call was to bcopy.
147*0Sstevel@tonic-gate *
148*0Sstevel@tonic-gate * 2. The FPUSED flag indicates that all FP state has been successfully stored
149*0Sstevel@tonic-gate *    on the stack.  It should not be set until this save has been completed.
150*0Sstevel@tonic-gate *
151*0Sstevel@tonic-gate * 3. The FPUSED flag should not be cleared on exit until all FP state has
152*0Sstevel@tonic-gate *    been restored from the stack.  If an error occurs while restoring
153*0Sstevel@tonic-gate *    data from the stack, the error handler can check this flag to see if
154*0Sstevel@tonic-gate *    a restore is necessary.
155*0Sstevel@tonic-gate *
156*0Sstevel@tonic-gate * 4. Code run under the new lofault handler must be kept to a minimum.  In
157*0Sstevel@tonic-gate *    particular, any calls to kpreempt() should not be made until after the
158*0Sstevel@tonic-gate *    lofault handler has been restored.
159*0Sstevel@tonic-gate */
160*0Sstevel@tonic-gate
161*0Sstevel@tonic-gate/*
162*0Sstevel@tonic-gate * This shadows sys/machsystm.h which can't be included due to the lack of
163*0Sstevel@tonic-gate * _ASM guards in include files it references. Change it here, change it there.
 *
 * Byte-count threshold for the VIS path: .do_copy compares the count in
 * %i2 against this value and branches to .bcb_punt when the count is lower.
164*0Sstevel@tonic-gate */
165*0Sstevel@tonic-gate#define VIS_COPY_THRESHOLD 900
166*0Sstevel@tonic-gate
167*0Sstevel@tonic-gate/*
168*0Sstevel@tonic-gate * Copies of this many bytes or fewer are always done byte-for-byte.
 *
 * NOTE(review): the small-count test at .do_copy compares against a literal
 * 12 rather than SMALL_LIMIT -- confirm where this constant is actually used.
169*0Sstevel@tonic-gate */
170*0Sstevel@tonic-gate#define	SMALL_LIMIT	7
171*0Sstevel@tonic-gate
172*0Sstevel@tonic-gate/*
173*0Sstevel@tonic-gate * Flags set in the lower bits of the t_lofault address:
174*0Sstevel@tonic-gate * FPUSED_FLAG: The FP registers were in use and must be restored
175*0Sstevel@tonic-gate * BCOPY_FLAG: Set for bcopy calls, cleared for kcopy calls
176*0Sstevel@tonic-gate * COPY_FLAGS: Both of the above
177*0Sstevel@tonic-gate *
178*0Sstevel@tonic-gate * Other flags:
179*0Sstevel@tonic-gate * KPREEMPT_FLAG: kpreempt needs to be called
 *
 * FPUSED_FLAG and BCOPY_FLAG are kept in %l6 alongside the saved handler
 * address and are stripped with COPY_FLAGS before that address is written
 * back to t_lofault.  KPREEMPT_FLAG is not stored in %l6: .copyerr builds
 * it in %l1 together with a copy of BCOPY_FLAG.
180*0Sstevel@tonic-gate */
181*0Sstevel@tonic-gate#define	FPUSED_FLAG	1
182*0Sstevel@tonic-gate#define BCOPY_FLAG	2
183*0Sstevel@tonic-gate#define	COPY_FLAGS	(FPUSED_FLAG | BCOPY_FLAG)
184*0Sstevel@tonic-gate#define	KPREEMPT_FLAG	4
185*0Sstevel@tonic-gate
186*0Sstevel@tonic-gate/*
187*0Sstevel@tonic-gate * Size of stack frame in order to accommodate a 64-byte aligned
188*0Sstevel@tonic-gate * floating-point register save area and 2 32-bit temp locations.
 *
 * The register save area is budgeted at five 64-byte units: .copyerr
 * reloads four 64-byte blocks from it after rounding the start address
 * down to a 64-byte boundary, so one unit is alignment slack.
 *
 * SAVED_FPRS_OFFSET and SAVED_GSR_OFFSET are subtracted from
 * %fp + STACK_BIAS to address the two 32-bit slots holding the saved
 * %fprs and %gsr values.
189*0Sstevel@tonic-gate */
190*0Sstevel@tonic-gate#define	HWCOPYFRAMESIZE	((64 * 5) + (2 * 4))
191*0Sstevel@tonic-gate
192*0Sstevel@tonic-gate#define SAVED_FPREGS_OFFSET	(64 * 5)
193*0Sstevel@tonic-gate#define	SAVED_FPRS_OFFSET	(SAVED_FPREGS_OFFSET + 4)
194*0Sstevel@tonic-gate#define	SAVED_GSR_OFFSET	(SAVED_FPRS_OFFSET + 4)
195*0Sstevel@tonic-gate
196*0Sstevel@tonic-gate/*
197*0Sstevel@tonic-gate * Common macros used by the various versions of the block copy
198*0Sstevel@tonic-gate * routines in this file.
199*0Sstevel@tonic-gate */
200*0Sstevel@tonic-gate
/*
 * FZERO: write zero to every even-numbered fp register %f0 through %f62.
 * %f0 and %f2 are cleared with fzero; each remaining register is then
 * loaded with either %f0 + %f2 or %f0 * %f2, both of which are zero.
 * .copyerr uses this when the saved %fprs shows no fp state to restore.
 * NOTE(review): the faddd/fmuld alternation is presumably there to spread
 * the work across separate fp add and multiply units -- confirm.
 */
201*0Sstevel@tonic-gate#define	FZERO				\
202*0Sstevel@tonic-gate	fzero	%f0			;\
203*0Sstevel@tonic-gate	fzero	%f2			;\
204*0Sstevel@tonic-gate	faddd	%f0, %f2, %f4		;\
205*0Sstevel@tonic-gate	fmuld	%f0, %f2, %f6		;\
206*0Sstevel@tonic-gate	faddd	%f0, %f2, %f8		;\
207*0Sstevel@tonic-gate	fmuld	%f0, %f2, %f10		;\
208*0Sstevel@tonic-gate	faddd	%f0, %f2, %f12		;\
209*0Sstevel@tonic-gate	fmuld	%f0, %f2, %f14		;\
210*0Sstevel@tonic-gate	faddd	%f0, %f2, %f16		;\
211*0Sstevel@tonic-gate	fmuld	%f0, %f2, %f18		;\
212*0Sstevel@tonic-gate	faddd	%f0, %f2, %f20		;\
213*0Sstevel@tonic-gate	fmuld	%f0, %f2, %f22		;\
214*0Sstevel@tonic-gate	faddd	%f0, %f2, %f24		;\
215*0Sstevel@tonic-gate	fmuld	%f0, %f2, %f26		;\
216*0Sstevel@tonic-gate	faddd	%f0, %f2, %f28		;\
217*0Sstevel@tonic-gate	fmuld	%f0, %f2, %f30		;\
218*0Sstevel@tonic-gate	faddd	%f0, %f2, %f32		;\
219*0Sstevel@tonic-gate	fmuld	%f0, %f2, %f34		;\
220*0Sstevel@tonic-gate	faddd	%f0, %f2, %f36		;\
221*0Sstevel@tonic-gate	fmuld	%f0, %f2, %f38		;\
222*0Sstevel@tonic-gate	faddd	%f0, %f2, %f40		;\
223*0Sstevel@tonic-gate	fmuld	%f0, %f2, %f42		;\
224*0Sstevel@tonic-gate	faddd	%f0, %f2, %f44		;\
225*0Sstevel@tonic-gate	fmuld	%f0, %f2, %f46		;\
226*0Sstevel@tonic-gate	faddd	%f0, %f2, %f48		;\
227*0Sstevel@tonic-gate	fmuld	%f0, %f2, %f50		;\
228*0Sstevel@tonic-gate	faddd	%f0, %f2, %f52		;\
229*0Sstevel@tonic-gate	fmuld	%f0, %f2, %f54		;\
230*0Sstevel@tonic-gate	faddd	%f0, %f2, %f56		;\
231*0Sstevel@tonic-gate	fmuld	%f0, %f2, %f58		;\
232*0Sstevel@tonic-gate	faddd	%f0, %f2, %f60		;\
233*0Sstevel@tonic-gate	fmuld	%f0, %f2, %f62
234*0Sstevel@tonic-gate
235*0Sstevel@tonic-gate
/*
 * FALIGN_Dn family (n = 0, 2, 4, ... 46).
 *
 * Each macro issues eight faligndata instructions.  The inputs are nine
 * consecutive even-numbered double registers starting at %dn, drawn from
 * %d0..%d46; the sequence wraps from %d46 back to %d0.  The eight results
 * are always written to %d48..%d62, i.e. 64 bytes of output per macro.
 *
 * faligndata is the VIS instruction that extracts eight bytes from the
 * concatenation of its two source registers at the byte offset held in
 * %gsr; see the VIS instruction set documentation for the exact semantics.
 *
 * The macros below are grouped so that the start register advances by 16
 * within each group of three (D0/D16/D32, D2/D18/D34, ...).
 */
236*0Sstevel@tonic-gate#define	FALIGN_D0			\
237*0Sstevel@tonic-gate	faligndata %d0, %d2, %d48	;\
238*0Sstevel@tonic-gate	faligndata %d2, %d4, %d50	;\
239*0Sstevel@tonic-gate	faligndata %d4, %d6, %d52	;\
240*0Sstevel@tonic-gate	faligndata %d6, %d8, %d54	;\
241*0Sstevel@tonic-gate	faligndata %d8, %d10, %d56	;\
242*0Sstevel@tonic-gate	faligndata %d10, %d12, %d58	;\
243*0Sstevel@tonic-gate	faligndata %d12, %d14, %d60	;\
244*0Sstevel@tonic-gate	faligndata %d14, %d16, %d62
245*0Sstevel@tonic-gate
246*0Sstevel@tonic-gate#define	FALIGN_D16			\
247*0Sstevel@tonic-gate	faligndata %d16, %d18, %d48	;\
248*0Sstevel@tonic-gate	faligndata %d18, %d20, %d50	;\
249*0Sstevel@tonic-gate	faligndata %d20, %d22, %d52	;\
250*0Sstevel@tonic-gate	faligndata %d22, %d24, %d54	;\
251*0Sstevel@tonic-gate	faligndata %d24, %d26, %d56	;\
252*0Sstevel@tonic-gate	faligndata %d26, %d28, %d58	;\
253*0Sstevel@tonic-gate	faligndata %d28, %d30, %d60	;\
254*0Sstevel@tonic-gate	faligndata %d30, %d32, %d62
255*0Sstevel@tonic-gate
256*0Sstevel@tonic-gate#define	FALIGN_D32			\
257*0Sstevel@tonic-gate	faligndata %d32, %d34, %d48	;\
258*0Sstevel@tonic-gate	faligndata %d34, %d36, %d50	;\
259*0Sstevel@tonic-gate	faligndata %d36, %d38, %d52	;\
260*0Sstevel@tonic-gate	faligndata %d38, %d40, %d54	;\
261*0Sstevel@tonic-gate	faligndata %d40, %d42, %d56	;\
262*0Sstevel@tonic-gate	faligndata %d42, %d44, %d58	;\
263*0Sstevel@tonic-gate	faligndata %d44, %d46, %d60	;\
264*0Sstevel@tonic-gate	faligndata %d46, %d0, %d62
265*0Sstevel@tonic-gate
266*0Sstevel@tonic-gate#define	FALIGN_D2			\
267*0Sstevel@tonic-gate	faligndata %d2, %d4, %d48	;\
268*0Sstevel@tonic-gate	faligndata %d4, %d6, %d50	;\
269*0Sstevel@tonic-gate	faligndata %d6, %d8, %d52	;\
270*0Sstevel@tonic-gate	faligndata %d8, %d10, %d54	;\
271*0Sstevel@tonic-gate	faligndata %d10, %d12, %d56	;\
272*0Sstevel@tonic-gate	faligndata %d12, %d14, %d58	;\
273*0Sstevel@tonic-gate	faligndata %d14, %d16, %d60	;\
274*0Sstevel@tonic-gate	faligndata %d16, %d18, %d62
275*0Sstevel@tonic-gate
276*0Sstevel@tonic-gate#define	FALIGN_D18			\
277*0Sstevel@tonic-gate	faligndata %d18, %d20, %d48	;\
278*0Sstevel@tonic-gate	faligndata %d20, %d22, %d50	;\
279*0Sstevel@tonic-gate	faligndata %d22, %d24, %d52	;\
280*0Sstevel@tonic-gate	faligndata %d24, %d26, %d54	;\
281*0Sstevel@tonic-gate	faligndata %d26, %d28, %d56	;\
282*0Sstevel@tonic-gate	faligndata %d28, %d30, %d58	;\
283*0Sstevel@tonic-gate	faligndata %d30, %d32, %d60	;\
284*0Sstevel@tonic-gate	faligndata %d32, %d34, %d62
285*0Sstevel@tonic-gate
286*0Sstevel@tonic-gate#define	FALIGN_D34			\
287*0Sstevel@tonic-gate	faligndata %d34, %d36, %d48	;\
288*0Sstevel@tonic-gate	faligndata %d36, %d38, %d50	;\
289*0Sstevel@tonic-gate	faligndata %d38, %d40, %d52	;\
290*0Sstevel@tonic-gate	faligndata %d40, %d42, %d54	;\
291*0Sstevel@tonic-gate	faligndata %d42, %d44, %d56	;\
292*0Sstevel@tonic-gate	faligndata %d44, %d46, %d58	;\
293*0Sstevel@tonic-gate	faligndata %d46, %d0, %d60	;\
294*0Sstevel@tonic-gate	faligndata %d0, %d2, %d62
295*0Sstevel@tonic-gate
296*0Sstevel@tonic-gate#define	FALIGN_D4			\
297*0Sstevel@tonic-gate	faligndata %d4, %d6, %d48	;\
298*0Sstevel@tonic-gate	faligndata %d6, %d8, %d50	;\
299*0Sstevel@tonic-gate	faligndata %d8, %d10, %d52	;\
300*0Sstevel@tonic-gate	faligndata %d10, %d12, %d54	;\
301*0Sstevel@tonic-gate	faligndata %d12, %d14, %d56	;\
302*0Sstevel@tonic-gate	faligndata %d14, %d16, %d58	;\
303*0Sstevel@tonic-gate	faligndata %d16, %d18, %d60	;\
304*0Sstevel@tonic-gate	faligndata %d18, %d20, %d62
305*0Sstevel@tonic-gate
306*0Sstevel@tonic-gate#define	FALIGN_D20			\
307*0Sstevel@tonic-gate	faligndata %d20, %d22, %d48	;\
308*0Sstevel@tonic-gate	faligndata %d22, %d24, %d50	;\
309*0Sstevel@tonic-gate	faligndata %d24, %d26, %d52	;\
310*0Sstevel@tonic-gate	faligndata %d26, %d28, %d54	;\
311*0Sstevel@tonic-gate	faligndata %d28, %d30, %d56	;\
312*0Sstevel@tonic-gate	faligndata %d30, %d32, %d58	;\
313*0Sstevel@tonic-gate	faligndata %d32, %d34, %d60	;\
314*0Sstevel@tonic-gate	faligndata %d34, %d36, %d62
315*0Sstevel@tonic-gate
316*0Sstevel@tonic-gate#define	FALIGN_D36			\
317*0Sstevel@tonic-gate	faligndata %d36, %d38, %d48	;\
318*0Sstevel@tonic-gate	faligndata %d38, %d40, %d50	;\
319*0Sstevel@tonic-gate	faligndata %d40, %d42, %d52	;\
320*0Sstevel@tonic-gate	faligndata %d42, %d44, %d54	;\
321*0Sstevel@tonic-gate	faligndata %d44, %d46, %d56	;\
322*0Sstevel@tonic-gate	faligndata %d46, %d0, %d58	;\
323*0Sstevel@tonic-gate	faligndata %d0, %d2, %d60	;\
324*0Sstevel@tonic-gate	faligndata %d2, %d4, %d62
325*0Sstevel@tonic-gate
326*0Sstevel@tonic-gate#define	FALIGN_D6			\
327*0Sstevel@tonic-gate	faligndata %d6, %d8, %d48	;\
328*0Sstevel@tonic-gate	faligndata %d8, %d10, %d50	;\
329*0Sstevel@tonic-gate	faligndata %d10, %d12, %d52	;\
330*0Sstevel@tonic-gate	faligndata %d12, %d14, %d54	;\
331*0Sstevel@tonic-gate	faligndata %d14, %d16, %d56	;\
332*0Sstevel@tonic-gate	faligndata %d16, %d18, %d58	;\
333*0Sstevel@tonic-gate	faligndata %d18, %d20, %d60	;\
334*0Sstevel@tonic-gate	faligndata %d20, %d22, %d62
335*0Sstevel@tonic-gate
336*0Sstevel@tonic-gate#define	FALIGN_D22			\
337*0Sstevel@tonic-gate	faligndata %d22, %d24, %d48	;\
338*0Sstevel@tonic-gate	faligndata %d24, %d26, %d50	;\
339*0Sstevel@tonic-gate	faligndata %d26, %d28, %d52	;\
340*0Sstevel@tonic-gate	faligndata %d28, %d30, %d54	;\
341*0Sstevel@tonic-gate	faligndata %d30, %d32, %d56	;\
342*0Sstevel@tonic-gate	faligndata %d32, %d34, %d58	;\
343*0Sstevel@tonic-gate	faligndata %d34, %d36, %d60	;\
344*0Sstevel@tonic-gate	faligndata %d36, %d38, %d62
345*0Sstevel@tonic-gate
346*0Sstevel@tonic-gate#define	FALIGN_D38			\
347*0Sstevel@tonic-gate	faligndata %d38, %d40, %d48	;\
348*0Sstevel@tonic-gate	faligndata %d40, %d42, %d50	;\
349*0Sstevel@tonic-gate	faligndata %d42, %d44, %d52	;\
350*0Sstevel@tonic-gate	faligndata %d44, %d46, %d54	;\
351*0Sstevel@tonic-gate	faligndata %d46, %d0, %d56	;\
352*0Sstevel@tonic-gate	faligndata %d0, %d2, %d58	;\
353*0Sstevel@tonic-gate	faligndata %d2, %d4, %d60	;\
354*0Sstevel@tonic-gate	faligndata %d4, %d6, %d62
355*0Sstevel@tonic-gate
356*0Sstevel@tonic-gate#define	FALIGN_D8			\
357*0Sstevel@tonic-gate	faligndata %d8, %d10, %d48	;\
358*0Sstevel@tonic-gate	faligndata %d10, %d12, %d50	;\
359*0Sstevel@tonic-gate	faligndata %d12, %d14, %d52	;\
360*0Sstevel@tonic-gate	faligndata %d14, %d16, %d54	;\
361*0Sstevel@tonic-gate	faligndata %d16, %d18, %d56	;\
362*0Sstevel@tonic-gate	faligndata %d18, %d20, %d58	;\
363*0Sstevel@tonic-gate	faligndata %d20, %d22, %d60	;\
364*0Sstevel@tonic-gate	faligndata %d22, %d24, %d62
365*0Sstevel@tonic-gate
366*0Sstevel@tonic-gate#define	FALIGN_D24			\
367*0Sstevel@tonic-gate	faligndata %d24, %d26, %d48	;\
368*0Sstevel@tonic-gate	faligndata %d26, %d28, %d50	;\
369*0Sstevel@tonic-gate	faligndata %d28, %d30, %d52	;\
370*0Sstevel@tonic-gate	faligndata %d30, %d32, %d54	;\
371*0Sstevel@tonic-gate	faligndata %d32, %d34, %d56	;\
372*0Sstevel@tonic-gate	faligndata %d34, %d36, %d58	;\
373*0Sstevel@tonic-gate	faligndata %d36, %d38, %d60	;\
374*0Sstevel@tonic-gate	faligndata %d38, %d40, %d62
375*0Sstevel@tonic-gate
376*0Sstevel@tonic-gate#define	FALIGN_D40			\
377*0Sstevel@tonic-gate	faligndata %d40, %d42, %d48	;\
378*0Sstevel@tonic-gate	faligndata %d42, %d44, %d50	;\
379*0Sstevel@tonic-gate	faligndata %d44, %d46, %d52	;\
380*0Sstevel@tonic-gate	faligndata %d46, %d0, %d54	;\
381*0Sstevel@tonic-gate	faligndata %d0, %d2, %d56	;\
382*0Sstevel@tonic-gate	faligndata %d2, %d4, %d58	;\
383*0Sstevel@tonic-gate	faligndata %d4, %d6, %d60	;\
384*0Sstevel@tonic-gate	faligndata %d6, %d8, %d62
385*0Sstevel@tonic-gate
386*0Sstevel@tonic-gate#define	FALIGN_D10			\
387*0Sstevel@tonic-gate	faligndata %d10, %d12, %d48	;\
388*0Sstevel@tonic-gate	faligndata %d12, %d14, %d50	;\
389*0Sstevel@tonic-gate	faligndata %d14, %d16, %d52	;\
390*0Sstevel@tonic-gate	faligndata %d16, %d18, %d54	;\
391*0Sstevel@tonic-gate	faligndata %d18, %d20, %d56	;\
392*0Sstevel@tonic-gate	faligndata %d20, %d22, %d58	;\
393*0Sstevel@tonic-gate	faligndata %d22, %d24, %d60	;\
394*0Sstevel@tonic-gate	faligndata %d24, %d26, %d62
395*0Sstevel@tonic-gate
396*0Sstevel@tonic-gate#define	FALIGN_D26			\
397*0Sstevel@tonic-gate	faligndata %d26, %d28, %d48	;\
398*0Sstevel@tonic-gate	faligndata %d28, %d30, %d50	;\
399*0Sstevel@tonic-gate	faligndata %d30, %d32, %d52	;\
400*0Sstevel@tonic-gate	faligndata %d32, %d34, %d54	;\
401*0Sstevel@tonic-gate	faligndata %d34, %d36, %d56	;\
402*0Sstevel@tonic-gate	faligndata %d36, %d38, %d58	;\
403*0Sstevel@tonic-gate	faligndata %d38, %d40, %d60	;\
404*0Sstevel@tonic-gate	faligndata %d40, %d42, %d62
405*0Sstevel@tonic-gate
406*0Sstevel@tonic-gate#define	FALIGN_D42			\
407*0Sstevel@tonic-gate	faligndata %d42, %d44, %d48	;\
408*0Sstevel@tonic-gate	faligndata %d44, %d46, %d50	;\
409*0Sstevel@tonic-gate	faligndata %d46, %d0, %d52	;\
410*0Sstevel@tonic-gate	faligndata %d0, %d2, %d54	;\
411*0Sstevel@tonic-gate	faligndata %d2, %d4, %d56	;\
412*0Sstevel@tonic-gate	faligndata %d4, %d6, %d58	;\
413*0Sstevel@tonic-gate	faligndata %d6, %d8, %d60	;\
414*0Sstevel@tonic-gate	faligndata %d8, %d10, %d62
415*0Sstevel@tonic-gate
416*0Sstevel@tonic-gate#define	FALIGN_D12			\
417*0Sstevel@tonic-gate	faligndata %d12, %d14, %d48	;\
418*0Sstevel@tonic-gate	faligndata %d14, %d16, %d50	;\
419*0Sstevel@tonic-gate	faligndata %d16, %d18, %d52	;\
420*0Sstevel@tonic-gate	faligndata %d18, %d20, %d54	;\
421*0Sstevel@tonic-gate	faligndata %d20, %d22, %d56	;\
422*0Sstevel@tonic-gate	faligndata %d22, %d24, %d58	;\
423*0Sstevel@tonic-gate	faligndata %d24, %d26, %d60	;\
424*0Sstevel@tonic-gate	faligndata %d26, %d28, %d62
425*0Sstevel@tonic-gate
426*0Sstevel@tonic-gate#define	FALIGN_D28			\
427*0Sstevel@tonic-gate	faligndata %d28, %d30, %d48	;\
428*0Sstevel@tonic-gate	faligndata %d30, %d32, %d50	;\
429*0Sstevel@tonic-gate	faligndata %d32, %d34, %d52	;\
430*0Sstevel@tonic-gate	faligndata %d34, %d36, %d54	;\
431*0Sstevel@tonic-gate	faligndata %d36, %d38, %d56	;\
432*0Sstevel@tonic-gate	faligndata %d38, %d40, %d58	;\
433*0Sstevel@tonic-gate	faligndata %d40, %d42, %d60	;\
434*0Sstevel@tonic-gate	faligndata %d42, %d44, %d62
435*0Sstevel@tonic-gate
436*0Sstevel@tonic-gate#define	FALIGN_D44			\
437*0Sstevel@tonic-gate	faligndata %d44, %d46, %d48	;\
438*0Sstevel@tonic-gate	faligndata %d46, %d0, %d50	;\
439*0Sstevel@tonic-gate	faligndata %d0, %d2, %d52	;\
440*0Sstevel@tonic-gate	faligndata %d2, %d4, %d54	;\
441*0Sstevel@tonic-gate	faligndata %d4, %d6, %d56	;\
442*0Sstevel@tonic-gate	faligndata %d6, %d8, %d58	;\
443*0Sstevel@tonic-gate	faligndata %d8, %d10, %d60	;\
444*0Sstevel@tonic-gate	faligndata %d10, %d12, %d62
445*0Sstevel@tonic-gate
446*0Sstevel@tonic-gate#define	FALIGN_D14			\
447*0Sstevel@tonic-gate	faligndata %d14, %d16, %d48	;\
448*0Sstevel@tonic-gate	faligndata %d16, %d18, %d50	;\
449*0Sstevel@tonic-gate	faligndata %d18, %d20, %d52	;\
450*0Sstevel@tonic-gate	faligndata %d20, %d22, %d54	;\
451*0Sstevel@tonic-gate	faligndata %d22, %d24, %d56	;\
452*0Sstevel@tonic-gate	faligndata %d24, %d26, %d58	;\
453*0Sstevel@tonic-gate	faligndata %d26, %d28, %d60	;\
454*0Sstevel@tonic-gate	faligndata %d28, %d30, %d62
455*0Sstevel@tonic-gate
456*0Sstevel@tonic-gate#define	FALIGN_D30			\
457*0Sstevel@tonic-gate	faligndata %d30, %d32, %d48	;\
458*0Sstevel@tonic-gate	faligndata %d32, %d34, %d50	;\
459*0Sstevel@tonic-gate	faligndata %d34, %d36, %d52	;\
460*0Sstevel@tonic-gate	faligndata %d36, %d38, %d54	;\
461*0Sstevel@tonic-gate	faligndata %d38, %d40, %d56	;\
462*0Sstevel@tonic-gate	faligndata %d40, %d42, %d58	;\
463*0Sstevel@tonic-gate	faligndata %d42, %d44, %d60	;\
464*0Sstevel@tonic-gate	faligndata %d44, %d46, %d62
465*0Sstevel@tonic-gate
466*0Sstevel@tonic-gate#define	FALIGN_D46			\
467*0Sstevel@tonic-gate	faligndata %d46, %d0, %d48	;\
468*0Sstevel@tonic-gate	faligndata %d0, %d2, %d50	;\
469*0Sstevel@tonic-gate	faligndata %d2, %d4, %d52	;\
470*0Sstevel@tonic-gate	faligndata %d4, %d6, %d54	;\
471*0Sstevel@tonic-gate	faligndata %d6, %d8, %d56	;\
472*0Sstevel@tonic-gate	faligndata %d8, %d10, %d58	;\
473*0Sstevel@tonic-gate	faligndata %d10, %d12, %d60	;\
474*0Sstevel@tonic-gate	faligndata %d12, %d14, %d62
475*0Sstevel@tonic-gate
476*0Sstevel@tonic-gate
477*0Sstevel@tonic-gate/*
478*0Sstevel@tonic-gate * Copy a block of storage, returning an error code if `from' or
479*0Sstevel@tonic-gate * `to' takes a kernel pagefault which cannot be resolved.
480*0Sstevel@tonic-gate * Returns errno value on pagefault error, 0 if all ok
 *
 * kcopy itself only installs .copyerr as the thread's t_lofault handler,
 * keeps the previous handler in %l6 (with no flag bits set) and branches
 * to the shared copy code at .do_copy.  The fault path (.copyerr) and the
 * last-resort panic path (.copyerr2) live inside this ENTRY/SET_SIZE pair.
481*0Sstevel@tonic-gate */
482*0Sstevel@tonic-gate
483*0Sstevel@tonic-gate
484*0Sstevel@tonic-gate
485*0Sstevel@tonic-gate#if defined(lint)
486*0Sstevel@tonic-gate
487*0Sstevel@tonic-gate/* ARGSUSED */
488*0Sstevel@tonic-gateint
489*0Sstevel@tonic-gatekcopy(const void *from, void *to, size_t count)
490*0Sstevel@tonic-gate{ return(0); }
491*0Sstevel@tonic-gate
492*0Sstevel@tonic-gate#else	/* lint */
493*0Sstevel@tonic-gate
494*0Sstevel@tonic-gate	.seg	".text"
495*0Sstevel@tonic-gate	.align	4
496*0Sstevel@tonic-gate
497*0Sstevel@tonic-gate	ENTRY(kcopy)
498*0Sstevel@tonic-gate
/*
 * New register window with room for the fp save area.  %l6 briefly holds
 * the address of .copyerr while it is stored into t_lofault; the delay
 * slot of the branch then replaces it with the previous handler from %l7.
 */
499*0Sstevel@tonic-gate	save	%sp, -SA(MINFRAME + HWCOPYFRAMESIZE), %sp
500*0Sstevel@tonic-gate	set	.copyerr, %l6		! copyerr is lofault value
501*0Sstevel@tonic-gate	ldn	[THREAD_REG + T_LOFAULT], %l7	! save existing handler
502*0Sstevel@tonic-gate	membar	#Sync			! sync error barrier (see copy.s)
503*0Sstevel@tonic-gate	stn	%l6, [THREAD_REG + T_LOFAULT]	! set t_lofault
504*0Sstevel@tonic-gate	!
505*0Sstevel@tonic-gate	! Note that we carefully do *not* flag the setting of
506*0Sstevel@tonic-gate	! t_lofault.
507*0Sstevel@tonic-gate	!
508*0Sstevel@tonic-gate	ba,pt	%ncc, .do_copy		! common code
509*0Sstevel@tonic-gate	  mov	%l7, %l6
510*0Sstevel@tonic-gate
511*0Sstevel@tonic-gate/*
512*0Sstevel@tonic-gate * We got here because of a fault during kcopy or bcopy if a fault
513*0Sstevel@tonic-gate * handler existed when bcopy was called.
514*0Sstevel@tonic-gate * Errno value is in %g1.
 *
 * Outline:
 *   - switch t_lofault to .copyerr2 so a fault in here panics;
 *   - copy BCOPY_FLAG from %l6 into %l1;
 *   - if FPUSED_FLAG is set in %l6, restore %gsr, the fp registers (or
 *     zero them) and %fprs, then undo the preemption hold for threads
 *     with no lwp, recording in %l1 whether kpreempt must be called;
 *   - restore the caller's t_lofault, call kpreempt if flagged, then
 *     either return %g1 (kcopy) or jump to the caller's handler (bcopy).
515*0Sstevel@tonic-gate */
516*0Sstevel@tonic-gate.copyerr:
517*0Sstevel@tonic-gate	set	.copyerr2, %l1
518*0Sstevel@tonic-gate	membar	#Sync			! sync error barrier
519*0Sstevel@tonic-gate	stn	%l1, [THREAD_REG + T_LOFAULT]	! set t_lofault
/*
 * The delay slot below is not annulled, so %l1 is loaded with the
 * BCOPY_FLAG bit of %l6 whether or not the branch is taken.
 */
520*0Sstevel@tonic-gate	btst	FPUSED_FLAG, %l6
521*0Sstevel@tonic-gate	bz	%icc, 1f
522*0Sstevel@tonic-gate	  and	%l6, BCOPY_FLAG, %l1	! copy flag to %l1
523*0Sstevel@tonic-gate
524*0Sstevel@tonic-gate	membar	#Sync
525*0Sstevel@tonic-gate
526*0Sstevel@tonic-gate	ld	[%fp + STACK_BIAS - SAVED_GSR_OFFSET], %o2	! restore gsr
527*0Sstevel@tonic-gate	wr	%o2, 0, %gsr
528*0Sstevel@tonic-gate
/* %o3 = saved %fprs; FPRS_FEF clear there means zero the registers (4f). */
529*0Sstevel@tonic-gate	ld	[%fp + STACK_BIAS - SAVED_FPRS_OFFSET], %o3
530*0Sstevel@tonic-gate	btst	FPRS_FEF, %o3
531*0Sstevel@tonic-gate	bz	%icc, 4f
532*0Sstevel@tonic-gate	  nop
533*0Sstevel@tonic-gate
/*
 * Round (%fp + STACK_BIAS - 257) down to a 64-byte boundary and block-load
 * four consecutive 64-byte blocks from there into %d0, %d16, %d32, %d48.
 */
534*0Sstevel@tonic-gate	! restore fpregs from stack
535*0Sstevel@tonic-gate	membar	#Sync
536*0Sstevel@tonic-gate	add	%fp, STACK_BIAS - 257, %o2
537*0Sstevel@tonic-gate	and	%o2, -64, %o2
538*0Sstevel@tonic-gate	ldda	[%o2]ASI_BLK_P, %d0
539*0Sstevel@tonic-gate	add	%o2, 64, %o2
540*0Sstevel@tonic-gate	ldda	[%o2]ASI_BLK_P, %d16
541*0Sstevel@tonic-gate	add	%o2, 64, %o2
542*0Sstevel@tonic-gate	ldda	[%o2]ASI_BLK_P, %d32
543*0Sstevel@tonic-gate	add	%o2, 64, %o2
544*0Sstevel@tonic-gate	ldda	[%o2]ASI_BLK_P, %d48
545*0Sstevel@tonic-gate	membar	#Sync
546*0Sstevel@tonic-gate
547*0Sstevel@tonic-gate	ba,pt	%ncc, 2f
548*0Sstevel@tonic-gate	  wr	%o3, 0, %fprs		! restore fprs
549*0Sstevel@tonic-gate
550*0Sstevel@tonic-gate4:
551*0Sstevel@tonic-gate	FZERO				! zero all of the fpregs
552*0Sstevel@tonic-gate	wr	%o3, 0, %fprs		! restore fprs
553*0Sstevel@tonic-gate
/*
 * Threads with a non-NULL t_lwp skip straight to 1.  Otherwise t_preempt
 * is decremented and stored back; only when it reaches zero is the CPU's
 * cpu_kprunrun byte examined to decide whether kpreempt must be called.
 */
554*0Sstevel@tonic-gate2:	ldn	[THREAD_REG + T_LWP], %o2
555*0Sstevel@tonic-gate	tst	%o2
556*0Sstevel@tonic-gate	bnz,pt	%ncc, 1f
557*0Sstevel@tonic-gate	  nop
558*0Sstevel@tonic-gate
559*0Sstevel@tonic-gate	ldsb	[THREAD_REG + T_PREEMPT], %l0
560*0Sstevel@tonic-gate	deccc	%l0
561*0Sstevel@tonic-gate	bnz,pn	%ncc, 1f
562*0Sstevel@tonic-gate	  stb	%l0, [THREAD_REG + T_PREEMPT]
563*0Sstevel@tonic-gate
/*
 * The branch below is annulled: KPREEMPT_FLAG is OR-ed into %l1 only when
 * cpu_kprunrun is non-zero.  Execution reaches label 1 either way.
 */
564*0Sstevel@tonic-gate	! Check for a kernel preemption request
565*0Sstevel@tonic-gate	ldn	[THREAD_REG + T_CPU], %l0
566*0Sstevel@tonic-gate	ldub	[%l0 + CPU_KPRUNRUN], %l0
567*0Sstevel@tonic-gate	tst	%l0
568*0Sstevel@tonic-gate	bnz,a,pt	%ncc, 1f	! Need to call kpreempt?
569*0Sstevel@tonic-gate	  or	%l1, KPREEMPT_FLAG, %l1	! If so, set the flag
570*0Sstevel@tonic-gate
571*0Sstevel@tonic-gate	!
572*0Sstevel@tonic-gate	! Need to cater for the different expectations of kcopy
573*0Sstevel@tonic-gate	! and bcopy. kcopy will *always* set a t_lofault handler
574*0Sstevel@tonic-gate	! If it fires, we're expected to just return the error code
575*0Sstevel@tonic-gate	! and *not* to invoke any existing error handler. As far as
576*0Sstevel@tonic-gate	! bcopy is concerned, we only set t_lofault if there was an
577*0Sstevel@tonic-gate	! existing lofault handler. In that case we're expected to
578*0Sstevel@tonic-gate	! invoke the previously existing handler after restting the
579*0Sstevel@tonic-gate	! t_lofault value.
580*0Sstevel@tonic-gate	!
581*0Sstevel@tonic-gate1:
582*0Sstevel@tonic-gate	andn	%l6, COPY_FLAGS, %l6	! remove flags from lofault address
583*0Sstevel@tonic-gate	membar	#Sync			! sync error barrier
584*0Sstevel@tonic-gate	stn	%l6, [THREAD_REG + T_LOFAULT]	! restore old t_lofault
585*0Sstevel@tonic-gate
/*
 * kpreempt is called only after the caller's t_lofault has been restored.
 * NOTE(review): %g1 (the errno value) is still live across this call and
 * is used by the kcopy return below -- confirm kpreempt leaves %g1 intact.
 */
586*0Sstevel@tonic-gate	! call kpreempt if necessary
587*0Sstevel@tonic-gate	btst	KPREEMPT_FLAG, %l1
588*0Sstevel@tonic-gate	bz,pt	%icc, 2f
589*0Sstevel@tonic-gate	  nop
590*0Sstevel@tonic-gate	call	kpreempt
591*0Sstevel@tonic-gate	  rdpr	%pil, %o0	! pass %pil
592*0Sstevel@tonic-gate2:
/* kcopy path: pop the window and return the errno from %g1 in %o0. */
593*0Sstevel@tonic-gate	btst	BCOPY_FLAG, %l1
594*0Sstevel@tonic-gate	bnz,pn	%ncc, 3f
595*0Sstevel@tonic-gate	  nop
596*0Sstevel@tonic-gate	ret
597*0Sstevel@tonic-gate	restore	%g1, 0, %o0
598*0Sstevel@tonic-gate
599*0Sstevel@tonic-gate3:
600*0Sstevel@tonic-gate	!
601*0Sstevel@tonic-gate	! We're here via bcopy. There *must* have been an error handler
602*0Sstevel@tonic-gate	! in place otheerwise we would have died a nasty death already.
603*0Sstevel@tonic-gate	!
/* %l6 now holds the caller's handler address with the flag bits removed. */
604*0Sstevel@tonic-gate	jmp	%l6				! goto real handler
605*0Sstevel@tonic-gate	restore	%g0, 0, %o0			! dispose of copy window
606*0Sstevel@tonic-gate
607*0Sstevel@tonic-gate/*
608*0Sstevel@tonic-gate * We got here because of a fault in .copyerr.  We can't safely restore fp
609*0Sstevel@tonic-gate * state, so we panic.
 *
 * The message string is emitted between the code sequences, so the
 * .align 4 that follows it restores alignment before .copyerr2.
610*0Sstevel@tonic-gate */
611*0Sstevel@tonic-gatefp_panic_msg:
612*0Sstevel@tonic-gate	.asciz	"Unable to restore fp state after copy operation"
613*0Sstevel@tonic-gate
614*0Sstevel@tonic-gate	.align	4
615*0Sstevel@tonic-gate.copyerr2:
616*0Sstevel@tonic-gate	set	fp_panic_msg, %o0
617*0Sstevel@tonic-gate	call	panic
618*0Sstevel@tonic-gate	  nop
619*0Sstevel@tonic-gate	SET_SIZE(kcopy)
620*0Sstevel@tonic-gate#endif	/* lint */
621*0Sstevel@tonic-gate
622*0Sstevel@tonic-gate
623*0Sstevel@tonic-gate/*
624*0Sstevel@tonic-gate * Copy a block of storage - must not overlap (from + len <= to).
625*0Sstevel@tonic-gate * Registers: l6 - saved t_lofault
626*0Sstevel@tonic-gate *
627*0Sstevel@tonic-gate * Copy a page of memory.
628*0Sstevel@tonic-gate * Assumes double word alignment and a count >= 256.
629*0Sstevel@tonic-gate */
630*0Sstevel@tonic-gate#if defined(lint)
631*0Sstevel@tonic-gate
632*0Sstevel@tonic-gate/* ARGSUSED */
633*0Sstevel@tonic-gatevoid
634*0Sstevel@tonic-gatebcopy(const void *from, void *to, size_t count)
635*0Sstevel@tonic-gate{}
636*0Sstevel@tonic-gate
637*0Sstevel@tonic-gate#else	/* lint */
638*0Sstevel@tonic-gate
639*0Sstevel@tonic-gate	ENTRY(bcopy)
640*0Sstevel@tonic-gate
641*0Sstevel@tonic-gate	save	%sp, -SA(MINFRAME + HWCOPYFRAMESIZE), %sp
642*0Sstevel@tonic-gate	ldn	[THREAD_REG + T_LOFAULT], %l6	! save t_lofault
643*0Sstevel@tonic-gate	tst	%l6
644*0Sstevel@tonic-gate        !
645*0Sstevel@tonic-gate        ! We've already captured whether t_lofault was zero on entry.
646*0Sstevel@tonic-gate        ! We need to mark ourselves as being from bcopy since both
647*0Sstevel@tonic-gate        ! kcopy and bcopy use the same code path. If BCOPY_FLAG is set
648*0Sstevel@tonic-gate        ! and the saved lofault was zero, we won't reset lofault on
649*0Sstevel@tonic-gate        ! returning.
650*0Sstevel@tonic-gate        !
651*0Sstevel@tonic-gate	or	%l6, BCOPY_FLAG, %l6
652*0Sstevel@tonic-gate	bz,pt	%ncc, .do_copy
653*0Sstevel@tonic-gate	sethi	%hi(.copyerr), %o2
654*0Sstevel@tonic-gate	or	%o2, %lo(.copyerr), %o2
655*0Sstevel@tonic-gate	membar	#Sync			! sync error barrier
656*0Sstevel@tonic-gate	stn	%o2, [THREAD_REG + T_LOFAULT]	! install new vector
657*0Sstevel@tonic-gate
658*0Sstevel@tonic-gate.do_copy:
659*0Sstevel@tonic-gate	cmp	%i2, 12			! for small counts
660*0Sstevel@tonic-gate	blu	%ncc, .bytecp		! just copy bytes
661*0Sstevel@tonic-gate	  .empty
662*0Sstevel@tonic-gate
663*0Sstevel@tonic-gate	cmp	%i2, VIS_COPY_THRESHOLD	! for large counts
664*0Sstevel@tonic-gate	blu,pt	%ncc, .bcb_punt
665*0Sstevel@tonic-gate	  .empty
666*0Sstevel@tonic-gate
667*0Sstevel@tonic-gate	!
668*0Sstevel@tonic-gate	! Check to see if VIS acceleration is enabled
669*0Sstevel@tonic-gate	!
670*0Sstevel@tonic-gate	sethi	%hi(use_hw_bcopy), %o2
671*0Sstevel@tonic-gate	ld	[%o2 + %lo(use_hw_bcopy)], %o2
672*0Sstevel@tonic-gate	tst	%o2
673*0Sstevel@tonic-gate	bz,pn	%icc, .bcb_punt
674*0Sstevel@tonic-gate	  nop
675*0Sstevel@tonic-gate
676*0Sstevel@tonic-gate	subcc	%i1, %i0, %i3
677*0Sstevel@tonic-gate	bneg,a,pn %ncc, 1f
678*0Sstevel@tonic-gate	neg	%i3
679*0Sstevel@tonic-gate1:
680*0Sstevel@tonic-gate	/*
681*0Sstevel@tonic-gate	 * Compare against 256 since we should be checking block addresses
682*0Sstevel@tonic-gate	 * and (dest & ~63) - (src & ~63) can be 3 blocks even if
683*0Sstevel@tonic-gate	 * src = dest + (64 * 3) + 63.
684*0Sstevel@tonic-gate	 */
685*0Sstevel@tonic-gate	cmp	%i3, 256
686*0Sstevel@tonic-gate	blu,pn	%ncc, .bcb_punt
687*0Sstevel@tonic-gate	  nop
688*0Sstevel@tonic-gate
689*0Sstevel@tonic-gate	ldn	[THREAD_REG + T_LWP], %o3
690*0Sstevel@tonic-gate	tst	%o3
691*0Sstevel@tonic-gate	bnz,pt	%ncc, 1f
692*0Sstevel@tonic-gate	  nop
693*0Sstevel@tonic-gate
694*0Sstevel@tonic-gate	! kpreempt_disable();
695*0Sstevel@tonic-gate	ldsb	[THREAD_REG + T_PREEMPT], %o2
696*0Sstevel@tonic-gate	inc	%o2
697*0Sstevel@tonic-gate	stb	%o2, [THREAD_REG + T_PREEMPT]
698*0Sstevel@tonic-gate
699*0Sstevel@tonic-gate1:
700*0Sstevel@tonic-gate	rd	%fprs, %o2		! check for unused fp
701*0Sstevel@tonic-gate	st	%o2, [%fp + STACK_BIAS - SAVED_FPRS_OFFSET] ! save orig %fprs
702*0Sstevel@tonic-gate	btst	FPRS_FEF, %o2
703*0Sstevel@tonic-gate	bz,a	%icc, .do_blockcopy
704*0Sstevel@tonic-gate	  wr	%g0, FPRS_FEF, %fprs
705*0Sstevel@tonic-gate
706*0Sstevel@tonic-gate.bcb_fpregs_inuse:
707*0Sstevel@tonic-gate	cmp	%i2, VIS_COPY_THRESHOLD+(64*4) ! for large counts (larger
708*0Sstevel@tonic-gate	bgeu	%ncc, 1f		!  if we have to save the fpregs)
709*0Sstevel@tonic-gate	  nop
710*0Sstevel@tonic-gate
711*0Sstevel@tonic-gate	tst	%o3
712*0Sstevel@tonic-gate	bnz,pt	%ncc, .bcb_punt
713*0Sstevel@tonic-gate	  nop
714*0Sstevel@tonic-gate
715*0Sstevel@tonic-gate	ldsb	[THREAD_REG + T_PREEMPT], %l0
716*0Sstevel@tonic-gate	deccc	%l0
717*0Sstevel@tonic-gate	bnz,pn	%icc, .bcb_punt
718*0Sstevel@tonic-gate	  stb	%l0, [THREAD_REG + T_PREEMPT]
719*0Sstevel@tonic-gate
720*0Sstevel@tonic-gate	! Check for a kernel preemption request
721*0Sstevel@tonic-gate	ldn	[THREAD_REG + T_CPU], %l0
722*0Sstevel@tonic-gate	ldub	[%l0 + CPU_KPRUNRUN], %l0
723*0Sstevel@tonic-gate	tst	%l0
724*0Sstevel@tonic-gate	bz,pt	%icc, .bcb_punt
725*0Sstevel@tonic-gate	  nop
726*0Sstevel@tonic-gate
727*0Sstevel@tonic-gate	! Attempt to preempt
728*0Sstevel@tonic-gate	call	kpreempt
729*0Sstevel@tonic-gate	  rdpr	  %pil, %o0		  ! pass %pil
730*0Sstevel@tonic-gate
731*0Sstevel@tonic-gate	ba,pt	%ncc, .bcb_punt
732*0Sstevel@tonic-gate	  nop
733*0Sstevel@tonic-gate
734*0Sstevel@tonic-gate1:
735*0Sstevel@tonic-gate	wr	%g0, FPRS_FEF, %fprs
736*0Sstevel@tonic-gate
737*0Sstevel@tonic-gate	! save in-use fpregs on stack
738*0Sstevel@tonic-gate	membar	#Sync
739*0Sstevel@tonic-gate	add	%fp, STACK_BIAS - 257, %o2
740*0Sstevel@tonic-gate	and	%o2, -64, %o2
741*0Sstevel@tonic-gate	stda	%d0, [%o2]ASI_BLK_P
742*0Sstevel@tonic-gate	add	%o2, 64, %o2
743*0Sstevel@tonic-gate	stda	%d16, [%o2]ASI_BLK_P
744*0Sstevel@tonic-gate	add	%o2, 64, %o2
745*0Sstevel@tonic-gate	stda	%d32, [%o2]ASI_BLK_P
746*0Sstevel@tonic-gate	add	%o2, 64, %o2
747*0Sstevel@tonic-gate	stda	%d48, [%o2]ASI_BLK_P
748*0Sstevel@tonic-gate	membar	#Sync
749*0Sstevel@tonic-gate
750*0Sstevel@tonic-gate.do_blockcopy:
751*0Sstevel@tonic-gate	membar	#StoreStore|#StoreLoad|#LoadStore
752*0Sstevel@tonic-gate
753*0Sstevel@tonic-gate	rd	%gsr, %o2
754*0Sstevel@tonic-gate	st	%o2, [%fp + STACK_BIAS - SAVED_GSR_OFFSET]	! save gsr
755*0Sstevel@tonic-gate
756*0Sstevel@tonic-gate	! Set the lower bit in the saved t_lofault to indicate
757*0Sstevel@tonic-gate	! that we need to clear the %fprs register on the way
758*0Sstevel@tonic-gate	! out
759*0Sstevel@tonic-gate	or	%l6, FPUSED_FLAG, %l6
760*0Sstevel@tonic-gate
761*0Sstevel@tonic-gate	! Swap src/dst since the code below is memcpy code
762*0Sstevel@tonic-gate	! and memcpy/bcopy have different calling sequences
763*0Sstevel@tonic-gate	mov	%i1, %i5
764*0Sstevel@tonic-gate	mov	%i0, %i1
765*0Sstevel@tonic-gate	mov	%i5, %i0
766*0Sstevel@tonic-gate
767*0Sstevel@tonic-gate!!! This code is nearly identical to the version in the sun4u
768*0Sstevel@tonic-gate!!! libc_psr.  Most bugfixes made to that file should be
769*0Sstevel@tonic-gate!!! merged into this routine.
770*0Sstevel@tonic-gate
771*0Sstevel@tonic-gate	andcc	%i0, 7, %o3
772*0Sstevel@tonic-gate	bz,pt	%ncc, blkcpy
773*0Sstevel@tonic-gate	sub	%o3, 8, %o3
774*0Sstevel@tonic-gate	neg	%o3
775*0Sstevel@tonic-gate	sub	%i2, %o3, %i2
776*0Sstevel@tonic-gate
777*0Sstevel@tonic-gate	! Align Destination on double-word boundary
778*0Sstevel@tonic-gate
779*0Sstevel@tonic-gate2:	ldub	[%i1], %o4
780*0Sstevel@tonic-gate	inc	%i1
781*0Sstevel@tonic-gate	inc	%i0
782*0Sstevel@tonic-gate	deccc	%o3
783*0Sstevel@tonic-gate	bgu	%ncc, 2b
784*0Sstevel@tonic-gate	stb	%o4, [%i0 - 1]
785*0Sstevel@tonic-gateblkcpy:
786*0Sstevel@tonic-gate	andcc	%i0, 63, %i3
787*0Sstevel@tonic-gate	bz,pn	%ncc, blalign		! now block aligned
788*0Sstevel@tonic-gate	sub	%i3, 64, %i3
789*0Sstevel@tonic-gate	neg	%i3			! bytes till block aligned
790*0Sstevel@tonic-gate	sub	%i2, %i3, %i2		! update %i2 with new count
791*0Sstevel@tonic-gate
792*0Sstevel@tonic-gate	! Copy %i3 bytes till dst is block (64 byte) aligned. use
793*0Sstevel@tonic-gate	! double word copies.
794*0Sstevel@tonic-gate
795*0Sstevel@tonic-gate	alignaddr %i1, %g0, %g1
796*0Sstevel@tonic-gate	ldd	[%g1], %d0
797*0Sstevel@tonic-gate	add	%g1, 8, %g1
798*0Sstevel@tonic-gate6:
799*0Sstevel@tonic-gate	ldd	[%g1], %d2
800*0Sstevel@tonic-gate	add	%g1, 8, %g1
801*0Sstevel@tonic-gate	subcc	%i3, 8, %i3
802*0Sstevel@tonic-gate	faligndata %d0, %d2, %d8
803*0Sstevel@tonic-gate	std	%d8, [%i0]
804*0Sstevel@tonic-gate	add	%i1, 8, %i1
805*0Sstevel@tonic-gate	bz,pn	%ncc, blalign
806*0Sstevel@tonic-gate	add	%i0, 8, %i0
807*0Sstevel@tonic-gate	ldd	[%g1], %d0
808*0Sstevel@tonic-gate	add	%g1, 8, %g1
809*0Sstevel@tonic-gate	subcc	%i3, 8, %i3
810*0Sstevel@tonic-gate	faligndata %d2, %d0, %d8
811*0Sstevel@tonic-gate	std	%d8, [%i0]
812*0Sstevel@tonic-gate	add	%i1, 8, %i1
813*0Sstevel@tonic-gate	bgu,pn	%ncc, 6b
814*0Sstevel@tonic-gate	add	%i0, 8, %i0
815*0Sstevel@tonic-gate
816*0Sstevel@tonic-gateblalign:
817*0Sstevel@tonic-gate	membar	#StoreLoad
818*0Sstevel@tonic-gate	! %i2 = total length
819*0Sstevel@tonic-gate	! %i3 = blocks	(length - 64) / 64
820*0Sstevel@tonic-gate	! %i4 = doubles remaining  (length - blocks)
821*0Sstevel@tonic-gate	sub	%i2, 64, %i3
822*0Sstevel@tonic-gate	andn	%i3, 63, %i3
823*0Sstevel@tonic-gate	sub	%i2, %i3, %i4
824*0Sstevel@tonic-gate	andn	%i4, 7, %i4
825*0Sstevel@tonic-gate	sub	%i4, 16, %i4
826*0Sstevel@tonic-gate	sub	%i2, %i4, %i2
827*0Sstevel@tonic-gate	sub	%i2, %i3, %i2
828*0Sstevel@tonic-gate
829*0Sstevel@tonic-gate	andn	%i1, 0x3f, %l7		! blk aligned address
830*0Sstevel@tonic-gate	alignaddr %i1, %g0, %g0		! gen %gsr
831*0Sstevel@tonic-gate
832*0Sstevel@tonic-gate	srl	%i1, 3, %l5		! bits 3,4,5 are now least sig in  %l5
833*0Sstevel@tonic-gate	andcc	%l5, 7, %i5		! mask everything except bits 1,2 3
834*0Sstevel@tonic-gate	add	%i1, %i4, %i1
835*0Sstevel@tonic-gate	add	%i1, %i3, %i1
836*0Sstevel@tonic-gate
837*0Sstevel@tonic-gate	ldda	[%l7]ASI_BLK_P, %d0
838*0Sstevel@tonic-gate	add	%l7, 64, %l7
839*0Sstevel@tonic-gate	ldda	[%l7]ASI_BLK_P, %d16
840*0Sstevel@tonic-gate	add	%l7, 64, %l7
841*0Sstevel@tonic-gate	ldda	[%l7]ASI_BLK_P, %d32
842*0Sstevel@tonic-gate	add	%l7, 64, %l7
843*0Sstevel@tonic-gate	sub	%i3, 128, %i3
844*0Sstevel@tonic-gate
845*0Sstevel@tonic-gate	! switch statement to get us to the right 8 byte blk within a
846*0Sstevel@tonic-gate	! 64 byte block
847*0Sstevel@tonic-gate	cmp	 %i5, 4
848*0Sstevel@tonic-gate	bgeu,a	 hlf
849*0Sstevel@tonic-gate	cmp	 %i5, 6
850*0Sstevel@tonic-gate	cmp	 %i5, 2
851*0Sstevel@tonic-gate	bgeu,a	 sqtr
852*0Sstevel@tonic-gate	nop
853*0Sstevel@tonic-gate	cmp	 %i5, 1
854*0Sstevel@tonic-gate	be,a	 seg1
855*0Sstevel@tonic-gate	nop
856*0Sstevel@tonic-gate	ba,pt	 %ncc, seg0
857*0Sstevel@tonic-gate	nop
858*0Sstevel@tonic-gatesqtr:
859*0Sstevel@tonic-gate	be,a	 seg2
860*0Sstevel@tonic-gate	nop
861*0Sstevel@tonic-gate	ba,pt	 %ncc, seg3
862*0Sstevel@tonic-gate	nop
863*0Sstevel@tonic-gate
864*0Sstevel@tonic-gatehlf:
865*0Sstevel@tonic-gate	bgeu,a	 fqtr
866*0Sstevel@tonic-gate	nop
867*0Sstevel@tonic-gate	cmp	 %i5, 5
868*0Sstevel@tonic-gate	be,a	 seg5
869*0Sstevel@tonic-gate	nop
870*0Sstevel@tonic-gate	ba,pt	 %ncc, seg4
871*0Sstevel@tonic-gate	nop
872*0Sstevel@tonic-gatefqtr:
873*0Sstevel@tonic-gate	be,a	 seg6
874*0Sstevel@tonic-gate	nop
875*0Sstevel@tonic-gate	ba,pt	 %ncc, seg7
876*0Sstevel@tonic-gate	nop
877*0Sstevel@tonic-gate
878*0Sstevel@tonic-gate
879*0Sstevel@tonic-gateseg0:
880*0Sstevel@tonic-gate	! 1st chunk - %d0 low, %d16 high, %d32 pre, %d48 dst
881*0Sstevel@tonic-gate	FALIGN_D0
882*0Sstevel@tonic-gate	ldda	[%l7]ASI_BLK_P, %d0
883*0Sstevel@tonic-gate	stda	%d48, [%i0]ASI_BLK_P
884*0Sstevel@tonic-gate	add	%l7, 64, %l7
885*0Sstevel@tonic-gate	subcc	%i3, 64, %i3
886*0Sstevel@tonic-gate	bz,pn	%ncc, 0f
887*0Sstevel@tonic-gate	add	%i0, 64, %i0
888*0Sstevel@tonic-gate	! 2nd chunk -  %d0 pre, %d16 low, %d32 high, %d48 dst
889*0Sstevel@tonic-gate	FALIGN_D16
890*0Sstevel@tonic-gate	ldda	[%l7]ASI_BLK_P, %d16
891*0Sstevel@tonic-gate	stda	%d48, [%i0]ASI_BLK_P
892*0Sstevel@tonic-gate	add	%l7, 64, %l7
893*0Sstevel@tonic-gate	subcc	%i3, 64, %i3
894*0Sstevel@tonic-gate	bz,pn	%ncc, 1f
895*0Sstevel@tonic-gate	add	%i0, 64, %i0
896*0Sstevel@tonic-gate	! 3rd chunk -  %d0 high, %d16 pre, %d32 low, %d48 dst
897*0Sstevel@tonic-gate	FALIGN_D32
898*0Sstevel@tonic-gate	ldda	[%l7]ASI_BLK_P, %d32
899*0Sstevel@tonic-gate	stda	%d48, [%i0]ASI_BLK_P
900*0Sstevel@tonic-gate	add	%l7, 64, %l7
901*0Sstevel@tonic-gate	subcc	%i3, 64, %i3
902*0Sstevel@tonic-gate	bz,pn	%ncc, 2f
903*0Sstevel@tonic-gate	add	%i0, 64, %i0
904*0Sstevel@tonic-gate	ba,a,pt	%ncc, seg0
905*0Sstevel@tonic-gate
906*0Sstevel@tonic-gate0:
907*0Sstevel@tonic-gate	FALIGN_D16
908*0Sstevel@tonic-gate	stda	%d48, [%i0]ASI_BLK_P
909*0Sstevel@tonic-gate	add	%i0, 64, %i0
910*0Sstevel@tonic-gate	membar	#Sync
911*0Sstevel@tonic-gate	FALIGN_D32
912*0Sstevel@tonic-gate	stda	%d48, [%i0]ASI_BLK_P
913*0Sstevel@tonic-gate	ba,pt	%ncc, blkd0
914*0Sstevel@tonic-gate	add	%i0, 64, %i0
915*0Sstevel@tonic-gate
916*0Sstevel@tonic-gate1:
917*0Sstevel@tonic-gate	FALIGN_D32
918*0Sstevel@tonic-gate	stda	%d48, [%i0]ASI_BLK_P
919*0Sstevel@tonic-gate	add	%i0, 64, %i0
920*0Sstevel@tonic-gate	membar	#Sync
921*0Sstevel@tonic-gate	FALIGN_D0
922*0Sstevel@tonic-gate	stda	%d48, [%i0]ASI_BLK_P
923*0Sstevel@tonic-gate	ba,pt	%ncc, blkd16
924*0Sstevel@tonic-gate	add	%i0, 64, %i0
925*0Sstevel@tonic-gate
926*0Sstevel@tonic-gate2:
927*0Sstevel@tonic-gate	FALIGN_D0
928*0Sstevel@tonic-gate	stda	%d48, [%i0]ASI_BLK_P
929*0Sstevel@tonic-gate	add	%i0, 64, %i0
930*0Sstevel@tonic-gate	membar	#Sync
931*0Sstevel@tonic-gate	FALIGN_D16
932*0Sstevel@tonic-gate	stda	%d48, [%i0]ASI_BLK_P
933*0Sstevel@tonic-gate	ba,pt	%ncc, blkd32
934*0Sstevel@tonic-gate	add	%i0, 64, %i0
935*0Sstevel@tonic-gate
936*0Sstevel@tonic-gateseg1:
937*0Sstevel@tonic-gate	! 1st chunk - %d0 low, %d16 high, %d32 pre, %d48 dst
938*0Sstevel@tonic-gate	FALIGN_D2
939*0Sstevel@tonic-gate	ldda	[%l7]ASI_BLK_P, %d0
940*0Sstevel@tonic-gate	stda	%d48, [%i0]ASI_BLK_P
941*0Sstevel@tonic-gate	add	%l7, 64, %l7
942*0Sstevel@tonic-gate	subcc	%i3, 64, %i3
943*0Sstevel@tonic-gate	bz,pn	%ncc, 0f
944*0Sstevel@tonic-gate	add	%i0, 64, %i0
945*0Sstevel@tonic-gate	! 2nd chunk -  %d0 pre, %d16 low, %d32 high, %d48 dst
946*0Sstevel@tonic-gate	FALIGN_D18
947*0Sstevel@tonic-gate	ldda	[%l7]ASI_BLK_P, %d16
948*0Sstevel@tonic-gate	stda	%d48, [%i0]ASI_BLK_P
949*0Sstevel@tonic-gate	add	%l7, 64, %l7
950*0Sstevel@tonic-gate	subcc	%i3, 64, %i3
951*0Sstevel@tonic-gate	bz,pn	%ncc, 1f
952*0Sstevel@tonic-gate	add	%i0, 64, %i0
953*0Sstevel@tonic-gate	! 3rd chunk -  %d0 high, %d16 pre, %d32 low, %d48 dst
954*0Sstevel@tonic-gate	FALIGN_D34
955*0Sstevel@tonic-gate	ldda	[%l7]ASI_BLK_P, %d32
956*0Sstevel@tonic-gate	stda	%d48, [%i0]ASI_BLK_P
957*0Sstevel@tonic-gate	add	%l7, 64, %l7
958*0Sstevel@tonic-gate	subcc	%i3, 64, %i3
959*0Sstevel@tonic-gate	bz,pn	%ncc, 2f
960*0Sstevel@tonic-gate	add	%i0, 64, %i0
961*0Sstevel@tonic-gate	ba,a,pt	%ncc, seg1
962*0Sstevel@tonic-gate0:
963*0Sstevel@tonic-gate	FALIGN_D18
964*0Sstevel@tonic-gate	stda	%d48, [%i0]ASI_BLK_P
965*0Sstevel@tonic-gate	add	%i0, 64, %i0
966*0Sstevel@tonic-gate	membar	#Sync
967*0Sstevel@tonic-gate	FALIGN_D34
968*0Sstevel@tonic-gate	stda	%d48, [%i0]ASI_BLK_P
969*0Sstevel@tonic-gate	ba,pt	%ncc, blkd2
970*0Sstevel@tonic-gate	add	%i0, 64, %i0
971*0Sstevel@tonic-gate
972*0Sstevel@tonic-gate1:
973*0Sstevel@tonic-gate	FALIGN_D34
974*0Sstevel@tonic-gate	stda	%d48, [%i0]ASI_BLK_P
975*0Sstevel@tonic-gate	add	%i0, 64, %i0
976*0Sstevel@tonic-gate	membar	#Sync
977*0Sstevel@tonic-gate	FALIGN_D2
978*0Sstevel@tonic-gate	stda	%d48, [%i0]ASI_BLK_P
979*0Sstevel@tonic-gate	ba,pt	%ncc, blkd18
980*0Sstevel@tonic-gate	add	%i0, 64, %i0
981*0Sstevel@tonic-gate
982*0Sstevel@tonic-gate2:
983*0Sstevel@tonic-gate	FALIGN_D2
984*0Sstevel@tonic-gate	stda	%d48, [%i0]ASI_BLK_P
985*0Sstevel@tonic-gate	add	%i0, 64, %i0
986*0Sstevel@tonic-gate	membar	#Sync
987*0Sstevel@tonic-gate	FALIGN_D18
988*0Sstevel@tonic-gate	stda	%d48, [%i0]ASI_BLK_P
989*0Sstevel@tonic-gate	ba,pt	%ncc, blkd34
990*0Sstevel@tonic-gate	add	%i0, 64, %i0
991*0Sstevel@tonic-gate
992*0Sstevel@tonic-gateseg2:
993*0Sstevel@tonic-gate	! 1st chunk - %d0 low, %d16 high, %d32 pre, %d48 dst
994*0Sstevel@tonic-gate	FALIGN_D4
995*0Sstevel@tonic-gate	ldda	[%l7]ASI_BLK_P, %d0
996*0Sstevel@tonic-gate	stda	%d48, [%i0]ASI_BLK_P
997*0Sstevel@tonic-gate	add	%l7, 64, %l7
998*0Sstevel@tonic-gate	subcc	%i3, 64, %i3
999*0Sstevel@tonic-gate	bz,pn	%ncc, 0f
1000*0Sstevel@tonic-gate	add	%i0, 64, %i0
1001*0Sstevel@tonic-gate	! 2nd chunk -  %d0 pre, %d16 low, %d32 high, %d48 dst
1002*0Sstevel@tonic-gate	FALIGN_D20
1003*0Sstevel@tonic-gate	ldda	[%l7]ASI_BLK_P, %d16
1004*0Sstevel@tonic-gate	stda	%d48, [%i0]ASI_BLK_P
1005*0Sstevel@tonic-gate	add	%l7, 64, %l7
1006*0Sstevel@tonic-gate	subcc	%i3, 64, %i3
1007*0Sstevel@tonic-gate	bz,pn	%ncc, 1f
1008*0Sstevel@tonic-gate	add	%i0, 64, %i0
1009*0Sstevel@tonic-gate	! 3rd chunk -  %d0 high, %d16 pre, %d32 low, %d48 dst
1010*0Sstevel@tonic-gate	FALIGN_D36
1011*0Sstevel@tonic-gate	ldda	[%l7]ASI_BLK_P, %d32
1012*0Sstevel@tonic-gate	stda	%d48, [%i0]ASI_BLK_P
1013*0Sstevel@tonic-gate	add	%l7, 64, %l7
1014*0Sstevel@tonic-gate	subcc	%i3, 64, %i3
1015*0Sstevel@tonic-gate	bz,pn	%ncc, 2f
1016*0Sstevel@tonic-gate	add	%i0, 64, %i0
1017*0Sstevel@tonic-gate	ba,a,pt	%ncc, seg2
1018*0Sstevel@tonic-gate
1019*0Sstevel@tonic-gate0:
1020*0Sstevel@tonic-gate	FALIGN_D20
1021*0Sstevel@tonic-gate	stda	%d48, [%i0]ASI_BLK_P
1022*0Sstevel@tonic-gate	add	%i0, 64, %i0
1023*0Sstevel@tonic-gate	membar	#Sync
1024*0Sstevel@tonic-gate	FALIGN_D36
1025*0Sstevel@tonic-gate	stda	%d48, [%i0]ASI_BLK_P
1026*0Sstevel@tonic-gate	ba,pt	%ncc, blkd4
1027*0Sstevel@tonic-gate	add	%i0, 64, %i0
1028*0Sstevel@tonic-gate
1029*0Sstevel@tonic-gate1:
1030*0Sstevel@tonic-gate	FALIGN_D36
1031*0Sstevel@tonic-gate	stda	%d48, [%i0]ASI_BLK_P
1032*0Sstevel@tonic-gate	add	%i0, 64, %i0
1033*0Sstevel@tonic-gate	membar	#Sync
1034*0Sstevel@tonic-gate	FALIGN_D4
1035*0Sstevel@tonic-gate	stda	%d48, [%i0]ASI_BLK_P
1036*0Sstevel@tonic-gate	ba,pt	%ncc, blkd20
1037*0Sstevel@tonic-gate	add	%i0, 64, %i0
1038*0Sstevel@tonic-gate
1039*0Sstevel@tonic-gate2:
1040*0Sstevel@tonic-gate	FALIGN_D4
1041*0Sstevel@tonic-gate	stda	%d48, [%i0]ASI_BLK_P
1042*0Sstevel@tonic-gate	add	%i0, 64, %i0
1043*0Sstevel@tonic-gate	membar	#Sync
1044*0Sstevel@tonic-gate	FALIGN_D20
1045*0Sstevel@tonic-gate	stda	%d48, [%i0]ASI_BLK_P
1046*0Sstevel@tonic-gate	ba,pt	%ncc, blkd36
1047*0Sstevel@tonic-gate	add	%i0, 64, %i0
1048*0Sstevel@tonic-gate
1049*0Sstevel@tonic-gateseg3:
1050*0Sstevel@tonic-gate	! 1st chunk - %d0 low, %d16 high, %d32 pre, %d48 dst
1051*0Sstevel@tonic-gate	FALIGN_D6
1052*0Sstevel@tonic-gate	ldda	[%l7]ASI_BLK_P, %d0
1053*0Sstevel@tonic-gate	stda	%d48, [%i0]ASI_BLK_P
1054*0Sstevel@tonic-gate	add	%l7, 64, %l7
1055*0Sstevel@tonic-gate	subcc	%i3, 64, %i3
1056*0Sstevel@tonic-gate	bz,pn	%ncc, 0f
1057*0Sstevel@tonic-gate	add	%i0, 64, %i0
1058*0Sstevel@tonic-gate	! 2nd chunk -  %d0 pre, %d16 low, %d32 high, %d48 dst
1059*0Sstevel@tonic-gate	FALIGN_D22
1060*0Sstevel@tonic-gate	ldda	[%l7]ASI_BLK_P, %d16
1061*0Sstevel@tonic-gate	stda	%d48, [%i0]ASI_BLK_P
1062*0Sstevel@tonic-gate	add	%l7, 64, %l7
1063*0Sstevel@tonic-gate	subcc	%i3, 64, %i3
1064*0Sstevel@tonic-gate	bz,pn	%ncc, 1f
1065*0Sstevel@tonic-gate	add	%i0, 64, %i0
1066*0Sstevel@tonic-gate	! 3rd chunk -  %d0 high, %d16 pre, %d32 low, %d48 dst
1067*0Sstevel@tonic-gate	FALIGN_D38
1068*0Sstevel@tonic-gate	ldda	[%l7]ASI_BLK_P, %d32
1069*0Sstevel@tonic-gate	stda	%d48, [%i0]ASI_BLK_P
1070*0Sstevel@tonic-gate	add	%l7, 64, %l7
1071*0Sstevel@tonic-gate	subcc	%i3, 64, %i3
1072*0Sstevel@tonic-gate	bz,pn	%ncc, 2f
1073*0Sstevel@tonic-gate	add	%i0, 64, %i0
1074*0Sstevel@tonic-gate	ba,a,pt	%ncc, seg3
1075*0Sstevel@tonic-gate
1076*0Sstevel@tonic-gate0:
1077*0Sstevel@tonic-gate	FALIGN_D22
1078*0Sstevel@tonic-gate	stda	%d48, [%i0]ASI_BLK_P
1079*0Sstevel@tonic-gate	add	%i0, 64, %i0
1080*0Sstevel@tonic-gate	membar	#Sync
1081*0Sstevel@tonic-gate	FALIGN_D38
1082*0Sstevel@tonic-gate	stda	%d48, [%i0]ASI_BLK_P
1083*0Sstevel@tonic-gate	ba,pt	%ncc, blkd6
1084*0Sstevel@tonic-gate	add	%i0, 64, %i0
1085*0Sstevel@tonic-gate
1086*0Sstevel@tonic-gate1:
1087*0Sstevel@tonic-gate	FALIGN_D38
1088*0Sstevel@tonic-gate	stda	%d48, [%i0]ASI_BLK_P
1089*0Sstevel@tonic-gate	add	%i0, 64, %i0
1090*0Sstevel@tonic-gate	membar	#Sync
1091*0Sstevel@tonic-gate	FALIGN_D6
1092*0Sstevel@tonic-gate	stda	%d48, [%i0]ASI_BLK_P
1093*0Sstevel@tonic-gate	ba,pt	%ncc, blkd22
1094*0Sstevel@tonic-gate	add	%i0, 64, %i0
1095*0Sstevel@tonic-gate
1096*0Sstevel@tonic-gate2:
1097*0Sstevel@tonic-gate	FALIGN_D6
1098*0Sstevel@tonic-gate	stda	%d48, [%i0]ASI_BLK_P
1099*0Sstevel@tonic-gate	add	%i0, 64, %i0
1100*0Sstevel@tonic-gate	membar	#Sync
1101*0Sstevel@tonic-gate	FALIGN_D22
1102*0Sstevel@tonic-gate	stda	%d48, [%i0]ASI_BLK_P
1103*0Sstevel@tonic-gate	ba,pt	%ncc, blkd38
1104*0Sstevel@tonic-gate	add	%i0, 64, %i0
1105*0Sstevel@tonic-gate
1106*0Sstevel@tonic-gateseg4:
1107*0Sstevel@tonic-gate	! 1st chunk - %d0 low, %d16 high, %d32 pre, %d48 dst
1108*0Sstevel@tonic-gate	FALIGN_D8
1109*0Sstevel@tonic-gate	ldda	[%l7]ASI_BLK_P, %d0
1110*0Sstevel@tonic-gate	stda	%d48, [%i0]ASI_BLK_P
1111*0Sstevel@tonic-gate	add	%l7, 64, %l7
1112*0Sstevel@tonic-gate	subcc	%i3, 64, %i3
1113*0Sstevel@tonic-gate	bz,pn	%ncc, 0f
1114*0Sstevel@tonic-gate	add	%i0, 64, %i0
1115*0Sstevel@tonic-gate	! 2nd chunk -  %d0 pre, %d16 low, %d32 high, %d48 dst
1116*0Sstevel@tonic-gate	FALIGN_D24
1117*0Sstevel@tonic-gate	ldda	[%l7]ASI_BLK_P, %d16
1118*0Sstevel@tonic-gate	stda	%d48, [%i0]ASI_BLK_P
1119*0Sstevel@tonic-gate	add	%l7, 64, %l7
1120*0Sstevel@tonic-gate	subcc	%i3, 64, %i3
1121*0Sstevel@tonic-gate	bz,pn	%ncc, 1f
1122*0Sstevel@tonic-gate	add	%i0, 64, %i0
1123*0Sstevel@tonic-gate	! 3rd chunk -  %d0 high, %d16 pre, %d32 low, %d48 dst
1124*0Sstevel@tonic-gate	FALIGN_D40
1125*0Sstevel@tonic-gate	ldda	[%l7]ASI_BLK_P, %d32
1126*0Sstevel@tonic-gate	stda	%d48, [%i0]ASI_BLK_P
1127*0Sstevel@tonic-gate	add	%l7, 64, %l7
1128*0Sstevel@tonic-gate	subcc	%i3, 64, %i3
1129*0Sstevel@tonic-gate	bz,pn	%ncc, 2f
1130*0Sstevel@tonic-gate	add	%i0, 64, %i0
1131*0Sstevel@tonic-gate	ba,a,pt	%ncc, seg4
1132*0Sstevel@tonic-gate
1133*0Sstevel@tonic-gate0:
1134*0Sstevel@tonic-gate	FALIGN_D24
1135*0Sstevel@tonic-gate	stda	%d48, [%i0]ASI_BLK_P
1136*0Sstevel@tonic-gate	add	%i0, 64, %i0
1137*0Sstevel@tonic-gate	membar	#Sync
1138*0Sstevel@tonic-gate	FALIGN_D40
1139*0Sstevel@tonic-gate	stda	%d48, [%i0]ASI_BLK_P
1140*0Sstevel@tonic-gate	ba,pt	%ncc, blkd8
1141*0Sstevel@tonic-gate	add	%i0, 64, %i0
1142*0Sstevel@tonic-gate
1143*0Sstevel@tonic-gate1:
1144*0Sstevel@tonic-gate	FALIGN_D40
1145*0Sstevel@tonic-gate	stda	%d48, [%i0]ASI_BLK_P
1146*0Sstevel@tonic-gate	add	%i0, 64, %i0
1147*0Sstevel@tonic-gate	membar	#Sync
1148*0Sstevel@tonic-gate	FALIGN_D8
1149*0Sstevel@tonic-gate	stda	%d48, [%i0]ASI_BLK_P
1150*0Sstevel@tonic-gate	ba,pt	%ncc, blkd24
1151*0Sstevel@tonic-gate	add	%i0, 64, %i0
1152*0Sstevel@tonic-gate
1153*0Sstevel@tonic-gate2:
1154*0Sstevel@tonic-gate	FALIGN_D8
1155*0Sstevel@tonic-gate	stda	%d48, [%i0]ASI_BLK_P
1156*0Sstevel@tonic-gate	add	%i0, 64, %i0
1157*0Sstevel@tonic-gate	membar	#Sync
1158*0Sstevel@tonic-gate	FALIGN_D24
1159*0Sstevel@tonic-gate	stda	%d48, [%i0]ASI_BLK_P
1160*0Sstevel@tonic-gate	ba,pt	%ncc, blkd40
1161*0Sstevel@tonic-gate	add	%i0, 64, %i0
1162*0Sstevel@tonic-gate
1163*0Sstevel@tonic-gateseg5:
1164*0Sstevel@tonic-gate	! 1st chunk - %d0 low, %d16 high, %d32 pre, %d48 dst
1165*0Sstevel@tonic-gate	FALIGN_D10
1166*0Sstevel@tonic-gate	ldda	[%l7]ASI_BLK_P, %d0
1167*0Sstevel@tonic-gate	stda	%d48, [%i0]ASI_BLK_P
1168*0Sstevel@tonic-gate	add	%l7, 64, %l7
1169*0Sstevel@tonic-gate	subcc	%i3, 64, %i3
1170*0Sstevel@tonic-gate	bz,pn	%ncc, 0f
1171*0Sstevel@tonic-gate	add	%i0, 64, %i0
1172*0Sstevel@tonic-gate	! 2nd chunk -  %d0 pre, %d16 low, %d32 high, %d48 dst
1173*0Sstevel@tonic-gate	FALIGN_D26
1174*0Sstevel@tonic-gate	ldda	[%l7]ASI_BLK_P, %d16
1175*0Sstevel@tonic-gate	stda	%d48, [%i0]ASI_BLK_P
1176*0Sstevel@tonic-gate	add	%l7, 64, %l7
1177*0Sstevel@tonic-gate	subcc	%i3, 64, %i3
1178*0Sstevel@tonic-gate	bz,pn	%ncc, 1f
1179*0Sstevel@tonic-gate	add	%i0, 64, %i0
1180*0Sstevel@tonic-gate	! 3rd chunk -  %d0 high, %d16 pre, %d32 low, %d48 dst
1181*0Sstevel@tonic-gate	FALIGN_D42
1182*0Sstevel@tonic-gate	ldda	[%l7]ASI_BLK_P, %d32
1183*0Sstevel@tonic-gate	stda	%d48, [%i0]ASI_BLK_P
1184*0Sstevel@tonic-gate	add	%l7, 64, %l7
1185*0Sstevel@tonic-gate	subcc	%i3, 64, %i3
1186*0Sstevel@tonic-gate	bz,pn	%ncc, 2f
1187*0Sstevel@tonic-gate	add	%i0, 64, %i0
1188*0Sstevel@tonic-gate	ba,a,pt	%ncc, seg5
1189*0Sstevel@tonic-gate
1190*0Sstevel@tonic-gate0:
1191*0Sstevel@tonic-gate	FALIGN_D26
1192*0Sstevel@tonic-gate	stda	%d48, [%i0]ASI_BLK_P
1193*0Sstevel@tonic-gate	add	%i0, 64, %i0
1194*0Sstevel@tonic-gate	membar	#Sync
1195*0Sstevel@tonic-gate	FALIGN_D42
1196*0Sstevel@tonic-gate	stda	%d48, [%i0]ASI_BLK_P
1197*0Sstevel@tonic-gate	ba,pt	%ncc, blkd10
1198*0Sstevel@tonic-gate	add	%i0, 64, %i0
1199*0Sstevel@tonic-gate
1200*0Sstevel@tonic-gate1:
1201*0Sstevel@tonic-gate	FALIGN_D42
1202*0Sstevel@tonic-gate	stda	%d48, [%i0]ASI_BLK_P
1203*0Sstevel@tonic-gate	add	%i0, 64, %i0
1204*0Sstevel@tonic-gate	membar	#Sync
1205*0Sstevel@tonic-gate	FALIGN_D10
1206*0Sstevel@tonic-gate	stda	%d48, [%i0]ASI_BLK_P
1207*0Sstevel@tonic-gate	ba,pt	%ncc, blkd26
1208*0Sstevel@tonic-gate	add	%i0, 64, %i0
1209*0Sstevel@tonic-gate
1210*0Sstevel@tonic-gate2:
1211*0Sstevel@tonic-gate	FALIGN_D10
1212*0Sstevel@tonic-gate	stda	%d48, [%i0]ASI_BLK_P
1213*0Sstevel@tonic-gate	add	%i0, 64, %i0
1214*0Sstevel@tonic-gate	membar	#Sync
1215*0Sstevel@tonic-gate	FALIGN_D26
1216*0Sstevel@tonic-gate	stda	%d48, [%i0]ASI_BLK_P
1217*0Sstevel@tonic-gate	ba,pt	%ncc, blkd42
1218*0Sstevel@tonic-gate	add	%i0, 64, %i0
1219*0Sstevel@tonic-gate
1220*0Sstevel@tonic-gateseg6:
1221*0Sstevel@tonic-gate	! 1st chunk - %d0 low, %d16 high, %d32 pre, %d48 dst
1222*0Sstevel@tonic-gate	FALIGN_D12
1223*0Sstevel@tonic-gate	ldda	[%l7]ASI_BLK_P, %d0
1224*0Sstevel@tonic-gate	stda	%d48, [%i0]ASI_BLK_P
1225*0Sstevel@tonic-gate	add	%l7, 64, %l7
1226*0Sstevel@tonic-gate	subcc	%i3, 64, %i3
1227*0Sstevel@tonic-gate	bz,pn	%ncc, 0f
1228*0Sstevel@tonic-gate	add	%i0, 64, %i0
1229*0Sstevel@tonic-gate	! 2nd chunk -  %d0 pre, %d16 low, %d32 high, %d48 dst
1230*0Sstevel@tonic-gate	FALIGN_D28
1231*0Sstevel@tonic-gate	ldda	[%l7]ASI_BLK_P, %d16
1232*0Sstevel@tonic-gate	stda	%d48, [%i0]ASI_BLK_P
1233*0Sstevel@tonic-gate	add	%l7, 64, %l7
1234*0Sstevel@tonic-gate	subcc	%i3, 64, %i3
1235*0Sstevel@tonic-gate	bz,pn	%ncc, 1f
1236*0Sstevel@tonic-gate	add	%i0, 64, %i0
1237*0Sstevel@tonic-gate	! 3rd chunk -  %d0 high, %d16 pre, %d32 low, %d48 dst
1238*0Sstevel@tonic-gate	FALIGN_D44
1239*0Sstevel@tonic-gate	ldda	[%l7]ASI_BLK_P, %d32
1240*0Sstevel@tonic-gate	stda	%d48, [%i0]ASI_BLK_P
1241*0Sstevel@tonic-gate	add	%l7, 64, %l7
1242*0Sstevel@tonic-gate	subcc	%i3, 64, %i3
1243*0Sstevel@tonic-gate	bz,pn	%ncc, 2f
1244*0Sstevel@tonic-gate	add	%i0, 64, %i0
1245*0Sstevel@tonic-gate	ba,a,pt	%ncc, seg6
1246*0Sstevel@tonic-gate
1247*0Sstevel@tonic-gate0:
1248*0Sstevel@tonic-gate	FALIGN_D28
1249*0Sstevel@tonic-gate	stda	%d48, [%i0]ASI_BLK_P
1250*0Sstevel@tonic-gate	add	%i0, 64, %i0
1251*0Sstevel@tonic-gate	membar	#Sync
1252*0Sstevel@tonic-gate	FALIGN_D44
1253*0Sstevel@tonic-gate	stda	%d48, [%i0]ASI_BLK_P
1254*0Sstevel@tonic-gate	ba,pt	%ncc, blkd12
1255*0Sstevel@tonic-gate	add	%i0, 64, %i0
1256*0Sstevel@tonic-gate
1257*0Sstevel@tonic-gate1:
1258*0Sstevel@tonic-gate	FALIGN_D44
1259*0Sstevel@tonic-gate	stda	%d48, [%i0]ASI_BLK_P
1260*0Sstevel@tonic-gate	add	%i0, 64, %i0
1261*0Sstevel@tonic-gate	membar	#Sync
1262*0Sstevel@tonic-gate	FALIGN_D12
1263*0Sstevel@tonic-gate	stda	%d48, [%i0]ASI_BLK_P
1264*0Sstevel@tonic-gate	ba,pt	%ncc, blkd28
1265*0Sstevel@tonic-gate	add	%i0, 64, %i0
1266*0Sstevel@tonic-gate
1267*0Sstevel@tonic-gate2:
1268*0Sstevel@tonic-gate	FALIGN_D12
1269*0Sstevel@tonic-gate	stda	%d48, [%i0]ASI_BLK_P
1270*0Sstevel@tonic-gate	add	%i0, 64, %i0
1271*0Sstevel@tonic-gate	membar	#Sync
1272*0Sstevel@tonic-gate	FALIGN_D28
1273*0Sstevel@tonic-gate	stda	%d48, [%i0]ASI_BLK_P
1274*0Sstevel@tonic-gate	ba,pt	%ncc, blkd44
1275*0Sstevel@tonic-gate	add	%i0, 64, %i0
1276*0Sstevel@tonic-gate
1277*0Sstevel@tonic-gateseg7:
1278*0Sstevel@tonic-gate	! 1st chunk - %d0 low, %d16 high, %d32 pre, %d48 dst
1279*0Sstevel@tonic-gate	FALIGN_D14
1280*0Sstevel@tonic-gate	ldda	[%l7]ASI_BLK_P, %d0
1281*0Sstevel@tonic-gate	stda	%d48, [%i0]ASI_BLK_P
1282*0Sstevel@tonic-gate	add	%l7, 64, %l7
1283*0Sstevel@tonic-gate	subcc	%i3, 64, %i3
1284*0Sstevel@tonic-gate	bz,pn	%ncc, 0f
1285*0Sstevel@tonic-gate	add	%i0, 64, %i0
1286*0Sstevel@tonic-gate	! 2nd chunk -  %d0 pre, %d16 low, %d32 high, %d48 dst
1287*0Sstevel@tonic-gate	FALIGN_D30
1288*0Sstevel@tonic-gate	ldda	[%l7]ASI_BLK_P, %d16
1289*0Sstevel@tonic-gate	stda	%d48, [%i0]ASI_BLK_P
1290*0Sstevel@tonic-gate	add	%l7, 64, %l7
1291*0Sstevel@tonic-gate	subcc	%i3, 64, %i3
1292*0Sstevel@tonic-gate	bz,pn	%ncc, 1f
1293*0Sstevel@tonic-gate	add	%i0, 64, %i0
1294*0Sstevel@tonic-gate	! 3rd chunk -  %d0 high, %d16 pre, %d32 low, %d48 dst
1295*0Sstevel@tonic-gate	FALIGN_D46
1296*0Sstevel@tonic-gate	ldda	[%l7]ASI_BLK_P, %d32
1297*0Sstevel@tonic-gate	stda	%d48, [%i0]ASI_BLK_P
1298*0Sstevel@tonic-gate	add	%l7, 64, %l7
1299*0Sstevel@tonic-gate	subcc	%i3, 64, %i3
1300*0Sstevel@tonic-gate	bz,pn	%ncc, 2f
1301*0Sstevel@tonic-gate	add	%i0, 64, %i0
1302*0Sstevel@tonic-gate	ba,a,pt	%ncc, seg7
1303*0Sstevel@tonic-gate
1304*0Sstevel@tonic-gate0:
1305*0Sstevel@tonic-gate	FALIGN_D30
1306*0Sstevel@tonic-gate	stda	%d48, [%i0]ASI_BLK_P
1307*0Sstevel@tonic-gate	add	%i0, 64, %i0
1308*0Sstevel@tonic-gate	membar	#Sync
1309*0Sstevel@tonic-gate	FALIGN_D46
1310*0Sstevel@tonic-gate	stda	%d48, [%i0]ASI_BLK_P
1311*0Sstevel@tonic-gate	ba,pt	%ncc, blkd14
1312*0Sstevel@tonic-gate	add	%i0, 64, %i0
1313*0Sstevel@tonic-gate
1314*0Sstevel@tonic-gate1:
1315*0Sstevel@tonic-gate	FALIGN_D46
1316*0Sstevel@tonic-gate	stda	%d48, [%i0]ASI_BLK_P
1317*0Sstevel@tonic-gate	add	%i0, 64, %i0
1318*0Sstevel@tonic-gate	membar	#Sync
1319*0Sstevel@tonic-gate	FALIGN_D14
1320*0Sstevel@tonic-gate	stda	%d48, [%i0]ASI_BLK_P
1321*0Sstevel@tonic-gate	ba,pt	%ncc, blkd30
1322*0Sstevel@tonic-gate	add	%i0, 64, %i0
1323*0Sstevel@tonic-gate
1324*0Sstevel@tonic-gate2:
1325*0Sstevel@tonic-gate	FALIGN_D14
1326*0Sstevel@tonic-gate	stda	%d48, [%i0]ASI_BLK_P
1327*0Sstevel@tonic-gate	add	%i0, 64, %i0
1328*0Sstevel@tonic-gate	membar	#Sync
1329*0Sstevel@tonic-gate	FALIGN_D30
1330*0Sstevel@tonic-gate	stda	%d48, [%i0]ASI_BLK_P
1331*0Sstevel@tonic-gate	ba,pt	%ncc, blkd46
1332*0Sstevel@tonic-gate	add	%i0, 64, %i0
1333*0Sstevel@tonic-gate
1334*0Sstevel@tonic-gate
1335*0Sstevel@tonic-gate	!
1336*0Sstevel@tonic-gate	! dribble out the last partial block
1337*0Sstevel@tonic-gate	!
1338*0Sstevel@tonic-gateblkd0:
1339*0Sstevel@tonic-gate	subcc	%i4, 8, %i4
1340*0Sstevel@tonic-gate	blu,pn	%ncc, blkdone
1341*0Sstevel@tonic-gate	faligndata %d0, %d2, %d48
1342*0Sstevel@tonic-gate	std	%d48, [%i0]
1343*0Sstevel@tonic-gate	add	%i0, 8, %i0
1344*0Sstevel@tonic-gateblkd2:
1345*0Sstevel@tonic-gate	subcc	%i4, 8, %i4
1346*0Sstevel@tonic-gate	blu,pn	%ncc, blkdone
1347*0Sstevel@tonic-gate	faligndata %d2, %d4, %d48
1348*0Sstevel@tonic-gate	std	%d48, [%i0]
1349*0Sstevel@tonic-gate	add	%i0, 8, %i0
1350*0Sstevel@tonic-gateblkd4:
1351*0Sstevel@tonic-gate	subcc	%i4, 8, %i4
1352*0Sstevel@tonic-gate	blu,pn	%ncc, blkdone
1353*0Sstevel@tonic-gate	faligndata %d4, %d6, %d48
1354*0Sstevel@tonic-gate	std	%d48, [%i0]
1355*0Sstevel@tonic-gate	add	%i0, 8, %i0
1356*0Sstevel@tonic-gateblkd6:
1357*0Sstevel@tonic-gate	subcc	%i4, 8, %i4
1358*0Sstevel@tonic-gate	blu,pn	%ncc, blkdone
1359*0Sstevel@tonic-gate	faligndata %d6, %d8, %d48
1360*0Sstevel@tonic-gate	std	%d48, [%i0]
1361*0Sstevel@tonic-gate	add	%i0, 8, %i0
1362*0Sstevel@tonic-gateblkd8:
1363*0Sstevel@tonic-gate	subcc	%i4, 8, %i4
1364*0Sstevel@tonic-gate	blu,pn	%ncc, blkdone
1365*0Sstevel@tonic-gate	faligndata %d8, %d10, %d48
1366*0Sstevel@tonic-gate	std	%d48, [%i0]
1367*0Sstevel@tonic-gate	add	%i0, 8, %i0
1368*0Sstevel@tonic-gateblkd10:
1369*0Sstevel@tonic-gate	subcc	%i4, 8, %i4
1370*0Sstevel@tonic-gate	blu,pn	%ncc, blkdone
1371*0Sstevel@tonic-gate	faligndata %d10, %d12, %d48
1372*0Sstevel@tonic-gate	std	%d48, [%i0]
1373*0Sstevel@tonic-gate	add	%i0, 8, %i0
1374*0Sstevel@tonic-gateblkd12:
1375*0Sstevel@tonic-gate	subcc	%i4, 8, %i4
1376*0Sstevel@tonic-gate	blu,pn	%ncc, blkdone
1377*0Sstevel@tonic-gate	faligndata %d12, %d14, %d48
1378*0Sstevel@tonic-gate	std	%d48, [%i0]
1379*0Sstevel@tonic-gate	add	%i0, 8, %i0
1380*0Sstevel@tonic-gateblkd14:
1381*0Sstevel@tonic-gate	subcc	%i4, 8, %i4
1382*0Sstevel@tonic-gate	blu,pn	%ncc, blkdone
1383*0Sstevel@tonic-gate	fsrc1	%d14, %d0
1384*0Sstevel@tonic-gate	ba,a,pt	%ncc, blkleft
1385*0Sstevel@tonic-gate
1386*0Sstevel@tonic-gateblkd16:
1387*0Sstevel@tonic-gate	subcc	%i4, 8, %i4
1388*0Sstevel@tonic-gate	blu,pn	%ncc, blkdone
1389*0Sstevel@tonic-gate	faligndata %d16, %d18, %d48
1390*0Sstevel@tonic-gate	std	%d48, [%i0]
1391*0Sstevel@tonic-gate	add	%i0, 8, %i0
1392*0Sstevel@tonic-gateblkd18:
1393*0Sstevel@tonic-gate	subcc	%i4, 8, %i4
1394*0Sstevel@tonic-gate	blu,pn	%ncc, blkdone
1395*0Sstevel@tonic-gate	faligndata %d18, %d20, %d48
1396*0Sstevel@tonic-gate	std	%d48, [%i0]
1397*0Sstevel@tonic-gate	add	%i0, 8, %i0
1398*0Sstevel@tonic-gateblkd20:
1399*0Sstevel@tonic-gate	subcc	%i4, 8, %i4
1400*0Sstevel@tonic-gate	blu,pn	%ncc, blkdone
1401*0Sstevel@tonic-gate	faligndata %d20, %d22, %d48
1402*0Sstevel@tonic-gate	std	%d48, [%i0]
1403*0Sstevel@tonic-gate	add	%i0, 8, %i0
1404*0Sstevel@tonic-gateblkd22:
1405*0Sstevel@tonic-gate	subcc	%i4, 8, %i4
1406*0Sstevel@tonic-gate	blu,pn	%ncc, blkdone
1407*0Sstevel@tonic-gate	faligndata %d22, %d24, %d48
1408*0Sstevel@tonic-gate	std	%d48, [%i0]
1409*0Sstevel@tonic-gate	add	%i0, 8, %i0
1410*0Sstevel@tonic-gateblkd24:
1411*0Sstevel@tonic-gate	subcc	%i4, 8, %i4
1412*0Sstevel@tonic-gate	blu,pn	%ncc, blkdone
1413*0Sstevel@tonic-gate	faligndata %d24, %d26, %d48
1414*0Sstevel@tonic-gate	std	%d48, [%i0]
1415*0Sstevel@tonic-gate	add	%i0, 8, %i0
1416*0Sstevel@tonic-gateblkd26:
1417*0Sstevel@tonic-gate	subcc	%i4, 8, %i4
1418*0Sstevel@tonic-gate	blu,pn	%ncc, blkdone
1419*0Sstevel@tonic-gate	faligndata %d26, %d28, %d48
1420*0Sstevel@tonic-gate	std	%d48, [%i0]
1421*0Sstevel@tonic-gate	add	%i0, 8, %i0
1422*0Sstevel@tonic-gateblkd28:
1423*0Sstevel@tonic-gate	subcc	%i4, 8, %i4
1424*0Sstevel@tonic-gate	blu,pn	%ncc, blkdone
1425*0Sstevel@tonic-gate	faligndata %d28, %d30, %d48
1426*0Sstevel@tonic-gate	std	%d48, [%i0]
1427*0Sstevel@tonic-gate	add	%i0, 8, %i0
1428*0Sstevel@tonic-gateblkd30:
1429*0Sstevel@tonic-gate	subcc	%i4, 8, %i4
1430*0Sstevel@tonic-gate	blu,pn	%ncc, blkdone
1431*0Sstevel@tonic-gate	fsrc1	%d30, %d0
1432*0Sstevel@tonic-gate	ba,a,pt	%ncc, blkleft
1433*0Sstevel@tonic-gateblkd32:
1434*0Sstevel@tonic-gate	subcc	%i4, 8, %i4
1435*0Sstevel@tonic-gate	blu,pn	%ncc, blkdone
1436*0Sstevel@tonic-gate	faligndata %d32, %d34, %d48
1437*0Sstevel@tonic-gate	std	%d48, [%i0]
1438*0Sstevel@tonic-gate	add	%i0, 8, %i0
1439*0Sstevel@tonic-gateblkd34:
1440*0Sstevel@tonic-gate	subcc	%i4, 8, %i4
1441*0Sstevel@tonic-gate	blu,pn	%ncc, blkdone
1442*0Sstevel@tonic-gate	faligndata %d34, %d36, %d48
1443*0Sstevel@tonic-gate	std	%d48, [%i0]
1444*0Sstevel@tonic-gate	add	%i0, 8, %i0
1445*0Sstevel@tonic-gateblkd36:
1446*0Sstevel@tonic-gate	subcc	%i4, 8, %i4
1447*0Sstevel@tonic-gate	blu,pn	%ncc, blkdone
1448*0Sstevel@tonic-gate	faligndata %d36, %d38, %d48
1449*0Sstevel@tonic-gate	std	%d48, [%i0]
1450*0Sstevel@tonic-gate	add	%i0, 8, %i0
1451*0Sstevel@tonic-gateblkd38:
1452*0Sstevel@tonic-gate	subcc	%i4, 8, %i4
1453*0Sstevel@tonic-gate	blu,pn	%ncc, blkdone
1454*0Sstevel@tonic-gate	faligndata %d38, %d40, %d48
1455*0Sstevel@tonic-gate	std	%d48, [%i0]
1456*0Sstevel@tonic-gate	add	%i0, 8, %i0
1457*0Sstevel@tonic-gateblkd40:
1458*0Sstevel@tonic-gate	subcc	%i4, 8, %i4
1459*0Sstevel@tonic-gate	blu,pn	%ncc, blkdone
1460*0Sstevel@tonic-gate	faligndata %d40, %d42, %d48
1461*0Sstevel@tonic-gate	std	%d48, [%i0]
1462*0Sstevel@tonic-gate	add	%i0, 8, %i0
1463*0Sstevel@tonic-gateblkd42:
1464*0Sstevel@tonic-gate	subcc	%i4, 8, %i4
1465*0Sstevel@tonic-gate	blu,pn	%ncc, blkdone
1466*0Sstevel@tonic-gate	faligndata %d42, %d44, %d48
1467*0Sstevel@tonic-gate	std	%d48, [%i0]
1468*0Sstevel@tonic-gate	add	%i0, 8, %i0
1469*0Sstevel@tonic-gateblkd44:
1470*0Sstevel@tonic-gate	subcc	%i4, 8, %i4
1471*0Sstevel@tonic-gate	blu,pn	%ncc, blkdone
1472*0Sstevel@tonic-gate	faligndata %d44, %d46, %d48
1473*0Sstevel@tonic-gate	std	%d48, [%i0]
1474*0Sstevel@tonic-gate	add	%i0, 8, %i0
1475*0Sstevel@tonic-gateblkd46:
1476*0Sstevel@tonic-gate	subcc	%i4, 8, %i4
1477*0Sstevel@tonic-gate	blu,pn	%ncc, blkdone
1478*0Sstevel@tonic-gate	fsrc1	%d46, %d0
1479*0Sstevel@tonic-gate
1480*0Sstevel@tonic-gateblkleft:
1481*0Sstevel@tonic-gate1:
1482*0Sstevel@tonic-gate	ldd	[%l7], %d2
1483*0Sstevel@tonic-gate	add	%l7, 8, %l7
1484*0Sstevel@tonic-gate	subcc	%i4, 8, %i4
1485*0Sstevel@tonic-gate	faligndata %d0, %d2, %d8
1486*0Sstevel@tonic-gate	std	%d8, [%i0]
1487*0Sstevel@tonic-gate	blu,pn	%ncc, blkdone
1488*0Sstevel@tonic-gate	add	%i0, 8, %i0
1489*0Sstevel@tonic-gate	ldd	[%l7], %d0
1490*0Sstevel@tonic-gate	add	%l7, 8, %l7
1491*0Sstevel@tonic-gate	subcc	%i4, 8, %i4
1492*0Sstevel@tonic-gate	faligndata %d2, %d0, %d8
1493*0Sstevel@tonic-gate	std	%d8, [%i0]
1494*0Sstevel@tonic-gate	bgeu,pt	%ncc, 1b
1495*0Sstevel@tonic-gate	add	%i0, 8, %i0
1496*0Sstevel@tonic-gate
1497*0Sstevel@tonic-gateblkdone:
1498*0Sstevel@tonic-gate	tst	%i2
1499*0Sstevel@tonic-gate	bz,pt	%ncc, .bcb_exit
1500*0Sstevel@tonic-gate	and	%l3, 0x4, %l3		! fprs.du = fprs.dl = 0
1501*0Sstevel@tonic-gate
1502*0Sstevel@tonic-gate7:	ldub	[%i1], %i4
1503*0Sstevel@tonic-gate	inc	%i1
1504*0Sstevel@tonic-gate	inc	%i0
1505*0Sstevel@tonic-gate	deccc	%i2
1506*0Sstevel@tonic-gate	bgu,pt	%ncc, 7b
1507*0Sstevel@tonic-gate	  stb	  %i4, [%i0 - 1]
1508*0Sstevel@tonic-gate
1509*0Sstevel@tonic-gate.bcb_exit:
1510*0Sstevel@tonic-gate	membar	#StoreLoad|#StoreStore
1511*0Sstevel@tonic-gate	btst	FPUSED_FLAG, %l6
1512*0Sstevel@tonic-gate	bz	%icc, 1f
1513*0Sstevel@tonic-gate	  and	%l6, COPY_FLAGS, %l1	! Store flags in %l1
1514*0Sstevel@tonic-gate					! We can't clear the flags from %l6 yet.
1515*0Sstevel@tonic-gate					! If there's an error, .copyerr will
1516*0Sstevel@tonic-gate					! need them
1517*0Sstevel@tonic-gate
1518*0Sstevel@tonic-gate	ld	[%fp + STACK_BIAS - SAVED_GSR_OFFSET], %o2	! restore gsr
1519*0Sstevel@tonic-gate	wr	%o2, 0, %gsr
1520*0Sstevel@tonic-gate
1521*0Sstevel@tonic-gate	ld	[%fp + STACK_BIAS - SAVED_FPRS_OFFSET], %o3
1522*0Sstevel@tonic-gate	btst	FPRS_FEF, %o3
1523*0Sstevel@tonic-gate	bz	%icc, 4f
1524*0Sstevel@tonic-gate	  nop
1525*0Sstevel@tonic-gate
1526*0Sstevel@tonic-gate	! restore fpregs from stack
1527*0Sstevel@tonic-gate	membar	#Sync
1528*0Sstevel@tonic-gate	add	%fp, STACK_BIAS - 257, %o2
1529*0Sstevel@tonic-gate	and	%o2, -64, %o2
1530*0Sstevel@tonic-gate	ldda	[%o2]ASI_BLK_P, %d0
1531*0Sstevel@tonic-gate	add	%o2, 64, %o2
1532*0Sstevel@tonic-gate	ldda	[%o2]ASI_BLK_P, %d16
1533*0Sstevel@tonic-gate	add	%o2, 64, %o2
1534*0Sstevel@tonic-gate	ldda	[%o2]ASI_BLK_P, %d32
1535*0Sstevel@tonic-gate	add	%o2, 64, %o2
1536*0Sstevel@tonic-gate	ldda	[%o2]ASI_BLK_P, %d48
1537*0Sstevel@tonic-gate	membar	#Sync
1538*0Sstevel@tonic-gate
1539*0Sstevel@tonic-gate	ba,pt	%ncc, 2f
1540*0Sstevel@tonic-gate	  wr	%o3, 0, %fprs		! restore fprs
1541*0Sstevel@tonic-gate
1542*0Sstevel@tonic-gate4:
1543*0Sstevel@tonic-gate	FZERO				! zero all of the fpregs
1544*0Sstevel@tonic-gate	wr	%o3, 0, %fprs		! restore fprs
1545*0Sstevel@tonic-gate
1546*0Sstevel@tonic-gate2:	ldn	[THREAD_REG + T_LWP], %o2
1547*0Sstevel@tonic-gate	tst	%o2
1548*0Sstevel@tonic-gate	bnz,pt	%ncc, 1f
1549*0Sstevel@tonic-gate	  nop
1550*0Sstevel@tonic-gate
1551*0Sstevel@tonic-gate	ldsb	[THREAD_REG + T_PREEMPT], %l0
1552*0Sstevel@tonic-gate	deccc	%l0
1553*0Sstevel@tonic-gate	bnz,pn	%ncc, 1f
1554*0Sstevel@tonic-gate	  stb	%l0, [THREAD_REG + T_PREEMPT]
1555*0Sstevel@tonic-gate
1556*0Sstevel@tonic-gate	! Check for a kernel preemption request
1557*0Sstevel@tonic-gate	ldn	[THREAD_REG + T_CPU], %l0
1558*0Sstevel@tonic-gate	ldub	[%l0 + CPU_KPRUNRUN], %l0
1559*0Sstevel@tonic-gate	tst	%l0
1560*0Sstevel@tonic-gate	bnz,a,pt	%ncc, 1f	! Need to call kpreempt?
1561*0Sstevel@tonic-gate	  or	%l1, KPREEMPT_FLAG, %l1	! If so, set the flag
1562*0Sstevel@tonic-gate
1563*0Sstevel@tonic-gate1:
1564*0Sstevel@tonic-gate	btst	BCOPY_FLAG, %l1
1565*0Sstevel@tonic-gate	bz,pn	%icc, 3f
1566*0Sstevel@tonic-gate	  andncc	%l6, COPY_FLAGS, %l6
1567*0Sstevel@tonic-gate
1568*0Sstevel@tonic-gate	!
1569*0Sstevel@tonic-gate	! Here via bcopy. Check to see if the handler was NULL.
1570*0Sstevel@tonic-gate	! If so, just return quietly. Otherwise, reset the
1571*0Sstevel@tonic-gate	! handler and go home.
1572*0Sstevel@tonic-gate	!
1573*0Sstevel@tonic-gate	bnz,pn	%ncc, 3f
1574*0Sstevel@tonic-gate	  nop
1575*0Sstevel@tonic-gate
1576*0Sstevel@tonic-gate	!
1577*0Sstevel@tonic-gate	! Null handler.  Check for kpreempt flag, call if necessary,
1578*0Sstevel@tonic-gate	! then return.
1579*0Sstevel@tonic-gate	!
1580*0Sstevel@tonic-gate	btst	KPREEMPT_FLAG, %l1
1581*0Sstevel@tonic-gate	bz,pt	%icc, 2f
1582*0Sstevel@tonic-gate	  nop
1583*0Sstevel@tonic-gate	call	kpreempt
1584*0Sstevel@tonic-gate	  rdpr	%pil, %o0	! pass %pil
1585*0Sstevel@tonic-gate2:
1586*0Sstevel@tonic-gate	ret
1587*0Sstevel@tonic-gate	  restore	%g0, 0, %o0
1588*0Sstevel@tonic-gate
1589*0Sstevel@tonic-gate	!
1590*0Sstevel@tonic-gate	! Here via kcopy or bcopy with a handler.  Reset the
1591*0Sstevel@tonic-gate	! fault handler.
1592*0Sstevel@tonic-gate	!
1593*0Sstevel@tonic-gate3:
1594*0Sstevel@tonic-gate	membar	#Sync
1595*0Sstevel@tonic-gate	stn	%l6, [THREAD_REG + T_LOFAULT]	! restore old t_lofault
1596*0Sstevel@tonic-gate
1597*0Sstevel@tonic-gate	! call kpreempt if necessary
1598*0Sstevel@tonic-gate	btst	KPREEMPT_FLAG, %l1
1599*0Sstevel@tonic-gate	bz,pt	%icc, 4f
1600*0Sstevel@tonic-gate	  nop
1601*0Sstevel@tonic-gate	call	kpreempt
1602*0Sstevel@tonic-gate	  rdpr	%pil, %o0
1603*0Sstevel@tonic-gate4:
1604*0Sstevel@tonic-gate	ret
1605*0Sstevel@tonic-gate	  restore	%g0, 0, %o0
1606*0Sstevel@tonic-gate
1607*0Sstevel@tonic-gate.bcb_punt:
1608*0Sstevel@tonic-gate	!
1609*0Sstevel@tonic-gate	! use aligned transfers where possible
1610*0Sstevel@tonic-gate	!
1611*0Sstevel@tonic-gate	xor	%i0, %i1, %o4		! xor from and to address
1612*0Sstevel@tonic-gate	btst	7, %o4			! if lower three bits zero
1613*0Sstevel@tonic-gate	bz	%icc, .aldoubcp		! can align on double boundary
1614*0Sstevel@tonic-gate	.empty	! assembler complaints about label
1615*0Sstevel@tonic-gate
1616*0Sstevel@tonic-gate	xor	%i0, %i1, %o4		! xor from and to address
1617*0Sstevel@tonic-gate	btst	3, %o4			! if lower two bits zero
1618*0Sstevel@tonic-gate	bz	%icc, .alwordcp		! can align on word boundary
1619*0Sstevel@tonic-gate	btst	3, %i0			! delay slot, from address unaligned?
1620*0Sstevel@tonic-gate	!
1621*0Sstevel@tonic-gate	! use aligned reads and writes where possible
1622*0Sstevel@tonic-gate	! this differs from wordcp in that it copes
1623*0Sstevel@tonic-gate	! with odd alignment between source and destination
1624*0Sstevel@tonic-gate	! using word reads and writes with the proper shifts
1625*0Sstevel@tonic-gate	! in between to align transfers to and from memory
1626*0Sstevel@tonic-gate	! i0 - src address, i1 - dest address, i2 - count
1627*0Sstevel@tonic-gate	! i3, i4 - tmps used for generating complete word
1628*0Sstevel@tonic-gate	! i5 (word to write)
1629*0Sstevel@tonic-gate	! l0 size in bits of upper part of source word (US)
1630*0Sstevel@tonic-gate	! l1 size in bits of lower part of source word (LS = 32 - US)
1631*0Sstevel@tonic-gate	! l2 size in bits of upper part of destination word (UD)
1632*0Sstevel@tonic-gate	! l3 size in bits of lower part of destination word (LD = 32 - UD)
1633*0Sstevel@tonic-gate	! l4 number of bytes leftover after aligned transfers complete
1634*0Sstevel@tonic-gate	! l5 the number 32
1635*0Sstevel@tonic-gate	!
1636*0Sstevel@tonic-gate	mov	32, %l5			! load an oft-needed constant
1637*0Sstevel@tonic-gate	bz	.align_dst_only
1638*0Sstevel@tonic-gate	btst	3, %i1			! is destnation address aligned?
1639*0Sstevel@tonic-gate	clr	%i4			! clear registers used in either case
1640*0Sstevel@tonic-gate	bz	%icc, .align_src_only
1641*0Sstevel@tonic-gate	clr	%l0
1642*0Sstevel@tonic-gate	!
1643*0Sstevel@tonic-gate	! both source and destination addresses are unaligned
1644*0Sstevel@tonic-gate	!
1645*0Sstevel@tonic-gate1:					! align source
1646*0Sstevel@tonic-gate	ldub	[%i0], %i3		! read a byte from source address
1647*0Sstevel@tonic-gate	add	%i0, 1, %i0		! increment source address
1648*0Sstevel@tonic-gate	or	%i4, %i3, %i4		! or in with previous bytes (if any)
1649*0Sstevel@tonic-gate	btst	3, %i0			! is source aligned?
1650*0Sstevel@tonic-gate	add	%l0, 8, %l0		! increment size of upper source (US)
1651*0Sstevel@tonic-gate	bnz,a	1b
1652*0Sstevel@tonic-gate	sll	%i4, 8, %i4		! make room for next byte
1653*0Sstevel@tonic-gate
1654*0Sstevel@tonic-gate	sub	%l5, %l0, %l1		! generate shift left count (LS)
1655*0Sstevel@tonic-gate	sll	%i4, %l1, %i4		! prepare to get rest
1656*0Sstevel@tonic-gate	ld	[%i0], %i3		! read a word
1657*0Sstevel@tonic-gate	add	%i0, 4, %i0		! increment source address
1658*0Sstevel@tonic-gate	srl	%i3, %l0, %i5		! upper src bits into lower dst bits
1659*0Sstevel@tonic-gate	or	%i4, %i5, %i5		! merge
1660*0Sstevel@tonic-gate	mov	24, %l3			! align destination
1661*0Sstevel@tonic-gate1:
1662*0Sstevel@tonic-gate	srl	%i5, %l3, %i4		! prepare to write a single byte
1663*0Sstevel@tonic-gate	stb	%i4, [%i1]		! write a byte
1664*0Sstevel@tonic-gate	add	%i1, 1, %i1		! increment destination address
1665*0Sstevel@tonic-gate	sub	%i2, 1, %i2		! decrement count
1666*0Sstevel@tonic-gate	btst	3, %i1			! is destination aligned?
1667*0Sstevel@tonic-gate	bnz,a	1b
1668*0Sstevel@tonic-gate	sub	%l3, 8, %l3		! delay slot, decrement shift count (LD)
1669*0Sstevel@tonic-gate	sub	%l5, %l3, %l2		! generate shift left count (UD)
1670*0Sstevel@tonic-gate	sll	%i5, %l2, %i5		! move leftover into upper bytes
1671*0Sstevel@tonic-gate	cmp	%l2, %l0		! cmp # reqd to fill dst w old src left
1672*0Sstevel@tonic-gate	bgu	%ncc, .more_needed	! need more to fill than we have
1673*0Sstevel@tonic-gate	nop
1674*0Sstevel@tonic-gate
1675*0Sstevel@tonic-gate	sll	%i3, %l1, %i3		! clear upper used byte(s)
1676*0Sstevel@tonic-gate	srl	%i3, %l1, %i3
1677*0Sstevel@tonic-gate	! get the odd bytes between alignments
1678*0Sstevel@tonic-gate	sub	%l0, %l2, %l0		! regenerate shift count
1679*0Sstevel@tonic-gate	sub	%l5, %l0, %l1		! generate new shift left count (LS)
1680*0Sstevel@tonic-gate	and	%i2, 3, %l4		! must do remaining bytes if count%4 > 0
1681*0Sstevel@tonic-gate	andn	%i2, 3, %i2		! # of aligned bytes that can be moved
1682*0Sstevel@tonic-gate	srl	%i3, %l0, %i4
1683*0Sstevel@tonic-gate	or	%i5, %i4, %i5
1684*0Sstevel@tonic-gate	st	%i5, [%i1]		! write a word
1685*0Sstevel@tonic-gate	subcc	%i2, 4, %i2		! decrement count
1686*0Sstevel@tonic-gate	bz	%ncc, .unalign_out
1687*0Sstevel@tonic-gate	add	%i1, 4, %i1		! increment destination address
1688*0Sstevel@tonic-gate
1689*0Sstevel@tonic-gate	b	2f
1690*0Sstevel@tonic-gate	sll	%i3, %l1, %i5		! get leftover into upper bits
1691*0Sstevel@tonic-gate.more_needed:
1692*0Sstevel@tonic-gate	sll	%i3, %l0, %i3		! save remaining byte(s)
1693*0Sstevel@tonic-gate	srl	%i3, %l0, %i3
1694*0Sstevel@tonic-gate	sub	%l2, %l0, %l1		! regenerate shift count
1695*0Sstevel@tonic-gate	sub	%l5, %l1, %l0		! generate new shift left count
1696*0Sstevel@tonic-gate	sll	%i3, %l1, %i4		! move to fill empty space
1697*0Sstevel@tonic-gate	b	3f
1698*0Sstevel@tonic-gate	or	%i5, %i4, %i5		! merge to complete word
1699*0Sstevel@tonic-gate	!
1700*0Sstevel@tonic-gate	! the source address is aligned and destination is not
1701*0Sstevel@tonic-gate	!
1702*0Sstevel@tonic-gate.align_dst_only:
1703*0Sstevel@tonic-gate	ld	[%i0], %i4		! read a word
1704*0Sstevel@tonic-gate	add	%i0, 4, %i0		! increment source address
1705*0Sstevel@tonic-gate	mov	24, %l0			! initial shift alignment count
1706*0Sstevel@tonic-gate1:
1707*0Sstevel@tonic-gate	srl	%i4, %l0, %i3		! prepare to write a single byte
1708*0Sstevel@tonic-gate	stb	%i3, [%i1]		! write a byte
1709*0Sstevel@tonic-gate	add	%i1, 1, %i1		! increment destination address
1710*0Sstevel@tonic-gate	sub	%i2, 1, %i2		! decrement count
1711*0Sstevel@tonic-gate	btst	3, %i1			! is destination aligned?
1712*0Sstevel@tonic-gate	bnz,a	1b
1713*0Sstevel@tonic-gate	sub	%l0, 8, %l0		! delay slot, decrement shift count
1714*0Sstevel@tonic-gate.xfer:
1715*0Sstevel@tonic-gate	sub	%l5, %l0, %l1		! generate shift left count
1716*0Sstevel@tonic-gate	sll	%i4, %l1, %i5		! get leftover
1717*0Sstevel@tonic-gate3:
1718*0Sstevel@tonic-gate	and	%i2, 3, %l4		! must do remaining bytes if count%4 > 0
1719*0Sstevel@tonic-gate	andn	%i2, 3, %i2		! # of aligned bytes that can be moved
1720*0Sstevel@tonic-gate2:
1721*0Sstevel@tonic-gate	ld	[%i0], %i3		! read a source word
1722*0Sstevel@tonic-gate	add	%i0, 4, %i0		! increment source address
1723*0Sstevel@tonic-gate	srl	%i3, %l0, %i4		! upper src bits into lower dst bits
1724*0Sstevel@tonic-gate	or	%i5, %i4, %i5		! merge with upper dest bits (leftover)
1725*0Sstevel@tonic-gate	st	%i5, [%i1]		! write a destination word
1726*0Sstevel@tonic-gate	subcc	%i2, 4, %i2		! decrement count
1727*0Sstevel@tonic-gate	bz	%ncc, .unalign_out	! check if done
1728*0Sstevel@tonic-gate	add	%i1, 4, %i1		! increment destination address
1729*0Sstevel@tonic-gate	b	2b			! loop
1730*0Sstevel@tonic-gate	sll	%i3, %l1, %i5		! get leftover
1731*0Sstevel@tonic-gate.unalign_out:
1732*0Sstevel@tonic-gate	tst	%l4			! any bytes leftover?
1733*0Sstevel@tonic-gate	bz	%ncc, .cpdone
1734*0Sstevel@tonic-gate	.empty				! allow next instruction in delay slot
1735*0Sstevel@tonic-gate1:
1736*0Sstevel@tonic-gate	sub	%l0, 8, %l0		! decrement shift
1737*0Sstevel@tonic-gate	srl	%i3, %l0, %i4		! upper src byte into lower dst byte
1738*0Sstevel@tonic-gate	stb	%i4, [%i1]		! write a byte
1739*0Sstevel@tonic-gate	subcc	%l4, 1, %l4		! decrement count
1740*0Sstevel@tonic-gate	bz	%ncc, .cpdone		! done?
1741*0Sstevel@tonic-gate	add	%i1, 1, %i1		! increment destination
1742*0Sstevel@tonic-gate	tst	%l0			! any more previously read bytes
1743*0Sstevel@tonic-gate	bnz	%ncc, 1b		! we have leftover bytes
1744*0Sstevel@tonic-gate	mov	%l4, %i2		! delay slot, mv cnt where dbytecp wants
1745*0Sstevel@tonic-gate	b	.dbytecp		! let dbytecp do the rest
1746*0Sstevel@tonic-gate	sub	%i0, %i1, %i0		! i0 gets the difference of src and dst
1747*0Sstevel@tonic-gate	!
1748*0Sstevel@tonic-gate	! the destination address is aligned and the source is not
1749*0Sstevel@tonic-gate	!
1750*0Sstevel@tonic-gate.align_src_only:
1751*0Sstevel@tonic-gate	ldub	[%i0], %i3		! read a byte from source address
1752*0Sstevel@tonic-gate	add	%i0, 1, %i0		! increment source address
1753*0Sstevel@tonic-gate	or	%i4, %i3, %i4		! or in with previous bytes (if any)
1754*0Sstevel@tonic-gate	btst	3, %i0			! is source aligned?
1755*0Sstevel@tonic-gate	add	%l0, 8, %l0		! increment shift count (US)
1756*0Sstevel@tonic-gate	bnz,a	.align_src_only
1757*0Sstevel@tonic-gate	sll	%i4, 8, %i4		! make room for next byte
1758*0Sstevel@tonic-gate	b,a	.xfer
1759*0Sstevel@tonic-gate	!
1760*0Sstevel@tonic-gate	! if from address unaligned for double-word moves,
1761*0Sstevel@tonic-gate	! move bytes till it is, if count is < 56 it could take
1762*0Sstevel@tonic-gate	! longer to align the thing than to do the transfer
1763*0Sstevel@tonic-gate	! in word size chunks right away
1764*0Sstevel@tonic-gate	!
1765*0Sstevel@tonic-gate.aldoubcp:
1766*0Sstevel@tonic-gate	cmp	%i2, 56			! if count < 56, use wordcp, it takes
1767*0Sstevel@tonic-gate	blu,a	%ncc, .alwordcp		! longer to align doubles than words
1768*0Sstevel@tonic-gate	mov	3, %o0			! mask for word alignment
1769*0Sstevel@tonic-gate	call	.alignit		! copy bytes until aligned
1770*0Sstevel@tonic-gate	mov	7, %o0			! mask for double alignment
1771*0Sstevel@tonic-gate	!
1772*0Sstevel@tonic-gate	! source and destination are now double-word aligned
1773*0Sstevel@tonic-gate	! i3 has aligned count returned by alignit
1774*0Sstevel@tonic-gate	!
1775*0Sstevel@tonic-gate	and	%i2, 7, %i2		! unaligned leftover count
1776*0Sstevel@tonic-gate	sub	%i0, %i1, %i0		! i0 gets the difference of src and dst
1777*0Sstevel@tonic-gate5:
1778*0Sstevel@tonic-gate	ldx	[%i0+%i1], %o4		! read from address
1779*0Sstevel@tonic-gate	stx	%o4, [%i1]		! write at destination address
1780*0Sstevel@tonic-gate	subcc	%i3, 8, %i3		! dec count
1781*0Sstevel@tonic-gate	bgu	%ncc, 5b
1782*0Sstevel@tonic-gate	add	%i1, 8, %i1		! delay slot, inc to address
1783*0Sstevel@tonic-gate	cmp	%i2, 4			! see if we can copy a word
1784*0Sstevel@tonic-gate	blu	%ncc, .dbytecp		! if 3 or less bytes use bytecp
1785*0Sstevel@tonic-gate	.empty
1786*0Sstevel@tonic-gate	!
1787*0Sstevel@tonic-gate	! for leftover bytes we fall into wordcp, if needed
1788*0Sstevel@tonic-gate	!
1789*0Sstevel@tonic-gate.wordcp:
1790*0Sstevel@tonic-gate	and	%i2, 3, %i2		! unaligned leftover count
1791*0Sstevel@tonic-gate5:
1792*0Sstevel@tonic-gate	ld	[%i0+%i1], %o4		! read from address
1793*0Sstevel@tonic-gate	st	%o4, [%i1]		! write at destination address
1794*0Sstevel@tonic-gate	subcc	%i3, 4, %i3		! dec count
1795*0Sstevel@tonic-gate	bgu	%ncc, 5b
1796*0Sstevel@tonic-gate	add	%i1, 4, %i1		! delay slot, inc to address
1797*0Sstevel@tonic-gate	b,a	.dbytecp
1798*0Sstevel@tonic-gate
1799*0Sstevel@tonic-gate	! we come here to align copies on word boundaries
1800*0Sstevel@tonic-gate.alwordcp:
1801*0Sstevel@tonic-gate	call	.alignit		! go word-align it
1802*0Sstevel@tonic-gate	mov	3, %o0			! bits that must be zero to be aligned
1803*0Sstevel@tonic-gate	b	.wordcp
1804*0Sstevel@tonic-gate	sub	%i0, %i1, %i0		! i0 gets the difference of src and dst
1805*0Sstevel@tonic-gate
1806*0Sstevel@tonic-gate	!
1807*0Sstevel@tonic-gate	! byte copy, works with any alignment
1808*0Sstevel@tonic-gate	!
1809*0Sstevel@tonic-gate.bytecp:
1810*0Sstevel@tonic-gate	b	.dbytecp
1811*0Sstevel@tonic-gate	sub	%i0, %i1, %i0		! i0 gets difference of src and dst
1812*0Sstevel@tonic-gate
1813*0Sstevel@tonic-gate	!
1814*0Sstevel@tonic-gate	! differenced byte copy, works with any alignment
1815*0Sstevel@tonic-gate	! assumes dest in %i1 and (source - dest) in %i0
1816*0Sstevel@tonic-gate	!
1817*0Sstevel@tonic-gate1:
1818*0Sstevel@tonic-gate	stb	%o4, [%i1]		! write to address
1819*0Sstevel@tonic-gate	inc	%i1			! inc to address
1820*0Sstevel@tonic-gate.dbytecp:
1821*0Sstevel@tonic-gate	deccc	%i2			! dec count
1822*0Sstevel@tonic-gate	bgeu,a	%ncc, 1b		! loop till done
1823*0Sstevel@tonic-gate	ldub	[%i0+%i1], %o4		! read from address
1824*0Sstevel@tonic-gate	!
1825*0Sstevel@tonic-gate	! FPUSED_FLAG will not have been set in any path leading to
1826*0Sstevel@tonic-gate	! this point. No need to deal with it.
1827*0Sstevel@tonic-gate	!
1828*0Sstevel@tonic-gate.cpdone:
1829*0Sstevel@tonic-gate	btst	BCOPY_FLAG, %l6
1830*0Sstevel@tonic-gate	bz,pn	%icc, 2f
1831*0Sstevel@tonic-gate	andncc	%l6, BCOPY_FLAG, %l6
1832*0Sstevel@tonic-gate	!
1833*0Sstevel@tonic-gate	! Here via bcopy. Check to see if the handler was NULL.
1834*0Sstevel@tonic-gate	! If so, just return quietly. Otherwise, reset the
1835*0Sstevel@tonic-gate	! handler and go home.
1836*0Sstevel@tonic-gate	!
1837*0Sstevel@tonic-gate	bnz,pn	%ncc, 2f
1838*0Sstevel@tonic-gate	nop
1839*0Sstevel@tonic-gate	!
1840*0Sstevel@tonic-gate	! Null handler.
1841*0Sstevel@tonic-gate	!
1842*0Sstevel@tonic-gate	ret
1843*0Sstevel@tonic-gate	restore %g0, 0, %o0
1844*0Sstevel@tonic-gate	!
1845*0Sstevel@tonic-gate	! Here via kcopy or bcopy with a handler.  Reset the
1846*0Sstevel@tonic-gate	! fault handler.
1847*0Sstevel@tonic-gate	!
1848*0Sstevel@tonic-gate2:
1849*0Sstevel@tonic-gate  	membar	#Sync
1850*0Sstevel@tonic-gate	stn	%l6, [THREAD_REG + T_LOFAULT]	! restore old t_lofault
1851*0Sstevel@tonic-gate	ret
1852*0Sstevel@tonic-gate	restore	%g0, 0, %o0		! return (0)
1853*0Sstevel@tonic-gate
1854*0Sstevel@tonic-gate/*
1855*0Sstevel@tonic-gate * Common code used to align transfers on word and doubleword
1856*0Sstevel@tonic-gate * boundaries.  Aligns source and destination and returns a count
1857*0Sstevel@tonic-gate * of aligned bytes to transfer in %i3
1858*0Sstevel@tonic-gate */
1859*0Sstevel@tonic-gate1:
1860*0Sstevel@tonic-gate	inc	%i0			! inc from
1861*0Sstevel@tonic-gate	stb	%o4, [%i1]		! write a byte
1862*0Sstevel@tonic-gate	inc	%i1			! inc to
1863*0Sstevel@tonic-gate	dec	%i2			! dec count
1864*0Sstevel@tonic-gate.alignit:
1865*0Sstevel@tonic-gate	btst	%o0, %i0		! %o0 is bit mask to check for alignment
1866*0Sstevel@tonic-gate	bnz,a	1b
1867*0Sstevel@tonic-gate	ldub	[%i0], %o4		! read next byte
1868*0Sstevel@tonic-gate
1869*0Sstevel@tonic-gate	retl
1870*0Sstevel@tonic-gate	andn	%i2, %o0, %i3		! return size of aligned bytes
1871*0Sstevel@tonic-gate	SET_SIZE(bcopy)
1872*0Sstevel@tonic-gate
1873*0Sstevel@tonic-gate#endif	/* lint */
1874*0Sstevel@tonic-gate
1875*0Sstevel@tonic-gate/*
1876*0Sstevel@tonic-gate * Block copy with possibly overlapped operands.
 *
 * ovbcopy(from, to, count) is a leaf routine: it does not take a new
 * register window, so from is in %o0, to is in %o1 and count is in %o2.
 * %o3 is used as scratch.
 *
 *	count == 0			return immediately
 *	count <= abs(from - to)		operands do not overlap, branch
 *					to bcopy (arguments still in
 *					%o0-%o2)
 *	from < to			copy bytes from the last to the
 *					first
 *	otherwise			copy bytes from the first to the
 *					last
 *
 * The overlapping cases are copied one byte at a time.
1877*0Sstevel@tonic-gate */
1878*0Sstevel@tonic-gate
1879*0Sstevel@tonic-gate#if defined(lint)
1880*0Sstevel@tonic-gate
1881*0Sstevel@tonic-gate/*ARGSUSED*/
1882*0Sstevel@tonic-gatevoid
1883*0Sstevel@tonic-gateovbcopy(const void *from, void *to, size_t count)
1884*0Sstevel@tonic-gate{}
1885*0Sstevel@tonic-gate
1886*0Sstevel@tonic-gate#else	/* lint */
1887*0Sstevel@tonic-gate
1888*0Sstevel@tonic-gate	ENTRY(ovbcopy)
1889*0Sstevel@tonic-gate	tst	%o2			! check count
1890*0Sstevel@tonic-gate	bgu,a	%ncc, 1f		! count is non-zero: go do the copy
1891*0Sstevel@tonic-gate	subcc	%o0, %o1, %o3		! difference of from and to address
					! (annulled delay slot: only executed
					! when the branch is taken)
1892*0Sstevel@tonic-gate
1893*0Sstevel@tonic-gate	retl				! count is zero: nothing to do
1894*0Sstevel@tonic-gate	nop
1895*0Sstevel@tonic-gate1:
1896*0Sstevel@tonic-gate	bneg,a	%ncc, 2f		! tests the result of the subcc above
1897*0Sstevel@tonic-gate	neg	%o3			! if < 0, make it positive
1898*0Sstevel@tonic-gate2:	cmp	%o2, %o3		! cmp size and abs(from - to)
1899*0Sstevel@tonic-gate	bleu	%ncc, bcopy		! if size <= abs(diff): use bcopy,
1900*0Sstevel@tonic-gate	.empty				!   no overlap
1901*0Sstevel@tonic-gate	cmp	%o0, %o1		! compare from and to addresses
					! (this is the delay slot of the bleu
					! above, so it also runs on the way
					! to bcopy; it only changes the
					! condition codes)
1902*0Sstevel@tonic-gate	blu	%ncc, .ov_bkwd		! if from < to, copy backwards
1903*0Sstevel@tonic-gate	nop
1904*0Sstevel@tonic-gate	!
1905*0Sstevel@tonic-gate	! Copy forwards.
1906*0Sstevel@tonic-gate	!
1907*0Sstevel@tonic-gate.ov_fwd:
1908*0Sstevel@tonic-gate	ldub	[%o0], %o3		! read from address
1909*0Sstevel@tonic-gate	inc	%o0			! inc from address
1910*0Sstevel@tonic-gate	stb	%o3, [%o1]		! write to address
1911*0Sstevel@tonic-gate	deccc	%o2			! dec count
1912*0Sstevel@tonic-gate	bgu	%ncc, .ov_fwd		! loop till done
1913*0Sstevel@tonic-gate	inc	%o1			! delay slot, inc to address
1914*0Sstevel@tonic-gate
1915*0Sstevel@tonic-gate	retl				! return
1916*0Sstevel@tonic-gate	nop
1917*0Sstevel@tonic-gate	!
1918*0Sstevel@tonic-gate	! Copy backwards.
	! The decremented count in %o2 is also the offset of the byte
	! being moved, so neither address register is modified.
1919*0Sstevel@tonic-gate	!
1920*0Sstevel@tonic-gate.ov_bkwd:
1921*0Sstevel@tonic-gate	deccc	%o2			! dec count
1922*0Sstevel@tonic-gate	ldub	[%o0 + %o2], %o3	! get byte at end of src
1923*0Sstevel@tonic-gate	bgu	%ncc, .ov_bkwd		! loop till done (tests the deccc)
1924*0Sstevel@tonic-gate	stb	%o3, [%o1 + %o2]	! delay slot, store at end of dst
1925*0Sstevel@tonic-gate
1926*0Sstevel@tonic-gate	retl				! return
1927*0Sstevel@tonic-gate	nop
1928*0Sstevel@tonic-gate	SET_SIZE(ovbcopy)
1929*0Sstevel@tonic-gate
1930*0Sstevel@tonic-gate#endif	/* lint */
1931*0Sstevel@tonic-gate
1932*0Sstevel@tonic-gate/*
1933*0Sstevel@tonic-gate * hwblkpagecopy()
1934*0Sstevel@tonic-gate *
1935*0Sstevel@tonic-gate * Copies exactly one page.  This routine assumes the caller (ppcopy)
1936*0Sstevel@tonic-gate * has already disabled kernel preemption and has checked
1937*0Sstevel@tonic-gate * use_hw_bcopy.
 *
 * The page is moved in 64-byte blocks with ASI_BLK_P block loads and
 * stores.  Source blocks are loaded alternately into %d0 and %d16,
 * copied into %d32 with fsrc1 and stored from there, so the load of
 * the next block is issued before the previous block is stored.  The
 * last block is stored directly from %d16.
 *
 * If FPRS_FEF is set in %fprs on entry, the three register blocks
 * used here (%d0, %d16 and %d32) are first saved on the stack and are
 * reloaded before returning.  %fprs is restored to its entry value in
 * either case.  The assembly leaves 0 in the caller's %o0 although the
 * lint prototype is void.
1938*0Sstevel@tonic-gate */
1939*0Sstevel@tonic-gate#ifdef lint
1940*0Sstevel@tonic-gate/*ARGSUSED*/
1941*0Sstevel@tonic-gatevoid
1942*0Sstevel@tonic-gatehwblkpagecopy(const void *src, void *dst)
1943*0Sstevel@tonic-gate{ }
1944*0Sstevel@tonic-gate#else /* lint */
1945*0Sstevel@tonic-gate	ENTRY(hwblkpagecopy)
1946*0Sstevel@tonic-gate	! get another window w/space for three aligned blocks of saved fpregs
	! (4*64 bytes are reserved: three 64-byte blocks plus room to
	! round the save area down to a 64-byte boundary)
1947*0Sstevel@tonic-gate	save	%sp, -SA(MINFRAME + 4*64), %sp
1948*0Sstevel@tonic-gate
1949*0Sstevel@tonic-gate	! %i0 - source address (arg)
1950*0Sstevel@tonic-gate	! %i1 - destination address (arg)
1951*0Sstevel@tonic-gate	! %i2 - bytes of the page not yet loaded (not arg)
1952*0Sstevel@tonic-gate	! %l0 - saved fprs
1953*0Sstevel@tonic-gate	! %l1 - pointer to saved fpregs
	! %l3 - scratch pointer into the saved fpregs area
1954*0Sstevel@tonic-gate
1955*0Sstevel@tonic-gate	rd	%fprs, %l0		! check for unused fp
1956*0Sstevel@tonic-gate	btst	FPRS_FEF, %l0
1957*0Sstevel@tonic-gate	bz	1f			! FEF clear: nothing to save
1958*0Sstevel@tonic-gate	membar	#Sync			! delay slot, executed on both paths
1959*0Sstevel@tonic-gate
1960*0Sstevel@tonic-gate	! save in-use fpregs on stack
1961*0Sstevel@tonic-gate	add	%fp, STACK_BIAS - 193, %l1
1962*0Sstevel@tonic-gate	and	%l1, -64, %l1		! round down to a 64-byte boundary
1963*0Sstevel@tonic-gate	stda	%d0, [%l1]ASI_BLK_P
1964*0Sstevel@tonic-gate	add	%l1, 64, %l3
1965*0Sstevel@tonic-gate	stda	%d16, [%l3]ASI_BLK_P
1966*0Sstevel@tonic-gate	add	%l3, 64, %l3
1967*0Sstevel@tonic-gate	stda	%d32, [%l3]ASI_BLK_P
1968*0Sstevel@tonic-gate	membar	#Sync
1969*0Sstevel@tonic-gate
1970*0Sstevel@tonic-gate1:	wr	%g0, FPRS_FEF, %fprs	! %fprs = FPRS_FEF only
1971*0Sstevel@tonic-gate	ldda	[%i0]ASI_BLK_P, %d0	! load the first block
1972*0Sstevel@tonic-gate	add	%i0, 64, %i0
1973*0Sstevel@tonic-gate	set	PAGESIZE - 64, %i2	! one block is already loaded
1974*0Sstevel@tonic-gate
	! The loop body handles two blocks per pass and only tests for
	! completion after the first store, at which point the most
	! recently loaded block is in %d16.  %i2 can only reach zero
	! there if (PAGESIZE - 64) / 64 is odd.
1975*0Sstevel@tonic-gate2:	ldda	[%i0]ASI_BLK_P, %d16	! load next block into %d16-%d30
1976*0Sstevel@tonic-gate	fsrc1	%d0, %d32		! move previous block to %d32-%d46
1977*0Sstevel@tonic-gate	fsrc1	%d2, %d34
1978*0Sstevel@tonic-gate	fsrc1	%d4, %d36
1979*0Sstevel@tonic-gate	fsrc1	%d6, %d38
1980*0Sstevel@tonic-gate	fsrc1	%d8, %d40
1981*0Sstevel@tonic-gate	fsrc1	%d10, %d42
1982*0Sstevel@tonic-gate	fsrc1	%d12, %d44
1983*0Sstevel@tonic-gate	fsrc1	%d14, %d46
1984*0Sstevel@tonic-gate	stda	%d32, [%i1]ASI_BLK_P	! store previous block
1985*0Sstevel@tonic-gate	add	%i0, 64, %i0
1986*0Sstevel@tonic-gate	subcc	%i2, 64, %i2
1987*0Sstevel@tonic-gate	bz,pn	%ncc, 3f		! nothing left to load
1988*0Sstevel@tonic-gate	add	%i1, 64, %i1		! delay slot, advance destination
1989*0Sstevel@tonic-gate	ldda	[%i0]ASI_BLK_P, %d0	! load next block into %d0-%d14
1990*0Sstevel@tonic-gate	fsrc1	%d16, %d32		! move previous block to %d32-%d46
1991*0Sstevel@tonic-gate	fsrc1	%d18, %d34
1992*0Sstevel@tonic-gate	fsrc1	%d20, %d36
1993*0Sstevel@tonic-gate	fsrc1	%d22, %d38
1994*0Sstevel@tonic-gate	fsrc1	%d24, %d40
1995*0Sstevel@tonic-gate	fsrc1	%d26, %d42
1996*0Sstevel@tonic-gate	fsrc1	%d28, %d44
1997*0Sstevel@tonic-gate	fsrc1	%d30, %d46
1998*0Sstevel@tonic-gate	stda	%d32, [%i1]ASI_BLK_P	! store previous block
1999*0Sstevel@tonic-gate	add	%i0, 64, %i0
2000*0Sstevel@tonic-gate	sub	%i2, 64, %i2		! no completion test on this half
2001*0Sstevel@tonic-gate	ba,pt	%ncc, 2b
2002*0Sstevel@tonic-gate	add	%i1, 64, %i1		! delay slot, advance destination
2003*0Sstevel@tonic-gate
2004*0Sstevel@tonic-gate3:	membar	#Sync
2005*0Sstevel@tonic-gate	btst	FPRS_FEF, %l0		! was FEF set on entry?
2006*0Sstevel@tonic-gate	bz	4f
2007*0Sstevel@tonic-gate	stda	%d16, [%i1]ASI_BLK_P	! delay slot (both paths): store the
					! last block
2008*0Sstevel@tonic-gate
2009*0Sstevel@tonic-gate	! restore fpregs from stack
2010*0Sstevel@tonic-gate	membar	#Sync
2011*0Sstevel@tonic-gate	ldda	[%l1]ASI_BLK_P, %d0
2012*0Sstevel@tonic-gate	add	%l1, 64, %l3
2013*0Sstevel@tonic-gate	ldda	[%l3]ASI_BLK_P, %d16
2014*0Sstevel@tonic-gate	add	%l3, 64, %l3
2015*0Sstevel@tonic-gate	ldda	[%l3]ASI_BLK_P, %d32
2016*0Sstevel@tonic-gate
2017*0Sstevel@tonic-gate4:	wr	%l0, 0, %fprs		! restore fprs
2018*0Sstevel@tonic-gate	membar #Sync
2019*0Sstevel@tonic-gate	ret
2020*0Sstevel@tonic-gate	restore	%g0, 0, %o0		! delay slot, caller's %o0 = 0
2021*0Sstevel@tonic-gate	SET_SIZE(hwblkpagecopy)
2022*0Sstevel@tonic-gate#endif	/* lint */
2023*0Sstevel@tonic-gate
2024*0Sstevel@tonic-gate
2025*0Sstevel@tonic-gate/*
2026*0Sstevel@tonic-gate * Transfer data to and from user space -
2027*0Sstevel@tonic-gate * Note that these routines can cause faults
2028*0Sstevel@tonic-gate * It is assumed that the kernel has nothing at
2029*0Sstevel@tonic-gate * less than KERNELBASE in the virtual address space.
2030*0Sstevel@tonic-gate *
2031*0Sstevel@tonic-gate * Note that copyin(9F) and copyout(9F) are part of the
2032*0Sstevel@tonic-gate * DDI/DKI which specifies that they return '-1' on "errors."
2033*0Sstevel@tonic-gate *
2034*0Sstevel@tonic-gate * Sigh.
2035*0Sstevel@tonic-gate *
2036*0Sstevel@tonic-gate * So there's two extremely similar routines - xcopyin() and xcopyout()
2037*0Sstevel@tonic-gate * which return the errno that we've faithfully computed.  This
2038*0Sstevel@tonic-gate * allows other callers (e.g. uiomove(9F)) to work correctly.
2039*0Sstevel@tonic-gate * Given that these are used pretty heavily, we expand the calling
2040*0Sstevel@tonic-gate * sequences inline for all flavours (rather than making wrappers).
2041*0Sstevel@tonic-gate *
2042*0Sstevel@tonic-gate * There are also stub routines for xcopyout_little and xcopyin_little,
2043*0Sstevel@tonic-gate * which currently are intended to handle requests of <= 16 bytes from
2044*0Sstevel@tonic-gate * do_unaligned. Future enhancement to make them handle 8k pages efficiently
2045*0Sstevel@tonic-gate * is left as an exercise...
2046*0Sstevel@tonic-gate */
2047*0Sstevel@tonic-gate
2048*0Sstevel@tonic-gate/*
2049*0Sstevel@tonic-gate * Copy user data to kernel space (copyOP/xcopyOP/copyOP_noerr)
2050*0Sstevel@tonic-gate *
2051*0Sstevel@tonic-gate * General theory of operation:
2052*0Sstevel@tonic-gate *
2053*0Sstevel@tonic-gate * The only difference between default_copy{in,out} and
2054*0Sstevel@tonic-gate * default_xcopy{in,out} is in the error handling routine they invoke
2055*0Sstevel@tonic-gate * when a memory access error is seen. default_xcopyOP returns the errno
2056*0Sstevel@tonic-gate * while default_copyOP returns -1 (see above). copy{in,out}_noerr set
2057*0Sstevel@tonic-gate * a special flag (by oring the value 2 into the fault handler address)
2058*0Sstevel@tonic-gate * if they are called with a fault handler already in place. That flag
2059*0Sstevel@tonic-gate * causes the default handlers to trampoline to the previous handler
2060*0Sstevel@tonic-gate * upon an error.
2061*0Sstevel@tonic-gate *
2062*0Sstevel@tonic-gate * None of the copyops routines grab a window until it's decided that
2063*0Sstevel@tonic-gate * we need to do a HW block copy operation. This saves a window
2064*0Sstevel@tonic-gate * spill/fill when we're called during socket ops. The typical IO
2065*0Sstevel@tonic-gate * path won't cause spill/fill traps.
2066*0Sstevel@tonic-gate *
2067*0Sstevel@tonic-gate * This code uses a set of 4 limits for the maximum size that will
2068*0Sstevel@tonic-gate * be copied given a particular input/output address alignment.
2069*0Sstevel@tonic-gate * the default limits are:
2070*0Sstevel@tonic-gate *
2071*0Sstevel@tonic-gate * single byte aligned - 900 (hw_copy_limit_1)
2072*0Sstevel@tonic-gate * two byte aligned - 1800 (hw_copy_limit_2)
2073*0Sstevel@tonic-gate * four byte aligned - 3600 (hw_copy_limit_4)
2074*0Sstevel@tonic-gate * eight byte aligned - 7200 (hw_copy_limit_8)
2075*0Sstevel@tonic-gate *
2076*0Sstevel@tonic-gate * If the value for a particular limit is zero, the copy will be done
2077*0Sstevel@tonic-gate * via the copy loops rather than VIS.
2078*0Sstevel@tonic-gate *
2079*0Sstevel@tonic-gate * Flow:
2080*0Sstevel@tonic-gate *
2081*0Sstevel@tonic-gate * If count == zero return zero.
2082*0Sstevel@tonic-gate *
2083*0Sstevel@tonic-gate * Store the previous lo_fault handler into %g6.
2084*0Sstevel@tonic-gate * Place our secondary lofault handler into %g5.
2085*0Sstevel@tonic-gate * Place the address of our nowindow fault handler into %o3.
2086*0Sstevel@tonic-gate * Place the address of the windowed fault handler into %o4.
2087*0Sstevel@tonic-gate * --> We'll use this handler if we end up grabbing a window
2088*0Sstevel@tonic-gate * --> before we use VIS instructions.
2089*0Sstevel@tonic-gate *
2090*0Sstevel@tonic-gate * If count is less than or equal to SMALL_LIMIT (7) we
2091*0Sstevel@tonic-gate * always do a byte for byte copy.
2092*0Sstevel@tonic-gate *
2093*0Sstevel@tonic-gate * If count is > SMALL_LIMIT, we check the alignment of the input
2094*0Sstevel@tonic-gate * and output pointers. Based on the alignment we check count
2095*0Sstevel@tonic-gate * against a soft limit of VIS_COPY_THRESHOLD (900 on spitfire). If
2096*0Sstevel@tonic-gate * we're larger than VIS_COPY_THRESHOLD, we check against a limit based
2097*0Sstevel@tonic-gate * on detected alignment. If we exceed the alignment value we copy
2098*0Sstevel@tonic-gate * via VIS instructions.
2099*0Sstevel@tonic-gate *
2100*0Sstevel@tonic-gate * If we don't exceed one of the limits, we store -count in %o3,
2101*0Sstevel@tonic-gate * we store the number of chunks (8, 4, 2 or 1 byte) operated
2102*0Sstevel@tonic-gate * on in our basic copy loop in %o2. Following this we branch
2103*0Sstevel@tonic-gate * to the appropriate copy loop and copy that many chunks.
2104*0Sstevel@tonic-gate * Since we've been adding the chunk size to %o3 each time through
2105*0Sstevel@tonic-gate * as well as decrementing %o2, we can tell if any data is
2106*0Sstevel@tonic-gate * left to be copied by examining %o3. If that is zero, we're
2107*0Sstevel@tonic-gate * done and can go home. If not, we figure out what the largest
2108*0Sstevel@tonic-gate * chunk size left to be copied is and branch to that copy loop
2109*0Sstevel@tonic-gate * unless there's only one byte left. We load that as we're
2110*0Sstevel@tonic-gate * branching to code that stores it just before we return.
2111*0Sstevel@tonic-gate *
2112*0Sstevel@tonic-gate * There is one potential situation in which we start to do a VIS
2113*0Sstevel@tonic-gate * copy but decide to punt and return to the copy loops. There is
2114*0Sstevel@tonic-gate * (in the default configuration) a window of 256 bytes between
2115*0Sstevel@tonic-gate * the single byte aligned copy limit and what VIS treats as its
2116*0Sstevel@tonic-gate * minimum if floating point is in use in the calling app. We need
2117*0Sstevel@tonic-gate * to be prepared to handle this. See the .small_copyOP label for
2118*0Sstevel@tonic-gate * details.
2119*0Sstevel@tonic-gate *
2120*0Sstevel@tonic-gate * Fault handlers are invoked if we reference memory that has no
2121*0Sstevel@tonic-gate * current mapping.  All forms share the same copyio_fault handler.
2122*0Sstevel@tonic-gate * This routine handles fixing up the stack and general housecleaning.
2123*0Sstevel@tonic-gate * Each copy operation has a simple fault handler that is then called
2124*0Sstevel@tonic-gate * to do the work specific to the individual operation.  The handlers
2125*0Sstevel@tonic-gate * for default_copyOP and copyOP_noerr are found at the end of
2126*0Sstevel@tonic-gate * default_copyout. The handlers for default_xcopyOP are found at the
2127*0Sstevel@tonic-gate * end of default_xcopyin.
2128*0Sstevel@tonic-gate */
2129*0Sstevel@tonic-gate
2130*0Sstevel@tonic-gate/*
2131*0Sstevel@tonic-gate * Copy kernel data to user space (copyout/xcopyout/xcopyout_little).
2132*0Sstevel@tonic-gate */
2133*0Sstevel@tonic-gate
2134*0Sstevel@tonic-gate#if defined(lint)
2135*0Sstevel@tonic-gate
2136*0Sstevel@tonic-gate/*ARGSUSED*/
2137*0Sstevel@tonic-gateint
2138*0Sstevel@tonic-gatecopyout(const void *kaddr, void *uaddr, size_t count)
2139*0Sstevel@tonic-gate{ return (0); }	/* lint-only stub; the assembly copyout is in the non-lint branch below */
2140*0Sstevel@tonic-gate
2141*0Sstevel@tonic-gate#else	/* lint */
2142*0Sstevel@tonic-gate
2143*0Sstevel@tonic-gate/*
2144*0Sstevel@tonic-gate * We save the arguments in the following registers in case of a fault:
2145*0Sstevel@tonic-gate * 	kaddr - %g2
2146*0Sstevel@tonic-gate * 	uaddr - %g3
2147*0Sstevel@tonic-gate * 	count - %g4
2148*0Sstevel@tonic-gate */
2149*0Sstevel@tonic-gate#define	SAVE_SRC	%g2
2150*0Sstevel@tonic-gate#define	SAVE_DST	%g3
2151*0Sstevel@tonic-gate#define	SAVE_COUNT	%g4
2152*0Sstevel@tonic-gate
2153*0Sstevel@tonic-gate#define	REAL_LOFAULT		%g5	/* operation-specific handler the copyio_fault routines jump to */
2154*0Sstevel@tonic-gate#define	SAVED_LOFAULT		%g6	/* t_lofault value on entry; FPUSED_FLAG may be ORed into it */
2155*0Sstevel@tonic-gate
2156*0Sstevel@tonic-gate/*
2157*0Sstevel@tonic-gate * Generic copyio fault handler.  This is the first line of defense when a
2158*0Sstevel@tonic-gate * fault occurs in (x)copyin/(x)copyout.  In order for this to function
2159*0Sstevel@tonic-gate * properly, the value of the 'real' lofault handler should be in REAL_LOFAULT.
2160*0Sstevel@tonic-gate * This allows us to share common code for all the flavors of the copy
2161*0Sstevel@tonic-gate * operations, including the _noerr versions.
2162*0Sstevel@tonic-gate *
2163*0Sstevel@tonic-gate * Note that this function will restore the original input parameters before
2164*0Sstevel@tonic-gate * calling REAL_LOFAULT.  So the real handler can vector to the appropriate
2165*0Sstevel@tonic-gate * member of the t_copyop structure, if needed.
2166*0Sstevel@tonic-gate */
2167*0Sstevel@tonic-gate	ENTRY(copyio_fault)	/* lofault handler installed just before a register window is taken for the block copy */
2168*0Sstevel@tonic-gate	btst	FPUSED_FLAG, SAVED_LOFAULT	/* did the block copy mark the FP registers as in use? */
2169*0Sstevel@tonic-gate	bz	1f	/* no: skip the FP state restore */
2170*0Sstevel@tonic-gate	  andn	SAVED_LOFAULT, FPUSED_FLAG, SAVED_LOFAULT	/* delay slot (always executed): strip the flag bit */
2171*0Sstevel@tonic-gate
2172*0Sstevel@tonic-gate	membar	#Sync
2173*0Sstevel@tonic-gate
2174*0Sstevel@tonic-gate	ld	[%fp + STACK_BIAS - SAVED_GSR_OFFSET], %o2	/* gsr value stored in the frame by the block-copy setup */
2175*0Sstevel@tonic-gate	wr	%o2, 0, %gsr		! restore gsr
2176*0Sstevel@tonic-gate
2177*0Sstevel@tonic-gate	ld	[%fp + STACK_BIAS - SAVED_FPRS_OFFSET], %o3	/* fprs value stored in the frame before the copy enabled FP */
2178*0Sstevel@tonic-gate	btst	FPRS_FEF, %o3	/* was FP enabled before the copy started? */
2179*0Sstevel@tonic-gate	bz	4f	/* no: no registers were spilled, so just zero them */
2180*0Sstevel@tonic-gate	  nop
2181*0Sstevel@tonic-gate
2182*0Sstevel@tonic-gate	! restore fpregs from stack
2183*0Sstevel@tonic-gate	membar	#Sync
2184*0Sstevel@tonic-gate	add	%fp, STACK_BIAS - 257, %o2	/* same address computation the save path uses ... */
2185*0Sstevel@tonic-gate	and	%o2, -64, %o2	/* ... rounded down to a 64-byte boundary for the block loads */
2186*0Sstevel@tonic-gate	ldda	[%o2]ASI_BLK_P, %d0
2187*0Sstevel@tonic-gate	add	%o2, 64, %o2
2188*0Sstevel@tonic-gate	ldda	[%o2]ASI_BLK_P, %d16
2189*0Sstevel@tonic-gate	add	%o2, 64, %o2
2190*0Sstevel@tonic-gate	ldda	[%o2]ASI_BLK_P, %d32
2191*0Sstevel@tonic-gate	add	%o2, 64, %o2
2192*0Sstevel@tonic-gate	ldda	[%o2]ASI_BLK_P, %d48
2193*0Sstevel@tonic-gate	membar	#Sync
2194*0Sstevel@tonic-gate
2195*0Sstevel@tonic-gate	ba,pt	%ncc, 1f
2196*0Sstevel@tonic-gate	  wr	%o3, 0, %fprs		! restore fprs
2197*0Sstevel@tonic-gate
2198*0Sstevel@tonic-gate4:
2199*0Sstevel@tonic-gate	FZERO				! zero all of the fpregs
2200*0Sstevel@tonic-gate	wr	%o3, 0, %fprs		! restore fprs
2201*0Sstevel@tonic-gate
2202*0Sstevel@tonic-gate1:
2203*0Sstevel@tonic-gate
2204*0Sstevel@tonic-gate	restore	/* drop the register window taken for the block copy */
2205*0Sstevel@tonic-gate
2206*0Sstevel@tonic-gate	mov	SAVE_SRC, %o0	/* hand the original arguments back in the argument registers */
2207*0Sstevel@tonic-gate	mov	SAVE_DST, %o1
2208*0Sstevel@tonic-gate	jmp	REAL_LOFAULT	/* NOTE(review): t_lofault is not rewritten on this path, unlike copyio_fault_nowindow; confirm the target handlers restore it */
2209*0Sstevel@tonic-gate	  mov	SAVE_COUNT, %o2	/* delay slot: third argument */
2210*0Sstevel@tonic-gate	SET_SIZE(copyio_fault)
2211*0Sstevel@tonic-gate
2212*0Sstevel@tonic-gate	ENTRY(copyio_fault_nowindow)	/* lofault handler for the leaf-mode copy loops, where no register window has been taken */
2213*0Sstevel@tonic-gate	membar	#Sync
2214*0Sstevel@tonic-gate	stn	SAVED_LOFAULT, [THREAD_REG + T_LOFAULT]	! restore old t_lofault
2215*0Sstevel@tonic-gate
2216*0Sstevel@tonic-gate	mov	SAVE_SRC, %o0	/* hand the original arguments back in the argument registers */
2217*0Sstevel@tonic-gate	mov	SAVE_DST, %o1
2218*0Sstevel@tonic-gate	jmp	REAL_LOFAULT	/* continue in the operation-specific handler */
2219*0Sstevel@tonic-gate	  mov	SAVE_COUNT, %o2	/* delay slot: third argument */
2220*0Sstevel@tonic-gate	SET_SIZE(copyio_fault_nowindow)
2221*0Sstevel@tonic-gate
2222*0Sstevel@tonic-gate	ENTRY(copyout)
2223*0Sstevel@tonic-gate	sethi	%hi(.copyout_err), REAL_LOFAULT
2224*0Sstevel@tonic-gate	or	REAL_LOFAULT, %lo(.copyout_err), REAL_LOFAULT
2225*0Sstevel@tonic-gate
2226*0Sstevel@tonic-gate.do_copyout:
2227*0Sstevel@tonic-gate	!
2228*0Sstevel@tonic-gate	! Check the length and bail if zero.
2229*0Sstevel@tonic-gate	!
2230*0Sstevel@tonic-gate	tst	%o2
2231*0Sstevel@tonic-gate	bnz,pt	%ncc, 1f
2232*0Sstevel@tonic-gate	  nop
2233*0Sstevel@tonic-gate	retl
2234*0Sstevel@tonic-gate	  clr	%o0
2235*0Sstevel@tonic-gate1:
2236*0Sstevel@tonic-gate	sethi	%hi(copyio_fault), %o4
2237*0Sstevel@tonic-gate	or	%o4, %lo(copyio_fault), %o4
2238*0Sstevel@tonic-gate	sethi	%hi(copyio_fault_nowindow), %o3
2239*0Sstevel@tonic-gate	ldn	[THREAD_REG + T_LOFAULT], SAVED_LOFAULT
2240*0Sstevel@tonic-gate	or	%o3, %lo(copyio_fault_nowindow), %o3
2241*0Sstevel@tonic-gate	membar	#Sync
2242*0Sstevel@tonic-gate	stn	%o3, [THREAD_REG + T_LOFAULT]
2243*0Sstevel@tonic-gate
2244*0Sstevel@tonic-gate	mov	%o0, SAVE_SRC
2245*0Sstevel@tonic-gate	mov	%o1, SAVE_DST
2246*0Sstevel@tonic-gate	mov	%o2, SAVE_COUNT
2247*0Sstevel@tonic-gate
2248*0Sstevel@tonic-gate	!
2249*0Sstevel@tonic-gate	! Check to see if we're more than SMALL_LIMIT (7 bytes).
2250*0Sstevel@tonic-gate	! Run in leaf mode, using the %o regs as our input regs.
2251*0Sstevel@tonic-gate	!
2252*0Sstevel@tonic-gate	subcc	%o2, SMALL_LIMIT, %o3
2253*0Sstevel@tonic-gate	bgu,a,pt %ncc, .dco_ns
2254*0Sstevel@tonic-gate	or	%o0, %o1, %o3
2255*0Sstevel@tonic-gate	!
2256*0Sstevel@tonic-gate	! What was previously ".small_copyout"
2257*0Sstevel@tonic-gate	! Do full differenced copy.
2258*0Sstevel@tonic-gate	!
2259*0Sstevel@tonic-gate.dcobcp:
2260*0Sstevel@tonic-gate	sub	%g0, %o2, %o3		! negate count
2261*0Sstevel@tonic-gate	add	%o0, %o2, %o0		! make %o0 point at the end
2262*0Sstevel@tonic-gate	add	%o1, %o2, %o1		! make %o1 point at the end
2263*0Sstevel@tonic-gate	ba,pt	%ncc, .dcocl
2264*0Sstevel@tonic-gate	ldub	[%o0 + %o3], %o4	! load first byte
2265*0Sstevel@tonic-gate	!
2266*0Sstevel@tonic-gate	! %o0 and %o1 point at the end and remain pointing at the end
2267*0Sstevel@tonic-gate	! of their buffers. We pull things out by adding %o3 (which is
2268*0Sstevel@tonic-gate	! the negation of the length) to the buffer end which gives us
2269*0Sstevel@tonic-gate	! the current location in the buffers. By incrementing %o3 we walk
2270*0Sstevel@tonic-gate	! through both buffers without having to bump each buffer's
2271*0Sstevel@tonic-gate	! pointer. A very fast 4 instruction loop.
2272*0Sstevel@tonic-gate	!
2273*0Sstevel@tonic-gate	.align 16
2274*0Sstevel@tonic-gate.dcocl:
2275*0Sstevel@tonic-gate	stba	%o4, [%o1 + %o3]ASI_USER
2276*0Sstevel@tonic-gate	inccc	%o3
2277*0Sstevel@tonic-gate	bl,a,pt	%ncc, .dcocl
2278*0Sstevel@tonic-gate	ldub	[%o0 + %o3], %o4
2279*0Sstevel@tonic-gate	!
2280*0Sstevel@tonic-gate	! We're done. Go home.
2281*0Sstevel@tonic-gate	!
2282*0Sstevel@tonic-gate	membar	#Sync
2283*0Sstevel@tonic-gate	stn	SAVED_LOFAULT, [THREAD_REG + T_LOFAULT]
2284*0Sstevel@tonic-gate	retl
2285*0Sstevel@tonic-gate	clr	%o0
2286*0Sstevel@tonic-gate	!
2287*0Sstevel@tonic-gate	! Try aligned copies from here.
2288*0Sstevel@tonic-gate	!
2289*0Sstevel@tonic-gate.dco_ns:
2290*0Sstevel@tonic-gate	! %o0 = kernel addr (to be copied from)
2291*0Sstevel@tonic-gate	! %o1 = user addr (to be copied to)
2292*0Sstevel@tonic-gate	! %o2 = length
2293*0Sstevel@tonic-gate	! %o3 = %o0 | %o1 (used for alignment checking)
2294*0Sstevel@tonic-gate	! %o4 is alternate lo_fault
2295*0Sstevel@tonic-gate	! %g6 is original lo_fault
2296*0Sstevel@tonic-gate	!
2297*0Sstevel@tonic-gate	! See if we're single byte aligned. If we are, check the
2298*0Sstevel@tonic-gate	! limit for single byte copies. If we're smaller or equal,
2299*0Sstevel@tonic-gate	! bounce to the byte for byte copy loop. Otherwise do it in
2300*0Sstevel@tonic-gate	! HW (if enabled).
2301*0Sstevel@tonic-gate	!
2302*0Sstevel@tonic-gate	btst	1, %o3
2303*0Sstevel@tonic-gate	bz,pt	%icc, .dcoh8
2304*0Sstevel@tonic-gate	btst	7, %o3
2305*0Sstevel@tonic-gate	!
2306*0Sstevel@tonic-gate	! Single byte aligned. Do we do it via HW or via
2307*0Sstevel@tonic-gate	! byte for byte? Do a quick no memory reference
2308*0Sstevel@tonic-gate	! check to pick up small copies.
2309*0Sstevel@tonic-gate	!
2310*0Sstevel@tonic-gate	subcc	%o2, VIS_COPY_THRESHOLD, %o3
2311*0Sstevel@tonic-gate	bleu,pt	%ncc, .dcobcp
2312*0Sstevel@tonic-gate	sethi	%hi(hw_copy_limit_1), %o3
2313*0Sstevel@tonic-gate	!
2314*0Sstevel@tonic-gate	! Big enough that we need to check the HW limit for
2315*0Sstevel@tonic-gate	! this size copy.
2316*0Sstevel@tonic-gate	!
2317*0Sstevel@tonic-gate	ld	[%o3 + %lo(hw_copy_limit_1)], %o3
2318*0Sstevel@tonic-gate	!
2319*0Sstevel@tonic-gate	! Is HW copy on? If not, do everything byte for byte.
2320*0Sstevel@tonic-gate	!
2321*0Sstevel@tonic-gate	tst	%o3
2322*0Sstevel@tonic-gate	bz,pn	%icc, .dcobcp
2323*0Sstevel@tonic-gate	subcc	%o3, %o2, %o3
2324*0Sstevel@tonic-gate	!
2325*0Sstevel@tonic-gate	! If we're less than or equal to the single byte copy limit,
2326*0Sstevel@tonic-gate	! bop to the copy loop.
2327*0Sstevel@tonic-gate	!
2328*0Sstevel@tonic-gate	bge,pt	%ncc, .dcobcp
2329*0Sstevel@tonic-gate	nop
2330*0Sstevel@tonic-gate	!
2331*0Sstevel@tonic-gate	! We're big enough and copy is on. Do it with HW.
2332*0Sstevel@tonic-gate	!
2333*0Sstevel@tonic-gate	ba,pt	%ncc, .big_copyout
2334*0Sstevel@tonic-gate	nop
2335*0Sstevel@tonic-gate.dcoh8:
2336*0Sstevel@tonic-gate	!
2337*0Sstevel@tonic-gate	! 8 byte aligned?
2338*0Sstevel@tonic-gate	!
2339*0Sstevel@tonic-gate	bnz,a	%ncc, .dcoh4
2340*0Sstevel@tonic-gate	btst	3, %o3
2341*0Sstevel@tonic-gate	!
2342*0Sstevel@tonic-gate	! See if we're in the "small range".
2343*0Sstevel@tonic-gate	! If so, go off and do the copy.
2344*0Sstevel@tonic-gate	! If not, load the hard limit. %o3 is
2345*0Sstevel@tonic-gate	! available for reuse.
2346*0Sstevel@tonic-gate	!
2347*0Sstevel@tonic-gate	subcc	%o2, VIS_COPY_THRESHOLD, %o3
2348*0Sstevel@tonic-gate	bleu,pt	%ncc, .dcos8
2349*0Sstevel@tonic-gate	sethi	%hi(hw_copy_limit_8), %o3
2350*0Sstevel@tonic-gate	ld	[%o3 + %lo(hw_copy_limit_8)], %o3
2351*0Sstevel@tonic-gate	!
2352*0Sstevel@tonic-gate	! If it's zero, there's no HW bcopy.
2353*0Sstevel@tonic-gate	! Bop off to the aligned copy.
2354*0Sstevel@tonic-gate	!
2355*0Sstevel@tonic-gate	tst	%o3
2356*0Sstevel@tonic-gate	bz,pn	%icc, .dcos8
2357*0Sstevel@tonic-gate	subcc	%o3, %o2, %o3
2358*0Sstevel@tonic-gate	!
2359*0Sstevel@tonic-gate	! We're negative if our size is larger than hw_copy_limit_8.
2360*0Sstevel@tonic-gate	!
2361*0Sstevel@tonic-gate	bge,pt	%ncc, .dcos8
2362*0Sstevel@tonic-gate	nop
2363*0Sstevel@tonic-gate	!
2364*0Sstevel@tonic-gate	! HW assist is on and we're large enough. Do it.
2365*0Sstevel@tonic-gate	!
2366*0Sstevel@tonic-gate	ba,pt	%ncc, .big_copyout
2367*0Sstevel@tonic-gate	nop
2368*0Sstevel@tonic-gate.dcos8:
2369*0Sstevel@tonic-gate	!
2370*0Sstevel@tonic-gate	! Housekeeping for copy loops. Uses same idea as in the byte for
2371*0Sstevel@tonic-gate	! byte copy loop above.
2372*0Sstevel@tonic-gate	!
2373*0Sstevel@tonic-gate	add	%o0, %o2, %o0
2374*0Sstevel@tonic-gate	add	%o1, %o2, %o1
2375*0Sstevel@tonic-gate	sub	%g0, %o2, %o3
2376*0Sstevel@tonic-gate	ba,pt	%ncc, .dodebc
2377*0Sstevel@tonic-gate	srl	%o2, 3, %o2		! Number of 8 byte chunks to copy
2378*0Sstevel@tonic-gate	!
2379*0Sstevel@tonic-gate	! 4 byte aligned?
2380*0Sstevel@tonic-gate	!
2381*0Sstevel@tonic-gate.dcoh4:
2382*0Sstevel@tonic-gate	bnz,pn	%ncc, .dcoh2
2383*0Sstevel@tonic-gate	!
2384*0Sstevel@tonic-gate	! See if we're in the "small range".
2385*0Sstevel@tonic-gate	! If so, go off and do the copy.
2386*0Sstevel@tonic-gate	! If not, load the hard limit. %o3 is
2387*0Sstevel@tonic-gate	! available for reuse.
2388*0Sstevel@tonic-gate	!
2389*0Sstevel@tonic-gate	subcc	%o2, VIS_COPY_THRESHOLD, %o3
2390*0Sstevel@tonic-gate	bleu,pt	%ncc, .dcos4
2391*0Sstevel@tonic-gate	sethi	%hi(hw_copy_limit_4), %o3
2392*0Sstevel@tonic-gate	ld	[%o3 + %lo(hw_copy_limit_4)], %o3
2393*0Sstevel@tonic-gate	!
2394*0Sstevel@tonic-gate	! If it's zero, there's no HW bcopy.
2395*0Sstevel@tonic-gate	! Bop off to the aligned copy.
2396*0Sstevel@tonic-gate	!
2397*0Sstevel@tonic-gate	tst	%o3
2398*0Sstevel@tonic-gate	bz,pn	%icc, .dcos4
2399*0Sstevel@tonic-gate	subcc	%o3, %o2, %o3
2400*0Sstevel@tonic-gate	!
2401*0Sstevel@tonic-gate	! We're negative if our size is larger than hw_copy_limit_4.
2402*0Sstevel@tonic-gate	!
2403*0Sstevel@tonic-gate	bge,pt	%ncc, .dcos4
2404*0Sstevel@tonic-gate	nop
2405*0Sstevel@tonic-gate	!
2406*0Sstevel@tonic-gate	! HW assist is on and we're large enough. Do it.
2407*0Sstevel@tonic-gate	!
2408*0Sstevel@tonic-gate	ba,pt	%ncc, .big_copyout
2409*0Sstevel@tonic-gate	nop
2410*0Sstevel@tonic-gate.dcos4:
2411*0Sstevel@tonic-gate	add	%o0, %o2, %o0
2412*0Sstevel@tonic-gate	add	%o1, %o2, %o1
2413*0Sstevel@tonic-gate	sub	%g0, %o2, %o3
2414*0Sstevel@tonic-gate	ba,pt	%ncc, .dodfbc
2415*0Sstevel@tonic-gate	srl	%o2, 2, %o2		! Number of 4 byte chunks to copy
2416*0Sstevel@tonic-gate	!
2417*0Sstevel@tonic-gate	! We must be 2 byte aligned. Off we go.
2418*0Sstevel@tonic-gate	! The check for small copies was done in the
2419*0Sstevel@tonic-gate	! delay at .dcoh4
2420*0Sstevel@tonic-gate	!
2421*0Sstevel@tonic-gate.dcoh2:
2422*0Sstevel@tonic-gate	ble	%ncc, .dcos2
2423*0Sstevel@tonic-gate	sethi	%hi(hw_copy_limit_2), %o3
2424*0Sstevel@tonic-gate	ld	[%o3 + %lo(hw_copy_limit_2)], %o3
2425*0Sstevel@tonic-gate	tst	%o3
2426*0Sstevel@tonic-gate	bz,pn	%icc, .dcos2
2427*0Sstevel@tonic-gate	subcc	%o3, %o2, %o3
2428*0Sstevel@tonic-gate	bge,pt	%ncc, .dcos2
2429*0Sstevel@tonic-gate	nop
2430*0Sstevel@tonic-gate	!
2431*0Sstevel@tonic-gate	! HW is on and we're big enough. Do it.
2432*0Sstevel@tonic-gate	!
2433*0Sstevel@tonic-gate	ba,pt	%ncc, .big_copyout
2434*0Sstevel@tonic-gate	nop
2435*0Sstevel@tonic-gate.dcos2:
2436*0Sstevel@tonic-gate	add	%o0, %o2, %o0
2437*0Sstevel@tonic-gate	add	%o1, %o2, %o1
2438*0Sstevel@tonic-gate	sub	%g0, %o2, %o3
2439*0Sstevel@tonic-gate	ba,pt	%ncc, .dodtbc
2440*0Sstevel@tonic-gate	srl	%o2, 1, %o2		! Number of 2 byte chunks to copy
2441*0Sstevel@tonic-gate.small_copyout:
2442*0Sstevel@tonic-gate	!
2443*0Sstevel@tonic-gate	! Why are we doing this AGAIN? There are certain conditions in
2444*0Sstevel@tonic-gate	! big_copyout that will cause us to forego the HW assisted copies
2445*0Sstevel@tonic-gate	! and bounce back to a non-HW assisted copy. This dispatches those
2446*0Sstevel@tonic-gate	! copies. Note that we branch around this in the main line code.
2447*0Sstevel@tonic-gate	!
2448*0Sstevel@tonic-gate	! We make no check for limits or HW enablement here. We've
2449*0Sstevel@tonic-gate	! already been told that we're a poster child so just go off
2450*0Sstevel@tonic-gate	! and do it.
2451*0Sstevel@tonic-gate	!
2452*0Sstevel@tonic-gate	or	%o0, %o1, %o3
2453*0Sstevel@tonic-gate	btst	1, %o3
2454*0Sstevel@tonic-gate	bnz	%icc, .dcobcp		! Most likely
2455*0Sstevel@tonic-gate	btst	7, %o3
2456*0Sstevel@tonic-gate	bz	%icc, .dcos8
2457*0Sstevel@tonic-gate	btst	3, %o3
2458*0Sstevel@tonic-gate	bz	%icc, .dcos4
2459*0Sstevel@tonic-gate	nop
2460*0Sstevel@tonic-gate	ba,pt	%ncc, .dcos2
2461*0Sstevel@tonic-gate	nop
2462*0Sstevel@tonic-gate	.align 32
2463*0Sstevel@tonic-gate.dodebc:
2464*0Sstevel@tonic-gate	ldx	[%o0 + %o3], %o4
2465*0Sstevel@tonic-gate	deccc	%o2
2466*0Sstevel@tonic-gate	stxa	%o4, [%o1 + %o3]ASI_USER
2467*0Sstevel@tonic-gate	bg,pt	%ncc, .dodebc
2468*0Sstevel@tonic-gate	addcc	%o3, 8, %o3
2469*0Sstevel@tonic-gate	!
2470*0Sstevel@tonic-gate	! End of copy loop. Check to see if we're done. Most
2471*0Sstevel@tonic-gate	! eight byte aligned copies end here.
2472*0Sstevel@tonic-gate	!
2473*0Sstevel@tonic-gate	bz,pt	%ncc, .dcofh
2474*0Sstevel@tonic-gate	nop
2475*0Sstevel@tonic-gate	!
2476*0Sstevel@tonic-gate	! Something is left - do it byte for byte.
2477*0Sstevel@tonic-gate	!
2478*0Sstevel@tonic-gate	ba,pt	%ncc, .dcocl
2479*0Sstevel@tonic-gate	ldub	[%o0 + %o3], %o4	! load next byte
2480*0Sstevel@tonic-gate	!
2481*0Sstevel@tonic-gate	! Four byte copy loop. %o2 is the number of 4 byte chunks to copy.
2482*0Sstevel@tonic-gate	!
2483*0Sstevel@tonic-gate	.align 32
2484*0Sstevel@tonic-gate.dodfbc:
2485*0Sstevel@tonic-gate	lduw	[%o0 + %o3], %o4
2486*0Sstevel@tonic-gate	deccc	%o2
2487*0Sstevel@tonic-gate	sta	%o4, [%o1 + %o3]ASI_USER
2488*0Sstevel@tonic-gate	bg,pt	%ncc, .dodfbc
2489*0Sstevel@tonic-gate	addcc	%o3, 4, %o3
2490*0Sstevel@tonic-gate	!
2491*0Sstevel@tonic-gate	! End of copy loop. Check to see if we're done. Most
2492*0Sstevel@tonic-gate	! four byte aligned copies end here.
2493*0Sstevel@tonic-gate	!
2494*0Sstevel@tonic-gate	bz,pt	%ncc, .dcofh
2495*0Sstevel@tonic-gate	nop
2496*0Sstevel@tonic-gate	!
2497*0Sstevel@tonic-gate	! Something is left. Do it byte for byte.
2498*0Sstevel@tonic-gate	!
2499*0Sstevel@tonic-gate	ba,pt	%ncc, .dcocl
2500*0Sstevel@tonic-gate	ldub	[%o0 + %o3], %o4	! load next byte
2501*0Sstevel@tonic-gate	!
2502*0Sstevel@tonic-gate	! two byte aligned copy loop. %o2 is the number of 2 byte chunks to
2503*0Sstevel@tonic-gate	! copy.
2504*0Sstevel@tonic-gate	!
2505*0Sstevel@tonic-gate	.align 32
2506*0Sstevel@tonic-gate.dodtbc:
2507*0Sstevel@tonic-gate	lduh	[%o0 + %o3], %o4
2508*0Sstevel@tonic-gate	deccc	%o2
2509*0Sstevel@tonic-gate	stha	%o4, [%o1 + %o3]ASI_USER
2510*0Sstevel@tonic-gate	bg,pt	%ncc, .dodtbc
2511*0Sstevel@tonic-gate	addcc	%o3, 2, %o3
2512*0Sstevel@tonic-gate	!
2513*0Sstevel@tonic-gate	! End of copy loop. Anything left?
2514*0Sstevel@tonic-gate	!
2515*0Sstevel@tonic-gate	bz,pt	%ncc, .dcofh
2516*0Sstevel@tonic-gate	nop
2517*0Sstevel@tonic-gate	!
2518*0Sstevel@tonic-gate	! Deal with the last byte
2519*0Sstevel@tonic-gate	!
2520*0Sstevel@tonic-gate	ldub	[%o0 + %o3], %o4
2521*0Sstevel@tonic-gate	stba	%o4, [%o1 + %o3]ASI_USER
2522*0Sstevel@tonic-gate.dcofh:
2523*0Sstevel@tonic-gate	membar	#Sync
2524*0Sstevel@tonic-gate	stn	SAVED_LOFAULT, [THREAD_REG + T_LOFAULT]	! restore old t_lofault
2525*0Sstevel@tonic-gate	retl
2526*0Sstevel@tonic-gate	clr	%o0
2527*0Sstevel@tonic-gate
2528*0Sstevel@tonic-gate.big_copyout:
2529*0Sstevel@tonic-gate	!
2530*0Sstevel@tonic-gate	! Are we using the FP registers?
2531*0Sstevel@tonic-gate	!
2532*0Sstevel@tonic-gate	rd	%fprs, %o3			! check for unused fp
2533*0Sstevel@tonic-gate	btst	FPRS_FEF, %o3
2534*0Sstevel@tonic-gate	bnz	%icc, .copyout_fpregs_inuse
2535*0Sstevel@tonic-gate	nop
2536*0Sstevel@tonic-gate	!
2537*0Sstevel@tonic-gate	! We're going to go off and do a block copy.
2538*0Sstevel@tonic-gate	! Switch fault handlers and grab a window. We
2539*0Sstevel@tonic-gate	! don't do a membar #Sync since we've done only
2540*0Sstevel@tonic-gate	! kernel data to this point.
2541*0Sstevel@tonic-gate	!
2542*0Sstevel@tonic-gate	stn	%o4, [THREAD_REG + T_LOFAULT]
2543*0Sstevel@tonic-gate	save	%sp, -SA(MINFRAME + HWCOPYFRAMESIZE), %sp
2544*0Sstevel@tonic-gate	!
2545*0Sstevel@tonic-gate	! %o3 is now %i3. Save original %fprs.
2546*0Sstevel@tonic-gate	!
2547*0Sstevel@tonic-gate	st	%i3, [%fp + STACK_BIAS - SAVED_FPRS_OFFSET]
2548*0Sstevel@tonic-gate	ba,pt	%ncc, .do_block_copyout		! Not in use. Go off and do it.
2549*0Sstevel@tonic-gate	wr	%g0, FPRS_FEF, %fprs		! clear %fprs
2550*0Sstevel@tonic-gate	!
2551*0Sstevel@tonic-gate.copyout_fpregs_inuse:
2552*0Sstevel@tonic-gate	!
2553*0Sstevel@tonic-gate	! We're here if the FP regs are in use. Need to see if the request
2554*0Sstevel@tonic-gate	! exceeds our suddenly larger minimum.
2555*0Sstevel@tonic-gate	!
2556*0Sstevel@tonic-gate	cmp	%i2, VIS_COPY_THRESHOLD+(64*4) ! for large counts (larger
2557*0Sstevel@tonic-gate	bl	%ncc, .small_copyout
2558*0Sstevel@tonic-gate	  nop
2559*0Sstevel@tonic-gate	!
2560*0Sstevel@tonic-gate	! We're going to go off and do a block copy.
2561*0Sstevel@tonic-gate	! Change to the heavy duty fault handler and grab a window first.
2562*0Sstevel@tonic-gate	!
2563*0Sstevel@tonic-gate	stn	%o4, [THREAD_REG + T_LOFAULT]
2564*0Sstevel@tonic-gate	save	%sp, -SA(MINFRAME + HWCOPYFRAMESIZE), %sp
2565*0Sstevel@tonic-gate	st	%i3, [%fp + STACK_BIAS - SAVED_FPRS_OFFSET]
2566*0Sstevel@tonic-gate	!
2567*0Sstevel@tonic-gate	! save in-use fpregs on stack
2568*0Sstevel@tonic-gate	!
2569*0Sstevel@tonic-gate	wr	%g0, FPRS_FEF, %fprs
2570*0Sstevel@tonic-gate	membar	#Sync
2571*0Sstevel@tonic-gate	add	%fp, STACK_BIAS - 257, %o2
2572*0Sstevel@tonic-gate	and	%o2, -64, %o2
2573*0Sstevel@tonic-gate	stda	%d0, [%o2]ASI_BLK_P
2574*0Sstevel@tonic-gate	add	%o2, 64, %o2
2575*0Sstevel@tonic-gate	stda	%d16, [%o2]ASI_BLK_P
2576*0Sstevel@tonic-gate	add	%o2, 64, %o2
2577*0Sstevel@tonic-gate	stda	%d32, [%o2]ASI_BLK_P
2578*0Sstevel@tonic-gate	add	%o2, 64, %o2
2579*0Sstevel@tonic-gate	stda	%d48, [%o2]ASI_BLK_P
2580*0Sstevel@tonic-gate	membar	#Sync
2581*0Sstevel@tonic-gate
2582*0Sstevel@tonic-gate.do_block_copyout:
2583*0Sstevel@tonic-gate	membar	#StoreStore|#StoreLoad|#LoadStore
2584*0Sstevel@tonic-gate
2585*0Sstevel@tonic-gate	rd	%gsr, %o2
2586*0Sstevel@tonic-gate	st	%o2, [%fp + STACK_BIAS - SAVED_GSR_OFFSET]	! save gsr
2587*0Sstevel@tonic-gate
2588*0Sstevel@tonic-gate	! Set the lower bit in the saved t_lofault to indicate
2589*0Sstevel@tonic-gate	! that we need to clear the %fprs register on the way
2590*0Sstevel@tonic-gate	! out
2591*0Sstevel@tonic-gate	or	SAVED_LOFAULT, FPUSED_FLAG, SAVED_LOFAULT
2592*0Sstevel@tonic-gate
2593*0Sstevel@tonic-gate	! Swap src/dst since the code below is memcpy code
2594*0Sstevel@tonic-gate	! and memcpy/bcopy have different calling sequences
2595*0Sstevel@tonic-gate	mov	%i1, %i5
2596*0Sstevel@tonic-gate	mov	%i0, %i1
2597*0Sstevel@tonic-gate	mov	%i5, %i0
2598*0Sstevel@tonic-gate
2599*0Sstevel@tonic-gate!!! This code is nearly identical to the version in the sun4u
2600*0Sstevel@tonic-gate!!! libc_psr.  Most bugfixes made to that file should be
2601*0Sstevel@tonic-gate!!! merged into this routine.
2602*0Sstevel@tonic-gate
2603*0Sstevel@tonic-gate	andcc	%i0, 7, %o3
2604*0Sstevel@tonic-gate	bz	%ncc, copyout_blkcpy
2605*0Sstevel@tonic-gate	sub	%o3, 8, %o3
2606*0Sstevel@tonic-gate	neg	%o3
2607*0Sstevel@tonic-gate	sub	%i2, %o3, %i2
2608*0Sstevel@tonic-gate
2609*0Sstevel@tonic-gate	! Align Destination on double-word boundary
2610*0Sstevel@tonic-gate
2611*0Sstevel@tonic-gate2:	ldub	[%i1], %o4
2612*0Sstevel@tonic-gate	inc	%i1
2613*0Sstevel@tonic-gate	stba	%o4, [%i0]ASI_USER
2614*0Sstevel@tonic-gate	deccc	%o3
2615*0Sstevel@tonic-gate	bgu	%ncc, 2b
2616*0Sstevel@tonic-gate	  inc	%i0
2617*0Sstevel@tonic-gatecopyout_blkcpy:
2618*0Sstevel@tonic-gate	andcc	%i0, 63, %i3
2619*0Sstevel@tonic-gate	bz,pn	%ncc, copyout_blalign	! now block aligned
2620*0Sstevel@tonic-gate	sub	%i3, 64, %i3
2621*0Sstevel@tonic-gate	neg	%i3			! bytes till block aligned
2622*0Sstevel@tonic-gate	sub	%i2, %i3, %i2		! update %i2 with new count
2623*0Sstevel@tonic-gate
2624*0Sstevel@tonic-gate	! Copy %i3 bytes till dst is block (64 byte) aligned. use
2625*0Sstevel@tonic-gate	! double word copies.
2626*0Sstevel@tonic-gate
2627*0Sstevel@tonic-gate	alignaddr %i1, %g0, %g1
2628*0Sstevel@tonic-gate	ldd	[%g1], %d0
2629*0Sstevel@tonic-gate	add	%g1, 8, %g1
2630*0Sstevel@tonic-gate6:
2631*0Sstevel@tonic-gate	ldd	[%g1], %d2
2632*0Sstevel@tonic-gate	add	%g1, 8, %g1
2633*0Sstevel@tonic-gate	subcc	%i3, 8, %i3
2634*0Sstevel@tonic-gate	faligndata %d0, %d2, %d8
2635*0Sstevel@tonic-gate	stda	 %d8, [%i0]ASI_USER
2636*0Sstevel@tonic-gate	add	%i1, 8, %i1
2637*0Sstevel@tonic-gate	bz,pn	%ncc, copyout_blalign
2638*0Sstevel@tonic-gate	add	%i0, 8, %i0
2639*0Sstevel@tonic-gate	ldd	[%g1], %d0
2640*0Sstevel@tonic-gate	add	%g1, 8, %g1
2641*0Sstevel@tonic-gate	subcc	%i3, 8, %i3
2642*0Sstevel@tonic-gate	faligndata %d2, %d0, %d8
2643*0Sstevel@tonic-gate	stda	 %d8, [%i0]ASI_USER
2644*0Sstevel@tonic-gate	add	%i1, 8, %i1
2645*0Sstevel@tonic-gate	bgu,pn	%ncc, 6b
2646*0Sstevel@tonic-gate	add	%i0, 8, %i0
2647*0Sstevel@tonic-gate
2648*0Sstevel@tonic-gatecopyout_blalign:
2649*0Sstevel@tonic-gate	membar	#StoreLoad
2650*0Sstevel@tonic-gate	! %i2 = total length
2651*0Sstevel@tonic-gate	! %i3 = blocks	(length - 64) / 64
2652*0Sstevel@tonic-gate	! %i4 = doubles remaining  (length - blocks)
2653*0Sstevel@tonic-gate	sub	%i2, 64, %i3
2654*0Sstevel@tonic-gate	andn	%i3, 63, %i3
2655*0Sstevel@tonic-gate	sub	%i2, %i3, %i4
2656*0Sstevel@tonic-gate	andn	%i4, 7, %i4
2657*0Sstevel@tonic-gate	sub	%i4, 16, %i4
2658*0Sstevel@tonic-gate	sub	%i2, %i4, %i2
2659*0Sstevel@tonic-gate	sub	%i2, %i3, %i2
2660*0Sstevel@tonic-gate
2661*0Sstevel@tonic-gate	andn	%i1, 0x3f, %l7		! blk aligned address
2662*0Sstevel@tonic-gate	alignaddr %i1, %g0, %g0		! gen %gsr
2663*0Sstevel@tonic-gate
2664*0Sstevel@tonic-gate	srl	%i1, 3, %l5		! bits 3,4,5 are now least sig in  %l5
2665*0Sstevel@tonic-gate	andcc	%l5, 7, %i5		! mask everything except bits 1,2 3
2666*0Sstevel@tonic-gate	add	%i1, %i4, %i1
2667*0Sstevel@tonic-gate	add	%i1, %i3, %i1
2668*0Sstevel@tonic-gate
2669*0Sstevel@tonic-gate	ldda	[%l7]ASI_BLK_P, %d0
2670*0Sstevel@tonic-gate	add	%l7, 64, %l7
2671*0Sstevel@tonic-gate	ldda	[%l7]ASI_BLK_P, %d16
2672*0Sstevel@tonic-gate	add	%l7, 64, %l7
2673*0Sstevel@tonic-gate	ldda	[%l7]ASI_BLK_P, %d32
2674*0Sstevel@tonic-gate	add	%l7, 64, %l7
2675*0Sstevel@tonic-gate	sub	%i3, 128, %i3
2676*0Sstevel@tonic-gate
2677*0Sstevel@tonic-gate	! switch statement to get us to the right 8 byte blk within a
2678*0Sstevel@tonic-gate	! 64 byte block
2679*0Sstevel@tonic-gate
2680*0Sstevel@tonic-gate	cmp	 %i5, 4
2681*0Sstevel@tonic-gate	bgeu,a	 copyout_hlf
2682*0Sstevel@tonic-gate	cmp	 %i5, 6
2683*0Sstevel@tonic-gate	cmp	 %i5, 2
2684*0Sstevel@tonic-gate	bgeu,a	 copyout_sqtr
2685*0Sstevel@tonic-gate	nop
2686*0Sstevel@tonic-gate	cmp	 %i5, 1
2687*0Sstevel@tonic-gate	be,a	 copyout_seg1
2688*0Sstevel@tonic-gate	nop
2689*0Sstevel@tonic-gate	ba,pt	 %ncc, copyout_seg0
2690*0Sstevel@tonic-gate	nop
2691*0Sstevel@tonic-gatecopyout_sqtr:
2692*0Sstevel@tonic-gate	be,a	 copyout_seg2
2693*0Sstevel@tonic-gate	nop
2694*0Sstevel@tonic-gate	ba,pt	 %ncc, copyout_seg3
2695*0Sstevel@tonic-gate	nop
2696*0Sstevel@tonic-gate
2697*0Sstevel@tonic-gatecopyout_hlf:
2698*0Sstevel@tonic-gate	bgeu,a	 copyout_fqtr
2699*0Sstevel@tonic-gate	nop
2700*0Sstevel@tonic-gate	cmp	 %i5, 5
2701*0Sstevel@tonic-gate	be,a	 copyout_seg5
2702*0Sstevel@tonic-gate	nop
2703*0Sstevel@tonic-gate	ba,pt	 %ncc, copyout_seg4
2704*0Sstevel@tonic-gate	nop
2705*0Sstevel@tonic-gatecopyout_fqtr:
2706*0Sstevel@tonic-gate	be,a	 copyout_seg6
2707*0Sstevel@tonic-gate	nop
2708*0Sstevel@tonic-gate	ba,pt	 %ncc, copyout_seg7
2709*0Sstevel@tonic-gate	nop
2710*0Sstevel@tonic-gate
2711*0Sstevel@tonic-gatecopyout_seg0:
2712*0Sstevel@tonic-gate	! 1st chunk - %d0 low, %d16 high, %d32 pre, %d48 dst
2713*0Sstevel@tonic-gate	FALIGN_D0
2714*0Sstevel@tonic-gate	ldda	[%l7]ASI_BLK_P, %d0
2715*0Sstevel@tonic-gate	stda	%d48, [%i0]ASI_BLK_AIUS
2716*0Sstevel@tonic-gate	add	%l7, 64, %l7
2717*0Sstevel@tonic-gate	subcc	%i3, 64, %i3
2718*0Sstevel@tonic-gate	bz,pn	%ncc, 0f
2719*0Sstevel@tonic-gate	add	%i0, 64, %i0
2720*0Sstevel@tonic-gate	! 2nd chunk -  %d0 pre, %d16 low, %d32 high, %d48 dst
2721*0Sstevel@tonic-gate	FALIGN_D16
2722*0Sstevel@tonic-gate	ldda	[%l7]ASI_BLK_P, %d16
2723*0Sstevel@tonic-gate	stda	%d48, [%i0]ASI_BLK_AIUS
2724*0Sstevel@tonic-gate	add	%l7, 64, %l7
2725*0Sstevel@tonic-gate	subcc	%i3, 64, %i3
2726*0Sstevel@tonic-gate	bz,pn	%ncc, 1f
2727*0Sstevel@tonic-gate	add	%i0, 64, %i0
2728*0Sstevel@tonic-gate	! 3rd chunk -  %d0 high, %d16 pre, %d32 low, %d48 dst
2729*0Sstevel@tonic-gate	FALIGN_D32
2730*0Sstevel@tonic-gate	ldda	[%l7]ASI_BLK_P, %d32
2731*0Sstevel@tonic-gate	stda	%d48, [%i0]ASI_BLK_AIUS
2732*0Sstevel@tonic-gate	add	%l7, 64, %l7
2733*0Sstevel@tonic-gate	subcc	%i3, 64, %i3
2734*0Sstevel@tonic-gate	bz,pn	%ncc, 2f
2735*0Sstevel@tonic-gate	add	%i0, 64, %i0
2736*0Sstevel@tonic-gate	ba,a,pt	%ncc, copyout_seg0
2737*0Sstevel@tonic-gate
2738*0Sstevel@tonic-gate0:
2739*0Sstevel@tonic-gate	FALIGN_D16
2740*0Sstevel@tonic-gate	stda	%d48, [%i0]ASI_BLK_AIUS
2741*0Sstevel@tonic-gate	add	%i0, 64, %i0
2742*0Sstevel@tonic-gate	membar	#Sync
2743*0Sstevel@tonic-gate	FALIGN_D32
2744*0Sstevel@tonic-gate	stda	%d48, [%i0]ASI_BLK_AIUS
2745*0Sstevel@tonic-gate	ba,pt	%ncc, copyout_blkd0
2746*0Sstevel@tonic-gate	add	%i0, 64, %i0
2747*0Sstevel@tonic-gate
2748*0Sstevel@tonic-gate1:
2749*0Sstevel@tonic-gate	FALIGN_D32
2750*0Sstevel@tonic-gate	stda	%d48, [%i0]ASI_BLK_AIUS
2751*0Sstevel@tonic-gate	add	%i0, 64, %i0
2752*0Sstevel@tonic-gate	membar	#Sync
2753*0Sstevel@tonic-gate	FALIGN_D0
2754*0Sstevel@tonic-gate	stda	%d48, [%i0]ASI_BLK_AIUS
2755*0Sstevel@tonic-gate	ba,pt	%ncc, copyout_blkd16
2756*0Sstevel@tonic-gate	add	%i0, 64, %i0
2757*0Sstevel@tonic-gate
2758*0Sstevel@tonic-gate2:
2759*0Sstevel@tonic-gate	FALIGN_D0
2760*0Sstevel@tonic-gate	stda	%d48, [%i0]ASI_BLK_AIUS
2761*0Sstevel@tonic-gate	add	%i0, 64, %i0
2762*0Sstevel@tonic-gate	membar	#Sync
2763*0Sstevel@tonic-gate	FALIGN_D16
2764*0Sstevel@tonic-gate	stda	%d48, [%i0]ASI_BLK_AIUS
2765*0Sstevel@tonic-gate	ba,pt	%ncc, copyout_blkd32
2766*0Sstevel@tonic-gate	add	%i0, 64, %i0
2767*0Sstevel@tonic-gate
2768*0Sstevel@tonic-gatecopyout_seg1:
2769*0Sstevel@tonic-gate	! 1st chunk - %d0 low, %d16 high, %d32 pre, %d48 dst
2770*0Sstevel@tonic-gate	FALIGN_D2
2771*0Sstevel@tonic-gate	ldda	[%l7]ASI_BLK_P, %d0
2772*0Sstevel@tonic-gate	stda	%d48, [%i0]ASI_BLK_AIUS
2773*0Sstevel@tonic-gate	add	%l7, 64, %l7
2774*0Sstevel@tonic-gate	subcc	%i3, 64, %i3
2775*0Sstevel@tonic-gate	bz,pn	%ncc, 0f
2776*0Sstevel@tonic-gate	add	%i0, 64, %i0
2777*0Sstevel@tonic-gate	! 2nd chunk -  %d0 pre, %d16 low, %d32 high, %d48 dst
2778*0Sstevel@tonic-gate	FALIGN_D18
2779*0Sstevel@tonic-gate	ldda	[%l7]ASI_BLK_P, %d16
2780*0Sstevel@tonic-gate	stda	%d48, [%i0]ASI_BLK_AIUS
2781*0Sstevel@tonic-gate	add	%l7, 64, %l7
2782*0Sstevel@tonic-gate	subcc	%i3, 64, %i3
2783*0Sstevel@tonic-gate	bz,pn	%ncc, 1f
2784*0Sstevel@tonic-gate	add	%i0, 64, %i0
2785*0Sstevel@tonic-gate	! 3rd chunk -  %d0 high, %d16 pre, %d32 low, %d48 dst
2786*0Sstevel@tonic-gate	FALIGN_D34
2787*0Sstevel@tonic-gate	ldda	[%l7]ASI_BLK_P, %d32
2788*0Sstevel@tonic-gate	stda	%d48, [%i0]ASI_BLK_AIUS
2789*0Sstevel@tonic-gate	add	%l7, 64, %l7
2790*0Sstevel@tonic-gate	subcc	%i3, 64, %i3
2791*0Sstevel@tonic-gate	bz,pn	%ncc, 2f
2792*0Sstevel@tonic-gate	add	%i0, 64, %i0
2793*0Sstevel@tonic-gate	ba,a,pt	%ncc, copyout_seg1
2794*0Sstevel@tonic-gate0:
2795*0Sstevel@tonic-gate	FALIGN_D18
2796*0Sstevel@tonic-gate	stda	%d48, [%i0]ASI_BLK_AIUS
2797*0Sstevel@tonic-gate	add	%i0, 64, %i0
2798*0Sstevel@tonic-gate	membar	#Sync
2799*0Sstevel@tonic-gate	FALIGN_D34
2800*0Sstevel@tonic-gate	stda	%d48, [%i0]ASI_BLK_AIUS
2801*0Sstevel@tonic-gate	ba,pt	%ncc, copyout_blkd2
2802*0Sstevel@tonic-gate	add	%i0, 64, %i0
2803*0Sstevel@tonic-gate
2804*0Sstevel@tonic-gate1:
2805*0Sstevel@tonic-gate	FALIGN_D34
2806*0Sstevel@tonic-gate	stda	%d48, [%i0]ASI_BLK_AIUS
2807*0Sstevel@tonic-gate	add	%i0, 64, %i0
2808*0Sstevel@tonic-gate	membar	#Sync
2809*0Sstevel@tonic-gate	FALIGN_D2
2810*0Sstevel@tonic-gate	stda	%d48, [%i0]ASI_BLK_AIUS
2811*0Sstevel@tonic-gate	ba,pt	%ncc, copyout_blkd18
2812*0Sstevel@tonic-gate	add	%i0, 64, %i0
2813*0Sstevel@tonic-gate
2814*0Sstevel@tonic-gate2:
2815*0Sstevel@tonic-gate	FALIGN_D2
2816*0Sstevel@tonic-gate	stda	%d48, [%i0]ASI_BLK_AIUS
2817*0Sstevel@tonic-gate	add	%i0, 64, %i0
2818*0Sstevel@tonic-gate	membar	#Sync
2819*0Sstevel@tonic-gate	FALIGN_D18
2820*0Sstevel@tonic-gate	stda	%d48, [%i0]ASI_BLK_AIUS
2821*0Sstevel@tonic-gate	ba,pt	%ncc, copyout_blkd34
2822*0Sstevel@tonic-gate	add	%i0, 64, %i0
2823*0Sstevel@tonic-gate
2824*0Sstevel@tonic-gatecopyout_seg2:
2825*0Sstevel@tonic-gate	! 1st chunk - %d0 low, %d16 high, %d32 pre, %d48 dst
2826*0Sstevel@tonic-gate	FALIGN_D4
2827*0Sstevel@tonic-gate	ldda	[%l7]ASI_BLK_P, %d0
2828*0Sstevel@tonic-gate	stda	%d48, [%i0]ASI_BLK_AIUS
2829*0Sstevel@tonic-gate	add	%l7, 64, %l7
2830*0Sstevel@tonic-gate	subcc	%i3, 64, %i3
2831*0Sstevel@tonic-gate	bz,pn	%ncc, 0f
2832*0Sstevel@tonic-gate	add	%i0, 64, %i0
2833*0Sstevel@tonic-gate	! 2nd chunk -  %d0 pre, %d16 low, %d32 high, %d48 dst
2834*0Sstevel@tonic-gate	FALIGN_D20
2835*0Sstevel@tonic-gate	ldda	[%l7]ASI_BLK_P, %d16
2836*0Sstevel@tonic-gate	stda	%d48, [%i0]ASI_BLK_AIUS
2837*0Sstevel@tonic-gate	add	%l7, 64, %l7
2838*0Sstevel@tonic-gate	subcc	%i3, 64, %i3
2839*0Sstevel@tonic-gate	bz,pn	%ncc, 1f
2840*0Sstevel@tonic-gate	add	%i0, 64, %i0
2841*0Sstevel@tonic-gate	! 3rd chunk -  %d0 high, %d16 pre, %d32 low, %d48 dst
2842*0Sstevel@tonic-gate	FALIGN_D36
2843*0Sstevel@tonic-gate	ldda	[%l7]ASI_BLK_P, %d32
2844*0Sstevel@tonic-gate	stda	%d48, [%i0]ASI_BLK_AIUS
2845*0Sstevel@tonic-gate	add	%l7, 64, %l7
2846*0Sstevel@tonic-gate	subcc	%i3, 64, %i3
2847*0Sstevel@tonic-gate	bz,pn	%ncc, 2f
2848*0Sstevel@tonic-gate	add	%i0, 64, %i0
2849*0Sstevel@tonic-gate	ba,a,pt	%ncc, copyout_seg2
2850*0Sstevel@tonic-gate
2851*0Sstevel@tonic-gate0:
2852*0Sstevel@tonic-gate	FALIGN_D20
2853*0Sstevel@tonic-gate	stda	%d48, [%i0]ASI_BLK_AIUS
2854*0Sstevel@tonic-gate	add	%i0, 64, %i0
2855*0Sstevel@tonic-gate	membar	#Sync
2856*0Sstevel@tonic-gate	FALIGN_D36
2857*0Sstevel@tonic-gate	stda	%d48, [%i0]ASI_BLK_AIUS
2858*0Sstevel@tonic-gate	ba,pt	%ncc, copyout_blkd4
2859*0Sstevel@tonic-gate	add	%i0, 64, %i0
2860*0Sstevel@tonic-gate
2861*0Sstevel@tonic-gate1:
2862*0Sstevel@tonic-gate	FALIGN_D36
2863*0Sstevel@tonic-gate	stda	%d48, [%i0]ASI_BLK_AIUS
2864*0Sstevel@tonic-gate	add	%i0, 64, %i0
2865*0Sstevel@tonic-gate	membar	#Sync
2866*0Sstevel@tonic-gate	FALIGN_D4
2867*0Sstevel@tonic-gate	stda	%d48, [%i0]ASI_BLK_AIUS
2868*0Sstevel@tonic-gate	ba,pt	%ncc, copyout_blkd20
2869*0Sstevel@tonic-gate	add	%i0, 64, %i0
2870*0Sstevel@tonic-gate
2871*0Sstevel@tonic-gate2:
2872*0Sstevel@tonic-gate	FALIGN_D4
2873*0Sstevel@tonic-gate	stda	%d48, [%i0]ASI_BLK_AIUS
2874*0Sstevel@tonic-gate	add	%i0, 64, %i0
2875*0Sstevel@tonic-gate	membar	#Sync
2876*0Sstevel@tonic-gate	FALIGN_D20
2877*0Sstevel@tonic-gate	stda	%d48, [%i0]ASI_BLK_AIUS
2878*0Sstevel@tonic-gate	ba,pt	%ncc, copyout_blkd36
2879*0Sstevel@tonic-gate	add	%i0, 64, %i0
2880*0Sstevel@tonic-gate
2881*0Sstevel@tonic-gatecopyout_seg3:
2882*0Sstevel@tonic-gate	! 1st chunk - %d0 low, %d16 high, %d32 pre, %d48 dst
2883*0Sstevel@tonic-gate	FALIGN_D6
2884*0Sstevel@tonic-gate	ldda	[%l7]ASI_BLK_P, %d0
2885*0Sstevel@tonic-gate	stda	%d48, [%i0]ASI_BLK_AIUS
2886*0Sstevel@tonic-gate	add	%l7, 64, %l7
2887*0Sstevel@tonic-gate	subcc	%i3, 64, %i3
2888*0Sstevel@tonic-gate	bz,pn	%ncc, 0f
2889*0Sstevel@tonic-gate	add	%i0, 64, %i0
2890*0Sstevel@tonic-gate	! 2nd chunk -  %d0 pre, %d16 low, %d32 high, %d48 dst
2891*0Sstevel@tonic-gate	FALIGN_D22
2892*0Sstevel@tonic-gate	ldda	[%l7]ASI_BLK_P, %d16
2893*0Sstevel@tonic-gate	stda	%d48, [%i0]ASI_BLK_AIUS
2894*0Sstevel@tonic-gate	add	%l7, 64, %l7
2895*0Sstevel@tonic-gate	subcc	%i3, 64, %i3
2896*0Sstevel@tonic-gate	bz,pn	%ncc, 1f
2897*0Sstevel@tonic-gate	add	%i0, 64, %i0
2898*0Sstevel@tonic-gate	! 3rd chunk -  %d0 high, %d16 pre, %d32 low, %d48 dst
2899*0Sstevel@tonic-gate	FALIGN_D38
2900*0Sstevel@tonic-gate	ldda	[%l7]ASI_BLK_P, %d32
2901*0Sstevel@tonic-gate	stda	%d48, [%i0]ASI_BLK_AIUS
2902*0Sstevel@tonic-gate	add	%l7, 64, %l7
2903*0Sstevel@tonic-gate	subcc	%i3, 64, %i3
2904*0Sstevel@tonic-gate	bz,pn	%ncc, 2f
2905*0Sstevel@tonic-gate	add	%i0, 64, %i0
2906*0Sstevel@tonic-gate	ba,a,pt	%ncc, copyout_seg3
2907*0Sstevel@tonic-gate
2908*0Sstevel@tonic-gate0:
2909*0Sstevel@tonic-gate	FALIGN_D22
2910*0Sstevel@tonic-gate	stda	%d48, [%i0]ASI_BLK_AIUS
2911*0Sstevel@tonic-gate	add	%i0, 64, %i0
2912*0Sstevel@tonic-gate	membar	#Sync
2913*0Sstevel@tonic-gate	FALIGN_D38
2914*0Sstevel@tonic-gate	stda	%d48, [%i0]ASI_BLK_AIUS
2915*0Sstevel@tonic-gate	ba,pt	%ncc, copyout_blkd6
2916*0Sstevel@tonic-gate	add	%i0, 64, %i0
2917*0Sstevel@tonic-gate
2918*0Sstevel@tonic-gate1:
2919*0Sstevel@tonic-gate	FALIGN_D38
2920*0Sstevel@tonic-gate	stda	%d48, [%i0]ASI_BLK_AIUS
2921*0Sstevel@tonic-gate	add	%i0, 64, %i0
2922*0Sstevel@tonic-gate	membar	#Sync
2923*0Sstevel@tonic-gate	FALIGN_D6
2924*0Sstevel@tonic-gate	stda	%d48, [%i0]ASI_BLK_AIUS
2925*0Sstevel@tonic-gate	ba,pt	%ncc, copyout_blkd22
2926*0Sstevel@tonic-gate	add	%i0, 64, %i0
2927*0Sstevel@tonic-gate
2928*0Sstevel@tonic-gate2:
2929*0Sstevel@tonic-gate	FALIGN_D6
2930*0Sstevel@tonic-gate	stda	%d48, [%i0]ASI_BLK_AIUS
2931*0Sstevel@tonic-gate	add	%i0, 64, %i0
2932*0Sstevel@tonic-gate	membar	#Sync
2933*0Sstevel@tonic-gate	FALIGN_D22
2934*0Sstevel@tonic-gate	stda	%d48, [%i0]ASI_BLK_AIUS
2935*0Sstevel@tonic-gate	ba,pt	%ncc, copyout_blkd38
2936*0Sstevel@tonic-gate	add	%i0, 64, %i0
2937*0Sstevel@tonic-gate
2938*0Sstevel@tonic-gatecopyout_seg4:
2939*0Sstevel@tonic-gate	! 1st chunk - %d0 low, %d16 high, %d32 pre, %d48 dst
2940*0Sstevel@tonic-gate	FALIGN_D8
2941*0Sstevel@tonic-gate	ldda	[%l7]ASI_BLK_P, %d0
2942*0Sstevel@tonic-gate	stda	%d48, [%i0]ASI_BLK_AIUS
2943*0Sstevel@tonic-gate	add	%l7, 64, %l7
2944*0Sstevel@tonic-gate	subcc	%i3, 64, %i3
2945*0Sstevel@tonic-gate	bz,pn	%ncc, 0f
2946*0Sstevel@tonic-gate	add	%i0, 64, %i0
2947*0Sstevel@tonic-gate	! 2nd chunk -  %d0 pre, %d16 low, %d32 high, %d48 dst
2948*0Sstevel@tonic-gate	FALIGN_D24
2949*0Sstevel@tonic-gate	ldda	[%l7]ASI_BLK_P, %d16
2950*0Sstevel@tonic-gate	stda	%d48, [%i0]ASI_BLK_AIUS
2951*0Sstevel@tonic-gate	add	%l7, 64, %l7
2952*0Sstevel@tonic-gate	subcc	%i3, 64, %i3
2953*0Sstevel@tonic-gate	bz,pn	%ncc, 1f
2954*0Sstevel@tonic-gate	add	%i0, 64, %i0
2955*0Sstevel@tonic-gate	! 3rd chunk -  %d0 high, %d16 pre, %d32 low, %d48 dst
2956*0Sstevel@tonic-gate	FALIGN_D40
2957*0Sstevel@tonic-gate	ldda	[%l7]ASI_BLK_P, %d32
2958*0Sstevel@tonic-gate	stda	%d48, [%i0]ASI_BLK_AIUS
2959*0Sstevel@tonic-gate	add	%l7, 64, %l7
2960*0Sstevel@tonic-gate	subcc	%i3, 64, %i3
2961*0Sstevel@tonic-gate	bz,pn	%ncc, 2f
2962*0Sstevel@tonic-gate	add	%i0, 64, %i0
2963*0Sstevel@tonic-gate	ba,a,pt	%ncc, copyout_seg4
2964*0Sstevel@tonic-gate
2965*0Sstevel@tonic-gate0:
2966*0Sstevel@tonic-gate	FALIGN_D24
2967*0Sstevel@tonic-gate	stda	%d48, [%i0]ASI_BLK_AIUS
2968*0Sstevel@tonic-gate	add	%i0, 64, %i0
2969*0Sstevel@tonic-gate	membar	#Sync
2970*0Sstevel@tonic-gate	FALIGN_D40
2971*0Sstevel@tonic-gate	stda	%d48, [%i0]ASI_BLK_AIUS
2972*0Sstevel@tonic-gate	ba,pt	%ncc, copyout_blkd8
2973*0Sstevel@tonic-gate	add	%i0, 64, %i0
2974*0Sstevel@tonic-gate
2975*0Sstevel@tonic-gate1:
2976*0Sstevel@tonic-gate	FALIGN_D40
2977*0Sstevel@tonic-gate	stda	%d48, [%i0]ASI_BLK_AIUS
2978*0Sstevel@tonic-gate	add	%i0, 64, %i0
2979*0Sstevel@tonic-gate	membar	#Sync
2980*0Sstevel@tonic-gate	FALIGN_D8
2981*0Sstevel@tonic-gate	stda	%d48, [%i0]ASI_BLK_AIUS
2982*0Sstevel@tonic-gate	ba,pt	%ncc, copyout_blkd24
2983*0Sstevel@tonic-gate	add	%i0, 64, %i0
2984*0Sstevel@tonic-gate
2985*0Sstevel@tonic-gate2:
2986*0Sstevel@tonic-gate	FALIGN_D8
2987*0Sstevel@tonic-gate	stda	%d48, [%i0]ASI_BLK_AIUS
2988*0Sstevel@tonic-gate	add	%i0, 64, %i0
2989*0Sstevel@tonic-gate	membar	#Sync
2990*0Sstevel@tonic-gate	FALIGN_D24
2991*0Sstevel@tonic-gate	stda	%d48, [%i0]ASI_BLK_AIUS
2992*0Sstevel@tonic-gate	ba,pt	%ncc, copyout_blkd40
2993*0Sstevel@tonic-gate	add	%i0, 64, %i0
2994*0Sstevel@tonic-gate
2995*0Sstevel@tonic-gatecopyout_seg5:
2996*0Sstevel@tonic-gate	! 1st chunk - %d0 low, %d16 high, %d32 pre, %d48 dst
2997*0Sstevel@tonic-gate	FALIGN_D10
2998*0Sstevel@tonic-gate	ldda	[%l7]ASI_BLK_P, %d0
2999*0Sstevel@tonic-gate	stda	%d48, [%i0]ASI_BLK_AIUS
3000*0Sstevel@tonic-gate	add	%l7, 64, %l7
3001*0Sstevel@tonic-gate	subcc	%i3, 64, %i3
3002*0Sstevel@tonic-gate	bz,pn	%ncc, 0f
3003*0Sstevel@tonic-gate	add	%i0, 64, %i0
3004*0Sstevel@tonic-gate	! 2nd chunk -  %d0 pre, %d16 low, %d32 high, %d48 dst
3005*0Sstevel@tonic-gate	FALIGN_D26
3006*0Sstevel@tonic-gate	ldda	[%l7]ASI_BLK_P, %d16
3007*0Sstevel@tonic-gate	stda	%d48, [%i0]ASI_BLK_AIUS
3008*0Sstevel@tonic-gate	add	%l7, 64, %l7
3009*0Sstevel@tonic-gate	subcc	%i3, 64, %i3
3010*0Sstevel@tonic-gate	bz,pn	%ncc, 1f
3011*0Sstevel@tonic-gate	add	%i0, 64, %i0
3012*0Sstevel@tonic-gate	! 3rd chunk -  %d0 high, %d16 pre, %d32 low, %d48 dst
3013*0Sstevel@tonic-gate	FALIGN_D42
3014*0Sstevel@tonic-gate	ldda	[%l7]ASI_BLK_P, %d32
3015*0Sstevel@tonic-gate	stda	%d48, [%i0]ASI_BLK_AIUS
3016*0Sstevel@tonic-gate	add	%l7, 64, %l7
3017*0Sstevel@tonic-gate	subcc	%i3, 64, %i3
3018*0Sstevel@tonic-gate	bz,pn	%ncc, 2f
3019*0Sstevel@tonic-gate	add	%i0, 64, %i0
3020*0Sstevel@tonic-gate	ba,a,pt	%ncc, copyout_seg5
3021*0Sstevel@tonic-gate
3022*0Sstevel@tonic-gate0:
3023*0Sstevel@tonic-gate	FALIGN_D26
3024*0Sstevel@tonic-gate	stda	%d48, [%i0]ASI_BLK_AIUS
3025*0Sstevel@tonic-gate	add	%i0, 64, %i0
3026*0Sstevel@tonic-gate	membar	#Sync
3027*0Sstevel@tonic-gate	FALIGN_D42
3028*0Sstevel@tonic-gate	stda	%d48, [%i0]ASI_BLK_AIUS
3029*0Sstevel@tonic-gate	ba,pt	%ncc, copyout_blkd10
3030*0Sstevel@tonic-gate	add	%i0, 64, %i0
3031*0Sstevel@tonic-gate
3032*0Sstevel@tonic-gate1:
3033*0Sstevel@tonic-gate	FALIGN_D42
3034*0Sstevel@tonic-gate	stda	%d48, [%i0]ASI_BLK_AIUS
3035*0Sstevel@tonic-gate	add	%i0, 64, %i0
3036*0Sstevel@tonic-gate	membar	#Sync
3037*0Sstevel@tonic-gate	FALIGN_D10
3038*0Sstevel@tonic-gate	stda	%d48, [%i0]ASI_BLK_AIUS
3039*0Sstevel@tonic-gate	ba,pt	%ncc, copyout_blkd26
3040*0Sstevel@tonic-gate	add	%i0, 64, %i0
3041*0Sstevel@tonic-gate
3042*0Sstevel@tonic-gate2:
3043*0Sstevel@tonic-gate	FALIGN_D10
3044*0Sstevel@tonic-gate	stda	%d48, [%i0]ASI_BLK_AIUS
3045*0Sstevel@tonic-gate	add	%i0, 64, %i0
3046*0Sstevel@tonic-gate	membar	#Sync
3047*0Sstevel@tonic-gate	FALIGN_D26
3048*0Sstevel@tonic-gate	stda	%d48, [%i0]ASI_BLK_AIUS
3049*0Sstevel@tonic-gate	ba,pt	%ncc, copyout_blkd42
3050*0Sstevel@tonic-gate	add	%i0, 64, %i0
3051*0Sstevel@tonic-gate
3052*0Sstevel@tonic-gatecopyout_seg6:
3053*0Sstevel@tonic-gate	! 1st chunk - %d0 low, %d16 high, %d32 pre, %d48 dst
3054*0Sstevel@tonic-gate	FALIGN_D12
3055*0Sstevel@tonic-gate	ldda	[%l7]ASI_BLK_P, %d0
3056*0Sstevel@tonic-gate	stda	%d48, [%i0]ASI_BLK_AIUS
3057*0Sstevel@tonic-gate	add	%l7, 64, %l7
3058*0Sstevel@tonic-gate	subcc	%i3, 64, %i3
3059*0Sstevel@tonic-gate	bz,pn	%ncc, 0f
3060*0Sstevel@tonic-gate	add	%i0, 64, %i0
3061*0Sstevel@tonic-gate	! 2nd chunk -  %d0 pre, %d16 low, %d32 high, %d48 dst
3062*0Sstevel@tonic-gate	FALIGN_D28
3063*0Sstevel@tonic-gate	ldda	[%l7]ASI_BLK_P, %d16
3064*0Sstevel@tonic-gate	stda	%d48, [%i0]ASI_BLK_AIUS
3065*0Sstevel@tonic-gate	add	%l7, 64, %l7
3066*0Sstevel@tonic-gate	subcc	%i3, 64, %i3
3067*0Sstevel@tonic-gate	bz,pn	%ncc, 1f
3068*0Sstevel@tonic-gate	add	%i0, 64, %i0
3069*0Sstevel@tonic-gate	! 3rd chunk -  %d0 high, %d16 pre, %d32 low, %d48 dst
3070*0Sstevel@tonic-gate	FALIGN_D44
3071*0Sstevel@tonic-gate	ldda	[%l7]ASI_BLK_P, %d32
3072*0Sstevel@tonic-gate	stda	%d48, [%i0]ASI_BLK_AIUS
3073*0Sstevel@tonic-gate	add	%l7, 64, %l7
3074*0Sstevel@tonic-gate	subcc	%i3, 64, %i3
3075*0Sstevel@tonic-gate	bz,pn	%ncc, 2f
3076*0Sstevel@tonic-gate	add	%i0, 64, %i0
3077*0Sstevel@tonic-gate	ba,a,pt	%ncc, copyout_seg6
3078*0Sstevel@tonic-gate
3079*0Sstevel@tonic-gate0:
3080*0Sstevel@tonic-gate	FALIGN_D28
3081*0Sstevel@tonic-gate	stda	%d48, [%i0]ASI_BLK_AIUS
3082*0Sstevel@tonic-gate	add	%i0, 64, %i0
3083*0Sstevel@tonic-gate	membar	#Sync
3084*0Sstevel@tonic-gate	FALIGN_D44
3085*0Sstevel@tonic-gate	stda	%d48, [%i0]ASI_BLK_AIUS
3086*0Sstevel@tonic-gate	ba,pt	%ncc, copyout_blkd12
3087*0Sstevel@tonic-gate	add	%i0, 64, %i0
3088*0Sstevel@tonic-gate
3089*0Sstevel@tonic-gate1:
3090*0Sstevel@tonic-gate	FALIGN_D44
3091*0Sstevel@tonic-gate	stda	%d48, [%i0]ASI_BLK_AIUS
3092*0Sstevel@tonic-gate	add	%i0, 64, %i0
3093*0Sstevel@tonic-gate	membar	#Sync
3094*0Sstevel@tonic-gate	FALIGN_D12
3095*0Sstevel@tonic-gate	stda	%d48, [%i0]ASI_BLK_AIUS
3096*0Sstevel@tonic-gate	ba,pt	%ncc, copyout_blkd28
3097*0Sstevel@tonic-gate	add	%i0, 64, %i0
3098*0Sstevel@tonic-gate
3099*0Sstevel@tonic-gate2:
3100*0Sstevel@tonic-gate	FALIGN_D12
3101*0Sstevel@tonic-gate	stda	%d48, [%i0]ASI_BLK_AIUS
3102*0Sstevel@tonic-gate	add	%i0, 64, %i0
3103*0Sstevel@tonic-gate	membar	#Sync
3104*0Sstevel@tonic-gate	FALIGN_D28
3105*0Sstevel@tonic-gate	stda	%d48, [%i0]ASI_BLK_AIUS
3106*0Sstevel@tonic-gate	ba,pt	%ncc, copyout_blkd44
3107*0Sstevel@tonic-gate	add	%i0, 64, %i0
3108*0Sstevel@tonic-gate
3109*0Sstevel@tonic-gatecopyout_seg7:
3110*0Sstevel@tonic-gate	! 1st chunk - %d0 low, %d16 high, %d32 pre, %d48 dst
3111*0Sstevel@tonic-gate	FALIGN_D14
3112*0Sstevel@tonic-gate	ldda	[%l7]ASI_BLK_P, %d0
3113*0Sstevel@tonic-gate	stda	%d48, [%i0]ASI_BLK_AIUS
3114*0Sstevel@tonic-gate	add	%l7, 64, %l7
3115*0Sstevel@tonic-gate	subcc	%i3, 64, %i3
3116*0Sstevel@tonic-gate	bz,pn	%ncc, 0f
3117*0Sstevel@tonic-gate	add	%i0, 64, %i0
3118*0Sstevel@tonic-gate	! 2nd chunk -  %d0 pre, %d16 low, %d32 high, %d48 dst
3119*0Sstevel@tonic-gate	FALIGN_D30
3120*0Sstevel@tonic-gate	ldda	[%l7]ASI_BLK_P, %d16
3121*0Sstevel@tonic-gate	stda	%d48, [%i0]ASI_BLK_AIUS
3122*0Sstevel@tonic-gate	add	%l7, 64, %l7
3123*0Sstevel@tonic-gate	subcc	%i3, 64, %i3
3124*0Sstevel@tonic-gate	bz,pn	%ncc, 1f
3125*0Sstevel@tonic-gate	add	%i0, 64, %i0
3126*0Sstevel@tonic-gate	! 3rd chunk -  %d0 high, %d16 pre, %d32 low, %d48 dst
3127*0Sstevel@tonic-gate	FALIGN_D46
3128*0Sstevel@tonic-gate	ldda	[%l7]ASI_BLK_P, %d32
3129*0Sstevel@tonic-gate	stda	%d48, [%i0]ASI_BLK_AIUS
3130*0Sstevel@tonic-gate	add	%l7, 64, %l7
3131*0Sstevel@tonic-gate	subcc	%i3, 64, %i3
3132*0Sstevel@tonic-gate	bz,pn	%ncc, 2f
3133*0Sstevel@tonic-gate	add	%i0, 64, %i0
3134*0Sstevel@tonic-gate	ba,a,pt	%ncc, copyout_seg7
3135*0Sstevel@tonic-gate
3136*0Sstevel@tonic-gate0:
3137*0Sstevel@tonic-gate	FALIGN_D30
3138*0Sstevel@tonic-gate	stda	%d48, [%i0]ASI_BLK_AIUS
3139*0Sstevel@tonic-gate	add	%i0, 64, %i0
3140*0Sstevel@tonic-gate	membar	#Sync
3141*0Sstevel@tonic-gate	FALIGN_D46
3142*0Sstevel@tonic-gate	stda	%d48, [%i0]ASI_BLK_AIUS
3143*0Sstevel@tonic-gate	ba,pt	%ncc, copyout_blkd14
3144*0Sstevel@tonic-gate	add	%i0, 64, %i0
3145*0Sstevel@tonic-gate
3146*0Sstevel@tonic-gate1:
3147*0Sstevel@tonic-gate	FALIGN_D46
3148*0Sstevel@tonic-gate	stda	%d48, [%i0]ASI_BLK_AIUS
3149*0Sstevel@tonic-gate	add	%i0, 64, %i0
3150*0Sstevel@tonic-gate	membar	#Sync
3151*0Sstevel@tonic-gate	FALIGN_D14
3152*0Sstevel@tonic-gate	stda	%d48, [%i0]ASI_BLK_AIUS
3153*0Sstevel@tonic-gate	ba,pt	%ncc, copyout_blkd30
3154*0Sstevel@tonic-gate	add	%i0, 64, %i0
3155*0Sstevel@tonic-gate
3156*0Sstevel@tonic-gate2:
3157*0Sstevel@tonic-gate	FALIGN_D14
3158*0Sstevel@tonic-gate	stda	%d48, [%i0]ASI_BLK_AIUS
3159*0Sstevel@tonic-gate	add	%i0, 64, %i0
3160*0Sstevel@tonic-gate	membar	#Sync
3161*0Sstevel@tonic-gate	FALIGN_D30
3162*0Sstevel@tonic-gate	stda	%d48, [%i0]ASI_BLK_AIUS
3163*0Sstevel@tonic-gate	ba,pt	%ncc, copyout_blkd46
3164*0Sstevel@tonic-gate	add	%i0, 64, %i0
3165*0Sstevel@tonic-gate
3166*0Sstevel@tonic-gate
3167*0Sstevel@tonic-gate	!
3168*0Sstevel@tonic-gate	! dribble out the last partial block
3169*0Sstevel@tonic-gate	!
3170*0Sstevel@tonic-gatecopyout_blkd0:
3171*0Sstevel@tonic-gate	subcc	%i4, 8, %i4
3172*0Sstevel@tonic-gate	blu,pn	%ncc, copyout_blkdone
3173*0Sstevel@tonic-gate	faligndata %d0, %d2, %d48
3174*0Sstevel@tonic-gate	stda	%d48, [%i0]ASI_USER
3175*0Sstevel@tonic-gate	add	%i0, 8, %i0
3176*0Sstevel@tonic-gatecopyout_blkd2:
3177*0Sstevel@tonic-gate	subcc	%i4, 8, %i4
3178*0Sstevel@tonic-gate	blu,pn	%ncc, copyout_blkdone
3179*0Sstevel@tonic-gate	faligndata %d2, %d4, %d48
3180*0Sstevel@tonic-gate	stda	%d48, [%i0]ASI_USER
3181*0Sstevel@tonic-gate	add	%i0, 8, %i0
3182*0Sstevel@tonic-gatecopyout_blkd4:
3183*0Sstevel@tonic-gate	subcc	%i4, 8, %i4
3184*0Sstevel@tonic-gate	blu,pn	%ncc, copyout_blkdone
3185*0Sstevel@tonic-gate	faligndata %d4, %d6, %d48
3186*0Sstevel@tonic-gate	stda	%d48, [%i0]ASI_USER
3187*0Sstevel@tonic-gate	add	%i0, 8, %i0
3188*0Sstevel@tonic-gatecopyout_blkd6:
3189*0Sstevel@tonic-gate	subcc	%i4, 8, %i4
3190*0Sstevel@tonic-gate	blu,pn	%ncc, copyout_blkdone
3191*0Sstevel@tonic-gate	faligndata %d6, %d8, %d48
3192*0Sstevel@tonic-gate	stda	%d48, [%i0]ASI_USER
3193*0Sstevel@tonic-gate	add	%i0, 8, %i0
3194*0Sstevel@tonic-gatecopyout_blkd8:
3195*0Sstevel@tonic-gate	subcc	%i4, 8, %i4
3196*0Sstevel@tonic-gate	blu,pn	%ncc, copyout_blkdone
3197*0Sstevel@tonic-gate	faligndata %d8, %d10, %d48
3198*0Sstevel@tonic-gate	stda	%d48, [%i0]ASI_USER
3199*0Sstevel@tonic-gate	add	%i0, 8, %i0
3200*0Sstevel@tonic-gatecopyout_blkd10:
3201*0Sstevel@tonic-gate	subcc	%i4, 8, %i4
3202*0Sstevel@tonic-gate	blu,pn	%ncc, copyout_blkdone
3203*0Sstevel@tonic-gate	faligndata %d10, %d12, %d48
3204*0Sstevel@tonic-gate	stda	%d48, [%i0]ASI_USER
3205*0Sstevel@tonic-gate	add	%i0, 8, %i0
3206*0Sstevel@tonic-gatecopyout_blkd12:
3207*0Sstevel@tonic-gate	subcc	%i4, 8, %i4
3208*0Sstevel@tonic-gate	blu,pn	%ncc, copyout_blkdone
3209*0Sstevel@tonic-gate	faligndata %d12, %d14, %d48
3210*0Sstevel@tonic-gate	stda	%d48, [%i0]ASI_USER
3211*0Sstevel@tonic-gate	add	%i0, 8, %i0
3212*0Sstevel@tonic-gatecopyout_blkd14:
3213*0Sstevel@tonic-gate	subcc	%i4, 8, %i4
3214*0Sstevel@tonic-gate	blu,pn	%ncc, copyout_blkdone
3215*0Sstevel@tonic-gate	fsrc1	%d14, %d0
3216*0Sstevel@tonic-gate	ba,a,pt	%ncc, copyout_blkleft
3217*0Sstevel@tonic-gate
3218*0Sstevel@tonic-gatecopyout_blkd16:
3219*0Sstevel@tonic-gate	subcc	%i4, 8, %i4
3220*0Sstevel@tonic-gate	blu,pn	%ncc, copyout_blkdone
3221*0Sstevel@tonic-gate	faligndata %d16, %d18, %d48
3222*0Sstevel@tonic-gate	stda	%d48, [%i0]ASI_USER
3223*0Sstevel@tonic-gate	add	%i0, 8, %i0
3224*0Sstevel@tonic-gatecopyout_blkd18:
3225*0Sstevel@tonic-gate	subcc	%i4, 8, %i4
3226*0Sstevel@tonic-gate	blu,pn	%ncc, copyout_blkdone
3227*0Sstevel@tonic-gate	faligndata %d18, %d20, %d48
3228*0Sstevel@tonic-gate	stda	%d48, [%i0]ASI_USER
3229*0Sstevel@tonic-gate	add	%i0, 8, %i0
3230*0Sstevel@tonic-gatecopyout_blkd20:
3231*0Sstevel@tonic-gate	subcc	%i4, 8, %i4
3232*0Sstevel@tonic-gate	blu,pn	%ncc, copyout_blkdone
3233*0Sstevel@tonic-gate	faligndata %d20, %d22, %d48
3234*0Sstevel@tonic-gate	stda	%d48, [%i0]ASI_USER
3235*0Sstevel@tonic-gate	add	%i0, 8, %i0
3236*0Sstevel@tonic-gatecopyout_blkd22:
3237*0Sstevel@tonic-gate	subcc	%i4, 8, %i4
3238*0Sstevel@tonic-gate	blu,pn	%ncc, copyout_blkdone
3239*0Sstevel@tonic-gate	faligndata %d22, %d24, %d48
3240*0Sstevel@tonic-gate	stda	%d48, [%i0]ASI_USER
3241*0Sstevel@tonic-gate	add	%i0, 8, %i0
3242*0Sstevel@tonic-gatecopyout_blkd24:
3243*0Sstevel@tonic-gate	subcc	%i4, 8, %i4
3244*0Sstevel@tonic-gate	blu,pn	%ncc, copyout_blkdone
3245*0Sstevel@tonic-gate	faligndata %d24, %d26, %d48
3246*0Sstevel@tonic-gate	stda	%d48, [%i0]ASI_USER
3247*0Sstevel@tonic-gate	add	%i0, 8, %i0
3248*0Sstevel@tonic-gatecopyout_blkd26:
3249*0Sstevel@tonic-gate	subcc	%i4, 8, %i4
3250*0Sstevel@tonic-gate	blu,pn	%ncc, copyout_blkdone
3251*0Sstevel@tonic-gate	faligndata %d26, %d28, %d48
3252*0Sstevel@tonic-gate	stda	%d48, [%i0]ASI_USER
3253*0Sstevel@tonic-gate	add	%i0, 8, %i0
3254*0Sstevel@tonic-gatecopyout_blkd28:
3255*0Sstevel@tonic-gate	subcc	%i4, 8, %i4
3256*0Sstevel@tonic-gate	blu,pn	%ncc, copyout_blkdone
3257*0Sstevel@tonic-gate	faligndata %d28, %d30, %d48
3258*0Sstevel@tonic-gate	stda	%d48, [%i0]ASI_USER
3259*0Sstevel@tonic-gate	add	%i0, 8, %i0
3260*0Sstevel@tonic-gatecopyout_blkd30:
3261*0Sstevel@tonic-gate	subcc	%i4, 8, %i4
3262*0Sstevel@tonic-gate	blu,pn	%ncc, copyout_blkdone
3263*0Sstevel@tonic-gate	fsrc1	%d30, %d0
3264*0Sstevel@tonic-gate	ba,a,pt	%ncc, copyout_blkleft
3265*0Sstevel@tonic-gatecopyout_blkd32:
3266*0Sstevel@tonic-gate	subcc	%i4, 8, %i4
3267*0Sstevel@tonic-gate	blu,pn	%ncc, copyout_blkdone
3268*0Sstevel@tonic-gate	faligndata %d32, %d34, %d48
3269*0Sstevel@tonic-gate	stda	%d48, [%i0]ASI_USER
3270*0Sstevel@tonic-gate	add	%i0, 8, %i0
3271*0Sstevel@tonic-gatecopyout_blkd34:
3272*0Sstevel@tonic-gate	subcc	%i4, 8, %i4
3273*0Sstevel@tonic-gate	blu,pn	%ncc, copyout_blkdone
3274*0Sstevel@tonic-gate	faligndata %d34, %d36, %d48
3275*0Sstevel@tonic-gate	stda	%d48, [%i0]ASI_USER
3276*0Sstevel@tonic-gate	add	%i0, 8, %i0
3277*0Sstevel@tonic-gatecopyout_blkd36:
3278*0Sstevel@tonic-gate	subcc	%i4, 8, %i4
3279*0Sstevel@tonic-gate	blu,pn	%ncc, copyout_blkdone
3280*0Sstevel@tonic-gate	faligndata %d36, %d38, %d48
3281*0Sstevel@tonic-gate	stda	%d48, [%i0]ASI_USER
3282*0Sstevel@tonic-gate	add	%i0, 8, %i0
3283*0Sstevel@tonic-gatecopyout_blkd38:
3284*0Sstevel@tonic-gate	subcc	%i4, 8, %i4
3285*0Sstevel@tonic-gate	blu,pn	%ncc, copyout_blkdone
3286*0Sstevel@tonic-gate	faligndata %d38, %d40, %d48
3287*0Sstevel@tonic-gate	stda	%d48, [%i0]ASI_USER
3288*0Sstevel@tonic-gate	add	%i0, 8, %i0
3289*0Sstevel@tonic-gatecopyout_blkd40:
3290*0Sstevel@tonic-gate	subcc	%i4, 8, %i4
3291*0Sstevel@tonic-gate	blu,pn	%ncc, copyout_blkdone
3292*0Sstevel@tonic-gate	faligndata %d40, %d42, %d48
3293*0Sstevel@tonic-gate	stda	%d48, [%i0]ASI_USER
3294*0Sstevel@tonic-gate	add	%i0, 8, %i0
3295*0Sstevel@tonic-gatecopyout_blkd42:
3296*0Sstevel@tonic-gate	subcc	%i4, 8, %i4
3297*0Sstevel@tonic-gate	blu,pn	%ncc, copyout_blkdone
3298*0Sstevel@tonic-gate	faligndata %d42, %d44, %d48
3299*0Sstevel@tonic-gate	stda	%d48, [%i0]ASI_USER
3300*0Sstevel@tonic-gate	add	%i0, 8, %i0
3301*0Sstevel@tonic-gatecopyout_blkd44:
3302*0Sstevel@tonic-gate	subcc	%i4, 8, %i4
3303*0Sstevel@tonic-gate	blu,pn	%ncc, copyout_blkdone
3304*0Sstevel@tonic-gate	faligndata %d44, %d46, %d48
3305*0Sstevel@tonic-gate	stda	%d48, [%i0]ASI_USER
3306*0Sstevel@tonic-gate	add	%i0, 8, %i0
3307*0Sstevel@tonic-gatecopyout_blkd46:
3308*0Sstevel@tonic-gate	subcc	%i4, 8, %i4
3309*0Sstevel@tonic-gate	blu,pn	%ncc, copyout_blkdone
3310*0Sstevel@tonic-gate	fsrc1	%d46, %d0
3311*0Sstevel@tonic-gate
3312*0Sstevel@tonic-gatecopyout_blkleft:
3313*0Sstevel@tonic-gate1:
3314*0Sstevel@tonic-gate	ldd	[%l7], %d2
3315*0Sstevel@tonic-gate	add	%l7, 8, %l7
3316*0Sstevel@tonic-gate	subcc	%i4, 8, %i4
3317*0Sstevel@tonic-gate	faligndata %d0, %d2, %d8
3318*0Sstevel@tonic-gate	stda	%d8, [%i0]ASI_USER
3319*0Sstevel@tonic-gate	blu,pn	%ncc, copyout_blkdone
3320*0Sstevel@tonic-gate	add	%i0, 8, %i0
3321*0Sstevel@tonic-gate	ldd	[%l7], %d0
3322*0Sstevel@tonic-gate	add	%l7, 8, %l7
3323*0Sstevel@tonic-gate	subcc	%i4, 8, %i4
3324*0Sstevel@tonic-gate	faligndata %d2, %d0, %d8
3325*0Sstevel@tonic-gate	stda	%d8, [%i0]ASI_USER
3326*0Sstevel@tonic-gate	bgeu,pt	%ncc, 1b
3327*0Sstevel@tonic-gate	add	%i0, 8, %i0
3328*0Sstevel@tonic-gate
3329*0Sstevel@tonic-gatecopyout_blkdone:
3330*0Sstevel@tonic-gate	tst	%i2
3331*0Sstevel@tonic-gate	bz,pt	%ncc, .copyout_exit
3332*0Sstevel@tonic-gate	and	%l3, 0x4, %l3		! fprs.du = fprs.dl = 0
3333*0Sstevel@tonic-gate
3334*0Sstevel@tonic-gate7:	ldub	[%i1], %i4
3335*0Sstevel@tonic-gate	inc	%i1
3336*0Sstevel@tonic-gate	stba	%i4, [%i0]ASI_USER
3337*0Sstevel@tonic-gate	inc	%i0
3338*0Sstevel@tonic-gate	deccc	%i2
3339*0Sstevel@tonic-gate	bgu	%ncc, 7b
3340*0Sstevel@tonic-gate	  nop
3341*0Sstevel@tonic-gate
3342*0Sstevel@tonic-gate.copyout_exit:
3343*0Sstevel@tonic-gate	membar	#StoreLoad|#StoreStore
3344*0Sstevel@tonic-gate	btst	FPUSED_FLAG, SAVED_LOFAULT
3345*0Sstevel@tonic-gate	bz	1f
3346*0Sstevel@tonic-gate	  nop
3347*0Sstevel@tonic-gate
3348*0Sstevel@tonic-gate	ld	[%fp + STACK_BIAS - SAVED_GSR_OFFSET], %o2
3349*0Sstevel@tonic-gate	wr	%o2, 0, %gsr		! restore gsr
3350*0Sstevel@tonic-gate
3351*0Sstevel@tonic-gate	ld	[%fp + STACK_BIAS - SAVED_FPRS_OFFSET], %o3
3352*0Sstevel@tonic-gate	btst	FPRS_FEF, %o3
3353*0Sstevel@tonic-gate	bz	4f
3354*0Sstevel@tonic-gate	  nop
3355*0Sstevel@tonic-gate
3356*0Sstevel@tonic-gate	! restore fpregs from stack
3357*0Sstevel@tonic-gate	membar	#Sync
3358*0Sstevel@tonic-gate	add	%fp, STACK_BIAS - 257, %o2
3359*0Sstevel@tonic-gate	and	%o2, -64, %o2
3360*0Sstevel@tonic-gate	ldda	[%o2]ASI_BLK_P, %d0
3361*0Sstevel@tonic-gate	add	%o2, 64, %o2
3362*0Sstevel@tonic-gate	ldda	[%o2]ASI_BLK_P, %d16
3363*0Sstevel@tonic-gate	add	%o2, 64, %o2
3364*0Sstevel@tonic-gate	ldda	[%o2]ASI_BLK_P, %d32
3365*0Sstevel@tonic-gate	add	%o2, 64, %o2
3366*0Sstevel@tonic-gate	ldda	[%o2]ASI_BLK_P, %d48
3367*0Sstevel@tonic-gate	membar	#Sync
3368*0Sstevel@tonic-gate
3369*0Sstevel@tonic-gate	ba,pt	%ncc, 1f
3370*0Sstevel@tonic-gate	  wr	%o3, 0, %fprs		! restore fprs
3371*0Sstevel@tonic-gate
3372*0Sstevel@tonic-gate4:
3373*0Sstevel@tonic-gate	FZERO				! zero all of the fpregs
3374*0Sstevel@tonic-gate	wr	%o3, 0, %fprs		! restore fprs
3375*0Sstevel@tonic-gate
3376*0Sstevel@tonic-gate1:
3377*0Sstevel@tonic-gate	andn	SAVED_LOFAULT, FPUSED_FLAG, SAVED_LOFAULT
3378*0Sstevel@tonic-gate	membar	#Sync			! sync error barrier
3379*0Sstevel@tonic-gate	stn	SAVED_LOFAULT, [THREAD_REG + T_LOFAULT]	! restore old t_lofault
3380*0Sstevel@tonic-gate	ret
3381*0Sstevel@tonic-gate	restore	%g0, 0, %o0
3382*0Sstevel@tonic-gate
3383*0Sstevel@tonic-gate.copyout_err:
3384*0Sstevel@tonic-gate	ldn	[THREAD_REG + T_COPYOPS], %o4
3385*0Sstevel@tonic-gate	brz	%o4, 2f
3386*0Sstevel@tonic-gate	nop
3387*0Sstevel@tonic-gate	ldn	[%o4 + CP_COPYOUT], %g2
3388*0Sstevel@tonic-gate	jmp	%g2
3389*0Sstevel@tonic-gate	nop
3390*0Sstevel@tonic-gate2:
3391*0Sstevel@tonic-gate	retl
3392*0Sstevel@tonic-gate	mov	-1, %o0
3393*0Sstevel@tonic-gate	SET_SIZE(copyout)
3394*0Sstevel@tonic-gate
3395*0Sstevel@tonic-gate#endif	/* lint */
3396*0Sstevel@tonic-gate
3397*0Sstevel@tonic-gate
3398*0Sstevel@tonic-gate#ifdef	lint
3399*0Sstevel@tonic-gate
/*
 * Lint-only stub for xcopyout(); it just returns 0.  The non-lint
 * build uses the assembly under the #else branch below instead.
 */
3400*0Sstevel@tonic-gate/*ARGSUSED*/
3401*0Sstevel@tonic-gateint
3402*0Sstevel@tonic-gatexcopyout(const void *kaddr, void *uaddr, size_t count)
3403*0Sstevel@tonic-gate{ return (0); }
3404*0Sstevel@tonic-gate
3405*0Sstevel@tonic-gate#else	/* lint */
3406*0Sstevel@tonic-gate
/*
 * xcopyout(kaddr, uaddr, count)
 *
 * Entry point: builds the address of .xcopyout_err in REAL_LOFAULT
 * (sethi/or pair) and branches to .do_copyout, which is defined outside
 * this block (presumably the copy body shared with copyout() -- confirm
 * at its definition).
 *
 * .xcopyout_err is the error path.  It loads the current thread's
 * T_COPYOPS pointer:
 *   - non-zero: jumps (jmp, not call) to the routine stored at offset
 *     CP_XCOPYOUT in that vector;
 *   - zero: returns to the caller with %o0 set to the value in %g1.
 * NOTE(review): %g1 is presumably the error code left by whatever
 * dispatches to the lofault handler -- confirm against the trap code.
 */
3407*0Sstevel@tonic-gate	ENTRY(xcopyout)
3408*0Sstevel@tonic-gate	sethi	%hi(.xcopyout_err), REAL_LOFAULT
3409*0Sstevel@tonic-gate	b	.do_copyout
/* delay slot: completes the handler address before .do_copyout runs */
3410*0Sstevel@tonic-gate	  or	REAL_LOFAULT, %lo(.xcopyout_err), REAL_LOFAULT
3411*0Sstevel@tonic-gate.xcopyout_err:
3412*0Sstevel@tonic-gate	ldn	[THREAD_REG + T_COPYOPS], %o4
/* no copyops vector installed: take the plain return at 2: */
3413*0Sstevel@tonic-gate	brz	%o4, 2f
3414*0Sstevel@tonic-gate	nop
/* hand off to the thread's CP_XCOPYOUT routine; it does not come back here */
3415*0Sstevel@tonic-gate	ldn	[%o4 + CP_XCOPYOUT], %g2
3416*0Sstevel@tonic-gate	jmp	%g2
3417*0Sstevel@tonic-gate	nop
3418*0Sstevel@tonic-gate2:
3419*0Sstevel@tonic-gate	retl
/* delay slot: return value is whatever %g1 holds on entry to this path */
3420*0Sstevel@tonic-gate	mov	%g1, %o0
3421*0Sstevel@tonic-gate	SET_SIZE(xcopyout)
3422*0Sstevel@tonic-gate
3423*0Sstevel@tonic-gate#endif	/* lint */
3424*0Sstevel@tonic-gate
3425*0Sstevel@tonic-gate#ifdef	lint
3426*0Sstevel@tonic-gate
/*
 * Lint-only stub for xcopyout_little(); it just returns 0.  The
 * non-lint build uses the assembly under the #else branch below instead.
 */
3427*0Sstevel@tonic-gate/*ARGSUSED*/
3428*0Sstevel@tonic-gateint
3429*0Sstevel@tonic-gatexcopyout_little(const void *kaddr, void *uaddr, size_t count)
3430*0Sstevel@tonic-gate{ return (0); }
3431*0Sstevel@tonic-gate
3432*0Sstevel@tonic-gate#else	/* lint */
3433*0Sstevel@tonic-gate
/*
 * xcopyout_little(kaddr, uaddr, count)
 *
 * Byte-at-a-time copy that reverses byte order: for i = 0 .. count - 1
 * the byte at kaddr[count - 1 - i] is stored to uaddr[i] through
 * ASI_AIUSL.  Returns 0 on the normal path, including count == 0.
 *
 * While the copy runs, t_lofault points at .little_err (defined outside
 * this block); the previous t_lofault value is held in %o5 and written
 * back before returning.
 *
 * Register use:
 *	%o0	biased source pointer; %o0 + %o3 addresses the next source byte
 *	%o1	uaddr + count; %o1 + %o3 addresses the next destination byte
 *	%o2	count (unchanged)
 *	%o3	index that runs from -count up to 0
 *	%o4	handler address during setup, then the byte in flight
 *	%o5	saved t_lofault
 */
3434*0Sstevel@tonic-gate	ENTRY(xcopyout_little)
3435*0Sstevel@tonic-gate	sethi	%hi(.little_err), %o4
3436*0Sstevel@tonic-gate	ldn	[THREAD_REG + T_LOFAULT], %o5
3437*0Sstevel@tonic-gate	or	%o4, %lo(.little_err), %o4
3438*0Sstevel@tonic-gate	membar	#Sync			! sync error barrier
3439*0Sstevel@tonic-gate	stn	%o4, [THREAD_REG + T_LOFAULT]
3440*0Sstevel@tonic-gate
/* %o3 = -count; the condition codes also record whether count == 0 */
3441*0Sstevel@tonic-gate	subcc	%g0, %o2, %o3
3442*0Sstevel@tonic-gate	add	%o0, %o2, %o0
3443*0Sstevel@tonic-gate	bz,pn	%ncc, 2f		! check for zero bytes
/* delay slot (not annulled, so it runs on both paths): %o4 = count - 1 */
3444*0Sstevel@tonic-gate	sub	%o2, 1, %o4
/*
 * %o0 is now kaddr + 2 * count - 1, so %o0 + %o3 (= %o0 - count) is
 * kaddr + count - 1, the last source byte.
 */
3445*0Sstevel@tonic-gate	add	%o0, %o4, %o0		! start w/last byte
3446*0Sstevel@tonic-gate	add	%o1, %o2, %o1
3447*0Sstevel@tonic-gate	ldub	[%o0+%o3], %o4
3448*0Sstevel@tonic-gate
/*
 * Loop: store the byte in flight at uaddr + count + %o3, then step.
 * %o3 goes up by 1 and %o0 down by 2, so the effective source address
 * %o0 + %o3 moves back one byte while the destination moves forward one.
 * inccc sets carry only when %o3 wraps from -1 to 0, which ends the
 * loop; the annulled delay-slot load executes only when the branch is
 * taken, so no source byte is read after the final store.
 */
3449*0Sstevel@tonic-gate1:	stba	%o4, [%o1+%o3]ASI_AIUSL
3450*0Sstevel@tonic-gate	inccc	%o3
3451*0Sstevel@tonic-gate	sub	%o0, 2, %o0		! get next byte
3452*0Sstevel@tonic-gate	bcc,a,pt %ncc, 1b
3453*0Sstevel@tonic-gate	  ldub	[%o0+%o3], %o4
3454*0Sstevel@tonic-gate
3455*0Sstevel@tonic-gate2:	membar	#Sync			! sync error barrier
3456*0Sstevel@tonic-gate	stn	%o5, [THREAD_REG + T_LOFAULT]	! restore old t_lofault
3457*0Sstevel@tonic-gate	retl
3458*0Sstevel@tonic-gate	mov	%g0, %o0		! return (0)
3459*0Sstevel@tonic-gate	SET_SIZE(xcopyout_little)
3460*0Sstevel@tonic-gate
3461*0Sstevel@tonic-gate#endif	/* lint */
3462*0Sstevel@tonic-gate
3463*0Sstevel@tonic-gate/*
3464*0Sstevel@tonic-gate * Copy user data to kernel space (copyin/xcopyin/xcopyin_little)
3465*0Sstevel@tonic-gate */
3466*0Sstevel@tonic-gate
3467*0Sstevel@tonic-gate#if defined(lint)
3468*0Sstevel@tonic-gate
/*
 * Lint-only stub: gives lint the C signature of copyin.  The routine
 * that is actually assembled is the ENTRY(copyin) code in the "#else"
 * branch of this "#if defined(lint)".
 */
3469*0Sstevel@tonic-gate/*ARGSUSED*/
3470*0Sstevel@tonic-gateint
3471*0Sstevel@tonic-gatecopyin(const void *uaddr, void *kaddr, size_t count)
3472*0Sstevel@tonic-gate{ return (0); }
3473*0Sstevel@tonic-gate
3474*0Sstevel@tonic-gate#else	/* lint */
3475*0Sstevel@tonic-gate
3476*0Sstevel@tonic-gate	ENTRY(copyin)
3477*0Sstevel@tonic-gate	sethi	%hi(.copyin_err), REAL_LOFAULT
3478*0Sstevel@tonic-gate	or	REAL_LOFAULT, %lo(.copyin_err), REAL_LOFAULT
3479*0Sstevel@tonic-gate
3480*0Sstevel@tonic-gate.do_copyin:
3481*0Sstevel@tonic-gate	!
3482*0Sstevel@tonic-gate	! Check the length and bail if zero.
3483*0Sstevel@tonic-gate	!
3484*0Sstevel@tonic-gate	tst	%o2
3485*0Sstevel@tonic-gate	bnz,pt	%ncc, 1f
3486*0Sstevel@tonic-gate	  nop
3487*0Sstevel@tonic-gate	retl
3488*0Sstevel@tonic-gate	  clr	%o0
3489*0Sstevel@tonic-gate1:
3490*0Sstevel@tonic-gate	sethi	%hi(copyio_fault), %o4
3491*0Sstevel@tonic-gate	or	%o4, %lo(copyio_fault), %o4
3492*0Sstevel@tonic-gate	sethi	%hi(copyio_fault_nowindow), %o3
3493*0Sstevel@tonic-gate	ldn	[THREAD_REG + T_LOFAULT], SAVED_LOFAULT
3494*0Sstevel@tonic-gate	or	%o3, %lo(copyio_fault_nowindow), %o3
3495*0Sstevel@tonic-gate	membar	#Sync
3496*0Sstevel@tonic-gate	stn	%o3, [THREAD_REG + T_LOFAULT]
3497*0Sstevel@tonic-gate
3498*0Sstevel@tonic-gate	mov	%o0, SAVE_SRC
3499*0Sstevel@tonic-gate	mov	%o1, SAVE_DST
3500*0Sstevel@tonic-gate	mov	%o2, SAVE_COUNT
3501*0Sstevel@tonic-gate
3502*0Sstevel@tonic-gate	!
3503*0Sstevel@tonic-gate	! Check to see if we're more than SMALL_LIMIT.
3504*0Sstevel@tonic-gate	!
3505*0Sstevel@tonic-gate	subcc	%o2, SMALL_LIMIT, %o3
3506*0Sstevel@tonic-gate	bgu,a,pt %ncc, .dci_ns
3507*0Sstevel@tonic-gate	or	%o0, %o1, %o3
3508*0Sstevel@tonic-gate	!
3509*0Sstevel@tonic-gate	! What was previously ".small_copyin"
3510*0Sstevel@tonic-gate	!
3511*0Sstevel@tonic-gate.dcibcp:
3512*0Sstevel@tonic-gate	sub	%g0, %o2, %o3		! setup for copy loop
3513*0Sstevel@tonic-gate	add	%o0, %o2, %o0
3514*0Sstevel@tonic-gate	add	%o1, %o2, %o1
3515*0Sstevel@tonic-gate	ba,pt	%ncc, .dcicl
3516*0Sstevel@tonic-gate	lduba	[%o0 + %o3]ASI_USER, %o4
3517*0Sstevel@tonic-gate	!
3518*0Sstevel@tonic-gate	! %o0 and %o1 point at the end and remain pointing at the end
3519*0Sstevel@tonic-gate	! of their buffers. We pull things out by adding %o3 (which is
3520*0Sstevel@tonic-gate	! the negation of the length) to the buffer end which gives us
3521*0Sstevel@tonic-gate	! the current location in the buffers. By incrementing %o3 we walk
3522*0Sstevel@tonic-gate	! through both buffers without having to bump each buffer's
3523*0Sstevel@tonic-gate	! pointer. A very fast 4 instruction loop.
3524*0Sstevel@tonic-gate	!
3525*0Sstevel@tonic-gate	.align 16
3526*0Sstevel@tonic-gate.dcicl:
3527*0Sstevel@tonic-gate	stb	%o4, [%o1 + %o3]
3528*0Sstevel@tonic-gate	inccc	%o3
3529*0Sstevel@tonic-gate	bl,a,pt %ncc, .dcicl
3530*0Sstevel@tonic-gate	lduba	[%o0 + %o3]ASI_USER, %o4
3531*0Sstevel@tonic-gate	!
3532*0Sstevel@tonic-gate	! We're done. Go home.
3533*0Sstevel@tonic-gate	!
3534*0Sstevel@tonic-gate	membar	#Sync
3535*0Sstevel@tonic-gate	stn	SAVED_LOFAULT, [THREAD_REG + T_LOFAULT]
3536*0Sstevel@tonic-gate	retl
3537*0Sstevel@tonic-gate	clr	%o0
3538*0Sstevel@tonic-gate	!
3539*0Sstevel@tonic-gate	! Try aligned copies from here.
3540*0Sstevel@tonic-gate	!
3541*0Sstevel@tonic-gate.dci_ns:
3542*0Sstevel@tonic-gate	!
3543*0Sstevel@tonic-gate	! See if we're single byte aligned. If we are, check the
3544*0Sstevel@tonic-gate	! limit for single byte copies. If we're smaller, or equal,
3545*0Sstevel@tonic-gate	! bounce to the byte for byte copy loop. Otherwise do it in
3546*0Sstevel@tonic-gate	! HW (if enabled).
3547*0Sstevel@tonic-gate	!
3548*0Sstevel@tonic-gate	btst	1, %o3
3549*0Sstevel@tonic-gate	bz,a,pt	%icc, .dcih8
3550*0Sstevel@tonic-gate	btst	7, %o3
3551*0Sstevel@tonic-gate	!
3552*0Sstevel@tonic-gate	! We're single byte aligned.
3553*0Sstevel@tonic-gate	!
3554*0Sstevel@tonic-gate	subcc	%o2, VIS_COPY_THRESHOLD, %o3
3555*0Sstevel@tonic-gate	bleu,pt	%ncc, .dcibcp
3556*0Sstevel@tonic-gate	sethi	%hi(hw_copy_limit_1), %o3
3557*0Sstevel@tonic-gate	ld	[%o3 + %lo(hw_copy_limit_1)], %o3
3558*0Sstevel@tonic-gate	!
3559*0Sstevel@tonic-gate	! Is HW copy on? If not do everything byte for byte.
3560*0Sstevel@tonic-gate	!
3561*0Sstevel@tonic-gate	tst	%o3
3562*0Sstevel@tonic-gate	bz,pn	%icc, .dcibcp
3563*0Sstevel@tonic-gate	subcc	%o3, %o2, %o3
3564*0Sstevel@tonic-gate	!
3565*0Sstevel@tonic-gate	! Are we bigger than the HW limit? If not
3566*0Sstevel@tonic-gate	! go to byte for byte.
3567*0Sstevel@tonic-gate	!
3568*0Sstevel@tonic-gate	bge,pt	%ncc, .dcibcp
3569*0Sstevel@tonic-gate	nop
3570*0Sstevel@tonic-gate	!
3571*0Sstevel@tonic-gate	! We're big enough and copy is on. Do it with HW.
3572*0Sstevel@tonic-gate	!
3573*0Sstevel@tonic-gate	ba,pt	%ncc, .big_copyin
3574*0Sstevel@tonic-gate	nop
3575*0Sstevel@tonic-gate.dcih8:
3576*0Sstevel@tonic-gate	!
3577*0Sstevel@tonic-gate	! 8 byte aligned?
3578*0Sstevel@tonic-gate	!
3579*0Sstevel@tonic-gate	bnz,a	%ncc, .dcih4
3580*0Sstevel@tonic-gate	btst	3, %o3
3581*0Sstevel@tonic-gate	!
3582*0Sstevel@tonic-gate	! We're eight byte aligned.
3583*0Sstevel@tonic-gate	!
3584*0Sstevel@tonic-gate	subcc	%o2, VIS_COPY_THRESHOLD, %o3
3585*0Sstevel@tonic-gate	bleu,pt	%ncc, .dcis8
3586*0Sstevel@tonic-gate	sethi	%hi(hw_copy_limit_8), %o3
3587*0Sstevel@tonic-gate	ld	[%o3 + %lo(hw_copy_limit_8)], %o3
3588*0Sstevel@tonic-gate	!
3589*0Sstevel@tonic-gate	! Is HW assist on? If not, do it with the aligned copy.
3590*0Sstevel@tonic-gate	!
3591*0Sstevel@tonic-gate	tst	%o3
3592*0Sstevel@tonic-gate	bz,pn	%icc, .dcis8
3593*0Sstevel@tonic-gate	subcc	%o3, %o2, %o3
3594*0Sstevel@tonic-gate	bge	%ncc, .dcis8
3595*0Sstevel@tonic-gate	nop
3596*0Sstevel@tonic-gate	ba,pt	%ncc, .big_copyin
3597*0Sstevel@tonic-gate	nop
3598*0Sstevel@tonic-gate.dcis8:
3599*0Sstevel@tonic-gate	!
3600*0Sstevel@tonic-gate	! Housekeeping for copy loops. Uses same idea as in the byte for
3601*0Sstevel@tonic-gate	! byte copy loop above.
3602*0Sstevel@tonic-gate	!
3603*0Sstevel@tonic-gate	add	%o0, %o2, %o0
3604*0Sstevel@tonic-gate	add	%o1, %o2, %o1
3605*0Sstevel@tonic-gate	sub	%g0, %o2, %o3
3606*0Sstevel@tonic-gate	ba,pt	%ncc, .didebc
3607*0Sstevel@tonic-gate	srl	%o2, 3, %o2		! Number of 8 byte chunks to copy
3608*0Sstevel@tonic-gate	!
3609*0Sstevel@tonic-gate	! 4 byte aligned?
3610*0Sstevel@tonic-gate	!
3611*0Sstevel@tonic-gate.dcih4:
3612*0Sstevel@tonic-gate	bnz	%ncc, .dcih2
3613*0Sstevel@tonic-gate	subcc	%o2, VIS_COPY_THRESHOLD, %o3
3614*0Sstevel@tonic-gate	bleu,pt	%ncc, .dcis4
3615*0Sstevel@tonic-gate	sethi	%hi(hw_copy_limit_4), %o3
3616*0Sstevel@tonic-gate	ld	[%o3 + %lo(hw_copy_limit_4)], %o3
3617*0Sstevel@tonic-gate	!
3618*0Sstevel@tonic-gate	! Is HW assist on? If not, do it with the aligned copy.
3619*0Sstevel@tonic-gate	!
3620*0Sstevel@tonic-gate	tst	%o3
3621*0Sstevel@tonic-gate	bz,pn	%icc, .dcis4
3622*0Sstevel@tonic-gate	subcc	%o3, %o2, %o3
3623*0Sstevel@tonic-gate	!
3624*0Sstevel@tonic-gate	! We're negative if our size is larger than hw_copy_limit_4.
3625*0Sstevel@tonic-gate	!
3626*0Sstevel@tonic-gate	bge	%ncc, .dcis4
3627*0Sstevel@tonic-gate	nop
3628*0Sstevel@tonic-gate	ba,pt	%ncc, .big_copyin
3629*0Sstevel@tonic-gate	nop
3630*0Sstevel@tonic-gate.dcis4:
3631*0Sstevel@tonic-gate	!
3632*0Sstevel@tonic-gate	! Housekeeping for copy loops. Uses same idea as in the byte
3633*0Sstevel@tonic-gate	! for byte copy loop above.
3634*0Sstevel@tonic-gate	!
3635*0Sstevel@tonic-gate	add	%o0, %o2, %o0
3636*0Sstevel@tonic-gate	add	%o1, %o2, %o1
3637*0Sstevel@tonic-gate	sub	%g0, %o2, %o3
3638*0Sstevel@tonic-gate	ba,pt	%ncc, .didfbc
3639*0Sstevel@tonic-gate	srl	%o2, 2, %o2		! Number of 4 byte chunks to copy
3640*0Sstevel@tonic-gate.dcih2:
3641*0Sstevel@tonic-gate	!
3642*0Sstevel@tonic-gate	! We're two byte aligned. Check for "smallness"
3643*0Sstevel@tonic-gate	! done in delay at .dcih4
3644*0Sstevel@tonic-gate	!
3645*0Sstevel@tonic-gate	bleu,pt	%ncc, .dcis2
3646*0Sstevel@tonic-gate	sethi	%hi(hw_copy_limit_2), %o3
3647*0Sstevel@tonic-gate	ld	[%o3 + %lo(hw_copy_limit_2)], %o3
3648*0Sstevel@tonic-gate	!
3649*0Sstevel@tonic-gate	! Is HW assist on? If not, do it with the aligned copy.
3650*0Sstevel@tonic-gate	!
3651*0Sstevel@tonic-gate	tst	%o3
3652*0Sstevel@tonic-gate	bz,pn	%icc, .dcis2
3653*0Sstevel@tonic-gate	subcc	%o3, %o2, %o3
3654*0Sstevel@tonic-gate	!
3655*0Sstevel@tonic-gate	! Are we larger than the HW limit?
3656*0Sstevel@tonic-gate	!
3657*0Sstevel@tonic-gate	bge	%ncc, .dcis2
3658*0Sstevel@tonic-gate	nop
3659*0Sstevel@tonic-gate	!
3660*0Sstevel@tonic-gate	! HW assist is on and we're large enough to use it.
3661*0Sstevel@tonic-gate	!
3662*0Sstevel@tonic-gate	ba,pt	%ncc, .big_copyin
3663*0Sstevel@tonic-gate	nop
3664*0Sstevel@tonic-gate	!
3665*0Sstevel@tonic-gate	! Housekeeping for copy loops. Uses same idea as in the byte
3666*0Sstevel@tonic-gate	! for byte copy loop above.
3667*0Sstevel@tonic-gate	!
3668*0Sstevel@tonic-gate.dcis2:
3669*0Sstevel@tonic-gate	add	%o0, %o2, %o0
3670*0Sstevel@tonic-gate	add	%o1, %o2, %o1
3671*0Sstevel@tonic-gate	sub	%g0, %o2, %o3
3672*0Sstevel@tonic-gate	ba,pt	%ncc, .didtbc
3673*0Sstevel@tonic-gate	srl	%o2, 1, %o2		! Number of 2 byte chunks to copy
3674*0Sstevel@tonic-gate	!
3675*0Sstevel@tonic-gate.small_copyin:
3676*0Sstevel@tonic-gate	!
3677*0Sstevel@tonic-gate	! Why are we doing this AGAIN? There are certain conditions in
3678*0Sstevel@tonic-gate	! big copyin that will cause us to forgo the HW assisted copies
3679*0Sstevel@tonic-gate	! and bounce back to a non-hw assisted copy. This dispatches
3680*0Sstevel@tonic-gate	! those copies. Note that we branch around this in the main line
3681*0Sstevel@tonic-gate	! code.
3682*0Sstevel@tonic-gate	!
3683*0Sstevel@tonic-gate	! We make no check for limits or HW enablement here. We've
3684*0Sstevel@tonic-gate	! already been told that we're a poster child so just go off
3685*0Sstevel@tonic-gate	! and do it.
3686*0Sstevel@tonic-gate	!
3687*0Sstevel@tonic-gate	or	%o0, %o1, %o3
3688*0Sstevel@tonic-gate	btst	1, %o3
3689*0Sstevel@tonic-gate	bnz	%icc, .dcibcp		! Most likely
3690*0Sstevel@tonic-gate	btst	7, %o3
3691*0Sstevel@tonic-gate	bz	%icc, .dcis8
3692*0Sstevel@tonic-gate	btst	3, %o3
3693*0Sstevel@tonic-gate	bz	%icc, .dcis4
3694*0Sstevel@tonic-gate	nop
3695*0Sstevel@tonic-gate	ba,pt	%ncc, .dcis2
3696*0Sstevel@tonic-gate	nop
3697*0Sstevel@tonic-gate	!
3698*0Sstevel@tonic-gate	! Eight byte aligned copies. A steal from the original .small_copyin
3699*0Sstevel@tonic-gate	! with modifications. %o2 is number of 8 byte chunks to copy. When
3700*0Sstevel@tonic-gate	! done, we examine %o3. If this is < 0, we have 1 - 7 bytes more
3701*0Sstevel@tonic-gate	! to copy.
3702*0Sstevel@tonic-gate	!
3703*0Sstevel@tonic-gate	.align 32
3704*0Sstevel@tonic-gate.didebc:
3705*0Sstevel@tonic-gate	ldxa	[%o0 + %o3]ASI_USER, %o4
3706*0Sstevel@tonic-gate	deccc	%o2
3707*0Sstevel@tonic-gate	stx	%o4, [%o1 + %o3]
3708*0Sstevel@tonic-gate	bg,pt	%ncc, .didebc
3709*0Sstevel@tonic-gate	addcc	%o3, 8, %o3
3710*0Sstevel@tonic-gate	!
3711*0Sstevel@tonic-gate	! End of copy loop. Most 8 byte aligned copies end here.
3712*0Sstevel@tonic-gate	!
3713*0Sstevel@tonic-gate	bz,pt	%ncc, .dcifh
3714*0Sstevel@tonic-gate	nop
3715*0Sstevel@tonic-gate	!
3716*0Sstevel@tonic-gate	! Something is left. Do it byte for byte.
3717*0Sstevel@tonic-gate	!
3718*0Sstevel@tonic-gate	ba,pt	%ncc, .dcicl
3719*0Sstevel@tonic-gate	lduba	[%o0 + %o3]ASI_USER, %o4
3720*0Sstevel@tonic-gate	!
3721*0Sstevel@tonic-gate	! 4 byte copy loop. %o2 is number of 4 byte chunks to copy.
3722*0Sstevel@tonic-gate	!
3723*0Sstevel@tonic-gate	.align 32
3724*0Sstevel@tonic-gate.didfbc:
3725*0Sstevel@tonic-gate	lduwa	[%o0 + %o3]ASI_USER, %o4
3726*0Sstevel@tonic-gate	deccc	%o2
3727*0Sstevel@tonic-gate	st	%o4, [%o1 + %o3]
3728*0Sstevel@tonic-gate	bg,pt	%ncc, .didfbc
3729*0Sstevel@tonic-gate	addcc	%o3, 4, %o3
3730*0Sstevel@tonic-gate	!
3731*0Sstevel@tonic-gate	! End of copy loop. Most 4 byte aligned copies end here.
3732*0Sstevel@tonic-gate	!
3733*0Sstevel@tonic-gate	bz,pt	%ncc, .dcifh
3734*0Sstevel@tonic-gate	nop
3735*0Sstevel@tonic-gate	!
3736*0Sstevel@tonic-gate	! Something is left. Do it byte for byte.
3737*0Sstevel@tonic-gate	!
3738*0Sstevel@tonic-gate	ba,pt	%ncc, .dcicl
3739*0Sstevel@tonic-gate	lduba	[%o0 + %o3]ASI_USER, %o4
3740*0Sstevel@tonic-gate	!
3741*0Sstevel@tonic-gate	! 2 byte aligned copy loop. %o2 is number of 2 byte chunks to
3742*0Sstevel@tonic-gate	! copy.
3743*0Sstevel@tonic-gate	!
3744*0Sstevel@tonic-gate	.align 32
3745*0Sstevel@tonic-gate.didtbc:
3746*0Sstevel@tonic-gate	lduha	[%o0 + %o3]ASI_USER, %o4
3747*0Sstevel@tonic-gate	deccc	%o2
3748*0Sstevel@tonic-gate	sth	%o4, [%o1 + %o3]
3749*0Sstevel@tonic-gate	bg,pt	%ncc, .didtbc
3750*0Sstevel@tonic-gate	addcc	%o3, 2, %o3
3751*0Sstevel@tonic-gate	!
3752*0Sstevel@tonic-gate	! End of copy loop. Most 2 byte aligned copies end here.
3753*0Sstevel@tonic-gate	!
3754*0Sstevel@tonic-gate	bz,pt	%ncc, .dcifh
3755*0Sstevel@tonic-gate	nop
3756*0Sstevel@tonic-gate	!
3757*0Sstevel@tonic-gate	! Deal with the last byte
3758*0Sstevel@tonic-gate	!
3759*0Sstevel@tonic-gate	lduba	[%o0 + %o3]ASI_USER, %o4
3760*0Sstevel@tonic-gate	stb	%o4, [%o1 + %o3]
3761*0Sstevel@tonic-gate.dcifh:
3762*0Sstevel@tonic-gate	membar	#Sync
3763*0Sstevel@tonic-gate	stn     SAVED_LOFAULT, [THREAD_REG + T_LOFAULT]   ! restore old t_lofault
3764*0Sstevel@tonic-gate	retl
3765*0Sstevel@tonic-gate	clr	%o0
3766*0Sstevel@tonic-gate
3767*0Sstevel@tonic-gate.big_copyin:
3768*0Sstevel@tonic-gate	!
3769*0Sstevel@tonic-gate	! Are we using the FP registers?
3770*0Sstevel@tonic-gate	!
3771*0Sstevel@tonic-gate	rd	%fprs, %o3		! check for unused fp
3772*0Sstevel@tonic-gate	btst	FPRS_FEF, %o3
3773*0Sstevel@tonic-gate	bnz	%ncc, .copyin_fpregs_inuse
3774*0Sstevel@tonic-gate	nop
3775*0Sstevel@tonic-gate	!
3776*0Sstevel@tonic-gate	! We're going off to do a block copy.
3777*0Sstevel@tonic-gate	! Switch fault handlers and grab a window. We
3778*0Sstevel@tonic-gate	! don't do a membar #Sync since we've done only
3779*0Sstevel@tonic-gate	! kernel data to this point.
3780*0Sstevel@tonic-gate	!
3781*0Sstevel@tonic-gate	stn	%o4, [THREAD_REG + T_LOFAULT]
3782*0Sstevel@tonic-gate	save	%sp, -SA(MINFRAME + HWCOPYFRAMESIZE), %sp
3783*0Sstevel@tonic-gate	!
3784*0Sstevel@tonic-gate	! %o3 is %i3 after the save...
3785*0Sstevel@tonic-gate	!
3786*0Sstevel@tonic-gate	st	%i3, [%fp + STACK_BIAS - SAVED_FPRS_OFFSET]
3787*0Sstevel@tonic-gate	ba,pt	%ncc, .do_blockcopyin
3788*0Sstevel@tonic-gate	wr	%g0, FPRS_FEF, %fprs
3789*0Sstevel@tonic-gate.copyin_fpregs_inuse:
3790*0Sstevel@tonic-gate	!
3791*0Sstevel@tonic-gate	! We're here if the FP regs are in use. Need to see if the request
3792*0Sstevel@tonic-gate	! exceeds our suddenly larger minimum.
3793*0Sstevel@tonic-gate	!
3794*0Sstevel@tonic-gate	cmp	%i2, VIS_COPY_THRESHOLD+(64*4)
3795*0Sstevel@tonic-gate	bl	%ncc, .small_copyin
3796*0Sstevel@tonic-gate	nop
3797*0Sstevel@tonic-gate	!
3798*0Sstevel@tonic-gate	! We're going off and do a block copy.
3799*0Sstevel@tonic-gate	! Change to the heavy duty fault handler and grab a window first.
3800*0Sstevel@tonic-gate	! New handler is passed in
3801*0Sstevel@tonic-gate	!
3802*0Sstevel@tonic-gate	stn	%o4, [THREAD_REG + T_LOFAULT]
3803*0Sstevel@tonic-gate	save	%sp, -SA(MINFRAME + HWCOPYFRAMESIZE), %sp
3804*0Sstevel@tonic-gate	!
3805*0Sstevel@tonic-gate	! %o3 is now %i3
3806*0Sstevel@tonic-gate	!
3807*0Sstevel@tonic-gate	st	%i3, [%fp + STACK_BIAS - SAVED_FPRS_OFFSET]
3808*0Sstevel@tonic-gate
3809*0Sstevel@tonic-gate	! save in-use fpregs on stack
3810*0Sstevel@tonic-gate	wr	%g0, FPRS_FEF, %fprs
3811*0Sstevel@tonic-gate	membar	#Sync
3812*0Sstevel@tonic-gate	add	%fp, STACK_BIAS - 257, %o2
3813*0Sstevel@tonic-gate	and	%o2, -64, %o2
3814*0Sstevel@tonic-gate	stda	%d0, [%o2]ASI_BLK_P
3815*0Sstevel@tonic-gate	add	%o2, 64, %o2
3816*0Sstevel@tonic-gate	stda	%d16, [%o2]ASI_BLK_P
3817*0Sstevel@tonic-gate	add	%o2, 64, %o2
3818*0Sstevel@tonic-gate	stda	%d32, [%o2]ASI_BLK_P
3819*0Sstevel@tonic-gate	add	%o2, 64, %o2
3820*0Sstevel@tonic-gate	stda	%d48, [%o2]ASI_BLK_P
3821*0Sstevel@tonic-gate	membar	#Sync
3822*0Sstevel@tonic-gate
3823*0Sstevel@tonic-gate.do_blockcopyin:
3824*0Sstevel@tonic-gate	membar	#StoreStore|#StoreLoad|#LoadStore
3825*0Sstevel@tonic-gate
3826*0Sstevel@tonic-gate	rd	%gsr, %o2
3827*0Sstevel@tonic-gate	st	%o2, [%fp + STACK_BIAS - SAVED_GSR_OFFSET]	! save gsr
3828*0Sstevel@tonic-gate
3829*0Sstevel@tonic-gate	! Set the lower bit in the saved t_lofault to indicate
3830*0Sstevel@tonic-gate	! that we need to clear the %fprs register on the way
3831*0Sstevel@tonic-gate	! out
3832*0Sstevel@tonic-gate	or	SAVED_LOFAULT, FPUSED_FLAG, SAVED_LOFAULT
3833*0Sstevel@tonic-gate
3834*0Sstevel@tonic-gate	! Swap src/dst since the code below is memcpy code
3835*0Sstevel@tonic-gate	! and memcpy/bcopy have different calling sequences
3836*0Sstevel@tonic-gate	mov	%i1, %i5
3837*0Sstevel@tonic-gate	mov	%i0, %i1
3838*0Sstevel@tonic-gate	mov	%i5, %i0
3839*0Sstevel@tonic-gate
3840*0Sstevel@tonic-gate!!! This code is nearly identical to the version in the sun4u
3841*0Sstevel@tonic-gate!!! libc_psr.  Most bugfixes made to that file should be
3842*0Sstevel@tonic-gate!!! merged into this routine.
3843*0Sstevel@tonic-gate
3844*0Sstevel@tonic-gate	andcc	%i0, 7, %o3
3845*0Sstevel@tonic-gate	bz	copyin_blkcpy
3846*0Sstevel@tonic-gate	sub	%o3, 8, %o3
3847*0Sstevel@tonic-gate	neg	%o3
3848*0Sstevel@tonic-gate	sub	%i2, %o3, %i2
3849*0Sstevel@tonic-gate
3850*0Sstevel@tonic-gate	! Align Destination on double-word boundary
3851*0Sstevel@tonic-gate
3852*0Sstevel@tonic-gate2:	lduba	[%i1]ASI_USER, %o4
3853*0Sstevel@tonic-gate	inc	%i1
3854*0Sstevel@tonic-gate	inc	%i0
3855*0Sstevel@tonic-gate	deccc	%o3
3856*0Sstevel@tonic-gate	bgu	%ncc, 2b
3857*0Sstevel@tonic-gate	stb	%o4, [%i0-1]
3858*0Sstevel@tonic-gatecopyin_blkcpy:
3859*0Sstevel@tonic-gate	andcc	%i0, 63, %i3
3860*0Sstevel@tonic-gate	bz,pn	%ncc, copyin_blalign	! now block aligned
3861*0Sstevel@tonic-gate	sub	%i3, 64, %i3
3862*0Sstevel@tonic-gate	neg	%i3			! bytes till block aligned
3863*0Sstevel@tonic-gate	sub	%i2, %i3, %i2		! update %i2 with new count
3864*0Sstevel@tonic-gate
3865*0Sstevel@tonic-gate	! Copy %i3 bytes till dst is block (64 byte) aligned. use
3866*0Sstevel@tonic-gate	! double word copies.
3867*0Sstevel@tonic-gate
3868*0Sstevel@tonic-gate	alignaddr %i1, %g0, %g1
3869*0Sstevel@tonic-gate	ldda	[%g1]ASI_USER, %d0
3870*0Sstevel@tonic-gate	add	%g1, 8, %g1
3871*0Sstevel@tonic-gate6:
3872*0Sstevel@tonic-gate	ldda	[%g1]ASI_USER, %d2
3873*0Sstevel@tonic-gate	add	%g1, 8, %g1
3874*0Sstevel@tonic-gate	subcc	%i3, 8, %i3
3875*0Sstevel@tonic-gate	faligndata %d0, %d2, %d8
3876*0Sstevel@tonic-gate	std	%d8, [%i0]
3877*0Sstevel@tonic-gate	add	%i1, 8, %i1
3878*0Sstevel@tonic-gate	bz,pn	%ncc, copyin_blalign
3879*0Sstevel@tonic-gate	add	%i0, 8, %i0
3880*0Sstevel@tonic-gate	ldda	[%g1]ASI_USER, %d0
3881*0Sstevel@tonic-gate	add	%g1, 8, %g1
3882*0Sstevel@tonic-gate	subcc	%i3, 8, %i3
3883*0Sstevel@tonic-gate	faligndata %d2, %d0, %d8
3884*0Sstevel@tonic-gate	std	%d8, [%i0]
3885*0Sstevel@tonic-gate	add	%i1, 8, %i1
3886*0Sstevel@tonic-gate	bgu,pn	%ncc, 6b
3887*0Sstevel@tonic-gate	add	%i0, 8, %i0
3888*0Sstevel@tonic-gate
3889*0Sstevel@tonic-gatecopyin_blalign:
3890*0Sstevel@tonic-gate	membar	#StoreLoad
3891*0Sstevel@tonic-gate	! %i2 = total length
3892*0Sstevel@tonic-gate	! %i3 = blocks	(length - 64) / 64
3893*0Sstevel@tonic-gate	! %i4 = doubles remaining  (length - blocks)
3894*0Sstevel@tonic-gate	sub	%i2, 64, %i3
3895*0Sstevel@tonic-gate	andn	%i3, 63, %i3
3896*0Sstevel@tonic-gate	sub	%i2, %i3, %i4
3897*0Sstevel@tonic-gate	andn	%i4, 7, %i4
3898*0Sstevel@tonic-gate	sub	%i4, 16, %i4
3899*0Sstevel@tonic-gate	sub	%i2, %i4, %i2
3900*0Sstevel@tonic-gate	sub	%i2, %i3, %i2
3901*0Sstevel@tonic-gate
3902*0Sstevel@tonic-gate	andn	%i1, 0x3f, %l7		! blk aligned address
3903*0Sstevel@tonic-gate	alignaddr %i1, %g0, %g0		! gen %gsr
3904*0Sstevel@tonic-gate
3905*0Sstevel@tonic-gate	srl	%i1, 3, %l5		! bits 3,4,5 are now least sig in  %l5
3906*0Sstevel@tonic-gate	andcc	%l5, 7, %i5		! mask everything except bits 1,2 3
3907*0Sstevel@tonic-gate	add	%i1, %i4, %i1
3908*0Sstevel@tonic-gate	add	%i1, %i3, %i1
3909*0Sstevel@tonic-gate
3910*0Sstevel@tonic-gate	ldda	[%l7]ASI_BLK_AIUS, %d0
3911*0Sstevel@tonic-gate	add	%l7, 64, %l7
3912*0Sstevel@tonic-gate	ldda	[%l7]ASI_BLK_AIUS, %d16
3913*0Sstevel@tonic-gate	add	%l7, 64, %l7
3914*0Sstevel@tonic-gate	ldda	[%l7]ASI_BLK_AIUS, %d32
3915*0Sstevel@tonic-gate	add	%l7, 64, %l7
3916*0Sstevel@tonic-gate	sub	%i3, 128, %i3
3917*0Sstevel@tonic-gate
3918*0Sstevel@tonic-gate	! switch statement to get us to the right 8 byte blk within a
3919*0Sstevel@tonic-gate	! 64 byte block
3920*0Sstevel@tonic-gate
3921*0Sstevel@tonic-gate	cmp	 %i5, 4
3922*0Sstevel@tonic-gate	bgeu,a	 copyin_hlf
3923*0Sstevel@tonic-gate	cmp	 %i5, 6
3924*0Sstevel@tonic-gate	cmp	 %i5, 2
3925*0Sstevel@tonic-gate	bgeu,a	 copyin_sqtr
3926*0Sstevel@tonic-gate	nop
3927*0Sstevel@tonic-gate	cmp	 %i5, 1
3928*0Sstevel@tonic-gate	be,a	 copyin_seg1
3929*0Sstevel@tonic-gate	nop
3930*0Sstevel@tonic-gate	ba,pt	 %ncc, copyin_seg0
3931*0Sstevel@tonic-gate	nop
3932*0Sstevel@tonic-gatecopyin_sqtr:
3933*0Sstevel@tonic-gate	be,a	 copyin_seg2
3934*0Sstevel@tonic-gate	nop
3935*0Sstevel@tonic-gate	ba,pt	 %ncc, copyin_seg3
3936*0Sstevel@tonic-gate	nop
3937*0Sstevel@tonic-gate
3938*0Sstevel@tonic-gatecopyin_hlf:
3939*0Sstevel@tonic-gate	bgeu,a	 copyin_fqtr
3940*0Sstevel@tonic-gate	nop
3941*0Sstevel@tonic-gate	cmp	 %i5, 5
3942*0Sstevel@tonic-gate	be,a	 copyin_seg5
3943*0Sstevel@tonic-gate	nop
3944*0Sstevel@tonic-gate	ba,pt	 %ncc, copyin_seg4
3945*0Sstevel@tonic-gate	nop
3946*0Sstevel@tonic-gatecopyin_fqtr:
3947*0Sstevel@tonic-gate	be,a	 copyin_seg6
3948*0Sstevel@tonic-gate	nop
3949*0Sstevel@tonic-gate	ba,pt	 %ncc, copyin_seg7
3950*0Sstevel@tonic-gate	nop
3951*0Sstevel@tonic-gate
3952*0Sstevel@tonic-gatecopyin_seg0:
3953*0Sstevel@tonic-gate	! 1st chunk - %d0 low, %d16 high, %d32 pre, %d48 dst
3954*0Sstevel@tonic-gate	FALIGN_D0
3955*0Sstevel@tonic-gate	ldda	[%l7]ASI_BLK_AIUS, %d0
3956*0Sstevel@tonic-gate	stda	%d48, [%i0]ASI_BLK_P
3957*0Sstevel@tonic-gate	add	%l7, 64, %l7
3958*0Sstevel@tonic-gate	subcc	%i3, 64, %i3
3959*0Sstevel@tonic-gate	bz,pn	%ncc, 0f
3960*0Sstevel@tonic-gate	add	%i0, 64, %i0
3961*0Sstevel@tonic-gate	! 2nd chunk -  %d0 pre, %d16 low, %d32 high, %d48 dst
3962*0Sstevel@tonic-gate	FALIGN_D16
3963*0Sstevel@tonic-gate	ldda	[%l7]ASI_BLK_AIUS, %d16
3964*0Sstevel@tonic-gate	stda	%d48, [%i0]ASI_BLK_P
3965*0Sstevel@tonic-gate	add	%l7, 64, %l7
3966*0Sstevel@tonic-gate	subcc	%i3, 64, %i3
3967*0Sstevel@tonic-gate	bz,pn	%ncc, 1f
3968*0Sstevel@tonic-gate	add	%i0, 64, %i0
3969*0Sstevel@tonic-gate	! 3rd chunk -  %d0 high, %d16 pre, %d32 low, %d48 dst
3970*0Sstevel@tonic-gate	FALIGN_D32
3971*0Sstevel@tonic-gate	ldda	[%l7]ASI_BLK_AIUS, %d32
3972*0Sstevel@tonic-gate	stda	%d48, [%i0]ASI_BLK_P
3973*0Sstevel@tonic-gate	add	%l7, 64, %l7
3974*0Sstevel@tonic-gate	subcc	%i3, 64, %i3
3975*0Sstevel@tonic-gate	bz,pn	%ncc, 2f
3976*0Sstevel@tonic-gate	add	%i0, 64, %i0
3977*0Sstevel@tonic-gate	ba,a,pt	%ncc, copyin_seg0
3978*0Sstevel@tonic-gate
3979*0Sstevel@tonic-gate0:
3980*0Sstevel@tonic-gate	FALIGN_D16
3981*0Sstevel@tonic-gate	stda	%d48, [%i0]ASI_BLK_P
3982*0Sstevel@tonic-gate	add	%i0, 64, %i0
3983*0Sstevel@tonic-gate	membar	#Sync
3984*0Sstevel@tonic-gate	FALIGN_D32
3985*0Sstevel@tonic-gate	stda	%d48, [%i0]ASI_BLK_P
3986*0Sstevel@tonic-gate	ba,pt	%ncc, copyin_blkd0
3987*0Sstevel@tonic-gate	add	%i0, 64, %i0
3988*0Sstevel@tonic-gate
3989*0Sstevel@tonic-gate1:
3990*0Sstevel@tonic-gate	FALIGN_D32
3991*0Sstevel@tonic-gate	stda	%d48, [%i0]ASI_BLK_P
3992*0Sstevel@tonic-gate	add	%i0, 64, %i0
3993*0Sstevel@tonic-gate	membar	#Sync
3994*0Sstevel@tonic-gate	FALIGN_D0
3995*0Sstevel@tonic-gate	stda	%d48, [%i0]ASI_BLK_P
3996*0Sstevel@tonic-gate	ba,pt	%ncc, copyin_blkd16
3997*0Sstevel@tonic-gate	add	%i0, 64, %i0
3998*0Sstevel@tonic-gate
3999*0Sstevel@tonic-gate2:
4000*0Sstevel@tonic-gate	FALIGN_D0
4001*0Sstevel@tonic-gate	stda	%d48, [%i0]ASI_BLK_P
4002*0Sstevel@tonic-gate	add	%i0, 64, %i0
4003*0Sstevel@tonic-gate	membar	#Sync
4004*0Sstevel@tonic-gate	FALIGN_D16
4005*0Sstevel@tonic-gate	stda	%d48, [%i0]ASI_BLK_P
4006*0Sstevel@tonic-gate	ba,pt	%ncc, copyin_blkd32
4007*0Sstevel@tonic-gate	add	%i0, 64, %i0
4008*0Sstevel@tonic-gate
4009*0Sstevel@tonic-gatecopyin_seg1:
4010*0Sstevel@tonic-gate	! 1st chunk - %d0 low, %d16 high, %d32 pre, %d48 dst
4011*0Sstevel@tonic-gate	FALIGN_D2
4012*0Sstevel@tonic-gate	ldda	[%l7]ASI_BLK_AIUS, %d0
4013*0Sstevel@tonic-gate	stda	%d48, [%i0]ASI_BLK_P
4014*0Sstevel@tonic-gate	add	%l7, 64, %l7
4015*0Sstevel@tonic-gate	subcc	%i3, 64, %i3
4016*0Sstevel@tonic-gate	bz,pn	%ncc, 0f
4017*0Sstevel@tonic-gate	add	%i0, 64, %i0
4018*0Sstevel@tonic-gate	! 2nd chunk -  %d0 pre, %d16 low, %d32 high, %d48 dst
4019*0Sstevel@tonic-gate	FALIGN_D18
4020*0Sstevel@tonic-gate	ldda	[%l7]ASI_BLK_AIUS, %d16
4021*0Sstevel@tonic-gate	stda	%d48, [%i0]ASI_BLK_P
4022*0Sstevel@tonic-gate	add	%l7, 64, %l7
4023*0Sstevel@tonic-gate	subcc	%i3, 64, %i3
4024*0Sstevel@tonic-gate	bz,pn	%ncc, 1f
4025*0Sstevel@tonic-gate	add	%i0, 64, %i0
4026*0Sstevel@tonic-gate	! 3rd chunk -  %d0 high, %d16 pre, %d32 low, %d48 dst
4027*0Sstevel@tonic-gate	FALIGN_D34
4028*0Sstevel@tonic-gate	ldda	[%l7]ASI_BLK_AIUS, %d32
4029*0Sstevel@tonic-gate	stda	%d48, [%i0]ASI_BLK_P
4030*0Sstevel@tonic-gate	add	%l7, 64, %l7
4031*0Sstevel@tonic-gate	subcc	%i3, 64, %i3
4032*0Sstevel@tonic-gate	bz,pn	%ncc, 2f
4033*0Sstevel@tonic-gate	add	%i0, 64, %i0
4034*0Sstevel@tonic-gate	ba,a,pt	%ncc, copyin_seg1
4035*0Sstevel@tonic-gate0:
4036*0Sstevel@tonic-gate	FALIGN_D18
4037*0Sstevel@tonic-gate	stda	%d48, [%i0]ASI_BLK_P
4038*0Sstevel@tonic-gate	add	%i0, 64, %i0
4039*0Sstevel@tonic-gate	membar	#Sync
4040*0Sstevel@tonic-gate	FALIGN_D34
4041*0Sstevel@tonic-gate	stda	%d48, [%i0]ASI_BLK_P
4042*0Sstevel@tonic-gate	ba,pt	%ncc, copyin_blkd2
4043*0Sstevel@tonic-gate	add	%i0, 64, %i0
4044*0Sstevel@tonic-gate
4045*0Sstevel@tonic-gate1:
4046*0Sstevel@tonic-gate	FALIGN_D34
4047*0Sstevel@tonic-gate	stda	%d48, [%i0]ASI_BLK_P
4048*0Sstevel@tonic-gate	add	%i0, 64, %i0
4049*0Sstevel@tonic-gate	membar	#Sync
4050*0Sstevel@tonic-gate	FALIGN_D2
4051*0Sstevel@tonic-gate	stda	%d48, [%i0]ASI_BLK_P
4052*0Sstevel@tonic-gate	ba,pt	%ncc, copyin_blkd18
4053*0Sstevel@tonic-gate	add	%i0, 64, %i0
4054*0Sstevel@tonic-gate
4055*0Sstevel@tonic-gate2:
4056*0Sstevel@tonic-gate	FALIGN_D2
4057*0Sstevel@tonic-gate	stda	%d48, [%i0]ASI_BLK_P
4058*0Sstevel@tonic-gate	add	%i0, 64, %i0
4059*0Sstevel@tonic-gate	membar	#Sync
4060*0Sstevel@tonic-gate	FALIGN_D18
4061*0Sstevel@tonic-gate	stda	%d48, [%i0]ASI_BLK_P
4062*0Sstevel@tonic-gate	ba,pt	%ncc, copyin_blkd34
4063*0Sstevel@tonic-gate	add	%i0, 64, %i0
4064*0Sstevel@tonic-gatecopyin_seg2:
4065*0Sstevel@tonic-gate	! 1st chunk - %d0 low, %d16 high, %d32 pre, %d48 dst
4066*0Sstevel@tonic-gate	FALIGN_D4
4067*0Sstevel@tonic-gate	ldda	[%l7]ASI_BLK_AIUS, %d0
4068*0Sstevel@tonic-gate	stda	%d48, [%i0]ASI_BLK_P
4069*0Sstevel@tonic-gate	add	%l7, 64, %l7
4070*0Sstevel@tonic-gate	subcc	%i3, 64, %i3
4071*0Sstevel@tonic-gate	bz,pn	%ncc, 0f
4072*0Sstevel@tonic-gate	add	%i0, 64, %i0
4073*0Sstevel@tonic-gate	! 2nd chunk -  %d0 pre, %d16 low, %d32 high, %d48 dst
4074*0Sstevel@tonic-gate	FALIGN_D20
4075*0Sstevel@tonic-gate	ldda	[%l7]ASI_BLK_AIUS, %d16
4076*0Sstevel@tonic-gate	stda	%d48, [%i0]ASI_BLK_P
4077*0Sstevel@tonic-gate	add	%l7, 64, %l7
4078*0Sstevel@tonic-gate	subcc	%i3, 64, %i3
4079*0Sstevel@tonic-gate	bz,pn	%ncc, 1f
4080*0Sstevel@tonic-gate	add	%i0, 64, %i0
4081*0Sstevel@tonic-gate	! 3rd chunk -  %d0 high, %d16 pre, %d32 low, %d48 dst
4082*0Sstevel@tonic-gate	FALIGN_D36
4083*0Sstevel@tonic-gate	ldda	[%l7]ASI_BLK_AIUS, %d32
4084*0Sstevel@tonic-gate	stda	%d48, [%i0]ASI_BLK_P
4085*0Sstevel@tonic-gate	add	%l7, 64, %l7
4086*0Sstevel@tonic-gate	subcc	%i3, 64, %i3
4087*0Sstevel@tonic-gate	bz,pn	%ncc, 2f
4088*0Sstevel@tonic-gate	add	%i0, 64, %i0
4089*0Sstevel@tonic-gate	ba,a,pt	%ncc, copyin_seg2
4090*0Sstevel@tonic-gate
4091*0Sstevel@tonic-gate0:
4092*0Sstevel@tonic-gate	FALIGN_D20
4093*0Sstevel@tonic-gate	stda	%d48, [%i0]ASI_BLK_P
4094*0Sstevel@tonic-gate	add	%i0, 64, %i0
4095*0Sstevel@tonic-gate	membar	#Sync
4096*0Sstevel@tonic-gate	FALIGN_D36
4097*0Sstevel@tonic-gate	stda	%d48, [%i0]ASI_BLK_P
4098*0Sstevel@tonic-gate	ba,pt	%ncc, copyin_blkd4
4099*0Sstevel@tonic-gate	add	%i0, 64, %i0
4100*0Sstevel@tonic-gate
4101*0Sstevel@tonic-gate1:
4102*0Sstevel@tonic-gate	FALIGN_D36
4103*0Sstevel@tonic-gate	stda	%d48, [%i0]ASI_BLK_P
4104*0Sstevel@tonic-gate	add	%i0, 64, %i0
4105*0Sstevel@tonic-gate	membar	#Sync
4106*0Sstevel@tonic-gate	FALIGN_D4
4107*0Sstevel@tonic-gate	stda	%d48, [%i0]ASI_BLK_P
4108*0Sstevel@tonic-gate	ba,pt	%ncc, copyin_blkd20
4109*0Sstevel@tonic-gate	add	%i0, 64, %i0
4110*0Sstevel@tonic-gate
4111*0Sstevel@tonic-gate2:
4112*0Sstevel@tonic-gate	FALIGN_D4
4113*0Sstevel@tonic-gate	stda	%d48, [%i0]ASI_BLK_P
4114*0Sstevel@tonic-gate	add	%i0, 64, %i0
4115*0Sstevel@tonic-gate	membar	#Sync
4116*0Sstevel@tonic-gate	FALIGN_D20
4117*0Sstevel@tonic-gate	stda	%d48, [%i0]ASI_BLK_P
4118*0Sstevel@tonic-gate	ba,pt	%ncc, copyin_blkd36
4119*0Sstevel@tonic-gate	add	%i0, 64, %i0
4120*0Sstevel@tonic-gate
4121*0Sstevel@tonic-gatecopyin_seg3:
4122*0Sstevel@tonic-gate	! 1st chunk - %d0 low, %d16 high, %d32 pre, %d48 dst
4123*0Sstevel@tonic-gate	FALIGN_D6
4124*0Sstevel@tonic-gate	ldda	[%l7]ASI_BLK_AIUS, %d0
4125*0Sstevel@tonic-gate	stda	%d48, [%i0]ASI_BLK_P
4126*0Sstevel@tonic-gate	add	%l7, 64, %l7
4127*0Sstevel@tonic-gate	subcc	%i3, 64, %i3
4128*0Sstevel@tonic-gate	bz,pn	%ncc, 0f
4129*0Sstevel@tonic-gate	add	%i0, 64, %i0
4130*0Sstevel@tonic-gate	! 2nd chunk -  %d0 pre, %d16 low, %d32 high, %d48 dst
4131*0Sstevel@tonic-gate	FALIGN_D22
4132*0Sstevel@tonic-gate	ldda	[%l7]ASI_BLK_AIUS, %d16
4133*0Sstevel@tonic-gate	stda	%d48, [%i0]ASI_BLK_P
4134*0Sstevel@tonic-gate	add	%l7, 64, %l7
4135*0Sstevel@tonic-gate	subcc	%i3, 64, %i3
4136*0Sstevel@tonic-gate	bz,pn	%ncc, 1f
4137*0Sstevel@tonic-gate	add	%i0, 64, %i0
4138*0Sstevel@tonic-gate	! 3rd chunk -  %d0 high, %d16 pre, %d32 low, %d48 dst
4139*0Sstevel@tonic-gate	FALIGN_D38
4140*0Sstevel@tonic-gate	ldda	[%l7]ASI_BLK_AIUS, %d32
4141*0Sstevel@tonic-gate	stda	%d48, [%i0]ASI_BLK_P
4142*0Sstevel@tonic-gate	add	%l7, 64, %l7
4143*0Sstevel@tonic-gate	subcc	%i3, 64, %i3
4144*0Sstevel@tonic-gate	bz,pn	%ncc, 2f
4145*0Sstevel@tonic-gate	add	%i0, 64, %i0
4146*0Sstevel@tonic-gate	ba,a,pt	%ncc, copyin_seg3
4147*0Sstevel@tonic-gate
4148*0Sstevel@tonic-gate0:
4149*0Sstevel@tonic-gate	FALIGN_D22
4150*0Sstevel@tonic-gate	stda	%d48, [%i0]ASI_BLK_P
4151*0Sstevel@tonic-gate	add	%i0, 64, %i0
4152*0Sstevel@tonic-gate	membar	#Sync
4153*0Sstevel@tonic-gate	FALIGN_D38
4154*0Sstevel@tonic-gate	stda	%d48, [%i0]ASI_BLK_P
4155*0Sstevel@tonic-gate	ba,pt	%ncc, copyin_blkd6
4156*0Sstevel@tonic-gate	add	%i0, 64, %i0
4157*0Sstevel@tonic-gate
4158*0Sstevel@tonic-gate1:
4159*0Sstevel@tonic-gate	FALIGN_D38
4160*0Sstevel@tonic-gate	stda	%d48, [%i0]ASI_BLK_P
4161*0Sstevel@tonic-gate	add	%i0, 64, %i0
4162*0Sstevel@tonic-gate	membar	#Sync
4163*0Sstevel@tonic-gate	FALIGN_D6
4164*0Sstevel@tonic-gate	stda	%d48, [%i0]ASI_BLK_P
4165*0Sstevel@tonic-gate	ba,pt	%ncc, copyin_blkd22
4166*0Sstevel@tonic-gate	add	%i0, 64, %i0
4167*0Sstevel@tonic-gate
4168*0Sstevel@tonic-gate2:
4169*0Sstevel@tonic-gate	FALIGN_D6
4170*0Sstevel@tonic-gate	stda	%d48, [%i0]ASI_BLK_P
4171*0Sstevel@tonic-gate	add	%i0, 64, %i0
4172*0Sstevel@tonic-gate	membar	#Sync
4173*0Sstevel@tonic-gate	FALIGN_D22
4174*0Sstevel@tonic-gate	stda	%d48, [%i0]ASI_BLK_P
4175*0Sstevel@tonic-gate	ba,pt	%ncc, copyin_blkd38
4176*0Sstevel@tonic-gate	add	%i0, 64, %i0
4177*0Sstevel@tonic-gate
4178*0Sstevel@tonic-gatecopyin_seg4:
4179*0Sstevel@tonic-gate	! 1st chunk - %d0 low, %d16 high, %d32 pre, %d48 dst
4180*0Sstevel@tonic-gate	FALIGN_D8
4181*0Sstevel@tonic-gate	ldda	[%l7]ASI_BLK_AIUS, %d0
4182*0Sstevel@tonic-gate	stda	%d48, [%i0]ASI_BLK_P
4183*0Sstevel@tonic-gate	add	%l7, 64, %l7
4184*0Sstevel@tonic-gate	subcc	%i3, 64, %i3
4185*0Sstevel@tonic-gate	bz,pn	%ncc, 0f
4186*0Sstevel@tonic-gate	add	%i0, 64, %i0
4187*0Sstevel@tonic-gate	! 2nd chunk -  %d0 pre, %d16 low, %d32 high, %d48 dst
4188*0Sstevel@tonic-gate	FALIGN_D24
4189*0Sstevel@tonic-gate	ldda	[%l7]ASI_BLK_AIUS, %d16
4190*0Sstevel@tonic-gate	stda	%d48, [%i0]ASI_BLK_P
4191*0Sstevel@tonic-gate	add	%l7, 64, %l7
4192*0Sstevel@tonic-gate	subcc	%i3, 64, %i3
4193*0Sstevel@tonic-gate	bz,pn	%ncc, 1f
4194*0Sstevel@tonic-gate	add	%i0, 64, %i0
4195*0Sstevel@tonic-gate	! 3rd chunk -  %d0 high, %d16 pre, %d32 low, %d48 dst
4196*0Sstevel@tonic-gate	FALIGN_D40
4197*0Sstevel@tonic-gate	ldda	[%l7]ASI_BLK_AIUS, %d32
4198*0Sstevel@tonic-gate	stda	%d48, [%i0]ASI_BLK_P
4199*0Sstevel@tonic-gate	add	%l7, 64, %l7
4200*0Sstevel@tonic-gate	subcc	%i3, 64, %i3
4201*0Sstevel@tonic-gate	bz,pn	%ncc, 2f
4202*0Sstevel@tonic-gate	add	%i0, 64, %i0
4203*0Sstevel@tonic-gate	ba,a,pt	%ncc, copyin_seg4
4204*0Sstevel@tonic-gate
4205*0Sstevel@tonic-gate0:
4206*0Sstevel@tonic-gate	FALIGN_D24
4207*0Sstevel@tonic-gate	stda	%d48, [%i0]ASI_BLK_P
4208*0Sstevel@tonic-gate	add	%i0, 64, %i0
4209*0Sstevel@tonic-gate	membar	#Sync
4210*0Sstevel@tonic-gate	FALIGN_D40
4211*0Sstevel@tonic-gate	stda	%d48, [%i0]ASI_BLK_P
4212*0Sstevel@tonic-gate	ba,pt	%ncc, copyin_blkd8
4213*0Sstevel@tonic-gate	add	%i0, 64, %i0
4214*0Sstevel@tonic-gate
4215*0Sstevel@tonic-gate1:
4216*0Sstevel@tonic-gate	FALIGN_D40
4217*0Sstevel@tonic-gate	stda	%d48, [%i0]ASI_BLK_P
4218*0Sstevel@tonic-gate	add	%i0, 64, %i0
4219*0Sstevel@tonic-gate	membar	#Sync
4220*0Sstevel@tonic-gate	FALIGN_D8
4221*0Sstevel@tonic-gate	stda	%d48, [%i0]ASI_BLK_P
4222*0Sstevel@tonic-gate	ba,pt	%ncc, copyin_blkd24
4223*0Sstevel@tonic-gate	add	%i0, 64, %i0
4224*0Sstevel@tonic-gate
4225*0Sstevel@tonic-gate2:
4226*0Sstevel@tonic-gate	FALIGN_D8
4227*0Sstevel@tonic-gate	stda	%d48, [%i0]ASI_BLK_P
4228*0Sstevel@tonic-gate	add	%i0, 64, %i0
4229*0Sstevel@tonic-gate	membar	#Sync
4230*0Sstevel@tonic-gate	FALIGN_D24
4231*0Sstevel@tonic-gate	stda	%d48, [%i0]ASI_BLK_P
4232*0Sstevel@tonic-gate	ba,pt	%ncc, copyin_blkd40
4233*0Sstevel@tonic-gate	add	%i0, 64, %i0
4234*0Sstevel@tonic-gate
4235*0Sstevel@tonic-gatecopyin_seg5:
4236*0Sstevel@tonic-gate	! 1st chunk - %d0 low, %d16 high, %d32 pre, %d48 dst
4237*0Sstevel@tonic-gate	FALIGN_D10
4238*0Sstevel@tonic-gate	ldda	[%l7]ASI_BLK_AIUS, %d0
4239*0Sstevel@tonic-gate	stda	%d48, [%i0]ASI_BLK_P
4240*0Sstevel@tonic-gate	add	%l7, 64, %l7
4241*0Sstevel@tonic-gate	subcc	%i3, 64, %i3
4242*0Sstevel@tonic-gate	bz,pn	%ncc, 0f
4243*0Sstevel@tonic-gate	add	%i0, 64, %i0
4244*0Sstevel@tonic-gate	! 2nd chunk -  %d0 pre, %d16 low, %d32 high, %d48 dst
4245*0Sstevel@tonic-gate	FALIGN_D26
4246*0Sstevel@tonic-gate	ldda	[%l7]ASI_BLK_AIUS, %d16
4247*0Sstevel@tonic-gate	stda	%d48, [%i0]ASI_BLK_P
4248*0Sstevel@tonic-gate	add	%l7, 64, %l7
4249*0Sstevel@tonic-gate	subcc	%i3, 64, %i3
4250*0Sstevel@tonic-gate	bz,pn	%ncc, 1f
4251*0Sstevel@tonic-gate	add	%i0, 64, %i0
4252*0Sstevel@tonic-gate	! 3rd chunk -  %d0 high, %d16 pre, %d32 low, %d48 dst
4253*0Sstevel@tonic-gate	FALIGN_D42
4254*0Sstevel@tonic-gate	ldda	[%l7]ASI_BLK_AIUS, %d32
4255*0Sstevel@tonic-gate	stda	%d48, [%i0]ASI_BLK_P
4256*0Sstevel@tonic-gate	add	%l7, 64, %l7
4257*0Sstevel@tonic-gate	subcc	%i3, 64, %i3
4258*0Sstevel@tonic-gate	bz,pn	%ncc, 2f
4259*0Sstevel@tonic-gate	add	%i0, 64, %i0
4260*0Sstevel@tonic-gate	ba,a,pt	%ncc, copyin_seg5
4261*0Sstevel@tonic-gate
4262*0Sstevel@tonic-gate0:
4263*0Sstevel@tonic-gate	FALIGN_D26
4264*0Sstevel@tonic-gate	stda	%d48, [%i0]ASI_BLK_P
4265*0Sstevel@tonic-gate	add	%i0, 64, %i0
4266*0Sstevel@tonic-gate	membar	#Sync
4267*0Sstevel@tonic-gate	FALIGN_D42
4268*0Sstevel@tonic-gate	stda	%d48, [%i0]ASI_BLK_P
4269*0Sstevel@tonic-gate	ba,pt	%ncc, copyin_blkd10
4270*0Sstevel@tonic-gate	add	%i0, 64, %i0
4271*0Sstevel@tonic-gate
4272*0Sstevel@tonic-gate1:
4273*0Sstevel@tonic-gate	FALIGN_D42
4274*0Sstevel@tonic-gate	stda	%d48, [%i0]ASI_BLK_P
4275*0Sstevel@tonic-gate	add	%i0, 64, %i0
4276*0Sstevel@tonic-gate	membar	#Sync
4277*0Sstevel@tonic-gate	FALIGN_D10
4278*0Sstevel@tonic-gate	stda	%d48, [%i0]ASI_BLK_P
4279*0Sstevel@tonic-gate	ba,pt	%ncc, copyin_blkd26
4280*0Sstevel@tonic-gate	add	%i0, 64, %i0
4281*0Sstevel@tonic-gate
4282*0Sstevel@tonic-gate2:
4283*0Sstevel@tonic-gate	FALIGN_D10
4284*0Sstevel@tonic-gate	stda	%d48, [%i0]ASI_BLK_P
4285*0Sstevel@tonic-gate	add	%i0, 64, %i0
4286*0Sstevel@tonic-gate	membar	#Sync
4287*0Sstevel@tonic-gate	FALIGN_D26
4288*0Sstevel@tonic-gate	stda	%d48, [%i0]ASI_BLK_P
4289*0Sstevel@tonic-gate	ba,pt	%ncc, copyin_blkd42
4290*0Sstevel@tonic-gate	add	%i0, 64, %i0
4291*0Sstevel@tonic-gate
4292*0Sstevel@tonic-gatecopyin_seg6:
4293*0Sstevel@tonic-gate	! 1st chunk - %d0 low, %d16 high, %d32 pre, %d48 dst
4294*0Sstevel@tonic-gate	FALIGN_D12
4295*0Sstevel@tonic-gate	ldda	[%l7]ASI_BLK_AIUS, %d0
4296*0Sstevel@tonic-gate	stda	%d48, [%i0]ASI_BLK_P
4297*0Sstevel@tonic-gate	add	%l7, 64, %l7
4298*0Sstevel@tonic-gate	subcc	%i3, 64, %i3
4299*0Sstevel@tonic-gate	bz,pn	%ncc, 0f
4300*0Sstevel@tonic-gate	add	%i0, 64, %i0
4301*0Sstevel@tonic-gate	! 2nd chunk -  %d0 pre, %d16 low, %d32 high, %d48 dst
4302*0Sstevel@tonic-gate	FALIGN_D28
4303*0Sstevel@tonic-gate	ldda	[%l7]ASI_BLK_AIUS, %d16
4304*0Sstevel@tonic-gate	stda	%d48, [%i0]ASI_BLK_P
4305*0Sstevel@tonic-gate	add	%l7, 64, %l7
4306*0Sstevel@tonic-gate	subcc	%i3, 64, %i3
4307*0Sstevel@tonic-gate	bz,pn	%ncc, 1f
4308*0Sstevel@tonic-gate	add	%i0, 64, %i0
4309*0Sstevel@tonic-gate	! 3rd chunk -  %d0 high, %d16 pre, %d32 low, %d48 dst
4310*0Sstevel@tonic-gate	FALIGN_D44
4311*0Sstevel@tonic-gate	ldda	[%l7]ASI_BLK_AIUS, %d32
4312*0Sstevel@tonic-gate	stda	%d48, [%i0]ASI_BLK_P
4313*0Sstevel@tonic-gate	add	%l7, 64, %l7
4314*0Sstevel@tonic-gate	subcc	%i3, 64, %i3
4315*0Sstevel@tonic-gate	bz,pn	%ncc, 2f
4316*0Sstevel@tonic-gate	add	%i0, 64, %i0
4317*0Sstevel@tonic-gate	ba,a,pt	%ncc, copyin_seg6
4318*0Sstevel@tonic-gate
4319*0Sstevel@tonic-gate0:
4320*0Sstevel@tonic-gate	FALIGN_D28
4321*0Sstevel@tonic-gate	stda	%d48, [%i0]ASI_BLK_P
4322*0Sstevel@tonic-gate	add	%i0, 64, %i0
4323*0Sstevel@tonic-gate	membar	#Sync
4324*0Sstevel@tonic-gate	FALIGN_D44
4325*0Sstevel@tonic-gate	stda	%d48, [%i0]ASI_BLK_P
4326*0Sstevel@tonic-gate	ba,pt	%ncc, copyin_blkd12
4327*0Sstevel@tonic-gate	add	%i0, 64, %i0
4328*0Sstevel@tonic-gate
4329*0Sstevel@tonic-gate1:
4330*0Sstevel@tonic-gate	FALIGN_D44
4331*0Sstevel@tonic-gate	stda	%d48, [%i0]ASI_BLK_P
4332*0Sstevel@tonic-gate	add	%i0, 64, %i0
4333*0Sstevel@tonic-gate	membar	#Sync
4334*0Sstevel@tonic-gate	FALIGN_D12
4335*0Sstevel@tonic-gate	stda	%d48, [%i0]ASI_BLK_P
4336*0Sstevel@tonic-gate	ba,pt	%ncc, copyin_blkd28
4337*0Sstevel@tonic-gate	add	%i0, 64, %i0
4338*0Sstevel@tonic-gate
4339*0Sstevel@tonic-gate2:
4340*0Sstevel@tonic-gate	FALIGN_D12
4341*0Sstevel@tonic-gate	stda	%d48, [%i0]ASI_BLK_P
4342*0Sstevel@tonic-gate	add	%i0, 64, %i0
4343*0Sstevel@tonic-gate	membar	#Sync
4344*0Sstevel@tonic-gate	FALIGN_D28
4345*0Sstevel@tonic-gate	stda	%d48, [%i0]ASI_BLK_P
4346*0Sstevel@tonic-gate	ba,pt	%ncc, copyin_blkd44
4347*0Sstevel@tonic-gate	add	%i0, 64, %i0
4348*0Sstevel@tonic-gate
4349*0Sstevel@tonic-gatecopyin_seg7:
4350*0Sstevel@tonic-gate	! 1st chunk - %d0 low, %d16 high, %d32 pre, %d48 dst
4351*0Sstevel@tonic-gate	FALIGN_D14
4352*0Sstevel@tonic-gate	ldda	[%l7]ASI_BLK_AIUS, %d0
4353*0Sstevel@tonic-gate	stda	%d48, [%i0]ASI_BLK_P
4354*0Sstevel@tonic-gate	add	%l7, 64, %l7
4355*0Sstevel@tonic-gate	subcc	%i3, 64, %i3
4356*0Sstevel@tonic-gate	bz,pn	%ncc, 0f
4357*0Sstevel@tonic-gate	add	%i0, 64, %i0
4358*0Sstevel@tonic-gate	! 2nd chunk -  %d0 pre, %d16 low, %d32 high, %d48 dst
4359*0Sstevel@tonic-gate	FALIGN_D30
4360*0Sstevel@tonic-gate	ldda	[%l7]ASI_BLK_AIUS, %d16
4361*0Sstevel@tonic-gate	stda	%d48, [%i0]ASI_BLK_P
4362*0Sstevel@tonic-gate	add	%l7, 64, %l7
4363*0Sstevel@tonic-gate	subcc	%i3, 64, %i3
4364*0Sstevel@tonic-gate	bz,pn	%ncc, 1f
4365*0Sstevel@tonic-gate	add	%i0, 64, %i0
4366*0Sstevel@tonic-gate	! 3rd chunk -  %d0 high, %d16 pre, %d32 low, %d48 dst
4367*0Sstevel@tonic-gate	FALIGN_D46
4368*0Sstevel@tonic-gate	ldda	[%l7]ASI_BLK_AIUS, %d32
4369*0Sstevel@tonic-gate	stda	%d48, [%i0]ASI_BLK_P
4370*0Sstevel@tonic-gate	add	%l7, 64, %l7
4371*0Sstevel@tonic-gate	subcc	%i3, 64, %i3
4372*0Sstevel@tonic-gate	bz,pn	%ncc, 2f
4373*0Sstevel@tonic-gate	add	%i0, 64, %i0
4374*0Sstevel@tonic-gate	ba,a,pt	%ncc, copyin_seg7
4375*0Sstevel@tonic-gate
4376*0Sstevel@tonic-gate0:
4377*0Sstevel@tonic-gate	FALIGN_D30
4378*0Sstevel@tonic-gate	stda	%d48, [%i0]ASI_BLK_P
4379*0Sstevel@tonic-gate	add	%i0, 64, %i0
4380*0Sstevel@tonic-gate	membar	#Sync
4381*0Sstevel@tonic-gate	FALIGN_D46
4382*0Sstevel@tonic-gate	stda	%d48, [%i0]ASI_BLK_P
4383*0Sstevel@tonic-gate	ba,pt	%ncc, copyin_blkd14
4384*0Sstevel@tonic-gate	add	%i0, 64, %i0
4385*0Sstevel@tonic-gate
4386*0Sstevel@tonic-gate1:
4387*0Sstevel@tonic-gate	FALIGN_D46
4388*0Sstevel@tonic-gate	stda	%d48, [%i0]ASI_BLK_P
4389*0Sstevel@tonic-gate	add	%i0, 64, %i0
4390*0Sstevel@tonic-gate	membar	#Sync
4391*0Sstevel@tonic-gate	FALIGN_D14
4392*0Sstevel@tonic-gate	stda	%d48, [%i0]ASI_BLK_P
4393*0Sstevel@tonic-gate	ba,pt	%ncc, copyin_blkd30
4394*0Sstevel@tonic-gate	add	%i0, 64, %i0
4395*0Sstevel@tonic-gate
4396*0Sstevel@tonic-gate2:
4397*0Sstevel@tonic-gate	FALIGN_D14
4398*0Sstevel@tonic-gate	stda	%d48, [%i0]ASI_BLK_P
4399*0Sstevel@tonic-gate	add	%i0, 64, %i0
4400*0Sstevel@tonic-gate	membar	#Sync
4401*0Sstevel@tonic-gate	FALIGN_D30
4402*0Sstevel@tonic-gate	stda	%d48, [%i0]ASI_BLK_P
4403*0Sstevel@tonic-gate	ba,pt	%ncc, copyin_blkd46
4404*0Sstevel@tonic-gate	add	%i0, 64, %i0
4405*0Sstevel@tonic-gate
4406*0Sstevel@tonic-gate
4407*0Sstevel@tonic-gate	!
4408*0Sstevel@tonic-gate	! dribble out the last partial block
4409*0Sstevel@tonic-gate	!
4410*0Sstevel@tonic-gatecopyin_blkd0:
4411*0Sstevel@tonic-gate	subcc	%i4, 8, %i4
4412*0Sstevel@tonic-gate	blu,pn	%ncc, copyin_blkdone
4413*0Sstevel@tonic-gate	faligndata %d0, %d2, %d48
4414*0Sstevel@tonic-gate	std	%d48, [%i0]
4415*0Sstevel@tonic-gate	add	%i0, 8, %i0
4416*0Sstevel@tonic-gatecopyin_blkd2:
4417*0Sstevel@tonic-gate	subcc	%i4, 8, %i4
4418*0Sstevel@tonic-gate	blu,pn	%ncc, copyin_blkdone
4419*0Sstevel@tonic-gate	faligndata %d2, %d4, %d48
4420*0Sstevel@tonic-gate	std	%d48, [%i0]
4421*0Sstevel@tonic-gate	add	%i0, 8, %i0
4422*0Sstevel@tonic-gatecopyin_blkd4:
4423*0Sstevel@tonic-gate	subcc	%i4, 8, %i4
4424*0Sstevel@tonic-gate	blu,pn	%ncc, copyin_blkdone
4425*0Sstevel@tonic-gate	faligndata %d4, %d6, %d48
4426*0Sstevel@tonic-gate	std	%d48, [%i0]
4427*0Sstevel@tonic-gate	add	%i0, 8, %i0
4428*0Sstevel@tonic-gatecopyin_blkd6:
4429*0Sstevel@tonic-gate	subcc	%i4, 8, %i4
4430*0Sstevel@tonic-gate	blu,pn	%ncc, copyin_blkdone
4431*0Sstevel@tonic-gate	faligndata %d6, %d8, %d48
4432*0Sstevel@tonic-gate	std	%d48, [%i0]
4433*0Sstevel@tonic-gate	add	%i0, 8, %i0
4434*0Sstevel@tonic-gatecopyin_blkd8:
4435*0Sstevel@tonic-gate	subcc	%i4, 8, %i4
4436*0Sstevel@tonic-gate	blu,pn	%ncc, copyin_blkdone
4437*0Sstevel@tonic-gate	faligndata %d8, %d10, %d48
4438*0Sstevel@tonic-gate	std	%d48, [%i0]
4439*0Sstevel@tonic-gate	add	%i0, 8, %i0
4440*0Sstevel@tonic-gatecopyin_blkd10:
4441*0Sstevel@tonic-gate	subcc	%i4, 8, %i4
4442*0Sstevel@tonic-gate	blu,pn	%ncc, copyin_blkdone
4443*0Sstevel@tonic-gate	faligndata %d10, %d12, %d48
4444*0Sstevel@tonic-gate	std	%d48, [%i0]
4445*0Sstevel@tonic-gate	add	%i0, 8, %i0
4446*0Sstevel@tonic-gatecopyin_blkd12:
4447*0Sstevel@tonic-gate	subcc	%i4, 8, %i4
4448*0Sstevel@tonic-gate	blu,pn	%ncc, copyin_blkdone
4449*0Sstevel@tonic-gate	faligndata %d12, %d14, %d48
4450*0Sstevel@tonic-gate	std	%d48, [%i0]
4451*0Sstevel@tonic-gate	add	%i0, 8, %i0
4452*0Sstevel@tonic-gatecopyin_blkd14:
4453*0Sstevel@tonic-gate	subcc	%i4, 8, %i4
4454*0Sstevel@tonic-gate	blu,pn	%ncc, copyin_blkdone
4455*0Sstevel@tonic-gate	fsrc1	%d14, %d0
4456*0Sstevel@tonic-gate	ba,a,pt	%ncc, copyin_blkleft
4457*0Sstevel@tonic-gate
4458*0Sstevel@tonic-gatecopyin_blkd16:
4459*0Sstevel@tonic-gate	subcc	%i4, 8, %i4
4460*0Sstevel@tonic-gate	blu,pn	%ncc, copyin_blkdone
4461*0Sstevel@tonic-gate	faligndata %d16, %d18, %d48
4462*0Sstevel@tonic-gate	std	%d48, [%i0]
4463*0Sstevel@tonic-gate	add	%i0, 8, %i0
4464*0Sstevel@tonic-gatecopyin_blkd18:
4465*0Sstevel@tonic-gate	subcc	%i4, 8, %i4
4466*0Sstevel@tonic-gate	blu,pn	%ncc, copyin_blkdone
4467*0Sstevel@tonic-gate	faligndata %d18, %d20, %d48
4468*0Sstevel@tonic-gate	std	%d48, [%i0]
4469*0Sstevel@tonic-gate	add	%i0, 8, %i0
4470*0Sstevel@tonic-gatecopyin_blkd20:
4471*0Sstevel@tonic-gate	subcc	%i4, 8, %i4
4472*0Sstevel@tonic-gate	blu,pn	%ncc, copyin_blkdone
4473*0Sstevel@tonic-gate	faligndata %d20, %d22, %d48
4474*0Sstevel@tonic-gate	std	%d48, [%i0]
4475*0Sstevel@tonic-gate	add	%i0, 8, %i0
4476*0Sstevel@tonic-gatecopyin_blkd22:
4477*0Sstevel@tonic-gate	subcc	%i4, 8, %i4
4478*0Sstevel@tonic-gate	blu,pn	%ncc, copyin_blkdone
4479*0Sstevel@tonic-gate	faligndata %d22, %d24, %d48
4480*0Sstevel@tonic-gate	std	%d48, [%i0]
4481*0Sstevel@tonic-gate	add	%i0, 8, %i0
4482*0Sstevel@tonic-gatecopyin_blkd24:
4483*0Sstevel@tonic-gate	subcc	%i4, 8, %i4
4484*0Sstevel@tonic-gate	blu,pn	%ncc, copyin_blkdone
4485*0Sstevel@tonic-gate	faligndata %d24, %d26, %d48
4486*0Sstevel@tonic-gate	std	%d48, [%i0]
4487*0Sstevel@tonic-gate	add	%i0, 8, %i0
4488*0Sstevel@tonic-gatecopyin_blkd26:
4489*0Sstevel@tonic-gate	subcc	%i4, 8, %i4
4490*0Sstevel@tonic-gate	blu,pn	%ncc, copyin_blkdone
4491*0Sstevel@tonic-gate	faligndata %d26, %d28, %d48
4492*0Sstevel@tonic-gate	std	%d48, [%i0]
4493*0Sstevel@tonic-gate	add	%i0, 8, %i0
4494*0Sstevel@tonic-gatecopyin_blkd28:
4495*0Sstevel@tonic-gate	subcc	%i4, 8, %i4
4496*0Sstevel@tonic-gate	blu,pn	%ncc, copyin_blkdone
4497*0Sstevel@tonic-gate	faligndata %d28, %d30, %d48
4498*0Sstevel@tonic-gate	std	%d48, [%i0]
4499*0Sstevel@tonic-gate	add	%i0, 8, %i0
4500*0Sstevel@tonic-gatecopyin_blkd30:
4501*0Sstevel@tonic-gate	subcc	%i4, 8, %i4
4502*0Sstevel@tonic-gate	blu,pn	%ncc, copyin_blkdone
4503*0Sstevel@tonic-gate	fsrc1	%d30, %d0
4504*0Sstevel@tonic-gate	ba,a,pt	%ncc, copyin_blkleft
4505*0Sstevel@tonic-gatecopyin_blkd32:
4506*0Sstevel@tonic-gate	subcc	%i4, 8, %i4
4507*0Sstevel@tonic-gate	blu,pn	%ncc, copyin_blkdone
4508*0Sstevel@tonic-gate	faligndata %d32, %d34, %d48
4509*0Sstevel@tonic-gate	std	%d48, [%i0]
4510*0Sstevel@tonic-gate	add	%i0, 8, %i0
4511*0Sstevel@tonic-gatecopyin_blkd34:
4512*0Sstevel@tonic-gate	subcc	%i4, 8, %i4
4513*0Sstevel@tonic-gate	blu,pn	%ncc, copyin_blkdone
4514*0Sstevel@tonic-gate	faligndata %d34, %d36, %d48
4515*0Sstevel@tonic-gate	std	%d48, [%i0]
4516*0Sstevel@tonic-gate	add	%i0, 8, %i0
4517*0Sstevel@tonic-gatecopyin_blkd36:
4518*0Sstevel@tonic-gate	subcc	%i4, 8, %i4
4519*0Sstevel@tonic-gate	blu,pn	%ncc, copyin_blkdone
4520*0Sstevel@tonic-gate	faligndata %d36, %d38, %d48
4521*0Sstevel@tonic-gate	std	%d48, [%i0]
4522*0Sstevel@tonic-gate	add	%i0, 8, %i0
4523*0Sstevel@tonic-gatecopyin_blkd38:
4524*0Sstevel@tonic-gate	subcc	%i4, 8, %i4
4525*0Sstevel@tonic-gate	blu,pn	%ncc, copyin_blkdone
4526*0Sstevel@tonic-gate	faligndata %d38, %d40, %d48
4527*0Sstevel@tonic-gate	std	%d48, [%i0]
4528*0Sstevel@tonic-gate	add	%i0, 8, %i0
4529*0Sstevel@tonic-gatecopyin_blkd40:
4530*0Sstevel@tonic-gate	subcc	%i4, 8, %i4
4531*0Sstevel@tonic-gate	blu,pn	%ncc, copyin_blkdone
4532*0Sstevel@tonic-gate	faligndata %d40, %d42, %d48
4533*0Sstevel@tonic-gate	std	%d48, [%i0]
4534*0Sstevel@tonic-gate	add	%i0, 8, %i0
4535*0Sstevel@tonic-gatecopyin_blkd42:
4536*0Sstevel@tonic-gate	subcc	%i4, 8, %i4
4537*0Sstevel@tonic-gate	blu,pn	%ncc, copyin_blkdone
4538*0Sstevel@tonic-gate	faligndata %d42, %d44, %d48
4539*0Sstevel@tonic-gate	std	%d48, [%i0]
4540*0Sstevel@tonic-gate	add	%i0, 8, %i0
4541*0Sstevel@tonic-gatecopyin_blkd44:
4542*0Sstevel@tonic-gate	subcc	%i4, 8, %i4
4543*0Sstevel@tonic-gate	blu,pn	%ncc, copyin_blkdone
4544*0Sstevel@tonic-gate	faligndata %d44, %d46, %d48
4545*0Sstevel@tonic-gate	std	%d48, [%i0]
4546*0Sstevel@tonic-gate	add	%i0, 8, %i0
4547*0Sstevel@tonic-gatecopyin_blkd46:
4548*0Sstevel@tonic-gate	subcc	%i4, 8, %i4
4549*0Sstevel@tonic-gate	blu,pn	%ncc, copyin_blkdone
4550*0Sstevel@tonic-gate	fsrc1	%d46, %d0
4551*0Sstevel@tonic-gate
4552*0Sstevel@tonic-gatecopyin_blkleft:
4553*0Sstevel@tonic-gate1:
4554*0Sstevel@tonic-gate	ldda	[%l7]ASI_USER, %d2
4555*0Sstevel@tonic-gate	add	%l7, 8, %l7
4556*0Sstevel@tonic-gate	subcc	%i4, 8, %i4
4557*0Sstevel@tonic-gate	faligndata %d0, %d2, %d8
4558*0Sstevel@tonic-gate	std	%d8, [%i0]
4559*0Sstevel@tonic-gate	blu,pn	%ncc, copyin_blkdone
4560*0Sstevel@tonic-gate	add	%i0, 8, %i0
4561*0Sstevel@tonic-gate	ldda	[%l7]ASI_USER, %d0
4562*0Sstevel@tonic-gate	add	%l7, 8, %l7
4563*0Sstevel@tonic-gate	subcc	%i4, 8, %i4
4564*0Sstevel@tonic-gate	faligndata %d2, %d0, %d8
4565*0Sstevel@tonic-gate	std	%d8, [%i0]
4566*0Sstevel@tonic-gate	bgeu,pt	%ncc, 1b
4567*0Sstevel@tonic-gate	add	%i0, 8, %i0
4568*0Sstevel@tonic-gate
4569*0Sstevel@tonic-gatecopyin_blkdone:
4570*0Sstevel@tonic-gate	tst	%i2
4571*0Sstevel@tonic-gate	bz,pt	%ncc, .copyin_exit
4572*0Sstevel@tonic-gate	and	%l3, 0x4, %l3		! fprs.du = fprs.dl = 0
4573*0Sstevel@tonic-gate
4574*0Sstevel@tonic-gate7:	lduba	[%i1]ASI_USER, %i4
4575*0Sstevel@tonic-gate	inc	%i1
4576*0Sstevel@tonic-gate	inc	%i0
4577*0Sstevel@tonic-gate	deccc	%i2
4578*0Sstevel@tonic-gate	bgu	%ncc, 7b
4579*0Sstevel@tonic-gate	  stb	  %i4, [%i0 - 1]
4580*0Sstevel@tonic-gate
4581*0Sstevel@tonic-gate.copyin_exit:
4582*0Sstevel@tonic-gate	membar	#StoreLoad|#StoreStore
4583*0Sstevel@tonic-gate	btst	FPUSED_FLAG, SAVED_LOFAULT
4584*0Sstevel@tonic-gate	bz	%icc, 1f
4585*0Sstevel@tonic-gate	  nop
4586*0Sstevel@tonic-gate
4587*0Sstevel@tonic-gate	ld	[%fp + STACK_BIAS - SAVED_GSR_OFFSET], %o2	! restore gsr
4588*0Sstevel@tonic-gate	wr	%o2, 0, %gsr
4589*0Sstevel@tonic-gate
4590*0Sstevel@tonic-gate	ld	[%fp + STACK_BIAS - SAVED_FPRS_OFFSET], %o3
4591*0Sstevel@tonic-gate	btst	FPRS_FEF, %o3
4592*0Sstevel@tonic-gate	bz	%icc, 4f
4593*0Sstevel@tonic-gate	  nop
4594*0Sstevel@tonic-gate
4595*0Sstevel@tonic-gate	! restore fpregs from stack
4596*0Sstevel@tonic-gate	membar	#Sync
4597*0Sstevel@tonic-gate	add	%fp, STACK_BIAS - 257, %o2
4598*0Sstevel@tonic-gate	and	%o2, -64, %o2
4599*0Sstevel@tonic-gate	ldda	[%o2]ASI_BLK_P, %d0
4600*0Sstevel@tonic-gate	add	%o2, 64, %o2
4601*0Sstevel@tonic-gate	ldda	[%o2]ASI_BLK_P, %d16
4602*0Sstevel@tonic-gate	add	%o2, 64, %o2
4603*0Sstevel@tonic-gate	ldda	[%o2]ASI_BLK_P, %d32
4604*0Sstevel@tonic-gate	add	%o2, 64, %o2
4605*0Sstevel@tonic-gate	ldda	[%o2]ASI_BLK_P, %d48
4606*0Sstevel@tonic-gate	membar	#Sync
4607*0Sstevel@tonic-gate
4608*0Sstevel@tonic-gate	ba,pt	%ncc, 1f
4609*0Sstevel@tonic-gate	  wr	%o3, 0, %fprs		! restore fprs
4610*0Sstevel@tonic-gate
4611*0Sstevel@tonic-gate4:
4612*0Sstevel@tonic-gate	FZERO				! zero all of the fpregs
4613*0Sstevel@tonic-gate	wr	%o3, 0, %fprs		! restore fprs
4614*0Sstevel@tonic-gate
4615*0Sstevel@tonic-gate1:
4616*0Sstevel@tonic-gate	andn	SAVED_LOFAULT, FPUSED_FLAG, SAVED_LOFAULT
4617*0Sstevel@tonic-gate	membar	#Sync				! sync error barrier
4618*0Sstevel@tonic-gate	stn	SAVED_LOFAULT, [THREAD_REG + T_LOFAULT]	! restore old t_lofault
4619*0Sstevel@tonic-gate	ret
4620*0Sstevel@tonic-gate	restore	%g0, 0, %o0
4621*0Sstevel@tonic-gate.copyin_err:
4622*0Sstevel@tonic-gate	ldn	[THREAD_REG + T_COPYOPS], %o4
4623*0Sstevel@tonic-gate	brz	%o4, 2f
4624*0Sstevel@tonic-gate	nop
4625*0Sstevel@tonic-gate	ldn	[%o4 + CP_COPYIN], %g2
4626*0Sstevel@tonic-gate	jmp	%g2
4627*0Sstevel@tonic-gate	nop
4628*0Sstevel@tonic-gate2:
4629*0Sstevel@tonic-gate	retl
4630*0Sstevel@tonic-gate	mov	-1, %o0
4631*0Sstevel@tonic-gate	SET_SIZE(copyin)
4632*0Sstevel@tonic-gate
4633*0Sstevel@tonic-gate#endif	/* lint */
4634*0Sstevel@tonic-gate
4635*0Sstevel@tonic-gate#ifdef	lint
4636*0Sstevel@tonic-gate
4637*0Sstevel@tonic-gate/*ARGSUSED*/
4638*0Sstevel@tonic-gateint
4639*0Sstevel@tonic-gatexcopyin(const void *uaddr, void *kaddr, size_t count)
4640*0Sstevel@tonic-gate{ return (0); }
4641*0Sstevel@tonic-gate
4642*0Sstevel@tonic-gate#else	/* lint */
4643*0Sstevel@tonic-gate
/*
 * xcopyin - entry point that loads the address of .xcopyin_err into
 * REAL_LOFAULT and then branches to the shared .do_copyin code (defined
 * outside this excerpt); the low half of the address is filled in from
 * the branch delay slot.
 *
 * .xcopyin_err is the fault handler handed to .do_copyin:
 *   - if curthread's t_copyops is non-NULL, jump to its CP_XCOPYIN entry;
 *   - otherwise return to the caller with %g1 as the result.
 * NOTE(review): %g1 is presumably the error code left by the fault path
 * before control reaches this label -- confirm against .do_copyin.
 */
4644*0Sstevel@tonic-gate	ENTRY(xcopyin)
4645*0Sstevel@tonic-gate	sethi	%hi(.xcopyin_err), REAL_LOFAULT
4646*0Sstevel@tonic-gate	b	.do_copyin
4647*0Sstevel@tonic-gate	  or	REAL_LOFAULT, %lo(.xcopyin_err), REAL_LOFAULT
4648*0Sstevel@tonic-gate.xcopyin_err:
4649*0Sstevel@tonic-gate	ldn	[THREAD_REG + T_COPYOPS], %o4
4650*0Sstevel@tonic-gate	brz	%o4, 2f
4651*0Sstevel@tonic-gate	nop
	/* t_copyops is set: hand off to its xcopyin vector */
4652*0Sstevel@tonic-gate	ldn	[%o4 + CP_XCOPYIN], %g2
4653*0Sstevel@tonic-gate	jmp	%g2
4654*0Sstevel@tonic-gate	nop
4655*0Sstevel@tonic-gate2:
	/* no copyops: leaf return, result taken from %g1 in the delay slot */
4656*0Sstevel@tonic-gate	retl
4657*0Sstevel@tonic-gate	mov	%g1, %o0
4658*0Sstevel@tonic-gate	SET_SIZE(xcopyin)
4659*0Sstevel@tonic-gate
4660*0Sstevel@tonic-gate#endif	/* lint */
4661*0Sstevel@tonic-gate
4662*0Sstevel@tonic-gate#ifdef	lint
4663*0Sstevel@tonic-gate
4664*0Sstevel@tonic-gate/*ARGSUSED*/
4665*0Sstevel@tonic-gateint
4666*0Sstevel@tonic-gatexcopyin_little(const void *uaddr, void *kaddr, size_t count)
4667*0Sstevel@tonic-gate{ return (0); }
4668*0Sstevel@tonic-gate
4669*0Sstevel@tonic-gate#else	/* lint */
4670*0Sstevel@tonic-gate
/*
 * xcopyin_little - byte-at-a-time copy of count (%o2) bytes from uaddr
 * (%o0) to kaddr (%o1) in reversed byte order: the last source byte is
 * stored first, i.e. kaddr[i] = uaddr[count - 1 - i].  Source bytes are
 * loaded through ASI_AIUSL.
 *
 * .little_err is installed in curthread's t_lofault for the duration of
 * the copy; the previous handler is held in %o5 and restored on both
 * exit paths.  Returns 0 on success; on a fault returns whatever is in
 * %g1 when .little_err is reached.
 * NOTE(review): %g1 is presumably the error code supplied by the trap
 * handler -- confirm.
 *
 * Register use inside the loop:
 *   %o3 - index running from -count up to 0
 *   %o0 - uaddr + 2*count - 1 initially, decremented by 2 per byte, so
 *         that %o0 + %o3 walks the source backwards one byte at a time
 *   %o1 - kaddr + count, so that %o1 + %o3 walks the destination forwards
 *   %o4 - byte in flight
 */
4671*0Sstevel@tonic-gate	ENTRY(xcopyin_little)
4672*0Sstevel@tonic-gate	sethi	%hi(.little_err), %o4
4673*0Sstevel@tonic-gate	ldn	[THREAD_REG + T_LOFAULT], %o5
4674*0Sstevel@tonic-gate	or	%o4, %lo(.little_err), %o4
4675*0Sstevel@tonic-gate	membar	#Sync				! sync error barrier
4676*0Sstevel@tonic-gate	stn	%o4, [THREAD_REG + T_LOFAULT]
4677*0Sstevel@tonic-gate
	/*
	 * %o3 = -count; the condition codes set here are the ones tested by
	 * the bz below (the intervening add does not modify them).
	 */
4678*0Sstevel@tonic-gate	subcc	%g0, %o2, %o3
4679*0Sstevel@tonic-gate	add	%o0, %o2, %o0
4680*0Sstevel@tonic-gate	bz,pn	%ncc, 2f		! check for zero bytes
4681*0Sstevel@tonic-gate	sub	%o2, 1, %o4
4682*0Sstevel@tonic-gate	add	%o0, %o4, %o0		! start w/last byte
4683*0Sstevel@tonic-gate	add	%o1, %o2, %o1
4684*0Sstevel@tonic-gate	lduba	[%o0+%o3]ASI_AIUSL, %o4
4685*0Sstevel@tonic-gate
	/*
	 * Store the byte loaded previously, bump the index, and fetch the
	 * next source byte from the annulled delay slot (executed only when
	 * the branch is taken).  inccc sets carry when %o3 wraps to 0, which
	 * ends the loop after exactly count stores.
	 */
4686*0Sstevel@tonic-gate1:	stb	%o4, [%o1+%o3]
4687*0Sstevel@tonic-gate	inccc	%o3
4688*0Sstevel@tonic-gate	sub	%o0, 2, %o0		! get next byte
4689*0Sstevel@tonic-gate	bcc,a,pt %ncc, 1b
4690*0Sstevel@tonic-gate	  lduba	[%o0+%o3]ASI_AIUSL, %o4
4691*0Sstevel@tonic-gate
4692*0Sstevel@tonic-gate2:	membar	#Sync				! sync error barrier
4693*0Sstevel@tonic-gate	stn	%o5, [THREAD_REG + T_LOFAULT]	! restore old t_lofault
4694*0Sstevel@tonic-gate	retl
4695*0Sstevel@tonic-gate	mov	%g0, %o0		! return (0)
4696*0Sstevel@tonic-gate
	/* fault path: put the caller's handler back, return the value in %g1 */
4697*0Sstevel@tonic-gate.little_err:
4698*0Sstevel@tonic-gate	membar	#Sync				! sync error barrier
4699*0Sstevel@tonic-gate	stn	%o5, [THREAD_REG + T_LOFAULT]	! restore old t_lofault
4700*0Sstevel@tonic-gate	retl
4701*0Sstevel@tonic-gate	mov	%g1, %o0
4702*0Sstevel@tonic-gate	SET_SIZE(xcopyin_little)
4703*0Sstevel@tonic-gate
4704*0Sstevel@tonic-gate#endif	/* lint */
4705*0Sstevel@tonic-gate
4706*0Sstevel@tonic-gate
4707*0Sstevel@tonic-gate/*
4708*0Sstevel@tonic-gate * Copy a block of storage - must not overlap (from + len <= to).
4709*0Sstevel@tonic-gate * No fault handler installed (to be called under on_fault())
4710*0Sstevel@tonic-gate */
4711*0Sstevel@tonic-gate#if defined(lint)
4712*0Sstevel@tonic-gate
4713*0Sstevel@tonic-gate/* ARGSUSED */
4714*0Sstevel@tonic-gatevoid
4715*0Sstevel@tonic-gatecopyin_noerr(const void *ufrom, void *kto, size_t count)
4716*0Sstevel@tonic-gate{}
4717*0Sstevel@tonic-gate
4718*0Sstevel@tonic-gate#else	/* lint */
4719*0Sstevel@tonic-gate
	/*
	 * copyin_noerr loads the address of .copyio_noerr into REAL_LOFAULT
	 * (low half from the branch delay slot) and branches to the shared
	 * .do_copyin code, which is defined outside this excerpt.
	 *
	 * .copyio_noerr does no error handling of its own: it jumps straight
	 * to the address held in SAVED_LOFAULT.
	 * NOTE(review): SAVED_LOFAULT presumably holds the t_lofault value
	 * that was in effect on entry (the caller's on_fault() handler) --
	 * confirm against .do_copyin.
	 *
	 * The .copyio_noerr label lives inside this routine's SET_SIZE range
	 * but is also referenced by copyout_noerr.
	 */
4720*0Sstevel@tonic-gate	ENTRY(copyin_noerr)
4721*0Sstevel@tonic-gate	sethi	%hi(.copyio_noerr), REAL_LOFAULT
4722*0Sstevel@tonic-gate	b	.do_copyin
4723*0Sstevel@tonic-gate	  or	REAL_LOFAULT, %lo(.copyio_noerr), REAL_LOFAULT
4724*0Sstevel@tonic-gate.copyio_noerr:
4725*0Sstevel@tonic-gate	jmp	SAVED_LOFAULT
4726*0Sstevel@tonic-gate	  nop
4727*0Sstevel@tonic-gate	SET_SIZE(copyin_noerr)
4728*0Sstevel@tonic-gate
4729*0Sstevel@tonic-gate#endif /* lint */
4730*0Sstevel@tonic-gate
4731*0Sstevel@tonic-gate/*
4732*0Sstevel@tonic-gate * Copy a block of storage - must not overlap (from + len <= to).
4733*0Sstevel@tonic-gate * No fault handler installed (to be called under on_fault())
4734*0Sstevel@tonic-gate */
4735*0Sstevel@tonic-gate
4736*0Sstevel@tonic-gate#if defined(lint)
4737*0Sstevel@tonic-gate
4738*0Sstevel@tonic-gate/* ARGSUSED */
4739*0Sstevel@tonic-gatevoid
4740*0Sstevel@tonic-gatecopyout_noerr(const void *kfrom, void *uto, size_t count)
4741*0Sstevel@tonic-gate{}
4742*0Sstevel@tonic-gate
4743*0Sstevel@tonic-gate#else	/* lint */
4744*0Sstevel@tonic-gate
	/*
	 * copyout_noerr loads the address of .copyio_noerr (the label defined
	 * in copyin_noerr, which jumps to SAVED_LOFAULT) into REAL_LOFAULT,
	 * completing the low half from the branch delay slot, and branches to
	 * the shared .do_copyout code defined outside this excerpt.
	 */
4745*0Sstevel@tonic-gate	ENTRY(copyout_noerr)
4746*0Sstevel@tonic-gate	sethi	%hi(.copyio_noerr), REAL_LOFAULT
4747*0Sstevel@tonic-gate	b	.do_copyout
4748*0Sstevel@tonic-gate	  or	REAL_LOFAULT, %lo(.copyio_noerr), REAL_LOFAULT
4749*0Sstevel@tonic-gate	SET_SIZE(copyout_noerr)
4750*0Sstevel@tonic-gate
4751*0Sstevel@tonic-gate#endif /* lint */
4752*0Sstevel@tonic-gate
4753*0Sstevel@tonic-gate#if defined(lint)
4754*0Sstevel@tonic-gate
/*
 * Global tunables.  The lint declarations here and the DGDEF words in the
 * assembly half below define the same seven symbols with the same initial
 * values (1, 1, 1, 0, 0, 0, 0); keep the two halves in step.
 * NOTE(review): the hw_copy_limit_N words are presumably size thresholds
 * consulted by the copy routines earlier in this file -- confirm there.
 */
4755*0Sstevel@tonic-gateint use_hw_bcopy = 1;
4756*0Sstevel@tonic-gateint use_hw_copyio = 1;
4757*0Sstevel@tonic-gateint use_hw_bzero = 1;
4758*0Sstevel@tonic-gateuint_t hw_copy_limit_1 = 0;
4759*0Sstevel@tonic-gateuint_t hw_copy_limit_2 = 0;
4760*0Sstevel@tonic-gateuint_t hw_copy_limit_4 = 0;
4761*0Sstevel@tonic-gateuint_t hw_copy_limit_8 = 0;
4762*0Sstevel@tonic-gate
4763*0Sstevel@tonic-gate#else /* !lint */
4764*0Sstevel@tonic-gate
	/* each tunable is one 4-byte word, hence the 4-byte alignment */
4765*0Sstevel@tonic-gate	.align	4
4766*0Sstevel@tonic-gate	DGDEF(use_hw_bcopy)
4767*0Sstevel@tonic-gate	.word	1
4768*0Sstevel@tonic-gate	DGDEF(use_hw_copyio)
4769*0Sstevel@tonic-gate	.word	1
4770*0Sstevel@tonic-gate	DGDEF(use_hw_bzero)
4771*0Sstevel@tonic-gate	.word	1
4772*0Sstevel@tonic-gate	DGDEF(hw_copy_limit_1)
4773*0Sstevel@tonic-gate	.word	0
4774*0Sstevel@tonic-gate	DGDEF(hw_copy_limit_2)
4775*0Sstevel@tonic-gate	.word	0
4776*0Sstevel@tonic-gate	DGDEF(hw_copy_limit_4)
4777*0Sstevel@tonic-gate	.word	0
4778*0Sstevel@tonic-gate	DGDEF(hw_copy_limit_8)
4779*0Sstevel@tonic-gate	.word	0
4780*0Sstevel@tonic-gate
	/*
	 * Return to the text section for the routines that follow.
	 * NOTE(review): presumably DGDEF leaves the assembler in a data
	 * section, which is why .text is re-selected here -- confirm in
	 * sys/asm_linkage.h.
	 */
4781*0Sstevel@tonic-gate	.align	64
4782*0Sstevel@tonic-gate	.section ".text"
4783*0Sstevel@tonic-gate#endif /* !lint */
4784*0Sstevel@tonic-gate
4785*0Sstevel@tonic-gate
4786*0Sstevel@tonic-gate/*
4787*0Sstevel@tonic-gate * hwblkclr - clears block-aligned, block-multiple-sized regions that are
4788*0Sstevel@tonic-gate * at least 256 bytes in length using spitfire's block stores.  If
4789*0Sstevel@tonic-gate * the criteria for using this routine are not met then it calls bzero
4790*0Sstevel@tonic-gate * and returns 1.  Otherwise 0 is returned indicating success.
4791*0Sstevel@tonic-gate * Caller is responsible for ensuring use_hw_bzero is true and that
4792*0Sstevel@tonic-gate * kpreempt_disable() has been called.
4793*0Sstevel@tonic-gate */
4794*0Sstevel@tonic-gate#ifdef lint
4795*0Sstevel@tonic-gate/*ARGSUSED*/
4796*0Sstevel@tonic-gateint
4797*0Sstevel@tonic-gatehwblkclr(void *addr, size_t len)
4798*0Sstevel@tonic-gate{
4799*0Sstevel@tonic-gate	return(0);
4800*0Sstevel@tonic-gate}
4801*0Sstevel@tonic-gate#else /* lint */
	/*
	 * Outline of the assembly below:
	 *   1. Punt to bzero (and return 1) unless addr is 64-byte aligned,
	 *      len is at least 256, and len is a multiple of 64.
	 *   2. If FPRS_FEF was already set in %fprs on entry, save the
	 *      64-byte register block starting at %d0 to an aligned stack
	 *      slot.
	 *   3. Zero %d0-%d14 and block-store them over the region, 256 bytes
	 *      per pass, finishing a 64/128/192-byte tail with a computed
	 *      jump into the store sequence.
	 *   4. Restore the saved block (if any) and %fprs, then return 0.
	 * NOTE(review): the register list that follows names %l2, but %l2
	 * is not referenced anywhere in this routine.
	 */
4802*0Sstevel@tonic-gate	! %i0 - start address
4803*0Sstevel@tonic-gate	! %i1 - length of region (multiple of 64)
4804*0Sstevel@tonic-gate	! %l0 - saved fprs
4805*0Sstevel@tonic-gate	! %l1 - pointer to saved %d0 block
4806*0Sstevel@tonic-gate	! %l2 - saved curthread->t_lwp
4807*0Sstevel@tonic-gate
4808*0Sstevel@tonic-gate	ENTRY(hwblkclr)
4809*0Sstevel@tonic-gate	! get another window w/space for one aligned block of saved fpregs
4810*0Sstevel@tonic-gate	save	%sp, -SA(MINFRAME + 2*64), %sp
4811*0Sstevel@tonic-gate
4812*0Sstevel@tonic-gate	! Must be block-aligned
4813*0Sstevel@tonic-gate	andcc	%i0, (64-1), %g0
4814*0Sstevel@tonic-gate	bnz,pn	%ncc, 1f
4815*0Sstevel@tonic-gate	  nop
4816*0Sstevel@tonic-gate
4817*0Sstevel@tonic-gate	! ... and must be 256 bytes or more
4818*0Sstevel@tonic-gate	cmp	%i1, 256
4819*0Sstevel@tonic-gate	blu,pn	%ncc, 1f
4820*0Sstevel@tonic-gate	  nop
4821*0Sstevel@tonic-gate
4822*0Sstevel@tonic-gate	! ... and length must be a multiple of 64
4823*0Sstevel@tonic-gate	andcc	%i1, (64-1), %g0
4824*0Sstevel@tonic-gate	bz,pn	%ncc, 2f
4825*0Sstevel@tonic-gate	  nop
4826*0Sstevel@tonic-gate
4827*0Sstevel@tonic-gate1:	! punt, call bzero but notify the caller that bzero was used
4828*0Sstevel@tonic-gate	mov	%i0, %o0
4829*0Sstevel@tonic-gate	call	bzero
4830*0Sstevel@tonic-gate	  mov	%i1, %o1
4831*0Sstevel@tonic-gate	ret
4832*0Sstevel@tonic-gate	restore	%g0, 1, %o0	! return (1) - did not use block operations
4833*0Sstevel@tonic-gate
4834*0Sstevel@tonic-gate2:	rd	%fprs, %l0		! check for unused fp
4835*0Sstevel@tonic-gate	btst	FPRS_FEF, %l0
4836*0Sstevel@tonic-gate	bz	1f
4837*0Sstevel@tonic-gate	  nop
4838*0Sstevel@tonic-gate
4839*0Sstevel@tonic-gate	! save in-use fpregs on stack
	/*
	 * %l1 = (%fp + STACK_BIAS - 65) rounded down to a 64-byte boundary;
	 * the 2*64 bytes reserved by the save above leave room for one
	 * aligned 64-byte block wherever the frame happens to fall.
	 */
4840*0Sstevel@tonic-gate	membar	#Sync
4841*0Sstevel@tonic-gate	add	%fp, STACK_BIAS - 65, %l1
4842*0Sstevel@tonic-gate	and	%l1, -64, %l1
4843*0Sstevel@tonic-gate	stda	%d0, [%l1]ASI_BLK_P
4844*0Sstevel@tonic-gate
	/*
	 * Set %fprs to FPRS_FEF and make ASI_BLK_P the implicit %asi used by
	 * the "stda ... %asi" stores in the clearing loop.
	 */
4845*0Sstevel@tonic-gate1:	membar	#StoreStore|#StoreLoad|#LoadStore
4846*0Sstevel@tonic-gate	wr	%g0, FPRS_FEF, %fprs
4847*0Sstevel@tonic-gate	wr	%g0, ASI_BLK_P, %asi
4848*0Sstevel@tonic-gate
4849*0Sstevel@tonic-gate	! Clear block
4850*0Sstevel@tonic-gate	fzero	%d0
4851*0Sstevel@tonic-gate	fzero	%d2
4852*0Sstevel@tonic-gate	fzero	%d4
4853*0Sstevel@tonic-gate	fzero	%d6
4854*0Sstevel@tonic-gate	fzero	%d8
4855*0Sstevel@tonic-gate	fzero	%d10
4856*0Sstevel@tonic-gate	fzero	%d12
4857*0Sstevel@tonic-gate	fzero	%d14
4858*0Sstevel@tonic-gate
	/* %i3 = number of bytes cleared by one full pass (4 block stores) */
4859*0Sstevel@tonic-gate	mov	256, %i3
4860*0Sstevel@tonic-gate	ba	.pz_doblock
4861*0Sstevel@tonic-gate	  nop
4862*0Sstevel@tonic-gate
	/*
	 * Full pass: the store to +192 is issued from the annulled delay slot
	 * of the bgeu,a at .pz_doblock; the three stores here cover +128,
	 * +64 and +0.  .pz_zinst then advances %i0 and shrinks %i1 by %i3.
	 */
4863*0Sstevel@tonic-gate.pz_blkstart:
4864*0Sstevel@tonic-gate      ! stda	%d0, [%i0+192]%asi  ! in dly slot of branch that got us here
4865*0Sstevel@tonic-gate	stda	%d0, [%i0+128]%asi
4866*0Sstevel@tonic-gate	stda	%d0, [%i0+64]%asi
4867*0Sstevel@tonic-gate	stda	%d0, [%i0]%asi
4868*0Sstevel@tonic-gate.pz_zinst:
4869*0Sstevel@tonic-gate	add	%i0, %i3, %i0
4870*0Sstevel@tonic-gate	sub	%i1, %i3, %i1
4871*0Sstevel@tonic-gate.pz_doblock:
4872*0Sstevel@tonic-gate	cmp	%i1, 256
4873*0Sstevel@tonic-gate	bgeu,a	%ncc, .pz_blkstart
4874*0Sstevel@tonic-gate	  stda	%d0, [%i0+192]%asi
4875*0Sstevel@tonic-gate
	/*
	 * Fewer than 256 bytes remain.  The andn below sits in the delay slot
	 * of the blu, so it executes whether or not the branch is taken.
	 *
	 * Tail handling: %i3 = remaining length rounded down to a multiple of
	 * 64 (64, 128 or 192 here).  %i3 >> 4 is 4 bytes -- one instruction --
	 * per 64-byte block, so jumping that many bytes before .pz_zinst
	 * executes exactly one stda per remaining block; the add/sub at
	 * .pz_zinst then account for %i3 bytes and the loop test falls
	 * through to .pz_finish.
	 */
4876*0Sstevel@tonic-gate	cmp	%i1, 64
4877*0Sstevel@tonic-gate	blu	%ncc, .pz_finish
4878*0Sstevel@tonic-gate
4879*0Sstevel@tonic-gate	andn	%i1, (64-1), %i3
4880*0Sstevel@tonic-gate	srl	%i3, 4, %i2		! using blocks, 1 instr / 16 words
4881*0Sstevel@tonic-gate	set	.pz_zinst, %i4
4882*0Sstevel@tonic-gate	sub	%i4, %i2, %i4
4883*0Sstevel@tonic-gate	jmp	%i4
4884*0Sstevel@tonic-gate	  nop
4885*0Sstevel@tonic-gate
	/*
	 * Done clearing.  If FPRS_FEF was clear on entry nothing was saved:
	 * the annulled delay slot restores %fprs only on that (taken) path.
	 * Otherwise reload the saved block from [%l1] before restoring %fprs.
	 */
4886*0Sstevel@tonic-gate.pz_finish:
4887*0Sstevel@tonic-gate	membar	#Sync
4888*0Sstevel@tonic-gate	btst	FPRS_FEF, %l0
4889*0Sstevel@tonic-gate	bz,a	.pz_finished
4890*0Sstevel@tonic-gate	  wr	%l0, 0, %fprs		! restore fprs
4891*0Sstevel@tonic-gate
4892*0Sstevel@tonic-gate	! restore fpregs from stack
4893*0Sstevel@tonic-gate	ldda	[%l1]ASI_BLK_P, %d0
4894*0Sstevel@tonic-gate	membar	#Sync
4895*0Sstevel@tonic-gate	wr	%l0, 0, %fprs		! restore fprs
4896*0Sstevel@tonic-gate
	/* block-store path always returns 0 (the bzero punt above returns 1) */
4897*0Sstevel@tonic-gate.pz_finished:
4898*0Sstevel@tonic-gate	ret
4899*0Sstevel@tonic-gate	restore	%g0, 0, %o0		! return (bzero or not)
4900*0Sstevel@tonic-gate	SET_SIZE(hwblkclr)
4901*0Sstevel@tonic-gate#endif	/* lint */
4902*0Sstevel@tonic-gate
4903*0Sstevel@tonic-gate#ifdef	lint
4904*0Sstevel@tonic-gate/* Copy 32 bytes of data from src to dst using physical addresses */
4905*0Sstevel@tonic-gate/*ARGSUSED*/
4906*0Sstevel@tonic-gatevoid
4907*0Sstevel@tonic-gatehw_pa_bcopy32(uint64_t src, uint64_t dst)
4908*0Sstevel@tonic-gate{}
4909*0Sstevel@tonic-gate#else	/*!lint */
4910*0Sstevel@tonic-gate
4911*0Sstevel@tonic-gate	/*
4912*0Sstevel@tonic-gate	 * Copy 32 bytes (four 8-byte words) from src (%o0) to dst (%o1) through ASI_MEM, i.e.
4913*0Sstevel@tonic-gate	 * using physical addresses, with PSTATE_IE cleared meanwhile. Clobbers %g1, %g2, %o0-%o5.
4914*0Sstevel@tonic-gate	 */
4915*0Sstevel@tonic-gate	ENTRY_NP(hw_pa_bcopy32)
4916*0Sstevel@tonic-gate	rdpr    %pstate, %g1		! %g1 = caller's %pstate, kept for the restore at exit
4917*0Sstevel@tonic-gate	andn    %g1, PSTATE_IE, %g2	! %g2 = same %pstate with the PSTATE_IE bit cleared
4918*0Sstevel@tonic-gate	wrpr    %g0, %g2, %pstate	! install it (wrpr writes rs1 xor rs2; %g0 is zero)
4919*0Sstevel@tonic-gate
4920*0Sstevel@tonic-gate	ldxa    [%o0]ASI_MEM, %o2	! %o2 = 8 bytes at src + 0
4921*0Sstevel@tonic-gate	add     %o0, 8, %o0		! step src by hand: this ldxa form takes no immediate offset
4922*0Sstevel@tonic-gate	ldxa    [%o0]ASI_MEM, %o3	! %o3 = 8 bytes at src + 8
4923*0Sstevel@tonic-gate	add     %o0, 8, %o0
4924*0Sstevel@tonic-gate	ldxa    [%o0]ASI_MEM, %o4	! %o4 = 8 bytes at src + 16
4925*0Sstevel@tonic-gate	add     %o0, 8, %o0
4926*0Sstevel@tonic-gate	ldxa    [%o0]ASI_MEM, %o5	! %o5 = 8 bytes at src + 24; all loads precede all stores
4927*0Sstevel@tonic-gate	stxa    %o2, [%o1]ASI_MEM	! dst + 0  (NOTE(review): 8-byte accesses presumably need 8-byte-aligned src/dst - confirm with callers)
4928*0Sstevel@tonic-gate	add     %o1, 8, %o1
4929*0Sstevel@tonic-gate	stxa    %o3, [%o1]ASI_MEM	! dst + 8
4930*0Sstevel@tonic-gate	add     %o1, 8, %o1
4931*0Sstevel@tonic-gate	stxa    %o4, [%o1]ASI_MEM	! dst + 16
4932*0Sstevel@tonic-gate	add     %o1, 8, %o1
4933*0Sstevel@tonic-gate	stxa    %o5, [%o1]ASI_MEM	! dst + 24
4934*0Sstevel@tonic-gate
4935*0Sstevel@tonic-gate	membar	#Sync			! wait for the ASI stores to complete before returning
4936*0Sstevel@tonic-gate	retl
4937*0Sstevel@tonic-gate	  wrpr    %g0, %g1, %pstate	! delay slot: put back the %pstate saved on entry
4938*0Sstevel@tonic-gate	SET_SIZE(hw_pa_bcopy32)
4939*0Sstevel@tonic-gate#endif /* lint */
4940