/*	$NetBSD: memset.S,v 1.2 2013/03/17 02:12:41 christos Exp $	*/

/*
 * Copyright (c) 1996-2002 Eduardo Horvath
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR  ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR  BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 */
#include "strmacros.h"
#if defined(LIBC_SCCS) && !defined(lint)
RCSID("$NetBSD: memset.S,v 1.2 2013/03/17 02:12:41 christos Exp $")
#endif  /* LIBC_SCCS and not lint */


/*
 * XXXXXXXXXXXXXXXXXXXX
 * We need to make sure that this doesn't use floating point
 * before our trap handlers are installed or we could panic
 * XXXXXXXXXXXXXXXXXXXX
 */
/*
 * memset(addr, c, len)
 *
 * We want to use VIS instructions if we're clearing out 256 bytes or
 * more, but to do that we need to properly save and restore the
 * FP registers.  Unfortunately the code to do that in the kernel needs
 * to keep track of the current owner of the FPU, hence the different
 * code.
 *
 * XXXXX To produce more efficient code, we do not allow lengths
 * greater than 0x8000000000000000, which would be negative when
 * treated as signed 64-bit values.  This should not really be an
 * issue since the VA hole should cause any such ranges to fail anyway.
 */
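/*
 * Illustrative C outline of the non-VIS path below (a sketch only; the
 * assembly also preserves the return value in %o4 and leans on annulled
 * delay slots):
 *
 *	char *p = addr;
 *	while (((uintptr_t)p & 7) != 0 && len-- > 0)
 *		*p++ = c;			// byte stores until 8-byte aligned
 *	uint64_t pat = (uint8_t)c * 0x0101010101010101ULL;
 *	for (; len >= 8; len -= 8, p += 8)
 *		*(uint64_t *)p = pat;		// one 64-bit store per iteration
 *	if (len & 4) { *(uint32_t *)p = pat; p += 4; }
 *	if (len & 2) { *(uint16_t *)p = pat; p += 2; }
 *	if (len & 1) { *p = c; }
 *	return addr;
 */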
#if !defined(_KERNEL) || defined(_RUMPKERNEL)
ENTRY(bzero)
	! %o0 = addr, %o1 = len
	mov	%o1, %o2
	clr	%o1			! set pattern to zero
#endif
ENTRY(memset)
	! %o0 = addr, %o1 = pattern, %o2 = len
	mov	%o0, %o4		! Save original pointer

Lmemset_internal:
	btst	7, %o0			! Word aligned?
	bz,pn	%xcc, 0f
	 nop
	inc	%o0
	deccc	%o2			! Store up to 7 bytes
	bge,a,pt	CCCR, Lmemset_internal
	 stb	%o1, [%o0 - 1]
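	!! The annul bit on the bge above means the stb in its delay slot
	!! runs only when we loop back (count not yet exhausted); if the
	!! count ran out before we reached alignment, we fall through to
	!! the retl below without storing.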

	retl				! Duplicate Lmemset_done
	 mov	%o4, %o0
0:
	/*
	 * Duplicate the pattern so it fills 64-bits.
	 */
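	/*
	 * Worked example of the shift/or sequence below: starting from
	 * pattern byte 0xAB,
	 *
	 *	0xAB | (0xAB << 8)		-> 0xABAB
	 *	0xABAB | (0xABAB << 16)		-> 0xABABABAB
	 *	0xABABABAB | (0xABABABAB << 32)	-> 0xABABABABABABABAB
	 *
	 * i.e. the byte is replicated into all eight byte lanes.
	 */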
	andcc	%o1, 0x0ff, %o1		! No need to extend zero
	bz,pt	%icc, 1f
	 sllx	%o1, 8, %o3		! sigh.  all dependent insns.
	or	%o1, %o3, %o1
	sllx	%o1, 16, %o3
	or	%o1, %o3, %o1
	sllx	%o1, 32, %o3
	or	%o1, %o3, %o1
1:
#ifdef USE_BLOCK_STORE_LOAD
	!! Now we are 64-bit aligned
	cmp	%o2, 256		! Use block clear if len >= 256
	bge,pt	CCCR, Lmemset_block	! use block store insns
#endif	/* USE_BLOCK_STORE_LOAD */
	 deccc	8, %o2
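	!! %o2 is now (len - 8); the longword loop below keeps that bias,
	!! so "%o2 < 0" means fewer than 8 bytes remain.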
Lmemset_longs:
	bl,pn	CCCR, Lmemset_cleanup	! Less than 8 bytes left
	 nop
3:
	inc	8, %o0
	deccc	8, %o2
	bge,pt	CCCR, 3b
	 stx	%o1, [%o0 - 8]		! Do 1 longword at a time

	/*
	 * Len is in [-8..-1] where -8 => done, -7 => 1 byte left to set,
	 * -6 => two bytes, etc.  Mop up this remainder, if any.
	 */
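	/*
	 * For example, an original tail of 5 bytes leaves %o2 == -3, whose
	 * low three bits are 101, so the 4-byte and 1-byte stores below
	 * fire and the 2-byte store is skipped.
	 */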
Lmemset_cleanup:
	btst	4, %o2
	bz,pt	CCCR, 5f		! if (len & 4) {
	 nop
	stw	%o1, [%o0]		!	*(int *)addr = pattern;
	inc	4, %o0			!	addr += 4;
5:
	btst	2, %o2
	bz,pt	CCCR, 7f		! if (len & 2) {
	 nop
	sth	%o1, [%o0]		!	*(short *)addr = pattern;
	inc	2, %o0			!	addr += 2;
7:
	btst	1, %o2
	bnz,a	%icc, Lmemset_done	! if (len & 1)
	 stb	%o1, [%o0]		!	*addr = pattern;
Lmemset_done:
	retl
	 mov	%o4, %o0		! Restore pointer for memset (ugh)

#ifdef USE_BLOCK_STORE_LOAD
Lmemset_block:
	sethi	%hi(block_disable), %o3
	ldx	[ %o3 + %lo(block_disable) ], %o3
	brnz,pn	%o3, Lmemset_longs
	!! Make sure our trap table is installed
	set	_C_LABEL(trapbase), %o5
	rdpr	%tba, %o3
	sub	%o3, %o5, %o3
	brnz,pn	%o3, Lmemset_longs	! No, then don't use block load/store
	 nop
/*
 * Kernel:
 *
 * Here we use VIS instructions to do a block clear of a page.
 * But before we can do that we need to save and enable the FPU.
 * The last owner of the FPU registers is fplwp, and
 * fplwp->l_md.md_fpstate is the current fpstate.  If that's not
 * null, call savefpstate() with it to store our current fp state.
 *
 * Next, allocate an aligned fpstate on the stack.  We will properly
 * nest calls on a particular stack so this should not be a problem.
 *
 * Now we grab either curlwp (or, if we're on the interrupt stack,
 * lwp0).  We stash its existing fpstate in a local register and
 * put our new fpstate in curlwp->l_md.md_fpstate.  We point
 * fplwp at curlwp (or lwp0) and enable the FPU.
 *
 * If we are ever preempted, our FPU state will be saved in our
 * fpstate.  Then, when we're resumed and we take an FPDISABLED
 * trap, the trap handler will be able to fish our FPU state out
 * of curlwp (or lwp0).
 *
 * On exiting this routine we undo the damage: restore the original
 * pointer to curlwp->l_md.md_fpstate, clear our fplwp, and disable
 * the FPU.
 *
 */
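
/*
 * Rough pseudo-C of the sequence just described, as a reading aid only
 * (this is not the actual body of the ENABLE_FPU()/RESTORE_FPU macros;
 * on_intrstack(), fpu_enable(), fpu_disable() and stack_fpstate are
 * placeholder names):
 *
 *	l = on_intrstack() ? &lwp0 : curlwp;
 *	if (fplwp != NULL && fplwp->l_md.md_fpstate != NULL)
 *		savefpstate(fplwp->l_md.md_fpstate);	// park the old owner's state
 *	saved = l->l_md.md_fpstate;			// remember the real fpstate
 *	l->l_md.md_fpstate = &stack_fpstate;		// aligned temporary on our stack
 *	fplwp = l;					// we now own the FPU
 *	fpu_enable();
 *	... VIS block stores ...
 *	l->l_md.md_fpstate = saved;			// undo the damage
 *	fplwp = NULL;
 *	fpu_disable();
 */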

	ENABLE_FPU(0)

	!! We are now 8-byte aligned.  We need to become 64-byte aligned.
	btst	63, %i0
	bz,pt	CCCR, 2f
	 nop
1:
	stx	%i1, [%i0]
	inc	8, %i0
	btst	63, %i0
	bnz,pt	%xcc, 1b
	 dec	8, %i2

2:
	brz	%i1, 3f					! Skip the memory op
	 fzero	%f0					! if pattern is 0

#ifdef _LP64
	stx	%i1, [%i0]				! Flush this puppy to RAM
	membar	#StoreLoad
	ldd	[%i0], %f0
#else
	stw	%i1, [%i0]				! Flush this puppy to RAM
	membar	#StoreLoad
	ld	[%i0], %f0
	fmovsa	%icc, %f0, %f1
#endif
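	!! (The store/reload above gets the pattern into %f0: lacking a
	!! direct integer-to-FP register move, the value is bounced
	!! through the destination buffer itself.)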

3:
	fmovd	%f0, %f2				! Duplicate the pattern
	fmovd	%f0, %f4
	fmovd	%f0, %f6
	fmovd	%f0, %f8
	fmovd	%f0, %f10
	fmovd	%f0, %f12
	fmovd	%f0, %f14

	!! Remember: we were 8 bytes too far
	dec	56, %i2					! Go one iteration too far
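	!! (Together with the earlier "deccc 8", %i2 is now bytes-remaining - 64.)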
5:
	stda	%f0, [%i0] ASI_STORE			! Store 64 bytes
	deccc	BLOCK_SIZE, %i2
	bg,pt	%icc, 5b
	 inc	BLOCK_SIZE, %i0

	membar	#Sync
/*
 * We've saved our possible fpstate, now disable the fpu
 * and continue with life.
 */
	RESTORE_FPU
	addcc	%i2, 56, %i2				! Restore the count
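	!! %i2 is back to bytes-remaining - 8, the bias Lmemset_longs expects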
	ba,pt	%xcc, Lmemset_longs			! Finish up the remainder
	 restore
#endif	/* USE_BLOCK_STORE_LOAD */