/*	$NetBSD: memset.S,v 1.3 2021/08/13 20:47:54 andvar Exp $	*/

/*
 * Copyright (c) 1996-2002 Eduardo Horvath
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR  ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR  BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 */
#include "strmacros.h"
#if defined(LIBC_SCCS) && !defined(lint)
RCSID("$NetBSD: memset.S,v 1.3 2021/08/13 20:47:54 andvar Exp $")
#endif  /* LIBC_SCCS and not lint */


/*
 * XXXXXXXXXXXXXXXXXXXX
 * We need to make sure that this doesn't use floating point
 * before our trap handlers are installed or we could panic
 * XXXXXXXXXXXXXXXXXXXX
 */
/*
 * memset(addr, c, len)
 *
 * We want to use VIS instructions if we're clearing out more than
 * 256 bytes, but to do that we need to properly save and restore the
 * FP registers.  Unfortunately the code to do that in the kernel needs
 * to keep track of the current owner of the FPU, hence the different
 * code.
 *
 * XXXXX To produce more efficient code, we do not allow lengths
 * greater than or equal to 0x8000000000000000, which are negative
 * numbers.  This should not really be an issue since the VA hole
 * should cause any such ranges to fail anyway.
 */
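/*
 * For reference only, a minimal C sketch of the non-block path below
 * (this function is hypothetical and not part of the build; it mirrors
 * the byte-alignment loop, the 8-byte store loop and the 4/2/1-byte
 * tail handling):
 *
 *	#include <stdint.h>
 *	#include <stddef.h>
 *
 *	static void *
 *	memset_sketch(void *addr, int c, size_t len)
 *	{
 *		uint8_t *p = addr;
 *		uint64_t pat = (uint8_t)c;
 *
 *		pat |= pat << 8;
 *		pat |= pat << 16;
 *		pat |= pat << 32;
 *		while (len > 0 && ((uintptr_t)p & 7) != 0) {
 *			*p++ = (uint8_t)c;	// byte stores until aligned
 *			len--;
 *		}
 *		for (; len >= 8; len -= 8, p += 8)
 *			*(uint64_t *)p = pat;	// one longword at a time
 *		if (len & 4) { *(uint32_t *)p = (uint32_t)pat; p += 4; }
 *		if (len & 2) { *(uint16_t *)p = (uint16_t)pat; p += 2; }
 *		if (len & 1)
 *			*p = (uint8_t)c;
 *		return addr;
 *	}
 */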
#if !defined(_KERNEL) || defined(_RUMPKERNEL)
ENTRY(bzero)
	! %o0 = addr, %o1 = len
	mov	%o1, %o2
	clr	%o1			! set pattern
#endif
ENTRY(memset)
	! %o0 = addr, %o1 = pattern, %o2 = len
	mov	%o0, %o4		! Save original pointer

Lmemset_internal:
	btst	7, %o0			! Word aligned?
	bz,pn	%xcc, 0f
	 nop
	inc	%o0
	deccc	%o2			! Store up to 7 bytes
	bge,a,pt	CCCR, Lmemset_internal
	 stb	%o1, [%o0 - 1]

	retl				! Duplicate Lmemset_done
	 mov	%o4, %o0
0:
	/*
	 * Duplicate the pattern so it fills 64-bits.
	 */
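	/*
	 * Illustrative example (not from the original source): a pattern
	 * byte of 0xab becomes 0xabab after the first shift/or below,
	 * 0xabababab after the second, and 0xabababababababab after the
	 * third, so every byte of the 64-bit store pattern matches.
	 */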
	andcc	%o1, 0x0ff, %o1		! No need to extend zero
	bz,pt	%icc, 1f
	 sllx	%o1, 8, %o3		! sigh.  all dependent insns.
	or	%o1, %o3, %o1
	sllx	%o1, 16, %o3
	or	%o1, %o3, %o1
	sllx	%o1, 32, %o3
	 or	%o1, %o3, %o1
1:
#ifdef USE_BLOCK_STORE_LOAD
	!! Now we are 64-bit aligned
	cmp	%o2, 256		! Use block clear if len > 256
	bge,pt	CCCR, Lmemset_block	! use block store insns
#endif	/* USE_BLOCK_STORE_LOAD */
	 deccc	8, %o2
Lmemset_longs:
	bl,pn	CCCR, Lmemset_cleanup	! Less than 8 bytes left
	 nop
3:
	inc	8, %o0
	deccc	8, %o2
	bge,pt	CCCR, 3b
	 stx	%o1, [%o0 - 8]		! Do 1 longword at a time

	/*
	 * Len is in [-8..-1] where -8 => done, -7 => 1 byte to zero,
	 * -6 => two bytes, etc.  Mop up this remainder, if any.
	 */
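	/*
	 * Worked example (illustrative): with 3 trailing bytes left,
	 * %o2 is -5, whose low bits are ...11111011.  The btst 4 below
	 * finds bit 2 clear (no word store), btst 2 finds bit 1 set
	 * (one halfword store) and btst 1 finds bit 0 set (one byte
	 * store); the low three bits of the negative count equal the
	 * number of trailing bytes.
	 */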
Lmemset_cleanup:
	btst	4, %o2
	bz,pt	CCCR, 5f		! if (len & 4) {
	 nop
	stw	%o1, [%o0]		!	*(int *)addr = 0;
	inc	4, %o0			!	addr += 4;
5:
	btst	2, %o2
	bz,pt	CCCR, 7f		! if (len & 2) {
	 nop
	sth	%o1, [%o0]		!	*(short *)addr = 0;
	inc	2, %o0			!	addr += 2;
7:
	btst	1, %o2
	bnz,a	%icc, Lmemset_done	! if (len & 1)
	 stb	%o1, [%o0]		!	*addr = 0;
Lmemset_done:
	retl
	 mov	%o4, %o0		! Restore pointer for memset (ugh)

#ifdef USE_BLOCK_STORE_LOAD
Lmemset_block:
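	!! If block_disable is set, fall back to the longword loop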
	sethi	%hi(block_disable), %o3
	ldx	[ %o3 + %lo(block_disable) ], %o3
	brnz,pn	%o3, Lmemset_longs
	!! Make sure our trap table is installed
	set	_C_LABEL(trapbase), %o5
	rdpr	%tba, %o3
	sub	%o3, %o5, %o3
	brnz,pn	%o3, Lmemset_longs	! No, then don't use block load/store
	 nop
/*
 * Kernel:
 *
 * Here we use VIS instructions to do a block clear of a page.
 * But before we can do that we need to save and enable the FPU.
 * The last owner of the FPU registers is fplwp, and
 * fplwp->l_md.md_fpstate is the current fpstate.  If that's not
 * null, call savefpstate() with it to store our current fp state.
 *
 * Next, allocate an aligned fpstate on the stack.  We will properly
 * nest calls on a particular stack so this should not be a problem.
 *
 * Now we grab either curlwp (or if we're on the interrupt stack
 * lwp0).  We stash its existing fpstate in a local register and
 * put our new fpstate in curlwp->l_md.md_fpstate.  We point
 * fplwp at curlwp (or lwp0) and enable the FPU.
 *
 * If we are ever preempted, our FPU state will be saved in our
 * fpstate.  Then, when we're resumed and we take an FPDISABLED
 * trap, the trap handler will be able to fish our FPU state out
 * of curlwp (or lwp0).
 *
 * On exiting this routine we undo the damage: restore the original
 * pointer to curlwp->l_md.md_fpstate, clear our fplwp, and disable
 * the FPU.
 *
 */
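/*
 * Condensed as pseudo-C, using only the names mentioned above (the
 * actual work is done by the ENABLE_FPU/RESTORE_FPU macros from
 * strmacros.h; this is a rough outline, not their implementation):
 *
 *	if (fplwp != NULL && fplwp->l_md.md_fpstate != NULL)
 *		savefpstate(fplwp->l_md.md_fpstate);	// save old owner
 *	l = curlwp;			// or lwp0 on the interrupt stack
 *	oldfpstate = l->l_md.md_fpstate;
 *	l->l_md.md_fpstate = <aligned fpstate on our stack>;
 *	fplwp = l;			// take ownership of the FPU
 *	<enable the FPU>
 *	... block stores ...
 *	l->l_md.md_fpstate = oldfpstate;	// undo the damage
 *	fplwp = NULL;
 *	<disable the FPU>
 */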

	ENABLE_FPU(0)

	!! We are now 8-byte aligned.  We need to become 64-byte aligned.
	btst	63, %i0
	bz,pt	CCCR, 2f
	 nop
1:
	stx	%i1, [%i0]
	inc	8, %i0
	btst	63, %i0
	bnz,pt	%xcc, 1b
	 dec	8, %i2

2:
	brz	%i1, 3f					! Skip the memory op
	 fzero	%f0					! if pattern is 0

#ifdef _LP64
	stx	%i1, [%i0]				! Flush this puppy to RAM
	membar	#StoreLoad
	ldd	[%i0], %f0
#else
	stw	%i1, [%i0]				! Flush this puppy to RAM
	membar	#StoreLoad
	ld	[%i0], %f0
	fmovsa	%icc, %f0, %f1
#endif

3:
	fmovd	%f0, %f2				! Duplicate the pattern
	fmovd	%f0, %f4
	fmovd	%f0, %f6
	fmovd	%f0, %f8
	fmovd	%f0, %f10
	fmovd	%f0, %f12
	fmovd	%f0, %f14

	!! Remember: we were 8 bytes too far
	dec	56, %i2					! Go one iteration too far
5:
	stda	%f0, [%i0] ASI_STORE			! Store 64 bytes
	deccc	BLOCK_SIZE, %i2
	bg,pt	%icc, 5b
	 inc	BLOCK_SIZE, %i0

	membar	#Sync
/*
 * We've saved our possible fpstate, now disable the fpu
 * and continue with life.
 */
	RESTORE_FPU
	addcc	%i2, 56, %i2				! Restore the count
	ba,pt	%xcc, Lmemset_longs			! Finish up the remainder
	 restore
#endif	/* USE_BLOCK_STORE_LOAD */