xref: /netbsd-src/common/lib/libc/arch/sparc64/string/memset.S (revision ca8f29b6bf953eda3addd5415ebb64e0dcfcd44d)
1/*	$NetBSD: memset.S,v 1.1 2013/03/17 00:42:32 christos Exp $	*/
2
3/*
4 * Copyright (c) 1996-2002 Eduardo Horvath
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 *    notice, this list of conditions and the following disclaimer.
12 *
13 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR  ``AS IS'' AND
14 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
15 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
16 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR  BE LIABLE
17 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
18 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
19 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
20 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
21 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
22 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
23 * SUCH DAMAGE.
24 *
25 */
26#include "strmacros.h"
27
28/*
29 * XXXXXXXXXXXXXXXXXXXX
30 * We need to make sure that this doesn't use floating point
31 * before our trap handlers are installed or we could panic
32 * XXXXXXXXXXXXXXXXXXXX
33 */
34/*
35 * memset(addr, c, len)
36 *
37 * We want to use VIS instructions if we're clearing out more than
38 * 256 bytes, but to do that we need to properly save and restore the
39 * FP registers.  Unfortunately the code to do that in the kernel needs
40 * to keep track of the current owner of the FPU, hence the different
41 * code.
42 *
43 * XXXXX To produce more efficient code, we do not allow lengths of
44 * 0x8000000000000000 or more, which are negative as signed numbers.
45 * This should not really be an issue since the VA hole should
46 * cause any such ranges to fail anyway.
47 */
/*
 * bzero(addr, len): shuffle the arguments into memset(addr, 0, len)
 * form.  There is no return here; execution continues into the memset
 * code that follows.
 */
48#if !defined(_KERNEL) || defined(_RUMPKERNEL)
49ENTRY(bzero)
50	! %o0 = addr, %o1 = len
51	mov	%o1, %o2		! len moves to the register memset reads it from
52	mov	0, %o1			! pattern = 0
53#endif
/*
 * memset entry.  Until the address is 8-byte aligned, store the low
 * byte of the pattern one byte at a time; return early if the length
 * runs out before alignment is reached.
 */
54ENTRY(memset)
55	! %o0 = addr, %o1 = pattern, %o2 = len
56	mov	%o0, %o4		! Save original pointer (it is the return value)
57
58Lmemset_internal:
59	btst	7, %o0			! 8-byte aligned? (low 3 address bits clear)
60	bz,pn	%xcc, 0f		! Yes: go replicate the pattern
61	 nop
62	inc	%o0			! addr++ (the store below uses addr - 1)
63	deccc	%o2			! Store up to 7 bytes
64	bge,a,pt	CCCR, Lmemset_internal	! len still >= 0: store, re-test alignment
65	 stb	%o1, [%o0 - 1]		! Annulled slot: runs only when the branch is taken
66
67	retl				! Duplicate Lmemset_done
68	 mov	%o4, %o0		! Return the original pointer
690:
70	/*
71	 * Duplicate the pattern so it fills 64-bits.
	 * The pattern is first masked to its low byte; a zero byte skips
	 * the replication.  %o3 is used as scratch.
72	 */
73	andcc	%o1, 0x0ff, %o1		! No need to extend zero
74	bz,pt	%icc, 1f		! Pattern byte is 0: nothing to replicate
75	 sllx	%o1, 8, %o3		! sigh.  all dependent insns.
76	or	%o1, %o3, %o1		! Pattern now fills 16 bits
77	sllx	%o1, 16, %o3
78	or	%o1, %o3, %o1		! Pattern now fills 32 bits
79	sllx	%o1, 32, %o3
80	 or	%o1, %o3, %o1		! Pattern now fills 64 bits (no branch precedes this; not a delay slot)
811:
	! From here on len is kept biased by -8, so a negative value means
	! fewer than 8 bytes remain.
82#ifdef USE_BLOCK_STORE_LOAD
83	!! Now we are 64-bit aligned
84	cmp	%o2, 256		! Use block clear if len >= 256
85	bge,pt	CCCR, Lmemset_block	! use block store insns
86#endif	/* USE_BLOCK_STORE_LOAD */
87	 deccc	8, %o2			! len -= 8; this is the bge delay slot when the #ifdef is active
88Lmemset_longs:
89	bl,pn	CCCR, Lmemset_cleanup	! Less than 8 bytes left
90	 nop
913:
92	inc	8, %o0			! addr += 8 (the store below uses addr - 8)
93	deccc	8, %o2			! len -= 8
94	bge,pt	CCCR, 3b		! Loop while at least 8 more bytes remain
95	 stx	%o1, [%o0 - 8]		! Do 1 longword at a time
96
97	/*
98	 * Len is in [-8..-1] where -8 => done, -7 => 1 byte left to store,
99	 * -6 => two bytes, etc.  Mop up this remainder, if any.
	 * The low three bits of len give the number of bytes left, so at
	 * most one 4-byte, one 2-byte and one 1-byte store are needed.
100	 */
101Lmemset_cleanup:
102	btst	4, %o2
103	bz,pt	CCCR, 5f		! if (len & 4) {
104	 nop
105	stw	%o1, [%o0]		!	*(int *)addr = pattern;
106	inc	4, %o0			!	addr += 4;
1075:					! }
108	btst	2, %o2
109	bz,pt	CCCR, 7f		! if (len & 2) {
110	 nop
111	sth	%o1, [%o0]		!	*(short *)addr = pattern;
112	inc	2, %o0			!	addr += 2;
1137:					! }
114	btst	1, %o2
115	bnz,a	%icc, Lmemset_done	! if (len & 1)
116	 stb	%o1, [%o0]		!	*addr = pattern;  (annulled slot: runs only when the branch is taken)
117Lmemset_done:
118	retl
119	 mov	%o4, %o0		! Restore pointer for memset (ugh)
120
121#ifdef USE_BLOCK_STORE_LOAD
/*
 * Block-store path, entered from the len >= 256 test with len already
 * biased by -8.  Falls back to Lmemset_longs when block_disable is
 * nonzero or %tba does not match trapbase.  Nothing executed before
 * those fallback branches changes the condition codes, so the first
 * branch at Lmemset_longs still tests the result of the deccc that ran
 * on the way here.
 */
122Lmemset_block:
123	sethi	%hi(block_disable), %o3
124	ldx	[ %o3 + %lo(block_disable) ], %o3
125	brnz,pn	%o3, Lmemset_longs	! block_disable != 0: use the plain 8-byte loop
	! NOTE(review): no explicit delay-slot instruction follows the branch
	! above, so the start of the set expansion below would sit in the
	! slot.  That only writes scratch %o5, but confirm it is intended.
126	!! Make sure our trap table is installed
127	set	_C_LABEL(trapbase), %o5
128	rdpr	%tba, %o3
129	sub	%o3, %o5, %o3		! Zero only when %tba == trapbase
130	brnz,pn	%o3, Lmemset_longs	! No, then don't use block load/store
131	 nop
132/*
133 * Kernel:
134 *
135 * Here we use VIS instructions to do a block clear of a page.
136 * But before we can do that we need to save and enable the FPU.
137 * The last owner of the FPU registers is fplwp, and
138 * fplwp->l_md.md_fpstate is the current fpstate.  If that's not
139 * null, call savefpstate() with it to store our current fp state.
140 *
141 * Next, allocate an aligned fpstate on the stack.  We will properly
142 * nest calls on a particular stack so this should not be a problem.
143 *
144 * Now we grab either curlwp (or if we're on the interrupt stack
145 * lwp0).  We stash its existing fpstate in a local register and
146 * put our new fpstate in curlwp->p_md.md_fpstate.  We point
147 * fplwp at curlwp (or lwp0) and enable the FPU.
148 *
149 * If we are ever preempted, our FPU state will be saved in our
150 * fpstate.  Then, when we're resumed and we take an FPDISABLED
151 * trap, the trap handler will be able to fish our FPU state out
152 * of curlwp (or lwp0).
153 *
154 * On exiting this routine we undo the damage: restore the original
155 * pointer to curlwp->p_md.md_fpstate, clear our fplwp, and disable
156 * the FPU.
157 *
158 */
159
	! The code below addresses its arguments as %i0/%i1/%i2 and ends with
	! a restore, so ENABLE_FPU presumably opens a new register window
	! (TODO confirm against the macro definition).
160	ENABLE_FPU(0)
161
162	!! We are now 8-byte aligned.  We need to become 64-byte aligned.
163	btst	63, %i0
164	bz,pt	CCCR, 2f		! Already 64-byte aligned
165	 nop
1661:
167	stx	%i1, [%i0]		! Store 8 bytes of pattern
168	inc	8, %i0
169	btst	63, %i0
170	bnz,pt	%xcc, 1b		! Repeat until addr is 64-byte aligned
171	 dec	8, %i2			! Delay slot (not annulled): len -= 8 on every pass
172
1732:
174	brz	%i1, 3f					! Skip the memory op
175	 fzero	%f0					! if pattern is 0
176
	! Nonzero pattern: move it from the integer register into %f0 by
	! storing it at the destination address and loading it back.
177#ifdef _LP64
178	stx	%i1, [%i0]				! Flush this puppy to RAM
179	membar	#StoreLoad
180	ldd	[%i0], %f0
181#else
182	stw	%i1, [%i0]				! Flush this puppy to RAM
183	membar	#StoreLoad
184	ld	[%i0], %f0
185	fmovsa	%icc, %f0, %f1
186#endif
187
1883:
189	fmovd	%f0, %f2				! Duplicate the pattern
190	fmovd	%f0, %f4
191	fmovd	%f0, %f6
192	fmovd	%f0, %f8
193	fmovd	%f0, %f10
194	fmovd	%f0, %f12
195	fmovd	%f0, %f14
196
197	!! Remember: we were 8 bytes too far
	! With the existing -8 bias this brings the total bias to -64
	! (one block, assuming BLOCK_SIZE is 64 -- TODO confirm).
198	dec	56, %i2					! Go one iteration too far
1995:
200	stda	%f0, [%i0] ASI_STORE			! Store 64 bytes
201	deccc	BLOCK_SIZE, %i2
	! NOTE(review): this loop tests %icc while the other length tests in
	! this file use CCCR -- confirm this is intended for 64-bit lengths.
202	bg,pt	%icc, 5b
203	 inc	BLOCK_SIZE, %i0				! Delay slot: advance addr on every pass
204
205	membar	#Sync
206/*
207 * We've saved our possible fpstate, now disable the fpu
208 * and continue with life.
209 */
210	RESTORE_FPU
	! The addcc leaves len with the -8 bias again and sets the condition
	! codes that the first branch at Lmemset_longs tests.
211	addcc	%i2, 56, %i2				! Restore the count
212	ba,pt	%xcc, Lmemset_longs			! Finish up the remainder
213	 restore					! Delay slot: pop the register window
214#endif	/* USE_BLOCK_STORE_LOAD */
215