/*	$NetBSD: bzero.S,v 1.3 2001/11/30 02:25:50 mjl Exp $ */

/*-
 * Copyright (C) 2001	Martin J. Laubach <mjl@netbsd.org>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. The name of the author may not be used to endorse or promote products
 *    derived from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
/*----------------------------------------------------------------------*/

#include <machine/asm.h>
#ifdef _KERNEL
#include <assym.h>
#endif

#define USE_STSWX 0	/* don't. slower than trivial copy loop */

/*----------------------------------------------------------------------*/
/*
     void bzero(void *b r3, size_t len r4);
     void * memset(void *b r3, int c r4, size_t len r5);
*/
/*----------------------------------------------------------------------*/

#define r_dst	r3
#define r_len	r4
#define r_val	r0

		.text
		.align 4
ENTRY(bzero)
		li	r_val, 0		/* Value to stuff in */
		b	cb_memset

ENTRY(memset)
		cmplwi	cr1, r5, 0
		mr.	r0, r4
		mr	r8, r3
		beqlr-	cr1			/* Nothing to do */

		rlwimi	r0, r4, 8, 16, 23	/* word extend fill value */
		rlwimi	r0, r0, 16, 0, 15
		mr	r4, r5
		bne-	simple_fill		/* != 0, use trivial fill */
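/*
 * The two rlwimi instructions above replicate the low byte of the fill
 * value into every byte of r0.  Roughly equivalent C (an illustrative
 * sketch, not part of the original code):
 *
 *	unsigned int v = c & 0xff;
 *	v |= v << 8;			// 0x000000cc -> 0x0000cccc
 *	v |= v << 16;			// 0x0000cccc -> 0xcccccccc
 */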
cb_memset:

/*----------------------------------------------------------------------*/
#ifndef _KERNEL
		/* First find out cache line size */
#ifdef PIC
		mflr	r9
		bl	_GLOBAL_OFFSET_TABLE_@local-4
		mflr	r10
		mtlr	r9
		lwz	r5,cache_size@got(r10)
#else
		lis	r5,cache_size@h
		ori	r5,r5,cache_size@l
#endif
		lwz	r6, 0(r5)
		cmpwi	r6, -1
		bne+	cb_cacheline_known

/*----------------------------------------------------------------------*/
#define CTL_MACHDEP	7
#define CPU_CACHELINE	1

#define STKFRAME_SZ	48
#define MIB		8
#define OLDPLEN		16
#define R3_SAVE		20
#define R4_SAVE		24
#define R0_SAVE		28
#define R8_SAVE		32

		mflr	r6
		stw	r6, 4(r1)
		stwu	r1, -STKFRAME_SZ(r1)

		stw	r8, R8_SAVE(r1)
		stw	r3, R3_SAVE(r1)
		stw	r4, R4_SAVE(r1)
		stw	r0, R0_SAVE(r1)

		li	r0, CTL_MACHDEP		/* Construct MIB */
		stw	r0, MIB(r1)
		li	r0, CPU_CACHELINE
		stw	r0, MIB+4(r1)

		li	r0, 4			/* Oldlenp := 4 */
		stw	r0, OLDPLEN(r1)

		addi	r3, r1, MIB
		li	r4, 2			/* namelen */
		/* r5 already contains &cache_size */
		addi	r6, r1, OLDPLEN
		li	r7, 0
		li	r8, 0
		bl	PIC_PLT(_C_LABEL(sysctl))

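/*
 * The stores above set up a sysctl(3) request on the stack frame; the
 * call is roughly equivalent to this C (an illustrative sketch, not
 * part of the original code):
 *
 *	int mib[2] = { CTL_MACHDEP, CPU_CACHELINE };
 *	size_t olen = sizeof(int);
 *	sysctl(mib, 2, &cache_size, &olen, NULL, 0);
 */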
		lwz	r8, R8_SAVE(r1)
		lwz	r3, R3_SAVE(r1)
		lwz	r4, R4_SAVE(r1)
		lwz	r0, R0_SAVE(r1)

#ifdef PIC
		bl	_GLOBAL_OFFSET_TABLE_@local-4
		mflr	r10
		lwz	r9, cache_size@got(r10)
		lwz	r9, 0(r9)
#else
		lis	r5, cache_size@ha
		lwz	r9, cache_size@l(r5)
#endif
		la	r1, STKFRAME_SZ(r1)
		lwz	r5, 4(r1)
		mtlr	r5

		cntlzw	r6, r9			/* compute shift value */
		li	r5, 31
		subf	r5, r6, r5

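/*
 * cntlzw counts leading zero bits, so for a power-of-two cache line size
 * the sequence above computes its log2.  Roughly equivalent C (an
 * illustrative sketch using a GCC builtin, not part of the original code):
 *
 *	cache_sh = 31 - __builtin_clz(cache_size);
 */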
#ifdef PIC
		lwz	r6, cache_sh@got(r10)
		stw	r5, 0(r6)
#else
		lis	r6, cache_sh@ha
		stw	r5, cache_sh@l(r6)
#endif
/*----------------------------------------------------------------------*/
/* Okay, we know the cache line size (r9) and shift value (r10) */
cb_cacheline_known:
#ifdef PIC
		lwz	r5, cache_size@got(r10)
		lwz	r9, 0(r5)
		lwz	r5, cache_sh@got(r10)
		lwz	r10, 0(r5)
#else
		lis	r9, cache_size@ha
		lwz	r9, cache_size@l(r9)
		lis	r10, cache_sh@ha
		lwz	r10, cache_sh@l(r10)
#endif

#else /* _KERNEL */
		li	r9, CACHELINESIZE
#if CACHELINESIZE == 32
#define CACHELINESHIFT 5
#else
#error Define CACHELINESHIFT for your CACHELINESIZE
#endif
		li	r10, CACHELINESHIFT
#endif /* _KERNEL */
		/* Back in memory filling business */

		cmplwi	cr1, r_len, 0		/* Nothing to do? */
		add	r5, r9, r9
		cmplw	r_len, r5		/* <= 2*CL bytes to move? */
		beqlr-	cr1			/* then do nothing */

		blt+	simple_fill		/* a trivial fill routine */

		/* Word align the block, fill bytewise until dst even */

		andi.	r5, r_dst, 0x03
		li	r6, 4
		beq+	cb_aligned_w		/* already aligned to word? */

		subf	r5, r5, r6		/* bytes to fill to align4 */
#if USE_STSWX
		mtxer	r5
		stswx	r0, 0, r_dst
		add	r_dst, r5, r_dst
#else
		mtctr	r5

		subi	r_dst, r_dst, 1
1:		stbu	r_val, 1(r_dst)		/* Fill bytewise */
		bdnz	1b

		addi	r_dst, r_dst, 1
#endif
		subf	r_len, r5, r_len

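/*
 * The byte loop above advances the destination to a word boundary;
 * roughly equivalent C (an illustrative sketch, not part of the
 * original code):
 *
 *	while ((uintptr_t)dst & 3) {
 *		*dst++ = val;
 *		len--;
 *	}
 */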
cb_aligned_w:	/* Cache block align, fill wordwise until dst aligned */

		/* I know I have something to do since we had > 2*CL initially */
		/* so no need to check for r_len = 0 */

		rlwinm.	r5, r_dst, 30, 29, 31
		srwi	r6, r9, 2
		beq	cb_aligned_cb		/* already on CL boundary? */

		subf	r5, r5, r6		/* words to fill to alignment */
		mtctr	r5
		slwi	r5, r5, 2
		subf	r_len, r5, r_len

		subi	r_dst, r_dst, 4
1:		stwu	r_val, 4(r_dst)		/* Fill wordwise */
		bdnz	1b
		addi	r_dst, r_dst, 4

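/*
 * The word loop above advances the destination to a cache line boundary;
 * roughly equivalent C (an illustrative sketch, not part of the original
 * code):
 *
 *	while ((uintptr_t)dst & (cache_size - 1)) {
 *		*(uint32_t *)dst = val;
 *		dst += 4;
 *		len -= 4;
 *	}
 */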
cb_aligned_cb:	/* no need to check r_len, see above */

		srw.	r5, r_len, r10		/* Number of cache blocks */
		mtctr	r5
		beq	cblocks_done

		slw	r5, r5, r10
		subf	r_len, r5, r_len

1:		dcbz	0, r_dst		/* Clear blockwise */
		add	r_dst, r_dst, r9
		bdnz	1b

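/*
 * Each dcbz zeroes an entire cache line at once.  The loop above is
 * roughly (an illustrative C sketch; dcbz() stands in for the machine
 * instruction and is not a real function):
 *
 *	for (n = len >> cache_sh; n > 0; n--) {
 *		dcbz(dst);		// zero cache_size bytes at dst
 *		dst += cache_size;
 *	}
 */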
cblocks_done:	/* still CL aligned, but less than CL bytes left */
		cmplwi	cr1, r_len, 0
		cmplwi	r_len, 8
		beq-	cr1, sf_return

		blt-	sf_bytewise		/* <8 remaining? */
		b	sf_aligned_w

/*----------------------------------------------------------------------*/
wbzero:		li	r_val, 0

		cmplwi	r_len, 0
		beqlr-				/* Nothing to do */

simple_fill:
#if USE_STSWX
		cmplwi	cr1, r_len, 12		/* < 12 bytes to move? */
#else
		cmplwi	cr1, r_len, 8		/* < 8 bytes to move? */
#endif
		andi.	r5, r_dst, 0x03		/* bytes to fill to align4 */
		blt	cr1, sf_bytewise	/* trivial byte mover */

		li	r6, 4
		subf	r5, r5, r6
		beq+	sf_aligned_w		/* dest is word aligned */

#if USE_STSWX
		mtxer	r5
		stswx	r0, 0, r_dst
		add	r_dst, r5, r_dst
#else
		mtctr	r5			/* nope, then fill bytewise */
		subi	r_dst, r_dst, 1		/* until it is */
1:		stbu	r_val, 1(r_dst)
		bdnz	1b

		addi	r_dst, r_dst, 1
#endif
		subf	r_len, r5, r_len

sf_aligned_w:	/* no need to check r_len since it was >= 8 bytes initially */
#if USE_STSWX
		mr	r6, r0
		mr	r7, r0

		srwi	r5, r_len, 3
		mtctr	r5

		slwi	r5, r5, 3		/* adjust len */
		subf.	r_len, r5, r_len

1:		stswi	r6, r_dst, 8
		addi	r_dst, r_dst, 8
		bdnz	1b
#else
		srwi	r5, r_len, 2		/* words to fill */
		mtctr	r5

		slwi	r5, r5, 2
		subf.	r_len, r5, r_len	/* adjust len for fill */

		subi	r_dst, r_dst, 4
1:		stwu	r_val, 4(r_dst)
		bdnz	1b
		addi	r_dst, r_dst, 4
#endif

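/*
 * The (non-STSWX) word loop above stores len / 4 words and leaves the
 * remaining 0-3 bytes for sf_bytewise; roughly (an illustrative C
 * sketch, not part of the original code):
 *
 *	for (n = len >> 2; n > 0; n--) {
 *		*(uint32_t *)dst = val;
 *		dst += 4;
 *	}
 *	len &= 3;
 */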
sf_word_done:	bne-	sf_bytewise

sf_return:	mr	r3, r8			/* restore orig ptr */
		blr				/* for memset functionality */

sf_bytewise:
#if USE_STSWX
		mr	r5, r0
		mr	r6, r0
		mr	r7, r0

		mtxer	r_len
		stswx	r5, 0, r_dst
#else
		mtctr	r_len

		subi	r_dst, r_dst, 1
1:		stbu	r_val, 1(r_dst)
		bdnz	1b
#endif
		mr	r3, r8			/* restore orig ptr */
		blr				/* for memset functionality */

/*----------------------------------------------------------------------*/
#ifndef _KERNEL
		.data
cache_size:	.long	-1
cache_sh:	.long	0

#endif
/*----------------------------------------------------------------------*/
