/*	$NetBSD: bzero.S,v 1.12 2013/07/18 12:20:41 matt Exp $ */

/*-
 * Copyright (C) 2001	Martin J. Laubach <mjl@NetBSD.org>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. The name of the author may not be used to endorse or promote products
 *    derived from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
/*----------------------------------------------------------------------*/

#include <machine/asm.h>


#if defined(LIBC_SCCS) && !defined(lint)
__RCSID("$NetBSD: bzero.S,v 1.12 2013/07/18 12:20:41 matt Exp $")
#endif /* LIBC_SCCS && !lint */

#ifdef _KERNEL
#include <assym.h>
#endif

#define USE_STSWX 0	/* don't. slower than trivial copy loop */

/*----------------------------------------------------------------------*/
/*
     void bzero(void *b %r3, size_t len %r4);
     void * memset(void *b %r3, int c %r4, size_t len %r5);
*/
/*----------------------------------------------------------------------*/
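/*
 * Rough C-level sketch of the strategy used below, for orientation only;
 * cache_line_size() is an illustrative placeholder (the real code asks the
 * kernel via sysctl(3), or reads cpu_info when built into the kernel), and
 * the exact handling of short/unaligned tails differs in detail:
 *
 *	void *memset(void *b, int c, size_t len)
 *	{
 *		uint8_t *dst = b;
 *		uint32_t v = (uint8_t)c * 0x01010101U;	// fill byte replicated
 *		size_t cl = cache_line_size();		// illustrative only
 *
 *		if (c == 0 && len >= 2 * cl) {
 *			while ((uintptr_t)dst & 3)		// byte-fill to a word boundary
 *				*dst++ = 0, len--;
 *			while ((uintptr_t)dst & (cl - 1))	// word-fill to a cache line boundary
 *				*(uint32_t *)dst = 0, dst += 4, len -= 4;
 *			while (len >= cl)			// zero whole lines with dcbz
 *				dcbz(dst), dst += cl, len -= cl;	// dcbz(): illustrative
 *		}
 *		while (len >= 4)				// word-sized tail
 *			*(uint32_t *)dst = v, dst += 4, len -= 4;
 *		while (len--)					// byte-sized tail
 *			*dst++ = (uint8_t)c;
 *		return b;
 *	}
 */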

#define r_dst	%r3
#define r_len	%r4
#define r_val	%r0

		.text
		.align 4
ENTRY(bzero)
		li	r_val, 0		/* Value to stuff in */
		b	cb_memset
END(bzero)

ENTRY(memset)
		cmplwi	%cr1, %r5, 0
		mr.	%r0, %r4
		mr	%r8, %r3
		beqlr-	%cr1			/* Nothing to do */

		rlwimi	%r0, %r4, 8, 16, 23	/* word extend fill value */
		rlwimi	%r0, %r0, 16, 0, 15
		mr	%r4, %r5
		bne-	simple_fill		/* != 0, use trivial fill */
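/*
 * The two rlwimi instructions above replicate the low byte of the fill
 * value across all four bytes of %r0; in C this is roughly (illustrative):
 *
 *	uint32_t v = c & 0xff;
 *	v |= v << 8;		// byte now in both bytes of the low halfword
 *	v |= v << 16;		// low halfword copied into the high halfword
 */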
cb_memset:

/*----------------------------------------------------------------------*/
#ifndef _KERNEL
		/* First find out cache line size */
		mflr	%r9
#ifdef PIC
		bcl	20,31,1f
1:		mflr	%r5
		mtlr	%r9
		addis	%r5,%r5,cache_info+4-1b@ha
		lwzu	%r9,cache_info+4-1b@l(%r5)
#else
		lis	%r5,cache_info+4@ha
		lwzu	%r9,cache_info+4@l(%r5)
#endif
		lwz	%r10,cache_sh-(cache_info+4)(%r5)
		cmpwi	%r9, -1
		bne+	cb_cacheline_known

		addi	%r5, %r5, -4	/* point r5 @ beginning of cache_info */

/*----------------------------------------------------------------------*/
#define CTL_MACHDEP	7
#define CPU_CACHELINE	1
#define	CPU_CACHEINFO	5

#define STKFRAME_SZ	64
#define MIB		8
#define OLDPLEN		16
#define R3_SAVE		20
#define R4_SAVE		24
#define R0_SAVE		28
#define R8_SAVE		32
#define R31_SAVE	36
#ifdef PIC
#define R30_SAVE	40
#endif

		stw	%r9, 4(%r1)
		stwu	%r1, -STKFRAME_SZ(%r1)

		stw	%r31, R31_SAVE(%r1)
		mr	%r31, %r5		/* cache info */

#ifdef PIC
		stw	%r30, R30_SAVE(%r1)
		PIC_TOCSETUP(cb_memset,%r30)
#endif

		stw	%r8, R8_SAVE(%r1)
		stw	%r3, R3_SAVE(%r1)
		stw	%r4, R4_SAVE(%r1)
		stw	%r0, R0_SAVE(%r1)

		li	%r0, CTL_MACHDEP		/* Construct MIB */
		stw	%r0, MIB(%r1)
		li	%r0, CPU_CACHEINFO
		stw	%r0, MIB+4(%r1)

		li	%r0, 4*4			/* Oldlenp := 4*4 */
		stw	%r0, OLDPLEN(%r1)

		addi	%r3, %r1, MIB
		li	%r4, 2			/* namelen */
		/* %r5 already contains &cache_info */
		addi	%r6, %r1, OLDPLEN
		li	%r7, 0
		li	%r8, 0
		bl	PIC_PLT(_C_LABEL(sysctl))

		cmpwi	%r3, 0			/* Check result */
		beq	1f

		/* Failure, try older sysctl */

		li	%r0, CTL_MACHDEP	/* Construct MIB */
		stw	%r0, MIB(%r1)
		li	%r0, CPU_CACHELINE
		stw	%r0, MIB+4(%r1)

		li	%r0, 4			/* Oldlenp := 4 */
		stw	%r0, OLDPLEN(%r1)

		addi	%r3, %r1, MIB
		li	%r4, 2			/* namelen */
		addi	%r5, %r31, 4
		addi	%r6, %r1, OLDPLEN
		li	%r7, 0
		li	%r8, 0
		bl	PIC_PLT(_C_LABEL(sysctl))
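/*
 * The two sysctl(3) calls above correspond roughly to the following C
 * (local names are illustrative; the mib and length words actually live
 * in the stack frame built above):
 *
 *	int mib[2] = { CTL_MACHDEP, CPU_CACHEINFO };
 *	size_t len = 4 * sizeof(int);
 *
 *	if (sysctl(mib, 2, cache_info, &len, NULL, 0) != 0) {
 *		// older kernels: fall back to the single cache-line value
 *		mib[1] = CPU_CACHELINE;
 *		len = sizeof(int);
 *		(void)sysctl(mib, 2, &cache_info[1], &len, NULL, 0);
 *	}
 */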
1:
		lwz	%r3, R3_SAVE(%r1)
		lwz	%r4, R4_SAVE(%r1)
		lwz	%r8, R8_SAVE(%r1)
		lwz	%r0, R0_SAVE(%r1)
		lwz	%r9, 4(%r31)
		lwz	%r31, R31_SAVE(%r1)
#ifdef PIC
		lwz	%r30, R30_SAVE(%r1)
#endif
		addi	%r1, %r1, STKFRAME_SZ
		lwz	%r7, 4(%r1)
		mtlr	%r7

		cntlzw	%r6, %r9			/* compute shift value */
		li	%r5, 31
		subf	%r10, %r6, %r5
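		/*
		 * %r10 = 31 - cntlzw(line size), i.e. log2 of the cache
		 * line size (exact for the power-of-two sizes expected here).
		 */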

#ifdef PIC
		mflr	%r9
		bcl	20,31,1f
1:		mflr	%r5
		mtlr	%r9

		addis	%r5, %r5, cache_info+4-1b@ha
		lwzu	%r9, cache_info+4-1b@l(%r5)
#else
		lis	%r5, cache_info+4@ha
		lwzu	%r9, cache_info+4@l(%r5)
#endif
		stw	%r10, cache_sh-(cache_info+4)(%r5)

/*----------------------------------------------------------------------*/
/* Okay, we know the cache line size (%r9) and shift value (%r10) */
cb_cacheline_known:
#else /* _KERNEL */
#ifdef	MULTIPROCESSOR
		mfsprg	%r10, 0			/* Get cpu_info pointer */
#else
		lis	%r10, cpu_info_store@ha
		addi	%r10, %r10, cpu_info_store@l
#endif
		lwz	%r9, CPU_CI+4(%r10)	/* Load D$ line size */
		cntlzw	%r10, %r9			/* Calculate shift.. */
		li	%r6, 31
		subf	%r10, %r10, %r6
#endif /* _KERNEL */
		/* Back in memory filling business */

		cmplwi	%cr1, r_len, 0		/* Nothing to do? */
		add	%r5, %r9, %r9
		cmplw	r_len, %r5		/* < 2*CL bytes to move? */
		beqlr-	%cr1			/* then do nothing */

		blt+	simple_fill		/* a trivial fill routine */

		/* Word align the block, fill bytewise until dst is word aligned */

		andi.	%r5, r_dst, 0x03
		li	%r6, 4
		beq+	cb_aligned_w		/* already aligned to word? */

		subf	%r5, %r5, %r6		/* bytes to fill to align4 */
#if USE_STSWX
		mtxer	%r5
		stswx	%r0, 0, r_dst
		add	r_dst, %r5, r_dst
#else
		mtctr	%r5

		subi	r_dst, r_dst, 1
1:		stbu	r_val, 1(r_dst)		/* Fill bytewise */
		bdnz	1b

		addi	r_dst, r_dst, 1
#endif
		subf	r_len, %r5, r_len
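/*
 * In C terms, the alignment step above is roughly (illustrative):
 *
 *	if ((uintptr_t)dst & 3) {
 *		size_t pad = 4 - ((uintptr_t)dst & 3);
 *		len -= pad;
 *		while (pad--)
 *			*dst++ = val;
 *	}
 */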

cb_aligned_w:	/* Cache block align, fill wordwise until dst aligned */

		/* I know I have something to do since we had >= 2*CL initially */
		/* so no need to check for r_len == 0 */

		subi	%r6, %r9, 1		/* CL mask */
		and.	%r5, r_dst, %r6
		srwi	%r5, %r5, 2
		srwi	%r6, %r9, 2
		beq	cb_aligned_cb		/* already on CL boundary? */

		subf	%r5, %r5, %r6		/* words to fill to alignment */
		mtctr	%r5
		slwi	%r5, %r5, 2
		subf	r_len, %r5, r_len

		subi	r_dst, r_dst, 4
1:		stwu	r_val, 4(r_dst)		/* Fill wordwise */
		bdnz	1b
		addi	r_dst, r_dst, 4

cb_aligned_cb:	/* no need to check r_len, see above */

		srw.	%r5, r_len, %r10		/* Number of cache blocks */
		mtctr	%r5
		beq	cblocks_done

		slw	%r5, %r5, %r10
		subf	r_len, %r5, r_len

1:		dcbz	0, r_dst		/* Clear blockwise */
		add	r_dst, r_dst, %r9
		bdnz	1b
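/*
 * The dcbz loop above zeroes whole cache lines; roughly (illustrative):
 *
 *	size_t nblocks = len >> shift;		// shift = log2(line size)
 *	len -= nblocks << shift;
 *	while (nblocks--) {
 *		// dcbz zeroes one whole, already-aligned cache line
 *		dst += linesize;
 *	}
 */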

cblocks_done:	/* still CL aligned, but less than CL bytes left */
		cmplwi	%cr1, r_len, 0
		cmplwi	r_len, 8
		beq-	%cr1, sf_return

		blt-	sf_bytewise		/* <8 remaining? */
		b	sf_aligned_w

/*----------------------------------------------------------------------*/
wbzero:		li	r_val, 0

		cmplwi	r_len, 0
		beqlr-				/* Nothing to do */

simple_fill:
#if USE_STSWX
		cmplwi	%cr1, r_len, 12		/* < 12 bytes to move? */
#else
		cmplwi	%cr1, r_len, 8		/* < 8 bytes to move? */
#endif
		andi.	%r5, r_dst, 0x03		/* bytes to fill to align4 */
		blt	%cr1, sf_bytewise	/* trivial byte mover */

		li	%r6, 4
		subf	%r5, %r5, %r6
		beq+	sf_aligned_w		/* dest is word aligned */

#if USE_STSWX
		mtxer	%r5
		stswx	%r0, 0, r_dst
		add	r_dst, %r5, r_dst
#else
		mtctr	%r5			/* nope, then fill bytewise */
		subi	r_dst, r_dst, 1		/* until it is */
1:		stbu	r_val, 1(r_dst)
		bdnz	1b

		addi	r_dst, r_dst, 1
#endif
		subf	r_len, %r5, r_len

sf_aligned_w:	/* no need to check r_len since it was >= 8 bytes initially */
#if USE_STSWX
		mr	%r6, %r0
		mr	%r7, %r0

		srwi	%r5, r_len, 3
		mtctr	%r5

		slwi	%r5, %r5, 3		/* adjust len */
		subf.	r_len, %r5, r_len

1:		stswi	%r6, r_dst, 8
		addi	r_dst, r_dst, 8
		bdnz	1b
#else
		srwi	%r5, r_len, 2		/* words to fill */
		mtctr	%r5

		slwi	%r5, %r5, 2
		subf.	r_len, %r5, r_len	/* adjust len for fill */

		subi	r_dst, r_dst, 4
1:		stwu	r_val, 4(r_dst)
		bdnz	1b
		addi	r_dst, r_dst, 4
#endif
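/*
 * The word loop above plus the bytewise tail below amount to roughly
 * (illustrative):
 *
 *	while (len >= 4)
 *		*(uint32_t *)dst = val, dst += 4, len -= 4;
 *	while (len--)
 *		*dst++ = (uint8_t)val;
 */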

sf_word_done:	bne-	sf_bytewise

sf_return:	mr	%r3, %r8			/* restore orig ptr */
		blr				/* for memset functionality */

sf_bytewise:
#if USE_STSWX
		mr	%r5, %r0
		mr	%r6, %r0
		mr	%r7, %r0

		mtxer	r_len
		stswx	%r5, 0, r_dst
#else
		mtctr	r_len

		subi	r_dst, r_dst, 1
1:		stbu	r_val, 1(r_dst)
		bdnz	1b
#endif
		mr	%r3, %r8			/* restore orig ptr */
		blr				/* for memset functionality */
END(memset)

/*----------------------------------------------------------------------*/
#ifndef _KERNEL
		.data
		.p2align 2
cache_info:	.long	-1, -1, -1, -1
cache_sh:	.long	0

#endif
/*----------------------------------------------------------------------*/