/*	$NetBSD: bzero.S,v 1.15 2016/12/19 14:30:23 christos Exp $ */

/*-
 * Copyright (C) 2001	Martin J. Laubach <mjl@NetBSD.org>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. The name of the author may not be used to endorse or promote products
 *    derived from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
/*----------------------------------------------------------------------*/

#include <machine/asm.h>


#if defined(LIBC_SCCS) && !defined(lint)
__RCSID("$NetBSD: bzero.S,v 1.15 2016/12/19 14:30:23 christos Exp $")
#endif /* LIBC_SCCS && !lint */

#include "assym.h"

#define USE_STSWX 0	/* don't: stswx is slower than the trivial fill loop */

/*----------------------------------------------------------------------*/
/*
     void bzero(void *b %r3, size_t len %r4);
     void * memset(void *b %r3, int c %r4, size_t len %r5);
*/
/*----------------------------------------------------------------------*/
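/*
 * Overview: bzero loads a zero fill value and falls into the memset code at
 * cb_memset.  A zero fill of at least two cache lines is byte-filled to a
 * word boundary, word-filled to a cache line boundary, and the bulk is then
 * cleared one cache line at a time with dcbz.  Non-zero fills, short lengths,
 * and an unknown cache line size take the simple_fill path (a word loop plus
 * a trailing byte loop).  Roughly, as a sketch only (the real code computes
 * the loop counts up front; c4 is the fill byte replicated into a word):
 *
 *	while (dst & 3)		{ *dst++ = c; len--; }
 *	while (dst & (CL - 1))	{ *(uint32_t *)dst = c4; dst += 4; len -= 4; }
 *	while (len >= CL)	{ dcbz(dst); dst += CL; len -= CL; }
 *	...the remainder is finished with word and byte stores.
 */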

#define r_dst	%r3
#define r_len	%r4
#define r_val	%r0

		.text
		.align 4
ENTRY(bzero)
		li	r_val, 0		/* Value to stuff in */
		cmplwi	%cr1, %r4, 0		/* Zero length? */
		beqlr-	%cr1			/* Yes, do nothing */
		b	cb_memset
END(bzero)

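/*
 * memset entry: %r3 = dst (also saved in %r8 so it can be returned),
 * %r4 = fill byte c, %r5 = length.  The fill value is replicated into %r0
 * and the length is moved to %r4 so that the shared code below sees the
 * same register layout as bzero (r_dst/r_len/r_val).
 */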
ENTRY(memset)
		cmplwi	%cr1, %r5, 0
		mr.	%r0, %r4
		mr	%r8, %r3
		beqlr-	%cr1			/* Nothing to do */

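		/*
		 * Replicate the low byte of c into all four bytes of %r0,
		 * e.g. c = 0x5a becomes 0x5a5a5a5a: the first rlwimi copies
		 * the byte into bits 16-23, the second copies the resulting
		 * halfword into the upper half of the word.
		 */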
		rlwimi	%r0, %r4, 8, 16, 23	/* word extend fill value */
		rlwimi	%r0, %r0, 16, 0, 15
		mr	%r4, %r5
		bne-	simple_fill		/* c != 0: use the trivial fill */
cb_memset:

/*----------------------------------------------------------------------*/
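/*
 * Fetch the D-cache line size into %r9.  Userland reads it from libc's
 * _libc_powerpc_cache_info (PC-relative when PIC, absolute otherwise) and
 * falls back to simple_fill if the size is unknown; the kernel reads it
 * from the per-CPU cpu_info.
 */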
#ifndef _KERNEL
#ifdef __PIC__
		/* First get cache line size */
		mflr	%r9
		bcl	20,31,1f
1:		mflr	%r10
		mtlr	%r9
		addis	%r10,%r10,_libc_powerpc_cache_info+CACHE_INFO_DCACHE_LINE_SIZE-1b@ha
		lwz	%r9,_libc_powerpc_cache_info+CACHE_INFO_DCACHE_LINE_SIZE-1b@l(%r10)
#else
		lis	%r10,_libc_powerpc_cache_info+CACHE_INFO_DCACHE_LINE_SIZE@ha
		lwz	%r9,_libc_powerpc_cache_info+CACHE_INFO_DCACHE_LINE_SIZE@l(%r10)
#endif
		cmplwi	%cr1, %r9, 0		/* Unknown? */
		beq-	simple_fill		/* a trivial fill routine */
#else /* _KERNEL */
#ifdef	MULTIPROCESSOR
		mfsprg	%r10, 0			/* Get cpu_info pointer */
#else
		lis	%r10, cpu_info_store@ha
		addi	%r10, %r10, cpu_info_store@l
#endif
		lwz	%r9, CPU_CI+CACHE_INFO_DCACHE_LINE_SIZE(%r10)	/* Load D$ line size */
#endif /* _KERNEL */
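		/*
		 * Compute the bytes-to-blocks shift: for a power-of-two line
		 * size 2^k, cntlzw returns 31 - k, so %r10 ends up as k
		 * (e.g. a 32-byte line gives a shift of 5).
		 */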
		cntlzw	%r10, %r9			/* Calculate shift.. */
		li	%r6, 31
		subf	%r10, %r10, %r6
		/* Back in memory filling business */

		cmplwi	%cr1, r_len, 0		/* Nothing to do? */
		add	%r5, %r9, %r9
		cmplw	r_len, %r5		/* <= 2*CL bytes to move? */
		beqlr-	%cr1			/* then do nothing */

		blt+	simple_fill		/* a trivial fill routine */

		/* Word align the block: fill bytewise until dst is word aligned */

		andi.	%r5, r_dst, 0x03
		li	%r6, 4
		beq+	cb_aligned_w		/* already aligned to word? */

		subf	%r5, %r5, %r6		/* bytes to fill to align4 */
#if USE_STSWX
		mtxer	%r5
		stswx	%r0, 0, r_dst
		add	r_dst, %r5, r_dst
#else
		mtctr	%r5

		subi	r_dst, r_dst, 1
1:		stbu	r_val, 1(r_dst)		/* Fill bytewise */
		bdnz	1b

		addi	r_dst, r_dst, 1
#endif
		subf	r_len, %r5, r_len

cb_aligned_w:	/* Cache block align, fill wordwise until dst aligned */

		/* There is always work to do here: r_len was >= 2*CL initially, */
		/* so there is no need to check for r_len == 0. */

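		/*
		 * %r5 = words already past the last cache line boundary,
		 * %r6 = words per cache line; the number of words to store
		 * before reaching the next boundary is %r6 - %r5.
		 */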
		subi	%r6, %r9, 1		/* CL mask */
		and.	%r5, r_dst, %r6
		srwi	%r5, %r5, 2
		srwi	%r6, %r9, 2
		beq	cb_aligned_cb		/* already on CL boundary? */

		subf	%r5, %r5, %r6		/* words to fill to alignment */
		mtctr	%r5
		slwi	%r5, %r5, 2
		subf	r_len, %r5, r_len

		subi	r_dst, r_dst, 4
1:		stwu	r_val, 4(r_dst)		/* Fill wordwise */
		bdnz	1b
		addi	r_dst, r_dst, 4

cb_aligned_cb:	/* no need to check r_len, see above */

		srw.	%r5, r_len, %r10		/* Number of cache blocks */
		mtctr	%r5
		beq	cblocks_done

		slw	%r5, %r5, %r10
		subf	r_len, %r5, r_len

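		/*
		 * Bulk loop: dcbz establishes each cache block zeroed in the
		 * data cache without fetching it from memory.  This is only
		 * correct for a zero fill, which is why non-zero memsets
		 * branched to simple_fill earlier.
		 */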
1:		dcbz	0, r_dst		/* Clear blockwise */
		add	r_dst, r_dst, %r9
		bdnz	1b

cblocks_done:	/* still CL aligned, but less than CL bytes left */
		cmplwi	%cr1, r_len, 0
		cmplwi	r_len, 8
		beq-	%cr1, sf_return

		blt-	sf_bytewise		/* <8 remaining? */
		b	sf_aligned_w

/*----------------------------------------------------------------------*/
wbzero:		li	r_val, 0

		cmplwi	r_len, 0
		beqlr-				/* Nothing to do */

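/*
 * simple_fill: fallback for non-zero fills, short lengths, and an unknown
 * cache line size.  Very short fills go straight to the byte loop; otherwise
 * the destination is byte-filled to a word boundary, filled wordwise, and
 * any remaining bytes are finished in sf_bytewise.
 */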
simple_fill:
#if USE_STSWX
		cmplwi	%cr1, r_len, 12		/* < 12 bytes to move? */
#else
		cmplwi	%cr1, r_len, 8		/* < 8 bytes to move? */
#endif
		andi.	%r5, r_dst, 0x03		/* bytes to fill to align4 */
		blt	%cr1, sf_bytewise	/* trivial byte mover */

		li	%r6, 4
		subf	%r5, %r5, %r6
		beq+	sf_aligned_w		/* dest is word aligned */

#if USE_STSWX
		mtxer	%r5
		stswx	%r0, 0, r_dst
		add	r_dst, %r5, r_dst
#else
		mtctr	%r5			/* nope, then fill bytewise */
		subi	r_dst, r_dst, 1		/* until it is */
1:		stbu	r_val, 1(r_dst)
		bdnz	1b

		addi	r_dst, r_dst, 1
#endif
		subf	r_len, %r5, r_len

sf_aligned_w:	/* no need to check r_len since it was >= 8 bytes initially */
#if USE_STSWX
		mr	%r6, %r0
		mr	%r7, %r0

		srwi	%r5, r_len, 3
		mtctr	%r5

		slwi	%r5, %r5, 3		/* adjust len */
		subf.	r_len, %r5, r_len

1:		stswi	%r6, r_dst, 8
		addi	r_dst, r_dst, 8
		bdnz	1b
#else
		srwi	%r5, r_len, 2		/* words to fill */
		mtctr	%r5

		slwi	%r5, %r5, 2
		subf.	r_len, %r5, r_len	/* adjust len for fill */

		subi	r_dst, r_dst, 4
1:		stwu	r_val, 4(r_dst)
		bdnz	1b
		addi	r_dst, r_dst, 4
#endif

sf_word_done:	bne-	sf_bytewise		/* leftover bytes remain? */

sf_return:	mr	%r3, %r8			/* restore orig ptr */
		blr				/* for memset functionality */

sf_bytewise:
#if USE_STSWX
		mr	%r5, %r0
		mr	%r6, %r0
		mr	%r7, %r0

		mtxer	r_len
		stswx	%r5, 0, r_dst
#else
		mtctr	r_len

		subi	r_dst, r_dst, 1
1:		stbu	r_val, 1(r_dst)
		bdnz	1b
#endif
		mr	%r3, %r8			/* restore orig ptr */
		blr				/* for memset functionality */
END(memset)

/*----------------------------------------------------------------------*/
#ifndef _KERNEL
		.data
		.p2align 2
cache_info:	.long	-1, -1, -1, -1
cache_sh:	.long	0

#endif
/*----------------------------------------------------------------------*/