xref: /netbsd-src/lib/libc/arch/powerpc/string/bzero.S (revision 116a2064fe9d0dfa6e5fffa7a1391610f738527d)
/*	$NetBSD: bzero.S,v 1.15 2016/12/19 14:30:23 christos Exp $ */

/*-
 * Copyright (C) 2001	Martin J. Laubach <mjl@NetBSD.org>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. The name of the author may not be used to endorse or promote products
 *    derived from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
/*----------------------------------------------------------------------*/

#include <machine/asm.h>


#if defined(LIBC_SCCS) && !defined(lint)
__RCSID("$NetBSD: bzero.S,v 1.15 2016/12/19 14:30:23 christos Exp $")
#endif /* LIBC_SCCS && !lint */

#include "assym.h"

#define USE_STSWX 0	/* don't. slower than trivial copy loop */
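/*
 * stswx/stswi are microcoded on many PowerPC implementations and tend to
 * serialize the pipeline, which is presumably why the stswx variant
 * measured slower than the plain store loops and stays disabled.
 */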

/*----------------------------------------------------------------------*/
/*
     void bzero(void *b %r3, size_t len %r4);
     void * memset(void *b %r3, int c %r4, size_t len %r5);
*/
/*----------------------------------------------------------------------*/

#define r_dst	%r3
#define r_len	%r4
#define r_val	%r0
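
/*
 * Strategy (see below): save the original pointer in %r8 (memset must
 * return it in %r3), replicate the fill byte through a word, and, when
 * the fill value is zero and the D-cache line size is known, align the
 * destination and clear whole cache lines with dcbz.  Nonzero fills,
 * short lengths, and unknown cache geometry fall back to simple word
 * and byte loops.
 */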

		.text
		.align 4
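/*
 * bzero() loads a zero fill value and branches into memset()'s shared
 * body at cb_memset, past the fill-byte replication it does not need.
 */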
ENTRY(bzero)
		li	r_val, 0		/* Value to stuff in */
		cmplwi	%cr1, %r4, 0		/* Zero length? */
		beqlr-	%cr1			/* Yes, do nothing */
		b	cb_memset
END(bzero)

ENTRY(memset)
		cmplwi	%cr1, %r5, 0
		mr.	%r0, %r4
		mr	%r8, %r3
		beqlr-	%cr1			/* Nothing to do */

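		/*
		 * Replicate the low byte of c into all four bytes of %r0,
		 * i.e. %r0 = (c & 0xff) * 0x01010101: the first rlwimi
		 * inserts the byte into bits 16-23, the second copies the
		 * low halfword into the high halfword.
		 */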
		rlwimi	%r0, %r4, 8, 16, 23	/* word extend fill value */
		rlwimi	%r0, %r0, 16, 0, 15
		mr	%r4, %r5
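		/*
		 * cr0 still holds the result of the "mr." above.  dcbz can
		 * only store zeroes, so a nonzero fill value must take the
		 * simple fill path.
		 */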
		bne-	simple_fill		/* != 0, use trivial fill */
cb_memset:

/*----------------------------------------------------------------------*/
#ifndef _KERNEL
#ifdef __PIC__
		/* First get cache line size */
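		/*
		 * "bcl 20,31,1f" to the very next instruction is the
		 * preferred idiom for reading the current PC into LR (an
		 * always-taken branch-and-link that does not disturb the
		 * link-register prediction stack); LR is restored right
		 * after, and %r10 then anchors the PC-relative access to
		 * the cache-info record.
		 */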
		mflr	%r9
		bcl	20,31,1f
1:		mflr	%r10
		mtlr	%r9
		addis	%r10,%r10,_libc_powerpc_cache_info+CACHE_INFO_DCACHE_LINE_SIZE-1b@ha
		lwz	%r9,_libc_powerpc_cache_info+CACHE_INFO_DCACHE_LINE_SIZE-1b@l(%r10)
#else
		lis	%r10,_libc_powerpc_cache_info+CACHE_INFO_DCACHE_LINE_SIZE@ha
		lwz	%r9,_libc_powerpc_cache_info+CACHE_INFO_DCACHE_LINE_SIZE@l(%r10)
#endif
		cmplwi	%cr1, %r9, 0		/* Unknown? */
		beq-	%cr1, simple_fill	/* a trivial fill routine */
#else /* _KERNEL */
#ifdef	MULTIPROCESSOR
		mfsprg	%r10, 0			/* Get cpu_info pointer */
#else
		lis	%r10, cpu_info_store@ha
		addi	%r10, %r10, cpu_info_store@l
#endif
		lwz	%r9, CPU_CI+CACHE_INFO_DCACHE_LINE_SIZE(%r10)	/* Load D$ line size */
#endif /* _KERNEL */
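		/*
		 * For a power-of-two line size, 31 - cntlzw(size) is
		 * log2(size): e.g. a 32-byte line gives cntlzw = 26 and a
		 * shift count of 5, used below to convert bytes to blocks.
		 */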
		cntlzw	%r10, %r9		/* Calculate shift.. */
		li	%r6, 31
		subf	%r10, %r10, %r6
		/* Back in memory filling business */

		cmplwi	%cr1, r_len, 0		/* Nothing to do? */
		add	%r5, %r9, %r9
		cmplw	r_len, %r5		/* <= 2*CL bytes to move? */
		beqlr-	%cr1			/* then do nothing */

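		/*
		 * With at most two cache lines' worth to fill, the
		 * align-then-dcbz setup is presumably not worth the
		 * overhead, so take the simple route.
		 */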
		blt+	simple_fill		/* a trivial fill routine */

		/* Word align the block, fill bytewise until dst even */

		andi.	%r5, r_dst, 0x03
		li	%r6, 4
		beq+	cb_aligned_w		/* already aligned to word? */

		subf	%r5, %r5, %r6		/* bytes to fill to align4 */
#if USE_STSWX
		mtxer	%r5
		stswx	%r0, 0, r_dst
		add	r_dst, %r5, r_dst
#else
		mtctr	%r5

		subi	r_dst, r_dst, 1
1:		stbu	r_val, 1(r_dst)		/* Fill bytewise */
		bdnz	1b

		addi	r_dst, r_dst, 1
#endif
		subf	r_len, %r5, r_len

cb_aligned_w:	/* Cache block align, fill wordwise until dst aligned */

		/* I know I have something to do since we had > 2*CL initially */
		/* so no need to check for r_len = 0 */

		subi	%r6, %r9, 1		/* CL mask */
		and.	%r5, r_dst, %r6		/* byte offset within the line */
		srwi	%r5, %r5, 2		/* ...as a word offset */
		srwi	%r6, %r9, 2		/* words per cache line */
		beq	cb_aligned_cb		/* already on CL boundary? */

		subf	%r5, %r5, %r6		/* words to fill to alignment */
		mtctr	%r5
		slwi	%r5, %r5, 2
		subf	r_len, %r5, r_len

		subi	r_dst, r_dst, 4
1:		stwu	r_val, 4(r_dst)		/* Fill wordwise */
		bdnz	1b
		addi	r_dst, r_dst, 4

cb_aligned_cb:	/* no need to check r_len, see above */

		srw.	%r5, r_len, %r10	/* Number of cache blocks */
		mtctr	%r5
		beq	cblocks_done

		slw	%r5, %r5, %r10
		subf	r_len, %r5, r_len

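		/*
		 * dcbz zeroes an entire cache block in one shot and, on
		 * typical implementations, establishes the line in the
		 * D-cache without fetching it from memory first; this loop
		 * is the payoff for the alignment work above.
		 */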
1:		dcbz	0, r_dst		/* Clear blockwise */
		add	r_dst, r_dst, %r9
		bdnz	1b

cblocks_done:	/* still CL aligned, but less than CL bytes left */
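		/*
		 * Two compares into separate CR fields (cr1: len == 0,
		 * cr0: len vs. 8) let both branches below resolve without
		 * an extra compare in between.
		 */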
		cmplwi	%cr1, r_len, 0
		cmplwi	r_len, 8
		beq-	%cr1, sf_return

		blt-	sf_bytewise		/* <8 remaining? */
		b	sf_aligned_w

/*----------------------------------------------------------------------*/
wbzero:		li	r_val, 0

		cmplwi	r_len, 0
		beqlr-				/* Nothing to do */

simple_fill:
#if USE_STSWX
		cmplwi	%cr1, r_len, 12		/* < 12 bytes to move? */
#else
		cmplwi	%cr1, r_len, 8		/* < 8 bytes to move? */
#endif
		andi.	%r5, r_dst, 0x03	/* bytes to fill to align4 */
		blt	%cr1, sf_bytewise	/* trivial byte mover */

		li	%r6, 4
		subf	%r5, %r5, %r6
		beq+	sf_aligned_w		/* dest is word aligned */

#if USE_STSWX
		mtxer	%r5
		stswx	%r0, 0, r_dst
		add	r_dst, %r5, r_dst
#else
		mtctr	%r5			/* nope, then fill bytewise */
		subi	r_dst, r_dst, 1		/* until it is */
1:		stbu	r_val, 1(r_dst)
		bdnz	1b

		addi	r_dst, r_dst, 1
#endif
		subf	r_len, %r5, r_len

sf_aligned_w:	/* no need to check r_len; it was >= 8 bytes initially */
#if USE_STSWX
		mr	%r6, %r0
		mr	%r7, %r0

		srwi	%r5, r_len, 3
		mtctr	%r5

		slwi	%r5, %r5, 3		/* adjust len */
		subf.	r_len, %r5, r_len

1:		stswi	%r6, r_dst, 8
		addi	r_dst, r_dst, 8
		bdnz	1b
#else
		srwi	%r5, r_len, 2		/* words to fill */
		mtctr	%r5

		slwi	%r5, %r5, 2
		subf.	r_len, %r5, r_len	/* adjust len for fill */

		subi	r_dst, r_dst, 4
1:		stwu	r_val, 4(r_dst)
		bdnz	1b
		addi	r_dst, r_dst, 4
#endif

sf_word_done:	bne-	sf_bytewise		/* cr0 from subf. above: tail bytes left? */

sf_return:	mr	%r3, %r8		/* restore orig ptr */
		blr				/* for memset functionality */

sf_bytewise:
#if USE_STSWX
		mr	%r5, %r0
		mr	%r6, %r0
		mr	%r7, %r0

		mtxer	r_len
		stswx	%r5, 0, r_dst
#else
		mtctr	r_len

		subi	r_dst, r_dst, 1
1:		stbu	r_val, 1(r_dst)
		bdnz	1b
#endif
		mr	%r3, %r8		/* restore orig ptr */
		blr				/* for memset functionality */
END(memset)

/*----------------------------------------------------------------------*/
#ifndef _KERNEL
		.data
		.p2align 2
cache_info:	.long	-1, -1, -1, -1
cache_sh:	.long	0
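/*
 * Apparently vestigial: nothing in this file references cache_info or
 * cache_sh; the line size now comes from _libc_powerpc_cache_info
 * (see above).
 */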

#endif
/*----------------------------------------------------------------------*/