/*	$NetBSD: bzero.S,v 1.15 2016/12/19 14:30:23 christos Exp $ */

/*-
 * Copyright (C) 2001	Martin J. Laubach <mjl@NetBSD.org>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. The name of the author may not be used to endorse or promote products
 *    derived from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
/*----------------------------------------------------------------------*/

#include <machine/asm.h>


#if defined(LIBC_SCCS) && !defined(lint)
__RCSID("$NetBSD: bzero.S,v 1.15 2016/12/19 14:30:23 christos Exp $")
#endif /* LIBC_SCCS && !lint */

#include "assym.h"

#define USE_STSWX 0	/* don't. slower than trivial copy loop */

/*----------------------------------------------------------------------*/
/*
     void	bzero(void *b %r3, size_t len %r4);
     void *	memset(void *b %r3, int c %r4, size_t len %r5);
*/
/*----------------------------------------------------------------------*/

#define r_dst	%r3
#define r_len	%r4
#define r_val	%r0

		.text
		.align 4
ENTRY(bzero)
		li	r_val, 0		/* Value to stuff in */
		cmplwi	%cr1, %r4, 0		/* Zero length? */
		beqlr-	%cr1			/* Yes, do nothing */
		b	cb_memset
END(bzero)
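/*
 * Illustrative sketch (not part of the build; the helper name is
 * hypothetical): the memset entry below widens the byte fill value
 * into a full word with two rlwimi instructions, roughly:
 *
 *	uint32_t splat(uint8_t c)
 *	{
 *		uint32_t v = c;
 *		v |= v << 8;	// rlwimi %r0, %r4, 8, 16, 23
 *		v |= v << 16;	// rlwimi %r0, %r0, 16, 0, 15
 *		return v;	// e.g. 0xAB -> 0xABABABAB
 *	}
 */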
ENTRY(memset)
		cmplwi	%cr1, %r5, 0
		mr.	%r0, %r4
		mr	%r8, %r3
		beqlr-	%cr1			/* Nothing to do */

		rlwimi	%r0, %r4, 8, 16, 23	/* word extend fill value */
		rlwimi	%r0, %r0, 16, 0, 15
		mr	%r4, %r5
		bne-	simple_fill		/* != 0, use trivial fill */
cb_memset:

/*----------------------------------------------------------------------*/
#ifndef _KERNEL
#ifdef __PIC__
		/* First get cache line size */
		mflr	%r9
		bcl	20,31,1f
1:		mflr	%r10
		mtlr	%r9
		addis	%r10,%r10,_libc_powerpc_cache_info+CACHE_INFO_DCACHE_LINE_SIZE-1b@ha
		lwz	%r9,_libc_powerpc_cache_info+CACHE_INFO_DCACHE_LINE_SIZE-1b@l(%r10)
#else
		lis	%r10,_libc_powerpc_cache_info+CACHE_INFO_DCACHE_LINE_SIZE@ha
		lwz	%r9,_libc_powerpc_cache_info+CACHE_INFO_DCACHE_LINE_SIZE@l(%r10)
#endif
		cmplwi	%cr1, %r9, 0		/* Unknown? */
		beq-	simple_fill		/* a trivial fill routine */
#else /* _KERNEL */
#ifdef MULTIPROCESSOR
		mfsprg	%r10, 0			/* Get cpu_info pointer */
#else
		lis	%r10, cpu_info_store@ha
		addi	%r10, %r10, cpu_info_store@l
#endif
		lwz	%r9, CPU_CI+CACHE_INFO_DCACHE_LINE_SIZE(%r10)	/* Load D$ line size */
#endif /* _KERNEL */
		cntlzw	%r10, %r9		/* Calculate shift.. */
		li	%r6, 31
		subf	%r10, %r10, %r6
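		/*
		 * Illustrative note: for a power-of-two line size,
		 * 31 - cntlzw(size) = log2(size).  E.g. a 32-byte line
		 * (0x00000020) has 26 leading zeros, so %r10 becomes
		 * 31 - 26 = 5, and shifting the length right by %r10
		 * below counts whole cache blocks.
		 */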
		/* Back in memory filling business */

		cmplwi	%cr1, r_len, 0		/* Nothing to do? */
		add	%r5, %r9, %r9
		cmplw	r_len, %r5		/* <= 2*CL bytes to move? */
		beqlr-	%cr1			/* then do nothing */

		blt+	simple_fill		/* a trivial fill routine */

		/* Word align the block, fill bytewise until dst even */

		andi.	%r5, r_dst, 0x03
		li	%r6, 4
		beq+	cb_aligned_w		/* already aligned to word? */

		subf	%r5, %r5, %r6		/* bytes to fill to align4 */
#if USE_STSWX
		mtxer	%r5
		stswx	%r0, 0, r_dst
		add	r_dst, %r5, r_dst
#else
		mtctr	%r5

		subi	r_dst, r_dst, 1
1:		stbu	r_val, 1(r_dst)		/* Fill bytewise */
		bdnz	1b

		addi	r_dst, r_dst, 1
#endif
		subf	r_len, %r5, r_len

cb_aligned_w:	/* Cache block align, fill wordwise until dst aligned */

		/* I know I have something to do since we had > 2*CL initially */
		/* so no need to check for r_len = 0 */

		subi	%r6, %r9, 1		/* CL mask */
		and.	%r5, r_dst, %r6
		srwi	%r5, %r5, 2
		srwi	%r6, %r9, 2
		beq	cb_aligned_cb		/* already on CL boundary? */

		subf	%r5, %r5, %r6		/* words to fill to alignment */
		mtctr	%r5
		slwi	%r5, %r5, 2
		subf	r_len, %r5, r_len

		subi	r_dst, r_dst, 4
1:		stwu	r_val, 4(r_dst)		/* Fill wordwise */
		bdnz	1b
		addi	r_dst, r_dst, 4

cb_aligned_cb:	/* no need to check r_len, see above */

		srw.	%r5, r_len, %r10	/* Number of cache blocks */
		mtctr	%r5
		beq	cblocks_done

		slw	%r5, %r5, %r10
		subf	r_len, %r5, r_len

1:		dcbz	0, r_dst		/* Clear blockwise */
		add	r_dst, r_dst, %r9
		bdnz	1b

cblocks_done:	/* still CL aligned, but less than CL bytes left */
		cmplwi	%cr1, r_len, 0
		cmplwi	r_len, 8
		beq-	%cr1, sf_return

		blt-	sf_bytewise		/* <8 remaining? */
		b	sf_aligned_w
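/*
 * Illustrative sketch (not part of the build; dcbz() here stands in for
 * the instruction): the cache-block path above relies on dcbz, which
 * zeroes one whole D-cache block per store.  That is also why memset
 * with a non-zero fill value branched to simple_fill instead.  Roughly:
 *
 *	while (nblocks--) {		// nblocks = len >> log2(line size)
 *		dcbz(dst);		// zero one whole cache block
 *		dst += line_size;
 *	}
 */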
/*----------------------------------------------------------------------*/
wbzero:		li	r_val, 0

		cmplwi	r_len, 0
		beqlr-				/* Nothing to do */

simple_fill:
#if USE_STSWX
		cmplwi	%cr1, r_len, 12		/* < 12 bytes to move? */
#else
		cmplwi	%cr1, r_len, 8		/* < 8 bytes to move? */
#endif
		andi.	%r5, r_dst, 0x03	/* bytes to fill to align4 */
		blt	%cr1, sf_bytewise	/* trivial byte mover */

		li	%r6, 4
		subf	%r5, %r5, %r6
		beq+	sf_aligned_w		/* dest is word aligned */

#if USE_STSWX
		mtxer	%r5
		stswx	%r0, 0, r_dst
		add	r_dst, %r5, r_dst
#else
		mtctr	%r5			/* nope, then fill bytewise */
		subi	r_dst, r_dst, 1		/* until it is */
1:		stbu	r_val, 1(r_dst)
		bdnz	1b

		addi	r_dst, r_dst, 1
#endif
		subf	r_len, %r5, r_len

sf_aligned_w:	/* no need to check r_len since it was >= 8 bytes initially */
#if USE_STSWX
		mr	%r6, %r0
		mr	%r7, %r0

		srwi	%r5, r_len, 3
		mtctr	%r5

		slwi	%r5, %r5, 3		/* adjust len */
		subf.	r_len, %r5, r_len

1:		stswi	%r6, r_dst, 8
		addi	r_dst, r_dst, 8
		bdnz	1b
#else
		srwi	%r5, r_len, 2		/* words to fill */
		mtctr	%r5

		slwi	%r5, %r5, 2
		subf.	r_len, %r5, r_len	/* adjust len for fill */

		subi	r_dst, r_dst, 4
1:		stwu	r_val, 4(r_dst)
		bdnz	1b
		addi	r_dst, r_dst, 4
#endif

sf_word_done:	bne-	sf_bytewise

sf_return:	mr	%r3, %r8		/* restore orig ptr */
		blr				/* for memset functionality */

sf_bytewise:
#if USE_STSWX
		mr	%r5, %r0
		mr	%r6, %r0
		mr	%r7, %r0

		mtxer	r_len
		stswx	%r5, 0, r_dst
#else
		mtctr	r_len

		subi	r_dst, r_dst, 1
1:		stbu	r_val, 1(r_dst)
		bdnz	1b
#endif
		mr	%r3, %r8		/* restore orig ptr */
		blr				/* for memset functionality */
END(memset)

/*----------------------------------------------------------------------*/
#ifndef _KERNEL
		.data
		.p2align 2
cache_info:	.long -1, -1, -1, -1
cache_sh:	.long 0

#endif
/*----------------------------------------------------------------------*/
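/*
 * Callers use the standard C interfaces, e.g.:
 *
 *	char buf[256];
 *	memset(buf, 0xab, sizeof(buf));	// every byte becomes 0xab
 *	bzero(buf, sizeof(buf));	// zeroes; may take the dcbz path
 */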