/*	$NetBSD: bzero.S,v 1.4 2002/03/12 22:08:22 eeh Exp $ */

/*-
 * Copyright (C) 2001	Martin J. Laubach
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. The name of the author may not be used to endorse or promote products
 *    derived from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

/*----------------------------------------------------------------------*/

#include <machine/asm.h>

#ifdef _KERNEL
#include <assym.h>
#endif

#define USE_STSWX 0	/* don't. slower than trivial copy loop */

/*----------------------------------------------------------------------*/
/*
     void bzero(void *b r3, size_t len r4);
     void * memset(void *b r3, int c r4, size_t len r5);
*/
/*----------------------------------------------------------------------*/

#define r_dst	r3
#define r_len	r4
#define r_val	r0

		.text
		.align 4
ENTRY(bzero)
		li	r_val, 0		/* Value to stuff in */
		b	cb_memset

ENTRY(memset)
		cmplwi	cr1, r5, 0
		mr.	r0, r4
		mr	r8, r3
		beqlr-	cr1			/* Nothing to do */

		rlwimi	r0, r4, 8, 16, 23	/* word extend fill value */
		rlwimi	r0, r0, 16, 0, 15
		mr	r4, r5
		bne-	simple_fill		/* != 0, use trivial fill */
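
/*
 * The cache-block path below (cb_memset) is only reached with a zero fill
 * value; a non-zero byte is word-extended above and routed to simple_fill
 * instead.  Roughly, in C -- a sketch only, where `dst', `len' and `line'
 * are illustrative names and the real line size comes from cache_info
 * (userland) or cpu_info (kernel):
 *
 *	if (len <= 2 * line)			// short runs: simple_fill
 *		goto simple_fill;
 *	while ((uintptr_t)dst & 3)		// byte-fill to a word boundary
 *		*dst++ = 0, len--;
 *	while ((uintptr_t)dst & (line - 1)) {	// word-fill to a line boundary
 *		*(uint32_t *)dst = 0; dst += 4; len -= 4;
 *	}
 *	while (len >= line) {			// dcbz clears one line per go
 *		__asm volatile("dcbz 0,%0" :: "r"(dst) : "memory");
 *		dst += line; len -= line;
 *	}
 *	...					// sf_* loops finish the tail
 */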
cb_memset:
/*----------------------------------------------------------------------*/
#ifndef _KERNEL
		/* First find out cache line size */
#ifdef PIC
		mflr	r9
		bl	_GLOBAL_OFFSET_TABLE_@local-4
		mflr	r10
		mtlr	r9
		lwz	r5,cache_info@got(r10)
#else
		lis	r5,cache_info@h
		ori	r5,r5,cache_info@l
#endif
		lwz	r6, 4(r5)
		cmpwi	r6, -1
		bne+	cb_cacheline_known

/*----------------------------------------------------------------------*/
#define CTL_MACHDEP	7
#define CPU_CACHELINE	1
#define CPU_CACHEINFO	5

#define STKFRAME_SZ	48
#define MIB		8
#define OLDPLEN		16
#define R3_SAVE		20
#define R4_SAVE		24
#define R0_SAVE		28
#define R8_SAVE		32

		mflr	r6
		stw	r6, 4(r1)
		stwu	r1, -STKFRAME_SZ(r1)

		stw	r8, R8_SAVE(r1)
		stw	r3, R3_SAVE(r1)
		stw	r4, R4_SAVE(r1)
		stw	r0, R0_SAVE(r1)

		li	r0, CTL_MACHDEP		/* Construct MIB */
		stw	r0, MIB(r1)
		li	r0, CPU_CACHEINFO
		stw	r0, MIB+4(r1)

		li	r0, 4*4			/* Oldlenp := 4*4 */
		stw	r0, OLDPLEN(r1)

		addi	r3, r1, MIB
		li	r4, 2			/* namelen */
		/* r5 already contains &cache_info */
		addi	r6, r1, OLDPLEN
		li	r7, 0
		li	r8, 0
		bl	PIC_PLT(_C_LABEL(sysctl))

		cmpwi	r3, 0			/* Check result */
		beq	1f

		/* Failure, try older sysctl */

		li	r0, CTL_MACHDEP		/* Construct MIB */
		stw	r0, MIB(r1)
		li	r0, CPU_CACHELINE
		stw	r0, MIB+4(r1)

		li	r0, 4			/* Oldlenp := 4 */
		stw	r0, OLDPLEN(r1)

		addi	r3, r1, MIB
		li	r4, 2			/* namelen */
#ifdef PIC
		mflr	r9
		bl	_GLOBAL_OFFSET_TABLE_@local-4
		mflr	r10
		mtlr	r9
		lwz	r5,cache_info@got(r10)
		addi	r5, r5, 4
#else
		lis	r5,cache_info+4@h
		ori	r5,r5,cache_info+4@l
#endif
		addi	r6, r1, OLDPLEN
		li	r7, 0
		li	r8, 0
		bl	PIC_PLT(_C_LABEL(sysctl))
1:
		lwz	r8, R8_SAVE(r1)
		lwz	r3, R3_SAVE(r1)
		lwz	r4, R4_SAVE(r1)
		lwz	r0, R0_SAVE(r1)

#ifdef PIC
		bl	_GLOBAL_OFFSET_TABLE_@local-4
		mflr	r10
		lwz	r9, cache_info@got(r10)
		lwz	r9, 4(r9)
#else
		lis	r5, cache_info+4@ha
		lwz	r9, cache_info+4@l(r5)
#endif
		la	r1, STKFRAME_SZ(r1)
		lwz	r5, 4(r1)
		mtlr	r5

		cntlzw	r6, r9			/* compute shift value */
		li	r5, 31
		subf	r5, r6, r5

#ifdef PIC
		lwz	r6, cache_sh@got(r10)
		stw	r5, 0(r6)
#else
		lis	r6, cache_sh@ha
		stw	r5, cache_sh@l(r6)
#endif
/*----------------------------------------------------------------------*/
/* Okay, we know the cache line size (r9) and shift value (r10) */
cb_cacheline_known:
#ifdef PIC
		lwz	r5, cache_info@got(r10)
		lwz	r9, 4(r5)
		lwz	r5, cache_sh@got(r10)
		lwz	r10, 0(r5)
#else
		lis	r9, cache_info+4@ha
		lwz	r9, cache_info+4@l(r9)
		lis	r10, cache_sh@ha
		lwz	r10, cache_sh@l(r10)
#endif

#else /* _KERNEL */
#ifdef MULTIPROCESSOR
		mfspr	r10, 0			/* Get cpu_info pointer */
#else
		lis	r10, cpu_info_store@ha
		addi	r10, r10, cpu_info_store@l
#endif
		lwz	r9, CPU_CI+4(r10)	/* Load D$ line size */
		cntlzw	r10, r9			/* Calculate shift.. */
		li	r6, 31
		subf	r10, r10, r6
#endif /* _KERNEL */

		/* Back in memory filling business */

		cmplwi	cr1, r_len, 0		/* Nothing to do? */
		add	r5, r9, r9
		cmplw	r_len, r5		/* <= 2*CL bytes to move? */
		beqlr-	cr1			/* then do nothing */

		blt+	simple_fill		/* a trivial fill routine */

		/* Word align the block, fill bytewise until dst even */

		andi.	r5, r_dst, 0x03
		li	r6, 4
		beq+	cb_aligned_w		/* already aligned to word? */
		subf	r5, r5, r6		/* bytes to fill to align4 */
#if USE_STSWX
		mtxer	r5
		stswx	r0, 0, r_dst
		add	r_dst, r5, r_dst
#else
		mtctr	r5

		subi	r_dst, r_dst, 1
1:		stbu	r_val, 1(r_dst)		/* Fill bytewise */
		bdnz	1b

		addi	r_dst, r_dst, 1
#endif
		subf	r_len, r5, r_len

cb_aligned_w:	/* Cache block align, fill wordwise until dst aligned */

		/* I know I have something to do since we had > 2*CL initially */
		/* so no need to check for r_len = 0 */

		rlwinm.	r5, r_dst, 30, 29, 31
		srwi	r6, r9, 2
		beq	cb_aligned_cb		/* already on CL boundary? */
		subf	r5, r5, r6		/* words to fill to alignment */
		mtctr	r5
		slwi	r5, r5, 2
		subf	r_len, r5, r_len

		subi	r_dst, r_dst, 4
1:		stwu	r_val, 4(r_dst)		/* Fill wordwise */
		bdnz	1b
		addi	r_dst, r_dst, 4

cb_aligned_cb:	/* no need to check r_len, see above */

		srw.	r5, r_len, r10		/* Number of cache blocks */
		mtctr	r5
		beq	cblocks_done

		slw	r5, r5, r10
		subf	r_len, r5, r_len

1:		dcbz	0, r_dst		/* Clear blockwise */
		add	r_dst, r_dst, r9
		bdnz	1b

cblocks_done:	/* still CL aligned, but less than CL bytes left */
		cmplwi	cr1, r_len, 0
		cmplwi	r_len, 8
		beq-	cr1, sf_return

		blt-	sf_bytewise		/* <8 remaining? */
		b	sf_aligned_w

/*----------------------------------------------------------------------*/
wbzero:		li	r_val, 0

		cmplwi	r_len, 0
		beqlr-				/* Nothing to do */

simple_fill:
#if USE_STSWX
		cmplwi	cr1, r_len, 12		/* < 12 bytes to move? */
#else
		cmplwi	cr1, r_len, 8		/* < 8 bytes to move? */
#endif
		andi.	r5, r_dst, 0x03		/* bytes to fill to align4 */
		blt	cr1, sf_bytewise	/* trivial byte mover */

		li	r6, 4
		subf	r5, r5, r6
		beq+	sf_aligned_w		/* dest is word aligned */
#if USE_STSWX
		mtxer	r5
		stswx	r0, 0, r_dst
		add	r_dst, r5, r_dst
#else
		mtctr	r5			/* nope, then fill bytewise */
		subi	r_dst, r_dst, 1		/* until it is */
1:		stbu	r_val, 1(r_dst)
		bdnz	1b
		addi	r_dst, r_dst, 1
#endif
		subf	r_len, r5, r_len

sf_aligned_w:	/* no need to check r_len since it was >= 8 bytes initially */
#if USE_STSWX
		mr	r6, r0
		mr	r7, r0

		srwi	r5, r_len, 3
		mtctr	r5

		slwi	r5, r5, 3		/* adjust len */
		subf.	r_len, r5, r_len

1:		stswi	r6, r_dst, 8
		addi	r_dst, r_dst, 8
		bdnz	1b
#else
		srwi	r5, r_len, 2		/* words to fill */
		mtctr	r5

		slwi	r5, r5, 2
		subf.	r_len, r5, r_len	/* adjust len for fill */

		subi	r_dst, r_dst, 4
1:		stwu	r_val, 4(r_dst)
		bdnz	1b
		addi	r_dst, r_dst, 4
#endif

sf_word_done:	bne-	sf_bytewise

sf_return:	mr	r3, r8			/* restore orig ptr */
		blr				/* for memset functionality */

sf_bytewise:
#if USE_STSWX
		mr	r5, r0
		mr	r6, r0
		mr	r7, r0

		mtxer	r_len
		stswx	r5, 0, r_dst
#else
		mtctr	r_len

		subi	r_dst, r_dst, 1
1:		stbu	r_val, 1(r_dst)
		bdnz	1b
#endif
		mr	r3, r8			/* restore orig ptr */
		blr				/* for memset functionality */

/*----------------------------------------------------------------------*/
#ifndef _KERNEL
		.data
cache_info:	.long	-1, -1, -1, -1
cache_sh:	.long	0

#endif
/*----------------------------------------------------------------------*/
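
/*
 * For reference, the userland cache-size probe above corresponds roughly to
 * the C below -- a sketch only, with error handling trimmed; `info' stands
 * in for the four-word cache_info array kept in .data:
 *
 *	int mib[2] = { CTL_MACHDEP, CPU_CACHEINFO };
 *	int info[4] = { -1, -1, -1, -1 };
 *	size_t len = sizeof(info);
 *
 *	if (sysctl(mib, 2, info, &len, NULL, 0) != 0) {
 *		mib[1] = CPU_CACHELINE;		// fall back on older kernels
 *		len = sizeof(info[1]);
 *		sysctl(mib, 2, &info[1], &len, NULL, 0);
 *	}
 *	cache_sh = 31 - __builtin_clz(info[1]);	// info[1]: D$ line size,
 *						// assumed a power of two
 */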