/* $NetBSD: bzero.S,v 1.3 2001/11/30 02:25:50 mjl Exp $ */

/*-
 * Copyright (C) 2001 Martin J. Laubach <mjl@netbsd.org>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. The name of the author may not be used to endorse or promote products
 *    derived from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
/*----------------------------------------------------------------------*/
/*
 * PowerPC (32-bit) bzero/memset.
 *
 * Strategy: align the destination to a word, then to a cache-line
 * boundary, and clear whole cache lines with dcbz.  Since dcbz can only
 * write zeroes, the dcbz path is entered only when the fill value is 0
 * (bzero, or memset with c == 0); a non-zero fill diverts to the
 * simple_fill store loops below.  Userland discovers the cache line
 * size once via sysctl(CTL_MACHDEP, CPU_CACHELINE) and caches it in
 * the .data variables cache_size/cache_sh; kernel builds use the
 * compile-time CACHELINESIZE constant instead.
 */

#include <machine/asm.h>
#ifdef _KERNEL
#include <assym.h>
#endif

#define USE_STSWX 0	/* don't. slower than trivial copy loop */

/*----------------------------------------------------------------------*/
/*
     void   bzero(void *b r3, size_t len r4);
     void * memset(void *b r3, int c r4, size_t len r5);
*/
/*----------------------------------------------------------------------*/

/* Register roles used throughout (r8 additionally holds the original
 * destination pointer so memset can return it). */
#define r_dst	r3
#define r_len	r4
#define r_val	r0

		.text
		.align 4
ENTRY(bzero)
		li	r_val, 0	/* Value to stuff in */
		b	cb_memset

ENTRY(memset)
		cmplwi	cr1, r5, 0	/* cr1: is len zero? */
		mr.	r0, r4		/* fill value into r0; cr0 records
					   whether it is zero (used below) */
		mr	r8, r3		/* save original dst for return */
		beqlr-	cr1		/* Nothing to do */

		/* Replicate the low fill byte into all four bytes of r0:
		   byte -> halfword, then halfword -> word. */
		rlwimi	r0, r4, 8, 16, 23	/* word extend fill value */
		rlwimi	r0, r0, 16, 0, 15
		mr	r4, r5		/* move len into r_len (r4) */
		bne-	simple_fill	/* != 0: dcbz unusable, use
					   trivial fill loops instead */
cb_memset:	/* fill value is zero from here on: dcbz is safe */

/*----------------------------------------------------------------------*/
#ifndef _KERNEL
		/* First find out cache line size */
#ifdef PIC
		mflr	r9
		bl	_GLOBAL_OFFSET_TABLE_@local-4
		mflr	r10			/* r10 = GOT pointer */
		mtlr	r9
		lwz	r5,cache_size@got(r10)
#else
		lis	r5,cache_size@h
		ori	r5,r5,cache_size@l
#endif
		lwz	r6, 0(r5)
		cmpwi	r6, -1			/* -1 = not probed yet */
		bne+	cb_cacheline_known

/*----------------------------------------------------------------------*/
/* One-time probe: sysctl(CTL_MACHDEP, CPU_CACHELINE) -> cache_size.
   A small stack frame holds the MIB, oldlenp and the live registers
   (r0/r3/r4/r8) across the libc call. */
#define CTL_MACHDEP	7
#define CPU_CACHELINE	1

#define STKFRAME_SZ	48
#define MIB		8
#define OLDPLEN		16
#define R3_SAVE		20
#define R4_SAVE		24
#define R0_SAVE		28
#define R8_SAVE		32

		mflr	r6
		stw	r6, 4(r1)		/* save LR in caller frame */
		stwu	r1, -STKFRAME_SZ(r1)

		stw	r8, R8_SAVE(r1)
		stw	r3, R3_SAVE(r1)
		stw	r4, R4_SAVE(r1)
		stw	r0, R0_SAVE(r1)

		li	r0, CTL_MACHDEP		/* Construct MIB */
		stw	r0, MIB(r1)
		li	r0, CPU_CACHELINE
		stw	r0, MIB+4(r1)

		li	r0, 4			/* Oldlenp := 4 */
		stw	r0, OLDPLEN(r1)

		addi	r3, r1, MIB
		li	r4, 2			/* namelen */
		/* r5 already contains &cache_size (oldp) */
		addi	r6, r1, OLDPLEN
		li	r7, 0			/* newp = NULL */
		li	r8, 0			/* newlen = 0 */
		bl	PIC_PLT(_C_LABEL(sysctl))

		lwz	r8, R8_SAVE(r1)
		lwz	r3, R3_SAVE(r1)
		lwz	r4, R4_SAVE(r1)
		lwz	r0, R0_SAVE(r1)

#ifdef PIC
		bl	_GLOBAL_OFFSET_TABLE_@local-4
		mflr	r10			/* reload GOT ptr (clobbered
						   by the sysctl call) */
		lwz	r9, cache_size@got(r10)
		lwz	r9, 0(r9)
#else
		lis	r5, cache_size@ha
		lwz	r9, cache_size@l(r5)
#endif
		la	r1, STKFRAME_SZ(r1)	/* pop frame */
		lwz	r5, 4(r1)
		mtlr	r5			/* restore LR */

		/* shift = 31 - cntlzw(size) = log2(size); assumes the
		   reported cache line size is a power of two */
		cntlzw	r6, r9			/* compute shift value */
		li	r5, 31
		subf	r5, r6, r5

#ifdef PIC
		lwz	r6, cache_sh@got(r10)
		stw	r5, 0(r6)
#else
		lis	r6, cache_sh@ha
		stw	r5, cache_sh@l(r6)
#endif
/*----------------------------------------------------------------------*/
/* Okay, we have cached values; load cache line size into r9 and the
   corresponding shift (log2) into r10 */
cb_cacheline_known:
#ifdef PIC
		lwz	r5, cache_size@got(r10)
		lwz	r9, 0(r5)
		lwz	r5, cache_sh@got(r10)
		lwz	r10, 0(r5)
#else
		lis	r9, cache_size@ha
		lwz	r9, cache_size@l(r9)
		lis	r10, cache_sh@ha
		lwz	r10, cache_sh@l(r10)
#endif

#else /* _KERNEL */
		li	r9, CACHELINESIZE
#if CACHELINESIZE == 32
#define CACHELINESHIFT	5
#else
#error Define CACHELINESHIFT for your CACHELINESIZE
#endif
		li	r10, CACHELINESHIFT
#endif /* _KERNEL */
		/* Back in memory filling business */

		cmplwi	cr1, r_len, 0		/* Nothing to do? */
		add	r5, r9, r9
		cmplw	r_len, r5		/* <= 2*CL bytes to move? */
		beqlr-	cr1			/* then do nothing */

		blt+	simple_fill		/* a trivial fill routine */

		/* Word align the block, fill bytewise until dst even */

		andi.	r5, r_dst, 0x03
		li	r6, 4
		beq+	cb_aligned_w		/* already aligned to word? */

		subf	r5, r5, r6		/* bytes to fill to align4 */
#if USE_STSWX
		mtxer	r5
		stswx	r0, 0, r_dst
		add	r_dst, r5, r_dst
#else
		mtctr	r5

		subi	r_dst, r_dst, 1		/* pre-bias for stbu */
1:		stbu	r_val, 1(r_dst)		/* Fill bytewise */
		bdnz	1b

		addi	r_dst, r_dst, 1
#endif
		subf	r_len, r5, r_len

cb_aligned_w:	/* Cache block align, fill wordwise until dst aligned */

		/* I know I have something to do since we had > 2*CL initially */
		/* so no need to check for r_len = 0 */

		rlwinm.	r5, r_dst, 30, 29, 31	/* current word offset
						   within the cache line */
		srwi	r6, r9, 2		/* words per cache line */
		beq	cb_aligned_cb		/* already on CL boundary? */

		subf	r5, r5, r6		/* words to fill to alignment */
		mtctr	r5
		slwi	r5, r5, 2
		subf	r_len, r5, r_len

		subi	r_dst, r_dst, 4		/* pre-bias for stwu */
1:		stwu	r_val, 4(r_dst)		/* Fill wordwise */
		bdnz	1b
		addi	r_dst, r_dst, 4

cb_aligned_cb:	/* no need to check r_len, see above */

		srw.	r5, r_len, r10		/* Number of cache blocks */
		mtctr	r5
		beq	cblocks_done

		slw	r5, r5, r10		/* bytes covered by blocks */
		subf	r_len, r5, r_len

1:		dcbz	0, r_dst		/* Clear blockwise */
		add	r_dst, r_dst, r9
		bdnz	1b

cblocks_done:	/* still CL aligned, but less than CL bytes left */
		cmplwi	cr1, r_len, 0
		cmplwi	r_len, 8
		beq-	cr1, sf_return

		blt-	sf_bytewise		/* <8 remaining? */
		b	sf_aligned_w

/*----------------------------------------------------------------------*/
/* Trivial fill path, used for short or non-zero fills.
   Expects: r_dst, r_len, r_val set up; r8 = original dst pointer. */
wbzero:		li	r_val, 0

		cmplwi	r_len, 0
		beqlr-				/* Nothing to do */

simple_fill:
#if USE_STSWX
		cmplwi	cr1, r_len, 12		/* < 12 bytes to move? */
#else
		cmplwi	cr1, r_len, 8		/* < 8 bytes to move? */
#endif
		andi.	r5, r_dst, 0x03		/* bytes to fill to align4 */
		blt	cr1, sf_bytewise	/* trivial byte mover */

		li	r6, 4
		subf	r5, r5, r6
		beq+	sf_aligned_w		/* dest is word aligned */

#if USE_STSWX
		mtxer	r5
		stswx	r0, 0, r_dst
		add	r_dst, r5, r_dst
#else
		mtctr	r5			/* nope, then fill bytewise */
		subi	r_dst, r_dst, 1		/* until it is */
1:		stbu	r_val, 1(r_dst)
		bdnz	1b

		addi	r_dst, r_dst, 1
#endif
		subf	r_len, r5, r_len

sf_aligned_w:	/* no need to check r_len since it were >= 8 bytes initially */
#if USE_STSWX
		mr	r6, r0
		mr	r7, r0

		srwi	r5, r_len, 3
		mtctr	r5

		slwi	r5, r5, 3		/* adjust len */
		subf.	r_len, r5, r_len

1:		stswi	r6, r_dst, 8
		addi	r_dst, r_dst, 8
		bdnz	1b
#else
		srwi	r5, r_len, 2		/* words to fill */
		mtctr	r5

		slwi	r5, r5, 2
		subf.	r_len, r5, r_len	/* adjust len for fill;
						   cr0 tested below */

		subi	r_dst, r_dst, 4		/* pre-bias for stwu */
1:		stwu	r_val, 4(r_dst)
		bdnz	1b
		addi	r_dst, r_dst, 4
#endif

sf_word_done:	bne-	sf_bytewise		/* leftover 1..3 bytes */

sf_return:	mr	r3, r8			/* restore orig ptr */
		blr				/* for memset functionality */

sf_bytewise:
#if USE_STSWX
		mr	r5, r0
		mr	r6, r0
		mr	r7, r0

		mtxer	r_len
		stswx	r5, 0, r_dst
#else
		mtctr	r_len

		subi	r_dst, r_dst, 1		/* pre-bias for stbu */
1:		stbu	r_val, 1(r_dst)
		bdnz	1b
#endif
		mr	r3, r8			/* restore orig ptr */
		blr				/* for memset functionality */

/*----------------------------------------------------------------------*/
#ifndef _KERNEL
		/* Cached cache-line parameters; cache_size == -1 means
		   "not probed yet" (see the sysctl code above). */
		.data
cache_size:	.long -1
cache_sh:	.long 0

#endif
/*----------------------------------------------------------------------*/