/*	$NetBSD: bzero.S,v 1.12 2013/07/18 12:20:41 matt Exp $ */

/*-
 * Copyright (C) 2001	Martin J. Laubach <mjl@NetBSD.org>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. The name of the author may not be used to endorse or promote products
 *    derived from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
/*----------------------------------------------------------------------*/

#include <machine/asm.h>


#if defined(LIBC_SCCS) && !defined(lint)
__RCSID("$NetBSD: bzero.S,v 1.12 2013/07/18 12:20:41 matt Exp $")
#endif /* LIBC_SCCS && !lint */

#ifdef _KERNEL
#include <assym.h>
#endif

#define USE_STSWX 0	/* don't; slower than trivial copy loop */

/*----------------------------------------------------------------------*/
/*
     void bzero(void *b %r3, size_t len %r4);
     void * memset(void *b %r3, int c %r4, size_t len %r5);
*/
/*----------------------------------------------------------------------*/

#define r_dst	%r3
#define r_len	%r4
#define r_val	%r0

	.text
	.align 4
ENTRY(bzero)
	li	r_val, 0			/* Value to stuff in */
	b	cb_memset
END(bzero)

ENTRY(memset)
	cmplwi	%cr1, %r5, 0
	mr.	%r0, %r4
	mr	%r8, %r3
	beqlr-	%cr1				/* Nothing to do */

	rlwimi	%r0, %r4, 8, 16, 23		/* word extend fill value */
	rlwimi	%r0, %r0, 16, 0, 15
	mr	%r4, %r5
	bne-	simple_fill			/* != 0, use trivial fill */
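
/*
 * cb_memset: the cache-block fill path, entered with the fill word in
 * %r0 and the length in %r4.  The two rlwimi above replicate the fill
 * byte through the word (e.g. c = 0xAB yields 0xABABABAB).  This path
 * clears whole cache lines with dcbz, which can only store zeroes, so
 * memset branches to simple_fill for any nonzero fill value; only
 * bzero and memset(b, 0, len) arrive here.
 */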
cb_memset:

/*----------------------------------------------------------------------*/
#ifndef _KERNEL
	/* First find out cache line size */
	mflr	%r9
#ifdef PIC
	bcl	20,31,1f
1:	mflr	%r5
	mtlr	%r9
	addis	%r5,%r5,cache_info+4-1b@ha
	lwzu	%r9,cache_info+4-1b@l(%r5)
#else
	lis	%r5,cache_info+4@ha
	lwzu	%r9,cache_info+4@l(%r5)
#endif
	lwz	%r10,cache_sh-(cache_info+4)(%r5)
	cmpwi	%r9, -1
	bne+	cb_cacheline_known

	addi	%r5, %r5, -4		/* point r5 @ beginning of cache_info */

/*----------------------------------------------------------------------*/
#define CTL_MACHDEP	7
#define CPU_CACHELINE	1
#define CPU_CACHEINFO	5

#define STKFRAME_SZ	64
#define MIB		8
#define OLDPLEN		16
#define R3_SAVE		20
#define R4_SAVE		24
#define R0_SAVE		28
#define R8_SAVE		32
#define R31_SAVE	36
#ifdef PIC
#define R30_SAVE	40
#endif

	mflr	%r9			/* %r9 was clobbered by the lwzu above */
	stw	%r9, 4(%r1)		/* save LR across the sysctl calls */
	stwu	%r1, -STKFRAME_SZ(%r1)

	stw	%r31, R31_SAVE(%r1)
	mr	%r31, %r5		/* cache info */

#ifdef PIC
	stw	%r30, R30_SAVE(%r1)
	PIC_TOCSETUP(cb_memset,%r30)
#endif

	stw	%r8, R8_SAVE(%r1)
	stw	%r3, R3_SAVE(%r1)
	stw	%r4, R4_SAVE(%r1)
	stw	%r0, R0_SAVE(%r1)

	li	%r0, CTL_MACHDEP	/* Construct MIB */
	stw	%r0, MIB(%r1)
	li	%r0, CPU_CACHEINFO
	stw	%r0, MIB+4(%r1)

	li	%r0, 4*4		/* Oldlenp := 4*4 */
	stw	%r0, OLDPLEN(%r1)

	addi	%r3, %r1, MIB
	li	%r4, 2			/* namelen */
	/* %r5 already contains &cache_info */
	addi	%r6, %r1, OLDPLEN
	li	%r7, 0
	li	%r8, 0
	bl	PIC_PLT(_C_LABEL(sysctl))

	cmpwi	%r3, 0			/* Check result */
	beq	1f

	/* Failure, try older sysctl */

	li	%r0, CTL_MACHDEP	/* Construct MIB */
	stw	%r0, MIB(%r1)
	li	%r0, CPU_CACHELINE
	stw	%r0, MIB+4(%r1)

	li	%r0, 4			/* Oldlenp := 4 */
	stw	%r0, OLDPLEN(%r1)

	addi	%r3, %r1, MIB
	li	%r4, 2			/* namelen */
	addi	%r5, %r31, 4
	addi	%r6, %r1, OLDPLEN
	li	%r7, 0
	li	%r8, 0
	bl	PIC_PLT(_C_LABEL(sysctl))
1:
	lwz	%r3, R3_SAVE(%r1)
	lwz	%r4, R4_SAVE(%r1)
	lwz	%r8, R8_SAVE(%r1)
	lwz	%r0, R0_SAVE(%r1)
	lwz	%r9, 4(%r31)
	lwz	%r31, R31_SAVE(%r1)
#ifdef PIC
	lwz	%r30, R30_SAVE(%r1)
#endif
	addi	%r1, %r1, STKFRAME_SZ
	lwz	%r7, 4(%r1)
	mtlr	%r7

	cntlzw	%r6, %r9		/* compute shift value */
	li	%r5, 31
	subf	%r10, %r6, %r5

#ifdef PIC
	mflr	%r9
	bcl	20,31,1f
1:	mflr	%r5
	mtlr	%r9

	addis	%r5, %r5, cache_info+4-1b@ha
	lwzu	%r9, cache_info+4-1b@l(%r5)
#else
	lis	%r5, cache_info+4@ha
	lwzu	%r9, cache_info+4@l(%r5)
#endif
	stw	%r10, cache_sh-(cache_info+4)(%r5)

/*----------------------------------------------------------------------*/
/* Okay, we know the cache line size (%r9) and shift value (%r10) */
cb_cacheline_known:
#else /* _KERNEL */
#ifdef MULTIPROCESSOR
	mfsprg	%r10, 0			/* Get cpu_info pointer */
#else
	lis	%r10, cpu_info_store@ha
	addi	%r10, %r10, cpu_info_store@l
#endif
	lwz	%r9, CPU_CI+4(%r10)	/* Load D$ line size */
	cntlzw	%r10, %r9		/* Calculate shift.. */
	li	%r6, 31
	subf	%r10, %r10, %r6
#endif /* _KERNEL */
	/* Back in memory filling business */

	cmplwi	%cr1, r_len, 0		/* Nothing to do? */
	add	%r5, %r9, %r9
	cmplw	r_len, %r5		/* < 2*CL bytes to move? */
	beqlr-	%cr1			/* then do nothing */

	blt+	simple_fill		/* a trivial fill routine */
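
	/*
	 * Large fill, done in three steps: fill bytewise until r_dst
	 * is word aligned, then wordwise until it reaches a cache line
	 * boundary, then clear whole lines with dcbz.  The 2*CL guard
	 * above ensures the alignment steps cannot exhaust r_len.
	 */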
	/* Word align the block, fill bytewise until dst even */

	andi.	%r5, r_dst, 0x03
	li	%r6, 4
	beq+	cb_aligned_w		/* already aligned to word? */

	subf	%r5, %r5, %r6		/* bytes to fill to align4 */
#if USE_STSWX
	mtxer	%r5
	stswx	%r0, 0, r_dst
	add	r_dst, %r5, r_dst
#else
	mtctr	%r5

	subi	r_dst, r_dst, 1
1:	stbu	r_val, 1(r_dst)		/* Fill bytewise */
	bdnz	1b

	addi	r_dst, r_dst, 1
#endif
	subf	r_len, %r5, r_len

cb_aligned_w:	/* Cache block align, fill wordwise until dst aligned */

	/* I know I have something to do since we had >= 2*CL initially */
	/* so no need to check for r_len = 0 */

	subi	%r6, %r9, 1		/* CL mask */
	and.	%r5, r_dst, %r6
	srwi	%r5, %r5, 2
	srwi	%r6, %r9, 2
	beq	cb_aligned_cb		/* already on CL boundary? */

	subf	%r5, %r5, %r6		/* words to fill to alignment */
	mtctr	%r5
	slwi	%r5, %r5, 2
	subf	r_len, %r5, r_len

	subi	r_dst, r_dst, 4
1:	stwu	r_val, 4(r_dst)		/* Fill wordwise */
	bdnz	1b
	addi	r_dst, r_dst, 4

cb_aligned_cb:	/* no need to check r_len, see above */

	srw.	%r5, r_len, %r10	/* Number of cache blocks */
	mtctr	%r5
	beq	cblocks_done

	slw	%r5, %r5, %r10
	subf	r_len, %r5, r_len

1:	dcbz	0, r_dst		/* Clear blockwise */
	add	r_dst, r_dst, %r9
	bdnz	1b

cblocks_done:	/* still CL aligned, but less than CL bytes left */
	cmplwi	%cr1, r_len, 0
	cmplwi	r_len, 8
	beq-	%cr1, sf_return

	blt-	sf_bytewise		/* <8 remaining? */
	b	sf_aligned_w

/*----------------------------------------------------------------------*/
wbzero:	li	r_val, 0

	cmplwi	r_len, 0
	beqlr-				/* Nothing to do */
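
/*
 * simple_fill: fallback for short buffers and for nonzero memset fill
 * values.  Byte-fill until r_dst is word aligned, store the fill word
 * in a stwu loop, then mop up any sub-word tail bytewise.  (The
 * stswx/stswi variant is compiled out; see USE_STSWX above.)
 */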
simple_fill:
#if USE_STSWX
	cmplwi	%cr1, r_len, 12		/* < 12 bytes to move? */
#else
	cmplwi	%cr1, r_len, 8		/* < 8 bytes to move? */
#endif
	andi.	%r5, r_dst, 0x03	/* bytes to fill to align4 */
	blt	%cr1, sf_bytewise	/* trivial byte mover */

	li	%r6, 4
	subf	%r5, %r5, %r6
	beq+	sf_aligned_w		/* dest is word aligned */

#if USE_STSWX
	mtxer	%r5
	stswx	%r0, 0, r_dst
	add	r_dst, %r5, r_dst
#else
	mtctr	%r5			/* nope, then fill bytewise */
	subi	r_dst, r_dst, 1		/* until it is */
1:	stbu	r_val, 1(r_dst)
	bdnz	1b

	addi	r_dst, r_dst, 1
#endif
	subf	r_len, %r5, r_len

sf_aligned_w:	/* no need to check r_len since it was >= 8 bytes initially */
#if USE_STSWX
	mr	%r6, %r0
	mr	%r7, %r0

	srwi	%r5, r_len, 3
	mtctr	%r5

	slwi	%r5, %r5, 3		/* adjust len */
	subf.	r_len, %r5, r_len

1:	stswi	%r6, r_dst, 8
	addi	r_dst, r_dst, 8
	bdnz	1b
#else
	srwi	%r5, r_len, 2		/* words to fill */
	mtctr	%r5

	slwi	%r5, %r5, 2
	subf.	r_len, %r5, r_len	/* adjust len for fill */

	subi	r_dst, r_dst, 4
1:	stwu	r_val, 4(r_dst)
	bdnz	1b
	addi	r_dst, r_dst, 4
#endif

sf_word_done:	bne-	sf_bytewise

sf_return:	mr	%r3, %r8	/* restore orig ptr */
	blr				/* for memset functionality */

sf_bytewise:
#if USE_STSWX
	mr	%r5, %r0
	mr	%r6, %r0
	mr	%r7, %r0

	mtxer	r_len
	stswx	%r5, 0, r_dst
#else
	mtctr	r_len

	subi	r_dst, r_dst, 1
1:	stbu	r_val, 1(r_dst)
	bdnz	1b
#endif
	mr	%r3, %r8		/* restore orig ptr */
	blr				/* for memset functionality */
END(memset)

/*----------------------------------------------------------------------*/
#ifndef _KERNEL
	.data
	.p2align 2
cache_info:	.long -1, -1, -1, -1
cache_sh:	.long 0

#endif
/*----------------------------------------------------------------------*/
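
/*
 * Layout note: the second word of cache_info holds the D-cache line
 * size as filled in by sysctl (-1 means "not probed yet"), and
 * cache_sh caches the derived log2(line size) shift, so the sysctl
 * calls above run at most once per process.
 */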