/*	$NetBSD: bcopy_page.S,v 1.10 2013/12/17 01:27:21 joerg Exp $	*/

/*
 * Copyright (c) 1995 Scott Stevens
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by Scott Stevens.
 * 4. The name of the author may not be used to endorse or promote products
 *    derived from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * RiscBSD kernel project
 *
 * bcopy_page.S
 *
 * page optimised bcopy and bzero routines
 *
 * Created      : 08/04/95
 */

#include <machine/asm.h>

#include "assym.h"

#ifndef __XSCALE__

/* #define BIG_LOOPS */

/*
 * bcopy_page(src, dest)
 *
 * Optimised copy page routine: copies PAGE_SIZE bytes from src to dest
 * in 32-byte ldmia/stmia bursts.
 *
 * On entry:
 *   r0 - src address
 *   r1 - dest address
 *
 * On exit:
 *   r0, r1      - advanced by PAGE_SIZE (post-increment writeback)
 *   r2, r3, ip  - overwritten (loop counter / copy data)
 *   flags       - overwritten by the loop-counter subs
 *   r4-r8       - restored to their entry values
 *
 * Requires:
 *   number of bytes per page (PAGE_SIZE) is a multiple of 512 (BIG_LOOPS),
 *   128 otherwise.
 */

/* Bytes moved by one COPY_CHUNK: eight 32-bit registers. */
#define	CHUNK_SIZE		32

/* Prefetch hooks; in this (non-XScale) build they expand to nothing. */
#define	PREFETCH_FIRST_CHUNK	/* nothing */
#define	PREFETCH_NEXT_CHUNK	/* nothing */

/*
 * COPY_CHUNK: load eight words from [r0] and store them to [r1],
 * post-incrementing both pointers by CHUNK_SIZE.  The #ifndef lets a
 * definition made before this point take precedence.
 */
#ifndef COPY_CHUNK
#define	COPY_CHUNK \
	PREFETCH_NEXT_CHUNK ; \
	ldmia	r0!, {r3-r8,ip,lr} ; \
	stmia	r1!, {r3-r8,ip,lr}
#endif	/* ! COPY_CHUNK */

/*
 * SAVE_REGS / RESTORE_REGS: preserve the registers COPY_CHUNK uses as
 * data registers beyond r3 and ip.  RESTORE_REGS pops the saved lr
 * straight into pc, so it also performs the function return.
 */
#ifndef SAVE_REGS
#define	SAVE_REGS	push	{r4-r8, lr}
#define	RESTORE_REGS	pop	{r4-r8, pc}
#endif

ENTRY(bcopy_page)
	PREFETCH_FIRST_CHUNK
	SAVE_REGS
	/* r2 = number of loop passes needed to cover one page. */
#ifdef BIG_LOOPS
	mov	r2, #(PAGE_SIZE >> 9)	/* 512 bytes per pass */
#else
	mov	r2, #(PAGE_SIZE >> 7)	/* 128 bytes per pass */
#endif

1:
	/* 4 x 32 = 128 bytes */
	COPY_CHUNK
	COPY_CHUNK
	COPY_CHUNK
	COPY_CHUNK

#ifdef BIG_LOOPS
	/* There is little point making the loop any larger; unless we are
	   running with the cache off, the load/store overheads will
	   completely dominate this loop.  */
	COPY_CHUNK
	COPY_CHUNK
	COPY_CHUNK
	COPY_CHUNK

	COPY_CHUNK
	COPY_CHUNK
	COPY_CHUNK
	COPY_CHUNK

	COPY_CHUNK
	COPY_CHUNK
	COPY_CHUNK
	COPY_CHUNK
#endif
	subs	r2, r2, #1		/* one pass done; sets Z on the last */
	bne	1b

	RESTORE_REGS		/* ...and return. */
END(bcopy_page)

/*
 * bzero_page(dest)
 *
 * Optimised zero page routine: fills PAGE_SIZE bytes at dest with zero
 * using 32-byte stmia bursts of eight zeroed registers.
 *
 * On entry:
 *   r0 - dest address
 *
 * On exit:
 *   r0          - advanced by PAGE_SIZE (post-increment writeback)
 *   r2          - 0 (exhausted loop counter)
 *   r3, ip      - 0
 *   flags       - overwritten by the loop-counter subs
 *   r4-r8       - restored to their entry values
 *
 * Requires:
 *   number of bytes per page (PAGE_SIZE) is a multiple of 512 (BIG_LOOPS),
 *   128 otherwise
 */

ENTRY(bzero_page)
	push	{r4-r8, lr}
	/* r2 = number of loop passes needed to cover one page. */
#ifdef BIG_LOOPS
	mov	r2, #(PAGE_SIZE >> 9)	/* 512 bytes per pass */
#else
	mov	r2, #(PAGE_SIZE >> 7)	/* 128 bytes per pass */
#endif
	/* Eight zeroed registers = one 32-byte store burst. */
	mov	r3, #0
	mov	r4, #0
	mov	r5, #0
	mov	r6, #0
	mov	r7, #0
	mov	r8, #0
	mov	ip, #0
	mov	lr, #0

1:
	/* 4 x 32 = 128 bytes */
	stmia	r0!, {r3-r8,ip,lr}
	stmia	r0!, {r3-r8,ip,lr}
	stmia	r0!, {r3-r8,ip,lr}
	stmia	r0!, {r3-r8,ip,lr}

#ifdef BIG_LOOPS
	/* There is little point making the loop any larger; unless we are
	   running with the cache off, the load/store overheads will
	   completely dominate this loop.  */
	stmia	r0!, {r3-r8,ip,lr}
	stmia	r0!, {r3-r8,ip,lr}
	stmia	r0!, {r3-r8,ip,lr}
	stmia	r0!, {r3-r8,ip,lr}

	stmia	r0!, {r3-r8,ip,lr}
	stmia	r0!, {r3-r8,ip,lr}
	stmia	r0!, {r3-r8,ip,lr}
	stmia	r0!, {r3-r8,ip,lr}

	stmia	r0!, {r3-r8,ip,lr}
	stmia	r0!, {r3-r8,ip,lr}
	stmia	r0!, {r3-r8,ip,lr}
	stmia	r0!, {r3-r8,ip,lr}

#endif

	subs	r2, r2, #1		/* one pass done; sets Z on the last */
	bne	1b

	pop	{r4-r8, pc}		/* restore and return via saved lr */
END(bzero_page)

#else	/* __XSCALE__ */

/*
 * XSCALE version of bcopy_page
 *
 * bcopy_page(src, dest): copies PAGE_SIZE bytes from src to dest.
 *
 * On entry:
 *   r0 - src address
 *   r1 - dest address
 *
 * On exit:
 *   r0, r1      - advanced by PAGE_SIZE (post-increment writeback)
 *   r2, r3, ip  - overwritten (copy data / loop counter)
 *   flags       - overwritten by the loop-counter subs
 *   r4, r5      - restored to their entry values
 *
 * Requires:
 *   number of bytes per page (PAGE_SIZE) is a multiple of 128.
 *   NOTE(review): strd presumably needs dest to be 8-byte aligned;
 *   page-aligned callers satisfy that -- confirm against the callers.
 *
 * The loop is software pipelined: each word pair is loaded into r2/r3
 * or r4/r5 one step before it is stored with strd, so the first pair is
 * loaded ahead of the loop and the pair for the following pass is
 * loaded (conditionally) at the bottom of the current one.  The offset
 * comments on the loads are relative to the start of the current
 * 0x80-byte block.
 *
 * The "Prefetch 0x80" pld on the final pass addresses the first byte
 * past the source page; pld is only a preload hint.
 */
ENTRY(bcopy_page)
	pld	[r0]
	push	{r4, r5}
	/*
	 * One pass per 0x80 bytes.  Derived from PAGE_SIZE (as every other
	 * routine in this file does) rather than hard-coding 32, which is
	 * only correct for 4096-byte pages.
	 */
	mov	ip, #(PAGE_SIZE >> 7)
	ldr	r2, [r0], #0x04		/* 0x00 */
	ldr	r3, [r0], #0x04		/* 0x04 */
1:	pld	[r0, #0x18]		/* Prefetch 0x20 */
	ldr	r4, [r0], #0x04		/* 0x08 */
	ldr	r5, [r0], #0x04		/* 0x0c */
	strd	r2, r3, [r1], #0x08
	ldr	r2, [r0], #0x04		/* 0x10 */
	ldr	r3, [r0], #0x04		/* 0x14 */
	strd	r4, r5, [r1], #0x08
	ldr	r4, [r0], #0x04		/* 0x18 */
	ldr	r5, [r0], #0x04		/* 0x1c */
	strd	r2, r3, [r1], #0x08
	ldr	r2, [r0], #0x04		/* 0x20 */
	ldr	r3, [r0], #0x04		/* 0x24 */
	pld	[r0, #0x18]		/* Prefetch 0x40 */
	strd	r4, r5, [r1], #0x08
	ldr	r4, [r0], #0x04		/* 0x28 */
	ldr	r5, [r0], #0x04		/* 0x2c */
	strd	r2, r3, [r1], #0x08
	ldr	r2, [r0], #0x04		/* 0x30 */
	ldr	r3, [r0], #0x04		/* 0x34 */
	strd	r4, r5, [r1], #0x08
	ldr	r4, [r0], #0x04		/* 0x38 */
	ldr	r5, [r0], #0x04		/* 0x3c */
	strd	r2, r3, [r1], #0x08
	ldr	r2, [r0], #0x04		/* 0x40 */
	ldr	r3, [r0], #0x04		/* 0x44 */
	pld	[r0, #0x18]		/* Prefetch 0x60 */
	strd	r4, r5, [r1], #0x08
	ldr	r4, [r0], #0x04		/* 0x48 */
	ldr	r5, [r0], #0x04		/* 0x4c */
	strd	r2, r3, [r1], #0x08
	ldr	r2, [r0], #0x04		/* 0x50 */
	ldr	r3, [r0], #0x04		/* 0x54 */
	strd	r4, r5, [r1], #0x08
	ldr	r4, [r0], #0x04		/* 0x58 */
	ldr	r5, [r0], #0x04		/* 0x5c */
	strd	r2, r3, [r1], #0x08
	ldr	r2, [r0], #0x04		/* 0x60 */
	ldr	r3, [r0], #0x04		/* 0x64 */
	pld	[r0, #0x18]		/* Prefetch 0x80 */
	strd	r4, r5, [r1], #0x08
	ldr	r4, [r0], #0x04		/* 0x68 */
	ldr	r5, [r0], #0x04		/* 0x6c */
	strd	r2, r3, [r1], #0x08
	ldr	r2, [r0], #0x04		/* 0x70 */
	ldr	r3, [r0], #0x04		/* 0x74 */
	strd	r4, r5, [r1], #0x08
	ldr	r4, [r0], #0x04		/* 0x78 */
	ldr	r5, [r0], #0x04		/* 0x7c */
	strd	r2, r3, [r1], #0x08
	subs	ip, ip, #0x01
	/*
	 * Pre-load the first pair of the next block only if another pass
	 * follows, so nothing is read beyond the end of the source page.
	 * The strd below leaves the flags from subs intact for the bgt.
	 */
	ldrgt	r2, [r0], #0x04		/* 0x80 */
	ldrgt	r3, [r0], #0x04		/* 0x84 */
	strd	r4, r5, [r1], #0x08
	bgt	1b
	pop	{r4, r5}
	RET
END(bcopy_page)

/*
 * XSCALE version of bzero_page
 *
 * bzero_page(dest): fills PAGE_SIZE bytes at dest with zero.
 *
 * On entry:
 *   r0 - dest address
 *
 * On exit:
 *   r0          - advanced by PAGE_SIZE (post-increment writeback)
 *   r1          - 0 (exhausted byte counter)
 *   r2, r3      - 0
 *   flags       - overwritten by the byte-counter subs
 *
 * Requires:
 *   number of bytes per page (PAGE_SIZE) is a multiple of 128.
 */
ENTRY(bzero_page)
	mov	r1, #PAGE_SIZE		/* r1 = bytes still to clear */
	mov	r2, #0
	mov	r3, #0
	/* Each pass: sixteen 8-byte stores = 128 bytes. */
1:	strd	r2, r3, [r0], #8	/* bytes 0-31 */
	strd	r2, r3, [r0], #8
	strd	r2, r3, [r0], #8
	strd	r2, r3, [r0], #8
	strd	r2, r3, [r0], #8	/* bytes 32-63 */
	strd	r2, r3, [r0], #8
	strd	r2, r3, [r0], #8
	strd	r2, r3, [r0], #8
	strd	r2, r3, [r0], #8	/* bytes 64-95 */
	strd	r2, r3, [r0], #8
	strd	r2, r3, [r0], #8
	strd	r2, r3, [r0], #8
	strd	r2, r3, [r0], #8	/* bytes 96-127 */
	strd	r2, r3, [r0], #8
	strd	r2, r3, [r0], #8
	strd	r2, r3, [r0], #8
	subs	r1, r1, #128
	bne	1b
	RET
END(bzero_page)
#endif	/* __XSCALE__ */