/*	$NetBSD: bcopy_page.S,v 1.10 2013/12/17 01:27:21 joerg Exp $	*/

/*
 * Copyright (c) 1995 Scott Stevens
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by Scott Stevens.
 * 4. The name of the author may not be used to endorse or promote products
 *    derived from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * RiscBSD kernel project
 *
 * bcopy_page.S
 *
 * page optimised bcopy and bzero routines
 *
 * Created      : 08/04/95
 */

#include <machine/asm.h>

#include "assym.h"

#ifndef __XSCALE__

/* #define BIG_LOOPS */

/*
 * bcopy_page(src, dest)
 *
 * Optimised copy page routine.
 *
 * On entry:
 *   r0 - src address
 *   r1 - dest address
 *
 * Requires:
 *   number of bytes per page (PAGE_SIZE) is a multiple of 512 (BIG_LOOPS), 128
 *   otherwise.
 *
 * Register use:
 *   r2             - loop counter (passes remaining)
 *   r3-r8, ip, lr  - eight-word (32 byte) transfer buffer used by COPY_CHUNK
 *   r4-r8 and lr are saved on entry by SAVE_REGS; RESTORE_REGS reloads
 *   r4-r8 and pops the saved lr straight into pc, which performs the return.
 */

#define	CHUNK_SIZE	32

#define	PREFETCH_FIRST_CHUNK	/* nothing */
#define	PREFETCH_NEXT_CHUNK	/* nothing */

/* Copy one 32-byte chunk from [r0] to [r1], advancing both pointers. */
#ifndef COPY_CHUNK
#define	COPY_CHUNK \
	PREFETCH_NEXT_CHUNK ; \
	ldmia	r0!, {r3-r8,ip,lr} ; \
	stmia	r1!, {r3-r8,ip,lr}
#endif /* ! COPY_CHUNK */

#ifndef SAVE_REGS
#define	SAVE_REGS	push	{r4-r8, lr}
#define	RESTORE_REGS	pop	{r4-r8, pc}
#endif

ENTRY(bcopy_page)
	PREFETCH_FIRST_CHUNK
	SAVE_REGS
	/* r2 = number of loop passes: 512 bytes per pass with BIG_LOOPS, else 128 */
#ifdef BIG_LOOPS
	mov	r2, #(PAGE_SIZE >> 9)
#else
	mov	r2, #(PAGE_SIZE >> 7)
#endif

1:
	COPY_CHUNK
	COPY_CHUNK
	COPY_CHUNK
	COPY_CHUNK

#ifdef BIG_LOOPS
	/* There is little point making the loop any larger; unless we are
	   running with the cache off, the load/store overheads will
	   completely dominate this loop.  */
	COPY_CHUNK
	COPY_CHUNK
	COPY_CHUNK
	COPY_CHUNK

	COPY_CHUNK
	COPY_CHUNK
	COPY_CHUNK
	COPY_CHUNK

	COPY_CHUNK
	COPY_CHUNK
	COPY_CHUNK
	COPY_CHUNK
#endif
	subs	r2, r2, #1
	bne	1b

	RESTORE_REGS		/* ...and return. */
END(bcopy_page)

/*
 * bzero_page(dest)
 *
 * Optimised zero page routine.
 *
 * On entry:
 *   r0 - dest address
 *
 * Requires:
 *   number of bytes per page (PAGE_SIZE) is a multiple of 512 (BIG_LOOPS), 128
 *   otherwise
 *
 * Register use:
 *   r2             - loop counter (passes remaining)
 *   r3-r8, ip, lr  - all zero; stored eight words (32 bytes) at a time
 *   r4-r8 and lr are pushed on entry; the final pop reloads r4-r8 and
 *   returns by popping the saved lr into pc.
 */

ENTRY(bzero_page)
	push	{r4-r8, lr}
	/* r2 = number of loop passes: 512 bytes per pass with BIG_LOOPS, else 128 */
#ifdef BIG_LOOPS
	mov	r2, #(PAGE_SIZE >> 9)
#else
	mov	r2, #(PAGE_SIZE >> 7)
#endif
	mov	r3, #0
	mov	r4, #0
	mov	r5, #0
	mov	r6, #0
	mov	r7, #0
	mov	r8, #0
	mov	ip, #0
	mov	lr, #0

1:
	stmia	r0!, {r3-r8,ip,lr}
	stmia	r0!, {r3-r8,ip,lr}
	stmia	r0!, {r3-r8,ip,lr}
	stmia	r0!, {r3-r8,ip,lr}

#ifdef BIG_LOOPS
	/* There is little point making the loop any larger; unless we are
	   running with the cache off, the load/store overheads will
	   completely dominate this loop.  */
	stmia	r0!, {r3-r8,ip,lr}
	stmia	r0!, {r3-r8,ip,lr}
	stmia	r0!, {r3-r8,ip,lr}
	stmia	r0!, {r3-r8,ip,lr}

	stmia	r0!, {r3-r8,ip,lr}
	stmia	r0!, {r3-r8,ip,lr}
	stmia	r0!, {r3-r8,ip,lr}
	stmia	r0!, {r3-r8,ip,lr}

	stmia	r0!, {r3-r8,ip,lr}
	stmia	r0!, {r3-r8,ip,lr}
	stmia	r0!, {r3-r8,ip,lr}
	stmia	r0!, {r3-r8,ip,lr}

#endif

	subs	r2, r2, #1
	bne	1b

	pop	{r4-r8, pc}
END(bzero_page)

#else	/* __XSCALE__ */

/*
 * XSCALE version of bcopy_page
 *
 * On entry:
 *   r0 - src address
 *   r1 - dest address
 *
 * Requires:
 *   number of bytes per page (PAGE_SIZE) is a multiple of 128; each pass
 *   of the loop moves 128 bytes (sixteen 8-byte strd stores).
 *
 * Register use:
 *   ip      - loop counter (passes remaining)
 *   r2, r3  - first word pair in flight
 *   r4, r5  - second word pair in flight (saved/restored on the stack)
 *
 * The two register pairs are used alternately: while one pair is being
 * stored with strd, the other pair is being loaded.  The first pair of
 * each pass is loaded at the end of the previous pass (or before the
 * loop for the first pass); those loads are conditional (ldrgt) so that
 * nothing is read once the counter has reached zero.  The trailing
 * offset comments give the source offset of each load within the
 * current 128-byte pass.
 */
ENTRY(bcopy_page)
	pld	[r0]
	push	{r4, r5}
	mov	ip, #(PAGE_SIZE >> 7)	/* 128 bytes are copied per pass */
	ldr	r2, [r0], #0x04		/* 0x00 */
	ldr	r3, [r0], #0x04		/* 0x04 */
1:	pld	[r0, #0x18]		/* Prefetch 0x20 */
	ldr	r4, [r0], #0x04		/* 0x08 */
	ldr	r5, [r0], #0x04		/* 0x0c */
	strd	r2, r3, [r1], #0x08
	ldr	r2, [r0], #0x04		/* 0x10 */
	ldr	r3, [r0], #0x04		/* 0x14 */
	strd	r4, r5, [r1], #0x08
	ldr	r4, [r0], #0x04		/* 0x18 */
	ldr	r5, [r0], #0x04		/* 0x1c */
	strd	r2, r3, [r1], #0x08
	ldr	r2, [r0], #0x04		/* 0x20 */
	ldr	r3, [r0], #0x04		/* 0x24 */
	pld	[r0, #0x18]		/* Prefetch 0x40 */
	strd	r4, r5, [r1], #0x08
	ldr	r4, [r0], #0x04		/* 0x28 */
	ldr	r5, [r0], #0x04		/* 0x2c */
	strd	r2, r3, [r1], #0x08
	ldr	r2, [r0], #0x04		/* 0x30 */
	ldr	r3, [r0], #0x04		/* 0x34 */
	strd	r4, r5, [r1], #0x08
	ldr	r4, [r0], #0x04		/* 0x38 */
	ldr	r5, [r0], #0x04		/* 0x3c */
	strd	r2, r3, [r1], #0x08
	ldr	r2, [r0], #0x04		/* 0x40 */
	ldr	r3, [r0], #0x04		/* 0x44 */
	pld	[r0, #0x18]		/* Prefetch 0x60 */
	strd	r4, r5, [r1], #0x08
	ldr	r4, [r0], #0x04		/* 0x48 */
	ldr	r5, [r0], #0x04		/* 0x4c */
	strd	r2, r3, [r1], #0x08
	ldr	r2, [r0], #0x04		/* 0x50 */
	ldr	r3, [r0], #0x04		/* 0x54 */
	strd	r4, r5, [r1], #0x08
	ldr	r4, [r0], #0x04		/* 0x58 */
	ldr	r5, [r0], #0x04		/* 0x5c */
	strd	r2, r3, [r1], #0x08
	ldr	r2, [r0], #0x04		/* 0x60 */
	ldr	r3, [r0], #0x04		/* 0x64 */
	pld	[r0, #0x18]		/* Prefetch 0x80 */
	strd	r4, r5, [r1], #0x08
	ldr	r4, [r0], #0x04		/* 0x68 */
	ldr	r5, [r0], #0x04		/* 0x6c */
	strd	r2, r3, [r1], #0x08
	ldr	r2, [r0], #0x04		/* 0x70 */
	ldr	r3, [r0], #0x04		/* 0x74 */
	strd	r4, r5, [r1], #0x08
	ldr	r4, [r0], #0x04		/* 0x78 */
	ldr	r5, [r0], #0x04		/* 0x7c */
	strd	r2, r3, [r1], #0x08
	subs	ip, ip, #0x01
	ldrgt	r2, [r0], #0x04		/* 0x80 */
	ldrgt	r3, [r0], #0x04		/* 0x84 */
	strd	r4, r5, [r1], #0x08
	bgt	1b
	pop	{r4, r5}
	RET
END(bcopy_page)

/*
 * XSCALE version of bzero_page
 *
 * On entry:
 *   r0 - dest address
 *
 * Requires:
 *   number of bytes per page (PAGE_SIZE) is a multiple of 128; each pass
 *   of the loop stores 128 bytes (sixteen 8-byte strd stores) and the
 *   loop ends only when the byte count reaches exactly zero.
 *
 * Register use:
 *   r1      - bytes remaining
 *   r2, r3  - zero; stored as a pair by each strd
 */
ENTRY(bzero_page)
	mov	r1, #PAGE_SIZE
	mov	r2, #0
	mov	r3, #0
1:	strd	r2, r3, [r0], #8	/* 32 */
	strd	r2, r3, [r0], #8
	strd	r2, r3, [r0], #8
	strd	r2, r3, [r0], #8
	strd	r2, r3, [r0], #8	/* 64 */
	strd	r2, r3, [r0], #8
	strd	r2, r3, [r0], #8
	strd	r2, r3, [r0], #8
	strd	r2, r3, [r0], #8	/* 96 */
	strd	r2, r3, [r0], #8
	strd	r2, r3, [r0], #8
	strd	r2, r3, [r0], #8
	strd	r2, r3, [r0], #8	/* 128 */
	strd	r2, r3, [r0], #8
	strd	r2, r3, [r0], #8
	strd	r2, r3, [r0], #8
	subs	r1, r1, #128
	bne	1b
	RET
END(bzero_page)
#endif	/* __XSCALE__ */