xref: /netbsd-src/sys/arch/arm/arm32/bcopy_page.S (revision 0c87c94ac0e57ae4f913eadb8e8d9db831350c65)
/*	$NetBSD: bcopy_page.S,v 1.10 2013/12/17 01:27:21 joerg Exp $	*/

/*
 * Copyright (c) 1995 Scott Stevens
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by Scott Stevens.
 * 4. The name of the author may not be used to endorse or promote products
 *    derived from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * RiscBSD kernel project
 *
 * bcopy_page.S
 *
 * page optimised bcopy and bzero routines
 *
 * Created      : 08/04/95
 */

/* Presumably supplies the ENTRY()/END()/RET macros used below. */
#include <machine/asm.h>

/* Presumably supplies PAGE_SIZE as an assembler-usable constant. */
#include "assym.h"

/*
 * Two implementations follow: a generic ARM version built from
 * ldmia/stmia bursts, and (in the #else branch) an XScale version
 * built from pld/strd.
 */
#ifndef __XSCALE__

/*
 * Define BIG_LOOPS to unroll the generic copy/zero loops to 512 bytes
 * per iteration instead of the default 128 bytes.
 */
/* #define BIG_LOOPS */

/*
 * bcopy_page(src, dest)
 *
 * Optimised copy page routine: copies PAGE_SIZE bytes from src to dest
 * in 32-byte ldmia/stmia bursts.
 *
 * On entry:
 *   r0 - src address
 *   r1 - dest address
 *
 * On exit:
 *   r0, r1 - each advanced by PAGE_SIZE
 *   r2     - zero (loop counter exhausted)
 *   r3, ip - hold the last words copied (not preserved)
 *   r4-r8  - preserved (saved and restored on the stack)
 *   condition flags are modified
 *
 * Requires:
 *   number of bytes per page (PAGE_SIZE) is a multiple of 512 (BIG_LOOPS), 128
 *   otherwise.
 *   src and dest are assumed to be (at least) word aligned, as ldmia/stmia
 *   expect; callers presumably pass page-aligned addresses.
 */

#define	CHUNK_SIZE	32	/* bytes moved by one COPY_CHUNK (8 registers) */

/* Prefetch hooks; both expand to nothing in this generic version. */
#define	PREFETCH_FIRST_CHUNK	/* nothing */
#define	PREFETCH_NEXT_CHUNK	/* nothing */

/*
 * COPY_CHUNK: load eight words from [r0] and store them to [r1],
 * post-incrementing both pointers by CHUNK_SIZE.
 */
#ifndef COPY_CHUNK
#define	COPY_CHUNK \
	PREFETCH_NEXT_CHUNK ; \
	ldmia	r0!, {r3-r8,ip,lr} ; \
	stmia	r1!, {r3-r8,ip,lr}
#endif /* ! COPY_CHUNK */

/*
 * r4-r8 and lr are used as copy scratch, so they are saved on entry;
 * the matching restore pops the saved lr straight into pc to return.
 */
#ifndef SAVE_REGS
#define	SAVE_REGS	push	{r4-r8, lr}
#define	RESTORE_REGS	pop	{r4-r8, pc}
#endif

ENTRY(bcopy_page)
	PREFETCH_FIRST_CHUNK
	SAVE_REGS
	/* r2 = number of loop passes needed to cover one page */
#ifdef BIG_LOOPS
	mov	r2, #(PAGE_SIZE >> 9)	/* 16 chunks = 512 bytes per pass */
#else
	mov	r2, #(PAGE_SIZE >> 7)	/* 4 chunks = 128 bytes per pass */
#endif

1:
	COPY_CHUNK
	COPY_CHUNK
	COPY_CHUNK
	COPY_CHUNK

#ifdef BIG_LOOPS
	/* There is little point making the loop any larger; unless we are
	   running with the cache off, the load/store overheads will
	   completely dominate this loop.  */
	COPY_CHUNK
	COPY_CHUNK
	COPY_CHUNK
	COPY_CHUNK

	COPY_CHUNK
	COPY_CHUNK
	COPY_CHUNK
	COPY_CHUNK

	COPY_CHUNK
	COPY_CHUNK
	COPY_CHUNK
	COPY_CHUNK
#endif
	subs	r2, r2, #1		/* one pass done */
	bne	1b			/* loop until the whole page is copied */

	RESTORE_REGS		/* ...and return. */
END(bcopy_page)

/*
 * bzero_page(dest)
 *
 * Optimised zero page routine: fills PAGE_SIZE bytes at dest with zero
 * using 32-byte stmia bursts of eight zeroed registers.
 *
 * On entry:
 *   r0 - dest address
 *
 * On exit:
 *   r0     - advanced by PAGE_SIZE
 *   r2     - zero (loop counter exhausted)
 *   r3, ip - zero (not preserved)
 *   r4-r8  - preserved (saved and restored on the stack)
 *   condition flags are modified
 *
 * Requires:
 *   number of bytes per page (PAGE_SIZE) is a multiple of 512 (BIG_LOOPS), 128
 *   otherwise
 *   dest is assumed to be (at least) word aligned, as stmia expects;
 *   callers presumably pass a page-aligned address.
 */

ENTRY(bzero_page)
	push	{r4-r8, lr}		/* r4-r8 and lr become zero sources */
	/* r2 = number of loop passes needed to cover one page */
#ifdef BIG_LOOPS
	mov	r2, #(PAGE_SIZE >> 9)	/* 16 stores = 512 bytes per pass */
#else
	mov	r2, #(PAGE_SIZE >> 7)	/* 4 stores = 128 bytes per pass */
#endif
	/* Zero all eight registers written by each stmia below. */
	mov	r3, #0
	mov	r4, #0
	mov	r5, #0
	mov	r6, #0
	mov	r7, #0
	mov	r8, #0
	mov	ip, #0
	mov	lr, #0

1:
	/* Each stmia writes 32 zero bytes and advances r0 by 32. */
	stmia	r0!, {r3-r8,ip,lr}
	stmia	r0!, {r3-r8,ip,lr}
	stmia	r0!, {r3-r8,ip,lr}
	stmia	r0!, {r3-r8,ip,lr}

#ifdef BIG_LOOPS
	/* There is little point making the loop any larger; unless we are
	   running with the cache off, the load/store overheads will
	   completely dominate this loop.  */
	stmia	r0!, {r3-r8,ip,lr}
	stmia	r0!, {r3-r8,ip,lr}
	stmia	r0!, {r3-r8,ip,lr}
	stmia	r0!, {r3-r8,ip,lr}

	stmia	r0!, {r3-r8,ip,lr}
	stmia	r0!, {r3-r8,ip,lr}
	stmia	r0!, {r3-r8,ip,lr}
	stmia	r0!, {r3-r8,ip,lr}

	stmia	r0!, {r3-r8,ip,lr}
	stmia	r0!, {r3-r8,ip,lr}
	stmia	r0!, {r3-r8,ip,lr}
	stmia	r0!, {r3-r8,ip,lr}

#endif

	subs	r2, r2, #1		/* one pass done */
	bne	1b			/* loop until the whole page is zeroed */

	pop	{r4-r8, pc}		/* restore and return via saved lr */
END(bzero_page)

#else	/* __XSCALE__ */

/*
 * XSCALE version of bcopy_page
 *
 * bcopy_page(src, dest)
 *
 * Copies PAGE_SIZE bytes from src to dest.  Word loads into the
 * register pairs r2/r3 and r4/r5 are interleaved with doubleword
 * stores (strd) of the other pair, and the source is prefetched with
 * pld 32 bytes at a time.
 *
 * On entry:
 *   r0 - src address
 *   r1 - dest address
 *
 * On exit:
 *   r0, r1 - each advanced by PAGE_SIZE
 *   r2, r3 - hold copied data (not preserved)
 *   ip     - zero (loop counter exhausted)
 *   r4, r5 - preserved (saved and restored on the stack)
 *   condition flags are modified
 *
 * Requires:
 *   number of bytes per page (PAGE_SIZE) is a multiple of 128.
 *   dest is assumed to be doubleword aligned for strd; callers
 *   presumably pass page-aligned addresses.
 *
 * The offsets in the trailing comments are relative to the start of the
 * current 128-byte pass.  On the final pass the last pld names the
 * address just past the source page; pld is a prefetch hint, so this is
 * presumed harmless.
 */
ENTRY(bcopy_page)
	pld	[r0]
	push	{r4, r5}
	/*
	 * One loop pass copies 128 bytes; derive the pass count from
	 * PAGE_SIZE rather than hard-coding it for 4096-byte pages.
	 */
	mov	ip, #(PAGE_SIZE >> 7)
	/* Prime the pipeline: first two words of the page. */
	ldr	r2, [r0], #0x04		/* 0x00 */
	ldr	r3, [r0], #0x04		/* 0x04 */
1:	pld	[r0, #0x18]		/* Prefetch 0x20 */
	ldr	r4, [r0], #0x04		/* 0x08 */
	ldr	r5, [r0], #0x04		/* 0x0c */
	strd	r2, r3, [r1], #0x08
	ldr	r2, [r0], #0x04		/* 0x10 */
	ldr	r3, [r0], #0x04		/* 0x14 */
	strd	r4, r5, [r1], #0x08
	ldr	r4, [r0], #0x04		/* 0x18 */
	ldr	r5, [r0], #0x04		/* 0x1c */
	strd	r2, r3, [r1], #0x08
	ldr	r2, [r0], #0x04		/* 0x20 */
	ldr	r3, [r0], #0x04		/* 0x24 */
	pld	[r0, #0x18]		/* Prefetch 0x40 */
	strd	r4, r5, [r1], #0x08
	ldr	r4, [r0], #0x04		/* 0x28 */
	ldr	r5, [r0], #0x04		/* 0x2c */
	strd	r2, r3, [r1], #0x08
	ldr	r2, [r0], #0x04		/* 0x30 */
	ldr	r3, [r0], #0x04		/* 0x34 */
	strd	r4, r5, [r1], #0x08
	ldr	r4, [r0], #0x04		/* 0x38 */
	ldr	r5, [r0], #0x04		/* 0x3c */
	strd	r2, r3, [r1], #0x08
	ldr	r2, [r0], #0x04		/* 0x40 */
	ldr	r3, [r0], #0x04		/* 0x44 */
	pld	[r0, #0x18]		/* Prefetch 0x60 */
	strd	r4, r5, [r1], #0x08
	ldr	r4, [r0], #0x04		/* 0x48 */
	ldr	r5, [r0], #0x04		/* 0x4c */
	strd	r2, r3, [r1], #0x08
	ldr	r2, [r0], #0x04		/* 0x50 */
	ldr	r3, [r0], #0x04		/* 0x54 */
	strd	r4, r5, [r1], #0x08
	ldr	r4, [r0], #0x04		/* 0x58 */
	ldr	r5, [r0], #0x04		/* 0x5c */
	strd	r2, r3, [r1], #0x08
	ldr	r2, [r0], #0x04		/* 0x60 */
	ldr	r3, [r0], #0x04		/* 0x64 */
	pld	[r0, #0x18]		/* Prefetch 0x80 */
	strd	r4, r5, [r1], #0x08
	ldr	r4, [r0], #0x04		/* 0x68 */
	ldr	r5, [r0], #0x04		/* 0x6c */
	strd	r2, r3, [r1], #0x08
	ldr	r2, [r0], #0x04		/* 0x70 */
	ldr	r3, [r0], #0x04		/* 0x74 */
	strd	r4, r5, [r1], #0x08
	ldr	r4, [r0], #0x04		/* 0x78 */
	ldr	r5, [r0], #0x04		/* 0x7c */
	strd	r2, r3, [r1], #0x08
	subs	ip, ip, #0x01
	/*
	 * Only when another pass follows: preload the first two words
	 * of the next 128 bytes, so no load reaches past the page on
	 * the final pass.
	 */
	ldrgt	r2, [r0], #0x04		/* 0x80 */
	ldrgt	r3, [r0], #0x04		/* 0x84 */
	strd	r4, r5, [r1], #0x08	/* last store of this pass */
	bgt	1b
	pop	{r4, r5}
	RET
END(bcopy_page)

/*
 * XSCALE version of bzero_page
 *
 * bzero_page(dest)
 *
 * Fills PAGE_SIZE bytes at dest with zero using doubleword stores
 * (strd) of the zeroed register pair r2/r3, 128 bytes per loop pass.
 *
 * On entry:
 *   r0 - dest address
 *
 * On exit:
 *   r0         - advanced by PAGE_SIZE
 *   r1, r2, r3 - zero (not preserved)
 *   condition flags are modified
 *
 * Requires:
 *   number of bytes per page (PAGE_SIZE) is a multiple of 128.
 *   dest is assumed to be doubleword aligned for strd; callers
 *   presumably pass a page-aligned address.
 */
ENTRY(bzero_page)
	mov	r1, #PAGE_SIZE		/* r1 = bytes still to be zeroed */
	mov	r2, #0
	mov	r3, #0
1:	strd	r2, r3, [r0], #8
	strd	r2, r3, [r0], #8
	strd	r2, r3, [r0], #8
	strd	r2, r3, [r0], #8	/* 32 bytes written this pass */
	strd	r2, r3, [r0], #8
	strd	r2, r3, [r0], #8
	strd	r2, r3, [r0], #8
	strd	r2, r3, [r0], #8	/* 64 */
	strd	r2, r3, [r0], #8
	strd	r2, r3, [r0], #8
	strd	r2, r3, [r0], #8
	strd	r2, r3, [r0], #8	/* 96 */
	strd	r2, r3, [r0], #8
	strd	r2, r3, [r0], #8
	strd	r2, r3, [r0], #8
	strd	r2, r3, [r0], #8	/* 128 */
	subs	r1, r1, #128		/* account for this pass */
	bne	1b			/* loop until the whole page is zeroed */
	RET
END(bzero_page)
#endif	/* __XSCALE__ */