xref: /freebsd-src/contrib/cortex-strings/src/aarch64/memmove.S (revision 8c4282b370bd66908b45b6a223226a9fc2b69d57)
1*09a53ad8SAndrew Turner/* Copyright (c) 2013, Linaro Limited
2*09a53ad8SAndrew Turner   All rights reserved.
3*09a53ad8SAndrew Turner
4*09a53ad8SAndrew Turner   Redistribution and use in source and binary forms, with or without
5*09a53ad8SAndrew Turner   modification, are permitted provided that the following conditions are met:
6*09a53ad8SAndrew Turner       * Redistributions of source code must retain the above copyright
7*09a53ad8SAndrew Turner         notice, this list of conditions and the following disclaimer.
8*09a53ad8SAndrew Turner       * Redistributions in binary form must reproduce the above copyright
9*09a53ad8SAndrew Turner         notice, this list of conditions and the following disclaimer in the
10*09a53ad8SAndrew Turner         documentation and/or other materials provided with the distribution.
11*09a53ad8SAndrew Turner       * Neither the name of the Linaro nor the
12*09a53ad8SAndrew Turner         names of its contributors may be used to endorse or promote products
13*09a53ad8SAndrew Turner         derived from this software without specific prior written permission.
14*09a53ad8SAndrew Turner
15*09a53ad8SAndrew Turner   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
16*09a53ad8SAndrew Turner   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
17*09a53ad8SAndrew Turner   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
18*09a53ad8SAndrew Turner   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
19*09a53ad8SAndrew Turner   HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
20*09a53ad8SAndrew Turner   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
21*09a53ad8SAndrew Turner   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
22*09a53ad8SAndrew Turner   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
23*09a53ad8SAndrew Turner   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24*09a53ad8SAndrew Turner   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
25*09a53ad8SAndrew Turner   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */
26*09a53ad8SAndrew Turner
27*09a53ad8SAndrew Turner/*
28*09a53ad8SAndrew Turner * Copyright (c) 2015 ARM Ltd
29*09a53ad8SAndrew Turner * All rights reserved.
30*09a53ad8SAndrew Turner *
31*09a53ad8SAndrew Turner * Redistribution and use in source and binary forms, with or without
32*09a53ad8SAndrew Turner * modification, are permitted provided that the following conditions
33*09a53ad8SAndrew Turner * are met:
34*09a53ad8SAndrew Turner * 1. Redistributions of source code must retain the above copyright
35*09a53ad8SAndrew Turner *    notice, this list of conditions and the following disclaimer.
36*09a53ad8SAndrew Turner * 2. Redistributions in binary form must reproduce the above copyright
37*09a53ad8SAndrew Turner *    notice, this list of conditions and the following disclaimer in the
38*09a53ad8SAndrew Turner *    documentation and/or other materials provided with the distribution.
39*09a53ad8SAndrew Turner * 3. The name of the company may not be used to endorse or promote
40*09a53ad8SAndrew Turner *    products derived from this software without specific prior written
41*09a53ad8SAndrew Turner *    permission.
42*09a53ad8SAndrew Turner *
43*09a53ad8SAndrew Turner * THIS SOFTWARE IS PROVIDED BY ARM LTD ``AS IS'' AND ANY EXPRESS OR IMPLIED
44*09a53ad8SAndrew Turner * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
45*09a53ad8SAndrew Turner * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
46*09a53ad8SAndrew Turner * IN NO EVENT SHALL ARM LTD BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
47*09a53ad8SAndrew Turner * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
48*09a53ad8SAndrew Turner * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
49*09a53ad8SAndrew Turner * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
50*09a53ad8SAndrew Turner * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
51*09a53ad8SAndrew Turner * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
52*09a53ad8SAndrew Turner * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
53*09a53ad8SAndrew Turner */
54*09a53ad8SAndrew Turner
55*09a53ad8SAndrew Turner/* Assumptions:
56*09a53ad8SAndrew Turner *
57*09a53ad8SAndrew Turner * ARMv8-a, AArch64, unaligned accesses
58*09a53ad8SAndrew Turner */
59*09a53ad8SAndrew Turner
/* def_fn f [, p2align]
 *
 * Open the definition of the global function \f:
 *   - switch to the .text section,
 *   - align the location counter to 2^\p2align bytes (the default of 0
 *     requests no padding),
 *   - mark \f as a function symbol and export it,
 *   - emit the \f label itself.
 * The function body and the matching .size directive are supplied by the
 * code that follows the macro invocation.
 */
	.macro def_fn f p2align=0
	.text
	.p2align \p2align
	.type \f, %function
	.global \f
\f:
	.endm
67*09a53ad8SAndrew Turner
/* Register aliases used by memmove.  */

/* Incoming arguments.  dstin is never written by memmove, so x0 still holds
   the destination pointer on return.  */
#define dstin	x0
#define src	x1
#define count	x2

/* Pointers that start one past the end of each buffer and are stepped
   downwards as the copy proceeds.  */
#define srcend	x3
#define dstend	x4

/* General scratch register.  */
#define tmp1	x5

/* Four 16-byte data pairs (low/high 8-byte halves), moved with ldp/stp.  */
#define A_l	x6
#define A_h	x7
#define B_l	x8
#define B_h	x9
#define C_l	x10
#define C_h	x11
#define D_l	x12
#define D_h	x13

/* A fifth pair for the final block.  It shares registers with count and
   tmp1, so it may only be loaded once neither value is needed any more.  */
#define E_l	count
#define E_h	tmp1
85*09a53ad8SAndrew Turner
/* void *memmove(void *dstin, const void *src, size_t count)
 *
 * In:   x0 = dstin, x1 = src, x2 = count
 * Out:  x0 = dstin (x0 is never written on the local path)
 * Uses: x2-x13 and the condition flags on the local path; no stack.
 *
 * Dispatch:
 *   - count <= 96, or (dstin - src) >= count as an unsigned value (the
 *     destination starts below the source, or at/after the source end):
 *     tail-call memcpy.  memcpy is defined outside this file; per the
 *     original author's note it copes with the overlap in these cases.
 *   - dstin == src: nothing to do, return immediately.
 *   - otherwise (count > 96 and src < dstin < src + count): copy here, from
 *     the highest addresses down to the lowest, 64 bytes per loop iteration
 *     with the destination end pointer aligned to 16 bytes.
 */

def_fn memmove, 6
	sub	tmp1, dstin, src	/* tmp1 = dstin - src; wraps if dstin < src */
	cmp	count, #96
	/* If count > 96, compare tmp1 with count.  Otherwise load nzcv = 2
	   (carry set) so that the b.hs below is taken unconditionally.  */
	ccmp	tmp1, count, #2, hi
	b.hs	memcpy

	cbz	tmp1, .Lmemmove_done	/* dstin == src: no copy needed */
	add	dstend, dstin, count
	add	srcend, src, count

	/* Round dstend down to a 16-byte boundary so the stores in the main
	   loop are aligned.  More than 96 bytes remain at this point, so the
	   last 16 bytes are copied with one unaligned pair first and both end
	   pointers are then pulled back by the misalignment.  Loads run one
	   64-byte block ahead of the stores.  */

	and	tmp1, dstend, #15	/* tmp1 = bytes dstend lies past a 16-byte boundary */
	ldp	D_l, D_h, [srcend, #-16]	/* final 16 source bytes */
	sub	srcend, srcend, tmp1
	sub	count, count, tmp1
	/* This load must come before the tail store below: when dstin - src
	   is small that store can overwrite these source bytes.  */
	ldp	A_l, A_h, [srcend, #-16]
	stp	D_l, D_h, [dstend, #-16]	/* unaligned store of the final 16 bytes */
	ldp	B_l, B_h, [srcend, #-32]
	ldp	C_l, C_h, [srcend, #-48]
	ldp	D_l, D_h, [srcend, #-64]!	/* pre-index: srcend -= 64 */
	sub	dstend, dstend, tmp1	/* dstend is now 16-byte aligned */
	/* 64 bytes are held in A..D but not yet stored.  Enter the loop only
	   if more than 128 bytes remained before this subtraction.  */
	subs	count, count, #128
	b.ls	.Lmemmove_tail
	/* Padding: presumably here so that the loop head below falls on a
	   16-byte boundary (it is the 21st instruction of a 64-byte-aligned
	   function).  */
	nop
.Lmemmove_loop64:
	/* Store the block loaded on the previous pass while loading the next
	   one; both end pointers step down by 64 via pre-index writeback.  */
	stp	A_l, A_h, [dstend, #-16]
	ldp	A_l, A_h, [srcend, #-16]
	stp	B_l, B_h, [dstend, #-32]
	ldp	B_l, B_h, [srcend, #-32]
	stp	C_l, C_h, [dstend, #-48]
	ldp	C_l, C_h, [srcend, #-48]
	stp	D_l, D_h, [dstend, #-64]!
	ldp	D_l, D_h, [srcend, #-64]!
	subs	count, count, #64
	b.hi	.Lmemmove_loop64

	/* Flush the 64 bytes still held in A..D.  At most 64 bytes are left
	   below them, so the first 64 bytes of the buffer are always copied
	   in full, even if only a single byte actually remains.  E_l/E_h
	   reuse the count and tmp1 registers, which are dead from here on.  */
.Lmemmove_tail:
	ldp	E_l, E_h, [src, #48]
	stp	A_l, A_h, [dstend, #-16]
	ldp	A_l, A_h, [src, #32]
	stp	B_l, B_h, [dstend, #-32]
	ldp	B_l, B_h, [src, #16]
	stp	C_l, C_h, [dstend, #-48]
	ldp	C_l, C_h, [src]
	stp	D_l, D_h, [dstend, #-64]
	/* All four head pairs are loaded; write them to the start of dst.  */
	stp	E_l, E_h, [dstin, #48]
	stp	A_l, A_h, [dstin, #32]
	stp	B_l, B_h, [dstin, #16]
	stp	C_l, C_h, [dstin]
.Lmemmove_done:
	ret

	.size	memmove, . - memmove
151