/* Copyright (c) 2013, Linaro Limited
   All rights reserved.

   Redistribution and use in source and binary forms, with or without
   modification, are permitted provided that the following conditions are met:
       * Redistributions of source code must retain the above copyright
         notice, this list of conditions and the following disclaimer.
       * Redistributions in binary form must reproduce the above copyright
         notice, this list of conditions and the following disclaimer in the
         documentation and/or other materials provided with the distribution.
       * Neither the name of the Linaro nor the
         names of its contributors may be used to endorse or promote products
         derived from this software without specific prior written permission.

   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
   HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */

/*
 * Copyright (c) 2015 ARM Ltd
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. The name of the company may not be used to endorse or promote
 *    products derived from this software without specific prior written
 *    permission.
 *
 * THIS SOFTWARE IS PROVIDED BY ARM LTD ``AS IS'' AND ANY EXPRESS OR IMPLIED
 * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL ARM LTD BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
 * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

/* Assumptions:
 *
 * ARMv8-a, AArch64, unaligned accesses
 */

/* def_fn f, p2align
 *
 * Open the definition of a global function named f in .text: align to
 * 2^p2align bytes (the default of 0 requests no extra alignment), export
 * the symbol, mark it as a function and emit its label.  The matching
 * .size directive has to be written by hand after the function body.
 */
	.macro def_fn f p2align=0
	.text
	.p2align \p2align
	.global \f
	.type \f, %function
\f:
	.endm

/* Parameters and result.
 *
 * dstin, src and count are the incoming arguments in x0, x1 and x2.
 * dstin (x0) is never written in this file, so it still holds the
 * destination pointer at the ret below.
 */
#define dstin	x0
#define src	x1
#define count	x2

/* Working registers.  srcend and dstend are the moving end pointers of
   the copy.  A..D are four 16-byte register pairs that together carry
   64 bytes of data.  */
#define srcend	x3
#define dstend	x4
#define tmp1	x5
#define A_l	x6
#define A_h	x7
#define B_l	x8
#define B_h	x9
#define C_l	x10
#define C_h	x11
#define D_l	x12
#define D_h	x13

/* E is a fifth pair used only by the final 64-byte copy.  It reuses the
   registers of count and tmp1, neither of which is read after label 2.  */
#define E_l	count
#define E_h	tmp1

/* All memmoves up to 96 bytes are done by memcpy as it supports overlaps.
   Larger backwards copies are also handled by memcpy. The only remaining
   case is forward large copies.  The destination is aligned, and an
   unrolled loop processes 64 bytes per iteration.

   The code below walks from the end of both buffers towards the start
   (negative offsets with pre-decrement writeback), so source bytes are
   always loaded before a store to the overlapping destination can
   replace them.

   In:     x0 = dstin, x1 = src, x2 = count.
   Out:    x0 unchanged on the paths in this file.
   Writes: x2-x13 and the condition flags, plus whatever memcpy writes
           when the tail-call is taken (memcpy is defined elsewhere).
*/

def_fn memmove, 6
	sub	tmp1, dstin, src
	cmp	count, 96
	/* If count > 96 the flags come from comparing (dstin - src) with
	   count; otherwise NZCV is forced to 2 (carry set), which makes
	   the b.hs below taken.  So memcpy handles count <= 96 and every
	   case where the unsigned distance dstin - src is >= count.  */
	ccmp	tmp1, count, 2, hi
	b.hs	memcpy

	/* dstin == src: nothing to move.  */
	cbz	tmp1, 3f
	add	dstend, dstin, count
	add	srcend, src, count

	/* Align dstend to 16 byte alignment so that we don't cross cache line
	   boundaries on both loads and stores.  There are more than 96 bytes
	   to copy, so copy 16 bytes unaligned and then align.  The loop
	   copies 64 bytes per iteration and loads one iteration ahead.  */

	and	tmp1, dstend, 15		/* tmp1 = misalignment of dstend */
	ldp	D_l, D_h, [srcend, -16]		/* last 16 bytes, unaligned */
	sub	srcend, srcend, tmp1
	sub	count, count, tmp1
	ldp	A_l, A_h, [srcend, -16]
	stp	D_l, D_h, [dstend, -16]
	ldp	B_l, B_h, [srcend, -32]
	ldp	C_l, C_h, [srcend, -48]
	ldp	D_l, D_h, [srcend, -64]!
	sub	dstend, dstend, tmp1		/* dstend is now 16-byte aligned */
	/* A..D already hold 64 bytes.  Skip the loop when 128 or fewer bytes
	   remain; label 2 then stores A..D and copies the first 64 bytes.  */
	subs	count, count, 128
	b.ls	2f
	/* Padding.  The function is aligned to 64 bytes and this is its 20th
	   instruction, so the loop head lands at offset 80, a multiple of 16
	   (presumably the reason for the nop).  */
	nop
1:
	/* Store the 64 bytes loaded previously while loading the next 64.  */
	stp	A_l, A_h, [dstend, -16]
	ldp	A_l, A_h, [srcend, -16]
	stp	B_l, B_h, [dstend, -32]
	ldp	B_l, B_h, [srcend, -32]
	stp	C_l, C_h, [dstend, -48]
	ldp	C_l, C_h, [srcend, -48]
	stp	D_l, D_h, [dstend, -64]!
	ldp	D_l, D_h, [srcend, -64]!
	subs	count, count, 64
	b.hi	1b

	/* Write the last full set of 64 bytes.  The remainder is at most 64
	   bytes, so it is safe to always copy 64 bytes from the start even if
	   there is just 1 byte left.

	   The interleaving below is load-before-store on purpose: each 16-byte
	   piece of the first 64 source bytes is loaded before the store that
	   could overwrite it when the buffers overlap closely.  Do not
	   reorder.  */
2:
	ldp	E_l, E_h, [src, 48]
	stp	A_l, A_h, [dstend, -16]
	ldp	A_l, A_h, [src, 32]
	stp	B_l, B_h, [dstend, -32]
	ldp	B_l, B_h, [src, 16]
	stp	C_l, C_h, [dstend, -48]
	ldp	C_l, C_h, [src]
	stp	D_l, D_h, [dstend, -64]
	stp	E_l, E_h, [dstin, 48]
	stp	A_l, A_h, [dstin, 32]
	stp	B_l, B_h, [dstin, 16]
	stp	C_l, C_h, [dstin]
3:	ret

	.size	memmove, . - memmove