/* Copyright (c) 2012, Linaro Limited
   All rights reserved.

   Redistribution and use in source and binary forms, with or without
   modification, are permitted provided that the following conditions are met:
       * Redistributions of source code must retain the above copyright
         notice, this list of conditions and the following disclaimer.
       * Redistributions in binary form must reproduce the above copyright
         notice, this list of conditions and the following disclaimer in the
         documentation and/or other materials provided with the distribution.
       * Neither the name of the Linaro nor the
         names of its contributors may be used to endorse or promote products
         derived from this software without specific prior written permission.

   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
   HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */

/*
 * Copyright (c) 2015 ARM Ltd
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. The name of the company may not be used to endorse or promote
 *    products derived from this software without specific prior written
 *    permission.
 *
 * THIS SOFTWARE IS PROVIDED BY ARM LTD ``AS IS'' AND ANY EXPRESS OR IMPLIED
 * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL ARM LTD BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
 * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
53*09a53ad8SAndrew Turner */ 54*09a53ad8SAndrew Turner 55*09a53ad8SAndrew Turner/* Assumptions: 56*09a53ad8SAndrew Turner * 57*09a53ad8SAndrew Turner * ARMv8-a, AArch64, unaligned accesses 58*09a53ad8SAndrew Turner * 59*09a53ad8SAndrew Turner */ 60*09a53ad8SAndrew Turner 61*09a53ad8SAndrew Turner 62*09a53ad8SAndrew Turner#define dstin x0 63*09a53ad8SAndrew Turner#define val x1 64*09a53ad8SAndrew Turner#define valw w1 65*09a53ad8SAndrew Turner#define count x2 66*09a53ad8SAndrew Turner#define dst x3 67*09a53ad8SAndrew Turner#define dstend x4 68*09a53ad8SAndrew Turner#define tmp1 x5 69*09a53ad8SAndrew Turner#define tmp1w w5 70*09a53ad8SAndrew Turner#define tmp2 x6 71*09a53ad8SAndrew Turner#define tmp2w w6 72*09a53ad8SAndrew Turner#define zva_len x7 73*09a53ad8SAndrew Turner#define zva_lenw w7 74*09a53ad8SAndrew Turner 75*09a53ad8SAndrew Turner#define L(l) .L ## l 76*09a53ad8SAndrew Turner 77*09a53ad8SAndrew Turner .macro def_fn f p2align=0 78*09a53ad8SAndrew Turner .text 79*09a53ad8SAndrew Turner .p2align \p2align 80*09a53ad8SAndrew Turner .global \f 81*09a53ad8SAndrew Turner .type \f, %function 82*09a53ad8SAndrew Turner\f: 83*09a53ad8SAndrew Turner .endm 84*09a53ad8SAndrew Turner 85*09a53ad8SAndrew Turnerdef_fn memset p2align=6 86*09a53ad8SAndrew Turner 87*09a53ad8SAndrew Turner dup v0.16B, valw 88*09a53ad8SAndrew Turner add dstend, dstin, count 89*09a53ad8SAndrew Turner 90*09a53ad8SAndrew Turner cmp count, 96 91*09a53ad8SAndrew Turner b.hi L(set_long) 92*09a53ad8SAndrew Turner cmp count, 16 93*09a53ad8SAndrew Turner b.hs L(set_medium) 94*09a53ad8SAndrew Turner mov val, v0.D[0] 95*09a53ad8SAndrew Turner 96*09a53ad8SAndrew Turner /* Set 0..15 bytes. 
*/ 97*09a53ad8SAndrew Turner tbz count, 3, 1f 98*09a53ad8SAndrew Turner str val, [dstin] 99*09a53ad8SAndrew Turner str val, [dstend, -8] 100*09a53ad8SAndrew Turner ret 101*09a53ad8SAndrew Turner nop 102*09a53ad8SAndrew Turner1: tbz count, 2, 2f 103*09a53ad8SAndrew Turner str valw, [dstin] 104*09a53ad8SAndrew Turner str valw, [dstend, -4] 105*09a53ad8SAndrew Turner ret 106*09a53ad8SAndrew Turner2: cbz count, 3f 107*09a53ad8SAndrew Turner strb valw, [dstin] 108*09a53ad8SAndrew Turner tbz count, 1, 3f 109*09a53ad8SAndrew Turner strh valw, [dstend, -2] 110*09a53ad8SAndrew Turner3: ret 111*09a53ad8SAndrew Turner 112*09a53ad8SAndrew Turner /* Set 17..96 bytes. */ 113*09a53ad8SAndrew TurnerL(set_medium): 114*09a53ad8SAndrew Turner str q0, [dstin] 115*09a53ad8SAndrew Turner tbnz count, 6, L(set96) 116*09a53ad8SAndrew Turner str q0, [dstend, -16] 117*09a53ad8SAndrew Turner tbz count, 5, 1f 118*09a53ad8SAndrew Turner str q0, [dstin, 16] 119*09a53ad8SAndrew Turner str q0, [dstend, -32] 120*09a53ad8SAndrew Turner1: ret 121*09a53ad8SAndrew Turner 122*09a53ad8SAndrew Turner .p2align 4 123*09a53ad8SAndrew Turner /* Set 64..96 bytes. Write 64 bytes from the start and 124*09a53ad8SAndrew Turner 32 bytes from the end. */ 125*09a53ad8SAndrew TurnerL(set96): 126*09a53ad8SAndrew Turner str q0, [dstin, 16] 127*09a53ad8SAndrew Turner stp q0, q0, [dstin, 32] 128*09a53ad8SAndrew Turner stp q0, q0, [dstend, -32] 129*09a53ad8SAndrew Turner ret 130*09a53ad8SAndrew Turner 131*09a53ad8SAndrew Turner .p2align 3 132*09a53ad8SAndrew Turner nop 133*09a53ad8SAndrew TurnerL(set_long): 134*09a53ad8SAndrew Turner and valw, valw, 255 135*09a53ad8SAndrew Turner bic dst, dstin, 15 136*09a53ad8SAndrew Turner str q0, [dstin] 137*09a53ad8SAndrew Turner cmp count, 256 138*09a53ad8SAndrew Turner ccmp valw, 0, 0, cs 139*09a53ad8SAndrew Turner b.eq L(try_zva) 140*09a53ad8SAndrew TurnerL(no_zva): 141*09a53ad8SAndrew Turner sub count, dstend, dst /* Count is 16 too large. 
*/ 142*09a53ad8SAndrew Turner add dst, dst, 16 143*09a53ad8SAndrew Turner sub count, count, 64 + 16 /* Adjust count and bias for loop. */ 144*09a53ad8SAndrew Turner1: stp q0, q0, [dst], 64 145*09a53ad8SAndrew Turner stp q0, q0, [dst, -32] 146*09a53ad8SAndrew TurnerL(tail64): 147*09a53ad8SAndrew Turner subs count, count, 64 148*09a53ad8SAndrew Turner b.hi 1b 149*09a53ad8SAndrew Turner2: stp q0, q0, [dstend, -64] 150*09a53ad8SAndrew Turner stp q0, q0, [dstend, -32] 151*09a53ad8SAndrew Turner ret 152*09a53ad8SAndrew Turner 153*09a53ad8SAndrew Turner .p2align 3 154*09a53ad8SAndrew TurnerL(try_zva): 155*09a53ad8SAndrew Turner mrs tmp1, dczid_el0 156*09a53ad8SAndrew Turner tbnz tmp1w, 4, L(no_zva) 157*09a53ad8SAndrew Turner and tmp1w, tmp1w, 15 158*09a53ad8SAndrew Turner cmp tmp1w, 4 /* ZVA size is 64 bytes. */ 159*09a53ad8SAndrew Turner b.ne L(zva_128) 160*09a53ad8SAndrew Turner 161*09a53ad8SAndrew Turner /* Write the first and last 64 byte aligned block using stp rather 162*09a53ad8SAndrew Turner than using DC ZVA. This is faster on some cores. 163*09a53ad8SAndrew Turner */ 164*09a53ad8SAndrew TurnerL(zva_64): 165*09a53ad8SAndrew Turner str q0, [dst, 16] 166*09a53ad8SAndrew Turner stp q0, q0, [dst, 32] 167*09a53ad8SAndrew Turner bic dst, dst, 63 168*09a53ad8SAndrew Turner stp q0, q0, [dst, 64] 169*09a53ad8SAndrew Turner stp q0, q0, [dst, 96] 170*09a53ad8SAndrew Turner sub count, dstend, dst /* Count is now 128 too large. */ 171*09a53ad8SAndrew Turner sub count, count, 128+64+64 /* Adjust count and bias for loop. 
*/ 172*09a53ad8SAndrew Turner add dst, dst, 128 173*09a53ad8SAndrew Turner nop 174*09a53ad8SAndrew Turner1: dc zva, dst 175*09a53ad8SAndrew Turner add dst, dst, 64 176*09a53ad8SAndrew Turner subs count, count, 64 177*09a53ad8SAndrew Turner b.hi 1b 178*09a53ad8SAndrew Turner stp q0, q0, [dst, 0] 179*09a53ad8SAndrew Turner stp q0, q0, [dst, 32] 180*09a53ad8SAndrew Turner stp q0, q0, [dstend, -64] 181*09a53ad8SAndrew Turner stp q0, q0, [dstend, -32] 182*09a53ad8SAndrew Turner ret 183*09a53ad8SAndrew Turner 184*09a53ad8SAndrew Turner .p2align 3 185*09a53ad8SAndrew TurnerL(zva_128): 186*09a53ad8SAndrew Turner cmp tmp1w, 5 /* ZVA size is 128 bytes. */ 187*09a53ad8SAndrew Turner b.ne L(zva_other) 188*09a53ad8SAndrew Turner 189*09a53ad8SAndrew Turner str q0, [dst, 16] 190*09a53ad8SAndrew Turner stp q0, q0, [dst, 32] 191*09a53ad8SAndrew Turner stp q0, q0, [dst, 64] 192*09a53ad8SAndrew Turner stp q0, q0, [dst, 96] 193*09a53ad8SAndrew Turner bic dst, dst, 127 194*09a53ad8SAndrew Turner sub count, dstend, dst /* Count is now 128 too large. */ 195*09a53ad8SAndrew Turner sub count, count, 128+128 /* Adjust count and bias for loop. */ 196*09a53ad8SAndrew Turner add dst, dst, 128 197*09a53ad8SAndrew Turner1: dc zva, dst 198*09a53ad8SAndrew Turner add dst, dst, 128 199*09a53ad8SAndrew Turner subs count, count, 128 200*09a53ad8SAndrew Turner b.hi 1b 201*09a53ad8SAndrew Turner stp q0, q0, [dstend, -128] 202*09a53ad8SAndrew Turner stp q0, q0, [dstend, -96] 203*09a53ad8SAndrew Turner stp q0, q0, [dstend, -64] 204*09a53ad8SAndrew Turner stp q0, q0, [dstend, -32] 205*09a53ad8SAndrew Turner ret 206*09a53ad8SAndrew Turner 207*09a53ad8SAndrew TurnerL(zva_other): 208*09a53ad8SAndrew Turner mov tmp2w, 4 209*09a53ad8SAndrew Turner lsl zva_lenw, tmp2w, tmp1w 210*09a53ad8SAndrew Turner add tmp1, zva_len, 64 /* Max alignment bytes written. 
*/ 211*09a53ad8SAndrew Turner cmp count, tmp1 212*09a53ad8SAndrew Turner blo L(no_zva) 213*09a53ad8SAndrew Turner 214*09a53ad8SAndrew Turner sub tmp2, zva_len, 1 215*09a53ad8SAndrew Turner add tmp1, dst, zva_len 216*09a53ad8SAndrew Turner add dst, dst, 16 217*09a53ad8SAndrew Turner subs count, tmp1, dst /* Actual alignment bytes to write. */ 218*09a53ad8SAndrew Turner bic tmp1, tmp1, tmp2 /* Aligned dc zva start address. */ 219*09a53ad8SAndrew Turner beq 2f 220*09a53ad8SAndrew Turner1: stp q0, q0, [dst], 64 221*09a53ad8SAndrew Turner stp q0, q0, [dst, -32] 222*09a53ad8SAndrew Turner subs count, count, 64 223*09a53ad8SAndrew Turner b.hi 1b 224*09a53ad8SAndrew Turner2: mov dst, tmp1 225*09a53ad8SAndrew Turner sub count, dstend, tmp1 /* Remaining bytes to write. */ 226*09a53ad8SAndrew Turner subs count, count, zva_len 227*09a53ad8SAndrew Turner b.lo 4f 228*09a53ad8SAndrew Turner3: dc zva, dst 229*09a53ad8SAndrew Turner add dst, dst, zva_len 230*09a53ad8SAndrew Turner subs count, count, zva_len 231*09a53ad8SAndrew Turner b.hs 3b 232*09a53ad8SAndrew Turner4: add count, count, zva_len 233*09a53ad8SAndrew Turner b L(tail64) 234*09a53ad8SAndrew Turner 235*09a53ad8SAndrew Turner .size memset, . - memset 236