/* memcmp - compare memory

   Copyright (c) 2013, Linaro Limited
   All rights reserved.

   Redistribution and use in source and binary forms, with or without
   modification, are permitted provided that the following conditions are met:
       * Redistributions of source code must retain the above copyright
         notice, this list of conditions and the following disclaimer.
       * Redistributions in binary form must reproduce the above copyright
         notice, this list of conditions and the following disclaimer in the
         documentation and/or other materials provided with the distribution.
       * Neither the name of the Linaro nor the
         names of its contributors may be used to endorse or promote products
         derived from this software without specific prior written permission.

   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
   HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.  */

/* Assumptions:
 *
 * ARMv8-a, AArch64
 */

/* def_fn NAME [p2align=N]
   Switch to .text, align to 2^N bytes, and open a global symbol NAME of
   type %function.  The caller is expected to close it with a matching
   .size directive.  */
	.macro def_fn f p2align=0
	.text
	.p2align \p2align
	.global \f
	.type \f, %function
\f:
	.endm

/* Parameters and result.  */
#define src1		x0
#define src2		x1
#define limit		x2
#define result		x0

/* Internal variables.  (has_nul and tmp3 are defined but not referenced
   by the code below.)  */
#define data1		x3
#define data1w		w3
#define data2		x4
#define data2w		w4
#define has_nul		x5
#define diff		x6
#define endloop		x7
#define tmp1		x8
#define tmp2		x9
#define tmp3		x10
#define pos		x11
#define limit_wd	x12
#define mask		x13

/* int memcmp (const void *s1, const void *s2, size_t n)

   In:    x0 = s1, x1 = s2, x2 = n (number of bytes to compare).
   Out:   x0 = 0 when n is 0 or the first n bytes are identical;
	  otherwise a value that is negative when the first differing
	  byte of s1 is smaller (as an unsigned char) than that of s2,
	  and positive when it is larger.  On the word-at-a-time path the
	  magnitude is not the plain byte difference; only the sign is
	  meaningful.
   Clobbers: x1-x4, x6-x9, x11-x13 and the condition flags.  The stack
	  is not touched and no other function is called.

   Strategy:
     * If the two pointers differ in their low three bits, compare one
       byte at a time (.Lmisaligned8).
     * Otherwise compare eight bytes at a time.  When the common start
       is not 8-byte aligned, both pointers are rounded down and the
       bytes preceding the start are forced to 0xff in both words so
       they compare equal (.Lmutual_align).  Bytes beyond n in the
       final word are masked off before the result is computed.

   NOTE: the word path always loads whole aligned doublewords, so it may
   read bytes that lie outside [s, s + n), but only inside the aligned
   8-byte words that contain the first and the last compared byte.  */
def_fn memcmp p2align=6
	cbz	limit, .Lret0
	eor	tmp1, src1, src2
	tst	tmp1, #7
	b.ne	.Lmisaligned8
	ands	tmp1, src1, #7
	b.ne	.Lmutual_align
	add	limit_wd, limit, #7
	lsr	limit_wd, limit_wd, #3	/* Doublewords to examine, rounded up.  */
	/* Start of performance-critical section  -- one 64B cache line.  */
.Lloop_aligned:
	ldr	data1, [src1], #8
	ldr	data2, [src2], #8
.Lstart_realigned:
	subs	limit_wd, limit_wd, #1
	eor	diff, data1, data2	/* Non-zero if differences found.  */
	csinv	endloop, diff, xzr, ne	/* Last Dword or differences.  */
	cbz	endloop, .Lloop_aligned
	/* End of performance-critical section  -- one 64B cache line.  */

	/* Not reached the limit, must have found a diff.  */
	cbnz	limit_wd, .Lnot_limit

	/* Limit % 8 == 0 => all bytes significant.  */
	ands	limit, limit, #7
	b.eq	.Lnot_limit

	/* Build a mask covering the bytes of the last doubleword that lie
	   beyond the limit.  Clear them in both data words and set them in
	   DIFF, so that the scan below stops at the end of the significant
	   data when no real difference precedes it.  */
	lsl	limit, limit, #3	/* Bytes -> bits.  */
	mov	mask, #~0
#ifdef __AARCH64EB__
	lsr	mask, mask, limit
#else
	lsl	mask, mask, limit
#endif
	bic	data1, data1, mask
	bic	data2, data2, mask

	orr	diff, diff, mask
.Lnot_limit:

#ifndef	__AARCH64EB__
	/* Little-endian: byte-reverse so that the lowest-addressed byte
	   becomes the most significant one.  */
	rev	diff, diff
	rev	data1, data1
	rev	data2, data2
#endif
	/* The MS-non-zero bit of DIFF marks either the first bit
	   that is different, or the end of the significant data.
	   Shifting left now will bring the critical information into the
	   top bits.  */
	clz	pos, diff
	lsl	data1, data1, pos
	lsl	data2, data2, pos
	/* But we need to zero-extend (char is unsigned) the value and then
	   perform a signed 32-bit subtraction.  */
	lsr	data1, data1, #56
	sub	result, data1, data2, lsr #56
	ret

.Lmutual_align:
	/* Sources are mutually aligned, but are not currently at an
	   alignment boundary.  Round down the addresses and then mask off
	   the bytes that precede the start point.  */
	bic	src1, src1, #7
	bic	src2, src2, #7
	add	limit, limit, tmp1	/* Adjust the limit for the extra.  */
	lsl	tmp1, tmp1, #3		/* Bytes beyond alignment -> bits.  */
	ldr	data1, [src1], #8
	neg	tmp1, tmp1		/* Bits to alignment -64.  */
	ldr	data2, [src2], #8
	mov	tmp2, #~0
#ifdef __AARCH64EB__
	/* Big-endian.  Early bytes are at MSB.  */
	lsl	tmp2, tmp2, tmp1	/* Shift (tmp1 & 63).  */
#else
	/* Little-endian.  Early bytes are at LSB.  */
	lsr	tmp2, tmp2, tmp1	/* Shift (tmp1 & 63).  */
#endif
	add	limit_wd, limit, #7
	/* Force the bytes before the start point to 0xff in both words so
	   that they contribute nothing to DIFF.  */
	orr	data1, data1, tmp2
	orr	data2, data2, tmp2
	lsr	limit_wd, limit_wd, #3
	b	.Lstart_realigned

.Lret0:
	mov	result, #0
	ret

	.p2align 6
.Lmisaligned8:
	/* Byte-at-a-time loop.  LIMIT is pre-decremented so that the SUBS
	   in the loop clears the carry flag on the final byte.  */
	sub	limit, limit, #1
1:
	/* Perhaps we can do better than this.  */
	ldrb	data1w, [src1], #1
	ldrb	data2w, [src2], #1
	subs	limit, limit, #1
	/* While bytes remain (carry set) compare the pair; otherwise force
	   "not equal" so that the loop exits.  */
	ccmp	data1w, data2w, #0, cs	/* NZCV = 0b0000.  */
	b.eq	1b
	/* LDRB zero-extended both bytes, so this is the unsigned byte
	   difference (zero when the limit was reached on equal bytes).  */
	sub	result, data1, data2
	ret
	.size memcmp, . - memcmp