1/* $NetBSD: memcmp.S,v 1.2 2018/02/04 21:52:16 skrll Exp $ */ 2 3/*- 4 * Copyright (c) 2014 The NetBSD Foundation, Inc. 5 * All rights reserved. 6 * 7 * This code is derived from software contributed to The NetBSD Foundation 8 * by Matt Thomas of 3am Software Foundry. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 29 * POSSIBILITY OF SUCH DAMAGE. 
 */

#include <machine/asm.h>

RCSID("$NetBSD: memcmp.S,v 1.2 2018/02/04 21:52:16 skrll Exp $")

/*
 * int memcmp(const void *s1, const void *s2, size_t n)
 *
 * ABI:	AAPCS64.  In: x0 = s1, x1 = s2, x2 = n.
 * Out:	x0 < 0 / == 0 / > 0 according to the first differing byte.
 * Leaf function: no stack use; clobbers only temporary registers.
 *
 * Register roles:
 *	x9  = s1 cursor, x10 = s2 cursor (copies, freeing x0 for the result)
 *	x2  = bytes remaining
 *	x3  = s1 misalignment (bytes, then bits)
 *	x4/x5 = data gathered from s1, x6/x7 = data gathered from s2
 */
ENTRY(memcmp)
	mov	x9, x0			/* copy pointers aside; x0 becomes */
	mov	x10, x1			/*   the result register */
	mov	x0, xzr			/* preload the "equal" result */
	cbz	x2, .Lmemcmp_ret	/* n == 0: buffers trivially equal */
#ifdef _KERNEL
	cmp	x2, #6
	b.eq	.Lmemcmp_6bytes		/* kernel-only fast path for n == 6 */
#endif
	cmp	x2, #8
	b.ls	.Lmemcmp_lessthan8	/* n <= 8: simple byte loop */

	ands	x3, x9, #7		/* x3 = s1 misalignment within dword */
	b.eq	.Lmemcmp_dword_loop	/* s1 already dword aligned */

/*
 * The src1 address is not dword aligned.  Back both pointers up to the
 * previous dword boundary of src1 (adjusting src2 by the same amount) and
 * shift the leading bytes that precede the buffers out of the loaded data,
 * so the main loop below can run a dword at a time.
 */
	add	x2, x2, x3		/* add unalignment to length */
	sub	x2, x2, #8		/* now subtract a dword */

	sub	x9, x9, x3		/* dword align src1 */
	sub	x10, x10, x3		/* adjust src2 */

	lsl	x3, x3, #3		/* convert bytes to bits */
	ldr	x4, [x9], #8		/* load dword from src1 */
	ldr	x6, [x10], #8		/* load dword from src2 */
#ifdef __AARCH64EB__
	lsl	x4, x4, x3		/* discard leading bytes from data1 */
	lsl	x6, x6, x3		/* discard leading bytes from data2 */
#else
	lsr	x4, x4, x3		/* discard leading bytes from data1 */
	lsr	x6, x6, x3		/* discard leading bytes from data2 */
#endif
	subs	x0, x4, x6		/* compare data */
	b.ne	.Lmemcmp_last_compare	/* difference.  find it */

/*
 * Main loop: compare one dword per iteration while at least 8 bytes remain.
 */
.Lmemcmp_dword_loop:
	subs	x2, x2, #8
	b.mi	.Lmemcmp_finish_dword	/* fewer than 8 bytes left */
	ldr	x4, [x9], #8
	ldr	x6, [x10], #8
	subs	x0, x4, x6
	b.eq	.Lmemcmp_dword_loop	/* no difference.  go to loop */
	b	.Lmemcmp_last_compare	/* go find the difference. */

.Lmemcmp_finish_dword:
	/*
	 * we might have gotten here with nothing left.  If so, just bail.
	 */
	tst	x2, #7
	b.eq	.Lmemcmp_ret
	mov	x4, xzr			/* zero the accumulators so any */
	mov	x6, xzr			/*   unfilled byte lanes compare equal */
	/*
	 * Gather the 1-7 trailing bytes of each buffer into x4/x6, packing
	 * them into the same byte lanes a dword load would have used (so the
	 * shared difference-finding code below works for both cases).
	 * Consume the remaining length one bit at a time: 4 bytes, then 2,
	 * then 1.
	 */
	tbz	x2, #2, .Lmemcmp_finish_word
	ldr	w4, [x9], #4
	ldr	w6, [x10], #4
#ifdef __AARCH64EB__
	lsl	x4, x4, #32		/* move to MSW */
	lsl	x6, x6, #32		/* move to MSW */
#endif

.Lmemcmp_finish_word:
	tbz	x2, #1, .Lmemcmp_finish_hword
	ldrh	w5, [x9], #2
	ldrh	w7, [x10], #2
#ifdef __AARCH64EB__
	orr	x4, x4, x5, lsl #16	/* place below the word (if any) */
	orr	x6, x6, x7, lsl #16
#else
	orr	x4, x4, x5, lsl #32	/* place above the word (if any) */
	orr	x6, x6, x7, lsl #32
#endif

.Lmemcmp_finish_hword:
	tbz	x2, #0, .Lmemcmp_last_compare0

	ldrb	w5, [x9]		/* final odd byte; no post-increment */
	ldrb	w7, [x10]		/*   needed, pointers are dead after this */
#ifdef __AARCH64EB__
	orr	x4, x4, x5, lsl #8
	orr	x6, x6, x7, lsl #8
#else
	orr	x4, x4, x5, lsl #48
	orr	x6, x6, x7, lsl #48
#endif
	b	.Lmemcmp_last_compare0	/* go find the difference. */

/*
 * Short buffers (1 <= n <= 8): plain byte-at-a-time compare loop.
 * The ccmp keeps looping while bytes remain (carry set from the count
 * decrement) and the bytes are equal; when the count underflows it forces
 * the flags to "not equal" (nzcv = 0) so the loop exits either way.
 */
.Lmemcmp_lessthan8:
	sub	x2, x2, #1		/* pre-bias count for the subs below */
1:	ldrb	w4, [x9], #1
	ldrb	w5, [x10], #1
	subs	x2, x2, #1		/* carry clear once count is exhausted */
	ccmp	x4, x5, #0, cs		/* if bytes remain, compare them */
	b.eq	1b			/* equal so far and bytes remain */
	sub	x0, x4, x5		/* 0 if the final bytes matched */

.Lmemcmp_ret:
	ret

#ifdef _KERNEL
/*
 * Kernel-only fast path for n == 6: load all six bytes of each buffer
 * into one register and fall through to the common difference-finding
 * code (NOTE(review): presumably sized for Ethernet address compares —
 * confirm with callers).
 */
.Lmemcmp_6bytes:
	ldr	w4, [x9], #4
	ldrh	w5, [x9]
#if __AARCH64EB__
	orr	x4, x4, x5, lsl #48
	rev	x4, x4			/* byte-swap into the lane order the */
					/*   compare code below expects */
#else
	orr	x4, x4, x5, lsl #32
#endif
	ldr	w6, [x10], #4
	ldrh	w7, [x10]
#if __AARCH64EB__
	orr	x6, x6, x7, lsl #48
	rev	x6, x6
#else
	orr	x6, x6, x7, lsl #32
#endif
#endif /* _KERNEL */

/*
 * We have loaded the final bytes in x4 and x6 in host-endian.  Now we have
 * to figure what the difference is (if any).  First we subtract.  Any bytes
 * that are the same will be 0.  So to find the first non-zero byte we byterev
 * and then use clz to find that byte.
 * We mask the location to get the start of the byte.  We shift both
 * data dwords left to remove the equal part.  Then we shift right to discard
 * the trailing bytes.  Then we subtract and return.
 */
.Lmemcmp_last_compare0:
	subs	x0, x4, x6
	b.eq	.Lmemcmp_ret		/* no difference: return 0 */
.Lmemcmp_last_compare:
#if __AARCH64EB__
	clz	x1, x0			/* find first non-zero byte */
	rev	x0, x0
#else
	rev	x1, x0
	clz	x1, x1			/* find first non-zero byte */
#endif
	bfi	x1, xzr, #0, #3		/* make it byte aligned */
	lsr	x1, x0, x1		/* shift to LSB */
#if __AARCH64EL__
	rev	x4, x4			/* byte reverse */
	rev	x6, x6			/* byte reverse */
#endif
	subs	x0, x4, x6		/* unsigned compare sets C per order */
	csetm	x0, cc			/* set mask bits as sign */
	bfm	x0, x1, #0, #7		/* extend with sign bit */
	ret
END(memcmp)