/*-
 * Copyright (c) 2012 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Matt Thomas of 3am Software Foundry.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#include <machine/asm.h>

RCSID("$NetBSD: strlen_neon.S,v 1.3 2012/12/28 05:15:08 matt Exp $")
	.text

ENTRY(strlen)
	mov	ip, r0		/* we use r0 for the return value */
	ands	r1, r0, #15	/* extract misalignment within qword */
	neg	r0, r1		/* subtract misalignment from count */
	veor	q2, q2, q2	/* clear mask */
	mov	r3, #7		/* NBBY - 1 */
	vdup.32	q3, r3		/* dup throughout q3 */
	movw	r3, #0x0404	/* magic since there are 4 bytes per U32 */
	orr	r3, r3, r3, lsl #16 /* copy to upper 16 bits */
	beq	.Lmain_loop	/* already aligned? skip mask setup */
	veor	q0, q0, q0	/* clear q0 */
	vmvn	q2, q2		/* set all 16 bytes of mask to all 1s */
	bic	ip, ip, #15	/* qword align string address */
	lsl	r1, r1, #3	/* convert misalignment to bits */
	cmp	r1, #64
	rsbgt	r1, r1, #128	/* > 64? BE so we are shifting LSW right */
	movgt	r2, #0		/* > 64? leave MSW alone */
	rsble	r2, r1, #64	/* <=64? BE so we are shifting MSW right */
	movle	r1, #64		/* <=64? clear LSW */
	vmov	d0, r1, r2	/* set shifts for lower and upper halves */
	vmovl.u32 q0, d0	/* 2 U32 -> 2 U64 */
	vshl.u64 q2, q2, q0	/* shift */
	/*
	 * Main loop.  Load 16 bytes, compare each byte against 0, then
	 * use CLZ on each 32-bit lane to count the bytes that precede
	 * the first NUL.
	 */
.Lmain_loop:
	vld1.64 {d0, d1}, [ip:128]!	/* load qword */
#ifdef __ARMEL__
	vrev64.8 q0, q0		/* convert to BE for clz */
#endif
	vswp	d0, d1		/* swap dwords to get BE qword */
	vorr	q0, q0, q2	/* or "in" leading byte mask */
	veor	q2, q2, q2	/* clear leading byte mask */
	vceq.i8	q1, q0, #0	/* test each byte for 0 */
	/* Why couldn't there be a 64-bit CLZ? */
	vclz.u32 q1, q1		/* count leading zeroes to find the 0 byte */
	vadd.u32 q1, q1, q3	/* round up to byte boundary */
	vshr.u32 q1, q1, #3	/* convert to bytes */
	vmovn.u32 d0, q1	/* 4 I32 -> 4 I16 */
	vmovn.u16 d0, q0	/* 4 I16 -> 4 I8 */
	vmov	r2, s0		/* get counts */
	eors	r2, r2, r3	/* xor with 0x04040404 */
	addeq	r0, #16		/* 0? no NULs in this qword */
	beq	.Lmain_loop	/* get next qword */
	clz	ip, r2		/* count leading zeros */
	mov	r2, r2, lsl ip	/* discard them */
	mov	ip, ip, lsr #3	/* divide leading zeroes by 8 */
	add	r0, r0, ip, lsl #2 /* multiply by 4 and add to count */
	and	r2, r2, #(3 << 29)	/* isolate byte count within NUL's lane */
	add	r0, r0, r2, lsr #29	/* ... and add it to the total */
	RET			/* and return. */
END(strlen)
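
/*
 * For illustration only: a scalar C model of the prologue's mask setup
 * (the rsbgt/movgt/rsble/movle sequence above).  The helper name and its
 * interface are hypothetical; this sketch is not part of this file or of
 * libc.  For a string starting m bytes past a 16-byte boundary, the code
 * keeps only the top 8*m bits of an all-ones 128-bit mask, split across
 * two 64-bit left shifts because NEON has no 128-bit shift (a per-lane
 * shift amount of 64 clears that dword):
 *
 *	void
 *	mask_shifts(unsigned m, unsigned *lsw_shift, unsigned *msw_shift)
 *	{
 *		unsigned bits = 8 * m;		// m = 1..15
 *		if (bits > 64) {
 *			*lsw_shift = 128 - bits; // rsbgt: top bits-64 ones
 *			*msw_shift = 0;		 // movgt: MSW stays all-ones
 *		} else {
 *			*msw_shift = 64 - bits;	 // rsble: top `bits' ones
 *			*lsw_shift = 64;	 // movle: LSW cleared
 *		}
 *	}
 */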
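
/*
 * Likewise for the loop body: a scalar C model of the per-lane byte count
 * computed by the vceq.i8/vclz.u32/vadd.u32/vshr.u32 sequence.  The helper
 * is hypothetical and assumes GCC/Clang __builtin_clz; it is illustrative
 * only.  Each big-endian 32-bit lane yields 0..4, the number of bytes that
 * precede the first NUL in that lane (4 = no NUL in the lane):
 *
 *	#include <stdint.h>
 *
 *	static uint32_t
 *	lane_bytes_before_nul(uint32_t be_lane)
 *	{
 *		uint32_t eqmask = 0;	// vceq.i8: 0xff where byte == 0
 *		for (int i = 24; i >= 0; i -= 8)
 *			if (((be_lane >> i) & 0xff) == 0)
 *				eqmask |= 0xffu << i;
 *		// vclz.u32: clz(0) counts as 32, so a NUL-free lane gives 4
 *		uint32_t nz = eqmask ? (uint32_t)__builtin_clz(eqmask) : 32;
 *		return (nz + 7) >> 3;	// vadd #7, vshr #3: bits to bytes
 *	}
 */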