1*0a6a1f1dSLionel Sambuc/* $NetBSD: strlen.S,v 1.1 2014/09/03 19:34:25 matt Exp $ */ 2*0a6a1f1dSLionel Sambuc 3*0a6a1f1dSLionel Sambuc/*- 4*0a6a1f1dSLionel Sambuc * Copyright (C) 2001 Martin J. Laubach <mjl@NetBSD.org> 5*0a6a1f1dSLionel Sambuc * All rights reserved. 6*0a6a1f1dSLionel Sambuc * 7*0a6a1f1dSLionel Sambuc * Redistribution and use in source and binary forms, with or without 8*0a6a1f1dSLionel Sambuc * modification, are permitted provided that the following conditions 9*0a6a1f1dSLionel Sambuc * are met: 10*0a6a1f1dSLionel Sambuc * 1. Redistributions of source code must retain the above copyright 11*0a6a1f1dSLionel Sambuc * notice, this list of conditions and the following disclaimer. 12*0a6a1f1dSLionel Sambuc * 2. Redistributions in binary form must reproduce the above copyright 13*0a6a1f1dSLionel Sambuc * notice, this list of conditions and the following disclaimer in the 14*0a6a1f1dSLionel Sambuc * documentation and/or other materials provided with the distribution. 15*0a6a1f1dSLionel Sambuc * 3. The name of the author may not be used to endorse or promote products 16*0a6a1f1dSLionel Sambuc * derived from this software without specific prior written permission. 17*0a6a1f1dSLionel Sambuc * 18*0a6a1f1dSLionel Sambuc * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 19*0a6a1f1dSLionel Sambuc * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 20*0a6a1f1dSLionel Sambuc * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 21*0a6a1f1dSLionel Sambuc * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 22*0a6a1f1dSLionel Sambuc * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 23*0a6a1f1dSLionel Sambuc * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 24*0a6a1f1dSLionel Sambuc * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 25*0a6a1f1dSLionel Sambuc * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 26*0a6a1f1dSLionel Sambuc * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 27*0a6a1f1dSLionel Sambuc * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 28*0a6a1f1dSLionel Sambuc */ 29*0a6a1f1dSLionel Sambuc/*----------------------------------------------------------------------*/ 30*0a6a1f1dSLionel Sambuc 31*0a6a1f1dSLionel Sambuc#include <machine/asm.h> 32*0a6a1f1dSLionel Sambuc 33*0a6a1f1dSLionel Sambuc__RCSID("$NetBSD: strlen.S,v 1.1 2014/09/03 19:34:25 matt Exp $"); 34*0a6a1f1dSLionel Sambuc 35*0a6a1f1dSLionel Sambuc/*----------------------------------------------------------------------*/ 36*0a6a1f1dSLionel Sambuc/* The algorithm here uses the following techniques: 37*0a6a1f1dSLionel Sambuc 38*0a6a1f1dSLionel Sambuc 1) Given a word 'x', we can test to see if it contains any 0 bytes 39*0a6a1f1dSLionel Sambuc by subtracting 0x01010101, and seeing if any of the high bits of each 40*0a6a1f1dSLionel Sambuc byte changed from 0 to 1. This works because the least significant 41*0a6a1f1dSLionel Sambuc 0 byte must have had no incoming carry (otherwise it's not the least 42*0a6a1f1dSLionel Sambuc significant), so it is 0x00 - 0x01 == 0xff. For all other 43*0a6a1f1dSLionel Sambuc byte values, either they have the high bit set initially, or when 44*0a6a1f1dSLionel Sambuc 1 is subtracted you get a value in the range 0x00-0x7f, none of which 45*0a6a1f1dSLionel Sambuc have their high bit set. The expression here is 46*0a6a1f1dSLionel Sambuc (x + 0xfefefeff) & ~(x | 0x7f7f7f7f), which gives 0x00000000 when 47*0a6a1f1dSLionel Sambuc there were no 0x00 bytes in the word. 48*0a6a1f1dSLionel Sambuc 49*0a6a1f1dSLionel Sambuc 2) Given a word 'x', we can test to see _which_ byte was zero by 50*0a6a1f1dSLionel Sambuc calculating ~(((x & 0x7f7f7f7f) + 0x7f7f7f7f) | x | 0x7f7f7f7f). 51*0a6a1f1dSLionel Sambuc This produces 0x80 in each byte that was zero, and 0x00 in all 52*0a6a1f1dSLionel Sambuc the other bytes. The '| 0x7f7f7f7f' clears the low 7 bits in each 53*0a6a1f1dSLionel Sambuc byte, and the '| x' part ensures that bytes with the high bit set 54*0a6a1f1dSLionel Sambuc produce 0x00. The addition will carry into the high bit of each byte 55*0a6a1f1dSLionel Sambuc iff that byte had one of its low 7 bits set. We can then just see 56*0a6a1f1dSLionel Sambuc which was the most significant bit set and divide by 8 to find how 57*0a6a1f1dSLionel Sambuc many to add to the index. 58*0a6a1f1dSLionel Sambuc This is from the book 'The PowerPC Compiler Writer's Guide', 59*0a6a1f1dSLionel Sambuc by Steve Hoxey, Faraydon Karim, Bill Hay and Hank Warren. 60*0a6a1f1dSLionel Sambuc*/ 61*0a6a1f1dSLionel Sambuc/*----------------------------------------------------------------------*/ 62*0a6a1f1dSLionel Sambuc 63*0a6a1f1dSLionel SambucENTRY(strlen) 64*0a6a1f1dSLionel Sambuc 65*0a6a1f1dSLionel Sambuc l.or r12, r3, r0 /* save start */ 66*0a6a1f1dSLionel Sambuc 67*0a6a1f1dSLionel Sambuc /* Setup constants */ 68*0a6a1f1dSLionel Sambuc l.movhi r13, 0x7f7f 69*0a6a1f1dSLionel Sambuc l.movhi r15, 0xfefe 70*0a6a1f1dSLionel Sambuc l.ori r13, r13, 0x7f7f 71*0a6a1f1dSLionel Sambuc l.ori r15, r15, 0xfeff 72*0a6a1f1dSLionel Sambuc 73*0a6a1f1dSLionel Sambuc1: l.andi r7, r12, 3 /* get low bits of start */ 74*0a6a1f1dSLionel Sambuc l.sfeqi r7, 0 /* all clear? */ 75*0a6a1f1dSLionel Sambuc l.bf 3f /* yes, skip alignment */ 76*0a6a1f1dSLionel Sambuc l.nop /* -- delay slot -- */ 77*0a6a1f1dSLionel Sambuc 78*0a6a1f1dSLionel Sambuc l.sub r12, r12, r7 /* word align start */ 79*0a6a1f1dSLionel Sambuc l.lwz r8, 0(r12) /* load data */ 80*0a6a1f1dSLionel Sambuc l.addi r6, r0, -1 /* r6 = 0xffffffff */ 81*0a6a1f1dSLionel Sambuc l.slli r5, r7, 3 /* bits to bytes */ 82*0a6a1f1dSLionel Sambuc l.srl r6, r6, r5 /* clear low (MSB) bytes */ 83*0a6a1f1dSLionel Sambuc l.xori r6, r6, -1 /* complement */ 84*0a6a1f1dSLionel Sambuc l.or r8, r8, r6 /* merge with loaded word */ 85*0a6a1f1dSLionel Sambuc l.j 4f /* and process */ 86*0a6a1f1dSLionel Sambuc l.nop /* -- delay-slot -- */ 87*0a6a1f1dSLionel Sambuc 88*0a6a1f1dSLionel Sambuc2: l.addi r12, r12, 4 /* advance to next word */ 89*0a6a1f1dSLionel Sambuc3: l.lwz r8, 0(r12) /* fetch data word */ 90*0a6a1f1dSLionel Sambuc 91*0a6a1f1dSLionel Sambuc // Step 1: (x + 0xfefefeff) & ~(x | 0x7f7f7f7f) 92*0a6a1f1dSLionel Sambuc4: l.or r7, r8, r13 /* t0 = x | 0x7f7f7f7f */ 93*0a6a1f1dSLionel Sambuc l.xori r6, r7, -1 /* t1 = ~t0 */ 94*0a6a1f1dSLionel Sambuc l.add r5, r8, r15 /* t2 = x + 0xfefefeff */ 95*0a6a1f1dSLionel Sambuc l.and r4, r7, r5 /* t3 = t1 & t2 */ 96*0a6a1f1dSLionel Sambuc l.sfeqi r4, 0 97*0a6a1f1dSLionel Sambuc l.bf 2b /* no NUL bytes here */ 98*0a6a1f1dSLionel Sambuc l.nop /* -- delay slot -- */ 99*0a6a1f1dSLionel Sambuc 100*0a6a1f1dSLionel Sambuc // Step 2: ~(((x & 0x7f7f7f7f) + 0x7f7f7f7f) | x | 0x7f7f7f7f) 101*0a6a1f1dSLionel Sambuc l.and r7, r8, r13 /* t0 = x & 0x7f7f7f7f */ 102*0a6a1f1dSLionel Sambuc l.or r6, r8, r13 /* t1 = x | 0x7f7f7f7f */ 103*0a6a1f1dSLionel Sambuc l.add r5, r7, r13 /* t2 = t0 + 0x7f7f7f7f */ 104*0a6a1f1dSLionel Sambuc l.or r4, r5, r6 /* t3 = t2 | t1 */ 105*0a6a1f1dSLionel Sambuc l.xori r4, r4, -1 /* t3 = ~t3 */ 106*0a6a1f1dSLionel Sambuc 107*0a6a1f1dSLionel Sambuc l.fl1 r5, r4 /* find last bit set */ 108*0a6a1f1dSLionel Sambuc l.ori r6, r0, 32 /* bits per word */ 109*0a6a1f1dSLionel Sambuc l.sub r7, r6, r5 /* cvt to leading zeros */ 110*0a6a1f1dSLionel Sambuc l.srli r8, r7, 3 /* shift to byte count */ 111*0a6a1f1dSLionel Sambuc 112*0a6a1f1dSLionel SambucLdone: 113*0a6a1f1dSLionel Sambuc l.add r12, r12, r8 /* r12 contains end pointer */ 114*0a6a1f1dSLionel Sambuc 115*0a6a1f1dSLionel Sambuc /* NOTE: Keep it so this function returns the end pointer 116*0a6a1f1dSLionel Sambuc in r12, so we can it use from other str* calls (strcat 117*0a6a1f1dSLionel Sambuc comes to mind */ 118*0a6a1f1dSLionel Sambuc 119*0a6a1f1dSLionel Sambuc l.sub r11, r12, r3 /* length = end - start */ 120*0a6a1f1dSLionel Sambuc l.jr lr /* return */ 121*0a6a1f1dSLionel Sambuc l.nop /* -- delay slot -- */ 122*0a6a1f1dSLionel SambucEND(strlen) 123*0a6a1f1dSLionel Sambuc/*----------------------------------------------------------------------*/ 124