/* $NetBSD: strlen.S,v 1.6 2011/01/15 07:31:12 matt Exp $ */

/*-
 * Copyright (C) 2001 Martin J. Laubach <mjl@NetBSD.org>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. The name of the author may not be used to endorse or promote products
 *    derived from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
/*----------------------------------------------------------------------*/

#include <machine/asm.h>

__RCSID("$NetBSD: strlen.S,v 1.6 2011/01/15 07:31:12 matt Exp $");

/*----------------------------------------------------------------------*/
/* The algorithm here uses the following techniques:

   1) Given a word 'x', we can test to see if it contains any 0 bytes
      by subtracting 0x01010101, and seeing if any of the high bits of each
      byte changed from 0 to 1. This works because the least significant
      0 byte must have had no incoming carry (otherwise it's not the least
      significant), so it is 0x00 - 0x01 == 0xff. For all other
      byte values, either they have the high bit set initially, or when
      1 is subtracted you get a value in the range 0x00-0x7f, none of which
      have their high bit set. The expression here is
      (x + 0xfefefeff) & ~(x | 0x7f7f7f7f), which gives 0x00000000 when
      there were no 0x00 bytes in the word.

   2) Given a word 'x', we can test to see _which_ byte was zero by
      calculating ~(((x & 0x7f7f7f7f) + 0x7f7f7f7f) | x | 0x7f7f7f7f).
      This produces 0x80 in each byte that was zero, and 0x00 in all
      the other bytes. The '| 0x7f7f7f7f' clears the low 7 bits in each
      byte, and the '| x' part ensures that bytes with the high bit set
      produce 0x00. The addition will carry into the high bit of each byte
      iff that byte had one of its low 7 bits set. We can then just see
      which was the most significant bit set and divide by 8 to find how
      many to add to the index.
      This is from the book 'The PowerPC Compiler Writer's Guide',
      by Steve Hoxey, Faraydon Karim, Bill Hay and Hank Warren.
*/
/*----------------------------------------------------------------------*/

/*
 * strlen
 *
 * In:		%r3 = address of the string (need not be word aligned)
 * Out:		%r3 = number of bytes before the first NUL byte
 *		%r4 = address of that NUL byte (other str* routines may
 *		      rely on this, see the NOTE near the end)
 * Clobbers:	%r0, %r5-%r10, %cr0
 *
 * The string is scanned one aligned 32-bit word at a time, so bytes that
 * lie in the same aligned word before the start of the string or after
 * its terminating NUL are loaded as well (an aligned word load never
 * straddles two pages).
 *
 * NOTE: the leading-byte mask and the cntlzw step treat the most
 * significant byte of a loaded word as the byte at the lowest address,
 * i.e. the word loads are expected to be big-endian.
 */

	.text
	.align 4

ENTRY(strlen)

		/* Setup constants */
	lis	%r10, 0x7f7f
	lis	%r9, 0xfefe
	ori	%r10, %r10, 0x7f7f	/* r10 = 0x7f7f7f7f */
	ori	%r9, %r9, 0xfeff	/* r9 = 0xfefefeff (low 32 bits) */

		/* Mask out leading bytes on non aligned strings */
	rlwinm.	%r8, %r3, 3, 27, 28	/* leading bits to mask: */
					/* r8 = (addr & 3) * 8, sets cr0 */
#ifdef _LP64
	clrrdi	%r5, %r3, 2		/* clear low 2 addr bits */
#else
	clrrwi	%r5, %r3, 2		/* clear low 2 addr bits */
#endif
	li	%r0, -1
	beq+	3f			/* skip alignment if already */
					/* aligned (cr0 from rlwinm.) */

	srw	%r0, %r0, %r8		/* make 0000...1111 mask */

	lwz	%r7, 0(%r5)		/* word containing string start */
	nor	%r0, %r0, %r0		/* invert mask */
	or	%r7, %r7, %r0		/* make leading bytes != 0 */
	b	2f

3:	subi	%r5, %r5, 4		/* pre-bias: lwzu below adds 4 */
					/* before it loads */

1:	lwzu	%r7, 4(%r5)		/* fetch data word */

2:	nor	%r0, %r7, %r10		/* do step 1 */
					/* r0 = ~(x | 0x7f7f7f7f) */
	add	%r6, %r7, %r9		/* r6 = x + 0xfefefeff */
	and.	%r0, %r0, %r6		/* zero iff x has no NUL byte */

	beq+	1b			/* no NUL bytes here */

	and	%r8, %r7, %r10		/* ok, a NUL is somewhere */
	or	%r7, %r7, %r10		/* do step 2 to find out */
	add	%r0, %r8, %r10		/* where */
	nor	%r8, %r7, %r0		/* r8 = 0x80 in every NUL byte */

	cntlzw	%r0, %r8		/* offset from this word */
	srwi	%r4, %r0, 3		/* bit index / 8 = byte index */

	add	%r4, %r5, %r4		/* r4 contains end pointer */
		/* NOTE: Keep it so this function returns the end pointer
		   in r4, so we can use it from other str* calls (strcat
		   comes to mind) */

	subf	%r3, %r3, %r4		/* length = end - start */
	blr
END(strlen)
/*----------------------------------------------------------------------*/