xref: /netbsd-src/common/lib/libc/arch/powerpc/string/strlen.S (revision cf88c3890b9a2e310adacd2c695f42a55ca4e79a)
1*cf88c389Smatt/*	$NetBSD: strlen.S,v 1.6 2011/01/15 07:31:12 matt Exp $ */
237c9f0a6Schristos
39eb6edc9Sross/*-
49eb6edc9Sross * Copyright (C) 2001	Martin J. Laubach <mjl@NetBSD.org>
59eb6edc9Sross * All rights reserved.
69eb6edc9Sross *
79eb6edc9Sross * Redistribution and use in source and binary forms, with or without
89eb6edc9Sross * modification, are permitted provided that the following conditions
99eb6edc9Sross * are met:
109eb6edc9Sross * 1. Redistributions of source code must retain the above copyright
119eb6edc9Sross *    notice, this list of conditions and the following disclaimer.
129eb6edc9Sross * 2. Redistributions in binary form must reproduce the above copyright
139eb6edc9Sross *    notice, this list of conditions and the following disclaimer in the
149eb6edc9Sross *    documentation and/or other materials provided with the distribution.
159eb6edc9Sross * 3. The name of the author may not be used to endorse or promote products
169eb6edc9Sross *    derived from this software without specific prior written permission.
179eb6edc9Sross *
189eb6edc9Sross * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
199eb6edc9Sross * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
209eb6edc9Sross * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
219eb6edc9Sross * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
229eb6edc9Sross * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
239eb6edc9Sross * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
249eb6edc9Sross * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
259eb6edc9Sross * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
269eb6edc9Sross * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
279eb6edc9Sross * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
289eb6edc9Sross */
299eb6edc9Sross/*----------------------------------------------------------------------*/
3037c9f0a6Schristos
319eb6edc9Sross#include <machine/asm.h>
329eb6edc9Sross
33*cf88c389Smatt__RCSID("$NetBSD: strlen.S,v 1.6 2011/01/15 07:31:12 matt Exp $");
34*cf88c389Smatt
359eb6edc9Sross/*----------------------------------------------------------------------*/
369eb6edc9Sross/* The algorithm here uses the following techniques:
379eb6edc9Sross
389eb6edc9Sross   1) Given a word 'x', we can test to see if it contains any 0 bytes
399eb6edc9Sross      by subtracting 0x01010101, and seeing if any of the high bits of each
409eb6edc9Sross      byte changed from 0 to 1. This works because the least significant
419eb6edc9Sross      0 byte must have had no incoming carry (otherwise it's not the least
429eb6edc9Sross      significant), so it is 0x00 - 0x01 == 0xff. For all other
439eb6edc9Sross      byte values, either they have the high bit set initially, or when
449eb6edc9Sross      1 is subtracted you get a value in the range 0x00-0x7f, none of which
459eb6edc9Sross      have their high bit set. The expression here is
469eb6edc9Sross      (x + 0xfefefeff) & ~(x | 0x7f7f7f7f), which gives 0x00000000 when
479eb6edc9Sross      there were no 0x00 bytes in the word.
489eb6edc9Sross
499eb6edc9Sross   2) Given a word 'x', we can test to see _which_ byte was zero by
509eb6edc9Sross      calculating ~(((x & 0x7f7f7f7f) + 0x7f7f7f7f) | x | 0x7f7f7f7f).
519eb6edc9Sross      This produces 0x80 in each byte that was zero, and 0x00 in all
529eb6edc9Sross      the other bytes. The '| 0x7f7f7f7f', once inverted by the final '~',
539eb6edc9Sross      clears the low 7 bits in each byte, and the '| x' part ensures that
549eb6edc9Sross      bytes with the high bit set produce 0x00. The addition will carry into
559eb6edc9Sross      the high bit of each byte iff that byte had one of its low 7 bits set.
569eb6edc9Sross      We can then count the leading zero bits (cntlzw) and divide by 8 to
579eb6edc9Sross      find how many to add to the index.
589eb6edc9Sross      This is from the book 'The PowerPC Compiler Writer's Guide',
599eb6edc9Sross      by Steve Hoxey, Faraydon Karim, Bill Hay and Hank Warren.
609eb6edc9Sross*/
619eb6edc9Sross/*----------------------------------------------------------------------*/
629eb6edc9Sross
639eb6edc9Sross		.text
649eb6edc9Sross		.align 4
659eb6edc9Sross
/*
 * size_t strlen(const char *s)
 *
 * In:	r3 = s
 * Out:	r3 = number of bytes before the terminating NUL
 *	r4 = address of the terminating NUL (see the NOTE before the
 *	     final subf)
 *
 * Register use inside the routine:
 *	r5		address of the 4-byte-aligned word being examined
 *	r7		data word loaded from (r5)
 *	r9		constant, low word 0xfefefeff (adds -0x01010101)
 *	r10		constant 0x7f7f7f7f
 *	r0, r6, r8	scratch
 * CR0 is overwritten by the record-form instructions (rlwinm., and.).
 * No stack access is made by the instructions below.
 *
 * The string is always read with aligned word loads, so bytes in the
 * first word that precede s, and bytes in the last word that follow
 * the NUL, are fetched but do not influence the result.
 *
 * NOTE(review): the leading-byte mask and the cntlzw-based byte index
 * treat the most significant byte of a word as the lowest address,
 * i.e. they presume big-endian loads -- confirm before building this
 * for a little-endian target.
 */
669eb6edc9SrossENTRY(strlen)
679eb6edc9Sross
689eb6edc9Sross		/* Setup constants */
699eb6edc9Sross		lis	%r10, 0x7f7f
709eb6edc9Sross		lis	%r9, 0xfefe
719eb6edc9Sross		ori	%r10, %r10, 0x7f7f	/* r10 = 0x7f7f7f7f */
729eb6edc9Sross		ori	%r9, %r9, 0xfeff	/* r9 low word = 0xfefefeff */
739eb6edc9Sross
749eb6edc9Sross		/* Mask out leading bytes on non aligned strings */
759eb6edc9Sross		rlwinm.	%r8, %r3, 3, 27, 28	/* r8 = 8 * (s & 3): leading bits to mask */
76282f07d6Sross#ifdef _LP64
77282f07d6Sross		clrrdi	%r5, %r3, 2		/*  clear low 2 addr bits */
78282f07d6Sross#else
799eb6edc9Sross		clrrwi	%r5, %r3, 2		/*  clear low 2 addr bits */
80282f07d6Sross#endif
819eb6edc9Sross		li	%r0, -1			/* all ones, source for the mask */
829eb6edc9Sross		beq+	3f			/* skip alignment if already */
839eb6edc9Sross						/* aligned (CR0 from rlwinm.) */
849eb6edc9Sross
859eb6edc9Sross		srw	%r0, %r0, %r8		/* make 0000...1111 mask */
869eb6edc9Sross
879eb6edc9Sross		lwz	%r7, 0(%r5)		/* first word, from aligned addr */
889eb6edc9Sross		nor	%r0, %r0, %r0		/* invert mask */
899eb6edc9Sross		or	%r7, %r7, %r0		/* make leading bytes != 0 */
909eb6edc9Sross		b	2f			/* test it; skip the loop's load */
919eb6edc9Sross
929eb6edc9Sross3:		subi	%r5, %r5, 4		/* pre-bias for the lwzu below */
939eb6edc9Sross
949eb6edc9Sross1:		lwzu	%r7, 4(%r5)		/* r5 += 4, fetch data word */
959eb6edc9Sross
969eb6edc9Sross2:		nor	%r0, %r7, %r10		/* do step 1: ~(x | 0x7f7f7f7f) */
979eb6edc9Sross		add	%r6, %r7, %r9		/* x + 0xfefefeff */
989eb6edc9Sross		and.	%r0, %r0, %r6		/* zero when no byte of x is 0 */
999eb6edc9Sross
1009eb6edc9Sross		beq+	1b			/* no NUL bytes here */
1019eb6edc9Sross
1029eb6edc9Sross		and	%r8, %r7, %r10		/* ok, a NUL is somewhere */
1039eb6edc9Sross		or	%r7, %r7, %r10		/* do step 2 to find out */
1049eb6edc9Sross		add	%r0, %r8, %r10		/* where */
1059eb6edc9Sross		nor	%r8, %r7, %r0		/* r8 = 0x80 in each NUL byte */
1069eb6edc9Sross
1079eb6edc9Sross		cntlzw	%r0, %r8		/* bit offset of first NUL flag */
1089eb6edc9Sross		srwi	%r4, %r0, 3		/* bits / 8 = byte offset in word */
1099eb6edc9Sross
1109eb6edc9Sross		add	%r4, %r5, %r4		/* r4 contains end pointer */
1119eb6edc9Sross		/* NOTE: Keep it so this function returns the end pointer
1129eb6edc9Sross		   in r4, so we can use it from other str* calls (strcat
1139eb6edc9Sross		   comes to mind) */
1149eb6edc9Sross
1159eb6edc9Sross		subf	%r3, %r3, %r4		/* length = end pointer - s */
1169eb6edc9Sross		blr
117*cf88c389SmattEND(strlen)
1189eb6edc9Sross/*----------------------------------------------------------------------*/
119