xref: /minix3/common/lib/libc/arch/powerpc/string/strlen.S (revision b6cbf7203b080219de306404f8022a65b7884f33)
1*b6cbf720SGianluca Guida/*	$NetBSD: strlen.S,v 1.6 2011/01/15 07:31:12 matt Exp $ */
2*b6cbf720SGianluca Guida
3*b6cbf720SGianluca Guida/*-
4*b6cbf720SGianluca Guida * Copyright (C) 2001	Martin J. Laubach <mjl@NetBSD.org>
5*b6cbf720SGianluca Guida * All rights reserved.
6*b6cbf720SGianluca Guida *
7*b6cbf720SGianluca Guida * Redistribution and use in source and binary forms, with or without
8*b6cbf720SGianluca Guida * modification, are permitted provided that the following conditions
9*b6cbf720SGianluca Guida * are met:
10*b6cbf720SGianluca Guida * 1. Redistributions of source code must retain the above copyright
11*b6cbf720SGianluca Guida *    notice, this list of conditions and the following disclaimer.
12*b6cbf720SGianluca Guida * 2. Redistributions in binary form must reproduce the above copyright
13*b6cbf720SGianluca Guida *    notice, this list of conditions and the following disclaimer in the
14*b6cbf720SGianluca Guida *    documentation and/or other materials provided with the distribution.
15*b6cbf720SGianluca Guida * 3. The name of the author may not be used to endorse or promote products
16*b6cbf720SGianluca Guida *    derived from this software without specific prior written permission.
17*b6cbf720SGianluca Guida *
18*b6cbf720SGianluca Guida * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
19*b6cbf720SGianluca Guida * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
20*b6cbf720SGianluca Guida * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
21*b6cbf720SGianluca Guida * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
22*b6cbf720SGianluca Guida * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
23*b6cbf720SGianluca Guida * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
24*b6cbf720SGianluca Guida * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
25*b6cbf720SGianluca Guida * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26*b6cbf720SGianluca Guida * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27*b6cbf720SGianluca Guida * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28*b6cbf720SGianluca Guida */
29*b6cbf720SGianluca Guida/*----------------------------------------------------------------------*/
30*b6cbf720SGianluca Guida
31*b6cbf720SGianluca Guida#include <machine/asm.h>
32*b6cbf720SGianluca Guida
33*b6cbf720SGianluca Guida__RCSID("$NetBSD: strlen.S,v 1.6 2011/01/15 07:31:12 matt Exp $");
34*b6cbf720SGianluca Guida
35*b6cbf720SGianluca Guida/*----------------------------------------------------------------------*/
36*b6cbf720SGianluca Guida/* The algorithm here uses the following techniques:
37*b6cbf720SGianluca Guida
38*b6cbf720SGianluca Guida   1) Given a word 'x', we can test to see if it contains any 0 bytes
39*b6cbf720SGianluca Guida      by subtracting 0x01010101, and seeing if any of the high bits of each
40*b6cbf720SGianluca Guida      byte changed from 0 to 1. This works because the least significant
41*b6cbf720SGianluca Guida      0 byte must have had no incoming carry (otherwise it's not the least
42*b6cbf720SGianluca Guida      significant), so it is 0x00 - 0x01 == 0xff. For all other
43*b6cbf720SGianluca Guida      byte values, either they have the high bit set initially, or when
44*b6cbf720SGianluca Guida      1 is subtracted you get a value in the range 0x00-0x7f, none of which
45*b6cbf720SGianluca Guida      have their high bit set. The expression here is
46*b6cbf720SGianluca Guida      (x + 0xfefefeff) & ~(x | 0x7f7f7f7f), which gives 0x00000000 when
47*b6cbf720SGianluca Guida      there were no 0x00 bytes in the word.
48*b6cbf720SGianluca Guida
49*b6cbf720SGianluca Guida   2) Given a word 'x', we can test to see _which_ byte was zero by
50*b6cbf720SGianluca Guida      calculating ~(((x & 0x7f7f7f7f) + 0x7f7f7f7f) | x | 0x7f7f7f7f).
51*b6cbf720SGianluca Guida      This produces 0x80 in each byte that was zero, and 0x00 in all
52*b6cbf720SGianluca Guida      the other bytes. The '| 0x7f7f7f7f' clears the low 7 bits in each
53*b6cbf720SGianluca Guida      byte, and the '| x' part ensures that bytes with the high bit set
54*b6cbf720SGianluca Guida      produce 0x00. The addition will carry into the high bit of each byte
55*b6cbf720SGianluca Guida      iff that byte had one of its low 7 bits set. We can then just see
56*b6cbf720SGianluca Guida      which was the most significant bit set and divide by 8 to find how
57*b6cbf720SGianluca Guida      many to add to the index.
58*b6cbf720SGianluca Guida      This is from the book 'The PowerPC Compiler Writer's Guide',
59*b6cbf720SGianluca Guida      by Steve Hoxey, Faraydon Karim, Bill Hay and Hank Warren.
60*b6cbf720SGianluca Guida*/
61*b6cbf720SGianluca Guida/*----------------------------------------------------------------------*/
62*b6cbf720SGianluca Guida
63*b6cbf720SGianluca Guida		.text
64*b6cbf720SGianluca Guida		.align 4
65*b6cbf720SGianluca Guida
66*b6cbf720SGianluca GuidaENTRY(strlen)
67*b6cbf720SGianluca Guida
68*b6cbf720SGianluca Guida		/* Setup constants */
69*b6cbf720SGianluca Guida		lis	%r10, 0x7f7f
70*b6cbf720SGianluca Guida		lis	%r9, 0xfefe
71*b6cbf720SGianluca Guida		ori	%r10, %r10, 0x7f7f
72*b6cbf720SGianluca Guida		ori	%r9, %r9, 0xfeff
73*b6cbf720SGianluca Guida
74*b6cbf720SGianluca Guida		/* Mask out leading bytes on non aligned strings */
75*b6cbf720SGianluca Guida		rlwinm.	%r8, %r3, 3, 27, 28	/* leading bits to mask */
76*b6cbf720SGianluca Guida#ifdef _LP64
77*b6cbf720SGianluca Guida		clrrdi	%r5, %r3, 2		/*  clear low 2 addr bits */
78*b6cbf720SGianluca Guida#else
79*b6cbf720SGianluca Guida		clrrwi	%r5, %r3, 2		/*  clear low 2 addr bits */
80*b6cbf720SGianluca Guida#endif
81*b6cbf720SGianluca Guida		li	%r0, -1
82*b6cbf720SGianluca Guida		beq+	3f			/* skip alignment if already */
83*b6cbf720SGianluca Guida						/* aligned */
84*b6cbf720SGianluca Guida
85*b6cbf720SGianluca Guida		srw	%r0, %r0, %r8		/* make 0000...1111 mask */
86*b6cbf720SGianluca Guida
87*b6cbf720SGianluca Guida		lwz	%r7, 0(%r5)
88*b6cbf720SGianluca Guida		nor	%r0, %r0, %r0		/* invert mask */
89*b6cbf720SGianluca Guida		or	%r7, %r7, %r0		/* make leading bytes != 0 */
90*b6cbf720SGianluca Guida		b	2f
91*b6cbf720SGianluca Guida
92*b6cbf720SGianluca Guida3:		subi	%r5, %r5, 4
93*b6cbf720SGianluca Guida
94*b6cbf720SGianluca Guida1:		lwzu	%r7, 4(%r5)		/* fetch data word */
95*b6cbf720SGianluca Guida
96*b6cbf720SGianluca Guida2:		nor	%r0, %r7, %r10		/* do step 1 */
97*b6cbf720SGianluca Guida		add	%r6, %r7, %r9
98*b6cbf720SGianluca Guida		and.	%r0, %r0, %r6
99*b6cbf720SGianluca Guida
100*b6cbf720SGianluca Guida		beq+	1b			/* no NUL bytes here */
101*b6cbf720SGianluca Guida
102*b6cbf720SGianluca Guida		and	%r8, %r7, %r10		/* ok, a NUL is somewhere */
103*b6cbf720SGianluca Guida		or	%r7, %r7, %r10		/* do step 2 to find out */
104*b6cbf720SGianluca Guida		add	%r0, %r8, %r10		/* where */
105*b6cbf720SGianluca Guida		nor	%r8, %r7, %r0
106*b6cbf720SGianluca Guida
107*b6cbf720SGianluca Guida		cntlzw	%r0, %r8		/* offset from this word */
108*b6cbf720SGianluca Guida		srwi	%r4, %r0, 3
109*b6cbf720SGianluca Guida
110*b6cbf720SGianluca Guida		add	%r4, %r5, %r4		/* r4 contains end pointer */
111*b6cbf720SGianluca Guida		/* NOTE: Keep it so this function returns the end pointer
112*b6cbf720SGianluca Guida		   in r4, so we can it use from other str* calls (strcat
113*b6cbf720SGianluca Guida		   comes to mind */
114*b6cbf720SGianluca Guida
115*b6cbf720SGianluca Guida		subf	%r3, %r3, %r4
116*b6cbf720SGianluca Guida		blr
117*b6cbf720SGianluca GuidaEND(strlen)
118*b6cbf720SGianluca Guida/*----------------------------------------------------------------------*/
119