xref: /minix3/common/lib/libc/arch/or1k/string/strlen.S (revision 0a6a1f1d05b60e214de2f05a7310ddd1f0e590e7)
1*0a6a1f1dSLionel Sambuc/*	$NetBSD: strlen.S,v 1.1 2014/09/03 19:34:25 matt Exp $ */
2*0a6a1f1dSLionel Sambuc
3*0a6a1f1dSLionel Sambuc/*-
4*0a6a1f1dSLionel Sambuc * Copyright (C) 2001	Martin J. Laubach <mjl@NetBSD.org>
5*0a6a1f1dSLionel Sambuc * All rights reserved.
6*0a6a1f1dSLionel Sambuc *
7*0a6a1f1dSLionel Sambuc * Redistribution and use in source and binary forms, with or without
8*0a6a1f1dSLionel Sambuc * modification, are permitted provided that the following conditions
9*0a6a1f1dSLionel Sambuc * are met:
10*0a6a1f1dSLionel Sambuc * 1. Redistributions of source code must retain the above copyright
11*0a6a1f1dSLionel Sambuc *    notice, this list of conditions and the following disclaimer.
12*0a6a1f1dSLionel Sambuc * 2. Redistributions in binary form must reproduce the above copyright
13*0a6a1f1dSLionel Sambuc *    notice, this list of conditions and the following disclaimer in the
14*0a6a1f1dSLionel Sambuc *    documentation and/or other materials provided with the distribution.
15*0a6a1f1dSLionel Sambuc * 3. The name of the author may not be used to endorse or promote products
16*0a6a1f1dSLionel Sambuc *    derived from this software without specific prior written permission.
17*0a6a1f1dSLionel Sambuc *
18*0a6a1f1dSLionel Sambuc * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
19*0a6a1f1dSLionel Sambuc * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
20*0a6a1f1dSLionel Sambuc * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
21*0a6a1f1dSLionel Sambuc * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
22*0a6a1f1dSLionel Sambuc * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
23*0a6a1f1dSLionel Sambuc * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
24*0a6a1f1dSLionel Sambuc * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
25*0a6a1f1dSLionel Sambuc * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26*0a6a1f1dSLionel Sambuc * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27*0a6a1f1dSLionel Sambuc * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28*0a6a1f1dSLionel Sambuc */
29*0a6a1f1dSLionel Sambuc/*----------------------------------------------------------------------*/
30*0a6a1f1dSLionel Sambuc
31*0a6a1f1dSLionel Sambuc#include <machine/asm.h>
32*0a6a1f1dSLionel Sambuc
33*0a6a1f1dSLionel Sambuc__RCSID("$NetBSD: strlen.S,v 1.1 2014/09/03 19:34:25 matt Exp $");
34*0a6a1f1dSLionel Sambuc
35*0a6a1f1dSLionel Sambuc/*----------------------------------------------------------------------*/
36*0a6a1f1dSLionel Sambuc/* The algorithm here uses the following techniques:
37*0a6a1f1dSLionel Sambuc
38*0a6a1f1dSLionel Sambuc   1) Given a word 'x', we can test to see if it contains any 0 bytes
39*0a6a1f1dSLionel Sambuc      by subtracting 0x01010101, and seeing if any of the high bits of each
40*0a6a1f1dSLionel Sambuc      byte changed from 0 to 1. This works because the least significant
41*0a6a1f1dSLionel Sambuc      0 byte must have had no incoming carry (otherwise it's not the least
42*0a6a1f1dSLionel Sambuc      significant), so it is 0x00 - 0x01 == 0xff. For all other
43*0a6a1f1dSLionel Sambuc      byte values, either they have the high bit set initially, or when
44*0a6a1f1dSLionel Sambuc      1 is subtracted you get a value in the range 0x00-0x7f, none of which
45*0a6a1f1dSLionel Sambuc      have their high bit set. The expression here is
46*0a6a1f1dSLionel Sambuc      (x + 0xfefefeff) & ~(x | 0x7f7f7f7f), which gives 0x00000000 when
47*0a6a1f1dSLionel Sambuc      there were no 0x00 bytes in the word.
48*0a6a1f1dSLionel Sambuc
49*0a6a1f1dSLionel Sambuc   2) Given a word 'x', we can test to see _which_ byte was zero by
50*0a6a1f1dSLionel Sambuc      calculating ~(((x & 0x7f7f7f7f) + 0x7f7f7f7f) | x | 0x7f7f7f7f).
51*0a6a1f1dSLionel Sambuc      This produces 0x80 in each byte that was zero, and 0x00 in all
52*0a6a1f1dSLionel Sambuc      the other bytes. The '| 0x7f7f7f7f' clears the low 7 bits in each
53*0a6a1f1dSLionel Sambuc      byte, and the '| x' part ensures that bytes with the high bit set
54*0a6a1f1dSLionel Sambuc      produce 0x00. The addition will carry into the high bit of each byte
55*0a6a1f1dSLionel Sambuc      iff that byte had one of its low 7 bits set. We can then just see
56*0a6a1f1dSLionel Sambuc      which was the most significant bit set and divide by 8 to find how
57*0a6a1f1dSLionel Sambuc      many to add to the index.
58*0a6a1f1dSLionel Sambuc      This is from the book 'The PowerPC Compiler Writer's Guide',
59*0a6a1f1dSLionel Sambuc      by Steve Hoxey, Faraydon Karim, Bill Hay and Hank Warren.
60*0a6a1f1dSLionel Sambuc*/
61*0a6a1f1dSLionel Sambuc/*----------------------------------------------------------------------*/
62*0a6a1f1dSLionel Sambuc
63*0a6a1f1dSLionel SambucENTRY(strlen)
64*0a6a1f1dSLionel Sambuc
65*0a6a1f1dSLionel Sambuc		l.or	r12, r3, r0		/* save start */
66*0a6a1f1dSLionel Sambuc
67*0a6a1f1dSLionel Sambuc		/* Setup constants */
68*0a6a1f1dSLionel Sambuc		l.movhi	r13, 0x7f7f
69*0a6a1f1dSLionel Sambuc		l.movhi	r15, 0xfefe
70*0a6a1f1dSLionel Sambuc		l.ori	r13, r13, 0x7f7f
71*0a6a1f1dSLionel Sambuc		l.ori	r15, r15, 0xfeff
72*0a6a1f1dSLionel Sambuc
73*0a6a1f1dSLionel Sambuc1:		l.andi	r7, r12, 3		/* get low bits of start */
74*0a6a1f1dSLionel Sambuc		l.sfeqi	r7, 0			/* all clear? */
75*0a6a1f1dSLionel Sambuc		l.bf	3f			/*   yes, skip alignment */
76*0a6a1f1dSLionel Sambuc		l.nop				/* -- delay slot -- */
77*0a6a1f1dSLionel Sambuc
78*0a6a1f1dSLionel Sambuc		l.sub	r12, r12, r7		/* word align start */
79*0a6a1f1dSLionel Sambuc		l.lwz	r8, 0(r12)		/* load data */
80*0a6a1f1dSLionel Sambuc		l.addi	r6, r0, -1		/* r6 = 0xffffffff */
81*0a6a1f1dSLionel Sambuc		l.slli	r5, r7, 3		/* bits to bytes */
82*0a6a1f1dSLionel Sambuc		l.srl	r6, r6, r5		/* clear low (MSB) bytes */
83*0a6a1f1dSLionel Sambuc		l.xori	r6, r6, -1		/* complement */
84*0a6a1f1dSLionel Sambuc		l.or	r8, r8, r6		/* merge with loaded word */
85*0a6a1f1dSLionel Sambuc		l.j	4f			/* and process */
86*0a6a1f1dSLionel Sambuc		l.nop				/* -- delay-slot -- */
87*0a6a1f1dSLionel Sambuc
88*0a6a1f1dSLionel Sambuc2:		l.addi	r12, r12, 4		/* advance to next word */
89*0a6a1f1dSLionel Sambuc3:		l.lwz	r8, 0(r12)		/* fetch data word */
90*0a6a1f1dSLionel Sambuc
91*0a6a1f1dSLionel Sambuc		// Step 1: (x + 0xfefefeff) & ~(x | 0x7f7f7f7f)
92*0a6a1f1dSLionel Sambuc4:		l.or	r7, r8, r13		/* t0 = x | 0x7f7f7f7f */
93*0a6a1f1dSLionel Sambuc		l.xori	r6, r7, -1		/* t1 = ~t0 */
94*0a6a1f1dSLionel Sambuc		l.add	r5, r8, r15		/* t2 = x + 0xfefefeff */
95*0a6a1f1dSLionel Sambuc		l.and	r4, r7, r5		/* t3 = t1 & t2 */
96*0a6a1f1dSLionel Sambuc		l.sfeqi	r4, 0
97*0a6a1f1dSLionel Sambuc		l.bf	2b			/* no NUL bytes here */
98*0a6a1f1dSLionel Sambuc		l.nop				/* -- delay slot -- */
99*0a6a1f1dSLionel Sambuc
100*0a6a1f1dSLionel Sambuc		// Step 2: ~(((x & 0x7f7f7f7f) + 0x7f7f7f7f) | x | 0x7f7f7f7f)
101*0a6a1f1dSLionel Sambuc		l.and	r7, r8, r13		/* t0 = x & 0x7f7f7f7f */
102*0a6a1f1dSLionel Sambuc		l.or	r6, r8, r13		/* t1 = x | 0x7f7f7f7f */
103*0a6a1f1dSLionel Sambuc		l.add	r5, r7, r13		/* t2 = t0 + 0x7f7f7f7f */
104*0a6a1f1dSLionel Sambuc		l.or	r4, r5, r6		/* t3 = t2 | t1 */
105*0a6a1f1dSLionel Sambuc		l.xori	r4, r4, -1		/* t3 = ~t3 */
106*0a6a1f1dSLionel Sambuc
107*0a6a1f1dSLionel Sambuc		l.fl1	r5, r4			/* find last bit set */
108*0a6a1f1dSLionel Sambuc		l.ori	r6, r0, 32		/* bits per word */
109*0a6a1f1dSLionel Sambuc		l.sub	r7, r6, r5		/* cvt to leading zeros */
110*0a6a1f1dSLionel Sambuc		l.srli	r8, r7, 3		/* shift to byte count */
111*0a6a1f1dSLionel Sambuc
112*0a6a1f1dSLionel SambucLdone:
113*0a6a1f1dSLionel Sambuc		l.add	r12, r12, r8		/* r12 contains end pointer */
114*0a6a1f1dSLionel Sambuc
115*0a6a1f1dSLionel Sambuc		/* NOTE: Keep it so this function returns the end pointer
116*0a6a1f1dSLionel Sambuc		   in r12, so we can it use from other str* calls (strcat
117*0a6a1f1dSLionel Sambuc		   comes to mind */
118*0a6a1f1dSLionel Sambuc
119*0a6a1f1dSLionel Sambuc		l.sub	r11, r12, r3		/* length = end - start */
120*0a6a1f1dSLionel Sambuc		l.jr	lr			/* return */
121*0a6a1f1dSLionel Sambuc		l.nop				/* -- delay slot -- */
122*0a6a1f1dSLionel SambucEND(strlen)
123*0a6a1f1dSLionel Sambuc/*----------------------------------------------------------------------*/
124