xref: /illumos-gate/usr/src/lib/libc/i386/gen/strlen.S (revision 5d9d9091f564c198a760790b0bfa72c44e17912b)
1*5d9d9091SRichard Lowe/*
2*5d9d9091SRichard Lowe * CDDL HEADER START
3*5d9d9091SRichard Lowe *
4*5d9d9091SRichard Lowe * The contents of this file are subject to the terms of the
5*5d9d9091SRichard Lowe * Common Development and Distribution License (the "License").
6*5d9d9091SRichard Lowe * You may not use this file except in compliance with the License.
7*5d9d9091SRichard Lowe *
8*5d9d9091SRichard Lowe * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9*5d9d9091SRichard Lowe * or http://www.opensolaris.org/os/licensing.
10*5d9d9091SRichard Lowe * See the License for the specific language governing permissions
11*5d9d9091SRichard Lowe * and limitations under the License.
12*5d9d9091SRichard Lowe *
13*5d9d9091SRichard Lowe * When distributing Covered Code, include this CDDL HEADER in each
14*5d9d9091SRichard Lowe * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15*5d9d9091SRichard Lowe * If applicable, add the following below this CDDL HEADER, with the
16*5d9d9091SRichard Lowe * fields enclosed by brackets "[]" replaced with your own identifying
17*5d9d9091SRichard Lowe * information: Portions Copyright [yyyy] [name of copyright owner]
18*5d9d9091SRichard Lowe *
19*5d9d9091SRichard Lowe * CDDL HEADER END
20*5d9d9091SRichard Lowe */
21*5d9d9091SRichard Lowe/*
22*5d9d9091SRichard Lowe * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
23*5d9d9091SRichard Lowe * Use is subject to license terms.
24*5d9d9091SRichard Lowe */
25*5d9d9091SRichard Lowe
26*5d9d9091SRichard Lowe	.file	"strlen.s"
27*5d9d9091SRichard Lowe
28*5d9d9091SRichard Lowe/
29*5d9d9091SRichard Lowe/ strlen(s)
30*5d9d9091SRichard Lowe/
31*5d9d9091SRichard Lowe/ Returns the number of bytes in the string argument that precede the
31*5d9d9091SRichard Lowe/ terminating null byte.
32*5d9d9091SRichard Lowe/
33*5d9d9091SRichard Lowe/
34*5d9d9091SRichard Lowe/ Fast assembly language version of the following C-program strlen
35*5d9d9091SRichard Lowe/ which represents the `standard' for the C-library.
36*5d9d9091SRichard Lowe/
37*5d9d9091SRichard Lowe/	size_t
38*5d9d9091SRichard Lowe/	strlen(const char *s)
39*5d9d9091SRichard Lowe/	{
40*5d9d9091SRichard Lowe/		const char	*s0 = s + 1;
41*5d9d9091SRichard Lowe/
42*5d9d9091SRichard Lowe/		while (*s++ != '\0')
43*5d9d9091SRichard Lowe/			;
44*5d9d9091SRichard Lowe/		return (s - s0);
45*5d9d9091SRichard Lowe/	}
46*5d9d9091SRichard Lowe/
47*5d9d9091SRichard Lowe/ In this assembly language version, the following expression is used
48*5d9d9091SRichard Lowe/ to check if a 32-bit word data contains a null byte or not:
49*5d9d9091SRichard Lowe/	(A - 0x01010101) & ~A & 0x80808080
50*5d9d9091SRichard Lowe/ If the above expression generates a nonzero value,
51*5d9d9091SRichard Lowe/ that means the 32-bit word data contains a null byte.
52*5d9d9091SRichard Lowe/
53*5d9d9091SRichard Lowe
54*5d9d9091SRichard Lowe#include "SYS.h"
55*5d9d9091SRichard Lowe
/
/ size_t strlen(const char *s)
/
/ In:	4(%esp) = s (read on entry and again at the done label)
/ Out:	%eax = number of bytes before the first null byte
/ Uses:	%eax, %ecx, %edx and the flags; nothing is pushed and no memory
/	is written.
/
/ Up to three bytes are examined one at a time until the scan pointer is
/ 4-byte aligned; after that the string is scanned one aligned 32-bit
/ word at a time.
/
56*5d9d9091SRichard Lowe	ENTRY(strlen)
57*5d9d9091SRichard Lowe	mov	4(%esp), %edx		/ %edx = s
58*5d9d9091SRichard Lowe	mov	%edx, %eax		/ %eax = scan pointer, starts at s
59*5d9d9091SRichard Lowe
60*5d9d9091SRichard Lowe	and	$3, %edx		/ %edx = s & 3; zero means aligned
61*5d9d9091SRichard Lowe	jz	countbytes
62*5d9d9091SRichard Lowe					/ work byte-wise until aligned
					/ %edx keeps s & 3 throughout, so each
					/ compare below asks whether the bytes
					/ consumed so far reached alignment
63*5d9d9091SRichard Lowe	cmpb	$0, (%eax)		/ is *src == 0 ?
64*5d9d9091SRichard Lowe	jz	done
65*5d9d9091SRichard Lowe	inc	%eax			/ increment src
66*5d9d9091SRichard Lowe	cmp	$3, %edx		/ s & 3 == 3: one byte was enough
67*5d9d9091SRichard Lowe	jz	countbytes
68*5d9d9091SRichard Lowe	cmpb	$0, (%eax)
69*5d9d9091SRichard Lowe	jz	done
70*5d9d9091SRichard Lowe	inc	%eax
71*5d9d9091SRichard Lowe	cmp	$2, %edx		/ s & 3 == 2: two bytes were enough
72*5d9d9091SRichard Lowe	jz	countbytes
73*5d9d9091SRichard Lowe	cmpb	$0, (%eax)
74*5d9d9091SRichard Lowe	jz	done
75*5d9d9091SRichard Lowe	inc	%eax			/ s & 3 == 1: third byte aligns src
76*5d9d9091SRichard Lowe
	/ NOTE(review): the fall-through from the inc above runs through
	/ whatever padding the assembler emits for this directive;
	/ presumably no-ops -- confirm for the assembler in use.
77*5d9d9091SRichard Lowe	.align    16
78*5d9d9091SRichard Lowe
	/
	/ Word loop.  Invariant on entry: %eax is 4-byte aligned and no null
	/ byte lies in [s, %eax).  Each pass loads one aligned word, so the
	/ load may cover bytes that follow the terminator inside that word.
	/ %ecx ends up nonzero exactly when the word holds a null byte, with
	/ bit 7 of the lowest such byte being the lowest bit set.  Bits for
	/ higher bytes can be set spuriously by the borrow out of a null
	/ byte, which is why only the lowest set bit is used afterwards.
	/
79*5d9d9091SRichard Lowecountbytes:
80*5d9d9091SRichard Lowe	mov	(%eax), %ecx		/ load wrd
81*5d9d9091SRichard Lowe	add	$4, %eax		/ increment src by 4 (bytes in word)
82*5d9d9091SRichard Lowe	lea	-0x01010101(%ecx), %edx	/ (wrd - 0x01010101)
83*5d9d9091SRichard Lowe	not	%ecx			/ ~wrd
84*5d9d9091SRichard Lowe	and	$0x80808080, %ecx	/ ~wrd & 0x80808080
85*5d9d9091SRichard Lowe	and	%edx, %ecx		/ (wrd - 0x01010101) & ~wrd & 0x80808080
86*5d9d9091SRichard Lowe	jz	countbytes		/ if zero, no null byte found -- cont
87*5d9d9091SRichard Lowe
	/ Reached only by falling out of the loop; no visible jump targets
	/ this label.  %ecx has bit 7, 15, 23 or 31 set, so the bit index
	/ shifted right by 3 is the byte offset 0..3 of the null byte
	/ within the word that was just loaded.
88*5d9d9091SRichard Lowehas_zero_byte:
89*5d9d9091SRichard Lowe	bsfl	%ecx, %ecx		/ find first set bit (null byte)
90*5d9d9091SRichard Lowe	shr	$3, %ecx		/ switch bit position to byte posn
91*5d9d9091SRichard Lowe	lea	-4(%eax, %ecx, 1), %eax	/ undo pre-increment and count bytes
	/ At this label %eax holds the address of the null byte, whether it
	/ was found by the byte-wise checks or by the word loop.
92*5d9d9091SRichard Lowedone:
93*5d9d9091SRichard Lowe	sub	4(%esp), %eax		/ return (src - old_src)
94*5d9d9091SRichard Lowe	ret
95*5d9d9091SRichard Lowe	SET_SIZE(strlen)
96