xref: /minix3/common/lib/libc/string/strspn.c (revision 0a6a1f1d05b60e214de2f05a7310ddd1f0e590e7)
1*0a6a1f1dSLionel Sambuc /*	$NetBSD: strspn.c,v 1.1 2014/07/19 18:38:33 lneto Exp $	*/
2*0a6a1f1dSLionel Sambuc 
3*0a6a1f1dSLionel Sambuc /*-
4*0a6a1f1dSLionel Sambuc  * Copyright (c) 2008 Joerg Sonnenberger
5*0a6a1f1dSLionel Sambuc  * All rights reserved.
6*0a6a1f1dSLionel Sambuc  *
7*0a6a1f1dSLionel Sambuc  * Redistribution and use in source and binary forms, with or without
8*0a6a1f1dSLionel Sambuc  * modification, are permitted provided that the following conditions
9*0a6a1f1dSLionel Sambuc  * are met:
10*0a6a1f1dSLionel Sambuc  * 1. Redistributions of source code must retain the above copyright
11*0a6a1f1dSLionel Sambuc  *    notice, this list of conditions and the following disclaimer.
12*0a6a1f1dSLionel Sambuc  * 2. Redistributions in binary form must reproduce the above copyright
13*0a6a1f1dSLionel Sambuc  *    notice, this list of conditions and the following disclaimer in the
14*0a6a1f1dSLionel Sambuc  *    documentation and/or other materials provided with the distribution.
15*0a6a1f1dSLionel Sambuc  *
16*0a6a1f1dSLionel Sambuc  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR
17*0a6a1f1dSLionel Sambuc  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
18*0a6a1f1dSLionel Sambuc  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
19*0a6a1f1dSLionel Sambuc  * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT,
20*0a6a1f1dSLionel Sambuc  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
21*0a6a1f1dSLionel Sambuc  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
22*0a6a1f1dSLionel Sambuc  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
23*0a6a1f1dSLionel Sambuc  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24*0a6a1f1dSLionel Sambuc  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
25*0a6a1f1dSLionel Sambuc  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26*0a6a1f1dSLionel Sambuc  */
27*0a6a1f1dSLionel Sambuc 
28*0a6a1f1dSLionel Sambuc #include <sys/cdefs.h>
29*0a6a1f1dSLionel Sambuc __RCSID("$NetBSD: strspn.c,v 1.1 2014/07/19 18:38:33 lneto Exp $");
30*0a6a1f1dSLionel Sambuc 
31*0a6a1f1dSLionel Sambuc #if !defined(_KERNEL) && !defined(_STANDALONE)
32*0a6a1f1dSLionel Sambuc #include <assert.h>
33*0a6a1f1dSLionel Sambuc #include <inttypes.h>
34*0a6a1f1dSLionel Sambuc #include <limits.h>
35*0a6a1f1dSLionel Sambuc #include <string.h>
36*0a6a1f1dSLionel Sambuc #else
37*0a6a1f1dSLionel Sambuc #include <lib/libkern/libkern.h>
38*0a6a1f1dSLionel Sambuc #endif
39*0a6a1f1dSLionel Sambuc 
40*0a6a1f1dSLionel Sambuc #if ULONG_MAX != 0xffffffffffffffffull
41*0a6a1f1dSLionel Sambuc 
/*
 * strspn --
 *	Return the length of the longest prefix of `s' that consists solely
 *	of bytes occurring in `charset'.  Both arguments must be non-NULL,
 *	NUL-terminated strings.
 *
 *	Membership is tested against a 256-bit bitmap (32 bytes, one bit per
 *	possible byte value).  All bytes are handled as unsigned char so
 *	values >= 0x80 index the bitmap correctly whatever the signedness of
 *	plain char.
 */
size_t
strspn(const char *s, const char *charset)
{
	const unsigned char *start = (const unsigned char *)s;
	const unsigned char *accept = (const unsigned char *)charset;
	const unsigned char *t;
	uint8_t set[32];

	_DIAGASSERT(s != NULL);
	_DIAGASSERT(charset != NULL);

	/* Empty set: no byte can match. */
	if (accept[0] == '\0')
		return 0;

	/* One-byte set: a direct compare is cheaper than building the map. */
	if (accept[1] == '\0') {
		for (t = start; *t != '\0'; ++t) {
			if (*t != accept[0])
				break;
		}
		return (size_t)(t - start);
	}

	/* Cleared only on the slow path so the fast paths stay cheap. */
	(void)memset(set, 0, sizeof(set));

	/* Byte value c lives in bit (c & 7) of set[c >> 3]. */
	for (; *accept != '\0'; ++accept)
		set[*accept >> 3] |= (uint8_t)(1u << (*accept & 7));

	for (t = start; *t != '\0'; ++t) {
		if ((set[*t >> 3] & (1u << (*t & 7))) == 0)
			break;
	}
	return (size_t)(t - start);
}
73*0a6a1f1dSLionel Sambuc 
74*0a6a1f1dSLionel Sambuc #else
75*0a6a1f1dSLionel Sambuc 
/* 64-bit system: use four 64-bit registers for the bitmask */
77*0a6a1f1dSLionel Sambuc 
/*
 * strspn_x --
 *	Common scanner used when unsigned long is 64 bits wide.
 *
 *	Builds a 256-bit membership set from `charset_s', held in four
 *	64-bit words covering byte values 0x00-0x3f, 0x40-0x7f, 0x80-0xbf
 *	and 0xc0-0xff.  Every word is then XORed with `invert', and the
 *	function returns the number of leading bytes of `s_s' whose bit is
 *	set in the resulting masks.  The scan always stops at the
 *	terminating NUL of `s_s'.
 *
 *	invert == 0	count leading bytes that ARE in charset
 *	invert == ~0ul	count leading bytes that are NOT in charset
 */
static size_t
strspn_x(const char *s_s, const char *charset_s, unsigned long invert)
{
	const unsigned char *s = (const unsigned char *)s_s;
	const unsigned char *charset = (const unsigned char *)charset_s;
	unsigned long m_0, m_4, m_8, m_c;
	unsigned char ch, next_ch;
	unsigned long bit;
	unsigned long check;
	size_t count;

	/* Four 64bit registers have one bit for each character value */
	m_0 = 0;
	m_4 = 0;
	m_8 = 0;
	m_c = 0;

	for (ch = *charset; ch != 0; ch = next_ch) {
		/* Safe to read ahead: ch != 0, so the next byte exists. */
		next_ch = *++charset;
		bit = 1ul << (ch & 0x3f);
		if (__predict_true(ch < 0x80)) {
			if (ch < 0x40)
				m_0 |= bit;
			else
				m_4 |= bit;
		} else {
			if (ch < 0xc0)
				m_8 |= bit;
			else
				m_c |= bit;
		}
	}

	/* For strcspn() we just invert the validity set */
	m_0 ^= invert;
	m_4 ^= invert;
	m_8 ^= invert;
	m_c ^= invert;

	/*
	 * We could clear the lsb of m_0 so that the scan terminates at the
	 * end of the input string without a separate test.
	 * However, prefetching the next char is beneficial and we must
	 * not read the byte after the \0 - as it might fault!
	 * So we take the 'hit' of the compare against 0.
	 */

	/* s now points one past the byte held in ch, so s[count] is next. */
	ch = *s++;
	for (count = 0; ch != 0; ch = next_ch) {
		next_ch = s[count];
		/* Select the 64-bit word that covers ch's value range. */
		if (__predict_true(ch < 0x80)) {
			check = m_0;
			if (ch >= 0x40)
				check = m_4;
		} else {
			check = m_8;
			if (ch >= 0xc0)
				check = m_c;
		}
		if (!((check >> (ch & 0x3f)) & 1))
			break;
		count++;
	}
	return count;
}
143*0a6a1f1dSLionel Sambuc 
/*
 * strspn --
 *	Return the number of leading bytes of `s' that all occur in
 *	`charset'.  Delegates to strspn_x() with the membership set left
 *	as built (invert == 0).
 */
size_t
strspn(const char *s, const char *charset)
{
	return strspn_x(s, charset, 0);
}
149*0a6a1f1dSLionel Sambuc 
/*
 * strcspn --
 *	Return the number of leading bytes of `s' none of which occur in
 *	`charset'.  Delegates to strspn_x() with every mask bit flipped
 *	(invert == ~0ul), so bytes outside charset count as accepted.
 */
size_t
strcspn(const char *s, const char *charset)
{
	return strspn_x(s, charset, ~0ul);
}
155*0a6a1f1dSLionel Sambuc #endif
156