xref: /netbsd-src/common/lib/libc/string/strspn.c (revision 567c8efbdbecb6bc61f75356575fd2bfb358f379)
1*567c8efbSrillig /*	$NetBSD: strspn.c,v 1.3 2023/06/18 22:18:13 rillig Exp $	*/
24ab4902eSlneto 
34ab4902eSlneto /*-
44ab4902eSlneto  * Copyright (c) 2008 Joerg Sonnenberger
54ab4902eSlneto  * All rights reserved.
64ab4902eSlneto  *
74ab4902eSlneto  * Redistribution and use in source and binary forms, with or without
84ab4902eSlneto  * modification, are permitted provided that the following conditions
94ab4902eSlneto  * are met:
104ab4902eSlneto  * 1. Redistributions of source code must retain the above copyright
114ab4902eSlneto  *    notice, this list of conditions and the following disclaimer.
124ab4902eSlneto  * 2. Redistributions in binary form must reproduce the above copyright
134ab4902eSlneto  *    notice, this list of conditions and the following disclaimer in the
144ab4902eSlneto  *    documentation and/or other materials provided with the distribution.
154ab4902eSlneto  *
164ab4902eSlneto  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR
174ab4902eSlneto  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
184ab4902eSlneto  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
194ab4902eSlneto  * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT,
204ab4902eSlneto  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
214ab4902eSlneto  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
224ab4902eSlneto  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
234ab4902eSlneto  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
244ab4902eSlneto  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
254ab4902eSlneto  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
264ab4902eSlneto  */
274ab4902eSlneto 
284ab4902eSlneto #include <sys/cdefs.h>
29*567c8efbSrillig __RCSID("$NetBSD: strspn.c,v 1.3 2023/06/18 22:18:13 rillig Exp $");
304ab4902eSlneto 
314ab4902eSlneto #if !defined(_KERNEL) && !defined(_STANDALONE)
324ab4902eSlneto #include <assert.h>
334ab4902eSlneto #include <inttypes.h>
344ab4902eSlneto #include <limits.h>
354ab4902eSlneto #include <string.h>
364ab4902eSlneto #else
374ab4902eSlneto #include <lib/libkern/libkern.h>
384ab4902eSlneto #endif
394ab4902eSlneto 
404ab4902eSlneto #if ULONG_MAX != 0xffffffffffffffffull
414ab4902eSlneto 
/*
 * strspn() for systems where unsigned long is not 64 bits wide.
 *
 * Returns the number of leading bytes of s that all occur in charset.
 * Both arguments must be NUL-terminated strings; the terminating NUL is
 * never counted as a match.
 *
 * Bytes are handled as unsigned char throughout, so values >= 0x80 index
 * the bitmap correctly regardless of the signedness of plain char.
 */
size_t
strspn(const char *s, const char *charset)
{
	const unsigned char *const start = (const unsigned char *)s;
	const unsigned char *accept = (const unsigned char *)charset;
	const unsigned char *t = start;
	uint8_t set[32];	/* 256-bit membership map, one bit per byte value */

	/* An empty charset matches nothing. */
	if (accept[0] == '\0')
		return 0;

	/* Single-byte charset: a plain compare avoids building the bitmap. */
	if (accept[1] == '\0') {
		while (*t != '\0' && *t == accept[0])
			++t;
		return (size_t)(t - start);
	}

	(void)memset(set, 0, sizeof(set));

	/* Byte value c is stored in bit (c & 7) of set[c >> 3]. */
	for (; *accept != '\0'; ++accept)
		set[*accept >> 3] |= (uint8_t)(1u << (*accept & 7));

	/* Advance while the current byte is present in the bitmap. */
	while (*t != '\0' && (set[*t >> 3] & (1u << (*t & 7))) != 0)
		++t;

	return (size_t)(t - start);
}
704ab4902eSlneto 
714ab4902eSlneto #else
724ab4902eSlneto 
/* 64-bit system: use four 64-bit registers for the bitmask */
744ab4902eSlneto 
/*
 * Common worker for strspn() and strcspn() when unsigned long is 64 bits.
 *
 * Builds a 256-bit membership set from charset_s (one bit per unsigned char
 * value, spread over four unsigned longs), XORs each of the four words with
 * `invert', and returns the number of leading bytes of s_s whose bit is set
 * in the resulting set.  Scanning always stops at the terminating NUL of s_s.
 *
 *	invert == 0	count bytes that are in charset_s	(strspn)
 *	invert == ~0ul	count bytes that are not in charset_s	(strcspn)
 */
static size_t
strspn_x(const char *s_s, const char *charset_s, unsigned long invert)
{
	const unsigned char *s = (const unsigned char *)s_s;
	const unsigned char *charset = (const unsigned char *)charset_s;
	unsigned long m_0, m_4, m_8, m_c;
	unsigned char ch, next_ch;
	unsigned long bit;
	unsigned long check;
	size_t count;

	/*
	 * Four 64bit registers have one bit for each character value:
	 * m_0 covers 0x00-0x3f, m_4 0x40-0x7f, m_8 0x80-0xbf, m_c 0xc0-0xff.
	 */
	m_0 = 0;
	m_4 = 0;
	m_8 = 0;
	m_c = 0;

	for (ch = *charset; ch != 0; ch = next_ch) {
		/* Fetch the following byte before classifying this one. */
		next_ch = *++charset;
		bit = 1ul << (ch & 0x3f);
		if (__predict_true(ch < 0x80)) {
			if (ch < 0x40)
				m_0 |= bit;
			else
				m_4 |= bit;
		} else {
			if (ch < 0xc0)
				m_8 |= bit;
			else
				m_c |= bit;
		}
	}

	/* For strcspn() we just invert the validity set */
	m_0 ^= invert;
	m_4 ^= invert;
	m_8 ^= invert;
	m_c ^= invert;

	/*
	 * We could clear the lsb of m_0 so that the NUL byte fails the
	 * membership test and terminates the scan by itself.
	 * However prefetching the next char is beneficial and we must
	 * not read the byte after the \0 - as it might fault!
	 * So we take the 'hit' of the compare against 0.
	 */

	/*
	 * After the post-increment, s points one past the first byte, so
	 * s[count] is always the byte following the one held in ch.
	 */
	ch = *s++;
	for (count = 0; ch != 0; ch = next_ch) {
		next_ch = s[count];
		if (__predict_true(ch < 0x80)) {
			check = m_0;
			if (ch >= 0x40)
				check = m_4;
		} else {
			check = m_8;
			if (ch >= 0xc0)
				check = m_c;
		}
		/* Stop at the first byte whose bit is clear. */
		if (!((check >> (ch & 0x3f)) & 1))
			break;
		count++;
	}
	return count;
}
1404ab4902eSlneto 
/*
 * Return the number of leading bytes of s that all occur in charset.
 * Passing invert == 0 leaves the membership set built by strspn_x() as is.
 */
size_t
strspn(const char *s, const char *charset)
{
	return strspn_x(s, charset, 0);
}
1464ab4902eSlneto 
/*
 * Return the number of leading bytes of s that do not occur in charset.
 * Passing invert == ~0ul makes strspn_x() complement its membership set.
 */
size_t
strcspn(const char *s, const char *charset)
{
	return strspn_x(s, charset, ~0ul);
}
1524ab4902eSlneto #endif
153