1*567c8efbSrillig /* $NetBSD: strspn.c,v 1.3 2023/06/18 22:18:13 rillig Exp $ */
24ab4902eSlneto
/*-
 * Copyright (c) 2008 Joerg Sonnenberger
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
274ab4902eSlneto
284ab4902eSlneto #include <sys/cdefs.h>
29*567c8efbSrillig __RCSID("$NetBSD: strspn.c,v 1.3 2023/06/18 22:18:13 rillig Exp $");
304ab4902eSlneto
314ab4902eSlneto #if !defined(_KERNEL) && !defined(_STANDALONE)
324ab4902eSlneto #include <assert.h>
334ab4902eSlneto #include <inttypes.h>
344ab4902eSlneto #include <limits.h>
354ab4902eSlneto #include <string.h>
364ab4902eSlneto #else
374ab4902eSlneto #include <lib/libkern/libkern.h>
384ab4902eSlneto #endif
394ab4902eSlneto
404ab4902eSlneto #if ULONG_MAX != 0xffffffffffffffffull
414ab4902eSlneto
/*
 * Return the length of the initial segment of s that consists entirely
 * of bytes found in charset.
 *
 * Generic variant, compiled when unsigned long is not 64 bits wide: the
 * accepted bytes are recorded in a 256-bit map held in 32 octets.
 */
size_t
strspn(const char *s, const char *charset)
{
	static const uint8_t idx[8] = { 1, 2, 4, 8, 16, 32, 64, 128 };
	uint8_t set[32];
	const char *t;
/* Go through unsigned char so bytes >= 0x80 never yield a negative index. */
#define UC(a) ((unsigned int)(unsigned char)(a))

	/* An empty charset accepts nothing. */
	if (charset[0] == '\0')
		return 0;

	/* Exactly one accepted byte: compare directly, no bitmap needed. */
	if (charset[1] == '\0') {
		for (t = s; *t != '\0'; ++t) {
			if (*t != *charset)
				break;
		}
		return (size_t)(t - s);
	}

	(void)memset(set, 0, sizeof(set));

	/* Byte value / 8 selects the octet, byte value % 8 selects the bit. */
	for (; *charset != '\0'; ++charset)
		set[UC(*charset) >> 3] |= idx[UC(*charset) & 7];

	/* Stop at the first byte whose bit is clear, or at the NUL. */
	for (t = s; *t != '\0'; ++t) {
		if ((set[UC(*t) >> 3] & idx[UC(*t) & 7]) == 0)
			break;
	}
	return (size_t)(t - s);
#undef UC
}
704ab4902eSlneto
714ab4902eSlneto #else
724ab4902eSlneto
/* 64-bit system: use four 64-bit registers for the bitmask */
744ab4902eSlneto
/*
 * Common worker for strspn() and strcspn() when unsigned long is 64 bits.
 *
 * Returns the number of leading bytes of s_s that are accepted.  The set
 * of bytes occurring in charset_s is built as four 64-bit masks and each
 * mask is then XORed with invert: 0 keeps the set as is (accept bytes
 * that are in charset_s), ~0ul complements it (accept bytes that are not
 * in charset_s).  The scan always stops at the terminating NUL of s_s.
 */
static size_t
strspn_x(const char *s_s, const char *charset_s, unsigned long invert)
{
	const unsigned char *s = (const unsigned char *)s_s;
	const unsigned char *charset = (const unsigned char *)charset_s;
	unsigned long m_0, m_4, m_8, m_c;
	unsigned char ch, next_ch;
	unsigned long bit;
	unsigned long check;
	size_t count;

	/* Four 64bit registers have one bit for each character value */
	m_0 = 0;
	m_4 = 0;
	m_8 = 0;
	m_c = 0;

	/*
	 * The top two bits of the byte select the mask (0x00, 0x40, 0x80,
	 * 0xc0), the low six bits select the bit within it.
	 */
	for (ch = *charset; ch != 0; ch = next_ch) {
		next_ch = *++charset;
		bit = 1ul << (ch & 0x3f);
		if (__predict_true(ch < 0x80)) {
			if (ch < 0x40)
				m_0 |= bit;
			else
				m_4 |= bit;
		} else {
			if (ch < 0xc0)
				m_8 |= bit;
			else
				m_c |= bit;
		}
	}

	/* For strcspn() we just invert the validity set */
	m_0 ^= invert;
	m_4 ^= invert;
	m_8 ^= invert;
	m_c ^= invert;

	/*
	 * We could remove the lsb from m_0 to terminate at the end of the
	 * input string.
	 * However, prefetching the next char is beneficial and we must
	 * not read the byte after the \0 - as it might fault!
	 * So we take the 'hit' of the compare against 0.
	 */

	/*
	 * After the post-increment, s points one past the byte held in ch,
	 * so s[count] is always the byte following the one being tested.
	 * It is only loaded once ch is known to be non-NUL.
	 */
	ch = *s++;
	for (count = 0; ch != 0; ch = next_ch) {
		next_ch = s[count];
		if (__predict_true(ch < 0x80)) {
			check = m_0;
			if (ch >= 0x40)
				check = m_4;
		} else {
			check = m_8;
			if (ch >= 0xc0)
				check = m_c;
		}
		if (!((check >> (ch & 0x3f)) & 1))
			break;
		count++;
	}
	return count;
}
1404ab4902eSlneto
/*
 * Return the length of the initial segment of s made up only of bytes
 * that occur in charset.
 */
size_t
strspn(const char *s, const char *charset)
{
	return strspn_x(s, charset, 0);
}
1464ab4902eSlneto
/*
 * Return the length of the initial segment of s made up only of bytes
 * that do not occur in charset.
 */
size_t
strcspn(const char *s, const char *charset)
{
	/* An all-ones mask complements the accepted set inside strspn_x(). */
	return strspn_x(s, charset, ~0ul);
}
1524ab4902eSlneto #endif
153