1*4b169a6bSchristos /* Copyright (C) 1991, 1993, 1996-1997, 1999-2000, 2003-2004, 2006, 2008-2022
28dffb485Schristos Free Software Foundation, Inc.
38dffb485Schristos
48dffb485Schristos Based on strlen implementation by Torbjorn Granlund (tege@sics.se),
58dffb485Schristos with help from Dan Sahlin (dan@sics.se) and
68dffb485Schristos commentary by Jim Blandy (jimb@ai.mit.edu);
78dffb485Schristos adaptation to memchr suggested by Dick Karpinski (dick@cca.ucsf.edu),
88dffb485Schristos and implemented by Roland McGrath (roland@ai.mit.edu).
98dffb485Schristos
108dffb485Schristos NOTE: The canonical source of this file is maintained with the GNU C Library.
118dffb485Schristos Bugs can be reported to bug-glibc@prep.ai.mit.edu.
128dffb485Schristos
13*4b169a6bSchristos This file is free software: you can redistribute it and/or modify
14*4b169a6bSchristos it under the terms of the GNU Lesser General Public License as
15*4b169a6bSchristos published by the Free Software Foundation; either version 2.1 of the
16*4b169a6bSchristos License, or (at your option) any later version.
178dffb485Schristos
18*4b169a6bSchristos This file is distributed in the hope that it will be useful,
198dffb485Schristos but WITHOUT ANY WARRANTY; without even the implied warranty of
208dffb485Schristos MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21*4b169a6bSchristos GNU Lesser General Public License for more details.
228dffb485Schristos
23*4b169a6bSchristos You should have received a copy of the GNU Lesser General Public License
248dffb485Schristos along with this program. If not, see <https://www.gnu.org/licenses/>. */
258dffb485Schristos
268dffb485Schristos #ifndef _LIBC
278dffb485Schristos # include <config.h>
288dffb485Schristos #endif
298dffb485Schristos
308dffb485Schristos #include <string.h>
318dffb485Schristos
328dffb485Schristos #include <stddef.h>
338dffb485Schristos
348dffb485Schristos #if defined _LIBC
358dffb485Schristos # include <memcopy.h>
368dffb485Schristos #else
378dffb485Schristos # define reg_char char
388dffb485Schristos #endif
398dffb485Schristos
408dffb485Schristos #include <limits.h>
418dffb485Schristos
428dffb485Schristos #if HAVE_BP_SYM_H || defined _LIBC
438dffb485Schristos # include <bp-sym.h>
448dffb485Schristos #else
458dffb485Schristos # define BP_SYM(sym) sym
468dffb485Schristos #endif
478dffb485Schristos
488dffb485Schristos #undef __memchr
498dffb485Schristos #ifdef _LIBC
508dffb485Schristos # undef memchr
518dffb485Schristos #endif
528dffb485Schristos
538dffb485Schristos #ifndef weak_alias
548dffb485Schristos # define __memchr memchr
558dffb485Schristos #endif
568dffb485Schristos
578dffb485Schristos /* Search no more than N bytes of S for C. */
588dffb485Schristos void *
__memchr(void const * s,int c_in,size_t n)598dffb485Schristos __memchr (void const *s, int c_in, size_t n)
608dffb485Schristos {
618dffb485Schristos /* On 32-bit hardware, choosing longword to be a 32-bit unsigned
628dffb485Schristos long instead of a 64-bit uintmax_t tends to give better
638dffb485Schristos performance. On 64-bit hardware, unsigned long is generally 64
648dffb485Schristos bits already. Change this typedef to experiment with
658dffb485Schristos performance. */
668dffb485Schristos typedef unsigned long int longword;
678dffb485Schristos
688dffb485Schristos const unsigned char *char_ptr;
698dffb485Schristos const longword *longword_ptr;
708dffb485Schristos longword repeated_one;
718dffb485Schristos longword repeated_c;
728dffb485Schristos unsigned reg_char c;
738dffb485Schristos
748dffb485Schristos c = (unsigned char) c_in;
758dffb485Schristos
768dffb485Schristos /* Handle the first few bytes by reading one byte at a time.
778dffb485Schristos Do this until CHAR_PTR is aligned on a longword boundary. */
788dffb485Schristos for (char_ptr = (const unsigned char *) s;
798dffb485Schristos n > 0 && (size_t) char_ptr % sizeof (longword) != 0;
808dffb485Schristos --n, ++char_ptr)
818dffb485Schristos if (*char_ptr == c)
828dffb485Schristos return (void *) char_ptr;
838dffb485Schristos
848dffb485Schristos longword_ptr = (const longword *) char_ptr;
858dffb485Schristos
868dffb485Schristos /* All these elucidatory comments refer to 4-byte longwords,
878dffb485Schristos but the theory applies equally well to any size longwords. */
888dffb485Schristos
898dffb485Schristos /* Compute auxiliary longword values:
908dffb485Schristos repeated_one is a value which has a 1 in every byte.
918dffb485Schristos repeated_c has c in every byte. */
928dffb485Schristos repeated_one = 0x01010101;
938dffb485Schristos repeated_c = c | (c << 8);
948dffb485Schristos repeated_c |= repeated_c << 16;
958dffb485Schristos if (0xffffffffU < (longword) -1)
968dffb485Schristos {
978dffb485Schristos repeated_one |= repeated_one << 31 << 1;
988dffb485Schristos repeated_c |= repeated_c << 31 << 1;
998dffb485Schristos if (8 < sizeof (longword))
1008dffb485Schristos {
1018dffb485Schristos size_t i;
1028dffb485Schristos
1038dffb485Schristos for (i = 64; i < sizeof (longword) * 8; i *= 2)
1048dffb485Schristos {
1058dffb485Schristos repeated_one |= repeated_one << i;
1068dffb485Schristos repeated_c |= repeated_c << i;
1078dffb485Schristos }
1088dffb485Schristos }
1098dffb485Schristos }
1108dffb485Schristos
1118dffb485Schristos /* Instead of the traditional loop which tests each byte, we will test a
1128dffb485Schristos longword at a time. The tricky part is testing if *any of the four*
1138dffb485Schristos bytes in the longword in question are equal to c. We first use an xor
1148dffb485Schristos with repeated_c. This reduces the task to testing whether *any of the
1158dffb485Schristos four* bytes in longword1 is zero.
1168dffb485Schristos
1178dffb485Schristos We compute tmp =
1188dffb485Schristos ((longword1 - repeated_one) & ~longword1) & (repeated_one << 7).
1198dffb485Schristos That is, we perform the following operations:
1208dffb485Schristos 1. Subtract repeated_one.
1218dffb485Schristos 2. & ~longword1.
1228dffb485Schristos 3. & a mask consisting of 0x80 in every byte.
1238dffb485Schristos Consider what happens in each byte:
1248dffb485Schristos - If a byte of longword1 is zero, step 1 and 2 transform it into 0xff,
1258dffb485Schristos and step 3 transforms it into 0x80. A carry can also be propagated
1268dffb485Schristos to more significant bytes.
1278dffb485Schristos - If a byte of longword1 is nonzero, let its lowest 1 bit be at
1288dffb485Schristos position k (0 <= k <= 7); so the lowest k bits are 0. After step 1,
1298dffb485Schristos the byte ends in a single bit of value 0 and k bits of value 1.
1308dffb485Schristos After step 2, the result is just k bits of value 1: 2^k - 1. After
1318dffb485Schristos step 3, the result is 0. And no carry is produced.
1328dffb485Schristos So, if longword1 has only non-zero bytes, tmp is zero.
1338dffb485Schristos Whereas if longword1 has a zero byte, call j the position of the least
1348dffb485Schristos significant zero byte. Then the result has a zero at positions 0, ...,
1358dffb485Schristos j-1 and a 0x80 at position j. We cannot predict the result at the more
1368dffb485Schristos significant bytes (positions j+1..3), but it does not matter since we
1378dffb485Schristos already have a non-zero bit at position 8*j+7.
1388dffb485Schristos
1398dffb485Schristos So, the test whether any byte in longword1 is zero is equivalent to
1408dffb485Schristos testing whether tmp is nonzero. */
1418dffb485Schristos
1428dffb485Schristos while (n >= sizeof (longword))
1438dffb485Schristos {
1448dffb485Schristos longword longword1 = *longword_ptr ^ repeated_c;
1458dffb485Schristos
1468dffb485Schristos if ((((longword1 - repeated_one) & ~longword1)
1478dffb485Schristos & (repeated_one << 7)) != 0)
1488dffb485Schristos break;
1498dffb485Schristos longword_ptr++;
1508dffb485Schristos n -= sizeof (longword);
1518dffb485Schristos }
1528dffb485Schristos
1538dffb485Schristos char_ptr = (const unsigned char *) longword_ptr;
1548dffb485Schristos
1558dffb485Schristos /* At this point, we know that either n < sizeof (longword), or one of the
1568dffb485Schristos sizeof (longword) bytes starting at char_ptr is == c. On little-endian
1578dffb485Schristos machines, we could determine the first such byte without any further
1588dffb485Schristos memory accesses, just by looking at the tmp result from the last loop
1598dffb485Schristos iteration. But this does not work on big-endian machines. Choose code
1608dffb485Schristos that works in both cases. */
1618dffb485Schristos
1628dffb485Schristos for (; n > 0; --n, ++char_ptr)
1638dffb485Schristos {
1648dffb485Schristos if (*char_ptr == c)
1658dffb485Schristos return (void *) char_ptr;
1668dffb485Schristos }
1678dffb485Schristos
1688dffb485Schristos return NULL;
1698dffb485Schristos }
1708dffb485Schristos #ifdef weak_alias
1718dffb485Schristos weak_alias (__memchr, BP_SYM (memchr))
1728dffb485Schristos #endif
173