xref: /dflybsd-src/contrib/grep/lib/memchr2.c (revision 91b9ed38d3db6a8a8ac5b66da1d43e6e331e259a)
1*09d4459fSDaniel Fojt /* Copyright (C) 1991, 1993, 1996-1997, 1999-2000, 2003-2004, 2006, 2008-2020
2680a9cb8SJohn Marino    Free Software Foundation, Inc.
3680a9cb8SJohn Marino 
4680a9cb8SJohn Marino    Based on strlen implementation by Torbjorn Granlund (tege@sics.se),
5680a9cb8SJohn Marino    with help from Dan Sahlin (dan@sics.se) and
6680a9cb8SJohn Marino    commentary by Jim Blandy (jimb@ai.mit.edu);
7680a9cb8SJohn Marino    adaptation to memchr suggested by Dick Karpinski (dick@cca.ucsf.edu),
8680a9cb8SJohn Marino    and implemented in glibc by Roland McGrath (roland@ai.mit.edu).
9680a9cb8SJohn Marino    Extension to memchr2 implemented by Eric Blake (ebb9@byu.net).
10680a9cb8SJohn Marino 
11680a9cb8SJohn Marino This program is free software: you can redistribute it and/or modify it
12680a9cb8SJohn Marino under the terms of the GNU General Public License as published by the
13680a9cb8SJohn Marino Free Software Foundation; either version 3 of the License, or any
14680a9cb8SJohn Marino later version.
15680a9cb8SJohn Marino 
16680a9cb8SJohn Marino This program is distributed in the hope that it will be useful,
17680a9cb8SJohn Marino but WITHOUT ANY WARRANTY; without even the implied warranty of
18680a9cb8SJohn Marino MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
19680a9cb8SJohn Marino GNU General Public License for more details.
20680a9cb8SJohn Marino 
21680a9cb8SJohn Marino You should have received a copy of the GNU General Public License
22*09d4459fSDaniel Fojt along with this program.  If not, see <https://www.gnu.org/licenses/>.  */
23680a9cb8SJohn Marino 
24680a9cb8SJohn Marino #include <config.h>
25680a9cb8SJohn Marino 
26680a9cb8SJohn Marino #include "memchr2.h"
27680a9cb8SJohn Marino 
28680a9cb8SJohn Marino #include <limits.h>
29680a9cb8SJohn Marino #include <stdint.h>
30680a9cb8SJohn Marino #include <string.h>
31680a9cb8SJohn Marino 
32680a9cb8SJohn Marino /* Return the first address of either C1 or C2 (treated as unsigned
33680a9cb8SJohn Marino    char) that occurs within N bytes of the memory region S.  If
34680a9cb8SJohn Marino    neither byte appears, return NULL.  */
35680a9cb8SJohn Marino void *
memchr2(void const * s,int c1_in,int c2_in,size_t n)36680a9cb8SJohn Marino memchr2 (void const *s, int c1_in, int c2_in, size_t n)
37680a9cb8SJohn Marino {
38680a9cb8SJohn Marino   /* On 32-bit hardware, choosing longword to be a 32-bit unsigned
39680a9cb8SJohn Marino      long instead of a 64-bit uintmax_t tends to give better
40680a9cb8SJohn Marino      performance.  On 64-bit hardware, unsigned long is generally 64
41680a9cb8SJohn Marino      bits already.  Change this typedef to experiment with
42680a9cb8SJohn Marino      performance.  */
43680a9cb8SJohn Marino   typedef unsigned long int longword;
44680a9cb8SJohn Marino 
45680a9cb8SJohn Marino   const unsigned char *char_ptr;
46680a9cb8SJohn Marino   void const *void_ptr;
47680a9cb8SJohn Marino   const longword *longword_ptr;
48680a9cb8SJohn Marino   longword repeated_one;
49680a9cb8SJohn Marino   longword repeated_c1;
50680a9cb8SJohn Marino   longword repeated_c2;
51680a9cb8SJohn Marino   unsigned char c1;
52680a9cb8SJohn Marino   unsigned char c2;
53680a9cb8SJohn Marino 
54680a9cb8SJohn Marino   c1 = (unsigned char) c1_in;
55680a9cb8SJohn Marino   c2 = (unsigned char) c2_in;
56680a9cb8SJohn Marino 
57680a9cb8SJohn Marino   if (c1 == c2)
58680a9cb8SJohn Marino     return memchr (s, c1, n);
59680a9cb8SJohn Marino 
60680a9cb8SJohn Marino   /* Handle the first few bytes by reading one byte at a time.
61680a9cb8SJohn Marino      Do this until VOID_PTR is aligned on a longword boundary.  */
62680a9cb8SJohn Marino   for (void_ptr = s;
63680a9cb8SJohn Marino        n > 0 && (uintptr_t) void_ptr % sizeof (longword) != 0;
64680a9cb8SJohn Marino        --n)
65680a9cb8SJohn Marino     {
66680a9cb8SJohn Marino       char_ptr = void_ptr;
67680a9cb8SJohn Marino       if (*char_ptr == c1 || *char_ptr == c2)
68680a9cb8SJohn Marino         return (void *) void_ptr;
69680a9cb8SJohn Marino       void_ptr = char_ptr + 1;
70680a9cb8SJohn Marino     }
71680a9cb8SJohn Marino 
72680a9cb8SJohn Marino   longword_ptr = void_ptr;
73680a9cb8SJohn Marino 
74680a9cb8SJohn Marino   /* All these elucidatory comments refer to 4-byte longwords,
75680a9cb8SJohn Marino      but the theory applies equally well to any size longwords.  */
76680a9cb8SJohn Marino 
77680a9cb8SJohn Marino   /* Compute auxiliary longword values:
78680a9cb8SJohn Marino      repeated_one is a value which has a 1 in every byte.
79680a9cb8SJohn Marino      repeated_c1 has c1 in every byte.
80680a9cb8SJohn Marino      repeated_c2 has c2 in every byte.  */
81680a9cb8SJohn Marino   repeated_one = 0x01010101;
82680a9cb8SJohn Marino   repeated_c1 = c1 | (c1 << 8);
83680a9cb8SJohn Marino   repeated_c2 = c2 | (c2 << 8);
84680a9cb8SJohn Marino   repeated_c1 |= repeated_c1 << 16;
85680a9cb8SJohn Marino   repeated_c2 |= repeated_c2 << 16;
86680a9cb8SJohn Marino   if (0xffffffffU < (longword) -1)
87680a9cb8SJohn Marino     {
88680a9cb8SJohn Marino       repeated_one |= repeated_one << 31 << 1;
89680a9cb8SJohn Marino       repeated_c1 |= repeated_c1 << 31 << 1;
90680a9cb8SJohn Marino       repeated_c2 |= repeated_c2 << 31 << 1;
91680a9cb8SJohn Marino       if (8 < sizeof (longword))
92680a9cb8SJohn Marino         {
93680a9cb8SJohn Marino           size_t i;
94680a9cb8SJohn Marino 
95680a9cb8SJohn Marino           for (i = 64; i < sizeof (longword) * 8; i *= 2)
96680a9cb8SJohn Marino             {
97680a9cb8SJohn Marino               repeated_one |= repeated_one << i;
98680a9cb8SJohn Marino               repeated_c1 |= repeated_c1 << i;
99680a9cb8SJohn Marino               repeated_c2 |= repeated_c2 << i;
100680a9cb8SJohn Marino             }
101680a9cb8SJohn Marino         }
102680a9cb8SJohn Marino     }
103680a9cb8SJohn Marino 
104680a9cb8SJohn Marino   /* Instead of the traditional loop which tests each byte, we will test a
105680a9cb8SJohn Marino      longword at a time.  The tricky part is testing if *any of the four*
106680a9cb8SJohn Marino      bytes in the longword in question are equal to c1 or c2.  We first use
107680a9cb8SJohn Marino      an xor with repeated_c1 and repeated_c2, respectively.  This reduces
108680a9cb8SJohn Marino      the task to testing whether *any of the four* bytes in longword1 or
109680a9cb8SJohn Marino      longword2 is zero.
110680a9cb8SJohn Marino 
111680a9cb8SJohn Marino      Let's consider longword1.  We compute tmp1 =
112680a9cb8SJohn Marino        ((longword1 - repeated_one) & ~longword1) & (repeated_one << 7).
113680a9cb8SJohn Marino      That is, we perform the following operations:
114680a9cb8SJohn Marino        1. Subtract repeated_one.
115680a9cb8SJohn Marino        2. & ~longword1.
116680a9cb8SJohn Marino        3. & a mask consisting of 0x80 in every byte.
117680a9cb8SJohn Marino      Consider what happens in each byte:
118680a9cb8SJohn Marino        - If a byte of longword1 is zero, step 1 and 2 transform it into 0xff,
119680a9cb8SJohn Marino          and step 3 transforms it into 0x80.  A carry can also be propagated
120680a9cb8SJohn Marino          to more significant bytes.
121680a9cb8SJohn Marino        - If a byte of longword1 is nonzero, let its lowest 1 bit be at
122680a9cb8SJohn Marino          position k (0 <= k <= 7); so the lowest k bits are 0.  After step 1,
123680a9cb8SJohn Marino          the byte ends in a single bit of value 0 and k bits of value 1.
124680a9cb8SJohn Marino          After step 2, the result is just k bits of value 1: 2^k - 1.  After
125680a9cb8SJohn Marino          step 3, the result is 0.  And no carry is produced.
126680a9cb8SJohn Marino      So, if longword1 has only non-zero bytes, tmp1 is zero.
127680a9cb8SJohn Marino      Whereas if longword1 has a zero byte, call j the position of the least
128680a9cb8SJohn Marino      significant zero byte.  Then the result has a zero at positions 0, ...,
129680a9cb8SJohn Marino      j-1 and a 0x80 at position j.  We cannot predict the result at the more
130680a9cb8SJohn Marino      significant bytes (positions j+1..3), but it does not matter since we
131680a9cb8SJohn Marino      already have a non-zero bit at position 8*j+7.
132680a9cb8SJohn Marino 
133680a9cb8SJohn Marino      Similarly, we compute tmp2 =
134680a9cb8SJohn Marino        ((longword2 - repeated_one) & ~longword2) & (repeated_one << 7).
135680a9cb8SJohn Marino 
136680a9cb8SJohn Marino      The test whether any byte in longword1 or longword2 is zero is equivalent
137680a9cb8SJohn Marino      to testing whether tmp1 is nonzero or tmp2 is nonzero.  We can combine
138680a9cb8SJohn Marino      this into a single test, whether (tmp1 | tmp2) is nonzero.  */
139680a9cb8SJohn Marino 
140680a9cb8SJohn Marino   while (n >= sizeof (longword))
141680a9cb8SJohn Marino     {
142680a9cb8SJohn Marino       longword longword1 = *longword_ptr ^ repeated_c1;
143680a9cb8SJohn Marino       longword longword2 = *longword_ptr ^ repeated_c2;
144680a9cb8SJohn Marino 
145680a9cb8SJohn Marino       if (((((longword1 - repeated_one) & ~longword1)
146680a9cb8SJohn Marino             | ((longword2 - repeated_one) & ~longword2))
147680a9cb8SJohn Marino            & (repeated_one << 7)) != 0)
148680a9cb8SJohn Marino         break;
149680a9cb8SJohn Marino       longword_ptr++;
150680a9cb8SJohn Marino       n -= sizeof (longword);
151680a9cb8SJohn Marino     }
152680a9cb8SJohn Marino 
153680a9cb8SJohn Marino   char_ptr = (const unsigned char *) longword_ptr;
154680a9cb8SJohn Marino 
155680a9cb8SJohn Marino   /* At this point, we know that either n < sizeof (longword), or one of the
156680a9cb8SJohn Marino      sizeof (longword) bytes starting at char_ptr is == c1 or == c2.  On
157680a9cb8SJohn Marino      little-endian machines, we could determine the first such byte without
158680a9cb8SJohn Marino      any further memory accesses, just by looking at the (tmp1 | tmp2) result
159680a9cb8SJohn Marino      from the last loop iteration.  But this does not work on big-endian
160680a9cb8SJohn Marino      machines.  Choose code that works in both cases.  */
161680a9cb8SJohn Marino 
162680a9cb8SJohn Marino   for (; n > 0; --n, ++char_ptr)
163680a9cb8SJohn Marino     {
164680a9cb8SJohn Marino       if (*char_ptr == c1 || *char_ptr == c2)
165680a9cb8SJohn Marino         return (void *) char_ptr;
166680a9cb8SJohn Marino     }
167680a9cb8SJohn Marino 
168680a9cb8SJohn Marino   return NULL;
169680a9cb8SJohn Marino }
170