xref: /dflybsd-src/contrib/grep/lib/str-two-way.h (revision dc7c36e42d07cfd62c8aa566489e061f2ff94edc)
1*dc7c36e4SJohn Marino /* Byte-wise substring search, using the Two-Way algorithm.
2*dc7c36e4SJohn Marino    Copyright (C) 2008-2015 Free Software Foundation, Inc.
3*dc7c36e4SJohn Marino    This file is part of the GNU C Library.
4*dc7c36e4SJohn Marino    Written by Eric Blake <ebb9@byu.net>, 2008.
5*dc7c36e4SJohn Marino 
6*dc7c36e4SJohn Marino    This program is free software; you can redistribute it and/or modify
7*dc7c36e4SJohn Marino    it under the terms of the GNU General Public License as published by
8*dc7c36e4SJohn Marino    the Free Software Foundation; either version 3, or (at your option)
9*dc7c36e4SJohn Marino    any later version.
10*dc7c36e4SJohn Marino 
11*dc7c36e4SJohn Marino    This program is distributed in the hope that it will be useful,
12*dc7c36e4SJohn Marino    but WITHOUT ANY WARRANTY; without even the implied warranty of
13*dc7c36e4SJohn Marino    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14*dc7c36e4SJohn Marino    GNU General Public License for more details.
15*dc7c36e4SJohn Marino 
16*dc7c36e4SJohn Marino    You should have received a copy of the GNU General Public License along
17*dc7c36e4SJohn Marino    with this program; if not, see <http://www.gnu.org/licenses/>.  */
18*dc7c36e4SJohn Marino 
19*dc7c36e4SJohn Marino /* Before including this file, you need to include <config.h> and
20*dc7c36e4SJohn Marino    <string.h>, and define:
21*dc7c36e4SJohn Marino      RESULT_TYPE             A macro that expands to the return type.
22*dc7c36e4SJohn Marino      AVAILABLE(h, h_l, j, n_l)
23*dc7c36e4SJohn Marino                              A macro that returns nonzero if there are
24*dc7c36e4SJohn Marino                              at least N_L bytes left starting at H[J].
25*dc7c36e4SJohn Marino                              H is 'unsigned char *', H_L, J, and N_L
26*dc7c36e4SJohn Marino                              are 'size_t'; H_L is an lvalue.  For
27*dc7c36e4SJohn Marino                              NUL-terminated searches, H_L can be
28*dc7c36e4SJohn Marino                              modified each iteration to avoid having
29*dc7c36e4SJohn Marino                              to compute the end of H up front.
30*dc7c36e4SJohn Marino 
31*dc7c36e4SJohn Marino   For case-insensitivity, you may optionally define:
32*dc7c36e4SJohn Marino      CMP_FUNC(p1, p2, l)     A macro that returns 0 iff the first L
33*dc7c36e4SJohn Marino                              characters of P1 and P2 are equal.
34*dc7c36e4SJohn Marino      CANON_ELEMENT(c)        A macro that canonicalizes an element right after
35*dc7c36e4SJohn Marino                              it has been fetched from one of the two strings.
36*dc7c36e4SJohn Marino                              The argument is an 'unsigned char'; the result
37*dc7c36e4SJohn Marino                              must be an 'unsigned char' as well.
38*dc7c36e4SJohn Marino 
39*dc7c36e4SJohn Marino   This file undefines the macros documented above, and defines
40*dc7c36e4SJohn Marino   LONG_NEEDLE_THRESHOLD.
41*dc7c36e4SJohn Marino */
42*dc7c36e4SJohn Marino 
43*dc7c36e4SJohn Marino #include <limits.h>
44*dc7c36e4SJohn Marino #include <stdint.h>
45*dc7c36e4SJohn Marino 
46*dc7c36e4SJohn Marino /* We use the Two-Way string matching algorithm (also known as
47*dc7c36e4SJohn Marino    Chrochemore-Perrin), which guarantees linear complexity with
48*dc7c36e4SJohn Marino    constant space.  Additionally, for long needles, we also use a bad
49*dc7c36e4SJohn Marino    character shift table similar to the Boyer-Moore algorithm to
50*dc7c36e4SJohn Marino    achieve improved (potentially sub-linear) performance.
51*dc7c36e4SJohn Marino 
52*dc7c36e4SJohn Marino    See http://www-igm.univ-mlv.fr/~lecroq/string/node26.html#SECTION00260,
53*dc7c36e4SJohn Marino    http://en.wikipedia.org/wiki/Boyer-Moore_string_search_algorithm,
54*dc7c36e4SJohn Marino    http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.34.6641&rep=rep1&type=pdf
55*dc7c36e4SJohn Marino */
56*dc7c36e4SJohn Marino 
57*dc7c36e4SJohn Marino /* Point at which computing a bad-byte shift table is likely to be
58*dc7c36e4SJohn Marino    worthwhile.  Small needles should not compute a table, since it
59*dc7c36e4SJohn Marino    adds (1 << CHAR_BIT) + NEEDLE_LEN computations of preparation for a
60*dc7c36e4SJohn Marino    speedup no greater than a factor of NEEDLE_LEN.  The larger the
61*dc7c36e4SJohn Marino    needle, the better the potential performance gain.  On the other
62*dc7c36e4SJohn Marino    hand, on non-POSIX systems with CHAR_BIT larger than eight, the
63*dc7c36e4SJohn Marino    memory required for the table is prohibitive.  */
64*dc7c36e4SJohn Marino #if CHAR_BIT < 10
65*dc7c36e4SJohn Marino # define LONG_NEEDLE_THRESHOLD 32U
66*dc7c36e4SJohn Marino #else
67*dc7c36e4SJohn Marino # define LONG_NEEDLE_THRESHOLD SIZE_MAX
68*dc7c36e4SJohn Marino #endif
69*dc7c36e4SJohn Marino 
70*dc7c36e4SJohn Marino #ifndef MAX
71*dc7c36e4SJohn Marino # define MAX(a, b) ((a < b) ? (b) : (a))
72*dc7c36e4SJohn Marino #endif
73*dc7c36e4SJohn Marino 
74*dc7c36e4SJohn Marino #ifndef CANON_ELEMENT
75*dc7c36e4SJohn Marino # define CANON_ELEMENT(c) c
76*dc7c36e4SJohn Marino #endif
77*dc7c36e4SJohn Marino #ifndef CMP_FUNC
78*dc7c36e4SJohn Marino # define CMP_FUNC memcmp
79*dc7c36e4SJohn Marino #endif
80*dc7c36e4SJohn Marino 
81*dc7c36e4SJohn Marino /* Perform a critical factorization of NEEDLE, of length NEEDLE_LEN.
82*dc7c36e4SJohn Marino    Return the index of the first byte in the right half, and set
83*dc7c36e4SJohn Marino    *PERIOD to the global period of the right half.
84*dc7c36e4SJohn Marino 
85*dc7c36e4SJohn Marino    The global period of a string is the smallest index (possibly its
86*dc7c36e4SJohn Marino    length) at which all remaining bytes in the string are repetitions
87*dc7c36e4SJohn Marino    of the prefix (the last repetition may be a subset of the prefix).
88*dc7c36e4SJohn Marino 
89*dc7c36e4SJohn Marino    When NEEDLE is factored into two halves, a local period is the
90*dc7c36e4SJohn Marino    length of the smallest word that shares a suffix with the left half
91*dc7c36e4SJohn Marino    and shares a prefix with the right half.  All factorizations of a
92*dc7c36e4SJohn Marino    non-empty NEEDLE have a local period of at least 1 and no greater
93*dc7c36e4SJohn Marino    than NEEDLE_LEN.
94*dc7c36e4SJohn Marino 
95*dc7c36e4SJohn Marino    A critical factorization has the property that the local period
96*dc7c36e4SJohn Marino    equals the global period.  All strings have at least one critical
97*dc7c36e4SJohn Marino    factorization with the left half smaller than the global period.
98*dc7c36e4SJohn Marino    And while some strings have more than one critical factorization,
99*dc7c36e4SJohn Marino    it is provable that with an ordered alphabet, at least one of the
100*dc7c36e4SJohn Marino    critical factorizations corresponds to a maximal suffix.
101*dc7c36e4SJohn Marino 
102*dc7c36e4SJohn Marino    Given an ordered alphabet, a critical factorization can be computed
103*dc7c36e4SJohn Marino    in linear time, with 2 * NEEDLE_LEN comparisons, by computing the
104*dc7c36e4SJohn Marino    shorter of two ordered maximal suffixes.  The ordered maximal
105*dc7c36e4SJohn Marino    suffixes are determined by lexicographic comparison while tracking
106*dc7c36e4SJohn Marino    periodicity.  */
107*dc7c36e4SJohn Marino static size_t
108*dc7c36e4SJohn Marino critical_factorization (const unsigned char *needle, size_t needle_len,
109*dc7c36e4SJohn Marino                         size_t *period)
110*dc7c36e4SJohn Marino {
111*dc7c36e4SJohn Marino   /* Index of last byte of left half, or SIZE_MAX.  */
112*dc7c36e4SJohn Marino   size_t max_suffix, max_suffix_rev;
113*dc7c36e4SJohn Marino   size_t j; /* Index into NEEDLE for current candidate suffix.  */
114*dc7c36e4SJohn Marino   size_t k; /* Offset into current period.  */
115*dc7c36e4SJohn Marino   size_t p; /* Intermediate period.  */
116*dc7c36e4SJohn Marino   unsigned char a, b; /* Current comparison bytes.  */
117*dc7c36e4SJohn Marino 
118*dc7c36e4SJohn Marino   /* Special case NEEDLE_LEN of 1 or 2 (all callers already filtered
119*dc7c36e4SJohn Marino      out 0-length needles.  */
120*dc7c36e4SJohn Marino   if (needle_len < 3)
121*dc7c36e4SJohn Marino     {
122*dc7c36e4SJohn Marino       *period = 1;
123*dc7c36e4SJohn Marino       return needle_len - 1;
124*dc7c36e4SJohn Marino     }
125*dc7c36e4SJohn Marino 
126*dc7c36e4SJohn Marino   /* Invariants:
127*dc7c36e4SJohn Marino      0 <= j < NEEDLE_LEN - 1
128*dc7c36e4SJohn Marino      -1 <= max_suffix{,_rev} < j (treating SIZE_MAX as if it were signed)
129*dc7c36e4SJohn Marino      min(max_suffix, max_suffix_rev) < global period of NEEDLE
130*dc7c36e4SJohn Marino      1 <= p <= global period of NEEDLE
131*dc7c36e4SJohn Marino      p == global period of the substring NEEDLE[max_suffix{,_rev}+1...j]
132*dc7c36e4SJohn Marino      1 <= k <= p
133*dc7c36e4SJohn Marino   */
134*dc7c36e4SJohn Marino 
135*dc7c36e4SJohn Marino   /* Perform lexicographic search.  */
136*dc7c36e4SJohn Marino   max_suffix = SIZE_MAX;
137*dc7c36e4SJohn Marino   j = 0;
138*dc7c36e4SJohn Marino   k = p = 1;
139*dc7c36e4SJohn Marino   while (j + k < needle_len)
140*dc7c36e4SJohn Marino     {
141*dc7c36e4SJohn Marino       a = CANON_ELEMENT (needle[j + k]);
142*dc7c36e4SJohn Marino       b = CANON_ELEMENT (needle[max_suffix + k]);
143*dc7c36e4SJohn Marino       if (a < b)
144*dc7c36e4SJohn Marino         {
145*dc7c36e4SJohn Marino           /* Suffix is smaller, period is entire prefix so far.  */
146*dc7c36e4SJohn Marino           j += k;
147*dc7c36e4SJohn Marino           k = 1;
148*dc7c36e4SJohn Marino           p = j - max_suffix;
149*dc7c36e4SJohn Marino         }
150*dc7c36e4SJohn Marino       else if (a == b)
151*dc7c36e4SJohn Marino         {
152*dc7c36e4SJohn Marino           /* Advance through repetition of the current period.  */
153*dc7c36e4SJohn Marino           if (k != p)
154*dc7c36e4SJohn Marino             ++k;
155*dc7c36e4SJohn Marino           else
156*dc7c36e4SJohn Marino             {
157*dc7c36e4SJohn Marino               j += p;
158*dc7c36e4SJohn Marino               k = 1;
159*dc7c36e4SJohn Marino             }
160*dc7c36e4SJohn Marino         }
161*dc7c36e4SJohn Marino       else /* b < a */
162*dc7c36e4SJohn Marino         {
163*dc7c36e4SJohn Marino           /* Suffix is larger, start over from current location.  */
164*dc7c36e4SJohn Marino           max_suffix = j++;
165*dc7c36e4SJohn Marino           k = p = 1;
166*dc7c36e4SJohn Marino         }
167*dc7c36e4SJohn Marino     }
168*dc7c36e4SJohn Marino   *period = p;
169*dc7c36e4SJohn Marino 
170*dc7c36e4SJohn Marino   /* Perform reverse lexicographic search.  */
171*dc7c36e4SJohn Marino   max_suffix_rev = SIZE_MAX;
172*dc7c36e4SJohn Marino   j = 0;
173*dc7c36e4SJohn Marino   k = p = 1;
174*dc7c36e4SJohn Marino   while (j + k < needle_len)
175*dc7c36e4SJohn Marino     {
176*dc7c36e4SJohn Marino       a = CANON_ELEMENT (needle[j + k]);
177*dc7c36e4SJohn Marino       b = CANON_ELEMENT (needle[max_suffix_rev + k]);
178*dc7c36e4SJohn Marino       if (b < a)
179*dc7c36e4SJohn Marino         {
180*dc7c36e4SJohn Marino           /* Suffix is smaller, period is entire prefix so far.  */
181*dc7c36e4SJohn Marino           j += k;
182*dc7c36e4SJohn Marino           k = 1;
183*dc7c36e4SJohn Marino           p = j - max_suffix_rev;
184*dc7c36e4SJohn Marino         }
185*dc7c36e4SJohn Marino       else if (a == b)
186*dc7c36e4SJohn Marino         {
187*dc7c36e4SJohn Marino           /* Advance through repetition of the current period.  */
188*dc7c36e4SJohn Marino           if (k != p)
189*dc7c36e4SJohn Marino             ++k;
190*dc7c36e4SJohn Marino           else
191*dc7c36e4SJohn Marino             {
192*dc7c36e4SJohn Marino               j += p;
193*dc7c36e4SJohn Marino               k = 1;
194*dc7c36e4SJohn Marino             }
195*dc7c36e4SJohn Marino         }
196*dc7c36e4SJohn Marino       else /* a < b */
197*dc7c36e4SJohn Marino         {
198*dc7c36e4SJohn Marino           /* Suffix is larger, start over from current location.  */
199*dc7c36e4SJohn Marino           max_suffix_rev = j++;
200*dc7c36e4SJohn Marino           k = p = 1;
201*dc7c36e4SJohn Marino         }
202*dc7c36e4SJohn Marino     }
203*dc7c36e4SJohn Marino 
204*dc7c36e4SJohn Marino   /* Choose the shorter suffix.  Return the index of the first byte of
205*dc7c36e4SJohn Marino      the right half, rather than the last byte of the left half.
206*dc7c36e4SJohn Marino 
207*dc7c36e4SJohn Marino      For some examples, 'banana' has two critical factorizations, both
208*dc7c36e4SJohn Marino      exposed by the two lexicographic extreme suffixes of 'anana' and
209*dc7c36e4SJohn Marino      'nana', where both suffixes have a period of 2.  On the other
210*dc7c36e4SJohn Marino      hand, with 'aab' and 'bba', both strings have a single critical
211*dc7c36e4SJohn Marino      factorization of the last byte, with the suffix having a period
212*dc7c36e4SJohn Marino      of 1.  While the maximal lexicographic suffix of 'aab' is 'b',
213*dc7c36e4SJohn Marino      the maximal lexicographic suffix of 'bba' is 'ba', which is not a
214*dc7c36e4SJohn Marino      critical factorization.  Conversely, the maximal reverse
215*dc7c36e4SJohn Marino      lexicographic suffix of 'a' works for 'bba', but not 'ab' for
216*dc7c36e4SJohn Marino      'aab'.  The shorter suffix of the two will always be a critical
217*dc7c36e4SJohn Marino      factorization.  */
218*dc7c36e4SJohn Marino   if (max_suffix_rev + 1 < max_suffix + 1)
219*dc7c36e4SJohn Marino     return max_suffix + 1;
220*dc7c36e4SJohn Marino   *period = p;
221*dc7c36e4SJohn Marino   return max_suffix_rev + 1;
222*dc7c36e4SJohn Marino }
223*dc7c36e4SJohn Marino 
224*dc7c36e4SJohn Marino /* Return the first location of non-empty NEEDLE within HAYSTACK, or
225*dc7c36e4SJohn Marino    NULL.  HAYSTACK_LEN is the minimum known length of HAYSTACK.  This
226*dc7c36e4SJohn Marino    method is optimized for NEEDLE_LEN < LONG_NEEDLE_THRESHOLD.
227*dc7c36e4SJohn Marino    Performance is guaranteed to be linear, with an initialization cost
228*dc7c36e4SJohn Marino    of 2 * NEEDLE_LEN comparisons.
229*dc7c36e4SJohn Marino 
230*dc7c36e4SJohn Marino    If AVAILABLE does not modify HAYSTACK_LEN (as in memmem), then at
231*dc7c36e4SJohn Marino    most 2 * HAYSTACK_LEN - NEEDLE_LEN comparisons occur in searching.
232*dc7c36e4SJohn Marino    If AVAILABLE modifies HAYSTACK_LEN (as in strstr), then at most 3 *
233*dc7c36e4SJohn Marino    HAYSTACK_LEN - NEEDLE_LEN comparisons occur in searching.  */
234*dc7c36e4SJohn Marino static RETURN_TYPE
235*dc7c36e4SJohn Marino two_way_short_needle (const unsigned char *haystack, size_t haystack_len,
236*dc7c36e4SJohn Marino                       const unsigned char *needle, size_t needle_len)
237*dc7c36e4SJohn Marino {
238*dc7c36e4SJohn Marino   size_t i; /* Index into current byte of NEEDLE.  */
239*dc7c36e4SJohn Marino   size_t j; /* Index into current window of HAYSTACK.  */
240*dc7c36e4SJohn Marino   size_t period; /* The period of the right half of needle.  */
241*dc7c36e4SJohn Marino   size_t suffix; /* The index of the right half of needle.  */
242*dc7c36e4SJohn Marino 
243*dc7c36e4SJohn Marino   /* Factor the needle into two halves, such that the left half is
244*dc7c36e4SJohn Marino      smaller than the global period, and the right half is
245*dc7c36e4SJohn Marino      periodic (with a period as large as NEEDLE_LEN - suffix).  */
246*dc7c36e4SJohn Marino   suffix = critical_factorization (needle, needle_len, &period);
247*dc7c36e4SJohn Marino 
248*dc7c36e4SJohn Marino   /* Perform the search.  Each iteration compares the right half
249*dc7c36e4SJohn Marino      first.  */
250*dc7c36e4SJohn Marino   if (CMP_FUNC (needle, needle + period, suffix) == 0)
251*dc7c36e4SJohn Marino     {
252*dc7c36e4SJohn Marino       /* Entire needle is periodic; a mismatch in the left half can
253*dc7c36e4SJohn Marino          only advance by the period, so use memory to avoid rescanning
254*dc7c36e4SJohn Marino          known occurrences of the period in the right half.  */
255*dc7c36e4SJohn Marino       size_t memory = 0;
256*dc7c36e4SJohn Marino       j = 0;
257*dc7c36e4SJohn Marino       while (AVAILABLE (haystack, haystack_len, j, needle_len))
258*dc7c36e4SJohn Marino         {
259*dc7c36e4SJohn Marino           /* Scan for matches in right half.  */
260*dc7c36e4SJohn Marino           i = MAX (suffix, memory);
261*dc7c36e4SJohn Marino           while (i < needle_len && (CANON_ELEMENT (needle[i])
262*dc7c36e4SJohn Marino                                     == CANON_ELEMENT (haystack[i + j])))
263*dc7c36e4SJohn Marino             ++i;
264*dc7c36e4SJohn Marino           if (needle_len <= i)
265*dc7c36e4SJohn Marino             {
266*dc7c36e4SJohn Marino               /* Scan for matches in left half.  */
267*dc7c36e4SJohn Marino               i = suffix - 1;
268*dc7c36e4SJohn Marino               while (memory < i + 1 && (CANON_ELEMENT (needle[i])
269*dc7c36e4SJohn Marino                                         == CANON_ELEMENT (haystack[i + j])))
270*dc7c36e4SJohn Marino                 --i;
271*dc7c36e4SJohn Marino               if (i + 1 < memory + 1)
272*dc7c36e4SJohn Marino                 return (RETURN_TYPE) (haystack + j);
273*dc7c36e4SJohn Marino               /* No match, so remember how many repetitions of period
274*dc7c36e4SJohn Marino                  on the right half were scanned.  */
275*dc7c36e4SJohn Marino               j += period;
276*dc7c36e4SJohn Marino               memory = needle_len - period;
277*dc7c36e4SJohn Marino             }
278*dc7c36e4SJohn Marino           else
279*dc7c36e4SJohn Marino             {
280*dc7c36e4SJohn Marino               j += i - suffix + 1;
281*dc7c36e4SJohn Marino               memory = 0;
282*dc7c36e4SJohn Marino             }
283*dc7c36e4SJohn Marino         }
284*dc7c36e4SJohn Marino     }
285*dc7c36e4SJohn Marino   else
286*dc7c36e4SJohn Marino     {
287*dc7c36e4SJohn Marino       /* The two halves of needle are distinct; no extra memory is
288*dc7c36e4SJohn Marino          required, and any mismatch results in a maximal shift.  */
289*dc7c36e4SJohn Marino       period = MAX (suffix, needle_len - suffix) + 1;
290*dc7c36e4SJohn Marino       j = 0;
291*dc7c36e4SJohn Marino       while (AVAILABLE (haystack, haystack_len, j, needle_len))
292*dc7c36e4SJohn Marino         {
293*dc7c36e4SJohn Marino           /* Scan for matches in right half.  */
294*dc7c36e4SJohn Marino           i = suffix;
295*dc7c36e4SJohn Marino           while (i < needle_len && (CANON_ELEMENT (needle[i])
296*dc7c36e4SJohn Marino                                     == CANON_ELEMENT (haystack[i + j])))
297*dc7c36e4SJohn Marino             ++i;
298*dc7c36e4SJohn Marino           if (needle_len <= i)
299*dc7c36e4SJohn Marino             {
300*dc7c36e4SJohn Marino               /* Scan for matches in left half.  */
301*dc7c36e4SJohn Marino               i = suffix - 1;
302*dc7c36e4SJohn Marino               while (i != SIZE_MAX && (CANON_ELEMENT (needle[i])
303*dc7c36e4SJohn Marino                                        == CANON_ELEMENT (haystack[i + j])))
304*dc7c36e4SJohn Marino                 --i;
305*dc7c36e4SJohn Marino               if (i == SIZE_MAX)
306*dc7c36e4SJohn Marino                 return (RETURN_TYPE) (haystack + j);
307*dc7c36e4SJohn Marino               j += period;
308*dc7c36e4SJohn Marino             }
309*dc7c36e4SJohn Marino           else
310*dc7c36e4SJohn Marino             j += i - suffix + 1;
311*dc7c36e4SJohn Marino         }
312*dc7c36e4SJohn Marino     }
313*dc7c36e4SJohn Marino   return NULL;
314*dc7c36e4SJohn Marino }
315*dc7c36e4SJohn Marino 
316*dc7c36e4SJohn Marino /* Return the first location of non-empty NEEDLE within HAYSTACK, or
317*dc7c36e4SJohn Marino    NULL.  HAYSTACK_LEN is the minimum known length of HAYSTACK.  This
318*dc7c36e4SJohn Marino    method is optimized for LONG_NEEDLE_THRESHOLD <= NEEDLE_LEN.
319*dc7c36e4SJohn Marino    Performance is guaranteed to be linear, with an initialization cost
320*dc7c36e4SJohn Marino    of 3 * NEEDLE_LEN + (1 << CHAR_BIT) operations.
321*dc7c36e4SJohn Marino 
322*dc7c36e4SJohn Marino    If AVAILABLE does not modify HAYSTACK_LEN (as in memmem), then at
323*dc7c36e4SJohn Marino    most 2 * HAYSTACK_LEN - NEEDLE_LEN comparisons occur in searching,
324*dc7c36e4SJohn Marino    and sublinear performance O(HAYSTACK_LEN / NEEDLE_LEN) is possible.
325*dc7c36e4SJohn Marino    If AVAILABLE modifies HAYSTACK_LEN (as in strstr), then at most 3 *
326*dc7c36e4SJohn Marino    HAYSTACK_LEN - NEEDLE_LEN comparisons occur in searching, and
327*dc7c36e4SJohn Marino    sublinear performance is not possible.  */
328*dc7c36e4SJohn Marino static RETURN_TYPE
329*dc7c36e4SJohn Marino two_way_long_needle (const unsigned char *haystack, size_t haystack_len,
330*dc7c36e4SJohn Marino                      const unsigned char *needle, size_t needle_len)
331*dc7c36e4SJohn Marino {
332*dc7c36e4SJohn Marino   size_t i; /* Index into current byte of NEEDLE.  */
333*dc7c36e4SJohn Marino   size_t j; /* Index into current window of HAYSTACK.  */
334*dc7c36e4SJohn Marino   size_t period; /* The period of the right half of needle.  */
335*dc7c36e4SJohn Marino   size_t suffix; /* The index of the right half of needle.  */
336*dc7c36e4SJohn Marino   size_t shift_table[1U << CHAR_BIT]; /* See below.  */
337*dc7c36e4SJohn Marino 
338*dc7c36e4SJohn Marino   /* Factor the needle into two halves, such that the left half is
339*dc7c36e4SJohn Marino      smaller than the global period, and the right half is
340*dc7c36e4SJohn Marino      periodic (with a period as large as NEEDLE_LEN - suffix).  */
341*dc7c36e4SJohn Marino   suffix = critical_factorization (needle, needle_len, &period);
342*dc7c36e4SJohn Marino 
343*dc7c36e4SJohn Marino   /* Populate shift_table.  For each possible byte value c,
344*dc7c36e4SJohn Marino      shift_table[c] is the distance from the last occurrence of c to
345*dc7c36e4SJohn Marino      the end of NEEDLE, or NEEDLE_LEN if c is absent from the NEEDLE.
346*dc7c36e4SJohn Marino      shift_table[NEEDLE[NEEDLE_LEN - 1]] contains the only 0.  */
347*dc7c36e4SJohn Marino   for (i = 0; i < 1U << CHAR_BIT; i++)
348*dc7c36e4SJohn Marino     shift_table[i] = needle_len;
349*dc7c36e4SJohn Marino   for (i = 0; i < needle_len; i++)
350*dc7c36e4SJohn Marino     shift_table[CANON_ELEMENT (needle[i])] = needle_len - i - 1;
351*dc7c36e4SJohn Marino 
352*dc7c36e4SJohn Marino   /* Perform the search.  Each iteration compares the right half
353*dc7c36e4SJohn Marino      first.  */
354*dc7c36e4SJohn Marino   if (CMP_FUNC (needle, needle + period, suffix) == 0)
355*dc7c36e4SJohn Marino     {
356*dc7c36e4SJohn Marino       /* Entire needle is periodic; a mismatch in the left half can
357*dc7c36e4SJohn Marino          only advance by the period, so use memory to avoid rescanning
358*dc7c36e4SJohn Marino          known occurrences of the period in the right half.  */
359*dc7c36e4SJohn Marino       size_t memory = 0;
360*dc7c36e4SJohn Marino       size_t shift;
361*dc7c36e4SJohn Marino       j = 0;
362*dc7c36e4SJohn Marino       while (AVAILABLE (haystack, haystack_len, j, needle_len))
363*dc7c36e4SJohn Marino         {
364*dc7c36e4SJohn Marino           /* Check the last byte first; if it does not match, then
365*dc7c36e4SJohn Marino              shift to the next possible match location.  */
366*dc7c36e4SJohn Marino           shift = shift_table[CANON_ELEMENT (haystack[j + needle_len - 1])];
367*dc7c36e4SJohn Marino           if (0 < shift)
368*dc7c36e4SJohn Marino             {
369*dc7c36e4SJohn Marino               if (memory && shift < period)
370*dc7c36e4SJohn Marino                 {
371*dc7c36e4SJohn Marino                   /* Since needle is periodic, but the last period has
372*dc7c36e4SJohn Marino                      a byte out of place, there can be no match until
373*dc7c36e4SJohn Marino                      after the mismatch.  */
374*dc7c36e4SJohn Marino                   shift = needle_len - period;
375*dc7c36e4SJohn Marino                 }
376*dc7c36e4SJohn Marino               memory = 0;
377*dc7c36e4SJohn Marino               j += shift;
378*dc7c36e4SJohn Marino               continue;
379*dc7c36e4SJohn Marino             }
380*dc7c36e4SJohn Marino           /* Scan for matches in right half.  The last byte has
381*dc7c36e4SJohn Marino              already been matched, by virtue of the shift table.  */
382*dc7c36e4SJohn Marino           i = MAX (suffix, memory);
383*dc7c36e4SJohn Marino           while (i < needle_len - 1 && (CANON_ELEMENT (needle[i])
384*dc7c36e4SJohn Marino                                         == CANON_ELEMENT (haystack[i + j])))
385*dc7c36e4SJohn Marino             ++i;
386*dc7c36e4SJohn Marino           if (needle_len - 1 <= i)
387*dc7c36e4SJohn Marino             {
388*dc7c36e4SJohn Marino               /* Scan for matches in left half.  */
389*dc7c36e4SJohn Marino               i = suffix - 1;
390*dc7c36e4SJohn Marino               while (memory < i + 1 && (CANON_ELEMENT (needle[i])
391*dc7c36e4SJohn Marino                                         == CANON_ELEMENT (haystack[i + j])))
392*dc7c36e4SJohn Marino                 --i;
393*dc7c36e4SJohn Marino               if (i + 1 < memory + 1)
394*dc7c36e4SJohn Marino                 return (RETURN_TYPE) (haystack + j);
395*dc7c36e4SJohn Marino               /* No match, so remember how many repetitions of period
396*dc7c36e4SJohn Marino                  on the right half were scanned.  */
397*dc7c36e4SJohn Marino               j += period;
398*dc7c36e4SJohn Marino               memory = needle_len - period;
399*dc7c36e4SJohn Marino             }
400*dc7c36e4SJohn Marino           else
401*dc7c36e4SJohn Marino             {
402*dc7c36e4SJohn Marino               j += i - suffix + 1;
403*dc7c36e4SJohn Marino               memory = 0;
404*dc7c36e4SJohn Marino             }
405*dc7c36e4SJohn Marino         }
406*dc7c36e4SJohn Marino     }
407*dc7c36e4SJohn Marino   else
408*dc7c36e4SJohn Marino     {
409*dc7c36e4SJohn Marino       /* The two halves of needle are distinct; no extra memory is
410*dc7c36e4SJohn Marino          required, and any mismatch results in a maximal shift.  */
411*dc7c36e4SJohn Marino       size_t shift;
412*dc7c36e4SJohn Marino       period = MAX (suffix, needle_len - suffix) + 1;
413*dc7c36e4SJohn Marino       j = 0;
414*dc7c36e4SJohn Marino       while (AVAILABLE (haystack, haystack_len, j, needle_len))
415*dc7c36e4SJohn Marino         {
416*dc7c36e4SJohn Marino           /* Check the last byte first; if it does not match, then
417*dc7c36e4SJohn Marino              shift to the next possible match location.  */
418*dc7c36e4SJohn Marino           shift = shift_table[CANON_ELEMENT (haystack[j + needle_len - 1])];
419*dc7c36e4SJohn Marino           if (0 < shift)
420*dc7c36e4SJohn Marino             {
421*dc7c36e4SJohn Marino               j += shift;
422*dc7c36e4SJohn Marino               continue;
423*dc7c36e4SJohn Marino             }
424*dc7c36e4SJohn Marino           /* Scan for matches in right half.  The last byte has
425*dc7c36e4SJohn Marino              already been matched, by virtue of the shift table.  */
426*dc7c36e4SJohn Marino           i = suffix;
427*dc7c36e4SJohn Marino           while (i < needle_len - 1 && (CANON_ELEMENT (needle[i])
428*dc7c36e4SJohn Marino                                         == CANON_ELEMENT (haystack[i + j])))
429*dc7c36e4SJohn Marino             ++i;
430*dc7c36e4SJohn Marino           if (needle_len - 1 <= i)
431*dc7c36e4SJohn Marino             {
432*dc7c36e4SJohn Marino               /* Scan for matches in left half.  */
433*dc7c36e4SJohn Marino               i = suffix - 1;
434*dc7c36e4SJohn Marino               while (i != SIZE_MAX && (CANON_ELEMENT (needle[i])
435*dc7c36e4SJohn Marino                                        == CANON_ELEMENT (haystack[i + j])))
436*dc7c36e4SJohn Marino                 --i;
437*dc7c36e4SJohn Marino               if (i == SIZE_MAX)
438*dc7c36e4SJohn Marino                 return (RETURN_TYPE) (haystack + j);
439*dc7c36e4SJohn Marino               j += period;
440*dc7c36e4SJohn Marino             }
441*dc7c36e4SJohn Marino           else
442*dc7c36e4SJohn Marino             j += i - suffix + 1;
443*dc7c36e4SJohn Marino         }
444*dc7c36e4SJohn Marino     }
445*dc7c36e4SJohn Marino   return NULL;
446*dc7c36e4SJohn Marino }
447*dc7c36e4SJohn Marino 
448*dc7c36e4SJohn Marino #undef AVAILABLE
449*dc7c36e4SJohn Marino #undef CANON_ELEMENT
450*dc7c36e4SJohn Marino #undef CMP_FUNC
451*dc7c36e4SJohn Marino #undef MAX
452*dc7c36e4SJohn Marino #undef RETURN_TYPE
453