xref: /llvm-project/libcxx/include/__functional/boyer_moore_searcher.h (revision f69585235ec85d54e0f3fc41b2d5700430907f99)
1971e9c80SNikolas Klauser //===----------------------------------------------------------------------===//
2971e9c80SNikolas Klauser //
3971e9c80SNikolas Klauser // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4971e9c80SNikolas Klauser // See https://llvm.org/LICENSE.txt for license information.
5971e9c80SNikolas Klauser // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6971e9c80SNikolas Klauser //
7971e9c80SNikolas Klauser //===----------------------------------------------------------------------===//
8971e9c80SNikolas Klauser 
9971e9c80SNikolas Klauser #ifndef _LIBCPP___FUNCTIONAL_BOYER_MOORE_SEARCHER_H
10971e9c80SNikolas Klauser #define _LIBCPP___FUNCTIONAL_BOYER_MOORE_SEARCHER_H
11971e9c80SNikolas Klauser 
12971e9c80SNikolas Klauser #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
13971e9c80SNikolas Klauser #  pragma GCC system_header
14971e9c80SNikolas Klauser #endif
15971e9c80SNikolas Klauser 
16971e9c80SNikolas Klauser #include <__algorithm/fill_n.h>
17971e9c80SNikolas Klauser #include <__config>
18971e9c80SNikolas Klauser #include <__functional/hash.h>
19971e9c80SNikolas Klauser #include <__functional/operations.h>
20971e9c80SNikolas Klauser #include <__iterator/distance.h>
21971e9c80SNikolas Klauser #include <__iterator/iterator_traits.h>
22971e9c80SNikolas Klauser #include <__memory/shared_ptr.h>
2379702f7fSIan Anderson #include <__type_traits/make_unsigned.h>
24971e9c80SNikolas Klauser #include <__utility/pair.h>
252e43a304SNikolas Klauser #include <__vector/vector.h>
26971e9c80SNikolas Klauser #include <array>
272e43a304SNikolas Klauser #include <limits>
28971e9c80SNikolas Klauser #include <unordered_map>
29971e9c80SNikolas Klauser 
304f15267dSNikolas Klauser #if _LIBCPP_STD_VER >= 17
31971e9c80SNikolas Klauser 
32971e9c80SNikolas Klauser _LIBCPP_PUSH_MACROS
33971e9c80SNikolas Klauser #  include <__undef_macros>
34971e9c80SNikolas Klauser 
35971e9c80SNikolas Klauser _LIBCPP_BEGIN_NAMESPACE_STD
36971e9c80SNikolas Klauser 
379783f28cSLouis Dionne template <class _Key, class _Value, class _Hash, class _BinaryPredicate, bool /*useArray*/>
38971e9c80SNikolas Klauser class _BMSkipTable;
39971e9c80SNikolas Klauser 
40971e9c80SNikolas Klauser // General case for BM data searching; use a map
419783f28cSLouis Dionne template <class _Key, class _Value, class _Hash, class _BinaryPredicate>
42971e9c80SNikolas Klauser class _BMSkipTable<_Key, _Value, _Hash, _BinaryPredicate, false> {
43971e9c80SNikolas Klauser private:
44971e9c80SNikolas Klauser   using value_type = _Value;
45971e9c80SNikolas Klauser   using key_type   = _Key;
46971e9c80SNikolas Klauser 
47971e9c80SNikolas Klauser   const value_type __default_value_;
48971e9c80SNikolas Klauser   unordered_map<_Key, _Value, _Hash, _BinaryPredicate> __table_;
49971e9c80SNikolas Klauser 
50971e9c80SNikolas Klauser public:
519783f28cSLouis Dionne   _LIBCPP_HIDE_FROM_ABI explicit _BMSkipTable(
529783f28cSLouis Dionne       size_t __sz, value_type __default_value, _Hash __hash, _BinaryPredicate __pred)
539783f28cSLouis Dionne       : __default_value_(__default_value), __table_(__sz, __hash, __pred) {}
54971e9c80SNikolas Klauser 
559783f28cSLouis Dionne   _LIBCPP_HIDE_FROM_ABI void insert(const key_type& __key, value_type __val) { __table_[__key] = __val; }
56971e9c80SNikolas Klauser 
57971e9c80SNikolas Klauser   _LIBCPP_HIDE_FROM_ABI value_type operator[](const key_type& __key) const {
58971e9c80SNikolas Klauser     auto __it = __table_.find(__key);
59971e9c80SNikolas Klauser     return __it == __table_.end() ? __default_value_ : __it->second;
60971e9c80SNikolas Klauser   }
61971e9c80SNikolas Klauser };
62971e9c80SNikolas Klauser 
63971e9c80SNikolas Klauser // Special case small numeric values; use an array
649783f28cSLouis Dionne template <class _Key, class _Value, class _Hash, class _BinaryPredicate>
65971e9c80SNikolas Klauser class _BMSkipTable<_Key, _Value, _Hash, _BinaryPredicate, true> {
66971e9c80SNikolas Klauser private:
67971e9c80SNikolas Klauser   using value_type = _Value;
68971e9c80SNikolas Klauser   using key_type   = _Key;
69971e9c80SNikolas Klauser 
70971e9c80SNikolas Klauser   using unsigned_key_type = make_unsigned_t<key_type>;
71971e9c80SNikolas Klauser   std::array<value_type, 256> __table_;
72971e9c80SNikolas Klauser   static_assert(numeric_limits<unsigned_key_type>::max() < 256);
73971e9c80SNikolas Klauser 
74971e9c80SNikolas Klauser public:
75971e9c80SNikolas Klauser   _LIBCPP_HIDE_FROM_ABI explicit _BMSkipTable(size_t, value_type __default_value, _Hash, _BinaryPredicate) {
76971e9c80SNikolas Klauser     std::fill_n(__table_.data(), __table_.size(), __default_value);
77971e9c80SNikolas Klauser   }
78971e9c80SNikolas Klauser 
79971e9c80SNikolas Klauser   _LIBCPP_HIDE_FROM_ABI void insert(key_type __key, value_type __val) {
80971e9c80SNikolas Klauser     __table_[static_cast<unsigned_key_type>(__key)] = __val;
81971e9c80SNikolas Klauser   }
82971e9c80SNikolas Klauser 
83971e9c80SNikolas Klauser   _LIBCPP_HIDE_FROM_ABI value_type operator[](key_type __key) const {
84971e9c80SNikolas Klauser     return __table_[static_cast<unsigned_key_type>(__key)];
85971e9c80SNikolas Klauser   }
86971e9c80SNikolas Klauser };
87971e9c80SNikolas Klauser 
88971e9c80SNikolas Klauser template <class _RandomAccessIterator1,
89971e9c80SNikolas Klauser           class _Hash            = hash<typename iterator_traits<_RandomAccessIterator1>::value_type>,
90971e9c80SNikolas Klauser           class _BinaryPredicate = equal_to<>>
91971e9c80SNikolas Klauser class _LIBCPP_TEMPLATE_VIS boyer_moore_searcher {
92971e9c80SNikolas Klauser private:
93971e9c80SNikolas Klauser   using difference_type = typename std::iterator_traits<_RandomAccessIterator1>::difference_type;
94971e9c80SNikolas Klauser   using value_type      = typename std::iterator_traits<_RandomAccessIterator1>::value_type;
95*f6958523SNikolas Klauser   using __skip_table_type _LIBCPP_NODEBUG =
969783f28cSLouis Dionne       _BMSkipTable<value_type,
97971e9c80SNikolas Klauser                    difference_type,
98971e9c80SNikolas Klauser                    _Hash,
99971e9c80SNikolas Klauser                    _BinaryPredicate,
1009783f28cSLouis Dionne                    is_integral_v<value_type> && sizeof(value_type) == 1 && is_same_v<_Hash, hash<value_type>> &&
1019783f28cSLouis Dionne                        is_same_v<_BinaryPredicate, equal_to<>>>;
102971e9c80SNikolas Klauser 
103971e9c80SNikolas Klauser public:
1049783f28cSLouis Dionne   _LIBCPP_HIDE_FROM_ABI boyer_moore_searcher(
1059783f28cSLouis Dionne       _RandomAccessIterator1 __first,
106971e9c80SNikolas Klauser       _RandomAccessIterator1 __last,
107971e9c80SNikolas Klauser       _Hash __hash            = _Hash(),
108971e9c80SNikolas Klauser       _BinaryPredicate __pred = _BinaryPredicate())
109971e9c80SNikolas Klauser       : __first_(__first),
110971e9c80SNikolas Klauser         __last_(__last),
111971e9c80SNikolas Klauser         __pred_(__pred),
112971e9c80SNikolas Klauser         __pattern_length_(__last - __first),
113971e9c80SNikolas Klauser         __skip_table_(std::make_shared<__skip_table_type>(__pattern_length_, -1, __hash, __pred_)),
114971e9c80SNikolas Klauser         __suffix_(std::__allocate_shared_unbounded_array<difference_type[]>(
115971e9c80SNikolas Klauser             allocator<difference_type>(), __pattern_length_ + 1)) {
116971e9c80SNikolas Klauser     difference_type __i = 0;
117971e9c80SNikolas Klauser     while (__first != __last) {
118971e9c80SNikolas Klauser       __skip_table_->insert(*__first, __i);
119971e9c80SNikolas Klauser       ++__first;
120971e9c80SNikolas Klauser       ++__i;
121971e9c80SNikolas Klauser     }
122971e9c80SNikolas Klauser     __build_suffix_table(__first_, __last_, __pred_);
123971e9c80SNikolas Klauser   }
124971e9c80SNikolas Klauser 
125971e9c80SNikolas Klauser   template <class _RandomAccessIterator2>
12683ce1397SNikolas Klauser   _LIBCPP_HIDE_FROM_ABI pair<_RandomAccessIterator2, _RandomAccessIterator2>
127971e9c80SNikolas Klauser   operator()(_RandomAccessIterator2 __first, _RandomAccessIterator2 __last) const {
128971e9c80SNikolas Klauser     static_assert(__is_same_uncvref<typename iterator_traits<_RandomAccessIterator1>::value_type,
129971e9c80SNikolas Klauser                                     typename iterator_traits<_RandomAccessIterator2>::value_type>::value,
130971e9c80SNikolas Klauser                   "Corpus and Pattern iterators must point to the same type");
131971e9c80SNikolas Klauser     if (__first == __last)
132971e9c80SNikolas Klauser       return std::make_pair(__last, __last);
133971e9c80SNikolas Klauser     if (__first_ == __last_)
134971e9c80SNikolas Klauser       return std::make_pair(__first, __first);
135971e9c80SNikolas Klauser 
136971e9c80SNikolas Klauser     if (__pattern_length_ > (__last - __first))
137971e9c80SNikolas Klauser       return std::make_pair(__last, __last);
138971e9c80SNikolas Klauser     return __search(__first, __last);
139971e9c80SNikolas Klauser   }
140971e9c80SNikolas Klauser 
141971e9c80SNikolas Klauser private:
142971e9c80SNikolas Klauser   _RandomAccessIterator1 __first_;
143971e9c80SNikolas Klauser   _RandomAccessIterator1 __last_;
144971e9c80SNikolas Klauser   _BinaryPredicate __pred_;
145971e9c80SNikolas Klauser   difference_type __pattern_length_;
146971e9c80SNikolas Klauser   shared_ptr<__skip_table_type> __skip_table_;
147971e9c80SNikolas Klauser   shared_ptr<difference_type[]> __suffix_;
148971e9c80SNikolas Klauser 
149971e9c80SNikolas Klauser   template <class _RandomAccessIterator2>
15083ce1397SNikolas Klauser   _LIBCPP_HIDE_FROM_ABI pair<_RandomAccessIterator2, _RandomAccessIterator2>
151971e9c80SNikolas Klauser   __search(_RandomAccessIterator2 __f, _RandomAccessIterator2 __l) const {
152971e9c80SNikolas Klauser     _RandomAccessIterator2 __current      = __f;
153971e9c80SNikolas Klauser     const _RandomAccessIterator2 __last   = __l - __pattern_length_;
154971e9c80SNikolas Klauser     const __skip_table_type& __skip_table = *__skip_table_;
155971e9c80SNikolas Klauser 
156971e9c80SNikolas Klauser     while (__current <= __last) {
157971e9c80SNikolas Klauser       difference_type __j = __pattern_length_;
158971e9c80SNikolas Klauser       while (__pred_(__first_[__j - 1], __current[__j - 1])) {
159971e9c80SNikolas Klauser         --__j;
160971e9c80SNikolas Klauser         if (__j == 0)
161971e9c80SNikolas Klauser           return std::make_pair(__current, __current + __pattern_length_);
162971e9c80SNikolas Klauser       }
163971e9c80SNikolas Klauser 
164971e9c80SNikolas Klauser       difference_type __k = __skip_table[__current[__j - 1]];
165971e9c80SNikolas Klauser       difference_type __m = __j - __k - 1;
166971e9c80SNikolas Klauser       if (__k < __j && __m > __suffix_[__j])
167971e9c80SNikolas Klauser         __current += __m;
168971e9c80SNikolas Klauser       else
169971e9c80SNikolas Klauser         __current += __suffix_[__j];
170971e9c80SNikolas Klauser     }
171971e9c80SNikolas Klauser     return std::make_pair(__l, __l);
172971e9c80SNikolas Klauser   }
173971e9c80SNikolas Klauser 
174971e9c80SNikolas Klauser   template <class _Iterator, class _Container>
17583ce1397SNikolas Klauser   _LIBCPP_HIDE_FROM_ABI void
17683ce1397SNikolas Klauser   __compute_bm_prefix(_Iterator __first, _Iterator __last, _BinaryPredicate __pred, _Container& __prefix) {
177971e9c80SNikolas Klauser     const size_t __count = __last - __first;
178971e9c80SNikolas Klauser 
179971e9c80SNikolas Klauser     __prefix[0] = 0;
180971e9c80SNikolas Klauser     size_t __k  = 0;
181971e9c80SNikolas Klauser 
182971e9c80SNikolas Klauser     for (size_t __i = 1; __i != __count; ++__i) {
183971e9c80SNikolas Klauser       while (__k > 0 && !__pred(__first[__k], __first[__i]))
184971e9c80SNikolas Klauser         __k = __prefix[__k - 1];
185971e9c80SNikolas Klauser 
186971e9c80SNikolas Klauser       if (__pred(__first[__k], __first[__i]))
187971e9c80SNikolas Klauser         ++__k;
188971e9c80SNikolas Klauser       __prefix[__i] = __k;
189971e9c80SNikolas Klauser     }
190971e9c80SNikolas Klauser   }
191971e9c80SNikolas Klauser 
19283ce1397SNikolas Klauser   _LIBCPP_HIDE_FROM_ABI void
19383ce1397SNikolas Klauser   __build_suffix_table(_RandomAccessIterator1 __first, _RandomAccessIterator1 __last, _BinaryPredicate __pred) {
194971e9c80SNikolas Klauser     const size_t __count = __last - __first;
195971e9c80SNikolas Klauser 
196971e9c80SNikolas Klauser     if (__count == 0)
197971e9c80SNikolas Klauser       return;
198971e9c80SNikolas Klauser 
199971e9c80SNikolas Klauser     vector<difference_type> __scratch(__count);
200971e9c80SNikolas Klauser 
201971e9c80SNikolas Klauser     __compute_bm_prefix(__first, __last, __pred, __scratch);
202971e9c80SNikolas Klauser     for (size_t __i = 0; __i <= __count; ++__i)
203971e9c80SNikolas Klauser       __suffix_[__i] = __count - __scratch[__count - 1];
204971e9c80SNikolas Klauser 
205971e9c80SNikolas Klauser     using _ReverseIter = reverse_iterator<_RandomAccessIterator1>;
206971e9c80SNikolas Klauser     __compute_bm_prefix(_ReverseIter(__last), _ReverseIter(__first), __pred, __scratch);
207971e9c80SNikolas Klauser 
208971e9c80SNikolas Klauser     for (size_t __i = 0; __i != __count; ++__i) {
209971e9c80SNikolas Klauser       const size_t __j          = __count - __scratch[__i];
210971e9c80SNikolas Klauser       const difference_type __k = __i - __scratch[__i] + 1;
211971e9c80SNikolas Klauser 
212971e9c80SNikolas Klauser       if (__suffix_[__j] > __k)
213971e9c80SNikolas Klauser         __suffix_[__j] = __k;
214971e9c80SNikolas Klauser     }
215971e9c80SNikolas Klauser   }
216971e9c80SNikolas Klauser };
217c2df7076SLouis Dionne _LIBCPP_CTAD_SUPPORTED_FOR_TYPE(boyer_moore_searcher);
218971e9c80SNikolas Klauser 
219971e9c80SNikolas Klauser template <class _RandomAccessIterator1,
220971e9c80SNikolas Klauser           class _Hash            = hash<typename iterator_traits<_RandomAccessIterator1>::value_type>,
221971e9c80SNikolas Klauser           class _BinaryPredicate = equal_to<>>
222971e9c80SNikolas Klauser class _LIBCPP_TEMPLATE_VIS boyer_moore_horspool_searcher {
223971e9c80SNikolas Klauser private:
224971e9c80SNikolas Klauser   using difference_type = typename iterator_traits<_RandomAccessIterator1>::difference_type;
225971e9c80SNikolas Klauser   using value_type      = typename iterator_traits<_RandomAccessIterator1>::value_type;
226*f6958523SNikolas Klauser   using __skip_table_type _LIBCPP_NODEBUG =
2279783f28cSLouis Dionne       _BMSkipTable<value_type,
228971e9c80SNikolas Klauser                    difference_type,
229971e9c80SNikolas Klauser                    _Hash,
230971e9c80SNikolas Klauser                    _BinaryPredicate,
2319783f28cSLouis Dionne                    is_integral_v<value_type> && sizeof(value_type) == 1 && is_same_v<_Hash, hash<value_type>> &&
2329783f28cSLouis Dionne                        is_same_v<_BinaryPredicate, equal_to<>>>;
2339783f28cSLouis Dionne 
234971e9c80SNikolas Klauser public:
2359783f28cSLouis Dionne   _LIBCPP_HIDE_FROM_ABI boyer_moore_horspool_searcher(
2369783f28cSLouis Dionne       _RandomAccessIterator1 __first,
237971e9c80SNikolas Klauser       _RandomAccessIterator1 __last,
238971e9c80SNikolas Klauser       _Hash __hash            = _Hash(),
239971e9c80SNikolas Klauser       _BinaryPredicate __pred = _BinaryPredicate())
240971e9c80SNikolas Klauser       : __first_(__first),
241971e9c80SNikolas Klauser         __last_(__last),
242971e9c80SNikolas Klauser         __pred_(__pred),
243971e9c80SNikolas Klauser         __pattern_length_(__last - __first),
244971e9c80SNikolas Klauser         __skip_table_(std::make_shared<__skip_table_type>(__pattern_length_, __pattern_length_, __hash, __pred_)) {
245971e9c80SNikolas Klauser     if (__first == __last)
246971e9c80SNikolas Klauser       return;
247971e9c80SNikolas Klauser     --__last;
248971e9c80SNikolas Klauser     difference_type __i = 0;
249971e9c80SNikolas Klauser     while (__first != __last) {
250971e9c80SNikolas Klauser       __skip_table_->insert(*__first, __pattern_length_ - 1 - __i);
251971e9c80SNikolas Klauser       ++__first;
252971e9c80SNikolas Klauser       ++__i;
253971e9c80SNikolas Klauser     }
254971e9c80SNikolas Klauser   }
255971e9c80SNikolas Klauser 
256971e9c80SNikolas Klauser   template <class _RandomAccessIterator2>
25783ce1397SNikolas Klauser   _LIBCPP_HIDE_FROM_ABI pair<_RandomAccessIterator2, _RandomAccessIterator2>
258971e9c80SNikolas Klauser   operator()(_RandomAccessIterator2 __first, _RandomAccessIterator2 __last) const {
259971e9c80SNikolas Klauser     static_assert(__is_same_uncvref<typename std::iterator_traits<_RandomAccessIterator1>::value_type,
260971e9c80SNikolas Klauser                                     typename std::iterator_traits<_RandomAccessIterator2>::value_type>::value,
261971e9c80SNikolas Klauser                   "Corpus and Pattern iterators must point to the same type");
262971e9c80SNikolas Klauser     if (__first == __last)
263971e9c80SNikolas Klauser       return std::make_pair(__last, __last);
264971e9c80SNikolas Klauser     if (__first_ == __last_)
265971e9c80SNikolas Klauser       return std::make_pair(__first, __first);
266971e9c80SNikolas Klauser 
267971e9c80SNikolas Klauser     if (__pattern_length_ > __last - __first)
268971e9c80SNikolas Klauser       return std::make_pair(__last, __last);
269971e9c80SNikolas Klauser 
270971e9c80SNikolas Klauser     return __search(__first, __last);
271971e9c80SNikolas Klauser   }
272971e9c80SNikolas Klauser 
273971e9c80SNikolas Klauser private:
274971e9c80SNikolas Klauser   _RandomAccessIterator1 __first_;
275971e9c80SNikolas Klauser   _RandomAccessIterator1 __last_;
276971e9c80SNikolas Klauser   _BinaryPredicate __pred_;
277971e9c80SNikolas Klauser   difference_type __pattern_length_;
278971e9c80SNikolas Klauser   shared_ptr<__skip_table_type> __skip_table_;
279971e9c80SNikolas Klauser 
280971e9c80SNikolas Klauser   template <class _RandomAccessIterator2>
28183ce1397SNikolas Klauser   _LIBCPP_HIDE_FROM_ABI pair<_RandomAccessIterator2, _RandomAccessIterator2>
282971e9c80SNikolas Klauser   __search(_RandomAccessIterator2 __f, _RandomAccessIterator2 __l) const {
283971e9c80SNikolas Klauser     _RandomAccessIterator2 __current      = __f;
284971e9c80SNikolas Klauser     const _RandomAccessIterator2 __last   = __l - __pattern_length_;
285971e9c80SNikolas Klauser     const __skip_table_type& __skip_table = *__skip_table_;
286971e9c80SNikolas Klauser 
287971e9c80SNikolas Klauser     while (__current <= __last) {
288971e9c80SNikolas Klauser       difference_type __j = __pattern_length_;
289971e9c80SNikolas Klauser       while (__pred_(__first_[__j - 1], __current[__j - 1])) {
290971e9c80SNikolas Klauser         --__j;
291971e9c80SNikolas Klauser         if (__j == 0)
292971e9c80SNikolas Klauser           return std::make_pair(__current, __current + __pattern_length_);
293971e9c80SNikolas Klauser       }
294971e9c80SNikolas Klauser       __current += __skip_table[__current[__pattern_length_ - 1]];
295971e9c80SNikolas Klauser     }
296971e9c80SNikolas Klauser     return std::make_pair(__l, __l);
297971e9c80SNikolas Klauser   }
298971e9c80SNikolas Klauser };
299c2df7076SLouis Dionne _LIBCPP_CTAD_SUPPORTED_FOR_TYPE(boyer_moore_horspool_searcher);
300971e9c80SNikolas Klauser 
301971e9c80SNikolas Klauser _LIBCPP_END_NAMESPACE_STD
302971e9c80SNikolas Klauser 
303971e9c80SNikolas Klauser _LIBCPP_POP_MACROS
304971e9c80SNikolas Klauser 
3054f15267dSNikolas Klauser #endif // _LIBCPP_STD_VER >= 17
306971e9c80SNikolas Klauser 
307971e9c80SNikolas Klauser #endif // _LIBCPP___FUNCTIONAL_BOYER_MOORE_SEARCHER_H
308