xref: /llvm-project/libcxx/include/__functional/boyer_moore_searcher.h (revision f69585235ec85d54e0f3fc41b2d5700430907f99)
1 //===----------------------------------------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #ifndef _LIBCPP___FUNCTIONAL_BOYER_MOORE_SEARCHER_H
10 #define _LIBCPP___FUNCTIONAL_BOYER_MOORE_SEARCHER_H
11 
12 #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
13 #  pragma GCC system_header
14 #endif
15 
16 #include <__algorithm/fill_n.h>
17 #include <__config>
18 #include <__functional/hash.h>
19 #include <__functional/operations.h>
20 #include <__iterator/distance.h>
21 #include <__iterator/iterator_traits.h>
22 #include <__memory/shared_ptr.h>
23 #include <__type_traits/make_unsigned.h>
24 #include <__utility/pair.h>
25 #include <__vector/vector.h>
26 #include <array>
27 #include <limits>
28 #include <unordered_map>
29 
30 #if _LIBCPP_STD_VER >= 17
31 
32 _LIBCPP_PUSH_MACROS
33 #  include <__undef_macros>
34 
35 _LIBCPP_BEGIN_NAMESPACE_STD
36 
37 template <class _Key, class _Value, class _Hash, class _BinaryPredicate, bool /*useArray*/>
38 class _BMSkipTable;
39 
40 // General case for BM data searching; use a map
41 template <class _Key, class _Value, class _Hash, class _BinaryPredicate>
42 class _BMSkipTable<_Key, _Value, _Hash, _BinaryPredicate, false> {
43 private:
44   using value_type = _Value;
45   using key_type   = _Key;
46 
47   const value_type __default_value_;
48   unordered_map<_Key, _Value, _Hash, _BinaryPredicate> __table_;
49 
50 public:
51   _LIBCPP_HIDE_FROM_ABI explicit _BMSkipTable(
52       size_t __sz, value_type __default_value, _Hash __hash, _BinaryPredicate __pred)
53       : __default_value_(__default_value), __table_(__sz, __hash, __pred) {}
54 
55   _LIBCPP_HIDE_FROM_ABI void insert(const key_type& __key, value_type __val) { __table_[__key] = __val; }
56 
57   _LIBCPP_HIDE_FROM_ABI value_type operator[](const key_type& __key) const {
58     auto __it = __table_.find(__key);
59     return __it == __table_.end() ? __default_value_ : __it->second;
60   }
61 };
62 
63 // Special case small numeric values; use an array
64 template <class _Key, class _Value, class _Hash, class _BinaryPredicate>
65 class _BMSkipTable<_Key, _Value, _Hash, _BinaryPredicate, true> {
66 private:
67   using value_type = _Value;
68   using key_type   = _Key;
69 
70   using unsigned_key_type = make_unsigned_t<key_type>;
71   std::array<value_type, 256> __table_;
72   static_assert(numeric_limits<unsigned_key_type>::max() < 256);
73 
74 public:
75   _LIBCPP_HIDE_FROM_ABI explicit _BMSkipTable(size_t, value_type __default_value, _Hash, _BinaryPredicate) {
76     std::fill_n(__table_.data(), __table_.size(), __default_value);
77   }
78 
79   _LIBCPP_HIDE_FROM_ABI void insert(key_type __key, value_type __val) {
80     __table_[static_cast<unsigned_key_type>(__key)] = __val;
81   }
82 
83   _LIBCPP_HIDE_FROM_ABI value_type operator[](key_type __key) const {
84     return __table_[static_cast<unsigned_key_type>(__key)];
85   }
86 };
87 
88 template <class _RandomAccessIterator1,
89           class _Hash            = hash<typename iterator_traits<_RandomAccessIterator1>::value_type>,
90           class _BinaryPredicate = equal_to<>>
91 class _LIBCPP_TEMPLATE_VIS boyer_moore_searcher {
92 private:
93   using difference_type = typename std::iterator_traits<_RandomAccessIterator1>::difference_type;
94   using value_type      = typename std::iterator_traits<_RandomAccessIterator1>::value_type;
95   using __skip_table_type _LIBCPP_NODEBUG =
96       _BMSkipTable<value_type,
97                    difference_type,
98                    _Hash,
99                    _BinaryPredicate,
100                    is_integral_v<value_type> && sizeof(value_type) == 1 && is_same_v<_Hash, hash<value_type>> &&
101                        is_same_v<_BinaryPredicate, equal_to<>>>;
102 
103 public:
104   _LIBCPP_HIDE_FROM_ABI boyer_moore_searcher(
105       _RandomAccessIterator1 __first,
106       _RandomAccessIterator1 __last,
107       _Hash __hash            = _Hash(),
108       _BinaryPredicate __pred = _BinaryPredicate())
109       : __first_(__first),
110         __last_(__last),
111         __pred_(__pred),
112         __pattern_length_(__last - __first),
113         __skip_table_(std::make_shared<__skip_table_type>(__pattern_length_, -1, __hash, __pred_)),
114         __suffix_(std::__allocate_shared_unbounded_array<difference_type[]>(
115             allocator<difference_type>(), __pattern_length_ + 1)) {
116     difference_type __i = 0;
117     while (__first != __last) {
118       __skip_table_->insert(*__first, __i);
119       ++__first;
120       ++__i;
121     }
122     __build_suffix_table(__first_, __last_, __pred_);
123   }
124 
125   template <class _RandomAccessIterator2>
126   _LIBCPP_HIDE_FROM_ABI pair<_RandomAccessIterator2, _RandomAccessIterator2>
127   operator()(_RandomAccessIterator2 __first, _RandomAccessIterator2 __last) const {
128     static_assert(__is_same_uncvref<typename iterator_traits<_RandomAccessIterator1>::value_type,
129                                     typename iterator_traits<_RandomAccessIterator2>::value_type>::value,
130                   "Corpus and Pattern iterators must point to the same type");
131     if (__first == __last)
132       return std::make_pair(__last, __last);
133     if (__first_ == __last_)
134       return std::make_pair(__first, __first);
135 
136     if (__pattern_length_ > (__last - __first))
137       return std::make_pair(__last, __last);
138     return __search(__first, __last);
139   }
140 
141 private:
142   _RandomAccessIterator1 __first_;
143   _RandomAccessIterator1 __last_;
144   _BinaryPredicate __pred_;
145   difference_type __pattern_length_;
146   shared_ptr<__skip_table_type> __skip_table_;
147   shared_ptr<difference_type[]> __suffix_;
148 
149   template <class _RandomAccessIterator2>
150   _LIBCPP_HIDE_FROM_ABI pair<_RandomAccessIterator2, _RandomAccessIterator2>
151   __search(_RandomAccessIterator2 __f, _RandomAccessIterator2 __l) const {
152     _RandomAccessIterator2 __current      = __f;
153     const _RandomAccessIterator2 __last   = __l - __pattern_length_;
154     const __skip_table_type& __skip_table = *__skip_table_;
155 
156     while (__current <= __last) {
157       difference_type __j = __pattern_length_;
158       while (__pred_(__first_[__j - 1], __current[__j - 1])) {
159         --__j;
160         if (__j == 0)
161           return std::make_pair(__current, __current + __pattern_length_);
162       }
163 
164       difference_type __k = __skip_table[__current[__j - 1]];
165       difference_type __m = __j - __k - 1;
166       if (__k < __j && __m > __suffix_[__j])
167         __current += __m;
168       else
169         __current += __suffix_[__j];
170     }
171     return std::make_pair(__l, __l);
172   }
173 
174   template <class _Iterator, class _Container>
175   _LIBCPP_HIDE_FROM_ABI void
176   __compute_bm_prefix(_Iterator __first, _Iterator __last, _BinaryPredicate __pred, _Container& __prefix) {
177     const size_t __count = __last - __first;
178 
179     __prefix[0] = 0;
180     size_t __k  = 0;
181 
182     for (size_t __i = 1; __i != __count; ++__i) {
183       while (__k > 0 && !__pred(__first[__k], __first[__i]))
184         __k = __prefix[__k - 1];
185 
186       if (__pred(__first[__k], __first[__i]))
187         ++__k;
188       __prefix[__i] = __k;
189     }
190   }
191 
192   _LIBCPP_HIDE_FROM_ABI void
193   __build_suffix_table(_RandomAccessIterator1 __first, _RandomAccessIterator1 __last, _BinaryPredicate __pred) {
194     const size_t __count = __last - __first;
195 
196     if (__count == 0)
197       return;
198 
199     vector<difference_type> __scratch(__count);
200 
201     __compute_bm_prefix(__first, __last, __pred, __scratch);
202     for (size_t __i = 0; __i <= __count; ++__i)
203       __suffix_[__i] = __count - __scratch[__count - 1];
204 
205     using _ReverseIter = reverse_iterator<_RandomAccessIterator1>;
206     __compute_bm_prefix(_ReverseIter(__last), _ReverseIter(__first), __pred, __scratch);
207 
208     for (size_t __i = 0; __i != __count; ++__i) {
209       const size_t __j          = __count - __scratch[__i];
210       const difference_type __k = __i - __scratch[__i] + 1;
211 
212       if (__suffix_[__j] > __k)
213         __suffix_[__j] = __k;
214     }
215   }
216 };
217 _LIBCPP_CTAD_SUPPORTED_FOR_TYPE(boyer_moore_searcher);
218 
219 template <class _RandomAccessIterator1,
220           class _Hash            = hash<typename iterator_traits<_RandomAccessIterator1>::value_type>,
221           class _BinaryPredicate = equal_to<>>
222 class _LIBCPP_TEMPLATE_VIS boyer_moore_horspool_searcher {
223 private:
224   using difference_type = typename iterator_traits<_RandomAccessIterator1>::difference_type;
225   using value_type      = typename iterator_traits<_RandomAccessIterator1>::value_type;
226   using __skip_table_type _LIBCPP_NODEBUG =
227       _BMSkipTable<value_type,
228                    difference_type,
229                    _Hash,
230                    _BinaryPredicate,
231                    is_integral_v<value_type> && sizeof(value_type) == 1 && is_same_v<_Hash, hash<value_type>> &&
232                        is_same_v<_BinaryPredicate, equal_to<>>>;
233 
234 public:
235   _LIBCPP_HIDE_FROM_ABI boyer_moore_horspool_searcher(
236       _RandomAccessIterator1 __first,
237       _RandomAccessIterator1 __last,
238       _Hash __hash            = _Hash(),
239       _BinaryPredicate __pred = _BinaryPredicate())
240       : __first_(__first),
241         __last_(__last),
242         __pred_(__pred),
243         __pattern_length_(__last - __first),
244         __skip_table_(std::make_shared<__skip_table_type>(__pattern_length_, __pattern_length_, __hash, __pred_)) {
245     if (__first == __last)
246       return;
247     --__last;
248     difference_type __i = 0;
249     while (__first != __last) {
250       __skip_table_->insert(*__first, __pattern_length_ - 1 - __i);
251       ++__first;
252       ++__i;
253     }
254   }
255 
256   template <class _RandomAccessIterator2>
257   _LIBCPP_HIDE_FROM_ABI pair<_RandomAccessIterator2, _RandomAccessIterator2>
258   operator()(_RandomAccessIterator2 __first, _RandomAccessIterator2 __last) const {
259     static_assert(__is_same_uncvref<typename std::iterator_traits<_RandomAccessIterator1>::value_type,
260                                     typename std::iterator_traits<_RandomAccessIterator2>::value_type>::value,
261                   "Corpus and Pattern iterators must point to the same type");
262     if (__first == __last)
263       return std::make_pair(__last, __last);
264     if (__first_ == __last_)
265       return std::make_pair(__first, __first);
266 
267     if (__pattern_length_ > __last - __first)
268       return std::make_pair(__last, __last);
269 
270     return __search(__first, __last);
271   }
272 
273 private:
274   _RandomAccessIterator1 __first_;
275   _RandomAccessIterator1 __last_;
276   _BinaryPredicate __pred_;
277   difference_type __pattern_length_;
278   shared_ptr<__skip_table_type> __skip_table_;
279 
280   template <class _RandomAccessIterator2>
281   _LIBCPP_HIDE_FROM_ABI pair<_RandomAccessIterator2, _RandomAccessIterator2>
282   __search(_RandomAccessIterator2 __f, _RandomAccessIterator2 __l) const {
283     _RandomAccessIterator2 __current      = __f;
284     const _RandomAccessIterator2 __last   = __l - __pattern_length_;
285     const __skip_table_type& __skip_table = *__skip_table_;
286 
287     while (__current <= __last) {
288       difference_type __j = __pattern_length_;
289       while (__pred_(__first_[__j - 1], __current[__j - 1])) {
290         --__j;
291         if (__j == 0)
292           return std::make_pair(__current, __current + __pattern_length_);
293       }
294       __current += __skip_table[__current[__pattern_length_ - 1]];
295     }
296     return std::make_pair(__l, __l);
297   }
298 };
299 _LIBCPP_CTAD_SUPPORTED_FOR_TYPE(boyer_moore_horspool_searcher);
300 
301 _LIBCPP_END_NAMESPACE_STD
302 
303 _LIBCPP_POP_MACROS
304 
305 #endif // _LIBCPP_STD_VER >= 17
306 
307 #endif // _LIBCPP___FUNCTIONAL_BOYER_MOORE_SEARCHER_H
308