xref: /freebsd-src/contrib/llvm-project/libcxx/include/__functional/boyer_moore_searcher.h (revision cb14a3fe5122c879eae1fb480ed7ce82a699ddb6)
181ad6265SDimitry Andric //===----------------------------------------------------------------------===//
281ad6265SDimitry Andric //
381ad6265SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
481ad6265SDimitry Andric // See https://llvm.org/LICENSE.txt for license information.
581ad6265SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
681ad6265SDimitry Andric //
781ad6265SDimitry Andric //===----------------------------------------------------------------------===//
881ad6265SDimitry Andric 
981ad6265SDimitry Andric #ifndef _LIBCPP___FUNCTIONAL_BOYER_MOORE_SEARCHER_H
1081ad6265SDimitry Andric #define _LIBCPP___FUNCTIONAL_BOYER_MOORE_SEARCHER_H
1181ad6265SDimitry Andric 
1281ad6265SDimitry Andric #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
1381ad6265SDimitry Andric #  pragma GCC system_header
1481ad6265SDimitry Andric #endif
1581ad6265SDimitry Andric 
1681ad6265SDimitry Andric #include <__algorithm/fill_n.h>
1781ad6265SDimitry Andric #include <__config>
1881ad6265SDimitry Andric #include <__functional/hash.h>
1981ad6265SDimitry Andric #include <__functional/operations.h>
2081ad6265SDimitry Andric #include <__iterator/distance.h>
2181ad6265SDimitry Andric #include <__iterator/iterator_traits.h>
2281ad6265SDimitry Andric #include <__memory/shared_ptr.h>
2306c3fb27SDimitry Andric #include <__type_traits/make_unsigned.h>
2481ad6265SDimitry Andric #include <__utility/pair.h>
2581ad6265SDimitry Andric #include <array>
2681ad6265SDimitry Andric #include <unordered_map>
2781ad6265SDimitry Andric #include <vector>
2881ad6265SDimitry Andric 
2906c3fb27SDimitry Andric #if _LIBCPP_STD_VER >= 17
3081ad6265SDimitry Andric 
3181ad6265SDimitry Andric _LIBCPP_PUSH_MACROS
3281ad6265SDimitry Andric #  include <__undef_macros>
3381ad6265SDimitry Andric 
3481ad6265SDimitry Andric _LIBCPP_BEGIN_NAMESPACE_STD
3581ad6265SDimitry Andric 
36*cb14a3feSDimitry Andric template <class _Key, class _Value, class _Hash, class _BinaryPredicate, bool /*useArray*/>
3781ad6265SDimitry Andric class _BMSkipTable;
3881ad6265SDimitry Andric 
3981ad6265SDimitry Andric // General case for BM data searching; use a map
40*cb14a3feSDimitry Andric template <class _Key, class _Value, class _Hash, class _BinaryPredicate>
4181ad6265SDimitry Andric class _BMSkipTable<_Key, _Value, _Hash, _BinaryPredicate, false> {
4281ad6265SDimitry Andric private:
4381ad6265SDimitry Andric   using value_type = _Value;
4481ad6265SDimitry Andric   using key_type   = _Key;
4581ad6265SDimitry Andric 
4681ad6265SDimitry Andric   const value_type __default_value_;
4781ad6265SDimitry Andric   unordered_map<_Key, _Value, _Hash, _BinaryPredicate> __table_;
4881ad6265SDimitry Andric 
4981ad6265SDimitry Andric public:
_BMSkipTable(size_t __sz,value_type __default_value,_Hash __hash,_BinaryPredicate __pred)50*cb14a3feSDimitry Andric   _LIBCPP_HIDE_FROM_ABI explicit _BMSkipTable(
51*cb14a3feSDimitry Andric       size_t __sz, value_type __default_value, _Hash __hash, _BinaryPredicate __pred)
52*cb14a3feSDimitry Andric       : __default_value_(__default_value), __table_(__sz, __hash, __pred) {}
5381ad6265SDimitry Andric 
insert(const key_type & __key,value_type __val)54*cb14a3feSDimitry Andric   _LIBCPP_HIDE_FROM_ABI void insert(const key_type& __key, value_type __val) { __table_[__key] = __val; }
5581ad6265SDimitry Andric 
5681ad6265SDimitry Andric   _LIBCPP_HIDE_FROM_ABI value_type operator[](const key_type& __key) const {
5781ad6265SDimitry Andric     auto __it = __table_.find(__key);
5881ad6265SDimitry Andric     return __it == __table_.end() ? __default_value_ : __it->second;
5981ad6265SDimitry Andric   }
6081ad6265SDimitry Andric };
6181ad6265SDimitry Andric 
6281ad6265SDimitry Andric // Special case small numeric values; use an array
63*cb14a3feSDimitry Andric template <class _Key, class _Value, class _Hash, class _BinaryPredicate>
6481ad6265SDimitry Andric class _BMSkipTable<_Key, _Value, _Hash, _BinaryPredicate, true> {
6581ad6265SDimitry Andric private:
6681ad6265SDimitry Andric   using value_type = _Value;
6781ad6265SDimitry Andric   using key_type   = _Key;
6881ad6265SDimitry Andric 
6981ad6265SDimitry Andric   using unsigned_key_type = make_unsigned_t<key_type>;
7081ad6265SDimitry Andric   std::array<value_type, 256> __table_;
7181ad6265SDimitry Andric   static_assert(numeric_limits<unsigned_key_type>::max() < 256);
7281ad6265SDimitry Andric 
7381ad6265SDimitry Andric public:
_BMSkipTable(size_t,value_type __default_value,_Hash,_BinaryPredicate)7481ad6265SDimitry Andric   _LIBCPP_HIDE_FROM_ABI explicit _BMSkipTable(size_t, value_type __default_value, _Hash, _BinaryPredicate) {
7581ad6265SDimitry Andric     std::fill_n(__table_.data(), __table_.size(), __default_value);
7681ad6265SDimitry Andric   }
7781ad6265SDimitry Andric 
insert(key_type __key,value_type __val)7881ad6265SDimitry Andric   _LIBCPP_HIDE_FROM_ABI void insert(key_type __key, value_type __val) {
7981ad6265SDimitry Andric     __table_[static_cast<unsigned_key_type>(__key)] = __val;
8081ad6265SDimitry Andric   }
8181ad6265SDimitry Andric 
8281ad6265SDimitry Andric   _LIBCPP_HIDE_FROM_ABI value_type operator[](key_type __key) const {
8381ad6265SDimitry Andric     return __table_[static_cast<unsigned_key_type>(__key)];
8481ad6265SDimitry Andric   }
8581ad6265SDimitry Andric };
8681ad6265SDimitry Andric 
8781ad6265SDimitry Andric template <class _RandomAccessIterator1,
8881ad6265SDimitry Andric           class _Hash            = hash<typename iterator_traits<_RandomAccessIterator1>::value_type>,
8981ad6265SDimitry Andric           class _BinaryPredicate = equal_to<>>
9081ad6265SDimitry Andric class _LIBCPP_TEMPLATE_VIS boyer_moore_searcher {
9181ad6265SDimitry Andric private:
9281ad6265SDimitry Andric   using difference_type = typename std::iterator_traits<_RandomAccessIterator1>::difference_type;
9381ad6265SDimitry Andric   using value_type      = typename std::iterator_traits<_RandomAccessIterator1>::value_type;
94*cb14a3feSDimitry Andric   using __skip_table_type =
95*cb14a3feSDimitry Andric       _BMSkipTable<value_type,
9681ad6265SDimitry Andric                    difference_type,
9781ad6265SDimitry Andric                    _Hash,
9881ad6265SDimitry Andric                    _BinaryPredicate,
99*cb14a3feSDimitry Andric                    is_integral_v<value_type> && sizeof(value_type) == 1 && is_same_v<_Hash, hash<value_type>> &&
100*cb14a3feSDimitry Andric                        is_same_v<_BinaryPredicate, equal_to<>>>;
10181ad6265SDimitry Andric 
10281ad6265SDimitry Andric public:
103*cb14a3feSDimitry Andric   _LIBCPP_HIDE_FROM_ABI boyer_moore_searcher(
104*cb14a3feSDimitry Andric       _RandomAccessIterator1 __first,
10581ad6265SDimitry Andric       _RandomAccessIterator1 __last,
10681ad6265SDimitry Andric       _Hash __hash            = _Hash(),
10781ad6265SDimitry Andric       _BinaryPredicate __pred = _BinaryPredicate())
__first_(__first)10881ad6265SDimitry Andric       : __first_(__first),
10981ad6265SDimitry Andric         __last_(__last),
11081ad6265SDimitry Andric         __pred_(__pred),
11181ad6265SDimitry Andric         __pattern_length_(__last - __first),
11281ad6265SDimitry Andric         __skip_table_(std::make_shared<__skip_table_type>(__pattern_length_, -1, __hash, __pred_)),
11381ad6265SDimitry Andric         __suffix_(std::__allocate_shared_unbounded_array<difference_type[]>(
11481ad6265SDimitry Andric             allocator<difference_type>(), __pattern_length_ + 1)) {
11581ad6265SDimitry Andric     difference_type __i = 0;
11681ad6265SDimitry Andric     while (__first != __last) {
11781ad6265SDimitry Andric       __skip_table_->insert(*__first, __i);
11881ad6265SDimitry Andric       ++__first;
11981ad6265SDimitry Andric       ++__i;
12081ad6265SDimitry Andric     }
12181ad6265SDimitry Andric     __build_suffix_table(__first_, __last_, __pred_);
12281ad6265SDimitry Andric   }
12381ad6265SDimitry Andric 
12481ad6265SDimitry Andric   template <class _RandomAccessIterator2>
12506c3fb27SDimitry Andric   _LIBCPP_HIDE_FROM_ABI pair<_RandomAccessIterator2, _RandomAccessIterator2>
operator()12681ad6265SDimitry Andric   operator()(_RandomAccessIterator2 __first, _RandomAccessIterator2 __last) const {
12781ad6265SDimitry Andric     static_assert(__is_same_uncvref<typename iterator_traits<_RandomAccessIterator1>::value_type,
12881ad6265SDimitry Andric                                     typename iterator_traits<_RandomAccessIterator2>::value_type>::value,
12981ad6265SDimitry Andric                   "Corpus and Pattern iterators must point to the same type");
13081ad6265SDimitry Andric     if (__first == __last)
13181ad6265SDimitry Andric       return std::make_pair(__last, __last);
13281ad6265SDimitry Andric     if (__first_ == __last_)
13381ad6265SDimitry Andric       return std::make_pair(__first, __first);
13481ad6265SDimitry Andric 
13581ad6265SDimitry Andric     if (__pattern_length_ > (__last - __first))
13681ad6265SDimitry Andric       return std::make_pair(__last, __last);
13781ad6265SDimitry Andric     return __search(__first, __last);
13881ad6265SDimitry Andric   }
13981ad6265SDimitry Andric 
14081ad6265SDimitry Andric private:
14181ad6265SDimitry Andric   _RandomAccessIterator1 __first_;
14281ad6265SDimitry Andric   _RandomAccessIterator1 __last_;
14381ad6265SDimitry Andric   _BinaryPredicate __pred_;
14481ad6265SDimitry Andric   difference_type __pattern_length_;
14581ad6265SDimitry Andric   shared_ptr<__skip_table_type> __skip_table_;
14681ad6265SDimitry Andric   shared_ptr<difference_type[]> __suffix_;
14781ad6265SDimitry Andric 
14881ad6265SDimitry Andric   template <class _RandomAccessIterator2>
14906c3fb27SDimitry Andric   _LIBCPP_HIDE_FROM_ABI pair<_RandomAccessIterator2, _RandomAccessIterator2>
__search(_RandomAccessIterator2 __f,_RandomAccessIterator2 __l)15081ad6265SDimitry Andric   __search(_RandomAccessIterator2 __f, _RandomAccessIterator2 __l) const {
15181ad6265SDimitry Andric     _RandomAccessIterator2 __current      = __f;
15281ad6265SDimitry Andric     const _RandomAccessIterator2 __last   = __l - __pattern_length_;
15381ad6265SDimitry Andric     const __skip_table_type& __skip_table = *__skip_table_;
15481ad6265SDimitry Andric 
15581ad6265SDimitry Andric     while (__current <= __last) {
15681ad6265SDimitry Andric       difference_type __j = __pattern_length_;
15781ad6265SDimitry Andric       while (__pred_(__first_[__j - 1], __current[__j - 1])) {
15881ad6265SDimitry Andric         --__j;
15981ad6265SDimitry Andric         if (__j == 0)
16081ad6265SDimitry Andric           return std::make_pair(__current, __current + __pattern_length_);
16181ad6265SDimitry Andric       }
16281ad6265SDimitry Andric 
16381ad6265SDimitry Andric       difference_type __k = __skip_table[__current[__j - 1]];
16481ad6265SDimitry Andric       difference_type __m = __j - __k - 1;
16581ad6265SDimitry Andric       if (__k < __j && __m > __suffix_[__j])
16681ad6265SDimitry Andric         __current += __m;
16781ad6265SDimitry Andric       else
16881ad6265SDimitry Andric         __current += __suffix_[__j];
16981ad6265SDimitry Andric     }
17081ad6265SDimitry Andric     return std::make_pair(__l, __l);
17181ad6265SDimitry Andric   }
17281ad6265SDimitry Andric 
17381ad6265SDimitry Andric   template <class _Iterator, class _Container>
17406c3fb27SDimitry Andric   _LIBCPP_HIDE_FROM_ABI void
__compute_bm_prefix(_Iterator __first,_Iterator __last,_BinaryPredicate __pred,_Container & __prefix)17506c3fb27SDimitry Andric   __compute_bm_prefix(_Iterator __first, _Iterator __last, _BinaryPredicate __pred, _Container& __prefix) {
17681ad6265SDimitry Andric     const size_t __count = __last - __first;
17781ad6265SDimitry Andric 
17881ad6265SDimitry Andric     __prefix[0] = 0;
17981ad6265SDimitry Andric     size_t __k  = 0;
18081ad6265SDimitry Andric 
18181ad6265SDimitry Andric     for (size_t __i = 1; __i != __count; ++__i) {
18281ad6265SDimitry Andric       while (__k > 0 && !__pred(__first[__k], __first[__i]))
18381ad6265SDimitry Andric         __k = __prefix[__k - 1];
18481ad6265SDimitry Andric 
18581ad6265SDimitry Andric       if (__pred(__first[__k], __first[__i]))
18681ad6265SDimitry Andric         ++__k;
18781ad6265SDimitry Andric       __prefix[__i] = __k;
18881ad6265SDimitry Andric     }
18981ad6265SDimitry Andric   }
19081ad6265SDimitry Andric 
19106c3fb27SDimitry Andric   _LIBCPP_HIDE_FROM_ABI void
__build_suffix_table(_RandomAccessIterator1 __first,_RandomAccessIterator1 __last,_BinaryPredicate __pred)19206c3fb27SDimitry Andric   __build_suffix_table(_RandomAccessIterator1 __first, _RandomAccessIterator1 __last, _BinaryPredicate __pred) {
19381ad6265SDimitry Andric     const size_t __count = __last - __first;
19481ad6265SDimitry Andric 
19581ad6265SDimitry Andric     if (__count == 0)
19681ad6265SDimitry Andric       return;
19781ad6265SDimitry Andric 
19881ad6265SDimitry Andric     vector<difference_type> __scratch(__count);
19981ad6265SDimitry Andric 
20081ad6265SDimitry Andric     __compute_bm_prefix(__first, __last, __pred, __scratch);
20181ad6265SDimitry Andric     for (size_t __i = 0; __i <= __count; ++__i)
20281ad6265SDimitry Andric       __suffix_[__i] = __count - __scratch[__count - 1];
20381ad6265SDimitry Andric 
20481ad6265SDimitry Andric     using _ReverseIter = reverse_iterator<_RandomAccessIterator1>;
20581ad6265SDimitry Andric     __compute_bm_prefix(_ReverseIter(__last), _ReverseIter(__first), __pred, __scratch);
20681ad6265SDimitry Andric 
20781ad6265SDimitry Andric     for (size_t __i = 0; __i != __count; ++__i) {
20881ad6265SDimitry Andric       const size_t __j          = __count - __scratch[__i];
20981ad6265SDimitry Andric       const difference_type __k = __i - __scratch[__i] + 1;
21081ad6265SDimitry Andric 
21181ad6265SDimitry Andric       if (__suffix_[__j] > __k)
21281ad6265SDimitry Andric         __suffix_[__j] = __k;
21381ad6265SDimitry Andric     }
21481ad6265SDimitry Andric   }
21581ad6265SDimitry Andric };
216bdd1243dSDimitry Andric _LIBCPP_CTAD_SUPPORTED_FOR_TYPE(boyer_moore_searcher);
21781ad6265SDimitry Andric 
21881ad6265SDimitry Andric template <class _RandomAccessIterator1,
21981ad6265SDimitry Andric           class _Hash            = hash<typename iterator_traits<_RandomAccessIterator1>::value_type>,
22081ad6265SDimitry Andric           class _BinaryPredicate = equal_to<>>
22181ad6265SDimitry Andric class _LIBCPP_TEMPLATE_VIS boyer_moore_horspool_searcher {
22281ad6265SDimitry Andric private:
22381ad6265SDimitry Andric   using difference_type = typename iterator_traits<_RandomAccessIterator1>::difference_type;
22481ad6265SDimitry Andric   using value_type      = typename iterator_traits<_RandomAccessIterator1>::value_type;
225*cb14a3feSDimitry Andric   using __skip_table_type =
226*cb14a3feSDimitry Andric       _BMSkipTable<value_type,
22781ad6265SDimitry Andric                    difference_type,
22881ad6265SDimitry Andric                    _Hash,
22981ad6265SDimitry Andric                    _BinaryPredicate,
230*cb14a3feSDimitry Andric                    is_integral_v<value_type> && sizeof(value_type) == 1 && is_same_v<_Hash, hash<value_type>> &&
231*cb14a3feSDimitry Andric                        is_same_v<_BinaryPredicate, equal_to<>>>;
232*cb14a3feSDimitry Andric 
23381ad6265SDimitry Andric public:
234*cb14a3feSDimitry Andric   _LIBCPP_HIDE_FROM_ABI boyer_moore_horspool_searcher(
235*cb14a3feSDimitry Andric       _RandomAccessIterator1 __first,
23681ad6265SDimitry Andric       _RandomAccessIterator1 __last,
23781ad6265SDimitry Andric       _Hash __hash            = _Hash(),
23881ad6265SDimitry Andric       _BinaryPredicate __pred = _BinaryPredicate())
__first_(__first)23981ad6265SDimitry Andric       : __first_(__first),
24081ad6265SDimitry Andric         __last_(__last),
24181ad6265SDimitry Andric         __pred_(__pred),
24281ad6265SDimitry Andric         __pattern_length_(__last - __first),
24381ad6265SDimitry Andric         __skip_table_(std::make_shared<__skip_table_type>(__pattern_length_, __pattern_length_, __hash, __pred_)) {
24481ad6265SDimitry Andric     if (__first == __last)
24581ad6265SDimitry Andric       return;
24681ad6265SDimitry Andric     --__last;
24781ad6265SDimitry Andric     difference_type __i = 0;
24881ad6265SDimitry Andric     while (__first != __last) {
24981ad6265SDimitry Andric       __skip_table_->insert(*__first, __pattern_length_ - 1 - __i);
25081ad6265SDimitry Andric       ++__first;
25181ad6265SDimitry Andric       ++__i;
25281ad6265SDimitry Andric     }
25381ad6265SDimitry Andric   }
25481ad6265SDimitry Andric 
25581ad6265SDimitry Andric   template <class _RandomAccessIterator2>
25606c3fb27SDimitry Andric   _LIBCPP_HIDE_FROM_ABI pair<_RandomAccessIterator2, _RandomAccessIterator2>
operator()25781ad6265SDimitry Andric   operator()(_RandomAccessIterator2 __first, _RandomAccessIterator2 __last) const {
25881ad6265SDimitry Andric     static_assert(__is_same_uncvref<typename std::iterator_traits<_RandomAccessIterator1>::value_type,
25981ad6265SDimitry Andric                                     typename std::iterator_traits<_RandomAccessIterator2>::value_type>::value,
26081ad6265SDimitry Andric                   "Corpus and Pattern iterators must point to the same type");
26181ad6265SDimitry Andric     if (__first == __last)
26281ad6265SDimitry Andric       return std::make_pair(__last, __last);
26381ad6265SDimitry Andric     if (__first_ == __last_)
26481ad6265SDimitry Andric       return std::make_pair(__first, __first);
26581ad6265SDimitry Andric 
26681ad6265SDimitry Andric     if (__pattern_length_ > __last - __first)
26781ad6265SDimitry Andric       return std::make_pair(__last, __last);
26881ad6265SDimitry Andric 
26981ad6265SDimitry Andric     return __search(__first, __last);
27081ad6265SDimitry Andric   }
27181ad6265SDimitry Andric 
27281ad6265SDimitry Andric private:
27381ad6265SDimitry Andric   _RandomAccessIterator1 __first_;
27481ad6265SDimitry Andric   _RandomAccessIterator1 __last_;
27581ad6265SDimitry Andric   _BinaryPredicate __pred_;
27681ad6265SDimitry Andric   difference_type __pattern_length_;
27781ad6265SDimitry Andric   shared_ptr<__skip_table_type> __skip_table_;
27881ad6265SDimitry Andric 
27981ad6265SDimitry Andric   template <class _RandomAccessIterator2>
28006c3fb27SDimitry Andric   _LIBCPP_HIDE_FROM_ABI pair<_RandomAccessIterator2, _RandomAccessIterator2>
__search(_RandomAccessIterator2 __f,_RandomAccessIterator2 __l)28181ad6265SDimitry Andric   __search(_RandomAccessIterator2 __f, _RandomAccessIterator2 __l) const {
28281ad6265SDimitry Andric     _RandomAccessIterator2 __current      = __f;
28381ad6265SDimitry Andric     const _RandomAccessIterator2 __last   = __l - __pattern_length_;
28481ad6265SDimitry Andric     const __skip_table_type& __skip_table = *__skip_table_;
28581ad6265SDimitry Andric 
28681ad6265SDimitry Andric     while (__current <= __last) {
28781ad6265SDimitry Andric       difference_type __j = __pattern_length_;
28881ad6265SDimitry Andric       while (__pred_(__first_[__j - 1], __current[__j - 1])) {
28981ad6265SDimitry Andric         --__j;
29081ad6265SDimitry Andric         if (__j == 0)
29181ad6265SDimitry Andric           return std::make_pair(__current, __current + __pattern_length_);
29281ad6265SDimitry Andric       }
29381ad6265SDimitry Andric       __current += __skip_table[__current[__pattern_length_ - 1]];
29481ad6265SDimitry Andric     }
29581ad6265SDimitry Andric     return std::make_pair(__l, __l);
29681ad6265SDimitry Andric   }
29781ad6265SDimitry Andric };
298bdd1243dSDimitry Andric _LIBCPP_CTAD_SUPPORTED_FOR_TYPE(boyer_moore_horspool_searcher);
29981ad6265SDimitry Andric 
30081ad6265SDimitry Andric _LIBCPP_END_NAMESPACE_STD
30181ad6265SDimitry Andric 
30281ad6265SDimitry Andric _LIBCPP_POP_MACROS
30381ad6265SDimitry Andric 
30406c3fb27SDimitry Andric #endif // _LIBCPP_STD_VER >= 17
30581ad6265SDimitry Andric 
30681ad6265SDimitry Andric #endif // _LIBCPP___FUNCTIONAL_BOYER_MOORE_SEARCHER_H
307