181ad6265SDimitry Andric //===----------------------------------------------------------------------===// 281ad6265SDimitry Andric // 381ad6265SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 481ad6265SDimitry Andric // See https://llvm.org/LICENSE.txt for license information. 581ad6265SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 681ad6265SDimitry Andric // 781ad6265SDimitry Andric //===----------------------------------------------------------------------===// 881ad6265SDimitry Andric 981ad6265SDimitry Andric #ifndef _LIBCPP___FUNCTIONAL_BOYER_MOORE_SEARCHER_H 1081ad6265SDimitry Andric #define _LIBCPP___FUNCTIONAL_BOYER_MOORE_SEARCHER_H 1181ad6265SDimitry Andric 1281ad6265SDimitry Andric #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) 1381ad6265SDimitry Andric # pragma GCC system_header 1481ad6265SDimitry Andric #endif 1581ad6265SDimitry Andric 1681ad6265SDimitry Andric #include <__algorithm/fill_n.h> 1781ad6265SDimitry Andric #include <__config> 1881ad6265SDimitry Andric #include <__functional/hash.h> 1981ad6265SDimitry Andric #include <__functional/operations.h> 2081ad6265SDimitry Andric #include <__iterator/distance.h> 2181ad6265SDimitry Andric #include <__iterator/iterator_traits.h> 2281ad6265SDimitry Andric #include <__memory/shared_ptr.h> 2306c3fb27SDimitry Andric #include <__type_traits/make_unsigned.h> 2481ad6265SDimitry Andric #include <__utility/pair.h> 2581ad6265SDimitry Andric #include <array> 2681ad6265SDimitry Andric #include <unordered_map> 2781ad6265SDimitry Andric #include <vector> 2881ad6265SDimitry Andric 2906c3fb27SDimitry Andric #if _LIBCPP_STD_VER >= 17 3081ad6265SDimitry Andric 3181ad6265SDimitry Andric _LIBCPP_PUSH_MACROS 3281ad6265SDimitry Andric # include <__undef_macros> 3381ad6265SDimitry Andric 3481ad6265SDimitry Andric _LIBCPP_BEGIN_NAMESPACE_STD 3581ad6265SDimitry Andric 36*cb14a3feSDimitry Andric template <class _Key, class _Value, class _Hash, class _BinaryPredicate, bool /*useArray*/> 3781ad6265SDimitry Andric class _BMSkipTable; 3881ad6265SDimitry Andric 3981ad6265SDimitry Andric // General case for BM data searching; use a map 40*cb14a3feSDimitry Andric template <class _Key, class _Value, class _Hash, class _BinaryPredicate> 4181ad6265SDimitry Andric class _BMSkipTable<_Key, _Value, _Hash, _BinaryPredicate, false> { 4281ad6265SDimitry Andric private: 4381ad6265SDimitry Andric using value_type = _Value; 4481ad6265SDimitry Andric using key_type = _Key; 4581ad6265SDimitry Andric 4681ad6265SDimitry Andric const value_type __default_value_; 4781ad6265SDimitry Andric unordered_map<_Key, _Value, _Hash, _BinaryPredicate> __table_; 4881ad6265SDimitry Andric 4981ad6265SDimitry Andric public: _BMSkipTable(size_t __sz,value_type __default_value,_Hash __hash,_BinaryPredicate __pred)50*cb14a3feSDimitry Andric _LIBCPP_HIDE_FROM_ABI explicit _BMSkipTable( 51*cb14a3feSDimitry Andric size_t __sz, value_type __default_value, _Hash __hash, _BinaryPredicate __pred) 52*cb14a3feSDimitry Andric : __default_value_(__default_value), __table_(__sz, __hash, __pred) {} 5381ad6265SDimitry Andric insert(const key_type & __key,value_type __val)54*cb14a3feSDimitry Andric _LIBCPP_HIDE_FROM_ABI void insert(const key_type& __key, value_type __val) { __table_[__key] = __val; } 5581ad6265SDimitry Andric 5681ad6265SDimitry Andric _LIBCPP_HIDE_FROM_ABI value_type operator[](const key_type& __key) const { 5781ad6265SDimitry Andric auto __it = __table_.find(__key); 5881ad6265SDimitry Andric return __it == __table_.end() ? __default_value_ : __it->second; 5981ad6265SDimitry Andric } 6081ad6265SDimitry Andric }; 6181ad6265SDimitry Andric 6281ad6265SDimitry Andric // Special case small numeric values; use an array 63*cb14a3feSDimitry Andric template <class _Key, class _Value, class _Hash, class _BinaryPredicate> 6481ad6265SDimitry Andric class _BMSkipTable<_Key, _Value, _Hash, _BinaryPredicate, true> { 6581ad6265SDimitry Andric private: 6681ad6265SDimitry Andric using value_type = _Value; 6781ad6265SDimitry Andric using key_type = _Key; 6881ad6265SDimitry Andric 6981ad6265SDimitry Andric using unsigned_key_type = make_unsigned_t<key_type>; 7081ad6265SDimitry Andric std::array<value_type, 256> __table_; 7181ad6265SDimitry Andric static_assert(numeric_limits<unsigned_key_type>::max() < 256); 7281ad6265SDimitry Andric 7381ad6265SDimitry Andric public: _BMSkipTable(size_t,value_type __default_value,_Hash,_BinaryPredicate)7481ad6265SDimitry Andric _LIBCPP_HIDE_FROM_ABI explicit _BMSkipTable(size_t, value_type __default_value, _Hash, _BinaryPredicate) { 7581ad6265SDimitry Andric std::fill_n(__table_.data(), __table_.size(), __default_value); 7681ad6265SDimitry Andric } 7781ad6265SDimitry Andric insert(key_type __key,value_type __val)7881ad6265SDimitry Andric _LIBCPP_HIDE_FROM_ABI void insert(key_type __key, value_type __val) { 7981ad6265SDimitry Andric __table_[static_cast<unsigned_key_type>(__key)] = __val; 8081ad6265SDimitry Andric } 8181ad6265SDimitry Andric 8281ad6265SDimitry Andric _LIBCPP_HIDE_FROM_ABI value_type operator[](key_type __key) const { 8381ad6265SDimitry Andric return __table_[static_cast<unsigned_key_type>(__key)]; 8481ad6265SDimitry Andric } 8581ad6265SDimitry Andric }; 8681ad6265SDimitry Andric 8781ad6265SDimitry Andric template <class _RandomAccessIterator1, 8881ad6265SDimitry Andric class _Hash = hash<typename iterator_traits<_RandomAccessIterator1>::value_type>, 8981ad6265SDimitry Andric class _BinaryPredicate = equal_to<>> 9081ad6265SDimitry Andric class _LIBCPP_TEMPLATE_VIS boyer_moore_searcher { 9181ad6265SDimitry Andric private: 9281ad6265SDimitry Andric using difference_type = typename std::iterator_traits<_RandomAccessIterator1>::difference_type; 9381ad6265SDimitry Andric using value_type = typename std::iterator_traits<_RandomAccessIterator1>::value_type; 94*cb14a3feSDimitry Andric using __skip_table_type = 95*cb14a3feSDimitry Andric _BMSkipTable<value_type, 9681ad6265SDimitry Andric difference_type, 9781ad6265SDimitry Andric _Hash, 9881ad6265SDimitry Andric _BinaryPredicate, 99*cb14a3feSDimitry Andric is_integral_v<value_type> && sizeof(value_type) == 1 && is_same_v<_Hash, hash<value_type>> && 100*cb14a3feSDimitry Andric is_same_v<_BinaryPredicate, equal_to<>>>; 10181ad6265SDimitry Andric 10281ad6265SDimitry Andric public: 103*cb14a3feSDimitry Andric _LIBCPP_HIDE_FROM_ABI boyer_moore_searcher( 104*cb14a3feSDimitry Andric _RandomAccessIterator1 __first, 10581ad6265SDimitry Andric _RandomAccessIterator1 __last, 10681ad6265SDimitry Andric _Hash __hash = _Hash(), 10781ad6265SDimitry Andric _BinaryPredicate __pred = _BinaryPredicate()) __first_(__first)10881ad6265SDimitry Andric : __first_(__first), 10981ad6265SDimitry Andric __last_(__last), 11081ad6265SDimitry Andric __pred_(__pred), 11181ad6265SDimitry Andric __pattern_length_(__last - __first), 11281ad6265SDimitry Andric __skip_table_(std::make_shared<__skip_table_type>(__pattern_length_, -1, __hash, __pred_)), 11381ad6265SDimitry Andric __suffix_(std::__allocate_shared_unbounded_array<difference_type[]>( 11481ad6265SDimitry Andric allocator<difference_type>(), __pattern_length_ + 1)) { 11581ad6265SDimitry Andric difference_type __i = 0; 11681ad6265SDimitry Andric while (__first != __last) { 11781ad6265SDimitry Andric __skip_table_->insert(*__first, __i); 11881ad6265SDimitry Andric ++__first; 11981ad6265SDimitry Andric ++__i; 12081ad6265SDimitry Andric } 12181ad6265SDimitry Andric __build_suffix_table(__first_, __last_, __pred_); 12281ad6265SDimitry Andric } 12381ad6265SDimitry Andric 12481ad6265SDimitry Andric template <class _RandomAccessIterator2> 12506c3fb27SDimitry Andric _LIBCPP_HIDE_FROM_ABI pair<_RandomAccessIterator2, _RandomAccessIterator2> operator()12681ad6265SDimitry Andric operator()(_RandomAccessIterator2 __first, _RandomAccessIterator2 __last) const { 12781ad6265SDimitry Andric static_assert(__is_same_uncvref<typename iterator_traits<_RandomAccessIterator1>::value_type, 12881ad6265SDimitry Andric typename iterator_traits<_RandomAccessIterator2>::value_type>::value, 12981ad6265SDimitry Andric "Corpus and Pattern iterators must point to the same type"); 13081ad6265SDimitry Andric if (__first == __last) 13181ad6265SDimitry Andric return std::make_pair(__last, __last); 13281ad6265SDimitry Andric if (__first_ == __last_) 13381ad6265SDimitry Andric return std::make_pair(__first, __first); 13481ad6265SDimitry Andric 13581ad6265SDimitry Andric if (__pattern_length_ > (__last - __first)) 13681ad6265SDimitry Andric return std::make_pair(__last, __last); 13781ad6265SDimitry Andric return __search(__first, __last); 13881ad6265SDimitry Andric } 13981ad6265SDimitry Andric 14081ad6265SDimitry Andric private: 14181ad6265SDimitry Andric _RandomAccessIterator1 __first_; 14281ad6265SDimitry Andric _RandomAccessIterator1 __last_; 14381ad6265SDimitry Andric _BinaryPredicate __pred_; 14481ad6265SDimitry Andric difference_type __pattern_length_; 14581ad6265SDimitry Andric shared_ptr<__skip_table_type> __skip_table_; 14681ad6265SDimitry Andric shared_ptr<difference_type[]> __suffix_; 14781ad6265SDimitry Andric 14881ad6265SDimitry Andric template <class _RandomAccessIterator2> 14906c3fb27SDimitry Andric _LIBCPP_HIDE_FROM_ABI pair<_RandomAccessIterator2, _RandomAccessIterator2> __search(_RandomAccessIterator2 __f,_RandomAccessIterator2 __l)15081ad6265SDimitry Andric __search(_RandomAccessIterator2 __f, _RandomAccessIterator2 __l) const { 15181ad6265SDimitry Andric _RandomAccessIterator2 __current = __f; 15281ad6265SDimitry Andric const _RandomAccessIterator2 __last = __l - __pattern_length_; 15381ad6265SDimitry Andric const __skip_table_type& __skip_table = *__skip_table_; 15481ad6265SDimitry Andric 15581ad6265SDimitry Andric while (__current <= __last) { 15681ad6265SDimitry Andric difference_type __j = __pattern_length_; 15781ad6265SDimitry Andric while (__pred_(__first_[__j - 1], __current[__j - 1])) { 15881ad6265SDimitry Andric --__j; 15981ad6265SDimitry Andric if (__j == 0) 16081ad6265SDimitry Andric return std::make_pair(__current, __current + __pattern_length_); 16181ad6265SDimitry Andric } 16281ad6265SDimitry Andric 16381ad6265SDimitry Andric difference_type __k = __skip_table[__current[__j - 1]]; 16481ad6265SDimitry Andric difference_type __m = __j - __k - 1; 16581ad6265SDimitry Andric if (__k < __j && __m > __suffix_[__j]) 16681ad6265SDimitry Andric __current += __m; 16781ad6265SDimitry Andric else 16881ad6265SDimitry Andric __current += __suffix_[__j]; 16981ad6265SDimitry Andric } 17081ad6265SDimitry Andric return std::make_pair(__l, __l); 17181ad6265SDimitry Andric } 17281ad6265SDimitry Andric 17381ad6265SDimitry Andric template <class _Iterator, class _Container> 17406c3fb27SDimitry Andric _LIBCPP_HIDE_FROM_ABI void __compute_bm_prefix(_Iterator __first,_Iterator __last,_BinaryPredicate __pred,_Container & __prefix)17506c3fb27SDimitry Andric __compute_bm_prefix(_Iterator __first, _Iterator __last, _BinaryPredicate __pred, _Container& __prefix) { 17681ad6265SDimitry Andric const size_t __count = __last - __first; 17781ad6265SDimitry Andric 17881ad6265SDimitry Andric __prefix[0] = 0; 17981ad6265SDimitry Andric size_t __k = 0; 18081ad6265SDimitry Andric 18181ad6265SDimitry Andric for (size_t __i = 1; __i != __count; ++__i) { 18281ad6265SDimitry Andric while (__k > 0 && !__pred(__first[__k], __first[__i])) 18381ad6265SDimitry Andric __k = __prefix[__k - 1]; 18481ad6265SDimitry Andric 18581ad6265SDimitry Andric if (__pred(__first[__k], __first[__i])) 18681ad6265SDimitry Andric ++__k; 18781ad6265SDimitry Andric __prefix[__i] = __k; 18881ad6265SDimitry Andric } 18981ad6265SDimitry Andric } 19081ad6265SDimitry Andric 19106c3fb27SDimitry Andric _LIBCPP_HIDE_FROM_ABI void __build_suffix_table(_RandomAccessIterator1 __first,_RandomAccessIterator1 __last,_BinaryPredicate __pred)19206c3fb27SDimitry Andric __build_suffix_table(_RandomAccessIterator1 __first, _RandomAccessIterator1 __last, _BinaryPredicate __pred) { 19381ad6265SDimitry Andric const size_t __count = __last - __first; 19481ad6265SDimitry Andric 19581ad6265SDimitry Andric if (__count == 0) 19681ad6265SDimitry Andric return; 19781ad6265SDimitry Andric 19881ad6265SDimitry Andric vector<difference_type> __scratch(__count); 19981ad6265SDimitry Andric 20081ad6265SDimitry Andric __compute_bm_prefix(__first, __last, __pred, __scratch); 20181ad6265SDimitry Andric for (size_t __i = 0; __i <= __count; ++__i) 20281ad6265SDimitry Andric __suffix_[__i] = __count - __scratch[__count - 1]; 20381ad6265SDimitry Andric 20481ad6265SDimitry Andric using _ReverseIter = reverse_iterator<_RandomAccessIterator1>; 20581ad6265SDimitry Andric __compute_bm_prefix(_ReverseIter(__last), _ReverseIter(__first), __pred, __scratch); 20681ad6265SDimitry Andric 20781ad6265SDimitry Andric for (size_t __i = 0; __i != __count; ++__i) { 20881ad6265SDimitry Andric const size_t __j = __count - __scratch[__i]; 20981ad6265SDimitry Andric const difference_type __k = __i - __scratch[__i] + 1; 21081ad6265SDimitry Andric 21181ad6265SDimitry Andric if (__suffix_[__j] > __k) 21281ad6265SDimitry Andric __suffix_[__j] = __k; 21381ad6265SDimitry Andric } 21481ad6265SDimitry Andric } 21581ad6265SDimitry Andric }; 216bdd1243dSDimitry Andric _LIBCPP_CTAD_SUPPORTED_FOR_TYPE(boyer_moore_searcher); 21781ad6265SDimitry Andric 21881ad6265SDimitry Andric template <class _RandomAccessIterator1, 21981ad6265SDimitry Andric class _Hash = hash<typename iterator_traits<_RandomAccessIterator1>::value_type>, 22081ad6265SDimitry Andric class _BinaryPredicate = equal_to<>> 22181ad6265SDimitry Andric class _LIBCPP_TEMPLATE_VIS boyer_moore_horspool_searcher { 22281ad6265SDimitry Andric private: 22381ad6265SDimitry Andric using difference_type = typename iterator_traits<_RandomAccessIterator1>::difference_type; 22481ad6265SDimitry Andric using value_type = typename iterator_traits<_RandomAccessIterator1>::value_type; 225*cb14a3feSDimitry Andric using __skip_table_type = 226*cb14a3feSDimitry Andric _BMSkipTable<value_type, 22781ad6265SDimitry Andric difference_type, 22881ad6265SDimitry Andric _Hash, 22981ad6265SDimitry Andric _BinaryPredicate, 230*cb14a3feSDimitry Andric is_integral_v<value_type> && sizeof(value_type) == 1 && is_same_v<_Hash, hash<value_type>> && 231*cb14a3feSDimitry Andric is_same_v<_BinaryPredicate, equal_to<>>>; 232*cb14a3feSDimitry Andric 23381ad6265SDimitry Andric public: 234*cb14a3feSDimitry Andric _LIBCPP_HIDE_FROM_ABI boyer_moore_horspool_searcher( 235*cb14a3feSDimitry Andric _RandomAccessIterator1 __first, 23681ad6265SDimitry Andric _RandomAccessIterator1 __last, 23781ad6265SDimitry Andric _Hash __hash = _Hash(), 23881ad6265SDimitry Andric _BinaryPredicate __pred = _BinaryPredicate()) __first_(__first)23981ad6265SDimitry Andric : __first_(__first), 24081ad6265SDimitry Andric __last_(__last), 24181ad6265SDimitry Andric __pred_(__pred), 24281ad6265SDimitry Andric __pattern_length_(__last - __first), 24381ad6265SDimitry Andric __skip_table_(std::make_shared<__skip_table_type>(__pattern_length_, __pattern_length_, __hash, __pred_)) { 24481ad6265SDimitry Andric if (__first == __last) 24581ad6265SDimitry Andric return; 24681ad6265SDimitry Andric --__last; 24781ad6265SDimitry Andric difference_type __i = 0; 24881ad6265SDimitry Andric while (__first != __last) { 24981ad6265SDimitry Andric __skip_table_->insert(*__first, __pattern_length_ - 1 - __i); 25081ad6265SDimitry Andric ++__first; 25181ad6265SDimitry Andric ++__i; 25281ad6265SDimitry Andric } 25381ad6265SDimitry Andric } 25481ad6265SDimitry Andric 25581ad6265SDimitry Andric template <class _RandomAccessIterator2> 25606c3fb27SDimitry Andric _LIBCPP_HIDE_FROM_ABI pair<_RandomAccessIterator2, _RandomAccessIterator2> operator()25781ad6265SDimitry Andric operator()(_RandomAccessIterator2 __first, _RandomAccessIterator2 __last) const { 25881ad6265SDimitry Andric static_assert(__is_same_uncvref<typename std::iterator_traits<_RandomAccessIterator1>::value_type, 25981ad6265SDimitry Andric typename std::iterator_traits<_RandomAccessIterator2>::value_type>::value, 26081ad6265SDimitry Andric "Corpus and Pattern iterators must point to the same type"); 26181ad6265SDimitry Andric if (__first == __last) 26281ad6265SDimitry Andric return std::make_pair(__last, __last); 26381ad6265SDimitry Andric if (__first_ == __last_) 26481ad6265SDimitry Andric return std::make_pair(__first, __first); 26581ad6265SDimitry Andric 26681ad6265SDimitry Andric if (__pattern_length_ > __last - __first) 26781ad6265SDimitry Andric return std::make_pair(__last, __last); 26881ad6265SDimitry Andric 26981ad6265SDimitry Andric return __search(__first, __last); 27081ad6265SDimitry Andric } 27181ad6265SDimitry Andric 27281ad6265SDimitry Andric private: 27381ad6265SDimitry Andric _RandomAccessIterator1 __first_; 27481ad6265SDimitry Andric _RandomAccessIterator1 __last_; 27581ad6265SDimitry Andric _BinaryPredicate __pred_; 27681ad6265SDimitry Andric difference_type __pattern_length_; 27781ad6265SDimitry Andric shared_ptr<__skip_table_type> __skip_table_; 27881ad6265SDimitry Andric 27981ad6265SDimitry Andric template <class _RandomAccessIterator2> 28006c3fb27SDimitry Andric _LIBCPP_HIDE_FROM_ABI pair<_RandomAccessIterator2, _RandomAccessIterator2> __search(_RandomAccessIterator2 __f,_RandomAccessIterator2 __l)28181ad6265SDimitry Andric __search(_RandomAccessIterator2 __f, _RandomAccessIterator2 __l) const { 28281ad6265SDimitry Andric _RandomAccessIterator2 __current = __f; 28381ad6265SDimitry Andric const _RandomAccessIterator2 __last = __l - __pattern_length_; 28481ad6265SDimitry Andric const __skip_table_type& __skip_table = *__skip_table_; 28581ad6265SDimitry Andric 28681ad6265SDimitry Andric while (__current <= __last) { 28781ad6265SDimitry Andric difference_type __j = __pattern_length_; 28881ad6265SDimitry Andric while (__pred_(__first_[__j - 1], __current[__j - 1])) { 28981ad6265SDimitry Andric --__j; 29081ad6265SDimitry Andric if (__j == 0) 29181ad6265SDimitry Andric return std::make_pair(__current, __current + __pattern_length_); 29281ad6265SDimitry Andric } 29381ad6265SDimitry Andric __current += __skip_table[__current[__pattern_length_ - 1]]; 29481ad6265SDimitry Andric } 29581ad6265SDimitry Andric return std::make_pair(__l, __l); 29681ad6265SDimitry Andric } 29781ad6265SDimitry Andric }; 298bdd1243dSDimitry Andric _LIBCPP_CTAD_SUPPORTED_FOR_TYPE(boyer_moore_horspool_searcher); 29981ad6265SDimitry Andric 30081ad6265SDimitry Andric _LIBCPP_END_NAMESPACE_STD 30181ad6265SDimitry Andric 30281ad6265SDimitry Andric _LIBCPP_POP_MACROS 30381ad6265SDimitry Andric 30406c3fb27SDimitry Andric #endif // _LIBCPP_STD_VER >= 17 30581ad6265SDimitry Andric 30681ad6265SDimitry Andric #endif // _LIBCPP___FUNCTIONAL_BOYER_MOORE_SEARCHER_H 307