1971e9c80SNikolas Klauser //===----------------------------------------------------------------------===// 2971e9c80SNikolas Klauser // 3971e9c80SNikolas Klauser // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4971e9c80SNikolas Klauser // See https://llvm.org/LICENSE.txt for license information. 5971e9c80SNikolas Klauser // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6971e9c80SNikolas Klauser // 7971e9c80SNikolas Klauser //===----------------------------------------------------------------------===// 8971e9c80SNikolas Klauser 9971e9c80SNikolas Klauser #ifndef _LIBCPP___FUNCTIONAL_BOYER_MOORE_SEARCHER_H 10971e9c80SNikolas Klauser #define _LIBCPP___FUNCTIONAL_BOYER_MOORE_SEARCHER_H 11971e9c80SNikolas Klauser 12971e9c80SNikolas Klauser #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) 13971e9c80SNikolas Klauser # pragma GCC system_header 14971e9c80SNikolas Klauser #endif 15971e9c80SNikolas Klauser 16971e9c80SNikolas Klauser #include <__algorithm/fill_n.h> 17971e9c80SNikolas Klauser #include <__config> 18971e9c80SNikolas Klauser #include <__functional/hash.h> 19971e9c80SNikolas Klauser #include <__functional/operations.h> 20971e9c80SNikolas Klauser #include <__iterator/distance.h> 21971e9c80SNikolas Klauser #include <__iterator/iterator_traits.h> 22971e9c80SNikolas Klauser #include <__memory/shared_ptr.h> 2379702f7fSIan Anderson #include <__type_traits/make_unsigned.h> 24971e9c80SNikolas Klauser #include <__utility/pair.h> 252e43a304SNikolas Klauser #include <__vector/vector.h> 26971e9c80SNikolas Klauser #include <array> 272e43a304SNikolas Klauser #include <limits> 28971e9c80SNikolas Klauser #include <unordered_map> 29971e9c80SNikolas Klauser 304f15267dSNikolas Klauser #if _LIBCPP_STD_VER >= 17 31971e9c80SNikolas Klauser 32971e9c80SNikolas Klauser _LIBCPP_PUSH_MACROS 33971e9c80SNikolas Klauser # include <__undef_macros> 34971e9c80SNikolas Klauser 35971e9c80SNikolas Klauser _LIBCPP_BEGIN_NAMESPACE_STD 36971e9c80SNikolas Klauser 379783f28cSLouis Dionne template <class _Key, class _Value, class _Hash, class _BinaryPredicate, bool /*useArray*/> 38971e9c80SNikolas Klauser class _BMSkipTable; 39971e9c80SNikolas Klauser 40971e9c80SNikolas Klauser // General case for BM data searching; use a map 419783f28cSLouis Dionne template <class _Key, class _Value, class _Hash, class _BinaryPredicate> 42971e9c80SNikolas Klauser class _BMSkipTable<_Key, _Value, _Hash, _BinaryPredicate, false> { 43971e9c80SNikolas Klauser private: 44971e9c80SNikolas Klauser using value_type = _Value; 45971e9c80SNikolas Klauser using key_type = _Key; 46971e9c80SNikolas Klauser 47971e9c80SNikolas Klauser const value_type __default_value_; 48971e9c80SNikolas Klauser unordered_map<_Key, _Value, _Hash, _BinaryPredicate> __table_; 49971e9c80SNikolas Klauser 50971e9c80SNikolas Klauser public: 519783f28cSLouis Dionne _LIBCPP_HIDE_FROM_ABI explicit _BMSkipTable( 529783f28cSLouis Dionne size_t __sz, value_type __default_value, _Hash __hash, _BinaryPredicate __pred) 539783f28cSLouis Dionne : __default_value_(__default_value), __table_(__sz, __hash, __pred) {} 54971e9c80SNikolas Klauser 559783f28cSLouis Dionne _LIBCPP_HIDE_FROM_ABI void insert(const key_type& __key, value_type __val) { __table_[__key] = __val; } 56971e9c80SNikolas Klauser 57971e9c80SNikolas Klauser _LIBCPP_HIDE_FROM_ABI value_type operator[](const key_type& __key) const { 58971e9c80SNikolas Klauser auto __it = __table_.find(__key); 59971e9c80SNikolas Klauser return __it == __table_.end() ? __default_value_ : __it->second; 60971e9c80SNikolas Klauser } 61971e9c80SNikolas Klauser }; 62971e9c80SNikolas Klauser 63971e9c80SNikolas Klauser // Special case small numeric values; use an array 649783f28cSLouis Dionne template <class _Key, class _Value, class _Hash, class _BinaryPredicate> 65971e9c80SNikolas Klauser class _BMSkipTable<_Key, _Value, _Hash, _BinaryPredicate, true> { 66971e9c80SNikolas Klauser private: 67971e9c80SNikolas Klauser using value_type = _Value; 68971e9c80SNikolas Klauser using key_type = _Key; 69971e9c80SNikolas Klauser 70971e9c80SNikolas Klauser using unsigned_key_type = make_unsigned_t<key_type>; 71971e9c80SNikolas Klauser std::array<value_type, 256> __table_; 72971e9c80SNikolas Klauser static_assert(numeric_limits<unsigned_key_type>::max() < 256); 73971e9c80SNikolas Klauser 74971e9c80SNikolas Klauser public: 75971e9c80SNikolas Klauser _LIBCPP_HIDE_FROM_ABI explicit _BMSkipTable(size_t, value_type __default_value, _Hash, _BinaryPredicate) { 76971e9c80SNikolas Klauser std::fill_n(__table_.data(), __table_.size(), __default_value); 77971e9c80SNikolas Klauser } 78971e9c80SNikolas Klauser 79971e9c80SNikolas Klauser _LIBCPP_HIDE_FROM_ABI void insert(key_type __key, value_type __val) { 80971e9c80SNikolas Klauser __table_[static_cast<unsigned_key_type>(__key)] = __val; 81971e9c80SNikolas Klauser } 82971e9c80SNikolas Klauser 83971e9c80SNikolas Klauser _LIBCPP_HIDE_FROM_ABI value_type operator[](key_type __key) const { 84971e9c80SNikolas Klauser return __table_[static_cast<unsigned_key_type>(__key)]; 85971e9c80SNikolas Klauser } 86971e9c80SNikolas Klauser }; 87971e9c80SNikolas Klauser 88971e9c80SNikolas Klauser template <class _RandomAccessIterator1, 89971e9c80SNikolas Klauser class _Hash = hash<typename iterator_traits<_RandomAccessIterator1>::value_type>, 90971e9c80SNikolas Klauser class _BinaryPredicate = equal_to<>> 91971e9c80SNikolas Klauser class _LIBCPP_TEMPLATE_VIS boyer_moore_searcher { 92971e9c80SNikolas Klauser private: 93971e9c80SNikolas Klauser using difference_type = typename std::iterator_traits<_RandomAccessIterator1>::difference_type; 94971e9c80SNikolas Klauser using value_type = typename std::iterator_traits<_RandomAccessIterator1>::value_type; 95*f6958523SNikolas Klauser using __skip_table_type _LIBCPP_NODEBUG = 969783f28cSLouis Dionne _BMSkipTable<value_type, 97971e9c80SNikolas Klauser difference_type, 98971e9c80SNikolas Klauser _Hash, 99971e9c80SNikolas Klauser _BinaryPredicate, 1009783f28cSLouis Dionne is_integral_v<value_type> && sizeof(value_type) == 1 && is_same_v<_Hash, hash<value_type>> && 1019783f28cSLouis Dionne is_same_v<_BinaryPredicate, equal_to<>>>; 102971e9c80SNikolas Klauser 103971e9c80SNikolas Klauser public: 1049783f28cSLouis Dionne _LIBCPP_HIDE_FROM_ABI boyer_moore_searcher( 1059783f28cSLouis Dionne _RandomAccessIterator1 __first, 106971e9c80SNikolas Klauser _RandomAccessIterator1 __last, 107971e9c80SNikolas Klauser _Hash __hash = _Hash(), 108971e9c80SNikolas Klauser _BinaryPredicate __pred = _BinaryPredicate()) 109971e9c80SNikolas Klauser : __first_(__first), 110971e9c80SNikolas Klauser __last_(__last), 111971e9c80SNikolas Klauser __pred_(__pred), 112971e9c80SNikolas Klauser __pattern_length_(__last - __first), 113971e9c80SNikolas Klauser __skip_table_(std::make_shared<__skip_table_type>(__pattern_length_, -1, __hash, __pred_)), 114971e9c80SNikolas Klauser __suffix_(std::__allocate_shared_unbounded_array<difference_type[]>( 115971e9c80SNikolas Klauser allocator<difference_type>(), __pattern_length_ + 1)) { 116971e9c80SNikolas Klauser difference_type __i = 0; 117971e9c80SNikolas Klauser while (__first != __last) { 118971e9c80SNikolas Klauser __skip_table_->insert(*__first, __i); 119971e9c80SNikolas Klauser ++__first; 120971e9c80SNikolas Klauser ++__i; 121971e9c80SNikolas Klauser } 122971e9c80SNikolas Klauser __build_suffix_table(__first_, __last_, __pred_); 123971e9c80SNikolas Klauser } 124971e9c80SNikolas Klauser 125971e9c80SNikolas Klauser template <class _RandomAccessIterator2> 12683ce1397SNikolas Klauser _LIBCPP_HIDE_FROM_ABI pair<_RandomAccessIterator2, _RandomAccessIterator2> 127971e9c80SNikolas Klauser operator()(_RandomAccessIterator2 __first, _RandomAccessIterator2 __last) const { 128971e9c80SNikolas Klauser static_assert(__is_same_uncvref<typename iterator_traits<_RandomAccessIterator1>::value_type, 129971e9c80SNikolas Klauser typename iterator_traits<_RandomAccessIterator2>::value_type>::value, 130971e9c80SNikolas Klauser "Corpus and Pattern iterators must point to the same type"); 131971e9c80SNikolas Klauser if (__first == __last) 132971e9c80SNikolas Klauser return std::make_pair(__last, __last); 133971e9c80SNikolas Klauser if (__first_ == __last_) 134971e9c80SNikolas Klauser return std::make_pair(__first, __first); 135971e9c80SNikolas Klauser 136971e9c80SNikolas Klauser if (__pattern_length_ > (__last - __first)) 137971e9c80SNikolas Klauser return std::make_pair(__last, __last); 138971e9c80SNikolas Klauser return __search(__first, __last); 139971e9c80SNikolas Klauser } 140971e9c80SNikolas Klauser 141971e9c80SNikolas Klauser private: 142971e9c80SNikolas Klauser _RandomAccessIterator1 __first_; 143971e9c80SNikolas Klauser _RandomAccessIterator1 __last_; 144971e9c80SNikolas Klauser _BinaryPredicate __pred_; 145971e9c80SNikolas Klauser difference_type __pattern_length_; 146971e9c80SNikolas Klauser shared_ptr<__skip_table_type> __skip_table_; 147971e9c80SNikolas Klauser shared_ptr<difference_type[]> __suffix_; 148971e9c80SNikolas Klauser 149971e9c80SNikolas Klauser template <class _RandomAccessIterator2> 15083ce1397SNikolas Klauser _LIBCPP_HIDE_FROM_ABI pair<_RandomAccessIterator2, _RandomAccessIterator2> 151971e9c80SNikolas Klauser __search(_RandomAccessIterator2 __f, _RandomAccessIterator2 __l) const { 152971e9c80SNikolas Klauser _RandomAccessIterator2 __current = __f; 153971e9c80SNikolas Klauser const _RandomAccessIterator2 __last = __l - __pattern_length_; 154971e9c80SNikolas Klauser const __skip_table_type& __skip_table = *__skip_table_; 155971e9c80SNikolas Klauser 156971e9c80SNikolas Klauser while (__current <= __last) { 157971e9c80SNikolas Klauser difference_type __j = __pattern_length_; 158971e9c80SNikolas Klauser while (__pred_(__first_[__j - 1], __current[__j - 1])) { 159971e9c80SNikolas Klauser --__j; 160971e9c80SNikolas Klauser if (__j == 0) 161971e9c80SNikolas Klauser return std::make_pair(__current, __current + __pattern_length_); 162971e9c80SNikolas Klauser } 163971e9c80SNikolas Klauser 164971e9c80SNikolas Klauser difference_type __k = __skip_table[__current[__j - 1]]; 165971e9c80SNikolas Klauser difference_type __m = __j - __k - 1; 166971e9c80SNikolas Klauser if (__k < __j && __m > __suffix_[__j]) 167971e9c80SNikolas Klauser __current += __m; 168971e9c80SNikolas Klauser else 169971e9c80SNikolas Klauser __current += __suffix_[__j]; 170971e9c80SNikolas Klauser } 171971e9c80SNikolas Klauser return std::make_pair(__l, __l); 172971e9c80SNikolas Klauser } 173971e9c80SNikolas Klauser 174971e9c80SNikolas Klauser template <class _Iterator, class _Container> 17583ce1397SNikolas Klauser _LIBCPP_HIDE_FROM_ABI void 17683ce1397SNikolas Klauser __compute_bm_prefix(_Iterator __first, _Iterator __last, _BinaryPredicate __pred, _Container& __prefix) { 177971e9c80SNikolas Klauser const size_t __count = __last - __first; 178971e9c80SNikolas Klauser 179971e9c80SNikolas Klauser __prefix[0] = 0; 180971e9c80SNikolas Klauser size_t __k = 0; 181971e9c80SNikolas Klauser 182971e9c80SNikolas Klauser for (size_t __i = 1; __i != __count; ++__i) { 183971e9c80SNikolas Klauser while (__k > 0 && !__pred(__first[__k], __first[__i])) 184971e9c80SNikolas Klauser __k = __prefix[__k - 1]; 185971e9c80SNikolas Klauser 186971e9c80SNikolas Klauser if (__pred(__first[__k], __first[__i])) 187971e9c80SNikolas Klauser ++__k; 188971e9c80SNikolas Klauser __prefix[__i] = __k; 189971e9c80SNikolas Klauser } 190971e9c80SNikolas Klauser } 191971e9c80SNikolas Klauser 19283ce1397SNikolas Klauser _LIBCPP_HIDE_FROM_ABI void 19383ce1397SNikolas Klauser __build_suffix_table(_RandomAccessIterator1 __first, _RandomAccessIterator1 __last, _BinaryPredicate __pred) { 194971e9c80SNikolas Klauser const size_t __count = __last - __first; 195971e9c80SNikolas Klauser 196971e9c80SNikolas Klauser if (__count == 0) 197971e9c80SNikolas Klauser return; 198971e9c80SNikolas Klauser 199971e9c80SNikolas Klauser vector<difference_type> __scratch(__count); 200971e9c80SNikolas Klauser 201971e9c80SNikolas Klauser __compute_bm_prefix(__first, __last, __pred, __scratch); 202971e9c80SNikolas Klauser for (size_t __i = 0; __i <= __count; ++__i) 203971e9c80SNikolas Klauser __suffix_[__i] = __count - __scratch[__count - 1]; 204971e9c80SNikolas Klauser 205971e9c80SNikolas Klauser using _ReverseIter = reverse_iterator<_RandomAccessIterator1>; 206971e9c80SNikolas Klauser __compute_bm_prefix(_ReverseIter(__last), _ReverseIter(__first), __pred, __scratch); 207971e9c80SNikolas Klauser 208971e9c80SNikolas Klauser for (size_t __i = 0; __i != __count; ++__i) { 209971e9c80SNikolas Klauser const size_t __j = __count - __scratch[__i]; 210971e9c80SNikolas Klauser const difference_type __k = __i - __scratch[__i] + 1; 211971e9c80SNikolas Klauser 212971e9c80SNikolas Klauser if (__suffix_[__j] > __k) 213971e9c80SNikolas Klauser __suffix_[__j] = __k; 214971e9c80SNikolas Klauser } 215971e9c80SNikolas Klauser } 216971e9c80SNikolas Klauser }; 217c2df7076SLouis Dionne _LIBCPP_CTAD_SUPPORTED_FOR_TYPE(boyer_moore_searcher); 218971e9c80SNikolas Klauser 219971e9c80SNikolas Klauser template <class _RandomAccessIterator1, 220971e9c80SNikolas Klauser class _Hash = hash<typename iterator_traits<_RandomAccessIterator1>::value_type>, 221971e9c80SNikolas Klauser class _BinaryPredicate = equal_to<>> 222971e9c80SNikolas Klauser class _LIBCPP_TEMPLATE_VIS boyer_moore_horspool_searcher { 223971e9c80SNikolas Klauser private: 224971e9c80SNikolas Klauser using difference_type = typename iterator_traits<_RandomAccessIterator1>::difference_type; 225971e9c80SNikolas Klauser using value_type = typename iterator_traits<_RandomAccessIterator1>::value_type; 226*f6958523SNikolas Klauser using __skip_table_type _LIBCPP_NODEBUG = 2279783f28cSLouis Dionne _BMSkipTable<value_type, 228971e9c80SNikolas Klauser difference_type, 229971e9c80SNikolas Klauser _Hash, 230971e9c80SNikolas Klauser _BinaryPredicate, 2319783f28cSLouis Dionne is_integral_v<value_type> && sizeof(value_type) == 1 && is_same_v<_Hash, hash<value_type>> && 2329783f28cSLouis Dionne is_same_v<_BinaryPredicate, equal_to<>>>; 2339783f28cSLouis Dionne 234971e9c80SNikolas Klauser public: 2359783f28cSLouis Dionne _LIBCPP_HIDE_FROM_ABI boyer_moore_horspool_searcher( 2369783f28cSLouis Dionne _RandomAccessIterator1 __first, 237971e9c80SNikolas Klauser _RandomAccessIterator1 __last, 238971e9c80SNikolas Klauser _Hash __hash = _Hash(), 239971e9c80SNikolas Klauser _BinaryPredicate __pred = _BinaryPredicate()) 240971e9c80SNikolas Klauser : __first_(__first), 241971e9c80SNikolas Klauser __last_(__last), 242971e9c80SNikolas Klauser __pred_(__pred), 243971e9c80SNikolas Klauser __pattern_length_(__last - __first), 244971e9c80SNikolas Klauser __skip_table_(std::make_shared<__skip_table_type>(__pattern_length_, __pattern_length_, __hash, __pred_)) { 245971e9c80SNikolas Klauser if (__first == __last) 246971e9c80SNikolas Klauser return; 247971e9c80SNikolas Klauser --__last; 248971e9c80SNikolas Klauser difference_type __i = 0; 249971e9c80SNikolas Klauser while (__first != __last) { 250971e9c80SNikolas Klauser __skip_table_->insert(*__first, __pattern_length_ - 1 - __i); 251971e9c80SNikolas Klauser ++__first; 252971e9c80SNikolas Klauser ++__i; 253971e9c80SNikolas Klauser } 254971e9c80SNikolas Klauser } 255971e9c80SNikolas Klauser 256971e9c80SNikolas Klauser template <class _RandomAccessIterator2> 25783ce1397SNikolas Klauser _LIBCPP_HIDE_FROM_ABI pair<_RandomAccessIterator2, _RandomAccessIterator2> 258971e9c80SNikolas Klauser operator()(_RandomAccessIterator2 __first, _RandomAccessIterator2 __last) const { 259971e9c80SNikolas Klauser static_assert(__is_same_uncvref<typename std::iterator_traits<_RandomAccessIterator1>::value_type, 260971e9c80SNikolas Klauser typename std::iterator_traits<_RandomAccessIterator2>::value_type>::value, 261971e9c80SNikolas Klauser "Corpus and Pattern iterators must point to the same type"); 262971e9c80SNikolas Klauser if (__first == __last) 263971e9c80SNikolas Klauser return std::make_pair(__last, __last); 264971e9c80SNikolas Klauser if (__first_ == __last_) 265971e9c80SNikolas Klauser return std::make_pair(__first, __first); 266971e9c80SNikolas Klauser 267971e9c80SNikolas Klauser if (__pattern_length_ > __last - __first) 268971e9c80SNikolas Klauser return std::make_pair(__last, __last); 269971e9c80SNikolas Klauser 270971e9c80SNikolas Klauser return __search(__first, __last); 271971e9c80SNikolas Klauser } 272971e9c80SNikolas Klauser 273971e9c80SNikolas Klauser private: 274971e9c80SNikolas Klauser _RandomAccessIterator1 __first_; 275971e9c80SNikolas Klauser _RandomAccessIterator1 __last_; 276971e9c80SNikolas Klauser _BinaryPredicate __pred_; 277971e9c80SNikolas Klauser difference_type __pattern_length_; 278971e9c80SNikolas Klauser shared_ptr<__skip_table_type> __skip_table_; 279971e9c80SNikolas Klauser 280971e9c80SNikolas Klauser template <class _RandomAccessIterator2> 28183ce1397SNikolas Klauser _LIBCPP_HIDE_FROM_ABI pair<_RandomAccessIterator2, _RandomAccessIterator2> 282971e9c80SNikolas Klauser __search(_RandomAccessIterator2 __f, _RandomAccessIterator2 __l) const { 283971e9c80SNikolas Klauser _RandomAccessIterator2 __current = __f; 284971e9c80SNikolas Klauser const _RandomAccessIterator2 __last = __l - __pattern_length_; 285971e9c80SNikolas Klauser const __skip_table_type& __skip_table = *__skip_table_; 286971e9c80SNikolas Klauser 287971e9c80SNikolas Klauser while (__current <= __last) { 288971e9c80SNikolas Klauser difference_type __j = __pattern_length_; 289971e9c80SNikolas Klauser while (__pred_(__first_[__j - 1], __current[__j - 1])) { 290971e9c80SNikolas Klauser --__j; 291971e9c80SNikolas Klauser if (__j == 0) 292971e9c80SNikolas Klauser return std::make_pair(__current, __current + __pattern_length_); 293971e9c80SNikolas Klauser } 294971e9c80SNikolas Klauser __current += __skip_table[__current[__pattern_length_ - 1]]; 295971e9c80SNikolas Klauser } 296971e9c80SNikolas Klauser return std::make_pair(__l, __l); 297971e9c80SNikolas Klauser } 298971e9c80SNikolas Klauser }; 299c2df7076SLouis Dionne _LIBCPP_CTAD_SUPPORTED_FOR_TYPE(boyer_moore_horspool_searcher); 300971e9c80SNikolas Klauser 301971e9c80SNikolas Klauser _LIBCPP_END_NAMESPACE_STD 302971e9c80SNikolas Klauser 303971e9c80SNikolas Klauser _LIBCPP_POP_MACROS 304971e9c80SNikolas Klauser 3054f15267dSNikolas Klauser #endif // _LIBCPP_STD_VER >= 17 306971e9c80SNikolas Klauser 307971e9c80SNikolas Klauser #endif // _LIBCPP___FUNCTIONAL_BOYER_MOORE_SEARCHER_H 308