1 //===----------------------------------------------------------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #ifndef _LIBCPP___FUNCTIONAL_BOYER_MOORE_SEARCHER_H 10 #define _LIBCPP___FUNCTIONAL_BOYER_MOORE_SEARCHER_H 11 12 #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) 13 # pragma GCC system_header 14 #endif 15 16 #include <__algorithm/fill_n.h> 17 #include <__config> 18 #include <__functional/hash.h> 19 #include <__functional/operations.h> 20 #include <__iterator/distance.h> 21 #include <__iterator/iterator_traits.h> 22 #include <__memory/shared_ptr.h> 23 #include <__type_traits/make_unsigned.h> 24 #include <__utility/pair.h> 25 #include <__vector/vector.h> 26 #include <array> 27 #include <limits> 28 #include <unordered_map> 29 30 #if _LIBCPP_STD_VER >= 17 31 32 _LIBCPP_PUSH_MACROS 33 # include <__undef_macros> 34 35 _LIBCPP_BEGIN_NAMESPACE_STD 36 37 template <class _Key, class _Value, class _Hash, class _BinaryPredicate, bool /*useArray*/> 38 class _BMSkipTable; 39 40 // General case for BM data searching; use a map 41 template <class _Key, class _Value, class _Hash, class _BinaryPredicate> 42 class _BMSkipTable<_Key, _Value, _Hash, _BinaryPredicate, false> { 43 private: 44 using value_type = _Value; 45 using key_type = _Key; 46 47 const value_type __default_value_; 48 unordered_map<_Key, _Value, _Hash, _BinaryPredicate> __table_; 49 50 public: 51 _LIBCPP_HIDE_FROM_ABI explicit _BMSkipTable( 52 size_t __sz, value_type __default_value, _Hash __hash, _BinaryPredicate __pred) 53 : __default_value_(__default_value), __table_(__sz, __hash, __pred) {} 54 55 _LIBCPP_HIDE_FROM_ABI void insert(const key_type& __key, value_type __val) { __table_[__key] = __val; } 56 57 _LIBCPP_HIDE_FROM_ABI value_type operator[](const key_type& __key) const { 58 auto __it = __table_.find(__key); 59 return __it == __table_.end() ? __default_value_ : __it->second; 60 } 61 }; 62 63 // Special case small numeric values; use an array 64 template <class _Key, class _Value, class _Hash, class _BinaryPredicate> 65 class _BMSkipTable<_Key, _Value, _Hash, _BinaryPredicate, true> { 66 private: 67 using value_type = _Value; 68 using key_type = _Key; 69 70 using unsigned_key_type = make_unsigned_t<key_type>; 71 std::array<value_type, 256> __table_; 72 static_assert(numeric_limits<unsigned_key_type>::max() < 256); 73 74 public: 75 _LIBCPP_HIDE_FROM_ABI explicit _BMSkipTable(size_t, value_type __default_value, _Hash, _BinaryPredicate) { 76 std::fill_n(__table_.data(), __table_.size(), __default_value); 77 } 78 79 _LIBCPP_HIDE_FROM_ABI void insert(key_type __key, value_type __val) { 80 __table_[static_cast<unsigned_key_type>(__key)] = __val; 81 } 82 83 _LIBCPP_HIDE_FROM_ABI value_type operator[](key_type __key) const { 84 return __table_[static_cast<unsigned_key_type>(__key)]; 85 } 86 }; 87 88 template <class _RandomAccessIterator1, 89 class _Hash = hash<typename iterator_traits<_RandomAccessIterator1>::value_type>, 90 class _BinaryPredicate = equal_to<>> 91 class _LIBCPP_TEMPLATE_VIS boyer_moore_searcher { 92 private: 93 using difference_type = typename std::iterator_traits<_RandomAccessIterator1>::difference_type; 94 using value_type = typename std::iterator_traits<_RandomAccessIterator1>::value_type; 95 using __skip_table_type _LIBCPP_NODEBUG = 96 _BMSkipTable<value_type, 97 difference_type, 98 _Hash, 99 _BinaryPredicate, 100 is_integral_v<value_type> && sizeof(value_type) == 1 && is_same_v<_Hash, hash<value_type>> && 101 is_same_v<_BinaryPredicate, equal_to<>>>; 102 103 public: 104 _LIBCPP_HIDE_FROM_ABI boyer_moore_searcher( 105 _RandomAccessIterator1 __first, 106 _RandomAccessIterator1 __last, 107 _Hash __hash = _Hash(), 108 _BinaryPredicate __pred = _BinaryPredicate()) 109 : __first_(__first), 110 __last_(__last), 111 __pred_(__pred), 112 __pattern_length_(__last - __first), 113 __skip_table_(std::make_shared<__skip_table_type>(__pattern_length_, -1, __hash, __pred_)), 114 __suffix_(std::__allocate_shared_unbounded_array<difference_type[]>( 115 allocator<difference_type>(), __pattern_length_ + 1)) { 116 difference_type __i = 0; 117 while (__first != __last) { 118 __skip_table_->insert(*__first, __i); 119 ++__first; 120 ++__i; 121 } 122 __build_suffix_table(__first_, __last_, __pred_); 123 } 124 125 template <class _RandomAccessIterator2> 126 _LIBCPP_HIDE_FROM_ABI pair<_RandomAccessIterator2, _RandomAccessIterator2> 127 operator()(_RandomAccessIterator2 __first, _RandomAccessIterator2 __last) const { 128 static_assert(__is_same_uncvref<typename iterator_traits<_RandomAccessIterator1>::value_type, 129 typename iterator_traits<_RandomAccessIterator2>::value_type>::value, 130 "Corpus and Pattern iterators must point to the same type"); 131 if (__first == __last) 132 return std::make_pair(__last, __last); 133 if (__first_ == __last_) 134 return std::make_pair(__first, __first); 135 136 if (__pattern_length_ > (__last - __first)) 137 return std::make_pair(__last, __last); 138 return __search(__first, __last); 139 } 140 141 private: 142 _RandomAccessIterator1 __first_; 143 _RandomAccessIterator1 __last_; 144 _BinaryPredicate __pred_; 145 difference_type __pattern_length_; 146 shared_ptr<__skip_table_type> __skip_table_; 147 shared_ptr<difference_type[]> __suffix_; 148 149 template <class _RandomAccessIterator2> 150 _LIBCPP_HIDE_FROM_ABI pair<_RandomAccessIterator2, _RandomAccessIterator2> 151 __search(_RandomAccessIterator2 __f, _RandomAccessIterator2 __l) const { 152 _RandomAccessIterator2 __current = __f; 153 const _RandomAccessIterator2 __last = __l - __pattern_length_; 154 const __skip_table_type& __skip_table = *__skip_table_; 155 156 while (__current <= __last) { 157 difference_type __j = __pattern_length_; 158 while (__pred_(__first_[__j - 1], __current[__j - 1])) { 159 --__j; 160 if (__j == 0) 161 return std::make_pair(__current, __current + __pattern_length_); 162 } 163 164 difference_type __k = __skip_table[__current[__j - 1]]; 165 difference_type __m = __j - __k - 1; 166 if (__k < __j && __m > __suffix_[__j]) 167 __current += __m; 168 else 169 __current += __suffix_[__j]; 170 } 171 return std::make_pair(__l, __l); 172 } 173 174 template <class _Iterator, class _Container> 175 _LIBCPP_HIDE_FROM_ABI void 176 __compute_bm_prefix(_Iterator __first, _Iterator __last, _BinaryPredicate __pred, _Container& __prefix) { 177 const size_t __count = __last - __first; 178 179 __prefix[0] = 0; 180 size_t __k = 0; 181 182 for (size_t __i = 1; __i != __count; ++__i) { 183 while (__k > 0 && !__pred(__first[__k], __first[__i])) 184 __k = __prefix[__k - 1]; 185 186 if (__pred(__first[__k], __first[__i])) 187 ++__k; 188 __prefix[__i] = __k; 189 } 190 } 191 192 _LIBCPP_HIDE_FROM_ABI void 193 __build_suffix_table(_RandomAccessIterator1 __first, _RandomAccessIterator1 __last, _BinaryPredicate __pred) { 194 const size_t __count = __last - __first; 195 196 if (__count == 0) 197 return; 198 199 vector<difference_type> __scratch(__count); 200 201 __compute_bm_prefix(__first, __last, __pred, __scratch); 202 for (size_t __i = 0; __i <= __count; ++__i) 203 __suffix_[__i] = __count - __scratch[__count - 1]; 204 205 using _ReverseIter = reverse_iterator<_RandomAccessIterator1>; 206 __compute_bm_prefix(_ReverseIter(__last), _ReverseIter(__first), __pred, __scratch); 207 208 for (size_t __i = 0; __i != __count; ++__i) { 209 const size_t __j = __count - __scratch[__i]; 210 const difference_type __k = __i - __scratch[__i] + 1; 211 212 if (__suffix_[__j] > __k) 213 __suffix_[__j] = __k; 214 } 215 } 216 }; 217 _LIBCPP_CTAD_SUPPORTED_FOR_TYPE(boyer_moore_searcher); 218 219 template <class _RandomAccessIterator1, 220 class _Hash = hash<typename iterator_traits<_RandomAccessIterator1>::value_type>, 221 class _BinaryPredicate = equal_to<>> 222 class _LIBCPP_TEMPLATE_VIS boyer_moore_horspool_searcher { 223 private: 224 using difference_type = typename iterator_traits<_RandomAccessIterator1>::difference_type; 225 using value_type = typename iterator_traits<_RandomAccessIterator1>::value_type; 226 using __skip_table_type _LIBCPP_NODEBUG = 227 _BMSkipTable<value_type, 228 difference_type, 229 _Hash, 230 _BinaryPredicate, 231 is_integral_v<value_type> && sizeof(value_type) == 1 && is_same_v<_Hash, hash<value_type>> && 232 is_same_v<_BinaryPredicate, equal_to<>>>; 233 234 public: 235 _LIBCPP_HIDE_FROM_ABI boyer_moore_horspool_searcher( 236 _RandomAccessIterator1 __first, 237 _RandomAccessIterator1 __last, 238 _Hash __hash = _Hash(), 239 _BinaryPredicate __pred = _BinaryPredicate()) 240 : __first_(__first), 241 __last_(__last), 242 __pred_(__pred), 243 __pattern_length_(__last - __first), 244 __skip_table_(std::make_shared<__skip_table_type>(__pattern_length_, __pattern_length_, __hash, __pred_)) { 245 if (__first == __last) 246 return; 247 --__last; 248 difference_type __i = 0; 249 while (__first != __last) { 250 __skip_table_->insert(*__first, __pattern_length_ - 1 - __i); 251 ++__first; 252 ++__i; 253 } 254 } 255 256 template <class _RandomAccessIterator2> 257 _LIBCPP_HIDE_FROM_ABI pair<_RandomAccessIterator2, _RandomAccessIterator2> 258 operator()(_RandomAccessIterator2 __first, _RandomAccessIterator2 __last) const { 259 static_assert(__is_same_uncvref<typename std::iterator_traits<_RandomAccessIterator1>::value_type, 260 typename std::iterator_traits<_RandomAccessIterator2>::value_type>::value, 261 "Corpus and Pattern iterators must point to the same type"); 262 if (__first == __last) 263 return std::make_pair(__last, __last); 264 if (__first_ == __last_) 265 return std::make_pair(__first, __first); 266 267 if (__pattern_length_ > __last - __first) 268 return std::make_pair(__last, __last); 269 270 return __search(__first, __last); 271 } 272 273 private: 274 _RandomAccessIterator1 __first_; 275 _RandomAccessIterator1 __last_; 276 _BinaryPredicate __pred_; 277 difference_type __pattern_length_; 278 shared_ptr<__skip_table_type> __skip_table_; 279 280 template <class _RandomAccessIterator2> 281 _LIBCPP_HIDE_FROM_ABI pair<_RandomAccessIterator2, _RandomAccessIterator2> 282 __search(_RandomAccessIterator2 __f, _RandomAccessIterator2 __l) const { 283 _RandomAccessIterator2 __current = __f; 284 const _RandomAccessIterator2 __last = __l - __pattern_length_; 285 const __skip_table_type& __skip_table = *__skip_table_; 286 287 while (__current <= __last) { 288 difference_type __j = __pattern_length_; 289 while (__pred_(__first_[__j - 1], __current[__j - 1])) { 290 --__j; 291 if (__j == 0) 292 return std::make_pair(__current, __current + __pattern_length_); 293 } 294 __current += __skip_table[__current[__pattern_length_ - 1]]; 295 } 296 return std::make_pair(__l, __l); 297 } 298 }; 299 _LIBCPP_CTAD_SUPPORTED_FOR_TYPE(boyer_moore_horspool_searcher); 300 301 _LIBCPP_END_NAMESPACE_STD 302 303 _LIBCPP_POP_MACROS 304 305 #endif // _LIBCPP_STD_VER >= 17 306 307 #endif // _LIBCPP___FUNCTIONAL_BOYER_MOORE_SEARCHER_H 308