master
  1//===----------------------------------------------------------------------===//
  2//
  3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
  4// See https://llvm.org/LICENSE.txt for license information.
  5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  6//
  7//===----------------------------------------------------------------------===//
  8
  9#ifndef _LIBCPP___FUNCTIONAL_BOYER_MOORE_SEARCHER_H
 10#define _LIBCPP___FUNCTIONAL_BOYER_MOORE_SEARCHER_H
 11
 12#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
 13#  pragma GCC system_header
 14#endif
 15
 16#include <__algorithm/fill_n.h>
 17#include <__config>
 18#include <__functional/hash.h>
 19#include <__functional/operations.h>
 20#include <__iterator/iterator_traits.h>
 21#include <__memory/shared_ptr.h>
 22#include <__type_traits/make_unsigned.h>
 23#include <__utility/pair.h>
 24#include <array>
 25#include <limits>
 26#include <unordered_map>
 27
 28#if _LIBCPP_STD_VER >= 17
 29
 30_LIBCPP_PUSH_MACROS
 31#  include <__undef_macros>
 32
 33_LIBCPP_BEGIN_NAMESPACE_STD
 34
 35template <class _Key, class _Value, class _Hash, class _BinaryPredicate, bool /*useArray*/>
 36class _BMSkipTable;
 37
 38// General case for BM data searching; use a map
 39template <class _Key, class _Value, class _Hash, class _BinaryPredicate>
 40class _BMSkipTable<_Key, _Value, _Hash, _BinaryPredicate, false> {
 41private:
 42  using value_type = _Value;
 43  using key_type   = _Key;
 44
 45  const value_type __default_value_;
 46  unordered_map<_Key, _Value, _Hash, _BinaryPredicate> __table_;
 47
 48public:
 49  _LIBCPP_HIDE_FROM_ABI explicit _BMSkipTable(
 50      size_t __sz, value_type __default_value, _Hash __hash, _BinaryPredicate __pred)
 51      : __default_value_(__default_value), __table_(__sz, __hash, __pred) {}
 52
 53  _LIBCPP_HIDE_FROM_ABI void insert(const key_type& __key, value_type __val) { __table_[__key] = __val; }
 54
 55  _LIBCPP_HIDE_FROM_ABI value_type operator[](const key_type& __key) const {
 56    auto __it = __table_.find(__key);
 57    return __it == __table_.end() ? __default_value_ : __it->second;
 58  }
 59};
 60
 61// Special case small numeric values; use an array
 62template <class _Key, class _Value, class _Hash, class _BinaryPredicate>
 63class _BMSkipTable<_Key, _Value, _Hash, _BinaryPredicate, true> {
 64private:
 65  using value_type = _Value;
 66  using key_type   = _Key;
 67
 68  using unsigned_key_type = make_unsigned_t<key_type>;
 69  std::array<value_type, 256> __table_;
 70  static_assert(numeric_limits<unsigned_key_type>::max() < 256);
 71
 72public:
 73  _LIBCPP_HIDE_FROM_ABI explicit _BMSkipTable(size_t, value_type __default_value, _Hash, _BinaryPredicate) {
 74    std::fill_n(__table_.data(), __table_.size(), __default_value);
 75  }
 76
 77  _LIBCPP_HIDE_FROM_ABI void insert(key_type __key, value_type __val) {
 78    __table_[static_cast<unsigned_key_type>(__key)] = __val;
 79  }
 80
 81  _LIBCPP_HIDE_FROM_ABI value_type operator[](key_type __key) const {
 82    return __table_[static_cast<unsigned_key_type>(__key)];
 83  }
 84};
 85
 86template <class _RandomAccessIterator1,
 87          class _Hash            = hash<typename iterator_traits<_RandomAccessIterator1>::value_type>,
 88          class _BinaryPredicate = equal_to<>>
 89class boyer_moore_searcher {
 90private:
 91  using difference_type = typename std::iterator_traits<_RandomAccessIterator1>::difference_type;
 92  using value_type      = typename std::iterator_traits<_RandomAccessIterator1>::value_type;
 93  using __skip_table_type _LIBCPP_NODEBUG =
 94      _BMSkipTable<value_type,
 95                   difference_type,
 96                   _Hash,
 97                   _BinaryPredicate,
 98                   is_integral_v<value_type> && sizeof(value_type) == 1 && is_same_v<_Hash, hash<value_type>> &&
 99                       is_same_v<_BinaryPredicate, equal_to<>>>;
100
101public:
102  _LIBCPP_HIDE_FROM_ABI boyer_moore_searcher(
103      _RandomAccessIterator1 __first,
104      _RandomAccessIterator1 __last,
105      _Hash __hash            = _Hash(),
106      _BinaryPredicate __pred = _BinaryPredicate())
107      : __first_(__first),
108        __last_(__last),
109        __pred_(__pred),
110        __pattern_length_(__last - __first),
111        __skip_table_(std::make_shared<__skip_table_type>(__pattern_length_, -1, __hash, __pred_)),
112        __suffix_(std::__allocate_shared_unbounded_array<difference_type[]>(
113            allocator<difference_type>(), __pattern_length_ + 1)) {
114    difference_type __i = 0;
115    while (__first != __last) {
116      __skip_table_->insert(*__first, __i);
117      ++__first;
118      ++__i;
119    }
120    __build_suffix_table(__first_, __last_, __pred_);
121  }
122
123  template <class _RandomAccessIterator2>
124  _LIBCPP_HIDE_FROM_ABI pair<_RandomAccessIterator2, _RandomAccessIterator2>
125  operator()(_RandomAccessIterator2 __first, _RandomAccessIterator2 __last) const {
126    static_assert(is_same_v<__remove_cvref_t<typename iterator_traits<_RandomAccessIterator1>::value_type>,
127                            __remove_cvref_t<typename iterator_traits<_RandomAccessIterator2>::value_type>>,
128                  "Corpus and Pattern iterators must point to the same type");
129    if (__first == __last)
130      return std::make_pair(__last, __last);
131    if (__first_ == __last_)
132      return std::make_pair(__first, __first);
133
134    if (__pattern_length_ > (__last - __first))
135      return std::make_pair(__last, __last);
136    return __search(__first, __last);
137  }
138
139private:
140  _RandomAccessIterator1 __first_;
141  _RandomAccessIterator1 __last_;
142  _BinaryPredicate __pred_;
143  difference_type __pattern_length_;
144  shared_ptr<__skip_table_type> __skip_table_;
145  shared_ptr<difference_type[]> __suffix_;
146
147  template <class _RandomAccessIterator2>
148  _LIBCPP_HIDE_FROM_ABI pair<_RandomAccessIterator2, _RandomAccessIterator2>
149  __search(_RandomAccessIterator2 __f, _RandomAccessIterator2 __l) const {
150    _RandomAccessIterator2 __current      = __f;
151    const _RandomAccessIterator2 __last   = __l - __pattern_length_;
152    const __skip_table_type& __skip_table = *__skip_table_;
153
154    while (__current <= __last) {
155      difference_type __j = __pattern_length_;
156      while (__pred_(__first_[__j - 1], __current[__j - 1])) {
157        --__j;
158        if (__j == 0)
159          return std::make_pair(__current, __current + __pattern_length_);
160      }
161
162      difference_type __k = __skip_table[__current[__j - 1]];
163      difference_type __m = __j - __k - 1;
164      if (__k < __j && __m > __suffix_[__j])
165        __current += __m;
166      else
167        __current += __suffix_[__j];
168    }
169    return std::make_pair(__l, __l);
170  }
171
172  template <class _Iterator, class _Container>
173  _LIBCPP_HIDE_FROM_ABI void
174  __compute_bm_prefix(_Iterator __first, _Iterator __last, _BinaryPredicate __pred, _Container& __prefix) {
175    const size_t __count = __last - __first;
176
177    __prefix[0] = 0;
178    size_t __k  = 0;
179
180    for (size_t __i = 1; __i != __count; ++__i) {
181      while (__k > 0 && !__pred(__first[__k], __first[__i]))
182        __k = __prefix[__k - 1];
183
184      if (__pred(__first[__k], __first[__i]))
185        ++__k;
186      __prefix[__i] = __k;
187    }
188  }
189
190  _LIBCPP_HIDE_FROM_ABI void
191  __build_suffix_table(_RandomAccessIterator1 __first, _RandomAccessIterator1 __last, _BinaryPredicate __pred) {
192    const size_t __count = __last - __first;
193
194    if (__count == 0)
195      return;
196
197    auto __scratch = std::make_unique<difference_type[]>(__count);
198
199    __compute_bm_prefix(__first, __last, __pred, __scratch);
200    for (size_t __i = 0; __i <= __count; ++__i)
201      __suffix_[__i] = __count - __scratch[__count - 1];
202
203    using _ReverseIter = reverse_iterator<_RandomAccessIterator1>;
204    __compute_bm_prefix(_ReverseIter(__last), _ReverseIter(__first), __pred, __scratch);
205
206    for (size_t __i = 0; __i != __count; ++__i) {
207      const size_t __j          = __count - __scratch[__i];
208      const difference_type __k = __i - __scratch[__i] + 1;
209
210      if (__suffix_[__j] > __k)
211        __suffix_[__j] = __k;
212    }
213  }
214};
215_LIBCPP_CTAD_SUPPORTED_FOR_TYPE(boyer_moore_searcher);
216
217template <class _RandomAccessIterator1,
218          class _Hash            = hash<typename iterator_traits<_RandomAccessIterator1>::value_type>,
219          class _BinaryPredicate = equal_to<>>
220class boyer_moore_horspool_searcher {
221private:
222  using difference_type = typename iterator_traits<_RandomAccessIterator1>::difference_type;
223  using value_type      = typename iterator_traits<_RandomAccessIterator1>::value_type;
224  using __skip_table_type _LIBCPP_NODEBUG =
225      _BMSkipTable<value_type,
226                   difference_type,
227                   _Hash,
228                   _BinaryPredicate,
229                   is_integral_v<value_type> && sizeof(value_type) == 1 && is_same_v<_Hash, hash<value_type>> &&
230                       is_same_v<_BinaryPredicate, equal_to<>>>;
231
232public:
233  _LIBCPP_HIDE_FROM_ABI boyer_moore_horspool_searcher(
234      _RandomAccessIterator1 __first,
235      _RandomAccessIterator1 __last,
236      _Hash __hash            = _Hash(),
237      _BinaryPredicate __pred = _BinaryPredicate())
238      : __first_(__first),
239        __last_(__last),
240        __pred_(__pred),
241        __pattern_length_(__last - __first),
242        __skip_table_(std::make_shared<__skip_table_type>(__pattern_length_, __pattern_length_, __hash, __pred_)) {
243    if (__first == __last)
244      return;
245    --__last;
246    difference_type __i = 0;
247    while (__first != __last) {
248      __skip_table_->insert(*__first, __pattern_length_ - 1 - __i);
249      ++__first;
250      ++__i;
251    }
252  }
253
254  template <class _RandomAccessIterator2>
255  _LIBCPP_HIDE_FROM_ABI pair<_RandomAccessIterator2, _RandomAccessIterator2>
256  operator()(_RandomAccessIterator2 __first, _RandomAccessIterator2 __last) const {
257    static_assert(is_same_v<__remove_cvref_t<typename std::iterator_traits<_RandomAccessIterator1>::value_type>,
258                            __remove_cvref_t<typename std::iterator_traits<_RandomAccessIterator2>::value_type>>,
259                  "Corpus and Pattern iterators must point to the same type");
260    if (__first == __last)
261      return std::make_pair(__last, __last);
262    if (__first_ == __last_)
263      return std::make_pair(__first, __first);
264
265    if (__pattern_length_ > __last - __first)
266      return std::make_pair(__last, __last);
267
268    return __search(__first, __last);
269  }
270
271private:
272  _RandomAccessIterator1 __first_;
273  _RandomAccessIterator1 __last_;
274  _BinaryPredicate __pred_;
275  difference_type __pattern_length_;
276  shared_ptr<__skip_table_type> __skip_table_;
277
278  template <class _RandomAccessIterator2>
279  _LIBCPP_HIDE_FROM_ABI pair<_RandomAccessIterator2, _RandomAccessIterator2>
280  __search(_RandomAccessIterator2 __f, _RandomAccessIterator2 __l) const {
281    _RandomAccessIterator2 __current      = __f;
282    const _RandomAccessIterator2 __last   = __l - __pattern_length_;
283    const __skip_table_type& __skip_table = *__skip_table_;
284
285    while (__current <= __last) {
286      difference_type __j = __pattern_length_;
287      while (__pred_(__first_[__j - 1], __current[__j - 1])) {
288        --__j;
289        if (__j == 0)
290          return std::make_pair(__current, __current + __pattern_length_);
291      }
292      __current += __skip_table[__current[__pattern_length_ - 1]];
293    }
294    return std::make_pair(__l, __l);
295  }
296};
297_LIBCPP_CTAD_SUPPORTED_FOR_TYPE(boyer_moore_horspool_searcher);
298
299_LIBCPP_END_NAMESPACE_STD
300
301_LIBCPP_POP_MACROS
302
303#endif // _LIBCPP_STD_VER >= 17
304
305#endif // _LIBCPP___FUNCTIONAL_BOYER_MOORE_SEARCHER_H