master
1//===----------------------------------------------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#ifndef _LIBCPP___FUNCTIONAL_BOYER_MOORE_SEARCHER_H
10#define _LIBCPP___FUNCTIONAL_BOYER_MOORE_SEARCHER_H
11
12#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
13# pragma GCC system_header
14#endif
15
16#include <__algorithm/fill_n.h>
17#include <__config>
18#include <__functional/hash.h>
19#include <__functional/operations.h>
20#include <__iterator/iterator_traits.h>
21#include <__memory/shared_ptr.h>
22#include <__type_traits/make_unsigned.h>
23#include <__utility/pair.h>
24#include <array>
25#include <limits>
26#include <unordered_map>
27
28#if _LIBCPP_STD_VER >= 17
29
30_LIBCPP_PUSH_MACROS
31# include <__undef_macros>
32
33_LIBCPP_BEGIN_NAMESPACE_STD
34
35template <class _Key, class _Value, class _Hash, class _BinaryPredicate, bool /*useArray*/>
36class _BMSkipTable;
37
38// General case for BM data searching; use a map
39template <class _Key, class _Value, class _Hash, class _BinaryPredicate>
40class _BMSkipTable<_Key, _Value, _Hash, _BinaryPredicate, false> {
41private:
42 using value_type = _Value;
43 using key_type = _Key;
44
45 const value_type __default_value_;
46 unordered_map<_Key, _Value, _Hash, _BinaryPredicate> __table_;
47
48public:
49 _LIBCPP_HIDE_FROM_ABI explicit _BMSkipTable(
50 size_t __sz, value_type __default_value, _Hash __hash, _BinaryPredicate __pred)
51 : __default_value_(__default_value), __table_(__sz, __hash, __pred) {}
52
53 _LIBCPP_HIDE_FROM_ABI void insert(const key_type& __key, value_type __val) { __table_[__key] = __val; }
54
55 _LIBCPP_HIDE_FROM_ABI value_type operator[](const key_type& __key) const {
56 auto __it = __table_.find(__key);
57 return __it == __table_.end() ? __default_value_ : __it->second;
58 }
59};
60
61// Special case small numeric values; use an array
62template <class _Key, class _Value, class _Hash, class _BinaryPredicate>
63class _BMSkipTable<_Key, _Value, _Hash, _BinaryPredicate, true> {
64private:
65 using value_type = _Value;
66 using key_type = _Key;
67
68 using unsigned_key_type = make_unsigned_t<key_type>;
69 std::array<value_type, 256> __table_;
70 static_assert(numeric_limits<unsigned_key_type>::max() < 256);
71
72public:
73 _LIBCPP_HIDE_FROM_ABI explicit _BMSkipTable(size_t, value_type __default_value, _Hash, _BinaryPredicate) {
74 std::fill_n(__table_.data(), __table_.size(), __default_value);
75 }
76
77 _LIBCPP_HIDE_FROM_ABI void insert(key_type __key, value_type __val) {
78 __table_[static_cast<unsigned_key_type>(__key)] = __val;
79 }
80
81 _LIBCPP_HIDE_FROM_ABI value_type operator[](key_type __key) const {
82 return __table_[static_cast<unsigned_key_type>(__key)];
83 }
84};
85
86template <class _RandomAccessIterator1,
87 class _Hash = hash<typename iterator_traits<_RandomAccessIterator1>::value_type>,
88 class _BinaryPredicate = equal_to<>>
89class boyer_moore_searcher {
90private:
91 using difference_type = typename std::iterator_traits<_RandomAccessIterator1>::difference_type;
92 using value_type = typename std::iterator_traits<_RandomAccessIterator1>::value_type;
93 using __skip_table_type _LIBCPP_NODEBUG =
94 _BMSkipTable<value_type,
95 difference_type,
96 _Hash,
97 _BinaryPredicate,
98 is_integral_v<value_type> && sizeof(value_type) == 1 && is_same_v<_Hash, hash<value_type>> &&
99 is_same_v<_BinaryPredicate, equal_to<>>>;
100
101public:
102 _LIBCPP_HIDE_FROM_ABI boyer_moore_searcher(
103 _RandomAccessIterator1 __first,
104 _RandomAccessIterator1 __last,
105 _Hash __hash = _Hash(),
106 _BinaryPredicate __pred = _BinaryPredicate())
107 : __first_(__first),
108 __last_(__last),
109 __pred_(__pred),
110 __pattern_length_(__last - __first),
111 __skip_table_(std::make_shared<__skip_table_type>(__pattern_length_, -1, __hash, __pred_)),
112 __suffix_(std::__allocate_shared_unbounded_array<difference_type[]>(
113 allocator<difference_type>(), __pattern_length_ + 1)) {
114 difference_type __i = 0;
115 while (__first != __last) {
116 __skip_table_->insert(*__first, __i);
117 ++__first;
118 ++__i;
119 }
120 __build_suffix_table(__first_, __last_, __pred_);
121 }
122
123 template <class _RandomAccessIterator2>
124 _LIBCPP_HIDE_FROM_ABI pair<_RandomAccessIterator2, _RandomAccessIterator2>
125 operator()(_RandomAccessIterator2 __first, _RandomAccessIterator2 __last) const {
126 static_assert(is_same_v<__remove_cvref_t<typename iterator_traits<_RandomAccessIterator1>::value_type>,
127 __remove_cvref_t<typename iterator_traits<_RandomAccessIterator2>::value_type>>,
128 "Corpus and Pattern iterators must point to the same type");
129 if (__first == __last)
130 return std::make_pair(__last, __last);
131 if (__first_ == __last_)
132 return std::make_pair(__first, __first);
133
134 if (__pattern_length_ > (__last - __first))
135 return std::make_pair(__last, __last);
136 return __search(__first, __last);
137 }
138
139private:
140 _RandomAccessIterator1 __first_;
141 _RandomAccessIterator1 __last_;
142 _BinaryPredicate __pred_;
143 difference_type __pattern_length_;
144 shared_ptr<__skip_table_type> __skip_table_;
145 shared_ptr<difference_type[]> __suffix_;
146
147 template <class _RandomAccessIterator2>
148 _LIBCPP_HIDE_FROM_ABI pair<_RandomAccessIterator2, _RandomAccessIterator2>
149 __search(_RandomAccessIterator2 __f, _RandomAccessIterator2 __l) const {
150 _RandomAccessIterator2 __current = __f;
151 const _RandomAccessIterator2 __last = __l - __pattern_length_;
152 const __skip_table_type& __skip_table = *__skip_table_;
153
154 while (__current <= __last) {
155 difference_type __j = __pattern_length_;
156 while (__pred_(__first_[__j - 1], __current[__j - 1])) {
157 --__j;
158 if (__j == 0)
159 return std::make_pair(__current, __current + __pattern_length_);
160 }
161
162 difference_type __k = __skip_table[__current[__j - 1]];
163 difference_type __m = __j - __k - 1;
164 if (__k < __j && __m > __suffix_[__j])
165 __current += __m;
166 else
167 __current += __suffix_[__j];
168 }
169 return std::make_pair(__l, __l);
170 }
171
172 template <class _Iterator, class _Container>
173 _LIBCPP_HIDE_FROM_ABI void
174 __compute_bm_prefix(_Iterator __first, _Iterator __last, _BinaryPredicate __pred, _Container& __prefix) {
175 const size_t __count = __last - __first;
176
177 __prefix[0] = 0;
178 size_t __k = 0;
179
180 for (size_t __i = 1; __i != __count; ++__i) {
181 while (__k > 0 && !__pred(__first[__k], __first[__i]))
182 __k = __prefix[__k - 1];
183
184 if (__pred(__first[__k], __first[__i]))
185 ++__k;
186 __prefix[__i] = __k;
187 }
188 }
189
190 _LIBCPP_HIDE_FROM_ABI void
191 __build_suffix_table(_RandomAccessIterator1 __first, _RandomAccessIterator1 __last, _BinaryPredicate __pred) {
192 const size_t __count = __last - __first;
193
194 if (__count == 0)
195 return;
196
197 auto __scratch = std::make_unique<difference_type[]>(__count);
198
199 __compute_bm_prefix(__first, __last, __pred, __scratch);
200 for (size_t __i = 0; __i <= __count; ++__i)
201 __suffix_[__i] = __count - __scratch[__count - 1];
202
203 using _ReverseIter = reverse_iterator<_RandomAccessIterator1>;
204 __compute_bm_prefix(_ReverseIter(__last), _ReverseIter(__first), __pred, __scratch);
205
206 for (size_t __i = 0; __i != __count; ++__i) {
207 const size_t __j = __count - __scratch[__i];
208 const difference_type __k = __i - __scratch[__i] + 1;
209
210 if (__suffix_[__j] > __k)
211 __suffix_[__j] = __k;
212 }
213 }
214};
215_LIBCPP_CTAD_SUPPORTED_FOR_TYPE(boyer_moore_searcher);
216
217template <class _RandomAccessIterator1,
218 class _Hash = hash<typename iterator_traits<_RandomAccessIterator1>::value_type>,
219 class _BinaryPredicate = equal_to<>>
220class boyer_moore_horspool_searcher {
221private:
222 using difference_type = typename iterator_traits<_RandomAccessIterator1>::difference_type;
223 using value_type = typename iterator_traits<_RandomAccessIterator1>::value_type;
224 using __skip_table_type _LIBCPP_NODEBUG =
225 _BMSkipTable<value_type,
226 difference_type,
227 _Hash,
228 _BinaryPredicate,
229 is_integral_v<value_type> && sizeof(value_type) == 1 && is_same_v<_Hash, hash<value_type>> &&
230 is_same_v<_BinaryPredicate, equal_to<>>>;
231
232public:
233 _LIBCPP_HIDE_FROM_ABI boyer_moore_horspool_searcher(
234 _RandomAccessIterator1 __first,
235 _RandomAccessIterator1 __last,
236 _Hash __hash = _Hash(),
237 _BinaryPredicate __pred = _BinaryPredicate())
238 : __first_(__first),
239 __last_(__last),
240 __pred_(__pred),
241 __pattern_length_(__last - __first),
242 __skip_table_(std::make_shared<__skip_table_type>(__pattern_length_, __pattern_length_, __hash, __pred_)) {
243 if (__first == __last)
244 return;
245 --__last;
246 difference_type __i = 0;
247 while (__first != __last) {
248 __skip_table_->insert(*__first, __pattern_length_ - 1 - __i);
249 ++__first;
250 ++__i;
251 }
252 }
253
254 template <class _RandomAccessIterator2>
255 _LIBCPP_HIDE_FROM_ABI pair<_RandomAccessIterator2, _RandomAccessIterator2>
256 operator()(_RandomAccessIterator2 __first, _RandomAccessIterator2 __last) const {
257 static_assert(is_same_v<__remove_cvref_t<typename std::iterator_traits<_RandomAccessIterator1>::value_type>,
258 __remove_cvref_t<typename std::iterator_traits<_RandomAccessIterator2>::value_type>>,
259 "Corpus and Pattern iterators must point to the same type");
260 if (__first == __last)
261 return std::make_pair(__last, __last);
262 if (__first_ == __last_)
263 return std::make_pair(__first, __first);
264
265 if (__pattern_length_ > __last - __first)
266 return std::make_pair(__last, __last);
267
268 return __search(__first, __last);
269 }
270
271private:
272 _RandomAccessIterator1 __first_;
273 _RandomAccessIterator1 __last_;
274 _BinaryPredicate __pred_;
275 difference_type __pattern_length_;
276 shared_ptr<__skip_table_type> __skip_table_;
277
278 template <class _RandomAccessIterator2>
279 _LIBCPP_HIDE_FROM_ABI pair<_RandomAccessIterator2, _RandomAccessIterator2>
280 __search(_RandomAccessIterator2 __f, _RandomAccessIterator2 __l) const {
281 _RandomAccessIterator2 __current = __f;
282 const _RandomAccessIterator2 __last = __l - __pattern_length_;
283 const __skip_table_type& __skip_table = *__skip_table_;
284
285 while (__current <= __last) {
286 difference_type __j = __pattern_length_;
287 while (__pred_(__first_[__j - 1], __current[__j - 1])) {
288 --__j;
289 if (__j == 0)
290 return std::make_pair(__current, __current + __pattern_length_);
291 }
292 __current += __skip_table[__current[__pattern_length_ - 1]];
293 }
294 return std::make_pair(__l, __l);
295 }
296};
297_LIBCPP_CTAD_SUPPORTED_FOR_TYPE(boyer_moore_horspool_searcher);
298
299_LIBCPP_END_NAMESPACE_STD
300
301_LIBCPP_POP_MACROS
302
303#endif // _LIBCPP_STD_VER >= 17
304
305#endif // _LIBCPP___FUNCTIONAL_BOYER_MOORE_SEARCHER_H