master
  1//===----------------------------------------------------------------------===//
  2//
  3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
  4// See https://llvm.org/LICENSE.txt for license information.
  5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  6//
  7//===----------------------------------------------------------------------===//
  8
  9#ifndef _LIBCPP___PSTL_CPU_ALGOS_FIND_IF_H
 10#define _LIBCPP___PSTL_CPU_ALGOS_FIND_IF_H
 11
 12#include <__algorithm/find_if.h>
 13#include <__assert>
 14#include <__atomic/atomic.h>
 15#include <__config>
 16#include <__functional/operations.h>
 17#include <__iterator/concepts.h>
 18#include <__iterator/iterator_traits.h>
 19#include <__pstl/backend_fwd.h>
 20#include <__pstl/cpu_algos/cpu_traits.h>
 21#include <__type_traits/is_execution_policy.h>
 22#include <__utility/move.h>
 23#include <__utility/pair.h>
 24#include <optional>
 25
 26#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
 27#  pragma GCC system_header
 28#endif
 29
 30_LIBCPP_PUSH_MACROS
 31#include <__undef_macros>
 32
 33#if _LIBCPP_STD_VER >= 17
 34
 35_LIBCPP_BEGIN_NAMESPACE_STD
 36namespace __pstl {
 37
 38template <class _Backend, class _Index, class _Brick, class _Compare>
 39_LIBCPP_HIDE_FROM_ABI optional<_Index>
 40__parallel_find(_Index __first, _Index __last, _Brick __f, _Compare __comp, bool __b_first) {
 41  typedef typename std::iterator_traits<_Index>::difference_type _DifferenceType;
 42  const _DifferenceType __n      = __last - __first;
 43  _DifferenceType __initial_dist = __b_first ? __n : -1;
 44  std::atomic<_DifferenceType> __extremum(__initial_dist);
 45  // TODO: find out what is better here: parallel_for or parallel_reduce
 46  auto __res =
 47      __cpu_traits<_Backend>::__for_each(__first, __last, [__comp, __f, __first, &__extremum](_Index __i, _Index __j) {
 48        // See "Reducing Contention Through Priority Updates", PPoPP '13, for discussion of
 49        // why using a shared variable scales fairly well in this situation.
 50        if (__comp(__i - __first, __extremum)) {
 51          _Index __result = __f(__i, __j);
 52          // If not '__last' returned then we found what we want so put this to extremum
 53          if (__result != __j) {
 54            const _DifferenceType __k = __result - __first;
 55            for (_DifferenceType __old = __extremum; __comp(__k, __old); __old = __extremum) {
 56              __extremum.compare_exchange_weak(__old, __k);
 57            }
 58          }
 59        }
 60      });
 61  if (!__res)
 62    return nullopt;
 63  return __extremum.load() != __initial_dist ? __first + __extremum.load() : __last;
 64}
 65
 66template <class _Backend, class _Index, class _DifferenceType, class _Compare>
 67_LIBCPP_HIDE_FROM_ABI _Index
 68__simd_first(_Index __first, _DifferenceType __begin, _DifferenceType __end, _Compare __comp) noexcept {
 69  // Experiments show good block sizes like this
 70  const _DifferenceType __block_size                                                = 8;
 71  alignas(__cpu_traits<_Backend>::__lane_size) _DifferenceType __lane[__block_size] = {0};
 72  while (__end - __begin >= __block_size) {
 73    _DifferenceType __found = 0;
 74    _PSTL_PRAGMA_SIMD_REDUCTION(| : __found) for (_DifferenceType __i = __begin; __i < __begin + __block_size; ++__i) {
 75      const _DifferenceType __t = __comp(__first, __i);
 76      __lane[__i - __begin]     = __t;
 77      __found |= __t;
 78    }
 79    if (__found) {
 80      _DifferenceType __i;
 81      // This will vectorize
 82      for (__i = 0; __i < __block_size; ++__i) {
 83        if (__lane[__i]) {
 84          break;
 85        }
 86      }
 87      return __first + __begin + __i;
 88    }
 89    __begin += __block_size;
 90  }
 91
 92  // Keep remainder scalar
 93  while (__begin != __end) {
 94    if (__comp(__first, __begin)) {
 95      return __first + __begin;
 96    }
 97    ++__begin;
 98  }
 99  return __first + __end;
100}
101
102template <class _Backend, class _RawExecutionPolicy>
103struct __cpu_parallel_find_if {
104  template <class _Policy, class _ForwardIterator, class _Predicate>
105  _LIBCPP_HIDE_FROM_ABI optional<_ForwardIterator>
106  operator()(_Policy&& __policy, _ForwardIterator __first, _ForwardIterator __last, _Predicate __pred) const noexcept {
107    if constexpr (__is_parallel_execution_policy_v<_RawExecutionPolicy> &&
108                  __has_random_access_iterator_category_or_concept<_ForwardIterator>::value) {
109      return __pstl::__parallel_find<_Backend>(
110          __first,
111          __last,
112          [&__policy, &__pred](_ForwardIterator __brick_first, _ForwardIterator __brick_last) {
113            using _FindIfUnseq = __pstl::__find_if<_Backend, __remove_parallel_policy_t<_RawExecutionPolicy>>;
114            auto __res = _FindIfUnseq()(std::__remove_parallel_policy(__policy), __brick_first, __brick_last, __pred);
115            _LIBCPP_ASSERT_INTERNAL(__res, "unseq/seq should never try to allocate!");
116            return *std::move(__res);
117          },
118          less<>{},
119          true);
120    } else if constexpr (__is_unsequenced_execution_policy_v<_RawExecutionPolicy> &&
121                         __has_random_access_iterator_category_or_concept<_ForwardIterator>::value) {
122      using __diff_t = __iter_diff_t<_ForwardIterator>;
123      return __pstl::__simd_first<_Backend>(
124          __first, __diff_t(0), __last - __first, [&__pred](_ForwardIterator __iter, __diff_t __i) {
125            return __pred(__iter[__i]);
126          });
127    } else {
128      return std::find_if(__first, __last, __pred);
129    }
130  }
131};
132
133} // namespace __pstl
134_LIBCPP_END_NAMESPACE_STD
135
136#endif // _LIBCPP_STD_VER >= 17
137
138_LIBCPP_POP_MACROS
139
140#endif // _LIBCPP___PSTL_CPU_ALGOS_FIND_IF_H