1b68e2ebaSNikolas Klauser //===----------------------------------------------------------------------===// 2b68e2ebaSNikolas Klauser // 3b68e2ebaSNikolas Klauser // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4b68e2ebaSNikolas Klauser // See https://llvm.org/LICENSE.txt for license information. 5b68e2ebaSNikolas Klauser // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6b68e2ebaSNikolas Klauser // 7b68e2ebaSNikolas Klauser //===----------------------------------------------------------------------===// 8b68e2ebaSNikolas Klauser 9b68e2ebaSNikolas Klauser #ifndef _LIBCPP___ALGORITHM_SIMD_UTILS_H 10b68e2ebaSNikolas Klauser #define _LIBCPP___ALGORITHM_SIMD_UTILS_H 11b68e2ebaSNikolas Klauser 12985c1a44SNikolas Klauser #include <__algorithm/min.h> 13b68e2ebaSNikolas Klauser #include <__bit/bit_cast.h> 14ffc3a6b2SZibi Sarbinowski #include <__bit/countl.h> 15b68e2ebaSNikolas Klauser #include <__bit/countr.h> 16b68e2ebaSNikolas Klauser #include <__config> 17e99c4906SNikolas Klauser #include <__cstddef/size_t.h> 18b68e2ebaSNikolas Klauser #include <__type_traits/is_arithmetic.h> 19b68e2ebaSNikolas Klauser #include <__type_traits/is_same.h> 20b68e2ebaSNikolas Klauser #include <__utility/integer_sequence.h> 21b68e2ebaSNikolas Klauser #include <cstdint> 22b68e2ebaSNikolas Klauser 23b68e2ebaSNikolas Klauser #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) 24b68e2ebaSNikolas Klauser # pragma GCC system_header 25b68e2ebaSNikolas Klauser #endif 26b68e2ebaSNikolas Klauser 27985c1a44SNikolas Klauser _LIBCPP_PUSH_MACROS 28985c1a44SNikolas Klauser #include <__undef_macros> 29985c1a44SNikolas Klauser 30b68e2ebaSNikolas Klauser // TODO: Find out how altivec changes things and allow vectorizations there too. 310ad663eaSMark de Wever #if _LIBCPP_STD_VER >= 14 && defined(_LIBCPP_CLANG_VER) && !defined(__ALTIVEC__) 32b68e2ebaSNikolas Klauser # define _LIBCPP_HAS_ALGORITHM_VECTOR_UTILS 1 33b68e2ebaSNikolas Klauser #else 34b68e2ebaSNikolas Klauser # define _LIBCPP_HAS_ALGORITHM_VECTOR_UTILS 0 35b68e2ebaSNikolas Klauser #endif 36b68e2ebaSNikolas Klauser 37b68e2ebaSNikolas Klauser #if _LIBCPP_HAS_ALGORITHM_VECTOR_UTILS && !defined(__OPTIMIZE_SIZE__) 38b68e2ebaSNikolas Klauser # define _LIBCPP_VECTORIZE_ALGORITHMS 1 39b68e2ebaSNikolas Klauser #else 40b68e2ebaSNikolas Klauser # define _LIBCPP_VECTORIZE_ALGORITHMS 0 41b68e2ebaSNikolas Klauser #endif 42b68e2ebaSNikolas Klauser 43b68e2ebaSNikolas Klauser #if _LIBCPP_HAS_ALGORITHM_VECTOR_UTILS 44b68e2ebaSNikolas Klauser 45b68e2ebaSNikolas Klauser _LIBCPP_BEGIN_NAMESPACE_STD 46b68e2ebaSNikolas Klauser 4705cc2d5fSNikolas Klauser template <class _Tp> 4805cc2d5fSNikolas Klauser inline constexpr bool __can_map_to_integer_v = 4905cc2d5fSNikolas Klauser sizeof(_Tp) == alignof(_Tp) && (sizeof(_Tp) == 1 || sizeof(_Tp) == 2 || sizeof(_Tp) == 4 || sizeof(_Tp) == 8); 5005cc2d5fSNikolas Klauser 5105cc2d5fSNikolas Klauser template <size_t _TypeSize> 5205cc2d5fSNikolas Klauser struct __get_as_integer_type_impl; 5305cc2d5fSNikolas Klauser 5405cc2d5fSNikolas Klauser template <> 5505cc2d5fSNikolas Klauser struct __get_as_integer_type_impl<1> { 5605cc2d5fSNikolas Klauser using type = uint8_t; 5705cc2d5fSNikolas Klauser }; 5805cc2d5fSNikolas Klauser 5905cc2d5fSNikolas Klauser template <> 6005cc2d5fSNikolas Klauser struct __get_as_integer_type_impl<2> { 6105cc2d5fSNikolas Klauser using type = uint16_t; 6205cc2d5fSNikolas Klauser }; 6305cc2d5fSNikolas Klauser template <> 6405cc2d5fSNikolas Klauser struct __get_as_integer_type_impl<4> { 6505cc2d5fSNikolas Klauser using type = uint32_t; 6605cc2d5fSNikolas Klauser }; 6705cc2d5fSNikolas Klauser template <> 6805cc2d5fSNikolas Klauser struct __get_as_integer_type_impl<8> { 6905cc2d5fSNikolas Klauser using type = uint64_t; 7005cc2d5fSNikolas Klauser }; 7105cc2d5fSNikolas Klauser 7205cc2d5fSNikolas Klauser template <class _Tp> 73f6958523SNikolas Klauser using __get_as_integer_type_t _LIBCPP_NODEBUG = typename __get_as_integer_type_impl<sizeof(_Tp)>::type; 7405cc2d5fSNikolas Klauser 75b68e2ebaSNikolas Klauser // This isn't specialized for 64 byte vectors on purpose. They have the potential to significantly reduce performance 76b68e2ebaSNikolas Klauser // in mixed simd/non-simd workloads and don't provide any performance improvement for currently vectorized algorithms 77b68e2ebaSNikolas Klauser // as far as benchmarks are concerned. 78af57ad65Szibi2 # if defined(__AVX__) || defined(__MVS__) 79b68e2ebaSNikolas Klauser template <class _Tp> 80b68e2ebaSNikolas Klauser inline constexpr size_t __native_vector_size = 32 / sizeof(_Tp); 81b68e2ebaSNikolas Klauser # elif defined(__SSE__) || defined(__ARM_NEON__) 82b68e2ebaSNikolas Klauser template <class _Tp> 83b68e2ebaSNikolas Klauser inline constexpr size_t __native_vector_size = 16 / sizeof(_Tp); 84b68e2ebaSNikolas Klauser # elif defined(__MMX__) 85b68e2ebaSNikolas Klauser template <class _Tp> 86b68e2ebaSNikolas Klauser inline constexpr size_t __native_vector_size = 8 / sizeof(_Tp); 87b68e2ebaSNikolas Klauser # else 88b68e2ebaSNikolas Klauser template <class _Tp> 89b68e2ebaSNikolas Klauser inline constexpr size_t __native_vector_size = 1; 90b68e2ebaSNikolas Klauser # endif 91b68e2ebaSNikolas Klauser 92b68e2ebaSNikolas Klauser template <class _ArithmeticT, size_t _Np> 93f6958523SNikolas Klauser using __simd_vector __attribute__((__ext_vector_type__(_Np))) _LIBCPP_NODEBUG = _ArithmeticT; 94b68e2ebaSNikolas Klauser 95b68e2ebaSNikolas Klauser template <class _VecT> 96b68e2ebaSNikolas Klauser inline constexpr size_t __simd_vector_size_v = []<bool _False = false>() -> size_t { 97b68e2ebaSNikolas Klauser static_assert(_False, "Not a vector!"); 98b68e2ebaSNikolas Klauser }(); 99b68e2ebaSNikolas Klauser 100b68e2ebaSNikolas Klauser template <class _Tp, size_t _Np> 101b68e2ebaSNikolas Klauser inline constexpr size_t __simd_vector_size_v<__simd_vector<_Tp, _Np>> = _Np; 102b68e2ebaSNikolas Klauser 103b68e2ebaSNikolas Klauser template <class _Tp, size_t _Np> 104b68e2ebaSNikolas Klauser _LIBCPP_HIDE_FROM_ABI _Tp __simd_vector_underlying_type_impl(__simd_vector<_Tp, _Np>) { 105b68e2ebaSNikolas Klauser return _Tp{}; 106b68e2ebaSNikolas Klauser } 107b68e2ebaSNikolas Klauser 108b68e2ebaSNikolas Klauser template <class _VecT> 109f6958523SNikolas Klauser using __simd_vector_underlying_type_t _LIBCPP_NODEBUG = decltype(std::__simd_vector_underlying_type_impl(_VecT{})); 110b68e2ebaSNikolas Klauser 111b68e2ebaSNikolas Klauser // This isn't inlined without always_inline when loading chars. 11205cc2d5fSNikolas Klauser template <class _VecT, class _Iter> 11317e0686aSNikolas Klauser [[__nodiscard__]] _LIBCPP_ALWAYS_INLINE _LIBCPP_HIDE_FROM_ABI _VecT __load_vector(_Iter __iter) noexcept { 114b68e2ebaSNikolas Klauser return [=]<size_t... _Indices>(index_sequence<_Indices...>) _LIBCPP_ALWAYS_INLINE noexcept { 11505cc2d5fSNikolas Klauser return _VecT{__iter[_Indices]...}; 116b68e2ebaSNikolas Klauser }(make_index_sequence<__simd_vector_size_v<_VecT>>{}); 117b68e2ebaSNikolas Klauser } 118b68e2ebaSNikolas Klauser 119*570f0309SVitaly Buka template <class _Tp, size_t _Np> 120*570f0309SVitaly Buka [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI bool __all_of(__simd_vector<_Tp, _Np> __vec) noexcept { 121*570f0309SVitaly Buka return __builtin_reduce_and(__builtin_convertvector(__vec, __simd_vector<bool, _Np>)); 122b68e2ebaSNikolas Klauser } 123b68e2ebaSNikolas Klauser 124b68e2ebaSNikolas Klauser template <class _Tp, size_t _Np> 125*570f0309SVitaly Buka [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI size_t __find_first_set(__simd_vector<_Tp, _Np> __vec) noexcept { 126*570f0309SVitaly Buka using __mask_vec = __simd_vector<bool, _Np>; 127b68e2ebaSNikolas Klauser 128*570f0309SVitaly Buka // This has MSan disabled du to https://github.com/llvm/llvm-project/issues/85876 129*570f0309SVitaly Buka auto __impl = [&]<class _MaskT>(_MaskT) _LIBCPP_NO_SANITIZE("memory") noexcept { 130*570f0309SVitaly Buka # if defined(_LIBCPP_BIG_ENDIAN) 131*570f0309SVitaly Buka return std::min<size_t>( 132*570f0309SVitaly Buka _Np, std::__countl_zero(__builtin_bit_cast(_MaskT, __builtin_convertvector(__vec, __mask_vec)))); 133*570f0309SVitaly Buka # else 134*570f0309SVitaly Buka return std::min<size_t>( 135*570f0309SVitaly Buka _Np, std::__countr_zero(__builtin_bit_cast(_MaskT, __builtin_convertvector(__vec, __mask_vec)))); 136*570f0309SVitaly Buka # endif 137*570f0309SVitaly Buka }; 138ed572f20SVitaly Buka 139*570f0309SVitaly Buka if constexpr (sizeof(__mask_vec) == sizeof(uint8_t)) { 140*570f0309SVitaly Buka return __impl(uint8_t{}); 141*570f0309SVitaly Buka } else if constexpr (sizeof(__mask_vec) == sizeof(uint16_t)) { 142*570f0309SVitaly Buka return __impl(uint16_t{}); 143*570f0309SVitaly Buka } else if constexpr (sizeof(__mask_vec) == sizeof(uint32_t)) { 144*570f0309SVitaly Buka return __impl(uint32_t{}); 145*570f0309SVitaly Buka } else if constexpr (sizeof(__mask_vec) == sizeof(uint64_t)) { 146*570f0309SVitaly Buka return __impl(uint64_t{}); 147ed572f20SVitaly Buka } else { 148*570f0309SVitaly Buka static_assert(sizeof(__mask_vec) == 0, "unexpected required size for mask integer type"); 149b68e2ebaSNikolas Klauser return 0; 150b68e2ebaSNikolas Klauser } 151b68e2ebaSNikolas Klauser } 152b68e2ebaSNikolas Klauser 153*570f0309SVitaly Buka template <class _Tp, size_t _Np> 154*570f0309SVitaly Buka [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI size_t __find_first_not_set(__simd_vector<_Tp, _Np> __vec) noexcept { 155b68e2ebaSNikolas Klauser return std::__find_first_set(~__vec); 156b68e2ebaSNikolas Klauser } 157b68e2ebaSNikolas Klauser 158b68e2ebaSNikolas Klauser _LIBCPP_END_NAMESPACE_STD 159b68e2ebaSNikolas Klauser 160b68e2ebaSNikolas Klauser #endif // _LIBCPP_HAS_ALGORITHM_VECTOR_UTILS 161b68e2ebaSNikolas Klauser 162985c1a44SNikolas Klauser _LIBCPP_POP_MACROS 163985c1a44SNikolas Klauser 164b68e2ebaSNikolas Klauser #endif // _LIBCPP___ALGORITHM_SIMD_UTILS_H 165