1e78f53d1SNikolas Klauser //===----------------------------------------------------------------------===// 2e78f53d1SNikolas Klauser // 3e78f53d1SNikolas Klauser // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4e78f53d1SNikolas Klauser // See https://llvm.org/LICENSE.txt for license information. 5e78f53d1SNikolas Klauser // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6e78f53d1SNikolas Klauser // 7e78f53d1SNikolas Klauser //===----------------------------------------------------------------------===// 8e78f53d1SNikolas Klauser 9*ce777190SNikolas Klauser #ifndef _LIBCPP___CXX03___ALGORITHM_SIMD_UTILS_H 10*ce777190SNikolas Klauser #define _LIBCPP___CXX03___ALGORITHM_SIMD_UTILS_H 11e78f53d1SNikolas Klauser 1273fbae83SNikolas Klauser #include <__cxx03/__algorithm/min.h> 1373fbae83SNikolas Klauser #include <__cxx03/__bit/bit_cast.h> 1473fbae83SNikolas Klauser #include <__cxx03/__bit/countl.h> 1573fbae83SNikolas Klauser #include <__cxx03/__bit/countr.h> 1673fbae83SNikolas Klauser #include <__cxx03/__config> 1773fbae83SNikolas Klauser #include <__cxx03/__type_traits/is_arithmetic.h> 1873fbae83SNikolas Klauser #include <__cxx03/__type_traits/is_same.h> 1973fbae83SNikolas Klauser #include <__cxx03/__utility/integer_sequence.h> 2073fbae83SNikolas Klauser #include <__cxx03/cstddef> 2173fbae83SNikolas Klauser #include <__cxx03/cstdint> 22e78f53d1SNikolas Klauser 23e78f53d1SNikolas Klauser #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) 24e78f53d1SNikolas Klauser # pragma GCC system_header 25e78f53d1SNikolas Klauser #endif 26e78f53d1SNikolas Klauser 27e78f53d1SNikolas Klauser _LIBCPP_PUSH_MACROS 2873fbae83SNikolas Klauser #include <__cxx03/__undef_macros> 29e78f53d1SNikolas Klauser 30e78f53d1SNikolas Klauser // TODO: Find out how altivec changes things and allow vectorizations there too. 31e78f53d1SNikolas Klauser #if _LIBCPP_STD_VER >= 14 && defined(_LIBCPP_CLANG_VER) && !defined(__ALTIVEC__) 32e78f53d1SNikolas Klauser # define _LIBCPP_HAS_ALGORITHM_VECTOR_UTILS 1 33e78f53d1SNikolas Klauser #else 34e78f53d1SNikolas Klauser # define _LIBCPP_HAS_ALGORITHM_VECTOR_UTILS 0 35e78f53d1SNikolas Klauser #endif 36e78f53d1SNikolas Klauser 37e78f53d1SNikolas Klauser #if _LIBCPP_HAS_ALGORITHM_VECTOR_UTILS && !defined(__OPTIMIZE_SIZE__) 38e78f53d1SNikolas Klauser # define _LIBCPP_VECTORIZE_ALGORITHMS 1 39e78f53d1SNikolas Klauser #else 40e78f53d1SNikolas Klauser # define _LIBCPP_VECTORIZE_ALGORITHMS 0 41e78f53d1SNikolas Klauser #endif 42e78f53d1SNikolas Klauser 43e78f53d1SNikolas Klauser #if _LIBCPP_HAS_ALGORITHM_VECTOR_UTILS 44e78f53d1SNikolas Klauser 45e78f53d1SNikolas Klauser _LIBCPP_BEGIN_NAMESPACE_STD 46e78f53d1SNikolas Klauser 47e78f53d1SNikolas Klauser template <class _Tp> 48e78f53d1SNikolas Klauser inline constexpr bool __can_map_to_integer_v = 49e78f53d1SNikolas Klauser sizeof(_Tp) == alignof(_Tp) && (sizeof(_Tp) == 1 || sizeof(_Tp) == 2 || sizeof(_Tp) == 4 || sizeof(_Tp) == 8); 50e78f53d1SNikolas Klauser 51e78f53d1SNikolas Klauser template <size_t _TypeSize> 52e78f53d1SNikolas Klauser struct __get_as_integer_type_impl; 53e78f53d1SNikolas Klauser 54e78f53d1SNikolas Klauser template <> 55e78f53d1SNikolas Klauser struct __get_as_integer_type_impl<1> { 56e78f53d1SNikolas Klauser using type = uint8_t; 57e78f53d1SNikolas Klauser }; 58e78f53d1SNikolas Klauser 59e78f53d1SNikolas Klauser template <> 60e78f53d1SNikolas Klauser struct __get_as_integer_type_impl<2> { 61e78f53d1SNikolas Klauser using type = uint16_t; 62e78f53d1SNikolas Klauser }; 63e78f53d1SNikolas Klauser template <> 64e78f53d1SNikolas Klauser struct __get_as_integer_type_impl<4> { 65e78f53d1SNikolas Klauser using type = uint32_t; 66e78f53d1SNikolas Klauser }; 67e78f53d1SNikolas Klauser template <> 68e78f53d1SNikolas Klauser struct __get_as_integer_type_impl<8> { 69e78f53d1SNikolas Klauser using type = uint64_t; 70e78f53d1SNikolas Klauser }; 71e78f53d1SNikolas Klauser 72e78f53d1SNikolas Klauser template <class _Tp> 73e78f53d1SNikolas Klauser using __get_as_integer_type_t = typename __get_as_integer_type_impl<sizeof(_Tp)>::type; 74e78f53d1SNikolas Klauser 75e78f53d1SNikolas Klauser // This isn't specialized for 64 byte vectors on purpose. They have the potential to significantly reduce performance 76e78f53d1SNikolas Klauser // in mixed simd/non-simd workloads and don't provide any performance improvement for currently vectorized algorithms 77e78f53d1SNikolas Klauser // as far as benchmarks are concerned. 78e78f53d1SNikolas Klauser # if defined(__AVX__) || defined(__MVS__) 79e78f53d1SNikolas Klauser template <class _Tp> 80e78f53d1SNikolas Klauser inline constexpr size_t __native_vector_size = 32 / sizeof(_Tp); 81e78f53d1SNikolas Klauser # elif defined(__SSE__) || defined(__ARM_NEON__) 82e78f53d1SNikolas Klauser template <class _Tp> 83e78f53d1SNikolas Klauser inline constexpr size_t __native_vector_size = 16 / sizeof(_Tp); 84e78f53d1SNikolas Klauser # elif defined(__MMX__) 85e78f53d1SNikolas Klauser template <class _Tp> 86e78f53d1SNikolas Klauser inline constexpr size_t __native_vector_size = 8 / sizeof(_Tp); 87e78f53d1SNikolas Klauser # else 88e78f53d1SNikolas Klauser template <class _Tp> 89e78f53d1SNikolas Klauser inline constexpr size_t __native_vector_size = 1; 90e78f53d1SNikolas Klauser # endif 91e78f53d1SNikolas Klauser 92e78f53d1SNikolas Klauser template <class _ArithmeticT, size_t _Np> 93e78f53d1SNikolas Klauser using __simd_vector __attribute__((__ext_vector_type__(_Np))) = _ArithmeticT; 94e78f53d1SNikolas Klauser 95e78f53d1SNikolas Klauser template <class _VecT> 96e78f53d1SNikolas Klauser inline constexpr size_t __simd_vector_size_v = []<bool _False = false>() -> size_t { 97e78f53d1SNikolas Klauser static_assert(_False, "Not a vector!"); 98e78f53d1SNikolas Klauser }(); 99e78f53d1SNikolas Klauser 100e78f53d1SNikolas Klauser template <class _Tp, size_t _Np> 101e78f53d1SNikolas Klauser inline constexpr size_t __simd_vector_size_v<__simd_vector<_Tp, _Np>> = _Np; 102e78f53d1SNikolas Klauser 103e78f53d1SNikolas Klauser template <class _Tp, size_t _Np> 104e78f53d1SNikolas Klauser _LIBCPP_HIDE_FROM_ABI _Tp __simd_vector_underlying_type_impl(__simd_vector<_Tp, _Np>) { 105e78f53d1SNikolas Klauser return _Tp{}; 106e78f53d1SNikolas Klauser } 107e78f53d1SNikolas Klauser 108e78f53d1SNikolas Klauser template <class _VecT> 109e78f53d1SNikolas Klauser using __simd_vector_underlying_type_t = decltype(std::__simd_vector_underlying_type_impl(_VecT{})); 110e78f53d1SNikolas Klauser 111e78f53d1SNikolas Klauser // This isn't inlined without always_inline when loading chars. 112e78f53d1SNikolas Klauser template <class _VecT, class _Iter> 113e78f53d1SNikolas Klauser _LIBCPP_NODISCARD _LIBCPP_ALWAYS_INLINE _LIBCPP_HIDE_FROM_ABI _VecT __load_vector(_Iter __iter) noexcept { 114e78f53d1SNikolas Klauser return [=]<size_t... _Indices>(index_sequence<_Indices...>) _LIBCPP_ALWAYS_INLINE noexcept { 115e78f53d1SNikolas Klauser return _VecT{__iter[_Indices]...}; 116e78f53d1SNikolas Klauser }(make_index_sequence<__simd_vector_size_v<_VecT>>{}); 117e78f53d1SNikolas Klauser } 118e78f53d1SNikolas Klauser 119e78f53d1SNikolas Klauser template <class _Tp, size_t _Np> 120e78f53d1SNikolas Klauser _LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI bool __all_of(__simd_vector<_Tp, _Np> __vec) noexcept { 121e78f53d1SNikolas Klauser return __builtin_reduce_and(__builtin_convertvector(__vec, __simd_vector<bool, _Np>)); 122e78f53d1SNikolas Klauser } 123e78f53d1SNikolas Klauser 124e78f53d1SNikolas Klauser template <class _Tp, size_t _Np> 125e78f53d1SNikolas Klauser _LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI size_t __find_first_set(__simd_vector<_Tp, _Np> __vec) noexcept { 126e78f53d1SNikolas Klauser using __mask_vec = __simd_vector<bool, _Np>; 127e78f53d1SNikolas Klauser 128e78f53d1SNikolas Klauser // This has MSan disabled du to https://github.com/llvm/llvm-project/issues/85876 129e78f53d1SNikolas Klauser auto __impl = [&]<class _MaskT>(_MaskT) _LIBCPP_NO_SANITIZE("memory") noexcept { 130e78f53d1SNikolas Klauser # if defined(_LIBCPP_BIG_ENDIAN) 131e78f53d1SNikolas Klauser return std::min<size_t>( 132e78f53d1SNikolas Klauser _Np, std::__countl_zero(__builtin_bit_cast(_MaskT, __builtin_convertvector(__vec, __mask_vec)))); 133e78f53d1SNikolas Klauser # else 134e78f53d1SNikolas Klauser return std::min<size_t>( 135e78f53d1SNikolas Klauser _Np, std::__countr_zero(__builtin_bit_cast(_MaskT, __builtin_convertvector(__vec, __mask_vec)))); 136e78f53d1SNikolas Klauser # endif 137e78f53d1SNikolas Klauser }; 138e78f53d1SNikolas Klauser 139e78f53d1SNikolas Klauser if constexpr (sizeof(__mask_vec) == sizeof(uint8_t)) { 140e78f53d1SNikolas Klauser return __impl(uint8_t{}); 141e78f53d1SNikolas Klauser } else if constexpr (sizeof(__mask_vec) == sizeof(uint16_t)) { 142e78f53d1SNikolas Klauser return __impl(uint16_t{}); 143e78f53d1SNikolas Klauser } else if constexpr (sizeof(__mask_vec) == sizeof(uint32_t)) { 144e78f53d1SNikolas Klauser return __impl(uint32_t{}); 145e78f53d1SNikolas Klauser } else if constexpr (sizeof(__mask_vec) == sizeof(uint64_t)) { 146e78f53d1SNikolas Klauser return __impl(uint64_t{}); 147e78f53d1SNikolas Klauser } else { 148e78f53d1SNikolas Klauser static_assert(sizeof(__mask_vec) == 0, "unexpected required size for mask integer type"); 149e78f53d1SNikolas Klauser return 0; 150e78f53d1SNikolas Klauser } 151e78f53d1SNikolas Klauser } 152e78f53d1SNikolas Klauser 153e78f53d1SNikolas Klauser template <class _Tp, size_t _Np> 154e78f53d1SNikolas Klauser _LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI size_t __find_first_not_set(__simd_vector<_Tp, _Np> __vec) noexcept { 155e78f53d1SNikolas Klauser return std::__find_first_set(~__vec); 156e78f53d1SNikolas Klauser } 157e78f53d1SNikolas Klauser 158e78f53d1SNikolas Klauser _LIBCPP_END_NAMESPACE_STD 159e78f53d1SNikolas Klauser 160e78f53d1SNikolas Klauser #endif // _LIBCPP_HAS_ALGORITHM_VECTOR_UTILS 161e78f53d1SNikolas Klauser 162e78f53d1SNikolas Klauser _LIBCPP_POP_MACROS 163e78f53d1SNikolas Klauser 164*ce777190SNikolas Klauser #endif // _LIBCPP___CXX03___ALGORITHM_SIMD_UTILS_H 165