xref: /llvm-project/libcxx/include/__algorithm/simd_utils.h (revision 570f03096a195be6302747cefda0af13ac70d2eb)
1b68e2ebaSNikolas Klauser //===----------------------------------------------------------------------===//
2b68e2ebaSNikolas Klauser //
3b68e2ebaSNikolas Klauser // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4b68e2ebaSNikolas Klauser // See https://llvm.org/LICENSE.txt for license information.
5b68e2ebaSNikolas Klauser // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6b68e2ebaSNikolas Klauser //
7b68e2ebaSNikolas Klauser //===----------------------------------------------------------------------===//
8b68e2ebaSNikolas Klauser 
9b68e2ebaSNikolas Klauser #ifndef _LIBCPP___ALGORITHM_SIMD_UTILS_H
10b68e2ebaSNikolas Klauser #define _LIBCPP___ALGORITHM_SIMD_UTILS_H
11b68e2ebaSNikolas Klauser 
12985c1a44SNikolas Klauser #include <__algorithm/min.h>
13b68e2ebaSNikolas Klauser #include <__bit/bit_cast.h>
14ffc3a6b2SZibi Sarbinowski #include <__bit/countl.h>
15b68e2ebaSNikolas Klauser #include <__bit/countr.h>
16b68e2ebaSNikolas Klauser #include <__config>
17e99c4906SNikolas Klauser #include <__cstddef/size_t.h>
18b68e2ebaSNikolas Klauser #include <__type_traits/is_arithmetic.h>
19b68e2ebaSNikolas Klauser #include <__type_traits/is_same.h>
20b68e2ebaSNikolas Klauser #include <__utility/integer_sequence.h>
21b68e2ebaSNikolas Klauser #include <cstdint>
22b68e2ebaSNikolas Klauser 
23b68e2ebaSNikolas Klauser #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
24b68e2ebaSNikolas Klauser #  pragma GCC system_header
25b68e2ebaSNikolas Klauser #endif
26b68e2ebaSNikolas Klauser 
27985c1a44SNikolas Klauser _LIBCPP_PUSH_MACROS
28985c1a44SNikolas Klauser #include <__undef_macros>
29985c1a44SNikolas Klauser 
30b68e2ebaSNikolas Klauser // TODO: Find out how altivec changes things and allow vectorizations there too.
310ad663eaSMark de Wever #if _LIBCPP_STD_VER >= 14 && defined(_LIBCPP_CLANG_VER) && !defined(__ALTIVEC__)
32b68e2ebaSNikolas Klauser #  define _LIBCPP_HAS_ALGORITHM_VECTOR_UTILS 1
33b68e2ebaSNikolas Klauser #else
34b68e2ebaSNikolas Klauser #  define _LIBCPP_HAS_ALGORITHM_VECTOR_UTILS 0
35b68e2ebaSNikolas Klauser #endif
36b68e2ebaSNikolas Klauser 
37b68e2ebaSNikolas Klauser #if _LIBCPP_HAS_ALGORITHM_VECTOR_UTILS && !defined(__OPTIMIZE_SIZE__)
38b68e2ebaSNikolas Klauser #  define _LIBCPP_VECTORIZE_ALGORITHMS 1
39b68e2ebaSNikolas Klauser #else
40b68e2ebaSNikolas Klauser #  define _LIBCPP_VECTORIZE_ALGORITHMS 0
41b68e2ebaSNikolas Klauser #endif
42b68e2ebaSNikolas Klauser 
43b68e2ebaSNikolas Klauser #if _LIBCPP_HAS_ALGORITHM_VECTOR_UTILS
44b68e2ebaSNikolas Klauser 
45b68e2ebaSNikolas Klauser _LIBCPP_BEGIN_NAMESPACE_STD
46b68e2ebaSNikolas Klauser 
4705cc2d5fSNikolas Klauser template <class _Tp>
4805cc2d5fSNikolas Klauser inline constexpr bool __can_map_to_integer_v =
4905cc2d5fSNikolas Klauser     sizeof(_Tp) == alignof(_Tp) && (sizeof(_Tp) == 1 || sizeof(_Tp) == 2 || sizeof(_Tp) == 4 || sizeof(_Tp) == 8);
5005cc2d5fSNikolas Klauser 
5105cc2d5fSNikolas Klauser template <size_t _TypeSize>
5205cc2d5fSNikolas Klauser struct __get_as_integer_type_impl;
5305cc2d5fSNikolas Klauser 
5405cc2d5fSNikolas Klauser template <>
5505cc2d5fSNikolas Klauser struct __get_as_integer_type_impl<1> {
5605cc2d5fSNikolas Klauser   using type = uint8_t;
5705cc2d5fSNikolas Klauser };
5805cc2d5fSNikolas Klauser 
5905cc2d5fSNikolas Klauser template <>
6005cc2d5fSNikolas Klauser struct __get_as_integer_type_impl<2> {
6105cc2d5fSNikolas Klauser   using type = uint16_t;
6205cc2d5fSNikolas Klauser };
6305cc2d5fSNikolas Klauser template <>
6405cc2d5fSNikolas Klauser struct __get_as_integer_type_impl<4> {
6505cc2d5fSNikolas Klauser   using type = uint32_t;
6605cc2d5fSNikolas Klauser };
6705cc2d5fSNikolas Klauser template <>
6805cc2d5fSNikolas Klauser struct __get_as_integer_type_impl<8> {
6905cc2d5fSNikolas Klauser   using type = uint64_t;
7005cc2d5fSNikolas Klauser };
7105cc2d5fSNikolas Klauser 
7205cc2d5fSNikolas Klauser template <class _Tp>
73f6958523SNikolas Klauser using __get_as_integer_type_t _LIBCPP_NODEBUG = typename __get_as_integer_type_impl<sizeof(_Tp)>::type;
7405cc2d5fSNikolas Klauser 
75b68e2ebaSNikolas Klauser // This isn't specialized for 64 byte vectors on purpose. They have the potential to significantly reduce performance
76b68e2ebaSNikolas Klauser // in mixed simd/non-simd workloads and don't provide any performance improvement for currently vectorized algorithms
77b68e2ebaSNikolas Klauser // as far as benchmarks are concerned.
78af57ad65Szibi2 #  if defined(__AVX__) || defined(__MVS__)
79b68e2ebaSNikolas Klauser template <class _Tp>
80b68e2ebaSNikolas Klauser inline constexpr size_t __native_vector_size = 32 / sizeof(_Tp);
81b68e2ebaSNikolas Klauser #  elif defined(__SSE__) || defined(__ARM_NEON__)
82b68e2ebaSNikolas Klauser template <class _Tp>
83b68e2ebaSNikolas Klauser inline constexpr size_t __native_vector_size = 16 / sizeof(_Tp);
84b68e2ebaSNikolas Klauser #  elif defined(__MMX__)
85b68e2ebaSNikolas Klauser template <class _Tp>
86b68e2ebaSNikolas Klauser inline constexpr size_t __native_vector_size = 8 / sizeof(_Tp);
87b68e2ebaSNikolas Klauser #  else
88b68e2ebaSNikolas Klauser template <class _Tp>
89b68e2ebaSNikolas Klauser inline constexpr size_t __native_vector_size = 1;
90b68e2ebaSNikolas Klauser #  endif
91b68e2ebaSNikolas Klauser 
92b68e2ebaSNikolas Klauser template <class _ArithmeticT, size_t _Np>
93f6958523SNikolas Klauser using __simd_vector __attribute__((__ext_vector_type__(_Np))) _LIBCPP_NODEBUG = _ArithmeticT;
94b68e2ebaSNikolas Klauser 
95b68e2ebaSNikolas Klauser template <class _VecT>
96b68e2ebaSNikolas Klauser inline constexpr size_t __simd_vector_size_v = []<bool _False = false>() -> size_t {
97b68e2ebaSNikolas Klauser   static_assert(_False, "Not a vector!");
98b68e2ebaSNikolas Klauser }();
99b68e2ebaSNikolas Klauser 
100b68e2ebaSNikolas Klauser template <class _Tp, size_t _Np>
101b68e2ebaSNikolas Klauser inline constexpr size_t __simd_vector_size_v<__simd_vector<_Tp, _Np>> = _Np;
102b68e2ebaSNikolas Klauser 
103b68e2ebaSNikolas Klauser template <class _Tp, size_t _Np>
104b68e2ebaSNikolas Klauser _LIBCPP_HIDE_FROM_ABI _Tp __simd_vector_underlying_type_impl(__simd_vector<_Tp, _Np>) {
105b68e2ebaSNikolas Klauser   return _Tp{};
106b68e2ebaSNikolas Klauser }
107b68e2ebaSNikolas Klauser 
108b68e2ebaSNikolas Klauser template <class _VecT>
109f6958523SNikolas Klauser using __simd_vector_underlying_type_t _LIBCPP_NODEBUG = decltype(std::__simd_vector_underlying_type_impl(_VecT{}));
110b68e2ebaSNikolas Klauser 
111b68e2ebaSNikolas Klauser // This isn't inlined without always_inline when loading chars.
11205cc2d5fSNikolas Klauser template <class _VecT, class _Iter>
11317e0686aSNikolas Klauser [[__nodiscard__]] _LIBCPP_ALWAYS_INLINE _LIBCPP_HIDE_FROM_ABI _VecT __load_vector(_Iter __iter) noexcept {
114b68e2ebaSNikolas Klauser   return [=]<size_t... _Indices>(index_sequence<_Indices...>) _LIBCPP_ALWAYS_INLINE noexcept {
11505cc2d5fSNikolas Klauser     return _VecT{__iter[_Indices]...};
116b68e2ebaSNikolas Klauser   }(make_index_sequence<__simd_vector_size_v<_VecT>>{});
117b68e2ebaSNikolas Klauser }
118b68e2ebaSNikolas Klauser 
119*570f0309SVitaly Buka template <class _Tp, size_t _Np>
120*570f0309SVitaly Buka [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI bool __all_of(__simd_vector<_Tp, _Np> __vec) noexcept {
121*570f0309SVitaly Buka   return __builtin_reduce_and(__builtin_convertvector(__vec, __simd_vector<bool, _Np>));
122b68e2ebaSNikolas Klauser }
123b68e2ebaSNikolas Klauser 
124b68e2ebaSNikolas Klauser template <class _Tp, size_t _Np>
125*570f0309SVitaly Buka [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI size_t __find_first_set(__simd_vector<_Tp, _Np> __vec) noexcept {
126*570f0309SVitaly Buka   using __mask_vec = __simd_vector<bool, _Np>;
127b68e2ebaSNikolas Klauser 
128*570f0309SVitaly Buka   // This has MSan disabled du to https://github.com/llvm/llvm-project/issues/85876
129*570f0309SVitaly Buka   auto __impl = [&]<class _MaskT>(_MaskT) _LIBCPP_NO_SANITIZE("memory") noexcept {
130*570f0309SVitaly Buka #  if defined(_LIBCPP_BIG_ENDIAN)
131*570f0309SVitaly Buka     return std::min<size_t>(
132*570f0309SVitaly Buka         _Np, std::__countl_zero(__builtin_bit_cast(_MaskT, __builtin_convertvector(__vec, __mask_vec))));
133*570f0309SVitaly Buka #  else
134*570f0309SVitaly Buka     return std::min<size_t>(
135*570f0309SVitaly Buka         _Np, std::__countr_zero(__builtin_bit_cast(_MaskT, __builtin_convertvector(__vec, __mask_vec))));
136*570f0309SVitaly Buka #  endif
137*570f0309SVitaly Buka   };
138ed572f20SVitaly Buka 
139*570f0309SVitaly Buka   if constexpr (sizeof(__mask_vec) == sizeof(uint8_t)) {
140*570f0309SVitaly Buka     return __impl(uint8_t{});
141*570f0309SVitaly Buka   } else if constexpr (sizeof(__mask_vec) == sizeof(uint16_t)) {
142*570f0309SVitaly Buka     return __impl(uint16_t{});
143*570f0309SVitaly Buka   } else if constexpr (sizeof(__mask_vec) == sizeof(uint32_t)) {
144*570f0309SVitaly Buka     return __impl(uint32_t{});
145*570f0309SVitaly Buka   } else if constexpr (sizeof(__mask_vec) == sizeof(uint64_t)) {
146*570f0309SVitaly Buka     return __impl(uint64_t{});
147ed572f20SVitaly Buka   } else {
148*570f0309SVitaly Buka     static_assert(sizeof(__mask_vec) == 0, "unexpected required size for mask integer type");
149b68e2ebaSNikolas Klauser     return 0;
150b68e2ebaSNikolas Klauser   }
151b68e2ebaSNikolas Klauser }
152b68e2ebaSNikolas Klauser 
153*570f0309SVitaly Buka template <class _Tp, size_t _Np>
154*570f0309SVitaly Buka [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI size_t __find_first_not_set(__simd_vector<_Tp, _Np> __vec) noexcept {
155b68e2ebaSNikolas Klauser   return std::__find_first_set(~__vec);
156b68e2ebaSNikolas Klauser }
157b68e2ebaSNikolas Klauser 
158b68e2ebaSNikolas Klauser _LIBCPP_END_NAMESPACE_STD
159b68e2ebaSNikolas Klauser 
160b68e2ebaSNikolas Klauser #endif // _LIBCPP_HAS_ALGORITHM_VECTOR_UTILS
161b68e2ebaSNikolas Klauser 
162985c1a44SNikolas Klauser _LIBCPP_POP_MACROS
163985c1a44SNikolas Klauser 
164b68e2ebaSNikolas Klauser #endif // _LIBCPP___ALGORITHM_SIMD_UTILS_H
165