10e30dd44SZhangyin // -*- C++ -*- 20e30dd44SZhangyin //===----------------------------------------------------------------------===// 30e30dd44SZhangyin // 40e30dd44SZhangyin // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 50e30dd44SZhangyin // See https://llvm.org/LICENSE.txt for license information. 60e30dd44SZhangyin // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 70e30dd44SZhangyin // 80e30dd44SZhangyin //===----------------------------------------------------------------------===// 90e30dd44SZhangyin 100e30dd44SZhangyin #ifndef _LIBCPP_EXPERIMENTAL___SIMD_VEC_EXT_H 110e30dd44SZhangyin #define _LIBCPP_EXPERIMENTAL___SIMD_VEC_EXT_H 120e30dd44SZhangyin 1337dca605SLouis Dionne #include <__assert> 14e7a45c6dSZhangyin #include <__bit/bit_ceil.h> 15118f120eSLouis Dionne #include <__config> 16e99c4906SNikolas Klauser #include <__cstddef/size_t.h> 17d6832a61SLouis Dionne #include <__type_traits/integral_constant.h> 18593521b0SZhangYin #include <__utility/forward.h> 191314e877Sphilnik777 #include <__utility/integer_sequence.h> 2050ae0da0SNikolas Klauser #include <experimental/__simd/declaration.h> 211314e877Sphilnik777 #include <experimental/__simd/traits.h> 22e7a45c6dSZhangyin #include <experimental/__simd/utility.h> 230e30dd44SZhangyin 240e30dd44SZhangyin #if _LIBCPP_STD_VER >= 17 && defined(_LIBCPP_ENABLE_EXPERIMENTAL) 250e30dd44SZhangyin 260e30dd44SZhangyin _LIBCPP_BEGIN_NAMESPACE_EXPERIMENTAL 270e30dd44SZhangyin inline namespace parallelism_v2 { 280e30dd44SZhangyin namespace simd_abi { 290e30dd44SZhangyin template <int _Np> 300e30dd44SZhangyin struct __vec_ext { 310e30dd44SZhangyin static constexpr size_t __simd_size = _Np; 320e30dd44SZhangyin }; 330e30dd44SZhangyin } // namespace simd_abi 34e7a45c6dSZhangyin 351314e877Sphilnik777 template <int _Np> 361314e877Sphilnik777 inline constexpr bool is_abi_tag_v<simd_abi::__vec_ext<_Np>> = _Np > 0 && _Np <= 32; 371314e877Sphilnik777 38e7a45c6dSZhangyin template <class _Tp, int _Np> 39e7a45c6dSZhangyin struct __simd_storage<_Tp, simd_abi::__vec_ext<_Np>> { 40e7a45c6dSZhangyin _Tp __data __attribute__((__vector_size__(std::__bit_ceil((sizeof(_Tp) * _Np))))); 41e7a45c6dSZhangyin 42cf31d0ecSZhangYin _LIBCPP_HIDE_FROM_ABI _Tp __get(size_t __idx) const noexcept { 43c34aca8dSMital Ashok _LIBCPP_ASSERT_VALID_ELEMENT_ACCESS(__idx < _Np, "Index is out of bounds"); 44e7a45c6dSZhangyin return __data[__idx]; 45e7a45c6dSZhangyin } 46cf31d0ecSZhangYin _LIBCPP_HIDE_FROM_ABI void __set(size_t __idx, _Tp __v) noexcept { 47c34aca8dSMital Ashok _LIBCPP_ASSERT_VALID_ELEMENT_ACCESS(__idx < _Np, "Index is out of bounds"); 48e7a45c6dSZhangyin __data[__idx] = __v; 49e7a45c6dSZhangyin } 50e7a45c6dSZhangyin }; 51e7a45c6dSZhangyin 52e7a45c6dSZhangyin template <class _Tp, int _Np> 53e7a45c6dSZhangyin struct __mask_storage<_Tp, simd_abi::__vec_ext<_Np>> 54e7a45c6dSZhangyin : __simd_storage<decltype(experimental::__choose_mask_type<_Tp>()), simd_abi::__vec_ext<_Np>> {}; 55e7a45c6dSZhangyin 56e7a45c6dSZhangyin template <class _Tp, int _Np> 57e7a45c6dSZhangyin struct __simd_operations<_Tp, simd_abi::__vec_ext<_Np>> { 58*f6958523SNikolas Klauser using _SimdStorage _LIBCPP_NODEBUG = __simd_storage<_Tp, simd_abi::__vec_ext<_Np>>; 59*f6958523SNikolas Klauser using _MaskStorage _LIBCPP_NODEBUG = __mask_storage<_Tp, simd_abi::__vec_ext<_Np>>; 60ed29f275SZhangyin 61cf31d0ecSZhangYin static _LIBCPP_HIDE_FROM_ABI _SimdStorage __broadcast(_Tp __v) noexcept { 62ed29f275SZhangyin _SimdStorage __result; 63ed29f275SZhangyin for (int __i = 0; __i < _Np; ++__i) { 64ed29f275SZhangyin __result.__set(__i, __v); 65ed29f275SZhangyin } 66ed29f275SZhangyin return __result; 67ed29f275SZhangyin } 68593521b0SZhangYin 69593521b0SZhangYin template <class _Generator, size_t... _Is> 70593521b0SZhangYin static _LIBCPP_HIDE_FROM_ABI _SimdStorage __generate_init(_Generator&& __g, std::index_sequence<_Is...>) { 71593521b0SZhangYin return _SimdStorage{{__g(std::integral_constant<size_t, _Is>())...}}; 72593521b0SZhangYin } 73593521b0SZhangYin 74593521b0SZhangYin template <class _Generator> 75593521b0SZhangYin static _LIBCPP_HIDE_FROM_ABI _SimdStorage __generate(_Generator&& __g) noexcept { 76593521b0SZhangYin return __generate_init(std::forward<_Generator>(__g), std::make_index_sequence<_Np>()); 77593521b0SZhangYin } 786bb5c989SZhangYin 796bb5c989SZhangYin template <class _Up> 806bb5c989SZhangYin static _LIBCPP_HIDE_FROM_ABI void __load(_SimdStorage& __s, const _Up* __mem) noexcept { 816bb5c989SZhangYin for (size_t __i = 0; __i < _Np; __i++) 826bb5c989SZhangYin __s.__data[__i] = static_cast<_Tp>(__mem[__i]); 836bb5c989SZhangYin } 84058e4454SZhangYin 85058e4454SZhangYin template <class _Up> 86058e4454SZhangYin static _LIBCPP_HIDE_FROM_ABI void __store(_SimdStorage __s, _Up* __mem) noexcept { 87058e4454SZhangYin for (size_t __i = 0; __i < _Np; __i++) 88058e4454SZhangYin __mem[__i] = static_cast<_Up>(__s.__data[__i]); 89058e4454SZhangYin } 902c3d7d53SZhangYin 912c3d7d53SZhangYin static _LIBCPP_HIDE_FROM_ABI void __increment(_SimdStorage& __s) noexcept { __s.__data = __s.__data + 1; } 922c3d7d53SZhangYin 932c3d7d53SZhangYin static _LIBCPP_HIDE_FROM_ABI void __decrement(_SimdStorage& __s) noexcept { __s.__data = __s.__data - 1; } 942c3d7d53SZhangYin 952c3d7d53SZhangYin static _LIBCPP_HIDE_FROM_ABI _MaskStorage __negate(_SimdStorage __s) noexcept { return {!__s.__data}; } 962c3d7d53SZhangYin 972c3d7d53SZhangYin static _LIBCPP_HIDE_FROM_ABI _SimdStorage __bitwise_not(_SimdStorage __s) noexcept { return {~__s.__data}; } 982c3d7d53SZhangYin 992c3d7d53SZhangYin static _LIBCPP_HIDE_FROM_ABI _SimdStorage __unary_minus(_SimdStorage __s) noexcept { return {-__s.__data}; } 100e7a45c6dSZhangyin }; 101e7a45c6dSZhangyin 102e7a45c6dSZhangyin template <class _Tp, int _Np> 103e7a45c6dSZhangyin struct __mask_operations<_Tp, simd_abi::__vec_ext<_Np>> { 104*f6958523SNikolas Klauser using _MaskStorage _LIBCPP_NODEBUG = __mask_storage<_Tp, simd_abi::__vec_ext<_Np>>; 105ed29f275SZhangyin 106cf31d0ecSZhangYin static _LIBCPP_HIDE_FROM_ABI _MaskStorage __broadcast(bool __v) noexcept { 107ed29f275SZhangyin _MaskStorage __result; 108ed29f275SZhangyin auto __all_bits_v = experimental::__set_all_bits<_Tp>(__v); 109ed29f275SZhangyin for (int __i = 0; __i < _Np; ++__i) { 110ed29f275SZhangyin __result.__set(__i, __all_bits_v); 111ed29f275SZhangyin } 112ed29f275SZhangyin return __result; 113ed29f275SZhangyin } 1146bb5c989SZhangYin 1156bb5c989SZhangYin static _LIBCPP_HIDE_FROM_ABI void __load(_MaskStorage& __s, const bool* __mem) noexcept { 1166bb5c989SZhangYin for (size_t __i = 0; __i < _Np; __i++) 1176bb5c989SZhangYin __s.__data[__i] = experimental::__set_all_bits<_Tp>(__mem[__i]); 1186bb5c989SZhangYin } 119058e4454SZhangYin 120058e4454SZhangYin static _LIBCPP_HIDE_FROM_ABI void __store(_MaskStorage __s, bool* __mem) noexcept { 121058e4454SZhangYin for (size_t __i = 0; __i < _Np; __i++) 122058e4454SZhangYin __mem[__i] = static_cast<bool>(__s.__data[__i]); 123058e4454SZhangYin } 124e7a45c6dSZhangyin }; 125e7a45c6dSZhangyin 1260e30dd44SZhangyin } // namespace parallelism_v2 1270e30dd44SZhangyin _LIBCPP_END_NAMESPACE_EXPERIMENTAL 1280e30dd44SZhangyin 1290e30dd44SZhangyin #endif // _LIBCPP_STD_VER >= 17 && defined(_LIBCPP_ENABLE_EXPERIMENTAL) 1300e30dd44SZhangyin #endif // _LIBCPP_EXPERIMENTAL___SIMD_VEC_EXT_H 131