// Generic simd conversions -*- C++ -*-

// Copyright (C) 2020-2022 Free Software Foundation, Inc.
//
// This file is part of the GNU ISO C++ Library.  This library is free
// software; you can redistribute it and/or modify it under the
// terms of the GNU General Public License as published by the
// Free Software Foundation; either version 3, or (at your option)
// any later version.

// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
// GNU General Public License for more details.

// Under Section 7 of GPL version 3, you are granted additional
// permissions described in the GCC Runtime Library Exception, version
// 3.1, as published by the Free Software Foundation.

// You should have received a copy of the GNU General Public License and
// a copy of the GCC Runtime Library Exception along with this program;
// see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
// <http://www.gnu.org/licenses/>.
24b1e83836Smrg 25b1e83836Smrg #ifndef _GLIBCXX_EXPERIMENTAL_SIMD_CONVERTER_H_ 26b1e83836Smrg #define _GLIBCXX_EXPERIMENTAL_SIMD_CONVERTER_H_ 27b1e83836Smrg 28b1e83836Smrg #if __cplusplus >= 201703L 29b1e83836Smrg 30b1e83836Smrg _GLIBCXX_SIMD_BEGIN_NAMESPACE 31b1e83836Smrg // _SimdConverter scalar -> scalar {{{ 32b1e83836Smrg template <typename _From, typename _To> 33b1e83836Smrg struct _SimdConverter<_From, simd_abi::scalar, _To, simd_abi::scalar, 34b1e83836Smrg enable_if_t<!is_same_v<_From, _To>>> 35b1e83836Smrg { 36b1e83836Smrg _GLIBCXX_SIMD_INTRINSIC constexpr _To operator()(_From __a) const noexcept 37b1e83836Smrg { return static_cast<_To>(__a); } 38b1e83836Smrg }; 39b1e83836Smrg 40b1e83836Smrg // }}} 41b1e83836Smrg // _SimdConverter scalar -> "native" {{{ 42b1e83836Smrg template <typename _From, typename _To, typename _Abi> 43b1e83836Smrg struct _SimdConverter<_From, simd_abi::scalar, _To, _Abi, 44b1e83836Smrg enable_if_t<!is_same_v<_Abi, simd_abi::scalar>>> 45b1e83836Smrg { 46b1e83836Smrg using _Ret = typename _Abi::template __traits<_To>::_SimdMember; 47b1e83836Smrg 48b1e83836Smrg template <typename... _More> 49b1e83836Smrg _GLIBCXX_SIMD_INTRINSIC constexpr _Ret 50b1e83836Smrg operator()(_From __a, _More... 
__more) const noexcept 51b1e83836Smrg { 52b1e83836Smrg static_assert(sizeof...(_More) + 1 == _Abi::template _S_size<_To>); 53b1e83836Smrg static_assert(conjunction_v<is_same<_From, _More>...>); 54b1e83836Smrg return __make_vector<_To>(__a, __more...); 55b1e83836Smrg } 56b1e83836Smrg }; 57b1e83836Smrg 58b1e83836Smrg // }}} 59b1e83836Smrg // _SimdConverter "native 1" -> "native 2" {{{ 60b1e83836Smrg template <typename _From, typename _To, typename _AFrom, typename _ATo> 61b1e83836Smrg struct _SimdConverter< 62b1e83836Smrg _From, _AFrom, _To, _ATo, 63b1e83836Smrg enable_if_t<!disjunction_v< 64b1e83836Smrg __is_fixed_size_abi<_AFrom>, __is_fixed_size_abi<_ATo>, 65b1e83836Smrg is_same<_AFrom, simd_abi::scalar>, is_same<_ATo, simd_abi::scalar>, 66b1e83836Smrg conjunction<is_same<_From, _To>, is_same<_AFrom, _ATo>>>>> 67b1e83836Smrg { 68b1e83836Smrg using _Arg = typename _AFrom::template __traits<_From>::_SimdMember; 69b1e83836Smrg using _Ret = typename _ATo::template __traits<_To>::_SimdMember; 70b1e83836Smrg using _V = __vector_type_t<_To, simd_size_v<_To, _ATo>>; 71b1e83836Smrg 72b1e83836Smrg template <typename... _More> 73b1e83836Smrg _GLIBCXX_SIMD_INTRINSIC constexpr _Ret 74b1e83836Smrg operator()(_Arg __a, _More... 
__more) const noexcept 75b1e83836Smrg { return __vector_convert<_V>(__a, __more...); } 76b1e83836Smrg }; 77b1e83836Smrg 78b1e83836Smrg // }}} 79b1e83836Smrg // _SimdConverter scalar -> fixed_size<1> {{{1 80b1e83836Smrg template <typename _From, typename _To> 81b1e83836Smrg struct _SimdConverter<_From, simd_abi::scalar, _To, simd_abi::fixed_size<1>, 82b1e83836Smrg void> 83b1e83836Smrg { 84b1e83836Smrg _GLIBCXX_SIMD_INTRINSIC constexpr _SimdTuple<_To, simd_abi::scalar> 85b1e83836Smrg operator()(_From __x) const noexcept 86b1e83836Smrg { return {static_cast<_To>(__x)}; } 87b1e83836Smrg }; 88b1e83836Smrg 89b1e83836Smrg // _SimdConverter fixed_size<1> -> scalar {{{1 90b1e83836Smrg template <typename _From, typename _To> 91b1e83836Smrg struct _SimdConverter<_From, simd_abi::fixed_size<1>, _To, simd_abi::scalar, 92b1e83836Smrg void> 93b1e83836Smrg { 94b1e83836Smrg _GLIBCXX_SIMD_INTRINSIC constexpr _To 95b1e83836Smrg operator()(_SimdTuple<_From, simd_abi::scalar> __x) const noexcept 96b1e83836Smrg { return {static_cast<_To>(__x.first)}; } 97b1e83836Smrg }; 98b1e83836Smrg 99b1e83836Smrg // _SimdConverter fixed_size<_Np> -> fixed_size<_Np> {{{1 100b1e83836Smrg template <typename _From, typename _To, int _Np> 101b1e83836Smrg struct _SimdConverter<_From, simd_abi::fixed_size<_Np>, _To, 102b1e83836Smrg simd_abi::fixed_size<_Np>, 103b1e83836Smrg enable_if_t<!is_same_v<_From, _To>>> 104b1e83836Smrg { 105b1e83836Smrg using _Ret = __fixed_size_storage_t<_To, _Np>; 106b1e83836Smrg using _Arg = __fixed_size_storage_t<_From, _Np>; 107b1e83836Smrg 108b1e83836Smrg _GLIBCXX_SIMD_INTRINSIC constexpr _Ret 109b1e83836Smrg operator()(const _Arg& __x) const noexcept 110b1e83836Smrg { 111b1e83836Smrg if constexpr (is_same_v<_From, _To>) 112b1e83836Smrg return __x; 113b1e83836Smrg 114b1e83836Smrg // special case (optimize) int signedness casts 115b1e83836Smrg else if constexpr (sizeof(_From) == sizeof(_To) 116b1e83836Smrg && is_integral_v<_From> && is_integral_v<_To>) 117b1e83836Smrg return 
__bit_cast<_Ret>(__x); 118b1e83836Smrg 119b1e83836Smrg // special case if all ABI tags in _Ret are scalar 120b1e83836Smrg else if constexpr (__is_scalar_abi<typename _Ret::_FirstAbi>()) 121b1e83836Smrg { 122b1e83836Smrg return __call_with_subscripts( 123b1e83836Smrg __x, make_index_sequence<_Np>(), 124*0a307195Smrg [](auto... __values) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA -> _Ret { 125b1e83836Smrg return __make_simd_tuple<_To, decltype((void) __values, 126b1e83836Smrg simd_abi::scalar())...>( 127b1e83836Smrg static_cast<_To>(__values)...); 128b1e83836Smrg }); 129b1e83836Smrg } 130b1e83836Smrg 131b1e83836Smrg // from one vector to one vector 132b1e83836Smrg else if constexpr (_Arg::_S_first_size == _Ret::_S_first_size) 133b1e83836Smrg { 134b1e83836Smrg _SimdConverter<_From, typename _Arg::_FirstAbi, _To, 135b1e83836Smrg typename _Ret::_FirstAbi> 136b1e83836Smrg __native_cvt; 137b1e83836Smrg if constexpr (_Arg::_S_tuple_size == 1) 138b1e83836Smrg return {__native_cvt(__x.first)}; 139b1e83836Smrg else 140b1e83836Smrg { 141b1e83836Smrg constexpr size_t _NRemain = _Np - _Arg::_S_first_size; 142b1e83836Smrg _SimdConverter<_From, simd_abi::fixed_size<_NRemain>, _To, 143b1e83836Smrg simd_abi::fixed_size<_NRemain>> 144b1e83836Smrg __remainder_cvt; 145b1e83836Smrg return {__native_cvt(__x.first), __remainder_cvt(__x.second)}; 146b1e83836Smrg } 147b1e83836Smrg } 148b1e83836Smrg 149b1e83836Smrg // from one vector to multiple vectors 150b1e83836Smrg else if constexpr (_Arg::_S_first_size > _Ret::_S_first_size) 151b1e83836Smrg { 152b1e83836Smrg const auto __multiple_return_chunks 153b1e83836Smrg = __convert_all<__vector_type_t<_To, _Ret::_S_first_size>>( 154b1e83836Smrg __x.first); 155b1e83836Smrg constexpr auto __converted = __multiple_return_chunks.size() 156b1e83836Smrg * _Ret::_FirstAbi::template _S_size<_To>; 157b1e83836Smrg constexpr auto __remaining = _Np - __converted; 158b1e83836Smrg if constexpr (_Arg::_S_tuple_size == 1 && __remaining == 0) 159b1e83836Smrg 
return __to_simd_tuple<_To, _Np>(__multiple_return_chunks); 160b1e83836Smrg else if constexpr (_Arg::_S_tuple_size == 1) 161b1e83836Smrg { // e.g. <int, 3> -> <double, 2, 1> or <short, 7> -> <double, 4, 2, 162b1e83836Smrg // 1> 163b1e83836Smrg using _RetRem 164b1e83836Smrg = __remove_cvref_t<decltype(__simd_tuple_pop_front<__converted>( 165b1e83836Smrg _Ret()))>; 166b1e83836Smrg const auto __return_chunks2 167b1e83836Smrg = __convert_all<__vector_type_t<_To, _RetRem::_S_first_size>, 0, 168b1e83836Smrg __converted>(__x.first); 169b1e83836Smrg constexpr auto __converted2 170b1e83836Smrg = __converted 171b1e83836Smrg + __return_chunks2.size() * _RetRem::_S_first_size; 172b1e83836Smrg if constexpr (__converted2 == _Np) 173b1e83836Smrg return __to_simd_tuple<_To, _Np>(__multiple_return_chunks, 174b1e83836Smrg __return_chunks2); 175b1e83836Smrg else 176b1e83836Smrg { 177b1e83836Smrg using _RetRem2 = __remove_cvref_t< 178b1e83836Smrg decltype(__simd_tuple_pop_front<__return_chunks2.size() 179b1e83836Smrg * _RetRem::_S_first_size>( 180b1e83836Smrg _RetRem()))>; 181b1e83836Smrg const auto __return_chunks3 = __convert_all< 182b1e83836Smrg __vector_type_t<_To, _RetRem2::_S_first_size>, 0, 183b1e83836Smrg __converted2>(__x.first); 184b1e83836Smrg constexpr auto __converted3 185b1e83836Smrg = __converted2 186b1e83836Smrg + __return_chunks3.size() * _RetRem2::_S_first_size; 187b1e83836Smrg if constexpr (__converted3 == _Np) 188b1e83836Smrg return __to_simd_tuple<_To, _Np>(__multiple_return_chunks, 189b1e83836Smrg __return_chunks2, 190b1e83836Smrg __return_chunks3); 191b1e83836Smrg else 192b1e83836Smrg { 193b1e83836Smrg using _RetRem3 194b1e83836Smrg = __remove_cvref_t<decltype(__simd_tuple_pop_front< 195b1e83836Smrg __return_chunks3.size() 196b1e83836Smrg * _RetRem2::_S_first_size>( 197b1e83836Smrg _RetRem2()))>; 198b1e83836Smrg const auto __return_chunks4 = __convert_all< 199b1e83836Smrg __vector_type_t<_To, _RetRem3::_S_first_size>, 0, 200b1e83836Smrg __converted3>(__x.first); 
201b1e83836Smrg constexpr auto __converted4 202b1e83836Smrg = __converted3 203b1e83836Smrg + __return_chunks4.size() * _RetRem3::_S_first_size; 204b1e83836Smrg if constexpr (__converted4 == _Np) 205b1e83836Smrg return __to_simd_tuple<_To, _Np>( 206b1e83836Smrg __multiple_return_chunks, __return_chunks2, 207b1e83836Smrg __return_chunks3, __return_chunks4); 208b1e83836Smrg else 209b1e83836Smrg __assert_unreachable<_To>(); 210b1e83836Smrg } 211b1e83836Smrg } 212b1e83836Smrg } 213b1e83836Smrg else 214b1e83836Smrg { 215b1e83836Smrg constexpr size_t _NRemain = _Np - _Arg::_S_first_size; 216b1e83836Smrg _SimdConverter<_From, simd_abi::fixed_size<_NRemain>, _To, 217b1e83836Smrg simd_abi::fixed_size<_NRemain>> 218b1e83836Smrg __remainder_cvt; 219b1e83836Smrg return __simd_tuple_concat( 220b1e83836Smrg __to_simd_tuple<_To, _Arg::_S_first_size>( 221b1e83836Smrg __multiple_return_chunks), 222b1e83836Smrg __remainder_cvt(__x.second)); 223b1e83836Smrg } 224b1e83836Smrg } 225b1e83836Smrg 226b1e83836Smrg // from multiple vectors to one vector 227b1e83836Smrg // _Arg::_S_first_size < _Ret::_S_first_size 228b1e83836Smrg // a) heterogeneous input at the end of the tuple (possible with partial 229b1e83836Smrg // native registers in _Ret) 230b1e83836Smrg else if constexpr (_Ret::_S_tuple_size == 1 231b1e83836Smrg && _Np % _Arg::_S_first_size != 0) 232b1e83836Smrg { 233b1e83836Smrg static_assert(_Ret::_FirstAbi::template _S_is_partial<_To>); 234b1e83836Smrg return _Ret{__generate_from_n_evaluations< 235b1e83836Smrg _Np, typename _VectorTraits<typename _Ret::_FirstType>::type>( 236*0a307195Smrg [&](auto __i) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA { 237*0a307195Smrg return static_cast<_To>(__x[__i]); 238*0a307195Smrg })}; 239b1e83836Smrg } 240b1e83836Smrg else 241b1e83836Smrg { 242b1e83836Smrg static_assert(_Arg::_S_tuple_size > 1); 243b1e83836Smrg constexpr auto __n 244b1e83836Smrg = __div_roundup(_Ret::_S_first_size, _Arg::_S_first_size); 245b1e83836Smrg return 
__call_with_n_evaluations<__n>( 246*0a307195Smrg [&__x](auto... __uncvted) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA { 247b1e83836Smrg // assuming _Arg Abi tags for all __i are _Arg::_FirstAbi 248b1e83836Smrg _SimdConverter<_From, typename _Arg::_FirstAbi, _To, 249b1e83836Smrg typename _Ret::_FirstAbi> 250b1e83836Smrg __native_cvt; 251b1e83836Smrg if constexpr (_Ret::_S_tuple_size == 1) 252b1e83836Smrg return _Ret{__native_cvt(__uncvted...)}; 253b1e83836Smrg else 254b1e83836Smrg return _Ret{ 255b1e83836Smrg __native_cvt(__uncvted...), 256b1e83836Smrg _SimdConverter< 257b1e83836Smrg _From, simd_abi::fixed_size<_Np - _Ret::_S_first_size>, _To, 258b1e83836Smrg simd_abi::fixed_size<_Np - _Ret::_S_first_size>>()( 259b1e83836Smrg __simd_tuple_pop_front<_Ret::_S_first_size>(__x))}; 260*0a307195Smrg }, [&__x](auto __i) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA { 261*0a307195Smrg return __get_tuple_at<__i>(__x); 262*0a307195Smrg }); 263b1e83836Smrg } 264b1e83836Smrg } 265b1e83836Smrg }; 266b1e83836Smrg 267b1e83836Smrg // _SimdConverter "native" -> fixed_size<_Np> {{{1 268b1e83836Smrg // i.e. 1 register to ? 
registers 269b1e83836Smrg template <typename _From, typename _Ap, typename _To, int _Np> 270b1e83836Smrg struct _SimdConverter<_From, _Ap, _To, simd_abi::fixed_size<_Np>, 271b1e83836Smrg enable_if_t<!__is_fixed_size_abi_v<_Ap>>> 272b1e83836Smrg { 273b1e83836Smrg static_assert( 274b1e83836Smrg _Np == simd_size_v<_From, _Ap>, 275b1e83836Smrg "_SimdConverter to fixed_size only works for equal element counts"); 276b1e83836Smrg 277b1e83836Smrg using _Ret = __fixed_size_storage_t<_To, _Np>; 278b1e83836Smrg 279b1e83836Smrg _GLIBCXX_SIMD_INTRINSIC constexpr _Ret 280b1e83836Smrg operator()(typename _SimdTraits<_From, _Ap>::_SimdMember __x) const noexcept 281b1e83836Smrg { 282b1e83836Smrg if constexpr (_Ret::_S_tuple_size == 1) 283b1e83836Smrg return {__vector_convert<typename _Ret::_FirstType::_BuiltinType>(__x)}; 284b1e83836Smrg else 285b1e83836Smrg { 286b1e83836Smrg using _FixedNp = simd_abi::fixed_size<_Np>; 287b1e83836Smrg _SimdConverter<_From, _FixedNp, _To, _FixedNp> __fixed_cvt; 288b1e83836Smrg using _FromFixedStorage = __fixed_size_storage_t<_From, _Np>; 289b1e83836Smrg if constexpr (_FromFixedStorage::_S_tuple_size == 1) 290b1e83836Smrg return __fixed_cvt(_FromFixedStorage{__x}); 291b1e83836Smrg else if constexpr (_FromFixedStorage::_S_tuple_size == 2) 292b1e83836Smrg { 293b1e83836Smrg _FromFixedStorage __tmp; 294b1e83836Smrg static_assert(sizeof(__tmp) <= sizeof(__x)); 295b1e83836Smrg __builtin_memcpy(&__tmp.first, &__x, sizeof(__tmp.first)); 296b1e83836Smrg __builtin_memcpy(&__tmp.second.first, 297b1e83836Smrg reinterpret_cast<const char*>(&__x) 298b1e83836Smrg + sizeof(__tmp.first), 299b1e83836Smrg sizeof(__tmp.second.first)); 300b1e83836Smrg return __fixed_cvt(__tmp); 301b1e83836Smrg } 302b1e83836Smrg else 303b1e83836Smrg __assert_unreachable<_From>(); 304b1e83836Smrg } 305b1e83836Smrg } 306b1e83836Smrg }; 307b1e83836Smrg 308b1e83836Smrg // _SimdConverter fixed_size<_Np> -> "native" {{{1 309b1e83836Smrg // i.e. ? 
register to 1 registers 310b1e83836Smrg template <typename _From, int _Np, typename _To, typename _Ap> 311b1e83836Smrg struct _SimdConverter<_From, simd_abi::fixed_size<_Np>, _To, _Ap, 312b1e83836Smrg enable_if_t<!__is_fixed_size_abi_v<_Ap>>> 313b1e83836Smrg { 314b1e83836Smrg static_assert( 315b1e83836Smrg _Np == simd_size_v<_To, _Ap>, 316b1e83836Smrg "_SimdConverter to fixed_size only works for equal element counts"); 317b1e83836Smrg 318b1e83836Smrg using _Arg = __fixed_size_storage_t<_From, _Np>; 319b1e83836Smrg 320b1e83836Smrg _GLIBCXX_SIMD_INTRINSIC constexpr 321b1e83836Smrg typename _SimdTraits<_To, _Ap>::_SimdMember 322b1e83836Smrg operator()(const _Arg& __x) const noexcept 323b1e83836Smrg { 324b1e83836Smrg if constexpr (_Arg::_S_tuple_size == 1) 325b1e83836Smrg return __vector_convert<__vector_type_t<_To, _Np>>(__x.first); 326b1e83836Smrg else if constexpr (_Arg::_S_is_homogeneous) 327b1e83836Smrg return __call_with_n_evaluations<_Arg::_S_tuple_size>( 328*0a307195Smrg [](auto... __members) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA { 329b1e83836Smrg if constexpr ((is_convertible_v<decltype(__members), _To> && ...)) 330b1e83836Smrg return __vector_type_t<_To, _Np>{static_cast<_To>(__members)...}; 331b1e83836Smrg else 332b1e83836Smrg return __vector_convert<__vector_type_t<_To, _Np>>(__members...); 333*0a307195Smrg }, [&](auto __i) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA { 334*0a307195Smrg return __get_tuple_at<__i>(__x); 335*0a307195Smrg }); 336b1e83836Smrg else if constexpr (__fixed_size_storage_t<_To, _Np>::_S_tuple_size == 1) 337b1e83836Smrg { 338b1e83836Smrg _SimdConverter<_From, simd_abi::fixed_size<_Np>, _To, 339b1e83836Smrg simd_abi::fixed_size<_Np>> 340b1e83836Smrg __fixed_cvt; 341b1e83836Smrg return __fixed_cvt(__x).first; 342b1e83836Smrg } 343b1e83836Smrg else 344b1e83836Smrg { 345b1e83836Smrg const _SimdWrapper<_From, _Np> __xv 346b1e83836Smrg = __generate_from_n_evaluations<_Np, __vector_type_t<_From, _Np>>( 347*0a307195Smrg [&](auto __i) 
_GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA { return __x[__i]; }); 348b1e83836Smrg return __vector_convert<__vector_type_t<_To, _Np>>(__xv); 349b1e83836Smrg } 350b1e83836Smrg } 351b1e83836Smrg }; 352b1e83836Smrg 353b1e83836Smrg // }}}1 354b1e83836Smrg _GLIBCXX_SIMD_END_NAMESPACE 355b1e83836Smrg #endif // __cplusplus >= 201703L 356b1e83836Smrg #endif // _GLIBCXX_EXPERIMENTAL_SIMD_CONVERTER_H_ 357b1e83836Smrg 358b1e83836Smrg // vim: foldmethod=marker sw=2 noet ts=8 sts=2 tw=80 359