// Simd Abi specific implementations -*- C++ -*-

// Copyright (C) 2020-2022 Free Software Foundation, Inc.
//
// This file is part of the GNU ISO C++ Library.  This library is free
// software; you can redistribute it and/or modify it under the
// terms of the GNU General Public License as published by the
// Free Software Foundation; either version 3, or (at your option)
// any later version.

// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
// GNU General Public License for more details.

// Under Section 7 of GPL version 3, you are granted additional
// permissions described in the GCC Runtime Library Exception, version
// 3.1, as published by the Free Software Foundation.

// You should have received a copy of the GNU General Public License and
// a copy of the GCC Runtime Library Exception along with this program;
// see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
// <http://www.gnu.org/licenses/>.

#ifndef _GLIBCXX_EXPERIMENTAL_SIMD_ABIS_H_
#define _GLIBCXX_EXPERIMENTAL_SIMD_ABIS_H_

#if __cplusplus >= 201703L

#include <array>
#include <cmath>
#include <cstdlib>

_GLIBCXX_SIMD_BEGIN_NAMESPACE
// _S_allbits{{{
template <typename _V>
  static inline _GLIBCXX_SIMD_USE_CONSTEXPR _V _S_allbits
    = reinterpret_cast<_V>(~__vector_type_t<char, sizeof(_V) / sizeof(char)>());

// }}}
// _S_signmask, _S_absmask{{{
template <typename _V, typename = _VectorTraits<_V>>
  static inline _GLIBCXX_SIMD_USE_CONSTEXPR _V _S_signmask
    = __xor(_V() + 1, _V() - 1);

template <typename _V, typename = _VectorTraits<_V>>
  static inline _GLIBCXX_SIMD_USE_CONSTEXPR _V _S_absmask
    = __andnot(_S_signmask<_V>, _S_allbits<_V>);

//}}}
// __vector_permute<Indices...>{{{
// Index == -1 requests zeroing of the output element
template <int... _Indices, typename _Tp, typename _TVT = _VectorTraits<_Tp>,
          typename = __detail::__odr_helper>
  _Tp
  __vector_permute(_Tp __x)
  {
    static_assert(sizeof...(_Indices) == _TVT::_S_full_size);
    return __make_vector<typename _TVT::value_type>(
             (_Indices == -1 ? 0 : __x[_Indices == -1 ? 0 : _Indices])...);
  }

// }}}
// __vector_shuffle<Indices...>{{{
// Index == -1 requests zeroing of the output element
template <int... _Indices, typename _Tp, typename _TVT = _VectorTraits<_Tp>,
          typename = __detail::__odr_helper>
  _Tp
  __vector_shuffle(_Tp __x, _Tp __y)
  {
    return _Tp{(_Indices == -1 ? 0
                : _Indices < _TVT::_S_full_size
                    ? __x[_Indices]
                    : __y[_Indices - _TVT::_S_full_size])...};
  }

// }}}
// __make_wrapper{{{
template <typename _Tp, typename... _Args>
  _GLIBCXX_SIMD_INTRINSIC constexpr _SimdWrapper<_Tp, sizeof...(_Args)>
  __make_wrapper(const _Args&... __args)
  { return __make_vector<_Tp>(__args...); }

// }}}
// __wrapper_bitcast{{{
template <typename _Tp, size_t _ToN = 0, typename _Up, size_t _M,
          size_t _Np = _ToN != 0 ? _ToN : sizeof(_Up) * _M / sizeof(_Tp)>
  _GLIBCXX_SIMD_INTRINSIC constexpr _SimdWrapper<_Tp, _Np>
  __wrapper_bitcast(_SimdWrapper<_Up, _M> __x)
  {
    static_assert(_Np > 1);
    return __intrin_bitcast<__vector_type_t<_Tp, _Np>>(__x._M_data);
  }

// }}}
// __shift_elements_right{{{
// if (__shift % 2ⁿ == 0) => the low n Bytes are correct
template <unsigned __shift, typename _Tp, typename _TVT = _VectorTraits<_Tp>>
  _GLIBCXX_SIMD_INTRINSIC _Tp
  __shift_elements_right(_Tp __v)
  {
    [[maybe_unused]] const auto __iv = __to_intrin(__v);
    static_assert(__shift <= sizeof(_Tp));
    if constexpr (__shift == 0)
      return __v;
    else if constexpr (__shift == sizeof(_Tp))
      return _Tp();
#if _GLIBCXX_SIMD_X86INTRIN // {{{
    else if constexpr (__have_sse && __shift == 8
                       && _TVT::template _S_is<float, 4>)
      return _mm_movehl_ps(__iv, __iv);
    else if constexpr (__have_sse2 && __shift == 8
                       && _TVT::template _S_is<double, 2>)
      return _mm_unpackhi_pd(__iv, __iv);
    else if constexpr (__have_sse2 && sizeof(_Tp) == 16)
      return reinterpret_cast<typename _TVT::type>(
               _mm_srli_si128(reinterpret_cast<__m128i>(__iv), __shift));
    else if constexpr (__shift == 16 && sizeof(_Tp) == 32)
      {
        /*if constexpr (__have_avx && _TVT::template _S_is<double, 4>)
          return _mm256_permute2f128_pd(__iv, __iv, 0x81);
        else if constexpr (__have_avx && _TVT::template _S_is<float, 8>)
          return _mm256_permute2f128_ps(__iv, __iv, 0x81);
        else if constexpr (__have_avx)
          return reinterpret_cast<typename _TVT::type>(
            _mm256_permute2f128_si256(__iv, __iv, 0x81));
        else*/
        return __zero_extend(__hi128(__v));
      }
    else if constexpr (__have_avx2 && sizeof(_Tp) == 32 && __shift < 16)
      {
        const auto __vll = __vector_bitcast<_LLong>(__v);
        return reinterpret_cast<typename _TVT::type>(
                 _mm256_alignr_epi8(
                   _mm256_permute2x128_si256(__vll, __vll, 0x81),
                   __vll, __shift));
      }
    else if constexpr (__have_avx && sizeof(_Tp) == 32 && __shift < 16)
      {
        const auto __vll = __vector_bitcast<_LLong>(__v);
        return reinterpret_cast<typename _TVT::type>(
                 __concat(
                   _mm_alignr_epi8(__hi128(__vll), __lo128(__vll), __shift),
                   _mm_srli_si128(__hi128(__vll), __shift)));
      }
    else if constexpr (sizeof(_Tp) == 32 && __shift > 16)
      return __zero_extend(__shift_elements_right<__shift - 16>(__hi128(__v)));
    else if constexpr (sizeof(_Tp) == 64 && __shift == 32)
      return __zero_extend(__hi256(__v));
    else if constexpr (__have_avx512f && sizeof(_Tp) == 64)
      {
        if constexpr (__shift >= 48)
          return __zero_extend(
                   __shift_elements_right<__shift - 48>(__extract<3, 4>(__v)));
        else if constexpr (__shift >= 32)
          return __zero_extend(
                   __shift_elements_right<__shift - 32>(__hi256(__v)));
        else if constexpr (__shift % 8 == 0)
          return reinterpret_cast<typename _TVT::type>(
                   _mm512_alignr_epi64(__m512i(), __intrin_bitcast<__m512i>(__v),
                                       __shift / 8));
        else if constexpr (__shift % 4 == 0)
          return reinterpret_cast<typename _TVT::type>(
                   _mm512_alignr_epi32(__m512i(), __intrin_bitcast<__m512i>(__v),
                                       __shift / 4));
        else if constexpr (__have_avx512bw && __shift < 16)
          {
            const auto __vll = __vector_bitcast<_LLong>(__v);
            return reinterpret_cast<typename _TVT::type>(
                     _mm512_alignr_epi8(
                       _mm512_shuffle_i32x4(__vll, __vll, 0xf9),
                       __vll, __shift));
          }
        else if constexpr (__have_avx512bw && __shift < 32)
          {
            const auto __vll = __vector_bitcast<_LLong>(__v);
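            // The following return statement shifts the whole 512-bit vector
            // right by __shift bytes. vpalignr only shifts within 128-bit
            // lane pairs, so the lanes are rotated down first: imm 0xf9
            // selects lanes {1, 2, 3, 3} of __vll, and imm 0xee with a zero
            // second operand selects {2, 3, 0, 0}. The per-lane valignr by
            // (__shift - 16) bytes then pulls the remaining bytes across each
            // lane boundary, with zeros shifted in at the top.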
174 return reinterpret_cast<typename _TVT::type>( 175 _mm512_alignr_epi8(_mm512_shuffle_i32x4(__vll, __m512i(), 0xee), 176 _mm512_shuffle_i32x4(__vll, __vll, 0xf9), 177 __shift - 16)); 178 } 179 else 180 __assert_unreachable<_Tp>(); 181 } 182 /* 183 } else if constexpr (__shift % 16 == 0 && sizeof(_Tp) == 64) 184 return __auto_bitcast(__extract<__shift / 16, 4>(__v)); 185 */ 186 #endif // _GLIBCXX_SIMD_X86INTRIN }}} 187 else 188 { 189 constexpr int __chunksize = __shift % 8 == 0 ? 8 190 : __shift % 4 == 0 ? 4 191 : __shift % 2 == 0 ? 2 192 : 1; 193 auto __w = __vector_bitcast<__int_with_sizeof_t<__chunksize>>(__v); 194 using _Up = decltype(__w); 195 return __intrin_bitcast<_Tp>( 196 __call_with_n_evaluations<(sizeof(_Tp) - __shift) / __chunksize>( 197 [](auto... __chunks) { return _Up{__chunks...}; }, 198 [&](auto __i) { return __w[__shift / __chunksize + __i]; })); 199 } 200 } 201 202 // }}} 203 // __extract_part(_SimdWrapper<_Tp, _Np>) {{{ 204 template <int _Index, int _Total, int _Combine, typename _Tp, size_t _Np> 205 _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_CONST 206 _SimdWrapper<_Tp, _Np / _Total * _Combine> 207 __extract_part(const _SimdWrapper<_Tp, _Np> __x) 208 { 209 if constexpr (_Index % 2 == 0 && _Total % 2 == 0 && _Combine % 2 == 0) 210 return __extract_part<_Index / 2, _Total / 2, _Combine / 2>(__x); 211 else 212 { 213 constexpr size_t __values_per_part = _Np / _Total; 214 constexpr size_t __values_to_skip = _Index * __values_per_part; 215 constexpr size_t __return_size = __values_per_part * _Combine; 216 using _R = __vector_type_t<_Tp, __return_size>; 217 static_assert((_Index + _Combine) * __values_per_part * sizeof(_Tp) 218 <= sizeof(__x), 219 "out of bounds __extract_part"); 220 // the following assertion would ensure no "padding" to be read 221 // static_assert(_Total >= _Index + _Combine, "_Total must be greater 222 // than _Index"); 223 224 // static_assert(__return_size * _Total == _Np, "_Np must be divisible 225 // by _Total"); 226 if (__x._M_is_constprop()) 227 return __generate_from_n_evaluations<__return_size, _R>( 228 [&](auto __i) { return __x[__values_to_skip + __i]; }); 229 if constexpr (_Index == 0 && _Total == 1) 230 return __x; 231 else if constexpr (_Index == 0) 232 return __intrin_bitcast<_R>(__as_vector(__x)); 233 #if _GLIBCXX_SIMD_X86INTRIN // {{{ 234 else if constexpr (sizeof(__x) == 32 235 && __return_size * sizeof(_Tp) <= 16) 236 { 237 constexpr size_t __bytes_to_skip = __values_to_skip * sizeof(_Tp); 238 if constexpr (__bytes_to_skip == 16) 239 return __vector_bitcast<_Tp, __return_size>( 240 __hi128(__as_vector(__x))); 241 else 242 return __vector_bitcast<_Tp, __return_size>( 243 _mm_alignr_epi8(__hi128(__vector_bitcast<_LLong>(__x)), 244 __lo128(__vector_bitcast<_LLong>(__x)), 245 __bytes_to_skip)); 246 } 247 #endif // _GLIBCXX_SIMD_X86INTRIN }}} 248 else if constexpr (_Index > 0 249 && (__values_to_skip % __return_size != 0 250 || sizeof(_R) >= 8) 251 && (__values_to_skip + __return_size) * sizeof(_Tp) 252 <= 64 253 && sizeof(__x) >= 16) 254 return __intrin_bitcast<_R>( 255 __shift_elements_right<__values_to_skip * sizeof(_Tp)>( 256 __as_vector(__x))); 257 else 258 { 259 _R __r = {}; 260 __builtin_memcpy(&__r, 261 reinterpret_cast<const char*>(&__x) 262 + sizeof(_Tp) * __values_to_skip, 263 __return_size * sizeof(_Tp)); 264 return __r; 265 } 266 } 267 } 268 269 // }}} 270 // __extract_part(_SimdWrapper<bool, _Np>) {{{ 271 template <int _Index, int _Total, int _Combine = 1, size_t _Np> 272 _GLIBCXX_SIMD_INTRINSIC constexpr _SimdWrapper<bool, _Np / _Total 
* _Combine> 273 __extract_part(const _SimdWrapper<bool, _Np> __x) 274 { 275 static_assert(_Combine == 1, "_Combine != 1 not implemented"); 276 static_assert(__have_avx512f && _Np == _Np); 277 static_assert(_Total >= 2 && _Index + _Combine <= _Total && _Index >= 0); 278 return __x._M_data >> (_Index * _Np / _Total); 279 } 280 281 // }}} 282 283 // __vector_convert {{{ 284 // implementation requires an index sequence 285 template <typename _To, typename _From, size_t... _I> 286 _GLIBCXX_SIMD_INTRINSIC constexpr _To 287 __vector_convert(_From __a, index_sequence<_I...>) 288 { 289 using _Tp = typename _VectorTraits<_To>::value_type; 290 return _To{static_cast<_Tp>(__a[_I])...}; 291 } 292 293 template <typename _To, typename _From, size_t... _I> 294 _GLIBCXX_SIMD_INTRINSIC constexpr _To 295 __vector_convert(_From __a, _From __b, index_sequence<_I...>) 296 { 297 using _Tp = typename _VectorTraits<_To>::value_type; 298 return _To{static_cast<_Tp>(__a[_I])..., static_cast<_Tp>(__b[_I])...}; 299 } 300 301 template <typename _To, typename _From, size_t... _I> 302 _GLIBCXX_SIMD_INTRINSIC constexpr _To 303 __vector_convert(_From __a, _From __b, _From __c, index_sequence<_I...>) 304 { 305 using _Tp = typename _VectorTraits<_To>::value_type; 306 return _To{static_cast<_Tp>(__a[_I])..., static_cast<_Tp>(__b[_I])..., 307 static_cast<_Tp>(__c[_I])...}; 308 } 309 310 template <typename _To, typename _From, size_t... _I> 311 _GLIBCXX_SIMD_INTRINSIC constexpr _To 312 __vector_convert(_From __a, _From __b, _From __c, _From __d, 313 index_sequence<_I...>) 314 { 315 using _Tp = typename _VectorTraits<_To>::value_type; 316 return _To{static_cast<_Tp>(__a[_I])..., static_cast<_Tp>(__b[_I])..., 317 static_cast<_Tp>(__c[_I])..., static_cast<_Tp>(__d[_I])...}; 318 } 319 320 template <typename _To, typename _From, size_t... _I> 321 _GLIBCXX_SIMD_INTRINSIC constexpr _To 322 __vector_convert(_From __a, _From __b, _From __c, _From __d, _From __e, 323 index_sequence<_I...>) 324 { 325 using _Tp = typename _VectorTraits<_To>::value_type; 326 return _To{static_cast<_Tp>(__a[_I])..., static_cast<_Tp>(__b[_I])..., 327 static_cast<_Tp>(__c[_I])..., static_cast<_Tp>(__d[_I])..., 328 static_cast<_Tp>(__e[_I])...}; 329 } 330 331 template <typename _To, typename _From, size_t... _I> 332 _GLIBCXX_SIMD_INTRINSIC constexpr _To 333 __vector_convert(_From __a, _From __b, _From __c, _From __d, _From __e, 334 _From __f, index_sequence<_I...>) 335 { 336 using _Tp = typename _VectorTraits<_To>::value_type; 337 return _To{static_cast<_Tp>(__a[_I])..., static_cast<_Tp>(__b[_I])..., 338 static_cast<_Tp>(__c[_I])..., static_cast<_Tp>(__d[_I])..., 339 static_cast<_Tp>(__e[_I])..., static_cast<_Tp>(__f[_I])...}; 340 } 341 342 template <typename _To, typename _From, size_t... _I> 343 _GLIBCXX_SIMD_INTRINSIC constexpr _To 344 __vector_convert(_From __a, _From __b, _From __c, _From __d, _From __e, 345 _From __f, _From __g, index_sequence<_I...>) 346 { 347 using _Tp = typename _VectorTraits<_To>::value_type; 348 return _To{static_cast<_Tp>(__a[_I])..., static_cast<_Tp>(__b[_I])..., 349 static_cast<_Tp>(__c[_I])..., static_cast<_Tp>(__d[_I])..., 350 static_cast<_Tp>(__e[_I])..., static_cast<_Tp>(__f[_I])..., 351 static_cast<_Tp>(__g[_I])...}; 352 } 353 354 template <typename _To, typename _From, size_t... 
_I> 355 _GLIBCXX_SIMD_INTRINSIC constexpr _To 356 __vector_convert(_From __a, _From __b, _From __c, _From __d, _From __e, 357 _From __f, _From __g, _From __h, index_sequence<_I...>) 358 { 359 using _Tp = typename _VectorTraits<_To>::value_type; 360 return _To{static_cast<_Tp>(__a[_I])..., static_cast<_Tp>(__b[_I])..., 361 static_cast<_Tp>(__c[_I])..., static_cast<_Tp>(__d[_I])..., 362 static_cast<_Tp>(__e[_I])..., static_cast<_Tp>(__f[_I])..., 363 static_cast<_Tp>(__g[_I])..., static_cast<_Tp>(__h[_I])...}; 364 } 365 366 template <typename _To, typename _From, size_t... _I> 367 _GLIBCXX_SIMD_INTRINSIC constexpr _To 368 __vector_convert(_From __a, _From __b, _From __c, _From __d, _From __e, 369 _From __f, _From __g, _From __h, _From __i, 370 index_sequence<_I...>) 371 { 372 using _Tp = typename _VectorTraits<_To>::value_type; 373 return _To{static_cast<_Tp>(__a[_I])..., static_cast<_Tp>(__b[_I])..., 374 static_cast<_Tp>(__c[_I])..., static_cast<_Tp>(__d[_I])..., 375 static_cast<_Tp>(__e[_I])..., static_cast<_Tp>(__f[_I])..., 376 static_cast<_Tp>(__g[_I])..., static_cast<_Tp>(__h[_I])..., 377 static_cast<_Tp>(__i[_I])...}; 378 } 379 380 template <typename _To, typename _From, size_t... _I> 381 _GLIBCXX_SIMD_INTRINSIC constexpr _To 382 __vector_convert(_From __a, _From __b, _From __c, _From __d, _From __e, 383 _From __f, _From __g, _From __h, _From __i, _From __j, 384 index_sequence<_I...>) 385 { 386 using _Tp = typename _VectorTraits<_To>::value_type; 387 return _To{static_cast<_Tp>(__a[_I])..., static_cast<_Tp>(__b[_I])..., 388 static_cast<_Tp>(__c[_I])..., static_cast<_Tp>(__d[_I])..., 389 static_cast<_Tp>(__e[_I])..., static_cast<_Tp>(__f[_I])..., 390 static_cast<_Tp>(__g[_I])..., static_cast<_Tp>(__h[_I])..., 391 static_cast<_Tp>(__i[_I])..., static_cast<_Tp>(__j[_I])...}; 392 } 393 394 template <typename _To, typename _From, size_t... _I> 395 _GLIBCXX_SIMD_INTRINSIC constexpr _To 396 __vector_convert(_From __a, _From __b, _From __c, _From __d, _From __e, 397 _From __f, _From __g, _From __h, _From __i, _From __j, 398 _From __k, index_sequence<_I...>) 399 { 400 using _Tp = typename _VectorTraits<_To>::value_type; 401 return _To{static_cast<_Tp>(__a[_I])..., static_cast<_Tp>(__b[_I])..., 402 static_cast<_Tp>(__c[_I])..., static_cast<_Tp>(__d[_I])..., 403 static_cast<_Tp>(__e[_I])..., static_cast<_Tp>(__f[_I])..., 404 static_cast<_Tp>(__g[_I])..., static_cast<_Tp>(__h[_I])..., 405 static_cast<_Tp>(__i[_I])..., static_cast<_Tp>(__j[_I])..., 406 static_cast<_Tp>(__k[_I])...}; 407 } 408 409 template <typename _To, typename _From, size_t... _I> 410 _GLIBCXX_SIMD_INTRINSIC constexpr _To 411 __vector_convert(_From __a, _From __b, _From __c, _From __d, _From __e, 412 _From __f, _From __g, _From __h, _From __i, _From __j, 413 _From __k, _From __l, index_sequence<_I...>) 414 { 415 using _Tp = typename _VectorTraits<_To>::value_type; 416 return _To{static_cast<_Tp>(__a[_I])..., static_cast<_Tp>(__b[_I])..., 417 static_cast<_Tp>(__c[_I])..., static_cast<_Tp>(__d[_I])..., 418 static_cast<_Tp>(__e[_I])..., static_cast<_Tp>(__f[_I])..., 419 static_cast<_Tp>(__g[_I])..., static_cast<_Tp>(__h[_I])..., 420 static_cast<_Tp>(__i[_I])..., static_cast<_Tp>(__j[_I])..., 421 static_cast<_Tp>(__k[_I])..., static_cast<_Tp>(__l[_I])...}; 422 } 423 424 template <typename _To, typename _From, size_t... 
_I> 425 _GLIBCXX_SIMD_INTRINSIC constexpr _To 426 __vector_convert(_From __a, _From __b, _From __c, _From __d, _From __e, 427 _From __f, _From __g, _From __h, _From __i, _From __j, 428 _From __k, _From __l, _From __m, index_sequence<_I...>) 429 { 430 using _Tp = typename _VectorTraits<_To>::value_type; 431 return _To{static_cast<_Tp>(__a[_I])..., static_cast<_Tp>(__b[_I])..., 432 static_cast<_Tp>(__c[_I])..., static_cast<_Tp>(__d[_I])..., 433 static_cast<_Tp>(__e[_I])..., static_cast<_Tp>(__f[_I])..., 434 static_cast<_Tp>(__g[_I])..., static_cast<_Tp>(__h[_I])..., 435 static_cast<_Tp>(__i[_I])..., static_cast<_Tp>(__j[_I])..., 436 static_cast<_Tp>(__k[_I])..., static_cast<_Tp>(__l[_I])..., 437 static_cast<_Tp>(__m[_I])...}; 438 } 439 440 template <typename _To, typename _From, size_t... _I> 441 _GLIBCXX_SIMD_INTRINSIC constexpr _To 442 __vector_convert(_From __a, _From __b, _From __c, _From __d, _From __e, 443 _From __f, _From __g, _From __h, _From __i, _From __j, 444 _From __k, _From __l, _From __m, _From __n, 445 index_sequence<_I...>) 446 { 447 using _Tp = typename _VectorTraits<_To>::value_type; 448 return _To{static_cast<_Tp>(__a[_I])..., static_cast<_Tp>(__b[_I])..., 449 static_cast<_Tp>(__c[_I])..., static_cast<_Tp>(__d[_I])..., 450 static_cast<_Tp>(__e[_I])..., static_cast<_Tp>(__f[_I])..., 451 static_cast<_Tp>(__g[_I])..., static_cast<_Tp>(__h[_I])..., 452 static_cast<_Tp>(__i[_I])..., static_cast<_Tp>(__j[_I])..., 453 static_cast<_Tp>(__k[_I])..., static_cast<_Tp>(__l[_I])..., 454 static_cast<_Tp>(__m[_I])..., static_cast<_Tp>(__n[_I])...}; 455 } 456 457 template <typename _To, typename _From, size_t... _I> 458 _GLIBCXX_SIMD_INTRINSIC constexpr _To 459 __vector_convert(_From __a, _From __b, _From __c, _From __d, _From __e, 460 _From __f, _From __g, _From __h, _From __i, _From __j, 461 _From __k, _From __l, _From __m, _From __n, _From __o, 462 index_sequence<_I...>) 463 { 464 using _Tp = typename _VectorTraits<_To>::value_type; 465 return _To{static_cast<_Tp>(__a[_I])..., static_cast<_Tp>(__b[_I])..., 466 static_cast<_Tp>(__c[_I])..., static_cast<_Tp>(__d[_I])..., 467 static_cast<_Tp>(__e[_I])..., static_cast<_Tp>(__f[_I])..., 468 static_cast<_Tp>(__g[_I])..., static_cast<_Tp>(__h[_I])..., 469 static_cast<_Tp>(__i[_I])..., static_cast<_Tp>(__j[_I])..., 470 static_cast<_Tp>(__k[_I])..., static_cast<_Tp>(__l[_I])..., 471 static_cast<_Tp>(__m[_I])..., static_cast<_Tp>(__n[_I])..., 472 static_cast<_Tp>(__o[_I])...}; 473 } 474 475 template <typename _To, typename _From, size_t... _I> 476 _GLIBCXX_SIMD_INTRINSIC constexpr _To 477 __vector_convert(_From __a, _From __b, _From __c, _From __d, _From __e, 478 _From __f, _From __g, _From __h, _From __i, _From __j, 479 _From __k, _From __l, _From __m, _From __n, _From __o, 480 _From __p, index_sequence<_I...>) 481 { 482 using _Tp = typename _VectorTraits<_To>::value_type; 483 return _To{static_cast<_Tp>(__a[_I])..., static_cast<_Tp>(__b[_I])..., 484 static_cast<_Tp>(__c[_I])..., static_cast<_Tp>(__d[_I])..., 485 static_cast<_Tp>(__e[_I])..., static_cast<_Tp>(__f[_I])..., 486 static_cast<_Tp>(__g[_I])..., static_cast<_Tp>(__h[_I])..., 487 static_cast<_Tp>(__i[_I])..., static_cast<_Tp>(__j[_I])..., 488 static_cast<_Tp>(__k[_I])..., static_cast<_Tp>(__l[_I])..., 489 static_cast<_Tp>(__m[_I])..., static_cast<_Tp>(__n[_I])..., 490 static_cast<_Tp>(__o[_I])..., static_cast<_Tp>(__p[_I])...}; 491 } 492 493 // Defer actual conversion to the overload that takes an index sequence. 
Note 494 // that this function adds zeros or drops values off the end if you don't ensure 495 // matching width. 496 template <typename _To, typename... _From, size_t _FromSize> 497 _GLIBCXX_SIMD_INTRINSIC constexpr _To 498 __vector_convert(_SimdWrapper<_From, _FromSize>... __xs) 499 { 500 #ifdef _GLIBCXX_SIMD_WORKAROUND_PR85048 501 using _From0 = __first_of_pack_t<_From...>; 502 using _FW = _SimdWrapper<_From0, _FromSize>; 503 if (!_FW::_S_is_partial && !(... && __xs._M_is_constprop())) 504 { 505 if constexpr ((sizeof...(_From) & (sizeof...(_From) - 1)) 506 == 0) // power-of-two number of arguments 507 return __convert_x86<_To>(__as_vector(__xs)...); 508 else // append zeros and recurse until the above branch is taken 509 return __vector_convert<_To>(__xs..., _FW{}); 510 } 511 else 512 #endif 513 return __vector_convert<_To>( 514 __as_vector(__xs)..., 515 make_index_sequence<(sizeof...(__xs) == 1 ? std::min( 516 _VectorTraits<_To>::_S_full_size, int(_FromSize)) 517 : _FromSize)>()); 518 } 519 520 // }}} 521 // __convert function{{{ 522 template <typename _To, typename _From, typename... _More> 523 _GLIBCXX_SIMD_INTRINSIC constexpr auto 524 __convert(_From __v0, _More... __vs) 525 { 526 static_assert((true && ... && is_same_v<_From, _More>) ); 527 if constexpr (__is_vectorizable_v<_From>) 528 { 529 using _V = typename _VectorTraits<_To>::type; 530 using _Tp = typename _VectorTraits<_To>::value_type; 531 return _V{static_cast<_Tp>(__v0), static_cast<_Tp>(__vs)...}; 532 } 533 else if constexpr (__is_vector_type_v<_From>) 534 return __convert<_To>(__as_wrapper(__v0), __as_wrapper(__vs)...); 535 else // _SimdWrapper arguments 536 { 537 constexpr size_t __input_size = _From::_S_size * (1 + sizeof...(_More)); 538 if constexpr (__is_vectorizable_v<_To>) 539 return __convert<__vector_type_t<_To, __input_size>>(__v0, __vs...); 540 else if constexpr (!__is_vector_type_v<_To>) 541 return _To(__convert<typename _To::_BuiltinType>(__v0, __vs...)); 542 else 543 { 544 static_assert( 545 sizeof...(_More) == 0 546 || _VectorTraits<_To>::_S_full_size >= __input_size, 547 "__convert(...) requires the input to fit into the output"); 548 return __vector_convert<_To>(__v0, __vs...); 549 } 550 } 551 } 552 553 // }}} 554 // __convert_all{{{ 555 // Converts __v into array<_To, N>, where N is _NParts if non-zero or 556 // otherwise deduced from _To such that N * #elements(_To) <= #elements(__v). 557 // Note: this function may return less than all converted elements 558 template <typename _To, 559 size_t _NParts = 0, // allows to convert fewer or more (only last 560 // _To, to be partially filled) than all 561 size_t _Offset = 0, // where to start, # of elements (not Bytes or 562 // Parts) 563 typename _From, typename _FromVT = _VectorTraits<_From>> 564 _GLIBCXX_SIMD_INTRINSIC auto 565 __convert_all(_From __v) 566 { 567 if constexpr (is_arithmetic_v<_To> && _NParts != 1) 568 { 569 static_assert(_Offset < _FromVT::_S_full_size); 570 constexpr auto _Np 571 = _NParts == 0 ? 
_FromVT::_S_partial_width - _Offset : _NParts; 572 return __generate_from_n_evaluations<_Np, array<_To, _Np>>( 573 [&](auto __i) { return static_cast<_To>(__v[__i + _Offset]); }); 574 } 575 else 576 { 577 static_assert(__is_vector_type_v<_To>); 578 using _ToVT = _VectorTraits<_To>; 579 if constexpr (__is_vector_type_v<_From>) 580 return __convert_all<_To, _NParts>(__as_wrapper(__v)); 581 else if constexpr (_NParts == 1) 582 { 583 static_assert(_Offset % _ToVT::_S_full_size == 0); 584 return array<_To, 1>{__vector_convert<_To>( 585 __extract_part<_Offset / _ToVT::_S_full_size, 586 __div_roundup(_FromVT::_S_partial_width, 587 _ToVT::_S_full_size)>(__v))}; 588 } 589 #if _GLIBCXX_SIMD_X86INTRIN // {{{ 590 else if constexpr (!__have_sse4_1 && _Offset == 0 591 && is_integral_v<typename _FromVT::value_type> 592 && sizeof(typename _FromVT::value_type) 593 < sizeof(typename _ToVT::value_type) 594 && !(sizeof(typename _FromVT::value_type) == 4 595 && is_same_v<typename _ToVT::value_type, double>)) 596 { 597 using _ToT = typename _ToVT::value_type; 598 using _FromT = typename _FromVT::value_type; 599 constexpr size_t _Np 600 = _NParts != 0 601 ? _NParts 602 : (_FromVT::_S_partial_width / _ToVT::_S_full_size); 603 using _R = array<_To, _Np>; 604 // __adjust modifies its input to have _Np (use _SizeConstant) 605 // entries so that no unnecessary intermediate conversions are 606 // requested and, more importantly, no intermediate conversions are 607 // missing 608 [[maybe_unused]] auto __adjust 609 = [](auto __n, 610 auto __vv) -> _SimdWrapper<_FromT, decltype(__n)::value> { 611 return __vector_bitcast<_FromT, decltype(__n)::value>(__vv); 612 }; 613 [[maybe_unused]] const auto __vi = __to_intrin(__v); 614 auto&& __make_array = [](auto __x0, [[maybe_unused]] auto __x1) { 615 if constexpr (_Np == 1) 616 return _R{__intrin_bitcast<_To>(__x0)}; 617 else 618 return _R{__intrin_bitcast<_To>(__x0), 619 __intrin_bitcast<_To>(__x1)}; 620 }; 621 622 if constexpr (_Np == 0) 623 return _R{}; 624 else if constexpr (sizeof(_FromT) == 1 && sizeof(_ToT) == 2) 625 { 626 static_assert(is_integral_v<_FromT>); 627 static_assert(is_integral_v<_ToT>); 628 if constexpr (is_unsigned_v<_FromT>) 629 return __make_array(_mm_unpacklo_epi8(__vi, __m128i()), 630 _mm_unpackhi_epi8(__vi, __m128i())); 631 else 632 return __make_array( 633 _mm_srai_epi16(_mm_unpacklo_epi8(__vi, __vi), 8), 634 _mm_srai_epi16(_mm_unpackhi_epi8(__vi, __vi), 8)); 635 } 636 else if constexpr (sizeof(_FromT) == 2 && sizeof(_ToT) == 4) 637 { 638 static_assert(is_integral_v<_FromT>); 639 if constexpr (is_floating_point_v<_ToT>) 640 { 641 const auto __ints 642 = __convert_all<__vector_type16_t<int>, _Np>( 643 __adjust(_SizeConstant<_Np * 4>(), __v)); 644 return __generate_from_n_evaluations<_Np, _R>( 645 [&](auto __i) { 646 return __vector_convert<_To>(__as_wrapper(__ints[__i])); 647 }); 648 } 649 else if constexpr (is_unsigned_v<_FromT>) 650 return __make_array(_mm_unpacklo_epi16(__vi, __m128i()), 651 _mm_unpackhi_epi16(__vi, __m128i())); 652 else 653 return __make_array( 654 _mm_srai_epi32(_mm_unpacklo_epi16(__vi, __vi), 16), 655 _mm_srai_epi32(_mm_unpackhi_epi16(__vi, __vi), 16)); 656 } 657 else if constexpr (sizeof(_FromT) == 4 && sizeof(_ToT) == 8 658 && is_integral_v<_FromT> && is_integral_v<_ToT>) 659 { 660 if constexpr (is_unsigned_v<_FromT>) 661 return __make_array(_mm_unpacklo_epi32(__vi, __m128i()), 662 _mm_unpackhi_epi32(__vi, __m128i())); 663 else 664 return __make_array( 665 _mm_unpacklo_epi32(__vi, _mm_srai_epi32(__vi, 31)), 666 
_mm_unpackhi_epi32(__vi, _mm_srai_epi32(__vi, 31))); 667 } 668 else if constexpr (sizeof(_FromT) == 4 && sizeof(_ToT) == 8 669 && is_integral_v<_FromT> && is_integral_v<_ToT>) 670 { 671 if constexpr (is_unsigned_v<_FromT>) 672 return __make_array(_mm_unpacklo_epi32(__vi, __m128i()), 673 _mm_unpackhi_epi32(__vi, __m128i())); 674 else 675 return __make_array( 676 _mm_unpacklo_epi32(__vi, _mm_srai_epi32(__vi, 31)), 677 _mm_unpackhi_epi32(__vi, _mm_srai_epi32(__vi, 31))); 678 } 679 else if constexpr (sizeof(_FromT) == 1 && sizeof(_ToT) >= 4 680 && is_signed_v<_FromT>) 681 { 682 const __m128i __vv[2] = {_mm_unpacklo_epi8(__vi, __vi), 683 _mm_unpackhi_epi8(__vi, __vi)}; 684 const __vector_type_t<int, 4> __vvvv[4] = { 685 __vector_bitcast<int>(_mm_unpacklo_epi16(__vv[0], __vv[0])), 686 __vector_bitcast<int>(_mm_unpackhi_epi16(__vv[0], __vv[0])), 687 __vector_bitcast<int>(_mm_unpacklo_epi16(__vv[1], __vv[1])), 688 __vector_bitcast<int>(_mm_unpackhi_epi16(__vv[1], __vv[1]))}; 689 if constexpr (sizeof(_ToT) == 4) 690 return __generate_from_n_evaluations<_Np, _R>([&](auto __i) { 691 return __vector_convert<_To>( 692 _SimdWrapper<int, 4>(__vvvv[__i] >> 24)); 693 }); 694 else if constexpr (is_integral_v<_ToT>) 695 return __generate_from_n_evaluations<_Np, _R>([&](auto __i) { 696 const auto __signbits = __to_intrin(__vvvv[__i / 2] >> 31); 697 const auto __sx32 = __to_intrin(__vvvv[__i / 2] >> 24); 698 return __vector_bitcast<_ToT>( 699 __i % 2 == 0 ? _mm_unpacklo_epi32(__sx32, __signbits) 700 : _mm_unpackhi_epi32(__sx32, __signbits)); 701 }); 702 else 703 return __generate_from_n_evaluations<_Np, _R>([&](auto __i) { 704 const _SimdWrapper<int, 4> __int4 = __vvvv[__i / 2] >> 24; 705 return __vector_convert<_To>( 706 __i % 2 == 0 ? __int4 707 : _SimdWrapper<int, 4>( 708 _mm_unpackhi_epi64(__to_intrin(__int4), 709 __to_intrin(__int4)))); 710 }); 711 } 712 else if constexpr (sizeof(_FromT) == 1 && sizeof(_ToT) == 4) 713 { 714 const auto __shorts = __convert_all<__vector_type16_t< 715 conditional_t<is_signed_v<_FromT>, short, unsigned short>>>( 716 __adjust(_SizeConstant<(_Np + 1) / 2 * 8>(), __v)); 717 return __generate_from_n_evaluations<_Np, _R>([&](auto __i) { 718 return __convert_all<_To>(__shorts[__i / 2])[__i % 2]; 719 }); 720 } 721 else if constexpr (sizeof(_FromT) == 2 && sizeof(_ToT) == 8 722 && is_signed_v<_FromT> && is_integral_v<_ToT>) 723 { 724 const __m128i __vv[2] = {_mm_unpacklo_epi16(__vi, __vi), 725 _mm_unpackhi_epi16(__vi, __vi)}; 726 const __vector_type16_t<int> __vvvv[4] 727 = {__vector_bitcast<int>( 728 _mm_unpacklo_epi32(_mm_srai_epi32(__vv[0], 16), 729 _mm_srai_epi32(__vv[0], 31))), 730 __vector_bitcast<int>( 731 _mm_unpackhi_epi32(_mm_srai_epi32(__vv[0], 16), 732 _mm_srai_epi32(__vv[0], 31))), 733 __vector_bitcast<int>( 734 _mm_unpacklo_epi32(_mm_srai_epi32(__vv[1], 16), 735 _mm_srai_epi32(__vv[1], 31))), 736 __vector_bitcast<int>( 737 _mm_unpackhi_epi32(_mm_srai_epi32(__vv[1], 16), 738 _mm_srai_epi32(__vv[1], 31)))}; 739 return __generate_from_n_evaluations<_Np, _R>([&](auto __i) { 740 return __vector_bitcast<_ToT>(__vvvv[__i]); 741 }); 742 } 743 else if constexpr (sizeof(_FromT) <= 2 && sizeof(_ToT) == 8) 744 { 745 const auto __ints 746 = __convert_all<__vector_type16_t<conditional_t< 747 is_signed_v<_FromT> || is_floating_point_v<_ToT>, int, 748 unsigned int>>>( 749 __adjust(_SizeConstant<(_Np + 1) / 2 * 4>(), __v)); 750 return __generate_from_n_evaluations<_Np, _R>([&](auto __i) { 751 return __convert_all<_To>(__ints[__i / 2])[__i % 2]; 752 }); 753 } 754 else 755 
__assert_unreachable<_To>(); 756 } 757 #endif // _GLIBCXX_SIMD_X86INTRIN }}} 758 else if constexpr ((_FromVT::_S_partial_width - _Offset) 759 > _ToVT::_S_full_size) 760 { 761 /* 762 static_assert( 763 (_FromVT::_S_partial_width & (_FromVT::_S_partial_width - 1)) == 764 0, 765 "__convert_all only supports power-of-2 number of elements. 766 Otherwise " "the return type cannot be array<_To, N>."); 767 */ 768 constexpr size_t _NTotal 769 = (_FromVT::_S_partial_width - _Offset) / _ToVT::_S_full_size; 770 constexpr size_t _Np = _NParts == 0 ? _NTotal : _NParts; 771 static_assert( 772 _Np <= _NTotal 773 || (_Np == _NTotal + 1 774 && (_FromVT::_S_partial_width - _Offset) % _ToVT::_S_full_size 775 > 0)); 776 using _R = array<_To, _Np>; 777 if constexpr (_Np == 1) 778 return _R{__vector_convert<_To>( 779 __extract_part<_Offset, _FromVT::_S_partial_width, 780 _ToVT::_S_full_size>(__v))}; 781 else 782 return __generate_from_n_evaluations<_Np, _R>([&]( 783 auto __i) constexpr { 784 auto __part 785 = __extract_part<__i * _ToVT::_S_full_size + _Offset, 786 _FromVT::_S_partial_width, 787 _ToVT::_S_full_size>(__v); 788 return __vector_convert<_To>(__part); 789 }); 790 } 791 else if constexpr (_Offset == 0) 792 return array<_To, 1>{__vector_convert<_To>(__v)}; 793 else 794 return array<_To, 1>{__vector_convert<_To>( 795 __extract_part<_Offset, _FromVT::_S_partial_width, 796 _FromVT::_S_partial_width - _Offset>(__v))}; 797 } 798 } 799 800 // }}} 801 802 // _GnuTraits {{{ 803 template <typename _Tp, typename _Mp, typename _Abi, size_t _Np> 804 struct _GnuTraits 805 { 806 using _IsValid = true_type; 807 using _SimdImpl = typename _Abi::_SimdImpl; 808 using _MaskImpl = typename _Abi::_MaskImpl; 809 810 // simd and simd_mask member types {{{ 811 using _SimdMember = _SimdWrapper<_Tp, _Np>; 812 using _MaskMember = _SimdWrapper<_Mp, _Np>; 813 static constexpr size_t _S_simd_align = alignof(_SimdMember); 814 static constexpr size_t _S_mask_align = alignof(_MaskMember); 815 816 // }}} 817 // size metadata {{{ 818 static constexpr size_t _S_full_size = _SimdMember::_S_full_size; 819 static constexpr bool _S_is_partial = _SimdMember::_S_is_partial; 820 821 // }}} 822 // _SimdBase / base class for simd, providing extra conversions {{{ 823 struct _SimdBase2 824 { 825 _GLIBCXX_SIMD_ALWAYS_INLINE 826 explicit operator __intrinsic_type_t<_Tp, _Np>() const 827 { 828 return __to_intrin(static_cast<const simd<_Tp, _Abi>*>(this)->_M_data); 829 } 830 _GLIBCXX_SIMD_ALWAYS_INLINE 831 explicit operator __vector_type_t<_Tp, _Np>() const 832 { 833 return static_cast<const simd<_Tp, _Abi>*>(this)->_M_data.__builtin(); 834 } 835 }; 836 837 struct _SimdBase1 838 { 839 _GLIBCXX_SIMD_ALWAYS_INLINE 840 explicit operator __intrinsic_type_t<_Tp, _Np>() const 841 { return __data(*static_cast<const simd<_Tp, _Abi>*>(this)); } 842 }; 843 844 using _SimdBase = conditional_t< 845 is_same<__intrinsic_type_t<_Tp, _Np>, __vector_type_t<_Tp, _Np>>::value, 846 _SimdBase1, _SimdBase2>; 847 848 // }}} 849 // _MaskBase {{{ 850 struct _MaskBase2 851 { 852 _GLIBCXX_SIMD_ALWAYS_INLINE 853 explicit operator __intrinsic_type_t<_Tp, _Np>() const 854 { 855 return static_cast<const simd_mask<_Tp, _Abi>*>(this) 856 ->_M_data.__intrin(); 857 } 858 _GLIBCXX_SIMD_ALWAYS_INLINE 859 explicit operator __vector_type_t<_Tp, _Np>() const 860 { 861 return static_cast<const simd_mask<_Tp, _Abi>*>(this)->_M_data._M_data; 862 } 863 }; 864 865 struct _MaskBase1 866 { 867 _GLIBCXX_SIMD_ALWAYS_INLINE 868 explicit operator __intrinsic_type_t<_Tp, _Np>() const 869 { return 
__data(*static_cast<const simd_mask<_Tp, _Abi>*>(this)); } 870 }; 871 872 using _MaskBase = conditional_t< 873 is_same<__intrinsic_type_t<_Tp, _Np>, __vector_type_t<_Tp, _Np>>::value, 874 _MaskBase1, _MaskBase2>; 875 876 // }}} 877 // _MaskCastType {{{ 878 // parameter type of one explicit simd_mask constructor 879 class _MaskCastType 880 { 881 using _Up = __intrinsic_type_t<_Tp, _Np>; 882 _Up _M_data; 883 884 public: 885 _GLIBCXX_SIMD_ALWAYS_INLINE 886 _MaskCastType(_Up __x) : _M_data(__x) {} 887 _GLIBCXX_SIMD_ALWAYS_INLINE 888 operator _MaskMember() const { return _M_data; } 889 }; 890 891 // }}} 892 // _SimdCastType {{{ 893 // parameter type of one explicit simd constructor 894 class _SimdCastType1 895 { 896 using _Ap = __intrinsic_type_t<_Tp, _Np>; 897 _SimdMember _M_data; 898 899 public: 900 _GLIBCXX_SIMD_ALWAYS_INLINE 901 _SimdCastType1(_Ap __a) : _M_data(__vector_bitcast<_Tp>(__a)) {} 902 _GLIBCXX_SIMD_ALWAYS_INLINE 903 operator _SimdMember() const { return _M_data; } 904 }; 905 906 class _SimdCastType2 907 { 908 using _Ap = __intrinsic_type_t<_Tp, _Np>; 909 using _Bp = __vector_type_t<_Tp, _Np>; 910 _SimdMember _M_data; 911 912 public: 913 _GLIBCXX_SIMD_ALWAYS_INLINE 914 _SimdCastType2(_Ap __a) : _M_data(__vector_bitcast<_Tp>(__a)) {} 915 _GLIBCXX_SIMD_ALWAYS_INLINE 916 _SimdCastType2(_Bp __b) : _M_data(__b) {} 917 _GLIBCXX_SIMD_ALWAYS_INLINE 918 operator _SimdMember() const { return _M_data; } 919 }; 920 921 using _SimdCastType = conditional_t< 922 is_same<__intrinsic_type_t<_Tp, _Np>, __vector_type_t<_Tp, _Np>>::value, 923 _SimdCastType1, _SimdCastType2>; 924 //}}} 925 }; 926 927 // }}} 928 struct _CommonImplX86; 929 struct _CommonImplNeon; 930 struct _CommonImplBuiltin; 931 template <typename _Abi, typename = __detail::__odr_helper> struct _SimdImplBuiltin; 932 template <typename _Abi, typename = __detail::__odr_helper> struct _MaskImplBuiltin; 933 template <typename _Abi, typename = __detail::__odr_helper> struct _SimdImplX86; 934 template <typename _Abi, typename = __detail::__odr_helper> struct _MaskImplX86; 935 template <typename _Abi, typename = __detail::__odr_helper> struct _SimdImplNeon; 936 template <typename _Abi, typename = __detail::__odr_helper> struct _MaskImplNeon; 937 template <typename _Abi, typename = __detail::__odr_helper> struct _SimdImplPpc; 938 template <typename _Abi, typename = __detail::__odr_helper> struct _MaskImplPpc; 939 940 // simd_abi::_VecBuiltin {{{ 941 template <int _UsedBytes> 942 struct simd_abi::_VecBuiltin 943 { 944 template <typename _Tp> 945 static constexpr size_t _S_size = _UsedBytes / sizeof(_Tp); 946 947 // validity traits {{{ 948 struct _IsValidAbiTag : __bool_constant<(_UsedBytes > 1)> {}; 949 950 template <typename _Tp> 951 struct _IsValidSizeFor 952 : __bool_constant<(_UsedBytes / sizeof(_Tp) > 1 953 && _UsedBytes % sizeof(_Tp) == 0 954 && _UsedBytes <= __vectorized_sizeof<_Tp>() 955 && (!__have_avx512f || _UsedBytes <= 32))> {}; 956 957 template <typename _Tp> 958 struct _IsValid : conjunction<_IsValidAbiTag, __is_vectorizable<_Tp>, 959 _IsValidSizeFor<_Tp>> {}; 960 961 template <typename _Tp> 962 static constexpr bool _S_is_valid_v = _IsValid<_Tp>::value; 963 964 // }}} 965 // _SimdImpl/_MaskImpl {{{ 966 #if _GLIBCXX_SIMD_X86INTRIN 967 using _CommonImpl = _CommonImplX86; 968 using _SimdImpl = _SimdImplX86<_VecBuiltin<_UsedBytes>>; 969 using _MaskImpl = _MaskImplX86<_VecBuiltin<_UsedBytes>>; 970 #elif _GLIBCXX_SIMD_HAVE_NEON 971 using _CommonImpl = _CommonImplNeon; 972 using _SimdImpl = _SimdImplNeon<_VecBuiltin<_UsedBytes>>; 973 
using _MaskImpl = _MaskImplNeon<_VecBuiltin<_UsedBytes>>; 974 #else 975 using _CommonImpl = _CommonImplBuiltin; 976 #ifdef __ALTIVEC__ 977 using _SimdImpl = _SimdImplPpc<_VecBuiltin<_UsedBytes>>; 978 using _MaskImpl = _MaskImplPpc<_VecBuiltin<_UsedBytes>>; 979 #else 980 using _SimdImpl = _SimdImplBuiltin<_VecBuiltin<_UsedBytes>>; 981 using _MaskImpl = _MaskImplBuiltin<_VecBuiltin<_UsedBytes>>; 982 #endif 983 #endif 984 985 // }}} 986 // __traits {{{ 987 template <typename _Tp> 988 using _MaskValueType = __int_for_sizeof_t<_Tp>; 989 990 template <typename _Tp> 991 using __traits 992 = conditional_t<_S_is_valid_v<_Tp>, 993 _GnuTraits<_Tp, _MaskValueType<_Tp>, 994 _VecBuiltin<_UsedBytes>, _S_size<_Tp>>, 995 _InvalidTraits>; 996 997 //}}} 998 // size metadata {{{ 999 template <typename _Tp> 1000 static constexpr size_t _S_full_size = __traits<_Tp>::_S_full_size; 1001 1002 template <typename _Tp> 1003 static constexpr bool _S_is_partial = __traits<_Tp>::_S_is_partial; 1004 1005 // }}} 1006 // implicit masks {{{ 1007 template <typename _Tp> 1008 using _MaskMember = _SimdWrapper<_MaskValueType<_Tp>, _S_size<_Tp>>; 1009 1010 template <typename _Tp> 1011 _GLIBCXX_SIMD_INTRINSIC static constexpr _MaskMember<_Tp> 1012 _S_implicit_mask() 1013 { 1014 using _UV = typename _MaskMember<_Tp>::_BuiltinType; 1015 if constexpr (!_MaskMember<_Tp>::_S_is_partial) 1016 return ~_UV(); 1017 else 1018 { 1019 constexpr auto __size = _S_size<_Tp>; 1020 _GLIBCXX_SIMD_USE_CONSTEXPR auto __r = __generate_vector<_UV>( 1021 [](auto __i) constexpr { return __i < __size ? -1 : 0; }); 1022 return __r; 1023 } 1024 } 1025 1026 template <typename _Tp> 1027 _GLIBCXX_SIMD_INTRINSIC static constexpr __intrinsic_type_t<_Tp, 1028 _S_size<_Tp>> 1029 _S_implicit_mask_intrin() 1030 { 1031 return __to_intrin( 1032 __vector_bitcast<_Tp>(_S_implicit_mask<_Tp>()._M_data)); 1033 } 1034 1035 template <typename _TW, typename _TVT = _VectorTraits<_TW>> 1036 _GLIBCXX_SIMD_INTRINSIC static constexpr _TW _S_masked(_TW __x) 1037 { 1038 using _Tp = typename _TVT::value_type; 1039 if constexpr (!_MaskMember<_Tp>::_S_is_partial) 1040 return __x; 1041 else 1042 return __and(__as_vector(__x), 1043 __vector_bitcast<_Tp>(_S_implicit_mask<_Tp>())); 1044 } 1045 1046 template <typename _TW, typename _TVT = _VectorTraits<_TW>> 1047 _GLIBCXX_SIMD_INTRINSIC static constexpr auto 1048 __make_padding_nonzero(_TW __x) 1049 { 1050 using _Tp = typename _TVT::value_type; 1051 if constexpr (!_S_is_partial<_Tp>) 1052 return __x; 1053 else 1054 { 1055 _GLIBCXX_SIMD_USE_CONSTEXPR auto __implicit_mask 1056 = __vector_bitcast<_Tp>(_S_implicit_mask<_Tp>()); 1057 if constexpr (is_integral_v<_Tp>) 1058 return __or(__x, ~__implicit_mask); 1059 else 1060 { 1061 _GLIBCXX_SIMD_USE_CONSTEXPR auto __one 1062 = __andnot(__implicit_mask, 1063 __vector_broadcast<_S_full_size<_Tp>>(_Tp(1))); 1064 // it's not enough to return `x | 1_in_padding` because the 1065 // padding in x might be inf or nan (independent of 1066 // __FINITE_MATH_ONLY__, because it's about padding bits) 1067 return __or(__and(__x, __implicit_mask), __one); 1068 } 1069 } 1070 } 1071 // }}} 1072 }; 1073 1074 // }}} 1075 // simd_abi::_VecBltnBtmsk {{{ 1076 template <int _UsedBytes> 1077 struct simd_abi::_VecBltnBtmsk 1078 { 1079 template <typename _Tp> 1080 static constexpr size_t _S_size = _UsedBytes / sizeof(_Tp); 1081 1082 // validity traits {{{ 1083 struct _IsValidAbiTag : __bool_constant<(_UsedBytes > 1)> {}; 1084 1085 template <typename _Tp> 1086 struct _IsValidSizeFor 1087 : __bool_constant<(_UsedBytes / 
sizeof(_Tp) > 1 1088 && _UsedBytes % sizeof(_Tp) == 0 && _UsedBytes <= 64 1089 && (_UsedBytes > 32 || __have_avx512vl))> {}; 1090 1091 // Bitmasks require at least AVX512F. If sizeof(_Tp) < 4 the AVX512BW is also 1092 // required. 1093 template <typename _Tp> 1094 struct _IsValid 1095 : conjunction< 1096 _IsValidAbiTag, __bool_constant<__have_avx512f>, 1097 __bool_constant<__have_avx512bw || (sizeof(_Tp) >= 4)>, 1098 __bool_constant<(__vectorized_sizeof<_Tp>() > sizeof(_Tp))>, 1099 _IsValidSizeFor<_Tp>> {}; 1100 1101 template <typename _Tp> 1102 static constexpr bool _S_is_valid_v = _IsValid<_Tp>::value; 1103 1104 // }}} 1105 // simd/_MaskImpl {{{ 1106 #if _GLIBCXX_SIMD_X86INTRIN 1107 using _CommonImpl = _CommonImplX86; 1108 using _SimdImpl = _SimdImplX86<_VecBltnBtmsk<_UsedBytes>>; 1109 using _MaskImpl = _MaskImplX86<_VecBltnBtmsk<_UsedBytes>>; 1110 #else 1111 template <int> 1112 struct _MissingImpl; 1113 1114 using _CommonImpl = _MissingImpl<_UsedBytes>; 1115 using _SimdImpl = _MissingImpl<_UsedBytes>; 1116 using _MaskImpl = _MissingImpl<_UsedBytes>; 1117 #endif 1118 1119 // }}} 1120 // __traits {{{ 1121 template <typename _Tp> 1122 using _MaskMember = _SimdWrapper<bool, _S_size<_Tp>>; 1123 1124 template <typename _Tp> 1125 using __traits = conditional_t< 1126 _S_is_valid_v<_Tp>, 1127 _GnuTraits<_Tp, bool, _VecBltnBtmsk<_UsedBytes>, _S_size<_Tp>>, 1128 _InvalidTraits>; 1129 1130 //}}} 1131 // size metadata {{{ 1132 template <typename _Tp> 1133 static constexpr size_t _S_full_size = __traits<_Tp>::_S_full_size; 1134 template <typename _Tp> 1135 static constexpr bool _S_is_partial = __traits<_Tp>::_S_is_partial; 1136 1137 // }}} 1138 // implicit mask {{{ 1139 private: 1140 template <typename _Tp> 1141 using _ImplicitMask = _SimdWrapper<bool, _S_size<_Tp>>; 1142 1143 public: 1144 template <size_t _Np> 1145 _GLIBCXX_SIMD_INTRINSIC static constexpr __bool_storage_member_type_t<_Np> 1146 __implicit_mask_n() 1147 { 1148 using _Tp = __bool_storage_member_type_t<_Np>; 1149 return _Np < sizeof(_Tp) * __CHAR_BIT__ ? _Tp((1ULL << _Np) - 1) : ~_Tp(); 1150 } 1151 1152 template <typename _Tp> 1153 _GLIBCXX_SIMD_INTRINSIC static constexpr _ImplicitMask<_Tp> 1154 _S_implicit_mask() 1155 { return __implicit_mask_n<_S_size<_Tp>>(); } 1156 1157 template <typename _Tp> 1158 _GLIBCXX_SIMD_INTRINSIC static constexpr __bool_storage_member_type_t< 1159 _S_size<_Tp>> 1160 _S_implicit_mask_intrin() 1161 { return __implicit_mask_n<_S_size<_Tp>>(); } 1162 1163 template <typename _Tp, size_t _Np> 1164 _GLIBCXX_SIMD_INTRINSIC static constexpr _SimdWrapper<_Tp, _Np> 1165 _S_masked(_SimdWrapper<_Tp, _Np> __x) 1166 { 1167 if constexpr (is_same_v<_Tp, bool>) 1168 if constexpr (_Np < 8 || (_Np & (_Np - 1)) != 0) 1169 return _MaskImpl::_S_bit_and( 1170 __x, _SimdWrapper<_Tp, _Np>( 1171 __bool_storage_member_type_t<_Np>((1ULL << _Np) - 1))); 1172 else 1173 return __x; 1174 else 1175 return _S_masked(__x._M_data); 1176 } 1177 1178 template <typename _TV> 1179 _GLIBCXX_SIMD_INTRINSIC static constexpr _TV 1180 _S_masked(_TV __x) 1181 { 1182 using _Tp = typename _VectorTraits<_TV>::value_type; 1183 static_assert( 1184 !__is_bitmask_v<_TV>, 1185 "_VecBltnBtmsk::_S_masked cannot work on bitmasks, since it doesn't " 1186 "know the number of elements. 
Use _SimdWrapper<bool, N> instead."); 1187 if constexpr (_S_is_partial<_Tp>) 1188 { 1189 constexpr size_t _Np = _S_size<_Tp>; 1190 return __make_dependent_t<_TV, _CommonImpl>::_S_blend( 1191 _S_implicit_mask<_Tp>(), _SimdWrapper<_Tp, _Np>(), 1192 _SimdWrapper<_Tp, _Np>(__x)); 1193 } 1194 else 1195 return __x; 1196 } 1197 1198 template <typename _TV, typename _TVT = _VectorTraits<_TV>> 1199 _GLIBCXX_SIMD_INTRINSIC static constexpr auto 1200 __make_padding_nonzero(_TV __x) 1201 { 1202 using _Tp = typename _TVT::value_type; 1203 if constexpr (!_S_is_partial<_Tp>) 1204 return __x; 1205 else 1206 { 1207 constexpr size_t _Np = _S_size<_Tp>; 1208 if constexpr (is_integral_v<typename _TVT::value_type>) 1209 return __x 1210 | __generate_vector<_Tp, _S_full_size<_Tp>>( 1211 [](auto __i) -> _Tp { 1212 if (__i < _Np) 1213 return 0; 1214 else 1215 return 1; 1216 }); 1217 else 1218 return __make_dependent_t<_TV, _CommonImpl>::_S_blend( 1219 _S_implicit_mask<_Tp>(), 1220 _SimdWrapper<_Tp, _Np>( 1221 __vector_broadcast<_S_full_size<_Tp>>(_Tp(1))), 1222 _SimdWrapper<_Tp, _Np>(__x)) 1223 ._M_data; 1224 } 1225 } 1226 1227 // }}} 1228 }; 1229 1230 //}}} 1231 // _CommonImplBuiltin {{{ 1232 struct _CommonImplBuiltin 1233 { 1234 // _S_converts_via_decomposition{{{ 1235 // This lists all cases where a __vector_convert needs to fall back to 1236 // conversion of individual scalars (i.e. decompose the input vector into 1237 // scalars, convert, compose output vector). In those cases, _S_masked_load & 1238 // _S_masked_store prefer to use the _S_bit_iteration implementation. 1239 template <typename _From, typename _To, size_t _ToSize> 1240 static inline constexpr bool __converts_via_decomposition_v 1241 = sizeof(_From) != sizeof(_To); 1242 1243 // }}} 1244 // _S_load{{{ 1245 template <typename _Tp, size_t _Np, size_t _Bytes = _Np * sizeof(_Tp)> 1246 _GLIBCXX_SIMD_INTRINSIC static __vector_type_t<_Tp, _Np> 1247 _S_load(const void* __p) 1248 { 1249 static_assert(_Np > 1); 1250 static_assert(_Bytes % sizeof(_Tp) == 0); 1251 using _Rp = __vector_type_t<_Tp, _Np>; 1252 if constexpr (sizeof(_Rp) == _Bytes) 1253 { 1254 _Rp __r; 1255 __builtin_memcpy(&__r, __p, _Bytes); 1256 return __r; 1257 } 1258 else 1259 { 1260 #ifdef _GLIBCXX_SIMD_WORKAROUND_PR90424 1261 using _Up = conditional_t< 1262 is_integral_v<_Tp>, 1263 conditional_t<_Bytes % 4 == 0, 1264 conditional_t<_Bytes % 8 == 0, long long, int>, 1265 conditional_t<_Bytes % 2 == 0, short, signed char>>, 1266 conditional_t<(_Bytes < 8 || _Np % 2 == 1 || _Np == 2), _Tp, 1267 double>>; 1268 using _V = __vector_type_t<_Up, _Np * sizeof(_Tp) / sizeof(_Up)>; 1269 if constexpr (sizeof(_V) != sizeof(_Rp)) 1270 { // on i386 with 4 < _Bytes <= 8 1271 _Rp __r{}; 1272 __builtin_memcpy(&__r, __p, _Bytes); 1273 return __r; 1274 } 1275 else 1276 #else // _GLIBCXX_SIMD_WORKAROUND_PR90424 1277 using _V = _Rp; 1278 #endif // _GLIBCXX_SIMD_WORKAROUND_PR90424 1279 { 1280 _V __r{}; 1281 static_assert(_Bytes <= sizeof(_V)); 1282 __builtin_memcpy(&__r, __p, _Bytes); 1283 return reinterpret_cast<_Rp>(__r); 1284 } 1285 } 1286 } 1287 1288 // }}} 1289 // _S_store {{{ 1290 template <size_t _ReqBytes = 0, typename _TV> 1291 _GLIBCXX_SIMD_INTRINSIC static void _S_store(_TV __x, void* __addr) 1292 { 1293 constexpr size_t _Bytes = _ReqBytes == 0 ? 
sizeof(__x) : _ReqBytes; 1294 static_assert(sizeof(__x) >= _Bytes); 1295 1296 if constexpr (__is_vector_type_v<_TV>) 1297 { 1298 using _Tp = typename _VectorTraits<_TV>::value_type; 1299 constexpr size_t _Np = _Bytes / sizeof(_Tp); 1300 static_assert(_Np * sizeof(_Tp) == _Bytes); 1301 1302 #ifdef _GLIBCXX_SIMD_WORKAROUND_PR90424 1303 using _Up = conditional_t< 1304 (is_integral_v<_Tp> || _Bytes < 4), 1305 conditional_t<(sizeof(__x) > sizeof(long long)), long long, _Tp>, 1306 float>; 1307 const auto __v = __vector_bitcast<_Up>(__x); 1308 #else // _GLIBCXX_SIMD_WORKAROUND_PR90424 1309 const __vector_type_t<_Tp, _Np> __v = __x; 1310 #endif // _GLIBCXX_SIMD_WORKAROUND_PR90424 1311 1312 if constexpr ((_Bytes & (_Bytes - 1)) != 0) 1313 { 1314 constexpr size_t _MoreBytes = std::__bit_ceil(_Bytes); 1315 alignas(decltype(__v)) char __tmp[_MoreBytes]; 1316 __builtin_memcpy(__tmp, &__v, _MoreBytes); 1317 __builtin_memcpy(__addr, __tmp, _Bytes); 1318 } 1319 else 1320 __builtin_memcpy(__addr, &__v, _Bytes); 1321 } 1322 else 1323 __builtin_memcpy(__addr, &__x, _Bytes); 1324 } 1325 1326 template <typename _Tp, size_t _Np> 1327 _GLIBCXX_SIMD_INTRINSIC static void _S_store(_SimdWrapper<_Tp, _Np> __x, 1328 void* __addr) 1329 { _S_store<_Np * sizeof(_Tp)>(__x._M_data, __addr); } 1330 1331 // }}} 1332 // _S_store_bool_array(_BitMask) {{{ 1333 template <size_t _Np, bool _Sanitized> 1334 _GLIBCXX_SIMD_INTRINSIC static constexpr void 1335 _S_store_bool_array(_BitMask<_Np, _Sanitized> __x, bool* __mem) 1336 { 1337 if constexpr (_Np == 1) 1338 __mem[0] = __x[0]; 1339 else if constexpr (_Np == 2) 1340 { 1341 short __bool2 = (__x._M_to_bits() * 0x81) & 0x0101; 1342 _S_store<_Np>(__bool2, __mem); 1343 } 1344 else if constexpr (_Np == 3) 1345 { 1346 int __bool3 = (__x._M_to_bits() * 0x4081) & 0x010101; 1347 _S_store<_Np>(__bool3, __mem); 1348 } 1349 else 1350 { 1351 __execute_n_times<__div_roundup(_Np, 4)>([&](auto __i) { 1352 constexpr int __offset = __i * 4; 1353 constexpr int __remaining = _Np - __offset; 1354 if constexpr (__remaining > 4 && __remaining <= 7) 1355 { 1356 const _ULLong __bool7 1357 = (__x.template _M_extract<__offset>()._M_to_bits() 1358 * 0x40810204081ULL) 1359 & 0x0101010101010101ULL; 1360 _S_store<__remaining>(__bool7, __mem + __offset); 1361 } 1362 else if constexpr (__remaining >= 4) 1363 { 1364 int __bits = __x.template _M_extract<__offset>()._M_to_bits(); 1365 if constexpr (__remaining > 7) 1366 __bits &= 0xf; 1367 const int __bool4 = (__bits * 0x204081) & 0x01010101; 1368 _S_store<4>(__bool4, __mem + __offset); 1369 } 1370 }); 1371 } 1372 } 1373 1374 // }}} 1375 // _S_blend{{{ 1376 template <typename _Tp, size_t _Np> 1377 _GLIBCXX_SIMD_INTRINSIC static constexpr auto 1378 _S_blend(_SimdWrapper<__int_for_sizeof_t<_Tp>, _Np> __k, 1379 _SimdWrapper<_Tp, _Np> __at0, _SimdWrapper<_Tp, _Np> __at1) 1380 { return __k._M_data ? 
__at1._M_data : __at0._M_data; } 1381 1382 // }}} 1383 }; 1384 1385 // }}} 1386 // _SimdImplBuiltin {{{1 1387 template <typename _Abi, typename> 1388 struct _SimdImplBuiltin 1389 { 1390 // member types {{{2 1391 template <typename _Tp> 1392 static constexpr size_t _S_max_store_size = 16; 1393 1394 using abi_type = _Abi; 1395 1396 template <typename _Tp> 1397 using _TypeTag = _Tp*; 1398 1399 template <typename _Tp> 1400 using _SimdMember = typename _Abi::template __traits<_Tp>::_SimdMember; 1401 1402 template <typename _Tp> 1403 using _MaskMember = typename _Abi::template _MaskMember<_Tp>; 1404 1405 template <typename _Tp> 1406 static constexpr size_t _S_size = _Abi::template _S_size<_Tp>; 1407 1408 template <typename _Tp> 1409 static constexpr size_t _S_full_size = _Abi::template _S_full_size<_Tp>; 1410 1411 using _CommonImpl = typename _Abi::_CommonImpl; 1412 using _SuperImpl = typename _Abi::_SimdImpl; 1413 using _MaskImpl = typename _Abi::_MaskImpl; 1414 1415 // _M_make_simd(_SimdWrapper/__intrinsic_type_t) {{{2 1416 template <typename _Tp, size_t _Np> 1417 _GLIBCXX_SIMD_INTRINSIC static simd<_Tp, _Abi> 1418 _M_make_simd(_SimdWrapper<_Tp, _Np> __x) 1419 { return {__private_init, __x}; } 1420 1421 template <typename _Tp, size_t _Np> 1422 _GLIBCXX_SIMD_INTRINSIC static simd<_Tp, _Abi> 1423 _M_make_simd(__intrinsic_type_t<_Tp, _Np> __x) 1424 { return {__private_init, __vector_bitcast<_Tp>(__x)}; } 1425 1426 // _S_broadcast {{{2 1427 template <typename _Tp> 1428 _GLIBCXX_SIMD_INTRINSIC static constexpr _SimdMember<_Tp> 1429 _S_broadcast(_Tp __x) noexcept 1430 { return __vector_broadcast<_S_full_size<_Tp>>(__x); } 1431 1432 // _S_generator {{{2 1433 template <typename _Fp, typename _Tp> 1434 inline static constexpr _SimdMember<_Tp> _S_generator(_Fp&& __gen, 1435 _TypeTag<_Tp>) 1436 { 1437 return __generate_vector<_Tp, _S_full_size<_Tp>>([&]( 1438 auto __i) constexpr { 1439 if constexpr (__i < _S_size<_Tp>) 1440 return __gen(__i); 1441 else 1442 return 0; 1443 }); 1444 } 1445 1446 // _S_load {{{2 1447 template <typename _Tp, typename _Up> 1448 _GLIBCXX_SIMD_INTRINSIC static _SimdMember<_Tp> 1449 _S_load(const _Up* __mem, _TypeTag<_Tp>) noexcept 1450 { 1451 constexpr size_t _Np = _S_size<_Tp>; 1452 constexpr size_t __max_load_size 1453 = (sizeof(_Up) >= 4 && __have_avx512f) || __have_avx512bw ? 64 1454 : (is_floating_point_v<_Up> && __have_avx) || __have_avx2 ? 32 1455 : 16; 1456 constexpr size_t __bytes_to_load = sizeof(_Up) * _Np; 1457 if constexpr (sizeof(_Up) > 8) 1458 return __generate_vector<_Tp, _SimdMember<_Tp>::_S_full_size>([&]( 1459 auto __i) constexpr { 1460 return static_cast<_Tp>(__i < _Np ? __mem[__i] : 0); 1461 }); 1462 else if constexpr (is_same_v<_Up, _Tp>) 1463 return _CommonImpl::template _S_load<_Tp, _S_full_size<_Tp>, 1464 _Np * sizeof(_Tp)>(__mem); 1465 else if constexpr (__bytes_to_load <= __max_load_size) 1466 return __convert<_SimdMember<_Tp>>( 1467 _CommonImpl::template _S_load<_Up, _Np>(__mem)); 1468 else if constexpr (__bytes_to_load % __max_load_size == 0) 1469 { 1470 constexpr size_t __n_loads = __bytes_to_load / __max_load_size; 1471 constexpr size_t __elements_per_load = _Np / __n_loads; 1472 return __call_with_n_evaluations<__n_loads>( 1473 [](auto... 
__uncvted) { 1474 return __convert<_SimdMember<_Tp>>(__uncvted...); 1475 }, 1476 [&](auto __i) { 1477 return _CommonImpl::template _S_load<_Up, __elements_per_load>( 1478 __mem + __i * __elements_per_load); 1479 }); 1480 } 1481 else if constexpr (__bytes_to_load % (__max_load_size / 2) == 0 1482 && __max_load_size > 16) 1483 { // e.g. int[] -> <char, 12> with AVX2 1484 constexpr size_t __n_loads 1485 = __bytes_to_load / (__max_load_size / 2); 1486 constexpr size_t __elements_per_load = _Np / __n_loads; 1487 return __call_with_n_evaluations<__n_loads>( 1488 [](auto... __uncvted) { 1489 return __convert<_SimdMember<_Tp>>(__uncvted...); 1490 }, 1491 [&](auto __i) { 1492 return _CommonImpl::template _S_load<_Up, __elements_per_load>( 1493 __mem + __i * __elements_per_load); 1494 }); 1495 } 1496 else // e.g. int[] -> <char, 9> 1497 return __call_with_subscripts( 1498 __mem, make_index_sequence<_Np>(), [](auto... __args) { 1499 return __vector_type_t<_Tp, _S_full_size<_Tp>>{ 1500 static_cast<_Tp>(__args)...}; 1501 }); 1502 } 1503 1504 // _S_masked_load {{{2 1505 template <typename _Tp, size_t _Np, typename _Up> 1506 static inline _SimdWrapper<_Tp, _Np> 1507 _S_masked_load(_SimdWrapper<_Tp, _Np> __merge, _MaskMember<_Tp> __k, 1508 const _Up* __mem) noexcept 1509 { 1510 _BitOps::_S_bit_iteration(_MaskImpl::_S_to_bits(__k), [&](auto __i) { 1511 __merge._M_set(__i, static_cast<_Tp>(__mem[__i])); 1512 }); 1513 return __merge; 1514 } 1515 1516 // _S_store {{{2 1517 template <typename _Tp, typename _Up> 1518 _GLIBCXX_SIMD_INTRINSIC static void 1519 _S_store(_SimdMember<_Tp> __v, _Up* __mem, _TypeTag<_Tp>) noexcept 1520 { 1521 // TODO: converting int -> "smaller int" can be optimized with AVX512 1522 constexpr size_t _Np = _S_size<_Tp>; 1523 constexpr size_t __max_store_size 1524 = _SuperImpl::template _S_max_store_size<_Up>; 1525 if constexpr (sizeof(_Up) > 8) 1526 __execute_n_times<_Np>([&](auto __i) constexpr { 1527 __mem[__i] = __v[__i]; 1528 }); 1529 else if constexpr (is_same_v<_Up, _Tp>) 1530 _CommonImpl::_S_store(__v, __mem); 1531 else if constexpr (sizeof(_Up) * _Np <= __max_store_size) 1532 _CommonImpl::_S_store(_SimdWrapper<_Up, _Np>(__convert<_Up>(__v)), 1533 __mem); 1534 else 1535 { 1536 constexpr size_t __vsize = __max_store_size / sizeof(_Up); 1537 // round up to convert the last partial vector as well: 1538 constexpr size_t __stores = __div_roundup(_Np, __vsize); 1539 constexpr size_t __full_stores = _Np / __vsize; 1540 using _V = __vector_type_t<_Up, __vsize>; 1541 const array<_V, __stores> __converted 1542 = __convert_all<_V, __stores>(__v); 1543 __execute_n_times<__full_stores>([&](auto __i) constexpr { 1544 _CommonImpl::_S_store(__converted[__i], __mem + __i * __vsize); 1545 }); 1546 if constexpr (__full_stores < __stores) 1547 _CommonImpl::template _S_store<(_Np - __full_stores * __vsize) 1548 * sizeof(_Up)>( 1549 __converted[__full_stores], __mem + __full_stores * __vsize); 1550 } 1551 } 1552 1553 // _S_masked_store_nocvt {{{2 1554 template <typename _Tp, size_t _Np> 1555 _GLIBCXX_SIMD_INTRINSIC static void 1556 _S_masked_store_nocvt(_SimdWrapper<_Tp, _Np> __v, _Tp* __mem, 1557 _MaskMember<_Tp> __k) 1558 { 1559 _BitOps::_S_bit_iteration( 1560 _MaskImpl::_S_to_bits(__k), [&](auto __i) constexpr { 1561 __mem[__i] = __v[__i]; 1562 }); 1563 } 1564 1565 // _S_masked_store {{{2 1566 template <typename _TW, typename _TVT = _VectorTraits<_TW>, 1567 typename _Tp = typename _TVT::value_type, typename _Up> 1568 static inline void 1569 _S_masked_store(const _TW __v, _Up* __mem, const 
_MaskMember<_Tp> __k) 1570 noexcept 1571 { 1572 constexpr size_t _TV_size = _S_size<_Tp>; 1573 [[maybe_unused]] const auto __vi = __to_intrin(__v); 1574 constexpr size_t __max_store_size 1575 = _SuperImpl::template _S_max_store_size<_Up>; 1576 if constexpr ( 1577 is_same_v< 1578 _Tp, 1579 _Up> || (is_integral_v<_Tp> && is_integral_v<_Up> && sizeof(_Tp) == sizeof(_Up))) 1580 { 1581 // bitwise or no conversion, reinterpret: 1582 const _MaskMember<_Up> __kk = [&]() { 1583 if constexpr (__is_bitmask_v<decltype(__k)>) 1584 return _MaskMember<_Up>(__k._M_data); 1585 else 1586 return __wrapper_bitcast<__int_for_sizeof_t<_Up>>(__k); 1587 }(); 1588 _SuperImpl::_S_masked_store_nocvt(__wrapper_bitcast<_Up>(__v), 1589 __mem, __kk); 1590 } 1591 else if constexpr (__vectorized_sizeof<_Up>() > sizeof(_Up) 1592 && !_CommonImpl:: 1593 template __converts_via_decomposition_v< 1594 _Tp, _Up, __max_store_size>) 1595 { // conversion via decomposition is better handled via the 1596 // bit_iteration 1597 // fallback below 1598 constexpr size_t _UW_size 1599 = std::min(_TV_size, __max_store_size / sizeof(_Up)); 1600 static_assert(_UW_size <= _TV_size); 1601 using _UW = _SimdWrapper<_Up, _UW_size>; 1602 using _UV = __vector_type_t<_Up, _UW_size>; 1603 using _UAbi = simd_abi::deduce_t<_Up, _UW_size>; 1604 if constexpr (_UW_size == _TV_size) // one convert+store 1605 { 1606 const _UW __converted = __convert<_UW>(__v); 1607 _SuperImpl::_S_masked_store_nocvt( 1608 __converted, __mem, 1609 _UAbi::_MaskImpl::template _S_convert< 1610 __int_for_sizeof_t<_Up>>(__k)); 1611 } 1612 else 1613 { 1614 static_assert(_UW_size * sizeof(_Up) == __max_store_size); 1615 constexpr size_t _NFullStores = _TV_size / _UW_size; 1616 constexpr size_t _NAllStores 1617 = __div_roundup(_TV_size, _UW_size); 1618 constexpr size_t _NParts = _S_full_size<_Tp> / _UW_size; 1619 const array<_UV, _NAllStores> __converted 1620 = __convert_all<_UV, _NAllStores>(__v); 1621 __execute_n_times<_NFullStores>([&](auto __i) { 1622 _SuperImpl::_S_masked_store_nocvt( 1623 _UW(__converted[__i]), __mem + __i * _UW_size, 1624 _UAbi::_MaskImpl::template _S_convert< 1625 __int_for_sizeof_t<_Up>>( 1626 __extract_part<__i, _NParts>(__k.__as_full_vector()))); 1627 }); 1628 if constexpr (_NAllStores 1629 > _NFullStores) // one partial at the end 1630 _SuperImpl::_S_masked_store_nocvt( 1631 _UW(__converted[_NFullStores]), 1632 __mem + _NFullStores * _UW_size, 1633 _UAbi::_MaskImpl::template _S_convert< 1634 __int_for_sizeof_t<_Up>>( 1635 __extract_part<_NFullStores, _NParts>( 1636 __k.__as_full_vector()))); 1637 } 1638 } 1639 else 1640 _BitOps::_S_bit_iteration( 1641 _MaskImpl::_S_to_bits(__k), [&](auto __i) constexpr { 1642 __mem[__i] = static_cast<_Up>(__v[__i]); 1643 }); 1644 } 1645 1646 // _S_complement {{{2 1647 template <typename _Tp, size_t _Np> 1648 _GLIBCXX_SIMD_INTRINSIC static constexpr _SimdWrapper<_Tp, _Np> 1649 _S_complement(_SimdWrapper<_Tp, _Np> __x) noexcept 1650 { 1651 if constexpr (is_floating_point_v<_Tp>) 1652 return __vector_bitcast<_Tp>(~__vector_bitcast<__int_for_sizeof_t<_Tp>>(__x)); 1653 else 1654 return ~__x._M_data; 1655 } 1656 1657 // _S_unary_minus {{{2 1658 template <typename _Tp, size_t _Np> 1659 _GLIBCXX_SIMD_INTRINSIC static constexpr _SimdWrapper<_Tp, _Np> 1660 _S_unary_minus(_SimdWrapper<_Tp, _Np> __x) noexcept 1661 { 1662 // GCC doesn't use the psign instructions, but pxor & psub seem to be 1663 // just as good a choice as pcmpeqd & psign. So meh. 
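      // Element-wise this is plain negation of the wrapped vector; a scalar
      // sketch of the intended semantics (illustration only, not used by the
      // implementation):
      //   for (size_t __i = 0; __i < _Np; ++__i)
      //     __r[__i] = -__x._M_data[__i];
      // For floating-point _Tp this flips the sign bit, so negating -0. yields +0.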
1664 return -__x._M_data; 1665 } 1666 1667 // arithmetic operators {{{2 1668 template <typename _Tp, size_t _Np> 1669 _GLIBCXX_SIMD_INTRINSIC static constexpr _SimdWrapper<_Tp, _Np> 1670 _S_plus(_SimdWrapper<_Tp, _Np> __x, _SimdWrapper<_Tp, _Np> __y) 1671 { return __x._M_data + __y._M_data; } 1672 1673 template <typename _Tp, size_t _Np> 1674 _GLIBCXX_SIMD_INTRINSIC static constexpr _SimdWrapper<_Tp, _Np> 1675 _S_minus(_SimdWrapper<_Tp, _Np> __x, _SimdWrapper<_Tp, _Np> __y) 1676 { return __x._M_data - __y._M_data; } 1677 1678 template <typename _Tp, size_t _Np> 1679 _GLIBCXX_SIMD_INTRINSIC static constexpr _SimdWrapper<_Tp, _Np> 1680 _S_multiplies(_SimdWrapper<_Tp, _Np> __x, _SimdWrapper<_Tp, _Np> __y) 1681 { return __x._M_data * __y._M_data; } 1682 1683 template <typename _Tp, size_t _Np> 1684 _GLIBCXX_SIMD_INTRINSIC static constexpr _SimdWrapper<_Tp, _Np> 1685 _S_divides(_SimdWrapper<_Tp, _Np> __x, _SimdWrapper<_Tp, _Np> __y) 1686 { 1687 // Note that division by 0 is always UB, so we must ensure we avoid the 1688 // case for partial registers 1689 if constexpr (!_Abi::template _S_is_partial<_Tp>) 1690 return __x._M_data / __y._M_data; 1691 else 1692 return __x._M_data / _Abi::__make_padding_nonzero(__y._M_data); 1693 } 1694 1695 template <typename _Tp, size_t _Np> 1696 _GLIBCXX_SIMD_INTRINSIC static constexpr _SimdWrapper<_Tp, _Np> 1697 _S_modulus(_SimdWrapper<_Tp, _Np> __x, _SimdWrapper<_Tp, _Np> __y) 1698 { 1699 if constexpr (!_Abi::template _S_is_partial<_Tp>) 1700 return __x._M_data % __y._M_data; 1701 else 1702 return __as_vector(__x) 1703 % _Abi::__make_padding_nonzero(__as_vector(__y)); 1704 } 1705 1706 template <typename _Tp, size_t _Np> 1707 _GLIBCXX_SIMD_INTRINSIC static constexpr _SimdWrapper<_Tp, _Np> 1708 _S_bit_and(_SimdWrapper<_Tp, _Np> __x, _SimdWrapper<_Tp, _Np> __y) 1709 { return __and(__x, __y); } 1710 1711 template <typename _Tp, size_t _Np> 1712 _GLIBCXX_SIMD_INTRINSIC static constexpr _SimdWrapper<_Tp, _Np> 1713 _S_bit_or(_SimdWrapper<_Tp, _Np> __x, _SimdWrapper<_Tp, _Np> __y) 1714 { return __or(__x, __y); } 1715 1716 template <typename _Tp, size_t _Np> 1717 _GLIBCXX_SIMD_INTRINSIC static constexpr _SimdWrapper<_Tp, _Np> 1718 _S_bit_xor(_SimdWrapper<_Tp, _Np> __x, _SimdWrapper<_Tp, _Np> __y) 1719 { return __xor(__x, __y); } 1720 1721 template <typename _Tp, size_t _Np> 1722 _GLIBCXX_SIMD_INTRINSIC static _SimdWrapper<_Tp, _Np> 1723 _S_bit_shift_left(_SimdWrapper<_Tp, _Np> __x, _SimdWrapper<_Tp, _Np> __y) 1724 { return __x._M_data << __y._M_data; } 1725 1726 template <typename _Tp, size_t _Np> 1727 _GLIBCXX_SIMD_INTRINSIC static _SimdWrapper<_Tp, _Np> 1728 _S_bit_shift_right(_SimdWrapper<_Tp, _Np> __x, _SimdWrapper<_Tp, _Np> __y) 1729 { return __x._M_data >> __y._M_data; } 1730 1731 template <typename _Tp, size_t _Np> 1732 _GLIBCXX_SIMD_INTRINSIC static constexpr _SimdWrapper<_Tp, _Np> 1733 _S_bit_shift_left(_SimdWrapper<_Tp, _Np> __x, int __y) 1734 { return __x._M_data << __y; } 1735 1736 template <typename _Tp, size_t _Np> 1737 _GLIBCXX_SIMD_INTRINSIC static constexpr _SimdWrapper<_Tp, _Np> 1738 _S_bit_shift_right(_SimdWrapper<_Tp, _Np> __x, int __y) 1739 { return __x._M_data >> __y; } 1740 1741 // compares {{{2 1742 // _S_equal_to {{{3 1743 template <typename _Tp, size_t _Np> 1744 _GLIBCXX_SIMD_INTRINSIC static constexpr _MaskMember<_Tp> 1745 _S_equal_to(_SimdWrapper<_Tp, _Np> __x, _SimdWrapper<_Tp, _Np> __y) 1746 { return __x._M_data == __y._M_data; } 1747 1748 // _S_not_equal_to {{{3 1749 template <typename _Tp, size_t _Np> 1750 _GLIBCXX_SIMD_INTRINSIC static 
constexpr _MaskMember<_Tp> 1751 _S_not_equal_to(_SimdWrapper<_Tp, _Np> __x, _SimdWrapper<_Tp, _Np> __y) 1752 { return __x._M_data != __y._M_data; } 1753 1754 // _S_less {{{3 1755 template <typename _Tp, size_t _Np> 1756 _GLIBCXX_SIMD_INTRINSIC static constexpr _MaskMember<_Tp> 1757 _S_less(_SimdWrapper<_Tp, _Np> __x, _SimdWrapper<_Tp, _Np> __y) 1758 { return __x._M_data < __y._M_data; } 1759 1760 // _S_less_equal {{{3 1761 template <typename _Tp, size_t _Np> 1762 _GLIBCXX_SIMD_INTRINSIC static constexpr _MaskMember<_Tp> 1763 _S_less_equal(_SimdWrapper<_Tp, _Np> __x, _SimdWrapper<_Tp, _Np> __y) 1764 { return __x._M_data <= __y._M_data; } 1765 1766 // _S_negate {{{2 1767 template <typename _Tp, size_t _Np> 1768 _GLIBCXX_SIMD_INTRINSIC static constexpr _MaskMember<_Tp> 1769 _S_negate(_SimdWrapper<_Tp, _Np> __x) noexcept 1770 { return !__x._M_data; } 1771 1772 // _S_min, _S_max, _S_minmax {{{2 1773 template <typename _Tp, size_t _Np> 1774 _GLIBCXX_SIMD_NORMAL_MATH _GLIBCXX_SIMD_INTRINSIC static constexpr 1775 _SimdWrapper<_Tp, _Np> 1776 _S_min(_SimdWrapper<_Tp, _Np> __a, _SimdWrapper<_Tp, _Np> __b) 1777 { return __a._M_data < __b._M_data ? __a._M_data : __b._M_data; } 1778 1779 template <typename _Tp, size_t _Np> 1780 _GLIBCXX_SIMD_NORMAL_MATH _GLIBCXX_SIMD_INTRINSIC static constexpr 1781 _SimdWrapper<_Tp, _Np> 1782 _S_max(_SimdWrapper<_Tp, _Np> __a, _SimdWrapper<_Tp, _Np> __b) 1783 { return __a._M_data > __b._M_data ? __a._M_data : __b._M_data; } 1784 1785 template <typename _Tp, size_t _Np> 1786 _GLIBCXX_SIMD_NORMAL_MATH _GLIBCXX_SIMD_INTRINSIC static constexpr 1787 pair<_SimdWrapper<_Tp, _Np>, _SimdWrapper<_Tp, _Np>> 1788 _S_minmax(_SimdWrapper<_Tp, _Np> __a, _SimdWrapper<_Tp, _Np> __b) 1789 { 1790 return {__a._M_data < __b._M_data ? __a._M_data : __b._M_data, 1791 __a._M_data < __b._M_data ? __b._M_data : __a._M_data}; 1792 } 1793 1794 // reductions {{{2 1795 template <size_t _Np, size_t... _Is, size_t... 
_Zeros, typename _Tp, 1796 typename _BinaryOperation> 1797 _GLIBCXX_SIMD_INTRINSIC static _Tp 1798 _S_reduce_partial(index_sequence<_Is...>, index_sequence<_Zeros...>, 1799 simd<_Tp, _Abi> __x, _BinaryOperation&& __binary_op) 1800 { 1801 using _V = __vector_type_t<_Tp, _Np / 2>; 1802 static_assert(sizeof(_V) <= sizeof(__x)); 1803 // _S_full_size is the size of the smallest native SIMD register that 1804 // can store _Np/2 elements: 1805 using _FullSimd = __deduced_simd<_Tp, _VectorTraits<_V>::_S_full_size>; 1806 using _HalfSimd = __deduced_simd<_Tp, _Np / 2>; 1807 const auto __xx = __as_vector(__x); 1808 return _HalfSimd::abi_type::_SimdImpl::_S_reduce( 1809 static_cast<_HalfSimd>(__as_vector(__binary_op( 1810 static_cast<_FullSimd>(__intrin_bitcast<_V>(__xx)), 1811 static_cast<_FullSimd>(__intrin_bitcast<_V>( 1812 __vector_permute<(_Np / 2 + _Is)..., (int(_Zeros * 0) - 1)...>( 1813 __xx)))))), 1814 __binary_op); 1815 } 1816 1817 template <typename _Tp, typename _BinaryOperation> 1818 _GLIBCXX_SIMD_INTRINSIC static constexpr _Tp 1819 _S_reduce(simd<_Tp, _Abi> __x, _BinaryOperation&& __binary_op) 1820 { 1821 constexpr size_t _Np = simd_size_v<_Tp, _Abi>; 1822 if constexpr (_Np == 1) 1823 return __x[0]; 1824 else if constexpr (_Np == 2) 1825 return __binary_op(simd<_Tp, simd_abi::scalar>(__x[0]), 1826 simd<_Tp, simd_abi::scalar>(__x[1]))[0]; 1827 else if constexpr (_Abi::template _S_is_partial<_Tp>) //{{{ 1828 { 1829 [[maybe_unused]] constexpr auto __full_size 1830 = _Abi::template _S_full_size<_Tp>; 1831 if constexpr (_Np == 3) 1832 return __binary_op( 1833 __binary_op(simd<_Tp, simd_abi::scalar>(__x[0]), 1834 simd<_Tp, simd_abi::scalar>(__x[1])), 1835 simd<_Tp, simd_abi::scalar>(__x[2]))[0]; 1836 else if constexpr (is_same_v<__remove_cvref_t<_BinaryOperation>, 1837 plus<>>) 1838 { 1839 using _Ap = simd_abi::deduce_t<_Tp, __full_size>; 1840 return _Ap::_SimdImpl::_S_reduce( 1841 simd<_Tp, _Ap>(__private_init, 1842 _Abi::_S_masked(__as_vector(__x))), 1843 __binary_op); 1844 } 1845 else if constexpr (is_same_v<__remove_cvref_t<_BinaryOperation>, 1846 multiplies<>>) 1847 { 1848 using _Ap = simd_abi::deduce_t<_Tp, __full_size>; 1849 using _TW = _SimdWrapper<_Tp, __full_size>; 1850 _GLIBCXX_SIMD_USE_CONSTEXPR auto __implicit_mask_full 1851 = _Abi::template _S_implicit_mask<_Tp>().__as_full_vector(); 1852 _GLIBCXX_SIMD_USE_CONSTEXPR _TW __one 1853 = __vector_broadcast<__full_size>(_Tp(1)); 1854 const _TW __x_full = __data(__x).__as_full_vector(); 1855 const _TW __x_padded_with_ones 1856 = _Ap::_CommonImpl::_S_blend(__implicit_mask_full, __one, 1857 __x_full); 1858 return _Ap::_SimdImpl::_S_reduce( 1859 simd<_Tp, _Ap>(__private_init, __x_padded_with_ones), 1860 __binary_op); 1861 } 1862 else if constexpr (_Np & 1) 1863 { 1864 using _Ap = simd_abi::deduce_t<_Tp, _Np - 1>; 1865 return __binary_op( 1866 simd<_Tp, simd_abi::scalar>(_Ap::_SimdImpl::_S_reduce( 1867 simd<_Tp, _Ap>( 1868 __intrin_bitcast<__vector_type_t<_Tp, _Np - 1>>( 1869 __as_vector(__x))), 1870 __binary_op)), 1871 simd<_Tp, simd_abi::scalar>(__x[_Np - 1]))[0]; 1872 } 1873 else 1874 return _S_reduce_partial<_Np>( 1875 make_index_sequence<_Np / 2>(), 1876 make_index_sequence<__full_size - _Np / 2>(), __x, __binary_op); 1877 } //}}} 1878 else if constexpr (sizeof(__x) == 16) //{{{ 1879 { 1880 if constexpr (_Np == 16) 1881 { 1882 const auto __y = __data(__x); 1883 __x = __binary_op( 1884 _M_make_simd<_Tp, _Np>( 1885 __vector_permute<0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 1886 7, 7>(__y)), 1887 _M_make_simd<_Tp, _Np>( 1888 
__vector_permute<8, 8, 9, 9, 10, 10, 11, 11, 12, 12, 13, 13, 1889 14, 14, 15, 15>(__y))); 1890 } 1891 if constexpr (_Np >= 8) 1892 { 1893 const auto __y = __vector_bitcast<short>(__data(__x)); 1894 __x = __binary_op( 1895 _M_make_simd<_Tp, _Np>(__vector_bitcast<_Tp>( 1896 __vector_permute<0, 0, 1, 1, 2, 2, 3, 3>(__y))), 1897 _M_make_simd<_Tp, _Np>(__vector_bitcast<_Tp>( 1898 __vector_permute<4, 4, 5, 5, 6, 6, 7, 7>(__y)))); 1899 } 1900 if constexpr (_Np >= 4) 1901 { 1902 using _Up = conditional_t<is_floating_point_v<_Tp>, float, int>; 1903 const auto __y = __vector_bitcast<_Up>(__data(__x)); 1904 __x = __binary_op(__x, 1905 _M_make_simd<_Tp, _Np>(__vector_bitcast<_Tp>( 1906 __vector_permute<3, 2, 1, 0>(__y)))); 1907 } 1908 using _Up = conditional_t<is_floating_point_v<_Tp>, double, _LLong>; 1909 const auto __y = __vector_bitcast<_Up>(__data(__x)); 1910 __x = __binary_op(__x, _M_make_simd<_Tp, _Np>(__vector_bitcast<_Tp>( 1911 __vector_permute<1, 1>(__y)))); 1912 return __x[0]; 1913 } //}}} 1914 else 1915 { 1916 static_assert(sizeof(__x) > __min_vector_size<_Tp>); 1917 static_assert((_Np & (_Np - 1)) == 0); // _Np must be a power of 2 1918 using _Ap = simd_abi::deduce_t<_Tp, _Np / 2>; 1919 using _V = simd<_Tp, _Ap>; 1920 return _Ap::_SimdImpl::_S_reduce( 1921 __binary_op(_V(__private_init, __extract<0, 2>(__as_vector(__x))), 1922 _V(__private_init, 1923 __extract<1, 2>(__as_vector(__x)))), 1924 static_cast<_BinaryOperation&&>(__binary_op)); 1925 } 1926 } 1927 1928 // math {{{2 1929 // frexp, modf and copysign implemented in simd_math.h 1930 #define _GLIBCXX_SIMD_MATH_FALLBACK(__name) \ 1931 template <typename _Tp, typename... _More> \ 1932 static _Tp _S_##__name(const _Tp& __x, const _More&... __more) \ 1933 { \ 1934 return __generate_vector<_Tp>( \ 1935 [&](auto __i) { return __name(__x[__i], __more[__i]...); }); \ 1936 } 1937 1938 #define _GLIBCXX_SIMD_MATH_FALLBACK_MASKRET(__name) \ 1939 template <typename _Tp, typename... _More> \ 1940 static typename _Tp::mask_type _S_##__name(const _Tp& __x, \ 1941 const _More&... __more) \ 1942 { \ 1943 return __generate_vector<_Tp>( \ 1944 [&](auto __i) { return __name(__x[__i], __more[__i]...); }); \ 1945 } 1946 1947 #define _GLIBCXX_SIMD_MATH_FALLBACK_FIXEDRET(_RetTp, __name) \ 1948 template <typename _Tp, typename... _More> \ 1949 static auto _S_##__name(const _Tp& __x, const _More&... 
__more) \ 1950 { \ 1951 return __fixed_size_storage_t<_RetTp, \ 1952 _VectorTraits<_Tp>::_S_partial_width>:: \ 1953 _S_generate([&](auto __meta) constexpr { \ 1954 return __meta._S_generator( \ 1955 [&](auto __i) { \ 1956 return __name(__x[__meta._S_offset + __i], \ 1957 __more[__meta._S_offset + __i]...); \ 1958 }, \ 1959 static_cast<_RetTp*>(nullptr)); \ 1960 }); \ 1961 } 1962 1963 _GLIBCXX_SIMD_MATH_FALLBACK(acos) 1964 _GLIBCXX_SIMD_MATH_FALLBACK(asin) 1965 _GLIBCXX_SIMD_MATH_FALLBACK(atan) 1966 _GLIBCXX_SIMD_MATH_FALLBACK(atan2) 1967 _GLIBCXX_SIMD_MATH_FALLBACK(cos) 1968 _GLIBCXX_SIMD_MATH_FALLBACK(sin) 1969 _GLIBCXX_SIMD_MATH_FALLBACK(tan) 1970 _GLIBCXX_SIMD_MATH_FALLBACK(acosh) 1971 _GLIBCXX_SIMD_MATH_FALLBACK(asinh) 1972 _GLIBCXX_SIMD_MATH_FALLBACK(atanh) 1973 _GLIBCXX_SIMD_MATH_FALLBACK(cosh) 1974 _GLIBCXX_SIMD_MATH_FALLBACK(sinh) 1975 _GLIBCXX_SIMD_MATH_FALLBACK(tanh) 1976 _GLIBCXX_SIMD_MATH_FALLBACK(exp) 1977 _GLIBCXX_SIMD_MATH_FALLBACK(exp2) 1978 _GLIBCXX_SIMD_MATH_FALLBACK(expm1) 1979 _GLIBCXX_SIMD_MATH_FALLBACK(ldexp) 1980 _GLIBCXX_SIMD_MATH_FALLBACK_FIXEDRET(int, ilogb) 1981 _GLIBCXX_SIMD_MATH_FALLBACK(log) 1982 _GLIBCXX_SIMD_MATH_FALLBACK(log10) 1983 _GLIBCXX_SIMD_MATH_FALLBACK(log1p) 1984 _GLIBCXX_SIMD_MATH_FALLBACK(log2) 1985 _GLIBCXX_SIMD_MATH_FALLBACK(logb) 1986 1987 // modf implemented in simd_math.h 1988 _GLIBCXX_SIMD_MATH_FALLBACK(scalbn) 1989 _GLIBCXX_SIMD_MATH_FALLBACK(scalbln) 1990 _GLIBCXX_SIMD_MATH_FALLBACK(cbrt) 1991 _GLIBCXX_SIMD_MATH_FALLBACK(fabs) 1992 _GLIBCXX_SIMD_MATH_FALLBACK(pow) 1993 _GLIBCXX_SIMD_MATH_FALLBACK(sqrt) 1994 _GLIBCXX_SIMD_MATH_FALLBACK(erf) 1995 _GLIBCXX_SIMD_MATH_FALLBACK(erfc) 1996 _GLIBCXX_SIMD_MATH_FALLBACK(lgamma) 1997 _GLIBCXX_SIMD_MATH_FALLBACK(tgamma) 1998 1999 _GLIBCXX_SIMD_MATH_FALLBACK_FIXEDRET(long, lrint) 2000 _GLIBCXX_SIMD_MATH_FALLBACK_FIXEDRET(long long, llrint) 2001 2002 _GLIBCXX_SIMD_MATH_FALLBACK_FIXEDRET(long, lround) 2003 _GLIBCXX_SIMD_MATH_FALLBACK_FIXEDRET(long long, llround) 2004 2005 _GLIBCXX_SIMD_MATH_FALLBACK(fmod) 2006 _GLIBCXX_SIMD_MATH_FALLBACK(remainder) 2007 2008 template <typename _Tp, typename _TVT = _VectorTraits<_Tp>> 2009 static _Tp 2010 _S_remquo(const _Tp __x, const _Tp __y, 2011 __fixed_size_storage_t<int, _TVT::_S_partial_width>* __z) 2012 { 2013 return __generate_vector<_Tp>([&](auto __i) { 2014 int __tmp; 2015 auto __r = remquo(__x[__i], __y[__i], &__tmp); 2016 __z->_M_set(__i, __tmp); 2017 return __r; 2018 }); 2019 } 2020 2021 // copysign in simd_math.h 2022 _GLIBCXX_SIMD_MATH_FALLBACK(nextafter) 2023 _GLIBCXX_SIMD_MATH_FALLBACK(fdim) 2024 _GLIBCXX_SIMD_MATH_FALLBACK(fmax) 2025 _GLIBCXX_SIMD_MATH_FALLBACK(fmin) 2026 _GLIBCXX_SIMD_MATH_FALLBACK(fma) 2027 2028 template <typename _Tp, size_t _Np> 2029 static constexpr _MaskMember<_Tp> 2030 _S_isgreater(_SimdWrapper<_Tp, _Np> __x, 2031 _SimdWrapper<_Tp, _Np> __y) noexcept 2032 { 2033 using _Ip = __int_for_sizeof_t<_Tp>; 2034 const auto __xn = __vector_bitcast<_Ip>(__x); 2035 const auto __yn = __vector_bitcast<_Ip>(__y); 2036 const auto __xp = __xn < 0 ? -(__xn & __finite_max_v<_Ip>) : __xn; 2037 const auto __yp = __yn < 0 ? 
-(__yn & __finite_max_v<_Ip>) : __yn; 2038 return __andnot(_SuperImpl::_S_isunordered(__x, __y)._M_data, 2039 __xp > __yp); 2040 } 2041 2042 template <typename _Tp, size_t _Np> 2043 static constexpr _MaskMember<_Tp> 2044 _S_isgreaterequal(_SimdWrapper<_Tp, _Np> __x, 2045 _SimdWrapper<_Tp, _Np> __y) noexcept 2046 { 2047 using _Ip = __int_for_sizeof_t<_Tp>; 2048 const auto __xn = __vector_bitcast<_Ip>(__x); 2049 const auto __yn = __vector_bitcast<_Ip>(__y); 2050 const auto __xp = __xn < 0 ? -(__xn & __finite_max_v<_Ip>) : __xn; 2051 const auto __yp = __yn < 0 ? -(__yn & __finite_max_v<_Ip>) : __yn; 2052 return __andnot(_SuperImpl::_S_isunordered(__x, __y)._M_data, 2053 __xp >= __yp); 2054 } 2055 2056 template <typename _Tp, size_t _Np> 2057 static constexpr _MaskMember<_Tp> 2058 _S_isless(_SimdWrapper<_Tp, _Np> __x, _SimdWrapper<_Tp, _Np> __y) noexcept 2059 { 2060 using _Ip = __int_for_sizeof_t<_Tp>; 2061 const auto __xn = __vector_bitcast<_Ip>(__x); 2062 const auto __yn = __vector_bitcast<_Ip>(__y); 2063 const auto __xp = __xn < 0 ? -(__xn & __finite_max_v<_Ip>) : __xn; 2064 const auto __yp = __yn < 0 ? -(__yn & __finite_max_v<_Ip>) : __yn; 2065 return __andnot(_SuperImpl::_S_isunordered(__x, __y)._M_data, 2066 __xp < __yp); 2067 } 2068 2069 template <typename _Tp, size_t _Np> 2070 static constexpr _MaskMember<_Tp> 2071 _S_islessequal(_SimdWrapper<_Tp, _Np> __x, 2072 _SimdWrapper<_Tp, _Np> __y) noexcept 2073 { 2074 using _Ip = __int_for_sizeof_t<_Tp>; 2075 const auto __xn = __vector_bitcast<_Ip>(__x); 2076 const auto __yn = __vector_bitcast<_Ip>(__y); 2077 const auto __xp = __xn < 0 ? -(__xn & __finite_max_v<_Ip>) : __xn; 2078 const auto __yp = __yn < 0 ? -(__yn & __finite_max_v<_Ip>) : __yn; 2079 return __andnot(_SuperImpl::_S_isunordered(__x, __y)._M_data, 2080 __xp <= __yp); 2081 } 2082 2083 template <typename _Tp, size_t _Np> 2084 static constexpr _MaskMember<_Tp> 2085 _S_islessgreater(_SimdWrapper<_Tp, _Np> __x, 2086 _SimdWrapper<_Tp, _Np> __y) noexcept 2087 { 2088 return __andnot(_SuperImpl::_S_isunordered(__x, __y), 2089 _SuperImpl::_S_not_equal_to(__x, __y)); 2090 } 2091 2092 #undef _GLIBCXX_SIMD_MATH_FALLBACK 2093 #undef _GLIBCXX_SIMD_MATH_FALLBACK_MASKRET 2094 #undef _GLIBCXX_SIMD_MATH_FALLBACK_FIXEDRET 2095 // _S_abs {{{3 2096 template <typename _Tp, size_t _Np> 2097 _GLIBCXX_SIMD_INTRINSIC static _SimdWrapper<_Tp, _Np> 2098 _S_abs(_SimdWrapper<_Tp, _Np> __x) noexcept 2099 { 2100 // if (__builtin_is_constant_evaluated()) 2101 // { 2102 // return __x._M_data < 0 ? -__x._M_data : __x._M_data; 2103 // } 2104 if constexpr (is_floating_point_v<_Tp>) 2105 // `v < 0 ? -v : v` cannot compile to the efficient implementation of 2106 // masking the signbit off because it must consider v == -0 2107 2108 // ~(-0.) & v would be easy, but breaks with fno-signed-zeros 2109 return __and(_S_absmask<__vector_type_t<_Tp, _Np>>, __x._M_data); 2110 else 2111 return __x._M_data < 0 ? -__x._M_data : __x._M_data; 2112 } 2113 2114 // }}}3 2115 // _S_plus_minus {{{ 2116 // Returns __x + __y - __y without -fassociative-math optimizing to __x. 2117 // - _TV must be __vector_type_t<floating-point type, N>. 2118 // - _UV must be _TV or floating-point type. 
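  // Illustration (not part of the implementation): _S_nearbyint and _S_trunc
  // below rely on the classic "add 2^(digits-1), then subtract it again"
  // rounding trick. E.g. for double, assuming the default round-to-nearest
  // mode (__shifter and __r are illustrative local names):
  //   const double __shifter = 0x1p52;                  // 1ull << (__digits_v<double> - 1)
  //   const double __r = (3.7 + __shifter) - __shifter; // __r == 4.0
  // With -fassociative-math GCC may fold (__x + __y) - __y back to __x, which
  // is why _S_plus_minus inserts an inline-asm barrier when __GCC_IEC_559 == 0
  // (see below).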
2119 template <typename _TV, typename _UV> 2120 _GLIBCXX_SIMD_INTRINSIC static constexpr _TV _S_plus_minus(_TV __x, 2121 _UV __y) noexcept 2122 { 2123 #if defined __i386__ && !defined __SSE_MATH__ 2124 if constexpr (sizeof(__x) == 8) 2125 { // operations on __x would use the FPU 2126 static_assert(is_same_v<_TV, __vector_type_t<float, 2>>); 2127 const auto __x4 = __vector_bitcast<float, 4>(__x); 2128 if constexpr (is_same_v<_TV, _UV>) 2129 return __vector_bitcast<float, 2>( 2130 _S_plus_minus(__x4, __vector_bitcast<float, 4>(__y))); 2131 else 2132 return __vector_bitcast<float, 2>(_S_plus_minus(__x4, __y)); 2133 } 2134 #endif 2135 #if !defined __clang__ && __GCC_IEC_559 == 0 2136 if (__builtin_is_constant_evaluated() 2137 || (__builtin_constant_p(__x) && __builtin_constant_p(__y))) 2138 return (__x + __y) - __y; 2139 else 2140 return [&] { 2141 __x += __y; 2142 if constexpr(__have_sse) 2143 { 2144 if constexpr (sizeof(__x) >= 16) 2145 asm("" : "+x"(__x)); 2146 else if constexpr (is_same_v<__vector_type_t<float, 2>, _TV>) 2147 asm("" : "+x"(__x[0]), "+x"(__x[1])); 2148 else 2149 __assert_unreachable<_TV>(); 2150 } 2151 else if constexpr(__have_neon) 2152 asm("" : "+w"(__x)); 2153 else if constexpr (__have_power_vmx) 2154 { 2155 if constexpr (is_same_v<__vector_type_t<float, 2>, _TV>) 2156 asm("" : "+fgr"(__x[0]), "+fgr"(__x[1])); 2157 else 2158 asm("" : "+v"(__x)); 2159 } 2160 else 2161 asm("" : "+g"(__x)); 2162 return __x - __y; 2163 }(); 2164 #else 2165 return (__x + __y) - __y; 2166 #endif 2167 } 2168 2169 // }}} 2170 // _S_nearbyint {{{3 2171 template <typename _Tp, typename _TVT = _VectorTraits<_Tp>> 2172 _GLIBCXX_SIMD_INTRINSIC static _Tp _S_nearbyint(_Tp __x_) noexcept 2173 { 2174 using value_type = typename _TVT::value_type; 2175 using _V = typename _TVT::type; 2176 const _V __x = __x_; 2177 const _V __absx = __and(__x, _S_absmask<_V>); 2178 static_assert(__CHAR_BIT__ * sizeof(1ull) >= __digits_v<value_type>); 2179 _GLIBCXX_SIMD_USE_CONSTEXPR _V __shifter_abs 2180 = _V() + (1ull << (__digits_v<value_type> - 1)); 2181 const _V __shifter = __or(__and(_S_signmask<_V>, __x), __shifter_abs); 2182 const _V __shifted = _S_plus_minus(__x, __shifter); 2183 return __absx < __shifter_abs ? __shifted : __x; 2184 } 2185 2186 // _S_rint {{{3 2187 template <typename _Tp, typename _TVT = _VectorTraits<_Tp>> 2188 _GLIBCXX_SIMD_INTRINSIC static _Tp _S_rint(_Tp __x) noexcept 2189 { 2190 return _SuperImpl::_S_nearbyint(__x); 2191 } 2192 2193 // _S_trunc {{{3 2194 template <typename _Tp, size_t _Np> 2195 _GLIBCXX_SIMD_INTRINSIC static _SimdWrapper<_Tp, _Np> 2196 _S_trunc(_SimdWrapper<_Tp, _Np> __x) 2197 { 2198 using _V = __vector_type_t<_Tp, _Np>; 2199 const _V __absx = __and(__x._M_data, _S_absmask<_V>); 2200 static_assert(__CHAR_BIT__ * sizeof(1ull) >= __digits_v<_Tp>); 2201 constexpr _Tp __shifter = 1ull << (__digits_v<_Tp> - 1); 2202 _V __truncated = _S_plus_minus(__absx, __shifter); 2203 __truncated -= __truncated > __absx ? _V() + 1 : _V(); 2204 return __absx < __shifter ? __or(__xor(__absx, __x._M_data), __truncated) 2205 : __x._M_data; 2206 } 2207 2208 // _S_round {{{3 2209 template <typename _Tp, size_t _Np> 2210 _GLIBCXX_SIMD_INTRINSIC static _SimdWrapper<_Tp, _Np> 2211 _S_round(_SimdWrapper<_Tp, _Np> __x) 2212 { 2213 const auto __abs_x = _SuperImpl::_S_abs(__x); 2214 const auto __t_abs = _SuperImpl::_S_trunc(__abs_x)._M_data; 2215 const auto __r_abs // round(abs(x)) = 2216 = __t_abs + (__abs_x._M_data - __t_abs >= _Tp(.5) ? 
_Tp(1) : 0); 2217 return __or(__xor(__abs_x._M_data, __x._M_data), __r_abs); 2218 } 2219 2220 // _S_floor {{{3 2221 template <typename _Tp, size_t _Np> 2222 _GLIBCXX_SIMD_INTRINSIC static _SimdWrapper<_Tp, _Np> 2223 _S_floor(_SimdWrapper<_Tp, _Np> __x) 2224 { 2225 const auto __y = _SuperImpl::_S_trunc(__x)._M_data; 2226 const auto __negative_input 2227 = __vector_bitcast<_Tp>(__x._M_data < __vector_broadcast<_Np, _Tp>(0)); 2228 const auto __mask 2229 = __andnot(__vector_bitcast<_Tp>(__y == __x._M_data), __negative_input); 2230 return __or(__andnot(__mask, __y), 2231 __and(__mask, __y - __vector_broadcast<_Np, _Tp>(1))); 2232 } 2233 2234 // _S_ceil {{{3 2235 template <typename _Tp, size_t _Np> 2236 _GLIBCXX_SIMD_INTRINSIC static _SimdWrapper<_Tp, _Np> 2237 _S_ceil(_SimdWrapper<_Tp, _Np> __x) 2238 { 2239 const auto __y = _SuperImpl::_S_trunc(__x)._M_data; 2240 const auto __negative_input 2241 = __vector_bitcast<_Tp>(__x._M_data < __vector_broadcast<_Np, _Tp>(0)); 2242 const auto __inv_mask 2243 = __or(__vector_bitcast<_Tp>(__y == __x._M_data), __negative_input); 2244 return __or(__and(__inv_mask, __y), 2245 __andnot(__inv_mask, __y + __vector_broadcast<_Np, _Tp>(1))); 2246 } 2247 2248 // _S_isnan {{{3 2249 template <typename _Tp, size_t _Np> 2250 _GLIBCXX_SIMD_INTRINSIC static _MaskMember<_Tp> 2251 _S_isnan([[maybe_unused]] _SimdWrapper<_Tp, _Np> __x) 2252 { 2253 #if __FINITE_MATH_ONLY__ 2254 return {}; // false 2255 #elif !defined __SUPPORT_SNAN__ 2256 return ~(__x._M_data == __x._M_data); 2257 #elif defined __STDC_IEC_559__ 2258 using _Ip = __int_for_sizeof_t<_Tp>; 2259 const auto __absn = __vector_bitcast<_Ip>(_SuperImpl::_S_abs(__x)); 2260 const auto __infn 2261 = __vector_bitcast<_Ip>(__vector_broadcast<_Np>(__infinity_v<_Tp>)); 2262 return __infn < __absn; 2263 #else 2264 #error "Not implemented: how to support SNaN but non-IEC559 floating-point?" 2265 #endif 2266 } 2267 2268 // _S_isfinite {{{3 2269 template <typename _Tp, size_t _Np> 2270 _GLIBCXX_SIMD_INTRINSIC static _MaskMember<_Tp> 2271 _S_isfinite([[maybe_unused]] _SimdWrapper<_Tp, _Np> __x) 2272 { 2273 #if __FINITE_MATH_ONLY__ 2274 using _UV = typename _MaskMember<_Tp>::_BuiltinType; 2275 _GLIBCXX_SIMD_USE_CONSTEXPR _UV __alltrue = ~_UV(); 2276 return __alltrue; 2277 #else 2278 // if all exponent bits are set, __x is either inf or NaN 2279 using _Ip = __int_for_sizeof_t<_Tp>; 2280 const auto __absn = __vector_bitcast<_Ip>(_SuperImpl::_S_abs(__x)); 2281 const auto __maxn 2282 = __vector_bitcast<_Ip>(__vector_broadcast<_Np>(__finite_max_v<_Tp>)); 2283 return __absn <= __maxn; 2284 #endif 2285 } 2286 2287 // _S_isunordered {{{3 2288 template <typename _Tp, size_t _Np> 2289 _GLIBCXX_SIMD_INTRINSIC static _MaskMember<_Tp> 2290 _S_isunordered(_SimdWrapper<_Tp, _Np> __x, _SimdWrapper<_Tp, _Np> __y) 2291 { 2292 return __or(_S_isnan(__x), _S_isnan(__y)); 2293 } 2294 2295 // _S_signbit {{{3 2296 template <typename _Tp, size_t _Np> 2297 _GLIBCXX_SIMD_INTRINSIC static _MaskMember<_Tp> 2298 _S_signbit(_SimdWrapper<_Tp, _Np> __x) 2299 { 2300 using _Ip = __int_for_sizeof_t<_Tp>; 2301 return __vector_bitcast<_Ip>(__x) < 0; 2302 // Arithmetic right shift (SRA) would also work (instead of compare), but 2303 // 64-bit SRA isn't available on x86 before AVX512. And in general, 2304 // compares are more likely to be efficient than SRA. 
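      // Illustration (not part of the implementation): for float,
      //   __builtin_bit_cast(int, -0.f) == INT_MIN   // sign bit set -> true
      //   __builtin_bit_cast(int, +0.f) == 0         // -> false
      // so the integer compare matches std::signbit, including for negative
      // zero and for NaNs with the sign bit set.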
2305 } 2306 2307 // _S_isinf {{{3 2308 template <typename _Tp, size_t _Np> 2309 _GLIBCXX_SIMD_INTRINSIC static _MaskMember<_Tp> 2310 _S_isinf([[maybe_unused]] _SimdWrapper<_Tp, _Np> __x) 2311 { 2312 #if __FINITE_MATH_ONLY__ 2313 return {}; // false 2314 #else 2315 return _SuperImpl::template _S_equal_to<_Tp, _Np>(_SuperImpl::_S_abs(__x), 2316 __vector_broadcast<_Np>( 2317 __infinity_v<_Tp>)); 2318 // alternative: 2319 // compare to inf using the corresponding integer type 2320 /* 2321 return 2322 __vector_bitcast<_Tp>(__vector_bitcast<__int_for_sizeof_t<_Tp>>( 2323 _S_abs(__x)._M_data) 2324 == 2325 __vector_bitcast<__int_for_sizeof_t<_Tp>>(__vector_broadcast<_Np>( 2326 __infinity_v<_Tp>))); 2327 */ 2328 #endif 2329 } 2330 2331 // _S_isnormal {{{3 2332 template <typename _Tp, size_t _Np> 2333 _GLIBCXX_SIMD_INTRINSIC static _MaskMember<_Tp> 2334 _S_isnormal(_SimdWrapper<_Tp, _Np> __x) 2335 { 2336 using _Ip = __int_for_sizeof_t<_Tp>; 2337 const auto __absn = __vector_bitcast<_Ip>(_SuperImpl::_S_abs(__x)); 2338 const auto __minn 2339 = __vector_bitcast<_Ip>(__vector_broadcast<_Np>(__norm_min_v<_Tp>)); 2340 #if __FINITE_MATH_ONLY__ 2341 return __absn >= __minn; 2342 #else 2343 const auto __maxn 2344 = __vector_bitcast<_Ip>(__vector_broadcast<_Np>(__finite_max_v<_Tp>)); 2345 return __minn <= __absn && __absn <= __maxn; 2346 #endif 2347 } 2348 2349 // _S_fpclassify {{{3 2350 template <typename _Tp, size_t _Np> 2351 _GLIBCXX_SIMD_INTRINSIC static __fixed_size_storage_t<int, _Np> 2352 _S_fpclassify(_SimdWrapper<_Tp, _Np> __x) 2353 { 2354 using _I = __int_for_sizeof_t<_Tp>; 2355 const auto __xn 2356 = __vector_bitcast<_I>(__to_intrin(_SuperImpl::_S_abs(__x))); 2357 constexpr size_t _NI = sizeof(__xn) / sizeof(_I); 2358 _GLIBCXX_SIMD_USE_CONSTEXPR auto __minn 2359 = __vector_bitcast<_I>(__vector_broadcast<_NI>(__norm_min_v<_Tp>)); 2360 _GLIBCXX_SIMD_USE_CONSTEXPR auto __infn 2361 = __vector_bitcast<_I>(__vector_broadcast<_NI>(__infinity_v<_Tp>)); 2362 2363 _GLIBCXX_SIMD_USE_CONSTEXPR auto __fp_normal 2364 = __vector_broadcast<_NI, _I>(FP_NORMAL); 2365 #if !__FINITE_MATH_ONLY__ 2366 _GLIBCXX_SIMD_USE_CONSTEXPR auto __fp_nan 2367 = __vector_broadcast<_NI, _I>(FP_NAN); 2368 _GLIBCXX_SIMD_USE_CONSTEXPR auto __fp_infinite 2369 = __vector_broadcast<_NI, _I>(FP_INFINITE); 2370 #endif 2371 #ifndef __FAST_MATH__ 2372 _GLIBCXX_SIMD_USE_CONSTEXPR auto __fp_subnormal 2373 = __vector_broadcast<_NI, _I>(FP_SUBNORMAL); 2374 #endif 2375 _GLIBCXX_SIMD_USE_CONSTEXPR auto __fp_zero 2376 = __vector_broadcast<_NI, _I>(FP_ZERO); 2377 2378 __vector_type_t<_I, _NI> 2379 __tmp = __xn < __minn 2380 #ifdef __FAST_MATH__ 2381 ? __fp_zero 2382 #else 2383 ? (__xn == 0 ? __fp_zero : __fp_subnormal) 2384 #endif 2385 #if __FINITE_MATH_ONLY__ 2386 : __fp_normal; 2387 #else 2388 : (__xn < __infn ? __fp_normal 2389 : (__xn == __infn ? 
__fp_infinite : __fp_nan)); 2390 #endif 2391 2392 if constexpr (sizeof(_I) == sizeof(int)) 2393 { 2394 using _FixedInt = __fixed_size_storage_t<int, _Np>; 2395 const auto __as_int = __vector_bitcast<int, _Np>(__tmp); 2396 if constexpr (_FixedInt::_S_tuple_size == 1) 2397 return {__as_int}; 2398 else if constexpr (_FixedInt::_S_tuple_size == 2 2399 && is_same_v< 2400 typename _FixedInt::_SecondType::_FirstAbi, 2401 simd_abi::scalar>) 2402 return {__extract<0, 2>(__as_int), __as_int[_Np - 1]}; 2403 else if constexpr (_FixedInt::_S_tuple_size == 2) 2404 return {__extract<0, 2>(__as_int), 2405 __auto_bitcast(__extract<1, 2>(__as_int))}; 2406 else 2407 __assert_unreachable<_Tp>(); 2408 } 2409 else if constexpr (_Np == 2 && sizeof(_I) == 8 2410 && __fixed_size_storage_t<int, _Np>::_S_tuple_size == 2) 2411 { 2412 const auto __aslong = __vector_bitcast<_LLong>(__tmp); 2413 return {int(__aslong[0]), {int(__aslong[1])}}; 2414 } 2415 #if _GLIBCXX_SIMD_X86INTRIN 2416 else if constexpr (sizeof(_Tp) == 8 && sizeof(__tmp) == 32 2417 && __fixed_size_storage_t<int, _Np>::_S_tuple_size == 1) 2418 return {_mm_packs_epi32(__to_intrin(__lo128(__tmp)), 2419 __to_intrin(__hi128(__tmp)))}; 2420 else if constexpr (sizeof(_Tp) == 8 && sizeof(__tmp) == 64 2421 && __fixed_size_storage_t<int, _Np>::_S_tuple_size == 1) 2422 return {_mm512_cvtepi64_epi32(__to_intrin(__tmp))}; 2423 #endif // _GLIBCXX_SIMD_X86INTRIN 2424 else if constexpr (__fixed_size_storage_t<int, _Np>::_S_tuple_size == 1) 2425 return {__call_with_subscripts<_Np>(__vector_bitcast<_LLong>(__tmp), 2426 [](auto... __l) { 2427 return __make_wrapper<int>(__l...); 2428 })}; 2429 else 2430 __assert_unreachable<_Tp>(); 2431 } 2432 2433 // _S_increment & _S_decrement{{{2 2434 template <typename _Tp, size_t _Np> 2435 _GLIBCXX_SIMD_INTRINSIC static void 2436 _S_increment(_SimdWrapper<_Tp, _Np>& __x) 2437 { __x = __x._M_data + 1; } 2438 2439 template <typename _Tp, size_t _Np> 2440 _GLIBCXX_SIMD_INTRINSIC static void 2441 _S_decrement(_SimdWrapper<_Tp, _Np>& __x) 2442 { __x = __x._M_data - 1; } 2443 2444 // smart_reference access {{{2 2445 template <typename _Tp, size_t _Np, typename _Up> 2446 _GLIBCXX_SIMD_INTRINSIC constexpr static void 2447 _S_set(_SimdWrapper<_Tp, _Np>& __v, int __i, _Up&& __x) noexcept 2448 { __v._M_set(__i, static_cast<_Up&&>(__x)); } 2449 2450 // _S_masked_assign{{{2 2451 template <typename _Tp, typename _K, size_t _Np> 2452 _GLIBCXX_SIMD_INTRINSIC static void 2453 _S_masked_assign(_SimdWrapper<_K, _Np> __k, _SimdWrapper<_Tp, _Np>& __lhs, 2454 __type_identity_t<_SimdWrapper<_Tp, _Np>> __rhs) 2455 { 2456 if (__k._M_is_constprop_none_of()) 2457 return; 2458 else if (__k._M_is_constprop_all_of()) 2459 __lhs = __rhs; 2460 else 2461 __lhs = _CommonImpl::_S_blend(__k, __lhs, __rhs); 2462 } 2463 2464 template <typename _Tp, typename _K, size_t _Np> 2465 _GLIBCXX_SIMD_INTRINSIC static void 2466 _S_masked_assign(_SimdWrapper<_K, _Np> __k, _SimdWrapper<_Tp, _Np>& __lhs, 2467 __type_identity_t<_Tp> __rhs) 2468 { 2469 if (__k._M_is_constprop_none_of()) 2470 return; 2471 else if (__k._M_is_constprop_all_of()) 2472 __lhs = __vector_broadcast<_Np>(__rhs); 2473 else if (__builtin_constant_p(__rhs) && __rhs == 0) 2474 { 2475 if constexpr (!is_same_v<bool, _K>) 2476 // the __andnot optimization only makes sense if __k._M_data is a 2477 // vector register 2478 __lhs._M_data 2479 = __andnot(__vector_bitcast<_Tp>(__k), __lhs._M_data); 2480 else 2481 // for AVX512/__mmask, a _mm512_maskz_mov is best 2482 __lhs 2483 = _CommonImpl::_S_blend(__k, __lhs, 
_SimdWrapper<_Tp, _Np>()); 2484 } 2485 else 2486 __lhs = _CommonImpl::_S_blend(__k, __lhs, 2487 _SimdWrapper<_Tp, _Np>( 2488 __vector_broadcast<_Np>(__rhs))); 2489 } 2490 2491 // _S_masked_cassign {{{2 2492 template <typename _Op, typename _Tp, typename _K, size_t _Np> 2493 _GLIBCXX_SIMD_INTRINSIC static void 2494 _S_masked_cassign(const _SimdWrapper<_K, _Np> __k, 2495 _SimdWrapper<_Tp, _Np>& __lhs, 2496 const __type_identity_t<_SimdWrapper<_Tp, _Np>> __rhs, 2497 _Op __op) 2498 { 2499 if (__k._M_is_constprop_none_of()) 2500 return; 2501 else if (__k._M_is_constprop_all_of()) 2502 __lhs = __op(_SuperImpl{}, __lhs, __rhs); 2503 else 2504 __lhs = _CommonImpl::_S_blend(__k, __lhs, 2505 __op(_SuperImpl{}, __lhs, __rhs)); 2506 } 2507 2508 template <typename _Op, typename _Tp, typename _K, size_t _Np> 2509 _GLIBCXX_SIMD_INTRINSIC static void 2510 _S_masked_cassign(const _SimdWrapper<_K, _Np> __k, 2511 _SimdWrapper<_Tp, _Np>& __lhs, 2512 const __type_identity_t<_Tp> __rhs, _Op __op) 2513 { _S_masked_cassign(__k, __lhs, __vector_broadcast<_Np>(__rhs), __op); } 2514 2515 // _S_masked_unary {{{2 2516 template <template <typename> class _Op, typename _Tp, typename _K, 2517 size_t _Np> 2518 _GLIBCXX_SIMD_INTRINSIC static _SimdWrapper<_Tp, _Np> 2519 _S_masked_unary(const _SimdWrapper<_K, _Np> __k, 2520 const _SimdWrapper<_Tp, _Np> __v) 2521 { 2522 if (__k._M_is_constprop_none_of()) 2523 return __v; 2524 auto __vv = _M_make_simd(__v); 2525 _Op<decltype(__vv)> __op; 2526 if (__k._M_is_constprop_all_of()) 2527 return __data(__op(__vv)); 2528 else 2529 return _CommonImpl::_S_blend(__k, __v, __data(__op(__vv))); 2530 } 2531 2532 //}}}2 2533 }; 2534 2535 // _MaskImplBuiltinMixin {{{1 2536 struct _MaskImplBuiltinMixin 2537 { 2538 template <typename _Tp> 2539 using _TypeTag = _Tp*; 2540 2541 // _S_to_maskvector {{{ 2542 template <typename _Up, size_t _ToN = 1> 2543 _GLIBCXX_SIMD_INTRINSIC static constexpr _SimdWrapper<_Up, _ToN> 2544 _S_to_maskvector(bool __x) 2545 { 2546 static_assert(is_same_v<_Up, __int_for_sizeof_t<_Up>>); 2547 return __x ? __vector_type_t<_Up, _ToN>{~_Up()} 2548 : __vector_type_t<_Up, _ToN>{}; 2549 } 2550 2551 template <typename _Up, size_t _UpN = 0, size_t _Np, bool _Sanitized, 2552 size_t _ToN = _UpN == 0 ? _Np : _UpN> 2553 _GLIBCXX_SIMD_INTRINSIC static constexpr _SimdWrapper<_Up, _ToN> 2554 _S_to_maskvector(_BitMask<_Np, _Sanitized> __x) 2555 { 2556 static_assert(is_same_v<_Up, __int_for_sizeof_t<_Up>>); 2557 return __generate_vector<__vector_type_t<_Up, _ToN>>([&]( 2558 auto __i) constexpr { 2559 if constexpr (__i < _Np) 2560 return __x[__i] ? ~_Up() : _Up(); 2561 else 2562 return _Up(); 2563 }); 2564 } 2565 2566 template <typename _Up, size_t _UpN = 0, typename _Tp, size_t _Np, 2567 size_t _ToN = _UpN == 0 ? 
_Np : _UpN> 2568 _GLIBCXX_SIMD_INTRINSIC static constexpr _SimdWrapper<_Up, _ToN> 2569 _S_to_maskvector(_SimdWrapper<_Tp, _Np> __x) 2570 { 2571 static_assert(is_same_v<_Up, __int_for_sizeof_t<_Up>>); 2572 using _TW = _SimdWrapper<_Tp, _Np>; 2573 using _UW = _SimdWrapper<_Up, _ToN>; 2574 if constexpr (sizeof(_Up) == sizeof(_Tp) && sizeof(_TW) == sizeof(_UW)) 2575 return __wrapper_bitcast<_Up, _ToN>(__x); 2576 else if constexpr (is_same_v<_Tp, bool>) // bits -> vector 2577 return _S_to_maskvector<_Up, _ToN>(_BitMask<_Np>(__x._M_data)); 2578 else 2579 { // vector -> vector 2580 /* 2581 [[maybe_unused]] const auto __y = __vector_bitcast<_Up>(__x._M_data); 2582 if constexpr (sizeof(_Tp) == 8 && sizeof(_Up) == 4 && sizeof(__y) == 2583 16) return __vector_permute<1, 3, -1, -1>(__y); else if constexpr 2584 (sizeof(_Tp) == 4 && sizeof(_Up) == 2 2585 && sizeof(__y) == 16) 2586 return __vector_permute<1, 3, 5, 7, -1, -1, -1, -1>(__y); 2587 else if constexpr (sizeof(_Tp) == 8 && sizeof(_Up) == 2 2588 && sizeof(__y) == 16) 2589 return __vector_permute<3, 7, -1, -1, -1, -1, -1, -1>(__y); 2590 else if constexpr (sizeof(_Tp) == 2 && sizeof(_Up) == 1 2591 && sizeof(__y) == 16) 2592 return __vector_permute<1, 3, 5, 7, 9, 11, 13, 15, -1, -1, -1, -1, 2593 -1, -1, -1, -1>(__y); else if constexpr (sizeof(_Tp) == 4 && 2594 sizeof(_Up) == 1 2595 && sizeof(__y) == 16) 2596 return __vector_permute<3, 7, 11, 15, -1, -1, -1, -1, -1, -1, -1, 2597 -1, -1, -1, -1, -1>(__y); else if constexpr (sizeof(_Tp) == 8 && 2598 sizeof(_Up) == 1 2599 && sizeof(__y) == 16) 2600 return __vector_permute<7, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1, 2601 -1, -1, -1, -1, -1>(__y); else 2602 */ 2603 { 2604 return __generate_vector<__vector_type_t<_Up, _ToN>>([&]( 2605 auto __i) constexpr { 2606 if constexpr (__i < _Np) 2607 return _Up(__x[__i.value]); 2608 else 2609 return _Up(); 2610 }); 2611 } 2612 } 2613 } 2614 2615 // }}} 2616 // _S_to_bits {{{ 2617 template <typename _Tp, size_t _Np> 2618 _GLIBCXX_SIMD_INTRINSIC static constexpr _SanitizedBitMask<_Np> 2619 _S_to_bits(_SimdWrapper<_Tp, _Np> __x) 2620 { 2621 static_assert(!is_same_v<_Tp, bool>); 2622 static_assert(_Np <= __CHAR_BIT__ * sizeof(_ULLong)); 2623 using _Up = make_unsigned_t<__int_for_sizeof_t<_Tp>>; 2624 const auto __bools 2625 = __vector_bitcast<_Up>(__x) >> (sizeof(_Up) * __CHAR_BIT__ - 1); 2626 _ULLong __r = 0; 2627 __execute_n_times<_Np>( 2628 [&](auto __i) { __r |= _ULLong(__bools[__i.value]) << __i; }); 2629 return __r; 2630 } 2631 2632 // }}} 2633 }; 2634 2635 // _MaskImplBuiltin {{{1 2636 template <typename _Abi, typename> 2637 struct _MaskImplBuiltin : _MaskImplBuiltinMixin 2638 { 2639 using _MaskImplBuiltinMixin::_S_to_bits; 2640 using _MaskImplBuiltinMixin::_S_to_maskvector; 2641 2642 // member types {{{ 2643 template <typename _Tp> 2644 using _SimdMember = typename _Abi::template __traits<_Tp>::_SimdMember; 2645 2646 template <typename _Tp> 2647 using _MaskMember = typename _Abi::template _MaskMember<_Tp>; 2648 2649 using _SuperImpl = typename _Abi::_MaskImpl; 2650 using _CommonImpl = typename _Abi::_CommonImpl; 2651 2652 template <typename _Tp> 2653 static constexpr size_t _S_size = simd_size_v<_Tp, _Abi>; 2654 2655 // }}} 2656 // _S_broadcast {{{ 2657 template <typename _Tp> 2658 _GLIBCXX_SIMD_INTRINSIC static constexpr _MaskMember<_Tp> 2659 _S_broadcast(bool __x) 2660 { 2661 return __x ? 
_Abi::template _S_implicit_mask<_Tp>() 2662 : _MaskMember<_Tp>(); 2663 } 2664 2665 // }}} 2666 // _S_load {{{ 2667 template <typename _Tp> 2668 _GLIBCXX_SIMD_INTRINSIC static constexpr _MaskMember<_Tp> 2669 _S_load(const bool* __mem) 2670 { 2671 using _I = __int_for_sizeof_t<_Tp>; 2672 if constexpr (sizeof(_Tp) == sizeof(bool)) 2673 { 2674 const auto __bools 2675 = _CommonImpl::template _S_load<_I, _S_size<_Tp>>(__mem); 2676 // bool is {0, 1}, everything else is UB 2677 return __bools > 0; 2678 } 2679 else 2680 return __generate_vector<_I, _S_size<_Tp>>([&](auto __i) constexpr { 2681 return __mem[__i] ? ~_I() : _I(); 2682 }); 2683 } 2684 2685 // }}} 2686 // _S_convert {{{ 2687 template <typename _Tp, size_t _Np, bool _Sanitized> 2688 _GLIBCXX_SIMD_INTRINSIC static constexpr auto 2689 _S_convert(_BitMask<_Np, _Sanitized> __x) 2690 { 2691 if constexpr (__is_builtin_bitmask_abi<_Abi>()) 2692 return _SimdWrapper<bool, simd_size_v<_Tp, _Abi>>(__x._M_to_bits()); 2693 else 2694 return _SuperImpl::template _S_to_maskvector<__int_for_sizeof_t<_Tp>, 2695 _S_size<_Tp>>( 2696 __x._M_sanitized()); 2697 } 2698 2699 template <typename _Tp, size_t _Np> 2700 _GLIBCXX_SIMD_INTRINSIC static constexpr auto 2701 _S_convert(_SimdWrapper<bool, _Np> __x) 2702 { 2703 if constexpr (__is_builtin_bitmask_abi<_Abi>()) 2704 return _SimdWrapper<bool, simd_size_v<_Tp, _Abi>>(__x._M_data); 2705 else 2706 return _SuperImpl::template _S_to_maskvector<__int_for_sizeof_t<_Tp>, 2707 _S_size<_Tp>>( 2708 _BitMask<_Np>(__x._M_data)._M_sanitized()); 2709 } 2710 2711 template <typename _Tp, typename _Up, size_t _Np> 2712 _GLIBCXX_SIMD_INTRINSIC static constexpr auto 2713 _S_convert(_SimdWrapper<_Up, _Np> __x) 2714 { 2715 if constexpr (__is_builtin_bitmask_abi<_Abi>()) 2716 return _SimdWrapper<bool, simd_size_v<_Tp, _Abi>>( 2717 _SuperImpl::_S_to_bits(__x)); 2718 else 2719 return _SuperImpl::template _S_to_maskvector<__int_for_sizeof_t<_Tp>, 2720 _S_size<_Tp>>(__x); 2721 } 2722 2723 template <typename _Tp, typename _Up, typename _UAbi> 2724 _GLIBCXX_SIMD_INTRINSIC static constexpr auto 2725 _S_convert(simd_mask<_Up, _UAbi> __x) 2726 { 2727 if constexpr (__is_builtin_bitmask_abi<_Abi>()) 2728 { 2729 using _R = _SimdWrapper<bool, simd_size_v<_Tp, _Abi>>; 2730 if constexpr (__is_builtin_bitmask_abi<_UAbi>()) // bits -> bits 2731 return _R(__data(__x)); 2732 else if constexpr (__is_scalar_abi<_UAbi>()) // bool -> bits 2733 return _R(__data(__x)); 2734 else if constexpr (__is_fixed_size_abi_v<_UAbi>) // bitset -> bits 2735 return _R(__data(__x)._M_to_bits()); 2736 else // vector -> bits 2737 return _R(_UAbi::_MaskImpl::_S_to_bits(__data(__x))._M_to_bits()); 2738 } 2739 else 2740 return _SuperImpl::template _S_to_maskvector<__int_for_sizeof_t<_Tp>, 2741 _S_size<_Tp>>( 2742 __data(__x)); 2743 } 2744 2745 // }}} 2746 // _S_masked_load {{{2 2747 template <typename _Tp, size_t _Np> 2748 static inline _SimdWrapper<_Tp, _Np> 2749 _S_masked_load(_SimdWrapper<_Tp, _Np> __merge, 2750 _SimdWrapper<_Tp, _Np> __mask, const bool* __mem) noexcept 2751 { 2752 // AVX(2) has 32/64 bit maskload, but nothing at 8 bit granularity 2753 auto __tmp = __wrapper_bitcast<__int_for_sizeof_t<_Tp>>(__merge); 2754 _BitOps::_S_bit_iteration(_SuperImpl::_S_to_bits(__mask), 2755 [&](auto __i) { 2756 __tmp._M_set(__i, -__mem[__i]); 2757 }); 2758 __merge = __wrapper_bitcast<_Tp>(__tmp); 2759 return __merge; 2760 } 2761 2762 // _S_store {{{2 2763 template <typename _Tp, size_t _Np> 2764 _GLIBCXX_SIMD_INTRINSIC static void _S_store(_SimdWrapper<_Tp, _Np> __v, 2765 bool* 
__mem) noexcept 2766 { 2767 __execute_n_times<_Np>([&](auto __i) constexpr { 2768 __mem[__i] = __v[__i]; 2769 }); 2770 } 2771 2772 // _S_masked_store {{{2 2773 template <typename _Tp, size_t _Np> 2774 static inline void 2775 _S_masked_store(const _SimdWrapper<_Tp, _Np> __v, bool* __mem, 2776 const _SimdWrapper<_Tp, _Np> __k) noexcept 2777 { 2778 _BitOps::_S_bit_iteration( 2779 _SuperImpl::_S_to_bits(__k), [&](auto __i) constexpr { 2780 __mem[__i] = __v[__i]; 2781 }); 2782 } 2783 2784 // _S_from_bitmask{{{2 2785 template <size_t _Np, typename _Tp> 2786 _GLIBCXX_SIMD_INTRINSIC static _MaskMember<_Tp> 2787 _S_from_bitmask(_SanitizedBitMask<_Np> __bits, _TypeTag<_Tp>) 2788 { 2789 return _SuperImpl::template _S_to_maskvector<_Tp, _S_size<_Tp>>(__bits); 2790 } 2791 2792 // logical and bitwise operators {{{2 2793 template <typename _Tp, size_t _Np> 2794 _GLIBCXX_SIMD_INTRINSIC static constexpr _SimdWrapper<_Tp, _Np> 2795 _S_logical_and(const _SimdWrapper<_Tp, _Np>& __x, 2796 const _SimdWrapper<_Tp, _Np>& __y) 2797 { return __and(__x._M_data, __y._M_data); } 2798 2799 template <typename _Tp, size_t _Np> 2800 _GLIBCXX_SIMD_INTRINSIC static constexpr _SimdWrapper<_Tp, _Np> 2801 _S_logical_or(const _SimdWrapper<_Tp, _Np>& __x, 2802 const _SimdWrapper<_Tp, _Np>& __y) 2803 { return __or(__x._M_data, __y._M_data); } 2804 2805 template <typename _Tp, size_t _Np> 2806 _GLIBCXX_SIMD_INTRINSIC static constexpr _SimdWrapper<_Tp, _Np> 2807 _S_bit_not(const _SimdWrapper<_Tp, _Np>& __x) 2808 { 2809 if constexpr (_Abi::template _S_is_partial<_Tp>) 2810 return __andnot(__x, __wrapper_bitcast<_Tp>( 2811 _Abi::template _S_implicit_mask<_Tp>())); 2812 else 2813 return __not(__x._M_data); 2814 } 2815 2816 template <typename _Tp, size_t _Np> 2817 _GLIBCXX_SIMD_INTRINSIC static constexpr _SimdWrapper<_Tp, _Np> 2818 _S_bit_and(const _SimdWrapper<_Tp, _Np>& __x, 2819 const _SimdWrapper<_Tp, _Np>& __y) 2820 { return __and(__x._M_data, __y._M_data); } 2821 2822 template <typename _Tp, size_t _Np> 2823 _GLIBCXX_SIMD_INTRINSIC static constexpr _SimdWrapper<_Tp, _Np> 2824 _S_bit_or(const _SimdWrapper<_Tp, _Np>& __x, 2825 const _SimdWrapper<_Tp, _Np>& __y) 2826 { return __or(__x._M_data, __y._M_data); } 2827 2828 template <typename _Tp, size_t _Np> 2829 _GLIBCXX_SIMD_INTRINSIC static constexpr _SimdWrapper<_Tp, _Np> 2830 _S_bit_xor(const _SimdWrapper<_Tp, _Np>& __x, 2831 const _SimdWrapper<_Tp, _Np>& __y) 2832 { return __xor(__x._M_data, __y._M_data); } 2833 2834 // smart_reference access {{{2 2835 template <typename _Tp, size_t _Np> 2836 static constexpr void _S_set(_SimdWrapper<_Tp, _Np>& __k, int __i, 2837 bool __x) noexcept 2838 { 2839 if constexpr (is_same_v<_Tp, bool>) 2840 __k._M_set(__i, __x); 2841 else 2842 { 2843 static_assert(is_same_v<_Tp, __int_for_sizeof_t<_Tp>>); 2844 if (__builtin_is_constant_evaluated()) 2845 { 2846 __k = __generate_from_n_evaluations<_Np, 2847 __vector_type_t<_Tp, _Np>>( 2848 [&](auto __j) { 2849 if (__i == static_cast<int>(__j)) 2850 return _Tp(-__x); 2851 else 2852 return __k[+__j]; 2853 }); 2854 } 2855 else 2856 __k._M_data[__i] = -__x; 2857 } 2858 } 2859 2860 // _S_masked_assign{{{2 2861 template <typename _Tp, size_t _Np> 2862 _GLIBCXX_SIMD_INTRINSIC static void 2863 _S_masked_assign(_SimdWrapper<_Tp, _Np> __k, 2864 _SimdWrapper<_Tp, _Np>& __lhs, 2865 __type_identity_t<_SimdWrapper<_Tp, _Np>> __rhs) 2866 { __lhs = _CommonImpl::_S_blend(__k, __lhs, __rhs); } 2867 2868 template <typename _Tp, size_t _Np> 2869 _GLIBCXX_SIMD_INTRINSIC static void 2870 _S_masked_assign(_SimdWrapper<_Tp, _Np> 
__k, 2871 _SimdWrapper<_Tp, _Np>& __lhs, bool __rhs) 2872 { 2873 if (__builtin_constant_p(__rhs)) 2874 { 2875 if (__rhs == false) 2876 __lhs = __andnot(__k, __lhs); 2877 else 2878 __lhs = __or(__k, __lhs); 2879 return; 2880 } 2881 __lhs = _CommonImpl::_S_blend(__k, __lhs, 2882 __data(simd_mask<_Tp, _Abi>(__rhs))); 2883 } 2884 2885 //}}}2 2886 // _S_all_of {{{ 2887 template <typename _Tp> 2888 _GLIBCXX_SIMD_INTRINSIC static bool 2889 _S_all_of(simd_mask<_Tp, _Abi> __k) 2890 { 2891 return __call_with_subscripts( 2892 __data(__k), make_index_sequence<_S_size<_Tp>>(), 2893 [](const auto... __ent) constexpr { return (... && !(__ent == 0)); }); 2894 } 2895 2896 // }}} 2897 // _S_any_of {{{ 2898 template <typename _Tp> 2899 _GLIBCXX_SIMD_INTRINSIC static bool 2900 _S_any_of(simd_mask<_Tp, _Abi> __k) 2901 { 2902 return __call_with_subscripts( 2903 __data(__k), make_index_sequence<_S_size<_Tp>>(), 2904 [](const auto... __ent) constexpr { return (... || !(__ent == 0)); }); 2905 } 2906 2907 // }}} 2908 // _S_none_of {{{ 2909 template <typename _Tp> 2910 _GLIBCXX_SIMD_INTRINSIC static bool 2911 _S_none_of(simd_mask<_Tp, _Abi> __k) 2912 { 2913 return __call_with_subscripts( 2914 __data(__k), make_index_sequence<_S_size<_Tp>>(), 2915 [](const auto... __ent) constexpr { return (... && (__ent == 0)); }); 2916 } 2917 2918 // }}} 2919 // _S_some_of {{{ 2920 template <typename _Tp> 2921 _GLIBCXX_SIMD_INTRINSIC static bool 2922 _S_some_of(simd_mask<_Tp, _Abi> __k) 2923 { 2924 const int __n_true = _SuperImpl::_S_popcount(__k); 2925 return __n_true > 0 && __n_true < int(_S_size<_Tp>); 2926 } 2927 2928 // }}} 2929 // _S_popcount {{{ 2930 template <typename _Tp> 2931 _GLIBCXX_SIMD_INTRINSIC static int 2932 _S_popcount(simd_mask<_Tp, _Abi> __k) 2933 { 2934 using _I = __int_for_sizeof_t<_Tp>; 2935 if constexpr (is_default_constructible_v<simd<_I, _Abi>>) 2936 return -reduce( 2937 simd<_I, _Abi>(__private_init, __wrapper_bitcast<_I>(__data(__k)))); 2938 else 2939 return -reduce(__bit_cast<rebind_simd_t<_I, simd<_Tp, _Abi>>>( 2940 simd<_Tp, _Abi>(__private_init, __data(__k)))); 2941 } 2942 2943 // }}} 2944 // _S_find_first_set {{{ 2945 template <typename _Tp> 2946 _GLIBCXX_SIMD_INTRINSIC static int 2947 _S_find_first_set(simd_mask<_Tp, _Abi> __k) 2948 { 2949 return std::__countr_zero( 2950 _SuperImpl::_S_to_bits(__data(__k))._M_to_bits()); 2951 } 2952 2953 // }}} 2954 // _S_find_last_set {{{ 2955 template <typename _Tp> 2956 _GLIBCXX_SIMD_INTRINSIC static int 2957 _S_find_last_set(simd_mask<_Tp, _Abi> __k) 2958 { 2959 return std::__bit_width( 2960 _SuperImpl::_S_to_bits(__data(__k))._M_to_bits()) - 1; 2961 } 2962 2963 // }}} 2964 }; 2965 2966 //}}}1 2967 _GLIBCXX_SIMD_END_NAMESPACE 2968 #endif // __cplusplus >= 201703L 2969 #endif // _GLIBCXX_EXPERIMENTAL_SIMD_ABIS_H_ 2970 2971 // vim: foldmethod=marker foldmarker={{{,}}} sw=2 noet ts=8 sts=2 tw=100 2972