1 // Definition of the public simd interfaces -*- C++ -*- 2 3 // Copyright (C) 2020-2022 Free Software Foundation, Inc. 4 // 5 // This file is part of the GNU ISO C++ Library. This library is free 6 // software; you can redistribute it and/or modify it under the 7 // terms of the GNU General Public License as published by the 8 // Free Software Foundation; either version 3, or (at your option) 9 // any later version. 10 11 // This library is distributed in the hope that it will be useful, 12 // but WITHOUT ANY WARRANTY; without even the implied warranty of 13 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 14 // GNU General Public License for more details. 15 16 // Under Section 7 of GPL version 3, you are granted additional 17 // permissions described in the GCC Runtime Library Exception, version 18 // 3.1, as published by the Free Software Foundation. 19 20 // You should have received a copy of the GNU General Public License and 21 // a copy of the GCC Runtime Library Exception along with this program; 22 // see the files COPYING3 and COPYING.RUNTIME respectively. If not, see 23 // <http://www.gnu.org/licenses/>. 24 25 #ifndef _GLIBCXX_EXPERIMENTAL_SIMD_H 26 #define _GLIBCXX_EXPERIMENTAL_SIMD_H 27 28 #if __cplusplus >= 201703L 29 30 #include "simd_detail.h" 31 #include "numeric_traits.h" 32 #include <bit> 33 #include <bitset> 34 #ifdef _GLIBCXX_DEBUG_UB 35 #include <cstdio> // for stderr 36 #endif 37 #include <cstring> 38 #include <cmath> 39 #include <functional> 40 #include <iosfwd> 41 #include <utility> 42 43 #if _GLIBCXX_SIMD_X86INTRIN 44 #include <x86intrin.h> 45 #elif _GLIBCXX_SIMD_HAVE_NEON 46 #pragma GCC diagnostic push 47 // narrowing conversion of '__a' from 'uint64_t' {aka 'long long unsigned int'} to 48 // 'int64x1_t' {aka 'long long int'} [-Wnarrowing] 49 #pragma GCC diagnostic ignored "-Wnarrowing" 50 #include <arm_neon.h> 51 #pragma GCC diagnostic pop 52 #endif 53 54 /** @ingroup ts_simd 55 * @{ 56 */ 57 /* There are several closely related types, with the following naming 58 * convention: 59 * _Tp: vectorizable (arithmetic) type (or any type) 60 * _TV: __vector_type_t<_Tp, _Np> 61 * _TW: _SimdWrapper<_Tp, _Np> 62 * _TI: __intrinsic_type_t<_Tp, _Np> 63 * _TVT: _VectorTraits<_TV> or _VectorTraits<_TW> 64 * If one additional type is needed use _U instead of _T. 65 * Otherwise use _T\d, _TV\d, _TW\d, TI\d, _TVT\d. 66 * 67 * More naming conventions: 68 * _Ap or _Abi: An ABI tag from the simd_abi namespace 69 * _Ip: often used for integer types with sizeof(_Ip) == sizeof(_Tp), 70 * _IV, _IW as for _TV, _TW 71 * _Np: number of elements (not bytes) 72 * _Bytes: number of bytes 73 * 74 * Variable names: 75 * __k: mask object (vector- or bitmask) 76 */ 77 _GLIBCXX_SIMD_BEGIN_NAMESPACE 78 79 #if !_GLIBCXX_SIMD_X86INTRIN 80 using __m128 [[__gnu__::__vector_size__(16)]] = float; 81 using __m128d [[__gnu__::__vector_size__(16)]] = double; 82 using __m128i [[__gnu__::__vector_size__(16)]] = long long; 83 using __m256 [[__gnu__::__vector_size__(32)]] = float; 84 using __m256d [[__gnu__::__vector_size__(32)]] = double; 85 using __m256i [[__gnu__::__vector_size__(32)]] = long long; 86 using __m512 [[__gnu__::__vector_size__(64)]] = float; 87 using __m512d [[__gnu__::__vector_size__(64)]] = double; 88 using __m512i [[__gnu__::__vector_size__(64)]] = long long; 89 #endif 90 91 namespace simd_abi { 92 // simd_abi forward declarations {{{ 93 // implementation details: 94 struct _Scalar; 95 96 template <int _Np> 97 struct _Fixed; 98 99 // There are two major ABIs that appear on different architectures. 100 // Both have non-boolean values packed into an N Byte register 101 // -> #elements = N / sizeof(T) 102 // Masks differ: 103 // 1. Use value vector registers for masks (all 0 or all 1) 104 // 2. Use bitmasks (mask registers) with one bit per value in the corresponding 105 // value vector 106 // 107 // Both can be partially used, masking off the rest when doing horizontal 108 // operations or operations that can trap (e.g. FP_INVALID or integer division 109 // by 0). This is encoded as the number of used bytes. 110 template <int _UsedBytes> 111 struct _VecBuiltin; 112 113 template <int _UsedBytes> 114 struct _VecBltnBtmsk; 115 116 template <typename _Tp, int _Np> 117 using _VecN = _VecBuiltin<sizeof(_Tp) * _Np>; 118 119 template <int _UsedBytes = 16> 120 using _Sse = _VecBuiltin<_UsedBytes>; 121 122 template <int _UsedBytes = 32> 123 using _Avx = _VecBuiltin<_UsedBytes>; 124 125 template <int _UsedBytes = 64> 126 using _Avx512 = _VecBltnBtmsk<_UsedBytes>; 127 128 template <int _UsedBytes = 16> 129 using _Neon = _VecBuiltin<_UsedBytes>; 130 131 // implementation-defined: 132 using __sse = _Sse<>; 133 using __avx = _Avx<>; 134 using __avx512 = _Avx512<>; 135 using __neon = _Neon<>; 136 using __neon128 = _Neon<16>; 137 using __neon64 = _Neon<8>; 138 139 // standard: 140 template <typename _Tp, size_t _Np, typename...> 141 struct deduce; 142 143 template <int _Np> 144 using fixed_size = _Fixed<_Np>; 145 146 using scalar = _Scalar; 147 148 // }}} 149 } // namespace simd_abi 150 // forward declarations is_simd(_mask), simd(_mask), simd_size {{{ 151 template <typename _Tp> 152 struct is_simd; 153 154 template <typename _Tp> 155 struct is_simd_mask; 156 157 template <typename _Tp, typename _Abi> 158 class simd; 159 160 template <typename _Tp, typename _Abi> 161 class simd_mask; 162 163 template <typename _Tp, typename _Abi> 164 struct simd_size; 165 166 // }}} 167 // load/store flags {{{ 168 struct element_aligned_tag 169 { 170 template <typename _Tp, typename _Up = typename _Tp::value_type> 171 static constexpr size_t _S_alignment = alignof(_Up); 172 173 template <typename _Tp, typename _Up> 174 _GLIBCXX_SIMD_INTRINSIC static constexpr _Up* _S_applyelement_aligned_tag175 _S_apply(_Up* __ptr) 176 { return __ptr; } 177 }; 178 179 struct vector_aligned_tag 180 { 181 template <typename _Tp, typename _Up = typename _Tp::value_type> 182 static constexpr size_t _S_alignment 183 = std::__bit_ceil(sizeof(_Up) * _Tp::size()); 184 185 template <typename _Tp, typename _Up> 186 _GLIBCXX_SIMD_INTRINSIC static constexpr _Up* _S_applyvector_aligned_tag187 _S_apply(_Up* __ptr) 188 { return static_cast<_Up*>(__builtin_assume_aligned(__ptr, _S_alignment<_Tp, _Up>)); } 189 }; 190 191 template <size_t _Np> struct overaligned_tag 192 { 193 template <typename _Tp, typename _Up = typename _Tp::value_type> 194 static constexpr size_t _S_alignment = _Np; 195 196 template <typename _Tp, typename _Up> 197 _GLIBCXX_SIMD_INTRINSIC static constexpr _Up* _S_applyoveraligned_tag198 _S_apply(_Up* __ptr) 199 { return static_cast<_Up*>(__builtin_assume_aligned(__ptr, _Np)); } 200 }; 201 202 inline constexpr element_aligned_tag element_aligned = {}; 203 204 inline constexpr vector_aligned_tag vector_aligned = {}; 205 206 template <size_t _Np> 207 inline constexpr overaligned_tag<_Np> overaligned = {}; 208 209 // }}} 210 template <size_t _Xp> 211 using _SizeConstant = integral_constant<size_t, _Xp>; 212 // constexpr feature detection{{{ 213 constexpr inline bool __have_mmx = _GLIBCXX_SIMD_HAVE_MMX; 214 constexpr inline bool __have_sse = _GLIBCXX_SIMD_HAVE_SSE; 215 constexpr inline bool __have_sse2 = _GLIBCXX_SIMD_HAVE_SSE2; 216 constexpr inline bool __have_sse3 = _GLIBCXX_SIMD_HAVE_SSE3; 217 constexpr inline bool __have_ssse3 = _GLIBCXX_SIMD_HAVE_SSSE3; 218 constexpr inline bool __have_sse4_1 = _GLIBCXX_SIMD_HAVE_SSE4_1; 219 constexpr inline bool __have_sse4_2 = _GLIBCXX_SIMD_HAVE_SSE4_2; 220 constexpr inline bool __have_xop = _GLIBCXX_SIMD_HAVE_XOP; 221 constexpr inline bool __have_avx = _GLIBCXX_SIMD_HAVE_AVX; 222 constexpr inline bool __have_avx2 = _GLIBCXX_SIMD_HAVE_AVX2; 223 constexpr inline bool __have_bmi = _GLIBCXX_SIMD_HAVE_BMI1; 224 constexpr inline bool __have_bmi2 = _GLIBCXX_SIMD_HAVE_BMI2; 225 constexpr inline bool __have_lzcnt = _GLIBCXX_SIMD_HAVE_LZCNT; 226 constexpr inline bool __have_sse4a = _GLIBCXX_SIMD_HAVE_SSE4A; 227 constexpr inline bool __have_fma = _GLIBCXX_SIMD_HAVE_FMA; 228 constexpr inline bool __have_fma4 = _GLIBCXX_SIMD_HAVE_FMA4; 229 constexpr inline bool __have_f16c = _GLIBCXX_SIMD_HAVE_F16C; 230 constexpr inline bool __have_popcnt = _GLIBCXX_SIMD_HAVE_POPCNT; 231 constexpr inline bool __have_avx512f = _GLIBCXX_SIMD_HAVE_AVX512F; 232 constexpr inline bool __have_avx512dq = _GLIBCXX_SIMD_HAVE_AVX512DQ; 233 constexpr inline bool __have_avx512vl = _GLIBCXX_SIMD_HAVE_AVX512VL; 234 constexpr inline bool __have_avx512bw = _GLIBCXX_SIMD_HAVE_AVX512BW; 235 constexpr inline bool __have_avx512dq_vl = __have_avx512dq && __have_avx512vl; 236 constexpr inline bool __have_avx512bw_vl = __have_avx512bw && __have_avx512vl; 237 constexpr inline bool __have_avx512bitalg = _GLIBCXX_SIMD_HAVE_AVX512BITALG; 238 constexpr inline bool __have_avx512vbmi2 = _GLIBCXX_SIMD_HAVE_AVX512VBMI2; 239 constexpr inline bool __have_avx512vbmi = _GLIBCXX_SIMD_HAVE_AVX512VBMI; 240 constexpr inline bool __have_avx512ifma = _GLIBCXX_SIMD_HAVE_AVX512IFMA; 241 constexpr inline bool __have_avx512cd = _GLIBCXX_SIMD_HAVE_AVX512CD; 242 constexpr inline bool __have_avx512vnni = _GLIBCXX_SIMD_HAVE_AVX512VNNI; 243 constexpr inline bool __have_avx512vpopcntdq = _GLIBCXX_SIMD_HAVE_AVX512VPOPCNTDQ; 244 constexpr inline bool __have_avx512vp2intersect = _GLIBCXX_SIMD_HAVE_AVX512VP2INTERSECT; 245 246 constexpr inline bool __have_neon = _GLIBCXX_SIMD_HAVE_NEON; 247 constexpr inline bool __have_neon_a32 = _GLIBCXX_SIMD_HAVE_NEON_A32; 248 constexpr inline bool __have_neon_a64 = _GLIBCXX_SIMD_HAVE_NEON_A64; 249 constexpr inline bool __support_neon_float = 250 #if defined __GCC_IEC_559 251 __GCC_IEC_559 == 0; 252 #elif defined __FAST_MATH__ 253 true; 254 #else 255 false; 256 #endif 257 258 #ifdef _ARCH_PWR10 259 constexpr inline bool __have_power10vec = true; 260 #else 261 constexpr inline bool __have_power10vec = false; 262 #endif 263 #ifdef __POWER9_VECTOR__ 264 constexpr inline bool __have_power9vec = true; 265 #else 266 constexpr inline bool __have_power9vec = false; 267 #endif 268 #if defined __POWER8_VECTOR__ 269 constexpr inline bool __have_power8vec = true; 270 #else 271 constexpr inline bool __have_power8vec = __have_power9vec; 272 #endif 273 #if defined __VSX__ 274 constexpr inline bool __have_power_vsx = true; 275 #else 276 constexpr inline bool __have_power_vsx = __have_power8vec; 277 #endif 278 #if defined __ALTIVEC__ 279 constexpr inline bool __have_power_vmx = true; 280 #else 281 constexpr inline bool __have_power_vmx = __have_power_vsx; 282 #endif 283 284 // }}} 285 286 namespace __detail 287 { 288 #ifdef math_errhandling 289 // Determines _S_handle_fpexcept from math_errhandling if it is defined and expands to a constant 290 // expression. math_errhandling may expand to an extern symbol, in which case a constexpr value 291 // must be guessed. 292 template <int = math_errhandling> 293 constexpr bool __handle_fpexcept_impl(int)294 __handle_fpexcept_impl(int) 295 { return math_errhandling & MATH_ERREXCEPT; } 296 #endif 297 298 // Fallback if math_errhandling doesn't work: with fast-math assume floating-point exceptions are 299 // ignored, otherwise implement correct exception behavior. 300 constexpr bool __handle_fpexcept_impl(float)301 __handle_fpexcept_impl(float) 302 { 303 #if defined __FAST_MATH__ 304 return false; 305 #else 306 return true; 307 #endif 308 } 309 310 /// True if math functions must raise floating-point exceptions as specified by C17. 311 static constexpr bool _S_handle_fpexcept = __handle_fpexcept_impl(0); 312 313 constexpr std::uint_least64_t __floating_point_flags()314 __floating_point_flags() 315 { 316 std::uint_least64_t __flags = 0; 317 if constexpr (_S_handle_fpexcept) 318 __flags |= 1; 319 #ifdef __FAST_MATH__ 320 __flags |= 1 << 1; 321 #elif __FINITE_MATH_ONLY__ 322 __flags |= 2 << 1; 323 #elif __GCC_IEC_559 < 2 324 __flags |= 3 << 1; 325 #endif 326 __flags |= (__FLT_EVAL_METHOD__ + 1) << 3; 327 return __flags; 328 } 329 330 constexpr std::uint_least64_t __machine_flags()331 __machine_flags() 332 { 333 if constexpr (__have_mmx || __have_sse) 334 return __have_mmx 335 | (__have_sse << 1) 336 | (__have_sse2 << 2) 337 | (__have_sse3 << 3) 338 | (__have_ssse3 << 4) 339 | (__have_sse4_1 << 5) 340 | (__have_sse4_2 << 6) 341 | (__have_xop << 7) 342 | (__have_avx << 8) 343 | (__have_avx2 << 9) 344 | (__have_bmi << 10) 345 | (__have_bmi2 << 11) 346 | (__have_lzcnt << 12) 347 | (__have_sse4a << 13) 348 | (__have_fma << 14) 349 | (__have_fma4 << 15) 350 | (__have_f16c << 16) 351 | (__have_popcnt << 17) 352 | (__have_avx512f << 18) 353 | (__have_avx512dq << 19) 354 | (__have_avx512vl << 20) 355 | (__have_avx512bw << 21) 356 | (__have_avx512bitalg << 22) 357 | (__have_avx512vbmi2 << 23) 358 | (__have_avx512vbmi << 24) 359 | (__have_avx512ifma << 25) 360 | (__have_avx512cd << 26) 361 | (__have_avx512vnni << 27) 362 | (__have_avx512vpopcntdq << 28) 363 | (__have_avx512vp2intersect << 29); 364 else if constexpr (__have_neon) 365 return __have_neon 366 | (__have_neon_a32 << 1) 367 | (__have_neon_a64 << 2) 368 | (__have_neon_a64 << 2) 369 | (__support_neon_float << 3); 370 else if constexpr (__have_power_vmx) 371 return __have_power_vmx 372 | (__have_power_vsx << 1) 373 | (__have_power8vec << 2) 374 | (__have_power9vec << 3) 375 | (__have_power10vec << 4); 376 else 377 return 0; 378 } 379 380 namespace 381 { 382 struct _OdrEnforcer {}; 383 } 384 385 template <std::uint_least64_t...> 386 struct _MachineFlagsTemplate {}; 387 388 /**@internal 389 * Use this type as default template argument to all function templates that 390 * are not declared always_inline. It ensures, that a function 391 * specialization, which the compiler decides not to inline, has a unique symbol 392 * (_OdrEnforcer) or a symbol matching the machine/architecture flags 393 * (_MachineFlagsTemplate). This helps to avoid ODR violations in cases where 394 * users link TUs compiled with different flags. This is especially important 395 * for using simd in libraries. 396 */ 397 using __odr_helper 398 = conditional_t<__machine_flags() == 0, _OdrEnforcer, 399 _MachineFlagsTemplate<__machine_flags(), __floating_point_flags()>>; 400 401 struct _Minimum 402 { 403 template <typename _Tp> 404 _GLIBCXX_SIMD_INTRINSIC constexpr 405 _Tp operator_Minimum406 operator()(_Tp __a, _Tp __b) const 407 { 408 using std::min; 409 return min(__a, __b); 410 } 411 }; 412 413 struct _Maximum 414 { 415 template <typename _Tp> 416 _GLIBCXX_SIMD_INTRINSIC constexpr 417 _Tp operator_Maximum418 operator()(_Tp __a, _Tp __b) const 419 { 420 using std::max; 421 return max(__a, __b); 422 } 423 }; 424 } // namespace __detail 425 426 // unrolled/pack execution helpers 427 // __execute_n_times{{{ 428 template <typename _Fp, size_t... _I> 429 [[__gnu__::__flatten__]] _GLIBCXX_SIMD_INTRINSIC constexpr 430 void __execute_on_index_sequence(_Fp && __f,index_sequence<_I...>)431 __execute_on_index_sequence(_Fp&& __f, index_sequence<_I...>) 432 { ((void)__f(_SizeConstant<_I>()), ...); } 433 434 template <typename _Fp> 435 _GLIBCXX_SIMD_INTRINSIC constexpr void __execute_on_index_sequence(_Fp &&,index_sequence<>)436 __execute_on_index_sequence(_Fp&&, index_sequence<>) 437 { } 438 439 template <size_t _Np, typename _Fp> 440 _GLIBCXX_SIMD_INTRINSIC constexpr void __execute_n_times(_Fp && __f)441 __execute_n_times(_Fp&& __f) 442 { 443 __execute_on_index_sequence(static_cast<_Fp&&>(__f), 444 make_index_sequence<_Np>{}); 445 } 446 447 // }}} 448 // __generate_from_n_evaluations{{{ 449 template <typename _R, typename _Fp, size_t... _I> 450 [[__gnu__::__flatten__]] _GLIBCXX_SIMD_INTRINSIC constexpr 451 _R __execute_on_index_sequence_with_return(_Fp && __f,index_sequence<_I...>)452 __execute_on_index_sequence_with_return(_Fp&& __f, index_sequence<_I...>) 453 { return _R{__f(_SizeConstant<_I>())...}; } 454 455 template <size_t _Np, typename _R, typename _Fp> 456 _GLIBCXX_SIMD_INTRINSIC constexpr _R __generate_from_n_evaluations(_Fp && __f)457 __generate_from_n_evaluations(_Fp&& __f) 458 { 459 return __execute_on_index_sequence_with_return<_R>( 460 static_cast<_Fp&&>(__f), make_index_sequence<_Np>{}); 461 } 462 463 // }}} 464 // __call_with_n_evaluations{{{ 465 template <size_t... _I, typename _F0, typename _FArgs> 466 [[__gnu__::__flatten__]] _GLIBCXX_SIMD_INTRINSIC constexpr 467 auto __call_with_n_evaluations(index_sequence<_I...>,_F0 && __f0,_FArgs && __fargs)468 __call_with_n_evaluations(index_sequence<_I...>, _F0&& __f0, _FArgs&& __fargs) 469 { return __f0(__fargs(_SizeConstant<_I>())...); } 470 471 template <size_t _Np, typename _F0, typename _FArgs> 472 _GLIBCXX_SIMD_INTRINSIC constexpr auto __call_with_n_evaluations(_F0 && __f0,_FArgs && __fargs)473 __call_with_n_evaluations(_F0&& __f0, _FArgs&& __fargs) 474 { 475 return __call_with_n_evaluations(make_index_sequence<_Np>{}, 476 static_cast<_F0&&>(__f0), 477 static_cast<_FArgs&&>(__fargs)); 478 } 479 480 // }}} 481 // __call_with_subscripts{{{ 482 template <size_t _First = 0, size_t... _It, typename _Tp, typename _Fp> 483 [[__gnu__::__flatten__]] _GLIBCXX_SIMD_INTRINSIC constexpr 484 auto __call_with_subscripts(_Tp && __x,index_sequence<_It...>,_Fp && __fun)485 __call_with_subscripts(_Tp&& __x, index_sequence<_It...>, _Fp&& __fun) 486 { return __fun(__x[_First + _It]...); } 487 488 template <size_t _Np, size_t _First = 0, typename _Tp, typename _Fp> 489 _GLIBCXX_SIMD_INTRINSIC constexpr auto __call_with_subscripts(_Tp && __x,_Fp && __fun)490 __call_with_subscripts(_Tp&& __x, _Fp&& __fun) 491 { 492 return __call_with_subscripts<_First>(static_cast<_Tp&&>(__x), 493 make_index_sequence<_Np>(), 494 static_cast<_Fp&&>(__fun)); 495 } 496 497 // }}} 498 499 // vvv ---- type traits ---- vvv 500 // integer type aliases{{{ 501 using _UChar = unsigned char; 502 using _SChar = signed char; 503 using _UShort = unsigned short; 504 using _UInt = unsigned int; 505 using _ULong = unsigned long; 506 using _ULLong = unsigned long long; 507 using _LLong = long long; 508 509 //}}} 510 // __first_of_pack{{{ 511 template <typename _T0, typename...> 512 struct __first_of_pack 513 { using type = _T0; }; 514 515 template <typename... _Ts> 516 using __first_of_pack_t = typename __first_of_pack<_Ts...>::type; 517 518 //}}} 519 // __value_type_or_identity_t {{{ 520 template <typename _Tp> 521 typename _Tp::value_type 522 __value_type_or_identity_impl(int); 523 524 template <typename _Tp> 525 _Tp 526 __value_type_or_identity_impl(float); 527 528 template <typename _Tp> 529 using __value_type_or_identity_t 530 = decltype(__value_type_or_identity_impl<_Tp>(int())); 531 532 // }}} 533 // __is_vectorizable {{{ 534 template <typename _Tp> 535 struct __is_vectorizable : public is_arithmetic<_Tp> {}; 536 537 template <> 538 struct __is_vectorizable<bool> : public false_type {}; 539 540 template <typename _Tp> 541 inline constexpr bool __is_vectorizable_v = __is_vectorizable<_Tp>::value; 542 543 // Deduces to a vectorizable type 544 template <typename _Tp, typename = enable_if_t<__is_vectorizable_v<_Tp>>> 545 using _Vectorizable = _Tp; 546 547 // }}} 548 // _LoadStorePtr / __is_possible_loadstore_conversion {{{ 549 template <typename _Ptr, typename _ValueType> 550 struct __is_possible_loadstore_conversion 551 : conjunction<__is_vectorizable<_Ptr>, __is_vectorizable<_ValueType>> {}; 552 553 template <> 554 struct __is_possible_loadstore_conversion<bool, bool> : true_type {}; 555 556 // Deduces to a type allowed for load/store with the given value type. 557 template <typename _Ptr, typename _ValueType, 558 typename = enable_if_t< 559 __is_possible_loadstore_conversion<_Ptr, _ValueType>::value>> 560 using _LoadStorePtr = _Ptr; 561 562 // }}} 563 // __is_bitmask{{{ 564 template <typename _Tp, typename = void_t<>> 565 struct __is_bitmask : false_type {}; 566 567 template <typename _Tp> 568 inline constexpr bool __is_bitmask_v = __is_bitmask<_Tp>::value; 569 570 // the __mmaskXX case: 571 template <typename _Tp> 572 struct __is_bitmask<_Tp, 573 void_t<decltype(declval<unsigned&>() = declval<_Tp>() & 1u)>> 574 : true_type {}; 575 576 // }}} 577 // __int_for_sizeof{{{ 578 #pragma GCC diagnostic push 579 #pragma GCC diagnostic ignored "-Wpedantic" 580 template <size_t _Bytes> 581 constexpr auto 582 __int_for_sizeof() 583 { 584 static_assert(_Bytes > 0); 585 if constexpr (_Bytes == sizeof(int)) 586 return int(); 587 #ifdef __clang__ 588 else if constexpr (_Bytes == sizeof(char)) 589 return char(); 590 #else 591 else if constexpr (_Bytes == sizeof(_SChar)) 592 return _SChar(); 593 #endif 594 else if constexpr (_Bytes == sizeof(short)) 595 return short(); 596 #ifndef __clang__ 597 else if constexpr (_Bytes == sizeof(long)) 598 return long(); 599 #endif 600 else if constexpr (_Bytes == sizeof(_LLong)) 601 return _LLong(); 602 #ifdef __SIZEOF_INT128__ 603 else if constexpr (_Bytes == sizeof(__int128)) 604 return __int128(); 605 #endif // __SIZEOF_INT128__ 606 else if constexpr (_Bytes % sizeof(int) == 0) 607 { 608 constexpr size_t _Np = _Bytes / sizeof(int); 609 struct _Ip 610 { 611 int _M_data[_Np]; 612 613 _GLIBCXX_SIMD_INTRINSIC constexpr _Ip 614 operator&(_Ip __rhs) const 615 { 616 return __generate_from_n_evaluations<_Np, _Ip>( 617 [&](auto __i) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA { 618 return __rhs._M_data[__i] & _M_data[__i]; 619 }); 620 } 621 622 _GLIBCXX_SIMD_INTRINSIC constexpr _Ip 623 operator|(_Ip __rhs) const 624 { 625 return __generate_from_n_evaluations<_Np, _Ip>( 626 [&](auto __i) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA { 627 return __rhs._M_data[__i] | _M_data[__i]; 628 }); 629 } 630 631 _GLIBCXX_SIMD_INTRINSIC constexpr _Ip 632 operator^(_Ip __rhs) const 633 { 634 return __generate_from_n_evaluations<_Np, _Ip>( 635 [&](auto __i) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA { 636 return __rhs._M_data[__i] ^ _M_data[__i]; 637 }); 638 } 639 640 _GLIBCXX_SIMD_INTRINSIC constexpr _Ip 641 operator~() const 642 { 643 return __generate_from_n_evaluations<_Np, _Ip>( 644 [&](auto __i) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA { return ~_M_data[__i]; }); 645 } 646 }; 647 return _Ip{}; 648 } 649 else 650 static_assert(_Bytes == 0, "this should be unreachable"); 651 } 652 #pragma GCC diagnostic pop 653 654 template <typename _Tp> 655 using __int_for_sizeof_t = decltype(__int_for_sizeof<sizeof(_Tp)>()); 656 657 template <size_t _Np> 658 using __int_with_sizeof_t = decltype(__int_for_sizeof<_Np>()); 659 660 // }}} 661 // __is_fixed_size_abi{{{ 662 template <typename _Tp> 663 struct __is_fixed_size_abi : false_type {}; 664 665 template <int _Np> 666 struct __is_fixed_size_abi<simd_abi::fixed_size<_Np>> : true_type {}; 667 668 template <typename _Tp> 669 inline constexpr bool __is_fixed_size_abi_v = __is_fixed_size_abi<_Tp>::value; 670 671 // }}} 672 // __is_scalar_abi {{{ 673 template <typename _Abi> 674 constexpr bool 675 __is_scalar_abi() 676 { return is_same_v<simd_abi::scalar, _Abi>; } 677 678 // }}} 679 // __abi_bytes_v {{{ 680 template <template <int> class _Abi, int _Bytes> 681 constexpr int 682 __abi_bytes_impl(_Abi<_Bytes>*) 683 { return _Bytes; } 684 685 template <typename _Tp> 686 constexpr int 687 __abi_bytes_impl(_Tp*) 688 { return -1; } 689 690 template <typename _Abi> 691 inline constexpr int __abi_bytes_v 692 = __abi_bytes_impl(static_cast<_Abi*>(nullptr)); 693 694 // }}} 695 // __is_builtin_bitmask_abi {{{ 696 template <typename _Abi> 697 constexpr bool 698 __is_builtin_bitmask_abi() 699 { return is_same_v<simd_abi::_VecBltnBtmsk<__abi_bytes_v<_Abi>>, _Abi>; } 700 701 // }}} 702 // __is_sse_abi {{{ 703 template <typename _Abi> 704 constexpr bool 705 __is_sse_abi() 706 { 707 constexpr auto _Bytes = __abi_bytes_v<_Abi>; 708 return _Bytes <= 16 && is_same_v<simd_abi::_VecBuiltin<_Bytes>, _Abi>; 709 } 710 711 // }}} 712 // __is_avx_abi {{{ 713 template <typename _Abi> 714 constexpr bool 715 __is_avx_abi() 716 { 717 constexpr auto _Bytes = __abi_bytes_v<_Abi>; 718 return _Bytes > 16 && _Bytes <= 32 719 && is_same_v<simd_abi::_VecBuiltin<_Bytes>, _Abi>; 720 } 721 722 // }}} 723 // __is_avx512_abi {{{ 724 template <typename _Abi> 725 constexpr bool 726 __is_avx512_abi() 727 { 728 constexpr auto _Bytes = __abi_bytes_v<_Abi>; 729 return _Bytes <= 64 && is_same_v<simd_abi::_Avx512<_Bytes>, _Abi>; 730 } 731 732 // }}} 733 // __is_neon_abi {{{ 734 template <typename _Abi> 735 constexpr bool 736 __is_neon_abi() 737 { 738 constexpr auto _Bytes = __abi_bytes_v<_Abi>; 739 return _Bytes <= 16 && is_same_v<simd_abi::_VecBuiltin<_Bytes>, _Abi>; 740 } 741 742 // }}} 743 // __make_dependent_t {{{ 744 template <typename, typename _Up> 745 struct __make_dependent 746 { using type = _Up; }; 747 748 template <typename _Tp, typename _Up> 749 using __make_dependent_t = typename __make_dependent<_Tp, _Up>::type; 750 751 // }}} 752 // ^^^ ---- type traits ---- ^^^ 753 754 // __invoke_ub{{{ 755 template <typename... _Args> 756 [[noreturn]] _GLIBCXX_SIMD_ALWAYS_INLINE void 757 __invoke_ub([[maybe_unused]] const char* __msg, [[maybe_unused]] const _Args&... __args) 758 { 759 #ifdef _GLIBCXX_DEBUG_UB 760 __builtin_fprintf(stderr, __msg, __args...); 761 __builtin_trap(); 762 #else 763 __builtin_unreachable(); 764 #endif 765 } 766 767 // }}} 768 // __assert_unreachable{{{ 769 template <typename _Tp> 770 struct __assert_unreachable 771 { static_assert(!is_same_v<_Tp, _Tp>, "this should be unreachable"); }; 772 773 // }}} 774 // __size_or_zero_v {{{ 775 template <typename _Tp, typename _Ap, size_t _Np = simd_size<_Tp, _Ap>::value> 776 constexpr size_t 777 __size_or_zero_dispatch(int) 778 { return _Np; } 779 780 template <typename _Tp, typename _Ap> 781 constexpr size_t 782 __size_or_zero_dispatch(float) 783 { return 0; } 784 785 template <typename _Tp, typename _Ap> 786 inline constexpr size_t __size_or_zero_v 787 = __size_or_zero_dispatch<_Tp, _Ap>(0); 788 789 // }}} 790 // __div_roundup {{{ 791 inline constexpr size_t 792 __div_roundup(size_t __a, size_t __b) 793 { return (__a + __b - 1) / __b; } 794 795 // }}} 796 // _ExactBool{{{ 797 class _ExactBool 798 { 799 const bool _M_data; 800 801 public: 802 _GLIBCXX_SIMD_INTRINSIC constexpr 803 _ExactBool(bool __b) : _M_data(__b) {} 804 805 _ExactBool(int) = delete; 806 807 _GLIBCXX_SIMD_INTRINSIC constexpr 808 operator bool() const 809 { return _M_data; } 810 }; 811 812 // }}} 813 // __may_alias{{{ 814 /**@internal 815 * Helper __may_alias<_Tp> that turns _Tp into the type to be used for an 816 * aliasing pointer. This adds the __may_alias attribute to _Tp (with compilers 817 * that support it). 818 */ 819 template <typename _Tp> 820 using __may_alias [[__gnu__::__may_alias__]] = _Tp; 821 822 // }}} 823 // _UnsupportedBase {{{ 824 // simd and simd_mask base for unsupported <_Tp, _Abi> 825 struct _UnsupportedBase 826 { 827 _UnsupportedBase() = delete; 828 _UnsupportedBase(const _UnsupportedBase&) = delete; 829 _UnsupportedBase& operator=(const _UnsupportedBase&) = delete; 830 ~_UnsupportedBase() = delete; 831 }; 832 833 // }}} 834 // _InvalidTraits {{{ 835 /** 836 * @internal 837 * Defines the implementation of __a given <_Tp, _Abi>. 838 * 839 * Implementations must ensure that only valid <_Tp, _Abi> instantiations are 840 * possible. Static assertions in the type definition do not suffice. It is 841 * important that SFINAE works. 842 */ 843 struct _InvalidTraits 844 { 845 using _IsValid = false_type; 846 using _SimdBase = _UnsupportedBase; 847 using _MaskBase = _UnsupportedBase; 848 849 static constexpr size_t _S_full_size = 0; 850 static constexpr bool _S_is_partial = false; 851 852 static constexpr size_t _S_simd_align = 1; 853 struct _SimdImpl; 854 struct _SimdMember {}; 855 struct _SimdCastType; 856 857 static constexpr size_t _S_mask_align = 1; 858 struct _MaskImpl; 859 struct _MaskMember {}; 860 struct _MaskCastType; 861 }; 862 863 // }}} 864 // _SimdTraits {{{ 865 template <typename _Tp, typename _Abi, typename = void_t<>> 866 struct _SimdTraits : _InvalidTraits {}; 867 868 // }}} 869 // __private_init, __bitset_init{{{ 870 /** 871 * @internal 872 * Tag used for private init constructor of simd and simd_mask 873 */ 874 inline constexpr struct _PrivateInit {} __private_init = {}; 875 876 inline constexpr struct _BitsetInit {} __bitset_init = {}; 877 878 // }}} 879 // __is_narrowing_conversion<_From, _To>{{{ 880 template <typename _From, typename _To, bool = is_arithmetic_v<_From>, 881 bool = is_arithmetic_v<_To>> 882 struct __is_narrowing_conversion; 883 884 // ignore "signed/unsigned mismatch" in the following trait. 885 // The implicit conversions will do the right thing here. 886 template <typename _From, typename _To> 887 struct __is_narrowing_conversion<_From, _To, true, true> 888 : public __bool_constant<( 889 __digits_v<_From> > __digits_v<_To> 890 || __finite_max_v<_From> > __finite_max_v<_To> 891 || __finite_min_v<_From> < __finite_min_v<_To> 892 || (is_signed_v<_From> && is_unsigned_v<_To>))> {}; 893 894 template <typename _Tp> 895 struct __is_narrowing_conversion<_Tp, bool, true, true> 896 : public true_type {}; 897 898 template <> 899 struct __is_narrowing_conversion<bool, bool, true, true> 900 : public false_type {}; 901 902 template <typename _Tp> 903 struct __is_narrowing_conversion<_Tp, _Tp, true, true> 904 : public false_type {}; 905 906 template <typename _From, typename _To> 907 struct __is_narrowing_conversion<_From, _To, false, true> 908 : public negation<is_convertible<_From, _To>> {}; 909 910 // }}} 911 // __converts_to_higher_integer_rank{{{ 912 template <typename _From, typename _To, bool = (sizeof(_From) < sizeof(_To))> 913 struct __converts_to_higher_integer_rank : public true_type {}; 914 915 // this may fail for char -> short if sizeof(char) == sizeof(short) 916 template <typename _From, typename _To> 917 struct __converts_to_higher_integer_rank<_From, _To, false> 918 : public is_same<decltype(declval<_From>() + declval<_To>()), _To> {}; 919 920 // }}} 921 // __data(simd/simd_mask) {{{ 922 template <typename _Tp, typename _Ap> 923 _GLIBCXX_SIMD_INTRINSIC constexpr const auto& 924 __data(const simd<_Tp, _Ap>& __x); 925 926 template <typename _Tp, typename _Ap> 927 _GLIBCXX_SIMD_INTRINSIC constexpr auto& 928 __data(simd<_Tp, _Ap>& __x); 929 930 template <typename _Tp, typename _Ap> 931 _GLIBCXX_SIMD_INTRINSIC constexpr const auto& 932 __data(const simd_mask<_Tp, _Ap>& __x); 933 934 template <typename _Tp, typename _Ap> 935 _GLIBCXX_SIMD_INTRINSIC constexpr auto& 936 __data(simd_mask<_Tp, _Ap>& __x); 937 938 // }}} 939 // _SimdConverter {{{ 940 template <typename _FromT, typename _FromA, typename _ToT, typename _ToA, 941 typename = void> 942 struct _SimdConverter; 943 944 template <typename _Tp, typename _Ap> 945 struct _SimdConverter<_Tp, _Ap, _Tp, _Ap, void> 946 { 947 template <typename _Up> 948 _GLIBCXX_SIMD_INTRINSIC const _Up& 949 operator()(const _Up& __x) 950 { return __x; } 951 }; 952 953 // }}} 954 // __to_value_type_or_member_type {{{ 955 template <typename _V> 956 _GLIBCXX_SIMD_INTRINSIC constexpr auto 957 __to_value_type_or_member_type(const _V& __x) -> decltype(__data(__x)) 958 { return __data(__x); } 959 960 template <typename _V> 961 _GLIBCXX_SIMD_INTRINSIC constexpr const typename _V::value_type& 962 __to_value_type_or_member_type(const typename _V::value_type& __x) 963 { return __x; } 964 965 // }}} 966 // __bool_storage_member_type{{{ 967 template <size_t _Size> 968 struct __bool_storage_member_type; 969 970 template <size_t _Size> 971 using __bool_storage_member_type_t = 972 typename __bool_storage_member_type<_Size>::type; 973 974 // }}} 975 // _SimdTuple {{{ 976 // why not tuple? 977 // 1. tuple gives no guarantee about the storage order, but I require 978 // storage 979 // equivalent to array<_Tp, _Np> 980 // 2. direct access to the element type (first template argument) 981 // 3. enforces equal element type, only different _Abi types are allowed 982 template <typename _Tp, typename... _Abis> 983 struct _SimdTuple; 984 985 //}}} 986 // __fixed_size_storage_t {{{ 987 template <typename _Tp, int _Np> 988 struct __fixed_size_storage; 989 990 template <typename _Tp, int _Np> 991 using __fixed_size_storage_t = typename __fixed_size_storage<_Tp, _Np>::type; 992 993 // }}} 994 // _SimdWrapper fwd decl{{{ 995 template <typename _Tp, size_t _Size, typename = void_t<>> 996 struct _SimdWrapper; 997 998 template <typename _Tp> 999 using _SimdWrapper8 = _SimdWrapper<_Tp, 8 / sizeof(_Tp)>; 1000 template <typename _Tp> 1001 using _SimdWrapper16 = _SimdWrapper<_Tp, 16 / sizeof(_Tp)>; 1002 template <typename _Tp> 1003 using _SimdWrapper32 = _SimdWrapper<_Tp, 32 / sizeof(_Tp)>; 1004 template <typename _Tp> 1005 using _SimdWrapper64 = _SimdWrapper<_Tp, 64 / sizeof(_Tp)>; 1006 1007 // }}} 1008 // __is_simd_wrapper {{{ 1009 template <typename _Tp> 1010 struct __is_simd_wrapper : false_type {}; 1011 1012 template <typename _Tp, size_t _Np> 1013 struct __is_simd_wrapper<_SimdWrapper<_Tp, _Np>> : true_type {}; 1014 1015 template <typename _Tp> 1016 inline constexpr bool __is_simd_wrapper_v = __is_simd_wrapper<_Tp>::value; 1017 1018 // }}} 1019 // _BitOps {{{ 1020 struct _BitOps 1021 { 1022 // _S_bit_iteration {{{ 1023 template <typename _Tp, typename _Fp> 1024 static void 1025 _S_bit_iteration(_Tp __mask, _Fp&& __f) 1026 { 1027 static_assert(sizeof(_ULLong) >= sizeof(_Tp)); 1028 conditional_t<sizeof(_Tp) <= sizeof(_UInt), _UInt, _ULLong> __k; 1029 if constexpr (is_convertible_v<_Tp, decltype(__k)>) 1030 __k = __mask; 1031 else 1032 __k = __mask.to_ullong(); 1033 while(__k) 1034 { 1035 __f(std::__countr_zero(__k)); 1036 __k &= (__k - 1); 1037 } 1038 } 1039 1040 //}}} 1041 }; 1042 1043 //}}} 1044 // __increment, __decrement {{{ 1045 template <typename _Tp = void> 1046 struct __increment 1047 { constexpr _Tp operator()(_Tp __a) const { return ++__a; } }; 1048 1049 template <> 1050 struct __increment<void> 1051 { 1052 template <typename _Tp> 1053 constexpr _Tp 1054 operator()(_Tp __a) const 1055 { return ++__a; } 1056 }; 1057 1058 template <typename _Tp = void> 1059 struct __decrement 1060 { constexpr _Tp operator()(_Tp __a) const { return --__a; } }; 1061 1062 template <> 1063 struct __decrement<void> 1064 { 1065 template <typename _Tp> 1066 constexpr _Tp 1067 operator()(_Tp __a) const 1068 { return --__a; } 1069 }; 1070 1071 // }}} 1072 // _ValuePreserving(OrInt) {{{ 1073 template <typename _From, typename _To, 1074 typename = enable_if_t<negation< 1075 __is_narrowing_conversion<__remove_cvref_t<_From>, _To>>::value>> 1076 using _ValuePreserving = _From; 1077 1078 template <typename _From, typename _To, 1079 typename _DecayedFrom = __remove_cvref_t<_From>, 1080 typename = enable_if_t<conjunction< 1081 is_convertible<_From, _To>, 1082 disjunction< 1083 is_same<_DecayedFrom, _To>, is_same<_DecayedFrom, int>, 1084 conjunction<is_same<_DecayedFrom, _UInt>, is_unsigned<_To>>, 1085 negation<__is_narrowing_conversion<_DecayedFrom, _To>>>>::value>> 1086 using _ValuePreservingOrInt = _From; 1087 1088 // }}} 1089 // __intrinsic_type {{{ 1090 template <typename _Tp, size_t _Bytes, typename = void_t<>> 1091 struct __intrinsic_type; 1092 1093 template <typename _Tp, size_t _Size> 1094 using __intrinsic_type_t = 1095 typename __intrinsic_type<_Tp, _Size * sizeof(_Tp)>::type; 1096 1097 template <typename _Tp> 1098 using __intrinsic_type2_t = typename __intrinsic_type<_Tp, 2>::type; 1099 template <typename _Tp> 1100 using __intrinsic_type4_t = typename __intrinsic_type<_Tp, 4>::type; 1101 template <typename _Tp> 1102 using __intrinsic_type8_t = typename __intrinsic_type<_Tp, 8>::type; 1103 template <typename _Tp> 1104 using __intrinsic_type16_t = typename __intrinsic_type<_Tp, 16>::type; 1105 template <typename _Tp> 1106 using __intrinsic_type32_t = typename __intrinsic_type<_Tp, 32>::type; 1107 template <typename _Tp> 1108 using __intrinsic_type64_t = typename __intrinsic_type<_Tp, 64>::type; 1109 1110 // }}} 1111 // _BitMask {{{ 1112 template <size_t _Np, bool _Sanitized = false> 1113 struct _BitMask; 1114 1115 template <size_t _Np, bool _Sanitized> 1116 struct __is_bitmask<_BitMask<_Np, _Sanitized>, void> : true_type {}; 1117 1118 template <size_t _Np> 1119 using _SanitizedBitMask = _BitMask<_Np, true>; 1120 1121 template <size_t _Np, bool _Sanitized> 1122 struct _BitMask 1123 { 1124 static_assert(_Np > 0); 1125 1126 static constexpr size_t _NBytes = __div_roundup(_Np, __CHAR_BIT__); 1127 1128 using _Tp = conditional_t<_Np == 1, bool, 1129 make_unsigned_t<__int_with_sizeof_t<std::min( 1130 sizeof(_ULLong), std::__bit_ceil(_NBytes))>>>; 1131 1132 static constexpr int _S_array_size = __div_roundup(_NBytes, sizeof(_Tp)); 1133 1134 _Tp _M_bits[_S_array_size]; 1135 1136 static constexpr int _S_unused_bits 1137 = _Np == 1 ? 0 : _S_array_size * sizeof(_Tp) * __CHAR_BIT__ - _Np; 1138 1139 static constexpr _Tp _S_bitmask = +_Tp(~_Tp()) >> _S_unused_bits; 1140 1141 constexpr _BitMask() noexcept = default; 1142 1143 constexpr _BitMask(unsigned long long __x) noexcept 1144 : _M_bits{static_cast<_Tp>(__x)} {} 1145 1146 _BitMask(bitset<_Np> __x) noexcept : _BitMask(__x.to_ullong()) {} 1147 1148 constexpr _BitMask(const _BitMask&) noexcept = default; 1149 1150 template <bool _RhsSanitized, typename = enable_if_t<_RhsSanitized == false 1151 && _Sanitized == true>> 1152 constexpr _BitMask(const _BitMask<_Np, _RhsSanitized>& __rhs) noexcept 1153 : _BitMask(__rhs._M_sanitized()) {} 1154 1155 constexpr operator _SimdWrapper<bool, _Np>() const noexcept 1156 { 1157 static_assert(_S_array_size == 1); 1158 return _M_bits[0]; 1159 } 1160 1161 // precondition: is sanitized 1162 constexpr _Tp 1163 _M_to_bits() const noexcept 1164 { 1165 static_assert(_S_array_size == 1); 1166 return _M_bits[0]; 1167 } 1168 1169 // precondition: is sanitized 1170 constexpr unsigned long long 1171 to_ullong() const noexcept 1172 { 1173 static_assert(_S_array_size == 1); 1174 return _M_bits[0]; 1175 } 1176 1177 // precondition: is sanitized 1178 constexpr unsigned long 1179 to_ulong() const noexcept 1180 { 1181 static_assert(_S_array_size == 1); 1182 return _M_bits[0]; 1183 } 1184 1185 constexpr bitset<_Np> 1186 _M_to_bitset() const noexcept 1187 { 1188 static_assert(_S_array_size == 1); 1189 return _M_bits[0]; 1190 } 1191 1192 constexpr decltype(auto) 1193 _M_sanitized() const noexcept 1194 { 1195 if constexpr (_Sanitized) 1196 return *this; 1197 else if constexpr (_Np == 1) 1198 return _SanitizedBitMask<_Np>(_M_bits[0]); 1199 else 1200 { 1201 _SanitizedBitMask<_Np> __r = {}; 1202 for (int __i = 0; __i < _S_array_size; ++__i) 1203 __r._M_bits[__i] = _M_bits[__i]; 1204 if constexpr (_S_unused_bits > 0) 1205 __r._M_bits[_S_array_size - 1] &= _S_bitmask; 1206 return __r; 1207 } 1208 } 1209 1210 template <size_t _Mp, bool _LSanitized> 1211 constexpr _BitMask<_Np + _Mp, _Sanitized> 1212 _M_prepend(_BitMask<_Mp, _LSanitized> __lsb) const noexcept 1213 { 1214 constexpr size_t _RN = _Np + _Mp; 1215 using _Rp = _BitMask<_RN, _Sanitized>; 1216 if constexpr (_Rp::_S_array_size == 1) 1217 { 1218 _Rp __r{{_M_bits[0]}}; 1219 __r._M_bits[0] <<= _Mp; 1220 __r._M_bits[0] |= __lsb._M_sanitized()._M_bits[0]; 1221 return __r; 1222 } 1223 else 1224 __assert_unreachable<_Rp>(); 1225 } 1226 1227 // Return a new _BitMask with size _NewSize while dropping _DropLsb least 1228 // significant bits. If the operation implicitly produces a sanitized bitmask, 1229 // the result type will have _Sanitized set. 1230 template <size_t _DropLsb, size_t _NewSize = _Np - _DropLsb> 1231 constexpr auto 1232 _M_extract() const noexcept 1233 { 1234 static_assert(_Np > _DropLsb); 1235 static_assert(_DropLsb + _NewSize <= sizeof(_ULLong) * __CHAR_BIT__, 1236 "not implemented for bitmasks larger than one ullong"); 1237 if constexpr (_NewSize == 1) 1238 // must sanitize because the return _Tp is bool 1239 return _SanitizedBitMask<1>(_M_bits[0] & (_Tp(1) << _DropLsb)); 1240 else 1241 return _BitMask<_NewSize, 1242 ((_NewSize + _DropLsb == sizeof(_Tp) * __CHAR_BIT__ 1243 && _NewSize + _DropLsb <= _Np) 1244 || ((_Sanitized || _Np == sizeof(_Tp) * __CHAR_BIT__) 1245 && _NewSize + _DropLsb >= _Np))>(_M_bits[0] 1246 >> _DropLsb); 1247 } 1248 1249 // True if all bits are set. Implicitly sanitizes if _Sanitized == false. 1250 constexpr bool 1251 all() const noexcept 1252 { 1253 if constexpr (_Np == 1) 1254 return _M_bits[0]; 1255 else if constexpr (!_Sanitized) 1256 return _M_sanitized().all(); 1257 else 1258 { 1259 constexpr _Tp __allbits = ~_Tp(); 1260 for (int __i = 0; __i < _S_array_size - 1; ++__i) 1261 if (_M_bits[__i] != __allbits) 1262 return false; 1263 return _M_bits[_S_array_size - 1] == _S_bitmask; 1264 } 1265 } 1266 1267 // True if at least one bit is set. Implicitly sanitizes if _Sanitized == 1268 // false. 1269 constexpr bool 1270 any() const noexcept 1271 { 1272 if constexpr (_Np == 1) 1273 return _M_bits[0]; 1274 else if constexpr (!_Sanitized) 1275 return _M_sanitized().any(); 1276 else 1277 { 1278 for (int __i = 0; __i < _S_array_size - 1; ++__i) 1279 if (_M_bits[__i] != 0) 1280 return true; 1281 return _M_bits[_S_array_size - 1] != 0; 1282 } 1283 } 1284 1285 // True if no bit is set. Implicitly sanitizes if _Sanitized == false. 1286 constexpr bool 1287 none() const noexcept 1288 { 1289 if constexpr (_Np == 1) 1290 return !_M_bits[0]; 1291 else if constexpr (!_Sanitized) 1292 return _M_sanitized().none(); 1293 else 1294 { 1295 for (int __i = 0; __i < _S_array_size - 1; ++__i) 1296 if (_M_bits[__i] != 0) 1297 return false; 1298 return _M_bits[_S_array_size - 1] == 0; 1299 } 1300 } 1301 1302 // Returns the number of set bits. Implicitly sanitizes if _Sanitized == 1303 // false. 1304 constexpr int 1305 count() const noexcept 1306 { 1307 if constexpr (_Np == 1) 1308 return _M_bits[0]; 1309 else if constexpr (!_Sanitized) 1310 return _M_sanitized().none(); 1311 else 1312 { 1313 int __result = __builtin_popcountll(_M_bits[0]); 1314 for (int __i = 1; __i < _S_array_size; ++__i) 1315 __result += __builtin_popcountll(_M_bits[__i]); 1316 return __result; 1317 } 1318 } 1319 1320 // Returns the bit at offset __i as bool. 1321 constexpr bool 1322 operator[](size_t __i) const noexcept 1323 { 1324 if constexpr (_Np == 1) 1325 return _M_bits[0]; 1326 else if constexpr (_S_array_size == 1) 1327 return (_M_bits[0] >> __i) & 1; 1328 else 1329 { 1330 const size_t __j = __i / (sizeof(_Tp) * __CHAR_BIT__); 1331 const size_t __shift = __i % (sizeof(_Tp) * __CHAR_BIT__); 1332 return (_M_bits[__j] >> __shift) & 1; 1333 } 1334 } 1335 1336 template <size_t __i> 1337 constexpr bool 1338 operator[](_SizeConstant<__i>) const noexcept 1339 { 1340 static_assert(__i < _Np); 1341 constexpr size_t __j = __i / (sizeof(_Tp) * __CHAR_BIT__); 1342 constexpr size_t __shift = __i % (sizeof(_Tp) * __CHAR_BIT__); 1343 return static_cast<bool>(_M_bits[__j] & (_Tp(1) << __shift)); 1344 } 1345 1346 // Set the bit at offset __i to __x. 1347 constexpr void 1348 set(size_t __i, bool __x) noexcept 1349 { 1350 if constexpr (_Np == 1) 1351 _M_bits[0] = __x; 1352 else if constexpr (_S_array_size == 1) 1353 { 1354 _M_bits[0] &= ~_Tp(_Tp(1) << __i); 1355 _M_bits[0] |= _Tp(_Tp(__x) << __i); 1356 } 1357 else 1358 { 1359 const size_t __j = __i / (sizeof(_Tp) * __CHAR_BIT__); 1360 const size_t __shift = __i % (sizeof(_Tp) * __CHAR_BIT__); 1361 _M_bits[__j] &= ~_Tp(_Tp(1) << __shift); 1362 _M_bits[__j] |= _Tp(_Tp(__x) << __shift); 1363 } 1364 } 1365 1366 template <size_t __i> 1367 constexpr void 1368 set(_SizeConstant<__i>, bool __x) noexcept 1369 { 1370 static_assert(__i < _Np); 1371 if constexpr (_Np == 1) 1372 _M_bits[0] = __x; 1373 else 1374 { 1375 constexpr size_t __j = __i / (sizeof(_Tp) * __CHAR_BIT__); 1376 constexpr size_t __shift = __i % (sizeof(_Tp) * __CHAR_BIT__); 1377 constexpr _Tp __mask = ~_Tp(_Tp(1) << __shift); 1378 _M_bits[__j] &= __mask; 1379 _M_bits[__j] |= _Tp(_Tp(__x) << __shift); 1380 } 1381 } 1382 1383 // Inverts all bits. Sanitized input leads to sanitized output. 1384 constexpr _BitMask 1385 operator~() const noexcept 1386 { 1387 if constexpr (_Np == 1) 1388 return !_M_bits[0]; 1389 else 1390 { 1391 _BitMask __result{}; 1392 for (int __i = 0; __i < _S_array_size - 1; ++__i) 1393 __result._M_bits[__i] = ~_M_bits[__i]; 1394 if constexpr (_Sanitized) 1395 __result._M_bits[_S_array_size - 1] 1396 = _M_bits[_S_array_size - 1] ^ _S_bitmask; 1397 else 1398 __result._M_bits[_S_array_size - 1] = ~_M_bits[_S_array_size - 1]; 1399 return __result; 1400 } 1401 } 1402 1403 constexpr _BitMask& 1404 operator^=(const _BitMask& __b) & noexcept 1405 { 1406 __execute_n_times<_S_array_size>( 1407 [&](auto __i) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA { _M_bits[__i] ^= __b._M_bits[__i]; }); 1408 return *this; 1409 } 1410 1411 constexpr _BitMask& 1412 operator|=(const _BitMask& __b) & noexcept 1413 { 1414 __execute_n_times<_S_array_size>( 1415 [&](auto __i) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA { _M_bits[__i] |= __b._M_bits[__i]; }); 1416 return *this; 1417 } 1418 1419 constexpr _BitMask& 1420 operator&=(const _BitMask& __b) & noexcept 1421 { 1422 __execute_n_times<_S_array_size>( 1423 [&](auto __i) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA { _M_bits[__i] &= __b._M_bits[__i]; }); 1424 return *this; 1425 } 1426 1427 friend constexpr _BitMask 1428 operator^(const _BitMask& __a, const _BitMask& __b) noexcept 1429 { 1430 _BitMask __r = __a; 1431 __r ^= __b; 1432 return __r; 1433 } 1434 1435 friend constexpr _BitMask 1436 operator|(const _BitMask& __a, const _BitMask& __b) noexcept 1437 { 1438 _BitMask __r = __a; 1439 __r |= __b; 1440 return __r; 1441 } 1442 1443 friend constexpr _BitMask 1444 operator&(const _BitMask& __a, const _BitMask& __b) noexcept 1445 { 1446 _BitMask __r = __a; 1447 __r &= __b; 1448 return __r; 1449 } 1450 1451 _GLIBCXX_SIMD_INTRINSIC 1452 constexpr bool 1453 _M_is_constprop() const 1454 { 1455 if constexpr (_S_array_size == 0) 1456 return __builtin_constant_p(_M_bits[0]); 1457 else 1458 { 1459 for (int __i = 0; __i < _S_array_size; ++__i) 1460 if (!__builtin_constant_p(_M_bits[__i])) 1461 return false; 1462 return true; 1463 } 1464 } 1465 }; 1466 1467 // }}} 1468 1469 // vvv ---- builtin vector types [[gnu::vector_size(N)]] and operations ---- vvv 1470 // __min_vector_size {{{ 1471 template <typename _Tp = void> 1472 static inline constexpr int __min_vector_size = 2 * sizeof(_Tp); 1473 1474 #if _GLIBCXX_SIMD_HAVE_NEON 1475 template <> 1476 inline constexpr int __min_vector_size<void> = 8; 1477 #else 1478 template <> 1479 inline constexpr int __min_vector_size<void> = 16; 1480 #endif 1481 1482 // }}} 1483 // __vector_type {{{ 1484 template <typename _Tp, size_t _Np, typename = void> 1485 struct __vector_type_n {}; 1486 1487 // substition failure for 0-element case 1488 template <typename _Tp> 1489 struct __vector_type_n<_Tp, 0, void> {}; 1490 1491 // special case 1-element to be _Tp itself 1492 template <typename _Tp> 1493 struct __vector_type_n<_Tp, 1, enable_if_t<__is_vectorizable_v<_Tp>>> 1494 { using type = _Tp; }; 1495 1496 // else, use GNU-style builtin vector types 1497 template <typename _Tp, size_t _Np> 1498 struct __vector_type_n<_Tp, _Np, enable_if_t<__is_vectorizable_v<_Tp> && _Np >= 2>> 1499 { 1500 static constexpr size_t _S_Np2 = std::__bit_ceil(_Np * sizeof(_Tp)); 1501 1502 static constexpr size_t _S_Bytes = 1503 #ifdef __i386__ 1504 // Using [[gnu::vector_size(8)]] would wreak havoc on the FPU because 1505 // those objects are passed via MMX registers and nothing ever calls EMMS. 1506 _S_Np2 == 8 ? 16 : 1507 #endif 1508 _S_Np2 < __min_vector_size<_Tp> ? __min_vector_size<_Tp> 1509 : _S_Np2; 1510 1511 using type [[__gnu__::__vector_size__(_S_Bytes)]] = _Tp; 1512 }; 1513 1514 template <typename _Tp, size_t _Bytes, size_t = _Bytes % sizeof(_Tp)> 1515 struct __vector_type; 1516 1517 template <typename _Tp, size_t _Bytes> 1518 struct __vector_type<_Tp, _Bytes, 0> 1519 : __vector_type_n<_Tp, _Bytes / sizeof(_Tp)> {}; 1520 1521 template <typename _Tp, size_t _Size> 1522 using __vector_type_t = typename __vector_type_n<_Tp, _Size>::type; 1523 1524 template <typename _Tp> 1525 using __vector_type2_t = typename __vector_type<_Tp, 2>::type; 1526 template <typename _Tp> 1527 using __vector_type4_t = typename __vector_type<_Tp, 4>::type; 1528 template <typename _Tp> 1529 using __vector_type8_t = typename __vector_type<_Tp, 8>::type; 1530 template <typename _Tp> 1531 using __vector_type16_t = typename __vector_type<_Tp, 16>::type; 1532 template <typename _Tp> 1533 using __vector_type32_t = typename __vector_type<_Tp, 32>::type; 1534 template <typename _Tp> 1535 using __vector_type64_t = typename __vector_type<_Tp, 64>::type; 1536 1537 // }}} 1538 // __is_vector_type {{{ 1539 template <typename _Tp, typename = void_t<>> 1540 struct __is_vector_type : false_type {}; 1541 1542 template <typename _Tp> 1543 struct __is_vector_type< 1544 _Tp, void_t<typename __vector_type< 1545 remove_reference_t<decltype(declval<_Tp>()[0])>, sizeof(_Tp)>::type>> 1546 : is_same<_Tp, typename __vector_type< 1547 remove_reference_t<decltype(declval<_Tp>()[0])>, 1548 sizeof(_Tp)>::type> {}; 1549 1550 template <typename _Tp> 1551 inline constexpr bool __is_vector_type_v = __is_vector_type<_Tp>::value; 1552 1553 // }}} 1554 // __is_intrinsic_type {{{ 1555 #if _GLIBCXX_SIMD_HAVE_SSE_ABI 1556 template <typename _Tp> 1557 using __is_intrinsic_type = __is_vector_type<_Tp>; 1558 #else // not SSE (x86) 1559 template <typename _Tp, typename = void_t<>> 1560 struct __is_intrinsic_type : false_type {}; 1561 1562 template <typename _Tp> 1563 struct __is_intrinsic_type< 1564 _Tp, void_t<typename __intrinsic_type< 1565 remove_reference_t<decltype(declval<_Tp>()[0])>, sizeof(_Tp)>::type>> 1566 : is_same<_Tp, typename __intrinsic_type< 1567 remove_reference_t<decltype(declval<_Tp>()[0])>, 1568 sizeof(_Tp)>::type> {}; 1569 #endif 1570 1571 template <typename _Tp> 1572 inline constexpr bool __is_intrinsic_type_v = __is_intrinsic_type<_Tp>::value; 1573 1574 // }}} 1575 // _VectorTraits{{{ 1576 template <typename _Tp, typename = void_t<>> 1577 struct _VectorTraitsImpl; 1578 1579 template <typename _Tp> 1580 struct _VectorTraitsImpl<_Tp, enable_if_t<__is_vector_type_v<_Tp> 1581 || __is_intrinsic_type_v<_Tp>>> 1582 { 1583 using type = _Tp; 1584 using value_type = remove_reference_t<decltype(declval<_Tp>()[0])>; 1585 static constexpr int _S_full_size = sizeof(_Tp) / sizeof(value_type); 1586 using _Wrapper = _SimdWrapper<value_type, _S_full_size>; 1587 template <typename _Up, int _W = _S_full_size> 1588 static constexpr bool _S_is 1589 = is_same_v<value_type, _Up> && _W == _S_full_size; 1590 }; 1591 1592 template <typename _Tp, size_t _Np> 1593 struct _VectorTraitsImpl<_SimdWrapper<_Tp, _Np>, 1594 void_t<__vector_type_t<_Tp, _Np>>> 1595 { 1596 using type = __vector_type_t<_Tp, _Np>; 1597 using value_type = _Tp; 1598 static constexpr int _S_full_size = sizeof(type) / sizeof(value_type); 1599 using _Wrapper = _SimdWrapper<_Tp, _Np>; 1600 static constexpr bool _S_is_partial = (_Np == _S_full_size); 1601 static constexpr int _S_partial_width = _Np; 1602 template <typename _Up, int _W = _S_full_size> 1603 static constexpr bool _S_is 1604 = is_same_v<value_type, _Up>&& _W == _S_full_size; 1605 }; 1606 1607 template <typename _Tp, typename = typename _VectorTraitsImpl<_Tp>::type> 1608 using _VectorTraits = _VectorTraitsImpl<_Tp>; 1609 1610 // }}} 1611 // __as_vector{{{ 1612 template <typename _V> 1613 _GLIBCXX_SIMD_INTRINSIC constexpr auto 1614 __as_vector(_V __x) 1615 { 1616 if constexpr (__is_vector_type_v<_V>) 1617 return __x; 1618 else if constexpr (is_simd<_V>::value || is_simd_mask<_V>::value) 1619 return __data(__x)._M_data; 1620 else if constexpr (__is_vectorizable_v<_V>) 1621 return __vector_type_t<_V, 2>{__x}; 1622 else 1623 return __x._M_data; 1624 } 1625 1626 // }}} 1627 // __as_wrapper{{{ 1628 template <size_t _Np = 0, typename _V> 1629 _GLIBCXX_SIMD_INTRINSIC constexpr auto 1630 __as_wrapper(_V __x) 1631 { 1632 if constexpr (__is_vector_type_v<_V>) 1633 return _SimdWrapper<typename _VectorTraits<_V>::value_type, 1634 (_Np > 0 ? _Np : _VectorTraits<_V>::_S_full_size)>(__x); 1635 else if constexpr (is_simd<_V>::value || is_simd_mask<_V>::value) 1636 { 1637 static_assert(_V::size() == _Np); 1638 return __data(__x); 1639 } 1640 else 1641 { 1642 static_assert(_V::_S_size == _Np); 1643 return __x; 1644 } 1645 } 1646 1647 // }}} 1648 // __intrin_bitcast{{{ 1649 template <typename _To, typename _From> 1650 _GLIBCXX_SIMD_INTRINSIC constexpr _To 1651 __intrin_bitcast(_From __v) 1652 { 1653 static_assert((__is_vector_type_v<_From> || __is_intrinsic_type_v<_From>) 1654 && (__is_vector_type_v<_To> || __is_intrinsic_type_v<_To>)); 1655 if constexpr (sizeof(_To) == sizeof(_From)) 1656 return reinterpret_cast<_To>(__v); 1657 else if constexpr (sizeof(_From) > sizeof(_To)) 1658 if constexpr (sizeof(_To) >= 16) 1659 return reinterpret_cast<const __may_alias<_To>&>(__v); 1660 else 1661 { 1662 _To __r; 1663 __builtin_memcpy(&__r, &__v, sizeof(_To)); 1664 return __r; 1665 } 1666 #if _GLIBCXX_SIMD_X86INTRIN && !defined __clang__ 1667 else if constexpr (__have_avx && sizeof(_From) == 16 && sizeof(_To) == 32) 1668 return reinterpret_cast<_To>(__builtin_ia32_ps256_ps( 1669 reinterpret_cast<__vector_type_t<float, 4>>(__v))); 1670 else if constexpr (__have_avx512f && sizeof(_From) == 16 1671 && sizeof(_To) == 64) 1672 return reinterpret_cast<_To>(__builtin_ia32_ps512_ps( 1673 reinterpret_cast<__vector_type_t<float, 4>>(__v))); 1674 else if constexpr (__have_avx512f && sizeof(_From) == 32 1675 && sizeof(_To) == 64) 1676 return reinterpret_cast<_To>(__builtin_ia32_ps512_256ps( 1677 reinterpret_cast<__vector_type_t<float, 8>>(__v))); 1678 #endif // _GLIBCXX_SIMD_X86INTRIN 1679 else if constexpr (sizeof(__v) <= 8) 1680 return reinterpret_cast<_To>( 1681 __vector_type_t<__int_for_sizeof_t<_From>, sizeof(_To) / sizeof(_From)>{ 1682 reinterpret_cast<__int_for_sizeof_t<_From>>(__v)}); 1683 else 1684 { 1685 static_assert(sizeof(_To) > sizeof(_From)); 1686 _To __r = {}; 1687 __builtin_memcpy(&__r, &__v, sizeof(_From)); 1688 return __r; 1689 } 1690 } 1691 1692 // }}} 1693 // __vector_bitcast{{{ 1694 template <typename _To, size_t _NN = 0, typename _From, 1695 typename _FromVT = _VectorTraits<_From>, 1696 size_t _Np = _NN == 0 ? sizeof(_From) / sizeof(_To) : _NN> 1697 _GLIBCXX_SIMD_INTRINSIC constexpr __vector_type_t<_To, _Np> 1698 __vector_bitcast(_From __x) 1699 { 1700 using _R = __vector_type_t<_To, _Np>; 1701 return __intrin_bitcast<_R>(__x); 1702 } 1703 1704 template <typename _To, size_t _NN = 0, typename _Tp, size_t _Nx, 1705 size_t _Np 1706 = _NN == 0 ? sizeof(_SimdWrapper<_Tp, _Nx>) / sizeof(_To) : _NN> 1707 _GLIBCXX_SIMD_INTRINSIC constexpr __vector_type_t<_To, _Np> 1708 __vector_bitcast(const _SimdWrapper<_Tp, _Nx>& __x) 1709 { 1710 static_assert(_Np > 1); 1711 return __intrin_bitcast<__vector_type_t<_To, _Np>>(__x._M_data); 1712 } 1713 1714 // }}} 1715 // __convert_x86 declarations {{{ 1716 #ifdef _GLIBCXX_SIMD_WORKAROUND_PR85048 1717 template <typename _To, typename _Tp, typename _TVT = _VectorTraits<_Tp>> 1718 _To __convert_x86(_Tp); 1719 1720 template <typename _To, typename _Tp, typename _TVT = _VectorTraits<_Tp>> 1721 _To __convert_x86(_Tp, _Tp); 1722 1723 template <typename _To, typename _Tp, typename _TVT = _VectorTraits<_Tp>> 1724 _To __convert_x86(_Tp, _Tp, _Tp, _Tp); 1725 1726 template <typename _To, typename _Tp, typename _TVT = _VectorTraits<_Tp>> 1727 _To __convert_x86(_Tp, _Tp, _Tp, _Tp, _Tp, _Tp, _Tp, _Tp); 1728 1729 template <typename _To, typename _Tp, typename _TVT = _VectorTraits<_Tp>> 1730 _To __convert_x86(_Tp, _Tp, _Tp, _Tp, _Tp, _Tp, _Tp, _Tp, _Tp, _Tp, _Tp, _Tp, 1731 _Tp, _Tp, _Tp, _Tp); 1732 #endif // _GLIBCXX_SIMD_WORKAROUND_PR85048 1733 1734 //}}} 1735 // __bit_cast {{{ 1736 template <typename _To, typename _From> 1737 _GLIBCXX_SIMD_INTRINSIC constexpr _To 1738 __bit_cast(const _From __x) 1739 { 1740 #if __has_builtin(__builtin_bit_cast) 1741 return __builtin_bit_cast(_To, __x); 1742 #else 1743 static_assert(sizeof(_To) == sizeof(_From)); 1744 constexpr bool __to_is_vectorizable 1745 = is_arithmetic_v<_To> || is_enum_v<_To>; 1746 constexpr bool __from_is_vectorizable 1747 = is_arithmetic_v<_From> || is_enum_v<_From>; 1748 if constexpr (__is_vector_type_v<_To> && __is_vector_type_v<_From>) 1749 return reinterpret_cast<_To>(__x); 1750 else if constexpr (__is_vector_type_v<_To> && __from_is_vectorizable) 1751 { 1752 using _FV [[__gnu__::__vector_size__(sizeof(_From))]] = _From; 1753 return reinterpret_cast<_To>(_FV{__x}); 1754 } 1755 else if constexpr (__to_is_vectorizable && __from_is_vectorizable) 1756 { 1757 using _TV [[__gnu__::__vector_size__(sizeof(_To))]] = _To; 1758 using _FV [[__gnu__::__vector_size__(sizeof(_From))]] = _From; 1759 return reinterpret_cast<_TV>(_FV{__x})[0]; 1760 } 1761 else if constexpr (__to_is_vectorizable && __is_vector_type_v<_From>) 1762 { 1763 using _TV [[__gnu__::__vector_size__(sizeof(_To))]] = _To; 1764 return reinterpret_cast<_TV>(__x)[0]; 1765 } 1766 else 1767 { 1768 _To __r; 1769 __builtin_memcpy(reinterpret_cast<char*>(&__r), 1770 reinterpret_cast<const char*>(&__x), sizeof(_To)); 1771 return __r; 1772 } 1773 #endif 1774 } 1775 1776 // }}} 1777 // __to_intrin {{{ 1778 template <typename _Tp, typename _TVT = _VectorTraits<_Tp>, 1779 typename _R = __intrinsic_type_t<typename _TVT::value_type, _TVT::_S_full_size>> 1780 _GLIBCXX_SIMD_INTRINSIC constexpr _R 1781 __to_intrin(_Tp __x) 1782 { 1783 static_assert(sizeof(__x) <= sizeof(_R), 1784 "__to_intrin may never drop values off the end"); 1785 if constexpr (sizeof(__x) == sizeof(_R)) 1786 return reinterpret_cast<_R>(__as_vector(__x)); 1787 else 1788 { 1789 using _Up = __int_for_sizeof_t<_Tp>; 1790 return reinterpret_cast<_R>( 1791 __vector_type_t<_Up, sizeof(_R) / sizeof(_Up)>{__bit_cast<_Up>(__x)}); 1792 } 1793 } 1794 1795 // }}} 1796 // __make_vector{{{ 1797 template <typename _Tp, typename... _Args> 1798 _GLIBCXX_SIMD_INTRINSIC constexpr __vector_type_t<_Tp, sizeof...(_Args)> 1799 __make_vector(const _Args&... __args) 1800 { return __vector_type_t<_Tp, sizeof...(_Args)>{static_cast<_Tp>(__args)...}; } 1801 1802 // }}} 1803 // __vector_broadcast{{{ 1804 template <size_t _Np, typename _Tp, size_t... _I> 1805 _GLIBCXX_SIMD_INTRINSIC constexpr __vector_type_t<_Tp, _Np> 1806 __vector_broadcast_impl(_Tp __x, index_sequence<_I...>) 1807 { return __vector_type_t<_Tp, _Np>{((void)_I, __x)...}; } 1808 1809 template <size_t _Np, typename _Tp> 1810 _GLIBCXX_SIMD_INTRINSIC constexpr __vector_type_t<_Tp, _Np> 1811 __vector_broadcast(_Tp __x) 1812 { return __vector_broadcast_impl<_Np, _Tp>(__x, make_index_sequence<_Np>()); } 1813 1814 // }}} 1815 // __generate_vector{{{ 1816 template <typename _Tp, size_t _Np, typename _Gp, size_t... _I> 1817 _GLIBCXX_SIMD_INTRINSIC constexpr __vector_type_t<_Tp, _Np> 1818 __generate_vector_impl(_Gp&& __gen, index_sequence<_I...>) 1819 { return __vector_type_t<_Tp, _Np>{ static_cast<_Tp>(__gen(_SizeConstant<_I>()))...}; } 1820 1821 template <typename _V, typename _VVT = _VectorTraits<_V>, typename _Gp> 1822 _GLIBCXX_SIMD_INTRINSIC constexpr _V 1823 __generate_vector(_Gp&& __gen) 1824 { 1825 if constexpr (__is_vector_type_v<_V>) 1826 return __generate_vector_impl<typename _VVT::value_type, 1827 _VVT::_S_full_size>( 1828 static_cast<_Gp&&>(__gen), make_index_sequence<_VVT::_S_full_size>()); 1829 else 1830 return __generate_vector_impl<typename _VVT::value_type, 1831 _VVT::_S_partial_width>( 1832 static_cast<_Gp&&>(__gen), 1833 make_index_sequence<_VVT::_S_partial_width>()); 1834 } 1835 1836 template <typename _Tp, size_t _Np, typename _Gp> 1837 _GLIBCXX_SIMD_INTRINSIC constexpr __vector_type_t<_Tp, _Np> 1838 __generate_vector(_Gp&& __gen) 1839 { 1840 return __generate_vector_impl<_Tp, _Np>(static_cast<_Gp&&>(__gen), 1841 make_index_sequence<_Np>()); 1842 } 1843 1844 // }}} 1845 // __xor{{{ 1846 template <typename _TW> 1847 _GLIBCXX_SIMD_INTRINSIC constexpr _TW 1848 __xor(_TW __a, _TW __b) noexcept 1849 { 1850 if constexpr (__is_vector_type_v<_TW> || __is_simd_wrapper_v<_TW>) 1851 { 1852 using _Tp = typename conditional_t<__is_simd_wrapper_v<_TW>, _TW, 1853 _VectorTraitsImpl<_TW>>::value_type; 1854 if constexpr (is_floating_point_v<_Tp>) 1855 { 1856 using _Ip = make_unsigned_t<__int_for_sizeof_t<_Tp>>; 1857 return __vector_bitcast<_Tp>(__vector_bitcast<_Ip>(__a) 1858 ^ __vector_bitcast<_Ip>(__b)); 1859 } 1860 else if constexpr (__is_vector_type_v<_TW>) 1861 return __a ^ __b; 1862 else 1863 return __a._M_data ^ __b._M_data; 1864 } 1865 else 1866 return __a ^ __b; 1867 } 1868 1869 // }}} 1870 // __or{{{ 1871 template <typename _TW> 1872 _GLIBCXX_SIMD_INTRINSIC constexpr _TW 1873 __or(_TW __a, _TW __b) noexcept 1874 { 1875 if constexpr (__is_vector_type_v<_TW> || __is_simd_wrapper_v<_TW>) 1876 { 1877 using _Tp = typename conditional_t<__is_simd_wrapper_v<_TW>, _TW, 1878 _VectorTraitsImpl<_TW>>::value_type; 1879 if constexpr (is_floating_point_v<_Tp>) 1880 { 1881 using _Ip = make_unsigned_t<__int_for_sizeof_t<_Tp>>; 1882 return __vector_bitcast<_Tp>(__vector_bitcast<_Ip>(__a) 1883 | __vector_bitcast<_Ip>(__b)); 1884 } 1885 else if constexpr (__is_vector_type_v<_TW>) 1886 return __a | __b; 1887 else 1888 return __a._M_data | __b._M_data; 1889 } 1890 else 1891 return __a | __b; 1892 } 1893 1894 // }}} 1895 // __and{{{ 1896 template <typename _TW> 1897 _GLIBCXX_SIMD_INTRINSIC constexpr _TW 1898 __and(_TW __a, _TW __b) noexcept 1899 { 1900 if constexpr (__is_vector_type_v<_TW> || __is_simd_wrapper_v<_TW>) 1901 { 1902 using _Tp = typename conditional_t<__is_simd_wrapper_v<_TW>, _TW, 1903 _VectorTraitsImpl<_TW>>::value_type; 1904 if constexpr (is_floating_point_v<_Tp>) 1905 { 1906 using _Ip = make_unsigned_t<__int_for_sizeof_t<_Tp>>; 1907 return __vector_bitcast<_Tp>(__vector_bitcast<_Ip>(__a) 1908 & __vector_bitcast<_Ip>(__b)); 1909 } 1910 else if constexpr (__is_vector_type_v<_TW>) 1911 return __a & __b; 1912 else 1913 return __a._M_data & __b._M_data; 1914 } 1915 else 1916 return __a & __b; 1917 } 1918 1919 // }}} 1920 // __andnot{{{ 1921 #if _GLIBCXX_SIMD_X86INTRIN && !defined __clang__ 1922 static constexpr struct 1923 { 1924 _GLIBCXX_SIMD_INTRINSIC __v4sf 1925 operator()(__v4sf __a, __v4sf __b) const noexcept 1926 { return __builtin_ia32_andnps(__a, __b); } 1927 1928 _GLIBCXX_SIMD_INTRINSIC __v2df 1929 operator()(__v2df __a, __v2df __b) const noexcept 1930 { return __builtin_ia32_andnpd(__a, __b); } 1931 1932 _GLIBCXX_SIMD_INTRINSIC __v2di 1933 operator()(__v2di __a, __v2di __b) const noexcept 1934 { return __builtin_ia32_pandn128(__a, __b); } 1935 1936 _GLIBCXX_SIMD_INTRINSIC __v8sf 1937 operator()(__v8sf __a, __v8sf __b) const noexcept 1938 { return __builtin_ia32_andnps256(__a, __b); } 1939 1940 _GLIBCXX_SIMD_INTRINSIC __v4df 1941 operator()(__v4df __a, __v4df __b) const noexcept 1942 { return __builtin_ia32_andnpd256(__a, __b); } 1943 1944 _GLIBCXX_SIMD_INTRINSIC __v4di 1945 operator()(__v4di __a, __v4di __b) const noexcept 1946 { 1947 if constexpr (__have_avx2) 1948 return __builtin_ia32_andnotsi256(__a, __b); 1949 else 1950 return reinterpret_cast<__v4di>( 1951 __builtin_ia32_andnpd256(reinterpret_cast<__v4df>(__a), 1952 reinterpret_cast<__v4df>(__b))); 1953 } 1954 1955 _GLIBCXX_SIMD_INTRINSIC __v16sf 1956 operator()(__v16sf __a, __v16sf __b) const noexcept 1957 { 1958 if constexpr (__have_avx512dq) 1959 return _mm512_andnot_ps(__a, __b); 1960 else 1961 return reinterpret_cast<__v16sf>( 1962 _mm512_andnot_si512(reinterpret_cast<__v8di>(__a), 1963 reinterpret_cast<__v8di>(__b))); 1964 } 1965 1966 _GLIBCXX_SIMD_INTRINSIC __v8df 1967 operator()(__v8df __a, __v8df __b) const noexcept 1968 { 1969 if constexpr (__have_avx512dq) 1970 return _mm512_andnot_pd(__a, __b); 1971 else 1972 return reinterpret_cast<__v8df>( 1973 _mm512_andnot_si512(reinterpret_cast<__v8di>(__a), 1974 reinterpret_cast<__v8di>(__b))); 1975 } 1976 1977 _GLIBCXX_SIMD_INTRINSIC __v8di 1978 operator()(__v8di __a, __v8di __b) const noexcept 1979 { return _mm512_andnot_si512(__a, __b); } 1980 } _S_x86_andnot; 1981 #endif // _GLIBCXX_SIMD_X86INTRIN && !__clang__ 1982 1983 template <typename _TW> 1984 _GLIBCXX_SIMD_INTRINSIC constexpr _TW 1985 __andnot(_TW __a, _TW __b) noexcept 1986 { 1987 if constexpr (__is_vector_type_v<_TW> || __is_simd_wrapper_v<_TW>) 1988 { 1989 using _TVT = conditional_t<__is_simd_wrapper_v<_TW>, _TW, 1990 _VectorTraitsImpl<_TW>>; 1991 using _Tp = typename _TVT::value_type; 1992 #if _GLIBCXX_SIMD_X86INTRIN && !defined __clang__ 1993 if constexpr (sizeof(_TW) >= 16) 1994 { 1995 const auto __ai = __to_intrin(__a); 1996 const auto __bi = __to_intrin(__b); 1997 if (!__builtin_is_constant_evaluated() 1998 && !(__builtin_constant_p(__ai) && __builtin_constant_p(__bi))) 1999 { 2000 const auto __r = _S_x86_andnot(__ai, __bi); 2001 if constexpr (is_convertible_v<decltype(__r), _TW>) 2002 return __r; 2003 else 2004 return reinterpret_cast<typename _TVT::type>(__r); 2005 } 2006 } 2007 #endif // _GLIBCXX_SIMD_X86INTRIN 2008 using _Ip = make_unsigned_t<__int_for_sizeof_t<_Tp>>; 2009 return __vector_bitcast<_Tp>(~__vector_bitcast<_Ip>(__a) 2010 & __vector_bitcast<_Ip>(__b)); 2011 } 2012 else 2013 return ~__a & __b; 2014 } 2015 2016 // }}} 2017 // __not{{{ 2018 template <typename _Tp, typename _TVT = _VectorTraits<_Tp>> 2019 _GLIBCXX_SIMD_INTRINSIC constexpr _Tp 2020 __not(_Tp __a) noexcept 2021 { 2022 if constexpr (is_floating_point_v<typename _TVT::value_type>) 2023 return reinterpret_cast<typename _TVT::type>( 2024 ~__vector_bitcast<unsigned>(__a)); 2025 else 2026 return ~__a; 2027 } 2028 2029 // }}} 2030 // __concat{{{ 2031 template <typename _Tp, typename _TVT = _VectorTraits<_Tp>, 2032 typename _R = __vector_type_t<typename _TVT::value_type, _TVT::_S_full_size * 2>> 2033 constexpr _R 2034 __concat(_Tp a_, _Tp b_) 2035 { 2036 #ifdef _GLIBCXX_SIMD_WORKAROUND_XXX_1 2037 using _W 2038 = conditional_t<is_floating_point_v<typename _TVT::value_type>, double, 2039 conditional_t<(sizeof(_Tp) >= 2 * sizeof(long long)), 2040 long long, typename _TVT::value_type>>; 2041 constexpr int input_width = sizeof(_Tp) / sizeof(_W); 2042 const auto __a = __vector_bitcast<_W>(a_); 2043 const auto __b = __vector_bitcast<_W>(b_); 2044 using _Up = __vector_type_t<_W, sizeof(_R) / sizeof(_W)>; 2045 #else 2046 constexpr int input_width = _TVT::_S_full_size; 2047 const _Tp& __a = a_; 2048 const _Tp& __b = b_; 2049 using _Up = _R; 2050 #endif 2051 if constexpr (input_width == 2) 2052 return reinterpret_cast<_R>(_Up{__a[0], __a[1], __b[0], __b[1]}); 2053 else if constexpr (input_width == 4) 2054 return reinterpret_cast<_R>( 2055 _Up{__a[0], __a[1], __a[2], __a[3], __b[0], __b[1], __b[2], __b[3]}); 2056 else if constexpr (input_width == 8) 2057 return reinterpret_cast<_R>( 2058 _Up{__a[0], __a[1], __a[2], __a[3], __a[4], __a[5], __a[6], __a[7], 2059 __b[0], __b[1], __b[2], __b[3], __b[4], __b[5], __b[6], __b[7]}); 2060 else if constexpr (input_width == 16) 2061 return reinterpret_cast<_R>( 2062 _Up{__a[0], __a[1], __a[2], __a[3], __a[4], __a[5], __a[6], 2063 __a[7], __a[8], __a[9], __a[10], __a[11], __a[12], __a[13], 2064 __a[14], __a[15], __b[0], __b[1], __b[2], __b[3], __b[4], 2065 __b[5], __b[6], __b[7], __b[8], __b[9], __b[10], __b[11], 2066 __b[12], __b[13], __b[14], __b[15]}); 2067 else if constexpr (input_width == 32) 2068 return reinterpret_cast<_R>( 2069 _Up{__a[0], __a[1], __a[2], __a[3], __a[4], __a[5], __a[6], 2070 __a[7], __a[8], __a[9], __a[10], __a[11], __a[12], __a[13], 2071 __a[14], __a[15], __a[16], __a[17], __a[18], __a[19], __a[20], 2072 __a[21], __a[22], __a[23], __a[24], __a[25], __a[26], __a[27], 2073 __a[28], __a[29], __a[30], __a[31], __b[0], __b[1], __b[2], 2074 __b[3], __b[4], __b[5], __b[6], __b[7], __b[8], __b[9], 2075 __b[10], __b[11], __b[12], __b[13], __b[14], __b[15], __b[16], 2076 __b[17], __b[18], __b[19], __b[20], __b[21], __b[22], __b[23], 2077 __b[24], __b[25], __b[26], __b[27], __b[28], __b[29], __b[30], 2078 __b[31]}); 2079 } 2080 2081 // }}} 2082 // __zero_extend {{{ 2083 template <typename _Tp, typename _TVT = _VectorTraits<_Tp>> 2084 struct _ZeroExtendProxy 2085 { 2086 using value_type = typename _TVT::value_type; 2087 static constexpr size_t _Np = _TVT::_S_full_size; 2088 const _Tp __x; 2089 2090 template <typename _To, typename _ToVT = _VectorTraits<_To>, 2091 typename 2092 = enable_if_t<is_same_v<typename _ToVT::value_type, value_type>>> 2093 _GLIBCXX_SIMD_INTRINSIC operator _To() const 2094 { 2095 constexpr size_t _ToN = _ToVT::_S_full_size; 2096 if constexpr (_ToN == _Np) 2097 return __x; 2098 else if constexpr (_ToN == 2 * _Np) 2099 { 2100 #ifdef _GLIBCXX_SIMD_WORKAROUND_XXX_3 2101 if constexpr (__have_avx && _TVT::template _S_is<float, 4>) 2102 return __vector_bitcast<value_type>( 2103 _mm256_insertf128_ps(__m256(), __x, 0)); 2104 else if constexpr (__have_avx && _TVT::template _S_is<double, 2>) 2105 return __vector_bitcast<value_type>( 2106 _mm256_insertf128_pd(__m256d(), __x, 0)); 2107 else if constexpr (__have_avx2 && _Np * sizeof(value_type) == 16) 2108 return __vector_bitcast<value_type>( 2109 _mm256_insertf128_si256(__m256i(), __to_intrin(__x), 0)); 2110 else if constexpr (__have_avx512f && _TVT::template _S_is<float, 8>) 2111 { 2112 if constexpr (__have_avx512dq) 2113 return __vector_bitcast<value_type>( 2114 _mm512_insertf32x8(__m512(), __x, 0)); 2115 else 2116 return reinterpret_cast<__m512>( 2117 _mm512_insertf64x4(__m512d(), 2118 reinterpret_cast<__m256d>(__x), 0)); 2119 } 2120 else if constexpr (__have_avx512f 2121 && _TVT::template _S_is<double, 4>) 2122 return __vector_bitcast<value_type>( 2123 _mm512_insertf64x4(__m512d(), __x, 0)); 2124 else if constexpr (__have_avx512f && _Np * sizeof(value_type) == 32) 2125 return __vector_bitcast<value_type>( 2126 _mm512_inserti64x4(__m512i(), __to_intrin(__x), 0)); 2127 #endif 2128 return __concat(__x, _Tp()); 2129 } 2130 else if constexpr (_ToN == 4 * _Np) 2131 { 2132 #ifdef _GLIBCXX_SIMD_WORKAROUND_XXX_3 2133 if constexpr (__have_avx512dq && _TVT::template _S_is<double, 2>) 2134 { 2135 return __vector_bitcast<value_type>( 2136 _mm512_insertf64x2(__m512d(), __x, 0)); 2137 } 2138 else if constexpr (__have_avx512f 2139 && is_floating_point_v<value_type>) 2140 { 2141 return __vector_bitcast<value_type>( 2142 _mm512_insertf32x4(__m512(), reinterpret_cast<__m128>(__x), 2143 0)); 2144 } 2145 else if constexpr (__have_avx512f && _Np * sizeof(value_type) == 16) 2146 { 2147 return __vector_bitcast<value_type>( 2148 _mm512_inserti32x4(__m512i(), __to_intrin(__x), 0)); 2149 } 2150 #endif 2151 return __concat(__concat(__x, _Tp()), 2152 __vector_type_t<value_type, _Np * 2>()); 2153 } 2154 else if constexpr (_ToN == 8 * _Np) 2155 return __concat(operator __vector_type_t<value_type, _Np * 4>(), 2156 __vector_type_t<value_type, _Np * 4>()); 2157 else if constexpr (_ToN == 16 * _Np) 2158 return __concat(operator __vector_type_t<value_type, _Np * 8>(), 2159 __vector_type_t<value_type, _Np * 8>()); 2160 else 2161 __assert_unreachable<_Tp>(); 2162 } 2163 }; 2164 2165 template <typename _Tp, typename _TVT = _VectorTraits<_Tp>> 2166 _GLIBCXX_SIMD_INTRINSIC _ZeroExtendProxy<_Tp, _TVT> 2167 __zero_extend(_Tp __x) 2168 { return {__x}; } 2169 2170 // }}} 2171 // __extract<_Np, By>{{{ 2172 template <int _Offset, 2173 int _SplitBy, 2174 typename _Tp, 2175 typename _TVT = _VectorTraits<_Tp>, 2176 typename _R = __vector_type_t<typename _TVT::value_type, _TVT::_S_full_size / _SplitBy>> 2177 _GLIBCXX_SIMD_INTRINSIC constexpr _R 2178 __extract(_Tp __in) 2179 { 2180 using value_type = typename _TVT::value_type; 2181 #if _GLIBCXX_SIMD_X86INTRIN // {{{ 2182 if constexpr (sizeof(_Tp) == 64 && _SplitBy == 4 && _Offset > 0) 2183 { 2184 if constexpr (__have_avx512dq && is_same_v<double, value_type>) 2185 return _mm512_extractf64x2_pd(__to_intrin(__in), _Offset); 2186 else if constexpr (is_floating_point_v<value_type>) 2187 return __vector_bitcast<value_type>( 2188 _mm512_extractf32x4_ps(__intrin_bitcast<__m512>(__in), _Offset)); 2189 else 2190 return reinterpret_cast<_R>( 2191 _mm512_extracti32x4_epi32(__intrin_bitcast<__m512i>(__in), 2192 _Offset)); 2193 } 2194 else 2195 #endif // _GLIBCXX_SIMD_X86INTRIN }}} 2196 { 2197 #ifdef _GLIBCXX_SIMD_WORKAROUND_XXX_1 2198 using _W = conditional_t< 2199 is_floating_point_v<value_type>, double, 2200 conditional_t<(sizeof(_R) >= 16), long long, value_type>>; 2201 static_assert(sizeof(_R) % sizeof(_W) == 0); 2202 constexpr int __return_width = sizeof(_R) / sizeof(_W); 2203 using _Up = __vector_type_t<_W, __return_width>; 2204 const auto __x = __vector_bitcast<_W>(__in); 2205 #else 2206 constexpr int __return_width = _TVT::_S_full_size / _SplitBy; 2207 using _Up = _R; 2208 const __vector_type_t<value_type, _TVT::_S_full_size>& __x 2209 = __in; // only needed for _Tp = _SimdWrapper<value_type, _Np> 2210 #endif 2211 constexpr int _O = _Offset * __return_width; 2212 return __call_with_subscripts<__return_width, _O>( 2213 __x, [](auto... __entries) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA { 2214 return reinterpret_cast<_R>(_Up{__entries...}); 2215 }); 2216 } 2217 } 2218 2219 // }}} 2220 // __lo/__hi64[z]{{{ 2221 template <typename _Tp, 2222 typename _R = __vector_type8_t<typename _VectorTraits<_Tp>::value_type>> 2223 _GLIBCXX_SIMD_INTRINSIC constexpr _R 2224 __lo64(_Tp __x) 2225 { 2226 _R __r{}; 2227 __builtin_memcpy(&__r, &__x, 8); 2228 return __r; 2229 } 2230 2231 template <typename _Tp, 2232 typename _R = __vector_type8_t<typename _VectorTraits<_Tp>::value_type>> 2233 _GLIBCXX_SIMD_INTRINSIC constexpr _R 2234 __hi64(_Tp __x) 2235 { 2236 static_assert(sizeof(_Tp) == 16, "use __hi64z if you meant it"); 2237 _R __r{}; 2238 __builtin_memcpy(&__r, reinterpret_cast<const char*>(&__x) + 8, 8); 2239 return __r; 2240 } 2241 2242 template <typename _Tp, 2243 typename _R = __vector_type8_t<typename _VectorTraits<_Tp>::value_type>> 2244 _GLIBCXX_SIMD_INTRINSIC constexpr _R 2245 __hi64z([[maybe_unused]] _Tp __x) 2246 { 2247 _R __r{}; 2248 if constexpr (sizeof(_Tp) == 16) 2249 __builtin_memcpy(&__r, reinterpret_cast<const char*>(&__x) + 8, 8); 2250 return __r; 2251 } 2252 2253 // }}} 2254 // __lo/__hi128{{{ 2255 template <typename _Tp> 2256 _GLIBCXX_SIMD_INTRINSIC constexpr auto 2257 __lo128(_Tp __x) 2258 { return __extract<0, sizeof(_Tp) / 16>(__x); } 2259 2260 template <typename _Tp> 2261 _GLIBCXX_SIMD_INTRINSIC constexpr auto 2262 __hi128(_Tp __x) 2263 { 2264 static_assert(sizeof(__x) == 32); 2265 return __extract<1, 2>(__x); 2266 } 2267 2268 // }}} 2269 // __lo/__hi256{{{ 2270 template <typename _Tp> 2271 _GLIBCXX_SIMD_INTRINSIC constexpr auto 2272 __lo256(_Tp __x) 2273 { 2274 static_assert(sizeof(__x) == 64); 2275 return __extract<0, 2>(__x); 2276 } 2277 2278 template <typename _Tp> 2279 _GLIBCXX_SIMD_INTRINSIC constexpr auto 2280 __hi256(_Tp __x) 2281 { 2282 static_assert(sizeof(__x) == 64); 2283 return __extract<1, 2>(__x); 2284 } 2285 2286 // }}} 2287 // __auto_bitcast{{{ 2288 template <typename _Tp> 2289 struct _AutoCast 2290 { 2291 static_assert(__is_vector_type_v<_Tp>); 2292 2293 const _Tp __x; 2294 2295 template <typename _Up, typename _UVT = _VectorTraits<_Up>> 2296 _GLIBCXX_SIMD_INTRINSIC constexpr operator _Up() const 2297 { return __intrin_bitcast<typename _UVT::type>(__x); } 2298 }; 2299 2300 template <typename _Tp> 2301 _GLIBCXX_SIMD_INTRINSIC constexpr _AutoCast<_Tp> 2302 __auto_bitcast(const _Tp& __x) 2303 { return {__x}; } 2304 2305 template <typename _Tp, size_t _Np> 2306 _GLIBCXX_SIMD_INTRINSIC constexpr 2307 _AutoCast<typename _SimdWrapper<_Tp, _Np>::_BuiltinType> 2308 __auto_bitcast(const _SimdWrapper<_Tp, _Np>& __x) 2309 { return {__x._M_data}; } 2310 2311 // }}} 2312 // ^^^ ---- builtin vector types [[gnu::vector_size(N)]] and operations ---- ^^^ 2313 2314 #if _GLIBCXX_SIMD_HAVE_SSE_ABI 2315 // __bool_storage_member_type{{{ 2316 #if _GLIBCXX_SIMD_HAVE_AVX512F && _GLIBCXX_SIMD_X86INTRIN 2317 template <size_t _Size> 2318 struct __bool_storage_member_type 2319 { 2320 static_assert((_Size & (_Size - 1)) != 0, 2321 "This trait may only be used for non-power-of-2 sizes. " 2322 "Power-of-2 sizes must be specialized."); 2323 using type = 2324 typename __bool_storage_member_type<std::__bit_ceil(_Size)>::type; 2325 }; 2326 2327 template <> 2328 struct __bool_storage_member_type<1> { using type = bool; }; 2329 2330 template <> 2331 struct __bool_storage_member_type<2> { using type = __mmask8; }; 2332 2333 template <> 2334 struct __bool_storage_member_type<4> { using type = __mmask8; }; 2335 2336 template <> 2337 struct __bool_storage_member_type<8> { using type = __mmask8; }; 2338 2339 template <> 2340 struct __bool_storage_member_type<16> { using type = __mmask16; }; 2341 2342 template <> 2343 struct __bool_storage_member_type<32> { using type = __mmask32; }; 2344 2345 template <> 2346 struct __bool_storage_member_type<64> { using type = __mmask64; }; 2347 #endif // _GLIBCXX_SIMD_HAVE_AVX512F 2348 2349 // }}} 2350 // __intrinsic_type (x86){{{ 2351 // the following excludes bool via __is_vectorizable 2352 #if _GLIBCXX_SIMD_HAVE_SSE 2353 template <typename _Tp, size_t _Bytes> 2354 struct __intrinsic_type<_Tp, _Bytes, enable_if_t<__is_vectorizable_v<_Tp> && _Bytes <= 64>> 2355 { 2356 static_assert(!is_same_v<_Tp, long double>, 2357 "no __intrinsic_type support for long double on x86"); 2358 2359 static constexpr size_t _S_VBytes = _Bytes <= 16 ? 16 : _Bytes <= 32 ? 32 : 64; 2360 2361 using type [[__gnu__::__vector_size__(_S_VBytes)]] 2362 = conditional_t<is_integral_v<_Tp>, long long int, _Tp>; 2363 }; 2364 #endif // _GLIBCXX_SIMD_HAVE_SSE 2365 2366 // }}} 2367 #endif // _GLIBCXX_SIMD_HAVE_SSE_ABI 2368 // __intrinsic_type (ARM){{{ 2369 #if _GLIBCXX_SIMD_HAVE_NEON 2370 template <> 2371 struct __intrinsic_type<float, 8, void> 2372 { using type = float32x2_t; }; 2373 2374 template <> 2375 struct __intrinsic_type<float, 16, void> 2376 { using type = float32x4_t; }; 2377 2378 template <> 2379 struct __intrinsic_type<double, 8, void> 2380 { 2381 #if _GLIBCXX_SIMD_HAVE_NEON_A64 2382 using type = float64x1_t; 2383 #endif 2384 }; 2385 2386 template <> 2387 struct __intrinsic_type<double, 16, void> 2388 { 2389 #if _GLIBCXX_SIMD_HAVE_NEON_A64 2390 using type = float64x2_t; 2391 #endif 2392 }; 2393 2394 #define _GLIBCXX_SIMD_ARM_INTRIN(_Bits, _Np) \ 2395 template <> \ 2396 struct __intrinsic_type<__int_with_sizeof_t<_Bits / 8>, \ 2397 _Np * _Bits / 8, void> \ 2398 { using type = int##_Bits##x##_Np##_t; }; \ 2399 template <> \ 2400 struct __intrinsic_type<make_unsigned_t<__int_with_sizeof_t<_Bits / 8>>, \ 2401 _Np * _Bits / 8, void> \ 2402 { using type = uint##_Bits##x##_Np##_t; } 2403 _GLIBCXX_SIMD_ARM_INTRIN(8, 8); 2404 _GLIBCXX_SIMD_ARM_INTRIN(8, 16); 2405 _GLIBCXX_SIMD_ARM_INTRIN(16, 4); 2406 _GLIBCXX_SIMD_ARM_INTRIN(16, 8); 2407 _GLIBCXX_SIMD_ARM_INTRIN(32, 2); 2408 _GLIBCXX_SIMD_ARM_INTRIN(32, 4); 2409 _GLIBCXX_SIMD_ARM_INTRIN(64, 1); 2410 _GLIBCXX_SIMD_ARM_INTRIN(64, 2); 2411 #undef _GLIBCXX_SIMD_ARM_INTRIN 2412 2413 template <typename _Tp, size_t _Bytes> 2414 struct __intrinsic_type<_Tp, _Bytes, enable_if_t<__is_vectorizable_v<_Tp> && _Bytes <= 16>> 2415 { 2416 static constexpr int _SVecBytes = _Bytes <= 8 ? 8 : 16; 2417 2418 using _Ip = __int_for_sizeof_t<_Tp>; 2419 2420 using _Up = conditional_t< 2421 is_floating_point_v<_Tp>, _Tp, 2422 conditional_t<is_unsigned_v<_Tp>, make_unsigned_t<_Ip>, _Ip>>; 2423 2424 static_assert(!is_same_v<_Tp, _Up> || _SVecBytes != _Bytes, 2425 "should use explicit specialization above"); 2426 2427 using type = typename __intrinsic_type<_Up, _SVecBytes>::type; 2428 }; 2429 #endif // _GLIBCXX_SIMD_HAVE_NEON 2430 2431 // }}} 2432 // __intrinsic_type (PPC){{{ 2433 #ifdef __ALTIVEC__ 2434 template <typename _Tp> 2435 struct __intrinsic_type_impl; 2436 2437 #define _GLIBCXX_SIMD_PPC_INTRIN(_Tp) \ 2438 template <> \ 2439 struct __intrinsic_type_impl<_Tp> { using type = __vector _Tp; } 2440 _GLIBCXX_SIMD_PPC_INTRIN(float); 2441 #ifdef __VSX__ 2442 _GLIBCXX_SIMD_PPC_INTRIN(double); 2443 #endif 2444 _GLIBCXX_SIMD_PPC_INTRIN(signed char); 2445 _GLIBCXX_SIMD_PPC_INTRIN(unsigned char); 2446 _GLIBCXX_SIMD_PPC_INTRIN(signed short); 2447 _GLIBCXX_SIMD_PPC_INTRIN(unsigned short); 2448 _GLIBCXX_SIMD_PPC_INTRIN(signed int); 2449 _GLIBCXX_SIMD_PPC_INTRIN(unsigned int); 2450 #if defined __VSX__ || __SIZEOF_LONG__ == 4 2451 _GLIBCXX_SIMD_PPC_INTRIN(signed long); 2452 _GLIBCXX_SIMD_PPC_INTRIN(unsigned long); 2453 #endif 2454 #ifdef __VSX__ 2455 _GLIBCXX_SIMD_PPC_INTRIN(signed long long); 2456 _GLIBCXX_SIMD_PPC_INTRIN(unsigned long long); 2457 #endif 2458 #undef _GLIBCXX_SIMD_PPC_INTRIN 2459 2460 template <typename _Tp, size_t _Bytes> 2461 struct __intrinsic_type<_Tp, _Bytes, enable_if_t<__is_vectorizable_v<_Tp> && _Bytes <= 16>> 2462 { 2463 static constexpr bool _S_is_ldouble = is_same_v<_Tp, long double>; 2464 2465 // allow _Tp == long double with -mlong-double-64 2466 static_assert(!(_S_is_ldouble && sizeof(long double) > sizeof(double)), 2467 "no __intrinsic_type support for 128-bit floating point on PowerPC"); 2468 2469 #ifndef __VSX__ 2470 static_assert(!(is_same_v<_Tp, double> 2471 || (_S_is_ldouble && sizeof(long double) == sizeof(double))), 2472 "no __intrinsic_type support for 64-bit floating point on PowerPC w/o VSX"); 2473 #endif 2474 2475 static constexpr auto __element_type() 2476 { 2477 if constexpr (is_floating_point_v<_Tp>) 2478 { 2479 if constexpr (_S_is_ldouble) 2480 return double {}; 2481 else 2482 return _Tp {}; 2483 } 2484 else if constexpr (is_signed_v<_Tp>) 2485 { 2486 if constexpr (sizeof(_Tp) == sizeof(_SChar)) 2487 return _SChar {}; 2488 else if constexpr (sizeof(_Tp) == sizeof(short)) 2489 return short {}; 2490 else if constexpr (sizeof(_Tp) == sizeof(int)) 2491 return int {}; 2492 else if constexpr (sizeof(_Tp) == sizeof(_LLong)) 2493 return _LLong {}; 2494 } 2495 else 2496 { 2497 if constexpr (sizeof(_Tp) == sizeof(_UChar)) 2498 return _UChar {}; 2499 else if constexpr (sizeof(_Tp) == sizeof(_UShort)) 2500 return _UShort {}; 2501 else if constexpr (sizeof(_Tp) == sizeof(_UInt)) 2502 return _UInt {}; 2503 else if constexpr (sizeof(_Tp) == sizeof(_ULLong)) 2504 return _ULLong {}; 2505 } 2506 } 2507 2508 using type = typename __intrinsic_type_impl<decltype(__element_type())>::type; 2509 }; 2510 #endif // __ALTIVEC__ 2511 2512 // }}} 2513 // _SimdWrapper<bool>{{{1 2514 template <size_t _Width> 2515 struct _SimdWrapper<bool, _Width, 2516 void_t<typename __bool_storage_member_type<_Width>::type>> 2517 { 2518 using _BuiltinType = typename __bool_storage_member_type<_Width>::type; 2519 using value_type = bool; 2520 2521 static constexpr size_t _S_full_size = sizeof(_BuiltinType) * __CHAR_BIT__; 2522 2523 _GLIBCXX_SIMD_INTRINSIC constexpr _SimdWrapper<bool, _S_full_size> 2524 __as_full_vector() const 2525 { return _M_data; } 2526 2527 _GLIBCXX_SIMD_INTRINSIC constexpr 2528 _SimdWrapper() = default; 2529 2530 _GLIBCXX_SIMD_INTRINSIC constexpr 2531 _SimdWrapper(_BuiltinType __k) : _M_data(__k) {}; 2532 2533 _GLIBCXX_SIMD_INTRINSIC 2534 operator const _BuiltinType&() const 2535 { return _M_data; } 2536 2537 _GLIBCXX_SIMD_INTRINSIC 2538 operator _BuiltinType&() 2539 { return _M_data; } 2540 2541 _GLIBCXX_SIMD_INTRINSIC _BuiltinType 2542 __intrin() const 2543 { return _M_data; } 2544 2545 _GLIBCXX_SIMD_INTRINSIC constexpr value_type 2546 operator[](size_t __i) const 2547 { return _M_data & (_BuiltinType(1) << __i); } 2548 2549 template <size_t __i> 2550 _GLIBCXX_SIMD_INTRINSIC constexpr value_type 2551 operator[](_SizeConstant<__i>) const 2552 { return _M_data & (_BuiltinType(1) << __i); } 2553 2554 _GLIBCXX_SIMD_INTRINSIC constexpr void 2555 _M_set(size_t __i, value_type __x) 2556 { 2557 if (__x) 2558 _M_data |= (_BuiltinType(1) << __i); 2559 else 2560 _M_data &= ~(_BuiltinType(1) << __i); 2561 } 2562 2563 _GLIBCXX_SIMD_INTRINSIC constexpr bool 2564 _M_is_constprop() const 2565 { return __builtin_constant_p(_M_data); } 2566 2567 _GLIBCXX_SIMD_INTRINSIC constexpr bool 2568 _M_is_constprop_none_of() const 2569 { 2570 if (__builtin_constant_p(_M_data)) 2571 { 2572 constexpr int __nbits = sizeof(_BuiltinType) * __CHAR_BIT__; 2573 constexpr _BuiltinType __active_mask 2574 = ~_BuiltinType() >> (__nbits - _Width); 2575 return (_M_data & __active_mask) == 0; 2576 } 2577 return false; 2578 } 2579 2580 _GLIBCXX_SIMD_INTRINSIC constexpr bool 2581 _M_is_constprop_all_of() const 2582 { 2583 if (__builtin_constant_p(_M_data)) 2584 { 2585 constexpr int __nbits = sizeof(_BuiltinType) * __CHAR_BIT__; 2586 constexpr _BuiltinType __active_mask 2587 = ~_BuiltinType() >> (__nbits - _Width); 2588 return (_M_data & __active_mask) == __active_mask; 2589 } 2590 return false; 2591 } 2592 2593 _BuiltinType _M_data; 2594 }; 2595 2596 // _SimdWrapperBase{{{1 2597 template <bool _MustZeroInitPadding, typename _BuiltinType> 2598 struct _SimdWrapperBase; 2599 2600 template <typename _BuiltinType> 2601 struct _SimdWrapperBase<false, _BuiltinType> // no padding or no SNaNs 2602 { 2603 _GLIBCXX_SIMD_INTRINSIC constexpr 2604 _SimdWrapperBase() = default; 2605 2606 _GLIBCXX_SIMD_INTRINSIC constexpr 2607 _SimdWrapperBase(_BuiltinType __init) : _M_data(__init) {} 2608 2609 _BuiltinType _M_data; 2610 }; 2611 2612 template <typename _BuiltinType> 2613 struct _SimdWrapperBase<true, _BuiltinType> // with padding that needs to 2614 // never become SNaN 2615 { 2616 _GLIBCXX_SIMD_INTRINSIC constexpr 2617 _SimdWrapperBase() : _M_data() {} 2618 2619 _GLIBCXX_SIMD_INTRINSIC constexpr 2620 _SimdWrapperBase(_BuiltinType __init) : _M_data(__init) {} 2621 2622 _BuiltinType _M_data; 2623 }; 2624 2625 // }}} 2626 // _SimdWrapper{{{ 2627 template <typename _Tp, size_t _Width> 2628 struct _SimdWrapper< 2629 _Tp, _Width, 2630 void_t<__vector_type_t<_Tp, _Width>, __intrinsic_type_t<_Tp, _Width>>> 2631 : _SimdWrapperBase<__has_iec559_behavior<__signaling_NaN, _Tp>::value 2632 && sizeof(_Tp) * _Width 2633 == sizeof(__vector_type_t<_Tp, _Width>), 2634 __vector_type_t<_Tp, _Width>> 2635 { 2636 using _Base 2637 = _SimdWrapperBase<__has_iec559_behavior<__signaling_NaN, _Tp>::value 2638 && sizeof(_Tp) * _Width 2639 == sizeof(__vector_type_t<_Tp, _Width>), 2640 __vector_type_t<_Tp, _Width>>; 2641 2642 static_assert(__is_vectorizable_v<_Tp>); 2643 static_assert(_Width >= 2); // 1 doesn't make sense, use _Tp directly then 2644 2645 using _BuiltinType = __vector_type_t<_Tp, _Width>; 2646 using value_type = _Tp; 2647 2648 static inline constexpr size_t _S_full_size 2649 = sizeof(_BuiltinType) / sizeof(value_type); 2650 static inline constexpr int _S_size = _Width; 2651 static inline constexpr bool _S_is_partial = _S_full_size != _S_size; 2652 2653 using _Base::_M_data; 2654 2655 _GLIBCXX_SIMD_INTRINSIC constexpr _SimdWrapper<_Tp, _S_full_size> 2656 __as_full_vector() const 2657 { return _M_data; } 2658 2659 _GLIBCXX_SIMD_INTRINSIC constexpr 2660 _SimdWrapper(initializer_list<_Tp> __init) 2661 : _Base(__generate_from_n_evaluations<_Width, _BuiltinType>( 2662 [&](auto __i) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA { 2663 return __init.begin()[__i.value]; 2664 })) {} 2665 2666 _GLIBCXX_SIMD_INTRINSIC constexpr 2667 _SimdWrapper() = default; 2668 2669 _GLIBCXX_SIMD_INTRINSIC constexpr 2670 _SimdWrapper(const _SimdWrapper&) = default; 2671 2672 _GLIBCXX_SIMD_INTRINSIC constexpr 2673 _SimdWrapper(_SimdWrapper&&) = default; 2674 2675 _GLIBCXX_SIMD_INTRINSIC constexpr _SimdWrapper& 2676 operator=(const _SimdWrapper&) = default; 2677 2678 _GLIBCXX_SIMD_INTRINSIC constexpr _SimdWrapper& 2679 operator=(_SimdWrapper&&) = default; 2680 2681 template <typename _V, typename = enable_if_t<disjunction_v< 2682 is_same<_V, __vector_type_t<_Tp, _Width>>, 2683 is_same<_V, __intrinsic_type_t<_Tp, _Width>>>>> 2684 _GLIBCXX_SIMD_INTRINSIC constexpr 2685 _SimdWrapper(_V __x) 2686 // __vector_bitcast can convert e.g. __m128 to __vector(2) float 2687 : _Base(__vector_bitcast<_Tp, _Width>(__x)) {} 2688 2689 template <typename... _As, 2690 typename = enable_if_t<((is_same_v<simd_abi::scalar, _As> && ...) 2691 && sizeof...(_As) <= _Width)>> 2692 _GLIBCXX_SIMD_INTRINSIC constexpr 2693 operator _SimdTuple<_Tp, _As...>() const 2694 { 2695 return __generate_from_n_evaluations<sizeof...(_As), _SimdTuple<_Tp, _As...>>( 2696 [&](auto __i) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA 2697 { return _M_data[int(__i)]; }); 2698 } 2699 2700 _GLIBCXX_SIMD_INTRINSIC constexpr 2701 operator const _BuiltinType&() const 2702 { return _M_data; } 2703 2704 _GLIBCXX_SIMD_INTRINSIC constexpr 2705 operator _BuiltinType&() 2706 { return _M_data; } 2707 2708 _GLIBCXX_SIMD_INTRINSIC constexpr _Tp 2709 operator[](size_t __i) const 2710 { return _M_data[__i]; } 2711 2712 template <size_t __i> 2713 _GLIBCXX_SIMD_INTRINSIC constexpr _Tp 2714 operator[](_SizeConstant<__i>) const 2715 { return _M_data[__i]; } 2716 2717 _GLIBCXX_SIMD_INTRINSIC constexpr void 2718 _M_set(size_t __i, _Tp __x) 2719 { 2720 if (__builtin_is_constant_evaluated()) 2721 _M_data = __generate_from_n_evaluations<_Width, _BuiltinType>([&](auto __j) { 2722 return __j == __i ? __x : _M_data[__j()]; 2723 }); 2724 else 2725 _M_data[__i] = __x; 2726 } 2727 2728 _GLIBCXX_SIMD_INTRINSIC 2729 constexpr bool 2730 _M_is_constprop() const 2731 { return __builtin_constant_p(_M_data); } 2732 2733 _GLIBCXX_SIMD_INTRINSIC constexpr bool 2734 _M_is_constprop_none_of() const 2735 { 2736 if (__builtin_constant_p(_M_data)) 2737 { 2738 bool __r = true; 2739 if constexpr (is_floating_point_v<_Tp>) 2740 { 2741 using _Ip = __int_for_sizeof_t<_Tp>; 2742 const auto __intdata = __vector_bitcast<_Ip>(_M_data); 2743 __execute_n_times<_Width>( 2744 [&](auto __i) { __r &= __intdata[__i.value] == _Ip(); }); 2745 } 2746 else 2747 __execute_n_times<_Width>( 2748 [&](auto __i) { __r &= _M_data[__i.value] == _Tp(); }); 2749 if (__builtin_constant_p(__r)) 2750 return __r; 2751 } 2752 return false; 2753 } 2754 2755 _GLIBCXX_SIMD_INTRINSIC constexpr bool 2756 _M_is_constprop_all_of() const 2757 { 2758 if (__builtin_constant_p(_M_data)) 2759 { 2760 bool __r = true; 2761 if constexpr (is_floating_point_v<_Tp>) 2762 { 2763 using _Ip = __int_for_sizeof_t<_Tp>; 2764 const auto __intdata = __vector_bitcast<_Ip>(_M_data); 2765 __execute_n_times<_Width>( 2766 [&](auto __i) { __r &= __intdata[__i.value] == ~_Ip(); }); 2767 } 2768 else 2769 __execute_n_times<_Width>( 2770 [&](auto __i) { __r &= _M_data[__i.value] == ~_Tp(); }); 2771 if (__builtin_constant_p(__r)) 2772 return __r; 2773 } 2774 return false; 2775 } 2776 }; 2777 2778 // }}} 2779 2780 // __vectorized_sizeof {{{ 2781 template <typename _Tp> 2782 constexpr size_t 2783 __vectorized_sizeof() 2784 { 2785 if constexpr (!__is_vectorizable_v<_Tp>) 2786 return 0; 2787 2788 if constexpr (sizeof(_Tp) <= 8) 2789 { 2790 // X86: 2791 if constexpr (__have_avx512bw) 2792 return 64; 2793 if constexpr (__have_avx512f && sizeof(_Tp) >= 4) 2794 return 64; 2795 if constexpr (__have_avx2) 2796 return 32; 2797 if constexpr (__have_avx && is_floating_point_v<_Tp>) 2798 return 32; 2799 if constexpr (__have_sse2) 2800 return 16; 2801 if constexpr (__have_sse && is_same_v<_Tp, float>) 2802 return 16; 2803 /* The following is too much trouble because of mixed MMX and x87 code. 2804 * While nothing here explicitly calls MMX instructions of registers, 2805 * they are still emitted but no EMMS cleanup is done. 2806 if constexpr (__have_mmx && sizeof(_Tp) <= 4 && is_integral_v<_Tp>) 2807 return 8; 2808 */ 2809 2810 // PowerPC: 2811 if constexpr (__have_power8vec 2812 || (__have_power_vmx && (sizeof(_Tp) < 8)) 2813 || (__have_power_vsx && is_floating_point_v<_Tp>) ) 2814 return 16; 2815 2816 // ARM: 2817 if constexpr (__have_neon_a64 2818 || (__have_neon_a32 && !is_same_v<_Tp, double>) ) 2819 return 16; 2820 if constexpr (__have_neon 2821 && sizeof(_Tp) < 8 2822 // Only allow fp if the user allows non-ICE559 fp (e.g. 2823 // via -ffast-math). ARMv7 NEON fp is not conforming to 2824 // IEC559. 2825 && (__support_neon_float || !is_floating_point_v<_Tp>)) 2826 return 16; 2827 } 2828 2829 return sizeof(_Tp); 2830 } 2831 2832 // }}} 2833 namespace simd_abi { 2834 // most of simd_abi is defined in simd_detail.h 2835 template <typename _Tp> 2836 inline constexpr int max_fixed_size 2837 = (__have_avx512bw && sizeof(_Tp) == 1) ? 64 : 32; 2838 2839 // compatible {{{ 2840 #if defined __x86_64__ || defined __aarch64__ 2841 template <typename _Tp> 2842 using compatible = conditional_t<(sizeof(_Tp) <= 8), _VecBuiltin<16>, scalar>; 2843 #elif defined __ARM_NEON 2844 // FIXME: not sure, probably needs to be scalar (or dependent on the hard-float 2845 // ABI?) 2846 template <typename _Tp> 2847 using compatible 2848 = conditional_t<(sizeof(_Tp) < 8 2849 && (__support_neon_float || !is_floating_point_v<_Tp>)), 2850 _VecBuiltin<16>, scalar>; 2851 #else 2852 template <typename> 2853 using compatible = scalar; 2854 #endif 2855 2856 // }}} 2857 // native {{{ 2858 template <typename _Tp> 2859 constexpr auto 2860 __determine_native_abi() 2861 { 2862 constexpr size_t __bytes = __vectorized_sizeof<_Tp>(); 2863 if constexpr (__bytes == sizeof(_Tp)) 2864 return static_cast<scalar*>(nullptr); 2865 else if constexpr (__have_avx512vl || (__have_avx512f && __bytes == 64)) 2866 return static_cast<_VecBltnBtmsk<__bytes>*>(nullptr); 2867 else 2868 return static_cast<_VecBuiltin<__bytes>*>(nullptr); 2869 } 2870 2871 template <typename _Tp, typename = enable_if_t<__is_vectorizable_v<_Tp>>> 2872 using native = remove_pointer_t<decltype(__determine_native_abi<_Tp>())>; 2873 2874 // }}} 2875 // __default_abi {{{ 2876 #if defined _GLIBCXX_SIMD_DEFAULT_ABI 2877 template <typename _Tp> 2878 using __default_abi = _GLIBCXX_SIMD_DEFAULT_ABI<_Tp>; 2879 #else 2880 template <typename _Tp> 2881 using __default_abi = compatible<_Tp>; 2882 #endif 2883 2884 // }}} 2885 } // namespace simd_abi 2886 2887 // traits {{{1 2888 template <typename _Tp> 2889 struct is_simd_flag_type 2890 : false_type 2891 {}; 2892 2893 template <> 2894 struct is_simd_flag_type<element_aligned_tag> 2895 : true_type 2896 {}; 2897 2898 template <> 2899 struct is_simd_flag_type<vector_aligned_tag> 2900 : true_type 2901 {}; 2902 2903 template <size_t _Np> 2904 struct is_simd_flag_type<overaligned_tag<_Np>> 2905 : __bool_constant<(_Np > 0) and __has_single_bit(_Np)> 2906 {}; 2907 2908 template <typename _Tp> 2909 inline constexpr bool is_simd_flag_type_v = is_simd_flag_type<_Tp>::value; 2910 2911 template <typename _Tp, typename = enable_if_t<is_simd_flag_type_v<_Tp>>> 2912 using _IsSimdFlagType = _Tp; 2913 2914 // is_abi_tag {{{2 2915 template <typename _Tp, typename = void_t<>> 2916 struct is_abi_tag : false_type {}; 2917 2918 template <typename _Tp> 2919 struct is_abi_tag<_Tp, void_t<typename _Tp::_IsValidAbiTag>> 2920 : public _Tp::_IsValidAbiTag {}; 2921 2922 template <typename _Tp> 2923 inline constexpr bool is_abi_tag_v = is_abi_tag<_Tp>::value; 2924 2925 // is_simd(_mask) {{{2 2926 template <typename _Tp> 2927 struct is_simd : public false_type {}; 2928 2929 template <typename _Tp> 2930 inline constexpr bool is_simd_v = is_simd<_Tp>::value; 2931 2932 template <typename _Tp> 2933 struct is_simd_mask : public false_type {}; 2934 2935 template <typename _Tp> 2936 inline constexpr bool is_simd_mask_v = is_simd_mask<_Tp>::value; 2937 2938 // simd_size {{{2 2939 template <typename _Tp, typename _Abi, typename = void> 2940 struct __simd_size_impl {}; 2941 2942 template <typename _Tp, typename _Abi> 2943 struct __simd_size_impl< 2944 _Tp, _Abi, 2945 enable_if_t<conjunction_v<__is_vectorizable<_Tp>, is_abi_tag<_Abi>>>> 2946 : _SizeConstant<_Abi::template _S_size<_Tp>> {}; 2947 2948 template <typename _Tp, typename _Abi = simd_abi::__default_abi<_Tp>> 2949 struct simd_size : __simd_size_impl<_Tp, _Abi> {}; 2950 2951 template <typename _Tp, typename _Abi = simd_abi::__default_abi<_Tp>> 2952 inline constexpr size_t simd_size_v = simd_size<_Tp, _Abi>::value; 2953 2954 // simd_abi::deduce {{{2 2955 template <typename _Tp, size_t _Np, typename = void> 2956 struct __deduce_impl; 2957 2958 namespace simd_abi { 2959 /** 2960 * @tparam _Tp The requested `value_type` for the elements. 2961 * @tparam _Np The requested number of elements. 2962 * @tparam _Abis This parameter is ignored, since this implementation cannot 2963 * make any use of it. Either __a good native ABI is matched and used as `type` 2964 * alias, or the `fixed_size<_Np>` ABI is used, which internally is built from 2965 * the best matching native ABIs. 2966 */ 2967 template <typename _Tp, size_t _Np, typename...> 2968 struct deduce : __deduce_impl<_Tp, _Np> {}; 2969 2970 template <typename _Tp, size_t _Np, typename... _Abis> 2971 using deduce_t = typename deduce<_Tp, _Np, _Abis...>::type; 2972 } // namespace simd_abi 2973 2974 // }}}2 2975 // rebind_simd {{{2 2976 template <typename _Tp, typename _V, typename = void> 2977 struct rebind_simd; 2978 2979 template <typename _Tp, typename _Up, typename _Abi> 2980 struct rebind_simd<_Tp, simd<_Up, _Abi>, 2981 void_t<simd_abi::deduce_t<_Tp, simd_size_v<_Up, _Abi>, _Abi>>> 2982 { using type = simd<_Tp, simd_abi::deduce_t<_Tp, simd_size_v<_Up, _Abi>, _Abi>>; }; 2983 2984 template <typename _Tp, typename _Up, typename _Abi> 2985 struct rebind_simd<_Tp, simd_mask<_Up, _Abi>, 2986 void_t<simd_abi::deduce_t<_Tp, simd_size_v<_Up, _Abi>, _Abi>>> 2987 { using type = simd_mask<_Tp, simd_abi::deduce_t<_Tp, simd_size_v<_Up, _Abi>, _Abi>>; }; 2988 2989 template <typename _Tp, typename _V> 2990 using rebind_simd_t = typename rebind_simd<_Tp, _V>::type; 2991 2992 // resize_simd {{{2 2993 template <int _Np, typename _V, typename = void> 2994 struct resize_simd; 2995 2996 template <int _Np, typename _Tp, typename _Abi> 2997 struct resize_simd<_Np, simd<_Tp, _Abi>, void_t<simd_abi::deduce_t<_Tp, _Np, _Abi>>> 2998 { using type = simd<_Tp, simd_abi::deduce_t<_Tp, _Np, _Abi>>; }; 2999 3000 template <int _Np, typename _Tp, typename _Abi> 3001 struct resize_simd<_Np, simd_mask<_Tp, _Abi>, void_t<simd_abi::deduce_t<_Tp, _Np, _Abi>>> 3002 { using type = simd_mask<_Tp, simd_abi::deduce_t<_Tp, _Np, _Abi>>; }; 3003 3004 template <int _Np, typename _V> 3005 using resize_simd_t = typename resize_simd<_Np, _V>::type; 3006 3007 // }}}2 3008 // memory_alignment {{{2 3009 template <typename _Tp, typename _Up = typename _Tp::value_type> 3010 struct memory_alignment 3011 : public _SizeConstant<vector_aligned_tag::_S_alignment<_Tp, _Up>> {}; 3012 3013 template <typename _Tp, typename _Up = typename _Tp::value_type> 3014 inline constexpr size_t memory_alignment_v = memory_alignment<_Tp, _Up>::value; 3015 3016 // class template simd [simd] {{{1 3017 template <typename _Tp, typename _Abi = simd_abi::__default_abi<_Tp>> 3018 class simd; 3019 3020 template <typename _Tp, typename _Abi> 3021 struct is_simd<simd<_Tp, _Abi>> : public true_type {}; 3022 3023 template <typename _Tp> 3024 using native_simd = simd<_Tp, simd_abi::native<_Tp>>; 3025 3026 template <typename _Tp, int _Np> 3027 using fixed_size_simd = simd<_Tp, simd_abi::fixed_size<_Np>>; 3028 3029 template <typename _Tp, size_t _Np> 3030 using __deduced_simd = simd<_Tp, simd_abi::deduce_t<_Tp, _Np>>; 3031 3032 // class template simd_mask [simd_mask] {{{1 3033 template <typename _Tp, typename _Abi = simd_abi::__default_abi<_Tp>> 3034 class simd_mask; 3035 3036 template <typename _Tp, typename _Abi> 3037 struct is_simd_mask<simd_mask<_Tp, _Abi>> : public true_type {}; 3038 3039 template <typename _Tp> 3040 using native_simd_mask = simd_mask<_Tp, simd_abi::native<_Tp>>; 3041 3042 template <typename _Tp, int _Np> 3043 using fixed_size_simd_mask = simd_mask<_Tp, simd_abi::fixed_size<_Np>>; 3044 3045 template <typename _Tp, size_t _Np> 3046 using __deduced_simd_mask = simd_mask<_Tp, simd_abi::deduce_t<_Tp, _Np>>; 3047 3048 // casts [simd.casts] {{{1 3049 // static_simd_cast {{{2 3050 template <typename _Tp, typename _Up, typename _Ap, bool = is_simd_v<_Tp>, typename = void> 3051 struct __static_simd_cast_return_type; 3052 3053 template <typename _Tp, typename _A0, typename _Up, typename _Ap> 3054 struct __static_simd_cast_return_type<simd_mask<_Tp, _A0>, _Up, _Ap, false, void> 3055 : __static_simd_cast_return_type<simd<_Tp, _A0>, _Up, _Ap> {}; 3056 3057 template <typename _Tp, typename _Up, typename _Ap> 3058 struct __static_simd_cast_return_type< 3059 _Tp, _Up, _Ap, true, enable_if_t<_Tp::size() == simd_size_v<_Up, _Ap>>> 3060 { using type = _Tp; }; 3061 3062 template <typename _Tp, typename _Ap> 3063 struct __static_simd_cast_return_type<_Tp, _Tp, _Ap, false, 3064 #ifdef _GLIBCXX_SIMD_FIX_P2TS_ISSUE66 3065 enable_if_t<__is_vectorizable_v<_Tp>> 3066 #else 3067 void 3068 #endif 3069 > 3070 { using type = simd<_Tp, _Ap>; }; 3071 3072 template <typename _Tp, typename = void> 3073 struct __safe_make_signed { using type = _Tp;}; 3074 3075 template <typename _Tp> 3076 struct __safe_make_signed<_Tp, enable_if_t<is_integral_v<_Tp>>> 3077 { 3078 // the extra make_unsigned_t is because of PR85951 3079 using type = make_signed_t<make_unsigned_t<_Tp>>; 3080 }; 3081 3082 template <typename _Tp> 3083 using safe_make_signed_t = typename __safe_make_signed<_Tp>::type; 3084 3085 template <typename _Tp, typename _Up, typename _Ap> 3086 struct __static_simd_cast_return_type<_Tp, _Up, _Ap, false, 3087 #ifdef _GLIBCXX_SIMD_FIX_P2TS_ISSUE66 3088 enable_if_t<__is_vectorizable_v<_Tp>> 3089 #else 3090 void 3091 #endif 3092 > 3093 { 3094 using type = conditional_t< 3095 (is_integral_v<_Up> && is_integral_v<_Tp> && 3096 #ifndef _GLIBCXX_SIMD_FIX_P2TS_ISSUE65 3097 is_signed_v<_Up> != is_signed_v<_Tp> && 3098 #endif 3099 is_same_v<safe_make_signed_t<_Up>, safe_make_signed_t<_Tp>>), 3100 simd<_Tp, _Ap>, fixed_size_simd<_Tp, simd_size_v<_Up, _Ap>>>; 3101 }; 3102 3103 template <typename _Tp, typename _Up, typename _Ap, 3104 typename _R 3105 = typename __static_simd_cast_return_type<_Tp, _Up, _Ap>::type> 3106 _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR _R 3107 static_simd_cast(const simd<_Up, _Ap>& __x) 3108 { 3109 if constexpr (is_same<_R, simd<_Up, _Ap>>::value) 3110 return __x; 3111 else 3112 { 3113 _SimdConverter<_Up, _Ap, typename _R::value_type, typename _R::abi_type> 3114 __c; 3115 return _R(__private_init, __c(__data(__x))); 3116 } 3117 } 3118 3119 namespace __proposed { 3120 template <typename _Tp, typename _Up, typename _Ap, 3121 typename _R 3122 = typename __static_simd_cast_return_type<_Tp, _Up, _Ap>::type> 3123 _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR typename _R::mask_type 3124 static_simd_cast(const simd_mask<_Up, _Ap>& __x) 3125 { 3126 using _RM = typename _R::mask_type; 3127 return {__private_init, _RM::abi_type::_MaskImpl::template _S_convert< 3128 typename _RM::simd_type::value_type>(__x)}; 3129 } 3130 3131 template <typename _To, typename _Up, typename _Abi> 3132 _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR 3133 _To 3134 simd_bit_cast(const simd<_Up, _Abi>& __x) 3135 { 3136 using _Tp = typename _To::value_type; 3137 using _ToMember = typename _SimdTraits<_Tp, typename _To::abi_type>::_SimdMember; 3138 using _From = simd<_Up, _Abi>; 3139 using _FromMember = typename _SimdTraits<_Up, _Abi>::_SimdMember; 3140 // with concepts, the following should be constraints 3141 static_assert(sizeof(_To) == sizeof(_From)); 3142 static_assert(is_trivially_copyable_v<_Tp> && is_trivially_copyable_v<_Up>); 3143 static_assert(is_trivially_copyable_v<_ToMember> && is_trivially_copyable_v<_FromMember>); 3144 #if __has_builtin(__builtin_bit_cast) 3145 return {__private_init, __builtin_bit_cast(_ToMember, __data(__x))}; 3146 #else 3147 return {__private_init, __bit_cast<_ToMember>(__data(__x))}; 3148 #endif 3149 } 3150 3151 template <typename _To, typename _Up, typename _Abi> 3152 _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR 3153 _To 3154 simd_bit_cast(const simd_mask<_Up, _Abi>& __x) 3155 { 3156 using _From = simd_mask<_Up, _Abi>; 3157 static_assert(sizeof(_To) == sizeof(_From)); 3158 static_assert(is_trivially_copyable_v<_From>); 3159 // _To can be simd<T, A>, specifically simd<T, fixed_size<N>> in which case _To is not trivially 3160 // copyable. 3161 if constexpr (is_simd_v<_To>) 3162 { 3163 using _Tp = typename _To::value_type; 3164 using _ToMember = typename _SimdTraits<_Tp, typename _To::abi_type>::_SimdMember; 3165 static_assert(is_trivially_copyable_v<_ToMember>); 3166 #if __has_builtin(__builtin_bit_cast) 3167 return {__private_init, __builtin_bit_cast(_ToMember, __x)}; 3168 #else 3169 return {__private_init, __bit_cast<_ToMember>(__x)}; 3170 #endif 3171 } 3172 else 3173 { 3174 static_assert(is_trivially_copyable_v<_To>); 3175 #if __has_builtin(__builtin_bit_cast) 3176 return __builtin_bit_cast(_To, __x); 3177 #else 3178 return __bit_cast<_To>(__x); 3179 #endif 3180 } 3181 } 3182 } // namespace __proposed 3183 3184 // simd_cast {{{2 3185 template <typename _Tp, typename _Up, typename _Ap, 3186 typename _To = __value_type_or_identity_t<_Tp>> 3187 _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR auto 3188 simd_cast(const simd<_ValuePreserving<_Up, _To>, _Ap>& __x) 3189 -> decltype(static_simd_cast<_Tp>(__x)) 3190 { return static_simd_cast<_Tp>(__x); } 3191 3192 namespace __proposed { 3193 template <typename _Tp, typename _Up, typename _Ap, 3194 typename _To = __value_type_or_identity_t<_Tp>> 3195 _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR auto 3196 simd_cast(const simd_mask<_ValuePreserving<_Up, _To>, _Ap>& __x) 3197 -> decltype(static_simd_cast<_Tp>(__x)) 3198 { return static_simd_cast<_Tp>(__x); } 3199 } // namespace __proposed 3200 3201 // }}}2 3202 // resizing_simd_cast {{{ 3203 namespace __proposed { 3204 /* Proposed spec: 3205 3206 template <class T, class U, class Abi> 3207 T resizing_simd_cast(const simd<U, Abi>& x) 3208 3209 p1 Constraints: 3210 - is_simd_v<T> is true and 3211 - T::value_type is the same type as U 3212 3213 p2 Returns: 3214 A simd object with the i^th element initialized to x[i] for all i in the 3215 range of [0, min(T::size(), simd_size_v<U, Abi>)). If T::size() is larger 3216 than simd_size_v<U, Abi>, the remaining elements are value-initialized. 3217 3218 template <class T, class U, class Abi> 3219 T resizing_simd_cast(const simd_mask<U, Abi>& x) 3220 3221 p1 Constraints: is_simd_mask_v<T> is true 3222 3223 p2 Returns: 3224 A simd_mask object with the i^th element initialized to x[i] for all i in 3225 the range of [0, min(T::size(), simd_size_v<U, Abi>)). If T::size() is larger 3226 than simd_size_v<U, Abi>, the remaining elements are initialized to false. 3227 3228 */ 3229 3230 template <typename _Tp, typename _Up, typename _Ap> 3231 _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR enable_if_t< 3232 conjunction_v<is_simd<_Tp>, is_same<typename _Tp::value_type, _Up>>, _Tp> 3233 resizing_simd_cast(const simd<_Up, _Ap>& __x) 3234 { 3235 if constexpr (is_same_v<typename _Tp::abi_type, _Ap>) 3236 return __x; 3237 else if (__builtin_is_constant_evaluated()) 3238 return _Tp([&](auto __i) constexpr { 3239 return __i < simd_size_v<_Up, _Ap> ? __x[__i] : _Up(); 3240 }); 3241 else if constexpr (simd_size_v<_Up, _Ap> == 1) 3242 { 3243 _Tp __r{}; 3244 __r[0] = __x[0]; 3245 return __r; 3246 } 3247 else if constexpr (_Tp::size() == 1) 3248 return __x[0]; 3249 else if constexpr (sizeof(_Tp) == sizeof(__x) 3250 && !__is_fixed_size_abi_v<_Ap>) 3251 return {__private_init, 3252 __vector_bitcast<typename _Tp::value_type, _Tp::size()>( 3253 _Ap::_S_masked(__data(__x))._M_data)}; 3254 else 3255 { 3256 _Tp __r{}; 3257 __builtin_memcpy(&__data(__r), &__data(__x), 3258 sizeof(_Up) 3259 * std::min(_Tp::size(), simd_size_v<_Up, _Ap>)); 3260 return __r; 3261 } 3262 } 3263 3264 template <typename _Tp, typename _Up, typename _Ap> 3265 _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR 3266 enable_if_t<is_simd_mask_v<_Tp>, _Tp> 3267 resizing_simd_cast(const simd_mask<_Up, _Ap>& __x) 3268 { 3269 return {__private_init, _Tp::abi_type::_MaskImpl::template _S_convert< 3270 typename _Tp::simd_type::value_type>(__x)}; 3271 } 3272 } // namespace __proposed 3273 3274 // }}} 3275 // to_fixed_size {{{2 3276 template <typename _Tp, int _Np> 3277 _GLIBCXX_SIMD_INTRINSIC fixed_size_simd<_Tp, _Np> 3278 to_fixed_size(const fixed_size_simd<_Tp, _Np>& __x) 3279 { return __x; } 3280 3281 template <typename _Tp, int _Np> 3282 _GLIBCXX_SIMD_INTRINSIC fixed_size_simd_mask<_Tp, _Np> 3283 to_fixed_size(const fixed_size_simd_mask<_Tp, _Np>& __x) 3284 { return __x; } 3285 3286 template <typename _Tp, typename _Ap> 3287 _GLIBCXX_SIMD_INTRINSIC fixed_size_simd<_Tp, simd_size_v<_Tp, _Ap>> 3288 to_fixed_size(const simd<_Tp, _Ap>& __x) 3289 { 3290 using _Rp = fixed_size_simd<_Tp, simd_size_v<_Tp, _Ap>>; 3291 return _Rp([&__x](auto __i) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA { return __x[__i]; }); 3292 } 3293 3294 template <typename _Tp, typename _Ap> 3295 _GLIBCXX_SIMD_INTRINSIC fixed_size_simd_mask<_Tp, simd_size_v<_Tp, _Ap>> 3296 to_fixed_size(const simd_mask<_Tp, _Ap>& __x) 3297 { 3298 return {__private_init, 3299 [&](auto __i) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA { return __x[__i]; }}; 3300 } 3301 3302 // to_native {{{2 3303 template <typename _Tp, int _Np> 3304 _GLIBCXX_SIMD_INTRINSIC 3305 enable_if_t<(_Np == native_simd<_Tp>::size()), native_simd<_Tp>> 3306 to_native(const fixed_size_simd<_Tp, _Np>& __x) 3307 { 3308 alignas(memory_alignment_v<native_simd<_Tp>>) _Tp __mem[_Np]; 3309 __x.copy_to(__mem, vector_aligned); 3310 return {__mem, vector_aligned}; 3311 } 3312 3313 template <typename _Tp, int _Np> 3314 _GLIBCXX_SIMD_INTRINSIC 3315 enable_if_t<(_Np == native_simd_mask<_Tp>::size()), native_simd_mask<_Tp>> 3316 to_native(const fixed_size_simd_mask<_Tp, _Np>& __x) 3317 { 3318 return native_simd_mask<_Tp>( 3319 __private_init, 3320 [&](auto __i) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA { return __x[__i]; }); 3321 } 3322 3323 // to_compatible {{{2 3324 template <typename _Tp, int _Np> 3325 _GLIBCXX_SIMD_INTRINSIC enable_if_t<(_Np == simd<_Tp>::size()), simd<_Tp>> 3326 to_compatible(const simd<_Tp, simd_abi::fixed_size<_Np>>& __x) 3327 { 3328 alignas(memory_alignment_v<simd<_Tp>>) _Tp __mem[_Np]; 3329 __x.copy_to(__mem, vector_aligned); 3330 return {__mem, vector_aligned}; 3331 } 3332 3333 template <typename _Tp, int _Np> 3334 _GLIBCXX_SIMD_INTRINSIC 3335 enable_if_t<(_Np == simd_mask<_Tp>::size()), simd_mask<_Tp>> 3336 to_compatible(const simd_mask<_Tp, simd_abi::fixed_size<_Np>>& __x) 3337 { 3338 return simd_mask<_Tp>( 3339 __private_init, 3340 [&](auto __i) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA { return __x[__i]; }); 3341 } 3342 3343 // masked assignment [simd_mask.where] {{{1 3344 3345 // where_expression {{{1 3346 // const_where_expression<M, T> {{{2 3347 template <typename _M, typename _Tp> 3348 class const_where_expression 3349 { 3350 using _V = _Tp; 3351 static_assert(is_same_v<_V, __remove_cvref_t<_Tp>>); 3352 3353 struct _Wrapper { using value_type = _V; }; 3354 3355 protected: 3356 using _Impl = typename _V::_Impl; 3357 3358 using value_type = 3359 typename conditional_t<is_arithmetic_v<_V>, _Wrapper, _V>::value_type; 3360 3361 _GLIBCXX_SIMD_INTRINSIC friend const _M& 3362 __get_mask(const const_where_expression& __x) 3363 { return __x._M_k; } 3364 3365 _GLIBCXX_SIMD_INTRINSIC friend const _Tp& 3366 __get_lvalue(const const_where_expression& __x) 3367 { return __x._M_value; } 3368 3369 const _M& _M_k; 3370 _Tp& _M_value; 3371 3372 public: 3373 const_where_expression(const const_where_expression&) = delete; 3374 3375 const_where_expression& operator=(const const_where_expression&) = delete; 3376 3377 _GLIBCXX_SIMD_INTRINSIC constexpr 3378 const_where_expression(const _M& __kk, const _Tp& dd) 3379 : _M_k(__kk), _M_value(const_cast<_Tp&>(dd)) {} 3380 3381 _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR _V 3382 operator-() const&& 3383 { 3384 return {__private_init, 3385 _Impl::template _S_masked_unary<negate>(__data(_M_k), 3386 __data(_M_value))}; 3387 } 3388 3389 template <typename _Up, typename _Flags> 3390 [[nodiscard]] _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR _V 3391 copy_from(const _LoadStorePtr<_Up, value_type>* __mem, _IsSimdFlagType<_Flags>) const&& 3392 { 3393 return {__private_init, 3394 _Impl::_S_masked_load(__data(_M_value), __data(_M_k), 3395 _Flags::template _S_apply<_V>(__mem))}; 3396 } 3397 3398 template <typename _Up, typename _Flags> 3399 _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR void 3400 copy_to(_LoadStorePtr<_Up, value_type>* __mem, _IsSimdFlagType<_Flags>) const&& 3401 { 3402 _Impl::_S_masked_store(__data(_M_value), 3403 _Flags::template _S_apply<_V>(__mem), 3404 __data(_M_k)); 3405 } 3406 }; 3407 3408 // const_where_expression<bool, T> {{{2 3409 template <typename _Tp> 3410 class const_where_expression<bool, _Tp> 3411 { 3412 using _M = bool; 3413 using _V = _Tp; 3414 3415 static_assert(is_same_v<_V, __remove_cvref_t<_Tp>>); 3416 3417 struct _Wrapper { using value_type = _V; }; 3418 3419 protected: 3420 using value_type 3421 = typename conditional_t<is_arithmetic_v<_V>, _Wrapper, _V>::value_type; 3422 3423 _GLIBCXX_SIMD_INTRINSIC friend const _M& 3424 __get_mask(const const_where_expression& __x) 3425 { return __x._M_k; } 3426 3427 _GLIBCXX_SIMD_INTRINSIC friend const _Tp& 3428 __get_lvalue(const const_where_expression& __x) 3429 { return __x._M_value; } 3430 3431 const bool _M_k; 3432 _Tp& _M_value; 3433 3434 public: 3435 const_where_expression(const const_where_expression&) = delete; 3436 const_where_expression& operator=(const const_where_expression&) = delete; 3437 3438 _GLIBCXX_SIMD_INTRINSIC constexpr 3439 const_where_expression(const bool __kk, const _Tp& dd) 3440 : _M_k(__kk), _M_value(const_cast<_Tp&>(dd)) {} 3441 3442 _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR _V 3443 operator-() const&& 3444 { return _M_k ? -_M_value : _M_value; } 3445 3446 template <typename _Up, typename _Flags> 3447 [[nodiscard]] _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR _V 3448 copy_from(const _LoadStorePtr<_Up, value_type>* __mem, _IsSimdFlagType<_Flags>) const&& 3449 { return _M_k ? static_cast<_V>(__mem[0]) : _M_value; } 3450 3451 template <typename _Up, typename _Flags> 3452 _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR void 3453 copy_to(_LoadStorePtr<_Up, value_type>* __mem, _IsSimdFlagType<_Flags>) const&& 3454 { 3455 if (_M_k) 3456 __mem[0] = _M_value; 3457 } 3458 }; 3459 3460 // where_expression<M, T> {{{2 3461 template <typename _M, typename _Tp> 3462 class where_expression : public const_where_expression<_M, _Tp> 3463 { 3464 using _Impl = typename const_where_expression<_M, _Tp>::_Impl; 3465 3466 static_assert(!is_const<_Tp>::value, 3467 "where_expression may only be instantiated with __a non-const " 3468 "_Tp parameter"); 3469 3470 using typename const_where_expression<_M, _Tp>::value_type; 3471 using const_where_expression<_M, _Tp>::_M_k; 3472 using const_where_expression<_M, _Tp>::_M_value; 3473 3474 static_assert( 3475 is_same<typename _M::abi_type, typename _Tp::abi_type>::value, ""); 3476 static_assert(_M::size() == _Tp::size(), ""); 3477 3478 _GLIBCXX_SIMD_INTRINSIC friend constexpr _Tp& 3479 __get_lvalue(where_expression& __x) 3480 { return __x._M_value; } 3481 3482 public: 3483 where_expression(const where_expression&) = delete; 3484 where_expression& operator=(const where_expression&) = delete; 3485 3486 _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR 3487 where_expression(const _M& __kk, _Tp& dd) 3488 : const_where_expression<_M, _Tp>(__kk, dd) {} 3489 3490 template <typename _Up> 3491 _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR void 3492 operator=(_Up&& __x) && 3493 { 3494 _Impl::_S_masked_assign(__data(_M_k), __data(_M_value), 3495 __to_value_type_or_member_type<_Tp>( 3496 static_cast<_Up&&>(__x))); 3497 } 3498 3499 #define _GLIBCXX_SIMD_OP_(__op, __name) \ 3500 template <typename _Up> \ 3501 _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR void \ 3502 operator __op##=(_Up&& __x)&& \ 3503 { \ 3504 _Impl::template _S_masked_cassign( \ 3505 __data(_M_k), __data(_M_value), \ 3506 __to_value_type_or_member_type<_Tp>(static_cast<_Up&&>(__x)), \ 3507 [](auto __impl, auto __lhs, auto __rhs) \ 3508 constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA \ 3509 { return __impl.__name(__lhs, __rhs); }); \ 3510 } \ 3511 static_assert(true) 3512 _GLIBCXX_SIMD_OP_(+, _S_plus); 3513 _GLIBCXX_SIMD_OP_(-, _S_minus); 3514 _GLIBCXX_SIMD_OP_(*, _S_multiplies); 3515 _GLIBCXX_SIMD_OP_(/, _S_divides); 3516 _GLIBCXX_SIMD_OP_(%, _S_modulus); 3517 _GLIBCXX_SIMD_OP_(&, _S_bit_and); 3518 _GLIBCXX_SIMD_OP_(|, _S_bit_or); 3519 _GLIBCXX_SIMD_OP_(^, _S_bit_xor); 3520 _GLIBCXX_SIMD_OP_(<<, _S_shift_left); 3521 _GLIBCXX_SIMD_OP_(>>, _S_shift_right); 3522 #undef _GLIBCXX_SIMD_OP_ 3523 3524 _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR void 3525 operator++() && 3526 { 3527 __data(_M_value) 3528 = _Impl::template _S_masked_unary<__increment>(__data(_M_k), __data(_M_value)); 3529 } 3530 3531 _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR void 3532 operator++(int) && 3533 { 3534 __data(_M_value) 3535 = _Impl::template _S_masked_unary<__increment>(__data(_M_k), __data(_M_value)); 3536 } 3537 3538 _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR void 3539 operator--() && 3540 { 3541 __data(_M_value) 3542 = _Impl::template _S_masked_unary<__decrement>(__data(_M_k), __data(_M_value)); 3543 } 3544 3545 _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR void 3546 operator--(int) && 3547 { 3548 __data(_M_value) 3549 = _Impl::template _S_masked_unary<__decrement>(__data(_M_k), __data(_M_value)); 3550 } 3551 3552 // intentionally hides const_where_expression::copy_from 3553 template <typename _Up, typename _Flags> 3554 _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR void 3555 copy_from(const _LoadStorePtr<_Up, value_type>* __mem, _IsSimdFlagType<_Flags>) && 3556 { 3557 __data(_M_value) = _Impl::_S_masked_load(__data(_M_value), __data(_M_k), 3558 _Flags::template _S_apply<_Tp>(__mem)); 3559 } 3560 }; 3561 3562 // where_expression<bool, T> {{{2 3563 template <typename _Tp> 3564 class where_expression<bool, _Tp> 3565 : public const_where_expression<bool, _Tp> 3566 { 3567 using _M = bool; 3568 using typename const_where_expression<_M, _Tp>::value_type; 3569 using const_where_expression<_M, _Tp>::_M_k; 3570 using const_where_expression<_M, _Tp>::_M_value; 3571 3572 public: 3573 where_expression(const where_expression&) = delete; 3574 where_expression& operator=(const where_expression&) = delete; 3575 3576 _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR 3577 where_expression(const _M& __kk, _Tp& dd) 3578 : const_where_expression<_M, _Tp>(__kk, dd) {} 3579 3580 #define _GLIBCXX_SIMD_OP_(__op) \ 3581 template <typename _Up> \ 3582 _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR void \ 3583 operator __op(_Up&& __x)&& \ 3584 { if (_M_k) _M_value __op static_cast<_Up&&>(__x); } 3585 3586 _GLIBCXX_SIMD_OP_(=) 3587 _GLIBCXX_SIMD_OP_(+=) 3588 _GLIBCXX_SIMD_OP_(-=) 3589 _GLIBCXX_SIMD_OP_(*=) 3590 _GLIBCXX_SIMD_OP_(/=) 3591 _GLIBCXX_SIMD_OP_(%=) 3592 _GLIBCXX_SIMD_OP_(&=) 3593 _GLIBCXX_SIMD_OP_(|=) 3594 _GLIBCXX_SIMD_OP_(^=) 3595 _GLIBCXX_SIMD_OP_(<<=) 3596 _GLIBCXX_SIMD_OP_(>>=) 3597 #undef _GLIBCXX_SIMD_OP_ 3598 3599 _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR void 3600 operator++() && 3601 { if (_M_k) ++_M_value; } 3602 3603 _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR void 3604 operator++(int) && 3605 { if (_M_k) ++_M_value; } 3606 3607 _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR void 3608 operator--() && 3609 { if (_M_k) --_M_value; } 3610 3611 _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR void 3612 operator--(int) && 3613 { if (_M_k) --_M_value; } 3614 3615 // intentionally hides const_where_expression::copy_from 3616 template <typename _Up, typename _Flags> 3617 _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR void 3618 copy_from(const _LoadStorePtr<_Up, value_type>* __mem, _IsSimdFlagType<_Flags>) && 3619 { if (_M_k) _M_value = __mem[0]; } 3620 }; 3621 3622 // where {{{1 3623 template <typename _Tp, typename _Ap> 3624 _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR 3625 where_expression<simd_mask<_Tp, _Ap>, simd<_Tp, _Ap>> 3626 where(const typename simd<_Tp, _Ap>::mask_type& __k, simd<_Tp, _Ap>& __value) 3627 { return {__k, __value}; } 3628 3629 template <typename _Tp, typename _Ap> 3630 _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR 3631 const_where_expression<simd_mask<_Tp, _Ap>, simd<_Tp, _Ap>> 3632 where(const typename simd<_Tp, _Ap>::mask_type& __k, const simd<_Tp, _Ap>& __value) 3633 { return {__k, __value}; } 3634 3635 template <typename _Tp, typename _Ap> 3636 _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR 3637 where_expression<simd_mask<_Tp, _Ap>, simd_mask<_Tp, _Ap>> 3638 where(const remove_const_t<simd_mask<_Tp, _Ap>>& __k, simd_mask<_Tp, _Ap>& __value) 3639 { return {__k, __value}; } 3640 3641 template <typename _Tp, typename _Ap> 3642 _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR 3643 const_where_expression<simd_mask<_Tp, _Ap>, simd_mask<_Tp, _Ap>> 3644 where(const remove_const_t<simd_mask<_Tp, _Ap>>& __k, const simd_mask<_Tp, _Ap>& __value) 3645 { return {__k, __value}; } 3646 3647 template <typename _Tp> 3648 _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR where_expression<bool, _Tp> 3649 where(_ExactBool __k, _Tp& __value) 3650 { return {__k, __value}; } 3651 3652 template <typename _Tp> 3653 _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR const_where_expression<bool, _Tp> 3654 where(_ExactBool __k, const _Tp& __value) 3655 { return {__k, __value}; } 3656 3657 template <typename _Tp, typename _Ap> 3658 _GLIBCXX_SIMD_CONSTEXPR void 3659 where(bool __k, simd<_Tp, _Ap>& __value) = delete; 3660 3661 template <typename _Tp, typename _Ap> 3662 _GLIBCXX_SIMD_CONSTEXPR void 3663 where(bool __k, const simd<_Tp, _Ap>& __value) = delete; 3664 3665 // proposed mask iterations {{{1 3666 namespace __proposed { 3667 template <size_t _Np> 3668 class where_range 3669 { 3670 const bitset<_Np> __bits; 3671 3672 public: 3673 where_range(bitset<_Np> __b) : __bits(__b) {} 3674 3675 class iterator 3676 { 3677 size_t __mask; 3678 size_t __bit; 3679 3680 _GLIBCXX_SIMD_INTRINSIC void 3681 __next_bit() 3682 { __bit = __builtin_ctzl(__mask); } 3683 3684 _GLIBCXX_SIMD_INTRINSIC void 3685 __reset_lsb() 3686 { 3687 // 01100100 - 1 = 01100011 3688 __mask &= (__mask - 1); 3689 // __asm__("btr %1,%0" : "+r"(__mask) : "r"(__bit)); 3690 } 3691 3692 public: 3693 iterator(decltype(__mask) __m) : __mask(__m) { __next_bit(); } 3694 iterator(const iterator&) = default; 3695 iterator(iterator&&) = default; 3696 3697 _GLIBCXX_SIMD_ALWAYS_INLINE size_t 3698 operator->() const 3699 { return __bit; } 3700 3701 _GLIBCXX_SIMD_ALWAYS_INLINE size_t 3702 operator*() const 3703 { return __bit; } 3704 3705 _GLIBCXX_SIMD_ALWAYS_INLINE iterator& 3706 operator++() 3707 { 3708 __reset_lsb(); 3709 __next_bit(); 3710 return *this; 3711 } 3712 3713 _GLIBCXX_SIMD_ALWAYS_INLINE iterator 3714 operator++(int) 3715 { 3716 iterator __tmp = *this; 3717 __reset_lsb(); 3718 __next_bit(); 3719 return __tmp; 3720 } 3721 3722 _GLIBCXX_SIMD_ALWAYS_INLINE bool 3723 operator==(const iterator& __rhs) const 3724 { return __mask == __rhs.__mask; } 3725 3726 _GLIBCXX_SIMD_ALWAYS_INLINE bool 3727 operator!=(const iterator& __rhs) const 3728 { return __mask != __rhs.__mask; } 3729 }; 3730 3731 iterator 3732 begin() const 3733 { return __bits.to_ullong(); } 3734 3735 iterator 3736 end() const 3737 { return 0; } 3738 }; 3739 3740 template <typename _Tp, typename _Ap> 3741 where_range<simd_size_v<_Tp, _Ap>> 3742 where(const simd_mask<_Tp, _Ap>& __k) 3743 { return __k.__to_bitset(); } 3744 3745 } // namespace __proposed 3746 3747 // }}}1 3748 // reductions [simd.reductions] {{{1 3749 template <typename _Tp, typename _Abi, typename _BinaryOperation = plus<>> 3750 _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR _Tp 3751 reduce(const simd<_Tp, _Abi>& __v, _BinaryOperation __binary_op = _BinaryOperation()) 3752 { return _Abi::_SimdImpl::_S_reduce(__v, __binary_op); } 3753 3754 template <typename _M, typename _V, typename _BinaryOperation = plus<>> 3755 _GLIBCXX_SIMD_INTRINSIC typename _V::value_type 3756 reduce(const const_where_expression<_M, _V>& __x, 3757 typename _V::value_type __identity_element, _BinaryOperation __binary_op) 3758 { 3759 if (__builtin_expect(none_of(__get_mask(__x)), false)) 3760 return __identity_element; 3761 3762 _V __tmp = __identity_element; 3763 _V::_Impl::_S_masked_assign(__data(__get_mask(__x)), __data(__tmp), 3764 __data(__get_lvalue(__x))); 3765 return reduce(__tmp, __binary_op); 3766 } 3767 3768 template <typename _M, typename _V> 3769 _GLIBCXX_SIMD_INTRINSIC typename _V::value_type 3770 reduce(const const_where_expression<_M, _V>& __x, plus<> __binary_op = {}) 3771 { return reduce(__x, 0, __binary_op); } 3772 3773 template <typename _M, typename _V> 3774 _GLIBCXX_SIMD_INTRINSIC typename _V::value_type 3775 reduce(const const_where_expression<_M, _V>& __x, multiplies<> __binary_op) 3776 { return reduce(__x, 1, __binary_op); } 3777 3778 template <typename _M, typename _V> 3779 _GLIBCXX_SIMD_INTRINSIC typename _V::value_type 3780 reduce(const const_where_expression<_M, _V>& __x, bit_and<> __binary_op) 3781 { return reduce(__x, ~typename _V::value_type(), __binary_op); } 3782 3783 template <typename _M, typename _V> 3784 _GLIBCXX_SIMD_INTRINSIC typename _V::value_type 3785 reduce(const const_where_expression<_M, _V>& __x, bit_or<> __binary_op) 3786 { return reduce(__x, 0, __binary_op); } 3787 3788 template <typename _M, typename _V> 3789 _GLIBCXX_SIMD_INTRINSIC typename _V::value_type 3790 reduce(const const_where_expression<_M, _V>& __x, bit_xor<> __binary_op) 3791 { return reduce(__x, 0, __binary_op); } 3792 3793 template <typename _Tp, typename _Abi> 3794 _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR _Tp 3795 hmin(const simd<_Tp, _Abi>& __v) noexcept 3796 { return _Abi::_SimdImpl::_S_reduce(__v, __detail::_Minimum()); } 3797 3798 template <typename _Tp, typename _Abi> 3799 _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR _Tp 3800 hmax(const simd<_Tp, _Abi>& __v) noexcept 3801 { return _Abi::_SimdImpl::_S_reduce(__v, __detail::_Maximum()); } 3802 3803 template <typename _M, typename _V> 3804 _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR 3805 typename _V::value_type 3806 hmin(const const_where_expression<_M, _V>& __x) noexcept 3807 { 3808 using _Tp = typename _V::value_type; 3809 constexpr _Tp __id_elem = 3810 #ifdef __FINITE_MATH_ONLY__ 3811 __finite_max_v<_Tp>; 3812 #else 3813 __value_or<__infinity, _Tp>(__finite_max_v<_Tp>); 3814 #endif 3815 _V __tmp = __id_elem; 3816 _V::_Impl::_S_masked_assign(__data(__get_mask(__x)), __data(__tmp), 3817 __data(__get_lvalue(__x))); 3818 return _V::abi_type::_SimdImpl::_S_reduce(__tmp, __detail::_Minimum()); 3819 } 3820 3821 template <typename _M, typename _V> 3822 _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR 3823 typename _V::value_type 3824 hmax(const const_where_expression<_M, _V>& __x) noexcept 3825 { 3826 using _Tp = typename _V::value_type; 3827 constexpr _Tp __id_elem = 3828 #ifdef __FINITE_MATH_ONLY__ 3829 __finite_min_v<_Tp>; 3830 #else 3831 [] { 3832 if constexpr (__value_exists_v<__infinity, _Tp>) 3833 return -__infinity_v<_Tp>; 3834 else 3835 return __finite_min_v<_Tp>; 3836 }(); 3837 #endif 3838 _V __tmp = __id_elem; 3839 _V::_Impl::_S_masked_assign(__data(__get_mask(__x)), __data(__tmp), 3840 __data(__get_lvalue(__x))); 3841 return _V::abi_type::_SimdImpl::_S_reduce(__tmp, __detail::_Maximum()); 3842 } 3843 3844 // }}}1 3845 // algorithms [simd.alg] {{{ 3846 template <typename _Tp, typename _Ap> 3847 _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR simd<_Tp, _Ap> 3848 min(const simd<_Tp, _Ap>& __a, const simd<_Tp, _Ap>& __b) 3849 { return {__private_init, _Ap::_SimdImpl::_S_min(__data(__a), __data(__b))}; } 3850 3851 template <typename _Tp, typename _Ap> 3852 _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR simd<_Tp, _Ap> 3853 max(const simd<_Tp, _Ap>& __a, const simd<_Tp, _Ap>& __b) 3854 { return {__private_init, _Ap::_SimdImpl::_S_max(__data(__a), __data(__b))}; } 3855 3856 template <typename _Tp, typename _Ap> 3857 _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR 3858 pair<simd<_Tp, _Ap>, simd<_Tp, _Ap>> 3859 minmax(const simd<_Tp, _Ap>& __a, const simd<_Tp, _Ap>& __b) 3860 { 3861 const auto pair_of_members 3862 = _Ap::_SimdImpl::_S_minmax(__data(__a), __data(__b)); 3863 return {simd<_Tp, _Ap>(__private_init, pair_of_members.first), 3864 simd<_Tp, _Ap>(__private_init, pair_of_members.second)}; 3865 } 3866 3867 template <typename _Tp, typename _Ap> 3868 _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR simd<_Tp, _Ap> 3869 clamp(const simd<_Tp, _Ap>& __v, const simd<_Tp, _Ap>& __lo, const simd<_Tp, _Ap>& __hi) 3870 { 3871 using _Impl = typename _Ap::_SimdImpl; 3872 return {__private_init, 3873 _Impl::_S_min(__data(__hi), 3874 _Impl::_S_max(__data(__lo), __data(__v)))}; 3875 } 3876 3877 // }}} 3878 3879 template <size_t... _Sizes, typename _Tp, typename _Ap, 3880 typename = enable_if_t<((_Sizes + ...) == simd<_Tp, _Ap>::size())>> 3881 inline tuple<simd<_Tp, simd_abi::deduce_t<_Tp, _Sizes>>...> 3882 split(const simd<_Tp, _Ap>&); 3883 3884 // __extract_part {{{ 3885 template <int _Index, int _Total, int _Combine = 1, typename _Tp, size_t _Np> 3886 _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_CONST constexpr 3887 _SimdWrapper<_Tp, _Np / _Total * _Combine> 3888 __extract_part(const _SimdWrapper<_Tp, _Np> __x); 3889 3890 template <int _Index, int _Parts, int _Combine = 1, typename _Tp, typename _A0, typename... _As> 3891 _GLIBCXX_SIMD_INTRINSIC constexpr auto 3892 __extract_part(const _SimdTuple<_Tp, _A0, _As...>& __x); 3893 3894 // }}} 3895 // _SizeList {{{ 3896 template <size_t _V0, size_t... _Values> 3897 struct _SizeList 3898 { 3899 template <size_t _I> 3900 static constexpr size_t 3901 _S_at(_SizeConstant<_I> = {}) 3902 { 3903 if constexpr (_I == 0) 3904 return _V0; 3905 else 3906 return _SizeList<_Values...>::template _S_at<_I - 1>(); 3907 } 3908 3909 template <size_t _I> 3910 static constexpr auto 3911 _S_before(_SizeConstant<_I> = {}) 3912 { 3913 if constexpr (_I == 0) 3914 return _SizeConstant<0>(); 3915 else 3916 return _SizeConstant< 3917 _V0 + _SizeList<_Values...>::template _S_before<_I - 1>()>(); 3918 } 3919 3920 template <size_t _Np> 3921 static constexpr auto 3922 _S_pop_front(_SizeConstant<_Np> = {}) 3923 { 3924 if constexpr (_Np == 0) 3925 return _SizeList(); 3926 else 3927 return _SizeList<_Values...>::template _S_pop_front<_Np - 1>(); 3928 } 3929 }; 3930 3931 // }}} 3932 // __extract_center {{{ 3933 template <typename _Tp, size_t _Np> 3934 _GLIBCXX_SIMD_INTRINSIC _SimdWrapper<_Tp, _Np / 2> 3935 __extract_center(_SimdWrapper<_Tp, _Np> __x) 3936 { 3937 static_assert(_Np >= 4); 3938 static_assert(_Np % 4 == 0); // x0 - x1 - x2 - x3 -> return {x1, x2} 3939 #if _GLIBCXX_SIMD_X86INTRIN // {{{ 3940 if constexpr (__have_avx512f && sizeof(_Tp) * _Np == 64) 3941 { 3942 const auto __intrin = __to_intrin(__x); 3943 if constexpr (is_integral_v<_Tp>) 3944 return __vector_bitcast<_Tp>(_mm512_castsi512_si256( 3945 _mm512_shuffle_i32x4(__intrin, __intrin, 3946 1 + 2 * 0x4 + 2 * 0x10 + 3 * 0x40))); 3947 else if constexpr (sizeof(_Tp) == 4) 3948 return __vector_bitcast<_Tp>(_mm512_castps512_ps256( 3949 _mm512_shuffle_f32x4(__intrin, __intrin, 3950 1 + 2 * 0x4 + 2 * 0x10 + 3 * 0x40))); 3951 else if constexpr (sizeof(_Tp) == 8) 3952 return __vector_bitcast<_Tp>(_mm512_castpd512_pd256( 3953 _mm512_shuffle_f64x2(__intrin, __intrin, 3954 1 + 2 * 0x4 + 2 * 0x10 + 3 * 0x40))); 3955 else 3956 __assert_unreachable<_Tp>(); 3957 } 3958 else if constexpr (sizeof(_Tp) * _Np == 32 && is_floating_point_v<_Tp>) 3959 return __vector_bitcast<_Tp>( 3960 _mm_shuffle_pd(__lo128(__vector_bitcast<double>(__x)), 3961 __hi128(__vector_bitcast<double>(__x)), 1)); 3962 else if constexpr (sizeof(__x) == 32 && sizeof(_Tp) * _Np <= 32) 3963 return __vector_bitcast<_Tp>( 3964 _mm_alignr_epi8(__hi128(__vector_bitcast<_LLong>(__x)), 3965 __lo128(__vector_bitcast<_LLong>(__x)), 3966 sizeof(_Tp) * _Np / 4)); 3967 else 3968 #endif // _GLIBCXX_SIMD_X86INTRIN }}} 3969 { 3970 __vector_type_t<_Tp, _Np / 2> __r; 3971 __builtin_memcpy(&__r, 3972 reinterpret_cast<const char*>(&__x) 3973 + sizeof(_Tp) * _Np / 4, 3974 sizeof(_Tp) * _Np / 2); 3975 return __r; 3976 } 3977 } 3978 3979 template <typename _Tp, typename _A0, typename... _As> 3980 _GLIBCXX_SIMD_INTRINSIC 3981 _SimdWrapper<_Tp, _SimdTuple<_Tp, _A0, _As...>::_S_size() / 2> 3982 __extract_center(const _SimdTuple<_Tp, _A0, _As...>& __x) 3983 { 3984 if constexpr (sizeof...(_As) == 0) 3985 return __extract_center(__x.first); 3986 else 3987 return __extract_part<1, 4, 2>(__x); 3988 } 3989 3990 // }}} 3991 // __split_wrapper {{{ 3992 template <size_t... _Sizes, typename _Tp, typename... _As> 3993 auto 3994 __split_wrapper(_SizeList<_Sizes...>, const _SimdTuple<_Tp, _As...>& __x) 3995 { 3996 return split<_Sizes...>( 3997 fixed_size_simd<_Tp, _SimdTuple<_Tp, _As...>::_S_size()>(__private_init, 3998 __x)); 3999 } 4000 4001 // }}} 4002 4003 // split<simd>(simd) {{{ 4004 template <typename _V, typename _Ap, 4005 size_t _Parts = simd_size_v<typename _V::value_type, _Ap> / _V::size()> 4006 enable_if_t<simd_size_v<typename _V::value_type, _Ap> == _Parts * _V::size() 4007 && is_simd_v<_V>, array<_V, _Parts>> 4008 split(const simd<typename _V::value_type, _Ap>& __x) 4009 { 4010 using _Tp = typename _V::value_type; 4011 if constexpr (_Parts == 1) 4012 { 4013 return {simd_cast<_V>(__x)}; 4014 } 4015 else if (__x._M_is_constprop()) 4016 { 4017 return __generate_from_n_evaluations<_Parts, array<_V, _Parts>>( 4018 [&](auto __i) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA { 4019 return _V([&](auto __j) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA 4020 { return __x[__i * _V::size() + __j]; }); 4021 }); 4022 } 4023 else if constexpr ( 4024 __is_fixed_size_abi_v<_Ap> 4025 && (is_same_v<typename _V::abi_type, simd_abi::scalar> 4026 || (__is_fixed_size_abi_v<typename _V::abi_type> 4027 && sizeof(_V) == sizeof(_Tp) * _V::size() // _V doesn't have padding 4028 ))) 4029 { 4030 // fixed_size -> fixed_size (w/o padding) or scalar 4031 #ifdef _GLIBCXX_SIMD_USE_ALIASING_LOADS 4032 const __may_alias<_Tp>* const __element_ptr 4033 = reinterpret_cast<const __may_alias<_Tp>*>(&__data(__x)); 4034 return __generate_from_n_evaluations<_Parts, array<_V, _Parts>>( 4035 [&](auto __i) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA 4036 { return _V(__element_ptr + __i * _V::size(), vector_aligned); }); 4037 #else 4038 const auto& __xx = __data(__x); 4039 return __generate_from_n_evaluations<_Parts, array<_V, _Parts>>( 4040 [&](auto __i) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA { 4041 [[maybe_unused]] constexpr size_t __offset 4042 = decltype(__i)::value * _V::size(); 4043 return _V([&](auto __j) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA { 4044 constexpr _SizeConstant<__j + __offset> __k; 4045 return __xx[__k]; 4046 }); 4047 }); 4048 #endif 4049 } 4050 else if constexpr (is_same_v<typename _V::abi_type, simd_abi::scalar>) 4051 { 4052 // normally memcpy should work here as well 4053 return __generate_from_n_evaluations<_Parts, array<_V, _Parts>>( 4054 [&](auto __i) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA { return __x[__i]; }); 4055 } 4056 else 4057 { 4058 return __generate_from_n_evaluations<_Parts, array<_V, _Parts>>( 4059 [&](auto __i) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA { 4060 if constexpr (__is_fixed_size_abi_v<typename _V::abi_type>) 4061 return _V([&](auto __j) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA { 4062 return __x[__i * _V::size() + __j]; 4063 }); 4064 else 4065 return _V(__private_init, 4066 __extract_part<decltype(__i)::value, _Parts>(__data(__x))); 4067 }); 4068 } 4069 } 4070 4071 // }}} 4072 // split<simd_mask>(simd_mask) {{{ 4073 template <typename _V, typename _Ap, 4074 size_t _Parts = simd_size_v<typename _V::simd_type::value_type, _Ap> / _V::size()> 4075 enable_if_t<is_simd_mask_v<_V> && simd_size_v<typename 4076 _V::simd_type::value_type, _Ap> == _Parts * _V::size(), array<_V, _Parts>> 4077 split(const simd_mask<typename _V::simd_type::value_type, _Ap>& __x) 4078 { 4079 if constexpr (is_same_v<_Ap, typename _V::abi_type>) 4080 return {__x}; 4081 else if constexpr (_Parts == 1) 4082 return {__proposed::static_simd_cast<_V>(__x)}; 4083 else if constexpr (_Parts == 2 && __is_sse_abi<typename _V::abi_type>() 4084 && __is_avx_abi<_Ap>()) 4085 return {_V(__private_init, __lo128(__data(__x))), 4086 _V(__private_init, __hi128(__data(__x)))}; 4087 else if constexpr (_V::size() <= __CHAR_BIT__ * sizeof(_ULLong)) 4088 { 4089 const bitset __bits = __x.__to_bitset(); 4090 return __generate_from_n_evaluations<_Parts, array<_V, _Parts>>( 4091 [&](auto __i) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA { 4092 constexpr size_t __offset = __i * _V::size(); 4093 return _V(__bitset_init, (__bits >> __offset).to_ullong()); 4094 }); 4095 } 4096 else 4097 { 4098 return __generate_from_n_evaluations<_Parts, array<_V, _Parts>>( 4099 [&](auto __i) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA { 4100 constexpr size_t __offset = __i * _V::size(); 4101 return _V(__private_init, 4102 [&](auto __j) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA { 4103 return __x[__j + __offset]; 4104 }); 4105 }); 4106 } 4107 } 4108 4109 // }}} 4110 // split<_Sizes...>(simd) {{{ 4111 template <size_t... _Sizes, typename _Tp, typename _Ap, typename> 4112 _GLIBCXX_SIMD_ALWAYS_INLINE 4113 tuple<simd<_Tp, simd_abi::deduce_t<_Tp, _Sizes>>...> 4114 split(const simd<_Tp, _Ap>& __x) 4115 { 4116 using _SL = _SizeList<_Sizes...>; 4117 using _Tuple = tuple<__deduced_simd<_Tp, _Sizes>...>; 4118 constexpr size_t _Np = simd_size_v<_Tp, _Ap>; 4119 constexpr size_t _N0 = _SL::template _S_at<0>(); 4120 using _V = __deduced_simd<_Tp, _N0>; 4121 4122 if (__x._M_is_constprop()) 4123 return __generate_from_n_evaluations<sizeof...(_Sizes), _Tuple>( 4124 [&](auto __i) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA { 4125 using _Vi = __deduced_simd<_Tp, _SL::_S_at(__i)>; 4126 constexpr size_t __offset = _SL::_S_before(__i); 4127 return _Vi([&](auto __j) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA { 4128 return __x[__offset + __j]; 4129 }); 4130 }); 4131 else if constexpr (_Np == _N0) 4132 { 4133 static_assert(sizeof...(_Sizes) == 1); 4134 return {simd_cast<_V>(__x)}; 4135 } 4136 else if constexpr // split from fixed_size, such that __x::first.size == _N0 4137 (__is_fixed_size_abi_v< 4138 _Ap> && __fixed_size_storage_t<_Tp, _Np>::_S_first_size == _N0) 4139 { 4140 static_assert( 4141 !__is_fixed_size_abi_v<typename _V::abi_type>, 4142 "How can <_Tp, _Np> be __a single _SimdTuple entry but __a " 4143 "fixed_size_simd " 4144 "when deduced?"); 4145 // extract first and recurse (__split_wrapper is needed to deduce a new 4146 // _Sizes pack) 4147 return tuple_cat(make_tuple(_V(__private_init, __data(__x).first)), 4148 __split_wrapper(_SL::template _S_pop_front<1>(), 4149 __data(__x).second)); 4150 } 4151 else if constexpr ((!is_same_v<simd_abi::scalar, 4152 simd_abi::deduce_t<_Tp, _Sizes>> && ...) 4153 && (!__is_fixed_size_abi_v< 4154 simd_abi::deduce_t<_Tp, _Sizes>> && ...)) 4155 { 4156 if constexpr (((_Sizes * 2 == _Np) && ...)) 4157 return {{__private_init, __extract_part<0, 2>(__data(__x))}, 4158 {__private_init, __extract_part<1, 2>(__data(__x))}}; 4159 else if constexpr (is_same_v<_SizeList<_Sizes...>, 4160 _SizeList<_Np / 3, _Np / 3, _Np / 3>>) 4161 return {{__private_init, __extract_part<0, 3>(__data(__x))}, 4162 {__private_init, __extract_part<1, 3>(__data(__x))}, 4163 {__private_init, __extract_part<2, 3>(__data(__x))}}; 4164 else if constexpr (is_same_v<_SizeList<_Sizes...>, 4165 _SizeList<2 * _Np / 3, _Np / 3>>) 4166 return {{__private_init, __extract_part<0, 3, 2>(__data(__x))}, 4167 {__private_init, __extract_part<2, 3>(__data(__x))}}; 4168 else if constexpr (is_same_v<_SizeList<_Sizes...>, 4169 _SizeList<_Np / 3, 2 * _Np / 3>>) 4170 return {{__private_init, __extract_part<0, 3>(__data(__x))}, 4171 {__private_init, __extract_part<1, 3, 2>(__data(__x))}}; 4172 else if constexpr (is_same_v<_SizeList<_Sizes...>, 4173 _SizeList<_Np / 2, _Np / 4, _Np / 4>>) 4174 return {{__private_init, __extract_part<0, 2>(__data(__x))}, 4175 {__private_init, __extract_part<2, 4>(__data(__x))}, 4176 {__private_init, __extract_part<3, 4>(__data(__x))}}; 4177 else if constexpr (is_same_v<_SizeList<_Sizes...>, 4178 _SizeList<_Np / 4, _Np / 4, _Np / 2>>) 4179 return {{__private_init, __extract_part<0, 4>(__data(__x))}, 4180 {__private_init, __extract_part<1, 4>(__data(__x))}, 4181 {__private_init, __extract_part<1, 2>(__data(__x))}}; 4182 else if constexpr (is_same_v<_SizeList<_Sizes...>, 4183 _SizeList<_Np / 4, _Np / 2, _Np / 4>>) 4184 return {{__private_init, __extract_part<0, 4>(__data(__x))}, 4185 {__private_init, __extract_center(__data(__x))}, 4186 {__private_init, __extract_part<3, 4>(__data(__x))}}; 4187 else if constexpr (((_Sizes * 4 == _Np) && ...)) 4188 return {{__private_init, __extract_part<0, 4>(__data(__x))}, 4189 {__private_init, __extract_part<1, 4>(__data(__x))}, 4190 {__private_init, __extract_part<2, 4>(__data(__x))}, 4191 {__private_init, __extract_part<3, 4>(__data(__x))}}; 4192 // else fall through 4193 } 4194 #ifdef _GLIBCXX_SIMD_USE_ALIASING_LOADS 4195 const __may_alias<_Tp>* const __element_ptr 4196 = reinterpret_cast<const __may_alias<_Tp>*>(&__x); 4197 return __generate_from_n_evaluations<sizeof...(_Sizes), _Tuple>( 4198 [&](auto __i) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA { 4199 using _Vi = __deduced_simd<_Tp, _SL::_S_at(__i)>; 4200 constexpr size_t __offset = _SL::_S_before(__i); 4201 constexpr size_t __base_align = alignof(simd<_Tp, _Ap>); 4202 constexpr size_t __a 4203 = __base_align - ((__offset * sizeof(_Tp)) % __base_align); 4204 constexpr size_t __b = ((__a - 1) & __a) ^ __a; 4205 constexpr size_t __alignment = __b == 0 ? __a : __b; 4206 return _Vi(__element_ptr + __offset, overaligned<__alignment>); 4207 }); 4208 #else 4209 return __generate_from_n_evaluations<sizeof...(_Sizes), _Tuple>( 4210 [&](auto __i) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA { 4211 using _Vi = __deduced_simd<_Tp, _SL::_S_at(__i)>; 4212 const auto& __xx = __data(__x); 4213 using _Offset = decltype(_SL::_S_before(__i)); 4214 return _Vi([&](auto __j) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA { 4215 constexpr _SizeConstant<_Offset::value + __j> __k; 4216 return __xx[__k]; 4217 }); 4218 }); 4219 #endif 4220 } 4221 4222 // }}} 4223 4224 // __subscript_in_pack {{{ 4225 template <size_t _I, typename _Tp, typename _Ap, typename... _As> 4226 _GLIBCXX_SIMD_INTRINSIC constexpr _Tp 4227 __subscript_in_pack(const simd<_Tp, _Ap>& __x, const simd<_Tp, _As>&... __xs) 4228 { 4229 if constexpr (_I < simd_size_v<_Tp, _Ap>) 4230 return __x[_I]; 4231 else 4232 return __subscript_in_pack<_I - simd_size_v<_Tp, _Ap>>(__xs...); 4233 } 4234 4235 // }}} 4236 // __store_pack_of_simd {{{ 4237 template <typename _Tp, typename _A0, typename... _As> 4238 _GLIBCXX_SIMD_INTRINSIC void 4239 __store_pack_of_simd(char* __mem, const simd<_Tp, _A0>& __x0, const simd<_Tp, _As>&... __xs) 4240 { 4241 constexpr size_t __n_bytes = sizeof(_Tp) * simd_size_v<_Tp, _A0>; 4242 __builtin_memcpy(__mem, &__data(__x0), __n_bytes); 4243 if constexpr (sizeof...(__xs) > 0) 4244 __store_pack_of_simd(__mem + __n_bytes, __xs...); 4245 } 4246 4247 // }}} 4248 // concat(simd...) {{{ 4249 template <typename _Tp, typename... _As, typename = __detail::__odr_helper> 4250 inline _GLIBCXX_SIMD_CONSTEXPR 4251 simd<_Tp, simd_abi::deduce_t<_Tp, (simd_size_v<_Tp, _As> + ...)>> 4252 concat(const simd<_Tp, _As>&... __xs) 4253 { 4254 using _Rp = __deduced_simd<_Tp, (simd_size_v<_Tp, _As> + ...)>; 4255 if constexpr (sizeof...(__xs) == 1) 4256 return simd_cast<_Rp>(__xs...); 4257 else if ((... && __xs._M_is_constprop())) 4258 return simd<_Tp, 4259 simd_abi::deduce_t<_Tp, (simd_size_v<_Tp, _As> + ...)>>( 4260 [&](auto __i) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA 4261 { return __subscript_in_pack<__i>(__xs...); }); 4262 else 4263 { 4264 _Rp __r{}; 4265 __store_pack_of_simd(reinterpret_cast<char*>(&__data(__r)), __xs...); 4266 return __r; 4267 } 4268 } 4269 4270 // }}} 4271 // concat(array<simd>) {{{ 4272 template <typename _Tp, typename _Abi, size_t _Np> 4273 _GLIBCXX_SIMD_ALWAYS_INLINE 4274 _GLIBCXX_SIMD_CONSTEXPR __deduced_simd<_Tp, simd_size_v<_Tp, _Abi> * _Np> 4275 concat(const array<simd<_Tp, _Abi>, _Np>& __x) 4276 { 4277 return __call_with_subscripts<_Np>( 4278 __x, [](const auto&... __xs) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA { 4279 return concat(__xs...); 4280 }); 4281 } 4282 4283 // }}} 4284 4285 /// @cond undocumented 4286 // _SmartReference {{{ 4287 template <typename _Up, typename _Accessor = _Up, 4288 typename _ValueType = typename _Up::value_type> 4289 class _SmartReference 4290 { 4291 friend _Accessor; 4292 int _M_index; 4293 _Up& _M_obj; 4294 4295 _GLIBCXX_SIMD_INTRINSIC constexpr _ValueType 4296 _M_read() const noexcept 4297 { 4298 if constexpr (is_arithmetic_v<_Up>) 4299 return _M_obj; 4300 else 4301 return _M_obj[_M_index]; 4302 } 4303 4304 template <typename _Tp> 4305 _GLIBCXX_SIMD_INTRINSIC constexpr void 4306 _M_write(_Tp&& __x) const 4307 { _Accessor::_S_set(_M_obj, _M_index, static_cast<_Tp&&>(__x)); } 4308 4309 public: 4310 _GLIBCXX_SIMD_INTRINSIC constexpr 4311 _SmartReference(_Up& __o, int __i) noexcept 4312 : _M_index(__i), _M_obj(__o) {} 4313 4314 using value_type = _ValueType; 4315 4316 _GLIBCXX_SIMD_INTRINSIC 4317 _SmartReference(const _SmartReference&) = delete; 4318 4319 _GLIBCXX_SIMD_INTRINSIC constexpr 4320 operator value_type() const noexcept 4321 { return _M_read(); } 4322 4323 template <typename _Tp, typename = _ValuePreservingOrInt<__remove_cvref_t<_Tp>, value_type>> 4324 _GLIBCXX_SIMD_INTRINSIC constexpr _SmartReference 4325 operator=(_Tp&& __x) && 4326 { 4327 _M_write(static_cast<_Tp&&>(__x)); 4328 return {_M_obj, _M_index}; 4329 } 4330 4331 #define _GLIBCXX_SIMD_OP_(__op) \ 4332 template <typename _Tp, \ 4333 typename _TT = decltype(declval<value_type>() __op declval<_Tp>()), \ 4334 typename = _ValuePreservingOrInt<__remove_cvref_t<_Tp>, _TT>, \ 4335 typename = _ValuePreservingOrInt<_TT, value_type>> \ 4336 _GLIBCXX_SIMD_INTRINSIC constexpr _SmartReference \ 4337 operator __op##=(_Tp&& __x) && \ 4338 { \ 4339 const value_type& __lhs = _M_read(); \ 4340 _M_write(__lhs __op __x); \ 4341 return {_M_obj, _M_index}; \ 4342 } 4343 _GLIBCXX_SIMD_ALL_ARITHMETICS(_GLIBCXX_SIMD_OP_); 4344 _GLIBCXX_SIMD_ALL_SHIFTS(_GLIBCXX_SIMD_OP_); 4345 _GLIBCXX_SIMD_ALL_BINARY(_GLIBCXX_SIMD_OP_); 4346 #undef _GLIBCXX_SIMD_OP_ 4347 4348 template <typename _Tp = void, 4349 typename = decltype(++declval<conditional_t<true, value_type, _Tp>&>())> 4350 _GLIBCXX_SIMD_INTRINSIC constexpr _SmartReference 4351 operator++() && 4352 { 4353 value_type __x = _M_read(); 4354 _M_write(++__x); 4355 return {_M_obj, _M_index}; 4356 } 4357 4358 template <typename _Tp = void, 4359 typename = decltype(declval<conditional_t<true, value_type, _Tp>&>()++)> 4360 _GLIBCXX_SIMD_INTRINSIC constexpr value_type 4361 operator++(int) && 4362 { 4363 const value_type __r = _M_read(); 4364 value_type __x = __r; 4365 _M_write(++__x); 4366 return __r; 4367 } 4368 4369 template <typename _Tp = void, 4370 typename = decltype(--declval<conditional_t<true, value_type, _Tp>&>())> 4371 _GLIBCXX_SIMD_INTRINSIC constexpr _SmartReference 4372 operator--() && 4373 { 4374 value_type __x = _M_read(); 4375 _M_write(--__x); 4376 return {_M_obj, _M_index}; 4377 } 4378 4379 template <typename _Tp = void, 4380 typename = decltype(declval<conditional_t<true, value_type, _Tp>&>()--)> 4381 _GLIBCXX_SIMD_INTRINSIC constexpr value_type 4382 operator--(int) && 4383 { 4384 const value_type __r = _M_read(); 4385 value_type __x = __r; 4386 _M_write(--__x); 4387 return __r; 4388 } 4389 4390 _GLIBCXX_SIMD_INTRINSIC friend void 4391 swap(_SmartReference&& __a, _SmartReference&& __b) noexcept( 4392 conjunction< 4393 is_nothrow_constructible<value_type, _SmartReference&&>, 4394 is_nothrow_assignable<_SmartReference&&, value_type&&>>::value) 4395 { 4396 value_type __tmp = static_cast<_SmartReference&&>(__a); 4397 static_cast<_SmartReference&&>(__a) = static_cast<value_type>(__b); 4398 static_cast<_SmartReference&&>(__b) = std::move(__tmp); 4399 } 4400 4401 _GLIBCXX_SIMD_INTRINSIC friend void 4402 swap(value_type& __a, _SmartReference&& __b) noexcept( 4403 conjunction< 4404 is_nothrow_constructible<value_type, value_type&&>, 4405 is_nothrow_assignable<value_type&, value_type&&>, 4406 is_nothrow_assignable<_SmartReference&&, value_type&&>>::value) 4407 { 4408 value_type __tmp(std::move(__a)); 4409 __a = static_cast<value_type>(__b); 4410 static_cast<_SmartReference&&>(__b) = std::move(__tmp); 4411 } 4412 4413 _GLIBCXX_SIMD_INTRINSIC friend void 4414 swap(_SmartReference&& __a, value_type& __b) noexcept( 4415 conjunction< 4416 is_nothrow_constructible<value_type, _SmartReference&&>, 4417 is_nothrow_assignable<value_type&, value_type&&>, 4418 is_nothrow_assignable<_SmartReference&&, value_type&&>>::value) 4419 { 4420 value_type __tmp(__a); 4421 static_cast<_SmartReference&&>(__a) = std::move(__b); 4422 __b = std::move(__tmp); 4423 } 4424 }; 4425 4426 // }}} 4427 // __scalar_abi_wrapper {{{ 4428 template <int _Bytes> 4429 struct __scalar_abi_wrapper 4430 { 4431 template <typename _Tp> static constexpr size_t _S_full_size = 1; 4432 template <typename _Tp> static constexpr size_t _S_size = 1; 4433 template <typename _Tp> static constexpr size_t _S_is_partial = false; 4434 4435 template <typename _Tp, typename _Abi = simd_abi::scalar> 4436 static constexpr bool _S_is_valid_v 4437 = _Abi::template _IsValid<_Tp>::value && sizeof(_Tp) == _Bytes; 4438 }; 4439 4440 // }}} 4441 // __decay_abi metafunction {{{ 4442 template <typename _Tp> 4443 struct __decay_abi { using type = _Tp; }; 4444 4445 template <int _Bytes> 4446 struct __decay_abi<__scalar_abi_wrapper<_Bytes>> 4447 { using type = simd_abi::scalar; }; 4448 4449 // }}} 4450 // __find_next_valid_abi metafunction {{{1 4451 // Given an ABI tag A<N>, find an N2 < N such that A<N2>::_S_is_valid_v<_Tp> == 4452 // true, N2 is a power-of-2, and A<N2>::_S_is_partial<_Tp> is false. Break 4453 // recursion at 2 elements in the resulting ABI tag. In this case 4454 // type::_S_is_valid_v<_Tp> may be false. 4455 template <template <int> class _Abi, int _Bytes, typename _Tp> 4456 struct __find_next_valid_abi 4457 { 4458 static constexpr auto 4459 _S_choose() 4460 { 4461 constexpr int _NextBytes = std::__bit_ceil(_Bytes) / 2; 4462 using _NextAbi = _Abi<_NextBytes>; 4463 if constexpr (_NextBytes < sizeof(_Tp) * 2) // break recursion 4464 return _Abi<_Bytes>(); 4465 else if constexpr (_NextAbi::template _S_is_partial<_Tp> == false 4466 && _NextAbi::template _S_is_valid_v<_Tp>) 4467 return _NextAbi(); 4468 else 4469 return __find_next_valid_abi<_Abi, _NextBytes, _Tp>::_S_choose(); 4470 } 4471 4472 using type = decltype(_S_choose()); 4473 }; 4474 4475 template <int _Bytes, typename _Tp> 4476 struct __find_next_valid_abi<__scalar_abi_wrapper, _Bytes, _Tp> 4477 { using type = simd_abi::scalar; }; 4478 4479 // _AbiList {{{1 4480 template <template <int> class...> 4481 struct _AbiList 4482 { 4483 template <typename, int> static constexpr bool _S_has_valid_abi = false; 4484 template <typename, int> using _FirstValidAbi = void; 4485 template <typename, int> using _BestAbi = void; 4486 }; 4487 4488 template <template <int> class _A0, template <int> class... _Rest> 4489 struct _AbiList<_A0, _Rest...> 4490 { 4491 template <typename _Tp, int _Np> 4492 static constexpr bool _S_has_valid_abi 4493 = _A0<sizeof(_Tp) * _Np>::template _S_is_valid_v< 4494 _Tp> || _AbiList<_Rest...>::template _S_has_valid_abi<_Tp, _Np>; 4495 4496 template <typename _Tp, int _Np> 4497 using _FirstValidAbi = conditional_t< 4498 _A0<sizeof(_Tp) * _Np>::template _S_is_valid_v<_Tp>, 4499 typename __decay_abi<_A0<sizeof(_Tp) * _Np>>::type, 4500 typename _AbiList<_Rest...>::template _FirstValidAbi<_Tp, _Np>>; 4501 4502 template <typename _Tp, int _Np> 4503 static constexpr auto 4504 _S_determine_best_abi() 4505 { 4506 static_assert(_Np >= 1); 4507 constexpr int _Bytes = sizeof(_Tp) * _Np; 4508 if constexpr (_Np == 1) 4509 return __make_dependent_t<_Tp, simd_abi::scalar>{}; 4510 else 4511 { 4512 constexpr int __fullsize = _A0<_Bytes>::template _S_full_size<_Tp>; 4513 // _A0<_Bytes> is good if: 4514 // 1. The ABI tag is valid for _Tp 4515 // 2. The storage overhead is no more than padding to fill the next 4516 // power-of-2 number of bytes 4517 if constexpr (_A0<_Bytes>::template _S_is_valid_v< 4518 _Tp> && __fullsize / 2 < _Np) 4519 return typename __decay_abi<_A0<_Bytes>>::type{}; 4520 else 4521 { 4522 using _Bp = 4523 typename __find_next_valid_abi<_A0, _Bytes, _Tp>::type; 4524 if constexpr (_Bp::template _S_is_valid_v< 4525 _Tp> && _Bp::template _S_size<_Tp> <= _Np) 4526 return _Bp{}; 4527 else 4528 return 4529 typename _AbiList<_Rest...>::template _BestAbi<_Tp, _Np>{}; 4530 } 4531 } 4532 } 4533 4534 template <typename _Tp, int _Np> 4535 using _BestAbi = decltype(_S_determine_best_abi<_Tp, _Np>()); 4536 }; 4537 4538 // }}}1 4539 4540 // the following lists all native ABIs, which makes them accessible to 4541 // simd_abi::deduce and select_best_vector_type_t (for fixed_size). Order 4542 // matters: Whatever comes first has higher priority. 4543 using _AllNativeAbis = _AbiList<simd_abi::_VecBltnBtmsk, simd_abi::_VecBuiltin, 4544 __scalar_abi_wrapper>; 4545 4546 // valid _SimdTraits specialization {{{1 4547 template <typename _Tp, typename _Abi> 4548 struct _SimdTraits<_Tp, _Abi, void_t<typename _Abi::template _IsValid<_Tp>>> 4549 : _Abi::template __traits<_Tp> {}; 4550 4551 // __deduce_impl specializations {{{1 4552 // try all native ABIs (including scalar) first 4553 template <typename _Tp, size_t _Np> 4554 struct __deduce_impl< 4555 _Tp, _Np, enable_if_t<_AllNativeAbis::template _S_has_valid_abi<_Tp, _Np>>> 4556 { using type = _AllNativeAbis::_FirstValidAbi<_Tp, _Np>; }; 4557 4558 // fall back to fixed_size only if scalar and native ABIs don't match 4559 template <typename _Tp, size_t _Np, typename = void> 4560 struct __deduce_fixed_size_fallback {}; 4561 4562 template <typename _Tp, size_t _Np> 4563 struct __deduce_fixed_size_fallback<_Tp, _Np, 4564 enable_if_t<simd_abi::fixed_size<_Np>::template _S_is_valid_v<_Tp>>> 4565 { using type = simd_abi::fixed_size<_Np>; }; 4566 4567 template <typename _Tp, size_t _Np, typename> 4568 struct __deduce_impl : public __deduce_fixed_size_fallback<_Tp, _Np> {}; 4569 4570 //}}}1 4571 /// @endcond 4572 4573 // simd_mask {{{ 4574 template <typename _Tp, typename _Abi> 4575 class simd_mask : public _SimdTraits<_Tp, _Abi>::_MaskBase 4576 { 4577 // types, tags, and friends {{{ 4578 using _Traits = _SimdTraits<_Tp, _Abi>; 4579 using _MemberType = typename _Traits::_MaskMember; 4580 4581 // We map all masks with equal element sizeof to a single integer type, the 4582 // one given by __int_for_sizeof_t<_Tp>. This is the approach 4583 // [[gnu::vector_size(N)]] types take as well and it reduces the number of 4584 // template specializations in the implementation classes. 4585 using _Ip = __int_for_sizeof_t<_Tp>; 4586 static constexpr _Ip* _S_type_tag = nullptr; 4587 4588 friend typename _Traits::_MaskBase; 4589 friend class simd<_Tp, _Abi>; // to construct masks on return 4590 friend typename _Traits::_SimdImpl; // to construct masks on return and 4591 // inspect data on masked operations 4592 public: 4593 using _Impl = typename _Traits::_MaskImpl; 4594 friend _Impl; 4595 4596 // }}} 4597 // member types {{{ 4598 using value_type = bool; 4599 using reference = _SmartReference<_MemberType, _Impl, value_type>; 4600 using simd_type = simd<_Tp, _Abi>; 4601 using abi_type = _Abi; 4602 4603 // }}} 4604 static constexpr size_t size() // {{{ 4605 { return __size_or_zero_v<_Tp, _Abi>; } 4606 4607 // }}} 4608 // constructors & assignment {{{ 4609 simd_mask() = default; 4610 simd_mask(const simd_mask&) = default; 4611 simd_mask(simd_mask&&) = default; 4612 simd_mask& operator=(const simd_mask&) = default; 4613 simd_mask& operator=(simd_mask&&) = default; 4614 4615 // }}} 4616 // access to internal representation (optional feature) {{{ 4617 _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR explicit 4618 simd_mask(typename _Traits::_MaskCastType __init) 4619 : _M_data{__init} {} 4620 // conversions to internal type is done in _MaskBase 4621 4622 // }}} 4623 // bitset interface (extension to be proposed) {{{ 4624 // TS_FEEDBACK: 4625 // Conversion of simd_mask to and from bitset makes it much easier to 4626 // interface with other facilities. I suggest adding `static 4627 // simd_mask::from_bitset` and `simd_mask::to_bitset`. 4628 _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR static simd_mask 4629 __from_bitset(bitset<size()> bs) 4630 { return {__bitset_init, bs}; } 4631 4632 _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR bitset<size()> 4633 __to_bitset() const 4634 { return _Impl::_S_to_bits(_M_data)._M_to_bitset(); } 4635 4636 // }}} 4637 // explicit broadcast constructor {{{ 4638 _GLIBCXX_SIMD_ALWAYS_INLINE explicit _GLIBCXX_SIMD_CONSTEXPR 4639 simd_mask(value_type __x) 4640 : _M_data(_Impl::template _S_broadcast<_Ip>(__x)) {} 4641 4642 // }}} 4643 // implicit type conversion constructor {{{ 4644 #ifdef _GLIBCXX_SIMD_ENABLE_IMPLICIT_MASK_CAST 4645 // proposed improvement 4646 template <typename _Up, typename _A2, 4647 typename = enable_if_t<simd_size_v<_Up, _A2> == size()>> 4648 _GLIBCXX_SIMD_ALWAYS_INLINE explicit(sizeof(_MemberType) 4649 != sizeof(typename _SimdTraits<_Up, _A2>::_MaskMember)) 4650 simd_mask(const simd_mask<_Up, _A2>& __x) 4651 : simd_mask(__proposed::static_simd_cast<simd_mask>(__x)) {} 4652 #else 4653 // conforming to ISO/IEC 19570:2018 4654 template <typename _Up, typename = enable_if_t<conjunction< 4655 is_same<abi_type, simd_abi::fixed_size<size()>>, 4656 is_same<_Up, _Up>>::value>> 4657 _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR 4658 simd_mask(const simd_mask<_Up, simd_abi::fixed_size<size()>>& __x) 4659 : _M_data(_Impl::_S_from_bitmask(__data(__x), _S_type_tag)) {} 4660 #endif 4661 4662 // }}} 4663 // load constructor {{{ 4664 template <typename _Flags> 4665 _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR 4666 simd_mask(const value_type* __mem, _IsSimdFlagType<_Flags>) 4667 : _M_data(_Impl::template _S_load<_Ip>(_Flags::template _S_apply<simd_mask>(__mem))) {} 4668 4669 template <typename _Flags> 4670 _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR 4671 simd_mask(const value_type* __mem, simd_mask __k, _IsSimdFlagType<_Flags>) 4672 : _M_data{} 4673 { 4674 _M_data = _Impl::_S_masked_load(_M_data, __k._M_data, 4675 _Flags::template _S_apply<simd_mask>(__mem)); 4676 } 4677 4678 // }}} 4679 // loads [simd_mask.load] {{{ 4680 template <typename _Flags> 4681 _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR void 4682 copy_from(const value_type* __mem, _IsSimdFlagType<_Flags>) 4683 { _M_data = _Impl::template _S_load<_Ip>(_Flags::template _S_apply<simd_mask>(__mem)); } 4684 4685 // }}} 4686 // stores [simd_mask.store] {{{ 4687 template <typename _Flags> 4688 _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR void 4689 copy_to(value_type* __mem, _IsSimdFlagType<_Flags>) const 4690 { _Impl::_S_store(_M_data, _Flags::template _S_apply<simd_mask>(__mem)); } 4691 4692 // }}} 4693 // scalar access {{{ 4694 _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR reference 4695 operator[](size_t __i) 4696 { 4697 if (__i >= size()) 4698 __invoke_ub("Subscript %d is out of range [0, %d]", __i, size() - 1); 4699 return {_M_data, int(__i)}; 4700 } 4701 4702 _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR value_type 4703 operator[](size_t __i) const 4704 { 4705 if (__i >= size()) 4706 __invoke_ub("Subscript %d is out of range [0, %d]", __i, size() - 1); 4707 if constexpr (__is_scalar_abi<_Abi>()) 4708 return _M_data; 4709 else 4710 return static_cast<bool>(_M_data[__i]); 4711 } 4712 4713 // }}} 4714 // negation {{{ 4715 _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR simd_mask 4716 operator!() const 4717 { return {__private_init, _Impl::_S_bit_not(_M_data)}; } 4718 4719 // }}} 4720 // simd_mask binary operators [simd_mask.binary] {{{ 4721 #ifdef _GLIBCXX_SIMD_ENABLE_IMPLICIT_MASK_CAST 4722 // simd_mask<int> && simd_mask<uint> needs disambiguation 4723 template <typename _Up, typename _A2, 4724 typename = enable_if_t<is_convertible_v<simd_mask<_Up, _A2>, simd_mask>>> 4725 _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend simd_mask 4726 operator&&(const simd_mask& __x, const simd_mask<_Up, _A2>& __y) 4727 { 4728 return {__private_init, 4729 _Impl::_S_logical_and(__x._M_data, simd_mask(__y)._M_data)}; 4730 } 4731 4732 template <typename _Up, typename _A2, 4733 typename = enable_if_t<is_convertible_v<simd_mask<_Up, _A2>, simd_mask>>> 4734 _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend simd_mask 4735 operator||(const simd_mask& __x, const simd_mask<_Up, _A2>& __y) 4736 { 4737 return {__private_init, 4738 _Impl::_S_logical_or(__x._M_data, simd_mask(__y)._M_data)}; 4739 } 4740 #endif // _GLIBCXX_SIMD_ENABLE_IMPLICIT_MASK_CAST 4741 4742 _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend simd_mask 4743 operator&&(const simd_mask& __x, const simd_mask& __y) 4744 { return {__private_init, _Impl::_S_logical_and(__x._M_data, __y._M_data)}; } 4745 4746 _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend simd_mask 4747 operator||(const simd_mask& __x, const simd_mask& __y) 4748 { return {__private_init, _Impl::_S_logical_or(__x._M_data, __y._M_data)}; } 4749 4750 _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend simd_mask 4751 operator&(const simd_mask& __x, const simd_mask& __y) 4752 { return {__private_init, _Impl::_S_bit_and(__x._M_data, __y._M_data)}; } 4753 4754 _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend simd_mask 4755 operator|(const simd_mask& __x, const simd_mask& __y) 4756 { return {__private_init, _Impl::_S_bit_or(__x._M_data, __y._M_data)}; } 4757 4758 _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend simd_mask 4759 operator^(const simd_mask& __x, const simd_mask& __y) 4760 { return {__private_init, _Impl::_S_bit_xor(__x._M_data, __y._M_data)}; } 4761 4762 _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend simd_mask& 4763 operator&=(simd_mask& __x, const simd_mask& __y) 4764 { 4765 __x._M_data = _Impl::_S_bit_and(__x._M_data, __y._M_data); 4766 return __x; 4767 } 4768 4769 _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend simd_mask& 4770 operator|=(simd_mask& __x, const simd_mask& __y) 4771 { 4772 __x._M_data = _Impl::_S_bit_or(__x._M_data, __y._M_data); 4773 return __x; 4774 } 4775 4776 _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend simd_mask& 4777 operator^=(simd_mask& __x, const simd_mask& __y) 4778 { 4779 __x._M_data = _Impl::_S_bit_xor(__x._M_data, __y._M_data); 4780 return __x; 4781 } 4782 4783 // }}} 4784 // simd_mask compares [simd_mask.comparison] {{{ 4785 _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend simd_mask 4786 operator==(const simd_mask& __x, const simd_mask& __y) 4787 { return !operator!=(__x, __y); } 4788 4789 _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend simd_mask 4790 operator!=(const simd_mask& __x, const simd_mask& __y) 4791 { return {__private_init, _Impl::_S_bit_xor(__x._M_data, __y._M_data)}; } 4792 4793 // }}} 4794 // private_init ctor {{{ 4795 _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR 4796 simd_mask(_PrivateInit, typename _Traits::_MaskMember __init) 4797 : _M_data(__init) {} 4798 4799 // }}} 4800 // private_init generator ctor {{{ 4801 template <typename _Fp, typename = decltype(bool(declval<_Fp>()(size_t())))> 4802 _GLIBCXX_SIMD_INTRINSIC constexpr 4803 simd_mask(_PrivateInit, _Fp&& __gen) 4804 : _M_data() 4805 { 4806 __execute_n_times<size()>([&](auto __i) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA { 4807 _Impl::_S_set(_M_data, __i, __gen(__i)); 4808 }); 4809 } 4810 4811 // }}} 4812 // bitset_init ctor {{{ 4813 _GLIBCXX_SIMD_INTRINSIC constexpr 4814 simd_mask(_BitsetInit, bitset<size()> __init) 4815 : _M_data(_Impl::_S_from_bitmask(_SanitizedBitMask<size()>(__init), _S_type_tag)) 4816 {} 4817 4818 // }}} 4819 // __cvt {{{ 4820 // TS_FEEDBACK: 4821 // The conversion operator this implements should be a ctor on simd_mask. 4822 // Once you call .__cvt() on a simd_mask it converts conveniently. 4823 // A useful variation: add `explicit(sizeof(_Tp) != sizeof(_Up))` 4824 struct _CvtProxy 4825 { 4826 template <typename _Up, typename _A2, 4827 typename = enable_if_t<simd_size_v<_Up, _A2> == simd_size_v<_Tp, _Abi>>> 4828 _GLIBCXX_SIMD_ALWAYS_INLINE 4829 operator simd_mask<_Up, _A2>() && 4830 { 4831 using namespace std::experimental::__proposed; 4832 return static_simd_cast<simd_mask<_Up, _A2>>(_M_data); 4833 } 4834 4835 const simd_mask<_Tp, _Abi>& _M_data; 4836 }; 4837 4838 _GLIBCXX_SIMD_INTRINSIC _CvtProxy 4839 __cvt() const 4840 { return {*this}; } 4841 4842 // }}} 4843 // operator?: overloads (suggested extension) {{{ 4844 #ifdef __GXX_CONDITIONAL_IS_OVERLOADABLE__ 4845 _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend simd_mask 4846 operator?:(const simd_mask& __k, const simd_mask& __where_true, 4847 const simd_mask& __where_false) 4848 { 4849 auto __ret = __where_false; 4850 _Impl::_S_masked_assign(__k._M_data, __ret._M_data, __where_true._M_data); 4851 return __ret; 4852 } 4853 4854 template <typename _U1, typename _U2, 4855 typename _Rp = simd<common_type_t<_U1, _U2>, _Abi>, 4856 typename = enable_if_t<conjunction_v< 4857 is_convertible<_U1, _Rp>, is_convertible<_U2, _Rp>, 4858 is_convertible<simd_mask, typename _Rp::mask_type>>>> 4859 _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend _Rp 4860 operator?:(const simd_mask& __k, const _U1& __where_true, 4861 const _U2& __where_false) 4862 { 4863 _Rp __ret = __where_false; 4864 _Rp::_Impl::_S_masked_assign( 4865 __data(static_cast<typename _Rp::mask_type>(__k)), __data(__ret), 4866 __data(static_cast<_Rp>(__where_true))); 4867 return __ret; 4868 } 4869 4870 #ifdef _GLIBCXX_SIMD_ENABLE_IMPLICIT_MASK_CAST 4871 template <typename _Kp, typename _Ak, typename _Up, typename _Au, 4872 typename = enable_if_t< 4873 conjunction_v<is_convertible<simd_mask<_Kp, _Ak>, simd_mask>, 4874 is_convertible<simd_mask<_Up, _Au>, simd_mask>>>> 4875 _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend simd_mask 4876 operator?:(const simd_mask<_Kp, _Ak>& __k, const simd_mask& __where_true, 4877 const simd_mask<_Up, _Au>& __where_false) 4878 { 4879 simd_mask __ret = __where_false; 4880 _Impl::_S_masked_assign(simd_mask(__k)._M_data, __ret._M_data, 4881 __where_true._M_data); 4882 return __ret; 4883 } 4884 #endif // _GLIBCXX_SIMD_ENABLE_IMPLICIT_MASK_CAST 4885 #endif // __GXX_CONDITIONAL_IS_OVERLOADABLE__ 4886 4887 // }}} 4888 // _M_is_constprop {{{ 4889 _GLIBCXX_SIMD_INTRINSIC constexpr bool 4890 _M_is_constprop() const 4891 { 4892 if constexpr (__is_scalar_abi<_Abi>()) 4893 return __builtin_constant_p(_M_data); 4894 else 4895 return _M_data._M_is_constprop(); 4896 } 4897 4898 // }}} 4899 4900 private: 4901 friend const auto& __data<_Tp, abi_type>(const simd_mask&); 4902 friend auto& __data<_Tp, abi_type>(simd_mask&); 4903 alignas(_Traits::_S_mask_align) _MemberType _M_data; 4904 }; 4905 4906 // }}} 4907 4908 /// @cond undocumented 4909 // __data(simd_mask) {{{ 4910 template <typename _Tp, typename _Ap> 4911 _GLIBCXX_SIMD_INTRINSIC constexpr const auto& 4912 __data(const simd_mask<_Tp, _Ap>& __x) 4913 { return __x._M_data; } 4914 4915 template <typename _Tp, typename _Ap> 4916 _GLIBCXX_SIMD_INTRINSIC constexpr auto& 4917 __data(simd_mask<_Tp, _Ap>& __x) 4918 { return __x._M_data; } 4919 4920 // }}} 4921 /// @endcond 4922 4923 // simd_mask reductions [simd_mask.reductions] {{{ 4924 template <typename _Tp, typename _Abi> 4925 _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR bool 4926 all_of(const simd_mask<_Tp, _Abi>& __k) noexcept 4927 { 4928 if (__builtin_is_constant_evaluated() || __k._M_is_constprop()) 4929 { 4930 for (size_t __i = 0; __i < simd_size_v<_Tp, _Abi>; ++__i) 4931 if (!__k[__i]) 4932 return false; 4933 return true; 4934 } 4935 else 4936 return _Abi::_MaskImpl::_S_all_of(__k); 4937 } 4938 4939 template <typename _Tp, typename _Abi> 4940 _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR bool 4941 any_of(const simd_mask<_Tp, _Abi>& __k) noexcept 4942 { 4943 if (__builtin_is_constant_evaluated() || __k._M_is_constprop()) 4944 { 4945 for (size_t __i = 0; __i < simd_size_v<_Tp, _Abi>; ++__i) 4946 if (__k[__i]) 4947 return true; 4948 return false; 4949 } 4950 else 4951 return _Abi::_MaskImpl::_S_any_of(__k); 4952 } 4953 4954 template <typename _Tp, typename _Abi> 4955 _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR bool 4956 none_of(const simd_mask<_Tp, _Abi>& __k) noexcept 4957 { 4958 if (__builtin_is_constant_evaluated() || __k._M_is_constprop()) 4959 { 4960 for (size_t __i = 0; __i < simd_size_v<_Tp, _Abi>; ++__i) 4961 if (__k[__i]) 4962 return false; 4963 return true; 4964 } 4965 else 4966 return _Abi::_MaskImpl::_S_none_of(__k); 4967 } 4968 4969 template <typename _Tp, typename _Abi> 4970 _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR bool 4971 some_of(const simd_mask<_Tp, _Abi>& __k) noexcept 4972 { 4973 if (__builtin_is_constant_evaluated() || __k._M_is_constprop()) 4974 { 4975 for (size_t __i = 1; __i < simd_size_v<_Tp, _Abi>; ++__i) 4976 if (__k[__i] != __k[__i - 1]) 4977 return true; 4978 return false; 4979 } 4980 else 4981 return _Abi::_MaskImpl::_S_some_of(__k); 4982 } 4983 4984 template <typename _Tp, typename _Abi> 4985 _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR int 4986 popcount(const simd_mask<_Tp, _Abi>& __k) noexcept 4987 { 4988 if (__builtin_is_constant_evaluated() || __k._M_is_constprop()) 4989 { 4990 const int __r = __call_with_subscripts<simd_size_v<_Tp, _Abi>>( 4991 __k, [](auto... __elements) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA { 4992 return ((__elements != 0) + ...); 4993 }); 4994 if (__builtin_is_constant_evaluated() || __builtin_constant_p(__r)) 4995 return __r; 4996 } 4997 return _Abi::_MaskImpl::_S_popcount(__k); 4998 } 4999 5000 template <typename _Tp, typename _Abi> 5001 _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR int 5002 find_first_set(const simd_mask<_Tp, _Abi>& __k) 5003 { 5004 if (__builtin_is_constant_evaluated() || __k._M_is_constprop()) 5005 { 5006 constexpr size_t _Np = simd_size_v<_Tp, _Abi>; 5007 const size_t _Idx = __call_with_n_evaluations<_Np>( 5008 [](auto... __indexes) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA { 5009 return std::min({__indexes...}); 5010 }, [&](auto __i) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA { 5011 return __k[__i] ? +__i : _Np; 5012 }); 5013 if (_Idx >= _Np) 5014 __invoke_ub("find_first_set(empty mask) is UB"); 5015 if (__builtin_constant_p(_Idx)) 5016 return _Idx; 5017 } 5018 return _Abi::_MaskImpl::_S_find_first_set(__k); 5019 } 5020 5021 template <typename _Tp, typename _Abi> 5022 _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR int 5023 find_last_set(const simd_mask<_Tp, _Abi>& __k) 5024 { 5025 if (__builtin_is_constant_evaluated() || __k._M_is_constprop()) 5026 { 5027 constexpr size_t _Np = simd_size_v<_Tp, _Abi>; 5028 const int _Idx = __call_with_n_evaluations<_Np>( 5029 [](auto... __indexes) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA { 5030 return std::max({__indexes...}); 5031 }, [&](auto __i) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA { 5032 return __k[__i] ? int(__i) : -1; 5033 }); 5034 if (_Idx < 0) 5035 __invoke_ub("find_first_set(empty mask) is UB"); 5036 if (__builtin_constant_p(_Idx)) 5037 return _Idx; 5038 } 5039 return _Abi::_MaskImpl::_S_find_last_set(__k); 5040 } 5041 5042 _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR bool 5043 all_of(_ExactBool __x) noexcept 5044 { return __x; } 5045 5046 _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR bool 5047 any_of(_ExactBool __x) noexcept 5048 { return __x; } 5049 5050 _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR bool 5051 none_of(_ExactBool __x) noexcept 5052 { return !__x; } 5053 5054 _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR bool 5055 some_of(_ExactBool) noexcept 5056 { return false; } 5057 5058 _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR int 5059 popcount(_ExactBool __x) noexcept 5060 { return __x; } 5061 5062 _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR int 5063 find_first_set(_ExactBool) 5064 { return 0; } 5065 5066 _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR int 5067 find_last_set(_ExactBool) 5068 { return 0; } 5069 5070 // }}} 5071 5072 /// @cond undocumented 5073 // _SimdIntOperators{{{1 5074 template <typename _V, typename _Tp, typename _Abi, bool> 5075 class _SimdIntOperators {}; 5076 5077 template <typename _V, typename _Tp, typename _Abi> 5078 class _SimdIntOperators<_V, _Tp, _Abi, true> 5079 { 5080 using _Impl = typename _SimdTraits<_Tp, _Abi>::_SimdImpl; 5081 5082 _GLIBCXX_SIMD_INTRINSIC constexpr const _V& 5083 __derived() const 5084 { return *static_cast<const _V*>(this); } 5085 5086 template <typename _Up> 5087 _GLIBCXX_SIMD_INTRINSIC static _GLIBCXX_SIMD_CONSTEXPR _V 5088 _S_make_derived(_Up&& __d) 5089 { return {__private_init, static_cast<_Up&&>(__d)}; } 5090 5091 public: 5092 _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend 5093 _V& 5094 operator%=(_V& __lhs, const _V& __x) 5095 { return __lhs = __lhs % __x; } 5096 5097 _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend 5098 _V& 5099 operator&=(_V& __lhs, const _V& __x) 5100 { return __lhs = __lhs & __x; } 5101 5102 _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend 5103 _V& 5104 operator|=(_V& __lhs, const _V& __x) 5105 { return __lhs = __lhs | __x; } 5106 5107 _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend 5108 _V& 5109 operator^=(_V& __lhs, const _V& __x) 5110 { return __lhs = __lhs ^ __x; } 5111 5112 _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend 5113 _V& 5114 operator<<=(_V& __lhs, const _V& __x) 5115 { return __lhs = __lhs << __x; } 5116 5117 _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend 5118 _V& 5119 operator>>=(_V& __lhs, const _V& __x) 5120 { return __lhs = __lhs >> __x; } 5121 5122 _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend 5123 _V& 5124 operator<<=(_V& __lhs, int __x) 5125 { return __lhs = __lhs << __x; } 5126 5127 _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend 5128 _V& 5129 operator>>=(_V& __lhs, int __x) 5130 { return __lhs = __lhs >> __x; } 5131 5132 _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend 5133 _V 5134 operator%(const _V& __x, const _V& __y) 5135 { 5136 return _SimdIntOperators::_S_make_derived( 5137 _Impl::_S_modulus(__data(__x), __data(__y))); 5138 } 5139 5140 _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend 5141 _V 5142 operator&(const _V& __x, const _V& __y) 5143 { 5144 return _SimdIntOperators::_S_make_derived( 5145 _Impl::_S_bit_and(__data(__x), __data(__y))); 5146 } 5147 5148 _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend 5149 _V 5150 operator|(const _V& __x, const _V& __y) 5151 { 5152 return _SimdIntOperators::_S_make_derived( 5153 _Impl::_S_bit_or(__data(__x), __data(__y))); 5154 } 5155 5156 _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend 5157 _V 5158 operator^(const _V& __x, const _V& __y) 5159 { 5160 return _SimdIntOperators::_S_make_derived( 5161 _Impl::_S_bit_xor(__data(__x), __data(__y))); 5162 } 5163 5164 _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend 5165 _V 5166 operator<<(const _V& __x, const _V& __y) 5167 { 5168 return _SimdIntOperators::_S_make_derived( 5169 _Impl::_S_bit_shift_left(__data(__x), __data(__y))); 5170 } 5171 5172 _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend 5173 _V 5174 operator>>(const _V& __x, const _V& __y) 5175 { 5176 return _SimdIntOperators::_S_make_derived( 5177 _Impl::_S_bit_shift_right(__data(__x), __data(__y))); 5178 } 5179 5180 _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend 5181 _V 5182 operator<<(const _V& __x, int __y) 5183 { 5184 if (__y < 0) 5185 __invoke_ub("The behavior is undefined if the right operand of a " 5186 "shift operation is negative. [expr.shift]\nA shift by " 5187 "%d was requested", 5188 __y); 5189 if (size_t(__y) >= sizeof(declval<_Tp>() << __y) * __CHAR_BIT__) 5190 __invoke_ub( 5191 "The behavior is undefined if the right operand of a " 5192 "shift operation is greater than or equal to the width of the " 5193 "promoted left operand. [expr.shift]\nA shift by %d was requested", 5194 __y); 5195 return _SimdIntOperators::_S_make_derived( 5196 _Impl::_S_bit_shift_left(__data(__x), __y)); 5197 } 5198 5199 _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend 5200 _V 5201 operator>>(const _V& __x, int __y) 5202 { 5203 if (__y < 0) 5204 __invoke_ub( 5205 "The behavior is undefined if the right operand of a shift " 5206 "operation is negative. [expr.shift]\nA shift by %d was requested", 5207 __y); 5208 if (size_t(__y) >= sizeof(declval<_Tp>() << __y) * __CHAR_BIT__) 5209 __invoke_ub( 5210 "The behavior is undefined if the right operand of a shift " 5211 "operation is greater than or equal to the width of the promoted " 5212 "left operand. [expr.shift]\nA shift by %d was requested", 5213 __y); 5214 return _SimdIntOperators::_S_make_derived( 5215 _Impl::_S_bit_shift_right(__data(__x), __y)); 5216 } 5217 5218 // unary operators (for integral _Tp) 5219 _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR 5220 _V 5221 operator~() const 5222 { return {__private_init, _Impl::_S_complement(__derived()._M_data)}; } 5223 }; 5224 5225 //}}}1 5226 /// @endcond 5227 5228 // simd {{{ 5229 template <typename _Tp, typename _Abi> 5230 class simd : public _SimdIntOperators< 5231 simd<_Tp, _Abi>, _Tp, _Abi, 5232 conjunction<is_integral<_Tp>, 5233 typename _SimdTraits<_Tp, _Abi>::_IsValid>::value>, 5234 public _SimdTraits<_Tp, _Abi>::_SimdBase 5235 { 5236 using _Traits = _SimdTraits<_Tp, _Abi>; 5237 using _MemberType = typename _Traits::_SimdMember; 5238 using _CastType = typename _Traits::_SimdCastType; 5239 static constexpr _Tp* _S_type_tag = nullptr; 5240 friend typename _Traits::_SimdBase; 5241 5242 public: 5243 using _Impl = typename _Traits::_SimdImpl; 5244 friend _Impl; 5245 friend _SimdIntOperators<simd, _Tp, _Abi, true>; 5246 5247 using value_type = _Tp; 5248 using reference = _SmartReference<_MemberType, _Impl, value_type>; 5249 using mask_type = simd_mask<_Tp, _Abi>; 5250 using abi_type = _Abi; 5251 5252 static constexpr size_t size() 5253 { return __size_or_zero_v<_Tp, _Abi>; } 5254 5255 _GLIBCXX_SIMD_CONSTEXPR simd() = default; 5256 _GLIBCXX_SIMD_CONSTEXPR simd(const simd&) = default; 5257 _GLIBCXX_SIMD_CONSTEXPR simd(simd&&) noexcept = default; 5258 _GLIBCXX_SIMD_CONSTEXPR simd& operator=(const simd&) = default; 5259 _GLIBCXX_SIMD_CONSTEXPR simd& operator=(simd&&) noexcept = default; 5260 5261 // implicit broadcast constructor 5262 template <typename _Up, 5263 typename = enable_if_t<!is_same_v<__remove_cvref_t<_Up>, bool>>> 5264 _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR 5265 simd(_ValuePreservingOrInt<_Up, value_type>&& __x) 5266 : _M_data( 5267 _Impl::_S_broadcast(static_cast<value_type>(static_cast<_Up&&>(__x)))) 5268 {} 5269 5270 // implicit type conversion constructor (convert from fixed_size to 5271 // fixed_size) 5272 template <typename _Up> 5273 _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR 5274 simd(const simd<_Up, simd_abi::fixed_size<size()>>& __x, 5275 enable_if_t< 5276 conjunction< 5277 is_same<simd_abi::fixed_size<size()>, abi_type>, 5278 negation<__is_narrowing_conversion<_Up, value_type>>, 5279 __converts_to_higher_integer_rank<_Up, value_type>>::value, 5280 void*> = nullptr) 5281 : simd{static_cast<array<_Up, size()>>(__x).data(), vector_aligned} {} 5282 5283 // explicit type conversion constructor 5284 #ifdef _GLIBCXX_SIMD_ENABLE_STATIC_CAST 5285 template <typename _Up, typename _A2, 5286 typename = decltype(static_simd_cast<simd>( 5287 declval<const simd<_Up, _A2>&>()))> 5288 _GLIBCXX_SIMD_ALWAYS_INLINE explicit _GLIBCXX_SIMD_CONSTEXPR 5289 simd(const simd<_Up, _A2>& __x) 5290 : simd(static_simd_cast<simd>(__x)) {} 5291 #endif // _GLIBCXX_SIMD_ENABLE_STATIC_CAST 5292 5293 // generator constructor 5294 template <typename _Fp> 5295 _GLIBCXX_SIMD_ALWAYS_INLINE explicit _GLIBCXX_SIMD_CONSTEXPR 5296 simd(_Fp&& __gen, _ValuePreservingOrInt<decltype(declval<_Fp>()( 5297 declval<_SizeConstant<0>&>())), 5298 value_type>* = nullptr) 5299 : _M_data(_Impl::_S_generator(static_cast<_Fp&&>(__gen), _S_type_tag)) {} 5300 5301 // load constructor 5302 template <typename _Up, typename _Flags> 5303 _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR 5304 simd(const _Up* __mem, _IsSimdFlagType<_Flags>) 5305 : _M_data( 5306 _Impl::_S_load(_Flags::template _S_apply<simd>(__mem), _S_type_tag)) 5307 {} 5308 5309 // loads [simd.load] 5310 template <typename _Up, typename _Flags> 5311 _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR void 5312 copy_from(const _Vectorizable<_Up>* __mem, _IsSimdFlagType<_Flags>) 5313 { 5314 _M_data = static_cast<decltype(_M_data)>( 5315 _Impl::_S_load(_Flags::template _S_apply<simd>(__mem), _S_type_tag)); 5316 } 5317 5318 // stores [simd.store] 5319 template <typename _Up, typename _Flags> 5320 _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR void 5321 copy_to(_Vectorizable<_Up>* __mem, _IsSimdFlagType<_Flags>) const 5322 { 5323 _Impl::_S_store(_M_data, _Flags::template _S_apply<simd>(__mem), 5324 _S_type_tag); 5325 } 5326 5327 // scalar access 5328 _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR reference 5329 operator[](size_t __i) 5330 { return {_M_data, int(__i)}; } 5331 5332 _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR value_type 5333 operator[]([[maybe_unused]] size_t __i) const 5334 { 5335 if constexpr (__is_scalar_abi<_Abi>()) 5336 { 5337 _GLIBCXX_DEBUG_ASSERT(__i == 0); 5338 return _M_data; 5339 } 5340 else 5341 return _M_data[__i]; 5342 } 5343 5344 // increment and decrement: 5345 _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR simd& 5346 operator++() 5347 { 5348 _Impl::_S_increment(_M_data); 5349 return *this; 5350 } 5351 5352 _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR simd 5353 operator++(int) 5354 { 5355 simd __r = *this; 5356 _Impl::_S_increment(_M_data); 5357 return __r; 5358 } 5359 5360 _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR simd& 5361 operator--() 5362 { 5363 _Impl::_S_decrement(_M_data); 5364 return *this; 5365 } 5366 5367 _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR simd 5368 operator--(int) 5369 { 5370 simd __r = *this; 5371 _Impl::_S_decrement(_M_data); 5372 return __r; 5373 } 5374 5375 // unary operators (for any _Tp) 5376 _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR mask_type 5377 operator!() const 5378 { return {__private_init, _Impl::_S_negate(_M_data)}; } 5379 5380 _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR simd 5381 operator+() const 5382 { return *this; } 5383 5384 _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR simd 5385 operator-() const 5386 { return {__private_init, _Impl::_S_unary_minus(_M_data)}; } 5387 5388 // access to internal representation (suggested extension) 5389 _GLIBCXX_SIMD_ALWAYS_INLINE explicit _GLIBCXX_SIMD_CONSTEXPR 5390 simd(_CastType __init) : _M_data(__init) {} 5391 5392 // compound assignment [simd.cassign] 5393 _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend simd& 5394 operator+=(simd& __lhs, const simd& __x) 5395 { return __lhs = __lhs + __x; } 5396 5397 _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend simd& 5398 operator-=(simd& __lhs, const simd& __x) 5399 { return __lhs = __lhs - __x; } 5400 5401 _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend simd& 5402 operator*=(simd& __lhs, const simd& __x) 5403 { return __lhs = __lhs * __x; } 5404 5405 _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend simd& 5406 operator/=(simd& __lhs, const simd& __x) 5407 { return __lhs = __lhs / __x; } 5408 5409 // binary operators [simd.binary] 5410 _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend simd 5411 operator+(const simd& __x, const simd& __y) 5412 { return {__private_init, _Impl::_S_plus(__x._M_data, __y._M_data)}; } 5413 5414 _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend simd 5415 operator-(const simd& __x, const simd& __y) 5416 { return {__private_init, _Impl::_S_minus(__x._M_data, __y._M_data)}; } 5417 5418 _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend simd 5419 operator*(const simd& __x, const simd& __y) 5420 { return {__private_init, _Impl::_S_multiplies(__x._M_data, __y._M_data)}; } 5421 5422 _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend simd 5423 operator/(const simd& __x, const simd& __y) 5424 { return {__private_init, _Impl::_S_divides(__x._M_data, __y._M_data)}; } 5425 5426 // compares [simd.comparison] 5427 _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend mask_type 5428 operator==(const simd& __x, const simd& __y) 5429 { return simd::_S_make_mask(_Impl::_S_equal_to(__x._M_data, __y._M_data)); } 5430 5431 _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend mask_type 5432 operator!=(const simd& __x, const simd& __y) 5433 { 5434 return simd::_S_make_mask( 5435 _Impl::_S_not_equal_to(__x._M_data, __y._M_data)); 5436 } 5437 5438 _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend mask_type 5439 operator<(const simd& __x, const simd& __y) 5440 { return simd::_S_make_mask(_Impl::_S_less(__x._M_data, __y._M_data)); } 5441 5442 _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend mask_type 5443 operator<=(const simd& __x, const simd& __y) 5444 { 5445 return simd::_S_make_mask(_Impl::_S_less_equal(__x._M_data, __y._M_data)); 5446 } 5447 5448 _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend mask_type 5449 operator>(const simd& __x, const simd& __y) 5450 { return simd::_S_make_mask(_Impl::_S_less(__y._M_data, __x._M_data)); } 5451 5452 _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend mask_type 5453 operator>=(const simd& __x, const simd& __y) 5454 { 5455 return simd::_S_make_mask(_Impl::_S_less_equal(__y._M_data, __x._M_data)); 5456 } 5457 5458 // operator?: overloads (suggested extension) {{{ 5459 #ifdef __GXX_CONDITIONAL_IS_OVERLOADABLE__ 5460 _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend simd 5461 operator?:(const mask_type& __k, const simd& __where_true, 5462 const simd& __where_false) 5463 { 5464 auto __ret = __where_false; 5465 _Impl::_S_masked_assign(__data(__k), __data(__ret), __data(__where_true)); 5466 return __ret; 5467 } 5468 5469 #endif // __GXX_CONDITIONAL_IS_OVERLOADABLE__ 5470 // }}} 5471 5472 // "private" because of the first arguments's namespace 5473 _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR 5474 simd(_PrivateInit, const _MemberType& __init) 5475 : _M_data(__init) {} 5476 5477 // "private" because of the first arguments's namespace 5478 _GLIBCXX_SIMD_INTRINSIC 5479 simd(_BitsetInit, bitset<size()> __init) : _M_data() 5480 { where(mask_type(__bitset_init, __init), *this) = ~*this; } 5481 5482 _GLIBCXX_SIMD_INTRINSIC constexpr bool 5483 _M_is_constprop() const 5484 { 5485 if constexpr (__is_scalar_abi<_Abi>()) 5486 return __builtin_constant_p(_M_data); 5487 else 5488 return _M_data._M_is_constprop(); 5489 } 5490 5491 private: 5492 _GLIBCXX_SIMD_INTRINSIC static constexpr mask_type 5493 _S_make_mask(typename mask_type::_MemberType __k) 5494 { return {__private_init, __k}; } 5495 5496 friend const auto& __data<value_type, abi_type>(const simd&); 5497 friend auto& __data<value_type, abi_type>(simd&); 5498 alignas(_Traits::_S_simd_align) _MemberType _M_data; 5499 }; 5500 5501 // }}} 5502 /// @cond undocumented 5503 // __data {{{ 5504 template <typename _Tp, typename _Ap> 5505 _GLIBCXX_SIMD_INTRINSIC constexpr const auto& 5506 __data(const simd<_Tp, _Ap>& __x) 5507 { return __x._M_data; } 5508 5509 template <typename _Tp, typename _Ap> 5510 _GLIBCXX_SIMD_INTRINSIC constexpr auto& 5511 __data(simd<_Tp, _Ap>& __x) 5512 { return __x._M_data; } 5513 5514 // }}} 5515 namespace __float_bitwise_operators { //{{{ 5516 template <typename _Tp, typename _Ap> 5517 _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR simd<_Tp, _Ap> 5518 operator^(const simd<_Tp, _Ap>& __a, const simd<_Tp, _Ap>& __b) 5519 { return {__private_init, _Ap::_SimdImpl::_S_bit_xor(__data(__a), __data(__b))}; } 5520 5521 template <typename _Tp, typename _Ap> 5522 _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR simd<_Tp, _Ap> 5523 operator|(const simd<_Tp, _Ap>& __a, const simd<_Tp, _Ap>& __b) 5524 { return {__private_init, _Ap::_SimdImpl::_S_bit_or(__data(__a), __data(__b))}; } 5525 5526 template <typename _Tp, typename _Ap> 5527 _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR simd<_Tp, _Ap> 5528 operator&(const simd<_Tp, _Ap>& __a, const simd<_Tp, _Ap>& __b) 5529 { return {__private_init, _Ap::_SimdImpl::_S_bit_and(__data(__a), __data(__b))}; } 5530 5531 template <typename _Tp, typename _Ap> 5532 _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR 5533 enable_if_t<is_floating_point_v<_Tp>, simd<_Tp, _Ap>> 5534 operator~(const simd<_Tp, _Ap>& __a) 5535 { return {__private_init, _Ap::_SimdImpl::_S_complement(__data(__a))}; } 5536 } // namespace __float_bitwise_operators }}} 5537 /// @endcond 5538 5539 /// @} 5540 _GLIBCXX_SIMD_END_NAMESPACE 5541 5542 #endif // __cplusplus >= 201703L 5543 #endif // _GLIBCXX_EXPERIMENTAL_SIMD_H 5544 5545 // vim: foldmethod=marker foldmarker={{{,}}} 5546