// Definition of the public simd interfaces -*- C++ -*-

// Copyright (C) 2020-2022 Free Software Foundation, Inc.
//
// This file is part of the GNU ISO C++ Library.  This library is free
// software; you can redistribute it and/or modify it under the
// terms of the GNU General Public License as published by the
// Free Software Foundation; either version 3, or (at your option)
// any later version.

// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
// GNU General Public License for more details.

// Under Section 7 of GPL version 3, you are granted additional
// permissions described in the GCC Runtime Library Exception, version
// 3.1, as published by the Free Software Foundation.

// You should have received a copy of the GNU General Public License and
// a copy of the GCC Runtime Library Exception along with this program;
// see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
// <http://www.gnu.org/licenses/>.

#ifndef _GLIBCXX_EXPERIMENTAL_SIMD_H
#define _GLIBCXX_EXPERIMENTAL_SIMD_H

#if __cplusplus >= 201703L

#include "simd_detail.h"
#include "numeric_traits.h"
#include <bit>
#include <bitset>
#ifdef _GLIBCXX_DEBUG_UB
#include <cstdio> // for stderr
#endif
#include <cstring>
#include <cmath>
#include <functional>
#include <iosfwd>
#include <utility>

#if _GLIBCXX_SIMD_X86INTRIN
#include <x86intrin.h>
#elif _GLIBCXX_SIMD_HAVE_NEON
#include <arm_neon.h>
#endif

/** @ingroup ts_simd
 * @{
 */
/* There are several closely related types, with the following naming
 * convention:
 * _Tp: vectorizable (arithmetic) type (or any type)
 * _TV: __vector_type_t<_Tp, _Np>
 * _TW: _SimdWrapper<_Tp, _Np>
 * _TI: __intrinsic_type_t<_Tp, _Np>
 * _TVT: _VectorTraits<_TV> or _VectorTraits<_TW>
 * If one additional type is needed use _U instead of _T.
 * Otherwise use _T\d, _TV\d, _TW\d, _TI\d, _TVT\d.
 *
 * More naming conventions:
 * _Ap or _Abi: An ABI tag from the simd_abi namespace
 * _Ip: often used for integer types with sizeof(_Ip) == sizeof(_Tp),
 *      _IV, _IW as for _TV, _TW
 * _Np: number of elements (not bytes)
 * _Bytes: number of bytes
 *
 * Variable names:
 * __k: mask object (vector- or bitmask)
 */
_GLIBCXX_SIMD_BEGIN_NAMESPACE

#if !_GLIBCXX_SIMD_X86INTRIN
using __m128  [[__gnu__::__vector_size__(16)]] = float;
using __m128d [[__gnu__::__vector_size__(16)]] = double;
using __m128i [[__gnu__::__vector_size__(16)]] = long long;
using __m256  [[__gnu__::__vector_size__(32)]] = float;
using __m256d [[__gnu__::__vector_size__(32)]] = double;
using __m256i [[__gnu__::__vector_size__(32)]] = long long;
using __m512  [[__gnu__::__vector_size__(64)]] = float;
using __m512d [[__gnu__::__vector_size__(64)]] = double;
using __m512i [[__gnu__::__vector_size__(64)]] = long long;
#endif

namespace simd_abi {
// simd_abi forward declarations {{{
// implementation details:
struct _Scalar;

template <int _Np>
  struct _Fixed;

// There are two major ABIs that appear on different architectures.
// Both have non-boolean values packed into an N Byte register
// -> #elements = N / sizeof(T)
// Masks differ:
//  1. Use value vector registers for masks (all 0 or all 1)
//  2. Use bitmasks (mask registers) with one bit per value in the corresponding
//     value vector
//
// Both can be partially used, masking off the rest when doing horizontal
// operations or operations that can trap (e.g. FP_INVALID or integer division
// by 0). This is encoded as the number of used bytes.
template <int _UsedBytes>
  struct _VecBuiltin;

template <int _UsedBytes>
  struct _VecBltnBtmsk;
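// Illustration (not normative, not part of the header): for 4 floats on x86,
// _VecBuiltin<16> stores the mask as a value vector whose lanes are all-0 or
// all-1 (ABI 1 above), while _VecBltnBtmsk<16> stores it as a bitmask with one
// bit per lane (ABI 2 above):
//
//   simd_mask<float, _VecBuiltin<16>>   // mask data: __vector(4) int, 0 or -1
//   simd_mask<float, _VecBltnBtmsk<16>> // mask data: bitmask, 1 bit per lane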
template <typename _Tp, int _Np>
  using _VecN = _VecBuiltin<sizeof(_Tp) * _Np>;

template <int _UsedBytes = 16>
  using _Sse = _VecBuiltin<_UsedBytes>;

template <int _UsedBytes = 32>
  using _Avx = _VecBuiltin<_UsedBytes>;

template <int _UsedBytes = 64>
  using _Avx512 = _VecBltnBtmsk<_UsedBytes>;

template <int _UsedBytes = 16>
  using _Neon = _VecBuiltin<_UsedBytes>;

// implementation-defined:
using __sse = _Sse<>;
using __avx = _Avx<>;
using __avx512 = _Avx512<>;
using __neon = _Neon<>;
using __neon128 = _Neon<16>;
using __neon64 = _Neon<8>;

// standard:
template <typename _Tp, size_t _Np, typename...>
  struct deduce;

template <int _Np>
  using fixed_size = _Fixed<_Np>;

using scalar = _Scalar;

// }}}
} // namespace simd_abi
// forward declarations is_simd(_mask), simd(_mask), simd_size {{{
template <typename _Tp>
  struct is_simd;

template <typename _Tp>
  struct is_simd_mask;

template <typename _Tp, typename _Abi>
  class simd;

template <typename _Tp, typename _Abi>
  class simd_mask;

template <typename _Tp, typename _Abi>
  struct simd_size;

// }}}
// load/store flags {{{
struct element_aligned_tag
{
  template <typename _Tp, typename _Up = typename _Tp::value_type>
    static constexpr size_t _S_alignment = alignof(_Up);

  template <typename _Tp, typename _Up>
    _GLIBCXX_SIMD_INTRINSIC static constexpr _Up*
    _S_apply(_Up* __ptr)
    { return __ptr; }
};

struct vector_aligned_tag
{
  template <typename _Tp, typename _Up = typename _Tp::value_type>
    static constexpr size_t _S_alignment
      = std::__bit_ceil(sizeof(_Up) * _Tp::size());

  template <typename _Tp, typename _Up>
    _GLIBCXX_SIMD_INTRINSIC static constexpr _Up*
    _S_apply(_Up* __ptr)
    {
      return static_cast<_Up*>(
        __builtin_assume_aligned(__ptr, _S_alignment<_Tp, _Up>));
    }
};

template <size_t _Np> struct overaligned_tag
{
  template <typename _Tp, typename _Up = typename _Tp::value_type>
    static constexpr size_t _S_alignment = _Np;

  template <typename _Tp, typename _Up>
    _GLIBCXX_SIMD_INTRINSIC static constexpr _Up*
    _S_apply(_Up* __ptr)
    { return static_cast<_Up*>(__builtin_assume_aligned(__ptr, _Np)); }
};

inline constexpr element_aligned_tag element_aligned = {};

inline constexpr vector_aligned_tag vector_aligned = {};

template <size_t _Np>
  inline constexpr overaligned_tag<_Np> overaligned = {};

// }}}
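// Illustration (not part of the header): these tags are passed as the Flags
// argument of simd::copy_from/copy_to to select the alignment assumption.
// A sketch:
//
//   alignas(vector_aligned_tag::_S_alignment<simd<float>, float>)
//     float __buf[simd<float>::size()];
//   simd<float> __v;
//   __v.copy_from(__buf, vector_aligned);      // full vector alignment assumed
//   __v.copy_from(__buf + 1, element_aligned); // only alignof(float) assumed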
template <size_t _Xp>
  using _SizeConstant = integral_constant<size_t, _Xp>;
// constexpr feature detection{{{
constexpr inline bool __have_mmx = _GLIBCXX_SIMD_HAVE_MMX;
constexpr inline bool __have_sse = _GLIBCXX_SIMD_HAVE_SSE;
constexpr inline bool __have_sse2 = _GLIBCXX_SIMD_HAVE_SSE2;
constexpr inline bool __have_sse3 = _GLIBCXX_SIMD_HAVE_SSE3;
constexpr inline bool __have_ssse3 = _GLIBCXX_SIMD_HAVE_SSSE3;
constexpr inline bool __have_sse4_1 = _GLIBCXX_SIMD_HAVE_SSE4_1;
constexpr inline bool __have_sse4_2 = _GLIBCXX_SIMD_HAVE_SSE4_2;
constexpr inline bool __have_xop = _GLIBCXX_SIMD_HAVE_XOP;
constexpr inline bool __have_avx = _GLIBCXX_SIMD_HAVE_AVX;
constexpr inline bool __have_avx2 = _GLIBCXX_SIMD_HAVE_AVX2;
constexpr inline bool __have_bmi = _GLIBCXX_SIMD_HAVE_BMI1;
constexpr inline bool __have_bmi2 = _GLIBCXX_SIMD_HAVE_BMI2;
constexpr inline bool __have_lzcnt = _GLIBCXX_SIMD_HAVE_LZCNT;
constexpr inline bool __have_sse4a = _GLIBCXX_SIMD_HAVE_SSE4A;
constexpr inline bool __have_fma = _GLIBCXX_SIMD_HAVE_FMA;
constexpr inline bool __have_fma4 = _GLIBCXX_SIMD_HAVE_FMA4;
constexpr inline bool __have_f16c = _GLIBCXX_SIMD_HAVE_F16C;
constexpr inline bool __have_popcnt = _GLIBCXX_SIMD_HAVE_POPCNT;
constexpr inline bool __have_avx512f = _GLIBCXX_SIMD_HAVE_AVX512F;
constexpr inline bool __have_avx512dq = _GLIBCXX_SIMD_HAVE_AVX512DQ;
constexpr inline bool __have_avx512vl = _GLIBCXX_SIMD_HAVE_AVX512VL;
constexpr inline bool __have_avx512bw = _GLIBCXX_SIMD_HAVE_AVX512BW;
constexpr inline bool __have_avx512dq_vl = __have_avx512dq && __have_avx512vl;
constexpr inline bool __have_avx512bw_vl = __have_avx512bw && __have_avx512vl;
constexpr inline bool __have_avx512bitalg = _GLIBCXX_SIMD_HAVE_AVX512BITALG;
constexpr inline bool __have_avx512vbmi2 = _GLIBCXX_SIMD_HAVE_AVX512VBMI2;
constexpr inline bool __have_avx512vbmi = _GLIBCXX_SIMD_HAVE_AVX512VBMI;
constexpr inline bool __have_avx512ifma = _GLIBCXX_SIMD_HAVE_AVX512IFMA;
constexpr inline bool __have_avx512cd = _GLIBCXX_SIMD_HAVE_AVX512CD;
constexpr inline bool __have_avx512vnni = _GLIBCXX_SIMD_HAVE_AVX512VNNI;
constexpr inline bool __have_avx512vpopcntdq = _GLIBCXX_SIMD_HAVE_AVX512VPOPCNTDQ;
constexpr inline bool __have_avx512vp2intersect = _GLIBCXX_SIMD_HAVE_AVX512VP2INTERSECT;

constexpr inline bool __have_neon = _GLIBCXX_SIMD_HAVE_NEON;
constexpr inline bool __have_neon_a32 = _GLIBCXX_SIMD_HAVE_NEON_A32;
constexpr inline bool __have_neon_a64 = _GLIBCXX_SIMD_HAVE_NEON_A64;
constexpr inline bool __support_neon_float =
#if defined __GCC_IEC_559
  __GCC_IEC_559 == 0;
#elif defined __FAST_MATH__
  true;
#else
  false;
#endif

#ifdef _ARCH_PWR10
constexpr inline bool __have_power10vec = true;
#else
constexpr inline bool __have_power10vec = false;
#endif
#ifdef __POWER9_VECTOR__
constexpr inline bool __have_power9vec = true;
#else
constexpr inline bool __have_power9vec = false;
#endif
#if defined __POWER8_VECTOR__
constexpr inline bool __have_power8vec = true;
#else
constexpr inline bool __have_power8vec = __have_power9vec;
#endif
#if defined __VSX__
constexpr inline bool __have_power_vsx = true;
#else
constexpr inline bool __have_power_vsx = __have_power8vec;
#endif
#if defined __ALTIVEC__
constexpr inline bool __have_power_vmx = true;
#else
constexpr inline bool __have_power_vmx = __have_power_vsx;
#endif

// }}}
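// Illustration (not part of the header): because these are constexpr bools
// rather than macros, feature dispatch can use `if constexpr`, so the untaken
// branch is discarded at compile time while both branches stay type-checked:
//
//   if constexpr (__have_avx2)
//     /* AVX2 code path */;
//   else
//     /* generic fallback */;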
namespace __detail
{
#ifdef math_errhandling
  // Determines _S_handle_fpexcept from math_errhandling if it is defined and
  // expands to a constant expression. math_errhandling may expand to an extern
  // symbol, in which case a constexpr value must be guessed.
  template <int = math_errhandling>
    constexpr bool
    __handle_fpexcept_impl(int)
    { return math_errhandling & MATH_ERREXCEPT; }
#endif

  // Fallback if math_errhandling doesn't work: with fast-math assume
  // floating-point exceptions are ignored, otherwise implement correct
  // exception behavior.
  constexpr bool
  __handle_fpexcept_impl(float)
  {
#if defined __FAST_MATH__
    return false;
#else
    return true;
#endif
  }

  /// True if math functions must raise floating-point exceptions as specified by C17.
  static constexpr bool _S_handle_fpexcept = __handle_fpexcept_impl(0);

  constexpr std::uint_least64_t
  __floating_point_flags()
  {
    std::uint_least64_t __flags = 0;
    if constexpr (_S_handle_fpexcept)
      __flags |= 1;
#ifdef __FAST_MATH__
    __flags |= 1 << 1;
#elif __FINITE_MATH_ONLY__
    __flags |= 2 << 1;
#elif __GCC_IEC_559 < 2
    __flags |= 3 << 1;
#endif
    __flags |= (__FLT_EVAL_METHOD__ + 1) << 3;
    return __flags;
  }

  constexpr std::uint_least64_t
  __machine_flags()
  {
    if constexpr (__have_mmx || __have_sse)
      return __have_mmx
               | (__have_sse << 1)
               | (__have_sse2 << 2)
               | (__have_sse3 << 3)
               | (__have_ssse3 << 4)
               | (__have_sse4_1 << 5)
               | (__have_sse4_2 << 6)
               | (__have_xop << 7)
               | (__have_avx << 8)
               | (__have_avx2 << 9)
               | (__have_bmi << 10)
               | (__have_bmi2 << 11)
               | (__have_lzcnt << 12)
               | (__have_sse4a << 13)
               | (__have_fma << 14)
               | (__have_fma4 << 15)
               | (__have_f16c << 16)
               | (__have_popcnt << 17)
               | (__have_avx512f << 18)
               | (__have_avx512dq << 19)
               | (__have_avx512vl << 20)
               | (__have_avx512bw << 21)
               | (__have_avx512bitalg << 22)
               | (__have_avx512vbmi2 << 23)
               | (__have_avx512vbmi << 24)
               | (__have_avx512ifma << 25)
               | (__have_avx512cd << 26)
               | (__have_avx512vnni << 27)
               | (__have_avx512vpopcntdq << 28)
               | (__have_avx512vp2intersect << 29);
    else if constexpr (__have_neon)
      return __have_neon
               | (__have_neon_a32 << 1)
               | (__have_neon_a64 << 2)
               | (__support_neon_float << 3);
    else if constexpr (__have_power_vmx)
      return __have_power_vmx
               | (__have_power_vsx << 1)
               | (__have_power8vec << 2)
               | (__have_power9vec << 3)
               | (__have_power10vec << 4);
    else
      return 0;
  }

  namespace
  {
    struct _OdrEnforcer {};
  }

  template <std::uint_least64_t...>
    struct _MachineFlagsTemplate {};

  /**@internal
   * Use this type as default template argument to all function templates that
   * are not declared always_inline. It ensures that a function specialization,
   * which the compiler decides not to inline, has a unique symbol
   * (_OdrEnforcer) or a symbol matching the machine/architecture flags
   * (_MachineFlagsTemplate). This helps to avoid ODR violations in cases where
   * users link TUs compiled with different flags. This is especially important
   * for using simd in libraries.
   */
  using __odr_helper
    = conditional_t<__machine_flags() == 0, _OdrEnforcer,
                    _MachineFlagsTemplate<__machine_flags(), __floating_point_flags()>>;
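  // Illustration (not part of the header): a hypothetical out-of-line function
  // template in this library would take __odr_helper as a defaulted template
  // parameter, so that TUs compiled with different -m or -ffast-math flags
  // instantiate distinct symbols instead of silently violating the ODR:
  //
  //   template <typename _Tp, typename = __detail::__odr_helper>
  //     _Tp __some_noninline_helper(_Tp); // hypothetical name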
  struct _Minimum
  {
    template <typename _Tp>
      _GLIBCXX_SIMD_INTRINSIC constexpr _Tp
      operator()(_Tp __a, _Tp __b) const
      {
        using std::min;
        return min(__a, __b);
      }
  };

  struct _Maximum
  {
    template <typename _Tp>
      _GLIBCXX_SIMD_INTRINSIC constexpr _Tp
      operator()(_Tp __a, _Tp __b) const
      {
        using std::max;
        return max(__a, __b);
      }
  };
} // namespace __detail

// unrolled/pack execution helpers
// __execute_n_times{{{
template <typename _Fp, size_t... _I>
  [[__gnu__::__flatten__]] _GLIBCXX_SIMD_INTRINSIC constexpr void
  __execute_on_index_sequence(_Fp&& __f, index_sequence<_I...>)
  { ((void)__f(_SizeConstant<_I>()), ...); }

template <typename _Fp>
  _GLIBCXX_SIMD_INTRINSIC constexpr void
  __execute_on_index_sequence(_Fp&&, index_sequence<>)
  { }

template <size_t _Np, typename _Fp>
  _GLIBCXX_SIMD_INTRINSIC constexpr void
  __execute_n_times(_Fp&& __f)
  {
    __execute_on_index_sequence(static_cast<_Fp&&>(__f),
                                make_index_sequence<_Np>{});
  }

// }}}
// __generate_from_n_evaluations{{{
template <typename _R, typename _Fp, size_t... _I>
  [[__gnu__::__flatten__]] _GLIBCXX_SIMD_INTRINSIC constexpr _R
  __execute_on_index_sequence_with_return(_Fp&& __f, index_sequence<_I...>)
  { return _R{__f(_SizeConstant<_I>())...}; }

template <size_t _Np, typename _R, typename _Fp>
  _GLIBCXX_SIMD_INTRINSIC constexpr _R
  __generate_from_n_evaluations(_Fp&& __f)
  {
    return __execute_on_index_sequence_with_return<_R>(
      static_cast<_Fp&&>(__f), make_index_sequence<_Np>{});
  }

// }}}
// __call_with_n_evaluations{{{
template <size_t... _I, typename _F0, typename _FArgs>
  [[__gnu__::__flatten__]] _GLIBCXX_SIMD_INTRINSIC constexpr auto
  __call_with_n_evaluations(index_sequence<_I...>, _F0&& __f0, _FArgs&& __fargs)
  { return __f0(__fargs(_SizeConstant<_I>())...); }

template <size_t _Np, typename _F0, typename _FArgs>
  _GLIBCXX_SIMD_INTRINSIC constexpr auto
  __call_with_n_evaluations(_F0&& __f0, _FArgs&& __fargs)
  {
    return __call_with_n_evaluations(make_index_sequence<_Np>{},
                                     static_cast<_F0&&>(__f0),
                                     static_cast<_FArgs&&>(__fargs));
  }

// }}}
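// Illustration (not part of the header): these helpers express compile-time
// unrolled loops over pack expansions. E.g., with std::array<int, 4> __a:
//
//   int __sum = 0;
//   __execute_n_times<4>([&](auto __i) { __sum += __a[__i]; });
//
//   auto __squares = __generate_from_n_evaluations<4, std::array<int, 4>>(
//     [](auto __i) { return int(__i) * int(__i); }); // {0, 1, 4, 9}
//
// Each __i is a _SizeConstant, so it is also usable in constant expressions.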
// __call_with_subscripts{{{
template <size_t _First = 0, size_t... _It, typename _Tp, typename _Fp>
  [[__gnu__::__flatten__]] _GLIBCXX_SIMD_INTRINSIC constexpr auto
  __call_with_subscripts(_Tp&& __x, index_sequence<_It...>, _Fp&& __fun)
  { return __fun(__x[_First + _It]...); }

template <size_t _Np, size_t _First = 0, typename _Tp, typename _Fp>
  _GLIBCXX_SIMD_INTRINSIC constexpr auto
  __call_with_subscripts(_Tp&& __x, _Fp&& __fun)
  {
    return __call_with_subscripts<_First>(static_cast<_Tp&&>(__x),
                                          make_index_sequence<_Np>(),
                                          static_cast<_Fp&&>(__fun));
  }

// }}}

// vvv ---- type traits ---- vvv
// integer type aliases{{{
using _UChar = unsigned char;
using _SChar = signed char;
using _UShort = unsigned short;
using _UInt = unsigned int;
using _ULong = unsigned long;
using _ULLong = unsigned long long;
using _LLong = long long;

//}}}
// __first_of_pack{{{
template <typename _T0, typename...>
  struct __first_of_pack
  { using type = _T0; };

template <typename... _Ts>
  using __first_of_pack_t = typename __first_of_pack<_Ts...>::type;

//}}}
// __value_type_or_identity_t {{{
template <typename _Tp>
  typename _Tp::value_type
  __value_type_or_identity_impl(int);

template <typename _Tp>
  _Tp
  __value_type_or_identity_impl(float);

template <typename _Tp>
  using __value_type_or_identity_t
    = decltype(__value_type_or_identity_impl<_Tp>(int()));

// }}}
// __is_vectorizable {{{
template <typename _Tp>
  struct __is_vectorizable : public is_arithmetic<_Tp> {};

template <>
  struct __is_vectorizable<bool> : public false_type {};

template <typename _Tp>
  inline constexpr bool __is_vectorizable_v = __is_vectorizable<_Tp>::value;

// Deduces to a vectorizable type
template <typename _Tp, typename = enable_if_t<__is_vectorizable_v<_Tp>>>
  using _Vectorizable = _Tp;

// }}}
// _LoadStorePtr / __is_possible_loadstore_conversion {{{
template <typename _Ptr, typename _ValueType>
  struct __is_possible_loadstore_conversion
  : conjunction<__is_vectorizable<_Ptr>, __is_vectorizable<_ValueType>> {};

template <>
  struct __is_possible_loadstore_conversion<bool, bool> : true_type {};

// Deduces to a type allowed for load/store with the given value type.
template <typename _Ptr, typename _ValueType,
          typename = enable_if_t<
            __is_possible_loadstore_conversion<_Ptr, _ValueType>::value>>
  using _LoadStorePtr = _Ptr;

// }}}
// __is_bitmask{{{
template <typename _Tp, typename = void_t<>>
  struct __is_bitmask : false_type {};

template <typename _Tp>
  inline constexpr bool __is_bitmask_v = __is_bitmask<_Tp>::value;

// the __mmaskXX case:
template <typename _Tp>
  struct __is_bitmask<_Tp,
                      void_t<decltype(declval<unsigned&>() = declval<_Tp>() & 1u)>>
  : true_type {};

// }}}
// __int_for_sizeof{{{
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wpedantic"
template <size_t _Bytes>
  constexpr auto
  __int_for_sizeof()
  {
    if constexpr (_Bytes == sizeof(int))
      return int();
#ifdef __clang__
    else if constexpr (_Bytes == sizeof(char))
      return char();
#else
    else if constexpr (_Bytes == sizeof(_SChar))
      return _SChar();
#endif
    else if constexpr (_Bytes == sizeof(short))
      return short();
#ifndef __clang__
    else if constexpr (_Bytes == sizeof(long))
      return long();
#endif
    else if constexpr (_Bytes == sizeof(_LLong))
      return _LLong();
#ifdef __SIZEOF_INT128__
    else if constexpr (_Bytes == sizeof(__int128))
      return __int128();
#endif // __SIZEOF_INT128__
    else if constexpr (_Bytes % sizeof(int) == 0)
      {
        constexpr size_t _Np = _Bytes / sizeof(int);
        struct _Ip
        {
          int _M_data[_Np];

          _GLIBCXX_SIMD_INTRINSIC constexpr _Ip
          operator&(_Ip __rhs) const
          {
            return __generate_from_n_evaluations<_Np, _Ip>(
              [&](auto __i) { return __rhs._M_data[__i] & _M_data[__i]; });
          }

          _GLIBCXX_SIMD_INTRINSIC constexpr _Ip
          operator|(_Ip __rhs) const
          {
            return __generate_from_n_evaluations<_Np, _Ip>(
              [&](auto __i) { return __rhs._M_data[__i] | _M_data[__i]; });
          }

          _GLIBCXX_SIMD_INTRINSIC constexpr _Ip
          operator^(_Ip __rhs) const
          {
            return __generate_from_n_evaluations<_Np, _Ip>(
              [&](auto __i) { return __rhs._M_data[__i] ^ _M_data[__i]; });
          }

          _GLIBCXX_SIMD_INTRINSIC constexpr _Ip
          operator~() const
          {
            return __generate_from_n_evaluations<_Np, _Ip>(
              [&](auto __i) { return ~_M_data[__i]; });
          }
        };
        return _Ip{};
      }
    else
      static_assert(_Bytes != _Bytes, "this should be unreachable");
  }
#pragma GCC diagnostic pop

template <typename _Tp>
  using __int_for_sizeof_t = decltype(__int_for_sizeof<sizeof(_Tp)>());

template <size_t _Np>
  using __int_with_sizeof_t = decltype(__int_for_sizeof<_Np>());

// }}}
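// Illustration (not part of the header), assuming common ILP32/LP64 targets:
//
//   static_assert(is_same_v<__int_for_sizeof_t<float>, int>);
//   static_assert(sizeof(__int_with_sizeof_t<8>) == 8); // long or long long
//
// For sizes with no matching integer type (e.g. 32 bytes), the locally defined
// struct of ints is returned; it only needs to support the bitwise operators
// used internally.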
// __is_fixed_size_abi{{{
template <typename _Tp>
  struct __is_fixed_size_abi : false_type {};

template <int _Np>
  struct __is_fixed_size_abi<simd_abi::fixed_size<_Np>> : true_type {};

template <typename _Tp>
  inline constexpr bool __is_fixed_size_abi_v = __is_fixed_size_abi<_Tp>::value;

// }}}
// __is_scalar_abi {{{
template <typename _Abi>
  constexpr bool
  __is_scalar_abi()
  { return is_same_v<simd_abi::scalar, _Abi>; }

// }}}
// __abi_bytes_v {{{
template <template <int> class _Abi, int _Bytes>
  constexpr int
  __abi_bytes_impl(_Abi<_Bytes>*)
  { return _Bytes; }

template <typename _Tp>
  constexpr int
  __abi_bytes_impl(_Tp*)
  { return -1; }

template <typename _Abi>
  inline constexpr int __abi_bytes_v
    = __abi_bytes_impl(static_cast<_Abi*>(nullptr));

// }}}
// __is_builtin_bitmask_abi {{{
template <typename _Abi>
  constexpr bool
  __is_builtin_bitmask_abi()
  { return is_same_v<simd_abi::_VecBltnBtmsk<__abi_bytes_v<_Abi>>, _Abi>; }

// }}}
// __is_sse_abi {{{
template <typename _Abi>
  constexpr bool
  __is_sse_abi()
  {
    constexpr auto _Bytes = __abi_bytes_v<_Abi>;
    return _Bytes <= 16 && is_same_v<simd_abi::_VecBuiltin<_Bytes>, _Abi>;
  }

// }}}
// __is_avx_abi {{{
template <typename _Abi>
  constexpr bool
  __is_avx_abi()
  {
    constexpr auto _Bytes = __abi_bytes_v<_Abi>;
    return _Bytes > 16 && _Bytes <= 32
             && is_same_v<simd_abi::_VecBuiltin<_Bytes>, _Abi>;
  }

// }}}
// __is_avx512_abi {{{
template <typename _Abi>
  constexpr bool
  __is_avx512_abi()
  {
    constexpr auto _Bytes = __abi_bytes_v<_Abi>;
    return _Bytes <= 64 && is_same_v<simd_abi::_Avx512<_Bytes>, _Abi>;
  }

// }}}
// __is_neon_abi {{{
template <typename _Abi>
  constexpr bool
  __is_neon_abi()
  {
    constexpr auto _Bytes = __abi_bytes_v<_Abi>;
    return _Bytes <= 16 && is_same_v<simd_abi::_VecBuiltin<_Bytes>, _Abi>;
  }

// }}}
// __make_dependent_t {{{
template <typename, typename _Up>
  struct __make_dependent
  { using type = _Up; };

template <typename _Tp, typename _Up>
  using __make_dependent_t = typename __make_dependent<_Tp, _Up>::type;

// }}}
// ^^^ ---- type traits ---- ^^^

// __invoke_ub{{{
template <typename... _Args>
  [[noreturn]] _GLIBCXX_SIMD_ALWAYS_INLINE void
  __invoke_ub([[maybe_unused]] const char* __msg,
              [[maybe_unused]] const _Args&... __args)
  {
#ifdef _GLIBCXX_DEBUG_UB
    __builtin_fprintf(stderr, __msg, __args...);
    __builtin_trap();
#else
    __builtin_unreachable();
#endif
  }

// }}}
// __assert_unreachable{{{
template <typename _Tp>
  struct __assert_unreachable
  { static_assert(!is_same_v<_Tp, _Tp>, "this should be unreachable"); };

// }}}
// __size_or_zero_v {{{
template <typename _Tp, typename _Ap, size_t _Np = simd_size<_Tp, _Ap>::value>
  constexpr size_t
  __size_or_zero_dispatch(int)
  { return _Np; }

template <typename _Tp, typename _Ap>
  constexpr size_t
  __size_or_zero_dispatch(float)
  { return 0; }

template <typename _Tp, typename _Ap>
  inline constexpr size_t __size_or_zero_v
    = __size_or_zero_dispatch<_Tp, _Ap>(0);

// }}}
// __div_roundup {{{
inline constexpr size_t
__div_roundup(size_t __a, size_t __b)
{ return (__a + __b - 1) / __b; }

// }}}
// _ExactBool{{{
class _ExactBool
{
  const bool _M_data;

public:
  _GLIBCXX_SIMD_INTRINSIC constexpr _ExactBool(bool __b) : _M_data(__b) {}

  _ExactBool(int) = delete;

  _GLIBCXX_SIMD_INTRINSIC constexpr operator bool() const { return _M_data; }
};

// }}}
// __may_alias{{{
/**@internal
 * Helper __may_alias<_Tp> that turns _Tp into the type to be used for an
 * aliasing pointer. This adds the __may_alias attribute to _Tp (with compilers
 * that support it).
 */
template <typename _Tp>
  using __may_alias [[__gnu__::__may_alias__]] = _Tp;

// }}}
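// Illustration (not part of the header): a load through a __may_alias pointer
// avoids undefined behavior when reinterpreting storage of a different type,
// e.g. reading the bits of a float buffer as an int vector (sketch):
//
//   float __buf[4] = {1, 2, 3, 4};
//   using _IV = __vector_type_t<int, 4>;
//   _IV __bits = *reinterpret_cast<const __may_alias<_IV>*>(__buf);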
// _UnsupportedBase {{{
// simd and simd_mask base for unsupported <_Tp, _Abi>
struct _UnsupportedBase
{
  _UnsupportedBase() = delete;
  _UnsupportedBase(const _UnsupportedBase&) = delete;
  _UnsupportedBase& operator=(const _UnsupportedBase&) = delete;
  ~_UnsupportedBase() = delete;
};

// }}}
// _InvalidTraits {{{
/**
 * @internal
 * Defines the implementation of a given <_Tp, _Abi>.
 *
 * Implementations must ensure that only valid <_Tp, _Abi> instantiations are
 * possible. Static assertions in the type definition do not suffice. It is
 * important that SFINAE works.
 */
struct _InvalidTraits
{
  using _IsValid = false_type;
  using _SimdBase = _UnsupportedBase;
  using _MaskBase = _UnsupportedBase;

  static constexpr size_t _S_full_size = 0;
  static constexpr bool _S_is_partial = false;

  static constexpr size_t _S_simd_align = 1;
  struct _SimdImpl;
  struct _SimdMember {};
  struct _SimdCastType;

  static constexpr size_t _S_mask_align = 1;
  struct _MaskImpl;
  struct _MaskMember {};
  struct _MaskCastType;
};

// }}}
// _SimdTraits {{{
template <typename _Tp, typename _Abi, typename = void_t<>>
  struct _SimdTraits : _InvalidTraits {};

// }}}
// __private_init, __bitset_init{{{
/**
 * @internal
 * Tag used for private init constructor of simd and simd_mask
 */
inline constexpr struct _PrivateInit {} __private_init = {};

inline constexpr struct _BitsetInit {} __bitset_init = {};

// }}}
// __is_narrowing_conversion<_From, _To>{{{
template <typename _From, typename _To, bool = is_arithmetic_v<_From>,
          bool = is_arithmetic_v<_To>>
  struct __is_narrowing_conversion;

// ignore "signed/unsigned mismatch" in the following trait.
// The implicit conversions will do the right thing here.
template <typename _From, typename _To>
  struct __is_narrowing_conversion<_From, _To, true, true>
  : public __bool_constant<(
      __digits_v<_From> > __digits_v<_To>
      || __finite_max_v<_From> > __finite_max_v<_To>
      || __finite_min_v<_From> < __finite_min_v<_To>
      || (is_signed_v<_From> && is_unsigned_v<_To>))> {};

template <typename _Tp>
  struct __is_narrowing_conversion<_Tp, bool, true, true>
  : public true_type {};

template <>
  struct __is_narrowing_conversion<bool, bool, true, true>
  : public false_type {};

template <typename _Tp>
  struct __is_narrowing_conversion<_Tp, _Tp, true, true>
  : public false_type {};

template <typename _From, typename _To>
  struct __is_narrowing_conversion<_From, _To, false, true>
  : public negation<is_convertible<_From, _To>> {};

// }}}
// __converts_to_higher_integer_rank{{{
template <typename _From, typename _To, bool = (sizeof(_From) < sizeof(_To))>
  struct __converts_to_higher_integer_rank : public true_type {};

// this may fail for char -> short if sizeof(char) == sizeof(short)
template <typename _From, typename _To>
  struct __converts_to_higher_integer_rank<_From, _To, false>
  : public is_same<decltype(declval<_From>() + declval<_To>()), _To> {};

// }}}
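// Illustration (not part of the header):
//
//   static_assert(__is_narrowing_conversion<double, float>::value);
//   static_assert(__is_narrowing_conversion<int, unsigned>::value); // sign change
//   static_assert(!__is_narrowing_conversion<float, double>::value);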
// __data(simd/simd_mask) {{{
template <typename _Tp, typename _Ap>
  _GLIBCXX_SIMD_INTRINSIC constexpr const auto&
  __data(const simd<_Tp, _Ap>& __x);

template <typename _Tp, typename _Ap>
  _GLIBCXX_SIMD_INTRINSIC constexpr auto&
  __data(simd<_Tp, _Ap>& __x);

template <typename _Tp, typename _Ap>
  _GLIBCXX_SIMD_INTRINSIC constexpr const auto&
  __data(const simd_mask<_Tp, _Ap>& __x);

template <typename _Tp, typename _Ap>
  _GLIBCXX_SIMD_INTRINSIC constexpr auto&
  __data(simd_mask<_Tp, _Ap>& __x);

// }}}
// _SimdConverter {{{
template <typename _FromT, typename _FromA, typename _ToT, typename _ToA,
          typename = void>
  struct _SimdConverter;

template <typename _Tp, typename _Ap>
  struct _SimdConverter<_Tp, _Ap, _Tp, _Ap, void>
  {
    template <typename _Up>
      _GLIBCXX_SIMD_INTRINSIC const _Up&
      operator()(const _Up& __x)
      { return __x; }
  };

// }}}
// __to_value_type_or_member_type {{{
template <typename _V>
  _GLIBCXX_SIMD_INTRINSIC constexpr auto
  __to_value_type_or_member_type(const _V& __x) -> decltype(__data(__x))
  { return __data(__x); }

template <typename _V>
  _GLIBCXX_SIMD_INTRINSIC constexpr const typename _V::value_type&
  __to_value_type_or_member_type(const typename _V::value_type& __x)
  { return __x; }

// }}}
// __bool_storage_member_type{{{
template <size_t _Size>
  struct __bool_storage_member_type;

template <size_t _Size>
  using __bool_storage_member_type_t =
    typename __bool_storage_member_type<_Size>::type;

// }}}
// _SimdTuple {{{
// why not tuple?
// 1. tuple gives no guarantee about the storage order, but I require
//    storage equivalent to array<_Tp, _Np>
// 2. direct access to the element type (first template argument)
// 3. enforces equal element type, only different _Abi types are allowed
template <typename _Tp, typename... _Abis>
  struct _SimdTuple;

//}}}
// __fixed_size_storage_t {{{
template <typename _Tp, int _Np>
  struct __fixed_size_storage;

template <typename _Tp, int _Np>
  using __fixed_size_storage_t = typename __fixed_size_storage<_Tp, _Np>::type;

// }}}
// _SimdWrapper fwd decl{{{
template <typename _Tp, size_t _Size, typename = void_t<>>
  struct _SimdWrapper;

template <typename _Tp>
  using _SimdWrapper8 = _SimdWrapper<_Tp, 8 / sizeof(_Tp)>;
template <typename _Tp>
  using _SimdWrapper16 = _SimdWrapper<_Tp, 16 / sizeof(_Tp)>;
template <typename _Tp>
  using _SimdWrapper32 = _SimdWrapper<_Tp, 32 / sizeof(_Tp)>;
template <typename _Tp>
  using _SimdWrapper64 = _SimdWrapper<_Tp, 64 / sizeof(_Tp)>;

// }}}
// __is_simd_wrapper {{{
template <typename _Tp>
  struct __is_simd_wrapper : false_type {};

template <typename _Tp, size_t _Np>
  struct __is_simd_wrapper<_SimdWrapper<_Tp, _Np>> : true_type {};

template <typename _Tp>
  inline constexpr bool __is_simd_wrapper_v = __is_simd_wrapper<_Tp>::value;

// }}}
// _BitOps {{{
struct _BitOps
{
  // _S_bit_iteration {{{
  template <typename _Tp, typename _Fp>
    static void
    _S_bit_iteration(_Tp __mask, _Fp&& __f)
    {
      static_assert(sizeof(_ULLong) >= sizeof(_Tp));
      conditional_t<sizeof(_Tp) <= sizeof(_UInt), _UInt, _ULLong> __k;
      if constexpr (is_convertible_v<_Tp, decltype(__k)>)
        __k = __mask;
      else
        __k = __mask.to_ullong();
      while (__k)
        {
          __f(std::__countr_zero(__k));
          __k &= (__k - 1);
        }
    }

  //}}}
};

//}}}
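// Illustration (not part of the header): _S_bit_iteration visits the index of
// every set bit, least significant first. With __mask == 0b1010u the callback
// is invoked with 1 and then 3:
//
//   _BitOps::_S_bit_iteration(0b1010u, [](auto __i) { /* __i: 1, then 3 */ });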
// __increment, __decrement {{{
template <typename _Tp = void>
  struct __increment
  { constexpr _Tp operator()(_Tp __a) const { return ++__a; } };

template <>
  struct __increment<void>
  {
    template <typename _Tp>
      constexpr _Tp
      operator()(_Tp __a) const
      { return ++__a; }
  };

template <typename _Tp = void>
  struct __decrement
  { constexpr _Tp operator()(_Tp __a) const { return --__a; } };

template <>
  struct __decrement<void>
  {
    template <typename _Tp>
      constexpr _Tp
      operator()(_Tp __a) const
      { return --__a; }
  };

// }}}
// _ValuePreserving(OrInt) {{{
template <typename _From, typename _To,
          typename = enable_if_t<negation<
            __is_narrowing_conversion<__remove_cvref_t<_From>, _To>>::value>>
  using _ValuePreserving = _From;

template <typename _From, typename _To,
          typename _DecayedFrom = __remove_cvref_t<_From>,
          typename = enable_if_t<conjunction<
            is_convertible<_From, _To>,
            disjunction<
              is_same<_DecayedFrom, _To>, is_same<_DecayedFrom, int>,
              conjunction<is_same<_DecayedFrom, _UInt>, is_unsigned<_To>>,
              negation<__is_narrowing_conversion<_DecayedFrom, _To>>>>::value>>
  using _ValuePreservingOrInt = _From;

// }}}
// __intrinsic_type {{{
template <typename _Tp, size_t _Bytes, typename = void_t<>>
  struct __intrinsic_type;

template <typename _Tp, size_t _Size>
  using __intrinsic_type_t =
    typename __intrinsic_type<_Tp, _Size * sizeof(_Tp)>::type;

template <typename _Tp>
  using __intrinsic_type2_t = typename __intrinsic_type<_Tp, 2>::type;
template <typename _Tp>
  using __intrinsic_type4_t = typename __intrinsic_type<_Tp, 4>::type;
template <typename _Tp>
  using __intrinsic_type8_t = typename __intrinsic_type<_Tp, 8>::type;
template <typename _Tp>
  using __intrinsic_type16_t = typename __intrinsic_type<_Tp, 16>::type;
template <typename _Tp>
  using __intrinsic_type32_t = typename __intrinsic_type<_Tp, 32>::type;
template <typename _Tp>
  using __intrinsic_type64_t = typename __intrinsic_type<_Tp, 64>::type;

// }}}
// _BitMask {{{
template <size_t _Np, bool _Sanitized = false>
  struct _BitMask;

template <size_t _Np, bool _Sanitized>
  struct __is_bitmask<_BitMask<_Np, _Sanitized>, void> : true_type {};

template <size_t _Np>
  using _SanitizedBitMask = _BitMask<_Np, true>;

template <size_t _Np, bool _Sanitized>
  struct _BitMask
  {
    static_assert(_Np > 0);

    static constexpr size_t _NBytes = __div_roundup(_Np, __CHAR_BIT__);

    using _Tp = conditional_t<_Np == 1, bool,
                              make_unsigned_t<__int_with_sizeof_t<std::min(
                                sizeof(_ULLong), std::__bit_ceil(_NBytes))>>>;

    static constexpr int _S_array_size = __div_roundup(_NBytes, sizeof(_Tp));

    _Tp _M_bits[_S_array_size];

    static constexpr int _S_unused_bits
      = _Np == 1 ? 0 : _S_array_size * sizeof(_Tp) * __CHAR_BIT__ - _Np;

    static constexpr _Tp _S_bitmask = +_Tp(~_Tp()) >> _S_unused_bits;

    constexpr _BitMask() noexcept = default;

    constexpr _BitMask(unsigned long long __x) noexcept
    : _M_bits{static_cast<_Tp>(__x)} {}

    _BitMask(bitset<_Np> __x) noexcept : _BitMask(__x.to_ullong()) {}

    constexpr _BitMask(const _BitMask&) noexcept = default;

    template <bool _RhsSanitized, typename = enable_if_t<_RhsSanitized == false
                                                           && _Sanitized == true>>
      constexpr _BitMask(const _BitMask<_Np, _RhsSanitized>& __rhs) noexcept
      : _BitMask(__rhs._M_sanitized()) {}

    constexpr operator _SimdWrapper<bool, _Np>() const noexcept
    {
      static_assert(_S_array_size == 1);
      return _M_bits[0];
    }

    // precondition: is sanitized
    constexpr _Tp
    _M_to_bits() const noexcept
    {
      static_assert(_S_array_size == 1);
      return _M_bits[0];
    }

    // precondition: is sanitized
    constexpr unsigned long long
    to_ullong() const noexcept
    {
      static_assert(_S_array_size == 1);
      return _M_bits[0];
    }

    // precondition: is sanitized
    constexpr unsigned long
    to_ulong() const noexcept
    {
      static_assert(_S_array_size == 1);
      return _M_bits[0];
    }

    constexpr bitset<_Np>
    _M_to_bitset() const noexcept
    {
      static_assert(_S_array_size == 1);
      return _M_bits[0];
    }

    constexpr decltype(auto)
    _M_sanitized() const noexcept
    {
      if constexpr (_Sanitized)
        return *this;
      else if constexpr (_Np == 1)
        return _SanitizedBitMask<_Np>(_M_bits[0]);
      else
        {
          _SanitizedBitMask<_Np> __r = {};
          for (int __i = 0; __i < _S_array_size; ++__i)
            __r._M_bits[__i] = _M_bits[__i];
          if constexpr (_S_unused_bits > 0)
            __r._M_bits[_S_array_size - 1] &= _S_bitmask;
          return __r;
        }
    }
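    // Illustration (not part of the header): an unsanitized _BitMask<4>
    // constructed from 0xff keeps junk in the 4 unused high bits;
    // _M_sanitized() masks them off:
    //
    //   _BitMask<4> __k(0xff);         // _M_bits[0] == 0xff
    //   auto __s = __k._M_sanitized(); // _SanitizedBitMask<4>, bits == 0xf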
    template <size_t _Mp, bool _LSanitized>
      constexpr _BitMask<_Np + _Mp, _Sanitized>
      _M_prepend(_BitMask<_Mp, _LSanitized> __lsb) const noexcept
      {
        constexpr size_t _RN = _Np + _Mp;
        using _Rp = _BitMask<_RN, _Sanitized>;
        if constexpr (_Rp::_S_array_size == 1)
          {
            _Rp __r{{_M_bits[0]}};
            __r._M_bits[0] <<= _Mp;
            __r._M_bits[0] |= __lsb._M_sanitized()._M_bits[0];
            return __r;
          }
        else
          __assert_unreachable<_Rp>();
      }

    // Return a new _BitMask with size _NewSize while dropping _DropLsb least
    // significant bits. If the operation implicitly produces a sanitized
    // bitmask, the result type will have _Sanitized set.
    template <size_t _DropLsb, size_t _NewSize = _Np - _DropLsb>
      constexpr auto
      _M_extract() const noexcept
      {
        static_assert(_Np > _DropLsb);
        static_assert(_DropLsb + _NewSize <= sizeof(_ULLong) * __CHAR_BIT__,
                      "not implemented for bitmasks larger than one ullong");
        if constexpr (_NewSize == 1)
          // must sanitize because the return _Tp is bool
          return _SanitizedBitMask<1>(_M_bits[0] & (_Tp(1) << _DropLsb));
        else
          return _BitMask<_NewSize,
                          ((_NewSize + _DropLsb == sizeof(_Tp) * __CHAR_BIT__
                              && _NewSize + _DropLsb <= _Np)
                             || ((_Sanitized || _Np == sizeof(_Tp) * __CHAR_BIT__)
                                   && _NewSize + _DropLsb >= _Np))>(
                   _M_bits[0] >> _DropLsb);
      }

    // True if all bits are set. Implicitly sanitizes if _Sanitized == false.
    constexpr bool
    all() const noexcept
    {
      if constexpr (_Np == 1)
        return _M_bits[0];
      else if constexpr (!_Sanitized)
        return _M_sanitized().all();
      else
        {
          constexpr _Tp __allbits = ~_Tp();
          for (int __i = 0; __i < _S_array_size - 1; ++__i)
            if (_M_bits[__i] != __allbits)
              return false;
          return _M_bits[_S_array_size - 1] == _S_bitmask;
        }
    }

    // True if at least one bit is set. Implicitly sanitizes if
    // _Sanitized == false.
    constexpr bool
    any() const noexcept
    {
      if constexpr (_Np == 1)
        return _M_bits[0];
      else if constexpr (!_Sanitized)
        return _M_sanitized().any();
      else
        {
          for (int __i = 0; __i < _S_array_size - 1; ++__i)
            if (_M_bits[__i] != 0)
              return true;
          return _M_bits[_S_array_size - 1] != 0;
        }
    }

    // True if no bit is set. Implicitly sanitizes if _Sanitized == false.
    constexpr bool
    none() const noexcept
    {
      if constexpr (_Np == 1)
        return !_M_bits[0];
      else if constexpr (!_Sanitized)
        return _M_sanitized().none();
      else
        {
          for (int __i = 0; __i < _S_array_size - 1; ++__i)
            if (_M_bits[__i] != 0)
              return false;
          return _M_bits[_S_array_size - 1] == 0;
        }
    }

    // Returns the number of set bits. Implicitly sanitizes if
    // _Sanitized == false.
    constexpr int
    count() const noexcept
    {
      if constexpr (_Np == 1)
        return _M_bits[0];
      else if constexpr (!_Sanitized)
        return _M_sanitized().count();
      else
        {
          int __result = __builtin_popcountll(_M_bits[0]);
          for (int __i = 1; __i < _S_array_size; ++__i)
            __result += __builtin_popcountll(_M_bits[__i]);
          return __result;
        }
    }
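    // Illustration (not part of the header):
    //
    //   _SanitizedBitMask<4> __k(0b0101);
    //   // __k.all() == false, __k.any() == true,
    //   // __k.none() == false, __k.count() == 2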
    // Returns the bit at offset __i as bool.
    constexpr bool
    operator[](size_t __i) const noexcept
    {
      if constexpr (_Np == 1)
        return _M_bits[0];
      else if constexpr (_S_array_size == 1)
        return (_M_bits[0] >> __i) & 1;
      else
        {
          const size_t __j = __i / (sizeof(_Tp) * __CHAR_BIT__);
          const size_t __shift = __i % (sizeof(_Tp) * __CHAR_BIT__);
          return (_M_bits[__j] >> __shift) & 1;
        }
    }

    template <size_t __i>
      constexpr bool
      operator[](_SizeConstant<__i>) const noexcept
      {
        static_assert(__i < _Np);
        constexpr size_t __j = __i / (sizeof(_Tp) * __CHAR_BIT__);
        constexpr size_t __shift = __i % (sizeof(_Tp) * __CHAR_BIT__);
        return static_cast<bool>(_M_bits[__j] & (_Tp(1) << __shift));
      }

    // Set the bit at offset __i to __x.
    constexpr void
    set(size_t __i, bool __x) noexcept
    {
      if constexpr (_Np == 1)
        _M_bits[0] = __x;
      else if constexpr (_S_array_size == 1)
        {
          _M_bits[0] &= ~_Tp(_Tp(1) << __i);
          _M_bits[0] |= _Tp(_Tp(__x) << __i);
        }
      else
        {
          const size_t __j = __i / (sizeof(_Tp) * __CHAR_BIT__);
          const size_t __shift = __i % (sizeof(_Tp) * __CHAR_BIT__);
          _M_bits[__j] &= ~_Tp(_Tp(1) << __shift);
          _M_bits[__j] |= _Tp(_Tp(__x) << __shift);
        }
    }

    template <size_t __i>
      constexpr void
      set(_SizeConstant<__i>, bool __x) noexcept
      {
        static_assert(__i < _Np);
        if constexpr (_Np == 1)
          _M_bits[0] = __x;
        else
          {
            constexpr size_t __j = __i / (sizeof(_Tp) * __CHAR_BIT__);
            constexpr size_t __shift = __i % (sizeof(_Tp) * __CHAR_BIT__);
            constexpr _Tp __mask = ~_Tp(_Tp(1) << __shift);
            _M_bits[__j] &= __mask;
            _M_bits[__j] |= _Tp(_Tp(__x) << __shift);
          }
      }
    // Inverts all bits. Sanitized input leads to sanitized output.
    constexpr _BitMask
    operator~() const noexcept
    {
      if constexpr (_Np == 1)
        return !_M_bits[0];
      else
        {
          _BitMask __result{};
          for (int __i = 0; __i < _S_array_size - 1; ++__i)
            __result._M_bits[__i] = ~_M_bits[__i];
          if constexpr (_Sanitized)
            __result._M_bits[_S_array_size - 1]
              = _M_bits[_S_array_size - 1] ^ _S_bitmask;
          else
            __result._M_bits[_S_array_size - 1] = ~_M_bits[_S_array_size - 1];
          return __result;
        }
    }

    constexpr _BitMask&
    operator^=(const _BitMask& __b) & noexcept
    {
      __execute_n_times<_S_array_size>(
        [&](auto __i) { _M_bits[__i] ^= __b._M_bits[__i]; });
      return *this;
    }

    constexpr _BitMask&
    operator|=(const _BitMask& __b) & noexcept
    {
      __execute_n_times<_S_array_size>(
        [&](auto __i) { _M_bits[__i] |= __b._M_bits[__i]; });
      return *this;
    }

    constexpr _BitMask&
    operator&=(const _BitMask& __b) & noexcept
    {
      __execute_n_times<_S_array_size>(
        [&](auto __i) { _M_bits[__i] &= __b._M_bits[__i]; });
      return *this;
    }

    friend constexpr _BitMask
    operator^(const _BitMask& __a, const _BitMask& __b) noexcept
    {
      _BitMask __r = __a;
      __r ^= __b;
      return __r;
    }

    friend constexpr _BitMask
    operator|(const _BitMask& __a, const _BitMask& __b) noexcept
    {
      _BitMask __r = __a;
      __r |= __b;
      return __r;
    }

    friend constexpr _BitMask
    operator&(const _BitMask& __a, const _BitMask& __b) noexcept
    {
      _BitMask __r = __a;
      __r &= __b;
      return __r;
    }

    _GLIBCXX_SIMD_INTRINSIC
    constexpr bool
    _M_is_constprop() const
    {
      if constexpr (_S_array_size == 1)
        return __builtin_constant_p(_M_bits[0]);
      else
        {
          for (int __i = 0; __i < _S_array_size; ++__i)
            if (!__builtin_constant_p(_M_bits[__i]))
              return false;
          return true;
        }
    }
  };

// }}}

// vvv ---- builtin vector types [[gnu::vector_size(N)]] and operations ---- vvv
// __min_vector_size {{{
template <typename _Tp = void>
  static inline constexpr int __min_vector_size = 2 * sizeof(_Tp);

#if _GLIBCXX_SIMD_HAVE_NEON
template <>
  inline constexpr int __min_vector_size<void> = 8;
#else
template <>
  inline constexpr int __min_vector_size<void> = 16;
#endif

// }}}
// __vector_type {{{
template <typename _Tp, size_t _Np, typename = void>
  struct __vector_type_n {};

// substitution failure for 0-element case
template <typename _Tp>
  struct __vector_type_n<_Tp, 0, void> {};

// special case 1-element to be _Tp itself
template <typename _Tp>
  struct __vector_type_n<_Tp, 1, enable_if_t<__is_vectorizable_v<_Tp>>>
  { using type = _Tp; };

// else, use GNU-style builtin vector types
template <typename _Tp, size_t _Np>
  struct __vector_type_n<_Tp, _Np,
                         enable_if_t<__is_vectorizable_v<_Tp> && _Np >= 2>>
  {
    static constexpr size_t _S_Np2 = std::__bit_ceil(_Np * sizeof(_Tp));

    static constexpr size_t _S_Bytes =
#ifdef __i386__
      // Using [[gnu::vector_size(8)]] would wreak havoc on the FPU because
      // those objects are passed via MMX registers and nothing ever calls EMMS.
      _S_Np2 == 8 ? 16 :
#endif
      _S_Np2 < __min_vector_size<_Tp> ? __min_vector_size<_Tp>
                                      : _S_Np2;

    using type [[__gnu__::__vector_size__(_S_Bytes)]] = _Tp;
  };

template <typename _Tp, size_t _Bytes, size_t = _Bytes % sizeof(_Tp)>
  struct __vector_type;

template <typename _Tp, size_t _Bytes>
  struct __vector_type<_Tp, _Bytes, 0>
  : __vector_type_n<_Tp, _Bytes / sizeof(_Tp)> {};

template <typename _Tp, size_t _Size>
  using __vector_type_t = typename __vector_type_n<_Tp, _Size>::type;

template <typename _Tp>
  using __vector_type2_t = typename __vector_type<_Tp, 2>::type;
template <typename _Tp>
  using __vector_type4_t = typename __vector_type<_Tp, 4>::type;
template <typename _Tp>
  using __vector_type8_t = typename __vector_type<_Tp, 8>::type;
template <typename _Tp>
  using __vector_type16_t = typename __vector_type<_Tp, 16>::type;
template <typename _Tp>
  using __vector_type32_t = typename __vector_type<_Tp, 32>::type;
template <typename _Tp>
  using __vector_type64_t = typename __vector_type<_Tp, 64>::type;

// }}}
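// Illustration (not part of the header), assuming a typical non-i386 target:
//
//   static_assert(sizeof(__vector_type_t<float, 4>) == 16);
//   static_assert(sizeof(__vector_type_t<float, 3>) == 16); // rounded up
//   static_assert(is_same_v<__vector_type_t<float, 1>, float>); // scalar case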
// __is_vector_type {{{
template <typename _Tp, typename = void_t<>>
  struct __is_vector_type : false_type {};

template <typename _Tp>
  struct __is_vector_type<
    _Tp, void_t<typename __vector_type<
           remove_reference_t<decltype(declval<_Tp>()[0])>, sizeof(_Tp)>::type>>
  : is_same<_Tp, typename __vector_type<
                   remove_reference_t<decltype(declval<_Tp>()[0])>,
                   sizeof(_Tp)>::type> {};

template <typename _Tp>
  inline constexpr bool __is_vector_type_v = __is_vector_type<_Tp>::value;

// }}}
// __is_intrinsic_type {{{
#if _GLIBCXX_SIMD_HAVE_SSE_ABI
template <typename _Tp>
  using __is_intrinsic_type = __is_vector_type<_Tp>;
#else // not SSE (x86)
template <typename _Tp, typename = void_t<>>
  struct __is_intrinsic_type : false_type {};

template <typename _Tp>
  struct __is_intrinsic_type<
    _Tp, void_t<typename __intrinsic_type<
           remove_reference_t<decltype(declval<_Tp>()[0])>, sizeof(_Tp)>::type>>
  : is_same<_Tp, typename __intrinsic_type<
                   remove_reference_t<decltype(declval<_Tp>()[0])>,
                   sizeof(_Tp)>::type> {};
#endif

template <typename _Tp>
  inline constexpr bool __is_intrinsic_type_v = __is_intrinsic_type<_Tp>::value;

// }}}
// _VectorTraits{{{
template <typename _Tp, typename = void_t<>>
  struct _VectorTraitsImpl;

template <typename _Tp>
  struct _VectorTraitsImpl<_Tp, enable_if_t<__is_vector_type_v<_Tp>
                                              || __is_intrinsic_type_v<_Tp>>>
  {
    using type = _Tp;
    using value_type = remove_reference_t<decltype(declval<_Tp>()[0])>;
    static constexpr int _S_full_size = sizeof(_Tp) / sizeof(value_type);
    using _Wrapper = _SimdWrapper<value_type, _S_full_size>;
    template <typename _Up, int _W = _S_full_size>
      static constexpr bool _S_is
        = is_same_v<value_type, _Up> && _W == _S_full_size;
  };
template <typename _Tp, size_t _Np>
  struct _VectorTraitsImpl<_SimdWrapper<_Tp, _Np>,
                           void_t<__vector_type_t<_Tp, _Np>>>
  {
    using type = __vector_type_t<_Tp, _Np>;
    using value_type = _Tp;
    static constexpr int _S_full_size = sizeof(type) / sizeof(value_type);
    using _Wrapper = _SimdWrapper<_Tp, _Np>;
    static constexpr bool _S_is_partial = (_Np < _S_full_size);
    static constexpr int _S_partial_width = _Np;
    template <typename _Up, int _W = _S_full_size>
      static constexpr bool _S_is
        = is_same_v<value_type, _Up> && _W == _S_full_size;
  };

template <typename _Tp, typename = typename _VectorTraitsImpl<_Tp>::type>
  using _VectorTraits = _VectorTraitsImpl<_Tp>;

// }}}
// __as_vector{{{
template <typename _V>
  _GLIBCXX_SIMD_INTRINSIC constexpr auto
  __as_vector(_V __x)
  {
    if constexpr (__is_vector_type_v<_V>)
      return __x;
    else if constexpr (is_simd<_V>::value || is_simd_mask<_V>::value)
      return __data(__x)._M_data;
    else if constexpr (__is_vectorizable_v<_V>)
      return __vector_type_t<_V, 2>{__x};
    else
      return __x._M_data;
  }

// }}}
// __as_wrapper{{{
template <size_t _Np = 0, typename _V>
  _GLIBCXX_SIMD_INTRINSIC constexpr auto
  __as_wrapper(_V __x)
  {
    if constexpr (__is_vector_type_v<_V>)
      return _SimdWrapper<typename _VectorTraits<_V>::value_type,
                          (_Np > 0 ? _Np : _VectorTraits<_V>::_S_full_size)>(__x);
    else if constexpr (is_simd<_V>::value || is_simd_mask<_V>::value)
      {
        static_assert(_V::size() == _Np);
        return __data(__x);
      }
    else
      {
        static_assert(_V::_S_size == _Np);
        return __x;
      }
  }

// }}}
// __intrin_bitcast{{{
template <typename _To, typename _From>
  _GLIBCXX_SIMD_INTRINSIC constexpr _To
  __intrin_bitcast(_From __v)
  {
    static_assert((__is_vector_type_v<_From> || __is_intrinsic_type_v<_From>)
                    && (__is_vector_type_v<_To> || __is_intrinsic_type_v<_To>));
    if constexpr (sizeof(_To) == sizeof(_From))
      return reinterpret_cast<_To>(__v);
    else if constexpr (sizeof(_From) > sizeof(_To))
      if constexpr (sizeof(_To) >= 16)
        return reinterpret_cast<const __may_alias<_To>&>(__v);
      else
        {
          _To __r;
          __builtin_memcpy(&__r, &__v, sizeof(_To));
          return __r;
        }
#if _GLIBCXX_SIMD_X86INTRIN && !defined __clang__
    else if constexpr (__have_avx && sizeof(_From) == 16 && sizeof(_To) == 32)
      return reinterpret_cast<_To>(__builtin_ia32_ps256_ps(
        reinterpret_cast<__vector_type_t<float, 4>>(__v)));
    else if constexpr (__have_avx512f && sizeof(_From) == 16
                         && sizeof(_To) == 64)
      return reinterpret_cast<_To>(__builtin_ia32_ps512_ps(
        reinterpret_cast<__vector_type_t<float, 4>>(__v)));
    else if constexpr (__have_avx512f && sizeof(_From) == 32
                         && sizeof(_To) == 64)
      return reinterpret_cast<_To>(__builtin_ia32_ps512_256ps(
        reinterpret_cast<__vector_type_t<float, 8>>(__v)));
#endif // _GLIBCXX_SIMD_X86INTRIN
    else if constexpr (sizeof(__v) <= 8)
      return reinterpret_cast<_To>(
        __vector_type_t<__int_for_sizeof_t<_From>, sizeof(_To) / sizeof(_From)>{
          reinterpret_cast<__int_for_sizeof_t<_From>>(__v)});
    else
      {
        static_assert(sizeof(_To) > sizeof(_From));
        _To __r = {};
        __builtin_memcpy(&__r, &__v, sizeof(_From));
        return __r;
      }
  }

// }}}
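// Illustration (not part of the header): __intrin_bitcast reinterprets when
// the sizes match, truncates to a narrower register type, or widens to a
// larger one (via cast intrinsics on x86, otherwise via a zero-initialized
// memcpy). E.g., extracting the low half of a 32-byte vector:
//
//   __vector_type_t<float, 8> __w = {};
//   auto __lo = __intrin_bitcast<__vector_type_t<float, 4>>(__w);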
// __vector_bitcast{{{
template <typename _To, size_t _NN = 0, typename _From,
          typename _FromVT = _VectorTraits<_From>,
          size_t _Np = _NN == 0 ? sizeof(_From) / sizeof(_To) : _NN>
  _GLIBCXX_SIMD_INTRINSIC constexpr __vector_type_t<_To, _Np>
  __vector_bitcast(_From __x)
  {
    using _R = __vector_type_t<_To, _Np>;
    return __intrin_bitcast<_R>(__x);
  }

template <typename _To, size_t _NN = 0, typename _Tp, size_t _Nx,
          size_t _Np
            = _NN == 0 ? sizeof(_SimdWrapper<_Tp, _Nx>) / sizeof(_To) : _NN>
  _GLIBCXX_SIMD_INTRINSIC constexpr __vector_type_t<_To, _Np>
  __vector_bitcast(const _SimdWrapper<_Tp, _Nx>& __x)
  {
    static_assert(_Np > 1);
    return __intrin_bitcast<__vector_type_t<_To, _Np>>(__x._M_data);
  }

// }}}
// __convert_x86 declarations {{{
#ifdef _GLIBCXX_SIMD_WORKAROUND_PR85048
template <typename _To, typename _Tp, typename _TVT = _VectorTraits<_Tp>>
  _To __convert_x86(_Tp);

template <typename _To, typename _Tp, typename _TVT = _VectorTraits<_Tp>>
  _To __convert_x86(_Tp, _Tp);

template <typename _To, typename _Tp, typename _TVT = _VectorTraits<_Tp>>
  _To __convert_x86(_Tp, _Tp, _Tp, _Tp);

template <typename _To, typename _Tp, typename _TVT = _VectorTraits<_Tp>>
  _To __convert_x86(_Tp, _Tp, _Tp, _Tp, _Tp, _Tp, _Tp, _Tp);

template <typename _To, typename _Tp, typename _TVT = _VectorTraits<_Tp>>
  _To __convert_x86(_Tp, _Tp, _Tp, _Tp, _Tp, _Tp, _Tp, _Tp, _Tp, _Tp, _Tp, _Tp,
                    _Tp, _Tp, _Tp, _Tp);
#endif // _GLIBCXX_SIMD_WORKAROUND_PR85048

//}}}
// __bit_cast {{{
template <typename _To, typename _From>
  _GLIBCXX_SIMD_INTRINSIC constexpr _To
  __bit_cast(const _From __x)
  {
#if __has_builtin(__builtin_bit_cast)
    return __builtin_bit_cast(_To, __x);
#else
    static_assert(sizeof(_To) == sizeof(_From));
    constexpr bool __to_is_vectorizable
      = is_arithmetic_v<_To> || is_enum_v<_To>;
    constexpr bool __from_is_vectorizable
      = is_arithmetic_v<_From> || is_enum_v<_From>;
    if constexpr (__is_vector_type_v<_To> && __is_vector_type_v<_From>)
      return reinterpret_cast<_To>(__x);
    else if constexpr (__is_vector_type_v<_To> && __from_is_vectorizable)
      {
        using _FV [[gnu::vector_size(sizeof(_From))]] = _From;
        return reinterpret_cast<_To>(_FV{__x});
      }
    else if constexpr (__to_is_vectorizable && __from_is_vectorizable)
      {
        using _TV [[gnu::vector_size(sizeof(_To))]] = _To;
        using _FV [[gnu::vector_size(sizeof(_From))]] = _From;
        return reinterpret_cast<_TV>(_FV{__x})[0];
      }
    else if constexpr (__to_is_vectorizable && __is_vector_type_v<_From>)
      {
        using _TV [[gnu::vector_size(sizeof(_To))]] = _To;
        return reinterpret_cast<_TV>(__x)[0];
      }
    else
      {
        _To __r;
        __builtin_memcpy(reinterpret_cast<char*>(&__r),
                         reinterpret_cast<const char*>(&__x), sizeof(_To));
        return __r;
      }
#endif
  }

// }}}
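// Illustration (not part of the header): __bit_cast is the pre-C++20 stand-in
// for std::bit_cast, e.g. for inspecting the bit pattern of a float:
//
//   unsigned __bits = __bit_cast<unsigned>(1.f); // 0x3f800000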
// __to_intrin {{{
template <typename _Tp, typename _TVT = _VectorTraits<_Tp>,
          typename _R
            = __intrinsic_type_t<typename _TVT::value_type, _TVT::_S_full_size>>
  _GLIBCXX_SIMD_INTRINSIC constexpr _R
  __to_intrin(_Tp __x)
  {
    static_assert(sizeof(__x) <= sizeof(_R),
                  "__to_intrin may never drop values off the end");
    if constexpr (sizeof(__x) == sizeof(_R))
      return reinterpret_cast<_R>(__as_vector(__x));
    else
      {
        using _Up = __int_for_sizeof_t<_Tp>;
        return reinterpret_cast<_R>(
          __vector_type_t<_Up, sizeof(_R) / sizeof(_Up)>{__bit_cast<_Up>(__x)});
      }
  }

// }}}
// __make_vector{{{
template <typename _Tp, typename... _Args>
  _GLIBCXX_SIMD_INTRINSIC constexpr __vector_type_t<_Tp, sizeof...(_Args)>
  __make_vector(const _Args&... __args)
  { return __vector_type_t<_Tp, sizeof...(_Args)>{static_cast<_Tp>(__args)...}; }

// }}}
// __vector_broadcast{{{
template <size_t _Np, typename _Tp, size_t... _I>
  _GLIBCXX_SIMD_INTRINSIC constexpr __vector_type_t<_Tp, _Np>
  __vector_broadcast_impl(_Tp __x, index_sequence<_I...>)
  { return __vector_type_t<_Tp, _Np>{((void)_I, __x)...}; }

template <size_t _Np, typename _Tp>
  _GLIBCXX_SIMD_INTRINSIC constexpr __vector_type_t<_Tp, _Np>
  __vector_broadcast(_Tp __x)
  { return __vector_broadcast_impl<_Np, _Tp>(__x, make_index_sequence<_Np>()); }

// }}}
// __generate_vector{{{
template <typename _Tp, size_t _Np, typename _Gp, size_t... _I>
  _GLIBCXX_SIMD_INTRINSIC constexpr __vector_type_t<_Tp, _Np>
  __generate_vector_impl(_Gp&& __gen, index_sequence<_I...>)
  {
    return __vector_type_t<_Tp, _Np>{
      static_cast<_Tp>(__gen(_SizeConstant<_I>()))...};
  }

template <typename _V, typename _VVT = _VectorTraits<_V>, typename _Gp>
  _GLIBCXX_SIMD_INTRINSIC constexpr _V
  __generate_vector(_Gp&& __gen)
  {
    if constexpr (__is_vector_type_v<_V>)
      return __generate_vector_impl<typename _VVT::value_type,
                                    _VVT::_S_full_size>(
               static_cast<_Gp&&>(__gen),
               make_index_sequence<_VVT::_S_full_size>());
    else
      return __generate_vector_impl<typename _VVT::value_type,
                                    _VVT::_S_partial_width>(
               static_cast<_Gp&&>(__gen),
               make_index_sequence<_VVT::_S_partial_width>());
  }

template <typename _Tp, size_t _Np, typename _Gp>
  _GLIBCXX_SIMD_INTRINSIC constexpr __vector_type_t<_Tp, _Np>
  __generate_vector(_Gp&& __gen)
  {
    return __generate_vector_impl<_Tp, _Np>(static_cast<_Gp&&>(__gen),
                                            make_index_sequence<_Np>());
  }

// }}}
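// Illustration (not part of the header):
//
//   auto __iota = __generate_vector<int, 4>([](auto __i) { return int(__i); });
//   // __iota == {0, 1, 2, 3}
//   auto __ones = __vector_broadcast<4>(1.f); // {1, 1, 1, 1}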
conditional_t<__is_simd_wrapper_v<_TW>, _TW, 1897 _VectorTraitsImpl<_TW>>::value_type; 1898 if constexpr (is_floating_point_v<_Tp>) 1899 { 1900 using _Ip = make_unsigned_t<__int_for_sizeof_t<_Tp>>; 1901 return __vector_bitcast<_Tp>(__vector_bitcast<_Ip>(__a) 1902 & __vector_bitcast<_Ip>(__b)); 1903 } 1904 else if constexpr (__is_vector_type_v<_TW>) 1905 return __a & __b; 1906 else 1907 return __a._M_data & __b._M_data; 1908 } 1909 else 1910 return __a & __b; 1911 } 1912 1913 // }}} 1914 // __andnot{{{ 1915 #if _GLIBCXX_SIMD_X86INTRIN && !defined __clang__ 1916 static constexpr struct 1917 { 1918 _GLIBCXX_SIMD_INTRINSIC __v4sf 1919 operator()(__v4sf __a, __v4sf __b) const noexcept 1920 { return __builtin_ia32_andnps(__a, __b); } 1921 1922 _GLIBCXX_SIMD_INTRINSIC __v2df 1923 operator()(__v2df __a, __v2df __b) const noexcept 1924 { return __builtin_ia32_andnpd(__a, __b); } 1925 1926 _GLIBCXX_SIMD_INTRINSIC __v2di 1927 operator()(__v2di __a, __v2di __b) const noexcept 1928 { return __builtin_ia32_pandn128(__a, __b); } 1929 1930 _GLIBCXX_SIMD_INTRINSIC __v8sf 1931 operator()(__v8sf __a, __v8sf __b) const noexcept 1932 { return __builtin_ia32_andnps256(__a, __b); } 1933 1934 _GLIBCXX_SIMD_INTRINSIC __v4df 1935 operator()(__v4df __a, __v4df __b) const noexcept 1936 { return __builtin_ia32_andnpd256(__a, __b); } 1937 1938 _GLIBCXX_SIMD_INTRINSIC __v4di 1939 operator()(__v4di __a, __v4di __b) const noexcept 1940 { 1941 if constexpr (__have_avx2) 1942 return __builtin_ia32_andnotsi256(__a, __b); 1943 else 1944 return reinterpret_cast<__v4di>( 1945 __builtin_ia32_andnpd256(reinterpret_cast<__v4df>(__a), 1946 reinterpret_cast<__v4df>(__b))); 1947 } 1948 1949 _GLIBCXX_SIMD_INTRINSIC __v16sf 1950 operator()(__v16sf __a, __v16sf __b) const noexcept 1951 { 1952 if constexpr (__have_avx512dq) 1953 return _mm512_andnot_ps(__a, __b); 1954 else 1955 return reinterpret_cast<__v16sf>( 1956 _mm512_andnot_si512(reinterpret_cast<__v8di>(__a), 1957 reinterpret_cast<__v8di>(__b))); 1958 } 1959 1960 _GLIBCXX_SIMD_INTRINSIC __v8df 1961 operator()(__v8df __a, __v8df __b) const noexcept 1962 { 1963 if constexpr (__have_avx512dq) 1964 return _mm512_andnot_pd(__a, __b); 1965 else 1966 return reinterpret_cast<__v8df>( 1967 _mm512_andnot_si512(reinterpret_cast<__v8di>(__a), 1968 reinterpret_cast<__v8di>(__b))); 1969 } 1970 1971 _GLIBCXX_SIMD_INTRINSIC __v8di 1972 operator()(__v8di __a, __v8di __b) const noexcept 1973 { return _mm512_andnot_si512(__a, __b); } 1974 } _S_x86_andnot; 1975 #endif // _GLIBCXX_SIMD_X86INTRIN && !__clang__ 1976 1977 template <typename _TW> 1978 _GLIBCXX_SIMD_INTRINSIC constexpr _TW 1979 __andnot(_TW __a, _TW __b) noexcept 1980 { 1981 if constexpr (__is_vector_type_v<_TW> || __is_simd_wrapper_v<_TW>) 1982 { 1983 using _TVT = conditional_t<__is_simd_wrapper_v<_TW>, _TW, 1984 _VectorTraitsImpl<_TW>>; 1985 using _Tp = typename _TVT::value_type; 1986 #if _GLIBCXX_SIMD_X86INTRIN && !defined __clang__ 1987 if constexpr (sizeof(_TW) >= 16) 1988 { 1989 const auto __ai = __to_intrin(__a); 1990 const auto __bi = __to_intrin(__b); 1991 if (!__builtin_is_constant_evaluated() 1992 && !(__builtin_constant_p(__ai) && __builtin_constant_p(__bi))) 1993 { 1994 const auto __r = _S_x86_andnot(__ai, __bi); 1995 if constexpr (is_convertible_v<decltype(__r), _TW>) 1996 return __r; 1997 else 1998 return reinterpret_cast<typename _TVT::type>(__r); 1999 } 2000 } 2001 #endif // _GLIBCXX_SIMD_X86INTRIN 2002 using _Ip = make_unsigned_t<__int_for_sizeof_t<_Tp>>; 2003 return __vector_bitcast<_Tp>(~__vector_bitcast<_Ip>(__a) 2004 
& __vector_bitcast<_Ip>(__b)); 2005 } 2006 else 2007 return ~__a & __b; 2008 } 2009 2010 // }}} 2011 // __not{{{ 2012 template <typename _Tp, typename _TVT = _VectorTraits<_Tp>> 2013 _GLIBCXX_SIMD_INTRINSIC constexpr _Tp 2014 __not(_Tp __a) noexcept 2015 { 2016 if constexpr (is_floating_point_v<typename _TVT::value_type>) 2017 return reinterpret_cast<typename _TVT::type>( 2018 ~__vector_bitcast<unsigned>(__a)); 2019 else 2020 return ~__a; 2021 } 2022 2023 // }}} 2024 // __concat{{{ 2025 template <typename _Tp, typename _TVT = _VectorTraits<_Tp>, 2026 typename _R = __vector_type_t<typename _TVT::value_type, 2027 _TVT::_S_full_size * 2>> 2028 constexpr _R 2029 __concat(_Tp a_, _Tp b_) 2030 { 2031 #ifdef _GLIBCXX_SIMD_WORKAROUND_XXX_1 2032 using _W 2033 = conditional_t<is_floating_point_v<typename _TVT::value_type>, double, 2034 conditional_t<(sizeof(_Tp) >= 2 * sizeof(long long)), 2035 long long, typename _TVT::value_type>>; 2036 constexpr int input_width = sizeof(_Tp) / sizeof(_W); 2037 const auto __a = __vector_bitcast<_W>(a_); 2038 const auto __b = __vector_bitcast<_W>(b_); 2039 using _Up = __vector_type_t<_W, sizeof(_R) / sizeof(_W)>; 2040 #else 2041 constexpr int input_width = _TVT::_S_full_size; 2042 const _Tp& __a = a_; 2043 const _Tp& __b = b_; 2044 using _Up = _R; 2045 #endif 2046 if constexpr (input_width == 2) 2047 return reinterpret_cast<_R>(_Up{__a[0], __a[1], __b[0], __b[1]}); 2048 else if constexpr (input_width == 4) 2049 return reinterpret_cast<_R>( 2050 _Up{__a[0], __a[1], __a[2], __a[3], __b[0], __b[1], __b[2], __b[3]}); 2051 else if constexpr (input_width == 8) 2052 return reinterpret_cast<_R>( 2053 _Up{__a[0], __a[1], __a[2], __a[3], __a[4], __a[5], __a[6], __a[7], 2054 __b[0], __b[1], __b[2], __b[3], __b[4], __b[5], __b[6], __b[7]}); 2055 else if constexpr (input_width == 16) 2056 return reinterpret_cast<_R>( 2057 _Up{__a[0], __a[1], __a[2], __a[3], __a[4], __a[5], __a[6], 2058 __a[7], __a[8], __a[9], __a[10], __a[11], __a[12], __a[13], 2059 __a[14], __a[15], __b[0], __b[1], __b[2], __b[3], __b[4], 2060 __b[5], __b[6], __b[7], __b[8], __b[9], __b[10], __b[11], 2061 __b[12], __b[13], __b[14], __b[15]}); 2062 else if constexpr (input_width == 32) 2063 return reinterpret_cast<_R>( 2064 _Up{__a[0], __a[1], __a[2], __a[3], __a[4], __a[5], __a[6], 2065 __a[7], __a[8], __a[9], __a[10], __a[11], __a[12], __a[13], 2066 __a[14], __a[15], __a[16], __a[17], __a[18], __a[19], __a[20], 2067 __a[21], __a[22], __a[23], __a[24], __a[25], __a[26], __a[27], 2068 __a[28], __a[29], __a[30], __a[31], __b[0], __b[1], __b[2], 2069 __b[3], __b[4], __b[5], __b[6], __b[7], __b[8], __b[9], 2070 __b[10], __b[11], __b[12], __b[13], __b[14], __b[15], __b[16], 2071 __b[17], __b[18], __b[19], __b[20], __b[21], __b[22], __b[23], 2072 __b[24], __b[25], __b[26], __b[27], __b[28], __b[29], __b[30], 2073 __b[31]}); 2074 } 2075 2076 // }}} 2077 // __zero_extend {{{ 2078 template <typename _Tp, typename _TVT = _VectorTraits<_Tp>> 2079 struct _ZeroExtendProxy 2080 { 2081 using value_type = typename _TVT::value_type; 2082 static constexpr size_t _Np = _TVT::_S_full_size; 2083 const _Tp __x; 2084 2085 template <typename _To, typename _ToVT = _VectorTraits<_To>, 2086 typename 2087 = enable_if_t<is_same_v<typename _ToVT::value_type, value_type>>> 2088 _GLIBCXX_SIMD_INTRINSIC operator _To() const 2089 { 2090 constexpr size_t _ToN = _ToVT::_S_full_size; 2091 if constexpr (_ToN == _Np) 2092 return __x; 2093 else if constexpr (_ToN == 2 * _Np) 2094 { 2095 #ifdef _GLIBCXX_SIMD_WORKAROUND_XXX_3 2096 if constexpr 
(__have_avx && _TVT::template _S_is<float, 4>) 2097 return __vector_bitcast<value_type>( 2098 _mm256_insertf128_ps(__m256(), __x, 0)); 2099 else if constexpr (__have_avx && _TVT::template _S_is<double, 2>) 2100 return __vector_bitcast<value_type>( 2101 _mm256_insertf128_pd(__m256d(), __x, 0)); 2102 else if constexpr (__have_avx2 && _Np * sizeof(value_type) == 16) 2103 return __vector_bitcast<value_type>( 2104 _mm256_insertf128_si256(__m256i(), __to_intrin(__x), 0)); 2105 else if constexpr (__have_avx512f && _TVT::template _S_is<float, 8>) 2106 { 2107 if constexpr (__have_avx512dq) 2108 return __vector_bitcast<value_type>( 2109 _mm512_insertf32x8(__m512(), __x, 0)); 2110 else 2111 return reinterpret_cast<__m512>( 2112 _mm512_insertf64x4(__m512d(), 2113 reinterpret_cast<__m256d>(__x), 0)); 2114 } 2115 else if constexpr (__have_avx512f 2116 && _TVT::template _S_is<double, 4>) 2117 return __vector_bitcast<value_type>( 2118 _mm512_insertf64x4(__m512d(), __x, 0)); 2119 else if constexpr (__have_avx512f && _Np * sizeof(value_type) == 32) 2120 return __vector_bitcast<value_type>( 2121 _mm512_inserti64x4(__m512i(), __to_intrin(__x), 0)); 2122 #endif 2123 return __concat(__x, _Tp()); 2124 } 2125 else if constexpr (_ToN == 4 * _Np) 2126 { 2127 #ifdef _GLIBCXX_SIMD_WORKAROUND_XXX_3 2128 if constexpr (__have_avx512dq && _TVT::template _S_is<double, 2>) 2129 { 2130 return __vector_bitcast<value_type>( 2131 _mm512_insertf64x2(__m512d(), __x, 0)); 2132 } 2133 else if constexpr (__have_avx512f 2134 && is_floating_point_v<value_type>) 2135 { 2136 return __vector_bitcast<value_type>( 2137 _mm512_insertf32x4(__m512(), reinterpret_cast<__m128>(__x), 2138 0)); 2139 } 2140 else if constexpr (__have_avx512f && _Np * sizeof(value_type) == 16) 2141 { 2142 return __vector_bitcast<value_type>( 2143 _mm512_inserti32x4(__m512i(), __to_intrin(__x), 0)); 2144 } 2145 #endif 2146 return __concat(__concat(__x, _Tp()), 2147 __vector_type_t<value_type, _Np * 2>()); 2148 } 2149 else if constexpr (_ToN == 8 * _Np) 2150 return __concat(operator __vector_type_t<value_type, _Np * 4>(), 2151 __vector_type_t<value_type, _Np * 4>()); 2152 else if constexpr (_ToN == 16 * _Np) 2153 return __concat(operator __vector_type_t<value_type, _Np * 8>(), 2154 __vector_type_t<value_type, _Np * 8>()); 2155 else 2156 __assert_unreachable<_Tp>(); 2157 } 2158 }; 2159 2160 template <typename _Tp, typename _TVT = _VectorTraits<_Tp>> 2161 _GLIBCXX_SIMD_INTRINSIC _ZeroExtendProxy<_Tp, _TVT> 2162 __zero_extend(_Tp __x) 2163 { return {__x}; } 2164 2165 // }}} 2166 // __extract<_Np, By>{{{ 2167 template <int _Offset, 2168 int _SplitBy, 2169 typename _Tp, 2170 typename _TVT = _VectorTraits<_Tp>, 2171 typename _R = __vector_type_t<typename _TVT::value_type, 2172 _TVT::_S_full_size / _SplitBy>> 2173 _GLIBCXX_SIMD_INTRINSIC constexpr _R 2174 __extract(_Tp __in) 2175 { 2176 using value_type = typename _TVT::value_type; 2177 #if _GLIBCXX_SIMD_X86INTRIN // {{{ 2178 if constexpr (sizeof(_Tp) == 64 && _SplitBy == 4 && _Offset > 0) 2179 { 2180 if constexpr (__have_avx512dq && is_same_v<double, value_type>) 2181 return _mm512_extractf64x2_pd(__to_intrin(__in), _Offset); 2182 else if constexpr (is_floating_point_v<value_type>) 2183 return __vector_bitcast<value_type>( 2184 _mm512_extractf32x4_ps(__intrin_bitcast<__m512>(__in), _Offset)); 2185 else 2186 return reinterpret_cast<_R>( 2187 _mm512_extracti32x4_epi32(__intrin_bitcast<__m512i>(__in), 2188 _Offset)); 2189 } 2190 else 2191 #endif // _GLIBCXX_SIMD_X86INTRIN }}} 2192 { 2193 #ifdef _GLIBCXX_SIMD_WORKAROUND_XXX_1 
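// Illustrative note on this workaround path (the numbers are an assumed
// example, not normative): subscripting through the wider element type _W
// below avoids the miscompilation. For __extract<1, 2> of an 8 x float
// input, _W is double, __return_width is 2, and the subscripts read __x[2]
// and __x[3] before the result is reinterpreted back to 4 x float.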
2194 using _W = conditional_t< 2195 is_floating_point_v<value_type>, double, 2196 conditional_t<(sizeof(_R) >= 16), long long, value_type>>; 2197 static_assert(sizeof(_R) % sizeof(_W) == 0); 2198 constexpr int __return_width = sizeof(_R) / sizeof(_W); 2199 using _Up = __vector_type_t<_W, __return_width>; 2200 const auto __x = __vector_bitcast<_W>(__in); 2201 #else 2202 constexpr int __return_width = _TVT::_S_full_size / _SplitBy; 2203 using _Up = _R; 2204 const __vector_type_t<value_type, _TVT::_S_full_size>& __x 2205 = __in; // only needed for _Tp = _SimdWrapper<value_type, _Np> 2206 #endif 2207 constexpr int _O = _Offset * __return_width; 2208 return __call_with_subscripts<__return_width, _O>( 2209 __x, [](auto... __entries) { 2210 return reinterpret_cast<_R>(_Up{__entries...}); 2211 }); 2212 } 2213 } 2214 2215 // }}} 2216 // __lo/__hi64[z]{{{ 2217 template <typename _Tp, 2218 typename _R 2219 = __vector_type8_t<typename _VectorTraits<_Tp>::value_type>> 2220 _GLIBCXX_SIMD_INTRINSIC constexpr _R 2221 __lo64(_Tp __x) 2222 { 2223 _R __r{}; 2224 __builtin_memcpy(&__r, &__x, 8); 2225 return __r; 2226 } 2227 2228 template <typename _Tp, 2229 typename _R 2230 = __vector_type8_t<typename _VectorTraits<_Tp>::value_type>> 2231 _GLIBCXX_SIMD_INTRINSIC constexpr _R 2232 __hi64(_Tp __x) 2233 { 2234 static_assert(sizeof(_Tp) == 16, "use __hi64z if you meant it"); 2235 _R __r{}; 2236 __builtin_memcpy(&__r, reinterpret_cast<const char*>(&__x) + 8, 8); 2237 return __r; 2238 } 2239 2240 template <typename _Tp, 2241 typename _R 2242 = __vector_type8_t<typename _VectorTraits<_Tp>::value_type>> 2243 _GLIBCXX_SIMD_INTRINSIC constexpr _R 2244 __hi64z([[maybe_unused]] _Tp __x) 2245 { 2246 _R __r{}; 2247 if constexpr (sizeof(_Tp) == 16) 2248 __builtin_memcpy(&__r, reinterpret_cast<const char*>(&__x) + 8, 8); 2249 return __r; 2250 } 2251 2252 // }}} 2253 // __lo/__hi128{{{ 2254 template <typename _Tp> 2255 _GLIBCXX_SIMD_INTRINSIC constexpr auto 2256 __lo128(_Tp __x) 2257 { return __extract<0, sizeof(_Tp) / 16>(__x); } 2258 2259 template <typename _Tp> 2260 _GLIBCXX_SIMD_INTRINSIC constexpr auto 2261 __hi128(_Tp __x) 2262 { 2263 static_assert(sizeof(__x) == 32); 2264 return __extract<1, 2>(__x); 2265 } 2266 2267 // }}} 2268 // __lo/__hi256{{{ 2269 template <typename _Tp> 2270 _GLIBCXX_SIMD_INTRINSIC constexpr auto 2271 __lo256(_Tp __x) 2272 { 2273 static_assert(sizeof(__x) == 64); 2274 return __extract<0, 2>(__x); 2275 } 2276 2277 template <typename _Tp> 2278 _GLIBCXX_SIMD_INTRINSIC constexpr auto 2279 __hi256(_Tp __x) 2280 { 2281 static_assert(sizeof(__x) == 64); 2282 return __extract<1, 2>(__x); 2283 } 2284 2285 // }}} 2286 // __auto_bitcast{{{ 2287 template <typename _Tp> 2288 struct _AutoCast 2289 { 2290 static_assert(__is_vector_type_v<_Tp>); 2291 2292 const _Tp __x; 2293 2294 template <typename _Up, typename _UVT = _VectorTraits<_Up>> 2295 _GLIBCXX_SIMD_INTRINSIC constexpr operator _Up() const 2296 { return __intrin_bitcast<typename _UVT::type>(__x); } 2297 }; 2298 2299 template <typename _Tp> 2300 _GLIBCXX_SIMD_INTRINSIC constexpr _AutoCast<_Tp> 2301 __auto_bitcast(const _Tp& __x) 2302 { return {__x}; } 2303 2304 template <typename _Tp, size_t _Np> 2305 _GLIBCXX_SIMD_INTRINSIC constexpr 2306 _AutoCast<typename _SimdWrapper<_Tp, _Np>::_BuiltinType> 2307 __auto_bitcast(const _SimdWrapper<_Tp, _Np>& __x) 2308 { return {__x._M_data}; } 2309 2310 // }}} 2311 // ^^^ ---- builtin vector types [[gnu::vector_size(N)]] and operations ---- ^^^ 2312 2313 #if _GLIBCXX_SIMD_HAVE_SSE_ABI 2314 // 
__bool_storage_member_type{{{ 2315 #if _GLIBCXX_SIMD_HAVE_AVX512F && _GLIBCXX_SIMD_X86INTRIN 2316 template <size_t _Size> 2317 struct __bool_storage_member_type 2318 { 2319 static_assert((_Size & (_Size - 1)) != 0, 2320 "This trait may only be used for non-power-of-2 sizes. " 2321 "Power-of-2 sizes must be specialized."); 2322 using type = 2323 typename __bool_storage_member_type<std::__bit_ceil(_Size)>::type; 2324 }; 2325 2326 template <> 2327 struct __bool_storage_member_type<1> { using type = bool; }; 2328 2329 template <> 2330 struct __bool_storage_member_type<2> { using type = __mmask8; }; 2331 2332 template <> 2333 struct __bool_storage_member_type<4> { using type = __mmask8; }; 2334 2335 template <> 2336 struct __bool_storage_member_type<8> { using type = __mmask8; }; 2337 2338 template <> 2339 struct __bool_storage_member_type<16> { using type = __mmask16; }; 2340 2341 template <> 2342 struct __bool_storage_member_type<32> { using type = __mmask32; }; 2343 2344 template <> 2345 struct __bool_storage_member_type<64> { using type = __mmask64; }; 2346 #endif // _GLIBCXX_SIMD_HAVE_AVX512F 2347 2348 // }}} 2349 // __intrinsic_type (x86){{{ 2350 // the following excludes bool via __is_vectorizable 2351 #if _GLIBCXX_SIMD_HAVE_SSE 2352 template <typename _Tp, size_t _Bytes> 2353 struct __intrinsic_type<_Tp, _Bytes, 2354 enable_if_t<__is_vectorizable_v<_Tp> && _Bytes <= 64>> 2355 { 2356 static_assert(!is_same_v<_Tp, long double>, 2357 "no __intrinsic_type support for long double on x86"); 2358 2359 static constexpr size_t _S_VBytes = _Bytes <= 16 ? 16 2360 : _Bytes <= 32 ? 32 2361 : 64; 2362 2363 using type [[__gnu__::__vector_size__(_S_VBytes)]] 2364 = conditional_t<is_integral_v<_Tp>, long long int, _Tp>; 2365 }; 2366 #endif // _GLIBCXX_SIMD_HAVE_SSE 2367 2368 // }}} 2369 #endif // _GLIBCXX_SIMD_HAVE_SSE_ABI 2370 // __intrinsic_type (ARM){{{ 2371 #if _GLIBCXX_SIMD_HAVE_NEON 2372 template <> 2373 struct __intrinsic_type<float, 8, void> 2374 { using type = float32x2_t; }; 2375 2376 template <> 2377 struct __intrinsic_type<float, 16, void> 2378 { using type = float32x4_t; }; 2379 2380 #if _GLIBCXX_SIMD_HAVE_NEON_A64 2381 template <> 2382 struct __intrinsic_type<double, 8, void> 2383 { using type = float64x1_t; }; 2384 2385 template <> 2386 struct __intrinsic_type<double, 16, void> 2387 { using type = float64x2_t; }; 2388 #endif 2389 2390 #define _GLIBCXX_SIMD_ARM_INTRIN(_Bits, _Np) \ 2391 template <> \ 2392 struct __intrinsic_type<__int_with_sizeof_t<_Bits / 8>, \ 2393 _Np * _Bits / 8, void> \ 2394 { using type = int##_Bits##x##_Np##_t; }; \ 2395 template <> \ 2396 struct __intrinsic_type<make_unsigned_t<__int_with_sizeof_t<_Bits / 8>>, \ 2397 _Np * _Bits / 8, void> \ 2398 { using type = uint##_Bits##x##_Np##_t; } 2399 _GLIBCXX_SIMD_ARM_INTRIN(8, 8); 2400 _GLIBCXX_SIMD_ARM_INTRIN(8, 16); 2401 _GLIBCXX_SIMD_ARM_INTRIN(16, 4); 2402 _GLIBCXX_SIMD_ARM_INTRIN(16, 8); 2403 _GLIBCXX_SIMD_ARM_INTRIN(32, 2); 2404 _GLIBCXX_SIMD_ARM_INTRIN(32, 4); 2405 _GLIBCXX_SIMD_ARM_INTRIN(64, 1); 2406 _GLIBCXX_SIMD_ARM_INTRIN(64, 2); 2407 #undef _GLIBCXX_SIMD_ARM_INTRIN 2408 2409 template <typename _Tp, size_t _Bytes> 2410 struct __intrinsic_type<_Tp, _Bytes, 2411 enable_if_t<__is_vectorizable_v<_Tp> && _Bytes <= 16>> 2412 { 2413 static constexpr int _SVecBytes = _Bytes <= 8 ? 
8 : 16; 2414 using _Ip = __int_for_sizeof_t<_Tp>; 2415 using _Up = conditional_t< 2416 is_floating_point_v<_Tp>, _Tp, 2417 conditional_t<is_unsigned_v<_Tp>, make_unsigned_t<_Ip>, _Ip>>; 2418 static_assert(!is_same_v<_Tp, _Up> || _SVecBytes != _Bytes, 2419 "should use explicit specialization above"); 2420 using type = typename __intrinsic_type<_Up, _SVecBytes>::type; 2421 }; 2422 #endif // _GLIBCXX_SIMD_HAVE_NEON 2423 2424 // }}} 2425 // __intrinsic_type (PPC){{{ 2426 #ifdef __ALTIVEC__ 2427 template <typename _Tp> 2428 struct __intrinsic_type_impl; 2429 2430 #define _GLIBCXX_SIMD_PPC_INTRIN(_Tp) \ 2431 template <> \ 2432 struct __intrinsic_type_impl<_Tp> { using type = __vector _Tp; } 2433 _GLIBCXX_SIMD_PPC_INTRIN(float); 2434 #ifdef __VSX__ 2435 _GLIBCXX_SIMD_PPC_INTRIN(double); 2436 #endif 2437 _GLIBCXX_SIMD_PPC_INTRIN(signed char); 2438 _GLIBCXX_SIMD_PPC_INTRIN(unsigned char); 2439 _GLIBCXX_SIMD_PPC_INTRIN(signed short); 2440 _GLIBCXX_SIMD_PPC_INTRIN(unsigned short); 2441 _GLIBCXX_SIMD_PPC_INTRIN(signed int); 2442 _GLIBCXX_SIMD_PPC_INTRIN(unsigned int); 2443 #if defined __VSX__ || __SIZEOF_LONG__ == 4 2444 _GLIBCXX_SIMD_PPC_INTRIN(signed long); 2445 _GLIBCXX_SIMD_PPC_INTRIN(unsigned long); 2446 #endif 2447 #ifdef __VSX__ 2448 _GLIBCXX_SIMD_PPC_INTRIN(signed long long); 2449 _GLIBCXX_SIMD_PPC_INTRIN(unsigned long long); 2450 #endif 2451 #undef _GLIBCXX_SIMD_PPC_INTRIN 2452 2453 template <typename _Tp, size_t _Bytes> 2454 struct __intrinsic_type<_Tp, _Bytes, 2455 enable_if_t<__is_vectorizable_v<_Tp> && _Bytes <= 16>> 2456 { 2457 static constexpr bool _S_is_ldouble = is_same_v<_Tp, long double>; 2458 // allow _Tp == long double with -mlong-double-64 2459 static_assert(!(_S_is_ldouble && sizeof(long double) > sizeof(double)), 2460 "no __intrinsic_type support for 128-bit floating point on PowerPC"); 2461 #ifndef __VSX__ 2462 static_assert(!(is_same_v<_Tp, double> 2463 || (_S_is_ldouble && sizeof(long double) == sizeof(double))), 2464 "no __intrinsic_type support for 64-bit floating point on PowerPC w/o VSX"); 2465 #endif 2466 using type = 2467 typename __intrinsic_type_impl< 2468 conditional_t<is_floating_point_v<_Tp>, 2469 conditional_t<_S_is_ldouble, double, _Tp>, 2470 __int_for_sizeof_t<_Tp>>>::type; 2471 }; 2472 #endif // __ALTIVEC__ 2473 2474 // }}} 2475 // _SimdWrapper<bool>{{{1 2476 template <size_t _Width> 2477 struct _SimdWrapper<bool, _Width, 2478 void_t<typename __bool_storage_member_type<_Width>::type>> 2479 { 2480 using _BuiltinType = typename __bool_storage_member_type<_Width>::type; 2481 using value_type = bool; 2482 2483 static constexpr size_t _S_full_size = sizeof(_BuiltinType) * __CHAR_BIT__; 2484 2485 _GLIBCXX_SIMD_INTRINSIC constexpr _SimdWrapper<bool, _S_full_size> 2486 __as_full_vector() const { return _M_data; } 2487 2488 _GLIBCXX_SIMD_INTRINSIC constexpr _SimdWrapper() = default; 2489 _GLIBCXX_SIMD_INTRINSIC constexpr _SimdWrapper(_BuiltinType __k) 2490 : _M_data(__k) {}; 2491 2492 _GLIBCXX_SIMD_INTRINSIC operator const _BuiltinType&() const 2493 { return _M_data; } 2494 2495 _GLIBCXX_SIMD_INTRINSIC operator _BuiltinType&() 2496 { return _M_data; } 2497 2498 _GLIBCXX_SIMD_INTRINSIC _BuiltinType __intrin() const 2499 { return _M_data; } 2500 2501 _GLIBCXX_SIMD_INTRINSIC constexpr value_type operator[](size_t __i) const 2502 { return _M_data & (_BuiltinType(1) << __i); } 2503 2504 template <size_t __i> 2505 _GLIBCXX_SIMD_INTRINSIC constexpr value_type 2506 operator[](_SizeConstant<__i>) const 2507 { return _M_data & (_BuiltinType(1) << __i); } 2508 2509 
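// Illustrative sketch (hypothetical values): an element of the bool wrapper
// is one bit of the mask word, so
//   _SimdWrapper<bool, 8> __k(0b0000'0101);
//   // __k[0] == true, __k[1] == false, __k[2] == true
//   __k._M_set(1, true); // _M_data is now 0b0000'0111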
_GLIBCXX_SIMD_INTRINSIC constexpr void _M_set(size_t __i, value_type __x) 2510 { 2511 if (__x) 2512 _M_data |= (_BuiltinType(1) << __i); 2513 else 2514 _M_data &= ~(_BuiltinType(1) << __i); 2515 } 2516 2517 _GLIBCXX_SIMD_INTRINSIC 2518 constexpr bool _M_is_constprop() const 2519 { return __builtin_constant_p(_M_data); } 2520 2521 _GLIBCXX_SIMD_INTRINSIC constexpr bool _M_is_constprop_none_of() const 2522 { 2523 if (__builtin_constant_p(_M_data)) 2524 { 2525 constexpr int __nbits = sizeof(_BuiltinType) * __CHAR_BIT__; 2526 constexpr _BuiltinType __active_mask 2527 = ~_BuiltinType() >> (__nbits - _Width); 2528 return (_M_data & __active_mask) == 0; 2529 } 2530 return false; 2531 } 2532 2533 _GLIBCXX_SIMD_INTRINSIC constexpr bool _M_is_constprop_all_of() const 2534 { 2535 if (__builtin_constant_p(_M_data)) 2536 { 2537 constexpr int __nbits = sizeof(_BuiltinType) * __CHAR_BIT__; 2538 constexpr _BuiltinType __active_mask 2539 = ~_BuiltinType() >> (__nbits - _Width); 2540 return (_M_data & __active_mask) == __active_mask; 2541 } 2542 return false; 2543 } 2544 2545 _BuiltinType _M_data; 2546 }; 2547 2548 // _SimdWrapperBase{{{1 2549 template <bool _MustZeroInitPadding, typename _BuiltinType> 2550 struct _SimdWrapperBase; 2551 2552 template <typename _BuiltinType> 2553 struct _SimdWrapperBase<false, _BuiltinType> // no padding or no SNaNs 2554 { 2555 _GLIBCXX_SIMD_INTRINSIC constexpr _SimdWrapperBase() = default; 2556 _GLIBCXX_SIMD_INTRINSIC constexpr _SimdWrapperBase(_BuiltinType __init) 2557 : _M_data(__init) 2558 {} 2559 2560 _BuiltinType _M_data; 2561 }; 2562 2563 template <typename _BuiltinType> 2564 struct _SimdWrapperBase<true, _BuiltinType> // with padding that needs to 2565 // never become SNaN 2566 { 2567 _GLIBCXX_SIMD_INTRINSIC constexpr _SimdWrapperBase() : _M_data() {} 2568 _GLIBCXX_SIMD_INTRINSIC constexpr _SimdWrapperBase(_BuiltinType __init) 2569 : _M_data(__init) 2570 {} 2571 2572 _BuiltinType _M_data; 2573 }; 2574 2575 // }}} 2576 // _SimdWrapper{{{ 2577 template <typename _Tp, size_t _Width> 2578 struct _SimdWrapper< 2579 _Tp, _Width, 2580 void_t<__vector_type_t<_Tp, _Width>, __intrinsic_type_t<_Tp, _Width>>> 2581 : _SimdWrapperBase<__has_iec559_behavior<__signaling_NaN, _Tp>::value 2582 && sizeof(_Tp) * _Width 2583 == sizeof(__vector_type_t<_Tp, _Width>), 2584 __vector_type_t<_Tp, _Width>> 2585 { 2586 using _Base 2587 = _SimdWrapperBase<__has_iec559_behavior<__signaling_NaN, _Tp>::value 2588 && sizeof(_Tp) * _Width 2589 == sizeof(__vector_type_t<_Tp, _Width>), 2590 __vector_type_t<_Tp, _Width>>; 2591 2592 static_assert(__is_vectorizable_v<_Tp>); 2593 static_assert(_Width >= 2); // 1 doesn't make sense, use _Tp directly then 2594 2595 using _BuiltinType = __vector_type_t<_Tp, _Width>; 2596 using value_type = _Tp; 2597 2598 static inline constexpr size_t _S_full_size 2599 = sizeof(_BuiltinType) / sizeof(value_type); 2600 static inline constexpr int _S_size = _Width; 2601 static inline constexpr bool _S_is_partial = _S_full_size != _S_size; 2602 2603 using _Base::_M_data; 2604 2605 _GLIBCXX_SIMD_INTRINSIC constexpr _SimdWrapper<_Tp, _S_full_size> 2606 __as_full_vector() const 2607 { return _M_data; } 2608 2609 _GLIBCXX_SIMD_INTRINSIC constexpr _SimdWrapper(initializer_list<_Tp> __init) 2610 : _Base(__generate_from_n_evaluations<_Width, _BuiltinType>( 2611 [&](auto __i) { return __init.begin()[__i.value]; })) {} 2612 2613 _GLIBCXX_SIMD_INTRINSIC constexpr _SimdWrapper() = default; 2614 _GLIBCXX_SIMD_INTRINSIC constexpr _SimdWrapper(const _SimdWrapper&) 2615 = default; 2616 
_GLIBCXX_SIMD_INTRINSIC constexpr _SimdWrapper(_SimdWrapper&&) = default; 2617 2618 _GLIBCXX_SIMD_INTRINSIC constexpr _SimdWrapper& 2619 operator=(const _SimdWrapper&) = default; 2620 _GLIBCXX_SIMD_INTRINSIC constexpr _SimdWrapper& 2621 operator=(_SimdWrapper&&) = default; 2622 2623 template <typename _V, typename = enable_if_t<disjunction_v< 2624 is_same<_V, __vector_type_t<_Tp, _Width>>, 2625 is_same<_V, __intrinsic_type_t<_Tp, _Width>>>>> 2626 _GLIBCXX_SIMD_INTRINSIC constexpr _SimdWrapper(_V __x) 2627 // __vector_bitcast can convert e.g. __m128 to __vector(2) float 2628 : _Base(__vector_bitcast<_Tp, _Width>(__x)) {} 2629 2630 template <typename... _As, 2631 typename = enable_if_t<((is_same_v<simd_abi::scalar, _As> && ...) 2632 && sizeof...(_As) <= _Width)>> 2633 _GLIBCXX_SIMD_INTRINSIC constexpr 2634 operator _SimdTuple<_Tp, _As...>() const 2635 { 2636 const auto& dd = _M_data; // workaround for GCC7 ICE 2637 return __generate_from_n_evaluations<sizeof...(_As), 2638 _SimdTuple<_Tp, _As...>>([&]( 2639 auto __i) constexpr { return dd[int(__i)]; }); 2640 } 2641 2642 _GLIBCXX_SIMD_INTRINSIC constexpr operator const _BuiltinType&() const 2643 { return _M_data; } 2644 2645 _GLIBCXX_SIMD_INTRINSIC constexpr operator _BuiltinType&() 2646 { return _M_data; } 2647 2648 _GLIBCXX_SIMD_INTRINSIC constexpr _Tp operator[](size_t __i) const 2649 { return _M_data[__i]; } 2650 2651 template <size_t __i> 2652 _GLIBCXX_SIMD_INTRINSIC constexpr _Tp operator[](_SizeConstant<__i>) const 2653 { return _M_data[__i]; } 2654 2655 _GLIBCXX_SIMD_INTRINSIC constexpr void _M_set(size_t __i, _Tp __x) 2656 { _M_data[__i] = __x; } 2657 2658 _GLIBCXX_SIMD_INTRINSIC 2659 constexpr bool _M_is_constprop() const 2660 { return __builtin_constant_p(_M_data); } 2661 2662 _GLIBCXX_SIMD_INTRINSIC constexpr bool _M_is_constprop_none_of() const 2663 { 2664 if (__builtin_constant_p(_M_data)) 2665 { 2666 bool __r = true; 2667 if constexpr (is_floating_point_v<_Tp>) 2668 { 2669 using _Ip = __int_for_sizeof_t<_Tp>; 2670 const auto __intdata = __vector_bitcast<_Ip>(_M_data); 2671 __execute_n_times<_Width>( 2672 [&](auto __i) { __r &= __intdata[__i.value] == _Ip(); }); 2673 } 2674 else 2675 __execute_n_times<_Width>( 2676 [&](auto __i) { __r &= _M_data[__i.value] == _Tp(); }); 2677 if (__builtin_constant_p(__r)) 2678 return __r; 2679 } 2680 return false; 2681 } 2682 2683 _GLIBCXX_SIMD_INTRINSIC constexpr bool _M_is_constprop_all_of() const 2684 { 2685 if (__builtin_constant_p(_M_data)) 2686 { 2687 bool __r = true; 2688 if constexpr (is_floating_point_v<_Tp>) 2689 { 2690 using _Ip = __int_for_sizeof_t<_Tp>; 2691 const auto __intdata = __vector_bitcast<_Ip>(_M_data); 2692 __execute_n_times<_Width>( 2693 [&](auto __i) { __r &= __intdata[__i.value] == ~_Ip(); }); 2694 } 2695 else 2696 __execute_n_times<_Width>( 2697 [&](auto __i) { __r &= _M_data[__i.value] == ~_Tp(); }); 2698 if (__builtin_constant_p(__r)) 2699 return __r; 2700 } 2701 return false; 2702 } 2703 }; 2704 2705 // }}} 2706 2707 // __vectorized_sizeof {{{ 2708 template <typename _Tp> 2709 constexpr size_t 2710 __vectorized_sizeof() 2711 { 2712 if constexpr (!__is_vectorizable_v<_Tp>) 2713 return 0; 2714 2715 if constexpr (sizeof(_Tp) <= 8) 2716 { 2717 // X86: 2718 if constexpr (__have_avx512bw) 2719 return 64; 2720 if constexpr (__have_avx512f && sizeof(_Tp) >= 4) 2721 return 64; 2722 if constexpr (__have_avx2) 2723 return 32; 2724 if constexpr (__have_avx && is_floating_point_v<_Tp>) 2725 return 32; 2726 if constexpr (__have_sse2) 2727 return 16; 2728 if constexpr (__have_sse 
&& is_same_v<_Tp, float>) 2729 return 16; 2730 /* The following is too much trouble because of mixed MMX and x87 code. 2731 * While nothing here explicitly calls MMX instructions or registers, 2732 * they are still emitted but no EMMS cleanup is done. 2733 if constexpr (__have_mmx && sizeof(_Tp) <= 4 && is_integral_v<_Tp>) 2734 return 8; 2735 */ 2736 2737 // PowerPC: 2738 if constexpr (__have_power8vec 2739 || (__have_power_vmx && (sizeof(_Tp) < 8)) 2740 || (__have_power_vsx && is_floating_point_v<_Tp>) ) 2741 return 16; 2742 2743 // ARM: 2744 if constexpr (__have_neon_a64 2745 || (__have_neon_a32 && !is_same_v<_Tp, double>) ) 2746 return 16; 2747 if constexpr (__have_neon 2748 && sizeof(_Tp) < 8 2749 // Only allow fp if the user allows non-IEC559 fp (e.g. 2750 // via -ffast-math). ARMv7 NEON fp does not conform to 2751 // IEC559. 2752 && (__support_neon_float || !is_floating_point_v<_Tp>)) 2753 return 16; 2754 } 2755 2756 return sizeof(_Tp); 2757 } 2758 2759 // }}} 2760 namespace simd_abi { 2761 // most of simd_abi is defined in simd_detail.h 2762 template <typename _Tp> 2763 inline constexpr int max_fixed_size 2764 = (__have_avx512bw && sizeof(_Tp) == 1) ? 64 : 32; 2765 2766 // compatible {{{ 2767 #if defined __x86_64__ || defined __aarch64__ 2768 template <typename _Tp> 2769 using compatible = conditional_t<(sizeof(_Tp) <= 8), _VecBuiltin<16>, scalar>; 2770 #elif defined __ARM_NEON 2771 // FIXME: not sure, probably needs to be scalar (or dependent on the hard-float 2772 // ABI?) 2773 template <typename _Tp> 2774 using compatible 2775 = conditional_t<(sizeof(_Tp) < 8 2776 && (__support_neon_float || !is_floating_point_v<_Tp>)), 2777 _VecBuiltin<16>, scalar>; 2778 #else 2779 template <typename> 2780 using compatible = scalar; 2781 #endif 2782 2783 // }}} 2784 // native {{{ 2785 template <typename _Tp> 2786 constexpr auto 2787 __determine_native_abi() 2788 { 2789 constexpr size_t __bytes = __vectorized_sizeof<_Tp>(); 2790 if constexpr (__bytes == sizeof(_Tp)) 2791 return static_cast<scalar*>(nullptr); 2792 else if constexpr (__have_avx512vl || (__have_avx512f && __bytes == 64)) 2793 return static_cast<_VecBltnBtmsk<__bytes>*>(nullptr); 2794 else 2795 return static_cast<_VecBuiltin<__bytes>*>(nullptr); 2796 } 2797 2798 template <typename _Tp, typename = enable_if_t<__is_vectorizable_v<_Tp>>> 2799 using native = remove_pointer_t<decltype(__determine_native_abi<_Tp>())>; 2800 2801 // }}} 2802 // __default_abi {{{ 2803 #if defined _GLIBCXX_SIMD_DEFAULT_ABI 2804 template <typename _Tp> 2805 using __default_abi = _GLIBCXX_SIMD_DEFAULT_ABI<_Tp>; 2806 #else 2807 template <typename _Tp> 2808 using __default_abi = compatible<_Tp>; 2809 #endif 2810 2811 // }}} 2812 } // namespace simd_abi 2813 2814 // traits {{{1 2815 // is_abi_tag {{{2 2816 template <typename _Tp, typename = void_t<>> 2817 struct is_abi_tag : false_type {}; 2818 2819 template <typename _Tp> 2820 struct is_abi_tag<_Tp, void_t<typename _Tp::_IsValidAbiTag>> 2821 : public _Tp::_IsValidAbiTag {}; 2822 2823 template <typename _Tp> 2824 inline constexpr bool is_abi_tag_v = is_abi_tag<_Tp>::value; 2825 2826 // is_simd(_mask) {{{2 2827 template <typename _Tp> 2828 struct is_simd : public false_type {}; 2829 2830 template <typename _Tp> 2831 inline constexpr bool is_simd_v = is_simd<_Tp>::value; 2832 2833 template <typename _Tp> 2834 struct is_simd_mask : public false_type {}; 2835 2836 template <typename _Tp> 2837 inline constexpr bool is_simd_mask_v = is_simd_mask<_Tp>::value; 2838 2839 // simd_size {{{2 2840 template <typename _Tp,
typename _Abi, typename = void> 2841 struct __simd_size_impl {}; 2842 2843 template <typename _Tp, typename _Abi> 2844 struct __simd_size_impl< 2845 _Tp, _Abi, 2846 enable_if_t<conjunction_v<__is_vectorizable<_Tp>, is_abi_tag<_Abi>>>> 2847 : _SizeConstant<_Abi::template _S_size<_Tp>> {}; 2848 2849 template <typename _Tp, typename _Abi = simd_abi::__default_abi<_Tp>> 2850 struct simd_size : __simd_size_impl<_Tp, _Abi> {}; 2851 2852 template <typename _Tp, typename _Abi = simd_abi::__default_abi<_Tp>> 2853 inline constexpr size_t simd_size_v = simd_size<_Tp, _Abi>::value; 2854 2855 // simd_abi::deduce {{{2 2856 template <typename _Tp, size_t _Np, typename = void> 2857 struct __deduce_impl; 2858 2859 namespace simd_abi { 2860 /** 2861 * @tparam _Tp The requested `value_type` for the elements. 2862 * @tparam _Np The requested number of elements. 2863 * @tparam _Abis This parameter is ignored, since this implementation cannot 2864 * make any use of it. Either a good native ABI is matched and used as `type` 2865 * alias, or the `fixed_size<_Np>` ABI is used, which internally is built from 2866 * the best matching native ABIs. 2867 */ 2868 template <typename _Tp, size_t _Np, typename...> 2869 struct deduce : __deduce_impl<_Tp, _Np> {}; 2870 2871 template <typename _Tp, size_t _Np, typename... _Abis> 2872 using deduce_t = typename deduce<_Tp, _Np, _Abis...>::type; 2873 } // namespace simd_abi 2874 2875 // }}}2 2876 // rebind_simd {{{2 2877 template <typename _Tp, typename _V, typename = void> 2878 struct rebind_simd; 2879 2880 template <typename _Tp, typename _Up, typename _Abi> 2881 struct rebind_simd< 2882 _Tp, simd<_Up, _Abi>, 2883 void_t<simd_abi::deduce_t<_Tp, simd_size_v<_Up, _Abi>, _Abi>>> 2884 { 2885 using type 2886 = simd<_Tp, simd_abi::deduce_t<_Tp, simd_size_v<_Up, _Abi>, _Abi>>; 2887 }; 2888 2889 template <typename _Tp, typename _Up, typename _Abi> 2890 struct rebind_simd< 2891 _Tp, simd_mask<_Up, _Abi>, 2892 void_t<simd_abi::deduce_t<_Tp, simd_size_v<_Up, _Abi>, _Abi>>> 2893 { 2894 using type 2895 = simd_mask<_Tp, simd_abi::deduce_t<_Tp, simd_size_v<_Up, _Abi>, _Abi>>; 2896 }; 2897 2898 template <typename _Tp, typename _V> 2899 using rebind_simd_t = typename rebind_simd<_Tp, _V>::type; 2900 2901 // resize_simd {{{2 2902 template <int _Np, typename _V, typename = void> 2903 struct resize_simd; 2904 2905 template <int _Np, typename _Tp, typename _Abi> 2906 struct resize_simd<_Np, simd<_Tp, _Abi>, 2907 void_t<simd_abi::deduce_t<_Tp, _Np, _Abi>>> 2908 { using type = simd<_Tp, simd_abi::deduce_t<_Tp, _Np, _Abi>>; }; 2909 2910 template <int _Np, typename _Tp, typename _Abi> 2911 struct resize_simd<_Np, simd_mask<_Tp, _Abi>, 2912 void_t<simd_abi::deduce_t<_Tp, _Np, _Abi>>> 2913 { using type = simd_mask<_Tp, simd_abi::deduce_t<_Tp, _Np, _Abi>>; }; 2914 2915 template <int _Np, typename _V> 2916 using resize_simd_t = typename resize_simd<_Np, _V>::type; 2917 2918 // }}}2 2919 // memory_alignment {{{2 2920 template <typename _Tp, typename _Up = typename _Tp::value_type> 2921 struct memory_alignment 2922 : public _SizeConstant<vector_aligned_tag::_S_alignment<_Tp, _Up>> {}; 2923 2924 template <typename _Tp, typename _Up = typename _Tp::value_type> 2925 inline constexpr size_t memory_alignment_v = memory_alignment<_Tp, _Up>::value; 2926 2927 // class template simd [simd] {{{1 2928 template <typename _Tp, typename _Abi = simd_abi::__default_abi<_Tp>> 2929 class simd; 2930 2931 template <typename _Tp, typename _Abi> 2932 struct is_simd<simd<_Tp, _Abi>> : public true_type {}; 2933 2934 template
<typename _Tp> 2935 using native_simd = simd<_Tp, simd_abi::native<_Tp>>; 2936 2937 template <typename _Tp, int _Np> 2938 using fixed_size_simd = simd<_Tp, simd_abi::fixed_size<_Np>>; 2939 2940 template <typename _Tp, size_t _Np> 2941 using __deduced_simd = simd<_Tp, simd_abi::deduce_t<_Tp, _Np>>; 2942 2943 // class template simd_mask [simd_mask] {{{1 2944 template <typename _Tp, typename _Abi = simd_abi::__default_abi<_Tp>> 2945 class simd_mask; 2946 2947 template <typename _Tp, typename _Abi> 2948 struct is_simd_mask<simd_mask<_Tp, _Abi>> : public true_type {}; 2949 2950 template <typename _Tp> 2951 using native_simd_mask = simd_mask<_Tp, simd_abi::native<_Tp>>; 2952 2953 template <typename _Tp, int _Np> 2954 using fixed_size_simd_mask = simd_mask<_Tp, simd_abi::fixed_size<_Np>>; 2955 2956 template <typename _Tp, size_t _Np> 2957 using __deduced_simd_mask = simd_mask<_Tp, simd_abi::deduce_t<_Tp, _Np>>; 2958 2959 // casts [simd.casts] {{{1 2960 // static_simd_cast {{{2 2961 template <typename _Tp, typename _Up, typename _Ap, bool = is_simd_v<_Tp>, 2962 typename = void> 2963 struct __static_simd_cast_return_type; 2964 2965 template <typename _Tp, typename _A0, typename _Up, typename _Ap> 2966 struct __static_simd_cast_return_type<simd_mask<_Tp, _A0>, _Up, _Ap, false, 2967 void> 2968 : __static_simd_cast_return_type<simd<_Tp, _A0>, _Up, _Ap> {}; 2969 2970 template <typename _Tp, typename _Up, typename _Ap> 2971 struct __static_simd_cast_return_type< 2972 _Tp, _Up, _Ap, true, enable_if_t<_Tp::size() == simd_size_v<_Up, _Ap>>> 2973 { using type = _Tp; }; 2974 2975 template <typename _Tp, typename _Ap> 2976 struct __static_simd_cast_return_type<_Tp, _Tp, _Ap, false, 2977 #ifdef _GLIBCXX_SIMD_FIX_P2TS_ISSUE66 2978 enable_if_t<__is_vectorizable_v<_Tp>> 2979 #else 2980 void 2981 #endif 2982 > 2983 { using type = simd<_Tp, _Ap>; }; 2984 2985 template <typename _Tp, typename = void> 2986 struct __safe_make_signed { using type = _Tp;}; 2987 2988 template <typename _Tp> 2989 struct __safe_make_signed<_Tp, enable_if_t<is_integral_v<_Tp>>> 2990 { 2991 // the extra make_unsigned_t is because of PR85951 2992 using type = make_signed_t<make_unsigned_t<_Tp>>; 2993 }; 2994 2995 template <typename _Tp> 2996 using safe_make_signed_t = typename __safe_make_signed<_Tp>::type; 2997 2998 template <typename _Tp, typename _Up, typename _Ap> 2999 struct __static_simd_cast_return_type<_Tp, _Up, _Ap, false, 3000 #ifdef _GLIBCXX_SIMD_FIX_P2TS_ISSUE66 3001 enable_if_t<__is_vectorizable_v<_Tp>> 3002 #else 3003 void 3004 #endif 3005 > 3006 { 3007 using type = conditional_t< 3008 (is_integral_v<_Up> && is_integral_v<_Tp> && 3009 #ifndef _GLIBCXX_SIMD_FIX_P2TS_ISSUE65 3010 is_signed_v<_Up> != is_signed_v<_Tp> && 3011 #endif 3012 is_same_v<safe_make_signed_t<_Up>, safe_make_signed_t<_Tp>>), 3013 simd<_Tp, _Ap>, fixed_size_simd<_Tp, simd_size_v<_Up, _Ap>>>; 3014 }; 3015 3016 template <typename _Tp, typename _Up, typename _Ap, 3017 typename _R 3018 = typename __static_simd_cast_return_type<_Tp, _Up, _Ap>::type> 3019 _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR _R 3020 static_simd_cast(const simd<_Up, _Ap>& __x) 3021 { 3022 if constexpr (is_same<_R, simd<_Up, _Ap>>::value) 3023 return __x; 3024 else 3025 { 3026 _SimdConverter<_Up, _Ap, typename _R::value_type, typename _R::abi_type> 3027 __c; 3028 return _R(__private_init, __c(__data(__x))); 3029 } 3030 } 3031 3032 namespace __proposed { 3033 template <typename _Tp, typename _Up, typename _Ap, 3034 typename _R 3035 = typename __static_simd_cast_return_type<_Tp, _Up, 
_Ap>::type> 3036 _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR typename _R::mask_type 3037 static_simd_cast(const simd_mask<_Up, _Ap>& __x) 3038 { 3039 using _RM = typename _R::mask_type; 3040 return {__private_init, _RM::abi_type::_MaskImpl::template _S_convert< 3041 typename _RM::simd_type::value_type>(__x)}; 3042 } 3043 3044 template <typename _To, typename _Up, typename _Abi> 3045 _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR 3046 _To 3047 simd_bit_cast(const simd<_Up, _Abi>& __x) 3048 { 3049 using _Tp = typename _To::value_type; 3050 using _ToMember = typename _SimdTraits<_Tp, typename _To::abi_type>::_SimdMember; 3051 using _From = simd<_Up, _Abi>; 3052 using _FromMember = typename _SimdTraits<_Up, _Abi>::_SimdMember; 3053 // with concepts, the following should be constraints 3054 static_assert(sizeof(_To) == sizeof(_From)); 3055 static_assert(is_trivially_copyable_v<_Tp> && is_trivially_copyable_v<_Up>); 3056 static_assert(is_trivially_copyable_v<_ToMember> && is_trivially_copyable_v<_FromMember>); 3057 #if __has_builtin(__builtin_bit_cast) 3058 return {__private_init, __builtin_bit_cast(_ToMember, __data(__x))}; 3059 #else 3060 return {__private_init, __bit_cast<_ToMember>(__data(__x))}; 3061 #endif 3062 } 3063 3064 template <typename _To, typename _Up, typename _Abi> 3065 _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR 3066 _To 3067 simd_bit_cast(const simd_mask<_Up, _Abi>& __x) 3068 { 3069 using _From = simd_mask<_Up, _Abi>; 3070 static_assert(sizeof(_To) == sizeof(_From)); 3071 static_assert(is_trivially_copyable_v<_From>); 3072 // _To can be simd<T, A>, specifically simd<T, fixed_size<N>> in which case _To is not trivially 3073 // copyable. 3074 if constexpr (is_simd_v<_To>) 3075 { 3076 using _Tp = typename _To::value_type; 3077 using _ToMember = typename _SimdTraits<_Tp, typename _To::abi_type>::_SimdMember; 3078 static_assert(is_trivially_copyable_v<_ToMember>); 3079 #if __has_builtin(__builtin_bit_cast) 3080 return {__private_init, __builtin_bit_cast(_ToMember, __x)}; 3081 #else 3082 return {__private_init, __bit_cast<_ToMember>(__x)}; 3083 #endif 3084 } 3085 else 3086 { 3087 static_assert(is_trivially_copyable_v<_To>); 3088 #if __has_builtin(__builtin_bit_cast) 3089 return __builtin_bit_cast(_To, __x); 3090 #else 3091 return __bit_cast<_To>(__x); 3092 #endif 3093 } 3094 } 3095 } // namespace __proposed 3096 3097 // simd_cast {{{2 3098 template <typename _Tp, typename _Up, typename _Ap, 3099 typename _To = __value_type_or_identity_t<_Tp>> 3100 _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR auto 3101 simd_cast(const simd<_ValuePreserving<_Up, _To>, _Ap>& __x) 3102 -> decltype(static_simd_cast<_Tp>(__x)) 3103 { return static_simd_cast<_Tp>(__x); } 3104 3105 namespace __proposed { 3106 template <typename _Tp, typename _Up, typename _Ap, 3107 typename _To = __value_type_or_identity_t<_Tp>> 3108 _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR auto 3109 simd_cast(const simd_mask<_ValuePreserving<_Up, _To>, _Ap>& __x) 3110 -> decltype(static_simd_cast<_Tp>(__x)) 3111 { return static_simd_cast<_Tp>(__x); } 3112 } // namespace __proposed 3113 3114 // }}}2 3115 // resizing_simd_cast {{{ 3116 namespace __proposed { 3117 /* Proposed spec: 3118 3119 template <class T, class U, class Abi> 3120 T resizing_simd_cast(const simd<U, Abi>& x) 3121 3122 p1 Constraints: 3123 - is_simd_v<T> is true and 3124 - T::value_type is the same type as U 3125 3126 p2 Returns: 3127 A simd object with the i^th element initialized to x[i] for all i in the 3128 range of [0, min(T::size(), simd_size_v<U, 
Abi>)). If T::size() is larger 3129 than simd_size_v<U, Abi>, the remaining elements are value-initialized. 3130 3131 template <class T, class U, class Abi> 3132 T resizing_simd_cast(const simd_mask<U, Abi>& x) 3133 3134 p1 Constraints: is_simd_mask_v<T> is true 3135 3136 p2 Returns: 3137 A simd_mask object with the i^th element initialized to x[i] for all i in 3138 the range of [0, min(T::size(), simd_size_v<U, Abi>)). If T::size() is larger 3139 than simd_size_v<U, Abi>, the remaining elements are initialized to false. 3140 3141 */ 3142 3143 template <typename _Tp, typename _Up, typename _Ap> 3144 _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR enable_if_t< 3145 conjunction_v<is_simd<_Tp>, is_same<typename _Tp::value_type, _Up>>, _Tp> 3146 resizing_simd_cast(const simd<_Up, _Ap>& __x) 3147 { 3148 if constexpr (is_same_v<typename _Tp::abi_type, _Ap>) 3149 return __x; 3150 else if constexpr (simd_size_v<_Up, _Ap> == 1) 3151 { 3152 _Tp __r{}; 3153 __r[0] = __x[0]; 3154 return __r; 3155 } 3156 else if constexpr (_Tp::size() == 1) 3157 return __x[0]; 3158 else if constexpr (sizeof(_Tp) == sizeof(__x) 3159 && !__is_fixed_size_abi_v<_Ap>) 3160 return {__private_init, 3161 __vector_bitcast<typename _Tp::value_type, _Tp::size()>( 3162 _Ap::_S_masked(__data(__x))._M_data)}; 3163 else 3164 { 3165 _Tp __r{}; 3166 __builtin_memcpy(&__data(__r), &__data(__x), 3167 sizeof(_Up) 3168 * std::min(_Tp::size(), simd_size_v<_Up, _Ap>)); 3169 return __r; 3170 } 3171 } 3172 3173 template <typename _Tp, typename _Up, typename _Ap> 3174 _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR 3175 enable_if_t<is_simd_mask_v<_Tp>, _Tp> 3176 resizing_simd_cast(const simd_mask<_Up, _Ap>& __x) 3177 { 3178 return {__private_init, _Tp::abi_type::_MaskImpl::template _S_convert< 3179 typename _Tp::simd_type::value_type>(__x)}; 3180 } 3181 } // namespace __proposed 3182 3183 // }}} 3184 // to_fixed_size {{{2 3185 template <typename _Tp, int _Np> 3186 _GLIBCXX_SIMD_INTRINSIC fixed_size_simd<_Tp, _Np> 3187 to_fixed_size(const fixed_size_simd<_Tp, _Np>& __x) 3188 { return __x; } 3189 3190 template <typename _Tp, int _Np> 3191 _GLIBCXX_SIMD_INTRINSIC fixed_size_simd_mask<_Tp, _Np> 3192 to_fixed_size(const fixed_size_simd_mask<_Tp, _Np>& __x) 3193 { return __x; } 3194 3195 template <typename _Tp, typename _Ap> 3196 _GLIBCXX_SIMD_INTRINSIC auto 3197 to_fixed_size(const simd<_Tp, _Ap>& __x) 3198 { 3199 return simd<_Tp, simd_abi::fixed_size<simd_size_v<_Tp, _Ap>>>([&__x]( 3200 auto __i) constexpr { return __x[__i]; }); 3201 } 3202 3203 template <typename _Tp, typename _Ap> 3204 _GLIBCXX_SIMD_INTRINSIC auto 3205 to_fixed_size(const simd_mask<_Tp, _Ap>& __x) 3206 { 3207 constexpr int _Np = simd_mask<_Tp, _Ap>::size(); 3208 fixed_size_simd_mask<_Tp, _Np> __r; 3209 __execute_n_times<_Np>([&](auto __i) constexpr { __r[__i] = __x[__i]; }); 3210 return __r; 3211 } 3212 3213 // to_native {{{2 3214 template <typename _Tp, int _Np> 3215 _GLIBCXX_SIMD_INTRINSIC 3216 enable_if_t<(_Np == native_simd<_Tp>::size()), native_simd<_Tp>> 3217 to_native(const fixed_size_simd<_Tp, _Np>& __x) 3218 { 3219 alignas(memory_alignment_v<native_simd<_Tp>>) _Tp __mem[_Np]; 3220 __x.copy_to(__mem, vector_aligned); 3221 return {__mem, vector_aligned}; 3222 } 3223 3224 template <typename _Tp, size_t _Np> 3225 _GLIBCXX_SIMD_INTRINSIC 3226 enable_if_t<(_Np == native_simd_mask<_Tp>::size()), native_simd_mask<_Tp>> 3227 to_native(const fixed_size_simd_mask<_Tp, _Np>& __x) 3228 { 3229 return native_simd_mask<_Tp>([&](auto __i) constexpr { return __x[__i]; }); 3230 } 3231 3232 
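// Illustrative usage sketch (assuming a target where native_simd<float>
// holds 4 elements):
//   fixed_size_simd<float, 4> __f = to_fixed_size(native_simd<float>(1.f));
//   native_simd<float> __v = to_native(__f); // round-trips all 4 values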
// to_compatible {{{2 3233 template <typename _Tp, size_t _Np> 3234 _GLIBCXX_SIMD_INTRINSIC enable_if_t<(_Np == simd<_Tp>::size()), simd<_Tp>> 3235 to_compatible(const simd<_Tp, simd_abi::fixed_size<_Np>>& __x) 3236 { 3237 alignas(memory_alignment_v<simd<_Tp>>) _Tp __mem[_Np]; 3238 __x.copy_to(__mem, vector_aligned); 3239 return {__mem, vector_aligned}; 3240 } 3241 3242 template <typename _Tp, size_t _Np> 3243 _GLIBCXX_SIMD_INTRINSIC 3244 enable_if_t<(_Np == simd_mask<_Tp>::size()), simd_mask<_Tp>> 3245 to_compatible(const simd_mask<_Tp, simd_abi::fixed_size<_Np>>& __x) 3246 { return simd_mask<_Tp>([&](auto __i) constexpr { return __x[__i]; }); } 3247 3248 // masked assignment [simd_mask.where] {{{1 3249 3250 // where_expression {{{1 3251 // const_where_expression<M, T> {{{2 3252 template <typename _M, typename _Tp> 3253 class const_where_expression 3254 { 3255 using _V = _Tp; 3256 static_assert(is_same_v<_V, __remove_cvref_t<_Tp>>); 3257 3258 struct _Wrapper { using value_type = _V; }; 3259 3260 protected: 3261 using _Impl = typename _V::_Impl; 3262 3263 using value_type = 3264 typename conditional_t<is_arithmetic_v<_V>, _Wrapper, _V>::value_type; 3265 3266 _GLIBCXX_SIMD_INTRINSIC friend const _M& 3267 __get_mask(const const_where_expression& __x) 3268 { return __x._M_k; } 3269 3270 _GLIBCXX_SIMD_INTRINSIC friend const _Tp& 3271 __get_lvalue(const const_where_expression& __x) 3272 { return __x._M_value; } 3273 3274 const _M& _M_k; 3275 _Tp& _M_value; 3276 3277 public: 3278 const_where_expression(const const_where_expression&) = delete; 3279 const_where_expression& operator=(const const_where_expression&) = delete; 3280 3281 _GLIBCXX_SIMD_INTRINSIC const_where_expression(const _M& __kk, const _Tp& dd) 3282 : _M_k(__kk), _M_value(const_cast<_Tp&>(dd)) {} 3283 3284 _GLIBCXX_SIMD_INTRINSIC _V 3285 operator-() const&& 3286 { 3287 return {__private_init, 3288 _Impl::template _S_masked_unary<negate>(__data(_M_k), 3289 __data(_M_value))}; 3290 } 3291 3292 template <typename _Up, typename _Flags> 3293 [[nodiscard]] _GLIBCXX_SIMD_INTRINSIC _V 3294 copy_from(const _LoadStorePtr<_Up, value_type>* __mem, _Flags) const&& 3295 { 3296 return {__private_init, 3297 _Impl::_S_masked_load(__data(_M_value), __data(_M_k), 3298 _Flags::template _S_apply<_V>(__mem))}; 3299 } 3300 3301 template <typename _Up, typename _Flags> 3302 _GLIBCXX_SIMD_INTRINSIC void 3303 copy_to(_LoadStorePtr<_Up, value_type>* __mem, _Flags) const&& 3304 { 3305 _Impl::_S_masked_store(__data(_M_value), 3306 _Flags::template _S_apply<_V>(__mem), 3307 __data(_M_k)); 3308 } 3309 }; 3310 3311 // const_where_expression<bool, T> {{{2 3312 template <typename _Tp> 3313 class const_where_expression<bool, _Tp> 3314 { 3315 using _M = bool; 3316 using _V = _Tp; 3317 3318 static_assert(is_same_v<_V, __remove_cvref_t<_Tp>>); 3319 3320 struct _Wrapper { using value_type = _V; }; 3321 3322 protected: 3323 using value_type = 3324 typename conditional_t<is_arithmetic_v<_V>, _Wrapper, _V>::value_type; 3325 3326 _GLIBCXX_SIMD_INTRINSIC friend const _M& 3327 __get_mask(const const_where_expression& __x) 3328 { return __x._M_k; } 3329 3330 _GLIBCXX_SIMD_INTRINSIC friend const _Tp& 3331 __get_lvalue(const const_where_expression& __x) 3332 { return __x._M_value; } 3333 3334 const bool _M_k; 3335 _Tp& _M_value; 3336 3337 public: 3338 const_where_expression(const const_where_expression&) = delete; 3339 const_where_expression& operator=(const const_where_expression&) = delete; 3340 3341 _GLIBCXX_SIMD_INTRINSIC const_where_expression(const bool __kk, const _Tp& dd) 
3342 : _M_k(__kk), _M_value(const_cast<_Tp&>(dd)) {} 3343 3344 _GLIBCXX_SIMD_INTRINSIC _V operator-() const&& 3345 { return _M_k ? -_M_value : _M_value; } 3346 3347 template <typename _Up, typename _Flags> 3348 [[nodiscard]] _GLIBCXX_SIMD_INTRINSIC _V 3349 copy_from(const _LoadStorePtr<_Up, value_type>* __mem, _Flags) const&& 3350 { return _M_k ? static_cast<_V>(__mem[0]) : _M_value; } 3351 3352 template <typename _Up, typename _Flags> 3353 _GLIBCXX_SIMD_INTRINSIC void 3354 copy_to(_LoadStorePtr<_Up, value_type>* __mem, _Flags) const&& 3355 { 3356 if (_M_k) 3357 __mem[0] = _M_value; 3358 } 3359 }; 3360 3361 // where_expression<M, T> {{{2 3362 template <typename _M, typename _Tp> 3363 class where_expression : public const_where_expression<_M, _Tp> 3364 { 3365 using _Impl = typename const_where_expression<_M, _Tp>::_Impl; 3366 3367 static_assert(!is_const<_Tp>::value, 3368 "where_expression may only be instantiated with a non-const " 3369 "_Tp parameter"); 3370 3371 using typename const_where_expression<_M, _Tp>::value_type; 3372 using const_where_expression<_M, _Tp>::_M_k; 3373 using const_where_expression<_M, _Tp>::_M_value; 3374 3375 static_assert( 3376 is_same<typename _M::abi_type, typename _Tp::abi_type>::value, ""); 3377 static_assert(_M::size() == _Tp::size(), ""); 3378 3379 _GLIBCXX_SIMD_INTRINSIC friend _Tp& __get_lvalue(where_expression& __x) 3380 { return __x._M_value; } 3381 3382 public: 3383 where_expression(const where_expression&) = delete; 3384 where_expression& operator=(const where_expression&) = delete; 3385 3386 _GLIBCXX_SIMD_INTRINSIC where_expression(const _M& __kk, _Tp& dd) 3387 : const_where_expression<_M, _Tp>(__kk, dd) {} 3388 3389 template <typename _Up> 3390 _GLIBCXX_SIMD_INTRINSIC void operator=(_Up&& __x) && 3391 { 3392 _Impl::_S_masked_assign(__data(_M_k), __data(_M_value), 3393 __to_value_type_or_member_type<_Tp>( 3394 static_cast<_Up&&>(__x))); 3395 } 3396 3397 #define _GLIBCXX_SIMD_OP_(__op, __name) \ 3398 template <typename _Up> \ 3399 _GLIBCXX_SIMD_INTRINSIC void operator __op##=(_Up&& __x)&& \ 3400 { \ 3401 _Impl::template _S_masked_cassign( \ 3402 __data(_M_k), __data(_M_value), \ 3403 __to_value_type_or_member_type<_Tp>(static_cast<_Up&&>(__x)), \ 3404 [](auto __impl, auto __lhs, auto __rhs) constexpr { \ 3405 return __impl.__name(__lhs, __rhs); \ 3406 }); \ 3407 } \ 3408 static_assert(true) 3409 _GLIBCXX_SIMD_OP_(+, _S_plus); 3410 _GLIBCXX_SIMD_OP_(-, _S_minus); 3411 _GLIBCXX_SIMD_OP_(*, _S_multiplies); 3412 _GLIBCXX_SIMD_OP_(/, _S_divides); 3413 _GLIBCXX_SIMD_OP_(%, _S_modulus); 3414 _GLIBCXX_SIMD_OP_(&, _S_bit_and); 3415 _GLIBCXX_SIMD_OP_(|, _S_bit_or); 3416 _GLIBCXX_SIMD_OP_(^, _S_bit_xor); 3417 _GLIBCXX_SIMD_OP_(<<, _S_shift_left); 3418 _GLIBCXX_SIMD_OP_(>>, _S_shift_right); 3419 #undef _GLIBCXX_SIMD_OP_ 3420 3421 _GLIBCXX_SIMD_INTRINSIC void operator++() && 3422 { 3423 __data(_M_value) 3424 = _Impl::template _S_masked_unary<__increment>(__data(_M_k), 3425 __data(_M_value)); 3426 } 3427 3428 _GLIBCXX_SIMD_INTRINSIC void operator++(int) && 3429 { 3430 __data(_M_value) 3431 = _Impl::template _S_masked_unary<__increment>(__data(_M_k), 3432 __data(_M_value)); 3433 } 3434 3435 _GLIBCXX_SIMD_INTRINSIC void operator--() && 3436 { 3437 __data(_M_value) 3438 = _Impl::template _S_masked_unary<__decrement>(__data(_M_k), 3439 __data(_M_value)); 3440 } 3441 3442 _GLIBCXX_SIMD_INTRINSIC void operator--(int) && 3443 { 3444 __data(_M_value) 3445 = _Impl::template _S_masked_unary<__decrement>(__data(_M_k), 3446 __data(_M_value)); 3447 } 3448 3449 //
intentionally hides const_where_expression::copy_from 3450 template <typename _Up, typename _Flags> 3451 _GLIBCXX_SIMD_INTRINSIC void 3452 copy_from(const _LoadStorePtr<_Up, value_type>* __mem, _Flags) && 3453 { 3454 __data(_M_value) 3455 = _Impl::_S_masked_load(__data(_M_value), __data(_M_k), 3456 _Flags::template _S_apply<_Tp>(__mem)); 3457 } 3458 }; 3459 3460 // where_expression<bool, T> {{{2 3461 template <typename _Tp> 3462 class where_expression<bool, _Tp> : public const_where_expression<bool, _Tp> 3463 { 3464 using _M = bool; 3465 using typename const_where_expression<_M, _Tp>::value_type; 3466 using const_where_expression<_M, _Tp>::_M_k; 3467 using const_where_expression<_M, _Tp>::_M_value; 3468 3469 public: 3470 where_expression(const where_expression&) = delete; 3471 where_expression& operator=(const where_expression&) = delete; 3472 3473 _GLIBCXX_SIMD_INTRINSIC where_expression(const _M& __kk, _Tp& dd) 3474 : const_where_expression<_M, _Tp>(__kk, dd) {} 3475 3476 #define _GLIBCXX_SIMD_OP_(__op) \ 3477 template <typename _Up> \ 3478 _GLIBCXX_SIMD_INTRINSIC void operator __op(_Up&& __x)&& \ 3479 { if (_M_k) _M_value __op static_cast<_Up&&>(__x); } 3480 3481 _GLIBCXX_SIMD_OP_(=) 3482 _GLIBCXX_SIMD_OP_(+=) 3483 _GLIBCXX_SIMD_OP_(-=) 3484 _GLIBCXX_SIMD_OP_(*=) 3485 _GLIBCXX_SIMD_OP_(/=) 3486 _GLIBCXX_SIMD_OP_(%=) 3487 _GLIBCXX_SIMD_OP_(&=) 3488 _GLIBCXX_SIMD_OP_(|=) 3489 _GLIBCXX_SIMD_OP_(^=) 3490 _GLIBCXX_SIMD_OP_(<<=) 3491 _GLIBCXX_SIMD_OP_(>>=) 3492 #undef _GLIBCXX_SIMD_OP_ 3493 3494 _GLIBCXX_SIMD_INTRINSIC void operator++() && 3495 { if (_M_k) ++_M_value; } 3496 3497 _GLIBCXX_SIMD_INTRINSIC void operator++(int) && 3498 { if (_M_k) ++_M_value; } 3499 3500 _GLIBCXX_SIMD_INTRINSIC void operator--() && 3501 { if (_M_k) --_M_value; } 3502 3503 _GLIBCXX_SIMD_INTRINSIC void operator--(int) && 3504 { if (_M_k) --_M_value; } 3505 3506 // intentionally hides const_where_expression::copy_from 3507 template <typename _Up, typename _Flags> 3508 _GLIBCXX_SIMD_INTRINSIC void 3509 copy_from(const _LoadStorePtr<_Up, value_type>* __mem, _Flags) && 3510 { if (_M_k) _M_value = __mem[0]; } 3511 }; 3512 3513 // where {{{1 3514 template <typename _Tp, typename _Ap> 3515 _GLIBCXX_SIMD_INTRINSIC where_expression<simd_mask<_Tp, _Ap>, simd<_Tp, _Ap>> 3516 where(const typename simd<_Tp, _Ap>::mask_type& __k, simd<_Tp, _Ap>& __value) 3517 { return {__k, __value}; } 3518 3519 template <typename _Tp, typename _Ap> 3520 _GLIBCXX_SIMD_INTRINSIC 3521 const_where_expression<simd_mask<_Tp, _Ap>, simd<_Tp, _Ap>> 3522 where(const typename simd<_Tp, _Ap>::mask_type& __k, 3523 const simd<_Tp, _Ap>& __value) 3524 { return {__k, __value}; } 3525 3526 template <typename _Tp, typename _Ap> 3527 _GLIBCXX_SIMD_INTRINSIC 3528 where_expression<simd_mask<_Tp, _Ap>, simd_mask<_Tp, _Ap>> 3529 where(const remove_const_t<simd_mask<_Tp, _Ap>>& __k, 3530 simd_mask<_Tp, _Ap>& __value) 3531 { return {__k, __value}; } 3532 3533 template <typename _Tp, typename _Ap> 3534 _GLIBCXX_SIMD_INTRINSIC 3535 const_where_expression<simd_mask<_Tp, _Ap>, simd_mask<_Tp, _Ap>> 3536 where(const remove_const_t<simd_mask<_Tp, _Ap>>& __k, 3537 const simd_mask<_Tp, _Ap>& __value) 3538 { return {__k, __value}; } 3539 3540 template <typename _Tp> 3541 _GLIBCXX_SIMD_INTRINSIC where_expression<bool, _Tp> 3542 where(_ExactBool __k, _Tp& __value) 3543 { return {__k, __value}; } 3544 3545 template <typename _Tp> 3546 _GLIBCXX_SIMD_INTRINSIC const_where_expression<bool, _Tp> 3547 where(_ExactBool __k, const _Tp& __value) 3548 { return {__k, __value}; } 3549 3550 
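// Illustrative usage sketch of the masked-assignment interface:
//   simd<float> __x([](int __i) { return __i - 2.f; });
//   where(__x < 0.f, __x) = 0.f;  // zero the negative elements
//   where(__x > 0.f, __x) *= 2.f; // compound assignment applies only where
//                                 // the mask is true
// The plain-bool overloads below are deleted so that a scalar bool cannot
// silently act as the mask for a whole simd object.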
template <typename _Tp, typename _Ap> 3551 void where(bool __k, simd<_Tp, _Ap>& __value) = delete; 3552 3553 template <typename _Tp, typename _Ap> 3554 void where(bool __k, const simd<_Tp, _Ap>& __value) = delete; 3555 3556 // proposed mask iterations {{{1 3557 namespace __proposed { 3558 template <size_t _Np> 3559 class where_range 3560 { 3561 const bitset<_Np> __bits; 3562 3563 public: 3564 where_range(bitset<_Np> __b) : __bits(__b) {} 3565 3566 class iterator 3567 { 3568 size_t __mask; 3569 size_t __bit; 3570 3571 _GLIBCXX_SIMD_INTRINSIC void __next_bit() 3572 { __bit = __builtin_ctzl(__mask); } 3573 3574 _GLIBCXX_SIMD_INTRINSIC void __reset_lsb() 3575 { 3576 // 01100100 - 1 = 01100011 3577 __mask &= (__mask - 1); 3578 // __asm__("btr %1,%0" : "+r"(__mask) : "r"(__bit)); 3579 } 3580 3581 public: 3582 iterator(decltype(__mask) __m) : __mask(__m) { __next_bit(); } 3583 iterator(const iterator&) = default; 3584 iterator(iterator&&) = default; 3585 3586 _GLIBCXX_SIMD_ALWAYS_INLINE size_t operator->() const 3587 { return __bit; } 3588 3589 _GLIBCXX_SIMD_ALWAYS_INLINE size_t operator*() const 3590 { return __bit; } 3591 3592 _GLIBCXX_SIMD_ALWAYS_INLINE iterator& operator++() 3593 { 3594 __reset_lsb(); 3595 __next_bit(); 3596 return *this; 3597 } 3598 3599 _GLIBCXX_SIMD_ALWAYS_INLINE iterator operator++(int) 3600 { 3601 iterator __tmp = *this; 3602 __reset_lsb(); 3603 __next_bit(); 3604 return __tmp; 3605 } 3606 3607 _GLIBCXX_SIMD_ALWAYS_INLINE bool operator==(const iterator& __rhs) const 3608 { return __mask == __rhs.__mask; } 3609 3610 _GLIBCXX_SIMD_ALWAYS_INLINE bool operator!=(const iterator& __rhs) const 3611 { return __mask != __rhs.__mask; } 3612 }; 3613 3614 iterator begin() const 3615 { return __bits.to_ullong(); } 3616 3617 iterator end() const 3618 { return 0; } 3619 }; 3620 3621 template <typename _Tp, typename _Ap> 3622 where_range<simd_size_v<_Tp, _Ap>> 3623 where(const simd_mask<_Tp, _Ap>& __k) 3624 { return __k.__to_bitset(); } 3625 3626 } // namespace __proposed 3627 3628 // }}}1 3629 // reductions [simd.reductions] {{{1 3630 template <typename _Tp, typename _Abi, typename _BinaryOperation = plus<>> 3631 _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR _Tp 3632 reduce(const simd<_Tp, _Abi>& __v, 3633 _BinaryOperation __binary_op = _BinaryOperation()) 3634 { return _Abi::_SimdImpl::_S_reduce(__v, __binary_op); } 3635 3636 template <typename _M, typename _V, typename _BinaryOperation = plus<>> 3637 _GLIBCXX_SIMD_INTRINSIC typename _V::value_type 3638 reduce(const const_where_expression<_M, _V>& __x, 3639 typename _V::value_type __identity_element, 3640 _BinaryOperation __binary_op) 3641 { 3642 if (__builtin_expect(none_of(__get_mask(__x)), false)) 3643 return __identity_element; 3644 3645 _V __tmp = __identity_element; 3646 _V::_Impl::_S_masked_assign(__data(__get_mask(__x)), __data(__tmp), 3647 __data(__get_lvalue(__x))); 3648 return reduce(__tmp, __binary_op); 3649 } 3650 3651 template <typename _M, typename _V> 3652 _GLIBCXX_SIMD_INTRINSIC typename _V::value_type 3653 reduce(const const_where_expression<_M, _V>& __x, plus<> __binary_op = {}) 3654 { return reduce(__x, 0, __binary_op); } 3655 3656 template <typename _M, typename _V> 3657 _GLIBCXX_SIMD_INTRINSIC typename _V::value_type 3658 reduce(const const_where_expression<_M, _V>& __x, multiplies<> __binary_op) 3659 { return reduce(__x, 1, __binary_op); } 3660 3661 template <typename _M, typename _V> 3662 _GLIBCXX_SIMD_INTRINSIC typename _V::value_type 3663 reduce(const const_where_expression<_M, _V>& __x, bit_and<> 
// reductions [simd.reductions] {{{1
template <typename _Tp, typename _Abi, typename _BinaryOperation = plus<>>
  _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR _Tp
  reduce(const simd<_Tp, _Abi>& __v,
         _BinaryOperation __binary_op = _BinaryOperation())
  { return _Abi::_SimdImpl::_S_reduce(__v, __binary_op); }

template <typename _M, typename _V, typename _BinaryOperation = plus<>>
  _GLIBCXX_SIMD_INTRINSIC typename _V::value_type
  reduce(const const_where_expression<_M, _V>& __x,
         typename _V::value_type __identity_element,
         _BinaryOperation __binary_op)
  {
    if (__builtin_expect(none_of(__get_mask(__x)), false))
      return __identity_element;

    _V __tmp = __identity_element;
    _V::_Impl::_S_masked_assign(__data(__get_mask(__x)), __data(__tmp),
                                __data(__get_lvalue(__x)));
    return reduce(__tmp, __binary_op);
  }

template <typename _M, typename _V>
  _GLIBCXX_SIMD_INTRINSIC typename _V::value_type
  reduce(const const_where_expression<_M, _V>& __x, plus<> __binary_op = {})
  { return reduce(__x, 0, __binary_op); }

template <typename _M, typename _V>
  _GLIBCXX_SIMD_INTRINSIC typename _V::value_type
  reduce(const const_where_expression<_M, _V>& __x, multiplies<> __binary_op)
  { return reduce(__x, 1, __binary_op); }

template <typename _M, typename _V>
  _GLIBCXX_SIMD_INTRINSIC typename _V::value_type
  reduce(const const_where_expression<_M, _V>& __x, bit_and<> __binary_op)
  { return reduce(__x, ~typename _V::value_type(), __binary_op); }

template <typename _M, typename _V>
  _GLIBCXX_SIMD_INTRINSIC typename _V::value_type
  reduce(const const_where_expression<_M, _V>& __x, bit_or<> __binary_op)
  { return reduce(__x, 0, __binary_op); }

template <typename _M, typename _V>
  _GLIBCXX_SIMD_INTRINSIC typename _V::value_type
  reduce(const const_where_expression<_M, _V>& __x, bit_xor<> __binary_op)
  { return reduce(__x, 0, __binary_op); }

template <typename _Tp, typename _Abi>
  _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR _Tp
  hmin(const simd<_Tp, _Abi>& __v) noexcept
  { return _Abi::_SimdImpl::_S_reduce(__v, __detail::_Minimum()); }

template <typename _Tp, typename _Abi>
  _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR _Tp
  hmax(const simd<_Tp, _Abi>& __v) noexcept
  { return _Abi::_SimdImpl::_S_reduce(__v, __detail::_Maximum()); }

template <typename _M, typename _V>
  _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR
  typename _V::value_type
  hmin(const const_where_expression<_M, _V>& __x) noexcept
  {
    using _Tp = typename _V::value_type;
    constexpr _Tp __id_elem =
#ifdef __FINITE_MATH_ONLY__
      __finite_max_v<_Tp>;
#else
      __value_or<__infinity, _Tp>(__finite_max_v<_Tp>);
#endif
    _V __tmp = __id_elem;
    _V::_Impl::_S_masked_assign(__data(__get_mask(__x)), __data(__tmp),
                                __data(__get_lvalue(__x)));
    return _V::abi_type::_SimdImpl::_S_reduce(__tmp, __detail::_Minimum());
  }

template <typename _M, typename _V>
  _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR
  typename _V::value_type
  hmax(const const_where_expression<_M, _V>& __x) noexcept
  {
    using _Tp = typename _V::value_type;
    constexpr _Tp __id_elem =
#ifdef __FINITE_MATH_ONLY__
      __finite_min_v<_Tp>;
#else
      [] {
        if constexpr (__value_exists_v<__infinity, _Tp>)
          return -__infinity_v<_Tp>;
        else
          return __finite_min_v<_Tp>;
      }();
#endif
    _V __tmp = __id_elem;
    _V::_Impl::_S_masked_assign(__data(__get_mask(__x)), __data(__tmp),
                                __data(__get_lvalue(__x)));
    return _V::abi_type::_SimdImpl::_S_reduce(__tmp, __detail::_Maximum());
  }

// }}}1
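// Usage sketch (illustration only; stdx = std::experimental): reductions over
// the elements selected by a where-expression. The unselected elements are
// filled with the identity element of the operation (0 for plus<>, 1 for
// multiplies<>, the all-ones value for bit_and<>, and the infinite or finite
// extremum for hmin/hmax).
//
//   stdx::native_simd<int> v([](int i) { return i + 1; });
//   const int sum  = stdx::reduce(stdx::where(v > 2, v));
//   const int prod = stdx::reduce(stdx::where(v > 2, v), std::multiplies<>());
//   const int lo   = stdx::hmin(stdx::where(v > 2, v));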
// algorithms [simd.alg] {{{
template <typename _Tp, typename _Ap>
  _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR simd<_Tp, _Ap>
  min(const simd<_Tp, _Ap>& __a, const simd<_Tp, _Ap>& __b)
  { return {__private_init, _Ap::_SimdImpl::_S_min(__data(__a), __data(__b))}; }

template <typename _Tp, typename _Ap>
  _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR simd<_Tp, _Ap>
  max(const simd<_Tp, _Ap>& __a, const simd<_Tp, _Ap>& __b)
  { return {__private_init, _Ap::_SimdImpl::_S_max(__data(__a), __data(__b))}; }

template <typename _Tp, typename _Ap>
  _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR
  pair<simd<_Tp, _Ap>, simd<_Tp, _Ap>>
  minmax(const simd<_Tp, _Ap>& __a, const simd<_Tp, _Ap>& __b)
  {
    const auto pair_of_members
      = _Ap::_SimdImpl::_S_minmax(__data(__a), __data(__b));
    return {simd<_Tp, _Ap>(__private_init, pair_of_members.first),
            simd<_Tp, _Ap>(__private_init, pair_of_members.second)};
  }

template <typename _Tp, typename _Ap>
  _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR simd<_Tp, _Ap>
  clamp(const simd<_Tp, _Ap>& __v, const simd<_Tp, _Ap>& __lo,
        const simd<_Tp, _Ap>& __hi)
  {
    using _Impl = typename _Ap::_SimdImpl;
    return {__private_init,
            _Impl::_S_min(__data(__hi),
                          _Impl::_S_max(__data(__lo), __data(__v)))};
  }

// }}}
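// Usage sketch (illustration only; stdx = std::experimental): element-wise
// algorithms. clamp(v, lo, hi) computes min(hi, max(lo, v)) per element.
//
//   stdx::native_simd<float> v([](int i) { return i - 2.f; });
//   const stdx::native_simd<float> lo = 0.f, hi = 1.f;
//   const auto c = stdx::clamp(v, lo, hi);    // every element now in [0, 1]
//   const auto [mn, mx] = stdx::minmax(v, c); // element-wise min/max pair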
template <size_t... _Sizes, typename _Tp, typename _Ap,
          typename = enable_if_t<((_Sizes + ...) == simd<_Tp, _Ap>::size())>>
  inline tuple<simd<_Tp, simd_abi::deduce_t<_Tp, _Sizes>>...>
  split(const simd<_Tp, _Ap>&);

// __extract_part {{{
template <int _Index, int _Total, int _Combine = 1, typename _Tp, size_t _Np>
  _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_CONST
  _SimdWrapper<_Tp, _Np / _Total * _Combine>
  __extract_part(const _SimdWrapper<_Tp, _Np> __x);

template <int _Index, int _Parts, int _Combine = 1, typename _Tp, typename _A0,
          typename... _As>
  _GLIBCXX_SIMD_INTRINSIC auto
  __extract_part(const _SimdTuple<_Tp, _A0, _As...>& __x);

// }}}
// _SizeList {{{
template <size_t _V0, size_t... _Values>
  struct _SizeList
  {
    template <size_t _I>
      static constexpr size_t
      _S_at(_SizeConstant<_I> = {})
      {
        if constexpr (_I == 0)
          return _V0;
        else
          return _SizeList<_Values...>::template _S_at<_I - 1>();
      }

    template <size_t _I>
      static constexpr auto
      _S_before(_SizeConstant<_I> = {})
      {
        if constexpr (_I == 0)
          return _SizeConstant<0>();
        else
          return _SizeConstant<
            _V0 + _SizeList<_Values...>::template _S_before<_I - 1>()>();
      }

    template <size_t _Np>
      static constexpr auto
      _S_pop_front(_SizeConstant<_Np> = {})
      {
        if constexpr (_Np == 0)
          return _SizeList();
        else
          return _SizeList<_Values...>::template _S_pop_front<_Np - 1>();
      }
  };

// }}}
// __extract_center {{{
template <typename _Tp, size_t _Np>
  _GLIBCXX_SIMD_INTRINSIC _SimdWrapper<_Tp, _Np / 2>
  __extract_center(_SimdWrapper<_Tp, _Np> __x)
  {
    static_assert(_Np >= 4);
    static_assert(_Np % 4 == 0); // x0 - x1 - x2 - x3 -> return {x1, x2}
#if _GLIBCXX_SIMD_X86INTRIN // {{{
    if constexpr (__have_avx512f && sizeof(_Tp) * _Np == 64)
      {
        const auto __intrin = __to_intrin(__x);
        if constexpr (is_integral_v<_Tp>)
          return __vector_bitcast<_Tp>(_mm512_castsi512_si256(
            _mm512_shuffle_i32x4(__intrin, __intrin,
                                 1 + 2 * 0x4 + 2 * 0x10 + 3 * 0x40)));
        else if constexpr (sizeof(_Tp) == 4)
          return __vector_bitcast<_Tp>(_mm512_castps512_ps256(
            _mm512_shuffle_f32x4(__intrin, __intrin,
                                 1 + 2 * 0x4 + 2 * 0x10 + 3 * 0x40)));
        else if constexpr (sizeof(_Tp) == 8)
          return __vector_bitcast<_Tp>(_mm512_castpd512_pd256(
            _mm512_shuffle_f64x2(__intrin, __intrin,
                                 1 + 2 * 0x4 + 2 * 0x10 + 3 * 0x40)));
        else
          __assert_unreachable<_Tp>();
      }
    else if constexpr (sizeof(_Tp) * _Np == 32 && is_floating_point_v<_Tp>)
      return __vector_bitcast<_Tp>(
        _mm_shuffle_pd(__lo128(__vector_bitcast<double>(__x)),
                       __hi128(__vector_bitcast<double>(__x)), 1));
    else if constexpr (sizeof(__x) == 32 && sizeof(_Tp) * _Np <= 32)
      return __vector_bitcast<_Tp>(
        _mm_alignr_epi8(__hi128(__vector_bitcast<_LLong>(__x)),
                        __lo128(__vector_bitcast<_LLong>(__x)),
                        sizeof(_Tp) * _Np / 4));
    else
#endif // _GLIBCXX_SIMD_X86INTRIN }}}
      {
        __vector_type_t<_Tp, _Np / 2> __r;
        __builtin_memcpy(&__r,
                         reinterpret_cast<const char*>(&__x)
                           + sizeof(_Tp) * _Np / 4,
                         sizeof(_Tp) * _Np / 2);
        return __r;
      }
  }

template <typename _Tp, typename _A0, typename... _As>
  _GLIBCXX_SIMD_INTRINSIC
  _SimdWrapper<_Tp, _SimdTuple<_Tp, _A0, _As...>::_S_size() / 2>
  __extract_center(const _SimdTuple<_Tp, _A0, _As...>& __x)
  {
    if constexpr (sizeof...(_As) == 0)
      return __extract_center(__x.first);
    else
      return __extract_part<1, 4, 2>(__x);
  }

// }}}
// __split_wrapper {{{
template <size_t... _Sizes, typename _Tp, typename... _As>
  auto
  __split_wrapper(_SizeList<_Sizes...>, const _SimdTuple<_Tp, _As...>& __x)
  {
    return split<_Sizes...>(
      fixed_size_simd<_Tp, _SimdTuple<_Tp, _As...>::_S_size()>(__private_init,
                                                               __x));
  }

// }}}

// split<simd>(simd) {{{
template <typename _V, typename _Ap,
          size_t _Parts = simd_size_v<typename _V::value_type, _Ap> / _V::size()>
  enable_if_t<simd_size_v<typename _V::value_type, _Ap> == _Parts * _V::size()
                && is_simd_v<_V>, array<_V, _Parts>>
  split(const simd<typename _V::value_type, _Ap>& __x)
  {
    using _Tp = typename _V::value_type;
    if constexpr (_Parts == 1)
      {
        return {simd_cast<_V>(__x)};
      }
    else if (__x._M_is_constprop())
      {
        return __generate_from_n_evaluations<_Parts, array<_V, _Parts>>(
          [&](auto __i) constexpr {
            return _V([&](auto __j) constexpr {
                     return __x[__i * _V::size() + __j];
                   });
          });
      }
    else if constexpr (
      __is_fixed_size_abi_v<_Ap>
      && (is_same_v<typename _V::abi_type, simd_abi::scalar>
          || (__is_fixed_size_abi_v<typename _V::abi_type>
              && sizeof(_V) == sizeof(_Tp) * _V::size() // _V doesn't have padding
              )))
      {
        // fixed_size -> fixed_size (w/o padding) or scalar
#ifdef _GLIBCXX_SIMD_USE_ALIASING_LOADS
        const __may_alias<_Tp>* const __element_ptr
          = reinterpret_cast<const __may_alias<_Tp>*>(&__data(__x));
        return __generate_from_n_evaluations<_Parts, array<_V, _Parts>>(
          [&](auto __i) constexpr {
            return _V(__element_ptr + __i * _V::size(), vector_aligned);
          });
#else
        const auto& __xx = __data(__x);
        return __generate_from_n_evaluations<_Parts, array<_V, _Parts>>(
          [&](auto __i) constexpr {
            [[maybe_unused]] constexpr size_t __offset
              = decltype(__i)::value * _V::size();
            return _V([&](auto __j) constexpr {
                     constexpr _SizeConstant<__j + __offset> __k;
                     return __xx[__k];
                   });
          });
#endif
      }
    else if constexpr (is_same_v<typename _V::abi_type, simd_abi::scalar>)
      {
        // normally memcpy should work here as well
        return __generate_from_n_evaluations<_Parts, array<_V, _Parts>>(
          [&](auto __i) constexpr { return __x[__i]; });
      }
    else
      {
        return __generate_from_n_evaluations<_Parts, array<_V, _Parts>>(
          [&](auto __i) constexpr {
            if constexpr (__is_fixed_size_abi_v<typename _V::abi_type>)
              return _V([&](auto __j) constexpr {
                       return __x[__i * _V::size() + __j];
                     });
            else
              return _V(__private_init,
                        __extract_part<decltype(__i)::value, _Parts>(__data(__x)));
          });
      }
  }

// }}}
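// Usage sketch (illustration only; stdx = std::experimental): splitting a simd
// into an array of equally sized pieces. The explicit template argument
// selects the piece type; the source width must be a whole multiple of it.
//
//   stdx::fixed_size_simd<int, 8> v([](int i) { return i; });
//   using V4 = stdx::fixed_size_simd<int, 4>;
//   std::array<V4, 2> halves = stdx::split<V4>(v);
//   // halves[0] holds elements 0..3, halves[1] holds elements 4..7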
// split<simd_mask>(simd_mask) {{{
template <typename _V, typename _Ap,
          size_t _Parts
            = simd_size_v<typename _V::simd_type::value_type, _Ap> / _V::size()>
  enable_if_t<is_simd_mask_v<_V>
                && simd_size_v<typename _V::simd_type::value_type, _Ap>
                     == _Parts * _V::size(), array<_V, _Parts>>
  split(const simd_mask<typename _V::simd_type::value_type, _Ap>& __x)
  {
    if constexpr (is_same_v<_Ap, typename _V::abi_type>)
      return {__x};
    else if constexpr (_Parts == 1)
      return {__proposed::static_simd_cast<_V>(__x)};
    else if constexpr (_Parts == 2 && __is_sse_abi<typename _V::abi_type>()
                         && __is_avx_abi<_Ap>())
      return {_V(__private_init, __lo128(__data(__x))),
              _V(__private_init, __hi128(__data(__x)))};
    else if constexpr (_V::size() <= __CHAR_BIT__ * sizeof(_ULLong))
      {
        const bitset __bits = __x.__to_bitset();
        return __generate_from_n_evaluations<_Parts, array<_V, _Parts>>(
          [&](auto __i) constexpr {
            constexpr size_t __offset = __i * _V::size();
            return _V(__bitset_init, (__bits >> __offset).to_ullong());
          });
      }
    else
      {
        return __generate_from_n_evaluations<_Parts, array<_V, _Parts>>(
          [&](auto __i) constexpr {
            constexpr size_t __offset = __i * _V::size();
            return _V(__private_init,
                      [&](auto __j) constexpr {
                        return __x[__j + __offset];
                      });
          });
      }
  }

// }}}
// split<_Sizes...>(simd) {{{
template <size_t... _Sizes, typename _Tp, typename _Ap, typename>
  _GLIBCXX_SIMD_ALWAYS_INLINE
  tuple<simd<_Tp, simd_abi::deduce_t<_Tp, _Sizes>>...>
  split(const simd<_Tp, _Ap>& __x)
  {
    using _SL = _SizeList<_Sizes...>;
    using _Tuple = tuple<__deduced_simd<_Tp, _Sizes>...>;
    constexpr size_t _Np = simd_size_v<_Tp, _Ap>;
    constexpr size_t _N0 = _SL::template _S_at<0>();
    using _V = __deduced_simd<_Tp, _N0>;

    if (__x._M_is_constprop())
      return __generate_from_n_evaluations<sizeof...(_Sizes), _Tuple>(
        [&](auto __i) constexpr {
          using _Vi = __deduced_simd<_Tp, _SL::_S_at(__i)>;
          constexpr size_t __offset = _SL::_S_before(__i);
          return _Vi([&](auto __j) constexpr { return __x[__offset + __j]; });
        });
    else if constexpr (_Np == _N0)
      {
        static_assert(sizeof...(_Sizes) == 1);
        return {simd_cast<_V>(__x)};
      }
    else if constexpr // split from fixed_size, such that __x::first.size == _N0
      (__is_fixed_size_abi_v<_Ap>
         && __fixed_size_storage_t<_Tp, _Np>::_S_first_size == _N0)
      {
        static_assert(
          !__is_fixed_size_abi_v<typename _V::abi_type>,
          "How can <_Tp, _Np> be a single _SimdTuple entry but a "
          "fixed_size_simd when deduced?");
        // extract first and recurse (__split_wrapper is needed to deduce a new
        // _Sizes pack)
        return tuple_cat(make_tuple(_V(__private_init, __data(__x).first)),
                         __split_wrapper(_SL::template _S_pop_front<1>(),
                                         __data(__x).second));
      }
    else if constexpr ((!is_same_v<simd_abi::scalar,
                                   simd_abi::deduce_t<_Tp, _Sizes>> && ...)
                         && (!__is_fixed_size_abi_v<
                               simd_abi::deduce_t<_Tp, _Sizes>> && ...))
      {
        if constexpr (((_Sizes * 2 == _Np) && ...))
          return {{__private_init, __extract_part<0, 2>(__data(__x))},
                  {__private_init, __extract_part<1, 2>(__data(__x))}};
        else if constexpr (is_same_v<_SizeList<_Sizes...>,
                                     _SizeList<_Np / 3, _Np / 3, _Np / 3>>)
          return {{__private_init, __extract_part<0, 3>(__data(__x))},
                  {__private_init, __extract_part<1, 3>(__data(__x))},
                  {__private_init, __extract_part<2, 3>(__data(__x))}};
        else if constexpr (is_same_v<_SizeList<_Sizes...>,
                                     _SizeList<2 * _Np / 3, _Np / 3>>)
          return {{__private_init, __extract_part<0, 3, 2>(__data(__x))},
                  {__private_init, __extract_part<2, 3>(__data(__x))}};
        else if constexpr (is_same_v<_SizeList<_Sizes...>,
                                     _SizeList<_Np / 3, 2 * _Np / 3>>)
          return {{__private_init, __extract_part<0, 3>(__data(__x))},
                  {__private_init, __extract_part<1, 3, 2>(__data(__x))}};
        else if constexpr (is_same_v<_SizeList<_Sizes...>,
                                     _SizeList<_Np / 2, _Np / 4, _Np / 4>>)
          return {{__private_init, __extract_part<0, 2>(__data(__x))},
                  {__private_init, __extract_part<2, 4>(__data(__x))},
                  {__private_init, __extract_part<3, 4>(__data(__x))}};
        else if constexpr (is_same_v<_SizeList<_Sizes...>,
                                     _SizeList<_Np / 4, _Np / 4, _Np / 2>>)
          return {{__private_init, __extract_part<0, 4>(__data(__x))},
                  {__private_init, __extract_part<1, 4>(__data(__x))},
                  {__private_init, __extract_part<1, 2>(__data(__x))}};
        else if constexpr (is_same_v<_SizeList<_Sizes...>,
                                     _SizeList<_Np / 4, _Np / 2, _Np / 4>>)
          return {{__private_init, __extract_part<0, 4>(__data(__x))},
                  {__private_init, __extract_center(__data(__x))},
                  {__private_init, __extract_part<3, 4>(__data(__x))}};
        else if constexpr (((_Sizes * 4 == _Np) && ...))
          return {{__private_init, __extract_part<0, 4>(__data(__x))},
                  {__private_init, __extract_part<1, 4>(__data(__x))},
                  {__private_init, __extract_part<2, 4>(__data(__x))},
                  {__private_init, __extract_part<3, 4>(__data(__x))}};
        // else fall through
      }
#ifdef _GLIBCXX_SIMD_USE_ALIASING_LOADS
    const __may_alias<_Tp>* const __element_ptr
      = reinterpret_cast<const __may_alias<_Tp>*>(&__x);
    return __generate_from_n_evaluations<sizeof...(_Sizes), _Tuple>(
      [&](auto __i) constexpr {
        using _Vi = __deduced_simd<_Tp, _SL::_S_at(__i)>;
        constexpr size_t __offset = _SL::_S_before(__i);
        constexpr size_t __base_align = alignof(simd<_Tp, _Ap>);
        constexpr size_t __a
          = __base_align - ((__offset * sizeof(_Tp)) % __base_align);
        constexpr size_t __b = ((__a - 1) & __a) ^ __a;
        constexpr size_t __alignment = __b == 0 ? __a : __b;
        return _Vi(__element_ptr + __offset, overaligned<__alignment>);
      });
#else
    return __generate_from_n_evaluations<sizeof...(_Sizes), _Tuple>(
      [&](auto __i) constexpr {
        using _Vi = __deduced_simd<_Tp, _SL::_S_at(__i)>;
        const auto& __xx = __data(__x);
        using _Offset = decltype(_SL::_S_before(__i));
        return _Vi([&](auto __j) constexpr {
                 constexpr _SizeConstant<_Offset::value + __j> __k;
                 return __xx[__k];
               });
      });
#endif
  }

// }}}
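// Usage sketch (illustration only; stdx = std::experimental): splitting into
// unequal pieces with an explicit size pack; the sizes must add up to the
// source width and each piece gets the ABI deduced for its size.
//
//   stdx::fixed_size_simd<int, 8> v([](int i) { return i; });
//   auto [a, b, c] = stdx::split<2, 2, 4>(v);
//   // a: elements 0..1, b: elements 2..3, c: elements 4..7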
// __subscript_in_pack {{{
template <size_t _I, typename _Tp, typename _Ap, typename... _As>
  _GLIBCXX_SIMD_INTRINSIC constexpr _Tp
  __subscript_in_pack(const simd<_Tp, _Ap>& __x, const simd<_Tp, _As>&... __xs)
  {
    if constexpr (_I < simd_size_v<_Tp, _Ap>)
      return __x[_I];
    else
      return __subscript_in_pack<_I - simd_size_v<_Tp, _Ap>>(__xs...);
  }

// }}}
// __store_pack_of_simd {{{
template <typename _Tp, typename _A0, typename... _As>
  _GLIBCXX_SIMD_INTRINSIC void
  __store_pack_of_simd(char* __mem, const simd<_Tp, _A0>& __x0,
                       const simd<_Tp, _As>&... __xs)
  {
    constexpr size_t __n_bytes = sizeof(_Tp) * simd_size_v<_Tp, _A0>;
    __builtin_memcpy(__mem, &__data(__x0), __n_bytes);
    if constexpr (sizeof...(__xs) > 0)
      __store_pack_of_simd(__mem + __n_bytes, __xs...);
  }

// }}}
// concat(simd...) {{{
template <typename _Tp, typename... _As, typename = __detail::__odr_helper>
  inline _GLIBCXX_SIMD_CONSTEXPR
  simd<_Tp, simd_abi::deduce_t<_Tp, (simd_size_v<_Tp, _As> + ...)>>
  concat(const simd<_Tp, _As>&... __xs)
  {
    using _Rp = __deduced_simd<_Tp, (simd_size_v<_Tp, _As> + ...)>;
    if constexpr (sizeof...(__xs) == 1)
      return simd_cast<_Rp>(__xs...);
    else if ((... && __xs._M_is_constprop()))
      return simd<_Tp,
                  simd_abi::deduce_t<_Tp, (simd_size_v<_Tp, _As> + ...)>>(
        [&](auto __i) constexpr { return __subscript_in_pack<__i>(__xs...); });
    else
      {
        _Rp __r{};
        __store_pack_of_simd(reinterpret_cast<char*>(&__data(__r)), __xs...);
        return __r;
      }
  }

// }}}
// concat(array<simd>) {{{
template <typename _Tp, typename _Abi, size_t _Np>
  _GLIBCXX_SIMD_ALWAYS_INLINE
  _GLIBCXX_SIMD_CONSTEXPR __deduced_simd<_Tp, simd_size_v<_Tp, _Abi> * _Np>
  concat(const array<simd<_Tp, _Abi>, _Np>& __x)
  {
    return __call_with_subscripts<_Np>(__x, [](const auto&... __xs) {
             return concat(__xs...);
           });
  }

// }}}
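// Usage sketch (illustration only; stdx = std::experimental): concat is the
// inverse of split and also accepts an array of equally typed simds.
//
//   stdx::fixed_size_simd<int, 8> v([](int i) { return i; });
//   auto [lo, hi] = stdx::split<4, 4>(v);
//   auto w = stdx::concat(lo, hi);             // 8 elements, equal to v
//   auto u = stdx::concat(std::array{lo, hi}); // same, from an array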
/// @cond undocumented
// _SmartReference {{{
template <typename _Up, typename _Accessor = _Up,
          typename _ValueType = typename _Up::value_type>
  class _SmartReference
  {
    friend _Accessor;
    int _M_index;
    _Up& _M_obj;

    _GLIBCXX_SIMD_INTRINSIC constexpr _ValueType
    _M_read() const noexcept
    {
      if constexpr (is_arithmetic_v<_Up>)
        return _M_obj;
      else
        return _M_obj[_M_index];
    }

    template <typename _Tp>
      _GLIBCXX_SIMD_INTRINSIC constexpr void
      _M_write(_Tp&& __x) const
      { _Accessor::_S_set(_M_obj, _M_index, static_cast<_Tp&&>(__x)); }

  public:
    _GLIBCXX_SIMD_INTRINSIC constexpr
    _SmartReference(_Up& __o, int __i) noexcept
    : _M_index(__i), _M_obj(__o) {}

    using value_type = _ValueType;

    _GLIBCXX_SIMD_INTRINSIC _SmartReference(const _SmartReference&) = delete;

    _GLIBCXX_SIMD_INTRINSIC constexpr
    operator value_type() const noexcept
    { return _M_read(); }

    template <typename _Tp,
              typename
                = _ValuePreservingOrInt<__remove_cvref_t<_Tp>, value_type>>
      _GLIBCXX_SIMD_INTRINSIC constexpr _SmartReference
      operator=(_Tp&& __x) &&
      {
        _M_write(static_cast<_Tp&&>(__x));
        return {_M_obj, _M_index};
      }

#define _GLIBCXX_SIMD_OP_(__op)                                             \
    template <typename _Tp,                                                 \
              typename _TT                                                  \
                = decltype(declval<value_type>() __op declval<_Tp>()),      \
              typename = _ValuePreservingOrInt<__remove_cvref_t<_Tp>, _TT>, \
              typename = _ValuePreservingOrInt<_TT, value_type>>            \
      _GLIBCXX_SIMD_INTRINSIC constexpr _SmartReference                     \
      operator __op##=(_Tp&& __x) &&                                        \
      {                                                                     \
        const value_type& __lhs = _M_read();                                \
        _M_write(__lhs __op __x);                                           \
        return {_M_obj, _M_index};                                          \
      }
    _GLIBCXX_SIMD_ALL_ARITHMETICS(_GLIBCXX_SIMD_OP_);
    _GLIBCXX_SIMD_ALL_SHIFTS(_GLIBCXX_SIMD_OP_);
    _GLIBCXX_SIMD_ALL_BINARY(_GLIBCXX_SIMD_OP_);
#undef _GLIBCXX_SIMD_OP_

    template <typename _Tp = void,
              typename
                = decltype(++declval<conditional_t<true, value_type, _Tp>&>())>
      _GLIBCXX_SIMD_INTRINSIC constexpr _SmartReference
      operator++() &&
      {
        value_type __x = _M_read();
        _M_write(++__x);
        return {_M_obj, _M_index};
      }

    template <typename _Tp = void,
              typename
                = decltype(declval<conditional_t<true, value_type, _Tp>&>()++)>
      _GLIBCXX_SIMD_INTRINSIC constexpr value_type
      operator++(int) &&
      {
        const value_type __r = _M_read();
        value_type __x = __r;
        _M_write(++__x);
        return __r;
      }

    template <typename _Tp = void,
              typename
                = decltype(--declval<conditional_t<true, value_type, _Tp>&>())>
      _GLIBCXX_SIMD_INTRINSIC constexpr _SmartReference
      operator--() &&
      {
        value_type __x = _M_read();
        _M_write(--__x);
        return {_M_obj, _M_index};
      }

    template <typename _Tp = void,
              typename
                = decltype(declval<conditional_t<true, value_type, _Tp>&>()--)>
      _GLIBCXX_SIMD_INTRINSIC constexpr value_type
      operator--(int) &&
      {
        const value_type __r = _M_read();
        value_type __x = __r;
        _M_write(--__x);
        return __r;
      }

    _GLIBCXX_SIMD_INTRINSIC friend void
    swap(_SmartReference&& __a, _SmartReference&& __b) noexcept(
      conjunction<
        is_nothrow_constructible<value_type, _SmartReference&&>,
        is_nothrow_assignable<_SmartReference&&, value_type&&>>::value)
    {
      value_type __tmp = static_cast<_SmartReference&&>(__a);
      static_cast<_SmartReference&&>(__a) = static_cast<value_type>(__b);
      static_cast<_SmartReference&&>(__b) = std::move(__tmp);
    }

    _GLIBCXX_SIMD_INTRINSIC friend void
    swap(value_type& __a, _SmartReference&& __b) noexcept(
      conjunction<
        is_nothrow_constructible<value_type, value_type&&>,
        is_nothrow_assignable<value_type&, value_type&&>,
        is_nothrow_assignable<_SmartReference&&, value_type&&>>::value)
    {
      value_type __tmp(std::move(__a));
      __a = static_cast<value_type>(__b);
      static_cast<_SmartReference&&>(__b) = std::move(__tmp);
    }

    _GLIBCXX_SIMD_INTRINSIC friend void
    swap(_SmartReference&& __a, value_type& __b) noexcept(
      conjunction<
        is_nothrow_constructible<value_type, _SmartReference&&>,
        is_nothrow_assignable<value_type&, value_type&&>,
        is_nothrow_assignable<_SmartReference&&, value_type&&>>::value)
    {
      value_type __tmp(__a);
      static_cast<_SmartReference&&>(__a) = std::move(__b);
      __b = std::move(__tmp);
    }
  };

// }}}
// __scalar_abi_wrapper {{{
template <int _Bytes>
  struct __scalar_abi_wrapper
  {
    template <typename _Tp> static constexpr size_t _S_full_size = 1;
    template <typename _Tp> static constexpr size_t _S_size = 1;
    template <typename _Tp> static constexpr bool _S_is_partial = false;

    template <typename _Tp, typename _Abi = simd_abi::scalar>
      static constexpr bool _S_is_valid_v
        = _Abi::template _IsValid<_Tp>::value && sizeof(_Tp) == _Bytes;
  };

// }}}
// __decay_abi metafunction {{{
template <typename _Tp>
  struct __decay_abi { using type = _Tp; };

template <int _Bytes>
  struct __decay_abi<__scalar_abi_wrapper<_Bytes>>
  { using type = simd_abi::scalar; };

// }}}
// __find_next_valid_abi metafunction {{{1
// Given an ABI tag A<N>, find an N2 < N such that A<N2>::_S_is_valid_v<_Tp> ==
// true, N2 is a power-of-2, and A<N2>::_S_is_partial<_Tp> is false. The
// recursion breaks at 2 elements in the resulting ABI tag; in that case
// type::_S_is_valid_v<_Tp> may be false.
template <template <int> class _Abi, int _Bytes, typename _Tp>
  struct __find_next_valid_abi
  {
    static constexpr auto
    _S_choose()
    {
      constexpr int _NextBytes = std::__bit_ceil(_Bytes) / 2;
      using _NextAbi = _Abi<_NextBytes>;
      if constexpr (_NextBytes < sizeof(_Tp) * 2) // break recursion
        return _Abi<_Bytes>();
      else if constexpr (_NextAbi::template _S_is_partial<_Tp> == false
                           && _NextAbi::template _S_is_valid_v<_Tp>)
        return _NextAbi();
      else
        return __find_next_valid_abi<_Abi, _NextBytes, _Tp>::_S_choose();
    }

    using type = decltype(_S_choose());
  };

template <int _Bytes, typename _Tp>
  struct __find_next_valid_abi<__scalar_abi_wrapper, _Bytes, _Tp>
  { using type = simd_abi::scalar; };

// _AbiList {{{1
template <template <int> class...>
  struct _AbiList
  {
    template <typename, int> static constexpr bool _S_has_valid_abi = false;
    template <typename, int> using _FirstValidAbi = void;
    template <typename, int> using _BestAbi = void;
  };

template <template <int> class _A0, template <int> class... _Rest>
  struct _AbiList<_A0, _Rest...>
  {
    template <typename _Tp, int _Np>
      static constexpr bool _S_has_valid_abi
        = _A0<sizeof(_Tp) * _Np>::template _S_is_valid_v<_Tp>
            || _AbiList<_Rest...>::template _S_has_valid_abi<_Tp, _Np>;

    template <typename _Tp, int _Np>
      using _FirstValidAbi = conditional_t<
        _A0<sizeof(_Tp) * _Np>::template _S_is_valid_v<_Tp>,
        typename __decay_abi<_A0<sizeof(_Tp) * _Np>>::type,
        typename _AbiList<_Rest...>::template _FirstValidAbi<_Tp, _Np>>;

    template <typename _Tp, int _Np>
      static constexpr auto
      _S_determine_best_abi()
      {
        static_assert(_Np >= 1);
        constexpr int _Bytes = sizeof(_Tp) * _Np;
        if constexpr (_Np == 1)
          return __make_dependent_t<_Tp, simd_abi::scalar>{};
        else
          {
            constexpr int __fullsize = _A0<_Bytes>::template _S_full_size<_Tp>;
            // _A0<_Bytes> is good if:
            // 1. The ABI tag is valid for _Tp
            // 2. The storage overhead is no more than padding to fill the next
            //    power-of-2 number of bytes
            if constexpr (_A0<_Bytes>::template _S_is_valid_v<_Tp>
                            && __fullsize / 2 < _Np)
              return typename __decay_abi<_A0<_Bytes>>::type{};
            else
              {
                using _Bp =
                  typename __find_next_valid_abi<_A0, _Bytes, _Tp>::type;
                if constexpr (_Bp::template _S_is_valid_v<_Tp>
                                && _Bp::template _S_size<_Tp> <= _Np)
                  return _Bp{};
                else
                  return
                    typename _AbiList<_Rest...>::template _BestAbi<_Tp, _Np>{};
              }
          }
      }

    template <typename _Tp, int _Np>
      using _BestAbi = decltype(_S_determine_best_abi<_Tp, _Np>());
  };

// }}}1
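// Worked example (editor's illustration of the selection logic above): for
// _Tp = float and _Np = 3 on an SSE-only x86 target, _S_determine_best_abi
// first rejects _VecBltnBtmsk<12> (no AVX-512 mask registers) and then checks
// _VecBuiltin<12>: its full size is 4 elements (16 bytes), and 4/2 < 3, so a
// partially used 16-byte register stays within the allowed padding overhead
// and _VecBuiltin<12> is chosen. Only if no such partially used register
// qualified would __find_next_valid_abi halve the byte count in search of a
// valid, non-partial power-of-2 tag, before moving on to the next ABI in the
// list (and ultimately scalar).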
// The following lists all native ABIs, which makes them accessible to
// simd_abi::deduce and select_best_vector_type_t (for fixed_size). Order
// matters: whatever comes first has higher priority.
using _AllNativeAbis = _AbiList<simd_abi::_VecBltnBtmsk, simd_abi::_VecBuiltin,
                                __scalar_abi_wrapper>;

// valid _SimdTraits specialization {{{1
template <typename _Tp, typename _Abi>
  struct _SimdTraits<_Tp, _Abi, void_t<typename _Abi::template _IsValid<_Tp>>>
  : _Abi::template __traits<_Tp> {};

// __deduce_impl specializations {{{1
// try all native ABIs (including scalar) first
template <typename _Tp, size_t _Np>
  struct __deduce_impl<
    _Tp, _Np, enable_if_t<_AllNativeAbis::template _S_has_valid_abi<_Tp, _Np>>>
  { using type = _AllNativeAbis::_FirstValidAbi<_Tp, _Np>; };

// fall back to fixed_size only if scalar and native ABIs don't match
template <typename _Tp, size_t _Np, typename = void>
  struct __deduce_fixed_size_fallback {};

template <typename _Tp, size_t _Np>
  struct __deduce_fixed_size_fallback<_Tp, _Np,
    enable_if_t<simd_abi::fixed_size<_Np>::template _S_is_valid_v<_Tp>>>
  { using type = simd_abi::fixed_size<_Np>; };

template <typename _Tp, size_t _Np, typename>
  struct __deduce_impl : public __deduce_fixed_size_fallback<_Tp, _Np> {};

//}}}1
/// @endcond
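// Usage sketch (illustration only; stdx = std::experimental): simd_abi::deduce
// resolves through __deduce_impl, so it prefers the native ABIs listed above
// and falls back to fixed_size otherwise. The chosen tag is target-dependent.
//
//   using A = stdx::simd_abi::deduce_t<float, 4>; // e.g. an SSE tag on x86
//   static_assert(stdx::simd_size_v<float, A> == 4);
//   stdx::simd<float, A> v = 1.f;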
// simd_mask {{{
template <typename _Tp, typename _Abi>
  class simd_mask : public _SimdTraits<_Tp, _Abi>::_MaskBase
  {
    // types, tags, and friends {{{
    using _Traits = _SimdTraits<_Tp, _Abi>;
    using _MemberType = typename _Traits::_MaskMember;

    // We map all masks with equal element sizeof to a single integer type, the
    // one given by __int_for_sizeof_t<_Tp>. This is the approach
    // [[gnu::vector_size(N)]] types take as well and it reduces the number of
    // template specializations in the implementation classes.
    using _Ip = __int_for_sizeof_t<_Tp>;
    static constexpr _Ip* _S_type_tag = nullptr;

    friend typename _Traits::_MaskBase;
    friend class simd<_Tp, _Abi>;       // to construct masks on return
    friend typename _Traits::_SimdImpl; // to construct masks on return and
                                        // inspect data on masked operations
  public:
    using _Impl = typename _Traits::_MaskImpl;
    friend _Impl;

    // }}}
    // member types {{{
    using value_type = bool;
    using reference = _SmartReference<_MemberType, _Impl, value_type>;
    using simd_type = simd<_Tp, _Abi>;
    using abi_type = _Abi;

    // }}}
    static constexpr size_t
    size() // {{{
    { return __size_or_zero_v<_Tp, _Abi>; }

    // }}}
    // constructors & assignment {{{
    simd_mask() = default;
    simd_mask(const simd_mask&) = default;
    simd_mask(simd_mask&&) = default;
    simd_mask& operator=(const simd_mask&) = default;
    simd_mask& operator=(simd_mask&&) = default;

    // }}}
    // access to internal representation (optional feature) {{{
    _GLIBCXX_SIMD_ALWAYS_INLINE explicit
    simd_mask(typename _Traits::_MaskCastType __init)
    : _M_data{__init} {}
    // conversion to the internal type is done in _MaskBase

    // }}}
    // bitset interface (extension to be proposed) {{{
    // TS_FEEDBACK:
    // Conversion of simd_mask to and from bitset makes it much easier to
    // interface with other facilities. I suggest adding `static
    // simd_mask::from_bitset` and `simd_mask::to_bitset`.
    _GLIBCXX_SIMD_ALWAYS_INLINE static simd_mask
    __from_bitset(bitset<size()> __bs)
    { return {__bitset_init, __bs}; }

    _GLIBCXX_SIMD_ALWAYS_INLINE bitset<size()>
    __to_bitset() const
    { return _Impl::_S_to_bits(_M_data)._M_to_bitset(); }

    // }}}
    // explicit broadcast constructor {{{
    _GLIBCXX_SIMD_ALWAYS_INLINE explicit _GLIBCXX_SIMD_CONSTEXPR
    simd_mask(value_type __x)
    : _M_data(_Impl::template _S_broadcast<_Ip>(__x)) {}

    // }}}
    // implicit type conversion constructor {{{
#ifdef _GLIBCXX_SIMD_ENABLE_IMPLICIT_MASK_CAST
    // proposed improvement
    template <typename _Up, typename _A2,
              typename = enable_if_t<simd_size_v<_Up, _A2> == size()>>
      _GLIBCXX_SIMD_ALWAYS_INLINE explicit(sizeof(_MemberType)
        != sizeof(typename _SimdTraits<_Up, _A2>::_MaskMember))
      simd_mask(const simd_mask<_Up, _A2>& __x)
      : simd_mask(__proposed::static_simd_cast<simd_mask>(__x)) {}
#else
    // conforming to ISO/IEC 19570:2018
    template <typename _Up, typename = enable_if_t<conjunction<
                is_same<abi_type, simd_abi::fixed_size<size()>>,
                is_same<_Up, _Up>>::value>>
      _GLIBCXX_SIMD_ALWAYS_INLINE
      simd_mask(const simd_mask<_Up, simd_abi::fixed_size<size()>>& __x)
      : _M_data(_Impl::_S_from_bitmask(__data(__x), _S_type_tag)) {}
#endif

    // }}}
    // load constructor {{{
    template <typename _Flags>
      _GLIBCXX_SIMD_ALWAYS_INLINE
      simd_mask(const value_type* __mem, _Flags)
      : _M_data(_Impl::template _S_load<_Ip>(
                  _Flags::template _S_apply<simd_mask>(__mem))) {}

    template <typename _Flags>
      _GLIBCXX_SIMD_ALWAYS_INLINE
      simd_mask(const value_type* __mem, simd_mask __k, _Flags)
      : _M_data{}
      {
        _M_data
          = _Impl::_S_masked_load(_M_data, __k._M_data,
                                  _Flags::template _S_apply<simd_mask>(__mem));
      }

    // }}}
    // loads [simd_mask.load] {{{
    template <typename _Flags>
      _GLIBCXX_SIMD_ALWAYS_INLINE void
      copy_from(const value_type* __mem, _Flags)
      {
        _M_data = _Impl::template _S_load<_Ip>(
                    _Flags::template _S_apply<simd_mask>(__mem));
      }

    // }}}
    // stores [simd_mask.store] {{{
    template <typename _Flags>
      _GLIBCXX_SIMD_ALWAYS_INLINE void
      copy_to(value_type* __mem, _Flags) const
      { _Impl::_S_store(_M_data, _Flags::template _S_apply<simd_mask>(__mem)); }

    // }}}
    // scalar access {{{
    _GLIBCXX_SIMD_ALWAYS_INLINE reference
    operator[](size_t __i)
    {
      if (__i >= size())
        __invoke_ub("Subscript %d is out of range [0, %d]", __i, size() - 1);
      return {_M_data, int(__i)};
    }

    _GLIBCXX_SIMD_ALWAYS_INLINE value_type
    operator[](size_t __i) const
    {
      if (__i >= size())
        __invoke_ub("Subscript %d is out of range [0, %d]", __i, size() - 1);
      if constexpr (__is_scalar_abi<_Abi>())
        return _M_data;
      else
        return static_cast<bool>(_M_data[__i]);
    }

    // }}}
    // negation {{{
    _GLIBCXX_SIMD_ALWAYS_INLINE simd_mask
    operator!() const
    { return {__private_init, _Impl::_S_bit_not(_M_data)}; }

    // }}}
    // simd_mask binary operators [simd_mask.binary] {{{
#ifdef _GLIBCXX_SIMD_ENABLE_IMPLICIT_MASK_CAST
    // simd_mask<int> && simd_mask<uint> needs disambiguation
    template <typename _Up, typename _A2,
              typename
                = enable_if_t<is_convertible_v<simd_mask<_Up, _A2>, simd_mask>>>
      _GLIBCXX_SIMD_ALWAYS_INLINE friend simd_mask
      operator&&(const simd_mask& __x, const simd_mask<_Up, _A2>& __y)
      {
        return {__private_init,
                _Impl::_S_logical_and(__x._M_data, simd_mask(__y)._M_data)};
      }

    template <typename _Up, typename _A2,
              typename
                = enable_if_t<is_convertible_v<simd_mask<_Up, _A2>, simd_mask>>>
      _GLIBCXX_SIMD_ALWAYS_INLINE friend simd_mask
      operator||(const simd_mask& __x, const simd_mask<_Up, _A2>& __y)
      {
        return {__private_init,
                _Impl::_S_logical_or(__x._M_data, simd_mask(__y)._M_data)};
      }
#endif // _GLIBCXX_SIMD_ENABLE_IMPLICIT_MASK_CAST

    _GLIBCXX_SIMD_ALWAYS_INLINE friend simd_mask
    operator&&(const simd_mask& __x, const simd_mask& __y)
    { return {__private_init, _Impl::_S_logical_and(__x._M_data, __y._M_data)}; }

    _GLIBCXX_SIMD_ALWAYS_INLINE friend simd_mask
    operator||(const simd_mask& __x, const simd_mask& __y)
    { return {__private_init, _Impl::_S_logical_or(__x._M_data, __y._M_data)}; }

    _GLIBCXX_SIMD_ALWAYS_INLINE friend simd_mask
    operator&(const simd_mask& __x, const simd_mask& __y)
    { return {__private_init, _Impl::_S_bit_and(__x._M_data, __y._M_data)}; }

    _GLIBCXX_SIMD_ALWAYS_INLINE friend simd_mask
    operator|(const simd_mask& __x, const simd_mask& __y)
    { return {__private_init, _Impl::_S_bit_or(__x._M_data, __y._M_data)}; }

    _GLIBCXX_SIMD_ALWAYS_INLINE friend simd_mask
    operator^(const simd_mask& __x, const simd_mask& __y)
    { return {__private_init, _Impl::_S_bit_xor(__x._M_data, __y._M_data)}; }

    _GLIBCXX_SIMD_ALWAYS_INLINE friend simd_mask&
    operator&=(simd_mask& __x, const simd_mask& __y)
    {
      __x._M_data = _Impl::_S_bit_and(__x._M_data, __y._M_data);
      return __x;
    }

    _GLIBCXX_SIMD_ALWAYS_INLINE friend simd_mask&
    operator|=(simd_mask& __x, const simd_mask& __y)
    {
      __x._M_data = _Impl::_S_bit_or(__x._M_data, __y._M_data);
      return __x;
    }

    _GLIBCXX_SIMD_ALWAYS_INLINE friend simd_mask&
    operator^=(simd_mask& __x, const simd_mask& __y)
    {
      __x._M_data = _Impl::_S_bit_xor(__x._M_data, __y._M_data);
      return __x;
    }

    // }}}
    // simd_mask compares [simd_mask.comparison] {{{
    _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend simd_mask
    operator==(const simd_mask& __x, const simd_mask& __y)
    { return !operator!=(__x, __y); }

    _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend simd_mask
    operator!=(const simd_mask& __x, const simd_mask& __y)
    { return {__private_init, _Impl::_S_bit_xor(__x._M_data, __y._M_data)}; }

    // }}}
    // private_init ctor {{{
    _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR
    simd_mask(_PrivateInit, typename _Traits::_MaskMember __init)
    : _M_data(__init) {}

    // }}}
    // private_init generator ctor {{{
    template <typename _Fp, typename = decltype(bool(declval<_Fp>()(size_t())))>
      _GLIBCXX_SIMD_INTRINSIC constexpr
      simd_mask(_PrivateInit, _Fp&& __gen)
      : _M_data()
      {
        __execute_n_times<size()>([&](auto __i) constexpr {
          _Impl::_S_set(_M_data, __i, __gen(__i));
        });
      }

    // }}}
    // bitset_init ctor {{{
    _GLIBCXX_SIMD_INTRINSIC simd_mask(_BitsetInit, bitset<size()> __init)
    : _M_data(
        _Impl::_S_from_bitmask(_SanitizedBitMask<size()>(__init), _S_type_tag))
    {}

    // }}}
    // __cvt {{{
    // TS_FEEDBACK:
    // The conversion operator this implements should be a ctor on simd_mask.
    // Once you call .__cvt() on a simd_mask it converts conveniently.
    // A useful variation: add `explicit(sizeof(_Tp) != sizeof(_Up))`
    struct _CvtProxy
    {
      template <typename _Up, typename _A2,
                typename
                  = enable_if_t<simd_size_v<_Up, _A2> == simd_size_v<_Tp, _Abi>>>
        _GLIBCXX_SIMD_ALWAYS_INLINE
        operator simd_mask<_Up, _A2>() &&
        {
          using namespace std::experimental::__proposed;
          return static_simd_cast<simd_mask<_Up, _A2>>(_M_data);
        }

      const simd_mask<_Tp, _Abi>& _M_data;
    };

    _GLIBCXX_SIMD_INTRINSIC _CvtProxy
    __cvt() const
    { return {*this}; }

    // }}}
    // operator?: overloads (suggested extension) {{{
#ifdef __GXX_CONDITIONAL_IS_OVERLOADABLE__
    _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend simd_mask
    operator?:(const simd_mask& __k, const simd_mask& __where_true,
               const simd_mask& __where_false)
    {
      auto __ret = __where_false;
      _Impl::_S_masked_assign(__k._M_data, __ret._M_data, __where_true._M_data);
      return __ret;
    }

    template <typename _U1, typename _U2,
              typename _Rp = simd<common_type_t<_U1, _U2>, _Abi>,
              typename = enable_if_t<conjunction_v<
                is_convertible<_U1, _Rp>, is_convertible<_U2, _Rp>,
                is_convertible<simd_mask, typename _Rp::mask_type>>>>
      _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend _Rp
      operator?:(const simd_mask& __k, const _U1& __where_true,
                 const _U2& __where_false)
      {
        _Rp __ret = __where_false;
        _Rp::_Impl::_S_masked_assign(
          __data(static_cast<typename _Rp::mask_type>(__k)), __data(__ret),
          __data(static_cast<_Rp>(__where_true)));
        return __ret;
      }

#ifdef _GLIBCXX_SIMD_ENABLE_IMPLICIT_MASK_CAST
    template <typename _Kp, typename _Ak, typename _Up, typename _Au,
              typename = enable_if_t<
                conjunction_v<is_convertible<simd_mask<_Kp, _Ak>, simd_mask>,
                              is_convertible<simd_mask<_Up, _Au>, simd_mask>>>>
      _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend simd_mask
      operator?:(const simd_mask<_Kp, _Ak>& __k, const simd_mask& __where_true,
                 const simd_mask<_Up, _Au>& __where_false)
      {
        simd_mask __ret = __where_false;
        _Impl::_S_masked_assign(simd_mask(__k)._M_data, __ret._M_data,
                                __where_true._M_data);
        return __ret;
      }
#endif // _GLIBCXX_SIMD_ENABLE_IMPLICIT_MASK_CAST
#endif // __GXX_CONDITIONAL_IS_OVERLOADABLE__

    // }}}
    // _M_is_constprop {{{
    _GLIBCXX_SIMD_INTRINSIC constexpr bool
    _M_is_constprop() const
    {
      if constexpr (__is_scalar_abi<_Abi>())
        return __builtin_constant_p(_M_data);
      else
        return _M_data._M_is_constprop();
    }

    // }}}

  private:
    friend const auto& __data<_Tp, abi_type>(const simd_mask&);
    friend auto& __data<_Tp, abi_type>(simd_mask&);
    alignas(_Traits::_S_mask_align) _MemberType _M_data;
  };

// }}}

/// @cond undocumented
// __data(simd_mask) {{{
template <typename _Tp, typename _Ap>
  _GLIBCXX_SIMD_INTRINSIC constexpr const auto&
  __data(const simd_mask<_Tp, _Ap>& __x)
  { return __x._M_data; }

template <typename _Tp, typename _Ap>
  _GLIBCXX_SIMD_INTRINSIC constexpr auto&
  __data(simd_mask<_Tp, _Ap>& __x)
  { return __x._M_data; }

// }}}
/// @endcond
// simd_mask reductions [simd_mask.reductions] {{{
template <typename _Tp, typename _Abi>
  _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR bool
  all_of(const simd_mask<_Tp, _Abi>& __k) noexcept
  {
    if (__builtin_is_constant_evaluated() || __k._M_is_constprop())
      {
        for (size_t __i = 0; __i < simd_size_v<_Tp, _Abi>; ++__i)
          if (!__k[__i])
            return false;
        return true;
      }
    else
      return _Abi::_MaskImpl::_S_all_of(__k);
  }

template <typename _Tp, typename _Abi>
  _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR bool
  any_of(const simd_mask<_Tp, _Abi>& __k) noexcept
  {
    if (__builtin_is_constant_evaluated() || __k._M_is_constprop())
      {
        for (size_t __i = 0; __i < simd_size_v<_Tp, _Abi>; ++__i)
          if (__k[__i])
            return true;
        return false;
      }
    else
      return _Abi::_MaskImpl::_S_any_of(__k);
  }

template <typename _Tp, typename _Abi>
  _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR bool
  none_of(const simd_mask<_Tp, _Abi>& __k) noexcept
  {
    if (__builtin_is_constant_evaluated() || __k._M_is_constprop())
      {
        for (size_t __i = 0; __i < simd_size_v<_Tp, _Abi>; ++__i)
          if (__k[__i])
            return false;
        return true;
      }
    else
      return _Abi::_MaskImpl::_S_none_of(__k);
  }

template <typename _Tp, typename _Abi>
  _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR bool
  some_of(const simd_mask<_Tp, _Abi>& __k) noexcept
  {
    if (__builtin_is_constant_evaluated() || __k._M_is_constprop())
      {
        for (size_t __i = 1; __i < simd_size_v<_Tp, _Abi>; ++__i)
          if (__k[__i] != __k[__i - 1])
            return true;
        return false;
      }
    else
      return _Abi::_MaskImpl::_S_some_of(__k);
  }

template <typename _Tp, typename _Abi>
  _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR int
  popcount(const simd_mask<_Tp, _Abi>& __k) noexcept
  {
    if (__builtin_is_constant_evaluated() || __k._M_is_constprop())
      {
        const int __r = __call_with_subscripts<simd_size_v<_Tp, _Abi>>(
          __k, [](auto... __elements) { return ((__elements != 0) + ...); });
        if (__builtin_is_constant_evaluated() || __builtin_constant_p(__r))
          return __r;
      }
    return _Abi::_MaskImpl::_S_popcount(__k);
  }

template <typename _Tp, typename _Abi>
  _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR int
  find_first_set(const simd_mask<_Tp, _Abi>& __k)
  {
    if (__builtin_is_constant_evaluated() || __k._M_is_constprop())
      {
        constexpr size_t _Np = simd_size_v<_Tp, _Abi>;
        const size_t _Idx = __call_with_n_evaluations<_Np>(
          [](auto... __indexes) { return std::min({__indexes...}); },
          [&](auto __i) { return __k[__i] ? +__i : _Np; });
        if (_Idx >= _Np)
          __invoke_ub("find_first_set(empty mask) is UB");
        if (__builtin_constant_p(_Idx))
          return _Idx;
      }
    return _Abi::_MaskImpl::_S_find_first_set(__k);
  }

template <typename _Tp, typename _Abi>
  _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR int
  find_last_set(const simd_mask<_Tp, _Abi>& __k)
  {
    if (__builtin_is_constant_evaluated() || __k._M_is_constprop())
      {
        constexpr size_t _Np = simd_size_v<_Tp, _Abi>;
        const int _Idx = __call_with_n_evaluations<_Np>(
          [](auto... __indexes) { return std::max({__indexes...}); },
          [&](auto __i) { return __k[__i] ? int(__i) : -1; });
        if (_Idx < 0)
          __invoke_ub("find_last_set(empty mask) is UB");
        if (__builtin_constant_p(_Idx))
          return _Idx;
      }
    return _Abi::_MaskImpl::_S_find_last_set(__k);
  }
_GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR bool
all_of(_ExactBool __x) noexcept
{ return __x; }

_GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR bool
any_of(_ExactBool __x) noexcept
{ return __x; }

_GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR bool
none_of(_ExactBool __x) noexcept
{ return !__x; }

_GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR bool
some_of(_ExactBool) noexcept
{ return false; }

_GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR int
popcount(_ExactBool __x) noexcept
{ return __x; }

_GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR int
find_first_set(_ExactBool)
{ return 0; }

_GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR int
find_last_set(_ExactBool)
{ return 0; }

// }}}
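// Usage sketch (illustration only; stdx = std::experimental): mask reductions.
// The _ExactBool overloads above let the same code compile for scalar masks.
//
//   stdx::native_simd<int> v([](int i) { return i; });
//   const auto k = v > 1;
//   if (stdx::any_of(k))
//     {
//       const int n     = stdx::popcount(k);       // number of true elements
//       const int first = stdx::find_first_set(k); // lowest true index
//       // find_first_set/find_last_set on an all-false mask are UB
//     }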
/// @cond undocumented
// _SimdIntOperators{{{1
template <typename _V, typename _Tp, typename _Abi, bool>
  class _SimdIntOperators {};

template <typename _V, typename _Tp, typename _Abi>
  class _SimdIntOperators<_V, _Tp, _Abi, true>
  {
    using _Impl = typename _SimdTraits<_Tp, _Abi>::_SimdImpl;

    _GLIBCXX_SIMD_INTRINSIC const _V&
    __derived() const
    { return *static_cast<const _V*>(this); }

    template <typename _Up>
      _GLIBCXX_SIMD_INTRINSIC static _GLIBCXX_SIMD_CONSTEXPR _V
      _S_make_derived(_Up&& __d)
      { return {__private_init, static_cast<_Up&&>(__d)}; }

  public:
    _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend _V&
    operator%=(_V& __lhs, const _V& __x)
    { return __lhs = __lhs % __x; }

    _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend _V&
    operator&=(_V& __lhs, const _V& __x)
    { return __lhs = __lhs & __x; }

    _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend _V&
    operator|=(_V& __lhs, const _V& __x)
    { return __lhs = __lhs | __x; }

    _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend _V&
    operator^=(_V& __lhs, const _V& __x)
    { return __lhs = __lhs ^ __x; }

    _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend _V&
    operator<<=(_V& __lhs, const _V& __x)
    { return __lhs = __lhs << __x; }

    _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend _V&
    operator>>=(_V& __lhs, const _V& __x)
    { return __lhs = __lhs >> __x; }

    _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend _V&
    operator<<=(_V& __lhs, int __x)
    { return __lhs = __lhs << __x; }

    _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend _V&
    operator>>=(_V& __lhs, int __x)
    { return __lhs = __lhs >> __x; }

    _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend _V
    operator%(const _V& __x, const _V& __y)
    {
      return _SimdIntOperators::_S_make_derived(
        _Impl::_S_modulus(__data(__x), __data(__y)));
    }

    _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend _V
    operator&(const _V& __x, const _V& __y)
    {
      return _SimdIntOperators::_S_make_derived(
        _Impl::_S_bit_and(__data(__x), __data(__y)));
    }

    _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend _V
    operator|(const _V& __x, const _V& __y)
    {
      return _SimdIntOperators::_S_make_derived(
        _Impl::_S_bit_or(__data(__x), __data(__y)));
    }

    _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend _V
    operator^(const _V& __x, const _V& __y)
    {
      return _SimdIntOperators::_S_make_derived(
        _Impl::_S_bit_xor(__data(__x), __data(__y)));
    }

    _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend _V
    operator<<(const _V& __x, const _V& __y)
    {
      return _SimdIntOperators::_S_make_derived(
        _Impl::_S_bit_shift_left(__data(__x), __data(__y)));
    }

    _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend _V
    operator>>(const _V& __x, const _V& __y)
    {
      return _SimdIntOperators::_S_make_derived(
        _Impl::_S_bit_shift_right(__data(__x), __data(__y)));
    }

    _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend _V
    operator<<(const _V& __x, int __y)
    {
      if (__y < 0)
        __invoke_ub("The behavior is undefined if the right operand of a "
                    "shift operation is negative. [expr.shift]\nA shift by "
                    "%d was requested",
                    __y);
      if (size_t(__y) >= sizeof(declval<_Tp>() << __y) * __CHAR_BIT__)
        __invoke_ub(
          "The behavior is undefined if the right operand of a "
          "shift operation is greater than or equal to the width of the "
          "promoted left operand. [expr.shift]\nA shift by %d was requested",
          __y);
      return _SimdIntOperators::_S_make_derived(
        _Impl::_S_bit_shift_left(__data(__x), __y));
    }

    _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend _V
    operator>>(const _V& __x, int __y)
    {
      if (__y < 0)
        __invoke_ub(
          "The behavior is undefined if the right operand of a shift "
          "operation is negative. [expr.shift]\nA shift by %d was requested",
          __y);
      if (size_t(__y) >= sizeof(declval<_Tp>() << __y) * __CHAR_BIT__)
        __invoke_ub(
          "The behavior is undefined if the right operand of a shift "
          "operation is greater than or equal to the width of the promoted "
          "left operand. [expr.shift]\nA shift by %d was requested",
          __y);
      return _SimdIntOperators::_S_make_derived(
        _Impl::_S_bit_shift_right(__data(__x), __y));
    }

    // unary operators (for integral _Tp)
    _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR _V
    operator~() const
    { return {__private_init, _Impl::_S_complement(__derived()._M_data)}; }
  };

//}}}1
/// @endcond
// simd {{{
template <typename _Tp, typename _Abi>
  class simd : public _SimdIntOperators<
                 simd<_Tp, _Abi>, _Tp, _Abi,
                 conjunction<is_integral<_Tp>,
                             typename _SimdTraits<_Tp, _Abi>::_IsValid>::value>,
               public _SimdTraits<_Tp, _Abi>::_SimdBase
  {
    using _Traits = _SimdTraits<_Tp, _Abi>;
    using _MemberType = typename _Traits::_SimdMember;
    using _CastType = typename _Traits::_SimdCastType;
    static constexpr _Tp* _S_type_tag = nullptr;
    friend typename _Traits::_SimdBase;

  public:
    using _Impl = typename _Traits::_SimdImpl;
    friend _Impl;
    friend _SimdIntOperators<simd, _Tp, _Abi, true>;

    using value_type = _Tp;
    using reference = _SmartReference<_MemberType, _Impl, value_type>;
    using mask_type = simd_mask<_Tp, _Abi>;
    using abi_type = _Abi;

    static constexpr size_t
    size()
    { return __size_or_zero_v<_Tp, _Abi>; }

    _GLIBCXX_SIMD_CONSTEXPR simd() = default;
    _GLIBCXX_SIMD_CONSTEXPR simd(const simd&) = default;
    _GLIBCXX_SIMD_CONSTEXPR simd(simd&&) noexcept = default;
    _GLIBCXX_SIMD_CONSTEXPR simd& operator=(const simd&) = default;
    _GLIBCXX_SIMD_CONSTEXPR simd& operator=(simd&&) noexcept = default;

    // implicit broadcast constructor
    template <typename _Up,
              typename = enable_if_t<!is_same_v<__remove_cvref_t<_Up>, bool>>>
      _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR
      simd(_ValuePreservingOrInt<_Up, value_type>&& __x)
      : _M_data(
          _Impl::_S_broadcast(static_cast<value_type>(static_cast<_Up&&>(__x))))
      {}

    // implicit type conversion constructor (convert from fixed_size to
    // fixed_size)
    template <typename _Up>
      _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR
      simd(const simd<_Up, simd_abi::fixed_size<size()>>& __x,
           enable_if_t<
             conjunction<
               is_same<simd_abi::fixed_size<size()>, abi_type>,
               negation<__is_narrowing_conversion<_Up, value_type>>,
               __converts_to_higher_integer_rank<_Up, value_type>>::value,
             void*> = nullptr)
      : simd{static_cast<array<_Up, size()>>(__x).data(), vector_aligned} {}

    // explicit type conversion constructor
#ifdef _GLIBCXX_SIMD_ENABLE_STATIC_CAST
    template <typename _Up, typename _A2,
              typename = decltype(static_simd_cast<simd>(
                declval<const simd<_Up, _A2>&>()))>
      _GLIBCXX_SIMD_ALWAYS_INLINE explicit _GLIBCXX_SIMD_CONSTEXPR
      simd(const simd<_Up, _A2>& __x)
      : simd(static_simd_cast<simd>(__x)) {}
#endif // _GLIBCXX_SIMD_ENABLE_STATIC_CAST

    // generator constructor
    template <typename _Fp>
      _GLIBCXX_SIMD_ALWAYS_INLINE explicit _GLIBCXX_SIMD_CONSTEXPR
      simd(_Fp&& __gen, _ValuePreservingOrInt<decltype(declval<_Fp>()(
                                                declval<_SizeConstant<0>&>())),
                                              value_type>* = nullptr)
      : _M_data(_Impl::_S_generator(static_cast<_Fp&&>(__gen), _S_type_tag)) {}
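    // Usage sketch (illustration only; stdx = std::experimental): broadcast
    // and generator construction. The generator is invoked with
    // integral_constant-like indices, so the argument also works in constant
    // expressions.
    //
    //   stdx::native_simd<float> a = 1.5f;                          // broadcast
    //   stdx::native_simd<float> b([](int i) { return i * 0.5f; }); // b[i] == i * 0.5f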
    // load constructor
    template <typename _Up, typename _Flags>
      _GLIBCXX_SIMD_ALWAYS_INLINE
      simd(const _Up* __mem, _Flags)
      : _M_data(
          _Impl::_S_load(_Flags::template _S_apply<simd>(__mem), _S_type_tag))
      {}

    // loads [simd.load]
    template <typename _Up, typename _Flags>
      _GLIBCXX_SIMD_ALWAYS_INLINE void
      copy_from(const _Vectorizable<_Up>* __mem, _Flags)
      {
        _M_data = static_cast<decltype(_M_data)>(
          _Impl::_S_load(_Flags::template _S_apply<simd>(__mem), _S_type_tag));
      }

    // stores [simd.store]
    template <typename _Up, typename _Flags>
      _GLIBCXX_SIMD_ALWAYS_INLINE void
      copy_to(_Vectorizable<_Up>* __mem, _Flags) const
      {
        _Impl::_S_store(_M_data, _Flags::template _S_apply<simd>(__mem),
                        _S_type_tag);
      }
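    // Usage sketch (illustration only; assumes stdx = std::experimental and
    // the TS trait stdx::memory_alignment_v): loads and stores with alignment
    // tags. vector_aligned asserts full vector alignment, element_aligned only
    // alignof(value_type), and overaligned<N> exactly N.
    //
    //   alignas(stdx::memory_alignment_v<stdx::native_simd<float>>)
    //     float buf[stdx::native_simd<float>::size()] = {};
    //   stdx::native_simd<float> v(buf, stdx::vector_aligned);
    //   v += 1.f;
    //   v.copy_to(buf, stdx::vector_aligned);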
    // scalar access
    _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR reference
    operator[](size_t __i)
    { return {_M_data, int(__i)}; }

    _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR value_type
    operator[]([[maybe_unused]] size_t __i) const
    {
      if constexpr (__is_scalar_abi<_Abi>())
        {
          _GLIBCXX_DEBUG_ASSERT(__i == 0);
          return _M_data;
        }
      else
        return _M_data[__i];
    }

    // increment and decrement:
    _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR simd&
    operator++()
    {
      _Impl::_S_increment(_M_data);
      return *this;
    }

    _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR simd
    operator++(int)
    {
      simd __r = *this;
      _Impl::_S_increment(_M_data);
      return __r;
    }

    _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR simd&
    operator--()
    {
      _Impl::_S_decrement(_M_data);
      return *this;
    }

    _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR simd
    operator--(int)
    {
      simd __r = *this;
      _Impl::_S_decrement(_M_data);
      return __r;
    }

    // unary operators (for any _Tp)
    _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR mask_type
    operator!() const
    { return {__private_init, _Impl::_S_negate(_M_data)}; }

    _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR simd
    operator+() const
    { return *this; }

    _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR simd
    operator-() const
    { return {__private_init, _Impl::_S_unary_minus(_M_data)}; }

    // access to internal representation (suggested extension)
    _GLIBCXX_SIMD_ALWAYS_INLINE explicit _GLIBCXX_SIMD_CONSTEXPR
    simd(_CastType __init) : _M_data(__init) {}

    // compound assignment [simd.cassign]
    _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend simd&
    operator+=(simd& __lhs, const simd& __x)
    { return __lhs = __lhs + __x; }

    _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend simd&
    operator-=(simd& __lhs, const simd& __x)
    { return __lhs = __lhs - __x; }

    _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend simd&
    operator*=(simd& __lhs, const simd& __x)
    { return __lhs = __lhs * __x; }

    _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend simd&
    operator/=(simd& __lhs, const simd& __x)
    { return __lhs = __lhs / __x; }

    // binary operators [simd.binary]
    _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend simd
    operator+(const simd& __x, const simd& __y)
    { return {__private_init, _Impl::_S_plus(__x._M_data, __y._M_data)}; }

    _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend simd
    operator-(const simd& __x, const simd& __y)
    { return {__private_init, _Impl::_S_minus(__x._M_data, __y._M_data)}; }

    _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend simd
    operator*(const simd& __x, const simd& __y)
    { return {__private_init, _Impl::_S_multiplies(__x._M_data, __y._M_data)}; }

    _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend simd
    operator/(const simd& __x, const simd& __y)
    { return {__private_init, _Impl::_S_divides(__x._M_data, __y._M_data)}; }

    // compares [simd.comparison]
    _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend mask_type
    operator==(const simd& __x, const simd& __y)
    { return simd::_S_make_mask(_Impl::_S_equal_to(__x._M_data, __y._M_data)); }

    _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend mask_type
    operator!=(const simd& __x, const simd& __y)
    {
      return simd::_S_make_mask(
        _Impl::_S_not_equal_to(__x._M_data, __y._M_data));
    }

    _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend mask_type
    operator<(const simd& __x, const simd& __y)
    { return simd::_S_make_mask(_Impl::_S_less(__x._M_data, __y._M_data)); }

    _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend mask_type
    operator<=(const simd& __x, const simd& __y)
    {
      return simd::_S_make_mask(_Impl::_S_less_equal(__x._M_data, __y._M_data));
    }

    _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend mask_type
    operator>(const simd& __x, const simd& __y)
    { return simd::_S_make_mask(_Impl::_S_less(__y._M_data, __x._M_data)); }

    _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend mask_type
    operator>=(const simd& __x, const simd& __y)
    {
      return simd::_S_make_mask(_Impl::_S_less_equal(__y._M_data, __x._M_data));
    }

    // operator?: overloads (suggested extension) {{{
#ifdef __GXX_CONDITIONAL_IS_OVERLOADABLE__
    _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend simd
    operator?:(const mask_type& __k, const simd& __where_true,
               const simd& __where_false)
    {
      auto __ret = __where_false;
      _Impl::_S_masked_assign(__data(__k), __data(__ret), __data(__where_true));
      return __ret;
    }
#endif // __GXX_CONDITIONAL_IS_OVERLOADABLE__
    // }}}

    // "private" because of the first argument's namespace
    _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR
    simd(_PrivateInit, const _MemberType& __init)
    : _M_data(__init) {}

    // "private" because of the first argument's namespace
    _GLIBCXX_SIMD_INTRINSIC
    simd(_BitsetInit, bitset<size()> __init) : _M_data()
    { where(mask_type(__bitset_init, __init), *this) = ~*this; }

    _GLIBCXX_SIMD_INTRINSIC constexpr bool
    _M_is_constprop() const
    {
      if constexpr (__is_scalar_abi<_Abi>())
        return __builtin_constant_p(_M_data);
      else
        return _M_data._M_is_constprop();
    }

  private:
    _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR static mask_type
    _S_make_mask(typename mask_type::_MemberType __k)
    { return {__private_init, __k}; }

    friend const auto& __data<value_type, abi_type>(const simd&);

    friend auto& __data<value_type, abi_type>(simd&);

    alignas(_Traits::_S_simd_align) _MemberType _M_data;
  };

// }}}
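// A usage sketch (illustrative only, not part of this header; same stdx
// alias as above). Element-wise compares return a simd_mask; combined with
// where(), this gives the same blend the operator?: extension above
// implements via _S_masked_assign:
//
//   stdx::simd<float> a = 1.f;
//   stdx::simd<float> b([](auto i) { return float(i); });
//   auto k = a < b;          // simd_mask: true wherever a[i] < b[i]
//   stdx::where(k, a) = b;   // masked assignment: a[i] = b[i] where k[i]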
/// @cond undocumented
// __data {{{
template <typename _Tp, typename _Ap>
  _GLIBCXX_SIMD_INTRINSIC constexpr const auto&
  __data(const simd<_Tp, _Ap>& __x)
  { return __x._M_data; }

template <typename _Tp, typename _Ap>
  _GLIBCXX_SIMD_INTRINSIC constexpr auto&
  __data(simd<_Tp, _Ap>& __x)
  { return __x._M_data; }

// }}}
namespace __float_bitwise_operators { //{{{
template <typename _Tp, typename _Ap>
  _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR simd<_Tp, _Ap>
  operator^(const simd<_Tp, _Ap>& __a, const simd<_Tp, _Ap>& __b)
  {
    return {__private_init,
            _Ap::_SimdImpl::_S_bit_xor(__data(__a), __data(__b))};
  }

template <typename _Tp, typename _Ap>
  _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR simd<_Tp, _Ap>
  operator|(const simd<_Tp, _Ap>& __a, const simd<_Tp, _Ap>& __b)
  {
    return {__private_init,
            _Ap::_SimdImpl::_S_bit_or(__data(__a), __data(__b))};
  }

template <typename _Tp, typename _Ap>
  _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR simd<_Tp, _Ap>
  operator&(const simd<_Tp, _Ap>& __a, const simd<_Tp, _Ap>& __b)
  {
    return {__private_init,
            _Ap::_SimdImpl::_S_bit_and(__data(__a), __data(__b))};
  }

template <typename _Tp, typename _Ap>
  _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR
  enable_if_t<is_floating_point_v<_Tp>, simd<_Tp, _Ap>>
  operator~(const simd<_Tp, _Ap>& __a)
  { return {__private_init, _Ap::_SimdImpl::_S_complement(__data(__a))}; }
} // namespace __float_bitwise_operators }}}
/// @endcond

/// @}
_GLIBCXX_SIMD_END_NAMESPACE

#endif // __cplusplus >= 201703L
#endif // _GLIBCXX_EXPERIMENTAL_SIMD_H

// vim: foldmethod=marker foldmarker={{{,}}}