1 // Simd fixed_size ABI specific implementations -*- C++ -*- 2 3 // Copyright (C) 2020-2022 Free Software Foundation, Inc. 4 // 5 // This file is part of the GNU ISO C++ Library. This library is free 6 // software; you can redistribute it and/or modify it under the 7 // terms of the GNU General Public License as published by the 8 // Free Software Foundation; either version 3, or (at your option) 9 // any later version. 10 11 // This library is distributed in the hope that it will be useful, 12 // but WITHOUT ANY WARRANTY; without even the implied warranty of 13 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 14 // GNU General Public License for more details. 15 16 // Under Section 7 of GPL version 3, you are granted additional 17 // permissions described in the GCC Runtime Library Exception, version 18 // 3.1, as published by the Free Software Foundation. 19 20 // You should have received a copy of the GNU General Public License and 21 // a copy of the GCC Runtime Library Exception along with this program; 22 // see the files COPYING3 and COPYING.RUNTIME respectively. If not, see 23 // <http://www.gnu.org/licenses/>. 24 25 /* 26 * The fixed_size ABI gives the following guarantees: 27 * - simd objects are passed via the stack 28 * - memory layout of `simd<_Tp, _Np>` is equivalent to `array<_Tp, _Np>` 29 * - alignment of `simd<_Tp, _Np>` is `_Np * sizeof(_Tp)` if _Np is __a 30 * power-of-2 value, otherwise `std::__bit_ceil(_Np * sizeof(_Tp))` (Note: 31 * if the alignment were to exceed the system/compiler maximum, it is bounded 32 * to that maximum) 33 * - simd_mask objects are passed like bitset<_Np> 34 * - memory layout of `simd_mask<_Tp, _Np>` is equivalent to `bitset<_Np>` 35 * - alignment of `simd_mask<_Tp, _Np>` is equal to the alignment of 36 * `bitset<_Np>` 37 */ 38 39 #ifndef _GLIBCXX_EXPERIMENTAL_SIMD_FIXED_SIZE_H_ 40 #define _GLIBCXX_EXPERIMENTAL_SIMD_FIXED_SIZE_H_ 41 42 #if __cplusplus >= 201703L 43 44 #include <array> 45 46 _GLIBCXX_SIMD_BEGIN_NAMESPACE 47 48 // __simd_tuple_element {{{ 49 template <size_t _I, typename _Tp> 50 struct __simd_tuple_element; 51 52 template <typename _Tp, typename _A0, typename... _As> 53 struct __simd_tuple_element<0, _SimdTuple<_Tp, _A0, _As...>> 54 { using type = simd<_Tp, _A0>; }; 55 56 template <size_t _I, typename _Tp, typename _A0, typename... _As> 57 struct __simd_tuple_element<_I, _SimdTuple<_Tp, _A0, _As...>> 58 { using type = typename __simd_tuple_element<_I - 1, _SimdTuple<_Tp, _As...>>::type; }; 59 60 template <size_t _I, typename _Tp> 61 using __simd_tuple_element_t = typename __simd_tuple_element<_I, _Tp>::type; 62 63 // }}} 64 // __simd_tuple_concat {{{ 65 66 template <typename _Tp, typename... _A0s, typename... _A1s> 67 _GLIBCXX_SIMD_INTRINSIC constexpr _SimdTuple<_Tp, _A0s..., _A1s...> 68 __simd_tuple_concat(const _SimdTuple<_Tp, _A0s...>& __left, 69 const _SimdTuple<_Tp, _A1s...>& __right) 70 { 71 if constexpr (sizeof...(_A0s) == 0) 72 return __right; 73 else if constexpr (sizeof...(_A1s) == 0) 74 return __left; 75 else 76 return {__left.first, __simd_tuple_concat(__left.second, __right)}; 77 } 78 79 template <typename _Tp, typename _A10, typename... _A1s> 80 _GLIBCXX_SIMD_INTRINSIC constexpr _SimdTuple<_Tp, simd_abi::scalar, _A10, _A1s...> 81 __simd_tuple_concat(const _Tp& __left, const _SimdTuple<_Tp, _A10, _A1s...>& __right) 82 { return {__left, __right}; } 83 84 // }}} 85 // __simd_tuple_pop_front {{{ 86 // Returns the next _SimdTuple in __x that has _Np elements less. 87 // Precondition: _Np must match the number of elements in __first (recursively) 88 template <size_t _Np, typename _Tp> 89 _GLIBCXX_SIMD_INTRINSIC constexpr decltype(auto) 90 __simd_tuple_pop_front(_Tp&& __x) 91 { 92 if constexpr (_Np == 0) 93 return static_cast<_Tp&&>(__x); 94 else 95 { 96 using _Up = __remove_cvref_t<_Tp>; 97 static_assert(_Np >= _Up::_S_first_size); 98 return __simd_tuple_pop_front<_Np - _Up::_S_first_size>(__x.second); 99 } 100 } 101 102 // }}} 103 // __get_simd_at<_Np> {{{1 104 struct __as_simd {}; 105 106 struct __as_simd_tuple {}; 107 108 template <typename _Tp, typename _A0, typename... _Abis> 109 _GLIBCXX_SIMD_INTRINSIC constexpr simd<_Tp, _A0> 110 __simd_tuple_get_impl(__as_simd, const _SimdTuple<_Tp, _A0, _Abis...>& __t, _SizeConstant<0>) 111 { return {__private_init, __t.first}; } 112 113 template <typename _Tp, typename _A0, typename... _Abis> 114 _GLIBCXX_SIMD_INTRINSIC constexpr const auto& 115 __simd_tuple_get_impl(__as_simd_tuple, const _SimdTuple<_Tp, _A0, _Abis...>& __t, 116 _SizeConstant<0>) 117 { return __t.first; } 118 119 template <typename _Tp, typename _A0, typename... _Abis> 120 _GLIBCXX_SIMD_INTRINSIC constexpr auto& 121 __simd_tuple_get_impl(__as_simd_tuple, _SimdTuple<_Tp, _A0, _Abis...>& __t, _SizeConstant<0>) 122 { return __t.first; } 123 124 template <typename _R, size_t _Np, typename _Tp, typename... _Abis> 125 _GLIBCXX_SIMD_INTRINSIC constexpr auto 126 __simd_tuple_get_impl(_R, const _SimdTuple<_Tp, _Abis...>& __t, _SizeConstant<_Np>) 127 { return __simd_tuple_get_impl(_R(), __t.second, _SizeConstant<_Np - 1>()); } 128 129 template <size_t _Np, typename _Tp, typename... _Abis> 130 _GLIBCXX_SIMD_INTRINSIC constexpr auto& 131 __simd_tuple_get_impl(__as_simd_tuple, _SimdTuple<_Tp, _Abis...>& __t, _SizeConstant<_Np>) 132 { return __simd_tuple_get_impl(__as_simd_tuple(), __t.second, _SizeConstant<_Np - 1>()); } 133 134 template <size_t _Np, typename _Tp, typename... _Abis> 135 _GLIBCXX_SIMD_INTRINSIC constexpr auto 136 __get_simd_at(const _SimdTuple<_Tp, _Abis...>& __t) 137 { return __simd_tuple_get_impl(__as_simd(), __t, _SizeConstant<_Np>()); } 138 139 // }}} 140 // __get_tuple_at<_Np> {{{ 141 template <size_t _Np, typename _Tp, typename... _Abis> 142 _GLIBCXX_SIMD_INTRINSIC constexpr auto 143 __get_tuple_at(const _SimdTuple<_Tp, _Abis...>& __t) 144 { return __simd_tuple_get_impl(__as_simd_tuple(), __t, _SizeConstant<_Np>()); } 145 146 template <size_t _Np, typename _Tp, typename... _Abis> 147 _GLIBCXX_SIMD_INTRINSIC constexpr auto& 148 __get_tuple_at(_SimdTuple<_Tp, _Abis...>& __t) 149 { return __simd_tuple_get_impl(__as_simd_tuple(), __t, _SizeConstant<_Np>()); } 150 151 // __tuple_element_meta {{{1 152 template <typename _Tp, typename _Abi, size_t _Offset> 153 struct __tuple_element_meta : public _Abi::_SimdImpl 154 { 155 static_assert(is_same_v<typename _Abi::_SimdImpl::abi_type, 156 _Abi>); // this fails e.g. when _SimdImpl is an 157 // alias for _SimdImplBuiltin<_DifferentAbi> 158 using value_type = _Tp; 159 using abi_type = _Abi; 160 using _Traits = _SimdTraits<_Tp, _Abi>; 161 using _MaskImpl = typename _Abi::_MaskImpl; 162 using _MaskMember = typename _Traits::_MaskMember; 163 using simd_type = simd<_Tp, _Abi>; 164 static constexpr size_t _S_offset = _Offset; 165 static constexpr size_t _S_size() { return simd_size<_Tp, _Abi>::value; } 166 static constexpr _MaskImpl _S_mask_impl = {}; 167 168 template <size_t _Np, bool _Sanitized> 169 _GLIBCXX_SIMD_INTRINSIC static constexpr auto 170 _S_submask(_BitMask<_Np, _Sanitized> __bits) 171 { return __bits.template _M_extract<_Offset, _S_size()>(); } 172 173 template <size_t _Np, bool _Sanitized> 174 _GLIBCXX_SIMD_INTRINSIC static constexpr _MaskMember 175 _S_make_mask(_BitMask<_Np, _Sanitized> __bits) 176 { 177 return _MaskImpl::template _S_convert<_Tp>( 178 __bits.template _M_extract<_Offset, _S_size()>()._M_sanitized()); 179 } 180 181 _GLIBCXX_SIMD_INTRINSIC static constexpr _ULLong 182 _S_mask_to_shifted_ullong(_MaskMember __k) 183 { return _MaskImpl::_S_to_bits(__k).to_ullong() << _Offset; } 184 }; 185 186 template <size_t _Offset, typename _Tp, typename _Abi, typename... _As> 187 _GLIBCXX_SIMD_INTRINSIC constexpr 188 __tuple_element_meta<_Tp, _Abi, _Offset> 189 __make_meta(const _SimdTuple<_Tp, _Abi, _As...>&) 190 { return {}; } 191 192 // }}}1 193 // _WithOffset wrapper class {{{ 194 template <size_t _Offset, typename _Base> 195 struct _WithOffset : public _Base 196 { 197 static inline constexpr size_t _S_offset = _Offset; 198 199 _GLIBCXX_SIMD_INTRINSIC char* 200 _M_as_charptr() 201 { return reinterpret_cast<char*>(this) + _S_offset * sizeof(typename _Base::value_type); } 202 203 _GLIBCXX_SIMD_INTRINSIC const char* 204 _M_as_charptr() const 205 { return reinterpret_cast<const char*>(this) + _S_offset * sizeof(typename _Base::value_type); } 206 }; 207 208 // make _WithOffset<_WithOffset> ill-formed to use: 209 template <size_t _O0, size_t _O1, typename _Base> 210 struct _WithOffset<_O0, _WithOffset<_O1, _Base>> {}; 211 212 template <size_t _Offset, typename _Tp> 213 _GLIBCXX_SIMD_INTRINSIC 214 decltype(auto) 215 __add_offset(_Tp& __base) 216 { return static_cast<_WithOffset<_Offset, __remove_cvref_t<_Tp>>&>(__base); } 217 218 template <size_t _Offset, typename _Tp> 219 _GLIBCXX_SIMD_INTRINSIC 220 decltype(auto) 221 __add_offset(const _Tp& __base) 222 { return static_cast<const _WithOffset<_Offset, __remove_cvref_t<_Tp>>&>(__base); } 223 224 template <size_t _Offset, size_t _ExistingOffset, typename _Tp> 225 _GLIBCXX_SIMD_INTRINSIC 226 decltype(auto) 227 __add_offset(_WithOffset<_ExistingOffset, _Tp>& __base) 228 { return static_cast<_WithOffset<_Offset + _ExistingOffset, _Tp>&>(static_cast<_Tp&>(__base)); } 229 230 template <size_t _Offset, size_t _ExistingOffset, typename _Tp> 231 _GLIBCXX_SIMD_INTRINSIC 232 decltype(auto) 233 __add_offset(const _WithOffset<_ExistingOffset, _Tp>& __base) 234 { 235 return static_cast<const _WithOffset<_Offset + _ExistingOffset, _Tp>&>( 236 static_cast<const _Tp&>(__base)); 237 } 238 239 template <typename _Tp> 240 constexpr inline size_t __offset = 0; 241 242 template <size_t _Offset, typename _Tp> 243 constexpr inline size_t __offset<_WithOffset<_Offset, _Tp>> 244 = _WithOffset<_Offset, _Tp>::_S_offset; 245 246 template <typename _Tp> 247 constexpr inline size_t __offset<const _Tp> = __offset<_Tp>; 248 249 template <typename _Tp> 250 constexpr inline size_t __offset<_Tp&> = __offset<_Tp>; 251 252 template <typename _Tp> 253 constexpr inline size_t __offset<_Tp&&> = __offset<_Tp>; 254 255 // }}} 256 // _SimdTuple specializations {{{1 257 // empty {{{2 258 template <typename _Tp> 259 struct _SimdTuple<_Tp> 260 { 261 using value_type = _Tp; 262 static constexpr size_t _S_tuple_size = 0; 263 static constexpr size_t _S_size() { return 0; } 264 }; 265 266 // _SimdTupleData {{{2 267 template <typename _FirstType, typename _SecondType> 268 struct _SimdTupleData 269 { 270 _FirstType first; 271 _SecondType second; 272 273 _GLIBCXX_SIMD_INTRINSIC 274 constexpr bool 275 _M_is_constprop() const 276 { 277 if constexpr (is_class_v<_FirstType>) 278 return first._M_is_constprop() && second._M_is_constprop(); 279 else 280 return __builtin_constant_p(first) && second._M_is_constprop(); 281 } 282 }; 283 284 template <typename _FirstType, typename _Tp> 285 struct _SimdTupleData<_FirstType, _SimdTuple<_Tp>> 286 { 287 _FirstType first; 288 static constexpr _SimdTuple<_Tp> second = {}; 289 290 _GLIBCXX_SIMD_INTRINSIC 291 constexpr bool 292 _M_is_constprop() const 293 { 294 if constexpr (is_class_v<_FirstType>) 295 return first._M_is_constprop(); 296 else 297 return __builtin_constant_p(first); 298 } 299 }; 300 301 // 1 or more {{{2 302 template <typename _Tp, typename _Abi0, typename... _Abis> 303 struct _SimdTuple<_Tp, _Abi0, _Abis...> 304 : _SimdTupleData<typename _SimdTraits<_Tp, _Abi0>::_SimdMember, 305 _SimdTuple<_Tp, _Abis...>> 306 { 307 static_assert(!__is_fixed_size_abi_v<_Abi0>); 308 using value_type = _Tp; 309 using _FirstType = typename _SimdTraits<_Tp, _Abi0>::_SimdMember; 310 using _FirstAbi = _Abi0; 311 using _SecondType = _SimdTuple<_Tp, _Abis...>; 312 static constexpr size_t _S_tuple_size = sizeof...(_Abis) + 1; 313 314 static constexpr size_t _S_size() 315 { return simd_size_v<_Tp, _Abi0> + _SecondType::_S_size(); } 316 317 static constexpr size_t _S_first_size = simd_size_v<_Tp, _Abi0>; 318 static constexpr bool _S_is_homogeneous = (is_same_v<_Abi0, _Abis> && ...); 319 320 using _Base = _SimdTupleData<typename _SimdTraits<_Tp, _Abi0>::_SimdMember, 321 _SimdTuple<_Tp, _Abis...>>; 322 using _Base::first; 323 using _Base::second; 324 325 _GLIBCXX_SIMD_INTRINSIC constexpr _SimdTuple() = default; 326 _GLIBCXX_SIMD_INTRINSIC constexpr _SimdTuple(const _SimdTuple&) = default; 327 _GLIBCXX_SIMD_INTRINSIC constexpr _SimdTuple& operator=(const _SimdTuple&) 328 = default; 329 330 template <typename _Up> 331 _GLIBCXX_SIMD_INTRINSIC constexpr 332 _SimdTuple(_Up&& __x) 333 : _Base{static_cast<_Up&&>(__x)} {} 334 335 template <typename _Up, typename _Up2> 336 _GLIBCXX_SIMD_INTRINSIC constexpr 337 _SimdTuple(_Up&& __x, _Up2&& __y) 338 : _Base{static_cast<_Up&&>(__x), static_cast<_Up2&&>(__y)} {} 339 340 template <typename _Up> 341 _GLIBCXX_SIMD_INTRINSIC constexpr 342 _SimdTuple(_Up&& __x, _SimdTuple<_Tp>) 343 : _Base{static_cast<_Up&&>(__x)} {} 344 345 _GLIBCXX_SIMD_INTRINSIC char* 346 _M_as_charptr() 347 { return reinterpret_cast<char*>(this); } 348 349 _GLIBCXX_SIMD_INTRINSIC const char* 350 _M_as_charptr() const 351 { return reinterpret_cast<const char*>(this); } 352 353 template <size_t _Np> 354 _GLIBCXX_SIMD_INTRINSIC constexpr auto& 355 _M_at() 356 { 357 if constexpr (_Np == 0) 358 return first; 359 else 360 return second.template _M_at<_Np - 1>(); 361 } 362 363 template <size_t _Np> 364 _GLIBCXX_SIMD_INTRINSIC constexpr const auto& 365 _M_at() const 366 { 367 if constexpr (_Np == 0) 368 return first; 369 else 370 return second.template _M_at<_Np - 1>(); 371 } 372 373 template <size_t _Np> 374 _GLIBCXX_SIMD_INTRINSIC constexpr auto 375 _M_simd_at() const 376 { 377 if constexpr (_Np == 0) 378 return simd<_Tp, _Abi0>(__private_init, first); 379 else 380 return second.template _M_simd_at<_Np - 1>(); 381 } 382 383 template <size_t _Offset = 0, typename _Fp> 384 _GLIBCXX_SIMD_INTRINSIC static constexpr _SimdTuple 385 _S_generate(_Fp&& __gen, _SizeConstant<_Offset> = {}) 386 { 387 auto&& __first = __gen(__tuple_element_meta<_Tp, _Abi0, _Offset>()); 388 if constexpr (_S_tuple_size == 1) 389 return {__first}; 390 else 391 return {__first, 392 _SecondType::_S_generate( 393 static_cast<_Fp&&>(__gen), 394 _SizeConstant<_Offset + simd_size_v<_Tp, _Abi0>>())}; 395 } 396 397 template <size_t _Offset = 0, typename _Fp, typename... _More> 398 _GLIBCXX_SIMD_INTRINSIC _SimdTuple 399 _M_apply_wrapped(_Fp&& __fun, const _More&... __more) const 400 { 401 auto&& __first 402 = __fun(__make_meta<_Offset>(*this), first, __more.first...); 403 if constexpr (_S_tuple_size == 1) 404 return {__first}; 405 else 406 return { 407 __first, 408 second.template _M_apply_wrapped<_Offset + simd_size_v<_Tp, _Abi0>>( 409 static_cast<_Fp&&>(__fun), __more.second...)}; 410 } 411 412 template <typename _Tup> 413 _GLIBCXX_SIMD_INTRINSIC constexpr decltype(auto) 414 _M_extract_argument(_Tup&& __tup) const 415 { 416 using _TupT = typename __remove_cvref_t<_Tup>::value_type; 417 if constexpr (is_same_v<_SimdTuple, __remove_cvref_t<_Tup>>) 418 return __tup.first; 419 else if (__builtin_is_constant_evaluated()) 420 return __fixed_size_storage_t<_TupT, _S_first_size>::_S_generate( 421 [&](auto __meta) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA { 422 return __meta._S_generator( 423 [&](auto __i) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA { 424 return __tup[__i]; 425 }, static_cast<_TupT*>(nullptr)); 426 }); 427 else 428 return [&]() { // not always_inline; allow the compiler to decide 429 __fixed_size_storage_t<_TupT, _S_first_size> __r; 430 __builtin_memcpy(__r._M_as_charptr(), __tup._M_as_charptr(), 431 sizeof(__r)); 432 return __r; 433 }(); 434 } 435 436 template <typename _Tup> 437 _GLIBCXX_SIMD_INTRINSIC constexpr auto& 438 _M_skip_argument(_Tup&& __tup) const 439 { 440 static_assert(_S_tuple_size > 1); 441 using _Up = __remove_cvref_t<_Tup>; 442 constexpr size_t __off = __offset<_Up>; 443 if constexpr (_S_first_size == _Up::_S_first_size && __off == 0) 444 return __tup.second; 445 else if constexpr (_S_first_size > _Up::_S_first_size 446 && _S_first_size % _Up::_S_first_size == 0 447 && __off == 0) 448 return __simd_tuple_pop_front<_S_first_size>(__tup); 449 else if constexpr (_S_first_size + __off < _Up::_S_first_size) 450 return __add_offset<_S_first_size>(__tup); 451 else if constexpr (_S_first_size + __off == _Up::_S_first_size) 452 return __tup.second; 453 else 454 __assert_unreachable<_Tup>(); 455 } 456 457 template <size_t _Offset, typename... _More> 458 _GLIBCXX_SIMD_INTRINSIC constexpr void 459 _M_assign_front(const _SimdTuple<_Tp, _Abi0, _More...>& __x) & 460 { 461 static_assert(_Offset == 0); 462 first = __x.first; 463 if constexpr (sizeof...(_More) > 0) 464 { 465 static_assert(sizeof...(_Abis) >= sizeof...(_More)); 466 second.template _M_assign_front<0>(__x.second); 467 } 468 } 469 470 template <size_t _Offset> 471 _GLIBCXX_SIMD_INTRINSIC constexpr void 472 _M_assign_front(const _FirstType& __x) & 473 { 474 static_assert(_Offset == 0); 475 first = __x; 476 } 477 478 template <size_t _Offset, typename... _As> 479 _GLIBCXX_SIMD_INTRINSIC constexpr void 480 _M_assign_front(const _SimdTuple<_Tp, _As...>& __x) & 481 { 482 __builtin_memcpy(_M_as_charptr() + _Offset * sizeof(value_type), 483 __x._M_as_charptr(), 484 sizeof(_Tp) * _SimdTuple<_Tp, _As...>::_S_size()); 485 } 486 487 /* 488 * Iterate over the first objects in this _SimdTuple and call __fun for each 489 * of them. If additional arguments are passed via __more, chunk them into 490 * _SimdTuple or __vector_type_t objects of the same number of values. 491 */ 492 template <typename _Fp, typename... _More> 493 _GLIBCXX_SIMD_INTRINSIC constexpr _SimdTuple 494 _M_apply_per_chunk(_Fp&& __fun, _More&&... __more) const 495 { 496 if constexpr ((... 497 || conjunction_v< 498 is_lvalue_reference<_More>, 499 negation<is_const<remove_reference_t<_More>>>>) ) 500 { 501 // need to write back at least one of __more after calling __fun 502 auto&& __first = [&](auto... __args) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA { 503 auto __r = __fun(__tuple_element_meta<_Tp, _Abi0, 0>(), first, 504 __args...); 505 [[maybe_unused]] auto&& __ignore_me = {( 506 [](auto&& __dst, const auto& __src) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA { 507 if constexpr (is_assignable_v<decltype(__dst), 508 decltype(__dst)>) 509 { 510 __dst.template _M_assign_front<__offset<decltype(__dst)>>( 511 __src); 512 } 513 }(static_cast<_More&&>(__more), __args), 514 0)...}; 515 return __r; 516 }(_M_extract_argument(__more)...); 517 if constexpr (_S_tuple_size == 1) 518 return {__first}; 519 else 520 return {__first, 521 second._M_apply_per_chunk(static_cast<_Fp&&>(__fun), 522 _M_skip_argument(__more)...)}; 523 } 524 else 525 { 526 auto&& __first = __fun(__tuple_element_meta<_Tp, _Abi0, 0>(), first, 527 _M_extract_argument(__more)...); 528 if constexpr (_S_tuple_size == 1) 529 return {__first}; 530 else 531 return {__first, 532 second._M_apply_per_chunk(static_cast<_Fp&&>(__fun), 533 _M_skip_argument(__more)...)}; 534 } 535 } 536 537 template <typename _R = _Tp, typename _Fp, typename... _More> 538 _GLIBCXX_SIMD_INTRINSIC constexpr auto 539 _M_apply_r(_Fp&& __fun, const _More&... __more) const 540 { 541 auto&& __first = __fun(__tuple_element_meta<_Tp, _Abi0, 0>(), first, 542 __more.first...); 543 if constexpr (_S_tuple_size == 1) 544 return __first; 545 else 546 return __simd_tuple_concat<_R>( 547 __first, second.template _M_apply_r<_R>(static_cast<_Fp&&>(__fun), 548 __more.second...)); 549 } 550 551 template <typename _Fp, typename... _More> 552 _GLIBCXX_SIMD_INTRINSIC constexpr friend _SanitizedBitMask<_S_size()> 553 _M_test(const _Fp& __fun, const _SimdTuple& __x, const _More&... __more) 554 { 555 const _SanitizedBitMask<_S_first_size> __first 556 = _Abi0::_MaskImpl::_S_to_bits( 557 __fun(__tuple_element_meta<_Tp, _Abi0, 0>(), __x.first, 558 __more.first...)); 559 if constexpr (_S_tuple_size == 1) 560 return __first; 561 else 562 return _M_test(__fun, __x.second, __more.second...) 563 ._M_prepend(__first); 564 } 565 566 template <typename _Up, _Up _I> 567 _GLIBCXX_SIMD_INTRINSIC constexpr _Tp 568 operator[](integral_constant<_Up, _I>) const noexcept 569 { 570 if constexpr (_I < simd_size_v<_Tp, _Abi0>) 571 return _M_subscript_read(_I); 572 else 573 return second[integral_constant<_Up, _I - simd_size_v<_Tp, _Abi0>>()]; 574 } 575 576 _GLIBCXX_SIMD_INTRINSIC constexpr _Tp 577 operator[](size_t __i) const noexcept 578 { 579 if constexpr (_S_tuple_size == 1) 580 return _M_subscript_read(__i); 581 #ifdef _GLIBCXX_SIMD_USE_ALIASING_LOADS 582 else if (not __builtin_is_constant_evaluated()) 583 return reinterpret_cast<const __may_alias<_Tp>*>(this)[__i]; 584 #endif 585 else if constexpr (__is_scalar_abi<_Abi0>()) 586 { 587 const _Tp* ptr = &first; 588 return ptr[__i]; 589 } 590 else 591 return __i < simd_size_v<_Tp, _Abi0> ? _M_subscript_read(__i) 592 : second[__i - simd_size_v<_Tp, _Abi0>]; 593 } 594 595 _GLIBCXX_SIMD_INTRINSIC constexpr void 596 _M_set(size_t __i, _Tp __val) noexcept 597 { 598 if constexpr (_S_tuple_size == 1) 599 return _M_subscript_write(__i, __val); 600 #ifdef _GLIBCXX_SIMD_USE_ALIASING_LOADS 601 else if (not __builtin_is_constant_evaluated()) 602 reinterpret_cast<__may_alias<_Tp>*>(this)[__i] = __val; 603 #endif 604 else if (__i < simd_size_v<_Tp, _Abi0>) 605 _M_subscript_write(__i, __val); 606 else 607 second._M_set(__i - simd_size_v<_Tp, _Abi0>, __val); 608 } 609 610 private: 611 // _M_subscript_read/_write {{{ 612 _GLIBCXX_SIMD_INTRINSIC constexpr _Tp 613 _M_subscript_read([[maybe_unused]] size_t __i) const noexcept 614 { 615 if constexpr (__is_vectorizable_v<_FirstType>) 616 return first; 617 else 618 return first[__i]; 619 } 620 621 _GLIBCXX_SIMD_INTRINSIC constexpr void 622 _M_subscript_write([[maybe_unused]] size_t __i, _Tp __y) noexcept 623 { 624 if constexpr (__is_vectorizable_v<_FirstType>) 625 first = __y; 626 else 627 first._M_set(__i, __y); 628 } 629 630 // }}} 631 }; 632 633 // __make_simd_tuple {{{1 634 template <typename _Tp, typename _A0> 635 _GLIBCXX_SIMD_INTRINSIC constexpr _SimdTuple<_Tp, _A0> 636 __make_simd_tuple(simd<_Tp, _A0> __x0) 637 { return {__data(__x0)}; } 638 639 template <typename _Tp, typename _A0, typename... _As> 640 _GLIBCXX_SIMD_INTRINSIC constexpr _SimdTuple<_Tp, _A0, _As...> 641 __make_simd_tuple(const simd<_Tp, _A0>& __x0, const simd<_Tp, _As>&... __xs) 642 { return {__data(__x0), __make_simd_tuple(__xs...)}; } 643 644 template <typename _Tp, typename _A0> 645 _GLIBCXX_SIMD_INTRINSIC constexpr _SimdTuple<_Tp, _A0> 646 __make_simd_tuple(const typename _SimdTraits<_Tp, _A0>::_SimdMember& __arg0) 647 { return {__arg0}; } 648 649 template <typename _Tp, typename _A0, typename _A1, typename... _Abis> 650 _GLIBCXX_SIMD_INTRINSIC constexpr _SimdTuple<_Tp, _A0, _A1, _Abis...> 651 __make_simd_tuple( 652 const typename _SimdTraits<_Tp, _A0>::_SimdMember& __arg0, 653 const typename _SimdTraits<_Tp, _A1>::_SimdMember& __arg1, 654 const typename _SimdTraits<_Tp, _Abis>::_SimdMember&... __args) 655 { return {__arg0, __make_simd_tuple<_Tp, _A1, _Abis...>(__arg1, __args...)}; } 656 657 // __to_simd_tuple {{{1 658 template <typename _Tp, size_t _Np, typename _V, size_t _NV, typename... _VX> 659 _GLIBCXX_SIMD_INTRINSIC constexpr __fixed_size_storage_t<_Tp, _Np> 660 __to_simd_tuple(const array<_V, _NV>& __from, const _VX... __fromX); 661 662 template <typename _Tp, size_t _Np, 663 size_t _Offset = 0, // skip this many elements in __from0 664 typename _R = __fixed_size_storage_t<_Tp, _Np>, typename _V0, 665 typename _V0VT = _VectorTraits<_V0>, typename... _VX> 666 _GLIBCXX_SIMD_INTRINSIC _R constexpr __to_simd_tuple(const _V0 __from0, const _VX... __fromX) 667 { 668 static_assert(is_same_v<typename _V0VT::value_type, _Tp>); 669 static_assert(_Offset < _V0VT::_S_full_size); 670 using _R0 = __vector_type_t<_Tp, _R::_S_first_size>; 671 if constexpr (_R::_S_tuple_size == 1) 672 { 673 if constexpr (_Np == 1) 674 return _R{__from0[_Offset]}; 675 else if constexpr (_Offset == 0 && _V0VT::_S_full_size >= _Np) 676 return _R{__intrin_bitcast<_R0>(__from0)}; 677 else if constexpr (_Offset * 2 == _V0VT::_S_full_size 678 && _V0VT::_S_full_size / 2 >= _Np) 679 return _R{__intrin_bitcast<_R0>(__extract_part<1, 2>(__from0))}; 680 else if constexpr (_Offset * 4 == _V0VT::_S_full_size 681 && _V0VT::_S_full_size / 4 >= _Np) 682 return _R{__intrin_bitcast<_R0>(__extract_part<1, 4>(__from0))}; 683 else 684 __assert_unreachable<_Tp>(); 685 } 686 else 687 { 688 if constexpr (1 == _R::_S_first_size) 689 { // extract one scalar and recurse 690 if constexpr (_Offset + 1 < _V0VT::_S_full_size) 691 return _R{__from0[_Offset], 692 __to_simd_tuple<_Tp, _Np - 1, _Offset + 1>(__from0, 693 __fromX...)}; 694 else 695 return _R{__from0[_Offset], 696 __to_simd_tuple<_Tp, _Np - 1, 0>(__fromX...)}; 697 } 698 699 // place __from0 into _R::first and recurse for __fromX -> _R::second 700 else if constexpr (_V0VT::_S_full_size == _R::_S_first_size 701 && _Offset == 0) 702 return _R{__from0, 703 __to_simd_tuple<_Tp, _Np - _R::_S_first_size>(__fromX...)}; 704 705 // place lower part of __from0 into _R::first and recurse with _Offset 706 else if constexpr (_V0VT::_S_full_size > _R::_S_first_size 707 && _Offset == 0) 708 return _R{__intrin_bitcast<_R0>(__from0), 709 __to_simd_tuple<_Tp, _Np - _R::_S_first_size, 710 _R::_S_first_size>(__from0, __fromX...)}; 711 712 // place lower part of second quarter of __from0 into _R::first and 713 // recurse with _Offset 714 else if constexpr (_Offset * 4 == _V0VT::_S_full_size 715 && _V0VT::_S_full_size >= 4 * _R::_S_first_size) 716 return _R{__intrin_bitcast<_R0>(__extract_part<2, 4>(__from0)), 717 __to_simd_tuple<_Tp, _Np - _R::_S_first_size, 718 _Offset + _R::_S_first_size>(__from0, 719 __fromX...)}; 720 721 // place lower half of high half of __from0 into _R::first and recurse 722 // with _Offset 723 else if constexpr (_Offset * 2 == _V0VT::_S_full_size 724 && _V0VT::_S_full_size >= 4 * _R::_S_first_size) 725 return _R{__intrin_bitcast<_R0>(__extract_part<2, 4>(__from0)), 726 __to_simd_tuple<_Tp, _Np - _R::_S_first_size, 727 _Offset + _R::_S_first_size>(__from0, 728 __fromX...)}; 729 730 // place high half of __from0 into _R::first and recurse with __fromX 731 else if constexpr (_Offset * 2 == _V0VT::_S_full_size 732 && _V0VT::_S_full_size / 2 >= _R::_S_first_size) 733 return _R{__intrin_bitcast<_R0>(__extract_part<1, 2>(__from0)), 734 __to_simd_tuple<_Tp, _Np - _R::_S_first_size, 0>( 735 __fromX...)}; 736 737 // ill-formed if some unforseen pattern is needed 738 else 739 __assert_unreachable<_Tp>(); 740 } 741 } 742 743 template <typename _Tp, size_t _Np, typename _V, size_t _NV, typename... _VX> 744 _GLIBCXX_SIMD_INTRINSIC constexpr __fixed_size_storage_t<_Tp, _Np> 745 __to_simd_tuple(const array<_V, _NV>& __from, const _VX... __fromX) 746 { 747 if constexpr (is_same_v<_Tp, _V>) 748 { 749 static_assert( 750 sizeof...(_VX) == 0, 751 "An array of scalars must be the last argument to __to_simd_tuple"); 752 return __call_with_subscripts( 753 __from, make_index_sequence<_NV>(), 754 [&](const auto... __args) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA { 755 return __simd_tuple_concat( 756 _SimdTuple<_Tp, simd_abi::scalar>{__args}..., _SimdTuple<_Tp>()); 757 }); 758 } 759 else 760 return __call_with_subscripts( 761 __from, make_index_sequence<_NV>(), 762 [&](const auto... __args) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA { 763 return __to_simd_tuple<_Tp, _Np>(__args..., __fromX...); 764 }); 765 } 766 767 template <size_t, typename _Tp> 768 using __to_tuple_helper = _Tp; 769 770 template <typename _Tp, typename _A0, size_t _NOut, size_t _Np, 771 size_t... _Indexes> 772 _GLIBCXX_SIMD_INTRINSIC __fixed_size_storage_t<_Tp, _NOut> 773 __to_simd_tuple_impl(index_sequence<_Indexes...>, 774 const array<__vector_type_t<_Tp, simd_size_v<_Tp, _A0>>, _Np>& __args) 775 { 776 return __make_simd_tuple<_Tp, __to_tuple_helper<_Indexes, _A0>...>( 777 __args[_Indexes]...); 778 } 779 780 template <typename _Tp, typename _A0, size_t _NOut, size_t _Np, 781 typename _R = __fixed_size_storage_t<_Tp, _NOut>> 782 _GLIBCXX_SIMD_INTRINSIC _R 783 __to_simd_tuple_sized( 784 const array<__vector_type_t<_Tp, simd_size_v<_Tp, _A0>>, _Np>& __args) 785 { 786 static_assert(_Np * simd_size_v<_Tp, _A0> >= _NOut); 787 return __to_simd_tuple_impl<_Tp, _A0, _NOut>( 788 make_index_sequence<_R::_S_tuple_size>(), __args); 789 } 790 791 // __optimize_simd_tuple {{{1 792 template <typename _Tp> 793 _GLIBCXX_SIMD_INTRINSIC constexpr _SimdTuple<_Tp> 794 __optimize_simd_tuple(const _SimdTuple<_Tp>) 795 { return {}; } 796 797 template <typename _Tp, typename _Ap> 798 _GLIBCXX_SIMD_INTRINSIC constexpr const _SimdTuple<_Tp, _Ap>& 799 __optimize_simd_tuple(const _SimdTuple<_Tp, _Ap>& __x) 800 { return __x; } 801 802 template <typename _Tp, typename _A0, typename _A1, typename... _Abis, 803 typename _R = __fixed_size_storage_t< 804 _Tp, _SimdTuple<_Tp, _A0, _A1, _Abis...>::_S_size()>> 805 _GLIBCXX_SIMD_INTRINSIC constexpr _R 806 __optimize_simd_tuple(const _SimdTuple<_Tp, _A0, _A1, _Abis...>& __x) 807 { 808 using _Tup = _SimdTuple<_Tp, _A0, _A1, _Abis...>; 809 if constexpr (is_same_v<_R, _Tup>) 810 return __x; 811 else if constexpr (is_same_v<typename _R::_FirstType, 812 typename _Tup::_FirstType>) 813 return {__x.first, __optimize_simd_tuple(__x.second)}; 814 else if constexpr (__is_scalar_abi<_A0>() 815 || _A0::template _S_is_partial<_Tp>) 816 return {__generate_from_n_evaluations<_R::_S_first_size, 817 typename _R::_FirstType>( 818 [&](auto __i) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA { return __x[__i]; }), 819 __optimize_simd_tuple( 820 __simd_tuple_pop_front<_R::_S_first_size>(__x))}; 821 else if constexpr (is_same_v<_A0, _A1> 822 && _R::_S_first_size == simd_size_v<_Tp, _A0> + simd_size_v<_Tp, _A1>) 823 return {__concat(__x.template _M_at<0>(), __x.template _M_at<1>()), 824 __optimize_simd_tuple(__x.second.second)}; 825 else if constexpr (sizeof...(_Abis) >= 2 826 && _R::_S_first_size == (4 * simd_size_v<_Tp, _A0>) 827 && simd_size_v<_Tp, _A0> == __simd_tuple_element_t< 828 (sizeof...(_Abis) >= 2 ? 3 : 0), _Tup>::size()) 829 return { 830 __concat(__concat(__x.template _M_at<0>(), __x.template _M_at<1>()), 831 __concat(__x.template _M_at<2>(), __x.template _M_at<3>())), 832 __optimize_simd_tuple(__x.second.second.second.second)}; 833 else 834 { 835 static_assert(sizeof(_R) == sizeof(__x)); 836 _R __r; 837 __builtin_memcpy(__r._M_as_charptr(), __x._M_as_charptr(), 838 sizeof(_Tp) * _R::_S_size()); 839 return __r; 840 } 841 } 842 843 // __for_each(const _SimdTuple &, Fun) {{{1 844 template <size_t _Offset = 0, typename _Tp, typename _A0, typename _Fp> 845 _GLIBCXX_SIMD_INTRINSIC constexpr void 846 __for_each(const _SimdTuple<_Tp, _A0>& __t, _Fp&& __fun) 847 { static_cast<_Fp&&>(__fun)(__make_meta<_Offset>(__t), __t.first); } 848 849 template <size_t _Offset = 0, typename _Tp, typename _A0, typename _A1, 850 typename... _As, typename _Fp> 851 _GLIBCXX_SIMD_INTRINSIC constexpr void 852 __for_each(const _SimdTuple<_Tp, _A0, _A1, _As...>& __t, _Fp&& __fun) 853 { 854 __fun(__make_meta<_Offset>(__t), __t.first); 855 __for_each<_Offset + simd_size<_Tp, _A0>::value>(__t.second, 856 static_cast<_Fp&&>(__fun)); 857 } 858 859 // __for_each(_SimdTuple &, Fun) {{{1 860 template <size_t _Offset = 0, typename _Tp, typename _A0, typename _Fp> 861 _GLIBCXX_SIMD_INTRINSIC constexpr void 862 __for_each(_SimdTuple<_Tp, _A0>& __t, _Fp&& __fun) 863 { static_cast<_Fp&&>(__fun)(__make_meta<_Offset>(__t), __t.first); } 864 865 template <size_t _Offset = 0, typename _Tp, typename _A0, typename _A1, 866 typename... _As, typename _Fp> 867 _GLIBCXX_SIMD_INTRINSIC constexpr void 868 __for_each(_SimdTuple<_Tp, _A0, _A1, _As...>& __t, _Fp&& __fun) 869 { 870 __fun(__make_meta<_Offset>(__t), __t.first); 871 __for_each<_Offset + simd_size<_Tp, _A0>::value>(__t.second, 872 static_cast<_Fp&&>(__fun)); 873 } 874 875 // __for_each(_SimdTuple &, const _SimdTuple &, Fun) {{{1 876 template <size_t _Offset = 0, typename _Tp, typename _A0, typename _Fp> 877 _GLIBCXX_SIMD_INTRINSIC constexpr void 878 __for_each(_SimdTuple<_Tp, _A0>& __a, const _SimdTuple<_Tp, _A0>& __b, _Fp&& __fun) 879 { static_cast<_Fp&&>(__fun)(__make_meta<_Offset>(__a), __a.first, __b.first); } 880 881 template <size_t _Offset = 0, typename _Tp, typename _A0, typename _A1, 882 typename... _As, typename _Fp> 883 _GLIBCXX_SIMD_INTRINSIC constexpr void 884 __for_each(_SimdTuple<_Tp, _A0, _A1, _As...>& __a, 885 const _SimdTuple<_Tp, _A0, _A1, _As...>& __b, _Fp&& __fun) 886 { 887 __fun(__make_meta<_Offset>(__a), __a.first, __b.first); 888 __for_each<_Offset + simd_size<_Tp, _A0>::value>(__a.second, __b.second, 889 static_cast<_Fp&&>(__fun)); 890 } 891 892 // __for_each(const _SimdTuple &, const _SimdTuple &, Fun) {{{1 893 template <size_t _Offset = 0, typename _Tp, typename _A0, typename _Fp> 894 _GLIBCXX_SIMD_INTRINSIC constexpr void 895 __for_each(const _SimdTuple<_Tp, _A0>& __a, const _SimdTuple<_Tp, _A0>& __b, _Fp&& __fun) 896 { static_cast<_Fp&&>(__fun)(__make_meta<_Offset>(__a), __a.first, __b.first); } 897 898 template <size_t _Offset = 0, typename _Tp, typename _A0, typename _A1, 899 typename... _As, typename _Fp> 900 _GLIBCXX_SIMD_INTRINSIC constexpr void 901 __for_each(const _SimdTuple<_Tp, _A0, _A1, _As...>& __a, 902 const _SimdTuple<_Tp, _A0, _A1, _As...>& __b, _Fp&& __fun) 903 { 904 __fun(__make_meta<_Offset>(__a), __a.first, __b.first); 905 __for_each<_Offset + simd_size<_Tp, _A0>::value>(__a.second, __b.second, 906 static_cast<_Fp&&>(__fun)); 907 } 908 909 // }}}1 910 // __extract_part(_SimdTuple) {{{ 911 template <int _Index, int _Total, int _Combine, typename _Tp, typename _A0, typename... _As> 912 _GLIBCXX_SIMD_INTRINSIC constexpr auto // __vector_type_t or _SimdTuple 913 __extract_part(const _SimdTuple<_Tp, _A0, _As...>& __x) 914 { 915 // worst cases: 916 // (a) 4, 4, 4 => 3, 3, 3, 3 (_Total = 4) 917 // (b) 2, 2, 2 => 3, 3 (_Total = 2) 918 // (c) 4, 2 => 2, 2, 2 (_Total = 3) 919 using _Tuple = _SimdTuple<_Tp, _A0, _As...>; 920 static_assert(_Index + _Combine <= _Total && _Index >= 0 && _Total >= 1); 921 constexpr size_t _Np = _Tuple::_S_size(); 922 static_assert(_Np >= _Total && _Np % _Total == 0); 923 constexpr size_t __values_per_part = _Np / _Total; 924 [[maybe_unused]] constexpr size_t __values_to_skip 925 = _Index * __values_per_part; 926 constexpr size_t __return_size = __values_per_part * _Combine; 927 using _RetAbi = simd_abi::deduce_t<_Tp, __return_size>; 928 929 // handle (optimize) the simple cases 930 if constexpr (_Index == 0 && _Tuple::_S_first_size == __return_size) 931 return __x.first._M_data; 932 else if constexpr (_Index == 0 && _Total == _Combine) 933 return __x; 934 else if constexpr (_Index == 0 && _Tuple::_S_first_size >= __return_size) 935 return __intrin_bitcast<__vector_type_t<_Tp, __return_size>>( 936 __as_vector(__x.first)); 937 938 // recurse to skip unused data members at the beginning of _SimdTuple 939 else if constexpr (__values_to_skip >= _Tuple::_S_first_size) 940 { // recurse 941 if constexpr (_Tuple::_S_first_size % __values_per_part == 0) 942 { 943 constexpr int __parts_in_first 944 = _Tuple::_S_first_size / __values_per_part; 945 return __extract_part<_Index - __parts_in_first, 946 _Total - __parts_in_first, _Combine>( 947 __x.second); 948 } 949 else 950 return __extract_part<__values_to_skip - _Tuple::_S_first_size, 951 _Np - _Tuple::_S_first_size, __return_size>( 952 __x.second); 953 } 954 955 // extract from multiple _SimdTuple data members 956 else if constexpr (__return_size > _Tuple::_S_first_size - __values_to_skip) 957 { 958 #ifdef _GLIBCXX_SIMD_USE_ALIASING_LOADS 959 const __may_alias<_Tp>* const element_ptr 960 = reinterpret_cast<const __may_alias<_Tp>*>(&__x) + __values_to_skip; 961 return __as_vector(simd<_Tp, _RetAbi>(element_ptr, element_aligned)); 962 #else 963 [[maybe_unused]] constexpr size_t __offset = __values_to_skip; 964 return __as_vector(simd<_Tp, _RetAbi>( 965 [&](auto __i) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA { 966 constexpr _SizeConstant<__i + __offset> __k; 967 return __x[__k]; 968 })); 969 #endif 970 } 971 972 // all of the return values are in __x.first 973 else if constexpr (_Tuple::_S_first_size % __values_per_part == 0) 974 return __extract_part<_Index, _Tuple::_S_first_size / __values_per_part, 975 _Combine>(__x.first); 976 else 977 return __extract_part<__values_to_skip, _Tuple::_S_first_size, 978 _Combine * __values_per_part>(__x.first); 979 } 980 981 // }}} 982 // __fixed_size_storage_t<_Tp, _Np>{{{ 983 template <typename _Tp, int _Np, typename _Tuple, 984 typename _Next = simd<_Tp, _AllNativeAbis::_BestAbi<_Tp, _Np>>, 985 int _Remain = _Np - int(_Next::size())> 986 struct __fixed_size_storage_builder; 987 988 template <typename _Tp, int _Np> 989 struct __fixed_size_storage 990 : public __fixed_size_storage_builder<_Tp, _Np, _SimdTuple<_Tp>> {}; 991 992 template <typename _Tp, int _Np, typename... _As, typename _Next> 993 struct __fixed_size_storage_builder<_Tp, _Np, _SimdTuple<_Tp, _As...>, _Next, 994 0> 995 { using type = _SimdTuple<_Tp, _As..., typename _Next::abi_type>; }; 996 997 template <typename _Tp, int _Np, typename... _As, typename _Next, int _Remain> 998 struct __fixed_size_storage_builder<_Tp, _Np, _SimdTuple<_Tp, _As...>, _Next, 999 _Remain> 1000 { 1001 using type = typename __fixed_size_storage_builder< 1002 _Tp, _Remain, _SimdTuple<_Tp, _As..., typename _Next::abi_type>>::type; 1003 }; 1004 1005 // }}} 1006 // __autocvt_to_simd {{{ 1007 template <typename _Tp, bool = is_arithmetic_v<__remove_cvref_t<_Tp>>> 1008 struct __autocvt_to_simd 1009 { 1010 _Tp _M_data; 1011 using _TT = __remove_cvref_t<_Tp>; 1012 1013 _GLIBCXX_SIMD_INTRINSIC constexpr 1014 operator _TT() 1015 { return _M_data; } 1016 1017 _GLIBCXX_SIMD_INTRINSIC constexpr 1018 operator _TT&() 1019 { 1020 static_assert(is_lvalue_reference<_Tp>::value, ""); 1021 static_assert(!is_const<_Tp>::value, ""); 1022 return _M_data; 1023 } 1024 1025 _GLIBCXX_SIMD_INTRINSIC constexpr 1026 operator _TT*() 1027 { 1028 static_assert(is_lvalue_reference<_Tp>::value, ""); 1029 static_assert(!is_const<_Tp>::value, ""); 1030 return &_M_data; 1031 } 1032 1033 _GLIBCXX_SIMD_INTRINSIC constexpr 1034 __autocvt_to_simd(_Tp dd) : _M_data(dd) {} 1035 1036 template <typename _Abi> 1037 _GLIBCXX_SIMD_INTRINSIC constexpr 1038 operator simd<typename _TT::value_type, _Abi>() 1039 { return {__private_init, _M_data}; } 1040 1041 template <typename _Abi> 1042 _GLIBCXX_SIMD_INTRINSIC constexpr 1043 operator simd<typename _TT::value_type, _Abi>&() 1044 { return *reinterpret_cast<simd<typename _TT::value_type, _Abi>*>(&_M_data); } 1045 1046 template <typename _Abi> 1047 _GLIBCXX_SIMD_INTRINSIC constexpr 1048 operator simd<typename _TT::value_type, _Abi>*() 1049 { return reinterpret_cast<simd<typename _TT::value_type, _Abi>*>(&_M_data); } 1050 }; 1051 1052 template <typename _Tp> 1053 __autocvt_to_simd(_Tp &&) -> __autocvt_to_simd<_Tp>; 1054 1055 template <typename _Tp> 1056 struct __autocvt_to_simd<_Tp, true> 1057 { 1058 using _TT = __remove_cvref_t<_Tp>; 1059 _Tp _M_data; 1060 fixed_size_simd<_TT, 1> _M_fd; 1061 1062 _GLIBCXX_SIMD_INTRINSIC 1063 constexpr __autocvt_to_simd(_Tp dd) : _M_data(dd), _M_fd(_M_data) {} 1064 1065 _GLIBCXX_SIMD_INTRINSIC 1066 ~__autocvt_to_simd() 1067 { _M_data = __data(_M_fd).first; } 1068 1069 _GLIBCXX_SIMD_INTRINSIC constexpr 1070 operator fixed_size_simd<_TT, 1>() 1071 { return _M_fd; } 1072 1073 _GLIBCXX_SIMD_INTRINSIC constexpr 1074 operator fixed_size_simd<_TT, 1> &() 1075 { 1076 static_assert(is_lvalue_reference<_Tp>::value, ""); 1077 static_assert(!is_const<_Tp>::value, ""); 1078 return _M_fd; 1079 } 1080 1081 _GLIBCXX_SIMD_INTRINSIC constexpr 1082 operator fixed_size_simd<_TT, 1> *() 1083 { 1084 static_assert(is_lvalue_reference<_Tp>::value, ""); 1085 static_assert(!is_const<_Tp>::value, ""); 1086 return &_M_fd; 1087 } 1088 }; 1089 1090 // }}} 1091 1092 struct _CommonImplFixedSize; 1093 template <int _Np, typename = __detail::__odr_helper> struct _SimdImplFixedSize; 1094 template <int _Np, typename = __detail::__odr_helper> struct _MaskImplFixedSize; 1095 // simd_abi::_Fixed {{{ 1096 template <int _Np> 1097 struct simd_abi::_Fixed 1098 { 1099 template <typename _Tp> static constexpr size_t _S_size = _Np; 1100 template <typename _Tp> static constexpr size_t _S_full_size = _Np; 1101 // validity traits {{{ 1102 struct _IsValidAbiTag : public __bool_constant<(_Np > 0)> {}; 1103 1104 template <typename _Tp> 1105 struct _IsValidSizeFor 1106 : __bool_constant<(_Np <= simd_abi::max_fixed_size<_Tp>)> {}; 1107 1108 template <typename _Tp> 1109 struct _IsValid : conjunction<_IsValidAbiTag, __is_vectorizable<_Tp>, 1110 _IsValidSizeFor<_Tp>> {}; 1111 1112 template <typename _Tp> 1113 static constexpr bool _S_is_valid_v = _IsValid<_Tp>::value; 1114 1115 // }}} 1116 // _S_masked {{{ 1117 _GLIBCXX_SIMD_INTRINSIC static constexpr _SanitizedBitMask<_Np> 1118 _S_masked(_BitMask<_Np> __x) 1119 { return __x._M_sanitized(); } 1120 1121 _GLIBCXX_SIMD_INTRINSIC static constexpr _SanitizedBitMask<_Np> 1122 _S_masked(_SanitizedBitMask<_Np> __x) 1123 { return __x; } 1124 1125 // }}} 1126 // _*Impl {{{ 1127 using _CommonImpl = _CommonImplFixedSize; 1128 using _SimdImpl = _SimdImplFixedSize<_Np>; 1129 using _MaskImpl = _MaskImplFixedSize<_Np>; 1130 1131 // }}} 1132 // __traits {{{ 1133 template <typename _Tp, bool = _S_is_valid_v<_Tp>> 1134 struct __traits : _InvalidTraits {}; 1135 1136 template <typename _Tp> 1137 struct __traits<_Tp, true> 1138 { 1139 using _IsValid = true_type; 1140 using _SimdImpl = _SimdImplFixedSize<_Np>; 1141 using _MaskImpl = _MaskImplFixedSize<_Np>; 1142 1143 // simd and simd_mask member types {{{ 1144 using _SimdMember = __fixed_size_storage_t<_Tp, _Np>; 1145 using _MaskMember = _SanitizedBitMask<_Np>; 1146 1147 static constexpr size_t _S_simd_align 1148 = std::__bit_ceil(_Np * sizeof(_Tp)); 1149 1150 static constexpr size_t _S_mask_align = alignof(_MaskMember); 1151 1152 // }}} 1153 // _SimdBase / base class for simd, providing extra conversions {{{ 1154 struct _SimdBase 1155 { 1156 // The following ensures, function arguments are passed via the stack. 1157 // This is important for ABI compatibility across TU boundaries 1158 _GLIBCXX_SIMD_ALWAYS_INLINE constexpr 1159 _SimdBase(const _SimdBase&) {} 1160 1161 _SimdBase() = default; 1162 1163 _GLIBCXX_SIMD_ALWAYS_INLINE constexpr explicit 1164 operator const _SimdMember &() const 1165 { return static_cast<const simd<_Tp, _Fixed>*>(this)->_M_data; } 1166 1167 _GLIBCXX_SIMD_ALWAYS_INLINE constexpr explicit 1168 operator array<_Tp, _Np>() const 1169 { 1170 array<_Tp, _Np> __r; 1171 // _SimdMember can be larger because of higher alignment 1172 static_assert(sizeof(__r) <= sizeof(_SimdMember), ""); 1173 __builtin_memcpy(__r.data(), &static_cast<const _SimdMember&>(*this), 1174 sizeof(__r)); 1175 return __r; 1176 } 1177 }; 1178 1179 // }}} 1180 // _MaskBase {{{ 1181 // empty. The bitset interface suffices 1182 struct _MaskBase {}; 1183 1184 // }}} 1185 // _SimdCastType {{{ 1186 struct _SimdCastType 1187 { 1188 _GLIBCXX_SIMD_ALWAYS_INLINE constexpr 1189 _SimdCastType(const array<_Tp, _Np>&); 1190 1191 _GLIBCXX_SIMD_ALWAYS_INLINE constexpr 1192 _SimdCastType(const _SimdMember& dd) : _M_data(dd) {} 1193 1194 _GLIBCXX_SIMD_ALWAYS_INLINE constexpr explicit 1195 operator const _SimdMember &() const { return _M_data; } 1196 1197 private: 1198 const _SimdMember& _M_data; 1199 }; 1200 1201 // }}} 1202 // _MaskCastType {{{ 1203 class _MaskCastType 1204 { 1205 _MaskCastType() = delete; 1206 }; 1207 // }}} 1208 }; 1209 // }}} 1210 }; 1211 1212 // }}} 1213 // _CommonImplFixedSize {{{ 1214 struct _CommonImplFixedSize 1215 { 1216 // _S_store {{{ 1217 template <typename _Tp, typename... _As> 1218 _GLIBCXX_SIMD_INTRINSIC static void 1219 _S_store(const _SimdTuple<_Tp, _As...>& __x, void* __addr) 1220 { 1221 constexpr size_t _Np = _SimdTuple<_Tp, _As...>::_S_size(); 1222 __builtin_memcpy(__addr, &__x, _Np * sizeof(_Tp)); 1223 } 1224 1225 // }}} 1226 }; 1227 1228 // }}} 1229 // _SimdImplFixedSize {{{1 1230 // fixed_size should not inherit from _SimdMathFallback in order for 1231 // specializations in the used _SimdTuple Abis to get used 1232 template <int _Np, typename> 1233 struct _SimdImplFixedSize 1234 { 1235 // member types {{{2 1236 using _MaskMember = _SanitizedBitMask<_Np>; 1237 1238 template <typename _Tp> 1239 using _SimdMember = __fixed_size_storage_t<_Tp, _Np>; 1240 1241 template <typename _Tp> 1242 static constexpr size_t _S_tuple_size = _SimdMember<_Tp>::_S_tuple_size; 1243 1244 template <typename _Tp> 1245 using _Simd = simd<_Tp, simd_abi::fixed_size<_Np>>; 1246 1247 template <typename _Tp> 1248 using _TypeTag = _Tp*; 1249 1250 // broadcast {{{2 1251 template <typename _Tp> 1252 _GLIBCXX_SIMD_INTRINSIC static constexpr _SimdMember<_Tp> 1253 _S_broadcast(_Tp __x) noexcept 1254 { 1255 return _SimdMember<_Tp>::_S_generate( 1256 [&](auto __meta) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA { 1257 return __meta._S_broadcast(__x); 1258 }); 1259 } 1260 1261 // _S_generator {{{2 1262 template <typename _Fp, typename _Tp> 1263 _GLIBCXX_SIMD_INTRINSIC static constexpr _SimdMember<_Tp> 1264 _S_generator(_Fp&& __gen, _TypeTag<_Tp>) 1265 { 1266 return _SimdMember<_Tp>::_S_generate( 1267 [&__gen](auto __meta) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA { 1268 return __meta._S_generator( 1269 [&](auto __i) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA { 1270 return __i < _Np ? __gen(_SizeConstant<__meta._S_offset + __i>()) 1271 : 0; 1272 }, 1273 _TypeTag<_Tp>()); 1274 }); 1275 } 1276 1277 // _S_load {{{2 1278 template <typename _Tp, typename _Up> 1279 _GLIBCXX_SIMD_INTRINSIC static constexpr _SimdMember<_Tp> 1280 _S_load(const _Up* __mem, _TypeTag<_Tp>) noexcept 1281 { 1282 return _SimdMember<_Tp>::_S_generate( 1283 [&](auto __meta) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA { 1284 return __meta._S_load(&__mem[__meta._S_offset], _TypeTag<_Tp>()); 1285 }); 1286 } 1287 1288 // _S_masked_load {{{2 1289 template <typename _Tp, typename... _As, typename _Up> 1290 _GLIBCXX_SIMD_INTRINSIC static _SimdTuple<_Tp, _As...> 1291 _S_masked_load(const _SimdTuple<_Tp, _As...>& __old, 1292 const _MaskMember __bits, const _Up* __mem) noexcept 1293 { 1294 auto __merge = __old; 1295 __for_each(__merge, [&](auto __meta, auto& __native) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA { 1296 if (__meta._S_submask(__bits).any()) 1297 #pragma GCC diagnostic push 1298 // Dereferencing __mem + __meta._S_offset could be UB ([expr.add]/4.3). 1299 // It is the responsibility of the caller of the masked load (via the mask's value) to 1300 // avoid UB. Consequently, the compiler may assume this branch is unreachable, if the 1301 // pointer arithmetic is UB. 1302 #pragma GCC diagnostic ignored "-Warray-bounds" 1303 __native 1304 = __meta._S_masked_load(__native, __meta._S_make_mask(__bits), 1305 __mem + __meta._S_offset); 1306 #pragma GCC diagnostic pop 1307 }); 1308 return __merge; 1309 } 1310 1311 // _S_store {{{2 1312 template <typename _Tp, typename _Up> 1313 _GLIBCXX_SIMD_INTRINSIC static constexpr void 1314 _S_store(const _SimdMember<_Tp>& __v, _Up* __mem, _TypeTag<_Tp>) noexcept 1315 { 1316 __for_each(__v, [&](auto __meta, auto __native) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA { 1317 __meta._S_store(__native, &__mem[__meta._S_offset], _TypeTag<_Tp>()); 1318 }); 1319 } 1320 1321 // _S_masked_store {{{2 1322 template <typename _Tp, typename... _As, typename _Up> 1323 _GLIBCXX_SIMD_INTRINSIC static void 1324 _S_masked_store(const _SimdTuple<_Tp, _As...>& __v, _Up* __mem, 1325 const _MaskMember __bits) noexcept 1326 { 1327 __for_each(__v, [&](auto __meta, auto __native) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA { 1328 if (__meta._S_submask(__bits).any()) 1329 #pragma GCC diagnostic push 1330 // __mem + __mem._S_offset could be UB ([expr.add]/4.3, but it punts 1331 // the responsibility for avoiding UB to the caller of the masked 1332 // store via the mask. Consequently, the compiler may assume this 1333 // branch is unreachable, if the pointer arithmetic is UB. 1334 #pragma GCC diagnostic ignored "-Warray-bounds" 1335 __meta._S_masked_store(__native, __mem + __meta._S_offset, 1336 __meta._S_make_mask(__bits)); 1337 #pragma GCC diagnostic pop 1338 }); 1339 } 1340 1341 // negation {{{2 1342 template <typename _Tp, typename... _As> 1343 static constexpr inline _MaskMember 1344 _S_negate(const _SimdTuple<_Tp, _As...>& __x) noexcept 1345 { 1346 _MaskMember __bits = 0; 1347 __for_each( 1348 __x, [&__bits](auto __meta, auto __native) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA { 1349 __bits 1350 |= __meta._S_mask_to_shifted_ullong(__meta._S_negate(__native)); 1351 }); 1352 return __bits; 1353 } 1354 1355 // reductions {{{2 1356 template <typename _Tp, typename _BinaryOperation> 1357 static constexpr inline _Tp _S_reduce(const _Simd<_Tp>& __x, 1358 const _BinaryOperation& __binary_op) 1359 { 1360 using _Tup = _SimdMember<_Tp>; 1361 const _Tup& __tup = __data(__x); 1362 if constexpr (_Tup::_S_tuple_size == 1) 1363 return _Tup::_FirstAbi::_SimdImpl::_S_reduce( 1364 __tup.template _M_simd_at<0>(), __binary_op); 1365 else if constexpr (_Tup::_S_tuple_size == 2 && _Tup::_S_size() > 2 1366 && _Tup::_SecondType::_S_size() == 1) 1367 { 1368 return __binary_op(simd<_Tp, simd_abi::scalar>( 1369 reduce(__tup.template _M_simd_at<0>(), 1370 __binary_op)), 1371 __tup.template _M_simd_at<1>())[0]; 1372 } 1373 else if constexpr (_Tup::_S_tuple_size == 2 && _Tup::_S_size() > 4 1374 && _Tup::_SecondType::_S_size() == 2) 1375 { 1376 return __binary_op( 1377 simd<_Tp, simd_abi::scalar>( 1378 reduce(__tup.template _M_simd_at<0>(), __binary_op)), 1379 simd<_Tp, simd_abi::scalar>( 1380 reduce(__tup.template _M_simd_at<1>(), __binary_op)))[0]; 1381 } 1382 else 1383 { 1384 const auto& __x2 = __call_with_n_evaluations< 1385 __div_roundup(_Tup::_S_tuple_size, 2)>( 1386 [](auto __first_simd, auto... __remaining) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA { 1387 if constexpr (sizeof...(__remaining) == 0) 1388 return __first_simd; 1389 else 1390 { 1391 using _Tup2 1392 = _SimdTuple<_Tp, 1393 typename decltype(__first_simd)::abi_type, 1394 typename decltype(__remaining)::abi_type...>; 1395 return fixed_size_simd<_Tp, _Tup2::_S_size()>( 1396 __private_init, 1397 __make_simd_tuple(__first_simd, __remaining...)); 1398 } 1399 }, 1400 [&](auto __i) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA { 1401 auto __left = __tup.template _M_simd_at<2 * __i>(); 1402 if constexpr (2 * __i + 1 == _Tup::_S_tuple_size) 1403 return __left; 1404 else 1405 { 1406 auto __right = __tup.template _M_simd_at<2 * __i + 1>(); 1407 using _LT = decltype(__left); 1408 using _RT = decltype(__right); 1409 if constexpr (_LT::size() == _RT::size()) 1410 return __binary_op(__left, __right); 1411 else 1412 { 1413 _GLIBCXX_SIMD_USE_CONSTEXPR_API 1414 typename _LT::mask_type __k( 1415 __private_init, 1416 [](auto __j) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA { 1417 return __j < _RT::size(); 1418 }); 1419 _LT __ext_right = __left; 1420 where(__k, __ext_right) 1421 = __proposed::resizing_simd_cast<_LT>(__right); 1422 where(__k, __left) = __binary_op(__left, __ext_right); 1423 return __left; 1424 } 1425 } 1426 }); 1427 return reduce(__x2, __binary_op); 1428 } 1429 } 1430 1431 // _S_min, _S_max {{{2 1432 template <typename _Tp, typename... _As> 1433 _GLIBCXX_SIMD_INTRINSIC static constexpr _SimdTuple<_Tp, _As...> 1434 _S_min(const _SimdTuple<_Tp, _As...>& __a, const _SimdTuple<_Tp, _As...>& __b) 1435 { 1436 return __a._M_apply_per_chunk( 1437 [](auto __impl, auto __aa, auto __bb) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA { 1438 return __impl._S_min(__aa, __bb); 1439 }, 1440 __b); 1441 } 1442 1443 template <typename _Tp, typename... _As> 1444 _GLIBCXX_SIMD_INTRINSIC static constexpr _SimdTuple<_Tp, _As...> 1445 _S_max(const _SimdTuple<_Tp, _As...>& __a, const _SimdTuple<_Tp, _As...>& __b) 1446 { 1447 return __a._M_apply_per_chunk( 1448 [](auto __impl, auto __aa, auto __bb) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA { 1449 return __impl._S_max(__aa, __bb); 1450 }, 1451 __b); 1452 } 1453 1454 // _S_complement {{{2 1455 template <typename _Tp, typename... _As> 1456 _GLIBCXX_SIMD_INTRINSIC static constexpr _SimdTuple<_Tp, _As...> 1457 _S_complement(const _SimdTuple<_Tp, _As...>& __x) noexcept 1458 { 1459 return __x._M_apply_per_chunk( 1460 [](auto __impl, auto __xx) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA { 1461 return __impl._S_complement(__xx); 1462 }); 1463 } 1464 1465 // _S_unary_minus {{{2 1466 template <typename _Tp, typename... _As> 1467 _GLIBCXX_SIMD_INTRINSIC static constexpr _SimdTuple<_Tp, _As...> 1468 _S_unary_minus(const _SimdTuple<_Tp, _As...>& __x) noexcept 1469 { 1470 return __x._M_apply_per_chunk( 1471 [](auto __impl, auto __xx) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA { 1472 return __impl._S_unary_minus(__xx); 1473 }); 1474 } 1475 1476 // arithmetic operators {{{2 1477 1478 #define _GLIBCXX_SIMD_FIXED_OP(name_, op_) \ 1479 template <typename _Tp, typename... _As> \ 1480 _GLIBCXX_SIMD_INTRINSIC static constexpr _SimdTuple<_Tp, _As...> name_( \ 1481 const _SimdTuple<_Tp, _As...>& __x, const _SimdTuple<_Tp, _As...>& __y) \ 1482 { \ 1483 return __x._M_apply_per_chunk( \ 1484 [](auto __impl, auto __xx, auto __yy) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA { \ 1485 return __impl.name_(__xx, __yy); \ 1486 }, \ 1487 __y); \ 1488 } 1489 1490 _GLIBCXX_SIMD_FIXED_OP(_S_plus, +) 1491 _GLIBCXX_SIMD_FIXED_OP(_S_minus, -) 1492 _GLIBCXX_SIMD_FIXED_OP(_S_multiplies, *) 1493 _GLIBCXX_SIMD_FIXED_OP(_S_divides, /) 1494 _GLIBCXX_SIMD_FIXED_OP(_S_modulus, %) 1495 _GLIBCXX_SIMD_FIXED_OP(_S_bit_and, &) 1496 _GLIBCXX_SIMD_FIXED_OP(_S_bit_or, |) 1497 _GLIBCXX_SIMD_FIXED_OP(_S_bit_xor, ^) 1498 _GLIBCXX_SIMD_FIXED_OP(_S_bit_shift_left, <<) 1499 _GLIBCXX_SIMD_FIXED_OP(_S_bit_shift_right, >>) 1500 #undef _GLIBCXX_SIMD_FIXED_OP 1501 1502 template <typename _Tp, typename... _As> 1503 _GLIBCXX_SIMD_INTRINSIC static constexpr _SimdTuple<_Tp, _As...> 1504 _S_bit_shift_left(const _SimdTuple<_Tp, _As...>& __x, int __y) 1505 { 1506 return __x._M_apply_per_chunk( 1507 [__y](auto __impl, auto __xx) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA { 1508 return __impl._S_bit_shift_left(__xx, __y); 1509 }); 1510 } 1511 1512 template <typename _Tp, typename... _As> 1513 _GLIBCXX_SIMD_INTRINSIC static constexpr _SimdTuple<_Tp, _As...> 1514 _S_bit_shift_right(const _SimdTuple<_Tp, _As...>& __x, int __y) 1515 { 1516 return __x._M_apply_per_chunk( 1517 [__y](auto __impl, auto __xx) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA { 1518 return __impl._S_bit_shift_right(__xx, __y); 1519 }); 1520 } 1521 1522 // math {{{2 1523 #define _GLIBCXX_SIMD_APPLY_ON_TUPLE(_RetTp, __name) \ 1524 template <typename _Tp, typename... _As, typename... _More> \ 1525 static inline __fixed_size_storage_t<_RetTp, _Np> \ 1526 _S_##__name(const _SimdTuple<_Tp, _As...>& __x, \ 1527 const _More&... __more) \ 1528 { \ 1529 if constexpr (sizeof...(_More) == 0) \ 1530 { \ 1531 if constexpr (is_same_v<_Tp, _RetTp>) \ 1532 return __x._M_apply_per_chunk( \ 1533 [](auto __impl, auto __xx) \ 1534 constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA \ 1535 { \ 1536 using _V = typename decltype(__impl)::simd_type; \ 1537 return __data(__name(_V(__private_init, __xx))); \ 1538 }); \ 1539 else \ 1540 return __optimize_simd_tuple( \ 1541 __x.template _M_apply_r<_RetTp>( \ 1542 [](auto __impl, auto __xx) \ 1543 _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA \ 1544 { return __impl._S_##__name(__xx); })); \ 1545 } \ 1546 else if constexpr ( \ 1547 is_same_v< \ 1548 _Tp, \ 1549 _RetTp> && (... && is_same_v<_SimdTuple<_Tp, _As...>, _More>) ) \ 1550 return __x._M_apply_per_chunk( \ 1551 [](auto __impl, auto __xx, auto... __pack) \ 1552 constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA \ 1553 { \ 1554 using _V = typename decltype(__impl)::simd_type; \ 1555 return __data(__name(_V(__private_init, __xx), \ 1556 _V(__private_init, __pack)...)); \ 1557 }, __more...); \ 1558 else if constexpr (is_same_v<_Tp, _RetTp>) \ 1559 return __x._M_apply_per_chunk( \ 1560 [](auto __impl, auto __xx, auto... __pack) \ 1561 constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA \ 1562 { \ 1563 using _V = typename decltype(__impl)::simd_type; \ 1564 return __data(__name(_V(__private_init, __xx), \ 1565 __autocvt_to_simd(__pack)...)); \ 1566 }, __more...); \ 1567 else \ 1568 __assert_unreachable<_Tp>(); \ 1569 } 1570 1571 _GLIBCXX_SIMD_APPLY_ON_TUPLE(_Tp, acos) 1572 _GLIBCXX_SIMD_APPLY_ON_TUPLE(_Tp, asin) 1573 _GLIBCXX_SIMD_APPLY_ON_TUPLE(_Tp, atan) 1574 _GLIBCXX_SIMD_APPLY_ON_TUPLE(_Tp, atan2) 1575 _GLIBCXX_SIMD_APPLY_ON_TUPLE(_Tp, cos) 1576 _GLIBCXX_SIMD_APPLY_ON_TUPLE(_Tp, sin) 1577 _GLIBCXX_SIMD_APPLY_ON_TUPLE(_Tp, tan) 1578 _GLIBCXX_SIMD_APPLY_ON_TUPLE(_Tp, acosh) 1579 _GLIBCXX_SIMD_APPLY_ON_TUPLE(_Tp, asinh) 1580 _GLIBCXX_SIMD_APPLY_ON_TUPLE(_Tp, atanh) 1581 _GLIBCXX_SIMD_APPLY_ON_TUPLE(_Tp, cosh) 1582 _GLIBCXX_SIMD_APPLY_ON_TUPLE(_Tp, sinh) 1583 _GLIBCXX_SIMD_APPLY_ON_TUPLE(_Tp, tanh) 1584 _GLIBCXX_SIMD_APPLY_ON_TUPLE(_Tp, exp) 1585 _GLIBCXX_SIMD_APPLY_ON_TUPLE(_Tp, exp2) 1586 _GLIBCXX_SIMD_APPLY_ON_TUPLE(_Tp, expm1) 1587 _GLIBCXX_SIMD_APPLY_ON_TUPLE(int, ilogb) 1588 _GLIBCXX_SIMD_APPLY_ON_TUPLE(_Tp, log) 1589 _GLIBCXX_SIMD_APPLY_ON_TUPLE(_Tp, log10) 1590 _GLIBCXX_SIMD_APPLY_ON_TUPLE(_Tp, log1p) 1591 _GLIBCXX_SIMD_APPLY_ON_TUPLE(_Tp, log2) 1592 _GLIBCXX_SIMD_APPLY_ON_TUPLE(_Tp, logb) 1593 // modf implemented in simd_math.h 1594 _GLIBCXX_SIMD_APPLY_ON_TUPLE(_Tp, 1595 scalbn) // double scalbn(double x, int exp); 1596 _GLIBCXX_SIMD_APPLY_ON_TUPLE(_Tp, scalbln) 1597 _GLIBCXX_SIMD_APPLY_ON_TUPLE(_Tp, cbrt) 1598 _GLIBCXX_SIMD_APPLY_ON_TUPLE(_Tp, abs) 1599 _GLIBCXX_SIMD_APPLY_ON_TUPLE(_Tp, fabs) 1600 _GLIBCXX_SIMD_APPLY_ON_TUPLE(_Tp, pow) 1601 _GLIBCXX_SIMD_APPLY_ON_TUPLE(_Tp, sqrt) 1602 _GLIBCXX_SIMD_APPLY_ON_TUPLE(_Tp, erf) 1603 _GLIBCXX_SIMD_APPLY_ON_TUPLE(_Tp, erfc) 1604 _GLIBCXX_SIMD_APPLY_ON_TUPLE(_Tp, lgamma) 1605 _GLIBCXX_SIMD_APPLY_ON_TUPLE(_Tp, tgamma) 1606 _GLIBCXX_SIMD_APPLY_ON_TUPLE(_Tp, trunc) 1607 _GLIBCXX_SIMD_APPLY_ON_TUPLE(_Tp, ceil) 1608 _GLIBCXX_SIMD_APPLY_ON_TUPLE(_Tp, floor) 1609 _GLIBCXX_SIMD_APPLY_ON_TUPLE(_Tp, nearbyint) 1610 1611 _GLIBCXX_SIMD_APPLY_ON_TUPLE(_Tp, rint) 1612 _GLIBCXX_SIMD_APPLY_ON_TUPLE(long, lrint) 1613 _GLIBCXX_SIMD_APPLY_ON_TUPLE(long long, llrint) 1614 1615 _GLIBCXX_SIMD_APPLY_ON_TUPLE(_Tp, round) 1616 _GLIBCXX_SIMD_APPLY_ON_TUPLE(long, lround) 1617 _GLIBCXX_SIMD_APPLY_ON_TUPLE(long long, llround) 1618 1619 _GLIBCXX_SIMD_APPLY_ON_TUPLE(_Tp, ldexp) 1620 _GLIBCXX_SIMD_APPLY_ON_TUPLE(_Tp, fmod) 1621 _GLIBCXX_SIMD_APPLY_ON_TUPLE(_Tp, remainder) 1622 _GLIBCXX_SIMD_APPLY_ON_TUPLE(_Tp, copysign) 1623 _GLIBCXX_SIMD_APPLY_ON_TUPLE(_Tp, nextafter) 1624 _GLIBCXX_SIMD_APPLY_ON_TUPLE(_Tp, fdim) 1625 _GLIBCXX_SIMD_APPLY_ON_TUPLE(_Tp, fmax) 1626 _GLIBCXX_SIMD_APPLY_ON_TUPLE(_Tp, fmin) 1627 _GLIBCXX_SIMD_APPLY_ON_TUPLE(_Tp, fma) 1628 _GLIBCXX_SIMD_APPLY_ON_TUPLE(int, fpclassify) 1629 #undef _GLIBCXX_SIMD_APPLY_ON_TUPLE 1630 1631 template <typename _Tp, typename... _Abis> 1632 static inline _SimdTuple<_Tp, _Abis...> 1633 _S_remquo(const _SimdTuple<_Tp, _Abis...>& __x, const _SimdTuple<_Tp, _Abis...>& __y, 1634 __fixed_size_storage_t<int, _SimdTuple<_Tp, _Abis...>::_S_size()>* __z) 1635 { 1636 return __x._M_apply_per_chunk( 1637 [](auto __impl, const auto __xx, const auto __yy, auto& __zz) 1638 _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA 1639 { return __impl._S_remquo(__xx, __yy, &__zz); }, 1640 __y, *__z); 1641 } 1642 1643 template <typename _Tp, typename... _As> 1644 static inline _SimdTuple<_Tp, _As...> 1645 _S_frexp(const _SimdTuple<_Tp, _As...>& __x, 1646 __fixed_size_storage_t<int, _Np>& __exp) noexcept 1647 { 1648 return __x._M_apply_per_chunk( 1649 [](auto __impl, const auto& __a, auto& __b) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA { 1650 return __data(frexp(typename decltype(__impl)::simd_type(__private_init, __a), 1651 __autocvt_to_simd(__b))); 1652 }, __exp); 1653 } 1654 1655 #define _GLIBCXX_SIMD_TEST_ON_TUPLE_(name_) \ 1656 template <typename _Tp, typename... _As> \ 1657 static inline _MaskMember \ 1658 _S_##name_(const _SimdTuple<_Tp, _As...>& __x) noexcept \ 1659 { \ 1660 return _M_test([] (auto __impl, auto __xx) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA { \ 1661 return __impl._S_##name_(__xx); \ 1662 }, __x); \ 1663 } 1664 1665 _GLIBCXX_SIMD_TEST_ON_TUPLE_(isinf) 1666 _GLIBCXX_SIMD_TEST_ON_TUPLE_(isfinite) 1667 _GLIBCXX_SIMD_TEST_ON_TUPLE_(isnan) 1668 _GLIBCXX_SIMD_TEST_ON_TUPLE_(isnormal) 1669 _GLIBCXX_SIMD_TEST_ON_TUPLE_(signbit) 1670 #undef _GLIBCXX_SIMD_TEST_ON_TUPLE_ 1671 1672 // _S_increment & _S_decrement{{{2 1673 template <typename... _Ts> 1674 _GLIBCXX_SIMD_INTRINSIC static constexpr void 1675 _S_increment(_SimdTuple<_Ts...>& __x) 1676 { 1677 __for_each( 1678 __x, [](auto __meta, auto& native) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA { 1679 __meta._S_increment(native); 1680 }); 1681 } 1682 1683 template <typename... _Ts> 1684 _GLIBCXX_SIMD_INTRINSIC static constexpr void 1685 _S_decrement(_SimdTuple<_Ts...>& __x) 1686 { 1687 __for_each( 1688 __x, [](auto __meta, auto& native) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA { 1689 __meta._S_decrement(native); 1690 }); 1691 } 1692 1693 // compares {{{2 1694 #define _GLIBCXX_SIMD_CMP_OPERATIONS(__cmp) \ 1695 template <typename _Tp, typename... _As> \ 1696 _GLIBCXX_SIMD_INTRINSIC static constexpr _MaskMember \ 1697 __cmp(const _SimdTuple<_Tp, _As...>& __x, \ 1698 const _SimdTuple<_Tp, _As...>& __y) \ 1699 { \ 1700 return _M_test([](auto __impl, auto __xx, auto __yy) \ 1701 constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA \ 1702 { return __impl.__cmp(__xx, __yy); }, \ 1703 __x, __y); \ 1704 } 1705 1706 _GLIBCXX_SIMD_CMP_OPERATIONS(_S_equal_to) 1707 _GLIBCXX_SIMD_CMP_OPERATIONS(_S_not_equal_to) 1708 _GLIBCXX_SIMD_CMP_OPERATIONS(_S_less) 1709 _GLIBCXX_SIMD_CMP_OPERATIONS(_S_less_equal) 1710 _GLIBCXX_SIMD_CMP_OPERATIONS(_S_isless) 1711 _GLIBCXX_SIMD_CMP_OPERATIONS(_S_islessequal) 1712 _GLIBCXX_SIMD_CMP_OPERATIONS(_S_isgreater) 1713 _GLIBCXX_SIMD_CMP_OPERATIONS(_S_isgreaterequal) 1714 _GLIBCXX_SIMD_CMP_OPERATIONS(_S_islessgreater) 1715 _GLIBCXX_SIMD_CMP_OPERATIONS(_S_isunordered) 1716 #undef _GLIBCXX_SIMD_CMP_OPERATIONS 1717 1718 // smart_reference access {{{2 1719 template <typename _Tp, typename... _As, typename _Up> 1720 _GLIBCXX_SIMD_INTRINSIC static constexpr void 1721 _S_set(_SimdTuple<_Tp, _As...>& __v, int __i, _Up&& __x) noexcept 1722 { __v._M_set(__i, static_cast<_Up&&>(__x)); } 1723 1724 // _S_masked_assign {{{2 1725 template <typename _Tp, typename... _As> 1726 _GLIBCXX_SIMD_INTRINSIC static constexpr void 1727 _S_masked_assign(const _MaskMember __bits, _SimdTuple<_Tp, _As...>& __lhs, 1728 const __type_identity_t<_SimdTuple<_Tp, _As...>>& __rhs) 1729 { 1730 __for_each(__lhs, __rhs, 1731 [&](auto __meta, auto& __native_lhs, auto __native_rhs) 1732 constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA 1733 { 1734 __meta._S_masked_assign(__meta._S_make_mask(__bits), __native_lhs, 1735 __native_rhs); 1736 }); 1737 } 1738 1739 // Optimization for the case where the RHS is a scalar. No need to broadcast 1740 // the scalar to a simd first. 1741 template <typename _Tp, typename... _As> 1742 _GLIBCXX_SIMD_INTRINSIC static constexpr void 1743 _S_masked_assign(const _MaskMember __bits, _SimdTuple<_Tp, _As...>& __lhs, 1744 const __type_identity_t<_Tp> __rhs) 1745 { 1746 __for_each( 1747 __lhs, [&](auto __meta, auto& __native_lhs) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA { 1748 __meta._S_masked_assign(__meta._S_make_mask(__bits), __native_lhs, 1749 __rhs); 1750 }); 1751 } 1752 1753 // _S_masked_cassign {{{2 1754 template <typename _Op, typename _Tp, typename... _As> 1755 static constexpr inline void 1756 _S_masked_cassign(const _MaskMember __bits, _SimdTuple<_Tp, _As...>& __lhs, 1757 const _SimdTuple<_Tp, _As...>& __rhs, _Op __op) 1758 { 1759 __for_each(__lhs, __rhs, 1760 [&](auto __meta, auto& __native_lhs, auto __native_rhs) 1761 constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA 1762 { 1763 __meta.template _S_masked_cassign(__meta._S_make_mask(__bits), 1764 __native_lhs, __native_rhs, __op); 1765 }); 1766 } 1767 1768 // Optimization for the case where the RHS is a scalar. No need to broadcast 1769 // the scalar to a simd first. 1770 template <typename _Op, typename _Tp, typename... _As> 1771 static constexpr inline void 1772 _S_masked_cassign(const _MaskMember __bits, _SimdTuple<_Tp, _As...>& __lhs, 1773 const _Tp& __rhs, _Op __op) 1774 { 1775 __for_each( 1776 __lhs, [&](auto __meta, auto& __native_lhs) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA { 1777 __meta.template _S_masked_cassign(__meta._S_make_mask(__bits), 1778 __native_lhs, __rhs, __op); 1779 }); 1780 } 1781 1782 // _S_masked_unary {{{2 1783 template <template <typename> class _Op, typename _Tp, typename... _As> 1784 static constexpr inline _SimdTuple<_Tp, _As...> 1785 _S_masked_unary(const _MaskMember __bits, const _SimdTuple<_Tp, _As...>& __v) 1786 { 1787 return __v._M_apply_wrapped([&__bits](auto __meta, 1788 auto __native) constexpr { 1789 return __meta.template _S_masked_unary<_Op>(__meta._S_make_mask( 1790 __bits), 1791 __native); 1792 }); 1793 } 1794 1795 // }}}2 1796 }; 1797 1798 // _MaskImplFixedSize {{{1 1799 template <int _Np, typename> 1800 struct _MaskImplFixedSize 1801 { 1802 static_assert( 1803 sizeof(_ULLong) * __CHAR_BIT__ >= _Np, 1804 "The fixed_size implementation relies on one _ULLong being able to store " 1805 "all boolean elements."); // required in load & store 1806 1807 // member types {{{ 1808 using _Abi = simd_abi::fixed_size<_Np>; 1809 1810 using _MaskMember = _SanitizedBitMask<_Np>; 1811 1812 template <typename _Tp> 1813 using _FirstAbi = typename __fixed_size_storage_t<_Tp, _Np>::_FirstAbi; 1814 1815 template <typename _Tp> 1816 using _TypeTag = _Tp*; 1817 1818 // }}} 1819 // _S_broadcast {{{ 1820 template <typename> 1821 _GLIBCXX_SIMD_INTRINSIC static constexpr _MaskMember 1822 _S_broadcast(bool __x) 1823 { return __x ? ~_MaskMember() : _MaskMember(); } 1824 1825 // }}} 1826 // _S_load {{{ 1827 template <typename> 1828 _GLIBCXX_SIMD_INTRINSIC static constexpr _MaskMember 1829 _S_load(const bool* __mem) 1830 { 1831 if (__builtin_is_constant_evaluated()) 1832 { 1833 _MaskMember __r{}; 1834 for (size_t __i = 0; __i < _Np; ++__i) 1835 __r.set(__i, __mem[__i]); 1836 return __r; 1837 } 1838 using _Ip = __int_for_sizeof_t<bool>; 1839 // the following load uses element_aligned and relies on __mem already 1840 // carrying alignment information from when this load function was 1841 // called. 1842 const simd<_Ip, _Abi> __bools(reinterpret_cast<const __may_alias<_Ip>*>( 1843 __mem), 1844 element_aligned); 1845 return __data(__bools != 0); 1846 } 1847 1848 // }}} 1849 // _S_to_bits {{{ 1850 template <bool _Sanitized> 1851 _GLIBCXX_SIMD_INTRINSIC static constexpr _SanitizedBitMask<_Np> 1852 _S_to_bits(_BitMask<_Np, _Sanitized> __x) 1853 { 1854 if constexpr (_Sanitized) 1855 return __x; 1856 else 1857 return __x._M_sanitized(); 1858 } 1859 1860 // }}} 1861 // _S_convert {{{ 1862 template <typename _Tp, typename _Up, typename _UAbi> 1863 _GLIBCXX_SIMD_INTRINSIC static constexpr _MaskMember 1864 _S_convert(simd_mask<_Up, _UAbi> __x) 1865 { 1866 return _UAbi::_MaskImpl::_S_to_bits(__data(__x)) 1867 .template _M_extract<0, _Np>(); 1868 } 1869 1870 // }}} 1871 // _S_from_bitmask {{{2 1872 template <typename _Tp> 1873 _GLIBCXX_SIMD_INTRINSIC static constexpr _MaskMember 1874 _S_from_bitmask(_MaskMember __bits, _TypeTag<_Tp>) noexcept 1875 { return __bits; } 1876 1877 // _S_load {{{2 1878 static constexpr inline _MaskMember 1879 _S_load(const bool* __mem) noexcept 1880 { 1881 // TODO: _UChar is not necessarily the best type to use here. For smaller 1882 // _Np _UShort, _UInt, _ULLong, float, and double can be more efficient. 1883 _ULLong __r = 0; 1884 using _Vs = __fixed_size_storage_t<_UChar, _Np>; 1885 __for_each(_Vs{}, [&](auto __meta, auto) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA { 1886 __r |= __meta._S_mask_to_shifted_ullong( 1887 __meta._S_mask_impl._S_load(&__mem[__meta._S_offset], 1888 _SizeConstant<__meta._S_size()>())); 1889 }); 1890 return __r; 1891 } 1892 1893 // _S_masked_load {{{2 1894 static constexpr inline _MaskMember 1895 _S_masked_load(_MaskMember __merge, _MaskMember __mask, const bool* __mem) noexcept 1896 { 1897 _BitOps::_S_bit_iteration(__mask.to_ullong(), 1898 [&](auto __i) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA { 1899 __merge.set(__i, __mem[__i]); 1900 }); 1901 return __merge; 1902 } 1903 1904 // _S_store {{{2 1905 static constexpr inline void 1906 _S_store(const _MaskMember __bitmask, bool* __mem) noexcept 1907 { 1908 if constexpr (_Np == 1) 1909 __mem[0] = __bitmask[0]; 1910 else 1911 _FirstAbi<_UChar>::_CommonImpl::_S_store_bool_array(__bitmask, __mem); 1912 } 1913 1914 // _S_masked_store {{{2 1915 static constexpr inline void 1916 _S_masked_store(const _MaskMember __v, bool* __mem, const _MaskMember __k) noexcept 1917 { 1918 _BitOps::_S_bit_iteration( 1919 __k, [&](auto __i) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA { __mem[__i] = __v[__i]; }); 1920 } 1921 1922 // logical and bitwise operators {{{2 1923 _GLIBCXX_SIMD_INTRINSIC static constexpr _MaskMember 1924 _S_logical_and(const _MaskMember& __x, const _MaskMember& __y) noexcept 1925 { return __x & __y; } 1926 1927 _GLIBCXX_SIMD_INTRINSIC static constexpr _MaskMember 1928 _S_logical_or(const _MaskMember& __x, const _MaskMember& __y) noexcept 1929 { return __x | __y; } 1930 1931 _GLIBCXX_SIMD_INTRINSIC static constexpr _MaskMember 1932 _S_bit_not(const _MaskMember& __x) noexcept 1933 { return ~__x; } 1934 1935 _GLIBCXX_SIMD_INTRINSIC static constexpr _MaskMember 1936 _S_bit_and(const _MaskMember& __x, const _MaskMember& __y) noexcept 1937 { return __x & __y; } 1938 1939 _GLIBCXX_SIMD_INTRINSIC static constexpr _MaskMember 1940 _S_bit_or(const _MaskMember& __x, const _MaskMember& __y) noexcept 1941 { return __x | __y; } 1942 1943 _GLIBCXX_SIMD_INTRINSIC static constexpr _MaskMember 1944 _S_bit_xor(const _MaskMember& __x, const _MaskMember& __y) noexcept 1945 { return __x ^ __y; } 1946 1947 // smart_reference access {{{2 1948 _GLIBCXX_SIMD_INTRINSIC static constexpr void 1949 _S_set(_MaskMember& __k, int __i, bool __x) noexcept 1950 { __k.set(__i, __x); } 1951 1952 // _S_masked_assign {{{2 1953 _GLIBCXX_SIMD_INTRINSIC static constexpr void 1954 _S_masked_assign(const _MaskMember __k, _MaskMember& __lhs, const _MaskMember __rhs) 1955 { __lhs = (__lhs & ~__k) | (__rhs & __k); } 1956 1957 // Optimization for the case where the RHS is a scalar. 1958 _GLIBCXX_SIMD_INTRINSIC static constexpr void 1959 _S_masked_assign(const _MaskMember __k, _MaskMember& __lhs, const bool __rhs) 1960 { 1961 if (__rhs) 1962 __lhs |= __k; 1963 else 1964 __lhs &= ~__k; 1965 } 1966 1967 // }}}2 1968 // _S_all_of {{{ 1969 template <typename _Tp> 1970 _GLIBCXX_SIMD_INTRINSIC static constexpr bool 1971 _S_all_of(simd_mask<_Tp, _Abi> __k) 1972 { return __data(__k).all(); } 1973 1974 // }}} 1975 // _S_any_of {{{ 1976 template <typename _Tp> 1977 _GLIBCXX_SIMD_INTRINSIC static constexpr bool 1978 _S_any_of(simd_mask<_Tp, _Abi> __k) 1979 { return __data(__k).any(); } 1980 1981 // }}} 1982 // _S_none_of {{{ 1983 template <typename _Tp> 1984 _GLIBCXX_SIMD_INTRINSIC static constexpr bool 1985 _S_none_of(simd_mask<_Tp, _Abi> __k) 1986 { return __data(__k).none(); } 1987 1988 // }}} 1989 // _S_some_of {{{ 1990 template <typename _Tp> 1991 _GLIBCXX_SIMD_INTRINSIC static constexpr bool 1992 _S_some_of([[maybe_unused]] simd_mask<_Tp, _Abi> __k) 1993 { 1994 if constexpr (_Np == 1) 1995 return false; 1996 else 1997 return __data(__k).any() && !__data(__k).all(); 1998 } 1999 2000 // }}} 2001 // _S_popcount {{{ 2002 template <typename _Tp> 2003 _GLIBCXX_SIMD_INTRINSIC static constexpr int 2004 _S_popcount(simd_mask<_Tp, _Abi> __k) 2005 { return __data(__k).count(); } 2006 2007 // }}} 2008 // _S_find_first_set {{{ 2009 template <typename _Tp> 2010 _GLIBCXX_SIMD_INTRINSIC static constexpr int 2011 _S_find_first_set(simd_mask<_Tp, _Abi> __k) 2012 { return std::__countr_zero(__data(__k).to_ullong()); } 2013 2014 // }}} 2015 // _S_find_last_set {{{ 2016 template <typename _Tp> 2017 _GLIBCXX_SIMD_INTRINSIC static constexpr int 2018 _S_find_last_set(simd_mask<_Tp, _Abi> __k) 2019 { return std::__bit_width(__data(__k).to_ullong()) - 1; } 2020 2021 // }}} 2022 }; 2023 // }}}1 2024 2025 _GLIBCXX_SIMD_END_NAMESPACE 2026 #endif // __cplusplus >= 201703L 2027 #endif // _GLIBCXX_EXPERIMENTAL_SIMD_FIXED_SIZE_H_ 2028 2029 // vim: foldmethod=marker sw=2 noet ts=8 sts=2 tw=80 2030