// Definition of the public simd interfaces -*- C++ -*-

// Copyright (C) 2020-2022 Free Software Foundation, Inc.
//
// This file is part of the GNU ISO C++ Library.  This library is free
// software; you can redistribute it and/or modify it under the
// terms of the GNU General Public License as published by the
// Free Software Foundation; either version 3, or (at your option)
// any later version.

// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
// GNU General Public License for more details.

// Under Section 7 of GPL version 3, you are granted additional
// permissions described in the GCC Runtime Library Exception, version
// 3.1, as published by the Free Software Foundation.

// You should have received a copy of the GNU General Public License and
// a copy of the GCC Runtime Library Exception along with this program;
// see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
// <http://www.gnu.org/licenses/>.

#ifndef _GLIBCXX_EXPERIMENTAL_SIMD_H
#define _GLIBCXX_EXPERIMENTAL_SIMD_H

#if __cplusplus >= 201703L

#include "simd_detail.h"
#include "numeric_traits.h"
#include <bit>
#include <bitset>
#ifdef _GLIBCXX_DEBUG_UB
#include <cstdio> // for stderr
#endif
#include <cstring>
#include <cmath>
#include <functional>
#include <iosfwd>
#include <utility>

#if _GLIBCXX_SIMD_X86INTRIN
#include <x86intrin.h>
#elif _GLIBCXX_SIMD_HAVE_NEON
#include <arm_neon.h>
#endif

/** @ingroup ts_simd
 * @{
 */
/* There are several closely related types, with the following naming
 * convention:
 * _Tp: vectorizable (arithmetic) type (or any type)
 * _TV: __vector_type_t<_Tp, _Np>
 * _TW: _SimdWrapper<_Tp, _Np>
 * _TI: __intrinsic_type_t<_Tp, _Np>
 * _TVT: _VectorTraits<_TV> or _VectorTraits<_TW>
 * If one additional type is needed use _U instead of _T.
 * Otherwise use _T\d, _TV\d, _TW\d, _TI\d, _TVT\d.
 *
 * More naming conventions:
 * _Ap or _Abi: An ABI tag from the simd_abi namespace
 * _Ip: often used for integer types with sizeof(_Ip) == sizeof(_Tp),
 *      _IV, _IW as for _TV, _TW
 * _Np: number of elements (not bytes)
 * _Bytes: number of bytes
 *
 * Variable names:
 * __k: mask object (vector- or bitmask)
 */
_GLIBCXX_SIMD_BEGIN_NAMESPACE

#if !_GLIBCXX_SIMD_X86INTRIN
using __m128  [[__gnu__::__vector_size__(16)]] = float;
using __m128d [[__gnu__::__vector_size__(16)]] = double;
using __m128i [[__gnu__::__vector_size__(16)]] = long long;
using __m256  [[__gnu__::__vector_size__(32)]] = float;
using __m256d [[__gnu__::__vector_size__(32)]] = double;
using __m256i [[__gnu__::__vector_size__(32)]] = long long;
using __m512  [[__gnu__::__vector_size__(64)]] = float;
using __m512d [[__gnu__::__vector_size__(64)]] = double;
using __m512i [[__gnu__::__vector_size__(64)]] = long long;
#endif

namespace simd_abi {
// simd_abi forward declarations {{{
// implementation details:
struct _Scalar;

template <int _Np>
  struct _Fixed;

// There are two major ABIs that appear on different architectures.
// Both pack non-boolean values into an N-byte register
// -> #elements = N / sizeof(T)
// Masks differ:
// 1. Use value vector registers for masks (all 0 or all 1)
// 2. Use bitmasks (mask registers) with one bit per value in the corresponding
//    value vector
//
// Both can be partially used, masking off the rest when doing horizontal
// operations or operations that can trap (e.g. FP_INVALID or integer division
// by 0). This is encoded as the number of used bytes.
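//
// Exposition-only example: with _VecBuiltin<16> (SSE/NEON) the mask of a
// 4-element float simd is itself a 16-byte vector whose elements are all-0
// or all-1 bit patterns, whereas with _VecBltnBtmsk<64> (AVX-512) the mask
// of a 16-element float simd is a 16-bit value (__mmask16) with one bit per
// element.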
template <int _UsedBytes>
  struct _VecBuiltin;

template <int _UsedBytes>
  struct _VecBltnBtmsk;

template <typename _Tp, int _Np>
  using _VecN = _VecBuiltin<sizeof(_Tp) * _Np>;

template <int _UsedBytes = 16>
  using _Sse = _VecBuiltin<_UsedBytes>;

template <int _UsedBytes = 32>
  using _Avx = _VecBuiltin<_UsedBytes>;

template <int _UsedBytes = 64>
  using _Avx512 = _VecBltnBtmsk<_UsedBytes>;

template <int _UsedBytes = 16>
  using _Neon = _VecBuiltin<_UsedBytes>;

// implementation-defined:
using __sse = _Sse<>;
using __avx = _Avx<>;
using __avx512 = _Avx512<>;
using __neon = _Neon<>;
using __neon128 = _Neon<16>;
using __neon64 = _Neon<8>;

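// Exposition-only example: the aliases above fix the register size, e.g.
//   simd<float, simd_abi::__sse>   has 16 / sizeof(float)  = 4 elements,
//   simd<double, simd_abi::__avx>  has 32 / sizeof(double) = 4 elements,
//   simd<int, simd_abi::__neon64>  has  8 / sizeof(int)    = 2 elements.
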
// standard:
template <typename _Tp, size_t _Np, typename...>
  struct deduce;

template <int _Np>
  using fixed_size = _Fixed<_Np>;

using scalar = _Scalar;

// }}}
} // namespace simd_abi
// forward declarations is_simd(_mask), simd(_mask), simd_size {{{
template <typename _Tp>
  struct is_simd;

template <typename _Tp>
  struct is_simd_mask;

template <typename _Tp, typename _Abi>
  class simd;

template <typename _Tp, typename _Abi>
  class simd_mask;

template <typename _Tp, typename _Abi>
  struct simd_size;

// }}}
// load/store flags {{{
struct element_aligned_tag
{
  template <typename _Tp, typename _Up = typename _Tp::value_type>
    static constexpr size_t _S_alignment = alignof(_Up);

  template <typename _Tp, typename _Up>
    _GLIBCXX_SIMD_INTRINSIC static constexpr _Up*
    _S_apply(_Up* __ptr)
    { return __ptr; }
};

struct vector_aligned_tag
{
  template <typename _Tp, typename _Up = typename _Tp::value_type>
    static constexpr size_t _S_alignment
      = std::__bit_ceil(sizeof(_Up) * _Tp::size());

  template <typename _Tp, typename _Up>
    _GLIBCXX_SIMD_INTRINSIC static constexpr _Up*
    _S_apply(_Up* __ptr)
    {
      return static_cast<_Up*>(
	__builtin_assume_aligned(__ptr, _S_alignment<_Tp, _Up>));
    }
};

template <size_t _Np> struct overaligned_tag
{
  template <typename _Tp, typename _Up = typename _Tp::value_type>
    static constexpr size_t _S_alignment = _Np;

  template <typename _Tp, typename _Up>
    _GLIBCXX_SIMD_INTRINSIC static constexpr _Up*
    _S_apply(_Up* __ptr)
    { return static_cast<_Up*>(__builtin_assume_aligned(__ptr, _Np)); }
};

inline constexpr element_aligned_tag element_aligned = {};

inline constexpr vector_aligned_tag vector_aligned = {};

template <size_t _Np>
  inline constexpr overaligned_tag<_Np> overaligned = {};

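// Exposition-only example: the tags inject the promised alignment at the
// load/store site (memory_alignment_v is the TS trait computing it):
//   alignas(memory_alignment_v<simd<float>>)
//     float __mem[simd<float>::size()] = {};
//   simd<float> __v(__mem, vector_aligned); // may use aligned vector loads
//   __v.copy_to(__mem, element_aligned);    // only assumes alignof(float)
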
// }}}
template <size_t _Xp>
  using _SizeConstant = integral_constant<size_t, _Xp>;
// constexpr feature detection{{{
constexpr inline bool __have_mmx = _GLIBCXX_SIMD_HAVE_MMX;
constexpr inline bool __have_sse = _GLIBCXX_SIMD_HAVE_SSE;
constexpr inline bool __have_sse2 = _GLIBCXX_SIMD_HAVE_SSE2;
constexpr inline bool __have_sse3 = _GLIBCXX_SIMD_HAVE_SSE3;
constexpr inline bool __have_ssse3 = _GLIBCXX_SIMD_HAVE_SSSE3;
constexpr inline bool __have_sse4_1 = _GLIBCXX_SIMD_HAVE_SSE4_1;
constexpr inline bool __have_sse4_2 = _GLIBCXX_SIMD_HAVE_SSE4_2;
constexpr inline bool __have_xop = _GLIBCXX_SIMD_HAVE_XOP;
constexpr inline bool __have_avx = _GLIBCXX_SIMD_HAVE_AVX;
constexpr inline bool __have_avx2 = _GLIBCXX_SIMD_HAVE_AVX2;
constexpr inline bool __have_bmi = _GLIBCXX_SIMD_HAVE_BMI1;
constexpr inline bool __have_bmi2 = _GLIBCXX_SIMD_HAVE_BMI2;
constexpr inline bool __have_lzcnt = _GLIBCXX_SIMD_HAVE_LZCNT;
constexpr inline bool __have_sse4a = _GLIBCXX_SIMD_HAVE_SSE4A;
constexpr inline bool __have_fma = _GLIBCXX_SIMD_HAVE_FMA;
constexpr inline bool __have_fma4 = _GLIBCXX_SIMD_HAVE_FMA4;
constexpr inline bool __have_f16c = _GLIBCXX_SIMD_HAVE_F16C;
constexpr inline bool __have_popcnt = _GLIBCXX_SIMD_HAVE_POPCNT;
constexpr inline bool __have_avx512f = _GLIBCXX_SIMD_HAVE_AVX512F;
constexpr inline bool __have_avx512dq = _GLIBCXX_SIMD_HAVE_AVX512DQ;
constexpr inline bool __have_avx512vl = _GLIBCXX_SIMD_HAVE_AVX512VL;
constexpr inline bool __have_avx512bw = _GLIBCXX_SIMD_HAVE_AVX512BW;
constexpr inline bool __have_avx512dq_vl = __have_avx512dq && __have_avx512vl;
constexpr inline bool __have_avx512bw_vl = __have_avx512bw && __have_avx512vl;
constexpr inline bool __have_avx512bitalg = _GLIBCXX_SIMD_HAVE_AVX512BITALG;
constexpr inline bool __have_avx512vbmi2 = _GLIBCXX_SIMD_HAVE_AVX512VBMI2;
constexpr inline bool __have_avx512vbmi = _GLIBCXX_SIMD_HAVE_AVX512VBMI;
constexpr inline bool __have_avx512ifma = _GLIBCXX_SIMD_HAVE_AVX512IFMA;
constexpr inline bool __have_avx512cd = _GLIBCXX_SIMD_HAVE_AVX512CD;
constexpr inline bool __have_avx512vnni = _GLIBCXX_SIMD_HAVE_AVX512VNNI;
constexpr inline bool __have_avx512vpopcntdq = _GLIBCXX_SIMD_HAVE_AVX512VPOPCNTDQ;
constexpr inline bool __have_avx512vp2intersect = _GLIBCXX_SIMD_HAVE_AVX512VP2INTERSECT;

constexpr inline bool __have_neon = _GLIBCXX_SIMD_HAVE_NEON;
constexpr inline bool __have_neon_a32 = _GLIBCXX_SIMD_HAVE_NEON_A32;
constexpr inline bool __have_neon_a64 = _GLIBCXX_SIMD_HAVE_NEON_A64;
constexpr inline bool __support_neon_float =
#if defined __GCC_IEC_559
  __GCC_IEC_559 == 0;
#elif defined __FAST_MATH__
  true;
#else
  false;
#endif

#ifdef _ARCH_PWR10
constexpr inline bool __have_power10vec = true;
#else
constexpr inline bool __have_power10vec = false;
#endif
#ifdef __POWER9_VECTOR__
constexpr inline bool __have_power9vec = true;
#else
constexpr inline bool __have_power9vec = false;
#endif
#if defined __POWER8_VECTOR__
constexpr inline bool __have_power8vec = true;
#else
constexpr inline bool __have_power8vec = __have_power9vec;
#endif
#if defined __VSX__
constexpr inline bool __have_power_vsx = true;
#else
constexpr inline bool __have_power_vsx = __have_power8vec;
#endif
#if defined __ALTIVEC__
constexpr inline bool __have_power_vmx = true;
#else
constexpr inline bool __have_power_vmx = __have_power_vsx;
#endif

// }}}

namespace __detail
{
#ifdef math_errhandling
  // Determines _S_handle_fpexcept from math_errhandling if it is defined and expands to a constant
  // expression. math_errhandling may expand to an extern symbol, in which case a constexpr value
  // must be guessed.
  template <int = math_errhandling>
    constexpr bool __handle_fpexcept_impl(int)
    { return math_errhandling & MATH_ERREXCEPT; }
#endif

  // Fallback if math_errhandling doesn't work: with fast-math assume floating-point exceptions are
  // ignored, otherwise implement correct exception behavior.
  constexpr bool __handle_fpexcept_impl(float)
  {
#if defined __FAST_MATH__
    return false;
#else
    return true;
#endif
  }

  /// True if math functions must raise floating-point exceptions as specified by C17.
  static constexpr bool _S_handle_fpexcept = __handle_fpexcept_impl(0);

  constexpr std::uint_least64_t
  __floating_point_flags()
  {
    std::uint_least64_t __flags = 0;
    if constexpr (_S_handle_fpexcept)
      __flags |= 1;
#ifdef __FAST_MATH__
    __flags |= 1 << 1;
#elif __FINITE_MATH_ONLY__
    __flags |= 2 << 1;
#elif __GCC_IEC_559 < 2
    __flags |= 3 << 1;
#endif
    __flags |= (__FLT_EVAL_METHOD__ + 1) << 3;
    return __flags;
  }

  constexpr std::uint_least64_t
  __machine_flags()
  {
    if constexpr (__have_mmx || __have_sse)
      return __have_mmx
		 | (__have_sse                << 1)
		 | (__have_sse2               << 2)
		 | (__have_sse3               << 3)
		 | (__have_ssse3              << 4)
		 | (__have_sse4_1             << 5)
		 | (__have_sse4_2             << 6)
		 | (__have_xop                << 7)
		 | (__have_avx                << 8)
		 | (__have_avx2               << 9)
		 | (__have_bmi                << 10)
		 | (__have_bmi2               << 11)
		 | (__have_lzcnt              << 12)
		 | (__have_sse4a              << 13)
		 | (__have_fma                << 14)
		 | (__have_fma4               << 15)
		 | (__have_f16c               << 16)
		 | (__have_popcnt             << 17)
		 | (__have_avx512f            << 18)
		 | (__have_avx512dq           << 19)
		 | (__have_avx512vl           << 20)
		 | (__have_avx512bw           << 21)
		 | (__have_avx512bitalg       << 22)
		 | (__have_avx512vbmi2        << 23)
		 | (__have_avx512vbmi         << 24)
		 | (__have_avx512ifma         << 25)
		 | (__have_avx512cd           << 26)
		 | (__have_avx512vnni         << 27)
		 | (__have_avx512vpopcntdq    << 28)
		 | (__have_avx512vp2intersect << 29);
    else if constexpr (__have_neon)
      return __have_neon
	       | (__have_neon_a32 << 1)
	       | (__have_neon_a64 << 2)
	       | (__support_neon_float << 3);
    else if constexpr (__have_power_vmx)
      return __have_power_vmx
	       | (__have_power_vsx  << 1)
	       | (__have_power8vec  << 2)
	       | (__have_power9vec  << 3)
	       | (__have_power10vec << 4);
    else
      return 0;
  }

  namespace
  {
    struct _OdrEnforcer {};
  }

  template <std::uint_least64_t...>
    struct _MachineFlagsTemplate {};

  /**@internal
   * Use this type as the default template argument for all function templates
   * that are not declared always_inline. It ensures that a function
   * specialization which the compiler decides not to inline has a unique
   * symbol (_OdrEnforcer) or a symbol matching the machine/architecture flags
   * (_MachineFlagsTemplate). This helps avoid ODR violations in cases where
   * users link TUs compiled with different flags, which is especially
   * important when simd is used in libraries.
   */
  using __odr_helper
    = conditional_t<__machine_flags() == 0, _OdrEnforcer,
		    _MachineFlagsTemplate<__machine_flags(), __floating_point_flags()>>;
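
  // Exposition-only example of the intended use (hypothetical helper):
  //   template <typename _Tp, typename = __detail::__odr_helper>
  //     _Tp __some_helper(_Tp);
  // TUs compiled with different -m flags then instantiate distinct symbols,
  // so the linker cannot merge incompatible definitions into one.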

  struct _Minimum
  {
    template <typename _Tp>
      _GLIBCXX_SIMD_INTRINSIC constexpr
      _Tp
      operator()(_Tp __a, _Tp __b) const
      {
	using std::min;
	return min(__a, __b);
      }
  };

  struct _Maximum
  {
    template <typename _Tp>
      _GLIBCXX_SIMD_INTRINSIC constexpr
      _Tp
      operator()(_Tp __a, _Tp __b) const
      {
	using std::max;
	return max(__a, __b);
      }
  };
} // namespace __detail

// unrolled/pack execution helpers
// __execute_n_times{{{
template <typename _Fp, size_t... _I>
  [[__gnu__::__flatten__]] _GLIBCXX_SIMD_INTRINSIC constexpr
  void
  __execute_on_index_sequence(_Fp&& __f, index_sequence<_I...>)
  { ((void)__f(_SizeConstant<_I>()), ...); }

template <typename _Fp>
  _GLIBCXX_SIMD_INTRINSIC constexpr void
  __execute_on_index_sequence(_Fp&&, index_sequence<>)
  { }

template <size_t _Np, typename _Fp>
  _GLIBCXX_SIMD_INTRINSIC constexpr void
  __execute_n_times(_Fp&& __f)
  {
    __execute_on_index_sequence(static_cast<_Fp&&>(__f),
				make_index_sequence<_Np>{});
  }
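
// Exposition-only example: a compile-time-unrolled store loop,
//   __execute_n_times<_Np>([&](auto __i) { __mem[__i] = __x[__i]; });
// where __i is a _SizeConstant and therefore usable in constant expressions.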

// }}}
// __generate_from_n_evaluations{{{
template <typename _R, typename _Fp, size_t... _I>
  [[__gnu__::__flatten__]] _GLIBCXX_SIMD_INTRINSIC constexpr
  _R
  __execute_on_index_sequence_with_return(_Fp&& __f, index_sequence<_I...>)
  { return _R{__f(_SizeConstant<_I>())...}; }

template <size_t _Np, typename _R, typename _Fp>
  _GLIBCXX_SIMD_INTRINSIC constexpr _R
  __generate_from_n_evaluations(_Fp&& __f)
  {
    return __execute_on_index_sequence_with_return<_R>(
      static_cast<_Fp&&>(__f), make_index_sequence<_Np>{});
  }
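
// Exposition-only example: building an iota vector,
//   auto __iota = __generate_from_n_evaluations<_Np, __vector_type_t<int, _Np>>(
//     [](auto __i) { return int(__i); });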

// }}}
// __call_with_n_evaluations{{{
template <size_t... _I, typename _F0, typename _FArgs>
  [[__gnu__::__flatten__]] _GLIBCXX_SIMD_INTRINSIC constexpr
  auto
  __call_with_n_evaluations(index_sequence<_I...>, _F0&& __f0, _FArgs&& __fargs)
  { return __f0(__fargs(_SizeConstant<_I>())...); }

template <size_t _Np, typename _F0, typename _FArgs>
  _GLIBCXX_SIMD_INTRINSIC constexpr auto
  __call_with_n_evaluations(_F0&& __f0, _FArgs&& __fargs)
  {
    return __call_with_n_evaluations(make_index_sequence<_Np>{},
				     static_cast<_F0&&>(__f0),
				     static_cast<_FArgs&&>(__fargs));
  }
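
// Exposition-only example: summing _Np subscripts without a runtime loop,
//   int __sum = __call_with_n_evaluations<_Np>(
//     [](auto... __vals) { return (__vals + ...); }, // __f0: fold the pack
//     [&](auto __i) { return __x[__i]; });           // __fargs: per-index value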

// }}}
// __call_with_subscripts{{{
template <size_t _First = 0, size_t... _It, typename _Tp, typename _Fp>
  [[__gnu__::__flatten__]] _GLIBCXX_SIMD_INTRINSIC constexpr
  auto
  __call_with_subscripts(_Tp&& __x, index_sequence<_It...>, _Fp&& __fun)
  { return __fun(__x[_First + _It]...); }

template <size_t _Np, size_t _First = 0, typename _Tp, typename _Fp>
  _GLIBCXX_SIMD_INTRINSIC constexpr auto
  __call_with_subscripts(_Tp&& __x, _Fp&& __fun)
  {
    return __call_with_subscripts<_First>(static_cast<_Tp&&>(__x),
					  make_index_sequence<_Np>(),
					  static_cast<_Fp&&>(__fun));
  }

// }}}

// vvv ---- type traits ---- vvv
// integer type aliases{{{
using _UChar = unsigned char;
using _SChar = signed char;
using _UShort = unsigned short;
using _UInt = unsigned int;
using _ULong = unsigned long;
using _ULLong = unsigned long long;
using _LLong = long long;

//}}}
// __first_of_pack{{{
template <typename _T0, typename...>
  struct __first_of_pack
  { using type = _T0; };

template <typename... _Ts>
  using __first_of_pack_t = typename __first_of_pack<_Ts...>::type;

//}}}
// __value_type_or_identity_t {{{
template <typename _Tp>
  typename _Tp::value_type
  __value_type_or_identity_impl(int);

template <typename _Tp>
  _Tp
  __value_type_or_identity_impl(float);

template <typename _Tp>
  using __value_type_or_identity_t
    = decltype(__value_type_or_identity_impl<_Tp>(int()));

// }}}
// __is_vectorizable {{{
template <typename _Tp>
  struct __is_vectorizable : public is_arithmetic<_Tp> {};

template <>
  struct __is_vectorizable<bool> : public false_type {};

template <typename _Tp>
  inline constexpr bool __is_vectorizable_v = __is_vectorizable<_Tp>::value;

// Deduces to a vectorizable type
template <typename _Tp, typename = enable_if_t<__is_vectorizable_v<_Tp>>>
  using _Vectorizable = _Tp;

// }}}
// _LoadStorePtr / __is_possible_loadstore_conversion {{{
template <typename _Ptr, typename _ValueType>
  struct __is_possible_loadstore_conversion
  : conjunction<__is_vectorizable<_Ptr>, __is_vectorizable<_ValueType>> {};

template <>
  struct __is_possible_loadstore_conversion<bool, bool> : true_type {};

// Deduces to a type allowed for load/store with the given value type.
template <typename _Ptr, typename _ValueType,
	  typename = enable_if_t<
	    __is_possible_loadstore_conversion<_Ptr, _ValueType>::value>>
  using _LoadStorePtr = _Ptr;

// }}}
// __is_bitmask{{{
template <typename _Tp, typename = void_t<>>
  struct __is_bitmask : false_type {};

template <typename _Tp>
  inline constexpr bool __is_bitmask_v = __is_bitmask<_Tp>::value;

// the __mmaskXX case:
template <typename _Tp>
  struct __is_bitmask<_Tp,
    void_t<decltype(declval<unsigned&>() = declval<_Tp>() & 1u)>>
  : true_type {};

// }}}
// __int_for_sizeof{{{
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wpedantic"
template <size_t _Bytes>
  constexpr auto
  __int_for_sizeof()
  {
    if constexpr (_Bytes == sizeof(int))
      return int();
  #ifdef __clang__
    else if constexpr (_Bytes == sizeof(char))
      return char();
  #else
    else if constexpr (_Bytes == sizeof(_SChar))
      return _SChar();
  #endif
    else if constexpr (_Bytes == sizeof(short))
      return short();
  #ifndef __clang__
    else if constexpr (_Bytes == sizeof(long))
      return long();
  #endif
    else if constexpr (_Bytes == sizeof(_LLong))
      return _LLong();
  #ifdef __SIZEOF_INT128__
    else if constexpr (_Bytes == sizeof(__int128))
      return __int128();
  #endif // __SIZEOF_INT128__
    else if constexpr (_Bytes % sizeof(int) == 0)
      {
	constexpr size_t _Np = _Bytes / sizeof(int);
	struct _Ip
	{
	  int _M_data[_Np];

	  _GLIBCXX_SIMD_INTRINSIC constexpr _Ip
	  operator&(_Ip __rhs) const
	  {
	    return __generate_from_n_evaluations<_Np, _Ip>(
	      [&](auto __i) { return __rhs._M_data[__i] & _M_data[__i]; });
	  }

	  _GLIBCXX_SIMD_INTRINSIC constexpr _Ip
	  operator|(_Ip __rhs) const
	  {
	    return __generate_from_n_evaluations<_Np, _Ip>(
	      [&](auto __i) { return __rhs._M_data[__i] | _M_data[__i]; });
	  }

	  _GLIBCXX_SIMD_INTRINSIC constexpr _Ip
	  operator^(_Ip __rhs) const
	  {
	    return __generate_from_n_evaluations<_Np, _Ip>(
	      [&](auto __i) { return __rhs._M_data[__i] ^ _M_data[__i]; });
	  }

	  _GLIBCXX_SIMD_INTRINSIC constexpr _Ip
	  operator~() const
	  {
	    return __generate_from_n_evaluations<_Np, _Ip>(
	      [&](auto __i) { return ~_M_data[__i]; });
	  }
	};
	return _Ip{};
      }
    else
      static_assert(_Bytes != _Bytes, "this should be unreachable");
  }
#pragma GCC diagnostic pop

template <typename _Tp>
  using __int_for_sizeof_t = decltype(__int_for_sizeof<sizeof(_Tp)>());

template <size_t _Np>
  using __int_with_sizeof_t = decltype(__int_for_sizeof<_Np>());
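
// Exposition-only consequences (assuming common LP64/LLP64 type sizes):
// __int_for_sizeof_t<float> is int, __int_for_sizeof_t<double> is long or
// _LLong, and a 16-byte type maps to __int128 where available, otherwise to
// the _Ip record of four ints defined above.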

// }}}
// __is_fixed_size_abi{{{
template <typename _Tp>
  struct __is_fixed_size_abi : false_type {};

template <int _Np>
  struct __is_fixed_size_abi<simd_abi::fixed_size<_Np>> : true_type {};

template <typename _Tp>
  inline constexpr bool __is_fixed_size_abi_v = __is_fixed_size_abi<_Tp>::value;

// }}}
// __is_scalar_abi {{{
template <typename _Abi>
  constexpr bool
  __is_scalar_abi()
  { return is_same_v<simd_abi::scalar, _Abi>; }

// }}}
// __abi_bytes_v {{{
template <template <int> class _Abi, int _Bytes>
  constexpr int
  __abi_bytes_impl(_Abi<_Bytes>*)
  { return _Bytes; }

template <typename _Tp>
  constexpr int
  __abi_bytes_impl(_Tp*)
  { return -1; }

template <typename _Abi>
  inline constexpr int __abi_bytes_v
    = __abi_bytes_impl(static_cast<_Abi*>(nullptr));

// }}}
// __is_builtin_bitmask_abi {{{
template <typename _Abi>
  constexpr bool
  __is_builtin_bitmask_abi()
  { return is_same_v<simd_abi::_VecBltnBtmsk<__abi_bytes_v<_Abi>>, _Abi>; }

// }}}
// __is_sse_abi {{{
template <typename _Abi>
  constexpr bool
  __is_sse_abi()
  {
    constexpr auto _Bytes = __abi_bytes_v<_Abi>;
    return _Bytes <= 16 && is_same_v<simd_abi::_VecBuiltin<_Bytes>, _Abi>;
  }

// }}}
// __is_avx_abi {{{
template <typename _Abi>
  constexpr bool
  __is_avx_abi()
  {
    constexpr auto _Bytes = __abi_bytes_v<_Abi>;
    return _Bytes > 16 && _Bytes <= 32
	   && is_same_v<simd_abi::_VecBuiltin<_Bytes>, _Abi>;
  }

// }}}
// __is_avx512_abi {{{
template <typename _Abi>
  constexpr bool
  __is_avx512_abi()
  {
    constexpr auto _Bytes = __abi_bytes_v<_Abi>;
    return _Bytes <= 64 && is_same_v<simd_abi::_Avx512<_Bytes>, _Abi>;
  }

// }}}
// __is_neon_abi {{{
template <typename _Abi>
  constexpr bool
  __is_neon_abi()
  {
    constexpr auto _Bytes = __abi_bytes_v<_Abi>;
    return _Bytes <= 16 && is_same_v<simd_abi::_VecBuiltin<_Bytes>, _Abi>;
  }

// }}}
// __make_dependent_t {{{
template <typename, typename _Up>
  struct __make_dependent
  { using type = _Up; };

template <typename _Tp, typename _Up>
  using __make_dependent_t = typename __make_dependent<_Tp, _Up>::type;

// }}}
// ^^^ ---- type traits ---- ^^^

// __invoke_ub{{{
template <typename... _Args>
  [[noreturn]] _GLIBCXX_SIMD_ALWAYS_INLINE void
  __invoke_ub([[maybe_unused]] const char* __msg,
	      [[maybe_unused]] const _Args&... __args)
  {
#ifdef _GLIBCXX_DEBUG_UB
    __builtin_fprintf(stderr, __msg, __args...);
    __builtin_trap();
#else
    __builtin_unreachable();
#endif
  }

// }}}
// __assert_unreachable{{{
template <typename _Tp>
  struct __assert_unreachable
  { static_assert(!is_same_v<_Tp, _Tp>, "this should be unreachable"); };

// }}}
// __size_or_zero_v {{{
template <typename _Tp, typename _Ap, size_t _Np = simd_size<_Tp, _Ap>::value>
  constexpr size_t
  __size_or_zero_dispatch(int)
  { return _Np; }

template <typename _Tp, typename _Ap>
  constexpr size_t
  __size_or_zero_dispatch(float)
  { return 0; }

template <typename _Tp, typename _Ap>
  inline constexpr size_t __size_or_zero_v
     = __size_or_zero_dispatch<_Tp, _Ap>(0);

// }}}
// __div_roundup {{{
inline constexpr size_t
__div_roundup(size_t __a, size_t __b)
{ return (__a + __b - 1) / __b; }
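
// Exposition-only example: __div_roundup(17, __CHAR_BIT__) == 3, i.e. 17
// bits need 3 bytes of storage; _BitMask below sizes its array this way.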

// }}}
// _ExactBool{{{
class _ExactBool
{
  const bool _M_data;

public:
  _GLIBCXX_SIMD_INTRINSIC constexpr _ExactBool(bool __b) : _M_data(__b) {}

  _ExactBool(int) = delete;

  _GLIBCXX_SIMD_INTRINSIC constexpr operator bool() const { return _M_data; }
};

// }}}
// __may_alias{{{
/**@internal
 * Helper __may_alias<_Tp> that turns _Tp into the type to be used for an
 * aliasing pointer. This adds the __may_alias attribute to _Tp (with compilers
 * that support it).
 */
template <typename _Tp>
  using __may_alias [[__gnu__::__may_alias__]] = _Tp;

// }}}
// _UnsupportedBase {{{
// simd and simd_mask base for unsupported <_Tp, _Abi>
struct _UnsupportedBase
{
  _UnsupportedBase() = delete;
  _UnsupportedBase(const _UnsupportedBase&) = delete;
  _UnsupportedBase& operator=(const _UnsupportedBase&) = delete;
  ~_UnsupportedBase() = delete;
};

// }}}
// _InvalidTraits {{{
/**
 * @internal
 * Defines the implementation of a given <_Tp, _Abi>.
 *
 * Implementations must ensure that only valid <_Tp, _Abi> instantiations are
 * possible. Static assertions in the type definition do not suffice. It is
 * important that SFINAE works.
 */
struct _InvalidTraits
{
  using _IsValid = false_type;
  using _SimdBase = _UnsupportedBase;
  using _MaskBase = _UnsupportedBase;

  static constexpr size_t _S_full_size = 0;
  static constexpr bool _S_is_partial = false;

  static constexpr size_t _S_simd_align = 1;
  struct _SimdImpl;
  struct _SimdMember {};
  struct _SimdCastType;

  static constexpr size_t _S_mask_align = 1;
  struct _MaskImpl;
  struct _MaskMember {};
  struct _MaskCastType;
};

// }}}
// _SimdTraits {{{
template <typename _Tp, typename _Abi, typename = void_t<>>
  struct _SimdTraits : _InvalidTraits {};

// }}}
// __private_init, __bitset_init{{{
/**
 * @internal
 * Tag used for private init constructor of simd and simd_mask
 */
inline constexpr struct _PrivateInit {} __private_init = {};

inline constexpr struct _BitsetInit {} __bitset_init = {};

// }}}
// __is_narrowing_conversion<_From, _To>{{{
template <typename _From, typename _To, bool = is_arithmetic_v<_From>,
	  bool = is_arithmetic_v<_To>>
  struct __is_narrowing_conversion;

// ignore "signed/unsigned mismatch" in the following trait.
// The implicit conversions will do the right thing here.
template <typename _From, typename _To>
  struct __is_narrowing_conversion<_From, _To, true, true>
  : public __bool_constant<(
      __digits_v<_From> > __digits_v<_To>
      || __finite_max_v<_From> > __finite_max_v<_To>
      || __finite_min_v<_From> < __finite_min_v<_To>
      || (is_signed_v<_From> && is_unsigned_v<_To>))> {};

template <typename _Tp>
  struct __is_narrowing_conversion<_Tp, bool, true, true>
  : public true_type {};

template <>
  struct __is_narrowing_conversion<bool, bool, true, true>
  : public false_type {};

template <typename _Tp>
  struct __is_narrowing_conversion<_Tp, _Tp, true, true>
  : public false_type {};

template <typename _From, typename _To>
  struct __is_narrowing_conversion<_From, _To, false, true>
  : public negation<is_convertible<_From, _To>> {};

// }}}
// __converts_to_higher_integer_rank{{{
template <typename _From, typename _To, bool = (sizeof(_From) < sizeof(_To))>
  struct __converts_to_higher_integer_rank : public true_type {};

// this may fail for char -> short if sizeof(char) == sizeof(short)
template <typename _From, typename _To>
  struct __converts_to_higher_integer_rank<_From, _To, false>
  : public is_same<decltype(declval<_From>() + declval<_To>()), _To> {};

// }}}
// __data(simd/simd_mask) {{{
template <typename _Tp, typename _Ap>
  _GLIBCXX_SIMD_INTRINSIC constexpr const auto&
  __data(const simd<_Tp, _Ap>& __x);

template <typename _Tp, typename _Ap>
  _GLIBCXX_SIMD_INTRINSIC constexpr auto&
  __data(simd<_Tp, _Ap>& __x);

template <typename _Tp, typename _Ap>
  _GLIBCXX_SIMD_INTRINSIC constexpr const auto&
  __data(const simd_mask<_Tp, _Ap>& __x);

template <typename _Tp, typename _Ap>
  _GLIBCXX_SIMD_INTRINSIC constexpr auto&
  __data(simd_mask<_Tp, _Ap>& __x);

// }}}
// _SimdConverter {{{
template <typename _FromT, typename _FromA, typename _ToT, typename _ToA,
	  typename = void>
  struct _SimdConverter;

template <typename _Tp, typename _Ap>
  struct _SimdConverter<_Tp, _Ap, _Tp, _Ap, void>
  {
    template <typename _Up>
      _GLIBCXX_SIMD_INTRINSIC const _Up&
      operator()(const _Up& __x)
      { return __x; }
  };

// }}}
// __to_value_type_or_member_type {{{
template <typename _V>
  _GLIBCXX_SIMD_INTRINSIC constexpr auto
  __to_value_type_or_member_type(const _V& __x) -> decltype(__data(__x))
  { return __data(__x); }

template <typename _V>
  _GLIBCXX_SIMD_INTRINSIC constexpr const typename _V::value_type&
  __to_value_type_or_member_type(const typename _V::value_type& __x)
  { return __x; }

// }}}
// __bool_storage_member_type{{{
template <size_t _Size>
  struct __bool_storage_member_type;

template <size_t _Size>
  using __bool_storage_member_type_t =
    typename __bool_storage_member_type<_Size>::type;

// }}}
// _SimdTuple {{{
// why not tuple?
// 1. tuple gives no guarantee about the storage order, but I require storage
//    equivalent to array<_Tp, _Np>
// 2. direct access to the element type (first template argument)
// 3. enforces equal element type, only different _Abi types are allowed
template <typename _Tp, typename... _Abis>
  struct _SimdTuple;

//}}}
// __fixed_size_storage_t {{{
template <typename _Tp, int _Np>
  struct __fixed_size_storage;

template <typename _Tp, int _Np>
  using __fixed_size_storage_t = typename __fixed_size_storage<_Tp, _Np>::type;

// }}}
// _SimdWrapper fwd decl{{{
template <typename _Tp, size_t _Size, typename = void_t<>>
  struct _SimdWrapper;

template <typename _Tp>
  using _SimdWrapper8 = _SimdWrapper<_Tp, 8 / sizeof(_Tp)>;
template <typename _Tp>
  using _SimdWrapper16 = _SimdWrapper<_Tp, 16 / sizeof(_Tp)>;
template <typename _Tp>
  using _SimdWrapper32 = _SimdWrapper<_Tp, 32 / sizeof(_Tp)>;
template <typename _Tp>
  using _SimdWrapper64 = _SimdWrapper<_Tp, 64 / sizeof(_Tp)>;

// }}}
// __is_simd_wrapper {{{
template <typename _Tp>
  struct __is_simd_wrapper : false_type {};

template <typename _Tp, size_t _Np>
  struct __is_simd_wrapper<_SimdWrapper<_Tp, _Np>> : true_type {};

template <typename _Tp>
  inline constexpr bool __is_simd_wrapper_v = __is_simd_wrapper<_Tp>::value;

// }}}
// _BitOps {{{
struct _BitOps
{
  // _S_bit_iteration {{{
  template <typename _Tp, typename _Fp>
    static void
    _S_bit_iteration(_Tp __mask, _Fp&& __f)
    {
      static_assert(sizeof(_ULLong) >= sizeof(_Tp));
      conditional_t<sizeof(_Tp) <= sizeof(_UInt), _UInt, _ULLong> __k;
      if constexpr (is_convertible_v<_Tp, decltype(__k)>)
	__k = __mask;
      else
	__k = __mask.to_ullong();
      while (__k)
	{
	  __f(std::__countr_zero(__k));
	  __k &= (__k - 1);
	}
    }
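
    // Exposition-only example: visit the set bits of a mask, LSB first,
    //   _BitOps::_S_bit_iteration(0b1010'0001u, [](auto __bit) {
    //     // __bit is 0, then 5, then 7
    //   });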

  //}}}
};

//}}}
// __increment, __decrement {{{
template <typename _Tp = void>
  struct __increment
  { constexpr _Tp operator()(_Tp __a) const { return ++__a; } };

template <>
  struct __increment<void>
  {
    template <typename _Tp>
      constexpr _Tp
      operator()(_Tp __a) const
      { return ++__a; }
  };

template <typename _Tp = void>
  struct __decrement
  { constexpr _Tp operator()(_Tp __a) const { return --__a; } };

template <>
  struct __decrement<void>
  {
    template <typename _Tp>
      constexpr _Tp
      operator()(_Tp __a) const
      { return --__a; }
  };

// }}}
// _ValuePreserving(OrInt) {{{
template <typename _From, typename _To,
	  typename = enable_if_t<negation<
	    __is_narrowing_conversion<__remove_cvref_t<_From>, _To>>::value>>
  using _ValuePreserving = _From;

template <typename _From, typename _To,
	  typename _DecayedFrom = __remove_cvref_t<_From>,
	  typename = enable_if_t<conjunction<
	    is_convertible<_From, _To>,
	    disjunction<
	      is_same<_DecayedFrom, _To>, is_same<_DecayedFrom, int>,
	      conjunction<is_same<_DecayedFrom, _UInt>, is_unsigned<_To>>,
	      negation<__is_narrowing_conversion<_DecayedFrom, _To>>>>::value>>
  using _ValuePreservingOrInt = _From;

// }}}
// __intrinsic_type {{{
template <typename _Tp, size_t _Bytes, typename = void_t<>>
  struct __intrinsic_type;

template <typename _Tp, size_t _Size>
  using __intrinsic_type_t =
    typename __intrinsic_type<_Tp, _Size * sizeof(_Tp)>::type;

template <typename _Tp>
  using __intrinsic_type2_t = typename __intrinsic_type<_Tp, 2>::type;
template <typename _Tp>
  using __intrinsic_type4_t = typename __intrinsic_type<_Tp, 4>::type;
template <typename _Tp>
  using __intrinsic_type8_t = typename __intrinsic_type<_Tp, 8>::type;
template <typename _Tp>
  using __intrinsic_type16_t = typename __intrinsic_type<_Tp, 16>::type;
template <typename _Tp>
  using __intrinsic_type32_t = typename __intrinsic_type<_Tp, 32>::type;
template <typename _Tp>
  using __intrinsic_type64_t = typename __intrinsic_type<_Tp, 64>::type;

// }}}
// _BitMask {{{
template <size_t _Np, bool _Sanitized = false>
  struct _BitMask;

template <size_t _Np, bool _Sanitized>
  struct __is_bitmask<_BitMask<_Np, _Sanitized>, void> : true_type {};

template <size_t _Np>
  using _SanitizedBitMask = _BitMask<_Np, true>;

template <size_t _Np, bool _Sanitized>
  struct _BitMask
  {
    static_assert(_Np > 0);

    static constexpr size_t _NBytes = __div_roundup(_Np, __CHAR_BIT__);

    using _Tp = conditional_t<_Np == 1, bool,
			      make_unsigned_t<__int_with_sizeof_t<std::min(
				sizeof(_ULLong), std::__bit_ceil(_NBytes))>>>;

    static constexpr int _S_array_size = __div_roundup(_NBytes, sizeof(_Tp));

    _Tp _M_bits[_S_array_size];

    static constexpr int _S_unused_bits
      = _Np == 1 ? 0 : _S_array_size * sizeof(_Tp) * __CHAR_BIT__ - _Np;

    static constexpr _Tp _S_bitmask = +_Tp(~_Tp()) >> _S_unused_bits;

    constexpr _BitMask() noexcept = default;

    constexpr _BitMask(unsigned long long __x) noexcept
      : _M_bits{static_cast<_Tp>(__x)} {}

    _BitMask(bitset<_Np> __x) noexcept : _BitMask(__x.to_ullong()) {}

    constexpr _BitMask(const _BitMask&) noexcept = default;

    template <bool _RhsSanitized, typename = enable_if_t<_RhsSanitized == false
							 && _Sanitized == true>>
      constexpr _BitMask(const _BitMask<_Np, _RhsSanitized>& __rhs) noexcept
	: _BitMask(__rhs._M_sanitized()) {}

    constexpr operator _SimdWrapper<bool, _Np>() const noexcept
    {
      static_assert(_S_array_size == 1);
      return _M_bits[0];
    }

    // precondition: is sanitized
    constexpr _Tp
    _M_to_bits() const noexcept
    {
      static_assert(_S_array_size == 1);
      return _M_bits[0];
    }

    // precondition: is sanitized
    constexpr unsigned long long
    to_ullong() const noexcept
    {
      static_assert(_S_array_size == 1);
      return _M_bits[0];
    }

    // precondition: is sanitized
    constexpr unsigned long
    to_ulong() const noexcept
    {
      static_assert(_S_array_size == 1);
      return _M_bits[0];
    }

    constexpr bitset<_Np>
    _M_to_bitset() const noexcept
    {
      static_assert(_S_array_size == 1);
      return _M_bits[0];
    }

    constexpr decltype(auto)
    _M_sanitized() const noexcept
    {
      if constexpr (_Sanitized)
	return *this;
      else if constexpr (_Np == 1)
	return _SanitizedBitMask<_Np>(_M_bits[0]);
      else
	{
	  _SanitizedBitMask<_Np> __r = {};
	  for (int __i = 0; __i < _S_array_size; ++__i)
	    __r._M_bits[__i] = _M_bits[__i];
	  if constexpr (_S_unused_bits > 0)
	    __r._M_bits[_S_array_size - 1] &= _S_bitmask;
	  return __r;
	}
    }

    template <size_t _Mp, bool _LSanitized>
      constexpr _BitMask<_Np + _Mp, _Sanitized>
      _M_prepend(_BitMask<_Mp, _LSanitized> __lsb) const noexcept
      {
	constexpr size_t _RN = _Np + _Mp;
	using _Rp = _BitMask<_RN, _Sanitized>;
	if constexpr (_Rp::_S_array_size == 1)
	  {
	    _Rp __r{{_M_bits[0]}};
	    __r._M_bits[0] <<= _Mp;
	    __r._M_bits[0] |= __lsb._M_sanitized()._M_bits[0];
	    return __r;
	  }
	else
	  __assert_unreachable<_Rp>();
      }

    // Return a new _BitMask with size _NewSize while dropping _DropLsb least
    // significant bits. If the operation implicitly produces a sanitized bitmask,
    // the result type will have _Sanitized set.
    template <size_t _DropLsb, size_t _NewSize = _Np - _DropLsb>
      constexpr auto
      _M_extract() const noexcept
      {
	static_assert(_Np > _DropLsb);
	static_assert(_DropLsb + _NewSize <= sizeof(_ULLong) * __CHAR_BIT__,
		      "not implemented for bitmasks larger than one ullong");
	if constexpr (_NewSize == 1)
	  // must sanitize because the return _Tp is bool
	  return _SanitizedBitMask<1>(_M_bits[0] & (_Tp(1) << _DropLsb));
	else
	  return _BitMask<_NewSize,
			  ((_NewSize + _DropLsb == sizeof(_Tp) * __CHAR_BIT__
			    && _NewSize + _DropLsb <= _Np)
			   || ((_Sanitized || _Np == sizeof(_Tp) * __CHAR_BIT__)
			       && _NewSize + _DropLsb >= _Np))>(_M_bits[0]
								>> _DropLsb);
      }

    // True if all bits are set. Implicitly sanitizes if _Sanitized == false.
    constexpr bool
    all() const noexcept
    {
      if constexpr (_Np == 1)
	return _M_bits[0];
      else if constexpr (!_Sanitized)
	return _M_sanitized().all();
      else
	{
	  constexpr _Tp __allbits = ~_Tp();
	  for (int __i = 0; __i < _S_array_size - 1; ++__i)
	    if (_M_bits[__i] != __allbits)
	      return false;
	  return _M_bits[_S_array_size - 1] == _S_bitmask;
	}
    }

    // True if at least one bit is set. Implicitly sanitizes if _Sanitized ==
    // false.
    constexpr bool
    any() const noexcept
    {
      if constexpr (_Np == 1)
	return _M_bits[0];
      else if constexpr (!_Sanitized)
	return _M_sanitized().any();
      else
	{
	  for (int __i = 0; __i < _S_array_size - 1; ++__i)
	    if (_M_bits[__i] != 0)
	      return true;
	  return _M_bits[_S_array_size - 1] != 0;
	}
    }

    // True if no bit is set. Implicitly sanitizes if _Sanitized == false.
    constexpr bool
    none() const noexcept
    {
      if constexpr (_Np == 1)
	return !_M_bits[0];
      else if constexpr (!_Sanitized)
	return _M_sanitized().none();
      else
	{
	  for (int __i = 0; __i < _S_array_size - 1; ++__i)
	    if (_M_bits[__i] != 0)
	      return false;
	  return _M_bits[_S_array_size - 1] == 0;
	}
    }

    // Returns the number of set bits. Implicitly sanitizes if _Sanitized ==
    // false.
    constexpr int
    count() const noexcept
    {
      if constexpr (_Np == 1)
	return _M_bits[0];
      else if constexpr (!_Sanitized)
	return _M_sanitized().count();
      else
	{
	  int __result = __builtin_popcountll(_M_bits[0]);
	  for (int __i = 1; __i < _S_array_size; ++__i)
	    __result += __builtin_popcountll(_M_bits[__i]);
	  return __result;
	}
    }

    // Returns the bit at offset __i as bool.
    constexpr bool
    operator[](size_t __i) const noexcept
    {
      if constexpr (_Np == 1)
	return _M_bits[0];
      else if constexpr (_S_array_size == 1)
	return (_M_bits[0] >> __i) & 1;
      else
	{
	  const size_t __j = __i / (sizeof(_Tp) * __CHAR_BIT__);
	  const size_t __shift = __i % (sizeof(_Tp) * __CHAR_BIT__);
	  return (_M_bits[__j] >> __shift) & 1;
	}
    }

    template <size_t __i>
      constexpr bool
      operator[](_SizeConstant<__i>) const noexcept
      {
	static_assert(__i < _Np);
	constexpr size_t __j = __i / (sizeof(_Tp) * __CHAR_BIT__);
	constexpr size_t __shift = __i % (sizeof(_Tp) * __CHAR_BIT__);
	return static_cast<bool>(_M_bits[__j] & (_Tp(1) << __shift));
      }

    // Set the bit at offset __i to __x.
    constexpr void
    set(size_t __i, bool __x) noexcept
    {
      if constexpr (_Np == 1)
	_M_bits[0] = __x;
      else if constexpr (_S_array_size == 1)
	{
	  _M_bits[0] &= ~_Tp(_Tp(1) << __i);
	  _M_bits[0] |= _Tp(_Tp(__x) << __i);
	}
      else
	{
	  const size_t __j = __i / (sizeof(_Tp) * __CHAR_BIT__);
	  const size_t __shift = __i % (sizeof(_Tp) * __CHAR_BIT__);
	  _M_bits[__j] &= ~_Tp(_Tp(1) << __shift);
	  _M_bits[__j] |= _Tp(_Tp(__x) << __shift);
	}
    }

    template <size_t __i>
      constexpr void
      set(_SizeConstant<__i>, bool __x) noexcept
      {
	static_assert(__i < _Np);
	if constexpr (_Np == 1)
	  _M_bits[0] = __x;
	else
	  {
	    constexpr size_t __j = __i / (sizeof(_Tp) * __CHAR_BIT__);
	    constexpr size_t __shift = __i % (sizeof(_Tp) * __CHAR_BIT__);
	    constexpr _Tp __mask = ~_Tp(_Tp(1) << __shift);
	    _M_bits[__j] &= __mask;
	    _M_bits[__j] |= _Tp(_Tp(__x) << __shift);
	  }
      }

    // Inverts all bits. Sanitized input leads to sanitized output.
    constexpr _BitMask
    operator~() const noexcept
    {
      if constexpr (_Np == 1)
	return !_M_bits[0];
      else
	{
	  _BitMask __result{};
	  for (int __i = 0; __i < _S_array_size - 1; ++__i)
	    __result._M_bits[__i] = ~_M_bits[__i];
	  if constexpr (_Sanitized)
	    __result._M_bits[_S_array_size - 1]
	      = _M_bits[_S_array_size - 1] ^ _S_bitmask;
	  else
	    __result._M_bits[_S_array_size - 1] = ~_M_bits[_S_array_size - 1];
	  return __result;
	}
    }

    constexpr _BitMask&
    operator^=(const _BitMask& __b) & noexcept
    {
      __execute_n_times<_S_array_size>(
	[&](auto __i) { _M_bits[__i] ^= __b._M_bits[__i]; });
      return *this;
    }

    constexpr _BitMask&
    operator|=(const _BitMask& __b) & noexcept
    {
      __execute_n_times<_S_array_size>(
	[&](auto __i) { _M_bits[__i] |= __b._M_bits[__i]; });
      return *this;
    }

    constexpr _BitMask&
    operator&=(const _BitMask& __b) & noexcept
    {
      __execute_n_times<_S_array_size>(
	[&](auto __i) { _M_bits[__i] &= __b._M_bits[__i]; });
      return *this;
    }

    friend constexpr _BitMask
    operator^(const _BitMask& __a, const _BitMask& __b) noexcept
    {
      _BitMask __r = __a;
      __r ^= __b;
      return __r;
    }

    friend constexpr _BitMask
    operator|(const _BitMask& __a, const _BitMask& __b) noexcept
    {
      _BitMask __r = __a;
      __r |= __b;
      return __r;
    }

    friend constexpr _BitMask
    operator&(const _BitMask& __a, const _BitMask& __b) noexcept
    {
      _BitMask __r = __a;
      __r &= __b;
      return __r;
    }

    _GLIBCXX_SIMD_INTRINSIC
    constexpr bool
    _M_is_constprop() const
    {
      if constexpr (_S_array_size == 0)
	return __builtin_constant_p(_M_bits[0]);
      else
	{
	  for (int __i = 0; __i < _S_array_size; ++__i)
	    if (!__builtin_constant_p(_M_bits[__i]))
	      return false;
	  return true;
	}
    }
  };
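
// Exposition-only example: _BitMask<7> stores its bits in a single unsigned
// char (_S_array_size == 1, _S_unused_bits == 1, _S_bitmask == 0x7f);
// _M_sanitized() clears the unused top bit before all()/none()/count()
// inspect the storage.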

// }}}

// vvv ---- builtin vector types [[gnu::vector_size(N)]] and operations ---- vvv
// __min_vector_size {{{
template <typename _Tp = void>
  static inline constexpr int __min_vector_size = 2 * sizeof(_Tp);

#if _GLIBCXX_SIMD_HAVE_NEON
template <>
  inline constexpr int __min_vector_size<void> = 8;
#else
template <>
  inline constexpr int __min_vector_size<void> = 16;
#endif

// }}}
// __vector_type {{{
template <typename _Tp, size_t _Np, typename = void>
  struct __vector_type_n {};

// substitution failure for the 0-element case
template <typename _Tp>
  struct __vector_type_n<_Tp, 0, void> {};

// special case 1-element to be _Tp itself
template <typename _Tp>
  struct __vector_type_n<_Tp, 1, enable_if_t<__is_vectorizable_v<_Tp>>>
  { using type = _Tp; };

// else, use GNU-style builtin vector types
template <typename _Tp, size_t _Np>
  struct __vector_type_n<_Tp, _Np,
			 enable_if_t<__is_vectorizable_v<_Tp> && _Np >= 2>>
  {
    static constexpr size_t _S_Np2 = std::__bit_ceil(_Np * sizeof(_Tp));

    static constexpr size_t _S_Bytes =
#ifdef __i386__
      // Using [[gnu::vector_size(8)]] would wreak havoc on the FPU because
      // those objects are passed via MMX registers and nothing ever calls EMMS.
      _S_Np2 == 8 ? 16 :
#endif
      _S_Np2 < __min_vector_size<_Tp> ? __min_vector_size<_Tp>
				      : _S_Np2;

    using type [[__gnu__::__vector_size__(_S_Bytes)]] = _Tp;
  };

template <typename _Tp, size_t _Bytes, size_t = _Bytes % sizeof(_Tp)>
  struct __vector_type;

template <typename _Tp, size_t _Bytes>
  struct __vector_type<_Tp, _Bytes, 0>
  : __vector_type_n<_Tp, _Bytes / sizeof(_Tp)> {};

template <typename _Tp, size_t _Size>
  using __vector_type_t = typename __vector_type_n<_Tp, _Size>::type;

template <typename _Tp>
  using __vector_type2_t = typename __vector_type<_Tp, 2>::type;
template <typename _Tp>
  using __vector_type4_t = typename __vector_type<_Tp, 4>::type;
template <typename _Tp>
  using __vector_type8_t = typename __vector_type<_Tp, 8>::type;
template <typename _Tp>
  using __vector_type16_t = typename __vector_type<_Tp, 16>::type;
template <typename _Tp>
  using __vector_type32_t = typename __vector_type<_Tp, 32>::type;
template <typename _Tp>
  using __vector_type64_t = typename __vector_type<_Tp, 64>::type;
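
// Exposition-only example: __vector_type_t<float, 3> rounds 12 bytes up to a
// 16-byte [[gnu::vector_size(16)]] vector of float, while
// __vector_type_t<float, 1> is plain float per the 1-element special case.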

// }}}
// __is_vector_type {{{
template <typename _Tp, typename = void_t<>>
  struct __is_vector_type : false_type {};

template <typename _Tp>
  struct __is_vector_type<
    _Tp, void_t<typename __vector_type<
	   remove_reference_t<decltype(declval<_Tp>()[0])>, sizeof(_Tp)>::type>>
    : is_same<_Tp, typename __vector_type<
		     remove_reference_t<decltype(declval<_Tp>()[0])>,
		     sizeof(_Tp)>::type> {};

template <typename _Tp>
  inline constexpr bool __is_vector_type_v = __is_vector_type<_Tp>::value;

// }}}
// __is_intrinsic_type {{{
#if _GLIBCXX_SIMD_HAVE_SSE_ABI
template <typename _Tp>
  using __is_intrinsic_type = __is_vector_type<_Tp>;
#else // not SSE (x86)
template <typename _Tp, typename = void_t<>>
  struct __is_intrinsic_type : false_type {};

template <typename _Tp>
  struct __is_intrinsic_type<
    _Tp, void_t<typename __intrinsic_type<
	   remove_reference_t<decltype(declval<_Tp>()[0])>, sizeof(_Tp)>::type>>
    : is_same<_Tp, typename __intrinsic_type<
		     remove_reference_t<decltype(declval<_Tp>()[0])>,
		     sizeof(_Tp)>::type> {};
#endif

template <typename _Tp>
  inline constexpr bool __is_intrinsic_type_v = __is_intrinsic_type<_Tp>::value;

// }}}
// _VectorTraits{{{
template <typename _Tp, typename = void_t<>>
  struct _VectorTraitsImpl;

template <typename _Tp>
  struct _VectorTraitsImpl<_Tp, enable_if_t<__is_vector_type_v<_Tp>
					      || __is_intrinsic_type_v<_Tp>>>
  {
    using type = _Tp;
    using value_type = remove_reference_t<decltype(declval<_Tp>()[0])>;
    static constexpr int _S_full_size = sizeof(_Tp) / sizeof(value_type);
    using _Wrapper = _SimdWrapper<value_type, _S_full_size>;
    template <typename _Up, int _W = _S_full_size>
      static constexpr bool _S_is
	= is_same_v<value_type, _Up> && _W == _S_full_size;
  };

template <typename _Tp, size_t _Np>
  struct _VectorTraitsImpl<_SimdWrapper<_Tp, _Np>,
			   void_t<__vector_type_t<_Tp, _Np>>>
  {
    using type = __vector_type_t<_Tp, _Np>;
    using value_type = _Tp;
    static constexpr int _S_full_size = sizeof(type) / sizeof(value_type);
    using _Wrapper = _SimdWrapper<_Tp, _Np>;
    static constexpr bool _S_is_partial = (_Np < _S_full_size);
    static constexpr int _S_partial_width = _Np;
    template <typename _Up, int _W = _S_full_size>
      static constexpr bool _S_is
	= is_same_v<value_type, _Up> && _W == _S_full_size;
  };

template <typename _Tp, typename = typename _VectorTraitsImpl<_Tp>::type>
  using _VectorTraits = _VectorTraitsImpl<_Tp>;

// }}}
// __as_vector{{{
template <typename _V>
  _GLIBCXX_SIMD_INTRINSIC constexpr auto
  __as_vector(_V __x)
  {
    if constexpr (__is_vector_type_v<_V>)
      return __x;
    else if constexpr (is_simd<_V>::value || is_simd_mask<_V>::value)
      return __data(__x)._M_data;
    else if constexpr (__is_vectorizable_v<_V>)
      return __vector_type_t<_V, 2>{__x};
    else
      return __x._M_data;
  }

// }}}
// __as_wrapper{{{
template <size_t _Np = 0, typename _V>
  _GLIBCXX_SIMD_INTRINSIC constexpr auto
  __as_wrapper(_V __x)
  {
    if constexpr (__is_vector_type_v<_V>)
      return _SimdWrapper<typename _VectorTraits<_V>::value_type,
			  (_Np > 0 ? _Np : _VectorTraits<_V>::_S_full_size)>(__x);
    else if constexpr (is_simd<_V>::value || is_simd_mask<_V>::value)
      {
	static_assert(_V::size() == _Np);
	return __data(__x);
      }
    else
      {
	static_assert(_V::_S_size == _Np);
	return __x;
      }
  }

// }}}
// __intrin_bitcast{{{
template <typename _To, typename _From>
  _GLIBCXX_SIMD_INTRINSIC constexpr _To
  __intrin_bitcast(_From __v)
  {
    static_assert((__is_vector_type_v<_From> || __is_intrinsic_type_v<_From>)
		    && (__is_vector_type_v<_To> || __is_intrinsic_type_v<_To>));
    if constexpr (sizeof(_To) == sizeof(_From))
      return reinterpret_cast<_To>(__v);
    else if constexpr (sizeof(_From) > sizeof(_To))
      if constexpr (sizeof(_To) >= 16)
	return reinterpret_cast<const __may_alias<_To>&>(__v);
      else
	{
	  _To __r;
	  __builtin_memcpy(&__r, &__v, sizeof(_To));
	  return __r;
	}
#if _GLIBCXX_SIMD_X86INTRIN && !defined __clang__
    else if constexpr (__have_avx && sizeof(_From) == 16 && sizeof(_To) == 32)
      return reinterpret_cast<_To>(__builtin_ia32_ps256_ps(
	reinterpret_cast<__vector_type_t<float, 4>>(__v)));
    else if constexpr (__have_avx512f && sizeof(_From) == 16
		       && sizeof(_To) == 64)
      return reinterpret_cast<_To>(__builtin_ia32_ps512_ps(
	reinterpret_cast<__vector_type_t<float, 4>>(__v)));
    else if constexpr (__have_avx512f && sizeof(_From) == 32
		       && sizeof(_To) == 64)
      return reinterpret_cast<_To>(__builtin_ia32_ps512_256ps(
	reinterpret_cast<__vector_type_t<float, 8>>(__v)));
#endif // _GLIBCXX_SIMD_X86INTRIN
    else if constexpr (sizeof(__v) <= 8)
      return reinterpret_cast<_To>(
	__vector_type_t<__int_for_sizeof_t<_From>, sizeof(_To) / sizeof(_From)>{
	  reinterpret_cast<__int_for_sizeof_t<_From>>(__v)});
    else
      {
	static_assert(sizeof(_To) > sizeof(_From));
	_To __r = {};
	__builtin_memcpy(&__r, &__v, sizeof(_From));
	return __r;
      }
  }
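
// Exposition-only example: __intrin_bitcast<__m256i>(__v128) widens a
// 16-byte vector to 32 bytes; the generic fallback memcpys into a
// zero-initialized result, while the AVX/AVX-512 builtin paths above avoid
// the round trip through memory.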
1679 
1680 // }}}
1681 // __vector_bitcast{{{
1682 template <typename _To, size_t _NN = 0, typename _From,
1683 	  typename _FromVT = _VectorTraits<_From>,
1684 	  size_t _Np = _NN == 0 ? sizeof(_From) / sizeof(_To) : _NN>
1685   _GLIBCXX_SIMD_INTRINSIC constexpr __vector_type_t<_To, _Np>
1686   __vector_bitcast(_From __x)
1687   {
1688     using _R = __vector_type_t<_To, _Np>;
1689     return __intrin_bitcast<_R>(__x);
1690   }
1691 
1692 template <typename _To, size_t _NN = 0, typename _Tp, size_t _Nx,
1693 	  size_t _Np
1694 	  = _NN == 0 ? sizeof(_SimdWrapper<_Tp, _Nx>) / sizeof(_To) : _NN>
1695   _GLIBCXX_SIMD_INTRINSIC constexpr __vector_type_t<_To, _Np>
1696   __vector_bitcast(const _SimdWrapper<_Tp, _Nx>& __x)
1697   {
1698     static_assert(_Np > 1);
1699     return __intrin_bitcast<__vector_type_t<_To, _Np>>(__x._M_data);
1700   }
1701 
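// [Editor's note: illustrative sketch.] __vector_bitcast only retypes the
// same bytes; the byte size is preserved unless _NN is given explicitly:
//   __vector_type_t<float, 4> __f = {1.f, 2.f, 3.f, 4.f};
//   auto __i = __vector_bitcast<int>(__f);   // __vector_type_t<int, 4>
//   auto __s = __vector_bitcast<short>(__f); // __vector_type_t<short, 8>
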
1702 // }}}
1703 // __convert_x86 declarations {{{
1704 #ifdef _GLIBCXX_SIMD_WORKAROUND_PR85048
1705 template <typename _To, typename _Tp, typename _TVT = _VectorTraits<_Tp>>
1706   _To __convert_x86(_Tp);
1707 
1708 template <typename _To, typename _Tp, typename _TVT = _VectorTraits<_Tp>>
1709   _To __convert_x86(_Tp, _Tp);
1710 
1711 template <typename _To, typename _Tp, typename _TVT = _VectorTraits<_Tp>>
1712   _To __convert_x86(_Tp, _Tp, _Tp, _Tp);
1713 
1714 template <typename _To, typename _Tp, typename _TVT = _VectorTraits<_Tp>>
1715   _To __convert_x86(_Tp, _Tp, _Tp, _Tp, _Tp, _Tp, _Tp, _Tp);
1716 
1717 template <typename _To, typename _Tp, typename _TVT = _VectorTraits<_Tp>>
1718   _To __convert_x86(_Tp, _Tp, _Tp, _Tp, _Tp, _Tp, _Tp, _Tp, _Tp, _Tp, _Tp, _Tp,
1719 		    _Tp, _Tp, _Tp, _Tp);
1720 #endif // _GLIBCXX_SIMD_WORKAROUND_PR85048
1721 
1722 //}}}
1723 // __bit_cast {{{
1724 template <typename _To, typename _From>
1725   _GLIBCXX_SIMD_INTRINSIC constexpr _To
1726   __bit_cast(const _From __x)
1727   {
1728 #if __has_builtin(__builtin_bit_cast)
1729     return __builtin_bit_cast(_To, __x);
1730 #else
1731     static_assert(sizeof(_To) == sizeof(_From));
1732     constexpr bool __to_is_vectorizable
1733       = is_arithmetic_v<_To> || is_enum_v<_To>;
1734     constexpr bool __from_is_vectorizable
1735       = is_arithmetic_v<_From> || is_enum_v<_From>;
1736     if constexpr (__is_vector_type_v<_To> && __is_vector_type_v<_From>)
1737       return reinterpret_cast<_To>(__x);
1738     else if constexpr (__is_vector_type_v<_To> && __from_is_vectorizable)
1739       {
1740 	using _FV [[gnu::vector_size(sizeof(_From))]] = _From;
1741 	return reinterpret_cast<_To>(_FV{__x});
1742       }
1743     else if constexpr (__to_is_vectorizable && __from_is_vectorizable)
1744       {
1745 	using _TV [[gnu::vector_size(sizeof(_To))]] = _To;
1746 	using _FV [[gnu::vector_size(sizeof(_From))]] = _From;
1747 	return reinterpret_cast<_TV>(_FV{__x})[0];
1748       }
1749     else if constexpr (__to_is_vectorizable && __is_vector_type_v<_From>)
1750       {
1751 	using _TV [[gnu::vector_size(sizeof(_To))]] = _To;
1752 	return reinterpret_cast<_TV>(__x)[0];
1753       }
1754     else
1755       {
1756 	_To __r;
1757 	__builtin_memcpy(reinterpret_cast<char*>(&__r),
1758 			 reinterpret_cast<const char*>(&__x), sizeof(_To));
1759 	return __r;
1760       }
1761 #endif
1762   }
1763 
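// [Editor's note: illustrative sketch.] __bit_cast is the pre-C++20 stand-in
// for std::bit_cast; the vector-based fallback above avoids __builtin_memcpy
// for the common cases so that they can stay usable in constant expressions:
//   unsigned __bits = __bit_cast<unsigned>(1.f); // 0x3f800000 on IEC 559
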
1764 // }}}
1765 // __to_intrin {{{
1766 template <typename _Tp, typename _TVT = _VectorTraits<_Tp>,
1767 	  typename _R
1768 	  = __intrinsic_type_t<typename _TVT::value_type, _TVT::_S_full_size>>
1769   _GLIBCXX_SIMD_INTRINSIC constexpr _R
1770   __to_intrin(_Tp __x)
1771   {
1772     static_assert(sizeof(__x) <= sizeof(_R),
1773 		  "__to_intrin may never drop values off the end");
1774     if constexpr (sizeof(__x) == sizeof(_R))
1775       return reinterpret_cast<_R>(__as_vector(__x));
1776     else
1777       {
1778 	using _Up = __int_for_sizeof_t<_Tp>;
1779 	return reinterpret_cast<_R>(
1780 	  __vector_type_t<_Up, sizeof(_R) / sizeof(_Up)>{__bit_cast<_Up>(__x)});
1781       }
1782   }
1783 
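// [Editor's note: illustrative sketch.] __to_intrin widens a possibly
// smaller builtin vector to the full intrinsic register type, e.g. on x86:
//   __vector_type_t<float, 2> __v = {1.f, 2.f};
//   __m128 __i = __to_intrin(__v); // payload in the low 64 bits, rest zero
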
1784 // }}}
1785 // __make_vector{{{
1786 template <typename _Tp, typename... _Args>
1787   _GLIBCXX_SIMD_INTRINSIC constexpr __vector_type_t<_Tp, sizeof...(_Args)>
1788   __make_vector(const _Args&... __args)
1789   {
1790     return __vector_type_t<_Tp, sizeof...(_Args)>{static_cast<_Tp>(__args)...};
1791   }
1792 
1793 // }}}
1794 // __vector_broadcast{{{
1795 template <size_t _Np, typename _Tp, size_t... _I>
1796   _GLIBCXX_SIMD_INTRINSIC constexpr __vector_type_t<_Tp, _Np>
1797   __vector_broadcast_impl(_Tp __x, index_sequence<_I...>)
1798   { return __vector_type_t<_Tp, _Np>{((void)_I, __x)...}; }
1799 
1800 template <size_t _Np, typename _Tp>
1801   _GLIBCXX_SIMD_INTRINSIC constexpr __vector_type_t<_Tp, _Np>
1802   __vector_broadcast(_Tp __x)
1803   { return __vector_broadcast_impl<_Np, _Tp>(__x, make_index_sequence<_Np>()); }
1804 
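// [Editor's note: illustrative sketch.]
//   __vector_broadcast<4>(1.f) yields __vector_type_t<float, 4>{1, 1, 1, 1}.
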
1805 // }}}
1806 // __generate_vector{{{
1807 template <typename _Tp, size_t _Np, typename _Gp, size_t... _I>
1808   _GLIBCXX_SIMD_INTRINSIC constexpr __vector_type_t<_Tp, _Np>
1809   __generate_vector_impl(_Gp&& __gen, index_sequence<_I...>)
1810   {
1811     return __vector_type_t<_Tp, _Np>{
1812       static_cast<_Tp>(__gen(_SizeConstant<_I>()))...};
1813   }
1814 
1815 template <typename _V, typename _VVT = _VectorTraits<_V>, typename _Gp>
1816   _GLIBCXX_SIMD_INTRINSIC constexpr _V
1817   __generate_vector(_Gp&& __gen)
1818   {
1819     if constexpr (__is_vector_type_v<_V>)
1820       return __generate_vector_impl<typename _VVT::value_type,
1821 				    _VVT::_S_full_size>(
1822 	static_cast<_Gp&&>(__gen), make_index_sequence<_VVT::_S_full_size>());
1823     else
1824       return __generate_vector_impl<typename _VVT::value_type,
1825 				    _VVT::_S_partial_width>(
1826 	static_cast<_Gp&&>(__gen),
1827 	make_index_sequence<_VVT::_S_partial_width>());
1828   }
1829 
1830 template <typename _Tp, size_t _Np, typename _Gp>
1831   _GLIBCXX_SIMD_INTRINSIC constexpr __vector_type_t<_Tp, _Np>
1832   __generate_vector(_Gp&& __gen)
1833   {
1834     return __generate_vector_impl<_Tp, _Np>(static_cast<_Gp&&>(__gen),
1835 					    make_index_sequence<_Np>());
1836   }
1837 
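// [Editor's note: illustrative sketch.] The generator is invoked with
// _SizeConstant<_I>, so the element index is a constant expression:
//   auto __iota = __generate_vector<int, 4>(
//     [](auto __i) { return int(__i); }); // {0, 1, 2, 3}
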
1838 // }}}
1839 // __xor{{{
1840 template <typename _TW>
1841   _GLIBCXX_SIMD_INTRINSIC constexpr _TW
1842   __xor(_TW __a, _TW __b) noexcept
1843   {
1844     if constexpr (__is_vector_type_v<_TW> || __is_simd_wrapper_v<_TW>)
1845       {
1846 	using _Tp = typename conditional_t<__is_simd_wrapper_v<_TW>, _TW,
1847 					   _VectorTraitsImpl<_TW>>::value_type;
1848 	if constexpr (is_floating_point_v<_Tp>)
1849 	  {
1850 	    using _Ip = make_unsigned_t<__int_for_sizeof_t<_Tp>>;
1851 	    return __vector_bitcast<_Tp>(__vector_bitcast<_Ip>(__a)
1852 					 ^ __vector_bitcast<_Ip>(__b));
1853 	  }
1854 	else if constexpr (__is_vector_type_v<_TW>)
1855 	  return __a ^ __b;
1856 	else
1857 	  return __a._M_data ^ __b._M_data;
1858       }
1859     else
1860       return __a ^ __b;
1861   }
1862 
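// [Editor's note: illustrative sketch.] This helper (and __or/__and below)
// exists because the builtin operators reject floating-point vectors; the
// float case is routed through an integer bit cast. The classic use is sign
// manipulation:
//   __vector_type_t<float, 4> __v = {1.f, -2.f, 3.f, -4.f};
//   auto __neg = __xor(__v, __vector_broadcast<4>(-0.f)); // {-1, 2, -3, 4}
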
1863 // }}}
1864 // __or{{{
1865 template <typename _TW>
1866   _GLIBCXX_SIMD_INTRINSIC constexpr _TW
1867   __or(_TW __a, _TW __b) noexcept
1868   {
1869     if constexpr (__is_vector_type_v<_TW> || __is_simd_wrapper_v<_TW>)
1870       {
1871 	using _Tp = typename conditional_t<__is_simd_wrapper_v<_TW>, _TW,
1872 					   _VectorTraitsImpl<_TW>>::value_type;
1873 	if constexpr (is_floating_point_v<_Tp>)
1874 	  {
1875 	    using _Ip = make_unsigned_t<__int_for_sizeof_t<_Tp>>;
1876 	    return __vector_bitcast<_Tp>(__vector_bitcast<_Ip>(__a)
1877 					 | __vector_bitcast<_Ip>(__b));
1878 	  }
1879 	else if constexpr (__is_vector_type_v<_TW>)
1880 	  return __a | __b;
1881 	else
1882 	  return __a._M_data | __b._M_data;
1883       }
1884     else
1885       return __a | __b;
1886   }
1887 
1888 // }}}
1889 // __and{{{
1890 template <typename _TW>
1891   _GLIBCXX_SIMD_INTRINSIC constexpr _TW
1892   __and(_TW __a, _TW __b) noexcept
1893   {
1894     if constexpr (__is_vector_type_v<_TW> || __is_simd_wrapper_v<_TW>)
1895       {
1896 	using _Tp = typename conditional_t<__is_simd_wrapper_v<_TW>, _TW,
1897 					   _VectorTraitsImpl<_TW>>::value_type;
1898 	if constexpr (is_floating_point_v<_Tp>)
1899 	  {
1900 	    using _Ip = make_unsigned_t<__int_for_sizeof_t<_Tp>>;
1901 	    return __vector_bitcast<_Tp>(__vector_bitcast<_Ip>(__a)
1902 					 & __vector_bitcast<_Ip>(__b));
1903 	  }
1904 	else if constexpr (__is_vector_type_v<_TW>)
1905 	  return __a & __b;
1906 	else
1907 	  return __a._M_data & __b._M_data;
1908       }
1909     else
1910       return __a & __b;
1911   }
1912 
1913 // }}}
1914 // __andnot{{{
1915 #if _GLIBCXX_SIMD_X86INTRIN && !defined __clang__
1916 static constexpr struct
1917 {
1918   _GLIBCXX_SIMD_INTRINSIC __v4sf
1919   operator()(__v4sf __a, __v4sf __b) const noexcept
1920   { return __builtin_ia32_andnps(__a, __b); }
1921 
1922   _GLIBCXX_SIMD_INTRINSIC __v2df
1923   operator()(__v2df __a, __v2df __b) const noexcept
1924   { return __builtin_ia32_andnpd(__a, __b); }
1925 
1926   _GLIBCXX_SIMD_INTRINSIC __v2di
1927   operator()(__v2di __a, __v2di __b) const noexcept
1928   { return __builtin_ia32_pandn128(__a, __b); }
1929 
1930   _GLIBCXX_SIMD_INTRINSIC __v8sf
1931   operator()(__v8sf __a, __v8sf __b) const noexcept
1932   { return __builtin_ia32_andnps256(__a, __b); }
1933 
1934   _GLIBCXX_SIMD_INTRINSIC __v4df
1935   operator()(__v4df __a, __v4df __b) const noexcept
1936   { return __builtin_ia32_andnpd256(__a, __b); }
1937 
1938   _GLIBCXX_SIMD_INTRINSIC __v4di
1939   operator()(__v4di __a, __v4di __b) const noexcept
1940   {
1941     if constexpr (__have_avx2)
1942       return __builtin_ia32_andnotsi256(__a, __b);
1943     else
1944       return reinterpret_cast<__v4di>(
1945 	__builtin_ia32_andnpd256(reinterpret_cast<__v4df>(__a),
1946 				 reinterpret_cast<__v4df>(__b)));
1947   }
1948 
1949   _GLIBCXX_SIMD_INTRINSIC __v16sf
1950   operator()(__v16sf __a, __v16sf __b) const noexcept
1951   {
1952     if constexpr (__have_avx512dq)
1953       return _mm512_andnot_ps(__a, __b);
1954     else
1955       return reinterpret_cast<__v16sf>(
1956 	_mm512_andnot_si512(reinterpret_cast<__v8di>(__a),
1957 			    reinterpret_cast<__v8di>(__b)));
1958   }
1959 
1960   _GLIBCXX_SIMD_INTRINSIC __v8df
1961   operator()(__v8df __a, __v8df __b) const noexcept
1962   {
1963     if constexpr (__have_avx512dq)
1964       return _mm512_andnot_pd(__a, __b);
1965     else
1966       return reinterpret_cast<__v8df>(
1967 	_mm512_andnot_si512(reinterpret_cast<__v8di>(__a),
1968 			    reinterpret_cast<__v8di>(__b)));
1969   }
1970 
1971   _GLIBCXX_SIMD_INTRINSIC __v8di
1972   operator()(__v8di __a, __v8di __b) const noexcept
1973   { return _mm512_andnot_si512(__a, __b); }
1974 } _S_x86_andnot;
1975 #endif // _GLIBCXX_SIMD_X86INTRIN && !__clang__
1976 
1977 template <typename _TW>
1978   _GLIBCXX_SIMD_INTRINSIC constexpr _TW
1979   __andnot(_TW __a, _TW __b) noexcept
1980   {
1981     if constexpr (__is_vector_type_v<_TW> || __is_simd_wrapper_v<_TW>)
1982       {
1983 	using _TVT = conditional_t<__is_simd_wrapper_v<_TW>, _TW,
1984 				   _VectorTraitsImpl<_TW>>;
1985 	using _Tp = typename _TVT::value_type;
1986 #if _GLIBCXX_SIMD_X86INTRIN && !defined __clang__
1987 	if constexpr (sizeof(_TW) >= 16)
1988 	  {
1989 	    const auto __ai = __to_intrin(__a);
1990 	    const auto __bi = __to_intrin(__b);
1991 	    if (!__builtin_is_constant_evaluated()
1992 		&& !(__builtin_constant_p(__ai) && __builtin_constant_p(__bi)))
1993 	      {
1994 		const auto __r = _S_x86_andnot(__ai, __bi);
1995 		if constexpr (is_convertible_v<decltype(__r), _TW>)
1996 		  return __r;
1997 		else
1998 		  return reinterpret_cast<typename _TVT::type>(__r);
1999 	      }
2000 	  }
2001 #endif // _GLIBCXX_SIMD_X86INTRIN && !__clang__
2002 	using _Ip = make_unsigned_t<__int_for_sizeof_t<_Tp>>;
2003 	return __vector_bitcast<_Tp>(~__vector_bitcast<_Ip>(__a)
2004 				     & __vector_bitcast<_Ip>(__b));
2005       }
2006     else
2007       return ~__a & __b;
2008   }
2009 
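// [Editor's note:] The operand order follows the x86 andnot instructions:
// __andnot(__a, __b) computes ~__a & __b, i.e. it clears in __b every bit
// that is set in __a. E.g. zeroing masked-out lanes:
//   auto __kept = __andnot(__mask, __value); // __value where __mask is 0
// (__mask and __value are hypothetical same-type vectors.)
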
2010 // }}}
2011 // __not{{{
2012 template <typename _Tp, typename _TVT = _VectorTraits<_Tp>>
2013   _GLIBCXX_SIMD_INTRINSIC constexpr _Tp
2014   __not(_Tp __a) noexcept
2015   {
2016     if constexpr (is_floating_point_v<typename _TVT::value_type>)
2017       return reinterpret_cast<typename _TVT::type>(
2018 	~__vector_bitcast<unsigned>(__a));
2019     else
2020       return ~__a;
2021   }
2022 
2023 // }}}
2024 // __concat{{{
2025 template <typename _Tp, typename _TVT = _VectorTraits<_Tp>,
2026 	  typename _R = __vector_type_t<typename _TVT::value_type,
2027 					_TVT::_S_full_size * 2>>
2028   constexpr _R
2029   __concat(_Tp a_, _Tp b_)
2030   {
2031 #ifdef _GLIBCXX_SIMD_WORKAROUND_XXX_1
2032     using _W
2033       = conditional_t<is_floating_point_v<typename _TVT::value_type>, double,
2034 		      conditional_t<(sizeof(_Tp) >= 2 * sizeof(long long)),
2035 				    long long, typename _TVT::value_type>>;
2036     constexpr int __input_width = sizeof(_Tp) / sizeof(_W);
2037     const auto __a = __vector_bitcast<_W>(a_);
2038     const auto __b = __vector_bitcast<_W>(b_);
2039     using _Up = __vector_type_t<_W, sizeof(_R) / sizeof(_W)>;
2040 #else
2041     constexpr int __input_width = _TVT::_S_full_size;
2042     const _Tp& __a = a_;
2043     const _Tp& __b = b_;
2044     using _Up = _R;
2045 #endif
2046     if constexpr (__input_width == 2)
2047       return reinterpret_cast<_R>(_Up{__a[0], __a[1], __b[0], __b[1]});
2048     else if constexpr (__input_width == 4)
2049       return reinterpret_cast<_R>(
2050 	_Up{__a[0], __a[1], __a[2], __a[3], __b[0], __b[1], __b[2], __b[3]});
2051     else if constexpr (__input_width == 8)
2052       return reinterpret_cast<_R>(
2053 	_Up{__a[0], __a[1], __a[2], __a[3], __a[4], __a[5], __a[6], __a[7],
2054 	    __b[0], __b[1], __b[2], __b[3], __b[4], __b[5], __b[6], __b[7]});
2055     else if constexpr (__input_width == 16)
2056       return reinterpret_cast<_R>(
2057 	_Up{__a[0],  __a[1],  __a[2],  __a[3],  __a[4],  __a[5],  __a[6],
2058 	    __a[7],  __a[8],  __a[9],  __a[10], __a[11], __a[12], __a[13],
2059 	    __a[14], __a[15], __b[0],  __b[1],  __b[2],  __b[3],  __b[4],
2060 	    __b[5],  __b[6],  __b[7],  __b[8],  __b[9],  __b[10], __b[11],
2061 	    __b[12], __b[13], __b[14], __b[15]});
2062     else if constexpr (__input_width == 32)
2063       return reinterpret_cast<_R>(
2064 	_Up{__a[0],  __a[1],  __a[2],  __a[3],  __a[4],  __a[5],  __a[6],
2065 	    __a[7],  __a[8],  __a[9],  __a[10], __a[11], __a[12], __a[13],
2066 	    __a[14], __a[15], __a[16], __a[17], __a[18], __a[19], __a[20],
2067 	    __a[21], __a[22], __a[23], __a[24], __a[25], __a[26], __a[27],
2068 	    __a[28], __a[29], __a[30], __a[31], __b[0],  __b[1],  __b[2],
2069 	    __b[3],  __b[4],  __b[5],  __b[6],  __b[7],  __b[8],  __b[9],
2070 	    __b[10], __b[11], __b[12], __b[13], __b[14], __b[15], __b[16],
2071 	    __b[17], __b[18], __b[19], __b[20], __b[21], __b[22], __b[23],
2072 	    __b[24], __b[25], __b[26], __b[27], __b[28], __b[29], __b[30],
2073 	    __b[31]});
2074   }
2075 
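// [Editor's note: illustrative sketch.] __concat glues two equally sized
// vectors into one of twice the width:
//   __vector_type_t<int, 2> __lo = {0, 1}, __hi = {2, 3};
//   auto __all = __concat(__lo, __hi); // __vector_type_t<int, 4>{0, 1, 2, 3}
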
2076 // }}}
2077 // __zero_extend {{{
2078 template <typename _Tp, typename _TVT = _VectorTraits<_Tp>>
2079   struct _ZeroExtendProxy
2080   {
2081     using value_type = typename _TVT::value_type;
2082     static constexpr size_t _Np = _TVT::_S_full_size;
2083     const _Tp __x;
2084 
2085     template <typename _To, typename _ToVT = _VectorTraits<_To>,
2086 	      typename
2087 	      = enable_if_t<is_same_v<typename _ToVT::value_type, value_type>>>
2088       _GLIBCXX_SIMD_INTRINSIC operator _To() const
2089       {
2090 	constexpr size_t _ToN = _ToVT::_S_full_size;
2091 	if constexpr (_ToN == _Np)
2092 	  return __x;
2093 	else if constexpr (_ToN == 2 * _Np)
2094 	  {
2095 #ifdef _GLIBCXX_SIMD_WORKAROUND_XXX_3
2096 	    if constexpr (__have_avx && _TVT::template _S_is<float, 4>)
2097 	      return __vector_bitcast<value_type>(
2098 		_mm256_insertf128_ps(__m256(), __x, 0));
2099 	    else if constexpr (__have_avx && _TVT::template _S_is<double, 2>)
2100 	      return __vector_bitcast<value_type>(
2101 		_mm256_insertf128_pd(__m256d(), __x, 0));
2102 	    else if constexpr (__have_avx2 && _Np * sizeof(value_type) == 16)
2103 	      return __vector_bitcast<value_type>(
2104 		_mm256_insertf128_si256(__m256i(), __to_intrin(__x), 0));
2105 	    else if constexpr (__have_avx512f && _TVT::template _S_is<float, 8>)
2106 	      {
2107 		if constexpr (__have_avx512dq)
2108 		  return __vector_bitcast<value_type>(
2109 		    _mm512_insertf32x8(__m512(), __x, 0));
2110 		else
2111 		  return reinterpret_cast<__m512>(
2112 		    _mm512_insertf64x4(__m512d(),
2113 				       reinterpret_cast<__m256d>(__x), 0));
2114 	      }
2115 	    else if constexpr (__have_avx512f
2116 			       && _TVT::template _S_is<double, 4>)
2117 	      return __vector_bitcast<value_type>(
2118 		_mm512_insertf64x4(__m512d(), __x, 0));
2119 	    else if constexpr (__have_avx512f && _Np * sizeof(value_type) == 32)
2120 	      return __vector_bitcast<value_type>(
2121 		_mm512_inserti64x4(__m512i(), __to_intrin(__x), 0));
2122 #endif
2123 	    return __concat(__x, _Tp());
2124 	  }
2125 	else if constexpr (_ToN == 4 * _Np)
2126 	  {
2127 #ifdef _GLIBCXX_SIMD_WORKAROUND_XXX_3
2128 	    if constexpr (__have_avx512dq && _TVT::template _S_is<double, 2>)
2129 	      {
2130 		return __vector_bitcast<value_type>(
2131 		  _mm512_insertf64x2(__m512d(), __x, 0));
2132 	      }
2133 	    else if constexpr (__have_avx512f
2134 			       && is_floating_point_v<value_type>)
2135 	      {
2136 		return __vector_bitcast<value_type>(
2137 		  _mm512_insertf32x4(__m512(), reinterpret_cast<__m128>(__x),
2138 				     0));
2139 	      }
2140 	    else if constexpr (__have_avx512f && _Np * sizeof(value_type) == 16)
2141 	      {
2142 		return __vector_bitcast<value_type>(
2143 		  _mm512_inserti32x4(__m512i(), __to_intrin(__x), 0));
2144 	      }
2145 #endif
2146 	    return __concat(__concat(__x, _Tp()),
2147 			    __vector_type_t<value_type, _Np * 2>());
2148 	  }
2149 	else if constexpr (_ToN == 8 * _Np)
2150 	  return __concat(operator __vector_type_t<value_type, _Np * 4>(),
2151 			  __vector_type_t<value_type, _Np * 4>());
2152 	else if constexpr (_ToN == 16 * _Np)
2153 	  return __concat(operator __vector_type_t<value_type, _Np * 8>(),
2154 			  __vector_type_t<value_type, _Np * 8>());
2155 	else
2156 	  __assert_unreachable<_Tp>();
2157       }
2158   };
2159 
2160 template <typename _Tp, typename _TVT = _VectorTraits<_Tp>>
2161   _GLIBCXX_SIMD_INTRINSIC _ZeroExtendProxy<_Tp, _TVT>
2162   __zero_extend(_Tp __x)
2163   { return {__x}; }
2164 
2165 // }}}
2166 // __extract<_Np, By>{{{
2167 template <int _Offset,
2168 	  int _SplitBy,
2169 	  typename _Tp,
2170 	  typename _TVT = _VectorTraits<_Tp>,
2171 	  typename _R = __vector_type_t<typename _TVT::value_type,
2172 			  _TVT::_S_full_size / _SplitBy>>
2173   _GLIBCXX_SIMD_INTRINSIC constexpr _R
2174   __extract(_Tp __in)
2175   {
2176     using value_type = typename _TVT::value_type;
2177 #if _GLIBCXX_SIMD_X86INTRIN // {{{
2178     if constexpr (sizeof(_Tp) == 64 && _SplitBy == 4 && _Offset > 0)
2179       {
2180 	if constexpr (__have_avx512dq && is_same_v<double, value_type>)
2181 	  return _mm512_extractf64x2_pd(__to_intrin(__in), _Offset);
2182 	else if constexpr (is_floating_point_v<value_type>)
2183 	  return __vector_bitcast<value_type>(
2184 	    _mm512_extractf32x4_ps(__intrin_bitcast<__m512>(__in), _Offset));
2185 	else
2186 	  return reinterpret_cast<_R>(
2187 	    _mm512_extracti32x4_epi32(__intrin_bitcast<__m512i>(__in),
2188 				      _Offset));
2189       }
2190     else
2191 #endif // _GLIBCXX_SIMD_X86INTRIN }}}
2192       {
2193 #ifdef _GLIBCXX_SIMD_WORKAROUND_XXX_1
2194 	using _W = conditional_t<
2195 	  is_floating_point_v<value_type>, double,
2196 	  conditional_t<(sizeof(_R) >= 16), long long, value_type>>;
2197 	static_assert(sizeof(_R) % sizeof(_W) == 0);
2198 	constexpr int __return_width = sizeof(_R) / sizeof(_W);
2199 	using _Up = __vector_type_t<_W, __return_width>;
2200 	const auto __x = __vector_bitcast<_W>(__in);
2201 #else
2202 	constexpr int __return_width = _TVT::_S_full_size / _SplitBy;
2203 	using _Up = _R;
2204 	const __vector_type_t<value_type, _TVT::_S_full_size>& __x
2205 	  = __in; // only needed for _Tp = _SimdWrapper<value_type, _Np>
2206 #endif
2207 	constexpr int _O = _Offset * __return_width;
2208 	return __call_with_subscripts<__return_width, _O>(
2209 	  __x, [](auto... __entries) {
2210 	    return reinterpret_cast<_R>(_Up{__entries...});
2211 	  });
2212       }
2213   }
2214 
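// [Editor's note: illustrative sketch.] __extract<_Offset, _SplitBy> returns
// the _Offset-th of _SplitBy equally sized pieces, the inverse of __concat:
//   __vector_type_t<int, 4> __v = {0, 1, 2, 3};
//   auto __lo = __extract<0, 2>(__v); // {0, 1}
//   auto __hi = __extract<1, 2>(__v); // {2, 3}
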
2215 // }}}
2216 // __lo/__hi64[z]{{{
2217 template <typename _Tp,
2218 	  typename _R
2219 	  = __vector_type8_t<typename _VectorTraits<_Tp>::value_type>>
2220   _GLIBCXX_SIMD_INTRINSIC constexpr _R
2221   __lo64(_Tp __x)
2222   {
2223     _R __r{};
2224     __builtin_memcpy(&__r, &__x, 8);
2225     return __r;
2226   }
2227 
2228 template <typename _Tp,
2229 	  typename _R
2230 	  = __vector_type8_t<typename _VectorTraits<_Tp>::value_type>>
2231   _GLIBCXX_SIMD_INTRINSIC constexpr _R
2232   __hi64(_Tp __x)
2233   {
2234     static_assert(sizeof(_Tp) == 16, "use __hi64z if you meant it");
2235     _R __r{};
2236     __builtin_memcpy(&__r, reinterpret_cast<const char*>(&__x) + 8, 8);
2237     return __r;
2238   }
2239 
2240 template <typename _Tp,
2241 	  typename _R
2242 	  = __vector_type8_t<typename _VectorTraits<_Tp>::value_type>>
2243   _GLIBCXX_SIMD_INTRINSIC constexpr _R
2244   __hi64z([[maybe_unused]] _Tp __x)
2245   {
2246     _R __r{};
2247     if constexpr (sizeof(_Tp) == 16)
2248       __builtin_memcpy(&__r, reinterpret_cast<const char*>(&__x) + 8, 8);
2249     return __r;
2250   }
2251 
2252 // }}}
2253 // __lo/__hi128{{{
2254 template <typename _Tp>
2255   _GLIBCXX_SIMD_INTRINSIC constexpr auto
2256   __lo128(_Tp __x)
2257   { return __extract<0, sizeof(_Tp) / 16>(__x); }
2258 
2259 template <typename _Tp>
2260   _GLIBCXX_SIMD_INTRINSIC constexpr auto
2261   __hi128(_Tp __x)
2262   {
2263     static_assert(sizeof(__x) == 32);
2264     return __extract<1, 2>(__x);
2265   }
2266 
2267 // }}}
2268 // __lo/__hi256{{{
2269 template <typename _Tp>
2270   _GLIBCXX_SIMD_INTRINSIC constexpr auto
2271   __lo256(_Tp __x)
2272   {
2273     static_assert(sizeof(__x) == 64);
2274     return __extract<0, 2>(__x);
2275   }
2276 
2277 template <typename _Tp>
2278   _GLIBCXX_SIMD_INTRINSIC constexpr auto
2279   __hi256(_Tp __x)
2280   {
2281     static_assert(sizeof(__x) == 64);
2282     return __extract<1, 2>(__x);
2283   }
2284 
2285 // }}}
2286 // __auto_bitcast{{{
2287 template <typename _Tp>
2288   struct _AutoCast
2289   {
2290     static_assert(__is_vector_type_v<_Tp>);
2291 
2292     const _Tp __x;
2293 
2294     template <typename _Up, typename _UVT = _VectorTraits<_Up>>
2295       _GLIBCXX_SIMD_INTRINSIC constexpr operator _Up() const
2296       { return __intrin_bitcast<typename _UVT::type>(__x); }
2297   };
2298 
2299 template <typename _Tp>
2300   _GLIBCXX_SIMD_INTRINSIC constexpr _AutoCast<_Tp>
2301   __auto_bitcast(const _Tp& __x)
2302   { return {__x}; }
2303 
2304 template <typename _Tp, size_t _Np>
2305   _GLIBCXX_SIMD_INTRINSIC constexpr
2306   _AutoCast<typename _SimdWrapper<_Tp, _Np>::_BuiltinType>
2307   __auto_bitcast(const _SimdWrapper<_Tp, _Np>& __x)
2308   { return {__x._M_data}; }
2309 
2310 // }}}
2311 // ^^^ ---- builtin vector types [[gnu::vector_size(N)]] and operations ---- ^^^
2312 
2313 #if _GLIBCXX_SIMD_HAVE_SSE_ABI
2314 // __bool_storage_member_type{{{
2315 #if _GLIBCXX_SIMD_HAVE_AVX512F && _GLIBCXX_SIMD_X86INTRIN
2316 template <size_t _Size>
2317   struct __bool_storage_member_type
2318   {
2319     static_assert((_Size & (_Size - 1)) != 0,
2320 		  "This trait may only be used for non-power-of-2 sizes. "
2321 		  "Power-of-2 sizes must be specialized.");
2322     using type =
2323       typename __bool_storage_member_type<std::__bit_ceil(_Size)>::type;
2324   };
2325 
2326 template <>
2327   struct __bool_storage_member_type<1> { using type = bool; };
2328 
2329 template <>
2330   struct __bool_storage_member_type<2> { using type = __mmask8; };
2331 
2332 template <>
2333   struct __bool_storage_member_type<4> { using type = __mmask8; };
2334 
2335 template <>
2336   struct __bool_storage_member_type<8> { using type = __mmask8; };
2337 
2338 template <>
2339   struct __bool_storage_member_type<16> { using type = __mmask16; };
2340 
2341 template <>
2342   struct __bool_storage_member_type<32> { using type = __mmask32; };
2343 
2344 template <>
2345   struct __bool_storage_member_type<64> { using type = __mmask64; };
2346 #endif // _GLIBCXX_SIMD_HAVE_AVX512F
2347 
2348 // }}}
2349 // __intrinsic_type (x86){{{
2350 // the following excludes bool via __is_vectorizable
2351 #if _GLIBCXX_SIMD_HAVE_SSE
2352 template <typename _Tp, size_t _Bytes>
2353   struct __intrinsic_type<_Tp, _Bytes,
2354 			  enable_if_t<__is_vectorizable_v<_Tp> && _Bytes <= 64>>
2355   {
2356     static_assert(!is_same_v<_Tp, long double>,
2357 		  "no __intrinsic_type support for long double on x86");
2358 
2359     static constexpr size_t _S_VBytes = _Bytes <= 16   ? 16
2360 					: _Bytes <= 32 ? 32
2361 						       : 64;
2362 
2363     using type [[__gnu__::__vector_size__(_S_VBytes)]]
2364     = conditional_t<is_integral_v<_Tp>, long long int, _Tp>;
2365   };
2366 #endif // _GLIBCXX_SIMD_HAVE_SSE
2367 
2368 // }}}
2369 #endif // _GLIBCXX_SIMD_HAVE_SSE_ABI
2370 // __intrinsic_type (ARM){{{
2371 #if _GLIBCXX_SIMD_HAVE_NEON
2372 template <>
2373   struct __intrinsic_type<float, 8, void>
2374   { using type = float32x2_t; };
2375 
2376 template <>
2377   struct __intrinsic_type<float, 16, void>
2378   { using type = float32x4_t; };
2379 
2380 #if _GLIBCXX_SIMD_HAVE_NEON_A64
2381 template <>
2382   struct __intrinsic_type<double, 8, void>
2383   { using type = float64x1_t; };
2384 
2385 template <>
2386   struct __intrinsic_type<double, 16, void>
2387   { using type = float64x2_t; };
2388 #endif
2389 
2390 #define _GLIBCXX_SIMD_ARM_INTRIN(_Bits, _Np)                                   \
2391 template <>                                                                    \
2392   struct __intrinsic_type<__int_with_sizeof_t<_Bits / 8>,                      \
2393 			  _Np * _Bits / 8, void>                               \
2394   { using type = int##_Bits##x##_Np##_t; };                                    \
2395 template <>                                                                    \
2396   struct __intrinsic_type<make_unsigned_t<__int_with_sizeof_t<_Bits / 8>>,     \
2397 			  _Np * _Bits / 8, void>                               \
2398   { using type = uint##_Bits##x##_Np##_t; }
2399 _GLIBCXX_SIMD_ARM_INTRIN(8, 8);
2400 _GLIBCXX_SIMD_ARM_INTRIN(8, 16);
2401 _GLIBCXX_SIMD_ARM_INTRIN(16, 4);
2402 _GLIBCXX_SIMD_ARM_INTRIN(16, 8);
2403 _GLIBCXX_SIMD_ARM_INTRIN(32, 2);
2404 _GLIBCXX_SIMD_ARM_INTRIN(32, 4);
2405 _GLIBCXX_SIMD_ARM_INTRIN(64, 1);
2406 _GLIBCXX_SIMD_ARM_INTRIN(64, 2);
2407 #undef _GLIBCXX_SIMD_ARM_INTRIN
2408 
2409 template <typename _Tp, size_t _Bytes>
2410   struct __intrinsic_type<_Tp, _Bytes,
2411 			  enable_if_t<__is_vectorizable_v<_Tp> && _Bytes <= 16>>
2412   {
2413     static constexpr int _SVecBytes = _Bytes <= 8 ? 8 : 16;
2414     using _Ip = __int_for_sizeof_t<_Tp>;
2415     using _Up = conditional_t<
2416       is_floating_point_v<_Tp>, _Tp,
2417       conditional_t<is_unsigned_v<_Tp>, make_unsigned_t<_Ip>, _Ip>>;
2418     static_assert(!is_same_v<_Tp, _Up> || _SVecBytes != _Bytes,
2419 		  "should use explicit specialization above");
2420     using type = typename __intrinsic_type<_Up, _SVecBytes>::type;
2421   };
2422 #endif // _GLIBCXX_SIMD_HAVE_NEON
2423 
2424 // }}}
2425 // __intrinsic_type (PPC){{{
2426 #ifdef __ALTIVEC__
2427 template <typename _Tp>
2428   struct __intrinsic_type_impl;
2429 
2430 #define _GLIBCXX_SIMD_PPC_INTRIN(_Tp)                                          \
2431   template <>                                                                  \
2432     struct __intrinsic_type_impl<_Tp> { using type = __vector _Tp; }
2433 _GLIBCXX_SIMD_PPC_INTRIN(float);
2434 #ifdef __VSX__
2435 _GLIBCXX_SIMD_PPC_INTRIN(double);
2436 #endif
2437 _GLIBCXX_SIMD_PPC_INTRIN(signed char);
2438 _GLIBCXX_SIMD_PPC_INTRIN(unsigned char);
2439 _GLIBCXX_SIMD_PPC_INTRIN(signed short);
2440 _GLIBCXX_SIMD_PPC_INTRIN(unsigned short);
2441 _GLIBCXX_SIMD_PPC_INTRIN(signed int);
2442 _GLIBCXX_SIMD_PPC_INTRIN(unsigned int);
2443 #if defined __VSX__ || __SIZEOF_LONG__ == 4
2444 _GLIBCXX_SIMD_PPC_INTRIN(signed long);
2445 _GLIBCXX_SIMD_PPC_INTRIN(unsigned long);
2446 #endif
2447 #ifdef __VSX__
2448 _GLIBCXX_SIMD_PPC_INTRIN(signed long long);
2449 _GLIBCXX_SIMD_PPC_INTRIN(unsigned long long);
2450 #endif
2451 #undef _GLIBCXX_SIMD_PPC_INTRIN
2452 
2453 template <typename _Tp, size_t _Bytes>
2454   struct __intrinsic_type<_Tp, _Bytes,
2455 			  enable_if_t<__is_vectorizable_v<_Tp> && _Bytes <= 16>>
2456   {
2457     static constexpr bool _S_is_ldouble = is_same_v<_Tp, long double>;
2458     // allow _Tp == long double with -mlong-double-64
2459     static_assert(!(_S_is_ldouble && sizeof(long double) > sizeof(double)),
2460 		  "no __intrinsic_type support for 128-bit floating point on PowerPC");
2461 #ifndef __VSX__
2462     static_assert(!(is_same_v<_Tp, double>
2463 		    || (_S_is_ldouble && sizeof(long double) == sizeof(double))),
2464 		  "no __intrinsic_type support for 64-bit floating point on PowerPC w/o VSX");
2465 #endif
2466     using type =
2467       typename __intrinsic_type_impl<
2468 		 conditional_t<is_floating_point_v<_Tp>,
2469 			       conditional_t<_S_is_ldouble, double, _Tp>,
2470 			       __int_for_sizeof_t<_Tp>>>::type;
2471   };
2472 #endif // __ALTIVEC__
2473 
2474 // }}}
2475 // _SimdWrapper<bool>{{{1
2476 template <size_t _Width>
2477   struct _SimdWrapper<bool, _Width,
2478 		      void_t<typename __bool_storage_member_type<_Width>::type>>
2479   {
2480     using _BuiltinType = typename __bool_storage_member_type<_Width>::type;
2481     using value_type = bool;
2482 
2483     static constexpr size_t _S_full_size = sizeof(_BuiltinType) * __CHAR_BIT__;
2484 
2485     _GLIBCXX_SIMD_INTRINSIC constexpr _SimdWrapper<bool, _S_full_size>
2486     __as_full_vector() const { return _M_data; }
2487 
2488     _GLIBCXX_SIMD_INTRINSIC constexpr _SimdWrapper() = default;
2489     _GLIBCXX_SIMD_INTRINSIC constexpr _SimdWrapper(_BuiltinType __k)
2490       : _M_data(__k) {};
2491 
2492     _GLIBCXX_SIMD_INTRINSIC operator const _BuiltinType&() const
2493     { return _M_data; }
2494 
2495     _GLIBCXX_SIMD_INTRINSIC operator _BuiltinType&()
2496     { return _M_data; }
2497 
2498     _GLIBCXX_SIMD_INTRINSIC _BuiltinType __intrin() const
2499     { return _M_data; }
2500 
2501     _GLIBCXX_SIMD_INTRINSIC constexpr value_type operator[](size_t __i) const
2502     { return _M_data & (_BuiltinType(1) << __i); }
2503 
2504     template <size_t __i>
2505       _GLIBCXX_SIMD_INTRINSIC constexpr value_type
2506       operator[](_SizeConstant<__i>) const
2507       { return _M_data & (_BuiltinType(1) << __i); }
2508 
2509     _GLIBCXX_SIMD_INTRINSIC constexpr void _M_set(size_t __i, value_type __x)
2510     {
2511       if (__x)
2512 	_M_data |= (_BuiltinType(1) << __i);
2513       else
2514 	_M_data &= ~(_BuiltinType(1) << __i);
2515     }
2516 
2517     _GLIBCXX_SIMD_INTRINSIC
2518     constexpr bool _M_is_constprop() const
2519     { return __builtin_constant_p(_M_data); }
2520 
2521     _GLIBCXX_SIMD_INTRINSIC constexpr bool _M_is_constprop_none_of() const
2522     {
2523       if (__builtin_constant_p(_M_data))
2524 	{
2525 	  constexpr int __nbits = sizeof(_BuiltinType) * __CHAR_BIT__;
2526 	  constexpr _BuiltinType __active_mask
2527 	    = ~_BuiltinType() >> (__nbits - _Width);
2528 	  return (_M_data & __active_mask) == 0;
2529 	}
2530       return false;
2531     }
2532 
2533     _GLIBCXX_SIMD_INTRINSIC constexpr bool _M_is_constprop_all_of() const
2534     {
2535       if (__builtin_constant_p(_M_data))
2536 	{
2537 	  constexpr int __nbits = sizeof(_BuiltinType) * __CHAR_BIT__;
2538 	  constexpr _BuiltinType __active_mask
2539 	    = ~_BuiltinType() >> (__nbits - _Width);
2540 	  return (_M_data & __active_mask) == __active_mask;
2541 	}
2542       return false;
2543     }
2544 
2545     _BuiltinType _M_data;
2546   };
2547 
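// [Editor's note: illustrative sketch, assuming an AVX-512 target where
// __bool_storage_member_type is defined.] Mask elements are single bits of
// an __mmaskN value:
//   _SimdWrapper<bool, 8> __k(0b0101); // _BuiltinType is __mmask8
//   bool __b0 = __k[0];                // true (bit 0)
//   __k._M_set(1, true);               // _M_data is now 0b0111
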
2548 // _SimdWrapperBase{{{1
2549 template <bool _MustZeroInitPadding, typename _BuiltinType>
2550   struct _SimdWrapperBase;
2551 
2552 template <typename _BuiltinType>
2553   struct _SimdWrapperBase<false, _BuiltinType> // either no padding, or no SNaNs
2554   {
2555     _GLIBCXX_SIMD_INTRINSIC constexpr _SimdWrapperBase() = default;
2556     _GLIBCXX_SIMD_INTRINSIC constexpr _SimdWrapperBase(_BuiltinType __init)
2557       : _M_data(__init)
2558     {}
2559 
2560     _BuiltinType _M_data;
2561   };
2562 
2563 template <typename _BuiltinType>
2564   struct _SimdWrapperBase<true, _BuiltinType> // with padding that must
2565 					      // never become an SNaN
2566   {
2567     _GLIBCXX_SIMD_INTRINSIC constexpr _SimdWrapperBase() : _M_data() {}
2568     _GLIBCXX_SIMD_INTRINSIC constexpr _SimdWrapperBase(_BuiltinType __init)
2569       : _M_data(__init)
2570     {}
2571 
2572     _BuiltinType _M_data;
2573   };
2574 
2575 // }}}
2576 // _SimdWrapper{{{
2577 template <typename _Tp, size_t _Width>
2578   struct _SimdWrapper<
2579     _Tp, _Width,
2580     void_t<__vector_type_t<_Tp, _Width>, __intrinsic_type_t<_Tp, _Width>>>
2581     : _SimdWrapperBase<__has_iec559_behavior<__signaling_NaN, _Tp>::value
2582 			 && sizeof(_Tp) * _Width
2583 			      == sizeof(__vector_type_t<_Tp, _Width>),
2584 		       __vector_type_t<_Tp, _Width>>
2585   {
2586     using _Base
2587       = _SimdWrapperBase<__has_iec559_behavior<__signaling_NaN, _Tp>::value
2588 			   && sizeof(_Tp) * _Width
2589 				== sizeof(__vector_type_t<_Tp, _Width>),
2590 			 __vector_type_t<_Tp, _Width>>;
2591 
2592     static_assert(__is_vectorizable_v<_Tp>);
2593     static_assert(_Width >= 2); // a width of 1 makes no sense; use _Tp directly
2594 
2595     using _BuiltinType = __vector_type_t<_Tp, _Width>;
2596     using value_type = _Tp;
2597 
2598     static inline constexpr size_t _S_full_size
2599       = sizeof(_BuiltinType) / sizeof(value_type);
2600     static inline constexpr int _S_size = _Width;
2601     static inline constexpr bool _S_is_partial = _S_full_size != _S_size;
2602 
2603     using _Base::_M_data;
2604 
2605     _GLIBCXX_SIMD_INTRINSIC constexpr _SimdWrapper<_Tp, _S_full_size>
2606     __as_full_vector() const
2607     { return _M_data; }
2608 
2609     _GLIBCXX_SIMD_INTRINSIC constexpr _SimdWrapper(initializer_list<_Tp> __init)
2610       : _Base(__generate_from_n_evaluations<_Width, _BuiltinType>(
2611 	[&](auto __i) { return __init.begin()[__i.value]; })) {}
2612 
2613     _GLIBCXX_SIMD_INTRINSIC constexpr _SimdWrapper() = default;
2614     _GLIBCXX_SIMD_INTRINSIC constexpr _SimdWrapper(const _SimdWrapper&)
2615       = default;
2616     _GLIBCXX_SIMD_INTRINSIC constexpr _SimdWrapper(_SimdWrapper&&) = default;
2617 
2618     _GLIBCXX_SIMD_INTRINSIC constexpr _SimdWrapper&
2619     operator=(const _SimdWrapper&) = default;
2620     _GLIBCXX_SIMD_INTRINSIC constexpr _SimdWrapper&
2621     operator=(_SimdWrapper&&) = default;
2622 
2623     template <typename _V, typename = enable_if_t<disjunction_v<
2624 			     is_same<_V, __vector_type_t<_Tp, _Width>>,
2625 			     is_same<_V, __intrinsic_type_t<_Tp, _Width>>>>>
2626       _GLIBCXX_SIMD_INTRINSIC constexpr _SimdWrapper(_V __x)
2627       // __vector_bitcast can convert e.g. __m128 to __vector(2) float
2628       : _Base(__vector_bitcast<_Tp, _Width>(__x)) {}
2629 
2630     template <typename... _As,
2631 	      typename = enable_if_t<((is_same_v<simd_abi::scalar, _As> && ...)
2632 				      && sizeof...(_As) <= _Width)>>
2633       _GLIBCXX_SIMD_INTRINSIC constexpr
2634       operator _SimdTuple<_Tp, _As...>() const
2635       {
2636 	const auto& __dd = _M_data; // workaround for GCC7 ICE
2637 	return __generate_from_n_evaluations<sizeof...(_As),
2638 					     _SimdTuple<_Tp, _As...>>([&](
2639 	  auto __i) constexpr { return __dd[int(__i)]; });
2640       }
2641 
2642     _GLIBCXX_SIMD_INTRINSIC constexpr operator const _BuiltinType&() const
2643     { return _M_data; }
2644 
2645     _GLIBCXX_SIMD_INTRINSIC constexpr operator _BuiltinType&()
2646     { return _M_data; }
2647 
2648     _GLIBCXX_SIMD_INTRINSIC constexpr _Tp operator[](size_t __i) const
2649     { return _M_data[__i]; }
2650 
2651     template <size_t __i>
2652       _GLIBCXX_SIMD_INTRINSIC constexpr _Tp operator[](_SizeConstant<__i>) const
2653       { return _M_data[__i]; }
2654 
2655     _GLIBCXX_SIMD_INTRINSIC constexpr void _M_set(size_t __i, _Tp __x)
2656     { _M_data[__i] = __x; }
2657 
2658     _GLIBCXX_SIMD_INTRINSIC
2659     constexpr bool _M_is_constprop() const
2660     { return __builtin_constant_p(_M_data); }
2661 
2662     _GLIBCXX_SIMD_INTRINSIC constexpr bool _M_is_constprop_none_of() const
2663     {
2664       if (__builtin_constant_p(_M_data))
2665 	{
2666 	  bool __r = true;
2667 	  if constexpr (is_floating_point_v<_Tp>)
2668 	    {
2669 	      using _Ip = __int_for_sizeof_t<_Tp>;
2670 	      const auto __intdata = __vector_bitcast<_Ip>(_M_data);
2671 	      __execute_n_times<_Width>(
2672 		[&](auto __i) { __r &= __intdata[__i.value] == _Ip(); });
2673 	    }
2674 	  else
2675 	    __execute_n_times<_Width>(
2676 	      [&](auto __i) { __r &= _M_data[__i.value] == _Tp(); });
2677 	  if (__builtin_constant_p(__r))
2678 	    return __r;
2679 	}
2680       return false;
2681     }
2682 
2683     _GLIBCXX_SIMD_INTRINSIC constexpr bool _M_is_constprop_all_of() const
2684     {
2685       if (__builtin_constant_p(_M_data))
2686 	{
2687 	  bool __r = true;
2688 	  if constexpr (is_floating_point_v<_Tp>)
2689 	    {
2690 	      using _Ip = __int_for_sizeof_t<_Tp>;
2691 	      const auto __intdata = __vector_bitcast<_Ip>(_M_data);
2692 	      __execute_n_times<_Width>(
2693 		[&](auto __i) { __r &= __intdata[__i.value] == ~_Ip(); });
2694 	    }
2695 	  else
2696 	    __execute_n_times<_Width>(
2697 	      [&](auto __i) { __r &= _M_data[__i.value] == ~_Tp(); });
2698 	  if (__builtin_constant_p(__r))
2699 	    return __r;
2700 	}
2701       return false;
2702     }
2703   };
2704 
2705 // }}}
2706 
2707 // __vectorized_sizeof {{{
2708 template <typename _Tp>
2709   constexpr size_t
2710   __vectorized_sizeof()
2711   {
2712     if constexpr (!__is_vectorizable_v<_Tp>)
2713       return 0;
2714 
2715     if constexpr (sizeof(_Tp) <= 8)
2716       {
2717 	// X86:
2718 	if constexpr (__have_avx512bw)
2719 	  return 64;
2720 	if constexpr (__have_avx512f && sizeof(_Tp) >= 4)
2721 	  return 64;
2722 	if constexpr (__have_avx2)
2723 	  return 32;
2724 	if constexpr (__have_avx && is_floating_point_v<_Tp>)
2725 	  return 32;
2726 	if constexpr (__have_sse2)
2727 	  return 16;
2728 	if constexpr (__have_sse && is_same_v<_Tp, float>)
2729 	  return 16;
2730 	/* The following is too much trouble because of mixed MMX and x87 code.
2731 	 * While nothing here explicitly uses MMX instructions or registers,
2732 	 * they are still emitted, but no EMMS cleanup is done.
2733 	if constexpr (__have_mmx && sizeof(_Tp) <= 4 && is_integral_v<_Tp>)
2734 	  return 8;
2735 	 */
2736 
2737 	// PowerPC:
2738 	if constexpr (__have_power8vec
2739 		      || (__have_power_vmx && (sizeof(_Tp) < 8))
2740 		      || (__have_power_vsx && is_floating_point_v<_Tp>) )
2741 	  return 16;
2742 
2743 	// ARM:
2744 	if constexpr (__have_neon_a64
2745 		      || (__have_neon_a32 && !is_same_v<_Tp, double>) )
2746 	  return 16;
2747 	if constexpr (__have_neon
2748 		      && sizeof(_Tp) < 8
2749 		      // Only allow fp if the user allows non-IEC-559 fp
2750 		      // (e.g. via -ffast-math), because ARMv7 NEON fp does
2751 		      // not conform to IEC 559.
2752 		      && (__support_neon_float || !is_floating_point_v<_Tp>))
2753 	  return 16;
2754       }
2755 
2756     return sizeof(_Tp);
2757   }
2758 
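// [Editor's note:] For example, on an x86 target with AVX2 but without
// AVX-512, __vectorized_sizeof<float>() evaluates to 32, so
// simd_abi::native<float> below selects _VecBuiltin<32>, i.e. 8 floats per
// register.
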
2759 // }}}
2760 namespace simd_abi {
2761 // most of simd_abi is defined in simd_detail.h
2762 template <typename _Tp>
2763   inline constexpr int max_fixed_size
2764     = (__have_avx512bw && sizeof(_Tp) == 1) ? 64 : 32;
2765 
2766 // compatible {{{
2767 #if defined __x86_64__ || defined __aarch64__
2768 template <typename _Tp>
2769   using compatible = conditional_t<(sizeof(_Tp) <= 8), _VecBuiltin<16>, scalar>;
2770 #elif defined __ARM_NEON
2771 // FIXME: not sure, probably needs to be scalar (or dependent on the hard-float
2772 // ABI?)
2773 template <typename _Tp>
2774   using compatible
2775     = conditional_t<(sizeof(_Tp) < 8
2776 		     && (__support_neon_float || !is_floating_point_v<_Tp>)),
2777 		    _VecBuiltin<16>, scalar>;
2778 #else
2779 template <typename>
2780   using compatible = scalar;
2781 #endif
2782 
2783 // }}}
2784 // native {{{
2785 template <typename _Tp>
2786   constexpr auto
2787   __determine_native_abi()
2788   {
2789     constexpr size_t __bytes = __vectorized_sizeof<_Tp>();
2790     if constexpr (__bytes == sizeof(_Tp))
2791       return static_cast<scalar*>(nullptr);
2792     else if constexpr (__have_avx512vl || (__have_avx512f && __bytes == 64))
2793       return static_cast<_VecBltnBtmsk<__bytes>*>(nullptr);
2794     else
2795       return static_cast<_VecBuiltin<__bytes>*>(nullptr);
2796   }
2797 
2798 template <typename _Tp, typename = enable_if_t<__is_vectorizable_v<_Tp>>>
2799   using native = remove_pointer_t<decltype(__determine_native_abi<_Tp>())>;
2800 
2801 // }}}
2802 // __default_abi {{{
2803 #if defined _GLIBCXX_SIMD_DEFAULT_ABI
2804 template <typename _Tp>
2805   using __default_abi = _GLIBCXX_SIMD_DEFAULT_ABI<_Tp>;
2806 #else
2807 template <typename _Tp>
2808   using __default_abi = compatible<_Tp>;
2809 #endif
2810 
2811 // }}}
2812 } // namespace simd_abi
2813 
2814 // traits {{{1
2815 // is_abi_tag {{{2
2816 template <typename _Tp, typename = void_t<>>
2817   struct is_abi_tag : false_type {};
2818 
2819 template <typename _Tp>
2820   struct is_abi_tag<_Tp, void_t<typename _Tp::_IsValidAbiTag>>
2821   : public _Tp::_IsValidAbiTag {};
2822 
2823 template <typename _Tp>
2824   inline constexpr bool is_abi_tag_v = is_abi_tag<_Tp>::value;
2825 
2826 // is_simd(_mask) {{{2
2827 template <typename _Tp>
2828   struct is_simd : public false_type {};
2829 
2830 template <typename _Tp>
2831   inline constexpr bool is_simd_v = is_simd<_Tp>::value;
2832 
2833 template <typename _Tp>
2834   struct is_simd_mask : public false_type {};
2835 
2836 template <typename _Tp>
2837   inline constexpr bool is_simd_mask_v = is_simd_mask<_Tp>::value;
2838 
2839 // simd_size {{{2
2840 template <typename _Tp, typename _Abi, typename = void>
2841   struct __simd_size_impl {};
2842 
2843 template <typename _Tp, typename _Abi>
2844   struct __simd_size_impl<
2845     _Tp, _Abi,
2846     enable_if_t<conjunction_v<__is_vectorizable<_Tp>, is_abi_tag<_Abi>>>>
2847     : _SizeConstant<_Abi::template _S_size<_Tp>> {};
2848 
2849 template <typename _Tp, typename _Abi = simd_abi::__default_abi<_Tp>>
2850   struct simd_size : __simd_size_impl<_Tp, _Abi> {};
2851 
2852 template <typename _Tp, typename _Abi = simd_abi::__default_abi<_Tp>>
2853   inline constexpr size_t simd_size_v = simd_size<_Tp, _Abi>::value;
2854 
2855 // simd_abi::deduce {{{2
2856 template <typename _Tp, size_t _Np, typename = void>
2857   struct __deduce_impl;
2858 
2859 namespace simd_abi {
2860 /**
2861  * @tparam _Tp   The requested `value_type` for the elements.
2862  * @tparam _Np    The requested number of elements.
2863  * @tparam _Abis This parameter is ignored, since this implementation cannot
2864  * make any use of it. Either a good native ABI is matched and used as the `type`
2865  * alias, or the `fixed_size<_Np>` ABI is used, which internally is built from
2866  * the best matching native ABIs.
2867  */
2868 template <typename _Tp, size_t _Np, typename...>
2869   struct deduce : __deduce_impl<_Tp, _Np> {};
2870 
2871 template <typename _Tp, size_t _Np, typename... _Abis>
2872   using deduce_t = typename deduce<_Tp, _Np, _Abis...>::type;
2873 } // namespace simd_abi
2874 
2875 // }}}2
2876 // rebind_simd {{{2
2877 template <typename _Tp, typename _V, typename = void>
2878   struct rebind_simd;
2879 
2880 template <typename _Tp, typename _Up, typename _Abi>
2881   struct rebind_simd<
2882     _Tp, simd<_Up, _Abi>,
2883     void_t<simd_abi::deduce_t<_Tp, simd_size_v<_Up, _Abi>, _Abi>>>
2884   {
2885     using type
2886       = simd<_Tp, simd_abi::deduce_t<_Tp, simd_size_v<_Up, _Abi>, _Abi>>;
2887   };
2888 
2889 template <typename _Tp, typename _Up, typename _Abi>
2890   struct rebind_simd<
2891     _Tp, simd_mask<_Up, _Abi>,
2892     void_t<simd_abi::deduce_t<_Tp, simd_size_v<_Up, _Abi>, _Abi>>>
2893   {
2894     using type
2895       = simd_mask<_Tp, simd_abi::deduce_t<_Tp, simd_size_v<_Up, _Abi>, _Abi>>;
2896   };
2897 
2898 template <typename _Tp, typename _V>
2899   using rebind_simd_t = typename rebind_simd<_Tp, _V>::type;
2900 
2901 // resize_simd {{{2
2902 template <int _Np, typename _V, typename = void>
2903   struct resize_simd;
2904 
2905 template <int _Np, typename _Tp, typename _Abi>
2906   struct resize_simd<_Np, simd<_Tp, _Abi>,
2907 		     void_t<simd_abi::deduce_t<_Tp, _Np, _Abi>>>
2908   { using type = simd<_Tp, simd_abi::deduce_t<_Tp, _Np, _Abi>>; };
2909 
2910 template <int _Np, typename _Tp, typename _Abi>
2911   struct resize_simd<_Np, simd_mask<_Tp, _Abi>,
2912 		     void_t<simd_abi::deduce_t<_Tp, _Np, _Abi>>>
2913   { using type = simd_mask<_Tp, simd_abi::deduce_t<_Tp, _Np, _Abi>>; };
2914 
2915 template <int _Np, typename _V>
2916   using resize_simd_t = typename resize_simd<_Np, _V>::type;
2917 
2918 // }}}2
2919 // memory_alignment {{{2
2920 template <typename _Tp, typename _Up = typename _Tp::value_type>
2921   struct memory_alignment
2922   : public _SizeConstant<vector_aligned_tag::_S_alignment<_Tp, _Up>> {};
2923 
2924 template <typename _Tp, typename _Up = typename _Tp::value_type>
2925   inline constexpr size_t memory_alignment_v = memory_alignment<_Tp, _Up>::value;
2926 
2927 // class template simd [simd] {{{1
2928 template <typename _Tp, typename _Abi = simd_abi::__default_abi<_Tp>>
2929   class simd;
2930 
2931 template <typename _Tp, typename _Abi>
2932   struct is_simd<simd<_Tp, _Abi>> : public true_type {};
2933 
2934 template <typename _Tp>
2935   using native_simd = simd<_Tp, simd_abi::native<_Tp>>;
2936 
2937 template <typename _Tp, int _Np>
2938   using fixed_size_simd = simd<_Tp, simd_abi::fixed_size<_Np>>;
2939 
2940 template <typename _Tp, size_t _Np>
2941   using __deduced_simd = simd<_Tp, simd_abi::deduce_t<_Tp, _Np>>;
2942 
2943 // class template simd_mask [simd_mask] {{{1
2944 template <typename _Tp, typename _Abi = simd_abi::__default_abi<_Tp>>
2945   class simd_mask;
2946 
2947 template <typename _Tp, typename _Abi>
2948   struct is_simd_mask<simd_mask<_Tp, _Abi>> : public true_type {};
2949 
2950 template <typename _Tp>
2951   using native_simd_mask = simd_mask<_Tp, simd_abi::native<_Tp>>;
2952 
2953 template <typename _Tp, int _Np>
2954   using fixed_size_simd_mask = simd_mask<_Tp, simd_abi::fixed_size<_Np>>;
2955 
2956 template <typename _Tp, size_t _Np>
2957   using __deduced_simd_mask = simd_mask<_Tp, simd_abi::deduce_t<_Tp, _Np>>;
2958 
2959 // casts [simd.casts] {{{1
2960 // static_simd_cast {{{2
2961 template <typename _Tp, typename _Up, typename _Ap, bool = is_simd_v<_Tp>,
2962 	  typename = void>
2963   struct __static_simd_cast_return_type;
2964 
2965 template <typename _Tp, typename _A0, typename _Up, typename _Ap>
2966   struct __static_simd_cast_return_type<simd_mask<_Tp, _A0>, _Up, _Ap, false,
2967 					void>
2968   : __static_simd_cast_return_type<simd<_Tp, _A0>, _Up, _Ap> {};
2969 
2970 template <typename _Tp, typename _Up, typename _Ap>
2971   struct __static_simd_cast_return_type<
2972     _Tp, _Up, _Ap, true, enable_if_t<_Tp::size() == simd_size_v<_Up, _Ap>>>
2973   { using type = _Tp; };
2974 
2975 template <typename _Tp, typename _Ap>
2976   struct __static_simd_cast_return_type<_Tp, _Tp, _Ap, false,
2977 #ifdef _GLIBCXX_SIMD_FIX_P2TS_ISSUE66
2978 					enable_if_t<__is_vectorizable_v<_Tp>>
2979 #else
2980 					void
2981 #endif
2982 					>
2983   { using type = simd<_Tp, _Ap>; };
2984 
2985 template <typename _Tp, typename = void>
2986   struct __safe_make_signed { using type = _Tp; };
2987 
2988 template <typename _Tp>
2989   struct __safe_make_signed<_Tp, enable_if_t<is_integral_v<_Tp>>>
2990   {
2991     // the extra make_unsigned_t is because of PR85951
2992     using type = make_signed_t<make_unsigned_t<_Tp>>;
2993   };
2994 
2995 template <typename _Tp>
2996   using __safe_make_signed_t = typename __safe_make_signed<_Tp>::type;
2997 
2998 template <typename _Tp, typename _Up, typename _Ap>
2999   struct __static_simd_cast_return_type<_Tp, _Up, _Ap, false,
3000 #ifdef _GLIBCXX_SIMD_FIX_P2TS_ISSUE66
3001 					enable_if_t<__is_vectorizable_v<_Tp>>
3002 #else
3003 					void
3004 #endif
3005 					>
3006   {
3007     using type = conditional_t<
3008       (is_integral_v<_Up> && is_integral_v<_Tp> &&
3009 #ifndef _GLIBCXX_SIMD_FIX_P2TS_ISSUE65
3010        is_signed_v<_Up> != is_signed_v<_Tp> &&
3011 #endif
3012        is_same_v<__safe_make_signed_t<_Up>, __safe_make_signed_t<_Tp>>),
3013       simd<_Tp, _Ap>, fixed_size_simd<_Tp, simd_size_v<_Up, _Ap>>>;
3014   };
3015 
3016 template <typename _Tp, typename _Up, typename _Ap,
3017 	  typename _R
3018 	  = typename __static_simd_cast_return_type<_Tp, _Up, _Ap>::type>
3019   _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR _R
3020   static_simd_cast(const simd<_Up, _Ap>& __x)
3021   {
3022     if constexpr (is_same<_R, simd<_Up, _Ap>>::value)
3023       return __x;
3024     else
3025       {
3026 	_SimdConverter<_Up, _Ap, typename _R::value_type, typename _R::abi_type>
3027 	  __c;
3028 	return _R(__private_init, __c(__data(__x)));
3029       }
3030   }
3031 
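// [Editor's note: illustrative sketch.] A sign-changing cast between equally
// sized integer element types keeps the source ABI; other element-type
// changes fall back to fixed_size of the source's width:
//   native_simd<int> __i = 1;
//   auto __u = static_simd_cast<unsigned>(__i); // same ABI as __i
//   auto __f = static_simd_cast<float>(__i);    // fixed_size_simd<float,
//                                               //   __i.size()>
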
3032 namespace __proposed {
3033 template <typename _Tp, typename _Up, typename _Ap,
3034 	  typename _R
3035 	  = typename __static_simd_cast_return_type<_Tp, _Up, _Ap>::type>
3036   _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR typename _R::mask_type
3037   static_simd_cast(const simd_mask<_Up, _Ap>& __x)
3038   {
3039     using _RM = typename _R::mask_type;
3040     return {__private_init, _RM::abi_type::_MaskImpl::template _S_convert<
3041 			      typename _RM::simd_type::value_type>(__x)};
3042   }
3043 
3044 template <typename _To, typename _Up, typename _Abi>
3045   _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR
3046   _To
3047   simd_bit_cast(const simd<_Up, _Abi>& __x)
3048   {
3049     using _Tp = typename _To::value_type;
3050     using _ToMember = typename _SimdTraits<_Tp, typename _To::abi_type>::_SimdMember;
3051     using _From = simd<_Up, _Abi>;
3052     using _FromMember = typename _SimdTraits<_Up, _Abi>::_SimdMember;
3053     // with concepts, the following should be constraints
3054     static_assert(sizeof(_To) == sizeof(_From));
3055     static_assert(is_trivially_copyable_v<_Tp> && is_trivially_copyable_v<_Up>);
3056     static_assert(is_trivially_copyable_v<_ToMember> && is_trivially_copyable_v<_FromMember>);
3057 #if __has_builtin(__builtin_bit_cast)
3058     return {__private_init, __builtin_bit_cast(_ToMember, __data(__x))};
3059 #else
3060     return {__private_init, __bit_cast<_ToMember>(__data(__x))};
3061 #endif
3062   }
3063 
3064 template <typename _To, typename _Up, typename _Abi>
3065   _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR
3066   _To
3067   simd_bit_cast(const simd_mask<_Up, _Abi>& __x)
3068   {
3069     using _From = simd_mask<_Up, _Abi>;
3070     static_assert(sizeof(_To) == sizeof(_From));
3071     static_assert(is_trivially_copyable_v<_From>);
3072     // _To can be simd<T, A>, specifically simd<T, fixed_size<N>>, in which
3073     // case _To is not trivially copyable.
3074     if constexpr (is_simd_v<_To>)
3075       {
3076 	using _Tp = typename _To::value_type;
3077 	using _ToMember = typename _SimdTraits<_Tp, typename _To::abi_type>::_SimdMember;
3078 	static_assert(is_trivially_copyable_v<_ToMember>);
3079 #if __has_builtin(__builtin_bit_cast)
3080 	return {__private_init, __builtin_bit_cast(_ToMember, __x)};
3081 #else
3082 	return {__private_init, __bit_cast<_ToMember>(__x)};
3083 #endif
3084       }
3085     else
3086       {
3087 	static_assert(is_trivially_copyable_v<_To>);
3088 #if __has_builtin(__builtin_bit_cast)
3089 	return __builtin_bit_cast(_To, __x);
3090 #else
3091 	return __bit_cast<_To>(__x);
3092 #endif
3093       }
3094   }
3095 } // namespace __proposed
3096 
3097 // simd_cast {{{2
3098 template <typename _Tp, typename _Up, typename _Ap,
3099 	  typename _To = __value_type_or_identity_t<_Tp>>
3100   _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR auto
3101   simd_cast(const simd<_ValuePreserving<_Up, _To>, _Ap>& __x)
3102     -> decltype(static_simd_cast<_Tp>(__x))
3103   { return static_simd_cast<_Tp>(__x); }
3104 
3105 namespace __proposed {
3106 template <typename _Tp, typename _Up, typename _Ap,
3107 	  typename _To = __value_type_or_identity_t<_Tp>>
3108   _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR auto
3109   simd_cast(const simd_mask<_ValuePreserving<_Up, _To>, _Ap>& __x)
3110     -> decltype(static_simd_cast<_Tp>(__x))
3111   { return static_simd_cast<_Tp>(__x); }
3112 } // namespace __proposed
3113 
3114 // }}}2
3115 // resizing_simd_cast {{{
3116 namespace __proposed {
3117 /* Proposed spec:
3118 
3119 template <class T, class U, class Abi>
3120 T resizing_simd_cast(const simd<U, Abi>& x)
3121 
3122 p1  Constraints:
3123     - is_simd_v<T> is true and
3124     - T::value_type is the same type as U
3125 
3126 p2  Returns:
3127     A simd object with the i^th element initialized to x[i] for all i in the
3128     range of [0, min(T::size(), simd_size_v<U, Abi>)). If T::size() is larger
3129     than simd_size_v<U, Abi>, the remaining elements are value-initialized.
3130 
3131 template <class T, class U, class Abi>
3132 T resizing_simd_cast(const simd_mask<U, Abi>& x)
3133 
3134 p1  Constraints: is_simd_mask_v<T> is true
3135 
3136 p2  Returns:
3137     A simd_mask object with the i^th element initialized to x[i] for all i in
3138     the range of [0, min(T::size(), simd_size_v<U, Abi>)). If T::size() is larger
3139     than simd_size_v<U, Abi>, the remaining elements are initialized to false.
3140 
3141  */
3142 
3143 template <typename _Tp, typename _Up, typename _Ap>
3144   _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR enable_if_t<
3145   conjunction_v<is_simd<_Tp>, is_same<typename _Tp::value_type, _Up>>, _Tp>
3146   resizing_simd_cast(const simd<_Up, _Ap>& __x)
3147   {
3148     if constexpr (is_same_v<typename _Tp::abi_type, _Ap>)
3149       return __x;
3150     else if constexpr (simd_size_v<_Up, _Ap> == 1)
3151       {
3152 	_Tp __r{};
3153 	__r[0] = __x[0];
3154 	return __r;
3155       }
3156     else if constexpr (_Tp::size() == 1)
3157       return __x[0];
3158     else if constexpr (sizeof(_Tp) == sizeof(__x)
3159 		       && !__is_fixed_size_abi_v<_Ap>)
3160       return {__private_init,
3161 	      __vector_bitcast<typename _Tp::value_type, _Tp::size()>(
3162 		_Ap::_S_masked(__data(__x))._M_data)};
3163     else
3164       {
3165 	_Tp __r{};
3166 	__builtin_memcpy(&__data(__r), &__data(__x),
3167 			 sizeof(_Up)
3168 			   * std::min(_Tp::size(), simd_size_v<_Up, _Ap>));
3169 	return __r;
3170       }
3171   }
3172 
3173 template <typename _Tp, typename _Up, typename _Ap>
3174   _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR
3175   enable_if_t<is_simd_mask_v<_Tp>, _Tp>
3176   resizing_simd_cast(const simd_mask<_Up, _Ap>& __x)
3177   {
3178     return {__private_init, _Tp::abi_type::_MaskImpl::template _S_convert<
3179 			      typename _Tp::simd_type::value_type>(__x)};
3180   }
3181 } // namespace __proposed
3182 
3183 // }}}
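// [Editor's note: illustrative sketch.] resizing_simd_cast keeps the value
// type and changes only the element count, value-initializing any new tail:
//   fixed_size_simd<float, 4> __x4([](int __i) { return float(__i); });
//   auto __x8 = resizing_simd_cast<fixed_size_simd<float, 8>>(__x4);
//   // __x8 == {0, 1, 2, 3, 0, 0, 0, 0}
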
3184 // to_fixed_size {{{2
3185 template <typename _Tp, int _Np>
3186   _GLIBCXX_SIMD_INTRINSIC fixed_size_simd<_Tp, _Np>
3187   to_fixed_size(const fixed_size_simd<_Tp, _Np>& __x)
3188   { return __x; }
3189 
3190 template <typename _Tp, int _Np>
3191   _GLIBCXX_SIMD_INTRINSIC fixed_size_simd_mask<_Tp, _Np>
3192   to_fixed_size(const fixed_size_simd_mask<_Tp, _Np>& __x)
3193   { return __x; }
3194 
3195 template <typename _Tp, typename _Ap>
3196   _GLIBCXX_SIMD_INTRINSIC auto
3197   to_fixed_size(const simd<_Tp, _Ap>& __x)
3198   {
3199     return simd<_Tp, simd_abi::fixed_size<simd_size_v<_Tp, _Ap>>>([&__x](
3200       auto __i) constexpr { return __x[__i]; });
3201   }
3202 
3203 template <typename _Tp, typename _Ap>
3204   _GLIBCXX_SIMD_INTRINSIC auto
3205   to_fixed_size(const simd_mask<_Tp, _Ap>& __x)
3206   {
3207     constexpr int _Np = simd_mask<_Tp, _Ap>::size();
3208     fixed_size_simd_mask<_Tp, _Np> __r;
3209     __execute_n_times<_Np>([&](auto __i) constexpr { __r[__i] = __x[__i]; });
3210     return __r;
3211   }
3212 
3213 // to_native {{{2
3214 template <typename _Tp, int _Np>
3215   _GLIBCXX_SIMD_INTRINSIC
3216   enable_if_t<(_Np == native_simd<_Tp>::size()), native_simd<_Tp>>
3217   to_native(const fixed_size_simd<_Tp, _Np>& __x)
3218   {
3219     alignas(memory_alignment_v<native_simd<_Tp>>) _Tp __mem[_Np];
3220     __x.copy_to(__mem, vector_aligned);
3221     return {__mem, vector_aligned};
3222   }
3223 
3224 template <typename _Tp, int _Np> // int matches fixed_size<_Np>, so _Np deduces
3225   _GLIBCXX_SIMD_INTRINSIC
3226   enable_if_t<(_Np == native_simd_mask<_Tp>::size()), native_simd_mask<_Tp>>
3227   to_native(const fixed_size_simd_mask<_Tp, _Np>& __x)
3228   {
3229     return native_simd_mask<_Tp>([&](auto __i) constexpr { return __x[__i]; });
3230   }
3231 
3232 // to_compatible {{{2
3233 template <typename _Tp, int _Np>
3234   _GLIBCXX_SIMD_INTRINSIC enable_if_t<(_Np == simd<_Tp>::size()), simd<_Tp>>
3235   to_compatible(const simd<_Tp, simd_abi::fixed_size<_Np>>& __x)
3236   {
3237     alignas(memory_alignment_v<simd<_Tp>>) _Tp __mem[_Np];
3238     __x.copy_to(__mem, vector_aligned);
3239     return {__mem, vector_aligned};
3240   }
3241 
3242 template <typename _Tp, int _Np>
3243   _GLIBCXX_SIMD_INTRINSIC
3244   enable_if_t<(_Np == simd_mask<_Tp>::size()), simd_mask<_Tp>>
3245   to_compatible(const simd_mask<_Tp, simd_abi::fixed_size<_Np>>& __x)
3246   { return simd_mask<_Tp>([&](auto __i) constexpr { return __x[__i]; }); }
3247 
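// Illustrative example (not part of the original header): round-tripping
// between the fixed-size and native ABIs, assuming
// namespace stdx = std::experimental:
//
//   stdx::native_simd<int> __v(7);
//   auto __f = stdx::to_fixed_size(__v); // fixed_size_simd<int, native width>
//   auto __n = stdx::to_native(__f);     // native_simd<int> again
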
3248 // masked assignment [simd_mask.where] {{{1
3249 
3250 // where_expression {{{1
3251 // const_where_expression<M, T> {{{2
3252 template <typename _M, typename _Tp>
3253   class const_where_expression
3254   {
3255     using _V = _Tp;
3256     static_assert(is_same_v<_V, __remove_cvref_t<_Tp>>);
3257 
3258     struct _Wrapper { using value_type = _V; };
3259 
3260   protected:
3261     using _Impl = typename _V::_Impl;
3262 
3263     using value_type =
3264       typename conditional_t<is_arithmetic_v<_V>, _Wrapper, _V>::value_type;
3265 
3266     _GLIBCXX_SIMD_INTRINSIC friend const _M&
3267     __get_mask(const const_where_expression& __x)
3268     { return __x._M_k; }
3269 
3270     _GLIBCXX_SIMD_INTRINSIC friend const _Tp&
3271     __get_lvalue(const const_where_expression& __x)
3272     { return __x._M_value; }
3273 
3274     const _M& _M_k;
3275     _Tp& _M_value;
3276 
3277   public:
3278     const_where_expression(const const_where_expression&) = delete;
3279     const_where_expression& operator=(const const_where_expression&) = delete;
3280 
3281     _GLIBCXX_SIMD_INTRINSIC const_where_expression(const _M& __kk, const _Tp& __dd)
3282       : _M_k(__kk), _M_value(const_cast<_Tp&>(__dd)) {}
3283 
3284     _GLIBCXX_SIMD_INTRINSIC _V
3285     operator-() const&&
3286     {
3287       return {__private_init,
3288 	      _Impl::template _S_masked_unary<negate>(__data(_M_k),
3289 						      __data(_M_value))};
3290     }
3291 
3292     template <typename _Up, typename _Flags>
3293       [[nodiscard]] _GLIBCXX_SIMD_INTRINSIC _V
3294       copy_from(const _LoadStorePtr<_Up, value_type>* __mem, _Flags) const&&
3295       {
3296 	return {__private_init,
3297 		_Impl::_S_masked_load(__data(_M_value), __data(_M_k),
3298 				      _Flags::template _S_apply<_V>(__mem))};
3299       }
3300 
3301     template <typename _Up, typename _Flags>
3302       _GLIBCXX_SIMD_INTRINSIC void
3303       copy_to(_LoadStorePtr<_Up, value_type>* __mem, _Flags) const&&
3304       {
3305 	_Impl::_S_masked_store(__data(_M_value),
3306 			       _Flags::template _S_apply<_V>(__mem),
3307 			       __data(_M_k));
3308       }
3309   };
3310 
3311 // const_where_expression<bool, T> {{{2
3312 template <typename _Tp>
3313   class const_where_expression<bool, _Tp>
3314   {
3315     using _M = bool;
3316     using _V = _Tp;
3317 
3318     static_assert(is_same_v<_V, __remove_cvref_t<_Tp>>);
3319 
3320     struct _Wrapper { using value_type = _V; };
3321 
3322   protected:
3323     using value_type =
3324       typename conditional_t<is_arithmetic_v<_V>, _Wrapper, _V>::value_type;
3325 
3326     _GLIBCXX_SIMD_INTRINSIC friend const _M&
3327     __get_mask(const const_where_expression& __x)
3328     { return __x._M_k; }
3329 
3330     _GLIBCXX_SIMD_INTRINSIC friend const _Tp&
3331     __get_lvalue(const const_where_expression& __x)
3332     { return __x._M_value; }
3333 
3334     const bool _M_k;
3335     _Tp& _M_value;
3336 
3337   public:
3338     const_where_expression(const const_where_expression&) = delete;
3339     const_where_expression& operator=(const const_where_expression&) = delete;
3340 
3341     _GLIBCXX_SIMD_INTRINSIC const_where_expression(const bool __kk, const _Tp& __dd)
3342       : _M_k(__kk), _M_value(const_cast<_Tp&>(__dd)) {}
3343 
3344     _GLIBCXX_SIMD_INTRINSIC _V operator-() const&&
3345     { return _M_k ? -_M_value : _M_value; }
3346 
3347     template <typename _Up, typename _Flags>
3348       [[nodiscard]] _GLIBCXX_SIMD_INTRINSIC _V
3349       copy_from(const _LoadStorePtr<_Up, value_type>* __mem, _Flags) const&&
3350       { return _M_k ? static_cast<_V>(__mem[0]) : _M_value; }
3351 
3352     template <typename _Up, typename _Flags>
3353       _GLIBCXX_SIMD_INTRINSIC void
3354       copy_to(_LoadStorePtr<_Up, value_type>* __mem, _Flags) const&&
3355       {
3356 	if (_M_k)
3357 	  __mem[0] = _M_value;
3358       }
3359   };
3360 
3361 // where_expression<M, T> {{{2
3362 template <typename _M, typename _Tp>
3363   class where_expression : public const_where_expression<_M, _Tp>
3364   {
3365     using _Impl = typename const_where_expression<_M, _Tp>::_Impl;
3366 
3367     static_assert(!is_const<_Tp>::value,
3368 		  "where_expression may only be instantiated with a non-const "
3369 		  "_Tp parameter");
3370 
3371     using typename const_where_expression<_M, _Tp>::value_type;
3372     using const_where_expression<_M, _Tp>::_M_k;
3373     using const_where_expression<_M, _Tp>::_M_value;
3374 
3375     static_assert(
3376       is_same<typename _M::abi_type, typename _Tp::abi_type>::value, "");
3377     static_assert(_M::size() == _Tp::size(), "");
3378 
3379     _GLIBCXX_SIMD_INTRINSIC friend _Tp& __get_lvalue(where_expression& __x)
3380     { return __x._M_value; }
3381 
3382   public:
3383     where_expression(const where_expression&) = delete;
3384     where_expression& operator=(const where_expression&) = delete;
3385 
3386     _GLIBCXX_SIMD_INTRINSIC where_expression(const _M& __kk, _Tp& __dd)
3387       : const_where_expression<_M, _Tp>(__kk, __dd) {}
3388 
3389     template <typename _Up>
3390       _GLIBCXX_SIMD_INTRINSIC void operator=(_Up&& __x) &&
3391       {
3392 	_Impl::_S_masked_assign(__data(_M_k), __data(_M_value),
3393 				__to_value_type_or_member_type<_Tp>(
3394 				  static_cast<_Up&&>(__x)));
3395       }
3396 
3397 #define _GLIBCXX_SIMD_OP_(__op, __name)                                        \
3398   template <typename _Up>                                                      \
3399     _GLIBCXX_SIMD_INTRINSIC void operator __op##=(_Up&& __x)&&                 \
3400     {                                                                          \
3401       _Impl::template _S_masked_cassign(                                       \
3402 	__data(_M_k), __data(_M_value),                                        \
3403 	__to_value_type_or_member_type<_Tp>(static_cast<_Up&&>(__x)),          \
3404 	[](auto __impl, auto __lhs, auto __rhs) constexpr {                    \
3405 	  return __impl.__name(__lhs, __rhs);                                  \
3406 	});                                                                    \
3407     }                                                                          \
3408   static_assert(true)
3409     _GLIBCXX_SIMD_OP_(+, _S_plus);
3410     _GLIBCXX_SIMD_OP_(-, _S_minus);
3411     _GLIBCXX_SIMD_OP_(*, _S_multiplies);
3412     _GLIBCXX_SIMD_OP_(/, _S_divides);
3413     _GLIBCXX_SIMD_OP_(%, _S_modulus);
3414     _GLIBCXX_SIMD_OP_(&, _S_bit_and);
3415     _GLIBCXX_SIMD_OP_(|, _S_bit_or);
3416     _GLIBCXX_SIMD_OP_(^, _S_bit_xor);
3417     _GLIBCXX_SIMD_OP_(<<, _S_shift_left);
3418     _GLIBCXX_SIMD_OP_(>>, _S_shift_right);
3419 #undef _GLIBCXX_SIMD_OP_
3420 
3421     _GLIBCXX_SIMD_INTRINSIC void operator++() &&
3422     {
3423       __data(_M_value)
3424 	= _Impl::template _S_masked_unary<__increment>(__data(_M_k),
3425 						       __data(_M_value));
3426     }
3427 
3428     _GLIBCXX_SIMD_INTRINSIC void operator++(int) &&
3429     {
3430       __data(_M_value)
3431 	= _Impl::template _S_masked_unary<__increment>(__data(_M_k),
3432 						       __data(_M_value));
3433     }
3434 
3435     _GLIBCXX_SIMD_INTRINSIC void operator--() &&
3436     {
3437       __data(_M_value)
3438 	= _Impl::template _S_masked_unary<__decrement>(__data(_M_k),
3439 						       __data(_M_value));
3440     }
3441 
3442     _GLIBCXX_SIMD_INTRINSIC void operator--(int) &&
3443     {
3444       __data(_M_value)
3445 	= _Impl::template _S_masked_unary<__decrement>(__data(_M_k),
3446 						       __data(_M_value));
3447     }
3448 
3449     // intentionally hides const_where_expression::copy_from
3450     template <typename _Up, typename _Flags>
3451       _GLIBCXX_SIMD_INTRINSIC void
3452       copy_from(const _LoadStorePtr<_Up, value_type>* __mem, _Flags) &&
3453       {
3454 	__data(_M_value)
3455 	  = _Impl::_S_masked_load(__data(_M_value), __data(_M_k),
3456 				  _Flags::template _S_apply<_Tp>(__mem));
3457       }
3458   };
3459 
3460 // where_expression<bool, T> {{{2
3461 template <typename _Tp>
3462   class where_expression<bool, _Tp> : public const_where_expression<bool, _Tp>
3463   {
3464     using _M = bool;
3465     using typename const_where_expression<_M, _Tp>::value_type;
3466     using const_where_expression<_M, _Tp>::_M_k;
3467     using const_where_expression<_M, _Tp>::_M_value;
3468 
3469   public:
3470     where_expression(const where_expression&) = delete;
3471     where_expression& operator=(const where_expression&) = delete;
3472 
3473     _GLIBCXX_SIMD_INTRINSIC where_expression(const _M& __kk, _Tp& __dd)
3474       : const_where_expression<_M, _Tp>(__kk, __dd) {}
3475 
3476 #define _GLIBCXX_SIMD_OP_(__op)                                                \
3477     template <typename _Up>                                                    \
3478       _GLIBCXX_SIMD_INTRINSIC void operator __op(_Up&& __x)&&                  \
3479       { if (_M_k) _M_value __op static_cast<_Up&&>(__x); }
3480 
3481     _GLIBCXX_SIMD_OP_(=)
3482     _GLIBCXX_SIMD_OP_(+=)
3483     _GLIBCXX_SIMD_OP_(-=)
3484     _GLIBCXX_SIMD_OP_(*=)
3485     _GLIBCXX_SIMD_OP_(/=)
3486     _GLIBCXX_SIMD_OP_(%=)
3487     _GLIBCXX_SIMD_OP_(&=)
3488     _GLIBCXX_SIMD_OP_(|=)
3489     _GLIBCXX_SIMD_OP_(^=)
3490     _GLIBCXX_SIMD_OP_(<<=)
3491     _GLIBCXX_SIMD_OP_(>>=)
3492   #undef _GLIBCXX_SIMD_OP_
3493 
3494     _GLIBCXX_SIMD_INTRINSIC void operator++() &&
3495     { if (_M_k) ++_M_value; }
3496 
3497     _GLIBCXX_SIMD_INTRINSIC void operator++(int) &&
3498     { if (_M_k) ++_M_value; }
3499 
3500     _GLIBCXX_SIMD_INTRINSIC void operator--() &&
3501     { if (_M_k) --_M_value; }
3502 
3503     _GLIBCXX_SIMD_INTRINSIC void operator--(int) &&
3504     { if (_M_k) --_M_value; }
3505 
3506     // intentionally hides const_where_expression::copy_from
3507     template <typename _Up, typename _Flags>
3508       _GLIBCXX_SIMD_INTRINSIC void
3509       copy_from(const _LoadStorePtr<_Up, value_type>* __mem, _Flags) &&
3510       { if (_M_k) _M_value = __mem[0]; }
3511   };
3512 
3513 // where {{{1
3514 template <typename _Tp, typename _Ap>
3515   _GLIBCXX_SIMD_INTRINSIC where_expression<simd_mask<_Tp, _Ap>, simd<_Tp, _Ap>>
3516   where(const typename simd<_Tp, _Ap>::mask_type& __k, simd<_Tp, _Ap>& __value)
3517   { return {__k, __value}; }
3518 
3519 template <typename _Tp, typename _Ap>
3520   _GLIBCXX_SIMD_INTRINSIC
3521     const_where_expression<simd_mask<_Tp, _Ap>, simd<_Tp, _Ap>>
3522     where(const typename simd<_Tp, _Ap>::mask_type& __k,
3523 	  const simd<_Tp, _Ap>& __value)
3524   { return {__k, __value}; }
3525 
3526 template <typename _Tp, typename _Ap>
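// In the two overloads below, remove_const_t<simd_mask<_Tp, _Ap>> puts __k in
// a non-deduced context, so _Tp and _Ap are deduced from __value alone.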
3527   _GLIBCXX_SIMD_INTRINSIC
3528     where_expression<simd_mask<_Tp, _Ap>, simd_mask<_Tp, _Ap>>
3529     where(const remove_const_t<simd_mask<_Tp, _Ap>>& __k,
3530 	  simd_mask<_Tp, _Ap>& __value)
3531   { return {__k, __value}; }
3532 
3533 template <typename _Tp, typename _Ap>
3534   _GLIBCXX_SIMD_INTRINSIC
3535     const_where_expression<simd_mask<_Tp, _Ap>, simd_mask<_Tp, _Ap>>
3536     where(const remove_const_t<simd_mask<_Tp, _Ap>>& __k,
3537 	  const simd_mask<_Tp, _Ap>& __value)
3538   { return {__k, __value}; }
3539 
3540 template <typename _Tp>
3541   _GLIBCXX_SIMD_INTRINSIC where_expression<bool, _Tp>
3542   where(_ExactBool __k, _Tp& __value)
3543   { return {__k, __value}; }
3544 
3545 template <typename _Tp>
3546   _GLIBCXX_SIMD_INTRINSIC const_where_expression<bool, _Tp>
3547   where(_ExactBool __k, const _Tp& __value)
3548   { return {__k, __value}; }
3549 
3550 template <typename _Tp, typename _Ap>
3551   void where(bool __k, simd<_Tp, _Ap>& __value) = delete;
3552 
3553 template <typename _Tp, typename _Ap>
3554   void where(bool __k, const simd<_Tp, _Ap>& __value) = delete;
3555 
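// Illustrative example (not part of the original header): masked assignment
// via where(), assuming namespace stdx = std::experimental:
//
//   stdx::native_simd<float> __v([](auto __i) { return float(__i) - 2; });
//   where(__v < 0, __v) = 0;  // assign only where the mask is true
//   where(__v > 1, __v) *= 2; // compound assignment is masked the same way
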
3556 // proposed mask iterations {{{1
3557 namespace __proposed {
3558 template <size_t _Np>
3559   class where_range
3560   {
3561     const bitset<_Np> __bits;
3562 
3563   public:
3564     where_range(bitset<_Np> __b) : __bits(__b) {}
3565 
3566     class iterator
3567     {
3568       size_t __mask;
3569       size_t __bit;
3570 
3571       _GLIBCXX_SIMD_INTRINSIC void __next_bit()
3572       { __bit = __mask == 0 ? 0 : __builtin_ctzl(__mask); } // ctzl(0) is UB; end() holds __mask == 0
3573 
3574       _GLIBCXX_SIMD_INTRINSIC void __reset_lsb()
3575       {
3576 	// clear the lowest set bit, e.g. 01100100 & (01100100 - 1 = 01100011) = 01100000
3577 	__mask &= (__mask - 1);
3578 	// __asm__("btr %1,%0" : "+r"(__mask) : "r"(__bit));
3579       }
3580 
3581     public:
3582       iterator(decltype(__mask) __m) : __mask(__m) { __next_bit(); }
3583       iterator(const iterator&) = default;
3584       iterator(iterator&&) = default;
3585 
3586       _GLIBCXX_SIMD_ALWAYS_INLINE size_t operator->() const
3587       { return __bit; }
3588 
3589       _GLIBCXX_SIMD_ALWAYS_INLINE size_t operator*() const
3590       { return __bit; }
3591 
3592       _GLIBCXX_SIMD_ALWAYS_INLINE iterator& operator++()
3593       {
3594 	__reset_lsb();
3595 	__next_bit();
3596 	return *this;
3597       }
3598 
3599       _GLIBCXX_SIMD_ALWAYS_INLINE iterator operator++(int)
3600       {
3601 	iterator __tmp = *this;
3602 	__reset_lsb();
3603 	__next_bit();
3604 	return __tmp;
3605       }
3606 
3607       _GLIBCXX_SIMD_ALWAYS_INLINE bool operator==(const iterator& __rhs) const
3608       { return __mask == __rhs.__mask; }
3609 
3610       _GLIBCXX_SIMD_ALWAYS_INLINE bool operator!=(const iterator& __rhs) const
3611       { return __mask != __rhs.__mask; }
3612     };
3613 
3614     iterator begin() const
3615     { return __bits.to_ullong(); }
3616 
3617     iterator end() const
3618     { return 0; }
3619   };
3620 
3621 template <typename _Tp, typename _Ap>
3622   where_range<simd_size_v<_Tp, _Ap>>
3623   where(const simd_mask<_Tp, _Ap>& __k)
3624   { return __k.__to_bitset(); }
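
// Illustrative example (not part of the original header): visiting the index
// of every set mask element, assuming namespace stdx = std::experimental and
// a hypothetical callable __process:
//
//   stdx::native_simd<float> __v = /* ... */;
//   for (size_t __i : stdx::__proposed::where(__v > 0.f))
//     __process(__v[__i]);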
3625 
3626 } // namespace __proposed
3627 
3628 // }}}1
3629 // reductions [simd.reductions] {{{1
3630 template <typename _Tp, typename _Abi, typename _BinaryOperation = plus<>>
3631   _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR _Tp
3632   reduce(const simd<_Tp, _Abi>& __v,
3633 	 _BinaryOperation __binary_op = _BinaryOperation())
3634   { return _Abi::_SimdImpl::_S_reduce(__v, __binary_op); }
3635 
3636 template <typename _M, typename _V, typename _BinaryOperation = plus<>>
3637   _GLIBCXX_SIMD_INTRINSIC typename _V::value_type
3638   reduce(const const_where_expression<_M, _V>& __x,
3639 	 typename _V::value_type __identity_element,
3640 	 _BinaryOperation __binary_op)
3641   {
3642     if (__builtin_expect(none_of(__get_mask(__x)), false))
3643       return __identity_element;
3644 
3645     _V __tmp = __identity_element;
3646     _V::_Impl::_S_masked_assign(__data(__get_mask(__x)), __data(__tmp),
3647 				__data(__get_lvalue(__x)));
3648     return reduce(__tmp, __binary_op);
3649   }
3650 
3651 template <typename _M, typename _V>
3652   _GLIBCXX_SIMD_INTRINSIC typename _V::value_type
3653   reduce(const const_where_expression<_M, _V>& __x, plus<> __binary_op = {})
3654   { return reduce(__x, 0, __binary_op); }
3655 
3656 template <typename _M, typename _V>
3657   _GLIBCXX_SIMD_INTRINSIC typename _V::value_type
3658   reduce(const const_where_expression<_M, _V>& __x, multiplies<> __binary_op)
3659   { return reduce(__x, 1, __binary_op); }
3660 
3661 template <typename _M, typename _V>
3662   _GLIBCXX_SIMD_INTRINSIC typename _V::value_type
3663   reduce(const const_where_expression<_M, _V>& __x, bit_and<> __binary_op)
3664   { return reduce(__x, ~typename _V::value_type(), __binary_op); }
3665 
3666 template <typename _M, typename _V>
3667   _GLIBCXX_SIMD_INTRINSIC typename _V::value_type
3668   reduce(const const_where_expression<_M, _V>& __x, bit_or<> __binary_op)
3669   { return reduce(__x, 0, __binary_op); }
3670 
3671 template <typename _M, typename _V>
3672   _GLIBCXX_SIMD_INTRINSIC typename _V::value_type
3673   reduce(const const_where_expression<_M, _V>& __x, bit_xor<> __binary_op)
3674   { return reduce(__x, 0, __binary_op); }
3675 
3676 template <typename _Tp, typename _Abi>
3677   _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR _Tp
3678   hmin(const simd<_Tp, _Abi>& __v) noexcept
3679   {
3680     return _Abi::_SimdImpl::_S_reduce(__v, __detail::_Minimum());
3681   }
3682 
3683 template <typename _Tp, typename _Abi>
3684   _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR _Tp
3685   hmax(const simd<_Tp, _Abi>& __v) noexcept
3686   {
3687     return _Abi::_SimdImpl::_S_reduce(__v, __detail::_Maximum());
3688   }
3689 
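// Illustrative example (not part of the original header), assuming
// namespace stdx = std::experimental:
//
//   stdx::native_simd<int> __v([](auto __i) { return int(__i) + 1; });
//   int __sum = stdx::reduce(__v);                      // 1 + 2 + ... + size()
//   int __min = stdx::hmin(__v);                        // 1
//   int __max = stdx::hmax(__v);                        // size()
//   int __evn = stdx::reduce(where(__v % 2 == 0, __v)); // sum of even elements
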
3690 template <typename _M, typename _V>
3691   _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR
3692   typename _V::value_type
3693   hmin(const const_where_expression<_M, _V>& __x) noexcept
3694   {
3695     using _Tp = typename _V::value_type;
3696     constexpr _Tp __id_elem =
3697 #ifdef __FINITE_MATH_ONLY__
3698       __finite_max_v<_Tp>;
3699 #else
3700       __value_or<__infinity, _Tp>(__finite_max_v<_Tp>);
3701 #endif
3702     _V __tmp = __id_elem;
3703     _V::_Impl::_S_masked_assign(__data(__get_mask(__x)), __data(__tmp),
3704 				__data(__get_lvalue(__x)));
3705     return _V::abi_type::_SimdImpl::_S_reduce(__tmp, __detail::_Minimum());
3706   }
3707 
3708 template <typename _M, typename _V>
3709   _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR
3710   typename _V::value_type
3711   hmax(const const_where_expression<_M, _V>& __x) noexcept
3712   {
3713     using _Tp = typename _V::value_type;
3714     constexpr _Tp __id_elem =
3715 #ifdef __FINITE_MATH_ONLY__
3716       __finite_min_v<_Tp>;
3717 #else
3718       [] {
3719 	if constexpr (__value_exists_v<__infinity, _Tp>)
3720 	  return -__infinity_v<_Tp>;
3721 	else
3722 	  return __finite_min_v<_Tp>;
3723       }();
3724 #endif
3725     _V __tmp = __id_elem;
3726     _V::_Impl::_S_masked_assign(__data(__get_mask(__x)), __data(__tmp),
3727 				__data(__get_lvalue(__x)));
3728     return _V::abi_type::_SimdImpl::_S_reduce(__tmp, __detail::_Maximum());
3729   }
3730 
3731 // }}}1
3732 // algorithms [simd.alg] {{{
3733 template <typename _Tp, typename _Ap>
3734   _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR simd<_Tp, _Ap>
3735   min(const simd<_Tp, _Ap>& __a, const simd<_Tp, _Ap>& __b)
3736   { return {__private_init, _Ap::_SimdImpl::_S_min(__data(__a), __data(__b))}; }
3737 
3738 template <typename _Tp, typename _Ap>
3739   _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR simd<_Tp, _Ap>
3740   max(const simd<_Tp, _Ap>& __a, const simd<_Tp, _Ap>& __b)
3741   { return {__private_init, _Ap::_SimdImpl::_S_max(__data(__a), __data(__b))}; }
3742 
3743 template <typename _Tp, typename _Ap>
3744   _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR
3745   pair<simd<_Tp, _Ap>, simd<_Tp, _Ap>>
3746   minmax(const simd<_Tp, _Ap>& __a, const simd<_Tp, _Ap>& __b)
3747   {
3748     const auto __pair_of_members
3749       = _Ap::_SimdImpl::_S_minmax(__data(__a), __data(__b));
3750     return {simd<_Tp, _Ap>(__private_init, __pair_of_members.first),
3751 	    simd<_Tp, _Ap>(__private_init, __pair_of_members.second)};
3752   }
3753 
3754 template <typename _Tp, typename _Ap>
3755   _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR simd<_Tp, _Ap>
3756   clamp(const simd<_Tp, _Ap>& __v, const simd<_Tp, _Ap>& __lo,
3757 	const simd<_Tp, _Ap>& __hi)
3758   {
3759     using _Impl = typename _Ap::_SimdImpl;
3760     return {__private_init,
3761 	    _Impl::_S_min(__data(__hi),
3762 			  _Impl::_S_max(__data(__lo), __data(__v)))};
3763   }
3764 
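// Illustrative example (not part of the original header): element-wise
// min/max and clamping, assuming namespace stdx = std::experimental:
//
//   stdx::native_simd<float> __x([](auto __i) { return float(__i) - 1; });
//   stdx::native_simd<float> __lo(0), __hi(1);
//   auto [__mn, __mx] = stdx::minmax(__x, __hi); // pair of simd objects
//   auto __clamped    = stdx::clamp(__x, __lo, __hi);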
3765 // }}}
3766 
3767 template <size_t... _Sizes, typename _Tp, typename _Ap,
3768 	  typename = enable_if_t<((_Sizes + ...) == simd<_Tp, _Ap>::size())>>
3769   inline tuple<simd<_Tp, simd_abi::deduce_t<_Tp, _Sizes>>...>
3770   split(const simd<_Tp, _Ap>&);
3771 
3772 // __extract_part {{{
3773 template <int _Index, int _Total, int _Combine = 1, typename _Tp, size_t _Np>
3774   _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_CONST
3775   _SimdWrapper<_Tp, _Np / _Total * _Combine>
3776   __extract_part(const _SimdWrapper<_Tp, _Np> __x);
3777 
3778 template <int _Index, int _Parts, int _Combine = 1, typename _Tp, typename _A0,
3779 	  typename... _As>
3780   _GLIBCXX_SIMD_INTRINSIC auto
3781   __extract_part(const _SimdTuple<_Tp, _A0, _As...>& __x);
3782 
3783 // }}}
3784 // _SizeList {{{
3785 template <size_t _V0, size_t... _Values>
3786   struct _SizeList
3787   {
3788     template <size_t _I>
3789       static constexpr size_t _S_at(_SizeConstant<_I> = {})
3790       {
3791 	if constexpr (_I == 0)
3792 	  return _V0;
3793 	else
3794 	  return _SizeList<_Values...>::template _S_at<_I - 1>();
3795       }
3796 
3797     template <size_t _I>
3798       static constexpr auto _S_before(_SizeConstant<_I> = {})
3799       {
3800 	if constexpr (_I == 0)
3801 	  return _SizeConstant<0>();
3802 	else
3803 	  return _SizeConstant<
3804 	    _V0 + _SizeList<_Values...>::template _S_before<_I - 1>()>();
3805       }
3806 
3807     template <size_t _Np>
3808       static constexpr auto _S_pop_front(_SizeConstant<_Np> = {})
3809       {
3810 	if constexpr (_Np == 0)
3811 	  return _SizeList();
3812 	else
3813 	  return _SizeList<_Values...>::template _S_pop_front<_Np - 1>();
3814       }
3815   };
3816 
3817 // }}}
3818 // __extract_center {{{
3819 template <typename _Tp, size_t _Np>
3820   _GLIBCXX_SIMD_INTRINSIC _SimdWrapper<_Tp, _Np / 2>
3821   __extract_center(_SimdWrapper<_Tp, _Np> __x)
3822   {
3823     static_assert(_Np >= 4);
3824     static_assert(_Np % 4 == 0); // x0 - x1 - x2 - x3 -> return {x1, x2}
3825 #if _GLIBCXX_SIMD_X86INTRIN    // {{{
3826     if constexpr (__have_avx512f && sizeof(_Tp) * _Np == 64)
3827       {
3828 	const auto __intrin = __to_intrin(__x);
3829 	if constexpr (is_integral_v<_Tp>)
3830 	  return __vector_bitcast<_Tp>(_mm512_castsi512_si256(
3831 	    _mm512_shuffle_i32x4(__intrin, __intrin,
3832 				 1 + 2 * 0x4 + 2 * 0x10 + 3 * 0x40)));
3833 	else if constexpr (sizeof(_Tp) == 4)
3834 	  return __vector_bitcast<_Tp>(_mm512_castps512_ps256(
3835 	    _mm512_shuffle_f32x4(__intrin, __intrin,
3836 				 1 + 2 * 0x4 + 2 * 0x10 + 3 * 0x40)));
3837 	else if constexpr (sizeof(_Tp) == 8)
3838 	  return __vector_bitcast<_Tp>(_mm512_castpd512_pd256(
3839 	    _mm512_shuffle_f64x2(__intrin, __intrin,
3840 				 1 + 2 * 0x4 + 2 * 0x10 + 3 * 0x40)));
3841 	else
3842 	  __assert_unreachable<_Tp>();
3843       }
3844     else if constexpr (sizeof(_Tp) * _Np == 32 && is_floating_point_v<_Tp>)
3845       return __vector_bitcast<_Tp>(
3846 	_mm_shuffle_pd(__lo128(__vector_bitcast<double>(__x)),
3847 		       __hi128(__vector_bitcast<double>(__x)), 1));
3848     else if constexpr (sizeof(__x) == 32 && sizeof(_Tp) * _Np <= 32)
3849       return __vector_bitcast<_Tp>(
3850 	_mm_alignr_epi8(__hi128(__vector_bitcast<_LLong>(__x)),
3851 			__lo128(__vector_bitcast<_LLong>(__x)),
3852 			sizeof(_Tp) * _Np / 4));
3853     else
3854 #endif // _GLIBCXX_SIMD_X86INTRIN }}}
3855       {
3856 	__vector_type_t<_Tp, _Np / 2> __r;
3857 	__builtin_memcpy(&__r,
3858 			 reinterpret_cast<const char*>(&__x)
3859 			   + sizeof(_Tp) * _Np / 4,
3860 			 sizeof(_Tp) * _Np / 2);
3861 	return __r;
3862       }
3863   }
3864 
3865 template <typename _Tp, typename _A0, typename... _As>
3866   _GLIBCXX_SIMD_INTRINSIC
3867   _SimdWrapper<_Tp, _SimdTuple<_Tp, _A0, _As...>::_S_size() / 2>
3868   __extract_center(const _SimdTuple<_Tp, _A0, _As...>& __x)
3869   {
3870     if constexpr (sizeof...(_As) == 0)
3871       return __extract_center(__x.first);
3872     else
3873       return __extract_part<1, 4, 2>(__x);
3874   }
3875 
3876 // }}}
3877 // __split_wrapper {{{
3878 template <size_t... _Sizes, typename _Tp, typename... _As>
3879   auto
3880   __split_wrapper(_SizeList<_Sizes...>, const _SimdTuple<_Tp, _As...>& __x)
3881   {
3882     return split<_Sizes...>(
3883       fixed_size_simd<_Tp, _SimdTuple<_Tp, _As...>::_S_size()>(__private_init,
3884 							       __x));
3885   }
3886 
3887 // }}}
3888 
3889 // split<simd>(simd) {{{
3890 template <typename _V, typename _Ap,
3891 	  size_t _Parts = simd_size_v<typename _V::value_type, _Ap> / _V::size()>
3892   enable_if_t<simd_size_v<typename _V::value_type, _Ap> == _Parts * _V::size()
3893 		&& is_simd_v<_V>, array<_V, _Parts>>
3894   split(const simd<typename _V::value_type, _Ap>& __x)
3895   {
3896     using _Tp = typename _V::value_type;
3897     if constexpr (_Parts == 1)
3898       {
3899 	return {simd_cast<_V>(__x)};
3900       }
3901     else if (__x._M_is_constprop())
3902       {
3903 	return __generate_from_n_evaluations<_Parts, array<_V, _Parts>>([&](
3904 	  auto __i) constexpr {
3905 	  return _V([&](auto __j) constexpr {
3906 	    return __x[__i * _V::size() + __j];
3907 	  });
3908 	});
3909       }
3910     else if constexpr (
3911       __is_fixed_size_abi_v<_Ap>
3912       && (is_same_v<typename _V::abi_type, simd_abi::scalar>
3913 	|| (__is_fixed_size_abi_v<typename _V::abi_type>
3914 	  && sizeof(_V) == sizeof(_Tp) * _V::size() // _V doesn't have padding
3915 	  )))
3916       {
3917 	// fixed_size -> fixed_size (w/o padding) or scalar
3918 #ifdef _GLIBCXX_SIMD_USE_ALIASING_LOADS
3919       const __may_alias<_Tp>* const __element_ptr
3920 	= reinterpret_cast<const __may_alias<_Tp>*>(&__data(__x));
3921       return __generate_from_n_evaluations<_Parts, array<_V, _Parts>>([&](
3922 	auto __i) constexpr {
3923 	return _V(__element_ptr + __i * _V::size(), vector_aligned);
3924       });
3925 #else
3926       const auto& __xx = __data(__x);
3927       return __generate_from_n_evaluations<_Parts, array<_V, _Parts>>([&](
3928 	auto __i) constexpr {
3929 	[[maybe_unused]] constexpr size_t __offset
3930 	  = decltype(__i)::value * _V::size();
3931 	return _V([&](auto __j) constexpr {
3932 	  constexpr _SizeConstant<__j + __offset> __k;
3933 	  return __xx[__k];
3934 	});
3935       });
3936 #endif
3937     }
3938   else if constexpr (is_same_v<typename _V::abi_type, simd_abi::scalar>)
3939     {
3940       // normally memcpy should work here as well
3941       return __generate_from_n_evaluations<_Parts, array<_V, _Parts>>([&](
3942 	auto __i) constexpr { return __x[__i]; });
3943     }
3944   else
3945     {
3946       return __generate_from_n_evaluations<_Parts, array<_V, _Parts>>([&](
3947 	auto __i) constexpr {
3948 	if constexpr (__is_fixed_size_abi_v<typename _V::abi_type>)
3949 	  return _V([&](auto __j) constexpr {
3950 	    return __x[__i * _V::size() + __j];
3951 	  });
3952 	else
3953 	  return _V(__private_init,
3954 		    __extract_part<decltype(__i)::value, _Parts>(__data(__x)));
3955       });
3956     }
3957   }
3958 
3959 // }}}
3960 // split<simd_mask>(simd_mask) {{{
3961 template <typename _V, typename _Ap,
3962 	  size_t _Parts
3963 	  = simd_size_v<typename _V::simd_type::value_type, _Ap> / _V::size()>
3964   enable_if_t<is_simd_mask_v<_V> && simd_size_v<typename
3965     _V::simd_type::value_type, _Ap> == _Parts * _V::size(), array<_V, _Parts>>
3966   split(const simd_mask<typename _V::simd_type::value_type, _Ap>& __x)
3967   {
3968     if constexpr (is_same_v<_Ap, typename _V::abi_type>)
3969       return {__x};
3970     else if constexpr (_Parts == 1)
3971       return {__proposed::static_simd_cast<_V>(__x)};
3972     else if constexpr (_Parts == 2 && __is_sse_abi<typename _V::abi_type>()
3973 		       && __is_avx_abi<_Ap>())
3974       return {_V(__private_init, __lo128(__data(__x))),
3975 	      _V(__private_init, __hi128(__data(__x)))};
3976     else if constexpr (_V::size() <= __CHAR_BIT__ * sizeof(_ULLong))
3977       {
3978 	const bitset __bits = __x.__to_bitset();
3979 	return __generate_from_n_evaluations<_Parts, array<_V, _Parts>>([&](
3980 	  auto __i) constexpr {
3981 	  constexpr size_t __offset = __i * _V::size();
3982 	  return _V(__bitset_init, (__bits >> __offset).to_ullong());
3983 	});
3984       }
3985     else
3986       {
3987 	return __generate_from_n_evaluations<_Parts, array<_V, _Parts>>([&](
3988 	  auto __i) constexpr {
3989 	  constexpr size_t __offset = __i * _V::size();
3990 	  return _V(
3991 	    __private_init, [&](auto __j) constexpr {
3992 	      return __x[__j + __offset];
3993 	    });
3994 	});
3995       }
3996   }
3997 
3998 // }}}
3999 // split<_Sizes...>(simd) {{{
4000 template <size_t... _Sizes, typename _Tp, typename _Ap, typename>
4001   _GLIBCXX_SIMD_ALWAYS_INLINE
4002   tuple<simd<_Tp, simd_abi::deduce_t<_Tp, _Sizes>>...>
4003   split(const simd<_Tp, _Ap>& __x)
4004   {
4005     using _SL = _SizeList<_Sizes...>;
4006     using _Tuple = tuple<__deduced_simd<_Tp, _Sizes>...>;
4007     constexpr size_t _Np = simd_size_v<_Tp, _Ap>;
4008     constexpr size_t _N0 = _SL::template _S_at<0>();
4009     using _V = __deduced_simd<_Tp, _N0>;
4010 
4011     if (__x._M_is_constprop())
4012       return __generate_from_n_evaluations<sizeof...(_Sizes), _Tuple>([&](
4013 	auto __i) constexpr {
4014 	using _Vi = __deduced_simd<_Tp, _SL::_S_at(__i)>;
4015 	constexpr size_t __offset = _SL::_S_before(__i);
4016 	return _Vi([&](auto __j) constexpr { return __x[__offset + __j]; });
4017       });
4018     else if constexpr (_Np == _N0)
4019       {
4020 	static_assert(sizeof...(_Sizes) == 1);
4021 	return {simd_cast<_V>(__x)};
4022       }
4023     else if constexpr // split from fixed_size, such that __x::first.size == _N0
4024       (__is_fixed_size_abi_v<
4025 	 _Ap> && __fixed_size_storage_t<_Tp, _Np>::_S_first_size == _N0)
4026       {
4027 	static_assert(
4028 	  !__is_fixed_size_abi_v<typename _V::abi_type>,
4029 	  "How can <_Tp, _Np> be a single _SimdTuple entry but a "
4030 	  "fixed_size_simd "
4031 	  "when deduced?");
4032 	// extract first and recurse (__split_wrapper is needed to deduce a new
4033 	// _Sizes pack)
4034 	return tuple_cat(make_tuple(_V(__private_init, __data(__x).first)),
4035 			 __split_wrapper(_SL::template _S_pop_front<1>(),
4036 					 __data(__x).second));
4037       }
4038     else if constexpr ((!is_same_v<simd_abi::scalar,
4039 				   simd_abi::deduce_t<_Tp, _Sizes>> && ...)
4040 		       && (!__is_fixed_size_abi_v<
4041 			     simd_abi::deduce_t<_Tp, _Sizes>> && ...))
4042       {
4043 	if constexpr (((_Sizes * 2 == _Np) && ...))
4044 	  return {{__private_init, __extract_part<0, 2>(__data(__x))},
4045 		  {__private_init, __extract_part<1, 2>(__data(__x))}};
4046 	else if constexpr (is_same_v<_SizeList<_Sizes...>,
4047 				     _SizeList<_Np / 3, _Np / 3, _Np / 3>>)
4048 	  return {{__private_init, __extract_part<0, 3>(__data(__x))},
4049 		  {__private_init, __extract_part<1, 3>(__data(__x))},
4050 		  {__private_init, __extract_part<2, 3>(__data(__x))}};
4051 	else if constexpr (is_same_v<_SizeList<_Sizes...>,
4052 				     _SizeList<2 * _Np / 3, _Np / 3>>)
4053 	  return {{__private_init, __extract_part<0, 3, 2>(__data(__x))},
4054 		  {__private_init, __extract_part<2, 3>(__data(__x))}};
4055 	else if constexpr (is_same_v<_SizeList<_Sizes...>,
4056 				     _SizeList<_Np / 3, 2 * _Np / 3>>)
4057 	  return {{__private_init, __extract_part<0, 3>(__data(__x))},
4058 		  {__private_init, __extract_part<1, 3, 2>(__data(__x))}};
4059 	else if constexpr (is_same_v<_SizeList<_Sizes...>,
4060 				     _SizeList<_Np / 2, _Np / 4, _Np / 4>>)
4061 	  return {{__private_init, __extract_part<0, 2>(__data(__x))},
4062 		  {__private_init, __extract_part<2, 4>(__data(__x))},
4063 		  {__private_init, __extract_part<3, 4>(__data(__x))}};
4064 	else if constexpr (is_same_v<_SizeList<_Sizes...>,
4065 				     _SizeList<_Np / 4, _Np / 4, _Np / 2>>)
4066 	  return {{__private_init, __extract_part<0, 4>(__data(__x))},
4067 		  {__private_init, __extract_part<1, 4>(__data(__x))},
4068 		  {__private_init, __extract_part<1, 2>(__data(__x))}};
4069 	else if constexpr (is_same_v<_SizeList<_Sizes...>,
4070 				     _SizeList<_Np / 4, _Np / 2, _Np / 4>>)
4071 	  return {{__private_init, __extract_part<0, 4>(__data(__x))},
4072 		  {__private_init, __extract_center(__data(__x))},
4073 		  {__private_init, __extract_part<3, 4>(__data(__x))}};
4074 	else if constexpr (((_Sizes * 4 == _Np) && ...))
4075 	  return {{__private_init, __extract_part<0, 4>(__data(__x))},
4076 		  {__private_init, __extract_part<1, 4>(__data(__x))},
4077 		  {__private_init, __extract_part<2, 4>(__data(__x))},
4078 		  {__private_init, __extract_part<3, 4>(__data(__x))}};
4079 	// else fall through
4080       }
4081 #ifdef _GLIBCXX_SIMD_USE_ALIASING_LOADS
4082     const __may_alias<_Tp>* const __element_ptr
4083       = reinterpret_cast<const __may_alias<_Tp>*>(&__x);
4084     return __generate_from_n_evaluations<sizeof...(_Sizes), _Tuple>([&](
4085       auto __i) constexpr {
4086       using _Vi = __deduced_simd<_Tp, _SL::_S_at(__i)>;
4087       constexpr size_t __offset = _SL::_S_before(__i);
4088       constexpr size_t __base_align = alignof(simd<_Tp, _Ap>);
4089       constexpr size_t __a
4090 	= __base_align - ((__offset * sizeof(_Tp)) % __base_align);
4091       constexpr size_t __b = ((__a - 1) & __a) ^ __a; // lowest set bit of __a
4092       constexpr size_t __alignment = __b == 0 ? __a : __b;
4093       return _Vi(__element_ptr + __offset, overaligned<__alignment>);
4094     });
4095 #else
4096     return __generate_from_n_evaluations<sizeof...(_Sizes), _Tuple>([&](
4097       auto __i) constexpr {
4098       using _Vi = __deduced_simd<_Tp, _SL::_S_at(__i)>;
4099       const auto& __xx = __data(__x);
4100       using _Offset = decltype(_SL::_S_before(__i));
4101       return _Vi([&](auto __j) constexpr {
4102 	constexpr _SizeConstant<_Offset::value + __j> __k;
4103 	return __xx[__k];
4104       });
4105     });
4106 #endif
4107   }
4108 
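// Illustrative example (not part of the original header): splitting an
// 8-element simd into two halves, assuming namespace stdx = std::experimental:
//
//   stdx::fixed_size_simd<int, 8> __v([](auto __i) { return int(__i); });
//   auto [__lo, __hi] = stdx::split<4, 4>(__v); // {0,1,2,3} and {4,5,6,7}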
4109 // }}}
4110 
4111 // __subscript_in_pack {{{
4112 template <size_t _I, typename _Tp, typename _Ap, typename... _As>
4113   _GLIBCXX_SIMD_INTRINSIC constexpr _Tp
4114   __subscript_in_pack(const simd<_Tp, _Ap>& __x, const simd<_Tp, _As>&... __xs)
4115   {
4116     if constexpr (_I < simd_size_v<_Tp, _Ap>)
4117       return __x[_I];
4118     else
4119       return __subscript_in_pack<_I - simd_size_v<_Tp, _Ap>>(__xs...);
4120   }
4121 
4122 // }}}
4123 // __store_pack_of_simd {{{
4124 template <typename _Tp, typename _A0, typename... _As>
4125   _GLIBCXX_SIMD_INTRINSIC void
4126   __store_pack_of_simd(char* __mem, const simd<_Tp, _A0>& __x0,
4127 		       const simd<_Tp, _As>&... __xs)
4128   {
4129     constexpr size_t __n_bytes = sizeof(_Tp) * simd_size_v<_Tp, _A0>;
4130     __builtin_memcpy(__mem, &__data(__x0), __n_bytes);
4131     if constexpr (sizeof...(__xs) > 0)
4132       __store_pack_of_simd(__mem + __n_bytes, __xs...);
4133   }
4134 
4135 // }}}
4136 // concat(simd...) {{{
4137 template <typename _Tp, typename... _As, typename = __detail::__odr_helper>
4138   inline _GLIBCXX_SIMD_CONSTEXPR
4139   simd<_Tp, simd_abi::deduce_t<_Tp, (simd_size_v<_Tp, _As> + ...)>>
4140   concat(const simd<_Tp, _As>&... __xs)
4141   {
4142     using _Rp = __deduced_simd<_Tp, (simd_size_v<_Tp, _As> + ...)>;
4143     if constexpr (sizeof...(__xs) == 1)
4144       return simd_cast<_Rp>(__xs...);
4145     else if ((... && __xs._M_is_constprop()))
4146       return simd<_Tp,
4147 		  simd_abi::deduce_t<_Tp, (simd_size_v<_Tp, _As> + ...)>>([&](
4148 	auto __i) constexpr { return __subscript_in_pack<__i>(__xs...); });
4149     else
4150       {
4151 	_Rp __r{};
4152 	__store_pack_of_simd(reinterpret_cast<char*>(&__data(__r)), __xs...);
4153 	return __r;
4154       }
4155   }
4156 
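// Illustrative example (not part of the original header): concat is the
// inverse of split, assuming namespace stdx = std::experimental:
//
//   stdx::fixed_size_simd<int, 4> __a([](auto __i) { return int(__i); });
//   auto __b = stdx::concat(__a, __a); // 8 elements: {0,1,2,3,0,1,2,3}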
4157 // }}}
4158 // concat(array<simd>) {{{
4159 template <typename _Tp, typename _Abi, size_t _Np>
4160   _GLIBCXX_SIMD_ALWAYS_INLINE
4161   _GLIBCXX_SIMD_CONSTEXPR __deduced_simd<_Tp, simd_size_v<_Tp, _Abi> * _Np>
4162   concat(const array<simd<_Tp, _Abi>, _Np>& __x)
4163   {
4164     return __call_with_subscripts<_Np>(__x, [](const auto&... __xs) {
4165       return concat(__xs...);
4166     });
4167   }
4168 
4169 // }}}
4170 
4171 /// @cond undocumented
4172 // _SmartReference {{{
4173 template <typename _Up, typename _Accessor = _Up,
4174 	  typename _ValueType = typename _Up::value_type>
4175   class _SmartReference
4176   {
4177     friend _Accessor;
4178     int _M_index;
4179     _Up& _M_obj;
4180 
4181     _GLIBCXX_SIMD_INTRINSIC constexpr _ValueType _M_read() const noexcept
4182     {
4183       if constexpr (is_arithmetic_v<_Up>)
4184 	return _M_obj;
4185       else
4186 	return _M_obj[_M_index];
4187     }
4188 
4189     template <typename _Tp>
4190       _GLIBCXX_SIMD_INTRINSIC constexpr void _M_write(_Tp&& __x) const
4191       { _Accessor::_S_set(_M_obj, _M_index, static_cast<_Tp&&>(__x)); }
4192 
4193   public:
4194     _GLIBCXX_SIMD_INTRINSIC constexpr
4195     _SmartReference(_Up& __o, int __i) noexcept
4196     : _M_index(__i), _M_obj(__o) {}
4197 
4198     using value_type = _ValueType;
4199 
4200     _GLIBCXX_SIMD_INTRINSIC _SmartReference(const _SmartReference&) = delete;
4201 
4202     _GLIBCXX_SIMD_INTRINSIC constexpr operator value_type() const noexcept
4203     { return _M_read(); }
4204 
4205     template <typename _Tp,
4206 	      typename
4207 	      = _ValuePreservingOrInt<__remove_cvref_t<_Tp>, value_type>>
4208       _GLIBCXX_SIMD_INTRINSIC constexpr _SmartReference operator=(_Tp&& __x) &&
4209       {
4210 	_M_write(static_cast<_Tp&&>(__x));
4211 	return {_M_obj, _M_index};
4212       }
4213 
4214 #define _GLIBCXX_SIMD_OP_(__op)                                                \
4215     template <typename _Tp,                                                    \
4216 	      typename _TT                                                     \
4217 	      = decltype(declval<value_type>() __op declval<_Tp>()),           \
4218 	      typename = _ValuePreservingOrInt<__remove_cvref_t<_Tp>, _TT>,    \
4219 	      typename = _ValuePreservingOrInt<_TT, value_type>>               \
4220       _GLIBCXX_SIMD_INTRINSIC constexpr _SmartReference                        \
4221       operator __op##=(_Tp&& __x) &&                                           \
4222       {                                                                        \
4223 	const value_type& __lhs = _M_read();                                   \
4224 	_M_write(__lhs __op __x);                                              \
4225 	return {_M_obj, _M_index};                                             \
4226       }
4227     _GLIBCXX_SIMD_ALL_ARITHMETICS(_GLIBCXX_SIMD_OP_);
4228     _GLIBCXX_SIMD_ALL_SHIFTS(_GLIBCXX_SIMD_OP_);
4229     _GLIBCXX_SIMD_ALL_BINARY(_GLIBCXX_SIMD_OP_);
4230 #undef _GLIBCXX_SIMD_OP_
4231 
4232     template <typename _Tp = void,
4233 	      typename
4234 	      = decltype(++declval<conditional_t<true, value_type, _Tp>&>())>
4235       _GLIBCXX_SIMD_INTRINSIC constexpr _SmartReference operator++() &&
4236       {
4237 	value_type __x = _M_read();
4238 	_M_write(++__x);
4239 	return {_M_obj, _M_index};
4240       }
4241 
4242     template <typename _Tp = void,
4243 	      typename
4244 	      = decltype(declval<conditional_t<true, value_type, _Tp>&>()++)>
4245       _GLIBCXX_SIMD_INTRINSIC constexpr value_type operator++(int) &&
4246       {
4247 	const value_type __r = _M_read();
4248 	value_type __x = __r;
4249 	_M_write(++__x);
4250 	return __r;
4251       }
4252 
4253     template <typename _Tp = void,
4254 	      typename
4255 	      = decltype(--declval<conditional_t<true, value_type, _Tp>&>())>
4256       _GLIBCXX_SIMD_INTRINSIC constexpr _SmartReference operator--() &&
4257       {
4258 	value_type __x = _M_read();
4259 	_M_write(--__x);
4260 	return {_M_obj, _M_index};
4261       }
4262 
4263     template <typename _Tp = void,
4264 	      typename
4265 	      = decltype(declval<conditional_t<true, value_type, _Tp>&>()--)>
4266       _GLIBCXX_SIMD_INTRINSIC constexpr value_type operator--(int) &&
4267       {
4268 	const value_type __r = _M_read();
4269 	value_type __x = __r;
4270 	_M_write(--__x);
4271 	return __r;
4272       }
4273 
4274     _GLIBCXX_SIMD_INTRINSIC friend void
4275     swap(_SmartReference&& __a, _SmartReference&& __b) noexcept(
4276       conjunction<
4277 	is_nothrow_constructible<value_type, _SmartReference&&>,
4278 	is_nothrow_assignable<_SmartReference&&, value_type&&>>::value)
4279     {
4280       value_type __tmp = static_cast<_SmartReference&&>(__a);
4281       static_cast<_SmartReference&&>(__a) = static_cast<value_type>(__b);
4282       static_cast<_SmartReference&&>(__b) = std::move(__tmp);
4283     }
4284 
4285     _GLIBCXX_SIMD_INTRINSIC friend void
4286     swap(value_type& __a, _SmartReference&& __b) noexcept(
4287       conjunction<
4288 	is_nothrow_constructible<value_type, value_type&&>,
4289 	is_nothrow_assignable<value_type&, value_type&&>,
4290 	is_nothrow_assignable<_SmartReference&&, value_type&&>>::value)
4291     {
4292       value_type __tmp(std::move(__a));
4293       __a = static_cast<value_type>(__b);
4294       static_cast<_SmartReference&&>(__b) = std::move(__tmp);
4295     }
4296 
4297     _GLIBCXX_SIMD_INTRINSIC friend void
4298     swap(_SmartReference&& __a, value_type& __b) noexcept(
4299       conjunction<
4300 	is_nothrow_constructible<value_type, _SmartReference&&>,
4301 	is_nothrow_assignable<value_type&, value_type&&>,
4302 	is_nothrow_assignable<_SmartReference&&, value_type&&>>::value)
4303     {
4304       value_type __tmp(__a);
4305       static_cast<_SmartReference&&>(__a) = std::move(__b);
4306       __b = std::move(__tmp);
4307     }
4308   };
4309 
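// Editorial note: _SmartReference is the proxy returned by simd::operator[]
// and simd_mask::operator[]. Writes such as `__v[0] = 1.f` or `__k[2] = true`
// go through _Accessor::_S_set via _M_write instead of forming a real lvalue.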
4310 // }}}
4311 // __scalar_abi_wrapper {{{
4312 template <int _Bytes>
4313   struct __scalar_abi_wrapper
4314   {
4315     template <typename _Tp> static constexpr size_t _S_full_size = 1;
4316     template <typename _Tp> static constexpr size_t _S_size = 1;
4317     template <typename _Tp> static constexpr bool _S_is_partial = false;
4318 
4319     template <typename _Tp, typename _Abi = simd_abi::scalar>
4320       static constexpr bool _S_is_valid_v
4321 	= _Abi::template _IsValid<_Tp>::value && sizeof(_Tp) == _Bytes;
4322   };
4323 
4324 // }}}
4325 // __decay_abi metafunction {{{
4326 template <typename _Tp>
4327   struct __decay_abi { using type = _Tp; };
4328 
4329 template <int _Bytes>
4330   struct __decay_abi<__scalar_abi_wrapper<_Bytes>>
4331   { using type = simd_abi::scalar; };
4332 
4333 // }}}
4334 // __find_next_valid_abi metafunction {{{1
4335 // Given an ABI tag A<N>, find an N2 < N such that A<N2>::_S_is_valid_v<_Tp>
4336 // is true, N2 is a power of 2, and A<N2>::_S_is_partial<_Tp> is false.
4337 // Recursion breaks once the next smaller tag would hold fewer than 2
4338 // elements; in that case type::_S_is_valid_v<_Tp> may be false.
4339 template <template <int> class _Abi, int _Bytes, typename _Tp>
4340   struct __find_next_valid_abi
4341   {
4342     static constexpr auto _S_choose()
4343     {
4344       constexpr int _NextBytes = std::__bit_ceil(_Bytes) / 2;
4345       using _NextAbi = _Abi<_NextBytes>;
4346       if constexpr (_NextBytes < sizeof(_Tp) * 2) // break recursion
4347 	return _Abi<_Bytes>();
4348       else if constexpr (_NextAbi::template _S_is_partial<_Tp> == false
4349 			 && _NextAbi::template _S_is_valid_v<_Tp>)
4350 	return _NextAbi();
4351       else
4352 	return __find_next_valid_abi<_Abi, _NextBytes, _Tp>::_S_choose();
4353     }
4354 
4355     using type = decltype(_S_choose());
4356   };
4357 
4358 template <int _Bytes, typename _Tp>
4359   struct __find_next_valid_abi<__scalar_abi_wrapper, _Bytes, _Tp>
4360   { using type = simd_abi::scalar; };
4361 
4362 // _AbiList {{{1
4363 template <template <int> class...>
4364   struct _AbiList
4365   {
4366     template <typename, int> static constexpr bool _S_has_valid_abi = false;
4367     template <typename, int> using _FirstValidAbi = void;
4368     template <typename, int> using _BestAbi = void;
4369   };
4370 
4371 template <template <int> class _A0, template <int> class... _Rest>
4372   struct _AbiList<_A0, _Rest...>
4373   {
4374     template <typename _Tp, int _Np>
4375       static constexpr bool _S_has_valid_abi
4376 	= _A0<sizeof(_Tp) * _Np>::template _S_is_valid_v<
4377 	    _Tp> || _AbiList<_Rest...>::template _S_has_valid_abi<_Tp, _Np>;
4378 
4379     template <typename _Tp, int _Np>
4380       using _FirstValidAbi = conditional_t<
4381 	_A0<sizeof(_Tp) * _Np>::template _S_is_valid_v<_Tp>,
4382 	typename __decay_abi<_A0<sizeof(_Tp) * _Np>>::type,
4383 	typename _AbiList<_Rest...>::template _FirstValidAbi<_Tp, _Np>>;
4384 
4385     template <typename _Tp, int _Np>
4386       static constexpr auto _S_determine_best_abi()
4387       {
4388 	static_assert(_Np >= 1);
4389 	constexpr int _Bytes = sizeof(_Tp) * _Np;
4390 	if constexpr (_Np == 1)
4391 	  return __make_dependent_t<_Tp, simd_abi::scalar>{};
4392 	else
4393 	  {
4394 	    constexpr int __fullsize = _A0<_Bytes>::template _S_full_size<_Tp>;
4395 	    // _A0<_Bytes> is good if:
4396 	    // 1. The ABI tag is valid for _Tp
4397 	    // 2. The storage overhead is no more than padding to fill the next
4398 	    //    power-of-2 number of bytes
4399 	    if constexpr (_A0<_Bytes>::template _S_is_valid_v<
4400 			    _Tp> && __fullsize / 2 < _Np)
4401 	      return typename __decay_abi<_A0<_Bytes>>::type{};
4402 	    else
4403 	      {
4404 		using _Bp =
4405 		  typename __find_next_valid_abi<_A0, _Bytes, _Tp>::type;
4406 		if constexpr (_Bp::template _S_is_valid_v<
4407 				_Tp> && _Bp::template _S_size<_Tp> <= _Np)
4408 		  return _Bp{};
4409 		else
4410 		  return
4411 		    typename _AbiList<_Rest...>::template _BestAbi<_Tp, _Np>{};
4412 	      }
4413 	  }
4414       }
4415 
4416     template <typename _Tp, int _Np>
4417       using _BestAbi = decltype(_S_determine_best_abi<_Tp, _Np>());
4418   };
4419 
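// Editorial note: _BestAbi is what simd_abi::deduce ultimately selects from.
// E.g. for _Tp = float, _Np = 4 it first considers _A0<16>; if that tag is
// invalid for the target it falls back via __find_next_valid_abi, and finally
// to the remaining ABI tags in the list.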
4420 // }}}1
4421 
4422 // The following lists all native ABIs, making them accessible to
4423 // simd_abi::deduce and select_best_vector_type_t (for fixed_size). Order
4424 // matters: whatever comes first has higher priority.
4425 using _AllNativeAbis = _AbiList<simd_abi::_VecBltnBtmsk, simd_abi::_VecBuiltin,
4426 				__scalar_abi_wrapper>;
4427 
4428 // valid _SimdTraits specialization {{{1
4429 template <typename _Tp, typename _Abi>
4430   struct _SimdTraits<_Tp, _Abi, void_t<typename _Abi::template _IsValid<_Tp>>>
4431   : _Abi::template __traits<_Tp> {};
4432 
4433 // __deduce_impl specializations {{{1
4434 // try all native ABIs (including scalar) first
4435 template <typename _Tp, size_t _Np>
4436   struct __deduce_impl<
4437     _Tp, _Np, enable_if_t<_AllNativeAbis::template _S_has_valid_abi<_Tp, _Np>>>
4438   { using type = _AllNativeAbis::_FirstValidAbi<_Tp, _Np>; };
4439 
4440 // fall back to fixed_size only if scalar and native ABIs don't match
4441 template <typename _Tp, size_t _Np, typename = void>
4442   struct __deduce_fixed_size_fallback {};
4443 
4444 template <typename _Tp, size_t _Np>
4445   struct __deduce_fixed_size_fallback<_Tp, _Np,
4446     enable_if_t<simd_abi::fixed_size<_Np>::template _S_is_valid_v<_Tp>>>
4447   { using type = simd_abi::fixed_size<_Np>; };
4448 
4449 template <typename _Tp, size_t _Np, typename>
4450   struct __deduce_impl : public __deduce_fixed_size_fallback<_Tp, _Np> {};
4451 
4452 //}}}1
4453 /// @endcond
4454 
4455 // simd_mask {{{
4456 template <typename _Tp, typename _Abi>
4457   class simd_mask : public _SimdTraits<_Tp, _Abi>::_MaskBase
4458   {
4459     // types, tags, and friends {{{
4460     using _Traits = _SimdTraits<_Tp, _Abi>;
4461     using _MemberType = typename _Traits::_MaskMember;
4462 
4463     // We map all masks with equal element sizeof to a single integer type, the
4464     // one given by __int_for_sizeof_t<_Tp>. This is the approach
4465     // [[gnu::vector_size(N)]] types take as well and it reduces the number of
4466     // template specializations in the implementation classes.
4467     using _Ip = __int_for_sizeof_t<_Tp>;
4468     static constexpr _Ip* _S_type_tag = nullptr;
4469 
4470     friend typename _Traits::_MaskBase;
4471     friend class simd<_Tp, _Abi>;       // to construct masks on return
4472     friend typename _Traits::_SimdImpl; // to construct masks on return and
4473 					// inspect data on masked operations
4474   public:
4475     using _Impl = typename _Traits::_MaskImpl;
4476     friend _Impl;
4477 
4478     // }}}
4479     // member types {{{
4480     using value_type = bool;
4481     using reference = _SmartReference<_MemberType, _Impl, value_type>;
4482     using simd_type = simd<_Tp, _Abi>;
4483     using abi_type = _Abi;
4484 
4485     // }}}
4486     static constexpr size_t size() // {{{
4487     { return __size_or_zero_v<_Tp, _Abi>; }
4488 
4489     // }}}
4490     // constructors & assignment {{{
4491     simd_mask() = default;
4492     simd_mask(const simd_mask&) = default;
4493     simd_mask(simd_mask&&) = default;
4494     simd_mask& operator=(const simd_mask&) = default;
4495     simd_mask& operator=(simd_mask&&) = default;
4496 
4497     // }}}
4498     // access to internal representation (optional feature) {{{
4499     _GLIBCXX_SIMD_ALWAYS_INLINE explicit
4500     simd_mask(typename _Traits::_MaskCastType __init)
4501     : _M_data{__init} {}
4502     // conversion to the internal type is done in _MaskBase
4503 
4504     // }}}
4505     // bitset interface (extension to be proposed) {{{
4506     // TS_FEEDBACK:
4507     // Conversion of simd_mask to and from bitset makes it much easier to
4508     // interface with other facilities. I suggest adding `static
4509     // simd_mask::from_bitset` and `simd_mask::to_bitset`.
4510     _GLIBCXX_SIMD_ALWAYS_INLINE static simd_mask
4511     __from_bitset(bitset<size()> bs)
4512     { return {__bitset_init, bs}; }
4513 
4514     _GLIBCXX_SIMD_ALWAYS_INLINE bitset<size()>
4515     __to_bitset() const
4516     { return _Impl::_S_to_bits(_M_data)._M_to_bitset(); }
4517 
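    // Illustrative example (not part of the original header): round-tripping
    // a mask __k through bitset, as the TS_FEEDBACK above suggests:
    //
    //   auto __bits = __k.__to_bitset();          // bitset<size()>
    //   auto __k2 = simd_mask::__from_bitset(__bits);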
4518     // }}}
4519     // explicit broadcast constructor {{{
4520     _GLIBCXX_SIMD_ALWAYS_INLINE explicit _GLIBCXX_SIMD_CONSTEXPR
4521     simd_mask(value_type __x)
4522     : _M_data(_Impl::template _S_broadcast<_Ip>(__x)) {}
4523 
4524     // }}}
4525     // implicit type conversion constructor {{{
4526   #ifdef _GLIBCXX_SIMD_ENABLE_IMPLICIT_MASK_CAST
4527     // proposed improvement
4528     template <typename _Up, typename _A2,
4529 	      typename = enable_if_t<simd_size_v<_Up, _A2> == size()>>
4530       _GLIBCXX_SIMD_ALWAYS_INLINE explicit(sizeof(_MemberType)
4531 	  != sizeof(typename _SimdTraits<_Up, _A2>::_MaskMember))
4532       simd_mask(const simd_mask<_Up, _A2>& __x)
4533       : simd_mask(__proposed::static_simd_cast<simd_mask>(__x)) {}
4534   #else
4535     // conforming to ISO/IEC 19570:2018
4536     template <typename _Up, typename = enable_if_t<conjunction<
4537 			      is_same<abi_type, simd_abi::fixed_size<size()>>,
4538 			      is_same<_Up, _Up>>::value>>
4539       _GLIBCXX_SIMD_ALWAYS_INLINE
4540       simd_mask(const simd_mask<_Up, simd_abi::fixed_size<size()>>& __x)
4541       : _M_data(_Impl::_S_from_bitmask(__data(__x), _S_type_tag)) {}
4542   #endif
4543 
4544     // }}}
4545     // load constructor {{{
4546     template <typename _Flags>
4547       _GLIBCXX_SIMD_ALWAYS_INLINE
4548       simd_mask(const value_type* __mem, _Flags)
4549       : _M_data(_Impl::template _S_load<_Ip>(
4550 	_Flags::template _S_apply<simd_mask>(__mem))) {}
4551 
4552     template <typename _Flags>
4553       _GLIBCXX_SIMD_ALWAYS_INLINE
4554       simd_mask(const value_type* __mem, simd_mask __k, _Flags)
4555       : _M_data{}
4556       {
4557 	_M_data
4558 	  = _Impl::_S_masked_load(_M_data, __k._M_data,
4559 				  _Flags::template _S_apply<simd_mask>(__mem));
4560       }
4561 
4562     // }}}
4563     // loads [simd_mask.load] {{{
4564     template <typename _Flags>
4565       _GLIBCXX_SIMD_ALWAYS_INLINE void
4566       copy_from(const value_type* __mem, _Flags)
4567       {
4568 	_M_data = _Impl::template _S_load<_Ip>(
4569 	  _Flags::template _S_apply<simd_mask>(__mem));
4570       }
4571 
4572     // }}}
4573     // stores [simd_mask.store] {{{
4574     template <typename _Flags>
4575       _GLIBCXX_SIMD_ALWAYS_INLINE void
4576       copy_to(value_type* __mem, _Flags) const
4577       { _Impl::_S_store(_M_data, _Flags::template _S_apply<simd_mask>(__mem)); }
4578 
4579     // }}}
4580     // scalar access {{{
4581     _GLIBCXX_SIMD_ALWAYS_INLINE reference
4582     operator[](size_t __i)
4583     {
4584       if (__i >= size())
4585 	__invoke_ub("Subscript %zu is out of range [0, %zu]", __i, size() - 1);
4586       return {_M_data, int(__i)};
4587     }
4588 
4589     _GLIBCXX_SIMD_ALWAYS_INLINE value_type
4590     operator[](size_t __i) const
4591     {
4592       if (__i >= size())
4593 	__invoke_ub("Subscript %zu is out of range [0, %zu]", __i, size() - 1);
4594       if constexpr (__is_scalar_abi<_Abi>())
4595 	return _M_data;
4596       else
4597 	return static_cast<bool>(_M_data[__i]);
4598     }
4599 
4600     // }}}
4601     // negation {{{
4602     _GLIBCXX_SIMD_ALWAYS_INLINE simd_mask
4603     operator!() const
4604     { return {__private_init, _Impl::_S_bit_not(_M_data)}; }
4605 
4606     // }}}
4607     // simd_mask binary operators [simd_mask.binary] {{{
4608   #ifdef _GLIBCXX_SIMD_ENABLE_IMPLICIT_MASK_CAST
4609     // simd_mask<int> && simd_mask<uint> needs disambiguation
4610     template <typename _Up, typename _A2,
4611 	      typename
4612 	      = enable_if_t<is_convertible_v<simd_mask<_Up, _A2>, simd_mask>>>
4613       _GLIBCXX_SIMD_ALWAYS_INLINE friend simd_mask
4614       operator&&(const simd_mask& __x, const simd_mask<_Up, _A2>& __y)
4615       {
4616 	return {__private_init,
4617 		_Impl::_S_logical_and(__x._M_data, simd_mask(__y)._M_data)};
4618       }
4619 
4620     template <typename _Up, typename _A2,
4621 	      typename
4622 	      = enable_if_t<is_convertible_v<simd_mask<_Up, _A2>, simd_mask>>>
4623       _GLIBCXX_SIMD_ALWAYS_INLINE friend simd_mask
4624       operator||(const simd_mask& __x, const simd_mask<_Up, _A2>& __y)
4625       {
4626 	return {__private_init,
4627 		_Impl::_S_logical_or(__x._M_data, simd_mask(__y)._M_data)};
4628       }
4629   #endif // _GLIBCXX_SIMD_ENABLE_IMPLICIT_MASK_CAST
4630 
4631     _GLIBCXX_SIMD_ALWAYS_INLINE friend simd_mask
4632     operator&&(const simd_mask& __x, const simd_mask& __y)
4633     {
4634       return {__private_init, _Impl::_S_logical_and(__x._M_data, __y._M_data)};
4635     }
4636 
4637     _GLIBCXX_SIMD_ALWAYS_INLINE friend simd_mask
4638     operator||(const simd_mask& __x, const simd_mask& __y)
4639     {
4640       return {__private_init, _Impl::_S_logical_or(__x._M_data, __y._M_data)};
4641     }
4642 
4643     _GLIBCXX_SIMD_ALWAYS_INLINE friend simd_mask
4644     operator&(const simd_mask& __x, const simd_mask& __y)
4645     { return {__private_init, _Impl::_S_bit_and(__x._M_data, __y._M_data)}; }
4646 
4647     _GLIBCXX_SIMD_ALWAYS_INLINE friend simd_mask
4648     operator|(const simd_mask& __x, const simd_mask& __y)
4649     { return {__private_init, _Impl::_S_bit_or(__x._M_data, __y._M_data)}; }
4650 
4651     _GLIBCXX_SIMD_ALWAYS_INLINE friend simd_mask
4652     operator^(const simd_mask& __x, const simd_mask& __y)
4653     { return {__private_init, _Impl::_S_bit_xor(__x._M_data, __y._M_data)}; }
4654 
4655     _GLIBCXX_SIMD_ALWAYS_INLINE friend simd_mask&
4656     operator&=(simd_mask& __x, const simd_mask& __y)
4657     {
4658       __x._M_data = _Impl::_S_bit_and(__x._M_data, __y._M_data);
4659       return __x;
4660     }
4661 
4662     _GLIBCXX_SIMD_ALWAYS_INLINE friend simd_mask&
4663     operator|=(simd_mask& __x, const simd_mask& __y)
4664     {
4665       __x._M_data = _Impl::_S_bit_or(__x._M_data, __y._M_data);
4666       return __x;
4667     }
4668 
4669     _GLIBCXX_SIMD_ALWAYS_INLINE friend simd_mask&
4670     operator^=(simd_mask& __x, const simd_mask& __y)
4671     {
4672       __x._M_data = _Impl::_S_bit_xor(__x._M_data, __y._M_data);
4673       return __x;
4674     }
4675 
4676     // }}}
4677     // simd_mask compares [simd_mask.comparison] {{{
4678     _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend simd_mask
4679     operator==(const simd_mask& __x, const simd_mask& __y)
4680     { return !operator!=(__x, __y); }
4681 
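    // two masks compare unequal exactly where their elements differ, so
    // operator!= is an element-wise XOR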
4682     _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend simd_mask
4683     operator!=(const simd_mask& __x, const simd_mask& __y)
4684     { return {__private_init, _Impl::_S_bit_xor(__x._M_data, __y._M_data)}; }
4685 
4686     // }}}
4687     // private_init ctor {{{
4688     _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR
4689     simd_mask(_PrivateInit, typename _Traits::_MaskMember __init)
4690     : _M_data(__init) {}
4691 
4692     // }}}
4693     // private_init generator ctor {{{
4694     template <typename _Fp, typename = decltype(bool(declval<_Fp>()(size_t())))>
4695       _GLIBCXX_SIMD_INTRINSIC constexpr
4696       simd_mask(_PrivateInit, _Fp&& __gen)
4697       : _M_data()
4698       {
4699 	__execute_n_times<size()>([&](auto __i) constexpr {
4700 	  _Impl::_S_set(_M_data, __i, __gen(__i));
4701 	});
4702       }
4703 
4704     // }}}
4705     // bitset_init ctor {{{
4706     _GLIBCXX_SIMD_INTRINSIC simd_mask(_BitsetInit, bitset<size()> __init)
4707     : _M_data(
4708 	_Impl::_S_from_bitmask(_SanitizedBitMask<size()>(__init), _S_type_tag))
4709     {}
4710 
4711     // }}}
4712     // __cvt {{{
4713     // TS_FEEDBACK:
4714     // The conversion operator this implements should be a ctor on simd_mask.
4715     // Calling .__cvt() on a simd_mask yields a proxy convertible to any simd_mask of equal size.
4716     // A useful variation: add `explicit(sizeof(_Tp) != sizeof(_Up))`
4717     struct _CvtProxy
4718     {
4719       template <typename _Up, typename _A2,
4720 		typename
4721 		= enable_if_t<simd_size_v<_Up, _A2> == simd_size_v<_Tp, _Abi>>>
4722 	_GLIBCXX_SIMD_ALWAYS_INLINE
4723 	operator simd_mask<_Up, _A2>() &&
4724 	{
4725 	  using namespace std::experimental::__proposed;
4726 	  return static_simd_cast<simd_mask<_Up, _A2>>(_M_data);
4727 	}
4728 
4729       const simd_mask<_Tp, _Abi>& _M_data;
4730     };
4731 
4732     _GLIBCXX_SIMD_INTRINSIC _CvtProxy
4733     __cvt() const
4734     { return {*this}; }
4735 
4736     // }}}
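    // Example (editorial sketch, stdx as above): __cvt() returns a
    // _CvtProxy, which converts to any simd_mask of equal size:
    //   stdx::fixed_size_simd_mask<int, 4> ki(true);
    //   stdx::fixed_size_simd_mask<float, 4> kf = ki.__cvt();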
4737     // operator?: overloads (suggested extension) {{{
4738   #ifdef __GXX_CONDITIONAL_IS_OVERLOADABLE__
4739     _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend simd_mask
4740     operator?:(const simd_mask& __k, const simd_mask& __where_true,
4741 	       const simd_mask& __where_false)
4742     {
4743       auto __ret = __where_false;
4744       _Impl::_S_masked_assign(__k._M_data, __ret._M_data, __where_true._M_data);
4745       return __ret;
4746     }
4747 
4748     template <typename _U1, typename _U2,
4749 	      typename _Rp = simd<common_type_t<_U1, _U2>, _Abi>,
4750 	      typename = enable_if_t<conjunction_v<
4751 		is_convertible<_U1, _Rp>, is_convertible<_U2, _Rp>,
4752 		is_convertible<simd_mask, typename _Rp::mask_type>>>>
4753       _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend _Rp
4754       operator?:(const simd_mask& __k, const _U1& __where_true,
4755 		 const _U2& __where_false)
4756       {
4757 	_Rp __ret = __where_false;
4758 	_Rp::_Impl::_S_masked_assign(
4759 	  __data(static_cast<typename _Rp::mask_type>(__k)), __data(__ret),
4760 	  __data(static_cast<_Rp>(__where_true)));
4761 	return __ret;
4762       }
4763 
4764   #ifdef _GLIBCXX_SIMD_ENABLE_IMPLICIT_MASK_CAST
4765     template <typename _Kp, typename _Ak, typename _Up, typename _Au,
4766 	      typename = enable_if_t<
4767 		conjunction_v<is_convertible<simd_mask<_Kp, _Ak>, simd_mask>,
4768 			      is_convertible<simd_mask<_Up, _Au>, simd_mask>>>>
4769       _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend simd_mask
4770       operator?:(const simd_mask<_Kp, _Ak>& __k, const simd_mask& __where_true,
4771 		 const simd_mask<_Up, _Au>& __where_false)
4772       {
4773 	simd_mask __ret = __where_false;
4774 	_Impl::_S_masked_assign(simd_mask(__k)._M_data, __ret._M_data,
4775 				__where_true._M_data);
4776 	return __ret;
4777       }
4778   #endif // _GLIBCXX_SIMD_ENABLE_IMPLICIT_MASK_CAST
4779   #endif // __GXX_CONDITIONAL_IS_OVERLOADABLE__
4780 
4781     // }}}
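    // Note (editorial): __GXX_CONDITIONAL_IS_OVERLOADABLE__ is, to our
    // knowledge, not defined by any released compiler; the portable way to
    // spell the blend implemented above is the where() API, which performs
    // the same masked assignment:
    //   stdx::native_simd<float> a(1.f), b(2.f);
    //   auto r = b;
    //   stdx::where(a < b, r) = a;  // element-wise: r = (a < b) ? a : b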
4782     // _M_is_constprop {{{
4783     _GLIBCXX_SIMD_INTRINSIC constexpr bool
4784     _M_is_constprop() const
4785     {
4786       if constexpr (__is_scalar_abi<_Abi>())
4787 	return __builtin_constant_p(_M_data);
4788       else
4789 	return _M_data._M_is_constprop();
4790     }
4791 
4792     // }}}
4793 
4794   private:
4795     friend const auto& __data<_Tp, abi_type>(const simd_mask&);
4796     friend auto& __data<_Tp, abi_type>(simd_mask&);
4797     alignas(_Traits::_S_mask_align) _MemberType _M_data;
4798   };
4799 
4800 // }}}
4801 
4802 /// @cond undocumented
4803 // __data(simd_mask) {{{
4804 template <typename _Tp, typename _Ap>
4805   _GLIBCXX_SIMD_INTRINSIC constexpr const auto&
4806   __data(const simd_mask<_Tp, _Ap>& __x)
4807   { return __x._M_data; }
4808 
4809 template <typename _Tp, typename _Ap>
4810   _GLIBCXX_SIMD_INTRINSIC constexpr auto&
4811   __data(simd_mask<_Tp, _Ap>& __x)
4812   { return __x._M_data; }
4813 
4814 // }}}
4815 /// @endcond
4816 
4817 // simd_mask reductions [simd_mask.reductions] {{{
4818 template <typename _Tp, typename _Abi>
4819   _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR bool
4820   all_of(const simd_mask<_Tp, _Abi>& __k) noexcept
4821   {
4822     if (__builtin_is_constant_evaluated() || __k._M_is_constprop())
4823       {
4824 	for (size_t __i = 0; __i < simd_size_v<_Tp, _Abi>; ++__i)
4825 	  if (!__k[__i])
4826 	    return false;
4827 	return true;
4828       }
4829     else
4830       return _Abi::_MaskImpl::_S_all_of(__k);
4831   }
4832 
4833 template <typename _Tp, typename _Abi>
4834   _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR bool
4835   any_of(const simd_mask<_Tp, _Abi>& __k) noexcept
4836   {
4837     if (__builtin_is_constant_evaluated() || __k._M_is_constprop())
4838       {
4839 	for (size_t __i = 0; __i < simd_size_v<_Tp, _Abi>; ++__i)
4840 	  if (__k[__i])
4841 	    return true;
4842 	return false;
4843       }
4844     else
4845       return _Abi::_MaskImpl::_S_any_of(__k);
4846   }
4847 
4848 template <typename _Tp, typename _Abi>
4849   _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR bool
4850   none_of(const simd_mask<_Tp, _Abi>& __k) noexcept
4851   {
4852     if (__builtin_is_constant_evaluated() || __k._M_is_constprop())
4853       {
4854 	for (size_t __i = 0; __i < simd_size_v<_Tp, _Abi>; ++__i)
4855 	  if (__k[__i])
4856 	    return false;
4857 	return true;
4858       }
4859     else
4860       return _Abi::_MaskImpl::_S_none_of(__k);
4861   }
4862 
4863 template <typename _Tp, typename _Abi>
4864   _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR bool
4865   some_of(const simd_mask<_Tp, _Abi>& __k) noexcept
4866   {
4867     if (__builtin_is_constant_evaluated() || __k._M_is_constprop())
4868       {
4869 	for (size_t __i = 1; __i < simd_size_v<_Tp, _Abi>; ++__i)
4870 	  if (__k[__i] != __k[__i - 1])
4871 	    return true;
4872 	return false;
4873       }
4874     else
4875       return _Abi::_MaskImpl::_S_some_of(__k);
4876   }
4877 
4878 template <typename _Tp, typename _Abi>
4879   _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR int
4880   popcount(const simd_mask<_Tp, _Abi>& __k) noexcept
4881   {
4882     if (__builtin_is_constant_evaluated() || __k._M_is_constprop())
4883       {
4884 	const int __r = __call_with_subscripts<simd_size_v<_Tp, _Abi>>(
4885 	  __k, [](auto... __elements) { return ((__elements != 0) + ...); });
4886 	if (__builtin_is_constant_evaluated() || __builtin_constant_p(__r))
4887 	  return __r;
4888       }
4889     return _Abi::_MaskImpl::_S_popcount(__k);
4890   }
4891 
4892 template <typename _Tp, typename _Abi>
4893   _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR int
4894   find_first_set(const simd_mask<_Tp, _Abi>& __k)
4895   {
4896     if (__builtin_is_constant_evaluated() || __k._M_is_constprop())
4897       {
4898 	constexpr size_t _Np = simd_size_v<_Tp, _Abi>;
4899 	const size_t _Idx = __call_with_n_evaluations<_Np>(
4900 	  [](auto... __indexes) { return std::min({__indexes...}); },
4901 	  [&](auto __i) { return __k[__i] ? +__i : _Np; });
4902 	if (_Idx >= _Np)
4903 	  __invoke_ub("find_first_set(empty mask) is UB");
4904 	if (__builtin_constant_p(_Idx))
4905 	  return _Idx;
4906       }
4907     return _Abi::_MaskImpl::_S_find_first_set(__k);
4908   }
4909 
4910 template <typename _Tp, typename _Abi>
4911   _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR int
4912   find_last_set(const simd_mask<_Tp, _Abi>& __k)
4913   {
4914     if (__builtin_is_constant_evaluated() || __k._M_is_constprop())
4915       {
4916 	constexpr size_t _Np = simd_size_v<_Tp, _Abi>;
4917 	const int _Idx = __call_with_n_evaluations<_Np>(
4918 	  [](auto... __indexes) { return std::max({__indexes...}); },
4919 	  [&](auto __i) { return __k[__i] ? int(__i) : -1; });
4920 	if (_Idx < 0)
4921 	  __invoke_ub("find_last_set(empty mask) is UB");
4922 	if (__builtin_constant_p(_Idx))
4923 	  return _Idx;
4924       }
4925     return _Abi::_MaskImpl::_S_find_last_set(__k);
4926   }
4927 
4928 _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR bool
4929 all_of(_ExactBool __x) noexcept
4930 { return __x; }
4931 
4932 _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR bool
4933 any_of(_ExactBool __x) noexcept
4934 { return __x; }
4935 
4936 _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR bool
4937 none_of(_ExactBool __x) noexcept
4938 { return !__x; }
4939 
4940 _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR bool
4941 some_of(_ExactBool) noexcept
4942 { return false; }
4943 
4944 _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR int
4945 popcount(_ExactBool __x) noexcept
4946 { return __x; }
4947 
4948 _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR int
4949 find_first_set(_ExactBool)
4950 { return 0; }
4951 
4952 _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR int
4953 find_last_set(_ExactBool)
4954 { return 0; }
4955 
4956 // }}}
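// Example (editorial sketch, stdx = std::experimental): the reductions
// above, for a mask that holds both values (assumes
// native_simd<int>::size() > 1):
//   stdx::native_simd<int> v([](int i) { return i; });  // 0, 1, 2, ...
//   auto k = v > 0;                    // false, true, true, ...
//   bool a = stdx::all_of(k);          // false: element 0 fails
//   bool n = stdx::none_of(k);         // false: some element passes
//   bool s = stdx::some_of(k);         // true: k is mixed
//   int  c = stdx::popcount(k);        // k.size() - 1
//   int  f = stdx::find_first_set(k);  // 1 (UB if k were all false)
//   int  l = stdx::find_last_set(k);   // k.size() - 1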
4957 
4958 /// @cond undocumented
4959 // _SimdIntOperators{{{1
4960 template <typename _V, typename _Tp, typename _Abi, bool>
4961   class _SimdIntOperators {};
4962 
4963 template <typename _V, typename _Tp, typename _Abi>
4964   class _SimdIntOperators<_V, _Tp, _Abi, true>
4965   {
4966     using _Impl = typename _SimdTraits<_Tp, _Abi>::_SimdImpl;
4967 
4968     _GLIBCXX_SIMD_INTRINSIC const _V& __derived() const
4969     { return *static_cast<const _V*>(this); }
4970 
4971     template <typename _Up>
4972       _GLIBCXX_SIMD_INTRINSIC static _GLIBCXX_SIMD_CONSTEXPR _V
4973       _S_make_derived(_Up&& __d)
4974       { return {__private_init, static_cast<_Up&&>(__d)}; }
4975 
4976   public:
4977     _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend
4978     _V&
4979     operator%=(_V& __lhs, const _V& __x)
4980     { return __lhs = __lhs % __x; }
4981 
4982     _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend
4983     _V&
4984     operator&=(_V& __lhs, const _V& __x)
4985     { return __lhs = __lhs & __x; }
4986 
4987     _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend
4988     _V&
4989     operator|=(_V& __lhs, const _V& __x)
4990     { return __lhs = __lhs | __x; }
4991 
4992     _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend
4993     _V&
4994     operator^=(_V& __lhs, const _V& __x)
4995     { return __lhs = __lhs ^ __x; }
4996 
4997     _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend
4998     _V&
4999     operator<<=(_V& __lhs, const _V& __x)
5000     { return __lhs = __lhs << __x; }
5001 
5002     _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend
5003     _V&
5004     operator>>=(_V& __lhs, const _V& __x)
5005     { return __lhs = __lhs >> __x; }
5006 
5007     _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend
5008     _V&
5009     operator<<=(_V& __lhs, int __x)
5010     { return __lhs = __lhs << __x; }
5011 
5012     _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend
5013     _V&
5014     operator>>=(_V& __lhs, int __x)
5015     { return __lhs = __lhs >> __x; }
5016 
5017     _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend
5018     _V
5019     operator%(const _V& __x, const _V& __y)
5020     {
5021       return _SimdIntOperators::_S_make_derived(
5022 	_Impl::_S_modulus(__data(__x), __data(__y)));
5023     }
5024 
5025     _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend
5026     _V
5027     operator&(const _V& __x, const _V& __y)
5028     {
5029       return _SimdIntOperators::_S_make_derived(
5030 	_Impl::_S_bit_and(__data(__x), __data(__y)));
5031     }
5032 
5033     _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend
5034     _V
5035     operator|(const _V& __x, const _V& __y)
5036     {
5037       return _SimdIntOperators::_S_make_derived(
5038 	_Impl::_S_bit_or(__data(__x), __data(__y)));
5039     }
5040 
5041     _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend
5042     _V
5043     operator^(const _V& __x, const _V& __y)
5044     {
5045       return _SimdIntOperators::_S_make_derived(
5046 	_Impl::_S_bit_xor(__data(__x), __data(__y)));
5047     }
5048 
5049     _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend
5050     _V
5051     operator<<(const _V& __x, const _V& __y)
5052     {
5053       return _SimdIntOperators::_S_make_derived(
5054 	_Impl::_S_bit_shift_left(__data(__x), __data(__y)));
5055     }
5056 
5057     _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend
5058     _V
5059     operator>>(const _V& __x, const _V& __y)
5060     {
5061       return _SimdIntOperators::_S_make_derived(
5062 	_Impl::_S_bit_shift_right(__data(__x), __data(__y)));
5063     }
5064 
5065     _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend
5066     _V
5067     operator<<(const _V& __x, int __y)
5068     {
5069       if (__y < 0)
5070 	__invoke_ub("The behavior is undefined if the right operand of a "
5071 		    "shift operation is negative. [expr.shift]\nA shift by "
5072 		    "%d was requested",
5073 		    __y);
5074       if (size_t(__y) >= sizeof(declval<_Tp>() << __y) * __CHAR_BIT__)
5075 	__invoke_ub(
5076 	  "The behavior is undefined if the right operand of a "
5077 	  "shift operation is greater than or equal to the width of the "
5078 	  "promoted left operand. [expr.shift]\nA shift by %d was requested",
5079 	  __y);
5080       return _SimdIntOperators::_S_make_derived(
5081 	_Impl::_S_bit_shift_left(__data(__x), __y));
5082     }
5083 
5084     _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend
5085     _V
5086     operator>>(const _V& __x, int __y)
5087     {
5088       if (__y < 0)
5089 	__invoke_ub(
5090 	  "The behavior is undefined if the right operand of a shift "
5091 	  "operation is negative. [expr.shift]\nA shift by %d was requested",
5092 	  __y);
5093       if (size_t(__y) >= sizeof(declval<_Tp>() << __y) * __CHAR_BIT__)
5094 	__invoke_ub(
5095 	  "The behavior is undefined if the right operand of a shift "
5096 	  "operation is greater than or equal to the width of the promoted "
5097 	  "left operand. [expr.shift]\nA shift by %d was requested",
5098 	  __y);
5099       return _SimdIntOperators::_S_make_derived(
5100 	_Impl::_S_bit_shift_right(__data(__x), __y));
5101     }
5102 
5103     // unary operators (for integral _Tp)
5104     _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR
5105     _V
5106     operator~() const
5107     { return {__private_init, _Impl::_S_complement(__derived()._M_data)}; }
5108   };
5109 
5110 //}}}1
5111 /// @endcond
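// Example (editorial sketch, stdx = std::experimental): the operators from
// _SimdIntOperators exist only for integral value_types. The scalar-shift
// overloads check the [expr.shift] preconditions via __invoke_ub, which
// reports the error only when _GLIBCXX_DEBUG_UB is defined and otherwise
// treats the condition as unreachable:
//   stdx::native_simd<unsigned> v(0x0Fu);
//   auto m = v % 4u;   // element-wise remainder
//   auto s = v << 2;   // shift every element by a scalar count
//   auto z = v & ~v;   // bitwise AND with the complement: all zeros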
5112 
5113 // simd {{{
5114 template <typename _Tp, typename _Abi>
5115   class simd : public _SimdIntOperators<
5116 		 simd<_Tp, _Abi>, _Tp, _Abi,
5117 		 conjunction<is_integral<_Tp>,
5118 			     typename _SimdTraits<_Tp, _Abi>::_IsValid>::value>,
5119 	       public _SimdTraits<_Tp, _Abi>::_SimdBase
5120   {
5121     using _Traits = _SimdTraits<_Tp, _Abi>;
5122     using _MemberType = typename _Traits::_SimdMember;
5123     using _CastType = typename _Traits::_SimdCastType;
5124     static constexpr _Tp* _S_type_tag = nullptr;
5125     friend typename _Traits::_SimdBase;
5126 
5127   public:
5128     using _Impl = typename _Traits::_SimdImpl;
5129     friend _Impl;
5130     friend _SimdIntOperators<simd, _Tp, _Abi, true>;
5131 
5132     using value_type = _Tp;
5133     using reference = _SmartReference<_MemberType, _Impl, value_type>;
5134     using mask_type = simd_mask<_Tp, _Abi>;
5135     using abi_type = _Abi;
5136 
5137     static constexpr size_t size()
5138     { return __size_or_zero_v<_Tp, _Abi>; }
5139 
5140     _GLIBCXX_SIMD_CONSTEXPR simd() = default;
5141     _GLIBCXX_SIMD_CONSTEXPR simd(const simd&) = default;
5142     _GLIBCXX_SIMD_CONSTEXPR simd(simd&&) noexcept = default;
5143     _GLIBCXX_SIMD_CONSTEXPR simd& operator=(const simd&) = default;
5144     _GLIBCXX_SIMD_CONSTEXPR simd& operator=(simd&&) noexcept = default;
5145 
5146     // implicit broadcast constructor
5147     template <typename _Up,
5148 	      typename = enable_if_t<!is_same_v<__remove_cvref_t<_Up>, bool>>>
5149       _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR
5150       simd(_ValuePreservingOrInt<_Up, value_type>&& __x)
5151       : _M_data(
5152 	_Impl::_S_broadcast(static_cast<value_type>(static_cast<_Up&&>(__x))))
5153       {}
5154 
5155     // implicit type conversion constructor (convert from fixed_size to
5156     // fixed_size)
5157     template <typename _Up>
5158       _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR
5159       simd(const simd<_Up, simd_abi::fixed_size<size()>>& __x,
5160 	   enable_if_t<
5161 	     conjunction<
5162 	       is_same<simd_abi::fixed_size<size()>, abi_type>,
5163 	       negation<__is_narrowing_conversion<_Up, value_type>>,
5164 	       __converts_to_higher_integer_rank<_Up, value_type>>::value,
5165 	     void*> = nullptr)
5166       : simd{static_cast<array<_Up, size()>>(__x).data(), vector_aligned} {}
5167 
5168     // explicit type conversion constructor
5169 #ifdef _GLIBCXX_SIMD_ENABLE_STATIC_CAST
5170     template <typename _Up, typename _A2,
5171 	      typename = decltype(static_simd_cast<simd>(
5172 		declval<const simd<_Up, _A2>&>()))>
5173       _GLIBCXX_SIMD_ALWAYS_INLINE explicit _GLIBCXX_SIMD_CONSTEXPR
5174       simd(const simd<_Up, _A2>& __x)
5175       : simd(static_simd_cast<simd>(__x)) {}
5176 #endif // _GLIBCXX_SIMD_ENABLE_STATIC_CAST
5177 
5178     // generator constructor
5179     template <typename _Fp>
5180       _GLIBCXX_SIMD_ALWAYS_INLINE explicit _GLIBCXX_SIMD_CONSTEXPR
5181       simd(_Fp&& __gen, _ValuePreservingOrInt<decltype(declval<_Fp>()(
5182 						declval<_SizeConstant<0>&>())),
5183 					      value_type>* = nullptr)
5184       : _M_data(_Impl::_S_generator(static_cast<_Fp&&>(__gen), _S_type_tag)) {}
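    // Example (editorial sketch, stdx as above): the generator is invoked
    // with _SizeConstant indexes 0 ... size()-1, so the argument can be
    // used where a constant expression is required:
    //   stdx::native_simd<float> iota([](int i) { return i; });  // 0, 1, ...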
5185 
5186     // load constructor
5187     template <typename _Up, typename _Flags>
5188       _GLIBCXX_SIMD_ALWAYS_INLINE
5189       simd(const _Up* __mem, _Flags)
5190       : _M_data(
5191 	  _Impl::_S_load(_Flags::template _S_apply<simd>(__mem), _S_type_tag))
5192       {}
5193 
5194     // loads [simd.load]
5195     template <typename _Up, typename _Flags>
5196       _GLIBCXX_SIMD_ALWAYS_INLINE void
5197       copy_from(const _Vectorizable<_Up>* __mem, _Flags)
5198       {
5199 	_M_data = static_cast<decltype(_M_data)>(
5200 	  _Impl::_S_load(_Flags::template _S_apply<simd>(__mem), _S_type_tag));
5201       }
5202 
5203     // stores [simd.store]
5204     template <typename _Up, typename _Flags>
5205       _GLIBCXX_SIMD_ALWAYS_INLINE void
5206       copy_to(_Vectorizable<_Up>* __mem, _Flags) const
5207       {
5208 	_Impl::_S_store(_M_data, _Flags::template _S_apply<simd>(__mem),
5209 			_S_type_tag);
5210       }
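    // Example (editorial sketch, stdx as above, plus <array>): round-trip
    // through memory; the tag names the alignment the caller guarantees
    // (element_aligned, vector_aligned, or overaligned<N>):
    //   using V = stdx::native_simd<float>;
    //   alignas(stdx::memory_alignment_v<V>) std::array<float, V::size()> buf{};
    //   V v;
    //   v.copy_from(buf.data(), stdx::vector_aligned);
    //   v += 1.f;
    //   v.copy_to(buf.data(), stdx::vector_aligned);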
5211 
5212     // scalar access
5213     _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR reference
5214     operator[](size_t __i)
5215     { return {_M_data, int(__i)}; }
5216 
5217     _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR value_type
5218     operator[]([[maybe_unused]] size_t __i) const
5219     {
5220       if constexpr (__is_scalar_abi<_Abi>())
5221 	{
5222 	  _GLIBCXX_DEBUG_ASSERT(__i == 0);
5223 	  return _M_data;
5224 	}
5225       else
5226 	return _M_data[__i];
5227     }
5228 
5229     // increment and decrement:
5230     _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR simd&
5231     operator++()
5232     {
5233       _Impl::_S_increment(_M_data);
5234       return *this;
5235     }
5236 
5237     _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR simd
5238     operator++(int)
5239     {
5240       simd __r = *this;
5241       _Impl::_S_increment(_M_data);
5242       return __r;
5243     }
5244 
5245     _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR simd&
5246     operator--()
5247     {
5248       _Impl::_S_decrement(_M_data);
5249       return *this;
5250     }
5251 
5252     _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR simd
5253     operator--(int)
5254     {
5255       simd __r = *this;
5256       _Impl::_S_decrement(_M_data);
5257       return __r;
5258     }
5259 
5260     // unary operators (for any _Tp)
5261     _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR mask_type
5262     operator!() const
5263     { return {__private_init, _Impl::_S_negate(_M_data)}; }
5264 
5265     _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR simd
5266     operator+() const
5267     { return *this; }
5268 
5269     _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR simd
5270     operator-() const
5271     { return {__private_init, _Impl::_S_unary_minus(_M_data)}; }
5272 
5273     // access to internal representation (suggested extension)
5274     _GLIBCXX_SIMD_ALWAYS_INLINE explicit _GLIBCXX_SIMD_CONSTEXPR
5275     simd(_CastType __init) : _M_data(__init) {}
5276 
5277     // compound assignment [simd.cassign]
5278     _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend simd&
5279     operator+=(simd& __lhs, const simd& __x)
5280     { return __lhs = __lhs + __x; }
5281 
5282     _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend simd&
5283     operator-=(simd& __lhs, const simd& __x)
5284     { return __lhs = __lhs - __x; }
5285 
5286     _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend simd&
5287     operator*=(simd& __lhs, const simd& __x)
5288     { return __lhs = __lhs * __x; }
5289 
5290     _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend simd&
5291     operator/=(simd& __lhs, const simd& __x)
5292     { return __lhs = __lhs / __x; }
5293 
5294     // binary operators [simd.binary]
5295     _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend simd
5296     operator+(const simd& __x, const simd& __y)
5297     { return {__private_init, _Impl::_S_plus(__x._M_data, __y._M_data)}; }
5298 
5299     _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend simd
5300     operator-(const simd& __x, const simd& __y)
5301     { return {__private_init, _Impl::_S_minus(__x._M_data, __y._M_data)}; }
5302 
5303     _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend simd
5304     operator*(const simd& __x, const simd& __y)
5305     { return {__private_init, _Impl::_S_multiplies(__x._M_data, __y._M_data)}; }
5306 
5307     _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend simd
5308     operator/(const simd& __x, const simd& __y)
5309     { return {__private_init, _Impl::_S_divides(__x._M_data, __y._M_data)}; }
5310 
5311     // compares [simd.comparison]
5312     _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend mask_type
5313     operator==(const simd& __x, const simd& __y)
5314     { return simd::_S_make_mask(_Impl::_S_equal_to(__x._M_data, __y._M_data)); }
5315 
5316     _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend mask_type
5317     operator!=(const simd& __x, const simd& __y)
5318     {
5319       return simd::_S_make_mask(
5320 	_Impl::_S_not_equal_to(__x._M_data, __y._M_data));
5321     }
5322 
5323     _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend mask_type
5324     operator<(const simd& __x, const simd& __y)
5325     { return simd::_S_make_mask(_Impl::_S_less(__x._M_data, __y._M_data)); }
5326 
5327     _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend mask_type
5328     operator<=(const simd& __x, const simd& __y)
5329     {
5330       return simd::_S_make_mask(_Impl::_S_less_equal(__x._M_data, __y._M_data));
5331     }
5332 
5333     _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend mask_type
5334     operator>(const simd& __x, const simd& __y)
5335     { return simd::_S_make_mask(_Impl::_S_less(__y._M_data, __x._M_data)); }
5336 
5337     _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend mask_type
5338     operator>=(const simd& __x, const simd& __y)
5339     {
5340       return simd::_S_make_mask(_Impl::_S_less_equal(__y._M_data, __x._M_data));
5341     }
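    // Example (editorial sketch, stdx as above): comparisons yield a
    // mask_type, which combines with where() for element-wise selection:
    //   stdx::native_simd<int> x(1), y(2);
    //   stdx::where(x < y, x) = y;  // assign y to x wherever x < y held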
5342 
5343     // operator?: overloads (suggested extension) {{{
5344 #ifdef __GXX_CONDITIONAL_IS_OVERLOADABLE__
5345     _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend simd
5346     operator?:(const mask_type& __k, const simd& __where_true,
5347 	const simd& __where_false)
5348     {
5349       auto __ret = __where_false;
5350       _Impl::_S_masked_assign(__data(__k), __data(__ret), __data(__where_true));
5351       return __ret;
5352     }
5353 
5354 #endif // __GXX_CONDITIONAL_IS_OVERLOADABLE__
5355     // }}}
5356 
5357     // "private" because of the first argument's namespace
5358     _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR
5359     simd(_PrivateInit, const _MemberType& __init)
5360     : _M_data(__init) {}
5361 
5362     // "private" because of the first argument's namespace
5363     _GLIBCXX_SIMD_INTRINSIC
5364     simd(_BitsetInit, bitset<size()> __init) : _M_data()
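    // zero-initialize, then use a masked assignment to set exactly the
    // elements selected by __init to ~value_type() (all bits one)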
5365     { where(mask_type(__bitset_init, __init), *this) = ~*this; }
5366 
5367     _GLIBCXX_SIMD_INTRINSIC constexpr bool
5368     _M_is_constprop() const
5369     {
5370       if constexpr (__is_scalar_abi<_Abi>())
5371 	return __builtin_constant_p(_M_data);
5372       else
5373 	return _M_data._M_is_constprop();
5374     }
5375 
5376   private:
5377     _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR static mask_type
5378     _S_make_mask(typename mask_type::_MemberType __k)
5379     { return {__private_init, __k}; }
5380 
5381     friend const auto& __data<value_type, abi_type>(const simd&);
5382     friend auto& __data<value_type, abi_type>(simd&);
5383     alignas(_Traits::_S_simd_align) _MemberType _M_data;
5384   };
5385 
5386 // }}}
5387 /// @cond undocumented
5388 // __data {{{
5389 template <typename _Tp, typename _Ap>
5390   _GLIBCXX_SIMD_INTRINSIC constexpr const auto&
5391   __data(const simd<_Tp, _Ap>& __x)
5392   { return __x._M_data; }
5393 
5394 template <typename _Tp, typename _Ap>
5395   _GLIBCXX_SIMD_INTRINSIC constexpr auto&
5396   __data(simd<_Tp, _Ap>& __x)
5397   { return __x._M_data; }
5398 
5399 // }}}
5400 namespace __float_bitwise_operators { //{{{
5401 template <typename _Tp, typename _Ap>
5402   _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR simd<_Tp, _Ap>
5403   operator^(const simd<_Tp, _Ap>& __a, const simd<_Tp, _Ap>& __b)
5404   {
5405     return {__private_init,
5406 	    _Ap::_SimdImpl::_S_bit_xor(__data(__a), __data(__b))};
5407   }
5408 
5409 template <typename _Tp, typename _Ap>
5410   _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR simd<_Tp, _Ap>
5411   operator|(const simd<_Tp, _Ap>& __a, const simd<_Tp, _Ap>& __b)
5412   {
5413     return {__private_init,
5414 	    _Ap::_SimdImpl::_S_bit_or(__data(__a), __data(__b))};
5415   }
5416 
5417 template <typename _Tp, typename _Ap>
5418   _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR simd<_Tp, _Ap>
5419   operator&(const simd<_Tp, _Ap>& __a, const simd<_Tp, _Ap>& __b)
5420   {
5421     return {__private_init,
5422 	    _Ap::_SimdImpl::_S_bit_and(__data(__a), __data(__b))};
5423   }
5424 
5425 template <typename _Tp, typename _Ap>
5426   _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR
5427   enable_if_t<is_floating_point_v<_Tp>, simd<_Tp, _Ap>>
5428   operator~(const simd<_Tp, _Ap>& __a)
5429   { return {__private_init, _Ap::_SimdImpl::_S_complement(__data(__a))}; }
5430 } // namespace __float_bitwise_operators }}}
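// Example (editorial sketch, stdx = std::experimental): these operators
// are opt-in for floating-point element types; a classic use is sign-bit
// manipulation:
//   using namespace stdx::__float_bitwise_operators;
//   stdx::native_simd<float> v(-2.f);
//   const stdx::native_simd<float> signmask(-0.f);
//   auto vabs = v & ~signmask;  // clears the sign bit: 2.f in every element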
5431 /// @endcond
5432 
5433 /// @}
5434 _GLIBCXX_SIMD_END_NAMESPACE
5435 
5436 #endif // __cplusplus >= 201703L
5437 #endif // _GLIBCXX_EXPERIMENTAL_SIMD_H
5438 
5439 // vim: foldmethod=marker foldmarker={{{,}}}
5440