// Definition of the public simd interfaces -*- C++ -*-

// Copyright (C) 2020-2022 Free Software Foundation, Inc.
//
// This file is part of the GNU ISO C++ Library.  This library is free
// software; you can redistribute it and/or modify it under the
// terms of the GNU General Public License as published by the
// Free Software Foundation; either version 3, or (at your option)
// any later version.

// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
// GNU General Public License for more details.

// Under Section 7 of GPL version 3, you are granted additional
// permissions described in the GCC Runtime Library Exception, version
// 3.1, as published by the Free Software Foundation.

// You should have received a copy of the GNU General Public License and
// a copy of the GCC Runtime Library Exception along with this program;
// see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
// <http://www.gnu.org/licenses/>.

#ifndef _GLIBCXX_EXPERIMENTAL_SIMD_H
#define _GLIBCXX_EXPERIMENTAL_SIMD_H

#if __cplusplus >= 201703L

#include "simd_detail.h"
#include "numeric_traits.h"
#include <bit>
#include <bitset>
#ifdef _GLIBCXX_DEBUG_UB
#include <cstdio> // for stderr
#endif
#include <cstring>
#include <cmath>
#include <functional>
#include <iosfwd>
#include <utility>

#if _GLIBCXX_SIMD_X86INTRIN
#include <x86intrin.h>
#elif _GLIBCXX_SIMD_HAVE_NEON
#pragma GCC diagnostic push
// narrowing conversion of '__a' from 'uint64_t' {aka 'long long unsigned int'} to
//   'int64x1_t' {aka 'long long int'} [-Wnarrowing]
#pragma GCC diagnostic ignored "-Wnarrowing"
#include <arm_neon.h>
#pragma GCC diagnostic pop
#endif
53 
54 /** @ingroup ts_simd
55  * @{
56  */
57 /* There are several closely related types, with the following naming
58  * convention:
59  * _Tp: vectorizable (arithmetic) type (or any type)
60  * _TV: __vector_type_t<_Tp, _Np>
61  * _TW: _SimdWrapper<_Tp, _Np>
62  * _TI: __intrinsic_type_t<_Tp, _Np>
63  * _TVT: _VectorTraits<_TV> or _VectorTraits<_TW>
64  * If one additional type is needed use _U instead of _T.
65  * Otherwise use _T\d, _TV\d, _TW\d, TI\d, _TVT\d.
66  *
67  * More naming conventions:
68  * _Ap or _Abi: An ABI tag from the simd_abi namespace
69  * _Ip: often used for integer types with sizeof(_Ip) == sizeof(_Tp),
70  *      _IV, _IW as for _TV, _TW
71  * _Np: number of elements (not bytes)
72  * _Bytes: number of bytes
73  *
74  * Variable names:
75  * __k: mask object (vector- or bitmask)
76  */
_GLIBCXX_SIMD_BEGIN_NAMESPACE

#if !_GLIBCXX_SIMD_X86INTRIN
using __m128  [[__gnu__::__vector_size__(16)]] = float;
using __m128d [[__gnu__::__vector_size__(16)]] = double;
using __m128i [[__gnu__::__vector_size__(16)]] = long long;
using __m256  [[__gnu__::__vector_size__(32)]] = float;
using __m256d [[__gnu__::__vector_size__(32)]] = double;
using __m256i [[__gnu__::__vector_size__(32)]] = long long;
using __m512  [[__gnu__::__vector_size__(64)]] = float;
using __m512d [[__gnu__::__vector_size__(64)]] = double;
using __m512i [[__gnu__::__vector_size__(64)]] = long long;
#endif

namespace simd_abi {
// simd_abi forward declarations {{{
// implementation details:
struct _Scalar;

template <int _Np>
  struct _Fixed;

// There are two major ABIs that appear on different architectures.
// Both have non-boolean values packed into an N-byte register
// -> #elements = N / sizeof(T)
// Masks differ:
// 1. Use value vector registers for masks (all 0 or all 1)
// 2. Use bitmasks (mask registers) with one bit per value in the corresponding
//    value vector
//
// Both can be partially used, masking off the rest when doing horizontal
// operations or operations that can trap (e.g. FP_INVALID or integer division
// by 0). This is encoded as the number of used bytes.
template <int _UsedBytes>
  struct _VecBuiltin;

template <int _UsedBytes>
  struct _VecBltnBtmsk;
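
// Illustration (not part of the interface): with ABI 1 (_VecBuiltin<16>) a
// 4-element float mask occupies a vector register whose 32-bit lanes are all
// zeros or all ones; with ABI 2 (_VecBltnBtmsk<64>, AVX-512 style) the same
// mask is a bitmask such as 0b1010, one bit per lane.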

template <typename _Tp, int _Np>
  using _VecN = _VecBuiltin<sizeof(_Tp) * _Np>;

template <int _UsedBytes = 16>
  using _Sse = _VecBuiltin<_UsedBytes>;

template <int _UsedBytes = 32>
  using _Avx = _VecBuiltin<_UsedBytes>;

template <int _UsedBytes = 64>
  using _Avx512 = _VecBltnBtmsk<_UsedBytes>;

template <int _UsedBytes = 16>
  using _Neon = _VecBuiltin<_UsedBytes>;

// implementation-defined:
using __sse = _Sse<>;
using __avx = _Avx<>;
using __avx512 = _Avx512<>;
using __neon = _Neon<>;
using __neon128 = _Neon<16>;
using __neon64 = _Neon<8>;

// standard:
template <typename _Tp, size_t _Np, typename...>
  struct deduce;

template <int _Np>
  using fixed_size = _Fixed<_Np>;

using scalar = _Scalar;

// }}}
} // namespace simd_abi
// forward declarations is_simd(_mask), simd(_mask), simd_size {{{
template <typename _Tp>
  struct is_simd;

template <typename _Tp>
  struct is_simd_mask;

template <typename _Tp, typename _Abi>
  class simd;

template <typename _Tp, typename _Abi>
  class simd_mask;

template <typename _Tp, typename _Abi>
  struct simd_size;

// }}}
// load/store flags {{{
struct element_aligned_tag
{
  template <typename _Tp, typename _Up = typename _Tp::value_type>
    static constexpr size_t _S_alignment = alignof(_Up);

  template <typename _Tp, typename _Up>
    _GLIBCXX_SIMD_INTRINSIC static constexpr _Up*
    _S_apply(_Up* __ptr)
    { return __ptr; }
};

struct vector_aligned_tag
{
  template <typename _Tp, typename _Up = typename _Tp::value_type>
    static constexpr size_t _S_alignment
      = std::__bit_ceil(sizeof(_Up) * _Tp::size());

  template <typename _Tp, typename _Up>
    _GLIBCXX_SIMD_INTRINSIC static constexpr _Up*
    _S_apply(_Up* __ptr)
    { return static_cast<_Up*>(__builtin_assume_aligned(__ptr, _S_alignment<_Tp, _Up>)); }
};

template <size_t _Np> struct overaligned_tag
{
  template <typename _Tp, typename _Up = typename _Tp::value_type>
    static constexpr size_t _S_alignment = _Np;

  template <typename _Tp, typename _Up>
    _GLIBCXX_SIMD_INTRINSIC static constexpr _Up*
    _S_apply(_Up* __ptr)
    { return static_cast<_Up*>(__builtin_assume_aligned(__ptr, _Np)); }
};

inline constexpr element_aligned_tag element_aligned = {};

inline constexpr vector_aligned_tag vector_aligned = {};

template <size_t _Np>
  inline constexpr overaligned_tag<_Np> overaligned = {};

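// Usage sketch (assumes a target where simd<float> has four lanes):
//   alignas(memory_alignment_v<simd<float>>) float __mem[4] = {1, 2, 3, 4};
//   simd<float> __v(__mem, vector_aligned);  // load with an alignment promise
//   __v.copy_to(__mem, element_aligned);     // store assuming only alignof(float)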
// }}}
template <size_t _Xp>
  using _SizeConstant = integral_constant<size_t, _Xp>;
// constexpr feature detection{{{
constexpr inline bool __have_mmx = _GLIBCXX_SIMD_HAVE_MMX;
constexpr inline bool __have_sse = _GLIBCXX_SIMD_HAVE_SSE;
constexpr inline bool __have_sse2 = _GLIBCXX_SIMD_HAVE_SSE2;
constexpr inline bool __have_sse3 = _GLIBCXX_SIMD_HAVE_SSE3;
constexpr inline bool __have_ssse3 = _GLIBCXX_SIMD_HAVE_SSSE3;
constexpr inline bool __have_sse4_1 = _GLIBCXX_SIMD_HAVE_SSE4_1;
constexpr inline bool __have_sse4_2 = _GLIBCXX_SIMD_HAVE_SSE4_2;
constexpr inline bool __have_xop = _GLIBCXX_SIMD_HAVE_XOP;
constexpr inline bool __have_avx = _GLIBCXX_SIMD_HAVE_AVX;
constexpr inline bool __have_avx2 = _GLIBCXX_SIMD_HAVE_AVX2;
constexpr inline bool __have_bmi = _GLIBCXX_SIMD_HAVE_BMI1;
constexpr inline bool __have_bmi2 = _GLIBCXX_SIMD_HAVE_BMI2;
constexpr inline bool __have_lzcnt = _GLIBCXX_SIMD_HAVE_LZCNT;
constexpr inline bool __have_sse4a = _GLIBCXX_SIMD_HAVE_SSE4A;
constexpr inline bool __have_fma = _GLIBCXX_SIMD_HAVE_FMA;
constexpr inline bool __have_fma4 = _GLIBCXX_SIMD_HAVE_FMA4;
constexpr inline bool __have_f16c = _GLIBCXX_SIMD_HAVE_F16C;
constexpr inline bool __have_popcnt = _GLIBCXX_SIMD_HAVE_POPCNT;
constexpr inline bool __have_avx512f = _GLIBCXX_SIMD_HAVE_AVX512F;
constexpr inline bool __have_avx512dq = _GLIBCXX_SIMD_HAVE_AVX512DQ;
constexpr inline bool __have_avx512vl = _GLIBCXX_SIMD_HAVE_AVX512VL;
constexpr inline bool __have_avx512bw = _GLIBCXX_SIMD_HAVE_AVX512BW;
constexpr inline bool __have_avx512dq_vl = __have_avx512dq && __have_avx512vl;
constexpr inline bool __have_avx512bw_vl = __have_avx512bw && __have_avx512vl;
constexpr inline bool __have_avx512bitalg = _GLIBCXX_SIMD_HAVE_AVX512BITALG;
constexpr inline bool __have_avx512vbmi2 = _GLIBCXX_SIMD_HAVE_AVX512VBMI2;
constexpr inline bool __have_avx512vbmi = _GLIBCXX_SIMD_HAVE_AVX512VBMI;
constexpr inline bool __have_avx512ifma = _GLIBCXX_SIMD_HAVE_AVX512IFMA;
constexpr inline bool __have_avx512cd = _GLIBCXX_SIMD_HAVE_AVX512CD;
constexpr inline bool __have_avx512vnni = _GLIBCXX_SIMD_HAVE_AVX512VNNI;
constexpr inline bool __have_avx512vpopcntdq = _GLIBCXX_SIMD_HAVE_AVX512VPOPCNTDQ;
constexpr inline bool __have_avx512vp2intersect = _GLIBCXX_SIMD_HAVE_AVX512VP2INTERSECT;

constexpr inline bool __have_neon = _GLIBCXX_SIMD_HAVE_NEON;
constexpr inline bool __have_neon_a32 = _GLIBCXX_SIMD_HAVE_NEON_A32;
constexpr inline bool __have_neon_a64 = _GLIBCXX_SIMD_HAVE_NEON_A64;
constexpr inline bool __support_neon_float =
#if defined __GCC_IEC_559
  __GCC_IEC_559 == 0;
#elif defined __FAST_MATH__
  true;
#else
  false;
#endif

#ifdef _ARCH_PWR10
constexpr inline bool __have_power10vec = true;
#else
constexpr inline bool __have_power10vec = false;
#endif
#ifdef __POWER9_VECTOR__
constexpr inline bool __have_power9vec = true;
#else
constexpr inline bool __have_power9vec = false;
#endif
#if defined __POWER8_VECTOR__
constexpr inline bool __have_power8vec = true;
#else
constexpr inline bool __have_power8vec = __have_power9vec;
#endif
#if defined __VSX__
constexpr inline bool __have_power_vsx = true;
#else
constexpr inline bool __have_power_vsx = __have_power8vec;
#endif
#if defined __ALTIVEC__
constexpr inline bool __have_power_vmx = true;
#else
constexpr inline bool __have_power_vmx = __have_power_vsx;
#endif

// }}}

namespace __detail
{
#ifdef math_errhandling
  // Determines _S_handle_fpexcept from math_errhandling if it is defined and expands to a constant
  // expression. math_errhandling may expand to an extern symbol, in which case a constexpr value
  // must be guessed.
  template <int = math_errhandling>
    constexpr bool
    __handle_fpexcept_impl(int)
    { return math_errhandling & MATH_ERREXCEPT; }
#endif

  // Fallback if math_errhandling doesn't work: with fast-math assume floating-point exceptions are
  // ignored, otherwise implement correct exception behavior. The int/float parameters make the
  // call __handle_fpexcept_impl(0) prefer the math_errhandling overload above when it is declared
  // and fall back to this overload (via int -> float conversion) when it is not.
  constexpr bool
  __handle_fpexcept_impl(float)
  {
#if defined __FAST_MATH__
    return false;
#else
    return true;
#endif
  }

  /// True if math functions must raise floating-point exceptions as specified by C17.
  static constexpr bool _S_handle_fpexcept = __handle_fpexcept_impl(0);

  constexpr std::uint_least64_t
  __floating_point_flags()
  {
    std::uint_least64_t __flags = 0;
    if constexpr (_S_handle_fpexcept)
      __flags |= 1;
#ifdef __FAST_MATH__
    __flags |= 1 << 1;
#elif __FINITE_MATH_ONLY__
    __flags |= 2 << 1;
#elif __GCC_IEC_559 < 2
    __flags |= 3 << 1;
#endif
    __flags |= (__FLT_EVAL_METHOD__ + 1) << 3;
    return __flags;
  }

  constexpr std::uint_least64_t
  __machine_flags()
  {
    if constexpr (__have_mmx || __have_sse)
      return __have_mmx
               | (__have_sse                << 1)
               | (__have_sse2               << 2)
               | (__have_sse3               << 3)
               | (__have_ssse3              << 4)
               | (__have_sse4_1             << 5)
               | (__have_sse4_2             << 6)
               | (__have_xop                << 7)
               | (__have_avx                << 8)
               | (__have_avx2               << 9)
               | (__have_bmi                << 10)
               | (__have_bmi2               << 11)
               | (__have_lzcnt              << 12)
               | (__have_sse4a              << 13)
               | (__have_fma                << 14)
               | (__have_fma4               << 15)
               | (__have_f16c               << 16)
               | (__have_popcnt             << 17)
               | (__have_avx512f            << 18)
               | (__have_avx512dq           << 19)
               | (__have_avx512vl           << 20)
               | (__have_avx512bw           << 21)
               | (__have_avx512bitalg       << 22)
               | (__have_avx512vbmi2        << 23)
               | (__have_avx512vbmi         << 24)
               | (__have_avx512ifma         << 25)
               | (__have_avx512cd           << 26)
               | (__have_avx512vnni         << 27)
               | (__have_avx512vpopcntdq    << 28)
               | (__have_avx512vp2intersect << 29);
    else if constexpr (__have_neon)
      return __have_neon
               | (__have_neon_a32 << 1)
               | (__have_neon_a64 << 2)
               | (__support_neon_float << 3);
    else if constexpr (__have_power_vmx)
      return __have_power_vmx
               | (__have_power_vsx  << 1)
               | (__have_power8vec  << 2)
               | (__have_power9vec  << 3)
               | (__have_power10vec << 4);
    else
      return 0;
  }

  namespace
  {
    struct _OdrEnforcer {};
  }

  template <std::uint_least64_t...>
    struct _MachineFlagsTemplate {};

  /**@internal
   * Use this type as the default template argument for all function templates
   * that are not declared always_inline. It ensures that a function
   * specialization which the compiler decides not to inline has a unique symbol
   * (_OdrEnforcer) or a symbol matching the machine/architecture flags
   * (_MachineFlagsTemplate). This helps to avoid ODR violations in cases where
   * users link TUs compiled with different flags. This is especially important
   * for using simd in libraries.
   */
  using __odr_helper
    = conditional_t<__machine_flags() == 0, _OdrEnforcer,
                    _MachineFlagsTemplate<__machine_flags(), __floating_point_flags()>>;

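  // Usage sketch (hypothetical helper, not declared in this header): taking
  // __odr_helper as a defaulted template parameter gives each machine-flag
  // combination its own symbol for out-of-line instantiations:
  //   template <typename _Tp, typename = __odr_helper>
  //     _Tp __nontrivial_simd_helper(_Tp __x);
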
  struct _Minimum
  {
    template <typename _Tp>
      _GLIBCXX_SIMD_INTRINSIC constexpr
      _Tp
      operator()(_Tp __a, _Tp __b) const
      {
        using std::min;
        return min(__a, __b);
      }
  };

  struct _Maximum
  {
    template <typename _Tp>
      _GLIBCXX_SIMD_INTRINSIC constexpr
      _Tp
      operator()(_Tp __a, _Tp __b) const
      {
        using std::max;
        return max(__a, __b);
      }
  };
} // namespace __detail

// unrolled/pack execution helpers
// __execute_n_times{{{
template <typename _Fp, size_t... _I>
  [[__gnu__::__flatten__]] _GLIBCXX_SIMD_INTRINSIC constexpr
  void
  __execute_on_index_sequence(_Fp&& __f, index_sequence<_I...>)
  { ((void)__f(_SizeConstant<_I>()), ...); }

template <typename _Fp>
  _GLIBCXX_SIMD_INTRINSIC constexpr void
  __execute_on_index_sequence(_Fp&&, index_sequence<>)
  { }

template <size_t _Np, typename _Fp>
  _GLIBCXX_SIMD_INTRINSIC constexpr void
  __execute_n_times(_Fp&& __f)
  {
    __execute_on_index_sequence(static_cast<_Fp&&>(__f),
                                make_index_sequence<_Np>{});
  }

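// Example (illustrative only): the functor receives _SizeConstant<0> ...
// _SizeConstant<_Np - 1>, so the index is usable as a constant expression:
//   int __sum = 0;
//   __execute_n_times<3>([&](auto __i) { __sum += __i; });  // 0 + 1 + 2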
// }}}
// __generate_from_n_evaluations{{{
template <typename _R, typename _Fp, size_t... _I>
  [[__gnu__::__flatten__]] _GLIBCXX_SIMD_INTRINSIC constexpr
  _R
  __execute_on_index_sequence_with_return(_Fp&& __f, index_sequence<_I...>)
  { return _R{__f(_SizeConstant<_I>())...}; }

template <size_t _Np, typename _R, typename _Fp>
  _GLIBCXX_SIMD_INTRINSIC constexpr _R
  __generate_from_n_evaluations(_Fp&& __f)
  {
    return __execute_on_index_sequence_with_return<_R>(
      static_cast<_Fp&&>(__f), make_index_sequence<_Np>{});
  }

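// Example (illustrative only): build an aggregate from _Np evaluations via
// pack expansion in a braced-init-list:
//   auto __iota = __generate_from_n_evaluations<4, array<int, 4>>(
//                   [](auto __i) { return int(__i); });  // {0, 1, 2, 3}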
// }}}
// __call_with_n_evaluations{{{
template <size_t... _I, typename _F0, typename _FArgs>
  [[__gnu__::__flatten__]] _GLIBCXX_SIMD_INTRINSIC constexpr
  auto
  __call_with_n_evaluations(index_sequence<_I...>, _F0&& __f0, _FArgs&& __fargs)
  { return __f0(__fargs(_SizeConstant<_I>())...); }

template <size_t _Np, typename _F0, typename _FArgs>
  _GLIBCXX_SIMD_INTRINSIC constexpr auto
  __call_with_n_evaluations(_F0&& __f0, _FArgs&& __fargs)
  {
    return __call_with_n_evaluations(make_index_sequence<_Np>{},
                                     static_cast<_F0&&>(__f0),
                                     static_cast<_FArgs&&>(__fargs));
  }

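// Example (illustrative only): __f0 is invoked once with all _Np generated
// arguments, which allows folds without intermediate storage:
//   int __sum = __call_with_n_evaluations<4>(
//                 [](auto... __xs) { return (... + __xs); },
//                 [](auto __i) { return int(__i) * int(__i); });  // 0+1+4+9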
// }}}
// __call_with_subscripts{{{
template <size_t _First = 0, size_t... _It, typename _Tp, typename _Fp>
  [[__gnu__::__flatten__]] _GLIBCXX_SIMD_INTRINSIC constexpr
  auto
  __call_with_subscripts(_Tp&& __x, index_sequence<_It...>, _Fp&& __fun)
  { return __fun(__x[_First + _It]...); }

template <size_t _Np, size_t _First = 0, typename _Tp, typename _Fp>
  _GLIBCXX_SIMD_INTRINSIC constexpr auto
  __call_with_subscripts(_Tp&& __x, _Fp&& __fun)
  {
    return __call_with_subscripts<_First>(static_cast<_Tp&&>(__x),
                                          make_index_sequence<_Np>(),
                                          static_cast<_Fp&&>(__fun));
  }

// }}}

// vvv ---- type traits ---- vvv
// integer type aliases{{{
using _UChar = unsigned char;
using _SChar = signed char;
using _UShort = unsigned short;
using _UInt = unsigned int;
using _ULong = unsigned long;
using _ULLong = unsigned long long;
using _LLong = long long;

//}}}
// __first_of_pack{{{
template <typename _T0, typename...>
  struct __first_of_pack
  { using type = _T0; };

template <typename... _Ts>
  using __first_of_pack_t = typename __first_of_pack<_Ts...>::type;

//}}}
// __value_type_or_identity_t {{{
template <typename _Tp>
  typename _Tp::value_type
  __value_type_or_identity_impl(int);

template <typename _Tp>
  _Tp
  __value_type_or_identity_impl(float);

template <typename _Tp>
  using __value_type_or_identity_t
    = decltype(__value_type_or_identity_impl<_Tp>(int()));

// }}}
// __is_vectorizable {{{
template <typename _Tp>
  struct __is_vectorizable : public is_arithmetic<_Tp> {};

template <>
  struct __is_vectorizable<bool> : public false_type {};

template <typename _Tp>
  inline constexpr bool __is_vectorizable_v = __is_vectorizable<_Tp>::value;

// Deduces to a vectorizable type
template <typename _Tp, typename = enable_if_t<__is_vectorizable_v<_Tp>>>
  using _Vectorizable = _Tp;

// }}}
// _LoadStorePtr / __is_possible_loadstore_conversion {{{
template <typename _Ptr, typename _ValueType>
  struct __is_possible_loadstore_conversion
  : conjunction<__is_vectorizable<_Ptr>, __is_vectorizable<_ValueType>> {};

template <>
  struct __is_possible_loadstore_conversion<bool, bool> : true_type {};

// Deduces to a type allowed for load/store with the given value type.
template <typename _Ptr, typename _ValueType,
          typename = enable_if_t<
            __is_possible_loadstore_conversion<_Ptr, _ValueType>::value>>
  using _LoadStorePtr = _Ptr;

// }}}
// __is_bitmask{{{
template <typename _Tp, typename = void_t<>>
  struct __is_bitmask : false_type {};

template <typename _Tp>
  inline constexpr bool __is_bitmask_v = __is_bitmask<_Tp>::value;

// the __mmaskXX case:
template <typename _Tp>
  struct __is_bitmask<_Tp,
    void_t<decltype(declval<unsigned&>() = declval<_Tp>() & 1u)>>
  : true_type {};

// }}}
// __int_for_sizeof{{{
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wpedantic"
template <size_t _Bytes>
  constexpr auto
  __int_for_sizeof()
  {
    static_assert(_Bytes > 0);
    if constexpr (_Bytes == sizeof(int))
      return int();
  #ifdef __clang__
    else if constexpr (_Bytes == sizeof(char))
      return char();
  #else
    else if constexpr (_Bytes == sizeof(_SChar))
      return _SChar();
  #endif
    else if constexpr (_Bytes == sizeof(short))
      return short();
  #ifndef __clang__
    else if constexpr (_Bytes == sizeof(long))
      return long();
  #endif
    else if constexpr (_Bytes == sizeof(_LLong))
      return _LLong();
  #ifdef __SIZEOF_INT128__
    else if constexpr (_Bytes == sizeof(__int128))
      return __int128();
  #endif // __SIZEOF_INT128__
    else if constexpr (_Bytes % sizeof(int) == 0)
      {
        constexpr size_t _Np = _Bytes / sizeof(int);
        struct _Ip
        {
          int _M_data[_Np];

          _GLIBCXX_SIMD_INTRINSIC constexpr _Ip
          operator&(_Ip __rhs) const
          {
            return __generate_from_n_evaluations<_Np, _Ip>(
              [&](auto __i) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
                return __rhs._M_data[__i] & _M_data[__i];
              });
          }

          _GLIBCXX_SIMD_INTRINSIC constexpr _Ip
          operator|(_Ip __rhs) const
          {
            return __generate_from_n_evaluations<_Np, _Ip>(
              [&](auto __i) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
                return __rhs._M_data[__i] | _M_data[__i];
              });
          }

          _GLIBCXX_SIMD_INTRINSIC constexpr _Ip
          operator^(_Ip __rhs) const
          {
            return __generate_from_n_evaluations<_Np, _Ip>(
              [&](auto __i) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
                return __rhs._M_data[__i] ^ _M_data[__i];
              });
          }

          _GLIBCXX_SIMD_INTRINSIC constexpr _Ip
          operator~() const
          {
            return __generate_from_n_evaluations<_Np, _Ip>(
              [&](auto __i) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA { return ~_M_data[__i]; });
          }
        };
        return _Ip{};
      }
    else
      static_assert(_Bytes == 0, "this should be unreachable");
  }
#pragma GCC diagnostic pop

template <typename _Tp>
  using __int_for_sizeof_t = decltype(__int_for_sizeof<sizeof(_Tp)>());

template <size_t _Np>
  using __int_with_sizeof_t = decltype(__int_for_sizeof<_Np>());

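// Examples (illustrative, assuming 4-byte int and 8-byte double):
// __int_for_sizeof_t<float> is int; __int_for_sizeof_t<double> is a signed
// 8-byte integer (long for GCC, long long for Clang); and a 16-byte request
// without __int128 support yields the generated _Ip record of four ints with
// element-wise &, |, ^ and ~.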
// }}}
// __is_fixed_size_abi{{{
template <typename _Tp>
  struct __is_fixed_size_abi : false_type {};

template <int _Np>
  struct __is_fixed_size_abi<simd_abi::fixed_size<_Np>> : true_type {};

template <typename _Tp>
  inline constexpr bool __is_fixed_size_abi_v = __is_fixed_size_abi<_Tp>::value;

// }}}
// __is_scalar_abi {{{
template <typename _Abi>
  constexpr bool
  __is_scalar_abi()
  { return is_same_v<simd_abi::scalar, _Abi>; }

// }}}
// __abi_bytes_v {{{
template <template <int> class _Abi, int _Bytes>
  constexpr int
  __abi_bytes_impl(_Abi<_Bytes>*)
  { return _Bytes; }

template <typename _Tp>
  constexpr int
  __abi_bytes_impl(_Tp*)
  { return -1; }

template <typename _Abi>
  inline constexpr int __abi_bytes_v
    = __abi_bytes_impl(static_cast<_Abi*>(nullptr));

// }}}
// __is_builtin_bitmask_abi {{{
template <typename _Abi>
  constexpr bool
  __is_builtin_bitmask_abi()
  { return is_same_v<simd_abi::_VecBltnBtmsk<__abi_bytes_v<_Abi>>, _Abi>; }

// }}}
// __is_sse_abi {{{
template <typename _Abi>
  constexpr bool
  __is_sse_abi()
  {
    constexpr auto _Bytes = __abi_bytes_v<_Abi>;
    return _Bytes <= 16 && is_same_v<simd_abi::_VecBuiltin<_Bytes>, _Abi>;
  }

// }}}
// __is_avx_abi {{{
template <typename _Abi>
  constexpr bool
  __is_avx_abi()
  {
    constexpr auto _Bytes = __abi_bytes_v<_Abi>;
    return _Bytes > 16 && _Bytes <= 32
           && is_same_v<simd_abi::_VecBuiltin<_Bytes>, _Abi>;
  }

// }}}
// __is_avx512_abi {{{
template <typename _Abi>
  constexpr bool
  __is_avx512_abi()
  {
    constexpr auto _Bytes = __abi_bytes_v<_Abi>;
    return _Bytes <= 64 && is_same_v<simd_abi::_Avx512<_Bytes>, _Abi>;
  }

// }}}
// __is_neon_abi {{{
template <typename _Abi>
  constexpr bool
  __is_neon_abi()
  {
    constexpr auto _Bytes = __abi_bytes_v<_Abi>;
    return _Bytes <= 16 && is_same_v<simd_abi::_VecBuiltin<_Bytes>, _Abi>;
  }

// }}}
// __make_dependent_t {{{
template <typename, typename _Up>
  struct __make_dependent
  { using type = _Up; };

template <typename _Tp, typename _Up>
  using __make_dependent_t = typename __make_dependent<_Tp, _Up>::type;

// }}}
// ^^^ ---- type traits ---- ^^^

// __invoke_ub{{{
template <typename... _Args>
  [[noreturn]] _GLIBCXX_SIMD_ALWAYS_INLINE void
  __invoke_ub([[maybe_unused]] const char* __msg, [[maybe_unused]] const _Args&... __args)
  {
#ifdef _GLIBCXX_DEBUG_UB
    __builtin_fprintf(stderr, __msg, __args...);
    __builtin_trap();
#else
    __builtin_unreachable();
#endif
  }

// }}}
// __assert_unreachable{{{
template <typename _Tp>
  struct __assert_unreachable
  { static_assert(!is_same_v<_Tp, _Tp>, "this should be unreachable"); };

// }}}
// __size_or_zero_v {{{
template <typename _Tp, typename _Ap, size_t _Np = simd_size<_Tp, _Ap>::value>
  constexpr size_t
  __size_or_zero_dispatch(int)
  { return _Np; }

template <typename _Tp, typename _Ap>
  constexpr size_t
  __size_or_zero_dispatch(float)
  { return 0; }

template <typename _Tp, typename _Ap>
  inline constexpr size_t __size_or_zero_v
     = __size_or_zero_dispatch<_Tp, _Ap>(0);

// }}}
// __div_roundup {{{
inline constexpr size_t
__div_roundup(size_t __a, size_t __b)
{ return (__a + __b - 1) / __b; }

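// e.g. __div_roundup(7, 4) == 2 and __div_roundup(8, 4) == 2; adding __b - 1
// before the truncating division rounds up instead of down.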
// }}}
// _ExactBool{{{
class _ExactBool
{
  const bool _M_data;

public:
  _GLIBCXX_SIMD_INTRINSIC constexpr
  _ExactBool(bool __b) : _M_data(__b) {}

  _ExactBool(int) = delete;

  _GLIBCXX_SIMD_INTRINSIC constexpr
  operator bool() const
  { return _M_data; }
};

// }}}
// __may_alias{{{
/**@internal
 * Helper __may_alias<_Tp> that turns _Tp into the type to be used for an
 * aliasing pointer. This adds the __may_alias attribute to _Tp (with compilers
 * that support it).
 */
template <typename _Tp>
  using __may_alias [[__gnu__::__may_alias__]] = _Tp;

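// Example (illustrative only): inspecting a float's bit pattern through an
// aliasing pointer is well-defined with the attribute applied:
//   float __f = 1.f;
//   const int __bits = *reinterpret_cast<const __may_alias<int>*>(&__f);
// The same cast without __may_alias would violate strict aliasing.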
// }}}
// _UnsupportedBase {{{
// simd and simd_mask base for unsupported <_Tp, _Abi>
struct _UnsupportedBase
{
  _UnsupportedBase() = delete;
  _UnsupportedBase(const _UnsupportedBase&) = delete;
  _UnsupportedBase& operator=(const _UnsupportedBase&) = delete;
  ~_UnsupportedBase() = delete;
};

// }}}
// _InvalidTraits {{{
/**
 * @internal
 * Defines the implementation of a given <_Tp, _Abi>.
 *
 * Implementations must ensure that only valid <_Tp, _Abi> instantiations are
 * possible. Static assertions in the type definition do not suffice. It is
 * important that SFINAE works.
 */
struct _InvalidTraits
{
  using _IsValid = false_type;
  using _SimdBase = _UnsupportedBase;
  using _MaskBase = _UnsupportedBase;

  static constexpr size_t _S_full_size = 0;
  static constexpr bool _S_is_partial = false;

  static constexpr size_t _S_simd_align = 1;
  struct _SimdImpl;
  struct _SimdMember {};
  struct _SimdCastType;

  static constexpr size_t _S_mask_align = 1;
  struct _MaskImpl;
  struct _MaskMember {};
  struct _MaskCastType;
};

// }}}
// _SimdTraits {{{
template <typename _Tp, typename _Abi, typename = void_t<>>
  struct _SimdTraits : _InvalidTraits {};

// }}}
// __private_init, __bitset_init{{{
/**
 * @internal
 * Tag used for private init constructor of simd and simd_mask
 */
inline constexpr struct _PrivateInit {} __private_init = {};

inline constexpr struct _BitsetInit {} __bitset_init = {};

// }}}
// __is_narrowing_conversion<_From, _To>{{{
template <typename _From, typename _To, bool = is_arithmetic_v<_From>,
          bool = is_arithmetic_v<_To>>
  struct __is_narrowing_conversion;

// ignore "signed/unsigned mismatch" in the following trait.
// The implicit conversions will do the right thing here.
template <typename _From, typename _To>
  struct __is_narrowing_conversion<_From, _To, true, true>
  : public __bool_constant<(
      __digits_v<_From> > __digits_v<_To>
      || __finite_max_v<_From> > __finite_max_v<_To>
      || __finite_min_v<_From> < __finite_min_v<_To>
      || (is_signed_v<_From> && is_unsigned_v<_To>))> {};

template <typename _Tp>
  struct __is_narrowing_conversion<_Tp, bool, true, true>
  : public true_type {};

template <>
  struct __is_narrowing_conversion<bool, bool, true, true>
  : public false_type {};

template <typename _Tp>
  struct __is_narrowing_conversion<_Tp, _Tp, true, true>
  : public false_type {};

template <typename _From, typename _To>
  struct __is_narrowing_conversion<_From, _To, false, true>
  : public negation<is_convertible<_From, _To>> {};

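// Examples (illustrative only): __is_narrowing_conversion<int, short> is true
// (int has more digits); <unsigned, int> is true (unsigned's maximum exceeds
// int's); <short, int> is false; and every arithmetic type other than bool
// itself narrows when converted to bool.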
911 // __converts_to_higher_integer_rank{{{
912 template <typename _From, typename _To, bool = (sizeof(_From) < sizeof(_To))>
913   struct __converts_to_higher_integer_rank : public true_type {};
914 
915 // this may fail for char -> short if sizeof(char) == sizeof(short)
916 template <typename _From, typename _To>
917   struct __converts_to_higher_integer_rank<_From, _To, false>
918   : public is_same<decltype(declval<_From>() + declval<_To>()), _To> {};
919 
920 // }}}
921 // __data(simd/simd_mask) {{{
922 template <typename _Tp, typename _Ap>
923   _GLIBCXX_SIMD_INTRINSIC constexpr const auto&
924   __data(const simd<_Tp, _Ap>& __x);
925 
926 template <typename _Tp, typename _Ap>
927   _GLIBCXX_SIMD_INTRINSIC constexpr auto&
928   __data(simd<_Tp, _Ap>& __x);
929 
930 template <typename _Tp, typename _Ap>
931   _GLIBCXX_SIMD_INTRINSIC constexpr const auto&
932   __data(const simd_mask<_Tp, _Ap>& __x);
933 
934 template <typename _Tp, typename _Ap>
935   _GLIBCXX_SIMD_INTRINSIC constexpr auto&
936   __data(simd_mask<_Tp, _Ap>& __x);
937 
938 // }}}
939 // _SimdConverter {{{
940 template <typename _FromT, typename _FromA, typename _ToT, typename _ToA,
941 	  typename = void>
942   struct _SimdConverter;
943 
944 template <typename _Tp, typename _Ap>
945   struct _SimdConverter<_Tp, _Ap, _Tp, _Ap, void>
946   {
947     template <typename _Up>
948       _GLIBCXX_SIMD_INTRINSIC const _Up&
949       operator()(const _Up& __x)
950       { return __x; }
951   };
952 
953 // }}}
954 // __to_value_type_or_member_type {{{
955 template <typename _V>
956   _GLIBCXX_SIMD_INTRINSIC constexpr auto
957   __to_value_type_or_member_type(const _V& __x) -> decltype(__data(__x))
958   { return __data(__x); }
959 
960 template <typename _V>
961   _GLIBCXX_SIMD_INTRINSIC constexpr const typename _V::value_type&
962   __to_value_type_or_member_type(const typename _V::value_type& __x)
963   { return __x; }
964 
965 // }}}
966 // __bool_storage_member_type{{{
967 template <size_t _Size>
968   struct __bool_storage_member_type;
969 
970 template <size_t _Size>
971   using __bool_storage_member_type_t =
972     typename __bool_storage_member_type<_Size>::type;
973 
974 // }}}
975 // _SimdTuple {{{
976 // why not tuple?
977 // 1. tuple gives no guarantee about the storage order, but I require
978 // storage
979 //    equivalent to array<_Tp, _Np>
980 // 2. direct access to the element type (first template argument)
981 // 3. enforces equal element type, only different _Abi types are allowed
982 template <typename _Tp, typename... _Abis>
983   struct _SimdTuple;
984 
985 //}}}
986 // __fixed_size_storage_t {{{
987 template <typename _Tp, int _Np>
988   struct __fixed_size_storage;
989 
990 template <typename _Tp, int _Np>
991   using __fixed_size_storage_t = typename __fixed_size_storage<_Tp, _Np>::type;
992 
993 // }}}
994 // _SimdWrapper fwd decl{{{
995 template <typename _Tp, size_t _Size, typename = void_t<>>
996   struct _SimdWrapper;
997 
998 template <typename _Tp>
999   using _SimdWrapper8 = _SimdWrapper<_Tp, 8 / sizeof(_Tp)>;
1000 template <typename _Tp>
1001   using _SimdWrapper16 = _SimdWrapper<_Tp, 16 / sizeof(_Tp)>;
1002 template <typename _Tp>
1003   using _SimdWrapper32 = _SimdWrapper<_Tp, 32 / sizeof(_Tp)>;
1004 template <typename _Tp>
1005   using _SimdWrapper64 = _SimdWrapper<_Tp, 64 / sizeof(_Tp)>;
1006 
1007 // }}}
1008 // __is_simd_wrapper {{{
1009 template <typename _Tp>
1010   struct __is_simd_wrapper : false_type {};
1011 
1012 template <typename _Tp, size_t _Np>
1013   struct __is_simd_wrapper<_SimdWrapper<_Tp, _Np>> : true_type {};
1014 
1015 template <typename _Tp>
1016   inline constexpr bool __is_simd_wrapper_v = __is_simd_wrapper<_Tp>::value;
1017 
1018 // }}}
1019 // _BitOps {{{
1020 struct _BitOps
1021 {
1022   // _S_bit_iteration {{{
1023   template <typename _Tp, typename _Fp>
1024     static void
1025     _S_bit_iteration(_Tp __mask, _Fp&& __f)
1026     {
1027       static_assert(sizeof(_ULLong) >= sizeof(_Tp));
1028       conditional_t<sizeof(_Tp) <= sizeof(_UInt), _UInt, _ULLong> __k;
1029       if constexpr (is_convertible_v<_Tp, decltype(__k)>)
1030 	__k = __mask;
1031       else
1032 	__k = __mask.to_ullong();
1033       while(__k)
1034 	{
1035 	  __f(std::__countr_zero(__k));
1036 	  __k &= (__k - 1);
1037 	}
1038     }
1039 
1040   //}}}
1041 };
1042 
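// Example (illustrative only): __k &= (__k - 1) clears the lowest set bit, so
// _S_bit_iteration(0b0101'0010u, __f) invokes __f(1), __f(4), __f(6).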
//}}}
// __increment, __decrement {{{
template <typename _Tp = void>
  struct __increment
  { constexpr _Tp operator()(_Tp __a) const { return ++__a; } };

template <>
  struct __increment<void>
  {
    template <typename _Tp>
      constexpr _Tp
      operator()(_Tp __a) const
      { return ++__a; }
  };

template <typename _Tp = void>
  struct __decrement
  { constexpr _Tp operator()(_Tp __a) const { return --__a; } };

template <>
  struct __decrement<void>
  {
    template <typename _Tp>
      constexpr _Tp
      operator()(_Tp __a) const
      { return --__a; }
  };

// }}}
// _ValuePreserving(OrInt) {{{
template <typename _From, typename _To,
          typename = enable_if_t<negation<
            __is_narrowing_conversion<__remove_cvref_t<_From>, _To>>::value>>
  using _ValuePreserving = _From;

template <typename _From, typename _To,
          typename _DecayedFrom = __remove_cvref_t<_From>,
          typename = enable_if_t<conjunction<
            is_convertible<_From, _To>,
            disjunction<
              is_same<_DecayedFrom, _To>, is_same<_DecayedFrom, int>,
              conjunction<is_same<_DecayedFrom, _UInt>, is_unsigned<_To>>,
              negation<__is_narrowing_conversion<_DecayedFrom, _To>>>>::value>>
  using _ValuePreservingOrInt = _From;

// }}}
// __intrinsic_type {{{
template <typename _Tp, size_t _Bytes, typename = void_t<>>
  struct __intrinsic_type;

template <typename _Tp, size_t _Size>
  using __intrinsic_type_t =
    typename __intrinsic_type<_Tp, _Size * sizeof(_Tp)>::type;

template <typename _Tp>
  using __intrinsic_type2_t = typename __intrinsic_type<_Tp, 2>::type;
template <typename _Tp>
  using __intrinsic_type4_t = typename __intrinsic_type<_Tp, 4>::type;
template <typename _Tp>
  using __intrinsic_type8_t = typename __intrinsic_type<_Tp, 8>::type;
template <typename _Tp>
  using __intrinsic_type16_t = typename __intrinsic_type<_Tp, 16>::type;
template <typename _Tp>
  using __intrinsic_type32_t = typename __intrinsic_type<_Tp, 32>::type;
template <typename _Tp>
  using __intrinsic_type64_t = typename __intrinsic_type<_Tp, 64>::type;

// }}}
// _BitMask {{{
template <size_t _Np, bool _Sanitized = false>
  struct _BitMask;

template <size_t _Np, bool _Sanitized>
  struct __is_bitmask<_BitMask<_Np, _Sanitized>, void> : true_type {};

template <size_t _Np>
  using _SanitizedBitMask = _BitMask<_Np, true>;

template <size_t _Np, bool _Sanitized>
  struct _BitMask
  {
    static_assert(_Np > 0);

    static constexpr size_t _NBytes = __div_roundup(_Np, __CHAR_BIT__);

    using _Tp = conditional_t<_Np == 1, bool,
                              make_unsigned_t<__int_with_sizeof_t<std::min(
                                sizeof(_ULLong), std::__bit_ceil(_NBytes))>>>;

    static constexpr int _S_array_size = __div_roundup(_NBytes, sizeof(_Tp));

    _Tp _M_bits[_S_array_size];

    static constexpr int _S_unused_bits
      = _Np == 1 ? 0 : _S_array_size * sizeof(_Tp) * __CHAR_BIT__ - _Np;

    static constexpr _Tp _S_bitmask = +_Tp(~_Tp()) >> _S_unused_bits;

    constexpr _BitMask() noexcept = default;

    constexpr _BitMask(unsigned long long __x) noexcept
      : _M_bits{static_cast<_Tp>(__x)} {}

    _BitMask(bitset<_Np> __x) noexcept : _BitMask(__x.to_ullong()) {}

    constexpr _BitMask(const _BitMask&) noexcept = default;

    template <bool _RhsSanitized, typename = enable_if_t<_RhsSanitized == false
                                                         && _Sanitized == true>>
      constexpr _BitMask(const _BitMask<_Np, _RhsSanitized>& __rhs) noexcept
        : _BitMask(__rhs._M_sanitized()) {}

    constexpr operator _SimdWrapper<bool, _Np>() const noexcept
    {
      static_assert(_S_array_size == 1);
      return _M_bits[0];
    }

    // precondition: is sanitized
    constexpr _Tp
    _M_to_bits() const noexcept
    {
      static_assert(_S_array_size == 1);
      return _M_bits[0];
    }

    // precondition: is sanitized
    constexpr unsigned long long
    to_ullong() const noexcept
    {
      static_assert(_S_array_size == 1);
      return _M_bits[0];
    }

    // precondition: is sanitized
    constexpr unsigned long
    to_ulong() const noexcept
    {
      static_assert(_S_array_size == 1);
      return _M_bits[0];
    }

    constexpr bitset<_Np>
    _M_to_bitset() const noexcept
    {
      static_assert(_S_array_size == 1);
      return _M_bits[0];
    }

    constexpr decltype(auto)
    _M_sanitized() const noexcept
    {
      if constexpr (_Sanitized)
        return *this;
      else if constexpr (_Np == 1)
        return _SanitizedBitMask<_Np>(_M_bits[0]);
      else
        {
          _SanitizedBitMask<_Np> __r = {};
          for (int __i = 0; __i < _S_array_size; ++__i)
            __r._M_bits[__i] = _M_bits[__i];
          if constexpr (_S_unused_bits > 0)
            __r._M_bits[_S_array_size - 1] &= _S_bitmask;
          return __r;
        }
    }

    template <size_t _Mp, bool _LSanitized>
      constexpr _BitMask<_Np + _Mp, _Sanitized>
      _M_prepend(_BitMask<_Mp, _LSanitized> __lsb) const noexcept
      {
        constexpr size_t _RN = _Np + _Mp;
        using _Rp = _BitMask<_RN, _Sanitized>;
        if constexpr (_Rp::_S_array_size == 1)
          {
            _Rp __r{{_M_bits[0]}};
            __r._M_bits[0] <<= _Mp;
            __r._M_bits[0] |= __lsb._M_sanitized()._M_bits[0];
            return __r;
          }
        else
          __assert_unreachable<_Rp>();
      }

    // Return a new _BitMask with size _NewSize while dropping _DropLsb least
    // significant bits. If the operation implicitly produces a sanitized bitmask,
    // the result type will have _Sanitized set.
    template <size_t _DropLsb, size_t _NewSize = _Np - _DropLsb>
      constexpr auto
      _M_extract() const noexcept
      {
        static_assert(_Np > _DropLsb);
        static_assert(_DropLsb + _NewSize <= sizeof(_ULLong) * __CHAR_BIT__,
                      "not implemented for bitmasks larger than one ullong");
        if constexpr (_NewSize == 1)
          // must sanitize because the return _Tp is bool
          return _SanitizedBitMask<1>(_M_bits[0] & (_Tp(1) << _DropLsb));
        else
          return _BitMask<_NewSize,
                          ((_NewSize + _DropLsb == sizeof(_Tp) * __CHAR_BIT__
                            && _NewSize + _DropLsb <= _Np)
                           || ((_Sanitized || _Np == sizeof(_Tp) * __CHAR_BIT__)
                               && _NewSize + _DropLsb >= _Np))>(_M_bits[0]
                                                                >> _DropLsb);
      }

    // True if all bits are set. Implicitly sanitizes if _Sanitized == false.
    constexpr bool
    all() const noexcept
    {
      if constexpr (_Np == 1)
        return _M_bits[0];
      else if constexpr (!_Sanitized)
        return _M_sanitized().all();
      else
        {
          constexpr _Tp __allbits = ~_Tp();
          for (int __i = 0; __i < _S_array_size - 1; ++__i)
            if (_M_bits[__i] != __allbits)
              return false;
          return _M_bits[_S_array_size - 1] == _S_bitmask;
        }
    }

    // True if at least one bit is set. Implicitly sanitizes if _Sanitized ==
    // false.
    constexpr bool
    any() const noexcept
    {
      if constexpr (_Np == 1)
        return _M_bits[0];
      else if constexpr (!_Sanitized)
        return _M_sanitized().any();
      else
        {
          for (int __i = 0; __i < _S_array_size - 1; ++__i)
            if (_M_bits[__i] != 0)
              return true;
          return _M_bits[_S_array_size - 1] != 0;
        }
    }

    // True if no bit is set. Implicitly sanitizes if _Sanitized == false.
    constexpr bool
    none() const noexcept
    {
      if constexpr (_Np == 1)
        return !_M_bits[0];
      else if constexpr (!_Sanitized)
        return _M_sanitized().none();
      else
        {
          for (int __i = 0; __i < _S_array_size - 1; ++__i)
            if (_M_bits[__i] != 0)
              return false;
          return _M_bits[_S_array_size - 1] == 0;
        }
    }

    // Returns the number of set bits. Implicitly sanitizes if _Sanitized ==
    // false.
    constexpr int
    count() const noexcept
    {
      if constexpr (_Np == 1)
        return _M_bits[0];
      else if constexpr (!_Sanitized)
        return _M_sanitized().count();
      else
        {
          int __result = __builtin_popcountll(_M_bits[0]);
          for (int __i = 1; __i < _S_array_size; ++__i)
            __result += __builtin_popcountll(_M_bits[__i]);
          return __result;
        }
    }

    // Returns the bit at offset __i as bool.
    constexpr bool
    operator[](size_t __i) const noexcept
    {
      if constexpr (_Np == 1)
        return _M_bits[0];
      else if constexpr (_S_array_size == 1)
        return (_M_bits[0] >> __i) & 1;
      else
        {
          const size_t __j = __i / (sizeof(_Tp) * __CHAR_BIT__);
          const size_t __shift = __i % (sizeof(_Tp) * __CHAR_BIT__);
          return (_M_bits[__j] >> __shift) & 1;
        }
    }

    template <size_t __i>
      constexpr bool
      operator[](_SizeConstant<__i>) const noexcept
      {
        static_assert(__i < _Np);
        constexpr size_t __j = __i / (sizeof(_Tp) * __CHAR_BIT__);
        constexpr size_t __shift = __i % (sizeof(_Tp) * __CHAR_BIT__);
        return static_cast<bool>(_M_bits[__j] & (_Tp(1) << __shift));
      }

    // Set the bit at offset __i to __x.
    constexpr void
    set(size_t __i, bool __x) noexcept
    {
      if constexpr (_Np == 1)
        _M_bits[0] = __x;
      else if constexpr (_S_array_size == 1)
        {
          _M_bits[0] &= ~_Tp(_Tp(1) << __i);
          _M_bits[0] |= _Tp(_Tp(__x) << __i);
        }
      else
        {
          const size_t __j = __i / (sizeof(_Tp) * __CHAR_BIT__);
          const size_t __shift = __i % (sizeof(_Tp) * __CHAR_BIT__);
          _M_bits[__j] &= ~_Tp(_Tp(1) << __shift);
          _M_bits[__j] |= _Tp(_Tp(__x) << __shift);
        }
    }

    template <size_t __i>
      constexpr void
      set(_SizeConstant<__i>, bool __x) noexcept
      {
        static_assert(__i < _Np);
        if constexpr (_Np == 1)
          _M_bits[0] = __x;
        else
          {
            constexpr size_t __j = __i / (sizeof(_Tp) * __CHAR_BIT__);
            constexpr size_t __shift = __i % (sizeof(_Tp) * __CHAR_BIT__);
            constexpr _Tp __mask = ~_Tp(_Tp(1) << __shift);
            _M_bits[__j] &= __mask;
            _M_bits[__j] |= _Tp(_Tp(__x) << __shift);
          }
      }

    // Inverts all bits. Sanitized input leads to sanitized output.
    constexpr _BitMask
    operator~() const noexcept
    {
      if constexpr (_Np == 1)
        return !_M_bits[0];
      else
        {
          _BitMask __result{};
          for (int __i = 0; __i < _S_array_size - 1; ++__i)
            __result._M_bits[__i] = ~_M_bits[__i];
          if constexpr (_Sanitized)
            __result._M_bits[_S_array_size - 1]
              = _M_bits[_S_array_size - 1] ^ _S_bitmask;
          else
            __result._M_bits[_S_array_size - 1] = ~_M_bits[_S_array_size - 1];
          return __result;
        }
    }

    constexpr _BitMask&
    operator^=(const _BitMask& __b) & noexcept
    {
      __execute_n_times<_S_array_size>(
        [&](auto __i) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA { _M_bits[__i] ^= __b._M_bits[__i]; });
      return *this;
    }

    constexpr _BitMask&
    operator|=(const _BitMask& __b) & noexcept
    {
      __execute_n_times<_S_array_size>(
        [&](auto __i) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA { _M_bits[__i] |= __b._M_bits[__i]; });
      return *this;
    }

    constexpr _BitMask&
    operator&=(const _BitMask& __b) & noexcept
    {
      __execute_n_times<_S_array_size>(
        [&](auto __i) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA { _M_bits[__i] &= __b._M_bits[__i]; });
      return *this;
    }

    friend constexpr _BitMask
    operator^(const _BitMask& __a, const _BitMask& __b) noexcept
    {
      _BitMask __r = __a;
      __r ^= __b;
      return __r;
    }

    friend constexpr _BitMask
    operator|(const _BitMask& __a, const _BitMask& __b) noexcept
    {
      _BitMask __r = __a;
      __r |= __b;
      return __r;
    }

    friend constexpr _BitMask
    operator&(const _BitMask& __a, const _BitMask& __b) noexcept
    {
      _BitMask __r = __a;
      __r &= __b;
      return __r;
    }

    _GLIBCXX_SIMD_INTRINSIC
    constexpr bool
    _M_is_constprop() const
    {
      if constexpr (_S_array_size == 0)
        return __builtin_constant_p(_M_bits[0]);
      else
        {
          for (int __i = 0; __i < _S_array_size; ++__i)
            if (!__builtin_constant_p(_M_bits[__i]))
              return false;
          return true;
        }
    }
  };

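// Example (illustrative only): _BitMask<6> stores its bits in one unsigned
// char with _S_unused_bits == 2; _BitMask<6>(0b0110).count() == 2, bit 1 reads
// as true via operator[], and _M_sanitized() clears the two unused high bits.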
// }}}

// vvv ---- builtin vector types [[gnu::vector_size(N)]] and operations ---- vvv
// __min_vector_size {{{
template <typename _Tp = void>
  static inline constexpr int __min_vector_size = 2 * sizeof(_Tp);

#if _GLIBCXX_SIMD_HAVE_NEON
template <>
  inline constexpr int __min_vector_size<void> = 8;
#else
template <>
  inline constexpr int __min_vector_size<void> = 16;
#endif

// }}}
// __vector_type {{{
template <typename _Tp, size_t _Np, typename = void>
  struct __vector_type_n {};

// substitution failure for the 0-element case
template <typename _Tp>
  struct __vector_type_n<_Tp, 0, void> {};

// special case 1-element to be _Tp itself
template <typename _Tp>
  struct __vector_type_n<_Tp, 1, enable_if_t<__is_vectorizable_v<_Tp>>>
  { using type = _Tp; };

// else, use GNU-style builtin vector types
template <typename _Tp, size_t _Np>
  struct __vector_type_n<_Tp, _Np, enable_if_t<__is_vectorizable_v<_Tp> && _Np >= 2>>
  {
    static constexpr size_t _S_Np2 = std::__bit_ceil(_Np * sizeof(_Tp));

    static constexpr size_t _S_Bytes =
#ifdef __i386__
      // Using [[gnu::vector_size(8)]] would wreak havoc on the FPU because
      // those objects are passed via MMX registers and nothing ever calls EMMS.
      _S_Np2 == 8 ? 16 :
#endif
      _S_Np2 < __min_vector_size<_Tp> ? __min_vector_size<_Tp>
                                      : _S_Np2;

    using type [[__gnu__::__vector_size__(_S_Bytes)]] = _Tp;
  };

template <typename _Tp, size_t _Bytes, size_t = _Bytes % sizeof(_Tp)>
  struct __vector_type;

template <typename _Tp, size_t _Bytes>
  struct __vector_type<_Tp, _Bytes, 0>
  : __vector_type_n<_Tp, _Bytes / sizeof(_Tp)> {};

template <typename _Tp, size_t _Size>
  using __vector_type_t = typename __vector_type_n<_Tp, _Size>::type;

template <typename _Tp>
  using __vector_type2_t = typename __vector_type<_Tp, 2>::type;
template <typename _Tp>
  using __vector_type4_t = typename __vector_type<_Tp, 4>::type;
template <typename _Tp>
  using __vector_type8_t = typename __vector_type<_Tp, 8>::type;
template <typename _Tp>
  using __vector_type16_t = typename __vector_type<_Tp, 16>::type;
template <typename _Tp>
  using __vector_type32_t = typename __vector_type<_Tp, 32>::type;
template <typename _Tp>
  using __vector_type64_t = typename __vector_type<_Tp, 64>::type;

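// Example (illustrative, assuming 4-byte float): __vector_type_t<float, 3>
// rounds 12 bytes up to the next power of two, yielding a 16-byte
// [[gnu::vector_size(16)]] float vector, while __vector_type_t<float, 1> is
// plain float.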
// }}}
// __is_vector_type {{{
template <typename _Tp, typename = void_t<>>
  struct __is_vector_type : false_type {};

template <typename _Tp>
  struct __is_vector_type<
    _Tp, void_t<typename __vector_type<
           remove_reference_t<decltype(declval<_Tp>()[0])>, sizeof(_Tp)>::type>>
    : is_same<_Tp, typename __vector_type<
                     remove_reference_t<decltype(declval<_Tp>()[0])>,
                     sizeof(_Tp)>::type> {};

template <typename _Tp>
  inline constexpr bool __is_vector_type_v = __is_vector_type<_Tp>::value;

// }}}
// __is_intrinsic_type {{{
#if _GLIBCXX_SIMD_HAVE_SSE_ABI
template <typename _Tp>
  using __is_intrinsic_type = __is_vector_type<_Tp>;
#else // not SSE (x86)
template <typename _Tp, typename = void_t<>>
  struct __is_intrinsic_type : false_type {};

template <typename _Tp>
  struct __is_intrinsic_type<
    _Tp, void_t<typename __intrinsic_type<
           remove_reference_t<decltype(declval<_Tp>()[0])>, sizeof(_Tp)>::type>>
    : is_same<_Tp, typename __intrinsic_type<
                     remove_reference_t<decltype(declval<_Tp>()[0])>,
                     sizeof(_Tp)>::type> {};
#endif

template <typename _Tp>
  inline constexpr bool __is_intrinsic_type_v = __is_intrinsic_type<_Tp>::value;

// }}}
// _VectorTraits{{{
template <typename _Tp, typename = void_t<>>
  struct _VectorTraitsImpl;

template <typename _Tp>
  struct _VectorTraitsImpl<_Tp, enable_if_t<__is_vector_type_v<_Tp>
                                              || __is_intrinsic_type_v<_Tp>>>
  {
    using type = _Tp;
    using value_type = remove_reference_t<decltype(declval<_Tp>()[0])>;
    static constexpr int _S_full_size = sizeof(_Tp) / sizeof(value_type);
    using _Wrapper = _SimdWrapper<value_type, _S_full_size>;
    template <typename _Up, int _W = _S_full_size>
      static constexpr bool _S_is
        = is_same_v<value_type, _Up> && _W == _S_full_size;
  };

template <typename _Tp, size_t _Np>
  struct _VectorTraitsImpl<_SimdWrapper<_Tp, _Np>,
                           void_t<__vector_type_t<_Tp, _Np>>>
  {
    using type = __vector_type_t<_Tp, _Np>;
    using value_type = _Tp;
    static constexpr int _S_full_size = sizeof(type) / sizeof(value_type);
    using _Wrapper = _SimdWrapper<_Tp, _Np>;
    static constexpr bool _S_is_partial = (_Np != _S_full_size);
    static constexpr int _S_partial_width = _Np;
    template <typename _Up, int _W = _S_full_size>
      static constexpr bool _S_is
        = is_same_v<value_type, _Up> && _W == _S_full_size;
  };

template <typename _Tp, typename = typename _VectorTraitsImpl<_Tp>::type>
  using _VectorTraits = _VectorTraitsImpl<_Tp>;

1611 // __as_vector{{{
1612 template <typename _V>
1613   _GLIBCXX_SIMD_INTRINSIC constexpr auto
1614   __as_vector(_V __x)
1615   {
1616     if constexpr (__is_vector_type_v<_V>)
1617       return __x;
1618     else if constexpr (is_simd<_V>::value || is_simd_mask<_V>::value)
1619       return __data(__x)._M_data;
1620     else if constexpr (__is_vectorizable_v<_V>)
1621       return __vector_type_t<_V, 2>{__x};
1622     else
1623       return __x._M_data;
1624   }
1625 
1626 // }}}
1627 // __as_wrapper{{{
1628 template <size_t _Np = 0, typename _V>
1629   _GLIBCXX_SIMD_INTRINSIC constexpr auto
1630   __as_wrapper(_V __x)
1631   {
1632     if constexpr (__is_vector_type_v<_V>)
1633       return _SimdWrapper<typename _VectorTraits<_V>::value_type,
1634 			  (_Np > 0 ? _Np : _VectorTraits<_V>::_S_full_size)>(__x);
1635     else if constexpr (is_simd<_V>::value || is_simd_mask<_V>::value)
1636       {
1637 	static_assert(_V::size() == _Np);
1638 	return __data(__x);
1639       }
1640     else
1641       {
1642 	static_assert(_V::_S_size == _Np);
1643 	return __x;
1644       }
1645   }
1646 
1647 // }}}
1648 // __intrin_bitcast{{{
1649 template <typename _To, typename _From>
1650   _GLIBCXX_SIMD_INTRINSIC constexpr _To
1651   __intrin_bitcast(_From __v)
1652   {
1653     static_assert((__is_vector_type_v<_From> || __is_intrinsic_type_v<_From>)
1654 		    && (__is_vector_type_v<_To> || __is_intrinsic_type_v<_To>));
1655     if constexpr (sizeof(_To) == sizeof(_From))
1656       return reinterpret_cast<_To>(__v);
1657     else if constexpr (sizeof(_From) > sizeof(_To))
1658       if constexpr (sizeof(_To) >= 16)
1659 	return reinterpret_cast<const __may_alias<_To>&>(__v);
1660       else
1661 	{
1662 	  _To __r;
1663 	  __builtin_memcpy(&__r, &__v, sizeof(_To));
1664 	  return __r;
1665 	}
1666 #if _GLIBCXX_SIMD_X86INTRIN && !defined __clang__
1667     else if constexpr (__have_avx && sizeof(_From) == 16 && sizeof(_To) == 32)
1668       return reinterpret_cast<_To>(__builtin_ia32_ps256_ps(
1669 	reinterpret_cast<__vector_type_t<float, 4>>(__v)));
1670     else if constexpr (__have_avx512f && sizeof(_From) == 16
1671 		       && sizeof(_To) == 64)
1672       return reinterpret_cast<_To>(__builtin_ia32_ps512_ps(
1673 	reinterpret_cast<__vector_type_t<float, 4>>(__v)));
1674     else if constexpr (__have_avx512f && sizeof(_From) == 32
1675 		       && sizeof(_To) == 64)
1676       return reinterpret_cast<_To>(__builtin_ia32_ps512_256ps(
1677 	reinterpret_cast<__vector_type_t<float, 8>>(__v)));
1678 #endif // _GLIBCXX_SIMD_X86INTRIN
1679     else if constexpr (sizeof(__v) <= 8)
1680       return reinterpret_cast<_To>(
1681 	__vector_type_t<__int_for_sizeof_t<_From>, sizeof(_To) / sizeof(_From)>{
1682 	  reinterpret_cast<__int_for_sizeof_t<_From>>(__v)});
1683     else
1684       {
1685 	static_assert(sizeof(_To) > sizeof(_From));
1686 	_To __r = {};
1687 	__builtin_memcpy(&__r, &__v, sizeof(_From));
1688 	return __r;
1689       }
1690   }
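
// Usage sketch (illustrative; `__x` is a hypothetical 16-byte float vector):
//   auto __i = __intrin_bitcast<__m128i>(__x);   // same size: plain bitcast
//   auto __j = __intrin_bitcast<__m256i>(__i);   // widening: low 16 bytes kept;
//     // the tail is zeroed on the generic memcpy path, but unspecified where
//     // the x86 cast builtins are used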
1691 
1692 // }}}
1693 // __vector_bitcast{{{
1694 template <typename _To, size_t _NN = 0, typename _From,
1695 	  typename _FromVT = _VectorTraits<_From>,
1696 	  size_t _Np = _NN == 0 ? sizeof(_From) / sizeof(_To) : _NN>
1697   _GLIBCXX_SIMD_INTRINSIC constexpr __vector_type_t<_To, _Np>
1698   __vector_bitcast(_From __x)
1699   {
1700     using _R = __vector_type_t<_To, _Np>;
1701     return __intrin_bitcast<_R>(__x);
1702   }
1703 
1704 template <typename _To, size_t _NN = 0, typename _Tp, size_t _Nx,
1705 	  size_t _Np
1706 	  = _NN == 0 ? sizeof(_SimdWrapper<_Tp, _Nx>) / sizeof(_To) : _NN>
1707   _GLIBCXX_SIMD_INTRINSIC constexpr __vector_type_t<_To, _Np>
1708   __vector_bitcast(const _SimdWrapper<_Tp, _Nx>& __x)
1709   {
1710     static_assert(_Np > 1);
1711     return __intrin_bitcast<__vector_type_t<_To, _Np>>(__x._M_data);
1712   }
1713 
1714 // }}}
1715 // __convert_x86 declarations {{{
1716 #ifdef _GLIBCXX_SIMD_WORKAROUND_PR85048
1717 template <typename _To, typename _Tp, typename _TVT = _VectorTraits<_Tp>>
1718   _To __convert_x86(_Tp);
1719 
1720 template <typename _To, typename _Tp, typename _TVT = _VectorTraits<_Tp>>
1721   _To __convert_x86(_Tp, _Tp);
1722 
1723 template <typename _To, typename _Tp, typename _TVT = _VectorTraits<_Tp>>
1724   _To __convert_x86(_Tp, _Tp, _Tp, _Tp);
1725 
1726 template <typename _To, typename _Tp, typename _TVT = _VectorTraits<_Tp>>
1727   _To __convert_x86(_Tp, _Tp, _Tp, _Tp, _Tp, _Tp, _Tp, _Tp);
1728 
1729 template <typename _To, typename _Tp, typename _TVT = _VectorTraits<_Tp>>
1730   _To __convert_x86(_Tp, _Tp, _Tp, _Tp, _Tp, _Tp, _Tp, _Tp, _Tp, _Tp, _Tp, _Tp,
1731 		    _Tp, _Tp, _Tp, _Tp);
1732 #endif // _GLIBCXX_SIMD_WORKAROUND_PR85048
1733 
1734 //}}}
1735 // __bit_cast {{{
1736 template <typename _To, typename _From>
1737   _GLIBCXX_SIMD_INTRINSIC constexpr _To
1738   __bit_cast(const _From __x)
1739   {
1740 #if __has_builtin(__builtin_bit_cast)
1741     return __builtin_bit_cast(_To, __x);
1742 #else
1743     static_assert(sizeof(_To) == sizeof(_From));
1744     constexpr bool __to_is_vectorizable
1745       = is_arithmetic_v<_To> || is_enum_v<_To>;
1746     constexpr bool __from_is_vectorizable
1747       = is_arithmetic_v<_From> || is_enum_v<_From>;
1748     if constexpr (__is_vector_type_v<_To> && __is_vector_type_v<_From>)
1749       return reinterpret_cast<_To>(__x);
1750     else if constexpr (__is_vector_type_v<_To> && __from_is_vectorizable)
1751       {
1752 	using _FV [[__gnu__::__vector_size__(sizeof(_From))]] = _From;
1753 	return reinterpret_cast<_To>(_FV{__x});
1754       }
1755     else if constexpr (__to_is_vectorizable && __from_is_vectorizable)
1756       {
1757 	using _TV [[__gnu__::__vector_size__(sizeof(_To))]] = _To;
1758 	using _FV [[__gnu__::__vector_size__(sizeof(_From))]] = _From;
1759 	return reinterpret_cast<_TV>(_FV{__x})[0];
1760       }
1761     else if constexpr (__to_is_vectorizable && __is_vector_type_v<_From>)
1762       {
1763 	using _TV [[__gnu__::__vector_size__(sizeof(_To))]] = _To;
1764 	return reinterpret_cast<_TV>(__x)[0];
1765       }
1766     else
1767       {
1768 	_To __r;
1769 	__builtin_memcpy(reinterpret_cast<char*>(&__r),
1770 			 reinterpret_cast<const char*>(&__x), sizeof(_To));
1771 	return __r;
1772       }
1773 #endif
1774   }
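
// Worked example (illustrative): where __builtin_bit_cast is unavailable, the
// fallbacks reproduce the same value-representation copy, e.g.
//   unsigned __bits = __bit_cast<unsigned>(1.f);   // 0x3f800000 under IEC 559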
1775 
1776 // }}}
1777 // __to_intrin {{{
1778 template <typename _Tp, typename _TVT = _VectorTraits<_Tp>,
1779 	  typename _R = __intrinsic_type_t<typename _TVT::value_type, _TVT::_S_full_size>>
1780   _GLIBCXX_SIMD_INTRINSIC constexpr _R
1781   __to_intrin(_Tp __x)
1782   {
1783     static_assert(sizeof(__x) <= sizeof(_R),
1784 		  "__to_intrin may never drop values off the end");
1785     if constexpr (sizeof(__x) == sizeof(_R))
1786       return reinterpret_cast<_R>(__as_vector(__x));
1787     else
1788       {
1789 	using _Up = __int_for_sizeof_t<_Tp>;
1790 	return reinterpret_cast<_R>(
1791 	  __vector_type_t<_Up, sizeof(_R) / sizeof(_Up)>{__bit_cast<_Up>(__x)});
1792       }
1793   }
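
// Sketch (illustrative; `__v8qi` names a hypothetical 8-byte char vector):
//   __m128i __i = __to_intrin(__v8qi);   // the 8 input bytes land in the low
//                                        // half; the rest is zero-initialized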
1794 
1795 // }}}
1796 // __make_vector{{{
1797 template <typename _Tp, typename... _Args>
1798   _GLIBCXX_SIMD_INTRINSIC constexpr __vector_type_t<_Tp, sizeof...(_Args)>
1799   __make_vector(const _Args&... __args)
1800   { return __vector_type_t<_Tp, sizeof...(_Args)>{static_cast<_Tp>(__args)...}; }
1801 
1802 // }}}
1803 // __vector_broadcast{{{
1804 template <size_t _Np, typename _Tp, size_t... _I>
1805   _GLIBCXX_SIMD_INTRINSIC constexpr __vector_type_t<_Tp, _Np>
1806   __vector_broadcast_impl(_Tp __x, index_sequence<_I...>)
1807   { return __vector_type_t<_Tp, _Np>{((void)_I, __x)...}; }
1808 
1809 template <size_t _Np, typename _Tp>
1810   _GLIBCXX_SIMD_INTRINSIC constexpr __vector_type_t<_Tp, _Np>
1811   __vector_broadcast(_Tp __x)
1812   { return __vector_broadcast_impl<_Np, _Tp>(__x, make_index_sequence<_Np>()); }
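
// Examples (illustrative only):
//   __make_vector<float>(1, 2);   // __vector_type_t<float, 2>{1.f, 2.f}
//   __vector_broadcast<4>(7);     // __vector_type_t<int, 4>{7, 7, 7, 7}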
1813 
1814 // }}}
1815 // __generate_vector{{{
1816 template <typename _Tp, size_t _Np, typename _Gp, size_t... _I>
1817   _GLIBCXX_SIMD_INTRINSIC constexpr __vector_type_t<_Tp, _Np>
1818   __generate_vector_impl(_Gp&& __gen, index_sequence<_I...>)
1819   { return __vector_type_t<_Tp, _Np>{static_cast<_Tp>(__gen(_SizeConstant<_I>()))...}; }
1820 
1821 template <typename _V, typename _VVT = _VectorTraits<_V>, typename _Gp>
1822   _GLIBCXX_SIMD_INTRINSIC constexpr _V
1823   __generate_vector(_Gp&& __gen)
1824   {
1825     if constexpr (__is_vector_type_v<_V>)
1826       return __generate_vector_impl<typename _VVT::value_type,
1827 				    _VVT::_S_full_size>(
1828 	static_cast<_Gp&&>(__gen), make_index_sequence<_VVT::_S_full_size>());
1829     else
1830       return __generate_vector_impl<typename _VVT::value_type,
1831 				    _VVT::_S_partial_width>(
1832 	static_cast<_Gp&&>(__gen),
1833 	make_index_sequence<_VVT::_S_partial_width>());
1834   }
1835 
1836 template <typename _Tp, size_t _Np, typename _Gp>
1837   _GLIBCXX_SIMD_INTRINSIC constexpr __vector_type_t<_Tp, _Np>
1838   __generate_vector(_Gp&& __gen)
1839   {
1840     return __generate_vector_impl<_Tp, _Np>(static_cast<_Gp&&>(__gen),
1841 					    make_index_sequence<_Np>());
1842   }
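
// Example (illustrative): the generator is invoked with a _SizeConstant index,
// so each lane can be computed from its position:
//   __generate_vector<int, 4>([](auto __i) { return __i * 2; });  // {0, 2, 4, 6}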
1843 
1844 // }}}
1845 // __xor{{{
1846 template <typename _TW>
1847   _GLIBCXX_SIMD_INTRINSIC constexpr _TW
1848   __xor(_TW __a, _TW __b) noexcept
1849   {
1850     if constexpr (__is_vector_type_v<_TW> || __is_simd_wrapper_v<_TW>)
1851       {
1852 	using _Tp = typename conditional_t<__is_simd_wrapper_v<_TW>, _TW,
1853 					   _VectorTraitsImpl<_TW>>::value_type;
1854 	if constexpr (is_floating_point_v<_Tp>)
1855 	  {
1856 	    using _Ip = make_unsigned_t<__int_for_sizeof_t<_Tp>>;
1857 	    return __vector_bitcast<_Tp>(__vector_bitcast<_Ip>(__a)
1858 					 ^ __vector_bitcast<_Ip>(__b));
1859 	  }
1860 	else if constexpr (__is_vector_type_v<_TW>)
1861 	  return __a ^ __b;
1862 	else
1863 	  return __a._M_data ^ __b._M_data;
1864       }
1865     else
1866       return __a ^ __b;
1867   }
1868 
1869 // }}}
1870 // __or{{{
1871 template <typename _TW>
1872   _GLIBCXX_SIMD_INTRINSIC constexpr _TW
1873   __or(_TW __a, _TW __b) noexcept
1874   {
1875     if constexpr (__is_vector_type_v<_TW> || __is_simd_wrapper_v<_TW>)
1876       {
1877 	using _Tp = typename conditional_t<__is_simd_wrapper_v<_TW>, _TW,
1878 					   _VectorTraitsImpl<_TW>>::value_type;
1879 	if constexpr (is_floating_point_v<_Tp>)
1880 	  {
1881 	    using _Ip = make_unsigned_t<__int_for_sizeof_t<_Tp>>;
1882 	    return __vector_bitcast<_Tp>(__vector_bitcast<_Ip>(__a)
1883 					 | __vector_bitcast<_Ip>(__b));
1884 	  }
1885 	else if constexpr (__is_vector_type_v<_TW>)
1886 	  return __a | __b;
1887 	else
1888 	  return __a._M_data | __b._M_data;
1889       }
1890     else
1891       return __a | __b;
1892   }
1893 
1894 // }}}
1895 // __and{{{
1896 template <typename _TW>
1897   _GLIBCXX_SIMD_INTRINSIC constexpr _TW
1898   __and(_TW __a, _TW __b) noexcept
1899   {
1900     if constexpr (__is_vector_type_v<_TW> || __is_simd_wrapper_v<_TW>)
1901       {
1902 	using _Tp = typename conditional_t<__is_simd_wrapper_v<_TW>, _TW,
1903 					   _VectorTraitsImpl<_TW>>::value_type;
1904 	if constexpr (is_floating_point_v<_Tp>)
1905 	  {
1906 	    using _Ip = make_unsigned_t<__int_for_sizeof_t<_Tp>>;
1907 	    return __vector_bitcast<_Tp>(__vector_bitcast<_Ip>(__a)
1908 					 & __vector_bitcast<_Ip>(__b));
1909 	  }
1910 	else if constexpr (__is_vector_type_v<_TW>)
1911 	  return __a & __b;
1912 	else
1913 	  return __a._M_data & __b._M_data;
1914       }
1915     else
1916       return __a & __b;
1917   }
1918 
1919 // }}}
1920 // __andnot{{{
1921 #if _GLIBCXX_SIMD_X86INTRIN && !defined __clang__
1922 static constexpr struct
1923 {
1924   _GLIBCXX_SIMD_INTRINSIC __v4sf
1925   operator()(__v4sf __a, __v4sf __b) const noexcept
1926   { return __builtin_ia32_andnps(__a, __b); }
1927 
1928   _GLIBCXX_SIMD_INTRINSIC __v2df
1929   operator()(__v2df __a, __v2df __b) const noexcept
1930   { return __builtin_ia32_andnpd(__a, __b); }
1931 
1932   _GLIBCXX_SIMD_INTRINSIC __v2di
1933   operator()(__v2di __a, __v2di __b) const noexcept
1934   { return __builtin_ia32_pandn128(__a, __b); }
1935 
1936   _GLIBCXX_SIMD_INTRINSIC __v8sf
1937   operator()(__v8sf __a, __v8sf __b) const noexcept
1938   { return __builtin_ia32_andnps256(__a, __b); }
1939 
1940   _GLIBCXX_SIMD_INTRINSIC __v4df
1941   operator()(__v4df __a, __v4df __b) const noexcept
1942   { return __builtin_ia32_andnpd256(__a, __b); }
1943 
1944   _GLIBCXX_SIMD_INTRINSIC __v4di
1945   operator()(__v4di __a, __v4di __b) const noexcept
1946   {
1947     if constexpr (__have_avx2)
1948       return __builtin_ia32_andnotsi256(__a, __b);
1949     else
1950       return reinterpret_cast<__v4di>(
1951 	__builtin_ia32_andnpd256(reinterpret_cast<__v4df>(__a),
1952 				 reinterpret_cast<__v4df>(__b)));
1953   }
1954 
1955   _GLIBCXX_SIMD_INTRINSIC __v16sf
1956   operator()(__v16sf __a, __v16sf __b) const noexcept
1957   {
1958     if constexpr (__have_avx512dq)
1959       return _mm512_andnot_ps(__a, __b);
1960     else
1961       return reinterpret_cast<__v16sf>(
1962 	_mm512_andnot_si512(reinterpret_cast<__v8di>(__a),
1963 			    reinterpret_cast<__v8di>(__b)));
1964   }
1965 
1966   _GLIBCXX_SIMD_INTRINSIC __v8df
1967   operator()(__v8df __a, __v8df __b) const noexcept
1968   {
1969     if constexpr (__have_avx512dq)
1970       return _mm512_andnot_pd(__a, __b);
1971     else
1972       return reinterpret_cast<__v8df>(
1973 	_mm512_andnot_si512(reinterpret_cast<__v8di>(__a),
1974 			    reinterpret_cast<__v8di>(__b)));
1975   }
1976 
1977   _GLIBCXX_SIMD_INTRINSIC __v8di
1978   operator()(__v8di __a, __v8di __b) const noexcept
1979   { return _mm512_andnot_si512(__a, __b); }
1980 } _S_x86_andnot;
1981 #endif // _GLIBCXX_SIMD_X86INTRIN && !__clang__
1982 
1983 template <typename _TW>
1984   _GLIBCXX_SIMD_INTRINSIC constexpr _TW
1985   __andnot(_TW __a, _TW __b) noexcept
1986   {
1987     if constexpr (__is_vector_type_v<_TW> || __is_simd_wrapper_v<_TW>)
1988       {
1989 	using _TVT = conditional_t<__is_simd_wrapper_v<_TW>, _TW,
1990 				   _VectorTraitsImpl<_TW>>;
1991 	using _Tp = typename _TVT::value_type;
1992 #if _GLIBCXX_SIMD_X86INTRIN && !defined __clang__
1993 	if constexpr (sizeof(_TW) >= 16)
1994 	  {
1995 	    const auto __ai = __to_intrin(__a);
1996 	    const auto __bi = __to_intrin(__b);
1997 	    if (!__builtin_is_constant_evaluated()
1998 		&& !(__builtin_constant_p(__ai) && __builtin_constant_p(__bi)))
1999 	      {
2000 		const auto __r = _S_x86_andnot(__ai, __bi);
2001 		if constexpr (is_convertible_v<decltype(__r), _TW>)
2002 		  return __r;
2003 		else
2004 		  return reinterpret_cast<typename _TVT::type>(__r);
2005 	      }
2006 	  }
2007 #endif // _GLIBCXX_SIMD_X86INTRIN
2008 	using _Ip = make_unsigned_t<__int_for_sizeof_t<_Tp>>;
2009 	return __vector_bitcast<_Tp>(~__vector_bitcast<_Ip>(__a)
2010 				     & __vector_bitcast<_Ip>(__b));
2011       }
2012     else
2013       return ~__a & __b;
2014   }
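
// Note (illustrative): the operand order follows the x86 ANDN instructions,
// i.e. the *first* operand is the one that gets complemented:
//   __andnot(__k, __x);   // == ~__k & __x, clearing in __x the bits set in __k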
2015 
2016 // }}}
2017 // __not{{{
2018 template <typename _Tp, typename _TVT = _VectorTraits<_Tp>>
2019   _GLIBCXX_SIMD_INTRINSIC constexpr _Tp
2020   __not(_Tp __a) noexcept
2021   {
2022     if constexpr (is_floating_point_v<typename _TVT::value_type>)
2023       return reinterpret_cast<typename _TVT::type>(
2024 	~__vector_bitcast<unsigned>(__a));
2025     else
2026       return ~__a;
2027   }
2028 
2029 // }}}
2030 // __concat{{{
2031 template <typename _Tp, typename _TVT = _VectorTraits<_Tp>,
2032 	  typename _R = __vector_type_t<typename _TVT::value_type, _TVT::_S_full_size * 2>>
2033   constexpr _R
2034   __concat(_Tp a_, _Tp b_)
2035   {
2036 #ifdef _GLIBCXX_SIMD_WORKAROUND_XXX_1
2037     using _W
2038       = conditional_t<is_floating_point_v<typename _TVT::value_type>, double,
2039 		      conditional_t<(sizeof(_Tp) >= 2 * sizeof(long long)),
2040 				    long long, typename _TVT::value_type>>;
2041     constexpr int input_width = sizeof(_Tp) / sizeof(_W);
2042     const auto __a = __vector_bitcast<_W>(a_);
2043     const auto __b = __vector_bitcast<_W>(b_);
2044     using _Up = __vector_type_t<_W, sizeof(_R) / sizeof(_W)>;
2045 #else
2046     constexpr int input_width = _TVT::_S_full_size;
2047     const _Tp& __a = a_;
2048     const _Tp& __b = b_;
2049     using _Up = _R;
2050 #endif
2051     if constexpr (input_width == 2)
2052       return reinterpret_cast<_R>(_Up{__a[0], __a[1], __b[0], __b[1]});
2053     else if constexpr (input_width == 4)
2054       return reinterpret_cast<_R>(
2055 	_Up{__a[0], __a[1], __a[2], __a[3], __b[0], __b[1], __b[2], __b[3]});
2056     else if constexpr (input_width == 8)
2057       return reinterpret_cast<_R>(
2058 	_Up{__a[0], __a[1], __a[2], __a[3], __a[4], __a[5], __a[6], __a[7],
2059 	    __b[0], __b[1], __b[2], __b[3], __b[4], __b[5], __b[6], __b[7]});
2060     else if constexpr (input_width == 16)
2061       return reinterpret_cast<_R>(
2062 	_Up{__a[0],  __a[1],  __a[2],  __a[3],  __a[4],  __a[5],  __a[6],
2063 	    __a[7],  __a[8],  __a[9],  __a[10], __a[11], __a[12], __a[13],
2064 	    __a[14], __a[15], __b[0],  __b[1],  __b[2],  __b[3],  __b[4],
2065 	    __b[5],  __b[6],  __b[7],  __b[8],  __b[9],  __b[10], __b[11],
2066 	    __b[12], __b[13], __b[14], __b[15]});
2067     else if constexpr (input_width == 32)
2068       return reinterpret_cast<_R>(
2069 	_Up{__a[0],  __a[1],  __a[2],  __a[3],  __a[4],  __a[5],  __a[6],
2070 	    __a[7],  __a[8],  __a[9],  __a[10], __a[11], __a[12], __a[13],
2071 	    __a[14], __a[15], __a[16], __a[17], __a[18], __a[19], __a[20],
2072 	    __a[21], __a[22], __a[23], __a[24], __a[25], __a[26], __a[27],
2073 	    __a[28], __a[29], __a[30], __a[31], __b[0],  __b[1],  __b[2],
2074 	    __b[3],  __b[4],  __b[5],  __b[6],  __b[7],  __b[8],  __b[9],
2075 	    __b[10], __b[11], __b[12], __b[13], __b[14], __b[15], __b[16],
2076 	    __b[17], __b[18], __b[19], __b[20], __b[21], __b[22], __b[23],
2077 	    __b[24], __b[25], __b[26], __b[27], __b[28], __b[29], __b[30],
2078 	    __b[31]});
2079   }
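
// Example (illustrative; `__lo` and `__hi` are hypothetical 4-float vectors):
//   __vector_type_t<float, 8> __r = __concat(__lo, __hi);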
2080 
2081 // }}}
2082 // __zero_extend {{{
2083 template <typename _Tp, typename _TVT = _VectorTraits<_Tp>>
2084   struct _ZeroExtendProxy
2085   {
2086     using value_type = typename _TVT::value_type;
2087     static constexpr size_t _Np = _TVT::_S_full_size;
2088     const _Tp __x;
2089 
2090     template <typename _To, typename _ToVT = _VectorTraits<_To>,
2091 	      typename
2092 	      = enable_if_t<is_same_v<typename _ToVT::value_type, value_type>>>
2093       _GLIBCXX_SIMD_INTRINSIC operator _To() const
2094       {
2095 	constexpr size_t _ToN = _ToVT::_S_full_size;
2096 	if constexpr (_ToN == _Np)
2097 	  return __x;
2098 	else if constexpr (_ToN == 2 * _Np)
2099 	  {
2100 #ifdef _GLIBCXX_SIMD_WORKAROUND_XXX_3
2101 	    if constexpr (__have_avx && _TVT::template _S_is<float, 4>)
2102 	      return __vector_bitcast<value_type>(
2103 		_mm256_insertf128_ps(__m256(), __x, 0));
2104 	    else if constexpr (__have_avx && _TVT::template _S_is<double, 2>)
2105 	      return __vector_bitcast<value_type>(
2106 		_mm256_insertf128_pd(__m256d(), __x, 0));
2107 	    else if constexpr (__have_avx2 && _Np * sizeof(value_type) == 16)
2108 	      return __vector_bitcast<value_type>(
2109 		_mm256_insertf128_si256(__m256i(), __to_intrin(__x), 0));
2110 	    else if constexpr (__have_avx512f && _TVT::template _S_is<float, 8>)
2111 	      {
2112 		if constexpr (__have_avx512dq)
2113 		  return __vector_bitcast<value_type>(
2114 		    _mm512_insertf32x8(__m512(), __x, 0));
2115 		else
2116 		  return reinterpret_cast<__m512>(
2117 		    _mm512_insertf64x4(__m512d(),
2118 				       reinterpret_cast<__m256d>(__x), 0));
2119 	      }
2120 	    else if constexpr (__have_avx512f
2121 			       && _TVT::template _S_is<double, 4>)
2122 	      return __vector_bitcast<value_type>(
2123 		_mm512_insertf64x4(__m512d(), __x, 0));
2124 	    else if constexpr (__have_avx512f && _Np * sizeof(value_type) == 32)
2125 	      return __vector_bitcast<value_type>(
2126 		_mm512_inserti64x4(__m512i(), __to_intrin(__x), 0));
2127 #endif
2128 	    return __concat(__x, _Tp());
2129 	  }
2130 	else if constexpr (_ToN == 4 * _Np)
2131 	  {
2132 #ifdef _GLIBCXX_SIMD_WORKAROUND_XXX_3
2133 	    if constexpr (__have_avx512dq && _TVT::template _S_is<double, 2>)
2134 	      {
2135 		return __vector_bitcast<value_type>(
2136 		  _mm512_insertf64x2(__m512d(), __x, 0));
2137 	      }
2138 	    else if constexpr (__have_avx512f
2139 			       && is_floating_point_v<value_type>)
2140 	      {
2141 		return __vector_bitcast<value_type>(
2142 		  _mm512_insertf32x4(__m512(), reinterpret_cast<__m128>(__x),
2143 				     0));
2144 	      }
2145 	    else if constexpr (__have_avx512f && _Np * sizeof(value_type) == 16)
2146 	      {
2147 		return __vector_bitcast<value_type>(
2148 		  _mm512_inserti32x4(__m512i(), __to_intrin(__x), 0));
2149 	      }
2150 #endif
2151 	    return __concat(__concat(__x, _Tp()),
2152 			    __vector_type_t<value_type, _Np * 2>());
2153 	  }
2154 	else if constexpr (_ToN == 8 * _Np)
2155 	  return __concat(operator __vector_type_t<value_type, _Np * 4>(),
2156 			  __vector_type_t<value_type, _Np * 4>());
2157 	else if constexpr (_ToN == 16 * _Np)
2158 	  return __concat(operator __vector_type_t<value_type, _Np * 8>(),
2159 			  __vector_type_t<value_type, _Np * 8>());
2160 	else
2161 	  __assert_unreachable<_Tp>();
2162       }
2163   };
2164 
2165 template <typename _Tp, typename _TVT = _VectorTraits<_Tp>>
2166   _GLIBCXX_SIMD_INTRINSIC _ZeroExtendProxy<_Tp, _TVT>
2167   __zero_extend(_Tp __x)
2168   { return {__x}; }
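
// Sketch (illustrative; `__v4sf` is a hypothetical 4-float vector): the proxy
// defers the destination width to the conversion site:
//   __vector_type_t<float, 8> __w = __zero_extend(__v4sf);  // upper half zeroed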
2169 
2170 // }}}
2171 // __extract<_Np, By>{{{
2172 template <int _Offset,
2173 	  int _SplitBy,
2174 	  typename _Tp,
2175 	  typename _TVT = _VectorTraits<_Tp>,
2176 	  typename _R = __vector_type_t<typename _TVT::value_type, _TVT::_S_full_size / _SplitBy>>
2177   _GLIBCXX_SIMD_INTRINSIC constexpr _R
2178   __extract(_Tp __in)
2179   {
2180     using value_type = typename _TVT::value_type;
2181 #if _GLIBCXX_SIMD_X86INTRIN // {{{
2182     if constexpr (sizeof(_Tp) == 64 && _SplitBy == 4 && _Offset > 0)
2183       {
2184 	if constexpr (__have_avx512dq && is_same_v<double, value_type>)
2185 	  return _mm512_extractf64x2_pd(__to_intrin(__in), _Offset);
2186 	else if constexpr (is_floating_point_v<value_type>)
2187 	  return __vector_bitcast<value_type>(
2188 	    _mm512_extractf32x4_ps(__intrin_bitcast<__m512>(__in), _Offset));
2189 	else
2190 	  return reinterpret_cast<_R>(
2191 	    _mm512_extracti32x4_epi32(__intrin_bitcast<__m512i>(__in),
2192 				      _Offset));
2193       }
2194     else
2195 #endif // _GLIBCXX_SIMD_X86INTRIN }}}
2196       {
2197 #ifdef _GLIBCXX_SIMD_WORKAROUND_XXX_1
2198 	using _W = conditional_t<
2199 	  is_floating_point_v<value_type>, double,
2200 	  conditional_t<(sizeof(_R) >= 16), long long, value_type>>;
2201 	static_assert(sizeof(_R) % sizeof(_W) == 0);
2202 	constexpr int __return_width = sizeof(_R) / sizeof(_W);
2203 	using _Up = __vector_type_t<_W, __return_width>;
2204 	const auto __x = __vector_bitcast<_W>(__in);
2205 #else
2206       constexpr int __return_width = _TVT::_S_full_size / _SplitBy;
2207       using _Up = _R;
2208       const __vector_type_t<value_type, _TVT::_S_full_size>& __x
2209 	= __in; // only needed for _Tp = _SimdWrapper<value_type, _Np>
2210 #endif
2211 	constexpr int _O = _Offset * __return_width;
2212 	return __call_with_subscripts<__return_width, _O>(
2213 	  __x, [](auto... __entries) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
2214 	    return reinterpret_cast<_R>(_Up{__entries...});
2215 	  });
2216       }
2217   }
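
// Example (illustrative; `__v16sf` is a hypothetical 16-float vector):
//   auto __q = __extract<1, 4>(__v16sf);  // elements [4, 8) as a 4-float vector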
2218 
2219 // }}}
2220 // __lo/__hi64[z]{{{
2221 template <typename _Tp,
2222 	  typename _R = __vector_type8_t<typename _VectorTraits<_Tp>::value_type>>
2223   _GLIBCXX_SIMD_INTRINSIC constexpr _R
2224   __lo64(_Tp __x)
2225   {
2226     _R __r{};
2227     __builtin_memcpy(&__r, &__x, 8);
2228     return __r;
2229   }
2230 
2231 template <typename _Tp,
2232 	  typename _R = __vector_type8_t<typename _VectorTraits<_Tp>::value_type>>
2233   _GLIBCXX_SIMD_INTRINSIC constexpr _R
2234   __hi64(_Tp __x)
2235   {
2236     static_assert(sizeof(_Tp) == 16, "use __hi64z if you meant it");
2237     _R __r{};
2238     __builtin_memcpy(&__r, reinterpret_cast<const char*>(&__x) + 8, 8);
2239     return __r;
2240   }
2241 
2242 template <typename _Tp,
2243 	  typename _R = __vector_type8_t<typename _VectorTraits<_Tp>::value_type>>
2244   _GLIBCXX_SIMD_INTRINSIC constexpr _R
2245   __hi64z([[maybe_unused]] _Tp __x)
2246   {
2247     _R __r{};
2248     if constexpr (sizeof(_Tp) == 16)
2249       __builtin_memcpy(&__r, reinterpret_cast<const char*>(&__x) + 8, 8);
2250     return __r;
2251   }
2252 
2253 // }}}
2254 // __lo/__hi128{{{
2255 template <typename _Tp>
2256   _GLIBCXX_SIMD_INTRINSIC constexpr auto
2257   __lo128(_Tp __x)
2258   { return __extract<0, sizeof(_Tp) / 16>(__x); }
2259 
2260 template <typename _Tp>
2261   _GLIBCXX_SIMD_INTRINSIC constexpr auto
2262   __hi128(_Tp __x)
2263   {
2264     static_assert(sizeof(__x) == 32);
2265     return __extract<1, 2>(__x);
2266   }
2267 
2268 // }}}
2269 // __lo/__hi256{{{
2270 template <typename _Tp>
2271   _GLIBCXX_SIMD_INTRINSIC constexpr auto
2272   __lo256(_Tp __x)
2273   {
2274     static_assert(sizeof(__x) == 64);
2275     return __extract<0, 2>(__x);
2276   }
2277 
2278 template <typename _Tp>
2279   _GLIBCXX_SIMD_INTRINSIC constexpr auto
2280   __hi256(_Tp __x)
2281   {
2282     static_assert(sizeof(__x) == 64);
2283     return __extract<1, 2>(__x);
2284   }
2285 
2286 // }}}
2287 // __auto_bitcast{{{
2288 template <typename _Tp>
2289   struct _AutoCast
2290   {
2291     static_assert(__is_vector_type_v<_Tp>);
2292 
2293     const _Tp __x;
2294 
2295     template <typename _Up, typename _UVT = _VectorTraits<_Up>>
2296       _GLIBCXX_SIMD_INTRINSIC constexpr operator _Up() const
2297       { return __intrin_bitcast<typename _UVT::type>(__x); }
2298   };
2299 
2300 template <typename _Tp>
2301   _GLIBCXX_SIMD_INTRINSIC constexpr _AutoCast<_Tp>
2302   __auto_bitcast(const _Tp& __x)
2303   { return {__x}; }
2304 
2305 template <typename _Tp, size_t _Np>
2306   _GLIBCXX_SIMD_INTRINSIC constexpr
2307   _AutoCast<typename _SimdWrapper<_Tp, _Np>::_BuiltinType>
2308   __auto_bitcast(const _SimdWrapper<_Tp, _Np>& __x)
2309   { return {__x._M_data}; }
2310 
2311 // }}}
2312 // ^^^ ---- builtin vector types [[gnu::vector_size(N)]] and operations ---- ^^^
2313 
2314 #if _GLIBCXX_SIMD_HAVE_SSE_ABI
2315 // __bool_storage_member_type{{{
2316 #if _GLIBCXX_SIMD_HAVE_AVX512F && _GLIBCXX_SIMD_X86INTRIN
2317 template <size_t _Size>
2318   struct __bool_storage_member_type
2319   {
2320     static_assert((_Size & (_Size - 1)) != 0,
2321 		  "This trait may only be used for non-power-of-2 sizes. "
2322 		  "Power-of-2 sizes must be specialized.");
2323     using type =
2324       typename __bool_storage_member_type<std::__bit_ceil(_Size)>::type;
2325   };
2326 
2327 template <>
2328   struct __bool_storage_member_type<1> { using type = bool; };
2329 
2330 template <>
2331   struct __bool_storage_member_type<2> { using type = __mmask8; };
2332 
2333 template <>
2334   struct __bool_storage_member_type<4> { using type = __mmask8; };
2335 
2336 template <>
2337   struct __bool_storage_member_type<8> { using type = __mmask8; };
2338 
2339 template <>
2340   struct __bool_storage_member_type<16> { using type = __mmask16; };
2341 
2342 template <>
2343   struct __bool_storage_member_type<32> { using type = __mmask32; };
2344 
2345 template <>
2346   struct __bool_storage_member_type<64> { using type = __mmask64; };
2347 #endif // _GLIBCXX_SIMD_HAVE_AVX512F
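
// Worked example (illustrative): a 3-element mask is not a power-of-2 size, so
// the primary template rounds up via __bit_ceil(3) == 4 and stores an __mmask8.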
2348 
2349 // }}}
2350 // __intrinsic_type (x86){{{
2351 // the following excludes bool via __is_vectorizable
2352 #if _GLIBCXX_SIMD_HAVE_SSE
2353 template <typename _Tp, size_t _Bytes>
2354   struct __intrinsic_type<_Tp, _Bytes, enable_if_t<__is_vectorizable_v<_Tp> && _Bytes <= 64>>
2355   {
2356     static_assert(!is_same_v<_Tp, long double>,
2357 		  "no __intrinsic_type support for long double on x86");
2358 
2359     static constexpr size_t _S_VBytes = _Bytes <= 16 ? 16 : _Bytes <= 32 ? 32 : 64;
2360 
2361     using type [[__gnu__::__vector_size__(_S_VBytes)]]
2362       = conditional_t<is_integral_v<_Tp>, long long int, _Tp>;
2363   };
2364 #endif // _GLIBCXX_SIMD_HAVE_SSE
2365 
2366 // }}}
2367 #endif // _GLIBCXX_SIMD_HAVE_SSE_ABI
2368 // __intrinsic_type (ARM){{{
2369 #if _GLIBCXX_SIMD_HAVE_NEON
2370 template <>
2371   struct __intrinsic_type<float, 8, void>
2372   { using type = float32x2_t; };
2373 
2374 template <>
2375   struct __intrinsic_type<float, 16, void>
2376   { using type = float32x4_t; };
2377 
2378 template <>
2379   struct __intrinsic_type<double, 8, void>
2380   {
2381 #if _GLIBCXX_SIMD_HAVE_NEON_A64
2382    using type = float64x1_t;
2383 #endif
2384   };
2385 
2386 template <>
2387   struct __intrinsic_type<double, 16, void>
2388   {
2389 #if _GLIBCXX_SIMD_HAVE_NEON_A64
2390     using type = float64x2_t;
2391 #endif
2392   };
2393 
2394 #define _GLIBCXX_SIMD_ARM_INTRIN(_Bits, _Np)                                   \
2395 template <>                                                                    \
2396   struct __intrinsic_type<__int_with_sizeof_t<_Bits / 8>,                      \
2397 			  _Np * _Bits / 8, void>                               \
2398   { using type = int##_Bits##x##_Np##_t; };                                    \
2399 template <>                                                                    \
2400   struct __intrinsic_type<make_unsigned_t<__int_with_sizeof_t<_Bits / 8>>,     \
2401 			  _Np * _Bits / 8, void>                               \
2402   { using type = uint##_Bits##x##_Np##_t; }
2403 _GLIBCXX_SIMD_ARM_INTRIN(8, 8);
2404 _GLIBCXX_SIMD_ARM_INTRIN(8, 16);
2405 _GLIBCXX_SIMD_ARM_INTRIN(16, 4);
2406 _GLIBCXX_SIMD_ARM_INTRIN(16, 8);
2407 _GLIBCXX_SIMD_ARM_INTRIN(32, 2);
2408 _GLIBCXX_SIMD_ARM_INTRIN(32, 4);
2409 _GLIBCXX_SIMD_ARM_INTRIN(64, 1);
2410 _GLIBCXX_SIMD_ARM_INTRIN(64, 2);
2411 #undef _GLIBCXX_SIMD_ARM_INTRIN
2412 
2413 template <typename _Tp, size_t _Bytes>
2414   struct __intrinsic_type<_Tp, _Bytes, enable_if_t<__is_vectorizable_v<_Tp> && _Bytes <= 16>>
2415   {
2416     static constexpr int _SVecBytes = _Bytes <= 8 ? 8 : 16;
2417 
2418     using _Ip = __int_for_sizeof_t<_Tp>;
2419 
2420     using _Up = conditional_t<
2421       is_floating_point_v<_Tp>, _Tp,
2422       conditional_t<is_unsigned_v<_Tp>, make_unsigned_t<_Ip>, _Ip>>;
2423 
2424     static_assert(!is_same_v<_Tp, _Up> || _SVecBytes != _Bytes,
2425 		  "should use explicit specialization above");
2426 
2427     using type = typename __intrinsic_type<_Up, _SVecBytes>::type;
2428   };
2429 #endif // _GLIBCXX_SIMD_HAVE_NEON
2430 
2431 // }}}
2432 // __intrinsic_type (PPC){{{
2433 #ifdef __ALTIVEC__
2434 template <typename _Tp>
2435   struct __intrinsic_type_impl;
2436 
2437 #define _GLIBCXX_SIMD_PPC_INTRIN(_Tp)                                          \
2438   template <>                                                                  \
2439     struct __intrinsic_type_impl<_Tp> { using type = __vector _Tp; }
2440 _GLIBCXX_SIMD_PPC_INTRIN(float);
2441 #ifdef __VSX__
2442 _GLIBCXX_SIMD_PPC_INTRIN(double);
2443 #endif
2444 _GLIBCXX_SIMD_PPC_INTRIN(signed char);
2445 _GLIBCXX_SIMD_PPC_INTRIN(unsigned char);
2446 _GLIBCXX_SIMD_PPC_INTRIN(signed short);
2447 _GLIBCXX_SIMD_PPC_INTRIN(unsigned short);
2448 _GLIBCXX_SIMD_PPC_INTRIN(signed int);
2449 _GLIBCXX_SIMD_PPC_INTRIN(unsigned int);
2450 #if defined __VSX__ || __SIZEOF_LONG__ == 4
2451 _GLIBCXX_SIMD_PPC_INTRIN(signed long);
2452 _GLIBCXX_SIMD_PPC_INTRIN(unsigned long);
2453 #endif
2454 #ifdef __VSX__
2455 _GLIBCXX_SIMD_PPC_INTRIN(signed long long);
2456 _GLIBCXX_SIMD_PPC_INTRIN(unsigned long long);
2457 #endif
2458 #undef _GLIBCXX_SIMD_PPC_INTRIN
2459 
2460 template <typename _Tp, size_t _Bytes>
2461   struct __intrinsic_type<_Tp, _Bytes, enable_if_t<__is_vectorizable_v<_Tp> && _Bytes <= 16>>
2462   {
2463     static constexpr bool _S_is_ldouble = is_same_v<_Tp, long double>;
2464 
2465     // allow _Tp == long double with -mlong-double-64
2466     static_assert(!(_S_is_ldouble && sizeof(long double) > sizeof(double)),
2467 		  "no __intrinsic_type support for 128-bit floating point on PowerPC");
2468 
2469 #ifndef __VSX__
2470     static_assert(!(is_same_v<_Tp, double>
2471 		    || (_S_is_ldouble && sizeof(long double) == sizeof(double))),
2472 		  "no __intrinsic_type support for 64-bit floating point on PowerPC w/o VSX");
2473 #endif
2474 
2475     static constexpr auto __element_type()
2476     {
2477       if constexpr (is_floating_point_v<_Tp>)
2478 	{
2479 	  if constexpr (_S_is_ldouble)
2480 	    return double {};
2481 	  else
2482 	    return _Tp {};
2483 	}
2484       else if constexpr (is_signed_v<_Tp>)
2485 	{
2486 	  if constexpr (sizeof(_Tp) == sizeof(_SChar))
2487 	    return _SChar {};
2488 	  else if constexpr (sizeof(_Tp) == sizeof(short))
2489 	    return short {};
2490 	  else if constexpr (sizeof(_Tp) == sizeof(int))
2491 	    return int {};
2492 	  else if constexpr (sizeof(_Tp) == sizeof(_LLong))
2493 	    return _LLong {};
2494 	}
2495       else
2496 	{
2497 	  if constexpr (sizeof(_Tp) == sizeof(_UChar))
2498 	    return _UChar {};
2499 	  else if constexpr (sizeof(_Tp) == sizeof(_UShort))
2500 	    return _UShort {};
2501 	  else if constexpr (sizeof(_Tp) == sizeof(_UInt))
2502 	    return _UInt {};
2503 	  else if constexpr (sizeof(_Tp) == sizeof(_ULLong))
2504 	    return _ULLong {};
2505 	}
2506     }
2507 
2508     using type = typename __intrinsic_type_impl<decltype(__element_type())>::type;
2509   };
2510 #endif // __ALTIVEC__
2511 
2512 // }}}
2513 // _SimdWrapper<bool>{{{1
2514 template <size_t _Width>
2515   struct _SimdWrapper<bool, _Width,
2516 		      void_t<typename __bool_storage_member_type<_Width>::type>>
2517   {
2518     using _BuiltinType = typename __bool_storage_member_type<_Width>::type;
2519     using value_type = bool;
2520 
2521     static constexpr size_t _S_full_size = sizeof(_BuiltinType) * __CHAR_BIT__;
2522 
2523     _GLIBCXX_SIMD_INTRINSIC constexpr _SimdWrapper<bool, _S_full_size>
2524     __as_full_vector() const
2525     { return _M_data; }
2526 
2527     _GLIBCXX_SIMD_INTRINSIC constexpr
2528     _SimdWrapper() = default;
2529 
2530     _GLIBCXX_SIMD_INTRINSIC constexpr
2531     _SimdWrapper(_BuiltinType __k) : _M_data(__k) {}
2532 
2533     _GLIBCXX_SIMD_INTRINSIC
2534     operator const _BuiltinType&() const
2535     { return _M_data; }
2536 
2537     _GLIBCXX_SIMD_INTRINSIC
2538     operator _BuiltinType&()
2539     { return _M_data; }
2540 
2541     _GLIBCXX_SIMD_INTRINSIC _BuiltinType
2542     __intrin() const
2543     { return _M_data; }
2544 
2545     _GLIBCXX_SIMD_INTRINSIC constexpr value_type
2546     operator[](size_t __i) const
2547     { return _M_data & (_BuiltinType(1) << __i); }
2548 
2549     template <size_t __i>
2550       _GLIBCXX_SIMD_INTRINSIC constexpr value_type
2551       operator[](_SizeConstant<__i>) const
2552       { return _M_data & (_BuiltinType(1) << __i); }
2553 
2554     _GLIBCXX_SIMD_INTRINSIC constexpr void
2555     _M_set(size_t __i, value_type __x)
2556     {
2557       if (__x)
2558 	_M_data |= (_BuiltinType(1) << __i);
2559       else
2560 	_M_data &= ~(_BuiltinType(1) << __i);
2561     }
2562 
2563     _GLIBCXX_SIMD_INTRINSIC constexpr bool
2564     _M_is_constprop() const
2565     { return __builtin_constant_p(_M_data); }
2566 
2567     _GLIBCXX_SIMD_INTRINSIC constexpr bool
2568     _M_is_constprop_none_of() const
2569     {
2570       if (__builtin_constant_p(_M_data))
2571 	{
2572 	  constexpr int __nbits = sizeof(_BuiltinType) * __CHAR_BIT__;
2573 	  constexpr _BuiltinType __active_mask
2574 	    = ~_BuiltinType() >> (__nbits - _Width);
2575 	  return (_M_data & __active_mask) == 0;
2576 	}
2577       return false;
2578     }
2579 
2580     _GLIBCXX_SIMD_INTRINSIC constexpr bool
2581     _M_is_constprop_all_of() const
2582     {
2583       if (__builtin_constant_p(_M_data))
2584 	{
2585 	  constexpr int __nbits = sizeof(_BuiltinType) * __CHAR_BIT__;
2586 	  constexpr _BuiltinType __active_mask
2587 	    = ~_BuiltinType() >> (__nbits - _Width);
2588 	  return (_M_data & __active_mask) == __active_mask;
2589 	}
2590       return false;
2591     }
2592 
2593     _BuiltinType _M_data;
2594   };
2595 
2596 // _SimdWrapperBase{{{1
2597 template <bool _MustZeroInitPadding, typename _BuiltinType>
2598   struct _SimdWrapperBase;
2599 
2600 template <typename _BuiltinType>
2601   struct _SimdWrapperBase<false, _BuiltinType> // no padding or no SNaNs
2602   {
2603     _GLIBCXX_SIMD_INTRINSIC constexpr
2604     _SimdWrapperBase() = default;
2605 
2606     _GLIBCXX_SIMD_INTRINSIC constexpr
2607     _SimdWrapperBase(_BuiltinType __init) : _M_data(__init) {}
2608 
2609     _BuiltinType _M_data;
2610   };
2611 
2612 template <typename _BuiltinType>
2613   struct _SimdWrapperBase<true, _BuiltinType> // with padding that must
2614 					      // never become an SNaN
2615   {
2616     _GLIBCXX_SIMD_INTRINSIC constexpr
2617     _SimdWrapperBase() : _M_data() {}
2618 
2619     _GLIBCXX_SIMD_INTRINSIC constexpr
2620     _SimdWrapperBase(_BuiltinType __init) : _M_data(__init) {}
2621 
2622     _BuiltinType _M_data;
2623   };
2624 
2625 // }}}
2626 // _SimdWrapper{{{
2627 template <typename _Tp, size_t _Width>
2628   struct _SimdWrapper<
2629     _Tp, _Width,
2630     void_t<__vector_type_t<_Tp, _Width>, __intrinsic_type_t<_Tp, _Width>>>
2631     : _SimdWrapperBase<__has_iec559_behavior<__signaling_NaN, _Tp>::value
2632 			 && sizeof(_Tp) * _Width
2633 			      == sizeof(__vector_type_t<_Tp, _Width>),
2634 		       __vector_type_t<_Tp, _Width>>
2635   {
2636     using _Base
2637       = _SimdWrapperBase<__has_iec559_behavior<__signaling_NaN, _Tp>::value
2638 			   && sizeof(_Tp) * _Width
2639 				== sizeof(__vector_type_t<_Tp, _Width>),
2640 			 __vector_type_t<_Tp, _Width>>;
2641 
2642     static_assert(__is_vectorizable_v<_Tp>);
2643     static_assert(_Width >= 2); // a width of 1 makes no sense; use _Tp directly
2644 
2645     using _BuiltinType = __vector_type_t<_Tp, _Width>;
2646     using value_type = _Tp;
2647 
2648     static inline constexpr size_t _S_full_size
2649       = sizeof(_BuiltinType) / sizeof(value_type);
2650     static inline constexpr int _S_size = _Width;
2651     static inline constexpr bool _S_is_partial = _S_full_size != _S_size;
2652 
2653     using _Base::_M_data;
2654 
2655     _GLIBCXX_SIMD_INTRINSIC constexpr _SimdWrapper<_Tp, _S_full_size>
2656     __as_full_vector() const
2657     { return _M_data; }
2658 
2659     _GLIBCXX_SIMD_INTRINSIC constexpr
2660     _SimdWrapper(initializer_list<_Tp> __init)
2661     : _Base(__generate_from_n_evaluations<_Width, _BuiltinType>(
2662 	      [&](auto __i) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
2663 		return __init.begin()[__i.value];
2664 	      })) {}
2665 
2666     _GLIBCXX_SIMD_INTRINSIC constexpr
2667     _SimdWrapper() = default;
2668 
2669     _GLIBCXX_SIMD_INTRINSIC constexpr
2670     _SimdWrapper(const _SimdWrapper&) = default;
2671 
2672     _GLIBCXX_SIMD_INTRINSIC constexpr
2673     _SimdWrapper(_SimdWrapper&&) = default;
2674 
2675     _GLIBCXX_SIMD_INTRINSIC constexpr _SimdWrapper&
2676     operator=(const _SimdWrapper&) = default;
2677 
2678     _GLIBCXX_SIMD_INTRINSIC constexpr _SimdWrapper&
2679     operator=(_SimdWrapper&&) = default;
2680 
2681     template <typename _V, typename = enable_if_t<disjunction_v<
2682 			     is_same<_V, __vector_type_t<_Tp, _Width>>,
2683 			     is_same<_V, __intrinsic_type_t<_Tp, _Width>>>>>
2684       _GLIBCXX_SIMD_INTRINSIC constexpr
2685       _SimdWrapper(_V __x)
2686       // __vector_bitcast can convert e.g. __m128 to __vector(2) float
2687       : _Base(__vector_bitcast<_Tp, _Width>(__x)) {}
2688 
2689     template <typename... _As,
2690 	      typename = enable_if_t<((is_same_v<simd_abi::scalar, _As> && ...)
2691 				      && sizeof...(_As) <= _Width)>>
2692       _GLIBCXX_SIMD_INTRINSIC constexpr
2693       operator _SimdTuple<_Tp, _As...>() const
2694       {
2695 	return __generate_from_n_evaluations<sizeof...(_As), _SimdTuple<_Tp, _As...>>(
2696 		 [&](auto __i) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA
2697 		 { return _M_data[int(__i)]; });
2698       }
2699 
2700     _GLIBCXX_SIMD_INTRINSIC constexpr
2701     operator const _BuiltinType&() const
2702     { return _M_data; }
2703 
2704     _GLIBCXX_SIMD_INTRINSIC constexpr
2705     operator _BuiltinType&()
2706     { return _M_data; }
2707 
2708     _GLIBCXX_SIMD_INTRINSIC constexpr _Tp
2709     operator[](size_t __i) const
2710     { return _M_data[__i]; }
2711 
2712     template <size_t __i>
2713       _GLIBCXX_SIMD_INTRINSIC constexpr _Tp
2714       operator[](_SizeConstant<__i>) const
2715       { return _M_data[__i]; }
2716 
2717     _GLIBCXX_SIMD_INTRINSIC constexpr void
2718     _M_set(size_t __i, _Tp __x)
2719     {
2720       if (__builtin_is_constant_evaluated())
2721 	_M_data = __generate_from_n_evaluations<_Width, _BuiltinType>([&](auto __j) {
2722 		    return __j == __i ? __x : _M_data[__j()];
2723 		  });
2724       else
2725 	_M_data[__i] = __x;
2726     }
2727 
2728     _GLIBCXX_SIMD_INTRINSIC
2729     constexpr bool
2730     _M_is_constprop() const
2731     { return __builtin_constant_p(_M_data); }
2732 
2733     _GLIBCXX_SIMD_INTRINSIC constexpr bool
2734     _M_is_constprop_none_of() const
2735     {
2736       if (__builtin_constant_p(_M_data))
2737 	{
2738 	  bool __r = true;
2739 	  if constexpr (is_floating_point_v<_Tp>)
2740 	    {
2741 	      using _Ip = __int_for_sizeof_t<_Tp>;
2742 	      const auto __intdata = __vector_bitcast<_Ip>(_M_data);
2743 	      __execute_n_times<_Width>(
2744 		[&](auto __i) { __r &= __intdata[__i.value] == _Ip(); });
2745 	    }
2746 	  else
2747 	    __execute_n_times<_Width>(
2748 	      [&](auto __i) { __r &= _M_data[__i.value] == _Tp(); });
2749 	  if (__builtin_constant_p(__r))
2750 	    return __r;
2751 	}
2752       return false;
2753     }
2754 
2755     _GLIBCXX_SIMD_INTRINSIC constexpr bool
2756     _M_is_constprop_all_of() const
2757     {
2758       if (__builtin_constant_p(_M_data))
2759 	{
2760 	  bool __r = true;
2761 	  if constexpr (is_floating_point_v<_Tp>)
2762 	    {
2763 	      using _Ip = __int_for_sizeof_t<_Tp>;
2764 	      const auto __intdata = __vector_bitcast<_Ip>(_M_data);
2765 	      __execute_n_times<_Width>(
2766 		[&](auto __i) { __r &= __intdata[__i.value] == ~_Ip(); });
2767 	    }
2768 	  else
2769 	    __execute_n_times<_Width>(
2770 	      [&](auto __i) { __r &= _M_data[__i.value] == ~_Tp(); });
2771 	  if (__builtin_constant_p(__r))
2772 	    return __r;
2773 	}
2774       return false;
2775     }
2776   };
2777 
2778 // }}}
2779 
2780 // __vectorized_sizeof {{{
2781 template <typename _Tp>
2782   constexpr size_t
2783   __vectorized_sizeof()
2784   {
2785     if constexpr (!__is_vectorizable_v<_Tp>)
2786       return 0;
2787 
2788     if constexpr (sizeof(_Tp) <= 8)
2789       {
2790 	// X86:
2791 	if constexpr (__have_avx512bw)
2792 	  return 64;
2793 	if constexpr (__have_avx512f && sizeof(_Tp) >= 4)
2794 	  return 64;
2795 	if constexpr (__have_avx2)
2796 	  return 32;
2797 	if constexpr (__have_avx && is_floating_point_v<_Tp>)
2798 	  return 32;
2799 	if constexpr (__have_sse2)
2800 	  return 16;
2801 	if constexpr (__have_sse && is_same_v<_Tp, float>)
2802 	  return 16;
2803 	/* The following is too much trouble because of mixed MMX and x87 code.
2804 	 * While nothing here explicitly uses MMX instructions or registers,
2805 	 * they are still emitted, but no EMMS cleanup is done.
2806 	if constexpr (__have_mmx && sizeof(_Tp) <= 4 && is_integral_v<_Tp>)
2807 	  return 8;
2808 	 */
2809 
2810 	// PowerPC:
2811 	if constexpr (__have_power8vec
2812 		      || (__have_power_vmx && (sizeof(_Tp) < 8))
2813 		      || (__have_power_vsx && is_floating_point_v<_Tp>) )
2814 	  return 16;
2815 
2816 	// ARM:
2817 	if constexpr (__have_neon_a64
2818 		      || (__have_neon_a32 && !is_same_v<_Tp, double>) )
2819 	  return 16;
2820 	if constexpr (__have_neon
2821 		      && sizeof(_Tp) < 8
2822 		      // Only allow fp if the user allows non-IEC 559 fp (e.g.
2823 		      // via -ffast-math), because ARMv7 NEON fp does not
2824 		      // conform to IEC 559.
2825 		      && (__support_neon_float || !is_floating_point_v<_Tp>))
2826 	  return 16;
2827       }
2828 
2829     return sizeof(_Tp);
2830   }
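
// Worked example (illustrative): on an x86 target with AVX2 but no AVX-512,
// every branch above the __have_avx2 test fails for float, so
// __vectorized_sizeof<float>() == 32, i.e. the native ABI holds 8 floats.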
2831 
2832 // }}}
2833 namespace simd_abi {
2834 // most of simd_abi is defined in simd_detail.h
2835 template <typename _Tp>
2836   inline constexpr int max_fixed_size
2837     = (__have_avx512bw && sizeof(_Tp) == 1) ? 64 : 32;
2838 
2839 // compatible {{{
2840 #if defined __x86_64__ || defined __aarch64__
2841 template <typename _Tp>
2842   using compatible = conditional_t<(sizeof(_Tp) <= 8), _VecBuiltin<16>, scalar>;
2843 #elif defined __ARM_NEON
2844 // FIXME: not sure, probably needs to be scalar (or dependent on the hard-float
2845 // ABI?)
2846 template <typename _Tp>
2847   using compatible
2848     = conditional_t<(sizeof(_Tp) < 8
2849 		     && (__support_neon_float || !is_floating_point_v<_Tp>)),
2850 		    _VecBuiltin<16>, scalar>;
2851 #else
2852 template <typename>
2853   using compatible = scalar;
2854 #endif
2855 
2856 // }}}
2857 // native {{{
2858 template <typename _Tp>
2859   constexpr auto
2860   __determine_native_abi()
2861   {
2862     constexpr size_t __bytes = __vectorized_sizeof<_Tp>();
2863     if constexpr (__bytes == sizeof(_Tp))
2864       return static_cast<scalar*>(nullptr);
2865     else if constexpr (__have_avx512vl || (__have_avx512f && __bytes == 64))
2866       return static_cast<_VecBltnBtmsk<__bytes>*>(nullptr);
2867     else
2868       return static_cast<_VecBuiltin<__bytes>*>(nullptr);
2869   }
2870 
2871 template <typename _Tp, typename = enable_if_t<__is_vectorizable_v<_Tp>>>
2872   using native = remove_pointer_t<decltype(__determine_native_abi<_Tp>())>;
2873 
2874 // }}}
2875 // __default_abi {{{
2876 #if defined _GLIBCXX_SIMD_DEFAULT_ABI
2877 template <typename _Tp>
2878   using __default_abi = _GLIBCXX_SIMD_DEFAULT_ABI<_Tp>;
2879 #else
2880 template <typename _Tp>
2881   using __default_abi = compatible<_Tp>;
2882 #endif
2883 
2884 // }}}
2885 } // namespace simd_abi
2886 
2887 // traits {{{1
2888 template <typename _Tp>
2889   struct is_simd_flag_type
2890   : false_type
2891   {};
2892 
2893 template <>
2894   struct is_simd_flag_type<element_aligned_tag>
2895   : true_type
2896   {};
2897 
2898 template <>
2899   struct is_simd_flag_type<vector_aligned_tag>
2900   : true_type
2901   {};
2902 
2903 template <size_t _Np>
2904   struct is_simd_flag_type<overaligned_tag<_Np>>
2905   : __bool_constant<(_Np > 0) and __has_single_bit(_Np)>
2906   {};
2907 
2908 template <typename _Tp>
2909   inline constexpr bool is_simd_flag_type_v = is_simd_flag_type<_Tp>::value;
2910 
2911 template <typename _Tp, typename = enable_if_t<is_simd_flag_type_v<_Tp>>>
2912   using _IsSimdFlagType = _Tp;
2913 
2914 // is_abi_tag {{{2
2915 template <typename _Tp, typename = void_t<>>
2916   struct is_abi_tag : false_type {};
2917 
2918 template <typename _Tp>
2919   struct is_abi_tag<_Tp, void_t<typename _Tp::_IsValidAbiTag>>
2920   : public _Tp::_IsValidAbiTag {};
2921 
2922 template <typename _Tp>
2923   inline constexpr bool is_abi_tag_v = is_abi_tag<_Tp>::value;
2924 
2925 // is_simd(_mask) {{{2
2926 template <typename _Tp>
2927   struct is_simd : public false_type {};
2928 
2929 template <typename _Tp>
2930   inline constexpr bool is_simd_v = is_simd<_Tp>::value;
2931 
2932 template <typename _Tp>
2933   struct is_simd_mask : public false_type {};
2934 
2935 template <typename _Tp>
2936   inline constexpr bool is_simd_mask_v = is_simd_mask<_Tp>::value;
2937 
2938 // simd_size {{{2
2939 template <typename _Tp, typename _Abi, typename = void>
2940   struct __simd_size_impl {};
2941 
2942 template <typename _Tp, typename _Abi>
2943   struct __simd_size_impl<
2944     _Tp, _Abi,
2945     enable_if_t<conjunction_v<__is_vectorizable<_Tp>, is_abi_tag<_Abi>>>>
2946     : _SizeConstant<_Abi::template _S_size<_Tp>> {};
2947 
2948 template <typename _Tp, typename _Abi = simd_abi::__default_abi<_Tp>>
2949   struct simd_size : __simd_size_impl<_Tp, _Abi> {};
2950 
2951 template <typename _Tp, typename _Abi = simd_abi::__default_abi<_Tp>>
2952   inline constexpr size_t simd_size_v = simd_size<_Tp, _Abi>::value;
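
// Example (illustrative):
//   static_assert(simd_size_v<float, simd_abi::fixed_size<8>> == 8);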
2953 
2954 // simd_abi::deduce {{{2
2955 template <typename _Tp, size_t _Np, typename = void>
2956   struct __deduce_impl;
2957 
2958 namespace simd_abi {
2959 /**
2960  * @tparam _Tp   The requested `value_type` for the elements.
2961  * @tparam _Np    The requested number of elements.
2962  * @tparam _Abis This parameter is ignored, since this implementation cannot
2963  * make any use of it. Either a good native ABI is matched and used as the
2964  * `type` alias, or the `fixed_size<_Np>` ABI is used, which internally is
2965  * built from the best-matching native ABIs.
2966  */
2967 template <typename _Tp, size_t _Np, typename...>
2968   struct deduce : __deduce_impl<_Tp, _Np> {};
2969 
2970 template <typename _Tp, size_t _Np, typename... _Abis>
2971   using deduce_t = typename deduce<_Tp, _Np, _Abis...>::type;
2972 } // namespace simd_abi
2973 
2974 // }}}2
2975 // rebind_simd {{{2
2976 template <typename _Tp, typename _V, typename = void>
2977   struct rebind_simd;
2978 
2979 template <typename _Tp, typename _Up, typename _Abi>
2980   struct rebind_simd<_Tp, simd<_Up, _Abi>,
2981 		     void_t<simd_abi::deduce_t<_Tp, simd_size_v<_Up, _Abi>, _Abi>>>
2982   { using type = simd<_Tp, simd_abi::deduce_t<_Tp, simd_size_v<_Up, _Abi>, _Abi>>; };
2983 
2984 template <typename _Tp, typename _Up, typename _Abi>
2985   struct rebind_simd<_Tp, simd_mask<_Up, _Abi>,
2986 		     void_t<simd_abi::deduce_t<_Tp, simd_size_v<_Up, _Abi>, _Abi>>>
2987   { using type = simd_mask<_Tp, simd_abi::deduce_t<_Tp, simd_size_v<_Up, _Abi>, _Abi>>; };
2988 
2989 template <typename _Tp, typename _V>
2990   using rebind_simd_t = typename rebind_simd<_Tp, _V>::type;
2991 
2992 // resize_simd {{{2
2993 template <int _Np, typename _V, typename = void>
2994   struct resize_simd;
2995 
2996 template <int _Np, typename _Tp, typename _Abi>
2997   struct resize_simd<_Np, simd<_Tp, _Abi>, void_t<simd_abi::deduce_t<_Tp, _Np, _Abi>>>
2998   { using type = simd<_Tp, simd_abi::deduce_t<_Tp, _Np, _Abi>>; };
2999 
3000 template <int _Np, typename _Tp, typename _Abi>
3001   struct resize_simd<_Np, simd_mask<_Tp, _Abi>, void_t<simd_abi::deduce_t<_Tp, _Np, _Abi>>>
3002   { using type = simd_mask<_Tp, simd_abi::deduce_t<_Tp, _Np, _Abi>>; };
3003 
3004 template <int _Np, typename _V>
3005   using resize_simd_t = typename resize_simd<_Np, _V>::type;
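
// Sketch (illustrative): rebind keeps the element count while deducing an ABI
// for the new value_type; resize keeps the value_type:
//   using _V = simd<float, simd_abi::deduce_t<float, 8>>;
//   static_assert(rebind_simd_t<int, _V>::size() == 8);
//   static_assert(resize_simd_t<4, _V>::size() == 4);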
3006 
3007 // }}}2
3008 // memory_alignment {{{2
3009 template <typename _Tp, typename _Up = typename _Tp::value_type>
3010   struct memory_alignment
3011   : public _SizeConstant<vector_aligned_tag::_S_alignment<_Tp, _Up>> {};
3012 
3013 template <typename _Tp, typename _Up = typename _Tp::value_type>
3014   inline constexpr size_t memory_alignment_v = memory_alignment<_Tp, _Up>::value;
3015 
3016 // class template simd [simd] {{{1
3017 template <typename _Tp, typename _Abi = simd_abi::__default_abi<_Tp>>
3018   class simd;
3019 
3020 template <typename _Tp, typename _Abi>
3021   struct is_simd<simd<_Tp, _Abi>> : public true_type {};
3022 
3023 template <typename _Tp>
3024   using native_simd = simd<_Tp, simd_abi::native<_Tp>>;
3025 
3026 template <typename _Tp, int _Np>
3027   using fixed_size_simd = simd<_Tp, simd_abi::fixed_size<_Np>>;
3028 
3029 template <typename _Tp, size_t _Np>
3030   using __deduced_simd = simd<_Tp, simd_abi::deduce_t<_Tp, _Np>>;
3031 
3032 // class template simd_mask [simd_mask] {{{1
3033 template <typename _Tp, typename _Abi = simd_abi::__default_abi<_Tp>>
3034   class simd_mask;
3035 
3036 template <typename _Tp, typename _Abi>
3037   struct is_simd_mask<simd_mask<_Tp, _Abi>> : public true_type {};
3038 
3039 template <typename _Tp>
3040   using native_simd_mask = simd_mask<_Tp, simd_abi::native<_Tp>>;
3041 
3042 template <typename _Tp, int _Np>
3043   using fixed_size_simd_mask = simd_mask<_Tp, simd_abi::fixed_size<_Np>>;
3044 
3045 template <typename _Tp, size_t _Np>
3046   using __deduced_simd_mask = simd_mask<_Tp, simd_abi::deduce_t<_Tp, _Np>>;
3047 
3048 // casts [simd.casts] {{{1
3049 // static_simd_cast {{{2
3050 template <typename _Tp, typename _Up, typename _Ap, bool = is_simd_v<_Tp>, typename = void>
3051   struct __static_simd_cast_return_type;
3052 
3053 template <typename _Tp, typename _A0, typename _Up, typename _Ap>
3054   struct __static_simd_cast_return_type<simd_mask<_Tp, _A0>, _Up, _Ap, false, void>
3055   : __static_simd_cast_return_type<simd<_Tp, _A0>, _Up, _Ap> {};
3056 
3057 template <typename _Tp, typename _Up, typename _Ap>
3058   struct __static_simd_cast_return_type<
3059     _Tp, _Up, _Ap, true, enable_if_t<_Tp::size() == simd_size_v<_Up, _Ap>>>
3060   { using type = _Tp; };
3061 
3062 template <typename _Tp, typename _Ap>
3063   struct __static_simd_cast_return_type<_Tp, _Tp, _Ap, false,
3064 #ifdef _GLIBCXX_SIMD_FIX_P2TS_ISSUE66
3065 					enable_if_t<__is_vectorizable_v<_Tp>>
3066 #else
3067 					void
3068 #endif
3069 					>
3070   { using type = simd<_Tp, _Ap>; };
3071 
3072 template <typename _Tp, typename = void>
3073   struct __safe_make_signed { using type = _Tp; };
3074 
3075 template <typename _Tp>
3076   struct __safe_make_signed<_Tp, enable_if_t<is_integral_v<_Tp>>>
3077   {
3078     // the extra make_unsigned_t is because of PR85951
3079     using type = make_signed_t<make_unsigned_t<_Tp>>;
3080   };
3081 
3082 template <typename _Tp>
3083   using safe_make_signed_t = typename __safe_make_signed<_Tp>::type;
3084 
3085 template <typename _Tp, typename _Up, typename _Ap>
3086   struct __static_simd_cast_return_type<_Tp, _Up, _Ap, false,
3087 #ifdef _GLIBCXX_SIMD_FIX_P2TS_ISSUE66
3088 					enable_if_t<__is_vectorizable_v<_Tp>>
3089 #else
3090 					void
3091 #endif
3092 					>
3093   {
3094     using type = conditional_t<
3095       (is_integral_v<_Up> && is_integral_v<_Tp> &&
3096 #ifndef _GLIBCXX_SIMD_FIX_P2TS_ISSUE65
3097        is_signed_v<_Up> != is_signed_v<_Tp> &&
3098 #endif
3099        is_same_v<safe_make_signed_t<_Up>, safe_make_signed_t<_Tp>>),
3100       simd<_Tp, _Ap>, fixed_size_simd<_Tp, simd_size_v<_Up, _Ap>>>;
3101   };
3102 
3103 template <typename _Tp, typename _Up, typename _Ap,
3104 	  typename _R
3105 	  = typename __static_simd_cast_return_type<_Tp, _Up, _Ap>::type>
3106   _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR _R
3107   static_simd_cast(const simd<_Up, _Ap>& __x)
3108   {
3109     if constexpr (is_same<_R, simd<_Up, _Ap>>::value)
3110       return __x;
3111     else
3112       {
3113 	_SimdConverter<_Up, _Ap, typename _R::value_type, typename _R::abi_type>
3114 	  __c;
3115 	return _R(__private_init, __c(__data(__x)));
3116       }
3117   }
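
// Examples (illustrative; `__x` is a hypothetical native_simd<int>):
//   static_simd_cast<unsigned>(__x);  // signedness-only change keeps the ABI
//   static_simd_cast<float>(__x);     // element type change yields
//                                     // fixed_size_simd<float, __x.size()>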
3118 
3119 namespace __proposed {
3120 template <typename _Tp, typename _Up, typename _Ap,
3121 	  typename _R
3122 	  = typename __static_simd_cast_return_type<_Tp, _Up, _Ap>::type>
3123   _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR typename _R::mask_type
3124   static_simd_cast(const simd_mask<_Up, _Ap>& __x)
3125   {
3126     using _RM = typename _R::mask_type;
3127     return {__private_init, _RM::abi_type::_MaskImpl::template _S_convert<
3128 			      typename _RM::simd_type::value_type>(__x)};
3129   }
3130 
3131 template <typename _To, typename _Up, typename _Abi>
3132   _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR
3133   _To
3134   simd_bit_cast(const simd<_Up, _Abi>& __x)
3135   {
3136     using _Tp = typename _To::value_type;
3137     using _ToMember = typename _SimdTraits<_Tp, typename _To::abi_type>::_SimdMember;
3138     using _From = simd<_Up, _Abi>;
3139     using _FromMember = typename _SimdTraits<_Up, _Abi>::_SimdMember;
3140     // with concepts, the following should be constraints
3141     static_assert(sizeof(_To) == sizeof(_From));
3142     static_assert(is_trivially_copyable_v<_Tp> && is_trivially_copyable_v<_Up>);
3143     static_assert(is_trivially_copyable_v<_ToMember> && is_trivially_copyable_v<_FromMember>);
3144 #if __has_builtin(__builtin_bit_cast)
3145     return {__private_init, __builtin_bit_cast(_ToMember, __data(__x))};
3146 #else
3147     return {__private_init, __bit_cast<_ToMember>(__data(__x))};
3148 #endif
3149   }
3150 
3151 template <typename _To, typename _Up, typename _Abi>
3152   _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR
3153   _To
3154   simd_bit_cast(const simd_mask<_Up, _Abi>& __x)
3155   {
3156     using _From = simd_mask<_Up, _Abi>;
3157     static_assert(sizeof(_To) == sizeof(_From));
3158     static_assert(is_trivially_copyable_v<_From>);
3159     // _To can be simd<T, A>, specifically simd<T, fixed_size<N>>, in which
3160     // case _To is not trivially copyable.
3161     if constexpr (is_simd_v<_To>)
3162       {
3163 	using _Tp = typename _To::value_type;
3164 	using _ToMember = typename _SimdTraits<_Tp, typename _To::abi_type>::_SimdMember;
3165 	static_assert(is_trivially_copyable_v<_ToMember>);
3166 #if __has_builtin(__builtin_bit_cast)
3167 	return {__private_init, __builtin_bit_cast(_ToMember, __x)};
3168 #else
3169 	return {__private_init, __bit_cast<_ToMember>(__x)};
3170 #endif
3171       }
3172     else
3173       {
3174 	static_assert(is_trivially_copyable_v<_To>);
3175 #if __has_builtin(__builtin_bit_cast)
3176 	return __builtin_bit_cast(_To, __x);
3177 #else
3178 	return __bit_cast<_To>(__x);
3179 #endif
3180       }
3181   }
3182 } // namespace __proposed
3183 
3184 // simd_cast {{{2
3185 template <typename _Tp, typename _Up, typename _Ap,
3186 	  typename _To = __value_type_or_identity_t<_Tp>>
3187   _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR auto
3188   simd_cast(const simd<_ValuePreserving<_Up, _To>, _Ap>& __x)
3189     -> decltype(static_simd_cast<_Tp>(__x))
3190   { return static_simd_cast<_Tp>(__x); }
3191 
3192 namespace __proposed {
3193 template <typename _Tp, typename _Up, typename _Ap,
3194 	  typename _To = __value_type_or_identity_t<_Tp>>
3195   _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR auto
3196   simd_cast(const simd_mask<_ValuePreserving<_Up, _To>, _Ap>& __x)
3197     -> decltype(static_simd_cast<_Tp>(__x))
3198   { return static_simd_cast<_Tp>(__x); }
3199 } // namespace __proposed
3200 
3201 // }}}2
3202 // resizing_simd_cast {{{
3203 namespace __proposed {
3204 /* Proposed spec:
3205 
3206 template <class T, class U, class Abi>
3207 T resizing_simd_cast(const simd<U, Abi>& x)
3208 
3209 p1  Constraints:
3210     - is_simd_v<T> is true and
3211     - T::value_type is the same type as U
3212 
3213 p2  Returns:
3214     A simd object with the i^th element initialized to x[i] for all i in the
3215     range of [0, min(T::size(), simd_size_v<U, Abi>)). If T::size() is larger
3216     than simd_size_v<U, Abi>, the remaining elements are value-initialized.
3217 
3218 template <class T, class U, class Abi>
3219 T resizing_simd_cast(const simd_mask<U, Abi>& x)
3220 
3221 p1  Constraints: is_simd_mask_v<T> is true
3222 
3223 p2  Returns:
3224     A simd_mask object with the i^th element initialized to x[i] for all i in
3225     the range [0, min(T::size(), simd_size_v<U, Abi>)). If T::size() is larger
3226     than simd_size_v<U, Abi>, the remaining elements are initialized to false.
3227 
3228  */
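
// Usage sketch of the proposed semantics (illustrative; assumes stdx =
// std::experimental):
//
//   stdx::fixed_size_simd<int, 4> __x([](int __i) { return __i; });
//   auto __y = stdx::__proposed::resizing_simd_cast<
//                stdx::fixed_size_simd<int, 8>>(__x);
//   // __y == {0, 1, 2, 3, 0, 0, 0, 0}: excess elements are value-initialized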
3229 
3230 template <typename _Tp, typename _Up, typename _Ap>
3231   _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR enable_if_t<
3232   conjunction_v<is_simd<_Tp>, is_same<typename _Tp::value_type, _Up>>, _Tp>
3233   resizing_simd_cast(const simd<_Up, _Ap>& __x)
3234   {
3235     if constexpr (is_same_v<typename _Tp::abi_type, _Ap>)
3236       return __x;
3237     else if (__builtin_is_constant_evaluated())
3238       return _Tp([&](auto __i) constexpr {
3239 	       return __i < simd_size_v<_Up, _Ap> ? __x[__i] : _Up();
3240 	     });
3241     else if constexpr (simd_size_v<_Up, _Ap> == 1)
3242       {
3243 	_Tp __r{};
3244 	__r[0] = __x[0];
3245 	return __r;
3246       }
3247     else if constexpr (_Tp::size() == 1)
3248       return __x[0];
3249     else if constexpr (sizeof(_Tp) == sizeof(__x)
3250 		       && !__is_fixed_size_abi_v<_Ap>)
3251       return {__private_init,
3252 	      __vector_bitcast<typename _Tp::value_type, _Tp::size()>(
3253 		_Ap::_S_masked(__data(__x))._M_data)};
3254     else
3255       {
3256 	_Tp __r{};
3257 	__builtin_memcpy(&__data(__r), &__data(__x),
3258 			 sizeof(_Up)
3259 			   * std::min(_Tp::size(), simd_size_v<_Up, _Ap>));
3260 	return __r;
3261       }
3262   }
3263 
3264 template <typename _Tp, typename _Up, typename _Ap>
3265   _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR
3266   enable_if_t<is_simd_mask_v<_Tp>, _Tp>
3267   resizing_simd_cast(const simd_mask<_Up, _Ap>& __x)
3268   {
3269     return {__private_init, _Tp::abi_type::_MaskImpl::template _S_convert<
3270 			      typename _Tp::simd_type::value_type>(__x)};
3271   }
3272 } // namespace __proposed
3273 
3274 // }}}
3275 // to_fixed_size {{{2
3276 template <typename _Tp, int _Np>
3277   _GLIBCXX_SIMD_INTRINSIC fixed_size_simd<_Tp, _Np>
3278   to_fixed_size(const fixed_size_simd<_Tp, _Np>& __x)
3279   { return __x; }
3280 
3281 template <typename _Tp, int _Np>
3282   _GLIBCXX_SIMD_INTRINSIC fixed_size_simd_mask<_Tp, _Np>
3283   to_fixed_size(const fixed_size_simd_mask<_Tp, _Np>& __x)
3284   { return __x; }
3285 
3286 template <typename _Tp, typename _Ap>
3287   _GLIBCXX_SIMD_INTRINSIC fixed_size_simd<_Tp, simd_size_v<_Tp, _Ap>>
3288   to_fixed_size(const simd<_Tp, _Ap>& __x)
3289   {
3290     using _Rp = fixed_size_simd<_Tp, simd_size_v<_Tp, _Ap>>;
3291     return _Rp([&__x](auto __i) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA { return __x[__i]; });
3292   }
3293 
3294 template <typename _Tp, typename _Ap>
3295   _GLIBCXX_SIMD_INTRINSIC fixed_size_simd_mask<_Tp, simd_size_v<_Tp, _Ap>>
3296   to_fixed_size(const simd_mask<_Tp, _Ap>& __x)
3297   {
3298     return {__private_init,
3299 	    [&](auto __i) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA { return __x[__i]; }};
3300   }
3301 
3302 // to_native {{{2
3303 template <typename _Tp, int _Np>
3304   _GLIBCXX_SIMD_INTRINSIC
3305   enable_if_t<(_Np == native_simd<_Tp>::size()), native_simd<_Tp>>
3306   to_native(const fixed_size_simd<_Tp, _Np>& __x)
3307   {
3308     alignas(memory_alignment_v<native_simd<_Tp>>) _Tp __mem[_Np];
3309     __x.copy_to(__mem, vector_aligned);
3310     return {__mem, vector_aligned};
3311   }
3312 
3313 template <typename _Tp, int _Np>
3314   _GLIBCXX_SIMD_INTRINSIC
3315   enable_if_t<(_Np == native_simd_mask<_Tp>::size()), native_simd_mask<_Tp>>
3316   to_native(const fixed_size_simd_mask<_Tp, _Np>& __x)
3317   {
3318     return native_simd_mask<_Tp>(
3319 	     __private_init,
3320 	     [&](auto __i) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA { return __x[__i]; });
3321   }
3322 
3323 // to_compatible {{{2
3324 template <typename _Tp, int _Np>
3325   _GLIBCXX_SIMD_INTRINSIC enable_if_t<(_Np == simd<_Tp>::size()), simd<_Tp>>
3326   to_compatible(const simd<_Tp, simd_abi::fixed_size<_Np>>& __x)
3327   {
3328     alignas(memory_alignment_v<simd<_Tp>>) _Tp __mem[_Np];
3329     __x.copy_to(__mem, vector_aligned);
3330     return {__mem, vector_aligned};
3331   }
3332 
3333 template <typename _Tp, int _Np>
3334   _GLIBCXX_SIMD_INTRINSIC
3335   enable_if_t<(_Np == simd_mask<_Tp>::size()), simd_mask<_Tp>>
3336   to_compatible(const simd_mask<_Tp, simd_abi::fixed_size<_Np>>& __x)
3337   {
3338     return simd_mask<_Tp>(
3339 	     __private_init,
3340 	     [&](auto __i) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA { return __x[__i]; });
3341   }
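
// Usage sketch (illustrative; assumes stdx = std::experimental): the three
// conversions above move values unchanged between the fixed_size, native,
// and compatible ABI worlds:
//
//   stdx::native_simd<float> __v = 3.f;
//   auto __f = stdx::to_fixed_size(__v); // fixed_size_simd<float, __v.size()>
//   auto __n = stdx::to_native(__f);     // native_simd<float> again
//   // to_compatible(...) is the analogue for simd<float>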
3342 
3343 // masked assignment [simd_mask.where] {{{1
3344 
3345 // where_expression {{{1
3346 // const_where_expression<M, T> {{{2
3347 template <typename _M, typename _Tp>
3348   class const_where_expression
3349   {
3350     using _V = _Tp;
3351     static_assert(is_same_v<_V, __remove_cvref_t<_Tp>>);
3352 
3353     struct _Wrapper { using value_type = _V; };
3354 
3355   protected:
3356     using _Impl = typename _V::_Impl;
3357 
3358     using value_type =
3359       typename conditional_t<is_arithmetic_v<_V>, _Wrapper, _V>::value_type;
3360 
3361     _GLIBCXX_SIMD_INTRINSIC friend const _M&
3362     __get_mask(const const_where_expression& __x)
3363     { return __x._M_k; }
3364 
3365     _GLIBCXX_SIMD_INTRINSIC friend const _Tp&
3366     __get_lvalue(const const_where_expression& __x)
3367     { return __x._M_value; }
3368 
3369     const _M& _M_k;
3370     _Tp& _M_value;
3371 
3372   public:
3373     const_where_expression(const const_where_expression&) = delete;
3374 
3375     const_where_expression& operator=(const const_where_expression&) = delete;
3376 
3377     _GLIBCXX_SIMD_INTRINSIC constexpr
3378     const_where_expression(const _M& __kk, const _Tp& dd)
3379     : _M_k(__kk), _M_value(const_cast<_Tp&>(dd)) {}
3380 
3381     _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR _V
3382     operator-() const&&
3383     {
3384       return {__private_init,
3385 	      _Impl::template _S_masked_unary<negate>(__data(_M_k),
3386 						      __data(_M_value))};
3387     }
3388 
3389     template <typename _Up, typename _Flags>
3390       [[nodiscard]] _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR _V
3391       copy_from(const _LoadStorePtr<_Up, value_type>* __mem, _IsSimdFlagType<_Flags>) const&&
3392       {
3393 	return {__private_init,
3394 		_Impl::_S_masked_load(__data(_M_value), __data(_M_k),
3395 				      _Flags::template _S_apply<_V>(__mem))};
3396       }
3397 
3398     template <typename _Up, typename _Flags>
3399       _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR void
3400       copy_to(_LoadStorePtr<_Up, value_type>* __mem, _IsSimdFlagType<_Flags>) const&&
3401       {
3402 	_Impl::_S_masked_store(__data(_M_value),
3403 			       _Flags::template _S_apply<_V>(__mem),
3404 			       __data(_M_k));
3405       }
3406   };
3407 
3408 // const_where_expression<bool, T> {{{2
3409 template <typename _Tp>
3410   class const_where_expression<bool, _Tp>
3411   {
3412     using _M = bool;
3413     using _V = _Tp;
3414 
3415     static_assert(is_same_v<_V, __remove_cvref_t<_Tp>>);
3416 
3417     struct _Wrapper { using value_type = _V; };
3418 
3419   protected:
3420     using value_type
3421       = typename conditional_t<is_arithmetic_v<_V>, _Wrapper, _V>::value_type;
3422 
3423     _GLIBCXX_SIMD_INTRINSIC friend const _M&
3424     __get_mask(const const_where_expression& __x)
3425     { return __x._M_k; }
3426 
3427     _GLIBCXX_SIMD_INTRINSIC friend const _Tp&
3428     __get_lvalue(const const_where_expression& __x)
3429     { return __x._M_value; }
3430 
3431     const bool _M_k;
3432     _Tp& _M_value;
3433 
3434   public:
3435     const_where_expression(const const_where_expression&) = delete;
3436     const_where_expression& operator=(const const_where_expression&) = delete;
3437 
3438     _GLIBCXX_SIMD_INTRINSIC constexpr
3439     const_where_expression(const bool __kk, const _Tp& dd)
3440     : _M_k(__kk), _M_value(const_cast<_Tp&>(dd)) {}
3441 
3442     _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR _V
3443     operator-() const&&
3444     { return _M_k ? -_M_value : _M_value; }
3445 
3446     template <typename _Up, typename _Flags>
3447       [[nodiscard]] _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR _V
3448       copy_from(const _LoadStorePtr<_Up, value_type>* __mem, _IsSimdFlagType<_Flags>) const&&
3449       { return _M_k ? static_cast<_V>(__mem[0]) : _M_value; }
3450 
3451     template <typename _Up, typename _Flags>
3452       _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR void
3453       copy_to(_LoadStorePtr<_Up, value_type>* __mem, _IsSimdFlagType<_Flags>) const&&
3454       {
3455 	if (_M_k)
3456 	  __mem[0] = _M_value;
3457       }
3458   };
3459 
3460 // where_expression<M, T> {{{2
3461 template <typename _M, typename _Tp>
3462   class where_expression : public const_where_expression<_M, _Tp>
3463   {
3464     using _Impl = typename const_where_expression<_M, _Tp>::_Impl;
3465 
3466     static_assert(!is_const<_Tp>::value,
3467 		  "where_expression may only be instantiated with a non-const "
3468 		  "_Tp parameter");
3469 
3470     using typename const_where_expression<_M, _Tp>::value_type;
3471     using const_where_expression<_M, _Tp>::_M_k;
3472     using const_where_expression<_M, _Tp>::_M_value;
3473 
3474     static_assert(
3475       is_same<typename _M::abi_type, typename _Tp::abi_type>::value, "");
3476     static_assert(_M::size() == _Tp::size(), "");
3477 
3478     _GLIBCXX_SIMD_INTRINSIC friend constexpr _Tp&
3479     __get_lvalue(where_expression& __x)
3480     { return __x._M_value; }
3481 
3482   public:
3483     where_expression(const where_expression&) = delete;
3484     where_expression& operator=(const where_expression&) = delete;
3485 
3486     _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR
3487     where_expression(const _M& __kk, _Tp& dd)
3488     : const_where_expression<_M, _Tp>(__kk, dd) {}
3489 
3490     template <typename _Up>
3491       _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR void
3492       operator=(_Up&& __x) &&
3493       {
3494 	_Impl::_S_masked_assign(__data(_M_k), __data(_M_value),
3495 				__to_value_type_or_member_type<_Tp>(
3496 				  static_cast<_Up&&>(__x)));
3497       }
3498 
3499 #define _GLIBCXX_SIMD_OP_(__op, __name)                                        \
3500   template <typename _Up>                                                      \
3501     _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR void                       \
3502     operator __op##=(_Up&& __x)&&                                              \
3503     {                                                                          \
3504       _Impl::template _S_masked_cassign(                                       \
3505 	__data(_M_k), __data(_M_value),                                        \
3506 	__to_value_type_or_member_type<_Tp>(static_cast<_Up&&>(__x)),          \
3507 	[](auto __impl, auto __lhs, auto __rhs)                                \
3508 	  constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA                         \
3509 	{ return __impl.__name(__lhs, __rhs); });                              \
3510     }                                                                          \
3511   static_assert(true)
3512     _GLIBCXX_SIMD_OP_(+, _S_plus);
3513     _GLIBCXX_SIMD_OP_(-, _S_minus);
3514     _GLIBCXX_SIMD_OP_(*, _S_multiplies);
3515     _GLIBCXX_SIMD_OP_(/, _S_divides);
3516     _GLIBCXX_SIMD_OP_(%, _S_modulus);
3517     _GLIBCXX_SIMD_OP_(&, _S_bit_and);
3518     _GLIBCXX_SIMD_OP_(|, _S_bit_or);
3519     _GLIBCXX_SIMD_OP_(^, _S_bit_xor);
3520     _GLIBCXX_SIMD_OP_(<<, _S_shift_left);
3521     _GLIBCXX_SIMD_OP_(>>, _S_shift_right);
3522 #undef _GLIBCXX_SIMD_OP_
3523 
3524     _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR void
3525     operator++() &&
3526     {
3527       __data(_M_value)
3528 	= _Impl::template _S_masked_unary<__increment>(__data(_M_k), __data(_M_value));
3529     }
3530 
3531     _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR void
3532     operator++(int) &&
3533     {
3534       __data(_M_value)
3535 	= _Impl::template _S_masked_unary<__increment>(__data(_M_k), __data(_M_value));
3536     }
3537 
3538     _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR void
3539     operator--() &&
3540     {
3541       __data(_M_value)
3542 	= _Impl::template _S_masked_unary<__decrement>(__data(_M_k), __data(_M_value));
3543     }
3544 
3545     _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR void
3546     operator--(int) &&
3547     {
3548       __data(_M_value)
3549 	= _Impl::template _S_masked_unary<__decrement>(__data(_M_k), __data(_M_value));
3550     }
3551 
3552     // intentionally hides const_where_expression::copy_from
3553     template <typename _Up, typename _Flags>
3554       _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR void
3555       copy_from(const _LoadStorePtr<_Up, value_type>* __mem, _IsSimdFlagType<_Flags>) &&
3556       {
3557 	__data(_M_value) = _Impl::_S_masked_load(__data(_M_value), __data(_M_k),
3558 						 _Flags::template _S_apply<_Tp>(__mem));
3559       }
3560   };
3561 
3562 // where_expression<bool, T> {{{2
3563 template <typename _Tp>
3564   class where_expression<bool, _Tp>
3565   : public const_where_expression<bool, _Tp>
3566   {
3567     using _M = bool;
3568     using typename const_where_expression<_M, _Tp>::value_type;
3569     using const_where_expression<_M, _Tp>::_M_k;
3570     using const_where_expression<_M, _Tp>::_M_value;
3571 
3572   public:
3573     where_expression(const where_expression&) = delete;
3574     where_expression& operator=(const where_expression&) = delete;
3575 
3576     _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR
3577     where_expression(const _M& __kk, _Tp& dd)
3578     : const_where_expression<_M, _Tp>(__kk, dd) {}
3579 
3580 #define _GLIBCXX_SIMD_OP_(__op)                                                \
3581     template <typename _Up>                                                    \
3582       _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR void                     \
3583       operator __op(_Up&& __x)&&                                               \
3584       { if (_M_k) _M_value __op static_cast<_Up&&>(__x); }
3585 
3586     _GLIBCXX_SIMD_OP_(=)
3587     _GLIBCXX_SIMD_OP_(+=)
3588     _GLIBCXX_SIMD_OP_(-=)
3589     _GLIBCXX_SIMD_OP_(*=)
3590     _GLIBCXX_SIMD_OP_(/=)
3591     _GLIBCXX_SIMD_OP_(%=)
3592     _GLIBCXX_SIMD_OP_(&=)
3593     _GLIBCXX_SIMD_OP_(|=)
3594     _GLIBCXX_SIMD_OP_(^=)
3595     _GLIBCXX_SIMD_OP_(<<=)
3596     _GLIBCXX_SIMD_OP_(>>=)
3597   #undef _GLIBCXX_SIMD_OP_
3598 
3599     _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR void
3600     operator++() &&
3601     { if (_M_k) ++_M_value; }
3602 
3603     _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR void
3604     operator++(int) &&
3605     { if (_M_k) ++_M_value; }
3606 
3607     _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR void
3608     operator--() &&
3609     { if (_M_k) --_M_value; }
3610 
3611     _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR void
3612     operator--(int) &&
3613     { if (_M_k) --_M_value; }
3614 
3615     // intentionally hides const_where_expression::copy_from
3616     template <typename _Up, typename _Flags>
3617       _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR void
3618       copy_from(const _LoadStorePtr<_Up, value_type>* __mem, _IsSimdFlagType<_Flags>) &&
3619       { if (_M_k) _M_value = __mem[0]; }
3620   };
3621 
3622 // where {{{1
3623 template <typename _Tp, typename _Ap>
3624   _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR
3625   where_expression<simd_mask<_Tp, _Ap>, simd<_Tp, _Ap>>
3626   where(const typename simd<_Tp, _Ap>::mask_type& __k, simd<_Tp, _Ap>& __value)
3627   { return {__k, __value}; }
3628 
3629 template <typename _Tp, typename _Ap>
3630   _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR
3631   const_where_expression<simd_mask<_Tp, _Ap>, simd<_Tp, _Ap>>
3632   where(const typename simd<_Tp, _Ap>::mask_type& __k, const simd<_Tp, _Ap>& __value)
3633   { return {__k, __value}; }
3634 
3635 template <typename _Tp, typename _Ap>
3636   _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR
3637   where_expression<simd_mask<_Tp, _Ap>, simd_mask<_Tp, _Ap>>
3638   where(const remove_const_t<simd_mask<_Tp, _Ap>>& __k, simd_mask<_Tp, _Ap>& __value)
3639   { return {__k, __value}; }
3640 
3641 template <typename _Tp, typename _Ap>
3642   _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR
3643   const_where_expression<simd_mask<_Tp, _Ap>, simd_mask<_Tp, _Ap>>
3644   where(const remove_const_t<simd_mask<_Tp, _Ap>>& __k, const simd_mask<_Tp, _Ap>& __value)
3645   { return {__k, __value}; }
3646 
3647 template <typename _Tp>
3648   _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR where_expression<bool, _Tp>
3649   where(_ExactBool __k, _Tp& __value)
3650   { return {__k, __value}; }
3651 
3652 template <typename _Tp>
3653   _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR const_where_expression<bool, _Tp>
3654   where(_ExactBool __k, const _Tp& __value)
3655   { return {__k, __value}; }
3656 
3657 template <typename _Tp, typename _Ap>
3658   _GLIBCXX_SIMD_CONSTEXPR void
3659   where(bool __k, simd<_Tp, _Ap>& __value) = delete;
3660 
3661 template <typename _Tp, typename _Ap>
3662   _GLIBCXX_SIMD_CONSTEXPR void
3663   where(bool __k, const simd<_Tp, _Ap>& __value) = delete;
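
// Usage sketch (illustrative; assumes stdx = std::experimental): assignments
// and loads/stores through a where_expression only touch the elements the
// mask selects:
//
//   stdx::native_simd<float> __v([](int __i) { return __i - 2.f; });
//   stdx::where(__v < 0.f, __v) = 0.f;   // clamp negative elements to zero
//   stdx::where(__v > 1.f, __v) *= 0.5f; // masked compound assignment
//   float __mem[__v.size()] = {};
//   stdx::where(__v > 0.f, __v).copy_to(__mem, stdx::element_aligned);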
3664 
3665 // proposed mask iterations {{{1
3666 namespace __proposed {
3667 template <size_t _Np>
3668   class where_range
3669   {
3670     const bitset<_Np> __bits;
3671 
3672   public:
3673     where_range(bitset<_Np> __b) : __bits(__b) {}
3674 
3675     class iterator
3676     {
3677       size_t __mask;
3678       size_t __bit;
3679 
3680       _GLIBCXX_SIMD_INTRINSIC void
3681       __next_bit()
3682       { __bit = __mask == 0 ? 0 : __builtin_ctzl(__mask); } // ctzl(0) is UB
3683 
3684       _GLIBCXX_SIMD_INTRINSIC void
3685       __reset_lsb()
3686       {
3687 	// clear the lowest set bit: 01100100 - 1 = 01100011
3688 	__mask &= (__mask - 1);
3689 	// __asm__("btr %1,%0" : "+r"(__mask) : "r"(__bit));
3690       }
3691 
3692     public:
3693       iterator(decltype(__mask) __m) : __mask(__m) { __next_bit(); }
3694       iterator(const iterator&) = default;
3695       iterator(iterator&&) = default;
3696 
3697       _GLIBCXX_SIMD_ALWAYS_INLINE size_t
3698       operator->() const
3699       { return __bit; }
3700 
3701       _GLIBCXX_SIMD_ALWAYS_INLINE size_t
3702       operator*() const
3703       { return __bit; }
3704 
3705       _GLIBCXX_SIMD_ALWAYS_INLINE iterator&
3706       operator++()
3707       {
3708 	__reset_lsb();
3709 	__next_bit();
3710 	return *this;
3711       }
3712 
3713       _GLIBCXX_SIMD_ALWAYS_INLINE iterator
3714       operator++(int)
3715       {
3716 	iterator __tmp = *this;
3717 	__reset_lsb();
3718 	__next_bit();
3719 	return __tmp;
3720       }
3721 
3722       _GLIBCXX_SIMD_ALWAYS_INLINE bool
3723       operator==(const iterator& __rhs) const
3724       { return __mask == __rhs.__mask; }
3725 
3726       _GLIBCXX_SIMD_ALWAYS_INLINE bool
3727       operator!=(const iterator& __rhs) const
3728       { return __mask != __rhs.__mask; }
3729     };
3730 
3731     iterator
3732     begin() const
3733     { return __bits.to_ullong(); }
3734 
3735     iterator
3736     end() const
3737     { return 0; }
3738   };
3739 
3740 template <typename _Tp, typename _Ap>
3741   where_range<simd_size_v<_Tp, _Ap>>
3742   where(const simd_mask<_Tp, _Ap>& __k)
3743   { return __k.__to_bitset(); }
3744 
3745 } // namespace __proposed
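
// Usage sketch (illustrative; assumes stdx = std::experimental): iterate
// over the element indices for which a mask is true:
//
//   const stdx::native_simd<int> __v([](int __i) { return __i; });
//   std::size_t __sum = 0;
//   for (std::size_t __i : stdx::__proposed::where(__v % 2 == 0))
//     __sum += __i; // visits the indices of the even elements: 0, 2, 4, ...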
3746 
3747 // }}}1
3748 // reductions [simd.reductions] {{{1
3749 template <typename _Tp, typename _Abi, typename _BinaryOperation = plus<>>
3750   _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR _Tp
3751   reduce(const simd<_Tp, _Abi>& __v, _BinaryOperation __binary_op = _BinaryOperation())
3752   { return _Abi::_SimdImpl::_S_reduce(__v, __binary_op); }
3753 
3754 template <typename _M, typename _V, typename _BinaryOperation = plus<>>
3755   _GLIBCXX_SIMD_INTRINSIC typename _V::value_type
3756   reduce(const const_where_expression<_M, _V>& __x,
3757 	 typename _V::value_type __identity_element, _BinaryOperation __binary_op)
3758   {
3759     if (__builtin_expect(none_of(__get_mask(__x)), false))
3760       return __identity_element;
3761 
3762     _V __tmp = __identity_element;
3763     _V::_Impl::_S_masked_assign(__data(__get_mask(__x)), __data(__tmp),
3764 				__data(__get_lvalue(__x)));
3765     return reduce(__tmp, __binary_op);
3766   }
3767 
3768 template <typename _M, typename _V>
3769   _GLIBCXX_SIMD_INTRINSIC typename _V::value_type
3770   reduce(const const_where_expression<_M, _V>& __x, plus<> __binary_op = {})
3771   { return reduce(__x, 0, __binary_op); }
3772 
3773 template <typename _M, typename _V>
3774   _GLIBCXX_SIMD_INTRINSIC typename _V::value_type
3775   reduce(const const_where_expression<_M, _V>& __x, multiplies<> __binary_op)
3776   { return reduce(__x, 1, __binary_op); }
3777 
3778 template <typename _M, typename _V>
3779   _GLIBCXX_SIMD_INTRINSIC typename _V::value_type
3780   reduce(const const_where_expression<_M, _V>& __x, bit_and<> __binary_op)
3781   { return reduce(__x, ~typename _V::value_type(), __binary_op); }
3782 
3783 template <typename _M, typename _V>
3784   _GLIBCXX_SIMD_INTRINSIC typename _V::value_type
3785   reduce(const const_where_expression<_M, _V>& __x, bit_or<> __binary_op)
3786   { return reduce(__x, 0, __binary_op); }
3787 
3788 template <typename _M, typename _V>
3789   _GLIBCXX_SIMD_INTRINSIC typename _V::value_type
3790   reduce(const const_where_expression<_M, _V>& __x, bit_xor<> __binary_op)
3791   { return reduce(__x, 0, __binary_op); }
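
// Usage sketch (illustrative; assumes stdx = std::experimental): reductions
// over a where_expression substitute the identity element for all masked-off
// elements:
//
//   stdx::native_simd<float> __v([](int __i) { return __i + 1.f; });
//   float __sum = stdx::reduce(stdx::where(__v > 2.f, __v)); // plus<> default
//   float __prd = stdx::reduce(stdx::where(__v > 2.f, __v),
//                              std::multiplies<>());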
3792 
3793 template <typename _Tp, typename _Abi>
3794   _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR _Tp
3795   hmin(const simd<_Tp, _Abi>& __v) noexcept
3796   { return _Abi::_SimdImpl::_S_reduce(__v, __detail::_Minimum()); }
3797 
3798 template <typename _Tp, typename _Abi>
3799   _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR _Tp
3800   hmax(const simd<_Tp, _Abi>& __v) noexcept
3801   { return _Abi::_SimdImpl::_S_reduce(__v, __detail::_Maximum()); }
3802 
3803 template <typename _M, typename _V>
3804   _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR
3805   typename _V::value_type
3806   hmin(const const_where_expression<_M, _V>& __x) noexcept
3807   {
3808     using _Tp = typename _V::value_type;
3809     constexpr _Tp __id_elem =
3810 #ifdef __FINITE_MATH_ONLY__
3811       __finite_max_v<_Tp>;
3812 #else
3813       __value_or<__infinity, _Tp>(__finite_max_v<_Tp>);
3814 #endif
3815     _V __tmp = __id_elem;
3816     _V::_Impl::_S_masked_assign(__data(__get_mask(__x)), __data(__tmp),
3817 				__data(__get_lvalue(__x)));
3818     return _V::abi_type::_SimdImpl::_S_reduce(__tmp, __detail::_Minimum());
3819   }
3820 
3821 template <typename _M, typename _V>
3822   _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR
3823   typename _V::value_type
3824   hmax(const const_where_expression<_M, _V>& __x) noexcept
3825   {
3826     using _Tp = typename _V::value_type;
3827     constexpr _Tp __id_elem =
3828 #ifdef __FINITE_MATH_ONLY__
3829       __finite_min_v<_Tp>;
3830 #else
3831       [] {
3832 	if constexpr (__value_exists_v<__infinity, _Tp>)
3833 	  return -__infinity_v<_Tp>;
3834 	else
3835 	  return __finite_min_v<_Tp>;
3836       }();
3837 #endif
3838     _V __tmp = __id_elem;
3839     _V::_Impl::_S_masked_assign(__data(__get_mask(__x)), __data(__tmp),
3840 				__data(__get_lvalue(__x)));
3841     return _V::abi_type::_SimdImpl::_S_reduce(__tmp, __detail::_Maximum());
3842   }
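
// Usage sketch (illustrative; assumes stdx = std::experimental):
//
//   stdx::native_simd<float> __v([](int __i) { return 1.f * __i; });
//   float __lo = stdx::hmin(__v); // smallest element
//   float __hi = stdx::hmax(stdx::where(__v > 0.f, __v)); // largest element
//                                                         // greater than 0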
3843 
3844 // }}}1
3845 // algorithms [simd.alg] {{{
3846 template <typename _Tp, typename _Ap>
3847   _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR simd<_Tp, _Ap>
3848   min(const simd<_Tp, _Ap>& __a, const simd<_Tp, _Ap>& __b)
3849   { return {__private_init, _Ap::_SimdImpl::_S_min(__data(__a), __data(__b))}; }
3850 
3851 template <typename _Tp, typename _Ap>
3852   _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR simd<_Tp, _Ap>
3853   max(const simd<_Tp, _Ap>& __a, const simd<_Tp, _Ap>& __b)
3854   { return {__private_init, _Ap::_SimdImpl::_S_max(__data(__a), __data(__b))}; }
3855 
3856 template <typename _Tp, typename _Ap>
3857   _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR
3858   pair<simd<_Tp, _Ap>, simd<_Tp, _Ap>>
3859   minmax(const simd<_Tp, _Ap>& __a, const simd<_Tp, _Ap>& __b)
3860   {
3861     const auto __pair_of_members
3862       = _Ap::_SimdImpl::_S_minmax(__data(__a), __data(__b));
3863     return {simd<_Tp, _Ap>(__private_init, __pair_of_members.first),
3864 	    simd<_Tp, _Ap>(__private_init, __pair_of_members.second)};
3865   }
3866 
3867 template <typename _Tp, typename _Ap>
3868   _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR simd<_Tp, _Ap>
3869   clamp(const simd<_Tp, _Ap>& __v, const simd<_Tp, _Ap>& __lo, const simd<_Tp, _Ap>& __hi)
3870   {
3871     using _Impl = typename _Ap::_SimdImpl;
3872     return {__private_init,
3873 	    _Impl::_S_min(__data(__hi),
3874 			  _Impl::_S_max(__data(__lo), __data(__v)))};
3875   }
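
// Usage sketch (illustrative; assumes stdx = std::experimental; 8 elements):
//
//   using _V = stdx::fixed_size_simd<int, 8>;
//   _V __a([](int __i) { return __i; });       // {0, 1, 2, 3, 4, 5, 6, 7}
//   _V __m = stdx::min(__a, _V(3));            // {0, 1, 2, 3, 3, 3, 3, 3}
//   _V __c = stdx::clamp(__a, _V(1), _V(4));   // {1, 1, 2, 3, 4, 4, 4, 4}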
3876 
3877 // }}}
3878 
3879 template <size_t... _Sizes, typename _Tp, typename _Ap,
3880 	  typename = enable_if_t<((_Sizes + ...) == simd<_Tp, _Ap>::size())>>
3881   inline tuple<simd<_Tp, simd_abi::deduce_t<_Tp, _Sizes>>...>
3882   split(const simd<_Tp, _Ap>&);
3883 
3884 // __extract_part {{{
3885 template <int _Index, int _Total, int _Combine = 1, typename _Tp, size_t _Np>
3886   _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_CONST constexpr
3887   _SimdWrapper<_Tp, _Np / _Total * _Combine>
3888   __extract_part(const _SimdWrapper<_Tp, _Np> __x);
3889 
3890 template <int _Index, int _Parts, int _Combine = 1, typename _Tp, typename _A0, typename... _As>
3891   _GLIBCXX_SIMD_INTRINSIC constexpr auto
3892   __extract_part(const _SimdTuple<_Tp, _A0, _As...>& __x);
3893 
3894 // }}}
3895 // _SizeList {{{
3896 template <size_t _V0, size_t... _Values>
3897   struct _SizeList
3898   {
3899     template <size_t _I>
3900       static constexpr size_t
3901       _S_at(_SizeConstant<_I> = {})
3902       {
3903 	if constexpr (_I == 0)
3904 	  return _V0;
3905 	else
3906 	  return _SizeList<_Values...>::template _S_at<_I - 1>();
3907       }
3908 
3909     template <size_t _I>
3910       static constexpr auto
3911       _S_before(_SizeConstant<_I> = {})
3912       {
3913 	if constexpr (_I == 0)
3914 	  return _SizeConstant<0>();
3915 	else
3916 	  return _SizeConstant<
3917 	    _V0 + _SizeList<_Values...>::template _S_before<_I - 1>()>();
3918       }
3919 
3920     template <size_t _Np>
3921       static constexpr auto
3922       _S_pop_front(_SizeConstant<_Np> = {})
3923       {
3924 	if constexpr (_Np == 0)
3925 	  return _SizeList();
3926 	else
3927 	  return _SizeList<_Values...>::template _S_pop_front<_Np - 1>();
3928       }
3929   };
3930 
3931 // }}}
3932 // __extract_center {{{
3933 template <typename _Tp, size_t _Np>
3934   _GLIBCXX_SIMD_INTRINSIC _SimdWrapper<_Tp, _Np / 2>
3935   __extract_center(_SimdWrapper<_Tp, _Np> __x)
3936   {
3937     static_assert(_Np >= 4);
3938     static_assert(_Np % 4 == 0); // x0 - x1 - x2 - x3 -> return {x1, x2}
3939 #if _GLIBCXX_SIMD_X86INTRIN    // {{{
3940     if constexpr (__have_avx512f && sizeof(_Tp) * _Np == 64)
3941       {
3942 	const auto __intrin = __to_intrin(__x);
3943 	if constexpr (is_integral_v<_Tp>)
3944 	  return __vector_bitcast<_Tp>(_mm512_castsi512_si256(
3945 	    _mm512_shuffle_i32x4(__intrin, __intrin,
3946 				 1 + 2 * 0x4 + 2 * 0x10 + 3 * 0x40)));
3947 	else if constexpr (sizeof(_Tp) == 4)
3948 	  return __vector_bitcast<_Tp>(_mm512_castps512_ps256(
3949 	    _mm512_shuffle_f32x4(__intrin, __intrin,
3950 				 1 + 2 * 0x4 + 2 * 0x10 + 3 * 0x40)));
3951 	else if constexpr (sizeof(_Tp) == 8)
3952 	  return __vector_bitcast<_Tp>(_mm512_castpd512_pd256(
3953 	    _mm512_shuffle_f64x2(__intrin, __intrin,
3954 				 1 + 2 * 0x4 + 2 * 0x10 + 3 * 0x40)));
3955 	else
3956 	  __assert_unreachable<_Tp>();
3957       }
3958     else if constexpr (sizeof(_Tp) * _Np == 32 && is_floating_point_v<_Tp>)
3959       return __vector_bitcast<_Tp>(
3960 	_mm_shuffle_pd(__lo128(__vector_bitcast<double>(__x)),
3961 		       __hi128(__vector_bitcast<double>(__x)), 1));
3962     else if constexpr (sizeof(__x) == 32 && sizeof(_Tp) * _Np <= 32)
3963       return __vector_bitcast<_Tp>(
3964 	_mm_alignr_epi8(__hi128(__vector_bitcast<_LLong>(__x)),
3965 			__lo128(__vector_bitcast<_LLong>(__x)),
3966 			sizeof(_Tp) * _Np / 4));
3967     else
3968 #endif // _GLIBCXX_SIMD_X86INTRIN }}}
3969       {
3970 	__vector_type_t<_Tp, _Np / 2> __r;
3971 	__builtin_memcpy(&__r,
3972 			 reinterpret_cast<const char*>(&__x)
3973 			   + sizeof(_Tp) * _Np / 4,
3974 			 sizeof(_Tp) * _Np / 2);
3975 	return __r;
3976       }
3977   }
3978 
3979 template <typename _Tp, typename _A0, typename... _As>
3980   _GLIBCXX_SIMD_INTRINSIC
3981   _SimdWrapper<_Tp, _SimdTuple<_Tp, _A0, _As...>::_S_size() / 2>
3982   __extract_center(const _SimdTuple<_Tp, _A0, _As...>& __x)
3983   {
3984     if constexpr (sizeof...(_As) == 0)
3985       return __extract_center(__x.first);
3986     else
3987       return __extract_part<1, 4, 2>(__x);
3988   }
3989 
3990 // }}}
3991 // __split_wrapper {{{
3992 template <size_t... _Sizes, typename _Tp, typename... _As>
3993   auto
3994   __split_wrapper(_SizeList<_Sizes...>, const _SimdTuple<_Tp, _As...>& __x)
3995   {
3996     return split<_Sizes...>(
3997       fixed_size_simd<_Tp, _SimdTuple<_Tp, _As...>::_S_size()>(__private_init,
3998 							       __x));
3999   }
4000 
4001 // }}}
4002 
4003 // split<simd>(simd) {{{
4004 template <typename _V, typename _Ap,
4005 	  size_t _Parts = simd_size_v<typename _V::value_type, _Ap> / _V::size()>
4006   enable_if_t<simd_size_v<typename _V::value_type, _Ap> == _Parts * _V::size()
4007 		&& is_simd_v<_V>, array<_V, _Parts>>
4008   split(const simd<typename _V::value_type, _Ap>& __x)
4009   {
4010     using _Tp = typename _V::value_type;
4011     if constexpr (_Parts == 1)
4012       {
4013 	return {simd_cast<_V>(__x)};
4014       }
4015     else if (__x._M_is_constprop())
4016       {
4017 	return __generate_from_n_evaluations<_Parts, array<_V, _Parts>>(
4018 		 [&](auto __i) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
4019 		   return _V([&](auto __j) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA
4020 			     { return __x[__i * _V::size() + __j]; });
4021 		 });
4022       }
4023     else if constexpr (
4024       __is_fixed_size_abi_v<_Ap>
4025       && (is_same_v<typename _V::abi_type, simd_abi::scalar>
4026 	|| (__is_fixed_size_abi_v<typename _V::abi_type>
4027 	  && sizeof(_V) == sizeof(_Tp) * _V::size() // _V doesn't have padding
4028 	  )))
4029       {
4030 	// fixed_size -> fixed_size (w/o padding) or scalar
4031 #ifdef _GLIBCXX_SIMD_USE_ALIASING_LOADS
4032       const __may_alias<_Tp>* const __element_ptr
4033 	= reinterpret_cast<const __may_alias<_Tp>*>(&__data(__x));
4034       return __generate_from_n_evaluations<_Parts, array<_V, _Parts>>(
4035 	       [&](auto __i) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA
4036 	       { return _V(__element_ptr + __i * _V::size(), vector_aligned); });
4037 #else
4038       const auto& __xx = __data(__x);
4039       return __generate_from_n_evaluations<_Parts, array<_V, _Parts>>(
4040 	       [&](auto __i) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
4041 		 [[maybe_unused]] constexpr size_t __offset
4042 		   = decltype(__i)::value * _V::size();
4043 		 return _V([&](auto __j) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
4044 			  constexpr _SizeConstant<__j + __offset> __k;
4045 			  return __xx[__k];
4046 			});
4047 	       });
4048 #endif
4049     }
4050   else if constexpr (is_same_v<typename _V::abi_type, simd_abi::scalar>)
4051     {
4052       // normally memcpy should work here as well
4053       return __generate_from_n_evaluations<_Parts, array<_V, _Parts>>(
4054 	       [&](auto __i) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA { return __x[__i]; });
4055     }
4056   else
4057     {
4058       return __generate_from_n_evaluations<_Parts, array<_V, _Parts>>(
4059 	       [&](auto __i) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
4060 		 if constexpr (__is_fixed_size_abi_v<typename _V::abi_type>)
4061 		   return _V([&](auto __j) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
4062 			    return __x[__i * _V::size() + __j];
4063 			  });
4064 		 else
4065 		   return _V(__private_init,
4066 			     __extract_part<decltype(__i)::value, _Parts>(__data(__x)));
4067 	       });
4068     }
4069   }
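
// Usage sketch (illustrative; assumes stdx = std::experimental): split a
// simd into an array of equally sized pieces:
//
//   stdx::fixed_size_simd<int, 8> __x([](int __i) { return __i; });
//   auto __parts = stdx::split<stdx::fixed_size_simd<int, 2>>(__x);
//   // __parts is array<fixed_size_simd<int, 2>, 4>; __parts[1] == {2, 3}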
4070 
4071 // }}}
4072 // split<simd_mask>(simd_mask) {{{
4073 template <typename _V, typename _Ap,
4074 	  size_t _Parts = simd_size_v<typename _V::simd_type::value_type, _Ap> / _V::size()>
4075   enable_if_t<is_simd_mask_v<_V> && simd_size_v<typename
4076     _V::simd_type::value_type, _Ap> == _Parts * _V::size(), array<_V, _Parts>>
4077   split(const simd_mask<typename _V::simd_type::value_type, _Ap>& __x)
4078   {
4079     if constexpr (is_same_v<_Ap, typename _V::abi_type>)
4080       return {__x};
4081     else if constexpr (_Parts == 1)
4082       return {__proposed::static_simd_cast<_V>(__x)};
4083     else if constexpr (_Parts == 2 && __is_sse_abi<typename _V::abi_type>()
4084 		       && __is_avx_abi<_Ap>())
4085       return {_V(__private_init, __lo128(__data(__x))),
4086 	      _V(__private_init, __hi128(__data(__x)))};
4087     else if constexpr (_V::size() <= __CHAR_BIT__ * sizeof(_ULLong))
4088       {
4089 	const bitset __bits = __x.__to_bitset();
4090 	return __generate_from_n_evaluations<_Parts, array<_V, _Parts>>(
4091 		 [&](auto __i) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
4092 		   constexpr size_t __offset = __i * _V::size();
4093 		   return _V(__bitset_init, (__bits >> __offset).to_ullong());
4094 		 });
4095       }
4096     else
4097       {
4098 	return __generate_from_n_evaluations<_Parts, array<_V, _Parts>>(
4099 		 [&](auto __i) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
4100 		   constexpr size_t __offset = __i * _V::size();
4101 		   return _V(__private_init,
4102 			     [&](auto __j) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
4103 			       return __x[__j + __offset];
4104 			     });
4105 		 });
4106       }
4107   }
4108 
4109 // }}}
4110 // split<_Sizes...>(simd) {{{
4111 template <size_t... _Sizes, typename _Tp, typename _Ap, typename>
4112   _GLIBCXX_SIMD_ALWAYS_INLINE
4113   tuple<simd<_Tp, simd_abi::deduce_t<_Tp, _Sizes>>...>
4114   split(const simd<_Tp, _Ap>& __x)
4115   {
4116     using _SL = _SizeList<_Sizes...>;
4117     using _Tuple = tuple<__deduced_simd<_Tp, _Sizes>...>;
4118     constexpr size_t _Np = simd_size_v<_Tp, _Ap>;
4119     constexpr size_t _N0 = _SL::template _S_at<0>();
4120     using _V = __deduced_simd<_Tp, _N0>;
4121 
4122     if (__x._M_is_constprop())
4123       return __generate_from_n_evaluations<sizeof...(_Sizes), _Tuple>(
4124 	       [&](auto __i) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
4125 		 using _Vi = __deduced_simd<_Tp, _SL::_S_at(__i)>;
4126 		 constexpr size_t __offset = _SL::_S_before(__i);
4127 		 return _Vi([&](auto __j) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
4128 			  return __x[__offset + __j];
4129 			});
4130 	       });
4131     else if constexpr (_Np == _N0)
4132       {
4133 	static_assert(sizeof...(_Sizes) == 1);
4134 	return {simd_cast<_V>(__x)};
4135       }
4136     else if constexpr // split from fixed_size, such that __x::first.size == _N0
4137       (__is_fixed_size_abi_v<
4138 	 _Ap> && __fixed_size_storage_t<_Tp, _Np>::_S_first_size == _N0)
4139       {
4140 	static_assert(
4141 	  !__is_fixed_size_abi_v<typename _V::abi_type>,
4142 	  "How can <_Tp, _Np> be a single _SimdTuple entry but a "
4143 	  "fixed_size_simd "
4144 	  "when deduced?");
4145 	// extract first and recurse (__split_wrapper is needed to deduce a new
4146 	// _Sizes pack)
4147 	return tuple_cat(make_tuple(_V(__private_init, __data(__x).first)),
4148 			 __split_wrapper(_SL::template _S_pop_front<1>(),
4149 					 __data(__x).second));
4150       }
4151     else if constexpr ((!is_same_v<simd_abi::scalar,
4152 				   simd_abi::deduce_t<_Tp, _Sizes>> && ...)
4153 		       && (!__is_fixed_size_abi_v<
4154 			     simd_abi::deduce_t<_Tp, _Sizes>> && ...))
4155       {
4156 	if constexpr (((_Sizes * 2 == _Np) && ...))
4157 	  return {{__private_init, __extract_part<0, 2>(__data(__x))},
4158 		  {__private_init, __extract_part<1, 2>(__data(__x))}};
4159 	else if constexpr (is_same_v<_SizeList<_Sizes...>,
4160 				     _SizeList<_Np / 3, _Np / 3, _Np / 3>>)
4161 	  return {{__private_init, __extract_part<0, 3>(__data(__x))},
4162 		  {__private_init, __extract_part<1, 3>(__data(__x))},
4163 		  {__private_init, __extract_part<2, 3>(__data(__x))}};
4164 	else if constexpr (is_same_v<_SizeList<_Sizes...>,
4165 				     _SizeList<2 * _Np / 3, _Np / 3>>)
4166 	  return {{__private_init, __extract_part<0, 3, 2>(__data(__x))},
4167 		  {__private_init, __extract_part<2, 3>(__data(__x))}};
4168 	else if constexpr (is_same_v<_SizeList<_Sizes...>,
4169 				     _SizeList<_Np / 3, 2 * _Np / 3>>)
4170 	  return {{__private_init, __extract_part<0, 3>(__data(__x))},
4171 		  {__private_init, __extract_part<1, 3, 2>(__data(__x))}};
4172 	else if constexpr (is_same_v<_SizeList<_Sizes...>,
4173 				     _SizeList<_Np / 2, _Np / 4, _Np / 4>>)
4174 	  return {{__private_init, __extract_part<0, 2>(__data(__x))},
4175 		  {__private_init, __extract_part<2, 4>(__data(__x))},
4176 		  {__private_init, __extract_part<3, 4>(__data(__x))}};
4177 	else if constexpr (is_same_v<_SizeList<_Sizes...>,
4178 				     _SizeList<_Np / 4, _Np / 4, _Np / 2>>)
4179 	  return {{__private_init, __extract_part<0, 4>(__data(__x))},
4180 		  {__private_init, __extract_part<1, 4>(__data(__x))},
4181 		  {__private_init, __extract_part<1, 2>(__data(__x))}};
4182 	else if constexpr (is_same_v<_SizeList<_Sizes...>,
4183 				     _SizeList<_Np / 4, _Np / 2, _Np / 4>>)
4184 	  return {{__private_init, __extract_part<0, 4>(__data(__x))},
4185 		  {__private_init, __extract_center(__data(__x))},
4186 		  {__private_init, __extract_part<3, 4>(__data(__x))}};
4187 	else if constexpr (((_Sizes * 4 == _Np) && ...))
4188 	  return {{__private_init, __extract_part<0, 4>(__data(__x))},
4189 		  {__private_init, __extract_part<1, 4>(__data(__x))},
4190 		  {__private_init, __extract_part<2, 4>(__data(__x))},
4191 		  {__private_init, __extract_part<3, 4>(__data(__x))}};
4192 	// else fall through
4193       }
4194 #ifdef _GLIBCXX_SIMD_USE_ALIASING_LOADS
4195     const __may_alias<_Tp>* const __element_ptr
4196       = reinterpret_cast<const __may_alias<_Tp>*>(&__x);
4197     return __generate_from_n_evaluations<sizeof...(_Sizes), _Tuple>(
4198 	     [&](auto __i) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
4199 	       using _Vi = __deduced_simd<_Tp, _SL::_S_at(__i)>;
4200 	       constexpr size_t __offset = _SL::_S_before(__i);
4201 	       constexpr size_t __base_align = alignof(simd<_Tp, _Ap>);
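	       // __a: bytes from this chunk's first element to the next
	       // __base_align boundary; its lowest set bit __b (the largest
	       // power of 2 dividing __a) is the alignment that can be
	       // guaranteed for the element at __offset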
4202 	       constexpr size_t __a
4203 		 = __base_align - ((__offset * sizeof(_Tp)) % __base_align);
4204 	       constexpr size_t __b = ((__a - 1) & __a) ^ __a;
4205 	       constexpr size_t __alignment = __b == 0 ? __a : __b;
4206 	       return _Vi(__element_ptr + __offset, overaligned<__alignment>);
4207 	     });
4208 #else
4209     return __generate_from_n_evaluations<sizeof...(_Sizes), _Tuple>(
4210 	     [&](auto __i) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
4211 	       using _Vi = __deduced_simd<_Tp, _SL::_S_at(__i)>;
4212 	       const auto& __xx = __data(__x);
4213 	       using _Offset = decltype(_SL::_S_before(__i));
4214 	       return _Vi([&](auto __j) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
4215 			constexpr _SizeConstant<_Offset::value + __j> __k;
4216 			return __xx[__k];
4217 		      });
4218 	     });
4219 #endif
4220   }
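
// Usage sketch (illustrative; assumes stdx = std::experimental): the _Sizes
// must add up to the size of the input:
//
//   stdx::fixed_size_simd<int, 8> __x([](int __i) { return __i; });
//   auto [__a, __b] = stdx::split<2, 6>(__x);
//   // __a == {0, 1} (2 elements), __b == {2, 3, 4, 5, 6, 7} (6 elements)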
4221 
4222 // }}}
4223 
4224 // __subscript_in_pack {{{
4225 template <size_t _I, typename _Tp, typename _Ap, typename... _As>
4226   _GLIBCXX_SIMD_INTRINSIC constexpr _Tp
4227   __subscript_in_pack(const simd<_Tp, _Ap>& __x, const simd<_Tp, _As>&... __xs)
4228   {
4229     if constexpr (_I < simd_size_v<_Tp, _Ap>)
4230       return __x[_I];
4231     else
4232       return __subscript_in_pack<_I - simd_size_v<_Tp, _Ap>>(__xs...);
4233   }
4234 
4235 // }}}
4236 // __store_pack_of_simd {{{
4237 template <typename _Tp, typename _A0, typename... _As>
4238   _GLIBCXX_SIMD_INTRINSIC void
4239   __store_pack_of_simd(char* __mem, const simd<_Tp, _A0>& __x0, const simd<_Tp, _As>&... __xs)
4240   {
4241     constexpr size_t __n_bytes = sizeof(_Tp) * simd_size_v<_Tp, _A0>;
4242     __builtin_memcpy(__mem, &__data(__x0), __n_bytes);
4243     if constexpr (sizeof...(__xs) > 0)
4244       __store_pack_of_simd(__mem + __n_bytes, __xs...);
4245   }
4246 
4247 // }}}
4248 // concat(simd...) {{{
4249 template <typename _Tp, typename... _As, typename = __detail::__odr_helper>
4250   inline _GLIBCXX_SIMD_CONSTEXPR
4251   simd<_Tp, simd_abi::deduce_t<_Tp, (simd_size_v<_Tp, _As> + ...)>>
4252   concat(const simd<_Tp, _As>&... __xs)
4253   {
4254     using _Rp = __deduced_simd<_Tp, (simd_size_v<_Tp, _As> + ...)>;
4255     if constexpr (sizeof...(__xs) == 1)
4256       return simd_cast<_Rp>(__xs...);
4257     else if ((... && __xs._M_is_constprop()))
4258       return simd<_Tp,
4259 		  simd_abi::deduce_t<_Tp, (simd_size_v<_Tp, _As> + ...)>>(
4260 	       [&](auto __i) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA
4261 	       { return __subscript_in_pack<__i>(__xs...); });
4262     else
4263       {
4264 	_Rp __r{};
4265 	__store_pack_of_simd(reinterpret_cast<char*>(&__data(__r)), __xs...);
4266 	return __r;
4267       }
4268   }
4269 
4270 // }}}
4271 // concat(array<simd>) {{{
4272 template <typename _Tp, typename _Abi, size_t _Np>
4273   _GLIBCXX_SIMD_ALWAYS_INLINE
4274   _GLIBCXX_SIMD_CONSTEXPR __deduced_simd<_Tp, simd_size_v<_Tp, _Abi> * _Np>
4275   concat(const array<simd<_Tp, _Abi>, _Np>& __x)
4276   {
4277     return __call_with_subscripts<_Np>(
4278 	     __x, [](const auto&... __xs) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
4279 	       return concat(__xs...);
4280 	     });
4281   }
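
// Usage sketch (illustrative; assumes stdx = std::experimental): concat is
// the inverse of split, modulo the deduced ABI tag of the result:
//
//   stdx::fixed_size_simd<int, 8> __x([](int __i) { return __i; });
//   auto __parts = stdx::split<stdx::fixed_size_simd<int, 4>>(__x);
//   auto __y = stdx::concat(__parts); // 8 elements, __y[5] == 5, but the
//                                     // ABI tag is deduced and may differ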
4282 
4283 // }}}
4284 
4285 /// @cond undocumented
4286 // _SmartReference {{{
4287 template <typename _Up, typename _Accessor = _Up,
4288 	  typename _ValueType = typename _Up::value_type>
4289   class _SmartReference
4290   {
4291     friend _Accessor;
4292     int _M_index;
4293     _Up& _M_obj;
4294 
4295     _GLIBCXX_SIMD_INTRINSIC constexpr _ValueType
4296     _M_read() const noexcept
4297     {
4298       if constexpr (is_arithmetic_v<_Up>)
4299 	return _M_obj;
4300       else
4301 	return _M_obj[_M_index];
4302     }
4303 
4304     template <typename _Tp>
4305       _GLIBCXX_SIMD_INTRINSIC constexpr void
4306       _M_write(_Tp&& __x) const
4307       { _Accessor::_S_set(_M_obj, _M_index, static_cast<_Tp&&>(__x)); }
4308 
4309   public:
4310     _GLIBCXX_SIMD_INTRINSIC constexpr
4311     _SmartReference(_Up& __o, int __i) noexcept
4312     : _M_index(__i), _M_obj(__o) {}
4313 
4314     using value_type = _ValueType;
4315 
4316     _GLIBCXX_SIMD_INTRINSIC
4317     _SmartReference(const _SmartReference&) = delete;
4318 
4319     _GLIBCXX_SIMD_INTRINSIC constexpr
4320     operator value_type() const noexcept
4321     { return _M_read(); }
4322 
4323     template <typename _Tp, typename = _ValuePreservingOrInt<__remove_cvref_t<_Tp>, value_type>>
4324       _GLIBCXX_SIMD_INTRINSIC constexpr _SmartReference
4325       operator=(_Tp&& __x) &&
4326       {
4327 	_M_write(static_cast<_Tp&&>(__x));
4328 	return {_M_obj, _M_index};
4329       }
4330 
4331 #define _GLIBCXX_SIMD_OP_(__op)                                                   \
4332     template <typename _Tp,                                                       \
4333 	      typename _TT = decltype(declval<value_type>() __op declval<_Tp>()), \
4334 	      typename = _ValuePreservingOrInt<__remove_cvref_t<_Tp>, _TT>,       \
4335 	      typename = _ValuePreservingOrInt<_TT, value_type>>                  \
4336       _GLIBCXX_SIMD_INTRINSIC constexpr _SmartReference                           \
4337       operator __op##=(_Tp&& __x) &&                                              \
4338       {                                                                           \
4339 	const value_type& __lhs = _M_read();                                      \
4340 	_M_write(__lhs __op __x);                                                 \
4341 	return {_M_obj, _M_index};                                                \
4342       }
4343     _GLIBCXX_SIMD_ALL_ARITHMETICS(_GLIBCXX_SIMD_OP_);
4344     _GLIBCXX_SIMD_ALL_SHIFTS(_GLIBCXX_SIMD_OP_);
4345     _GLIBCXX_SIMD_ALL_BINARY(_GLIBCXX_SIMD_OP_);
4346 #undef _GLIBCXX_SIMD_OP_
4347 
4348     template <typename _Tp = void,
4349 	      typename = decltype(++declval<conditional_t<true, value_type, _Tp>&>())>
4350       _GLIBCXX_SIMD_INTRINSIC constexpr _SmartReference
4351       operator++() &&
4352       {
4353 	value_type __x = _M_read();
4354 	_M_write(++__x);
4355 	return {_M_obj, _M_index};
4356       }
4357 
4358     template <typename _Tp = void,
4359 	      typename = decltype(declval<conditional_t<true, value_type, _Tp>&>()++)>
4360       _GLIBCXX_SIMD_INTRINSIC constexpr value_type
4361       operator++(int) &&
4362       {
4363 	const value_type __r = _M_read();
4364 	value_type __x = __r;
4365 	_M_write(++__x);
4366 	return __r;
4367       }
4368 
4369     template <typename _Tp = void,
4370 	      typename = decltype(--declval<conditional_t<true, value_type, _Tp>&>())>
4371       _GLIBCXX_SIMD_INTRINSIC constexpr _SmartReference
4372       operator--() &&
4373       {
4374 	value_type __x = _M_read();
4375 	_M_write(--__x);
4376 	return {_M_obj, _M_index};
4377       }
4378 
4379     template <typename _Tp = void,
4380 	      typename = decltype(declval<conditional_t<true, value_type, _Tp>&>()--)>
4381       _GLIBCXX_SIMD_INTRINSIC constexpr value_type
4382       operator--(int) &&
4383       {
4384 	const value_type __r = _M_read();
4385 	value_type __x = __r;
4386 	_M_write(--__x);
4387 	return __r;
4388       }
4389 
4390     _GLIBCXX_SIMD_INTRINSIC friend void
4391     swap(_SmartReference&& __a, _SmartReference&& __b) noexcept(
4392       conjunction<
4393 	is_nothrow_constructible<value_type, _SmartReference&&>,
4394 	is_nothrow_assignable<_SmartReference&&, value_type&&>>::value)
4395     {
4396       value_type __tmp = static_cast<_SmartReference&&>(__a);
4397       static_cast<_SmartReference&&>(__a) = static_cast<value_type>(__b);
4398       static_cast<_SmartReference&&>(__b) = std::move(__tmp);
4399     }
4400 
4401     _GLIBCXX_SIMD_INTRINSIC friend void
4402     swap(value_type& __a, _SmartReference&& __b) noexcept(
4403       conjunction<
4404 	is_nothrow_constructible<value_type, value_type&&>,
4405 	is_nothrow_assignable<value_type&, value_type&&>,
4406 	is_nothrow_assignable<_SmartReference&&, value_type&&>>::value)
4407     {
4408       value_type __tmp(std::move(__a));
4409       __a = static_cast<value_type>(__b);
4410       static_cast<_SmartReference&&>(__b) = std::move(__tmp);
4411     }
4412 
4413     _GLIBCXX_SIMD_INTRINSIC friend void
4414     swap(_SmartReference&& __a, value_type& __b) noexcept(
4415       conjunction<
4416 	is_nothrow_constructible<value_type, _SmartReference&&>,
4417 	is_nothrow_assignable<value_type&, value_type&&>,
4418 	is_nothrow_assignable<_SmartReference&&, value_type&&>>::value)
4419     {
4420       value_type __tmp(__a);
4421       static_cast<_SmartReference&&>(__a) = std::move(__b);
4422       __b = std::move(__tmp);
4423     }
4424   };
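
// Usage sketch (illustrative; assumes stdx = std::experimental):
// _SmartReference is the proxy type behind simd::operator[] and
// simd_mask::operator[]:
//
//   stdx::native_simd<float> __v = 0.f;
//   __v[0] = 1.f;           // writes through _Accessor::_S_set
//   float __x = __v[1];     // reads via the value_type conversion
//   swap(__v[0], __v[1]);   // the ADL-found friend swap above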
4425 
4426 // }}}
4427 // __scalar_abi_wrapper {{{
4428 template <int _Bytes>
4429   struct __scalar_abi_wrapper
4430   {
4431     template <typename _Tp> static constexpr size_t _S_full_size = 1;
4432     template <typename _Tp> static constexpr size_t _S_size = 1;
4433     template <typename _Tp> static constexpr bool _S_is_partial = false;
4434 
4435     template <typename _Tp, typename _Abi = simd_abi::scalar>
4436       static constexpr bool _S_is_valid_v
4437 	= _Abi::template _IsValid<_Tp>::value && sizeof(_Tp) == _Bytes;
4438   };
4439 
4440 // }}}
4441 // __decay_abi metafunction {{{
4442 template <typename _Tp>
4443   struct __decay_abi { using type = _Tp; };
4444 
4445 template <int _Bytes>
4446   struct __decay_abi<__scalar_abi_wrapper<_Bytes>>
4447   { using type = simd_abi::scalar; };
4448 
4449 // }}}
4450 // __find_next_valid_abi metafunction {{{1
4451 // Given an ABI tag A<N>, find an N2 < N such that A<N2>::_S_is_valid_v<_Tp> ==
4452 // true, N2 is a power-of-2, and A<N2>::_S_is_partial<_Tp> is false. Break
4453 // recursion once the next candidate would hold fewer than 2 elements; in that
4454 // case type::_S_is_valid_v<_Tp> may be false.
4455 template <template <int> class _Abi, int _Bytes, typename _Tp>
4456   struct __find_next_valid_abi
4457   {
4458     static constexpr auto
4459     _S_choose()
4460     {
4461       constexpr int _NextBytes = std::__bit_ceil(_Bytes) / 2;
4462       using _NextAbi = _Abi<_NextBytes>;
4463       if constexpr (_NextBytes < sizeof(_Tp) * 2) // break recursion
4464 	return _Abi<_Bytes>();
4465       else if constexpr (_NextAbi::template _S_is_partial<_Tp> == false
4466 			 && _NextAbi::template _S_is_valid_v<_Tp>)
4467 	return _NextAbi();
4468       else
4469 	return __find_next_valid_abi<_Abi, _NextBytes, _Tp>::_S_choose();
4470     }
4471 
4472     using type = decltype(_S_choose());
4473   };
4474 
4475 template <int _Bytes, typename _Tp>
4476   struct __find_next_valid_abi<__scalar_abi_wrapper, _Bytes, _Tp>
4477   { using type = simd_abi::scalar; };
4478 
4479 // _AbiList {{{1
4480 template <template <int> class...>
4481   struct _AbiList
4482   {
4483     template <typename, int> static constexpr bool _S_has_valid_abi = false;
4484     template <typename, int> using _FirstValidAbi = void;
4485     template <typename, int> using _BestAbi = void;
4486   };
4487 
4488 template <template <int> class _A0, template <int> class... _Rest>
4489   struct _AbiList<_A0, _Rest...>
4490   {
4491     template <typename _Tp, int _Np>
4492       static constexpr bool _S_has_valid_abi
4493 	= _A0<sizeof(_Tp) * _Np>::template _S_is_valid_v<
4494 	    _Tp> || _AbiList<_Rest...>::template _S_has_valid_abi<_Tp, _Np>;
4495 
4496     template <typename _Tp, int _Np>
4497       using _FirstValidAbi = conditional_t<
4498 	_A0<sizeof(_Tp) * _Np>::template _S_is_valid_v<_Tp>,
4499 	typename __decay_abi<_A0<sizeof(_Tp) * _Np>>::type,
4500 	typename _AbiList<_Rest...>::template _FirstValidAbi<_Tp, _Np>>;
4501 
4502     template <typename _Tp, int _Np>
4503       static constexpr auto
4504       _S_determine_best_abi()
4505       {
4506 	static_assert(_Np >= 1);
4507 	constexpr int _Bytes = sizeof(_Tp) * _Np;
4508 	if constexpr (_Np == 1)
4509 	  return __make_dependent_t<_Tp, simd_abi::scalar>{};
4510 	else
4511 	  {
4512 	    constexpr int __fullsize = _A0<_Bytes>::template _S_full_size<_Tp>;
4513 	    // _A0<_Bytes> is good if:
4514 	    // 1. The ABI tag is valid for _Tp
4515 	    // 2. The storage overhead is no more than padding to fill the next
4516 	    //    power-of-2 number of bytes
4517 	    if constexpr (_A0<_Bytes>::template _S_is_valid_v<
4518 			    _Tp> && __fullsize / 2 < _Np)
4519 	      return typename __decay_abi<_A0<_Bytes>>::type{};
4520 	    else
4521 	      {
4522 		using _Bp =
4523 		  typename __find_next_valid_abi<_A0, _Bytes, _Tp>::type;
4524 		if constexpr (_Bp::template _S_is_valid_v<
4525 				_Tp> && _Bp::template _S_size<_Tp> <= _Np)
4526 		  return _Bp{};
4527 		else
4528 		  return
4529 		    typename _AbiList<_Rest...>::template _BestAbi<_Tp, _Np>{};
4530 	      }
4531 	  }
4532       }
4533 
4534     template <typename _Tp, int _Np>
4535       using _BestAbi = decltype(_S_determine_best_abi<_Tp, _Np>());
4536   };
4537 
4538 // }}}1
4539 
4540 // the following lists all native ABIs, which makes them accessible to
4541 // simd_abi::deduce and select_best_vector_type_t (for fixed_size). Order
4542 // matters: whatever comes first has higher priority.
4543 using _AllNativeAbis = _AbiList<simd_abi::_VecBltnBtmsk, simd_abi::_VecBuiltin,
4544 				__scalar_abi_wrapper>;
4545 
4546 // valid _SimdTraits specialization {{{1
4547 template <typename _Tp, typename _Abi>
4548   struct _SimdTraits<_Tp, _Abi, void_t<typename _Abi::template _IsValid<_Tp>>>
4549   : _Abi::template __traits<_Tp> {};
4550 
4551 // __deduce_impl specializations {{{1
4552 // try all native ABIs (including scalar) first
4553 template <typename _Tp, size_t _Np>
4554   struct __deduce_impl<
4555     _Tp, _Np, enable_if_t<_AllNativeAbis::template _S_has_valid_abi<_Tp, _Np>>>
4556   { using type = _AllNativeAbis::_FirstValidAbi<_Tp, _Np>; };
4557 
4558 // fall back to fixed_size only if scalar and native ABIs don't match
4559 template <typename _Tp, size_t _Np, typename = void>
4560   struct __deduce_fixed_size_fallback {};
4561 
4562 template <typename _Tp, size_t _Np>
4563   struct __deduce_fixed_size_fallback<_Tp, _Np,
4564     enable_if_t<simd_abi::fixed_size<_Np>::template _S_is_valid_v<_Tp>>>
4565   { using type = simd_abi::fixed_size<_Np>; };
4566 
4567 template <typename _Tp, size_t _Np, typename>
4568   struct __deduce_impl : public __deduce_fixed_size_fallback<_Tp, _Np> {};
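
// Illustrative sketch: the net effect is that simd_abi::deduce_t picks the
// first native ABI (in _AllNativeAbis order) that can hold _Np elements of
// _Tp and falls back to fixed_size otherwise. The concrete tags are
// target-dependent; e.g. on x86-64 with only SSE enabled one would get
//
//   static_assert(std::is_same_v<stdx::simd_abi::deduce_t<float, 4>,
//                                stdx::simd_abi::_VecBuiltin<16>>);
//   static_assert(std::is_same_v<stdx::simd_abi::deduce_t<float, 7>,
//                                stdx::simd_abi::fixed_size<7>>);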
4569 
4570 //}}}1
4571 /// @endcond
4572 
4573 // simd_mask {{{
4574 template <typename _Tp, typename _Abi>
4575   class simd_mask : public _SimdTraits<_Tp, _Abi>::_MaskBase
4576   {
4577     // types, tags, and friends {{{
4578     using _Traits = _SimdTraits<_Tp, _Abi>;
4579     using _MemberType = typename _Traits::_MaskMember;
4580 
4581     // We map all masks with equal element sizeof to a single integer type, the
4582     // one given by __int_for_sizeof_t<_Tp>. This is the approach
4583     // [[gnu::vector_size(N)]] types take as well, and it reduces the number of
4584     // template specializations in the implementation classes.
4585     using _Ip = __int_for_sizeof_t<_Tp>;
4586     static constexpr _Ip* _S_type_tag = nullptr;
4587 
4588     friend typename _Traits::_MaskBase;
4589     friend class simd<_Tp, _Abi>;       // to construct masks on return
4590     friend typename _Traits::_SimdImpl; // to construct masks on return and
4591 					// inspect data on masked operations
4592   public:
4593     using _Impl = typename _Traits::_MaskImpl;
4594     friend _Impl;
4595 
4596     // }}}
4597     // member types {{{
4598     using value_type = bool;
4599     using reference = _SmartReference<_MemberType, _Impl, value_type>;
4600     using simd_type = simd<_Tp, _Abi>;
4601     using abi_type = _Abi;
4602 
4603     // }}}
4604     static constexpr size_t size() // {{{
4605     { return __size_or_zero_v<_Tp, _Abi>; }
4606 
4607     // }}}
4608     // constructors & assignment {{{
4609     simd_mask() = default;
4610     simd_mask(const simd_mask&) = default;
4611     simd_mask(simd_mask&&) = default;
4612     simd_mask& operator=(const simd_mask&) = default;
4613     simd_mask& operator=(simd_mask&&) = default;
4614 
4615     // }}}
4616     // access to internal representation (optional feature) {{{
4617     _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR explicit
4618     simd_mask(typename _Traits::_MaskCastType __init)
4619     : _M_data{__init} {}
4620     // conversion to the internal type is done in _MaskBase
4621 
4622     // }}}
4623     // bitset interface (extension to be proposed) {{{
4624     // TS_FEEDBACK:
4625     // Conversion of simd_mask to and from bitset makes it much easier to
4626     // interface with other facilities. I suggest adding `static
4627     // simd_mask::from_bitset` and `simd_mask::to_bitset`.
4628     _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR static simd_mask
4629     __from_bitset(bitset<size()> bs)
4630     { return {__bitset_init, bs}; }
4631 
4632     _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR bitset<size()>
4633     __to_bitset() const
4634     { return _Impl::_S_to_bits(_M_data)._M_to_bitset(); }
4635 
4636     // }}}
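    // Usage sketch for this extension (illustrative; assumes stdx =
    // std::experimental):
    //
    //   stdx::native_simd_mask<float> __k(true);
    //   const auto __bits = __k.__to_bitset(); // bitset<__k.size()>
    //   __k = stdx::native_simd_mask<float>::__from_bitset(__bits);
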
4637     // explicit broadcast constructor {{{
4638     _GLIBCXX_SIMD_ALWAYS_INLINE explicit _GLIBCXX_SIMD_CONSTEXPR
4639     simd_mask(value_type __x)
4640     : _M_data(_Impl::template _S_broadcast<_Ip>(__x)) {}
4641 
4642     // }}}
4643     // implicit type conversion constructor {{{
4644   #ifdef _GLIBCXX_SIMD_ENABLE_IMPLICIT_MASK_CAST
4645     // proposed improvement
4646     template <typename _Up, typename _A2,
4647 	      typename = enable_if_t<simd_size_v<_Up, _A2> == size()>>
4648       _GLIBCXX_SIMD_ALWAYS_INLINE explicit(sizeof(_MemberType)
4649 	  != sizeof(typename _SimdTraits<_Up, _A2>::_MaskMember))
4650       simd_mask(const simd_mask<_Up, _A2>& __x)
4651       : simd_mask(__proposed::static_simd_cast<simd_mask>(__x)) {}
4652   #else
4653     // conforming to ISO/IEC 19570:2018
4654     template <typename _Up, typename = enable_if_t<conjunction<
4655 			      is_same<abi_type, simd_abi::fixed_size<size()>>,
4656 			      is_same<_Up, _Up>>::value>>
4657       _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR
4658       simd_mask(const simd_mask<_Up, simd_abi::fixed_size<size()>>& __x)
4659       : _M_data(_Impl::_S_from_bitmask(__data(__x), _S_type_tag)) {}
4660   #endif
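         // Conforming usage sketch (fixed_size masks of equal size convert
         // implicitly; illustrative):
         //   namespace stdx = std::experimental;
         //   stdx::fixed_size_simd_mask<int, 4>   ki(true);
         //   stdx::fixed_size_simd_mask<float, 4> kf = ki; // uses this ctor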
4661 
4662     // }}}
4663     // load constructor {{{
4664     template <typename _Flags>
4665       _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR
4666       simd_mask(const value_type* __mem, _IsSimdFlagType<_Flags>)
4667       : _M_data(_Impl::template _S_load<_Ip>(_Flags::template _S_apply<simd_mask>(__mem))) {}
4668 
4669     template <typename _Flags>
4670       _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR
4671       simd_mask(const value_type* __mem, simd_mask __k, _IsSimdFlagType<_Flags>)
4672       : _M_data{}
4673       {
4674 	_M_data = _Impl::_S_masked_load(_M_data, __k._M_data,
4675 					_Flags::template _S_apply<simd_mask>(__mem));
4676       }
4677 
4678     // }}}
4679     // loads [simd_mask.load] {{{
4680     template <typename _Flags>
4681       _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR void
4682       copy_from(const value_type* __mem, _IsSimdFlagType<_Flags>)
4683       { _M_data = _Impl::template _S_load<_Ip>(_Flags::template _S_apply<simd_mask>(__mem)); }
4684 
4685     // }}}
4686     // stores [simd_mask.store] {{{
4687     template <typename _Flags>
4688       _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR void
4689       copy_to(value_type* __mem, _IsSimdFlagType<_Flags>) const
4690       { _Impl::_S_store(_M_data, _Flags::template _S_apply<simd_mask>(__mem)); }
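         // Load/store sketch for masks (element_aligned is always valid;
         // illustrative):
         //   namespace stdx = std::experimental;
         //   using M = stdx::native_simd_mask<float>;
         //   bool flags[M::size()] = {};
         //   M k(flags, stdx::element_aligned);         // load constructor
         //   k.copy_from(flags, stdx::element_aligned); // load
         //   k.copy_to(flags, stdx::element_aligned);   // store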
4691 
4692     // }}}
4693     // scalar access {{{
4694     _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR reference
4695     operator[](size_t __i)
4696     {
4697       if (__i >= size())
4698 	__invoke_ub("Subscript %zu is out of range [0, %zu]", __i, size() - 1);
4699       return {_M_data, int(__i)};
4700     }
4701 
4702     _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR value_type
4703     operator[](size_t __i) const
4704     {
4705       if (__i >= size())
4706 	__invoke_ub("Subscript %zu is out of range [0, %zu]", __i, size() - 1);
4707       if constexpr (__is_scalar_abi<_Abi>())
4708 	return _M_data;
4709       else
4710 	return static_cast<bool>(_M_data[__i]);
4711     }
4712 
4713     // }}}
4714     // negation {{{
4715     _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR simd_mask
4716     operator!() const
4717     { return {__private_init, _Impl::_S_bit_not(_M_data)}; }
4718 
4719     // }}}
4720     // simd_mask binary operators [simd_mask.binary] {{{
4721   #ifdef _GLIBCXX_SIMD_ENABLE_IMPLICIT_MASK_CAST
4722     // simd_mask<int> && simd_mask<uint> needs disambiguation
4723     template <typename _Up, typename _A2,
4724 	      typename = enable_if_t<is_convertible_v<simd_mask<_Up, _A2>, simd_mask>>>
4725       _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend simd_mask
4726       operator&&(const simd_mask& __x, const simd_mask<_Up, _A2>& __y)
4727       {
4728 	return {__private_init,
4729 		_Impl::_S_logical_and(__x._M_data, simd_mask(__y)._M_data)};
4730       }
4731 
4732     template <typename _Up, typename _A2,
4733 	      typename = enable_if_t<is_convertible_v<simd_mask<_Up, _A2>, simd_mask>>>
4734       _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend simd_mask
4735       operator||(const simd_mask& __x, const simd_mask<_Up, _A2>& __y)
4736       {
4737 	return {__private_init,
4738 		_Impl::_S_logical_or(__x._M_data, simd_mask(__y)._M_data)};
4739       }
4740   #endif // _GLIBCXX_SIMD_ENABLE_IMPLICIT_MASK_CAST
4741 
4742     _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend simd_mask
4743     operator&&(const simd_mask& __x, const simd_mask& __y)
4744     { return {__private_init, _Impl::_S_logical_and(__x._M_data, __y._M_data)}; }
4745 
4746     _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend simd_mask
4747     operator||(const simd_mask& __x, const simd_mask& __y)
4748     { return {__private_init, _Impl::_S_logical_or(__x._M_data, __y._M_data)}; }
4749 
4750     _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend simd_mask
4751     operator&(const simd_mask& __x, const simd_mask& __y)
4752     { return {__private_init, _Impl::_S_bit_and(__x._M_data, __y._M_data)}; }
4753 
4754     _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend simd_mask
4755     operator|(const simd_mask& __x, const simd_mask& __y)
4756     { return {__private_init, _Impl::_S_bit_or(__x._M_data, __y._M_data)}; }
4757 
4758     _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend simd_mask
4759     operator^(const simd_mask& __x, const simd_mask& __y)
4760     { return {__private_init, _Impl::_S_bit_xor(__x._M_data, __y._M_data)}; }
4761 
4762     _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend simd_mask&
4763     operator&=(simd_mask& __x, const simd_mask& __y)
4764     {
4765       __x._M_data = _Impl::_S_bit_and(__x._M_data, __y._M_data);
4766       return __x;
4767     }
4768 
4769     _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend simd_mask&
4770     operator|=(simd_mask& __x, const simd_mask& __y)
4771     {
4772       __x._M_data = _Impl::_S_bit_or(__x._M_data, __y._M_data);
4773       return __x;
4774     }
4775 
4776     _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend simd_mask&
4777     operator^=(simd_mask& __x, const simd_mask& __y)
4778     {
4779       __x._M_data = _Impl::_S_bit_xor(__x._M_data, __y._M_data);
4780       return __x;
4781     }
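         // For masks the logical operators (&&, ||) and the bitwise operators
         // (&, |, ^) coincide element-wise (illustrative):
         //   namespace stdx = std::experimental;
         //   using M = stdx::native_simd_mask<float>;
         //   M a(true), b(false);
         //   M c = a && b; // element-wise AND, same result as a & b
         //   a ^= b;       // element-wise XOR assignment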
4782 
4783     // }}}
4784     // simd_mask compares [simd_mask.comparison] {{{
4785     _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend simd_mask
4786     operator==(const simd_mask& __x, const simd_mask& __y)
4787     { return !operator!=(__x, __y); }
4788 
4789     _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend simd_mask
4790     operator!=(const simd_mask& __x, const simd_mask& __y)
4791     { return {__private_init, _Impl::_S_bit_xor(__x._M_data, __y._M_data)}; }
4792 
4793     // }}}
4794     // private_init ctor {{{
4795     _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR
4796     simd_mask(_PrivateInit, typename _Traits::_MaskMember __init)
4797     : _M_data(__init) {}
4798 
4799     // }}}
4800     // private_init generator ctor {{{
4801     template <typename _Fp, typename = decltype(bool(declval<_Fp>()(size_t())))>
4802       _GLIBCXX_SIMD_INTRINSIC constexpr
4803       simd_mask(_PrivateInit, _Fp&& __gen)
4804       : _M_data()
4805       {
4806 	__execute_n_times<size()>([&](auto __i) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
4807 	  _Impl::_S_set(_M_data, __i, __gen(__i));
4808 	});
4809       }
4810 
4811     // }}}
4812     // bitset_init ctor {{{
4813     _GLIBCXX_SIMD_INTRINSIC constexpr
4814     simd_mask(_BitsetInit, bitset<size()> __init)
4815     : _M_data(_Impl::_S_from_bitmask(_SanitizedBitMask<size()>(__init), _S_type_tag))
4816     {}
4817 
4818     // }}}
4819     // __cvt {{{
4820     // TS_FEEDBACK:
4821     // The conversion operator this implements should be a ctor on simd_mask.
4822     // Calling .__cvt() on a simd_mask yields a proxy that converts to any simd_mask of equal size.
4823     // A useful variation: add `explicit(sizeof(_Tp) != sizeof(_Up))`
4824     struct _CvtProxy
4825     {
4826       template <typename _Up, typename _A2,
4827 		typename = enable_if_t<simd_size_v<_Up, _A2> == simd_size_v<_Tp, _Abi>>>
4828 	_GLIBCXX_SIMD_ALWAYS_INLINE
4829 	operator simd_mask<_Up, _A2>() &&
4830 	{
4831 	  using namespace std::experimental::__proposed;
4832 	  return static_simd_cast<simd_mask<_Up, _A2>>(_M_data);
4833 	}
4834 
4835       const simd_mask<_Tp, _Abi>& _M_data;
4836     };
4837 
4838     _GLIBCXX_SIMD_INTRINSIC _CvtProxy
4839     __cvt() const
4840     { return {*this}; }
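         // Usage sketch (the proxy converts on use; illustrative):
         //   namespace stdx = std::experimental;
         //   stdx::fixed_size_simd_mask<float, 4> kf(true);
         //   stdx::fixed_size_simd_mask<int, 4>   ki = kf.__cvt();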
4841 
4842     // }}}
4843     // operator?: overloads (suggested extension) {{{
4844   #ifdef __GXX_CONDITIONAL_IS_OVERLOADABLE__
4845     _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend simd_mask
4846     operator?:(const simd_mask& __k, const simd_mask& __where_true,
4847 	       const simd_mask& __where_false)
4848     {
4849       auto __ret = __where_false;
4850       _Impl::_S_masked_assign(__k._M_data, __ret._M_data, __where_true._M_data);
4851       return __ret;
4852     }
4853 
4854     template <typename _U1, typename _U2,
4855 	      typename _Rp = simd<common_type_t<_U1, _U2>, _Abi>,
4856 	      typename = enable_if_t<conjunction_v<
4857 		is_convertible<_U1, _Rp>, is_convertible<_U2, _Rp>,
4858 		is_convertible<simd_mask, typename _Rp::mask_type>>>>
4859       _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend _Rp
4860       operator?:(const simd_mask& __k, const _U1& __where_true,
4861 		 const _U2& __where_false)
4862       {
4863 	_Rp __ret = __where_false;
4864 	_Rp::_Impl::_S_masked_assign(
4865 	  __data(static_cast<typename _Rp::mask_type>(__k)), __data(__ret),
4866 	  __data(static_cast<_Rp>(__where_true)));
4867 	return __ret;
4868       }
4869 
4870   #ifdef _GLIBCXX_SIMD_ENABLE_IMPLICIT_MASK_CAST
4871     template <typename _Kp, typename _Ak, typename _Up, typename _Au,
4872 	      typename = enable_if_t<
4873 		conjunction_v<is_convertible<simd_mask<_Kp, _Ak>, simd_mask>,
4874 			      is_convertible<simd_mask<_Up, _Au>, simd_mask>>>>
4875       _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend simd_mask
4876       operator?:(const simd_mask<_Kp, _Ak>& __k, const simd_mask& __where_true,
4877 		 const simd_mask<_Up, _Au>& __where_false)
4878       {
4879 	simd_mask __ret = __where_false;
4880 	_Impl::_S_masked_assign(simd_mask(__k)._M_data, __ret._M_data,
4881 				__where_true._M_data);
4882 	return __ret;
4883       }
4884   #endif // _GLIBCXX_SIMD_ENABLE_IMPLICIT_MASK_CAST
4885   #endif // __GXX_CONDITIONAL_IS_OVERLOADABLE__
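         // Hypothetical usage, should a compiler ever define
         // __GXX_CONDITIONAL_IS_OVERLOADABLE__ (stock GCC does not):
         //   M r = k ? a : b; // blend: a where k is true, b elsewhere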
4886 
4887     // }}}
4888     // _M_is_constprop {{{
4889     _GLIBCXX_SIMD_INTRINSIC constexpr bool
4890     _M_is_constprop() const
4891     {
4892       if constexpr (__is_scalar_abi<_Abi>())
4893 	return __builtin_constant_p(_M_data);
4894       else
4895 	return _M_data._M_is_constprop();
4896     }
4897 
4898     // }}}
4899 
4900   private:
4901     friend const auto& __data<_Tp, abi_type>(const simd_mask&);
4902     friend auto& __data<_Tp, abi_type>(simd_mask&);
4903     alignas(_Traits::_S_mask_align) _MemberType _M_data;
4904   };
4905 
4906 // }}}
4907 
4908 /// @cond undocumented
4909 // __data(simd_mask) {{{
4910 template <typename _Tp, typename _Ap>
4911   _GLIBCXX_SIMD_INTRINSIC constexpr const auto&
4912   __data(const simd_mask<_Tp, _Ap>& __x)
4913   { return __x._M_data; }
4914 
4915 template <typename _Tp, typename _Ap>
4916   _GLIBCXX_SIMD_INTRINSIC constexpr auto&
4917   __data(simd_mask<_Tp, _Ap>& __x)
4918   { return __x._M_data; }
4919 
4920 // }}}
4921 /// @endcond
4922 
4923 // simd_mask reductions [simd_mask.reductions] {{{
4924 template <typename _Tp, typename _Abi>
4925   _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR bool
4926   all_of(const simd_mask<_Tp, _Abi>& __k) noexcept
4927   {
4928     if (__builtin_is_constant_evaluated() || __k._M_is_constprop())
4929       {
4930 	for (size_t __i = 0; __i < simd_size_v<_Tp, _Abi>; ++__i)
4931 	  if (!__k[__i])
4932 	    return false;
4933 	return true;
4934       }
4935     else
4936       return _Abi::_MaskImpl::_S_all_of(__k);
4937   }
4938 
4939 template <typename _Tp, typename _Abi>
4940   _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR bool
4941   any_of(const simd_mask<_Tp, _Abi>& __k) noexcept
4942   {
4943     if (__builtin_is_constant_evaluated() || __k._M_is_constprop())
4944       {
4945 	for (size_t __i = 0; __i < simd_size_v<_Tp, _Abi>; ++__i)
4946 	  if (__k[__i])
4947 	    return true;
4948 	return false;
4949       }
4950     else
4951       return _Abi::_MaskImpl::_S_any_of(__k);
4952   }
4953 
4954 template <typename _Tp, typename _Abi>
4955   _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR bool
4956   none_of(const simd_mask<_Tp, _Abi>& __k) noexcept
4957   {
4958     if (__builtin_is_constant_evaluated() || __k._M_is_constprop())
4959       {
4960 	for (size_t __i = 0; __i < simd_size_v<_Tp, _Abi>; ++__i)
4961 	  if (__k[__i])
4962 	    return false;
4963 	return true;
4964       }
4965     else
4966       return _Abi::_MaskImpl::_S_none_of(__k);
4967   }
4968 
4969 template <typename _Tp, typename _Abi>
4970   _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR bool
4971   some_of(const simd_mask<_Tp, _Abi>& __k) noexcept
4972   {
4973     if (__builtin_is_constant_evaluated() || __k._M_is_constprop())
4974       {
4975 	for (size_t __i = 1; __i < simd_size_v<_Tp, _Abi>; ++__i)
4976 	  if (__k[__i] != __k[__i - 1])
4977 	    return true;
4978 	return false;
4979       }
4980     else
4981       return _Abi::_MaskImpl::_S_some_of(__k);
4982   }
4983 
4984 template <typename _Tp, typename _Abi>
4985   _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR int
4986   popcount(const simd_mask<_Tp, _Abi>& __k) noexcept
4987   {
4988     if (__builtin_is_constant_evaluated() || __k._M_is_constprop())
4989       {
4990 	const int __r = __call_with_subscripts<simd_size_v<_Tp, _Abi>>(
4991 			  __k, [](auto... __elements) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
4992 			    return ((__elements != 0) + ...);
4993 			  });
4994 	if (__builtin_is_constant_evaluated() || __builtin_constant_p(__r))
4995 	  return __r;
4996       }
4997     return _Abi::_MaskImpl::_S_popcount(__k);
4998   }
4999 
5000 template <typename _Tp, typename _Abi>
5001   _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR int
5002   find_first_set(const simd_mask<_Tp, _Abi>& __k)
5003   {
5004     if (__builtin_is_constant_evaluated() || __k._M_is_constprop())
5005       {
5006 	constexpr size_t _Np = simd_size_v<_Tp, _Abi>;
5007 	const size_t _Idx = __call_with_n_evaluations<_Np>(
5008 			      [](auto... __indexes) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
5009 				return std::min({__indexes...});
5010 			      }, [&](auto __i) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
5011 				return __k[__i] ? +__i : _Np;
5012 			      });
5013 	if (_Idx >= _Np)
5014 	  __invoke_ub("find_first_set(empty mask) is UB");
5015 	if (__builtin_constant_p(_Idx))
5016 	  return _Idx;
5017       }
5018     return _Abi::_MaskImpl::_S_find_first_set(__k);
5019   }
5020 
5021 template <typename _Tp, typename _Abi>
5022   _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR int
5023   find_last_set(const simd_mask<_Tp, _Abi>& __k)
5024   {
5025     if (__builtin_is_constant_evaluated() || __k._M_is_constprop())
5026       {
5027 	constexpr size_t _Np = simd_size_v<_Tp, _Abi>;
5028 	const int _Idx = __call_with_n_evaluations<_Np>(
5029 			   [](auto... __indexes) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
5030 			     return std::max({__indexes...});
5031 			   }, [&](auto __i) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
5032 			     return __k[__i] ? int(__i) : -1;
5033 			   });
5034 	if (_Idx < 0)
5035 	  __invoke_ub("find_last_set(empty mask) is UB");
5036 	if (__builtin_constant_p(_Idx))
5037 	  return _Idx;
5038       }
5039     return _Abi::_MaskImpl::_S_find_last_set(__k);
5040   }
5041 
5042 _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR bool
5043 all_of(_ExactBool __x) noexcept
5044 { return __x; }
5045 
5046 _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR bool
5047 any_of(_ExactBool __x) noexcept
5048 { return __x; }
5049 
5050 _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR bool
5051 none_of(_ExactBool __x) noexcept
5052 { return !__x; }
5053 
5054 _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR bool
5055 some_of(_ExactBool) noexcept
5056 { return false; }
5057 
5058 _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR int
5059 popcount(_ExactBool __x) noexcept
5060 { return __x; }
5061 
5062 _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR int
5063 find_first_set(_ExactBool)
5064 { return 0; }
5065 
5066 _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR int
5067 find_last_set(_ExactBool)
5068 { return 0; }
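     // Usage sketch for the reductions above (illustrative):
     //   namespace stdx = std::experimental;
     //   stdx::native_simd<float> x(1.f), y(2.f);
     //   auto k = x < y;                      // comparison yields a mask
     //   if (stdx::any_of(k))
     //     {
     //       int n = stdx::popcount(k);       // number of true elements
     //       int i = stdx::find_first_set(k); // precondition: any_of(k)
     //     }
     // find_first_set/find_last_set on an all-false mask are undefined
     // (diagnosed with _GLIBCXX_DEBUG_UB).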
5069 
5070 // }}}
5071 
5072 /// @cond undocumented
5073 // _SimdIntOperators{{{1
5074 template <typename _V, typename _Tp, typename _Abi, bool>
5075   class _SimdIntOperators {};
5076 
5077 template <typename _V, typename _Tp, typename _Abi>
5078   class _SimdIntOperators<_V, _Tp, _Abi, true>
5079   {
5080     using _Impl = typename _SimdTraits<_Tp, _Abi>::_SimdImpl;
5081 
5082     _GLIBCXX_SIMD_INTRINSIC constexpr const _V&
5083     __derived() const
5084     { return *static_cast<const _V*>(this); }
5085 
5086     template <typename _Up>
5087       _GLIBCXX_SIMD_INTRINSIC static _GLIBCXX_SIMD_CONSTEXPR _V
5088       _S_make_derived(_Up&& __d)
5089       { return {__private_init, static_cast<_Up&&>(__d)}; }
5090 
5091   public:
5092     _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend
5093     _V&
5094     operator%=(_V& __lhs, const _V& __x)
5095     { return __lhs = __lhs % __x; }
5096 
5097     _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend
5098     _V&
5099     operator&=(_V& __lhs, const _V& __x)
5100     { return __lhs = __lhs & __x; }
5101 
5102     _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend
5103     _V&
5104     operator|=(_V& __lhs, const _V& __x)
5105     { return __lhs = __lhs | __x; }
5106 
5107     _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend
5108     _V&
5109     operator^=(_V& __lhs, const _V& __x)
5110     { return __lhs = __lhs ^ __x; }
5111 
5112     _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend
5113     _V&
5114     operator<<=(_V& __lhs, const _V& __x)
5115     { return __lhs = __lhs << __x; }
5116 
5117     _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend
5118     _V&
5119     operator>>=(_V& __lhs, const _V& __x)
5120     { return __lhs = __lhs >> __x; }
5121 
5122     _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend
5123     _V&
5124     operator<<=(_V& __lhs, int __x)
5125     { return __lhs = __lhs << __x; }
5126 
5127     _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend
5128     _V&
5129     operator>>=(_V& __lhs, int __x)
5130     { return __lhs = __lhs >> __x; }
5131 
5132     _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend
5133     _V
5134     operator%(const _V& __x, const _V& __y)
5135     {
5136       return _SimdIntOperators::_S_make_derived(
5137 	_Impl::_S_modulus(__data(__x), __data(__y)));
5138     }
5139 
5140     _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend
5141     _V
5142     operator&(const _V& __x, const _V& __y)
5143     {
5144       return _SimdIntOperators::_S_make_derived(
5145 	_Impl::_S_bit_and(__data(__x), __data(__y)));
5146     }
5147 
5148     _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend
5149     _V
5150     operator|(const _V& __x, const _V& __y)
5151     {
5152       return _SimdIntOperators::_S_make_derived(
5153 	_Impl::_S_bit_or(__data(__x), __data(__y)));
5154     }
5155 
5156     _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend
5157     _V
5158     operator^(const _V& __x, const _V& __y)
5159     {
5160       return _SimdIntOperators::_S_make_derived(
5161 	_Impl::_S_bit_xor(__data(__x), __data(__y)));
5162     }
5163 
5164     _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend
5165     _V
5166     operator<<(const _V& __x, const _V& __y)
5167     {
5168       return _SimdIntOperators::_S_make_derived(
5169 	_Impl::_S_bit_shift_left(__data(__x), __data(__y)));
5170     }
5171 
5172     _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend
5173     _V
5174     operator>>(const _V& __x, const _V& __y)
5175     {
5176       return _SimdIntOperators::_S_make_derived(
5177 	_Impl::_S_bit_shift_right(__data(__x), __data(__y)));
5178     }
5179 
5180     _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend
5181     _V
5182     operator<<(const _V& __x, int __y)
5183     {
5184       if (__y < 0)
5185 	__invoke_ub("The behavior is undefined if the right operand of a "
5186 		    "shift operation is negative. [expr.shift]\nA shift by "
5187 		    "%d was requested",
5188 		    __y);
5189       if (size_t(__y) >= sizeof(declval<_Tp>() << __y) * __CHAR_BIT__)
5190 	__invoke_ub(
5191 	  "The behavior is undefined if the right operand of a "
5192 	  "shift operation is greater than or equal to the width of the "
5193 	  "promoted left operand. [expr.shift]\nA shift by %d was requested",
5194 	  __y);
5195       return _SimdIntOperators::_S_make_derived(
5196 	_Impl::_S_bit_shift_left(__data(__x), __y));
5197     }
5198 
5199     _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend
5200     _V
5201     operator>>(const _V& __x, int __y)
5202     {
5203       if (__y < 0)
5204 	__invoke_ub(
5205 	  "The behavior is undefined if the right operand of a shift "
5206 	  "operation is negative. [expr.shift]\nA shift by %d was requested",
5207 	  __y);
5208       if (size_t(__y) >= sizeof(declval<_Tp>() >> __y) * __CHAR_BIT__)
5209 	__invoke_ub(
5210 	  "The behavior is undefined if the right operand of a shift "
5211 	  "operation is greater than or equal to the width of the promoted "
5212 	  "left operand. [expr.shift]\nA shift by %d was requested",
5213 	  __y);
5214       return _SimdIntOperators::_S_make_derived(
5215 	_Impl::_S_bit_shift_right(__data(__x), __y));
5216     }
5217 
5218     // unary operators (for integral _Tp)
5219     _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR
5220     _V
5221     operator~() const
5222     { return {__private_init, _Impl::_S_complement(__derived()._M_data)}; }
5223   };
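     // Usage sketch for the integer-only operators above (illustrative):
     //   namespace stdx = std::experimental;
     //   stdx::native_simd<int> v(7), w(3);
     //   auto m = v % w;  // element-wise modulus
     //   auto s = v << 2; // element-wise shift; negative or too-large shift
     //                    // amounts are diagnosed with _GLIBCXX_DEBUG_UB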
5224 
5225 //}}}1
5226 /// @endcond
5227 
5228 // simd {{{
5229 template <typename _Tp, typename _Abi>
5230   class simd : public _SimdIntOperators<
5231 		 simd<_Tp, _Abi>, _Tp, _Abi,
5232 		 conjunction<is_integral<_Tp>,
5233 			     typename _SimdTraits<_Tp, _Abi>::_IsValid>::value>,
5234 	       public _SimdTraits<_Tp, _Abi>::_SimdBase
5235   {
5236     using _Traits = _SimdTraits<_Tp, _Abi>;
5237     using _MemberType = typename _Traits::_SimdMember;
5238     using _CastType = typename _Traits::_SimdCastType;
5239     static constexpr _Tp* _S_type_tag = nullptr;
5240     friend typename _Traits::_SimdBase;
5241 
5242   public:
5243     using _Impl = typename _Traits::_SimdImpl;
5244     friend _Impl;
5245     friend _SimdIntOperators<simd, _Tp, _Abi, true>;
5246 
5247     using value_type = _Tp;
5248     using reference = _SmartReference<_MemberType, _Impl, value_type>;
5249     using mask_type = simd_mask<_Tp, _Abi>;
5250     using abi_type = _Abi;
5251 
5252     static constexpr size_t size()
5253     { return __size_or_zero_v<_Tp, _Abi>; }
5254 
5255     _GLIBCXX_SIMD_CONSTEXPR simd() = default;
5256     _GLIBCXX_SIMD_CONSTEXPR simd(const simd&) = default;
5257     _GLIBCXX_SIMD_CONSTEXPR simd(simd&&) noexcept = default;
5258     _GLIBCXX_SIMD_CONSTEXPR simd& operator=(const simd&) = default;
5259     _GLIBCXX_SIMD_CONSTEXPR simd& operator=(simd&&) noexcept = default;
5260 
5261     // implicit broadcast constructor
5262     template <typename _Up,
5263 	      typename = enable_if_t<!is_same_v<__remove_cvref_t<_Up>, bool>>>
5264       _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR
5265       simd(_ValuePreservingOrInt<_Up, value_type>&& __x)
5266       : _M_data(
5267 	_Impl::_S_broadcast(static_cast<value_type>(static_cast<_Up&&>(__x))))
5268       {}
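         // Broadcast sketch, per the _ValuePreservingOrInt constraint
         // (illustrative; simd here means std::experimental::simd):
         //   simd<float> a = 1;   // OK: int is always allowed
         //   simd<float> b = 1.f; // OK: value-preserving
         //   simd<float> c = 1.0; // ill-formed: double -> float may lose value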
5269 
5270     // implicit type conversion constructor (convert from fixed_size to
5271     // fixed_size)
5272     template <typename _Up>
5273       _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR
5274       simd(const simd<_Up, simd_abi::fixed_size<size()>>& __x,
5275 	   enable_if_t<
5276 	     conjunction<
5277 	       is_same<simd_abi::fixed_size<size()>, abi_type>,
5278 	       negation<__is_narrowing_conversion<_Up, value_type>>,
5279 	       __converts_to_higher_integer_rank<_Up, value_type>>::value,
5280 	     void*> = nullptr)
5281       : simd{static_cast<array<_Up, size()>>(__x).data(), vector_aligned} {}
5282 
5283     // explicit type conversion constructor
5284 #ifdef _GLIBCXX_SIMD_ENABLE_STATIC_CAST
5285     template <typename _Up, typename _A2,
5286 	      typename = decltype(static_simd_cast<simd>(
5287 		declval<const simd<_Up, _A2>&>()))>
5288       _GLIBCXX_SIMD_ALWAYS_INLINE explicit _GLIBCXX_SIMD_CONSTEXPR
5289       simd(const simd<_Up, _A2>& __x)
5290       : simd(static_simd_cast<simd>(__x)) {}
5291 #endif // _GLIBCXX_SIMD_ENABLE_STATIC_CAST
5292 
5293     // generator constructor
5294     template <typename _Fp>
5295       _GLIBCXX_SIMD_ALWAYS_INLINE explicit _GLIBCXX_SIMD_CONSTEXPR
5296       simd(_Fp&& __gen, _ValuePreservingOrInt<decltype(declval<_Fp>()(
5297 						declval<_SizeConstant<0>&>())),
5298 					      value_type>* = nullptr)
5299       : _M_data(_Impl::_S_generator(static_cast<_Fp&&>(__gen), _S_type_tag)) {}
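         // Generator sketch (the callable is invoked with _SizeConstant
         // arguments 0 ... size()-1; illustrative):
         //   simd<int> iota([](auto i) { return int(i); }); // {0, 1, 2, ...}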
5300 
5301     // load constructor
5302     template <typename _Up, typename _Flags>
5303       _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR
5304       simd(const _Up* __mem, _IsSimdFlagType<_Flags>)
5305       : _M_data(
5306 	  _Impl::_S_load(_Flags::template _S_apply<simd>(__mem), _S_type_tag))
5307       {}
5308 
5309     // loads [simd.load]
5310     template <typename _Up, typename _Flags>
5311       _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR void
5312       copy_from(const _Vectorizable<_Up>* __mem, _IsSimdFlagType<_Flags>)
5313       {
5314 	_M_data = static_cast<decltype(_M_data)>(
5315 	  _Impl::_S_load(_Flags::template _S_apply<simd>(__mem), _S_type_tag));
5316       }
5317 
5318     // stores [simd.store]
5319     template <typename _Up, typename _Flags>
5320       _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR void
5321       copy_to(_Vectorizable<_Up>* __mem, _IsSimdFlagType<_Flags>) const
5322       {
5323 	_Impl::_S_store(_M_data, _Flags::template _S_apply<simd>(__mem),
5324 			_S_type_tag);
5325       }
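         // Load/store sketch (element_aligned is always valid; vector_aligned
         // asserts alignment to memory_alignment_v; illustrative):
         //   namespace stdx = std::experimental;
         //   using V = stdx::native_simd<float>;
         //   alignas(stdx::memory_alignment_v<V>) float buf[V::size()] = {};
         //   V v(buf, stdx::vector_aligned);          // load constructor
         //   v.copy_from(buf, stdx::element_aligned); // load
         //   v.copy_to(buf, stdx::vector_aligned);    // store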
5326 
5327     // scalar access
5328     _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR reference
5329     operator[](size_t __i)
5330     { return {_M_data, int(__i)}; }
5331 
5332     _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR value_type
5333     operator[]([[maybe_unused]] size_t __i) const
5334     {
5335       if constexpr (__is_scalar_abi<_Abi>())
5336 	{
5337 	  _GLIBCXX_DEBUG_ASSERT(__i == 0);
5338 	  return _M_data;
5339 	}
5340       else
5341 	return _M_data[__i];
5342     }
5343 
5344     // increment and decrement:
5345     _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR simd&
5346     operator++()
5347     {
5348       _Impl::_S_increment(_M_data);
5349       return *this;
5350     }
5351 
5352     _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR simd
5353     operator++(int)
5354     {
5355       simd __r = *this;
5356       _Impl::_S_increment(_M_data);
5357       return __r;
5358     }
5359 
5360     _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR simd&
5361     operator--()
5362     {
5363       _Impl::_S_decrement(_M_data);
5364       return *this;
5365     }
5366 
5367     _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR simd
5368     operator--(int)
5369     {
5370       simd __r = *this;
5371       _Impl::_S_decrement(_M_data);
5372       return __r;
5373     }
5374 
5375     // unary operators (for any _Tp)
5376     _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR mask_type
5377     operator!() const
5378     { return {__private_init, _Impl::_S_negate(_M_data)}; }
5379 
5380     _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR simd
5381     operator+() const
5382     { return *this; }
5383 
5384     _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR simd
5385     operator-() const
5386     { return {__private_init, _Impl::_S_unary_minus(_M_data)}; }
5387 
5388     // access to internal representation (suggested extension)
5389     _GLIBCXX_SIMD_ALWAYS_INLINE explicit _GLIBCXX_SIMD_CONSTEXPR
5390     simd(_CastType __init) : _M_data(__init) {}
5391 
5392     // compound assignment [simd.cassign]
5393     _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend simd&
5394     operator+=(simd& __lhs, const simd& __x)
5395     { return __lhs = __lhs + __x; }
5396 
5397     _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend simd&
5398     operator-=(simd& __lhs, const simd& __x)
5399     { return __lhs = __lhs - __x; }
5400 
5401     _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend simd&
5402     operator*=(simd& __lhs, const simd& __x)
5403     { return __lhs = __lhs * __x; }
5404 
5405     _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend simd&
5406     operator/=(simd& __lhs, const simd& __x)
5407     { return __lhs = __lhs / __x; }
5408 
5409     // binary operators [simd.binary]
5410     _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend simd
5411     operator+(const simd& __x, const simd& __y)
5412     { return {__private_init, _Impl::_S_plus(__x._M_data, __y._M_data)}; }
5413 
5414     _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend simd
5415     operator-(const simd& __x, const simd& __y)
5416     { return {__private_init, _Impl::_S_minus(__x._M_data, __y._M_data)}; }
5417 
5418     _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend simd
5419     operator*(const simd& __x, const simd& __y)
5420     { return {__private_init, _Impl::_S_multiplies(__x._M_data, __y._M_data)}; }
5421 
5422     _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend simd
5423     operator/(const simd& __x, const simd& __y)
5424     { return {__private_init, _Impl::_S_divides(__x._M_data, __y._M_data)}; }
5425 
5426     // compares [simd.comparison]
5427     _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend mask_type
5428     operator==(const simd& __x, const simd& __y)
5429     { return simd::_S_make_mask(_Impl::_S_equal_to(__x._M_data, __y._M_data)); }
5430 
5431     _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend mask_type
5432     operator!=(const simd& __x, const simd& __y)
5433     {
5434       return simd::_S_make_mask(
5435 	_Impl::_S_not_equal_to(__x._M_data, __y._M_data));
5436     }
5437 
5438     _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend mask_type
5439     operator<(const simd& __x, const simd& __y)
5440     { return simd::_S_make_mask(_Impl::_S_less(__x._M_data, __y._M_data)); }
5441 
5442     _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend mask_type
5443     operator<=(const simd& __x, const simd& __y)
5444     {
5445       return simd::_S_make_mask(_Impl::_S_less_equal(__x._M_data, __y._M_data));
5446     }
5447 
5448     _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend mask_type
5449     operator>(const simd& __x, const simd& __y)
5450     { return simd::_S_make_mask(_Impl::_S_less(__y._M_data, __x._M_data)); }
5451 
5452     _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend mask_type
5453     operator>=(const simd& __x, const simd& __y)
5454     {
5455       return simd::_S_make_mask(_Impl::_S_less_equal(__y._M_data, __x._M_data));
5456     }
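         // Comparisons yield a mask_type; a typical pattern combines them
         // with where() for masked assignment (illustrative, given two simd
         // objects x and y of the same type):
         //   auto k = x < y;  // mask_type
         //   where(k, x) = y; // x[i] = y[i] wherever k[i] is true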
5457 
5458     // operator?: overloads (suggested extension) {{{
5459 #ifdef __GXX_CONDITIONAL_IS_OVERLOADABLE__
5460     _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend simd
5461     operator?:(const mask_type& __k, const simd& __where_true,
5462 	const simd& __where_false)
5463     {
5464       auto __ret = __where_false;
5465       _Impl::_S_masked_assign(__data(__k), __data(__ret), __data(__where_true));
5466       return __ret;
5467     }
5468 
5469 #endif // __GXX_CONDITIONAL_IS_OVERLOADABLE__
5470     // }}}
5471 
5472     // "private" because of the first arguments's namespace
5473     _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR
5474     simd(_PrivateInit, const _MemberType& __init)
5475     : _M_data(__init) {}
5476 
5477     // "private" because of the first arguments's namespace
5478     _GLIBCXX_SIMD_INTRINSIC
5479     simd(_BitsetInit, bitset<size()> __init) : _M_data()
5480     { where(mask_type(__bitset_init, __init), *this) = ~*this; }
5481 
5482     _GLIBCXX_SIMD_INTRINSIC constexpr bool
5483     _M_is_constprop() const
5484     {
5485       if constexpr (__is_scalar_abi<_Abi>())
5486 	return __builtin_constant_p(_M_data);
5487       else
5488 	return _M_data._M_is_constprop();
5489     }
5490 
5491   private:
5492     _GLIBCXX_SIMD_INTRINSIC static constexpr mask_type
5493     _S_make_mask(typename mask_type::_MemberType __k)
5494     { return {__private_init, __k}; }
5495 
5496     friend const auto& __data<value_type, abi_type>(const simd&);
5497     friend auto& __data<value_type, abi_type>(simd&);
5498     alignas(_Traits::_S_simd_align) _MemberType _M_data;
5499   };
5500 
5501 // }}}
5502 /// @cond undocumented
5503 // __data {{{
5504 template <typename _Tp, typename _Ap>
5505   _GLIBCXX_SIMD_INTRINSIC constexpr const auto&
5506   __data(const simd<_Tp, _Ap>& __x)
5507   { return __x._M_data; }
5508 
5509 template <typename _Tp, typename _Ap>
5510   _GLIBCXX_SIMD_INTRINSIC constexpr auto&
5511   __data(simd<_Tp, _Ap>& __x)
5512   { return __x._M_data; }
5513 
5514 // }}}
5515 namespace __float_bitwise_operators { //{{{
5516 template <typename _Tp, typename _Ap>
5517   _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR simd<_Tp, _Ap>
5518   operator^(const simd<_Tp, _Ap>& __a, const simd<_Tp, _Ap>& __b)
5519   { return {__private_init, _Ap::_SimdImpl::_S_bit_xor(__data(__a), __data(__b))}; }
5520 
5521 template <typename _Tp, typename _Ap>
5522   _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR simd<_Tp, _Ap>
5523   operator|(const simd<_Tp, _Ap>& __a, const simd<_Tp, _Ap>& __b)
5524   { return {__private_init, _Ap::_SimdImpl::_S_bit_or(__data(__a), __data(__b))}; }
5525 
5526 template <typename _Tp, typename _Ap>
5527   _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR simd<_Tp, _Ap>
5528   operator&(const simd<_Tp, _Ap>& __a, const simd<_Tp, _Ap>& __b)
5529   { return {__private_init, _Ap::_SimdImpl::_S_bit_and(__data(__a), __data(__b))}; }
5530 
5531 template <typename _Tp, typename _Ap>
5532   _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR
5533   enable_if_t<is_floating_point_v<_Tp>, simd<_Tp, _Ap>>
5534   operator~(const simd<_Tp, _Ap>& __a)
5535   { return {__private_init, _Ap::_SimdImpl::_S_complement(__data(__a))}; }
5536 } // namespace __float_bitwise_operators }}}
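     // Usage sketch for these internal helpers (the implementation brings
     // them in with a using-directive where needed; illustrative):
     //   namespace stdx = std::experimental;
     //   using namespace __float_bitwise_operators;
     //   stdx::native_simd<float> x(2.f);
     //   auto neg = x ^ stdx::native_simd<float>(-0.f); // flip the sign bit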
5537 /// @endcond
5538 
5539 /// @}
5540 _GLIBCXX_SIMD_END_NAMESPACE
5541 
5542 #endif // __cplusplus >= 201703L
5543 #endif // _GLIBCXX_EXPERIMENTAL_SIMD_H
5544 
5545 // vim: foldmethod=marker foldmarker={{{,}}}
5546