xref: /netbsd-src/external/gpl3/gcc/dist/libstdc++-v3/include/experimental/bits/simd_math.h (revision 0a3071956a3a9fdebdbf7f338cf2d439b45fc728)
1 // Math overloads for simd -*- C++ -*-
2 
3 // Copyright (C) 2020-2022 Free Software Foundation, Inc.
4 //
5 // This file is part of the GNU ISO C++ Library.  This library is free
6 // software; you can redistribute it and/or modify it under the
7 // terms of the GNU General Public License as published by the
8 // Free Software Foundation; either version 3, or (at your option)
9 // any later version.
10 
11 // This library is distributed in the hope that it will be useful,
12 // but WITHOUT ANY WARRANTY; without even the implied warranty of
13 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14 // GNU General Public License for more details.
15 
16 // Under Section 7 of GPL version 3, you are granted additional
17 // permissions described in the GCC Runtime Library Exception, version
18 // 3.1, as published by the Free Software Foundation.
19 
20 // You should have received a copy of the GNU General Public License and
21 // a copy of the GCC Runtime Library Exception along with this program;
22 // see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
23 // <http://www.gnu.org/licenses/>.
24 
25 #ifndef _GLIBCXX_EXPERIMENTAL_SIMD_MATH_H_
26 #define _GLIBCXX_EXPERIMENTAL_SIMD_MATH_H_
27 
28 #if __cplusplus >= 201703L
29 
30 #include <utility>
31 #include <iomanip>
32 
33 _GLIBCXX_SIMD_BEGIN_NAMESPACE
34 template <typename _Tp, typename _V>
35   using _Samesize = fixed_size_simd<_Tp, _V::size()>;
36 
37 // _Math_return_type {{{
38 template <typename _DoubleR, typename _Tp, typename _Abi>
39   struct _Math_return_type;
40 
41 template <typename _DoubleR, typename _Tp, typename _Abi>
42   using _Math_return_type_t =
43     typename _Math_return_type<_DoubleR, _Tp, _Abi>::type;
44 
45 template <typename _Tp, typename _Abi>
46   struct _Math_return_type<double, _Tp, _Abi>
47   { using type = simd<_Tp, _Abi>; };
48 
49 template <typename _Tp, typename _Abi>
50   struct _Math_return_type<bool, _Tp, _Abi>
51   { using type = simd_mask<_Tp, _Abi>; };
52 
53 template <typename _DoubleR, typename _Tp, typename _Abi>
54   struct _Math_return_type
55   { using type = fixed_size_simd<_DoubleR, simd_size_v<_Tp, _Abi>>; };
56 
57 //}}}
58 // _GLIBCXX_SIMD_MATH_CALL_ {{{
59 #define _GLIBCXX_SIMD_MATH_CALL_(__name)                                       \
60 template <typename _Tp, typename _Abi, typename...,                            \
61 	  typename _R = _Math_return_type_t<                                   \
62 	    decltype(std::__name(declval<double>())), _Tp, _Abi>>              \
63   _GLIBCXX_SIMD_ALWAYS_INLINE                                                  \
64   enable_if_t<is_floating_point_v<_Tp>, _R>                                    \
65   __name(simd<_Tp, _Abi> __x)                                                  \
66   { return {__private_init, _Abi::_SimdImpl::_S_##__name(__data(__x))}; }
67 
68 // }}}
69 //_Extra_argument_type{{{
70 template <typename _Up, typename _Tp, typename _Abi>
71   struct _Extra_argument_type;
72 
73 template <typename _Tp, typename _Abi>
74   struct _Extra_argument_type<_Tp*, _Tp, _Abi>
75   {
76     using type = simd<_Tp, _Abi>*;
77     static constexpr double* declval();
78     static constexpr bool __needs_temporary_scalar = true;
79 
80     _GLIBCXX_SIMD_INTRINSIC static constexpr auto _S_data(type __x)
81     { return &__data(*__x); }
82   };
83 
84 template <typename _Up, typename _Tp, typename _Abi>
85   struct _Extra_argument_type<_Up*, _Tp, _Abi>
86   {
87     static_assert(is_integral_v<_Up>);
88     using type = fixed_size_simd<_Up, simd_size_v<_Tp, _Abi>>*;
89     static constexpr _Up* declval();
90     static constexpr bool __needs_temporary_scalar = true;
91 
92     _GLIBCXX_SIMD_INTRINSIC static constexpr auto _S_data(type __x)
93     { return &__data(*__x); }
94   };
95 
96 template <typename _Tp, typename _Abi>
97   struct _Extra_argument_type<_Tp, _Tp, _Abi>
98   {
99     using type = simd<_Tp, _Abi>;
100     static constexpr double declval();
101     static constexpr bool __needs_temporary_scalar = false;
102 
103     _GLIBCXX_SIMD_INTRINSIC static constexpr decltype(auto)
104     _S_data(const type& __x)
105     { return __data(__x); }
106   };
107 
108 template <typename _Up, typename _Tp, typename _Abi>
109   struct _Extra_argument_type
110   {
111     static_assert(is_integral_v<_Up>);
112     using type = fixed_size_simd<_Up, simd_size_v<_Tp, _Abi>>;
113     static constexpr _Up declval();
114     static constexpr bool __needs_temporary_scalar = false;
115 
116     _GLIBCXX_SIMD_INTRINSIC static constexpr decltype(auto)
117     _S_data(const type& __x)
118     { return __data(__x); }
119   };
120 
121 //}}}
122 // _GLIBCXX_SIMD_MATH_CALL2_ {{{
123 #define _GLIBCXX_SIMD_MATH_CALL2_(__name, __arg2)                              \
124 template <                                                                     \
125   typename _Tp, typename _Abi, typename...,                                    \
126   typename _Arg2 = _Extra_argument_type<__arg2, _Tp, _Abi>,                    \
127   typename _R = _Math_return_type_t<                                           \
128     decltype(std::__name(declval<double>(), _Arg2::declval())), _Tp, _Abi>>    \
129   _GLIBCXX_SIMD_ALWAYS_INLINE                                                  \
130   enable_if_t<is_floating_point_v<_Tp>, _R>                                    \
131   __name(const simd<_Tp, _Abi>& __x, const typename _Arg2::type& __y)          \
132   {                                                                            \
133     return {__private_init,                                                    \
134 	    _Abi::_SimdImpl::_S_##__name(__data(__x), _Arg2::_S_data(__y))};   \
135   }                                                                            \
136 template <typename _Up, typename _Tp, typename _Abi>                           \
137   _GLIBCXX_SIMD_INTRINSIC _Math_return_type_t<                                 \
138     decltype(std::__name(                                                      \
139       declval<double>(),                                                       \
140       declval<enable_if_t<                                                     \
141 	conjunction_v<                                                         \
142 	  is_same<__arg2, _Tp>,                                                \
143 	  negation<is_same<__remove_cvref_t<_Up>, simd<_Tp, _Abi>>>,           \
144 	  is_convertible<_Up, simd<_Tp, _Abi>>, is_floating_point<_Tp>>,       \
145 	double>>())),                                                          \
146     _Tp, _Abi>                                                                 \
147   __name(_Up&& __xx, const simd<_Tp, _Abi>& __yy)                              \
148   { return __name(simd<_Tp, _Abi>(static_cast<_Up&&>(__xx)), __yy); }
149 
150 // }}}
151 // _GLIBCXX_SIMD_MATH_CALL3_ {{{
152 #define _GLIBCXX_SIMD_MATH_CALL3_(__name, __arg2, __arg3)                      \
153 template <typename _Tp, typename _Abi, typename...,                            \
154 	  typename _Arg2 = _Extra_argument_type<__arg2, _Tp, _Abi>,            \
155 	  typename _Arg3 = _Extra_argument_type<__arg3, _Tp, _Abi>,            \
156 	  typename _R = _Math_return_type_t<                                   \
157 	    decltype(std::__name(declval<double>(), _Arg2::declval(),          \
158 				 _Arg3::declval())),                           \
159 	    _Tp, _Abi>>                                                        \
160   _GLIBCXX_SIMD_ALWAYS_INLINE                                                  \
161   enable_if_t<is_floating_point_v<_Tp>, _R>                                    \
162   __name(const simd<_Tp, _Abi>& __x, const typename _Arg2::type& __y,          \
163 	 const typename _Arg3::type& __z)                                      \
164   {                                                                            \
165     return {__private_init,                                                    \
166 	    _Abi::_SimdImpl::_S_##__name(__data(__x), _Arg2::_S_data(__y),     \
167 					 _Arg3::_S_data(__z))};                \
168   }                                                                            \
169 template <                                                                     \
170   typename _T0, typename _T1, typename _T2, typename...,                       \
171   typename _U0 = __remove_cvref_t<_T0>,                                        \
172   typename _U1 = __remove_cvref_t<_T1>,                                        \
173   typename _U2 = __remove_cvref_t<_T2>,                                        \
174   typename _Simd = conditional_t<is_simd_v<_U1>, _U1, _U2>,                    \
175   typename = enable_if_t<conjunction_v<                                        \
176     is_simd<_Simd>, is_convertible<_T0&&, _Simd>,                              \
177     is_convertible<_T1&&, _Simd>, is_convertible<_T2&&, _Simd>,                \
178     negation<conjunction<                                                      \
179       is_simd<_U0>, is_floating_point<__value_type_or_identity_t<_U0>>>>>>>    \
180   _GLIBCXX_SIMD_INTRINSIC decltype(__name(declval<const _Simd&>(),             \
181 					  declval<const _Simd&>(),             \
182 					  declval<const _Simd&>()))            \
183   __name(_T0&& __xx, _T1&& __yy, _T2&& __zz)                                   \
184   {                                                                            \
185     return __name(_Simd(static_cast<_T0&&>(__xx)),                             \
186 		  _Simd(static_cast<_T1&&>(__yy)),                             \
187 		  _Simd(static_cast<_T2&&>(__zz)));                            \
188   }
189 
190 // }}}
191 // __cosSeries {{{
192 template <typename _Abi>
193   _GLIBCXX_SIMD_ALWAYS_INLINE static simd<float, _Abi>
194   __cosSeries(const simd<float, _Abi>& __x)
195   {
196     const simd<float, _Abi> __x2 = __x * __x;
197     simd<float, _Abi> __y;
198     __y = 0x1.ap-16f;                  //  1/8!
199     __y = __y * __x2 - 0x1.6c1p-10f;   // -1/6!
200     __y = __y * __x2 + 0x1.555556p-5f; //  1/4!
201     return __y * (__x2 * __x2) - .5f * __x2 + 1.f;
202   }
203 
204 template <typename _Abi>
205   _GLIBCXX_SIMD_ALWAYS_INLINE static simd<double, _Abi>
206   __cosSeries(const simd<double, _Abi>& __x)
207   {
208     const simd<double, _Abi> __x2 = __x * __x;
209     simd<double, _Abi> __y;
210     __y = 0x1.AC00000000000p-45;              //  1/16!
211     __y = __y * __x2 - 0x1.9394000000000p-37; // -1/14!
212     __y = __y * __x2 + 0x1.1EED8C0000000p-29; //  1/12!
213     __y = __y * __x2 - 0x1.27E4FB7400000p-22; // -1/10!
214     __y = __y * __x2 + 0x1.A01A01A018000p-16; //  1/8!
215     __y = __y * __x2 - 0x1.6C16C16C16C00p-10; // -1/6!
216     __y = __y * __x2 + 0x1.5555555555554p-5;  //  1/4!
217     return (__y * __x2 - .5f) * __x2 + 1.f;
218   }
219 
220 // }}}
221 // __sinSeries {{{
222 template <typename _Abi>
223   _GLIBCXX_SIMD_ALWAYS_INLINE static simd<float, _Abi>
224   __sinSeries(const simd<float, _Abi>& __x)
225   {
226     const simd<float, _Abi> __x2 = __x * __x;
227     simd<float, _Abi> __y;
228     __y = -0x1.9CC000p-13f;            // -1/7!
229     __y = __y * __x2 + 0x1.111100p-7f; //  1/5!
230     __y = __y * __x2 - 0x1.555556p-3f; // -1/3!
231     return __y * (__x2 * __x) + __x;
232   }
233 
234 template <typename _Abi>
235   _GLIBCXX_SIMD_ALWAYS_INLINE static simd<double, _Abi>
236   __sinSeries(const simd<double, _Abi>& __x)
237   {
238     // __x  = [0, 0.7854 = pi/4]
239     // __x² = [0, 0.6169 = pi²/8]
240     const simd<double, _Abi> __x2 = __x * __x;
241     simd<double, _Abi> __y;
242     __y = -0x1.ACF0000000000p-41;             // -1/15!
243     __y = __y * __x2 + 0x1.6124400000000p-33; //  1/13!
244     __y = __y * __x2 - 0x1.AE64567000000p-26; // -1/11!
245     __y = __y * __x2 + 0x1.71DE3A5540000p-19; //  1/9!
246     __y = __y * __x2 - 0x1.A01A01A01A000p-13; // -1/7!
247     __y = __y * __x2 + 0x1.1111111111110p-7;  //  1/5!
248     __y = __y * __x2 - 0x1.5555555555555p-3;  // -1/3!
249     return __y * (__x2 * __x) + __x;
250   }
251 
252 // }}}
253 // __zero_low_bits {{{
254 template <int _Bits, typename _Tp, typename _Abi>
255   _GLIBCXX_SIMD_INTRINSIC simd<_Tp, _Abi>
256   __zero_low_bits(simd<_Tp, _Abi> __x)
257   {
258     const simd<_Tp, _Abi> __bitmask
259       = __bit_cast<_Tp>(~make_unsigned_t<__int_for_sizeof_t<_Tp>>() << _Bits);
260     return {__private_init,
261 	    _Abi::_SimdImpl::_S_bit_and(__data(__x), __data(__bitmask))};
262   }
263 
264 // }}}
265 // __fold_input {{{
266 
267 /**@internal
268  * Fold @p x into [-¼π, ¼π] and remember the quadrant it came from:
269  * quadrant 0: [-¼π,  ¼π]
270  * quadrant 1: [ ¼π,  ¾π]
271  * quadrant 2: [ ¾π, 1¼π]
272  * quadrant 3: [1¼π, 1¾π]
273  *
274  * The algorithm determines `y` as the multiple `x - y * ¼π = [-¼π, ¼π]`. Using
275  * a bitmask, `y` is reduced to `quadrant`. `y` can be calculated as
276  * ```
277  * y = trunc(x / ¼π);
278  * y += fmod(y, 2);
279  * ```
280  * This can be simplified by moving the (implicit) division by 2 into the
281  * truncation expression. The `+= fmod` effect can the be achieved by using
282  * rounding instead of truncation: `y = round(x / ½π) * 2`. If precision allows,
283  * `2/π * x` is better (faster).
284  */
285 template <typename _Tp, typename _Abi>
286   struct _Folded
287   {
288     simd<_Tp, _Abi> _M_x;
289     rebind_simd_t<int, simd<_Tp, _Abi>> _M_quadrant;
290   };
291 
292 namespace __math_float {
293 inline constexpr float __pi_over_4 = 0x1.921FB6p-1f; // π/4
294 inline constexpr float __2_over_pi = 0x1.45F306p-1f; // 2/π
295 inline constexpr float __pi_2_5bits0
296   = 0x1.921fc0p0f; // π/2, 5 0-bits (least significant)
297 inline constexpr float __pi_2_5bits0_rem
298   = -0x1.5777a6p-21f; // π/2 - __pi_2_5bits0
299 } // namespace __math_float
300 namespace __math_double {
301 inline constexpr double __pi_over_4 = 0x1.921fb54442d18p-1; // π/4
302 inline constexpr double __2_over_pi = 0x1.45F306DC9C883p-1; // 2/π
303 inline constexpr double __pi_2 = 0x1.921fb54442d18p0;       // π/2
304 } // namespace __math_double
305 
306 template <typename _Abi>
307   _GLIBCXX_SIMD_ALWAYS_INLINE _Folded<float, _Abi>
308   __fold_input(const simd<float, _Abi>& __x)
309   {
310     using _V = simd<float, _Abi>;
311     using _IV = rebind_simd_t<int, _V>;
312     using namespace __math_float;
313     _Folded<float, _Abi> __r;
314     __r._M_x = abs(__x);
315 #if 0
316     // zero most mantissa bits:
317     constexpr float __1_over_pi = 0x1.45F306p-2f; // 1/π
318     const auto __y = (__r._M_x * __1_over_pi + 0x1.8p23f) - 0x1.8p23f;
319     // split π into 4 parts, the first three with 13 trailing zeros (to make the
320     // following multiplications precise):
321     constexpr float __pi0 = 0x1.920000p1f;
322     constexpr float __pi1 = 0x1.fb4000p-11f;
323     constexpr float __pi2 = 0x1.444000p-23f;
324     constexpr float __pi3 = 0x1.68c234p-38f;
325     __r._M_x - __y*__pi0 - __y*__pi1 - __y*__pi2 - __y*__pi3
326 #else
327     if (_GLIBCXX_SIMD_IS_UNLIKELY(all_of(__r._M_x < __pi_over_4)))
328       __r._M_quadrant = 0;
329     else if (_GLIBCXX_SIMD_IS_LIKELY(all_of(__r._M_x < 6 * __pi_over_4)))
330       {
331 	const _V __y = nearbyint(__r._M_x * __2_over_pi);
332 	__r._M_quadrant = static_simd_cast<_IV>(__y) & 3; // __y mod 4
333 	__r._M_x -= __y * __pi_2_5bits0;
334 	__r._M_x -= __y * __pi_2_5bits0_rem;
335       }
336     else
337       {
338 	using __math_double::__2_over_pi;
339 	using __math_double::__pi_2;
340 	using _VD = rebind_simd_t<double, _V>;
341 	_VD __xd = static_simd_cast<_VD>(__r._M_x);
342 	_VD __y = nearbyint(__xd * __2_over_pi);
343 	__r._M_quadrant = static_simd_cast<_IV>(__y) & 3; // = __y mod 4
344 	__r._M_x = static_simd_cast<_V>(__xd - __y * __pi_2);
345       }
346 #endif
347     return __r;
348   }
349 
350 template <typename _Abi>
351   _GLIBCXX_SIMD_ALWAYS_INLINE _Folded<double, _Abi>
352   __fold_input(const simd<double, _Abi>& __x)
353   {
354     using _V = simd<double, _Abi>;
355     using _IV = rebind_simd_t<int, _V>;
356     using namespace __math_double;
357 
358     _Folded<double, _Abi> __r;
359     __r._M_x = abs(__x);
360     if (_GLIBCXX_SIMD_IS_UNLIKELY(all_of(__r._M_x < __pi_over_4)))
361       {
362 	__r._M_quadrant = 0;
363 	return __r;
364       }
365     const _V __y = nearbyint(__r._M_x / (2 * __pi_over_4));
366     __r._M_quadrant = static_simd_cast<_IV>(__y) & 3;
367 
368     if (_GLIBCXX_SIMD_IS_LIKELY(all_of(__r._M_x < 1025 * __pi_over_4)))
369       {
370 	// x - y * pi/2, y uses no more than 11 mantissa bits
371 	__r._M_x -= __y * 0x1.921FB54443000p0;
372 	__r._M_x -= __y * -0x1.73DCB3B39A000p-43;
373 	__r._M_x -= __y * 0x1.45C06E0E68948p-86;
374       }
375     else if (_GLIBCXX_SIMD_IS_LIKELY(all_of(__y <= 0x1.0p30)))
376       {
377 	// x - y * pi/2, y uses no more than 29 mantissa bits
378 	__r._M_x -= __y * 0x1.921FB40000000p0;
379 	__r._M_x -= __y * 0x1.4442D00000000p-24;
380 	__r._M_x -= __y * 0x1.8469898CC5170p-48;
381       }
382     else
383       {
384 	// x - y * pi/2, y may require all mantissa bits
385 	const _V __y_hi = __zero_low_bits<26>(__y);
386 	const _V __y_lo = __y - __y_hi;
387 	const auto __pi_2_1 = 0x1.921FB50000000p0;
388 	const auto __pi_2_2 = 0x1.110B460000000p-26;
389 	const auto __pi_2_3 = 0x1.1A62630000000p-54;
390 	const auto __pi_2_4 = 0x1.8A2E03707344Ap-81;
391 	__r._M_x = __r._M_x - __y_hi * __pi_2_1
392 		   - max(__y_hi * __pi_2_2, __y_lo * __pi_2_1)
393 		   - min(__y_hi * __pi_2_2, __y_lo * __pi_2_1)
394 		   - max(__y_hi * __pi_2_3, __y_lo * __pi_2_2)
395 		   - min(__y_hi * __pi_2_3, __y_lo * __pi_2_2)
396 		   - max(__y * __pi_2_4, __y_lo * __pi_2_3)
397 		   - min(__y * __pi_2_4, __y_lo * __pi_2_3);
398       }
399     return __r;
400   }
401 
402 // }}}
403 // __extract_exponent_as_int {{{
404 template <typename _Tp, typename _Abi>
405   _GLIBCXX_SIMD_INTRINSIC
406   rebind_simd_t<int, simd<_Tp, _Abi>>
407   __extract_exponent_as_int(const simd<_Tp, _Abi>& __v)
408   {
409     using _Vp = simd<_Tp, _Abi>;
410     using _Up = make_unsigned_t<__int_for_sizeof_t<_Tp>>;
411     using namespace std::experimental::__float_bitwise_operators;
412     using namespace std::experimental::__proposed;
413     const _Vp __exponent_mask
414       = __infinity_v<_Tp>; // 0x7f800000 or 0x7ff0000000000000
415     return static_simd_cast<rebind_simd_t<int, _Vp>>(
416 	     simd_bit_cast<rebind_simd_t<_Up, _Vp>>(__v & __exponent_mask)
417       >> (__digits_v<_Tp> - 1));
418   }
419 
420 // }}}
421 // __impl_or_fallback {{{
422 template <typename ImplFun, typename FallbackFun, typename... _Args>
423   _GLIBCXX_SIMD_INTRINSIC auto
424   __impl_or_fallback_dispatch(int, ImplFun&& __impl_fun, FallbackFun&&,
425 			      _Args&&... __args)
426     -> decltype(__impl_fun(static_cast<_Args&&>(__args)...))
427   { return __impl_fun(static_cast<_Args&&>(__args)...); }
428 
429 template <typename ImplFun, typename FallbackFun, typename... _Args,
430 	  typename = __detail::__odr_helper>
431   inline auto
432   __impl_or_fallback_dispatch(float, ImplFun&&, FallbackFun&& __fallback_fun,
433 			      _Args&&... __args)
434     -> decltype(__fallback_fun(static_cast<_Args&&>(__args)...))
435   { return __fallback_fun(static_cast<_Args&&>(__args)...); }
436 
437 template <typename... _Args>
438   _GLIBCXX_SIMD_INTRINSIC auto
439   __impl_or_fallback(_Args&&... __args)
440   {
441     return __impl_or_fallback_dispatch(int(), static_cast<_Args&&>(__args)...);
442   }
443 //}}}
444 
445 // trigonometric functions {{{
446 _GLIBCXX_SIMD_MATH_CALL_(acos)
447 _GLIBCXX_SIMD_MATH_CALL_(asin)
448 _GLIBCXX_SIMD_MATH_CALL_(atan)
449 _GLIBCXX_SIMD_MATH_CALL2_(atan2, _Tp)
450 
451 /*
452  * algorithm for sine and cosine:
453  *
454  * The result can be calculated with sine or cosine depending on the π/4 section
455  * the input is in. sine   ≈ __x + __x³ cosine ≈ 1 - __x²
456  *
457  * sine:
458  * Map -__x to __x and invert the output
459  * Extend precision of __x - n * π/4 by calculating
460  * ((__x - n * p1) - n * p2) - n * p3 (p1 + p2 + p3 = π/4)
461  *
462  * Calculate Taylor series with tuned coefficients.
463  * Fix sign.
464  */
465 // cos{{{
466 template <typename _Tp, typename _Abi, typename = __detail::__odr_helper>
467   enable_if_t<is_floating_point_v<_Tp>, simd<_Tp, _Abi>>
468   cos(const simd<_Tp, _Abi>& __x)
469   {
470     using _V = simd<_Tp, _Abi>;
471     if constexpr (__is_scalar_abi<_Abi>() || __is_fixed_size_abi_v<_Abi>)
472       return {__private_init, _Abi::_SimdImpl::_S_cos(__data(__x))};
473     else
474       {
475 	if constexpr (is_same_v<_Tp, float>)
476 	  if (_GLIBCXX_SIMD_IS_UNLIKELY(any_of(abs(__x) >= 393382)))
477 	    return static_simd_cast<_V>(
478 	      cos(static_simd_cast<rebind_simd_t<double, _V>>(__x)));
479 
480 	const auto __f = __fold_input(__x);
481 	// quadrant | effect
482 	//        0 | cosSeries, +
483 	//        1 | sinSeries, -
484 	//        2 | cosSeries, -
485 	//        3 | sinSeries, +
486 	using namespace std::experimental::__float_bitwise_operators;
487 	const _V __sign_flip
488 	  = _V(-0.f) & static_simd_cast<_V>((1 + __f._M_quadrant) << 30);
489 
490 	const auto __need_cos = (__f._M_quadrant & 1) == 0;
491 	if (_GLIBCXX_SIMD_IS_UNLIKELY(all_of(__need_cos)))
492 	  return __sign_flip ^ __cosSeries(__f._M_x);
493 	else if (_GLIBCXX_SIMD_IS_UNLIKELY(none_of(__need_cos)))
494 	  return __sign_flip ^ __sinSeries(__f._M_x);
495 	else // some_of(__need_cos)
496 	  {
497 	    _V __r = __sinSeries(__f._M_x);
498 	    where(__need_cos.__cvt(), __r) = __cosSeries(__f._M_x);
499 	    return __r ^ __sign_flip;
500 	  }
501       }
502   }
503 
504 template <typename _Tp>
505   _GLIBCXX_SIMD_ALWAYS_INLINE
506   enable_if_t<is_floating_point<_Tp>::value, simd<_Tp, simd_abi::scalar>>
507   cos(simd<_Tp, simd_abi::scalar> __x)
508   { return std::cos(__data(__x)); }
509 
510 //}}}
511 // sin{{{
512 template <typename _Tp, typename _Abi, typename = __detail::__odr_helper>
513   enable_if_t<is_floating_point_v<_Tp>, simd<_Tp, _Abi>>
514   sin(const simd<_Tp, _Abi>& __x)
515   {
516     using _V = simd<_Tp, _Abi>;
517     if constexpr (__is_scalar_abi<_Abi>() || __is_fixed_size_abi_v<_Abi>)
518       return {__private_init, _Abi::_SimdImpl::_S_sin(__data(__x))};
519     else
520       {
521 	if constexpr (is_same_v<_Tp, float>)
522 	  if (_GLIBCXX_SIMD_IS_UNLIKELY(any_of(abs(__x) >= 527449)))
523 	    return static_simd_cast<_V>(
524 	      sin(static_simd_cast<rebind_simd_t<double, _V>>(__x)));
525 
526 	const auto __f = __fold_input(__x);
527 	// quadrant | effect
528 	//        0 | sinSeries
529 	//        1 | cosSeries
530 	//        2 | sinSeries, sign flip
531 	//        3 | cosSeries, sign flip
532 	using namespace std::experimental::__float_bitwise_operators;
533 	const auto __sign_flip
534 	  = (__x ^ static_simd_cast<_V>(1 - __f._M_quadrant)) & _V(_Tp(-0.));
535 
536 	const auto __need_sin = (__f._M_quadrant & 1) == 0;
537 	if (_GLIBCXX_SIMD_IS_UNLIKELY(all_of(__need_sin)))
538 	  return __sign_flip ^ __sinSeries(__f._M_x);
539 	else if (_GLIBCXX_SIMD_IS_UNLIKELY(none_of(__need_sin)))
540 	  return __sign_flip ^ __cosSeries(__f._M_x);
541 	else // some_of(__need_sin)
542 	  {
543 	    _V __r = __cosSeries(__f._M_x);
544 	    where(__need_sin.__cvt(), __r) = __sinSeries(__f._M_x);
545 	    return __sign_flip ^ __r;
546 	  }
547       }
548   }
549 
550 template <typename _Tp>
551   _GLIBCXX_SIMD_ALWAYS_INLINE
552   enable_if_t<is_floating_point<_Tp>::value, simd<_Tp, simd_abi::scalar>>
553   sin(simd<_Tp, simd_abi::scalar> __x)
554   { return std::sin(__data(__x)); }
555 
556 //}}}
557 _GLIBCXX_SIMD_MATH_CALL_(tan)
558 _GLIBCXX_SIMD_MATH_CALL_(acosh)
559 _GLIBCXX_SIMD_MATH_CALL_(asinh)
560 _GLIBCXX_SIMD_MATH_CALL_(atanh)
561 _GLIBCXX_SIMD_MATH_CALL_(cosh)
562 _GLIBCXX_SIMD_MATH_CALL_(sinh)
563 _GLIBCXX_SIMD_MATH_CALL_(tanh)
564 // }}}
565 // exponential functions {{{
566 _GLIBCXX_SIMD_MATH_CALL_(exp)
567 _GLIBCXX_SIMD_MATH_CALL_(exp2)
568 _GLIBCXX_SIMD_MATH_CALL_(expm1)
569 
570 // }}}
571 // frexp {{{
572 #if _GLIBCXX_SIMD_X86INTRIN
573 template <typename _Tp, size_t _Np>
574   _GLIBCXX_SIMD_INTRINSIC
575   _SimdWrapper<_Tp, _Np>
576   __getexp(_SimdWrapper<_Tp, _Np> __x)
577   {
578     if constexpr (__have_avx512vl && __is_sse_ps<_Tp, _Np>())
579       return __auto_bitcast(_mm_getexp_ps(__to_intrin(__x)));
580     else if constexpr (__have_avx512f && __is_sse_ps<_Tp, _Np>())
581       return __auto_bitcast(_mm512_getexp_ps(__auto_bitcast(__to_intrin(__x))));
582     else if constexpr (__have_avx512vl && __is_sse_pd<_Tp, _Np>())
583       return _mm_getexp_pd(__x);
584     else if constexpr (__have_avx512f && __is_sse_pd<_Tp, _Np>())
585       return __lo128(_mm512_getexp_pd(__auto_bitcast(__x)));
586     else if constexpr (__have_avx512vl && __is_avx_ps<_Tp, _Np>())
587       return _mm256_getexp_ps(__x);
588     else if constexpr (__have_avx512f && __is_avx_ps<_Tp, _Np>())
589       return __lo256(_mm512_getexp_ps(__auto_bitcast(__x)));
590     else if constexpr (__have_avx512vl && __is_avx_pd<_Tp, _Np>())
591       return _mm256_getexp_pd(__x);
592     else if constexpr (__have_avx512f && __is_avx_pd<_Tp, _Np>())
593       return __lo256(_mm512_getexp_pd(__auto_bitcast(__x)));
594     else if constexpr (__is_avx512_ps<_Tp, _Np>())
595       return _mm512_getexp_ps(__x);
596     else if constexpr (__is_avx512_pd<_Tp, _Np>())
597       return _mm512_getexp_pd(__x);
598     else
599       __assert_unreachable<_Tp>();
600   }
601 
602 template <typename _Tp, size_t _Np>
603   _GLIBCXX_SIMD_INTRINSIC
604   _SimdWrapper<_Tp, _Np>
605   __getmant_avx512(_SimdWrapper<_Tp, _Np> __x)
606   {
607     if constexpr (__have_avx512vl && __is_sse_ps<_Tp, _Np>())
608       return __auto_bitcast(_mm_getmant_ps(__to_intrin(__x), _MM_MANT_NORM_p5_1,
609 					   _MM_MANT_SIGN_src));
610     else if constexpr (__have_avx512f && __is_sse_ps<_Tp, _Np>())
611       return __auto_bitcast(_mm512_getmant_ps(__auto_bitcast(__to_intrin(__x)),
612 					      _MM_MANT_NORM_p5_1,
613 					      _MM_MANT_SIGN_src));
614     else if constexpr (__have_avx512vl && __is_sse_pd<_Tp, _Np>())
615       return _mm_getmant_pd(__x, _MM_MANT_NORM_p5_1, _MM_MANT_SIGN_src);
616     else if constexpr (__have_avx512f && __is_sse_pd<_Tp, _Np>())
617       return __lo128(_mm512_getmant_pd(__auto_bitcast(__x), _MM_MANT_NORM_p5_1,
618 				       _MM_MANT_SIGN_src));
619     else if constexpr (__have_avx512vl && __is_avx_ps<_Tp, _Np>())
620       return _mm256_getmant_ps(__x, _MM_MANT_NORM_p5_1, _MM_MANT_SIGN_src);
621     else if constexpr (__have_avx512f && __is_avx_ps<_Tp, _Np>())
622       return __lo256(_mm512_getmant_ps(__auto_bitcast(__x), _MM_MANT_NORM_p5_1,
623 				       _MM_MANT_SIGN_src));
624     else if constexpr (__have_avx512vl && __is_avx_pd<_Tp, _Np>())
625       return _mm256_getmant_pd(__x, _MM_MANT_NORM_p5_1, _MM_MANT_SIGN_src);
626     else if constexpr (__have_avx512f && __is_avx_pd<_Tp, _Np>())
627       return __lo256(_mm512_getmant_pd(__auto_bitcast(__x), _MM_MANT_NORM_p5_1,
628 				       _MM_MANT_SIGN_src));
629     else if constexpr (__is_avx512_ps<_Tp, _Np>())
630       return _mm512_getmant_ps(__x, _MM_MANT_NORM_p5_1, _MM_MANT_SIGN_src);
631     else if constexpr (__is_avx512_pd<_Tp, _Np>())
632       return _mm512_getmant_pd(__x, _MM_MANT_NORM_p5_1, _MM_MANT_SIGN_src);
633     else
634       __assert_unreachable<_Tp>();
635   }
636 #endif // _GLIBCXX_SIMD_X86INTRIN
637 
638 /**
639  * splits @p __v into exponent and mantissa, the sign is kept with the mantissa
640  *
641  * The return value will be in the range [0.5, 1.0[
642  * The @p __e value will be an integer defining the power-of-two exponent
643  */
644 template <typename _Tp, typename _Abi, typename = __detail::__odr_helper>
645   enable_if_t<is_floating_point_v<_Tp>, simd<_Tp, _Abi>>
646   frexp(const simd<_Tp, _Abi>& __x, _Samesize<int, simd<_Tp, _Abi>>* __exp)
647   {
648     if constexpr (simd_size_v<_Tp, _Abi> == 1)
649       {
650 	int __tmp;
651 	const auto __r = std::frexp(__x[0], &__tmp);
652 	(*__exp)[0] = __tmp;
653 	return __r;
654       }
655     else if constexpr (__is_fixed_size_abi_v<_Abi>)
656       return {__private_init, _Abi::_SimdImpl::_S_frexp(__data(__x), __data(*__exp))};
657 #if _GLIBCXX_SIMD_X86INTRIN
658     else if constexpr (__have_avx512f)
659       {
660 	constexpr size_t _Np = simd_size_v<_Tp, _Abi>;
661 	constexpr size_t _NI = _Np < 4 ? 4 : _Np;
662 	const auto __v = __data(__x);
663 	const auto __isnonzero
664 	  = _Abi::_SimdImpl::_S_isnonzerovalue_mask(__v._M_data);
665 	const _SimdWrapper<int, _NI> __exp_plus1
666 	  = 1 + __convert<_SimdWrapper<int, _NI>>(__getexp(__v))._M_data;
667 	const _SimdWrapper<int, _Np> __e = __wrapper_bitcast<int, _Np>(
668 	  _Abi::_CommonImpl::_S_blend(_SimdWrapper<bool, _NI>(__isnonzero),
669 				      _SimdWrapper<int, _NI>(), __exp_plus1));
670 	simd_abi::deduce_t<int, _Np>::_CommonImpl::_S_store(__e, __exp);
671 	return {__private_init,
672 		_Abi::_CommonImpl::_S_blend(_SimdWrapper<bool, _Np>(
673 					      __isnonzero),
674 					    __v, __getmant_avx512(__v))};
675       }
676 #endif // _GLIBCXX_SIMD_X86INTRIN
677     else
678       {
679 	// fallback implementation
680 	static_assert(sizeof(_Tp) == 4 || sizeof(_Tp) == 8);
681 	using _V = simd<_Tp, _Abi>;
682 	using _IV = rebind_simd_t<int, _V>;
683 	using namespace std::experimental::__proposed;
684 	using namespace std::experimental::__float_bitwise_operators;
685 
686 	constexpr int __exp_adjust = sizeof(_Tp) == 4 ? 0x7e : 0x3fe;
687 	constexpr int __exp_offset = sizeof(_Tp) == 4 ? 0x70 : 0x200;
688 	constexpr _Tp __subnorm_scale = sizeof(_Tp) == 4 ? 0x1p112 : 0x1p512;
689 	_GLIBCXX_SIMD_USE_CONSTEXPR_API _V __exponent_mask
690 	  = __infinity_v<_Tp>; // 0x7f800000 or 0x7ff0000000000000
691 	_GLIBCXX_SIMD_USE_CONSTEXPR_API _V __p5_1_exponent
692 	  = -(2 - __epsilon_v<_Tp>) / 2; // 0xbf7fffff or 0xbfefffffffffffff
693 
694 	_V __mant = __p5_1_exponent & (__exponent_mask | __x); // +/-[.5, 1)
695 	const _IV __exponent_bits = __extract_exponent_as_int(__x);
696 	if (_GLIBCXX_SIMD_IS_LIKELY(all_of(isnormal(__x))))
697 	  {
698 	    *__exp
699 	      = simd_cast<_Samesize<int, _V>>(__exponent_bits - __exp_adjust);
700 	    return __mant;
701 	  }
702 
703 #if __FINITE_MATH_ONLY__
704 	// at least one element of __x is 0 or subnormal, the rest is normal
705 	// (inf and NaN are excluded by -ffinite-math-only)
706 	const auto __iszero_inf_nan = __x == 0;
707 #else
708 	using _Ip = __int_for_sizeof_t<_Tp>;
709 	const auto __as_int = simd_bit_cast<rebind_simd_t<_Ip, _V>>(abs(__x));
710 	const auto __inf = simd_bit_cast<rebind_simd_t<_Ip, _V>>(_V(__infinity_v<_Tp>));
711 	const auto __iszero_inf_nan = static_simd_cast<typename _V::mask_type>(
712 	  __as_int == 0 || __as_int >= __inf);
713 #endif
714 
715 	const _V __scaled_subnormal = __x * __subnorm_scale;
716 	const _V __mant_subnormal
717 	  = __p5_1_exponent & (__exponent_mask | __scaled_subnormal);
718 	where(!isnormal(__x), __mant) = __mant_subnormal;
719 	where(__iszero_inf_nan, __mant) = __x;
720 	_IV __e = __extract_exponent_as_int(__scaled_subnormal);
721 	using _MaskType =
722 	  typename conditional_t<sizeof(typename _V::value_type) == sizeof(int),
723 				 _V, _IV>::mask_type;
724 	const _MaskType __value_isnormal = isnormal(__x).__cvt();
725 	where(__value_isnormal.__cvt(), __e) = __exponent_bits;
726 	static_assert(sizeof(_IV) == sizeof(__value_isnormal));
727 	const _IV __offset
728 	  = (simd_bit_cast<_IV>(__value_isnormal) & _IV(__exp_adjust))
729 	      | (simd_bit_cast<_IV>(static_simd_cast<_MaskType>(__exponent_bits == 0)
730 				      & static_simd_cast<_MaskType>(__x != 0))
731 		   & _IV(__exp_adjust + __exp_offset));
732 	*__exp = simd_cast<_Samesize<int, _V>>(__e - __offset);
733 	return __mant;
734       }
735   }
736 
737 // }}}
738 _GLIBCXX_SIMD_MATH_CALL2_(ldexp, int)
739 _GLIBCXX_SIMD_MATH_CALL_(ilogb)
740 
741 // logarithms {{{
742 _GLIBCXX_SIMD_MATH_CALL_(log)
743 _GLIBCXX_SIMD_MATH_CALL_(log10)
744 _GLIBCXX_SIMD_MATH_CALL_(log1p)
745 _GLIBCXX_SIMD_MATH_CALL_(log2)
746 
747 //}}}
748 // logb{{{
749 template <typename _Tp, typename _Abi, typename = __detail::__odr_helper>
750   enable_if_t<is_floating_point<_Tp>::value, simd<_Tp, _Abi>>
751   logb(const simd<_Tp, _Abi>& __x)
752   {
753     constexpr size_t _Np = simd_size_v<_Tp, _Abi>;
754     if constexpr (_Np == 1)
755       return std::logb(__x[0]);
756     else if constexpr (__is_fixed_size_abi_v<_Abi>)
757       return {__private_init, _Abi::_SimdImpl::_S_logb(__data(__x))};
758 #if _GLIBCXX_SIMD_X86INTRIN // {{{
759     else if constexpr (__have_avx512vl && __is_sse_ps<_Tp, _Np>())
760       return {__private_init,
761 	      __auto_bitcast(_mm_getexp_ps(__to_intrin(__as_vector(__x))))};
762     else if constexpr (__have_avx512vl && __is_sse_pd<_Tp, _Np>())
763       return {__private_init, _mm_getexp_pd(__data(__x))};
764     else if constexpr (__have_avx512vl && __is_avx_ps<_Tp, _Np>())
765       return {__private_init, _mm256_getexp_ps(__data(__x))};
766     else if constexpr (__have_avx512vl && __is_avx_pd<_Tp, _Np>())
767       return {__private_init, _mm256_getexp_pd(__data(__x))};
768     else if constexpr (__have_avx512f && __is_avx_ps<_Tp, _Np>())
769       return {__private_init,
770 	      __lo256(_mm512_getexp_ps(__auto_bitcast(__data(__x))))};
771     else if constexpr (__have_avx512f && __is_avx_pd<_Tp, _Np>())
772       return {__private_init,
773 	      __lo256(_mm512_getexp_pd(__auto_bitcast(__data(__x))))};
774     else if constexpr (__is_avx512_ps<_Tp, _Np>())
775       return {__private_init, _mm512_getexp_ps(__data(__x))};
776     else if constexpr (__is_avx512_pd<_Tp, _Np>())
777       return {__private_init, _mm512_getexp_pd(__data(__x))};
778 #endif // _GLIBCXX_SIMD_X86INTRIN }}}
779     else
780       {
781 	using _V = simd<_Tp, _Abi>;
782 	using namespace std::experimental::__proposed;
783 	auto __is_normal = isnormal(__x);
784 
785 	// work on abs(__x) to reflect the return value on Linux for negative
786 	// inputs (domain-error => implementation-defined value is returned)
787 	const _V abs_x = abs(__x);
788 
789 	// __exponent(__x) returns the exponent value (bias removed) as
790 	// simd<_Up> with integral _Up
791 	auto&& __exponent = [](const _V& __v) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
792 	  using namespace std::experimental::__proposed;
793 	  using _IV = rebind_simd_t<
794 	    conditional_t<sizeof(_Tp) == sizeof(_LLong), _LLong, int>, _V>;
795 	  return (simd_bit_cast<_IV>(__v) >> (__digits_v<_Tp> - 1))
796 		 - (__max_exponent_v<_Tp> - 1);
797 	};
798 	_V __r = static_simd_cast<_V>(__exponent(abs_x));
799 	if (_GLIBCXX_SIMD_IS_LIKELY(all_of(__is_normal)))
800 	  // without corner cases (nan, inf, subnormal, zero) we have our
801 	  // answer:
802 	  return __r;
803 	const auto __is_zero = __x == 0;
804 	const auto __is_nan = isnan(__x);
805 	const auto __is_inf = isinf(__x);
806 	where(__is_zero, __r) = -__infinity_v<_Tp>;
807 	where(__is_nan, __r) = __x;
808 	where(__is_inf, __r) = __infinity_v<_Tp>;
809 	__is_normal |= __is_zero || __is_nan || __is_inf;
810 	if (all_of(__is_normal))
811 	  // at this point everything but subnormals is handled
812 	  return __r;
813 	// subnormals repeat the exponent extraction after multiplication of the
814 	// input with __a floating point value that has 112 (0x70) in its exponent
815 	// (not too big for sp and large enough for dp)
816 	const _V __scaled = abs_x * _Tp(0x1p112);
817 	_V __scaled_exp = static_simd_cast<_V>(__exponent(__scaled) - 112);
818 	where(__is_normal, __scaled_exp) = __r;
819 	return __scaled_exp;
820       }
821   }
822 
823 //}}}
824 template <typename _Tp, typename _Abi, typename = __detail::__odr_helper>
825   enable_if_t<is_floating_point_v<_Tp>, simd<_Tp, _Abi>>
826   modf(const simd<_Tp, _Abi>& __x, simd<_Tp, _Abi>* __iptr)
827   {
828     if constexpr (simd_size_v<_Tp, _Abi> == 1)
829       {
830 	_Tp __tmp;
831 	_Tp __r = std::modf(__x[0], &__tmp);
832 	__iptr[0] = __tmp;
833 	return __r;
834       }
835     else
836       {
837 	const auto __integral = trunc(__x);
838 	*__iptr = __integral;
839 	auto __r = __x - __integral;
840 #if !__FINITE_MATH_ONLY__
841 	where(isinf(__x), __r) = _Tp();
842 #endif
843 	return copysign(__r, __x);
844       }
845   }
846 
847 _GLIBCXX_SIMD_MATH_CALL2_(scalbn, int)
848 _GLIBCXX_SIMD_MATH_CALL2_(scalbln, long)
849 
850 _GLIBCXX_SIMD_MATH_CALL_(cbrt)
851 
852 _GLIBCXX_SIMD_MATH_CALL_(abs)
853 _GLIBCXX_SIMD_MATH_CALL_(fabs)
854 
855 // [parallel.simd.math] only asks for is_floating_point_v<_Tp> and forgot to
856 // allow signed integral _Tp
857 template <typename _Tp, typename _Abi>
858   _GLIBCXX_SIMD_ALWAYS_INLINE
859   enable_if_t<!is_floating_point_v<_Tp> && is_signed_v<_Tp>, simd<_Tp, _Abi>>
860   abs(const simd<_Tp, _Abi>& __x)
861   { return {__private_init, _Abi::_SimdImpl::_S_abs(__data(__x))}; }
862 
863 #define _GLIBCXX_SIMD_CVTING2(_NAME)                                           \
864 template <typename _Tp, typename _Abi>                                         \
865   _GLIBCXX_SIMD_INTRINSIC simd<_Tp, _Abi> _NAME(                               \
866     const simd<_Tp, _Abi>& __x, const __type_identity_t<simd<_Tp, _Abi>>& __y) \
867   {                                                                            \
868     return _NAME(__x, __y);                                                    \
869   }                                                                            \
870                                                                                \
871 template <typename _Tp, typename _Abi>                                         \
872   _GLIBCXX_SIMD_INTRINSIC simd<_Tp, _Abi> _NAME(                               \
873     const __type_identity_t<simd<_Tp, _Abi>>& __x, const simd<_Tp, _Abi>& __y) \
874   {                                                                            \
875     return _NAME(__x, __y);                                                    \
876   }
877 
878 #define _GLIBCXX_SIMD_CVTING3(_NAME)                                           \
879 template <typename _Tp, typename _Abi>                                         \
880   _GLIBCXX_SIMD_INTRINSIC simd<_Tp, _Abi> _NAME(                               \
881     const __type_identity_t<simd<_Tp, _Abi>>& __x, const simd<_Tp, _Abi>& __y, \
882     const simd<_Tp, _Abi>& __z)                                                \
883   {                                                                            \
884     return _NAME(__x, __y, __z);                                               \
885   }                                                                            \
886                                                                                \
887 template <typename _Tp, typename _Abi>                                         \
888   _GLIBCXX_SIMD_INTRINSIC simd<_Tp, _Abi> _NAME(                               \
889     const simd<_Tp, _Abi>& __x, const __type_identity_t<simd<_Tp, _Abi>>& __y, \
890     const simd<_Tp, _Abi>& __z)                                                \
891   {                                                                            \
892     return _NAME(__x, __y, __z);                                               \
893   }                                                                            \
894                                                                                \
895 template <typename _Tp, typename _Abi>                                         \
896   _GLIBCXX_SIMD_INTRINSIC simd<_Tp, _Abi> _NAME(                               \
897     const simd<_Tp, _Abi>& __x, const simd<_Tp, _Abi>& __y,                    \
898     const __type_identity_t<simd<_Tp, _Abi>>& __z)                             \
899   {                                                                            \
900     return _NAME(__x, __y, __z);                                               \
901   }                                                                            \
902                                                                                \
903 template <typename _Tp, typename _Abi>                                         \
904   _GLIBCXX_SIMD_INTRINSIC simd<_Tp, _Abi> _NAME(                               \
905     const simd<_Tp, _Abi>& __x, const __type_identity_t<simd<_Tp, _Abi>>& __y, \
906     const __type_identity_t<simd<_Tp, _Abi>>& __z)                             \
907   {                                                                            \
908     return _NAME(__x, __y, __z);                                               \
909   }                                                                            \
910                                                                                \
911 template <typename _Tp, typename _Abi>                                         \
912   _GLIBCXX_SIMD_INTRINSIC simd<_Tp, _Abi> _NAME(                               \
913     const __type_identity_t<simd<_Tp, _Abi>>& __x, const simd<_Tp, _Abi>& __y, \
914     const __type_identity_t<simd<_Tp, _Abi>>& __z)                             \
915   {                                                                            \
916     return _NAME(__x, __y, __z);                                               \
917   }                                                                            \
918                                                                                \
919 template <typename _Tp, typename _Abi>                                         \
920   _GLIBCXX_SIMD_INTRINSIC simd<_Tp, _Abi> _NAME(                               \
921     const __type_identity_t<simd<_Tp, _Abi>>& __x,                             \
922     const __type_identity_t<simd<_Tp, _Abi>>& __y, const simd<_Tp, _Abi>& __z) \
923   {                                                                            \
924     return _NAME(__x, __y, __z);                                               \
925   }
926 
927 template <typename _R, typename _ToApply, typename _Tp, typename... _Tps>
928   _GLIBCXX_SIMD_INTRINSIC _R
929   __fixed_size_apply(_ToApply&& __apply, const _Tp& __arg0,
930 		     const _Tps&... __args)
931   {
932     return {__private_init,
933 	    __data(__arg0)._M_apply_per_chunk(
934 	      [&](auto __impl, const auto&... __inner) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
935 		using _V = typename decltype(__impl)::simd_type;
936 		return __data(__apply(_V(__private_init, __inner)...));
937 	      },
938 	      __data(__args)...)};
939   }
940 
941 template <typename _VV, typename = __detail::__odr_helper>
942   __remove_cvref_t<_VV>
943   __hypot(_VV __x, _VV __y)
944   {
945     using _V = __remove_cvref_t<_VV>;
946     using _Tp = typename _V::value_type;
947     if constexpr (_V::size() == 1)
948       return std::hypot(_Tp(__x[0]), _Tp(__y[0]));
949     else if constexpr (__is_fixed_size_abi_v<typename _V::abi_type>)
950       {
951 	return __fixed_size_apply<_V>([](auto __a,
952 					 auto __b) { return hypot(__a, __b); },
953 				      __x, __y);
954       }
955     else
956       {
957 	// A simple solution for _Tp == float would be to cast to double and
958 	// simply calculate sqrt(x²+y²) as it can't over-/underflow anymore with
959 	// dp. It still needs the Annex F fixups though and isn't faster on
960 	// Skylake-AVX512 (not even for SSE and AVX vectors, and really bad for
961 	// AVX-512).
962 	using namespace __float_bitwise_operators;
963 	using namespace __proposed;
964 	_V __absx = abs(__x);          // no error
965 	_V __absy = abs(__y);          // no error
966 	_V __hi = max(__absx, __absy); // no error
967 	_V __lo = min(__absy, __absx); // no error
968 
969 	// round __hi down to the next power-of-2:
970 	_GLIBCXX_SIMD_USE_CONSTEXPR_API _V __inf(__infinity_v<_Tp>);
971 
972 #ifndef __FAST_MATH__
973 	if constexpr (__have_neon && !__have_neon_a32)
974 	  { // With ARMv7 NEON, we have no subnormals and must use slightly
975 	    // different strategy
976 	    const _V __hi_exp = __hi & __inf;
977 	    _V __scale_back = __hi_exp;
978 	    // For large exponents (max & max/2) the inversion comes too close
979 	    // to subnormals. Subtract 3 from the exponent:
980 	    where(__hi_exp > 1, __scale_back) = __hi_exp * _Tp(0.125);
981 	    // Invert and adjust for the off-by-one error of inversion via xor:
982 	    const _V __scale = (__scale_back ^ __inf) * _Tp(.5);
983 	    const _V __h1 = __hi * __scale;
984 	    const _V __l1 = __lo * __scale;
985 	    _V __r = __scale_back * sqrt(__h1 * __h1 + __l1 * __l1);
986 	    // Fix up hypot(0, 0) to not be NaN:
987 	    where(__hi == 0, __r) = 0;
988 	    return __r;
989 	  }
990 #endif
991 
992 #ifdef __FAST_MATH__
993 	// With fast-math, ignore precision of subnormals and inputs from
994 	// __finite_max_v/2 to __finite_max_v. This removes all
995 	// branching/masking.
996 	if constexpr (true)
997 #else
998 	if (_GLIBCXX_SIMD_IS_LIKELY(all_of(isnormal(__x))
999 				    && all_of(isnormal(__y))))
1000 #endif
1001 	  {
1002 	    const _V __hi_exp = __hi & __inf;
1003 	    //((__hi + __hi) & __inf) ^ __inf almost works for computing
1004 	    //__scale,
1005 	    // except when (__hi + __hi) & __inf == __inf, in which case __scale
1006 	    // becomes 0 (should be min/2 instead) and thus loses the
1007 	    // information from __lo.
1008 #ifdef __FAST_MATH__
1009 	    using _Ip = __int_for_sizeof_t<_Tp>;
1010 	    using _IV = rebind_simd_t<_Ip, _V>;
1011 	    const auto __as_int = simd_bit_cast<_IV>(__hi_exp);
1012 	    const _V __scale
1013 	      = simd_bit_cast<_V>(2 * __bit_cast<_Ip>(_Tp(1)) - __as_int);
1014 #else
1015 	    const _V __scale = (__hi_exp ^ __inf) * _Tp(.5);
1016 #endif
1017 	    _GLIBCXX_SIMD_USE_CONSTEXPR_API _V __mant_mask
1018 	      = __norm_min_v<_Tp> - __denorm_min_v<_Tp>;
1019 	    const _V __h1 = (__hi & __mant_mask) | _V(1);
1020 	    const _V __l1 = __lo * __scale;
1021 	    return __hi_exp * sqrt(__h1 * __h1 + __l1 * __l1);
1022 	  }
1023 	else
1024 	  {
1025 	    // slower path to support subnormals
1026 	    // if __hi is subnormal, avoid scaling by inf & final mul by 0
1027 	    // (which yields NaN) by using min()
1028 	    _V __scale = _V(1 / __norm_min_v<_Tp>);
1029 	    // invert exponent w/o error and w/o using the slow divider unit:
1030 	    // xor inverts the exponent but off by 1. Multiplication with .5
1031 	    // adjusts for the discrepancy.
1032 	    where(__hi >= __norm_min_v<_Tp>, __scale)
1033 	      = ((__hi & __inf) ^ __inf) * _Tp(.5);
1034 	    // adjust final exponent for subnormal inputs
1035 	    _V __hi_exp = __norm_min_v<_Tp>;
1036 	    where(__hi >= __norm_min_v<_Tp>, __hi_exp)
1037 	      = __hi & __inf;         // no error
1038 	    _V __h1 = __hi * __scale; // no error
1039 	    _V __l1 = __lo * __scale; // no error
1040 
1041 	    // sqrt(x²+y²) = e*sqrt((x/e)²+(y/e)²):
1042 	    // this ensures no overflow in the argument to sqrt
1043 	    _V __r = __hi_exp * sqrt(__h1 * __h1 + __l1 * __l1);
1044 #ifdef __STDC_IEC_559__
1045 	    // fixup for Annex F requirements
1046 	    // the naive fixup goes like this:
1047 	    //
1048 	    // where(__l1 == 0, __r)                      = __hi;
1049 	    // where(isunordered(__x, __y), __r)          = __quiet_NaN_v<_Tp>;
1050 	    // where(isinf(__absx) || isinf(__absy), __r) = __inf;
1051 	    //
1052 	    // The fixup can be prepared in parallel with the sqrt, requiring a
1053 	    // single blend step after hi_exp * sqrt, reducing latency and
1054 	    // throughput:
1055 	    _V __fixup = __hi; // __lo == 0
1056 	    where(isunordered(__x, __y), __fixup) = __quiet_NaN_v<_Tp>;
1057 	    where(isinf(__absx) || isinf(__absy), __fixup) = __inf;
1058 	    where(!(__lo == 0 || isunordered(__x, __y)
1059 		    || (isinf(__absx) || isinf(__absy))),
1060 		  __fixup)
1061 	      = __r;
1062 	    __r = __fixup;
1063 #endif
1064 	    return __r;
1065 	  }
1066       }
1067   }
1068 
1069 template <typename _Tp, typename _Abi>
1070   _GLIBCXX_SIMD_INTRINSIC simd<_Tp, _Abi>
1071   hypot(const simd<_Tp, _Abi>& __x, const simd<_Tp, _Abi>& __y)
1072   {
1073     return __hypot<conditional_t<__is_fixed_size_abi_v<_Abi>,
1074 				 const simd<_Tp, _Abi>&, simd<_Tp, _Abi>>>(__x,
1075 									   __y);
1076   }
1077 
1078 _GLIBCXX_SIMD_CVTING2(hypot)
1079 
1080   template <typename _VV, typename = __detail::__odr_helper>
1081   __remove_cvref_t<_VV>
1082   __hypot(_VV __x, _VV __y, _VV __z)
1083   {
1084     using _V = __remove_cvref_t<_VV>;
1085     using _Abi = typename _V::abi_type;
1086     using _Tp = typename _V::value_type;
1087     /* FIXME: enable after PR77776 is resolved
1088     if constexpr (_V::size() == 1)
1089       return std::hypot(_Tp(__x[0]), _Tp(__y[0]), _Tp(__z[0]));
1090     else
1091     */
1092     if constexpr (__is_fixed_size_abi_v<_Abi> && _V::size() > 1)
1093       {
1094 	return __fixed_size_apply<simd<_Tp, _Abi>>(
1095 		 [](auto __a, auto __b, auto __c) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
1096 		   return hypot(__a, __b, __c);
1097 		 }, __x, __y, __z);
1098       }
1099     else
1100       {
1101 	using namespace __float_bitwise_operators;
1102 	using namespace __proposed;
1103 	const _V __absx = abs(__x);                 // no error
1104 	const _V __absy = abs(__y);                 // no error
1105 	const _V __absz = abs(__z);                 // no error
1106 	_V __hi = max(max(__absx, __absy), __absz); // no error
1107 	_V __l0 = min(__absz, max(__absx, __absy)); // no error
1108 	_V __l1 = min(__absy, __absx);              // no error
1109 	if constexpr (__digits_v<_Tp> == 64 && __max_exponent_v<_Tp> == 0x4000
1110 		      && __min_exponent_v<_Tp> == -0x3FFD && _V::size() == 1)
1111 	  { // Seems like x87 fp80, where bit 63 is always 1 unless subnormal or
1112 	    // NaN. In this case the bit-tricks don't work, they require IEC559
1113 	    // binary32 or binary64 format.
1114 #ifdef __STDC_IEC_559__
1115 	    // fixup for Annex F requirements
1116 	    if (isinf(__absx[0]) || isinf(__absy[0]) || isinf(__absz[0]))
1117 	      return __infinity_v<_Tp>;
1118 	    else if (isunordered(__absx[0], __absy[0] + __absz[0]))
1119 	      return __quiet_NaN_v<_Tp>;
1120 	    else if (__l0[0] == 0 && __l1[0] == 0)
1121 	      return __hi;
1122 #endif
1123 	    _V __hi_exp = __hi;
1124 	    const _ULLong __tmp = 0x8000'0000'0000'0000ull;
1125 	    __builtin_memcpy(&__data(__hi_exp), &__tmp, 8);
1126 	    const _V __scale = 1 / __hi_exp;
1127 	    __hi *= __scale;
1128 	    __l0 *= __scale;
1129 	    __l1 *= __scale;
1130 	    return __hi_exp * sqrt((__l0 * __l0 + __l1 * __l1) + __hi * __hi);
1131 	  }
1132 	else
1133 	  {
1134 	    // round __hi down to the next power-of-2:
1135 	    _GLIBCXX_SIMD_USE_CONSTEXPR_API _V __inf(__infinity_v<_Tp>);
1136 
1137 #ifndef __FAST_MATH__
1138 	    if constexpr (_V::size() > 1 && __have_neon && !__have_neon_a32)
1139 	      { // With ARMv7 NEON, we have no subnormals and must use slightly
1140 		// different strategy
1141 		const _V __hi_exp = __hi & __inf;
1142 		_V __scale_back = __hi_exp;
1143 		// For large exponents (max & max/2) the inversion comes too
1144 		// close to subnormals. Subtract 3 from the exponent:
1145 		where(__hi_exp > 1, __scale_back) = __hi_exp * _Tp(0.125);
1146 		// Invert and adjust for the off-by-one error of inversion via
1147 		// xor:
1148 		const _V __scale = (__scale_back ^ __inf) * _Tp(.5);
1149 		const _V __h1 = __hi * __scale;
1150 		__l0 *= __scale;
1151 		__l1 *= __scale;
1152 		_V __lo = __l0 * __l0
1153 			  + __l1 * __l1; // add the two smaller values first
1154 		asm("" : "+m"(__lo));
1155 		_V __r = __scale_back * sqrt(__h1 * __h1 + __lo);
1156 		// Fix up hypot(0, 0, 0) to not be NaN:
1157 		where(__hi == 0, __r) = 0;
1158 		return __r;
1159 	      }
1160 #endif
1161 
1162 #ifdef __FAST_MATH__
1163 	    // With fast-math, ignore precision of subnormals and inputs from
1164 	    // __finite_max_v/2 to __finite_max_v. This removes all
1165 	    // branching/masking.
1166 	    if constexpr (true)
1167 #else
1168 	    if (_GLIBCXX_SIMD_IS_LIKELY(all_of(isnormal(__x))
1169 					&& all_of(isnormal(__y))
1170 					&& all_of(isnormal(__z))))
1171 #endif
1172 	      {
1173 		const _V __hi_exp = __hi & __inf;
1174 		//((__hi + __hi) & __inf) ^ __inf almost works for computing
1175 		//__scale, except when (__hi + __hi) & __inf == __inf, in which
1176 		// case __scale
1177 		// becomes 0 (should be min/2 instead) and thus loses the
1178 		// information from __lo.
1179 #ifdef __FAST_MATH__
1180 		using _Ip = __int_for_sizeof_t<_Tp>;
1181 		using _IV = rebind_simd_t<_Ip, _V>;
1182 		const auto __as_int = simd_bit_cast<_IV>(__hi_exp);
1183 		const _V __scale
1184 		  = simd_bit_cast<_V>(2 * __bit_cast<_Ip>(_Tp(1)) - __as_int);
1185 #else
1186 		const _V __scale = (__hi_exp ^ __inf) * _Tp(.5);
1187 #endif
1188 		constexpr _Tp __mant_mask
1189 		  = __norm_min_v<_Tp> - __denorm_min_v<_Tp>;
1190 		const _V __h1 = (__hi & _V(__mant_mask)) | _V(1);
1191 		__l0 *= __scale;
1192 		__l1 *= __scale;
1193 		const _V __lo
1194 		  = __l0 * __l0
1195 		    + __l1 * __l1; // add the two smaller values first
1196 		return __hi_exp * sqrt(__lo + __h1 * __h1);
1197 	      }
1198 	    else
1199 	      {
1200 		// slower path to support subnormals
1201 		// if __hi is subnormal, avoid scaling by inf & final mul by 0
1202 		// (which yields NaN) by using min()
1203 		_V __scale = _V(1 / __norm_min_v<_Tp>);
1204 		// invert exponent w/o error and w/o using the slow divider
1205 		// unit: xor inverts the exponent but off by 1. Multiplication
1206 		// with .5 adjusts for the discrepancy.
1207 		where(__hi >= __norm_min_v<_Tp>, __scale)
1208 		  = ((__hi & __inf) ^ __inf) * _Tp(.5);
1209 		// adjust final exponent for subnormal inputs
1210 		_V __hi_exp = __norm_min_v<_Tp>;
1211 		where(__hi >= __norm_min_v<_Tp>, __hi_exp)
1212 		  = __hi & __inf;         // no error
1213 		_V __h1 = __hi * __scale; // no error
1214 		__l0 *= __scale;          // no error
1215 		__l1 *= __scale;          // no error
1216 		_V __lo = __l0 * __l0
1217 			  + __l1 * __l1; // add the two smaller values first
1218 		_V __r = __hi_exp * sqrt(__lo + __h1 * __h1);
1219 #ifdef __STDC_IEC_559__
1220 		// fixup for Annex F requirements
1221 		_V __fixup = __hi; // __lo == 0
1222 		// where(__lo == 0, __fixup)                   = __hi;
1223 		where(isunordered(__x, __y + __z), __fixup)
1224 		  = __quiet_NaN_v<_Tp>;
1225 		where(isinf(__absx) || isinf(__absy) || isinf(__absz), __fixup)
1226 		  = __inf;
1227 		// Instead of __lo == 0, the following could depend on __h1² ==
1228 		// __h1² + __lo (i.e. __hi is so much larger than the other two
1229 		// inputs that the result is exactly __hi). While this may
1230 		// improve precision, it is likely to reduce efficiency if the
1231 		// ISA has FMAs (because __h1² + __lo is an FMA, but the
1232 		// intermediate
1233 		// __h1² must be kept)
1234 		where(!(__lo == 0 || isunordered(__x, __y + __z)
1235 			|| isinf(__absx) || isinf(__absy) || isinf(__absz)),
1236 		      __fixup)
1237 		  = __r;
1238 		__r = __fixup;
1239 #endif
1240 		return __r;
1241 	      }
1242 	  }
1243       }
1244   }
1245 
1246   template <typename _Tp, typename _Abi>
1247   _GLIBCXX_SIMD_INTRINSIC simd<_Tp, _Abi>
1248   hypot(const simd<_Tp, _Abi>& __x, const simd<_Tp, _Abi>& __y,
1249 	const simd<_Tp, _Abi>& __z)
1250   {
1251     return __hypot<conditional_t<__is_fixed_size_abi_v<_Abi>,
1252 				 const simd<_Tp, _Abi>&, simd<_Tp, _Abi>>>(__x,
1253 									   __y,
1254 									   __z);
1255   }
1256 
1257 _GLIBCXX_SIMD_CVTING3(hypot)
1258 
1259 _GLIBCXX_SIMD_MATH_CALL2_(pow, _Tp)
1260 
1261 _GLIBCXX_SIMD_MATH_CALL_(sqrt)
1262 _GLIBCXX_SIMD_MATH_CALL_(erf)
1263 _GLIBCXX_SIMD_MATH_CALL_(erfc)
1264 _GLIBCXX_SIMD_MATH_CALL_(lgamma)
1265 _GLIBCXX_SIMD_MATH_CALL_(tgamma)
1266 _GLIBCXX_SIMD_MATH_CALL_(ceil)
1267 _GLIBCXX_SIMD_MATH_CALL_(floor)
1268 _GLIBCXX_SIMD_MATH_CALL_(nearbyint)
1269 _GLIBCXX_SIMD_MATH_CALL_(rint)
1270 _GLIBCXX_SIMD_MATH_CALL_(lrint)
1271 _GLIBCXX_SIMD_MATH_CALL_(llrint)
1272 
1273 _GLIBCXX_SIMD_MATH_CALL_(round)
1274 _GLIBCXX_SIMD_MATH_CALL_(lround)
1275 _GLIBCXX_SIMD_MATH_CALL_(llround)
1276 
1277 _GLIBCXX_SIMD_MATH_CALL_(trunc)
1278 
1279 _GLIBCXX_SIMD_MATH_CALL2_(fmod, _Tp)
1280 _GLIBCXX_SIMD_MATH_CALL2_(remainder, _Tp)
1281 _GLIBCXX_SIMD_MATH_CALL3_(remquo, _Tp, int*)
1282 
1283 template <typename _Tp, typename _Abi, typename = __detail::__odr_helper>
1284   enable_if_t<is_floating_point_v<_Tp>, simd<_Tp, _Abi>>
1285   copysign(const simd<_Tp, _Abi>& __x, const simd<_Tp, _Abi>& __y)
1286   {
1287     if constexpr (simd_size_v<_Tp, _Abi> == 1)
1288       return std::copysign(__x[0], __y[0]);
1289     else if constexpr (__is_fixed_size_abi_v<_Abi>)
1290       return {__private_init, _Abi::_SimdImpl::_S_copysign(__data(__x), __data(__y))};
1291     else
1292       {
1293 	using _V = simd<_Tp, _Abi>;
1294 	using namespace std::experimental::__float_bitwise_operators;
1295 	_GLIBCXX_SIMD_USE_CONSTEXPR_API auto __signmask = _V(1) ^ _V(-1);
1296 	return (__x & ~__signmask) | (__y & __signmask);
1297       }
1298   }
1299 
1300 _GLIBCXX_SIMD_MATH_CALL2_(nextafter, _Tp)
1301 // not covered in [parallel.simd.math]:
1302 // _GLIBCXX_SIMD_MATH_CALL2_(nexttoward, long double)
1303 _GLIBCXX_SIMD_MATH_CALL2_(fdim, _Tp)
1304 _GLIBCXX_SIMD_MATH_CALL2_(fmax, _Tp)
1305 _GLIBCXX_SIMD_MATH_CALL2_(fmin, _Tp)
1306 
1307 _GLIBCXX_SIMD_MATH_CALL3_(fma, _Tp, _Tp)
1308 _GLIBCXX_SIMD_MATH_CALL_(fpclassify)
1309 _GLIBCXX_SIMD_MATH_CALL_(isfinite)
1310 
1311 // isnan and isinf require special treatment because old glibc may declare
1312 // `int isinf(double)`.
1313 template <typename _Tp, typename _Abi, typename...,
1314 	  typename _R = _Math_return_type_t<bool, _Tp, _Abi>>
1315   _GLIBCXX_SIMD_ALWAYS_INLINE
1316   enable_if_t<is_floating_point_v<_Tp>, _R>
1317   isinf(simd<_Tp, _Abi> __x)
1318   { return {__private_init, _Abi::_SimdImpl::_S_isinf(__data(__x))}; }
1319 
1320 template <typename _Tp, typename _Abi, typename...,
1321 	  typename _R = _Math_return_type_t<bool, _Tp, _Abi>>
1322   _GLIBCXX_SIMD_ALWAYS_INLINE
1323   enable_if_t<is_floating_point_v<_Tp>, _R>
1324   isnan(simd<_Tp, _Abi> __x)
1325   { return {__private_init, _Abi::_SimdImpl::_S_isnan(__data(__x))}; }
1326 
1327 _GLIBCXX_SIMD_MATH_CALL_(isnormal)
1328 
1329 template <typename..., typename _Tp, typename _Abi>
1330   _GLIBCXX_SIMD_ALWAYS_INLINE
1331   simd_mask<_Tp, _Abi>
1332   signbit(simd<_Tp, _Abi> __x)
1333   {
1334     if constexpr (is_integral_v<_Tp>)
1335       {
1336 	if constexpr (is_unsigned_v<_Tp>)
1337 	  return simd_mask<_Tp, _Abi>{}; // false
1338 	else
1339 	  return __x < 0;
1340       }
1341     else
1342       return {__private_init, _Abi::_SimdImpl::_S_signbit(__data(__x))};
1343   }
1344 
1345 _GLIBCXX_SIMD_MATH_CALL2_(isgreater, _Tp)
1346 _GLIBCXX_SIMD_MATH_CALL2_(isgreaterequal, _Tp)
1347 _GLIBCXX_SIMD_MATH_CALL2_(isless, _Tp)
1348 _GLIBCXX_SIMD_MATH_CALL2_(islessequal, _Tp)
1349 _GLIBCXX_SIMD_MATH_CALL2_(islessgreater, _Tp)
1350 _GLIBCXX_SIMD_MATH_CALL2_(isunordered, _Tp)
1351 
1352 /* not covered in [parallel.simd.math]
1353 template <typename _Abi> __doublev<_Abi> nan(const char* tagp);
1354 template <typename _Abi> __floatv<_Abi> nanf(const char* tagp);
1355 template <typename _Abi> __ldoublev<_Abi> nanl(const char* tagp);
1356 
1357 template <typename _V> struct simd_div_t {
1358     _V quot, rem;
1359 };
1360 
1361 template <typename _Abi>
1362 simd_div_t<_SCharv<_Abi>> div(_SCharv<_Abi> numer,
1363 					 _SCharv<_Abi> denom);
1364 template <typename _Abi>
1365 simd_div_t<__shortv<_Abi>> div(__shortv<_Abi> numer,
1366 					 __shortv<_Abi> denom);
1367 template <typename _Abi>
1368 simd_div_t<__intv<_Abi>> div(__intv<_Abi> numer, __intv<_Abi> denom);
1369 template <typename _Abi>
1370 simd_div_t<__longv<_Abi>> div(__longv<_Abi> numer,
1371 					__longv<_Abi> denom);
1372 template <typename _Abi>
1373 simd_div_t<__llongv<_Abi>> div(__llongv<_Abi> numer,
1374 					 __llongv<_Abi> denom);
1375 */
1376 
1377 // special math {{{
1378 template <typename _Tp, typename _Abi, typename = __detail::__odr_helper>
1379   enable_if_t<is_floating_point_v<_Tp>, simd<_Tp, _Abi>>
1380   assoc_laguerre(const fixed_size_simd<unsigned, simd_size_v<_Tp, _Abi>>& __n,
1381 		 const fixed_size_simd<unsigned, simd_size_v<_Tp, _Abi>>& __m,
1382 		 const simd<_Tp, _Abi>& __x)
1383   {
1384     return simd<_Tp, _Abi>([&](auto __i) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
1385 	     return std::assoc_laguerre(__n[__i], __m[__i], __x[__i]);
1386 	   });
1387   }
1388 
1389 template <typename _Tp, typename _Abi, typename = __detail::__odr_helper>
1390   enable_if_t<is_floating_point_v<_Tp>, simd<_Tp, _Abi>>
1391   assoc_legendre(const fixed_size_simd<unsigned, simd_size_v<_Tp, _Abi>>& __n,
1392 		 const fixed_size_simd<unsigned, simd_size_v<_Tp, _Abi>>& __m,
1393 		 const simd<_Tp, _Abi>& __x)
1394   {
1395     return simd<_Tp, _Abi>([&](auto __i) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
1396 	     return std::assoc_legendre(__n[__i], __m[__i], __x[__i]);
1397 	   });
1398   }
1399 
1400 _GLIBCXX_SIMD_MATH_CALL2_(beta, _Tp)
1401 _GLIBCXX_SIMD_MATH_CALL_(comp_ellint_1)
1402 _GLIBCXX_SIMD_MATH_CALL_(comp_ellint_2)
1403 _GLIBCXX_SIMD_MATH_CALL2_(comp_ellint_3, _Tp)
1404 _GLIBCXX_SIMD_MATH_CALL2_(cyl_bessel_i, _Tp)
1405 _GLIBCXX_SIMD_MATH_CALL2_(cyl_bessel_j, _Tp)
1406 _GLIBCXX_SIMD_MATH_CALL2_(cyl_bessel_k, _Tp)
1407 _GLIBCXX_SIMD_MATH_CALL2_(cyl_neumann, _Tp)
1408 _GLIBCXX_SIMD_MATH_CALL2_(ellint_1, _Tp)
1409 _GLIBCXX_SIMD_MATH_CALL2_(ellint_2, _Tp)
1410 _GLIBCXX_SIMD_MATH_CALL3_(ellint_3, _Tp, _Tp)
1411 _GLIBCXX_SIMD_MATH_CALL_(expint)
1412 
1413 template <typename _Tp, typename _Abi, typename = __detail::__odr_helper>
1414   enable_if_t<is_floating_point_v<_Tp>, simd<_Tp, _Abi>>
1415   hermite(const fixed_size_simd<unsigned, simd_size_v<_Tp, _Abi>>& __n,
1416 	  const simd<_Tp, _Abi>& __x)
1417   {
1418     return simd<_Tp, _Abi>([&](auto __i) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
1419 	     return std::hermite(__n[__i], __x[__i]);
1420 	   });
1421   }
1422 
1423 template <typename _Tp, typename _Abi, typename = __detail::__odr_helper>
1424   enable_if_t<is_floating_point_v<_Tp>, simd<_Tp, _Abi>>
1425   laguerre(const fixed_size_simd<unsigned, simd_size_v<_Tp, _Abi>>& __n,
1426 	   const simd<_Tp, _Abi>& __x)
1427   {
1428     return simd<_Tp, _Abi>([&](auto __i) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
1429 	     return std::laguerre(__n[__i], __x[__i]);
1430 	   });
1431   }
1432 
1433 template <typename _Tp, typename _Abi, typename = __detail::__odr_helper>
1434   enable_if_t<is_floating_point_v<_Tp>, simd<_Tp, _Abi>>
1435   legendre(const fixed_size_simd<unsigned, simd_size_v<_Tp, _Abi>>& __n,
1436 	   const simd<_Tp, _Abi>& __x)
1437   {
1438     return simd<_Tp, _Abi>([&](auto __i) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
1439 	     return std::legendre(__n[__i], __x[__i]);
1440 	   });
1441   }
1442 
1443 _GLIBCXX_SIMD_MATH_CALL_(riemann_zeta)
1444 
1445 template <typename _Tp, typename _Abi, typename = __detail::__odr_helper>
1446   enable_if_t<is_floating_point_v<_Tp>, simd<_Tp, _Abi>>
1447   sph_bessel(const fixed_size_simd<unsigned, simd_size_v<_Tp, _Abi>>& __n,
1448 	     const simd<_Tp, _Abi>& __x)
1449   {
1450     return simd<_Tp, _Abi>([&](auto __i) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
1451 	     return std::sph_bessel(__n[__i], __x[__i]);
1452 	   });
1453   }
1454 
1455 template <typename _Tp, typename _Abi, typename = __detail::__odr_helper>
1456   enable_if_t<is_floating_point_v<_Tp>, simd<_Tp, _Abi>>
1457   sph_legendre(const fixed_size_simd<unsigned, simd_size_v<_Tp, _Abi>>& __l,
1458 	       const fixed_size_simd<unsigned, simd_size_v<_Tp, _Abi>>& __m,
1459 	       const simd<_Tp, _Abi>& theta)
1460   {
1461     return simd<_Tp, _Abi>([&](auto __i) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
1462 	     return std::assoc_legendre(__l[__i], __m[__i], theta[__i]);
1463 	   });
1464   }
1465 
1466 template <typename _Tp, typename _Abi, typename = __detail::__odr_helper>
1467   enable_if_t<is_floating_point_v<_Tp>, simd<_Tp, _Abi>>
1468   sph_neumann(const fixed_size_simd<unsigned, simd_size_v<_Tp, _Abi>>& __n,
1469 	      const simd<_Tp, _Abi>& __x)
1470   {
1471     return simd<_Tp, _Abi>([&](auto __i) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
1472 	     return std::sph_neumann(__n[__i], __x[__i]);
1473 	   });
1474   }
1475 // }}}
1476 
1477 #undef _GLIBCXX_SIMD_CVTING2
1478 #undef _GLIBCXX_SIMD_CVTING3
1479 #undef _GLIBCXX_SIMD_MATH_CALL_
1480 #undef _GLIBCXX_SIMD_MATH_CALL2_
1481 #undef _GLIBCXX_SIMD_MATH_CALL3_
1482 
1483 _GLIBCXX_SIMD_END_NAMESPACE
1484 
1485 #endif // __cplusplus >= 201703L
1486 #endif // _GLIBCXX_EXPERIMENTAL_SIMD_MATH_H_
1487 
1488 // vim: foldmethod=marker sw=2 ts=8 noet sts=2
1489