1*f3087befSAndrew Turner /* 2*f3087befSAndrew Turner * Double-precision vector hypot(x) function. 3*f3087befSAndrew Turner * 4*f3087befSAndrew Turner * Copyright (c) 2023-2024, Arm Limited. 5*f3087befSAndrew Turner * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception 6*f3087befSAndrew Turner */ 7*f3087befSAndrew Turner 8*f3087befSAndrew Turner #include "v_math.h" 9*f3087befSAndrew Turner #include "test_sig.h" 10*f3087befSAndrew Turner #include "test_defs.h" 11*f3087befSAndrew Turner 12*f3087befSAndrew Turner #if WANT_SIMD_EXCEPT 13*f3087befSAndrew Turner static const struct data 14*f3087befSAndrew Turner { 15*f3087befSAndrew Turner uint64x2_t tiny_bound, thres; 16*f3087befSAndrew Turner } data = { 17*f3087befSAndrew Turner .tiny_bound = V2 (0x2000000000000000), /* asuint (0x1p-511). */ 18*f3087befSAndrew Turner .thres = V2 (0x3fe0000000000000), /* asuint (0x1p511) - tiny_bound. */ 19*f3087befSAndrew Turner }; 20*f3087befSAndrew Turner #else 21*f3087befSAndrew Turner static const struct data 22*f3087befSAndrew Turner { 23*f3087befSAndrew Turner uint64x2_t tiny_bound; 24*f3087befSAndrew Turner uint32x4_t thres; 25*f3087befSAndrew Turner } data = { 26*f3087befSAndrew Turner .tiny_bound = V2 (0x0360000000000000), /* asuint (0x1p-969). */ 27*f3087befSAndrew Turner .thres = V4 (0x7c900000), /* asuint (inf) - tiny_bound. */ 28*f3087befSAndrew Turner }; 29*f3087befSAndrew Turner #endif 30*f3087befSAndrew Turner 31*f3087befSAndrew Turner static float64x2_t VPCS_ATTR NOINLINE 32*f3087befSAndrew Turner special_case (float64x2_t x, float64x2_t y, float64x2_t sqsum, 33*f3087befSAndrew Turner uint32x2_t special) 34*f3087befSAndrew Turner { 35*f3087befSAndrew Turner return v_call2_f64 (hypot, x, y, vsqrtq_f64 (sqsum), vmovl_u32 (special)); 36*f3087befSAndrew Turner } 37*f3087befSAndrew Turner 38*f3087befSAndrew Turner /* Vector implementation of double-precision hypot. 39*f3087befSAndrew Turner Maximum error observed is 1.21 ULP: 40*f3087befSAndrew Turner _ZGVnN2vv_hypot (0x1.6a1b193ff85b5p-204, 0x1.bc50676c2a447p-222) 41*f3087befSAndrew Turner got 0x1.6a1b19400964ep-204 42*f3087befSAndrew Turner want 0x1.6a1b19400964dp-204. */ 43*f3087befSAndrew Turner #if WANT_SIMD_EXCEPT 44*f3087befSAndrew Turner 45*f3087befSAndrew Turner float64x2_t VPCS_ATTR V_NAME_D2 (hypot) (float64x2_t x, float64x2_t y) 46*f3087befSAndrew Turner { 47*f3087befSAndrew Turner const struct data *d = ptr_barrier (&data); 48*f3087befSAndrew Turner 49*f3087befSAndrew Turner float64x2_t ax = vabsq_f64 (x); 50*f3087befSAndrew Turner float64x2_t ay = vabsq_f64 (y); 51*f3087befSAndrew Turner 52*f3087befSAndrew Turner uint64x2_t ix = vreinterpretq_u64_f64 (ax); 53*f3087befSAndrew Turner uint64x2_t iy = vreinterpretq_u64_f64 (ay); 54*f3087befSAndrew Turner 55*f3087befSAndrew Turner /* Extreme values, NaNs, and infinities should be handled by the scalar 56*f3087befSAndrew Turner fallback for correct flag handling. */ 57*f3087befSAndrew Turner uint64x2_t specialx = vcgeq_u64 (vsubq_u64 (ix, d->tiny_bound), d->thres); 58*f3087befSAndrew Turner uint64x2_t specialy = vcgeq_u64 (vsubq_u64 (iy, d->tiny_bound), d->thres); 59*f3087befSAndrew Turner ax = v_zerofy_f64 (ax, specialx); 60*f3087befSAndrew Turner ay = v_zerofy_f64 (ay, specialy); 61*f3087befSAndrew Turner uint32x2_t special = vaddhn_u64 (specialx, specialy); 62*f3087befSAndrew Turner 63*f3087befSAndrew Turner float64x2_t sqsum = vfmaq_f64 (vmulq_f64 (ax, ax), ay, ay); 64*f3087befSAndrew Turner 65*f3087befSAndrew Turner if (unlikely (v_any_u32h (special))) 66*f3087befSAndrew Turner return special_case (x, y, sqsum, special); 67*f3087befSAndrew Turner 68*f3087befSAndrew Turner return vsqrtq_f64 (sqsum); 69*f3087befSAndrew Turner } 70*f3087befSAndrew Turner #else 71*f3087befSAndrew Turner 72*f3087befSAndrew Turner float64x2_t VPCS_ATTR V_NAME_D2 (hypot) (float64x2_t x, float64x2_t y) 73*f3087befSAndrew Turner { 74*f3087befSAndrew Turner const struct data *d = ptr_barrier (&data); 75*f3087befSAndrew Turner 76*f3087befSAndrew Turner float64x2_t sqsum = vfmaq_f64 (vmulq_f64 (x, x), y, y); 77*f3087befSAndrew Turner 78*f3087befSAndrew Turner uint32x2_t special 79*f3087befSAndrew Turner = vcge_u32 (vsubhn_u64 (vreinterpretq_u64_f64 (sqsum), d->tiny_bound), 80*f3087befSAndrew Turner vget_low_u32 (d->thres)); 81*f3087befSAndrew Turner 82*f3087befSAndrew Turner if (unlikely (v_any_u32h (special))) 83*f3087befSAndrew Turner return special_case (x, y, sqsum, special); 84*f3087befSAndrew Turner 85*f3087befSAndrew Turner return vsqrtq_f64 (sqsum); 86*f3087befSAndrew Turner } 87*f3087befSAndrew Turner #endif 88*f3087befSAndrew Turner 89*f3087befSAndrew Turner TEST_SIG (V, D, 2, hypot, -10.0, 10.0) 90*f3087befSAndrew Turner TEST_ULP (V_NAME_D2 (hypot), 1.21) 91*f3087befSAndrew Turner TEST_DISABLE_FENV_IF_NOT (V_NAME_D2 (hypot), WANT_SIMD_EXCEPT) 92*f3087befSAndrew Turner TEST_INTERVAL2 (V_NAME_D2 (hypot), 0, inf, 0, inf, 10000) 93*f3087befSAndrew Turner TEST_INTERVAL2 (V_NAME_D2 (hypot), 0, inf, -0, -inf, 10000) 94*f3087befSAndrew Turner TEST_INTERVAL2 (V_NAME_D2 (hypot), -0, -inf, 0, inf, 10000) 95*f3087befSAndrew Turner TEST_INTERVAL2 (V_NAME_D2 (hypot), -0, -inf, -0, -inf, 10000) 96