1*f3087befSAndrew Turner /* 2*f3087befSAndrew Turner * Double-precision vector sinh(x) function. 3*f3087befSAndrew Turner * 4*f3087befSAndrew Turner * Copyright (c) 2022-2024, Arm Limited. 5*f3087befSAndrew Turner * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception 6*f3087befSAndrew Turner */ 7*f3087befSAndrew Turner 8*f3087befSAndrew Turner #include "v_math.h" 9*f3087befSAndrew Turner #include "test_sig.h" 10*f3087befSAndrew Turner #include "test_defs.h" 11*f3087befSAndrew Turner #include "v_expm1_inline.h" 12*f3087befSAndrew Turner 13*f3087befSAndrew Turner static const struct data 14*f3087befSAndrew Turner { 15*f3087befSAndrew Turner struct v_expm1_data d; 16*f3087befSAndrew Turner uint64x2_t halff; 17*f3087befSAndrew Turner #if WANT_SIMD_EXCEPT 18*f3087befSAndrew Turner uint64x2_t tiny_bound, thresh; 19*f3087befSAndrew Turner #else 20*f3087befSAndrew Turner float64x2_t large_bound; 21*f3087befSAndrew Turner #endif 22*f3087befSAndrew Turner } data = { 23*f3087befSAndrew Turner .d = V_EXPM1_DATA, 24*f3087befSAndrew Turner .halff = V2 (0x3fe0000000000000), 25*f3087befSAndrew Turner #if WANT_SIMD_EXCEPT 26*f3087befSAndrew Turner /* 2^-26, below which sinh(x) rounds to x. */ 27*f3087befSAndrew Turner .tiny_bound = V2 (0x3e50000000000000), 28*f3087befSAndrew Turner /* asuint(large_bound) - asuint(tiny_bound). */ 29*f3087befSAndrew Turner .thresh = V2 (0x0230000000000000), 30*f3087befSAndrew Turner #else 31*f3087befSAndrew Turner /* 2^9. expm1 helper overflows for large input. */ 32*f3087befSAndrew Turner .large_bound = V2 (0x1p+9), 33*f3087befSAndrew Turner #endif 34*f3087befSAndrew Turner }; 35*f3087befSAndrew Turner 36*f3087befSAndrew Turner static float64x2_t NOINLINE VPCS_ATTR 37*f3087befSAndrew Turner special_case (float64x2_t x) 38*f3087befSAndrew Turner { 39*f3087befSAndrew Turner return v_call_f64 (sinh, x, x, v_u64 (-1)); 40*f3087befSAndrew Turner } 41*f3087befSAndrew Turner 42*f3087befSAndrew Turner /* Approximation for vector double-precision sinh(x) using expm1. 43*f3087befSAndrew Turner sinh(x) = (exp(x) - exp(-x)) / 2. 44*f3087befSAndrew Turner The greatest observed error is 2.52 ULP: 45*f3087befSAndrew Turner _ZGVnN2v_sinh(-0x1.a098a2177a2b9p-2) got -0x1.ac2f05bb66fccp-2 46*f3087befSAndrew Turner want -0x1.ac2f05bb66fc9p-2. */ 47*f3087befSAndrew Turner float64x2_t VPCS_ATTR V_NAME_D1 (sinh) (float64x2_t x) 48*f3087befSAndrew Turner { 49*f3087befSAndrew Turner const struct data *d = ptr_barrier (&data); 50*f3087befSAndrew Turner 51*f3087befSAndrew Turner float64x2_t ax = vabsq_f64 (x); 52*f3087befSAndrew Turner uint64x2_t ix = vreinterpretq_u64_f64 (x); 53*f3087befSAndrew Turner float64x2_t halfsign = vreinterpretq_f64_u64 ( 54*f3087befSAndrew Turner vbslq_u64 (v_u64 (0x8000000000000000), ix, d->halff)); 55*f3087befSAndrew Turner 56*f3087befSAndrew Turner #if WANT_SIMD_EXCEPT 57*f3087befSAndrew Turner uint64x2_t special = vcgeq_u64 ( 58*f3087befSAndrew Turner vsubq_u64 (vreinterpretq_u64_f64 (ax), d->tiny_bound), d->thresh); 59*f3087befSAndrew Turner #else 60*f3087befSAndrew Turner uint64x2_t special = vcageq_f64 (x, d->large_bound); 61*f3087befSAndrew Turner #endif 62*f3087befSAndrew Turner 63*f3087befSAndrew Turner /* Fall back to scalar variant for all lanes if any of them are special. */ 64*f3087befSAndrew Turner if (unlikely (v_any_u64 (special))) 65*f3087befSAndrew Turner return special_case (x); 66*f3087befSAndrew Turner 67*f3087befSAndrew Turner /* Up to the point that expm1 overflows, we can use it to calculate sinh 68*f3087befSAndrew Turner using a slight rearrangement of the definition of sinh. This allows us to 69*f3087befSAndrew Turner retain acceptable accuracy for very small inputs. */ 70*f3087befSAndrew Turner float64x2_t t = expm1_inline (ax, &d->d); 71*f3087befSAndrew Turner t = vaddq_f64 (t, vdivq_f64 (t, vaddq_f64 (t, v_f64 (1.0)))); 72*f3087befSAndrew Turner return vmulq_f64 (t, halfsign); 73*f3087befSAndrew Turner } 74*f3087befSAndrew Turner 75*f3087befSAndrew Turner TEST_SIG (V, D, 1, sinh, -10.0, 10.0) 76*f3087befSAndrew Turner TEST_ULP (V_NAME_D1 (sinh), 2.02) 77*f3087befSAndrew Turner TEST_DISABLE_FENV_IF_NOT (V_NAME_D1 (sinh), WANT_SIMD_EXCEPT) 78*f3087befSAndrew Turner TEST_SYM_INTERVAL (V_NAME_D1 (sinh), 0, 0x1p-26, 1000) 79*f3087befSAndrew Turner TEST_SYM_INTERVAL (V_NAME_D1 (sinh), 0x1p-26, 0x1p9, 500000) 80*f3087befSAndrew Turner TEST_SYM_INTERVAL (V_NAME_D1 (sinh), 0x1p9, inf, 1000) 81