1*f3087befSAndrew Turner /* 2*f3087befSAndrew Turner * Double-precision log(1+x) function. 3*f3087befSAndrew Turner * 4*f3087befSAndrew Turner * Copyright (c) 2022-2024, Arm Limited. 5*f3087befSAndrew Turner * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception 6*f3087befSAndrew Turner */ 7*f3087befSAndrew Turner 8*f3087befSAndrew Turner #include "poly_scalar_f64.h" 9*f3087befSAndrew Turner #include "math_config.h" 10*f3087befSAndrew Turner #include "test_sig.h" 11*f3087befSAndrew Turner #include "test_defs.h" 12*f3087befSAndrew Turner 13*f3087befSAndrew Turner #define Ln2Hi 0x1.62e42fefa3800p-1 14*f3087befSAndrew Turner #define Ln2Lo 0x1.ef35793c76730p-45 15*f3087befSAndrew Turner #define HfRt2Top 0x3fe6a09e /* top32(asuint64(sqrt(2)/2)). */ 16*f3087befSAndrew Turner #define OneMHfRt2Top \ 17*f3087befSAndrew Turner 0x00095f62 /* top32(asuint64(1)) - top32(asuint64(sqrt(2)/2)). */ 18*f3087befSAndrew Turner #define OneTop12 0x3ff 19*f3087befSAndrew Turner #define BottomMask 0xffffffff 20*f3087befSAndrew Turner #define OneMHfRt2 0x3fd2bec333018866 21*f3087befSAndrew Turner #define Rt2MOne 0x3fda827999fcef32 22*f3087befSAndrew Turner #define AbsMask 0x7fffffffffffffff 23*f3087befSAndrew Turner #define ExpM63 0x3c00 24*f3087befSAndrew Turner 25*f3087befSAndrew Turner static inline double 26*f3087befSAndrew Turner eval_poly (double f) 27*f3087befSAndrew Turner { 28*f3087befSAndrew Turner double f2 = f * f; 29*f3087befSAndrew Turner double f4 = f2 * f2; 30*f3087befSAndrew Turner double f8 = f4 * f4; 31*f3087befSAndrew Turner return estrin_18_f64 (f, f2, f4, f8, f8 * f8, __log1p_data.coeffs); 32*f3087befSAndrew Turner } 33*f3087befSAndrew Turner 34*f3087befSAndrew Turner /* log1p approximation using polynomial on reduced interval. Largest 35*f3087befSAndrew Turner observed errors are near the lower boundary of the region where k 36*f3087befSAndrew Turner is 0. 37*f3087befSAndrew Turner Maximum measured error: 1.75ULP. 38*f3087befSAndrew Turner log1p(-0x1.2e1aea97b3e5cp-2) got -0x1.65fb8659a2f9p-2 39*f3087befSAndrew Turner want -0x1.65fb8659a2f92p-2. */ 40*f3087befSAndrew Turner double 41*f3087befSAndrew Turner log1p (double x) 42*f3087befSAndrew Turner { 43*f3087befSAndrew Turner uint64_t ix = asuint64 (x); 44*f3087befSAndrew Turner uint64_t ia = ix & AbsMask; 45*f3087befSAndrew Turner uint32_t ia16 = ia >> 48; 46*f3087befSAndrew Turner 47*f3087befSAndrew Turner /* Handle special cases first. */ 48*f3087befSAndrew Turner if (unlikely (ia16 >= 0x7ff0 || ix >= 0xbff0000000000000 49*f3087befSAndrew Turner || ix == 0x8000000000000000)) 50*f3087befSAndrew Turner { 51*f3087befSAndrew Turner if (ix == 0x8000000000000000 || ix == 0x7ff0000000000000) 52*f3087befSAndrew Turner { 53*f3087befSAndrew Turner /* x == -0 => log1p(x) = -0. 54*f3087befSAndrew Turner x == Inf => log1p(x) = Inf. */ 55*f3087befSAndrew Turner return x; 56*f3087befSAndrew Turner } 57*f3087befSAndrew Turner if (ix == 0xbff0000000000000) 58*f3087befSAndrew Turner { 59*f3087befSAndrew Turner /* x == -1 => log1p(x) = -Inf. */ 60*f3087befSAndrew Turner return __math_divzero (-1); 61*f3087befSAndrew Turner ; 62*f3087befSAndrew Turner } 63*f3087befSAndrew Turner if (ia16 >= 0x7ff0) 64*f3087befSAndrew Turner { 65*f3087befSAndrew Turner /* x == +/-NaN => log1p(x) = NaN. */ 66*f3087befSAndrew Turner return __math_invalid (asdouble (ia)); 67*f3087befSAndrew Turner } 68*f3087befSAndrew Turner /* x < -1 => log1p(x) = NaN. 69*f3087befSAndrew Turner x == -Inf => log1p(x) = NaN. */ 70*f3087befSAndrew Turner return __math_invalid (x); 71*f3087befSAndrew Turner } 72*f3087befSAndrew Turner 73*f3087befSAndrew Turner /* With x + 1 = t * 2^k (where t = f + 1 and k is chosen such that f 74*f3087befSAndrew Turner is in [sqrt(2)/2, sqrt(2)]): 75*f3087befSAndrew Turner log1p(x) = k*log(2) + log1p(f). 76*f3087befSAndrew Turner 77*f3087befSAndrew Turner f may not be representable exactly, so we need a correction term: 78*f3087befSAndrew Turner let m = round(1 + x), c = (1 + x) - m. 79*f3087befSAndrew Turner c << m: at very small x, log1p(x) ~ x, hence: 80*f3087befSAndrew Turner log(1+x) - log(m) ~ c/m. 81*f3087befSAndrew Turner 82*f3087befSAndrew Turner We therefore calculate log1p(x) by k*log2 + log1p(f) + c/m. */ 83*f3087befSAndrew Turner 84*f3087befSAndrew Turner uint64_t sign = ix & ~AbsMask; 85*f3087befSAndrew Turner if (ia <= OneMHfRt2 || (!sign && ia <= Rt2MOne)) 86*f3087befSAndrew Turner { 87*f3087befSAndrew Turner if (unlikely (ia16 <= ExpM63)) 88*f3087befSAndrew Turner { 89*f3087befSAndrew Turner /* If exponent of x <= -63 then shortcut the polynomial and avoid 90*f3087befSAndrew Turner underflow by just returning x, which is exactly rounded in this 91*f3087befSAndrew Turner region. */ 92*f3087befSAndrew Turner return x; 93*f3087befSAndrew Turner } 94*f3087befSAndrew Turner /* If x is in [sqrt(2)/2 - 1, sqrt(2) - 1] then we can shortcut all the 95*f3087befSAndrew Turner logic below, as k = 0 and f = x and therefore representable exactly. 96*f3087befSAndrew Turner All we need is to return the polynomial. */ 97*f3087befSAndrew Turner return fma (x, eval_poly (x) * x, x); 98*f3087befSAndrew Turner } 99*f3087befSAndrew Turner 100*f3087befSAndrew Turner /* Obtain correctly scaled k by manipulation in the exponent. */ 101*f3087befSAndrew Turner double m = x + 1; 102*f3087befSAndrew Turner uint64_t mi = asuint64 (m); 103*f3087befSAndrew Turner uint32_t u = (mi >> 32) + OneMHfRt2Top; 104*f3087befSAndrew Turner int32_t k = (int32_t) (u >> 20) - OneTop12; 105*f3087befSAndrew Turner 106*f3087befSAndrew Turner /* Correction term c/m. */ 107*f3087befSAndrew Turner double cm = (x - (m - 1)) / m; 108*f3087befSAndrew Turner 109*f3087befSAndrew Turner /* Reduce x to f in [sqrt(2)/2, sqrt(2)]. */ 110*f3087befSAndrew Turner uint32_t utop = (u & 0x000fffff) + HfRt2Top; 111*f3087befSAndrew Turner uint64_t u_red = ((uint64_t) utop << 32) | (mi & BottomMask); 112*f3087befSAndrew Turner double f = asdouble (u_red) - 1; 113*f3087befSAndrew Turner 114*f3087befSAndrew Turner /* Approximate log1p(x) on the reduced input using a polynomial. Because 115*f3087befSAndrew Turner log1p(0)=0 we choose an approximation of the form: 116*f3087befSAndrew Turner x + C0*x^2 + C1*x^3 + C2x^4 + ... 117*f3087befSAndrew Turner Hence approximation has the form f + f^2 * P(f) 118*f3087befSAndrew Turner where P(x) = C0 + C1*x + C2x^2 + ... */ 119*f3087befSAndrew Turner double p = fma (f, eval_poly (f) * f, f); 120*f3087befSAndrew Turner 121*f3087befSAndrew Turner double kd = k; 122*f3087befSAndrew Turner double y = fma (Ln2Lo, kd, cm); 123*f3087befSAndrew Turner return y + fma (Ln2Hi, kd, p); 124*f3087befSAndrew Turner } 125*f3087befSAndrew Turner 126*f3087befSAndrew Turner TEST_SIG (S, D, 1, log1p, -0.9, 10.0) 127*f3087befSAndrew Turner TEST_ULP (log1p, 1.26) 128*f3087befSAndrew Turner TEST_SYM_INTERVAL (log1p, 0.0, 0x1p-23, 50000) 129*f3087befSAndrew Turner TEST_SYM_INTERVAL (log1p, 0x1p-23, 0.001, 50000) 130*f3087befSAndrew Turner TEST_SYM_INTERVAL (log1p, 0.001, 1.0, 50000) 131*f3087befSAndrew Turner TEST_SYM_INTERVAL (log1p, 1.0, inf, 5000) 132