1*f3087befSAndrew Turner /* 2*f3087befSAndrew Turner * Extended precision inverse error function. 3*f3087befSAndrew Turner * 4*f3087befSAndrew Turner * Copyright (c) 2023-2024, Arm Limited. 5*f3087befSAndrew Turner * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception 6*f3087befSAndrew Turner */ 7*f3087befSAndrew Turner #define _GNU_SOURCE 8*f3087befSAndrew Turner #include <math.h> 9*f3087befSAndrew Turner #include <stdbool.h> 10*f3087befSAndrew Turner #include <float.h> 11*f3087befSAndrew Turner 12*f3087befSAndrew Turner #include "math_config.h" 13*f3087befSAndrew Turner #include "poly_scalar_f64.h" 14*f3087befSAndrew Turner 15*f3087befSAndrew Turner #define SQRT_PIl 0x1.c5bf891b4ef6aa79c3b0520d5db9p0l 16*f3087befSAndrew Turner #define HF_SQRT_PIl 0x1.c5bf891b4ef6aa79c3b0520d5db9p-1l 17*f3087befSAndrew Turner 18*f3087befSAndrew Turner const static struct 19*f3087befSAndrew Turner { 20*f3087befSAndrew Turner /* We use P_N and Q_N to refer to arrays of coefficients, where P_N is the 21*f3087befSAndrew Turner coeffs of the numerator in table N of Blair et al, and Q_N is the coeffs 22*f3087befSAndrew Turner of the denominator. */ 23*f3087befSAndrew Turner double P_17[7], Q_17[7], P_37[8], Q_37[8], P_57[9], Q_57[10]; 24*f3087befSAndrew Turner } data = { 25*f3087befSAndrew Turner .P_17 = { 0x1.007ce8f01b2e8p+4, -0x1.6b23cc5c6c6d7p+6, 0x1.74e5f6ceb3548p+7, 26*f3087befSAndrew Turner -0x1.5200bb15cc6bbp+7, 0x1.05d193233a849p+6, -0x1.148c5474ee5e1p+3, 27*f3087befSAndrew Turner 0x1.689181bbafd0cp-3 }, 28*f3087befSAndrew Turner .Q_17 = { 0x1.d8fb0f913bd7bp+3, -0x1.6d7f25a3f1c24p+6, 0x1.a450d8e7f4cbbp+7, 29*f3087befSAndrew Turner -0x1.bc3480485857p+7, 0x1.ae6b0c504ee02p+6, -0x1.499dfec1a7f5fp+4, 30*f3087befSAndrew Turner 0x1p+0 }, 31*f3087befSAndrew Turner .P_37 = { -0x1.f3596123109edp-7, 0x1.60b8fe375999ep-2, -0x1.779bb9bef7c0fp+1, 32*f3087befSAndrew Turner 0x1.786ea384470a2p+3, -0x1.6a7c1453c85d3p+4, 0x1.31f0fc5613142p+4, 33*f3087befSAndrew Turner -0x1.5ea6c007d4dbbp+2, 0x1.e66f265ce9e5p-3 }, 34*f3087befSAndrew Turner .Q_37 = { -0x1.636b2dcf4edbep-7, 0x1.0b5411e2acf29p-2, -0x1.3413109467a0bp+1, 35*f3087befSAndrew Turner 0x1.563e8136c554ap+3, -0x1.7b77aab1dcafbp+4, 0x1.8a3e174e05ddcp+4, 36*f3087befSAndrew Turner -0x1.4075c56404eecp+3, 0x1p+0 }, 37*f3087befSAndrew Turner .P_57 = { 0x1.b874f9516f7f1p-14, 0x1.5921f2916c1c4p-7, 0x1.145ae7d5b8fa4p-2, 38*f3087befSAndrew Turner 0x1.29d6dcc3b2fb7p+1, 0x1.cabe2209a7985p+2, 0x1.11859f0745c4p+3, 39*f3087befSAndrew Turner 0x1.b7ec7bc6a2ce5p+2, 0x1.d0419e0bb42aep+1, 0x1.c5aa03eef7258p-1 }, 40*f3087befSAndrew Turner .Q_57 = { 0x1.b8747e12691f1p-14, 0x1.59240d8ed1e0ap-7, 0x1.14aef2b181e2p-2, 41*f3087befSAndrew Turner 0x1.2cd181bcea52p+1, 0x1.e6e63e0b7aa4cp+2, 0x1.65cf8da94aa3ap+3, 42*f3087befSAndrew Turner 0x1.7e5c787b10a36p+3, 0x1.0626d68b6cea3p+3, 0x1.065c5f193abf6p+2, 43*f3087befSAndrew Turner 0x1p+0 } 44*f3087befSAndrew Turner }; 45*f3087befSAndrew Turner 46*f3087befSAndrew Turner /* Inverse error function approximation, based on rational approximation as 47*f3087befSAndrew Turner described in 48*f3087befSAndrew Turner J. M. Blair, C. A. Edwards, and J. H. Johnson, 49*f3087befSAndrew Turner "Rational Chebyshev approximations for the inverse of the error function", 50*f3087befSAndrew Turner Math. Comp. 30, pp. 827--830 (1976). 51*f3087befSAndrew Turner https://doi.org/10.1090/S0025-5718-1976-0421040-7. */ 52*f3087befSAndrew Turner static inline double 53*f3087befSAndrew Turner __erfinv (double x) 54*f3087befSAndrew Turner { 55*f3087befSAndrew Turner if (x == 1.0) 56*f3087befSAndrew Turner return __math_oflow (0); 57*f3087befSAndrew Turner if (x == -1.0) 58*f3087befSAndrew Turner return __math_oflow (1); 59*f3087befSAndrew Turner 60*f3087befSAndrew Turner double a = fabs (x); 61*f3087befSAndrew Turner if (a > 1) 62*f3087befSAndrew Turner return __math_invalid (x); 63*f3087befSAndrew Turner 64*f3087befSAndrew Turner if (a <= 0.75) 65*f3087befSAndrew Turner { 66*f3087befSAndrew Turner double t = x * x - 0.5625; 67*f3087befSAndrew Turner return x * horner_6_f64 (t, data.P_17) / horner_6_f64 (t, data.Q_17); 68*f3087befSAndrew Turner } 69*f3087befSAndrew Turner 70*f3087befSAndrew Turner if (a <= 0.9375) 71*f3087befSAndrew Turner { 72*f3087befSAndrew Turner double t = x * x - 0.87890625; 73*f3087befSAndrew Turner return x * horner_7_f64 (t, data.P_37) / horner_7_f64 (t, data.Q_37); 74*f3087befSAndrew Turner } 75*f3087befSAndrew Turner 76*f3087befSAndrew Turner double t = 1.0 / (sqrtl (-log1pl (-a))); 77*f3087befSAndrew Turner return horner_8_f64 (t, data.P_57) 78*f3087befSAndrew Turner / (copysign (t, x) * horner_9_f64 (t, data.Q_57)); 79*f3087befSAndrew Turner } 80*f3087befSAndrew Turner 81*f3087befSAndrew Turner /* Extended-precision variant, which uses the above (or asymptotic estimate) as 82*f3087befSAndrew Turner starting point for Newton refinement. This implementation is a port to C of 83*f3087befSAndrew Turner the version in the SpecialFunctions.jl Julia package, with relaxed stopping 84*f3087befSAndrew Turner criteria for the Newton refinement. */ 85*f3087befSAndrew Turner long double 86*f3087befSAndrew Turner erfinvl (long double x) 87*f3087befSAndrew Turner { 88*f3087befSAndrew Turner if (x == 0) 89*f3087befSAndrew Turner return 0; 90*f3087befSAndrew Turner 91*f3087befSAndrew Turner double yf = __erfinv (x); 92*f3087befSAndrew Turner long double y; 93*f3087befSAndrew Turner if (isfinite (yf)) 94*f3087befSAndrew Turner y = yf; 95*f3087befSAndrew Turner else 96*f3087befSAndrew Turner { 97*f3087befSAndrew Turner /* Double overflowed, use asymptotic estimate instead. */ 98*f3087befSAndrew Turner y = copysignl (sqrtl (-logl (1.0l - fabsl (x)) * SQRT_PIl), x); 99*f3087befSAndrew Turner if (!isfinite (y)) 100*f3087befSAndrew Turner return y; 101*f3087befSAndrew Turner } 102*f3087befSAndrew Turner 103*f3087befSAndrew Turner double eps = fabs (yf - nextafter (yf, 0)); 104*f3087befSAndrew Turner while (true) 105*f3087befSAndrew Turner { 106*f3087befSAndrew Turner long double dy = HF_SQRT_PIl * (erfl (y) - x) * exp (y * y); 107*f3087befSAndrew Turner y -= dy; 108*f3087befSAndrew Turner /* Stopping criterion is different to Julia implementation, but is enough 109*f3087befSAndrew Turner to ensure result is accurate when rounded to double-precision. */ 110*f3087befSAndrew Turner if (fabsl (dy) < eps) 111*f3087befSAndrew Turner break; 112*f3087befSAndrew Turner } 113*f3087befSAndrew Turner return y; 114*f3087befSAndrew Turner } 115