131914882SAlex Richardson /* 231914882SAlex Richardson * Double-precision erf(x) function. 331914882SAlex Richardson * 4*f3087befSAndrew Turner * Copyright (c) 2020-2024, Arm Limited. 5072a4ba8SAndrew Turner * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception 631914882SAlex Richardson */ 731914882SAlex Richardson 831914882SAlex Richardson #include "math_config.h" 931914882SAlex Richardson #include <math.h> 1031914882SAlex Richardson #include <stdint.h> 11*f3087befSAndrew Turner #include "test_defs.h" 12*f3087befSAndrew Turner #include "test_sig.h" 1331914882SAlex Richardson 1431914882SAlex Richardson #define TwoOverSqrtPiMinusOne 0x1.06eba8214db69p-3 1531914882SAlex Richardson #define C 0x1.b0ac16p-1 1631914882SAlex Richardson #define PA __erf_data.erf_poly_A 1731914882SAlex Richardson #define NA __erf_data.erf_ratio_N_A 1831914882SAlex Richardson #define DA __erf_data.erf_ratio_D_A 1931914882SAlex Richardson #define NB __erf_data.erf_ratio_N_B 2031914882SAlex Richardson #define DB __erf_data.erf_ratio_D_B 2131914882SAlex Richardson #define PC __erf_data.erfc_poly_C 2231914882SAlex Richardson #define PD __erf_data.erfc_poly_D 2331914882SAlex Richardson #define PE __erf_data.erfc_poly_E 2431914882SAlex Richardson #define PF __erf_data.erfc_poly_F 2531914882SAlex Richardson 2631914882SAlex Richardson /* Top 32 bits of a double. */ 2731914882SAlex Richardson static inline uint32_t 2831914882SAlex Richardson top32 (double x) 2931914882SAlex Richardson { 3031914882SAlex Richardson return asuint64 (x) >> 32; 3131914882SAlex Richardson } 3231914882SAlex Richardson 3331914882SAlex Richardson /* Fast erf implementation using a mix of 3431914882SAlex Richardson rational and polynomial approximations. 3531914882SAlex Richardson Highest measured error is 1.01 ULPs at 0x1.39956ac43382fp+0. */ 3631914882SAlex Richardson double 3731914882SAlex Richardson erf (double x) 3831914882SAlex Richardson { 3931914882SAlex Richardson /* Get top word and sign. */ 4031914882SAlex Richardson uint32_t ix = top32 (x); 4131914882SAlex Richardson uint32_t ia = ix & 0x7fffffff; 4231914882SAlex Richardson uint32_t sign = ix >> 31; 4331914882SAlex Richardson 4431914882SAlex Richardson /* Normalized and subnormal cases */ 4531914882SAlex Richardson if (ia < 0x3feb0000) 4631914882SAlex Richardson { /* a = |x| < 0.84375. */ 4731914882SAlex Richardson 4831914882SAlex Richardson if (ia < 0x3e300000) 4931914882SAlex Richardson { /* a < 2^(-28). */ 5031914882SAlex Richardson if (ia < 0x00800000) 5131914882SAlex Richardson { /* a < 2^(-1015). */ 5231914882SAlex Richardson double y = fma (TwoOverSqrtPiMinusOne, x, x); 5331914882SAlex Richardson return check_uflow (y); 5431914882SAlex Richardson } 5531914882SAlex Richardson return x + TwoOverSqrtPiMinusOne * x; 5631914882SAlex Richardson } 5731914882SAlex Richardson 5831914882SAlex Richardson double x2 = x * x; 5931914882SAlex Richardson 6031914882SAlex Richardson if (ia < 0x3fe00000) 6131914882SAlex Richardson { /* a < 0.5 - Use polynomial approximation. */ 6231914882SAlex Richardson double r1 = fma (x2, PA[1], PA[0]); 6331914882SAlex Richardson double r2 = fma (x2, PA[3], PA[2]); 6431914882SAlex Richardson double r3 = fma (x2, PA[5], PA[4]); 6531914882SAlex Richardson double r4 = fma (x2, PA[7], PA[6]); 6631914882SAlex Richardson double r5 = fma (x2, PA[9], PA[8]); 6731914882SAlex Richardson double x4 = x2 * x2; 6831914882SAlex Richardson double r = r5; 6931914882SAlex Richardson r = fma (x4, r, r4); 7031914882SAlex Richardson r = fma (x4, r, r3); 7131914882SAlex Richardson r = fma (x4, r, r2); 7231914882SAlex Richardson r = fma (x4, r, r1); 7331914882SAlex Richardson return fma (r, x, x); /* This fma is crucial for accuracy. */ 7431914882SAlex Richardson } 7531914882SAlex Richardson else 7631914882SAlex Richardson { /* 0.5 <= a < 0.84375 - Use rational approximation. */ 7731914882SAlex Richardson double x4, x8, r1n, r2n, r1d, r2d, r3d; 7831914882SAlex Richardson 7931914882SAlex Richardson r1n = fma (x2, NA[1], NA[0]); 8031914882SAlex Richardson x4 = x2 * x2; 8131914882SAlex Richardson r2n = fma (x2, NA[3], NA[2]); 8231914882SAlex Richardson x8 = x4 * x4; 8331914882SAlex Richardson r1d = fma (x2, DA[0], 1.0); 8431914882SAlex Richardson r2d = fma (x2, DA[2], DA[1]); 8531914882SAlex Richardson r3d = fma (x2, DA[4], DA[3]); 8631914882SAlex Richardson double P = r1n + x4 * r2n + x8 * NA[4]; 8731914882SAlex Richardson double Q = r1d + x4 * r2d + x8 * r3d; 8831914882SAlex Richardson return fma (P / Q, x, x); 8931914882SAlex Richardson } 9031914882SAlex Richardson } 9131914882SAlex Richardson else if (ia < 0x3ff40000) 9231914882SAlex Richardson { /* 0.84375 <= |x| < 1.25. */ 9331914882SAlex Richardson double a2, a4, a6, r1n, r2n, r3n, r4n, r1d, r2d, r3d, r4d; 9431914882SAlex Richardson double a = fabs (x) - 1.0; 9531914882SAlex Richardson r1n = fma (a, NB[1], NB[0]); 9631914882SAlex Richardson a2 = a * a; 9731914882SAlex Richardson r1d = fma (a, DB[0], 1.0); 9831914882SAlex Richardson a4 = a2 * a2; 9931914882SAlex Richardson r2n = fma (a, NB[3], NB[2]); 10031914882SAlex Richardson a6 = a4 * a2; 10131914882SAlex Richardson r2d = fma (a, DB[2], DB[1]); 10231914882SAlex Richardson r3n = fma (a, NB[5], NB[4]); 10331914882SAlex Richardson r3d = fma (a, DB[4], DB[3]); 10431914882SAlex Richardson r4n = NB[6]; 10531914882SAlex Richardson r4d = DB[5]; 10631914882SAlex Richardson double P = r1n + a2 * r2n + a4 * r3n + a6 * r4n; 10731914882SAlex Richardson double Q = r1d + a2 * r2d + a4 * r3d + a6 * r4d; 10831914882SAlex Richardson if (sign) 10931914882SAlex Richardson return -C - P / Q; 11031914882SAlex Richardson else 11131914882SAlex Richardson return C + P / Q; 11231914882SAlex Richardson } 11331914882SAlex Richardson else if (ia < 0x40000000) 11431914882SAlex Richardson { /* 1.25 <= |x| < 2.0. */ 11531914882SAlex Richardson double a = fabs (x); 11631914882SAlex Richardson a = a - 1.25; 11731914882SAlex Richardson 11831914882SAlex Richardson double r1 = fma (a, PC[1], PC[0]); 11931914882SAlex Richardson double r2 = fma (a, PC[3], PC[2]); 12031914882SAlex Richardson double r3 = fma (a, PC[5], PC[4]); 12131914882SAlex Richardson double r4 = fma (a, PC[7], PC[6]); 12231914882SAlex Richardson double r5 = fma (a, PC[9], PC[8]); 12331914882SAlex Richardson double r6 = fma (a, PC[11], PC[10]); 12431914882SAlex Richardson double r7 = fma (a, PC[13], PC[12]); 12531914882SAlex Richardson double r8 = fma (a, PC[15], PC[14]); 12631914882SAlex Richardson 12731914882SAlex Richardson double a2 = a * a; 12831914882SAlex Richardson 12931914882SAlex Richardson double r = r8; 13031914882SAlex Richardson r = fma (a2, r, r7); 13131914882SAlex Richardson r = fma (a2, r, r6); 13231914882SAlex Richardson r = fma (a2, r, r5); 13331914882SAlex Richardson r = fma (a2, r, r4); 13431914882SAlex Richardson r = fma (a2, r, r3); 13531914882SAlex Richardson r = fma (a2, r, r2); 13631914882SAlex Richardson r = fma (a2, r, r1); 13731914882SAlex Richardson 13831914882SAlex Richardson if (sign) 13931914882SAlex Richardson return -1.0 + r; 14031914882SAlex Richardson else 14131914882SAlex Richardson return 1.0 - r; 14231914882SAlex Richardson } 14331914882SAlex Richardson else if (ia < 0x400a0000) 14431914882SAlex Richardson { /* 2 <= |x| < 3.25. */ 14531914882SAlex Richardson double a = fabs (x); 14631914882SAlex Richardson a = fma (0.5, a, -1.0); 14731914882SAlex Richardson 14831914882SAlex Richardson double r1 = fma (a, PD[1], PD[0]); 14931914882SAlex Richardson double r2 = fma (a, PD[3], PD[2]); 15031914882SAlex Richardson double r3 = fma (a, PD[5], PD[4]); 15131914882SAlex Richardson double r4 = fma (a, PD[7], PD[6]); 15231914882SAlex Richardson double r5 = fma (a, PD[9], PD[8]); 15331914882SAlex Richardson double r6 = fma (a, PD[11], PD[10]); 15431914882SAlex Richardson double r7 = fma (a, PD[13], PD[12]); 15531914882SAlex Richardson double r8 = fma (a, PD[15], PD[14]); 15631914882SAlex Richardson double r9 = fma (a, PD[17], PD[16]); 15731914882SAlex Richardson 15831914882SAlex Richardson double a2 = a * a; 15931914882SAlex Richardson 16031914882SAlex Richardson double r = r9; 16131914882SAlex Richardson r = fma (a2, r, r8); 16231914882SAlex Richardson r = fma (a2, r, r7); 16331914882SAlex Richardson r = fma (a2, r, r6); 16431914882SAlex Richardson r = fma (a2, r, r5); 16531914882SAlex Richardson r = fma (a2, r, r4); 16631914882SAlex Richardson r = fma (a2, r, r3); 16731914882SAlex Richardson r = fma (a2, r, r2); 16831914882SAlex Richardson r = fma (a2, r, r1); 16931914882SAlex Richardson 17031914882SAlex Richardson if (sign) 17131914882SAlex Richardson return -1.0 + r; 17231914882SAlex Richardson else 17331914882SAlex Richardson return 1.0 - r; 17431914882SAlex Richardson } 17531914882SAlex Richardson else if (ia < 0x40100000) 17631914882SAlex Richardson { /* 3.25 <= |x| < 4.0. */ 17731914882SAlex Richardson double a = fabs (x); 17831914882SAlex Richardson a = a - 3.25; 17931914882SAlex Richardson 18031914882SAlex Richardson double r1 = fma (a, PE[1], PE[0]); 18131914882SAlex Richardson double r2 = fma (a, PE[3], PE[2]); 18231914882SAlex Richardson double r3 = fma (a, PE[5], PE[4]); 18331914882SAlex Richardson double r4 = fma (a, PE[7], PE[6]); 18431914882SAlex Richardson double r5 = fma (a, PE[9], PE[8]); 18531914882SAlex Richardson double r6 = fma (a, PE[11], PE[10]); 18631914882SAlex Richardson double r7 = fma (a, PE[13], PE[12]); 18731914882SAlex Richardson 18831914882SAlex Richardson double a2 = a * a; 18931914882SAlex Richardson 19031914882SAlex Richardson double r = r7; 19131914882SAlex Richardson r = fma (a2, r, r6); 19231914882SAlex Richardson r = fma (a2, r, r5); 19331914882SAlex Richardson r = fma (a2, r, r4); 19431914882SAlex Richardson r = fma (a2, r, r3); 19531914882SAlex Richardson r = fma (a2, r, r2); 19631914882SAlex Richardson r = fma (a2, r, r1); 19731914882SAlex Richardson 19831914882SAlex Richardson if (sign) 19931914882SAlex Richardson return -1.0 + r; 20031914882SAlex Richardson else 20131914882SAlex Richardson return 1.0 - r; 20231914882SAlex Richardson } 20331914882SAlex Richardson else if (ia < 0x4017a000) 20431914882SAlex Richardson { /* 4 <= |x| < 5.90625. */ 20531914882SAlex Richardson double a = fabs (x); 20631914882SAlex Richardson a = fma (0.5, a, -2.0); 20731914882SAlex Richardson 20831914882SAlex Richardson double r1 = fma (a, PF[1], PF[0]); 20931914882SAlex Richardson double r2 = fma (a, PF[3], PF[2]); 21031914882SAlex Richardson double r3 = fma (a, PF[5], PF[4]); 21131914882SAlex Richardson double r4 = fma (a, PF[7], PF[6]); 21231914882SAlex Richardson double r5 = fma (a, PF[9], PF[8]); 21331914882SAlex Richardson double r6 = fma (a, PF[11], PF[10]); 21431914882SAlex Richardson double r7 = fma (a, PF[13], PF[12]); 21531914882SAlex Richardson double r8 = fma (a, PF[15], PF[14]); 21631914882SAlex Richardson double r9 = PF[16]; 21731914882SAlex Richardson 21831914882SAlex Richardson double a2 = a * a; 21931914882SAlex Richardson 22031914882SAlex Richardson double r = r9; 22131914882SAlex Richardson r = fma (a2, r, r8); 22231914882SAlex Richardson r = fma (a2, r, r7); 22331914882SAlex Richardson r = fma (a2, r, r6); 22431914882SAlex Richardson r = fma (a2, r, r5); 22531914882SAlex Richardson r = fma (a2, r, r4); 22631914882SAlex Richardson r = fma (a2, r, r3); 22731914882SAlex Richardson r = fma (a2, r, r2); 22831914882SAlex Richardson r = fma (a2, r, r1); 22931914882SAlex Richardson 23031914882SAlex Richardson if (sign) 23131914882SAlex Richardson return -1.0 + r; 23231914882SAlex Richardson else 23331914882SAlex Richardson return 1.0 - r; 23431914882SAlex Richardson } 23531914882SAlex Richardson else 23631914882SAlex Richardson { 23731914882SAlex Richardson /* Special cases : erf(nan)=nan, erf(+inf)=+1 and erf(-inf)=-1. */ 23831914882SAlex Richardson if (unlikely (ia >= 0x7ff00000)) 23931914882SAlex Richardson return (double) (1.0 - (sign << 1)) + 1.0 / x; 24031914882SAlex Richardson 24131914882SAlex Richardson if (sign) 24231914882SAlex Richardson return -1.0; 24331914882SAlex Richardson else 24431914882SAlex Richardson return 1.0; 24531914882SAlex Richardson } 24631914882SAlex Richardson } 247*f3087befSAndrew Turner 248*f3087befSAndrew Turner TEST_SIG (S, D, 1, erf, -6.0, 6.0) 249*f3087befSAndrew Turner TEST_ULP (erf, 0.51) 250*f3087befSAndrew Turner TEST_ULP_NONNEAREST (erf, 0.9) 251*f3087befSAndrew Turner TEST_INTERVAL (erf, 0, 0xffff000000000000, 10000) 252*f3087befSAndrew Turner TEST_SYM_INTERVAL (erf, 0x1p-1022, 0x1p-26, 40000) 253*f3087befSAndrew Turner TEST_SYM_INTERVAL (erf, 0x1p-26, 0x1p3, 40000) 254*f3087befSAndrew Turner TEST_INTERVAL (erf, 0, inf, 40000) 255