1*5a02ffc3SAndrew Turner# -*- julia -*- 2*5a02ffc3SAndrew Turner# 3*5a02ffc3SAndrew Turner# Generate tgamma128.h, containing polynomials and constants used by 4*5a02ffc3SAndrew Turner# tgamma128.c. 5*5a02ffc3SAndrew Turner# 6*5a02ffc3SAndrew Turner# Copyright (c) 2006-2023, Arm Limited. 7*5a02ffc3SAndrew Turner# SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception 8*5a02ffc3SAndrew Turner 9*5a02ffc3SAndrew Turner# This Julia program depends on the 'Remez' and 'SpecialFunctions' 10*5a02ffc3SAndrew Turner# library packages. To install them, run this at the interactive Julia 11*5a02ffc3SAndrew Turner# prompt: 12*5a02ffc3SAndrew Turner# 13*5a02ffc3SAndrew Turner# import Pkg; Pkg.add(["Remez", "SpecialFunctions"]) 14*5a02ffc3SAndrew Turner# 15*5a02ffc3SAndrew Turner# Tested on Julia 1.4.1 (Ubuntu 20.04) and 1.9.0 (22.04). 16*5a02ffc3SAndrew Turner 17*5a02ffc3SAndrew Turnerimport Printf 18*5a02ffc3SAndrew Turnerimport Remez 19*5a02ffc3SAndrew Turnerimport SpecialFunctions 20*5a02ffc3SAndrew Turner 21*5a02ffc3SAndrew Turner# Round a BigFloat to 128-bit long double and format it as a C99 hex 22*5a02ffc3SAndrew Turner# float literal. 23*5a02ffc3SAndrew Turnerfunction quadhex(x) 24*5a02ffc3SAndrew Turner sign = " " 25*5a02ffc3SAndrew Turner if x < 0 26*5a02ffc3SAndrew Turner sign = "-" 27*5a02ffc3SAndrew Turner x = -x 28*5a02ffc3SAndrew Turner end 29*5a02ffc3SAndrew Turner 30*5a02ffc3SAndrew Turner exponent = BigInt(floor(log2(x))) 31*5a02ffc3SAndrew Turner exponent = max(exponent, -16382) 32*5a02ffc3SAndrew Turner @assert(exponent <= 16383) # else overflow 33*5a02ffc3SAndrew Turner 34*5a02ffc3SAndrew Turner x /= BigFloat(2)^exponent 35*5a02ffc3SAndrew Turner @assert(1 <= x < 2) 36*5a02ffc3SAndrew Turner x *= BigFloat(2)^112 37*5a02ffc3SAndrew Turner mantissa = BigInt(round(x)) 38*5a02ffc3SAndrew Turner 39*5a02ffc3SAndrew Turner mantstr = string(mantissa, base=16, pad=29) 40*5a02ffc3SAndrew Turner return Printf.@sprintf("%s0x%s.%sp%+dL", sign, mantstr[1], mantstr[2:end], 41*5a02ffc3SAndrew Turner exponent) 42*5a02ffc3SAndrew Turnerend 43*5a02ffc3SAndrew Turner 44*5a02ffc3SAndrew Turner# Round a BigFloat to 128-bit long double and return it still as a 45*5a02ffc3SAndrew Turner# BigFloat. 46*5a02ffc3SAndrew Turnerfunction quadval(x, round=0) 47*5a02ffc3SAndrew Turner sign = +1 48*5a02ffc3SAndrew Turner if x.sign < 0 49*5a02ffc3SAndrew Turner sign = -1 50*5a02ffc3SAndrew Turner x = -x 51*5a02ffc3SAndrew Turner end 52*5a02ffc3SAndrew Turner 53*5a02ffc3SAndrew Turner exponent = BigInt(floor(log2(x))) 54*5a02ffc3SAndrew Turner exponent = max(exponent, -16382) 55*5a02ffc3SAndrew Turner @assert(exponent <= 16383) # else overflow 56*5a02ffc3SAndrew Turner 57*5a02ffc3SAndrew Turner x /= BigFloat(2)^exponent 58*5a02ffc3SAndrew Turner @assert(1 <= x < 2) 59*5a02ffc3SAndrew Turner x *= BigFloat(2)^112 60*5a02ffc3SAndrew Turner if round < 0 61*5a02ffc3SAndrew Turner mantissa = floor(x) 62*5a02ffc3SAndrew Turner elseif round > 0 63*5a02ffc3SAndrew Turner mantissa = ceil(x) 64*5a02ffc3SAndrew Turner else 65*5a02ffc3SAndrew Turner mantissa = round(x) 66*5a02ffc3SAndrew Turner end 67*5a02ffc3SAndrew Turner 68*5a02ffc3SAndrew Turner return sign * mantissa * BigFloat(2)^(exponent - 112) 69*5a02ffc3SAndrew Turnerend 70*5a02ffc3SAndrew Turner 71*5a02ffc3SAndrew Turner# Output an array of BigFloats as a C array declaration. 72*5a02ffc3SAndrew Turnerfunction dumparray(a, name) 73*5a02ffc3SAndrew Turner println("static const long double ", name, "[] = {") 74*5a02ffc3SAndrew Turner for x in N 75*5a02ffc3SAndrew Turner println(" ", quadhex(x), ",") 76*5a02ffc3SAndrew Turner end 77*5a02ffc3SAndrew Turner println("};") 78*5a02ffc3SAndrew Turnerend 79*5a02ffc3SAndrew Turner 80*5a02ffc3SAndrew Turnerprint("/* 81*5a02ffc3SAndrew Turner * Polynomial coefficients and other constants for tgamma128.c. 82*5a02ffc3SAndrew Turner * 83*5a02ffc3SAndrew Turner * Copyright (c) 2006,2009,2023 Arm Limited. 84*5a02ffc3SAndrew Turner * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception 85*5a02ffc3SAndrew Turner */ 86*5a02ffc3SAndrew Turner") 87*5a02ffc3SAndrew Turner 88*5a02ffc3SAndrew TurnerBase.MPFR.setprecision(512) 89*5a02ffc3SAndrew Turner 90*5a02ffc3SAndrew Turnere = exp(BigFloat(1)) 91*5a02ffc3SAndrew Turner 92*5a02ffc3SAndrew Turnerprint(" 93*5a02ffc3SAndrew Turner/* The largest positive value for which 128-bit tgamma does not overflow. */ 94*5a02ffc3SAndrew Turner") 95*5a02ffc3SAndrew Turnerlo = BigFloat("1000") 96*5a02ffc3SAndrew Turnerhi = BigFloat("2000") 97*5a02ffc3SAndrew Turnerwhile true 98*5a02ffc3SAndrew Turner global lo 99*5a02ffc3SAndrew Turner global hi 100*5a02ffc3SAndrew Turner global max_x 101*5a02ffc3SAndrew Turner 102*5a02ffc3SAndrew Turner mid = (lo + hi) / 2 103*5a02ffc3SAndrew Turner if mid == lo || mid == hi 104*5a02ffc3SAndrew Turner max_x = mid 105*5a02ffc3SAndrew Turner break 106*5a02ffc3SAndrew Turner end 107*5a02ffc3SAndrew Turner if SpecialFunctions.logabsgamma(mid)[1] < 16384 * log(BigFloat(2)) 108*5a02ffc3SAndrew Turner lo = mid 109*5a02ffc3SAndrew Turner else 110*5a02ffc3SAndrew Turner hi = mid 111*5a02ffc3SAndrew Turner end 112*5a02ffc3SAndrew Turnerend 113*5a02ffc3SAndrew Turnermax_x = quadval(max_x, -1) 114*5a02ffc3SAndrew Turnerprintln("static const long double max_x = ", quadhex(max_x), ";") 115*5a02ffc3SAndrew Turner 116*5a02ffc3SAndrew Turnerprint(" 117*5a02ffc3SAndrew Turner/* Coefficients of the polynomial used in the tgamma_large() subroutine */ 118*5a02ffc3SAndrew Turner") 119*5a02ffc3SAndrew TurnerN, D, E, X = Remez.ratfn_minimax( 120*5a02ffc3SAndrew Turner x -> x==0 ? sqrt(BigFloat(2)*pi/e) : 121*5a02ffc3SAndrew Turner exp(SpecialFunctions.logabsgamma(1/x)[1] + 122*5a02ffc3SAndrew Turner (1/x-0.5)*(1+log(x))), 123*5a02ffc3SAndrew Turner (0, 1/BigFloat(8)), 124*5a02ffc3SAndrew Turner 24, 0, 125*5a02ffc3SAndrew Turner (x, y) -> 1/y 126*5a02ffc3SAndrew Turner) 127*5a02ffc3SAndrew Turnerdumparray(N, "coeffs_large") 128*5a02ffc3SAndrew Turner 129*5a02ffc3SAndrew Turnerprint(" 130*5a02ffc3SAndrew Turner/* Coefficients of the polynomial used in the tgamma_tiny() subroutine */ 131*5a02ffc3SAndrew Turner") 132*5a02ffc3SAndrew TurnerN, D, E, X = Remez.ratfn_minimax( 133*5a02ffc3SAndrew Turner x -> x==0 ? 1 : 1/(x*SpecialFunctions.gamma(x)), 134*5a02ffc3SAndrew Turner (0, 1/BigFloat(32)), 135*5a02ffc3SAndrew Turner 13, 0, 136*5a02ffc3SAndrew Turner) 137*5a02ffc3SAndrew Turnerdumparray(N, "coeffs_tiny") 138*5a02ffc3SAndrew Turner 139*5a02ffc3SAndrew Turnerprint(" 140*5a02ffc3SAndrew Turner/* The location within the interval [1,2] where gamma has a minimum. 141*5a02ffc3SAndrew Turner * Specified as the sum of two 128-bit values, for extra precision. */ 142*5a02ffc3SAndrew Turner") 143*5a02ffc3SAndrew Turnerlo = BigFloat("1.4") 144*5a02ffc3SAndrew Turnerhi = BigFloat("1.5") 145*5a02ffc3SAndrew Turnerwhile true 146*5a02ffc3SAndrew Turner global lo 147*5a02ffc3SAndrew Turner global hi 148*5a02ffc3SAndrew Turner global min_x 149*5a02ffc3SAndrew Turner 150*5a02ffc3SAndrew Turner mid = (lo + hi) / 2 151*5a02ffc3SAndrew Turner if mid == lo || mid == hi 152*5a02ffc3SAndrew Turner min_x = mid 153*5a02ffc3SAndrew Turner break 154*5a02ffc3SAndrew Turner end 155*5a02ffc3SAndrew Turner if SpecialFunctions.digamma(mid) < 0 156*5a02ffc3SAndrew Turner lo = mid 157*5a02ffc3SAndrew Turner else 158*5a02ffc3SAndrew Turner hi = mid 159*5a02ffc3SAndrew Turner end 160*5a02ffc3SAndrew Turnerend 161*5a02ffc3SAndrew Turnermin_x_hi = quadval(min_x, -1) 162*5a02ffc3SAndrew Turnerprintln("static const long double min_x_hi = ", quadhex(min_x_hi), ";") 163*5a02ffc3SAndrew Turnerprintln("static const long double min_x_lo = ", quadhex(min_x - min_x_hi), ";") 164*5a02ffc3SAndrew Turner 165*5a02ffc3SAndrew Turnerprint(" 166*5a02ffc3SAndrew Turner/* The actual minimum value that gamma takes at that location. 167*5a02ffc3SAndrew Turner * Again specified as the sum of two 128-bit values. */ 168*5a02ffc3SAndrew Turner") 169*5a02ffc3SAndrew Turnermin_y = SpecialFunctions.gamma(min_x) 170*5a02ffc3SAndrew Turnermin_y_hi = quadval(min_y, -1) 171*5a02ffc3SAndrew Turnerprintln("static const long double min_y_hi = ", quadhex(min_y_hi), ";") 172*5a02ffc3SAndrew Turnerprintln("static const long double min_y_lo = ", quadhex(min_y - min_y_hi), ";") 173*5a02ffc3SAndrew Turner 174*5a02ffc3SAndrew Turnerfunction taylor_bodge(x) 175*5a02ffc3SAndrew Turner # Taylor series generated by Wolfram Alpha for (gamma(min_x+x)-min_y)/x^2. 176*5a02ffc3SAndrew Turner # Used in the Remez calls below for x values very near the origin, to avoid 177*5a02ffc3SAndrew Turner # significance loss problems when trying to compute it directly via that 178*5a02ffc3SAndrew Turner # formula (even in MPFR's extra precision). 179*5a02ffc3SAndrew Turner return BigFloat("0.428486815855585429730209907810650582960483696962660010556335457558784421896667728014324097132413696263704801646004585959298743677879606168187061990204432200")+x*(-BigFloat("0.130704158939785761928008749242671025181542078105370084716141350308119418619652583986015464395882363802104154017741656168641240436089858504560718773026275797")+x*(BigFloat("0.160890753325112844190519489594363387594505844658437718135952967735294789599989664428071656484587979507034160383271974554122934842441540146372016567834062876")+x*(-BigFloat("0.092277030213334350126864106458600575084335085690780082222880945224248438672595248111704471182201673989215223667543694847795410779036800385804729955729659506")))) 180*5a02ffc3SAndrew Turnerend 181*5a02ffc3SAndrew Turner 182*5a02ffc3SAndrew Turnerprint(" 183*5a02ffc3SAndrew Turner/* Coefficients of the polynomial used in the tgamma_central() subroutine 184*5a02ffc3SAndrew Turner * for computing gamma on the interval [1,min_x] */ 185*5a02ffc3SAndrew Turner") 186*5a02ffc3SAndrew TurnerN, D, E, X = Remez.ratfn_minimax( 187*5a02ffc3SAndrew Turner x -> x < BigFloat(0x1p-64) ? taylor_bodge(-x) : 188*5a02ffc3SAndrew Turner (SpecialFunctions.gamma(min_x - x) - min_y) / (x*x), 189*5a02ffc3SAndrew Turner (0, min_x - 1), 190*5a02ffc3SAndrew Turner 31, 0, 191*5a02ffc3SAndrew Turner (x, y) -> x^2, 192*5a02ffc3SAndrew Turner) 193*5a02ffc3SAndrew Turnerdumparray(N, "coeffs_central_neg") 194*5a02ffc3SAndrew Turner 195*5a02ffc3SAndrew Turnerprint(" 196*5a02ffc3SAndrew Turner/* Coefficients of the polynomial used in the tgamma_central() subroutine 197*5a02ffc3SAndrew Turner * for computing gamma on the interval [min_x,2] */ 198*5a02ffc3SAndrew Turner") 199*5a02ffc3SAndrew TurnerN, D, E, X = Remez.ratfn_minimax( 200*5a02ffc3SAndrew Turner x -> x < BigFloat(0x1p-64) ? taylor_bodge(x) : 201*5a02ffc3SAndrew Turner (SpecialFunctions.gamma(min_x + x) - min_y) / (x*x), 202*5a02ffc3SAndrew Turner (0, 2 - min_x), 203*5a02ffc3SAndrew Turner 28, 0, 204*5a02ffc3SAndrew Turner (x, y) -> x^2, 205*5a02ffc3SAndrew Turner) 206*5a02ffc3SAndrew Turnerdumparray(N, "coeffs_central_pos") 207*5a02ffc3SAndrew Turner 208*5a02ffc3SAndrew Turnerprint(" 209*5a02ffc3SAndrew Turner/* 128-bit float value of pi, used by the sin_pi_x_over_pi subroutine 210*5a02ffc3SAndrew Turner */ 211*5a02ffc3SAndrew Turner") 212*5a02ffc3SAndrew Turnerprintln("static const long double pi = ", quadhex(BigFloat(pi)), ";") 213