xref: /freebsd-src/contrib/arm-optimized-routines/math/tools/tgamma128_gen.jl (revision 5a02ffc32e777041dd2dad4e651ed2a0865a0a5d)
1*5a02ffc3SAndrew Turner# -*- julia -*-
2*5a02ffc3SAndrew Turner#
3*5a02ffc3SAndrew Turner# Generate tgamma128.h, containing polynomials and constants used by
4*5a02ffc3SAndrew Turner# tgamma128.c.
5*5a02ffc3SAndrew Turner#
6*5a02ffc3SAndrew Turner# Copyright (c) 2006-2023, Arm Limited.
7*5a02ffc3SAndrew Turner# SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
8*5a02ffc3SAndrew Turner
9*5a02ffc3SAndrew Turner# This Julia program depends on the 'Remez' and 'SpecialFunctions'
10*5a02ffc3SAndrew Turner# library packages. To install them, run this at the interactive Julia
11*5a02ffc3SAndrew Turner# prompt:
12*5a02ffc3SAndrew Turner#
13*5a02ffc3SAndrew Turner#   import Pkg; Pkg.add(["Remez", "SpecialFunctions"])
14*5a02ffc3SAndrew Turner#
15*5a02ffc3SAndrew Turner# Tested on Julia 1.4.1 (Ubuntu 20.04) and 1.9.0 (22.04).
16*5a02ffc3SAndrew Turner
17*5a02ffc3SAndrew Turnerimport Printf
18*5a02ffc3SAndrew Turnerimport Remez
19*5a02ffc3SAndrew Turnerimport SpecialFunctions
20*5a02ffc3SAndrew Turner
21*5a02ffc3SAndrew Turner# Round a BigFloat to 128-bit long double and format it as a C99 hex
22*5a02ffc3SAndrew Turner# float literal.
23*5a02ffc3SAndrew Turnerfunction quadhex(x)
24*5a02ffc3SAndrew Turner    sign = " "
25*5a02ffc3SAndrew Turner    if x < 0
26*5a02ffc3SAndrew Turner        sign = "-"
27*5a02ffc3SAndrew Turner        x = -x
28*5a02ffc3SAndrew Turner    end
29*5a02ffc3SAndrew Turner
30*5a02ffc3SAndrew Turner    exponent = BigInt(floor(log2(x)))
31*5a02ffc3SAndrew Turner    exponent = max(exponent, -16382)
32*5a02ffc3SAndrew Turner    @assert(exponent <= 16383) # else overflow
33*5a02ffc3SAndrew Turner
34*5a02ffc3SAndrew Turner    x /= BigFloat(2)^exponent
35*5a02ffc3SAndrew Turner    @assert(1 <= x < 2)
36*5a02ffc3SAndrew Turner    x *= BigFloat(2)^112
37*5a02ffc3SAndrew Turner    mantissa = BigInt(round(x))
38*5a02ffc3SAndrew Turner
39*5a02ffc3SAndrew Turner    mantstr = string(mantissa, base=16, pad=29)
40*5a02ffc3SAndrew Turner    return Printf.@sprintf("%s0x%s.%sp%+dL", sign, mantstr[1], mantstr[2:end],
41*5a02ffc3SAndrew Turner                           exponent)
42*5a02ffc3SAndrew Turnerend
43*5a02ffc3SAndrew Turner
44*5a02ffc3SAndrew Turner# Round a BigFloat to 128-bit long double and return it still as a
45*5a02ffc3SAndrew Turner# BigFloat.
46*5a02ffc3SAndrew Turnerfunction quadval(x, round=0)
47*5a02ffc3SAndrew Turner    sign = +1
48*5a02ffc3SAndrew Turner    if x.sign < 0
49*5a02ffc3SAndrew Turner        sign = -1
50*5a02ffc3SAndrew Turner        x = -x
51*5a02ffc3SAndrew Turner    end
52*5a02ffc3SAndrew Turner
53*5a02ffc3SAndrew Turner    exponent = BigInt(floor(log2(x)))
54*5a02ffc3SAndrew Turner    exponent = max(exponent, -16382)
55*5a02ffc3SAndrew Turner    @assert(exponent <= 16383) # else overflow
56*5a02ffc3SAndrew Turner
57*5a02ffc3SAndrew Turner    x /= BigFloat(2)^exponent
58*5a02ffc3SAndrew Turner    @assert(1 <= x < 2)
59*5a02ffc3SAndrew Turner    x *= BigFloat(2)^112
60*5a02ffc3SAndrew Turner    if round < 0
61*5a02ffc3SAndrew Turner        mantissa = floor(x)
62*5a02ffc3SAndrew Turner    elseif round > 0
63*5a02ffc3SAndrew Turner        mantissa = ceil(x)
64*5a02ffc3SAndrew Turner    else
65*5a02ffc3SAndrew Turner        mantissa = round(x)
66*5a02ffc3SAndrew Turner    end
67*5a02ffc3SAndrew Turner
68*5a02ffc3SAndrew Turner    return sign * mantissa * BigFloat(2)^(exponent - 112)
69*5a02ffc3SAndrew Turnerend
70*5a02ffc3SAndrew Turner
71*5a02ffc3SAndrew Turner# Output an array of BigFloats as a C array declaration.
72*5a02ffc3SAndrew Turnerfunction dumparray(a, name)
73*5a02ffc3SAndrew Turner    println("static const long double ", name, "[] = {")
74*5a02ffc3SAndrew Turner    for x in N
75*5a02ffc3SAndrew Turner        println("    ", quadhex(x), ",")
76*5a02ffc3SAndrew Turner    end
77*5a02ffc3SAndrew Turner    println("};")
78*5a02ffc3SAndrew Turnerend
79*5a02ffc3SAndrew Turner
80*5a02ffc3SAndrew Turnerprint("/*
81*5a02ffc3SAndrew Turner * Polynomial coefficients and other constants for tgamma128.c.
82*5a02ffc3SAndrew Turner *
83*5a02ffc3SAndrew Turner * Copyright (c) 2006,2009,2023 Arm Limited.
84*5a02ffc3SAndrew Turner * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
85*5a02ffc3SAndrew Turner */
86*5a02ffc3SAndrew Turner")
87*5a02ffc3SAndrew Turner
88*5a02ffc3SAndrew TurnerBase.MPFR.setprecision(512)
89*5a02ffc3SAndrew Turner
90*5a02ffc3SAndrew Turnere = exp(BigFloat(1))
91*5a02ffc3SAndrew Turner
92*5a02ffc3SAndrew Turnerprint("
93*5a02ffc3SAndrew Turner/* The largest positive value for which 128-bit tgamma does not overflow. */
94*5a02ffc3SAndrew Turner")
95*5a02ffc3SAndrew Turnerlo = BigFloat("1000")
96*5a02ffc3SAndrew Turnerhi = BigFloat("2000")
97*5a02ffc3SAndrew Turnerwhile true
98*5a02ffc3SAndrew Turner    global lo
99*5a02ffc3SAndrew Turner    global hi
100*5a02ffc3SAndrew Turner    global max_x
101*5a02ffc3SAndrew Turner
102*5a02ffc3SAndrew Turner    mid = (lo + hi) / 2
103*5a02ffc3SAndrew Turner    if mid == lo || mid == hi
104*5a02ffc3SAndrew Turner        max_x = mid
105*5a02ffc3SAndrew Turner        break
106*5a02ffc3SAndrew Turner    end
107*5a02ffc3SAndrew Turner    if SpecialFunctions.logabsgamma(mid)[1] < 16384 * log(BigFloat(2))
108*5a02ffc3SAndrew Turner        lo = mid
109*5a02ffc3SAndrew Turner    else
110*5a02ffc3SAndrew Turner        hi = mid
111*5a02ffc3SAndrew Turner    end
112*5a02ffc3SAndrew Turnerend
113*5a02ffc3SAndrew Turnermax_x = quadval(max_x, -1)
114*5a02ffc3SAndrew Turnerprintln("static const long double max_x = ", quadhex(max_x), ";")
115*5a02ffc3SAndrew Turner
116*5a02ffc3SAndrew Turnerprint("
117*5a02ffc3SAndrew Turner/* Coefficients of the polynomial used in the tgamma_large() subroutine */
118*5a02ffc3SAndrew Turner")
119*5a02ffc3SAndrew TurnerN, D, E, X = Remez.ratfn_minimax(
120*5a02ffc3SAndrew Turner    x -> x==0 ? sqrt(BigFloat(2)*pi/e) :
121*5a02ffc3SAndrew Turner                exp(SpecialFunctions.logabsgamma(1/x)[1] +
122*5a02ffc3SAndrew Turner                    (1/x-0.5)*(1+log(x))),
123*5a02ffc3SAndrew Turner    (0, 1/BigFloat(8)),
124*5a02ffc3SAndrew Turner    24, 0,
125*5a02ffc3SAndrew Turner    (x, y) -> 1/y
126*5a02ffc3SAndrew Turner)
127*5a02ffc3SAndrew Turnerdumparray(N, "coeffs_large")
128*5a02ffc3SAndrew Turner
129*5a02ffc3SAndrew Turnerprint("
130*5a02ffc3SAndrew Turner/* Coefficients of the polynomial used in the tgamma_tiny() subroutine */
131*5a02ffc3SAndrew Turner")
132*5a02ffc3SAndrew TurnerN, D, E, X = Remez.ratfn_minimax(
133*5a02ffc3SAndrew Turner    x -> x==0 ? 1 : 1/(x*SpecialFunctions.gamma(x)),
134*5a02ffc3SAndrew Turner    (0, 1/BigFloat(32)),
135*5a02ffc3SAndrew Turner    13, 0,
136*5a02ffc3SAndrew Turner)
137*5a02ffc3SAndrew Turnerdumparray(N, "coeffs_tiny")
138*5a02ffc3SAndrew Turner
139*5a02ffc3SAndrew Turnerprint("
140*5a02ffc3SAndrew Turner/* The location within the interval [1,2] where gamma has a minimum.
141*5a02ffc3SAndrew Turner * Specified as the sum of two 128-bit values, for extra precision. */
142*5a02ffc3SAndrew Turner")
143*5a02ffc3SAndrew Turnerlo = BigFloat("1.4")
144*5a02ffc3SAndrew Turnerhi = BigFloat("1.5")
145*5a02ffc3SAndrew Turnerwhile true
146*5a02ffc3SAndrew Turner    global lo
147*5a02ffc3SAndrew Turner    global hi
148*5a02ffc3SAndrew Turner    global min_x
149*5a02ffc3SAndrew Turner
150*5a02ffc3SAndrew Turner    mid = (lo + hi) / 2
151*5a02ffc3SAndrew Turner    if mid == lo || mid == hi
152*5a02ffc3SAndrew Turner        min_x = mid
153*5a02ffc3SAndrew Turner        break
154*5a02ffc3SAndrew Turner    end
155*5a02ffc3SAndrew Turner    if SpecialFunctions.digamma(mid) < 0
156*5a02ffc3SAndrew Turner        lo = mid
157*5a02ffc3SAndrew Turner    else
158*5a02ffc3SAndrew Turner        hi = mid
159*5a02ffc3SAndrew Turner    end
160*5a02ffc3SAndrew Turnerend
161*5a02ffc3SAndrew Turnermin_x_hi = quadval(min_x, -1)
162*5a02ffc3SAndrew Turnerprintln("static const long double min_x_hi = ", quadhex(min_x_hi), ";")
163*5a02ffc3SAndrew Turnerprintln("static const long double min_x_lo = ", quadhex(min_x - min_x_hi), ";")
164*5a02ffc3SAndrew Turner
165*5a02ffc3SAndrew Turnerprint("
166*5a02ffc3SAndrew Turner/* The actual minimum value that gamma takes at that location.
167*5a02ffc3SAndrew Turner * Again specified as the sum of two 128-bit values. */
168*5a02ffc3SAndrew Turner")
169*5a02ffc3SAndrew Turnermin_y = SpecialFunctions.gamma(min_x)
170*5a02ffc3SAndrew Turnermin_y_hi = quadval(min_y, -1)
171*5a02ffc3SAndrew Turnerprintln("static const long double min_y_hi = ", quadhex(min_y_hi), ";")
172*5a02ffc3SAndrew Turnerprintln("static const long double min_y_lo = ", quadhex(min_y - min_y_hi), ";")
173*5a02ffc3SAndrew Turner
174*5a02ffc3SAndrew Turnerfunction taylor_bodge(x)
175*5a02ffc3SAndrew Turner    # Taylor series generated by Wolfram Alpha for (gamma(min_x+x)-min_y)/x^2.
176*5a02ffc3SAndrew Turner    # Used in the Remez calls below for x values very near the origin, to avoid
177*5a02ffc3SAndrew Turner    # significance loss problems when trying to compute it directly via that
178*5a02ffc3SAndrew Turner    # formula (even in MPFR's extra precision).
179*5a02ffc3SAndrew Turner    return BigFloat("0.428486815855585429730209907810650582960483696962660010556335457558784421896667728014324097132413696263704801646004585959298743677879606168187061990204432200")+x*(-BigFloat("0.130704158939785761928008749242671025181542078105370084716141350308119418619652583986015464395882363802104154017741656168641240436089858504560718773026275797")+x*(BigFloat("0.160890753325112844190519489594363387594505844658437718135952967735294789599989664428071656484587979507034160383271974554122934842441540146372016567834062876")+x*(-BigFloat("0.092277030213334350126864106458600575084335085690780082222880945224248438672595248111704471182201673989215223667543694847795410779036800385804729955729659506"))))
180*5a02ffc3SAndrew Turnerend
181*5a02ffc3SAndrew Turner
182*5a02ffc3SAndrew Turnerprint("
183*5a02ffc3SAndrew Turner/* Coefficients of the polynomial used in the tgamma_central() subroutine
184*5a02ffc3SAndrew Turner * for computing gamma on the interval [1,min_x] */
185*5a02ffc3SAndrew Turner")
186*5a02ffc3SAndrew TurnerN, D, E, X = Remez.ratfn_minimax(
187*5a02ffc3SAndrew Turner    x -> x < BigFloat(0x1p-64) ? taylor_bodge(-x) :
188*5a02ffc3SAndrew Turner        (SpecialFunctions.gamma(min_x - x) - min_y) / (x*x),
189*5a02ffc3SAndrew Turner    (0, min_x - 1),
190*5a02ffc3SAndrew Turner    31, 0,
191*5a02ffc3SAndrew Turner    (x, y) -> x^2,
192*5a02ffc3SAndrew Turner)
193*5a02ffc3SAndrew Turnerdumparray(N, "coeffs_central_neg")
194*5a02ffc3SAndrew Turner
195*5a02ffc3SAndrew Turnerprint("
196*5a02ffc3SAndrew Turner/* Coefficients of the polynomial used in the tgamma_central() subroutine
197*5a02ffc3SAndrew Turner * for computing gamma on the interval [min_x,2] */
198*5a02ffc3SAndrew Turner")
199*5a02ffc3SAndrew TurnerN, D, E, X = Remez.ratfn_minimax(
200*5a02ffc3SAndrew Turner    x -> x < BigFloat(0x1p-64) ? taylor_bodge(x) :
201*5a02ffc3SAndrew Turner        (SpecialFunctions.gamma(min_x + x) - min_y) / (x*x),
202*5a02ffc3SAndrew Turner    (0, 2 - min_x),
203*5a02ffc3SAndrew Turner    28, 0,
204*5a02ffc3SAndrew Turner    (x, y) -> x^2,
205*5a02ffc3SAndrew Turner)
206*5a02ffc3SAndrew Turnerdumparray(N, "coeffs_central_pos")
207*5a02ffc3SAndrew Turner
208*5a02ffc3SAndrew Turnerprint("
209*5a02ffc3SAndrew Turner/* 128-bit float value of pi, used by the sin_pi_x_over_pi subroutine
210*5a02ffc3SAndrew Turner */
211*5a02ffc3SAndrew Turner")
212*5a02ffc3SAndrew Turnerprintln("static const long double pi = ", quadhex(BigFloat(pi)), ";")
213