12fe8fb19SBen Gras /*-
22fe8fb19SBen Gras * Copyright (c) 2005 David Schultz <das@FreeBSD.ORG>
32fe8fb19SBen Gras * All rights reserved.
42fe8fb19SBen Gras *
52fe8fb19SBen Gras * Redistribution and use in source and binary forms, with or without
62fe8fb19SBen Gras * modification, are permitted provided that the following conditions
72fe8fb19SBen Gras * are met:
82fe8fb19SBen Gras * 1. Redistributions of source code must retain the above copyright
92fe8fb19SBen Gras * notice, this list of conditions and the following disclaimer.
102fe8fb19SBen Gras * 2. Redistributions in binary form must reproduce the above copyright
112fe8fb19SBen Gras * notice, this list of conditions and the following disclaimer in the
122fe8fb19SBen Gras * documentation and/or other materials provided with the distribution.
132fe8fb19SBen Gras *
142fe8fb19SBen Gras * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
152fe8fb19SBen Gras * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
162fe8fb19SBen Gras * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
172fe8fb19SBen Gras * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
182fe8fb19SBen Gras * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
192fe8fb19SBen Gras * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
202fe8fb19SBen Gras * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
212fe8fb19SBen Gras * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
222fe8fb19SBen Gras * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
232fe8fb19SBen Gras * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
242fe8fb19SBen Gras * SUCH DAMAGE.
252fe8fb19SBen Gras */
262fe8fb19SBen Gras
272fe8fb19SBen Gras #include <sys/cdefs.h>
28*0a6a1f1dSLionel Sambuc __RCSID("$NetBSD: s_exp2f.c,v 1.2 2014/03/16 22:30:43 dsl Exp $");
292fe8fb19SBen Gras #ifdef __FBSDID
302fe8fb19SBen Gras __FBSDID("$FreeBSD: src/lib/msun/src/s_exp2f.c,v 1.9 2008/02/22 02:27:34 das Exp $");
312fe8fb19SBen Gras #endif
322fe8fb19SBen Gras
332fe8fb19SBen Gras #include <float.h>
342fe8fb19SBen Gras
352fe8fb19SBen Gras #include "math.h"
362fe8fb19SBen Gras #include "math_private.h"
372fe8fb19SBen Gras
382fe8fb19SBen Gras #define TBLBITS 4
392fe8fb19SBen Gras #define TBLSIZE (1 << TBLBITS)
402fe8fb19SBen Gras
412fe8fb19SBen Gras static const float
422fe8fb19SBen Gras redux = 0x1.8p23f / TBLSIZE,
432fe8fb19SBen Gras P1 = 0x1.62e430p-1f,
442fe8fb19SBen Gras P2 = 0x1.ebfbe0p-3f,
452fe8fb19SBen Gras P3 = 0x1.c6b348p-5f,
462fe8fb19SBen Gras P4 = 0x1.3b2c9cp-7f;
472fe8fb19SBen Gras
48*0a6a1f1dSLionel Sambuc /*
49*0a6a1f1dSLionel Sambuc * For out of range values we need to generate the appropriate
50*0a6a1f1dSLionel Sambuc * underflow or overflow trap as well as generating infinity or zero.
51*0a6a1f1dSLionel Sambuc * This means we have to get the fpu to execute an instruction that
52*0a6a1f1dSLionel Sambuc * will generate the trap (and not have the compiler optimise it away).
53*0a6a1f1dSLionel Sambuc * This is normally done by calculating 'huge * huge' or 'tiny * tiny'.
54*0a6a1f1dSLionel Sambuc *
55*0a6a1f1dSLionel Sambuc * i386 is particularly problematic.
56*0a6a1f1dSLionel Sambuc * The 'float' result is returned on the x87 stack, so is 'long double'.
57*0a6a1f1dSLionel Sambuc * If we just multiply two 'float' values the caller will see 0x1p+/-200
58*0a6a1f1dSLionel Sambuc * (not 0 or infinity).
59*0a6a1f1dSLionel Sambuc * If we use 'double' the compiler does a store-load which will convert the
60*0a6a1f1dSLionel Sambuc * value and generate the required exception.
61*0a6a1f1dSLionel Sambuc */
62*0a6a1f1dSLionel Sambuc #ifdef __i386__
63*0a6a1f1dSLionel Sambuc static volatile double overflow = 0x1p+1000;
64*0a6a1f1dSLionel Sambuc static volatile double underflow = 0x1p-1000;
65*0a6a1f1dSLionel Sambuc #else
66*0a6a1f1dSLionel Sambuc static volatile float huge = 0x1p+100;
67*0a6a1f1dSLionel Sambuc static volatile float tiny = 0x1p-100;
68*0a6a1f1dSLionel Sambuc #define overflow (huge * huge)
69*0a6a1f1dSLionel Sambuc #define underflow (tiny * tiny)
70*0a6a1f1dSLionel Sambuc #endif
712fe8fb19SBen Gras
722fe8fb19SBen Gras static const double exp2ft[TBLSIZE] = {
732fe8fb19SBen Gras 0x1.6a09e667f3bcdp-1,
742fe8fb19SBen Gras 0x1.7a11473eb0187p-1,
752fe8fb19SBen Gras 0x1.8ace5422aa0dbp-1,
762fe8fb19SBen Gras 0x1.9c49182a3f090p-1,
772fe8fb19SBen Gras 0x1.ae89f995ad3adp-1,
782fe8fb19SBen Gras 0x1.c199bdd85529cp-1,
792fe8fb19SBen Gras 0x1.d5818dcfba487p-1,
802fe8fb19SBen Gras 0x1.ea4afa2a490dap-1,
812fe8fb19SBen Gras 0x1.0000000000000p+0,
822fe8fb19SBen Gras 0x1.0b5586cf9890fp+0,
832fe8fb19SBen Gras 0x1.172b83c7d517bp+0,
842fe8fb19SBen Gras 0x1.2387a6e756238p+0,
852fe8fb19SBen Gras 0x1.306fe0a31b715p+0,
862fe8fb19SBen Gras 0x1.3dea64c123422p+0,
872fe8fb19SBen Gras 0x1.4bfdad5362a27p+0,
882fe8fb19SBen Gras 0x1.5ab07dd485429p+0,
892fe8fb19SBen Gras };
902fe8fb19SBen Gras
912fe8fb19SBen Gras /*
922fe8fb19SBen Gras * exp2f(x): compute the base 2 exponential of x
932fe8fb19SBen Gras *
942fe8fb19SBen Gras * Accuracy: Peak error < 0.501 ulp; location of peak: -0.030110927.
952fe8fb19SBen Gras *
962fe8fb19SBen Gras * Method: (equally-spaced tables)
972fe8fb19SBen Gras *
982fe8fb19SBen Gras * Reduce x:
992fe8fb19SBen Gras * x = 2**k + y, for integer k and |y| <= 1/2.
1002fe8fb19SBen Gras * Thus we have exp2f(x) = 2**k * exp2(y).
1012fe8fb19SBen Gras *
1022fe8fb19SBen Gras * Reduce y:
1032fe8fb19SBen Gras * y = i/TBLSIZE + z for integer i near y * TBLSIZE.
1042fe8fb19SBen Gras * Thus we have exp2(y) = exp2(i/TBLSIZE) * exp2(z),
1052fe8fb19SBen Gras * with |z| <= 2**-(TBLSIZE+1).
1062fe8fb19SBen Gras *
1072fe8fb19SBen Gras * We compute exp2(i/TBLSIZE) via table lookup and exp2(z) via a
1082fe8fb19SBen Gras * degree-4 minimax polynomial with maximum error under 1.4 * 2**-33.
1092fe8fb19SBen Gras * Using double precision for everything except the reduction makes
1102fe8fb19SBen Gras * roundoff error insignificant and simplifies the scaling step.
1112fe8fb19SBen Gras *
1122fe8fb19SBen Gras * This method is due to Tang, but I do not use his suggested parameters:
1132fe8fb19SBen Gras *
1142fe8fb19SBen Gras * Tang, P. Table-driven Implementation of the Exponential Function
1152fe8fb19SBen Gras * in IEEE Floating-Point Arithmetic. TOMS 15(2), 144-157 (1989).
1162fe8fb19SBen Gras */
1172fe8fb19SBen Gras float
exp2f(float x)1182fe8fb19SBen Gras exp2f(float x)
1192fe8fb19SBen Gras {
1202fe8fb19SBen Gras double tv, twopk, u, z;
1212fe8fb19SBen Gras float t;
1222fe8fb19SBen Gras uint32_t hx, ix, i0;
1232fe8fb19SBen Gras int32_t k;
1242fe8fb19SBen Gras
1252fe8fb19SBen Gras /* Filter out exceptional cases. */
1262fe8fb19SBen Gras GET_FLOAT_WORD(hx, x);
1272fe8fb19SBen Gras ix = hx & 0x7fffffff; /* high word of |x| */
1282fe8fb19SBen Gras if(ix >= 0x43000000) { /* |x| >= 128 */
1292fe8fb19SBen Gras if(ix >= 0x7f800000) {
1302fe8fb19SBen Gras if ((ix & 0x7fffff) != 0 || (hx & 0x80000000) == 0)
1312fe8fb19SBen Gras return (x + x); /* x is NaN or +Inf */
1322fe8fb19SBen Gras else
1332fe8fb19SBen Gras return (0.0); /* x is -Inf */
1342fe8fb19SBen Gras }
1352fe8fb19SBen Gras if(x >= 0x1.0p7f)
136*0a6a1f1dSLionel Sambuc return overflow; /* +infinity with overflow */
1372fe8fb19SBen Gras if(x <= -0x1.2cp7f)
138*0a6a1f1dSLionel Sambuc return underflow; /* zero with underflow */
1392fe8fb19SBen Gras } else if (ix <= 0x33000000) { /* |x| <= 0x1p-25 */
1402fe8fb19SBen Gras return (1.0f + x);
1412fe8fb19SBen Gras }
1422fe8fb19SBen Gras
1432fe8fb19SBen Gras /* Reduce x, computing z, i0, and k. */
1442fe8fb19SBen Gras STRICT_ASSIGN(float, t, x + redux);
1452fe8fb19SBen Gras GET_FLOAT_WORD(i0, t);
1462fe8fb19SBen Gras i0 += TBLSIZE / 2;
1472fe8fb19SBen Gras k = (i0 >> TBLBITS) << 20;
1482fe8fb19SBen Gras i0 &= TBLSIZE - 1;
1492fe8fb19SBen Gras t -= redux;
1502fe8fb19SBen Gras z = x - t;
1512fe8fb19SBen Gras INSERT_WORDS(twopk, 0x3ff00000 + k, 0);
1522fe8fb19SBen Gras
1532fe8fb19SBen Gras /* Compute r = exp2(y) = exp2ft[i0] * p(z). */
1542fe8fb19SBen Gras tv = exp2ft[i0];
1552fe8fb19SBen Gras u = tv * z;
1562fe8fb19SBen Gras tv = tv + u * (P1 + z * P2) + u * (z * z) * (P3 + z * P4);
1572fe8fb19SBen Gras
1582fe8fb19SBen Gras /* Scale by 2**(k>>20). */
1592fe8fb19SBen Gras return (tv * twopk);
1602fe8fb19SBen Gras }
161