1*05a0b428SJohn Marino /*-
2*05a0b428SJohn Marino * ====================================================
3*05a0b428SJohn Marino * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
4*05a0b428SJohn Marino * Copyright (c) 2009-2011, Bruce D. Evans, Steven G. Kargl, David Schultz.
5*05a0b428SJohn Marino *
6*05a0b428SJohn Marino * Developed at SunPro, a Sun Microsystems, Inc. business.
7*05a0b428SJohn Marino * Permission to use, copy, modify, and distribute this
8*05a0b428SJohn Marino * software is freely granted, provided that this notice
9*05a0b428SJohn Marino * is preserved.
10*05a0b428SJohn Marino * ====================================================
11*05a0b428SJohn Marino *
12*05a0b428SJohn Marino * The argument reduction and testing for exceptional cases was
13*05a0b428SJohn Marino * written by Steven G. Kargl with input from Bruce D. Evans
14*05a0b428SJohn Marino * and David A. Schultz.
15*05a0b428SJohn Marino */
16*05a0b428SJohn Marino
17*05a0b428SJohn Marino #include <float.h>
18*05a0b428SJohn Marino #include <ieeefp.h>
19*05a0b428SJohn Marino #include <math.h>
20*05a0b428SJohn Marino
21*05a0b428SJohn Marino #include "math_private.h"
22*05a0b428SJohn Marino
23*05a0b428SJohn Marino #define BIAS (LDBL_MAX_EXP - 1)
24*05a0b428SJohn Marino
25*05a0b428SJohn Marino static const unsigned
26*05a0b428SJohn Marino B1 = 709958130; /* B1 = (127-127.0/3-0.03306235651)*2**23 */
27*05a0b428SJohn Marino
28*05a0b428SJohn Marino long double
cbrtl(long double x)29*05a0b428SJohn Marino cbrtl(long double x)
30*05a0b428SJohn Marino {
31*05a0b428SJohn Marino long double v, r, s, t, w;
32*05a0b428SJohn Marino double dr, dt, dx;
33*05a0b428SJohn Marino float ft, fx;
34*05a0b428SJohn Marino uint32_t hx, lx;
35*05a0b428SJohn Marino uint16_t expsign, es;
36*05a0b428SJohn Marino int k;
37*05a0b428SJohn Marino volatile double vd1, vd2;
38*05a0b428SJohn Marino
39*05a0b428SJohn Marino GET_LDOUBLE_EXP(expsign,x);
40*05a0b428SJohn Marino k = expsign & 0x7fff;
41*05a0b428SJohn Marino
42*05a0b428SJohn Marino /*
43*05a0b428SJohn Marino * If x = +-Inf, then cbrt(x) = +-Inf.
44*05a0b428SJohn Marino * If x = NaN, then cbrt(x) = NaN.
45*05a0b428SJohn Marino */
46*05a0b428SJohn Marino if (k == BIAS + LDBL_MAX_EXP)
47*05a0b428SJohn Marino return (x + x);
48*05a0b428SJohn Marino
49*05a0b428SJohn Marino if (k == 0) {
50*05a0b428SJohn Marino /* If x = +-0, then cbrt(x) = +-0. */
51*05a0b428SJohn Marino GET_LDOUBLE_WORDS(es,hx,lx,x);
52*05a0b428SJohn Marino if ((hx|lx) == 0) {
53*05a0b428SJohn Marino return (x);
54*05a0b428SJohn Marino }
55*05a0b428SJohn Marino /* Adjust subnormal numbers. */
56*05a0b428SJohn Marino x *= 0x1.0p514;
57*05a0b428SJohn Marino GET_LDOUBLE_EXP(k,x);
58*05a0b428SJohn Marino k &= 0x7fff;
59*05a0b428SJohn Marino k -= BIAS + 514;
60*05a0b428SJohn Marino } else
61*05a0b428SJohn Marino k -= BIAS;
62*05a0b428SJohn Marino SET_LDOUBLE_EXP(x,BIAS);
63*05a0b428SJohn Marino v = 1;
64*05a0b428SJohn Marino
65*05a0b428SJohn Marino switch (k % 3) {
66*05a0b428SJohn Marino case 1:
67*05a0b428SJohn Marino case -2:
68*05a0b428SJohn Marino x = 2*x;
69*05a0b428SJohn Marino k--;
70*05a0b428SJohn Marino break;
71*05a0b428SJohn Marino case 2:
72*05a0b428SJohn Marino case -1:
73*05a0b428SJohn Marino x = 4*x;
74*05a0b428SJohn Marino k -= 2;
75*05a0b428SJohn Marino break;
76*05a0b428SJohn Marino }
77*05a0b428SJohn Marino SET_LDOUBLE_EXP(v, (expsign & 0x8000) | (BIAS + k / 3));
78*05a0b428SJohn Marino
79*05a0b428SJohn Marino /*
80*05a0b428SJohn Marino * The following is the guts of s_cbrtf, with the handling of
81*05a0b428SJohn Marino * special values removed and extra care for accuracy not taken,
82*05a0b428SJohn Marino * but with most of the extra accuracy not discarded.
83*05a0b428SJohn Marino */
84*05a0b428SJohn Marino
85*05a0b428SJohn Marino /* ~5-bit estimate: */
86*05a0b428SJohn Marino fx = x;
87*05a0b428SJohn Marino GET_FLOAT_WORD(hx, fx);
88*05a0b428SJohn Marino SET_FLOAT_WORD(ft, ((hx & 0x7fffffff) / 3 + B1));
89*05a0b428SJohn Marino
90*05a0b428SJohn Marino /* ~16-bit estimate: */
91*05a0b428SJohn Marino dx = x;
92*05a0b428SJohn Marino dt = ft;
93*05a0b428SJohn Marino dr = dt * dt * dt;
94*05a0b428SJohn Marino dt = dt * (dx + dx + dr) / (dx + dr + dr);
95*05a0b428SJohn Marino
96*05a0b428SJohn Marino /* ~47-bit estimate: */
97*05a0b428SJohn Marino dr = dt * dt * dt;
98*05a0b428SJohn Marino dt = dt * (dx + dx + dr) / (dx + dr + dr);
99*05a0b428SJohn Marino
100*05a0b428SJohn Marino /*
101*05a0b428SJohn Marino * dt is cbrtl(x) to ~47 bits (after x has been reduced to 1 <= x < 8).
102*05a0b428SJohn Marino * Round it away from zero to 32 bits (32 so that t*t is exact, and
103*05a0b428SJohn Marino * away from zero for technical reasons).
104*05a0b428SJohn Marino */
105*05a0b428SJohn Marino vd2 = 0x1.0p32;
106*05a0b428SJohn Marino vd1 = 0x1.0p-31;
107*05a0b428SJohn Marino #define vd ((long double)vd2 + vd1)
108*05a0b428SJohn Marino
109*05a0b428SJohn Marino t = dt + vd - 0x1.0p32;
110*05a0b428SJohn Marino
111*05a0b428SJohn Marino /*
112*05a0b428SJohn Marino * Final step Newton iteration to 64 or 113 bits with
113*05a0b428SJohn Marino * error < 0.667 ulps
114*05a0b428SJohn Marino */
115*05a0b428SJohn Marino s=t*t; /* t*t is exact */
116*05a0b428SJohn Marino r=x/s; /* error <= 0.5 ulps; |r| < |t| */
117*05a0b428SJohn Marino w=t+t; /* t+t is exact */
118*05a0b428SJohn Marino r=(r-t)/(w+r); /* r-t is exact; w+r ~= 3*t */
119*05a0b428SJohn Marino t=t+t*r; /* error <= 0.5 + 0.5/3 + epsilon */
120*05a0b428SJohn Marino
121*05a0b428SJohn Marino t *= v;
122*05a0b428SJohn Marino return (t);
123*05a0b428SJohn Marino }
124