xref: /dflybsd-src/contrib/openbsd_libm/src/ld80/s_cbrtl.c (revision 4382f29d99a100bd77a81697c2f699c11f6a472a)
1*05a0b428SJohn Marino /*-
2*05a0b428SJohn Marino  * ====================================================
3*05a0b428SJohn Marino  * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
4*05a0b428SJohn Marino  * Copyright (c) 2009-2011, Bruce D. Evans, Steven G. Kargl, David Schultz.
5*05a0b428SJohn Marino  *
6*05a0b428SJohn Marino  * Developed at SunPro, a Sun Microsystems, Inc. business.
7*05a0b428SJohn Marino  * Permission to use, copy, modify, and distribute this
8*05a0b428SJohn Marino  * software is freely granted, provided that this notice
9*05a0b428SJohn Marino  * is preserved.
10*05a0b428SJohn Marino  * ====================================================
11*05a0b428SJohn Marino  *
12*05a0b428SJohn Marino  * The argument reduction and testing for exceptional cases was
13*05a0b428SJohn Marino  * written by Steven G. Kargl with input from Bruce D. Evans
14*05a0b428SJohn Marino  * and David A. Schultz.
15*05a0b428SJohn Marino  */
16*05a0b428SJohn Marino 
17*05a0b428SJohn Marino #include <float.h>
18*05a0b428SJohn Marino #include <ieeefp.h>
19*05a0b428SJohn Marino #include <math.h>
20*05a0b428SJohn Marino 
21*05a0b428SJohn Marino #include "math_private.h"
22*05a0b428SJohn Marino 
23*05a0b428SJohn Marino #define	BIAS	(LDBL_MAX_EXP - 1)
24*05a0b428SJohn Marino 
25*05a0b428SJohn Marino static const unsigned
26*05a0b428SJohn Marino     B1 = 709958130;	/* B1 = (127-127.0/3-0.03306235651)*2**23 */
27*05a0b428SJohn Marino 
28*05a0b428SJohn Marino long double
cbrtl(long double x)29*05a0b428SJohn Marino cbrtl(long double x)
30*05a0b428SJohn Marino {
31*05a0b428SJohn Marino 	long double v, r, s, t, w;
32*05a0b428SJohn Marino 	double dr, dt, dx;
33*05a0b428SJohn Marino 	float ft, fx;
34*05a0b428SJohn Marino 	uint32_t hx, lx;
35*05a0b428SJohn Marino 	uint16_t expsign, es;
36*05a0b428SJohn Marino 	int k;
37*05a0b428SJohn Marino 	volatile double vd1, vd2;
38*05a0b428SJohn Marino 
39*05a0b428SJohn Marino 	GET_LDOUBLE_EXP(expsign,x);
40*05a0b428SJohn Marino 	k = expsign & 0x7fff;
41*05a0b428SJohn Marino 
42*05a0b428SJohn Marino 	/*
43*05a0b428SJohn Marino 	 * If x = +-Inf, then cbrt(x) = +-Inf.
44*05a0b428SJohn Marino 	 * If x = NaN, then cbrt(x) = NaN.
45*05a0b428SJohn Marino 	 */
46*05a0b428SJohn Marino 	if (k == BIAS + LDBL_MAX_EXP)
47*05a0b428SJohn Marino 		return (x + x);
48*05a0b428SJohn Marino 
49*05a0b428SJohn Marino 	if (k == 0) {
50*05a0b428SJohn Marino 		/* If x = +-0, then cbrt(x) = +-0. */
51*05a0b428SJohn Marino 		GET_LDOUBLE_WORDS(es,hx,lx,x);
52*05a0b428SJohn Marino 		if ((hx|lx) == 0) {
53*05a0b428SJohn Marino 			return (x);
54*05a0b428SJohn Marino 		}
55*05a0b428SJohn Marino 		/* Adjust subnormal numbers. */
56*05a0b428SJohn Marino 		x *= 0x1.0p514;
57*05a0b428SJohn Marino 		GET_LDOUBLE_EXP(k,x);
58*05a0b428SJohn Marino 		k &= 0x7fff;
59*05a0b428SJohn Marino 		k -= BIAS + 514;
60*05a0b428SJohn Marino 	} else
61*05a0b428SJohn Marino 		k -= BIAS;
62*05a0b428SJohn Marino 	SET_LDOUBLE_EXP(x,BIAS);
63*05a0b428SJohn Marino 	v = 1;
64*05a0b428SJohn Marino 
65*05a0b428SJohn Marino 	switch (k % 3) {
66*05a0b428SJohn Marino 	case 1:
67*05a0b428SJohn Marino 	case -2:
68*05a0b428SJohn Marino 		x = 2*x;
69*05a0b428SJohn Marino 		k--;
70*05a0b428SJohn Marino 		break;
71*05a0b428SJohn Marino 	case 2:
72*05a0b428SJohn Marino 	case -1:
73*05a0b428SJohn Marino 		x = 4*x;
74*05a0b428SJohn Marino 		k -= 2;
75*05a0b428SJohn Marino 		break;
76*05a0b428SJohn Marino 	}
77*05a0b428SJohn Marino 	SET_LDOUBLE_EXP(v, (expsign & 0x8000) | (BIAS + k / 3));
78*05a0b428SJohn Marino 
79*05a0b428SJohn Marino 	/*
80*05a0b428SJohn Marino 	 * The following is the guts of s_cbrtf, with the handling of
81*05a0b428SJohn Marino 	 * special values removed and extra care for accuracy not taken,
82*05a0b428SJohn Marino 	 * but with most of the extra accuracy not discarded.
83*05a0b428SJohn Marino 	 */
84*05a0b428SJohn Marino 
85*05a0b428SJohn Marino 	/* ~5-bit estimate: */
86*05a0b428SJohn Marino 	fx = x;
87*05a0b428SJohn Marino 	GET_FLOAT_WORD(hx, fx);
88*05a0b428SJohn Marino 	SET_FLOAT_WORD(ft, ((hx & 0x7fffffff) / 3 + B1));
89*05a0b428SJohn Marino 
90*05a0b428SJohn Marino 	/* ~16-bit estimate: */
91*05a0b428SJohn Marino 	dx = x;
92*05a0b428SJohn Marino 	dt = ft;
93*05a0b428SJohn Marino 	dr = dt * dt * dt;
94*05a0b428SJohn Marino 	dt = dt * (dx + dx + dr) / (dx + dr + dr);
95*05a0b428SJohn Marino 
96*05a0b428SJohn Marino 	/* ~47-bit estimate: */
97*05a0b428SJohn Marino 	dr = dt * dt * dt;
98*05a0b428SJohn Marino 	dt = dt * (dx + dx + dr) / (dx + dr + dr);
99*05a0b428SJohn Marino 
100*05a0b428SJohn Marino 	/*
101*05a0b428SJohn Marino 	 * dt is cbrtl(x) to ~47 bits (after x has been reduced to 1 <= x < 8).
102*05a0b428SJohn Marino 	 * Round it away from zero to 32 bits (32 so that t*t is exact, and
103*05a0b428SJohn Marino 	 * away from zero for technical reasons).
104*05a0b428SJohn Marino 	 */
105*05a0b428SJohn Marino 	vd2 = 0x1.0p32;
106*05a0b428SJohn Marino 	vd1 = 0x1.0p-31;
107*05a0b428SJohn Marino 	#define vd ((long double)vd2 + vd1)
108*05a0b428SJohn Marino 
109*05a0b428SJohn Marino 	t = dt + vd - 0x1.0p32;
110*05a0b428SJohn Marino 
111*05a0b428SJohn Marino 	/*
112*05a0b428SJohn Marino 	 * Final step Newton iteration to 64 or 113 bits with
113*05a0b428SJohn Marino 	 * error < 0.667 ulps
114*05a0b428SJohn Marino 	 */
115*05a0b428SJohn Marino 	s=t*t;				/* t*t is exact */
116*05a0b428SJohn Marino 	r=x/s;				/* error <= 0.5 ulps; |r| < |t| */
117*05a0b428SJohn Marino 	w=t+t;				/* t+t is exact */
118*05a0b428SJohn Marino 	r=(r-t)/(w+r);			/* r-t is exact; w+r ~= 3*t */
119*05a0b428SJohn Marino 	t=t+t*r;			/* error <= 0.5 + 0.5/3 + epsilon */
120*05a0b428SJohn Marino 
121*05a0b428SJohn Marino 	t *= v;
122*05a0b428SJohn Marino 	return (t);
123*05a0b428SJohn Marino }
124