1*2fe8fb19SBen Gras /* @(#)k_rem_pio2.c 5.1 93/09/24 */
2*2fe8fb19SBen Gras /*
3*2fe8fb19SBen Gras * ====================================================
4*2fe8fb19SBen Gras * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
5*2fe8fb19SBen Gras *
6*2fe8fb19SBen Gras * Developed at SunPro, a Sun Microsystems, Inc. business.
7*2fe8fb19SBen Gras * Permission to use, copy, modify, and distribute this
8*2fe8fb19SBen Gras * software is freely granted, provided that this notice
9*2fe8fb19SBen Gras * is preserved.
10*2fe8fb19SBen Gras * ====================================================
11*2fe8fb19SBen Gras */
12*2fe8fb19SBen Gras
13*2fe8fb19SBen Gras #include <sys/cdefs.h>
14*2fe8fb19SBen Gras #if defined(LIBM_SCCS) && !defined(lint)
15*2fe8fb19SBen Gras __RCSID("$NetBSD: k_rem_pio2.c,v 1.12 2010/04/23 19:17:07 drochner Exp $");
16*2fe8fb19SBen Gras #endif
17*2fe8fb19SBen Gras
18*2fe8fb19SBen Gras /*
19*2fe8fb19SBen Gras * __kernel_rem_pio2(x,y,e0,nx,prec,ipio2)
20*2fe8fb19SBen Gras * double x[],y[]; int e0,nx,prec; int ipio2[];
21*2fe8fb19SBen Gras *
22*2fe8fb19SBen Gras * __kernel_rem_pio2 return the last three digits of N with
23*2fe8fb19SBen Gras * y = x - N*pi/2
24*2fe8fb19SBen Gras * so that |y| < pi/2.
25*2fe8fb19SBen Gras *
26*2fe8fb19SBen Gras * The method is to compute the integer (mod 8) and fraction parts of
27*2fe8fb19SBen Gras * (2/pi)*x without doing the full multiplication. In general we
28*2fe8fb19SBen Gras * skip the part of the product that are known to be a huge integer (
29*2fe8fb19SBen Gras * more accurately, = 0 mod 8 ). Thus the number of operations are
30*2fe8fb19SBen Gras * independent of the exponent of the input.
31*2fe8fb19SBen Gras *
32*2fe8fb19SBen Gras * (2/pi) is represented by an array of 24-bit integers in ipio2[].
33*2fe8fb19SBen Gras *
34*2fe8fb19SBen Gras * Input parameters:
35*2fe8fb19SBen Gras * x[] The input value (must be positive) is broken into nx
36*2fe8fb19SBen Gras * pieces of 24-bit integers in double precision format.
37*2fe8fb19SBen Gras * x[i] will be the i-th 24 bit of x. The scaled exponent
38*2fe8fb19SBen Gras * of x[0] is given in input parameter e0 (i.e., x[0]*2^e0
39*2fe8fb19SBen Gras * match x's up to 24 bits.
40*2fe8fb19SBen Gras *
41*2fe8fb19SBen Gras * Example of breaking a double positive z into x[0]+x[1]+x[2]:
42*2fe8fb19SBen Gras * e0 = ilogb(z)-23
43*2fe8fb19SBen Gras * z = scalbn(z,-e0)
44*2fe8fb19SBen Gras * for i = 0,1,2
45*2fe8fb19SBen Gras * x[i] = floor(z)
46*2fe8fb19SBen Gras * z = (z-x[i])*2**24
47*2fe8fb19SBen Gras *
48*2fe8fb19SBen Gras *
49*2fe8fb19SBen Gras * y[] output result in an array of double precision numbers.
50*2fe8fb19SBen Gras * The dimension of y[] is:
51*2fe8fb19SBen Gras * 24-bit precision 1
52*2fe8fb19SBen Gras * 53-bit precision 2
53*2fe8fb19SBen Gras * 64-bit precision 2
54*2fe8fb19SBen Gras * 113-bit precision 3
55*2fe8fb19SBen Gras * The actual value is the sum of them. Thus for 113-bit
56*2fe8fb19SBen Gras * precison, one may have to do something like:
57*2fe8fb19SBen Gras *
58*2fe8fb19SBen Gras * long double t,w,r_head, r_tail;
59*2fe8fb19SBen Gras * t = (long double)y[2] + (long double)y[1];
60*2fe8fb19SBen Gras * w = (long double)y[0];
61*2fe8fb19SBen Gras * r_head = t+w;
62*2fe8fb19SBen Gras * r_tail = w - (r_head - t);
63*2fe8fb19SBen Gras *
64*2fe8fb19SBen Gras * e0 The exponent of x[0]
65*2fe8fb19SBen Gras *
66*2fe8fb19SBen Gras * nx dimension of x[]
67*2fe8fb19SBen Gras *
68*2fe8fb19SBen Gras * prec an integer indicating the precision:
69*2fe8fb19SBen Gras * 0 24 bits (single)
70*2fe8fb19SBen Gras * 1 53 bits (double)
71*2fe8fb19SBen Gras * 2 64 bits (extended)
72*2fe8fb19SBen Gras * 3 113 bits (quad)
73*2fe8fb19SBen Gras *
74*2fe8fb19SBen Gras * ipio2[]
75*2fe8fb19SBen Gras * integer array, contains the (24*i)-th to (24*i+23)-th
76*2fe8fb19SBen Gras * bit of 2/pi after binary point. The corresponding
77*2fe8fb19SBen Gras * floating value is
78*2fe8fb19SBen Gras *
79*2fe8fb19SBen Gras * ipio2[i] * 2^(-24(i+1)).
80*2fe8fb19SBen Gras *
81*2fe8fb19SBen Gras * External function:
82*2fe8fb19SBen Gras * double scalbn(), floor();
83*2fe8fb19SBen Gras *
84*2fe8fb19SBen Gras *
85*2fe8fb19SBen Gras * Here is the description of some local variables:
86*2fe8fb19SBen Gras *
87*2fe8fb19SBen Gras * jk jk+1 is the initial number of terms of ipio2[] needed
88*2fe8fb19SBen Gras * in the computation. The recommended value is 2,3,4,
89*2fe8fb19SBen Gras * 6 for single, double, extended,and quad.
90*2fe8fb19SBen Gras *
91*2fe8fb19SBen Gras * jz local integer variable indicating the number of
92*2fe8fb19SBen Gras * terms of ipio2[] used.
93*2fe8fb19SBen Gras *
94*2fe8fb19SBen Gras * jx nx - 1
95*2fe8fb19SBen Gras *
96*2fe8fb19SBen Gras * jv index for pointing to the suitable ipio2[] for the
97*2fe8fb19SBen Gras * computation. In general, we want
98*2fe8fb19SBen Gras * ( 2^e0*x[0] * ipio2[jv-1]*2^(-24jv) )/8
99*2fe8fb19SBen Gras * is an integer. Thus
100*2fe8fb19SBen Gras * e0-3-24*jv >= 0 or (e0-3)/24 >= jv
101*2fe8fb19SBen Gras * Hence jv = max(0,(e0-3)/24).
102*2fe8fb19SBen Gras *
103*2fe8fb19SBen Gras * jp jp+1 is the number of terms in PIo2[] needed, jp = jk.
104*2fe8fb19SBen Gras *
105*2fe8fb19SBen Gras * q[] double array with integral value, representing the
106*2fe8fb19SBen Gras * 24-bits chunk of the product of x and 2/pi.
107*2fe8fb19SBen Gras *
108*2fe8fb19SBen Gras * q0 the corresponding exponent of q[0]. Note that the
109*2fe8fb19SBen Gras * exponent for q[i] would be q0-24*i.
110*2fe8fb19SBen Gras *
111*2fe8fb19SBen Gras * PIo2[] double precision array, obtained by cutting pi/2
112*2fe8fb19SBen Gras * into 24 bits chunks.
113*2fe8fb19SBen Gras *
114*2fe8fb19SBen Gras * f[] ipio2[] in floating point
115*2fe8fb19SBen Gras *
116*2fe8fb19SBen Gras * iq[] integer array by breaking up q[] in 24-bits chunk.
117*2fe8fb19SBen Gras *
118*2fe8fb19SBen Gras * fq[] final product of x*(2/pi) in fq[0],..,fq[jk]
119*2fe8fb19SBen Gras *
120*2fe8fb19SBen Gras * ih integer. If >0 it indicates q[] is >= 0.5, hence
121*2fe8fb19SBen Gras * it also indicates the *sign* of the result.
122*2fe8fb19SBen Gras *
123*2fe8fb19SBen Gras */
124*2fe8fb19SBen Gras
125*2fe8fb19SBen Gras
126*2fe8fb19SBen Gras /*
127*2fe8fb19SBen Gras * Constants:
128*2fe8fb19SBen Gras * The hexadecimal values are the intended ones for the following
129*2fe8fb19SBen Gras * constants. The decimal values may be used, provided that the
130*2fe8fb19SBen Gras * compiler will convert from decimal to binary accurately enough
131*2fe8fb19SBen Gras * to produce the hexadecimal values shown.
132*2fe8fb19SBen Gras */
133*2fe8fb19SBen Gras
134*2fe8fb19SBen Gras #include "namespace.h"
135*2fe8fb19SBen Gras #include "math.h"
136*2fe8fb19SBen Gras #include "math_private.h"
137*2fe8fb19SBen Gras
138*2fe8fb19SBen Gras static const int init_jk[] = {2,3,4,6}; /* initial value for jk */
139*2fe8fb19SBen Gras
140*2fe8fb19SBen Gras static const double PIo2[] = {
141*2fe8fb19SBen Gras 1.57079625129699707031e+00, /* 0x3FF921FB, 0x40000000 */
142*2fe8fb19SBen Gras 7.54978941586159635335e-08, /* 0x3E74442D, 0x00000000 */
143*2fe8fb19SBen Gras 5.39030252995776476554e-15, /* 0x3CF84698, 0x80000000 */
144*2fe8fb19SBen Gras 3.28200341580791294123e-22, /* 0x3B78CC51, 0x60000000 */
145*2fe8fb19SBen Gras 1.27065575308067607349e-29, /* 0x39F01B83, 0x80000000 */
146*2fe8fb19SBen Gras 1.22933308981111328932e-36, /* 0x387A2520, 0x40000000 */
147*2fe8fb19SBen Gras 2.73370053816464559624e-44, /* 0x36E38222, 0x80000000 */
148*2fe8fb19SBen Gras 2.16741683877804819444e-51, /* 0x3569F31D, 0x00000000 */
149*2fe8fb19SBen Gras };
150*2fe8fb19SBen Gras
151*2fe8fb19SBen Gras static const double
152*2fe8fb19SBen Gras zero = 0.0,
153*2fe8fb19SBen Gras one = 1.0,
154*2fe8fb19SBen Gras two24 = 1.67772160000000000000e+07, /* 0x41700000, 0x00000000 */
155*2fe8fb19SBen Gras twon24 = 5.96046447753906250000e-08; /* 0x3E700000, 0x00000000 */
156*2fe8fb19SBen Gras
157*2fe8fb19SBen Gras int
__kernel_rem_pio2(double * x,double * y,int e0,int nx,int prec,const int32_t * ipio2)158*2fe8fb19SBen Gras __kernel_rem_pio2(double *x, double *y, int e0, int nx, int prec, const int32_t *ipio2)
159*2fe8fb19SBen Gras {
160*2fe8fb19SBen Gras int32_t jz,jx,jv,jp,jk,carry,n,iq[20],i,j,k,m,q0,ih;
161*2fe8fb19SBen Gras double z,fw,f[20],fq[20],q[20];
162*2fe8fb19SBen Gras
163*2fe8fb19SBen Gras /* initialize jk*/
164*2fe8fb19SBen Gras jk = init_jk[prec];
165*2fe8fb19SBen Gras jp = jk;
166*2fe8fb19SBen Gras
167*2fe8fb19SBen Gras /* determine jx,jv,q0, note that 3>q0 */
168*2fe8fb19SBen Gras jx = nx-1;
169*2fe8fb19SBen Gras jv = (e0-3)/24; if(jv<0) jv=0;
170*2fe8fb19SBen Gras q0 = e0-24*(jv+1);
171*2fe8fb19SBen Gras
172*2fe8fb19SBen Gras /* set up f[0] to f[jx+jk] where f[jx+jk] = ipio2[jv+jk] */
173*2fe8fb19SBen Gras j = jv-jx; m = jx+jk;
174*2fe8fb19SBen Gras for(i=0;i<=m;i++,j++) f[i] = (j<0)? zero : (double) ipio2[j];
175*2fe8fb19SBen Gras
176*2fe8fb19SBen Gras /* compute q[0],q[1],...q[jk] */
177*2fe8fb19SBen Gras for (i=0;i<=jk;i++) {
178*2fe8fb19SBen Gras for(j=0,fw=0.0;j<=jx;j++) fw += x[j]*f[jx+i-j]; q[i] = fw;
179*2fe8fb19SBen Gras }
180*2fe8fb19SBen Gras
181*2fe8fb19SBen Gras jz = jk;
182*2fe8fb19SBen Gras recompute:
183*2fe8fb19SBen Gras /* distill q[] into iq[] reversingly */
184*2fe8fb19SBen Gras for(i=0,j=jz,z=q[jz];j>0;i++,j--) {
185*2fe8fb19SBen Gras fw = (double)((int32_t)(twon24* z));
186*2fe8fb19SBen Gras iq[i] = (int32_t)(z-two24*fw);
187*2fe8fb19SBen Gras z = q[j-1]+fw;
188*2fe8fb19SBen Gras }
189*2fe8fb19SBen Gras
190*2fe8fb19SBen Gras /* compute n */
191*2fe8fb19SBen Gras z = scalbn(z,q0); /* actual value of z */
192*2fe8fb19SBen Gras z -= 8.0*floor(z*0.125); /* trim off integer >= 8 */
193*2fe8fb19SBen Gras n = (int32_t) z;
194*2fe8fb19SBen Gras z -= (double)n;
195*2fe8fb19SBen Gras ih = 0;
196*2fe8fb19SBen Gras if(q0>0) { /* need iq[jz-1] to determine n */
197*2fe8fb19SBen Gras i = (iq[jz-1]>>(24-q0)); n += i;
198*2fe8fb19SBen Gras iq[jz-1] -= i<<(24-q0);
199*2fe8fb19SBen Gras ih = iq[jz-1]>>(23-q0);
200*2fe8fb19SBen Gras }
201*2fe8fb19SBen Gras else if(q0==0) ih = iq[jz-1]>>23;
202*2fe8fb19SBen Gras else if(z>=0.5) ih=2;
203*2fe8fb19SBen Gras
204*2fe8fb19SBen Gras if(ih>0) { /* q > 0.5 */
205*2fe8fb19SBen Gras n += 1; carry = 0;
206*2fe8fb19SBen Gras for(i=0;i<jz ;i++) { /* compute 1-q */
207*2fe8fb19SBen Gras j = iq[i];
208*2fe8fb19SBen Gras if(carry==0) {
209*2fe8fb19SBen Gras if(j!=0) {
210*2fe8fb19SBen Gras carry = 1; iq[i] = 0x1000000- j;
211*2fe8fb19SBen Gras }
212*2fe8fb19SBen Gras } else iq[i] = 0xffffff - j;
213*2fe8fb19SBen Gras }
214*2fe8fb19SBen Gras if(q0>0) { /* rare case: chance is 1 in 12 */
215*2fe8fb19SBen Gras switch(q0) {
216*2fe8fb19SBen Gras case 1:
217*2fe8fb19SBen Gras iq[jz-1] &= 0x7fffff; break;
218*2fe8fb19SBen Gras case 2:
219*2fe8fb19SBen Gras iq[jz-1] &= 0x3fffff; break;
220*2fe8fb19SBen Gras }
221*2fe8fb19SBen Gras }
222*2fe8fb19SBen Gras if(ih==2) {
223*2fe8fb19SBen Gras z = one - z;
224*2fe8fb19SBen Gras if(carry!=0) z -= scalbn(one,q0);
225*2fe8fb19SBen Gras }
226*2fe8fb19SBen Gras }
227*2fe8fb19SBen Gras
228*2fe8fb19SBen Gras /* check if recomputation is needed */
229*2fe8fb19SBen Gras if(z==zero) {
230*2fe8fb19SBen Gras j = 0;
231*2fe8fb19SBen Gras for (i=jz-1;i>=jk;i--) j |= iq[i];
232*2fe8fb19SBen Gras if(j==0) { /* need recomputation */
233*2fe8fb19SBen Gras for(k=1;iq[jk-k]==0;k++); /* k = no. of terms needed */
234*2fe8fb19SBen Gras
235*2fe8fb19SBen Gras for(i=jz+1;i<=jz+k;i++) { /* add q[jz+1] to q[jz+k] */
236*2fe8fb19SBen Gras f[jx+i] = (double) ipio2[jv+i];
237*2fe8fb19SBen Gras for(j=0,fw=0.0;j<=jx;j++) fw += x[j]*f[jx+i-j];
238*2fe8fb19SBen Gras q[i] = fw;
239*2fe8fb19SBen Gras }
240*2fe8fb19SBen Gras jz += k;
241*2fe8fb19SBen Gras goto recompute;
242*2fe8fb19SBen Gras }
243*2fe8fb19SBen Gras }
244*2fe8fb19SBen Gras
245*2fe8fb19SBen Gras /* chop off zero terms */
246*2fe8fb19SBen Gras if(z==0.0) {
247*2fe8fb19SBen Gras jz -= 1; q0 -= 24;
248*2fe8fb19SBen Gras while(iq[jz]==0) { jz--; q0-=24;}
249*2fe8fb19SBen Gras } else { /* break z into 24-bit if necessary */
250*2fe8fb19SBen Gras z = scalbn(z,-q0);
251*2fe8fb19SBen Gras if(z>=two24) {
252*2fe8fb19SBen Gras fw = (double)((int32_t)(twon24*z));
253*2fe8fb19SBen Gras iq[jz] = (int32_t)(z-two24*fw);
254*2fe8fb19SBen Gras jz += 1; q0 += 24;
255*2fe8fb19SBen Gras iq[jz] = (int32_t) fw;
256*2fe8fb19SBen Gras } else iq[jz] = (int32_t) z ;
257*2fe8fb19SBen Gras }
258*2fe8fb19SBen Gras
259*2fe8fb19SBen Gras /* convert integer "bit" chunk to floating-point value */
260*2fe8fb19SBen Gras fw = scalbn(one,q0);
261*2fe8fb19SBen Gras for(i=jz;i>=0;i--) {
262*2fe8fb19SBen Gras q[i] = fw*(double)iq[i]; fw*=twon24;
263*2fe8fb19SBen Gras }
264*2fe8fb19SBen Gras
265*2fe8fb19SBen Gras /* compute PIo2[0,...,jp]*q[jz,...,0] */
266*2fe8fb19SBen Gras for(i=jz;i>=0;i--) {
267*2fe8fb19SBen Gras for(fw=0.0,k=0;k<=jp&&k<=jz-i;k++) fw += PIo2[k]*q[i+k];
268*2fe8fb19SBen Gras fq[jz-i] = fw;
269*2fe8fb19SBen Gras }
270*2fe8fb19SBen Gras
271*2fe8fb19SBen Gras /* compress fq[] into y[] */
272*2fe8fb19SBen Gras switch(prec) {
273*2fe8fb19SBen Gras case 0:
274*2fe8fb19SBen Gras fw = 0.0;
275*2fe8fb19SBen Gras for (i=jz;i>=0;i--) fw += fq[i];
276*2fe8fb19SBen Gras y[0] = (ih==0)? fw: -fw;
277*2fe8fb19SBen Gras break;
278*2fe8fb19SBen Gras case 1:
279*2fe8fb19SBen Gras case 2:
280*2fe8fb19SBen Gras fw = 0.0;
281*2fe8fb19SBen Gras for (i=jz;i>=0;i--) fw += fq[i];
282*2fe8fb19SBen Gras y[0] = (ih==0)? fw: -fw;
283*2fe8fb19SBen Gras fw = fq[0]-fw;
284*2fe8fb19SBen Gras for (i=1;i<=jz;i++) fw += fq[i];
285*2fe8fb19SBen Gras y[1] = (ih==0)? fw: -fw;
286*2fe8fb19SBen Gras break;
287*2fe8fb19SBen Gras case 3: /* painful */
288*2fe8fb19SBen Gras for (i=jz;i>0;i--) {
289*2fe8fb19SBen Gras fw = fq[i-1]+fq[i];
290*2fe8fb19SBen Gras fq[i] += fq[i-1]-fw;
291*2fe8fb19SBen Gras fq[i-1] = fw;
292*2fe8fb19SBen Gras }
293*2fe8fb19SBen Gras for (i=jz;i>1;i--) {
294*2fe8fb19SBen Gras fw = fq[i-1]+fq[i];
295*2fe8fb19SBen Gras fq[i] += fq[i-1]-fw;
296*2fe8fb19SBen Gras fq[i-1] = fw;
297*2fe8fb19SBen Gras }
298*2fe8fb19SBen Gras for (fw=0.0,i=jz;i>=2;i--) fw += fq[i];
299*2fe8fb19SBen Gras if(ih==0) {
300*2fe8fb19SBen Gras y[0] = fq[0]; y[1] = fq[1]; y[2] = fw;
301*2fe8fb19SBen Gras } else {
302*2fe8fb19SBen Gras y[0] = -fq[0]; y[1] = -fq[1]; y[2] = -fw;
303*2fe8fb19SBen Gras }
304*2fe8fb19SBen Gras }
305*2fe8fb19SBen Gras return n&7;
306*2fe8fb19SBen Gras }
307