xref: /csrg-svn/lib/libm/ieee/support.c (revision 24581)
1*24581Szliu /*
2*24581Szliu  * Copyright (c) 1985 Regents of the University of California.
3*24581Szliu  *
4*24581Szliu  * Use and reproduction of this software are granted  in  accordance  with
5*24581Szliu  * the terms and conditions specified in  the  Berkeley  Software  License
6*24581Szliu  * Agreement (in particular, this entails acknowledgement of the programs'
7*24581Szliu  * source, and inclusion of this notice) with the additional understanding
8*24581Szliu  * that  all  recipients  should regard themselves as participants  in  an
9*24581Szliu  * ongoing  research  project and hence should  feel  obligated  to report
10*24581Szliu  * their  experiences (good or bad) with these elementary function  codes,
11*24581Szliu  * using "sendbug 4bsd-bugs@BERKELEY", to the authors.
12*24581Szliu  */
13*24581Szliu 
#ifndef lint
/* SCCS identification string; compiled out for lint runs */
static char sccsid[] = "@(#)support.c	1.1 (ELEFUNT) 09/06/85";
#endif /* not lint */
17*24581Szliu 
18*24581Szliu /*
19*24581Szliu  * Some IEEE standard p754 recommended functions and remainder and sqrt for
20*24581Szliu  * supporting the C elementary functions.
21*24581Szliu  ******************************************************************************
22*24581Szliu  * WARNING:
23*24581Szliu  *      These codes are developed (in double) to support the C elementary
24*24581Szliu  * functions temporarily. They are not universal, and some of them are very
25*24581Szliu  * slow (in particular, drem and sqrt are extremely inefficient). Each
26*24581Szliu  * computer system should have its implementation of these functions using
27*24581Szliu  * its own assembler.
28*24581Szliu  ******************************************************************************
29*24581Szliu  *
30*24581Szliu  * IEEE p754 required operations:
31*24581Szliu  *     drem(x,p)
32*24581Szliu  *              returns  x REM y  =  x - [x/y]*y , where [x/y] is the integer
33*24581Szliu  *              nearest x/y; in half way case, choose the even one.
34*24581Szliu  *     sqrt(x)
35*24581Szliu  *              returns the square root of x correctly rounded according to
36*24581Szliu  *		the rounding mode.
37*24581Szliu  *
38*24581Szliu  * IEEE p754 recommended functions:
39*24581Szliu  * (a) copysign(x,y)
40*24581Szliu  *              returns x with the sign of y.
41*24581Szliu  * (b) scalb(x,N)
42*24581Szliu  *              returns  x * (2**N), for integer values N.
43*24581Szliu  * (c) logb(x)
44*24581Szliu  *              returns the unbiased exponent of x, a signed integer in
45*24581Szliu  *              double precision, except that logb(0) is -INF, logb(INF)
46*24581Szliu  *              is +INF, and logb(NAN) is that NAN.
47*24581Szliu  * (d) finite(x)
48*24581Szliu  *              returns the value TRUE if -INF < x < +INF and returns
49*24581Szliu  *              FALSE otherwise.
50*24581Szliu  *
51*24581Szliu  *
52*24581Szliu  * CODED IN C BY K.C. NG, 11/25/84;
53*24581Szliu  * REVISED BY K.C. NG on 1/22/85, 2/13/85, 3/24/85.
54*24581Szliu  */
55*24581Szliu 
56*24581Szliu 
/*
 * Format-dependent constants shared by the routines below:
 *   msign  mask selecting everything but the sign bit of the
 *          high-order 16-bit word of a double
 *   mexp   mask selecting the exponent field of that word
 *   prep1  significand precision + 1 bits (D format 56+1, IEEE 53+1);
 *          used as the scaling exponent when normalizing subnormals
 *   gap    shift count from bit 0 of the word to the exponent field
 *   bias   exponent bias of the format
 *   novf   a value near the overflow threshold
 *   nunf   a value near the underflow threshold
 */
#ifdef VAX      /* VAX D format */
    static unsigned short msign=0x7fff , mexp =0x7f80 ;
    static short  prep1=57, gap=7, bias=129           ;
    static double novf=1.7E38, nunf=3.0E-39, zero=0.0 ;
#else           /*IEEE double format */
    static unsigned short msign=0x7fff, mexp =0x7ff0  ;
    static short prep1=54, gap=4, bias=1023           ;
    static double novf=1.7E308, nunf=3.0E-308,zero=0.0;
#endif
66*24581Szliu 
/*
 * scalb(x,N) -- return x * 2**N for integer N, by adjusting the
 * exponent field of x directly; multiplication is used only when
 * scaling through the subnormal range.
 */
double scalb(x,N)
double x; int N;
{
        int k;
        double scalb();

#ifdef NATIONAL
        /* sign/exponent live in the last 16-bit word of the double */
        unsigned short *px=(unsigned short *) &x + 3;
#else /* VAX, SUN, ZILOG */
        /* sign/exponent live in the first 16-bit word of the double */
        unsigned short *px=(unsigned short *) &x;
#endif

        if( x == zero )  return(x);     /* 2**N * 0 is still 0 */

#ifdef VAX
        /* NOTE(review): ~msign promotes to int 0xffff8000, which the
         * masked exponent (<= 0x7f80) can never equal, so this test is
         * always true on the VAX -- presumably harmless since the VAX
         * has no INF/NaN encodings to screen out, but confirm. */
        if( (k= *px & mexp ) != ~msign ) {
            if( N<-260) return(nunf*nunf); else if(N>260) return(novf+novf);
#else   /* IEEE */
        if( (k= *px & mexp ) != mexp ) {        /* skip INF and NaN */
            /* |N| beyond any possible scaling: force under/overflow */
            if( N<-2100) return(nunf*nunf); else if(N>2100) return(novf+novf);
            /* subnormal x: normalize by 2**prep1 first, then rescale */
            if( k == 0 ) {
                 x *= scalb(1.0,(int)prep1);  N -= prep1; return(scalb(x,N));}
#endif

            /* form the new biased exponent in k */
            if((k = (k>>gap)+ N) > 0 )
                if( k < (mexp>>gap) ) *px = (*px&~mexp) | (k<<gap);
                else x=novf+novf;               /* overflow */
            else
                if( k > -prep1 )
                                        /* gradual underflow */
                    {*px=(*px&~mexp)|(short)(1<<gap); x *= scalb(1.0,k-1);}
                else
                return(nunf*nunf);      /* total underflow */
            }
        return(x);
}
103*24581Szliu 
104*24581Szliu 
105*24581Szliu double copysign(x,y)
106*24581Szliu double x,y;
107*24581Szliu {
108*24581Szliu #ifdef NATIONAL
109*24581Szliu         unsigned short  *px=(unsigned short *) &x+3,
110*24581Szliu                         *py=(unsigned short *) &y+3;
111*24581Szliu #else /* VAX, SUN, ZILOG */
112*24581Szliu         unsigned short  *px=(unsigned short *) &x,
113*24581Szliu                         *py=(unsigned short *) &y;
114*24581Szliu #endif
115*24581Szliu 
116*24581Szliu #ifdef VAX
117*24581Szliu         if ( (*px & mexp) == 0 ) return(x);
118*24581Szliu #endif
119*24581Szliu 
120*24581Szliu         *px = ( *px & msign ) | ( *py & ~msign );
121*24581Szliu         return(x);
122*24581Szliu }
123*24581Szliu 
/*
 * logb(x) -- return the unbiased exponent of x as a double;
 * logb(0) is -INF, logb(+-INF) is +INF, and logb(NaN) is that NaN.
 */
double logb(x)
double x;
{

#ifdef NATIONAL
        short *px=(short *) &x+3, k;    /* exponent word stored last */
#else /* VAX, SUN, ZILOG */
        short *px=(short *) &x, k;      /* exponent word stored first */
#endif

#ifdef VAX
        return( ((*px & mexp)>>gap) - bias);
#else /* IEEE */
        if( (k= *px & mexp ) != mexp )          /* x is finite */
            if ( k != 0 )
                return ( (k>>gap) - bias );     /* normal number */
            else if( x != zero)
                return ( -1022.0 );             /* subnormal number */
            else
                return(-(1.0/zero));            /* logb(0) = -INF */
        else if(x != x)
            return(x);                          /* logb(NaN) is that NaN */
        else
            {*px &= msign; return(x);}          /* logb(+-INF) = +INF */
#endif
}
150*24581Szliu 
151*24581Szliu finite(x)
152*24581Szliu double x;
153*24581Szliu {
154*24581Szliu #ifdef VAX
155*24581Szliu         return(1.0);
156*24581Szliu #else  /* IEEE */
157*24581Szliu #ifdef NATIONAL
158*24581Szliu         return( (*((short *) &x+3 ) & mexp ) != mexp );
159*24581Szliu #else /* SUN, ZILOG */
160*24581Szliu         return( (*((short *) &x ) & mexp ) != mexp );
161*24581Szliu #endif
162*24581Szliu #endif
163*24581Szliu }
164*24581Szliu 
/*
 * drem(x,p) -- IEEE p754 remainder x REM p = x - [x/p]*p, where [x/p]
 * is the integer nearest x/p (even in the half-way case).  Computed by
 * repeatedly subtracting exponent-scaled multiples of 2|p| from |x|.
 */
double drem(x,p)
double x,p;
{
        short sign;
        double hp,dp,tmp,drem(),scalb();
        unsigned short  k;
#ifdef NATIONAL
        /* sign/exponent short is the last word of a double */
        unsigned short
              *px=(unsigned short *) &x  +3,
              *pp=(unsigned short *) &p  +3,
              *pd=(unsigned short *) &dp +3,
              *pt=(unsigned short *) &tmp+3;
#else /* VAX, SUN, ZILOG */
        /* sign/exponent short is the first word of a double */
        unsigned short
              *px=(unsigned short *) &x  ,
              *pp=(unsigned short *) &p  ,
              *pd=(unsigned short *) &dp ,
              *pt=(unsigned short *) &tmp;
#endif

        *pp &= msign ;          /* p = |p| */

#ifdef VAX
        if( ( *px & mexp ) == ~msign || p == zero )
#else /* IEEE */
        /* x INF or NaN, or p zero: the remainder is a NaN */
        if( ( *px & mexp ) == mexp || p == zero )
#endif

                return( (x != x)? x:zero/zero );

        else  if ( ((*pp & mexp)>>gap) <= 1 )
                /* subnormal p, or almost subnormal p */
                /* scale p up by 2**prep1, reduce twice, scale back */
            { double b; b=scalb(1.0,(int)prep1);
              p *= b; x = drem(x,p); x *= b; return(drem(x,p)/b);}
        else  if ( p >= novf/2)
                /* huge p: halve both so dp=p+p below cannot overflow */
            { p /= 2 ; x /= 2; return(drem(x,p)*2);}
        else
            {
                dp=p+p; hp=p/2;
                sign= *px & ~msign ;    /* remember the sign of x ... */
                *px &= msign       ;    /* ... and reduce |x| */
                while ( x > dp )
                    {
                        /* lift tmp=dp into x's binade; back off one
                         * exponent unit if that overshoots x */
                        k=(*px & mexp) - (*pd & mexp) ;
                        tmp = dp ;
                        *pt += k ;

#ifdef VAX
                        if( x < tmp ) *pt -= 128 ;
#else /* IEEE */
                        if( x < tmp ) *pt -= 16 ;
#endif

                        x -= tmp ;
                    }
                /* fold the remainder into the half-interval about 0;
                 * NOTE(review): the >= on the second test appears to
                 * choose the even quotient in the half-way case --
                 * confirm against the p754 REM definition */
                if ( x > hp )
                    { x -= p ;  if ( x >= hp ) x -= p ; }

		*px = *px ^ sign;       /* restore the sign of x */
                return( x);

            }
}
/*
 * sqrt(x) -- square root, correctly rounded in the ambient rounding
 * mode: x is scaled into [1,4), the root is generated one bit at a
 * time into q (k iterations), and the last bit is set according to the
 * rounding mode, which is detected by probing how 1.0 + tiny rounds.
 */
double sqrt(x)
double x;
{
        double q,s,b,r;
        double logb(),scalb();
        double t,zero=0.0;
        int m,n,i,finite();
#ifdef VAX
        int k=54;
#else   /* IEEE */
        int k=51;
#endif

    /* sqrt(NaN) is NaN, sqrt(+-0) = +-0 */
        if(x!=x||x==zero) return(x);

    /* sqrt(negative) is invalid */
        if(x<zero) return(zero/zero);

    /* sqrt(INF) is INF */
        if(!finite(x)) return(x);

    /* scale x to [1,4) */
        n=logb(x);
        x=scalb(x,-n);
        if((m=logb(x))!=0) x=scalb(x,-m);       /* subnormal number */
        m += n;
        n = m/2;
        /* odd exponent: double x so the final scalb exponent is m/2 */
        if((n+n)!=m) {x *= 2; m -=1; n=m/2;}

    /* generate sqrt(x) bit by bit (accumulating in q) */
            q=1.0; s=4.0; x -= 1.0; r=1;
            for(i=1;i<=k;i++) {
                t=s+1; x *= 4; r /= 2;
                if(t<=x) {
                    s=t+t+2, x -= t; q += r;}
                else
                    s *= 2;
                }

    /* generate the last bit and determine the final rounding */
            r/=2; x *= 4;
            /* NOTE(review): the discarded expression 100+r here, and
             * the b=1.0+... probes below, rely on floating-point side
             * effects (the inexact flag, the rounding mode); a modern
             * optimizer may delete or fold them -- confirm before
             * reusing this code */
            if(x==zero) goto end; 100+r; /* trigger inexact flag */
            if(s<x) {
                q+=r; x -=s; s += 2; s *= 2; x *= 4;
                t = (x-s)-5;
                b=1.0+3*r/4; if(b==1.0) goto end; /* b==1 : Round-to-zero */
                b=1.0+r/4;   if(b>1.0) t=1;	/* b>1 : Round-to-(+INF) */
                if(t>=0) q+=r; }	      /* else: Round-to-nearest */
            else {
                s *= 2; x *= 4;
                t = (x-s)-1;
                b=1.0+3*r/4; if(b==1.0) goto end;
                b=1.0+r/4;   if(b>1.0) t=1;
                if(t>=0) q+=r; }

end:        return(scalb(q,n));
}
286*24581Szliu 
287*24581Szliu #if 0
288*24581Szliu /* DREM(X,Y)
289*24581Szliu  * RETURN X REM Y =X-N*Y, N=[X/Y] ROUNDED (ROUNDED TO EVEN IN THE HALF WAY CASE)
290*24581Szliu  * DOUBLE PRECISION (VAX D format 56 bits, IEEE DOUBLE 53 BITS)
291*24581Szliu  * INTENDED FOR ASSEMBLY LANGUAGE
292*24581Szliu  * CODED IN C BY K.C. NG, 3/23/85, 4/8/85.
293*24581Szliu  *
294*24581Szliu  * Warning: this code should not get compiled in unless ALL of
295*24581Szliu  * the following machine-dependent routines are supplied.
296*24581Szliu  *
297*24581Szliu  * Required machine dependent functions (not on a VAX):
298*24581Szliu  *     swapINX(i): save inexact flag and reset it to "i"
299*24581Szliu  *     swapENI(e): save inexact enable and reset it to "e"
300*24581Szliu  */
301*24581Szliu 
/*
 * Model drem for assembly coders (compiled out; see the warning above):
 * y1 is y with its low 27 significand bits cleared, so the reduction
 * x - n*y1 - n*(y-y1) can be formed from exactly representable pieces
 * as long as each per-pass quotient n stays below 2**26 (enforced via
 * the m25 exponent bump).  Requires swapINX/swapENI from the host.
 */
double drem(x,y)
double x,y;
{

#ifdef NATIONAL		/* order of words in floating point number */
	static n0=3,n1=2,n2=1,n3=0;
#else /* VAX, SUN, ZILOG */
	static n0=0,n1=1,n2=2,n3=3;
#endif

	/* m25 and m57 are 25 and 57 expressed as IEEE biased-exponent
	 * increments (value << 4) */
    	static unsigned short mexp =0x7ff0, m25 =0x0190, m57 =0x0390;
	static double zero=0.0;
	double hy,y1,t,t1;
	short k;
	long n;
	int i,e;
	unsigned short xexp,yexp, *px  =(unsigned short *) &x  ,
	      		nx,nf,	  *py  =(unsigned short *) &y  ,
	      		sign,	  *pt  =(unsigned short *) &t  ,
	      			  *pt1 =(unsigned short *) &t1 ;

	xexp = px[n0] & mexp ;	/* exponent of x */
	yexp = py[n0] & mexp ;	/* exponent of y */
	sign = px[n0] &0x8000;	/* sign of x     */

/* return NaN if x is NaN, or y is NaN, or x is INF, or y is zero */
	if(x!=x) return(x); if(y!=y) return(y);	     /* x or y is NaN */
	if( xexp == mexp )   return(zero/zero);      /* x is INF */
	if(y==zero) return(y/y);

/* save the inexact flag and inexact enable in i and e respectively
 * and reset them to zero
 */
	i=swapINX(0);	e=swapENI(0);

/* subnormal number */
	nx=0;
	if(yexp==0) {t=1.0,pt[n0]+=m57; y*=t; nx=m57;}	/* y *= 2**57 */

/* if y is tiny (biased exponent <= 57), scale up y to y*2**57 */
	if( yexp <= m57 ) {py[n0]+=m57; nx+=m57; yexp+=m57;}

	nf=nx;
	py[n0] &= 0x7fff;	/* y = |y| */
	px[n0] &= 0x7fff;	/* x = |x| */

/* mask off the least significant 27 bits of y */
	t=y; pt[n3]=0; pt[n2]&=0xf800; y1=t;

/* LOOP: argument reduction on x whenever x > y */
loop:
	while ( x > y )
	{
	    t=y;
	    t1=y1;
	    xexp=px[n0]&mexp;	  /* exponent of x */
	    k=xexp-yexp-m25;
	    if(k>0) 	/* if x/y >= 2**26, scale up y so that x/y < 2**26 */
		{pt[n0]+=k;pt1[n0]+=k;}
	    n=x/t; x=(x-n*t1)-n*(t-t1);
	}
    /* end while (x > y) */

	/* if x was scaled down for a tiny y, undo and reduce again */
	if(nx!=0) {t=1.0; pt[n0]+=nx; x*=t; nx=0; goto loop;}

/* final adjustment */

	hy=y/2.0;
	/* NOTE(review): n is assigned only inside the while loop above;
	 * if x <= y on entry, n is used uninitialized here -- confirm */
	if(x>hy||((x==hy)&&n%2==1)) x-=y;
	px[n0] ^= sign;		/* restore the sign of x */
	if(nf!=0) { t=1.0; pt[n0]-=nf; x*=t;}	/* undo the y scaling */

/* restore inexact flag and inexact enable */
	swapINX(i); swapENI(e);

	return(x);
}
379*24581Szliu #endif
380*24581Szliu 
381*24581Szliu #if 0
382*24581Szliu /* SQRT
383*24581Szliu  * RETURN CORRECTLY ROUNDED (ACCORDING TO THE ROUNDING MODE) SQRT
384*24581Szliu  * FOR IEEE DOUBLE PRECISION ONLY, INTENDED FOR ASSEMBLY LANGUAGE
385*24581Szliu  * CODED IN C BY K.C. NG, 3/22/85.
386*24581Szliu  *
387*24581Szliu  * Warning: this code should not get compiled in unless ALL of
388*24581Szliu  * the following machine-dependent routines are supplied.
389*24581Szliu  *
390*24581Szliu  * Required machine dependent functions:
391*24581Szliu  *     swapINX(i)  ...return the status of INEXACT flag and reset it to "i"
392*24581Szliu  *     swapRM(r)   ...return the current Rounding Mode and reset it to "r"
393*24581Szliu  *     swapENI(e)  ...return the status of inexact enable and reset it to "e"
394*24581Szliu  *     addc(t)     ...perform t=t+1 regarding t as a 64 bit unsigned integer
395*24581Szliu  *     subc(t)     ...perform t=t-1 regarding t as a 64 bit unsigned integer
396*24581Szliu  */
397*24581Szliu 
398*24581Szliu static unsigned long table[] = {
399*24581Szliu 0, 1204, 3062, 5746, 9193, 13348, 18162, 23592, 29598, 36145, 43202, 50740,
400*24581Szliu 58733, 67158, 75992, 85215, 83599, 71378, 60428, 50647, 41945, 34246, 27478,
401*24581Szliu 21581, 16499, 12183, 8588, 5674, 3403, 1742, 661, 130, };
402*24581Szliu 
/*
 * Model correctly-rounded sqrt for IEEE double (compiled out; see the
 * warning above): table-driven initial guess to ~8 significant bits,
 * one corrected Heron step to ~18, one tripling step to ~56, then the
 * last bit is twiddled under round-to-zero using the inexact flag to
 * finish in the caller's rounding mode.  Requires swapINX, swapRM,
 * swapENI, addc and subc from the host.
 */
double newsqrt(x)
double x;
{
        double y,z,t,addc(),subc(),b54=134217728.*134217728.; /* b54=2**54 */
        long mx,scalx,mexp=0x7ff00000;
        int i,j,r,e,swapINX(),swapRM(),swapENI();
        unsigned long *py=(unsigned long *) &y   ,
                      *pt=(unsigned long *) &t   ,
                      *px=(unsigned long *) &x   ;
#ifdef NATIONAL         /* ordering of word in a floating point number */
        int n0=1, n1=0;
#else
        int n0=0, n1=1;
#endif
/* Rounding Mode:  RN ...round-to-nearest
 *                 RZ ...round-towards 0
 *                 RP ...round-towards +INF
 *		   RM ...round-towards -INF
 */
        int RN=0,RZ=1,RP=2,RM=3;/* machine dependent: work on a Zilog Z8070
                                 * and a National 32081 & 16081
                                 */

/* exceptions */
	if(x!=x||x==0.0) return(x);  /* sqrt(NaN) is NaN, sqrt(+-0) = +-0 */
	if(x<0) return((x-x)/(x-x)); /* sqrt(negative) is invalid */
        if((mx=px[n0]&mexp)==mexp) return(x);  /* sqrt(+INF) is +INF */

/* save, reset, initialize */
        e=swapENI(0);   /* ...save and reset the inexact enable */
        i=swapINX(0);   /* ...save INEXACT flag */
        r=swapRM(RN);   /* ...save and reset the Rounding Mode to RN */
        scalx=0;

/* subnormal number, scale up x to x*2**54 */
        if(mx==0) {x *= b54 ; scalx-=0x01b00000;}

/* scale x to avoid intermediate over/underflow:
 * if (x > 2**512) x=x/2**512; if (x < 2**-512) x=x*2**512 */
        if(mx>0x5ff00000) {px[n0] -= 0x20000000; scalx+= 0x10000000;}
        if(mx<0x1ff00000) {px[n0] += 0x20000000; scalx-= 0x10000000;}

/* magic initial approximation to almost 8 sig. bits */
        py[n0]=(px[n0]>>1)+0x1ff80000;
        py[n0]=py[n0]-table[(py[n0]>>15)&31];

/* Heron's rule once with correction to improve y to almost 18 sig. bits */
        t=x/y; y=y+t; py[n0]=py[n0]-0x00100006; py[n1]=0;

/* triple to almost 56 sig. bits; now y approx. sqrt(x) to within 1 ulp */
        t=y*y; z=t;  pt[n0]+=0x00100000; t+=z; z=(x-z)*y;
        t=z/(t+x) ;  pt[n0]+=0x00100000; y+=t;

/* twiddle last bit to force y correctly rounded */
        swapRM(RZ);     /* ...set Rounding Mode to round-toward-zero */
        swapINX(0);     /* ...clear INEXACT flag */
        swapENI(e);     /* ...restore inexact enable status */
        t=x/y;          /* ...chopped quotient, possibly inexact */
        j=swapINX(i);   /* ...read and restore inexact flag */
        if(j==0) { if(t==y) goto end; else t=subc(t); }  /* ...t=t-ulp */
        /* NOTE(review): discarded expression, kept only for its FP side
         * effect; a modern optimizer may delete it */
        b54+0.1;        /* ..trigger inexact flag, sqrt(x) is inexact */
        if(r==RN) t=addc(t);            /* ...t=t+ulp */
        else if(r==RP) { t=addc(t);y=addc(y);}/* ...t=t+ulp;y=y+ulp; */
        y=y+t;                          /* ...chopped sum */
        py[n0]=py[n0]-0x00100000;       /* ...correctly rounded sqrt(x) */
end:    py[n0]=py[n0]+scalx;            /* ...scale back y */
        swapRM(r);                      /* ...restore Rounding Mode */
        return(y);
}
472*24581Szliu #endif
473