/*
 * Copyright (c) 1985 Regents of the University of California.
 *
 * Use and reproduction of this software are granted  in  accordance  with
 * the terms and conditions specified in  the  Berkeley  Software  License
 * Agreement (in particular, this entails acknowledgement of the programs'
 * source, and inclusion of this notice) with the additional understanding
 * that  all  recipients  should regard themselves as participants  in  an
 * ongoing  research  project and hence should  feel  obligated  to report
 * their  experiences (good or bad) with these elementary function  codes,
 * using "sendbug 4bsd-bugs@BERKELEY", to the authors.
 */

#ifndef lint
static char sccsid[] =
"@(#)support.c	1.1 (Berkeley) 5/23/85; 1.9 (ucb.elefunt) 07/11/87";
#endif /* not lint */

/*
 * Some IEEE standard 754 recommended functions, plus remainder and sqrt,
 * for supporting the C elementary functions.
 ******************************************************************************
 * WARNING:
 *      These routines were developed (in double precision) to support the C
 * elementary functions temporarily. They are not universal, and some of them
 * are very slow (in particular, drem and sqrt are extremely inefficient).
 * Each computer system should have its own implementation of these functions,
 * written in its own assembler.
 ******************************************************************************
 *
 * IEEE 754 required operations:
 *     drem(x,p)
 *              returns  x REM p  =  x - [x/p]*p , where [x/p] is the integer
 *              nearest x/p; in the halfway case, choose the even one.
 *     sqrt(x)
 *              returns the square root of x correctly rounded according to
 *		the rounding mode.
 *
 * IEEE 754 recommended functions:
 * (a) copysign(x,y)
 *              returns x with the sign of y.
 * (b) scalb(x,N)
 *              returns  x * (2**N), for integer values N.
 * (c) logb(x)
 *              returns the unbiased exponent of x, a signed integer in
 *              double precision, except that logb(0) is -INF, logb(INF)
 *              is +INF, and logb(NAN) is that NAN.
 * (d) finite(x)
 *              returns the value TRUE if -INF < x < +INF and returns
 *              FALSE otherwise.
 *
 *
 * CODED IN C BY K.C. NG, 11/25/84;
 * REVISED BY K.C. NG on 1/22/85, 2/13/85, 3/24/85.
 */

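/*
 * Illustration (not part of the library build): sample values the routines
 * listed above should produce on an IEEE double machine.  The driver below
 * is a hypothetical sketch for the reader, guarded out with #if 0 so it is
 * never compiled.
 */
#if 0
#include <stdio.h>

main()
{
	double copysign(), scalb(), logb(), sqrt();

	printf("%g\n", copysign(3.0, -1.0));	/* -3: magnitude of x, sign of y */
	printf("%g\n", scalb(1.5, 4));		/* 24: 1.5 * 2**4 */
	printf("%g\n", logb(10.0));		/* 3: 2**3 <= 10 < 2**4 */
	printf("%d\n", finite(10.0));		/* 1 (TRUE): 10 is neither INF nor NaN */
	printf("%g\n", sqrt(2.0));		/* 1.41421..., correctly rounded */
	return 0;
}
#endif
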
#if (defined(VAX)||defined(TAHOE))      /* VAX D format */
#include <errno.h>
    static unsigned short msign=0x7fff , mexp =0x7f80 ;
    static short  prep1=57, gap=7, bias=129           ;
    static double novf=1.7E38, nunf=3.0E-39, zero=0.0 ;
#else           /* IEEE double format */
    static unsigned short msign=0x7fff, mexp =0x7ff0  ;
    static short prep1=54, gap=4, bias=1023           ;
    static double novf=1.7E308, nunf=3.0E-308,zero=0.0;
#endif

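/*
 * Layout note (illustration only): in IEEE double format the most
 * significant 16-bit word of a double is  sign(1) | exponent(11) |
 * fraction(4), hence gap=4, mexp=0x7ff0 and bias=1023; in VAX D format it
 * is  sign(1) | exponent(8) | fraction(7), hence gap=7, mexp=0x7f80 and
 * bias=129.  For example, the high word of 1.0 on an IEEE machine is
 * 0x3ff0, so ((0x3ff0 & mexp) >> gap) - bias == 0.  The check below is a
 * hypothetical sketch, not library code.
 */
#if 0
check_layout()
{
	double one = 1.0;
	unsigned short hi = *(unsigned short *) &one;	/* word order is machine dependent */

	/* expect 0: the unbiased exponent of 1.0 */
	return (((hi & mexp) >> gap) - bias);
}
#endif
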
double scalb(x,N)
double x; int N;
{
        int k;
        double scalb();

#ifdef NATIONAL
        unsigned short *px=(unsigned short *) &x + 3;
#else /* VAX, SUN, ZILOG, TAHOE */
        unsigned short *px=(unsigned short *) &x;
#endif

        if( x == zero )  return(x);

#if (defined(VAX)||defined(TAHOE))
        if( (k= *px & mexp ) != ~msign ) {
            if (N < -260)
		return(nunf*nunf);
	    else if (N > 260) {
		extern double infnan(),copysign();
		return(copysign(infnan(ERANGE),x));
	    }
#else   /* IEEE */
        if( (k= *px & mexp ) != mexp ) {
            if( N<-2100) return(nunf*nunf); else if(N>2100) return(novf+novf);
            if( k == 0 ) {
                 x *= scalb(1.0,(int)prep1);  N -= prep1; return(scalb(x,N));}
#endif

            if((k = (k>>gap)+ N) > 0 )
                if( k < (mexp>>gap) ) *px = (*px&~mexp) | (k<<gap);
                else x=novf+novf;               /* overflow */
            else
                if( k > -prep1 )
                                        /* gradual underflow */
                    {*px=(*px&~mexp)|(short)(1<<gap); x *= scalb(1.0,k-1);}
                else
                return(nunf*nunf);
            }
        return(x);
}
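
/*
 * For reference only: arithmetically scalb(x,N) is just x * 2**N.  A naive
 * (and much slower) version that avoids the exponent-field surgery above
 * might look like the sketch below; it is an illustration with a
 * hypothetical name, not the method used here, and it ignores the careful
 * overflow/underflow handling.
 */
#if 0
double slow_scalb(x, N)
double x; int N;
{
	while (N > 0) { x *= 2.0; N--; }
	while (N < 0) { x *= 0.5; N++; }
	return (x);
}
#endif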


double copysign(x,y)
double x,y;
{
#ifdef NATIONAL
        unsigned short  *px=(unsigned short *) &x+3,
                        *py=(unsigned short *) &y+3;
#else /* VAX, SUN, ZILOG, TAHOE */
        unsigned short  *px=(unsigned short *) &x,
                        *py=(unsigned short *) &y;
#endif

#if (defined(VAX)||defined(TAHOE))
        if ( (*px & mexp) == 0 ) return(x);
#endif

        *px = ( *px & msign ) | ( *py & ~msign );
        return(x);
}
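
/*
 * Note (illustration only): because copysign() moves just the sign bit,
 * copysign(x,-0.0) is -x on IEEE machines even though -0.0 == 0.0.  A
 * purely arithmetic version such as the hypothetical sketch below would
 * lose that distinction; it is shown only for contrast with the bit
 * manipulation above.
 */
#if 0
double arith_copysign(x, y)		/* hypothetical name, not in libm */
double x, y;
{
	if ((x < 0.0) != (y < 0.0))
		x = -x;
	return (x);			/* copysign(3.0,-0.0) would still be 3 here */
}
#endif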

double logb(x)
double x;
{

#ifdef NATIONAL
        short *px=(short *) &x+3, k;
#else /* VAX, SUN, ZILOG, TAHOE */
        short *px=(short *) &x, k;
#endif

#if (defined(VAX)||defined(TAHOE))
        return (int)(((*px&mexp)>>gap)-bias);
#else /* IEEE */
        if( (k= *px & mexp ) != mexp )
            if ( k != 0 )
                return ( (k>>gap) - bias );
            else if( x != zero)
                return ( -1022.0 );
            else
                return(-(1.0/zero));
        else if(x != x)
            return(x);
        else
            {*px &= msign; return(x);}
#endif
}
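
/*
 * Illustration: for normalized x > 0 on an IEEE machine, logb() and scalb()
 * are related by  1 <= scalb(x, -logb(x)) < 2 , i.e. logb(x) is the exponent
 * one would read off the binary representation of x (for subnormal x the
 * routine above returns -1022 instead).  The sketch below is a hypothetical
 * check, not library code.
 */
#if 0
check_logb(x)
double x;
{
	double logb(), scalb(), m;
	int e = (int)logb(x);

	m = scalb(x, -e);	/* significand: should lie in [1,2) for normal x > 0 */
	return (m >= 1.0 && m < 2.0);
}
#endif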

finite(x)
double x;
{
#if (defined(VAX)||defined(TAHOE))
        return(1);
#else  /* IEEE */
#ifdef NATIONAL
        return( (*((short *) &x+3 ) & mexp ) != mexp );
#else /* SUN, ZILOG */
        return( (*((short *) &x ) & mexp ) != mexp );
#endif
#endif
}

double drem(x,p)
double x,p;
{
        short sign;
        double hp,dp,tmp,drem(),scalb();
        unsigned short  k;
#ifdef NATIONAL
        unsigned short
              *px=(unsigned short *) &x  +3,
              *pp=(unsigned short *) &p  +3,
              *pd=(unsigned short *) &dp +3,
              *pt=(unsigned short *) &tmp+3;
#else /* VAX, SUN, ZILOG, TAHOE */
        unsigned short
              *px=(unsigned short *) &x  ,
              *pp=(unsigned short *) &p  ,
              *pd=(unsigned short *) &dp ,
              *pt=(unsigned short *) &tmp;
#endif

        *pp &= msign ;

#if (defined(VAX)||defined(TAHOE))
        if( ( *px & mexp ) == ~msign )	/* is x a reserved operand? */
#else /* IEEE */
        if( ( *px & mexp ) == mexp )
#endif
		return  (x-p)-(x-p);	/* create nan if x is inf */
	if (p == zero) {
#if (defined(VAX)||defined(TAHOE))
		extern double infnan();
		return(infnan(EDOM));
#else
		return zero/zero;
#endif
	}

#if (defined(VAX)||defined(TAHOE))
        if( ( *pp & mexp ) == ~msign )	/* is p a reserved operand? */
#else /* IEEE */
        if( ( *pp & mexp ) == mexp )
#endif
		{ if (p != p) return p; else return x;}

        else  if ( ((*pp & mexp)>>gap) <= 1 )
                /* subnormal p, or almost subnormal p */
            { double b; b=scalb(1.0,(int)prep1);
              p *= b; x = drem(x,p); x *= b; return(drem(x,p)/b);}
        else  if ( p >= novf/2)
            { p /= 2 ; x /= 2; return(drem(x,p)*2);}
        else
            {
                dp=p+p; hp=p/2;
                sign= *px & ~msign ;
                *px &= msign       ;
                while ( x > dp )
                    {
                        k=(*px & mexp) - (*pd & mexp) ;
                        tmp = dp ;
                        *pt += k ;

#if (defined(VAX)||defined(TAHOE))
                        if( x < tmp ) *pt -= 128 ;
#else /* IEEE */
                        if( x < tmp ) *pt -= 16 ;
#endif

                        x -= tmp ;
                    }
                if ( x > hp )
                    { x -= p ;  if ( x >= hp ) x -= p ; }

#if (defined(VAX)||defined(TAHOE))
		if (x)
#endif
			*px ^= sign;
                return( x);

            }
}
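
/*
 * Worked example of the definition above (illustration only): for
 * drem(7.0, 2.0) the exact quotient 7/2 = 3.5 is halfway between 3 and 4,
 * so the even integer 4 is chosen and the result is 7 - 4*2 = -1; for
 * drem(5.0, 2.0) the quotient 2.5 rounds to the even integer 2, giving
 * 5 - 2*2 = 1.  Unlike a truncating remainder, the result always satisfies
 * |drem(x,p)| <= |p|/2.  A hypothetical check, never compiled in:
 */
#if 0
#include <stdio.h>

main()
{
	double drem();

	printf("%g\n", drem(7.0, 2.0));		/* expect -1  */
	printf("%g\n", drem(5.0, 2.0));		/* expect  1  */
	printf("%g\n", drem(9.5, 3.0));		/* expect 0.5: nearest integer to 9.5/3 is 3 */
	return 0;
}
#endif
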
double sqrt(x)
double x;
{
        double q,s,b,r;
        double logb(),scalb();
        double t,zero=0.0;
        int m,n,i,finite();
#if (defined(VAX)||defined(TAHOE))
        int k=54;
#else   /* IEEE */
        int k=51;
#endif

    /* sqrt(NaN) is NaN, sqrt(+-0) = +-0 */
        if(x!=x||x==zero) return(x);

    /* sqrt(negative) is invalid */
        if(x<zero) {
#if (defined(VAX)||defined(TAHOE))
		extern double infnan();
		return (infnan(EDOM));	/* NaN */
#else	/* IEEE double */
		return(zero/zero);
#endif
	}

    /* sqrt(INF) is INF */
        if(!finite(x)) return(x);

    /* scale x to [1,4) */
        n=logb(x);
        x=scalb(x,-n);
        if((m=logb(x))!=0) x=scalb(x,-m);       /* subnormal number */
        m += n;
        n = m/2;
        if((n+n)!=m) {x *= 2; m -=1; n=m/2;}

    /* generate sqrt(x) bit by bit (accumulating in q) */
            q=1.0; s=4.0; x -= 1.0; r=1;
            for(i=1;i<=k;i++) {
                t=s+1; x *= 4; r /= 2;
                if(t<=x) {
                    s=t+t+2, x -= t; q += r;}
                else
                    s *= 2;
                }

    /* generate the last bit and determine the final rounding */
            r/=2; x *= 4;
            if(x==zero) goto end; 100+r; /* trigger inexact flag */
            if(s<x) {
                q+=r; x -=s; s += 2; s *= 2; x *= 4;
                t = (x-s)-5;
                b=1.0+3*r/4; if(b==1.0) goto end; /* b==1 : Round-to-zero */
                b=1.0+r/4;   if(b>1.0) t=1;	/* b>1 : Round-to-(+INF) */
                if(t>=0) q+=r; }	      /* else: Round-to-nearest */
            else {
                s *= 2; x *= 4;
                t = (x-s)-1;
                b=1.0+3*r/4; if(b==1.0) goto end;
                b=1.0+r/4;   if(b>1.0) t=1;
                if(t>=0) q+=r; }

end:        return(scalb(q,n));
}
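
/*
 * The loop above is the classical "one bit per iteration" square root: at
 * each step a trial subtraction decides whether the next result bit is 1.
 * The same idea in plain integer arithmetic, as an illustration only (the
 * routine above does not use this hypothetical helper):
 */
#if 0
long isqrt(n)		/* floor(sqrt(n)) for n >= 0 */
long n;
{
	long root = 0, bit, trial;

	for (bit = 1L << 30; bit != 0; bit >>= 2) {
		trial = root + bit;	/* candidate with the next bit set */
		root >>= 1;
		if (n >= trial) {	/* trial subtraction succeeds: keep the bit */
			n -= trial;
			root += bit;
		}
	}
	return (root);
}
#endif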

#if 0
/* DREM(X,Y)
 * RETURN X REM Y =X-N*Y, N=[X/Y] ROUNDED (ROUNDED TO EVEN IN THE HALF WAY CASE)
 * DOUBLE PRECISION (VAX D format 56 bits, IEEE DOUBLE 53 BITS)
 * INTENDED FOR ASSEMBLY LANGUAGE
 * CODED IN C BY K.C. NG, 3/23/85, 4/8/85.
 *
 * Warning: this code should not get compiled in unless ALL of
 * the following machine-dependent routines are supplied.
 *
 * Required machine dependent functions (not on a VAX):
 *     swapINX(i): save inexact flag and reset it to "i"
 *     swapENI(e): save inexact enable and reset it to "e"
 */

double drem(x,y)
double x,y;
{

#ifdef NATIONAL		/* order of words in floating point number */
	static n0=3,n1=2,n2=1,n3=0;
#else /* VAX, SUN, ZILOG, TAHOE */
	static n0=0,n1=1,n2=2,n3=3;
#endif

	static unsigned short mexp =0x7ff0, m25 =0x0190, m57 =0x0390;
	static double zero=0.0;
	double hy,y1,t,t1;
	short k;
	long n;
	int i,e;
	unsigned short xexp,yexp, *px  =(unsigned short *) &x  ,
	      		nx,nf,	  *py  =(unsigned short *) &y  ,
	      		sign,	  *pt  =(unsigned short *) &t  ,
	      			  *pt1 =(unsigned short *) &t1 ;

	xexp = px[n0] & mexp ;	/* exponent of x */
	yexp = py[n0] & mexp ;	/* exponent of y */
	sign = px[n0] &0x8000;	/* sign of x     */

/* return NaN if x is NaN, or y is NaN, or x is INF, or y is zero */
	if(x!=x) return(x); if(y!=y) return(y);	     /* x or y is NaN */
	if( xexp == mexp )   return(zero/zero);      /* x is INF */
	if(y==zero) return(y/y);

/* save the inexact flag and inexact enable in i and e respectively
 * and reset them to zero
 */
	i=swapINX(0);	e=swapENI(0);

/* subnormal number */
	nx=0;
	if(yexp==0) {t=1.0,pt[n0]+=m57; y*=t; nx=m57;}

/* if y is tiny (biased exponent <= 57), scale up y to y*2**57 */
	if( yexp <= m57 ) {py[n0]+=m57; nx+=m57; yexp+=m57;}

	nf=nx;
	py[n0] &= 0x7fff;
	px[n0] &= 0x7fff;

/* mask off the least significant 27 bits of y */
	t=y; pt[n3]=0; pt[n2]&=0xf800; y1=t;

/* LOOP: argument reduction on x whenever x > y */
loop:
	while ( x > y )
	{
	    t=y;
	    t1=y1;
	    xexp=px[n0]&mexp;	  /* exponent of x */
	    k=xexp-yexp-m25;
	    if(k>0) 	/* if x/y >= 2**26, scale up y so that x/y < 2**26 */
		{pt[n0]+=k;pt1[n0]+=k;}
	    n=x/t; x=(x-n*t1)-n*(t-t1);
	}
    /* end while (x > y) */

	if(nx!=0) {t=1.0; pt[n0]+=nx; x*=t; nx=0; goto loop;}

/* final adjustment */

	hy=y/2.0;
	if(x>hy||((x==hy)&&n%2==1)) x-=y;
	px[n0] ^= sign;
	if(nf!=0) { t=1.0; pt[n0]-=nf; x*=t;}

/* restore inexact flag and inexact enable */
	swapINX(i); swapENI(e);

	return(x);
}
#endif
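
/*
 * Illustration of the splitting trick used above: y is written as
 * y1 + (y - y1), where y1 keeps only the high-order bits of y, so that for
 * a quotient n below 2**26 the product n*y1 is exact and the correction
 * n*(y - y1) loses essentially nothing, making (x - n*y1) - n*(y - y1) an
 * accurate reduction step.  The hypothetical helper below sketches that
 * decomposition for an IEEE double, assuming the same word order as the
 * non-NATIONAL case above; it is not part of the routine.
 */
#if 0
double split_high(y)	/* top ~26 significant bits of y; y - split_high(y) is the exact tail */
double y;
{
	double y1 = y;
	unsigned short *p = (unsigned short *) &y1;

	p[3] = 0;		/* clear the low 16 fraction bits */
	p[2] &= 0xf800;		/* ...and 11 more, i.e. the low 27 bits in all */
	return (y1);
}
#endif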

#if 0
/* SQRT
 * RETURN CORRECTLY ROUNDED (ACCORDING TO THE ROUNDING MODE) SQRT
 * FOR IEEE DOUBLE PRECISION ONLY, INTENDED FOR ASSEMBLY LANGUAGE
 * CODED IN C BY K.C. NG, 3/22/85.
 *
 * Warning: this code should not get compiled in unless ALL of
 * the following machine-dependent routines are supplied.
 *
 * Required machine dependent functions:
 *     swapINX(i)  ...return the status of INEXACT flag and reset it to "i"
 *     swapRM(r)   ...return the current Rounding Mode and reset it to "r"
 *     swapENI(e)  ...return the status of inexact enable and reset it to "e"
 *     addc(t)     ...perform t=t+1 regarding t as a 64 bit unsigned integer
 *     subc(t)     ...perform t=t-1 regarding t as a 64 bit unsigned integer
 */

static unsigned long table[] = {
0, 1204, 3062, 5746, 9193, 13348, 18162, 23592, 29598, 36145, 43202, 50740,
58733, 67158, 75992, 85215, 83599, 71378, 60428, 50647, 41945, 34246, 27478,
21581, 16499, 12183, 8588, 5674, 3403, 1742, 661, 130, };

double newsqrt(x)
double x;
{
        double y,z,t,addc(),subc(),b54=134217728.*134217728.; /* b54=2**54 */
        long mx,scalx,mexp=0x7ff00000;
        int i,j,r,e,swapINX(),swapRM(),swapENI();
        unsigned long *py=(unsigned long *) &y   ,
                      *pt=(unsigned long *) &t   ,
                      *px=(unsigned long *) &x   ;
#ifdef NATIONAL         /* ordering of word in a floating point number */
        int n0=1, n1=0;
#else
        int n0=0, n1=1;
#endif
/* Rounding Mode:  RN ...round-to-nearest
 *                 RZ ...round-towards 0
 *                 RP ...round-towards +INF
 *		   RM ...round-towards -INF
 */
        int RN=0,RZ=1,RP=2,RM=3;/* machine dependent: work on a Zilog Z8070
                                 * and a National 32081 & 16081
                                 */

/* exceptions */
	if(x!=x||x==0.0) return(x);  /* sqrt(NaN) is NaN, sqrt(+-0) = +-0 */
	if(x<0) return((x-x)/(x-x)); /* sqrt(negative) is invalid */
        if((mx=px[n0]&mexp)==mexp) return(x);  /* sqrt(+INF) is +INF */

/* save, reset, initialize */
        e=swapENI(0);   /* ...save and reset the inexact enable */
        i=swapINX(0);   /* ...save INEXACT flag */
        r=swapRM(RN);   /* ...save and reset the Rounding Mode to RN */
        scalx=0;

/* subnormal number, scale up x to x*2**54 */
        if(mx==0) {x *= b54 ; scalx-=0x01b00000;}

/* scale x to avoid intermediate over/underflow:
 * if (x > 2**512) x=x/2**512; if (x < 2**-512) x=x*2**512 */
        if(mx>0x5ff00000) {px[n0] -= 0x20000000; scalx+= 0x10000000;}
        if(mx<0x1ff00000) {px[n0] += 0x20000000; scalx-= 0x10000000;}

/* magic initial approximation to almost 8 sig. bits */
        py[n0]=(px[n0]>>1)+0x1ff80000;
        py[n0]=py[n0]-table[(py[n0]>>15)&31];

/* Heron's rule once with correction to improve y to almost 18 sig. bits */
        t=x/y; y=y+t; py[n0]=py[n0]-0x00100006; py[n1]=0;

/* triple to almost 56 sig. bits; now y approx. sqrt(x) to within 1 ulp */
        t=y*y; z=t;  pt[n0]+=0x00100000; t+=z; z=(x-z)*y;
        t=z/(t+x) ;  pt[n0]+=0x00100000; y+=t;

/* twiddle last bit to force y correctly rounded */
        swapRM(RZ);     /* ...set Rounding Mode to round-toward-zero */
        swapINX(0);     /* ...clear INEXACT flag */
        swapENI(e);     /* ...restore inexact enable status */
        t=x/y;          /* ...chopped quotient, possibly inexact */
        j=swapINX(i);   /* ...read and restore inexact flag */
        if(j==0) { if(t==y) goto end; else t=subc(t); }  /* ...t=t-ulp */
        b54+0.1;        /* ..trigger inexact flag, sqrt(x) is inexact */
        if(r==RN) t=addc(t);            /* ...t=t+ulp */
        else if(r==RP) { t=addc(t);y=addc(y);}/* ...t=t+ulp;y=y+ulp; */
        y=y+t;                          /* ...chopped sum */
        py[n0]=py[n0]-0x00100000;       /* ...correctly rounded sqrt(x) */
end:    py[n0]=py[n0]+scalx;            /* ...scale back y */
        swapRM(r);                      /* ...restore Rounding Mode */
        return(y);
}
#endif
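
/*
 * Illustration of Heron's rule, which the routine above applies once (with
 * a correction) to its table-based 8-bit initial guess: each iteration
 * y <- (y + x/y)/2 roughly doubles the number of correct bits, so only a
 * few iterations are needed to reach double precision.  The plain,
 * unscaled sketch below is hypothetical and not library code.
 */
#if 0
double heron_sqrt(x, y)		/* x > 0, y a positive initial guess */
double x, y;
{
	int i;

	for (i = 0; i < 6; i++)		/* 6 iterations: ample for 53 bits */
		y = 0.5 * (y + x / y);
	return (y);
}
#endif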