/*
 * Copyright (c) 1985 Regents of the University of California.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms are permitted
 * provided that the above copyright notice and this paragraph are
 * duplicated in all such forms and that any documentation,
 * advertising materials, and other materials related to such
 * distribution and use acknowledge that the software was developed
 * by the University of California, Berkeley.  The name of the
 * University may not be used to endorse or promote products derived
 * from this software without specific prior written permission.
 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
 *
 * All recipients should regard themselves as participants in an ongoing
 * research project and hence should feel obligated to report their
 * experiences (good or bad) with these elementary function codes, using
 * the sendbug(8) program, to the authors.
 */

#ifndef lint
static char sccsid[] = "@(#)support.c	5.4 (Berkeley) 09/22/88";
#endif /* not lint */

/*
 * Some IEEE standard 754 recommended functions, plus remainder and sqrt,
 * for supporting the C elementary functions.
 ******************************************************************************
 * WARNING:
 *      These codes are developed (in double) to support the C elementary
 * functions temporarily.  They are not universal, and some of them are very
 * slow (in particular, drem and sqrt are extremely inefficient).  Each
 * computer system should have its own implementation of these functions,
 * coded in its own assembler.
 ******************************************************************************
 *
 * IEEE 754 required operations:
 *     drem(x,p)
 *              returns  x REM p  =  x - [x/p]*p , where [x/p] is the integer
 *              nearest x/p; in the halfway case, choose the even one.
 *     sqrt(x)
 *              returns the square root of x correctly rounded according to
 *		the rounding mode.
 *
 * IEEE 754 recommended functions:
 * (a) copysign(x,y)
 *              returns x with the sign of y.
 * (b) scalb(x,N)
 *              returns  x * (2**N), for integer values of N.
 * (c) logb(x)
 *              returns the unbiased exponent of x, a signed integer in
 *              double precision, except that logb(0) is -INF, logb(INF)
 *              is +INF, and logb(NAN) is that NAN.
 * (d) finite(x)
 *              returns the value TRUE if -INF < x < +INF and returns
 *              FALSE otherwise.
 *
 *
 * CODED IN C BY K.C. NG, 11/25/84;
 * REVISED BY K.C. NG on 1/22/85, 2/13/85, 3/24/85.
 */
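/*
 * Illustrative values (an editorial sketch, not part of the original
 * source), assuming IEEE double; a small compile-guarded demonstration
 * program appears after sqrt() below:
 *	copysign(3.0, -1.0) == -3.0
 *	scalb(1.5, 4)       ==  24.0
 *	logb(24.0)          ==   4.0	(24 = 1.5 * 2**4)
 *	finite(1.0)         ==   1, while finite of +-INF or NAN is 0
 *	drem(7.0, 2.0)      ==  -1.0	(7/2 = 3.5 is a halfway case; the even
 *					 integer 4 is chosen, and 7 - 4*2 = -1)
 */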

#include "mathimpl.h"

#if defined(vax)||defined(tahoe)      /* VAX D format */
#include <errno.h>
    static const unsigned short msign=0x7fff , mexp =0x7f80 ;
    static const short  prep1=57, gap=7, bias=129           ;
    static const double novf=1.7E38, nunf=3.0E-39, zero=0.0 ;
#else	/* defined(vax)||defined(tahoe) */
    static const unsigned short msign=0x7fff, mexp =0x7ff0  ;
    static const short prep1=54, gap=4, bias=1023           ;
    static const double novf=1.7E308, nunf=3.0E-308,zero=0.0;
#endif	/* defined(vax)||defined(tahoe) */
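/*
 * Editorial note on the constants above (IEEE case, an assumption for the
 * example): msign masks off the sign bit of the 16-bit word that holds the
 * sign and exponent, mexp selects the exponent field, gap is its shift and
 * bias its excess.  For instance, 24.0 = 1.5 * 2**4 has sign/exponent word
 * 0x4038, and ((0x4038 & mexp) >> gap) - bias = 0x403 - 1023 = 4 = logb(24.0).
 * prep1 (one more than the number of significand bits) is used to prescale
 * subnormal operands into the normal range.
 */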

double scalb(x,N)
double x; int N;
{
        int k;

#ifdef national
        unsigned short *px=(unsigned short *) &x + 3;
#else	/* national */
        unsigned short *px=(unsigned short *) &x;
#endif	/* national */

        if( x == zero )  return(x);

#if defined(vax)||defined(tahoe)
        if( (k= *px & mexp ) != ~msign ) {
            if (N < -260)
		return(nunf*nunf);
	    else if (N > 260) {
		return(copysign(infnan(ERANGE),x));
	    }
#else	/* defined(vax)||defined(tahoe) */
        if( (k= *px & mexp ) != mexp ) {
            if( N<-2100) return(nunf*nunf); else if(N>2100) return(novf+novf);
            if( k == 0 ) {
                 x *= scalb(1.0,(int)prep1);  N -= prep1; return(scalb(x,N));}
#endif	/* defined(vax)||defined(tahoe) */

            if((k = (k>>gap)+ N) > 0 )
                if( k < (mexp>>gap) ) *px = (*px&~mexp) | (k<<gap);
                else x=novf+novf;               /* overflow */
            else
                if( k > -prep1 )
                                        /* gradual underflow */
                    {*px=(*px&~mexp)|(short)(1<<gap); x *= scalb(1.0,k-1);}
                else
                return(nunf*nunf);
            }
        return(x);
}


double copysign(x,y)
double x,y;
{
#ifdef national
        unsigned short  *px=(unsigned short *) &x+3,
                        *py=(unsigned short *) &y+3;
#else	/* national */
        unsigned short  *px=(unsigned short *) &x,
                        *py=(unsigned short *) &y;
#endif	/* national */

#if defined(vax)||defined(tahoe)
        if ( (*px & mexp) == 0 ) return(x);
#endif	/* defined(vax)||defined(tahoe) */

        *px = ( *px & msign ) | ( *py & ~msign );
        return(x);
}

double logb(x)
double x;
{

#ifdef national
        short *px=(short *) &x+3, k;
#else	/* national */
        short *px=(short *) &x, k;
#endif	/* national */

#if defined(vax)||defined(tahoe)
        return (int)(((*px&mexp)>>gap)-bias);
#else	/* defined(vax)||defined(tahoe) */
        if( (k= *px & mexp ) != mexp )
            if ( k != 0 )
                return ( (k>>gap) - bias );
            else if( x != zero)
                return ( -1022.0 );
            else
                return(-(1.0/zero));
        else if(x != x)
            return(x);
        else
            {*px &= msign; return(x);}
#endif	/* defined(vax)||defined(tahoe) */
}

finite(x)
double x;
{
#if defined(vax)||defined(tahoe)
        return(1);
#else	/* defined(vax)||defined(tahoe) */
#ifdef national
        return( (*((short *) &x+3 ) & mexp ) != mexp );
#else	/* national */
        return( (*((short *) &x ) & mexp ) != mexp );
#endif	/* national */
#endif	/* defined(vax)||defined(tahoe) */
}

double drem(x,p)
double x,p;
{
        short sign;
        double hp,dp,tmp;
        unsigned short  k;
#ifdef national
        unsigned short
              *px=(unsigned short *) &x  +3,
              *pp=(unsigned short *) &p  +3,
              *pd=(unsigned short *) &dp +3,
              *pt=(unsigned short *) &tmp+3;
#else	/* national */
        unsigned short
              *px=(unsigned short *) &x  ,
              *pp=(unsigned short *) &p  ,
              *pd=(unsigned short *) &dp ,
              *pt=(unsigned short *) &tmp;
#endif	/* national */

        *pp &= msign ;

#if defined(vax)||defined(tahoe)
        if( ( *px & mexp ) == ~msign )	/* is x a reserved operand? */
#else	/* defined(vax)||defined(tahoe) */
        if( ( *px & mexp ) == mexp )
#endif	/* defined(vax)||defined(tahoe) */
		return  (x-p)-(x-p);	/* create nan if x is inf */
	if (p == zero) {
#if defined(vax)||defined(tahoe)
		return(infnan(EDOM));
#else	/* defined(vax)||defined(tahoe) */
		return zero/zero;
#endif	/* defined(vax)||defined(tahoe) */
	}

#if defined(vax)||defined(tahoe)
        if( ( *pp & mexp ) == ~msign )	/* is p a reserved operand? */
#else	/* defined(vax)||defined(tahoe) */
        if( ( *pp & mexp ) == mexp )
#endif	/* defined(vax)||defined(tahoe) */
		{ if (p != p) return p; else return x;}

        else  if ( ((*pp & mexp)>>gap) <= 1 )
                /* subnormal p, or almost subnormal p */
            { double b; b=scalb(1.0,(int)prep1);
              p *= b; x = drem(x,p); x *= b; return(drem(x,p)/b);}
        else  if ( p >= novf/2)
            { p /= 2 ; x /= 2; return(drem(x,p)*2);}
        else
            {
                dp=p+p; hp=p/2;
                sign= *px & ~msign ;
                *px &= msign       ;
                while ( x > dp )
                    {
                        k=(*px & mexp) - (*pd & mexp) ;
                        tmp = dp ;
                        *pt += k ;

#if defined(vax)||defined(tahoe)
                        if( x < tmp ) *pt -= 128 ;
#else	/* defined(vax)||defined(tahoe) */
                        if( x < tmp ) *pt -= 16 ;
#endif	/* defined(vax)||defined(tahoe) */

                        x -= tmp ;
                    }
                if ( x > hp )
                    { x -= p ;  if ( x >= hp ) x -= p ; }

#if defined(vax)||defined(tahoe)
		if (x)
#endif	/* defined(vax)||defined(tahoe) */
			*px ^= sign;
                return( x);

            }
}


double sqrt(x)
double x;
{
        double q,s,b,r;
        double t;
	double const zero=0.0;
        int m,n,i;
#if defined(vax)||defined(tahoe)
        int k=54;
#else	/* defined(vax)||defined(tahoe) */
        int k=51;
#endif	/* defined(vax)||defined(tahoe) */

    /* sqrt(NaN) is NaN, sqrt(+-0) = +-0 */
        if(x!=x||x==zero) return(x);

    /* sqrt(negative) is invalid */
        if(x<zero) {
#if defined(vax)||defined(tahoe)
		return (infnan(EDOM));	/* NaN */
#else	/* defined(vax)||defined(tahoe) */
		return(zero/zero);
#endif	/* defined(vax)||defined(tahoe) */
	}

    /* sqrt(INF) is INF */
        if(!finite(x)) return(x);

    /* scale x to [1,4) */
        n=logb(x);
        x=scalb(x,-n);
        if((m=logb(x))!=0) x=scalb(x,-m);       /* subnormal number */
        m += n;
        n = m/2;
        if((n+n)!=m) {x *= 2; m -=1; n=m/2;}

    /* generate sqrt(x) bit by bit (accumulating in q) */
            q=1.0; s=4.0; x -= 1.0; r=1;
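            /*
             * Editorial note on the loop below: informally, after the
             * "x *= 4; r /= 2" updates of iteration i, q holds the first i
             * bits of the root, r = 2**-i is the weight of the next bit,
             * x = 4**i * (X - q*q) is the scaled remainder (X being the
             * scaled argument in [1,4)), and t = 2*q*2**i + 1; so the test
             * "t <= x" decides whether (q+r)**2 <= X, i.e. whether the next
             * bit of the root is 1.
             */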
            for(i=1;i<=k;i++) {
                t=s+1; x *= 4; r /= 2;
                if(t<=x) {
                    s=t+t+2, x -= t; q += r;}
                else
                    s *= 2;
                }

    /* generate the last bit and determine the final rounding */
            r/=2; x *= 4;
            if(x==zero) goto end; 100+r; /* trigger inexact flag */
            if(s<x) {
                q+=r; x -=s; s += 2; s *= 2; x *= 4;
                t = (x-s)-5;
                b=1.0+3*r/4; if(b==1.0) goto end; /* b==1 : Round-to-zero */
                b=1.0+r/4;   if(b>1.0) t=1;	/* b>1 : Round-to-(+INF) */
                if(t>=0) q+=r; }	      /* else: Round-to-nearest */
            else {
                s *= 2; x *= 4;
                t = (x-s)-1;
                b=1.0+3*r/4; if(b==1.0) goto end;
                b=1.0+r/4;   if(b>1.0) t=1;
                if(t>=0) q+=r; }

end:        return(scalb(q,n));
}
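
#ifdef SUPPORT_DEMO
/*
 * Editorial sketch, not part of the original library: a tiny test driver
 * for the functions above.  The SUPPORT_DEMO guard and the expected values
 * in the comments are illustrative assumptions (IEEE double is assumed);
 * compile this file with -DSUPPORT_DEMO to try it.
 */
#include <stdio.h>

main()
{
	printf("copysign(3,-1) = %g\n", copysign(3.0, -1.0));	/* -3 */
	printf("scalb(1.5,4)   = %g\n", scalb(1.5, 4));		/* 24 */
	printf("logb(24)       = %g\n", logb(24.0));		/* 4 */
	printf("finite(1)      = %d\n", finite(1.0));		/* 1 */
	printf("drem(7,2)      = %g\n", drem(7.0, 2.0));	/* -1: the tie 3.5 rounds to even 4 */
	printf("sqrt(2)        = %.17g\n", sqrt(2.0));		/* 1.4142135623730951 */
	return(0);
}
#endif	/* SUPPORT_DEMO */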

#if 0
/* DREM(X,Y)
 * RETURN X REM Y =X-N*Y, N=[X/Y] ROUNDED (ROUNDED TO EVEN IN THE HALF WAY CASE)
 * DOUBLE PRECISION (VAX D format 56 bits, IEEE DOUBLE 53 BITS)
 * INTENDED FOR ASSEMBLY LANGUAGE
 * CODED IN C BY K.C. NG, 3/23/85, 4/8/85.
 *
 * Warning: this code should not get compiled in unless ALL of
 * the following machine-dependent routines are supplied.
 *
 * Required machine dependent functions (not on a VAX):
 *     swapINX(i): save inexact flag and reset it to "i"
 *     swapENI(e): save inexact enable and reset it to "e"
 */

double drem(x,y)
double x,y;
{

#ifdef national		/* order of words in floating point number */
	static const n0=3,n1=2,n2=1,n3=0;
#else /* VAX, SUN, ZILOG, TAHOE */
	static const n0=0,n1=1,n2=2,n3=3;
#endif

    	static const unsigned short mexp =0x7ff0, m25 =0x0190, m57 =0x0390;
	static const double zero=0.0;
	double hy,y1,t,t1;
	short k;
	long n;
	int i,e;
	unsigned short xexp,yexp, *px  =(unsigned short *) &x  ,
	      		nx,nf,	  *py  =(unsigned short *) &y  ,
	      		sign,	  *pt  =(unsigned short *) &t  ,
	      			  *pt1 =(unsigned short *) &t1 ;

	xexp = px[n0] & mexp ;	/* exponent of x */
	yexp = py[n0] & mexp ;	/* exponent of y */
	sign = px[n0] &0x8000;	/* sign of x     */

/* return NaN if x is NaN, or y is NaN, or x is INF, or y is zero */
	if(x!=x) return(x); if(y!=y) return(y);	     /* x or y is NaN */
	if( xexp == mexp )   return(zero/zero);      /* x is INF */
	if(y==zero) return(y/y);

/* save the inexact flag and inexact enable in i and e respectively
 * and reset them to zero
 */
	i=swapINX(0);	e=swapENI(0);

/* subnormal number */
	nx=0;
	if(yexp==0) {t=1.0,pt[n0]+=m57; y*=t; nx=m57;}

/* if y is tiny (biased exponent <= 57), scale up y to y*2**57 */
	if( yexp <= m57 ) {py[n0]+=m57; nx+=m57; yexp+=m57;}

	nf=nx;
	py[n0] &= 0x7fff;
	px[n0] &= 0x7fff;

/* mask off the least significant 27 bits of y */
	t=y; pt[n3]=0; pt[n2]&=0xf800; y1=t;
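/*
 * Editorial note: clearing the low 27 bits splits y as y = y1 + (y - y1),
 * so that the products n*y1 and n*(y-y1) in the reduction loop below
 * (where n < 2**26, see the comment in the loop) can be formed without
 * rounding error; this is the usual device for computing x - n*y exactly.
 */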

/* LOOP: argument reduction on x whenever x > y */
loop:
	while ( x > y )
	{
	    t=y;
	    t1=y1;
	    xexp=px[n0]&mexp;	  /* exponent of x */
	    k=xexp-yexp-m25;
	    if(k>0) 	/* if x/y >= 2**26, scale up y so that x/y < 2**26 */
		{pt[n0]+=k;pt1[n0]+=k;}
	    n=x/t; x=(x-n*t1)-n*(t-t1);
	}
    /* end while (x > y) */

	if(nx!=0) {t=1.0; pt[n0]+=nx; x*=t; nx=0; goto loop;}

/* final adjustment */

	hy=y/2.0;
	if(x>hy||((x==hy)&&n%2==1)) x-=y;
	px[n0] ^= sign;
	if(nf!=0) { t=1.0; pt[n0]-=nf; x*=t;}

/* restore inexact flag and inexact enable */
	swapINX(i); swapENI(e);

	return(x);
}
#endif

#if 0
/* SQRT
 * RETURN CORRECTLY ROUNDED (ACCORDING TO THE ROUNDING MODE) SQRT
 * FOR IEEE DOUBLE PRECISION ONLY, INTENDED FOR ASSEMBLY LANGUAGE
 * CODED IN C BY K.C. NG, 3/22/85.
 *
 * Warning: this code should not get compiled in unless ALL of
 * the following machine-dependent routines are supplied.
 *
 * Required machine dependent functions:
 *     swapINX(i)  ...return the status of INEXACT flag and reset it to "i"
 *     swapRM(r)   ...return the current Rounding Mode and reset it to "r"
 *     swapENI(e)  ...return the status of inexact enable and reset it to "e"
 *     addc(t)     ...perform t=t+1 regarding t as a 64 bit unsigned integer
 *     subc(t)     ...perform t=t-1 regarding t as a 64 bit unsigned integer
 */

static const unsigned long table[] = {
0, 1204, 3062, 5746, 9193, 13348, 18162, 23592, 29598, 36145, 43202, 50740,
58733, 67158, 75992, 85215, 83599, 71378, 60428, 50647, 41945, 34246, 27478,
21581, 16499, 12183, 8588, 5674, 3403, 1742, 661, 130, };

double newsqrt(x)
double x;
{
        double y,z,t,addc(),subc();
	double const b54=134217728.*134217728.; /* b54=2**54 */
        long mx,scalx;
	long const mexp=0x7ff00000;
        int i,j,r,e,swapINX(),swapRM(),swapENI();
        unsigned long *py=(unsigned long *) &y   ,
                      *pt=(unsigned long *) &t   ,
                      *px=(unsigned long *) &x   ;
#ifdef national         /* ordering of word in a floating point number */
        const int n0=1, n1=0;
#else
        const int n0=0, n1=1;
#endif
/* Rounding Mode:  RN ...round-to-nearest
 *                 RZ ...round-towards 0
 *                 RP ...round-towards +INF
 *		   RM ...round-towards -INF
 */
        const int RN=0,RZ=1,RP=2,RM=3;
				/* machine dependent: works on a Zilog Z8070
                                 * and a National 32081 & 16081
                                 */

/* exceptions */
	if(x!=x||x==0.0) return(x);  /* sqrt(NaN) is NaN, sqrt(+-0) = +-0 */
	if(x<0) return((x-x)/(x-x)); /* sqrt(negative) is invalid */
        if((mx=px[n0]&mexp)==mexp) return(x);  /* sqrt(+INF) is +INF */

/* save, reset, initialize */
        e=swapENI(0);   /* ...save and reset the inexact enable */
        i=swapINX(0);   /* ...save INEXACT flag */
        r=swapRM(RN);   /* ...save and reset the Rounding Mode to RN */
        scalx=0;

/* subnormal number, scale up x to x*2**54 */
        if(mx==0) {x *= b54 ; scalx-=0x01b00000;}

/* scale x to avoid intermediate over/underflow:
 * if (x > 2**512) x=x/2**512; if (x < 2**-512) x=x*2**512 */
        if(mx>0x5ff00000) {px[n0] -= 0x20000000; scalx+= 0x10000000;}
        if(mx<0x1ff00000) {px[n0] += 0x20000000; scalx-= 0x10000000;}

/* magic initial approximation to almost 8 sig. bits */
        py[n0]=(px[n0]>>1)+0x1ff80000;
        py[n0]=py[n0]-table[(py[n0]>>15)&31];

/* Heron's rule once with correction to improve y to almost 18 sig. bits */
        t=x/y; y=y+t; py[n0]=py[n0]-0x00100006; py[n1]=0;

/* triple to almost 56 sig. bits; now y approx. sqrt(x) to within 1 ulp */
        t=y*y; z=t;  pt[n0]+=0x00100000; t+=z; z=(x-z)*y;
        t=z/(t+x) ;  pt[n0]+=0x00100000; y+=t;

/* twiddle last bit to force y correctly rounded */
        swapRM(RZ);     /* ...set Rounding Mode to round-toward-zero */
        swapINX(0);     /* ...clear INEXACT flag */
        swapENI(e);     /* ...restore inexact enable status */
        t=x/y;          /* ...chopped quotient, possibly inexact */
        j=swapINX(i);   /* ...read and restore inexact flag */
        if(j==0) { if(t==y) goto end; else t=subc(t); }  /* ...t=t-ulp */
        b54+0.1;        /* ..trigger inexact flag, sqrt(x) is inexact */
        if(r==RN) t=addc(t);            /* ...t=t+ulp */
        else if(r==RP) { t=addc(t);y=addc(y);}/* ...t=t+ulp;y=y+ulp; */
        y=y+t;                          /* ...chopped sum */
        py[n0]=py[n0]-0x00100000;       /* ...correctly rounded sqrt(x) */
end:    py[n0]=py[n0]+scalx;            /* ...scale back y */
        swapRM(r);                      /* ...restore Rounding Mode */
        return(y);
}
#endif