/*
 * Copyright (c) 1985 Regents of the University of California.
 *
 * Use and reproduction of this software are granted  in  accordance  with
 * the terms and conditions specified in  the  Berkeley  Software  License
 * Agreement (in particular, this entails acknowledgement of the programs'
 * source, and inclusion of this notice) with the additional understanding
 * that  all  recipients  should regard themselves as participants  in  an
 * ongoing  research  project and hence should  feel  obligated  to report
 * their  experiences (good or bad) with these elementary function  codes,
 * using "sendbug 4bsd-bugs@BERKELEY", to the authors.
 */

#ifndef lint
static char sccsid[] =
"@(#)support.c 1.1 (Berkeley) 5/23/85; 1.3 (ucb.elefunt) 04/26/86";
#endif /* not lint */

/*
 * Some IEEE standard p754 recommended functions and remainder and sqrt for
 * supporting the C elementary functions.
 ******************************************************************************
 * WARNING:
 *      These codes are developed (in double) to support the C elementary
 * functions temporarily. They are not universal, and some of them are very
 * slow (in particular, drem and sqrt are extremely inefficient). Each
 * computer system should have its implementation of these functions using
 * its own assembler.
 ******************************************************************************
 *
 * IEEE p754 required operations:
 *     drem(x,p)
 *              returns  x REM p  =  x - [x/p]*p , where [x/p] is the integer
 *              nearest x/p; in half way case, choose the even one.
 *     sqrt(x)
 *              returns the square root of x correctly rounded according to
 *              the rounding mode.
 *
 * IEEE p754 recommended functions:
 * (a) copysign(x,y)
 *              returns x with the sign of y.
 * (b) scalb(x,N)
 *              returns  x * (2**N), for integer values N.
 * (c) logb(x)
 *              returns the unbiased exponent of x, a signed integer in
 *              double precision, except that logb(0) is -INF, logb(INF)
 *              is +INF, and logb(NAN) is that NAN.
 * (d) finite(x)
 *              returns the value TRUE if -INF < x < +INF and returns
 *              FALSE otherwise.
 *
 *
 * CODED IN C BY K.C. NG, 11/25/84;
 * REVISED BY K.C. NG on 1/22/85, 2/13/85, 3/24/85.
 */


/*
 * Format-dependent constants.
 *   msign : mask that clears the sign bit of the high-order 16-bit word
 *   mexp  : mask that selects the exponent field of that word
 *   prep1 : scale exponent used to normalize subnormals (and gradual
 *           underflow limit in scalb)
 *   gap   : number of fraction bits sharing the high-order word
 *   bias  : exponent bias
 *   novf  : a number whose doubling overflows
 *   nunf  : a number whose square underflows
 */
#ifdef VAX      /* VAX D format */
    static unsigned short msign=0x7fff , mexp =0x7f80 ;
    static short  prep1=57, gap=7, bias=129           ;
    static double novf=1.7E38, nunf=3.0E-39, zero=0.0 ;
#else           /*IEEE double format */
    static unsigned short msign=0x7fff, mexp =0x7ff0  ;
    static short prep1=54, gap=4, bias=1023           ;
    static double novf=1.7E308, nunf=3.0E-308,zero=0.0;
#endif

/*
 * HI_SHORT is the index (in units of unsigned short) of the word of a
 * double that holds the sign and exponent.
 *   NATIONAL               : 3 (as before)
 *   VAX                    : 0 (as before)
 *   otherwise              : 3 when the compiler reports a little-endian
 *                            host through __BYTE_ORDER__, else 0 (the
 *                            historical SUN/ZILOG layout).
 * Nested #ifdef's are used (rather than #elif/defined) to stay acceptable
 * to old preprocessors.
 */
#ifdef NATIONAL
#define HI_SHORT 3
#else
#ifndef VAX
#ifdef __BYTE_ORDER__
#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
#define HI_SHORT 3
#endif
#endif
#endif
#endif
#ifndef HI_SHORT
#define HI_SHORT 0
#endif

/*
 * scalb(x,N)
 * Return x * 2**N computed by exponent manipulation.
 *   - x == 0 is returned unchanged.
 *   - (IEEE) INF and NaN are returned unchanged.
 *   - Huge |N| short-circuits to an overflowing (novf+novf) or
 *     underflowing (nunf*nunf) expression.
 *   - (IEEE) a subnormal x is first scaled up by 2**prep1.
 *   - A result exponent at or below zero is produced by a multiplication
 *     so that the hardware performs the gradual underflow rounding.
 */
double scalb(x,N)
double x; int N;
{
        int k;
        double scalb();

        unsigned short *px=(unsigned short *) &x + HI_SHORT;

        if( x == zero )  return(x);

#ifdef VAX
        /*
         * NOTE(review): ~msign is promoted to int before the comparison, so
         * where int is wider than short this test looks always true --
         * confirm the intended reserved-operand check.
         */
        if( (k= *px & mexp ) != ~msign ) {
            if( N<-260) return(nunf*nunf); else if(N>260) return(novf+novf);
#else   /* IEEE */
        if( (k= *px & mexp ) != mexp ) {
            if( N<-2100) return(nunf*nunf); else if(N>2100) return(novf+novf);
            if( k == 0 ) {
                /* subnormal x: normalize, then retry with adjusted N */
                x *= scalb(1.0,(int)prep1);  N -= prep1; return(scalb(x,N));}
#endif

            if((k = (k>>gap)+ N) > 0 ) {
                if( k < (mexp>>gap) )
                    *px = (*px&~mexp) | (k<<gap);
                else
                    x=novf+novf;               /* overflow */
            }
            else {
                if( k > -prep1 )
                                        /* gradual underflow */
                    {*px=(*px&~mexp)|(short)(1<<gap); x *= scalb(1.0,k-1);}
                else
                    return(nunf*nunf);
            }
        }
        return(x);
}


/*
 * copysign(x,y)
 * Return x with its sign bit replaced by the sign bit of y.
 * On a VAX, an x whose exponent field is zero is returned unchanged.
 */
double copysign(x,y)
double x,y;
{
        unsigned short  *px=(unsigned short *) &x + HI_SHORT,
                        *py=(unsigned short *) &y + HI_SHORT;

#ifdef VAX
        if ( (*px & mexp) == 0 ) return(x);
#endif

        *px = ( *px & msign ) | ( *py & ~msign );
        return(x);
}

/*
 * logb(x)
 * Return the unbiased exponent of x as a double.
 * IEEE: subnormal x gives -1022, logb(0) is -INF, logb(+-INF) is +INF,
 * and logb(NaN) is that NaN.
 */
double logb(x)
double x;
{

        short *px=(short *) &x + HI_SHORT, k;

#ifdef VAX
        return (int)(((*px&mexp)>>gap)-bias);
#else /* IEEE */
        if( (k= *px & mexp ) != mexp ) {
            if ( k != 0 )
                return ( (k>>gap) - bias );
            else if( x != zero)
                return ( -1022.0 );             /* subnormal */
            else
                return(-(1.0/zero));            /* logb(0) = -INF */
        }
        else if(x != x)
            return(x);                          /* NaN */
        else
            {*px &= msign; return(x);}          /* |INF| */
#endif
}

/*
 * finite(x)
 * Return nonzero if x is neither INF nor NaN.  Always true on a VAX.
 */
int finite(x)
double x;
{
#ifdef VAX
        return(1);
#else  /* IEEE */
        return( (*((short *) &x + HI_SHORT ) & mexp ) != mexp );
#endif
}

/*
 * drem(x,p)
 * Return x REM p = x - [x/p]*p, [x/p] being the integer nearest x/p
 * (even in the half way case).  The sign of p is ignored.
 *   - x is INF or NaN, or p is zero: returns x if x is NaN, else 0/0.
 *   - (IEEE) p is NaN: returns p;  p is INF: returns x.
 *   - tiny p (biased exponent <= 1): both operands are scaled by 2**prep1.
 *   - huge p (>= novf/2): both operands are halved so that p+p is finite.
 *   - otherwise |x| is reduced modulo 2p by subtracting 2p scaled to the
 *     magnitude of x, then brought into [-p/2, p/2].
 */
double drem(x,p)
double x,p;
{
        unsigned short sign;    /* holds 0x8000 or 0; unsigned avoids an
                                 * out-of-range conversion to short */
        double hp,dp,tmp,drem(),scalb();
        unsigned short  k;
        unsigned short
              *px=(unsigned short *) &x   + HI_SHORT,
              *pp=(unsigned short *) &p   + HI_SHORT,
              *pd=(unsigned short *) &dp  + HI_SHORT,
              *pt=(unsigned short *) &tmp + HI_SHORT;

        *pp &= msign ;                          /* p = |p| */

#ifdef VAX
        /*
         * NOTE(review): ~msign is promoted to int before the comparison, so
         * where int is wider than short the first test looks always false --
         * confirm the intended reserved-operand check.
         */
        if( ( *px & mexp ) == ~msign || p == zero )
#else /* IEEE */
        if( ( *px & mexp ) == mexp || p == zero )
#endif

                return( (x != x)? x:zero/zero );

#ifndef VAX
        /*
         * p is INF or NaN.  Without this test INF would fall into the
         * "p >= novf/2" branch below and recurse without end (INF/2 is
         * still INF), and NaN would fall through and return x.
         */
        else  if ( ( *pp & mexp ) == mexp )
                return( (p != p)? p:x );
#endif

        else  if ( ((*pp & mexp)>>gap) <= 1 )
                /* subnormal p, or almost subnormal p */
            { double b; b=scalb(1.0,(int)prep1);
              p *= b; x = drem(x,p); x *= b; return(drem(x,p)/b);}
        else  if ( p >= novf/2)
            { p /= 2 ; x /= 2; return(drem(x,p)*2);}
        else
            {
                dp=p+p; hp=p/2;
                sign= *px & ~msign ;
                *px &= msign       ;            /* x = |x| */
                while ( x > dp )
                    {
                        /* tmp = dp with the exponent of x */
                        k=(*px & mexp) - (*pd & mexp) ;
                        tmp = dp ;
                        *pt += k ;

                        /* too big: lower the exponent of tmp by one */
#ifdef VAX
                        if( x < tmp ) *pt -= 128 ;
#else /* IEEE */
                        if( x < tmp ) *pt -= 16 ;
#endif

                        x -= tmp ;
                    }
                if ( x > hp )
                    { x -= p ;  if ( x >= hp ) x -= p ; }

                *px = *px ^ sign;               /* restore the sign of x */
                return( x);

            }
}

/*
 * sqrt(x)
 * Return the square root of x, generated one bit at a time, with the last
 * bit chosen according to the rounding direction in effect (probed with
 * small floating point additions).
 *   sqrt(NaN) is NaN, sqrt(+-0) is +-0, sqrt(negative) is 0/0,
 *   sqrt(INF) is INF.
 */
double sqrt(x)
double x;
{
        double q,s,b,r;
        double logb(),scalb();
        double t,zero=0.0;
        int m,n,i,finite();
#ifdef VAX
        int k=54;
#else   /* IEEE */
        int k=51;
#endif

    /* sqrt(NaN) is NaN, sqrt(+-0) = +-0 */
        if(x!=x||x==zero) return(x);

    /* sqrt(negative) is invalid */
        if(x<zero) return(zero/zero);

    /* sqrt(INF) is INF */
        if(!finite(x)) return(x);

    /* scale x to [1,4) */
        n=logb(x);
        x=scalb(x,-n);
        if((m=logb(x))!=0) x=scalb(x,-m);       /* subnormal number */
        m += n;
        n = m/2;
        if((n+n)!=m) {x *= 2; m -=1; n=m/2;}

    /* generate sqrt(x) bit by bit (accumulating in q) */
            q=1.0; s=4.0; x -= 1.0; r=1;
            for(i=1;i<=k;i++) {
                t=s+1; x *= 4; r /= 2;
                if(t<=x) {
                    s=t+t+2, x -= t; q += r;}
                else
                    s *= 2;
                }

    /* generate the last bit and determine the final rounding */
            r/=2; x *= 4;
            if(x==zero) goto end;
            /*
             * The value of the next expression is discarded; it is here
             * only for its floating point side effect (an optimizing
             * compiler may drop it).
             */
            100+r; /* trigger inexact flag */
            if(s<x) {
                q+=r; x -=s; s += 2; s *= 2; x *= 4;
                t = (x-s)-5;
                b=1.0+3*r/4; if(b==1.0) goto end; /* b==1 : Round-to-zero */
                b=1.0+r/4;   if(b>1.0) t=1;     /* b>1 : Round-to-(+INF) */
                if(t>=0) q+=r; }              /* else: Round-to-nearest */
            else {
                s *= 2; x *= 4;
                t = (x-s)-1;
                b=1.0+3*r/4; if(b==1.0) goto end;
                b=1.0+r/4;   if(b>1.0) t=1;
                if(t>=0) q+=r; }

end:        return(scalb(q,n));
}

#if 0
/* DREM(X,Y)
 * RETURN X REM Y =X-N*Y, N=[X/Y] ROUNDED (ROUNDED TO EVEN IN THE HALF WAY CASE)
 * DOUBLE PRECISION (VAX D format 56 bits, IEEE DOUBLE 53 BITS)
 * INTENDED FOR ASSEMBLY LANGUAGE
 * CODED IN C BY K.C. NG, 3/23/85, 4/8/85.
 *
 * Warning: this code should not get compiled in unless ALL of
 * the following machine-dependent routines are supplied.
 *
 * Required machine dependent functions (not on a VAX):
 *     swapINX(i): save inexact flag and reset it to "i"
 *     swapENI(e): save inexact enable and reset it to "e"
 */

double drem(x,y)
double x,y;
{

#ifdef NATIONAL         /* order of words in floating point number */
        static int n0=3,n1=2,n2=1,n3=0;
#else /* VAX, SUN, ZILOG */
        static int n0=0,n1=1,n2=2,n3=3;
#endif

        static unsigned short mexp =0x7ff0, m25 =0x0190, m57 =0x0390;
        static double zero=0.0;
        double hy,y1,t,t1;
        short k;
        long n;
        int i,e;
        unsigned short xexp,yexp, *px  =(unsigned short *) &x  ,
                        nx,nf,    *py  =(unsigned short *) &y  ,
                        sign,     *pt  =(unsigned short *) &t  ,
                                  *pt1 =(unsigned short *) &t1 ;

        xexp = px[n0] & mexp ;  /* exponent of x */
        yexp = py[n0] & mexp ;  /* exponent of y */
        sign = px[n0] &0x8000;  /* sign of x     */

/* return NaN if x is NaN, or y is NaN, or x is INF, or y is zero */
        if(x!=x) return(x); if(y!=y) return(y);      /* x or y is NaN */
        if( xexp == mexp )   return(zero/zero);      /* x is INF */
        if(y==zero) return(y/y);

/* save the inexact flag and inexact enable in i and e respectively
 * and reset them to zero
 */
        i=swapINX(0);   e=swapENI(0);

/* subnormal number */
        nx=0;
        if(yexp==0) {t=1.0,pt[n0]+=m57; y*=t; nx=m57;}

/* if y is tiny (biased exponent <= 57), scale up y to y*2**57 */
        if( yexp <= m57 ) {py[n0]+=m57; nx+=m57; yexp+=m57;}

        nf=nx;
        py[n0] &= 0x7fff;
        px[n0] &= 0x7fff;

/* mask off the least significant 27 bits of y */
        t=y; pt[n3]=0; pt[n2]&=0xf800; y1=t;

/* LOOP: argument reduction on x whenever x > y */
loop:
        while ( x > y )
        {
            t=y;
            t1=y1;
            xexp=px[n0]&mexp;     /* exponent of x */
            k=xexp-yexp-m25;
            if(k>0)     /* if x/y >= 2**26, scale up y so that x/y < 2**26 */
                {pt[n0]+=k;pt1[n0]+=k;}
            n=x/t; x=(x-n*t1)-n*(t-t1);
        }
    /* end while (x > y) */

        if(nx!=0) {t=1.0; pt[n0]+=nx; x*=t; nx=0; goto loop;}

/* final adjustment */

        /* NOTE(review): n is read below even if the loop never ran. */
        hy=y/2.0;
        if(x>hy||((x==hy)&&n%2==1)) x-=y;
        px[n0] ^= sign;
        if(nf!=0) { t=1.0; pt[n0]-=nf; x*=t;}

/* restore inexact flag and inexact enable */
        swapINX(i); swapENI(e);

        return(x);
}
#endif

#if 0
/* SQRT
 * RETURN CORRECTLY ROUNDED (ACCORDING TO THE ROUNDING MODE) SQRT
 * FOR IEEE DOUBLE PRECISION ONLY, INTENDED FOR ASSEMBLY LANGUAGE
 * CODED IN C BY K.C. NG, 3/22/85.
 *
 * Warning: this code should not get compiled in unless ALL of
 * the following machine-dependent routines are supplied.
 *
 * Required machine dependent functions:
 *     swapINX(i)  ...return the status of INEXACT flag and reset it to "i"
 *     swapRM(r)   ...return the current Rounding Mode and reset it to "r"
 *     swapENI(e)  ...return the status of inexact enable and reset it to "e"
 *     addc(t)     ...perform t=t+1 regarding t as a 64 bit unsigned integer
 *     subc(t)     ...perform t=t-1 regarding t as a 64 bit unsigned integer
 */

static unsigned long table[] = {
0, 1204, 3062, 5746, 9193, 13348, 18162, 23592, 29598, 36145, 43202, 50740,
58733, 67158, 75992, 85215, 83599, 71378, 60428, 50647, 41945, 34246, 27478,
21581, 16499, 12183, 8588, 5674, 3403, 1742, 661, 130, };

double newsqrt(x)
double x;
{
        double y,z,t,addc(),subc(),b54=134217728.*134217728.; /* b54=2**54 */
        long mx,scalx,mexp=0x7ff00000;
        int i,j,r,e,swapINX(),swapRM(),swapENI();
        unsigned long *py=(unsigned long *) &y   ,
                      *pt=(unsigned long *) &t   ,
                      *px=(unsigned long *) &x   ;
#ifdef NATIONAL         /* ordering of word in a floating point number */
        int n0=1, n1=0;
#else
        int n0=0, n1=1;
#endif
/* Rounding Mode:  RN ...round-to-nearest
 *                 RZ ...round-towards 0
 *                 RP ...round-towards +INF
 *                 RM ...round-towards -INF
 */
        int RN=0,RZ=1,RP=2,RM=3;/* machine dependent: work on a Zilog Z8070
                                 * and a National 32081 & 16081
                                 */

/* exceptions */
        if(x!=x||x==0.0) return(x);  /* sqrt(NaN) is NaN, sqrt(+-0) = +-0 */
        if(x<0) return((x-x)/(x-x)); /* sqrt(negative) is invalid */
        if((mx=px[n0]&mexp)==mexp) return(x);  /* sqrt(+INF) is +INF */

/* save, reset, initialize */
        e=swapENI(0);   /* ...save and reset the inexact enable */
        i=swapINX(0);   /* ...save INEXACT flag */
        r=swapRM(RN);   /* ...save and reset the Rounding Mode to RN */
        scalx=0;

/* subnormal number, scale up x to x*2**54 */
        if(mx==0) {x *= b54 ; scalx-=0x01b00000;}

/* scale x to avoid intermediate over/underflow:
 * if (x > 2**512) x=x/2**512; if (x < 2**-512) x=x*2**512 */
        if(mx>0x5ff00000) {px[n0] -= 0x20000000; scalx+= 0x10000000;}
        if(mx<0x1ff00000) {px[n0] += 0x20000000; scalx-= 0x10000000;}

/* magic initial approximation to almost 8 sig. bits */
        py[n0]=(px[n0]>>1)+0x1ff80000;
        py[n0]=py[n0]-table[(py[n0]>>15)&31];

/* Heron's rule once with correction to improve y to almost 18 sig. bits */
        t=x/y; y=y+t; py[n0]=py[n0]-0x00100006; py[n1]=0;

/* triple to almost 56 sig. bits; now y approx. sqrt(x) to within 1 ulp */
        t=y*y; z=t;  pt[n0]+=0x00100000; t+=z; z=(x-z)*y;
        t=z/(t+x) ;  pt[n0]+=0x00100000; y+=t;

/* twiddle last bit to force y correctly rounded */
        swapRM(RZ);     /* ...set Rounding Mode to round-toward-zero */
        swapINX(0);     /* ...clear INEXACT flag */
        swapENI(e);     /* ...restore inexact enable status */
        t=x/y;          /* ...chopped quotient, possibly inexact */
        j=swapINX(i);   /* ...read and restore inexact flag */
        if(j==0) { if(t==y) goto end; else t=subc(t); }  /* ...t=t-ulp */
        b54+0.1;        /* ..trigger inexact flag, sqrt(x) is inexact */
        if(r==RN) t=addc(t);            /* ...t=t+ulp */
        else if(r==RP) { t=addc(t);y=addc(y);}/* ...t=t+ulp;y=y+ulp; */
        y=y+t;                          /* ...chopped sum */
        py[n0]=py[n0]-0x00100000;       /* ...correctly rounded sqrt(x) */
end:    py[n0]=py[n0]+scalx;            /* ...scale back y */
        swapRM(r);                      /* ...restore Rounding Mode */
        return(y);
}
#endif