1 /* 2 * Copyright (c) 1985 Regents of the University of California. 3 * 4 * Use and reproduction of this software are granted in accordance with 5 * the terms and conditions specified in the Berkeley Software License 6 * Agreement (in particular, this entails acknowledgement of the programs' 7 * source, and inclusion of this notice) with the additional understanding 8 * that all recipients should regard themselves as participants in an 9 * ongoing research project and hence should feel obligated to report 10 * their experiences (good or bad) with these elementary function codes, 11 * using "sendbug 4bsd-bugs@BERKELEY", to the authors. 12 */ 13 14 #ifndef lint 15 static char sccsid[] = 16 "@(#)support.c 1.1 (Berkeley) 5/23/85; 1.2 (ucb.elefunt) 09/12/85"; 17 #endif not lint 18 19 /* 20 * Some IEEE standard p754 recommended functions and remainder and sqrt for 21 * supporting the C elementary functions. 22 ****************************************************************************** 23 * WARNING: 24 * These codes are developed (in double) to support the C elementary 25 * functions temporarily. They are not universal, and some of them are very 26 * slow (in particular, drem and sqrt is extremely inefficient). Each 27 * computer system should have its implementation of these functions using 28 * its own assembler. 29 ****************************************************************************** 30 * 31 * IEEE p754 required operations: 32 * drem(x,p) 33 * returns x REM y = x - [x/y]*y , where [x/y] is the integer 34 * nearest x/y; in half way case, choose the even one. 35 * sqrt(x) 36 * returns the square root of x correctly rounded according to 37 * the rounding mod. 38 * 39 * IEEE p754 recommended functions: 40 * (a) copysign(x,y) 41 * returns x with the sign of y. 42 * (b) scalb(x,N) 43 * returns x * (2**N), for integer values N. 44 * (c) logb(x) 45 * returns the unbiased exponent of x, a signed integer in 46 * double precision, except that logb(0) is -INF, logb(INF) 47 * is +INF, and logb(NAN) is that NAN. 48 * (d) finite(x) 49 * returns the value TRUE if -INF < x < +INF and returns 50 * FALSE otherwise. 51 * 52 * 53 * CODED IN C BY K.C. NG, 11/25/84; 54 * REVISED BY K.C. NG on 1/22/85, 2/13/85, 3/24/85. 55 */ 56 57 58 #ifdef VAX /* VAX D format */ 59 static unsigned short msign=0x7fff , mexp =0x7f80 ; 60 static short prep1=57, gap=7, bias=129 ; 61 static double novf=1.7E38, nunf=3.0E-39, zero=0.0 ; 62 #else /*IEEE double format */ 63 static unsigned short msign=0x7fff, mexp =0x7ff0 ; 64 static short prep1=54, gap=4, bias=1023 ; 65 static double novf=1.7E308, nunf=3.0E-308,zero=0.0; 66 #endif 67 68 double scalb(x,N) 69 double x; int N; 70 { 71 int k; 72 double scalb(); 73 74 #ifdef NATIONAL 75 unsigned short *px=(unsigned short *) &x + 3; 76 #else /* VAX, SUN, ZILOG */ 77 unsigned short *px=(unsigned short *) &x; 78 #endif 79 80 if( x == zero ) return(x); 81 82 #ifdef VAX 83 if( (k= *px & mexp ) != ~msign ) { 84 if( N<-260) return(nunf*nunf); else if(N>260) return(novf+novf); 85 #else /* IEEE */ 86 if( (k= *px & mexp ) != mexp ) { 87 if( N<-2100) return(nunf*nunf); else if(N>2100) return(novf+novf); 88 if( k == 0 ) { 89 x *= scalb(1.0,(int)prep1); N -= prep1; return(scalb(x,N));} 90 #endif 91 92 if((k = (k>>gap)+ N) > 0 ) 93 if( k < (mexp>>gap) ) *px = (*px&~mexp) | (k<<gap); 94 else x=novf+novf; /* overflow */ 95 else 96 if( k > -prep1 ) 97 /* gradual underflow */ 98 {*px=(*px&~mexp)|(short)(1<<gap); x *= scalb(1.0,k-1);} 99 else 100 return(nunf*nunf); 101 } 102 return(x); 103 } 104 105 106 double copysign(x,y) 107 double x,y; 108 { 109 #ifdef NATIONAL 110 unsigned short *px=(unsigned short *) &x+3, 111 *py=(unsigned short *) &y+3; 112 #else /* VAX, SUN, ZILOG */ 113 unsigned short *px=(unsigned short *) &x, 114 *py=(unsigned short *) &y; 115 #endif 116 117 #ifdef VAX 118 if ( (*px & mexp) == 0 ) return(x); 119 #endif 120 121 *px = ( *px & msign ) | ( *py & ~msign ); 122 return(x); 123 } 124 125 double logb(x) 126 double x; 127 { 128 129 #ifdef NATIONAL 130 short *px=(short *) &x+3, k; 131 #else /* VAX, SUN, ZILOG */ 132 short *px=(short *) &x, k; 133 #endif 134 135 #ifdef VAX 136 return( ((*px & mexp)>>gap) - bias); 137 #else /* IEEE */ 138 if( (k= *px & mexp ) != mexp ) 139 if ( k != 0 ) 140 return ( (k>>gap) - bias ); 141 else if( x != zero) 142 return ( -1022.0 ); 143 else 144 return(-(1.0/zero)); 145 else if(x != x) 146 return(x); 147 else 148 {*px &= msign; return(x);} 149 #endif 150 } 151 152 finite(x) 153 double x; 154 { 155 #ifdef VAX 156 return(1.0); 157 #else /* IEEE */ 158 #ifdef NATIONAL 159 return( (*((short *) &x+3 ) & mexp ) != mexp ); 160 #else /* SUN, ZILOG */ 161 return( (*((short *) &x ) & mexp ) != mexp ); 162 #endif 163 #endif 164 } 165 166 double drem(x,p) 167 double x,p; 168 { 169 short sign; 170 double hp,dp,tmp,drem(),scalb(); 171 unsigned short k; 172 #ifdef NATIONAL 173 unsigned short 174 *px=(unsigned short *) &x +3, 175 *pp=(unsigned short *) &p +3, 176 *pd=(unsigned short *) &dp +3, 177 *pt=(unsigned short *) &tmp+3; 178 #else /* VAX, SUN, ZILOG */ 179 unsigned short 180 *px=(unsigned short *) &x , 181 *pp=(unsigned short *) &p , 182 *pd=(unsigned short *) &dp , 183 *pt=(unsigned short *) &tmp; 184 #endif 185 186 *pp &= msign ; 187 188 #ifdef VAX 189 if( ( *px & mexp ) == ~msign || p == zero ) 190 #else /* IEEE */ 191 if( ( *px & mexp ) == mexp || p == zero ) 192 #endif 193 194 return( (x != x)? x:zero/zero ); 195 196 else if ( ((*pp & mexp)>>gap) <= 1 ) 197 /* subnormal p, or almost subnormal p */ 198 { double b; b=scalb(1.0,(int)prep1); 199 p *= b; x = drem(x,p); x *= b; return(drem(x,p)/b);} 200 else if ( p >= novf/2) 201 { p /= 2 ; x /= 2; return(drem(x,p)*2);} 202 else 203 { 204 dp=p+p; hp=p/2; 205 sign= *px & ~msign ; 206 *px &= msign ; 207 while ( x > dp ) 208 { 209 k=(*px & mexp) - (*pd & mexp) ; 210 tmp = dp ; 211 *pt += k ; 212 213 #ifdef VAX 214 if( x < tmp ) *pt -= 128 ; 215 #else /* IEEE */ 216 if( x < tmp ) *pt -= 16 ; 217 #endif 218 219 x -= tmp ; 220 } 221 if ( x > hp ) 222 { x -= p ; if ( x >= hp ) x -= p ; } 223 224 *px = *px ^ sign; 225 return( x); 226 227 } 228 } 229 double sqrt(x) 230 double x; 231 { 232 double q,s,b,r; 233 double logb(),scalb(); 234 double t,zero=0.0; 235 int m,n,i,finite(); 236 #ifdef VAX 237 int k=54; 238 #else /* IEEE */ 239 int k=51; 240 #endif 241 242 /* sqrt(NaN) is NaN, sqrt(+-0) = +-0 */ 243 if(x!=x||x==zero) return(x); 244 245 /* sqrt(negative) is invalid */ 246 if(x<zero) return(zero/zero); 247 248 /* sqrt(INF) is INF */ 249 if(!finite(x)) return(x); 250 251 /* scale x to [1,4) */ 252 n=logb(x); 253 x=scalb(x,-n); 254 if((m=logb(x))!=0) x=scalb(x,-m); /* subnormal number */ 255 m += n; 256 n = m/2; 257 if((n+n)!=m) {x *= 2; m -=1; n=m/2;} 258 259 /* generate sqrt(x) bit by bit (accumulating in q) */ 260 q=1.0; s=4.0; x -= 1.0; r=1; 261 for(i=1;i<=k;i++) { 262 t=s+1; x *= 4; r /= 2; 263 if(t<=x) { 264 s=t+t+2, x -= t; q += r;} 265 else 266 s *= 2; 267 } 268 269 /* generate the last bit and determine the final rounding */ 270 r/=2; x *= 4; 271 if(x==zero) goto end; 100+r; /* trigger inexact flag */ 272 if(s<x) { 273 q+=r; x -=s; s += 2; s *= 2; x *= 4; 274 t = (x-s)-5; 275 b=1.0+3*r/4; if(b==1.0) goto end; /* b==1 : Round-to-zero */ 276 b=1.0+r/4; if(b>1.0) t=1; /* b>1 : Round-to-(+INF) */ 277 if(t>=0) q+=r; } /* else: Round-to-nearest */ 278 else { 279 s *= 2; x *= 4; 280 t = (x-s)-1; 281 b=1.0+3*r/4; if(b==1.0) goto end; 282 b=1.0+r/4; if(b>1.0) t=1; 283 if(t>=0) q+=r; } 284 285 end: return(scalb(q,n)); 286 } 287 288 #if 0 289 /* DREM(X,Y) 290 * RETURN X REM Y =X-N*Y, N=[X/Y] ROUNDED (ROUNDED TO EVEN IN THE HALF WAY CASE) 291 * DOUBLE PRECISION (VAX D format 56 bits, IEEE DOUBLE 53 BITS) 292 * INTENDED FOR ASSEMBLY LANGUAGE 293 * CODED IN C BY K.C. NG, 3/23/85, 4/8/85. 294 * 295 * Warning: this code should not get compiled in unless ALL of 296 * the following machine-dependent routines are supplied. 297 * 298 * Required machine dependent functions (not on a VAX): 299 * swapINX(i): save inexact flag and reset it to "i" 300 * swapENI(e): save inexact enable and reset it to "e" 301 */ 302 303 double drem(x,y) 304 double x,y; 305 { 306 307 #ifdef NATIONAL /* order of words in floating point number */ 308 static n0=3,n1=2,n2=1,n3=0; 309 #else /* VAX, SUN, ZILOG */ 310 static n0=0,n1=1,n2=2,n3=3; 311 #endif 312 313 static unsigned short mexp =0x7ff0, m25 =0x0190, m57 =0x0390; 314 static double zero=0.0; 315 double hy,y1,t,t1; 316 short k; 317 long n; 318 int i,e; 319 unsigned short xexp,yexp, *px =(unsigned short *) &x , 320 nx,nf, *py =(unsigned short *) &y , 321 sign, *pt =(unsigned short *) &t , 322 *pt1 =(unsigned short *) &t1 ; 323 324 xexp = px[n0] & mexp ; /* exponent of x */ 325 yexp = py[n0] & mexp ; /* exponent of y */ 326 sign = px[n0] &0x8000; /* sign of x */ 327 328 /* return NaN if x is NaN, or y is NaN, or x is INF, or y is zero */ 329 if(x!=x) return(x); if(y!=y) return(y); /* x or y is NaN */ 330 if( xexp == mexp ) return(zero/zero); /* x is INF */ 331 if(y==zero) return(y/y); 332 333 /* save the inexact flag and inexact enable in i and e respectively 334 * and reset them to zero 335 */ 336 i=swapINX(0); e=swapENI(0); 337 338 /* subnormal number */ 339 nx=0; 340 if(yexp==0) {t=1.0,pt[n0]+=m57; y*=t; nx=m57;} 341 342 /* if y is tiny (biased exponent <= 57), scale up y to y*2**57 */ 343 if( yexp <= m57 ) {py[n0]+=m57; nx+=m57; yexp+=m57;} 344 345 nf=nx; 346 py[n0] &= 0x7fff; 347 px[n0] &= 0x7fff; 348 349 /* mask off the least significant 27 bits of y */ 350 t=y; pt[n3]=0; pt[n2]&=0xf800; y1=t; 351 352 /* LOOP: argument reduction on x whenever x > y */ 353 loop: 354 while ( x > y ) 355 { 356 t=y; 357 t1=y1; 358 xexp=px[n0]&mexp; /* exponent of x */ 359 k=xexp-yexp-m25; 360 if(k>0) /* if x/y >= 2**26, scale up y so that x/y < 2**26 */ 361 {pt[n0]+=k;pt1[n0]+=k;} 362 n=x/t; x=(x-n*t1)-n*(t-t1); 363 } 364 /* end while (x > y) */ 365 366 if(nx!=0) {t=1.0; pt[n0]+=nx; x*=t; nx=0; goto loop;} 367 368 /* final adjustment */ 369 370 hy=y/2.0; 371 if(x>hy||((x==hy)&&n%2==1)) x-=y; 372 px[n0] ^= sign; 373 if(nf!=0) { t=1.0; pt[n0]-=nf; x*=t;} 374 375 /* restore inexact flag and inexact enable */ 376 swapINX(i); swapENI(e); 377 378 return(x); 379 } 380 #endif 381 382 #if 0 383 /* SQRT 384 * RETURN CORRECTLY ROUNDED (ACCORDING TO THE ROUNDING MODE) SQRT 385 * FOR IEEE DOUBLE PRECISION ONLY, INTENDED FOR ASSEMBLY LANGUAGE 386 * CODED IN C BY K.C. NG, 3/22/85. 387 * 388 * Warning: this code should not get compiled in unless ALL of 389 * the following machine-dependent routines are supplied. 390 * 391 * Required machine dependent functions: 392 * swapINX(i) ...return the status of INEXACT flag and reset it to "i" 393 * swapRM(r) ...return the current Rounding Mode and reset it to "r" 394 * swapENI(e) ...return the status of inexact enable and reset it to "e" 395 * addc(t) ...perform t=t+1 regarding t as a 64 bit unsigned integer 396 * subc(t) ...perform t=t-1 regarding t as a 64 bit unsigned integer 397 */ 398 399 static unsigned long table[] = { 400 0, 1204, 3062, 5746, 9193, 13348, 18162, 23592, 29598, 36145, 43202, 50740, 401 58733, 67158, 75992, 85215, 83599, 71378, 60428, 50647, 41945, 34246, 27478, 402 21581, 16499, 12183, 8588, 5674, 3403, 1742, 661, 130, }; 403 404 double newsqrt(x) 405 double x; 406 { 407 double y,z,t,addc(),subc(),b54=134217728.*134217728.; /* b54=2**54 */ 408 long mx,scalx,mexp=0x7ff00000; 409 int i,j,r,e,swapINX(),swapRM(),swapENI(); 410 unsigned long *py=(unsigned long *) &y , 411 *pt=(unsigned long *) &t , 412 *px=(unsigned long *) &x ; 413 #ifdef NATIONAL /* ordering of word in a floating point number */ 414 int n0=1, n1=0; 415 #else 416 int n0=0, n1=1; 417 #endif 418 /* Rounding Mode: RN ...round-to-nearest 419 * RZ ...round-towards 0 420 * RP ...round-towards +INF 421 * RM ...round-towards -INF 422 */ 423 int RN=0,RZ=1,RP=2,RM=3;/* machine dependent: work on a Zilog Z8070 424 * and a National 32081 & 16081 425 */ 426 427 /* exceptions */ 428 if(x!=x||x==0.0) return(x); /* sqrt(NaN) is NaN, sqrt(+-0) = +-0 */ 429 if(x<0) return((x-x)/(x-x)); /* sqrt(negative) is invalid */ 430 if((mx=px[n0]&mexp)==mexp) return(x); /* sqrt(+INF) is +INF */ 431 432 /* save, reset, initialize */ 433 e=swapENI(0); /* ...save and reset the inexact enable */ 434 i=swapINX(0); /* ...save INEXACT flag */ 435 r=swapRM(RN); /* ...save and reset the Rounding Mode to RN */ 436 scalx=0; 437 438 /* subnormal number, scale up x to x*2**54 */ 439 if(mx==0) {x *= b54 ; scalx-=0x01b00000;} 440 441 /* scale x to avoid intermediate over/underflow: 442 * if (x > 2**512) x=x/2**512; if (x < 2**-512) x=x*2**512 */ 443 if(mx>0x5ff00000) {px[n0] -= 0x20000000; scalx+= 0x10000000;} 444 if(mx<0x1ff00000) {px[n0] += 0x20000000; scalx-= 0x10000000;} 445 446 /* magic initial approximation to almost 8 sig. bits */ 447 py[n0]=(px[n0]>>1)+0x1ff80000; 448 py[n0]=py[n0]-table[(py[n0]>>15)&31]; 449 450 /* Heron's rule once with correction to improve y to almost 18 sig. bits */ 451 t=x/y; y=y+t; py[n0]=py[n0]-0x00100006; py[n1]=0; 452 453 /* triple to almost 56 sig. bits; now y approx. sqrt(x) to within 1 ulp */ 454 t=y*y; z=t; pt[n0]+=0x00100000; t+=z; z=(x-z)*y; 455 t=z/(t+x) ; pt[n0]+=0x00100000; y+=t; 456 457 /* twiddle last bit to force y correctly rounded */ 458 swapRM(RZ); /* ...set Rounding Mode to round-toward-zero */ 459 swapINX(0); /* ...clear INEXACT flag */ 460 swapENI(e); /* ...restore inexact enable status */ 461 t=x/y; /* ...chopped quotient, possibly inexact */ 462 j=swapINX(i); /* ...read and restore inexact flag */ 463 if(j==0) { if(t==y) goto end; else t=subc(t); } /* ...t=t-ulp */ 464 b54+0.1; /* ..trigger inexact flag, sqrt(x) is inexact */ 465 if(r==RN) t=addc(t); /* ...t=t+ulp */ 466 else if(r==RP) { t=addc(t);y=addc(y);}/* ...t=t+ulp;y=y+ulp; */ 467 y=y+t; /* ...chopped sum */ 468 py[n0]=py[n0]-0x00100000; /* ...correctly rounded sqrt(x) */ 469 end: py[n0]=py[n0]+scalx; /* ...scale back y */ 470 swapRM(r); /* ...restore Rounding Mode */ 471 return(y); 472 } 473 #endif 474