1 /* @(#)e_sqrt.c 5.1 93/09/24 */ 2 /* 3 * ==================================================== 4 * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. 5 * 6 * Developed at SunPro, a Sun Microsystems, Inc. business. 7 * Permission to use, copy, modify, and distribute this 8 * software is freely granted, provided that this notice 9 * is preserved. 10 * ==================================================== 11 */ 12 13 #include <sys/cdefs.h> 14 #if defined(LIBM_SCCS) && !defined(lint) 15 __RCSID("$NetBSD: e_sqrt.c,v 1.9 1997/10/09 11:30:06 lukem Exp $"); 16 #endif 17 18 /* __ieee754_sqrt(x) 19 * Return correctly rounded sqrt. 20 * ------------------------------------------ 21 * | Use the hardware sqrt if you have one | 22 * ------------------------------------------ 23 * Method: 24 * Bit by bit method using integer arithmetic. (Slow, but portable) 25 * 1. Normalization 26 * Scale x to y in [1,4) with even powers of 2: 27 * find an integer k such that 1 <= (y=x*2^(2k)) < 4, then 28 * sqrt(x) = 2^k * sqrt(y) 29 * 2. Bit by bit computation 30 * Let q = sqrt(y) truncated to i bit after binary point (q = 1), 31 * i 0 32 * i+1 2 33 * s = 2*q , and y = 2 * ( y - q ). (1) 34 * i i i i 35 * 36 * To compute q from q , one checks whether 37 * i+1 i 38 * 39 * -(i+1) 2 40 * (q + 2 ) <= y. (2) 41 * i 42 * -(i+1) 43 * If (2) is false, then q = q ; otherwise q = q + 2 . 44 * i+1 i i+1 i 45 * 46 * With some algebric manipulation, it is not difficult to see 47 * that (2) is equivalent to 48 * -(i+1) 49 * s + 2 <= y (3) 50 * i i 51 * 52 * The advantage of (3) is that s and y can be computed by 53 * i i 54 * the following recurrence formula: 55 * if (3) is false 56 * 57 * s = s , y = y ; (4) 58 * i+1 i i+1 i 59 * 60 * otherwise, 61 * -i -(i+1) 62 * s = s + 2 , y = y - s - 2 (5) 63 * i+1 i i+1 i i 64 * 65 * One may easily use induction to prove (4) and (5). 66 * Note. Since the left hand side of (3) contain only i+2 bits, 67 * it does not necessary to do a full (53-bit) comparison 68 * in (3). 69 * 3. Final rounding 70 * After generating the 53 bits result, we compute one more bit. 71 * Together with the remainder, we can decide whether the 72 * result is exact, bigger than 1/2ulp, or less than 1/2ulp 73 * (it will never equal to 1/2ulp). 74 * The rounding mode can be detected by checking whether 75 * huge + tiny is equal to huge, and whether huge - tiny is 76 * equal to huge for some floating point number "huge" and "tiny". 77 * 78 * Special cases: 79 * sqrt(+-0) = +-0 ... exact 80 * sqrt(inf) = inf 81 * sqrt(-ve) = NaN ... with invalid signal 82 * sqrt(NaN) = NaN ... with invalid signal for signaling NaN 83 * 84 * Other methods : see the appended file at the end of the program below. 85 *--------------- 86 */ 87 88 #include "math.h" 89 #include "math_private.h" 90 91 #ifdef __STDC__ 92 static const double one = 1.0, tiny=1.0e-300; 93 #else 94 static double one = 1.0, tiny=1.0e-300; 95 #endif 96 97 #ifdef __STDC__ 98 double __ieee754_sqrt(double x) 99 #else 100 double __ieee754_sqrt(x) 101 double x; 102 #endif 103 { 104 double z; 105 int32_t sign = (int)0x80000000; 106 int32_t ix0,s0,q,m,t,i; 107 u_int32_t r,t1,s1,ix1,q1; 108 109 EXTRACT_WORDS(ix0,ix1,x); 110 111 /* take care of Inf and NaN */ 112 if((ix0&0x7ff00000)==0x7ff00000) { 113 return x*x+x; /* sqrt(NaN)=NaN, sqrt(+inf)=+inf 114 sqrt(-inf)=sNaN */ 115 } 116 /* take care of zero */ 117 if(ix0<=0) { 118 if(((ix0&(~sign))|ix1)==0) return x;/* sqrt(+-0) = +-0 */ 119 else if(ix0<0) 120 return (x-x)/(x-x); /* sqrt(-ve) = sNaN */ 121 } 122 /* normalize x */ 123 m = (ix0>>20); 124 if(m==0) { /* subnormal x */ 125 while(ix0==0) { 126 m -= 21; 127 ix0 |= (ix1>>11); ix1 <<= 21; 128 } 129 for(i=0;(ix0&0x00100000)==0;i++) ix0<<=1; 130 m -= i-1; 131 ix0 |= (ix1>>(32-i)); 132 ix1 <<= i; 133 } 134 m -= 1023; /* unbias exponent */ 135 ix0 = (ix0&0x000fffff)|0x00100000; 136 if(m&1){ /* odd m, double x to make it even */ 137 ix0 += ix0 + ((ix1&sign)>>31); 138 ix1 += ix1; 139 } 140 m >>= 1; /* m = [m/2] */ 141 142 /* generate sqrt(x) bit by bit */ 143 ix0 += ix0 + ((ix1&sign)>>31); 144 ix1 += ix1; 145 q = q1 = s0 = s1 = 0; /* [q,q1] = sqrt(x) */ 146 r = 0x00200000; /* r = moving bit from right to left */ 147 148 while(r!=0) { 149 t = s0+r; 150 if(t<=ix0) { 151 s0 = t+r; 152 ix0 -= t; 153 q += r; 154 } 155 ix0 += ix0 + ((ix1&sign)>>31); 156 ix1 += ix1; 157 r>>=1; 158 } 159 160 r = sign; 161 while(r!=0) { 162 t1 = s1+r; 163 t = s0; 164 if((t<ix0)||((t==ix0)&&(t1<=ix1))) { 165 s1 = t1+r; 166 if(((t1&sign)==sign)&&(s1&sign)==0) s0 += 1; 167 ix0 -= t; 168 if (ix1 < t1) ix0 -= 1; 169 ix1 -= t1; 170 q1 += r; 171 } 172 ix0 += ix0 + ((ix1&sign)>>31); 173 ix1 += ix1; 174 r>>=1; 175 } 176 177 /* use floating add to find out rounding direction */ 178 if((ix0|ix1)!=0) { 179 z = one-tiny; /* trigger inexact flag */ 180 if (z>=one) { 181 z = one+tiny; 182 if (q1==(u_int32_t)0xffffffff) { q1=0; q += 1;} 183 else if (z>one) { 184 if (q1==(u_int32_t)0xfffffffe) q+=1; 185 q1+=2; 186 } else 187 q1 += (q1&1); 188 } 189 } 190 ix0 = (q>>1)+0x3fe00000; 191 ix1 = q1>>1; 192 if ((q&1)==1) ix1 |= sign; 193 ix0 += (m <<20); 194 INSERT_WORDS(z,ix0,ix1); 195 return z; 196 } 197 198 /* 199 Other methods (use floating-point arithmetic) 200 ------------- 201 (This is a copy of a drafted paper by Prof W. Kahan 202 and K.C. Ng, written in May, 1986) 203 204 Two algorithms are given here to implement sqrt(x) 205 (IEEE double precision arithmetic) in software. 206 Both supply sqrt(x) correctly rounded. The first algorithm (in 207 Section A) uses newton iterations and involves four divisions. 208 The second one uses reciproot iterations to avoid division, but 209 requires more multiplications. Both algorithms need the ability 210 to chop results of arithmetic operations instead of round them, 211 and the INEXACT flag to indicate when an arithmetic operation 212 is executed exactly with no roundoff error, all part of the 213 standard (IEEE 754-1985). The ability to perform shift, add, 214 subtract and logical AND operations upon 32-bit words is needed 215 too, though not part of the standard. 216 217 A. sqrt(x) by Newton Iteration 218 219 (1) Initial approximation 220 221 Let x0 and x1 be the leading and the trailing 32-bit words of 222 a floating point number x (in IEEE double format) respectively 223 224 1 11 52 ...widths 225 ------------------------------------------------------ 226 x: |s| e | f | 227 ------------------------------------------------------ 228 msb lsb msb lsb ...order 229 230 231 ------------------------ ------------------------ 232 x0: |s| e | f1 | x1: | f2 | 233 ------------------------ ------------------------ 234 235 By performing shifts and subtracts on x0 and x1 (both regarded 236 as integers), we obtain an 8-bit approximation of sqrt(x) as 237 follows. 238 239 k := (x0>>1) + 0x1ff80000; 240 y0 := k - T1[31&(k>>15)]. ... y ~ sqrt(x) to 8 bits 241 Here k is a 32-bit integer and T1[] is an integer array containing 242 correction terms. Now magically the floating value of y (y's 243 leading 32-bit word is y0, the value of its trailing word is 0) 244 approximates sqrt(x) to almost 8-bit. 245 246 Value of T1: 247 static int T1[32]= { 248 0, 1024, 3062, 5746, 9193, 13348, 18162, 23592, 249 29598, 36145, 43202, 50740, 58733, 67158, 75992, 85215, 250 83599, 71378, 60428, 50647, 41945, 34246, 27478, 21581, 251 16499, 12183, 8588, 5674, 3403, 1742, 661, 130,}; 252 253 (2) Iterative refinement 254 255 Apply Heron's rule three times to y, we have y approximates 256 sqrt(x) to within 1 ulp (Unit in the Last Place): 257 258 y := (y+x/y)/2 ... almost 17 sig. bits 259 y := (y+x/y)/2 ... almost 35 sig. bits 260 y := y-(y-x/y)/2 ... within 1 ulp 261 262 263 Remark 1. 264 Another way to improve y to within 1 ulp is: 265 266 y := (y+x/y) ... almost 17 sig. bits to 2*sqrt(x) 267 y := y - 0x00100006 ... almost 18 sig. bits to sqrt(x) 268 269 2 270 (x-y )*y 271 y := y + 2* ---------- ...within 1 ulp 272 2 273 3y + x 274 275 276 This formula has one division fewer than the one above; however, 277 it requires more multiplications and additions. Also x must be 278 scaled in advance to avoid spurious overflow in evaluating the 279 expression 3y*y+x. Hence it is not recommended uless division 280 is slow. If division is very slow, then one should use the 281 reciproot algorithm given in section B. 282 283 (3) Final adjustment 284 285 By twiddling y's last bit it is possible to force y to be 286 correctly rounded according to the prevailing rounding mode 287 as follows. Let r and i be copies of the rounding mode and 288 inexact flag before entering the square root program. Also we 289 use the expression y+-ulp for the next representable floating 290 numbers (up and down) of y. Note that y+-ulp = either fixed 291 point y+-1, or multiply y by nextafter(1,+-inf) in chopped 292 mode. 293 294 I := FALSE; ... reset INEXACT flag I 295 R := RZ; ... set rounding mode to round-toward-zero 296 z := x/y; ... chopped quotient, possibly inexact 297 If(not I) then { ... if the quotient is exact 298 if(z=y) { 299 I := i; ... restore inexact flag 300 R := r; ... restore rounded mode 301 return sqrt(x):=y. 302 } else { 303 z := z - ulp; ... special rounding 304 } 305 } 306 i := TRUE; ... sqrt(x) is inexact 307 If (r=RN) then z=z+ulp ... rounded-to-nearest 308 If (r=RP) then { ... round-toward-+inf 309 y = y+ulp; z=z+ulp; 310 } 311 y := y+z; ... chopped sum 312 y0:=y0-0x00100000; ... y := y/2 is correctly rounded. 313 I := i; ... restore inexact flag 314 R := r; ... restore rounded mode 315 return sqrt(x):=y. 316 317 (4) Special cases 318 319 Square root of +inf, +-0, or NaN is itself; 320 Square root of a negative number is NaN with invalid signal. 321 322 323 B. sqrt(x) by Reciproot Iteration 324 325 (1) Initial approximation 326 327 Let x0 and x1 be the leading and the trailing 32-bit words of 328 a floating point number x (in IEEE double format) respectively 329 (see section A). By performing shifs and subtracts on x0 and y0, 330 we obtain a 7.8-bit approximation of 1/sqrt(x) as follows. 331 332 k := 0x5fe80000 - (x0>>1); 333 y0:= k - T2[63&(k>>14)]. ... y ~ 1/sqrt(x) to 7.8 bits 334 335 Here k is a 32-bit integer and T2[] is an integer array 336 containing correction terms. Now magically the floating 337 value of y (y's leading 32-bit word is y0, the value of 338 its trailing word y1 is set to zero) approximates 1/sqrt(x) 339 to almost 7.8-bit. 340 341 Value of T2: 342 static int T2[64]= { 343 0x1500, 0x2ef8, 0x4d67, 0x6b02, 0x87be, 0xa395, 0xbe7a, 0xd866, 344 0xf14a, 0x1091b,0x11fcd,0x13552,0x14999,0x15c98,0x16e34,0x17e5f, 345 0x18d03,0x19a01,0x1a545,0x1ae8a,0x1b5c4,0x1bb01,0x1bfde,0x1c28d, 346 0x1c2de,0x1c0db,0x1ba73,0x1b11c,0x1a4b5,0x1953d,0x18266,0x16be0, 347 0x1683e,0x179d8,0x18a4d,0x19992,0x1a789,0x1b445,0x1bf61,0x1c989, 348 0x1d16d,0x1d77b,0x1dddf,0x1e2ad,0x1e5bf,0x1e6e8,0x1e654,0x1e3cd, 349 0x1df2a,0x1d635,0x1cb16,0x1be2c,0x1ae4e,0x19bde,0x1868e,0x16e2e, 350 0x1527f,0x1334a,0x11051,0xe951, 0xbe01, 0x8e0d, 0x5924, 0x1edd,}; 351 352 (2) Iterative refinement 353 354 Apply Reciproot iteration three times to y and multiply the 355 result by x to get an approximation z that matches sqrt(x) 356 to about 1 ulp. To be exact, we will have 357 -1ulp < sqrt(x)-z<1.0625ulp. 358 359 ... set rounding mode to Round-to-nearest 360 y := y*(1.5-0.5*x*y*y) ... almost 15 sig. bits to 1/sqrt(x) 361 y := y*((1.5-2^-30)+0.5*x*y*y)... about 29 sig. bits to 1/sqrt(x) 362 ... special arrangement for better accuracy 363 z := x*y ... 29 bits to sqrt(x), with z*y<1 364 z := z + 0.5*z*(1-z*y) ... about 1 ulp to sqrt(x) 365 366 Remark 2. The constant 1.5-2^-30 is chosen to bias the error so that 367 (a) the term z*y in the final iteration is always less than 1; 368 (b) the error in the final result is biased upward so that 369 -1 ulp < sqrt(x) - z < 1.0625 ulp 370 instead of |sqrt(x)-z|<1.03125ulp. 371 372 (3) Final adjustment 373 374 By twiddling y's last bit it is possible to force y to be 375 correctly rounded according to the prevailing rounding mode 376 as follows. Let r and i be copies of the rounding mode and 377 inexact flag before entering the square root program. Also we 378 use the expression y+-ulp for the next representable floating 379 numbers (up and down) of y. Note that y+-ulp = either fixed 380 point y+-1, or multiply y by nextafter(1,+-inf) in chopped 381 mode. 382 383 R := RZ; ... set rounding mode to round-toward-zero 384 switch(r) { 385 case RN: ... round-to-nearest 386 if(x<= z*(z-ulp)...chopped) z = z - ulp; else 387 if(x<= z*(z+ulp)...chopped) z = z; else z = z+ulp; 388 break; 389 case RZ:case RM: ... round-to-zero or round-to--inf 390 R:=RP; ... reset rounding mod to round-to-+inf 391 if(x<z*z ... rounded up) z = z - ulp; else 392 if(x>=(z+ulp)*(z+ulp) ...rounded up) z = z+ulp; 393 break; 394 case RP: ... round-to-+inf 395 if(x>(z+ulp)*(z+ulp)...chopped) z = z+2*ulp; else 396 if(x>z*z ...chopped) z = z+ulp; 397 break; 398 } 399 400 Remark 3. The above comparisons can be done in fixed point. For 401 example, to compare x and w=z*z chopped, it suffices to compare 402 x1 and w1 (the trailing parts of x and w), regarding them as 403 two's complement integers. 404 405 ...Is z an exact square root? 406 To determine whether z is an exact square root of x, let z1 be the 407 trailing part of z, and also let x0 and x1 be the leading and 408 trailing parts of x. 409 410 If ((z1&0x03ffffff)!=0) ... not exact if trailing 26 bits of z!=0 411 I := 1; ... Raise Inexact flag: z is not exact 412 else { 413 j := 1 - [(x0>>20)&1] ... j = logb(x) mod 2 414 k := z1 >> 26; ... get z's 25-th and 26-th 415 fraction bits 416 I := i or (k&j) or ((k&(j+j+1))!=(x1&3)); 417 } 418 R:= r ... restore rounded mode 419 return sqrt(x):=z. 420 421 If multiplication is cheaper then the foregoing red tape, the 422 Inexact flag can be evaluated by 423 424 I := i; 425 I := (z*z!=x) or I. 426 427 Note that z*z can overwrite I; this value must be sensed if it is 428 True. 429 430 Remark 4. If z*z = x exactly, then bit 25 to bit 0 of z1 must be 431 zero. 432 433 -------------------- 434 z1: | f2 | 435 -------------------- 436 bit 31 bit 0 437 438 Further more, bit 27 and 26 of z1, bit 0 and 1 of x1, and the odd 439 or even of logb(x) have the following relations: 440 441 ------------------------------------------------- 442 bit 27,26 of z1 bit 1,0 of x1 logb(x) 443 ------------------------------------------------- 444 00 00 odd and even 445 01 01 even 446 10 10 odd 447 10 00 even 448 11 01 even 449 ------------------------------------------------- 450 451 (4) Special cases (see (4) of Section A). 452 453 */ 454 455