xref: /netbsd-src/lib/libm/src/e_sqrt.c (revision ae1bfcddc410612bc8c58b807e1830becb69a24c)
1 /* @(#)e_sqrt.c 5.1 93/09/24 */
2 /*
3  * ====================================================
4  * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
5  *
6  * Developed at SunPro, a Sun Microsystems, Inc. business.
7  * Permission to use, copy, modify, and distribute this
8  * software is freely granted, provided that this notice
9  * is preserved.
10  * ====================================================
11  */
12 
13 #ifndef lint
14 static char rcsid[] = "$Id: e_sqrt.c,v 1.4 1994/03/03 17:04:21 jtc Exp $";
15 #endif
16 
17 /* __ieee754_sqrt(x)
18  * Return correctly rounded sqrt.
19  *           ------------------------------------------
20  *	     |  Use the hardware sqrt if you have one |
21  *           ------------------------------------------
22  * Method:
23  *   Bit by bit method using integer arithmetic. (Slow, but portable)
24  *   1. Normalization
25  *	Scale x to y in [1,4) with even powers of 2:
26  *	find an integer k such that  1 <= (y=x*2^(2k)) < 4, then
27  *		sqrt(x) = 2^k * sqrt(y)
28  *   2. Bit by bit computation
29  *	Let q  = sqrt(y) truncated to i bit after binary point (q = 1),
30  *	     i							 0
31  *                                     i+1         2
32  *	    s  = 2*q , and	y  =  2   * ( y - q  ).		(1)
33  *	     i      i            i                 i
34  *
35  *	To compute q    from q , one checks whether
36  *		    i+1       i
37  *
38  *			      -(i+1) 2
39  *			(q + 2      ) <= y.			(2)
40  *     			  i
41  *							      -(i+1)
42  *	If (2) is false, then q   = q ; otherwise q   = q  + 2      .
43  *		 	       i+1   i             i+1   i
44  *
45  *	With some algebric manipulation, it is not difficult to see
46  *	that (2) is equivalent to
47  *                             -(i+1)
48  *			s  +  2       <= y			(3)
49  *			 i                i
50  *
51  *	The advantage of (3) is that s  and y  can be computed by
52  *				      i      i
53  *	the following recurrence formula:
54  *	    if (3) is false
55  *
56  *	    s     =  s  ,	y    = y   ;			(4)
57  *	     i+1      i		 i+1    i
58  *
59  *	    otherwise,
60  *                         -i                     -(i+1)
61  *	    s	  =  s  + 2  ,  y    = y  -  s  - 2  		(5)
62  *           i+1      i          i+1    i     i
63  *
64  *	One may easily use induction to prove (4) and (5).
65  *	Note. Since the left hand side of (3) contain only i+2 bits,
66  *	      it does not necessary to do a full (53-bit) comparison
67  *	      in (3).
68  *   3. Final rounding
69  *	After generating the 53 bits result, we compute one more bit.
70  *	Together with the remainder, we can decide whether the
71  *	result is exact, bigger than 1/2ulp, or less than 1/2ulp
72  *	(it will never equal to 1/2ulp).
73  *	The rounding mode can be detected by checking whether
74  *	huge + tiny is equal to huge, and whether huge - tiny is
75  *	equal to huge for some floating point number "huge" and "tiny".
76  *
77  * Special cases:
78  *	sqrt(+-0) = +-0 	... exact
79  *	sqrt(inf) = inf
80  *	sqrt(-ve) = NaN		... with invalid signal
81  *	sqrt(NaN) = NaN		... with invalid signal for signaling NaN
82  *
83  * Other methods : see the appended file at the end of the program below.
84  *---------------
85  */
86 
87 #include <math.h>
88 #include <machine/endian.h>
89 
90 #if BYTE_ORDER == LITTLE_ENDIAN
91 #define n0	1
92 #else
93 #define n0	0
94 #endif
95 
96 #ifdef __STDC__
97 static	const double	one	= 1.0, tiny=1.0e-300;
98 #else
99 static	double	one	= 1.0, tiny=1.0e-300;
100 #endif
101 
102 #ifdef __STDC__
103 	double __ieee754_sqrt(double x)
104 #else
105 	double __ieee754_sqrt(x)
106 	double x;
107 #endif
108 {
109 	double z;
110 	int 	sign = (int)0x80000000;
111 	unsigned r,t1,s1,ix1,q1;
112 	int ix0,s0,q,m,t,i;
113 
114 	ix0 = *(n0+(int*)&x);			/* high word of x */
115 	ix1 = *((1-n0)+(int*)&x);		/* low word of x */
116 
117     /* take care of Inf and NaN */
118 	if((ix0&0x7ff00000)==0x7ff00000) {
119 	    return x*x+x;		/* sqrt(NaN)=NaN, sqrt(+inf)=+inf
120 					   sqrt(-inf)=sNaN */
121 	}
122     /* take care of zero */
123 	if(ix0<=0) {
124 	    if(((ix0&(~sign))|ix1)==0) return x;/* sqrt(+-0) = +-0 */
125 	    else if(ix0<0)
126 		return (x-x)/(x-x);		/* sqrt(-ve) = sNaN */
127 	}
128     /* normalize x */
129 	m = (ix0>>20);
130 	if(m==0) {				/* subnormal x */
131 	    while(ix0==0) {
132 		m -= 21;
133 		ix0 |= (ix1>>11); ix1 <<= 21;
134 	    }
135 	    for(i=0;(ix0&0x00100000)==0;i++) ix0<<=1;
136 	    m -= i-1;
137 	    ix0 |= (ix1>>(32-i));
138 	    ix1 <<= i;
139 	}
140 	m -= 1023;	/* unbias exponent */
141 	ix0 = (ix0&0x000fffff)|0x00100000;
142 	if(m&1){	/* odd m, double x to make it even */
143 	    ix0 += ix0 + ((ix1&sign)>>31);
144 	    ix1 += ix1;
145 	}
146 	m >>= 1;	/* m = [m/2] */
147 
148     /* generate sqrt(x) bit by bit */
149 	ix0 += ix0 + ((ix1&sign)>>31);
150 	ix1 += ix1;
151 	q = q1 = s0 = s1 = 0;	/* [q,q1] = sqrt(x) */
152 	r = 0x00200000;		/* r = moving bit from right to left */
153 
154 	while(r!=0) {
155 	    t = s0+r;
156 	    if(t<=ix0) {
157 		s0   = t+r;
158 		ix0 -= t;
159 		q   += r;
160 	    }
161 	    ix0 += ix0 + ((ix1&sign)>>31);
162 	    ix1 += ix1;
163 	    r>>=1;
164 	}
165 
166 	r = sign;
167 	while(r!=0) {
168 	    t1 = s1+r;
169 	    t  = s0;
170 	    if((t<ix0)||((t==ix0)&&(t1<=ix1))) {
171 		s1  = t1+r;
172 		if(((t1&sign)==sign)&&(s1&sign)==0) s0 += 1;
173 		ix0 -= t;
174 		if (ix1 < t1) ix0 -= 1;
175 		ix1 -= t1;
176 		q1  += r;
177 	    }
178 	    ix0 += ix0 + ((ix1&sign)>>31);
179 	    ix1 += ix1;
180 	    r>>=1;
181 	}
182 
183     /* use floating add to find out rounding direction */
184 	if((ix0|ix1)!=0) {
185 	    z = one-tiny; /* trigger inexact flag */
186 	    if (z>=one) {
187 	        z = one+tiny;
188 	        if (q1==(unsigned)0xffffffff) { q1=0; q += 1;}
189 		else if (z>one) {
190 		    if (q1==(unsigned)0xfffffffe) q+=1;
191 		    q1+=2;
192 		} else
193 	            q1 += (q1&1);
194 	    }
195 	}
196 	ix0 = (q>>1)+0x3fe00000;
197 	ix1 =  q1>>1;
198 	if ((q&1)==1) ix1 |= sign;
199 	ix0 += (m <<20);
200 	*(n0+(int*)&z) = ix0;
201 	*((1-n0)+(int*)&z) = ix1;
202 	return z;
203 }
204 
205 /*
206 Other methods  (use floating-point arithmetic)
207 -------------
208 (This is a copy of a drafted paper by Prof W. Kahan
209 and K.C. Ng, written in May, 1986)
210 
211 	Two algorithms are given here to implement sqrt(x)
212 	(IEEE double precision arithmetic) in software.
213 	Both supply sqrt(x) correctly rounded. The first algorithm (in
214 	Section A) uses newton iterations and involves four divisions.
215 	The second one uses reciproot iterations to avoid division, but
216 	requires more multiplications. Both algorithms need the ability
217 	to chop results of arithmetic operations instead of round them,
218 	and the INEXACT flag to indicate when an arithmetic operation
219 	is executed exactly with no roundoff error, all part of the
220 	standard (IEEE 754-1985). The ability to perform shift, add,
221 	subtract and logical AND operations upon 32-bit words is needed
222 	too, though not part of the standard.
223 
224 A.  sqrt(x) by Newton Iteration
225 
226    (1)	Initial approximation
227 
228 	Let x0 and x1 be the leading and the trailing 32-bit words of
229 	a floating point number x (in IEEE double format) respectively
230 
231 	    1    11		     52				  ...widths
232 	   ------------------------------------------------------
233 	x: |s|	  e     |	      f				|
234 	   ------------------------------------------------------
235 	      msb    lsb  msb				      lsb ...order
236 
237 
238 	     ------------------------  	     ------------------------
239 	x0:  |s|   e    |    f1     |	 x1: |          f2           |
240 	     ------------------------  	     ------------------------
241 
242 	By performing shifts and subtracts on x0 and x1 (both regarded
243 	as integers), we obtain an 8-bit approximation of sqrt(x) as
244 	follows.
245 
246 		k  := (x0>>1) + 0x1ff80000;
247 		y0 := k - T1[31&(k>>15)].	... y ~ sqrt(x) to 8 bits
248 	Here k is a 32-bit integer and T1[] is an integer array containing
249 	correction terms. Now magically the floating value of y (y's
250 	leading 32-bit word is y0, the value of its trailing word is 0)
251 	approximates sqrt(x) to almost 8-bit.
252 
253 	Value of T1:
254 	static int T1[32]= {
255 	0,	1024,	3062,	5746,	9193,	13348,	18162,	23592,
256 	29598,	36145,	43202,	50740,	58733,	67158,	75992,	85215,
257 	83599,	71378,	60428,	50647,	41945,	34246,	27478,	21581,
258 	16499,	12183,	8588,	5674,	3403,	1742,	661,	130,};
259 
260     (2)	Iterative refinement
261 
262 	Apply Heron's rule three times to y, we have y approximates
263 	sqrt(x) to within 1 ulp (Unit in the Last Place):
264 
265 		y := (y+x/y)/2		... almost 17 sig. bits
266 		y := (y+x/y)/2		... almost 35 sig. bits
267 		y := y-(y-x/y)/2	... within 1 ulp
268 
269 
270 	Remark 1.
271 	    Another way to improve y to within 1 ulp is:
272 
273 		y := (y+x/y)		... almost 17 sig. bits to 2*sqrt(x)
274 		y := y - 0x00100006	... almost 18 sig. bits to sqrt(x)
275 
276 				2
277 			    (x-y )*y
278 		y := y + 2* ----------	...within 1 ulp
279 			       2
280 			     3y  + x
281 
282 
283 	This formula has one division fewer than the one above; however,
284 	it requires more multiplications and additions. Also x must be
285 	scaled in advance to avoid spurious overflow in evaluating the
286 	expression 3y*y+x. Hence it is not recommended uless division
287 	is slow. If division is very slow, then one should use the
288 	reciproot algorithm given in section B.
289 
290     (3) Final adjustment
291 
292 	By twiddling y's last bit it is possible to force y to be
293 	correctly rounded according to the prevailing rounding mode
294 	as follows. Let r and i be copies of the rounding mode and
295 	inexact flag before entering the square root program. Also we
296 	use the expression y+-ulp for the next representable floating
297 	numbers (up and down) of y. Note that y+-ulp = either fixed
298 	point y+-1, or multiply y by nextafter(1,+-inf) in chopped
299 	mode.
300 
301 		I := FALSE;	... reset INEXACT flag I
302 		R := RZ;	... set rounding mode to round-toward-zero
303 		z := x/y;	... chopped quotient, possibly inexact
304 		If(not I) then {	... if the quotient is exact
305 		    if(z=y) {
306 		        I := i;	 ... restore inexact flag
307 		        R := r;  ... restore rounded mode
308 		        return sqrt(x):=y.
309 		    } else {
310 			z := z - ulp;	... special rounding
311 		    }
312 		}
313 		i := TRUE;		... sqrt(x) is inexact
314 		If (r=RN) then z=z+ulp	... rounded-to-nearest
315 		If (r=RP) then {	... round-toward-+inf
316 		    y = y+ulp; z=z+ulp;
317 		}
318 		y := y+z;		... chopped sum
319 		y0:=y0-0x00100000;	... y := y/2 is correctly rounded.
320 	        I := i;	 		... restore inexact flag
321 	        R := r;  		... restore rounded mode
322 	        return sqrt(x):=y.
323 
324     (4)	Special cases
325 
326 	Square root of +inf, +-0, or NaN is itself;
327 	Square root of a negative number is NaN with invalid signal.
328 
329 
330 B.  sqrt(x) by Reciproot Iteration
331 
332    (1)	Initial approximation
333 
334 	Let x0 and x1 be the leading and the trailing 32-bit words of
335 	a floating point number x (in IEEE double format) respectively
336 	(see section A). By performing shifs and subtracts on x0 and y0,
337 	we obtain a 7.8-bit approximation of 1/sqrt(x) as follows.
338 
339 	    k := 0x5fe80000 - (x0>>1);
340 	    y0:= k - T2[63&(k>>14)].	... y ~ 1/sqrt(x) to 7.8 bits
341 
342 	Here k is a 32-bit integer and T2[] is an integer array
343 	containing correction terms. Now magically the floating
344 	value of y (y's leading 32-bit word is y0, the value of
345 	its trailing word y1 is set to zero) approximates 1/sqrt(x)
346 	to almost 7.8-bit.
347 
348 	Value of T2:
349 	static int T2[64]= {
350 	0x1500,	0x2ef8,	0x4d67,	0x6b02,	0x87be,	0xa395,	0xbe7a,	0xd866,
351 	0xf14a,	0x1091b,0x11fcd,0x13552,0x14999,0x15c98,0x16e34,0x17e5f,
352 	0x18d03,0x19a01,0x1a545,0x1ae8a,0x1b5c4,0x1bb01,0x1bfde,0x1c28d,
353 	0x1c2de,0x1c0db,0x1ba73,0x1b11c,0x1a4b5,0x1953d,0x18266,0x16be0,
354 	0x1683e,0x179d8,0x18a4d,0x19992,0x1a789,0x1b445,0x1bf61,0x1c989,
355 	0x1d16d,0x1d77b,0x1dddf,0x1e2ad,0x1e5bf,0x1e6e8,0x1e654,0x1e3cd,
356 	0x1df2a,0x1d635,0x1cb16,0x1be2c,0x1ae4e,0x19bde,0x1868e,0x16e2e,
357 	0x1527f,0x1334a,0x11051,0xe951,	0xbe01,	0x8e0d,	0x5924,	0x1edd,};
358 
359     (2)	Iterative refinement
360 
361 	Apply Reciproot iteration three times to y and multiply the
362 	result by x to get an approximation z that matches sqrt(x)
363 	to about 1 ulp. To be exact, we will have
364 		-1ulp < sqrt(x)-z<1.0625ulp.
365 
366 	... set rounding mode to Round-to-nearest
367 	   y := y*(1.5-0.5*x*y*y)	... almost 15 sig. bits to 1/sqrt(x)
368 	   y := y*((1.5-2^-30)+0.5*x*y*y)... about 29 sig. bits to 1/sqrt(x)
369 	... special arrangement for better accuracy
370 	   z := x*y			... 29 bits to sqrt(x), with z*y<1
371 	   z := z + 0.5*z*(1-z*y)	... about 1 ulp to sqrt(x)
372 
373 	Remark 2. The constant 1.5-2^-30 is chosen to bias the error so that
374 	(a) the term z*y in the final iteration is always less than 1;
375 	(b) the error in the final result is biased upward so that
376 		-1 ulp < sqrt(x) - z < 1.0625 ulp
377 	    instead of |sqrt(x)-z|<1.03125ulp.
378 
379     (3)	Final adjustment
380 
381 	By twiddling y's last bit it is possible to force y to be
382 	correctly rounded according to the prevailing rounding mode
383 	as follows. Let r and i be copies of the rounding mode and
384 	inexact flag before entering the square root program. Also we
385 	use the expression y+-ulp for the next representable floating
386 	numbers (up and down) of y. Note that y+-ulp = either fixed
387 	point y+-1, or multiply y by nextafter(1,+-inf) in chopped
388 	mode.
389 
390 	R := RZ;		... set rounding mode to round-toward-zero
391 	switch(r) {
392 	    case RN:		... round-to-nearest
393 	       if(x<= z*(z-ulp)...chopped) z = z - ulp; else
394 	       if(x<= z*(z+ulp)...chopped) z = z; else z = z+ulp;
395 	       break;
396 	    case RZ:case RM:	... round-to-zero or round-to--inf
397 	       R:=RP;		... reset rounding mod to round-to-+inf
398 	       if(x<z*z ... rounded up) z = z - ulp; else
399 	       if(x>=(z+ulp)*(z+ulp) ...rounded up) z = z+ulp;
400 	       break;
401 	    case RP:		... round-to-+inf
402 	       if(x>(z+ulp)*(z+ulp)...chopped) z = z+2*ulp; else
403 	       if(x>z*z ...chopped) z = z+ulp;
404 	       break;
405 	}
406 
407 	Remark 3. The above comparisons can be done in fixed point. For
408 	example, to compare x and w=z*z chopped, it suffices to compare
409 	x1 and w1 (the trailing parts of x and w), regarding them as
410 	two's complement integers.
411 
412 	...Is z an exact square root?
413 	To determine whether z is an exact square root of x, let z1 be the
414 	trailing part of z, and also let x0 and x1 be the leading and
415 	trailing parts of x.
416 
417 	If ((z1&0x03ffffff)!=0)	... not exact if trailing 26 bits of z!=0
418 	    I := 1;		... Raise Inexact flag: z is not exact
419 	else {
420 	    j := 1 - [(x0>>20)&1]	... j = logb(x) mod 2
421 	    k := z1 >> 26;		... get z's 25-th and 26-th
422 					    fraction bits
423 	    I := i or (k&j) or ((k&(j+j+1))!=(x1&3));
424 	}
425 	R:= r		... restore rounded mode
426 	return sqrt(x):=z.
427 
428 	If multiplication is cheaper then the foregoing red tape, the
429 	Inexact flag can be evaluated by
430 
431 	    I := i;
432 	    I := (z*z!=x) or I.
433 
434 	Note that z*z can overwrite I; this value must be sensed if it is
435 	True.
436 
437 	Remark 4. If z*z = x exactly, then bit 25 to bit 0 of z1 must be
438 	zero.
439 
440 		    --------------------
441 		z1: |        f2        |
442 		    --------------------
443 		bit 31		   bit 0
444 
445 	Further more, bit 27 and 26 of z1, bit 0 and 1 of x1, and the odd
446 	or even of logb(x) have the following relations:
447 
448 	-------------------------------------------------
449 	bit 27,26 of z1		bit 1,0 of x1	logb(x)
450 	-------------------------------------------------
451 	00			00		odd and even
452 	01			01		even
453 	10			10		odd
454 	10			00		even
455 	11			01		even
456 	-------------------------------------------------
457 
458     (4)	Special cases (see (4) of Section A).
459 
460  */
461 
462