xref: /inferno-os/libmath/fdlibm/e_sqrt.c (revision 37da2899f40661e3e9631e497da8dc59b971cbd0)
1 /* derived from /netlib/fdlibm */
2 /* @(#)e_sqrt.c 1.3 95/01/18 */
3 /*
4  * ====================================================
5  * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
6  *
7  * Developed at SunSoft, a Sun Microsystems, Inc. business.
8  * Permission to use, copy, modify, and distribute this
9  * software is freely granted, provided that this notice
10  * is preserved.
11  * ====================================================
12  */
13 
14 /* __ieee754_sqrt(x)
15  * Return correctly rounded sqrt.
16  *           ------------------------------------------
17  *	     |  Use the hardware sqrt if you have one |
18  *           ------------------------------------------
19  * Method:
20  *   Bit by bit method using integer arithmetic. (Slow, but portable)
21  *   1. Normalization
22  *	Scale x to y in [1,4) with even powers of 2:
23  *	find an integer k such that  1 <= (y=x*2^(2k)) < 4, then
24  *		sqrt(x) = 2^k * sqrt(y)
25  *   2. Bit by bit computation
26  *	Let q  = sqrt(y) truncated to i bit after binary point (q = 1),
27  *	     i							 0
28  *                                     i+1         2
29  *	    s  = 2*q , and	y  =  2   * ( y - q  ).		(1)
30  *	     i      i            i                 i
31  *
32  *	To compute q    from q , one checks whether
33  *		    i+1       i
34  *
35  *			      -(i+1) 2
36  *			(q + 2      ) <= y.			(2)
37  *     			  i
38  *							      -(i+1)
39  *	If (2) is false, then q   = q ; otherwise q   = q  + 2      .
40  *		 	       i+1   i             i+1   i
41  *
42  *	With some algebric manipulation, it is not difficult to see
43  *	that (2) is equivalent to
44  *                             -(i+1)
45  *			s  +  2       <= y			(3)
46  *			 i                i
47  *
48  *	The advantage of (3) is that s  and y  can be computed by
49  *				      i      i
50  *	the following recurrence formula:
51  *	    if (3) is false
52  *
53  *	    s     =  s  ,	y    = y   ;			(4)
54  *	     i+1      i		 i+1    i
55  *
56  *	    otherwise,
57  *                         -i                     -(i+1)
58  *	    s	  =  s  + 2  ,  y    = y  -  s  - 2  		(5)
59  *           i+1      i          i+1    i     i
60  *
61  *	One may easily use induction to prove (4) and (5).
62  *	Note. Since the left hand side of (3) contain only i+2 bits,
63  *	      it does not necessary to do a full (53-bit) comparison
64  *	      in (3).
65  *   3. Final rounding
66  *	After generating the 53 bits result, we compute one more bit.
67  *	Together with the remainder, we can decide whether the
68  *	result is exact, bigger than 1/2ulp, or less than 1/2ulp
69  *	(it will never equal to 1/2ulp).
70  *	The rounding mode can be detected by checking whether
71  *	Huge + tiny is equal to Huge, and whether Huge - tiny is
72  *	equal to Huge for some floating point number "Huge" and "tiny".
73  *
74  * Special cases:
75  *	sqrt(+-0) = +-0 	... exact
76  *	sqrt(inf) = inf
77  *	sqrt(-ve) = NaN		... with invalid signal
78  *	sqrt(NaN) = NaN		... with invalid signal for signaling NaN
79  *
80  * Other methods : see the appended file at the end of the program below.
81  *---------------
82  */
83 
84 #include "fdlibm.h"
85 
86 static	const double	one	= 1.0, tiny=1.0e-300;
87 
__ieee754_sqrt(double x)88 	double __ieee754_sqrt(double x)
89 {
90 	double z;
91 	int 	sign = (int)0x80000000;
92 	unsigned r,t1,s1,ix1,q1;
93 	int ix0,s0,q,m,t,i;
94 
95 	ix0 = __HI(x);			/* high word of x */
96 	ix1 = __LO(x);		/* low word of x */
97 
98     /* take care of Inf and NaN */
99 	if((ix0&0x7ff00000)==0x7ff00000) {
100 	    return x*x+x;		/* sqrt(NaN)=NaN, sqrt(+inf)=+inf
101 					   sqrt(-inf)=sNaN */
102 	}
103     /* take care of zero */
104 	if(ix0<=0) {
105 	    if(((ix0&(~sign))|ix1)==0) return x;/* sqrt(+-0) = +-0 */
106 	    else if(ix0<0)
107 		return (x-x)/(x-x);		/* sqrt(-ve) = sNaN */
108 	}
109     /* normalize x */
110 	m = (ix0>>20);
111 	if(m==0) {				/* subnormal x */
112 	    while(ix0==0) {
113 		m -= 21;
114 		ix0 |= (ix1>>11); ix1 <<= 21;
115 	    }
116 	    for(i=0;(ix0&0x00100000)==0;i++) ix0<<=1;
117 	    m -= i-1;
118 	    ix0 |= (ix1>>(32-i));
119 	    ix1 <<= i;
120 	}
121 	m -= 1023;	/* unbias exponent */
122 	ix0 = (ix0&0x000fffff)|0x00100000;
123 	if(m&1){	/* odd m, double x to make it even */
124 	    ix0 += ix0 + ((ix1&sign)>>31);
125 	    ix1 += ix1;
126 	}
127 	m >>= 1;	/* m = [m/2] */
128 
129     /* generate sqrt(x) bit by bit */
130 	ix0 += ix0 + ((ix1&sign)>>31);
131 	ix1 += ix1;
132 	q = q1 = s0 = s1 = 0;	/* [q,q1] = sqrt(x) */
133 	r = 0x00200000;		/* r = moving bit from right to left */
134 
135 	while(r!=0) {
136 	    t = s0+r;
137 	    if(t<=ix0) {
138 		s0   = t+r;
139 		ix0 -= t;
140 		q   += r;
141 	    }
142 	    ix0 += ix0 + ((ix1&sign)>>31);
143 	    ix1 += ix1;
144 	    r>>=1;
145 	}
146 
147 	r = sign;
148 	while(r!=0) {
149 	    t1 = s1+r;
150 	    t  = s0;
151 	    if((t<ix0)||((t==ix0)&&(t1<=ix1))) {
152 		s1  = t1+r;
153 		if(((t1&sign)==sign)&&(s1&sign)==0) s0 += 1;
154 		ix0 -= t;
155 		if (ix1 < t1) ix0 -= 1;
156 		ix1 -= t1;
157 		q1  += r;
158 	    }
159 	    ix0 += ix0 + ((ix1&sign)>>31);
160 	    ix1 += ix1;
161 	    r>>=1;
162 	}
163 
164     /* use floating add to find out rounding direction */
165 	if((ix0|ix1)!=0) {
166 	    z = one-tiny; /* trigger inexact flag */
167 	    if (z>=one) {
168 	        z = one+tiny;
169 	        if (q1==(unsigned)0xffffffff) { q1=0; q += 1;}
170 		else if (z>one) {
171 		    if (q1==(unsigned)0xfffffffe) q+=1;
172 		    q1+=2;
173 		} else
174 	            q1 += (q1&1);
175 	    }
176 	}
177 	ix0 = (q>>1)+0x3fe00000;
178 	ix1 =  q1>>1;
179 	if ((q&1)==1) ix1 |= sign;
180 	ix0 += (m <<20);
181 	__HI(z) = ix0;
182 	__LO(z) = ix1;
183 	return z;
184 }
185 
186 /*
187 Other methods  (use floating-point arithmetic)
188 -------------
189 (This is a copy of a drafted paper by Prof W. Kahan
190 and K.C. Ng, written in May, 1986)
191 
192 	Two algorithms are given here to implement sqrt(x)
193 	(IEEE double precision arithmetic) in software.
194 	Both supply sqrt(x) correctly rounded. The first algorithm (in
195 	Section A) uses newton iterations and involves four divisions.
196 	The second one uses reciproot iterations to avoid division, but
197 	requires more multiplications. Both algorithms need the ability
198 	to chop results of arithmetic operations instead of round them,
199 	and the INEXACT flag to indicate when an arithmetic operation
200 	is executed exactly with no roundoff error, all part of the
201 	standard (IEEE 754-1985). The ability to perform shift, add,
202 	subtract and logical AND operations upon 32-bit words is needed
203 	too, though not part of the standard.
204 
205 A.  sqrt(x) by Newton Iteration
206 
207    (1)	Initial approximation
208 
209 	Let x0 and x1 be the leading and the trailing 32-bit words of
210 	a floating point number x (in IEEE double format) respectively
211 
212 	    1    11		     52				  ...widths
213 	   ------------------------------------------------------
214 	x: |s|	  e     |	      f				|
215 	   ------------------------------------------------------
216 	      msb    lsb  msb				      lsb ...order
217 
218 
219 	     ------------------------  	     ------------------------
220 	x0:  |s|   e    |    f1     |	 x1: |          f2           |
221 	     ------------------------  	     ------------------------
222 
223 	By performing shifts and subtracts on x0 and x1 (both regarded
224 	as integers), we obtain an 8-bit approximation of sqrt(x) as
225 	follows.
226 
227 		k  := (x0>>1) + 0x1ff80000;
228 		y0 := k - T1[31&(k>>15)].	... y ~ sqrt(x) to 8 bits
229 	Here k is a 32-bit integer and T1[] is an integer array containing
230 	correction terms. Now magically the floating value of y (y's
231 	leading 32-bit word is y0, the value of its trailing word is 0)
232 	approximates sqrt(x) to almost 8-bit.
233 
234 	Value of T1:
235 	static int T1[32]= {
236 	0,	1024,	3062,	5746,	9193,	13348,	18162,	23592,
237 	29598,	36145,	43202,	50740,	58733,	67158,	75992,	85215,
238 	83599,	71378,	60428,	50647,	41945,	34246,	27478,	21581,
239 	16499,	12183,	8588,	5674,	3403,	1742,	661,	130,};
240 
241     (2)	Iterative refinement
242 
243 	Apply Heron's rule three times to y, we have y approximates
244 	sqrt(x) to within 1 ulp (Unit in the Last Place):
245 
246 		y := (y+x/y)/2		... almost 17 sig. bits
247 		y := (y+x/y)/2		... almost 35 sig. bits
248 		y := y-(y-x/y)/2	... within 1 ulp
249 
250 
251 	Remark 1.
252 	    Another way to improve y to within 1 ulp is:
253 
254 		y := (y+x/y)		... almost 17 sig. bits to 2*sqrt(x)
255 		y := y - 0x00100006	... almost 18 sig. bits to sqrt(x)
256 
257 				2
258 			    (x-y )*y
259 		y := y + 2* ----------	...within 1 ulp
260 			       2
261 			     3y  + x
262 
263 
264 	This formula has one division fewer than the one above; however,
265 	it requires more multiplications and additions. Also x must be
266 	scaled in advance to avoid spurious overflow in evaluating the
267 	expression 3y*y+x. Hence it is not recommended uless division
268 	is slow. If division is very slow, then one should use the
269 	reciproot algorithm given in section B.
270 
271     (3) Final adjustment
272 
273 	By twiddling y's last bit it is possible to force y to be
274 	correctly rounded according to the prevailing rounding mode
275 	as follows. Let r and i be copies of the rounding mode and
276 	inexact flag before entering the square root program. Also we
277 	use the expression y+-ulp for the next representable floating
278 	numbers (up and down) of y. Note that y+-ulp = either fixed
279 	point y+-1, or multiply y by nextafter(1,+-inf) in chopped
280 	mode.
281 
282 		I := FALSE;	... reset INEXACT flag I
283 		R := RZ;	... set rounding mode to round-toward-zero
284 		z := x/y;	... chopped quotient, possibly inexact
285 		If(not I) then {	... if the quotient is exact
286 		    if(z=y) {
287 		        I := i;	 ... restore inexact flag
288 		        R := r;  ... restore rounded mode
289 		        return sqrt(x):=y.
290 		    } else {
291 			z := z - ulp;	... special rounding
292 		    }
293 		}
294 		i := TRUE;		... sqrt(x) is inexact
295 		If (r=RN) then z=z+ulp	... rounded-to-nearest
296 		If (r=RP) then {	... round-toward-+inf
297 		    y = y+ulp; z=z+ulp;
298 		}
299 		y := y+z;		... chopped sum
300 		y0:=y0-0x00100000;	... y := y/2 is correctly rounded.
301 	        I := i;	 		... restore inexact flag
302 	        R := r;  		... restore rounded mode
303 	        return sqrt(x):=y.
304 
305     (4)	Special cases
306 
307 	Square root of +inf, +-0, or NaN is itself;
308 	Square root of a negative number is NaN with invalid signal.
309 
310 
311 B.  sqrt(x) by Reciproot Iteration
312 
313    (1)	Initial approximation
314 
315 	Let x0 and x1 be the leading and the trailing 32-bit words of
316 	a floating point number x (in IEEE double format) respectively
317 	(see section A). By performing shifs and subtracts on x0 and y0,
318 	we obtain a 7.8-bit approximation of 1/sqrt(x) as follows.
319 
320 	    k := 0x5fe80000 - (x0>>1);
321 	    y0:= k - T2[63&(k>>14)].	... y ~ 1/sqrt(x) to 7.8 bits
322 
323 	Here k is a 32-bit integer and T2[] is an integer array
324 	containing correction terms. Now magically the floating
325 	value of y (y's leading 32-bit word is y0, the value of
326 	its trailing word y1 is set to zero) approximates 1/sqrt(x)
327 	to almost 7.8-bit.
328 
329 	Value of T2:
330 	static int T2[64]= {
331 	0x1500,	0x2ef8,	0x4d67,	0x6b02,	0x87be,	0xa395,	0xbe7a,	0xd866,
332 	0xf14a,	0x1091b,0x11fcd,0x13552,0x14999,0x15c98,0x16e34,0x17e5f,
333 	0x18d03,0x19a01,0x1a545,0x1ae8a,0x1b5c4,0x1bb01,0x1bfde,0x1c28d,
334 	0x1c2de,0x1c0db,0x1ba73,0x1b11c,0x1a4b5,0x1953d,0x18266,0x16be0,
335 	0x1683e,0x179d8,0x18a4d,0x19992,0x1a789,0x1b445,0x1bf61,0x1c989,
336 	0x1d16d,0x1d77b,0x1dddf,0x1e2ad,0x1e5bf,0x1e6e8,0x1e654,0x1e3cd,
337 	0x1df2a,0x1d635,0x1cb16,0x1be2c,0x1ae4e,0x19bde,0x1868e,0x16e2e,
338 	0x1527f,0x1334a,0x11051,0xe951,	0xbe01,	0x8e0d,	0x5924,	0x1edd,};
339 
340     (2)	Iterative refinement
341 
342 	Apply Reciproot iteration three times to y and multiply the
343 	result by x to get an approximation z that matches sqrt(x)
344 	to about 1 ulp. To be exact, we will have
345 		-1ulp < sqrt(x)-z<1.0625ulp.
346 
347 	... set rounding mode to Round-to-nearest
348 	   y := y*(1.5-0.5*x*y*y)	... almost 15 sig. bits to 1/sqrt(x)
349 	   y := y*((1.5-2^-30)+0.5*x*y*y)... about 29 sig. bits to 1/sqrt(x)
350 	... special arrangement for better accuracy
351 	   z := x*y			... 29 bits to sqrt(x), with z*y<1
352 	   z := z + 0.5*z*(1-z*y)	... about 1 ulp to sqrt(x)
353 
354 	Remark 2. The constant 1.5-2^-30 is chosen to bias the error so that
355 	(a) the term z*y in the final iteration is always less than 1;
356 	(b) the error in the final result is biased upward so that
357 		-1 ulp < sqrt(x) - z < 1.0625 ulp
358 	    instead of |sqrt(x)-z|<1.03125ulp.
359 
360     (3)	Final adjustment
361 
362 	By twiddling y's last bit it is possible to force y to be
363 	correctly rounded according to the prevailing rounding mode
364 	as follows. Let r and i be copies of the rounding mode and
365 	inexact flag before entering the square root program. Also we
366 	use the expression y+-ulp for the next representable floating
367 	numbers (up and down) of y. Note that y+-ulp = either fixed
368 	point y+-1, or multiply y by nextafter(1,+-inf) in chopped
369 	mode.
370 
371 	R := RZ;		... set rounding mode to round-toward-zero
372 	switch(r) {
373 	    case RN:		... round-to-nearest
374 	       if(x<= z*(z-ulp)...chopped) z = z - ulp; else
375 	       if(x<= z*(z+ulp)...chopped) z = z; else z = z+ulp;
376 	       break;
377 	    case RZ:case RM:	... round-to-zero or round-to--inf
378 	       R:=RP;		... reset rounding mod to round-to-+inf
379 	       if(x<z*z ... rounded up) z = z - ulp; else
380 	       if(x>=(z+ulp)*(z+ulp) ...rounded up) z = z+ulp;
381 	       break;
382 	    case RP:		... round-to-+inf
383 	       if(x>(z+ulp)*(z+ulp)...chopped) z = z+2*ulp; else
384 	       if(x>z*z ...chopped) z = z+ulp;
385 	       break;
386 	}
387 
388 	Remark 3. The above comparisons can be done in fixed point. For
389 	example, to compare x and w=z*z chopped, it suffices to compare
390 	x1 and w1 (the trailing parts of x and w), regarding them as
391 	two's complement integers.
392 
393 	...Is z an exact square root?
394 	To determine whether z is an exact square root of x, let z1 be the
395 	trailing part of z, and also let x0 and x1 be the leading and
396 	trailing parts of x.
397 
398 	If ((z1&0x03ffffff)!=0)	... not exact if trailing 26 bits of z!=0
399 	    I := 1;		... Raise Inexact flag: z is not exact
400 	else {
401 	    j := 1 - [(x0>>20)&1]	... j = logb(x) mod 2
402 	    k := z1 >> 26;		... get z's 25-th and 26-th
403 					    fraction bits
404 	    I := i or (k&j) or ((k&(j+j+1))!=(x1&3));
405 	}
406 	R:= r		... restore rounded mode
407 	return sqrt(x):=z.
408 
409 	If multiplication is cheaper then the foregoing red tape, the
410 	Inexact flag can be evaluated by
411 
412 	    I := i;
413 	    I := (z*z!=x) or I.
414 
415 	Note that z*z can overwrite I; this value must be sensed if it is
416 	True.
417 
418 	Remark 4. If z*z = x exactly, then bit 25 to bit 0 of z1 must be
419 	zero.
420 
421 		    --------------------
422 		z1: |        f2        |
423 		    --------------------
424 		bit 31		   bit 0
425 
426 	Further more, bit 27 and 26 of z1, bit 0 and 1 of x1, and the odd
427 	or even of logb(x) have the following relations:
428 
429 	-------------------------------------------------
430 	bit 27,26 of z1		bit 1,0 of x1	logb(x)
431 	-------------------------------------------------
432 	00			00		odd and even
433 	01			01		even
434 	10			10		odd
435 	10			00		even
436 	11			01		even
437 	-------------------------------------------------
438 
439     (4)	Special cases (see (4) of Section A).
440 
441  */
442 
443