xref: /openbsd-src/lib/libm/src/e_sqrt.c (revision 5054e3e78af0749a9bb00ba9a024b3ee2d90290f)
1 /* @(#)e_sqrt.c 5.1 93/09/24 */
2 /*
3  * ====================================================
4  * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
5  *
6  * Developed at SunPro, a Sun Microsystems, Inc. business.
7  * Permission to use, copy, modify, and distribute this
8  * software is freely granted, provided that this notice
9  * is preserved.
10  * ====================================================
11  */
12 
13 /* sqrt(x)
14  * Return correctly rounded sqrt.
15  *           ------------------------------------------
16  *	     |  Use the hardware sqrt if you have one |
17  *           ------------------------------------------
18  * Method:
19  *   Bit by bit method using integer arithmetic. (Slow, but portable)
20  *   1. Normalization
21  *	Scale x to y in [1,4) with even powers of 2:
22  *	find an integer k such that  1 <= (y=x*2^(2k)) < 4, then
23  *		sqrt(x) = 2^k * sqrt(y)
24  *   2. Bit by bit computation
25  *	Let q  = sqrt(y) truncated to i bit after binary point (q = 1),
26  *	     i							 0
27  *                                     i+1         2
28  *	    s  = 2*q , and	y  =  2   * ( y - q  ).		(1)
29  *	     i      i            i                 i
30  *
31  *	To compute q    from q , one checks whether
32  *		    i+1       i
33  *
34  *			      -(i+1) 2
35  *			(q + 2      ) <= y.			(2)
36  *     			  i
37  *							      -(i+1)
38  *	If (2) is false, then q   = q ; otherwise q   = q  + 2      .
39  *		 	       i+1   i             i+1   i
40  *
41  *	With some algebric manipulation, it is not difficult to see
42  *	that (2) is equivalent to
43  *                             -(i+1)
44  *			s  +  2       <= y			(3)
45  *			 i                i
46  *
47  *	The advantage of (3) is that s  and y  can be computed by
48  *				      i      i
49  *	the following recurrence formula:
50  *	    if (3) is false
51  *
52  *	    s     =  s  ,	y    = y   ;			(4)
53  *	     i+1      i		 i+1    i
54  *
55  *	    otherwise,
56  *                         -i                     -(i+1)
57  *	    s	  =  s  + 2  ,  y    = y  -  s  - 2  		(5)
58  *           i+1      i          i+1    i     i
59  *
60  *	One may easily use induction to prove (4) and (5).
61  *	Note. Since the left hand side of (3) contain only i+2 bits,
62  *	      it does not necessary to do a full (53-bit) comparison
63  *	      in (3).
64  *   3. Final rounding
65  *	After generating the 53 bits result, we compute one more bit.
66  *	Together with the remainder, we can decide whether the
67  *	result is exact, bigger than 1/2ulp, or less than 1/2ulp
68  *	(it will never equal to 1/2ulp).
69  *	The rounding mode can be detected by checking whether
70  *	huge + tiny is equal to huge, and whether huge - tiny is
71  *	equal to huge for some floating point number "huge" and "tiny".
72  *
73  * Special cases:
74  *	sqrt(+-0) = +-0 	... exact
75  *	sqrt(inf) = inf
76  *	sqrt(-ve) = NaN		... with invalid signal
77  *	sqrt(NaN) = NaN		... with invalid signal for signaling NaN
78  *
79  * Other methods : see the appended file at the end of the program below.
80  *---------------
81  */
82 
83 #include <sys/cdefs.h>
84 #include <float.h>
85 #include <math.h>
86 
87 #include "math_private.h"
88 
89 static	const double	one	= 1.0, tiny=1.0e-300;
90 
91 double
92 sqrt(double x)
93 {
94 	double z;
95 	int32_t sign = (int)0x80000000;
96 	int32_t ix0,s0,q,m,t,i;
97 	u_int32_t r,t1,s1,ix1,q1;
98 
99 	EXTRACT_WORDS(ix0,ix1,x);
100 
101     /* take care of Inf and NaN */
102 	if((ix0&0x7ff00000)==0x7ff00000) {
103 	    return x*x+x;		/* sqrt(NaN)=NaN, sqrt(+inf)=+inf
104 					   sqrt(-inf)=sNaN */
105 	}
106     /* take care of zero */
107 	if(ix0<=0) {
108 	    if(((ix0&(~sign))|ix1)==0) return x;/* sqrt(+-0) = +-0 */
109 	    else if(ix0<0)
110 		return (x-x)/(x-x);		/* sqrt(-ve) = sNaN */
111 	}
112     /* normalize x */
113 	m = (ix0>>20);
114 	if(m==0) {				/* subnormal x */
115 	    while(ix0==0) {
116 		m -= 21;
117 		ix0 |= (ix1>>11); ix1 <<= 21;
118 	    }
119 	    for(i=0;(ix0&0x00100000)==0;i++) ix0<<=1;
120 	    m -= i-1;
121 	    ix0 |= (ix1>>(32-i));
122 	    ix1 <<= i;
123 	}
124 	m -= 1023;	/* unbias exponent */
125 	ix0 = (ix0&0x000fffff)|0x00100000;
126 	if(m&1){	/* odd m, double x to make it even */
127 	    ix0 += ix0 + ((ix1&sign)>>31);
128 	    ix1 += ix1;
129 	}
130 	m >>= 1;	/* m = [m/2] */
131 
132     /* generate sqrt(x) bit by bit */
133 	ix0 += ix0 + ((ix1&sign)>>31);
134 	ix1 += ix1;
135 	q = q1 = s0 = s1 = 0;	/* [q,q1] = sqrt(x) */
136 	r = 0x00200000;		/* r = moving bit from right to left */
137 
138 	while(r!=0) {
139 	    t = s0+r;
140 	    if(t<=ix0) {
141 		s0   = t+r;
142 		ix0 -= t;
143 		q   += r;
144 	    }
145 	    ix0 += ix0 + ((ix1&sign)>>31);
146 	    ix1 += ix1;
147 	    r>>=1;
148 	}
149 
150 	r = sign;
151 	while(r!=0) {
152 	    t1 = s1+r;
153 	    t  = s0;
154 	    if((t<ix0)||((t==ix0)&&(t1<=ix1))) {
155 		s1  = t1+r;
156 		if(((t1&sign)==sign)&&(s1&sign)==0) s0 += 1;
157 		ix0 -= t;
158 		if (ix1 < t1) ix0 -= 1;
159 		ix1 -= t1;
160 		q1  += r;
161 	    }
162 	    ix0 += ix0 + ((ix1&sign)>>31);
163 	    ix1 += ix1;
164 	    r>>=1;
165 	}
166 
167     /* use floating add to find out rounding direction */
168 	if((ix0|ix1)!=0) {
169 	    z = one-tiny; /* trigger inexact flag */
170 	    if (z>=one) {
171 	        z = one+tiny;
172 	        if (q1==(u_int32_t)0xffffffff) { q1=0; q += 1;}
173 		else if (z>one) {
174 		    if (q1==(u_int32_t)0xfffffffe) q+=1;
175 		    q1+=2;
176 		} else
177 	            q1 += (q1&1);
178 	    }
179 	}
180 	ix0 = (q>>1)+0x3fe00000;
181 	ix1 =  q1>>1;
182 	if ((q&1)==1) ix1 |= sign;
183 	ix0 += (m <<20);
184 	INSERT_WORDS(z,ix0,ix1);
185 	return z;
186 }
187 
188 /*
189 Other methods  (use floating-point arithmetic)
190 -------------
191 (This is a copy of a drafted paper by Prof W. Kahan
192 and K.C. Ng, written in May, 1986)
193 
194 	Two algorithms are given here to implement sqrt(x)
195 	(IEEE double precision arithmetic) in software.
196 	Both supply sqrt(x) correctly rounded. The first algorithm (in
197 	Section A) uses newton iterations and involves four divisions.
198 	The second one uses reciproot iterations to avoid division, but
199 	requires more multiplications. Both algorithms need the ability
200 	to chop results of arithmetic operations instead of round them,
201 	and the INEXACT flag to indicate when an arithmetic operation
202 	is executed exactly with no roundoff error, all part of the
203 	standard (IEEE 754-1985). The ability to perform shift, add,
204 	subtract and logical AND operations upon 32-bit words is needed
205 	too, though not part of the standard.
206 
207 A.  sqrt(x) by Newton Iteration
208 
209    (1)	Initial approximation
210 
211 	Let x0 and x1 be the leading and the trailing 32-bit words of
212 	a floating point number x (in IEEE double format) respectively
213 
214 	    1    11		     52				  ...widths
215 	   ------------------------------------------------------
216 	x: |s|	  e     |	      f				|
217 	   ------------------------------------------------------
218 	      msb    lsb  msb				      lsb ...order
219 
220 
221 	     ------------------------  	     ------------------------
222 	x0:  |s|   e    |    f1     |	 x1: |          f2           |
223 	     ------------------------  	     ------------------------
224 
225 	By performing shifts and subtracts on x0 and x1 (both regarded
226 	as integers), we obtain an 8-bit approximation of sqrt(x) as
227 	follows.
228 
229 		k  := (x0>>1) + 0x1ff80000;
230 		y0 := k - T1[31&(k>>15)].	... y ~ sqrt(x) to 8 bits
231 	Here k is a 32-bit integer and T1[] is an integer array containing
232 	correction terms. Now magically the floating value of y (y's
233 	leading 32-bit word is y0, the value of its trailing word is 0)
234 	approximates sqrt(x) to almost 8-bit.
235 
236 	Value of T1:
237 	static int T1[32]= {
238 	0,	1024,	3062,	5746,	9193,	13348,	18162,	23592,
239 	29598,	36145,	43202,	50740,	58733,	67158,	75992,	85215,
240 	83599,	71378,	60428,	50647,	41945,	34246,	27478,	21581,
241 	16499,	12183,	8588,	5674,	3403,	1742,	661,	130,};
242 
243     (2)	Iterative refinement
244 
245 	Apply Heron's rule three times to y, we have y approximates
246 	sqrt(x) to within 1 ulp (Unit in the Last Place):
247 
248 		y := (y+x/y)/2		... almost 17 sig. bits
249 		y := (y+x/y)/2		... almost 35 sig. bits
250 		y := y-(y-x/y)/2	... within 1 ulp
251 
252 
253 	Remark 1.
254 	    Another way to improve y to within 1 ulp is:
255 
256 		y := (y+x/y)		... almost 17 sig. bits to 2*sqrt(x)
257 		y := y - 0x00100006	... almost 18 sig. bits to sqrt(x)
258 
259 				2
260 			    (x-y )*y
261 		y := y + 2* ----------	...within 1 ulp
262 			       2
263 			     3y  + x
264 
265 
266 	This formula has one division fewer than the one above; however,
267 	it requires more multiplications and additions. Also x must be
268 	scaled in advance to avoid spurious overflow in evaluating the
269 	expression 3y*y+x. Hence it is not recommended uless division
270 	is slow. If division is very slow, then one should use the
271 	reciproot algorithm given in section B.
272 
273     (3) Final adjustment
274 
275 	By twiddling y's last bit it is possible to force y to be
276 	correctly rounded according to the prevailing rounding mode
277 	as follows. Let r and i be copies of the rounding mode and
278 	inexact flag before entering the square root program. Also we
279 	use the expression y+-ulp for the next representable floating
280 	numbers (up and down) of y. Note that y+-ulp = either fixed
281 	point y+-1, or multiply y by nextafter(1,+-inf) in chopped
282 	mode.
283 
284 		I := FALSE;	... reset INEXACT flag I
285 		R := RZ;	... set rounding mode to round-toward-zero
286 		z := x/y;	... chopped quotient, possibly inexact
287 		If(not I) then {	... if the quotient is exact
288 		    if(z=y) {
289 		        I := i;	 ... restore inexact flag
290 		        R := r;  ... restore rounded mode
291 		        return sqrt(x):=y.
292 		    } else {
293 			z := z - ulp;	... special rounding
294 		    }
295 		}
296 		i := TRUE;		... sqrt(x) is inexact
297 		If (r=RN) then z=z+ulp	... rounded-to-nearest
298 		If (r=RP) then {	... round-toward-+inf
299 		    y = y+ulp; z=z+ulp;
300 		}
301 		y := y+z;		... chopped sum
302 		y0:=y0-0x00100000;	... y := y/2 is correctly rounded.
303 	        I := i;	 		... restore inexact flag
304 	        R := r;  		... restore rounded mode
305 	        return sqrt(x):=y.
306 
307     (4)	Special cases
308 
309 	Square root of +inf, +-0, or NaN is itself;
310 	Square root of a negative number is NaN with invalid signal.
311 
312 
313 B.  sqrt(x) by Reciproot Iteration
314 
315    (1)	Initial approximation
316 
317 	Let x0 and x1 be the leading and the trailing 32-bit words of
318 	a floating point number x (in IEEE double format) respectively
319 	(see section A). By performing shifs and subtracts on x0 and y0,
320 	we obtain a 7.8-bit approximation of 1/sqrt(x) as follows.
321 
322 	    k := 0x5fe80000 - (x0>>1);
323 	    y0:= k - T2[63&(k>>14)].	... y ~ 1/sqrt(x) to 7.8 bits
324 
325 	Here k is a 32-bit integer and T2[] is an integer array
326 	containing correction terms. Now magically the floating
327 	value of y (y's leading 32-bit word is y0, the value of
328 	its trailing word y1 is set to zero) approximates 1/sqrt(x)
329 	to almost 7.8-bit.
330 
331 	Value of T2:
332 	static int T2[64]= {
333 	0x1500,	0x2ef8,	0x4d67,	0x6b02,	0x87be,	0xa395,	0xbe7a,	0xd866,
334 	0xf14a,	0x1091b,0x11fcd,0x13552,0x14999,0x15c98,0x16e34,0x17e5f,
335 	0x18d03,0x19a01,0x1a545,0x1ae8a,0x1b5c4,0x1bb01,0x1bfde,0x1c28d,
336 	0x1c2de,0x1c0db,0x1ba73,0x1b11c,0x1a4b5,0x1953d,0x18266,0x16be0,
337 	0x1683e,0x179d8,0x18a4d,0x19992,0x1a789,0x1b445,0x1bf61,0x1c989,
338 	0x1d16d,0x1d77b,0x1dddf,0x1e2ad,0x1e5bf,0x1e6e8,0x1e654,0x1e3cd,
339 	0x1df2a,0x1d635,0x1cb16,0x1be2c,0x1ae4e,0x19bde,0x1868e,0x16e2e,
340 	0x1527f,0x1334a,0x11051,0xe951,	0xbe01,	0x8e0d,	0x5924,	0x1edd,};
341 
342     (2)	Iterative refinement
343 
344 	Apply Reciproot iteration three times to y and multiply the
345 	result by x to get an approximation z that matches sqrt(x)
346 	to about 1 ulp. To be exact, we will have
347 		-1ulp < sqrt(x)-z<1.0625ulp.
348 
349 	... set rounding mode to Round-to-nearest
350 	   y := y*(1.5-0.5*x*y*y)	... almost 15 sig. bits to 1/sqrt(x)
351 	   y := y*((1.5-2^-30)+0.5*x*y*y)... about 29 sig. bits to 1/sqrt(x)
352 	... special arrangement for better accuracy
353 	   z := x*y			... 29 bits to sqrt(x), with z*y<1
354 	   z := z + 0.5*z*(1-z*y)	... about 1 ulp to sqrt(x)
355 
356 	Remark 2. The constant 1.5-2^-30 is chosen to bias the error so that
357 	(a) the term z*y in the final iteration is always less than 1;
358 	(b) the error in the final result is biased upward so that
359 		-1 ulp < sqrt(x) - z < 1.0625 ulp
360 	    instead of |sqrt(x)-z|<1.03125ulp.
361 
362     (3)	Final adjustment
363 
364 	By twiddling y's last bit it is possible to force y to be
365 	correctly rounded according to the prevailing rounding mode
366 	as follows. Let r and i be copies of the rounding mode and
367 	inexact flag before entering the square root program. Also we
368 	use the expression y+-ulp for the next representable floating
369 	numbers (up and down) of y. Note that y+-ulp = either fixed
370 	point y+-1, or multiply y by nextafter(1,+-inf) in chopped
371 	mode.
372 
373 	R := RZ;		... set rounding mode to round-toward-zero
374 	switch(r) {
375 	    case RN:		... round-to-nearest
376 	       if(x<= z*(z-ulp)...chopped) z = z - ulp; else
377 	       if(x<= z*(z+ulp)...chopped) z = z; else z = z+ulp;
378 	       break;
379 	    case RZ:case RM:	... round-to-zero or round-to--inf
380 	       R:=RP;		... reset rounding mod to round-to-+inf
381 	       if(x<z*z ... rounded up) z = z - ulp; else
382 	       if(x>=(z+ulp)*(z+ulp) ...rounded up) z = z+ulp;
383 	       break;
384 	    case RP:		... round-to-+inf
385 	       if(x>(z+ulp)*(z+ulp)...chopped) z = z+2*ulp; else
386 	       if(x>z*z ...chopped) z = z+ulp;
387 	       break;
388 	}
389 
390 	Remark 3. The above comparisons can be done in fixed point. For
391 	example, to compare x and w=z*z chopped, it suffices to compare
392 	x1 and w1 (the trailing parts of x and w), regarding them as
393 	two's complement integers.
394 
395 	...Is z an exact square root?
396 	To determine whether z is an exact square root of x, let z1 be the
397 	trailing part of z, and also let x0 and x1 be the leading and
398 	trailing parts of x.
399 
400 	If ((z1&0x03ffffff)!=0)	... not exact if trailing 26 bits of z!=0
401 	    I := 1;		... Raise Inexact flag: z is not exact
402 	else {
403 	    j := 1 - [(x0>>20)&1]	... j = logb(x) mod 2
404 	    k := z1 >> 26;		... get z's 25-th and 26-th
405 					    fraction bits
406 	    I := i or (k&j) or ((k&(j+j+1))!=(x1&3));
407 	}
408 	R:= r		... restore rounded mode
409 	return sqrt(x):=z.
410 
411 	If multiplication is cheaper then the foregoing red tape, the
412 	Inexact flag can be evaluated by
413 
414 	    I := i;
415 	    I := (z*z!=x) or I.
416 
417 	Note that z*z can overwrite I; this value must be sensed if it is
418 	True.
419 
420 	Remark 4. If z*z = x exactly, then bit 25 to bit 0 of z1 must be
421 	zero.
422 
423 		    --------------------
424 		z1: |        f2        |
425 		    --------------------
426 		bit 31		   bit 0
427 
428 	Further more, bit 27 and 26 of z1, bit 0 and 1 of x1, and the odd
429 	or even of logb(x) have the following relations:
430 
431 	-------------------------------------------------
432 	bit 27,26 of z1		bit 1,0 of x1	logb(x)
433 	-------------------------------------------------
434 	00			00		odd and even
435 	01			01		even
436 	10			10		odd
437 	10			00		even
438 	11			01		even
439 	-------------------------------------------------
440 
441     (4)	Special cases (see (4) of Section A).
442 
443  */
444 
445 #if LDBL_MANT_DIG == 53
446 #ifdef __weak_alias
447 __weak_alias(sqrtl, sqrt);
448 #endif /* __weak_alias */
449 #endif /* LDBL_MANT_DIG == 53 */
450