/* mpfr_sqrt -- square root of a floating-point number

Copyright 1999-2023 Free Software Foundation, Inc.
Contributed by the AriC and Caramba projects, INRIA.

This file is part of the GNU MPFR Library.

The GNU MPFR Library is free software; you can redistribute it and/or modify
it under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation; either version 3 of the License, or (at your
option) any later version.

The GNU MPFR Library is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
License for more details.

You should have received a copy of the GNU Lesser General Public License
along with the GNU MPFR Library; see the file COPYING.LESSER. If not, see
https://www.gnu.org/licenses/ or write to the Free Software Foundation, Inc.,
51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. */

#define MPFR_NEED_LONGLONG_H
#include "mpfr-impl.h"

#if !defined(MPFR_GENERIC_ABI) && GMP_NUMB_BITS == 64

#include "invsqrt_limb.h"

/* Put in rp[1]*2^64+rp[0] an approximation of floor(sqrt(2^128*n)),
   with 2^126 <= n := np[1]*2^64 + np[0] < 2^128. We have:
   {rp, 2} - 4 <= floor(sqrt(2^128*n)) <= {rp, 2} + 26. */
static void
mpfr_sqrt2_approx (mpfr_limb_ptr rp, mpfr_limb_srcptr np)
{
  mp_limb_t x, r1, r0, h, l;

  __gmpfr_sqrt_limb (r1, h, l, x, np[1]);

  /* now r1 = floor(sqrt(2^64*n1)) and h:l = 2^64*n1 - r1^2 with h:l <= 2*r1,
     thus h <= 1, and x is an approximation of 2^96/sqrt(np[1])-2^64 */

  l += np[0];
  h += (l < np[0]);

  /* now 2^64*n1 + n0 - r1^2 = 2^64*h + l with h <= 2 */

  /* divide by 2 */
  l = (h << 63) | (l >> 1);
  h = h >> 1;

  /* now h <= 1 */

  /* now add (2^64+x) * (h*2^64+l) / 2^64 to [r1*2^64, 0] */
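  /* Rationale (a sketch of the correction step): let D = 2^64*n1 + n0 - r1^2,
     so that n = r1^2 + D. Then
     sqrt(2^128*n) = 2^64*sqrt(r1^2 + D) ~ 2^64*r1 + 2^64*D/(2*r1),
     and since 2^64+x ~ 2^128/r1, the correction term 2^64*(D/2)/r1 is well
     approximated by (2^64+x) * (D/2) / 2^64, with D/2 = h*2^64+l. */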

  umul_hi (r0, x, l); /* x * l */
  r0 += l;
  r1 += h + (r0 < l); /* now we have added 2^64 * (h*2^64+l) */
  if (h)
    {
      r0 += x;
      r1 += (r0 < x); /* add x */
    }

  MPFR_ASSERTD(r1 & MPFR_LIMB_HIGHBIT);

  rp[0] = r0;
  rp[1] = r1;
}

/* Special code for prec(r) = prec(u) < GMP_NUMB_BITS. We cannot have
   prec(u) = GMP_NUMB_BITS here, since when the exponent of u is odd,
   we need to shift u by one bit to the right without losing any bit.
   Assumes GMP_NUMB_BITS = 64. */
static int
mpfr_sqrt1 (mpfr_ptr r, mpfr_srcptr u, mpfr_rnd_t rnd_mode)
{
  mpfr_prec_t p = MPFR_GET_PREC(r);
  mpfr_prec_t exp_u = MPFR_EXP(u), exp_r, sh = GMP_NUMB_BITS - p;
  mp_limb_t u0, r0, rb, sb, mask = MPFR_LIMB_MASK(sh);
  mpfr_limb_ptr rp = MPFR_MANT(r);

  MPFR_STAT_STATIC_ASSERT (GMP_NUMB_BITS == 64);

  /* first make the exponent even */
  u0 = MPFR_MANT(u)[0];
  if (((unsigned int) exp_u & 1) != 0)
    {
      u0 >>= 1;
      exp_u ++;
    }
  MPFR_ASSERTD (((unsigned int) exp_u & 1) == 0);
  exp_r = exp_u / 2;

  /* then compute an approximation of the integer square root of
     u0*2^GMP_NUMB_BITS */
  __gmpfr_sqrt_limb_approx (r0, u0);

  sb = 1; /* when we can round correctly with the approximation, the sticky bit
             is non-zero */

  /* the exact square root is in [r0, r0 + 7] */
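  /* The test below checks whether r0 mod 2^(sh-1) is at most 7 below a
     multiple of 2^(sh-1) (or exactly at one); only in that case can the
     uncertainty on r0 affect the round bit or make the sticky bit zero,
     so only then do we need the exact root and remainder. */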
  if (MPFR_UNLIKELY(((r0 + 7) & (mask >> 1)) <= 7))
    {
      /* We should ensure r0 has its most significant bit set.
         Since r0 <= sqrt(2^64*u0) <= r0 + 7, as soon as sqrt(2^64*u0)>=2^63+7,
         which happens for u0>=2^62+8, then r0 >= 2^63.
         It thus remains to check that for 2^62 <= u0 <= 2^62+7,
         __gmpfr_sqrt_limb_approx (r0, u0) gives r0 >= 2^63, which is indeed
         the case:
         u0=4611686018427387904 r0=9223372036854775808
         u0=4611686018427387905 r0=9223372036854775808
         u0=4611686018427387906 r0=9223372036854775809
         u0=4611686018427387907 r0=9223372036854775810
         u0=4611686018427387908 r0=9223372036854775811
         u0=4611686018427387909 r0=9223372036854775812
         u0=4611686018427387910 r0=9223372036854775813
         u0=4611686018427387911 r0=9223372036854775814 */
      MPFR_ASSERTD(r0 >= MPFR_LIMB_HIGHBIT);
      umul_ppmm (rb, sb, r0, r0);
      sub_ddmmss (rb, sb, u0, 0, rb, sb);
      /* for the exact square root, we should have 0 <= rb:sb <= 2*r0 */
      while (!(rb == 0 || (rb == 1 && sb <= 2 * r0)))
        {
          /* subtract 2*r0+1 from rb:sb: subtract r0 before incrementing r0,
             then r0 after (which is r0+1) */
          rb -= (sb < r0);
          sb -= r0;
          r0 ++;
          rb -= (sb < r0);
          sb -= r0;
        }
      /* now we should have rb*2^64 + sb <= 2*r0 */
      MPFR_ASSERTD(rb == 0 || (rb == 1 && sb <= 2 * r0));
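      /* a non-zero remainder rb:sb means the square root is inexact;
         fold it into the sticky bit */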
      sb = rb | sb;
    }

  rb = r0 & (MPFR_LIMB_ONE << (sh - 1));
  sb |= (r0 & mask) ^ rb;
  rp[0] = r0 & ~mask;

  /* rounding: sb = 0 implies rb = 0, since (rb,sb)=(1,0) is not possible */
  MPFR_ASSERTD (rb == 0 || sb != 0);

  /* Note: if 1 and 2 are in [emin,emax], no overflow nor underflow
     is possible */
  if (MPFR_UNLIKELY (exp_r > __gmpfr_emax))
    return mpfr_overflow (r, rnd_mode, 1);

  /* See comments in mpfr_div_1 */
  if (MPFR_UNLIKELY (exp_r < __gmpfr_emin))
    {
      if (rnd_mode == MPFR_RNDN)
        {
          /* If (1-2^(-p-1))*2^(emin-1) <= sqrt(u) < 2^(emin-1),
             then sqrt(u) would be rounded to 2^(emin-1) with unbounded
             exponent range, and there would be no underflow.
             But this case cannot happen if u has precision p.
             Indeed, we would have:
             (1-2^(-p-1))^2*2^(2*emin-2) <= u < 2^(2*emin-2), i.e.,
             (1-2^(-p)+2^(-2p-2))*2^(2*emin-2) <= u < 2^(2*emin-2),
             and there is no p-bit number in that interval. */
          /* If the result is <= 0.5 * 2^(emin-1), we should round to 0. */
          if (exp_r < __gmpfr_emin - 1 ||
              (rp[0] == MPFR_LIMB_HIGHBIT && sb == 0))
            rnd_mode = MPFR_RNDZ;
        }
      else if (MPFR_IS_LIKE_RNDA(rnd_mode, 0))
        {
          if (exp_r == __gmpfr_emin - 1 &&
              rp[0] == ~mask &&
              (rb | sb) != 0)
            goto rounding; /* no underflow */
        }
      return mpfr_underflow (r, rnd_mode, 1);
    }

 rounding:
  MPFR_EXP (r) = exp_r;
  if (sb == 0 /* implies rb = 0 */ || rnd_mode == MPFR_RNDF)
    {
      MPFR_ASSERTD (rb == 0 || rnd_mode == MPFR_RNDF);
      MPFR_ASSERTD(exp_r >= __gmpfr_emin);
      MPFR_ASSERTD(exp_r <= __gmpfr_emax);
      MPFR_RET (0);
    }
  else if (rnd_mode == MPFR_RNDN)
    {
      /* since sb <> 0, only rb is needed to decide how to round, and the
         exact middle is not possible */
      if (rb == 0)
        goto truncate;
      else
        goto add_one_ulp;
    }
  else if (MPFR_IS_LIKE_RNDZ(rnd_mode, 0))
    {
    truncate:
      MPFR_ASSERTD(exp_r >= __gmpfr_emin);
      MPFR_ASSERTD(exp_r <= __gmpfr_emax);
      MPFR_RET(-1);
    }
  else /* round away from zero */
    {
    add_one_ulp:
      rp[0] += MPFR_LIMB_ONE << sh;
      if (rp[0] == 0)
        {
          rp[0] = MPFR_LIMB_HIGHBIT;
          if (MPFR_UNLIKELY(exp_r + 1 > __gmpfr_emax))
            return mpfr_overflow (r, rnd_mode, 1);
          MPFR_ASSERTD(exp_r + 1 <= __gmpfr_emax);
          MPFR_ASSERTD(exp_r + 1 >= __gmpfr_emin);
          MPFR_SET_EXP (r, exp_r + 1);
        }
      MPFR_RET(1);
    }
}

/* Special code for prec(r) = prec(u) = GMP_NUMB_BITS. */
static int
mpfr_sqrt1n (mpfr_ptr r, mpfr_srcptr u, mpfr_rnd_t rnd_mode)
{
  mpfr_prec_t exp_u = MPFR_EXP(u), exp_r;
  mp_limb_t u0, r0, rb, sb, low;
  mpfr_limb_ptr rp = MPFR_MANT(r);

  MPFR_STAT_STATIC_ASSERT (GMP_NUMB_BITS == 64);
  MPFR_ASSERTD(MPFR_PREC(r) == GMP_NUMB_BITS);
  MPFR_ASSERTD(MPFR_PREC(u) <= GMP_NUMB_BITS);

  /* first make the exponent even */
  u0 = MPFR_MANT(u)[0];
  if (((unsigned int) exp_u & 1) != 0)
    {
      low = u0 << (GMP_NUMB_BITS - 1);
      u0 >>= 1;
      exp_u ++;
    }
  else
    low = 0; /* low part of u0 */
  MPFR_ASSERTD (((unsigned int) exp_u & 1) == 0);
  exp_r = exp_u / 2;

  /* then compute an approximation of the integer square root of
     u0*2^GMP_NUMB_BITS */
  __gmpfr_sqrt_limb_approx (r0, u0);

  /* the exact square root is in [r0, r0 + 7] */

  /* As shown in mpfr_sqrt1 above, r0 has its most significant bit set */
  MPFR_ASSERTD(r0 >= MPFR_LIMB_HIGHBIT);

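  /* compute the exact remainder u0*2^64 + low - r0^2 in rb:sb, then
     correct r0 until the remainder lies in [0, 2*r0] */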
  umul_ppmm (rb, sb, r0, r0);
  sub_ddmmss (rb, sb, u0, low, rb, sb);
  /* for the exact square root, we should have 0 <= rb:sb <= 2*r0 */
  while (!(rb == 0 || (rb == 1 && sb <= 2 * r0)))
    {
      /* subtract 2*r0+1 from rb:sb: subtract r0 before incrementing r0,
         then r0 after (which is r0+1) */
      rb -= (sb < r0);
      sb -= r0;
      r0 ++;
      rb -= (sb < r0);
      sb -= r0;
    }
  /* now we have u0*2^64+low = r0^2 + rb*2^64+sb, with rb*2^64+sb <= 2*r0 */
  MPFR_ASSERTD(rb == 0 || (rb == 1 && sb <= 2 * r0));

  /* We can't have the middle case u0*2^64+low = (r0 + 1/2)^2, since
     (r0 + 1/2)^2 is not an integer.
     We thus have rb = 1 (as round bit) exactly when
     u0*2^64+low > (r0 + 1/2)^2, i.e., when the remainder rb*2^64 + sb > r0;
     and the sticky bit is always 1, unless we had rb = sb = 0. */

  rb = rb || (sb > r0);
  sb = rb | sb;
  rp[0] = r0;

  /* rounding */

  /* Note: if 1 and 2 are in [emin,emax], no overflow nor underflow
     is possible */
  if (MPFR_UNLIKELY (exp_r > __gmpfr_emax))
    return mpfr_overflow (r, rnd_mode, 1);

  /* See comments in mpfr_div_1 */
  if (MPFR_UNLIKELY (exp_r < __gmpfr_emin))
    {
      if (rnd_mode == MPFR_RNDN)
        {
          /* the case rp[0] = 111...111 and rb = 1 cannot happen, since it
             would imply u0 >= (2^64-1/2)^2/2^64 thus u0 >= 2^64 */
          if (exp_r < __gmpfr_emin - 1 ||
              (rp[0] == MPFR_LIMB_HIGHBIT && sb == 0))
            rnd_mode = MPFR_RNDZ;
        }
      else if (MPFR_IS_LIKE_RNDA(rnd_mode, 0))
        {
          if (exp_r == __gmpfr_emin - 1 &&
              rp[0] == MPFR_LIMB_MAX &&
              (rb | sb) != 0)
            goto rounding; /* no underflow */
        }
      return mpfr_underflow (r, rnd_mode, 1);
    }

  /* sb = 0 can only occur when the square root is exact, i.e., rb = 0 */

 rounding:
  MPFR_EXP (r) = exp_r;
  if (sb == 0 /* implies rb = 0 */ || rnd_mode == MPFR_RNDF)
    {
      MPFR_ASSERTD(exp_r >= __gmpfr_emin);
      MPFR_ASSERTD(exp_r <= __gmpfr_emax);
      MPFR_RET (0);
    }
  else if (rnd_mode == MPFR_RNDN)
    {
      /* we can't have sb = 0, thus rb is enough */
      if (rb == 0)
        goto truncate;
      else
        goto add_one_ulp;
    }
  else if (MPFR_IS_LIKE_RNDZ(rnd_mode, 0))
    {
    truncate:
      MPFR_ASSERTD(exp_r >= __gmpfr_emin);
      MPFR_ASSERTD(exp_r <= __gmpfr_emax);
      MPFR_RET(-1);
    }
  else /* round away from zero */
    {
    add_one_ulp:
      rp[0] += MPFR_LIMB_ONE;
      if (rp[0] == 0)
        {
          rp[0] = MPFR_LIMB_HIGHBIT;
          if (MPFR_UNLIKELY(exp_r + 1 > __gmpfr_emax))
            return mpfr_overflow (r, rnd_mode, 1);
          MPFR_ASSERTD(exp_r + 1 <= __gmpfr_emax);
          MPFR_ASSERTD(exp_r + 1 >= __gmpfr_emin);
          MPFR_SET_EXP (r, exp_r + 1);
        }
      MPFR_RET(1);
    }
}

/* Special code for GMP_NUMB_BITS < prec(r) = prec(u) < 2*GMP_NUMB_BITS.
   Assumes GMP_NUMB_BITS=64. */
static int
mpfr_sqrt2 (mpfr_ptr r, mpfr_srcptr u, mpfr_rnd_t rnd_mode)
{
  mpfr_prec_t p = MPFR_GET_PREC(r);
  mpfr_limb_ptr up = MPFR_MANT(u), rp = MPFR_MANT(r);
  mp_limb_t np[4], rb, sb, mask;
  mpfr_prec_t exp_u = MPFR_EXP(u), exp_r, sh = 2 * GMP_NUMB_BITS - p;

  MPFR_STAT_STATIC_ASSERT (GMP_NUMB_BITS == 64);

  if (((unsigned int) exp_u & 1) != 0)
    {
      np[3] = up[1] >> 1;
      np[2] = (up[1] << (GMP_NUMB_BITS - 1)) | (up[0] >> 1);
      np[1] = up[0] << (GMP_NUMB_BITS - 1);
      exp_u ++;
    }
  else
    {
      np[3] = up[1];
      np[2] = up[0];
      np[1] = 0;
    }
  exp_r = exp_u / 2;
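  /* exp_u is even at this point, so the division above is exact */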

  mask = MPFR_LIMB_MASK(sh);

  mpfr_sqrt2_approx (rp, np + 2);
  /* with n = np[3]*2^64+np[2], we have:
     {rp, 2} - 4 <= floor(sqrt(2^128*n)) <= {rp, 2} + 26, thus we can round
     correctly except when the number formed by the last sh-1 bits
     of rp[0] is in the range [-26, 4] modulo 2^(sh-1) */
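  /* when the approximation allows correct rounding, the square root is
     necessarily inexact, so the sticky bit is known to be 1 */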
  if (MPFR_LIKELY(((rp[0] + 26) & (mask >> 1)) > 30))
    sb = 1;
  else
    {
      mp_limb_t tp[4], h, l;

      np[0] = 0;
      mpn_sqr (tp, rp, 2);
      /* with s = floor(sqrt(2^128*n)) and r = {rp, 2}, we know
         s - 26 <= r <= s + 4 and 0 <= n - s^2 <= 2*s, thus
         -8*s-16 <= n - r^2 <= 54*s - 676 (writing
         n - r^2 = (n - s^2) + (s - r)*(s + r) with s - r in [-4, 26]);
         it therefore suffices to compute n - r^2 modulo 2^192 */
      mpn_sub_n (tp, np, tp, 3);
      /* invariant: h:l = 2 * {rp, 2}, with upper bit implicit */
      h = (rp[1] << 1) | (rp[0] >> (GMP_NUMB_BITS - 1));
      l = rp[0] << 1;
      while ((mp_limb_signed_t) tp[2] < 0) /* approximation was too large */
        {
          /* subtract 1 from {rp, 2}, thus 2 from h:l */
          h -= (l <= MPFR_LIMB_ONE);
          l -= 2;
          /* add (1:h:l)+1 to {tp,3} */
          tp[0] += l + 1;
          tp[1] += h + (tp[0] < l);
          /* necessarily rp[1] has its most significant bit set */
          tp[2] += MPFR_LIMB_ONE + (tp[1] < h || (tp[1] == h && tp[0] < l));
        }
      /* now tp[2] >= 0 */
      /* now we want {tp, 3} <= 2 * {rp, 2}, which implies tp[2] <= 1 */
      while (tp[2] > 1 || (tp[2] == 1 && tp[1] > h) ||
             (tp[2] == 1 && tp[1] == h && tp[0] > l))
        {
          /* subtract (1:h:l)+1 from {tp,3} */
          tp[2] -= MPFR_LIMB_ONE + (tp[1] < h || (tp[1] == h && tp[0] <= l));
          tp[1] -= h + (tp[0] <= l);
          tp[0] -= l + 1;
          /* add 2 to h:l */
          l += 2;
          h += (l <= MPFR_LIMB_ONE);
        }
      /* restore {rp, 2} from h:l */
      rp[1] = MPFR_LIMB_HIGHBIT | (h >> 1);
      rp[0] = (h << (GMP_NUMB_BITS - 1)) | (l >> 1);
      sb = tp[2] | tp[0] | tp[1];
    }

  rb = rp[0] & (MPFR_LIMB_ONE << (sh - 1));
  sb |= (rp[0] & mask) ^ rb;
  rp[0] = rp[0] & ~mask;

  /* rounding */
  if (MPFR_UNLIKELY (exp_r > __gmpfr_emax))
    return mpfr_overflow (r, rnd_mode, 1);

  /* See comments in mpfr_div_1 */
  if (MPFR_UNLIKELY (exp_r < __gmpfr_emin))
    {
      if (rnd_mode == MPFR_RNDN)
        {
          if (exp_r < __gmpfr_emin - 1 || (rp[1] == MPFR_LIMB_HIGHBIT &&
                                           rp[0] == MPFR_LIMB_ZERO && sb == 0))
            rnd_mode = MPFR_RNDZ;
        }
      else if (MPFR_IS_LIKE_RNDA(rnd_mode, 0))
        {
          if (exp_r == __gmpfr_emin - 1 && (rp[1] == MPFR_LIMB_MAX &&
                                            rp[0] == ~mask) && (rb | sb))
            goto rounding; /* no underflow */
        }
      return mpfr_underflow (r, rnd_mode, 1);
    }

 rounding:
  MPFR_EXP (r) = exp_r;
  if (sb == 0 /* implies rb = 0 */ || rnd_mode == MPFR_RNDF)
    {
      MPFR_ASSERTD(exp_r >= __gmpfr_emin);
      MPFR_ASSERTD(exp_r <= __gmpfr_emax);
      MPFR_RET (0);
    }
  else if (rnd_mode == MPFR_RNDN)
    {
      /* since sb <> 0 now, only rb is needed */
      if (rb == 0)
        goto truncate;
      else
        goto add_one_ulp;
    }
  else if (MPFR_IS_LIKE_RNDZ(rnd_mode, 0))
    {
    truncate:
      MPFR_ASSERTD(exp_r >= __gmpfr_emin);
      MPFR_ASSERTD(exp_r <= __gmpfr_emax);
      MPFR_RET(-1);
    }
  else /* round away from zero */
    {
    add_one_ulp:
      rp[0] += MPFR_LIMB_ONE << sh;
      rp[1] += rp[0] == 0;
      if (rp[1] == 0)
        {
          rp[1] = MPFR_LIMB_HIGHBIT;
          if (MPFR_UNLIKELY(exp_r + 1 > __gmpfr_emax))
            return mpfr_overflow (r, rnd_mode, 1);
          MPFR_ASSERTD(exp_r + 1 <= __gmpfr_emax);
          MPFR_ASSERTD(exp_r + 1 >= __gmpfr_emin);
          MPFR_SET_EXP (r, exp_r + 1);
        }
      MPFR_RET(1);
    }
}

#endif /* !defined(MPFR_GENERIC_ABI) && GMP_NUMB_BITS == 64 */

int
mpfr_sqrt (mpfr_ptr r, mpfr_srcptr u, mpfr_rnd_t rnd_mode)
{
  mp_size_t rsize; /* number of limbs of r (plus 1 if exact limb multiple) */
  mp_size_t rrsize;
  mp_size_t usize; /* number of limbs of u */
  mp_size_t tsize; /* number of limbs of the sqrtrem remainder */
  mp_size_t k;
  mp_size_t l;
  mpfr_limb_ptr rp, rp0;
  mpfr_limb_ptr up;
  mpfr_limb_ptr sp;
  mp_limb_t sticky0; /* truncated part of input */
  mp_limb_t sticky1; /* truncated part of rp[0] */
  mp_limb_t sticky;
  int odd_exp;
  int sh; /* number of extra bits in rp[0] */
  int inexact; /* return ternary flag */
  mpfr_exp_t expr;
  mpfr_prec_t rq = MPFR_GET_PREC (r);
  MPFR_TMP_DECL(marker);

  MPFR_LOG_FUNC
    (("x[%Pd]=%.*Rg rnd=%d", mpfr_get_prec (u), mpfr_log_prec, u, rnd_mode),
     ("y[%Pd]=%.*Rg inexact=%d",
      mpfr_get_prec (r), mpfr_log_prec, r, inexact));

  if (MPFR_UNLIKELY(MPFR_IS_SINGULAR(u)))
    {
      if (MPFR_IS_NAN(u))
        {
          MPFR_SET_NAN(r);
          MPFR_RET_NAN;
        }
      else if (MPFR_IS_ZERO(u))
        {
          /* 0+ or 0- */
          MPFR_SET_SAME_SIGN(r, u);
          MPFR_SET_ZERO(r);
          MPFR_RET(0); /* zero is exact */
        }
      else
        {
          MPFR_ASSERTD(MPFR_IS_INF(u));
          /* sqrt(-Inf) = NAN */
          if (MPFR_IS_NEG(u))
            {
              MPFR_SET_NAN(r);
              MPFR_RET_NAN;
            }
          MPFR_SET_POS(r);
          MPFR_SET_INF(r);
          MPFR_RET(0);
        }
    }
  if (MPFR_UNLIKELY(MPFR_IS_NEG(u)))
    {
      MPFR_SET_NAN(r);
      MPFR_RET_NAN;
    }
  MPFR_SET_POS(r);

#if !defined(MPFR_GENERIC_ABI) && GMP_NUMB_BITS == 64
  {
    mpfr_prec_t uq = MPFR_GET_PREC (u);

    if (rq == uq)
      {
        if (rq < GMP_NUMB_BITS)
          return mpfr_sqrt1 (r, u, rnd_mode);

        if (GMP_NUMB_BITS < rq && rq < 2*GMP_NUMB_BITS)
          return mpfr_sqrt2 (r, u, rnd_mode);

        if (rq == GMP_NUMB_BITS)
          return mpfr_sqrt1n (r, u, rnd_mode);
      }
  }
#endif

  MPFR_TMP_MARK (marker);
  MPFR_UNSIGNED_MINUS_MODULO (sh, rq);
  if (sh == 0 && rnd_mode == MPFR_RNDN)
    sh = GMP_NUMB_BITS; /* ugly case: the precision is an exact multiple of
                           GMP_NUMB_BITS, so a whole extra limb of the root
                           is needed to get the round bit */
  rsize = MPFR_LIMB_SIZE(r) + (sh == GMP_NUMB_BITS);
  /* rsize is the number of limbs of r, plus 1 if the precision is an exact
     limb multiple and we round to nearest; this is the number of wanted
     limbs for the square root */
  rrsize = rsize + rsize;
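  /* for example, with rq = 128 = 2*GMP_NUMB_BITS and rnd_mode = MPFR_RNDN,
     we get sh = GMP_NUMB_BITS, rsize = 3 and rrsize = 6 */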
  usize = MPFR_LIMB_SIZE(u); /* number of limbs of u */
  rp0 = MPFR_MANT(r);
  rp = (sh < GMP_NUMB_BITS) ? rp0 : MPFR_TMP_LIMBS_ALLOC (rsize);
  up = MPFR_MANT(u);
  sticky0 = MPFR_LIMB_ZERO; /* truncated part of input */
  sticky1 = MPFR_LIMB_ZERO; /* truncated part of rp[0] */
  odd_exp = (unsigned int) MPFR_GET_EXP (u) & 1;
  inexact = -1; /* return ternary flag */

  sp = MPFR_TMP_LIMBS_ALLOC (rrsize);

  /* copy the most significant limbs of u to {sp, rrsize} */
  if (MPFR_LIKELY(usize <= rrsize)) /* in case r and u have the same precision,
                                       we have indeed rrsize = 2 * usize */
    {
      k = rrsize - usize;
      if (MPFR_LIKELY(k))
        MPN_ZERO (sp, k);
      if (odd_exp)
        {
          if (MPFR_LIKELY(k))
            sp[k - 1] = mpn_rshift (sp + k, up, usize, 1);
          else
            sticky0 = mpn_rshift (sp, up, usize, 1);
        }
      else
        MPN_COPY (sp + rrsize - usize, up, usize);
    }
  else /* usize > rrsize: truncate the input */
    {
      k = usize - rrsize;
      if (odd_exp)
        sticky0 = mpn_rshift (sp, up + k, rrsize, 1);
      else
        MPN_COPY (sp, up + k, rrsize);
      l = k;
      while (sticky0 == MPFR_LIMB_ZERO && l != 0)
        sticky0 = up[--l];
    }

  /* sticky0 is non-zero iff the truncated part of the input is non-zero */

  tsize = mpn_sqrtrem (rp, NULL, sp, rrsize);

  /* a return value of zero in mpn_sqrtrem indicates a perfect square */
  sticky = sticky0 || tsize != 0;

  /* truncate low bits of rp[0] */
  sticky1 = rp[0] & ((sh < GMP_NUMB_BITS) ? MPFR_LIMB_MASK(sh)
                     : MPFR_LIMB_MAX);
  rp[0] -= sticky1;

  sticky = sticky || sticky1;

  expr = (MPFR_GET_EXP(u) + odd_exp) / 2; /* exact */

  if (rnd_mode == MPFR_RNDZ || rnd_mode == MPFR_RNDD ||
      sticky == MPFR_LIMB_ZERO)
    {
      inexact = (sticky == MPFR_LIMB_ZERO) ? 0 : -1;
      goto truncate;
    }
  else if (rnd_mode == MPFR_RNDN)
    {
      /* if sh < GMP_NUMB_BITS, the round bit is bit (sh-1) of sticky1
         and the sticky bit is formed by the low sh-1 bits from
         sticky1, together with the sqrtrem remainder and sticky0. */
      if (sh < GMP_NUMB_BITS)
        {
          if (sticky1 & (MPFR_LIMB_ONE << (sh - 1)))
            { /* round bit is set */
              if (sticky1 == (MPFR_LIMB_ONE << (sh - 1)) && tsize == 0
                  && sticky0 == 0)
                goto even_rule;
              else
                goto add_one_ulp;
            }
          else /* round bit is zero */
            goto truncate; /* with the default inexact=-1 */
        }
      else /* sh = GMP_NUMB_BITS: the round bit is the most significant bit
              of rp[0], and the remaining GMP_NUMB_BITS-1 bits contribute to
              the sticky bit */
        {
          if (sticky1 & MPFR_LIMB_HIGHBIT)
            { /* round bit is set */
              if (sticky1 == MPFR_LIMB_HIGHBIT && tsize == 0 && sticky0 == 0)
                goto even_rule;
              else
                goto add_one_ulp;
            }
          else /* round bit is zero */
            goto truncate; /* with the default inexact=-1 */
        }
    }
  else /* rnd_mode = MPFR_RNDU or MPFR_RNDA: necessarily sticky <> 0,
          thus add 1 ulp */
    goto add_one_ulp;

 even_rule: /* tie case: round to even (this branch still has to set
               inexact) */
  if (sh < GMP_NUMB_BITS)
    inexact = (rp[0] & (MPFR_LIMB_ONE << sh)) ? 1 : -1;
  else
    inexact = (rp[1] & MPFR_LIMB_ONE) ? 1 : -1;
  if (inexact == -1)
    goto truncate;
  /* else go through add_one_ulp */

 add_one_ulp:
  inexact = 1; /* always here */
  if (sh == GMP_NUMB_BITS)
    {
      rp ++;
      rsize --;
      sh = 0;
    }
  /* now rsize = MPFR_LIMB_SIZE(r) */
  if (mpn_add_1 (rp0, rp, rsize, MPFR_LIMB_ONE << sh))
    {
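      /* carry out of mpn_add_1: the significand was 11...11, so the
         rounded result is the next power of two */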
      expr ++;
      rp0[rsize - 1] = MPFR_LIMB_HIGHBIT;
    }
  goto end;

 truncate: /* inexact = 0 or -1 */
  if (sh == GMP_NUMB_BITS)
    MPN_COPY (rp0, rp + 1, rsize - 1);

 end:
  /* Do not use MPFR_SET_EXP because the range has not been checked yet. */
  MPFR_ASSERTN (expr >= MPFR_EMIN_MIN && expr <= MPFR_EMAX_MAX);
  MPFR_EXP (r) = expr;
  MPFR_TMP_FREE(marker);

  return mpfr_check_range (r, inexact, rnd_mode);
}