/* mpfr_sqrt -- square root of a floating-point number

Copyright 1999-2023 Free Software Foundation, Inc.
Contributed by the AriC and Caramba projects, INRIA.

This file is part of the GNU MPFR Library.

The GNU MPFR Library is free software; you can redistribute it and/or modify
it under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation; either version 3 of the License, or (at your
option) any later version.

The GNU MPFR Library is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
License for more details.

You should have received a copy of the GNU Lesser General Public License
along with the GNU MPFR Library; see the file COPYING.LESSER.  If not, see
https://www.gnu.org/licenses/ or write to the Free Software Foundation, Inc.,
51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. */

#define MPFR_NEED_LONGLONG_H
#include "mpfr-impl.h"

#if !defined(MPFR_GENERIC_ABI) && GMP_NUMB_BITS == 64

#include "invsqrt_limb.h"

/* Put in rp[1]*2^64+rp[0] an approximation of floor(sqrt(2^128*n)),
   with 2^126 <= n := np[1]*2^64 + np[0] < 2^128. We have:
   {rp, 2} - 4 <= floor(sqrt(2^128*n)) <= {rp, 2} + 26. */
static void
mpfr_sqrt2_approx (mpfr_limb_ptr rp, mpfr_limb_srcptr np)
{
  mp_limb_t x, r1, r0, h, l;

  __gmpfr_sqrt_limb (r1, h, l, x, np[1]);

  /* now r1 = floor(sqrt(2^64*n1)) and h:l = 2^64*n1 - r1^2 with h:l <= 2*r1,
     thus h <= 1, and x is an approximation of 2^96/sqrt(np[1])-2^64 */

  l += np[0];
  h += (l < np[0]);

  /* now 2^64*n1 + n0 - r1^2 = 2^64*h + l with h <= 2 */

  /* divide by 2 */
  l = (h << 63) | (l >> 1);
  h = h >> 1;

  /* now h <= 1 */

  /* now add (2^64+x) * (h*2^64+l) / 2^64 to [r1*2^64, 0] */

  umul_hi (r0, x, l); /* x * l */
  r0 += l;
  r1 += h + (r0 < l); /* we have now added h*2^64 + l, i.e., the
                         2^64 * (h*2^64+l) / 2^64 term */
  if (h)
    {
      r0 += x;
      r1 += (r0 < x); /* add x, i.e., the x*h term (here h = 1) */
    }

  MPFR_ASSERTD(r1 & MPFR_LIMB_HIGHBIT);

  rp[0] = r0;
  rp[1] = r1;
}
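
/* Illustrative sketch, not part of MPFR: the function above performs one
   fixed-point Newton refinement s' = s + (a - s^2) / (2*s), starting from
   the one-limb root r1 and replacing the division by 2*s with a
   multiplication by the precomputed approximate reciprocal 2^64 + x.
   A floating-point analogue of the same step, with hypothetical names
   (sqrt_refine and inv_2s are not MPFR identifiers): */
#if 0
static double
sqrt_refine (double a, double s, double inv_2s)
{
  /* inv_2s is a precomputed approximation of 1/(2*s), playing the role
     of the (2^64 + x) reciprocal in mpfr_sqrt2_approx */
  return s + (a - s * s) * inv_2s;
}
#endif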

/* Special code for prec(r) = prec(u) < GMP_NUMB_BITS. We cannot have
   prec(u) = GMP_NUMB_BITS here, since when the exponent of u is odd,
   we need to shift u by one bit to the right without losing any bit.
   Assumes GMP_NUMB_BITS = 64. */
static int
mpfr_sqrt1 (mpfr_ptr r, mpfr_srcptr u, mpfr_rnd_t rnd_mode)
{
  mpfr_prec_t p = MPFR_GET_PREC(r);
  mpfr_prec_t exp_u = MPFR_EXP(u), exp_r, sh = GMP_NUMB_BITS - p;
  mp_limb_t u0, r0, rb, sb, mask = MPFR_LIMB_MASK(sh);
  mpfr_limb_ptr rp = MPFR_MANT(r);

  MPFR_STAT_STATIC_ASSERT (GMP_NUMB_BITS == 64);

  /* first make the exponent even */
  u0 = MPFR_MANT(u)[0];
  if (((unsigned int) exp_u & 1) != 0)
    {
      u0 >>= 1;
      exp_u ++;
    }
  MPFR_ASSERTD (((unsigned int) exp_u & 1) == 0);
  exp_r = exp_u / 2;
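
  /* Illustrative example (not from the original source): for u = 4 we have
     u = 0.5 * 2^3, so exp_u = 3 is odd; the mantissa is halved to 0.25 and
     exp_u becomes 4, giving sqrt(u) = sqrt(0.25) * 2^(4/2) = 0.5 * 2^2 = 2. */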

  /* then compute an approximation of the integer square root of
     u0*2^GMP_NUMB_BITS */
  __gmpfr_sqrt_limb_approx (r0, u0);

  sb = 1; /* when we can round correctly with the approximation, the sticky bit
             is non-zero */

  /* the exact square root is in [r0, r0 + 7] */
  if (MPFR_UNLIKELY(((r0 + 7) & (mask >> 1)) <= 7))
    {
      /* We should ensure r0 has its most significant bit set.
         Since r0 <= sqrt(2^64*u0) <= r0 + 7, as soon as sqrt(2^64*u0) >= 2^63+7,
         which happens for u0 >= 2^62+8, we have r0 >= 2^63.
         It thus remains to check that for 2^62 <= u0 <= 2^62+7,
         __gmpfr_sqrt_limb_approx (r0, u0) gives r0 >= 2^63, which is indeed
         the case:
         u0=4611686018427387904 r0=9223372036854775808
         u0=4611686018427387905 r0=9223372036854775808
         u0=4611686018427387906 r0=9223372036854775809
         u0=4611686018427387907 r0=9223372036854775810
         u0=4611686018427387908 r0=9223372036854775811
         u0=4611686018427387909 r0=9223372036854775812
         u0=4611686018427387910 r0=9223372036854775813
         u0=4611686018427387911 r0=9223372036854775814 */
      MPFR_ASSERTD(r0 >= MPFR_LIMB_HIGHBIT);
      umul_ppmm (rb, sb, r0, r0);
      sub_ddmmss (rb, sb, u0, 0, rb, sb);
      /* for the exact square root, we should have 0 <= rb:sb <= 2*r0 */
      while (!(rb == 0 || (rb == 1 && sb <= 2 * r0)))
        {
          /* subtract 2*r0+1 from rb:sb: subtract r0 before incrementing r0,
             then r0 again after the increment (i.e., r0+1) */
          rb -= (sb < r0);
          sb -= r0;
          r0 ++;
          rb -= (sb < r0);
          sb -= r0;
        }
      /* now we have rb*2^64 + sb <= 2*r0 */
      MPFR_ASSERTD(rb == 0 || (rb == 1 && sb <= 2 * r0));
      sb = rb | sb;
    }

  rb = r0 & (MPFR_LIMB_ONE << (sh - 1));
  sb |= (r0 & mask) ^ rb;
  rp[0] = r0 & ~mask;
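
  /* Illustrative example (not from the original source): for p = 60 we have
     sh = 4 and mask = 0xf, so rb is bit 3 of r0 and the low bits 0..2 are
     folded into the sticky bit sb. */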

  /* rounding: sb = 0 implies rb = 0, since (rb,sb) = (1,0) is not possible */
  MPFR_ASSERTD (rb == 0 || sb != 0);

  /* Note: if 1 and 2 are in [emin,emax], no overflow nor underflow
     is possible */
  if (MPFR_UNLIKELY (exp_r > __gmpfr_emax))
    return mpfr_overflow (r, rnd_mode, 1);

  /* See comments in mpfr_div_1 */
  if (MPFR_UNLIKELY (exp_r < __gmpfr_emin))
    {
      if (rnd_mode == MPFR_RNDN)
        {
          /* If (1-2^(-p-1))*2^(emin-1) <= sqrt(u) < 2^(emin-1),
             then sqrt(u) would be rounded to 2^(emin-1) with unbounded
             exponent range, and there would be no underflow.
             But this case cannot happen if u has precision p.
             Indeed, we would have:
             (1-2^(-p-1))^2*2^(2*emin-2) <= u < 2^(2*emin-2), i.e.,
             (1-2^(-p)+2^(-2p-2))*2^(2*emin-2) <= u < 2^(2*emin-2),
             and there is no p-bit number in that interval. */
          /* If the result is <= 0.5 * 2^(emin-1), we should round to 0. */
          if (exp_r < __gmpfr_emin - 1 ||
              (rp[0] == MPFR_LIMB_HIGHBIT && sb == 0))
            rnd_mode = MPFR_RNDZ;
        }
      else if (MPFR_IS_LIKE_RNDA(rnd_mode, 0))
        {
          if (exp_r == __gmpfr_emin - 1 &&
              rp[0] == ~mask &&
              (rb | sb) != 0)
            goto rounding; /* no underflow */
        }
      return mpfr_underflow (r, rnd_mode, 1);
    }

 rounding:
  MPFR_EXP (r) = exp_r;
  if (sb == 0 /* implies rb = 0 */ || rnd_mode == MPFR_RNDF)
    {
      MPFR_ASSERTD (rb == 0 || rnd_mode == MPFR_RNDF);
      MPFR_ASSERTD(exp_r >= __gmpfr_emin);
      MPFR_ASSERTD(exp_r <= __gmpfr_emax);
      MPFR_RET (0);
    }
  else if (rnd_mode == MPFR_RNDN)
    {
      /* since sb <> 0, only rb is needed to decide how to round, and the
         exact middle is not possible */
      if (rb == 0)
        goto truncate;
      else
        goto add_one_ulp;
    }
  else if (MPFR_IS_LIKE_RNDZ(rnd_mode, 0))
    {
    truncate:
      MPFR_ASSERTD(exp_r >= __gmpfr_emin);
      MPFR_ASSERTD(exp_r <= __gmpfr_emax);
      MPFR_RET(-1);
    }
  else /* round away from zero */
    {
    add_one_ulp:
      rp[0] += MPFR_LIMB_ONE << sh;
      if (rp[0] == 0)
        {
          rp[0] = MPFR_LIMB_HIGHBIT;
          if (MPFR_UNLIKELY(exp_r + 1 > __gmpfr_emax))
            return mpfr_overflow (r, rnd_mode, 1);
          MPFR_ASSERTD(exp_r + 1 <= __gmpfr_emax);
          MPFR_ASSERTD(exp_r + 1 >= __gmpfr_emin);
          MPFR_SET_EXP (r, exp_r + 1);
        }
      MPFR_RET(1);
    }
}
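
/* Illustrative sketch, not part of MPFR: the correction loops in mpfr_sqrt1
   above and mpfr_sqrt1n below are the classical integer square-root fixup.
   If the remainder rem = n - r^2 exceeds 2*r, then (r+1)^2 = r^2 + 2*r + 1
   <= n, so r is incremented while rem decreases by 2*r + 1.  A plain 64-bit
   analogue with hypothetical names (isqrt_fixup is not an MPFR function): */
#if 0
#include <stdint.h>
static uint32_t
isqrt_fixup (uint64_t n, uint32_t r)
{
  uint64_t rem = n - (uint64_t) r * r; /* invariant: rem = n - r^2 >= 0 */
  while (rem > 2 * (uint64_t) r)       /* (r+1)^2 <= n, r is too small */
    {
      rem -= 2 * (uint64_t) r + 1;
      r ++;
    }
  return r;                            /* now 0 <= n - r^2 <= 2*r */
}
#endif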

/* Special code for prec(r) = prec(u) = GMP_NUMB_BITS. */
static int
mpfr_sqrt1n (mpfr_ptr r, mpfr_srcptr u, mpfr_rnd_t rnd_mode)
{
  mpfr_prec_t exp_u = MPFR_EXP(u), exp_r;
  mp_limb_t u0, r0, rb, sb, low;
  mpfr_limb_ptr rp = MPFR_MANT(r);

  MPFR_STAT_STATIC_ASSERT (GMP_NUMB_BITS == 64);
  MPFR_ASSERTD(MPFR_PREC(r) == GMP_NUMB_BITS);
  MPFR_ASSERTD(MPFR_PREC(u) <= GMP_NUMB_BITS);

  /* first make the exponent even */
  u0 = MPFR_MANT(u)[0];
  if (((unsigned int) exp_u & 1) != 0)
    {
      low = u0 << (GMP_NUMB_BITS - 1);
      u0 >>= 1;
      exp_u ++;
    }
  else
    low = 0; /* low part of u0 */
  MPFR_ASSERTD (((unsigned int) exp_u & 1) == 0);
  exp_r = exp_u / 2;

  /* then compute an approximation of the integer square root of
     u0*2^GMP_NUMB_BITS */
  __gmpfr_sqrt_limb_approx (r0, u0);

  /* the exact square root is in [r0, r0 + 7] */

  /* As shown in mpfr_sqrt1 above, r0 has its most significant bit set */
  MPFR_ASSERTD(r0 >= MPFR_LIMB_HIGHBIT);

  umul_ppmm (rb, sb, r0, r0);
  sub_ddmmss (rb, sb, u0, low, rb, sb);
  /* for the exact square root, we should have 0 <= rb:sb <= 2*r0 */
  while (!(rb == 0 || (rb == 1 && sb <= 2 * r0)))
    {
      /* subtract 2*r0+1 from rb:sb: subtract r0 before incrementing r0,
         then r0 again after the increment (i.e., r0+1) */
      rb -= (sb < r0);
      sb -= r0;
      r0 ++;
      rb -= (sb < r0);
      sb -= r0;
    }
  /* now we have u0*2^64+low = r0^2 + rb*2^64+sb, with rb*2^64+sb <= 2*r0 */
  MPFR_ASSERTD(rb == 0 || (rb == 1 && sb <= 2 * r0));

  /* The middle case u0*2^64+low = (r0 + 1/2)^2 cannot occur, since
     (r0 + 1/2)^2 is not an integer.  Since (r0 + 1/2)^2 = r0^2 + r0 + 1/4,
     the round bit is 1 exactly when u0*2^64+low > (r0 + 1/2)^2, i.e., when
     the remainder rb*2^64 + sb exceeds r0; the sticky bit is then always 1,
     unless rb = sb = 0 (exact square root). */

  rb = rb || (sb > r0);
  sb = rb | sb;
  rp[0] = r0;

  /* rounding */

  /* Note: if 1 and 2 are in [emin,emax], no overflow nor underflow
     is possible */
  if (MPFR_UNLIKELY (exp_r > __gmpfr_emax))
    return mpfr_overflow (r, rnd_mode, 1);

  /* See comments in mpfr_div_1 */
  if (MPFR_UNLIKELY (exp_r < __gmpfr_emin))
    {
      if (rnd_mode == MPFR_RNDN)
        {
          /* the case rp[0] = 111...111 and rb = 1 cannot happen, since it
             would imply u0 >= (2^64-1/2)^2/2^64 thus u0 >= 2^64 */
          if (exp_r < __gmpfr_emin - 1 ||
              (rp[0] == MPFR_LIMB_HIGHBIT && sb == 0))
            rnd_mode = MPFR_RNDZ;
        }
      else if (MPFR_IS_LIKE_RNDA(rnd_mode, 0))
        {
          if (exp_r == __gmpfr_emin - 1 &&
              rp[0] == MPFR_LIMB_MAX &&
              (rb | sb) != 0)
            goto rounding; /* no underflow */
        }
      return mpfr_underflow (r, rnd_mode, 1);
    }

  /* sb = 0 can only occur when the square root is exact, i.e., rb = 0 */

 rounding:
  MPFR_EXP (r) = exp_r;
  if (sb == 0 /* implies rb = 0 */ || rnd_mode == MPFR_RNDF)
    {
      MPFR_ASSERTD(exp_r >= __gmpfr_emin);
      MPFR_ASSERTD(exp_r <= __gmpfr_emax);
      MPFR_RET (0);
    }
  else if (rnd_mode == MPFR_RNDN)
    {
      /* we can't have sb = 0, thus rb is enough */
      if (rb == 0)
        goto truncate;
      else
        goto add_one_ulp;
    }
  else if (MPFR_IS_LIKE_RNDZ(rnd_mode, 0))
    {
    truncate:
      MPFR_ASSERTD(exp_r >= __gmpfr_emin);
      MPFR_ASSERTD(exp_r <= __gmpfr_emax);
      MPFR_RET(-1);
    }
  else /* round away from zero */
    {
    add_one_ulp:
      rp[0] += MPFR_LIMB_ONE;
      if (rp[0] == 0)
        {
          rp[0] = MPFR_LIMB_HIGHBIT;
          if (MPFR_UNLIKELY(exp_r + 1 > __gmpfr_emax))
            return mpfr_overflow (r, rnd_mode, 1);
          MPFR_ASSERTD(exp_r + 1 <= __gmpfr_emax);
          MPFR_ASSERTD(exp_r + 1 >= __gmpfr_emin);
          MPFR_SET_EXP (r, exp_r + 1);
        }
      MPFR_RET(1);
    }
}
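
/* Illustrative example (not from the original source) for the round-bit test
   rb = rb || (sb > r0) above: with r0 = 3, sqrt(n) lies in [3, 4) and its
   fractional part is >= 1/2 exactly when the remainder n - r0^2 exceeds r0;
   n = 13 gives remainder 4 > 3 and sqrt(13) ~= 3.61 indeed rounds up, while
   n = 11 gives remainder 2 <= 3 and sqrt(11) ~= 3.32 rounds down. */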

/* Special code for GMP_NUMB_BITS < prec(r) = prec(u) < 2*GMP_NUMB_BITS.
   Assumes GMP_NUMB_BITS = 64. */
static int
mpfr_sqrt2 (mpfr_ptr r, mpfr_srcptr u, mpfr_rnd_t rnd_mode)
{
  mpfr_prec_t p = MPFR_GET_PREC(r);
  mpfr_limb_ptr up = MPFR_MANT(u), rp = MPFR_MANT(r);
  mp_limb_t np[4], rb, sb, mask;
  mpfr_prec_t exp_u = MPFR_EXP(u), exp_r, sh = 2 * GMP_NUMB_BITS - p;

  MPFR_STAT_STATIC_ASSERT (GMP_NUMB_BITS == 64);

  if (((unsigned int) exp_u & 1) != 0)
    {
      np[3] = up[1] >> 1;
      np[2] = (up[1] << (GMP_NUMB_BITS - 1)) | (up[0] >> 1);
      np[1] = up[0] << (GMP_NUMB_BITS - 1);
      exp_u ++;
    }
  else
    {
      np[3] = up[1];
      np[2] = up[0];
      np[1] = 0;
    }
  exp_r = exp_u / 2;

  mask = MPFR_LIMB_MASK(sh);

  mpfr_sqrt2_approx (rp, np + 2);
  /* with n = np[3]*2^64+np[2], we have:
     {rp, 2} - 4 <= floor(sqrt(2^128*n)) <= {rp, 2} + 26, thus we can round
     correctly except when the number formed by the last sh-1 bits
     of rp[0] is in the range [-26, 4] modulo 2^(sh-1). */
  if (MPFR_LIKELY(((rp[0] + 26) & (mask >> 1)) > 30))
    sb = 1;
  else
    {
      mp_limb_t tp[4], h, l;

      np[0] = 0;
      mpn_sqr (tp, rp, 2);
      /* Let r = {rp, 2} and s = floor(sqrt(2^128*n)), so that
         s - 26 <= r <= s + 4 and 0 <= 2^128*n - s^2 <= 2*s.  Writing
         2^128*n - r^2 = (2^128*n - s^2) + (s - r)*(s + r), the bounds
         -4 <= s - r <= 26 give -8*s - 16 <= 2^128*n - r^2 <= 54*s - 676,
         thus it suffices to compute 2^128*n - r^2 modulo 2^192 */
      mpn_sub_n (tp, np, tp, 3);
      /* invariant: h:l = 2 * {rp, 2}, with the upper bit implicit */
      h = (rp[1] << 1) | (rp[0] >> (GMP_NUMB_BITS - 1));
      l = rp[0] << 1;
      while ((mp_limb_signed_t) tp[2] < 0) /* approximation was too large */
        {
          /* subtract 1 from {rp, 2}, thus 2 from h:l */
          h -= (l <= MPFR_LIMB_ONE);
          l -= 2;
          /* add (1:h:l)+1 to {tp,3} */
          tp[0] += l + 1;
          tp[1] += h + (tp[0] < l);
          /* necessarily rp[1] has its most significant bit set */
          tp[2] += MPFR_LIMB_ONE + (tp[1] < h || (tp[1] == h && tp[0] < l));
        }
      /* now tp[2] >= 0 */
      /* now we want {tp, 3} <= 2 * {rp, 2}, which implies tp[2] <= 1 */
      while (tp[2] > 1 || (tp[2] == 1 && tp[1] > h) ||
             (tp[2] == 1 && tp[1] == h && tp[0] > l))
        {
          /* subtract (1:h:l)+1 from {tp,3} */
          tp[2] -= MPFR_LIMB_ONE + (tp[1] < h || (tp[1] == h && tp[0] <= l));
          tp[1] -= h + (tp[0] <= l);
          tp[0] -= l + 1;
          /* add 2 to h:l */
          l += 2;
          h += (l <= MPFR_LIMB_ONE);
        }
      /* restore {rp, 2} from h:l */
      rp[1] = MPFR_LIMB_HIGHBIT | (h >> 1);
      rp[0] = (h << (GMP_NUMB_BITS - 1)) | (l >> 1);
      sb = tp[2] | tp[0] | tp[1];
    }

  rb = rp[0] & (MPFR_LIMB_ONE << (sh - 1));
  sb |= (rp[0] & mask) ^ rb;
  rp[0] = rp[0] & ~mask;

  /* rounding */
  if (MPFR_UNLIKELY (exp_r > __gmpfr_emax))
    return mpfr_overflow (r, rnd_mode, 1);

  /* See comments in mpfr_div_1 */
  if (MPFR_UNLIKELY (exp_r < __gmpfr_emin))
    {
      if (rnd_mode == MPFR_RNDN)
        {
          if (exp_r < __gmpfr_emin - 1 || (rp[1] == MPFR_LIMB_HIGHBIT &&
                                           rp[0] == MPFR_LIMB_ZERO && sb == 0))
            rnd_mode = MPFR_RNDZ;
        }
      else if (MPFR_IS_LIKE_RNDA(rnd_mode, 0))
        {
          if (exp_r == __gmpfr_emin - 1 && (rp[1] == MPFR_LIMB_MAX &&
                                            rp[0] == ~mask) && (rb | sb))
            goto rounding; /* no underflow */
        }
      return mpfr_underflow (r, rnd_mode, 1);
    }

 rounding:
  MPFR_EXP (r) = exp_r;
  if (sb == 0 /* implies rb = 0 */ || rnd_mode == MPFR_RNDF)
    {
      MPFR_ASSERTD(exp_r >= __gmpfr_emin);
      MPFR_ASSERTD(exp_r <= __gmpfr_emax);
      MPFR_RET (0);
    }
  else if (rnd_mode == MPFR_RNDN)
    {
      /* since sb <> 0 now, only rb is needed */
      if (rb == 0)
        goto truncate;
      else
        goto add_one_ulp;
    }
  else if (MPFR_IS_LIKE_RNDZ(rnd_mode, 0))
    {
    truncate:
      MPFR_ASSERTD(exp_r >= __gmpfr_emin);
      MPFR_ASSERTD(exp_r <= __gmpfr_emax);
      MPFR_RET(-1);
    }
  else /* round away from zero */
    {
    add_one_ulp:
      rp[0] += MPFR_LIMB_ONE << sh;
      rp[1] += rp[0] == 0;
      if (rp[1] == 0)
        {
          rp[1] = MPFR_LIMB_HIGHBIT;
          if (MPFR_UNLIKELY(exp_r + 1 > __gmpfr_emax))
            return mpfr_overflow (r, rnd_mode, 1);
          MPFR_ASSERTD(exp_r + 1 <= __gmpfr_emax);
          MPFR_ASSERTD(exp_r + 1 >= __gmpfr_emin);
          MPFR_SET_EXP (r, exp_r + 1);
        }
      MPFR_RET(1);
    }
}
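
/* Illustrative sketch, not part of MPFR: the chained tests in the second
   correction loop of mpfr_sqrt2 above are a lexicographic comparison of the
   3-limb remainder {tp, 3} with the 3-limb value 1:h:l.  The generic
   pattern, with hypothetical names (cmp3 is not an MPFR function): */
#if 0
#include <stdint.h>
static int
cmp3 (const uint64_t a[3], const uint64_t b[3])
{
  int i;
  /* compare most significant limbs first */
  for (i = 2; i >= 0; i--)
    if (a[i] != b[i])
      return a[i] < b[i] ? -1 : 1;
  return 0; /* equal */
}
#endif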

#endif /* !defined(MPFR_GENERIC_ABI) && GMP_NUMB_BITS == 64 */

int
mpfr_sqrt (mpfr_ptr r, mpfr_srcptr u, mpfr_rnd_t rnd_mode)
{
  mp_size_t rsize; /* number of limbs of r (plus 1 if exact limb multiple) */
  mp_size_t rrsize;
  mp_size_t usize; /* number of limbs of u */
  mp_size_t tsize; /* number of limbs of the sqrtrem remainder */
  mp_size_t k;
  mp_size_t l;
  mpfr_limb_ptr rp, rp0;
  mpfr_limb_ptr up;
  mpfr_limb_ptr sp;
  mp_limb_t sticky0; /* truncated part of input */
  mp_limb_t sticky1; /* truncated part of rp[0] */
  mp_limb_t sticky;
  int odd_exp;
  int sh; /* number of extra bits in rp[0] */
  int inexact; /* return ternary flag */
  mpfr_exp_t expr;
  mpfr_prec_t rq = MPFR_GET_PREC (r);
  MPFR_TMP_DECL(marker);

  MPFR_LOG_FUNC
    (("x[%Pd]=%.*Rg rnd=%d", mpfr_get_prec (u), mpfr_log_prec, u, rnd_mode),
     ("y[%Pd]=%.*Rg inexact=%d",
      mpfr_get_prec (r), mpfr_log_prec, r, inexact));

  if (MPFR_UNLIKELY(MPFR_IS_SINGULAR(u)))
    {
      if (MPFR_IS_NAN(u))
        {
          MPFR_SET_NAN(r);
          MPFR_RET_NAN;
        }
      else if (MPFR_IS_ZERO(u))
        {
          /* 0+ or 0- */
          MPFR_SET_SAME_SIGN(r, u);
          MPFR_SET_ZERO(r);
          MPFR_RET(0); /* zero is exact */
        }
      else
        {
          MPFR_ASSERTD(MPFR_IS_INF(u));
          /* sqrt(-Inf) = NaN */
          if (MPFR_IS_NEG(u))
            {
              MPFR_SET_NAN(r);
              MPFR_RET_NAN;
            }
          MPFR_SET_POS(r);
          MPFR_SET_INF(r);
          MPFR_RET(0);
        }
    }
  if (MPFR_UNLIKELY(MPFR_IS_NEG(u)))
    {
      MPFR_SET_NAN(r);
      MPFR_RET_NAN;
    }
  MPFR_SET_POS(r);

#if !defined(MPFR_GENERIC_ABI) && GMP_NUMB_BITS == 64
  {
    mpfr_prec_t uq = MPFR_GET_PREC (u);

    if (rq == uq)
      {
        if (rq < GMP_NUMB_BITS)
          return mpfr_sqrt1 (r, u, rnd_mode);

        if (GMP_NUMB_BITS < rq && rq < 2*GMP_NUMB_BITS)
          return mpfr_sqrt2 (r, u, rnd_mode);

        if (rq == GMP_NUMB_BITS)
          return mpfr_sqrt1n (r, u, rnd_mode);
      }
  }
#endif

  MPFR_TMP_MARK (marker);
  MPFR_UNSIGNED_MINUS_MODULO (sh, rq);
  if (sh == 0 && rnd_mode == MPFR_RNDN)
    sh = GMP_NUMB_BITS; /* ugly case */
  rsize = MPFR_LIMB_SIZE(r) + (sh == GMP_NUMB_BITS);
  /* rsize is the number of limbs of r, plus 1 if the precision is an exact
     limb multiple and we round to nearest (the extra limb will hold the
     round and sticky bits); this is the number of wanted limbs for the
     square root */
  rrsize = rsize + rsize;
  usize = MPFR_LIMB_SIZE(u); /* number of limbs of u */
  rp0 = MPFR_MANT(r);
  rp = (sh < GMP_NUMB_BITS) ? rp0 : MPFR_TMP_LIMBS_ALLOC (rsize);
  up = MPFR_MANT(u);
  sticky0 = MPFR_LIMB_ZERO; /* truncated part of input */
  sticky1 = MPFR_LIMB_ZERO; /* truncated part of rp[0] */
  odd_exp = (unsigned int) MPFR_GET_EXP (u) & 1;
  inexact = -1; /* return ternary flag */

  sp = MPFR_TMP_LIMBS_ALLOC (rrsize);

  /* copy the most significant limbs of u to {sp, rrsize} */
  if (MPFR_LIKELY(usize <= rrsize)) /* when r and u have the same precision,
                                       we indeed have rrsize = 2 * usize */
    {
      k = rrsize - usize;
      if (MPFR_LIKELY(k))
        MPN_ZERO (sp, k);
      if (odd_exp)
        {
          if (MPFR_LIKELY(k))
            sp[k - 1] = mpn_rshift (sp + k, up, usize, 1);
          else
            sticky0 = mpn_rshift (sp, up, usize, 1);
        }
      else
        MPN_COPY (sp + rrsize - usize, up, usize);
    }
  else /* usize > rrsize: truncate the input */
    {
      k = usize - rrsize;
      if (odd_exp)
        sticky0 = mpn_rshift (sp, up + k, rrsize, 1);
      else
        MPN_COPY (sp, up + k, rrsize);
      l = k;
      while (sticky0 == MPFR_LIMB_ZERO && l != 0)
        sticky0 = up[--l];
    }

  /* sticky0 is non-zero iff the truncated part of the input is non-zero */

  tsize = mpn_sqrtrem (rp, NULL, sp, rrsize);

  /* a return value of zero from mpn_sqrtrem indicates a perfect square */
  sticky = sticky0 || tsize != 0;

  /* truncate low bits of rp[0] */
  sticky1 = rp[0] & ((sh < GMP_NUMB_BITS) ? MPFR_LIMB_MASK(sh)
                     : MPFR_LIMB_MAX);
  rp[0] -= sticky1;

  sticky = sticky || sticky1;

  expr = (MPFR_GET_EXP(u) + odd_exp) / 2;  /* exact */

  if (rnd_mode == MPFR_RNDZ || rnd_mode == MPFR_RNDD ||
      sticky == MPFR_LIMB_ZERO)
    {
      inexact = (sticky == MPFR_LIMB_ZERO) ? 0 : -1;
      goto truncate;
    }
  else if (rnd_mode == MPFR_RNDN)
    {
      /* if sh < GMP_NUMB_BITS, the round bit is bit (sh-1) of sticky1,
         and the sticky bit is formed by the low sh-1 bits of sticky1,
         together with the sqrtrem remainder and sticky0 */
      if (sh < GMP_NUMB_BITS)
        {
          if (sticky1 & (MPFR_LIMB_ONE << (sh - 1)))
            { /* round bit is set */
              if (sticky1 == (MPFR_LIMB_ONE << (sh - 1)) && tsize == 0
                  && sticky0 == 0)
                goto even_rule;
              else
                goto add_one_ulp;
            }
          else /* round bit is zero */
            goto truncate; /* with the default inexact=-1 */
        }
      else /* sh = GMP_NUMB_BITS: the round bit is the most significant bit
              of rp[0], and the remaining GMP_NUMB_BITS-1 bits contribute to
              the sticky bit */
        {
          if (sticky1 & MPFR_LIMB_HIGHBIT)
            { /* round bit is set */
              if (sticky1 == MPFR_LIMB_HIGHBIT && tsize == 0 && sticky0 == 0)
                goto even_rule;
              else
                goto add_one_ulp;
            }
          else /* round bit is zero */
            goto truncate; /* with the default inexact=-1 */
        }
    }
  else /* rnd_mode = MPFR_RNDU or MPFR_RNDA, necessarily sticky <> 0,
          thus add 1 ulp */
    goto add_one_ulp;

 even_rule: /* has to set inexact */
  if (sh < GMP_NUMB_BITS)
    inexact = (rp[0] & (MPFR_LIMB_ONE << sh)) ? 1 : -1;
  else
    inexact = (rp[1] & MPFR_LIMB_ONE) ? 1 : -1;
  if (inexact == -1)
    goto truncate;
  /* else go through add_one_ulp */

 add_one_ulp:
  inexact = 1; /* always here */
  if (sh == GMP_NUMB_BITS)
    {
      rp ++;
      rsize --;
      sh = 0;
    }
  /* now rsize = MPFR_LIMB_SIZE(r) */
  if (mpn_add_1 (rp0, rp, rsize, MPFR_LIMB_ONE << sh))
    {
      expr ++;
      rp0[rsize - 1] = MPFR_LIMB_HIGHBIT;
    }
  goto end;

 truncate: /* inexact = 0 or -1 */
  if (sh == GMP_NUMB_BITS)
    MPN_COPY (rp0, rp + 1, rsize - 1);

 end:
  /* Do not use MPFR_SET_EXP because the range has not been checked yet. */
  MPFR_ASSERTN (expr >= MPFR_EMIN_MIN && expr <= MPFR_EMAX_MAX);
  MPFR_EXP (r) = expr;
  MPFR_TMP_FREE(marker);

  return mpfr_check_range (r, inexact, rnd_mode);
}
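
/* Usage sketch, not part of this file: mpfr_sqrt follows the usual MPFR
   calling convention, returning a ternary value that is 0 when the stored
   result is exact, negative when it is below sqrt(u) and positive when it
   is above.  A minimal caller, assuming MPFR is installed: */
#if 0
#include <stdio.h>
#include <mpfr.h>
int
main (void)
{
  mpfr_t x;
  int inex;
  mpfr_init2 (x, 113);                /* 113-bit precision */
  mpfr_set_ui (x, 2, MPFR_RNDN);
  inex = mpfr_sqrt (x, x, MPFR_RNDN); /* x <- sqrt(2), correctly rounded */
  mpfr_printf ("sqrt(2) ~= %.30Rg, ternary = %d\n", x, inex);
  mpfr_clear (x);
  return 0;
}
#endif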