/* mpfr_sqr -- Floating-point square

Copyright 2004-2023 Free Software Foundation, Inc.
Contributed by the AriC and Caramba projects, INRIA.

This file is part of the GNU MPFR Library.

The GNU MPFR Library is free software; you can redistribute it and/or modify
it under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation; either version 3 of the License, or (at your
option) any later version.

The GNU MPFR Library is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
License for more details.

You should have received a copy of the GNU Lesser General Public License
along with the GNU MPFR Library; see the file COPYING.LESSER. If not, see
https://www.gnu.org/licenses/ or write to the Free Software Foundation, Inc.,
51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. */

#define MPFR_NEED_LONGLONG_H
#include "mpfr-impl.h"

#if !defined(MPFR_GENERIC_ABI) && (GMP_NUMB_BITS == 32 || GMP_NUMB_BITS == 64)

/* Special code for prec(a) < GMP_NUMB_BITS and prec(b) <= GMP_NUMB_BITS.
   Note: this function was copied from mpfr_mul_1 in file mul.c, thus any
   change here should be done also in mpfr_mul_1.
   Although this function works as soon as prec(a) < GMP_NUMB_BITS and
   prec(b) <= GMP_NUMB_BITS, we use it for prec(a)=prec(b) < GMP_NUMB_BITS. */
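/* In the special-precision functions below, sh is the number of unused
   (zero) low bits in the last significand limb, rb is the round bit (the
   first bit below the target precision p), and sb is the sticky bit (the
   OR of all the bits below rb). */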
static int
mpfr_sqr_1 (mpfr_ptr a, mpfr_srcptr b, mpfr_rnd_t rnd_mode, mpfr_prec_t p)
{
  mp_limb_t a0;
  mpfr_limb_ptr ap = MPFR_MANT(a);
  mp_limb_t b0 = MPFR_MANT(b)[0];
  mpfr_exp_t ax;
  mpfr_prec_t sh = GMP_NUMB_BITS - p;
  mp_limb_t rb, sb, mask = MPFR_LIMB_MASK(sh);

  /* When prec(b) <= GMP_NUMB_BITS / 2, we could replace umul_ppmm
     by a limb multiplication as follows, but we assume umul_ppmm is as fast
     as a limb multiplication on modern processors:
        a0 = (b0 >> (GMP_NUMB_BITS / 2)) * (b0 >> (GMP_NUMB_BITS / 2));
        sb = 0;
  */
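  /* (This would work because for prec(b) <= GMP_NUMB_BITS / 2 the low half
     of b0 is zero, so b0^2 = (b0 >> (GMP_NUMB_BITS/2))^2 * 2^GMP_NUMB_BITS:
     the low product limb, and thus sb, would be exactly 0.) */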
  ax = MPFR_GET_EXP(b) * 2;
  umul_ppmm (a0, sb, b0, b0);
  if (a0 < MPFR_LIMB_HIGHBIT)
    {
      ax --;
      a0 = (a0 << 1) | (sb >> (GMP_NUMB_BITS - 1));
      sb <<= 1;
    }
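  /* rb is bit sh-1 of a0 (the round bit); XORing rb out of the low sh bits
     leaves exactly the bits below it, which are OR'ed into the sticky bit */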
  rb = a0 & (MPFR_LIMB_ONE << (sh - 1));
  sb |= (a0 & mask) ^ rb;
  ap[0] = a0 & ~mask;

  MPFR_SIGN(a) = MPFR_SIGN_POS;

  /* rounding */
  if (MPFR_UNLIKELY(ax > __gmpfr_emax))
    return mpfr_overflow (a, rnd_mode, MPFR_SIGN_POS);

  /* Warning: underflow should be checked *after* rounding, thus when rounding
     away and when a > 0.111...111*2^(emin-1), or when rounding to nearest and
     a >= 0.111...111[1]*2^(emin-1), there is no underflow. */
  if (MPFR_UNLIKELY(ax < __gmpfr_emin))
    {
      /* Note: for emin=2*k+1, a >= 0.111...111*2^(emin-1) is not possible,
         i.e., a >= (1 - 2^(-p))*2^(2k), since we need a = b^2 with EXP(b)=k,
         and the largest such b, (1 - 2^(-p))*2^k, satisfies
         b^2 < (1 - 2^(-p))*2^(2k).
         For emin=2*k, it is only possible for some values of p: it is not
         possible for p=53, because the largest significand is 6369051672525772
         but its square has only 52 leading ones. For p=24 it is possible,
         with b = 11863283, whose square has 24 leading ones. */
      if (ax == __gmpfr_emin - 1 && ap[0] == ~mask &&
          ((rnd_mode == MPFR_RNDN && rb) ||
           (MPFR_IS_LIKE_RNDA (rnd_mode, 0) && (rb | sb))))
        goto rounding; /* no underflow */
      /* For RNDN, mpfr_underflow always rounds away, thus for |a| <= 2^(emin-2)
         we have to change to RNDZ. This corresponds to:
         (a) either ax < emin - 1
         (b) or ax = emin - 1 and ap[0] = 1000....000 and rb = sb = 0 */
      if (rnd_mode == MPFR_RNDN &&
          (ax < __gmpfr_emin - 1 ||
           (ap[0] == MPFR_LIMB_HIGHBIT && (rb | sb) == 0)))
        rnd_mode = MPFR_RNDZ;
      return mpfr_underflow (a, rnd_mode, MPFR_SIGN_POS);
    }

 rounding:
  MPFR_EXP (a) = ax; /* Don't use MPFR_SET_EXP since ax might be < __gmpfr_emin
                        in the cases "goto rounding" above. */
  if ((rb == 0 && sb == 0) || rnd_mode == MPFR_RNDF)
    {
      MPFR_ASSERTD(ax >= __gmpfr_emin);
      MPFR_RET (0);
    }
  else if (rnd_mode == MPFR_RNDN)
    {
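      /* round to nearest, ties to even: round up iff rb = 1 and either
         sb = 1 or the least significant kept bit (bit sh of ap[0]) is 1 */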
      if (rb == 0 || (sb == 0 && (ap[0] & (MPFR_LIMB_ONE << sh)) == 0))
        goto truncate;
      else
        goto add_one_ulp;
    }
  else if (MPFR_IS_LIKE_RNDZ (rnd_mode, 0))
    {
    truncate:
      MPFR_ASSERTD(ax >= __gmpfr_emin);
      MPFR_RET(-MPFR_SIGN_POS);
    }
  else /* round away from zero */
    {
    add_one_ulp:
      ap[0] += MPFR_LIMB_ONE << sh;
      if (ap[0] == 0)
        {
          ap[0] = MPFR_LIMB_HIGHBIT;
          if (MPFR_UNLIKELY(ax + 1 > __gmpfr_emax))
            return mpfr_overflow (a, rnd_mode, MPFR_SIGN_POS);
          MPFR_ASSERTD(ax + 1 <= __gmpfr_emax);
          MPFR_ASSERTD(ax + 1 >= __gmpfr_emin);
          MPFR_SET_EXP (a, ax + 1);
        }
      MPFR_RET(MPFR_SIGN_POS);
    }
}

/* special code for PREC(a) = GMP_NUMB_BITS */
static int
mpfr_sqr_1n (mpfr_ptr a, mpfr_srcptr b, mpfr_rnd_t rnd_mode)
{
  mp_limb_t a0;
  mpfr_limb_ptr ap = MPFR_MANT(a);
  mp_limb_t b0 = MPFR_MANT(b)[0];
  mpfr_exp_t ax;
  mp_limb_t rb, sb;

  ax = MPFR_GET_EXP(b) * 2;
  umul_ppmm (a0, sb, b0, b0);
  if (a0 < MPFR_LIMB_HIGHBIT)
    {
      ax --;
      a0 = (a0 << 1) | (sb >> (GMP_NUMB_BITS - 1));
      sb <<= 1;
    }
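  /* with p = GMP_NUMB_BITS there are no unused bits in a0, so the round
     bit is the most significant bit of the low product limb sb, and the
     remaining bits of sb form the sticky bit */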
  rb = sb & MPFR_LIMB_HIGHBIT;
  sb = sb & ~MPFR_LIMB_HIGHBIT;
  ap[0] = a0;

  MPFR_SIGN(a) = MPFR_SIGN_POS;

  /* rounding */
  if (MPFR_UNLIKELY(ax > __gmpfr_emax))
    return mpfr_overflow (a, rnd_mode, MPFR_SIGN_POS);

  /* Warning: underflow should be checked *after* rounding, thus when rounding
     away and when a > 0.111...111*2^(emin-1), or when rounding to nearest and
     a >= 0.111...111[1]*2^(emin-1), there is no underflow. */
  if (MPFR_UNLIKELY(ax < __gmpfr_emin))
    {
      /* As seen in mpfr_mul_1, we cannot have a0 = 111...111 here if there
         was not an exponent decrease (ax--) above.
         In the case of an exponent decrease:
         - For GMP_NUMB_BITS=32, a0 = 111...111 is not possible since the
           largest b0 such that b0^2 < 2^(2*32-1) is b0=3037000499, but
           its square has only 30 leading ones.
         - For GMP_NUMB_BITS=64, a0 = 111...111 is possible: the largest b0
           is 13043817825332782212, and its square has 64 leading ones; but
           since the next bit is rb=0, for RNDN, we always have an underflow.
         For the test below, note that a is positive.
      */
      if (ax == __gmpfr_emin - 1 && ap[0] == MPFR_LIMB_MAX &&
          MPFR_IS_LIKE_RNDA (rnd_mode, 0))
        goto rounding; /* no underflow */
      /* For RNDN, mpfr_underflow always rounds away, thus for |a| <= 2^(emin-2)
         we have to change to RNDZ. This corresponds to:
         (a) either ax < emin - 1
         (b) or ax = emin - 1 and ap[0] = 1000....000 and rb = sb = 0 */
      if (rnd_mode == MPFR_RNDN &&
          (ax < __gmpfr_emin - 1 ||
           (ap[0] == MPFR_LIMB_HIGHBIT && (rb | sb) == 0)))
        rnd_mode = MPFR_RNDZ;
      return mpfr_underflow (a, rnd_mode, MPFR_SIGN_POS);
    }

 rounding:
  MPFR_EXP (a) = ax; /* Don't use MPFR_SET_EXP since ax might be < __gmpfr_emin
                        in the cases "goto rounding" above. */
  if ((rb == 0 && sb == 0) || rnd_mode == MPFR_RNDF)
    {
      MPFR_ASSERTD(ax >= __gmpfr_emin);
      MPFR_RET (0);
    }
  else if (rnd_mode == MPFR_RNDN)
    {
      if (rb == 0 || (sb == 0 && (ap[0] & MPFR_LIMB_ONE) == 0))
        goto truncate;
      else
        goto add_one_ulp;
    }
  else if (MPFR_IS_LIKE_RNDZ (rnd_mode, 0))
    {
    truncate:
      MPFR_ASSERTD(ax >= __gmpfr_emin);
      MPFR_RET(-MPFR_SIGN_POS);
    }
  else /* round away from zero */
    {
    add_one_ulp:
      ap[0] += MPFR_LIMB_ONE;
      if (ap[0] == 0)
        {
          ap[0] = MPFR_LIMB_HIGHBIT;
          if (MPFR_UNLIKELY(ax + 1 > __gmpfr_emax))
            return mpfr_overflow (a, rnd_mode, MPFR_SIGN_POS);
          MPFR_ASSERTD(ax + 1 <= __gmpfr_emax);
          MPFR_ASSERTD(ax + 1 >= __gmpfr_emin);
          MPFR_SET_EXP (a, ax + 1);
        }
      MPFR_RET(MPFR_SIGN_POS);
    }
}

/* Special code for GMP_NUMB_BITS < prec(a) < 2*GMP_NUMB_BITS and
   GMP_NUMB_BITS < prec(b) <= 2*GMP_NUMB_BITS.
   Note: this function was copied and optimized from mpfr_mul_2 in file mul.c,
   thus any change here should be done also in mpfr_mul_2, if applicable. */
static int
mpfr_sqr_2 (mpfr_ptr a, mpfr_srcptr b, mpfr_rnd_t rnd_mode, mpfr_prec_t p)
{
  mp_limb_t h, l, u, v;
  mpfr_limb_ptr ap = MPFR_MANT(a);
  mpfr_exp_t ax = 2 * MPFR_GET_EXP(b);
  mpfr_prec_t sh = 2 * GMP_NUMB_BITS - p;
  mp_limb_t rb, sb, sb2, mask = MPFR_LIMB_MASK(sh);
  mp_limb_t *bp = MPFR_MANT(b);

  /* we store the 4-limb product in h=ap[1], l=ap[0], sb=ap[-1], sb2=ap[-2] */
  umul_ppmm (h, l, bp[1], bp[1]);
  umul_ppmm (u, v, bp[1], bp[0]);
  l += u << 1;
  h += (l < (u << 1)) + (u >> (GMP_NUMB_BITS - 1));
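  /* {h, l} now holds b1^2 + 2*high(b1*b0): the doubled cross term can
     carry into h both through the addition on l and through the bit
     shifted out of u */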

  /* now the full square is {h, l, 2*v + high(b0^2), low(b0^2)},
     where the lower part contributes less than 3 ulps to {h, l} */

  /* If h has its most significant bit set and the low sh-1 bits of l are not
     000...000 nor 111...111 nor 111...110, then we can round correctly;
     if h has zero as most significant bit, we have to shift left h and l,
     thus if the low sh-2 bits are not 000...000 nor 111...111 nor 111...110,
     then we can round correctly. To avoid an extra test we consider the latter
     case (if we can round, we can also round in the former case).
     For sh <= 3, we have mask <= 7, thus (mask>>2) <= 1, and the approximation
     cannot be enough. */
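  /* For instance, for sh = 8 the test below fails only when the low 6 bits
     of l are 0, 62 or 63, i.e., with probability about 3/64 assuming
     uniformly distributed low bits; only then is the exact low part of the
     square computed. */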
  if (MPFR_LIKELY(((l + 2) & (mask >> 2)) > 2))
    sb = sb2 = 1; /* result cannot be exact in that case */
  else
    {
      mp_limb_t carry1, carry2;

      umul_ppmm (sb, sb2, bp[0], bp[0]);
      /* the full product is {h, l, sb + 2*v, sb2}, so v is added twice below */
      ADD_LIMB (sb, v, carry1);
      ADD_LIMB (l, carry1, carry2);
      h += carry2;
      ADD_LIMB (sb, v, carry1);
      ADD_LIMB (l, carry1, carry2);
      h += carry2;
    }
  if (h < MPFR_LIMB_HIGHBIT)
    {
      ax --;
      h = (h << 1) | (l >> (GMP_NUMB_BITS - 1));
      l = (l << 1) | (sb >> (GMP_NUMB_BITS - 1));
      sb <<= 1;
      /* no need to shift sb2 since we only want to know if it is zero or not */
    }
  ap[1] = h;
  rb = l & (MPFR_LIMB_ONE << (sh - 1));
  sb |= ((l & mask) ^ rb) | sb2;
  ap[0] = l & ~mask;

  MPFR_SIGN(a) = MPFR_SIGN_POS;

  /* rounding */
  if (MPFR_UNLIKELY(ax > __gmpfr_emax))
    return mpfr_overflow (a, rnd_mode, MPFR_SIGN_POS);

  /* Warning: underflow should be checked *after* rounding, thus when rounding
     away and when a > 0.111...111*2^(emin-1), or when rounding to nearest and
     a >= 0.111...111[1]*2^(emin-1), there is no underflow. */
  if (MPFR_UNLIKELY(ax < __gmpfr_emin))
    {
      /* Note: like for mpfr_sqr_1, the case
         0.111...111*2^(emin-1) < a < 2^(emin-1) is not possible when emin is
         odd, since (modulo a shift) this would imply 1-2^(-p) < a = b^2 < 1,
         and this is not possible with 1-2^(-p) <= b < 1.
         For emin even, it is possible for some values of p, for example for
         p=69 with b=417402170410649030795*2^k. */
      if (ax == __gmpfr_emin - 1 &&
          ap[1] == MPFR_LIMB_MAX &&
          ap[0] == ~mask &&
          ((rnd_mode == MPFR_RNDN && rb) ||
           (MPFR_IS_LIKE_RNDA (rnd_mode, 0) && (rb | sb))))
        goto rounding; /* no underflow */
      /* for RNDN, mpfr_underflow always rounds away, thus for
         |a| <= 2^(emin-2) we have to change to RNDZ */
      if (rnd_mode == MPFR_RNDN &&
          (ax < __gmpfr_emin - 1 ||
           (ap[1] == MPFR_LIMB_HIGHBIT && ap[0] == 0 && (rb | sb) == 0)))
        rnd_mode = MPFR_RNDZ;
      return mpfr_underflow (a, rnd_mode, MPFR_SIGN_POS);
    }

 rounding:
  MPFR_EXP (a) = ax; /* Don't use MPFR_SET_EXP since ax might be < __gmpfr_emin
                        in the cases "goto rounding" above. */
  if ((rb == 0 && sb == 0) || rnd_mode == MPFR_RNDF)
    {
      MPFR_ASSERTD(ax >= __gmpfr_emin);
      MPFR_RET (0);
    }
  else if (rnd_mode == MPFR_RNDN)
    {
      if (rb == 0 || (sb == 0 && (ap[0] & (MPFR_LIMB_ONE << sh)) == 0))
        goto truncate;
      else
        goto add_one_ulp;
    }
  else if (MPFR_IS_LIKE_RNDZ (rnd_mode, 0))
    {
    truncate:
      MPFR_ASSERTD(ax >= __gmpfr_emin);
      MPFR_RET(-MPFR_SIGN_POS);
    }
  else /* round away from zero */
    {
    add_one_ulp:
      ap[0] += MPFR_LIMB_ONE << sh;
      ap[1] += (ap[0] == 0);
      if (ap[1] == 0)
        {
          ap[1] = MPFR_LIMB_HIGHBIT;
          if (MPFR_UNLIKELY(ax + 1 > __gmpfr_emax))
            return mpfr_overflow (a, rnd_mode, MPFR_SIGN_POS);
          MPFR_ASSERTD(ax + 1 <= __gmpfr_emax);
          MPFR_ASSERTD(ax + 1 >= __gmpfr_emin);
          MPFR_SET_EXP (a, ax + 1);
        }
      MPFR_RET(MPFR_SIGN_POS);
    }
}

/* Special code for 2*GMP_NUMB_BITS < prec(a) < 3*GMP_NUMB_BITS and
   2*GMP_NUMB_BITS < prec(b) <= 3*GMP_NUMB_BITS. */
static int
mpfr_sqr_3 (mpfr_ptr a, mpfr_srcptr b, mpfr_rnd_t rnd_mode, mpfr_prec_t p)
{
  mp_limb_t a0, a1, a2, h, l;
  mpfr_limb_ptr ap = MPFR_MANT(a);
  mpfr_exp_t ax = 2 * MPFR_GET_EXP(b);
  mpfr_prec_t sh = 3 * GMP_NUMB_BITS - p;
  mp_limb_t rb, sb, sb2, mask = MPFR_LIMB_MASK(sh);
  mp_limb_t *bp = MPFR_MANT(b);

  /* we store the upper 3-limb product in a2, a1, a0:
     b2^2, 2*b2*b1, 2*b2*b0+b1^2 */
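  /* With B = 2^GMP_NUMB_BITS and b = b2*B^2 + b1*B + b0, we have
     b^2 = b2^2*B^4 + 2*b2*b1*B^3 + (2*b2*b0 + b1^2)*B^2
           + 2*b1*b0*B + b0^2,
     and only the three upper terms are computed in this first pass. */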

  /* first compute b2*b1 and b2*b0, which will be shifted by 1 */
  umul_ppmm (a1, a0, bp[2], bp[1]);
  umul_ppmm (h, l, bp[2], bp[0]);
  a0 += h;
  a1 += (a0 < h);
  /* now a1, a0 contains b2*b1 + floor(b2*b0/B): there can be no overflow
     since b2*b1*B + b2*b0 <= b2*(b1*B+b0) <= b2*(B^2-1) < B^3 */

  /* multiply a2, a1, a0 by 2 */
  a2 = a1 >> (GMP_NUMB_BITS - 1);
  a1 = (a1 << 1) | (a0 >> (GMP_NUMB_BITS - 1));
  a0 = (a0 << 1);

  /* add b2^2 */
  umul_ppmm (h, l, bp[2], bp[2]);
  a1 += l;
  a2 += h + (a1 < l);

  /* add b1^2 */
  umul_ppmm (h, l, bp[1], bp[1]);
  a0 += h;
  a1 += (a0 < h);
  a2 += (a1 == 0 && a0 < h);
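  /* the carry out of a0 propagates into a2 only when it also wrapped a1
     around to 0 */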

  /* Now the approximate product {a2, a1, a0} has an error of less than
     5 ulps (3 ulps for the ignored low limbs of 2*b2*b0+b1^2,
     plus 2 ulps for the ignored 2*b1*b0 (plus b0^2)).
     Since we might shift by 1 bit, we make sure the low sh-2 bits of a0
     are not 0, -1, -2, -3 or -4. */

  if (MPFR_LIKELY(((a0 + 4) & (mask >> 2)) > 4))
    sb = sb2 = 1; /* result cannot be exact in that case */
  else
    {
      mp_limb_t t[6];
      mpn_sqr (t, bp, 3);
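      /* t[0..5] now contains the exact 6-limb square of {bp, 3} */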
      a2 = t[5];
      a1 = t[4];
      a0 = t[3];
      sb = t[2];
      sb2 = t[1] | t[0];
    }
  if (a2 < MPFR_LIMB_HIGHBIT)
    {
      ax --;
      a2 = (a2 << 1) | (a1 >> (GMP_NUMB_BITS - 1));
      a1 = (a1 << 1) | (a0 >> (GMP_NUMB_BITS - 1));
      a0 = (a0 << 1) | (sb >> (GMP_NUMB_BITS - 1));
      sb <<= 1;
      /* no need to shift sb2: we only need to know if it is zero or not */
    }
  ap[2] = a2;
  ap[1] = a1;
  rb = a0 & (MPFR_LIMB_ONE << (sh - 1));
  sb |= ((a0 & mask) ^ rb) | sb2;
  ap[0] = a0 & ~mask;

  MPFR_SIGN(a) = MPFR_SIGN_POS;

  /* rounding */
  if (MPFR_UNLIKELY(ax > __gmpfr_emax))
    return mpfr_overflow (a, rnd_mode, MPFR_SIGN_POS);

  /* Warning: underflow should be checked *after* rounding, thus when rounding
     away and when a > 0.111...111*2^(emin-1), or when rounding to nearest and
     a >= 0.111...111[1]*2^(emin-1), there is no underflow. */
  if (MPFR_UNLIKELY(ax < __gmpfr_emin))
    {
      if (ax == __gmpfr_emin - 1 &&
          ap[2] == MPFR_LIMB_MAX &&
          ap[1] == MPFR_LIMB_MAX &&
          ap[0] == ~mask &&
          ((rnd_mode == MPFR_RNDN && rb) ||
           (MPFR_IS_LIKE_RNDA (rnd_mode, 0) && (rb | sb))))
        goto rounding; /* no underflow */
      /* for RNDN, mpfr_underflow always rounds away, thus for |a| <= 2^(emin-2)
         we have to change to RNDZ */
      if (rnd_mode == MPFR_RNDN &&
          (ax < __gmpfr_emin - 1 ||
           (ap[2] == MPFR_LIMB_HIGHBIT && ap[1] == 0 && ap[0] == 0
            && (rb | sb) == 0)))
        rnd_mode = MPFR_RNDZ;
      return mpfr_underflow (a, rnd_mode, MPFR_SIGN_POS);
    }

 rounding:
  MPFR_EXP (a) = ax; /* Don't use MPFR_SET_EXP since ax might be < __gmpfr_emin
                        in the cases "goto rounding" above. */
  if ((rb == 0 && sb == 0) || rnd_mode == MPFR_RNDF)
    {
      MPFR_ASSERTD(ax >= __gmpfr_emin);
      MPFR_RET (0);
    }
  else if (rnd_mode == MPFR_RNDN)
    {
      if (rb == 0 || (sb == 0 && (ap[0] & (MPFR_LIMB_ONE << sh)) == 0))
        goto truncate;
      else
        goto add_one_ulp;
    }
  else if (MPFR_IS_LIKE_RNDZ (rnd_mode, 0))
    {
    truncate:
      MPFR_ASSERTD(ax >= __gmpfr_emin);
      MPFR_RET(-MPFR_SIGN_POS);
    }
  else /* round away from zero */
    {
    add_one_ulp:
      ap[0] += MPFR_LIMB_ONE << sh;
      ap[1] += (ap[0] == 0);
      ap[2] += (ap[1] == 0) && (ap[0] == 0);
      if (ap[2] == 0)
        {
          ap[2] = MPFR_LIMB_HIGHBIT;
          if (MPFR_UNLIKELY(ax + 1 > __gmpfr_emax))
            return mpfr_overflow (a, rnd_mode, MPFR_SIGN_POS);
          MPFR_ASSERTD(ax + 1 <= __gmpfr_emax);
          MPFR_ASSERTD(ax + 1 >= __gmpfr_emin);
          MPFR_SET_EXP (a, ax + 1);
        }
      MPFR_RET(MPFR_SIGN_POS);
    }
}

#endif /* !defined(MPFR_GENERIC_ABI) && ... */

/* Note: mpfr_sqr will call mpfr_mul if bn > MPFR_SQR_THRESHOLD,
   in order to use Mulders' mulhigh, which is handled only here
   to avoid partial code duplication. There is some overhead due
   to the additional tests, but the slowdown should not be noticeable
   as this code is not executed at very small precisions. */

int
mpfr_sqr (mpfr_ptr a, mpfr_srcptr b, mpfr_rnd_t rnd_mode)
{
  int cc, inexact;
  mpfr_exp_t ax;
  mp_limb_t *tmp;
  mp_limb_t b1;
  mpfr_prec_t aq, bq;
  mp_size_t bn, tn;
  MPFR_TMP_DECL(marker);

  MPFR_LOG_FUNC
    (("x[%Pd]=%.*Rg rnd=%d", mpfr_get_prec (b), mpfr_log_prec, b, rnd_mode),
     ("y[%Pd]=%.*Rg inexact=%d",
      mpfr_get_prec (a), mpfr_log_prec, a, inexact));

  /* deal with special cases */
  if (MPFR_UNLIKELY(MPFR_IS_SINGULAR(b)))
    {
      if (MPFR_IS_NAN(b))
        {
          MPFR_SET_NAN(a);
          MPFR_RET_NAN;
        }
      MPFR_SET_POS (a);
      if (MPFR_IS_INF(b))
        MPFR_SET_INF(a);
      else
        ( MPFR_ASSERTD(MPFR_IS_ZERO(b)), MPFR_SET_ZERO(a) );
      MPFR_RET(0);
    }
  aq = MPFR_GET_PREC(a);
  bq = MPFR_GET_PREC(b);

#if !defined(MPFR_GENERIC_ABI) && (GMP_NUMB_BITS == 32 || GMP_NUMB_BITS == 64)
  if (aq == bq)
    {
      if (aq < GMP_NUMB_BITS)
        return mpfr_sqr_1 (a, b, rnd_mode, aq);

      if (GMP_NUMB_BITS < aq && aq < 2 * GMP_NUMB_BITS)
        return mpfr_sqr_2 (a, b, rnd_mode, aq);

      if (aq == GMP_NUMB_BITS)
        return mpfr_sqr_1n (a, b, rnd_mode);

      if (2 * GMP_NUMB_BITS < aq && aq < 3 * GMP_NUMB_BITS)
        return mpfr_sqr_3 (a, b, rnd_mode, aq);
    }
#endif

  ax = 2 * MPFR_GET_EXP (b);
  MPFR_ASSERTN (2 * (mpfr_uprec_t) bq <= MPFR_PREC_MAX);

  bn = MPFR_LIMB_SIZE (b); /* number of limbs of b */
  tn = MPFR_PREC2LIMBS (2 * bq); /* number of limbs of the square,
                                    2*bn or 2*bn-1 */
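  /* for example, with GMP_NUMB_BITS = 64 and bq = 65, we have bn = 2
     but tn = MPFR_PREC2LIMBS(130) = 3 = 2*bn - 1 */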

  if (MPFR_UNLIKELY(bn > MPFR_SQR_THRESHOLD))
    /* the following line should not be replaced by mpfr_sqr,
       otherwise we'll get an infinite loop! */
    return mpfr_mul (a, b, b, rnd_mode);

  MPFR_TMP_MARK(marker);
  tmp = MPFR_TMP_LIMBS_ALLOC (2 * bn);

  /* square the significand in temporarily allocated space */
  mpn_sqr (tmp, MPFR_MANT(b), bn);
  b1 = tmp[2 * bn - 1];
  /* now tmp[0]..tmp[2*bn-1] contains the square of the significand of b,
     with tmp[2*bn-1] >= 2^(GMP_NUMB_BITS-2) */
  b1 >>= GMP_NUMB_BITS - 1; /* msb of the square */

  /* if the significand of b is uniformly distributed in ]1/2, 1], then its
     square is in ]1/4, 1/2] with probability sqrt(2)-1 ~ 0.414 and in
     [1/2, 1] with probability 2-sqrt(2) ~ 0.586 */
  tmp += 2 * bn - tn; /* +0 or +1 */
  if (MPFR_UNLIKELY(b1 == 0))
    mpn_lshift (tmp, tmp, tn, 1); /* tn <= 2*bn, so no buffer overflow */

  cc = mpfr_round_raw (MPFR_MANT (a), tmp, 2 * bq, 0, aq, rnd_mode, &inexact);
  /* cc = 1 ==> result is a power of two */
  if (MPFR_UNLIKELY(cc))
    MPFR_MANT(a)[MPFR_LIMB_SIZE(a)-1] = MPFR_LIMB_HIGHBIT;

  MPFR_TMP_FREE(marker);
  {
    mpfr_exp_t ax2 = ax + ((int) b1 - 1 + cc);
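    /* b1 = 1 iff the square of the significand was already in [1/2, 1),
       i.e., needed no normalization shift; cc = 1 iff rounding gave a
       power of two, which increases the exponent by one */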
    if (MPFR_UNLIKELY( ax2 > __gmpfr_emax))
      return mpfr_overflow (a, rnd_mode, MPFR_SIGN_POS);
    if (MPFR_UNLIKELY( ax2 < __gmpfr_emin))
      {
        /* In the rounding to nearest mode, if the exponent of the exact
           result (i.e. before rounding, i.e. without taking cc into account)
           is < __gmpfr_emin - 1, or if the exact result is a power of 2
           (i.e. if b is a power of 2), then round to zero. */
        if (rnd_mode == MPFR_RNDN &&
            (ax + (mpfr_exp_t) b1 < __gmpfr_emin || mpfr_powerof2_raw (b)))
          rnd_mode = MPFR_RNDZ;
        return mpfr_underflow (a, rnd_mode, MPFR_SIGN_POS);
      }
    MPFR_SET_EXP (a, ax2);
    MPFR_SET_POS (a);
  }
  MPFR_RET (inexact);
}