xref: /netbsd-src/external/lgpl3/mpfr/dist/src/set_d.c (revision ba125506a622fe649968631a56eba5d42ff57863)
1 /* mpfr_set_d -- convert a machine double precision float to
2                  a multiple precision floating-point number
3 
4 Copyright 1999-2004, 2006-2023 Free Software Foundation, Inc.
5 Contributed by the AriC and Caramba projects, INRIA.
6 
7 This file is part of the GNU MPFR Library.
8 
9 The GNU MPFR Library is free software; you can redistribute it and/or modify
10 it under the terms of the GNU Lesser General Public License as published by
11 the Free Software Foundation; either version 3 of the License, or (at your
12 option) any later version.
13 
14 The GNU MPFR Library is distributed in the hope that it will be useful, but
15 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
16 or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
17 License for more details.
18 
19 You should have received a copy of the GNU Lesser General Public License
20 along with the GNU MPFR Library; see the file COPYING.LESSER.  If not, see
21 https://www.gnu.org/licenses/ or write to the Free Software Foundation, Inc.,
22 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. */
23 
24 #include <float.h>  /* For DOUBLE_ISINF and DOUBLE_ISNAN */
25 
26 #define MPFR_NEED_LONGLONG_H
27 #include "mpfr-impl.h"
28 
29 /* Extracts the bits of |d| in rp[0..n-1] where n=ceil(53/GMP_NUMB_BITS).
30    Assumes d finite and <> 0.
31    Returns the corresponding exponent such that |d| = {rp, n} * 2^exp,
32    with the value of {rp, n} in [1/2, 1).
33    The int type should be sufficient for exp.
34 */
35 static int
extract_double(mpfr_limb_ptr rp,double d)36 extract_double (mpfr_limb_ptr rp, double d)
37 {
38   int exp;
39   mp_limb_t man[MPFR_LIMBS_PER_DOUBLE];
40 
41   /* FIXME: Generalize to handle GMP_NUMB_BITS < 16. */
42 
43   MPFR_ASSERTD(!DOUBLE_ISNAN(d));
44   MPFR_ASSERTD(!DOUBLE_ISINF(d));
45   MPFR_ASSERTD(d != 0.0);
46 
47 #if _MPFR_IEEE_FLOATS
48 
49   {
50     union mpfr_ieee_double_extract x;
51     x.d = d;
52 
53     exp = x.s.exp;
54     if (exp)
55       {
56         /* x.s.manh has 20 bits (in its low bits), x.s.manl has 32 bits */
57 #if GMP_NUMB_BITS >= 64
58         man[0] = ((MPFR_LIMB_ONE << (GMP_NUMB_BITS - 1)) |
59                   ((mp_limb_t) x.s.manh << (GMP_NUMB_BITS - 21)) |
60                   ((mp_limb_t) x.s.manl << (GMP_NUMB_BITS - 53)));
61 #elif GMP_NUMB_BITS == 32
62         man[1] = (MPFR_LIMB_ONE << 31) | (x.s.manh << 11) | (x.s.manl >> 21);
63         man[0] = x.s.manl << 11;
64 #elif GMP_NUMB_BITS == 16
65         MPFR_STAT_STATIC_ASSERT (GMP_NUMB_BITS == 16);
66         man[3] = (MPFR_LIMB_ONE << 15) | (x.s.manh >> 5);
67         man[2] = (x.s.manh << 11) | (x.s.manl >> 21);
68         man[1] = x.s.manl >> 5;
69         man[0] = MPFR_LIMB_LSHIFT(x.s.manl,11);
70 #else
71         MPFR_STAT_STATIC_ASSERT (GMP_NUMB_BITS == 8);
72         man[6] = (MPFR_LIMB_ONE << 7) | (x.s.manh >> 13);
73         man[5] = (mp_limb_t) (x.s.manh >> 5);
74         man[4] = MPFR_LIMB_LSHIFT(x.s.manh, 3) | (mp_limb_t) (x.s.manl >> 29);
75         man[3] = (mp_limb_t) (x.s.manl >> 21);
76         man[2] = (mp_limb_t) (x.s.manl >> 13);
77         man[1] = (mp_limb_t) (x.s.manl >> 5);
78         man[0] = MPFR_LIMB_LSHIFT(x.s.manl,3);
79 #endif
80         exp -= 1022;
81       }
82     else /* subnormal number */
83       {
84         int cnt;
85         exp = -1021;
86 #if GMP_NUMB_BITS >= 64
87         man[0] = (((mp_limb_t) x.s.manh << (GMP_NUMB_BITS - 21)) |
88                   ((mp_limb_t) x.s.manl << (GMP_NUMB_BITS - 53)));
89         count_leading_zeros (cnt, man[0]);
90 #elif GMP_NUMB_BITS == 32
91         man[1] = (x.s.manh << 11) /* high 21 bits */
92           | (x.s.manl >> 21); /* middle 11 bits */
93         man[0] = x.s.manl << 11; /* low 21 bits */
94         if (man[1] == 0)
95           {
96             man[1] = man[0];
97             man[0] = 0;
98             exp -= GMP_NUMB_BITS;
99           }
100         count_leading_zeros (cnt, man[1]);
101         man[1] = (man[1] << cnt) |
102           (cnt != 0 ? man[0] >> (GMP_NUMB_BITS - cnt) : 0);
103 #elif GMP_NUMB_BITS == 16
104         man[3] = x.s.manh >> 5;
105         man[2] = (x.s.manh << 11) | (x.s.manl >> 21);
106         man[1] = x.s.manl >> 5;
107         man[0] = x.s.manl << 11;
108         while (man[3] == 0) /* d is assumed <> 0 */
109           {
110             man[3] = man[2];
111             man[2] = man[1];
112             man[1] = man[0];
113             man[0] = 0;
114             exp -= GMP_NUMB_BITS;
115           }
116         count_leading_zeros (cnt, man[3]);
117         if (cnt)
118           {
119             man[3] = (man[3] << cnt) | (man[2] >> (GMP_NUMB_BITS - cnt));
120             man[2] = (man[2] << cnt) | (man[1] >> (GMP_NUMB_BITS - cnt));
121             man[1] = (man[1] << cnt) | (man[0] >> (GMP_NUMB_BITS - cnt));
122           }
123 #else
124         MPFR_STAT_STATIC_ASSERT (GMP_NUMB_BITS == 8);
125         man[6] = x.s.manh >> 13;
126         man[5] = x.s.manh >> 5;
127         man[4] = (x.s.manh << 3) | (x.s.manl >> 29);
128         man[3] = x.s.manl >> 21;
129         man[2] = x.s.manl >> 13;
130         man[1] = x.s.manl >> 5;
131         man[0] = x.s.manl << 3;
132         while (man[6] == 0) /* d is assumed <> 0 */
133           {
134             man[6] = man[5];
135             man[5] = man[4];
136             man[4] = man[3];
137             man[3] = man[2];
138             man[2] = man[1];
139             man[1] = man[0];
140             man[0] = 0;
141             exp -= GMP_NUMB_BITS;
142           }
143         count_leading_zeros (cnt, man[6]);
144         if (cnt)
145           {
146             int i;
147             for (i = 6; i > 0; i--)
148               man[i] = (man[i] << cnt) | (man[i-1] >> (GMP_NUMB_BITS - cnt));
149           }
150 #endif
151         man[0] <<= cnt;
152         exp -= cnt;
153       }
154   }
155 
156 #else /* _MPFR_IEEE_FLOATS */
157 
158   {
159     /* Unknown (or known to be non-IEEE) double format.  */
160     exp = 0;
161     d = ABS (d);
162     if (d >= 1.0)
163       {
164         while (d >= 32768.0)
165           {
166             d *= (1.0 / 65536.0);
167             exp += 16;
168           }
169         while (d >= 1.0)
170           {
171             d *= 0.5;
172             exp += 1;
173           }
174       }
175     else if (d < 0.5)
176       {
177         while (d < (1.0 / 65536.0))
178           {
179             d *=  65536.0;
180             exp -= 16;
181           }
182         while (d < 0.5)
183           {
184             d *= 2.0;
185             exp -= 1;
186           }
187       }
188 
189     d *= MP_BASE_AS_DOUBLE;
190 #if GMP_NUMB_BITS >= 64
191 #ifndef __clang__
192     man[0] = d;
193 #else
194     /* clang up to version 11 produces an invalid exception when d >= 2^63,
195        see <https://github.com/llvm/llvm-project/issues/18060>
196        (old URL: <https://bugs.llvm.org/show_bug.cgi?id=17686>).
197        Since this is always the case, here, we use the following patch. */
198     MPFR_STAT_STATIC_ASSERT (GMP_NUMB_BITS == 64);
199     man[0] = 0x8000000000000000 + (mp_limb_t) (d - 0x8000000000000000);
200 #endif /* __clang__ */
201 #elif GMP_NUMB_BITS == 32
202     man[1] = (mp_limb_t) d;
203     man[0] = (mp_limb_t) ((d - man[1]) * MP_BASE_AS_DOUBLE);
204 #else
205     MPFR_STAT_STATIC_ASSERT (GMP_NUMB_BITS == 16);
206     {
207       double r = d;
208       man[3] = (mp_limb_t) r;
209       r = (r - man[3]) * MP_BASE_AS_DOUBLE;
210       man[2] = (mp_limb_t) r;
211       r = (r - man[2]) * MP_BASE_AS_DOUBLE;
212       man[1] = (mp_limb_t) r;
213       r = (r - man[1]) * MP_BASE_AS_DOUBLE;
214       man[0] = (mp_limb_t) r;
215     }
216 #endif
217   }
218 
219 #endif /* _MPFR_IEEE_FLOATS */
220 
221   rp[0] = man[0];
222 #if GMP_NUMB_BITS <= 32
223   rp[1] = man[1];
224 #endif
225 #if GMP_NUMB_BITS <= 16
226   rp[2] = man[2];
227   rp[3] = man[3];
228 #endif
229 #if GMP_NUMB_BITS <= 8
230   rp[4] = man[4];
231   rp[5] = man[5];
232   rp[6] = man[6];
233 #endif
234 
235   MPFR_ASSERTD((rp[MPFR_LIMBS_PER_DOUBLE - 1] & MPFR_LIMB_HIGHBIT) != 0);
236 
237   return exp;
238 }
239 
240 /* End of part included from gmp-2.0.2 */
241 
242 int
mpfr_set_d(mpfr_ptr r,double d,mpfr_rnd_t rnd_mode)243 mpfr_set_d (mpfr_ptr r, double d, mpfr_rnd_t rnd_mode)
244 {
245   int inexact;
246   mpfr_t tmp;
247   mp_limb_t tmpmant[MPFR_LIMBS_PER_DOUBLE];
248   MPFR_SAVE_EXPO_DECL (expo);
249 
250   if (MPFR_UNLIKELY(DOUBLE_ISNAN(d)))
251     {
252       /* we don't propagate the sign bit */
253       MPFR_SET_NAN(r);
254       MPFR_RET_NAN;
255     }
256   else if (MPFR_UNLIKELY(d == 0))
257     {
258 #if _MPFR_IEEE_FLOATS
259       union mpfr_ieee_double_extract x;
260 
261       MPFR_SET_ZERO(r);
262       /* set correct sign */
263       x.d = d;
264       if (x.s.sig == 1)
265         MPFR_SET_NEG(r);
266       else
267         MPFR_SET_POS(r);
268 #else /* _MPFR_IEEE_FLOATS */
269       MPFR_SET_ZERO(r);
270       {
271         /* This is to get the sign of zero on non-IEEE hardware
272            Some systems support +0.0, -0.0, and unsigned zero.
273            Some other systems may just have an unsigned zero.
274            We can't use d == +0.0 since it should be always true,
275            so we check that the memory representation of d is the
276            same as +0.0, etc.
277            Note: r is set to -0 only if d is detected as a negative zero
278            *and*, for the double type, -0 has a different representation
279            from +0. If -0.0 has several representations, the code below
280            may not work as expected, but this is hardly fixable in a
281            portable way (without depending on a math library) and only
282            the sign could be incorrect. Such systems should be taken
283            into account on a case-by-case basis. If the code is changed
284            here, set_d64.c code should be updated too. */
285         double poszero = +0.0, negzero = DBL_NEG_ZERO;
286         if (memcmp(&d, &poszero, sizeof(double)) == 0)
287           MPFR_SET_POS(r);
288         else if (memcmp(&d, &negzero, sizeof(double)) == 0)
289           MPFR_SET_NEG(r);
290         else
291           MPFR_SET_POS(r);
292       }
293 #endif /* _MPFR_IEEE_FLOATS */
294       return 0; /* 0 is exact */
295     }
296   else if (MPFR_UNLIKELY(DOUBLE_ISINF(d)))
297     {
298       MPFR_SET_INF(r);
299       if (d > 0)
300         MPFR_SET_POS(r);
301       else
302         MPFR_SET_NEG(r);
303       return 0; /* infinity is exact */
304     }
305 
306   /* now d is neither 0, nor NaN nor Inf */
307 
308   MPFR_SAVE_EXPO_MARK (expo);
309 
310   /* warning: don't use tmp=r here, even if SIZE(r) >= MPFR_LIMBS_PER_DOUBLE,
311      since PREC(r) may be different from PREC(tmp), and then both variables
312      would have same precision in the mpfr_set4 call below. */
313   MPFR_MANT(tmp) = tmpmant;
314   MPFR_PREC(tmp) = IEEE_DBL_MANT_DIG;
315 
316   /* don't use MPFR_SET_EXP here since the exponent may be out of range */
317   MPFR_EXP(tmp) = extract_double (tmpmant, d);
318 
319   /* tmp is exact since PREC(tmp)=53 */
320   inexact = mpfr_set4 (r, tmp, rnd_mode,
321                        (d < 0) ? MPFR_SIGN_NEG : MPFR_SIGN_POS);
322 
323   MPFR_SAVE_EXPO_FREE (expo);
324   return mpfr_check_range (r, inexact, rnd_mode);
325 }
326 
327 
328 
329