xref: /netbsd-src/external/lgpl3/mpfr/dist/src/set_ld.c (revision ba125506a622fe649968631a56eba5d42ff57863)
/* mpfr_set_ld -- convert a machine long double to
                  a multiple precision floating-point number

Copyright 2002-2023 Free Software Foundation, Inc.
Contributed by the AriC and Caramba projects, INRIA.

This file is part of the GNU MPFR Library.

The GNU MPFR Library is free software; you can redistribute it and/or modify
it under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation; either version 3 of the License, or (at your
option) any later version.

The GNU MPFR Library is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
License for more details.

You should have received a copy of the GNU Lesser General Public License
along with the GNU MPFR Library; see the file COPYING.LESSER.  If not, see
https://www.gnu.org/licenses/ or write to the Free Software Foundation, Inc.,
51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. */

#include <float.h> /* needed so that MPFR_LDBL_MANT_DIG is correctly defined */

#define MPFR_NEED_LONGLONG_H
#include "mpfr-impl.h"

/* To check for +inf, one can use the test x > LDBL_MAX, as LDBL_MAX is
   the maximum finite number representable in a long double, according
   to DR 467; see
     https://www.open-std.org/jtc1/sc22/wg14/www/docs/n2092.htm
   If this fails on some platform, a test x - x != 0 might be used. */

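/* Caller-side sketch (illustrative only, not part of the library sources):
   the target variable must already be initialized, and the return value is
   the usual MPFR ternary value (negative/zero/positive when the stored
   result is below/equal to/above d).

     mpfr_t y;
     int inex;
     mpfr_init2 (y, 113);
     inex = mpfr_set_ld (y, 1.0L / 3.0L, MPFR_RNDN);
     mpfr_clear (y);
*/
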
#if defined(HAVE_LDOUBLE_IS_DOUBLE)

/* the "long double" format is the same as "double" */
int
mpfr_set_ld (mpfr_ptr r, long double d, mpfr_rnd_t rnd_mode)
{
  return mpfr_set_d (r, (double) d, rnd_mode);
}

#elif defined(HAVE_LDOUBLE_IEEE_EXT_LITTLE)

#if GMP_NUMB_BITS >= 64
# define MPFR_LIMBS_PER_LONG_DOUBLE 1
#elif GMP_NUMB_BITS == 32
# define MPFR_LIMBS_PER_LONG_DOUBLE 2
#elif GMP_NUMB_BITS == 16
# define MPFR_LIMBS_PER_LONG_DOUBLE 4
#elif GMP_NUMB_BITS == 8
# define MPFR_LIMBS_PER_LONG_DOUBLE 8
#else
#error "GMP_NUMB_BITS is assumed to be 8, 16, 32 or >= 64"
#endif
/* The hypothetical GMP_NUMB_BITS == 16 and GMP_NUMB_BITS == 8 cases are
   covered by the 4-limb and 8-limb extraction code below. */

/* IEEE Extended Little Endian Code */
int
mpfr_set_ld (mpfr_ptr r, long double d, mpfr_rnd_t rnd_mode)
{
  int inexact, k, cnt;
  mpfr_t tmp;
  mp_limb_t tmpmant[MPFR_LIMBS_PER_LONG_DOUBLE];
  mpfr_long_double_t x;
  mpfr_exp_t exp;
  int signd;
  MPFR_SAVE_EXPO_DECL (expo);

  /* Check for NAN */
  if (MPFR_UNLIKELY (DOUBLE_ISNAN (d)))
    {
      /* we don't propagate the sign bit */
      MPFR_SET_NAN (r);
      MPFR_RET_NAN;
    }
  /* Check for INF */
  else if (MPFR_UNLIKELY (d > LDBL_MAX))
    {
      MPFR_SET_INF (r);
      MPFR_SET_POS (r);
      return 0;
    }
  else if (MPFR_UNLIKELY (d < -LDBL_MAX))
    {
      MPFR_SET_INF (r);
      MPFR_SET_NEG (r);
      return 0;
    }
  /* Check for ZERO */
  else if (MPFR_UNLIKELY (d == 0.0))
    {
      x.ld = d;
      MPFR_SET_ZERO (r);
      if (x.s.sign == 1)
        MPFR_SET_NEG(r);
      else
        MPFR_SET_POS(r);
      return 0;
    }

  /* now d is neither 0, nor NaN nor Inf */
  MPFR_SAVE_EXPO_MARK (expo);

  MPFR_MANT (tmp) = tmpmant;
  MPFR_PREC (tmp) = 64;
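  /* tmp is a temporary mpfr_t backed by the stack buffer tmpmant; its
     64-bit precision matches the IEEE extended significand, so the
     extraction below stores d in tmp exactly (up to the sign, which is
     passed separately to mpfr_set4). */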

  /* Extract sign */
  x.ld = d;
  signd = MPFR_SIGN_POS;
  if (x.ld < 0.0)
    {
      signd = MPFR_SIGN_NEG;
      x.ld = -x.ld;
    }

  /* Extract and normalize the significand */
#if MPFR_LIMBS_PER_LONG_DOUBLE == 1
  tmpmant[0] = ((mp_limb_t) x.s.manh << 32) | ((mp_limb_t) x.s.manl);
  count_leading_zeros (cnt, tmpmant[0]);
  tmpmant[0] <<= cnt;
  k = 0; /* number of limbs shifted */
#else /* MPFR_LIMBS_PER_LONG_DOUBLE >= 2 */
#if MPFR_LIMBS_PER_LONG_DOUBLE == 2
  tmpmant[0] = (mp_limb_t) x.s.manl;
  tmpmant[1] = (mp_limb_t) x.s.manh;
#elif MPFR_LIMBS_PER_LONG_DOUBLE == 4
  tmpmant[0] = (mp_limb_t) x.s.manl;
  tmpmant[1] = (mp_limb_t) (x.s.manl >> 16);
  tmpmant[2] = (mp_limb_t) x.s.manh;
  tmpmant[3] = (mp_limb_t) (x.s.manh >> 16);
#elif MPFR_LIMBS_PER_LONG_DOUBLE == 8
  tmpmant[0] = (mp_limb_t) x.s.manl;
  tmpmant[1] = (mp_limb_t) (x.s.manl >> 8);
  tmpmant[2] = (mp_limb_t) (x.s.manl >> 16);
  tmpmant[3] = (mp_limb_t) (x.s.manl >> 24);
  tmpmant[4] = (mp_limb_t) x.s.manh;
  tmpmant[5] = (mp_limb_t) (x.s.manh >> 8);
  tmpmant[6] = (mp_limb_t) (x.s.manh >> 16);
  tmpmant[7] = (mp_limb_t) (x.s.manh >> 24);
#else
#error "MPFR_LIMBS_PER_LONG_DOUBLE should be 1, 2, 4 or 8"
#endif /* MPFR_LIMBS_PER_LONG_DOUBLE >= 2 */
  {
    int i = MPFR_LIMBS_PER_LONG_DOUBLE;
    MPN_NORMALIZE_NOT_ZERO (tmpmant, i);
    k = MPFR_LIMBS_PER_LONG_DOUBLE - i;
    count_leading_zeros (cnt, tmpmant[i - 1]);
    if (cnt != 0)
      mpn_lshift (tmpmant + k, tmpmant, i, cnt);
    else if (k != 0)
      /* since we copy {tmpmant, i} into {tmpmant + k, i}, we should work
         decreasingly, thus call mpn_copyd */
      mpn_copyd (tmpmant + k, tmpmant, i);
    if (k != 0)
      MPN_ZERO (tmpmant, k);
  }
#endif /* MPFR_LIMBS_PER_LONG_DOUBLE == 1 */
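  /* The significand is now left-justified in tmpmant: the top bit of the
     high limb is 1 (d is nonzero).  It was shifted left by a total of
     cnt + k * GMP_NUMB_BITS bits, which the exponent computation below
     compensates for. */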

  /* Set exponent */
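  /* exph/expl form the 15-bit biased exponent e (bias 16383 = 0x3FFF).
     MPFR scales the significand to [1/2, 1), so a normal value
     m * 2^(e - 16383) with m in [1, 2) gets MPFR exponent e - 0x3FFE,
     and a subnormal (e == 0), whose value is mant * 2^(-16382 - 63),
     gets -0x3FFD; the normalization shift is accounted for below. */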
  exp = (mpfr_exp_t) ((x.s.exph << 8) + x.s.expl);  /* 15-bit unsigned int */
  if (MPFR_UNLIKELY (exp == 0))
    exp -= 0x3FFD;
  else
    exp -= 0x3FFE;

  MPFR_SET_EXP (tmp, exp - cnt - k * GMP_NUMB_BITS);

  /* tmp is exact */
  inexact = mpfr_set4 (r, tmp, rnd_mode, signd);

  MPFR_SAVE_EXPO_FREE (expo);
  return mpfr_check_range (r, inexact, rnd_mode);
}

#elif defined(HAVE_LDOUBLE_MAYBE_DOUBLE_DOUBLE)

/* double-double code, see
   https://gcc.gnu.org/git/?p=gcc.git;a=blob;f=libgcc/config/rs6000/ibm-ldouble-format;h=e8ada17f7696cd942e710d5b67d4149f5fcccf45;hb=HEAD */
int
mpfr_set_ld (mpfr_ptr r, long double d, mpfr_rnd_t rnd_mode)
{
  mpfr_t t, u;
  int inexact;
  double h, l;
  MPFR_SAVE_EXPO_DECL (expo);

  /* Check for NAN. Since we can't use isnan(), we rely on the
     LONGDOUBLE_NAN_ACTION macro. The sign bit is not propagated. */
  LONGDOUBLE_NAN_ACTION (d, { MPFR_SET_NAN(r); MPFR_RET_NAN; });

  /* Check for INF */
  if (d > LDBL_MAX)
    {
      mpfr_set_inf (r, 1);
      return 0;
    }
  else if (d < -LDBL_MAX)
    {
      mpfr_set_inf (r, -1);
      return 0;
    }
  /* Check for ZERO */
  else if (d == 0.0)
    return mpfr_set_d (r, (double) d, rnd_mode);

  if (d >= (long double) DBL_MAX || d <= - (long double) DBL_MAX)
    h = (d >= (long double) DBL_MAX) ? DBL_MAX : -DBL_MAX;
  else
    h = (double) d; /* should not overflow */
  l = (double) (d - (long double) h);
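  /* On these platforms a long double is typically an IBM double-double:
     the exact sum hi + lo of two doubles with |lo| at most half an ulp of
     hi, giving about 106 bits of precision but only double's exponent
     range.  Here h is the double nearest to d (clamped to +/-DBL_MAX) and
     l = d - h is then exactly representable, so d == h + l exactly. */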

  MPFR_SAVE_EXPO_MARK (expo);

  mpfr_init2 (t, IEEE_DBL_MANT_DIG);
  mpfr_init2 (u, IEEE_DBL_MANT_DIG);

  inexact = mpfr_set_d (t, h, MPFR_RNDN);
  MPFR_ASSERTN(inexact == 0);
  inexact = mpfr_set_d (u, l, MPFR_RNDN);
  MPFR_ASSERTN(inexact == 0);
  inexact = mpfr_add (r, t, u, rnd_mode);

  mpfr_clear (t);
  mpfr_clear (u);

  MPFR_SAVE_EXPO_FREE (expo);
  inexact = mpfr_check_range (r, inexact, rnd_mode);
  return inexact;
}

#else

/* Generic code */
int
mpfr_set_ld (mpfr_ptr r, long double d, mpfr_rnd_t rnd_mode)
{
  mpfr_t t, u;
  int inexact, shift_exp;
  long double x;
  MPFR_SAVE_EXPO_DECL (expo);

  /* Check for NAN. Since we can't use isnan(), we rely on the
     LONGDOUBLE_NAN_ACTION macro. The sign bit is not propagated. */
  LONGDOUBLE_NAN_ACTION (d, { MPFR_SET_NAN(r); MPFR_RET_NAN; });

  /* Check for INF */
  if (d > LDBL_MAX)
    {
      mpfr_set_inf (r, 1);
      return 0;
    }
  else if (d < -LDBL_MAX)
    {
      mpfr_set_inf (r, -1);
      return 0;
    }
  /* Check for ZERO */
  else if (d == 0.0)
    return mpfr_set_d (r, (double) d, rnd_mode);

  mpfr_init2 (t, MPFR_LDBL_MANT_DIG);
  mpfr_init2 (u, IEEE_DBL_MANT_DIG);

  MPFR_SAVE_EXPO_MARK (expo);

 convert:
  x = d;
  MPFR_SET_ZERO (t);  /* The sign doesn't matter. */
  shift_exp = 0; /* invariant: remainder to deal with is x*2^shift_exp */
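  /* Each iteration below converts the leading bits of x exactly through a
     double (u), accumulates u into t and removes it from x; when |x| is
     outside the double range, both x and t are first rescaled by an exact
     power of two, recorded in shift_exp and undone at the end. */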
  while (x != (long double) 0.0)
    {
      /* Check overflow of double */
      if (x > (long double) DBL_MAX || (-x) > (long double) DBL_MAX)
        {
          long double div9, div10, div11, div12, div13;

#define TWO_64 18446744073709551616.0 /* 2^64 */
#define TWO_128 (TWO_64 * TWO_64)
#define TWO_256 (TWO_128 * TWO_128)
          div9 = (long double) (double) (TWO_256 * TWO_256); /* 2^(2^9) */
          div10 = div9 * div9;
          div11 = div10 * div10; /* 2^(2^11) */
          div12 = div11 * div11; /* 2^(2^12) */
          div13 = div12 * div12; /* 2^(2^13) */
          if (ABS (x) >= div13)
            {
              x /= div13; /* exact */
              shift_exp += 8192;
              mpfr_div_2si (t, t, 8192, MPFR_RNDZ);
            }
          if (ABS (x) >= div12)
            {
              x /= div12; /* exact */
              shift_exp += 4096;
              mpfr_div_2si (t, t, 4096, MPFR_RNDZ);
            }
          if (ABS (x) >= div11)
            {
              x /= div11; /* exact */
              shift_exp += 2048;
              mpfr_div_2si (t, t, 2048, MPFR_RNDZ);
            }
          if (ABS (x) >= div10)
            {
              x /= div10; /* exact */
              shift_exp += 1024;
              mpfr_div_2si (t, t, 1024, MPFR_RNDZ);
            }
          /* warning: we may have DBL_MAX=2^1024*(1-2^(-53)) < x < 2^1024,
             therefore we have one extra exponent reduction step */
          if (ABS (x) >= div9)
            {
              x /= div9; /* exact */
              shift_exp += 512;
              mpfr_div_2si (t, t, 512, MPFR_RNDZ);
            }
        } /* Check overflow of double */
      else /* no overflow on double */
        {
          long double div9, div10, div11;

          div9 = (long double) (double) 7.4583407312002067432909653e-155;
          /* div9 = 2^(-2^9) */
          div10 = div9  * div9;  /* 2^(-2^10) */
          div11 = div10 * div10; /* 2^(-2^11) if extended precision */
          /* since -DBL_MAX <= x <= DBL_MAX, the cast to double should not
             overflow here */
          if (ABS(x) < div10 &&
              div11 != (long double) 0.0 &&
              div11 / div10 == div10) /* possible underflow */
            {
              long double div12, div13;
              /* After the divisions, any bit of x must be >= div10,
                 hence the possible division by div9. */
              div12 = div11 * div11; /* 2^(-2^12) */
              div13 = div12 * div12; /* 2^(-2^13) */
              if (ABS (x) <= div13)
                {
                  x /= div13; /* exact */
                  shift_exp -= 8192;
                  mpfr_mul_2si (t, t, 8192, MPFR_RNDZ);
                }
              if (ABS (x) <= div12)
                {
                  x /= div12; /* exact */
                  shift_exp -= 4096;
                  mpfr_mul_2si (t, t, 4096, MPFR_RNDZ);
                }
              if (ABS (x) <= div11)
                {
                  x /= div11; /* exact */
                  shift_exp -= 2048;
                  mpfr_mul_2si (t, t, 2048, MPFR_RNDZ);
                }
              if (ABS (x) <= div10)
                {
                  x /= div10; /* exact */
                  shift_exp -= 1024;
                  mpfr_mul_2si (t, t, 1024, MPFR_RNDZ);
                }
              if (ABS(x) <= div9)
                {
                  x /= div9;  /* exact */
                  shift_exp -= 512;
                  mpfr_mul_2si (t, t, 512, MPFR_RNDZ);
                }
            }
          else /* no underflow */
            {
              inexact = mpfr_set_d (u, (double) x, MPFR_RNDZ);
              MPFR_ASSERTD (inexact == 0);
              if (mpfr_add (t, t, u, MPFR_RNDZ) != 0)
                {
                  if (!mpfr_number_p (t))
                    break;
                  /* Inexact. This cannot happen unless the C implementation
                     "lies" on the precision or when long doubles are
                     implemented with FP expansions like double-double on
                     PowerPC. */
                  if (MPFR_PREC (t) != MPFR_PREC (r) + 1)
                    {
                      /* We assume that MPFR_PREC (r) < MPFR_PREC_MAX.
                         The precision MPFR_PREC (r) + 1 allows us to
                         deduce the rounding bit and the sticky bit. */
                      mpfr_set_prec (t, MPFR_PREC (r) + 1);
                      goto convert;
                    }
                  else
                    {
                      mp_limb_t *tp;
                      int rb_mask;

                      /* Since mpfr_add was inexact, the sticky bit is 1. */
                      tp = MPFR_MANT (t);
                      rb_mask = MPFR_LIMB_ONE <<
                        (GMP_NUMB_BITS - 1 -
                         (MPFR_PREC (r) & (GMP_NUMB_BITS - 1)));
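                      /* rb_mask selects the least significant bit of t
                         (precision PREC(r) + 1), i.e. the rounding bit for
                         the target precision; setting it below marks t as
                         inexact at PREC(r) so the final rounding also
                         yields the correct ternary value. */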
                      if (rnd_mode == MPFR_RNDN)
                        rnd_mode = (*tp & rb_mask) ^ MPFR_IS_NEG (t) ?
                          MPFR_RNDU : MPFR_RNDD;
                      *tp |= rb_mask;
                      break;
                    }
                }
              x -= (long double) mpfr_get_d1 (u); /* exact */
            }
        }
    }
  inexact = mpfr_mul_2si (r, t, shift_exp, rnd_mode);
  mpfr_clear (t);
  mpfr_clear (u);

  MPFR_SAVE_EXPO_FREE (expo);
  return mpfr_check_range (r, inexact, rnd_mode);
}

#endif