xref: /netbsd-src/external/lgpl3/mpfr/dist/src/set_ld.c (revision e670fd5c413e99c2f6a37901bb21c537fcd322d2)
1 /* mpfr_set_ld -- convert a machine long double to
2                   a multiple precision floating-point number
3 
4 Copyright 2002-2020 Free Software Foundation, Inc.
5 Contributed by the AriC and Caramba projects, INRIA.
6 
7 This file is part of the GNU MPFR Library.
8 
9 The GNU MPFR Library is free software; you can redistribute it and/or modify
10 it under the terms of the GNU Lesser General Public License as published by
11 the Free Software Foundation; either version 3 of the License, or (at your
12 option) any later version.
13 
14 The GNU MPFR Library is distributed in the hope that it will be useful, but
15 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
16 or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
17 License for more details.
18 
19 You should have received a copy of the GNU Lesser General Public License
20 along with the GNU MPFR Library; see the file COPYING.LESSER.  If not, see
21 https://www.gnu.org/licenses/ or write to the Free Software Foundation, Inc.,
22 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. */
23 
24 #include <float.h> /* needed so that MPFR_LDBL_MANT_DIG is correctly defined */
25 
26 #define MPFR_NEED_LONGLONG_H
27 #include "mpfr-impl.h"
28 
29 /* To check for +inf, one can use the test x > LDBL_MAX, as LDBL_MAX is
30    the maximum finite number representable in a long double, according
31    to DR 467; see
32      http://www.open-std.org/jtc1/sc22/wg14/www/docs/n2092.htm
33    If this fails on some platform, a test x - x != 0 might be used. */
34 
35 #if defined(HAVE_LDOUBLE_IS_DOUBLE)
36 
37 /* the "long double" format is the same as "double" */
38 int
39 mpfr_set_ld (mpfr_ptr r, long double d, mpfr_rnd_t rnd_mode)
40 {
41   return mpfr_set_d (r, (double) d, rnd_mode);
42 }
43 
44 #elif defined(HAVE_LDOUBLE_IEEE_EXT_LITTLE)
45 
46 #if GMP_NUMB_BITS >= 64
47 # define MPFR_LIMBS_PER_LONG_DOUBLE 1
48 #elif GMP_NUMB_BITS == 32
49 # define MPFR_LIMBS_PER_LONG_DOUBLE 2
50 #elif GMP_NUMB_BITS == 16
51 # define MPFR_LIMBS_PER_LONG_DOUBLE 4
52 #elif GMP_NUMB_BITS == 8
53 # define MPFR_LIMBS_PER_LONG_DOUBLE 8
54 #else
55 #error "GMP_NUMB_BITS is assumed to be 8, 16, 32 or >= 64"
56 #endif
57 /* The hypothetical GMP_NUMB_BITS == 16 is not supported. It will trigger
58    an error below. */
59 
60 /* IEEE Extended Little Endian Code */
61 int
62 mpfr_set_ld (mpfr_ptr r, long double d, mpfr_rnd_t rnd_mode)
63 {
64   int inexact, k, cnt;
65   mpfr_t tmp;
66   mp_limb_t tmpmant[MPFR_LIMBS_PER_LONG_DOUBLE];
67   mpfr_long_double_t x;
68   mpfr_exp_t exp;
69   int signd;
70   MPFR_SAVE_EXPO_DECL (expo);
71 
72   /* Check for NAN */
73   if (MPFR_UNLIKELY (DOUBLE_ISNAN (d)))
74     {
75       MPFR_SET_NAN (r);
76       MPFR_RET_NAN;
77     }
78   /* Check for INF */
79   else if (MPFR_UNLIKELY (d > LDBL_MAX))
80     {
81       MPFR_SET_INF (r);
82       MPFR_SET_POS (r);
83       return 0;
84     }
85   else if (MPFR_UNLIKELY (d < -LDBL_MAX))
86     {
87       MPFR_SET_INF (r);
88       MPFR_SET_NEG (r);
89       return 0;
90     }
91   /* Check for ZERO */
92   else if (MPFR_UNLIKELY (d == 0.0))
93     {
94       x.ld = d;
95       MPFR_SET_ZERO (r);
96       if (x.s.sign == 1)
97         MPFR_SET_NEG(r);
98       else
99         MPFR_SET_POS(r);
100       return 0;
101     }
102 
103   /* now d is neither 0, nor NaN nor Inf */
104   MPFR_SAVE_EXPO_MARK (expo);
105 
106   MPFR_MANT (tmp) = tmpmant;
107   MPFR_PREC (tmp) = 64;
108 
109   /* Extract sign */
110   x.ld = d;
111   signd = MPFR_SIGN_POS;
112   if (x.ld < 0.0)
113     {
114       signd = MPFR_SIGN_NEG;
115       x.ld = -x.ld;
116     }
117 
118   /* Extract and normalize the significand */
119 #if MPFR_LIMBS_PER_LONG_DOUBLE == 1
120   tmpmant[0] = ((mp_limb_t) x.s.manh << 32) | ((mp_limb_t) x.s.manl);
121   count_leading_zeros (cnt, tmpmant[0]);
122   tmpmant[0] <<= cnt;
123   k = 0; /* number of limbs shifted */
124 #else /* MPFR_LIMBS_PER_LONG_DOUBLE >= 2 */
125 #if MPFR_LIMBS_PER_LONG_DOUBLE == 2
126   tmpmant[0] = (mp_limb_t) x.s.manl;
127   tmpmant[1] = (mp_limb_t) x.s.manh;
128 #elif MPFR_LIMBS_PER_LONG_DOUBLE == 4
129   tmpmant[0] = (mp_limb_t) x.s.manl;
130   tmpmant[1] = (mp_limb_t) (x.s.manl >> 16);
131   tmpmant[2] = (mp_limb_t) x.s.manh;
132   tmpmant[3] = (mp_limb_t) (x.s.manh >> 16);
133 #elif MPFR_LIMBS_PER_LONG_DOUBLE == 8
134   tmpmant[0] = (mp_limb_t) x.s.manl;
135   tmpmant[1] = (mp_limb_t) (x.s.manl >> 8);
136   tmpmant[2] = (mp_limb_t) (x.s.manl >> 16);
137   tmpmant[3] = (mp_limb_t) (x.s.manl >> 24);
138   tmpmant[4] = (mp_limb_t) x.s.manh;
139   tmpmant[5] = (mp_limb_t) (x.s.manh >> 8);
140   tmpmant[6] = (mp_limb_t) (x.s.manh >> 16);
141   tmpmant[7] = (mp_limb_t) (x.s.manh >> 24);
142 #else
143 #error "MPFR_LIMBS_PER_LONG_DOUBLE should be 1, 2, 4 or 8"
144 #endif /* MPFR_LIMBS_PER_LONG_DOUBLE >= 2 */
145   {
146     int i = MPFR_LIMBS_PER_LONG_DOUBLE;
147     MPN_NORMALIZE_NOT_ZERO (tmpmant, i);
148     k = MPFR_LIMBS_PER_LONG_DOUBLE - i;
149     count_leading_zeros (cnt, tmpmant[i - 1]);
150     if (cnt != 0)
151       mpn_lshift (tmpmant + k, tmpmant, i, cnt);
152     else if (k != 0)
153       /* since we copy {tmpmant, i} into {tmpmant + k, i}, we should work
154          decreasingly, thus call mpn_copyd */
155       mpn_copyd (tmpmant + k, tmpmant, i);
156     if (k != 0)
157       MPN_ZERO (tmpmant, k);
158   }
159 #endif /* MPFR_LIMBS_PER_LONG_DOUBLE == 1 */
160 
161   /* Set exponent */
162   exp = (mpfr_exp_t) ((x.s.exph << 8) + x.s.expl);  /* 15-bit unsigned int */
163   if (MPFR_UNLIKELY (exp == 0))
164     exp -= 0x3FFD;
165   else
166     exp -= 0x3FFE;
167 
168   MPFR_SET_EXP (tmp, exp - cnt - k * GMP_NUMB_BITS);
169 
170   /* tmp is exact */
171   inexact = mpfr_set4 (r, tmp, rnd_mode, signd);
172 
173   MPFR_SAVE_EXPO_FREE (expo);
174   return mpfr_check_range (r, inexact, rnd_mode);
175 }
176 
177 #elif defined(HAVE_LDOUBLE_MAYBE_DOUBLE_DOUBLE)
178 
179 /* double-double code */
180 int
181 mpfr_set_ld (mpfr_ptr r, long double d, mpfr_rnd_t rnd_mode)
182 {
183   mpfr_t t, u;
184   int inexact;
185   double h, l;
186   MPFR_SAVE_EXPO_DECL (expo);
187 
188   /* Check for NAN */
189   LONGDOUBLE_NAN_ACTION (d, goto nan);
190 
191   /* Check for INF */
192   if (d > LDBL_MAX)
193     {
194       mpfr_set_inf (r, 1);
195       return 0;
196     }
197   else if (d < -LDBL_MAX)
198     {
199       mpfr_set_inf (r, -1);
200       return 0;
201     }
202   /* Check for ZERO */
203   else if (d == 0.0)
204     return mpfr_set_d (r, (double) d, rnd_mode);
205 
206   if (d >= LDBL_MAX || d <= -LDBL_MAX)
207     h = (d >= LDBL_MAX) ? LDBL_MAX : -LDBL_MAX;
208   else
209     h = (double) d; /* should not overflow */
210   l = (double) (d - (long double) h);
211 
212   MPFR_SAVE_EXPO_MARK (expo);
213 
214   mpfr_init2 (t, IEEE_DBL_MANT_DIG);
215   mpfr_init2 (u, IEEE_DBL_MANT_DIG);
216 
217   inexact = mpfr_set_d (t, h, MPFR_RNDN);
218   MPFR_ASSERTN(inexact == 0);
219   inexact = mpfr_set_d (u, l, MPFR_RNDN);
220   MPFR_ASSERTN(inexact == 0);
221   inexact = mpfr_add (r, t, u, rnd_mode);
222 
223   mpfr_clear (t);
224   mpfr_clear (u);
225 
226   MPFR_SAVE_EXPO_FREE (expo);
227   inexact = mpfr_check_range (r, inexact, rnd_mode);
228   return inexact;
229 
230  nan:
231   MPFR_SET_NAN(r);
232   MPFR_RET_NAN;
233 }
234 
235 #else
236 
237 /* Generic code */
238 int
239 mpfr_set_ld (mpfr_ptr r, long double d, mpfr_rnd_t rnd_mode)
240 {
241   mpfr_t t, u;
242   int inexact, shift_exp;
243   long double x;
244   MPFR_SAVE_EXPO_DECL (expo);
245 
246   /* Check for NAN */
247   LONGDOUBLE_NAN_ACTION (d, goto nan);
248 
249   /* Check for INF */
250   if (d > LDBL_MAX)
251     {
252       mpfr_set_inf (r, 1);
253       return 0;
254     }
255   else if (d < -LDBL_MAX)
256     {
257       mpfr_set_inf (r, -1);
258       return 0;
259     }
260   /* Check for ZERO */
261   else if (d == 0.0)
262     return mpfr_set_d (r, (double) d, rnd_mode);
263 
264   mpfr_init2 (t, MPFR_LDBL_MANT_DIG);
265   mpfr_init2 (u, IEEE_DBL_MANT_DIG);
266 
267   MPFR_SAVE_EXPO_MARK (expo);
268 
269  convert:
270   x = d;
271   MPFR_SET_ZERO (t);  /* The sign doesn't matter. */
272   shift_exp = 0; /* invariant: remainder to deal with is d*2^shift_exp */
273   while (x != (long double) 0.0)
274     {
275       /* Check overflow of double */
276       if (x > (long double) DBL_MAX || (-x) > (long double) DBL_MAX)
277         {
278           long double div9, div10, div11, div12, div13;
279 
280 #define TWO_64 18446744073709551616.0 /* 2^64 */
281 #define TWO_128 (TWO_64 * TWO_64)
282 #define TWO_256 (TWO_128 * TWO_128)
283           div9 = (long double) (double) (TWO_256 * TWO_256); /* 2^(2^9) */
284           div10 = div9 * div9;
285           div11 = div10 * div10; /* 2^(2^11) */
286           div12 = div11 * div11; /* 2^(2^12) */
287           div13 = div12 * div12; /* 2^(2^13) */
288           if (ABS (x) >= div13)
289             {
290               x /= div13; /* exact */
291               shift_exp += 8192;
292               mpfr_div_2si (t, t, 8192, MPFR_RNDZ);
293             }
294           if (ABS (x) >= div12)
295             {
296               x /= div12; /* exact */
297               shift_exp += 4096;
298               mpfr_div_2si (t, t, 4096, MPFR_RNDZ);
299             }
300           if (ABS (x) >= div11)
301             {
302               x /= div11; /* exact */
303               shift_exp += 2048;
304               mpfr_div_2si (t, t, 2048, MPFR_RNDZ);
305             }
306           if (ABS (x) >= div10)
307             {
308               x /= div10; /* exact */
309               shift_exp += 1024;
310               mpfr_div_2si (t, t, 1024, MPFR_RNDZ);
311             }
312           /* warning: we may have DBL_MAX=2^1024*(1-2^(-53)) < x < 2^1024,
313              therefore we have one extra exponent reduction step */
314           if (ABS (x) >= div9)
315             {
316               x /= div9; /* exact */
317               shift_exp += 512;
318               mpfr_div_2si (t, t, 512, MPFR_RNDZ);
319             }
320         } /* Check overflow of double */
321       else /* no overflow on double */
322         {
323           long double div9, div10, div11;
324 
325           div9 = (long double) (double) 7.4583407312002067432909653e-155;
326           /* div9 = 2^(-2^9) */
327           div10 = div9  * div9;  /* 2^(-2^10) */
328           div11 = div10 * div10; /* 2^(-2^11) if extended precision */
329           /* since -DBL_MAX <= x <= DBL_MAX, the cast to double should not
330              overflow here */
331           if (ABS(x) < div10 &&
332               div11 != (long double) 0.0 &&
333               div11 / div10 == div10) /* possible underflow */
334             {
335               long double div12, div13;
336               /* After the divisions, any bit of x must be >= div10,
337                  hence the possible division by div9. */
338               div12 = div11 * div11; /* 2^(-2^12) */
339               div13 = div12 * div12; /* 2^(-2^13) */
340               if (ABS (x) <= div13)
341                 {
342                   x /= div13; /* exact */
343                   shift_exp -= 8192;
344                   mpfr_mul_2si (t, t, 8192, MPFR_RNDZ);
345                 }
346               if (ABS (x) <= div12)
347                 {
348                   x /= div12; /* exact */
349                   shift_exp -= 4096;
350                   mpfr_mul_2si (t, t, 4096, MPFR_RNDZ);
351                 }
352               if (ABS (x) <= div11)
353                 {
354                   x /= div11; /* exact */
355                   shift_exp -= 2048;
356                   mpfr_mul_2si (t, t, 2048, MPFR_RNDZ);
357                 }
358               if (ABS (x) <= div10)
359                 {
360                   x /= div10; /* exact */
361                   shift_exp -= 1024;
362                   mpfr_mul_2si (t, t, 1024, MPFR_RNDZ);
363                 }
364               if (ABS(x) <= div9)
365                 {
366                   x /= div9;  /* exact */
367                   shift_exp -= 512;
368                   mpfr_mul_2si (t, t, 512, MPFR_RNDZ);
369                 }
370             }
371           else /* no underflow */
372             {
373               inexact = mpfr_set_d (u, (double) x, MPFR_RNDZ);
374               MPFR_ASSERTD (inexact == 0);
375               if (mpfr_add (t, t, u, MPFR_RNDZ) != 0)
376                 {
377                   if (!mpfr_number_p (t))
378                     break;
379                   /* Inexact. This cannot happen unless the C implementation
380                      "lies" on the precision or when long doubles are
381                      implemented with FP expansions like double-double on
382                      PowerPC. */
383                   if (MPFR_PREC (t) != MPFR_PREC (r) + 1)
384                     {
385                       /* We assume that MPFR_PREC (r) < MPFR_PREC_MAX.
386                          The precision MPFR_PREC (r) + 1 allows us to
387                          deduce the rounding bit and the sticky bit. */
388                       mpfr_set_prec (t, MPFR_PREC (r) + 1);
389                       goto convert;
390                     }
391                   else
392                     {
393                       mp_limb_t *tp;
394                       int rb_mask;
395 
396                       /* Since mpfr_add was inexact, the sticky bit is 1. */
397                       tp = MPFR_MANT (t);
398                       rb_mask = MPFR_LIMB_ONE <<
399                         (GMP_NUMB_BITS - 1 -
400                          (MPFR_PREC (r) & (GMP_NUMB_BITS - 1)));
401                       if (rnd_mode == MPFR_RNDN)
402                         rnd_mode = (*tp & rb_mask) ^ MPFR_IS_NEG (t) ?
403                           MPFR_RNDU : MPFR_RNDD;
404                       *tp |= rb_mask;
405                       break;
406                     }
407                 }
408               x -= (long double) mpfr_get_d1 (u); /* exact */
409             }
410         }
411     }
412   inexact = mpfr_mul_2si (r, t, shift_exp, rnd_mode);
413   mpfr_clear (t);
414   mpfr_clear (u);
415 
416   MPFR_SAVE_EXPO_FREE (expo);
417   return mpfr_check_range (r, inexact, rnd_mode);
418 
419  nan:
420   MPFR_SET_NAN(r);
421   MPFR_RET_NAN;
422 }
423 
424 #endif
425