1 /* mpfr_set_ld -- convert a machine long double to
2 a multiple precision floating-point number
3
4 Copyright 2002-2023 Free Software Foundation, Inc.
5 Contributed by the AriC and Caramba projects, INRIA.
6
7 This file is part of the GNU MPFR Library.
8
9 The GNU MPFR Library is free software; you can redistribute it and/or modify
10 it under the terms of the GNU Lesser General Public License as published by
11 the Free Software Foundation; either version 3 of the License, or (at your
12 option) any later version.
13
14 The GNU MPFR Library is distributed in the hope that it will be useful, but
15 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
16 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
17 License for more details.
18
19 You should have received a copy of the GNU Lesser General Public License
20 along with the GNU MPFR Library; see the file COPYING.LESSER. If not, see
21 https://www.gnu.org/licenses/ or write to the Free Software Foundation, Inc.,
22 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. */
23
24 #include <float.h> /* needed so that MPFR_LDBL_MANT_DIG is correctly defined */
25
26 #define MPFR_NEED_LONGLONG_H
27 #include "mpfr-impl.h"
28
29 /* To check for +inf, one can use the test x > LDBL_MAX, as LDBL_MAX is
30 the maximum finite number representable in a long double, according
31 to DR 467; see
32 https://www.open-std.org/jtc1/sc22/wg14/www/docs/n2092.htm
33 If this fails on some platform, a test x - x != 0 might be used. */
34
35 #if defined(HAVE_LDOUBLE_IS_DOUBLE)
36
37 /* the "long double" format is the same as "double" */
38 int
mpfr_set_ld(mpfr_ptr r,long double d,mpfr_rnd_t rnd_mode)39 mpfr_set_ld (mpfr_ptr r, long double d, mpfr_rnd_t rnd_mode)
40 {
41 return mpfr_set_d (r, (double) d, rnd_mode);
42 }
43
44 #elif defined(HAVE_LDOUBLE_IEEE_EXT_LITTLE)
45
46 #if GMP_NUMB_BITS >= 64
47 # define MPFR_LIMBS_PER_LONG_DOUBLE 1
48 #elif GMP_NUMB_BITS == 32
49 # define MPFR_LIMBS_PER_LONG_DOUBLE 2
50 #elif GMP_NUMB_BITS == 16
51 # define MPFR_LIMBS_PER_LONG_DOUBLE 4
52 #elif GMP_NUMB_BITS == 8
53 # define MPFR_LIMBS_PER_LONG_DOUBLE 8
54 #else
55 #error "GMP_NUMB_BITS is assumed to be 8, 16, 32 or >= 64"
56 #endif
57 /* The hypothetical GMP_NUMB_BITS == 16 is not supported. It will trigger
58 an error below. */
59
60 /* IEEE Extended Little Endian Code */
61 int
mpfr_set_ld(mpfr_ptr r,long double d,mpfr_rnd_t rnd_mode)62 mpfr_set_ld (mpfr_ptr r, long double d, mpfr_rnd_t rnd_mode)
63 {
64 int inexact, k, cnt;
65 mpfr_t tmp;
66 mp_limb_t tmpmant[MPFR_LIMBS_PER_LONG_DOUBLE];
67 mpfr_long_double_t x;
68 mpfr_exp_t exp;
69 int signd;
70 MPFR_SAVE_EXPO_DECL (expo);
71
72 /* Check for NAN */
73 if (MPFR_UNLIKELY (DOUBLE_ISNAN (d)))
74 {
75 /* we don't propagate the sign bit */
76 MPFR_SET_NAN (r);
77 MPFR_RET_NAN;
78 }
79 /* Check for INF */
80 else if (MPFR_UNLIKELY (d > LDBL_MAX))
81 {
82 MPFR_SET_INF (r);
83 MPFR_SET_POS (r);
84 return 0;
85 }
86 else if (MPFR_UNLIKELY (d < -LDBL_MAX))
87 {
88 MPFR_SET_INF (r);
89 MPFR_SET_NEG (r);
90 return 0;
91 }
92 /* Check for ZERO */
93 else if (MPFR_UNLIKELY (d == 0.0))
94 {
95 x.ld = d;
96 MPFR_SET_ZERO (r);
97 if (x.s.sign == 1)
98 MPFR_SET_NEG(r);
99 else
100 MPFR_SET_POS(r);
101 return 0;
102 }
103
104 /* now d is neither 0, nor NaN nor Inf */
105 MPFR_SAVE_EXPO_MARK (expo);
106
107 MPFR_MANT (tmp) = tmpmant;
108 MPFR_PREC (tmp) = 64;
109
110 /* Extract sign */
111 x.ld = d;
112 signd = MPFR_SIGN_POS;
113 if (x.ld < 0.0)
114 {
115 signd = MPFR_SIGN_NEG;
116 x.ld = -x.ld;
117 }
118
119 /* Extract and normalize the significand */
120 #if MPFR_LIMBS_PER_LONG_DOUBLE == 1
121 tmpmant[0] = ((mp_limb_t) x.s.manh << 32) | ((mp_limb_t) x.s.manl);
122 count_leading_zeros (cnt, tmpmant[0]);
123 tmpmant[0] <<= cnt;
124 k = 0; /* number of limbs shifted */
125 #else /* MPFR_LIMBS_PER_LONG_DOUBLE >= 2 */
126 #if MPFR_LIMBS_PER_LONG_DOUBLE == 2
127 tmpmant[0] = (mp_limb_t) x.s.manl;
128 tmpmant[1] = (mp_limb_t) x.s.manh;
129 #elif MPFR_LIMBS_PER_LONG_DOUBLE == 4
130 tmpmant[0] = (mp_limb_t) x.s.manl;
131 tmpmant[1] = (mp_limb_t) (x.s.manl >> 16);
132 tmpmant[2] = (mp_limb_t) x.s.manh;
133 tmpmant[3] = (mp_limb_t) (x.s.manh >> 16);
134 #elif MPFR_LIMBS_PER_LONG_DOUBLE == 8
135 tmpmant[0] = (mp_limb_t) x.s.manl;
136 tmpmant[1] = (mp_limb_t) (x.s.manl >> 8);
137 tmpmant[2] = (mp_limb_t) (x.s.manl >> 16);
138 tmpmant[3] = (mp_limb_t) (x.s.manl >> 24);
139 tmpmant[4] = (mp_limb_t) x.s.manh;
140 tmpmant[5] = (mp_limb_t) (x.s.manh >> 8);
141 tmpmant[6] = (mp_limb_t) (x.s.manh >> 16);
142 tmpmant[7] = (mp_limb_t) (x.s.manh >> 24);
143 #else
144 #error "MPFR_LIMBS_PER_LONG_DOUBLE should be 1, 2, 4 or 8"
145 #endif /* MPFR_LIMBS_PER_LONG_DOUBLE >= 2 */
146 {
147 int i = MPFR_LIMBS_PER_LONG_DOUBLE;
148 MPN_NORMALIZE_NOT_ZERO (tmpmant, i);
149 k = MPFR_LIMBS_PER_LONG_DOUBLE - i;
150 count_leading_zeros (cnt, tmpmant[i - 1]);
151 if (cnt != 0)
152 mpn_lshift (tmpmant + k, tmpmant, i, cnt);
153 else if (k != 0)
154 /* since we copy {tmpmant, i} into {tmpmant + k, i}, we should work
155 decreasingly, thus call mpn_copyd */
156 mpn_copyd (tmpmant + k, tmpmant, i);
157 if (k != 0)
158 MPN_ZERO (tmpmant, k);
159 }
160 #endif /* MPFR_LIMBS_PER_LONG_DOUBLE == 1 */
161
162 /* Set exponent */
163 exp = (mpfr_exp_t) ((x.s.exph << 8) + x.s.expl); /* 15-bit unsigned int */
164 if (MPFR_UNLIKELY (exp == 0))
165 exp -= 0x3FFD;
166 else
167 exp -= 0x3FFE;
168
169 MPFR_SET_EXP (tmp, exp - cnt - k * GMP_NUMB_BITS);
170
171 /* tmp is exact */
172 inexact = mpfr_set4 (r, tmp, rnd_mode, signd);
173
174 MPFR_SAVE_EXPO_FREE (expo);
175 return mpfr_check_range (r, inexact, rnd_mode);
176 }
177
178 #elif defined(HAVE_LDOUBLE_MAYBE_DOUBLE_DOUBLE)
179
180 /* double-double code, see
181 https://gcc.gnu.org/git/?p=gcc.git;a=blob;f=libgcc/config/rs6000/ibm-ldouble-format;h=e8ada17f7696cd942e710d5b67d4149f5fcccf45;hb=HEAD */
182 int
mpfr_set_ld(mpfr_ptr r,long double d,mpfr_rnd_t rnd_mode)183 mpfr_set_ld (mpfr_ptr r, long double d, mpfr_rnd_t rnd_mode)
184 {
185 mpfr_t t, u;
186 int inexact;
187 double h, l;
188 MPFR_SAVE_EXPO_DECL (expo);
189
190 /* Check for NAN. Since we can't use isnan(), we rely on the
191 LONGDOUBLE_NAN_ACTION macro. The sign bit is not propagated. */
192 LONGDOUBLE_NAN_ACTION (d, { MPFR_SET_NAN(r); MPFR_RET_NAN; });
193
194 /* Check for INF */
195 if (d > LDBL_MAX)
196 {
197 mpfr_set_inf (r, 1);
198 return 0;
199 }
200 else if (d < -LDBL_MAX)
201 {
202 mpfr_set_inf (r, -1);
203 return 0;
204 }
205 /* Check for ZERO */
206 else if (d == 0.0)
207 return mpfr_set_d (r, (double) d, rnd_mode);
208
209 if (d >= LDBL_MAX || d <= -LDBL_MAX)
210 h = (d >= LDBL_MAX) ? LDBL_MAX : -LDBL_MAX;
211 else
212 h = (double) d; /* should not overflow */
213 l = (double) (d - (long double) h);
214
215 MPFR_SAVE_EXPO_MARK (expo);
216
217 mpfr_init2 (t, IEEE_DBL_MANT_DIG);
218 mpfr_init2 (u, IEEE_DBL_MANT_DIG);
219
220 inexact = mpfr_set_d (t, h, MPFR_RNDN);
221 MPFR_ASSERTN(inexact == 0);
222 inexact = mpfr_set_d (u, l, MPFR_RNDN);
223 MPFR_ASSERTN(inexact == 0);
224 inexact = mpfr_add (r, t, u, rnd_mode);
225
226 mpfr_clear (t);
227 mpfr_clear (u);
228
229 MPFR_SAVE_EXPO_FREE (expo);
230 inexact = mpfr_check_range (r, inexact, rnd_mode);
231 return inexact;
232 }
233
234 #else
235
236 /* Generic code */
237 int
mpfr_set_ld(mpfr_ptr r,long double d,mpfr_rnd_t rnd_mode)238 mpfr_set_ld (mpfr_ptr r, long double d, mpfr_rnd_t rnd_mode)
239 {
240 mpfr_t t, u;
241 int inexact, shift_exp;
242 long double x;
243 MPFR_SAVE_EXPO_DECL (expo);
244
245 /* Check for NAN. Since we can't use isnan(), we rely on the
246 LONGDOUBLE_NAN_ACTION macro. The sign bit is not propagated. */
247 LONGDOUBLE_NAN_ACTION (d, { MPFR_SET_NAN(r); MPFR_RET_NAN; });
248
249 /* Check for INF */
250 if (d > LDBL_MAX)
251 {
252 mpfr_set_inf (r, 1);
253 return 0;
254 }
255 else if (d < -LDBL_MAX)
256 {
257 mpfr_set_inf (r, -1);
258 return 0;
259 }
260 /* Check for ZERO */
261 else if (d == 0.0)
262 return mpfr_set_d (r, (double) d, rnd_mode);
263
264 mpfr_init2 (t, MPFR_LDBL_MANT_DIG);
265 mpfr_init2 (u, IEEE_DBL_MANT_DIG);
266
267 MPFR_SAVE_EXPO_MARK (expo);
268
269 convert:
270 x = d;
271 MPFR_SET_ZERO (t); /* The sign doesn't matter. */
272 shift_exp = 0; /* invariant: remainder to deal with is d*2^shift_exp */
273 while (x != (long double) 0.0)
274 {
275 /* Check overflow of double */
276 if (x > (long double) DBL_MAX || (-x) > (long double) DBL_MAX)
277 {
278 long double div9, div10, div11, div12, div13;
279
280 #define TWO_64 18446744073709551616.0 /* 2^64 */
281 #define TWO_128 (TWO_64 * TWO_64)
282 #define TWO_256 (TWO_128 * TWO_128)
283 div9 = (long double) (double) (TWO_256 * TWO_256); /* 2^(2^9) */
284 div10 = div9 * div9;
285 div11 = div10 * div10; /* 2^(2^11) */
286 div12 = div11 * div11; /* 2^(2^12) */
287 div13 = div12 * div12; /* 2^(2^13) */
288 if (ABS (x) >= div13)
289 {
290 x /= div13; /* exact */
291 shift_exp += 8192;
292 mpfr_div_2si (t, t, 8192, MPFR_RNDZ);
293 }
294 if (ABS (x) >= div12)
295 {
296 x /= div12; /* exact */
297 shift_exp += 4096;
298 mpfr_div_2si (t, t, 4096, MPFR_RNDZ);
299 }
300 if (ABS (x) >= div11)
301 {
302 x /= div11; /* exact */
303 shift_exp += 2048;
304 mpfr_div_2si (t, t, 2048, MPFR_RNDZ);
305 }
306 if (ABS (x) >= div10)
307 {
308 x /= div10; /* exact */
309 shift_exp += 1024;
310 mpfr_div_2si (t, t, 1024, MPFR_RNDZ);
311 }
312 /* warning: we may have DBL_MAX=2^1024*(1-2^(-53)) < x < 2^1024,
313 therefore we have one extra exponent reduction step */
314 if (ABS (x) >= div9)
315 {
316 x /= div9; /* exact */
317 shift_exp += 512;
318 mpfr_div_2si (t, t, 512, MPFR_RNDZ);
319 }
320 } /* Check overflow of double */
321 else /* no overflow on double */
322 {
323 long double div9, div10, div11;
324
325 div9 = (long double) (double) 7.4583407312002067432909653e-155;
326 /* div9 = 2^(-2^9) */
327 div10 = div9 * div9; /* 2^(-2^10) */
328 div11 = div10 * div10; /* 2^(-2^11) if extended precision */
329 /* since -DBL_MAX <= x <= DBL_MAX, the cast to double should not
330 overflow here */
331 if (ABS(x) < div10 &&
332 div11 != (long double) 0.0 &&
333 div11 / div10 == div10) /* possible underflow */
334 {
335 long double div12, div13;
336 /* After the divisions, any bit of x must be >= div10,
337 hence the possible division by div9. */
338 div12 = div11 * div11; /* 2^(-2^12) */
339 div13 = div12 * div12; /* 2^(-2^13) */
340 if (ABS (x) <= div13)
341 {
342 x /= div13; /* exact */
343 shift_exp -= 8192;
344 mpfr_mul_2si (t, t, 8192, MPFR_RNDZ);
345 }
346 if (ABS (x) <= div12)
347 {
348 x /= div12; /* exact */
349 shift_exp -= 4096;
350 mpfr_mul_2si (t, t, 4096, MPFR_RNDZ);
351 }
352 if (ABS (x) <= div11)
353 {
354 x /= div11; /* exact */
355 shift_exp -= 2048;
356 mpfr_mul_2si (t, t, 2048, MPFR_RNDZ);
357 }
358 if (ABS (x) <= div10)
359 {
360 x /= div10; /* exact */
361 shift_exp -= 1024;
362 mpfr_mul_2si (t, t, 1024, MPFR_RNDZ);
363 }
364 if (ABS(x) <= div9)
365 {
366 x /= div9; /* exact */
367 shift_exp -= 512;
368 mpfr_mul_2si (t, t, 512, MPFR_RNDZ);
369 }
370 }
371 else /* no underflow */
372 {
373 inexact = mpfr_set_d (u, (double) x, MPFR_RNDZ);
374 MPFR_ASSERTD (inexact == 0);
375 if (mpfr_add (t, t, u, MPFR_RNDZ) != 0)
376 {
377 if (!mpfr_number_p (t))
378 break;
379 /* Inexact. This cannot happen unless the C implementation
380 "lies" on the precision or when long doubles are
381 implemented with FP expansions like double-double on
382 PowerPC. */
383 if (MPFR_PREC (t) != MPFR_PREC (r) + 1)
384 {
385 /* We assume that MPFR_PREC (r) < MPFR_PREC_MAX.
386 The precision MPFR_PREC (r) + 1 allows us to
387 deduce the rounding bit and the sticky bit. */
388 mpfr_set_prec (t, MPFR_PREC (r) + 1);
389 goto convert;
390 }
391 else
392 {
393 mp_limb_t *tp;
394 int rb_mask;
395
396 /* Since mpfr_add was inexact, the sticky bit is 1. */
397 tp = MPFR_MANT (t);
398 rb_mask = MPFR_LIMB_ONE <<
399 (GMP_NUMB_BITS - 1 -
400 (MPFR_PREC (r) & (GMP_NUMB_BITS - 1)));
401 if (rnd_mode == MPFR_RNDN)
402 rnd_mode = (*tp & rb_mask) ^ MPFR_IS_NEG (t) ?
403 MPFR_RNDU : MPFR_RNDD;
404 *tp |= rb_mask;
405 break;
406 }
407 }
408 x -= (long double) mpfr_get_d1 (u); /* exact */
409 }
410 }
411 }
412 inexact = mpfr_mul_2si (r, t, shift_exp, rnd_mode);
413 mpfr_clear (t);
414 mpfr_clear (u);
415
416 MPFR_SAVE_EXPO_FREE (expo);
417 return mpfr_check_range (r, inexact, rnd_mode);
418 }
419
420 #endif
421