xref: /netbsd-src/external/lgpl3/mpfr/dist/src/get_ld.c (revision e670fd5c413e99c2f6a37901bb21c537fcd322d2)
1 /* mpfr_get_ld, mpfr_get_ld_2exp -- convert a multiple precision floating-point
2                                     number to a machine long double
3 
4 Copyright 2002-2020 Free Software Foundation, Inc.
5 Contributed by the AriC and Caramba projects, INRIA.
6 
7 This file is part of the GNU MPFR Library.
8 
9 The GNU MPFR Library is free software; you can redistribute it and/or modify
10 it under the terms of the GNU Lesser General Public License as published by
11 the Free Software Foundation; either version 3 of the License, or (at your
12 option) any later version.
13 
14 The GNU MPFR Library is distributed in the hope that it will be useful, but
15 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
16 or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
17 License for more details.
18 
19 You should have received a copy of the GNU Lesser General Public License
20 along with the GNU MPFR Library; see the file COPYING.LESSER.  If not, see
21 https://www.gnu.org/licenses/ or write to the Free Software Foundation, Inc.,
22 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. */
23 
24 #include <float.h> /* needed so that MPFR_LDBL_MANT_DIG is correctly defined */
25 
26 #include "mpfr-impl.h"
27 
28 #if defined(HAVE_LDOUBLE_IS_DOUBLE)
29 
30 /* special code when "long double" is the same format as "double" */
31 long double
32 mpfr_get_ld (mpfr_srcptr x, mpfr_rnd_t rnd_mode)
33 {
34   return (long double) mpfr_get_d (x, rnd_mode);
35 }
36 
37 #elif defined(HAVE_LDOUBLE_IEEE_EXT_LITTLE)
38 
39 /* Note: The code will return a result with a 64-bit precision, even
40    if the rounding precision is only 53 bits like on FreeBSD and
41    NetBSD 6- (or with GCC's -mpc64 option to simulate this on other
42    platforms). This is consistent with how strtold behaves in these
43    cases, for instance. */
44 
45 /* special code for IEEE 754 little-endian extended format */
46 long double
47 mpfr_get_ld (mpfr_srcptr x, mpfr_rnd_t rnd_mode)
48 {
49   mpfr_long_double_t ld;
50   mpfr_t tmp;
51   int inex;
52   MPFR_SAVE_EXPO_DECL (expo);
53 
54   MPFR_SAVE_EXPO_MARK (expo);
55 
56   mpfr_init2 (tmp, MPFR_LDBL_MANT_DIG);
57   inex = mpfr_set (tmp, x, rnd_mode);
58 
59   mpfr_set_emin (-16381-63); /* emin=-16444, see below */
60   mpfr_set_emax (16384);
61   mpfr_subnormalize (tmp, mpfr_check_range (tmp, inex, rnd_mode), rnd_mode);
62   mpfr_prec_round (tmp, 64, MPFR_RNDZ); /* exact */
63   if (MPFR_UNLIKELY (MPFR_IS_SINGULAR (tmp)))
64     ld.ld = (long double) mpfr_get_d (tmp, rnd_mode);
65   else
66     {
67       mp_limb_t *tmpmant;
68       mpfr_exp_t e, denorm;
69 
70       tmpmant = MPFR_MANT (tmp);
71       e = MPFR_GET_EXP (tmp);
72       /* The smallest positive normal number is 2^(-16382), which is
73          0.5*2^(-16381) in MPFR, thus any exponent <= -16382 corresponds to a
74          subnormal number. The smallest positive subnormal number is 2^(-16445)
75          which is 0.5*2^(-16444) in MPFR thus 0 <= denorm <= 63. */
76       denorm = MPFR_UNLIKELY (e <= -16382) ? - e - 16382 + 1 : 0;
77       MPFR_ASSERTD (0 <= denorm && denorm < 64);
78 #if GMP_NUMB_BITS >= 64
79       ld.s.manl = (tmpmant[0] >> denorm);
80       ld.s.manh = (tmpmant[0] >> denorm) >> 32;
81 #elif GMP_NUMB_BITS == 32
82       if (MPFR_LIKELY (denorm == 0))
83         {
84           ld.s.manl = tmpmant[0];
85           ld.s.manh = tmpmant[1];
86         }
87       else if (denorm < 32)
88         {
89           ld.s.manl = (tmpmant[0] >> denorm) | (tmpmant[1] << (32 - denorm));
90           ld.s.manh = tmpmant[1] >> denorm;
91         }
92       else /* 32 <= denorm < 64 */
93         {
94           ld.s.manl = tmpmant[1] >> (denorm - 32);
95           ld.s.manh = 0;
96         }
97 #elif GMP_NUMB_BITS == 16
98       if (MPFR_LIKELY (denorm == 0))
99         {
100           /* manl = tmpmant[1] | tmpmant[0]
101              manh = tmpmant[3] | tmpmant[2] */
102           ld.s.manl = tmpmant[0] | ((unsigned long) tmpmant[1] << 16);
103           ld.s.manh = tmpmant[2] | ((unsigned long) tmpmant[3] << 16);
104         }
105       else if (denorm < 16)
106         {
107           /* manl = low(mant[2],denorm) | mant[1] | high(mant[0],16-denorm)
108              manh = mant[3] | high(mant[2],16-denorm) */
109           ld.s.manl = (tmpmant[0] >> denorm)
110             | ((unsigned long) tmpmant[1] << (16 - denorm))
111             | ((unsigned long) tmpmant[2] << (32 - denorm));
112           ld.s.manh = (tmpmant[2] >> denorm)
113             | ((unsigned long) tmpmant[3] << (16 - denorm));
114         }
115       else if (denorm == 16)
116         {
117           /* manl = tmpmant[2] | tmpmant[1]
118              manh = 0000000000 | tmpmant[3] */
119           ld.s.manl = tmpmant[1] | ((unsigned long) tmpmant[2] << 16);
120           ld.s.manh = tmpmant[3];
121         }
122       else if (denorm < 32)
123         {
124           /* manl = low(mant[3],denorm-16) | mant[2] | high(mant[1],32-denorm)
125              manh = high(mant[3],32-denorm) */
126           ld.s.manl = (tmpmant[1] >> (denorm - 16))
127             | ((unsigned long) tmpmant[2] << (32 - denorm))
128             | ((unsigned long) tmpmant[3] << (48 - denorm));
129           ld.s.manh = tmpmant[3] >> (denorm - 16);
130         }
131       else if (denorm == 32)
132         {
133           /* manl = tmpmant[3] | tmpmant[2]
134              manh = 0 */
135           ld.s.manl = tmpmant[2] | ((unsigned long) tmpmant[3] << 16);
136           ld.s.manh = 0;
137         }
138       else if (denorm < 48)
139         {
140           /* manl = zero(denorm-32) | tmpmant[3] | high(tmpmant[2],48-denorm)
141              manh = 0 */
142           ld.s.manl = (tmpmant[2] >> (denorm - 32))
143             | ((unsigned long) tmpmant[3] << (48 - denorm));
144           ld.s.manh = 0;
145         }
146       else /* 48 <= denorm < 64 */
147         {
148           /* we assume a right shift of 0 is identity */
149           ld.s.manl = tmpmant[3] >> (denorm - 48);
150           ld.s.manh = 0;
151         }
152 #elif GMP_NUMB_BITS == 8
153       {
154         unsigned long long mant = 0;
155         int i;
156         for (i = 0; i < 8; i++)
157           mant |= ((unsigned long) tmpmant[i] << (8*i));
158         mant >>= denorm;
159         ld.s.manl = mant;
160         ld.s.manh = mant >> 32;
161       }
162 #else
163 # error "GMP_NUMB_BITS must be 16, 32 or >= 64"
164       /* Other values have never been supported anyway. */
165 #endif
166       if (MPFR_LIKELY (denorm == 0))
167         {
168           ld.s.exph = (e + 0x3FFE) >> 8;
169           ld.s.expl = (e + 0x3FFE);
170         }
171       else
172         ld.s.exph = ld.s.expl = 0;
173       ld.s.sign = MPFR_IS_NEG (x);
174     }
175 
176   mpfr_clear (tmp);
177   MPFR_SAVE_EXPO_FREE (expo);
178   return ld.ld;
179 }
180 
181 #else
182 
183 /* generic code */
184 long double
185 mpfr_get_ld (mpfr_srcptr x, mpfr_rnd_t rnd_mode)
186 {
187   if (MPFR_UNLIKELY (MPFR_IS_SINGULAR (x)))
188     return (long double) mpfr_get_d (x, rnd_mode);
189   else /* now x is a normal non-zero number */
190     {
191       long double r; /* result */
192       double s; /* part of result */
193       MPFR_SAVE_EXPO_DECL (expo);
194 
195       MPFR_SAVE_EXPO_MARK (expo);
196 
197 #if defined(HAVE_LDOUBLE_MAYBE_DOUBLE_DOUBLE)
198       if (MPFR_LDBL_MANT_DIG == 106)
199         {
200           /* Assume double-double format (as found with the PowerPC ABI).
201              The generic code below isn't used because numbers with
202              precision > 106 would not be supported. */
203           s = mpfr_get_d (x, MPFR_RNDN); /* high part of x */
204           /* Let's first consider special cases separately. The test for
205              infinity is really needed to avoid a NaN result. The test
206              for NaN is mainly for optimization. The test for 0 is useful
207              to get the correct sign (assuming mpfr_get_d supports signed
208              zeros on the implementation). */
209           if (s == 0 || DOUBLE_ISNAN (s) || DOUBLE_ISINF (s))
210             r = (long double) s;
211           else
212             {
213               mpfr_t y, z;
214 
215               mpfr_init2 (y, mpfr_get_prec (x));
216               mpfr_init2 (z, IEEE_DBL_MANT_DIG); /* keep the precision small */
217               mpfr_set_d (z, s, MPFR_RNDN);  /* exact */
218               mpfr_sub (y, x, z, MPFR_RNDN); /* exact */
219               /* Add the second part of y (in the correct rounding mode). */
220               r = (long double) s + (long double) mpfr_get_d (y, rnd_mode);
221               mpfr_clear (z);
222               mpfr_clear (y);
223             }
224         }
225       else
226 #endif
227         {
228           long double m;
229           mpfr_exp_t sh; /* exponent shift -> x/2^sh is in the double range */
230           mpfr_t y, z;
231           int sign;
232 
233           /* First round x to the target long double precision, so that
234              all subsequent operations are exact (this avoids double rounding
235              problems). However if the format contains numbers that have more
236              precision, MPFR won't be able to generate such numbers. */
237           mpfr_init2 (y, MPFR_LDBL_MANT_DIG);
238           mpfr_init2 (z, MPFR_LDBL_MANT_DIG);
239           /* Note about the precision of z: even though IEEE_DBL_MANT_DIG is
240              sufficient, z has been set to the same precision as y so that
241              the mpfr_sub below calls mpfr_sub1sp, which is faster than the
242              generic subtraction, even in this particular case (from tests
243              done by Patrick Pelissier on a 64-bit Core2 Duo against r7285).
244              But here there is an important cancellation in the subtraction.
245              TODO: get more information about what has been tested. */
246 
247           mpfr_set (y, x, rnd_mode);
248           sh = MPFR_GET_EXP (y);
249           sign = MPFR_SIGN (y);
250           MPFR_SET_EXP (y, 0);
251           MPFR_SET_POS (y);
252 
253           r = 0.0;
254           do
255             {
256               s = mpfr_get_d (y, MPFR_RNDN); /* high part of y */
257               r += (long double) s;
258               mpfr_set_d (z, s, MPFR_RNDN);  /* exact */
259               mpfr_sub (y, y, z, MPFR_RNDN); /* exact */
260             }
261           while (!MPFR_IS_ZERO (y));
262 
263           mpfr_clear (z);
264           mpfr_clear (y);
265 
266           /* we now have to multiply back by 2^sh */
267           MPFR_ASSERTD (r > 0);
268           if (sh != 0)
269             {
270               /* An overflow may occur (example: 0.5*2^1024) */
271               while (r < 1.0)
272                 {
273                   r += r;
274                   sh--;
275                 }
276 
277               if (sh > 0)
278                 m = 2.0;
279               else
280                 {
281                   m = 0.5;
282                   sh = -sh;
283                 }
284 
285               for (;;)
286                 {
287                   if (sh % 2)
288                     r = r * m;
289                   sh >>= 1;
290                   if (sh == 0)
291                     break;
292                   m = m * m;
293                 }
294             }
295           if (sign < 0)
296             r = -r;
297         }
298       MPFR_SAVE_EXPO_FREE (expo);
299       return r;
300     }
301 }
302 
303 #endif
304 
305 /* contributed by Damien Stehle */
306 long double
307 mpfr_get_ld_2exp (long *expptr, mpfr_srcptr src, mpfr_rnd_t rnd_mode)
308 {
309   long double ret;
310   mpfr_exp_t exp;
311   mpfr_t tmp;
312 
313   if (MPFR_UNLIKELY (MPFR_IS_SINGULAR (src)))
314     return (long double) mpfr_get_d_2exp (expptr, src, rnd_mode);
315 
316   MPFR_ALIAS (tmp, src, MPFR_SIGN (src), 0);
317   ret = mpfr_get_ld (tmp, rnd_mode);
318 
319   exp = MPFR_GET_EXP (src);
320 
321   /* rounding can give 1.0, adjust back to 0.5 <= abs(ret) < 1.0 */
322   if (ret == 1.0)
323     {
324       ret = 0.5;
325       exp ++;
326     }
327   else if (ret ==  -1.0)
328     {
329       ret = -0.5;
330       exp ++;
331     }
332 
333   MPFR_ASSERTN ((ret >= 0.5 && ret < 1.0)
334                 || (ret <= -0.5 && ret > -1.0));
335   MPFR_ASSERTN (exp >= LONG_MIN && exp <= LONG_MAX);
336 
337   *expptr = exp;
338   return ret;
339 }
340