xref: /netbsd-src/external/lgpl3/mpfr/dist/src/get_flt.c (revision 9fb66d812c00ebfb445c0b47dea128f32aa6fe96)
1 /* mpfr_get_flt -- convert a mpfr_t to a machine single precision float
2 
3 Copyright 2009-2020 Free Software Foundation, Inc.
4 Contributed by the AriC and Caramba projects, INRIA.
5 
6 This file is part of the GNU MPFR Library.
7 
8 The GNU MPFR Library is free software; you can redistribute it and/or modify
9 it under the terms of the GNU Lesser General Public License as published by
10 the Free Software Foundation; either version 3 of the License, or (at your
11 option) any later version.
12 
13 The GNU MPFR Library is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
15 or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
16 License for more details.
17 
18 You should have received a copy of the GNU Lesser General Public License
19 along with the GNU MPFR Library; see the file COPYING.LESSER.  If not, see
20 https://www.gnu.org/licenses/ or write to the Free Software Foundation, Inc.,
21 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. */
22 
23 #include <float.h>     /* for FLT_MIN */
24 
25 #define MPFR_NEED_LONGLONG_H
26 #include "mpfr-impl.h"
27 
28 #include "ieee_floats.h"
29 
/* Single-precision special values used by mpfr_get_flt below.  Each one is
   the corresponding double-precision macro cast to float; those macros are
   not defined in this file (presumably they come from mpfr-impl.h -- to be
   confirmed).  Going by their names and their use below: negative zero,
   minus infinity and plus infinity.  */
#define FLT_NEG_ZERO ((float) DBL_NEG_ZERO)
#define MPFR_FLT_INFM ((float) MPFR_DBL_INFM)
#define MPFR_FLT_INFP ((float) MPFR_DBL_INFP)
33 
34 float
35 mpfr_get_flt (mpfr_srcptr src, mpfr_rnd_t rnd_mode)
36 {
37   int negative;
38   mpfr_exp_t e;
39   float d;
40 
41   /* in case of NaN, +Inf, -Inf, +0, -0, the conversion from double to float
42      is exact */
43   if (MPFR_UNLIKELY (MPFR_IS_SINGULAR (src)))
44     return (float) mpfr_get_d (src, rnd_mode);
45 
46   e = MPFR_GET_EXP (src);
47   negative = MPFR_IS_NEG (src);
48 
49   if (MPFR_UNLIKELY(rnd_mode == MPFR_RNDA))
50     rnd_mode = negative ? MPFR_RNDD : MPFR_RNDU;
51 
52   /* FIXME: The code below assumes the IEEE-754 binary32 format
53      with subnormal support. Fix it by converting to double, then
54      to float, and in case of binary radix (for which we want
55      correct rounding), handle double-rounding issues somewhere
56      in the code? */
57 
58   /* the smallest positive normal float number is 2^(-126) = 0.5*2^(-125),
59      and the smallest positive subnormal number is 2^(-149) = 0.5*2^(-148) */
60   if (MPFR_UNLIKELY (e < -148))
61     {
62       /* |src| < 2^(-149), i.e., |src| is smaller than the smallest positive
63          subnormal number.
64          In round-to-nearest mode, 2^(-150) is rounded to zero.
65       */
66       d = negative ?
67         (rnd_mode == MPFR_RNDD ||
68          (rnd_mode == MPFR_RNDN && mpfr_cmp_si_2exp (src, -1, -150) < 0)
69          ? -FLT_MIN : FLT_NEG_ZERO) :
70         (rnd_mode == MPFR_RNDU ||
71          (rnd_mode == MPFR_RNDN && mpfr_cmp_si_2exp (src, 1, -150) > 0)
72          ? FLT_MIN : 0.0);
73       if (d != 0.0) /* we multiply FLT_MIN = 2^(-126) by FLT_EPSILON = 2^(-23)
74                        to get +-2^(-149) */
75         d *= FLT_EPSILON;
76     }
77   /* the largest normal number is 2^128*(1-2^(-24)) = 0.111...111e128 */
78   else if (MPFR_UNLIKELY (e > 128))
79     {
80       d = negative ?
81         (rnd_mode == MPFR_RNDZ || rnd_mode == MPFR_RNDU ?
82          -FLT_MAX : MPFR_FLT_INFM) :
83         (rnd_mode == MPFR_RNDZ || rnd_mode == MPFR_RNDD ?
84          FLT_MAX : MPFR_FLT_INFP);
85     }
86   else /* -148 <= e <= 127 */
87     {
88       int nbits;
89       mp_limb_t tp[MPFR_LIMBS_PER_FLT];
90       int carry;
91       double dd;
92 
93       nbits = IEEE_FLT_MANT_DIG; /* 24 */
94       if (MPFR_UNLIKELY (e < -125))
95         /*In the subnormal case, compute the exact number of significant bits*/
96         {
97           nbits += 125 + e;
98           MPFR_ASSERTD (1 <= nbits && nbits < 24);
99         }
100       carry = mpfr_round_raw_4 (tp, MPFR_MANT(src), MPFR_PREC(src), negative,
101                                 nbits, rnd_mode);
102       /* we perform the reconstruction using the 'double' type here,
103          knowing the result is exactly representable as 'float' */
104       if (MPFR_UNLIKELY(carry))
105         dd = 1.0;
106       else
107         {
108 #if MPFR_LIMBS_PER_FLT == 1
109           dd = (double) tp[0] / MP_BASE_AS_DOUBLE;
110 #else
111           mp_size_t np, i;
112           np = MPFR_PREC2LIMBS (nbits);
113           MPFR_ASSERTD(np <= MPFR_LIMBS_PER_FLT);
114           /* The following computations are exact thanks to the previous
115              mpfr_round_raw. */
116           dd = (double) tp[0] / MP_BASE_AS_DOUBLE;
117           for (i = 1 ; i < np ; i++)
118             dd = (dd + tp[i]) / MP_BASE_AS_DOUBLE;
119           /* dd is the mantissa (between 1/2 and 1) of the argument rounded
120              to 24 bits */
121 #endif
122         }
123       dd = mpfr_scale2 (dd, e);
124       if (negative)
125         dd = -dd;
126 
127       /* convert (exactly) to float */
128       d = (float) dd;
129     }
130 
131   return d;
132 }
133 
134