xref: /netbsd-src/external/lgpl3/mpfr/dist/src/get_flt.c (revision ba125506a622fe649968631a56eba5d42ff57863)
1 /* mpfr_get_flt -- convert a mpfr_t to a machine single precision float
2 
3 Copyright 2009-2023 Free Software Foundation, Inc.
4 Contributed by the AriC and Caramba projects, INRIA.
5 
6 This file is part of the GNU MPFR Library.
7 
8 The GNU MPFR Library is free software; you can redistribute it and/or modify
9 it under the terms of the GNU Lesser General Public License as published by
10 the Free Software Foundation; either version 3 of the License, or (at your
11 option) any later version.
12 
13 The GNU MPFR Library is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
15 or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
16 License for more details.
17 
18 You should have received a copy of the GNU Lesser General Public License
19 along with the GNU MPFR Library; see the file COPYING.LESSER.  If not, see
20 https://www.gnu.org/licenses/ or write to the Free Software Foundation, Inc.,
21 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. */
22 
23 #include <float.h>     /* for FLT_MIN */
24 
25 #define MPFR_NEED_LONGLONG_H
26 #include "mpfr-impl.h"
27 
28 #include "ieee_floats.h"
29 
30 #define FLT_NEG_ZERO ((float) DBL_NEG_ZERO)
31 #define MPFR_FLT_INFM ((float) MPFR_DBL_INFM)
32 #define MPFR_FLT_INFP ((float) MPFR_DBL_INFP)
33 
34 float
mpfr_get_flt(mpfr_srcptr src,mpfr_rnd_t rnd_mode)35 mpfr_get_flt (mpfr_srcptr src, mpfr_rnd_t rnd_mode)
36 {
37   int negative;
38   mpfr_exp_t e;
39   float d;
40 
41   /* in case of NaN, +Inf, -Inf, +0, -0, the conversion from double to float
42      is exact */
43   if (MPFR_UNLIKELY (MPFR_IS_SINGULAR (src)))
44     {
45       /* for NaN, we don't propagate the sign bit */
46       return (float) mpfr_get_d (src, rnd_mode);
47     }
48 
49   e = MPFR_GET_EXP (src);
50   negative = MPFR_IS_NEG (src);
51 
52   if (MPFR_UNLIKELY(rnd_mode == MPFR_RNDA))
53     rnd_mode = negative ? MPFR_RNDD : MPFR_RNDU;
54 
55   /* FIXME: The code below assumes the IEEE-754 binary32 format
56      with subnormal support. Fix it by converting to double, then
57      to float, and in case of binary radix (for which we want
58      correct rounding), handle double-rounding issues somewhere
59      in the code? */
60 
61   /* the smallest positive normal float number is 2^(-126) = 0.5*2^(-125),
62      and the smallest positive subnormal number is 2^(-149) = 0.5*2^(-148) */
63   if (MPFR_UNLIKELY (e < -148))
64     {
65       /* |src| < 2^(-149), i.e., |src| is smaller than the smallest positive
66          subnormal number.
67          In round-to-nearest mode, 2^(-150) is rounded to zero.
68       */
69       d = negative ?
70         (rnd_mode == MPFR_RNDD ||
71          (rnd_mode == MPFR_RNDN && mpfr_cmp_si_2exp (src, -1, -150) < 0)
72          ? -FLT_MIN : FLT_NEG_ZERO) :
73         (rnd_mode == MPFR_RNDU ||
74          (rnd_mode == MPFR_RNDN && mpfr_cmp_si_2exp (src, 1, -150) > 0)
75          ? FLT_MIN : 0.0);
76       if (d != 0.0) /* we multiply FLT_MIN = 2^(-126) by FLT_EPSILON = 2^(-23)
77                        to get +-2^(-149) */
78         d *= FLT_EPSILON;
79     }
80   /* the largest normal number is 2^128*(1-2^(-24)) = 0.111...111e128 */
81   else if (MPFR_UNLIKELY (e > 128))
82     {
83       d = negative ?
84         (rnd_mode == MPFR_RNDZ || rnd_mode == MPFR_RNDU ?
85          -FLT_MAX : MPFR_FLT_INFM) :
86         (rnd_mode == MPFR_RNDZ || rnd_mode == MPFR_RNDD ?
87          FLT_MAX : MPFR_FLT_INFP);
88     }
89   else /* -148 <= e <= 127 */
90     {
91       int nbits;
92       mp_limb_t tp[MPFR_LIMBS_PER_FLT];
93       int carry;
94       double dd;
95 
96       nbits = IEEE_FLT_MANT_DIG; /* 24 */
97       if (MPFR_UNLIKELY (e < -125))
98         /*In the subnormal case, compute the exact number of significant bits*/
99         {
100           nbits += 125 + e;
101           MPFR_ASSERTD (1 <= nbits && nbits < 24);
102         }
103       carry = mpfr_round_raw_4 (tp, MPFR_MANT(src), MPFR_PREC(src), negative,
104                                 nbits, rnd_mode);
105       /* we perform the reconstruction using the 'double' type here,
106          knowing the result is exactly representable as 'float' */
107       if (MPFR_UNLIKELY(carry))
108         dd = 1.0;
109       else
110         {
111 #if MPFR_LIMBS_PER_FLT == 1
112           dd = (double) tp[0] / MP_BASE_AS_DOUBLE;
113 #else
114           mp_size_t np, i;
115           np = MPFR_PREC2LIMBS (nbits);
116           MPFR_ASSERTD(np <= MPFR_LIMBS_PER_FLT);
117           /* The following computations are exact thanks to the previous
118              mpfr_round_raw. */
119           dd = (double) tp[0] / MP_BASE_AS_DOUBLE;
120           for (i = 1 ; i < np ; i++)
121             dd = (dd + tp[i]) / MP_BASE_AS_DOUBLE;
122           /* dd is the mantissa (between 1/2 and 1) of the argument rounded
123              to 24 bits */
124 #endif
125         }
126       dd = mpfr_scale2 (dd, e);
127       if (negative)
128         dd = -dd;
129 
130       /* convert (exactly) to float */
131       d = (float) dd;
132     }
133 
134   return d;
135 }
136 
137