1 /* mpfr_get_flt -- convert a mpfr_t to a machine single precision float
2
3 Copyright 2009-2023 Free Software Foundation, Inc.
4 Contributed by the AriC and Caramba projects, INRIA.
5
6 This file is part of the GNU MPFR Library.
7
8 The GNU MPFR Library is free software; you can redistribute it and/or modify
9 it under the terms of the GNU Lesser General Public License as published by
10 the Free Software Foundation; either version 3 of the License, or (at your
11 option) any later version.
12
13 The GNU MPFR Library is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
15 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
16 License for more details.
17
18 You should have received a copy of the GNU Lesser General Public License
19 along with the GNU MPFR Library; see the file COPYING.LESSER. If not, see
20 https://www.gnu.org/licenses/ or write to the Free Software Foundation, Inc.,
21 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. */
22
23 #include <float.h> /* for FLT_MIN */
24
25 #define MPFR_NEED_LONGLONG_H
26 #include "mpfr-impl.h"
27
28 #include "ieee_floats.h"
29
/* Single-precision counterparts of the double-precision special values
   DBL_NEG_ZERO, MPFR_DBL_INFM and MPFR_DBL_INFP, obtained by casting them
   to float.  mpfr_get_flt uses FLT_NEG_ZERO as the negative result that
   rounds to zero, and MPFR_FLT_INFM / MPFR_FLT_INFP as the negative /
   positive results when the value is too large and the rounding mode does
   not select -FLT_MAX / FLT_MAX. */
#define FLT_NEG_ZERO ((float) DBL_NEG_ZERO)
#define MPFR_FLT_INFM ((float) MPFR_DBL_INFM)
#define MPFR_FLT_INFP ((float) MPFR_DBL_INFP)
33
34 float
mpfr_get_flt(mpfr_srcptr src,mpfr_rnd_t rnd_mode)35 mpfr_get_flt (mpfr_srcptr src, mpfr_rnd_t rnd_mode)
36 {
37 int negative;
38 mpfr_exp_t e;
39 float d;
40
41 /* in case of NaN, +Inf, -Inf, +0, -0, the conversion from double to float
42 is exact */
43 if (MPFR_UNLIKELY (MPFR_IS_SINGULAR (src)))
44 {
45 /* for NaN, we don't propagate the sign bit */
46 return (float) mpfr_get_d (src, rnd_mode);
47 }
48
49 e = MPFR_GET_EXP (src);
50 negative = MPFR_IS_NEG (src);
51
52 if (MPFR_UNLIKELY(rnd_mode == MPFR_RNDA))
53 rnd_mode = negative ? MPFR_RNDD : MPFR_RNDU;
54
55 /* FIXME: The code below assumes the IEEE-754 binary32 format
56 with subnormal support. Fix it by converting to double, then
57 to float, and in case of binary radix (for which we want
58 correct rounding), handle double-rounding issues somewhere
59 in the code? */
60
61 /* the smallest positive normal float number is 2^(-126) = 0.5*2^(-125),
62 and the smallest positive subnormal number is 2^(-149) = 0.5*2^(-148) */
63 if (MPFR_UNLIKELY (e < -148))
64 {
65 /* |src| < 2^(-149), i.e., |src| is smaller than the smallest positive
66 subnormal number.
67 In round-to-nearest mode, 2^(-150) is rounded to zero.
68 */
69 d = negative ?
70 (rnd_mode == MPFR_RNDD ||
71 (rnd_mode == MPFR_RNDN && mpfr_cmp_si_2exp (src, -1, -150) < 0)
72 ? -FLT_MIN : FLT_NEG_ZERO) :
73 (rnd_mode == MPFR_RNDU ||
74 (rnd_mode == MPFR_RNDN && mpfr_cmp_si_2exp (src, 1, -150) > 0)
75 ? FLT_MIN : 0.0);
76 if (d != 0.0) /* we multiply FLT_MIN = 2^(-126) by FLT_EPSILON = 2^(-23)
77 to get +-2^(-149) */
78 d *= FLT_EPSILON;
79 }
80 /* the largest normal number is 2^128*(1-2^(-24)) = 0.111...111e128 */
81 else if (MPFR_UNLIKELY (e > 128))
82 {
83 d = negative ?
84 (rnd_mode == MPFR_RNDZ || rnd_mode == MPFR_RNDU ?
85 -FLT_MAX : MPFR_FLT_INFM) :
86 (rnd_mode == MPFR_RNDZ || rnd_mode == MPFR_RNDD ?
87 FLT_MAX : MPFR_FLT_INFP);
88 }
89 else /* -148 <= e <= 127 */
90 {
91 int nbits;
92 mp_limb_t tp[MPFR_LIMBS_PER_FLT];
93 int carry;
94 double dd;
95
96 nbits = IEEE_FLT_MANT_DIG; /* 24 */
97 if (MPFR_UNLIKELY (e < -125))
98 /*In the subnormal case, compute the exact number of significant bits*/
99 {
100 nbits += 125 + e;
101 MPFR_ASSERTD (1 <= nbits && nbits < 24);
102 }
103 carry = mpfr_round_raw_4 (tp, MPFR_MANT(src), MPFR_PREC(src), negative,
104 nbits, rnd_mode);
105 /* we perform the reconstruction using the 'double' type here,
106 knowing the result is exactly representable as 'float' */
107 if (MPFR_UNLIKELY(carry))
108 dd = 1.0;
109 else
110 {
111 #if MPFR_LIMBS_PER_FLT == 1
112 dd = (double) tp[0] / MP_BASE_AS_DOUBLE;
113 #else
114 mp_size_t np, i;
115 np = MPFR_PREC2LIMBS (nbits);
116 MPFR_ASSERTD(np <= MPFR_LIMBS_PER_FLT);
117 /* The following computations are exact thanks to the previous
118 mpfr_round_raw. */
119 dd = (double) tp[0] / MP_BASE_AS_DOUBLE;
120 for (i = 1 ; i < np ; i++)
121 dd = (dd + tp[i]) / MP_BASE_AS_DOUBLE;
122 /* dd is the mantissa (between 1/2 and 1) of the argument rounded
123 to 24 bits */
124 #endif
125 }
126 dd = mpfr_scale2 (dd, e);
127 if (negative)
128 dd = -dd;
129
130 /* convert (exactly) to float */
131 d = (float) dd;
132 }
133
134 return d;
135 }
136
137