1 // std::to_chars implementation for floating-point types -*- C++ -*-
2
3 // Copyright (C) 2020-2022 Free Software Foundation, Inc.
4 //
5 // This file is part of the GNU ISO C++ Library. This library is free
6 // software; you can redistribute it and/or modify it under the
7 // terms of the GNU General Public License as published by the
8 // Free Software Foundation; either version 3, or (at your option)
9 // any later version.
10
11 // This library is distributed in the hope that it will be useful,
12 // but WITHOUT ANY WARRANTY; without even the implied warranty of
13 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 // GNU General Public License for more details.
15
16 // Under Section 7 of GPL version 3, you are granted additional
17 // permissions described in the GCC Runtime Library Exception, version
18 // 3.1, as published by the Free Software Foundation.
19
20 // You should have received a copy of the GNU General Public License and
21 // a copy of the GCC Runtime Library Exception along with this program;
22 // see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
23 // <http://www.gnu.org/licenses/>.
24
25 #include <charconv>
26
27 #include <bit>
28 #include <cfenv>
29 #include <cassert>
30 #include <cmath>
31 #include <cstdio>
32 #include <cstring>
33 #if __has_include(<langinfo.h>)
34 # include <langinfo.h> // for nl_langinfo
35 #endif
36 #include <optional>
37 #include <string_view>
38 #include <type_traits>
39
40 #ifdef _GLIBCXX_LONG_DOUBLE_ALT128_COMPAT
41 #ifndef __LONG_DOUBLE_IBM128__
42 #error "floating_to_chars.cc must be compiled with -mabi=ibmlongdouble"
43 #endif
44 // sprintf for __ieee128
45 extern "C" int __sprintfieee128(char*, const char*, ...);
46 #endif
47
48 // This implementation crucially assumes float/double have the
49 // IEEE binary32/binary64 formats.
50 #if _GLIBCXX_FLOAT_IS_IEEE_BINARY32 && _GLIBCXX_DOUBLE_IS_IEEE_BINARY64 \
51 /* And it also assumes that uint64_t POW10_SPLIT_2[3133][3] is valid. */\
52 && __SIZE_WIDTH__ >= 32
53
54 // Determine the binary format of 'long double'.
55
56 // We support the binary64, float80 (i.e. x86 80-bit extended precision),
57 // binary128, and ibm128 formats.
58 #define LDK_UNSUPPORTED 0
59 #define LDK_BINARY64 1
60 #define LDK_FLOAT80 2
61 #define LDK_BINARY128 3
62 #define LDK_IBM128 4
63
64 #if __LDBL_MANT_DIG__ == __DBL_MANT_DIG__
65 # define LONG_DOUBLE_KIND LDK_BINARY64
66 #elif __LDBL_MANT_DIG__ == 64
67 # define LONG_DOUBLE_KIND LDK_FLOAT80
68 #elif __LDBL_MANT_DIG__ == 113
69 # define LONG_DOUBLE_KIND LDK_BINARY128
70 #elif __LDBL_MANT_DIG__ == 106
71 # define LONG_DOUBLE_KIND LDK_IBM128
72 #else
73 # define LONG_DOUBLE_KIND LDK_UNSUPPORTED
74 #endif
75
76 // For now we only support __float128 when it's the powerpc64 __ieee128 type.
77 #if defined _GLIBCXX_LONG_DOUBLE_ALT128_COMPAT && __FLT128_MANT_DIG__ == 113
78 // Define overloads of std::to_chars for __float128.
79 # define FLOAT128_TO_CHARS 1
80 #endif
81
82 #ifdef FLOAT128_TO_CHARS
83 using F128_type = __float128;
84 #else
85 using F128_type = void;
86 #endif
87
88 namespace
89 {
90 #if defined __SIZEOF_INT128__
91 using uint128_t = unsigned __int128;
92 #else
93 # include "uint128_t.h"
94 #endif
95
96 namespace ryu
97 {
98 #include "ryu/common.h"
99 #include "ryu/digit_table.h"
100 #include "ryu/d2s_intrinsics.h"
101 #include "ryu/d2s_full_table.h"
102 #include "ryu/d2fixed_full_table.h"
103 #include "ryu/f2s_intrinsics.h"
104 #include "ryu/d2s.c"
105 #include "ryu/d2fixed.c"
106 #include "ryu/f2s.c"
107
108 namespace generic128
109 {
110 // Put the generic Ryu bits in their own namespace to avoid name conflicts.
111 # include "ryu/generic_128.h"
112 # include "ryu/ryu_generic_128.h"
113 # include "ryu/generic_128.c"
114 } // namespace generic128
115
116 using generic128::floating_decimal_128;
117 using generic128::generic_binary_to_decimal;
118
119 int
to_chars(const floating_decimal_128 v,char * const result)120 to_chars(const floating_decimal_128 v, char* const result)
121 { return generic128::generic_to_chars(v, result); }
122 } // namespace ryu
123
// Traits describing the binary format of each supported floating-point
// type.  The primary template is intentionally empty; every supported type
// provides an explicit specialization with the actual format parameters.
template<typename T>
  struct floating_type_traits
  {
  };
129
130 template<>
131 struct floating_type_traits<float>
132 {
133 static constexpr int mantissa_bits = 23;
134 static constexpr int exponent_bits = 8;
135 static constexpr bool has_implicit_leading_bit = true;
136 using mantissa_t = uint32_t;
137 using shortest_scientific_t = ryu::floating_decimal_32;
138
139 static constexpr uint64_t pow10_adjustment_tab[]
140 = { 0b0000000000011101011100110101100101101110000000000000000000000000 };
141 };
142
143 template<>
144 struct floating_type_traits<double>
145 {
146 static constexpr int mantissa_bits = 52;
147 static constexpr int exponent_bits = 11;
148 static constexpr bool has_implicit_leading_bit = true;
149 using mantissa_t = uint64_t;
150 using shortest_scientific_t = ryu::floating_decimal_64;
151
152 static constexpr uint64_t pow10_adjustment_tab[]
153 = { 0b0000000000000000000000011000110101110111000001100101110000111100,
154 0b0111100011110101011000011110000000110110010101011000001110011111,
155 0b0101101100000000011100100100111100110110110100010001010101110000,
156 0b0011110010111000101111110101100011101100010001010000000101100111,
157 0b0001010000011001011100100001010000010101101000001101000000000000 };
158 };
159
160 #if LONG_DOUBLE_KIND == LDK_BINARY128 || defined FLOAT128_TO_CHARS
161 // Traits for the IEEE binary128 format.
162 struct floating_type_traits_binary128
163 {
164 static constexpr int mantissa_bits = 112;
165 static constexpr int exponent_bits = 15;
166 static constexpr bool has_implicit_leading_bit = true;
167 using mantissa_t = uint128_t;
168 using shortest_scientific_t = ryu::floating_decimal_128;
169
170 static constexpr uint64_t pow10_adjustment_tab[]
171 = { 0b0000000000000000000000000000000000000000000000000100000010000000,
172 0b1011001111110100000100010101101110011100100110000110010110011000,
173 0b1010100010001101111111000000001101010010100010010000111011110111,
174 0b1011111001110001111000011111000010110111000111110100101010100101,
175 0b0110100110011110011011000011000010011001110001001001010011100011,
176 0b0000011111110010101111101011101010000110011111100111001110100111,
177 0b0100010101010110000010111011110100000010011001001010001110111101,
178 0b1101110111000010001101100000110100000111001001101011000101011011,
179 0b0100111011101101010000001101011000101100101110010010110000101011,
180 0b0100000110111000000110101000010011101000110100010110000011101101,
181 0b1011001101001000100001010001100100001111011101010101110001010110,
182 0b1000000001000000101001110010110010001111101101010101001100000110,
183 0b0101110110100110000110000001001010111110001110010000111111010011,
184 0b1010001111100111000100011100100100111100100101000001011001000111,
185 0b1010011000011100110101100111001011100101111111100001110100000100,
186 0b1100011100100010100000110001001010000000100000001001010111011101,
187 0b0101110000100011001111101101000000100110000010010111010001111010,
188 0b0100111100011010110111101000100110000111001001101100000001111100,
189 0b1100100100111110101011000100000101011010110111000111110100110101,
190 0b0110010000010111010100110011000000111010000010111011010110000100,
191 0b0101001001010010110111010111000101011100000111100111000001110010,
192 0b1101111111001011101010110001000111011010111101001011010110100100,
193 0b0001000100110000011111101011001101110010110110010000000011100100,
194 0b0001000000000101001001001000000000011000100011001110101001001110,
195 0b0010010010001000111010011011100001000110011011011110110100111000,
196 0b0000100110101100000111100010100100011100110111011100001111001100,
197 0b1011111010001110001100000011110111111111100000001011111111101100,
198 0b0000011100001111010101110000100110111100101101110111101001000001,
199 0b1100010001110110111100001001001101101000011100000010110101001011,
200 0b0100101001101011111001011110101101100011011111011100101010101111,
201 0b0001101001111001110000101101101100001011010001011110011101000010,
202 0b1111000000101001101111011010110011101110100001011011001011100010,
203 0b0101001010111101101100001111100010010110001101001000001101100100,
204 0b0101100101011110001100101011111000111001111001001001101101100001,
205 0b1111001101010010100100011011000110110010001111000111010001001101,
206 0b0001110010011000000001000110110111011000011100001000011001110111,
207 0b0100001011011011011011110011101100100101111111101100101000001110,
208 0b0101011110111101010111100111101111000101111111111110100011011010,
209 0b1110101010001001110100000010110111010111111010111110100110010110,
210 0b1010001111100001001100101000110100001100011100110010000011010111,
211 0b1111111101101111000100111100000101011000001110011011101010111001,
212 0b1111101100001110100101111101011001000100000101110000110010100011,
213 0b1001010110110101101101000101010001010000101011011111010011010000,
214 0b0111001110110011101001100111000001000100001010110000010000001101,
215 0b0101111100111110100111011001111001111011011110010111010011101010,
216 0b1110111000000001100100111001100100110001011011001110101111110111,
217 0b0001010001001101010111101010011111000011110001101101011001111111,
218 0b0101000011100011010010001101100001011101011010100110101100100010,
219 0b0001000101011000100101111100110110000101101101111000110001001011,
220 0b0101100101001011011000010101000000010100011100101101000010011111,
221 0b1000010010001011101001011010100010111011110100110011011000100111,
222 0b1000011011100001010111010111010011101100100010010010100100101001,
223 0b1001001001010111110101000010111010000000101111010100001010010010,
224 0b0011011110110010010101111011000001000000000011011111000011111011,
225 0b1011000110100011001110000001000100000001011100010111010010011110,
226 0b0111101110110101110111110000011000000100011100011000101101101110,
227 0b1001100101111011011100011110101011001111100111101010101010110111,
228 0b1100110010010001100011001111010000000100011101001111011101001111,
229 0b1000111001111010100101000010000100000001001100101010001011001101,
230 0b0011101011110000110010100101010100110010100001000010101011111101,
231 0b1100000000000110000010101011000000011101000110011111100010111111,
232 0b0010100110000011011100010110111100010110101100110011101110001101,
233 0b0010111101010011111000111001111100110111111100100011110001101110,
234 0b1001110111001001101001001001011000010100110001000000100011010110,
235 0b0011110101100111011011111100001000011001010100111100100101111010,
236 0b0010001101000011000010100101110000010101101000100110000100001010,
237 0b0010000010100110010101100101110011101111000111111111001001100001,
238 0b0100111111011011011011100111111011000010011101101111011111110110,
239 0b1111111111010110101011101000100101110100001110001001101011100111,
240 0b1011111101000101110000111100100010111010100001010000010010110010,
241 0b1111010101001011101011101010000100110110001110111100100110111111,
242 0b1011001101000001001101000010101010010110010001100001011100011010,
243 0b0101001011011101010001110100010000010001111100100100100001001101,
244 0b0010100000111001100011000101100101000001111100111001101000000010,
245 0b1011001111010101011001000100100110100100110111110100000110111000,
246 0b0101011111010011100011010010111101110010100001111111100010001001,
247 0b0010111011101100100000000000001111111010011101100111100001001101,
248 0b1101000000000000000000000000000000000000000000000000000000000000 };
249 };
250
251 # ifdef FLOAT128_TO_CHARS
252 template<>
253 struct floating_type_traits<__float128> : floating_type_traits_binary128
254 { };
255 # endif
256 #endif
257
258 #if LONG_DOUBLE_KIND == LDK_BINARY64
259 // When long double is equivalent to double, we just forward the long double
260 // overloads to the double overloads, so we don't need to define a
261 // floating_type_traits<long double> specialization in this case.
262 #elif LONG_DOUBLE_KIND == LDK_FLOAT80
263 template<>
264 struct floating_type_traits<long double>
265 {
266 static constexpr int mantissa_bits = 64;
267 static constexpr int exponent_bits = 15;
268 static constexpr bool has_implicit_leading_bit = false;
269 using mantissa_t = uint64_t;
270 using shortest_scientific_t = ryu::floating_decimal_128;
271
272 static constexpr uint64_t pow10_adjustment_tab[]
273 = { 0b0000000000000000000000000000110101011111110100010100110000011101,
274 0b1001100101001111010011011111101000101111110001011001011101110000,
275 0b0000101111111011110010001000001010111101011110111111010100011001,
276 0b0011100000011111001101101011111001111100100010000101001111101001,
277 0b0100100100000000100111010010101110011000110001101101110011001010,
278 0b0111100111100010100000010011000010010110101111110101000011110100,
279 0b1010100111100010011110000011011101101100010110000110101010101010,
280 0b0000001111001111000000101100111011011000101000110011101100110010,
281 0b0111000011100100101101010100001101111110101111001000010011111111,
282 0b0010111000100110100100100010101100111010110001101010010111001000,
283 0b0000100000010110000011001001000111000001111010100101101000001111,
284 0b0010101011101000111100001011000010011101000101010010010000101111,
285 0b1011111011101101110010101011010001111000101000101101011001100011,
286 0b1010111011011011110111110011001010000010011001110100101101000101,
287 0b0011000001110110011010010000011100100011001011001100001101010110,
288 0b0100011111011000111111101000011110000010111110101001000000001001,
289 0b1110000001110001001101101110011000100000001010000111100010111010,
290 0b1110001001010011101000111000001000010100110000010110100011110000,
291 0b0000011010110000110001111000011111000011001101001101001001000110,
292 0b1010010111001000101001100101010110100100100010010010000101000010,
293 0b1011001110000111100010100110000011100011111001110111001100000101,
294 0b0110101001001000010110001000010001010101110101100001111100011001,
295 0b1111100011110101011110011010101001010010100011000010110001101001,
296 0b0100000100001000111101011100010011011111011001000000001100011000,
297 0b1110111111000111100101110111110000000011001110011100011011011001,
298 0b1100001100100000010001100011011000111011110000110011010101000011,
299 0b1111111011100111011101001111111000010000001111010111110010000100,
300 0b1110111001111110101111000101000000001010001110011010001000111010,
301 0b1000010001011000101111111010110011111101110101101001111000111010,
302 0b0100000111101001000111011001101000001010111011101001101111000100,
303 0b0000011100110001000111011100111100110001101111111010110111100000,
304 0b0000011101011100100110010011110101010100010011110010010111010000,
305 0b0011011001100111110101111100001001101110101101001110110011110110,
306 0b1011000101000001110100111001100100111100110011110000000001101000,
307 0b1011100011110100001001110101010110111001000000001011101001011110,
308 0b1111001010010010100000010110101010101011101000101000000000001100,
309 0b1000001111100100111001110101100001010011111111000001000011110000,
310 0b0001011101001000010000101101111000001110101100110011001100110111,
311 0b1110011100000010101011011111001010111101111110100000011100000011,
312 0b1001110110011100101010011110100010110001001110110000101011100110,
313 0b1001101000100011100111010000011011100001000000110101100100001001,
314 0b1010111000101000101101010111000010001100001010100011111100000100,
315 0b0111101000100011000101101011111011100010001101110111001111001011,
316 0b1110100111010110001110110110000000010110100011110000010001111100,
317 0b1100010100011010001011001000111001010101011110100101011001000000,
318 0b0000110001111001100110010110111010101101001101000000000010010101,
319 0b0001110111101000001111101010110010010000111110111100000111110100,
320 0b0111110111001001111000110001101101001010101110110101111110000100,
321 0b0000111110111010101111100010111010011100010110011011011001000001,
322 0b1010010100100100101110111111111000101100000010111111101101000110,
323 0b1000100111111101100011001101000110001000000100010101010100001101,
324 0b1100101010101000111100101100001000110001110010100000000010110101,
325 0b1010000100111101100100101010010110100010000000110101101110000100,
326 0b1011111011110001110000100100000000001010111010001101100000100100,
327 0b0111101101100011001110011100000001000101101101111000100111011111,
328 0b0100111010010011011001010011110100001100111010010101111111100011,
329 0b0010001001011000111000001100110111110111110010100011000110110110,
330 0b0101010110000000010000100000110100111011111101000100000111010010,
331 0b0110000011011101000001010100110101101110011100110101000000001001,
332 0b1101100110100000011000001111000100100100110001100110101010101100,
333 0b0010100101010110010010001010101000011111111111001011001010001111,
334 0b0111001010001111001100111001010101001000110101000011110000001000,
335 0b0110010011001001001111110001010010001011010010001101110110110011,
336 0b0110010100111011000100111000001001101011111001110010111110111111,
337 0b0101110111001001101100110100101001110010101110011001101110001000,
338 0b0100110101010111011010001100010111100011010011111001010100111000,
339 0b0111000110110111011110100100010111000110000110110110110001111110,
340 0b1000101101010100100100111110100011110110110010011001110011110101,
341 0b1001101110101001010100111101101011000101000010110101101111110000,
342 0b0100100101001011011001001011000010001101001010010001010110101000,
343 0b0010100001001011100110101000010110000111000111000011100101011011,
344 0b0110111000011001111101101011111010001000000010101000101010011110,
345 0b1000110110100001111011000001111100001001000000010110010100100100,
346 0b1001110100011111100111101011010000010101011100101000010010100110,
347 0b0001010110101110100010101010001110110110100011101010001001111100,
348 0b1010100101101100000010110011100110100010010000100100001110000100,
349 0b0001000000010000001010000010100110000001110100111001110111101101,
350 0b1100000000000000000000000000000000000000000000000000000000000000 };
351 };
352 #elif LONG_DOUBLE_KIND == LDK_BINARY128
353 template<>
354 struct floating_type_traits<long double> : floating_type_traits_binary128
355 { };
356 #elif LONG_DOUBLE_KIND == LDK_IBM128
357 template<>
358 struct floating_type_traits<long double>
359 {
360 static constexpr int mantissa_bits = 105;
361 static constexpr int exponent_bits = 11;
362 static constexpr bool has_implicit_leading_bit = true;
363 using mantissa_t = uint128_t;
364 using shortest_scientific_t = ryu::floating_decimal_128;
365
366 static constexpr uint64_t pow10_adjustment_tab[]
367 = { 0b0000000000000000000000000000000000000000000000001000000100000000,
368 0b0000000000000000000100000000000000000000001000000000000000000010,
369 0b0000100000000000000000001001000000000000000001100100000000000000,
370 0b0011000000000000000000000000000001110000010000000000000000000000,
371 0b0000100000000000001000000000000000000000000000100000000000000000 };
372 };
373 #endif
374
375 // An IEEE-style decomposition of a floating-point value of type T.
376 template<typename T>
377 struct ieee_t
378 {
379 typename floating_type_traits<T>::mantissa_t mantissa;
380 uint32_t biased_exponent;
381 bool sign;
382 };
383
384 // Decompose the floating-point value into its IEEE components.
385 template<typename T>
386 ieee_t<T>
get_ieee_repr(const T value)387 get_ieee_repr(const T value)
388 {
389 using mantissa_t = typename floating_type_traits<T>::mantissa_t;
390 constexpr int mantissa_bits = floating_type_traits<T>::mantissa_bits;
391 constexpr int exponent_bits = floating_type_traits<T>::exponent_bits;
392 constexpr int total_bits = mantissa_bits + exponent_bits + 1;
393
394 constexpr auto get_uint_t = [] {
395 if constexpr (total_bits <= 32)
396 return uint32_t{};
397 else if constexpr (total_bits <= 64)
398 return uint64_t{};
399 else if constexpr (total_bits <= 128)
400 return uint128_t{};
401 };
402 using uint_t = decltype(get_uint_t());
403 uint_t value_bits = 0;
404 memcpy(&value_bits, &value, sizeof(value));
405
406 ieee_t<T> ieee_repr;
407 ieee_repr.mantissa
408 = static_cast<mantissa_t>(value_bits & ((uint_t{1} << mantissa_bits) - 1u));
409 value_bits >>= mantissa_bits;
410 ieee_repr.biased_exponent
411 = static_cast<uint32_t>(value_bits & ((uint_t{1} << exponent_bits) - 1u));
412 value_bits >>= exponent_bits;
413 ieee_repr.sign = (value_bits & 1) != 0;
414 return ieee_repr;
415 }
416
417 #if LONG_DOUBLE_KIND == LDK_IBM128
418 template<>
419 ieee_t<long double>
get_ieee_repr(const long double value)420 get_ieee_repr(const long double value)
421 {
422 // The layout of __ibm128 isn't compatible with the standard IEEE format.
423 // So we transform it into an IEEE-compatible format, suitable for
424 // consumption by the generic Ryu API, with an 11-bit exponent and 105-bit
425 // mantissa (plus an implicit leading bit). We use the exponent and sign
426 // of the high part, and we merge the mantissa of the high part with the
427 // mantissa (and the implicit leading bit) of the low part.
428 uint64_t value_bits[2] = {};
429 memcpy(value_bits, &value, sizeof(value_bits));
430
431 const uint64_t value_hi = value_bits[0];
432 const uint64_t value_lo = value_bits[1];
433
434 uint64_t mantissa_hi = value_hi & ((1ull << 52) - 1);
435 unsigned exponent_hi = (value_hi >> 52) & ((1ull << 11) - 1);
436 const int sign_hi = (value_hi >> 63) & 1;
437
438 uint64_t mantissa_lo = value_lo & ((1ull << 52) - 1);
439 const unsigned exponent_lo = (value_lo >> 52) & ((1ull << 11) - 1);
440 const int sign_lo = (value_lo >> 63) & 1;
441
442 {
443 // The following code for adjusting the low-part mantissa to combine
444 // it with the high-part mantissa is taken from the glibc source file
445 // sysdeps/ieee754/ldbl-128ibm/printf_fphex.c.
446 mantissa_lo <<= 7;
447 if (exponent_lo != 0)
448 mantissa_lo |= (1ull << (52 + 7));
449 else
450 mantissa_lo <<= 1;
451
452 const int ediff = exponent_hi - exponent_lo - 53;
453 if (ediff > 63)
454 mantissa_lo = 0;
455 else if (ediff > 0)
456 mantissa_lo >>= ediff;
457 else if (ediff < 0)
458 mantissa_lo <<= -ediff;
459
460 if (sign_lo != sign_hi && mantissa_lo != 0)
461 {
462 mantissa_lo = (1ull << 60) - mantissa_lo;
463 if (mantissa_hi == 0)
464 {
465 mantissa_hi = 0xffffffffffffeLL | (mantissa_lo >> 59);
466 mantissa_lo = 0xfffffffffffffffLL & (mantissa_lo << 1);
467 exponent_hi--;
468 }
469 else
470 mantissa_hi--;
471 }
472 }
473
474 ieee_t<long double> ieee_repr;
475 ieee_repr.mantissa = ((uint128_t{mantissa_hi} << 64)
476 | (uint128_t{mantissa_lo} << 4)) >> 11;
477 ieee_repr.biased_exponent = exponent_hi;
478 ieee_repr.sign = sign_hi;
479 return ieee_repr;
480 }
481 #endif
482
483 // Invoke Ryu to obtain the shortest scientific form for the given
484 // floating-point number.
485 template<typename T>
486 typename floating_type_traits<T>::shortest_scientific_t
floating_to_shortest_scientific(const T value)487 floating_to_shortest_scientific(const T value)
488 {
489 if constexpr (std::is_same_v<T, float>)
490 return ryu::floating_to_fd32(value);
491 else if constexpr (std::is_same_v<T, double>)
492 return ryu::floating_to_fd64(value);
493 else if constexpr (std::is_same_v<T, long double>
494 || std::is_same_v<T, F128_type>)
495 {
496 constexpr int mantissa_bits
497 = floating_type_traits<T>::mantissa_bits;
498 constexpr int exponent_bits
499 = floating_type_traits<T>::exponent_bits;
500 constexpr bool has_implicit_leading_bit
501 = floating_type_traits<T>::has_implicit_leading_bit;
502
503 const auto [mantissa, exponent, sign] = get_ieee_repr(value);
504 return ryu::generic_binary_to_decimal(mantissa, exponent, sign,
505 mantissa_bits, exponent_bits,
506 !has_implicit_leading_bit);
507 }
508 }
509
510 // This subroutine returns true if the shortest scientific form fd is a
511 // positive power of 10, and the floating-point number that has this shortest
512 // scientific form is smaller than this power of 10.
513 //
514 // For instance, the exactly-representable 64-bit number
515 // 99999999999999991611392.0 has the shortest scientific form 1e23, so its
516 // exact value is smaller than its shortest scientific form.
517 //
518 // For these powers of 10 the length of the fixed form is one digit less
519 // than what the scientific exponent suggests.
520 //
521 // This subroutine inspects a lookup table to detect when fd is such a
522 // "rounded up" power of 10.
523 template<typename T>
524 bool
is_rounded_up_pow10_p(const typename floating_type_traits<T>::shortest_scientific_t fd)525 is_rounded_up_pow10_p(const typename
526 floating_type_traits<T>::shortest_scientific_t fd)
527 {
528 if (fd.exponent < 0 || fd.mantissa != 1) [[likely]]
529 return false;
530
531 constexpr auto& pow10_adjustment_tab
532 = floating_type_traits<T>::pow10_adjustment_tab;
533 __glibcxx_assert(fd.exponent/64 < (int)std::size(pow10_adjustment_tab));
534 return (pow10_adjustment_tab[fd.exponent/64]
535 & (1ull << (63 - fd.exponent%64)));
536 }
537
538 int
get_mantissa_length(const ryu::floating_decimal_32 fd)539 get_mantissa_length(const ryu::floating_decimal_32 fd)
540 { return ryu::decimalLength9(fd.mantissa); }
541
542 int
get_mantissa_length(const ryu::floating_decimal_64 fd)543 get_mantissa_length(const ryu::floating_decimal_64 fd)
544 { return ryu::decimalLength17(fd.mantissa); }
545
546 int
get_mantissa_length(const ryu::floating_decimal_128 fd)547 get_mantissa_length(const ryu::floating_decimal_128 fd)
548 { return ryu::generic128::decimalLength(fd.mantissa); }
549
550 #if !defined __SIZEOF_INT128__
551 // An implementation of base-10 std::to_chars for the uint128_t class type,
552 // used by targets that lack __int128.
553 std::to_chars_result
to_chars(char * first,char * const last,uint128_t x)554 to_chars(char* first, char* const last, uint128_t x)
555 {
556 const int len = ryu::generic128::decimalLength(x);
557 if (last - first < len)
558 return {last, std::errc::value_too_large};
559 if (x == 0)
560 {
561 *first++ = '0';
562 return {first, std::errc{}};
563 }
564 for (int i = 0; i < len; ++i)
565 {
566 first[len - 1 - i] = '0' + static_cast<char>(x % 10);
567 x /= 10;
568 }
569 __glibcxx_assert(x == 0);
570 return {first + len, std::errc{}};
571 }
572 #endif
573 } // anon namespace
574
575 namespace std _GLIBCXX_VISIBILITY(default)
576 {
577 _GLIBCXX_BEGIN_NAMESPACE_VERSION
578
// This subroutine of __floating_to_chars_* handles writing nan, inf and 0 in
// all formatting modes.
//
// Returns nullopt when VALUE is an ordinary (normal or subnormal) nonzero
// number; nothing is written in that case and the caller formats the value
// itself.  Otherwise returns the final result: the end of the written text
// together with a value-initialized errc, or {LAST, errc::value_too_large}
// if [FIRST, LAST) is too short (nothing is written then).
//
// PRECISION must be non-negative.  It only matters when formatting zero in
// the fixed, scientific and hex formats, where it is the number of '0'
// digits written after the radix point.
template<typename T>
  static optional<to_chars_result>
  __handle_special_value(char* first, char* const last, const T value,
                         const chars_format fmt, const int precision)
  {
    __glibcxx_assert(precision >= 0);

    string_view str;
    switch (__builtin_fpclassify(FP_NAN, FP_INFINITE, FP_NORMAL, FP_SUBNORMAL,
                                 FP_ZERO, value))
      {
      case FP_INFINITE:
        str = "-inf";
        break;

      case FP_NAN:
        str = "-nan";
        break;

      case FP_ZERO:
        break;

      default:
      case FP_SUBNORMAL:
      case FP_NORMAL: [[likely]]
        return nullopt;
      }

    if (!str.empty())
      {
        // We're formatting +-inf or +-nan.  The literals above carry a
        // leading '-', which is dropped for values without the sign bit.
        if (!__builtin_signbit(value))
          str.remove_prefix(strlen("-"));

        if (last - first < (int)str.length())
          return {{last, errc::value_too_large}};

        memcpy(first, &str[0], str.length());
        first += str.length();
        return {{first, errc{}}};
      }

    // We're formatting 0.
    __glibcxx_assert(value == 0);
    const auto orig_first = first;
    const bool sign = __builtin_signbit(value);

    // Work out what surrounds the fractional zeros: whether there is a
    // radix point at all, and which exponent suffix (if any) follows.
    bool has_fraction = false;
    string_view suffix;
    switch (fmt)
      {
      case chars_format::fixed:
      case chars_format::scientific:
      case chars_format::hex:
        has_fraction = (precision != 0);
        if (fmt == chars_format::scientific)
          suffix = "e+00";
        else if (fmt == chars_format::hex)
          suffix = "p+0";
        break;

      case chars_format::general:
      default: // case chars_format{}:
        break;
      }

    // The space check is done in the pointer difference type and never adds
    // PRECISION to the small fixed part before comparing.  Summing
    // everything into an int would overflow for a PRECISION close to
    // INT_MAX and let the memset below run past LAST.
    using diff_t = decltype(last - first);
    const diff_t fixed_length
      = sign + 1 + (has_fraction ? 1 : 0)
        + static_cast<diff_t>(suffix.length());
    const diff_t fraction_length = has_fraction ? precision : 0;
    const diff_t available = last - first;
    if (available < fixed_length
        || available - fixed_length < fraction_length)
      return {{last, errc::value_too_large}};

    if (sign)
      *first++ = '-';
    *first++ = '0';
    if (has_fraction)
      {
        *first++ = '.';
        memset(first, '0', precision);
        first += precision;
      }
    if (!suffix.empty())
      {
        memcpy(first, suffix.data(), suffix.length());
        first += suffix.length();
      }

    // The sum cannot overflow here: it is at most AVAILABLE.
    __glibcxx_assert(first - orig_first == fixed_length + fraction_length);
    return {{first, errc{}}};
  }
678
679 // This subroutine of the floating-point to_chars overloads performs
680 // hexadecimal formatting.
681 template<typename T>
682 static to_chars_result
__floating_to_chars_hex(char * first,char * const last,const T value,const optional<int> precision)683 __floating_to_chars_hex(char* first, char* const last, const T value,
684 const optional<int> precision)
685 {
686 if (precision.has_value() && precision.value() < 0) [[unlikely]]
687 // A negative precision argument is treated as if it were omitted.
688 return __floating_to_chars_hex(first, last, value, nullopt);
689
690 __glibcxx_requires_valid_range(first, last);
691
692 constexpr int mantissa_bits = floating_type_traits<T>::mantissa_bits;
693 constexpr bool has_implicit_leading_bit
694 = floating_type_traits<T>::has_implicit_leading_bit;
695 constexpr int exponent_bits = floating_type_traits<T>::exponent_bits;
696 constexpr int exponent_bias = (1u << (exponent_bits - 1)) - 1;
697 using mantissa_t = typename floating_type_traits<T>::mantissa_t;
698 constexpr int mantissa_t_width = sizeof(mantissa_t) * __CHAR_BIT__;
699
700 if (auto result = __handle_special_value(first, last, value,
701 chars_format::hex,
702 precision.value_or(0)))
703 return *result;
704
705 // Extract the sign, mantissa and exponent from the value.
706 const auto [ieee_mantissa, biased_exponent, sign] = get_ieee_repr(value);
707 const bool is_normal_number = (biased_exponent != 0);
708
709 // Calculate the unbiased exponent.
710 const int32_t unbiased_exponent = (is_normal_number
711 ? biased_exponent - exponent_bias
712 : 1 - exponent_bias);
713
714 // Shift the mantissa so that its bitwidth is a multiple of 4.
715 constexpr unsigned rounded_mantissa_bits = (mantissa_bits + 3) / 4 * 4;
716 static_assert(mantissa_t_width >= rounded_mantissa_bits);
717 mantissa_t effective_mantissa
718 = ieee_mantissa << (rounded_mantissa_bits - mantissa_bits);
719 if (is_normal_number)
720 {
721 if constexpr (has_implicit_leading_bit)
722 // Restore the mantissa's implicit leading bit.
723 effective_mantissa |= mantissa_t{1} << rounded_mantissa_bits;
724 else
725 // The explicit mantissa bit should already be set.
726 __glibcxx_assert(effective_mantissa & (mantissa_t{1} << (mantissa_bits
727 - 1u)));
728 }
729
730 // Compute the shortest precision needed to print this value exactly,
731 // disregarding trailing zeros.
732 constexpr int full_hex_precision = (has_implicit_leading_bit
733 ? (mantissa_bits + 3) / 4
// With an explicit leading bit, we
// use the four leading bits as the
// hexit before the decimal point.
737 : (mantissa_bits - 4 + 3) / 4);
738 const int trailing_zeros = __countr_zero(effective_mantissa) / 4;
739 const int shortest_full_precision = full_hex_precision - trailing_zeros;
740 __glibcxx_assert(shortest_full_precision >= 0);
741
742 int written_exponent = unbiased_exponent;
743 int effective_precision = precision.value_or(shortest_full_precision);
744 int excess_precision = 0;
745 if (effective_precision < shortest_full_precision)
746 {
747 // When limiting the precision, we need to determine how to round the
748 // least significant printed hexit. The following branchless
749 // bit-level-parallel technique computes whether to round up the
750 // mantissa bit at index N (according to round-to-nearest rules) when
751 // dropping N bits of precision, for each index N in the bit vector.
752 // This technique is borrowed from the MSVC implementation.
753 using bitvec = mantissa_t;
754 const bitvec round_bit = effective_mantissa << 1;
755 const bitvec has_tail_bits = round_bit - 1;
756 const bitvec lsb_bit = effective_mantissa;
757 const bitvec should_round = round_bit & (has_tail_bits | lsb_bit);
758
759 const int dropped_bits = 4*(full_hex_precision - effective_precision);
760 // Mask out the dropped nibbles.
761 effective_mantissa >>= dropped_bits;
762 effective_mantissa <<= dropped_bits;
763 if (should_round & (mantissa_t{1} << dropped_bits))
764 {
765 // Round up the least significant nibble.
766 effective_mantissa += mantissa_t{1} << dropped_bits;
767 // Check and adjust for overflow of the leading nibble. When the
768 // type has an implicit leading bit, then the leading nibble
769 // before rounding is either 0 or 1, so it can't overflow.
770 if constexpr (!has_implicit_leading_bit)
771 {
772 // The only supported floating-point type with explicit
773 // leading mantissa bit is LDK_FLOAT80, i.e. x86 80-bit
774 // extended precision, and so we hardcode the below overflow
775 // check+adjustment for this type.
776 static_assert(mantissa_t_width == 64
777 && rounded_mantissa_bits == 64);
778 if (effective_mantissa == 0)
779 {
780 // We rounded up the least significant nibble and the
781 // mantissa overflowed, e.g f.fcp+10 with precision=1
782 // became 10.0p+10. Absorb this extra hexit into the
783 // exponent to obtain 1.0p+14.
784 effective_mantissa
785 = mantissa_t{1} << (rounded_mantissa_bits - 4);
786 written_exponent += 4;
787 }
788 }
789 }
790 }
791 else
792 {
793 excess_precision = effective_precision - shortest_full_precision;
794 effective_precision = shortest_full_precision;
795 }
796
797 // Compute the leading hexit and mask it out from the mantissa.
798 char leading_hexit;
799 if constexpr (has_implicit_leading_bit)
800 {
801 const auto nibble = unsigned(effective_mantissa >> rounded_mantissa_bits);
802 __glibcxx_assert(nibble <= 2);
803 leading_hexit = '0' + nibble;
804 effective_mantissa &= ~(mantissa_t{0b11} << rounded_mantissa_bits);
805 }
806 else
807 {
808 const auto nibble = unsigned(effective_mantissa >> (rounded_mantissa_bits-4));
809 __glibcxx_assert(nibble < 16);
810 leading_hexit = "0123456789abcdef"[nibble];
811 effective_mantissa &= ~(mantissa_t{0b1111} << (rounded_mantissa_bits-4));
812 written_exponent -= 3;
813 }
814
815 // Now before we start writing the string, determine the total length of
816 // the output string and perform a single bounds check.
817 int expected_output_length = sign + 1;
818 if (effective_precision + excess_precision > 0)
819 expected_output_length += strlen(".");
820 expected_output_length += effective_precision;
821 const int abs_written_exponent = abs(written_exponent);
822 expected_output_length += (abs_written_exponent >= 10000 ? strlen("p+ddddd")
823 : abs_written_exponent >= 1000 ? strlen("p+dddd")
824 : abs_written_exponent >= 100 ? strlen("p+ddd")
825 : abs_written_exponent >= 10 ? strlen("p+dd")
826 : strlen("p+d"));
827 if (last - first < expected_output_length
828 || last - first - expected_output_length < excess_precision)
829 return {last, errc::value_too_large};
830 char* const expected_output_end = first + expected_output_length + excess_precision;
831
832 // Write the negative sign and the leading hexit.
833 if (sign)
834 *first++ = '-';
835 *first++ = leading_hexit;
836
837 if (effective_precision + excess_precision > 0)
838 *first++ = '.';
839
840 if (effective_precision > 0)
841 {
842 int written_hexits = 0;
843 // Extract and mask out the leading nibble after the decimal point,
844 // write its corresponding hexit, and repeat until the mantissa is
845 // empty.
846 int nibble_offset = rounded_mantissa_bits;
847 if constexpr (!has_implicit_leading_bit)
848 // We already printed the entire leading hexit.
849 nibble_offset -= 4;
850 while (effective_mantissa != 0)
851 {
852 nibble_offset -= 4;
853 const auto nibble = unsigned(effective_mantissa >> nibble_offset);
854 __glibcxx_assert(nibble < 16);
855 *first++ = "0123456789abcdef"[nibble];
856 ++written_hexits;
857 effective_mantissa &= ~(mantissa_t{0b1111} << nibble_offset);
858 }
859 __glibcxx_assert(nibble_offset >= 0);
860 __glibcxx_assert(written_hexits <= effective_precision);
861 // Since the mantissa is now empty, every hexit hereafter must be '0'.
862 if (int remaining_hexits = effective_precision - written_hexits)
863 {
864 memset(first, '0', remaining_hexits);
865 first += remaining_hexits;
866 }
867 }
868
869 if (excess_precision > 0)
870 {
871 memset(first, '0', excess_precision);
872 first += excess_precision;
873 }
874
875 // Finally, write the exponent.
876 *first++ = 'p';
877 if (written_exponent >= 0)
878 *first++ = '+';
879 const to_chars_result result = to_chars(first, last, written_exponent);
880 __glibcxx_assert(result.ec == errc{} && result.ptr == expected_output_end);
881 return result;
882 }
883
namespace
{
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wabi"
  // Thin wrapper around sprintf (or __sprintfieee128 when T is __ieee128 on
  // targets defining _GLIBCXX_LONG_DOUBLE_ALT128_COMPAT) that formats VALUE
  // into BUFFER according to FORMAT_STRING.
  //
  // The extra arguments ARGS are passed to sprintf *before* VALUE, which is
  // the order required by formats such as "%.*Le" where the '*' precision
  // precedes the value being converted.
  //
  // Where <cfenv> support is available, the conversion is performed with the
  // rounding mode temporarily switched to FE_TONEAREST; the caller's
  // rounding mode is reinstated before returning.
  //
  // No bounds checking is done here: BUFFER must be large enough for the
  // formatted text plus sprintf's terminating null.  Returns whatever the
  // underlying sprintf call returns.
  template<typename T, typename... Extra>
    inline int
    sprintf_ld(char* buffer, const char* format_string, T value, Extra... args)
    {
#if _GLIBCXX_USE_C99_FENV_TR1 && defined(FE_TONEAREST)
      // Remember the caller's rounding mode and only touch the floating-point
      // environment when it actually differs from round-to-nearest.
      const int caller_rounding_mode = fegetround();
      const bool must_switch_rounding = (caller_rounding_mode != FE_TONEAREST);
      if (must_switch_rounding)
        fesetround(FE_TONEAREST);
#endif

      // Pick the sprintf flavour that understands T and run the conversion.
      const int written = [&]() -> int {
#ifdef _GLIBCXX_LONG_DOUBLE_ALT128_COMPAT
        if constexpr (is_same_v<T, __ieee128>)
          return __sprintfieee128(buffer, format_string, args..., value);
        else
#endif
          return sprintf(buffer, format_string, args..., value);
      }();

#if _GLIBCXX_USE_C99_FENV_TR1 && defined(FE_TONEAREST)
      // Put the caller's rounding mode back if we changed it above.
      if (must_switch_rounding)
        fesetround(caller_rounding_mode);
#endif

      return written;
    }
#pragma GCC diagnostic pop
}
916
// Formats VALUE into [FIRST, LAST) using the shortest decimal representation
// obtained from floating_to_shortest_scientific(), laid out in the notation
// selected by FMT.
//
// FMT may be:
//   - chars_format::hex: forwarded to __floating_to_chars_hex with no
//     precision;
//   - chars_format::scientific or chars_format::fixed: used as given;
//   - chars_format::general: resolved to fixed or scientific following the
//     printf 'g' rule with its default precision of 6;
//   - chars_format{} ("plain"): resolved to whichever of fixed/scientific
//     yields the shorter string (fixed on ties, per the bounds below).
//
// Values recognized by __handle_special_value() are written by that helper
// (NOTE(review): its definition is outside this block; presumably it covers
// infinities, NaNs and zeros -- confirm).
//
// Every path computes the exact output length up front and performs a single
// bounds check before writing.  Returns {end of written output, errc{}} on
// success, or {last, errc::value_too_large} if the output does not fit.
template<typename T>
  static to_chars_result
  __floating_to_chars_shortest(char* first, char* const last, const T value,
                               chars_format fmt)
  {
    if (fmt == chars_format::hex)
      return __floating_to_chars_hex(first, last, value, nullopt);

    __glibcxx_assert(fmt == chars_format::fixed
                     || fmt == chars_format::scientific
                     || fmt == chars_format::general
                     || fmt == chars_format{});
    __glibcxx_requires_valid_range(first, last);

    if (auto result = __handle_special_value(first, last, value, fmt, 0))
      return *result;

    // fd holds the shortest decimal form as sign, integer mantissa and
    // power-of-ten exponent (value == fd.mantissa * 10^fd.exponent).
    const auto fd = floating_to_shortest_scientific(value);
    const int mantissa_length = get_mantissa_length(fd);
    // The exponent that would be printed in d.ddde+XX notation, i.e. with the
    // decimal point placed after the first mantissa digit.
    const int scientific_exponent = fd.exponent + mantissa_length - 1;

    if (fmt == chars_format::general)
      {
        // Resolve the 'general' formatting mode as per the specification of
        // the 'g' printf output specifier.  Since there is no precision
        // argument, the default precision of the 'g' specifier, 6, applies.
        if (scientific_exponent >= -4 && scientific_exponent < 6)
          fmt = chars_format::fixed;
        else
          fmt = chars_format::scientific;
      }
    else if (fmt == chars_format{})
      {
        // The 'plain' formatting mode resolves to 'scientific' if it yields
        // the shorter string, and resolves to 'fixed' otherwise.  The
        // following lower and upper bounds on the exponent characterize when
        // to prefer 'fixed' over 'scientific'.
        int lower_bound = -(mantissa_length + 3);
        int upper_bound = 5;
        if (mantissa_length == 1)
          // The decimal point in scientific notation will be omitted in this
          // case; tighten the bounds appropriately.
          ++lower_bound, --upper_bound;

        if (fd.exponent >= lower_bound && fd.exponent <= upper_bound)
          fmt = chars_format::fixed;
        else
          fmt = chars_format::scientific;
      }

    // From here on fmt is either 'scientific' or 'fixed'.
    if (fmt == chars_format::scientific)
      {
        // Calculate the total length of the output string, perform a bounds
        // check, and then defer to Ryu's to_chars subroutine.
        int expected_output_length = fd.sign + mantissa_length;
        if (mantissa_length > 1)
          expected_output_length += strlen(".");
        const int abs_exponent = abs(scientific_exponent);
        // The exponent suffix is sized for two, three or four exponent
        // digits.
        expected_output_length += (abs_exponent >= 1000 ? strlen("e+dddd")
                                   : abs_exponent >= 100 ? strlen("e+ddd")
                                   : strlen("e+dd"));
        if (last - first < expected_output_length)
          return {last, errc::value_too_large};

        const int output_length = ryu::to_chars(fd, first);
        __glibcxx_assert(output_length == expected_output_length);
        return {first + output_length, errc{}};
      }
    else if (fmt == chars_format::fixed && fd.exponent >= 0)
      {
        // The Ryu exponent is positive, and so this number's shortest
        // representation is a whole number, to be formatted in fixed instead
        // of scientific notation "as if by std::printf".  This means we may
        // need to print more digits of the IEEE mantissa than what the
        // shortest scientific form given by Ryu provides.
        //
        // For instance, the exactly representable number
        // 12300000000000001048576.0 has as its shortest scientific
        // representation 123e+22, so in this case fd.mantissa is 123 and
        // fd.exponent is 22, which doesn't have enough information to format
        // the number exactly.  So we defer to Ryu's d2fixed_buffered_n with
        // precision=0 to format the number in the general case here.

        // To that end, first compute the output length and perform a bounds
        // check.
        int expected_output_length = fd.sign + mantissa_length + fd.exponent;
        // NOTE(review): is_rounded_up_pow10_p is defined outside this block;
        // judging by its name and this adjustment, it flags values whose
        // exact expansion has one digit fewer than the shortest form
        // suggests -- confirm against its definition.
        if (is_rounded_up_pow10_p<T>(fd))
          --expected_output_length;
        if (last - first < expected_output_length)
          return {last, errc::value_too_large};

        // Optimization: if the shortest representation fits inside the IEEE
        // mantissa, then the number is certainly exactly-representable and
        // its shortest scientific form must be equal to its exact form.  So
        // we can write the value in fixed form exactly via fd.mantissa and
        // fd.exponent.
        //
        // Taking log2 of both sides of the desired condition
        //   fd.mantissa * 10^fd.exponent < 2^mantissa_bits
        // we get
        //   log2 fd.mantissa + fd.exponent * log2 10 < mantissa_bits
        // where log2 10 is slightly smaller than 10/3=3.333...
        //
        // After adding some wiggle room due to rounding we get the condition
        // value_fits_inside_mantissa_p below.
        const int log2_mantissa = __bit_width(fd.mantissa) - 1;
        const bool value_fits_inside_mantissa_p
          = (log2_mantissa + (fd.exponent*10 + 2) / 3
             < floating_type_traits<T>::mantissa_bits - 2);
        if (value_fits_inside_mantissa_p)
          {
            // Print the small exactly-representable number in fixed form by
            // writing out fd.mantissa followed by fd.exponent many 0s.
            if (fd.sign)
              *first++ = '-';
            to_chars_result result = to_chars(first, last, fd.mantissa);
            __glibcxx_assert(result.ec == errc{});
            memset(result.ptr, '0', fd.exponent);
            result.ptr += fd.exponent;
            // 'first' was already advanced past the sign, hence the explicit
            // fd.sign term.
            const int output_length = fd.sign + (result.ptr - first);
            __glibcxx_assert(output_length == expected_output_length);
            return result;
          }
        else if constexpr (is_same_v<T, long double>
                           || is_same_v<T, F128_type>)
          {
            // We can't use d2fixed_buffered_n for types larger than double,
            // so we instead format larger types through sprintf.
            // TODO: We currently go through an intermediate buffer in order
            // to accommodate the mandatory null terminator of sprintf, but we
            // can avoid this if we use sprintf to write all but the last
            // digit, and carefully compute and write the last digit
            // ourselves.
            char buffer[expected_output_length+1];
            const int output_length = sprintf_ld(buffer, "%.0Lf", value);
            __glibcxx_assert(output_length == expected_output_length);
            // Copy everything except the null terminator.
            memcpy(first, buffer, output_length);
            return {first + output_length, errc{}};
          }
        else
          {
            // Otherwise, the number is too big, so defer to d2fixed_buffered_n.
            const int output_length = ryu::d2fixed_buffered_n(value, 0, first);
            __glibcxx_assert(output_length == expected_output_length);
            return {first + output_length, errc{}};
          }
      }
    else if (fmt == chars_format::fixed && fd.exponent < 0)
      {
        // The Ryu exponent is negative, so fd.mantissa definitely contains
        // all of the whole part of the number, and therefore fd.mantissa and
        // fd.exponent contain all of the information needed to format the
        // number in fixed notation "as if by std::printf" (with precision
        // equal to -fd.exponent).
        const int whole_digits = max<int>(mantissa_length + fd.exponent, 1);
        const int expected_output_length
          = fd.sign + whole_digits + strlen(".") + -fd.exponent;
        if (last - first < expected_output_length)
          return {last, errc::value_too_large};
        if (mantissa_length <= -fd.exponent)
          {
            // The magnitude of the number is less than one.  Format the
            // number appropriately.
            const auto orig_first = first;
            if (fd.sign)
              *first++ = '-';
            *first++ = '0';
            *first++ = '.';
            // Zeros between the decimal point and the first mantissa digit.
            const int leading_zeros = -fd.exponent - mantissa_length;
            memset(first, '0', leading_zeros);
            first += leading_zeros;
            const to_chars_result result = to_chars(first, last, fd.mantissa);
            const int output_length = result.ptr - orig_first;
            __glibcxx_assert(output_length == expected_output_length
                             && result.ec == errc{});
            return result;
          }
        else
          {
            // The magnitude of the number is at least one.
            const auto orig_first = first;
            if (fd.sign)
              *first++ = '-';
            to_chars_result result = to_chars(first, last, fd.mantissa);
            __glibcxx_assert(result.ec == errc{});
            // Make space for and write the decimal point in the correct spot.
            // result.ptr is one past the last mantissa digit and fd.exponent
            // is negative, so result.ptr[fd.exponent] is the first of the
            // -fd.exponent fractional digits; shift those right by one.
            memmove(&result.ptr[fd.exponent+1], &result.ptr[fd.exponent],
                    -fd.exponent);
            result.ptr[fd.exponent] = '.';
            const int output_length = result.ptr + 1 - orig_first;
            __glibcxx_assert(output_length == expected_output_length);
            // Account for the inserted decimal point.
            ++result.ptr;
            return result;
          }
      }

    // All formats were handled above.
    __glibcxx_assert(false);
    __builtin_unreachable();
  }
1116
// Formats VALUE into [FIRST, LAST) with an explicit PRECISION, in the notation
// selected by FMT: fixed, scientific or general (hex is forwarded to
// __floating_to_chars_hex).  A negative PRECISION is treated as 6.
//
// Digit generation:
//   - for long double and F128_type, sprintf_ld() writes into a local buffer
//     and any locale-specific radix is rewritten to '.';
//   - for the remaining types, Ryu's d2exp_buffered_n / d2fixed_buffered_n
//     are used.
//
// In the fixed and scientific modes the digit generator is only asked for
// min(precision, <upper bound on the value's useful precision>) digits; the
// remaining requested digits ("excess precision") are known to be zero and
// are written as literal '0's.  This bounds the size of the local buffers.
//
// Values recognized by __handle_special_value() are written by that helper.
//
// Returns {end of written output, errc{}} on success, or
// {last, errc::value_too_large} if the output does not fit in the range.
template<typename T>
  static to_chars_result
  __floating_to_chars_precision(char* first, char* const last, const T value,
                                chars_format fmt, const int precision)
  {
    if (fmt == chars_format::hex)
      return __floating_to_chars_hex(first, last, value, precision);

    if (precision < 0) [[unlikely]]
      // A negative precision argument is treated as if it were omitted, in
      // which case the default precision of 6 applies, as per the printf
      // specification.
      return __floating_to_chars_precision(first, last, value, fmt, 6);

    __glibcxx_assert(fmt == chars_format::fixed
                     || fmt == chars_format::scientific
                     || fmt == chars_format::general);
    __glibcxx_requires_valid_range(first, last);

    if (auto result = __handle_special_value(first, last, value,
                                             fmt, precision))
      return *result;

    constexpr int mantissa_bits = floating_type_traits<T>::mantissa_bits;
    constexpr int exponent_bits = floating_type_traits<T>::exponent_bits;
    constexpr int exponent_bias = (1u << (exponent_bits - 1)) - 1;

    // Extract the sign and exponent from the value.
    const auto [mantissa, biased_exponent, sign] = get_ieee_repr(value);
    const bool is_normal_number = (biased_exponent != 0);

    // Calculate the unbiased exponent.
    const int32_t unbiased_exponent = (is_normal_number
                                       ? biased_exponent - exponent_bias
                                       : 1 - exponent_bias);

    // Obtain floor(log2(abs(value))), which for a normal number is just the
    // unbiased exponent.  (For a subnormal number the value used here,
    // 1 - exponent_bias, is only an upper bound on it.)
    const int floor_log2_value = unbiased_exponent;
    // This is within +-1 of log10(abs(value)).  Note that log10 2 is 0.3010..
    // The +-999 terms make the integer division round away from zero.
    const int approx_log10_value = (floor_log2_value >= 0
                                    ? (floor_log2_value*301 + 999)/1000
                                    : (floor_log2_value*301 - 999)/1000);

    // Compute (an upper bound of) the number's effective precision when it is
    // formatted in scientific and fixed notation.  Beyond this precision all
    // digits are definitely zero, and this fact allows us to bound the sizes
    // of any local output buffers that we may need to use.  TODO: Consider
    // the number of trailing zero bits in the mantissa to obtain finer upper
    // bounds.
    // ???: Using "mantissa_bits + 1" instead of just "mantissa_bits" in the
    // bounds below is necessary only for __ibm128, it seems.  Even though the
    // type has 105 bits of precision, printf may output 106 fractional digits
    // on some inputs, e.g. 0x1.bcd19f5d720d12a3513e3301028p+0.
    const int max_eff_scientific_precision
      = (floor_log2_value >= 0
         ? max(mantissa_bits + 1, approx_log10_value + 1)
         : -(7*floor_log2_value + 9)/10 + 2 + mantissa_bits + 1);
    __glibcxx_assert(max_eff_scientific_precision > 0);

    const int max_eff_fixed_precision
      = (floor_log2_value >= 0
         ? mantissa_bits + 1
         : -floor_log2_value + mantissa_bits + 1);
    __glibcxx_assert(max_eff_fixed_precision > 0);

    // Ryu doesn't support formatting floating-point types larger than double
    // with an explicit precision, so instead we just go through printf.
    if constexpr (is_same_v<T, long double> || is_same_v<T, F128_type>)
      {
        int effective_precision;
        const char* output_specifier;
        if (fmt == chars_format::scientific)
          {
            effective_precision = min(precision, max_eff_scientific_precision);
            output_specifier = "%.*Le";
          }
        else if (fmt == chars_format::fixed)
          {
            effective_precision = min(precision, max_eff_fixed_precision);
            output_specifier = "%.*Lf";
          }
        else if (fmt == chars_format::general)
          {
            effective_precision = min(precision, max_eff_scientific_precision);
            output_specifier = "%.*Lg";
          }
        else
          __builtin_unreachable();
        // Number of requested digits beyond the effective precision; these
        // are appended as '0's further down.  In general mode no padding
        // zeros are added, so the excess is forced to 0 there.
        const int excess_precision = (fmt != chars_format::general
                                      ? precision - effective_precision : 0);

        // Since the output of printf is locale-sensitive, we need to be able
        // to handle a radix point that's different from '.'.
        char radix[6] = {'.', '\0', '\0', '\0', '\0', '\0'};
#ifdef RADIXCHAR
        if (effective_precision > 0)
          // ???: Can nl_langinfo() ever return null?
          if (const char* const radix_ptr = nl_langinfo(RADIXCHAR))
            {
              strncpy(radix, radix_ptr, sizeof(radix)-1);
              // We accept only radix points which are at most 4 bytes (one
              // UTF-8 character) wide.
              __glibcxx_assert(radix[4] == '\0');
            }
#endif

        // Compute straightforward upper bounds on the output length.
        // sizeof(radix) is used as a generous allowance for the radix point.
        int output_length_upper_bound;
        if (fmt == chars_format::scientific || fmt == chars_format::general)
          output_length_upper_bound = (strlen("-d") + sizeof(radix)
                                       + effective_precision
                                       + strlen("e+dddd"));
        else if (fmt == chars_format::fixed)
          {
            if (approx_log10_value >= 0)
              output_length_upper_bound = sign + approx_log10_value + 1;
            else
              output_length_upper_bound = sign + strlen("0");
            output_length_upper_bound += sizeof(radix) + effective_precision;
          }
        else
          __builtin_unreachable();

        // Do the sprintf into the local buffer.  The extra byte is for
        // sprintf's null terminator.
        char buffer[output_length_upper_bound+1];
        int output_length
          = sprintf_ld(buffer, output_specifier, value, effective_precision);
        __glibcxx_assert(output_length <= output_length_upper_bound);

        if (effective_precision > 0)
          // We need to replace a radix that is different from '.' with '.'.
          if (const string_view radix_sv = {radix}; radix_sv != ".")
            {
              const string_view buffer_sv = {buffer, (size_t)output_length};
              const size_t radix_index = buffer_sv.find(radix_sv);
              if (radix_index != string_view::npos)
                {
                  buffer[radix_index] = '.';
                  if (radix_sv.length() > 1)
                    {
                      // A multibyte radix shrinks to the single byte '.';
                      // close the gap by moving the tail of the string left.
                      memmove(&buffer[radix_index + 1],
                              &buffer[radix_index + radix_sv.length()],
                              output_length - radix_index - radix_sv.length());
                      output_length -= radix_sv.length() - 1;
                    }
                }
            }

        // Copy the string from the buffer over to the output range.
        // The check is done in two steps rather than against the sum
        // output_length + excess_precision (presumably so that a very large
        // requested precision cannot overflow the int addition).
        if (last - first < output_length
            || last - first - output_length < excess_precision)
          return {last, errc::value_too_large};
        memcpy(first, buffer, output_length);
        first += output_length;

        // Add the excess 0s to the result.
        if (excess_precision > 0)
          {
            if (fmt == chars_format::scientific)
              {
                // Locate the 'e' that starts the exponent suffix; it sits 6,
                // 5 or 4 characters before the end depending on the number
                // of exponent digits.
                char* const significand_end
                  = (output_length >= 6 && first[-6] == 'e' ? &first[-6]
                     : first[-5] == 'e' ? &first[-5]
                     : &first[-4]);
                __glibcxx_assert(*significand_end == 'e');
                // Shift the exponent suffix right and fill the gap with '0's.
                memmove(significand_end + excess_precision, significand_end,
                        first - significand_end);
                memset(significand_end, '0', excess_precision);
                first += excess_precision;
              }
            else if (fmt == chars_format::fixed)
              {
                memset(first, '0', excess_precision);
                first += excess_precision;
              }
          }
        return {first, errc{}};
      }
    else if (fmt == chars_format::scientific)
      {
        const int effective_precision
          = min(precision, max_eff_scientific_precision);
        const int excess_precision = precision - effective_precision;

        // We can easily compute the output length exactly whenever the
        // scientific exponent is far enough away from +-100.  But if it's
        // near +-100, then our log2 approximation is too coarse (and doesn't
        // consider precision-dependent rounding) in order to accurately
        // distinguish between a scientific exponent of +-100 and +-99.
        const bool scientific_exponent_near_100_p
          = abs(abs(floor_log2_value) - 332) <= 4;

        // Compute an upper bound on the output length.  TODO: Maybe also
        // consider a lower bound on the output length.
        int output_length_upper_bound = sign + strlen("d");
        if (effective_precision > 0)
          output_length_upper_bound += strlen(".") + effective_precision;
        if (scientific_exponent_near_100_p
            || (floor_log2_value >= 332 || floor_log2_value <= -333))
          output_length_upper_bound += strlen("e+ddd");
        else
          output_length_upper_bound += strlen("e+dd");

        int output_length;
        if (last - first >= output_length_upper_bound
            && last - first - output_length_upper_bound >= excess_precision)
          {
            // The result will definitely fit into the output range, so we can
            // write directly into it.
            output_length = ryu::d2exp_buffered_n(value, effective_precision,
                                                  first, nullptr);
            __glibcxx_assert(output_length == output_length_upper_bound
                             || (scientific_exponent_near_100_p
                                 && (output_length
                                     == output_length_upper_bound - 1)));
          }
        else if (scientific_exponent_near_100_p)
          {
            // Write the result of d2exp_buffered_n into an intermediate
            // buffer, do a bounds check, and copy the result into the output
            // range.
            char buffer[output_length_upper_bound];
            output_length = ryu::d2exp_buffered_n(value, effective_precision,
                                                  buffer, nullptr);
            __glibcxx_assert(output_length == output_length_upper_bound - 1
                             || output_length == output_length_upper_bound);
            if (last - first < output_length
                || last - first - output_length < excess_precision)
              return {last, errc::value_too_large};
            memcpy(first, buffer, output_length);
          }
        else
          // If the scientific exponent is not near 100, then the upper bound
          // is actually the exact length, and so the result will definitely
          // not fit into the output range.
          return {last, errc::value_too_large};
        first += output_length;
        if (excess_precision > 0)
          {
            // Splice the excess zeros into the result.  The exponent suffix
            // here is either "e+ddd" or "e+dd", so the 'e' is 5 or 4
            // characters before the end.
            char* const significand_end = (first[-5] == 'e'
                                           ? &first[-5] : &first[-4]);
            __glibcxx_assert(*significand_end == 'e');
            memmove(significand_end + excess_precision, significand_end,
                    first - significand_end);
            memset(significand_end, '0', excess_precision);
            first += excess_precision;
          }
        return {first, errc{}};
      }
    else if (fmt == chars_format::fixed)
      {
        const int effective_precision
          = min(precision, max_eff_fixed_precision);
        const int excess_precision = precision - effective_precision;

        // Compute an upper bound on the output length.  TODO: Maybe also
        // consider a lower bound on the output length.
        int output_length_upper_bound;
        if (approx_log10_value >= 0)
          output_length_upper_bound = sign + approx_log10_value + 1;
        else
          output_length_upper_bound = sign + strlen("0");
        if (effective_precision > 0)
          output_length_upper_bound += strlen(".") + effective_precision;

        int output_length;
        if (last - first >= output_length_upper_bound
            && last - first - output_length_upper_bound >= excess_precision)
          {
            // The result will definitely fit into the output range, so we can
            // write directly into it.
            output_length = ryu::d2fixed_buffered_n(value, effective_precision,
                                                    first);
            __glibcxx_assert(output_length <= output_length_upper_bound);
          }
        else
          {
            // Write the result of d2fixed_buffered_n into an intermediate
            // buffer, do a bounds check, and copy the result into the output
            // range.
            char buffer[output_length_upper_bound];
            output_length = ryu::d2fixed_buffered_n(value, effective_precision,
                                                    buffer);
            __glibcxx_assert(output_length <= output_length_upper_bound);
            if (last - first < output_length
                || last - first - output_length < excess_precision)
              return {last, errc::value_too_large};
            memcpy(first, buffer, output_length);
          }
        first += output_length;
        if (excess_precision > 0)
          {
            // Append the excess zeros into the result.
            memset(first, '0', excess_precision);
            first += excess_precision;
          }
        return {first, errc{}};
      }
    else if (fmt == chars_format::general)
      {
        // Handle the 'general' formatting mode as per C11 printf's %g output
        // specifier.  Since Ryu doesn't do zero-trimming, we always write to
        // an intermediate buffer and manually perform zero-trimming there
        // before copying the result over to the output range.
        int effective_precision
          = min(precision, max_eff_scientific_precision + 1);
        const int output_length_upper_bound
          = strlen("-d.") + effective_precision + strlen("e+ddd");
        // The four bytes of headroom is to avoid needing to do a memmove when
        // rewriting a scientific form such as 1.00e-2 into the equivalent
        // fixed form 0.0100 (the fixed form grows to the left by up to four
        // characters, since the smallest exponent rewritten this way is -4).
        char buffer[4 + output_length_upper_bound];

        // 7.21.6.1/8: "Let P equal ... 1 if the precision is zero."
        if (effective_precision == 0)
          effective_precision = 1;

        // Perform a trial formatting in scientific form, and obtain the
        // scientific exponent.
        int scientific_exponent;
        char* buffer_start = buffer + 4;
        int output_length
          = ryu::d2exp_buffered_n(value, effective_precision - 1,
                                  buffer_start, &scientific_exponent);
        __glibcxx_assert(output_length <= output_length_upper_bound);

        // 7.21.6.1/8: "Then, if a conversion with style E would have an
        // exponent of X:
        //   if P > X >= -4, the conversion is with style f and
        //     precision P - (X + 1).
        //   otherwise, the conversion is with style e and precision P - 1."
        const bool resolve_to_fixed_form
          = (scientific_exponent >= -4
             && scientific_exponent < effective_precision);
        if (resolve_to_fixed_form)
          {
            // Rather than invoking d2fixed_buffered_n to reformat the number
            // for us from scratch, we can just rewrite the scientific form
            // into fixed form in-place.  This is safe to do because whenever
            // %g resolves to %f, the fixed form will be no larger than the
            // corresponding scientific form, and it will also contain the
            // same significant digits as the scientific form.
            fmt = chars_format::fixed;
            if (scientific_exponent < 0)
              {
                // e.g. buffer_start == "-1.234e-04"
                // Overwrite the decimal point (or, when there is no
                // fractional part, the 'e') with a copy of the leading digit.
                char* leading_digit = &buffer_start[sign];
                leading_digit[1] = leading_digit[0];
                // buffer_start == "-11234e-04"
                // Step back into the headroom to make room for "0.000...".
                buffer_start -= -scientific_exponent;
                __glibcxx_assert(buffer_start >= buffer);
                // buffer_start == "????-11234e-04"
                char* head = buffer_start;
                if (sign)
                  *head++ = '-';
                *head++ = '0';
                *head++ = '.';
                memset(head, '0', -scientific_exponent - 1);
                // buffer_start == "-0.00011234e-04"

                // Now drop the exponent suffix, and add the leading zeros to
                // the output length.
                output_length -= strlen("e-0d");
                output_length += -scientific_exponent;
                if (effective_precision - 1 == 0)
                  // The scientific form had no decimal point, but the fixed
                  // form now does.
                  output_length += strlen(".");
              }
            else if (effective_precision == 1)
              {
                // The scientific exponent must be 0, so the fixed form
                // coincides with the scientific form (minus the exponent
                // suffix).
                __glibcxx_assert(scientific_exponent == 0);
                output_length -= strlen("e+dd");
              }
            else
              {
                // We are dealing with a scientific form which has a
                // non-empty fractional part and a nonnegative exponent,
                // e.g. buffer_start == "1.234e+02".
                __glibcxx_assert(effective_precision >= 1);
                char* const decimal_point = &buffer_start[sign + 1];
                __glibcxx_assert(*decimal_point == '.');
                // Slide the first scientific_exponent fractional digits one
                // position left, over the old decimal point.
                memmove(decimal_point, decimal_point+1,
                        scientific_exponent);
                // buffer_start == "123.4e+02"
                decimal_point[scientific_exponent] = '.';
                if (scientific_exponent >= 100)
                  output_length -= strlen("e+ddd");
                else
                  output_length -= strlen("e+dd");
                if (effective_precision - 1 == scientific_exponent)
                  // No fractional digits remain, so the decimal point that
                  // was just written at the end is dropped from the length.
                  output_length -= strlen(".");
              }
            // From here on effective_precision is the number of fractional
            // digits in the fixed form: P - (X + 1).
            effective_precision -= 1 + scientific_exponent;

            __glibcxx_assert(output_length <= output_length_upper_bound);
          }
        else
          {
            // We're sticking to the scientific form, so keep the output as-is.
            fmt = chars_format::scientific;
            // Number of fractional digits in the scientific form: P - 1.
            effective_precision = effective_precision - 1;
          }

        // 7.21.6.1/8: "Finally ... any trailing zeros are removed from
        // the fractional portion of the result and the decimal-point
        // character is removed if there is no fractional portion remaining."
        if (effective_precision > 0)
          {
            char* decimal_point = nullptr;
            if (fmt == chars_format::scientific)
              decimal_point = &buffer_start[sign + 1];
            else if (fmt == chars_format::fixed)
              decimal_point
                = &buffer_start[output_length] - effective_precision - 1;
            __glibcxx_assert(*decimal_point == '.');

            char* const fractional_part_start = decimal_point + 1;
            char* fractional_part_end = nullptr;
            if (fmt == chars_format::scientific)
              {
                // The fractional part ends at the 'e' of the exponent suffix,
                // which is either "e+ddd" or "e+dd".
                fractional_part_end = (buffer_start[output_length-5] == 'e'
                                       ? &buffer_start[output_length-5]
                                       : &buffer_start[output_length-4]);
                __glibcxx_assert(*fractional_part_end == 'e');
              }
            else if (fmt == chars_format::fixed)
              fractional_part_end = &buffer_start[output_length];

            const string_view fractional_part
              = {fractional_part_start, (size_t)(fractional_part_end
                                                 - fractional_part_start) };
            const size_t last_nonzero_digit_pos
              = fractional_part.find_last_not_of('0');

            // trim_start is where the text following the trimmed zeros (the
            // exponent suffix, or nothing) must begin.  If the fractional
            // part is all zeros, the decimal point goes away as well.
            char* trim_start;
            if (last_nonzero_digit_pos == string_view::npos)
              trim_start = decimal_point;
            else
              trim_start = &fractional_part_start[last_nonzero_digit_pos] + 1;
            if (fmt == chars_format::scientific)
              // Move the exponent suffix down over the trimmed zeros.
              memmove(trim_start, fractional_part_end,
                      &buffer_start[output_length] - fractional_part_end);
            output_length -= fractional_part_end - trim_start;
          }

        if (last - first < output_length)
          return {last, errc::value_too_large};

        memcpy(first, buffer_start, output_length);
        return {first + output_length, errc{}};
      }

    // All formats were handled above.
    __glibcxx_assert(false);
    __builtin_unreachable();
  }
1577
1578 // Define the overloads for float.
1579 to_chars_result
to_chars(char * first,char * last,float value)1580 to_chars(char* first, char* last, float value) noexcept
1581 { return __floating_to_chars_shortest(first, last, value, chars_format{}); }
1582
1583 to_chars_result
to_chars(char * first,char * last,float value,chars_format fmt)1584 to_chars(char* first, char* last, float value, chars_format fmt) noexcept
1585 { return __floating_to_chars_shortest(first, last, value, fmt); }
1586
1587 to_chars_result
to_chars(char * first,char * last,float value,chars_format fmt,int precision)1588 to_chars(char* first, char* last, float value, chars_format fmt,
1589 int precision) noexcept
1590 { return __floating_to_chars_precision(first, last, value, fmt, precision); }
1591
1592 // Define the overloads for double.
1593 to_chars_result
to_chars(char * first,char * last,double value)1594 to_chars(char* first, char* last, double value) noexcept
1595 { return __floating_to_chars_shortest(first, last, value, chars_format{}); }
1596
1597 to_chars_result
to_chars(char * first,char * last,double value,chars_format fmt)1598 to_chars(char* first, char* last, double value, chars_format fmt) noexcept
1599 { return __floating_to_chars_shortest(first, last, value, fmt); }
1600
1601 to_chars_result
to_chars(char * first,char * last,double value,chars_format fmt,int precision)1602 to_chars(char* first, char* last, double value, chars_format fmt,
1603 int precision) noexcept
1604 { return __floating_to_chars_precision(first, last, value, fmt, precision); }
1605
1606 // Define the overloads for long double.
1607 to_chars_result
to_chars(char * first,char * last,long double value)1608 to_chars(char* first, char* last, long double value) noexcept
1609 {
1610 if constexpr (LONG_DOUBLE_KIND == LDK_BINARY64
1611 || LONG_DOUBLE_KIND == LDK_UNSUPPORTED)
1612 return __floating_to_chars_shortest(first, last, static_cast<double>(value),
1613 chars_format{});
1614 else
1615 return __floating_to_chars_shortest(first, last, value, chars_format{});
1616 }
1617
1618 to_chars_result
to_chars(char * first,char * last,long double value,chars_format fmt)1619 to_chars(char* first, char* last, long double value, chars_format fmt) noexcept
1620 {
1621 if constexpr (LONG_DOUBLE_KIND == LDK_BINARY64
1622 || LONG_DOUBLE_KIND == LDK_UNSUPPORTED)
1623 return __floating_to_chars_shortest(first, last, static_cast<double>(value),
1624 fmt);
1625 else
1626 return __floating_to_chars_shortest(first, last, value, fmt);
1627 }
1628
1629 to_chars_result
to_chars(char * first,char * last,long double value,chars_format fmt,int precision)1630 to_chars(char* first, char* last, long double value, chars_format fmt,
1631 int precision) noexcept
1632 {
1633 if constexpr (LONG_DOUBLE_KIND == LDK_BINARY64
1634 || LONG_DOUBLE_KIND == LDK_UNSUPPORTED)
1635 return __floating_to_chars_precision(first, last, static_cast<double>(value),
1636 fmt,
1637 precision);
1638 else
1639 return __floating_to_chars_precision(first, last, value, fmt, precision);
1640 }
1641
1642 #ifdef FLOAT128_TO_CHARS
1643 to_chars_result
to_chars(char * first,char * last,__float128 value)1644 to_chars(char* first, char* last, __float128 value) noexcept
1645 {
1646 return __floating_to_chars_shortest(first, last, value, chars_format{});
1647 }
1648
1649 to_chars_result
to_chars(char * first,char * last,__float128 value,chars_format fmt)1650 to_chars(char* first, char* last, __float128 value, chars_format fmt) noexcept
1651 {
1652 return __floating_to_chars_shortest(first, last, value, fmt);
1653 }
1654
1655 to_chars_result
to_chars(char * first,char * last,__float128 value,chars_format fmt,int precision)1656 to_chars(char* first, char* last, __float128 value, chars_format fmt,
1657 int precision) noexcept
1658 {
1659 return __floating_to_chars_precision(first, last, value, fmt, precision);
1660 }
1661 #endif
1662
1663 #ifdef _GLIBCXX_LONG_DOUBLE_COMPAT
1664 // Map the -mlong-double-64 long double overloads to the double overloads.
1665 extern "C" to_chars_result
1666 _ZSt8to_charsPcS_e(char* first, char* last, double value) noexcept
1667 __attribute__((alias ("_ZSt8to_charsPcS_d")));
1668
1669 extern "C" to_chars_result
1670 _ZSt8to_charsPcS_eSt12chars_format(char* first, char* last, double value,
1671 chars_format fmt) noexcept
1672 __attribute__((alias ("_ZSt8to_charsPcS_dSt12chars_format")));
1673
1674 extern "C" to_chars_result
1675 _ZSt8to_charsPcS_eSt12chars_formati(char* first, char* last, double value,
1676 chars_format fmt, int precision) noexcept
1677 __attribute__((alias ("_ZSt8to_charsPcS_dSt12chars_formati")));
1678 #endif
1679
1680 _GLIBCXX_END_NAMESPACE_VERSION
1681 } // namespace std
1682
1683 #endif // _GLIBCXX_FLOAT_IS_IEEE_BINARY32 && _GLIBCXX_DOUBLE_IS_IEEE_BINARY64
1684