xref: /llvm-project/libcxx/src/include/from_chars_floating_point.h (revision 6c4267fb1779bc5550bb413f33250f9365acfbc6)
1 //===----------------------------------------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #ifndef _LIBCPP_SRC_INCLUDE_FROM_CHARS_FLOATING_POINT_H
10 #define _LIBCPP_SRC_INCLUDE_FROM_CHARS_FLOATING_POINT_H
11 
12 // These headers are in the shared LLVM-libc header library.
13 #include "shared/fp_bits.h"
14 #include "shared/str_to_float.h"
15 #include "shared/str_to_integer.h"
16 
17 #include <__assert>
18 #include <__config>
19 #include <cctype>
20 #include <charconv>
21 #include <concepts>
22 #include <limits>
23 
24 // Included for the _Floating_type_traits class
25 #include "to_chars_floating_point.h"
26 
27 _LIBCPP_BEGIN_NAMESPACE_STD
28 
29 // Parses an infinity string.
30 // Valid strings are case insensitive and contain INF or INFINITY.
31 //
32 // - __first is the first argument to std::from_chars. When the string is invalid
33 //   this value is returned as ptr in the result.
34 // - __last is the last argument of std::from_chars.
35 // - __value is the value argument of std::from_chars,
36 // - __ptr is the current position is the input string. This is points beyond
37 //   the initial I character.
38 // - __negative whether a valid string represents -inf or +inf.
39 template <floating_point _Fp>
40 __from_chars_result<_Fp>
41 __from_chars_floating_point_inf(const char* const __first, const char* __last, const char* __ptr, bool __negative) {
42   if (__last - __ptr < 2) [[unlikely]]
43     return {_Fp{0}, 0, errc::invalid_argument};
44 
45   if (std::tolower(__ptr[0]) != 'n' || std::tolower(__ptr[1]) != 'f') [[unlikely]]
46     return {_Fp{0}, 0, errc::invalid_argument};
47 
48   __ptr += 2;
49 
50   // At this point the result is valid and contains INF.
51   // When the remaining part contains INITY this will be consumed. Otherwise
52   // only INF is consumed. For example INFINITZ will consume INF and ignore
53   // INITZ.
54 
55   if (__last - __ptr >= 5              //
56       && std::tolower(__ptr[0]) == 'i' //
57       && std::tolower(__ptr[1]) == 'n' //
58       && std::tolower(__ptr[2]) == 'i' //
59       && std::tolower(__ptr[3]) == 't' //
60       && std::tolower(__ptr[4]) == 'y')
61     __ptr += 5;
62 
63   if constexpr (numeric_limits<_Fp>::has_infinity) {
64     if (__negative)
65       return {-std::numeric_limits<_Fp>::infinity(), __ptr - __first, std::errc{}};
66 
67     return {std::numeric_limits<_Fp>::infinity(), __ptr - __first, std::errc{}};
68   } else {
69     return {_Fp{0}, __ptr - __first, errc::result_out_of_range};
70   }
71 }
72 
73 // Parses a nan string.
74 // Valid strings are case insensitive and contain INF or INFINITY.
75 //
76 // - __first is the first argument to std::from_chars. When the string is invalid
77 //   this value is returned as ptr in the result.
78 // - __last is the last argument of std::from_chars.
79 // - __value is the value argument of std::from_chars,
80 // - __ptr is the current position is the input string. This is points beyond
81 //   the initial N character.
82 // - __negative whether a valid string represents -nan or +nan.
83 template <floating_point _Fp>
84 __from_chars_result<_Fp>
85 __from_chars_floating_point_nan(const char* const __first, const char* __last, const char* __ptr, bool __negative) {
86   if (__last - __ptr < 2) [[unlikely]]
87     return {_Fp{0}, 0, errc::invalid_argument};
88 
89   if (std::tolower(__ptr[0]) != 'a' || std::tolower(__ptr[1]) != 'n') [[unlikely]]
90     return {_Fp{0}, 0, errc::invalid_argument};
91 
92   __ptr += 2;
93 
94   // At this point the result is valid and contains NAN. When the remaining
95   // part contains ( n-char-sequence_opt ) this will be consumed. Otherwise
96   // only NAN is consumed. For example NAN(abcd will consume NAN and ignore
97   // (abcd.
98   if (__last - __ptr >= 2 && __ptr[0] == '(') {
99     size_t __offset = 1;
100     do {
101       if (__ptr[__offset] == ')') {
102         __ptr += __offset + 1;
103         break;
104       }
105       if (__ptr[__offset] != '_' && !std::isalnum(__ptr[__offset]))
106         break;
107       ++__offset;
108     } while (__ptr + __offset != __last);
109   }
110 
111   if (__negative)
112     return {-std::numeric_limits<_Fp>::quiet_NaN(), __ptr - __first, std::errc{}};
113 
114   return {std::numeric_limits<_Fp>::quiet_NaN(), __ptr - __first, std::errc{}};
115 }
116 
// The result of parsing the digits and the optional radix point of a
// floating-point string.
// - __offset the index in the input of the first character that is not part
//   of the parsed constant.
// - __mantissa the accumulated digits as an integer.
// - __exponent the correction to apply to __mantissa. It accounts for digits
//   after the radix point and for integral digits that did not fit in
//   __mantissa.
// - __truncated whether a non-zero digit was dropped since it did not fit in
//   __mantissa.
// - __is_valid whether at least one digit was parsed.
template <class _Tp>
struct __fractional_constant_result {
  size_t __offset{size_t(-1)};
  _Tp __mantissa{0};
  int __exponent{0};
  bool __truncated{false};
  bool __is_valid{false};
};

// Parses the hex constant part of the hexadecimal floating-point value.
// - __input start of buffer given to from_chars
// - __n the number of elements in the buffer
// - __offset where to start parsing. The input can have an optional sign, the
//   offset starts after this sign.
//
// The returned __exponent is a power of two; every hexadecimal digit accounts
// for 4 bits.
template <class _Tp>
__fractional_constant_result<_Tp> __parse_fractional_hex_constant(const char* __input, size_t __n, size_t __offset) {
  // Returns the value of a hexadecimal digit, or -1 when __c is not one.
  // from_chars is locale independent, and the <cctype> functions have
  // undefined behaviour for negative char values. Hence the classification is
  // done by hand.
  const auto __hex_digit_value = [](char __c) -> int {
    if (__c >= '0' && __c <= '9')
      return __c - '0';
    if (__c >= 'a' && __c <= 'f')
      return __c - 'a' + 10;
    if (__c >= 'A' && __c <= 'F')
      return __c - 'A' + 10;
    return -1;
  };

  __fractional_constant_result<_Tp> __result;

  // Below this threshold another digit can be appended without overflow.
  const _Tp __mantissa_truncate_threshold = std::numeric_limits<_Tp>::max() / 16;
  bool __fraction                         = false;
  for (; __offset < __n; ++__offset) {
    const char __c = __input[__offset];
    if (__c == '.') {
      if (__fraction)
        break; // this means that __input[__offset] points to a second radix point, ending the number.

      __fraction = true;
      continue;
    }

    const int __value = __hex_digit_value(__c);
    if (__value < 0)
      break;

    const uint32_t __digit = static_cast<uint32_t>(__value);
    __result.__is_valid    = true;

    if (__result.__mantissa < __mantissa_truncate_threshold) {
      __result.__mantissa = (__result.__mantissa * 16) + __digit;
      if (__fraction)
        __result.__exponent -= 4;
    } else {
      // The mantissa is full. The digit is dropped; remember whether
      // information was lost and keep the magnitude of integral digits.
      if (__digit > 0)
        __result.__truncated = true;
      if (!__fraction)
        __result.__exponent += 4;
    }
  }

  __result.__offset = __offset;
  return __result;
}
185 
186 struct __exponent_result {
187   size_t __offset{size_t(-1)};
188   int __value{0};
189   bool __present{false};
190 };
191 
192 // When the exponent is not present the result of the struct contains
193 // __offset, 0, false. This allows using the results unconditionally, the
194 // __present is important for the scientific notation, where the value is
195 // mandatory.
196 __exponent_result __parse_exponent(const char* __input, size_t __n, size_t __offset, char __marker) {
197   if (__offset + 1 < __n &&                          // an exponent always needs at least one digit.
198       std::tolower(__input[__offset]) == __marker && //
199       !std::isspace(__input[__offset + 1])           // leading whitespace is not allowed.
200   ) {
201     ++__offset;
202     LIBC_NAMESPACE::shared::StrToNumResult<int32_t> __e =
203         LIBC_NAMESPACE::shared::strtointeger<int32_t>(__input + __offset, 10, __n - __offset);
204     // __result.error contains the errno value, 0 or ERANGE these are not interesting.
205     // If the number of characters parsed is 0 it means there was no number.
206     if (__e.parsed_len != 0)
207       return {__offset + __e.parsed_len, __e.value, true};
208     else
209       --__offset; // the assumption of a valid exponent was not true, undo eating the exponent character.
210   }
211 
212   return {__offset, 0, false};
213 }
214 
// Adds the exponent correction of the fractional constant to the parsed
// exponent and clamps the sum to the range
// [-__max_biased_exponent, __max_biased_exponent].
//
// Here we do this operation as int64 to avoid overflow. The function is inline
// since it is defined in a header.
inline int32_t __merge_exponents(int64_t __fractional, int64_t __exponent, int __max_biased_exponent) {
  const int64_t __sum = __fractional + __exponent;

  if (__sum > __max_biased_exponent)
    return __max_biased_exponent;

  if (__sum < -__max_biased_exponent)
    return -__max_biased_exponent;

  // The clamping above guarantees the sum fits in an int32_t.
  return static_cast<int32_t>(__sum);
}
227 
228 template <class _Fp, class _Tp>
229 __from_chars_result<_Fp>
230 __calculate_result(_Tp __mantissa, int __exponent, bool __negative, __from_chars_result<_Fp> __result) {
231   auto __r = LIBC_NAMESPACE::shared::FPBits<_Fp>();
232   __r.set_mantissa(__mantissa);
233   __r.set_biased_exponent(__exponent);
234 
235   // C17 7.12.1/6
236   // The result underflows if the magnitude of the mathematical result is so
237   // small that the mathematical result cannot be represented, without
238   // extraordinary roundoff error, in an object of the specified type.237) If
239   // the result underflows, the function returns an implementation-defined
240   // value whose magnitude is no greater than the smallest normalized positive
241   // number in the specified type; if the integer expression math_errhandling
242   // & MATH_ERRNO is nonzero, whether errno acquires the value ERANGE is
243   // implementation-defined; if the integer expression math_errhandling &
244   // MATH_ERREXCEPT is nonzero, whether the "underflow" floating-point
245   // exception is raised is implementation-defined.
246   //
247   // LLVM-LIBC sets ERAGNE for subnormal values
248   //
249   // [charconv.from.chars]/1
250   //   ... If the parsed value is not in the range representable by the type of
251   //   value, value is unmodified and the member ec of the return value is
252   //   equal to errc::result_out_of_range. ...
253   //
254   // Undo the ERANGE for subnormal values.
255   if (__result.__ec == errc::result_out_of_range && __r.is_subnormal() && !__r.is_zero())
256     __result.__ec = errc{};
257 
258   if (__negative)
259     __result.__value = -__r.get_val();
260   else
261     __result.__value = __r.get_val();
262 
263   return __result;
264 }
265 
266 // Implements from_chars for decimal floating-point values.
267 // __first forwarded from from_chars
268 // __last forwarded from from_chars
269 // __value forwarded from from_chars
270 // __fmt forwarded from from_chars
271 // __ptr the start of the buffer to parse. This is after the optional sign character.
272 // __negative should __value be set to a negative value?
273 //
274 // This function and __from_chars_floating_point_decimal are similar. However
275 // the similar parts are all in helper functions. So the amount of code
276 // duplication is minimal.
277 template <floating_point _Fp>
278 __from_chars_result<_Fp>
279 __from_chars_floating_point_hex(const char* const __first, const char* __last, const char* __ptr, bool __negative) {
280   size_t __n         = __last - __first;
281   ptrdiff_t __offset = __ptr - __first;
282 
283   auto __fractional =
284       std::__parse_fractional_hex_constant<typename _Floating_type_traits<_Fp>::_Uint_type>(__first, __n, __offset);
285   if (!__fractional.__is_valid)
286     return {_Fp{0}, 0, errc::invalid_argument};
287 
288   auto __parsed_exponent = std::__parse_exponent(__first, __n, __fractional.__offset, 'p');
289   __offset               = __parsed_exponent.__offset;
290   int __exponent         = std::__merge_exponents(
291       __fractional.__exponent, __parsed_exponent.__value, LIBC_NAMESPACE::shared::FPBits<_Fp>::MAX_BIASED_EXPONENT);
292 
293   __from_chars_result<_Fp> __result{_Fp{0}, __offset, {}};
294   LIBC_NAMESPACE::shared::ExpandedFloat<_Fp> __expanded_float = {0, 0};
295   if (__fractional.__mantissa != 0) {
296     auto __temp = LIBC_NAMESPACE::shared::binary_exp_to_float<_Fp>(
297         {__fractional.__mantissa, __exponent},
298         __fractional.__truncated,
299         LIBC_NAMESPACE::shared::RoundDirection::Nearest);
300     __expanded_float = __temp.num;
301     if (__temp.error == ERANGE) {
302       __result.__ec = errc::result_out_of_range;
303     }
304   }
305 
306   return std::__calculate_result<_Fp>(__expanded_float.mantissa, __expanded_float.exponent, __negative, __result);
307 }
308 
309 // Parses the hex constant part of the decimal float value.
310 // - input start of buffer given to from_chars
311 // - __n the number of elements in the buffer
312 // - __offset where to start parsing. The input can have an optional sign, the
313 //   offset starts after this sign.
314 template <class _Tp>
315 __fractional_constant_result<_Tp>
316 __parse_fractional_decimal_constant(const char* __input, ptrdiff_t __n, ptrdiff_t __offset) {
317   __fractional_constant_result<_Tp> __result;
318 
319   const _Tp __mantissa_truncate_threshold = numeric_limits<_Tp>::max() / 10;
320   bool __fraction                         = false;
321   for (; __offset < __n; ++__offset) {
322     if (std::isdigit(__input[__offset])) {
323       __result.__is_valid = true;
324 
325       uint32_t __digit = __input[__offset] - '0';
326       if (__result.__mantissa < __mantissa_truncate_threshold) {
327         __result.__mantissa = (__result.__mantissa * 10) + __digit;
328         if (__fraction)
329           --__result.__exponent;
330       } else {
331         if (__digit > 0)
332           __result.__truncated = true;
333         if (!__fraction)
334           ++__result.__exponent;
335       }
336     } else if (__input[__offset] == '.') {
337       if (__fraction)
338         break; // this means that __input[__offset] points to a second decimal point, ending the number.
339 
340       __fraction = true;
341     } else
342       break;
343   }
344 
345   __result.__offset = __offset;
346   return __result;
347 }
348 
349 // Implements from_chars for decimal floating-point values.
350 // __first forwarded from from_chars
351 // __last forwarded from from_chars
352 // __value forwarded from from_chars
353 // __fmt forwarded from from_chars
354 // __ptr the start of the buffer to parse. This is after the optional sign character.
355 // __negative should __value be set to a negative value?
356 template <floating_point _Fp>
357 __from_chars_result<_Fp> __from_chars_floating_point_decimal(
358     const char* const __first, const char* __last, chars_format __fmt, const char* __ptr, bool __negative) {
359   ptrdiff_t __n      = __last - __first;
360   ptrdiff_t __offset = __ptr - __first;
361 
362   auto __fractional =
363       std::__parse_fractional_decimal_constant<typename _Floating_type_traits<_Fp>::_Uint_type>(__first, __n, __offset);
364   if (!__fractional.__is_valid)
365     return {_Fp{0}, 0, errc::invalid_argument};
366 
367   __offset = __fractional.__offset;
368 
369   // LWG3456 Pattern used by std::from_chars is underspecified
370   // This changes fixed to ignore a possible exponent instead of making its
371   // existance an error.
372   int __exponent;
373   if (__fmt == chars_format::fixed) {
374     __exponent =
375         std::__merge_exponents(__fractional.__exponent, 0, LIBC_NAMESPACE::shared::FPBits<_Fp>::MAX_BIASED_EXPONENT);
376   } else {
377     auto __parsed_exponent = std::__parse_exponent(__first, __n, __offset, 'e');
378     if (__fmt == chars_format::scientific && !__parsed_exponent.__present) {
379       // [charconv.from.chars]/6.2 if fmt has chars_format::scientific set but not chars_format::fixed,
380       // the otherwise optional exponent part shall appear;
381       return {_Fp{0}, 0, errc::invalid_argument};
382     }
383 
384     __offset   = __parsed_exponent.__offset;
385     __exponent = std::__merge_exponents(
386         __fractional.__exponent, __parsed_exponent.__value, LIBC_NAMESPACE::shared::FPBits<_Fp>::MAX_BIASED_EXPONENT);
387   }
388 
389   __from_chars_result<_Fp> __result{_Fp{0}, __offset, {}};
390   LIBC_NAMESPACE::shared::ExpandedFloat<_Fp> __expanded_float = {0, 0};
391   if (__fractional.__mantissa != 0) {
392     // This function expects to parse a positive value. This means it does not
393     // take a __first, __n as arguments, since __first points to '-' for
394     // negative values.
395     auto __temp = LIBC_NAMESPACE::shared::decimal_exp_to_float<_Fp>(
396         {__fractional.__mantissa, __exponent},
397         __fractional.__truncated,
398         LIBC_NAMESPACE::shared::RoundDirection::Nearest,
399         __ptr,
400         __last - __ptr);
401     __expanded_float = __temp.num;
402     if (__temp.error == ERANGE) {
403       __result.__ec = errc::result_out_of_range;
404     }
405   }
406 
407   return std::__calculate_result(__expanded_float.mantissa, __expanded_float.exponent, __negative, __result);
408 }
409 
410 template <floating_point _Fp>
411 __from_chars_result<_Fp>
412 __from_chars_floating_point_impl(const char* const __first, const char* __last, chars_format __fmt) {
413   if (__first == __last) [[unlikely]]
414     return {_Fp{0}, 0, errc::invalid_argument};
415 
416   const char* __ptr = __first;
417   bool __negative   = *__ptr == '-';
418   if (__negative) {
419     ++__ptr;
420     if (__ptr == __last) [[unlikely]]
421       return {_Fp{0}, 0, errc::invalid_argument};
422   }
423 
424   // [charconv.from.chars]
425   //   [Note 1: If the pattern allows for an optional sign, but the string has
426   //   no digit characters following the sign, no characters match the pattern.
427   //   -- end note]
428   // This is true for integrals, floating point allows -.0
429 
430   // [charconv.from.chars]/6.2
431   //   if fmt has chars_format::scientific set but not chars_format::fixed, the
432   //   otherwise optional exponent part shall appear;
433   // Since INF/NAN do not have an exponent this value is not valid.
434   //
435   // LWG3456 Pattern used by std::from_chars is underspecified
436   // Does not address this point, but proposed option B does solve this issue,
437   // Both MSVC STL and libstdc++ implement this this behaviour.
438   switch (std::tolower(*__ptr)) {
439   case 'i':
440     return std::__from_chars_floating_point_inf<_Fp>(__first, __last, __ptr + 1, __negative);
441   case 'n':
442     if constexpr (numeric_limits<_Fp>::has_quiet_NaN)
443       // NOTE: The pointer passed here will be parsed in the default C locale.
444       // This is standard behavior (see https://eel.is/c++draft/charconv.from.chars), but may be unexpected.
445       return std::__from_chars_floating_point_nan<_Fp>(__first, __last, __ptr + 1, __negative);
446     return {_Fp{0}, 0, errc::invalid_argument};
447   }
448 
449   if (__fmt == chars_format::hex)
450     return std::__from_chars_floating_point_hex<_Fp>(__first, __last, __ptr, __negative);
451 
452   return std::__from_chars_floating_point_decimal<_Fp>(__first, __last, __fmt, __ptr, __negative);
453 }
454 
455 _LIBCPP_END_NAMESPACE_STD
456 
457 #endif //_LIBCPP_SRC_INCLUDE_FROM_CHARS_FLOATING_POINT_H
458