1 //===----------------------------------------------------------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #ifndef _LIBCPP_SRC_INCLUDE_FROM_CHARS_FLOATING_POINT_H 10 #define _LIBCPP_SRC_INCLUDE_FROM_CHARS_FLOATING_POINT_H 11 12 // These headers are in the shared LLVM-libc header library. 13 #include "shared/fp_bits.h" 14 #include "shared/str_to_float.h" 15 #include "shared/str_to_integer.h" 16 17 #include <__assert> 18 #include <__config> 19 #include <cctype> 20 #include <charconv> 21 #include <concepts> 22 #include <limits> 23 24 // Included for the _Floating_type_traits class 25 #include "to_chars_floating_point.h" 26 27 _LIBCPP_BEGIN_NAMESPACE_STD 28 29 // Parses an infinity string. 30 // Valid strings are case insensitive and contain INF or INFINITY. 31 // 32 // - __first is the first argument to std::from_chars. When the string is invalid 33 // this value is returned as ptr in the result. 34 // - __last is the last argument of std::from_chars. 35 // - __value is the value argument of std::from_chars, 36 // - __ptr is the current position is the input string. This is points beyond 37 // the initial I character. 38 // - __negative whether a valid string represents -inf or +inf. 39 template <floating_point _Fp> 40 __from_chars_result<_Fp> 41 __from_chars_floating_point_inf(const char* const __first, const char* __last, const char* __ptr, bool __negative) { 42 if (__last - __ptr < 2) [[unlikely]] 43 return {_Fp{0}, 0, errc::invalid_argument}; 44 45 if (std::tolower(__ptr[0]) != 'n' || std::tolower(__ptr[1]) != 'f') [[unlikely]] 46 return {_Fp{0}, 0, errc::invalid_argument}; 47 48 __ptr += 2; 49 50 // At this point the result is valid and contains INF. 51 // When the remaining part contains INITY this will be consumed. Otherwise 52 // only INF is consumed. For example INFINITZ will consume INF and ignore 53 // INITZ. 54 55 if (__last - __ptr >= 5 // 56 && std::tolower(__ptr[0]) == 'i' // 57 && std::tolower(__ptr[1]) == 'n' // 58 && std::tolower(__ptr[2]) == 'i' // 59 && std::tolower(__ptr[3]) == 't' // 60 && std::tolower(__ptr[4]) == 'y') 61 __ptr += 5; 62 63 if constexpr (numeric_limits<_Fp>::has_infinity) { 64 if (__negative) 65 return {-std::numeric_limits<_Fp>::infinity(), __ptr - __first, std::errc{}}; 66 67 return {std::numeric_limits<_Fp>::infinity(), __ptr - __first, std::errc{}}; 68 } else { 69 return {_Fp{0}, __ptr - __first, errc::result_out_of_range}; 70 } 71 } 72 73 // Parses a nan string. 74 // Valid strings are case insensitive and contain INF or INFINITY. 75 // 76 // - __first is the first argument to std::from_chars. When the string is invalid 77 // this value is returned as ptr in the result. 78 // - __last is the last argument of std::from_chars. 79 // - __value is the value argument of std::from_chars, 80 // - __ptr is the current position is the input string. This is points beyond 81 // the initial N character. 82 // - __negative whether a valid string represents -nan or +nan. 83 template <floating_point _Fp> 84 __from_chars_result<_Fp> 85 __from_chars_floating_point_nan(const char* const __first, const char* __last, const char* __ptr, bool __negative) { 86 if (__last - __ptr < 2) [[unlikely]] 87 return {_Fp{0}, 0, errc::invalid_argument}; 88 89 if (std::tolower(__ptr[0]) != 'a' || std::tolower(__ptr[1]) != 'n') [[unlikely]] 90 return {_Fp{0}, 0, errc::invalid_argument}; 91 92 __ptr += 2; 93 94 // At this point the result is valid and contains NAN. When the remaining 95 // part contains ( n-char-sequence_opt ) this will be consumed. Otherwise 96 // only NAN is consumed. For example NAN(abcd will consume NAN and ignore 97 // (abcd. 98 if (__last - __ptr >= 2 && __ptr[0] == '(') { 99 size_t __offset = 1; 100 do { 101 if (__ptr[__offset] == ')') { 102 __ptr += __offset + 1; 103 break; 104 } 105 if (__ptr[__offset] != '_' && !std::isalnum(__ptr[__offset])) 106 break; 107 ++__offset; 108 } while (__ptr + __offset != __last); 109 } 110 111 if (__negative) 112 return {-std::numeric_limits<_Fp>::quiet_NaN(), __ptr - __first, std::errc{}}; 113 114 return {std::numeric_limits<_Fp>::quiet_NaN(), __ptr - __first, std::errc{}}; 115 } 116 117 template <class _Tp> 118 struct __fractional_constant_result { 119 size_t __offset{size_t(-1)}; 120 _Tp __mantissa{0}; 121 int __exponent{0}; 122 bool __truncated{false}; 123 bool __is_valid{false}; 124 }; 125 126 // Parses the hex constant part of the hexadecimal floating-point value. 127 // - input start of buffer given to from_chars 128 // - __n the number of elements in the buffer 129 // - __offset where to start parsing. The input can have an optional sign, the 130 // offset starts after this sign. 131 template <class _Tp> 132 __fractional_constant_result<_Tp> __parse_fractional_hex_constant(const char* __input, size_t __n, size_t __offset) { 133 __fractional_constant_result<_Tp> __result; 134 135 const _Tp __mantissa_truncate_threshold = numeric_limits<_Tp>::max() / 16; 136 bool __fraction = false; 137 for (; __offset < __n; ++__offset) { 138 if (std::isxdigit(__input[__offset])) { 139 __result.__is_valid = true; 140 141 uint32_t __digit = __input[__offset] - '0'; 142 switch (std::tolower(__input[__offset])) { 143 case 'a': 144 __digit = 10; 145 break; 146 case 'b': 147 __digit = 11; 148 break; 149 case 'c': 150 __digit = 12; 151 break; 152 case 'd': 153 __digit = 13; 154 break; 155 case 'e': 156 __digit = 14; 157 break; 158 case 'f': 159 __digit = 15; 160 break; 161 } 162 163 if (__result.__mantissa < __mantissa_truncate_threshold) { 164 __result.__mantissa = (__result.__mantissa * 16) + __digit; 165 if (__fraction) 166 __result.__exponent -= 4; 167 } else { 168 if (__digit > 0) 169 __result.__truncated = true; 170 if (!__fraction) 171 __result.__exponent += 4; 172 } 173 } else if (__input[__offset] == '.') { 174 if (__fraction) 175 break; // this means that __input[__offset] points to a second decimal point, ending the number. 176 177 __fraction = true; 178 } else 179 break; 180 } 181 182 __result.__offset = __offset; 183 return __result; 184 } 185 186 struct __exponent_result { 187 size_t __offset{size_t(-1)}; 188 int __value{0}; 189 bool __present{false}; 190 }; 191 192 // When the exponent is not present the result of the struct contains 193 // __offset, 0, false. This allows using the results unconditionally, the 194 // __present is important for the scientific notation, where the value is 195 // mandatory. 196 __exponent_result __parse_exponent(const char* __input, size_t __n, size_t __offset, char __marker) { 197 if (__offset + 1 < __n && // an exponent always needs at least one digit. 198 std::tolower(__input[__offset]) == __marker && // 199 !std::isspace(__input[__offset + 1]) // leading whitespace is not allowed. 200 ) { 201 ++__offset; 202 LIBC_NAMESPACE::shared::StrToNumResult<int32_t> __e = 203 LIBC_NAMESPACE::shared::strtointeger<int32_t>(__input + __offset, 10, __n - __offset); 204 // __result.error contains the errno value, 0 or ERANGE these are not interesting. 205 // If the number of characters parsed is 0 it means there was no number. 206 if (__e.parsed_len != 0) 207 return {__offset + __e.parsed_len, __e.value, true}; 208 else 209 --__offset; // the assumption of a valid exponent was not true, undo eating the exponent character. 210 } 211 212 return {__offset, 0, false}; 213 } 214 215 // Here we do this operation as int64 to avoid overflow. 216 int32_t __merge_exponents(int64_t __fractional, int64_t __exponent, int __max_biased_exponent) { 217 int64_t __sum = __fractional + __exponent; 218 219 if (__sum > __max_biased_exponent) 220 return __max_biased_exponent; 221 222 if (__sum < -__max_biased_exponent) 223 return -__max_biased_exponent; 224 225 return __sum; 226 } 227 228 template <class _Fp, class _Tp> 229 __from_chars_result<_Fp> 230 __calculate_result(_Tp __mantissa, int __exponent, bool __negative, __from_chars_result<_Fp> __result) { 231 auto __r = LIBC_NAMESPACE::shared::FPBits<_Fp>(); 232 __r.set_mantissa(__mantissa); 233 __r.set_biased_exponent(__exponent); 234 235 // C17 7.12.1/6 236 // The result underflows if the magnitude of the mathematical result is so 237 // small that the mathematical result cannot be represented, without 238 // extraordinary roundoff error, in an object of the specified type.237) If 239 // the result underflows, the function returns an implementation-defined 240 // value whose magnitude is no greater than the smallest normalized positive 241 // number in the specified type; if the integer expression math_errhandling 242 // & MATH_ERRNO is nonzero, whether errno acquires the value ERANGE is 243 // implementation-defined; if the integer expression math_errhandling & 244 // MATH_ERREXCEPT is nonzero, whether the "underflow" floating-point 245 // exception is raised is implementation-defined. 246 // 247 // LLVM-LIBC sets ERAGNE for subnormal values 248 // 249 // [charconv.from.chars]/1 250 // ... If the parsed value is not in the range representable by the type of 251 // value, value is unmodified and the member ec of the return value is 252 // equal to errc::result_out_of_range. ... 253 // 254 // Undo the ERANGE for subnormal values. 255 if (__result.__ec == errc::result_out_of_range && __r.is_subnormal() && !__r.is_zero()) 256 __result.__ec = errc{}; 257 258 if (__negative) 259 __result.__value = -__r.get_val(); 260 else 261 __result.__value = __r.get_val(); 262 263 return __result; 264 } 265 266 // Implements from_chars for decimal floating-point values. 267 // __first forwarded from from_chars 268 // __last forwarded from from_chars 269 // __value forwarded from from_chars 270 // __fmt forwarded from from_chars 271 // __ptr the start of the buffer to parse. This is after the optional sign character. 272 // __negative should __value be set to a negative value? 273 // 274 // This function and __from_chars_floating_point_decimal are similar. However 275 // the similar parts are all in helper functions. So the amount of code 276 // duplication is minimal. 277 template <floating_point _Fp> 278 __from_chars_result<_Fp> 279 __from_chars_floating_point_hex(const char* const __first, const char* __last, const char* __ptr, bool __negative) { 280 size_t __n = __last - __first; 281 ptrdiff_t __offset = __ptr - __first; 282 283 auto __fractional = 284 std::__parse_fractional_hex_constant<typename _Floating_type_traits<_Fp>::_Uint_type>(__first, __n, __offset); 285 if (!__fractional.__is_valid) 286 return {_Fp{0}, 0, errc::invalid_argument}; 287 288 auto __parsed_exponent = std::__parse_exponent(__first, __n, __fractional.__offset, 'p'); 289 __offset = __parsed_exponent.__offset; 290 int __exponent = std::__merge_exponents( 291 __fractional.__exponent, __parsed_exponent.__value, LIBC_NAMESPACE::shared::FPBits<_Fp>::MAX_BIASED_EXPONENT); 292 293 __from_chars_result<_Fp> __result{_Fp{0}, __offset, {}}; 294 LIBC_NAMESPACE::shared::ExpandedFloat<_Fp> __expanded_float = {0, 0}; 295 if (__fractional.__mantissa != 0) { 296 auto __temp = LIBC_NAMESPACE::shared::binary_exp_to_float<_Fp>( 297 {__fractional.__mantissa, __exponent}, 298 __fractional.__truncated, 299 LIBC_NAMESPACE::shared::RoundDirection::Nearest); 300 __expanded_float = __temp.num; 301 if (__temp.error == ERANGE) { 302 __result.__ec = errc::result_out_of_range; 303 } 304 } 305 306 return std::__calculate_result<_Fp>(__expanded_float.mantissa, __expanded_float.exponent, __negative, __result); 307 } 308 309 // Parses the hex constant part of the decimal float value. 310 // - input start of buffer given to from_chars 311 // - __n the number of elements in the buffer 312 // - __offset where to start parsing. The input can have an optional sign, the 313 // offset starts after this sign. 314 template <class _Tp> 315 __fractional_constant_result<_Tp> 316 __parse_fractional_decimal_constant(const char* __input, ptrdiff_t __n, ptrdiff_t __offset) { 317 __fractional_constant_result<_Tp> __result; 318 319 const _Tp __mantissa_truncate_threshold = numeric_limits<_Tp>::max() / 10; 320 bool __fraction = false; 321 for (; __offset < __n; ++__offset) { 322 if (std::isdigit(__input[__offset])) { 323 __result.__is_valid = true; 324 325 uint32_t __digit = __input[__offset] - '0'; 326 if (__result.__mantissa < __mantissa_truncate_threshold) { 327 __result.__mantissa = (__result.__mantissa * 10) + __digit; 328 if (__fraction) 329 --__result.__exponent; 330 } else { 331 if (__digit > 0) 332 __result.__truncated = true; 333 if (!__fraction) 334 ++__result.__exponent; 335 } 336 } else if (__input[__offset] == '.') { 337 if (__fraction) 338 break; // this means that __input[__offset] points to a second decimal point, ending the number. 339 340 __fraction = true; 341 } else 342 break; 343 } 344 345 __result.__offset = __offset; 346 return __result; 347 } 348 349 // Implements from_chars for decimal floating-point values. 350 // __first forwarded from from_chars 351 // __last forwarded from from_chars 352 // __value forwarded from from_chars 353 // __fmt forwarded from from_chars 354 // __ptr the start of the buffer to parse. This is after the optional sign character. 355 // __negative should __value be set to a negative value? 356 template <floating_point _Fp> 357 __from_chars_result<_Fp> __from_chars_floating_point_decimal( 358 const char* const __first, const char* __last, chars_format __fmt, const char* __ptr, bool __negative) { 359 ptrdiff_t __n = __last - __first; 360 ptrdiff_t __offset = __ptr - __first; 361 362 auto __fractional = 363 std::__parse_fractional_decimal_constant<typename _Floating_type_traits<_Fp>::_Uint_type>(__first, __n, __offset); 364 if (!__fractional.__is_valid) 365 return {_Fp{0}, 0, errc::invalid_argument}; 366 367 __offset = __fractional.__offset; 368 369 // LWG3456 Pattern used by std::from_chars is underspecified 370 // This changes fixed to ignore a possible exponent instead of making its 371 // existance an error. 372 int __exponent; 373 if (__fmt == chars_format::fixed) { 374 __exponent = 375 std::__merge_exponents(__fractional.__exponent, 0, LIBC_NAMESPACE::shared::FPBits<_Fp>::MAX_BIASED_EXPONENT); 376 } else { 377 auto __parsed_exponent = std::__parse_exponent(__first, __n, __offset, 'e'); 378 if (__fmt == chars_format::scientific && !__parsed_exponent.__present) { 379 // [charconv.from.chars]/6.2 if fmt has chars_format::scientific set but not chars_format::fixed, 380 // the otherwise optional exponent part shall appear; 381 return {_Fp{0}, 0, errc::invalid_argument}; 382 } 383 384 __offset = __parsed_exponent.__offset; 385 __exponent = std::__merge_exponents( 386 __fractional.__exponent, __parsed_exponent.__value, LIBC_NAMESPACE::shared::FPBits<_Fp>::MAX_BIASED_EXPONENT); 387 } 388 389 __from_chars_result<_Fp> __result{_Fp{0}, __offset, {}}; 390 LIBC_NAMESPACE::shared::ExpandedFloat<_Fp> __expanded_float = {0, 0}; 391 if (__fractional.__mantissa != 0) { 392 // This function expects to parse a positive value. This means it does not 393 // take a __first, __n as arguments, since __first points to '-' for 394 // negative values. 395 auto __temp = LIBC_NAMESPACE::shared::decimal_exp_to_float<_Fp>( 396 {__fractional.__mantissa, __exponent}, 397 __fractional.__truncated, 398 LIBC_NAMESPACE::shared::RoundDirection::Nearest, 399 __ptr, 400 __last - __ptr); 401 __expanded_float = __temp.num; 402 if (__temp.error == ERANGE) { 403 __result.__ec = errc::result_out_of_range; 404 } 405 } 406 407 return std::__calculate_result(__expanded_float.mantissa, __expanded_float.exponent, __negative, __result); 408 } 409 410 template <floating_point _Fp> 411 __from_chars_result<_Fp> 412 __from_chars_floating_point_impl(const char* const __first, const char* __last, chars_format __fmt) { 413 if (__first == __last) [[unlikely]] 414 return {_Fp{0}, 0, errc::invalid_argument}; 415 416 const char* __ptr = __first; 417 bool __negative = *__ptr == '-'; 418 if (__negative) { 419 ++__ptr; 420 if (__ptr == __last) [[unlikely]] 421 return {_Fp{0}, 0, errc::invalid_argument}; 422 } 423 424 // [charconv.from.chars] 425 // [Note 1: If the pattern allows for an optional sign, but the string has 426 // no digit characters following the sign, no characters match the pattern. 427 // -- end note] 428 // This is true for integrals, floating point allows -.0 429 430 // [charconv.from.chars]/6.2 431 // if fmt has chars_format::scientific set but not chars_format::fixed, the 432 // otherwise optional exponent part shall appear; 433 // Since INF/NAN do not have an exponent this value is not valid. 434 // 435 // LWG3456 Pattern used by std::from_chars is underspecified 436 // Does not address this point, but proposed option B does solve this issue, 437 // Both MSVC STL and libstdc++ implement this this behaviour. 438 switch (std::tolower(*__ptr)) { 439 case 'i': 440 return std::__from_chars_floating_point_inf<_Fp>(__first, __last, __ptr + 1, __negative); 441 case 'n': 442 if constexpr (numeric_limits<_Fp>::has_quiet_NaN) 443 // NOTE: The pointer passed here will be parsed in the default C locale. 444 // This is standard behavior (see https://eel.is/c++draft/charconv.from.chars), but may be unexpected. 445 return std::__from_chars_floating_point_nan<_Fp>(__first, __last, __ptr + 1, __negative); 446 return {_Fp{0}, 0, errc::invalid_argument}; 447 } 448 449 if (__fmt == chars_format::hex) 450 return std::__from_chars_floating_point_hex<_Fp>(__first, __last, __ptr, __negative); 451 452 return std::__from_chars_floating_point_decimal<_Fp>(__first, __last, __fmt, __ptr, __negative); 453 } 454 455 _LIBCPP_END_NAMESPACE_STD 456 457 #endif //_LIBCPP_SRC_INCLUDE_FROM_CHARS_FLOATING_POINT_H 458