1 //===-- Int type specifier converters for scanf -----------------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #include "src/stdio/scanf_core/float_converter.h" 10 11 #include "src/__support/CPP/limits.h" 12 #include "src/__support/char_vector.h" 13 #include "src/__support/ctype_utils.h" 14 #include "src/__support/macros/config.h" 15 #include "src/stdio/scanf_core/converter_utils.h" 16 #include "src/stdio/scanf_core/core_structs.h" 17 #include "src/stdio/scanf_core/reader.h" 18 19 #include <stddef.h> 20 21 namespace LIBC_NAMESPACE_DECL { 22 namespace scanf_core { 23 24 // All of the floating point conversions are the same for scanf, every name will 25 // accept every style. 26 int convert_float(Reader *reader, const FormatSection &to_conv) { 27 // %a/A/e/E/f/F/g/G "Matches an optionally signed floating-point number, 28 // infinity, or NaN, whose format is the same as expected for the subject 29 // sequence of the strtod function. The corresponding argument shall be a 30 // pointer to floating." 31 32 CharVector out_str = CharVector(); 33 bool is_number = false; 34 35 size_t max_width = cpp::numeric_limits<size_t>::max(); 36 if (to_conv.max_width > 0) { 37 max_width = to_conv.max_width; 38 } 39 40 char cur_char = reader->getc(); 41 // Handle the sign. 42 if (cur_char == '+' || cur_char == '-') { 43 if (!out_str.append(cur_char)) { 44 return ALLOCATION_FAILURE; 45 } 46 if (out_str.length() == max_width) { 47 return MATCHING_FAILURE; 48 } else { 49 cur_char = reader->getc(); 50 } 51 } 52 53 static constexpr char DECIMAL_POINT = '.'; 54 static const char inf_string[] = "infinity"; 55 56 // Handle inf 57 58 if (internal::tolower(cur_char) == inf_string[0]) { 59 size_t inf_index = 0; 60 61 for (; 62 inf_index < (sizeof(inf_string) - 1) && out_str.length() < max_width && 63 internal::tolower(cur_char) == inf_string[inf_index]; 64 ++inf_index) { 65 if (!out_str.append(cur_char)) { 66 return ALLOCATION_FAILURE; 67 } 68 cur_char = reader->getc(); 69 } 70 71 if (inf_index == 3 || inf_index == sizeof(inf_string) - 1) { 72 write_float_with_length(out_str.c_str(), to_conv); 73 return READ_OK; 74 } else { 75 return MATCHING_FAILURE; 76 } 77 } 78 79 static const char nan_string[] = "nan"; 80 81 // Handle nan 82 if (internal::tolower(cur_char) == nan_string[0]) { 83 size_t nan_index = 0; 84 85 for (; 86 nan_index < (sizeof(nan_string) - 1) && out_str.length() < max_width && 87 internal::tolower(cur_char) == nan_string[nan_index]; 88 ++nan_index) { 89 if (!out_str.append(cur_char)) { 90 return ALLOCATION_FAILURE; 91 } 92 cur_char = reader->getc(); 93 } 94 95 if (nan_index == sizeof(nan_string) - 1) { 96 write_float_with_length(out_str.c_str(), to_conv); 97 return READ_OK; 98 } else { 99 return MATCHING_FAILURE; 100 } 101 } 102 103 // Assume base of 10 by default but check if it is actually base 16. 104 int base = 10; 105 106 // If the string starts with 0 it might be in hex. 107 if (cur_char == '0') { 108 is_number = true; 109 // Read the next character to check. 110 if (!out_str.append(cur_char)) { 111 return ALLOCATION_FAILURE; 112 } 113 // If we've hit the end, then this is "0", which is valid. 114 if (out_str.length() == max_width) { 115 write_float_with_length(out_str.c_str(), to_conv); 116 return READ_OK; 117 } else { 118 cur_char = reader->getc(); 119 } 120 121 // If that next character is an 'x' then this is a hexadecimal number. 122 if (internal::tolower(cur_char) == 'x') { 123 base = 16; 124 125 if (!out_str.append(cur_char)) { 126 return ALLOCATION_FAILURE; 127 } 128 // If we've hit the end here, we have "0x" which is a valid prefix to a 129 // floating point number, and will be evaluated to 0. 130 if (out_str.length() == max_width) { 131 write_float_with_length(out_str.c_str(), to_conv); 132 return READ_OK; 133 } else { 134 cur_char = reader->getc(); 135 } 136 } 137 } 138 139 const char exponent_mark = ((base == 10) ? 'e' : 'p'); 140 bool after_decimal = false; 141 142 // The format for the remaining characters at this point is DD.DDe+/-DD for 143 // base 10 and XX.XXp+/-DD for base 16 144 145 // This handles the digits before and after the decimal point, but not the 146 // exponent. 147 while (out_str.length() < max_width) { 148 if (internal::isalnum(cur_char) && 149 internal::b36_char_to_int(cur_char) < base) { 150 is_number = true; 151 if (!out_str.append(cur_char)) { 152 return ALLOCATION_FAILURE; 153 } 154 cur_char = reader->getc(); 155 } else if (cur_char == DECIMAL_POINT && !after_decimal) { 156 after_decimal = true; 157 if (!out_str.append(cur_char)) { 158 return ALLOCATION_FAILURE; 159 } 160 cur_char = reader->getc(); 161 } else { 162 break; 163 } 164 } 165 166 // Handle the exponent, which has an exponent mark, an optional sign, and 167 // decimal digits. 168 if (internal::tolower(cur_char) == exponent_mark) { 169 if (!out_str.append(cur_char)) { 170 return ALLOCATION_FAILURE; 171 } 172 if (out_str.length() == max_width) { 173 // This is laid out in the standard as being a matching error (100e is not 174 // a valid float) but may conflict with existing implementations. 175 return MATCHING_FAILURE; 176 } else { 177 cur_char = reader->getc(); 178 } 179 180 if (cur_char == '+' || cur_char == '-') { 181 if (!out_str.append(cur_char)) { 182 return ALLOCATION_FAILURE; 183 } 184 if (out_str.length() == max_width) { 185 return MATCHING_FAILURE; 186 } else { 187 cur_char = reader->getc(); 188 } 189 } 190 191 // It is specified by the standard that "100er" is a matching failure since 192 // the longest prefix of a possibly valid floating-point number (which is 193 // "100e") is not a valid floating-point number. If there is an exponent 194 // mark then there must be a digit after it else the number is not valid. 195 // Some implementations will roll back two characters (to just "100") and 196 // accept that since the prefix is not valid, and some will interpret an 197 // exponent mark followed by no digits as an additional exponent of 0 198 // (accepting "100e" and returning 100.0). Both of these behaviors are wrong 199 // by the standard, but they may be used in real code, see Hyrum's law. This 200 // code follows the standard, but may be incompatible due to code expecting 201 // these bugs. 202 if (!internal::isdigit(cur_char)) { 203 return MATCHING_FAILURE; 204 } 205 206 while (internal::isdigit(cur_char) && out_str.length() < max_width) { 207 if (!out_str.append(cur_char)) { 208 return ALLOCATION_FAILURE; 209 } 210 cur_char = reader->getc(); 211 } 212 } 213 214 // We always read one more character than will be used, so we have to put the 215 // last one back. 216 reader->ungetc(cur_char); 217 218 // If we haven't actually found any digits, this is a matching failure (this 219 // catches cases like "+.") 220 if (!is_number) { 221 return MATCHING_FAILURE; 222 } 223 write_float_with_length(out_str.c_str(), to_conv); 224 225 return READ_OK; 226 } 227 228 } // namespace scanf_core 229 } // namespace LIBC_NAMESPACE_DECL 230