1 //===-- runtime/edit-input.cpp --------------------------------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #include "edit-input.h" 10 #include "namelist.h" 11 #include "utf.h" 12 #include "flang/Common/real.h" 13 #include "flang/Common/uint128.h" 14 #include <algorithm> 15 #include <cfenv> 16 17 namespace Fortran::runtime::io { 18 19 // Checks that a list-directed input value has been entirely consumed and 20 // doesn't contain unparsed characters before the next value separator. 21 static inline bool IsCharValueSeparator(const DataEdit &edit, char32_t ch) { 22 char32_t comma{ 23 edit.modes.editingFlags & decimalComma ? char32_t{';'} : char32_t{','}}; 24 return ch == ' ' || ch == '\t' || ch == comma || ch == '/' || 25 (edit.IsNamelist() && (ch == '&' || ch == '$')); 26 } 27 28 static bool CheckCompleteListDirectedField( 29 IoStatementState &io, const DataEdit &edit) { 30 if (edit.IsListDirected()) { 31 std::size_t byteCount; 32 if (auto ch{io.GetCurrentChar(byteCount)}) { 33 if (IsCharValueSeparator(edit, *ch)) { 34 return true; 35 } else { 36 const auto &connection{io.GetConnectionState()}; 37 io.GetIoErrorHandler().SignalError(IostatBadListDirectedInputSeparator, 38 "invalid character (0x%x) after list-directed input value, " 39 "at column %d in record %d", 40 static_cast<unsigned>(*ch), 41 static_cast<int>(connection.positionInRecord + 1), 42 static_cast<int>(connection.currentRecordNumber)); 43 return false; 44 } 45 } else { 46 return true; // end of record: ok 47 } 48 } else { 49 return true; 50 } 51 } 52 53 template <int LOG2_BASE> 54 static bool EditBOZInput( 55 IoStatementState &io, const DataEdit &edit, void *n, std::size_t bytes) { 56 // Skip leading white space & zeroes 57 std::optional<int> remaining{io.CueUpInput(edit)}; 58 auto start{io.GetConnectionState().positionInRecord}; 59 std::optional<char32_t> next{io.NextInField(remaining, edit)}; 60 if (next.value_or('?') == '0') { 61 do { 62 start = io.GetConnectionState().positionInRecord; 63 next = io.NextInField(remaining, edit); 64 } while (next && *next == '0'); 65 } 66 // Count significant digits after any leading white space & zeroes 67 int digits{0}; 68 int significantBits{0}; 69 for (; next; next = io.NextInField(remaining, edit)) { 70 char32_t ch{*next}; 71 if (ch == ' ' || ch == '\t') { 72 if (edit.modes.editingFlags & blankZero) { 73 ch = '0'; // BZ mode - treat blank as if it were zero 74 } else { 75 continue; 76 } 77 } 78 if (ch >= '0' && ch <= '1') { 79 } else if (LOG2_BASE >= 3 && ch >= '2' && ch <= '7') { 80 } else if (LOG2_BASE >= 4 && ch >= '8' && ch <= '9') { 81 } else if (LOG2_BASE >= 4 && ch >= 'A' && ch <= 'F') { 82 } else if (LOG2_BASE >= 4 && ch >= 'a' && ch <= 'f') { 83 } else { 84 io.GetIoErrorHandler().SignalError( 85 "Bad character '%lc' in B/O/Z input field", ch); 86 return false; 87 } 88 if (digits++ == 0) { 89 significantBits = 4; 90 if (ch >= '0' && ch <= '1') { 91 significantBits = 1; 92 } else if (ch >= '2' && ch <= '3') { 93 significantBits = 2; 94 } else if (ch >= '4' && ch <= '7') { 95 significantBits = 3; 96 } else { 97 significantBits = 4; 98 } 99 } else { 100 significantBits += LOG2_BASE; 101 } 102 } 103 auto significantBytes{static_cast<std::size_t>(significantBits + 7) / 8}; 104 if (significantBytes > bytes) { 105 io.GetIoErrorHandler().SignalError(IostatBOZInputOverflow, 106 "B/O/Z input of %d digits overflows %zd-byte variable", digits, bytes); 107 return false; 108 } 109 // Reset to start of significant digits 110 io.HandleAbsolutePosition(start); 111 remaining.reset(); 112 // Make a second pass now that the digit count is known 113 std::memset(n, 0, bytes); 114 int increment{isHostLittleEndian ? -1 : 1}; 115 auto *data{reinterpret_cast<unsigned char *>(n) + 116 (isHostLittleEndian ? significantBytes - 1 : 0)}; 117 int shift{((digits - 1) * LOG2_BASE) & 7}; 118 while (digits > 0) { 119 char32_t ch{*io.NextInField(remaining, edit)}; 120 int digit{0}; 121 if (ch == ' ' || ch == '\t') { 122 if (edit.modes.editingFlags & blankZero) { 123 ch = '0'; // BZ mode - treat blank as if it were zero 124 } else { 125 continue; 126 } 127 } 128 --digits; 129 if (ch >= '0' && ch <= '9') { 130 digit = ch - '0'; 131 } else if (ch >= 'A' && ch <= 'F') { 132 digit = ch + 10 - 'A'; 133 } else if (ch >= 'a' && ch <= 'f') { 134 digit = ch + 10 - 'a'; 135 } else { 136 continue; 137 } 138 if (shift < 0) { 139 if (shift + LOG2_BASE > 0) { // misaligned octal 140 *data |= digit >> -shift; 141 } 142 shift += 8; 143 data += increment; 144 } 145 *data |= digit << shift; 146 shift -= LOG2_BASE; 147 } 148 return CheckCompleteListDirectedField(io, edit); 149 } 150 151 static inline char32_t GetRadixPointChar(const DataEdit &edit) { 152 return edit.modes.editingFlags & decimalComma ? char32_t{','} : char32_t{'.'}; 153 } 154 155 // Prepares input from a field, and returns the sign, if any, else '\0'. 156 static char ScanNumericPrefix(IoStatementState &io, const DataEdit &edit, 157 std::optional<char32_t> &next, std::optional<int> &remaining) { 158 remaining = io.CueUpInput(edit); 159 next = io.NextInField(remaining, edit); 160 char sign{'\0'}; 161 if (next) { 162 if (*next == '-' || *next == '+') { 163 sign = *next; 164 if (!edit.IsListDirected()) { 165 io.SkipSpaces(remaining); 166 } 167 next = io.NextInField(remaining, edit); 168 } 169 } 170 return sign; 171 } 172 173 bool EditIntegerInput( 174 IoStatementState &io, const DataEdit &edit, void *n, int kind) { 175 RUNTIME_CHECK(io.GetIoErrorHandler(), kind >= 1 && !(kind & (kind - 1))); 176 switch (edit.descriptor) { 177 case DataEdit::ListDirected: 178 if (IsNamelistNameOrSlash(io)) { 179 return false; 180 } 181 break; 182 case 'G': 183 case 'I': 184 break; 185 case 'B': 186 return EditBOZInput<1>(io, edit, n, kind); 187 case 'O': 188 return EditBOZInput<3>(io, edit, n, kind); 189 case 'Z': 190 return EditBOZInput<4>(io, edit, n, kind); 191 case 'A': // legacy extension 192 return EditCharacterInput(io, edit, reinterpret_cast<char *>(n), kind); 193 default: 194 io.GetIoErrorHandler().SignalError(IostatErrorInFormat, 195 "Data edit descriptor '%c' may not be used with an INTEGER data item", 196 edit.descriptor); 197 return false; 198 } 199 std::optional<int> remaining; 200 std::optional<char32_t> next; 201 char sign{ScanNumericPrefix(io, edit, next, remaining)}; 202 common::UnsignedInt128 value{0}; 203 bool any{!!sign}; 204 bool overflow{false}; 205 for (; next; next = io.NextInField(remaining, edit)) { 206 char32_t ch{*next}; 207 if (ch == ' ' || ch == '\t') { 208 if (edit.modes.editingFlags & blankZero) { 209 ch = '0'; // BZ mode - treat blank as if it were zero 210 } else { 211 continue; 212 } 213 } 214 int digit{0}; 215 if (ch >= '0' && ch <= '9') { 216 digit = ch - '0'; 217 } else { 218 io.GetIoErrorHandler().SignalError( 219 "Bad character '%lc' in INTEGER input field", ch); 220 return false; 221 } 222 static constexpr auto maxu128{~common::UnsignedInt128{0}}; 223 static constexpr auto maxu128OverTen{maxu128 / 10}; 224 static constexpr int maxLastDigit{ 225 static_cast<int>(maxu128 - (maxu128OverTen * 10))}; 226 overflow |= value >= maxu128OverTen && 227 (value > maxu128OverTen || digit > maxLastDigit); 228 value *= 10; 229 value += digit; 230 any = true; 231 } 232 if (!any && !remaining) { 233 io.GetIoErrorHandler().SignalError( 234 "Integer value absent from NAMELIST or list-directed input"); 235 return false; 236 } 237 auto maxForKind{common::UnsignedInt128{1} << ((8 * kind) - 1)}; 238 overflow |= value >= maxForKind && (value > maxForKind || sign != '-'); 239 if (overflow) { 240 io.GetIoErrorHandler().SignalError(IostatIntegerInputOverflow, 241 "Decimal input overflows INTEGER(%d) variable", kind); 242 return false; 243 } 244 if (sign == '-') { 245 value = -value; 246 } 247 if (any || !io.GetIoErrorHandler().InError()) { 248 // The value is stored in the lower order bits on big endian platform. 249 // When memcpy, shift the value to the higher order bit. 250 auto shft{static_cast<int>(sizeof(value.low())) - kind}; 251 // For kind==8 (i.e. shft==0), the value is stored in low_ in big endian. 252 if (!isHostLittleEndian && shft >= 0) { 253 auto l{value.low() << (8 * shft)}; 254 std::memcpy(n, &l, kind); 255 } else { 256 std::memcpy(n, &value, kind); // a blank field means zero 257 } 258 return true; 259 } else { 260 return false; 261 } 262 } 263 264 // Parses a REAL input number from the input source as a normalized 265 // fraction into a supplied buffer -- there's an optional '-', a 266 // decimal point when the input is not hexadecimal, and at least one 267 // digit. Replaces blanks with zeroes where appropriate. 268 struct ScannedRealInput { 269 // Number of characters that (should) have been written to the 270 // buffer -- this can be larger than the buffer size, which 271 // indicates buffer overflow. Zero indicates an error. 272 int got{0}; 273 int exponent{0}; // adjusted as necessary; binary if isHexadecimal 274 bool isHexadecimal{false}; // 0X... 275 }; 276 static ScannedRealInput ScanRealInput( 277 char *buffer, int bufferSize, IoStatementState &io, const DataEdit &edit) { 278 std::optional<int> remaining; 279 std::optional<char32_t> next; 280 int got{0}; 281 std::optional<int> radixPointOffset; 282 auto Put{[&](char ch) -> void { 283 if (got < bufferSize) { 284 buffer[got] = ch; 285 } 286 ++got; 287 }}; 288 char sign{ScanNumericPrefix(io, edit, next, remaining)}; 289 if (sign == '-') { 290 Put('-'); 291 } 292 bool bzMode{(edit.modes.editingFlags & blankZero) != 0}; 293 int exponent{0}; 294 if (!next || (!bzMode && *next == ' ')) { 295 if (!edit.IsListDirected() && !io.GetConnectionState().IsAtEOF()) { 296 // An empty/blank field means zero when not list-directed. 297 // A fixed-width field containing only a sign is also zero; 298 // this behavior isn't standard-conforming in F'2023 but it is 299 // required to pass FCVS. 300 Put('0'); 301 } 302 return {got, exponent, false}; 303 } 304 char32_t radixPointChar{GetRadixPointChar(edit)}; 305 char32_t first{*next >= 'a' && *next <= 'z' ? *next + 'A' - 'a' : *next}; 306 bool isHexadecimal{false}; 307 if (first == 'N' || first == 'I') { 308 // NaN or infinity - convert to upper case 309 // Subtle: a blank field of digits could be followed by 'E' or 'D', 310 for (; next && 311 ((*next >= 'a' && *next <= 'z') || (*next >= 'A' && *next <= 'Z')); 312 next = io.NextInField(remaining, edit)) { 313 if (*next >= 'a' && *next <= 'z') { 314 Put(*next - 'a' + 'A'); 315 } else { 316 Put(*next); 317 } 318 } 319 if (next && *next == '(') { // NaN(...) 320 Put('('); 321 int depth{1}; 322 while (true) { 323 next = io.NextInField(remaining, edit); 324 if (depth == 0) { 325 break; 326 } else if (!next) { 327 return {}; // error 328 } else if (*next == '(') { 329 ++depth; 330 } else if (*next == ')') { 331 --depth; 332 } 333 Put(*next); 334 } 335 } 336 } else if (first == radixPointChar || (first >= '0' && first <= '9') || 337 (bzMode && (first == ' ' || first == '\t')) || first == 'E' || 338 first == 'D' || first == 'Q') { 339 if (first == '0') { 340 next = io.NextInField(remaining, edit); 341 if (next && (*next == 'x' || *next == 'X')) { // 0X... 342 isHexadecimal = true; 343 next = io.NextInField(remaining, edit); 344 } else { 345 Put('0'); 346 } 347 } 348 // input field is normalized to a fraction 349 if (!isHexadecimal) { 350 Put('.'); 351 } 352 auto start{got}; 353 for (; next; next = io.NextInField(remaining, edit)) { 354 char32_t ch{*next}; 355 if (ch == ' ' || ch == '\t') { 356 if (isHexadecimal) { 357 return {}; // error 358 } else if (bzMode) { 359 ch = '0'; // BZ mode - treat blank as if it were zero 360 } else { 361 continue; // ignore blank in fixed field 362 } 363 } 364 if (ch == '0' && got == start && !radixPointOffset) { 365 // omit leading zeroes before the radix point 366 } else if (ch >= '0' && ch <= '9') { 367 Put(ch); 368 } else if (ch == radixPointChar && !radixPointOffset) { 369 // The radix point character is *not* copied to the buffer. 370 radixPointOffset = got - start; // # of digits before the radix point 371 } else if (isHexadecimal && ch >= 'A' && ch <= 'F') { 372 Put(ch); 373 } else if (isHexadecimal && ch >= 'a' && ch <= 'f') { 374 Put(ch - 'a' + 'A'); // normalize to capitals 375 } else { 376 break; 377 } 378 } 379 if (got == start) { 380 // Nothing but zeroes and maybe a radix point. F'2018 requires 381 // at least one digit, but F'77 did not, and a bare "." shows up in 382 // the FCVS suite. 383 Put('0'); // emit at least one digit 384 } 385 // In list-directed input, a bad exponent is not consumed. 386 auto nextBeforeExponent{next}; 387 auto startExponent{io.GetConnectionState().positionInRecord}; 388 bool hasGoodExponent{false}; 389 if (next) { 390 if (isHexadecimal) { 391 if (*next == 'p' || *next == 'P') { 392 next = io.NextInField(remaining, edit); 393 } else { 394 // The binary exponent is not optional in the standard. 395 return {}; // error 396 } 397 } else if (*next == 'e' || *next == 'E' || *next == 'd' || *next == 'D' || 398 *next == 'q' || *next == 'Q') { 399 // Optional exponent letter. Blanks are allowed between the 400 // optional exponent letter and the exponent value. 401 io.SkipSpaces(remaining); 402 next = io.NextInField(remaining, edit); 403 } 404 } 405 if (next && 406 (*next == '-' || *next == '+' || (*next >= '0' && *next <= '9') || 407 *next == ' ' || *next == '\t')) { 408 bool negExpo{*next == '-'}; 409 if (negExpo || *next == '+') { 410 next = io.NextInField(remaining, edit); 411 } 412 for (; next; next = io.NextInField(remaining, edit)) { 413 if (*next >= '0' && *next <= '9') { 414 hasGoodExponent = true; 415 if (exponent < 10000) { 416 exponent = 10 * exponent + *next - '0'; 417 } 418 } else if (*next == ' ' || *next == '\t') { 419 if (isHexadecimal) { 420 break; 421 } else if (bzMode) { 422 hasGoodExponent = true; 423 exponent = 10 * exponent; 424 } 425 } else { 426 break; 427 } 428 } 429 if (negExpo) { 430 exponent = -exponent; 431 } 432 } 433 if (!hasGoodExponent) { 434 if (isHexadecimal) { 435 return {}; // error 436 } 437 // There isn't a good exponent; do not consume it. 438 next = nextBeforeExponent; 439 io.HandleAbsolutePosition(startExponent); 440 // The default exponent is -kP, but the scale factor doesn't affect 441 // an explicit exponent. 442 exponent = -edit.modes.scale; 443 } 444 // Adjust exponent by number of digits before the radix point. 445 if (isHexadecimal) { 446 // Exponents for hexadecimal input are binary. 447 exponent += radixPointOffset.value_or(got - start) * 4; 448 } else if (radixPointOffset) { 449 exponent += *radixPointOffset; 450 } else { 451 // When no redix point (or comma) appears in the value, the 'd' 452 // part of the edit descriptor must be interpreted as the number of 453 // digits in the value to be interpreted as being to the *right* of 454 // the assumed radix point (13.7.2.3.2) 455 exponent += got - start - edit.digits.value_or(0); 456 } 457 } 458 // Consume the trailing ')' of a list-directed or NAMELIST complex 459 // input value. 460 if (edit.descriptor == DataEdit::ListDirectedImaginaryPart) { 461 if (next && (*next == ' ' || *next == '\t')) { 462 io.SkipSpaces(remaining); 463 next = io.NextInField(remaining, edit); 464 } 465 if (!next) { // NextInField fails on separators like ')' 466 std::size_t byteCount{0}; 467 next = io.GetCurrentChar(byteCount); 468 if (next && *next == ')') { 469 io.HandleRelativePosition(byteCount); 470 } 471 } 472 } else if (remaining) { 473 while (next && (*next == ' ' || *next == '\t')) { 474 next = io.NextInField(remaining, edit); 475 } 476 if (next) { 477 return {}; // error: unused nonblank character in fixed-width field 478 } 479 } 480 return {got, exponent, isHexadecimal}; 481 } 482 483 static void RaiseFPExceptions(decimal::ConversionResultFlags flags) { 484 #undef RAISE 485 #ifdef feraisexcept // a macro in some environments; omit std:: 486 #define RAISE feraiseexcept 487 #else 488 #define RAISE std::feraiseexcept 489 #endif 490 if (flags & decimal::ConversionResultFlags::Overflow) { 491 RAISE(FE_OVERFLOW); 492 } 493 if (flags & decimal::ConversionResultFlags::Underflow) { 494 RAISE(FE_UNDERFLOW); 495 } 496 if (flags & decimal::ConversionResultFlags::Inexact) { 497 RAISE(FE_INEXACT); 498 } 499 if (flags & decimal::ConversionResultFlags::Invalid) { 500 RAISE(FE_INVALID); 501 } 502 #undef RAISE 503 } 504 505 // If no special modes are in effect and the form of the input value 506 // that's present in the input stream is acceptable to the decimal->binary 507 // converter without modification, this fast path for real input 508 // saves time by avoiding memory copies and reformatting of the exponent. 509 template <int PRECISION> 510 static bool TryFastPathRealDecimalInput( 511 IoStatementState &io, const DataEdit &edit, void *n) { 512 if (edit.modes.editingFlags & (blankZero | decimalComma)) { 513 return false; 514 } 515 if (edit.modes.scale != 0) { 516 return false; 517 } 518 const ConnectionState &connection{io.GetConnectionState()}; 519 if (connection.internalIoCharKind > 1) { 520 return false; // reading non-default character 521 } 522 const char *str{nullptr}; 523 std::size_t got{io.GetNextInputBytes(str)}; 524 if (got == 0 || str == nullptr || !connection.recordLength.has_value()) { 525 return false; // could not access reliably-terminated input stream 526 } 527 const char *p{str}; 528 std::int64_t maxConsume{ 529 std::min<std::int64_t>(got, edit.width.value_or(got))}; 530 const char *limit{str + maxConsume}; 531 decimal::ConversionToBinaryResult<PRECISION> converted{ 532 decimal::ConvertToBinary<PRECISION>(p, edit.modes.round, limit)}; 533 if (converted.flags & (decimal::Invalid | decimal::Overflow)) { 534 return false; 535 } 536 if (edit.digits.value_or(0) != 0) { 537 // Edit descriptor is Fw.d (or other) with d != 0, which 538 // implies scaling 539 const char *q{str}; 540 for (; q < limit; ++q) { 541 if (*q == '.' || *q == 'n' || *q == 'N') { 542 break; 543 } 544 } 545 if (q == limit) { 546 // No explicit decimal point, and not NaN/Inf. 547 return false; 548 } 549 } 550 if (edit.descriptor == DataEdit::ListDirectedImaginaryPart) { 551 // Need to consume a trailing ')', possibly with leading spaces 552 for (; p < limit && (*p == ' ' || *p == '\t'); ++p) { 553 } 554 if (p < limit && *p == ')') { 555 ++p; 556 } else { 557 return false; 558 } 559 } else if (edit.IsListDirected()) { 560 if (p < limit && !IsCharValueSeparator(edit, *p)) { 561 return false; 562 } 563 } else { 564 for (; p < limit && (*p == ' ' || *p == '\t'); ++p) { 565 } 566 if (edit.width && p < str + *edit.width) { 567 return false; // unconverted characters remain in fixed width field 568 } 569 } 570 // Success on the fast path! 571 *reinterpret_cast<decimal::BinaryFloatingPointNumber<PRECISION> *>(n) = 572 converted.binary; 573 io.HandleRelativePosition(p - str); 574 // Set FP exception flags 575 if (converted.flags != decimal::ConversionResultFlags::Exact) { 576 RaiseFPExceptions(converted.flags); 577 } 578 return true; 579 } 580 581 template <int binaryPrecision> 582 decimal::ConversionToBinaryResult<binaryPrecision> ConvertHexadecimal( 583 const char *&p, enum decimal::FortranRounding rounding, int expo) { 584 using RealType = decimal::BinaryFloatingPointNumber<binaryPrecision>; 585 using RawType = typename RealType::RawType; 586 bool isNegative{*p == '-'}; 587 constexpr RawType one{1}; 588 RawType signBit{0}; 589 if (isNegative) { 590 ++p; 591 signBit = one << (RealType::bits - 1); 592 } 593 RawType fraction{0}; 594 // Adjust the incoming binary P+/- exponent to shift the radix point 595 // to below the LSB and add in the bias. 596 expo += binaryPrecision - 1 + RealType::exponentBias; 597 // Input the fraction. 598 int roundingBit{0}; 599 int guardBit{0}; 600 for (; *p; ++p) { 601 fraction <<= 4; 602 expo -= 4; 603 if (*p >= '0' && *p <= '9') { 604 fraction |= *p - '0'; 605 } else if (*p >= 'A' && *p <= 'F') { 606 fraction |= *p - 'A' + 10; // data were normalized to capitals 607 } else { 608 break; 609 } 610 while (fraction >> binaryPrecision) { 611 guardBit |= roundingBit; 612 roundingBit = (int)fraction & 1; 613 fraction >>= 1; 614 ++expo; 615 } 616 } 617 if (fraction) { 618 // Boost biased expo if too small 619 while (expo < 1) { 620 guardBit |= roundingBit; 621 roundingBit = (int)fraction & 1; 622 fraction >>= 1; 623 ++expo; 624 } 625 // Normalize 626 while (expo > 1 && !(fraction >> (binaryPrecision - 1))) { 627 fraction <<= 1; 628 --expo; 629 } 630 } 631 // Package & return result 632 constexpr RawType significandMask{(one << RealType::significandBits) - 1}; 633 int flags{(roundingBit | guardBit) ? decimal::Inexact : decimal::Exact}; 634 if (!fraction) { 635 expo = 0; 636 } else if (expo == 1 && !(fraction >> (binaryPrecision - 1))) { 637 expo = 0; // subnormal 638 flags |= decimal::Underflow; 639 } else if (expo >= RealType::maxExponent) { 640 expo = RealType::maxExponent; // +/-Inf 641 fraction = 0; 642 flags |= decimal::Overflow; 643 } else { 644 fraction &= significandMask; // remove explicit normalization unless x87 645 } 646 return decimal::ConversionToBinaryResult<binaryPrecision>{ 647 RealType{static_cast<RawType>(signBit | 648 static_cast<RawType>(expo) << RealType::significandBits | fraction)}, 649 static_cast<decimal::ConversionResultFlags>(flags)}; 650 } 651 652 template <int KIND> 653 bool EditCommonRealInput(IoStatementState &io, const DataEdit &edit, void *n) { 654 constexpr int binaryPrecision{common::PrecisionOfRealKind(KIND)}; 655 if (TryFastPathRealDecimalInput<binaryPrecision>(io, edit, n)) { 656 return CheckCompleteListDirectedField(io, edit); 657 } 658 // Fast path wasn't available or didn't work; go the more general route 659 static constexpr int maxDigits{ 660 common::MaxDecimalConversionDigits(binaryPrecision)}; 661 static constexpr int bufferSize{maxDigits + 18}; 662 char buffer[bufferSize]; 663 auto scanned{ScanRealInput(buffer, maxDigits + 2, io, edit)}; 664 int got{scanned.got}; 665 if (got >= maxDigits + 2) { 666 io.GetIoErrorHandler().Crash("EditCommonRealInput: buffer was too small"); 667 return false; 668 } 669 if (got == 0) { 670 const auto &connection{io.GetConnectionState()}; 671 io.GetIoErrorHandler().SignalError(IostatBadRealInput, 672 "Bad real input data at column %d of record %d", 673 static_cast<int>(connection.positionInRecord + 1), 674 static_cast<int>(connection.currentRecordNumber)); 675 return false; 676 } 677 decimal::ConversionToBinaryResult<binaryPrecision> converted; 678 const char *p{buffer}; 679 if (scanned.isHexadecimal) { 680 buffer[got] = '\0'; 681 converted = ConvertHexadecimal<binaryPrecision>( 682 p, edit.modes.round, scanned.exponent); 683 } else { 684 bool hadExtra{got > maxDigits}; 685 int exponent{scanned.exponent}; 686 if (exponent != 0) { 687 buffer[got++] = 'e'; 688 if (exponent < 0) { 689 buffer[got++] = '-'; 690 exponent = -exponent; 691 } 692 if (exponent > 9999) { 693 exponent = 9999; // will convert to +/-Inf 694 } 695 if (exponent > 999) { 696 int dig{exponent / 1000}; 697 buffer[got++] = '0' + dig; 698 int rest{exponent - 1000 * dig}; 699 dig = rest / 100; 700 buffer[got++] = '0' + dig; 701 rest -= 100 * dig; 702 dig = rest / 10; 703 buffer[got++] = '0' + dig; 704 buffer[got++] = '0' + (rest - 10 * dig); 705 } else if (exponent > 99) { 706 int dig{exponent / 100}; 707 buffer[got++] = '0' + dig; 708 int rest{exponent - 100 * dig}; 709 dig = rest / 10; 710 buffer[got++] = '0' + dig; 711 buffer[got++] = '0' + (rest - 10 * dig); 712 } else if (exponent > 9) { 713 int dig{exponent / 10}; 714 buffer[got++] = '0' + dig; 715 buffer[got++] = '0' + (exponent - 10 * dig); 716 } else { 717 buffer[got++] = '0' + exponent; 718 } 719 } 720 buffer[got] = '\0'; 721 converted = decimal::ConvertToBinary<binaryPrecision>(p, edit.modes.round); 722 if (hadExtra) { 723 converted.flags = static_cast<enum decimal::ConversionResultFlags>( 724 converted.flags | decimal::Inexact); 725 } 726 } 727 if (*p) { // unprocessed junk after value 728 const auto &connection{io.GetConnectionState()}; 729 io.GetIoErrorHandler().SignalError(IostatBadRealInput, 730 "Trailing characters after real input data at column %d of record %d", 731 static_cast<int>(connection.positionInRecord + 1), 732 static_cast<int>(connection.currentRecordNumber)); 733 return false; 734 } 735 *reinterpret_cast<decimal::BinaryFloatingPointNumber<binaryPrecision> *>(n) = 736 converted.binary; 737 // Set FP exception flags 738 if (converted.flags != decimal::ConversionResultFlags::Exact) { 739 if (converted.flags & decimal::ConversionResultFlags::Overflow) { 740 io.GetIoErrorHandler().SignalError(IostatRealInputOverflow); 741 return false; 742 } 743 RaiseFPExceptions(converted.flags); 744 } 745 return CheckCompleteListDirectedField(io, edit); 746 } 747 748 template <int KIND> 749 bool EditRealInput(IoStatementState &io, const DataEdit &edit, void *n) { 750 switch (edit.descriptor) { 751 case DataEdit::ListDirected: 752 if (IsNamelistNameOrSlash(io)) { 753 return false; 754 } 755 return EditCommonRealInput<KIND>(io, edit, n); 756 case DataEdit::ListDirectedRealPart: 757 case DataEdit::ListDirectedImaginaryPart: 758 case 'F': 759 case 'E': // incl. EN, ES, & EX 760 case 'D': 761 case 'G': 762 return EditCommonRealInput<KIND>(io, edit, n); 763 case 'B': 764 return EditBOZInput<1>(io, edit, n, 765 common::BitsForBinaryPrecision(common::PrecisionOfRealKind(KIND)) >> 3); 766 case 'O': 767 return EditBOZInput<3>(io, edit, n, 768 common::BitsForBinaryPrecision(common::PrecisionOfRealKind(KIND)) >> 3); 769 case 'Z': 770 return EditBOZInput<4>(io, edit, n, 771 common::BitsForBinaryPrecision(common::PrecisionOfRealKind(KIND)) >> 3); 772 case 'A': // legacy extension 773 return EditCharacterInput(io, edit, reinterpret_cast<char *>(n), KIND); 774 default: 775 io.GetIoErrorHandler().SignalError(IostatErrorInFormat, 776 "Data edit descriptor '%c' may not be used for REAL input", 777 edit.descriptor); 778 return false; 779 } 780 } 781 782 // 13.7.3 in Fortran 2018 783 bool EditLogicalInput(IoStatementState &io, const DataEdit &edit, bool &x) { 784 switch (edit.descriptor) { 785 case DataEdit::ListDirected: 786 if (IsNamelistNameOrSlash(io)) { 787 return false; 788 } 789 break; 790 case 'L': 791 case 'G': 792 break; 793 default: 794 io.GetIoErrorHandler().SignalError(IostatErrorInFormat, 795 "Data edit descriptor '%c' may not be used for LOGICAL input", 796 edit.descriptor); 797 return false; 798 } 799 std::optional<int> remaining{io.CueUpInput(edit)}; 800 std::optional<char32_t> next{io.NextInField(remaining, edit)}; 801 if (next && *next == '.') { // skip optional period 802 next = io.NextInField(remaining, edit); 803 } 804 if (!next) { 805 io.GetIoErrorHandler().SignalError("Empty LOGICAL input field"); 806 return false; 807 } 808 switch (*next) { 809 case 'T': 810 case 't': 811 x = true; 812 break; 813 case 'F': 814 case 'f': 815 x = false; 816 break; 817 default: 818 io.GetIoErrorHandler().SignalError( 819 "Bad character '%lc' in LOGICAL input field", *next); 820 return false; 821 } 822 if (remaining) { // ignore the rest of a fixed-width field 823 io.HandleRelativePosition(*remaining); 824 } else if (edit.descriptor == DataEdit::ListDirected) { 825 while (io.NextInField(remaining, edit)) { // discard rest of field 826 } 827 } 828 return CheckCompleteListDirectedField(io, edit); 829 } 830 831 // See 13.10.3.1 paragraphs 7-9 in Fortran 2018 832 template <typename CHAR> 833 static bool EditDelimitedCharacterInput( 834 IoStatementState &io, CHAR *x, std::size_t length, char32_t delimiter) { 835 bool result{true}; 836 while (true) { 837 std::size_t byteCount{0}; 838 auto ch{io.GetCurrentChar(byteCount)}; 839 if (!ch) { 840 if (io.AdvanceRecord()) { 841 continue; 842 } else { 843 result = false; // EOF in character value 844 break; 845 } 846 } 847 io.HandleRelativePosition(byteCount); 848 if (*ch == delimiter) { 849 auto next{io.GetCurrentChar(byteCount)}; 850 if (next && *next == delimiter) { 851 // Repeated delimiter: use as character value 852 io.HandleRelativePosition(byteCount); 853 } else { 854 break; // closing delimiter 855 } 856 } 857 if (length > 0) { 858 *x++ = *ch; 859 --length; 860 } 861 } 862 std::fill_n(x, length, ' '); 863 return result; 864 } 865 866 template <typename CHAR> 867 static bool EditListDirectedCharacterInput( 868 IoStatementState &io, CHAR *x, std::size_t length, const DataEdit &edit) { 869 std::size_t byteCount{0}; 870 auto ch{io.GetCurrentChar(byteCount)}; 871 if (ch && (*ch == '\'' || *ch == '"')) { 872 io.HandleRelativePosition(byteCount); 873 return EditDelimitedCharacterInput(io, x, length, *ch); 874 } 875 if (IsNamelistNameOrSlash(io) || io.GetConnectionState().IsAtEOF()) { 876 return false; 877 } 878 // Undelimited list-directed character input: stop at a value separator 879 // or the end of the current record. Subtlety: the "remaining" count 880 // here is a dummy that's used to avoid the interpretation of separators 881 // in NextInField. 882 std::optional<int> remaining{length > 0 ? maxUTF8Bytes : 0}; 883 while (std::optional<char32_t> next{io.NextInField(remaining, edit)}) { 884 bool isSep{false}; 885 switch (*next) { 886 case ' ': 887 case '\t': 888 case '/': 889 isSep = true; 890 break; 891 case '&': 892 case '$': 893 isSep = edit.IsNamelist(); 894 break; 895 case ',': 896 isSep = !(edit.modes.editingFlags & decimalComma); 897 break; 898 case ';': 899 isSep = !!(edit.modes.editingFlags & decimalComma); 900 break; 901 default: 902 break; 903 } 904 if (isSep) { 905 remaining = 0; 906 } else { 907 *x++ = *next; 908 remaining = --length > 0 ? maxUTF8Bytes : 0; 909 } 910 } 911 std::fill_n(x, length, ' '); 912 return true; 913 } 914 915 template <typename CHAR> 916 bool EditCharacterInput(IoStatementState &io, const DataEdit &edit, CHAR *x, 917 std::size_t lengthChars) { 918 switch (edit.descriptor) { 919 case DataEdit::ListDirected: 920 return EditListDirectedCharacterInput(io, x, lengthChars, edit); 921 case 'A': 922 case 'G': 923 break; 924 case 'B': 925 return EditBOZInput<1>(io, edit, x, lengthChars * sizeof *x); 926 case 'O': 927 return EditBOZInput<3>(io, edit, x, lengthChars * sizeof *x); 928 case 'Z': 929 return EditBOZInput<4>(io, edit, x, lengthChars * sizeof *x); 930 default: 931 io.GetIoErrorHandler().SignalError(IostatErrorInFormat, 932 "Data edit descriptor '%c' may not be used with a CHARACTER data item", 933 edit.descriptor); 934 return false; 935 } 936 const ConnectionState &connection{io.GetConnectionState()}; 937 std::size_t remainingChars{lengthChars}; 938 // Skip leading characters. 939 // Their bytes don't count towards INQUIRE(IOLENGTH=). 940 std::size_t skipChars{0}; 941 if (edit.width && *edit.width > 0) { 942 remainingChars = *edit.width; 943 if (remainingChars > lengthChars) { 944 skipChars = remainingChars - lengthChars; 945 } 946 } 947 // When the field is wider than the variable, we drop the leading 948 // characters. When the variable is wider than the field, there can be 949 // trailing padding or an EOR condition. 950 const char *input{nullptr}; 951 std::size_t readyBytes{0}; 952 // Transfer payload bytes; these do count. 953 while (remainingChars > 0) { 954 if (readyBytes == 0) { 955 readyBytes = io.GetNextInputBytes(input); 956 if (readyBytes == 0 || 957 (readyBytes < remainingChars && edit.modes.nonAdvancing)) { 958 if (io.CheckForEndOfRecord(readyBytes)) { 959 if (readyBytes == 0) { 960 // PAD='YES' and no more data 961 std::fill_n(x, lengthChars, ' '); 962 return !io.GetIoErrorHandler().InError(); 963 } else { 964 // Do partial read(s) then pad on last iteration 965 } 966 } else { 967 return !io.GetIoErrorHandler().InError(); 968 } 969 } 970 } 971 std::size_t chunkBytes; 972 std::size_t chunkChars{1}; 973 bool skipping{skipChars > 0}; 974 if (connection.isUTF8) { 975 chunkBytes = MeasureUTF8Bytes(*input); 976 if (skipping) { 977 --skipChars; 978 } else if (auto ucs{DecodeUTF8(input)}) { 979 if ((sizeof *x == 1 && *ucs > 0xff) || 980 (sizeof *x == 2 && *ucs > 0xffff)) { 981 *x++ = '?'; 982 } else { 983 *x++ = *ucs; 984 } 985 --lengthChars; 986 } else if (chunkBytes == 0) { 987 // error recovery: skip bad encoding 988 chunkBytes = 1; 989 } 990 } else if (connection.internalIoCharKind > 1) { 991 // Reading from non-default character internal unit 992 chunkBytes = connection.internalIoCharKind; 993 if (skipping) { 994 --skipChars; 995 } else { 996 char32_t buffer{0}; 997 std::memcpy(&buffer, input, chunkBytes); 998 if ((sizeof *x == 1 && buffer > 0xff) || 999 (sizeof *x == 2 && buffer > 0xffff)) { 1000 *x++ = '?'; 1001 } else { 1002 *x++ = buffer; 1003 } 1004 --lengthChars; 1005 } 1006 } else if constexpr (sizeof *x > 1) { 1007 // Read single byte with expansion into multi-byte CHARACTER 1008 chunkBytes = 1; 1009 if (skipping) { 1010 --skipChars; 1011 } else { 1012 *x++ = static_cast<unsigned char>(*input); 1013 --lengthChars; 1014 } 1015 } else { // single bytes -> default CHARACTER 1016 if (skipping) { 1017 chunkBytes = std::min<std::size_t>(skipChars, readyBytes); 1018 chunkChars = chunkBytes; 1019 skipChars -= chunkChars; 1020 } else { 1021 chunkBytes = std::min<std::size_t>(remainingChars, readyBytes); 1022 chunkBytes = std::min<std::size_t>(lengthChars, chunkBytes); 1023 chunkChars = chunkBytes; 1024 std::memcpy(x, input, chunkBytes); 1025 x += chunkBytes; 1026 lengthChars -= chunkChars; 1027 } 1028 } 1029 input += chunkBytes; 1030 remainingChars -= chunkChars; 1031 if (!skipping) { 1032 io.GotChar(chunkBytes); 1033 } 1034 io.HandleRelativePosition(chunkBytes); 1035 readyBytes -= chunkBytes; 1036 } 1037 // Pad the remainder of the input variable, if any. 1038 std::fill_n(x, lengthChars, ' '); 1039 return CheckCompleteListDirectedField(io, edit); 1040 } 1041 1042 template bool EditRealInput<2>(IoStatementState &, const DataEdit &, void *); 1043 template bool EditRealInput<3>(IoStatementState &, const DataEdit &, void *); 1044 template bool EditRealInput<4>(IoStatementState &, const DataEdit &, void *); 1045 template bool EditRealInput<8>(IoStatementState &, const DataEdit &, void *); 1046 template bool EditRealInput<10>(IoStatementState &, const DataEdit &, void *); 1047 // TODO: double/double 1048 template bool EditRealInput<16>(IoStatementState &, const DataEdit &, void *); 1049 1050 template bool EditCharacterInput( 1051 IoStatementState &, const DataEdit &, char *, std::size_t); 1052 template bool EditCharacterInput( 1053 IoStatementState &, const DataEdit &, char16_t *, std::size_t); 1054 template bool EditCharacterInput( 1055 IoStatementState &, const DataEdit &, char32_t *, std::size_t); 1056 1057 } // namespace Fortran::runtime::io 1058