1 //===-- runtime/edit-input.cpp --------------------------------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #include "edit-input.h" 10 #include "namelist.h" 11 #include "utf.h" 12 #include "flang/Common/real.h" 13 #include "flang/Common/uint128.h" 14 #include <algorithm> 15 #include <cfenv> 16 17 namespace Fortran::runtime::io { 18 19 // Checks that a list-directed input value has been entirely consumed and 20 // doesn't contain unparsed characters before the next value separator. 21 static inline bool IsCharValueSeparator(const DataEdit &edit, char32_t ch) { 22 char32_t comma{ 23 edit.modes.editingFlags & decimalComma ? char32_t{';'} : char32_t{','}}; 24 return ch == ' ' || ch == '\t' || ch == '/' || ch == comma; 25 } 26 27 static bool CheckCompleteListDirectedField( 28 IoStatementState &io, const DataEdit &edit) { 29 if (edit.IsListDirected()) { 30 std::size_t byteCount; 31 if (auto ch{io.GetCurrentChar(byteCount)}) { 32 if (IsCharValueSeparator(edit, *ch)) { 33 return true; 34 } else { 35 const auto &connection{io.GetConnectionState()}; 36 io.GetIoErrorHandler().SignalError(IostatBadListDirectedInputSeparator, 37 "invalid character (0x%x) after list-directed input value, " 38 "at column %d in record %d", 39 static_cast<unsigned>(*ch), 40 static_cast<int>(connection.positionInRecord + 1), 41 static_cast<int>(connection.currentRecordNumber)); 42 return false; 43 } 44 } else { 45 return true; // end of record: ok 46 } 47 } else { 48 return true; 49 } 50 } 51 52 template <int LOG2_BASE> 53 static bool EditBOZInput( 54 IoStatementState &io, const DataEdit &edit, void *n, std::size_t bytes) { 55 // Skip leading white space & zeroes 56 std::optional<int> remaining{io.CueUpInput(edit)}; 57 auto start{io.GetConnectionState().positionInRecord}; 58 std::optional<char32_t> next{io.NextInField(remaining, edit)}; 59 if (next.value_or('?') == '0') { 60 do { 61 start = io.GetConnectionState().positionInRecord; 62 next = io.NextInField(remaining, edit); 63 } while (next && *next == '0'); 64 } 65 // Count significant digits after any leading white space & zeroes 66 int digits{0}; 67 for (; next; next = io.NextInField(remaining, edit)) { 68 char32_t ch{*next}; 69 if (ch == ' ' || ch == '\t') { 70 continue; 71 } 72 if (ch >= '0' && ch <= '1') { 73 } else if (LOG2_BASE >= 3 && ch >= '2' && ch <= '7') { 74 } else if (LOG2_BASE >= 4 && ch >= '8' && ch <= '9') { 75 } else if (LOG2_BASE >= 4 && ch >= 'A' && ch <= 'F') { 76 } else if (LOG2_BASE >= 4 && ch >= 'a' && ch <= 'f') { 77 } else { 78 io.GetIoErrorHandler().SignalError( 79 "Bad character '%lc' in B/O/Z input field", ch); 80 return false; 81 } 82 ++digits; 83 } 84 auto significantBytes{static_cast<std::size_t>(digits * LOG2_BASE + 7) / 8}; 85 if (significantBytes > bytes) { 86 io.GetIoErrorHandler().SignalError(IostatBOZInputOverflow, 87 "B/O/Z input of %d digits overflows %zd-byte variable", digits, bytes); 88 return false; 89 } 90 // Reset to start of significant digits 91 io.HandleAbsolutePosition(start); 92 remaining.reset(); 93 // Make a second pass now that the digit count is known 94 std::memset(n, 0, bytes); 95 int increment{isHostLittleEndian ? -1 : 1}; 96 auto *data{reinterpret_cast<unsigned char *>(n) + 97 (isHostLittleEndian ? significantBytes - 1 : 0)}; 98 int shift{((digits - 1) * LOG2_BASE) & 7}; 99 if (shift + LOG2_BASE > 8) { 100 shift -= 8; // misaligned octal 101 } 102 while (digits > 0) { 103 char32_t ch{*io.NextInField(remaining, edit)}; 104 int digit{0}; 105 if (ch >= '0' && ch <= '9') { 106 digit = ch - '0'; 107 } else if (ch >= 'A' && ch <= 'F') { 108 digit = ch + 10 - 'A'; 109 } else if (ch >= 'a' && ch <= 'f') { 110 digit = ch + 10 - 'a'; 111 } else { 112 continue; 113 } 114 --digits; 115 if (shift < 0) { 116 shift += 8; 117 if (shift + LOG2_BASE > 8) { // misaligned octal 118 *data |= digit >> (8 - shift); 119 } 120 data += increment; 121 } 122 *data |= digit << shift; 123 shift -= LOG2_BASE; 124 } 125 return CheckCompleteListDirectedField(io, edit); 126 } 127 128 static inline char32_t GetRadixPointChar(const DataEdit &edit) { 129 return edit.modes.editingFlags & decimalComma ? char32_t{','} : char32_t{'.'}; 130 } 131 132 // Prepares input from a field, and returns the sign, if any, else '\0'. 133 static char ScanNumericPrefix(IoStatementState &io, const DataEdit &edit, 134 std::optional<char32_t> &next, std::optional<int> &remaining) { 135 remaining = io.CueUpInput(edit); 136 next = io.NextInField(remaining, edit); 137 char sign{'\0'}; 138 if (next) { 139 if (*next == '-' || *next == '+') { 140 sign = *next; 141 if (!edit.IsListDirected()) { 142 io.SkipSpaces(remaining); 143 } 144 next = io.NextInField(remaining, edit); 145 } 146 } 147 return sign; 148 } 149 150 bool EditIntegerInput( 151 IoStatementState &io, const DataEdit &edit, void *n, int kind) { 152 RUNTIME_CHECK(io.GetIoErrorHandler(), kind >= 1 && !(kind & (kind - 1))); 153 switch (edit.descriptor) { 154 case DataEdit::ListDirected: 155 if (IsNamelistNameOrSlash(io)) { 156 return false; 157 } 158 break; 159 case 'G': 160 case 'I': 161 break; 162 case 'B': 163 return EditBOZInput<1>(io, edit, n, kind); 164 case 'O': 165 return EditBOZInput<3>(io, edit, n, kind); 166 case 'Z': 167 return EditBOZInput<4>(io, edit, n, kind); 168 case 'A': // legacy extension 169 return EditCharacterInput(io, edit, reinterpret_cast<char *>(n), kind); 170 default: 171 io.GetIoErrorHandler().SignalError(IostatErrorInFormat, 172 "Data edit descriptor '%c' may not be used with an INTEGER data item", 173 edit.descriptor); 174 return false; 175 } 176 std::optional<int> remaining; 177 std::optional<char32_t> next; 178 char sign{ScanNumericPrefix(io, edit, next, remaining)}; 179 common::UnsignedInt128 value{0}; 180 bool any{!!sign}; 181 bool overflow{false}; 182 for (; next; next = io.NextInField(remaining, edit)) { 183 char32_t ch{*next}; 184 if (ch == ' ' || ch == '\t') { 185 if (edit.modes.editingFlags & blankZero) { 186 ch = '0'; // BZ mode - treat blank as if it were zero 187 } else { 188 continue; 189 } 190 } 191 int digit{0}; 192 if (ch >= '0' && ch <= '9') { 193 digit = ch - '0'; 194 } else { 195 io.GetIoErrorHandler().SignalError( 196 "Bad character '%lc' in INTEGER input field", ch); 197 return false; 198 } 199 static constexpr auto maxu128{~common::UnsignedInt128{0}}; 200 static constexpr auto maxu128OverTen{maxu128 / 10}; 201 static constexpr int maxLastDigit{ 202 static_cast<int>(maxu128 - (maxu128OverTen * 10))}; 203 overflow |= value >= maxu128OverTen && 204 (value > maxu128OverTen || digit > maxLastDigit); 205 value *= 10; 206 value += digit; 207 any = true; 208 } 209 if (!any && !remaining) { 210 io.GetIoErrorHandler().SignalError( 211 "Integer value absent from NAMELIST or list-directed input"); 212 return false; 213 } 214 auto maxForKind{common::UnsignedInt128{1} << ((8 * kind) - 1)}; 215 overflow |= value >= maxForKind && (value > maxForKind || sign != '-'); 216 if (overflow) { 217 io.GetIoErrorHandler().SignalError(IostatIntegerInputOverflow, 218 "Decimal input overflows INTEGER(%d) variable", kind); 219 return false; 220 } 221 if (sign == '-') { 222 value = -value; 223 } 224 if (any || !io.GetConnectionState().IsAtEOF()) { 225 std::memcpy(n, &value, kind); // a blank field means zero 226 } 227 return any; 228 } 229 230 // Parses a REAL input number from the input source as a normalized 231 // fraction into a supplied buffer -- there's an optional '-', a 232 // decimal point when the input is not hexadecimal, and at least one 233 // digit. Replaces blanks with zeroes where appropriate. 234 struct ScannedRealInput { 235 // Number of characters that (should) have been written to the 236 // buffer -- this can be larger than the buffer size, which 237 // indicates buffer overflow. Zero indicates an error. 238 int got{0}; 239 int exponent{0}; // adjusted as necessary; binary if isHexadecimal 240 bool isHexadecimal{false}; // 0X... 241 }; 242 static ScannedRealInput ScanRealInput( 243 char *buffer, int bufferSize, IoStatementState &io, const DataEdit &edit) { 244 std::optional<int> remaining; 245 std::optional<char32_t> next; 246 int got{0}; 247 std::optional<int> radixPointOffset; 248 auto Put{[&](char ch) -> void { 249 if (got < bufferSize) { 250 buffer[got] = ch; 251 } 252 ++got; 253 }}; 254 char sign{ScanNumericPrefix(io, edit, next, remaining)}; 255 if (sign == '-') { 256 Put('-'); 257 } 258 bool bzMode{(edit.modes.editingFlags & blankZero) != 0}; 259 int exponent{0}; 260 if (!next || (!bzMode && *next == ' ')) { 261 if (!edit.IsListDirected() && !io.GetConnectionState().IsAtEOF()) { 262 // An empty/blank field means zero when not list-directed. 263 // A fixed-width field containing only a sign is also zero; 264 // this behavior isn't standard-conforming in F'2023 but it is 265 // required to pass FCVS. 266 Put('0'); 267 } 268 return {got, exponent, false}; 269 } 270 char32_t radixPointChar{GetRadixPointChar(edit)}; 271 char32_t first{*next >= 'a' && *next <= 'z' ? *next + 'A' - 'a' : *next}; 272 bool isHexadecimal{false}; 273 if (first == 'N' || first == 'I') { 274 // NaN or infinity - convert to upper case 275 // Subtle: a blank field of digits could be followed by 'E' or 'D', 276 for (; next && 277 ((*next >= 'a' && *next <= 'z') || (*next >= 'A' && *next <= 'Z')); 278 next = io.NextInField(remaining, edit)) { 279 if (*next >= 'a' && *next <= 'z') { 280 Put(*next - 'a' + 'A'); 281 } else { 282 Put(*next); 283 } 284 } 285 if (next && *next == '(') { // NaN(...) 286 Put('('); 287 int depth{1}; 288 while (true) { 289 next = io.NextInField(remaining, edit); 290 if (depth == 0) { 291 break; 292 } else if (!next) { 293 return {}; // error 294 } else if (*next == '(') { 295 ++depth; 296 } else if (*next == ')') { 297 --depth; 298 } 299 Put(*next); 300 } 301 } 302 } else if (first == radixPointChar || (first >= '0' && first <= '9') || 303 (bzMode && (first == ' ' || first == '\t')) || first == 'E' || 304 first == 'D' || first == 'Q') { 305 if (first == '0') { 306 next = io.NextInField(remaining, edit); 307 if (next && (*next == 'x' || *next == 'X')) { // 0X... 308 isHexadecimal = true; 309 next = io.NextInField(remaining, edit); 310 } else { 311 Put('0'); 312 } 313 } 314 // input field is normalized to a fraction 315 if (!isHexadecimal) { 316 Put('.'); 317 } 318 auto start{got}; 319 for (; next; next = io.NextInField(remaining, edit)) { 320 char32_t ch{*next}; 321 if (ch == ' ' || ch == '\t') { 322 if (isHexadecimal) { 323 return {}; // error 324 } else if (bzMode) { 325 ch = '0'; // BZ mode - treat blank as if it were zero 326 } else { 327 continue; // ignore blank in fixed field 328 } 329 } 330 if (ch == '0' && got == start && !radixPointOffset) { 331 // omit leading zeroes before the radix point 332 } else if (ch >= '0' && ch <= '9') { 333 Put(ch); 334 } else if (ch == radixPointChar && !radixPointOffset) { 335 // The radix point character is *not* copied to the buffer. 336 radixPointOffset = got - start; // # of digits before the radix point 337 } else if (isHexadecimal && ch >= 'A' && ch <= 'F') { 338 Put(ch); 339 } else if (isHexadecimal && ch >= 'a' && ch <= 'f') { 340 Put(ch - 'a' + 'A'); // normalize to capitals 341 } else { 342 break; 343 } 344 } 345 if (got == start) { 346 // Nothing but zeroes and maybe a radix point. F'2018 requires 347 // at least one digit, but F'77 did not, and a bare "." shows up in 348 // the FCVS suite. 349 Put('0'); // emit at least one digit 350 } 351 // In list-directed input, a bad exponent is not consumed. 352 auto nextBeforeExponent{next}; 353 auto startExponent{io.GetConnectionState().positionInRecord}; 354 bool hasGoodExponent{false}; 355 if (next) { 356 if (isHexadecimal) { 357 if (*next == 'p' || *next == 'P') { 358 next = io.NextInField(remaining, edit); 359 } else { 360 // The binary exponent is not optional in the standard. 361 return {}; // error 362 } 363 } else if (*next == 'e' || *next == 'E' || *next == 'd' || *next == 'D' || 364 *next == 'q' || *next == 'Q') { 365 // Optional exponent letter. Blanks are allowed between the 366 // optional exponent letter and the exponent value. 367 io.SkipSpaces(remaining); 368 next = io.NextInField(remaining, edit); 369 } 370 } 371 if (next && 372 (*next == '-' || *next == '+' || (*next >= '0' && *next <= '9') || 373 *next == ' ' || *next == '\t')) { 374 bool negExpo{*next == '-'}; 375 if (negExpo || *next == '+') { 376 next = io.NextInField(remaining, edit); 377 } 378 for (; next; next = io.NextInField(remaining, edit)) { 379 if (*next >= '0' && *next <= '9') { 380 hasGoodExponent = true; 381 if (exponent < 10000) { 382 exponent = 10 * exponent + *next - '0'; 383 } 384 } else if (*next == ' ' || *next == '\t') { 385 if (isHexadecimal) { 386 break; 387 } else if (bzMode) { 388 hasGoodExponent = true; 389 exponent = 10 * exponent; 390 } 391 } else { 392 break; 393 } 394 } 395 if (negExpo) { 396 exponent = -exponent; 397 } 398 } 399 if (!hasGoodExponent) { 400 if (isHexadecimal) { 401 return {}; // error 402 } 403 // There isn't a good exponent; do not consume it. 404 next = nextBeforeExponent; 405 io.HandleAbsolutePosition(startExponent); 406 // The default exponent is -kP, but the scale factor doesn't affect 407 // an explicit exponent. 408 exponent = -edit.modes.scale; 409 } 410 // Adjust exponent by number of digits before the radix point. 411 if (isHexadecimal) { 412 // Exponents for hexadecimal input are binary. 413 exponent += radixPointOffset.value_or(got - start) * 4; 414 } else if (radixPointOffset) { 415 exponent += *radixPointOffset; 416 } else { 417 // When no redix point (or comma) appears in the value, the 'd' 418 // part of the edit descriptor must be interpreted as the number of 419 // digits in the value to be interpreted as being to the *right* of 420 // the assumed radix point (13.7.2.3.2) 421 exponent += got - start - edit.digits.value_or(0); 422 } 423 } 424 // Consume the trailing ')' of a list-directed or NAMELIST complex 425 // input value. 426 if (edit.descriptor == DataEdit::ListDirectedImaginaryPart) { 427 if (next && (*next == ' ' || *next == '\t')) { 428 io.SkipSpaces(remaining); 429 next = io.NextInField(remaining, edit); 430 } 431 if (!next) { // NextInField fails on separators like ')' 432 std::size_t byteCount{0}; 433 next = io.GetCurrentChar(byteCount); 434 if (next && *next == ')') { 435 io.HandleRelativePosition(byteCount); 436 } 437 } 438 } else if (remaining) { 439 while (next && (*next == ' ' || *next == '\t')) { 440 next = io.NextInField(remaining, edit); 441 } 442 if (next) { 443 return {}; // error: unused nonblank character in fixed-width field 444 } 445 } 446 return {got, exponent, isHexadecimal}; 447 } 448 449 static void RaiseFPExceptions(decimal::ConversionResultFlags flags) { 450 #undef RAISE 451 #ifdef feraisexcept // a macro in some environments; omit std:: 452 #define RAISE feraiseexcept 453 #else 454 #define RAISE std::feraiseexcept 455 #endif 456 if (flags & decimal::ConversionResultFlags::Overflow) { 457 RAISE(FE_OVERFLOW); 458 } 459 if (flags & decimal::ConversionResultFlags::Inexact) { 460 RAISE(FE_INEXACT); 461 } 462 if (flags & decimal::ConversionResultFlags::Invalid) { 463 RAISE(FE_INVALID); 464 } 465 #undef RAISE 466 } 467 468 // If no special modes are in effect and the form of the input value 469 // that's present in the input stream is acceptable to the decimal->binary 470 // converter without modification, this fast path for real input 471 // saves time by avoiding memory copies and reformatting of the exponent. 472 template <int PRECISION> 473 static bool TryFastPathRealDecimalInput( 474 IoStatementState &io, const DataEdit &edit, void *n) { 475 if (edit.modes.editingFlags & (blankZero | decimalComma)) { 476 return false; 477 } 478 if (edit.modes.scale != 0) { 479 return false; 480 } 481 const ConnectionState &connection{io.GetConnectionState()}; 482 if (connection.internalIoCharKind > 1) { 483 return false; // reading non-default character 484 } 485 const char *str{nullptr}; 486 std::size_t got{io.GetNextInputBytes(str)}; 487 if (got == 0 || str == nullptr || !connection.recordLength.has_value()) { 488 return false; // could not access reliably-terminated input stream 489 } 490 const char *p{str}; 491 std::int64_t maxConsume{ 492 std::min<std::int64_t>(got, edit.width.value_or(got))}; 493 const char *limit{str + maxConsume}; 494 decimal::ConversionToBinaryResult<PRECISION> converted{ 495 decimal::ConvertToBinary<PRECISION>(p, edit.modes.round, limit)}; 496 if (converted.flags & (decimal::Invalid | decimal::Overflow)) { 497 return false; 498 } 499 if (edit.digits.value_or(0) != 0) { 500 // Edit descriptor is Fw.d (or other) with d != 0, which 501 // implies scaling 502 const char *q{str}; 503 for (; q < limit; ++q) { 504 if (*q == '.' || *q == 'n' || *q == 'N') { 505 break; 506 } 507 } 508 if (q == limit) { 509 // No explicit decimal point, and not NaN/Inf. 510 return false; 511 } 512 } 513 if (edit.descriptor == DataEdit::ListDirectedImaginaryPart) { 514 // Need to consume a trailing ')', possibly with leading spaces 515 for (; p < limit && (*p == ' ' || *p == '\t'); ++p) { 516 } 517 if (p < limit && *p == ')') { 518 ++p; 519 } else { 520 return false; 521 } 522 } else if (edit.IsListDirected()) { 523 if (p < limit && !IsCharValueSeparator(edit, *p)) { 524 return false; 525 } 526 } else { 527 for (; p < limit && (*p == ' ' || *p == '\t'); ++p) { 528 } 529 if (edit.width && p < str + *edit.width) { 530 return false; // unconverted characters remain in fixed width field 531 } 532 } 533 // Success on the fast path! 534 *reinterpret_cast<decimal::BinaryFloatingPointNumber<PRECISION> *>(n) = 535 converted.binary; 536 io.HandleRelativePosition(p - str); 537 // Set FP exception flags 538 if (converted.flags != decimal::ConversionResultFlags::Exact) { 539 RaiseFPExceptions(converted.flags); 540 } 541 return true; 542 } 543 544 template <int binaryPrecision> 545 decimal::ConversionToBinaryResult<binaryPrecision> ConvertHexadecimal( 546 const char *&p, enum decimal::FortranRounding rounding, int expo) { 547 using RealType = decimal::BinaryFloatingPointNumber<binaryPrecision>; 548 using RawType = typename RealType::RawType; 549 bool isNegative{*p == '-'}; 550 constexpr RawType one{1}; 551 RawType signBit{0}; 552 if (isNegative) { 553 ++p; 554 signBit = one << (RealType::bits - 1); 555 } 556 RawType fraction{0}; 557 // Adjust the incoming binary P+/- exponent to shift the radix point 558 // to below the LSB and add in the bias. 559 expo += binaryPrecision - 1 + RealType::exponentBias; 560 // Input the fraction. 561 int roundingBit{0}; 562 int guardBit{0}; 563 for (; *p; ++p) { 564 fraction <<= 4; 565 expo -= 4; 566 if (*p >= '0' && *p <= '9') { 567 fraction |= *p - '0'; 568 } else if (*p >= 'A' && *p <= 'F') { 569 fraction |= *p - 'A' + 10; // data were normalized to capitals 570 } else { 571 break; 572 } 573 while (fraction >> binaryPrecision) { 574 guardBit |= roundingBit; 575 roundingBit = (int)fraction & 1; 576 fraction >>= 1; 577 ++expo; 578 } 579 } 580 if (fraction) { 581 // Boost biased expo if too small 582 while (expo < 1) { 583 guardBit |= roundingBit; 584 roundingBit = (int)fraction & 1; 585 fraction >>= 1; 586 ++expo; 587 } 588 // Normalize 589 while (expo > 1 && !(fraction >> (binaryPrecision - 1))) { 590 fraction <<= 1; 591 --expo; 592 } 593 // Rounding 594 bool increase{false}; 595 switch (rounding) { 596 case decimal::RoundNearest: // RN & RP 597 increase = roundingBit && (guardBit | ((int)fraction & 1)); 598 break; 599 case decimal::RoundUp: // RU 600 increase = !isNegative && (roundingBit | guardBit); 601 break; 602 case decimal::RoundDown: // RD 603 increase = isNegative && (roundingBit | guardBit); 604 break; 605 case decimal::RoundToZero: // RZ 606 break; 607 case decimal::RoundCompatible: // RC 608 increase = roundingBit != 0; 609 break; 610 } 611 if (increase) { 612 ++fraction; 613 if (fraction >> binaryPrecision) { 614 fraction >>= 1; 615 ++expo; 616 } 617 } 618 } 619 // Package & return result 620 constexpr RawType significandMask{(one << RealType::significandBits) - 1}; 621 if (!fraction) { 622 expo = 0; 623 } else if (expo == 1 && !(fraction >> (binaryPrecision - 1))) { 624 expo = 0; // subnormal 625 } else if (expo >= RealType::maxExponent) { 626 expo = RealType::maxExponent; // +/-Inf 627 fraction = 0; 628 } else { 629 fraction &= significandMask; // remove explicit normalization unless x87 630 } 631 return decimal::ConversionToBinaryResult<binaryPrecision>{ 632 RealType{static_cast<RawType>(signBit | 633 static_cast<RawType>(expo) << RealType::significandBits | fraction)}, 634 (roundingBit | guardBit) ? decimal::Inexact : decimal::Exact}; 635 } 636 637 template <int KIND> 638 bool EditCommonRealInput(IoStatementState &io, const DataEdit &edit, void *n) { 639 constexpr int binaryPrecision{common::PrecisionOfRealKind(KIND)}; 640 if (TryFastPathRealDecimalInput<binaryPrecision>(io, edit, n)) { 641 return CheckCompleteListDirectedField(io, edit); 642 } 643 // Fast path wasn't available or didn't work; go the more general route 644 static constexpr int maxDigits{ 645 common::MaxDecimalConversionDigits(binaryPrecision)}; 646 static constexpr int bufferSize{maxDigits + 18}; 647 char buffer[bufferSize]; 648 auto scanned{ScanRealInput(buffer, maxDigits + 2, io, edit)}; 649 int got{scanned.got}; 650 if (got >= maxDigits + 2) { 651 io.GetIoErrorHandler().Crash("EditCommonRealInput: buffer was too small"); 652 return false; 653 } 654 if (got == 0) { 655 const auto &connection{io.GetConnectionState()}; 656 io.GetIoErrorHandler().SignalError(IostatBadRealInput, 657 "Bad real input data at column %d of record %d", 658 static_cast<int>(connection.positionInRecord + 1), 659 static_cast<int>(connection.currentRecordNumber)); 660 return false; 661 } 662 decimal::ConversionToBinaryResult<binaryPrecision> converted; 663 const char *p{buffer}; 664 if (scanned.isHexadecimal) { 665 buffer[got] = '\0'; 666 converted = ConvertHexadecimal<binaryPrecision>( 667 p, edit.modes.round, scanned.exponent); 668 } else { 669 bool hadExtra{got > maxDigits}; 670 int exponent{scanned.exponent}; 671 if (exponent != 0) { 672 buffer[got++] = 'e'; 673 if (exponent < 0) { 674 buffer[got++] = '-'; 675 exponent = -exponent; 676 } 677 if (exponent > 9999) { 678 exponent = 9999; // will convert to +/-Inf 679 } 680 if (exponent > 999) { 681 int dig{exponent / 1000}; 682 buffer[got++] = '0' + dig; 683 int rest{exponent - 1000 * dig}; 684 dig = rest / 100; 685 buffer[got++] = '0' + dig; 686 rest -= 100 * dig; 687 dig = rest / 10; 688 buffer[got++] = '0' + dig; 689 buffer[got++] = '0' + (rest - 10 * dig); 690 } else if (exponent > 99) { 691 int dig{exponent / 100}; 692 buffer[got++] = '0' + dig; 693 int rest{exponent - 100 * dig}; 694 dig = rest / 10; 695 buffer[got++] = '0' + dig; 696 buffer[got++] = '0' + (rest - 10 * dig); 697 } else if (exponent > 9) { 698 int dig{exponent / 10}; 699 buffer[got++] = '0' + dig; 700 buffer[got++] = '0' + (exponent - 10 * dig); 701 } else { 702 buffer[got++] = '0' + exponent; 703 } 704 } 705 buffer[got] = '\0'; 706 converted = decimal::ConvertToBinary<binaryPrecision>(p, edit.modes.round); 707 if (hadExtra) { 708 converted.flags = static_cast<enum decimal::ConversionResultFlags>( 709 converted.flags | decimal::Inexact); 710 } 711 } 712 if (*p) { // unprocessed junk after value 713 const auto &connection{io.GetConnectionState()}; 714 io.GetIoErrorHandler().SignalError(IostatBadRealInput, 715 "Trailing characters after real input data at column %d of record %d", 716 static_cast<int>(connection.positionInRecord + 1), 717 static_cast<int>(connection.currentRecordNumber)); 718 return false; 719 } 720 *reinterpret_cast<decimal::BinaryFloatingPointNumber<binaryPrecision> *>(n) = 721 converted.binary; 722 // Set FP exception flags 723 if (converted.flags != decimal::ConversionResultFlags::Exact) { 724 if (converted.flags & decimal::ConversionResultFlags::Overflow) { 725 io.GetIoErrorHandler().SignalError(IostatRealInputOverflow); 726 return false; 727 } 728 RaiseFPExceptions(converted.flags); 729 } 730 return CheckCompleteListDirectedField(io, edit); 731 } 732 733 template <int KIND> 734 bool EditRealInput(IoStatementState &io, const DataEdit &edit, void *n) { 735 switch (edit.descriptor) { 736 case DataEdit::ListDirected: 737 if (IsNamelistNameOrSlash(io)) { 738 return false; 739 } 740 return EditCommonRealInput<KIND>(io, edit, n); 741 case DataEdit::ListDirectedRealPart: 742 case DataEdit::ListDirectedImaginaryPart: 743 case 'F': 744 case 'E': // incl. EN, ES, & EX 745 case 'D': 746 case 'G': 747 return EditCommonRealInput<KIND>(io, edit, n); 748 case 'B': 749 return EditBOZInput<1>(io, edit, n, 750 common::BitsForBinaryPrecision(common::PrecisionOfRealKind(KIND)) >> 3); 751 case 'O': 752 return EditBOZInput<3>(io, edit, n, 753 common::BitsForBinaryPrecision(common::PrecisionOfRealKind(KIND)) >> 3); 754 case 'Z': 755 return EditBOZInput<4>(io, edit, n, 756 common::BitsForBinaryPrecision(common::PrecisionOfRealKind(KIND)) >> 3); 757 case 'A': // legacy extension 758 return EditCharacterInput(io, edit, reinterpret_cast<char *>(n), KIND); 759 default: 760 io.GetIoErrorHandler().SignalError(IostatErrorInFormat, 761 "Data edit descriptor '%c' may not be used for REAL input", 762 edit.descriptor); 763 return false; 764 } 765 } 766 767 // 13.7.3 in Fortran 2018 768 bool EditLogicalInput(IoStatementState &io, const DataEdit &edit, bool &x) { 769 switch (edit.descriptor) { 770 case DataEdit::ListDirected: 771 if (IsNamelistNameOrSlash(io)) { 772 return false; 773 } 774 break; 775 case 'L': 776 case 'G': 777 break; 778 default: 779 io.GetIoErrorHandler().SignalError(IostatErrorInFormat, 780 "Data edit descriptor '%c' may not be used for LOGICAL input", 781 edit.descriptor); 782 return false; 783 } 784 std::optional<int> remaining{io.CueUpInput(edit)}; 785 std::optional<char32_t> next{io.NextInField(remaining, edit)}; 786 if (next && *next == '.') { // skip optional period 787 next = io.NextInField(remaining, edit); 788 } 789 if (!next) { 790 io.GetIoErrorHandler().SignalError("Empty LOGICAL input field"); 791 return false; 792 } 793 switch (*next) { 794 case 'T': 795 case 't': 796 x = true; 797 break; 798 case 'F': 799 case 'f': 800 x = false; 801 break; 802 default: 803 io.GetIoErrorHandler().SignalError( 804 "Bad character '%lc' in LOGICAL input field", *next); 805 return false; 806 } 807 if (remaining) { // ignore the rest of a fixed-width field 808 io.HandleRelativePosition(*remaining); 809 } else if (edit.descriptor == DataEdit::ListDirected) { 810 while (io.NextInField(remaining, edit)) { // discard rest of field 811 } 812 } 813 return CheckCompleteListDirectedField(io, edit); 814 } 815 816 // See 13.10.3.1 paragraphs 7-9 in Fortran 2018 817 template <typename CHAR> 818 static bool EditDelimitedCharacterInput( 819 IoStatementState &io, CHAR *x, std::size_t length, char32_t delimiter) { 820 bool result{true}; 821 while (true) { 822 std::size_t byteCount{0}; 823 auto ch{io.GetCurrentChar(byteCount)}; 824 if (!ch) { 825 if (io.AdvanceRecord()) { 826 continue; 827 } else { 828 result = false; // EOF in character value 829 break; 830 } 831 } 832 io.HandleRelativePosition(byteCount); 833 if (*ch == delimiter) { 834 auto next{io.GetCurrentChar(byteCount)}; 835 if (next && *next == delimiter) { 836 // Repeated delimiter: use as character value 837 io.HandleRelativePosition(byteCount); 838 } else { 839 break; // closing delimiter 840 } 841 } 842 if (length > 0) { 843 *x++ = *ch; 844 --length; 845 } 846 } 847 std::fill_n(x, length, ' '); 848 return result; 849 } 850 851 template <typename CHAR> 852 static bool EditListDirectedCharacterInput( 853 IoStatementState &io, CHAR *x, std::size_t length, const DataEdit &edit) { 854 std::size_t byteCount{0}; 855 auto ch{io.GetCurrentChar(byteCount)}; 856 if (ch && (*ch == '\'' || *ch == '"')) { 857 io.HandleRelativePosition(byteCount); 858 return EditDelimitedCharacterInput(io, x, length, *ch); 859 } 860 if (IsNamelistNameOrSlash(io) || io.GetConnectionState().IsAtEOF()) { 861 return false; 862 } 863 // Undelimited list-directed character input: stop at a value separator 864 // or the end of the current record. Subtlety: the "remaining" count 865 // here is a dummy that's used to avoid the interpretation of separators 866 // in NextInField. 867 std::optional<int> remaining{length > 0 ? maxUTF8Bytes : 0}; 868 while (std::optional<char32_t> next{io.NextInField(remaining, edit)}) { 869 bool isSep{false}; 870 switch (*next) { 871 case ' ': 872 case '\t': 873 case '/': 874 isSep = true; 875 break; 876 case ',': 877 isSep = !(edit.modes.editingFlags & decimalComma); 878 break; 879 case ';': 880 isSep = !!(edit.modes.editingFlags & decimalComma); 881 break; 882 default: 883 break; 884 } 885 if (isSep) { 886 remaining = 0; 887 } else { 888 *x++ = *next; 889 remaining = --length > 0 ? maxUTF8Bytes : 0; 890 } 891 } 892 std::fill_n(x, length, ' '); 893 return true; 894 } 895 896 template <typename CHAR> 897 bool EditCharacterInput( 898 IoStatementState &io, const DataEdit &edit, CHAR *x, std::size_t length) { 899 switch (edit.descriptor) { 900 case DataEdit::ListDirected: 901 return EditListDirectedCharacterInput(io, x, length, edit); 902 case 'A': 903 case 'G': 904 break; 905 case 'B': 906 return EditBOZInput<1>(io, edit, x, length * sizeof *x); 907 case 'O': 908 return EditBOZInput<3>(io, edit, x, length * sizeof *x); 909 case 'Z': 910 return EditBOZInput<4>(io, edit, x, length * sizeof *x); 911 default: 912 io.GetIoErrorHandler().SignalError(IostatErrorInFormat, 913 "Data edit descriptor '%c' may not be used with a CHARACTER data item", 914 edit.descriptor); 915 return false; 916 } 917 const ConnectionState &connection{io.GetConnectionState()}; 918 std::size_t remaining{length}; 919 if (edit.width && *edit.width > 0) { 920 remaining = *edit.width; 921 } 922 // When the field is wider than the variable, we drop the leading 923 // characters. When the variable is wider than the field, there can be 924 // trailing padding or an EOR condition. 925 const char *input{nullptr}; 926 std::size_t ready{0}; 927 // Skip leading bytes. 928 // These bytes don't count towards INQUIRE(IOLENGTH=). 929 std::size_t skip{remaining > length ? remaining - length : 0}; 930 // Transfer payload bytes; these do count. 931 while (remaining > 0) { 932 if (ready == 0) { 933 ready = io.GetNextInputBytes(input); 934 if (ready == 0 || (ready < remaining && edit.modes.nonAdvancing)) { 935 if (io.CheckForEndOfRecord(ready)) { 936 if (ready == 0) { 937 // PAD='YES' and no more data 938 std::fill_n(x, length, ' '); 939 return !io.GetIoErrorHandler().InError(); 940 } else { 941 // Do partial read(s) then pad on last iteration 942 } 943 } else { 944 return !io.GetIoErrorHandler().InError(); 945 } 946 } 947 } 948 std::size_t chunk; 949 bool skipping{skip > 0}; 950 if (connection.isUTF8) { 951 chunk = MeasureUTF8Bytes(*input); 952 if (skipping) { 953 --skip; 954 } else if (auto ucs{DecodeUTF8(input)}) { 955 *x++ = *ucs; 956 --length; 957 } else if (chunk == 0) { 958 // error recovery: skip bad encoding 959 chunk = 1; 960 } 961 --remaining; 962 } else if (connection.internalIoCharKind > 1) { 963 // Reading from non-default character internal unit 964 chunk = connection.internalIoCharKind; 965 if (skipping) { 966 --skip; 967 } else { 968 char32_t buffer{0}; 969 std::memcpy(&buffer, input, chunk); 970 *x++ = buffer; 971 --length; 972 } 973 --remaining; 974 } else if constexpr (sizeof *x > 1) { 975 // Read single byte with expansion into multi-byte CHARACTER 976 chunk = 1; 977 if (skipping) { 978 --skip; 979 } else { 980 *x++ = static_cast<unsigned char>(*input); 981 --length; 982 } 983 --remaining; 984 } else { // single bytes -> default CHARACTER 985 if (skipping) { 986 chunk = std::min<std::size_t>(skip, ready); 987 skip -= chunk; 988 } else { 989 chunk = std::min<std::size_t>(remaining, ready); 990 std::memcpy(x, input, chunk); 991 x += chunk; 992 length -= chunk; 993 } 994 remaining -= chunk; 995 } 996 input += chunk; 997 if (!skipping) { 998 io.GotChar(chunk); 999 } 1000 io.HandleRelativePosition(chunk); 1001 ready -= chunk; 1002 } 1003 // Pad the remainder of the input variable, if any. 1004 std::fill_n(x, length, ' '); 1005 return CheckCompleteListDirectedField(io, edit); 1006 } 1007 1008 template bool EditRealInput<2>(IoStatementState &, const DataEdit &, void *); 1009 template bool EditRealInput<3>(IoStatementState &, const DataEdit &, void *); 1010 template bool EditRealInput<4>(IoStatementState &, const DataEdit &, void *); 1011 template bool EditRealInput<8>(IoStatementState &, const DataEdit &, void *); 1012 template bool EditRealInput<10>(IoStatementState &, const DataEdit &, void *); 1013 // TODO: double/double 1014 template bool EditRealInput<16>(IoStatementState &, const DataEdit &, void *); 1015 1016 template bool EditCharacterInput( 1017 IoStatementState &, const DataEdit &, char *, std::size_t); 1018 template bool EditCharacterInput( 1019 IoStatementState &, const DataEdit &, char16_t *, std::size_t); 1020 template bool EditCharacterInput( 1021 IoStatementState &, const DataEdit &, char32_t *, std::size_t); 1022 1023 } // namespace Fortran::runtime::io 1024