1 //===-- runtime/edit-input.cpp --------------------------------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #include "edit-input.h" 10 #include "namelist.h" 11 #include "utf.h" 12 #include "flang/Common/real.h" 13 #include "flang/Common/uint128.h" 14 #include <algorithm> 15 #include <cfenv> 16 17 namespace Fortran::runtime::io { 18 19 // Checks that a list-directed input value has been entirely consumed and 20 // doesn't contain unparsed characters before the next value separator. 21 static inline bool IsCharValueSeparator(const DataEdit &edit, char32_t ch) { 22 char32_t comma{ 23 edit.modes.editingFlags & decimalComma ? char32_t{';'} : char32_t{','}}; 24 return ch == ' ' || ch == '\t' || ch == '/' || ch == comma; 25 } 26 27 static bool CheckCompleteListDirectedField( 28 IoStatementState &io, const DataEdit &edit) { 29 if (edit.IsListDirected()) { 30 std::size_t byteCount; 31 if (auto ch{io.GetCurrentChar(byteCount)}) { 32 if (IsCharValueSeparator(edit, *ch)) { 33 return true; 34 } else { 35 const auto &connection{io.GetConnectionState()}; 36 io.GetIoErrorHandler().SignalError(IostatBadListDirectedInputSeparator, 37 "invalid character (0x%x) after list-directed input value, " 38 "at column %d in record %d", 39 static_cast<unsigned>(*ch), 40 static_cast<int>(connection.positionInRecord + 1), 41 static_cast<int>(connection.currentRecordNumber)); 42 return false; 43 } 44 } else { 45 return true; // end of record: ok 46 } 47 } else { 48 return true; 49 } 50 } 51 52 template <int LOG2_BASE> 53 static bool EditBOZInput( 54 IoStatementState &io, const DataEdit &edit, void *n, std::size_t bytes) { 55 // Skip leading white space & zeroes 56 std::optional<int> remaining{io.CueUpInput(edit)}; 57 auto start{io.GetConnectionState().positionInRecord}; 58 std::optional<char32_t> next{io.NextInField(remaining, edit)}; 59 if (next.value_or('?') == '0') { 60 do { 61 start = io.GetConnectionState().positionInRecord; 62 next = io.NextInField(remaining, edit); 63 } while (next && *next == '0'); 64 } 65 // Count significant digits after any leading white space & zeroes 66 int digits{0}; 67 int significantBits{0}; 68 for (; next; next = io.NextInField(remaining, edit)) { 69 char32_t ch{*next}; 70 if (ch == ' ' || ch == '\t') { 71 if (edit.modes.editingFlags & blankZero) { 72 ch = '0'; // BZ mode - treat blank as if it were zero 73 } else { 74 continue; 75 } 76 } 77 if (ch >= '0' && ch <= '1') { 78 } else if (LOG2_BASE >= 3 && ch >= '2' && ch <= '7') { 79 } else if (LOG2_BASE >= 4 && ch >= '8' && ch <= '9') { 80 } else if (LOG2_BASE >= 4 && ch >= 'A' && ch <= 'F') { 81 } else if (LOG2_BASE >= 4 && ch >= 'a' && ch <= 'f') { 82 } else { 83 io.GetIoErrorHandler().SignalError( 84 "Bad character '%lc' in B/O/Z input field", ch); 85 return false; 86 } 87 if (digits++ == 0) { 88 significantBits = 4; 89 if (ch >= '0' && ch <= '1') { 90 significantBits = 1; 91 } else if (ch >= '2' && ch <= '3') { 92 significantBits = 2; 93 } else if (ch >= '4' && ch <= '7') { 94 significantBits = 3; 95 } else { 96 significantBits = 4; 97 } 98 } else { 99 significantBits += LOG2_BASE; 100 } 101 } 102 auto significantBytes{static_cast<std::size_t>(significantBits + 7) / 8}; 103 if (significantBytes > bytes) { 104 io.GetIoErrorHandler().SignalError(IostatBOZInputOverflow, 105 "B/O/Z input of %d digits overflows %zd-byte variable", digits, bytes); 106 return false; 107 } 108 // Reset to start of significant digits 109 io.HandleAbsolutePosition(start); 110 remaining.reset(); 111 // Make a second pass now that the digit count is known 112 std::memset(n, 0, bytes); 113 int increment{isHostLittleEndian ? -1 : 1}; 114 auto *data{reinterpret_cast<unsigned char *>(n) + 115 (isHostLittleEndian ? significantBytes - 1 : 0)}; 116 int shift{((digits - 1) * LOG2_BASE) & 7}; 117 while (digits > 0) { 118 char32_t ch{*io.NextInField(remaining, edit)}; 119 int digit{0}; 120 if (ch == ' ' || ch == '\t') { 121 if (edit.modes.editingFlags & blankZero) { 122 ch = '0'; // BZ mode - treat blank as if it were zero 123 } else { 124 continue; 125 } 126 } 127 --digits; 128 if (ch >= '0' && ch <= '9') { 129 digit = ch - '0'; 130 } else if (ch >= 'A' && ch <= 'F') { 131 digit = ch + 10 - 'A'; 132 } else if (ch >= 'a' && ch <= 'f') { 133 digit = ch + 10 - 'a'; 134 } else { 135 continue; 136 } 137 if (shift < 0) { 138 if (shift + LOG2_BASE > 0) { // misaligned octal 139 *data |= digit >> -shift; 140 } 141 shift += 8; 142 data += increment; 143 } 144 *data |= digit << shift; 145 shift -= LOG2_BASE; 146 } 147 return CheckCompleteListDirectedField(io, edit); 148 } 149 150 static inline char32_t GetRadixPointChar(const DataEdit &edit) { 151 return edit.modes.editingFlags & decimalComma ? char32_t{','} : char32_t{'.'}; 152 } 153 154 // Prepares input from a field, and returns the sign, if any, else '\0'. 155 static char ScanNumericPrefix(IoStatementState &io, const DataEdit &edit, 156 std::optional<char32_t> &next, std::optional<int> &remaining) { 157 remaining = io.CueUpInput(edit); 158 next = io.NextInField(remaining, edit); 159 char sign{'\0'}; 160 if (next) { 161 if (*next == '-' || *next == '+') { 162 sign = *next; 163 if (!edit.IsListDirected()) { 164 io.SkipSpaces(remaining); 165 } 166 next = io.NextInField(remaining, edit); 167 } 168 } 169 return sign; 170 } 171 172 bool EditIntegerInput( 173 IoStatementState &io, const DataEdit &edit, void *n, int kind) { 174 RUNTIME_CHECK(io.GetIoErrorHandler(), kind >= 1 && !(kind & (kind - 1))); 175 switch (edit.descriptor) { 176 case DataEdit::ListDirected: 177 if (IsNamelistNameOrSlash(io)) { 178 return false; 179 } 180 break; 181 case 'G': 182 case 'I': 183 break; 184 case 'B': 185 return EditBOZInput<1>(io, edit, n, kind); 186 case 'O': 187 return EditBOZInput<3>(io, edit, n, kind); 188 case 'Z': 189 return EditBOZInput<4>(io, edit, n, kind); 190 case 'A': // legacy extension 191 return EditCharacterInput(io, edit, reinterpret_cast<char *>(n), kind); 192 default: 193 io.GetIoErrorHandler().SignalError(IostatErrorInFormat, 194 "Data edit descriptor '%c' may not be used with an INTEGER data item", 195 edit.descriptor); 196 return false; 197 } 198 std::optional<int> remaining; 199 std::optional<char32_t> next; 200 char sign{ScanNumericPrefix(io, edit, next, remaining)}; 201 common::UnsignedInt128 value{0}; 202 bool any{!!sign}; 203 bool overflow{false}; 204 for (; next; next = io.NextInField(remaining, edit)) { 205 char32_t ch{*next}; 206 if (ch == ' ' || ch == '\t') { 207 if (edit.modes.editingFlags & blankZero) { 208 ch = '0'; // BZ mode - treat blank as if it were zero 209 } else { 210 continue; 211 } 212 } 213 int digit{0}; 214 if (ch >= '0' && ch <= '9') { 215 digit = ch - '0'; 216 } else { 217 io.GetIoErrorHandler().SignalError( 218 "Bad character '%lc' in INTEGER input field", ch); 219 return false; 220 } 221 static constexpr auto maxu128{~common::UnsignedInt128{0}}; 222 static constexpr auto maxu128OverTen{maxu128 / 10}; 223 static constexpr int maxLastDigit{ 224 static_cast<int>(maxu128 - (maxu128OverTen * 10))}; 225 overflow |= value >= maxu128OverTen && 226 (value > maxu128OverTen || digit > maxLastDigit); 227 value *= 10; 228 value += digit; 229 any = true; 230 } 231 if (!any && !remaining) { 232 io.GetIoErrorHandler().SignalError( 233 "Integer value absent from NAMELIST or list-directed input"); 234 return false; 235 } 236 auto maxForKind{common::UnsignedInt128{1} << ((8 * kind) - 1)}; 237 overflow |= value >= maxForKind && (value > maxForKind || sign != '-'); 238 if (overflow) { 239 io.GetIoErrorHandler().SignalError(IostatIntegerInputOverflow, 240 "Decimal input overflows INTEGER(%d) variable", kind); 241 return false; 242 } 243 if (sign == '-') { 244 value = -value; 245 } 246 if (any || !io.GetConnectionState().IsAtEOF()) { 247 // The value is stored in the lower order bits on big endian platform. 248 // When memcpy, shift the value to the higher order bit. 249 auto shft{static_cast<int>(sizeof(value.low())) - kind}; 250 // For kind==8 (i.e. shft==0), the value is stored in low_ in big endian. 251 if (!isHostLittleEndian && shft >= 0) { 252 auto l{value.low() << (8 * shft)}; 253 std::memcpy(n, &l, kind); 254 } else { 255 std::memcpy(n, &value, kind); // a blank field means zero 256 } 257 } 258 return any; 259 } 260 261 // Parses a REAL input number from the input source as a normalized 262 // fraction into a supplied buffer -- there's an optional '-', a 263 // decimal point when the input is not hexadecimal, and at least one 264 // digit. Replaces blanks with zeroes where appropriate. 265 struct ScannedRealInput { 266 // Number of characters that (should) have been written to the 267 // buffer -- this can be larger than the buffer size, which 268 // indicates buffer overflow. Zero indicates an error. 269 int got{0}; 270 int exponent{0}; // adjusted as necessary; binary if isHexadecimal 271 bool isHexadecimal{false}; // 0X... 272 }; 273 static ScannedRealInput ScanRealInput( 274 char *buffer, int bufferSize, IoStatementState &io, const DataEdit &edit) { 275 std::optional<int> remaining; 276 std::optional<char32_t> next; 277 int got{0}; 278 std::optional<int> radixPointOffset; 279 auto Put{[&](char ch) -> void { 280 if (got < bufferSize) { 281 buffer[got] = ch; 282 } 283 ++got; 284 }}; 285 char sign{ScanNumericPrefix(io, edit, next, remaining)}; 286 if (sign == '-') { 287 Put('-'); 288 } 289 bool bzMode{(edit.modes.editingFlags & blankZero) != 0}; 290 int exponent{0}; 291 if (!next || (!bzMode && *next == ' ')) { 292 if (!edit.IsListDirected() && !io.GetConnectionState().IsAtEOF()) { 293 // An empty/blank field means zero when not list-directed. 294 // A fixed-width field containing only a sign is also zero; 295 // this behavior isn't standard-conforming in F'2023 but it is 296 // required to pass FCVS. 297 Put('0'); 298 } 299 return {got, exponent, false}; 300 } 301 char32_t radixPointChar{GetRadixPointChar(edit)}; 302 char32_t first{*next >= 'a' && *next <= 'z' ? *next + 'A' - 'a' : *next}; 303 bool isHexadecimal{false}; 304 if (first == 'N' || first == 'I') { 305 // NaN or infinity - convert to upper case 306 // Subtle: a blank field of digits could be followed by 'E' or 'D', 307 for (; next && 308 ((*next >= 'a' && *next <= 'z') || (*next >= 'A' && *next <= 'Z')); 309 next = io.NextInField(remaining, edit)) { 310 if (*next >= 'a' && *next <= 'z') { 311 Put(*next - 'a' + 'A'); 312 } else { 313 Put(*next); 314 } 315 } 316 if (next && *next == '(') { // NaN(...) 317 Put('('); 318 int depth{1}; 319 while (true) { 320 next = io.NextInField(remaining, edit); 321 if (depth == 0) { 322 break; 323 } else if (!next) { 324 return {}; // error 325 } else if (*next == '(') { 326 ++depth; 327 } else if (*next == ')') { 328 --depth; 329 } 330 Put(*next); 331 } 332 } 333 } else if (first == radixPointChar || (first >= '0' && first <= '9') || 334 (bzMode && (first == ' ' || first == '\t')) || first == 'E' || 335 first == 'D' || first == 'Q') { 336 if (first == '0') { 337 next = io.NextInField(remaining, edit); 338 if (next && (*next == 'x' || *next == 'X')) { // 0X... 339 isHexadecimal = true; 340 next = io.NextInField(remaining, edit); 341 } else { 342 Put('0'); 343 } 344 } 345 // input field is normalized to a fraction 346 if (!isHexadecimal) { 347 Put('.'); 348 } 349 auto start{got}; 350 for (; next; next = io.NextInField(remaining, edit)) { 351 char32_t ch{*next}; 352 if (ch == ' ' || ch == '\t') { 353 if (isHexadecimal) { 354 return {}; // error 355 } else if (bzMode) { 356 ch = '0'; // BZ mode - treat blank as if it were zero 357 } else { 358 continue; // ignore blank in fixed field 359 } 360 } 361 if (ch == '0' && got == start && !radixPointOffset) { 362 // omit leading zeroes before the radix point 363 } else if (ch >= '0' && ch <= '9') { 364 Put(ch); 365 } else if (ch == radixPointChar && !radixPointOffset) { 366 // The radix point character is *not* copied to the buffer. 367 radixPointOffset = got - start; // # of digits before the radix point 368 } else if (isHexadecimal && ch >= 'A' && ch <= 'F') { 369 Put(ch); 370 } else if (isHexadecimal && ch >= 'a' && ch <= 'f') { 371 Put(ch - 'a' + 'A'); // normalize to capitals 372 } else { 373 break; 374 } 375 } 376 if (got == start) { 377 // Nothing but zeroes and maybe a radix point. F'2018 requires 378 // at least one digit, but F'77 did not, and a bare "." shows up in 379 // the FCVS suite. 380 Put('0'); // emit at least one digit 381 } 382 // In list-directed input, a bad exponent is not consumed. 383 auto nextBeforeExponent{next}; 384 auto startExponent{io.GetConnectionState().positionInRecord}; 385 bool hasGoodExponent{false}; 386 if (next) { 387 if (isHexadecimal) { 388 if (*next == 'p' || *next == 'P') { 389 next = io.NextInField(remaining, edit); 390 } else { 391 // The binary exponent is not optional in the standard. 392 return {}; // error 393 } 394 } else if (*next == 'e' || *next == 'E' || *next == 'd' || *next == 'D' || 395 *next == 'q' || *next == 'Q') { 396 // Optional exponent letter. Blanks are allowed between the 397 // optional exponent letter and the exponent value. 398 io.SkipSpaces(remaining); 399 next = io.NextInField(remaining, edit); 400 } 401 } 402 if (next && 403 (*next == '-' || *next == '+' || (*next >= '0' && *next <= '9') || 404 *next == ' ' || *next == '\t')) { 405 bool negExpo{*next == '-'}; 406 if (negExpo || *next == '+') { 407 next = io.NextInField(remaining, edit); 408 } 409 for (; next; next = io.NextInField(remaining, edit)) { 410 if (*next >= '0' && *next <= '9') { 411 hasGoodExponent = true; 412 if (exponent < 10000) { 413 exponent = 10 * exponent + *next - '0'; 414 } 415 } else if (*next == ' ' || *next == '\t') { 416 if (isHexadecimal) { 417 break; 418 } else if (bzMode) { 419 hasGoodExponent = true; 420 exponent = 10 * exponent; 421 } 422 } else { 423 break; 424 } 425 } 426 if (negExpo) { 427 exponent = -exponent; 428 } 429 } 430 if (!hasGoodExponent) { 431 if (isHexadecimal) { 432 return {}; // error 433 } 434 // There isn't a good exponent; do not consume it. 435 next = nextBeforeExponent; 436 io.HandleAbsolutePosition(startExponent); 437 // The default exponent is -kP, but the scale factor doesn't affect 438 // an explicit exponent. 439 exponent = -edit.modes.scale; 440 } 441 // Adjust exponent by number of digits before the radix point. 442 if (isHexadecimal) { 443 // Exponents for hexadecimal input are binary. 444 exponent += radixPointOffset.value_or(got - start) * 4; 445 } else if (radixPointOffset) { 446 exponent += *radixPointOffset; 447 } else { 448 // When no redix point (or comma) appears in the value, the 'd' 449 // part of the edit descriptor must be interpreted as the number of 450 // digits in the value to be interpreted as being to the *right* of 451 // the assumed radix point (13.7.2.3.2) 452 exponent += got - start - edit.digits.value_or(0); 453 } 454 } 455 // Consume the trailing ')' of a list-directed or NAMELIST complex 456 // input value. 457 if (edit.descriptor == DataEdit::ListDirectedImaginaryPart) { 458 if (next && (*next == ' ' || *next == '\t')) { 459 io.SkipSpaces(remaining); 460 next = io.NextInField(remaining, edit); 461 } 462 if (!next) { // NextInField fails on separators like ')' 463 std::size_t byteCount{0}; 464 next = io.GetCurrentChar(byteCount); 465 if (next && *next == ')') { 466 io.HandleRelativePosition(byteCount); 467 } 468 } 469 } else if (remaining) { 470 while (next && (*next == ' ' || *next == '\t')) { 471 next = io.NextInField(remaining, edit); 472 } 473 if (next) { 474 return {}; // error: unused nonblank character in fixed-width field 475 } 476 } 477 return {got, exponent, isHexadecimal}; 478 } 479 480 static void RaiseFPExceptions(decimal::ConversionResultFlags flags) { 481 #undef RAISE 482 #ifdef feraisexcept // a macro in some environments; omit std:: 483 #define RAISE feraiseexcept 484 #else 485 #define RAISE std::feraiseexcept 486 #endif 487 if (flags & decimal::ConversionResultFlags::Overflow) { 488 RAISE(FE_OVERFLOW); 489 } 490 if (flags & decimal::ConversionResultFlags::Underflow) { 491 RAISE(FE_UNDERFLOW); 492 } 493 if (flags & decimal::ConversionResultFlags::Inexact) { 494 RAISE(FE_INEXACT); 495 } 496 if (flags & decimal::ConversionResultFlags::Invalid) { 497 RAISE(FE_INVALID); 498 } 499 #undef RAISE 500 } 501 502 // If no special modes are in effect and the form of the input value 503 // that's present in the input stream is acceptable to the decimal->binary 504 // converter without modification, this fast path for real input 505 // saves time by avoiding memory copies and reformatting of the exponent. 506 template <int PRECISION> 507 static bool TryFastPathRealDecimalInput( 508 IoStatementState &io, const DataEdit &edit, void *n) { 509 if (edit.modes.editingFlags & (blankZero | decimalComma)) { 510 return false; 511 } 512 if (edit.modes.scale != 0) { 513 return false; 514 } 515 const ConnectionState &connection{io.GetConnectionState()}; 516 if (connection.internalIoCharKind > 1) { 517 return false; // reading non-default character 518 } 519 const char *str{nullptr}; 520 std::size_t got{io.GetNextInputBytes(str)}; 521 if (got == 0 || str == nullptr || !connection.recordLength.has_value()) { 522 return false; // could not access reliably-terminated input stream 523 } 524 const char *p{str}; 525 std::int64_t maxConsume{ 526 std::min<std::int64_t>(got, edit.width.value_or(got))}; 527 const char *limit{str + maxConsume}; 528 decimal::ConversionToBinaryResult<PRECISION> converted{ 529 decimal::ConvertToBinary<PRECISION>(p, edit.modes.round, limit)}; 530 if (converted.flags & (decimal::Invalid | decimal::Overflow)) { 531 return false; 532 } 533 if (edit.digits.value_or(0) != 0) { 534 // Edit descriptor is Fw.d (or other) with d != 0, which 535 // implies scaling 536 const char *q{str}; 537 for (; q < limit; ++q) { 538 if (*q == '.' || *q == 'n' || *q == 'N') { 539 break; 540 } 541 } 542 if (q == limit) { 543 // No explicit decimal point, and not NaN/Inf. 544 return false; 545 } 546 } 547 if (edit.descriptor == DataEdit::ListDirectedImaginaryPart) { 548 // Need to consume a trailing ')', possibly with leading spaces 549 for (; p < limit && (*p == ' ' || *p == '\t'); ++p) { 550 } 551 if (p < limit && *p == ')') { 552 ++p; 553 } else { 554 return false; 555 } 556 } else if (edit.IsListDirected()) { 557 if (p < limit && !IsCharValueSeparator(edit, *p)) { 558 return false; 559 } 560 } else { 561 for (; p < limit && (*p == ' ' || *p == '\t'); ++p) { 562 } 563 if (edit.width && p < str + *edit.width) { 564 return false; // unconverted characters remain in fixed width field 565 } 566 } 567 // Success on the fast path! 568 *reinterpret_cast<decimal::BinaryFloatingPointNumber<PRECISION> *>(n) = 569 converted.binary; 570 io.HandleRelativePosition(p - str); 571 // Set FP exception flags 572 if (converted.flags != decimal::ConversionResultFlags::Exact) { 573 RaiseFPExceptions(converted.flags); 574 } 575 return true; 576 } 577 578 template <int binaryPrecision> 579 decimal::ConversionToBinaryResult<binaryPrecision> ConvertHexadecimal( 580 const char *&p, enum decimal::FortranRounding rounding, int expo) { 581 using RealType = decimal::BinaryFloatingPointNumber<binaryPrecision>; 582 using RawType = typename RealType::RawType; 583 bool isNegative{*p == '-'}; 584 constexpr RawType one{1}; 585 RawType signBit{0}; 586 if (isNegative) { 587 ++p; 588 signBit = one << (RealType::bits - 1); 589 } 590 RawType fraction{0}; 591 // Adjust the incoming binary P+/- exponent to shift the radix point 592 // to below the LSB and add in the bias. 593 expo += binaryPrecision - 1 + RealType::exponentBias; 594 // Input the fraction. 595 int roundingBit{0}; 596 int guardBit{0}; 597 for (; *p; ++p) { 598 fraction <<= 4; 599 expo -= 4; 600 if (*p >= '0' && *p <= '9') { 601 fraction |= *p - '0'; 602 } else if (*p >= 'A' && *p <= 'F') { 603 fraction |= *p - 'A' + 10; // data were normalized to capitals 604 } else { 605 break; 606 } 607 while (fraction >> binaryPrecision) { 608 guardBit |= roundingBit; 609 roundingBit = (int)fraction & 1; 610 fraction >>= 1; 611 ++expo; 612 } 613 } 614 if (fraction) { 615 // Boost biased expo if too small 616 while (expo < 1) { 617 guardBit |= roundingBit; 618 roundingBit = (int)fraction & 1; 619 fraction >>= 1; 620 ++expo; 621 } 622 // Normalize 623 while (expo > 1 && !(fraction >> (binaryPrecision - 1))) { 624 fraction <<= 1; 625 --expo; 626 } 627 // Rounding 628 bool increase{false}; 629 switch (rounding) { 630 case decimal::RoundNearest: // RN & RP 631 increase = roundingBit && (guardBit | ((int)fraction & 1)); 632 break; 633 case decimal::RoundUp: // RU 634 increase = !isNegative && (roundingBit | guardBit); 635 break; 636 case decimal::RoundDown: // RD 637 increase = isNegative && (roundingBit | guardBit); 638 break; 639 case decimal::RoundToZero: // RZ 640 break; 641 case decimal::RoundCompatible: // RC 642 increase = roundingBit != 0; 643 break; 644 } 645 if (increase) { 646 ++fraction; 647 if (fraction >> binaryPrecision) { 648 fraction >>= 1; 649 ++expo; 650 } 651 } 652 } 653 // Package & return result 654 constexpr RawType significandMask{(one << RealType::significandBits) - 1}; 655 int flags{(roundingBit | guardBit) ? decimal::Inexact : decimal::Exact}; 656 if (!fraction) { 657 expo = 0; 658 } else if (expo == 1 && !(fraction >> (binaryPrecision - 1))) { 659 expo = 0; // subnormal 660 flags |= decimal::Underflow; 661 } else if (expo >= RealType::maxExponent) { 662 if (rounding == decimal::RoundToZero || 663 (rounding == decimal::RoundDown && !isNegative) || 664 (rounding == decimal::RoundUp && isNegative)) { 665 expo = RealType::maxExponent - 1; // +/-HUGE() 666 fraction = significandMask; 667 } else { 668 expo = RealType::maxExponent; // +/-Inf 669 fraction = 0; 670 flags |= decimal::Overflow; 671 } 672 } else { 673 fraction &= significandMask; // remove explicit normalization unless x87 674 } 675 return decimal::ConversionToBinaryResult<binaryPrecision>{ 676 RealType{static_cast<RawType>(signBit | 677 static_cast<RawType>(expo) << RealType::significandBits | fraction)}, 678 static_cast<decimal::ConversionResultFlags>(flags)}; 679 } 680 681 template <int KIND> 682 bool EditCommonRealInput(IoStatementState &io, const DataEdit &edit, void *n) { 683 constexpr int binaryPrecision{common::PrecisionOfRealKind(KIND)}; 684 if (TryFastPathRealDecimalInput<binaryPrecision>(io, edit, n)) { 685 return CheckCompleteListDirectedField(io, edit); 686 } 687 // Fast path wasn't available or didn't work; go the more general route 688 static constexpr int maxDigits{ 689 common::MaxDecimalConversionDigits(binaryPrecision)}; 690 static constexpr int bufferSize{maxDigits + 18}; 691 char buffer[bufferSize]; 692 auto scanned{ScanRealInput(buffer, maxDigits + 2, io, edit)}; 693 int got{scanned.got}; 694 if (got >= maxDigits + 2) { 695 io.GetIoErrorHandler().Crash("EditCommonRealInput: buffer was too small"); 696 return false; 697 } 698 if (got == 0) { 699 const auto &connection{io.GetConnectionState()}; 700 io.GetIoErrorHandler().SignalError(IostatBadRealInput, 701 "Bad real input data at column %d of record %d", 702 static_cast<int>(connection.positionInRecord + 1), 703 static_cast<int>(connection.currentRecordNumber)); 704 return false; 705 } 706 decimal::ConversionToBinaryResult<binaryPrecision> converted; 707 const char *p{buffer}; 708 if (scanned.isHexadecimal) { 709 buffer[got] = '\0'; 710 converted = ConvertHexadecimal<binaryPrecision>( 711 p, edit.modes.round, scanned.exponent); 712 } else { 713 bool hadExtra{got > maxDigits}; 714 int exponent{scanned.exponent}; 715 if (exponent != 0) { 716 buffer[got++] = 'e'; 717 if (exponent < 0) { 718 buffer[got++] = '-'; 719 exponent = -exponent; 720 } 721 if (exponent > 9999) { 722 exponent = 9999; // will convert to +/-Inf 723 } 724 if (exponent > 999) { 725 int dig{exponent / 1000}; 726 buffer[got++] = '0' + dig; 727 int rest{exponent - 1000 * dig}; 728 dig = rest / 100; 729 buffer[got++] = '0' + dig; 730 rest -= 100 * dig; 731 dig = rest / 10; 732 buffer[got++] = '0' + dig; 733 buffer[got++] = '0' + (rest - 10 * dig); 734 } else if (exponent > 99) { 735 int dig{exponent / 100}; 736 buffer[got++] = '0' + dig; 737 int rest{exponent - 100 * dig}; 738 dig = rest / 10; 739 buffer[got++] = '0' + dig; 740 buffer[got++] = '0' + (rest - 10 * dig); 741 } else if (exponent > 9) { 742 int dig{exponent / 10}; 743 buffer[got++] = '0' + dig; 744 buffer[got++] = '0' + (exponent - 10 * dig); 745 } else { 746 buffer[got++] = '0' + exponent; 747 } 748 } 749 buffer[got] = '\0'; 750 converted = decimal::ConvertToBinary<binaryPrecision>(p, edit.modes.round); 751 if (hadExtra) { 752 converted.flags = static_cast<enum decimal::ConversionResultFlags>( 753 converted.flags | decimal::Inexact); 754 } 755 } 756 if (*p) { // unprocessed junk after value 757 const auto &connection{io.GetConnectionState()}; 758 io.GetIoErrorHandler().SignalError(IostatBadRealInput, 759 "Trailing characters after real input data at column %d of record %d", 760 static_cast<int>(connection.positionInRecord + 1), 761 static_cast<int>(connection.currentRecordNumber)); 762 return false; 763 } 764 *reinterpret_cast<decimal::BinaryFloatingPointNumber<binaryPrecision> *>(n) = 765 converted.binary; 766 // Set FP exception flags 767 if (converted.flags != decimal::ConversionResultFlags::Exact) { 768 if (converted.flags & decimal::ConversionResultFlags::Overflow) { 769 io.GetIoErrorHandler().SignalError(IostatRealInputOverflow); 770 return false; 771 } 772 RaiseFPExceptions(converted.flags); 773 } 774 return CheckCompleteListDirectedField(io, edit); 775 } 776 777 template <int KIND> 778 bool EditRealInput(IoStatementState &io, const DataEdit &edit, void *n) { 779 switch (edit.descriptor) { 780 case DataEdit::ListDirected: 781 if (IsNamelistNameOrSlash(io)) { 782 return false; 783 } 784 return EditCommonRealInput<KIND>(io, edit, n); 785 case DataEdit::ListDirectedRealPart: 786 case DataEdit::ListDirectedImaginaryPart: 787 case 'F': 788 case 'E': // incl. EN, ES, & EX 789 case 'D': 790 case 'G': 791 return EditCommonRealInput<KIND>(io, edit, n); 792 case 'B': 793 return EditBOZInput<1>(io, edit, n, 794 common::BitsForBinaryPrecision(common::PrecisionOfRealKind(KIND)) >> 3); 795 case 'O': 796 return EditBOZInput<3>(io, edit, n, 797 common::BitsForBinaryPrecision(common::PrecisionOfRealKind(KIND)) >> 3); 798 case 'Z': 799 return EditBOZInput<4>(io, edit, n, 800 common::BitsForBinaryPrecision(common::PrecisionOfRealKind(KIND)) >> 3); 801 case 'A': // legacy extension 802 return EditCharacterInput(io, edit, reinterpret_cast<char *>(n), KIND); 803 default: 804 io.GetIoErrorHandler().SignalError(IostatErrorInFormat, 805 "Data edit descriptor '%c' may not be used for REAL input", 806 edit.descriptor); 807 return false; 808 } 809 } 810 811 // 13.7.3 in Fortran 2018 812 bool EditLogicalInput(IoStatementState &io, const DataEdit &edit, bool &x) { 813 switch (edit.descriptor) { 814 case DataEdit::ListDirected: 815 if (IsNamelistNameOrSlash(io)) { 816 return false; 817 } 818 break; 819 case 'L': 820 case 'G': 821 break; 822 default: 823 io.GetIoErrorHandler().SignalError(IostatErrorInFormat, 824 "Data edit descriptor '%c' may not be used for LOGICAL input", 825 edit.descriptor); 826 return false; 827 } 828 std::optional<int> remaining{io.CueUpInput(edit)}; 829 std::optional<char32_t> next{io.NextInField(remaining, edit)}; 830 if (next && *next == '.') { // skip optional period 831 next = io.NextInField(remaining, edit); 832 } 833 if (!next) { 834 io.GetIoErrorHandler().SignalError("Empty LOGICAL input field"); 835 return false; 836 } 837 switch (*next) { 838 case 'T': 839 case 't': 840 x = true; 841 break; 842 case 'F': 843 case 'f': 844 x = false; 845 break; 846 default: 847 io.GetIoErrorHandler().SignalError( 848 "Bad character '%lc' in LOGICAL input field", *next); 849 return false; 850 } 851 if (remaining) { // ignore the rest of a fixed-width field 852 io.HandleRelativePosition(*remaining); 853 } else if (edit.descriptor == DataEdit::ListDirected) { 854 while (io.NextInField(remaining, edit)) { // discard rest of field 855 } 856 } 857 return CheckCompleteListDirectedField(io, edit); 858 } 859 860 // See 13.10.3.1 paragraphs 7-9 in Fortran 2018 861 template <typename CHAR> 862 static bool EditDelimitedCharacterInput( 863 IoStatementState &io, CHAR *x, std::size_t length, char32_t delimiter) { 864 bool result{true}; 865 while (true) { 866 std::size_t byteCount{0}; 867 auto ch{io.GetCurrentChar(byteCount)}; 868 if (!ch) { 869 if (io.AdvanceRecord()) { 870 continue; 871 } else { 872 result = false; // EOF in character value 873 break; 874 } 875 } 876 io.HandleRelativePosition(byteCount); 877 if (*ch == delimiter) { 878 auto next{io.GetCurrentChar(byteCount)}; 879 if (next && *next == delimiter) { 880 // Repeated delimiter: use as character value 881 io.HandleRelativePosition(byteCount); 882 } else { 883 break; // closing delimiter 884 } 885 } 886 if (length > 0) { 887 *x++ = *ch; 888 --length; 889 } 890 } 891 std::fill_n(x, length, ' '); 892 return result; 893 } 894 895 template <typename CHAR> 896 static bool EditListDirectedCharacterInput( 897 IoStatementState &io, CHAR *x, std::size_t length, const DataEdit &edit) { 898 std::size_t byteCount{0}; 899 auto ch{io.GetCurrentChar(byteCount)}; 900 if (ch && (*ch == '\'' || *ch == '"')) { 901 io.HandleRelativePosition(byteCount); 902 return EditDelimitedCharacterInput(io, x, length, *ch); 903 } 904 if (IsNamelistNameOrSlash(io) || io.GetConnectionState().IsAtEOF()) { 905 return false; 906 } 907 // Undelimited list-directed character input: stop at a value separator 908 // or the end of the current record. Subtlety: the "remaining" count 909 // here is a dummy that's used to avoid the interpretation of separators 910 // in NextInField. 911 std::optional<int> remaining{length > 0 ? maxUTF8Bytes : 0}; 912 while (std::optional<char32_t> next{io.NextInField(remaining, edit)}) { 913 bool isSep{false}; 914 switch (*next) { 915 case ' ': 916 case '\t': 917 case '/': 918 isSep = true; 919 break; 920 case ',': 921 isSep = !(edit.modes.editingFlags & decimalComma); 922 break; 923 case ';': 924 isSep = !!(edit.modes.editingFlags & decimalComma); 925 break; 926 default: 927 break; 928 } 929 if (isSep) { 930 remaining = 0; 931 } else { 932 *x++ = *next; 933 remaining = --length > 0 ? maxUTF8Bytes : 0; 934 } 935 } 936 std::fill_n(x, length, ' '); 937 return true; 938 } 939 940 template <typename CHAR> 941 bool EditCharacterInput(IoStatementState &io, const DataEdit &edit, CHAR *x, 942 std::size_t lengthChars) { 943 switch (edit.descriptor) { 944 case DataEdit::ListDirected: 945 return EditListDirectedCharacterInput(io, x, lengthChars, edit); 946 case 'A': 947 case 'G': 948 break; 949 case 'B': 950 return EditBOZInput<1>(io, edit, x, lengthChars * sizeof *x); 951 case 'O': 952 return EditBOZInput<3>(io, edit, x, lengthChars * sizeof *x); 953 case 'Z': 954 return EditBOZInput<4>(io, edit, x, lengthChars * sizeof *x); 955 default: 956 io.GetIoErrorHandler().SignalError(IostatErrorInFormat, 957 "Data edit descriptor '%c' may not be used with a CHARACTER data item", 958 edit.descriptor); 959 return false; 960 } 961 const ConnectionState &connection{io.GetConnectionState()}; 962 std::size_t remainingChars{lengthChars}; 963 // Skip leading characters. 964 // Their bytes don't count towards INQUIRE(IOLENGTH=). 965 std::size_t skipChars{0}; 966 if (edit.width && *edit.width > 0) { 967 remainingChars = *edit.width; 968 if (remainingChars > lengthChars) { 969 skipChars = remainingChars - lengthChars; 970 } 971 } 972 // When the field is wider than the variable, we drop the leading 973 // characters. When the variable is wider than the field, there can be 974 // trailing padding or an EOR condition. 975 const char *input{nullptr}; 976 std::size_t readyBytes{0}; 977 // Transfer payload bytes; these do count. 978 while (remainingChars > 0) { 979 if (readyBytes == 0) { 980 readyBytes = io.GetNextInputBytes(input); 981 if (readyBytes == 0 || 982 (readyBytes < remainingChars && edit.modes.nonAdvancing)) { 983 if (io.CheckForEndOfRecord(readyBytes)) { 984 if (readyBytes == 0) { 985 // PAD='YES' and no more data 986 std::fill_n(x, lengthChars, ' '); 987 return !io.GetIoErrorHandler().InError(); 988 } else { 989 // Do partial read(s) then pad on last iteration 990 } 991 } else { 992 return !io.GetIoErrorHandler().InError(); 993 } 994 } 995 } 996 std::size_t chunkBytes; 997 std::size_t chunkChars{1}; 998 bool skipping{skipChars > 0}; 999 if (connection.isUTF8) { 1000 chunkBytes = MeasureUTF8Bytes(*input); 1001 if (skipping) { 1002 --skipChars; 1003 } else if (auto ucs{DecodeUTF8(input)}) { 1004 *x++ = *ucs; 1005 --lengthChars; 1006 } else if (chunkBytes == 0) { 1007 // error recovery: skip bad encoding 1008 chunkBytes = 1; 1009 } 1010 } else if (connection.internalIoCharKind > 1) { 1011 // Reading from non-default character internal unit 1012 chunkBytes = connection.internalIoCharKind; 1013 if (skipping) { 1014 --skipChars; 1015 } else { 1016 char32_t buffer{0}; 1017 std::memcpy(&buffer, input, chunkBytes); 1018 *x++ = buffer; 1019 --lengthChars; 1020 } 1021 } else if constexpr (sizeof *x > 1) { 1022 // Read single byte with expansion into multi-byte CHARACTER 1023 chunkBytes = 1; 1024 if (skipping) { 1025 --skipChars; 1026 } else { 1027 *x++ = static_cast<unsigned char>(*input); 1028 --lengthChars; 1029 } 1030 } else { // single bytes -> default CHARACTER 1031 if (skipping) { 1032 chunkBytes = std::min<std::size_t>(skipChars, readyBytes); 1033 chunkChars = chunkBytes; 1034 skipChars -= chunkChars; 1035 } else { 1036 chunkBytes = std::min<std::size_t>(remainingChars, readyBytes); 1037 chunkBytes = std::min<std::size_t>(lengthChars, chunkBytes); 1038 chunkChars = chunkBytes; 1039 std::memcpy(x, input, chunkBytes); 1040 x += chunkBytes; 1041 lengthChars -= chunkChars; 1042 } 1043 } 1044 input += chunkBytes; 1045 remainingChars -= chunkChars; 1046 if (!skipping) { 1047 io.GotChar(chunkBytes); 1048 } 1049 io.HandleRelativePosition(chunkBytes); 1050 readyBytes -= chunkBytes; 1051 } 1052 // Pad the remainder of the input variable, if any. 1053 std::fill_n(x, lengthChars, ' '); 1054 return CheckCompleteListDirectedField(io, edit); 1055 } 1056 1057 template bool EditRealInput<2>(IoStatementState &, const DataEdit &, void *); 1058 template bool EditRealInput<3>(IoStatementState &, const DataEdit &, void *); 1059 template bool EditRealInput<4>(IoStatementState &, const DataEdit &, void *); 1060 template bool EditRealInput<8>(IoStatementState &, const DataEdit &, void *); 1061 template bool EditRealInput<10>(IoStatementState &, const DataEdit &, void *); 1062 // TODO: double/double 1063 template bool EditRealInput<16>(IoStatementState &, const DataEdit &, void *); 1064 1065 template bool EditCharacterInput( 1066 IoStatementState &, const DataEdit &, char *, std::size_t); 1067 template bool EditCharacterInput( 1068 IoStatementState &, const DataEdit &, char16_t *, std::size_t); 1069 template bool EditCharacterInput( 1070 IoStatementState &, const DataEdit &, char32_t *, std::size_t); 1071 1072 } // namespace Fortran::runtime::io 1073