1 //===-- runtime/edit-input.cpp --------------------------------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #include "edit-input.h" 10 #include "namelist.h" 11 #include "utf.h" 12 #include "flang/Common/real.h" 13 #include "flang/Common/uint128.h" 14 #include <algorithm> 15 16 namespace Fortran::runtime::io { 17 18 static bool EditBOZInput(IoStatementState &io, const DataEdit &edit, void *n, 19 int base, int totalBitSize) { 20 std::optional<int> remaining; 21 std::optional<char32_t> next{io.PrepareInput(edit, remaining)}; 22 common::UnsignedInt128 value{0}; 23 for (; next; next = io.NextInField(remaining, edit)) { 24 char32_t ch{*next}; 25 if (ch == ' ' || ch == '\t') { 26 continue; 27 } 28 int digit{0}; 29 if (ch >= '0' && ch <= '1') { 30 digit = ch - '0'; 31 } else if (base >= 8 && ch >= '2' && ch <= '7') { 32 digit = ch - '0'; 33 } else if (base >= 10 && ch >= '8' && ch <= '9') { 34 digit = ch - '0'; 35 } else if (base == 16 && ch >= 'A' && ch <= 'Z') { 36 digit = ch + 10 - 'A'; 37 } else if (base == 16 && ch >= 'a' && ch <= 'z') { 38 digit = ch + 10 - 'a'; 39 } else { 40 io.GetIoErrorHandler().SignalError( 41 "Bad character '%lc' in B/O/Z input field", ch); 42 return false; 43 } 44 value *= base; 45 value += digit; 46 } 47 // TODO: check for overflow 48 std::memcpy(n, &value, totalBitSize >> 3); 49 return true; 50 } 51 52 static inline char32_t GetDecimalPoint(const DataEdit &edit) { 53 return edit.modes.editingFlags & decimalComma ? char32_t{','} : char32_t{'.'}; 54 } 55 56 // Prepares input from a field, and consumes the sign, if any. 57 // Returns true if there's a '-' sign. 58 static bool ScanNumericPrefix(IoStatementState &io, const DataEdit &edit, 59 std::optional<char32_t> &next, std::optional<int> &remaining) { 60 next = io.PrepareInput(edit, remaining); 61 bool negative{false}; 62 if (next) { 63 negative = *next == '-'; 64 if (negative || *next == '+') { 65 io.SkipSpaces(remaining); 66 next = io.NextInField(remaining, edit); 67 } 68 } 69 return negative; 70 } 71 72 bool EditIntegerInput( 73 IoStatementState &io, const DataEdit &edit, void *n, int kind) { 74 RUNTIME_CHECK(io.GetIoErrorHandler(), kind >= 1 && !(kind & (kind - 1))); 75 switch (edit.descriptor) { 76 case DataEdit::ListDirected: 77 if (IsNamelistName(io)) { 78 return false; 79 } 80 break; 81 case 'G': 82 case 'I': 83 break; 84 case 'B': 85 return EditBOZInput(io, edit, n, 2, kind << 3); 86 case 'O': 87 return EditBOZInput(io, edit, n, 8, kind << 3); 88 case 'Z': 89 return EditBOZInput(io, edit, n, 16, kind << 3); 90 case 'A': // legacy extension 91 return EditCharacterInput(io, edit, reinterpret_cast<char *>(n), kind); 92 default: 93 io.GetIoErrorHandler().SignalError(IostatErrorInFormat, 94 "Data edit descriptor '%c' may not be used with an INTEGER data item", 95 edit.descriptor); 96 return false; 97 } 98 std::optional<int> remaining; 99 std::optional<char32_t> next; 100 bool negate{ScanNumericPrefix(io, edit, next, remaining)}; 101 common::UnsignedInt128 value{0}; 102 bool any{negate}; 103 for (; next; next = io.NextInField(remaining, edit)) { 104 char32_t ch{*next}; 105 if (ch == ' ' || ch == '\t') { 106 if (edit.modes.editingFlags & blankZero) { 107 ch = '0'; // BZ mode - treat blank as if it were zero 108 } else { 109 continue; 110 } 111 } 112 int digit{0}; 113 if (ch >= '0' && ch <= '9') { 114 digit = ch - '0'; 115 } else { 116 io.GetIoErrorHandler().SignalError( 117 "Bad character '%lc' in INTEGER input field", ch); 118 return false; 119 } 120 value *= 10; 121 value += digit; 122 any = true; 123 } 124 if (negate) { 125 value = -value; 126 } 127 if (any || !io.GetConnectionState().IsAtEOF()) { 128 std::memcpy(n, &value, kind); // a blank field means zero 129 } 130 return any; 131 } 132 133 // Parses a REAL input number from the input source as a normalized 134 // fraction into a supplied buffer -- there's an optional '-', a 135 // decimal point, and at least one digit. The adjusted exponent value 136 // is returned in a reference argument. The returned value is the number 137 // of characters that (should) have been written to the buffer -- this can 138 // be larger than the buffer size and can indicate overflow. Replaces 139 // blanks with zeroes if appropriate. 140 static int ScanRealInput(char *buffer, int bufferSize, IoStatementState &io, 141 const DataEdit &edit, int &exponent) { 142 std::optional<int> remaining; 143 std::optional<char32_t> next; 144 int got{0}; 145 std::optional<int> decimalPoint; 146 auto Put{[&](char ch) -> void { 147 if (got < bufferSize) { 148 buffer[got] = ch; 149 } 150 ++got; 151 }}; 152 if (ScanNumericPrefix(io, edit, next, remaining)) { 153 Put('-'); 154 } 155 if (next.value_or(' ') == ' ') { // empty/blank field means zero 156 remaining.reset(); 157 if (!io.GetConnectionState().IsAtEOF()) { 158 Put('0'); 159 } 160 return got; 161 } 162 char32_t decimal{GetDecimalPoint(edit)}; 163 char32_t first{*next >= 'a' && *next <= 'z' ? *next + 'A' - 'a' : *next}; 164 if (first == 'N' || first == 'I') { 165 // NaN or infinity - convert to upper case 166 // Subtle: a blank field of digits could be followed by 'E' or 'D', 167 for (; next && 168 ((*next >= 'a' && *next <= 'z') || (*next >= 'A' && *next <= 'Z')); 169 next = io.NextInField(remaining, edit)) { 170 if (*next >= 'a' && *next <= 'z') { 171 Put(*next - 'a' + 'A'); 172 } else { 173 Put(*next); 174 } 175 } 176 if (next && *next == '(') { // NaN(...) 177 while (next && *next != ')') { 178 next = io.NextInField(remaining, edit); 179 } 180 } 181 exponent = 0; 182 } else if (first == decimal || (first >= '0' && first <= '9') || 183 first == 'E' || first == 'D' || first == 'Q') { 184 Put('.'); // input field is normalized to a fraction 185 auto start{got}; 186 bool bzMode{(edit.modes.editingFlags & blankZero) != 0}; 187 for (; next; next = io.NextInField(remaining, edit)) { 188 char32_t ch{*next}; 189 if (ch == ' ' || ch == '\t') { 190 if (bzMode) { 191 ch = '0'; // BZ mode - treat blank as if it were zero 192 } else { 193 continue; 194 } 195 } 196 if (ch == '0' && got == start && !decimalPoint) { 197 // omit leading zeroes before the decimal 198 } else if (ch >= '0' && ch <= '9') { 199 Put(ch); 200 } else if (ch == decimal && !decimalPoint) { 201 // the decimal point is *not* copied to the buffer 202 decimalPoint = got - start; // # of digits before the decimal point 203 } else { 204 break; 205 } 206 } 207 if (got == start) { 208 Put('0'); // emit at least one digit 209 } 210 if (next && 211 (*next == 'e' || *next == 'E' || *next == 'd' || *next == 'D' || 212 *next == 'q' || *next == 'Q')) { 213 // Optional exponent letter. Blanks are allowed between the 214 // optional exponent letter and the exponent value. 215 io.SkipSpaces(remaining); 216 next = io.NextInField(remaining, edit); 217 } 218 // The default exponent is -kP, but the scale factor doesn't affect 219 // an explicit exponent. 220 exponent = -edit.modes.scale; 221 if (next && 222 (*next == '-' || *next == '+' || (*next >= '0' && *next <= '9') || 223 (bzMode && (*next == ' ' || *next == '\t')))) { 224 bool negExpo{*next == '-'}; 225 if (negExpo || *next == '+') { 226 next = io.NextInField(remaining, edit); 227 } 228 for (exponent = 0; next; next = io.NextInField(remaining, edit)) { 229 if (*next >= '0' && *next <= '9') { 230 exponent = 10 * exponent + *next - '0'; 231 } else if (bzMode && (*next == ' ' || *next == '\t')) { 232 exponent = 10 * exponent; 233 } else { 234 break; 235 } 236 } 237 if (negExpo) { 238 exponent = -exponent; 239 } 240 } 241 if (decimalPoint) { 242 exponent += *decimalPoint; 243 } else { 244 // When no decimal point (or comma) appears in the value, the 'd' 245 // part of the edit descriptor must be interpreted as the number of 246 // digits in the value to be interpreted as being to the *right* of 247 // the assumed decimal point (13.7.2.3.2) 248 exponent += got - start - edit.digits.value_or(0); 249 } 250 } else { 251 // TODO: hex FP input 252 exponent = 0; 253 return 0; 254 } 255 // Consume the trailing ')' of a list-directed or NAMELIST complex 256 // input value. 257 if (edit.descriptor == DataEdit::ListDirectedImaginaryPart) { 258 if (next && (*next == ' ' || *next == '\t')) { 259 next = io.NextInField(remaining, edit); 260 } 261 if (!next) { // NextInField fails on separators like ')' 262 std::size_t byteCount{0}; 263 next = io.GetCurrentChar(byteCount); 264 if (next && *next == ')') { 265 io.HandleRelativePosition(byteCount); 266 } 267 } 268 } else if (remaining) { 269 while (next && (*next == ' ' || *next == '\t')) { 270 next = io.NextInField(remaining, edit); 271 } 272 if (next) { 273 return 0; // error: unused nonblank character in fixed-width field 274 } 275 } 276 return got; 277 } 278 279 // If no special modes are in effect and the form of the input value 280 // that's present in the input stream is acceptable to the decimal->binary 281 // converter without modification, this fast path for real input 282 // saves time by avoiding memory copies and reformatting of the exponent. 283 template <int PRECISION> 284 static bool TryFastPathRealInput( 285 IoStatementState &io, const DataEdit &edit, void *n) { 286 if (edit.modes.editingFlags & (blankZero | decimalComma)) { 287 return false; 288 } 289 if (edit.modes.scale != 0) { 290 return false; 291 } 292 const char *str{nullptr}; 293 std::size_t got{io.GetNextInputBytes(str)}; 294 if (got == 0 || str == nullptr || 295 !io.GetConnectionState().recordLength.has_value()) { 296 return false; // could not access reliably-terminated input stream 297 } 298 const char *p{str}; 299 std::int64_t maxConsume{ 300 std::min<std::int64_t>(got, edit.width.value_or(got))}; 301 const char *limit{str + maxConsume}; 302 decimal::ConversionToBinaryResult<PRECISION> converted{ 303 decimal::ConvertToBinary<PRECISION>(p, edit.modes.round, limit)}; 304 if (converted.flags & decimal::Invalid) { 305 return false; 306 } 307 if (edit.digits.value_or(0) != 0 && 308 std::memchr(str, '.', p - str) == nullptr) { 309 // No explicit decimal point, and edit descriptor is Fw.d (or other) 310 // with d != 0, which implies scaling. 311 return false; 312 } 313 for (; p < limit && (*p == ' ' || *p == '\t'); ++p) { 314 } 315 if (edit.descriptor == DataEdit::ListDirectedImaginaryPart) { 316 // Need to consume a trailing ')' and any white space after 317 if (p >= limit || *p != ')') { 318 return false; 319 } 320 for (++p; p < limit && (*p == ' ' || *p == '\t'); ++p) { 321 } 322 } 323 if (edit.width && p < str + *edit.width) { 324 return false; // unconverted characters remain in fixed width field 325 } 326 // Success on the fast path! 327 // TODO: raise converted.flags as exceptions? 328 *reinterpret_cast<decimal::BinaryFloatingPointNumber<PRECISION> *>(n) = 329 converted.binary; 330 io.HandleRelativePosition(p - str); 331 return true; 332 } 333 334 template <int KIND> 335 bool EditCommonRealInput(IoStatementState &io, const DataEdit &edit, void *n) { 336 constexpr int binaryPrecision{common::PrecisionOfRealKind(KIND)}; 337 if (TryFastPathRealInput<binaryPrecision>(io, edit, n)) { 338 return true; 339 } 340 // Fast path wasn't available or didn't work; go the more general route 341 static constexpr int maxDigits{ 342 common::MaxDecimalConversionDigits(binaryPrecision)}; 343 static constexpr int bufferSize{maxDigits + 18}; 344 char buffer[bufferSize]; 345 int exponent{0}; 346 int got{ScanRealInput(buffer, maxDigits + 2, io, edit, exponent)}; 347 if (got >= maxDigits + 2) { 348 io.GetIoErrorHandler().Crash("EditCommonRealInput: buffer was too small"); 349 return false; 350 } 351 if (got == 0) { 352 io.GetIoErrorHandler().SignalError("Bad REAL input value"); 353 return false; 354 } 355 bool hadExtra{got > maxDigits}; 356 if (exponent != 0) { 357 buffer[got++] = 'e'; 358 if (exponent < 0) { 359 buffer[got++] = '-'; 360 exponent = -exponent; 361 } 362 if (exponent > 9999) { 363 exponent = 9999; // will convert to +/-Inf 364 } 365 if (exponent > 999) { 366 int dig{exponent / 1000}; 367 buffer[got++] = '0' + dig; 368 int rest{exponent - 1000 * dig}; 369 dig = rest / 100; 370 buffer[got++] = '0' + dig; 371 rest -= 100 * dig; 372 dig = rest / 10; 373 buffer[got++] = '0' + dig; 374 buffer[got++] = '0' + (rest - 10 * dig); 375 } else if (exponent > 99) { 376 int dig{exponent / 100}; 377 buffer[got++] = '0' + dig; 378 int rest{exponent - 100 * dig}; 379 dig = rest / 10; 380 buffer[got++] = '0' + dig; 381 buffer[got++] = '0' + (rest - 10 * dig); 382 } else if (exponent > 9) { 383 int dig{exponent / 10}; 384 buffer[got++] = '0' + dig; 385 buffer[got++] = '0' + (exponent - 10 * dig); 386 } else { 387 buffer[got++] = '0' + exponent; 388 } 389 } 390 buffer[got] = '\0'; 391 const char *p{buffer}; 392 decimal::ConversionToBinaryResult<binaryPrecision> converted{ 393 decimal::ConvertToBinary<binaryPrecision>(p, edit.modes.round)}; 394 if (hadExtra) { 395 converted.flags = static_cast<enum decimal::ConversionResultFlags>( 396 converted.flags | decimal::Inexact); 397 } 398 // TODO: raise converted.flags as exceptions? 399 *reinterpret_cast<decimal::BinaryFloatingPointNumber<binaryPrecision> *>(n) = 400 converted.binary; 401 return true; 402 } 403 404 template <int KIND> 405 bool EditRealInput(IoStatementState &io, const DataEdit &edit, void *n) { 406 constexpr int binaryPrecision{common::PrecisionOfRealKind(KIND)}; 407 switch (edit.descriptor) { 408 case DataEdit::ListDirected: 409 if (IsNamelistName(io)) { 410 return false; 411 } 412 return EditCommonRealInput<KIND>(io, edit, n); 413 case DataEdit::ListDirectedRealPart: 414 case DataEdit::ListDirectedImaginaryPart: 415 case 'F': 416 case 'E': // incl. EN, ES, & EX 417 case 'D': 418 case 'G': 419 return EditCommonRealInput<KIND>(io, edit, n); 420 case 'B': 421 return EditBOZInput( 422 io, edit, n, 2, common::BitsForBinaryPrecision(binaryPrecision)); 423 case 'O': 424 return EditBOZInput( 425 io, edit, n, 8, common::BitsForBinaryPrecision(binaryPrecision)); 426 case 'Z': 427 return EditBOZInput( 428 io, edit, n, 16, common::BitsForBinaryPrecision(binaryPrecision)); 429 case 'A': // legacy extension 430 return EditCharacterInput(io, edit, reinterpret_cast<char *>(n), KIND); 431 default: 432 io.GetIoErrorHandler().SignalError(IostatErrorInFormat, 433 "Data edit descriptor '%c' may not be used for REAL input", 434 edit.descriptor); 435 return false; 436 } 437 } 438 439 // 13.7.3 in Fortran 2018 440 bool EditLogicalInput(IoStatementState &io, const DataEdit &edit, bool &x) { 441 switch (edit.descriptor) { 442 case DataEdit::ListDirected: 443 if (IsNamelistName(io)) { 444 return false; 445 } 446 break; 447 case 'L': 448 case 'G': 449 break; 450 default: 451 io.GetIoErrorHandler().SignalError(IostatErrorInFormat, 452 "Data edit descriptor '%c' may not be used for LOGICAL input", 453 edit.descriptor); 454 return false; 455 } 456 std::optional<int> remaining; 457 std::optional<char32_t> next{io.PrepareInput(edit, remaining)}; 458 if (next && *next == '.') { // skip optional period 459 next = io.NextInField(remaining, edit); 460 } 461 if (!next) { 462 io.GetIoErrorHandler().SignalError("Empty LOGICAL input field"); 463 return false; 464 } 465 switch (*next) { 466 case 'T': 467 case 't': 468 x = true; 469 break; 470 case 'F': 471 case 'f': 472 x = false; 473 break; 474 default: 475 io.GetIoErrorHandler().SignalError( 476 "Bad character '%lc' in LOGICAL input field", *next); 477 return false; 478 } 479 if (remaining) { // ignore the rest of the field 480 io.HandleRelativePosition(*remaining); 481 } else if (edit.descriptor == DataEdit::ListDirected) { 482 while (io.NextInField(remaining, edit)) { // discard rest of field 483 } 484 } 485 return true; 486 } 487 488 // See 13.10.3.1 paragraphs 7-9 in Fortran 2018 489 template <typename CHAR> 490 static bool EditDelimitedCharacterInput( 491 IoStatementState &io, CHAR *x, std::size_t length, char32_t delimiter) { 492 bool result{true}; 493 while (true) { 494 std::size_t byteCount{0}; 495 auto ch{io.GetCurrentChar(byteCount)}; 496 if (!ch) { 497 if (io.AdvanceRecord()) { 498 continue; 499 } else { 500 result = false; // EOF in character value 501 break; 502 } 503 } 504 io.HandleRelativePosition(byteCount); 505 if (*ch == delimiter) { 506 auto next{io.GetCurrentChar(byteCount)}; 507 if (next && *next == delimiter) { 508 // Repeated delimiter: use as character value 509 io.HandleRelativePosition(byteCount); 510 } else { 511 break; // closing delimiter 512 } 513 } 514 if (length > 0) { 515 *x++ = *ch; 516 --length; 517 } 518 } 519 std::fill_n(x, length, ' '); 520 return result; 521 } 522 523 template <typename CHAR> 524 static bool EditListDirectedCharacterInput( 525 IoStatementState &io, CHAR *x, std::size_t length, const DataEdit &edit) { 526 std::size_t byteCount{0}; 527 auto ch{io.GetCurrentChar(byteCount)}; 528 if (ch && (*ch == '\'' || *ch == '"')) { 529 io.HandleRelativePosition(byteCount); 530 return EditDelimitedCharacterInput(io, x, length, *ch); 531 } 532 if (IsNamelistName(io) || io.GetConnectionState().IsAtEOF()) { 533 return false; 534 } 535 // Undelimited list-directed character input: stop at a value separator 536 // or the end of the current record. Subtlety: the "remaining" count 537 // here is a dummy that's used to avoid the interpretation of separators 538 // in NextInField. 539 std::optional<int> remaining{maxUTF8Bytes}; 540 while (std::optional<char32_t> next{io.NextInField(remaining, edit)}) { 541 switch (*next) { 542 case ' ': 543 case '\t': 544 case ',': 545 case ';': 546 case '/': 547 remaining = 0; // value separator: stop 548 break; 549 default: 550 *x++ = *next; 551 --length; 552 remaining = maxUTF8Bytes; 553 } 554 } 555 std::fill_n(x, length, ' '); 556 return true; 557 } 558 559 template <typename CHAR> 560 bool EditCharacterInput( 561 IoStatementState &io, const DataEdit &edit, CHAR *x, std::size_t length) { 562 switch (edit.descriptor) { 563 case DataEdit::ListDirected: 564 return EditListDirectedCharacterInput(io, x, length, edit); 565 case 'A': 566 case 'G': 567 break; 568 default: 569 io.GetIoErrorHandler().SignalError(IostatErrorInFormat, 570 "Data edit descriptor '%c' may not be used with a CHARACTER data item", 571 edit.descriptor); 572 return false; 573 } 574 const ConnectionState &connection{io.GetConnectionState()}; 575 if (connection.IsAtEOF()) { 576 return false; 577 } 578 std::size_t remaining{length}; 579 if (edit.width && *edit.width > 0) { 580 remaining = *edit.width; 581 } 582 // When the field is wider than the variable, we drop the leading 583 // characters. When the variable is wider than the field, there's 584 // trailing padding. 585 const char *input{nullptr}; 586 std::size_t ready{0}; 587 bool hitEnd{false}; 588 // Skip leading bytes. 589 // These bytes don't count towards INQUIRE(IOLENGTH=). 590 std::size_t skip{remaining > length ? remaining - length : 0}; 591 // Transfer payload bytes; these do count. 592 while (remaining > 0) { 593 if (ready == 0) { 594 ready = io.GetNextInputBytes(input); 595 if (ready == 0) { 596 hitEnd = true; 597 break; 598 } 599 } 600 std::size_t chunk; 601 bool skipping{skip > 0}; 602 if (connection.isUTF8) { 603 chunk = MeasureUTF8Bytes(*input); 604 if (skipping) { 605 --skip; 606 } else if (auto ucs{DecodeUTF8(input)}) { 607 *x++ = *ucs; 608 --length; 609 } else if (chunk == 0) { 610 // error recovery: skip bad encoding 611 chunk = 1; 612 } 613 --remaining; 614 } else { 615 if (skipping) { 616 chunk = std::min<std::size_t>(skip, ready); 617 skip -= chunk; 618 } else { 619 chunk = std::min<std::size_t>(remaining, ready); 620 std::memcpy(x, input, chunk); 621 x += chunk; 622 length -= chunk; 623 } 624 remaining -= chunk; 625 } 626 input += chunk; 627 if (!skipping) { 628 io.GotChar(chunk); 629 } 630 io.HandleRelativePosition(chunk); 631 ready -= chunk; 632 } 633 // Pad the remainder of the input variable, if any. 634 std::fill_n(x, length, ' '); 635 if (hitEnd) { 636 io.CheckForEndOfRecord(); // signal any needed error 637 } 638 return true; 639 } 640 641 template bool EditRealInput<2>(IoStatementState &, const DataEdit &, void *); 642 template bool EditRealInput<3>(IoStatementState &, const DataEdit &, void *); 643 template bool EditRealInput<4>(IoStatementState &, const DataEdit &, void *); 644 template bool EditRealInput<8>(IoStatementState &, const DataEdit &, void *); 645 template bool EditRealInput<10>(IoStatementState &, const DataEdit &, void *); 646 // TODO: double/double 647 template bool EditRealInput<16>(IoStatementState &, const DataEdit &, void *); 648 649 template bool EditCharacterInput( 650 IoStatementState &, const DataEdit &, char *, std::size_t); 651 template bool EditCharacterInput( 652 IoStatementState &, const DataEdit &, char16_t *, std::size_t); 653 template bool EditCharacterInput( 654 IoStatementState &, const DataEdit &, char32_t *, std::size_t); 655 656 } // namespace Fortran::runtime::io 657