1 //===-- runtime/edit-input.cpp --------------------------------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #include "edit-input.h" 10 #include "namelist.h" 11 #include "utf.h" 12 #include "flang/Common/optional.h" 13 #include "flang/Common/real.h" 14 #include "flang/Common/uint128.h" 15 #include "flang/Runtime/freestanding-tools.h" 16 #include <algorithm> 17 #include <cfenv> 18 19 namespace Fortran::runtime::io { 20 RT_OFFLOAD_API_GROUP_BEGIN 21 22 // Checks that a list-directed input value has been entirely consumed and 23 // doesn't contain unparsed characters before the next value separator. 24 static inline RT_API_ATTRS bool IsCharValueSeparator( 25 const DataEdit &edit, char32_t ch) { 26 char32_t comma{ 27 edit.modes.editingFlags & decimalComma ? char32_t{';'} : char32_t{','}}; 28 return ch == ' ' || ch == '\t' || ch == comma || ch == '/' || 29 (edit.IsNamelist() && (ch == '&' || ch == '$')); 30 } 31 32 static RT_API_ATTRS bool CheckCompleteListDirectedField( 33 IoStatementState &io, const DataEdit &edit) { 34 if (edit.IsListDirected()) { 35 std::size_t byteCount; 36 if (auto ch{io.GetCurrentChar(byteCount)}) { 37 if (IsCharValueSeparator(edit, *ch)) { 38 return true; 39 } else { 40 const auto &connection{io.GetConnectionState()}; 41 io.GetIoErrorHandler().SignalError(IostatBadListDirectedInputSeparator, 42 "invalid character (0x%x) after list-directed input value, " 43 "at column %d in record %d", 44 static_cast<unsigned>(*ch), 45 static_cast<int>(connection.positionInRecord + 1), 46 static_cast<int>(connection.currentRecordNumber)); 47 return false; 48 } 49 } else { 50 return true; // end of record: ok 51 } 52 } else { 53 return true; 54 } 55 } 56 57 static inline RT_API_ATTRS char32_t GetSeparatorChar(const DataEdit &edit) { 58 return edit.modes.editingFlags & decimalComma ? char32_t{';'} : char32_t{','}; 59 } 60 61 template <int LOG2_BASE> 62 static RT_API_ATTRS bool EditBOZInput( 63 IoStatementState &io, const DataEdit &edit, void *n, std::size_t bytes) { 64 // Skip leading white space & zeroes 65 Fortran::common::optional<int> remaining{io.CueUpInput(edit)}; 66 auto start{io.GetConnectionState().positionInRecord}; 67 Fortran::common::optional<char32_t> next{io.NextInField(remaining, edit)}; 68 if (next.value_or('?') == '0') { 69 do { 70 start = io.GetConnectionState().positionInRecord; 71 next = io.NextInField(remaining, edit); 72 } while (next && *next == '0'); 73 } 74 // Count significant digits after any leading white space & zeroes 75 int digits{0}; 76 int significantBits{0}; 77 const char32_t comma{GetSeparatorChar(edit)}; 78 for (; next; next = io.NextInField(remaining, edit)) { 79 char32_t ch{*next}; 80 if (ch == ' ' || ch == '\t') { 81 if (edit.modes.editingFlags & blankZero) { 82 ch = '0'; // BZ mode - treat blank as if it were zero 83 } else { 84 continue; 85 } 86 } 87 if (ch >= '0' && ch <= '1') { 88 } else if (LOG2_BASE >= 3 && ch >= '2' && ch <= '7') { 89 } else if (LOG2_BASE >= 4 && ch >= '8' && ch <= '9') { 90 } else if (LOG2_BASE >= 4 && ch >= 'A' && ch <= 'F') { 91 } else if (LOG2_BASE >= 4 && ch >= 'a' && ch <= 'f') { 92 } else if (ch == comma) { 93 break; // end non-list-directed field early 94 } else { 95 io.GetIoErrorHandler().SignalError( 96 "Bad character '%lc' in B/O/Z input field", ch); 97 return false; 98 } 99 if (digits++ == 0) { 100 if (ch >= '0' && ch <= '1') { 101 significantBits = 1; 102 } else if (ch >= '2' && ch <= '3') { 103 significantBits = 2; 104 } else if (ch >= '4' && ch <= '7') { 105 significantBits = 3; 106 } else { 107 significantBits = 4; 108 } 109 } else { 110 significantBits += LOG2_BASE; 111 } 112 } 113 auto significantBytes{static_cast<std::size_t>(significantBits + 7) / 8}; 114 if (significantBytes > bytes) { 115 io.GetIoErrorHandler().SignalError(IostatBOZInputOverflow, 116 "B/O/Z input of %d digits overflows %zd-byte variable", digits, bytes); 117 return false; 118 } 119 // Reset to start of significant digits 120 io.HandleAbsolutePosition(start); 121 remaining.reset(); 122 // Make a second pass now that the digit count is known 123 std::memset(n, 0, bytes); 124 int increment{isHostLittleEndian ? -1 : 1}; 125 auto *data{reinterpret_cast<unsigned char *>(n) + 126 (isHostLittleEndian ? significantBytes - 1 : bytes - significantBytes)}; 127 int bitsAfterFirstDigit{(digits - 1) * LOG2_BASE}; 128 int shift{bitsAfterFirstDigit & 7}; 129 if (shift + (significantBits - bitsAfterFirstDigit) > 8) { 130 shift = shift - 8; // misaligned octal 131 } 132 while (digits > 0) { 133 char32_t ch{io.NextInField(remaining, edit).value_or(' ')}; 134 int digit{0}; 135 if (ch == ' ' || ch == '\t') { 136 if (edit.modes.editingFlags & blankZero) { 137 ch = '0'; // BZ mode - treat blank as if it were zero 138 } else { 139 continue; 140 } 141 } 142 --digits; 143 if (ch >= '0' && ch <= '9') { 144 digit = ch - '0'; 145 } else if (ch >= 'A' && ch <= 'F') { 146 digit = ch + 10 - 'A'; 147 } else if (ch >= 'a' && ch <= 'f') { 148 digit = ch + 10 - 'a'; 149 } else { 150 continue; 151 } 152 if (shift < 0) { 153 if (shift + LOG2_BASE > 0) { // misaligned octal 154 *data |= digit >> -shift; 155 } 156 shift += 8; 157 data += increment; 158 } 159 *data |= digit << shift; 160 shift -= LOG2_BASE; 161 } 162 return CheckCompleteListDirectedField(io, edit); 163 } 164 165 static inline RT_API_ATTRS char32_t GetRadixPointChar(const DataEdit &edit) { 166 return edit.modes.editingFlags & decimalComma ? char32_t{','} : char32_t{'.'}; 167 } 168 169 // Prepares input from a field, and returns the sign, if any, else '\0'. 170 static RT_API_ATTRS char ScanNumericPrefix(IoStatementState &io, 171 const DataEdit &edit, Fortran::common::optional<char32_t> &next, 172 Fortran::common::optional<int> &remaining) { 173 remaining = io.CueUpInput(edit); 174 next = io.NextInField(remaining, edit); 175 char sign{'\0'}; 176 if (next) { 177 if (*next == '-' || *next == '+') { 178 sign = *next; 179 if (!edit.IsListDirected()) { 180 io.SkipSpaces(remaining); 181 } 182 next = io.NextInField(remaining, edit); 183 } 184 } 185 return sign; 186 } 187 188 RT_API_ATTRS bool EditIntegerInput(IoStatementState &io, const DataEdit &edit, 189 void *n, int kind, bool isSigned) { 190 RUNTIME_CHECK(io.GetIoErrorHandler(), kind >= 1 && !(kind & (kind - 1))); 191 switch (edit.descriptor) { 192 case DataEdit::ListDirected: 193 if (IsNamelistNameOrSlash(io)) { 194 return false; 195 } 196 break; 197 case 'G': 198 case 'I': 199 break; 200 case 'B': 201 return EditBOZInput<1>(io, edit, n, kind); 202 case 'O': 203 return EditBOZInput<3>(io, edit, n, kind); 204 case 'Z': 205 return EditBOZInput<4>(io, edit, n, kind); 206 case 'A': // legacy extension 207 return EditCharacterInput(io, edit, reinterpret_cast<char *>(n), kind); 208 default: 209 io.GetIoErrorHandler().SignalError(IostatErrorInFormat, 210 "Data edit descriptor '%c' may not be used with an INTEGER data item", 211 edit.descriptor); 212 return false; 213 } 214 Fortran::common::optional<int> remaining; 215 Fortran::common::optional<char32_t> next; 216 char sign{ScanNumericPrefix(io, edit, next, remaining)}; 217 if (sign == '-' && !isSigned) { 218 io.GetIoErrorHandler().SignalError("Negative sign in UNSIGNED input field"); 219 return false; 220 } 221 common::UnsignedInt128 value{0}; 222 bool any{!!sign}; 223 bool overflow{false}; 224 const char32_t comma{GetSeparatorChar(edit)}; 225 static constexpr auto maxu128{~common::UnsignedInt128{0}}; 226 for (; next; next = io.NextInField(remaining, edit)) { 227 char32_t ch{*next}; 228 if (ch == ' ' || ch == '\t') { 229 if (edit.modes.editingFlags & blankZero) { 230 ch = '0'; // BZ mode - treat blank as if it were zero 231 } else { 232 continue; 233 } 234 } 235 int digit{0}; 236 if (ch >= '0' && ch <= '9') { 237 digit = ch - '0'; 238 } else if (ch == comma) { 239 break; // end non-list-directed field early 240 } else { 241 if (edit.modes.inNamelist && ch == GetRadixPointChar(edit)) { 242 // Ignore any fractional part that might appear in NAMELIST integer 243 // input, like a few other Fortran compilers do. 244 // TODO: also process exponents? Some compilers do, but they obviously 245 // can't just be ignored. 246 while ((next = io.NextInField(remaining, edit))) { 247 if (*next < '0' || *next > '9') { 248 break; 249 } 250 } 251 if (!next || *next == comma) { 252 break; 253 } 254 } 255 io.GetIoErrorHandler().SignalError( 256 "Bad character '%lc' in INTEGER input field", ch); 257 return false; 258 } 259 static constexpr auto maxu128OverTen{maxu128 / 10}; 260 static constexpr int maxLastDigit{ 261 static_cast<int>(maxu128 - (maxu128OverTen * 10))}; 262 overflow |= value >= maxu128OverTen && 263 (value > maxu128OverTen || digit > maxLastDigit); 264 value *= 10; 265 value += digit; 266 any = true; 267 } 268 if (!any && !remaining) { 269 io.GetIoErrorHandler().SignalError( 270 "Integer value absent from NAMELIST or list-directed input"); 271 return false; 272 } 273 if (isSigned) { 274 auto maxForKind{common::UnsignedInt128{1} << ((8 * kind) - 1)}; 275 overflow |= value >= maxForKind && (value > maxForKind || sign != '-'); 276 } else { 277 auto maxForKind{maxu128 >> (((16 - kind) * 8) + (isSigned ? 1 : 0))}; 278 overflow |= value >= maxForKind; 279 } 280 if (overflow) { 281 io.GetIoErrorHandler().SignalError(IostatIntegerInputOverflow, 282 "Decimal input overflows INTEGER(%d) variable", kind); 283 return false; 284 } 285 if (sign == '-') { 286 value = -value; 287 } 288 if (any || !io.GetIoErrorHandler().InError()) { 289 // The value is stored in the lower order bits on big endian platform. 290 // When memcpy, shift the value to the higher order bit. 291 auto shft{static_cast<int>(sizeof(value.low())) - kind}; 292 // For kind==8 (i.e. shft==0), the value is stored in low_ in big endian. 293 if (!isHostLittleEndian && shft >= 0) { 294 auto l{value.low() << (8 * shft)}; 295 std::memcpy(n, &l, kind); 296 } else { 297 std::memcpy(n, &value, kind); // a blank field means zero 298 } 299 return true; 300 } else { 301 return false; 302 } 303 } 304 305 // Parses a REAL input number from the input source as a normalized 306 // fraction into a supplied buffer -- there's an optional '-', a 307 // decimal point when the input is not hexadecimal, and at least one 308 // digit. Replaces blanks with zeroes where appropriate. 309 struct ScannedRealInput { 310 // Number of characters that (should) have been written to the 311 // buffer -- this can be larger than the buffer size, which 312 // indicates buffer overflow. Zero indicates an error. 313 int got{0}; 314 int exponent{0}; // adjusted as necessary; binary if isHexadecimal 315 bool isHexadecimal{false}; // 0X... 316 }; 317 static RT_API_ATTRS ScannedRealInput ScanRealInput( 318 char *buffer, int bufferSize, IoStatementState &io, const DataEdit &edit) { 319 Fortran::common::optional<int> remaining; 320 Fortran::common::optional<char32_t> next; 321 int got{0}; 322 Fortran::common::optional<int> radixPointOffset; 323 // The following lambda definition violates the conding style, 324 // but cuda-11.8 nvcc hits an internal error with the brace initialization. 325 auto Put = [&](char ch) -> void { 326 if (got < bufferSize) { 327 buffer[got] = ch; 328 } 329 ++got; 330 }; 331 char sign{ScanNumericPrefix(io, edit, next, remaining)}; 332 if (sign == '-') { 333 Put('-'); 334 } 335 bool bzMode{(edit.modes.editingFlags & blankZero) != 0}; 336 int exponent{0}; 337 if (!next || (!bzMode && *next == ' ') || 338 (!(edit.modes.editingFlags & decimalComma) && *next == ',')) { 339 if (!edit.IsListDirected() && !io.GetConnectionState().IsAtEOF()) { 340 // An empty/blank field means zero when not list-directed. 341 // A fixed-width field containing only a sign is also zero; 342 // this behavior isn't standard-conforming in F'2023 but it is 343 // required to pass FCVS. 344 Put('0'); 345 } 346 return {got, exponent, false}; 347 } 348 char32_t radixPointChar{GetRadixPointChar(edit)}; 349 char32_t first{*next >= 'a' && *next <= 'z' ? *next + 'A' - 'a' : *next}; 350 bool isHexadecimal{false}; 351 if (first == 'N' || first == 'I') { 352 // NaN or infinity - convert to upper case 353 // Subtle: a blank field of digits could be followed by 'E' or 'D', 354 for (; next && 355 ((*next >= 'a' && *next <= 'z') || (*next >= 'A' && *next <= 'Z')); 356 next = io.NextInField(remaining, edit)) { 357 if (*next >= 'a' && *next <= 'z') { 358 Put(*next - 'a' + 'A'); 359 } else { 360 Put(*next); 361 } 362 } 363 if (next && *next == '(') { // NaN(...) 364 Put('('); 365 int depth{1}; 366 while (true) { 367 next = io.NextInField(remaining, edit); 368 if (depth == 0) { 369 break; 370 } else if (!next) { 371 return {}; // error 372 } else if (*next == '(') { 373 ++depth; 374 } else if (*next == ')') { 375 --depth; 376 } 377 Put(*next); 378 } 379 } 380 } else if (first == radixPointChar || (first >= '0' && first <= '9') || 381 (bzMode && (first == ' ' || first == '\t')) || first == 'E' || 382 first == 'D' || first == 'Q') { 383 if (first == '0') { 384 next = io.NextInField(remaining, edit); 385 if (next && (*next == 'x' || *next == 'X')) { // 0X... 386 isHexadecimal = true; 387 next = io.NextInField(remaining, edit); 388 } else { 389 Put('0'); 390 } 391 } 392 // input field is normalized to a fraction 393 if (!isHexadecimal) { 394 Put('.'); 395 } 396 auto start{got}; 397 for (; next; next = io.NextInField(remaining, edit)) { 398 char32_t ch{*next}; 399 if (ch == ' ' || ch == '\t') { 400 if (isHexadecimal) { 401 return {}; // error 402 } else if (bzMode) { 403 ch = '0'; // BZ mode - treat blank as if it were zero 404 } else { 405 continue; // ignore blank in fixed field 406 } 407 } 408 if (ch == '0' && got == start && !radixPointOffset) { 409 // omit leading zeroes before the radix point 410 } else if (ch >= '0' && ch <= '9') { 411 Put(ch); 412 } else if (ch == radixPointChar && !radixPointOffset) { 413 // The radix point character is *not* copied to the buffer. 414 radixPointOffset = got - start; // # of digits before the radix point 415 } else if (isHexadecimal && ch >= 'A' && ch <= 'F') { 416 Put(ch); 417 } else if (isHexadecimal && ch >= 'a' && ch <= 'f') { 418 Put(ch - 'a' + 'A'); // normalize to capitals 419 } else { 420 break; 421 } 422 } 423 if (got == start) { 424 // Nothing but zeroes and maybe a radix point. F'2018 requires 425 // at least one digit, but F'77 did not, and a bare "." shows up in 426 // the FCVS suite. 427 Put('0'); // emit at least one digit 428 } 429 // In list-directed input, a bad exponent is not consumed. 430 auto nextBeforeExponent{next}; 431 auto startExponent{io.GetConnectionState().positionInRecord}; 432 bool hasGoodExponent{false}; 433 if (next) { 434 if (isHexadecimal) { 435 if (*next == 'p' || *next == 'P') { 436 next = io.NextInField(remaining, edit); 437 } else { 438 // The binary exponent is not optional in the standard. 439 return {}; // error 440 } 441 } else if (*next == 'e' || *next == 'E' || *next == 'd' || *next == 'D' || 442 *next == 'q' || *next == 'Q') { 443 // Optional exponent letter. Blanks are allowed between the 444 // optional exponent letter and the exponent value. 445 io.SkipSpaces(remaining); 446 next = io.NextInField(remaining, edit); 447 } 448 } 449 if (next && 450 (*next == '-' || *next == '+' || (*next >= '0' && *next <= '9') || 451 *next == ' ' || *next == '\t')) { 452 bool negExpo{*next == '-'}; 453 if (negExpo || *next == '+') { 454 next = io.NextInField(remaining, edit); 455 } 456 for (; next; next = io.NextInField(remaining, edit)) { 457 if (*next >= '0' && *next <= '9') { 458 hasGoodExponent = true; 459 if (exponent < 10000) { 460 exponent = 10 * exponent + *next - '0'; 461 } 462 } else if (*next == ' ' || *next == '\t') { 463 if (isHexadecimal) { 464 break; 465 } else if (bzMode) { 466 hasGoodExponent = true; 467 exponent = 10 * exponent; 468 } 469 } else { 470 break; 471 } 472 } 473 if (negExpo) { 474 exponent = -exponent; 475 } 476 } 477 if (!hasGoodExponent) { 478 if (isHexadecimal) { 479 return {}; // error 480 } 481 // There isn't a good exponent; do not consume it. 482 next = nextBeforeExponent; 483 io.HandleAbsolutePosition(startExponent); 484 // The default exponent is -kP, but the scale factor doesn't affect 485 // an explicit exponent. 486 exponent = -edit.modes.scale; 487 } 488 // Adjust exponent by number of digits before the radix point. 489 if (isHexadecimal) { 490 // Exponents for hexadecimal input are binary. 491 exponent += radixPointOffset.value_or(got - start) * 4; 492 } else if (radixPointOffset) { 493 exponent += *radixPointOffset; 494 } else { 495 // When no redix point (or comma) appears in the value, the 'd' 496 // part of the edit descriptor must be interpreted as the number of 497 // digits in the value to be interpreted as being to the *right* of 498 // the assumed radix point (13.7.2.3.2) 499 exponent += got - start - edit.digits.value_or(0); 500 } 501 } 502 // Consume the trailing ')' of a list-directed or NAMELIST complex 503 // input value. 504 if (edit.descriptor == DataEdit::ListDirectedImaginaryPart) { 505 if (next && (*next == ' ' || *next == '\t')) { 506 io.SkipSpaces(remaining); 507 next = io.NextInField(remaining, edit); 508 } 509 if (!next) { // NextInField fails on separators like ')' 510 std::size_t byteCount{0}; 511 next = io.GetCurrentChar(byteCount); 512 if (next && *next == ')') { 513 io.HandleRelativePosition(byteCount); 514 } 515 } 516 } else if (remaining) { 517 while (next && (*next == ' ' || *next == '\t')) { 518 next = io.NextInField(remaining, edit); 519 } 520 if (next && (*next != ',' || (edit.modes.editingFlags & decimalComma))) { 521 return {}; // error: unused nonblank character in fixed-width field 522 } 523 } 524 return {got, exponent, isHexadecimal}; 525 } 526 527 static RT_API_ATTRS void RaiseFPExceptions( 528 decimal::ConversionResultFlags flags) { 529 #undef RAISE 530 #if defined(RT_DEVICE_COMPILATION) 531 Terminator terminator(__FILE__, __LINE__); 532 #define RAISE(e) \ 533 terminator.Crash( \ 534 "not implemented yet: raising FP exception in device code: %s", #e); 535 #else // !defined(RT_DEVICE_COMPILATION) 536 #ifdef feraisexcept // a macro in some environments; omit std:: 537 #define RAISE feraiseexcept 538 #else 539 #define RAISE std::feraiseexcept 540 #endif 541 #endif // !defined(RT_DEVICE_COMPILATION) 542 543 // Some environment (e.g. emscripten, musl) don't define FE_OVERFLOW as allowed 544 // by c99 (but not c++11) :-/ 545 #if defined(FE_OVERFLOW) || defined(RT_DEVICE_COMPILATION) 546 if (flags & decimal::ConversionResultFlags::Overflow) { 547 RAISE(FE_OVERFLOW); 548 } 549 #endif 550 #if defined(FE_UNDERFLOW) || defined(RT_DEVICE_COMPILATION) 551 if (flags & decimal::ConversionResultFlags::Underflow) { 552 RAISE(FE_UNDERFLOW); 553 } 554 #endif 555 #if defined(FE_INEXACT) || defined(RT_DEVICE_COMPILATION) 556 if (flags & decimal::ConversionResultFlags::Inexact) { 557 RAISE(FE_INEXACT); 558 } 559 #endif 560 #if defined(FE_INVALID) || defined(RT_DEVICE_COMPILATION) 561 if (flags & decimal::ConversionResultFlags::Invalid) { 562 RAISE(FE_INVALID); 563 } 564 #endif 565 #undef RAISE 566 } 567 568 // If no special modes are in effect and the form of the input value 569 // that's present in the input stream is acceptable to the decimal->binary 570 // converter without modification, this fast path for real input 571 // saves time by avoiding memory copies and reformatting of the exponent. 572 template <int PRECISION> 573 static RT_API_ATTRS bool TryFastPathRealDecimalInput( 574 IoStatementState &io, const DataEdit &edit, void *n) { 575 if (edit.modes.editingFlags & (blankZero | decimalComma)) { 576 return false; 577 } 578 if (edit.modes.scale != 0) { 579 return false; 580 } 581 const ConnectionState &connection{io.GetConnectionState()}; 582 if (connection.internalIoCharKind > 1) { 583 return false; // reading non-default character 584 } 585 const char *str{nullptr}; 586 std::size_t got{io.GetNextInputBytes(str)}; 587 if (got == 0 || str == nullptr || !connection.recordLength.has_value()) { 588 return false; // could not access reliably-terminated input stream 589 } 590 const char *p{str}; 591 std::int64_t maxConsume{ 592 std::min<std::int64_t>(got, edit.width.value_or(got))}; 593 const char *limit{str + maxConsume}; 594 decimal::ConversionToBinaryResult<PRECISION> converted{ 595 decimal::ConvertToBinary<PRECISION>(p, edit.modes.round, limit)}; 596 if (converted.flags & (decimal::Invalid | decimal::Overflow)) { 597 return false; 598 } 599 if (edit.digits.value_or(0) != 0) { 600 // Edit descriptor is Fw.d (or other) with d != 0, which 601 // implies scaling 602 const char *q{str}; 603 for (; q < limit; ++q) { 604 if (*q == '.' || *q == 'n' || *q == 'N') { 605 break; 606 } 607 } 608 if (q == limit) { 609 // No explicit decimal point, and not NaN/Inf. 610 return false; 611 } 612 } 613 if (edit.descriptor == DataEdit::ListDirectedImaginaryPart) { 614 // Need to consume a trailing ')', possibly with leading spaces 615 for (; p < limit && (*p == ' ' || *p == '\t'); ++p) { 616 } 617 if (p < limit && *p == ')') { 618 ++p; 619 } else { 620 return false; 621 } 622 } else if (edit.IsListDirected()) { 623 if (p < limit && !IsCharValueSeparator(edit, *p)) { 624 return false; 625 } 626 } else { 627 for (; p < limit && (*p == ' ' || *p == '\t'); ++p) { 628 } 629 if (edit.width && p < str + *edit.width) { 630 return false; // unconverted characters remain in fixed width field 631 } 632 } 633 // Success on the fast path! 634 *reinterpret_cast<decimal::BinaryFloatingPointNumber<PRECISION> *>(n) = 635 converted.binary; 636 io.HandleRelativePosition(p - str); 637 // Set FP exception flags 638 if (converted.flags != decimal::ConversionResultFlags::Exact) { 639 RaiseFPExceptions(converted.flags); 640 } 641 return true; 642 } 643 644 template <int binaryPrecision> 645 RT_API_ATTRS decimal::ConversionToBinaryResult<binaryPrecision> 646 ConvertHexadecimal( 647 const char *&p, enum decimal::FortranRounding rounding, int expo) { 648 using RealType = decimal::BinaryFloatingPointNumber<binaryPrecision>; 649 using RawType = typename RealType::RawType; 650 bool isNegative{*p == '-'}; 651 constexpr RawType one{1}; 652 RawType signBit{0}; 653 if (isNegative) { 654 ++p; 655 signBit = one << (RealType::bits - 1); 656 } 657 RawType fraction{0}; 658 // Adjust the incoming binary P+/- exponent to shift the radix point 659 // to below the LSB and add in the bias. 660 expo += binaryPrecision - 1 + RealType::exponentBias; 661 // Input the fraction. 662 int roundingBit{0}; 663 int guardBit{0}; 664 for (; *p; ++p) { 665 fraction <<= 4; 666 expo -= 4; 667 if (*p >= '0' && *p <= '9') { 668 fraction |= *p - '0'; 669 } else if (*p >= 'A' && *p <= 'F') { 670 fraction |= *p - 'A' + 10; // data were normalized to capitals 671 } else { 672 break; 673 } 674 if (fraction >> binaryPrecision) { 675 while (fraction >> binaryPrecision) { 676 guardBit |= roundingBit; 677 roundingBit = (int)fraction & 1; 678 fraction >>= 1; 679 ++expo; 680 } 681 // Consume excess digits 682 while (*++p) { 683 if (*p == '0') { 684 } else if ((*p >= '1' && *p <= '9') || (*p >= 'A' && *p <= 'F')) { 685 guardBit = 1; 686 } else { 687 break; 688 } 689 } 690 break; 691 } 692 } 693 if (fraction) { 694 // Boost biased expo if too small 695 while (expo < 1) { 696 guardBit |= roundingBit; 697 roundingBit = (int)fraction & 1; 698 fraction >>= 1; 699 ++expo; 700 } 701 // Normalize 702 while (expo > 1 && !(fraction >> (binaryPrecision - 1))) { 703 fraction <<= 1; 704 --expo; 705 guardBit = roundingBit = 0; 706 } 707 } 708 // Rounding 709 bool increase{false}; 710 switch (rounding) { 711 case decimal::RoundNearest: // RN & RP 712 increase = roundingBit && (guardBit | ((int)fraction & 1)); 713 break; 714 case decimal::RoundUp: // RU 715 increase = !isNegative && (roundingBit | guardBit); 716 break; 717 case decimal::RoundDown: // RD 718 increase = isNegative && (roundingBit | guardBit); 719 break; 720 case decimal::RoundToZero: // RZ 721 break; 722 case decimal::RoundCompatible: // RC 723 increase = roundingBit != 0; 724 break; 725 } 726 if (increase) { 727 ++fraction; 728 if (fraction >> binaryPrecision) { 729 fraction >>= 1; 730 ++expo; 731 } 732 } 733 // Package & return result 734 constexpr RawType significandMask{(one << RealType::significandBits) - 1}; 735 int flags{(roundingBit | guardBit) ? decimal::Inexact : decimal::Exact}; 736 if (!fraction) { 737 expo = 0; 738 } else if (expo == 1 && !(fraction >> (binaryPrecision - 1))) { 739 expo = 0; // subnormal 740 flags |= decimal::Underflow; 741 } else if (expo >= RealType::maxExponent) { 742 if (rounding == decimal::RoundToZero || 743 (rounding == decimal::RoundDown && !isNegative) || 744 (rounding == decimal::RoundUp && isNegative)) { 745 expo = RealType::maxExponent - 1; // +/-HUGE() 746 fraction = significandMask; 747 } else { 748 expo = RealType::maxExponent; // +/-Inf 749 fraction = 0; 750 flags |= decimal::Overflow; 751 } 752 } else { 753 fraction &= significandMask; // remove explicit normalization unless x87 754 } 755 return decimal::ConversionToBinaryResult<binaryPrecision>{ 756 RealType{static_cast<RawType>(signBit | 757 static_cast<RawType>(expo) << RealType::significandBits | fraction)}, 758 static_cast<decimal::ConversionResultFlags>(flags)}; 759 } 760 761 template <int KIND> 762 RT_API_ATTRS bool EditCommonRealInput( 763 IoStatementState &io, const DataEdit &edit, void *n) { 764 constexpr int binaryPrecision{common::PrecisionOfRealKind(KIND)}; 765 if (TryFastPathRealDecimalInput<binaryPrecision>(io, edit, n)) { 766 return CheckCompleteListDirectedField(io, edit); 767 } 768 // Fast path wasn't available or didn't work; go the more general route 769 static constexpr int maxDigits{ 770 common::MaxDecimalConversionDigits(binaryPrecision)}; 771 static constexpr int bufferSize{maxDigits + 18}; 772 char buffer[bufferSize]; 773 auto scanned{ScanRealInput(buffer, maxDigits + 2, io, edit)}; 774 int got{scanned.got}; 775 if (got >= maxDigits + 2) { 776 io.GetIoErrorHandler().Crash("EditCommonRealInput: buffer was too small"); 777 return false; 778 } 779 if (got == 0) { 780 const auto &connection{io.GetConnectionState()}; 781 io.GetIoErrorHandler().SignalError(IostatBadRealInput, 782 "Bad real input data at column %d of record %d", 783 static_cast<int>(connection.positionInRecord + 1), 784 static_cast<int>(connection.currentRecordNumber)); 785 return false; 786 } 787 decimal::ConversionToBinaryResult<binaryPrecision> converted; 788 const char *p{buffer}; 789 if (scanned.isHexadecimal) { 790 buffer[got] = '\0'; 791 converted = ConvertHexadecimal<binaryPrecision>( 792 p, edit.modes.round, scanned.exponent); 793 } else { 794 bool hadExtra{got > maxDigits}; 795 int exponent{scanned.exponent}; 796 if (exponent != 0) { 797 buffer[got++] = 'e'; 798 if (exponent < 0) { 799 buffer[got++] = '-'; 800 exponent = -exponent; 801 } 802 if (exponent > 9999) { 803 exponent = 9999; // will convert to +/-Inf 804 } 805 if (exponent > 999) { 806 int dig{exponent / 1000}; 807 buffer[got++] = '0' + dig; 808 int rest{exponent - 1000 * dig}; 809 dig = rest / 100; 810 buffer[got++] = '0' + dig; 811 rest -= 100 * dig; 812 dig = rest / 10; 813 buffer[got++] = '0' + dig; 814 buffer[got++] = '0' + (rest - 10 * dig); 815 } else if (exponent > 99) { 816 int dig{exponent / 100}; 817 buffer[got++] = '0' + dig; 818 int rest{exponent - 100 * dig}; 819 dig = rest / 10; 820 buffer[got++] = '0' + dig; 821 buffer[got++] = '0' + (rest - 10 * dig); 822 } else if (exponent > 9) { 823 int dig{exponent / 10}; 824 buffer[got++] = '0' + dig; 825 buffer[got++] = '0' + (exponent - 10 * dig); 826 } else { 827 buffer[got++] = '0' + exponent; 828 } 829 } 830 buffer[got] = '\0'; 831 converted = decimal::ConvertToBinary<binaryPrecision>(p, edit.modes.round); 832 if (hadExtra) { 833 converted.flags = static_cast<enum decimal::ConversionResultFlags>( 834 converted.flags | decimal::Inexact); 835 } 836 } 837 if (*p) { // unprocessed junk after value 838 const auto &connection{io.GetConnectionState()}; 839 io.GetIoErrorHandler().SignalError(IostatBadRealInput, 840 "Trailing characters after real input data at column %d of record %d", 841 static_cast<int>(connection.positionInRecord + 1), 842 static_cast<int>(connection.currentRecordNumber)); 843 return false; 844 } 845 *reinterpret_cast<decimal::BinaryFloatingPointNumber<binaryPrecision> *>(n) = 846 converted.binary; 847 // Set FP exception flags 848 if (converted.flags != decimal::ConversionResultFlags::Exact) { 849 if (converted.flags & decimal::ConversionResultFlags::Overflow) { 850 io.GetIoErrorHandler().SignalError(IostatRealInputOverflow); 851 return false; 852 } 853 RaiseFPExceptions(converted.flags); 854 } 855 return CheckCompleteListDirectedField(io, edit); 856 } 857 858 template <int KIND> 859 RT_API_ATTRS bool EditRealInput( 860 IoStatementState &io, const DataEdit &edit, void *n) { 861 switch (edit.descriptor) { 862 case DataEdit::ListDirected: 863 if (IsNamelistNameOrSlash(io)) { 864 return false; 865 } 866 return EditCommonRealInput<KIND>(io, edit, n); 867 case DataEdit::ListDirectedRealPart: 868 case DataEdit::ListDirectedImaginaryPart: 869 case 'F': 870 case 'E': // incl. EN, ES, & EX 871 case 'D': 872 case 'G': 873 return EditCommonRealInput<KIND>(io, edit, n); 874 case 'B': 875 return EditBOZInput<1>(io, edit, n, 876 common::BitsForBinaryPrecision(common::PrecisionOfRealKind(KIND)) >> 3); 877 case 'O': 878 return EditBOZInput<3>(io, edit, n, 879 common::BitsForBinaryPrecision(common::PrecisionOfRealKind(KIND)) >> 3); 880 case 'Z': 881 return EditBOZInput<4>(io, edit, n, 882 common::BitsForBinaryPrecision(common::PrecisionOfRealKind(KIND)) >> 3); 883 case 'A': // legacy extension 884 return EditCharacterInput(io, edit, reinterpret_cast<char *>(n), KIND); 885 default: 886 io.GetIoErrorHandler().SignalError(IostatErrorInFormat, 887 "Data edit descriptor '%c' may not be used for REAL input", 888 edit.descriptor); 889 return false; 890 } 891 } 892 893 // 13.7.3 in Fortran 2018 894 RT_API_ATTRS bool EditLogicalInput( 895 IoStatementState &io, const DataEdit &edit, bool &x) { 896 switch (edit.descriptor) { 897 case DataEdit::ListDirected: 898 if (IsNamelistNameOrSlash(io)) { 899 return false; 900 } 901 break; 902 case 'L': 903 case 'G': 904 break; 905 default: 906 io.GetIoErrorHandler().SignalError(IostatErrorInFormat, 907 "Data edit descriptor '%c' may not be used for LOGICAL input", 908 edit.descriptor); 909 return false; 910 } 911 Fortran::common::optional<int> remaining{io.CueUpInput(edit)}; 912 Fortran::common::optional<char32_t> next{io.NextInField(remaining, edit)}; 913 if (next && *next == '.') { // skip optional period 914 next = io.NextInField(remaining, edit); 915 } 916 if (!next) { 917 io.GetIoErrorHandler().SignalError("Empty LOGICAL input field"); 918 return false; 919 } 920 switch (*next) { 921 case 'T': 922 case 't': 923 x = true; 924 break; 925 case 'F': 926 case 'f': 927 x = false; 928 break; 929 default: 930 io.GetIoErrorHandler().SignalError( 931 "Bad character '%lc' in LOGICAL input field", *next); 932 return false; 933 } 934 if (remaining) { // ignore the rest of a fixed-width field 935 io.HandleRelativePosition(*remaining); 936 } else if (edit.descriptor == DataEdit::ListDirected) { 937 while (io.NextInField(remaining, edit)) { // discard rest of field 938 } 939 } 940 return CheckCompleteListDirectedField(io, edit); 941 } 942 943 // See 13.10.3.1 paragraphs 7-9 in Fortran 2018 944 template <typename CHAR> 945 static RT_API_ATTRS bool EditDelimitedCharacterInput( 946 IoStatementState &io, CHAR *x, std::size_t length, char32_t delimiter) { 947 bool result{true}; 948 while (true) { 949 std::size_t byteCount{0}; 950 auto ch{io.GetCurrentChar(byteCount)}; 951 if (!ch) { 952 if (io.AdvanceRecord()) { 953 continue; 954 } else { 955 result = false; // EOF in character value 956 break; 957 } 958 } 959 io.HandleRelativePosition(byteCount); 960 if (*ch == delimiter) { 961 auto next{io.GetCurrentChar(byteCount)}; 962 if (next && *next == delimiter) { 963 // Repeated delimiter: use as character value 964 io.HandleRelativePosition(byteCount); 965 } else { 966 break; // closing delimiter 967 } 968 } 969 if (length > 0) { 970 *x++ = *ch; 971 --length; 972 } 973 } 974 Fortran::runtime::fill_n(x, length, ' '); 975 return result; 976 } 977 978 template <typename CHAR> 979 static RT_API_ATTRS bool EditListDirectedCharacterInput( 980 IoStatementState &io, CHAR *x, std::size_t length, const DataEdit &edit) { 981 std::size_t byteCount{0}; 982 auto ch{io.GetCurrentChar(byteCount)}; 983 if (ch && (*ch == '\'' || *ch == '"')) { 984 io.HandleRelativePosition(byteCount); 985 return EditDelimitedCharacterInput(io, x, length, *ch); 986 } 987 if (IsNamelistNameOrSlash(io) || io.GetConnectionState().IsAtEOF()) { 988 return false; 989 } 990 // Undelimited list-directed character input: stop at a value separator 991 // or the end of the current record. 992 while (auto ch{io.GetCurrentChar(byteCount)}) { 993 bool isSep{false}; 994 switch (*ch) { 995 case ' ': 996 case '\t': 997 case '/': 998 isSep = true; 999 break; 1000 case '&': 1001 case '$': 1002 isSep = edit.IsNamelist(); 1003 break; 1004 case ',': 1005 isSep = !(edit.modes.editingFlags & decimalComma); 1006 break; 1007 case ';': 1008 isSep = !!(edit.modes.editingFlags & decimalComma); 1009 break; 1010 default: 1011 break; 1012 } 1013 if (isSep) { 1014 break; 1015 } 1016 if (length > 0) { 1017 *x++ = *ch; 1018 --length; 1019 } else if (edit.IsNamelist()) { 1020 // GNU compatibility 1021 break; 1022 } 1023 io.HandleRelativePosition(byteCount); 1024 io.GotChar(byteCount); 1025 } 1026 Fortran::runtime::fill_n(x, length, ' '); 1027 return true; 1028 } 1029 1030 template <typename CHAR> 1031 RT_API_ATTRS bool EditCharacterInput(IoStatementState &io, const DataEdit &edit, 1032 CHAR *x, std::size_t lengthChars) { 1033 switch (edit.descriptor) { 1034 case DataEdit::ListDirected: 1035 return EditListDirectedCharacterInput(io, x, lengthChars, edit); 1036 case 'A': 1037 case 'G': 1038 break; 1039 case 'B': 1040 return EditBOZInput<1>(io, edit, x, lengthChars * sizeof *x); 1041 case 'O': 1042 return EditBOZInput<3>(io, edit, x, lengthChars * sizeof *x); 1043 case 'Z': 1044 return EditBOZInput<4>(io, edit, x, lengthChars * sizeof *x); 1045 default: 1046 io.GetIoErrorHandler().SignalError(IostatErrorInFormat, 1047 "Data edit descriptor '%c' may not be used with a CHARACTER data item", 1048 edit.descriptor); 1049 return false; 1050 } 1051 const ConnectionState &connection{io.GetConnectionState()}; 1052 std::size_t remainingChars{lengthChars}; 1053 // Skip leading characters. 1054 // Their bytes don't count towards INQUIRE(IOLENGTH=). 1055 std::size_t skipChars{0}; 1056 if (edit.width && *edit.width > 0) { 1057 remainingChars = *edit.width; 1058 if (remainingChars > lengthChars) { 1059 skipChars = remainingChars - lengthChars; 1060 } 1061 } 1062 // When the field is wider than the variable, we drop the leading 1063 // characters. When the variable is wider than the field, there can be 1064 // trailing padding or an EOR condition. 1065 const char *input{nullptr}; 1066 std::size_t readyBytes{0}; 1067 // Transfer payload bytes; these do count. 1068 while (remainingChars > 0) { 1069 if (readyBytes == 0) { 1070 readyBytes = io.GetNextInputBytes(input); 1071 if (readyBytes == 0 || 1072 (readyBytes < remainingChars && edit.modes.nonAdvancing)) { 1073 if (io.CheckForEndOfRecord(readyBytes)) { 1074 if (readyBytes == 0) { 1075 // PAD='YES' and no more data 1076 Fortran::runtime::fill_n(x, lengthChars, ' '); 1077 return !io.GetIoErrorHandler().InError(); 1078 } else { 1079 // Do partial read(s) then pad on last iteration 1080 } 1081 } else { 1082 return !io.GetIoErrorHandler().InError(); 1083 } 1084 } 1085 } 1086 std::size_t chunkBytes; 1087 std::size_t chunkChars{1}; 1088 bool skipping{skipChars > 0}; 1089 if (connection.isUTF8) { 1090 chunkBytes = MeasureUTF8Bytes(*input); 1091 if (skipping) { 1092 --skipChars; 1093 } else if (auto ucs{DecodeUTF8(input)}) { 1094 if ((sizeof *x == 1 && *ucs > 0xff) || 1095 (sizeof *x == 2 && *ucs > 0xffff)) { 1096 *x++ = '?'; 1097 } else { 1098 *x++ = *ucs; 1099 } 1100 --lengthChars; 1101 } else if (chunkBytes == 0) { 1102 // error recovery: skip bad encoding 1103 chunkBytes = 1; 1104 } 1105 } else if (connection.internalIoCharKind > 1) { 1106 // Reading from non-default character internal unit 1107 chunkBytes = connection.internalIoCharKind; 1108 if (skipping) { 1109 --skipChars; 1110 } else { 1111 char32_t buffer{0}; 1112 std::memcpy(&buffer, input, chunkBytes); 1113 if ((sizeof *x == 1 && buffer > 0xff) || 1114 (sizeof *x == 2 && buffer > 0xffff)) { 1115 *x++ = '?'; 1116 } else { 1117 *x++ = buffer; 1118 } 1119 --lengthChars; 1120 } 1121 } else if constexpr (sizeof *x > 1) { 1122 // Read single byte with expansion into multi-byte CHARACTER 1123 chunkBytes = 1; 1124 if (skipping) { 1125 --skipChars; 1126 } else { 1127 *x++ = static_cast<unsigned char>(*input); 1128 --lengthChars; 1129 } 1130 } else { // single bytes -> default CHARACTER 1131 if (skipping) { 1132 chunkBytes = std::min<std::size_t>(skipChars, readyBytes); 1133 chunkChars = chunkBytes; 1134 skipChars -= chunkChars; 1135 } else { 1136 chunkBytes = std::min<std::size_t>(remainingChars, readyBytes); 1137 chunkBytes = std::min<std::size_t>(lengthChars, chunkBytes); 1138 chunkChars = chunkBytes; 1139 std::memcpy(x, input, chunkBytes); 1140 x += chunkBytes; 1141 lengthChars -= chunkChars; 1142 } 1143 } 1144 input += chunkBytes; 1145 remainingChars -= chunkChars; 1146 if (!skipping) { 1147 io.GotChar(chunkBytes); 1148 } 1149 io.HandleRelativePosition(chunkBytes); 1150 readyBytes -= chunkBytes; 1151 } 1152 // Pad the remainder of the input variable, if any. 1153 Fortran::runtime::fill_n(x, lengthChars, ' '); 1154 return CheckCompleteListDirectedField(io, edit); 1155 } 1156 1157 template RT_API_ATTRS bool EditRealInput<2>( 1158 IoStatementState &, const DataEdit &, void *); 1159 template RT_API_ATTRS bool EditRealInput<3>( 1160 IoStatementState &, const DataEdit &, void *); 1161 template RT_API_ATTRS bool EditRealInput<4>( 1162 IoStatementState &, const DataEdit &, void *); 1163 template RT_API_ATTRS bool EditRealInput<8>( 1164 IoStatementState &, const DataEdit &, void *); 1165 template RT_API_ATTRS bool EditRealInput<10>( 1166 IoStatementState &, const DataEdit &, void *); 1167 // TODO: double/double 1168 template RT_API_ATTRS bool EditRealInput<16>( 1169 IoStatementState &, const DataEdit &, void *); 1170 1171 template RT_API_ATTRS bool EditCharacterInput( 1172 IoStatementState &, const DataEdit &, char *, std::size_t); 1173 template RT_API_ATTRS bool EditCharacterInput( 1174 IoStatementState &, const DataEdit &, char16_t *, std::size_t); 1175 template RT_API_ATTRS bool EditCharacterInput( 1176 IoStatementState &, const DataEdit &, char32_t *, std::size_t); 1177 1178 RT_OFFLOAD_API_GROUP_END 1179 } // namespace Fortran::runtime::io 1180