1 //===-- runtime/edit-input.cpp --------------------------------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #include "edit-input.h" 10 #include "namelist.h" 11 #include "utf.h" 12 #include "flang/Common/optional.h" 13 #include "flang/Common/real.h" 14 #include "flang/Common/uint128.h" 15 #include "flang/Runtime/freestanding-tools.h" 16 #include <algorithm> 17 #include <cfenv> 18 19 namespace Fortran::runtime::io { 20 RT_OFFLOAD_API_GROUP_BEGIN 21 22 // Checks that a list-directed input value has been entirely consumed and 23 // doesn't contain unparsed characters before the next value separator. 24 static inline RT_API_ATTRS bool IsCharValueSeparator( 25 const DataEdit &edit, char32_t ch) { 26 char32_t comma{ 27 edit.modes.editingFlags & decimalComma ? char32_t{';'} : char32_t{','}}; 28 return ch == ' ' || ch == '\t' || ch == comma || ch == '/' || 29 (edit.IsNamelist() && (ch == '&' || ch == '$')); 30 } 31 32 static RT_API_ATTRS bool CheckCompleteListDirectedField( 33 IoStatementState &io, const DataEdit &edit) { 34 if (edit.IsListDirected()) { 35 std::size_t byteCount; 36 if (auto ch{io.GetCurrentChar(byteCount)}) { 37 if (IsCharValueSeparator(edit, *ch)) { 38 return true; 39 } else { 40 const auto &connection{io.GetConnectionState()}; 41 io.GetIoErrorHandler().SignalError(IostatBadListDirectedInputSeparator, 42 "invalid character (0x%x) after list-directed input value, " 43 "at column %d in record %d", 44 static_cast<unsigned>(*ch), 45 static_cast<int>(connection.positionInRecord + 1), 46 static_cast<int>(connection.currentRecordNumber)); 47 return false; 48 } 49 } else { 50 return true; // end of record: ok 51 } 52 } else { 53 return true; 54 } 55 } 56 57 static inline RT_API_ATTRS char32_t GetSeparatorChar(const DataEdit &edit) { 58 return edit.modes.editingFlags & decimalComma ? char32_t{';'} : char32_t{','}; 59 } 60 61 template <int LOG2_BASE> 62 static RT_API_ATTRS bool EditBOZInput( 63 IoStatementState &io, const DataEdit &edit, void *n, std::size_t bytes) { 64 // Skip leading white space & zeroes 65 Fortran::common::optional<int> remaining{io.CueUpInput(edit)}; 66 auto start{io.GetConnectionState().positionInRecord}; 67 Fortran::common::optional<char32_t> next{io.NextInField(remaining, edit)}; 68 if (next.value_or('?') == '0') { 69 do { 70 start = io.GetConnectionState().positionInRecord; 71 next = io.NextInField(remaining, edit); 72 } while (next && *next == '0'); 73 } 74 // Count significant digits after any leading white space & zeroes 75 int digits{0}; 76 int significantBits{0}; 77 const char32_t comma{GetSeparatorChar(edit)}; 78 for (; next; next = io.NextInField(remaining, edit)) { 79 char32_t ch{*next}; 80 if (ch == ' ' || ch == '\t') { 81 if (edit.modes.editingFlags & blankZero) { 82 ch = '0'; // BZ mode - treat blank as if it were zero 83 } else { 84 continue; 85 } 86 } 87 if (ch >= '0' && ch <= '1') { 88 } else if (LOG2_BASE >= 3 && ch >= '2' && ch <= '7') { 89 } else if (LOG2_BASE >= 4 && ch >= '8' && ch <= '9') { 90 } else if (LOG2_BASE >= 4 && ch >= 'A' && ch <= 'F') { 91 } else if (LOG2_BASE >= 4 && ch >= 'a' && ch <= 'f') { 92 } else if (ch == comma) { 93 break; // end non-list-directed field early 94 } else { 95 io.GetIoErrorHandler().SignalError( 96 "Bad character '%lc' in B/O/Z input field", ch); 97 return false; 98 } 99 if (digits++ == 0) { 100 significantBits = 4; 101 if (ch >= '0' && ch <= '1') { 102 significantBits = 1; 103 } else if (ch >= '2' && ch <= '3') { 104 significantBits = 2; 105 } else if (ch >= '4' && ch <= '7') { 106 significantBits = 3; 107 } else { 108 significantBits = 4; 109 } 110 } else { 111 significantBits += LOG2_BASE; 112 } 113 } 114 auto significantBytes{static_cast<std::size_t>(significantBits + 7) / 8}; 115 if (significantBytes > bytes) { 116 io.GetIoErrorHandler().SignalError(IostatBOZInputOverflow, 117 "B/O/Z input of %d digits overflows %zd-byte variable", digits, bytes); 118 return false; 119 } 120 // Reset to start of significant digits 121 io.HandleAbsolutePosition(start); 122 remaining.reset(); 123 // Make a second pass now that the digit count is known 124 std::memset(n, 0, bytes); 125 int increment{isHostLittleEndian ? -1 : 1}; 126 auto *data{reinterpret_cast<unsigned char *>(n) + 127 (isHostLittleEndian ? significantBytes - 1 : bytes - significantBytes)}; 128 int shift{((digits - 1) * LOG2_BASE) & 7}; 129 while (digits > 0) { 130 char32_t ch{*io.NextInField(remaining, edit)}; 131 int digit{0}; 132 if (ch == ' ' || ch == '\t') { 133 if (edit.modes.editingFlags & blankZero) { 134 ch = '0'; // BZ mode - treat blank as if it were zero 135 } else { 136 continue; 137 } 138 } 139 --digits; 140 if (ch >= '0' && ch <= '9') { 141 digit = ch - '0'; 142 } else if (ch >= 'A' && ch <= 'F') { 143 digit = ch + 10 - 'A'; 144 } else if (ch >= 'a' && ch <= 'f') { 145 digit = ch + 10 - 'a'; 146 } else { 147 continue; 148 } 149 if (shift < 0) { 150 if (shift + LOG2_BASE > 0) { // misaligned octal 151 *data |= digit >> -shift; 152 } 153 shift += 8; 154 data += increment; 155 } 156 *data |= digit << shift; 157 shift -= LOG2_BASE; 158 } 159 return CheckCompleteListDirectedField(io, edit); 160 } 161 162 static inline RT_API_ATTRS char32_t GetRadixPointChar(const DataEdit &edit) { 163 return edit.modes.editingFlags & decimalComma ? char32_t{','} : char32_t{'.'}; 164 } 165 166 // Prepares input from a field, and returns the sign, if any, else '\0'. 167 static RT_API_ATTRS char ScanNumericPrefix(IoStatementState &io, 168 const DataEdit &edit, Fortran::common::optional<char32_t> &next, 169 Fortran::common::optional<int> &remaining) { 170 remaining = io.CueUpInput(edit); 171 next = io.NextInField(remaining, edit); 172 char sign{'\0'}; 173 if (next) { 174 if (*next == '-' || *next == '+') { 175 sign = *next; 176 if (!edit.IsListDirected()) { 177 io.SkipSpaces(remaining); 178 } 179 next = io.NextInField(remaining, edit); 180 } 181 } 182 return sign; 183 } 184 185 RT_API_ATTRS bool EditIntegerInput( 186 IoStatementState &io, const DataEdit &edit, void *n, int kind) { 187 RUNTIME_CHECK(io.GetIoErrorHandler(), kind >= 1 && !(kind & (kind - 1))); 188 switch (edit.descriptor) { 189 case DataEdit::ListDirected: 190 if (IsNamelistNameOrSlash(io)) { 191 return false; 192 } 193 break; 194 case 'G': 195 case 'I': 196 break; 197 case 'B': 198 return EditBOZInput<1>(io, edit, n, kind); 199 case 'O': 200 return EditBOZInput<3>(io, edit, n, kind); 201 case 'Z': 202 return EditBOZInput<4>(io, edit, n, kind); 203 case 'A': // legacy extension 204 return EditCharacterInput(io, edit, reinterpret_cast<char *>(n), kind); 205 default: 206 io.GetIoErrorHandler().SignalError(IostatErrorInFormat, 207 "Data edit descriptor '%c' may not be used with an INTEGER data item", 208 edit.descriptor); 209 return false; 210 } 211 Fortran::common::optional<int> remaining; 212 Fortran::common::optional<char32_t> next; 213 char sign{ScanNumericPrefix(io, edit, next, remaining)}; 214 common::UnsignedInt128 value{0}; 215 bool any{!!sign}; 216 bool overflow{false}; 217 const char32_t comma{GetSeparatorChar(edit)}; 218 for (; next; next = io.NextInField(remaining, edit)) { 219 char32_t ch{*next}; 220 if (ch == ' ' || ch == '\t') { 221 if (edit.modes.editingFlags & blankZero) { 222 ch = '0'; // BZ mode - treat blank as if it were zero 223 } else { 224 continue; 225 } 226 } 227 int digit{0}; 228 if (ch >= '0' && ch <= '9') { 229 digit = ch - '0'; 230 } else if (ch == comma) { 231 break; // end non-list-directed field early 232 } else { 233 if (edit.modes.inNamelist && ch == GetRadixPointChar(edit)) { 234 // Ignore any fractional part that might appear in NAMELIST integer 235 // input, like a few other Fortran compilers do. 236 // TODO: also process exponents? Some compilers do, but they obviously 237 // can't just be ignored. 238 while ((next = io.NextInField(remaining, edit))) { 239 if (*next < '0' || *next > '9') { 240 break; 241 } 242 } 243 if (!next || *next == comma) { 244 break; 245 } 246 } 247 io.GetIoErrorHandler().SignalError( 248 "Bad character '%lc' in INTEGER input field", ch); 249 return false; 250 } 251 static constexpr auto maxu128{~common::UnsignedInt128{0}}; 252 static constexpr auto maxu128OverTen{maxu128 / 10}; 253 static constexpr int maxLastDigit{ 254 static_cast<int>(maxu128 - (maxu128OverTen * 10))}; 255 overflow |= value >= maxu128OverTen && 256 (value > maxu128OverTen || digit > maxLastDigit); 257 value *= 10; 258 value += digit; 259 any = true; 260 } 261 if (!any && !remaining) { 262 io.GetIoErrorHandler().SignalError( 263 "Integer value absent from NAMELIST or list-directed input"); 264 return false; 265 } 266 auto maxForKind{common::UnsignedInt128{1} << ((8 * kind) - 1)}; 267 overflow |= value >= maxForKind && (value > maxForKind || sign != '-'); 268 if (overflow) { 269 io.GetIoErrorHandler().SignalError(IostatIntegerInputOverflow, 270 "Decimal input overflows INTEGER(%d) variable", kind); 271 return false; 272 } 273 if (sign == '-') { 274 value = -value; 275 } 276 if (any || !io.GetIoErrorHandler().InError()) { 277 // The value is stored in the lower order bits on big endian platform. 278 // When memcpy, shift the value to the higher order bit. 279 auto shft{static_cast<int>(sizeof(value.low())) - kind}; 280 // For kind==8 (i.e. shft==0), the value is stored in low_ in big endian. 281 if (!isHostLittleEndian && shft >= 0) { 282 auto l{value.low() << (8 * shft)}; 283 std::memcpy(n, &l, kind); 284 } else { 285 std::memcpy(n, &value, kind); // a blank field means zero 286 } 287 return true; 288 } else { 289 return false; 290 } 291 } 292 293 // Parses a REAL input number from the input source as a normalized 294 // fraction into a supplied buffer -- there's an optional '-', a 295 // decimal point when the input is not hexadecimal, and at least one 296 // digit. Replaces blanks with zeroes where appropriate. 297 struct ScannedRealInput { 298 // Number of characters that (should) have been written to the 299 // buffer -- this can be larger than the buffer size, which 300 // indicates buffer overflow. Zero indicates an error. 301 int got{0}; 302 int exponent{0}; // adjusted as necessary; binary if isHexadecimal 303 bool isHexadecimal{false}; // 0X... 304 }; 305 static RT_API_ATTRS ScannedRealInput ScanRealInput( 306 char *buffer, int bufferSize, IoStatementState &io, const DataEdit &edit) { 307 Fortran::common::optional<int> remaining; 308 Fortran::common::optional<char32_t> next; 309 int got{0}; 310 Fortran::common::optional<int> radixPointOffset; 311 // The following lambda definition violates the conding style, 312 // but cuda-11.8 nvcc hits an internal error with the brace initialization. 313 auto Put = [&](char ch) -> void { 314 if (got < bufferSize) { 315 buffer[got] = ch; 316 } 317 ++got; 318 }; 319 char sign{ScanNumericPrefix(io, edit, next, remaining)}; 320 if (sign == '-') { 321 Put('-'); 322 } 323 bool bzMode{(edit.modes.editingFlags & blankZero) != 0}; 324 int exponent{0}; 325 if (!next || (!bzMode && *next == ' ') || 326 (!(edit.modes.editingFlags & decimalComma) && *next == ',')) { 327 if (!edit.IsListDirected() && !io.GetConnectionState().IsAtEOF()) { 328 // An empty/blank field means zero when not list-directed. 329 // A fixed-width field containing only a sign is also zero; 330 // this behavior isn't standard-conforming in F'2023 but it is 331 // required to pass FCVS. 332 Put('0'); 333 } 334 return {got, exponent, false}; 335 } 336 char32_t radixPointChar{GetRadixPointChar(edit)}; 337 char32_t first{*next >= 'a' && *next <= 'z' ? *next + 'A' - 'a' : *next}; 338 bool isHexadecimal{false}; 339 if (first == 'N' || first == 'I') { 340 // NaN or infinity - convert to upper case 341 // Subtle: a blank field of digits could be followed by 'E' or 'D', 342 for (; next && 343 ((*next >= 'a' && *next <= 'z') || (*next >= 'A' && *next <= 'Z')); 344 next = io.NextInField(remaining, edit)) { 345 if (*next >= 'a' && *next <= 'z') { 346 Put(*next - 'a' + 'A'); 347 } else { 348 Put(*next); 349 } 350 } 351 if (next && *next == '(') { // NaN(...) 352 Put('('); 353 int depth{1}; 354 while (true) { 355 next = io.NextInField(remaining, edit); 356 if (depth == 0) { 357 break; 358 } else if (!next) { 359 return {}; // error 360 } else if (*next == '(') { 361 ++depth; 362 } else if (*next == ')') { 363 --depth; 364 } 365 Put(*next); 366 } 367 } 368 } else if (first == radixPointChar || (first >= '0' && first <= '9') || 369 (bzMode && (first == ' ' || first == '\t')) || first == 'E' || 370 first == 'D' || first == 'Q') { 371 if (first == '0') { 372 next = io.NextInField(remaining, edit); 373 if (next && (*next == 'x' || *next == 'X')) { // 0X... 374 isHexadecimal = true; 375 next = io.NextInField(remaining, edit); 376 } else { 377 Put('0'); 378 } 379 } 380 // input field is normalized to a fraction 381 if (!isHexadecimal) { 382 Put('.'); 383 } 384 auto start{got}; 385 for (; next; next = io.NextInField(remaining, edit)) { 386 char32_t ch{*next}; 387 if (ch == ' ' || ch == '\t') { 388 if (isHexadecimal) { 389 return {}; // error 390 } else if (bzMode) { 391 ch = '0'; // BZ mode - treat blank as if it were zero 392 } else { 393 continue; // ignore blank in fixed field 394 } 395 } 396 if (ch == '0' && got == start && !radixPointOffset) { 397 // omit leading zeroes before the radix point 398 } else if (ch >= '0' && ch <= '9') { 399 Put(ch); 400 } else if (ch == radixPointChar && !radixPointOffset) { 401 // The radix point character is *not* copied to the buffer. 402 radixPointOffset = got - start; // # of digits before the radix point 403 } else if (isHexadecimal && ch >= 'A' && ch <= 'F') { 404 Put(ch); 405 } else if (isHexadecimal && ch >= 'a' && ch <= 'f') { 406 Put(ch - 'a' + 'A'); // normalize to capitals 407 } else { 408 break; 409 } 410 } 411 if (got == start) { 412 // Nothing but zeroes and maybe a radix point. F'2018 requires 413 // at least one digit, but F'77 did not, and a bare "." shows up in 414 // the FCVS suite. 415 Put('0'); // emit at least one digit 416 } 417 // In list-directed input, a bad exponent is not consumed. 418 auto nextBeforeExponent{next}; 419 auto startExponent{io.GetConnectionState().positionInRecord}; 420 bool hasGoodExponent{false}; 421 if (next) { 422 if (isHexadecimal) { 423 if (*next == 'p' || *next == 'P') { 424 next = io.NextInField(remaining, edit); 425 } else { 426 // The binary exponent is not optional in the standard. 427 return {}; // error 428 } 429 } else if (*next == 'e' || *next == 'E' || *next == 'd' || *next == 'D' || 430 *next == 'q' || *next == 'Q') { 431 // Optional exponent letter. Blanks are allowed between the 432 // optional exponent letter and the exponent value. 433 io.SkipSpaces(remaining); 434 next = io.NextInField(remaining, edit); 435 } 436 } 437 if (next && 438 (*next == '-' || *next == '+' || (*next >= '0' && *next <= '9') || 439 *next == ' ' || *next == '\t')) { 440 bool negExpo{*next == '-'}; 441 if (negExpo || *next == '+') { 442 next = io.NextInField(remaining, edit); 443 } 444 for (; next; next = io.NextInField(remaining, edit)) { 445 if (*next >= '0' && *next <= '9') { 446 hasGoodExponent = true; 447 if (exponent < 10000) { 448 exponent = 10 * exponent + *next - '0'; 449 } 450 } else if (*next == ' ' || *next == '\t') { 451 if (isHexadecimal) { 452 break; 453 } else if (bzMode) { 454 hasGoodExponent = true; 455 exponent = 10 * exponent; 456 } 457 } else { 458 break; 459 } 460 } 461 if (negExpo) { 462 exponent = -exponent; 463 } 464 } 465 if (!hasGoodExponent) { 466 if (isHexadecimal) { 467 return {}; // error 468 } 469 // There isn't a good exponent; do not consume it. 470 next = nextBeforeExponent; 471 io.HandleAbsolutePosition(startExponent); 472 // The default exponent is -kP, but the scale factor doesn't affect 473 // an explicit exponent. 474 exponent = -edit.modes.scale; 475 } 476 // Adjust exponent by number of digits before the radix point. 477 if (isHexadecimal) { 478 // Exponents for hexadecimal input are binary. 479 exponent += radixPointOffset.value_or(got - start) * 4; 480 } else if (radixPointOffset) { 481 exponent += *radixPointOffset; 482 } else { 483 // When no redix point (or comma) appears in the value, the 'd' 484 // part of the edit descriptor must be interpreted as the number of 485 // digits in the value to be interpreted as being to the *right* of 486 // the assumed radix point (13.7.2.3.2) 487 exponent += got - start - edit.digits.value_or(0); 488 } 489 } 490 // Consume the trailing ')' of a list-directed or NAMELIST complex 491 // input value. 492 if (edit.descriptor == DataEdit::ListDirectedImaginaryPart) { 493 if (next && (*next == ' ' || *next == '\t')) { 494 io.SkipSpaces(remaining); 495 next = io.NextInField(remaining, edit); 496 } 497 if (!next) { // NextInField fails on separators like ')' 498 std::size_t byteCount{0}; 499 next = io.GetCurrentChar(byteCount); 500 if (next && *next == ')') { 501 io.HandleRelativePosition(byteCount); 502 } 503 } 504 } else if (remaining) { 505 while (next && (*next == ' ' || *next == '\t')) { 506 next = io.NextInField(remaining, edit); 507 } 508 if (next && (*next != ',' || (edit.modes.editingFlags & decimalComma))) { 509 return {}; // error: unused nonblank character in fixed-width field 510 } 511 } 512 return {got, exponent, isHexadecimal}; 513 } 514 515 static RT_API_ATTRS void RaiseFPExceptions( 516 decimal::ConversionResultFlags flags) { 517 #undef RAISE 518 #if defined(RT_DEVICE_COMPILATION) 519 Terminator terminator(__FILE__, __LINE__); 520 #define RAISE(e) \ 521 terminator.Crash( \ 522 "not implemented yet: raising FP exception in device code: %s", #e); 523 #else // !defined(RT_DEVICE_COMPILATION) 524 #ifdef feraisexcept // a macro in some environments; omit std:: 525 #define RAISE feraiseexcept 526 #else 527 #define RAISE std::feraiseexcept 528 #endif 529 #endif // !defined(RT_DEVICE_COMPILATION) 530 531 // Some environment (e.g. emscripten, musl) don't define FE_OVERFLOW as allowed 532 // by c99 (but not c++11) :-/ 533 #if defined(FE_OVERFLOW) || defined(RT_DEVICE_COMPILATION) 534 if (flags & decimal::ConversionResultFlags::Overflow) { 535 RAISE(FE_OVERFLOW); 536 } 537 #endif 538 #if defined(FE_UNDERFLOW) || defined(RT_DEVICE_COMPILATION) 539 if (flags & decimal::ConversionResultFlags::Underflow) { 540 RAISE(FE_UNDERFLOW); 541 } 542 #endif 543 #if defined(FE_INEXACT) || defined(RT_DEVICE_COMPILATION) 544 if (flags & decimal::ConversionResultFlags::Inexact) { 545 RAISE(FE_INEXACT); 546 } 547 #endif 548 #if defined(FE_INVALID) || defined(RT_DEVICE_COMPILATION) 549 if (flags & decimal::ConversionResultFlags::Invalid) { 550 RAISE(FE_INVALID); 551 } 552 #endif 553 #undef RAISE 554 } 555 556 // If no special modes are in effect and the form of the input value 557 // that's present in the input stream is acceptable to the decimal->binary 558 // converter without modification, this fast path for real input 559 // saves time by avoiding memory copies and reformatting of the exponent. 560 template <int PRECISION> 561 static RT_API_ATTRS bool TryFastPathRealDecimalInput( 562 IoStatementState &io, const DataEdit &edit, void *n) { 563 if (edit.modes.editingFlags & (blankZero | decimalComma)) { 564 return false; 565 } 566 if (edit.modes.scale != 0) { 567 return false; 568 } 569 const ConnectionState &connection{io.GetConnectionState()}; 570 if (connection.internalIoCharKind > 1) { 571 return false; // reading non-default character 572 } 573 const char *str{nullptr}; 574 std::size_t got{io.GetNextInputBytes(str)}; 575 if (got == 0 || str == nullptr || !connection.recordLength.has_value()) { 576 return false; // could not access reliably-terminated input stream 577 } 578 const char *p{str}; 579 std::int64_t maxConsume{ 580 std::min<std::int64_t>(got, edit.width.value_or(got))}; 581 const char *limit{str + maxConsume}; 582 decimal::ConversionToBinaryResult<PRECISION> converted{ 583 decimal::ConvertToBinary<PRECISION>(p, edit.modes.round, limit)}; 584 if (converted.flags & (decimal::Invalid | decimal::Overflow)) { 585 return false; 586 } 587 if (edit.digits.value_or(0) != 0) { 588 // Edit descriptor is Fw.d (or other) with d != 0, which 589 // implies scaling 590 const char *q{str}; 591 for (; q < limit; ++q) { 592 if (*q == '.' || *q == 'n' || *q == 'N') { 593 break; 594 } 595 } 596 if (q == limit) { 597 // No explicit decimal point, and not NaN/Inf. 598 return false; 599 } 600 } 601 if (edit.descriptor == DataEdit::ListDirectedImaginaryPart) { 602 // Need to consume a trailing ')', possibly with leading spaces 603 for (; p < limit && (*p == ' ' || *p == '\t'); ++p) { 604 } 605 if (p < limit && *p == ')') { 606 ++p; 607 } else { 608 return false; 609 } 610 } else if (edit.IsListDirected()) { 611 if (p < limit && !IsCharValueSeparator(edit, *p)) { 612 return false; 613 } 614 } else { 615 for (; p < limit && (*p == ' ' || *p == '\t'); ++p) { 616 } 617 if (edit.width && p < str + *edit.width) { 618 return false; // unconverted characters remain in fixed width field 619 } 620 } 621 // Success on the fast path! 622 *reinterpret_cast<decimal::BinaryFloatingPointNumber<PRECISION> *>(n) = 623 converted.binary; 624 io.HandleRelativePosition(p - str); 625 // Set FP exception flags 626 if (converted.flags != decimal::ConversionResultFlags::Exact) { 627 RaiseFPExceptions(converted.flags); 628 } 629 return true; 630 } 631 632 template <int binaryPrecision> 633 RT_API_ATTRS decimal::ConversionToBinaryResult<binaryPrecision> 634 ConvertHexadecimal( 635 const char *&p, enum decimal::FortranRounding rounding, int expo) { 636 using RealType = decimal::BinaryFloatingPointNumber<binaryPrecision>; 637 using RawType = typename RealType::RawType; 638 bool isNegative{*p == '-'}; 639 constexpr RawType one{1}; 640 RawType signBit{0}; 641 if (isNegative) { 642 ++p; 643 signBit = one << (RealType::bits - 1); 644 } 645 RawType fraction{0}; 646 // Adjust the incoming binary P+/- exponent to shift the radix point 647 // to below the LSB and add in the bias. 648 expo += binaryPrecision - 1 + RealType::exponentBias; 649 // Input the fraction. 650 int roundingBit{0}; 651 int guardBit{0}; 652 for (; *p; ++p) { 653 fraction <<= 4; 654 expo -= 4; 655 if (*p >= '0' && *p <= '9') { 656 fraction |= *p - '0'; 657 } else if (*p >= 'A' && *p <= 'F') { 658 fraction |= *p - 'A' + 10; // data were normalized to capitals 659 } else { 660 break; 661 } 662 if (fraction >> binaryPrecision) { 663 while (fraction >> binaryPrecision) { 664 guardBit |= roundingBit; 665 roundingBit = (int)fraction & 1; 666 fraction >>= 1; 667 ++expo; 668 } 669 // Consume excess digits 670 while (*++p) { 671 if (*p == '0') { 672 } else if ((*p >= '1' && *p <= '9') || (*p >= 'A' && *p <= 'F')) { 673 guardBit = 1; 674 } else { 675 break; 676 } 677 } 678 break; 679 } 680 } 681 if (fraction) { 682 // Boost biased expo if too small 683 while (expo < 1) { 684 guardBit |= roundingBit; 685 roundingBit = (int)fraction & 1; 686 fraction >>= 1; 687 ++expo; 688 } 689 // Normalize 690 while (expo > 1 && !(fraction >> (binaryPrecision - 1))) { 691 fraction <<= 1; 692 --expo; 693 guardBit = roundingBit = 0; 694 } 695 } 696 // Rounding 697 bool increase{false}; 698 switch (rounding) { 699 case decimal::RoundNearest: // RN & RP 700 increase = roundingBit && (guardBit | ((int)fraction & 1)); 701 break; 702 case decimal::RoundUp: // RU 703 increase = !isNegative && (roundingBit | guardBit); 704 break; 705 case decimal::RoundDown: // RD 706 increase = isNegative && (roundingBit | guardBit); 707 break; 708 case decimal::RoundToZero: // RZ 709 break; 710 case decimal::RoundCompatible: // RC 711 increase = roundingBit != 0; 712 break; 713 } 714 if (increase) { 715 ++fraction; 716 if (fraction >> binaryPrecision) { 717 fraction >>= 1; 718 ++expo; 719 } 720 } 721 // Package & return result 722 constexpr RawType significandMask{(one << RealType::significandBits) - 1}; 723 int flags{(roundingBit | guardBit) ? decimal::Inexact : decimal::Exact}; 724 if (!fraction) { 725 expo = 0; 726 } else if (expo == 1 && !(fraction >> (binaryPrecision - 1))) { 727 expo = 0; // subnormal 728 flags |= decimal::Underflow; 729 } else if (expo >= RealType::maxExponent) { 730 if (rounding == decimal::RoundToZero || 731 (rounding == decimal::RoundDown && !isNegative) || 732 (rounding == decimal::RoundUp && isNegative)) { 733 expo = RealType::maxExponent - 1; // +/-HUGE() 734 fraction = significandMask; 735 } else { 736 expo = RealType::maxExponent; // +/-Inf 737 fraction = 0; 738 flags |= decimal::Overflow; 739 } 740 } else { 741 fraction &= significandMask; // remove explicit normalization unless x87 742 } 743 return decimal::ConversionToBinaryResult<binaryPrecision>{ 744 RealType{static_cast<RawType>(signBit | 745 static_cast<RawType>(expo) << RealType::significandBits | fraction)}, 746 static_cast<decimal::ConversionResultFlags>(flags)}; 747 } 748 749 template <int KIND> 750 RT_API_ATTRS bool EditCommonRealInput( 751 IoStatementState &io, const DataEdit &edit, void *n) { 752 constexpr int binaryPrecision{common::PrecisionOfRealKind(KIND)}; 753 if (TryFastPathRealDecimalInput<binaryPrecision>(io, edit, n)) { 754 return CheckCompleteListDirectedField(io, edit); 755 } 756 // Fast path wasn't available or didn't work; go the more general route 757 static constexpr int maxDigits{ 758 common::MaxDecimalConversionDigits(binaryPrecision)}; 759 static constexpr int bufferSize{maxDigits + 18}; 760 char buffer[bufferSize]; 761 auto scanned{ScanRealInput(buffer, maxDigits + 2, io, edit)}; 762 int got{scanned.got}; 763 if (got >= maxDigits + 2) { 764 io.GetIoErrorHandler().Crash("EditCommonRealInput: buffer was too small"); 765 return false; 766 } 767 if (got == 0) { 768 const auto &connection{io.GetConnectionState()}; 769 io.GetIoErrorHandler().SignalError(IostatBadRealInput, 770 "Bad real input data at column %d of record %d", 771 static_cast<int>(connection.positionInRecord + 1), 772 static_cast<int>(connection.currentRecordNumber)); 773 return false; 774 } 775 decimal::ConversionToBinaryResult<binaryPrecision> converted; 776 const char *p{buffer}; 777 if (scanned.isHexadecimal) { 778 buffer[got] = '\0'; 779 converted = ConvertHexadecimal<binaryPrecision>( 780 p, edit.modes.round, scanned.exponent); 781 } else { 782 bool hadExtra{got > maxDigits}; 783 int exponent{scanned.exponent}; 784 if (exponent != 0) { 785 buffer[got++] = 'e'; 786 if (exponent < 0) { 787 buffer[got++] = '-'; 788 exponent = -exponent; 789 } 790 if (exponent > 9999) { 791 exponent = 9999; // will convert to +/-Inf 792 } 793 if (exponent > 999) { 794 int dig{exponent / 1000}; 795 buffer[got++] = '0' + dig; 796 int rest{exponent - 1000 * dig}; 797 dig = rest / 100; 798 buffer[got++] = '0' + dig; 799 rest -= 100 * dig; 800 dig = rest / 10; 801 buffer[got++] = '0' + dig; 802 buffer[got++] = '0' + (rest - 10 * dig); 803 } else if (exponent > 99) { 804 int dig{exponent / 100}; 805 buffer[got++] = '0' + dig; 806 int rest{exponent - 100 * dig}; 807 dig = rest / 10; 808 buffer[got++] = '0' + dig; 809 buffer[got++] = '0' + (rest - 10 * dig); 810 } else if (exponent > 9) { 811 int dig{exponent / 10}; 812 buffer[got++] = '0' + dig; 813 buffer[got++] = '0' + (exponent - 10 * dig); 814 } else { 815 buffer[got++] = '0' + exponent; 816 } 817 } 818 buffer[got] = '\0'; 819 converted = decimal::ConvertToBinary<binaryPrecision>(p, edit.modes.round); 820 if (hadExtra) { 821 converted.flags = static_cast<enum decimal::ConversionResultFlags>( 822 converted.flags | decimal::Inexact); 823 } 824 } 825 if (*p) { // unprocessed junk after value 826 const auto &connection{io.GetConnectionState()}; 827 io.GetIoErrorHandler().SignalError(IostatBadRealInput, 828 "Trailing characters after real input data at column %d of record %d", 829 static_cast<int>(connection.positionInRecord + 1), 830 static_cast<int>(connection.currentRecordNumber)); 831 return false; 832 } 833 *reinterpret_cast<decimal::BinaryFloatingPointNumber<binaryPrecision> *>(n) = 834 converted.binary; 835 // Set FP exception flags 836 if (converted.flags != decimal::ConversionResultFlags::Exact) { 837 if (converted.flags & decimal::ConversionResultFlags::Overflow) { 838 io.GetIoErrorHandler().SignalError(IostatRealInputOverflow); 839 return false; 840 } 841 RaiseFPExceptions(converted.flags); 842 } 843 return CheckCompleteListDirectedField(io, edit); 844 } 845 846 template <int KIND> 847 RT_API_ATTRS bool EditRealInput( 848 IoStatementState &io, const DataEdit &edit, void *n) { 849 switch (edit.descriptor) { 850 case DataEdit::ListDirected: 851 if (IsNamelistNameOrSlash(io)) { 852 return false; 853 } 854 return EditCommonRealInput<KIND>(io, edit, n); 855 case DataEdit::ListDirectedRealPart: 856 case DataEdit::ListDirectedImaginaryPart: 857 case 'F': 858 case 'E': // incl. EN, ES, & EX 859 case 'D': 860 case 'G': 861 return EditCommonRealInput<KIND>(io, edit, n); 862 case 'B': 863 return EditBOZInput<1>(io, edit, n, 864 common::BitsForBinaryPrecision(common::PrecisionOfRealKind(KIND)) >> 3); 865 case 'O': 866 return EditBOZInput<3>(io, edit, n, 867 common::BitsForBinaryPrecision(common::PrecisionOfRealKind(KIND)) >> 3); 868 case 'Z': 869 return EditBOZInput<4>(io, edit, n, 870 common::BitsForBinaryPrecision(common::PrecisionOfRealKind(KIND)) >> 3); 871 case 'A': // legacy extension 872 return EditCharacterInput(io, edit, reinterpret_cast<char *>(n), KIND); 873 default: 874 io.GetIoErrorHandler().SignalError(IostatErrorInFormat, 875 "Data edit descriptor '%c' may not be used for REAL input", 876 edit.descriptor); 877 return false; 878 } 879 } 880 881 // 13.7.3 in Fortran 2018 882 RT_API_ATTRS bool EditLogicalInput( 883 IoStatementState &io, const DataEdit &edit, bool &x) { 884 switch (edit.descriptor) { 885 case DataEdit::ListDirected: 886 if (IsNamelistNameOrSlash(io)) { 887 return false; 888 } 889 break; 890 case 'L': 891 case 'G': 892 break; 893 default: 894 io.GetIoErrorHandler().SignalError(IostatErrorInFormat, 895 "Data edit descriptor '%c' may not be used for LOGICAL input", 896 edit.descriptor); 897 return false; 898 } 899 Fortran::common::optional<int> remaining{io.CueUpInput(edit)}; 900 Fortran::common::optional<char32_t> next{io.NextInField(remaining, edit)}; 901 if (next && *next == '.') { // skip optional period 902 next = io.NextInField(remaining, edit); 903 } 904 if (!next) { 905 io.GetIoErrorHandler().SignalError("Empty LOGICAL input field"); 906 return false; 907 } 908 switch (*next) { 909 case 'T': 910 case 't': 911 x = true; 912 break; 913 case 'F': 914 case 'f': 915 x = false; 916 break; 917 default: 918 io.GetIoErrorHandler().SignalError( 919 "Bad character '%lc' in LOGICAL input field", *next); 920 return false; 921 } 922 if (remaining) { // ignore the rest of a fixed-width field 923 io.HandleRelativePosition(*remaining); 924 } else if (edit.descriptor == DataEdit::ListDirected) { 925 while (io.NextInField(remaining, edit)) { // discard rest of field 926 } 927 } 928 return CheckCompleteListDirectedField(io, edit); 929 } 930 931 // See 13.10.3.1 paragraphs 7-9 in Fortran 2018 932 template <typename CHAR> 933 static RT_API_ATTRS bool EditDelimitedCharacterInput( 934 IoStatementState &io, CHAR *x, std::size_t length, char32_t delimiter) { 935 bool result{true}; 936 while (true) { 937 std::size_t byteCount{0}; 938 auto ch{io.GetCurrentChar(byteCount)}; 939 if (!ch) { 940 if (io.AdvanceRecord()) { 941 continue; 942 } else { 943 result = false; // EOF in character value 944 break; 945 } 946 } 947 io.HandleRelativePosition(byteCount); 948 if (*ch == delimiter) { 949 auto next{io.GetCurrentChar(byteCount)}; 950 if (next && *next == delimiter) { 951 // Repeated delimiter: use as character value 952 io.HandleRelativePosition(byteCount); 953 } else { 954 break; // closing delimiter 955 } 956 } 957 if (length > 0) { 958 *x++ = *ch; 959 --length; 960 } 961 } 962 Fortran::runtime::fill_n(x, length, ' '); 963 return result; 964 } 965 966 template <typename CHAR> 967 static RT_API_ATTRS bool EditListDirectedCharacterInput( 968 IoStatementState &io, CHAR *x, std::size_t length, const DataEdit &edit) { 969 std::size_t byteCount{0}; 970 auto ch{io.GetCurrentChar(byteCount)}; 971 if (ch && (*ch == '\'' || *ch == '"')) { 972 io.HandleRelativePosition(byteCount); 973 return EditDelimitedCharacterInput(io, x, length, *ch); 974 } 975 if (IsNamelistNameOrSlash(io) || io.GetConnectionState().IsAtEOF()) { 976 return false; 977 } 978 // Undelimited list-directed character input: stop at a value separator 979 // or the end of the current record. Subtlety: the "remaining" count 980 // here is a dummy that's used to avoid the interpretation of separators 981 // in NextInField. 982 Fortran::common::optional<int> remaining{length > 0 ? maxUTF8Bytes : 0}; 983 while (Fortran::common::optional<char32_t> next{ 984 io.NextInField(remaining, edit)}) { 985 bool isSep{false}; 986 switch (*next) { 987 case ' ': 988 case '\t': 989 case '/': 990 isSep = true; 991 break; 992 case '&': 993 case '$': 994 isSep = edit.IsNamelist(); 995 break; 996 case ',': 997 isSep = !(edit.modes.editingFlags & decimalComma); 998 break; 999 case ';': 1000 isSep = !!(edit.modes.editingFlags & decimalComma); 1001 break; 1002 default: 1003 break; 1004 } 1005 if (isSep) { 1006 remaining = 0; 1007 } else { 1008 *x++ = *next; 1009 remaining = --length > 0 ? maxUTF8Bytes : 0; 1010 } 1011 } 1012 Fortran::runtime::fill_n(x, length, ' '); 1013 return true; 1014 } 1015 1016 template <typename CHAR> 1017 RT_API_ATTRS bool EditCharacterInput(IoStatementState &io, const DataEdit &edit, 1018 CHAR *x, std::size_t lengthChars) { 1019 switch (edit.descriptor) { 1020 case DataEdit::ListDirected: 1021 return EditListDirectedCharacterInput(io, x, lengthChars, edit); 1022 case 'A': 1023 case 'G': 1024 break; 1025 case 'B': 1026 return EditBOZInput<1>(io, edit, x, lengthChars * sizeof *x); 1027 case 'O': 1028 return EditBOZInput<3>(io, edit, x, lengthChars * sizeof *x); 1029 case 'Z': 1030 return EditBOZInput<4>(io, edit, x, lengthChars * sizeof *x); 1031 default: 1032 io.GetIoErrorHandler().SignalError(IostatErrorInFormat, 1033 "Data edit descriptor '%c' may not be used with a CHARACTER data item", 1034 edit.descriptor); 1035 return false; 1036 } 1037 const ConnectionState &connection{io.GetConnectionState()}; 1038 std::size_t remainingChars{lengthChars}; 1039 // Skip leading characters. 1040 // Their bytes don't count towards INQUIRE(IOLENGTH=). 1041 std::size_t skipChars{0}; 1042 if (edit.width && *edit.width > 0) { 1043 remainingChars = *edit.width; 1044 if (remainingChars > lengthChars) { 1045 skipChars = remainingChars - lengthChars; 1046 } 1047 } 1048 // When the field is wider than the variable, we drop the leading 1049 // characters. When the variable is wider than the field, there can be 1050 // trailing padding or an EOR condition. 1051 const char *input{nullptr}; 1052 std::size_t readyBytes{0}; 1053 // Transfer payload bytes; these do count. 1054 while (remainingChars > 0) { 1055 if (readyBytes == 0) { 1056 readyBytes = io.GetNextInputBytes(input); 1057 if (readyBytes == 0 || 1058 (readyBytes < remainingChars && edit.modes.nonAdvancing)) { 1059 if (io.CheckForEndOfRecord(readyBytes)) { 1060 if (readyBytes == 0) { 1061 // PAD='YES' and no more data 1062 Fortran::runtime::fill_n(x, lengthChars, ' '); 1063 return !io.GetIoErrorHandler().InError(); 1064 } else { 1065 // Do partial read(s) then pad on last iteration 1066 } 1067 } else { 1068 return !io.GetIoErrorHandler().InError(); 1069 } 1070 } 1071 } 1072 std::size_t chunkBytes; 1073 std::size_t chunkChars{1}; 1074 bool skipping{skipChars > 0}; 1075 if (connection.isUTF8) { 1076 chunkBytes = MeasureUTF8Bytes(*input); 1077 if (skipping) { 1078 --skipChars; 1079 } else if (auto ucs{DecodeUTF8(input)}) { 1080 if ((sizeof *x == 1 && *ucs > 0xff) || 1081 (sizeof *x == 2 && *ucs > 0xffff)) { 1082 *x++ = '?'; 1083 } else { 1084 *x++ = *ucs; 1085 } 1086 --lengthChars; 1087 } else if (chunkBytes == 0) { 1088 // error recovery: skip bad encoding 1089 chunkBytes = 1; 1090 } 1091 } else if (connection.internalIoCharKind > 1) { 1092 // Reading from non-default character internal unit 1093 chunkBytes = connection.internalIoCharKind; 1094 if (skipping) { 1095 --skipChars; 1096 } else { 1097 char32_t buffer{0}; 1098 std::memcpy(&buffer, input, chunkBytes); 1099 if ((sizeof *x == 1 && buffer > 0xff) || 1100 (sizeof *x == 2 && buffer > 0xffff)) { 1101 *x++ = '?'; 1102 } else { 1103 *x++ = buffer; 1104 } 1105 --lengthChars; 1106 } 1107 } else if constexpr (sizeof *x > 1) { 1108 // Read single byte with expansion into multi-byte CHARACTER 1109 chunkBytes = 1; 1110 if (skipping) { 1111 --skipChars; 1112 } else { 1113 *x++ = static_cast<unsigned char>(*input); 1114 --lengthChars; 1115 } 1116 } else { // single bytes -> default CHARACTER 1117 if (skipping) { 1118 chunkBytes = std::min<std::size_t>(skipChars, readyBytes); 1119 chunkChars = chunkBytes; 1120 skipChars -= chunkChars; 1121 } else { 1122 chunkBytes = std::min<std::size_t>(remainingChars, readyBytes); 1123 chunkBytes = std::min<std::size_t>(lengthChars, chunkBytes); 1124 chunkChars = chunkBytes; 1125 std::memcpy(x, input, chunkBytes); 1126 x += chunkBytes; 1127 lengthChars -= chunkChars; 1128 } 1129 } 1130 input += chunkBytes; 1131 remainingChars -= chunkChars; 1132 if (!skipping) { 1133 io.GotChar(chunkBytes); 1134 } 1135 io.HandleRelativePosition(chunkBytes); 1136 readyBytes -= chunkBytes; 1137 } 1138 // Pad the remainder of the input variable, if any. 1139 Fortran::runtime::fill_n(x, lengthChars, ' '); 1140 return CheckCompleteListDirectedField(io, edit); 1141 } 1142 1143 template RT_API_ATTRS bool EditRealInput<2>( 1144 IoStatementState &, const DataEdit &, void *); 1145 template RT_API_ATTRS bool EditRealInput<3>( 1146 IoStatementState &, const DataEdit &, void *); 1147 template RT_API_ATTRS bool EditRealInput<4>( 1148 IoStatementState &, const DataEdit &, void *); 1149 template RT_API_ATTRS bool EditRealInput<8>( 1150 IoStatementState &, const DataEdit &, void *); 1151 template RT_API_ATTRS bool EditRealInput<10>( 1152 IoStatementState &, const DataEdit &, void *); 1153 // TODO: double/double 1154 template RT_API_ATTRS bool EditRealInput<16>( 1155 IoStatementState &, const DataEdit &, void *); 1156 1157 template RT_API_ATTRS bool EditCharacterInput( 1158 IoStatementState &, const DataEdit &, char *, std::size_t); 1159 template RT_API_ATTRS bool EditCharacterInput( 1160 IoStatementState &, const DataEdit &, char16_t *, std::size_t); 1161 template RT_API_ATTRS bool EditCharacterInput( 1162 IoStatementState &, const DataEdit &, char32_t *, std::size_t); 1163 1164 RT_OFFLOAD_API_GROUP_END 1165 } // namespace Fortran::runtime::io 1166