1 //===-- runtime/format-implementation.h -------------------------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 // Implements out-of-line member functions of template class FormatControl 10 11 #ifndef FORTRAN_RUNTIME_FORMAT_IMPLEMENTATION_H_ 12 #define FORTRAN_RUNTIME_FORMAT_IMPLEMENTATION_H_ 13 14 #include "emit-encoded.h" 15 #include "format.h" 16 #include "io-stmt.h" 17 #include "memory.h" 18 #include "flang/Common/format.h" 19 #include "flang/Decimal/decimal.h" 20 #include "flang/Runtime/main.h" 21 #include <algorithm> 22 #include <cstring> 23 #include <limits> 24 25 namespace Fortran::runtime::io { 26 27 template <typename CONTEXT> 28 RT_API_ATTRS FormatControl<CONTEXT>::FormatControl(const Terminator &terminator, 29 const CharType *format, std::size_t formatLength, 30 const Descriptor *formatDescriptor, int maxHeight) 31 : maxHeight_{static_cast<std::uint8_t>(maxHeight)}, format_{format}, 32 formatLength_{static_cast<int>(formatLength)} { 33 RUNTIME_CHECK(terminator, maxHeight == maxHeight_); 34 if (!format && formatDescriptor) { 35 // The format is a character array passed via a descriptor. 36 std::size_t elements{formatDescriptor->Elements()}; 37 std::size_t elementBytes{formatDescriptor->ElementBytes()}; 38 formatLength = elements * elementBytes / sizeof(CharType); 39 formatLength_ = static_cast<int>(formatLength); 40 if (formatDescriptor->IsContiguous()) { 41 // Treat the contiguous array as a single character value. 42 format_ = const_cast<const CharType *>( 43 reinterpret_cast<CharType *>(formatDescriptor->raw().base_addr)); 44 } else { 45 // Concatenate its elements into a temporary array. 46 char *p{reinterpret_cast<char *>( 47 AllocateMemoryOrCrash(terminator, formatLength * sizeof(CharType)))}; 48 format_ = p; 49 SubscriptValue at[maxRank]; 50 formatDescriptor->GetLowerBounds(at); 51 for (std::size_t j{0}; j < elements; ++j) { 52 std::memcpy(p, formatDescriptor->Element<char>(at), elementBytes); 53 p += elementBytes; 54 formatDescriptor->IncrementSubscripts(at); 55 } 56 freeFormat_ = true; 57 } 58 } 59 RUNTIME_CHECK( 60 terminator, formatLength == static_cast<std::size_t>(formatLength_)); 61 stack_[0].start = offset_; 62 stack_[0].remaining = Iteration::unlimited; // 13.4(8) 63 } 64 65 template <typename CONTEXT> 66 RT_API_ATTRS int FormatControl<CONTEXT>::GetIntField( 67 IoErrorHandler &handler, CharType firstCh, bool *hadError) { 68 CharType ch{firstCh ? firstCh : PeekNext()}; 69 bool negate{ch == '-'}; 70 if (negate || ch == '+') { 71 if (firstCh) { 72 firstCh = '\0'; 73 } else { 74 ++offset_; 75 } 76 ch = PeekNext(); 77 } 78 if (ch < '0' || ch > '9') { 79 handler.SignalError(IostatErrorInFormat, 80 "Invalid FORMAT: integer expected at '%c'", static_cast<char>(ch)); 81 if (hadError) { 82 *hadError = true; 83 } 84 return 0; 85 } 86 int result{0}; 87 while (ch >= '0' && ch <= '9') { 88 constexpr int tenth{std::numeric_limits<int>::max() / 10}; 89 if (result > tenth || 90 ch - '0' > std::numeric_limits<int>::max() - 10 * result) { 91 handler.SignalError( 92 IostatErrorInFormat, "FORMAT integer field out of range"); 93 if (hadError) { 94 *hadError = true; 95 } 96 return result; 97 } 98 result = 10 * result + ch - '0'; 99 if (firstCh) { 100 firstCh = '\0'; 101 } else { 102 ++offset_; 103 } 104 ch = PeekNext(); 105 } 106 if (negate && (result *= -1) > 0) { 107 handler.SignalError( 108 IostatErrorInFormat, "FORMAT integer field out of range"); 109 if (hadError) { 110 *hadError = true; 111 } 112 } 113 return result; 114 } 115 116 // Xn, TRn, TLn 117 template <typename CONTEXT> 118 static RT_API_ATTRS bool RelativeTabbing(CONTEXT &context, int n) { 119 ConnectionState &connection{context.GetConnectionState()}; 120 if constexpr (std::is_same_v<CONTEXT, 121 ExternalFormattedIoStatementState<Direction::Input>> || 122 std::is_same_v<CONTEXT, 123 ExternalFormattedIoStatementState<Direction::Output>>) { 124 if (n != 0 && connection.isUTF8) { 125 const char *p{}; 126 if (n > 0) { // Xn or TRn 127 // Skip 'n' multi-byte characters. If that's more than are in the 128 // current record, that's valid -- the program can position past the 129 // end and then reposition back with Tn or TLn. 130 std::size_t bytesLeft{context.ViewBytesInRecord(p, true)}; 131 for (; n > 0 && bytesLeft && p; --n) { 132 std::size_t byteCount{MeasureUTF8Bytes(*p)}; 133 if (byteCount > bytesLeft) { 134 break; 135 } 136 context.HandleRelativePosition(byteCount); 137 bytesLeft -= byteCount; 138 // Don't call GotChar(byteCount), these don't count towards SIZE= 139 p += byteCount; 140 } 141 } else { // n < 0: TLn 142 n = -n; 143 if (std::int64_t excess{connection.positionInRecord - 144 connection.recordLength.value_or(connection.positionInRecord)}; 145 excess > 0) { 146 // Have tabbed past the end of the record 147 if (excess >= n) { 148 context.HandleRelativePosition(-n); 149 return true; 150 } 151 context.HandleRelativePosition(-excess); 152 n -= excess; 153 } 154 std::size_t bytesLeft{context.ViewBytesInRecord(p, false)}; 155 // Go back 'n' multi-byte characters. 156 for (; n > 0 && bytesLeft && p; --n) { 157 std::size_t byteCount{MeasurePreviousUTF8Bytes(p, bytesLeft)}; 158 context.HandleRelativePosition(-byteCount); 159 bytesLeft -= byteCount; 160 p -= byteCount; 161 } 162 } 163 } 164 } 165 if (connection.internalIoCharKind > 1) { 166 n *= connection.internalIoCharKind; 167 } 168 context.HandleRelativePosition(n); 169 return true; 170 } 171 172 // Tn 173 template <typename CONTEXT> 174 static RT_API_ATTRS bool AbsoluteTabbing(CONTEXT &context, int n) { 175 ConnectionState &connection{context.GetConnectionState()}; 176 n = n > 0 ? n - 1 : 0; // convert 1-based position to 0-based offset 177 if constexpr (std::is_same_v<CONTEXT, 178 ExternalFormattedIoStatementState<Direction::Input>> || 179 std::is_same_v<CONTEXT, 180 ExternalFormattedIoStatementState<Direction::Output>>) { 181 if (connection.isUTF8) { 182 // Reset to the beginning of the record, then TR(n-1) 183 connection.HandleAbsolutePosition(0); 184 return RelativeTabbing(context, n); 185 } 186 } 187 if (connection.internalIoCharKind > 1) { 188 n *= connection.internalIoCharKind; 189 } 190 context.HandleAbsolutePosition(n); 191 return true; 192 } 193 194 template <typename CONTEXT> 195 static RT_API_ATTRS void HandleControl( 196 CONTEXT &context, char ch, char next, int n) { 197 MutableModes &modes{context.mutableModes()}; 198 switch (ch) { 199 case 'B': 200 if (next == 'Z') { 201 modes.editingFlags |= blankZero; 202 return; 203 } 204 if (next == 'N') { 205 modes.editingFlags &= ~blankZero; 206 return; 207 } 208 break; 209 case 'D': 210 if (next == 'C') { 211 modes.editingFlags |= decimalComma; 212 return; 213 } 214 if (next == 'P') { 215 modes.editingFlags &= ~decimalComma; 216 return; 217 } 218 break; 219 case 'P': 220 if (!next) { 221 modes.scale = n; // kP - decimal scaling by 10**k 222 return; 223 } 224 break; 225 case 'R': 226 switch (next) { 227 case 'N': 228 modes.round = decimal::RoundNearest; 229 return; 230 case 'Z': 231 modes.round = decimal::RoundToZero; 232 return; 233 case 'U': 234 modes.round = decimal::RoundUp; 235 return; 236 case 'D': 237 modes.round = decimal::RoundDown; 238 return; 239 case 'C': 240 modes.round = decimal::RoundCompatible; 241 return; 242 case 'P': 243 modes.round = executionEnvironment.defaultOutputRoundingMode; 244 return; 245 default: 246 break; 247 } 248 break; 249 case 'X': 250 if (!next && RelativeTabbing(context, n)) { 251 return; 252 } 253 break; 254 case 'S': 255 if (next == 'P') { 256 modes.editingFlags |= signPlus; 257 return; 258 } 259 if (!next || next == 'S') { 260 modes.editingFlags &= ~signPlus; 261 return; 262 } 263 break; 264 case 'T': { 265 if (!next) { // Tn 266 if (AbsoluteTabbing(context, n)) { 267 return; 268 } 269 } else if (next == 'R' || next == 'L') { // TRn / TLn 270 if (RelativeTabbing(context, next == 'L' ? -n : n)) { 271 return; 272 } 273 } 274 } break; 275 default: 276 break; 277 } 278 if (next) { 279 context.SignalError(IostatErrorInFormat, 280 "Unknown '%c%c' edit descriptor in FORMAT", ch, next); 281 } else { 282 context.SignalError( 283 IostatErrorInFormat, "Unknown '%c' edit descriptor in FORMAT", ch); 284 } 285 } 286 287 // Locates the next data edit descriptor in the format. 288 // Handles all repetition counts and control edit descriptors. 289 // Generally assumes that the format string has survived the common 290 // format validator gauntlet. 291 template <typename CONTEXT> 292 RT_API_ATTRS int FormatControl<CONTEXT>::CueUpNextDataEdit( 293 Context &context, bool stop) { 294 bool hitUnlimitedLoopEnd{false}; 295 // Do repetitions remain on an unparenthesized data edit? 296 while (height_ > 1 && format_[stack_[height_ - 1].start] != '(') { 297 offset_ = stack_[height_ - 1].start; 298 int repeat{stack_[height_ - 1].remaining}; 299 --height_; 300 if (repeat > 0) { 301 return repeat; 302 } 303 } 304 while (true) { 305 Fortran::common::optional<int> repeat; 306 bool unlimited{false}; 307 auto maybeReversionPoint{offset_}; 308 CharType ch{GetNextChar(context)}; 309 while (ch == ',' || ch == ':') { 310 // Skip commas, and don't complain if they're missing; the format 311 // validator does that. 312 if (stop && ch == ':') { 313 return 0; 314 } 315 ch = GetNextChar(context); 316 } 317 if (ch == '-' || ch == '+' || (ch >= '0' && ch <= '9')) { 318 bool hadSign{ch == '-' || ch == '+'}; 319 repeat = GetIntField(context, ch); 320 ch = GetNextChar(context); 321 if (hadSign && ch != 'p' && ch != 'P') { 322 ReportBadFormat(context, 323 "Invalid FORMAT: signed integer may appear only before 'P", 324 maybeReversionPoint); 325 return 0; 326 } 327 } else if (ch == '*') { 328 unlimited = true; 329 ch = GetNextChar(context); 330 if (ch != '(') { 331 ReportBadFormat(context, 332 "Invalid FORMAT: '*' may appear only before '('", 333 maybeReversionPoint); 334 return 0; 335 } 336 if (height_ != 1) { 337 ReportBadFormat(context, 338 "Invalid FORMAT: '*' must be nested in exactly one set of " 339 "parentheses", 340 maybeReversionPoint); 341 return 0; 342 } 343 } 344 ch = Capitalize(ch); 345 if (ch == '(') { 346 if (height_ >= maxHeight_) { 347 ReportBadFormat(context, 348 "FORMAT stack overflow: too many nested parentheses", 349 maybeReversionPoint); 350 return 0; 351 } 352 stack_[height_].start = offset_ - 1; // the '(' 353 RUNTIME_CHECK(context, format_[stack_[height_].start] == '('); 354 if (unlimited || height_ == 0) { 355 stack_[height_].remaining = Iteration::unlimited; 356 } else if (repeat) { 357 if (*repeat <= 0) { 358 *repeat = 1; // error recovery 359 } 360 stack_[height_].remaining = *repeat - 1; 361 } else { 362 stack_[height_].remaining = 0; 363 } 364 if (height_ == 1 && !hitEnd_) { 365 // Subtle point (F'2018 13.4 para 9): the last parenthesized group 366 // at height 1 becomes the restart point after control reaches the 367 // end of the format, including its repeat count. 368 stack_[0].start = maybeReversionPoint; 369 } 370 ++height_; 371 } else if (height_ == 0) { 372 ReportBadFormat(context, "FORMAT lacks initial '('", maybeReversionPoint); 373 return 0; 374 } else if (ch == ')') { 375 if (height_ == 1) { 376 hitEnd_ = true; 377 if (stop) { 378 return 0; // end of FORMAT and no data items remain 379 } 380 context.AdvanceRecord(); // implied / before rightmost ) 381 } 382 auto restart{stack_[height_ - 1].start}; 383 if (format_[restart] == '(') { 384 ++restart; 385 } 386 if (stack_[height_ - 1].remaining == Iteration::unlimited) { 387 if (height_ > 1 && GetNextChar(context) != ')') { 388 ReportBadFormat(context, 389 "Unlimited repetition in FORMAT may not be followed by more " 390 "items", 391 restart); 392 return 0; 393 } 394 if (hitUnlimitedLoopEnd) { 395 ReportBadFormat(context, 396 "Unlimited repetition in FORMAT lacks data edit descriptors", 397 restart); 398 return 0; 399 } 400 hitUnlimitedLoopEnd = true; 401 offset_ = restart; 402 } else if (stack_[height_ - 1].remaining-- > 0) { 403 offset_ = restart; 404 } else { 405 --height_; 406 } 407 } else if (ch == '\'' || ch == '"') { 408 // Quoted 'character literal' 409 CharType quote{ch}; 410 auto start{offset_}; 411 while (offset_ < formatLength_ && format_[offset_] != quote) { 412 ++offset_; 413 } 414 if (offset_ >= formatLength_) { 415 ReportBadFormat(context, 416 "FORMAT missing closing quote on character literal", 417 maybeReversionPoint); 418 return 0; 419 } 420 ++offset_; 421 std::size_t chars{ 422 static_cast<std::size_t>(&format_[offset_] - &format_[start])}; 423 if (offset_ < formatLength_ && format_[offset_] == quote) { 424 // subtle: handle doubled quote character in a literal by including 425 // the first in the output, then treating the second as the start 426 // of another character literal. 427 } else { 428 --chars; 429 } 430 EmitAscii(context, format_ + start, chars); 431 } else if (ch == 'H') { 432 // 9HHOLLERITH 433 if (!repeat || *repeat < 1 || offset_ + *repeat > formatLength_) { 434 ReportBadFormat(context, "Invalid width on Hollerith in FORMAT", 435 maybeReversionPoint); 436 return 0; 437 } 438 EmitAscii(context, format_ + offset_, static_cast<std::size_t>(*repeat)); 439 offset_ += *repeat; 440 } else if (ch >= 'A' && ch <= 'Z') { 441 int start{offset_ - 1}; 442 CharType next{'\0'}; 443 if (ch != 'P') { // 1PE5.2 - comma not required (C1302) 444 CharType peek{Capitalize(PeekNext())}; 445 if (peek >= 'A' && peek <= 'Z') { 446 if ((ch == 'A' && peek == 'T' /* anticipate F'202X AT editing */) || 447 ch == 'B' || ch == 'D' || ch == 'E' || ch == 'R' || ch == 'S' || 448 ch == 'T') { 449 // Assume a two-letter edit descriptor 450 next = peek; 451 ++offset_; 452 } else { 453 // extension: assume a comma between 'ch' and 'peek' 454 } 455 } 456 } 457 if ((!next && 458 (ch == 'A' || ch == 'I' || ch == 'B' || ch == 'E' || ch == 'D' || 459 ch == 'O' || ch == 'Z' || ch == 'F' || ch == 'G' || 460 ch == 'L')) || 461 (ch == 'E' && (next == 'N' || next == 'S' || next == 'X')) || 462 (ch == 'D' && next == 'T')) { 463 // Data edit descriptor found 464 offset_ = start; 465 return repeat && *repeat > 0 ? *repeat : 1; 466 } else { 467 // Control edit descriptor 468 if (ch == 'T') { // Tn, TLn, TRn 469 repeat = GetIntField(context); 470 } 471 HandleControl(context, static_cast<char>(ch), static_cast<char>(next), 472 repeat ? *repeat : 1); 473 } 474 } else if (ch == '/') { 475 context.AdvanceRecord(repeat && *repeat > 0 ? *repeat : 1); 476 } else if (ch == '$' || ch == '\\') { 477 context.mutableModes().nonAdvancing = true; 478 } else if (ch == '\t' || ch == '\v') { 479 // Tabs (extension) 480 // TODO: any other raw characters? 481 EmitAscii(context, format_ + offset_ - 1, 1); 482 } else { 483 ReportBadFormat( 484 context, "Invalid character in FORMAT", maybeReversionPoint); 485 return 0; 486 } 487 } 488 } 489 490 // Returns the next data edit descriptor 491 template <typename CONTEXT> 492 RT_API_ATTRS Fortran::common::optional<DataEdit> 493 FormatControl<CONTEXT>::GetNextDataEdit(Context &context, int maxRepeat) { 494 int repeat{CueUpNextDataEdit(context)}; 495 auto start{offset_}; 496 DataEdit edit; 497 edit.modes = context.mutableModes(); 498 // Handle repeated nonparenthesized edit descriptors 499 edit.repeat = std::min(repeat, maxRepeat); // 0 if maxRepeat==0 500 if (repeat > maxRepeat) { 501 stack_[height_].start = start; // after repeat count 502 stack_[height_].remaining = repeat - edit.repeat; 503 ++height_; 504 } 505 edit.descriptor = static_cast<char>(Capitalize(GetNextChar(context))); 506 if (edit.descriptor == 'D' && Capitalize(PeekNext()) == 'T') { 507 // DT['iotype'][(v_list)] defined I/O 508 edit.descriptor = DataEdit::DefinedDerivedType; 509 ++offset_; 510 if (auto quote{static_cast<char>(PeekNext())}; 511 quote == '\'' || quote == '"') { 512 // Capture the quoted 'iotype' 513 bool ok{false}; 514 for (++offset_; offset_ < formatLength_;) { 515 auto ch{static_cast<char>(format_[offset_++])}; 516 if (ch == quote && 517 (offset_ == formatLength_ || 518 static_cast<char>(format_[offset_]) != quote)) { 519 ok = true; 520 break; // that was terminating quote 521 } 522 if (edit.ioTypeChars >= edit.maxIoTypeChars) { 523 ReportBadFormat(context, "Excessive DT'iotype' in FORMAT", start); 524 return Fortran::common::nullopt; 525 } 526 edit.ioType[edit.ioTypeChars++] = ch; 527 if (ch == quote) { 528 ++offset_; 529 } 530 } 531 if (!ok) { 532 ReportBadFormat(context, "Unclosed DT'iotype' in FORMAT", start); 533 return Fortran::common::nullopt; 534 } 535 } 536 if (PeekNext() == '(') { 537 // Capture the v_list arguments 538 bool ok{false}; 539 for (++offset_; offset_ < formatLength_;) { 540 bool hadError{false}; 541 int n{GetIntField(context, '\0', &hadError)}; 542 if (hadError) { 543 ok = false; 544 break; 545 } 546 if (edit.vListEntries >= edit.maxVListEntries) { 547 ReportBadFormat(context, "Excessive DT(v_list) in FORMAT", start); 548 return Fortran::common::nullopt; 549 } 550 edit.vList[edit.vListEntries++] = n; 551 auto ch{static_cast<char>(GetNextChar(context))}; 552 if (ch != ',') { 553 ok = ch == ')'; 554 break; 555 } 556 } 557 if (!ok) { 558 ReportBadFormat(context, "Unclosed DT(v_list) in FORMAT", start); 559 return Fortran::common::nullopt; 560 } 561 } 562 } else { // not DT'iotype' 563 if (edit.descriptor == 'E') { 564 if (auto next{static_cast<char>(Capitalize(PeekNext()))}; 565 next == 'N' || next == 'S' || next == 'X') { 566 edit.variation = next; 567 ++offset_; 568 } 569 } 570 // Width is optional for A[w] in the standard and optional 571 // for Lw in most compilers. 572 // Intel & (presumably, from bug report) Fujitsu allow 573 // a missing 'w' & 'd'/'m' for other edit descriptors -- but not 574 // 'd'/'m' with a missing 'w' -- and so interpret "(E)" as "(E0)". 575 if (CharType ch{PeekNext()}; (ch >= '0' && ch <= '9') || ch == '.') { 576 edit.width = GetIntField(context); 577 if constexpr (std::is_base_of_v<InputStatementState, CONTEXT>) { 578 if (edit.width.value_or(-1) == 0) { 579 ReportBadFormat(context, "Input field width is zero", start); 580 } 581 } 582 if (PeekNext() == '.') { 583 ++offset_; 584 edit.digits = GetIntField(context); 585 if (CharType ch{PeekNext()}; 586 ch == 'e' || ch == 'E' || ch == 'd' || ch == 'D') { 587 ++offset_; 588 edit.expoDigits = GetIntField(context); 589 } 590 } 591 } 592 } 593 return edit; 594 } 595 596 template <typename CONTEXT> 597 RT_API_ATTRS void FormatControl<CONTEXT>::Finish(Context &context) { 598 CueUpNextDataEdit(context, true /* stop at colon or end of FORMAT */); 599 if (freeFormat_) { 600 FreeMemory(const_cast<CharType *>(format_)); 601 } 602 } 603 } // namespace Fortran::runtime::io 604 #endif // FORTRAN_RUNTIME_FORMAT_IMPLEMENTATION_H_ 605