1 //===-- lib/Parser/prescan.cpp --------------------------------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #include "prescan.h" 10 #include "flang/Common/idioms.h" 11 #include "flang/Parser/characters.h" 12 #include "flang/Parser/message.h" 13 #include "flang/Parser/preprocessor.h" 14 #include "flang/Parser/source.h" 15 #include "flang/Parser/token-sequence.h" 16 #include "llvm/Support/raw_ostream.h" 17 #include <cstddef> 18 #include <cstring> 19 #include <utility> 20 #include <vector> 21 22 namespace Fortran::parser { 23 24 using common::LanguageFeature; 25 26 static constexpr int maxPrescannerNesting{100}; 27 28 Prescanner::Prescanner(Messages &messages, CookedSource &cooked, 29 Preprocessor &preprocessor, common::LanguageFeatureControl lfc) 30 : messages_{messages}, cooked_{cooked}, preprocessor_{preprocessor}, 31 allSources_{preprocessor_.allSources()}, features_{lfc}, 32 backslashFreeFormContinuation_{preprocessor.AnyDefinitions()}, 33 encoding_{allSources_.encoding()} {} 34 35 Prescanner::Prescanner(const Prescanner &that, Preprocessor &prepro, 36 bool isNestedInIncludeDirective) 37 : messages_{that.messages_}, cooked_{that.cooked_}, preprocessor_{prepro}, 38 allSources_{that.allSources_}, features_{that.features_}, 39 preprocessingOnly_{that.preprocessingOnly_}, 40 expandIncludeLines_{that.expandIncludeLines_}, 41 isNestedInIncludeDirective_{isNestedInIncludeDirective}, 42 backslashFreeFormContinuation_{that.backslashFreeFormContinuation_}, 43 inFixedForm_{that.inFixedForm_}, 44 fixedFormColumnLimit_{that.fixedFormColumnLimit_}, 45 encoding_{that.encoding_}, 46 prescannerNesting_{that.prescannerNesting_ + 1}, 47 skipLeadingAmpersand_{that.skipLeadingAmpersand_}, 48 compilerDirectiveBloomFilter_{that.compilerDirectiveBloomFilter_}, 49 compilerDirectiveSentinels_{that.compilerDirectiveSentinels_} {} 50 51 // Returns number of bytes to skip 52 static inline int IsSpace(const char *p) { 53 if (*p == ' ') { 54 return 1; 55 } else if (*p == '\xa0') { // LATIN-1 NBSP non-breaking space 56 return 1; 57 } else if (p[0] == '\xc2' && p[1] == '\xa0') { // UTF-8 NBSP 58 return 2; 59 } else { 60 return 0; 61 } 62 } 63 64 static inline int IsSpaceOrTab(const char *p) { 65 return *p == '\t' ? 1 : IsSpace(p); 66 } 67 68 static inline constexpr bool IsFixedFormCommentChar(char ch) { 69 return ch == '!' || ch == '*' || ch == 'C' || ch == 'c'; 70 } 71 72 static void NormalizeCompilerDirectiveCommentMarker(TokenSequence &dir) { 73 char *p{dir.GetMutableCharData()}; 74 char *limit{p + dir.SizeInChars()}; 75 for (; p < limit; ++p) { 76 if (*p != ' ') { 77 CHECK(IsFixedFormCommentChar(*p)); 78 *p = '!'; 79 return; 80 } 81 } 82 DIE("compiler directive all blank"); 83 } 84 85 void Prescanner::Prescan(ProvenanceRange range) { 86 startProvenance_ = range.start(); 87 start_ = allSources_.GetSource(range); 88 CHECK(start_); 89 limit_ = start_ + range.size(); 90 nextLine_ = start_; 91 const bool beganInFixedForm{inFixedForm_}; 92 if (prescannerNesting_ > maxPrescannerNesting) { 93 Say(GetProvenance(start_), 94 "too many nested INCLUDE/#include files, possibly circular"_err_en_US); 95 return; 96 } 97 while (!IsAtEnd()) { 98 Statement(); 99 } 100 if (inFixedForm_ != beganInFixedForm) { 101 std::string dir{"!dir$ "}; 102 if (beganInFixedForm) { 103 dir += "fixed"; 104 } else { 105 dir += "free"; 106 } 107 dir += '\n'; 108 TokenSequence tokens{dir, allSources_.AddCompilerInsertion(dir).start()}; 109 tokens.Emit(cooked_); 110 } 111 } 112 113 void Prescanner::Statement() { 114 TokenSequence tokens; 115 const char *statementStart{nextLine_}; 116 LineClassification line{ClassifyLine(statementStart)}; 117 switch (line.kind) { 118 case LineClassification::Kind::Comment: 119 nextLine_ += line.payloadOffset; // advance to '!' or newline 120 NextLine(); 121 return; 122 case LineClassification::Kind::IncludeLine: 123 FortranInclude(nextLine_ + line.payloadOffset); 124 NextLine(); 125 return; 126 case LineClassification::Kind::ConditionalCompilationDirective: 127 case LineClassification::Kind::IncludeDirective: 128 preprocessor_.Directive(TokenizePreprocessorDirective(), *this); 129 afterPreprocessingDirective_ = true; 130 skipLeadingAmpersand_ |= !inFixedForm_; 131 return; 132 case LineClassification::Kind::PreprocessorDirective: 133 preprocessor_.Directive(TokenizePreprocessorDirective(), *this); 134 afterPreprocessingDirective_ = true; 135 // Don't set skipLeadingAmpersand_ 136 return; 137 case LineClassification::Kind::DefinitionDirective: 138 preprocessor_.Directive(TokenizePreprocessorDirective(), *this); 139 // Don't set afterPreprocessingDirective_ or skipLeadingAmpersand_ 140 return; 141 case LineClassification::Kind::CompilerDirective: { 142 directiveSentinel_ = line.sentinel; 143 CHECK(InCompilerDirective()); 144 BeginStatementAndAdvance(); 145 if (inFixedForm_) { 146 CHECK(IsFixedFormCommentChar(*at_)); 147 } else { 148 while (int n{IsSpaceOrTab(at_)}) { 149 at_ += n, ++column_; 150 } 151 CHECK(*at_ == '!'); 152 } 153 std::optional<int> condOffset; 154 if (directiveSentinel_[0] == '$' && directiveSentinel_[1] == '\0') { 155 // OpenMP conditional compilation line. 156 condOffset = 2; 157 } else if (directiveSentinel_[0] == '@' && directiveSentinel_[1] == 'c' && 158 directiveSentinel_[2] == 'u' && directiveSentinel_[3] == 'f' && 159 directiveSentinel_[4] == '\0') { 160 // CUDA conditional compilation line. 161 condOffset = 5; 162 } 163 if (condOffset) { 164 at_ += *condOffset, column_ += *condOffset; 165 if (auto payload{IsIncludeLine(at_)}) { 166 FortranInclude(at_ + *payload); 167 return; 168 } else if (inFixedForm_) { 169 LabelField(tokens); 170 } else { 171 SkipSpaces(); 172 } 173 } else { 174 // Compiler directive. Emit normalized sentinel, squash following spaces. 175 EmitChar(tokens, '!'); 176 ++at_, ++column_; 177 for (const char *sp{directiveSentinel_}; *sp != '\0'; 178 ++sp, ++at_, ++column_) { 179 EmitChar(tokens, *sp); 180 } 181 if (IsSpaceOrTab(at_)) { 182 EmitChar(tokens, ' '); 183 while (int n{IsSpaceOrTab(at_)}) { 184 at_ += n, ++column_; 185 } 186 } 187 tokens.CloseToken(); 188 } 189 break; 190 } 191 case LineClassification::Kind::Source: { 192 BeginStatementAndAdvance(); 193 bool checkLabelField{false}; 194 if (inFixedForm_) { 195 if (features_.IsEnabled(LanguageFeature::OldDebugLines) && 196 (*at_ == 'D' || *at_ == 'd')) { 197 NextChar(); 198 } 199 checkLabelField = true; 200 } else { 201 if (skipLeadingAmpersand_) { 202 skipLeadingAmpersand_ = false; 203 const char *p{SkipWhiteSpace(at_)}; 204 if (p < limit_ && *p == '&') { 205 column_ += ++p - at_; 206 at_ = p; 207 } 208 } else { 209 SkipSpaces(); 210 } 211 } 212 // Check for a leading identifier that might be a keyword macro 213 // that will expand to anything indicating a non-source line, like 214 // a comment marker or directive sentinel. If so, disable line 215 // continuation, so that NextToken() won't consume anything from 216 // following lines. 217 if (IsLegalIdentifierStart(*at_)) { 218 // TODO: Only bother with these cases when any keyword macro has 219 // been defined with replacement text that could begin a comment 220 // or directive sentinel. 221 const char *p{at_}; 222 while (IsLegalInIdentifier(*++p)) { 223 } 224 CharBlock id{at_, static_cast<std::size_t>(p - at_)}; 225 if (preprocessor_.IsNameDefined(id) && 226 !preprocessor_.IsFunctionLikeDefinition(id)) { 227 checkLabelField = false; 228 TokenSequence toks; 229 toks.Put(id, GetProvenance(at_)); 230 if (auto replaced{preprocessor_.MacroReplacement(toks, *this)}) { 231 auto newLineClass{ClassifyLine(*replaced, GetCurrentProvenance())}; 232 if (newLineClass.kind == 233 LineClassification::Kind::CompilerDirective) { 234 directiveSentinel_ = newLineClass.sentinel; 235 disableSourceContinuation_ = false; 236 } else { 237 disableSourceContinuation_ = !replaced->empty() && 238 newLineClass.kind != LineClassification::Kind::Source; 239 } 240 } 241 } 242 } 243 if (checkLabelField) { 244 LabelField(tokens); 245 } 246 } break; 247 } 248 249 while (NextToken(tokens)) { 250 } 251 if (continuationLines_ > 255) { 252 if (features_.ShouldWarn(common::LanguageFeature::MiscSourceExtensions)) { 253 Say(common::LanguageFeature::MiscSourceExtensions, 254 GetProvenance(statementStart), 255 "%d continuation lines is more than the Fortran standard allows"_port_en_US, 256 continuationLines_); 257 } 258 } 259 260 Provenance newlineProvenance{GetCurrentProvenance()}; 261 if (std::optional<TokenSequence> preprocessed{ 262 preprocessor_.MacroReplacement(tokens, *this)}) { 263 // Reprocess the preprocessed line. 264 LineClassification ppl{ClassifyLine(*preprocessed, newlineProvenance)}; 265 switch (ppl.kind) { 266 case LineClassification::Kind::Comment: 267 break; 268 case LineClassification::Kind::IncludeLine: 269 FortranInclude(preprocessed->TokenAt(0).begin() + ppl.payloadOffset); 270 break; 271 case LineClassification::Kind::ConditionalCompilationDirective: 272 case LineClassification::Kind::IncludeDirective: 273 case LineClassification::Kind::DefinitionDirective: 274 case LineClassification::Kind::PreprocessorDirective: 275 if (features_.ShouldWarn(common::UsageWarning::Preprocessing)) { 276 Say(common::UsageWarning::Preprocessing, 277 preprocessed->GetProvenanceRange(), 278 "Preprocessed line resembles a preprocessor directive"_warn_en_US); 279 } 280 CheckAndEmitLine(preprocessed->ToLowerCase(), newlineProvenance); 281 break; 282 case LineClassification::Kind::CompilerDirective: 283 if (preprocessed->HasRedundantBlanks()) { 284 preprocessed->RemoveRedundantBlanks(); 285 } 286 while (CompilerDirectiveContinuation(*preprocessed, ppl.sentinel)) { 287 newlineProvenance = GetCurrentProvenance(); 288 } 289 NormalizeCompilerDirectiveCommentMarker(*preprocessed); 290 preprocessed->ToLowerCase(); 291 SourceFormChange(preprocessed->ToString()); 292 CheckAndEmitLine(preprocessed->ToLowerCase().ClipComment( 293 *this, true /* skip first ! */), 294 newlineProvenance); 295 break; 296 case LineClassification::Kind::Source: 297 if (inFixedForm_) { 298 if (!preprocessingOnly_ && preprocessed->HasBlanks()) { 299 preprocessed->RemoveBlanks(); 300 } 301 } else { 302 while (SourceLineContinuation(*preprocessed)) { 303 newlineProvenance = GetCurrentProvenance(); 304 } 305 if (preprocessed->HasRedundantBlanks()) { 306 preprocessed->RemoveRedundantBlanks(); 307 } 308 } 309 CheckAndEmitLine( 310 preprocessed->ToLowerCase().ClipComment(*this), newlineProvenance); 311 break; 312 } 313 } else { // no macro replacement 314 if (line.kind == LineClassification::Kind::CompilerDirective) { 315 while (CompilerDirectiveContinuation(tokens, line.sentinel)) { 316 newlineProvenance = GetCurrentProvenance(); 317 } 318 tokens.ToLowerCase(); 319 SourceFormChange(tokens.ToString()); 320 } else { // Kind::Source 321 tokens.ToLowerCase(); 322 if (inFixedForm_) { 323 EnforceStupidEndStatementRules(tokens); 324 } 325 } 326 CheckAndEmitLine(tokens, newlineProvenance); 327 } 328 directiveSentinel_ = nullptr; 329 } 330 331 void Prescanner::CheckAndEmitLine( 332 TokenSequence &tokens, Provenance newlineProvenance) { 333 tokens.CheckBadFortranCharacters( 334 messages_, *this, disableSourceContinuation_); 335 // Parenthesis nesting check does not apply while any #include is 336 // active, nor on the lines before and after a top-level #include, 337 // nor before or after conditional source. 338 // Applications play shenanigans with line continuation before and 339 // after #include'd subprogram argument lists and conditional source. 340 if (!isNestedInIncludeDirective_ && !omitNewline_ && 341 !afterPreprocessingDirective_ && tokens.BadlyNestedParentheses() && 342 !preprocessor_.InConditional()) { 343 if (nextLine_ < limit_ && IsPreprocessorDirectiveLine(nextLine_)) { 344 // don't complain 345 } else { 346 tokens.CheckBadParentheses(messages_); 347 } 348 } 349 tokens.Emit(cooked_); 350 if (omitNewline_) { 351 omitNewline_ = false; 352 } else { 353 cooked_.Put('\n', newlineProvenance); 354 afterPreprocessingDirective_ = false; 355 } 356 } 357 358 TokenSequence Prescanner::TokenizePreprocessorDirective() { 359 CHECK(!IsAtEnd() && !inPreprocessorDirective_); 360 inPreprocessorDirective_ = true; 361 BeginStatementAndAdvance(); 362 TokenSequence tokens; 363 while (NextToken(tokens)) { 364 } 365 inPreprocessorDirective_ = false; 366 return tokens; 367 } 368 369 void Prescanner::NextLine() { 370 void *vstart{static_cast<void *>(const_cast<char *>(nextLine_))}; 371 void *v{std::memchr(vstart, '\n', limit_ - nextLine_)}; 372 if (!v) { 373 nextLine_ = limit_; 374 } else { 375 const char *nl{const_cast<const char *>(static_cast<char *>(v))}; 376 nextLine_ = nl + 1; 377 } 378 } 379 380 void Prescanner::LabelField(TokenSequence &token) { 381 int outCol{1}; 382 const char *start{at_}; 383 std::optional<int> badColumn; 384 for (; *at_ != '\n' && column_ <= 6; ++at_) { 385 if (*at_ == '\t') { 386 ++at_; 387 column_ = 7; 388 break; 389 } 390 if (int n{IsSpace(at_)}; n == 0 && 391 !(*at_ == '0' && column_ == 6)) { // '0' in column 6 becomes space 392 EmitChar(token, *at_); 393 ++outCol; 394 if (!badColumn && (column_ == 6 || !IsDecimalDigit(*at_))) { 395 badColumn = column_; 396 } 397 } 398 ++column_; 399 } 400 if (badColumn && !preprocessor_.IsNameDefined(token.CurrentOpenToken())) { 401 if ((prescannerNesting_ > 0 && *badColumn == 6 && 402 cooked_.BufferedBytes() == firstCookedCharacterOffset_) || 403 afterPreprocessingDirective_) { 404 // This is the first source line in #include'd text or conditional 405 // code under #if, or the first source line after such. 406 // If it turns out that the preprocessed text begins with a 407 // fixed form continuation line, the newline at the end 408 // of the latest source line beforehand will be deleted in 409 // CookedSource::Marshal(). 410 cooked_.MarkPossibleFixedFormContinuation(); 411 } else if (features_.ShouldWarn(common::UsageWarning::Scanning)) { 412 Say(common::UsageWarning::Scanning, GetProvenance(start + *badColumn - 1), 413 *badColumn == 6 414 ? "Statement should not begin with a continuation line"_warn_en_US 415 : "Character in fixed-form label field must be a digit"_warn_en_US); 416 } 417 token.clear(); 418 if (*badColumn < 6) { 419 at_ = start; 420 column_ = 1; 421 return; 422 } 423 outCol = 1; 424 } 425 if (outCol == 1) { // empty label field 426 // Emit a space so that, if the line is rescanned after preprocessing, 427 // a leading 'C' or 'D' won't be left-justified and then accidentally 428 // misinterpreted as a comment card. 429 EmitChar(token, ' '); 430 ++outCol; 431 } 432 token.CloseToken(); 433 SkipToNextSignificantCharacter(); 434 if (IsDecimalDigit(*at_)) { 435 if (features_.ShouldWarn(common::LanguageFeature::MiscSourceExtensions)) { 436 Say(common::LanguageFeature::MiscSourceExtensions, GetCurrentProvenance(), 437 "Label digit is not in fixed-form label field"_port_en_US); 438 } 439 } 440 } 441 442 // 6.3.3.5: A program unit END statement, or any other statement whose 443 // initial line resembles an END statement, shall not be continued in 444 // fixed form source. 445 void Prescanner::EnforceStupidEndStatementRules(const TokenSequence &tokens) { 446 CharBlock cBlock{tokens.ToCharBlock()}; 447 const char *str{cBlock.begin()}; 448 std::size_t n{cBlock.size()}; 449 if (n < 3) { 450 return; 451 } 452 std::size_t j{0}; 453 for (; j < n && (str[j] == ' ' || (str[j] >= '0' && str[j] <= '9')); ++j) { 454 } 455 if (j + 3 > n || std::memcmp(str + j, "end", 3) != 0) { 456 return; 457 } 458 // It starts with END, possibly after a label. 459 auto start{allSources_.GetSourcePosition(tokens.GetCharProvenance(j))}; 460 auto end{allSources_.GetSourcePosition(tokens.GetCharProvenance(n - 1))}; 461 if (!start || !end) { 462 return; 463 } 464 if (&*start->sourceFile == &*end->sourceFile && start->line == end->line) { 465 return; // no continuation 466 } 467 j += 3; 468 static const char *const prefixes[]{"program", "subroutine", "function", 469 "blockdata", "module", "submodule", nullptr}; 470 bool isPrefix{j == n || !IsLegalInIdentifier(str[j])}; // prefix is END 471 std::size_t endOfPrefix{j - 1}; 472 for (const char *const *p{prefixes}; *p; ++p) { 473 std::size_t pLen{std::strlen(*p)}; 474 if (j + pLen <= n && std::memcmp(str + j, *p, pLen) == 0) { 475 isPrefix = true; // END thing as prefix 476 j += pLen; 477 endOfPrefix = j - 1; 478 for (; j < n && IsLegalInIdentifier(str[j]); ++j) { 479 } 480 break; 481 } 482 } 483 if (isPrefix) { 484 auto range{tokens.GetTokenProvenanceRange(1)}; 485 if (j == n) { // END or END thing [name] 486 Say(range, 487 "Program unit END statement may not be continued in fixed form source"_err_en_US); 488 } else { 489 auto endOfPrefixPos{ 490 allSources_.GetSourcePosition(tokens.GetCharProvenance(endOfPrefix))}; 491 auto next{allSources_.GetSourcePosition(tokens.GetCharProvenance(j))}; 492 if (endOfPrefixPos && next && 493 &*endOfPrefixPos->sourceFile == &*start->sourceFile && 494 endOfPrefixPos->line == start->line && 495 (&*next->sourceFile != &*start->sourceFile || 496 next->line != start->line)) { 497 Say(range, 498 "Initial line of continued statement must not appear to be a program unit END in fixed form source"_err_en_US); 499 } 500 } 501 } 502 } 503 504 void Prescanner::SkipToEndOfLine() { 505 while (*at_ != '\n') { 506 ++at_, ++column_; 507 } 508 } 509 510 bool Prescanner::MustSkipToEndOfLine() const { 511 if (inFixedForm_ && column_ > fixedFormColumnLimit_ && !tabInCurrentLine_) { 512 return true; // skip over ignored columns in right margin (73:80) 513 } else if (*at_ == '!' && !inCharLiteral_) { 514 return !IsCompilerDirectiveSentinel(at_); 515 } else { 516 return false; 517 } 518 } 519 520 void Prescanner::NextChar() { 521 CHECK(*at_ != '\n'); 522 int n{IsSpace(at_)}; 523 at_ += n ? n : 1; 524 ++column_; 525 while (at_[0] == '\xef' && at_[1] == '\xbb' && at_[2] == '\xbf') { 526 // UTF-8 byte order mark - treat this file as UTF-8 527 at_ += 3; 528 encoding_ = Encoding::UTF_8; 529 } 530 SkipToNextSignificantCharacter(); 531 } 532 533 // Skip everything that should be ignored until the next significant 534 // character is reached; handles C-style comments in preprocessing 535 // directives, Fortran ! comments, stuff after the right margin in 536 // fixed form, and all forms of line continuation. 537 bool Prescanner::SkipToNextSignificantCharacter() { 538 auto anyContinuationLine{false}; 539 if (inPreprocessorDirective_) { 540 SkipCComments(); 541 } else { 542 bool mightNeedSpace{false}; 543 if (MustSkipToEndOfLine()) { 544 SkipToEndOfLine(); 545 } else { 546 mightNeedSpace = *at_ == '\n'; 547 } 548 for (; Continuation(mightNeedSpace); mightNeedSpace = false) { 549 anyContinuationLine = true; 550 ++continuationLines_; 551 if (MustSkipToEndOfLine()) { 552 SkipToEndOfLine(); 553 } 554 } 555 if (*at_ == '\t') { 556 tabInCurrentLine_ = true; 557 } 558 } 559 return anyContinuationLine; 560 } 561 562 void Prescanner::SkipCComments() { 563 while (true) { 564 if (IsCComment(at_)) { 565 if (const char *after{SkipCComment(at_)}) { 566 column_ += after - at_; 567 // May have skipped over one or more newlines; relocate the start of 568 // the next line. 569 nextLine_ = at_ = after; 570 NextLine(); 571 } else { 572 // Don't emit any messages about unclosed C-style comments, because 573 // the sequence /* can appear legally in a FORMAT statement. There's 574 // no ambiguity, since the sequence */ cannot appear legally. 575 break; 576 } 577 } else if (inPreprocessorDirective_ && at_[0] == '\\' && at_ + 2 < limit_ && 578 at_[1] == '\n' && !IsAtEnd()) { 579 BeginSourceLineAndAdvance(); 580 } else { 581 break; 582 } 583 } 584 } 585 586 void Prescanner::SkipSpaces() { 587 while (IsSpaceOrTab(at_)) { 588 NextChar(); 589 } 590 insertASpace_ = false; 591 } 592 593 const char *Prescanner::SkipWhiteSpace(const char *p) { 594 while (int n{IsSpaceOrTab(p)}) { 595 p += n; 596 } 597 return p; 598 } 599 600 const char *Prescanner::SkipWhiteSpaceAndCComments(const char *p) const { 601 while (true) { 602 if (int n{IsSpaceOrTab(p)}) { 603 p += n; 604 } else if (IsCComment(p)) { 605 if (const char *after{SkipCComment(p)}) { 606 p = after; 607 } else { 608 break; 609 } 610 } else { 611 break; 612 } 613 } 614 return p; 615 } 616 617 const char *Prescanner::SkipCComment(const char *p) const { 618 char star{' '}, slash{' '}; 619 p += 2; 620 while (star != '*' || slash != '/') { 621 if (p >= limit_) { 622 return nullptr; // signifies an unterminated comment 623 } 624 star = slash; 625 slash = *p++; 626 } 627 return p; 628 } 629 630 bool Prescanner::NextToken(TokenSequence &tokens) { 631 CHECK(at_ >= start_ && at_ < limit_); 632 if (InFixedFormSource() && !preprocessingOnly_) { 633 SkipSpaces(); 634 } else { 635 if (*at_ == '/' && IsCComment(at_)) { 636 // Recognize and skip over classic C style /*comments*/ when 637 // outside a character literal. 638 if (features_.ShouldWarn(LanguageFeature::ClassicCComments)) { 639 Say(LanguageFeature::ClassicCComments, GetCurrentProvenance(), 640 "nonstandard usage: C-style comment"_port_en_US); 641 } 642 SkipCComments(); 643 } 644 if (IsSpaceOrTab(at_)) { 645 // Compress free-form white space into a single space character. 646 const auto theSpace{at_}; 647 char previous{at_ <= start_ ? ' ' : at_[-1]}; 648 NextChar(); 649 SkipSpaces(); 650 if (*at_ == '\n' && !omitNewline_) { 651 // Discard white space at the end of a line. 652 } else if (!inPreprocessorDirective_ && 653 (previous == '(' || *at_ == '(' || *at_ == ')')) { 654 // Discard white space before/after '(' and before ')', unless in a 655 // preprocessor directive. This helps yield space-free contiguous 656 // names for generic interfaces like OPERATOR( + ) and 657 // READ ( UNFORMATTED ), without misinterpreting #define f (notAnArg). 658 // This has the effect of silently ignoring the illegal spaces in 659 // the array constructor ( /1,2/ ) but that seems benign; it's 660 // hard to avoid that while still removing spaces from OPERATOR( / ) 661 // and OPERATOR( // ). 662 } else { 663 // Preserve the squashed white space as a single space character. 664 tokens.PutNextTokenChar(' ', GetProvenance(theSpace)); 665 tokens.CloseToken(); 666 return true; 667 } 668 } 669 } 670 if (insertASpace_) { 671 tokens.PutNextTokenChar(' ', spaceProvenance_); 672 insertASpace_ = false; 673 } 674 if (*at_ == '\n') { 675 return false; 676 } 677 const char *start{at_}; 678 if (*at_ == '\'' || *at_ == '"') { 679 QuotedCharacterLiteral(tokens, start); 680 preventHollerith_ = false; 681 } else if (IsDecimalDigit(*at_)) { 682 int n{0}, digits{0}; 683 static constexpr int maxHollerith{256 /*lines*/ * (132 - 6 /*columns*/)}; 684 do { 685 if (n < maxHollerith) { 686 n = 10 * n + DecimalDigitValue(*at_); 687 } 688 EmitCharAndAdvance(tokens, *at_); 689 ++digits; 690 if (InFixedFormSource()) { 691 SkipSpaces(); 692 } 693 } while (IsDecimalDigit(*at_)); 694 if ((*at_ == 'h' || *at_ == 'H') && n > 0 && n < maxHollerith && 695 !preventHollerith_) { 696 Hollerith(tokens, n, start); 697 } else if (*at_ == '.') { 698 while (IsDecimalDigit(EmitCharAndAdvance(tokens, *at_))) { 699 } 700 ExponentAndKind(tokens); 701 } else if (ExponentAndKind(tokens)) { 702 } else if (digits == 1 && n == 0 && (*at_ == 'x' || *at_ == 'X') && 703 inPreprocessorDirective_) { 704 do { 705 EmitCharAndAdvance(tokens, *at_); 706 } while (IsHexadecimalDigit(*at_)); 707 } else if (at_[0] == '_' && (at_[1] == '\'' || at_[1] == '"')) { // 4_"..." 708 EmitCharAndAdvance(tokens, *at_); 709 QuotedCharacterLiteral(tokens, start); 710 } else if (IsLetter(*at_) && !preventHollerith_ && 711 parenthesisNesting_ > 0 && 712 !preprocessor_.IsNameDefined(CharBlock{at_, 1})) { 713 // Handles FORMAT(3I9HHOLLERITH) by skipping over the first I so that 714 // we don't misrecognize I9HHOLLERITH as an identifier in the next case. 715 EmitCharAndAdvance(tokens, *at_); 716 } 717 preventHollerith_ = false; 718 } else if (*at_ == '.') { 719 char nch{EmitCharAndAdvance(tokens, '.')}; 720 if (!inPreprocessorDirective_ && IsDecimalDigit(nch)) { 721 while (IsDecimalDigit(EmitCharAndAdvance(tokens, *at_))) { 722 } 723 ExponentAndKind(tokens); 724 } else if (nch == '.' && EmitCharAndAdvance(tokens, '.') == '.') { 725 EmitCharAndAdvance(tokens, '.'); // variadic macro definition ellipsis 726 } 727 preventHollerith_ = false; 728 } else if (IsLegalInIdentifier(*at_)) { 729 int parts{1}; 730 const char *afterLast{nullptr}; 731 do { 732 EmitChar(tokens, *at_); 733 ++at_, ++column_; 734 afterLast = at_; 735 if (SkipToNextSignificantCharacter() && IsLegalIdentifierStart(*at_)) { 736 tokens.CloseToken(); 737 ++parts; 738 } 739 } while (IsLegalInIdentifier(*at_)); 740 if (parts >= 3) { 741 // Subtlety: When an identifier is split across three or more continuation 742 // lines (or two continuation lines, immediately preceded or followed 743 // by '&' free form continuation line markers, its parts are kept as 744 // distinct pp-tokens so that macro replacement operates on them 745 // independently. This trick accommodates the historic practice of 746 // using line continuation for token pasting after replacement. 747 } else if (parts == 2) { 748 if (afterLast && afterLast < limit_) { 749 afterLast = SkipWhiteSpace(afterLast); 750 } 751 if ((start > start_ && start[-1] == '&') || 752 (afterLast && afterLast < limit_ && 753 (*afterLast == '&' || *afterLast == '\n'))) { 754 // call & call foo& call foo& 755 // &MACRO& OR &MACRO& OR &MACRO 756 // &foo(...) &(...) 757 } else { 758 tokens.ReopenLastToken(); 759 } 760 } 761 if (InFixedFormSource()) { 762 SkipSpaces(); 763 } 764 if ((*at_ == '\'' || *at_ == '"') && 765 tokens.CharAt(tokens.SizeInChars() - 1) == '_') { // kind_"..." 766 QuotedCharacterLiteral(tokens, start); 767 preventHollerith_ = false; 768 } else { 769 preventHollerith_ = true; // DO 10 H = ... 770 } 771 } else if (*at_ == '*') { 772 if (EmitCharAndAdvance(tokens, '*') == '*') { 773 EmitCharAndAdvance(tokens, '*'); 774 } else { 775 // Subtle ambiguity: 776 // CHARACTER*2H declares H because *2 is a kind specifier 777 // DATAC/N*2H / is repeated Hollerith 778 preventHollerith_ = !slashInCurrentStatement_; 779 } 780 } else { 781 char ch{*at_}; 782 if (ch == '(') { 783 if (parenthesisNesting_++ == 0) { 784 isPossibleMacroCall_ = tokens.SizeInTokens() > 0 && 785 preprocessor_.IsFunctionLikeDefinition( 786 tokens.TokenAt(tokens.SizeInTokens() - 1)); 787 } 788 } else if (ch == ')' && parenthesisNesting_ > 0) { 789 --parenthesisNesting_; 790 } 791 char nch{EmitCharAndAdvance(tokens, ch)}; 792 preventHollerith_ = false; 793 if ((nch == '=' && 794 (ch == '<' || ch == '>' || ch == '/' || ch == '=' || ch == '!')) || 795 (ch == nch && 796 (ch == '/' || ch == ':' || ch == '*' || ch == '#' || ch == '&' || 797 ch == '|' || ch == '<' || ch == '>')) || 798 (ch == '=' && nch == '>')) { 799 // token comprises two characters 800 EmitCharAndAdvance(tokens, nch); 801 } else if (ch == '/') { 802 slashInCurrentStatement_ = true; 803 } else if (ch == ';' && InFixedFormSource()) { 804 SkipSpaces(); 805 if (IsDecimalDigit(*at_)) { 806 if (features_.ShouldWarn( 807 common::LanguageFeature::MiscSourceExtensions)) { 808 Say(common::LanguageFeature::MiscSourceExtensions, 809 GetProvenanceRange(at_, at_ + 1), 810 "Label should be in the label field"_port_en_US); 811 } 812 } 813 } 814 } 815 tokens.CloseToken(); 816 return true; 817 } 818 819 bool Prescanner::ExponentAndKind(TokenSequence &tokens) { 820 char ed{ToLowerCaseLetter(*at_)}; 821 if (ed != 'e' && ed != 'd') { 822 return false; 823 } 824 // Do some look-ahead to ensure that this 'e'/'d' is an exponent, 825 // not the start of an identifier that could be a macro. 826 const char *p{at_}; 827 if (int n{IsSpace(++p)}) { 828 p += n; 829 } 830 if (*p == '+' || *p == '-') { 831 if (int n{IsSpace(++p)}) { 832 p += n; 833 } 834 } 835 if (IsDecimalDigit(*p)) { // it's an exponent 836 EmitCharAndAdvance(tokens, ed); 837 if (*at_ == '+' || *at_ == '-') { 838 EmitCharAndAdvance(tokens, *at_); 839 } 840 while (IsDecimalDigit(*at_)) { 841 EmitCharAndAdvance(tokens, *at_); 842 } 843 if (*at_ == '_') { 844 while (IsLegalInIdentifier(EmitCharAndAdvance(tokens, *at_))) { 845 } 846 } 847 return true; 848 } else { 849 return false; 850 } 851 } 852 853 void Prescanner::QuotedCharacterLiteral( 854 TokenSequence &tokens, const char *start) { 855 char quote{*at_}; 856 const char *end{at_ + 1}; 857 inCharLiteral_ = true; 858 continuationInCharLiteral_ = true; 859 const auto emit{[&](char ch) { EmitChar(tokens, ch); }}; 860 const auto insert{[&](char ch) { EmitInsertedChar(tokens, ch); }}; 861 bool isEscaped{false}; 862 bool escapesEnabled{features_.IsEnabled(LanguageFeature::BackslashEscapes)}; 863 while (true) { 864 if (*at_ == '\\') { 865 if (escapesEnabled) { 866 isEscaped = !isEscaped; 867 } else { 868 // The parser always processes escape sequences, so don't confuse it 869 // when escapes are disabled. 870 insert('\\'); 871 } 872 } else { 873 isEscaped = false; 874 } 875 EmitQuotedChar(static_cast<unsigned char>(*at_), emit, insert, false, 876 Encoding::LATIN_1); 877 while (PadOutCharacterLiteral(tokens)) { 878 } 879 if (*at_ == '\n') { 880 if (!inPreprocessorDirective_) { 881 Say(GetProvenanceRange(start, end), 882 "Incomplete character literal"_err_en_US); 883 } 884 break; 885 } 886 // Here's a weird edge case. When there's a two or more following 887 // continuation lines at this point, and the entire significant part of 888 // the next continuation line is the name of a keyword macro, replace 889 // it in the character literal with its definition. Example: 890 // #define FOO foo 891 // subroutine subr() bind(c, name="my_& 892 // &FOO& 893 // &_bar") ... 894 // produces a binding name of "my_foo_bar". 895 while (at_[1] == '&' && nextLine_ < limit_ && !InFixedFormSource()) { 896 const char *idStart{nextLine_}; 897 if (const char *amper{SkipWhiteSpace(nextLine_)}; *amper == '&') { 898 idStart = amper + 1; 899 } 900 if (IsLegalIdentifierStart(*idStart)) { 901 std::size_t idLen{1}; 902 for (; IsLegalInIdentifier(idStart[idLen]); ++idLen) { 903 } 904 if (idStart[idLen] == '&') { 905 CharBlock id{idStart, idLen}; 906 if (preprocessor_.IsNameDefined(id)) { 907 TokenSequence ppTokens; 908 ppTokens.Put(id, GetProvenance(idStart)); 909 if (auto replaced{ 910 preprocessor_.MacroReplacement(ppTokens, *this)}) { 911 tokens.Put(*replaced); 912 at_ = &idStart[idLen - 1]; 913 NextLine(); 914 continue; // try again on the next line 915 } 916 } 917 } 918 } 919 break; 920 } 921 end = at_ + 1; 922 NextChar(); 923 if (*at_ == quote && !isEscaped) { 924 // A doubled unescaped quote mark becomes a single instance of that 925 // quote character in the literal (later). There can be spaces between 926 // the quotes in fixed form source. 927 EmitChar(tokens, quote); 928 inCharLiteral_ = false; // for cases like print *, '...'!comment 929 NextChar(); 930 if (InFixedFormSource()) { 931 SkipSpaces(); 932 } 933 if (*at_ != quote) { 934 break; 935 } 936 inCharLiteral_ = true; 937 } 938 } 939 continuationInCharLiteral_ = false; 940 inCharLiteral_ = false; 941 } 942 943 void Prescanner::Hollerith( 944 TokenSequence &tokens, int count, const char *start) { 945 inCharLiteral_ = true; 946 CHECK(*at_ == 'h' || *at_ == 'H'); 947 EmitChar(tokens, 'H'); 948 while (count-- > 0) { 949 if (PadOutCharacterLiteral(tokens)) { 950 } else if (*at_ == '\n') { 951 if (features_.ShouldWarn(common::UsageWarning::Scanning)) { 952 Say(common::UsageWarning::Scanning, GetProvenanceRange(start, at_), 953 "Possible truncated Hollerith literal"_warn_en_US); 954 } 955 break; 956 } else { 957 NextChar(); 958 // Each multi-byte character encoding counts as a single character. 959 // No escape sequences are recognized. 960 // Hollerith is always emitted to the cooked character 961 // stream in UTF-8. 962 DecodedCharacter decoded{DecodeCharacter( 963 encoding_, at_, static_cast<std::size_t>(limit_ - at_), false)}; 964 if (decoded.bytes > 0) { 965 EncodedCharacter utf8{ 966 EncodeCharacter<Encoding::UTF_8>(decoded.codepoint)}; 967 for (int j{0}; j < utf8.bytes; ++j) { 968 EmitChar(tokens, utf8.buffer[j]); 969 } 970 at_ += decoded.bytes - 1; 971 } else { 972 Say(GetProvenanceRange(start, at_), 973 "Bad character in Hollerith literal"_err_en_US); 974 break; 975 } 976 } 977 } 978 if (*at_ != '\n') { 979 NextChar(); 980 } 981 inCharLiteral_ = false; 982 } 983 984 // In fixed form, source card images must be processed as if they were at 985 // least 72 columns wide, at least in character literal contexts. 986 bool Prescanner::PadOutCharacterLiteral(TokenSequence &tokens) { 987 while (inFixedForm_ && !tabInCurrentLine_ && at_[1] == '\n') { 988 if (column_ < fixedFormColumnLimit_) { 989 tokens.PutNextTokenChar(' ', spaceProvenance_); 990 ++column_; 991 return true; 992 } 993 if (!FixedFormContinuation(false /*no need to insert space*/) || 994 tabInCurrentLine_) { 995 return false; 996 } 997 CHECK(column_ == 7); 998 --at_; // point to column 6 of continuation line 999 column_ = 6; 1000 } 1001 return false; 1002 } 1003 1004 static bool IsAtProcess(const char *p) { 1005 static const char pAtProc[]{"process"}; 1006 for (std::size_t i{0}; i < sizeof pAtProc - 1; ++i) { 1007 if (ToLowerCaseLetter(*++p) != pAtProc[i]) 1008 return false; 1009 } 1010 return true; 1011 } 1012 1013 bool Prescanner::IsFixedFormCommentLine(const char *start) const { 1014 const char *p{start}; 1015 1016 // The @process directive must start in column 1. 1017 if (*p == '@' && IsAtProcess(p)) { 1018 return true; 1019 } 1020 1021 if (IsFixedFormCommentChar(*p) || *p == '%' || // VAX %list, %eject, &c. 1022 ((*p == 'D' || *p == 'd') && 1023 !features_.IsEnabled(LanguageFeature::OldDebugLines))) { 1024 return true; 1025 } 1026 bool anyTabs{false}; 1027 while (true) { 1028 if (int n{IsSpace(p)}) { 1029 p += n; 1030 } else if (*p == '\t') { 1031 anyTabs = true; 1032 ++p; 1033 } else if (*p == '0' && !anyTabs && p == start + 5) { 1034 ++p; // 0 in column 6 must treated as a space 1035 } else { 1036 break; 1037 } 1038 } 1039 if (!anyTabs && p >= start + fixedFormColumnLimit_) { 1040 return true; 1041 } 1042 if (*p == '!' && !inCharLiteral_ && (anyTabs || p != start + 5)) { 1043 return true; 1044 } 1045 return *p == '\n'; 1046 } 1047 1048 const char *Prescanner::IsFreeFormComment(const char *p) const { 1049 p = SkipWhiteSpaceAndCComments(p); 1050 if (*p == '!' || *p == '\n') { 1051 return p; 1052 } else if (*p == '@') { 1053 return IsAtProcess(p) ? p : nullptr; 1054 } else { 1055 return nullptr; 1056 } 1057 } 1058 1059 std::optional<std::size_t> Prescanner::IsIncludeLine(const char *start) const { 1060 if (!expandIncludeLines_) { 1061 return std::nullopt; 1062 } 1063 const char *p{SkipWhiteSpace(start)}; 1064 if (*p == '0' && inFixedForm_ && p == start + 5) { 1065 // Accept " 0INCLUDE" in fixed form. 1066 p = SkipWhiteSpace(p + 1); 1067 } 1068 for (const char *q{"include"}; *q; ++q) { 1069 if (ToLowerCaseLetter(*p) != *q) { 1070 return std::nullopt; 1071 } 1072 p = SkipWhiteSpace(p + 1); 1073 } 1074 if (IsDecimalDigit(*p)) { // accept & ignore a numeric kind prefix 1075 for (p = SkipWhiteSpace(p + 1); IsDecimalDigit(*p); 1076 p = SkipWhiteSpace(p + 1)) { 1077 } 1078 if (*p != '_') { 1079 return std::nullopt; 1080 } 1081 p = SkipWhiteSpace(p + 1); 1082 } 1083 if (*p == '"' || *p == '\'') { 1084 return {p - start}; 1085 } 1086 return std::nullopt; 1087 } 1088 1089 void Prescanner::FortranInclude(const char *firstQuote) { 1090 const char *p{firstQuote}; 1091 while (*p != '"' && *p != '\'') { 1092 ++p; 1093 } 1094 char quote{*p}; 1095 std::string path; 1096 for (++p; *p != '\n'; ++p) { 1097 if (*p == quote) { 1098 if (p[1] != quote) { 1099 break; 1100 } 1101 ++p; 1102 } 1103 path += *p; 1104 } 1105 if (*p != quote) { 1106 Say(GetProvenanceRange(firstQuote, p), 1107 "malformed path name string"_err_en_US); 1108 return; 1109 } 1110 p = SkipWhiteSpace(p + 1); 1111 if (*p != '\n' && *p != '!') { 1112 const char *garbage{p}; 1113 for (; *p != '\n' && *p != '!'; ++p) { 1114 } 1115 if (features_.ShouldWarn(common::UsageWarning::Scanning)) { 1116 Say(common::UsageWarning::Scanning, GetProvenanceRange(garbage, p), 1117 "excess characters after path name"_warn_en_US); 1118 } 1119 } 1120 std::string buf; 1121 llvm::raw_string_ostream error{buf}; 1122 Provenance provenance{GetProvenance(nextLine_)}; 1123 std::optional<std::string> prependPath; 1124 if (const SourceFile * currentFile{allSources_.GetSourceFile(provenance)}) { 1125 prependPath = DirectoryName(currentFile->path()); 1126 } 1127 const SourceFile *included{ 1128 allSources_.Open(path, error, std::move(prependPath))}; 1129 if (!included) { 1130 Say(provenance, "INCLUDE: %s"_err_en_US, buf); 1131 } else if (included->bytes() > 0) { 1132 ProvenanceRange includeLineRange{ 1133 provenance, static_cast<std::size_t>(p - nextLine_)}; 1134 ProvenanceRange fileRange{ 1135 allSources_.AddIncludedFile(*included, includeLineRange)}; 1136 Preprocessor cleanPrepro{allSources_}; 1137 if (preprocessor_.IsNameDefined("__FILE__"s)) { 1138 cleanPrepro.DefineStandardMacros(); // __FILE__, __LINE__, &c. 1139 } 1140 if (preprocessor_.IsNameDefined("_CUDA"s)) { 1141 cleanPrepro.Define("_CUDA"s, "1"); 1142 } 1143 Prescanner{*this, cleanPrepro, /*isNestedInIncludeDirective=*/false} 1144 .set_encoding(included->encoding()) 1145 .Prescan(fileRange); 1146 } 1147 } 1148 1149 const char *Prescanner::IsPreprocessorDirectiveLine(const char *start) const { 1150 const char *p{start}; 1151 while (int n{IsSpace(p)}) { 1152 p += n; 1153 } 1154 if (*p == '#') { 1155 if (inFixedForm_ && p == start + 5) { 1156 return nullptr; 1157 } 1158 } else { 1159 p = SkipWhiteSpace(p); 1160 if (*p != '#') { 1161 return nullptr; 1162 } 1163 } 1164 return SkipWhiteSpace(p + 1); 1165 } 1166 1167 bool Prescanner::IsNextLinePreprocessorDirective() const { 1168 return IsPreprocessorDirectiveLine(nextLine_) != nullptr; 1169 } 1170 1171 bool Prescanner::SkipCommentLine(bool afterAmpersand) { 1172 if (IsAtEnd()) { 1173 if (afterAmpersand && prescannerNesting_ > 0) { 1174 // A continuation marker at the end of the last line in an 1175 // include file inhibits the newline for that line. 1176 SkipToEndOfLine(); 1177 omitNewline_ = true; 1178 } 1179 } else if (inPreprocessorDirective_) { 1180 } else { 1181 auto lineClass{ClassifyLine(nextLine_)}; 1182 if (lineClass.kind == LineClassification::Kind::Comment) { 1183 NextLine(); 1184 return true; 1185 } else if (lineClass.kind == 1186 LineClassification::Kind::ConditionalCompilationDirective || 1187 lineClass.kind == LineClassification::Kind::PreprocessorDirective) { 1188 // Allow conditional compilation directives (e.g., #ifdef) to affect 1189 // continuation lines. 1190 // Allow other preprocessor directives, too, except #include 1191 // (when it does not follow '&'), #define, and #undef (because 1192 // they cannot be allowed to affect preceding text on a 1193 // continued line). 1194 preprocessor_.Directive(TokenizePreprocessorDirective(), *this); 1195 return true; 1196 } else if (afterAmpersand && 1197 (lineClass.kind == LineClassification::Kind::DefinitionDirective || 1198 lineClass.kind == LineClassification::Kind::IncludeDirective || 1199 lineClass.kind == LineClassification::Kind::IncludeLine)) { 1200 SkipToEndOfLine(); 1201 omitNewline_ = true; 1202 skipLeadingAmpersand_ = true; 1203 } 1204 } 1205 return false; 1206 } 1207 1208 const char *Prescanner::FixedFormContinuationLine(bool mightNeedSpace) { 1209 if (IsAtEnd()) { 1210 return nullptr; 1211 } 1212 tabInCurrentLine_ = false; 1213 char col1{*nextLine_}; 1214 if (IsFixedFormCommentChar(col1)) { 1215 int j{1}; 1216 if (InCompilerDirective()) { 1217 // Must be a continued compiler directive. 1218 for (; j < 5; ++j) { 1219 char ch{directiveSentinel_[j - 1]}; 1220 if (ch == '\0') { 1221 break; 1222 } 1223 if (ch != ToLowerCaseLetter(nextLine_[j])) { 1224 return nullptr; 1225 } 1226 } 1227 } else if (features_.IsEnabled(LanguageFeature::OpenMP)) { 1228 // Fixed Source Form Conditional Compilation Sentinels. 1229 if (nextLine_[1] != '$') { 1230 return nullptr; 1231 } 1232 j++; 1233 } else { 1234 return nullptr; 1235 } 1236 for (; j < 5; ++j) { 1237 if (nextLine_[j] != ' ') { 1238 return nullptr; 1239 } 1240 } 1241 const char *col6{nextLine_ + 5}; 1242 if (*col6 != '\n' && *col6 != '0' && !IsSpaceOrTab(col6)) { 1243 if (mightNeedSpace && !IsSpace(nextLine_ + 6)) { 1244 insertASpace_ = true; 1245 } 1246 return nextLine_ + 6; 1247 } 1248 return nullptr; 1249 } else { 1250 // Normal case: not in a compiler directive. 1251 if (col1 == '&' && 1252 features_.IsEnabled( 1253 LanguageFeature::FixedFormContinuationWithColumn1Ampersand)) { 1254 // Extension: '&' as continuation marker 1255 if (features_.ShouldWarn( 1256 LanguageFeature::FixedFormContinuationWithColumn1Ampersand)) { 1257 Say(LanguageFeature::FixedFormContinuationWithColumn1Ampersand, 1258 GetProvenance(nextLine_), "nonstandard usage"_port_en_US); 1259 } 1260 return nextLine_ + 1; 1261 } 1262 if (col1 == '\t' && nextLine_[1] >= '1' && nextLine_[1] <= '9') { 1263 tabInCurrentLine_ = true; 1264 return nextLine_ + 2; // VAX extension 1265 } 1266 if ((col1 == ' ' || 1267 ((col1 == 'D' || col1 == 'd') && 1268 features_.IsEnabled(LanguageFeature::OldDebugLines))) && 1269 nextLine_[1] == ' ' && nextLine_[2] == ' ' && nextLine_[3] == ' ' && 1270 nextLine_[4] == ' ') { 1271 const char *col6{nextLine_ + 5}; 1272 if (*col6 != '\n' && *col6 != '0' && !IsSpaceOrTab(col6)) { 1273 if ((*col6 == 'i' || *col6 == 'I') && IsIncludeLine(nextLine_)) { 1274 // It's An INCLUDE line, not a continuation 1275 } else { 1276 return nextLine_ + 6; 1277 } 1278 } 1279 } 1280 if (IsImplicitContinuation()) { 1281 return nextLine_; 1282 } 1283 } 1284 return nullptr; // not a continuation line 1285 } 1286 1287 const char *Prescanner::FreeFormContinuationLine(bool ampersand) { 1288 const char *p{nextLine_}; 1289 if (p >= limit_) { 1290 return nullptr; 1291 } 1292 p = SkipWhiteSpace(p); 1293 if (*p == '!') { 1294 ++p; 1295 if (InCompilerDirective()) { 1296 for (const char *s{directiveSentinel_}; *s != '\0'; ++p, ++s) { 1297 if (*s != ToLowerCaseLetter(*p)) { 1298 return nullptr; 1299 } 1300 } 1301 } else if (features_.IsEnabled(LanguageFeature::OpenMP) && *p == '$') { 1302 ++p; 1303 } else { 1304 return nullptr; 1305 } 1306 p = SkipWhiteSpace(p); 1307 if (*p == '&') { 1308 if (!ampersand) { 1309 insertASpace_ = true; 1310 } 1311 return p + 1; 1312 } else if (ampersand) { 1313 return p; 1314 } else { 1315 return nullptr; 1316 } 1317 } else { 1318 if (*p == '&') { 1319 return p + 1; 1320 } else if (*p == '!' || *p == '\n' || *p == '#') { 1321 return nullptr; 1322 } else if (ampersand || IsImplicitContinuation()) { 1323 if (continuationInCharLiteral_) { 1324 // 'a'& -> 'a''b' == "a'b" 1325 // 'b' 1326 if (features_.ShouldWarn( 1327 common::LanguageFeature::MiscSourceExtensions)) { 1328 Say(common::LanguageFeature::MiscSourceExtensions, 1329 GetProvenanceRange(p, p + 1), 1330 "Character literal continuation line should have been preceded by '&'"_port_en_US); 1331 } 1332 } else if (p > nextLine_) { 1333 --p; 1334 } else { 1335 insertASpace_ = true; 1336 } 1337 return p; 1338 } else { 1339 return nullptr; 1340 } 1341 } 1342 } 1343 1344 bool Prescanner::FixedFormContinuation(bool mightNeedSpace) { 1345 // N.B. We accept '&' as a continuation indicator in fixed form, too, 1346 // but not in a character literal. 1347 if (*at_ == '&' && inCharLiteral_) { 1348 return false; 1349 } 1350 do { 1351 if (const char *cont{FixedFormContinuationLine(mightNeedSpace)}) { 1352 BeginSourceLine(cont); 1353 column_ = 7; 1354 NextLine(); 1355 return true; 1356 } 1357 } while (SkipCommentLine(false /* not after ampersand */)); 1358 return false; 1359 } 1360 1361 bool Prescanner::FreeFormContinuation() { 1362 const char *p{at_}; 1363 bool ampersand{*p == '&'}; 1364 if (ampersand) { 1365 p = SkipWhiteSpace(p + 1); 1366 } 1367 if (*p != '\n') { 1368 if (inCharLiteral_) { 1369 return false; 1370 } else if (*p == '!') { // & ! comment - ok 1371 } else if (ampersand && isPossibleMacroCall_ && (*p == ',' || *p == ')')) { 1372 return false; // allow & at end of a macro argument 1373 } else if (features_.ShouldWarn(LanguageFeature::CruftAfterAmpersand)) { 1374 Say(LanguageFeature::CruftAfterAmpersand, GetProvenance(p), 1375 "missing ! before comment after &"_warn_en_US); 1376 } 1377 } 1378 do { 1379 if (const char *cont{FreeFormContinuationLine(ampersand)}) { 1380 BeginSourceLine(cont); 1381 NextLine(); 1382 return true; 1383 } 1384 } while (SkipCommentLine(ampersand)); 1385 return false; 1386 } 1387 1388 // Implicit line continuation allows a preprocessor macro call with 1389 // arguments to span multiple lines. 1390 bool Prescanner::IsImplicitContinuation() const { 1391 return !inPreprocessorDirective_ && !inCharLiteral_ && isPossibleMacroCall_ && 1392 parenthesisNesting_ > 0 && !IsAtEnd() && 1393 ClassifyLine(nextLine_).kind == LineClassification::Kind::Source; 1394 } 1395 1396 bool Prescanner::Continuation(bool mightNeedFixedFormSpace) { 1397 if (disableSourceContinuation_) { 1398 return false; 1399 } else if (*at_ == '\n' || *at_ == '&') { 1400 if (inFixedForm_) { 1401 return FixedFormContinuation(mightNeedFixedFormSpace); 1402 } else { 1403 return FreeFormContinuation(); 1404 } 1405 } else if (*at_ == '\\' && at_ + 2 == nextLine_ && 1406 backslashFreeFormContinuation_ && !inFixedForm_ && nextLine_ < limit_) { 1407 // cpp-like handling of \ at end of a free form source line 1408 BeginSourceLine(nextLine_); 1409 NextLine(); 1410 return true; 1411 } else { 1412 return false; 1413 } 1414 } 1415 1416 std::optional<Prescanner::LineClassification> 1417 Prescanner::IsFixedFormCompilerDirectiveLine(const char *start) const { 1418 const char *p{start}; 1419 char col1{*p++}; 1420 if (!IsFixedFormCommentChar(col1)) { 1421 return std::nullopt; 1422 } 1423 char sentinel[5], *sp{sentinel}; 1424 int column{2}; 1425 for (; column < 6; ++column, ++p) { 1426 if (*p == '\n' || IsSpaceOrTab(p)) { 1427 break; 1428 } 1429 if (sp == sentinel + 1 && sentinel[0] == '$' && IsDecimalDigit(*p)) { 1430 // OpenMP conditional compilation line: leave the label alone 1431 break; 1432 } 1433 *sp++ = ToLowerCaseLetter(*p); 1434 } 1435 if (column == 6) { 1436 if (*p == '0') { 1437 ++p; 1438 } else if (int n{IsSpaceOrTab(p)}) { 1439 p += n; 1440 } else { 1441 // This is a Continuation line, not an initial directive line. 1442 return std::nullopt; 1443 } 1444 } 1445 if (sp == sentinel) { 1446 return std::nullopt; 1447 } 1448 *sp = '\0'; 1449 if (const char *ss{IsCompilerDirectiveSentinel( 1450 sentinel, static_cast<std::size_t>(sp - sentinel))}) { 1451 std::size_t payloadOffset = p - start; 1452 return {LineClassification{ 1453 LineClassification::Kind::CompilerDirective, payloadOffset, ss}}; 1454 } 1455 return std::nullopt; 1456 } 1457 1458 std::optional<Prescanner::LineClassification> 1459 Prescanner::IsFreeFormCompilerDirectiveLine(const char *start) const { 1460 if (const char *p{SkipWhiteSpace(start)}; p && *p++ == '!') { 1461 if (auto maybePair{IsCompilerDirectiveSentinel(p)}) { 1462 auto offset{static_cast<std::size_t>(maybePair->second - start)}; 1463 return {LineClassification{LineClassification::Kind::CompilerDirective, 1464 offset, maybePair->first}}; 1465 } 1466 } 1467 return std::nullopt; 1468 } 1469 1470 Prescanner &Prescanner::AddCompilerDirectiveSentinel(const std::string &dir) { 1471 std::uint64_t packed{0}; 1472 for (char ch : dir) { 1473 packed = (packed << 8) | (ToLowerCaseLetter(ch) & 0xff); 1474 } 1475 compilerDirectiveBloomFilter_.set(packed % prime1); 1476 compilerDirectiveBloomFilter_.set(packed % prime2); 1477 compilerDirectiveSentinels_.insert(dir); 1478 return *this; 1479 } 1480 1481 const char *Prescanner::IsCompilerDirectiveSentinel( 1482 const char *sentinel, std::size_t len) const { 1483 std::uint64_t packed{0}; 1484 for (std::size_t j{0}; j < len; ++j) { 1485 packed = (packed << 8) | (sentinel[j] & 0xff); 1486 } 1487 if (len == 0 || !compilerDirectiveBloomFilter_.test(packed % prime1) || 1488 !compilerDirectiveBloomFilter_.test(packed % prime2)) { 1489 return nullptr; 1490 } 1491 const auto iter{compilerDirectiveSentinels_.find(std::string(sentinel, len))}; 1492 return iter == compilerDirectiveSentinels_.end() ? nullptr : iter->c_str(); 1493 } 1494 1495 const char *Prescanner::IsCompilerDirectiveSentinel(CharBlock token) const { 1496 const char *p{token.begin()}; 1497 const char *end{p + token.size()}; 1498 while (p < end && (*p == ' ' || *p == '\n')) { 1499 ++p; 1500 } 1501 if (p < end && *p == '!') { 1502 ++p; 1503 } 1504 while (end > p && (end[-1] == ' ' || end[-1] == '\t')) { 1505 --end; 1506 } 1507 return end > p && IsCompilerDirectiveSentinel(p, end - p) ? p : nullptr; 1508 } 1509 1510 std::optional<std::pair<const char *, const char *>> 1511 Prescanner::IsCompilerDirectiveSentinel(const char *p) const { 1512 char sentinel[8]; 1513 for (std::size_t j{0}; j + 1 < sizeof sentinel && *p != '\n'; ++p, ++j) { 1514 if (int n{*p == '&' ? 1 : IsSpaceOrTab(p)}) { 1515 if (j > 0) { 1516 sentinel[j] = '\0'; 1517 p = SkipWhiteSpace(p + n); 1518 if (*p != '!') { 1519 if (const char *sp{IsCompilerDirectiveSentinel(sentinel, j)}) { 1520 return std::make_pair(sp, p); 1521 } 1522 } 1523 } 1524 break; 1525 } else { 1526 sentinel[j] = ToLowerCaseLetter(*p); 1527 } 1528 } 1529 return std::nullopt; 1530 } 1531 1532 constexpr bool IsDirective(const char *match, const char *dir) { 1533 for (; *match; ++match) { 1534 if (*match != ToLowerCaseLetter(*dir++)) { 1535 return false; 1536 } 1537 } 1538 return true; 1539 } 1540 1541 Prescanner::LineClassification Prescanner::ClassifyLine( 1542 const char *start) const { 1543 if (inFixedForm_) { 1544 if (std::optional<LineClassification> lc{ 1545 IsFixedFormCompilerDirectiveLine(start)}) { 1546 return std::move(*lc); 1547 } 1548 if (IsFixedFormCommentLine(start)) { 1549 return {LineClassification::Kind::Comment}; 1550 } 1551 } else { 1552 if (std::optional<LineClassification> lc{ 1553 IsFreeFormCompilerDirectiveLine(start)}) { 1554 return std::move(*lc); 1555 } 1556 if (const char *bang{IsFreeFormComment(start)}) { 1557 return {LineClassification::Kind::Comment, 1558 static_cast<std::size_t>(bang - start)}; 1559 } 1560 } 1561 if (std::optional<std::size_t> quoteOffset{IsIncludeLine(start)}) { 1562 return {LineClassification::Kind::IncludeLine, *quoteOffset}; 1563 } 1564 if (const char *dir{IsPreprocessorDirectiveLine(start)}) { 1565 if (IsDirective("if", dir) || IsDirective("elif", dir) || 1566 IsDirective("else", dir) || IsDirective("endif", dir)) { 1567 return {LineClassification::Kind::ConditionalCompilationDirective}; 1568 } else if (IsDirective("include", dir)) { 1569 return {LineClassification::Kind::IncludeDirective}; 1570 } else if (IsDirective("define", dir) || IsDirective("undef", dir)) { 1571 return {LineClassification::Kind::DefinitionDirective}; 1572 } else { 1573 return {LineClassification::Kind::PreprocessorDirective}; 1574 } 1575 } 1576 return {LineClassification::Kind::Source}; 1577 } 1578 1579 Prescanner::LineClassification Prescanner::ClassifyLine( 1580 TokenSequence &tokens, Provenance newlineProvenance) const { 1581 // Append a newline temporarily. 1582 tokens.PutNextTokenChar('\n', newlineProvenance); 1583 tokens.CloseToken(); 1584 const char *ppd{tokens.ToCharBlock().begin()}; 1585 LineClassification classification{ClassifyLine(ppd)}; 1586 tokens.pop_back(); // remove the newline 1587 return classification; 1588 } 1589 1590 void Prescanner::SourceFormChange(std::string &&dir) { 1591 if (dir == "!dir$ free") { 1592 inFixedForm_ = false; 1593 } else if (dir == "!dir$ fixed") { 1594 inFixedForm_ = true; 1595 } 1596 } 1597 1598 // Acquire and append compiler directive continuation lines to 1599 // the tokens that constitute a compiler directive, even when those 1600 // directive continuation lines are the result of macro expansion. 1601 // (Not used when neither the original compiler directive line nor 1602 // the directive continuation line result from preprocessing; regular 1603 // line continuation during tokenization handles that normal case.) 1604 bool Prescanner::CompilerDirectiveContinuation( 1605 TokenSequence &tokens, const char *origSentinel) { 1606 if (inFixedForm_ || tokens.empty() || 1607 tokens.TokenAt(tokens.SizeInTokens() - 1) != "&") { 1608 return false; 1609 } 1610 LineClassification followingLine{ClassifyLine(nextLine_)}; 1611 if (followingLine.kind == LineClassification::Kind::Comment) { 1612 nextLine_ += followingLine.payloadOffset; // advance to '!' or newline 1613 NextLine(); 1614 return true; 1615 } 1616 CHECK(origSentinel != nullptr); 1617 directiveSentinel_ = origSentinel; // so InCompilerDirective() is true 1618 const char *nextContinuation{ 1619 followingLine.kind == LineClassification::Kind::CompilerDirective 1620 ? FreeFormContinuationLine(true) 1621 : nullptr}; 1622 if (!nextContinuation && 1623 followingLine.kind != LineClassification::Kind::Source) { 1624 return false; 1625 } 1626 auto origNextLine{nextLine_}; 1627 BeginSourceLine(nextLine_); 1628 NextLine(); 1629 if (nextContinuation) { 1630 // What follows is !DIR$ & xxx; skip over the & so that it 1631 // doesn't cause a spurious continuation. 1632 at_ = nextContinuation; 1633 } else { 1634 // What follows looks like a source line before macro expansion, 1635 // but might become a directive continuation afterwards. 1636 SkipSpaces(); 1637 } 1638 TokenSequence followingTokens; 1639 while (NextToken(followingTokens)) { 1640 } 1641 if (auto followingPrepro{ 1642 preprocessor_.MacroReplacement(followingTokens, *this)}) { 1643 followingTokens = std::move(*followingPrepro); 1644 } 1645 followingTokens.RemoveRedundantBlanks(); 1646 std::size_t startAt{0}; 1647 std::size_t following{followingTokens.SizeInTokens()}; 1648 bool ok{false}; 1649 if (nextContinuation) { 1650 ok = true; 1651 } else { 1652 startAt = 2; 1653 if (startAt < following && followingTokens.TokenAt(0) == "!") { 1654 CharBlock sentinel{followingTokens.TokenAt(1)}; 1655 if (!sentinel.empty() && 1656 std::memcmp(sentinel.begin(), origSentinel, sentinel.size()) == 0) { 1657 ok = true; 1658 while ( 1659 startAt < following && followingTokens.TokenAt(startAt).IsBlank()) { 1660 ++startAt; 1661 } 1662 if (startAt < following && followingTokens.TokenAt(startAt) == "&") { 1663 ++startAt; 1664 } 1665 } 1666 } 1667 } 1668 if (ok) { 1669 tokens.pop_back(); // delete original '&' 1670 tokens.Put(followingTokens, startAt, following - startAt); 1671 tokens.RemoveRedundantBlanks(); 1672 } else { 1673 nextLine_ = origNextLine; 1674 } 1675 return ok; 1676 } 1677 1678 // Similar, but for source line continuation after macro replacement. 1679 bool Prescanner::SourceLineContinuation(TokenSequence &tokens) { 1680 if (!inFixedForm_ && !tokens.empty() && 1681 tokens.TokenAt(tokens.SizeInTokens() - 1) == "&") { 1682 LineClassification followingLine{ClassifyLine(nextLine_)}; 1683 if (followingLine.kind == LineClassification::Kind::Comment) { 1684 nextLine_ += followingLine.payloadOffset; // advance to '!' or newline 1685 NextLine(); 1686 return true; 1687 } else if (const char *nextContinuation{FreeFormContinuationLine(true)}) { 1688 BeginSourceLine(nextLine_); 1689 NextLine(); 1690 TokenSequence followingTokens; 1691 at_ = nextContinuation; 1692 while (NextToken(followingTokens)) { 1693 } 1694 if (auto followingPrepro{ 1695 preprocessor_.MacroReplacement(followingTokens, *this)}) { 1696 followingTokens = std::move(*followingPrepro); 1697 } 1698 followingTokens.RemoveRedundantBlanks(); 1699 tokens.pop_back(); // delete original '&' 1700 tokens.Put(followingTokens); 1701 return true; 1702 } 1703 } 1704 return false; 1705 } 1706 } // namespace Fortran::parser 1707