1 //===-- lib/Parser/prescan.cpp --------------------------------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #include "prescan.h" 10 #include "flang/Common/idioms.h" 11 #include "flang/Parser/characters.h" 12 #include "flang/Parser/message.h" 13 #include "flang/Parser/preprocessor.h" 14 #include "flang/Parser/source.h" 15 #include "flang/Parser/token-sequence.h" 16 #include "llvm/Support/raw_ostream.h" 17 #include <cstddef> 18 #include <cstring> 19 #include <utility> 20 #include <vector> 21 22 namespace Fortran::parser { 23 24 using common::LanguageFeature; 25 26 static constexpr int maxPrescannerNesting{100}; 27 28 Prescanner::Prescanner(Messages &messages, CookedSource &cooked, 29 Preprocessor &preprocessor, common::LanguageFeatureControl lfc) 30 : messages_{messages}, cooked_{cooked}, preprocessor_{preprocessor}, 31 allSources_{preprocessor_.allSources()}, features_{lfc}, 32 backslashFreeFormContinuation_{preprocessor.AnyDefinitions()}, 33 encoding_{allSources_.encoding()} {} 34 35 Prescanner::Prescanner(const Prescanner &that, Preprocessor &prepro, 36 bool isNestedInIncludeDirective) 37 : messages_{that.messages_}, cooked_{that.cooked_}, preprocessor_{prepro}, 38 allSources_{that.allSources_}, features_{that.features_}, 39 preprocessingOnly_{that.preprocessingOnly_}, 40 expandIncludeLines_{that.expandIncludeLines_}, 41 isNestedInIncludeDirective_{isNestedInIncludeDirective}, 42 backslashFreeFormContinuation_{that.backslashFreeFormContinuation_}, 43 inFixedForm_{that.inFixedForm_}, 44 fixedFormColumnLimit_{that.fixedFormColumnLimit_}, 45 encoding_{that.encoding_}, 46 prescannerNesting_{that.prescannerNesting_ + 1}, 47 skipLeadingAmpersand_{that.skipLeadingAmpersand_}, 48 
compilerDirectiveBloomFilter_{that.compilerDirectiveBloomFilter_}, 49 compilerDirectiveSentinels_{that.compilerDirectiveSentinels_} {} 50 51 // Returns number of bytes to skip 52 static inline int IsSpace(const char *p) { 53 if (*p == ' ') { 54 return 1; 55 } else if (*p == '\xa0') { // LATIN-1 NBSP non-breaking space 56 return 1; 57 } else if (p[0] == '\xc2' && p[1] == '\xa0') { // UTF-8 NBSP 58 return 2; 59 } else { 60 return 0; 61 } 62 } 63 64 static inline int IsSpaceOrTab(const char *p) { 65 return *p == '\t' ? 1 : IsSpace(p); 66 } 67 68 static inline constexpr bool IsFixedFormCommentChar(char ch) { 69 return ch == '!' || ch == '*' || ch == 'C' || ch == 'c'; 70 } 71 72 static void NormalizeCompilerDirectiveCommentMarker(TokenSequence &dir) { 73 char *p{dir.GetMutableCharData()}; 74 char *limit{p + dir.SizeInChars()}; 75 for (; p < limit; ++p) { 76 if (*p != ' ') { 77 CHECK(IsFixedFormCommentChar(*p)); 78 *p = '!'; 79 return; 80 } 81 } 82 DIE("compiler directive all blank"); 83 } 84 85 void Prescanner::Prescan(ProvenanceRange range) { 86 startProvenance_ = range.start(); 87 start_ = allSources_.GetSource(range); 88 CHECK(start_); 89 limit_ = start_ + range.size(); 90 nextLine_ = start_; 91 const bool beganInFixedForm{inFixedForm_}; 92 if (prescannerNesting_ > maxPrescannerNesting) { 93 Say(GetProvenance(start_), 94 "too many nested INCLUDE/#include files, possibly circular"_err_en_US); 95 return; 96 } 97 while (!IsAtEnd()) { 98 Statement(); 99 } 100 if (inFixedForm_ != beganInFixedForm) { 101 std::string dir{"!dir$ "}; 102 if (beganInFixedForm) { 103 dir += "fixed"; 104 } else { 105 dir += "free"; 106 } 107 dir += '\n'; 108 TokenSequence tokens{dir, allSources_.AddCompilerInsertion(dir).start()}; 109 tokens.Emit(cooked_); 110 } 111 } 112 113 void Prescanner::Statement() { 114 TokenSequence tokens; 115 const char *statementStart{nextLine_}; 116 LineClassification line{ClassifyLine(statementStart)}; 117 switch (line.kind) { 118 case 
LineClassification::Kind::Comment: 119 nextLine_ += line.payloadOffset; // advance to '!' or newline 120 NextLine(); 121 return; 122 case LineClassification::Kind::IncludeLine: 123 FortranInclude(nextLine_ + line.payloadOffset); 124 NextLine(); 125 return; 126 case LineClassification::Kind::ConditionalCompilationDirective: 127 case LineClassification::Kind::IncludeDirective: 128 preprocessor_.Directive(TokenizePreprocessorDirective(), *this); 129 afterPreprocessingDirective_ = true; 130 skipLeadingAmpersand_ |= !inFixedForm_; 131 return; 132 case LineClassification::Kind::PreprocessorDirective: 133 preprocessor_.Directive(TokenizePreprocessorDirective(), *this); 134 afterPreprocessingDirective_ = true; 135 // Don't set skipLeadingAmpersand_ 136 return; 137 case LineClassification::Kind::DefinitionDirective: 138 preprocessor_.Directive(TokenizePreprocessorDirective(), *this); 139 // Don't set afterPreprocessingDirective_ or skipLeadingAmpersand_ 140 return; 141 case LineClassification::Kind::CompilerDirective: { 142 directiveSentinel_ = line.sentinel; 143 CHECK(InCompilerDirective()); 144 BeginStatementAndAdvance(); 145 if (inFixedForm_) { 146 CHECK(IsFixedFormCommentChar(*at_)); 147 } else { 148 while (int n{IsSpaceOrTab(at_)}) { 149 at_ += n, ++column_; 150 } 151 CHECK(*at_ == '!'); 152 } 153 std::optional<int> condOffset; 154 if (directiveSentinel_[0] == '$' && directiveSentinel_[1] == '\0') { 155 // OpenMP conditional compilation line. 156 condOffset = 2; 157 } else if (directiveSentinel_[0] == '@' && directiveSentinel_[1] == 'c' && 158 directiveSentinel_[2] == 'u' && directiveSentinel_[3] == 'f' && 159 directiveSentinel_[4] == '\0') { 160 // CUDA conditional compilation line. 
161 condOffset = 5; 162 } 163 if (condOffset) { 164 at_ += *condOffset, column_ += *condOffset; 165 if (auto payload{IsIncludeLine(at_)}) { 166 FortranInclude(at_ + *payload); 167 return; 168 } else if (inFixedForm_) { 169 LabelField(tokens); 170 } else { 171 SkipSpaces(); 172 } 173 } else { 174 // Compiler directive. Emit normalized sentinel, squash following spaces. 175 EmitChar(tokens, '!'); 176 ++at_, ++column_; 177 for (const char *sp{directiveSentinel_}; *sp != '\0'; 178 ++sp, ++at_, ++column_) { 179 EmitChar(tokens, *sp); 180 } 181 if (IsSpaceOrTab(at_)) { 182 EmitChar(tokens, ' '); 183 while (int n{IsSpaceOrTab(at_)}) { 184 at_ += n, ++column_; 185 } 186 } 187 tokens.CloseToken(); 188 } 189 break; 190 } 191 case LineClassification::Kind::Source: { 192 BeginStatementAndAdvance(); 193 bool checkLabelField{false}; 194 if (inFixedForm_) { 195 if (features_.IsEnabled(LanguageFeature::OldDebugLines) && 196 (*at_ == 'D' || *at_ == 'd')) { 197 NextChar(); 198 } 199 checkLabelField = true; 200 } else { 201 if (skipLeadingAmpersand_) { 202 skipLeadingAmpersand_ = false; 203 const char *p{SkipWhiteSpace(at_)}; 204 if (p < limit_ && *p == '&') { 205 column_ += ++p - at_; 206 at_ = p; 207 } 208 } else { 209 SkipSpaces(); 210 } 211 } 212 // Check for a leading identifier that might be a keyword macro 213 // that will expand to anything indicating a non-source line, like 214 // a comment marker or directive sentinel. If so, disable line 215 // continuation, so that NextToken() won't consume anything from 216 // following lines. 217 if (IsLegalIdentifierStart(*at_)) { 218 // TODO: Only bother with these cases when any keyword macro has 219 // been defined with replacement text that could begin a comment 220 // or directive sentinel. 
221 const char *p{at_}; 222 while (IsLegalInIdentifier(*++p)) { 223 } 224 CharBlock id{at_, static_cast<std::size_t>(p - at_)}; 225 if (preprocessor_.IsNameDefined(id) && 226 !preprocessor_.IsFunctionLikeDefinition(id)) { 227 checkLabelField = false; 228 TokenSequence toks; 229 toks.Put(id, GetProvenance(at_)); 230 if (auto replaced{preprocessor_.MacroReplacement(toks, *this)}) { 231 auto newLineClass{ClassifyLine(*replaced, GetCurrentProvenance())}; 232 if (newLineClass.kind == 233 LineClassification::Kind::CompilerDirective) { 234 directiveSentinel_ = newLineClass.sentinel; 235 disableSourceContinuation_ = false; 236 } else { 237 disableSourceContinuation_ = 238 newLineClass.kind != LineClassification::Kind::Source; 239 } 240 } 241 } 242 } 243 if (checkLabelField) { 244 LabelField(tokens); 245 } 246 } break; 247 } 248 249 while (NextToken(tokens)) { 250 } 251 if (continuationLines_ > 255) { 252 if (features_.ShouldWarn(common::LanguageFeature::MiscSourceExtensions)) { 253 Say(common::LanguageFeature::MiscSourceExtensions, 254 GetProvenance(statementStart), 255 "%d continuation lines is more than the Fortran standard allows"_port_en_US, 256 continuationLines_); 257 } 258 } 259 260 Provenance newlineProvenance{GetCurrentProvenance()}; 261 if (std::optional<TokenSequence> preprocessed{ 262 preprocessor_.MacroReplacement(tokens, *this)}) { 263 // Reprocess the preprocessed line. 
264 LineClassification ppl{ClassifyLine(*preprocessed, newlineProvenance)}; 265 switch (ppl.kind) { 266 case LineClassification::Kind::Comment: 267 break; 268 case LineClassification::Kind::IncludeLine: 269 FortranInclude(preprocessed->TokenAt(0).begin() + ppl.payloadOffset); 270 break; 271 case LineClassification::Kind::ConditionalCompilationDirective: 272 case LineClassification::Kind::IncludeDirective: 273 case LineClassification::Kind::DefinitionDirective: 274 case LineClassification::Kind::PreprocessorDirective: 275 if (features_.ShouldWarn(common::UsageWarning::Preprocessing)) { 276 Say(common::UsageWarning::Preprocessing, 277 preprocessed->GetProvenanceRange(), 278 "Preprocessed line resembles a preprocessor directive"_warn_en_US); 279 } 280 CheckAndEmitLine(preprocessed->ToLowerCase(), newlineProvenance); 281 break; 282 case LineClassification::Kind::CompilerDirective: 283 if (preprocessed->HasRedundantBlanks()) { 284 preprocessed->RemoveRedundantBlanks(); 285 } 286 while (CompilerDirectiveContinuation(*preprocessed, ppl.sentinel)) { 287 newlineProvenance = GetCurrentProvenance(); 288 } 289 NormalizeCompilerDirectiveCommentMarker(*preprocessed); 290 preprocessed->ToLowerCase(); 291 SourceFormChange(preprocessed->ToString()); 292 CheckAndEmitLine(preprocessed->ToLowerCase().ClipComment( 293 *this, true /* skip first ! 
*/), 294 newlineProvenance); 295 break; 296 case LineClassification::Kind::Source: 297 if (inFixedForm_) { 298 if (!preprocessingOnly_ && preprocessed->HasBlanks()) { 299 preprocessed->RemoveBlanks(); 300 } 301 } else { 302 while (SourceLineContinuation(*preprocessed)) { 303 newlineProvenance = GetCurrentProvenance(); 304 } 305 if (preprocessed->HasRedundantBlanks()) { 306 preprocessed->RemoveRedundantBlanks(); 307 } 308 } 309 CheckAndEmitLine( 310 preprocessed->ToLowerCase().ClipComment(*this), newlineProvenance); 311 break; 312 } 313 } else { // no macro replacement 314 if (line.kind == LineClassification::Kind::CompilerDirective) { 315 while (CompilerDirectiveContinuation(tokens, line.sentinel)) { 316 newlineProvenance = GetCurrentProvenance(); 317 } 318 tokens.ToLowerCase(); 319 SourceFormChange(tokens.ToString()); 320 } else { // Kind::Source 321 tokens.ToLowerCase(); 322 if (inFixedForm_) { 323 EnforceStupidEndStatementRules(tokens); 324 } 325 } 326 CheckAndEmitLine(tokens, newlineProvenance); 327 } 328 directiveSentinel_ = nullptr; 329 } 330 331 void Prescanner::CheckAndEmitLine( 332 TokenSequence &tokens, Provenance newlineProvenance) { 333 tokens.CheckBadFortranCharacters( 334 messages_, *this, disableSourceContinuation_); 335 // Parenthesis nesting check does not apply while any #include is 336 // active, nor on the lines before and after a top-level #include, 337 // nor before or after conditional source. 338 // Applications play shenanigans with line continuation before and 339 // after #include'd subprogram argument lists and conditional source. 
340 if (!isNestedInIncludeDirective_ && !omitNewline_ && 341 !afterPreprocessingDirective_ && tokens.BadlyNestedParentheses() && 342 !preprocessor_.InConditional()) { 343 if (nextLine_ < limit_ && IsPreprocessorDirectiveLine(nextLine_)) { 344 // don't complain 345 } else { 346 tokens.CheckBadParentheses(messages_); 347 } 348 } 349 tokens.Emit(cooked_); 350 if (omitNewline_) { 351 omitNewline_ = false; 352 } else { 353 cooked_.Put('\n', newlineProvenance); 354 afterPreprocessingDirective_ = false; 355 } 356 } 357 358 TokenSequence Prescanner::TokenizePreprocessorDirective() { 359 CHECK(!IsAtEnd() && !inPreprocessorDirective_); 360 inPreprocessorDirective_ = true; 361 BeginStatementAndAdvance(); 362 TokenSequence tokens; 363 while (NextToken(tokens)) { 364 } 365 inPreprocessorDirective_ = false; 366 return tokens; 367 } 368 369 void Prescanner::NextLine() { 370 void *vstart{static_cast<void *>(const_cast<char *>(nextLine_))}; 371 void *v{std::memchr(vstart, '\n', limit_ - nextLine_)}; 372 if (!v) { 373 nextLine_ = limit_; 374 } else { 375 const char *nl{const_cast<const char *>(static_cast<char *>(v))}; 376 nextLine_ = nl + 1; 377 } 378 } 379 380 void Prescanner::LabelField(TokenSequence &token) { 381 int outCol{1}; 382 const char *start{at_}; 383 std::optional<int> badColumn; 384 for (; *at_ != '\n' && column_ <= 6; ++at_) { 385 if (*at_ == '\t') { 386 ++at_; 387 column_ = 7; 388 break; 389 } 390 if (int n{IsSpace(at_)}; n == 0 && 391 !(*at_ == '0' && column_ == 6)) { // '0' in column 6 becomes space 392 EmitChar(token, *at_); 393 ++outCol; 394 if (!badColumn && (column_ == 6 || !IsDecimalDigit(*at_))) { 395 badColumn = column_; 396 } 397 } 398 ++column_; 399 } 400 if (badColumn && !preprocessor_.IsNameDefined(token.CurrentOpenToken())) { 401 if ((prescannerNesting_ > 0 && *badColumn == 6 && 402 cooked_.BufferedBytes() == firstCookedCharacterOffset_) || 403 afterPreprocessingDirective_) { 404 // This is the first source line in #include'd text or conditional 405 // 
code under #if, or the first source line after such. 406 // If it turns out that the preprocessed text begins with a 407 // fixed form continuation line, the newline at the end 408 // of the latest source line beforehand will be deleted in 409 // CookedSource::Marshal(). 410 cooked_.MarkPossibleFixedFormContinuation(); 411 } else if (features_.ShouldWarn(common::UsageWarning::Scanning)) { 412 Say(common::UsageWarning::Scanning, GetProvenance(start + *badColumn - 1), 413 *badColumn == 6 414 ? "Statement should not begin with a continuation line"_warn_en_US 415 : "Character in fixed-form label field must be a digit"_warn_en_US); 416 } 417 token.clear(); 418 if (*badColumn < 6) { 419 at_ = start; 420 column_ = 1; 421 return; 422 } 423 outCol = 1; 424 } 425 if (outCol == 1) { // empty label field 426 // Emit a space so that, if the line is rescanned after preprocessing, 427 // a leading 'C' or 'D' won't be left-justified and then accidentally 428 // misinterpreted as a comment card. 429 EmitChar(token, ' '); 430 ++outCol; 431 } 432 token.CloseToken(); 433 SkipToNextSignificantCharacter(); 434 if (IsDecimalDigit(*at_)) { 435 if (features_.ShouldWarn(common::LanguageFeature::MiscSourceExtensions)) { 436 Say(common::LanguageFeature::MiscSourceExtensions, GetCurrentProvenance(), 437 "Label digit is not in fixed-form label field"_port_en_US); 438 } 439 } 440 } 441 442 // 6.3.3.5: A program unit END statement, or any other statement whose 443 // initial line resembles an END statement, shall not be continued in 444 // fixed form source. 
445 void Prescanner::EnforceStupidEndStatementRules(const TokenSequence &tokens) { 446 CharBlock cBlock{tokens.ToCharBlock()}; 447 const char *str{cBlock.begin()}; 448 std::size_t n{cBlock.size()}; 449 if (n < 3) { 450 return; 451 } 452 std::size_t j{0}; 453 for (; j < n && (str[j] == ' ' || (str[j] >= '0' && str[j] <= '9')); ++j) { 454 } 455 if (j + 3 > n || std::memcmp(str + j, "end", 3) != 0) { 456 return; 457 } 458 // It starts with END, possibly after a label. 459 auto start{allSources_.GetSourcePosition(tokens.GetCharProvenance(j))}; 460 auto end{allSources_.GetSourcePosition(tokens.GetCharProvenance(n - 1))}; 461 if (!start || !end) { 462 return; 463 } 464 if (&*start->sourceFile == &*end->sourceFile && start->line == end->line) { 465 return; // no continuation 466 } 467 j += 3; 468 static const char *const prefixes[]{"program", "subroutine", "function", 469 "blockdata", "module", "submodule", nullptr}; 470 bool isPrefix{j == n || !IsLegalInIdentifier(str[j])}; // prefix is END 471 std::size_t endOfPrefix{j - 1}; 472 for (const char *const *p{prefixes}; *p; ++p) { 473 std::size_t pLen{std::strlen(*p)}; 474 if (j + pLen <= n && std::memcmp(str + j, *p, pLen) == 0) { 475 isPrefix = true; // END thing as prefix 476 j += pLen; 477 endOfPrefix = j - 1; 478 for (; j < n && IsLegalInIdentifier(str[j]); ++j) { 479 } 480 break; 481 } 482 } 483 if (isPrefix) { 484 auto range{tokens.GetTokenProvenanceRange(1)}; 485 if (j == n) { // END or END thing [name] 486 Say(range, 487 "Program unit END statement may not be continued in fixed form source"_err_en_US); 488 } else { 489 auto endOfPrefixPos{ 490 allSources_.GetSourcePosition(tokens.GetCharProvenance(endOfPrefix))}; 491 auto next{allSources_.GetSourcePosition(tokens.GetCharProvenance(j))}; 492 if (endOfPrefixPos && next && 493 &*endOfPrefixPos->sourceFile == &*start->sourceFile && 494 endOfPrefixPos->line == start->line && 495 (&*next->sourceFile != &*start->sourceFile || 496 next->line != start->line)) { 497 
Say(range, 498 "Initial line of continued statement must not appear to be a program unit END in fixed form source"_err_en_US); 499 } 500 } 501 } 502 } 503 504 void Prescanner::SkipToEndOfLine() { 505 while (*at_ != '\n') { 506 ++at_, ++column_; 507 } 508 } 509 510 bool Prescanner::MustSkipToEndOfLine() const { 511 if (inFixedForm_ && column_ > fixedFormColumnLimit_ && !tabInCurrentLine_) { 512 return true; // skip over ignored columns in right margin (73:80) 513 } else if (*at_ == '!' && !inCharLiteral_) { 514 return !IsCompilerDirectiveSentinel(at_); 515 } else { 516 return false; 517 } 518 } 519 520 void Prescanner::NextChar() { 521 CHECK(*at_ != '\n'); 522 int n{IsSpace(at_)}; 523 at_ += n ? n : 1; 524 ++column_; 525 while (at_[0] == '\xef' && at_[1] == '\xbb' && at_[2] == '\xbf') { 526 // UTF-8 byte order mark - treat this file as UTF-8 527 at_ += 3; 528 encoding_ = Encoding::UTF_8; 529 } 530 SkipToNextSignificantCharacter(); 531 } 532 533 // Skip everything that should be ignored until the next significant 534 // character is reached; handles C-style comments in preprocessing 535 // directives, Fortran ! comments, stuff after the right margin in 536 // fixed form, and all forms of line continuation. 
537 bool Prescanner::SkipToNextSignificantCharacter() { 538 auto anyContinuationLine{false}; 539 if (inPreprocessorDirective_) { 540 SkipCComments(); 541 } else { 542 bool mightNeedSpace{false}; 543 if (MustSkipToEndOfLine()) { 544 SkipToEndOfLine(); 545 } else { 546 mightNeedSpace = *at_ == '\n'; 547 } 548 for (; Continuation(mightNeedSpace); mightNeedSpace = false) { 549 anyContinuationLine = true; 550 ++continuationLines_; 551 if (MustSkipToEndOfLine()) { 552 SkipToEndOfLine(); 553 } 554 } 555 if (*at_ == '\t') { 556 tabInCurrentLine_ = true; 557 } 558 } 559 return anyContinuationLine; 560 } 561 562 void Prescanner::SkipCComments() { 563 while (true) { 564 if (IsCComment(at_)) { 565 if (const char *after{SkipCComment(at_)}) { 566 column_ += after - at_; 567 // May have skipped over one or more newlines; relocate the start of 568 // the next line. 569 nextLine_ = at_ = after; 570 NextLine(); 571 } else { 572 // Don't emit any messages about unclosed C-style comments, because 573 // the sequence /* can appear legally in a FORMAT statement. There's 574 // no ambiguity, since the sequence */ cannot appear legally. 
575 break; 576 } 577 } else if (inPreprocessorDirective_ && at_[0] == '\\' && at_ + 2 < limit_ && 578 at_[1] == '\n' && !IsAtEnd()) { 579 BeginSourceLineAndAdvance(); 580 } else { 581 break; 582 } 583 } 584 } 585 586 void Prescanner::SkipSpaces() { 587 while (IsSpaceOrTab(at_)) { 588 NextChar(); 589 } 590 insertASpace_ = false; 591 } 592 593 const char *Prescanner::SkipWhiteSpace(const char *p) { 594 while (int n{IsSpaceOrTab(p)}) { 595 p += n; 596 } 597 return p; 598 } 599 600 const char *Prescanner::SkipWhiteSpaceAndCComments(const char *p) const { 601 while (true) { 602 if (int n{IsSpaceOrTab(p)}) { 603 p += n; 604 } else if (IsCComment(p)) { 605 if (const char *after{SkipCComment(p)}) { 606 p = after; 607 } else { 608 break; 609 } 610 } else { 611 break; 612 } 613 } 614 return p; 615 } 616 617 const char *Prescanner::SkipCComment(const char *p) const { 618 char star{' '}, slash{' '}; 619 p += 2; 620 while (star != '*' || slash != '/') { 621 if (p >= limit_) { 622 return nullptr; // signifies an unterminated comment 623 } 624 star = slash; 625 slash = *p++; 626 } 627 return p; 628 } 629 630 bool Prescanner::NextToken(TokenSequence &tokens) { 631 CHECK(at_ >= start_ && at_ < limit_); 632 if (InFixedFormSource() && !preprocessingOnly_) { 633 SkipSpaces(); 634 } else { 635 if (*at_ == '/' && IsCComment(at_)) { 636 // Recognize and skip over classic C style /*comments*/ when 637 // outside a character literal. 638 if (features_.ShouldWarn(LanguageFeature::ClassicCComments)) { 639 Say(LanguageFeature::ClassicCComments, GetCurrentProvenance(), 640 "nonstandard usage: C-style comment"_port_en_US); 641 } 642 SkipCComments(); 643 } 644 if (IsSpaceOrTab(at_)) { 645 // Compress free-form white space into a single space character. 646 const auto theSpace{at_}; 647 char previous{at_ <= start_ ? ' ' : at_[-1]}; 648 NextChar(); 649 SkipSpaces(); 650 if (*at_ == '\n' && !omitNewline_) { 651 // Discard white space at the end of a line. 
652 } else if (!inPreprocessorDirective_ && 653 (previous == '(' || *at_ == '(' || *at_ == ')')) { 654 // Discard white space before/after '(' and before ')', unless in a 655 // preprocessor directive. This helps yield space-free contiguous 656 // names for generic interfaces like OPERATOR( + ) and 657 // READ ( UNFORMATTED ), without misinterpreting #define f (notAnArg). 658 // This has the effect of silently ignoring the illegal spaces in 659 // the array constructor ( /1,2/ ) but that seems benign; it's 660 // hard to avoid that while still removing spaces from OPERATOR( / ) 661 // and OPERATOR( // ). 662 } else { 663 // Preserve the squashed white space as a single space character. 664 tokens.PutNextTokenChar(' ', GetProvenance(theSpace)); 665 tokens.CloseToken(); 666 return true; 667 } 668 } 669 } 670 if (insertASpace_) { 671 tokens.PutNextTokenChar(' ', spaceProvenance_); 672 insertASpace_ = false; 673 } 674 if (*at_ == '\n') { 675 return false; 676 } 677 const char *start{at_}; 678 if (*at_ == '\'' || *at_ == '"') { 679 QuotedCharacterLiteral(tokens, start); 680 preventHollerith_ = false; 681 } else if (IsDecimalDigit(*at_)) { 682 int n{0}, digits{0}; 683 static constexpr int maxHollerith{256 /*lines*/ * (132 - 6 /*columns*/)}; 684 do { 685 if (n < maxHollerith) { 686 n = 10 * n + DecimalDigitValue(*at_); 687 } 688 EmitCharAndAdvance(tokens, *at_); 689 ++digits; 690 if (InFixedFormSource()) { 691 SkipSpaces(); 692 } 693 } while (IsDecimalDigit(*at_)); 694 if ((*at_ == 'h' || *at_ == 'H') && n > 0 && n < maxHollerith && 695 !preventHollerith_) { 696 Hollerith(tokens, n, start); 697 } else if (*at_ == '.') { 698 while (IsDecimalDigit(EmitCharAndAdvance(tokens, *at_))) { 699 } 700 ExponentAndKind(tokens); 701 } else if (ExponentAndKind(tokens)) { 702 } else if (digits == 1 && n == 0 && (*at_ == 'x' || *at_ == 'X') && 703 inPreprocessorDirective_) { 704 do { 705 EmitCharAndAdvance(tokens, *at_); 706 } while (IsHexadecimalDigit(*at_)); 707 } else if (at_[0] == 
'_' && (at_[1] == '\'' || at_[1] == '"')) { // 4_"..." 708 EmitCharAndAdvance(tokens, *at_); 709 QuotedCharacterLiteral(tokens, start); 710 } else if (IsLetter(*at_) && !preventHollerith_ && 711 parenthesisNesting_ > 0) { 712 // Handles FORMAT(3I9HHOLLERITH) by skipping over the first I so that 713 // we don't misrecognize I9HOLLERITH as an identifier in the next case. 714 EmitCharAndAdvance(tokens, *at_); 715 } 716 preventHollerith_ = false; 717 } else if (*at_ == '.') { 718 char nch{EmitCharAndAdvance(tokens, '.')}; 719 if (!inPreprocessorDirective_ && IsDecimalDigit(nch)) { 720 while (IsDecimalDigit(EmitCharAndAdvance(tokens, *at_))) { 721 } 722 ExponentAndKind(tokens); 723 } else if (nch == '.' && EmitCharAndAdvance(tokens, '.') == '.') { 724 EmitCharAndAdvance(tokens, '.'); // variadic macro definition ellipsis 725 } 726 preventHollerith_ = false; 727 } else if (IsLegalInIdentifier(*at_)) { 728 int parts{1}; 729 const char *afterLast{nullptr}; 730 do { 731 EmitChar(tokens, *at_); 732 ++at_, ++column_; 733 afterLast = at_; 734 if (SkipToNextSignificantCharacter() && IsLegalIdentifierStart(*at_)) { 735 tokens.CloseToken(); 736 ++parts; 737 } 738 } while (IsLegalInIdentifier(*at_)); 739 if (parts >= 3) { 740 // Subtlety: When an identifier is split across three or more continuation 741 // lines (or two continuation lines, immediately preceded or followed 742 // by '&' free form continuation line markers, its parts are kept as 743 // distinct pp-tokens so that macro replacement operates on them 744 // independently. This trick accommodates the historic practice of 745 // using line continuation for token pasting after replacement. 
746 } else if (parts == 2) { 747 if (afterLast && afterLast < limit_) { 748 afterLast = SkipWhiteSpace(afterLast); 749 } 750 if ((start > start_ && start[-1] == '&') || 751 (afterLast && afterLast < limit_ && 752 (*afterLast == '&' || *afterLast == '\n'))) { 753 // call & call foo& call foo& 754 // &MACRO& OR &MACRO& OR &MACRO 755 // &foo(...) &(...) 756 } else { 757 tokens.ReopenLastToken(); 758 } 759 } 760 if (InFixedFormSource()) { 761 SkipSpaces(); 762 } 763 if ((*at_ == '\'' || *at_ == '"') && 764 tokens.CharAt(tokens.SizeInChars() - 1) == '_') { // kind_"..." 765 QuotedCharacterLiteral(tokens, start); 766 preventHollerith_ = false; 767 } else { 768 preventHollerith_ = true; // DO 10 H = ... 769 } 770 } else if (*at_ == '*') { 771 if (EmitCharAndAdvance(tokens, '*') == '*') { 772 EmitCharAndAdvance(tokens, '*'); 773 } else { 774 // Subtle ambiguity: 775 // CHARACTER*2H declares H because *2 is a kind specifier 776 // DATAC/N*2H / is repeated Hollerith 777 preventHollerith_ = !slashInCurrentStatement_; 778 } 779 } else { 780 char ch{*at_}; 781 if (ch == '(') { 782 if (parenthesisNesting_++ == 0) { 783 isPossibleMacroCall_ = tokens.SizeInTokens() > 0 && 784 preprocessor_.IsFunctionLikeDefinition( 785 tokens.TokenAt(tokens.SizeInTokens() - 1)); 786 } 787 } else if (ch == ')' && parenthesisNesting_ > 0) { 788 --parenthesisNesting_; 789 } 790 char nch{EmitCharAndAdvance(tokens, ch)}; 791 preventHollerith_ = false; 792 if ((nch == '=' && 793 (ch == '<' || ch == '>' || ch == '/' || ch == '=' || ch == '!')) || 794 (ch == nch && 795 (ch == '/' || ch == ':' || ch == '*' || ch == '#' || ch == '&' || 796 ch == '|' || ch == '<' || ch == '>')) || 797 (ch == '=' && nch == '>')) { 798 // token comprises two characters 799 EmitCharAndAdvance(tokens, nch); 800 } else if (ch == '/') { 801 slashInCurrentStatement_ = true; 802 } else if (ch == ';' && InFixedFormSource()) { 803 SkipSpaces(); 804 if (IsDecimalDigit(*at_)) { 805 if (features_.ShouldWarn( 806 
common::LanguageFeature::MiscSourceExtensions)) { 807 Say(common::LanguageFeature::MiscSourceExtensions, 808 GetProvenanceRange(at_, at_ + 1), 809 "Label should be in the label field"_port_en_US); 810 } 811 } 812 } 813 } 814 tokens.CloseToken(); 815 return true; 816 } 817 818 bool Prescanner::ExponentAndKind(TokenSequence &tokens) { 819 char ed{ToLowerCaseLetter(*at_)}; 820 if (ed != 'e' && ed != 'd') { 821 return false; 822 } 823 EmitCharAndAdvance(tokens, ed); 824 if (*at_ == '+' || *at_ == '-') { 825 EmitCharAndAdvance(tokens, *at_); 826 } 827 while (IsDecimalDigit(*at_)) { 828 EmitCharAndAdvance(tokens, *at_); 829 } 830 if (*at_ == '_') { 831 while (IsLegalInIdentifier(EmitCharAndAdvance(tokens, *at_))) { 832 } 833 } 834 return true; 835 } 836 837 void Prescanner::QuotedCharacterLiteral( 838 TokenSequence &tokens, const char *start) { 839 char quote{*at_}; 840 const char *end{at_ + 1}; 841 inCharLiteral_ = true; 842 continuationInCharLiteral_ = true; 843 const auto emit{[&](char ch) { EmitChar(tokens, ch); }}; 844 const auto insert{[&](char ch) { EmitInsertedChar(tokens, ch); }}; 845 bool isEscaped{false}; 846 bool escapesEnabled{features_.IsEnabled(LanguageFeature::BackslashEscapes)}; 847 while (true) { 848 if (*at_ == '\\') { 849 if (escapesEnabled) { 850 isEscaped = !isEscaped; 851 } else { 852 // The parser always processes escape sequences, so don't confuse it 853 // when escapes are disabled. 854 insert('\\'); 855 } 856 } else { 857 isEscaped = false; 858 } 859 EmitQuotedChar(static_cast<unsigned char>(*at_), emit, insert, false, 860 Encoding::LATIN_1); 861 while (PadOutCharacterLiteral(tokens)) { 862 } 863 if (*at_ == '\n') { 864 if (!inPreprocessorDirective_) { 865 Say(GetProvenanceRange(start, end), 866 "Incomplete character literal"_err_en_US); 867 } 868 break; 869 } 870 // Here's a weird edge case. 
When there's a two or more following 871 // continuation lines at this point, and the entire significant part of 872 // the next continuation line is the name of a keyword macro, replace 873 // it in the character literal with its definition. Example: 874 // #define FOO foo 875 // subroutine subr() bind(c, name="my_& 876 // &FOO& 877 // &_bar") ... 878 // produces a binding name of "my_foo_bar". 879 while (at_[1] == '&' && nextLine_ < limit_ && !InFixedFormSource()) { 880 const char *idStart{nextLine_}; 881 if (const char *amper{SkipWhiteSpace(nextLine_)}; *amper == '&') { 882 idStart = amper + 1; 883 } 884 if (IsLegalIdentifierStart(*idStart)) { 885 std::size_t idLen{1}; 886 for (; IsLegalInIdentifier(idStart[idLen]); ++idLen) { 887 } 888 if (idStart[idLen] == '&') { 889 CharBlock id{idStart, idLen}; 890 if (preprocessor_.IsNameDefined(id)) { 891 TokenSequence ppTokens; 892 ppTokens.Put(id, GetProvenance(idStart)); 893 if (auto replaced{ 894 preprocessor_.MacroReplacement(ppTokens, *this)}) { 895 tokens.Put(*replaced); 896 at_ = &idStart[idLen - 1]; 897 NextLine(); 898 continue; // try again on the next line 899 } 900 } 901 } 902 } 903 break; 904 } 905 end = at_ + 1; 906 NextChar(); 907 if (*at_ == quote && !isEscaped) { 908 // A doubled unescaped quote mark becomes a single instance of that 909 // quote character in the literal (later). There can be spaces between 910 // the quotes in fixed form source. 
911 EmitChar(tokens, quote); 912 inCharLiteral_ = false; // for cases like print *, '...'!comment 913 NextChar(); 914 if (InFixedFormSource()) { 915 SkipSpaces(); 916 } 917 if (*at_ != quote) { 918 break; 919 } 920 inCharLiteral_ = true; 921 } 922 } 923 continuationInCharLiteral_ = false; 924 inCharLiteral_ = false; 925 } 926 927 void Prescanner::Hollerith( 928 TokenSequence &tokens, int count, const char *start) { 929 inCharLiteral_ = true; 930 CHECK(*at_ == 'h' || *at_ == 'H'); 931 EmitChar(tokens, 'H'); 932 while (count-- > 0) { 933 if (PadOutCharacterLiteral(tokens)) { 934 } else if (*at_ == '\n') { 935 if (features_.ShouldWarn(common::UsageWarning::Scanning)) { 936 Say(common::UsageWarning::Scanning, GetProvenanceRange(start, at_), 937 "Possible truncated Hollerith literal"_warn_en_US); 938 } 939 break; 940 } else { 941 NextChar(); 942 // Each multi-byte character encoding counts as a single character. 943 // No escape sequences are recognized. 944 // Hollerith is always emitted to the cooked character 945 // stream in UTF-8. 946 DecodedCharacter decoded{DecodeCharacter( 947 encoding_, at_, static_cast<std::size_t>(limit_ - at_), false)}; 948 if (decoded.bytes > 0) { 949 EncodedCharacter utf8{ 950 EncodeCharacter<Encoding::UTF_8>(decoded.codepoint)}; 951 for (int j{0}; j < utf8.bytes; ++j) { 952 EmitChar(tokens, utf8.buffer[j]); 953 } 954 at_ += decoded.bytes - 1; 955 } else { 956 Say(GetProvenanceRange(start, at_), 957 "Bad character in Hollerith literal"_err_en_US); 958 break; 959 } 960 } 961 } 962 if (*at_ != '\n') { 963 NextChar(); 964 } 965 inCharLiteral_ = false; 966 } 967 968 // In fixed form, source card images must be processed as if they were at 969 // least 72 columns wide, at least in character literal contexts. 
// Supplies blank padding at the end of a short fixed form line.
// When the character after at_ is a newline (fixed form, no tab seen in the
// current line) and column_ is still below fixedFormColumnLimit_, one blank
// is appended to 'tokens' and true is returned.  Once the limit has been
// reached a fixed form continuation is attempted instead.  Returns false
// whenever no padding blank was emitted by this call.
bool Prescanner::PadOutCharacterLiteral(TokenSequence &tokens) {
  while (inFixedForm_ && !tabInCurrentLine_ && at_[1] == '\n') {
    if (column_ < fixedFormColumnLimit_) {
      tokens.PutNextTokenChar(' ', spaceProvenance_);
      ++column_;
      return true;
    }
    if (!FixedFormContinuation(false /*no need to insert space*/) ||
        tabInCurrentLine_) {
      return false;
    }
    CHECK(column_ == 7);
    --at_; // point to column 6 of continuation line
    column_ = 6;
  }
  return false;
}

// Case-insensitive test for "process" in the characters that follow *p.
// The pre-increment skips p[0], which callers have already checked is '@'.
static bool IsAtProcess(const char *p) {
  static const char pAtProc[]{"process"};
  for (std::size_t i{0}; i < sizeof pAtProc - 1; ++i) {
    if (ToLowerCaseLetter(*++p) != pAtProc[i])
      return false;
  }
  return true;
}

// Returns true when the line beginning at 'start' is a comment line in
// fixed form: an @process line, a line with a comment character (or '%',
// or 'D'/'d' when OldDebugLines is disabled) in column 1, a line that is
// blank up to the column limit, a line whose first nonblank is '!' (other
// than in column 6 of a tab-free line), or an empty line.
bool Prescanner::IsFixedFormCommentLine(const char *start) const {
  const char *p{start};

  // The @process directive must start in column 1.
  if (*p == '@' && IsAtProcess(p)) {
    return true;
  }

  if (IsFixedFormCommentChar(*p) || *p == '%' || // VAX %list, %eject, &c.
      ((*p == 'D' || *p == 'd') &&
          !features_.IsEnabled(LanguageFeature::OldDebugLines))) {
    return true;
  }
  // Skip leading blanks and tabs, remembering whether any tab was seen.
  bool anyTabs{false};
  while (true) {
    if (int n{IsSpace(p)}) {
      p += n;
    } else if (*p == '\t') {
      anyTabs = true;
      ++p;
    } else if (*p == '0' && !anyTabs && p == start + 5) {
      ++p; // 0 in column 6 must be treated as a space
    } else {
      break;
    }
  }
  if (!anyTabs && p >= start + fixedFormColumnLimit_) {
    return true;
  }
  // A '!' at start + 5 (column 6) of a tab-free line is not accepted here.
  if (*p == '!' && !inCharLiteral_ && (anyTabs || p != start + 5)) {
    return true;
  }
  return *p == '\n';
}

// If the free form line at 'p' is a comment (its first significant
// character is '!' or a newline, or it is an @process line), returns a
// pointer to that character; otherwise returns nullptr.
const char *Prescanner::IsFreeFormComment(const char *p) const {
  p = SkipWhiteSpaceAndCComments(p);
  if (*p == '!' || *p == '\n') {
    return p;
  } else if (*p == '@') {
    return IsAtProcess(p) ? p : nullptr;
  } else {
    return nullptr;
  }
}

// Recognizes a Fortran INCLUDE line.  Returns the offset from 'start' of
// the opening quote of the path, or std::nullopt when the line is not an
// INCLUDE line or when expandIncludeLines_ is false.  The keyword is
// matched case-insensitively, with SkipWhiteSpace() applied after each
// letter.
std::optional<std::size_t> Prescanner::IsIncludeLine(const char *start) const {
  if (!expandIncludeLines_) {
    return std::nullopt;
  }
  const char *p{SkipWhiteSpace(start)};
  if (*p == '0' && inFixedForm_ && p == start + 5) {
    // Accept "     0INCLUDE" in fixed form.
    p = SkipWhiteSpace(p + 1);
  }
  for (const char *q{"include"}; *q; ++q) {
    if (ToLowerCaseLetter(*p) != *q) {
      return std::nullopt;
    }
    p = SkipWhiteSpace(p + 1);
  }
  if (IsDecimalDigit(*p)) { // accept & ignore a numeric kind prefix
    for (p = SkipWhiteSpace(p + 1); IsDecimalDigit(*p);
         p = SkipWhiteSpace(p + 1)) {
    }
    if (*p != '_') {
      return std::nullopt;
    }
    p = SkipWhiteSpace(p + 1);
  }
  if (*p == '"' || *p == '\'') {
    return {p - start};
  }
  return std::nullopt;
}

// Processes a Fortran INCLUDE line.  'firstQuote' points at or before the
// opening quote of the path.  The path is extracted, the file is opened,
// and, when it is not empty, it is prescanned by a nested Prescanner that
// uses a fresh Preprocessor.
void Prescanner::FortranInclude(const char *firstQuote) {
  const char *p{firstQuote};
  while (*p != '"' && *p != '\'') {
    ++p;
  }
  char quote{*p};
  std::string path;
  // Collect the path up to the closing quote; a doubled quote character
  // contributes a single instance of it to the path.
  for (++p; *p != '\n'; ++p) {
    if (*p == quote) {
      if (p[1] != quote) {
        break;
      }
      ++p;
    }
    path += *p;
  }
  if (*p != quote) {
    // The line ended before a closing quote was found.
    Say(GetProvenanceRange(firstQuote, p),
        "malformed path name string"_err_en_US);
    return;
  }
  p = SkipWhiteSpace(p + 1);
  if (*p != '\n' && *p != '!') {
    // Something other than a comment follows the path.
    const char *garbage{p};
    for (; *p != '\n' && *p != '!'; ++p) {
    }
    if (features_.ShouldWarn(common::UsageWarning::Scanning)) {
      Say(common::UsageWarning::Scanning, GetProvenanceRange(garbage, p),
          "excess characters after path name"_warn_en_US);
    }
  }
  std::string buf;
  llvm::raw_string_ostream error{buf};
  Provenance provenance{GetProvenance(nextLine_)};
  // The directory of the including file, when known, is handed to Open().
  // NOTE(review): presumably Open() searches it first -- confirm.
  std::optional<std::string> prependPath;
  if (const SourceFile * currentFile{allSources_.GetSourceFile(provenance)}) {
    prependPath = DirectoryName(currentFile->path());
  }
  const SourceFile *included{
      allSources_.Open(path, error, std::move(prependPath))};
  if (!included) {
    Say(provenance, "INCLUDE: %s"_err_en_US, buf);
  } else if (included->bytes() > 0) {
    ProvenanceRange includeLineRange{
        provenance, static_cast<std::size_t>(p - nextLine_)};
    ProvenanceRange fileRange{
        allSources_.AddIncludedFile(*included, includeLineRange)};
    // The included file gets a new Preprocessor; only the standard macros
    // and _CUDA are carried over, and only if defined in the current one.
    Preprocessor cleanPrepro{allSources_};
    if (preprocessor_.IsNameDefined("__FILE__"s)) {
      cleanPrepro.DefineStandardMacros(); // __FILE__, __LINE__, &c.
    }
    if (preprocessor_.IsNameDefined("_CUDA"s)) {
      cleanPrepro.Define("_CUDA"s, "1");
    }
    Prescanner{*this, cleanPrepro, /*isNestedInIncludeDirective=*/false}
        .set_encoding(included->encoding())
        .Prescan(fileRange);
  }
}

// If the line at 'start' is a preprocessor directive, returns a pointer to
// the first character after the '#' and any following white space;
// otherwise returns nullptr.  In fixed form, a '#' found at start + 5
// after only blanks is not treated as a directive.
const char *Prescanner::IsPreprocessorDirectiveLine(const char *start) const {
  const char *p{start};
  while (int n{IsSpace(p)}) {
    p += n;
  }
  if (*p == '#') {
    if (inFixedForm_ && p == start + 5) {
      return nullptr;
    }
  } else {
    p = SkipWhiteSpace(p);
    if (*p != '#') {
      return nullptr;
    }
  }
  return SkipWhiteSpace(p + 1);
}

// True when the line at nextLine_ is a preprocessor directive line.
bool Prescanner::IsNextLinePreprocessorDirective() const {
  return IsPreprocessorDirectiveLine(nextLine_) != nullptr;
}

// Called while looking for a continuation line.  Returns true when the
// next line was consumed (a comment line was skipped, or a conditional
// compilation or other preprocessor directive was processed) so that the
// caller should look again; returns false otherwise.  'afterAmpersand' is
// true when the line being continued ended with '&'.
bool Prescanner::SkipCommentLine(bool afterAmpersand) {
  if (IsAtEnd()) {
    if (afterAmpersand && prescannerNesting_ > 0) {
      // A continuation marker at the end of the last line in an
      // include file inhibits the newline for that line.
      SkipToEndOfLine();
      omitNewline_ = true;
    }
  } else if (inPreprocessorDirective_) {
    // Nothing is skipped while a preprocessor directive is being scanned.
  } else {
    auto lineClass{ClassifyLine(nextLine_)};
    if (lineClass.kind == LineClassification::Kind::Comment) {
      NextLine();
      return true;
    } else if (lineClass.kind ==
            LineClassification::Kind::ConditionalCompilationDirective ||
        lineClass.kind == LineClassification::Kind::PreprocessorDirective) {
      // Allow conditional compilation directives (e.g., #ifdef) to affect
      // continuation lines.
      // Allow other preprocessor directives, too, except #include
      // (when it does not follow '&'), #define, and #undef (because
      // they cannot be allowed to affect preceding text on a
      // continued line).
      preprocessor_.Directive(TokenizePreprocessorDirective(), *this);
      return true;
    } else if (afterAmpersand &&
        (lineClass.kind == LineClassification::Kind::DefinitionDirective ||
            lineClass.kind == LineClassification::Kind::IncludeDirective ||
            lineClass.kind == LineClassification::Kind::IncludeLine)) {
      SkipToEndOfLine();
      omitNewline_ = true;
      skipLeadingAmpersand_ = true;
    }
  }
  return false;
}

// If the line at nextLine_ is a fixed form continuation line, returns a
// pointer to the position at which scanning should resume on it;
// otherwise returns nullptr.  May set insertASpace_ (only when
// 'mightNeedSpace' is true) and resets/sets tabInCurrentLine_.
const char *Prescanner::FixedFormContinuationLine(bool mightNeedSpace) {
  if (IsAtEnd()) {
    return nullptr;
  }
  tabInCurrentLine_ = false;
  char col1{*nextLine_};
  if (IsFixedFormCommentChar(col1)) {
    int j{1};
    if (InCompilerDirective()) {
      // Must be a continued compiler directive.
      for (; j < 5; ++j) {
        char ch{directiveSentinel_[j - 1]};
        if (ch == '\0') {
          break;
        }
        if (ch != ToLowerCaseLetter(nextLine_[j])) {
          return nullptr;
        }
      }
    } else if (features_.IsEnabled(LanguageFeature::OpenMP)) {
      // Fixed Source Form Conditional Compilation Sentinels.
      if (nextLine_[1] != '$') {
        return nullptr;
      }
      j++;
    } else {
      return nullptr;
    }
    // The remaining columns before column 6 must be blank.
    for (; j < 5; ++j) {
      if (nextLine_[j] != ' ') {
        return nullptr;
      }
    }
    // Column 6 must hold a continuation marker: anything other than a
    // newline, '0', blank, or tab.
    const char *col6{nextLine_ + 5};
    if (*col6 != '\n' && *col6 != '0' && !IsSpaceOrTab(col6)) {
      if (mightNeedSpace && !IsSpace(nextLine_ + 6)) {
        insertASpace_ = true;
      }
      return nextLine_ + 6;
    }
    return nullptr;
  } else {
    // Normal case: not in a compiler directive.
    if (col1 == '&' &&
        features_.IsEnabled(
            LanguageFeature::FixedFormContinuationWithColumn1Ampersand)) {
      // Extension: '&' as continuation marker
      if (features_.ShouldWarn(
              LanguageFeature::FixedFormContinuationWithColumn1Ampersand)) {
        Say(LanguageFeature::FixedFormContinuationWithColumn1Ampersand,
            GetProvenance(nextLine_), "nonstandard usage"_port_en_US);
      }
      return nextLine_ + 1;
    }
    if (col1 == '\t' && nextLine_[1] >= '1' && nextLine_[1] <= '9') {
      tabInCurrentLine_ = true;
      return nextLine_ + 2; // VAX extension
    }
    if ((col1 == ' ' ||
            ((col1 == 'D' || col1 == 'd') &&
                features_.IsEnabled(LanguageFeature::OldDebugLines))) &&
        nextLine_[1] == ' ' && nextLine_[2] == ' ' && nextLine_[3] == ' ' &&
        nextLine_[4] == ' ') {
      const char *col6{nextLine_ + 5};
      if (*col6 != '\n' && *col6 != '0' && !IsSpaceOrTab(col6)) {
        if ((*col6 == 'i' || *col6 == 'I') && IsIncludeLine(nextLine_)) {
          // It's an INCLUDE line, not a continuation
        } else {
          return nextLine_ + 6;
        }
      }
    }
    if (IsImplicitContinuation()) {
      return nextLine_;
    }
  }
  return nullptr; // not a continuation line
}

// If the line at nextLine_ can continue the current free form line,
// returns a pointer to the position at which scanning should resume on
// it; otherwise returns nullptr.  'ampersand' is true when the line being
// continued ended with '&'.  May set insertASpace_.
const char *Prescanner::FreeFormContinuationLine(bool ampersand) {
  const char *p{nextLine_};
  if (p >= limit_) {
    return nullptr;
  }
  p = SkipWhiteSpace(p);
  if (InCompilerDirective()) {
    // The continuation must repeat '!' and the current directive sentinel
    // (compared case-insensitively).
    if (*p++ != '!') {
      return nullptr;
    }
    for (const char *s{directiveSentinel_}; *s != '\0'; ++p, ++s) {
      if (*s != ToLowerCaseLetter(*p)) {
        return nullptr;
      }
    }
    p = SkipWhiteSpace(p);
    if (*p == '&') {
      if (!ampersand) {
        insertASpace_ = true;
      }
      return p + 1;
    } else if (ampersand) {
      return p;
    } else {
      return nullptr;
    }
  } else {
    if (*p == '&') {
      return p + 1;
    } else if (*p == '!' || *p == '\n' || *p == '#') {
      return nullptr;
    } else if (ampersand || IsImplicitContinuation()) {
      if (continuationInCharLiteral_) {
        // 'a'&            -> 'a''b' == "a'b"
        //   'b'
        if (features_.ShouldWarn(
                common::LanguageFeature::MiscSourceExtensions)) {
          Say(common::LanguageFeature::MiscSourceExtensions,
              GetProvenanceRange(p, p + 1),
              "Character literal continuation line should have been preceded by '&'"_port_en_US);
        }
      } else if (p > nextLine_) {
        // Back up onto the last character skipped before the text.
        --p;
      } else {
        insertASpace_ = true;
      }
      return p;
    } else {
      return nullptr;
    }
  }
}

// Attempts to continue the current fixed form line onto a following line,
// skipping comment lines in between.  On success the scanner is positioned
// on the continuation line with column_ set to 7 and true is returned.
bool Prescanner::FixedFormContinuation(bool mightNeedSpace) {
  // N.B. We accept '&' as a continuation indicator in fixed form, too,
  // but not in a character literal.
  if (*at_ == '&' && inCharLiteral_) {
    return false;
  }
  do {
    if (const char *cont{FixedFormContinuationLine(mightNeedSpace)}) {
      BeginSourceLine(cont);
      column_ = 7;
      NextLine();
      return true;
    }
  } while (SkipCommentLine(false /* not after ampersand */));
  return false;
}

// Attempts to continue the current free form line onto a following line,
// skipping comment lines in between.  Returns true when the scanner has
// been positioned on a continuation line.
bool Prescanner::FreeFormContinuation() {
  const char *p{at_};
  bool ampersand{*p == '&'};
  if (ampersand) {
    p = SkipWhiteSpace(p + 1);
  }
  if (*p != '\n') {
    // Something other than the end of the line follows.
    if (inCharLiteral_) {
      return false;
    } else if (*p == '!') { // & ! comment - ok
    } else if (ampersand && isPossibleMacroCall_ && (*p == ',' || *p == ')')) {
      return false; // allow & at end of a macro argument
    } else if (features_.ShouldWarn(LanguageFeature::CruftAfterAmpersand)) {
      Say(LanguageFeature::CruftAfterAmpersand, GetProvenance(p),
          "missing ! before comment after &"_warn_en_US);
    }
  }
  do {
    if (const char *cont{FreeFormContinuationLine(ampersand)}) {
      BeginSourceLine(cont);
      NextLine();
      return true;
    }
  } while (SkipCommentLine(ampersand));
  return false;
}

// Implicit line continuation allows a preprocessor macro call with
// arguments to span multiple lines.
bool Prescanner::IsImplicitContinuation() const {
  return !inPreprocessorDirective_ && !inCharLiteral_ && isPossibleMacroCall_ &&
      parenthesisNesting_ > 0 && !IsAtEnd() &&
      ClassifyLine(nextLine_).kind == LineClassification::Kind::Source;
}

// Attempts a line continuation at the current position.  Dispatches to the
// fixed or free form routine when *at_ is a newline or '&', and also
// handles a backslash at the end of a free form line when
// backslashFreeFormContinuation_ is set.  Always returns false when
// disableSourceContinuation_ is set.
bool Prescanner::Continuation(bool mightNeedFixedFormSpace) {
  if (disableSourceContinuation_) {
    return false;
  } else if (*at_ == '\n' || *at_ == '&') {
    if (inFixedForm_) {
      return FixedFormContinuation(mightNeedFixedFormSpace);
    } else {
      return FreeFormContinuation();
    }
  } else if (*at_ == '\\' && at_ + 2 == nextLine_ &&
      backslashFreeFormContinuation_ && !inFixedForm_ && nextLine_ < limit_) {
    // cpp-like handling of \ at end of a free form source line
    BeginSourceLine(nextLine_);
    NextLine();
    return true;
  } else {
    return false;
  }
}

// Recognizes an initial (non-continuation) compiler directive line in
// fixed form.  On success, returns a CompilerDirective classification
// carrying the offset of the directive's payload from 'start' and the
// registered sentinel string; otherwise returns std::nullopt.
std::optional<Prescanner::LineClassification>
Prescanner::IsFixedFormCompilerDirectiveLine(const char *start) const {
  const char *p{start};
  char col1{*p++};
  if (!IsFixedFormCommentChar(col1)) {
    return std::nullopt;
  }
  // Gather up to four lower-cased sentinel characters from columns 2-5.
  char sentinel[5], *sp{sentinel};
  int column{2};
  for (; column < 6; ++column, ++p) {
    if (*p == '\n' || IsSpaceOrTab(p)) {
      break;
    }
    if (sp == sentinel + 1 && sentinel[0] == '$' && IsDecimalDigit(*p)) {
      // OpenMP conditional compilation line: leave the label alone
      break;
    }
    *sp++ = ToLowerCaseLetter(*p);
  }
  if (column == 6) {
    if (*p == '0') {
      ++p;
    } else if (int n{IsSpaceOrTab(p)}) {
      p += n;
    } else {
      // This is a Continuation line, not an initial directive line.
      return std::nullopt;
    }
  }
  if (sp == sentinel) {
    return std::nullopt;
  }
  *sp = '\0';
  if (const char *ss{IsCompilerDirectiveSentinel(
          sentinel, static_cast<std::size_t>(sp - sentinel))}) {
    std::size_t payloadOffset = p - start;
    return {LineClassification{
        LineClassification::Kind::CompilerDirective, payloadOffset, ss}};
  }
  return std::nullopt;
}

// Recognizes a compiler directive line in free form: optional white space,
// '!', and a registered sentinel.  On success, returns a CompilerDirective
// classification carrying an offset from 'start' and the sentinel.
std::optional<Prescanner::LineClassification>
Prescanner::IsFreeFormCompilerDirectiveLine(const char *start) const {
  if (const char *p{SkipWhiteSpace(start)}; p && *p++ == '!') {
    if (auto maybePair{IsCompilerDirectiveSentinel(p)}) {
      auto offset{static_cast<std::size_t>(maybePair->second - start)};
      return {LineClassification{LineClassification::Kind::CompilerDirective,
          offset, maybePair->first}};
    }
  }
  return std::nullopt;
}

// Registers 'dir' as a compiler directive sentinel.  Its lower-cased bytes
// are packed into a 64-bit value that sets two bits of the Bloom filter,
// and 'dir' itself (as given) is added to the set of known sentinels.
// Returns *this so that calls can be chained.
Prescanner &Prescanner::AddCompilerDirectiveSentinel(const std::string &dir) {
  std::uint64_t packed{0};
  for (char ch : dir) {
    packed = (packed << 8) | (ToLowerCaseLetter(ch) & 0xff);
  }
  compilerDirectiveBloomFilter_.set(packed % prime1);
  compilerDirectiveBloomFilter_.set(packed % prime2);
  compilerDirectiveSentinels_.insert(dir);
  return *this;
}

// Looks up the 'len' characters at 'sentinel' among the registered
// sentinels.  The characters are used as given (no case conversion here).
// The Bloom filter is consulted first so that most non-sentinels are
// rejected without a set lookup.  Returns the registered string's
// characters on success, or nullptr.
const char *Prescanner::IsCompilerDirectiveSentinel(
    const char *sentinel, std::size_t len) const {
  std::uint64_t packed{0};
  for (std::size_t j{0}; j < len; ++j) {
    packed = (packed << 8) | (sentinel[j] & 0xff);
  }
  if (len == 0 || !compilerDirectiveBloomFilter_.test(packed % prime1) ||
      !compilerDirectiveBloomFilter_.test(packed % prime2)) {
    return nullptr;
  }
  const auto iter{compilerDirectiveSentinels_.find(std::string(sentinel, len))};
  return iter == compilerDirectiveSentinels_.end() ? nullptr : iter->c_str();
}

// Tests whether 'token', after trimming leading blanks/newlines, one
// optional leading '!', and trailing blanks/tabs, is a registered
// sentinel.  Returns a pointer into the token at the start of the
// sentinel text, or nullptr.
const char *Prescanner::IsCompilerDirectiveSentinel(CharBlock token) const {
  const char *p{token.begin()};
  const char *end{p + token.size()};
  while (p < end && (*p == ' ' || *p == '\n')) {
    ++p;
  }
  if (p < end && *p == '!') {
    ++p;
  }
  while (end > p && (end[-1] == ' ' || end[-1] == '\t')) {
    --end;
  }
  return end > p && IsCompilerDirectiveSentinel(p, end - p) ? p : nullptr;
}

// Scans a candidate sentinel starting at 'p' (just after a '!').  The
// candidate ends at the first blank, tab, or '&'.  On success, returns the
// registered sentinel paired with a pointer to the text that follows it
// (after white space); otherwise returns std::nullopt.
std::optional<std::pair<const char *, const char *>>
Prescanner::IsCompilerDirectiveSentinel(const char *p) const {
  char sentinel[8];
  for (std::size_t j{0}; j + 1 < sizeof sentinel && *p != '\n'; ++p, ++j) {
    if (int n{*p == '&' ? 1 : IsSpaceOrTab(p)}) {
      if (j > 0) {
        sentinel[j] = '\0';
        p = SkipWhiteSpace(p + n);
        if (*p != '!') {
          if (const char *sp{IsCompilerDirectiveSentinel(sentinel, j)}) {
            return std::make_pair(sp, p);
          }
        }
      }
      break;
    } else {
      sentinel[j] = ToLowerCaseLetter(*p);
    }
  }
  return std::nullopt;
}

// True when 'dir' begins with 'match', comparing 'dir' case-insensitively.
// 'match' is expected to be lower case already.  This is a prefix test, so
// "if" also matches text such as "ifdef".
constexpr bool IsDirective(const char *match, const char *dir) {
  for (; *match; ++match) {
    if (*match != ToLowerCaseLetter(*dir++)) {
      return false;
    }
  }
  return true;
}

// Classifies the source line beginning at 'start'.  Compiler directives
// and comments are checked first (according to the current source form),
// then INCLUDE lines, then the kinds of preprocessor directive; anything
// else is an ordinary Source line.
Prescanner::LineClassification Prescanner::ClassifyLine(
    const char *start) const {
  if (inFixedForm_) {
    if (std::optional<LineClassification> lc{
            IsFixedFormCompilerDirectiveLine(start)}) {
      return std::move(*lc);
    }
    if (IsFixedFormCommentLine(start)) {
      return {LineClassification::Kind::Comment};
    }
  } else {
    if (std::optional<LineClassification> lc{
            IsFreeFormCompilerDirectiveLine(start)}) {
      return std::move(*lc);
    }
    if (const char *bang{IsFreeFormComment(start)}) {
      return {LineClassification::Kind::Comment,
          static_cast<std::size_t>(bang - start)};
    }
  }
  if (std::optional<std::size_t> quoteOffset{IsIncludeLine(start)}) {
    return {LineClassification::Kind::IncludeLine, *quoteOffset};
  }
  if (const char *dir{IsPreprocessorDirectiveLine(start)}) {
    if (IsDirective("if", dir) || IsDirective("elif", dir) ||
        IsDirective("else", dir) || IsDirective("endif", dir)) {
      return {LineClassification::Kind::ConditionalCompilationDirective};
    } else if (IsDirective("include", dir)) {
      return {LineClassification::Kind::IncludeDirective};
    } else if (IsDirective("define", dir) || IsDirective("undef", dir)) {
      return {LineClassification::Kind::DefinitionDirective};
    } else {
      return {LineClassification::Kind::PreprocessorDirective};
    }
  }
  return {LineClassification::Kind::Source};
}

// Classifies the text of a token sequence as if it were a source line.
// A newline is appended for the duration of the classification and then
// removed again, so 'tokens' is left as it was found.
Prescanner::LineClassification Prescanner::ClassifyLine(
    TokenSequence &tokens, Provenance newlineProvenance) const {
  // Append a newline temporarily.
  tokens.PutNextTokenChar('\n', newlineProvenance);
  tokens.CloseToken();
  const char *ppd{tokens.ToCharBlock().begin()};
  LineClassification classification{ClassifyLine(ppd)};
  tokens.pop_back(); // remove the newline
  return classification;
}

// Switches the current source form when 'dir' is exactly "!dir$ free" or
// "!dir$ fixed"; any other text is ignored.
void Prescanner::SourceFormChange(std::string &&dir) {
  if (dir == "!dir$ free") {
    inFixedForm_ = false;
  } else if (dir == "!dir$ fixed") {
    inFixedForm_ = true;
  }
}

// Acquire and append compiler directive continuation lines to
// the tokens that constitute a compiler directive, even when those
// directive continuation lines are the result of macro expansion.
// (Not used when neither the original compiler directive line nor
// the directive continuation line result from preprocessing; regular
// line continuation during tokenization handles that normal case.)
// Applies only to free form, and only when the last token is "&".
// Returns true when a following line was consumed (a comment line was
// skipped, or a continuation was appended to 'tokens').  'origSentinel'
// is the sentinel of the directive being continued.
bool Prescanner::CompilerDirectiveContinuation(
    TokenSequence &tokens, const char *origSentinel) {
  if (inFixedForm_ || tokens.empty() ||
      tokens.TokenAt(tokens.SizeInTokens() - 1) != "&") {
    return false;
  }
  LineClassification followingLine{ClassifyLine(nextLine_)};
  if (followingLine.kind == LineClassification::Kind::Comment) {
    nextLine_ += followingLine.payloadOffset; // advance to '!' or newline
    NextLine();
    return true;
  }
  CHECK(origSentinel != nullptr);
  directiveSentinel_ = origSentinel; // so InCompilerDirective() is true
  const char *nextContinuation{
      followingLine.kind == LineClassification::Kind::CompilerDirective
          ? FreeFormContinuationLine(true)
          : nullptr};
  if (!nextContinuation &&
      followingLine.kind != LineClassification::Kind::Source) {
    return false;
  }
  // Remember where the following line starts so that it can be restored
  // if it turns out not to be a continuation.
  auto origNextLine{nextLine_};
  BeginSourceLine(nextLine_);
  NextLine();
  if (nextContinuation) {
    // What follows is !DIR$ & xxx; skip over the & so that it
    // doesn't cause a spurious continuation.
    at_ = nextContinuation;
  } else {
    // What follows looks like a source line before macro expansion,
    // but might become a directive continuation afterwards.
    SkipSpaces();
  }
  // Tokenize the rest of the following line and apply macro replacement.
  TokenSequence followingTokens;
  while (NextToken(followingTokens)) {
  }
  if (auto followingPrepro{
          preprocessor_.MacroReplacement(followingTokens, *this)}) {
    followingTokens = std::move(*followingPrepro);
  }
  followingTokens.RemoveRedundantBlanks();
  std::size_t startAt{0};
  std::size_t following{followingTokens.SizeInTokens()};
  bool ok{false};
  if (nextContinuation) {
    ok = true;
  } else {
    // After expansion the line must begin with "!" and a token whose
    // characters match the start of the original sentinel; blanks and one
    // "&" after them are then skipped.
    startAt = 2;
    if (startAt < following && followingTokens.TokenAt(0) == "!") {
      CharBlock sentinel{followingTokens.TokenAt(1)};
      if (!sentinel.empty() &&
          std::memcmp(sentinel.begin(), origSentinel, sentinel.size()) == 0) {
        ok = true;
        while (
            startAt < following && followingTokens.TokenAt(startAt).IsBlank()) {
          ++startAt;
        }
        if (startAt < following && followingTokens.TokenAt(startAt) == "&") {
          ++startAt;
        }
      }
    }
  }
  if (ok) {
    tokens.pop_back(); // delete original '&'
    tokens.Put(followingTokens, startAt, following - startAt);
    tokens.RemoveRedundantBlanks();
  } else {
    nextLine_ = origNextLine;
  }
  return ok;
}

// Similar, but for source line continuation after macro replacement.
1659 bool Prescanner::SourceLineContinuation(TokenSequence &tokens) { 1660 if (!inFixedForm_ && !tokens.empty() && 1661 tokens.TokenAt(tokens.SizeInTokens() - 1) == "&") { 1662 LineClassification followingLine{ClassifyLine(nextLine_)}; 1663 if (followingLine.kind == LineClassification::Kind::Comment) { 1664 nextLine_ += followingLine.payloadOffset; // advance to '!' or newline 1665 NextLine(); 1666 return true; 1667 } else if (const char *nextContinuation{FreeFormContinuationLine(true)}) { 1668 BeginSourceLine(nextLine_); 1669 NextLine(); 1670 TokenSequence followingTokens; 1671 at_ = nextContinuation; 1672 while (NextToken(followingTokens)) { 1673 } 1674 if (auto followingPrepro{ 1675 preprocessor_.MacroReplacement(followingTokens, *this)}) { 1676 followingTokens = std::move(*followingPrepro); 1677 } 1678 followingTokens.RemoveRedundantBlanks(); 1679 tokens.pop_back(); // delete original '&' 1680 tokens.Put(followingTokens); 1681 return true; 1682 } 1683 } 1684 return false; 1685 } 1686 } // namespace Fortran::parser 1687