//===-- lib/Parser/prescan.cpp --------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "prescan.h"
#include "flang/Common/idioms.h"
#include "flang/Parser/characters.h"
#include "flang/Parser/message.h"
#include "flang/Parser/preprocessor.h"
#include "flang/Parser/source.h"
#include "flang/Parser/token-sequence.h"
#include "llvm/Support/raw_ostream.h"
#include <cstddef>
#include <cstring>
#include <utility>
#include <vector>

namespace Fortran::parser {

using common::LanguageFeature;

// Prescan() refuses to scan once prescannerNesting_ exceeds this depth and
// reports a "too many nested INCLUDE/#include files" error instead.
static constexpr int maxPrescannerNesting{100};

// Constructs a prescanner bound to the given message sink, cooked source
// buffer, and preprocessor.  The source collection and the initial character
// encoding are taken from the preprocessor's AllSources, and
// backslashFreeFormContinuation_ is initialized from whether the preprocessor
// reports any definitions at construction time.
Prescanner::Prescanner(Messages &messages, CookedSource &cooked,
    Preprocessor &preprocessor, common::LanguageFeatureControl lfc)
    : messages_{messages}, cooked_{cooked}, preprocessor_{preprocessor},
      allSources_{preprocessor_.allSources()}, features_{lfc},
      backslashFreeFormContinuation_{preprocessor.AnyDefinitions()},
      encoding_{allSources_.encoding()} {}

// Constructs a nested prescanner from an existing one.  The settings listed
// in the initializer list are copied from "that"; the preprocessor and the
// isNestedInIncludeDirective flag come from the arguments, and the nesting
// depth is one greater than that of "that".
Prescanner::Prescanner(const Prescanner &that, Preprocessor &prepro,
    bool isNestedInIncludeDirective)
    : messages_{that.messages_}, cooked_{that.cooked_}, preprocessor_{prepro},
      allSources_{that.allSources_}, features_{that.features_},
      preprocessingOnly_{that.preprocessingOnly_},
      expandIncludeLines_{that.expandIncludeLines_},
      isNestedInIncludeDirective_{isNestedInIncludeDirective},
      backslashFreeFormContinuation_{that.backslashFreeFormContinuation_},
      inFixedForm_{that.inFixedForm_},
      fixedFormColumnLimit_{that.fixedFormColumnLimit_},
      encoding_{that.encoding_},
      prescannerNesting_{that.prescannerNesting_ + 1},
      skipLeadingAmpersand_{that.skipLeadingAmpersand_},
      compilerDirectiveBloomFilter_{that.compilerDirectiveBloomFilter_},
      compilerDirectiveSentinels_{that.compilerDirectiveSentinels_} {}

// Returns number of bytes to skip: 1 for an ASCII blank or a Latin-1 NBSP,
// 2 for a UTF-8 encoded NBSP, and 0 when *p is not a space at all.
// May read p[1], but only when p[0] is '\xc2'.
static inline int IsSpace(const char *p) {
  if (*p == ' ') {
    return 1;
  } else if (*p == '\xa0') { // LATIN-1 NBSP non-breaking space
    return 1;
  } else if (p[0] == '\xc2' && p[1] == '\xa0') { // UTF-8 NBSP
    return 2;
  } else {
    return 0;
  }
}

// Like IsSpace(), but a horizontal tab also counts as one byte of space.
static inline int IsSpaceOrTab(const char *p) {
  return *p == '\t' ? 1 : IsSpace(p);
}

// True for the characters that this file accepts as a fixed form comment
// marker: '!', '*', 'C', and 'c'.
static inline constexpr bool IsFixedFormCommentChar(char ch) {
  return ch == '!' || ch == '*' || ch == 'C' || ch == 'c';
}

// Overwrites the first non-blank character of a compiler directive line with
// '!'.  That character must be a fixed form comment character (CHECKed), and
// a directive consisting only of blanks is a fatal internal error.
static void NormalizeCompilerDirectiveCommentMarker(TokenSequence &dir) {
  char *p{dir.GetMutableCharData()};
  char *limit{p + dir.SizeInChars()};
  for (; p < limit; ++p) {
    if (*p != ' ') {
      CHECK(IsFixedFormCommentChar(*p));
      *p = '!';
      return;
    }
  }
  DIE("compiler directive all blank");
}

// Prescans the source text identified by "range", one statement at a time,
// until the end of that text is reached.  Nothing is scanned (and an error is
// reported) when this prescanner is nested too deeply.
void Prescanner::Prescan(ProvenanceRange range) {
  startProvenance_ = range.start();
  start_ = allSources_.GetSource(range);
  CHECK(start_);
  limit_ = start_ + range.size();
  nextLine_ = start_;
  const bool beganInFixedForm{inFixedForm_};
  if (prescannerNesting_ > maxPrescannerNesting) {
    Say(GetProvenance(start_),
        "too many nested INCLUDE/#include files, possibly circular"_err_en_US);
    return;
  }
  while (!IsAtEnd()) {
    Statement();
  }
  if (inFixedForm_ != beganInFixedForm) {
    // The source form in effect changed while scanning this range.  Emit a
    // compiler-inserted "!dir$ fixed" or "!dir$ free" line that names the
    // form that was in effect when this call began.
    std::string dir{"!dir$ "};
    if (beganInFixedForm) {
      dir += "fixed";
    } else {
      dir += "free";
    }
    dir += '\n';
    TokenSequence tokens{dir, allSources_.AddCompilerInsertion(dir).start()};
    tokens.Emit(cooked_);
  }
}

// Prescans the statement that begins at nextLine_.  The line is classified
// first: comments and INCLUDE lines are handled immediately, preprocessing
// directives are tokenized and handed to the preprocessor, and compiler
// directives and ordinary source lines are tokenized with NextToken().  The
// resulting tokens are then subjected to macro replacement; if anything was
// replaced, the result is classified again and handled according to its new
// kind.  Lines that survive are emitted via CheckAndEmitLine().
void Prescanner::Statement() {
  TokenSequence tokens;
  const char *statementStart{nextLine_};
  LineClassification line{ClassifyLine(statementStart)};
  switch (line.kind) {
  case LineClassification::Kind::Comment:
    nextLine_ += line.payloadOffset; // advance to '!' or newline
    NextLine();
    return;
  case LineClassification::Kind::IncludeLine:
    FortranInclude(nextLine_ + line.payloadOffset);
    NextLine();
    return;
  case LineClassification::Kind::ConditionalCompilationDirective:
  case LineClassification::Kind::IncludeDirective:
    preprocessor_.Directive(TokenizePreprocessorDirective(), *this);
    afterPreprocessingDirective_ = true;
    skipLeadingAmpersand_ |= !inFixedForm_;
    return;
  case LineClassification::Kind::PreprocessorDirective:
    preprocessor_.Directive(TokenizePreprocessorDirective(), *this);
    afterPreprocessingDirective_ = true;
    // Don't set skipLeadingAmpersand_
    return;
  case LineClassification::Kind::DefinitionDirective:
    preprocessor_.Directive(TokenizePreprocessorDirective(), *this);
    // Don't set afterPreprocessingDirective_ or skipLeadingAmpersand_
    return;
  case LineClassification::Kind::CompilerDirective: {
    directiveSentinel_ = line.sentinel;
    CHECK(InCompilerDirective());
    BeginStatementAndAdvance();
    if (inFixedForm_) {
      CHECK(IsFixedFormCommentChar(*at_));
    } else {
      while (int n{IsSpaceOrTab(at_)}) {
        at_ += n, ++column_;
      }
      CHECK(*at_ == '!');
    }
    // When set, the number of characters (comment marker plus sentinel) to
    // step over for a conditional compilation line.
    std::optional<int> condOffset;
    if (directiveSentinel_[0] == '$' && directiveSentinel_[1] == '\0') {
      // OpenMP conditional compilation line.
      condOffset = 2;
    } else if (directiveSentinel_[0] == '@' && directiveSentinel_[1] == 'c' &&
        directiveSentinel_[2] == 'u' && directiveSentinel_[3] == 'f' &&
        directiveSentinel_[4] == '\0') {
      // CUDA conditional compilation line.
      condOffset = 5;
    }
    if (condOffset) {
      at_ += *condOffset, column_ += *condOffset;
      if (auto payload{IsIncludeLine(at_)}) {
        FortranInclude(at_ + *payload);
        return;
      } else if (inFixedForm_) {
        LabelField(tokens);
      } else {
        SkipSpaces();
      }
    } else {
      // Compiler directive. Emit normalized sentinel, squash following spaces.
      EmitChar(tokens, '!');
      ++at_, ++column_;
      for (const char *sp{directiveSentinel_}; *sp != '\0';
           ++sp, ++at_, ++column_) {
        EmitChar(tokens, *sp);
      }
      if (IsSpaceOrTab(at_)) {
        EmitChar(tokens, ' ');
        while (int n{IsSpaceOrTab(at_)}) {
          at_ += n, ++column_;
        }
      }
      tokens.CloseToken();
    }
    break;
  }
  case LineClassification::Kind::Source: {
    BeginStatementAndAdvance();
    bool checkLabelField{false};
    if (inFixedForm_) {
      if (features_.IsEnabled(LanguageFeature::OldDebugLines) &&
          (*at_ == 'D' || *at_ == 'd')) {
        NextChar();
      }
      checkLabelField = true;
    } else {
      if (skipLeadingAmpersand_) {
        // One-shot: consume a leading '&' (and the white space before it)
        // on this free form line, then clear the flag.
        skipLeadingAmpersand_ = false;
        const char *p{SkipWhiteSpace(at_)};
        if (p < limit_ && *p == '&') {
          column_ += ++p - at_;
          at_ = p;
        }
      } else {
        SkipSpaces();
      }
    }
    // Check for a leading identifier that might be a keyword macro
    // that will expand to anything indicating a non-source line, like
    // a comment marker or directive sentinel. If so, disable line
    // continuation, so that NextToken() won't consume anything from
    // following lines.
    if (IsLegalIdentifierStart(*at_)) {
      // TODO: Only bother with these cases when any keyword macro has
      // been defined with replacement text that could begin a comment
      // or directive sentinel.
      const char *p{at_};
      while (IsLegalInIdentifier(*++p)) {
      }
      CharBlock id{at_, static_cast<std::size_t>(p - at_)};
      if (preprocessor_.IsNameDefined(id) &&
          !preprocessor_.IsFunctionLikeDefinition(id)) {
        checkLabelField = false;
        TokenSequence toks;
        toks.Put(id, GetProvenance(at_));
        if (auto replaced{preprocessor_.MacroReplacement(toks, *this)}) {
          auto newLineClass{ClassifyLine(*replaced, GetCurrentProvenance())};
          if (newLineClass.kind ==
              LineClassification::Kind::CompilerDirective) {
            directiveSentinel_ = newLineClass.sentinel;
            disableSourceContinuation_ = false;
          } else {
            disableSourceContinuation_ = !replaced->empty() &&
                newLineClass.kind != LineClassification::Kind::Source;
          }
        }
      }
    }
    if (checkLabelField) {
      LabelField(tokens);
    }
  } break;
  }

  // Tokenize the rest of the statement, including any continuation lines
  // that NextToken() consumes.
  while (NextToken(tokens)) {
  }
  if (continuationLines_ > 255) {
    if (features_.ShouldWarn(common::LanguageFeature::MiscSourceExtensions)) {
      Say(common::LanguageFeature::MiscSourceExtensions,
          GetProvenance(statementStart),
          "%d continuation lines is more than the Fortran standard allows"_port_en_US,
          continuationLines_);
    }
  }

  Provenance newlineProvenance{GetCurrentProvenance()};
  if (std::optional<TokenSequence> preprocessed{
          preprocessor_.MacroReplacement(tokens, *this)}) {
    // Reprocess the preprocessed line.
    LineClassification ppl{ClassifyLine(*preprocessed, newlineProvenance)};
    switch (ppl.kind) {
    case LineClassification::Kind::Comment:
      break;
    case LineClassification::Kind::IncludeLine:
      FortranInclude(preprocessed->TokenAt(0).begin() + ppl.payloadOffset);
      break;
    case LineClassification::Kind::ConditionalCompilationDirective:
    case LineClassification::Kind::IncludeDirective:
    case LineClassification::Kind::DefinitionDirective:
    case LineClassification::Kind::PreprocessorDirective:
      // The replaced text is not processed as a directive; it is emitted as
      // an ordinary line after an optional warning.
      if (features_.ShouldWarn(common::UsageWarning::Preprocessing)) {
        Say(common::UsageWarning::Preprocessing,
            preprocessed->GetProvenanceRange(),
            "Preprocessed line resembles a preprocessor directive"_warn_en_US);
      }
      CheckAndEmitLine(preprocessed->ToLowerCase(), newlineProvenance);
      break;
    case LineClassification::Kind::CompilerDirective:
      if (preprocessed->HasRedundantBlanks()) {
        preprocessed->RemoveRedundantBlanks();
      }
      while (CompilerDirectiveContinuation(*preprocessed, ppl.sentinel)) {
        newlineProvenance = GetCurrentProvenance();
      }
      NormalizeCompilerDirectiveCommentMarker(*preprocessed);
      // Lower-case before SourceFormChange() inspects the text.
      preprocessed->ToLowerCase();
      SourceFormChange(preprocessed->ToString());
      CheckAndEmitLine(preprocessed->ToLowerCase().ClipComment(
                           *this, true /* skip first ! */),
          newlineProvenance);
      break;
    case LineClassification::Kind::Source:
      if (inFixedForm_) {
        if (!preprocessingOnly_ && preprocessed->HasBlanks()) {
          preprocessed->RemoveBlanks();
        }
      } else {
        while (SourceLineContinuation(*preprocessed)) {
          newlineProvenance = GetCurrentProvenance();
        }
        if (preprocessed->HasRedundantBlanks()) {
          preprocessed->RemoveRedundantBlanks();
        }
      }
      CheckAndEmitLine(
          preprocessed->ToLowerCase().ClipComment(*this), newlineProvenance);
      break;
    }
  } else { // no macro replacement
    if (line.kind == LineClassification::Kind::CompilerDirective) {
      while (CompilerDirectiveContinuation(tokens, line.sentinel)) {
        newlineProvenance = GetCurrentProvenance();
      }
      tokens.ToLowerCase();
      SourceFormChange(tokens.ToString());
    } else { // Kind::Source
      tokens.ToLowerCase();
      if (inFixedForm_) {
        EnforceStupidEndStatementRules(tokens);
      }
    }
    CheckAndEmitLine(tokens, newlineProvenance);
  }
  directiveSentinel_ = nullptr;
}

// Checks a completed line for bad characters and (in most contexts) badly
// nested parentheses, then emits it to the cooked source.  A newline with
// the given provenance follows unless omitNewline_ is set, in which case
// that flag is cleared instead.
void Prescanner::CheckAndEmitLine(
    TokenSequence &tokens, Provenance newlineProvenance) {
  tokens.CheckBadFortranCharacters(
      messages_, *this, disableSourceContinuation_);
  // Parenthesis nesting check does not apply while any #include is
  // active, nor on the lines before and after a top-level #include,
  // nor before or after conditional source.
  // Applications play shenanigans with line continuation before and
  // after #include'd subprogram argument lists and conditional source.
  if (!isNestedInIncludeDirective_ && !omitNewline_ &&
      !afterPreprocessingDirective_ && tokens.BadlyNestedParentheses() &&
      !preprocessor_.InConditional()) {
    if (nextLine_ < limit_ && IsPreprocessorDirectiveLine(nextLine_)) {
      // don't complain
    } else {
      tokens.CheckBadParentheses(messages_);
    }
  }
  tokens.Emit(cooked_);
  if (omitNewline_) {
    omitNewline_ = false;
  } else {
    cooked_.Put('\n', newlineProvenance);
    afterPreprocessingDirective_ = false;
  }
}

// Tokenizes the preprocessing directive line at the current position and
// returns its tokens.  inPreprocessorDirective_ is set for the duration of
// the tokenization; it must not already be set on entry.
TokenSequence Prescanner::TokenizePreprocessorDirective() {
  CHECK(!IsAtEnd() && !inPreprocessorDirective_);
  inPreprocessorDirective_ = true;
  BeginStatementAndAdvance();
  TokenSequence tokens;
  while (NextToken(tokens)) {
  }
  inPreprocessorDirective_ = false;
  return tokens;
}

// Advances nextLine_ to the character after the next newline found in
// [nextLine_, limit_), or to limit_ when no newline remains.
// NOTE(review): <cstring> declares a const-preserving overload of
// std::memchr, so the const_cast round trip below looks avoidable.
void Prescanner::NextLine() {
  void *vstart{static_cast<void *>(const_cast<char *>(nextLine_))};
  void *v{std::memchr(vstart, '\n', limit_ - nextLine_)};
  if (!v) {
    nextLine_ = limit_;
  } else {
    const char *nl{const_cast<const char *>(static_cast<char *>(v))};
    nextLine_ = nl + 1;
  }
}

// Scans the label field (columns 1 through 6) of a fixed form line into
// "token".  Blanks are dropped, a tab ends the field and moves to column 7,
// and a '0' in column 6 is treated as a blank.  A non-digit in columns 1-5
// or any character in column 6 is remembered as the first "bad" column and
// is either tolerated (see below) or diagnosed with a warning.
void Prescanner::LabelField(TokenSequence &token) {
  int outCol{1};
  const char *start{at_};
  std::optional<int> badColumn;
  for (; *at_ != '\n' && column_ <= 6; ++at_) {
    if (*at_ == '\t') {
      ++at_;
      column_ = 7;
      break;
    }
    if (int n{IsSpace(at_)}; n == 0 &&
        !(*at_ == '0' && column_ == 6)) { // '0' in column 6 becomes space
      EmitChar(token, *at_);
      ++outCol;
      if (!badColumn && (column_ == 6 || !IsDecimalDigit(*at_))) {
        badColumn = column_;
      }
    }
    ++column_;
  }
  // A bad label field is not diagnosed when its text is the name of a
  // defined macro.
  if (badColumn && !preprocessor_.IsNameDefined(token.CurrentOpenToken())) {
    if ((prescannerNesting_ > 0 && *badColumn == 6 &&
            cooked_.BufferedBytes() == firstCookedCharacterOffset_) ||
        afterPreprocessingDirective_) {
      // This is the first source line in #include'd text or conditional
      // code under #if, or the first source line after such.
      // If it turns out that the preprocessed text begins with a
      // fixed form continuation line, the newline at the end
      // of the latest source line beforehand will be deleted in
      // CookedSource::Marshal().
      cooked_.MarkPossibleFixedFormContinuation();
    } else if (features_.ShouldWarn(common::UsageWarning::Scanning)) {
      Say(common::UsageWarning::Scanning, GetProvenance(start + *badColumn - 1),
          *badColumn == 6
              ? "Statement should not begin with a continuation line"_warn_en_US
              : "Character in fixed-form label field must be a digit"_warn_en_US);
    }
    token.clear();
    if (*badColumn < 6) {
      // Rewind so that the line is rescanned from column 1.
      at_ = start;
      column_ = 1;
      return;
    }
    outCol = 1;
  }
  if (outCol == 1) { // empty label field
    // Emit a space so that, if the line is rescanned after preprocessing,
    // a leading 'C' or 'D' won't be left-justified and then accidentally
    // misinterpreted as a comment card.
    EmitChar(token, ' ');
    ++outCol;
  }
  token.CloseToken();
  SkipToNextSignificantCharacter();
  if (IsDecimalDigit(*at_)) {
    if (features_.ShouldWarn(common::LanguageFeature::MiscSourceExtensions)) {
      Say(common::LanguageFeature::MiscSourceExtensions, GetCurrentProvenance(),
          "Label digit is not in fixed-form label field"_port_en_US);
    }
  }
}

// 6.3.3.5: A program unit END statement, or any other statement whose
// initial line resembles an END statement, shall not be continued in
// fixed form source.
445 void Prescanner::EnforceStupidEndStatementRules(const TokenSequence &tokens) { 446 CharBlock cBlock{tokens.ToCharBlock()}; 447 const char *str{cBlock.begin()}; 448 std::size_t n{cBlock.size()}; 449 if (n < 3) { 450 return; 451 } 452 std::size_t j{0}; 453 for (; j < n && (str[j] == ' ' || (str[j] >= '0' && str[j] <= '9')); ++j) { 454 } 455 if (j + 3 > n || std::memcmp(str + j, "end", 3) != 0) { 456 return; 457 } 458 // It starts with END, possibly after a label. 459 auto start{allSources_.GetSourcePosition(tokens.GetCharProvenance(j))}; 460 auto end{allSources_.GetSourcePosition(tokens.GetCharProvenance(n - 1))}; 461 if (!start || !end) { 462 return; 463 } 464 if (&*start->sourceFile == &*end->sourceFile && start->line == end->line) { 465 return; // no continuation 466 } 467 j += 3; 468 static const char *const prefixes[]{"program", "subroutine", "function", 469 "blockdata", "module", "submodule", nullptr}; 470 bool isPrefix{j == n || !IsLegalInIdentifier(str[j])}; // prefix is END 471 std::size_t endOfPrefix{j - 1}; 472 for (const char *const *p{prefixes}; *p; ++p) { 473 std::size_t pLen{std::strlen(*p)}; 474 if (j + pLen <= n && std::memcmp(str + j, *p, pLen) == 0) { 475 isPrefix = true; // END thing as prefix 476 j += pLen; 477 endOfPrefix = j - 1; 478 for (; j < n && IsLegalInIdentifier(str[j]); ++j) { 479 } 480 break; 481 } 482 } 483 if (isPrefix) { 484 auto range{tokens.GetTokenProvenanceRange(1)}; 485 if (j == n) { // END or END thing [name] 486 Say(range, 487 "Program unit END statement may not be continued in fixed form source"_err_en_US); 488 } else { 489 auto endOfPrefixPos{ 490 allSources_.GetSourcePosition(tokens.GetCharProvenance(endOfPrefix))}; 491 auto next{allSources_.GetSourcePosition(tokens.GetCharProvenance(j))}; 492 if (endOfPrefixPos && next && 493 &*endOfPrefixPos->sourceFile == &*start->sourceFile && 494 endOfPrefixPos->line == start->line && 495 (&*next->sourceFile != &*start->sourceFile || 496 next->line != start->line)) { 497 
Say(range, 498 "Initial line of continued statement must not appear to be a program unit END in fixed form source"_err_en_US); 499 } 500 } 501 } 502 } 503 504 void Prescanner::SkipToEndOfLine() { 505 while (*at_ != '\n') { 506 ++at_, ++column_; 507 } 508 } 509 510 bool Prescanner::MustSkipToEndOfLine() const { 511 if (inFixedForm_ && column_ > fixedFormColumnLimit_ && !tabInCurrentLine_) { 512 return true; // skip over ignored columns in right margin (73:80) 513 } else if (*at_ == '!' && !inCharLiteral_) { 514 return !IsCompilerDirectiveSentinel(at_); 515 } else { 516 return false; 517 } 518 } 519 520 void Prescanner::NextChar() { 521 CHECK(*at_ != '\n'); 522 int n{IsSpace(at_)}; 523 at_ += n ? n : 1; 524 ++column_; 525 while (at_[0] == '\xef' && at_[1] == '\xbb' && at_[2] == '\xbf') { 526 // UTF-8 byte order mark - treat this file as UTF-8 527 at_ += 3; 528 encoding_ = Encoding::UTF_8; 529 } 530 SkipToNextSignificantCharacter(); 531 } 532 533 // Skip everything that should be ignored until the next significant 534 // character is reached; handles C-style comments in preprocessing 535 // directives, Fortran ! comments, stuff after the right margin in 536 // fixed form, and all forms of line continuation. 
// Returns true when at least one continuation line was consumed.
bool Prescanner::SkipToNextSignificantCharacter() {
  auto anyContinuationLine{false};
  if (inPreprocessorDirective_) {
    SkipCComments();
  } else {
    bool mightNeedSpace{false};
    if (MustSkipToEndOfLine()) {
      SkipToEndOfLine();
    } else {
      mightNeedSpace = *at_ == '\n';
    }
    // Only the first Continuation() call is told that a space might be
    // needed; the flag is cleared for each later continuation line.
    for (; Continuation(mightNeedSpace); mightNeedSpace = false) {
      anyContinuationLine = true;
      ++continuationLines_;
      if (MustSkipToEndOfLine()) {
        SkipToEndOfLine();
      }
    }
    if (*at_ == '\t') {
      tabInCurrentLine_ = true;
    }
  }
  return anyContinuationLine;
}

// Skips over any run of terminated C-style comments at the current position
// and, within a preprocessing directive, over backslash-newline line splices.
void Prescanner::SkipCComments() {
  while (true) {
    if (IsCComment(at_)) {
      if (const char *after{SkipCComment(at_)}) {
        column_ += after - at_;
        // May have skipped over one or more newlines; relocate the start of
        // the next line.
        nextLine_ = at_ = after;
        NextLine();
      } else {
        // Don't emit any messages about unclosed C-style comments, because
        // the sequence /* can appear legally in a FORMAT statement. There's
        // no ambiguity, since the sequence */ cannot appear legally.
        break;
      }
    } else if (inPreprocessorDirective_ && at_[0] == '\\' && at_ + 2 < limit_ &&
        at_[1] == '\n' && !IsAtEnd()) {
      BeginSourceLineAndAdvance();
    } else {
      break;
    }
  }
}

// Advances past blanks and tabs using NextChar() and cancels any pending
// request to insert a space.
void Prescanner::SkipSpaces() {
  while (IsSpaceOrTab(at_)) {
    NextChar();
  }
  insertASpace_ = false;
}

// Returns the first position at or after p that is neither a space (as
// defined by IsSpace()) nor a tab.  Does not touch any scanner state.
const char *Prescanner::SkipWhiteSpace(const char *p) {
  while (int n{IsSpaceOrTab(p)}) {
    p += n;
  }
  return p;
}

// Like SkipWhiteSpace(), but also steps over terminated C-style comments.
// Stops in front of an unterminated comment.
const char *Prescanner::SkipWhiteSpaceAndCComments(const char *p) const {
  while (true) {
    if (int n{IsSpaceOrTab(p)}) {
      p += n;
    } else if (IsCComment(p)) {
      if (const char *after{SkipCComment(p)}) {
        p = after;
      } else {
        break;
      }
    } else {
      break;
    }
  }
  return p;
}

// Given p addressing the two-character opening of a C-style comment, returns
// the position just past the closing "*/", or nullptr if limit_ is reached
// first.
const char *Prescanner::SkipCComment(const char *p) const {
  // The two most recently scanned characters; the loop ends once they are
  // '*' followed by '/'.
  char star{' '}, slash{' '};
  p += 2;
  while (star != '*' || slash != '/') {
    if (p >= limit_) {
      return nullptr; // signifies an unterminated comment
    }
    star = slash;
    slash = *p++;
  }
  return p;
}

// Scans one token at the current position and appends it to "tokens".
// Returns false, without appending a token, when the end of the line has
// been reached; returns true otherwise.
bool Prescanner::NextToken(TokenSequence &tokens) {
  CHECK(at_ >= start_ && at_ < limit_);
  if (InFixedFormSource() && !preprocessingOnly_) {
    SkipSpaces();
  } else {
    if (*at_ == '/' && IsCComment(at_)) {
      // Recognize and skip over classic C style /*comments*/ when
      // outside a character literal.
      if (features_.ShouldWarn(LanguageFeature::ClassicCComments)) {
        Say(LanguageFeature::ClassicCComments, GetCurrentProvenance(),
            "nonstandard usage: C-style comment"_port_en_US);
      }
      SkipCComments();
    }
    if (IsSpaceOrTab(at_)) {
      // Compress free-form white space into a single space character.
      const auto theSpace{at_};
      char previous{at_ <= start_ ? ' ' : at_[-1]};
      NextChar();
      SkipSpaces();
      if (*at_ == '\n' && !omitNewline_) {
        // Discard white space at the end of a line.
      } else if (!inPreprocessorDirective_ &&
          (previous == '(' || *at_ == '(' || *at_ == ')')) {
        // Discard white space before/after '(' and before ')', unless in a
        // preprocessor directive. This helps yield space-free contiguous
        // names for generic interfaces like OPERATOR( + ) and
        // READ ( UNFORMATTED ), without misinterpreting #define f (notAnArg).
        // This has the effect of silently ignoring the illegal spaces in
        // the array constructor ( /1,2/ ) but that seems benign; it's
        // hard to avoid that while still removing spaces from OPERATOR( / )
        // and OPERATOR( // ).
      } else {
        // Preserve the squashed white space as a single space character.
        tokens.PutNextTokenChar(' ', GetProvenance(theSpace));
        tokens.CloseToken();
        return true;
      }
    }
  }
  if (insertASpace_) {
    tokens.PutNextTokenChar(' ', spaceProvenance_);
    insertASpace_ = false;
  }
  if (*at_ == '\n') {
    return false;
  }
  const char *start{at_};
  if (*at_ == '\'' || *at_ == '"') {
    QuotedCharacterLiteral(tokens, start);
    preventHollerith_ = false;
  } else if (IsDecimalDigit(*at_)) {
    // A digit string, which may turn out to be a Hollerith count, a real
    // literal, a hexadecimal constant in a directive, or a kind prefix.
    int n{0}, digits{0};
    static constexpr int maxHollerith{256 /*lines*/ * (132 - 6 /*columns*/)};
    do {
      // Stop accumulating the value once it has reached the largest
      // Hollerith length that will be accepted.
      if (n < maxHollerith) {
        n = 10 * n + DecimalDigitValue(*at_);
      }
      EmitCharAndAdvance(tokens, *at_);
      ++digits;
      if (InFixedFormSource()) {
        SkipSpaces();
      }
    } while (IsDecimalDigit(*at_));
    if ((*at_ == 'h' || *at_ == 'H') && n > 0 && n < maxHollerith &&
        !preventHollerith_) {
      Hollerith(tokens, n, start);
    } else if (*at_ == '.') {
      while (IsDecimalDigit(EmitCharAndAdvance(tokens, *at_))) {
      }
      ExponentAndKind(tokens);
    } else if (ExponentAndKind(tokens)) {
    } else if (digits == 1 && n == 0 && (*at_ == 'x' || *at_ == 'X') &&
        inPreprocessorDirective_) {
      // 0x... hexadecimal constant, recognized only in directives.
      do {
        EmitCharAndAdvance(tokens, *at_);
      } while (IsHexadecimalDigit(*at_));
    } else if (at_[0] == '_' && (at_[1] == '\'' || at_[1] == '"')) { // 4_"..."
      EmitCharAndAdvance(tokens, *at_);
      QuotedCharacterLiteral(tokens, start);
    } else if (IsLetter(*at_) && !preventHollerith_ &&
        parenthesisNesting_ > 0) {
      // Handles FORMAT(3I9HHOLLERITH) by skipping over the first I so that
      // we don't misrecognize I9HOLLERITH as an identifier in the next case.
      EmitCharAndAdvance(tokens, *at_);
    }
    preventHollerith_ = false;
  } else if (*at_ == '.') {
    char nch{EmitCharAndAdvance(tokens, '.')};
    if (!inPreprocessorDirective_ && IsDecimalDigit(nch)) {
      while (IsDecimalDigit(EmitCharAndAdvance(tokens, *at_))) {
      }
      ExponentAndKind(tokens);
    } else if (nch == '.' && EmitCharAndAdvance(tokens, '.') == '.') {
      EmitCharAndAdvance(tokens, '.'); // variadic macro definition ellipsis
    }
    preventHollerith_ = false;
  } else if (IsLegalInIdentifier(*at_)) {
    // Number of pieces into which line continuation has split this
    // identifier so far.
    int parts{1};
    const char *afterLast{nullptr};
    do {
      EmitChar(tokens, *at_);
      ++at_, ++column_;
      afterLast = at_;
      if (SkipToNextSignificantCharacter() && IsLegalIdentifierStart(*at_)) {
        tokens.CloseToken();
        ++parts;
      }
    } while (IsLegalInIdentifier(*at_));
    if (parts >= 3) {
      // Subtlety: When an identifier is split across three or more continuation
      // lines (or two continuation lines, immediately preceded or followed
      // by '&' free form continuation line markers), its parts are kept as
      // distinct pp-tokens so that macro replacement operates on them
      // independently. This trick accommodates the historic practice of
      // using line continuation for token pasting after replacement.
    } else if (parts == 2) {
      if (afterLast && afterLast < limit_) {
        afterLast = SkipWhiteSpace(afterLast);
      }
      if ((start > start_ && start[-1] == '&') ||
          (afterLast && afterLast < limit_ &&
              (*afterLast == '&' || *afterLast == '\n'))) {
        // call &                call foo&        call foo&
        //   &MACRO&      OR       &MACRO&   OR     &MACRO
        //   &foo(...)             &(...)
      } else {
        tokens.ReopenLastToken();
      }
    }
    if (InFixedFormSource()) {
      SkipSpaces();
    }
    if ((*at_ == '\'' || *at_ == '"') &&
        tokens.CharAt(tokens.SizeInChars() - 1) == '_') { // kind_"..."
      QuotedCharacterLiteral(tokens, start);
      preventHollerith_ = false;
    } else {
      preventHollerith_ = true; // DO 10 H = ...
    }
  } else if (*at_ == '*') {
    if (EmitCharAndAdvance(tokens, '*') == '*') {
      EmitCharAndAdvance(tokens, '*');
    } else {
      // Subtle ambiguity:
      //  CHARACTER*2H     declares H because *2 is a kind specifier
      //  DATAC/N*2H  /    is repeated Hollerith
      preventHollerith_ = !slashInCurrentStatement_;
    }
  } else {
    // Any other character: punctuation and operators.
    char ch{*at_};
    if (ch == '(') {
      if (parenthesisNesting_++ == 0) {
        isPossibleMacroCall_ = tokens.SizeInTokens() > 0 &&
            preprocessor_.IsFunctionLikeDefinition(
                tokens.TokenAt(tokens.SizeInTokens() - 1));
      }
    } else if (ch == ')' && parenthesisNesting_ > 0) {
      --parenthesisNesting_;
    }
    char nch{EmitCharAndAdvance(tokens, ch)};
    preventHollerith_ = false;
    if ((nch == '=' &&
            (ch == '<' || ch == '>' || ch == '/' || ch == '=' || ch == '!')) ||
        (ch == nch &&
            (ch == '/' || ch == ':' || ch == '*' || ch == '#' || ch == '&' ||
                ch == '|' || ch == '<' || ch == '>')) ||
        (ch == '=' && nch == '>')) {
      // token comprises two characters
      EmitCharAndAdvance(tokens, nch);
    } else if (ch == '/') {
      slashInCurrentStatement_ = true;
    } else if (ch == ';' && InFixedFormSource()) {
      SkipSpaces();
      if (IsDecimalDigit(*at_)) {
        if (features_.ShouldWarn(
                common::LanguageFeature::MiscSourceExtensions)) {
          Say(common::LanguageFeature::MiscSourceExtensions,
              GetProvenanceRange(at_, at_ + 1),
              "Label should be in the label field"_port_en_US);
        }
      }
    }
  }
  tokens.CloseToken();
  return true;
}

// If the current character is an 'e' or 'd' (in either case) that begins an
// exponent, emits the exponent letter, optional sign, digits, and any
// "_kind" suffix to the open token and returns true.  Otherwise emits
// nothing and returns false.
bool Prescanner::ExponentAndKind(TokenSequence &tokens) {
  char ed{ToLowerCaseLetter(*at_)};
  if (ed != 'e' && ed != 'd') {
    return false;
  }
  // Do some look-ahead to ensure that this 'e'/'d' is an exponent,
  // not the start of an identifier that could be a macro.
  const char *p{at_};
  if (int n{IsSpace(++p)}) {
    p += n;
  }
  if (*p == '+' || *p == '-') {
    if (int n{IsSpace(++p)}) {
      p += n;
    }
  }
  if (IsDecimalDigit(*p)) { // it's an exponent
    EmitCharAndAdvance(tokens, ed);
    if (*at_ == '+' || *at_ == '-') {
      EmitCharAndAdvance(tokens, *at_);
    }
    while (IsDecimalDigit(*at_)) {
      EmitCharAndAdvance(tokens, *at_);
    }
    if (*at_ == '_') {
      while (IsLegalInIdentifier(EmitCharAndAdvance(tokens, *at_))) {
      }
    }
    return true;
  } else {
    return false;
  }
}

// Scans a character literal whose opening quote mark is at the current
// position and appends it to the open token.  "start" marks the beginning of
// the token and is used only for the provenance of the "Incomplete character
// literal" error.
void Prescanner::QuotedCharacterLiteral(
    TokenSequence &tokens, const char *start) {
  char quote{*at_};
  const char *end{at_ + 1};
  inCharLiteral_ = true;
  continuationInCharLiteral_ = true;
  const auto emit{[&](char ch) { EmitChar(tokens, ch); }};
  const auto insert{[&](char ch) { EmitInsertedChar(tokens, ch); }};
  bool isEscaped{false};
  bool escapesEnabled{features_.IsEnabled(LanguageFeature::BackslashEscapes)};
  while (true) {
    if (*at_ == '\\') {
      if (escapesEnabled) {
        isEscaped = !isEscaped;
      } else {
        // The parser always processes escape sequences, so don't confuse it
        // when escapes are disabled.
        insert('\\');
      }
    } else {
      isEscaped = false;
    }
    EmitQuotedChar(static_cast<unsigned char>(*at_), emit, insert, false,
        Encoding::LATIN_1);
    while (PadOutCharacterLiteral(tokens)) {
    }
    if (*at_ == '\n') {
      if (!inPreprocessorDirective_) {
        Say(GetProvenanceRange(start, end),
            "Incomplete character literal"_err_en_US);
      }
      break;
    }
    // Here's a weird edge case. When there are two or more following
    // continuation lines at this point, and the entire significant part of
    // the next continuation line is the name of a keyword macro, replace
    // it in the character literal with its definition. Example:
    //   #define FOO foo
    //   subroutine subr() bind(c, name="my_&
    //     &FOO&
    //     &_bar") ...
    // produces a binding name of "my_foo_bar".
    while (at_[1] == '&' && nextLine_ < limit_ && !InFixedFormSource()) {
      const char *idStart{nextLine_};
      if (const char *amper{SkipWhiteSpace(nextLine_)}; *amper == '&') {
        idStart = amper + 1;
      }
      if (IsLegalIdentifierStart(*idStart)) {
        std::size_t idLen{1};
        for (; IsLegalInIdentifier(idStart[idLen]); ++idLen) {
        }
        if (idStart[idLen] == '&') {
          CharBlock id{idStart, idLen};
          if (preprocessor_.IsNameDefined(id)) {
            TokenSequence ppTokens;
            ppTokens.Put(id, GetProvenance(idStart));
            if (auto replaced{
                    preprocessor_.MacroReplacement(ppTokens, *this)}) {
              tokens.Put(*replaced);
              // Leave at_ on the last character of the macro name so that
              // at_[1] is the trailing '&' when the loop test runs again.
              at_ = &idStart[idLen - 1];
              NextLine();
              continue; // try again on the next line
            }
          }
        }
      }
      break;
    }
    end = at_ + 1;
    NextChar();
    if (*at_ == quote && !isEscaped) {
      // A doubled unescaped quote mark becomes a single instance of that
      // quote character in the literal (later). There can be spaces between
      // the quotes in fixed form source.
      EmitChar(tokens, quote);
      inCharLiteral_ = false; // for cases like print *, '...'!comment
      NextChar();
      if (InFixedFormSource()) {
        SkipSpaces();
      }
      if (*at_ != quote) {
        break;
      }
      inCharLiteral_ = true;
    }
  }
  continuationInCharLiteral_ = false;
  inCharLiteral_ = false;
}

// Scans the body of a Hollerith literal of "count" characters.  On entry the
// current character is the 'h' or 'H' that follows the count, whose digits
// have already been emitted by the caller.  "start" is used only for
// message provenance.
void Prescanner::Hollerith(
    TokenSequence &tokens, int count, const char *start) {
  inCharLiteral_ = true;
  CHECK(*at_ == 'h' || *at_ == 'H');
  EmitChar(tokens, 'H');
  while (count-- > 0) {
    if (PadOutCharacterLiteral(tokens)) {
      // A padding blank was emitted and counts as one character.
    } else if (*at_ == '\n') {
      if (features_.ShouldWarn(common::UsageWarning::Scanning)) {
        Say(common::UsageWarning::Scanning, GetProvenanceRange(start, at_),
            "Possible truncated Hollerith literal"_warn_en_US);
      }
      break;
    } else {
      NextChar();
      // Each multi-byte character encoding counts as a single character.
      // No escape sequences are recognized.
      // Hollerith is always emitted to the cooked character
      // stream in UTF-8.
      DecodedCharacter decoded{DecodeCharacter(
          encoding_, at_, static_cast<std::size_t>(limit_ - at_), false)};
      if (decoded.bytes > 0) {
        EncodedCharacter utf8{
            EncodeCharacter<Encoding::UTF_8>(decoded.codepoint)};
        for (int j{0}; j < utf8.bytes; ++j) {
          EmitChar(tokens, utf8.buffer[j]);
        }
        // Leave at_ on the final byte of the decoded character.
        at_ += decoded.bytes - 1;
      } else {
        Say(GetProvenanceRange(start, at_),
            "Bad character in Hollerith literal"_err_en_US);
        break;
      }
    }
  }
  if (*at_ != '\n') {
    NextChar();
  }
  inCharLiteral_ = false;
}

// In fixed form, source card images must be processed as if they were at
// least 72 columns wide, at least in character literal contexts.
985 bool Prescanner::PadOutCharacterLiteral(TokenSequence &tokens) { 986 while (inFixedForm_ && !tabInCurrentLine_ && at_[1] == '\n') { 987 if (column_ < fixedFormColumnLimit_) { 988 tokens.PutNextTokenChar(' ', spaceProvenance_); 989 ++column_; 990 return true; 991 } 992 if (!FixedFormContinuation(false /*no need to insert space*/) || 993 tabInCurrentLine_) { 994 return false; 995 } 996 CHECK(column_ == 7); 997 --at_; // point to column 6 of continuation line 998 column_ = 6; 999 } 1000 return false; 1001 } 1002 1003 static bool IsAtProcess(const char *p) { 1004 static const char pAtProc[]{"process"}; 1005 for (std::size_t i{0}; i < sizeof pAtProc - 1; ++i) { 1006 if (ToLowerCaseLetter(*++p) != pAtProc[i]) 1007 return false; 1008 } 1009 return true; 1010 } 1011 1012 bool Prescanner::IsFixedFormCommentLine(const char *start) const { 1013 const char *p{start}; 1014 1015 // The @process directive must start in column 1. 1016 if (*p == '@' && IsAtProcess(p)) { 1017 return true; 1018 } 1019 1020 if (IsFixedFormCommentChar(*p) || *p == '%' || // VAX %list, %eject, &c. 1021 ((*p == 'D' || *p == 'd') && 1022 !features_.IsEnabled(LanguageFeature::OldDebugLines))) { 1023 return true; 1024 } 1025 bool anyTabs{false}; 1026 while (true) { 1027 if (int n{IsSpace(p)}) { 1028 p += n; 1029 } else if (*p == '\t') { 1030 anyTabs = true; 1031 ++p; 1032 } else if (*p == '0' && !anyTabs && p == start + 5) { 1033 ++p; // 0 in column 6 must treated as a space 1034 } else { 1035 break; 1036 } 1037 } 1038 if (!anyTabs && p >= start + fixedFormColumnLimit_) { 1039 return true; 1040 } 1041 if (*p == '!' && !inCharLiteral_ && (anyTabs || p != start + 5)) { 1042 return true; 1043 } 1044 return *p == '\n'; 1045 } 1046 1047 const char *Prescanner::IsFreeFormComment(const char *p) const { 1048 p = SkipWhiteSpaceAndCComments(p); 1049 if (*p == '!' || *p == '\n') { 1050 return p; 1051 } else if (*p == '@') { 1052 return IsAtProcess(p) ? 
p : nullptr; 1053 } else { 1054 return nullptr; 1055 } 1056 } 1057 1058 std::optional<std::size_t> Prescanner::IsIncludeLine(const char *start) const { 1059 if (!expandIncludeLines_) { 1060 return std::nullopt; 1061 } 1062 const char *p{SkipWhiteSpace(start)}; 1063 if (*p == '0' && inFixedForm_ && p == start + 5) { 1064 // Accept " 0INCLUDE" in fixed form. 1065 p = SkipWhiteSpace(p + 1); 1066 } 1067 for (const char *q{"include"}; *q; ++q) { 1068 if (ToLowerCaseLetter(*p) != *q) { 1069 return std::nullopt; 1070 } 1071 p = SkipWhiteSpace(p + 1); 1072 } 1073 if (IsDecimalDigit(*p)) { // accept & ignore a numeric kind prefix 1074 for (p = SkipWhiteSpace(p + 1); IsDecimalDigit(*p); 1075 p = SkipWhiteSpace(p + 1)) { 1076 } 1077 if (*p != '_') { 1078 return std::nullopt; 1079 } 1080 p = SkipWhiteSpace(p + 1); 1081 } 1082 if (*p == '"' || *p == '\'') { 1083 return {p - start}; 1084 } 1085 return std::nullopt; 1086 } 1087 1088 void Prescanner::FortranInclude(const char *firstQuote) { 1089 const char *p{firstQuote}; 1090 while (*p != '"' && *p != '\'') { 1091 ++p; 1092 } 1093 char quote{*p}; 1094 std::string path; 1095 for (++p; *p != '\n'; ++p) { 1096 if (*p == quote) { 1097 if (p[1] != quote) { 1098 break; 1099 } 1100 ++p; 1101 } 1102 path += *p; 1103 } 1104 if (*p != quote) { 1105 Say(GetProvenanceRange(firstQuote, p), 1106 "malformed path name string"_err_en_US); 1107 return; 1108 } 1109 p = SkipWhiteSpace(p + 1); 1110 if (*p != '\n' && *p != '!') { 1111 const char *garbage{p}; 1112 for (; *p != '\n' && *p != '!'; ++p) { 1113 } 1114 if (features_.ShouldWarn(common::UsageWarning::Scanning)) { 1115 Say(common::UsageWarning::Scanning, GetProvenanceRange(garbage, p), 1116 "excess characters after path name"_warn_en_US); 1117 } 1118 } 1119 std::string buf; 1120 llvm::raw_string_ostream error{buf}; 1121 Provenance provenance{GetProvenance(nextLine_)}; 1122 std::optional<std::string> prependPath; 1123 if (const SourceFile * currentFile{allSources_.GetSourceFile(provenance)}) { 
1124 prependPath = DirectoryName(currentFile->path()); 1125 } 1126 const SourceFile *included{ 1127 allSources_.Open(path, error, std::move(prependPath))}; 1128 if (!included) { 1129 Say(provenance, "INCLUDE: %s"_err_en_US, buf); 1130 } else if (included->bytes() > 0) { 1131 ProvenanceRange includeLineRange{ 1132 provenance, static_cast<std::size_t>(p - nextLine_)}; 1133 ProvenanceRange fileRange{ 1134 allSources_.AddIncludedFile(*included, includeLineRange)}; 1135 Preprocessor cleanPrepro{allSources_}; 1136 if (preprocessor_.IsNameDefined("__FILE__"s)) { 1137 cleanPrepro.DefineStandardMacros(); // __FILE__, __LINE__, &c. 1138 } 1139 if (preprocessor_.IsNameDefined("_CUDA"s)) { 1140 cleanPrepro.Define("_CUDA"s, "1"); 1141 } 1142 Prescanner{*this, cleanPrepro, /*isNestedInIncludeDirective=*/false} 1143 .set_encoding(included->encoding()) 1144 .Prescan(fileRange); 1145 } 1146 } 1147 1148 const char *Prescanner::IsPreprocessorDirectiveLine(const char *start) const { 1149 const char *p{start}; 1150 while (int n{IsSpace(p)}) { 1151 p += n; 1152 } 1153 if (*p == '#') { 1154 if (inFixedForm_ && p == start + 5) { 1155 return nullptr; 1156 } 1157 } else { 1158 p = SkipWhiteSpace(p); 1159 if (*p != '#') { 1160 return nullptr; 1161 } 1162 } 1163 return SkipWhiteSpace(p + 1); 1164 } 1165 1166 bool Prescanner::IsNextLinePreprocessorDirective() const { 1167 return IsPreprocessorDirectiveLine(nextLine_) != nullptr; 1168 } 1169 1170 bool Prescanner::SkipCommentLine(bool afterAmpersand) { 1171 if (IsAtEnd()) { 1172 if (afterAmpersand && prescannerNesting_ > 0) { 1173 // A continuation marker at the end of the last line in an 1174 // include file inhibits the newline for that line. 
1175 SkipToEndOfLine(); 1176 omitNewline_ = true; 1177 } 1178 } else if (inPreprocessorDirective_) { 1179 } else { 1180 auto lineClass{ClassifyLine(nextLine_)}; 1181 if (lineClass.kind == LineClassification::Kind::Comment) { 1182 NextLine(); 1183 return true; 1184 } else if (lineClass.kind == 1185 LineClassification::Kind::ConditionalCompilationDirective || 1186 lineClass.kind == LineClassification::Kind::PreprocessorDirective) { 1187 // Allow conditional compilation directives (e.g., #ifdef) to affect 1188 // continuation lines. 1189 // Allow other preprocessor directives, too, except #include 1190 // (when it does not follow '&'), #define, and #undef (because 1191 // they cannot be allowed to affect preceding text on a 1192 // continued line). 1193 preprocessor_.Directive(TokenizePreprocessorDirective(), *this); 1194 return true; 1195 } else if (afterAmpersand && 1196 (lineClass.kind == LineClassification::Kind::DefinitionDirective || 1197 lineClass.kind == LineClassification::Kind::IncludeDirective || 1198 lineClass.kind == LineClassification::Kind::IncludeLine)) { 1199 SkipToEndOfLine(); 1200 omitNewline_ = true; 1201 skipLeadingAmpersand_ = true; 1202 } 1203 } 1204 return false; 1205 } 1206 1207 const char *Prescanner::FixedFormContinuationLine(bool mightNeedSpace) { 1208 if (IsAtEnd()) { 1209 return nullptr; 1210 } 1211 tabInCurrentLine_ = false; 1212 char col1{*nextLine_}; 1213 if (IsFixedFormCommentChar(col1)) { 1214 int j{1}; 1215 if (InCompilerDirective()) { 1216 // Must be a continued compiler directive. 1217 for (; j < 5; ++j) { 1218 char ch{directiveSentinel_[j - 1]}; 1219 if (ch == '\0') { 1220 break; 1221 } 1222 if (ch != ToLowerCaseLetter(nextLine_[j])) { 1223 return nullptr; 1224 } 1225 } 1226 } else if (features_.IsEnabled(LanguageFeature::OpenMP)) { 1227 // Fixed Source Form Conditional Compilation Sentinels. 
1228 if (nextLine_[1] != '$') { 1229 return nullptr; 1230 } 1231 j++; 1232 } else { 1233 return nullptr; 1234 } 1235 for (; j < 5; ++j) { 1236 if (nextLine_[j] != ' ') { 1237 return nullptr; 1238 } 1239 } 1240 const char *col6{nextLine_ + 5}; 1241 if (*col6 != '\n' && *col6 != '0' && !IsSpaceOrTab(col6)) { 1242 if (mightNeedSpace && !IsSpace(nextLine_ + 6)) { 1243 insertASpace_ = true; 1244 } 1245 return nextLine_ + 6; 1246 } 1247 return nullptr; 1248 } else { 1249 // Normal case: not in a compiler directive. 1250 if (col1 == '&' && 1251 features_.IsEnabled( 1252 LanguageFeature::FixedFormContinuationWithColumn1Ampersand)) { 1253 // Extension: '&' as continuation marker 1254 if (features_.ShouldWarn( 1255 LanguageFeature::FixedFormContinuationWithColumn1Ampersand)) { 1256 Say(LanguageFeature::FixedFormContinuationWithColumn1Ampersand, 1257 GetProvenance(nextLine_), "nonstandard usage"_port_en_US); 1258 } 1259 return nextLine_ + 1; 1260 } 1261 if (col1 == '\t' && nextLine_[1] >= '1' && nextLine_[1] <= '9') { 1262 tabInCurrentLine_ = true; 1263 return nextLine_ + 2; // VAX extension 1264 } 1265 if ((col1 == ' ' || 1266 ((col1 == 'D' || col1 == 'd') && 1267 features_.IsEnabled(LanguageFeature::OldDebugLines))) && 1268 nextLine_[1] == ' ' && nextLine_[2] == ' ' && nextLine_[3] == ' ' && 1269 nextLine_[4] == ' ') { 1270 const char *col6{nextLine_ + 5}; 1271 if (*col6 != '\n' && *col6 != '0' && !IsSpaceOrTab(col6)) { 1272 if ((*col6 == 'i' || *col6 == 'I') && IsIncludeLine(nextLine_)) { 1273 // It's An INCLUDE line, not a continuation 1274 } else { 1275 return nextLine_ + 6; 1276 } 1277 } 1278 } 1279 if (IsImplicitContinuation()) { 1280 return nextLine_; 1281 } 1282 } 1283 return nullptr; // not a continuation line 1284 } 1285 1286 const char *Prescanner::FreeFormContinuationLine(bool ampersand) { 1287 const char *p{nextLine_}; 1288 if (p >= limit_) { 1289 return nullptr; 1290 } 1291 p = SkipWhiteSpace(p); 1292 if (InCompilerDirective()) { 1293 if (*p++ != '!') { 1294 
return nullptr; 1295 } 1296 for (const char *s{directiveSentinel_}; *s != '\0'; ++p, ++s) { 1297 if (*s != ToLowerCaseLetter(*p)) { 1298 return nullptr; 1299 } 1300 } 1301 p = SkipWhiteSpace(p); 1302 if (*p == '&') { 1303 if (!ampersand) { 1304 insertASpace_ = true; 1305 } 1306 return p + 1; 1307 } else if (ampersand) { 1308 return p; 1309 } else { 1310 return nullptr; 1311 } 1312 } else { 1313 if (*p == '&') { 1314 return p + 1; 1315 } else if (*p == '!' || *p == '\n' || *p == '#') { 1316 return nullptr; 1317 } else if (ampersand || IsImplicitContinuation()) { 1318 if (continuationInCharLiteral_) { 1319 // 'a'& -> 'a''b' == "a'b" 1320 // 'b' 1321 if (features_.ShouldWarn( 1322 common::LanguageFeature::MiscSourceExtensions)) { 1323 Say(common::LanguageFeature::MiscSourceExtensions, 1324 GetProvenanceRange(p, p + 1), 1325 "Character literal continuation line should have been preceded by '&'"_port_en_US); 1326 } 1327 } else if (p > nextLine_) { 1328 --p; 1329 } else { 1330 insertASpace_ = true; 1331 } 1332 return p; 1333 } else { 1334 return nullptr; 1335 } 1336 } 1337 } 1338 1339 bool Prescanner::FixedFormContinuation(bool mightNeedSpace) { 1340 // N.B. We accept '&' as a continuation indicator in fixed form, too, 1341 // but not in a character literal. 1342 if (*at_ == '&' && inCharLiteral_) { 1343 return false; 1344 } 1345 do { 1346 if (const char *cont{FixedFormContinuationLine(mightNeedSpace)}) { 1347 BeginSourceLine(cont); 1348 column_ = 7; 1349 NextLine(); 1350 return true; 1351 } 1352 } while (SkipCommentLine(false /* not after ampersand */)); 1353 return false; 1354 } 1355 1356 bool Prescanner::FreeFormContinuation() { 1357 const char *p{at_}; 1358 bool ampersand{*p == '&'}; 1359 if (ampersand) { 1360 p = SkipWhiteSpace(p + 1); 1361 } 1362 if (*p != '\n') { 1363 if (inCharLiteral_) { 1364 return false; 1365 } else if (*p == '!') { // & ! 
comment - ok 1366 } else if (ampersand && isPossibleMacroCall_ && (*p == ',' || *p == ')')) { 1367 return false; // allow & at end of a macro argument 1368 } else if (features_.ShouldWarn(LanguageFeature::CruftAfterAmpersand)) { 1369 Say(LanguageFeature::CruftAfterAmpersand, GetProvenance(p), 1370 "missing ! before comment after &"_warn_en_US); 1371 } 1372 } 1373 do { 1374 if (const char *cont{FreeFormContinuationLine(ampersand)}) { 1375 BeginSourceLine(cont); 1376 NextLine(); 1377 return true; 1378 } 1379 } while (SkipCommentLine(ampersand)); 1380 return false; 1381 } 1382 1383 // Implicit line continuation allows a preprocessor macro call with 1384 // arguments to span multiple lines. 1385 bool Prescanner::IsImplicitContinuation() const { 1386 return !inPreprocessorDirective_ && !inCharLiteral_ && isPossibleMacroCall_ && 1387 parenthesisNesting_ > 0 && !IsAtEnd() && 1388 ClassifyLine(nextLine_).kind == LineClassification::Kind::Source; 1389 } 1390 1391 bool Prescanner::Continuation(bool mightNeedFixedFormSpace) { 1392 if (disableSourceContinuation_) { 1393 return false; 1394 } else if (*at_ == '\n' || *at_ == '&') { 1395 if (inFixedForm_) { 1396 return FixedFormContinuation(mightNeedFixedFormSpace); 1397 } else { 1398 return FreeFormContinuation(); 1399 } 1400 } else if (*at_ == '\\' && at_ + 2 == nextLine_ && 1401 backslashFreeFormContinuation_ && !inFixedForm_ && nextLine_ < limit_) { 1402 // cpp-like handling of \ at end of a free form source line 1403 BeginSourceLine(nextLine_); 1404 NextLine(); 1405 return true; 1406 } else { 1407 return false; 1408 } 1409 } 1410 1411 std::optional<Prescanner::LineClassification> 1412 Prescanner::IsFixedFormCompilerDirectiveLine(const char *start) const { 1413 const char *p{start}; 1414 char col1{*p++}; 1415 if (!IsFixedFormCommentChar(col1)) { 1416 return std::nullopt; 1417 } 1418 char sentinel[5], *sp{sentinel}; 1419 int column{2}; 1420 for (; column < 6; ++column, ++p) { 1421 if (*p == '\n' || IsSpaceOrTab(p)) { 1422 
break; 1423 } 1424 if (sp == sentinel + 1 && sentinel[0] == '$' && IsDecimalDigit(*p)) { 1425 // OpenMP conditional compilation line: leave the label alone 1426 break; 1427 } 1428 *sp++ = ToLowerCaseLetter(*p); 1429 } 1430 if (column == 6) { 1431 if (*p == '0') { 1432 ++p; 1433 } else if (int n{IsSpaceOrTab(p)}) { 1434 p += n; 1435 } else { 1436 // This is a Continuation line, not an initial directive line. 1437 return std::nullopt; 1438 } 1439 } 1440 if (sp == sentinel) { 1441 return std::nullopt; 1442 } 1443 *sp = '\0'; 1444 if (const char *ss{IsCompilerDirectiveSentinel( 1445 sentinel, static_cast<std::size_t>(sp - sentinel))}) { 1446 std::size_t payloadOffset = p - start; 1447 return {LineClassification{ 1448 LineClassification::Kind::CompilerDirective, payloadOffset, ss}}; 1449 } 1450 return std::nullopt; 1451 } 1452 1453 std::optional<Prescanner::LineClassification> 1454 Prescanner::IsFreeFormCompilerDirectiveLine(const char *start) const { 1455 if (const char *p{SkipWhiteSpace(start)}; p && *p++ == '!') { 1456 if (auto maybePair{IsCompilerDirectiveSentinel(p)}) { 1457 auto offset{static_cast<std::size_t>(maybePair->second - start)}; 1458 return {LineClassification{LineClassification::Kind::CompilerDirective, 1459 offset, maybePair->first}}; 1460 } 1461 } 1462 return std::nullopt; 1463 } 1464 1465 Prescanner &Prescanner::AddCompilerDirectiveSentinel(const std::string &dir) { 1466 std::uint64_t packed{0}; 1467 for (char ch : dir) { 1468 packed = (packed << 8) | (ToLowerCaseLetter(ch) & 0xff); 1469 } 1470 compilerDirectiveBloomFilter_.set(packed % prime1); 1471 compilerDirectiveBloomFilter_.set(packed % prime2); 1472 compilerDirectiveSentinels_.insert(dir); 1473 return *this; 1474 } 1475 1476 const char *Prescanner::IsCompilerDirectiveSentinel( 1477 const char *sentinel, std::size_t len) const { 1478 std::uint64_t packed{0}; 1479 for (std::size_t j{0}; j < len; ++j) { 1480 packed = (packed << 8) | (sentinel[j] & 0xff); 1481 } 1482 if (len == 0 || 
!compilerDirectiveBloomFilter_.test(packed % prime1) || 1483 !compilerDirectiveBloomFilter_.test(packed % prime2)) { 1484 return nullptr; 1485 } 1486 const auto iter{compilerDirectiveSentinels_.find(std::string(sentinel, len))}; 1487 return iter == compilerDirectiveSentinels_.end() ? nullptr : iter->c_str(); 1488 } 1489 1490 const char *Prescanner::IsCompilerDirectiveSentinel(CharBlock token) const { 1491 const char *p{token.begin()}; 1492 const char *end{p + token.size()}; 1493 while (p < end && (*p == ' ' || *p == '\n')) { 1494 ++p; 1495 } 1496 if (p < end && *p == '!') { 1497 ++p; 1498 } 1499 while (end > p && (end[-1] == ' ' || end[-1] == '\t')) { 1500 --end; 1501 } 1502 return end > p && IsCompilerDirectiveSentinel(p, end - p) ? p : nullptr; 1503 } 1504 1505 std::optional<std::pair<const char *, const char *>> 1506 Prescanner::IsCompilerDirectiveSentinel(const char *p) const { 1507 char sentinel[8]; 1508 for (std::size_t j{0}; j + 1 < sizeof sentinel && *p != '\n'; ++p, ++j) { 1509 if (int n{*p == '&' ? 
1 : IsSpaceOrTab(p)}) { 1510 if (j > 0) { 1511 sentinel[j] = '\0'; 1512 p = SkipWhiteSpace(p + n); 1513 if (*p != '!') { 1514 if (const char *sp{IsCompilerDirectiveSentinel(sentinel, j)}) { 1515 return std::make_pair(sp, p); 1516 } 1517 } 1518 } 1519 break; 1520 } else { 1521 sentinel[j] = ToLowerCaseLetter(*p); 1522 } 1523 } 1524 return std::nullopt; 1525 } 1526 1527 constexpr bool IsDirective(const char *match, const char *dir) { 1528 for (; *match; ++match) { 1529 if (*match != ToLowerCaseLetter(*dir++)) { 1530 return false; 1531 } 1532 } 1533 return true; 1534 } 1535 1536 Prescanner::LineClassification Prescanner::ClassifyLine( 1537 const char *start) const { 1538 if (inFixedForm_) { 1539 if (std::optional<LineClassification> lc{ 1540 IsFixedFormCompilerDirectiveLine(start)}) { 1541 return std::move(*lc); 1542 } 1543 if (IsFixedFormCommentLine(start)) { 1544 return {LineClassification::Kind::Comment}; 1545 } 1546 } else { 1547 if (std::optional<LineClassification> lc{ 1548 IsFreeFormCompilerDirectiveLine(start)}) { 1549 return std::move(*lc); 1550 } 1551 if (const char *bang{IsFreeFormComment(start)}) { 1552 return {LineClassification::Kind::Comment, 1553 static_cast<std::size_t>(bang - start)}; 1554 } 1555 } 1556 if (std::optional<std::size_t> quoteOffset{IsIncludeLine(start)}) { 1557 return {LineClassification::Kind::IncludeLine, *quoteOffset}; 1558 } 1559 if (const char *dir{IsPreprocessorDirectiveLine(start)}) { 1560 if (IsDirective("if", dir) || IsDirective("elif", dir) || 1561 IsDirective("else", dir) || IsDirective("endif", dir)) { 1562 return {LineClassification::Kind::ConditionalCompilationDirective}; 1563 } else if (IsDirective("include", dir)) { 1564 return {LineClassification::Kind::IncludeDirective}; 1565 } else if (IsDirective("define", dir) || IsDirective("undef", dir)) { 1566 return {LineClassification::Kind::DefinitionDirective}; 1567 } else { 1568 return {LineClassification::Kind::PreprocessorDirective}; 1569 } 1570 } 1571 return 
{LineClassification::Kind::Source}; 1572 } 1573 1574 Prescanner::LineClassification Prescanner::ClassifyLine( 1575 TokenSequence &tokens, Provenance newlineProvenance) const { 1576 // Append a newline temporarily. 1577 tokens.PutNextTokenChar('\n', newlineProvenance); 1578 tokens.CloseToken(); 1579 const char *ppd{tokens.ToCharBlock().begin()}; 1580 LineClassification classification{ClassifyLine(ppd)}; 1581 tokens.pop_back(); // remove the newline 1582 return classification; 1583 } 1584 1585 void Prescanner::SourceFormChange(std::string &&dir) { 1586 if (dir == "!dir$ free") { 1587 inFixedForm_ = false; 1588 } else if (dir == "!dir$ fixed") { 1589 inFixedForm_ = true; 1590 } 1591 } 1592 1593 // Acquire and append compiler directive continuation lines to 1594 // the tokens that constitute a compiler directive, even when those 1595 // directive continuation lines are the result of macro expansion. 1596 // (Not used when neither the original compiler directive line nor 1597 // the directive continuation line result from preprocessing; regular 1598 // line continuation during tokenization handles that normal case.) 1599 bool Prescanner::CompilerDirectiveContinuation( 1600 TokenSequence &tokens, const char *origSentinel) { 1601 if (inFixedForm_ || tokens.empty() || 1602 tokens.TokenAt(tokens.SizeInTokens() - 1) != "&") { 1603 return false; 1604 } 1605 LineClassification followingLine{ClassifyLine(nextLine_)}; 1606 if (followingLine.kind == LineClassification::Kind::Comment) { 1607 nextLine_ += followingLine.payloadOffset; // advance to '!' or newline 1608 NextLine(); 1609 return true; 1610 } 1611 CHECK(origSentinel != nullptr); 1612 directiveSentinel_ = origSentinel; // so InCompilerDirective() is true 1613 const char *nextContinuation{ 1614 followingLine.kind == LineClassification::Kind::CompilerDirective 1615 ? 
FreeFormContinuationLine(true) 1616 : nullptr}; 1617 if (!nextContinuation && 1618 followingLine.kind != LineClassification::Kind::Source) { 1619 return false; 1620 } 1621 auto origNextLine{nextLine_}; 1622 BeginSourceLine(nextLine_); 1623 NextLine(); 1624 if (nextContinuation) { 1625 // What follows is !DIR$ & xxx; skip over the & so that it 1626 // doesn't cause a spurious continuation. 1627 at_ = nextContinuation; 1628 } else { 1629 // What follows looks like a source line before macro expansion, 1630 // but might become a directive continuation afterwards. 1631 SkipSpaces(); 1632 } 1633 TokenSequence followingTokens; 1634 while (NextToken(followingTokens)) { 1635 } 1636 if (auto followingPrepro{ 1637 preprocessor_.MacroReplacement(followingTokens, *this)}) { 1638 followingTokens = std::move(*followingPrepro); 1639 } 1640 followingTokens.RemoveRedundantBlanks(); 1641 std::size_t startAt{0}; 1642 std::size_t following{followingTokens.SizeInTokens()}; 1643 bool ok{false}; 1644 if (nextContinuation) { 1645 ok = true; 1646 } else { 1647 startAt = 2; 1648 if (startAt < following && followingTokens.TokenAt(0) == "!") { 1649 CharBlock sentinel{followingTokens.TokenAt(1)}; 1650 if (!sentinel.empty() && 1651 std::memcmp(sentinel.begin(), origSentinel, sentinel.size()) == 0) { 1652 ok = true; 1653 while ( 1654 startAt < following && followingTokens.TokenAt(startAt).IsBlank()) { 1655 ++startAt; 1656 } 1657 if (startAt < following && followingTokens.TokenAt(startAt) == "&") { 1658 ++startAt; 1659 } 1660 } 1661 } 1662 } 1663 if (ok) { 1664 tokens.pop_back(); // delete original '&' 1665 tokens.Put(followingTokens, startAt, following - startAt); 1666 tokens.RemoveRedundantBlanks(); 1667 } else { 1668 nextLine_ = origNextLine; 1669 } 1670 return ok; 1671 } 1672 1673 // Similar, but for source line continuation after macro replacement. 
1674 bool Prescanner::SourceLineContinuation(TokenSequence &tokens) { 1675 if (!inFixedForm_ && !tokens.empty() && 1676 tokens.TokenAt(tokens.SizeInTokens() - 1) == "&") { 1677 LineClassification followingLine{ClassifyLine(nextLine_)}; 1678 if (followingLine.kind == LineClassification::Kind::Comment) { 1679 nextLine_ += followingLine.payloadOffset; // advance to '!' or newline 1680 NextLine(); 1681 return true; 1682 } else if (const char *nextContinuation{FreeFormContinuationLine(true)}) { 1683 BeginSourceLine(nextLine_); 1684 NextLine(); 1685 TokenSequence followingTokens; 1686 at_ = nextContinuation; 1687 while (NextToken(followingTokens)) { 1688 } 1689 if (auto followingPrepro{ 1690 preprocessor_.MacroReplacement(followingTokens, *this)}) { 1691 followingTokens = std::move(*followingPrepro); 1692 } 1693 followingTokens.RemoveRedundantBlanks(); 1694 tokens.pop_back(); // delete original '&' 1695 tokens.Put(followingTokens); 1696 return true; 1697 } 1698 } 1699 return false; 1700 } 1701 } // namespace Fortran::parser 1702