//===-- lib/Parser/prescan.cpp --------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "prescan.h"
#include "flang/Common/idioms.h"
#include "flang/Parser/characters.h"
#include "flang/Parser/message.h"
#include "flang/Parser/preprocessor.h"
#include "flang/Parser/source.h"
#include "flang/Parser/token-sequence.h"
#include "llvm/Support/raw_ostream.h"
#include <cstddef>
#include <cstring>
#include <utility>
#include <vector>

namespace Fortran::parser {

using common::LanguageFeature;

// Upper bound on prescannerNesting_; Prescan() reports an error and refuses
// to scan once the nesting depth exceeds it.
static constexpr int maxPrescannerNesting{100};

// Constructs a prescanner bound to the given message sink, cooked source
// output, and preprocessor.  The source encoding is taken from the
// preprocessor's AllSources, and backslashFreeFormContinuation_ is
// initialized from preprocessor.AnyDefinitions().
Prescanner::Prescanner(Messages &messages, CookedSource &cooked,
    Preprocessor &preprocessor, common::LanguageFeatureControl lfc)
    : messages_{messages}, cooked_{cooked}, preprocessor_{preprocessor},
      allSources_{preprocessor_.allSources()}, features_{lfc},
      backslashFreeFormContinuation_{preprocessor.AnyDefinitions()},
      encoding_{allSources_.encoding()} {}

// Constructs a nested prescanner from an existing one.  The settings of
// "that" are copied, the given preprocessor is used in place of that's,
// and the nesting depth is one greater than that's.
Prescanner::Prescanner(const Prescanner &that, Preprocessor &prepro,
    bool isNestedInIncludeDirective)
    : messages_{that.messages_}, cooked_{that.cooked_}, preprocessor_{prepro},
      allSources_{that.allSources_}, features_{that.features_},
      isNestedInIncludeDirective_{isNestedInIncludeDirective},
      backslashFreeFormContinuation_{that.backslashFreeFormContinuation_},
      inFixedForm_{that.inFixedForm_},
      fixedFormColumnLimit_{that.fixedFormColumnLimit_},
      encoding_{that.encoding_},
      prescannerNesting_{that.prescannerNesting_ + 1},
      skipLeadingAmpersand_{that.skipLeadingAmpersand_},
      compilerDirectiveBloomFilter_{that.compilerDirectiveBloomFilter_},
      compilerDirectiveSentinels_{that.compilerDirectiveSentinels_} {}

// Returns number of bytes to skip
static inline int IsSpace(const char *p) {
  if (*p == ' ') {
    return 1;
  } else if (*p == '\xa0') { // LATIN-1 NBSP non-breaking space
    return 1;
  } else if (p[0] == '\xc2' && p[1] == '\xa0') { // UTF-8 NBSP
    return 2;
  } else {
    return 0;
  }
}

// Like IsSpace(), but a horizontal tab also counts as one byte to skip.
static inline int IsSpaceOrTab(const char *p) {
  return *p == '\t' ? 1 : IsSpace(p);
}

// True for '!', '*', 'C', and 'c'.
static inline constexpr bool IsFixedFormCommentChar(char ch) {
  return ch == '!' || ch == '*' || ch == 'C' || ch == 'c';
}

// Overwrites the first non-blank character of a compiler directive line
// with '!'.  That character must be a fixed form comment character.
// Crashes if the token sequence contains nothing but blanks.
static void NormalizeCompilerDirectiveCommentMarker(TokenSequence &dir) {
  char *p{dir.GetMutableCharData()};
  char *limit{p + dir.SizeInChars()};
  for (; p < limit; ++p) {
    if (*p != ' ') {
      CHECK(IsFixedFormCommentChar(*p));
      *p = '!';
      return;
    }
  }
  DIE("compiler directive all blank");
}

// Prescans the source text covered by a provenance range, one statement
// at a time.  Emits an error and returns without scanning when the nesting
// depth exceeds maxPrescannerNesting.  If the source form in effect at the
// end differs from the one in effect on entry, a compiler-inserted
// "!dir$ fixed" or "!dir$ free" line naming the form that was in effect
// on entry is emitted to the cooked source.
void Prescanner::Prescan(ProvenanceRange range) {
  startProvenance_ = range.start();
  start_ = allSources_.GetSource(range);
  CHECK(start_);
  limit_ = start_ + range.size();
  nextLine_ = start_;
  const bool beganInFixedForm{inFixedForm_};
  if (prescannerNesting_ > maxPrescannerNesting) {
    Say(GetProvenance(start_),
        "too many nested INCLUDE/#include files, possibly circular"_err_en_US);
    return;
  }
  while (!IsAtEnd()) {
    Statement();
  }
  if (inFixedForm_ != beganInFixedForm) {
    std::string dir{"!dir$ "};
    if (beganInFixedForm) {
      dir += "fixed";
    } else {
      dir += "free";
    }
    dir += '\n';
    TokenSequence tokens{dir, allSources_.AddCompilerInsertion(dir).start()};
    tokens.Emit(cooked_);
  }
}

// Classifies the line at nextLine_ and processes one statement.
// Comment lines are skipped, INCLUDE lines are passed to FortranInclude(),
// and preprocessing directives are tokenized and passed to the
// preprocessor; each of those cases returns early.  Compiler directive
// lines and source lines are tokenized, subjected to macro replacement,
// reclassified if anything was replaced, and emitted via CheckAndEmitLine().
void Prescanner::Statement() {
  TokenSequence tokens;
  const char *statementStart{nextLine_};
  LineClassification line{ClassifyLine(statementStart)};
  switch (line.kind) {
  case LineClassification::Kind::Comment:
    nextLine_ += line.payloadOffset; // advance to '!' or newline
    NextLine();
    return;
  case LineClassification::Kind::IncludeLine:
    FortranInclude(nextLine_ + line.payloadOffset);
    NextLine();
    return;
  case LineClassification::Kind::ConditionalCompilationDirective:
  case LineClassification::Kind::IncludeDirective:
    preprocessor_.Directive(TokenizePreprocessorDirective(), *this);
    afterPreprocessingDirective_ = true;
    skipLeadingAmpersand_ |= !inFixedForm_;
    return;
  case LineClassification::Kind::PreprocessorDirective:
    preprocessor_.Directive(TokenizePreprocessorDirective(), *this);
    afterPreprocessingDirective_ = true;
    // Don't set skipLeadingAmpersand_
    return;
  case LineClassification::Kind::DefinitionDirective:
    preprocessor_.Directive(TokenizePreprocessorDirective(), *this);
    // Don't set afterPreprocessingDirective_ or skipLeadingAmpersand_
    return;
  case LineClassification::Kind::CompilerDirective: {
    directiveSentinel_ = line.sentinel;
    CHECK(InCompilerDirective());
    BeginStatementAndAdvance();
    if (inFixedForm_) {
      CHECK(IsFixedFormCommentChar(*at_));
    } else {
      while (int n{IsSpaceOrTab(at_)}) {
        at_ += n, ++column_;
      }
      CHECK(*at_ == '!');
    }
    // Number of characters (comment marker plus sentinel) to step over
    // when the line is a conditional compilation line.
    std::optional<int> condOffset;
    if (directiveSentinel_[0] == '$' && directiveSentinel_[1] == '\0') {
      // OpenMP conditional compilation line.
      condOffset = 2;
    } else if (directiveSentinel_[0] == '@' && directiveSentinel_[1] == 'c' &&
        directiveSentinel_[2] == 'u' && directiveSentinel_[3] == 'f' &&
        directiveSentinel_[4] == '\0') {
      // CUDA conditional compilation line.
      condOffset = 5;
    }
    if (condOffset) {
      at_ += *condOffset, column_ += *condOffset;
      if (auto payload{IsIncludeLine(at_)}) {
        FortranInclude(at_ + *payload);
        return;
      } else if (inFixedForm_) {
        LabelField(tokens);
      } else {
        SkipSpaces();
      }
    } else {
      // Compiler directive. Emit normalized sentinel, squash following spaces.
      EmitChar(tokens, '!');
      ++at_, ++column_;
      for (const char *sp{directiveSentinel_}; *sp != '\0';
           ++sp, ++at_, ++column_) {
        EmitChar(tokens, *sp);
      }
      if (IsSpaceOrTab(at_)) {
        EmitChar(tokens, ' ');
        while (int n{IsSpaceOrTab(at_)}) {
          at_ += n, ++column_;
        }
      }
      tokens.CloseToken();
    }
    break;
  }
  case LineClassification::Kind::Source:
    BeginStatementAndAdvance();
    if (inFixedForm_) {
      if (features_.IsEnabled(LanguageFeature::OldDebugLines) &&
          (*at_ == 'D' || *at_ == 'd')) {
        NextChar();
      }
      LabelField(tokens);
    } else {
      if (skipLeadingAmpersand_) {
        skipLeadingAmpersand_ = false;
        const char *p{SkipWhiteSpace(at_)};
        if (p < limit_ && *p == '&') {
          column_ += ++p - at_;
          at_ = p;
        }
      } else {
        SkipSpaces();
      }
      // Check for a leading identifier that might be a keyword macro
      // that will expand to anything indicating a non-source line, like
      // a comment marker or directive sentinel. If so, disable line
      // continuation, so that NextToken() won't consume anything from
      // following lines.
      if (IsLegalIdentifierStart(*at_)) {
        // TODO: Only bother with these cases when any keyword macro has
        // been defined with replacement text that could begin a comment
        // or directive sentinel.
        const char *p{at_};
        while (IsLegalInIdentifier(*++p)) {
        }
        CharBlock id{at_, static_cast<std::size_t>(p - at_)};
        if (preprocessor_.IsNameDefined(id) &&
            !preprocessor_.IsFunctionLikeDefinition(id)) {
          TokenSequence toks;
          toks.Put(id, GetProvenance(at_));
          if (auto replaced{preprocessor_.MacroReplacement(toks, *this)}) {
            auto newLineClass{ClassifyLine(*replaced, GetCurrentProvenance())};
            if (newLineClass.kind ==
                LineClassification::Kind::CompilerDirective) {
              directiveSentinel_ = newLineClass.sentinel;
              disableSourceContinuation_ = false;
            } else {
              disableSourceContinuation_ =
                  newLineClass.kind != LineClassification::Kind::Source;
            }
          }
        }
      }
    }
    break;
  }

  // Tokenize the remainder of the statement, including continuation lines.
  while (NextToken(tokens)) {
  }
  if (continuationLines_ > 255) {
    if (features_.ShouldWarn(common::LanguageFeature::MiscSourceExtensions)) {
      Say(GetProvenance(statementStart),
          "%d continuation lines is more than the Fortran standard allows"_port_en_US,
          continuationLines_);
    }
  }

  Provenance newlineProvenance{GetCurrentProvenance()};
  if (std::optional<TokenSequence> preprocessed{
          preprocessor_.MacroReplacement(tokens, *this)}) {
    // Reprocess the preprocessed line.
    LineClassification ppl{ClassifyLine(*preprocessed, newlineProvenance)};
    switch (ppl.kind) {
    case LineClassification::Kind::Comment:
      break;
    case LineClassification::Kind::IncludeLine:
      FortranInclude(preprocessed->TokenAt(0).begin() + ppl.payloadOffset);
      break;
    case LineClassification::Kind::ConditionalCompilationDirective:
    case LineClassification::Kind::IncludeDirective:
    case LineClassification::Kind::DefinitionDirective:
    case LineClassification::Kind::PreprocessorDirective:
      if (features_.ShouldWarn(common::UsageWarning::Preprocessing)) {
        Say(preprocessed->GetProvenanceRange(),
            "Preprocessed line resembles a preprocessor directive"_warn_en_US);
      }
      CheckAndEmitLine(preprocessed->ToLowerCase(), newlineProvenance);
      break;
    case LineClassification::Kind::CompilerDirective:
      if (preprocessed->HasRedundantBlanks()) {
        preprocessed->RemoveRedundantBlanks();
      }
      while (CompilerDirectiveContinuation(*preprocessed, ppl.sentinel)) {
        newlineProvenance = GetCurrentProvenance();
      }
      NormalizeCompilerDirectiveCommentMarker(*preprocessed);
      preprocessed->ToLowerCase();
      SourceFormChange(preprocessed->ToString());
      CheckAndEmitLine(preprocessed->ToLowerCase().ClipComment(
                           *this, true /* skip first ! */),
          newlineProvenance);
      break;
    case LineClassification::Kind::Source:
      if (inFixedForm_) {
        if (preprocessed->HasBlanks(/*after column*/ 6)) {
          preprocessed->RemoveBlanks(/*after column*/ 6);
        }
      } else {
        while (SourceLineContinuation(*preprocessed)) {
          newlineProvenance = GetCurrentProvenance();
        }
        if (preprocessed->HasRedundantBlanks()) {
          preprocessed->RemoveRedundantBlanks();
        }
      }
      CheckAndEmitLine(
          preprocessed->ToLowerCase().ClipComment(*this), newlineProvenance);
      break;
    }
  } else { // no macro replacement
    if (line.kind == LineClassification::Kind::CompilerDirective) {
      while (CompilerDirectiveContinuation(tokens, line.sentinel)) {
        newlineProvenance = GetCurrentProvenance();
      }
      tokens.ToLowerCase();
      SourceFormChange(tokens.ToString());
    } else { // Kind::Source
      tokens.ToLowerCase();
      if (inFixedForm_) {
        EnforceStupidEndStatementRules(tokens);
      }
    }
    CheckAndEmitLine(tokens, newlineProvenance);
  }
  directiveSentinel_ = nullptr;
}

// Checks a tokenized line for bad characters and, when the check applies,
// for badly nested parentheses; then emits the line to the cooked source.
// A newline with the given provenance follows the line unless omitNewline_
// is set, in which case that flag is cleared instead.
void Prescanner::CheckAndEmitLine(
    TokenSequence &tokens, Provenance newlineProvenance) {
  tokens.CheckBadFortranCharacters(
      messages_, *this, disableSourceContinuation_);
  // Parenthesis nesting check does not apply while any #include is
  // active, nor on the lines before and after a top-level #include,
  // nor before or after conditional source.
  // Applications play shenanigans with line continuation before and
  // after #include'd subprogram argument lists and conditional source.
  if (!isNestedInIncludeDirective_ && !omitNewline_ &&
      !afterPreprocessingDirective_ && tokens.BadlyNestedParentheses() &&
      !preprocessor_.InConditional()) {
    if (nextLine_ < limit_ && IsPreprocessorDirectiveLine(nextLine_)) {
      // don't complain
    } else {
      tokens.CheckBadParentheses(messages_);
    }
  }
  tokens.Emit(cooked_);
  if (omitNewline_) {
    omitNewline_ = false;
  } else {
    cooked_.Put('\n', newlineProvenance);
    afterPreprocessingDirective_ = false;
  }
}

// Tokenizes the current line as a preprocessing directive and returns its
// tokens.  inPreprocessorDirective_ is set for the duration of the scan.
TokenSequence Prescanner::TokenizePreprocessorDirective() {
  CHECK(!IsAtEnd() && !inPreprocessorDirective_);
  inPreprocessorDirective_ = true;
  BeginStatementAndAdvance();
  TokenSequence tokens;
  while (NextToken(tokens)) {
  }
  inPreprocessorDirective_ = false;
  return tokens;
}

// Advances nextLine_ to the character after the next newline, or to limit_
// when no newline remains before limit_.
void Prescanner::NextLine() {
  void *vstart{static_cast<void *>(const_cast<char *>(nextLine_))};
  void *v{std::memchr(vstart, '\n', limit_ - nextLine_)};
  if (!v) {
    nextLine_ = limit_;
  } else {
    const char *nl{const_cast<const char *>(static_cast<char *>(v))};
    nextLine_ = nl + 1;
  }
}

// Scans columns 1 through 6 of a fixed form line into a token.  A tab ends
// the field early and sets the column to 7.  Blanks, and a '0' in column 6,
// are not emitted.  The first non-digit, or any emitted character in
// column 6, is recorded as a bad column and handled below unless the
// characters scanned so far name a defined macro.
void Prescanner::LabelField(TokenSequence &token) {
  int outCol{1};
  const char *start{at_};
  std::optional<int> badColumn;
  for (; *at_ != '\n' && column_ <= 6; ++at_) {
    if (*at_ == '\t') {
      ++at_;
      column_ = 7;
      break;
    }
    if (int n{IsSpace(at_)}; n == 0 &&
        !(*at_ == '0' && column_ == 6)) { // '0' in column 6 becomes space
      EmitChar(token, *at_);
      ++outCol;
      if (!badColumn && (column_ == 6 || !IsDecimalDigit(*at_))) {
        badColumn = column_;
      }
    }
    ++column_;
  }
  if (badColumn && !preprocessor_.IsNameDefined(token.CurrentOpenToken())) {
    if ((prescannerNesting_ > 0 && *badColumn == 6 &&
            cooked_.BufferedBytes() == firstCookedCharacterOffset_) ||
        afterPreprocessingDirective_) {
      // This is the first source line in #include'd text or conditional
      // code under #if, or the first source line after such.
      // If it turns out that the preprocessed text begins with a
      // fixed form continuation line, the newline at the end
      // of the latest source line beforehand will be deleted in
      // CookedSource::Marshal().
      cooked_.MarkPossibleFixedFormContinuation();
    } else if (features_.ShouldWarn(common::UsageWarning::Scanning)) {
      Say(GetProvenance(start + *badColumn - 1),
          *badColumn == 6
              ? "Statement should not begin with a continuation line"_warn_en_US
              : "Character in fixed-form label field must be a digit"_warn_en_US);
    }
    token.clear();
    if (*badColumn < 6) {
      // Rewind so that the line is scanned again from its first column.
      at_ = start;
      column_ = 1;
      return;
    }
    outCol = 1;
  }
  if (outCol == 1) { // empty label field
    // Emit a space so that, if the line is rescanned after preprocessing,
    // a leading 'C' or 'D' won't be left-justified and then accidentally
    // misinterpreted as a comment card.
    EmitChar(token, ' ');
    ++outCol;
  }
  token.CloseToken();
  SkipToNextSignificantCharacter();
  if (IsDecimalDigit(*at_)) {
    if (features_.ShouldWarn(common::LanguageFeature::MiscSourceExtensions)) {
      Say(GetCurrentProvenance(),
          "Label digit is not in fixed-form label field"_port_en_US);
    }
  }
}

// 6.3.3.5: A program unit END statement, or any other statement whose
// initial line resembles an END statement, shall not be continued in
// fixed form source.
void Prescanner::EnforceStupidEndStatementRules(const TokenSequence &tokens) {
  CharBlock cBlock{tokens.ToCharBlock()};
  const char *str{cBlock.begin()};
  std::size_t n{cBlock.size()};
  if (n < 3) {
    return;
  }
  // Skip over blanks and label digits at the start of the statement.
  std::size_t j{0};
  for (; j < n && (str[j] == ' ' || (str[j] >= '0' && str[j] <= '9')); ++j) {
  }
  if (j + 3 > n || std::memcmp(str + j, "end", 3) != 0) {
    return;
  }
  // It starts with END, possibly after a label.
  auto start{allSources_.GetSourcePosition(tokens.GetCharProvenance(j))};
  auto end{allSources_.GetSourcePosition(tokens.GetCharProvenance(n - 1))};
  if (!start || !end) {
    return;
  }
  if (&*start->sourceFile == &*end->sourceFile && start->line == end->line) {
    return; // no continuation
  }
  j += 3;
  static const char *const prefixes[]{"program", "subroutine", "function",
      "blockdata", "module", "submodule", nullptr};
  bool isPrefix{j == n || !IsLegalInIdentifier(str[j])}; // prefix is END
  std::size_t endOfPrefix{j - 1};
  for (const char *const *p{prefixes}; *p; ++p) {
    std::size_t pLen{std::strlen(*p)};
    if (j + pLen <= n && std::memcmp(str + j, *p, pLen) == 0) {
      isPrefix = true; // END thing as prefix
      j += pLen;
      endOfPrefix = j - 1;
      for (; j < n && IsLegalInIdentifier(str[j]); ++j) {
      }
      break;
    }
  }
  if (isPrefix) {
    auto range{tokens.GetTokenProvenanceRange(1)};
    if (j == n) { // END or END thing [name]
      Say(range,
          "Program unit END statement may not be continued in fixed form source"_err_en_US);
    } else {
      auto endOfPrefixPos{
          allSources_.GetSourcePosition(tokens.GetCharProvenance(endOfPrefix))};
      auto next{allSources_.GetSourcePosition(tokens.GetCharProvenance(j))};
      // Complain only when the prefix ends on the initial line and the
      // character after it comes from a different line or file.
      if (endOfPrefixPos && next &&
          &*endOfPrefixPos->sourceFile == &*start->sourceFile &&
          endOfPrefixPos->line == start->line &&
          (&*next->sourceFile != &*start->sourceFile ||
              next->line != start->line)) {
        Say(range,
            "Initial line of continued statement must not appear to be a program unit END in fixed form source"_err_en_US);
      }
    }
  }
}

// Advances at_ and column_ to the newline that ends the current line.
void Prescanner::SkipToEndOfLine() {
  while (*at_ != '\n') {
    ++at_, ++column_;
  }
}

// True when the rest of the current line is to be ignored: either the
// column is past the fixed form column limit on a line without tabs, or
// at_ is a '!' outside a character literal that does not begin a compiler
// directive sentinel.
bool Prescanner::MustSkipToEndOfLine() const {
  if (inFixedForm_ && column_ > fixedFormColumnLimit_ && !tabInCurrentLine_) {
    return true; // skip over ignored columns in right margin (73:80)
  } else if (*at_ == '!' && !inCharLiteral_) {
    return !IsCompilerDirectiveSentinel(at_);
  } else {
    return false;
  }
}

// Advances past one character, which must not be a newline; a two-byte
// UTF-8 NBSP counts as a single column.  Any UTF-8 byte order marks that
// follow are consumed, and then insignificant text is skipped.
void Prescanner::NextChar() {
  CHECK(*at_ != '\n');
  int n{IsSpace(at_)};
  at_ += n ? n : 1;
  ++column_;
  while (at_[0] == '\xef' && at_[1] == '\xbb' && at_[2] == '\xbf') {
    // UTF-8 byte order mark - treat this file as UTF-8
    at_ += 3;
    encoding_ = Encoding::UTF_8;
  }
  SkipToNextSignificantCharacter();
}

// Skip everything that should be ignored until the next significant
// character is reached; handles C-style comments in preprocessing
// directives, Fortran ! comments, stuff after the right margin in
// fixed form, and all forms of line continuation.
// Returns true when at least one continuation line was consumed.
bool Prescanner::SkipToNextSignificantCharacter() {
  auto anyContinuationLine{false};
  if (inPreprocessorDirective_) {
    SkipCComments();
  } else {
    bool mightNeedSpace{false};
    if (MustSkipToEndOfLine()) {
      SkipToEndOfLine();
    } else {
      mightNeedSpace = *at_ == '\n';
    }
    for (; Continuation(mightNeedSpace); mightNeedSpace = false) {
      anyContinuationLine = true;
      ++continuationLines_;
      if (MustSkipToEndOfLine()) {
        SkipToEndOfLine();
      }
    }
    if (*at_ == '\t') {
      tabInCurrentLine_ = true;
    }
  }
  return anyContinuationLine;
}

// Skips any run of complete C-style comments at at_.  Within a
// preprocessing directive, a backslash immediately followed by a newline
// is also stepped over by beginning the next source line.
void Prescanner::SkipCComments() {
  while (true) {
    if (IsCComment(at_)) {
      if (const char *after{SkipCComment(at_)}) {
        column_ += after - at_;
        // May have skipped over one or more newlines; relocate the start of
        // the next line.
        nextLine_ = at_ = after;
        NextLine();
      } else {
        // Don't emit any messages about unclosed C-style comments, because
        // the sequence /* can appear legally in a FORMAT statement. There's
        // no ambiguity, since the sequence */ cannot appear legally.
        break;
      }
    } else if (inPreprocessorDirective_ && at_[0] == '\\' && at_ + 2 < limit_ &&
        at_[1] == '\n' && !IsAtEnd()) {
      BeginSourceLineAndAdvance();
    } else {
      break;
    }
  }
}

// Skips spaces and tabs with NextChar(), then clears insertASpace_.
void Prescanner::SkipSpaces() {
  while (IsSpaceOrTab(at_)) {
    NextChar();
  }
  insertASpace_ = false;
}

// Returns the first position at or after p that is not a space or tab.
const char *Prescanner::SkipWhiteSpace(const char *p) {
  while (int n{IsSpaceOrTab(p)}) {
    p += n;
  }
  return p;
}

// Returns the first position at or after p that is neither a space, a tab,
// nor the start of a terminated C-style comment.
const char *Prescanner::SkipWhiteSpaceAndCComments(const char *p) const {
  while (true) {
    if (int n{IsSpaceOrTab(p)}) {
      p += n;
    } else if (IsCComment(p)) {
      if (const char *after{SkipCComment(p)}) {
        p = after;
      } else {
        break;
      }
    } else {
      break;
    }
  }
  return p;
}

// Given p at the first of the two characters that open a C-style comment,
// returns the position just past the closing star-slash pair, or nullptr
// when limit_ is reached first.
const char *Prescanner::SkipCComment(const char *p) const {
  char star{' '}, slash{' '};
  p += 2;
  while (star != '*' || slash != '/') {
    if (p >= limit_) {
      return nullptr; // signifies an unterminated comment
    }
    star = slash;
    slash = *p++;
  }
  return p;
}

// Scans the next token of the current statement or directive into the
// token sequence.  Returns false when the end of the line has been
// reached, and true after a token has been emitted and closed.
bool Prescanner::NextToken(TokenSequence &tokens) {
  CHECK(at_ >= start_ && at_ < limit_);
  if (InFixedFormSource()) {
    SkipSpaces();
  } else {
    if (*at_ == '/' && IsCComment(at_)) {
      // Recognize and skip over classic C style /*comments*/ when
      // outside a character literal.
      if (features_.ShouldWarn(LanguageFeature::ClassicCComments)) {
        Say(GetCurrentProvenance(),
            "nonstandard usage: C-style comment"_port_en_US);
      }
      SkipCComments();
    }
    if (IsSpaceOrTab(at_)) {
      // Compress free-form white space into a single space character.
      const auto theSpace{at_};
      char previous{at_ <= start_ ? ' ' : at_[-1]};
      NextChar();
      SkipSpaces();
      if (*at_ == '\n' && !omitNewline_) {
        // Discard white space at the end of a line.
      } else if (!inPreprocessorDirective_ &&
          (previous == '(' || *at_ == '(' || *at_ == ')')) {
        // Discard white space before/after '(' and before ')', unless in a
        // preprocessor directive. This helps yield space-free contiguous
        // names for generic interfaces like OPERATOR( + ) and
        // READ ( UNFORMATTED ), without misinterpreting #define f (notAnArg).
        // This has the effect of silently ignoring the illegal spaces in
        // the array constructor ( /1,2/ ) but that seems benign; it's
        // hard to avoid that while still removing spaces from OPERATOR( / )
        // and OPERATOR( // ).
      } else {
        // Preserve the squashed white space as a single space character.
        tokens.PutNextTokenChar(' ', GetProvenance(theSpace));
        tokens.CloseToken();
        return true;
      }
    }
  }
  if (insertASpace_) {
    tokens.PutNextTokenChar(' ', spaceProvenance_);
    insertASpace_ = false;
  }
  if (*at_ == '\n') {
    return false;
  }
  const char *start{at_};
  if (*at_ == '\'' || *at_ == '"') {
    QuotedCharacterLiteral(tokens, start);
    preventHollerith_ = false;
  } else if (IsDecimalDigit(*at_)) {
    // n accumulates the value of the digit string until it reaches
    // maxHollerith; digits counts the digits scanned.
    int n{0}, digits{0};
    static constexpr int maxHollerith{256 /*lines*/ * (132 - 6 /*columns*/)};
    do {
      if (n < maxHollerith) {
        n = 10 * n + DecimalDigitValue(*at_);
      }
      EmitCharAndAdvance(tokens, *at_);
      ++digits;
      if (InFixedFormSource()) {
        SkipSpaces();
      }
    } while (IsDecimalDigit(*at_));
    if ((*at_ == 'h' || *at_ == 'H') && n > 0 && n < maxHollerith &&
        !preventHollerith_) {
      Hollerith(tokens, n, start);
    } else if (*at_ == '.') {
      while (IsDecimalDigit(EmitCharAndAdvance(tokens, *at_))) {
      }
      ExponentAndKind(tokens);
    } else if (ExponentAndKind(tokens)) {
    } else if (digits == 1 && n == 0 && (*at_ == 'x' || *at_ == 'X') &&
        inPreprocessorDirective_) {
      // "0x" or "0X" followed by hexadecimal digits in a directive
      do {
        EmitCharAndAdvance(tokens, *at_);
      } while (IsHexadecimalDigit(*at_));
    } else if (at_[0] == '_' && (at_[1] == '\'' || at_[1] == '"')) { // 4_"..."
      EmitCharAndAdvance(tokens, *at_);
      QuotedCharacterLiteral(tokens, start);
    } else if (IsLetter(*at_) && !preventHollerith_ &&
        parenthesisNesting_ > 0) {
      // Handles FORMAT(3I9HHOLLERITH) by skipping over the first I so that
      // we don't misrecognize I9HOLLERITH as an identifier in the next case.
      EmitCharAndAdvance(tokens, *at_);
    }
    preventHollerith_ = false;
  } else if (*at_ == '.') {
    char nch{EmitCharAndAdvance(tokens, '.')};
    if (!inPreprocessorDirective_ && IsDecimalDigit(nch)) {
      while (IsDecimalDigit(EmitCharAndAdvance(tokens, *at_))) {
      }
      ExponentAndKind(tokens);
    } else if (nch == '.' && EmitCharAndAdvance(tokens, '.') == '.') {
      EmitCharAndAdvance(tokens, '.'); // variadic macro definition ellipsis
    }
    preventHollerith_ = false;
  } else if (IsLegalInIdentifier(*at_)) {
    // parts counts the pieces of an identifier that was split across
    // continuation lines; afterLast is the position just past the last
    // character emitted.
    int parts{1};
    const char *afterLast{nullptr};
    do {
      EmitChar(tokens, *at_);
      ++at_, ++column_;
      afterLast = at_;
      if (SkipToNextSignificantCharacter() && IsLegalIdentifierStart(*at_)) {
        tokens.CloseToken();
        ++parts;
      }
    } while (IsLegalInIdentifier(*at_));
    if (parts >= 3) {
      // Subtlety: When an identifier is split across three or more continuation
      // lines (or two continuation lines, immediately preceded or followed
      // by '&' free form continuation line markers), its parts are kept as
      // distinct pp-tokens so that macro replacement operates on them
      // independently. This trick accommodates the historic practice of
      // using line continuation for token pasting after replacement.
    } else if (parts == 2) {
      if (afterLast && afterLast < limit_) {
        afterLast = SkipWhiteSpace(afterLast);
      }
      if ((start > start_ && start[-1] == '&') ||
          (afterLast && afterLast < limit_ &&
              (*afterLast == '&' || *afterLast == '\n'))) {
        // call &                call foo&        call foo&
        //   &MACRO&      OR       &MACRO&   OR     &MACRO
        //   &foo(...)             &(...)
      } else {
        tokens.ReopenLastToken();
      }
    }
    if (InFixedFormSource()) {
      SkipSpaces();
    }
    if ((*at_ == '\'' || *at_ == '"') &&
        tokens.CharAt(tokens.SizeInChars() - 1) == '_') { // kind_"..."
      QuotedCharacterLiteral(tokens, start);
      preventHollerith_ = false;
    } else {
      preventHollerith_ = true; // DO 10 H = ...
    }
  } else if (*at_ == '*') {
    if (EmitCharAndAdvance(tokens, '*') == '*') {
      EmitCharAndAdvance(tokens, '*');
    } else {
      // Subtle ambiguity:
      //  CHARACTER*2H     declares H because *2 is a kind specifier
      //  DATAC/N*2H  /    is repeated Hollerith
      preventHollerith_ = !slashInCurrentStatement_;
    }
  } else {
    char ch{*at_};
    if (ch == '(') {
      if (parenthesisNesting_++ == 0) {
        isPossibleMacroCall_ = tokens.SizeInTokens() > 0 &&
            preprocessor_.IsFunctionLikeDefinition(
                tokens.TokenAt(tokens.SizeInTokens() - 1));
      }
    } else if (ch == ')' && parenthesisNesting_ > 0) {
      --parenthesisNesting_;
    }
    char nch{EmitCharAndAdvance(tokens, ch)};
    preventHollerith_ = false;
    if ((nch == '=' &&
            (ch == '<' || ch == '>' || ch == '/' || ch == '=' || ch == '!')) ||
        (ch == nch &&
            (ch == '/' || ch == ':' || ch == '*' || ch == '#' || ch == '&' ||
                ch == '|' || ch == '<' || ch == '>')) ||
        (ch == '=' && nch == '>')) {
      // token comprises two characters
      EmitCharAndAdvance(tokens, nch);
    } else if (ch == '/') {
      slashInCurrentStatement_ = true;
    } else if (ch == ';' && InFixedFormSource()) {
      SkipSpaces();
      if (IsDecimalDigit(*at_)) {
        if (features_.ShouldWarn(
                common::LanguageFeature::MiscSourceExtensions)) {
          Say(GetProvenanceRange(at_, at_ + 1),
              "Label should be in the label field"_port_en_US);
        }
      }
    }
  }
  tokens.CloseToken();
  return true;
}

// Scans an exponent letter ('e' or 'd' in either case, emitted in lower
// case), an optional sign, any digits, and an optional kind suffix that
// begins with an underscore.  Returns false, having consumed nothing, when
// the current character is not an exponent letter.
bool Prescanner::ExponentAndKind(TokenSequence &tokens) {
  char ed{ToLowerCaseLetter(*at_)};
  if (ed != 'e' && ed != 'd') {
    return false;
  }
  EmitCharAndAdvance(tokens, ed);
  if (*at_ == '+' || *at_ == '-') {
    EmitCharAndAdvance(tokens, *at_);
  }
  while (IsDecimalDigit(*at_)) {
    EmitCharAndAdvance(tokens, *at_);
  }
  if (*at_ == '_') {
    while (IsLegalInIdentifier(EmitCharAndAdvance(tokens, *at_))) {
    }
  }
  return true;
}

// Scans a quoted character literal whose opening quote is at at_.
// "start" is the beginning of the enclosing token and is used only for
// the provenance range of the "Incomplete character literal" message.
void Prescanner::QuotedCharacterLiteral(
    TokenSequence &tokens, const char *start) {
  char quote{*at_};
  const char *end{at_ + 1};
  inCharLiteral_ = true;
  continuationInCharLiteral_ = true;
  const auto emit{[&](char ch) { EmitChar(tokens, ch); }};
  const auto insert{[&](char ch) { EmitInsertedChar(tokens, ch); }};
  bool isEscaped{false};
  bool escapesEnabled{features_.IsEnabled(LanguageFeature::BackslashEscapes)};
  while (true) {
    if (*at_ == '\\') {
      if (escapesEnabled) {
        isEscaped = !isEscaped;
      } else {
        // The parser always processes escape sequences, so don't confuse it
        // when escapes are disabled.
        insert('\\');
      }
    } else {
      isEscaped = false;
    }
    EmitQuotedChar(static_cast<unsigned char>(*at_), emit, insert, false,
        Encoding::LATIN_1);
    while (PadOutCharacterLiteral(tokens)) {
    }
    if (*at_ == '\n') {
      if (!inPreprocessorDirective_) {
        Say(GetProvenanceRange(start, end),
            "Incomplete character literal"_err_en_US);
      }
      break;
    }
    // Here's a weird edge case. When there are two or more following
    // continuation lines at this point, and the entire significant part of
    // the next continuation line is the name of a keyword macro, replace
    // it in the character literal with its definition. Example:
    //   #define FOO foo
    //   subroutine subr() bind(c, name="my_&
    //     &FOO&
    //     &_bar") ...
    // produces a binding name of "my_foo_bar".
    while (at_[1] == '&' && nextLine_ < limit_ && !InFixedFormSource()) {
      const char *idStart{nextLine_};
      if (const char *amper{SkipWhiteSpace(nextLine_)}; *amper == '&') {
        idStart = amper + 1;
      }
      if (IsLegalIdentifierStart(*idStart)) {
        std::size_t idLen{1};
        for (; IsLegalInIdentifier(idStart[idLen]); ++idLen) {
        }
        if (idStart[idLen] == '&') {
          CharBlock id{idStart, idLen};
          if (preprocessor_.IsNameDefined(id)) {
            TokenSequence ppTokens;
            ppTokens.Put(id, GetProvenance(idStart));
            if (auto replaced{
                    preprocessor_.MacroReplacement(ppTokens, *this)}) {
              tokens.Put(*replaced);
              at_ = &idStart[idLen - 1];
              NextLine();
              continue; // try again on the next line
            }
          }
        }
      }
      break;
    }
    end = at_ + 1;
    NextChar();
    if (*at_ == quote && !isEscaped) {
      // A doubled unescaped quote mark becomes a single instance of that
      // quote character in the literal (later). There can be spaces between
      // the quotes in fixed form source.
      EmitChar(tokens, quote);
      inCharLiteral_ = false; // for cases like print *, '...'!comment
      NextChar();
      if (InFixedFormSource()) {
        SkipSpaces();
      }
      if (*at_ != quote) {
        break;
      }
      inCharLiteral_ = true;
    }
  }
  continuationInCharLiteral_ = false;
  inCharLiteral_ = false;
}

// Scans a Hollerith literal of "count" characters; at_ must be at the 'h'
// or 'H', which is emitted as 'H'.  "start" is the beginning of the token
// and is used for the provenance ranges of messages.
void Prescanner::Hollerith(
    TokenSequence &tokens, int count, const char *start) {
  inCharLiteral_ = true;
  CHECK(*at_ == 'h' || *at_ == 'H');
  EmitChar(tokens, 'H');
  while (count-- > 0) {
    if (PadOutCharacterLiteral(tokens)) {
    } else if (*at_ == '\n') {
      if (features_.ShouldWarn(common::UsageWarning::Scanning)) {
        Say(GetProvenanceRange(start, at_),
            "Possible truncated Hollerith literal"_warn_en_US);
      }
      break;
    } else {
      NextChar();
      // Each multi-byte character encoding counts as a single character.
      // No escape sequences are recognized.
      // Hollerith is always emitted to the cooked character
      // stream in UTF-8.
      DecodedCharacter decoded{DecodeCharacter(
          encoding_, at_, static_cast<std::size_t>(limit_ - at_), false)};
      if (decoded.bytes > 0) {
        EncodedCharacter utf8{
            EncodeCharacter<Encoding::UTF_8>(decoded.codepoint)};
        for (int j{0}; j < utf8.bytes; ++j) {
          EmitChar(tokens, utf8.buffer[j]);
        }
        // Leave at_ on the last byte of the decoded character.
        at_ += decoded.bytes - 1;
      } else {
        Say(GetProvenanceRange(start, at_),
            "Bad character in Hollerith literal"_err_en_US);
        break;
      }
    }
  }
  if (*at_ != '\n') {
    NextChar();
  }
  inCharLiteral_ = false;
}

// In fixed form, source card images must be processed as if they were at
// least 72 columns wide, at least in character literal contexts.
// Returns true after emitting one padding blank, and false otherwise.
bool Prescanner::PadOutCharacterLiteral(TokenSequence &tokens) {
  while (inFixedForm_ && !tabInCurrentLine_ && at_[1] == '\n') {
    if (column_ < fixedFormColumnLimit_) {
      tokens.PutNextTokenChar(' ', spaceProvenance_);
      ++column_;
      return true;
    }
    if (!FixedFormContinuation(false /*no need to insert space*/) ||
        tabInCurrentLine_) {
      return false;
    }
    CHECK(column_ == 7);
    --at_; // point to column 6 of continuation line
    column_ = 6;
  }
  return false;
}

// True when the seven characters after *p spell "process", ignoring case.
// The character at p itself is not examined here; the callers in this
// file test it for '@' first.
static bool IsAtProcess(const char *p) {
  static const char pAtProc[]{"process"};
  for (std::size_t i{0}; i < sizeof pAtProc - 1; ++i) {
    if (ToLowerCaseLetter(*++p) != pAtProc[i])
      return false;
  }
  return true;
}

// Determines whether the line beginning at "start" is a comment line in
// fixed form source.
bool Prescanner::IsFixedFormCommentLine(const char *start) const {
  const char *p{start};

  // The @process directive must start in column 1.
  if (*p == '@' && IsAtProcess(p)) {
    return true;
  }

  if (IsFixedFormCommentChar(*p) || *p == '%' || // VAX %list, %eject, &c.
      ((*p == 'D' || *p == 'd') &&
          !features_.IsEnabled(LanguageFeature::OldDebugLines))) {
    return true;
  }
  bool anyTabs{false};
  while (true) {
    if (int n{IsSpace(p)}) {
      p += n;
    } else if (*p == '\t') {
      anyTabs = true;
      ++p;
    } else if (*p == '0' && !anyTabs && p == start + 5) {
      ++p; // 0 in column 6 must be treated as a space
    } else {
      break;
    }
  }
  if (!anyTabs && p >= start + fixedFormColumnLimit_) {
    return true;
  }
  // A '!' in column 6 of a line without tabs is not treated as a comment.
  if (*p == '!' && !inCharLiteral_ && (anyTabs || p != start + 5)) {
    return true;
  }
  return *p == '\n';
}

// After skipping leading white space and C-style comments, returns a
// pointer to the '!' or newline found there, or to an '@' that begins
// "@process"; returns nullptr otherwise.
const char *Prescanner::IsFreeFormComment(const char *p) const {
  p = SkipWhiteSpaceAndCComments(p);
  if (*p == '!' || *p == '\n') {
    return p;
  } else if (*p == '@') {
    return IsAtProcess(p) ? p : nullptr;
  } else {
    return nullptr;
  }
}

// If the line at "start" is a Fortran INCLUDE line, returns the offset
// from "start" to the opening quote of the file name.  White space may
// appear between the letters of INCLUDE, and a numeric kind prefix
// followed by an underscore is accepted before the quote.
std::optional<std::size_t> Prescanner::IsIncludeLine(const char *start) const {
  const char *p{SkipWhiteSpace(start)};
  if (*p == '0' && inFixedForm_ && p == start + 5) {
    // Accept "     0INCLUDE" in fixed form.
    p = SkipWhiteSpace(p + 1);
  }
  for (const char *q{"include"}; *q; ++q) {
    if (ToLowerCaseLetter(*p) != *q) {
      return std::nullopt;
    }
    p = SkipWhiteSpace(p + 1);
  }
  if (IsDecimalDigit(*p)) { // accept & ignore a numeric kind prefix
    for (p = SkipWhiteSpace(p + 1); IsDecimalDigit(*p);
         p = SkipWhiteSpace(p + 1)) {
    }
    if (*p != '_') {
      return std::nullopt;
    }
    p = SkipWhiteSpace(p + 1);
  }
  if (*p == '"' || *p == '\'') {
    return {p - start};
  }
  return std::nullopt;
}

// Processes a Fortran INCLUDE line.  The quoted path is extracted, with a
// doubled quote standing for a single quote character, and the file is
// opened with the directory of the current source file, when known, as
// the path to prepend.  A non-empty file is prescanned by a nested
// Prescanner that uses a fresh Preprocessor.
void Prescanner::FortranInclude(const char *firstQuote) {
  const char *p{firstQuote};
  while (*p != '"' && *p != '\'') {
    ++p;
  }
  char quote{*p};
  std::string path;
  for (++p; *p != '\n'; ++p) {
    if (*p == quote) {
      if (p[1] != quote) {
        break;
      }
      ++p;
    }
    path += *p;
  }
  if (*p != quote) {
    Say(GetProvenanceRange(firstQuote, p),
        "malformed path name string"_err_en_US);
    return;
  }
  p = SkipWhiteSpace(p + 1);
  if (*p != '\n' && *p != '!') {
    const char *garbage{p};
    for (; *p != '\n' && *p != '!'; ++p) {
    }
    if (features_.ShouldWarn(common::UsageWarning::Scanning)) {
      Say(GetProvenanceRange(garbage, p),
          "excess characters after path name"_warn_en_US);
    }
  }
  std::string buf;
  llvm::raw_string_ostream error{buf};
  Provenance provenance{GetProvenance(nextLine_)};
  std::optional<std::string> prependPath;
  if (const SourceFile *currentFile{allSources_.GetSourceFile(provenance)}) {
    prependPath = DirectoryName(currentFile->path());
  }
  const SourceFile *included{
      allSources_.Open(path, error, std::move(prependPath))};
  if (!included) {
    Say(provenance, "INCLUDE: %s"_err_en_US, buf);
  } else if (included->bytes() > 0) {
    ProvenanceRange includeLineRange{
        provenance, static_cast<std::size_t>(p - nextLine_)};
    ProvenanceRange fileRange{
        allSources_.AddIncludedFile(*included, includeLineRange)};
    // The included file gets a new preprocessor; only the standard macros
    // and _CUDA are defined in it, and only when the current preprocessor
    // defines __FILE__ or _CUDA respectively.
    Preprocessor cleanPrepro{allSources_};
    if (preprocessor_.IsNameDefined("__FILE__"s)) {
      cleanPrepro.DefineStandardMacros(); // __FILE__, __LINE__, &c.
    }
    if (preprocessor_.IsNameDefined("_CUDA"s)) {
      cleanPrepro.Define("_CUDA"s, "1");
    }
    Prescanner{*this, cleanPrepro, /*isNestedInIncludeDirective=*/false}
        .set_encoding(included->encoding())
        .Prescan(fileRange);
  }
}

// If the first character of the line after leading white space is '#',
// returns a pointer past the '#' and any white space that follows it;
// otherwise returns nullptr.  In fixed form, a '#' in column 6 that is
// preceded only by spaces (no tabs) is not treated as a directive.
const char *Prescanner::IsPreprocessorDirectiveLine(const char *start) const {
  const char *p{start};
  while (int n{IsSpace(p)}) {
    p += n;
  }
  if (*p == '#') {
    if (inFixedForm_ && p == start + 5) {
      return nullptr;
    }
  } else {
    p = SkipWhiteSpace(p);
    if (*p != '#') {
      return nullptr;
    }
  }
  return SkipWhiteSpace(p + 1);
}

// True when the line at nextLine_ is a preprocessing directive line.
bool Prescanner::IsNextLinePreprocessorDirective() const {
  return IsPreprocessorDirectiveLine(nextLine_) != nullptr;
}

bool Prescanner::SkipCommentLine(bool afterAmpersand) {
  if (IsAtEnd()) {
    if (afterAmpersand && prescannerNesting_ > 0) {
      // A continuation marker at the end of the last line in an
      // include file inhibits the newline for that line.
1147 SkipToEndOfLine(); 1148 omitNewline_ = true; 1149 } 1150 } else if (inPreprocessorDirective_) { 1151 } else { 1152 auto lineClass{ClassifyLine(nextLine_)}; 1153 if (lineClass.kind == LineClassification::Kind::Comment) { 1154 NextLine(); 1155 return true; 1156 } else if (lineClass.kind == 1157 LineClassification::Kind::ConditionalCompilationDirective || 1158 lineClass.kind == LineClassification::Kind::PreprocessorDirective) { 1159 // Allow conditional compilation directives (e.g., #ifdef) to affect 1160 // continuation lines. 1161 // Allow other preprocessor directives, too, except #include 1162 // (when it does not follow '&'), #define, and #undef (because 1163 // they cannot be allowed to affect preceding text on a 1164 // continued line). 1165 preprocessor_.Directive(TokenizePreprocessorDirective(), *this); 1166 return true; 1167 } else if (afterAmpersand && 1168 (lineClass.kind == LineClassification::Kind::DefinitionDirective || 1169 lineClass.kind == LineClassification::Kind::IncludeDirective || 1170 lineClass.kind == LineClassification::Kind::IncludeLine)) { 1171 SkipToEndOfLine(); 1172 omitNewline_ = true; 1173 skipLeadingAmpersand_ = true; 1174 } 1175 } 1176 return false; 1177 } 1178 1179 const char *Prescanner::FixedFormContinuationLine(bool mightNeedSpace) { 1180 if (IsAtEnd()) { 1181 return nullptr; 1182 } 1183 tabInCurrentLine_ = false; 1184 char col1{*nextLine_}; 1185 if (IsFixedFormCommentChar(col1)) { 1186 int j{1}; 1187 if (InCompilerDirective()) { 1188 // Must be a continued compiler directive. 1189 for (; j < 5; ++j) { 1190 char ch{directiveSentinel_[j - 1]}; 1191 if (ch == '\0') { 1192 break; 1193 } 1194 if (ch != ToLowerCaseLetter(nextLine_[j])) { 1195 return nullptr; 1196 } 1197 } 1198 } else if (features_.IsEnabled(LanguageFeature::OpenMP)) { 1199 // Fixed Source Form Conditional Compilation Sentinels. 
1200 if (nextLine_[1] != '$') { 1201 return nullptr; 1202 } 1203 j++; 1204 } else { 1205 return nullptr; 1206 } 1207 for (; j < 5; ++j) { 1208 if (nextLine_[j] != ' ') { 1209 return nullptr; 1210 } 1211 } 1212 const char *col6{nextLine_ + 5}; 1213 if (*col6 != '\n' && *col6 != '0' && !IsSpaceOrTab(col6)) { 1214 if (mightNeedSpace && !IsSpace(nextLine_ + 6)) { 1215 insertASpace_ = true; 1216 } 1217 return nextLine_ + 6; 1218 } 1219 return nullptr; 1220 } else { 1221 // Normal case: not in a compiler directive. 1222 if (col1 == '&' && 1223 features_.IsEnabled( 1224 LanguageFeature::FixedFormContinuationWithColumn1Ampersand)) { 1225 // Extension: '&' as continuation marker 1226 if (features_.ShouldWarn( 1227 LanguageFeature::FixedFormContinuationWithColumn1Ampersand)) { 1228 Say(GetProvenance(nextLine_), "nonstandard usage"_port_en_US); 1229 } 1230 return nextLine_ + 1; 1231 } 1232 if (col1 == '\t' && nextLine_[1] >= '1' && nextLine_[1] <= '9') { 1233 tabInCurrentLine_ = true; 1234 return nextLine_ + 2; // VAX extension 1235 } 1236 if ((col1 == ' ' || 1237 ((col1 == 'D' || col1 == 'd') && 1238 features_.IsEnabled(LanguageFeature::OldDebugLines))) && 1239 nextLine_[1] == ' ' && nextLine_[2] == ' ' && nextLine_[3] == ' ' && 1240 nextLine_[4] == ' ') { 1241 const char *col6{nextLine_ + 5}; 1242 if (*col6 != '\n' && *col6 != '0' && !IsSpaceOrTab(col6)) { 1243 if ((*col6 == 'i' || *col6 == 'I') && IsIncludeLine(nextLine_)) { 1244 // It's An INCLUDE line, not a continuation 1245 } else { 1246 return nextLine_ + 6; 1247 } 1248 } 1249 } 1250 if (IsImplicitContinuation()) { 1251 return nextLine_; 1252 } 1253 } 1254 return nullptr; // not a continuation line 1255 } 1256 1257 const char *Prescanner::FreeFormContinuationLine(bool ampersand) { 1258 const char *p{nextLine_}; 1259 if (p >= limit_) { 1260 return nullptr; 1261 } 1262 p = SkipWhiteSpace(p); 1263 if (InCompilerDirective()) { 1264 if (*p++ != '!') { 1265 return nullptr; 1266 } 1267 for (const char *s{directiveSentinel_}; 
*s != '\0'; ++p, ++s) { 1268 if (*s != ToLowerCaseLetter(*p)) { 1269 return nullptr; 1270 } 1271 } 1272 p = SkipWhiteSpace(p); 1273 if (*p == '&') { 1274 if (!ampersand) { 1275 insertASpace_ = true; 1276 } 1277 return p + 1; 1278 } else if (ampersand) { 1279 return p; 1280 } else { 1281 return nullptr; 1282 } 1283 } else { 1284 if (*p == '&') { 1285 return p + 1; 1286 } else if (*p == '!' || *p == '\n' || *p == '#') { 1287 return nullptr; 1288 } else if (ampersand || IsImplicitContinuation()) { 1289 if (continuationInCharLiteral_) { 1290 // 'a'& -> 'a''b' == "a'b" 1291 // 'b' 1292 if (features_.ShouldWarn( 1293 common::LanguageFeature::MiscSourceExtensions)) { 1294 Say(GetProvenanceRange(p, p + 1), 1295 "Character literal continuation line should have been preceded by '&'"_port_en_US); 1296 } 1297 } else if (p > nextLine_) { 1298 --p; 1299 } else { 1300 insertASpace_ = true; 1301 } 1302 return p; 1303 } else { 1304 return nullptr; 1305 } 1306 } 1307 } 1308 1309 bool Prescanner::FixedFormContinuation(bool mightNeedSpace) { 1310 // N.B. We accept '&' as a continuation indicator in fixed form, too, 1311 // but not in a character literal. 1312 if (*at_ == '&' && inCharLiteral_) { 1313 return false; 1314 } 1315 do { 1316 if (const char *cont{FixedFormContinuationLine(mightNeedSpace)}) { 1317 BeginSourceLine(cont); 1318 column_ = 7; 1319 NextLine(); 1320 return true; 1321 } 1322 } while (SkipCommentLine(false /* not after ampersand */)); 1323 return false; 1324 } 1325 1326 bool Prescanner::FreeFormContinuation() { 1327 const char *p{at_}; 1328 bool ampersand{*p == '&'}; 1329 if (ampersand) { 1330 p = SkipWhiteSpace(p + 1); 1331 } 1332 if (*p != '\n') { 1333 if (inCharLiteral_) { 1334 return false; 1335 } else if (*p == '!') { // & ! 
comment - ok 1336 } else if (ampersand && isPossibleMacroCall_ && (*p == ',' || *p == ')')) { 1337 return false; // allow & at end of a macro argument 1338 } else if (features_.ShouldWarn(LanguageFeature::CruftAfterAmpersand)) { 1339 Say(GetProvenance(p), "missing ! before comment after &"_warn_en_US); 1340 } 1341 } 1342 do { 1343 if (const char *cont{FreeFormContinuationLine(ampersand)}) { 1344 BeginSourceLine(cont); 1345 NextLine(); 1346 return true; 1347 } 1348 } while (SkipCommentLine(ampersand)); 1349 return false; 1350 } 1351 1352 // Implicit line continuation allows a preprocessor macro call with 1353 // arguments to span multiple lines. 1354 bool Prescanner::IsImplicitContinuation() const { 1355 return !inPreprocessorDirective_ && !inCharLiteral_ && isPossibleMacroCall_ && 1356 parenthesisNesting_ > 0 && !IsAtEnd() && 1357 ClassifyLine(nextLine_).kind == LineClassification::Kind::Source; 1358 } 1359 1360 bool Prescanner::Continuation(bool mightNeedFixedFormSpace) { 1361 if (disableSourceContinuation_) { 1362 return false; 1363 } else if (*at_ == '\n' || *at_ == '&') { 1364 if (inFixedForm_) { 1365 return FixedFormContinuation(mightNeedFixedFormSpace); 1366 } else { 1367 return FreeFormContinuation(); 1368 } 1369 } else if (*at_ == '\\' && at_ + 2 == nextLine_ && 1370 backslashFreeFormContinuation_ && !inFixedForm_ && nextLine_ < limit_) { 1371 // cpp-like handling of \ at end of a free form source line 1372 BeginSourceLine(nextLine_); 1373 NextLine(); 1374 return true; 1375 } else { 1376 return false; 1377 } 1378 } 1379 1380 std::optional<Prescanner::LineClassification> 1381 Prescanner::IsFixedFormCompilerDirectiveLine(const char *start) const { 1382 const char *p{start}; 1383 char col1{*p++}; 1384 if (!IsFixedFormCommentChar(col1)) { 1385 return std::nullopt; 1386 } 1387 char sentinel[5], *sp{sentinel}; 1388 int column{2}; 1389 for (; column < 6; ++column, ++p) { 1390 if (*p == '\n' || IsSpaceOrTab(p)) { 1391 break; 1392 } 1393 if (sp == sentinel + 1 && 
sentinel[0] == '$' && IsDecimalDigit(*p)) { 1394 // OpenMP conditional compilation line: leave the label alone 1395 break; 1396 } 1397 *sp++ = ToLowerCaseLetter(*p); 1398 } 1399 if (column == 6) { 1400 if (*p == '0') { 1401 ++p; 1402 } else if (int n{IsSpaceOrTab(p)}) { 1403 p += n; 1404 } else { 1405 // This is a Continuation line, not an initial directive line. 1406 return std::nullopt; 1407 } 1408 } 1409 if (sp == sentinel) { 1410 return std::nullopt; 1411 } 1412 *sp = '\0'; 1413 if (const char *ss{IsCompilerDirectiveSentinel( 1414 sentinel, static_cast<std::size_t>(sp - sentinel))}) { 1415 std::size_t payloadOffset = p - start; 1416 return {LineClassification{ 1417 LineClassification::Kind::CompilerDirective, payloadOffset, ss}}; 1418 } 1419 return std::nullopt; 1420 } 1421 1422 std::optional<Prescanner::LineClassification> 1423 Prescanner::IsFreeFormCompilerDirectiveLine(const char *start) const { 1424 if (const char *p{SkipWhiteSpace(start)}; p && *p++ == '!') { 1425 if (auto maybePair{IsCompilerDirectiveSentinel(p)}) { 1426 auto offset{static_cast<std::size_t>(maybePair->second - start)}; 1427 return {LineClassification{LineClassification::Kind::CompilerDirective, 1428 offset, maybePair->first}}; 1429 } 1430 } 1431 return std::nullopt; 1432 } 1433 1434 Prescanner &Prescanner::AddCompilerDirectiveSentinel(const std::string &dir) { 1435 std::uint64_t packed{0}; 1436 for (char ch : dir) { 1437 packed = (packed << 8) | (ToLowerCaseLetter(ch) & 0xff); 1438 } 1439 compilerDirectiveBloomFilter_.set(packed % prime1); 1440 compilerDirectiveBloomFilter_.set(packed % prime2); 1441 compilerDirectiveSentinels_.insert(dir); 1442 return *this; 1443 } 1444 1445 const char *Prescanner::IsCompilerDirectiveSentinel( 1446 const char *sentinel, std::size_t len) const { 1447 std::uint64_t packed{0}; 1448 for (std::size_t j{0}; j < len; ++j) { 1449 packed = (packed << 8) | (sentinel[j] & 0xff); 1450 } 1451 if (len == 0 || !compilerDirectiveBloomFilter_.test(packed % prime1) || 
1452 !compilerDirectiveBloomFilter_.test(packed % prime2)) { 1453 return nullptr; 1454 } 1455 const auto iter{compilerDirectiveSentinels_.find(std::string(sentinel, len))}; 1456 return iter == compilerDirectiveSentinels_.end() ? nullptr : iter->c_str(); 1457 } 1458 1459 const char *Prescanner::IsCompilerDirectiveSentinel(CharBlock token) const { 1460 const char *p{token.begin()}; 1461 const char *end{p + token.size()}; 1462 while (p < end && (*p == ' ' || *p == '\n')) { 1463 ++p; 1464 } 1465 if (p < end && *p == '!') { 1466 ++p; 1467 } 1468 while (end > p && (end[-1] == ' ' || end[-1] == '\t')) { 1469 --end; 1470 } 1471 return end > p && IsCompilerDirectiveSentinel(p, end - p) ? p : nullptr; 1472 } 1473 1474 std::optional<std::pair<const char *, const char *>> 1475 Prescanner::IsCompilerDirectiveSentinel(const char *p) const { 1476 char sentinel[8]; 1477 for (std::size_t j{0}; j + 1 < sizeof sentinel && *p != '\n'; ++p, ++j) { 1478 if (int n{*p == '&' ? 1 : IsSpaceOrTab(p)}) { 1479 if (j > 0) { 1480 sentinel[j] = '\0'; 1481 p = SkipWhiteSpace(p + n); 1482 if (*p != '!') { 1483 if (const char *sp{IsCompilerDirectiveSentinel(sentinel, j)}) { 1484 return std::make_pair(sp, p); 1485 } 1486 } 1487 } 1488 break; 1489 } else { 1490 sentinel[j] = ToLowerCaseLetter(*p); 1491 } 1492 } 1493 return std::nullopt; 1494 } 1495 1496 constexpr bool IsDirective(const char *match, const char *dir) { 1497 for (; *match; ++match) { 1498 if (*match != ToLowerCaseLetter(*dir++)) { 1499 return false; 1500 } 1501 } 1502 return true; 1503 } 1504 1505 Prescanner::LineClassification Prescanner::ClassifyLine( 1506 const char *start) const { 1507 if (inFixedForm_) { 1508 if (std::optional<LineClassification> lc{ 1509 IsFixedFormCompilerDirectiveLine(start)}) { 1510 return std::move(*lc); 1511 } 1512 if (IsFixedFormCommentLine(start)) { 1513 return {LineClassification::Kind::Comment}; 1514 } 1515 } else { 1516 if (std::optional<LineClassification> lc{ 1517 
IsFreeFormCompilerDirectiveLine(start)}) { 1518 return std::move(*lc); 1519 } 1520 if (const char *bang{IsFreeFormComment(start)}) { 1521 return {LineClassification::Kind::Comment, 1522 static_cast<std::size_t>(bang - start)}; 1523 } 1524 } 1525 if (std::optional<std::size_t> quoteOffset{IsIncludeLine(start)}) { 1526 return {LineClassification::Kind::IncludeLine, *quoteOffset}; 1527 } 1528 if (const char *dir{IsPreprocessorDirectiveLine(start)}) { 1529 if (IsDirective("if", dir) || IsDirective("elif", dir) || 1530 IsDirective("else", dir) || IsDirective("endif", dir)) { 1531 return {LineClassification::Kind::ConditionalCompilationDirective}; 1532 } else if (IsDirective("include", dir)) { 1533 return {LineClassification::Kind::IncludeDirective}; 1534 } else if (IsDirective("define", dir) || IsDirective("undef", dir)) { 1535 return {LineClassification::Kind::DefinitionDirective}; 1536 } else { 1537 return {LineClassification::Kind::PreprocessorDirective}; 1538 } 1539 } 1540 return {LineClassification::Kind::Source}; 1541 } 1542 1543 Prescanner::LineClassification Prescanner::ClassifyLine( 1544 TokenSequence &tokens, Provenance newlineProvenance) const { 1545 // Append a newline temporarily. 1546 tokens.PutNextTokenChar('\n', newlineProvenance); 1547 tokens.CloseToken(); 1548 const char *ppd{tokens.ToCharBlock().begin()}; 1549 LineClassification classification{ClassifyLine(ppd)}; 1550 tokens.pop_back(); // remove the newline 1551 return classification; 1552 } 1553 1554 void Prescanner::SourceFormChange(std::string &&dir) { 1555 if (dir == "!dir$ free") { 1556 inFixedForm_ = false; 1557 } else if (dir == "!dir$ fixed") { 1558 inFixedForm_ = true; 1559 } 1560 } 1561 1562 // Acquire and append compiler directive continuation lines to 1563 // the tokens that constitute a compiler directive, even when those 1564 // directive continuation lines are the result of macro expansion. 
1565 // (Not used when neither the original compiler directive line nor 1566 // the directive continuation line result from preprocessing; regular 1567 // line continuation during tokenization handles that normal case.) 1568 bool Prescanner::CompilerDirectiveContinuation( 1569 TokenSequence &tokens, const char *origSentinel) { 1570 if (inFixedForm_ || tokens.empty() || 1571 tokens.TokenAt(tokens.SizeInTokens() - 1) != "&") { 1572 return false; 1573 } 1574 LineClassification followingLine{ClassifyLine(nextLine_)}; 1575 if (followingLine.kind == LineClassification::Kind::Comment) { 1576 nextLine_ += followingLine.payloadOffset; // advance to '!' or newline 1577 NextLine(); 1578 return true; 1579 } 1580 CHECK(origSentinel != nullptr); 1581 directiveSentinel_ = origSentinel; // so InCompilerDirective() is true 1582 const char *nextContinuation{ 1583 followingLine.kind == LineClassification::Kind::CompilerDirective 1584 ? FreeFormContinuationLine(true) 1585 : nullptr}; 1586 if (!nextContinuation && 1587 followingLine.kind != LineClassification::Kind::Source) { 1588 return false; 1589 } 1590 auto origNextLine{nextLine_}; 1591 BeginSourceLine(nextLine_); 1592 NextLine(); 1593 if (nextContinuation) { 1594 // What follows is !DIR$ & xxx; skip over the & so that it 1595 // doesn't cause a spurious continuation. 1596 at_ = nextContinuation; 1597 } else { 1598 // What follows looks like a source line before macro expansion, 1599 // but might become a directive continuation afterwards. 
1600 SkipSpaces(); 1601 } 1602 TokenSequence followingTokens; 1603 while (NextToken(followingTokens)) { 1604 } 1605 if (auto followingPrepro{ 1606 preprocessor_.MacroReplacement(followingTokens, *this)}) { 1607 followingTokens = std::move(*followingPrepro); 1608 } 1609 followingTokens.RemoveRedundantBlanks(); 1610 std::size_t startAt{0}; 1611 std::size_t following{followingTokens.SizeInTokens()}; 1612 bool ok{false}; 1613 if (nextContinuation) { 1614 ok = true; 1615 } else { 1616 startAt = 2; 1617 if (startAt < following && followingTokens.TokenAt(0) == "!") { 1618 CharBlock sentinel{followingTokens.TokenAt(1)}; 1619 if (!sentinel.empty() && 1620 std::memcmp(sentinel.begin(), origSentinel, sentinel.size()) == 0) { 1621 ok = true; 1622 while ( 1623 startAt < following && followingTokens.TokenAt(startAt).IsBlank()) { 1624 ++startAt; 1625 } 1626 if (startAt < following && followingTokens.TokenAt(startAt) == "&") { 1627 ++startAt; 1628 } 1629 } 1630 } 1631 } 1632 if (ok) { 1633 tokens.pop_back(); // delete original '&' 1634 tokens.Put(followingTokens, startAt, following - startAt); 1635 tokens.RemoveRedundantBlanks(); 1636 } else { 1637 nextLine_ = origNextLine; 1638 } 1639 return ok; 1640 } 1641 1642 // Similar, but for source line continuation after macro replacement. 1643 bool Prescanner::SourceLineContinuation(TokenSequence &tokens) { 1644 if (!inFixedForm_ && !tokens.empty() && 1645 tokens.TokenAt(tokens.SizeInTokens() - 1) == "&") { 1646 LineClassification followingLine{ClassifyLine(nextLine_)}; 1647 if (followingLine.kind == LineClassification::Kind::Comment) { 1648 nextLine_ += followingLine.payloadOffset; // advance to '!' 
or newline 1649 NextLine(); 1650 return true; 1651 } else if (const char *nextContinuation{FreeFormContinuationLine(true)}) { 1652 BeginSourceLine(nextLine_); 1653 NextLine(); 1654 TokenSequence followingTokens; 1655 at_ = nextContinuation; 1656 while (NextToken(followingTokens)) { 1657 } 1658 if (auto followingPrepro{ 1659 preprocessor_.MacroReplacement(followingTokens, *this)}) { 1660 followingTokens = std::move(*followingPrepro); 1661 } 1662 followingTokens.RemoveRedundantBlanks(); 1663 tokens.pop_back(); // delete original '&' 1664 tokens.Put(followingTokens); 1665 return true; 1666 } 1667 } 1668 return false; 1669 } 1670 } // namespace Fortran::parser 1671