//===-- lib/Parser/prescan.cpp --------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "prescan.h"
#include "flang/Common/idioms.h"
#include "flang/Parser/characters.h"
#include "flang/Parser/message.h"
#include "flang/Parser/preprocessor.h"
#include "flang/Parser/source.h"
#include "flang/Parser/token-sequence.h"
#include "llvm/Support/raw_ostream.h"
#include <cstddef>
#include <cstring>
#include <utility>
#include <vector>

namespace Fortran::parser {

using common::LanguageFeature;

// Upper bound on prescannerNesting_; Prescan() reports an error and stops
// when it is exceeded.
static constexpr int maxPrescannerNesting{100};

// Constructs a prescanner bound to the given messages, cooked source, and
// preprocessor.  The source collection and the character encoding are
// obtained from the preprocessor's allSources(), and
// backslashFreeFormContinuation_ is initialized from
// preprocessor.AnyDefinitions().
Prescanner::Prescanner(Messages &messages, CookedSource &cooked,
    Preprocessor &preprocessor, common::LanguageFeatureControl lfc)
    : messages_{messages}, cooked_{cooked}, preprocessor_{preprocessor},
      allSources_{preprocessor_.allSources()}, features_{lfc},
      backslashFreeFormContinuation_{preprocessor.AnyDefinitions()},
      encoding_{allSources_.encoding()} {}

// Constructs a nested prescanner that shares the references held by "that",
// copies its source form settings, encoding, skipLeadingAmpersand_ flag, and
// compiler directive sentinel state, and records a nesting depth one greater
// than that of "that".
Prescanner::Prescanner(const Prescanner &that, bool isNestedInIncludeDirective)
    : messages_{that.messages_}, cooked_{that.cooked_},
      preprocessor_{that.preprocessor_}, allSources_{that.allSources_},
      features_{that.features_},
      isNestedInIncludeDirective_{isNestedInIncludeDirective},
      backslashFreeFormContinuation_{that.backslashFreeFormContinuation_},
      inFixedForm_{that.inFixedForm_},
      fixedFormColumnLimit_{that.fixedFormColumnLimit_},
      encoding_{that.encoding_},
      prescannerNesting_{that.prescannerNesting_ + 1},
      skipLeadingAmpersand_{that.skipLeadingAmpersand_},
      compilerDirectiveBloomFilter_{that.compilerDirectiveBloomFilter_},
      compilerDirectiveSentinels_{that.compilerDirectiveSentinels_} {}

// Returns true for '!', '*', 'C', and 'c', the characters that this file
// accepts as fixed form comment markers.
static inline constexpr bool IsFixedFormCommentChar(char ch) {
  return ch == '!' || ch == '*' || ch == 'C' || ch == 'c';
}

// Overwrites the first non-blank character of a compiler directive line
// with '!'.  That character must satisfy IsFixedFormCommentChar(); a
// directive consisting only of blanks is a fatal internal error.
static void NormalizeCompilerDirectiveCommentMarker(TokenSequence &dir) {
  char *p{dir.GetMutableCharData()};
  char *limit{p + dir.SizeInChars()};
  for (; p < limit; ++p) {
    if (*p != ' ') {
      CHECK(IsFixedFormCommentChar(*p));
      *p = '!';
      return;
    }
  }
  DIE("compiler directive all blank");
}

// Prescans the source text covered by "range", one statement at a time,
// until IsAtEnd().  Emits an error and returns without scanning when
// prescannerNesting_ exceeds maxPrescannerNesting.  If the source form
// (fixed vs. free) at the end differs from the form at the beginning, a
// compiler-inserted "!dir$ fixed" or "!dir$ free" line naming the original
// form is emitted to the cooked source.
void Prescanner::Prescan(ProvenanceRange range) {
  startProvenance_ = range.start();
  start_ = allSources_.GetSource(range);
  CHECK(start_);
  limit_ = start_ + range.size();
  nextLine_ = start_;
  const bool beganInFixedForm{inFixedForm_};
  if (prescannerNesting_ > maxPrescannerNesting) {
    Say(GetProvenance(start_),
        "too many nested INCLUDE/#include files, possibly circular"_err_en_US);
    return;
  }
  while (!IsAtEnd()) {
    Statement();
  }
  if (inFixedForm_ != beganInFixedForm) {
    // The scanned text changed the source form; emit a directive that
    // names the form that was in effect on entry.
    std::string dir{"!dir$ "};
    if (beganInFixedForm) {
      dir += "fixed";
    } else {
      dir += "free";
    }
    dir += '\n';
    TokenSequence tokens{dir, allSources_.AddCompilerInsertion(dir).start()};
    tokens.Emit(cooked_);
  }
}

// Processes the line at nextLine_ according to its classification.
// Comment lines are skipped; INCLUDE lines go to FortranInclude();
// preprocessing directives are tokenized and handed to the preprocessor.
// Compiler directives and source lines are tokenized with NextToken(),
// subjected to macro replacement, reclassified if anything was replaced,
// and emitted via CheckAndEmitLine().
void Prescanner::Statement() {
  TokenSequence tokens;
  const char *statementStart{nextLine_};
  LineClassification line{ClassifyLine(statementStart)};
  switch (line.kind) {
  case LineClassification::Kind::Comment:
    nextLine_ += line.payloadOffset; // advance to '!' or newline
    NextLine();
    return;
  case LineClassification::Kind::IncludeLine:
    FortranInclude(nextLine_ + line.payloadOffset);
    NextLine();
    return;
  case LineClassification::Kind::ConditionalCompilationDirective:
  case LineClassification::Kind::IncludeDirective:
    preprocessor_.Directive(TokenizePreprocessorDirective(), *this);
    afterPreprocessingDirective_ = true;
    skipLeadingAmpersand_ |= !inFixedForm_;
    return;
  case LineClassification::Kind::PreprocessorDirective:
    preprocessor_.Directive(TokenizePreprocessorDirective(), *this);
    afterPreprocessingDirective_ = true;
    // Don't set skipLeadingAmpersand_
    return;
  case LineClassification::Kind::DefinitionDirective:
    preprocessor_.Directive(TokenizePreprocessorDirective(), *this);
    // Don't set afterPreprocessingDirective_ or skipLeadingAmpersand_
    return;
  case LineClassification::Kind::CompilerDirective: {
    directiveSentinel_ = line.sentinel;
    CHECK(InCompilerDirective());
    BeginStatementAndAdvance();
    if (inFixedForm_) {
      CHECK(IsFixedFormCommentChar(*at_));
    } else {
      while (*at_ == ' ' || *at_ == '\t') {
        ++at_, ++column_;
      }
      CHECK(*at_ == '!');
    }
    // For conditional compilation sentinels, condOffset is the number of
    // characters (comment marker plus sentinel) to step over before the
    // rest of the line is scanned.
    std::optional<int> condOffset;
    if (directiveSentinel_[0] == '$' && directiveSentinel_[1] == '\0') {
      // OpenMP conditional compilation line.
      condOffset = 2;
    } else if (directiveSentinel_[0] == '@' && directiveSentinel_[1] == 'c' &&
        directiveSentinel_[2] == 'u' && directiveSentinel_[3] == 'f' &&
        directiveSentinel_[4] == '\0') {
      // CUDA conditional compilation line.
      condOffset = 5;
    }
    if (condOffset) {
      at_ += *condOffset, column_ += *condOffset;
      if (auto payload{IsIncludeLine(at_)}) {
        FortranInclude(at_ + *payload);
        return;
      } else if (inFixedForm_) {
        LabelField(tokens);
      } else {
        SkipSpaces();
      }
    } else {
      // Compiler directive.  Emit normalized sentinel, squash following spaces.
      EmitChar(tokens, '!');
      ++at_, ++column_;
      for (const char *sp{directiveSentinel_}; *sp != '\0';
           ++sp, ++at_, ++column_) {
        EmitChar(tokens, *sp);
      }
      if (*at_ == ' ' || *at_ == '\t') {
        EmitChar(tokens, ' ');
        while (*at_ == ' ' || *at_ == '\t') {
          ++at_, ++column_;
        }
      }
      tokens.CloseToken();
    }
    break;
  }
  case LineClassification::Kind::Source:
    BeginStatementAndAdvance();
    if (inFixedForm_) {
      if (features_.IsEnabled(LanguageFeature::OldDebugLines) &&
          (*at_ == 'D' || *at_ == 'd')) {
        NextChar();
      }
      LabelField(tokens);
    } else {
      if (skipLeadingAmpersand_) {
        skipLeadingAmpersand_ = false;
        const char *p{SkipWhiteSpace(at_)};
        if (p < limit_ && *p == '&') {
          column_ += ++p - at_;
          at_ = p;
        }
      } else {
        SkipSpaces();
      }
      // Check for a leading identifier that might be a keyword macro
      // that will expand to anything indicating a non-source line, like
      // a comment marker or directive sentinel.  If so, disable line
      // continuation, so that NextToken() won't consume anything from
      // following lines.
      if (IsLegalIdentifierStart(*at_)) {
        // TODO: Only bother with these cases when any keyword macro has
        // been defined with replacement text that could begin a comment
        // or directive sentinel.
        const char *p{at_};
        while (IsLegalInIdentifier(*++p)) {
        }
        CharBlock id{at_, static_cast<std::size_t>(p - at_)};
        if (preprocessor_.IsNameDefined(id) &&
            !preprocessor_.IsFunctionLikeDefinition(id)) {
          TokenSequence toks;
          toks.Put(id, GetProvenance(at_));
          if (auto replaced{preprocessor_.MacroReplacement(toks, *this)}) {
            auto newLineClass{ClassifyLine(*replaced, GetCurrentProvenance())};
            if (newLineClass.kind ==
                LineClassification::Kind::CompilerDirective) {
              directiveSentinel_ = newLineClass.sentinel;
              disableSourceContinuation_ = false;
            } else {
              disableSourceContinuation_ =
                  newLineClass.kind != LineClassification::Kind::Source;
            }
          }
        }
      }
    }
    break;
  }

  // Tokenize the rest of the statement, including any continuation lines.
  while (NextToken(tokens)) {
  }
  if (continuationLines_ > 255) {
    if (features_.ShouldWarn(common::LanguageFeature::MiscSourceExtensions)) {
      Say(GetProvenance(statementStart),
          "%d continuation lines is more than the Fortran standard allows"_port_en_US,
          continuationLines_);
    }
  }

  Provenance newlineProvenance{GetCurrentProvenance()};
  if (std::optional<TokenSequence> preprocessed{
          preprocessor_.MacroReplacement(tokens, *this)}) {
    // Reprocess the preprocessed line.
    LineClassification ppl{ClassifyLine(*preprocessed, newlineProvenance)};
    switch (ppl.kind) {
    case LineClassification::Kind::Comment:
      break;
    case LineClassification::Kind::IncludeLine:
      FortranInclude(preprocessed->TokenAt(0).begin() + ppl.payloadOffset);
      break;
    case LineClassification::Kind::ConditionalCompilationDirective:
    case LineClassification::Kind::IncludeDirective:
    case LineClassification::Kind::DefinitionDirective:
    case LineClassification::Kind::PreprocessorDirective:
      // The replaced text is not executed as a directive; it is emitted as
      // an ordinary line after an optional warning.
      if (features_.ShouldWarn(common::UsageWarning::Preprocessing)) {
        Say(preprocessed->GetProvenanceRange(),
            "Preprocessed line resembles a preprocessor directive"_warn_en_US);
      }
      CheckAndEmitLine(preprocessed->ToLowerCase(), newlineProvenance);
      break;
    case LineClassification::Kind::CompilerDirective:
      if (preprocessed->HasRedundantBlanks()) {
        preprocessed->RemoveRedundantBlanks();
      }
      while (CompilerDirectiveContinuation(*preprocessed, ppl.sentinel)) {
        newlineProvenance = GetCurrentProvenance();
      }
      NormalizeCompilerDirectiveCommentMarker(*preprocessed);
      preprocessed->ToLowerCase();
      SourceFormChange(preprocessed->ToString());
      // NOTE(review): the line was already lowercased two statements above;
      // the ToLowerCase() call below looks redundant apart from supplying
      // the object on which ClipComment() is chained -- confirm.
      CheckAndEmitLine(preprocessed->ToLowerCase().ClipComment(
                           *this, true /* skip first ! */),
          newlineProvenance);
      break;
    case LineClassification::Kind::Source:
      if (inFixedForm_) {
        if (preprocessed->HasBlanks(/*after column*/ 6)) {
          preprocessed->RemoveBlanks(/*after column*/ 6);
        }
      } else {
        while (SourceLineContinuation(*preprocessed)) {
          newlineProvenance = GetCurrentProvenance();
        }
        if (preprocessed->HasRedundantBlanks()) {
          preprocessed->RemoveRedundantBlanks();
        }
      }
      CheckAndEmitLine(
          preprocessed->ToLowerCase().ClipComment(*this), newlineProvenance);
      break;
    }
  } else { // no macro replacement
    if (line.kind == LineClassification::Kind::CompilerDirective) {
      while (CompilerDirectiveContinuation(tokens, line.sentinel)) {
        newlineProvenance = GetCurrentProvenance();
      }
      tokens.ToLowerCase();
      SourceFormChange(tokens.ToString());
    } else { // Kind::Source
      tokens.ToLowerCase();
      if (inFixedForm_) {
        EnforceStupidEndStatementRules(tokens);
      }
    }
    CheckAndEmitLine(tokens, newlineProvenance);
  }
  directiveSentinel_ = nullptr;
}

// Checks "tokens" for bad characters and, when the conditions below allow
// it, for badly nested parentheses; then emits the tokens to the cooked
// source.  A newline with the given provenance follows unless omitNewline_
// is set, in which case that flag is cleared instead.
void Prescanner::CheckAndEmitLine(
    TokenSequence &tokens, Provenance newlineProvenance) {
  tokens.CheckBadFortranCharacters(
      messages_, *this, disableSourceContinuation_);
  // Parenthesis nesting check does not apply while any #include is
  // active, nor on the lines before and after a top-level #include,
  // nor before or after conditional source.
  // Applications play shenanigans with line continuation before and
  // after #include'd subprogram argument lists and conditional source.
  if (!isNestedInIncludeDirective_ && !omitNewline_ &&
      !afterPreprocessingDirective_ && tokens.BadlyNestedParentheses() &&
      !preprocessor_.InConditional()) {
    if (nextLine_ < limit_ && IsPreprocessorDirectiveLine(nextLine_)) {
      // don't complain
    } else {
      tokens.CheckBadParentheses(messages_);
    }
  }
  tokens.Emit(cooked_);
  if (omitNewline_) {
    omitNewline_ = false;
  } else {
    cooked_.Put('\n', newlineProvenance);
    afterPreprocessingDirective_ = false;
  }
}

// Tokenizes the current line as a preprocessing directive: NextToken() is
// called with inPreprocessorDirective_ set until it returns false.  Must
// not be called at the end of the input or while a directive is already
// being tokenized.
TokenSequence Prescanner::TokenizePreprocessorDirective() {
  CHECK(!IsAtEnd() && !inPreprocessorDirective_);
  inPreprocessorDirective_ = true;
  BeginStatementAndAdvance();
  TokenSequence tokens;
  while (NextToken(tokens)) {
  }
  inPreprocessorDirective_ = false;
  return tokens;
}

// Advances nextLine_ to the character after the next '\n' found at or after
// its current position, or to limit_ when no newline precedes limit_.
// NOTE(review): <cstring> in C++ provides a const overload of std::memchr,
// so the const_cast round trip below looks unnecessary -- confirm before
// simplifying.
void Prescanner::NextLine() {
  void *vstart{static_cast<void *>(const_cast<char *>(nextLine_))};
  void *v{std::memchr(vstart, '\n', limit_ - nextLine_)};
  if (!v) {
    nextLine_ = limit_;
  } else {
    const char *nl{const_cast<const char *>(static_cast<char *>(v))};
    nextLine_ = nl + 1;
  }
}

// Scans the label field (columns 1 through 6) of a fixed form line and
// emits its non-blank characters to "token".  A tab ends the field and
// sets the column to 7.  The first non-blank character that is in column 6
// (other than '0') or that is not a decimal digit is remembered as the
// "bad column"; unless the collected text is a defined macro name, this
// either marks a possible fixed form continuation in the cooked source or
// elicits a warning, and the collected text is discarded.
void Prescanner::LabelField(TokenSequence &token) {
  int outCol{1};
  const char *start{at_};
  std::optional<int> badColumn;
  for (; *at_ != '\n' && column_ <= 6; ++at_) {
    if (*at_ == '\t') {
      ++at_;
      column_ = 7;
      break;
    }
    if (*at_ != ' ' &&
        !(*at_ == '0' && column_ == 6)) { // '0' in column 6 becomes space
      EmitChar(token, *at_);
      ++outCol;
      if (!badColumn && (column_ == 6 || !IsDecimalDigit(*at_))) {
        badColumn = column_;
      }
    }
    ++column_;
  }
  if (badColumn && !preprocessor_.IsNameDefined(token.CurrentOpenToken())) {
    if ((prescannerNesting_ > 0 && *badColumn == 6 &&
            cooked_.BufferedBytes() == firstCookedCharacterOffset_) ||
        afterPreprocessingDirective_) {
      // This is the first source line in #include'd text or conditional
      // code under #if, or the first source line after such.
      // If it turns out that the preprocessed text begins with a
      // fixed form continuation line, the newline at the end
      // of the latest source line beforehand will be deleted in
      // CookedSource::Marshal().
      cooked_.MarkPossibleFixedFormContinuation();
    } else if (features_.ShouldWarn(common::UsageWarning::Scanning)) {
      Say(GetProvenance(start + *badColumn - 1),
          *badColumn == 6
              ? "Statement should not begin with a continuation line"_warn_en_US
              : "Character in fixed-form label field must be a digit"_warn_en_US);
    }
    token.clear();
    if (*badColumn < 6) {
      // Rescan the whole line from column 1 as ordinary source text.
      at_ = start;
      column_ = 1;
      return;
    }
    outCol = 1;
  }
  if (outCol == 1) { // empty label field
    // Emit a space so that, if the line is rescanned after preprocessing,
    // a leading 'C' or 'D' won't be left-justified and then accidentally
    // misinterpreted as a comment card.
    EmitChar(token, ' ');
    ++outCol;
  }
  token.CloseToken();
  SkipToNextSignificantCharacter();
  if (IsDecimalDigit(*at_)) {
    if (features_.ShouldWarn(common::LanguageFeature::MiscSourceExtensions)) {
      Say(GetCurrentProvenance(),
          "Label digit is not in fixed-form label field"_port_en_US);
    }
  }
}

// 6.3.3.5: A program unit END statement, or any other statement whose
// initial line resembles an END statement, shall not be continued in
// fixed form source.
// This check examines the (already lowercased) text of "tokens" and uses
// the source positions of its characters to detect whether the statement
// spans more than one source line.
void Prescanner::EnforceStupidEndStatementRules(const TokenSequence &tokens) {
  CharBlock cBlock{tokens.ToCharBlock()};
  const char *str{cBlock.begin()};
  std::size_t n{cBlock.size()};
  if (n < 3) {
    return;
  }
  // Skip over blanks and label digits.
  std::size_t j{0};
  for (; j < n && (str[j] == ' ' || (str[j] >= '0' && str[j] <= '9')); ++j) {
  }
  if (j + 3 > n || std::memcmp(str + j, "end", 3) != 0) {
    return;
  }
  // It starts with END, possibly after a label.
  auto start{allSources_.GetSourcePosition(tokens.GetCharProvenance(j))};
  auto end{allSources_.GetSourcePosition(tokens.GetCharProvenance(n - 1))};
  if (!start || !end) {
    return;
  }
  if (&*start->sourceFile == &*end->sourceFile && start->line == end->line) {
    return; // no continuation
  }
  j += 3;
  static const char *const prefixes[]{"program", "subroutine", "function",
      "blockdata", "module", "submodule", nullptr};
  bool isPrefix{j == n || !IsLegalInIdentifier(str[j])}; // prefix is END
  std::size_t endOfPrefix{j - 1};
  for (const char *const *p{prefixes}; *p; ++p) {
    std::size_t pLen{std::strlen(*p)};
    if (j + pLen <= n && std::memcmp(str + j, *p, pLen) == 0) {
      isPrefix = true; // END thing as prefix
      j += pLen;
      endOfPrefix = j - 1;
      // Skip over any program unit name that follows.
      for (; j < n && IsLegalInIdentifier(str[j]); ++j) {
      }
      break;
    }
  }
  if (isPrefix) {
    auto range{tokens.GetTokenProvenanceRange(1)};
    if (j == n) { // END or END thing [name]
      Say(range,
          "Program unit END statement may not be continued in fixed form source"_err_en_US);
    } else {
      // Complain only when the END prefix lies wholly on the initial line
      // and the text after it comes from a different line or file.
      auto endOfPrefixPos{
          allSources_.GetSourcePosition(tokens.GetCharProvenance(endOfPrefix))};
      auto next{allSources_.GetSourcePosition(tokens.GetCharProvenance(j))};
      if (endOfPrefixPos && next &&
          &*endOfPrefixPos->sourceFile == &*start->sourceFile &&
          endOfPrefixPos->line == start->line &&
          (&*next->sourceFile != &*start->sourceFile ||
              next->line != start->line)) {
        Say(range,
            "Initial line of continued statement must not appear to be a program unit END in fixed form source"_err_en_US);
      }
    }
  }
}

// Advances at_ and column_ up to, but not past, the next newline.
void Prescanner::SkipToEndOfLine() {
  while (*at_ != '\n') {
    ++at_, ++column_;
  }
}

// Returns true when the remainder of the current line must be ignored:
// either the scan has passed the fixed form column limit on a line without
// tabs, or at_ is at a '!' outside a character literal that does not begin
// a compiler directive sentinel.
bool Prescanner::MustSkipToEndOfLine() const {
  if (inFixedForm_ && column_ > fixedFormColumnLimit_ && !tabInCurrentLine_) {
    return true; // skip over ignored columns in right margin (73:80)
  } else if (*at_ == '!' && !inCharLiteral_) {
    return !IsCompilerDirectiveSentinel(at_);
  } else {
    return false;
  }
}

// Advances by one character, which must not be a newline, consumes any
// UTF-8 byte order marks found at the new position (switching encoding_ to
// UTF-8), and then skips to the next significant character.
void Prescanner::NextChar() {
  CHECK(*at_ != '\n');
  ++at_, ++column_;
  while (at_[0] == '\xef' && at_[1] == '\xbb' && at_[2] == '\xbf') {
    // UTF-8 byte order mark - treat this file as UTF-8
    at_ += 3;
    encoding_ = Encoding::UTF_8;
  }
  SkipToNextSignificantCharacter();
}

// Skip everything that should be ignored until the next significant
// character is reached; handles C-style comments in preprocessing
// directives, Fortran ! comments, stuff after the right margin in
// fixed form, and all forms of line continuation.
// Returns true when at least one continuation line was consumed; each one
// also increments continuationLines_.
bool Prescanner::SkipToNextSignificantCharacter() {
  auto anyContinuationLine{false};
  if (inPreprocessorDirective_) {
    SkipCComments();
  } else {
    bool mightNeedSpace{false};
    if (MustSkipToEndOfLine()) {
      SkipToEndOfLine();
    } else {
      mightNeedSpace = *at_ == '\n';
    }
    for (; Continuation(mightNeedSpace); mightNeedSpace = false) {
      anyContinuationLine = true;
      ++continuationLines_;
      if (MustSkipToEndOfLine()) {
        SkipToEndOfLine();
      }
    }
    if (*at_ == '\t') {
      tabInCurrentLine_ = true;
    }
  }
  return anyContinuationLine;
}

// Skips any C-style comments at at_ and, within a preprocessing directive,
// any backslash-newline line continuations.  Stops at the first character
// that begins neither, or at an unterminated comment.
void Prescanner::SkipCComments() {
  while (true) {
    if (IsCComment(at_)) {
      if (const char *after{SkipCComment(at_)}) {
        column_ += after - at_;
        // May have skipped over one or more newlines; relocate the start of
        // the next line.
        nextLine_ = at_ = after;
        NextLine();
      } else {
        // Don't emit any messages about unclosed C-style comments, because
        // the sequence /* can appear legally in a FORMAT statement.  There's
        // no ambiguity, since the sequence */ cannot appear legally.
        break;
      }
    } else if (inPreprocessorDirective_ && at_[0] == '\\' && at_ + 2 < limit_ &&
        at_[1] == '\n' && !IsAtEnd()) {
      BeginSourceLineAndAdvance();
    } else {
      break;
    }
  }
}

// Skips blanks and tabs with NextChar() and then clears insertASpace_.
void Prescanner::SkipSpaces() {
  while (*at_ == ' ' || *at_ == '\t') {
    NextChar();
  }
  insertASpace_ = false;
}

// Returns a pointer to the first character at or after p that is neither a
// blank nor a tab.  Does not modify any scanner state.
const char *Prescanner::SkipWhiteSpace(const char *p) {
  while (*p == ' ' || *p == '\t') {
    ++p;
  }
  return p;
}

// Like SkipWhiteSpace(), but also steps over terminated C-style comments.
// An unterminated comment ends the scan and is not skipped.
const char *Prescanner::SkipWhiteSpaceAndCComments(const char *p) const {
  while (true) {
    if (*p == ' ' || *p == '\t') {
      ++p;
    } else if (IsCComment(p)) {
      if (const char *after{SkipCComment(p)}) {
        p = after;
      } else {
        break;
      }
    } else {
      break;
    }
  }
  return p;
}

// Given p at the two-character opening of a C-style comment, returns a
// pointer just past the closing star-slash pair, or nullptr if limit_ is
// reached first.
const char *Prescanner::SkipCComment(const char *p) const {
  // "star" and "slash" hold the two most recently read characters.
  char star{' '}, slash{' '};
  p += 2;
  while (star != '*' || slash != '/') {
    if (p >= limit_) {
      return nullptr; // signifies an unterminated comment
    }
    star = slash;
    slash = *p++;
  }
  return p;
}

// Scans the next preprocessing token of the current statement into
// "tokens", following continuation lines via the character-advancing
// helpers.  Returns false when the end of the line has been reached and
// true after a token (possibly a single squashed space) has been closed.
bool Prescanner::NextToken(TokenSequence &tokens) {
  CHECK(at_ >= start_ && at_ < limit_);
  if (InFixedFormSource()) {
    SkipSpaces();
  } else {
    if (*at_ == '/' && IsCComment(at_)) {
      // Recognize and skip over classic C style /*comments*/ when
      // outside a character literal.
      if (features_.ShouldWarn(LanguageFeature::ClassicCComments)) {
        Say(GetCurrentProvenance(),
            "nonstandard usage: C-style comment"_port_en_US);
      }
      SkipCComments();
    }
    if (*at_ == ' ' || *at_ == '\t') {
      // Compress free-form white space into a single space character.
      const auto theSpace{at_};
      char previous{at_ <= start_ ? ' ' : at_[-1]};
      NextChar();
      SkipSpaces();
      if (*at_ == '\n' && !omitNewline_) {
        // Discard white space at the end of a line.
      } else if (!inPreprocessorDirective_ &&
          (previous == '(' || *at_ == '(' || *at_ == ')')) {
        // Discard white space before/after '(' and before ')', unless in a
        // preprocessor directive.  This helps yield space-free contiguous
        // names for generic interfaces like OPERATOR( + ) and
        // READ ( UNFORMATTED ), without misinterpreting #define f (notAnArg).
        // This has the effect of silently ignoring the illegal spaces in
        // the array constructor ( /1,2/ ) but that seems benign; it's
        // hard to avoid that while still removing spaces from OPERATOR( / )
        // and OPERATOR( // ).
      } else {
        // Preserve the squashed white space as a single space character.
        tokens.PutNextTokenChar(' ', GetProvenance(theSpace));
        tokens.CloseToken();
        return true;
      }
    }
  }
  if (insertASpace_) {
    tokens.PutNextTokenChar(' ', spaceProvenance_);
    insertASpace_ = false;
  }
  if (*at_ == '\n') {
    return false;
  }
  const char *start{at_};
  if (*at_ == '\'' || *at_ == '"') {
    QuotedCharacterLiteral(tokens, start);
    preventHollerith_ = false;
  } else if (IsDecimalDigit(*at_)) {
    // A digit string, which may turn out to be a Hollerith count, a real
    // literal, a hexadecimal constant in a directive, or a kind prefix.
    int n{0}, digits{0};
    static constexpr int maxHollerith{256 /*lines*/ * (132 - 6 /*columns*/)};
    do {
      if (n < maxHollerith) {
        n = 10 * n + DecimalDigitValue(*at_);
      }
      EmitCharAndAdvance(tokens, *at_);
      ++digits;
      if (InFixedFormSource()) {
        SkipSpaces();
      }
    } while (IsDecimalDigit(*at_));
    if ((*at_ == 'h' || *at_ == 'H') && n > 0 && n < maxHollerith &&
        !preventHollerith_) {
      Hollerith(tokens, n, start);
    } else if (*at_ == '.') {
      while (IsDecimalDigit(EmitCharAndAdvance(tokens, *at_))) {
      }
      ExponentAndKind(tokens);
    } else if (ExponentAndKind(tokens)) {
    } else if (digits == 1 && n == 0 && (*at_ == 'x' || *at_ == 'X') &&
        inPreprocessorDirective_) {
      do {
        EmitCharAndAdvance(tokens, *at_);
      } while (IsHexadecimalDigit(*at_));
    } else if (at_[0] == '_' && (at_[1] == '\'' || at_[1] == '"')) { // 4_"..."
      EmitCharAndAdvance(tokens, *at_);
      QuotedCharacterLiteral(tokens, start);
    } else if (IsLetter(*at_) && !preventHollerith_ &&
        parenthesisNesting_ > 0) {
      // Handles FORMAT(3I9HHOLLERITH) by skipping over the first I so that
      // we don't misrecognize I9HOLLERITH as an identifier in the next case.
      EmitCharAndAdvance(tokens, *at_);
    }
    preventHollerith_ = false;
  } else if (*at_ == '.') {
    char nch{EmitCharAndAdvance(tokens, '.')};
    if (!inPreprocessorDirective_ && IsDecimalDigit(nch)) {
      while (IsDecimalDigit(EmitCharAndAdvance(tokens, *at_))) {
      }
      ExponentAndKind(tokens);
    } else if (nch == '.' && EmitCharAndAdvance(tokens, '.') == '.') {
      EmitCharAndAdvance(tokens, '.'); // variadic macro definition ellipsis
    }
    preventHollerith_ = false;
  } else if (IsLegalInIdentifier(*at_)) {
    // "parts" counts the pieces of an identifier that was split across
    // continuation lines; "afterLast" is the position just after the most
    // recently emitted character, before any continuation was skipped.
    int parts{1};
    const char *afterLast{nullptr};
    do {
      EmitChar(tokens, *at_);
      ++at_, ++column_;
      afterLast = at_;
      if (SkipToNextSignificantCharacter() && IsLegalIdentifierStart(*at_)) {
        tokens.CloseToken();
        ++parts;
      }
    } while (IsLegalInIdentifier(*at_));
    if (parts >= 3) {
      // Subtlety: When an identifier is split across three or more
      // continuation lines (or two continuation lines, immediately preceded
      // or followed by '&' free form continuation line markers), its parts
      // are kept as distinct pp-tokens so that macro replacement operates
      // on them independently.
      // This trick accommodates the historic practice of using line
      // continuation for token pasting after replacement.
    } else if (parts == 2) {
      if ((start > start_ && start[-1] == '&') ||
          (afterLast < limit_ && (*afterLast == '&' || *afterLast == '\n'))) {
        // call &                call foo&        call foo&
        //   &MACRO&      OR       &MACRO&   OR     &MACRO
        //   &foo(...)             &(...)
      } else {
        tokens.ReopenLastToken();
      }
    }
    if (InFixedFormSource()) {
      SkipSpaces();
    }
    if ((*at_ == '\'' || *at_ == '"') &&
        tokens.CharAt(tokens.SizeInChars() - 1) == '_') { // kind_"..."
      QuotedCharacterLiteral(tokens, start);
      preventHollerith_ = false;
    } else {
      preventHollerith_ = true; // DO 10 H = ...
    }
  } else if (*at_ == '*') {
    if (EmitCharAndAdvance(tokens, '*') == '*') {
      EmitCharAndAdvance(tokens, '*');
    } else {
      // Subtle ambiguity:
      //  CHARACTER*2H     declares H because *2 is a kind specifier
      //  DATAC/N*2H  /    is repeated Hollerith
      preventHollerith_ = !slashInCurrentStatement_;
    }
  } else {
    char ch{*at_};
    if (ch == '(') {
      if (parenthesisNesting_++ == 0) {
        isPossibleMacroCall_ = tokens.SizeInTokens() > 0 &&
            preprocessor_.IsFunctionLikeDefinition(
                tokens.TokenAt(tokens.SizeInTokens() - 1));
      }
    } else if (ch == ')' && parenthesisNesting_ > 0) {
      --parenthesisNesting_;
    }
    char nch{EmitCharAndAdvance(tokens, ch)};
    preventHollerith_ = false;
    if ((nch == '=' &&
            (ch == '<' || ch == '>' || ch == '/' || ch == '=' || ch == '!')) ||
        (ch == nch &&
            (ch == '/' || ch == ':' || ch == '*' || ch == '#' || ch == '&' ||
                ch == '|' || ch == '<' || ch == '>')) ||
        (ch == '=' && nch == '>')) {
      // token comprises two characters
      EmitCharAndAdvance(tokens, nch);
    } else if (ch == '/') {
      slashInCurrentStatement_ = true;
    } else if (ch == ';' && InFixedFormSource()) {
      SkipSpaces();
      if (IsDecimalDigit(*at_)) {
        if (features_.ShouldWarn(
                common::LanguageFeature::MiscSourceExtensions)) {
          Say(GetProvenanceRange(at_, at_ + 1),
              "Label should be in the label field"_port_en_US);
        }
      }
    }
  }
  tokens.CloseToken();
  return true;
}

// If at_ is at an exponent letter ('e' or 'd' in either case), emits the
// letter in lower case, an optional sign, any following digits, and an
// optional kind suffix introduced by '_', then returns true.  Otherwise
// returns false without emitting anything.
bool Prescanner::ExponentAndKind(TokenSequence &tokens) {
  char ed{ToLowerCaseLetter(*at_)};
  if (ed != 'e' && ed != 'd') {
    return false;
  }
  EmitCharAndAdvance(tokens, ed);
  if (*at_ == '+' || *at_ == '-') {
    EmitCharAndAdvance(tokens, *at_);
  }
  while (IsDecimalDigit(*at_)) {
    EmitCharAndAdvance(tokens, *at_);
  }
  if (*at_ == '_') {
    while (IsLegalInIdentifier(EmitCharAndAdvance(tokens, *at_))) {
    }
  }
  return true;
}

// Scans a quoted character literal whose opening quote mark is at at_ and
// emits it to "tokens".  "start" is the beginning of the enclosing token
// and is used only for the provenance of the "incomplete literal" error.
// inCharLiteral_ and continuationInCharLiteral_ are set for the duration
// of the scan and cleared on return.
void Prescanner::QuotedCharacterLiteral(
    TokenSequence &tokens, const char *start) {
  char quote{*at_};
  const char *end{at_ + 1};
  inCharLiteral_ = true;
  continuationInCharLiteral_ = true;
  const auto emit{[&](char ch) { EmitChar(tokens, ch); }};
  const auto insert{[&](char ch) { EmitInsertedChar(tokens, ch); }};
  bool isEscaped{false};
  bool escapesEnabled{features_.IsEnabled(LanguageFeature::BackslashEscapes)};
  while (true) {
    if (*at_ == '\\') {
      if (escapesEnabled) {
        isEscaped = !isEscaped;
      } else {
        // The parser always processes escape sequences, so don't confuse it
        // when escapes are disabled.
        insert('\\');
      }
    } else {
      isEscaped = false;
    }
    EmitQuotedChar(static_cast<unsigned char>(*at_), emit, insert, false,
        Encoding::LATIN_1);
    while (PadOutCharacterLiteral(tokens)) {
    }
    if (*at_ == '\n') {
      if (!inPreprocessorDirective_) {
        Say(GetProvenanceRange(start, end),
            "Incomplete character literal"_err_en_US);
      }
      break;
    }
    // Here's a weird edge case.  When there are two or more following
    // continuation lines at this point, and the entire significant part of
    // the next continuation line is the name of a keyword macro, replace
    // it in the character literal with its definition.  Example:
    //   #define FOO foo
    //   subroutine subr() bind(c, name="my_&
    //     &FOO&
    //     &_bar") ...
    // produces a binding name of "my_foo_bar".
    while (at_[1] == '&' && nextLine_ < limit_ && !InFixedFormSource()) {
      const char *idStart{nextLine_};
      if (const char *amper{SkipWhiteSpace(nextLine_)}; *amper == '&') {
        idStart = amper + 1;
      }
      if (IsLegalIdentifierStart(*idStart)) {
        std::size_t idLen{1};
        for (; IsLegalInIdentifier(idStart[idLen]); ++idLen) {
        }
        if (idStart[idLen] == '&') {
          CharBlock id{idStart, idLen};
          if (preprocessor_.IsNameDefined(id)) {
            TokenSequence ppTokens;
            ppTokens.Put(id, GetProvenance(idStart));
            if (auto replaced{
                    preprocessor_.MacroReplacement(ppTokens, *this)}) {
              tokens.Put(*replaced);
              at_ = &idStart[idLen - 1];
              NextLine();
              continue; // try again on the next line
            }
          }
        }
      }
      break;
    }
    end = at_ + 1;
    NextChar();
    if (*at_ == quote && !isEscaped) {
      // A doubled unescaped quote mark becomes a single instance of that
      // quote character in the literal (later).  There can be spaces between
      // the quotes in fixed form source.
      EmitChar(tokens, quote);
      inCharLiteral_ = false; // for cases like print *, '...'!comment
      NextChar();
      if (InFixedFormSource()) {
        SkipSpaces();
      }
      if (*at_ != quote) {
        break;
      }
      inCharLiteral_ = true;
    }
  }
  continuationInCharLiteral_ = false;
  inCharLiteral_ = false;
}

// Emits 'H' followed by up to "count" characters of a Hollerith literal,
// with at_ initially at the 'h' or 'H'.  "start" is the beginning of the
// token and is used for the provenance of diagnostics.  inCharLiteral_ is
// set for the duration of the scan.
void Prescanner::Hollerith(
    TokenSequence &tokens, int count, const char *start) {
  inCharLiteral_ = true;
  CHECK(*at_ == 'h' || *at_ == 'H');
  EmitChar(tokens, 'H');
  while (count-- > 0) {
    if (PadOutCharacterLiteral(tokens)) {
    } else if (*at_ == '\n') {
      if (features_.ShouldWarn(common::UsageWarning::Scanning)) {
        Say(GetProvenanceRange(start, at_),
            "Possible truncated Hollerith literal"_warn_en_US);
      }
      break;
    } else {
      NextChar();
      // Each multi-byte character encoding counts as a single character.
      // No escape sequences are recognized.
      // Hollerith is always emitted to the cooked character
      // stream in UTF-8.
      DecodedCharacter decoded{DecodeCharacter(
          encoding_, at_, static_cast<std::size_t>(limit_ - at_), false)};
      if (decoded.bytes > 0) {
        EncodedCharacter utf8{
            EncodeCharacter<Encoding::UTF_8>(decoded.codepoint)};
        for (int j{0}; j < utf8.bytes; ++j) {
          EmitChar(tokens, utf8.buffer[j]);
        }
        // Leave at_ on the last byte of the decoded character.
        at_ += decoded.bytes - 1;
      } else {
        Say(GetProvenanceRange(start, at_),
            "Bad character in Hollerith literal"_err_en_US);
        break;
      }
    }
  }
  if (*at_ != '\n') {
    NextChar();
  }
  inCharLiteral_ = false;
}

// In fixed form, source card images must be processed as if they were at
// least 72 columns wide, at least in character literal contexts.
// Returns true after appending one padding blank to "tokens"; returns
// false when no padding applies.  Once the column limit has been reached,
// an attempt is made to move on to a fixed form continuation line.
bool Prescanner::PadOutCharacterLiteral(TokenSequence &tokens) {
  while (inFixedForm_ && !tabInCurrentLine_ && at_[1] == '\n') {
    if (column_ < fixedFormColumnLimit_) {
      tokens.PutNextTokenChar(' ', spaceProvenance_);
      ++column_;
      return true;
    }
    if (!FixedFormContinuation(false /*no need to insert space*/) ||
        tabInCurrentLine_) {
      return false;
    }
    CHECK(column_ == 7);
    --at_; // point to column 6 of continuation line
    column_ = 6;
  }
  return false;
}

// Returns true when the characters that follow *p spell "process",
// ignoring case.  *p itself is not examined; both callers in this file
// check that it is '@' beforehand.
static bool IsAtProcess(const char *p) {
  static const char pAtProc[]{"process"};
  for (std::size_t i{0}; i < sizeof pAtProc - 1; ++i) {
    if (ToLowerCaseLetter(*++p) != pAtProc[i])
      return false;
  }
  return true;
}

// Returns true when the fixed form line beginning at "start" is to be
// treated as a comment: an @process line, a line with a comment character
// (or '%', or 'D'/'d' when OldDebugLines is disabled) in column 1, a line
// that is blank through the column limit, a line whose first significant
// character is '!' anywhere but column 6, or an empty line.
bool Prescanner::IsFixedFormCommentLine(const char *start) const {
  const char *p{start};

  // The @process directive must start in column 1.
  if (*p == '@' && IsAtProcess(p)) {
    return true;
  }

  if (IsFixedFormCommentChar(*p) || *p == '%' || // VAX %list, %eject, &c.
      ((*p == 'D' || *p == 'd') &&
          !features_.IsEnabled(LanguageFeature::OldDebugLines))) {
    return true;
  }
  bool anyTabs{false};
  while (true) {
    if (*p == ' ') {
      ++p;
    } else if (*p == '\t') {
      anyTabs = true;
      ++p;
    } else if (*p == '0' && !anyTabs && p == start + 5) {
      ++p; // 0 in column 6 must be treated as a space
    } else {
      break;
    }
  }
  if (!anyTabs && p >= start + fixedFormColumnLimit_) {
    return true;
  }
  if (*p == '!' && !inCharLiteral_ && (anyTabs || p != start + 5)) {
    return true;
  }
  return *p == '\n';
}

// If the free form line at p contains only white space and C-style comments
// before a '!' or a newline, returns a pointer to that '!' or newline.
// If the first significant character is '@' and begins "@process", returns
// a pointer to the '@'.  Otherwise returns nullptr.
const char *Prescanner::IsFreeFormComment(const char *p) const {
  p = SkipWhiteSpaceAndCComments(p);
  if (*p == '!' || *p == '\n') {
    return p;
  } else if (*p == '@') {
    return IsAtProcess(p) ? p : nullptr;
  } else {
    return nullptr;
  }
}

// Recognizes a Fortran INCLUDE line: the keyword INCLUDE in either case,
// with blanks and tabs permitted before and between its letters, then an
// optional numeric kind prefix followed by '_', then a quote mark.
// Returns the offset from "start" to the opening quote mark, or
// std::nullopt when the line does not have that form.
std::optional<std::size_t> Prescanner::IsIncludeLine(const char *start) const {
  const char *p{SkipWhiteSpace(start)};
  if (*p == '0' && inFixedForm_ && p == start + 5) {
    // Accept "     0INCLUDE" in fixed form.
    p = SkipWhiteSpace(p + 1);
  }
  for (const char *q{"include"}; *q; ++q) {
    if (ToLowerCaseLetter(*p) != *q) {
      return std::nullopt;
    }
    p = SkipWhiteSpace(p + 1);
  }
  if (IsDecimalDigit(*p)) { // accept & ignore a numeric kind prefix
    for (p = SkipWhiteSpace(p + 1); IsDecimalDigit(*p);
         p = SkipWhiteSpace(p + 1)) {
    }
    if (*p != '_') {
      return std::nullopt;
    }
    p = SkipWhiteSpace(p + 1);
  }
  if (*p == '"' || *p == '\'') {
    return {p - start};
  }
  return std::nullopt;
}

void Prescanner::FortranInclude(const char *firstQuote) {
  const char *p{firstQuote};
  while (*p != '"' && *p != '\'') {
    ++p;
  }
  char quote{*p};
  std::string path;
  for (++p; *p != '\n'; ++p) {
    if (*p == quote) {
      if (p[1] != quote) {
        break;
      }
      ++p;
    }
    path += *p;
  }
  if (*p != quote) {
    Say(GetProvenanceRange(firstQuote, p),
        "malformed path name string"_err_en_US);
    return;
  }
  p = SkipWhiteSpace(p + 1);
  if (*p != '\n' && *p != '!') {
    const char *garbage{p};
    for (; *p != '\n' && *p != '!'; ++p) {
    }
    if (features_.ShouldWarn(common::UsageWarning::Scanning)) {
      Say(GetProvenanceRange(garbage, p),
          "excess characters after path name"_warn_en_US);
    }
  }
  std::string buf;
  llvm::raw_string_ostream error{buf};
  Provenance provenance{GetProvenance(nextLine_)};
  std::optional<std::string> prependPath;
  if (const SourceFile * currentFile{allSources_.GetSourceFile(provenance)}) {
    prependPath = DirectoryName(currentFile->path());
  }
  const SourceFile *included{
      allSources_.Open(path, error, std::move(prependPath))};
  if (!included) {
    Say(provenance, "INCLUDE: %s"_err_en_US, error.str());
  } else if (included->bytes() > 0) {
    ProvenanceRange includeLineRange{
        provenance, static_cast<std::size_t>(p - nextLine_)};
ProvenanceRange fileRange{ 1083 allSources_.AddIncludedFile(*included, includeLineRange)}; 1084 Prescanner{*this, /*isNestedInIncludeDirective=*/false} 1085 .set_encoding(included->encoding()) 1086 .Prescan(fileRange); 1087 } 1088 } 1089 1090 const char *Prescanner::IsPreprocessorDirectiveLine(const char *start) const { 1091 const char *p{start}; 1092 for (; *p == ' '; ++p) { 1093 } 1094 if (*p == '#') { 1095 if (inFixedForm_ && p == start + 5) { 1096 return nullptr; 1097 } 1098 } else { 1099 p = SkipWhiteSpace(p); 1100 if (*p != '#') { 1101 return nullptr; 1102 } 1103 } 1104 return SkipWhiteSpace(p + 1); 1105 } 1106 1107 bool Prescanner::IsNextLinePreprocessorDirective() const { 1108 return IsPreprocessorDirectiveLine(nextLine_) != nullptr; 1109 } 1110 1111 bool Prescanner::SkipCommentLine(bool afterAmpersand) { 1112 if (IsAtEnd()) { 1113 if (afterAmpersand && prescannerNesting_ > 0) { 1114 // A continuation marker at the end of the last line in an 1115 // include file inhibits the newline for that line. 1116 SkipToEndOfLine(); 1117 omitNewline_ = true; 1118 } 1119 } else if (inPreprocessorDirective_) { 1120 } else { 1121 auto lineClass{ClassifyLine(nextLine_)}; 1122 if (lineClass.kind == LineClassification::Kind::Comment) { 1123 NextLine(); 1124 return true; 1125 } else if (lineClass.kind == 1126 LineClassification::Kind::ConditionalCompilationDirective || 1127 lineClass.kind == LineClassification::Kind::PreprocessorDirective) { 1128 // Allow conditional compilation directives (e.g., #ifdef) to affect 1129 // continuation lines. 1130 // Allow other preprocessor directives, too, except #include 1131 // (when it does not follow '&'), #define, and #undef (because 1132 // they cannot be allowed to affect preceding text on a 1133 // continued line). 
1134 preprocessor_.Directive(TokenizePreprocessorDirective(), *this); 1135 return true; 1136 } else if (afterAmpersand && 1137 (lineClass.kind == LineClassification::Kind::DefinitionDirective || 1138 lineClass.kind == LineClassification::Kind::IncludeDirective || 1139 lineClass.kind == LineClassification::Kind::IncludeLine)) { 1140 SkipToEndOfLine(); 1141 omitNewline_ = true; 1142 skipLeadingAmpersand_ = true; 1143 } 1144 } 1145 return false; 1146 } 1147 1148 const char *Prescanner::FixedFormContinuationLine(bool mightNeedSpace) { 1149 if (IsAtEnd()) { 1150 return nullptr; 1151 } 1152 tabInCurrentLine_ = false; 1153 char col1{*nextLine_}; 1154 if (IsFixedFormCommentChar(col1)) { 1155 int j{1}; 1156 if (InCompilerDirective()) { 1157 // Must be a continued compiler directive. 1158 for (; j < 5; ++j) { 1159 char ch{directiveSentinel_[j - 1]}; 1160 if (ch == '\0') { 1161 break; 1162 } 1163 if (ch != ToLowerCaseLetter(nextLine_[j])) { 1164 return nullptr; 1165 } 1166 } 1167 } else if (features_.IsEnabled(LanguageFeature::OpenMP)) { 1168 // Fixed Source Form Conditional Compilation Sentinels. 1169 if (nextLine_[1] != '$') { 1170 return nullptr; 1171 } 1172 j++; 1173 } else { 1174 return nullptr; 1175 } 1176 for (; j < 5; ++j) { 1177 if (nextLine_[j] != ' ') { 1178 return nullptr; 1179 } 1180 } 1181 char col6{nextLine_[5]}; 1182 if (col6 != '\n' && col6 != '\t' && col6 != ' ' && col6 != '0') { 1183 if (nextLine_[6] != ' ' && mightNeedSpace) { 1184 insertASpace_ = true; 1185 } 1186 return nextLine_ + 6; 1187 } 1188 return nullptr; 1189 } else { 1190 // Normal case: not in a compiler directive. 
1191 if (col1 == '&' && 1192 features_.IsEnabled( 1193 LanguageFeature::FixedFormContinuationWithColumn1Ampersand)) { 1194 // Extension: '&' as continuation marker 1195 if (features_.ShouldWarn( 1196 LanguageFeature::FixedFormContinuationWithColumn1Ampersand)) { 1197 Say(GetProvenance(nextLine_), "nonstandard usage"_port_en_US); 1198 } 1199 return nextLine_ + 1; 1200 } 1201 if (col1 == '\t' && nextLine_[1] >= '1' && nextLine_[1] <= '9') { 1202 tabInCurrentLine_ = true; 1203 return nextLine_ + 2; // VAX extension 1204 } 1205 if ((col1 == ' ' || 1206 ((col1 == 'D' || col1 == 'd') && 1207 features_.IsEnabled(LanguageFeature::OldDebugLines))) && 1208 nextLine_[1] == ' ' && nextLine_[2] == ' ' && nextLine_[3] == ' ' && 1209 nextLine_[4] == ' ') { 1210 char col6{nextLine_[5]}; 1211 if (col6 != '\n' && col6 != '\t' && col6 != ' ' && col6 != '0') { 1212 if ((col6 == 'i' || col6 == 'I') && IsIncludeLine(nextLine_)) { 1213 // It's An INCLUDE line, not a continuation 1214 } else { 1215 return nextLine_ + 6; 1216 } 1217 } 1218 } 1219 if (IsImplicitContinuation()) { 1220 return nextLine_; 1221 } 1222 } 1223 return nullptr; // not a continuation line 1224 } 1225 1226 const char *Prescanner::FreeFormContinuationLine(bool ampersand) { 1227 const char *p{nextLine_}; 1228 if (p >= limit_) { 1229 return nullptr; 1230 } 1231 p = SkipWhiteSpace(p); 1232 if (InCompilerDirective()) { 1233 if (*p++ != '!') { 1234 return nullptr; 1235 } 1236 for (const char *s{directiveSentinel_}; *s != '\0'; ++p, ++s) { 1237 if (*s != ToLowerCaseLetter(*p)) { 1238 return nullptr; 1239 } 1240 } 1241 p = SkipWhiteSpace(p); 1242 if (*p == '&') { 1243 if (!ampersand) { 1244 insertASpace_ = true; 1245 } 1246 return p + 1; 1247 } else if (ampersand) { 1248 return p; 1249 } else { 1250 return nullptr; 1251 } 1252 } else { 1253 if (*p == '&') { 1254 return p + 1; 1255 } else if (*p == '!' 
|| *p == '\n' || *p == '#') { 1256 return nullptr; 1257 } else if (ampersand || IsImplicitContinuation()) { 1258 if (continuationInCharLiteral_) { 1259 // 'a'& -> 'a''b' == "a'b" 1260 // 'b' 1261 if (features_.ShouldWarn( 1262 common::LanguageFeature::MiscSourceExtensions)) { 1263 Say(GetProvenanceRange(p, p + 1), 1264 "Character literal continuation line should have been preceded by '&'"_port_en_US); 1265 } 1266 } else if (p > nextLine_) { 1267 --p; 1268 } else { 1269 insertASpace_ = true; 1270 } 1271 return p; 1272 } else { 1273 return nullptr; 1274 } 1275 } 1276 } 1277 1278 bool Prescanner::FixedFormContinuation(bool mightNeedSpace) { 1279 // N.B. We accept '&' as a continuation indicator in fixed form, too, 1280 // but not in a character literal. 1281 if (*at_ == '&' && inCharLiteral_) { 1282 return false; 1283 } 1284 do { 1285 if (const char *cont{FixedFormContinuationLine(mightNeedSpace)}) { 1286 BeginSourceLine(cont); 1287 column_ = 7; 1288 NextLine(); 1289 return true; 1290 } 1291 } while (SkipCommentLine(false /* not after ampersand */)); 1292 return false; 1293 } 1294 1295 bool Prescanner::FreeFormContinuation() { 1296 const char *p{at_}; 1297 bool ampersand{*p == '&'}; 1298 if (ampersand) { 1299 p = SkipWhiteSpace(p + 1); 1300 } 1301 if (*p != '\n') { 1302 if (inCharLiteral_) { 1303 return false; 1304 } else if (*p == '!') { // & ! comment - ok 1305 } else if (ampersand && isPossibleMacroCall_ && (*p == ',' || *p == ')')) { 1306 return false; // allow & at end of a macro argument 1307 } else if (features_.ShouldWarn(LanguageFeature::CruftAfterAmpersand)) { 1308 Say(GetProvenance(p), "missing ! 
before comment after &"_warn_en_US); 1309 } 1310 } 1311 do { 1312 if (const char *cont{FreeFormContinuationLine(ampersand)}) { 1313 BeginSourceLine(cont); 1314 NextLine(); 1315 return true; 1316 } 1317 } while (SkipCommentLine(ampersand)); 1318 return false; 1319 } 1320 1321 // Implicit line continuation allows a preprocessor macro call with 1322 // arguments to span multiple lines. 1323 bool Prescanner::IsImplicitContinuation() const { 1324 return !inPreprocessorDirective_ && !inCharLiteral_ && isPossibleMacroCall_ && 1325 parenthesisNesting_ > 0 && !IsAtEnd() && 1326 ClassifyLine(nextLine_).kind == LineClassification::Kind::Source; 1327 } 1328 1329 bool Prescanner::Continuation(bool mightNeedFixedFormSpace) { 1330 if (disableSourceContinuation_) { 1331 return false; 1332 } else if (*at_ == '\n' || *at_ == '&') { 1333 if (inFixedForm_) { 1334 return FixedFormContinuation(mightNeedFixedFormSpace); 1335 } else { 1336 return FreeFormContinuation(); 1337 } 1338 } else if (*at_ == '\\' && at_ + 2 == nextLine_ && 1339 backslashFreeFormContinuation_ && !inFixedForm_ && nextLine_ < limit_) { 1340 // cpp-like handling of \ at end of a free form source line 1341 BeginSourceLine(nextLine_); 1342 NextLine(); 1343 return true; 1344 } else { 1345 return false; 1346 } 1347 } 1348 1349 std::optional<Prescanner::LineClassification> 1350 Prescanner::IsFixedFormCompilerDirectiveLine(const char *start) const { 1351 const char *p{start}; 1352 char col1{*p++}; 1353 if (!IsFixedFormCommentChar(col1)) { 1354 return std::nullopt; 1355 } 1356 char sentinel[5], *sp{sentinel}; 1357 int column{2}; 1358 for (; column < 6; ++column, ++p) { 1359 if (*p == ' ' || *p == '\n' || *p == '\t') { 1360 break; 1361 } 1362 if (sp == sentinel + 1 && sentinel[0] == '$' && IsDecimalDigit(*p)) { 1363 // OpenMP conditional compilation line: leave the label alone 1364 break; 1365 } 1366 *sp++ = ToLowerCaseLetter(*p); 1367 } 1368 if (column == 6) { 1369 if (*p == ' ' || *p == '\t' || *p == '0') { 1370 ++p; 1371 
} else { 1372 // This is a Continuation line, not an initial directive line. 1373 return std::nullopt; 1374 } 1375 } 1376 if (sp == sentinel) { 1377 return std::nullopt; 1378 } 1379 *sp = '\0'; 1380 if (const char *ss{IsCompilerDirectiveSentinel( 1381 sentinel, static_cast<std::size_t>(sp - sentinel))}) { 1382 std::size_t payloadOffset = p - start; 1383 return {LineClassification{ 1384 LineClassification::Kind::CompilerDirective, payloadOffset, ss}}; 1385 } 1386 return std::nullopt; 1387 } 1388 1389 std::optional<Prescanner::LineClassification> 1390 Prescanner::IsFreeFormCompilerDirectiveLine(const char *start) const { 1391 if (const char *p{SkipWhiteSpace(start)}; p && *p++ == '!') { 1392 if (auto maybePair{IsCompilerDirectiveSentinel(p)}) { 1393 auto offset{static_cast<std::size_t>(maybePair->second - start)}; 1394 return {LineClassification{LineClassification::Kind::CompilerDirective, 1395 offset, maybePair->first}}; 1396 } 1397 } 1398 return std::nullopt; 1399 } 1400 1401 Prescanner &Prescanner::AddCompilerDirectiveSentinel(const std::string &dir) { 1402 std::uint64_t packed{0}; 1403 for (char ch : dir) { 1404 packed = (packed << 8) | (ToLowerCaseLetter(ch) & 0xff); 1405 } 1406 compilerDirectiveBloomFilter_.set(packed % prime1); 1407 compilerDirectiveBloomFilter_.set(packed % prime2); 1408 compilerDirectiveSentinels_.insert(dir); 1409 return *this; 1410 } 1411 1412 const char *Prescanner::IsCompilerDirectiveSentinel( 1413 const char *sentinel, std::size_t len) const { 1414 std::uint64_t packed{0}; 1415 for (std::size_t j{0}; j < len; ++j) { 1416 packed = (packed << 8) | (sentinel[j] & 0xff); 1417 } 1418 if (len == 0 || !compilerDirectiveBloomFilter_.test(packed % prime1) || 1419 !compilerDirectiveBloomFilter_.test(packed % prime2)) { 1420 return nullptr; 1421 } 1422 const auto iter{compilerDirectiveSentinels_.find(std::string(sentinel, len))}; 1423 return iter == compilerDirectiveSentinels_.end() ? 
nullptr : iter->c_str(); 1424 } 1425 1426 const char *Prescanner::IsCompilerDirectiveSentinel(CharBlock token) const { 1427 const char *p{token.begin()}; 1428 const char *end{p + token.size()}; 1429 while (p < end && (*p == ' ' || *p == '\n')) { 1430 ++p; 1431 } 1432 if (p < end && *p == '!') { 1433 ++p; 1434 } 1435 while (end > p && (end[-1] == ' ' || end[-1] == '\t')) { 1436 --end; 1437 } 1438 return end > p && IsCompilerDirectiveSentinel(p, end - p) ? p : nullptr; 1439 } 1440 1441 std::optional<std::pair<const char *, const char *>> 1442 Prescanner::IsCompilerDirectiveSentinel(const char *p) const { 1443 char sentinel[8]; 1444 for (std::size_t j{0}; j + 1 < sizeof sentinel && *p != '\n'; ++p, ++j) { 1445 if (*p == ' ' || *p == '\t' || *p == '&') { 1446 if (j > 0) { 1447 sentinel[j] = '\0'; 1448 p = SkipWhiteSpace(p + 1); 1449 if (*p != '!') { 1450 if (const char *sp{IsCompilerDirectiveSentinel(sentinel, j)}) { 1451 return std::make_pair(sp, p); 1452 } 1453 } 1454 } 1455 break; 1456 } else { 1457 sentinel[j] = ToLowerCaseLetter(*p); 1458 } 1459 } 1460 return std::nullopt; 1461 } 1462 1463 constexpr bool IsDirective(const char *match, const char *dir) { 1464 for (; *match; ++match) { 1465 if (*match != ToLowerCaseLetter(*dir++)) { 1466 return false; 1467 } 1468 } 1469 return true; 1470 } 1471 1472 Prescanner::LineClassification Prescanner::ClassifyLine( 1473 const char *start) const { 1474 if (inFixedForm_) { 1475 if (std::optional<LineClassification> lc{ 1476 IsFixedFormCompilerDirectiveLine(start)}) { 1477 return std::move(*lc); 1478 } 1479 if (IsFixedFormCommentLine(start)) { 1480 return {LineClassification::Kind::Comment}; 1481 } 1482 } else { 1483 if (std::optional<LineClassification> lc{ 1484 IsFreeFormCompilerDirectiveLine(start)}) { 1485 return std::move(*lc); 1486 } 1487 if (const char *bang{IsFreeFormComment(start)}) { 1488 return {LineClassification::Kind::Comment, 1489 static_cast<std::size_t>(bang - start)}; 1490 } 1491 } 1492 if 
(std::optional<std::size_t> quoteOffset{IsIncludeLine(start)}) { 1493 return {LineClassification::Kind::IncludeLine, *quoteOffset}; 1494 } 1495 if (const char *dir{IsPreprocessorDirectiveLine(start)}) { 1496 if (IsDirective("if", dir) || IsDirective("elif", dir) || 1497 IsDirective("else", dir) || IsDirective("endif", dir)) { 1498 return {LineClassification::Kind::ConditionalCompilationDirective}; 1499 } else if (IsDirective("include", dir)) { 1500 return {LineClassification::Kind::IncludeDirective}; 1501 } else if (IsDirective("define", dir) || IsDirective("undef", dir)) { 1502 return {LineClassification::Kind::DefinitionDirective}; 1503 } else { 1504 return {LineClassification::Kind::PreprocessorDirective}; 1505 } 1506 } 1507 return {LineClassification::Kind::Source}; 1508 } 1509 1510 Prescanner::LineClassification Prescanner::ClassifyLine( 1511 TokenSequence &tokens, Provenance newlineProvenance) const { 1512 // Append a newline temporarily. 1513 tokens.PutNextTokenChar('\n', newlineProvenance); 1514 tokens.CloseToken(); 1515 const char *ppd{tokens.ToCharBlock().begin()}; 1516 LineClassification classification{ClassifyLine(ppd)}; 1517 tokens.pop_back(); // remove the newline 1518 return classification; 1519 } 1520 1521 void Prescanner::SourceFormChange(std::string &&dir) { 1522 if (dir == "!dir$ free") { 1523 inFixedForm_ = false; 1524 } else if (dir == "!dir$ fixed") { 1525 inFixedForm_ = true; 1526 } 1527 } 1528 1529 // Acquire and append compiler directive continuation lines to 1530 // the tokens that constitute a compiler directive, even when those 1531 // directive continuation lines are the result of macro expansion. 1532 // (Not used when neither the original compiler directive line nor 1533 // the directive continuation line result from preprocessing; regular 1534 // line continuation during tokenization handles that normal case.) 
1535 bool Prescanner::CompilerDirectiveContinuation( 1536 TokenSequence &tokens, const char *origSentinel) { 1537 if (inFixedForm_ || tokens.empty() || 1538 tokens.TokenAt(tokens.SizeInTokens() - 1) != "&") { 1539 return false; 1540 } 1541 LineClassification followingLine{ClassifyLine(nextLine_)}; 1542 if (followingLine.kind == LineClassification::Kind::Comment) { 1543 nextLine_ += followingLine.payloadOffset; // advance to '!' or newline 1544 NextLine(); 1545 return true; 1546 } 1547 CHECK(origSentinel != nullptr); 1548 directiveSentinel_ = origSentinel; // so InCompilerDirective() is true 1549 const char *nextContinuation{ 1550 followingLine.kind == LineClassification::Kind::CompilerDirective 1551 ? FreeFormContinuationLine(true) 1552 : nullptr}; 1553 if (!nextContinuation && 1554 followingLine.kind != LineClassification::Kind::Source) { 1555 return false; 1556 } 1557 auto origNextLine{nextLine_}; 1558 BeginSourceLine(nextLine_); 1559 NextLine(); 1560 if (nextContinuation) { 1561 // What follows is !DIR$ & xxx; skip over the & so that it 1562 // doesn't cause a spurious continuation. 1563 at_ = nextContinuation; 1564 } else { 1565 // What follows looks like a source line before macro expansion, 1566 // but might become a directive continuation afterwards. 
1567 SkipSpaces(); 1568 } 1569 TokenSequence followingTokens; 1570 while (NextToken(followingTokens)) { 1571 } 1572 if (auto followingPrepro{ 1573 preprocessor_.MacroReplacement(followingTokens, *this)}) { 1574 followingTokens = std::move(*followingPrepro); 1575 } 1576 followingTokens.RemoveRedundantBlanks(); 1577 std::size_t startAt{0}; 1578 std::size_t following{followingTokens.SizeInTokens()}; 1579 bool ok{false}; 1580 if (nextContinuation) { 1581 ok = true; 1582 } else { 1583 startAt = 2; 1584 if (startAt < following && followingTokens.TokenAt(0) == "!") { 1585 CharBlock sentinel{followingTokens.TokenAt(1)}; 1586 if (!sentinel.empty() && 1587 std::memcmp(sentinel.begin(), origSentinel, sentinel.size()) == 0) { 1588 ok = true; 1589 while ( 1590 startAt < following && followingTokens.TokenAt(startAt).IsBlank()) { 1591 ++startAt; 1592 } 1593 if (startAt < following && followingTokens.TokenAt(startAt) == "&") { 1594 ++startAt; 1595 } 1596 } 1597 } 1598 } 1599 if (ok) { 1600 tokens.pop_back(); // delete original '&' 1601 tokens.Put(followingTokens, startAt, following - startAt); 1602 tokens.RemoveRedundantBlanks(); 1603 } else { 1604 nextLine_ = origNextLine; 1605 } 1606 return ok; 1607 } 1608 1609 // Similar, but for source line continuation after macro replacement. 1610 bool Prescanner::SourceLineContinuation(TokenSequence &tokens) { 1611 if (!inFixedForm_ && !tokens.empty() && 1612 tokens.TokenAt(tokens.SizeInTokens() - 1) == "&") { 1613 LineClassification followingLine{ClassifyLine(nextLine_)}; 1614 if (followingLine.kind == LineClassification::Kind::Comment) { 1615 nextLine_ += followingLine.payloadOffset; // advance to '!' 
or newline 1616 NextLine(); 1617 return true; 1618 } else if (const char *nextContinuation{FreeFormContinuationLine(true)}) { 1619 BeginSourceLine(nextLine_); 1620 NextLine(); 1621 TokenSequence followingTokens; 1622 at_ = nextContinuation; 1623 while (NextToken(followingTokens)) { 1624 } 1625 if (auto followingPrepro{ 1626 preprocessor_.MacroReplacement(followingTokens, *this)}) { 1627 followingTokens = std::move(*followingPrepro); 1628 } 1629 followingTokens.RemoveRedundantBlanks(); 1630 tokens.pop_back(); // delete original '&' 1631 tokens.Put(followingTokens); 1632 return true; 1633 } 1634 } 1635 return false; 1636 } 1637 } // namespace Fortran::parser 1638