1 //===-- lib/Parser/prescan.cpp --------------------------------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #include "prescan.h" 10 #include "flang/Common/idioms.h" 11 #include "flang/Parser/characters.h" 12 #include "flang/Parser/message.h" 13 #include "flang/Parser/preprocessor.h" 14 #include "flang/Parser/source.h" 15 #include "flang/Parser/token-sequence.h" 16 #include "llvm/Support/raw_ostream.h" 17 #include <cstddef> 18 #include <cstring> 19 #include <utility> 20 #include <vector> 21 22 namespace Fortran::parser { 23 24 using common::LanguageFeature; 25 26 static constexpr int maxPrescannerNesting{100}; 27 28 Prescanner::Prescanner(Messages &messages, CookedSource &cooked, 29 Preprocessor &preprocessor, common::LanguageFeatureControl lfc) 30 : messages_{messages}, cooked_{cooked}, preprocessor_{preprocessor}, 31 allSources_{preprocessor_.allSources()}, features_{lfc}, 32 backslashFreeFormContinuation_{preprocessor.AnyDefinitions()}, 33 encoding_{allSources_.encoding()} {} 34 35 Prescanner::Prescanner(const Prescanner &that, bool isNestedInIncludeDirective) 36 : messages_{that.messages_}, cooked_{that.cooked_}, 37 preprocessor_{that.preprocessor_}, allSources_{that.allSources_}, 38 features_{that.features_}, 39 isNestedInIncludeDirective_{isNestedInIncludeDirective}, 40 backslashFreeFormContinuation_{that.backslashFreeFormContinuation_}, 41 inFixedForm_{that.inFixedForm_}, 42 fixedFormColumnLimit_{that.fixedFormColumnLimit_}, 43 encoding_{that.encoding_}, 44 prescannerNesting_{that.prescannerNesting_ + 1}, 45 skipLeadingAmpersand_{that.skipLeadingAmpersand_}, 46 compilerDirectiveBloomFilter_{that.compilerDirectiveBloomFilter_}, 47 compilerDirectiveSentinels_{that.compilerDirectiveSentinels_} {} 48 49 static inline constexpr bool IsFixedFormCommentChar(char ch) { 50 return ch == '!' || ch == '*' || ch == 'C' || ch == 'c'; 51 } 52 53 static void NormalizeCompilerDirectiveCommentMarker(TokenSequence &dir) { 54 char *p{dir.GetMutableCharData()}; 55 char *limit{p + dir.SizeInChars()}; 56 for (; p < limit; ++p) { 57 if (*p != ' ') { 58 CHECK(IsFixedFormCommentChar(*p)); 59 *p = '!'; 60 return; 61 } 62 } 63 DIE("compiler directive all blank"); 64 } 65 66 void Prescanner::Prescan(ProvenanceRange range) { 67 startProvenance_ = range.start(); 68 start_ = allSources_.GetSource(range); 69 CHECK(start_); 70 limit_ = start_ + range.size(); 71 nextLine_ = start_; 72 const bool beganInFixedForm{inFixedForm_}; 73 if (prescannerNesting_ > maxPrescannerNesting) { 74 Say(GetProvenance(start_), 75 "too many nested INCLUDE/#include files, possibly circular"_err_en_US); 76 return; 77 } 78 while (!IsAtEnd()) { 79 Statement(); 80 } 81 if (inFixedForm_ != beganInFixedForm) { 82 std::string dir{"!dir$ "}; 83 if (beganInFixedForm) { 84 dir += "fixed"; 85 } else { 86 dir += "free"; 87 } 88 dir += '\n'; 89 TokenSequence tokens{dir, allSources_.AddCompilerInsertion(dir).start()}; 90 tokens.Emit(cooked_); 91 } 92 } 93 94 void Prescanner::Statement() { 95 TokenSequence tokens; 96 const char *statementStart{nextLine_}; 97 LineClassification line{ClassifyLine(statementStart)}; 98 switch (line.kind) { 99 case LineClassification::Kind::Comment: 100 nextLine_ += line.payloadOffset; // advance to '!' or newline 101 NextLine(); 102 return; 103 case LineClassification::Kind::IncludeLine: 104 FortranInclude(nextLine_ + line.payloadOffset); 105 NextLine(); 106 return; 107 case LineClassification::Kind::ConditionalCompilationDirective: 108 case LineClassification::Kind::DefinitionDirective: 109 case LineClassification::Kind::PreprocessorDirective: 110 preprocessor_.Directive(TokenizePreprocessorDirective(), *this); 111 return; 112 case LineClassification::Kind::IncludeDirective: 113 preprocessor_.Directive(TokenizePreprocessorDirective(), *this); 114 afterIncludeDirective_ = true; 115 return; 116 case LineClassification::Kind::CompilerDirective: { 117 directiveSentinel_ = line.sentinel; 118 CHECK(InCompilerDirective()); 119 BeginStatementAndAdvance(); 120 if (inFixedForm_) { 121 CHECK(IsFixedFormCommentChar(*at_)); 122 } else { 123 while (*at_ == ' ' || *at_ == '\t') { 124 ++at_, ++column_; 125 } 126 CHECK(*at_ == '!'); 127 } 128 std::optional<int> condOffset; 129 if (directiveSentinel_[0] == '$' && directiveSentinel_[1] == '\0') { 130 // OpenMP conditional compilation line. 131 condOffset = 2; 132 } else if (directiveSentinel_[0] == '@' && directiveSentinel_[1] == 'c' && 133 directiveSentinel_[2] == 'u' && directiveSentinel_[3] == 'f' && 134 directiveSentinel_[4] == '\0') { 135 // CUDA conditional compilation line. 136 condOffset = 5; 137 } 138 if (condOffset) { 139 at_ += *condOffset, column_ += *condOffset; 140 if (auto payload{IsIncludeLine(at_)}) { 141 FortranInclude(at_ + *payload); 142 return; 143 } else if (inFixedForm_) { 144 LabelField(tokens); 145 } else { 146 SkipSpaces(); 147 } 148 } else { 149 // Compiler directive. Emit normalized sentinel, squash following spaces. 150 EmitChar(tokens, '!'); 151 ++at_, ++column_; 152 for (const char *sp{directiveSentinel_}; *sp != '\0'; 153 ++sp, ++at_, ++column_) { 154 EmitChar(tokens, *sp); 155 } 156 if (*at_ == ' ' || *at_ == '\t') { 157 EmitChar(tokens, ' '); 158 while (*at_ == ' ' || *at_ == '\t') { 159 ++at_, ++column_; 160 } 161 } 162 tokens.CloseToken(); 163 } 164 break; 165 } 166 case LineClassification::Kind::Source: 167 BeginStatementAndAdvance(); 168 if (inFixedForm_) { 169 if (features_.IsEnabled(LanguageFeature::OldDebugLines) && 170 (*at_ == 'D' || *at_ == 'd')) { 171 NextChar(); 172 } 173 LabelField(tokens); 174 } else if (skipLeadingAmpersand_) { 175 skipLeadingAmpersand_ = false; 176 const char *p{SkipWhiteSpace(at_)}; 177 if (p < limit_ && *p == '&') { 178 column_ += ++p - at_; 179 at_ = p; 180 } 181 } else { 182 SkipSpaces(); 183 // Check for a leading identifier that might be a keyword macro 184 // that will expand to anything indicating a non-source line, like 185 // a comment marker or directive sentinel. If so, disable line 186 // continuation, so that NextToken() won't consume anything from 187 // following lines. 188 if (IsLegalIdentifierStart(*at_)) { 189 CHECK(NextToken(tokens)); 190 CHECK(tokens.SizeInTokens() == 1); 191 CharBlock id{tokens.TokenAt(0)}; 192 if (preprocessor_.IsNameDefined(id) && 193 !preprocessor_.IsFunctionLikeDefinition(id)) { 194 if (auto replaced{preprocessor_.MacroReplacement(tokens, *this)}) { 195 auto newLineClass{ClassifyLine(*replaced, GetCurrentProvenance())}; 196 disableSourceContinuation_ = 197 newLineClass.kind != LineClassification::Kind::Source; 198 if (newLineClass.kind == 199 LineClassification::Kind::CompilerDirective) { 200 directiveSentinel_ = newLineClass.sentinel; 201 } 202 } 203 } 204 } 205 } 206 break; 207 } 208 209 while (NextToken(tokens)) { 210 } 211 if (continuationLines_ > 255) { 212 if (features_.ShouldWarn(common::LanguageFeature::MiscSourceExtensions)) { 213 Say(GetProvenance(statementStart), 214 "%d continuation lines is more than the Fortran standard allows"_port_en_US, 215 continuationLines_); 216 } 217 } 218 219 Provenance newlineProvenance{GetCurrentProvenance()}; 220 if (std::optional<TokenSequence> preprocessed{ 221 preprocessor_.MacroReplacement(tokens, *this)}) { 222 // Reprocess the preprocessed line. 223 LineClassification ppl{ClassifyLine(*preprocessed, newlineProvenance)}; 224 switch (ppl.kind) { 225 case LineClassification::Kind::Comment: 226 break; 227 case LineClassification::Kind::IncludeLine: 228 FortranInclude(preprocessed->TokenAt(0).begin() + ppl.payloadOffset); 229 break; 230 case LineClassification::Kind::ConditionalCompilationDirective: 231 case LineClassification::Kind::IncludeDirective: 232 case LineClassification::Kind::DefinitionDirective: 233 case LineClassification::Kind::PreprocessorDirective: 234 if (features_.ShouldWarn(common::UsageWarning::Preprocessing)) { 235 Say(preprocessed->GetProvenanceRange(), 236 "Preprocessed line resembles a preprocessor directive"_warn_en_US); 237 } 238 CheckAndEmitLine(preprocessed->ToLowerCase(), newlineProvenance); 239 break; 240 case LineClassification::Kind::CompilerDirective: 241 if (preprocessed->HasRedundantBlanks()) { 242 preprocessed->RemoveRedundantBlanks(); 243 } 244 while (CompilerDirectiveContinuation(*preprocessed, ppl.sentinel)) { 245 newlineProvenance = GetCurrentProvenance(); 246 } 247 NormalizeCompilerDirectiveCommentMarker(*preprocessed); 248 preprocessed->ToLowerCase(); 249 SourceFormChange(preprocessed->ToString()); 250 CheckAndEmitLine(preprocessed->ToLowerCase().ClipComment( 251 *this, true /* skip first ! */), 252 newlineProvenance); 253 break; 254 case LineClassification::Kind::Source: 255 if (inFixedForm_) { 256 if (preprocessed->HasBlanks(/*after column*/ 6)) { 257 preprocessed->RemoveBlanks(/*after column*/ 6); 258 } 259 } else { 260 while (SourceLineContinuation(*preprocessed)) { 261 newlineProvenance = GetCurrentProvenance(); 262 } 263 if (preprocessed->HasRedundantBlanks()) { 264 preprocessed->RemoveRedundantBlanks(); 265 } 266 } 267 CheckAndEmitLine( 268 preprocessed->ToLowerCase().ClipComment(*this), newlineProvenance); 269 break; 270 } 271 } else { // no macro replacement 272 if (line.kind == LineClassification::Kind::CompilerDirective) { 273 while (CompilerDirectiveContinuation(tokens, line.sentinel)) { 274 newlineProvenance = GetCurrentProvenance(); 275 } 276 tokens.ToLowerCase(); 277 SourceFormChange(tokens.ToString()); 278 } else { // Kind::Source 279 tokens.ToLowerCase(); 280 if (inFixedForm_) { 281 EnforceStupidEndStatementRules(tokens); 282 } 283 } 284 CheckAndEmitLine(tokens, newlineProvenance); 285 } 286 directiveSentinel_ = nullptr; 287 } 288 289 void Prescanner::CheckAndEmitLine( 290 TokenSequence &tokens, Provenance newlineProvenance) { 291 tokens.CheckBadFortranCharacters( 292 messages_, *this, disableSourceContinuation_); 293 // Parenthesis nesting check does not apply while any #include is 294 // active, nor on the lines before and after a top-level #include. 295 // Applications play shenanigans with line continuation before and 296 // after #include'd subprogram argument lists. 297 if (!isNestedInIncludeDirective_ && !omitNewline_ && 298 !afterIncludeDirective_) { 299 tokens.CheckBadParentheses(messages_); 300 } 301 tokens.Emit(cooked_); 302 if (omitNewline_) { 303 omitNewline_ = false; 304 } else { 305 cooked_.Put('\n', newlineProvenance); 306 afterIncludeDirective_ = false; 307 } 308 } 309 310 TokenSequence Prescanner::TokenizePreprocessorDirective() { 311 CHECK(!IsAtEnd() && !inPreprocessorDirective_); 312 inPreprocessorDirective_ = true; 313 BeginStatementAndAdvance(); 314 TokenSequence tokens; 315 while (NextToken(tokens)) { 316 } 317 inPreprocessorDirective_ = false; 318 return tokens; 319 } 320 321 void Prescanner::NextLine() { 322 void *vstart{static_cast<void *>(const_cast<char *>(nextLine_))}; 323 void *v{std::memchr(vstart, '\n', limit_ - nextLine_)}; 324 if (!v) { 325 nextLine_ = limit_; 326 } else { 327 const char *nl{const_cast<const char *>(static_cast<char *>(v))}; 328 nextLine_ = nl + 1; 329 } 330 } 331 332 void Prescanner::LabelField(TokenSequence &token) { 333 int outCol{1}; 334 const char *start{at_}; 335 std::optional<int> badColumn; 336 for (; *at_ != '\n' && column_ <= 6; ++at_) { 337 if (*at_ == '\t') { 338 ++at_; 339 column_ = 7; 340 break; 341 } 342 if (*at_ != ' ' && 343 !(*at_ == '0' && column_ == 6)) { // '0' in column 6 becomes space 344 EmitChar(token, *at_); 345 ++outCol; 346 if (!badColumn && (column_ == 6 || !IsDecimalDigit(*at_))) { 347 badColumn = column_; 348 } 349 } 350 ++column_; 351 } 352 if (badColumn && !preprocessor_.IsNameDefined(token.CurrentOpenToken())) { 353 if (features_.ShouldWarn(common::UsageWarning::Scanning)) { 354 Say(GetProvenance(start + *badColumn - 1), 355 *badColumn == 6 356 ? "Statement should not begin with a continuation line"_warn_en_US 357 : "Character in fixed-form label field must be a digit"_warn_en_US); 358 } 359 token.clear(); 360 if (*badColumn < 6) { 361 at_ = start; 362 column_ = 1; 363 return; 364 } 365 outCol = 1; 366 } 367 if (outCol == 1) { // empty label field 368 // Emit a space so that, if the line is rescanned after preprocessing, 369 // a leading 'C' or 'D' won't be left-justified and then accidentally 370 // misinterpreted as a comment card. 371 EmitChar(token, ' '); 372 ++outCol; 373 } 374 token.CloseToken(); 375 SkipToNextSignificantCharacter(); 376 if (IsDecimalDigit(*at_)) { 377 if (features_.ShouldWarn(common::LanguageFeature::MiscSourceExtensions)) { 378 Say(GetCurrentProvenance(), 379 "Label digit is not in fixed-form label field"_port_en_US); 380 } 381 } 382 } 383 384 // 6.3.3.5: A program unit END statement, or any other statement whose 385 // initial line resembles an END statement, shall not be continued in 386 // fixed form source. 387 void Prescanner::EnforceStupidEndStatementRules(const TokenSequence &tokens) { 388 CharBlock cBlock{tokens.ToCharBlock()}; 389 const char *str{cBlock.begin()}; 390 std::size_t n{cBlock.size()}; 391 if (n < 3) { 392 return; 393 } 394 std::size_t j{0}; 395 for (; j < n && (str[j] == ' ' || (str[j] >= '0' && str[j] <= '9')); ++j) { 396 } 397 if (j + 3 > n || std::memcmp(str + j, "end", 3) != 0) { 398 return; 399 } 400 // It starts with END, possibly after a label. 401 auto start{allSources_.GetSourcePosition(tokens.GetCharProvenance(j))}; 402 auto end{allSources_.GetSourcePosition(tokens.GetCharProvenance(n - 1))}; 403 if (!start || !end) { 404 return; 405 } 406 if (&*start->sourceFile == &*end->sourceFile && start->line == end->line) { 407 return; // no continuation 408 } 409 j += 3; 410 static const char *const prefixes[]{"program", "subroutine", "function", 411 "blockdata", "module", "submodule", nullptr}; 412 bool isPrefix{j == n || !IsLegalInIdentifier(str[j])}; // prefix is END 413 std::size_t endOfPrefix{j - 1}; 414 for (const char *const *p{prefixes}; *p; ++p) { 415 std::size_t pLen{std::strlen(*p)}; 416 if (j + pLen <= n && std::memcmp(str + j, *p, pLen) == 0) { 417 isPrefix = true; // END thing as prefix 418 j += pLen; 419 endOfPrefix = j - 1; 420 for (; j < n && IsLegalInIdentifier(str[j]); ++j) { 421 } 422 break; 423 } 424 } 425 if (isPrefix) { 426 auto range{tokens.GetTokenProvenanceRange(1)}; 427 if (j == n) { // END or END thing [name] 428 Say(range, 429 "Program unit END statement may not be continued in fixed form source"_err_en_US); 430 } else { 431 auto endOfPrefixPos{ 432 allSources_.GetSourcePosition(tokens.GetCharProvenance(endOfPrefix))}; 433 auto next{allSources_.GetSourcePosition(tokens.GetCharProvenance(j))}; 434 if (endOfPrefixPos && next && 435 &*endOfPrefixPos->sourceFile == &*start->sourceFile && 436 endOfPrefixPos->line == start->line && 437 (&*next->sourceFile != &*start->sourceFile || 438 next->line != start->line)) { 439 Say(range, 440 "Initial line of continued statement must not appear to be a program unit END in fixed form source"_err_en_US); 441 } 442 } 443 } 444 } 445 446 void Prescanner::SkipToEndOfLine() { 447 while (*at_ != '\n') { 448 ++at_, ++column_; 449 } 450 } 451 452 bool Prescanner::MustSkipToEndOfLine() const { 453 if (inFixedForm_ && column_ > fixedFormColumnLimit_ && !tabInCurrentLine_) { 454 return true; // skip over ignored columns in right margin (73:80) 455 } else if (*at_ == '!' && !inCharLiteral_) { 456 return true; // inline comment goes to end of source line 457 } else { 458 return false; 459 } 460 } 461 462 void Prescanner::NextChar() { 463 CHECK(*at_ != '\n'); 464 ++at_, ++column_; 465 while (at_[0] == '\xef' && at_[1] == '\xbb' && at_[2] == '\xbf') { 466 // UTF-8 byte order mark - treat this file as UTF-8 467 at_ += 3; 468 encoding_ = Encoding::UTF_8; 469 } 470 SkipToNextSignificantCharacter(); 471 } 472 473 // Skip everything that should be ignored until the next significant 474 // character is reached; handles C-style comments in preprocessing 475 // directives, Fortran ! comments, stuff after the right margin in 476 // fixed form, and all forms of line continuation. 477 bool Prescanner::SkipToNextSignificantCharacter() { 478 auto anyContinuationLine{false}; 479 if (inPreprocessorDirective_) { 480 SkipCComments(); 481 } else { 482 bool mightNeedSpace{false}; 483 if (MustSkipToEndOfLine()) { 484 SkipToEndOfLine(); 485 } else { 486 mightNeedSpace = *at_ == '\n'; 487 } 488 for (; Continuation(mightNeedSpace); mightNeedSpace = false) { 489 anyContinuationLine = true; 490 ++continuationLines_; 491 if (MustSkipToEndOfLine()) { 492 SkipToEndOfLine(); 493 } 494 } 495 if (*at_ == '\t') { 496 tabInCurrentLine_ = true; 497 } 498 } 499 return anyContinuationLine; 500 } 501 502 void Prescanner::SkipCComments() { 503 while (true) { 504 if (IsCComment(at_)) { 505 if (const char *after{SkipCComment(at_)}) { 506 column_ += after - at_; 507 // May have skipped over one or more newlines; relocate the start of 508 // the next line. 509 nextLine_ = at_ = after; 510 NextLine(); 511 } else { 512 // Don't emit any messages about unclosed C-style comments, because 513 // the sequence /* can appear legally in a FORMAT statement. There's 514 // no ambiguity, since the sequence */ cannot appear legally. 515 break; 516 } 517 } else if (inPreprocessorDirective_ && at_[0] == '\\' && at_ + 2 < limit_ && 518 at_[1] == '\n' && !IsAtEnd()) { 519 BeginSourceLineAndAdvance(); 520 } else { 521 break; 522 } 523 } 524 } 525 526 void Prescanner::SkipSpaces() { 527 while (*at_ == ' ' || *at_ == '\t') { 528 NextChar(); 529 } 530 insertASpace_ = false; 531 } 532 533 const char *Prescanner::SkipWhiteSpace(const char *p) { 534 while (*p == ' ' || *p == '\t') { 535 ++p; 536 } 537 return p; 538 } 539 540 const char *Prescanner::SkipWhiteSpaceAndCComments(const char *p) const { 541 while (true) { 542 if (*p == ' ' || *p == '\t') { 543 ++p; 544 } else if (IsCComment(p)) { 545 if (const char *after{SkipCComment(p)}) { 546 p = after; 547 } else { 548 break; 549 } 550 } else { 551 break; 552 } 553 } 554 return p; 555 } 556 557 const char *Prescanner::SkipCComment(const char *p) const { 558 char star{' '}, slash{' '}; 559 p += 2; 560 while (star != '*' || slash != '/') { 561 if (p >= limit_) { 562 return nullptr; // signifies an unterminated comment 563 } 564 star = slash; 565 slash = *p++; 566 } 567 return p; 568 } 569 570 bool Prescanner::NextToken(TokenSequence &tokens) { 571 CHECK(at_ >= start_ && at_ < limit_); 572 if (InFixedFormSource()) { 573 SkipSpaces(); 574 } else { 575 if (*at_ == '/' && IsCComment(at_)) { 576 // Recognize and skip over classic C style /*comments*/ when 577 // outside a character literal. 578 if (features_.ShouldWarn(LanguageFeature::ClassicCComments)) { 579 Say(GetCurrentProvenance(), 580 "nonstandard usage: C-style comment"_port_en_US); 581 } 582 SkipCComments(); 583 } 584 if (*at_ == ' ' || *at_ == '\t') { 585 // Compress free-form white space into a single space character. 586 const auto theSpace{at_}; 587 char previous{at_ <= start_ ? ' ' : at_[-1]}; 588 NextChar(); 589 SkipSpaces(); 590 if (*at_ == '\n') { 591 // Discard white space at the end of a line. 592 } else if (!inPreprocessorDirective_ && 593 (previous == '(' || *at_ == '(' || *at_ == ')')) { 594 // Discard white space before/after '(' and before ')', unless in a 595 // preprocessor directive. This helps yield space-free contiguous 596 // names for generic interfaces like OPERATOR( + ) and 597 // READ ( UNFORMATTED ), without misinterpreting #define f (notAnArg). 598 // This has the effect of silently ignoring the illegal spaces in 599 // the array constructor ( /1,2/ ) but that seems benign; it's 600 // hard to avoid that while still removing spaces from OPERATOR( / ) 601 // and OPERATOR( // ). 602 } else { 603 // Preserve the squashed white space as a single space character. 604 tokens.PutNextTokenChar(' ', GetProvenance(theSpace)); 605 tokens.CloseToken(); 606 return true; 607 } 608 } 609 } 610 if (insertASpace_) { 611 tokens.PutNextTokenChar(' ', spaceProvenance_); 612 insertASpace_ = false; 613 } 614 if (*at_ == '\n') { 615 return false; 616 } 617 const char *start{at_}; 618 if (*at_ == '\'' || *at_ == '"') { 619 QuotedCharacterLiteral(tokens, start); 620 preventHollerith_ = false; 621 } else if (IsDecimalDigit(*at_)) { 622 int n{0}, digits{0}; 623 static constexpr int maxHollerith{256 /*lines*/ * (132 - 6 /*columns*/)}; 624 do { 625 if (n < maxHollerith) { 626 n = 10 * n + DecimalDigitValue(*at_); 627 } 628 EmitCharAndAdvance(tokens, *at_); 629 ++digits; 630 if (InFixedFormSource()) { 631 SkipSpaces(); 632 } 633 } while (IsDecimalDigit(*at_)); 634 if ((*at_ == 'h' || *at_ == 'H') && n > 0 && n < maxHollerith && 635 !preventHollerith_) { 636 Hollerith(tokens, n, start); 637 } else if (*at_ == '.') { 638 while (IsDecimalDigit(EmitCharAndAdvance(tokens, *at_))) { 639 } 640 ExponentAndKind(tokens); 641 } else if (ExponentAndKind(tokens)) { 642 } else if (digits == 1 && n == 0 && (*at_ == 'x' || *at_ == 'X') && 643 inPreprocessorDirective_) { 644 do { 645 EmitCharAndAdvance(tokens, *at_); 646 } while (IsHexadecimalDigit(*at_)); 647 } else if (at_[0] == '_' && (at_[1] == '\'' || at_[1] == '"')) { // 4_"..." 648 EmitCharAndAdvance(tokens, *at_); 649 QuotedCharacterLiteral(tokens, start); 650 } else if (IsLetter(*at_) && !preventHollerith_ && 651 parenthesisNesting_ > 0) { 652 // Handles FORMAT(3I9HHOLLERITH) by skipping over the first I so that 653 // we don't misrecognize I9HOLLERITH as an identifier in the next case. 654 EmitCharAndAdvance(tokens, *at_); 655 } 656 preventHollerith_ = false; 657 } else if (*at_ == '.') { 658 char nch{EmitCharAndAdvance(tokens, '.')}; 659 if (!inPreprocessorDirective_ && IsDecimalDigit(nch)) { 660 while (IsDecimalDigit(EmitCharAndAdvance(tokens, *at_))) { 661 } 662 ExponentAndKind(tokens); 663 } else if (nch == '.' && EmitCharAndAdvance(tokens, '.') == '.') { 664 EmitCharAndAdvance(tokens, '.'); // variadic macro definition ellipsis 665 } 666 preventHollerith_ = false; 667 } else if (IsLegalInIdentifier(*at_)) { 668 int parts{1}; 669 const char *afterLast{nullptr}; 670 do { 671 EmitChar(tokens, *at_); 672 ++at_, ++column_; 673 afterLast = at_; 674 if (SkipToNextSignificantCharacter() && IsLegalIdentifierStart(*at_)) { 675 tokens.CloseToken(); 676 ++parts; 677 } 678 } while (IsLegalInIdentifier(*at_)); 679 if (parts >= 3) { 680 // Subtlety: When an identifier is split across three or more continuation 681 // lines (or two continuation lines, immediately preceded or followed 682 // by '&' free form continuation line markers, its parts are kept as 683 // distinct pp-tokens so that macro operates on them independently. 684 // This trick accommodates the historic practice of using line 685 // continuation for token pasting after replacement. 686 } else if (parts == 2) { 687 if ((start > start_ && start[-1] == '&') || 688 (afterLast < limit_ && (*afterLast == '&' || *afterLast == '\n'))) { 689 // call & call foo& call foo& 690 // &MACRO& OR &MACRO& OR &MACRO 691 // &foo(...) &(...) 692 } else { 693 tokens.ReopenLastToken(); 694 } 695 } 696 if (InFixedFormSource()) { 697 SkipSpaces(); 698 } 699 if ((*at_ == '\'' || *at_ == '"') && 700 tokens.CharAt(tokens.SizeInChars() - 1) == '_') { // kind_"..." 701 QuotedCharacterLiteral(tokens, start); 702 preventHollerith_ = false; 703 } else { 704 preventHollerith_ = true; // DO 10 H = ... 705 } 706 } else if (*at_ == '*') { 707 if (EmitCharAndAdvance(tokens, '*') == '*') { 708 EmitCharAndAdvance(tokens, '*'); 709 } else { 710 // Subtle ambiguity: 711 // CHARACTER*2H declares H because *2 is a kind specifier 712 // DATAC/N*2H / is repeated Hollerith 713 preventHollerith_ = !slashInCurrentStatement_; 714 } 715 } else { 716 char ch{*at_}; 717 if (ch == '(') { 718 if (parenthesisNesting_++ == 0) { 719 isPossibleMacroCall_ = tokens.SizeInTokens() > 0 && 720 preprocessor_.IsFunctionLikeDefinition( 721 tokens.TokenAt(tokens.SizeInTokens() - 1)); 722 } 723 } else if (ch == ')' && parenthesisNesting_ > 0) { 724 --parenthesisNesting_; 725 } 726 char nch{EmitCharAndAdvance(tokens, ch)}; 727 preventHollerith_ = false; 728 if ((nch == '=' && 729 (ch == '<' || ch == '>' || ch == '/' || ch == '=' || ch == '!')) || 730 (ch == nch && 731 (ch == '/' || ch == ':' || ch == '*' || ch == '#' || ch == '&' || 732 ch == '|' || ch == '<' || ch == '>')) || 733 (ch == '=' && nch == '>')) { 734 // token comprises two characters 735 EmitCharAndAdvance(tokens, nch); 736 } else if (ch == '/') { 737 slashInCurrentStatement_ = true; 738 } else if (ch == ';' && InFixedFormSource()) { 739 SkipSpaces(); 740 if (IsDecimalDigit(*at_)) { 741 if (features_.ShouldWarn( 742 common::LanguageFeature::MiscSourceExtensions)) { 743 Say(GetProvenanceRange(at_, at_ + 1), 744 "Label should be in the label field"_port_en_US); 745 } 746 } 747 } 748 } 749 tokens.CloseToken(); 750 return true; 751 } 752 753 bool Prescanner::ExponentAndKind(TokenSequence &tokens) { 754 char ed{ToLowerCaseLetter(*at_)}; 755 if (ed != 'e' && ed != 'd') { 756 return false; 757 } 758 EmitCharAndAdvance(tokens, ed); 759 if (*at_ == '+' || *at_ == '-') { 760 EmitCharAndAdvance(tokens, *at_); 761 } 762 while (IsDecimalDigit(*at_)) { 763 EmitCharAndAdvance(tokens, *at_); 764 } 765 if (*at_ == '_') { 766 while (IsLegalInIdentifier(EmitCharAndAdvance(tokens, *at_))) { 767 } 768 } 769 return true; 770 } 771 772 void Prescanner::QuotedCharacterLiteral( 773 TokenSequence &tokens, const char *start) { 774 char quote{*at_}; 775 const char *end{at_ + 1}; 776 inCharLiteral_ = true; 777 continuationInCharLiteral_ = true; 778 const auto emit{[&](char ch) { EmitChar(tokens, ch); }}; 779 const auto insert{[&](char ch) { EmitInsertedChar(tokens, ch); }}; 780 bool isEscaped{false}; 781 bool escapesEnabled{features_.IsEnabled(LanguageFeature::BackslashEscapes)}; 782 while (true) { 783 if (*at_ == '\\') { 784 if (escapesEnabled) { 785 isEscaped = !isEscaped; 786 } else { 787 // The parser always processes escape sequences, so don't confuse it 788 // when escapes are disabled. 789 insert('\\'); 790 } 791 } else { 792 isEscaped = false; 793 } 794 EmitQuotedChar(static_cast<unsigned char>(*at_), emit, insert, false, 795 Encoding::LATIN_1); 796 while (PadOutCharacterLiteral(tokens)) { 797 } 798 if (*at_ == '\n') { 799 if (!inPreprocessorDirective_) { 800 Say(GetProvenanceRange(start, end), 801 "Incomplete character literal"_err_en_US); 802 } 803 break; 804 } 805 end = at_ + 1; 806 NextChar(); 807 if (*at_ == quote && !isEscaped) { 808 // A doubled unescaped quote mark becomes a single instance of that 809 // quote character in the literal (later). There can be spaces between 810 // the quotes in fixed form source. 811 EmitChar(tokens, quote); 812 inCharLiteral_ = false; // for cases like print *, '...'!comment 813 NextChar(); 814 if (InFixedFormSource()) { 815 SkipSpaces(); 816 } 817 if (*at_ != quote) { 818 break; 819 } 820 inCharLiteral_ = true; 821 } 822 } 823 continuationInCharLiteral_ = false; 824 inCharLiteral_ = false; 825 } 826 827 void Prescanner::Hollerith( 828 TokenSequence &tokens, int count, const char *start) { 829 inCharLiteral_ = true; 830 CHECK(*at_ == 'h' || *at_ == 'H'); 831 EmitChar(tokens, 'H'); 832 while (count-- > 0) { 833 if (PadOutCharacterLiteral(tokens)) { 834 } else if (*at_ == '\n') { 835 if (features_.ShouldWarn(common::UsageWarning::Scanning)) { 836 Say(GetProvenanceRange(start, at_), 837 "Possible truncated Hollerith literal"_warn_en_US); 838 } 839 break; 840 } else { 841 NextChar(); 842 // Each multi-byte character encoding counts as a single character. 843 // No escape sequences are recognized. 844 // Hollerith is always emitted to the cooked character 845 // stream in UTF-8. 846 DecodedCharacter decoded{DecodeCharacter( 847 encoding_, at_, static_cast<std::size_t>(limit_ - at_), false)}; 848 if (decoded.bytes > 0) { 849 EncodedCharacter utf8{ 850 EncodeCharacter<Encoding::UTF_8>(decoded.codepoint)}; 851 for (int j{0}; j < utf8.bytes; ++j) { 852 EmitChar(tokens, utf8.buffer[j]); 853 } 854 at_ += decoded.bytes - 1; 855 } else { 856 Say(GetProvenanceRange(start, at_), 857 "Bad character in Hollerith literal"_err_en_US); 858 break; 859 } 860 } 861 } 862 if (*at_ != '\n') { 863 NextChar(); 864 } 865 inCharLiteral_ = false; 866 } 867 868 // In fixed form, source card images must be processed as if they were at 869 // least 72 columns wide, at least in character literal contexts. 870 bool Prescanner::PadOutCharacterLiteral(TokenSequence &tokens) { 871 while (inFixedForm_ && !tabInCurrentLine_ && at_[1] == '\n') { 872 if (column_ < fixedFormColumnLimit_) { 873 tokens.PutNextTokenChar(' ', spaceProvenance_); 874 ++column_; 875 return true; 876 } 877 if (!FixedFormContinuation(false /*no need to insert space*/) || 878 tabInCurrentLine_) { 879 return false; 880 } 881 CHECK(column_ == 7); 882 --at_; // point to column 6 of continuation line 883 column_ = 6; 884 } 885 return false; 886 } 887 888 static bool IsAtProcess(const char *p) { 889 static const char pAtProc[]{"process"}; 890 for (std::size_t i{0}; i < sizeof pAtProc - 1; ++i) { 891 if (ToLowerCaseLetter(*++p) != pAtProc[i]) 892 return false; 893 } 894 return true; 895 } 896 897 bool Prescanner::IsFixedFormCommentLine(const char *start) const { 898 const char *p{start}; 899 900 // The @process directive must start in column 1. 901 if (*p == '@' && IsAtProcess(p)) { 902 return true; 903 } 904 905 if (IsFixedFormCommentChar(*p) || *p == '%' || // VAX %list, %eject, &c. 906 ((*p == 'D' || *p == 'd') && 907 !features_.IsEnabled(LanguageFeature::OldDebugLines))) { 908 return true; 909 } 910 bool anyTabs{false}; 911 while (true) { 912 if (*p == ' ') { 913 ++p; 914 } else if (*p == '\t') { 915 anyTabs = true; 916 ++p; 917 } else if (*p == '0' && !anyTabs && p == start + 5) { 918 ++p; // 0 in column 6 must treated as a space 919 } else { 920 break; 921 } 922 } 923 if (!anyTabs && p >= start + fixedFormColumnLimit_) { 924 return true; 925 } 926 if (*p == '!' && !inCharLiteral_ && (anyTabs || p != start + 5)) { 927 return true; 928 } 929 return *p == '\n'; 930 } 931 932 const char *Prescanner::IsFreeFormComment(const char *p) const { 933 p = SkipWhiteSpaceAndCComments(p); 934 if (*p == '!' || *p == '\n') { 935 return p; 936 } else if (*p == '@') { 937 return IsAtProcess(p) ? p : nullptr; 938 } else { 939 return nullptr; 940 } 941 } 942 943 std::optional<std::size_t> Prescanner::IsIncludeLine(const char *start) const { 944 const char *p{SkipWhiteSpace(start)}; 945 if (*p == '0' && inFixedForm_ && p == start + 5) { 946 // Accept " 0INCLUDE" in fixed form. 947 p = SkipWhiteSpace(p + 1); 948 } 949 for (const char *q{"include"}; *q; ++q) { 950 if (ToLowerCaseLetter(*p) != *q) { 951 return std::nullopt; 952 } 953 p = SkipWhiteSpace(p + 1); 954 } 955 if (IsDecimalDigit(*p)) { // accept & ignore a numeric kind prefix 956 for (p = SkipWhiteSpace(p + 1); IsDecimalDigit(*p); 957 p = SkipWhiteSpace(p + 1)) { 958 } 959 if (*p != '_') { 960 return std::nullopt; 961 } 962 p = SkipWhiteSpace(p + 1); 963 } 964 if (*p == '"' || *p == '\'') { 965 return {p - start}; 966 } 967 return std::nullopt; 968 } 969 970 void Prescanner::FortranInclude(const char *firstQuote) { 971 const char *p{firstQuote}; 972 while (*p != '"' && *p != '\'') { 973 ++p; 974 } 975 char quote{*p}; 976 std::string path; 977 for (++p; *p != '\n'; ++p) { 978 if (*p == quote) { 979 if (p[1] != quote) { 980 break; 981 } 982 ++p; 983 } 984 path += *p; 985 } 986 if (*p != quote) { 987 Say(GetProvenanceRange(firstQuote, p), 988 "malformed path name string"_err_en_US); 989 return; 990 } 991 p = SkipWhiteSpace(p + 1); 992 if (*p != '\n' && *p != '!') { 993 const char *garbage{p}; 994 for (; *p != '\n' && *p != '!'; ++p) { 995 } 996 if (features_.ShouldWarn(common::UsageWarning::Scanning)) { 997 Say(GetProvenanceRange(garbage, p), 998 "excess characters after path name"_warn_en_US); 999 } 1000 } 1001 std::string buf; 1002 llvm::raw_string_ostream error{buf}; 1003 Provenance provenance{GetProvenance(nextLine_)}; 1004 std::optional<std::string> prependPath; 1005 if (const SourceFile * currentFile{allSources_.GetSourceFile(provenance)}) { 1006 prependPath = DirectoryName(currentFile->path()); 1007 } 1008 const SourceFile *included{ 1009 allSources_.Open(path, error, std::move(prependPath))}; 1010 if (!included) { 1011 Say(provenance, "INCLUDE: %s"_err_en_US, error.str()); 1012 } else if (included->bytes() > 0) { 1013 ProvenanceRange includeLineRange{ 1014 provenance, static_cast<std::size_t>(p - nextLine_)}; 1015 ProvenanceRange fileRange{ 1016 allSources_.AddIncludedFile(*included, includeLineRange)}; 1017 Prescanner{*this, /*isNestedInIncludeDirective=*/false} 1018 .set_encoding(included->encoding()) 1019 .Prescan(fileRange); 1020 } 1021 } 1022 1023 const char *Prescanner::IsPreprocessorDirectiveLine(const char *start) const { 1024 const char *p{start}; 1025 for (; *p == ' '; ++p) { 1026 } 1027 if (*p == '#') { 1028 if (inFixedForm_ && p == start + 5) { 1029 return nullptr; 1030 } 1031 } else { 1032 p = SkipWhiteSpace(p); 1033 if (*p != '#') { 1034 return nullptr; 1035 } 1036 } 1037 return SkipWhiteSpace(p + 1); 1038 } 1039 1040 bool Prescanner::IsNextLinePreprocessorDirective() const { 1041 return IsPreprocessorDirectiveLine(nextLine_) != nullptr; 1042 } 1043 1044 bool Prescanner::SkipCommentLine(bool afterAmpersand) { 1045 if (IsAtEnd()) { 1046 if (afterAmpersand && prescannerNesting_ > 0) { 1047 // A continuation marker at the end of the last line in an 1048 // include file inhibits the newline for that line. 1049 SkipToEndOfLine(); 1050 omitNewline_ = true; 1051 } 1052 return false; 1053 } 1054 auto lineClass{ClassifyLine(nextLine_)}; 1055 if (lineClass.kind == LineClassification::Kind::Comment) { 1056 NextLine(); 1057 return true; 1058 } else if (inPreprocessorDirective_) { 1059 return false; 1060 } else if (lineClass.kind == 1061 LineClassification::Kind::ConditionalCompilationDirective || 1062 lineClass.kind == LineClassification::Kind::PreprocessorDirective) { 1063 // Allow conditional compilation directives (e.g., #ifdef) to affect 1064 // continuation lines. 1065 // Allow other preprocessor directives, too, except #include 1066 // (when it does not follow '&'), #define, and #undef (because 1067 // they cannot be allowed to affect preceding text on a 1068 // continued line). 1069 preprocessor_.Directive(TokenizePreprocessorDirective(), *this); 1070 return true; 1071 } else if (afterAmpersand && 1072 (lineClass.kind == LineClassification::Kind::IncludeDirective || 1073 lineClass.kind == LineClassification::Kind::IncludeLine)) { 1074 SkipToEndOfLine(); 1075 omitNewline_ = true; 1076 skipLeadingAmpersand_ = true; 1077 return false; 1078 } else { 1079 return false; 1080 } 1081 } 1082 1083 const char *Prescanner::FixedFormContinuationLine(bool mightNeedSpace) { 1084 if (IsAtEnd()) { 1085 return nullptr; 1086 } 1087 tabInCurrentLine_ = false; 1088 char col1{*nextLine_}; 1089 if (IsFixedFormCommentChar(col1)) { 1090 int j{1}; 1091 if (InCompilerDirective()) { 1092 // Must be a continued compiler directive. 1093 for (; j < 5; ++j) { 1094 char ch{directiveSentinel_[j - 1]}; 1095 if (ch == '\0') { 1096 break; 1097 } 1098 if (ch != ToLowerCaseLetter(nextLine_[j])) { 1099 return nullptr; 1100 } 1101 } 1102 } else if (features_.IsEnabled(LanguageFeature::OpenMP)) { 1103 // Fixed Source Form Conditional Compilation Sentinels. 1104 if (nextLine_[1] != '$') { 1105 return nullptr; 1106 } 1107 j++; 1108 } else { 1109 return nullptr; 1110 } 1111 for (; j < 5; ++j) { 1112 if (nextLine_[j] != ' ') { 1113 return nullptr; 1114 } 1115 } 1116 char col6{nextLine_[5]}; 1117 if (col6 != '\n' && col6 != '\t' && col6 != ' ' && col6 != '0') { 1118 if (nextLine_[6] != ' ' && mightNeedSpace) { 1119 insertASpace_ = true; 1120 } 1121 return nextLine_ + 6; 1122 } 1123 return nullptr; 1124 } else { 1125 // Normal case: not in a compiler directive. 1126 if (col1 == '&' && 1127 features_.IsEnabled( 1128 LanguageFeature::FixedFormContinuationWithColumn1Ampersand)) { 1129 // Extension: '&' as continuation marker 1130 if (features_.ShouldWarn( 1131 LanguageFeature::FixedFormContinuationWithColumn1Ampersand)) { 1132 Say(GetProvenance(nextLine_), "nonstandard usage"_port_en_US); 1133 } 1134 return nextLine_ + 1; 1135 } 1136 if (col1 == '\t' && nextLine_[1] >= '1' && nextLine_[1] <= '9') { 1137 tabInCurrentLine_ = true; 1138 return nextLine_ + 2; // VAX extension 1139 } 1140 if ((col1 == ' ' || 1141 ((col1 == 'D' || col1 == 'd') && 1142 features_.IsEnabled(LanguageFeature::OldDebugLines))) && 1143 nextLine_[1] == ' ' && nextLine_[2] == ' ' && nextLine_[3] == ' ' && 1144 nextLine_[4] == ' ') { 1145 char col6{nextLine_[5]}; 1146 if (col6 != '\n' && col6 != '\t' && col6 != ' ' && col6 != '0') { 1147 if ((col6 == 'i' || col6 == 'I') && IsIncludeLine(nextLine_)) { 1148 // It's An INCLUDE line, not a continuation 1149 } else { 1150 return nextLine_ + 6; 1151 } 1152 } 1153 } 1154 if (IsImplicitContinuation()) { 1155 return nextLine_; 1156 } 1157 } 1158 return nullptr; // not a continuation line 1159 } 1160 1161 const char *Prescanner::FreeFormContinuationLine(bool ampersand) { 1162 const char *p{nextLine_}; 1163 if (p >= limit_) { 1164 return nullptr; 1165 } 1166 p = SkipWhiteSpace(p); 1167 if (InCompilerDirective()) { 1168 if (*p++ != '!') { 1169 return nullptr; 1170 } 1171 for (const char *s{directiveSentinel_}; *s != '\0'; ++p, ++s) { 1172 if (*s != ToLowerCaseLetter(*p)) { 1173 return nullptr; 1174 } 1175 } 1176 p = SkipWhiteSpace(p); 1177 if (*p == '&') { 1178 if (!ampersand) { 1179 insertASpace_ = true; 1180 } 1181 return p + 1; 1182 } else if (ampersand) { 1183 return p; 1184 } else { 1185 return nullptr; 1186 } 1187 } else { 1188 if (*p == '&') { 1189 return p + 1; 1190 } else if (*p == '!' || *p == '\n' || *p == '#') { 1191 return nullptr; 1192 } else if (ampersand || IsImplicitContinuation()) { 1193 if (continuationInCharLiteral_) { 1194 // 'a'& -> 'a''b' == "a'b" 1195 // 'b' 1196 if (features_.ShouldWarn( 1197 common::LanguageFeature::MiscSourceExtensions)) { 1198 Say(GetProvenanceRange(p, p + 1), 1199 "Character literal continuation line should have been preceded by '&'"_port_en_US); 1200 } 1201 } else if (p > nextLine_) { 1202 --p; 1203 } else { 1204 insertASpace_ = true; 1205 } 1206 return p; 1207 } else { 1208 return nullptr; 1209 } 1210 } 1211 } 1212 1213 bool Prescanner::FixedFormContinuation(bool mightNeedSpace) { 1214 // N.B. We accept '&' as a continuation indicator in fixed form, too, 1215 // but not in a character literal. 1216 if (*at_ == '&' && inCharLiteral_) { 1217 return false; 1218 } 1219 do { 1220 if (const char *cont{FixedFormContinuationLine(mightNeedSpace)}) { 1221 BeginSourceLine(cont); 1222 column_ = 7; 1223 NextLine(); 1224 return true; 1225 } 1226 } while (SkipCommentLine(false /* not after ampersand */)); 1227 return false; 1228 } 1229 1230 bool Prescanner::FreeFormContinuation() { 1231 const char *p{at_}; 1232 bool ampersand{*p == '&'}; 1233 if (ampersand) { 1234 p = SkipWhiteSpace(p + 1); 1235 } 1236 if (*p != '\n') { 1237 if (inCharLiteral_) { 1238 return false; 1239 } else if (*p == '!') { // & ! comment - ok 1240 } else if (ampersand && isPossibleMacroCall_ && (*p == ',' || *p == ')')) { 1241 return false; // allow & at end of a macro argument 1242 } else if (features_.ShouldWarn(LanguageFeature::CruftAfterAmpersand)) { 1243 Say(GetProvenance(p), "missing ! before comment after &"_warn_en_US); 1244 } 1245 } 1246 do { 1247 if (const char *cont{FreeFormContinuationLine(ampersand)}) { 1248 BeginSourceLine(cont); 1249 NextLine(); 1250 return true; 1251 } 1252 } while (SkipCommentLine(ampersand)); 1253 return false; 1254 } 1255 1256 // Implicit line continuation allows a preprocessor macro call with 1257 // arguments to span multiple lines. 1258 bool Prescanner::IsImplicitContinuation() const { 1259 return !inPreprocessorDirective_ && !inCharLiteral_ && isPossibleMacroCall_ && 1260 parenthesisNesting_ > 0 && !IsAtEnd() && 1261 ClassifyLine(nextLine_).kind == LineClassification::Kind::Source; 1262 } 1263 1264 bool Prescanner::Continuation(bool mightNeedFixedFormSpace) { 1265 if (disableSourceContinuation_) { 1266 return false; 1267 } else if (*at_ == '\n' || *at_ == '&') { 1268 if (inFixedForm_) { 1269 return FixedFormContinuation(mightNeedFixedFormSpace); 1270 } else { 1271 return FreeFormContinuation(); 1272 } 1273 } else if (*at_ == '\\' && at_ + 2 == nextLine_ && 1274 backslashFreeFormContinuation_ && !inFixedForm_ && nextLine_ < limit_) { 1275 // cpp-like handling of \ at end of a free form source line 1276 BeginSourceLine(nextLine_); 1277 NextLine(); 1278 return true; 1279 } else { 1280 return false; 1281 } 1282 } 1283 1284 std::optional<Prescanner::LineClassification> 1285 Prescanner::IsFixedFormCompilerDirectiveLine(const char *start) const { 1286 const char *p{start}; 1287 char col1{*p++}; 1288 if (!IsFixedFormCommentChar(col1)) { 1289 return std::nullopt; 1290 } 1291 char sentinel[5], *sp{sentinel}; 1292 int column{2}; 1293 for (; column < 6; ++column, ++p) { 1294 if (*p == ' ' || *p == '\n' || *p == '\t') { 1295 break; 1296 } 1297 if (sp == sentinel + 1 && sentinel[0] == '$' && IsDecimalDigit(*p)) { 1298 // OpenMP conditional compilation line: leave the label alone 1299 break; 1300 } 1301 *sp++ = ToLowerCaseLetter(*p); 1302 } 1303 if (column == 6) { 1304 if (*p == ' ' || *p == '\t' || *p == '0') { 1305 ++p; 1306 } else { 1307 // This is a Continuation line, not an initial directive line. 1308 return std::nullopt; 1309 } 1310 } 1311 if (sp == sentinel) { 1312 return std::nullopt; 1313 } 1314 *sp = '\0'; 1315 if (const char *ss{IsCompilerDirectiveSentinel( 1316 sentinel, static_cast<std::size_t>(sp - sentinel))}) { 1317 std::size_t payloadOffset = p - start; 1318 return {LineClassification{ 1319 LineClassification::Kind::CompilerDirective, payloadOffset, ss}}; 1320 } 1321 return std::nullopt; 1322 } 1323 1324 std::optional<Prescanner::LineClassification> 1325 Prescanner::IsFreeFormCompilerDirectiveLine(const char *start) const { 1326 char sentinel[8]; 1327 const char *p{SkipWhiteSpace(start)}; 1328 if (*p++ != '!') { 1329 return std::nullopt; 1330 } 1331 for (std::size_t j{0}; j + 1 < sizeof sentinel; ++p, ++j) { 1332 if (*p == '\n') { 1333 break; 1334 } 1335 if (*p == ' ' || *p == '\t' || *p == '&') { 1336 if (j == 0) { 1337 break; 1338 } 1339 sentinel[j] = '\0'; 1340 p = SkipWhiteSpace(p + 1); 1341 if (*p == '!') { 1342 break; 1343 } 1344 if (const char *sp{IsCompilerDirectiveSentinel(sentinel, j)}) { 1345 std::size_t offset = p - start; 1346 return {LineClassification{ 1347 LineClassification::Kind::CompilerDirective, offset, sp}}; 1348 } 1349 break; 1350 } 1351 sentinel[j] = ToLowerCaseLetter(*p); 1352 } 1353 return std::nullopt; 1354 } 1355 1356 Prescanner &Prescanner::AddCompilerDirectiveSentinel(const std::string &dir) { 1357 std::uint64_t packed{0}; 1358 for (char ch : dir) { 1359 packed = (packed << 8) | (ToLowerCaseLetter(ch) & 0xff); 1360 } 1361 compilerDirectiveBloomFilter_.set(packed % prime1); 1362 compilerDirectiveBloomFilter_.set(packed % prime2); 1363 compilerDirectiveSentinels_.insert(dir); 1364 return *this; 1365 } 1366 1367 const char *Prescanner::IsCompilerDirectiveSentinel( 1368 const char *sentinel, std::size_t len) const { 1369 std::uint64_t packed{0}; 1370 for (std::size_t j{0}; j < len; ++j) { 1371 packed = (packed << 8) | (sentinel[j] & 0xff); 1372 } 1373 if (len == 0 || !compilerDirectiveBloomFilter_.test(packed % prime1) || 1374 !compilerDirectiveBloomFilter_.test(packed % prime2)) { 1375 return nullptr; 1376 } 1377 const auto iter{compilerDirectiveSentinels_.find(std::string(sentinel, len))}; 1378 return iter == compilerDirectiveSentinels_.end() ? nullptr : iter->c_str(); 1379 } 1380 1381 const char *Prescanner::IsCompilerDirectiveSentinel(CharBlock token) const { 1382 const char *p{token.begin()}; 1383 const char *end{p + token.size()}; 1384 while (p < end && (*p == ' ' || *p == '\n')) { 1385 ++p; 1386 } 1387 if (p < end && *p == '!') { 1388 ++p; 1389 } 1390 while (end > p && (end[-1] == ' ' || end[-1] == '\t')) { 1391 --end; 1392 } 1393 return end > p && IsCompilerDirectiveSentinel(p, end - p) ? p : nullptr; 1394 } 1395 1396 constexpr bool IsDirective(const char *match, const char *dir) { 1397 for (; *match; ++match) { 1398 if (*match != ToLowerCaseLetter(*dir++)) { 1399 return false; 1400 } 1401 } 1402 return true; 1403 } 1404 1405 Prescanner::LineClassification Prescanner::ClassifyLine( 1406 const char *start) const { 1407 if (inFixedForm_) { 1408 if (std::optional<LineClassification> lc{ 1409 IsFixedFormCompilerDirectiveLine(start)}) { 1410 return std::move(*lc); 1411 } 1412 if (IsFixedFormCommentLine(start)) { 1413 return {LineClassification::Kind::Comment}; 1414 } 1415 } else { 1416 if (std::optional<LineClassification> lc{ 1417 IsFreeFormCompilerDirectiveLine(start)}) { 1418 return std::move(*lc); 1419 } 1420 if (const char *bang{IsFreeFormComment(start)}) { 1421 return {LineClassification::Kind::Comment, 1422 static_cast<std::size_t>(bang - start)}; 1423 } 1424 } 1425 if (std::optional<std::size_t> quoteOffset{IsIncludeLine(start)}) { 1426 return {LineClassification::Kind::IncludeLine, *quoteOffset}; 1427 } 1428 if (const char *dir{IsPreprocessorDirectiveLine(start)}) { 1429 if (IsDirective("if", dir) || IsDirective("elif", dir) || 1430 IsDirective("else", dir) || IsDirective("endif", dir)) { 1431 return {LineClassification::Kind::ConditionalCompilationDirective}; 1432 } else if (IsDirective("include", dir)) { 1433 return {LineClassification::Kind::IncludeDirective}; 1434 } else if (IsDirective("define", dir) || IsDirective("undef", dir)) { 1435 return {LineClassification::Kind::DefinitionDirective}; 1436 } else { 1437 return {LineClassification::Kind::PreprocessorDirective}; 1438 } 1439 } 1440 return {LineClassification::Kind::Source}; 1441 } 1442 1443 Prescanner::LineClassification Prescanner::ClassifyLine( 1444 TokenSequence &tokens, Provenance newlineProvenance) const { 1445 // Append a newline temporarily. 1446 tokens.PutNextTokenChar('\n', newlineProvenance); 1447 tokens.CloseToken(); 1448 const char *ppd{tokens.ToCharBlock().begin()}; 1449 LineClassification classification{ClassifyLine(ppd)}; 1450 tokens.pop_back(); // remove the newline 1451 return classification; 1452 } 1453 1454 void Prescanner::SourceFormChange(std::string &&dir) { 1455 if (dir == "!dir$ free") { 1456 inFixedForm_ = false; 1457 } else if (dir == "!dir$ fixed") { 1458 inFixedForm_ = true; 1459 } 1460 } 1461 1462 // Acquire and append compiler directive continuation lines to 1463 // the tokens that constitute a compiler directive, even when those 1464 // directive continuation lines are the result of macro expansion. 1465 // (Not used when neither the original compiler directive line nor 1466 // the directive continuation line result from preprocessing; regular 1467 // line continuation during tokenization handles that normal case.) 1468 bool Prescanner::CompilerDirectiveContinuation( 1469 TokenSequence &tokens, const char *origSentinel) { 1470 if (inFixedForm_ || tokens.empty() || 1471 tokens.TokenAt(tokens.SizeInTokens() - 1) != "&") { 1472 return false; 1473 } 1474 LineClassification followingLine{ClassifyLine(nextLine_)}; 1475 if (followingLine.kind == LineClassification::Kind::Comment) { 1476 nextLine_ += followingLine.payloadOffset; // advance to '!' or newline 1477 NextLine(); 1478 return true; 1479 } 1480 CHECK(origSentinel != nullptr); 1481 directiveSentinel_ = origSentinel; // so InCompilerDirective() is true 1482 const char *nextContinuation{ 1483 followingLine.kind == LineClassification::Kind::CompilerDirective 1484 ? FreeFormContinuationLine(true) 1485 : nullptr}; 1486 if (!nextContinuation && 1487 followingLine.kind != LineClassification::Kind::Source) { 1488 return false; 1489 } 1490 auto origNextLine{nextLine_}; 1491 BeginSourceLine(nextLine_); 1492 NextLine(); 1493 if (nextContinuation) { 1494 // What follows is !DIR$ & xxx; skip over the & so that it 1495 // doesn't cause a spurious continuation. 1496 at_ = nextContinuation; 1497 } else { 1498 // What follows looks like a source line before macro expansion, 1499 // but might become a directive continuation afterwards. 1500 SkipSpaces(); 1501 } 1502 TokenSequence followingTokens; 1503 while (NextToken(followingTokens)) { 1504 } 1505 if (auto followingPrepro{ 1506 preprocessor_.MacroReplacement(followingTokens, *this)}) { 1507 followingTokens = std::move(*followingPrepro); 1508 } 1509 followingTokens.RemoveRedundantBlanks(); 1510 std::size_t startAt{0}; 1511 std::size_t following{followingTokens.SizeInTokens()}; 1512 bool ok{false}; 1513 if (nextContinuation) { 1514 ok = true; 1515 } else { 1516 startAt = 2; 1517 if (startAt < following && followingTokens.TokenAt(0) == "!") { 1518 CharBlock sentinel{followingTokens.TokenAt(1)}; 1519 if (!sentinel.empty() && 1520 std::memcmp(sentinel.begin(), origSentinel, sentinel.size()) == 0) { 1521 ok = true; 1522 while ( 1523 startAt < following && followingTokens.TokenAt(startAt).IsBlank()) { 1524 ++startAt; 1525 } 1526 if (startAt < following && followingTokens.TokenAt(startAt) == "&") { 1527 ++startAt; 1528 } 1529 } 1530 } 1531 } 1532 if (ok) { 1533 tokens.pop_back(); // delete original '&' 1534 tokens.Put(followingTokens, startAt, following - startAt); 1535 tokens.RemoveRedundantBlanks(); 1536 } else { 1537 nextLine_ = origNextLine; 1538 } 1539 return ok; 1540 } 1541 1542 // Similar, but for source line continuation after macro replacement. 1543 bool Prescanner::SourceLineContinuation(TokenSequence &tokens) { 1544 if (!inFixedForm_ && !tokens.empty() && 1545 tokens.TokenAt(tokens.SizeInTokens() - 1) == "&") { 1546 LineClassification followingLine{ClassifyLine(nextLine_)}; 1547 if (followingLine.kind == LineClassification::Kind::Comment) { 1548 nextLine_ += followingLine.payloadOffset; // advance to '!' or newline 1549 NextLine(); 1550 return true; 1551 } else if (const char *nextContinuation{FreeFormContinuationLine(true)}) { 1552 BeginSourceLine(nextLine_); 1553 NextLine(); 1554 TokenSequence followingTokens; 1555 at_ = nextContinuation; 1556 while (NextToken(followingTokens)) { 1557 } 1558 if (auto followingPrepro{ 1559 preprocessor_.MacroReplacement(followingTokens, *this)}) { 1560 followingTokens = std::move(*followingPrepro); 1561 } 1562 followingTokens.RemoveRedundantBlanks(); 1563 tokens.pop_back(); // delete original '&' 1564 tokens.Put(followingTokens); 1565 return true; 1566 } 1567 } 1568 return false; 1569 } 1570 } // namespace Fortran::parser 1571