xref: /llvm-project/flang/lib/Parser/prescan.cpp (revision 4a3e4b99b9ab3016afe8b02c4f83f24635964f4e)
164ab3302SCarolineConcatto //===-- lib/Parser/prescan.cpp --------------------------------------------===//
264ab3302SCarolineConcatto //
364ab3302SCarolineConcatto // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
464ab3302SCarolineConcatto // See https://llvm.org/LICENSE.txt for license information.
564ab3302SCarolineConcatto // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
664ab3302SCarolineConcatto //
764ab3302SCarolineConcatto //===----------------------------------------------------------------------===//
864ab3302SCarolineConcatto 
964ab3302SCarolineConcatto #include "prescan.h"
1064ab3302SCarolineConcatto #include "flang/Common/idioms.h"
1164ab3302SCarolineConcatto #include "flang/Parser/characters.h"
1264ab3302SCarolineConcatto #include "flang/Parser/message.h"
137d60232bSKrzysztof Parzyszek #include "flang/Parser/preprocessor.h"
1464ab3302SCarolineConcatto #include "flang/Parser/source.h"
157d60232bSKrzysztof Parzyszek #include "flang/Parser/token-sequence.h"
168670e499SCaroline Concatto #include "llvm/Support/raw_ostream.h"
1764ab3302SCarolineConcatto #include <cstddef>
1864ab3302SCarolineConcatto #include <cstring>
1964ab3302SCarolineConcatto #include <utility>
2064ab3302SCarolineConcatto #include <vector>
2164ab3302SCarolineConcatto 
2264ab3302SCarolineConcatto namespace Fortran::parser {
2364ab3302SCarolineConcatto 
2464ab3302SCarolineConcatto using common::LanguageFeature;
2564ab3302SCarolineConcatto 
// Upper bound on prescanner nesting depth; Prescan() reports an error and
// stops when prescannerNesting_ exceeds this value.
static constexpr int maxPrescannerNesting{100};
2764ab3302SCarolineConcatto 
2864ab3302SCarolineConcatto Prescanner::Prescanner(Messages &messages, CookedSource &cooked,
2901def7f7Speter klausler     Preprocessor &preprocessor, common::LanguageFeatureControl lfc)
3001def7f7Speter klausler     : messages_{messages}, cooked_{cooked}, preprocessor_{preprocessor},
3101def7f7Speter klausler       allSources_{preprocessor_.allSources()}, features_{lfc},
328b512e52SPeter Klausler       backslashFreeFormContinuation_{preprocessor.AnyDefinitions()},
3392a54197Speter klausler       encoding_{allSources_.encoding()} {}
3464ab3302SCarolineConcatto 
35fc1c481cSPeter Klausler Prescanner::Prescanner(const Prescanner &that, Preprocessor &prepro,
36fc1c481cSPeter Klausler     bool isNestedInIncludeDirective)
37fc1c481cSPeter Klausler     : messages_{that.messages_}, cooked_{that.cooked_}, preprocessor_{prepro},
38fc1c481cSPeter Klausler       allSources_{that.allSources_}, features_{that.features_},
399fb2db1eSPeter Klausler       preprocessingOnly_{that.preprocessingOnly_},
409fb2db1eSPeter Klausler       expandIncludeLines_{that.expandIncludeLines_},
410525c201SPeter Klausler       isNestedInIncludeDirective_{isNestedInIncludeDirective},
428b512e52SPeter Klausler       backslashFreeFormContinuation_{that.backslashFreeFormContinuation_},
438b512e52SPeter Klausler       inFixedForm_{that.inFixedForm_},
4464ab3302SCarolineConcatto       fixedFormColumnLimit_{that.fixedFormColumnLimit_},
458b512e52SPeter Klausler       encoding_{that.encoding_},
468b512e52SPeter Klausler       prescannerNesting_{that.prescannerNesting_ + 1},
4764ab3302SCarolineConcatto       skipLeadingAmpersand_{that.skipLeadingAmpersand_},
4864ab3302SCarolineConcatto       compilerDirectiveBloomFilter_{that.compilerDirectiveBloomFilter_},
4964ab3302SCarolineConcatto       compilerDirectiveSentinels_{that.compilerDirectiveSentinels_} {}
5064ab3302SCarolineConcatto 
// Recognizes a blank at *p and returns the number of bytes it occupies:
//   1 for an ASCII space or a Latin-1 non-breaking space (0xA0),
//   2 for a UTF-8 encoded non-breaking space (0xC2 0xA0),
//   0 when *p does not begin a blank.
// p[1] is inspected only when p[0] is 0xC2, so a NUL terminator at p[0]
// is never read past.
static inline int IsSpace(const char *p) {
  if (*p == ' ') {
    return 1;
  }
  if (*p == '\xa0') { // LATIN-1 NBSP non-breaking space
    return 1;
  }
  if (p[0] == '\xc2' && p[1] == '\xa0') { // UTF-8 NBSP
    return 2;
  }
  return 0;
}
63143f3fc4SPeter Klausler 
64143f3fc4SPeter Klausler static inline int IsSpaceOrTab(const char *p) {
65143f3fc4SPeter Klausler   return *p == '\t' ? 1 : IsSpace(p);
66143f3fc4SPeter Klausler }
67143f3fc4SPeter Klausler 
// True when `ch` is one of the characters this prescanner accepts as a
// fixed-form comment marker: '!', '*', 'C', or 'c'.
static inline constexpr bool IsFixedFormCommentChar(char ch) {
  return ch == '!' || ch == '*' || ch == 'C' || ch == 'c';
}
7164ab3302SCarolineConcatto 
7264ab3302SCarolineConcatto static void NormalizeCompilerDirectiveCommentMarker(TokenSequence &dir) {
7364ab3302SCarolineConcatto   char *p{dir.GetMutableCharData()};
7464ab3302SCarolineConcatto   char *limit{p + dir.SizeInChars()};
7564ab3302SCarolineConcatto   for (; p < limit; ++p) {
7664ab3302SCarolineConcatto     if (*p != ' ') {
7764ab3302SCarolineConcatto       CHECK(IsFixedFormCommentChar(*p));
7864ab3302SCarolineConcatto       *p = '!';
7964ab3302SCarolineConcatto       return;
8064ab3302SCarolineConcatto     }
8164ab3302SCarolineConcatto   }
8264ab3302SCarolineConcatto   DIE("compiler directive all blank");
8364ab3302SCarolineConcatto }
8464ab3302SCarolineConcatto 
8564ab3302SCarolineConcatto void Prescanner::Prescan(ProvenanceRange range) {
8664ab3302SCarolineConcatto   startProvenance_ = range.start();
875881bf00Speter klausler   start_ = allSources_.GetSource(range);
885881bf00Speter klausler   CHECK(start_);
8964ab3302SCarolineConcatto   limit_ = start_ + range.size();
9064ab3302SCarolineConcatto   nextLine_ = start_;
9164ab3302SCarolineConcatto   const bool beganInFixedForm{inFixedForm_};
9264ab3302SCarolineConcatto   if (prescannerNesting_ > maxPrescannerNesting) {
9364ab3302SCarolineConcatto     Say(GetProvenance(start_),
9464ab3302SCarolineConcatto         "too many nested INCLUDE/#include files, possibly circular"_err_en_US);
9564ab3302SCarolineConcatto     return;
9664ab3302SCarolineConcatto   }
975881bf00Speter klausler   while (!IsAtEnd()) {
9864ab3302SCarolineConcatto     Statement();
9964ab3302SCarolineConcatto   }
10064ab3302SCarolineConcatto   if (inFixedForm_ != beganInFixedForm) {
10164ab3302SCarolineConcatto     std::string dir{"!dir$ "};
10264ab3302SCarolineConcatto     if (beganInFixedForm) {
10364ab3302SCarolineConcatto       dir += "fixed";
10464ab3302SCarolineConcatto     } else {
10564ab3302SCarolineConcatto       dir += "free";
10664ab3302SCarolineConcatto     }
10764ab3302SCarolineConcatto     dir += '\n';
10892a54197Speter klausler     TokenSequence tokens{dir, allSources_.AddCompilerInsertion(dir).start()};
10964ab3302SCarolineConcatto     tokens.Emit(cooked_);
11064ab3302SCarolineConcatto   }
11164ab3302SCarolineConcatto }
11264ab3302SCarolineConcatto 
11364ab3302SCarolineConcatto void Prescanner::Statement() {
11464ab3302SCarolineConcatto   TokenSequence tokens;
1158a1f12c6SPeter Klausler   const char *statementStart{nextLine_};
1168a1f12c6SPeter Klausler   LineClassification line{ClassifyLine(statementStart)};
11764ab3302SCarolineConcatto   switch (line.kind) {
11864ab3302SCarolineConcatto   case LineClassification::Kind::Comment:
11964ab3302SCarolineConcatto     nextLine_ += line.payloadOffset; // advance to '!' or newline
12064ab3302SCarolineConcatto     NextLine();
12164ab3302SCarolineConcatto     return;
12264ab3302SCarolineConcatto   case LineClassification::Kind::IncludeLine:
12364ab3302SCarolineConcatto     FortranInclude(nextLine_ + line.payloadOffset);
12464ab3302SCarolineConcatto     NextLine();
12564ab3302SCarolineConcatto     return;
12664ab3302SCarolineConcatto   case LineClassification::Kind::ConditionalCompilationDirective:
1270525c201SPeter Klausler   case LineClassification::Kind::IncludeDirective:
1280525c201SPeter Klausler     preprocessor_.Directive(TokenizePreprocessorDirective(), *this);
1295d15f606SPeter Klausler     afterPreprocessingDirective_ = true;
1305d15f606SPeter Klausler     skipLeadingAmpersand_ |= !inFixedForm_;
1315d15f606SPeter Klausler     return;
1325d15f606SPeter Klausler   case LineClassification::Kind::PreprocessorDirective:
1331ada2352SPeter Klausler     preprocessor_.Directive(TokenizePreprocessorDirective(), *this);
1341ada2352SPeter Klausler     afterPreprocessingDirective_ = true;
1351ada2352SPeter Klausler     // Don't set skipLeadingAmpersand_
1361ada2352SPeter Klausler     return;
1375d15f606SPeter Klausler   case LineClassification::Kind::DefinitionDirective:
1385d15f606SPeter Klausler     preprocessor_.Directive(TokenizePreprocessorDirective(), *this);
1391ada2352SPeter Klausler     // Don't set afterPreprocessingDirective_ or skipLeadingAmpersand_
1400525c201SPeter Klausler     return;
1412734f154SPeter Klausler   case LineClassification::Kind::CompilerDirective: {
14264ab3302SCarolineConcatto     directiveSentinel_ = line.sentinel;
14364ab3302SCarolineConcatto     CHECK(InCompilerDirective());
144189c0833Speter klausler     BeginStatementAndAdvance();
14564ab3302SCarolineConcatto     if (inFixedForm_) {
14664ab3302SCarolineConcatto       CHECK(IsFixedFormCommentChar(*at_));
14764ab3302SCarolineConcatto     } else {
148143f3fc4SPeter Klausler       while (int n{IsSpaceOrTab(at_)}) {
149143f3fc4SPeter Klausler         at_ += n, ++column_;
15064ab3302SCarolineConcatto       }
15164ab3302SCarolineConcatto       CHECK(*at_ == '!');
15264ab3302SCarolineConcatto     }
1532734f154SPeter Klausler     std::optional<int> condOffset;
15464ab3302SCarolineConcatto     if (directiveSentinel_[0] == '$' && directiveSentinel_[1] == '\0') {
1552734f154SPeter Klausler       // OpenMP conditional compilation line.
1562734f154SPeter Klausler       condOffset = 2;
1574ad72793SPeter Klausler     } else if (directiveSentinel_[0] == '@' && directiveSentinel_[1] == 'c' &&
1584ad72793SPeter Klausler         directiveSentinel_[2] == 'u' && directiveSentinel_[3] == 'f' &&
1594ad72793SPeter Klausler         directiveSentinel_[4] == '\0') {
1602734f154SPeter Klausler       // CUDA conditional compilation line.
1612734f154SPeter Klausler       condOffset = 5;
1622734f154SPeter Klausler     }
1632734f154SPeter Klausler     if (condOffset) {
1642734f154SPeter Klausler       at_ += *condOffset, column_ += *condOffset;
1652734f154SPeter Klausler       if (auto payload{IsIncludeLine(at_)}) {
1662734f154SPeter Klausler         FortranInclude(at_ + *payload);
1672734f154SPeter Klausler         return;
1682734f154SPeter Klausler       } else if (inFixedForm_) {
1694ad72793SPeter Klausler         LabelField(tokens);
1704ad72793SPeter Klausler       } else {
1714ad72793SPeter Klausler         SkipSpaces();
1724ad72793SPeter Klausler       }
17364ab3302SCarolineConcatto     } else {
1747c55dd8dSPeter Klausler       // Compiler directive.  Emit normalized sentinel, squash following spaces.
17564ab3302SCarolineConcatto       EmitChar(tokens, '!');
17664ab3302SCarolineConcatto       ++at_, ++column_;
17764ab3302SCarolineConcatto       for (const char *sp{directiveSentinel_}; *sp != '\0';
17864ab3302SCarolineConcatto            ++sp, ++at_, ++column_) {
17964ab3302SCarolineConcatto         EmitChar(tokens, *sp);
18064ab3302SCarolineConcatto       }
181143f3fc4SPeter Klausler       if (IsSpaceOrTab(at_)) {
18264ab3302SCarolineConcatto         EmitChar(tokens, ' ');
183143f3fc4SPeter Klausler         while (int n{IsSpaceOrTab(at_)}) {
184143f3fc4SPeter Klausler           at_ += n, ++column_;
18564ab3302SCarolineConcatto         }
1867c55dd8dSPeter Klausler       }
18764ab3302SCarolineConcatto       tokens.CloseToken();
18864ab3302SCarolineConcatto     }
18964ab3302SCarolineConcatto     break;
1902734f154SPeter Klausler   }
191bde2f39aSPeter Klausler   case LineClassification::Kind::Source: {
192189c0833Speter klausler     BeginStatementAndAdvance();
193bde2f39aSPeter Klausler     bool checkLabelField{false};
19464ab3302SCarolineConcatto     if (inFixedForm_) {
19514c7754aSJean Perier       if (features_.IsEnabled(LanguageFeature::OldDebugLines) &&
19614c7754aSJean Perier           (*at_ == 'D' || *at_ == 'd')) {
19714c7754aSJean Perier         NextChar();
19814c7754aSJean Perier       }
199bde2f39aSPeter Klausler       checkLabelField = true;
2005d15f606SPeter Klausler     } else {
2015d15f606SPeter Klausler       if (skipLeadingAmpersand_) {
20264ab3302SCarolineConcatto         skipLeadingAmpersand_ = false;
20364ab3302SCarolineConcatto         const char *p{SkipWhiteSpace(at_)};
20464ab3302SCarolineConcatto         if (p < limit_ && *p == '&') {
20564ab3302SCarolineConcatto           column_ += ++p - at_;
20664ab3302SCarolineConcatto           at_ = p;
20764ab3302SCarolineConcatto         }
20864ab3302SCarolineConcatto       } else {
20964ab3302SCarolineConcatto         SkipSpaces();
2105d15f606SPeter Klausler       }
211bde2f39aSPeter Klausler     }
212e286ecfeSPeter Klausler     // Check for a leading identifier that might be a keyword macro
213e286ecfeSPeter Klausler     // that will expand to anything indicating a non-source line, like
214e286ecfeSPeter Klausler     // a comment marker or directive sentinel.  If so, disable line
215e286ecfeSPeter Klausler     // continuation, so that NextToken() won't consume anything from
216e286ecfeSPeter Klausler     // following lines.
2172ec1a39bSPeter Klausler     if (IsLegalIdentifierStart(*at_)) {
2182ec1a39bSPeter Klausler       // TODO: Only bother with these cases when any keyword macro has
2192ec1a39bSPeter Klausler       // been defined with replacement text that could begin a comment
2202ec1a39bSPeter Klausler       // or directive sentinel.
2212ec1a39bSPeter Klausler       const char *p{at_};
2222ec1a39bSPeter Klausler       while (IsLegalInIdentifier(*++p)) {
2232ec1a39bSPeter Klausler       }
2242ec1a39bSPeter Klausler       CharBlock id{at_, static_cast<std::size_t>(p - at_)};
2252ec1a39bSPeter Klausler       if (preprocessor_.IsNameDefined(id) &&
226e286ecfeSPeter Klausler           !preprocessor_.IsFunctionLikeDefinition(id)) {
227bde2f39aSPeter Klausler         checkLabelField = false;
2282ec1a39bSPeter Klausler         TokenSequence toks;
2292ec1a39bSPeter Klausler         toks.Put(id, GetProvenance(at_));
2302ec1a39bSPeter Klausler         if (auto replaced{preprocessor_.MacroReplacement(toks, *this)}) {
231e286ecfeSPeter Klausler           auto newLineClass{ClassifyLine(*replaced, GetCurrentProvenance())};
232e286ecfeSPeter Klausler           if (newLineClass.kind ==
233e286ecfeSPeter Klausler               LineClassification::Kind::CompilerDirective) {
234e286ecfeSPeter Klausler             directiveSentinel_ = newLineClass.sentinel;
235f099f76bSPeter Klausler             disableSourceContinuation_ = false;
236f099f76bSPeter Klausler           } else {
237dfbc80feSPeter Klausler             disableSourceContinuation_ = !replaced->empty() &&
238f099f76bSPeter Klausler                 newLineClass.kind != LineClassification::Kind::Source;
239e286ecfeSPeter Klausler           }
240e286ecfeSPeter Klausler         }
241e286ecfeSPeter Klausler       }
242e286ecfeSPeter Klausler     }
243bde2f39aSPeter Klausler     if (checkLabelField) {
244bde2f39aSPeter Klausler       LabelField(tokens);
24564ab3302SCarolineConcatto     }
246bde2f39aSPeter Klausler   } break;
24764ab3302SCarolineConcatto   }
24864ab3302SCarolineConcatto 
24964ab3302SCarolineConcatto   while (NextToken(tokens)) {
25064ab3302SCarolineConcatto   }
2518a1f12c6SPeter Klausler   if (continuationLines_ > 255) {
2521c91d9bdSPeter Klausler     if (features_.ShouldWarn(common::LanguageFeature::MiscSourceExtensions)) {
2530f973ac7SPeter Klausler       Say(common::LanguageFeature::MiscSourceExtensions,
2540f973ac7SPeter Klausler           GetProvenance(statementStart),
2558a1f12c6SPeter Klausler           "%d continuation lines is more than the Fortran standard allows"_port_en_US,
2568a1f12c6SPeter Klausler           continuationLines_);
2578a1f12c6SPeter Klausler     }
2581c91d9bdSPeter Klausler   }
25964ab3302SCarolineConcatto 
26064ab3302SCarolineConcatto   Provenance newlineProvenance{GetCurrentProvenance()};
26164ab3302SCarolineConcatto   if (std::optional<TokenSequence> preprocessed{
26264ab3302SCarolineConcatto           preprocessor_.MacroReplacement(tokens, *this)}) {
263e286ecfeSPeter Klausler     // Reprocess the preprocessed line.
264e286ecfeSPeter Klausler     LineClassification ppl{ClassifyLine(*preprocessed, newlineProvenance)};
26564ab3302SCarolineConcatto     switch (ppl.kind) {
2661f879005STim Keith     case LineClassification::Kind::Comment:
2671f879005STim Keith       break;
26864ab3302SCarolineConcatto     case LineClassification::Kind::IncludeLine:
269e286ecfeSPeter Klausler       FortranInclude(preprocessed->TokenAt(0).begin() + ppl.payloadOffset);
27064ab3302SCarolineConcatto       break;
27164ab3302SCarolineConcatto     case LineClassification::Kind::ConditionalCompilationDirective:
27264ab3302SCarolineConcatto     case LineClassification::Kind::IncludeDirective:
27364ab3302SCarolineConcatto     case LineClassification::Kind::DefinitionDirective:
27464ab3302SCarolineConcatto     case LineClassification::Kind::PreprocessorDirective:
275505f6da1SPeter Klausler       if (features_.ShouldWarn(common::UsageWarning::Preprocessing)) {
2760f973ac7SPeter Klausler         Say(common::UsageWarning::Preprocessing,
2770f973ac7SPeter Klausler             preprocessed->GetProvenanceRange(),
278a53967cdSPeter Klausler             "Preprocessed line resembles a preprocessor directive"_warn_en_US);
279505f6da1SPeter Klausler       }
2800525c201SPeter Klausler       CheckAndEmitLine(preprocessed->ToLowerCase(), newlineProvenance);
28164ab3302SCarolineConcatto       break;
28264ab3302SCarolineConcatto     case LineClassification::Kind::CompilerDirective:
28364ab3302SCarolineConcatto       if (preprocessed->HasRedundantBlanks()) {
28464ab3302SCarolineConcatto         preprocessed->RemoveRedundantBlanks();
28564ab3302SCarolineConcatto       }
286f706411fSPeter Klausler       while (CompilerDirectiveContinuation(*preprocessed, ppl.sentinel)) {
287f706411fSPeter Klausler         newlineProvenance = GetCurrentProvenance();
288f706411fSPeter Klausler       }
28964ab3302SCarolineConcatto       NormalizeCompilerDirectiveCommentMarker(*preprocessed);
29064ab3302SCarolineConcatto       preprocessed->ToLowerCase();
29164ab3302SCarolineConcatto       SourceFormChange(preprocessed->ToString());
2920525c201SPeter Klausler       CheckAndEmitLine(preprocessed->ToLowerCase().ClipComment(
2930525c201SPeter Klausler                            *this, true /* skip first ! */),
2940525c201SPeter Klausler           newlineProvenance);
29564ab3302SCarolineConcatto       break;
29664ab3302SCarolineConcatto     case LineClassification::Kind::Source:
29764ab3302SCarolineConcatto       if (inFixedForm_) {
2989fb2db1eSPeter Klausler         if (!preprocessingOnly_ && preprocessed->HasBlanks()) {
2999fb2db1eSPeter Klausler           preprocessed->RemoveBlanks();
30064ab3302SCarolineConcatto         }
30164ab3302SCarolineConcatto       } else {
302f706411fSPeter Klausler         while (SourceLineContinuation(*preprocessed)) {
303f706411fSPeter Klausler           newlineProvenance = GetCurrentProvenance();
304f706411fSPeter Klausler         }
30564ab3302SCarolineConcatto         if (preprocessed->HasRedundantBlanks()) {
30664ab3302SCarolineConcatto           preprocessed->RemoveRedundantBlanks();
30764ab3302SCarolineConcatto         }
30864ab3302SCarolineConcatto       }
3090525c201SPeter Klausler       CheckAndEmitLine(
3100525c201SPeter Klausler           preprocessed->ToLowerCase().ClipComment(*this), newlineProvenance);
31164ab3302SCarolineConcatto       break;
31264ab3302SCarolineConcatto     }
3130525c201SPeter Klausler   } else { // no macro replacement
31464ab3302SCarolineConcatto     if (line.kind == LineClassification::Kind::CompilerDirective) {
315f706411fSPeter Klausler       while (CompilerDirectiveContinuation(tokens, line.sentinel)) {
316f706411fSPeter Klausler         newlineProvenance = GetCurrentProvenance();
31764ab3302SCarolineConcatto       }
318f706411fSPeter Klausler       tokens.ToLowerCase();
319f706411fSPeter Klausler       SourceFormChange(tokens.ToString());
320f706411fSPeter Klausler     } else { // Kind::Source
321f706411fSPeter Klausler       tokens.ToLowerCase();
322f706411fSPeter Klausler       if (inFixedForm_) {
323f6ddfac4Speter klausler         EnforceStupidEndStatementRules(tokens);
324f6ddfac4Speter klausler       }
325f706411fSPeter Klausler     }
3260525c201SPeter Klausler     CheckAndEmitLine(tokens, newlineProvenance);
32764ab3302SCarolineConcatto   }
3280525c201SPeter Klausler   directiveSentinel_ = nullptr;
3290525c201SPeter Klausler }
3300525c201SPeter Klausler 
3310525c201SPeter Klausler void Prescanner::CheckAndEmitLine(
3320525c201SPeter Klausler     TokenSequence &tokens, Provenance newlineProvenance) {
333e286ecfeSPeter Klausler   tokens.CheckBadFortranCharacters(
334e286ecfeSPeter Klausler       messages_, *this, disableSourceContinuation_);
3350525c201SPeter Klausler   // Parenthesis nesting check does not apply while any #include is
3365d15f606SPeter Klausler   // active, nor on the lines before and after a top-level #include,
3375d15f606SPeter Klausler   // nor before or after conditional source.
3380525c201SPeter Klausler   // Applications play shenanigans with line continuation before and
3395d15f606SPeter Klausler   // after #include'd subprogram argument lists and conditional source.
3400525c201SPeter Klausler   if (!isNestedInIncludeDirective_ && !omitNewline_ &&
3415d15f606SPeter Klausler       !afterPreprocessingDirective_ && tokens.BadlyNestedParentheses() &&
3425d15f606SPeter Klausler       !preprocessor_.InConditional()) {
3435d15f606SPeter Klausler     if (nextLine_ < limit_ && IsPreprocessorDirectiveLine(nextLine_)) {
34486bee819SPeter Klausler       // don't complain
34586bee819SPeter Klausler     } else {
3460525c201SPeter Klausler       tokens.CheckBadParentheses(messages_);
3470525c201SPeter Klausler     }
34886bee819SPeter Klausler   }
3490525c201SPeter Klausler   tokens.Emit(cooked_);
35064ab3302SCarolineConcatto   if (omitNewline_) {
35164ab3302SCarolineConcatto     omitNewline_ = false;
35264ab3302SCarolineConcatto   } else {
35364ab3302SCarolineConcatto     cooked_.Put('\n', newlineProvenance);
3545d15f606SPeter Klausler     afterPreprocessingDirective_ = false;
35564ab3302SCarolineConcatto   }
35664ab3302SCarolineConcatto }
35764ab3302SCarolineConcatto 
35864ab3302SCarolineConcatto TokenSequence Prescanner::TokenizePreprocessorDirective() {
3595881bf00Speter klausler   CHECK(!IsAtEnd() && !inPreprocessorDirective_);
36064ab3302SCarolineConcatto   inPreprocessorDirective_ = true;
361189c0833Speter klausler   BeginStatementAndAdvance();
36264ab3302SCarolineConcatto   TokenSequence tokens;
36364ab3302SCarolineConcatto   while (NextToken(tokens)) {
36464ab3302SCarolineConcatto   }
36564ab3302SCarolineConcatto   inPreprocessorDirective_ = false;
36664ab3302SCarolineConcatto   return tokens;
36764ab3302SCarolineConcatto }
36864ab3302SCarolineConcatto 
36964ab3302SCarolineConcatto void Prescanner::NextLine() {
37064ab3302SCarolineConcatto   void *vstart{static_cast<void *>(const_cast<char *>(nextLine_))};
37164ab3302SCarolineConcatto   void *v{std::memchr(vstart, '\n', limit_ - nextLine_)};
37264ab3302SCarolineConcatto   if (!v) {
37364ab3302SCarolineConcatto     nextLine_ = limit_;
37464ab3302SCarolineConcatto   } else {
37564ab3302SCarolineConcatto     const char *nl{const_cast<const char *>(static_cast<char *>(v))};
37664ab3302SCarolineConcatto     nextLine_ = nl + 1;
37764ab3302SCarolineConcatto   }
37864ab3302SCarolineConcatto }
37964ab3302SCarolineConcatto 
380ba4cc3b3Speter klausler void Prescanner::LabelField(TokenSequence &token) {
381ba4cc3b3Speter klausler   int outCol{1};
382094b380cSpeter klausler   const char *start{at_};
38382d7a6b2SPeter Klausler   std::optional<int> badColumn;
38464ab3302SCarolineConcatto   for (; *at_ != '\n' && column_ <= 6; ++at_) {
38564ab3302SCarolineConcatto     if (*at_ == '\t') {
38664ab3302SCarolineConcatto       ++at_;
38764ab3302SCarolineConcatto       column_ = 7;
38864ab3302SCarolineConcatto       break;
38964ab3302SCarolineConcatto     }
390143f3fc4SPeter Klausler     if (int n{IsSpace(at_)}; n == 0 &&
39164ab3302SCarolineConcatto         !(*at_ == '0' && column_ == 6)) { // '0' in column 6 becomes space
39264ab3302SCarolineConcatto       EmitChar(token, *at_);
393ba4cc3b3Speter klausler       ++outCol;
39482d7a6b2SPeter Klausler       if (!badColumn && (column_ == 6 || !IsDecimalDigit(*at_))) {
39582d7a6b2SPeter Klausler         badColumn = column_;
39626330a0cSpeter klausler       }
39764ab3302SCarolineConcatto     }
39864ab3302SCarolineConcatto     ++column_;
39964ab3302SCarolineConcatto   }
40082d7a6b2SPeter Klausler   if (badColumn && !preprocessor_.IsNameDefined(token.CurrentOpenToken())) {
4015d15f606SPeter Klausler     if ((prescannerNesting_ > 0 && *badColumn == 6 &&
4025d15f606SPeter Klausler             cooked_.BufferedBytes() == firstCookedCharacterOffset_) ||
4035d15f606SPeter Klausler         afterPreprocessingDirective_) {
4045d15f606SPeter Klausler       // This is the first source line in #include'd text or conditional
4055d15f606SPeter Klausler       // code under #if, or the first source line after such.
40686bee819SPeter Klausler       // If it turns out that the preprocessed text begins with a
40786bee819SPeter Klausler       // fixed form continuation line, the newline at the end
40886bee819SPeter Klausler       // of the latest source line beforehand will be deleted in
40986bee819SPeter Klausler       // CookedSource::Marshal().
41086bee819SPeter Klausler       cooked_.MarkPossibleFixedFormContinuation();
41186bee819SPeter Klausler     } else if (features_.ShouldWarn(common::UsageWarning::Scanning)) {
4120f973ac7SPeter Klausler       Say(common::UsageWarning::Scanning, GetProvenance(start + *badColumn - 1),
41382d7a6b2SPeter Klausler           *badColumn == 6
41482d7a6b2SPeter Klausler               ? "Statement should not begin with a continuation line"_warn_en_US
41582d7a6b2SPeter Klausler               : "Character in fixed-form label field must be a digit"_warn_en_US);
416505f6da1SPeter Klausler     }
417094b380cSpeter klausler     token.clear();
41882d7a6b2SPeter Klausler     if (*badColumn < 6) {
419094b380cSpeter klausler       at_ = start;
42082d7a6b2SPeter Klausler       column_ = 1;
421094b380cSpeter klausler       return;
422094b380cSpeter klausler     }
42382d7a6b2SPeter Klausler     outCol = 1;
42482d7a6b2SPeter Klausler   }
425ba4cc3b3Speter klausler   if (outCol == 1) { // empty label field
426ba4cc3b3Speter klausler     // Emit a space so that, if the line is rescanned after preprocessing,
427ba4cc3b3Speter klausler     // a leading 'C' or 'D' won't be left-justified and then accidentally
428ba4cc3b3Speter klausler     // misinterpreted as a comment card.
429ba4cc3b3Speter klausler     EmitChar(token, ' ');
430ba4cc3b3Speter klausler     ++outCol;
43164ab3302SCarolineConcatto   }
432ba4cc3b3Speter klausler   token.CloseToken();
4335fb5f7b5Speter klausler   SkipToNextSignificantCharacter();
43426330a0cSpeter klausler   if (IsDecimalDigit(*at_)) {
4351c91d9bdSPeter Klausler     if (features_.ShouldWarn(common::LanguageFeature::MiscSourceExtensions)) {
4360f973ac7SPeter Klausler       Say(common::LanguageFeature::MiscSourceExtensions, GetCurrentProvenance(),
437a53967cdSPeter Klausler           "Label digit is not in fixed-form label field"_port_en_US);
43826330a0cSpeter klausler     }
43964ab3302SCarolineConcatto   }
4401c91d9bdSPeter Klausler }
44164ab3302SCarolineConcatto 
442f6ddfac4Speter klausler // 6.3.3.5: A program unit END statement, or any other statement whose
443f6ddfac4Speter klausler // initial line resembles an END statement, shall not be continued in
444f6ddfac4Speter klausler // fixed form source.
445f6ddfac4Speter klausler void Prescanner::EnforceStupidEndStatementRules(const TokenSequence &tokens) {
446f6ddfac4Speter klausler   CharBlock cBlock{tokens.ToCharBlock()};
447f6ddfac4Speter klausler   const char *str{cBlock.begin()};
448f6ddfac4Speter klausler   std::size_t n{cBlock.size()};
449f6ddfac4Speter klausler   if (n < 3) {
450f6ddfac4Speter klausler     return;
451f6ddfac4Speter klausler   }
452f6ddfac4Speter klausler   std::size_t j{0};
453f6ddfac4Speter klausler   for (; j < n && (str[j] == ' ' || (str[j] >= '0' && str[j] <= '9')); ++j) {
454f6ddfac4Speter klausler   }
455f6ddfac4Speter klausler   if (j + 3 > n || std::memcmp(str + j, "end", 3) != 0) {
456f6ddfac4Speter klausler     return;
457f6ddfac4Speter klausler   }
458f6ddfac4Speter klausler   // It starts with END, possibly after a label.
459f6ddfac4Speter klausler   auto start{allSources_.GetSourcePosition(tokens.GetCharProvenance(j))};
460f6ddfac4Speter klausler   auto end{allSources_.GetSourcePosition(tokens.GetCharProvenance(n - 1))};
461f6ddfac4Speter klausler   if (!start || !end) {
462f6ddfac4Speter klausler     return;
463f6ddfac4Speter klausler   }
464e12ffe6aSPeter Klausler   if (&*start->sourceFile == &*end->sourceFile && start->line == end->line) {
465f6ddfac4Speter klausler     return; // no continuation
466f6ddfac4Speter klausler   }
467f6ddfac4Speter klausler   j += 3;
468f6ddfac4Speter klausler   static const char *const prefixes[]{"program", "subroutine", "function",
469f6ddfac4Speter klausler       "blockdata", "module", "submodule", nullptr};
470f6ddfac4Speter klausler   bool isPrefix{j == n || !IsLegalInIdentifier(str[j])}; // prefix is END
471f6ddfac4Speter klausler   std::size_t endOfPrefix{j - 1};
472f6ddfac4Speter klausler   for (const char *const *p{prefixes}; *p; ++p) {
473f6ddfac4Speter klausler     std::size_t pLen{std::strlen(*p)};
474f6ddfac4Speter klausler     if (j + pLen <= n && std::memcmp(str + j, *p, pLen) == 0) {
475f6ddfac4Speter klausler       isPrefix = true; // END thing as prefix
476f6ddfac4Speter klausler       j += pLen;
477f6ddfac4Speter klausler       endOfPrefix = j - 1;
478f6ddfac4Speter klausler       for (; j < n && IsLegalInIdentifier(str[j]); ++j) {
479f6ddfac4Speter klausler       }
480f6ddfac4Speter klausler       break;
481f6ddfac4Speter klausler     }
482f6ddfac4Speter klausler   }
483f6ddfac4Speter klausler   if (isPrefix) {
484f6ddfac4Speter klausler     auto range{tokens.GetTokenProvenanceRange(1)};
485f6ddfac4Speter klausler     if (j == n) { // END or END thing [name]
486f6ddfac4Speter klausler       Say(range,
487f6ddfac4Speter klausler           "Program unit END statement may not be continued in fixed form source"_err_en_US);
488f6ddfac4Speter klausler     } else {
489f6ddfac4Speter klausler       auto endOfPrefixPos{
490f6ddfac4Speter klausler           allSources_.GetSourcePosition(tokens.GetCharProvenance(endOfPrefix))};
491f6ddfac4Speter klausler       auto next{allSources_.GetSourcePosition(tokens.GetCharProvenance(j))};
492e12ffe6aSPeter Klausler       if (endOfPrefixPos && next &&
493e12ffe6aSPeter Klausler           &*endOfPrefixPos->sourceFile == &*start->sourceFile &&
494f6ddfac4Speter klausler           endOfPrefixPos->line == start->line &&
495e12ffe6aSPeter Klausler           (&*next->sourceFile != &*start->sourceFile ||
496e12ffe6aSPeter Klausler               next->line != start->line)) {
497f6ddfac4Speter klausler         Say(range,
498f6ddfac4Speter klausler             "Initial line of continued statement must not appear to be a program unit END in fixed form source"_err_en_US);
499f6ddfac4Speter klausler       }
500f6ddfac4Speter klausler     }
501f6ddfac4Speter klausler   }
502f6ddfac4Speter klausler }
503f6ddfac4Speter klausler 
50464ab3302SCarolineConcatto void Prescanner::SkipToEndOfLine() {
50564ab3302SCarolineConcatto   while (*at_ != '\n') {
50664ab3302SCarolineConcatto     ++at_, ++column_;
50764ab3302SCarolineConcatto   }
50864ab3302SCarolineConcatto }
50964ab3302SCarolineConcatto 
51064ab3302SCarolineConcatto bool Prescanner::MustSkipToEndOfLine() const {
51164ab3302SCarolineConcatto   if (inFixedForm_ && column_ > fixedFormColumnLimit_ && !tabInCurrentLine_) {
51264ab3302SCarolineConcatto     return true; // skip over ignored columns in right margin (73:80)
51364ab3302SCarolineConcatto   } else if (*at_ == '!' && !inCharLiteral_) {
514259ce119SPeter Klausler     return !IsCompilerDirectiveSentinel(at_);
51564ab3302SCarolineConcatto   } else {
51664ab3302SCarolineConcatto     return false;
51764ab3302SCarolineConcatto   }
51864ab3302SCarolineConcatto }
51964ab3302SCarolineConcatto 
52064ab3302SCarolineConcatto void Prescanner::NextChar() {
52164ab3302SCarolineConcatto   CHECK(*at_ != '\n');
522143f3fc4SPeter Klausler   int n{IsSpace(at_)};
523143f3fc4SPeter Klausler   at_ += n ? n : 1;
524143f3fc4SPeter Klausler   ++column_;
52564ab3302SCarolineConcatto   while (at_[0] == '\xef' && at_[1] == '\xbb' && at_[2] == '\xbf') {
52664ab3302SCarolineConcatto     // UTF-8 byte order mark - treat this file as UTF-8
52764ab3302SCarolineConcatto     at_ += 3;
52864ab3302SCarolineConcatto     encoding_ = Encoding::UTF_8;
52964ab3302SCarolineConcatto   }
5305fb5f7b5Speter klausler   SkipToNextSignificantCharacter();
5315fb5f7b5Speter klausler }
5325fb5f7b5Speter klausler 
5335fb5f7b5Speter klausler // Skip everything that should be ignored until the next significant
5345fb5f7b5Speter klausler // character is reached; handles C-style comments in preprocessing
5355fb5f7b5Speter klausler // directives, Fortran ! comments, stuff after the right margin in
5365fb5f7b5Speter klausler // fixed form, and all forms of line continuation.
5374299d9b1SPeter Klausler bool Prescanner::SkipToNextSignificantCharacter() {
5384299d9b1SPeter Klausler   auto anyContinuationLine{false};
53964ab3302SCarolineConcatto   if (inPreprocessorDirective_) {
54064ab3302SCarolineConcatto     SkipCComments();
54164ab3302SCarolineConcatto   } else {
54264ab3302SCarolineConcatto     bool mightNeedSpace{false};
54364ab3302SCarolineConcatto     if (MustSkipToEndOfLine()) {
54464ab3302SCarolineConcatto       SkipToEndOfLine();
54564ab3302SCarolineConcatto     } else {
54664ab3302SCarolineConcatto       mightNeedSpace = *at_ == '\n';
54764ab3302SCarolineConcatto     }
54864ab3302SCarolineConcatto     for (; Continuation(mightNeedSpace); mightNeedSpace = false) {
5494299d9b1SPeter Klausler       anyContinuationLine = true;
5508a1f12c6SPeter Klausler       ++continuationLines_;
55164ab3302SCarolineConcatto       if (MustSkipToEndOfLine()) {
55264ab3302SCarolineConcatto         SkipToEndOfLine();
55364ab3302SCarolineConcatto       }
55464ab3302SCarolineConcatto     }
55564ab3302SCarolineConcatto     if (*at_ == '\t') {
55664ab3302SCarolineConcatto       tabInCurrentLine_ = true;
55764ab3302SCarolineConcatto     }
55864ab3302SCarolineConcatto   }
5594299d9b1SPeter Klausler   return anyContinuationLine;
56064ab3302SCarolineConcatto }
56164ab3302SCarolineConcatto 
56264ab3302SCarolineConcatto void Prescanner::SkipCComments() {
56364ab3302SCarolineConcatto   while (true) {
56464ab3302SCarolineConcatto     if (IsCComment(at_)) {
56564ab3302SCarolineConcatto       if (const char *after{SkipCComment(at_)}) {
56664ab3302SCarolineConcatto         column_ += after - at_;
56764ab3302SCarolineConcatto         // May have skipped over one or more newlines; relocate the start of
56864ab3302SCarolineConcatto         // the next line.
56964ab3302SCarolineConcatto         nextLine_ = at_ = after;
57064ab3302SCarolineConcatto         NextLine();
57164ab3302SCarolineConcatto       } else {
57264ab3302SCarolineConcatto         // Don't emit any messages about unclosed C-style comments, because
57364ab3302SCarolineConcatto         // the sequence /* can appear legally in a FORMAT statement.  There's
57464ab3302SCarolineConcatto         // no ambiguity, since the sequence */ cannot appear legally.
57564ab3302SCarolineConcatto         break;
57664ab3302SCarolineConcatto       }
57764ab3302SCarolineConcatto     } else if (inPreprocessorDirective_ && at_[0] == '\\' && at_ + 2 < limit_ &&
5785881bf00Speter klausler         at_[1] == '\n' && !IsAtEnd()) {
57964ab3302SCarolineConcatto       BeginSourceLineAndAdvance();
58064ab3302SCarolineConcatto     } else {
58164ab3302SCarolineConcatto       break;
58264ab3302SCarolineConcatto     }
58364ab3302SCarolineConcatto   }
58464ab3302SCarolineConcatto }
58564ab3302SCarolineConcatto 
58664ab3302SCarolineConcatto void Prescanner::SkipSpaces() {
587143f3fc4SPeter Klausler   while (IsSpaceOrTab(at_)) {
58864ab3302SCarolineConcatto     NextChar();
58964ab3302SCarolineConcatto   }
59064ab3302SCarolineConcatto   insertASpace_ = false;
59164ab3302SCarolineConcatto }
59264ab3302SCarolineConcatto 
59364ab3302SCarolineConcatto const char *Prescanner::SkipWhiteSpace(const char *p) {
594143f3fc4SPeter Klausler   while (int n{IsSpaceOrTab(p)}) {
595143f3fc4SPeter Klausler     p += n;
59664ab3302SCarolineConcatto   }
59764ab3302SCarolineConcatto   return p;
59864ab3302SCarolineConcatto }
59964ab3302SCarolineConcatto 
60064ab3302SCarolineConcatto const char *Prescanner::SkipWhiteSpaceAndCComments(const char *p) const {
60164ab3302SCarolineConcatto   while (true) {
602143f3fc4SPeter Klausler     if (int n{IsSpaceOrTab(p)}) {
603143f3fc4SPeter Klausler       p += n;
60464ab3302SCarolineConcatto     } else if (IsCComment(p)) {
60564ab3302SCarolineConcatto       if (const char *after{SkipCComment(p)}) {
60664ab3302SCarolineConcatto         p = after;
60764ab3302SCarolineConcatto       } else {
60864ab3302SCarolineConcatto         break;
60964ab3302SCarolineConcatto       }
61064ab3302SCarolineConcatto     } else {
61164ab3302SCarolineConcatto       break;
61264ab3302SCarolineConcatto     }
61364ab3302SCarolineConcatto   }
61464ab3302SCarolineConcatto   return p;
61564ab3302SCarolineConcatto }
61664ab3302SCarolineConcatto 
61764ab3302SCarolineConcatto const char *Prescanner::SkipCComment(const char *p) const {
61864ab3302SCarolineConcatto   char star{' '}, slash{' '};
61964ab3302SCarolineConcatto   p += 2;
62064ab3302SCarolineConcatto   while (star != '*' || slash != '/') {
62164ab3302SCarolineConcatto     if (p >= limit_) {
62264ab3302SCarolineConcatto       return nullptr; // signifies an unterminated comment
62364ab3302SCarolineConcatto     }
62464ab3302SCarolineConcatto     star = slash;
62564ab3302SCarolineConcatto     slash = *p++;
62664ab3302SCarolineConcatto   }
62764ab3302SCarolineConcatto   return p;
62864ab3302SCarolineConcatto }
62964ab3302SCarolineConcatto 
63064ab3302SCarolineConcatto bool Prescanner::NextToken(TokenSequence &tokens) {
63164ab3302SCarolineConcatto   CHECK(at_ >= start_ && at_ < limit_);
6329fb2db1eSPeter Klausler   if (InFixedFormSource() && !preprocessingOnly_) {
63364ab3302SCarolineConcatto     SkipSpaces();
63464ab3302SCarolineConcatto   } else {
63564ab3302SCarolineConcatto     if (*at_ == '/' && IsCComment(at_)) {
63664ab3302SCarolineConcatto       // Recognize and skip over classic C style /*comments*/ when
63764ab3302SCarolineConcatto       // outside a character literal.
63864ab3302SCarolineConcatto       if (features_.ShouldWarn(LanguageFeature::ClassicCComments)) {
6390f973ac7SPeter Klausler         Say(LanguageFeature::ClassicCComments, GetCurrentProvenance(),
640a53967cdSPeter Klausler             "nonstandard usage: C-style comment"_port_en_US);
64164ab3302SCarolineConcatto       }
64264ab3302SCarolineConcatto       SkipCComments();
64364ab3302SCarolineConcatto     }
644143f3fc4SPeter Klausler     if (IsSpaceOrTab(at_)) {
64564ab3302SCarolineConcatto       // Compress free-form white space into a single space character.
64664ab3302SCarolineConcatto       const auto theSpace{at_};
64764ab3302SCarolineConcatto       char previous{at_ <= start_ ? ' ' : at_[-1]};
64864ab3302SCarolineConcatto       NextChar();
64964ab3302SCarolineConcatto       SkipSpaces();
6505d15f606SPeter Klausler       if (*at_ == '\n' && !omitNewline_) {
65164ab3302SCarolineConcatto         // Discard white space at the end of a line.
65264ab3302SCarolineConcatto       } else if (!inPreprocessorDirective_ &&
65364ab3302SCarolineConcatto           (previous == '(' || *at_ == '(' || *at_ == ')')) {
65464ab3302SCarolineConcatto         // Discard white space before/after '(' and before ')', unless in a
65564ab3302SCarolineConcatto         // preprocessor directive.  This helps yield space-free contiguous
65664ab3302SCarolineConcatto         // names for generic interfaces like OPERATOR( + ) and
65764ab3302SCarolineConcatto         // READ ( UNFORMATTED ), without misinterpreting #define f (notAnArg).
65864ab3302SCarolineConcatto         // This has the effect of silently ignoring the illegal spaces in
65964ab3302SCarolineConcatto         // the array constructor ( /1,2/ ) but that seems benign; it's
66064ab3302SCarolineConcatto         // hard to avoid that while still removing spaces from OPERATOR( / )
66164ab3302SCarolineConcatto         // and OPERATOR( // ).
66264ab3302SCarolineConcatto       } else {
66364ab3302SCarolineConcatto         // Preserve the squashed white space as a single space character.
66464ab3302SCarolineConcatto         tokens.PutNextTokenChar(' ', GetProvenance(theSpace));
66564ab3302SCarolineConcatto         tokens.CloseToken();
66664ab3302SCarolineConcatto         return true;
66764ab3302SCarolineConcatto       }
66864ab3302SCarolineConcatto     }
66964ab3302SCarolineConcatto   }
67064ab3302SCarolineConcatto   if (insertASpace_) {
67164ab3302SCarolineConcatto     tokens.PutNextTokenChar(' ', spaceProvenance_);
67264ab3302SCarolineConcatto     insertASpace_ = false;
67364ab3302SCarolineConcatto   }
67464ab3302SCarolineConcatto   if (*at_ == '\n') {
67564ab3302SCarolineConcatto     return false;
67664ab3302SCarolineConcatto   }
67764ab3302SCarolineConcatto   const char *start{at_};
67864ab3302SCarolineConcatto   if (*at_ == '\'' || *at_ == '"') {
67964ab3302SCarolineConcatto     QuotedCharacterLiteral(tokens, start);
68064ab3302SCarolineConcatto     preventHollerith_ = false;
68164ab3302SCarolineConcatto   } else if (IsDecimalDigit(*at_)) {
68264ab3302SCarolineConcatto     int n{0}, digits{0};
68364ab3302SCarolineConcatto     static constexpr int maxHollerith{256 /*lines*/ * (132 - 6 /*columns*/)};
68464ab3302SCarolineConcatto     do {
68564ab3302SCarolineConcatto       if (n < maxHollerith) {
68664ab3302SCarolineConcatto         n = 10 * n + DecimalDigitValue(*at_);
68764ab3302SCarolineConcatto       }
68864ab3302SCarolineConcatto       EmitCharAndAdvance(tokens, *at_);
68964ab3302SCarolineConcatto       ++digits;
69064ab3302SCarolineConcatto       if (InFixedFormSource()) {
69164ab3302SCarolineConcatto         SkipSpaces();
69264ab3302SCarolineConcatto       }
69364ab3302SCarolineConcatto     } while (IsDecimalDigit(*at_));
69464ab3302SCarolineConcatto     if ((*at_ == 'h' || *at_ == 'H') && n > 0 && n < maxHollerith &&
69564ab3302SCarolineConcatto         !preventHollerith_) {
69664ab3302SCarolineConcatto       Hollerith(tokens, n, start);
69764ab3302SCarolineConcatto     } else if (*at_ == '.') {
69864ab3302SCarolineConcatto       while (IsDecimalDigit(EmitCharAndAdvance(tokens, *at_))) {
69964ab3302SCarolineConcatto       }
70064ab3302SCarolineConcatto       ExponentAndKind(tokens);
70164ab3302SCarolineConcatto     } else if (ExponentAndKind(tokens)) {
70264ab3302SCarolineConcatto     } else if (digits == 1 && n == 0 && (*at_ == 'x' || *at_ == 'X') &&
70364ab3302SCarolineConcatto         inPreprocessorDirective_) {
70464ab3302SCarolineConcatto       do {
70564ab3302SCarolineConcatto         EmitCharAndAdvance(tokens, *at_);
70664ab3302SCarolineConcatto       } while (IsHexadecimalDigit(*at_));
70701def7f7Speter klausler     } else if (at_[0] == '_' && (at_[1] == '\'' || at_[1] == '"')) { // 4_"..."
70864ab3302SCarolineConcatto       EmitCharAndAdvance(tokens, *at_);
70964ab3302SCarolineConcatto       QuotedCharacterLiteral(tokens, start);
710776e25afSPeter Klausler     } else if (IsLetter(*at_) && !preventHollerith_ &&
711*4a3e4b99SPeter Klausler         parenthesisNesting_ > 0 &&
712*4a3e4b99SPeter Klausler         !preprocessor_.IsNameDefined(CharBlock{at_, 1})) {
713776e25afSPeter Klausler       // Handles FORMAT(3I9HHOLLERITH) by skipping over the first I so that
714*4a3e4b99SPeter Klausler       // we don't misrecognize I9HHOLLERITH as an identifier in the next case.
715776e25afSPeter Klausler       EmitCharAndAdvance(tokens, *at_);
71664ab3302SCarolineConcatto     }
71764ab3302SCarolineConcatto     preventHollerith_ = false;
71864ab3302SCarolineConcatto   } else if (*at_ == '.') {
71964ab3302SCarolineConcatto     char nch{EmitCharAndAdvance(tokens, '.')};
72064ab3302SCarolineConcatto     if (!inPreprocessorDirective_ && IsDecimalDigit(nch)) {
72164ab3302SCarolineConcatto       while (IsDecimalDigit(EmitCharAndAdvance(tokens, *at_))) {
72264ab3302SCarolineConcatto       }
72364ab3302SCarolineConcatto       ExponentAndKind(tokens);
72464ab3302SCarolineConcatto     } else if (nch == '.' && EmitCharAndAdvance(tokens, '.') == '.') {
72564ab3302SCarolineConcatto       EmitCharAndAdvance(tokens, '.'); // variadic macro definition ellipsis
72664ab3302SCarolineConcatto     }
72764ab3302SCarolineConcatto     preventHollerith_ = false;
72864ab3302SCarolineConcatto   } else if (IsLegalInIdentifier(*at_)) {
7294299d9b1SPeter Klausler     int parts{1};
7301db2859dSPeter Klausler     const char *afterLast{nullptr};
7314299d9b1SPeter Klausler     do {
7324299d9b1SPeter Klausler       EmitChar(tokens, *at_);
7334299d9b1SPeter Klausler       ++at_, ++column_;
7341db2859dSPeter Klausler       afterLast = at_;
7354299d9b1SPeter Klausler       if (SkipToNextSignificantCharacter() && IsLegalIdentifierStart(*at_)) {
7364299d9b1SPeter Klausler         tokens.CloseToken();
7374299d9b1SPeter Klausler         ++parts;
7384299d9b1SPeter Klausler       }
7394299d9b1SPeter Klausler     } while (IsLegalInIdentifier(*at_));
7404299d9b1SPeter Klausler     if (parts >= 3) {
7414299d9b1SPeter Klausler       // Subtlety: When an identifier is split across three or more continuation
7421db2859dSPeter Klausler       // lines (or two continuation lines, immediately preceded or followed
7431db2859dSPeter Klausler       // by '&' free form continuation line markers, its parts are kept as
7441324789aSPeter Klausler       // distinct pp-tokens so that macro replacement operates on them
7451324789aSPeter Klausler       // independently.  This trick accommodates the historic practice of
7461324789aSPeter Klausler       // using line continuation for token pasting after replacement.
7474299d9b1SPeter Klausler     } else if (parts == 2) {
7481324789aSPeter Klausler       if (afterLast && afterLast < limit_) {
7491324789aSPeter Klausler         afterLast = SkipWhiteSpace(afterLast);
7501324789aSPeter Klausler       }
7511db2859dSPeter Klausler       if ((start > start_ && start[-1] == '&') ||
7521324789aSPeter Klausler           (afterLast && afterLast < limit_ &&
7531324789aSPeter Klausler               (*afterLast == '&' || *afterLast == '\n'))) {
7541db2859dSPeter Klausler         // call &                call foo&        call foo&
7551db2859dSPeter Klausler         //   &MACRO&      OR       &MACRO&   OR     &MACRO
7561db2859dSPeter Klausler         //   &foo(...)             &(...)
7571db2859dSPeter Klausler       } else {
7584299d9b1SPeter Klausler         tokens.ReopenLastToken();
759a6569e57SPeter Klausler       }
7601db2859dSPeter Klausler     }
761a6569e57SPeter Klausler     if (InFixedFormSource()) {
762a6569e57SPeter Klausler       SkipSpaces();
763a6569e57SPeter Klausler     }
76401def7f7Speter klausler     if ((*at_ == '\'' || *at_ == '"') &&
76501def7f7Speter klausler         tokens.CharAt(tokens.SizeInChars() - 1) == '_') { // kind_"..."
76664ab3302SCarolineConcatto       QuotedCharacterLiteral(tokens, start);
767189c0833Speter klausler       preventHollerith_ = false;
768a6569e57SPeter Klausler     } else {
769a6569e57SPeter Klausler       preventHollerith_ = true; // DO 10 H = ...
770a6569e57SPeter Klausler     }
77164ab3302SCarolineConcatto   } else if (*at_ == '*') {
77264ab3302SCarolineConcatto     if (EmitCharAndAdvance(tokens, '*') == '*') {
77364ab3302SCarolineConcatto       EmitCharAndAdvance(tokens, '*');
77464ab3302SCarolineConcatto     } else {
77564ab3302SCarolineConcatto       // Subtle ambiguity:
77664ab3302SCarolineConcatto       //  CHARACTER*2H     declares H because *2 is a kind specifier
77764ab3302SCarolineConcatto       //  DATAC/N*2H  /    is repeated Hollerith
778189c0833Speter klausler       preventHollerith_ = !slashInCurrentStatement_;
77964ab3302SCarolineConcatto     }
78064ab3302SCarolineConcatto   } else {
78164ab3302SCarolineConcatto     char ch{*at_};
7826fac3f7bSPeter Klausler     if (ch == '(') {
7836fac3f7bSPeter Klausler       if (parenthesisNesting_++ == 0) {
7846fac3f7bSPeter Klausler         isPossibleMacroCall_ = tokens.SizeInTokens() > 0 &&
78550e1ad6eSRoger Ferrer Ibanez             preprocessor_.IsFunctionLikeDefinition(
7866fac3f7bSPeter Klausler                 tokens.TokenAt(tokens.SizeInTokens() - 1));
7876fac3f7bSPeter Klausler       }
7886fac3f7bSPeter Klausler     } else if (ch == ')' && parenthesisNesting_ > 0) {
7896fac3f7bSPeter Klausler       --parenthesisNesting_;
79064ab3302SCarolineConcatto     }
79164ab3302SCarolineConcatto     char nch{EmitCharAndAdvance(tokens, ch)};
79264ab3302SCarolineConcatto     preventHollerith_ = false;
79364ab3302SCarolineConcatto     if ((nch == '=' &&
79464ab3302SCarolineConcatto             (ch == '<' || ch == '>' || ch == '/' || ch == '=' || ch == '!')) ||
79564ab3302SCarolineConcatto         (ch == nch &&
79664ab3302SCarolineConcatto             (ch == '/' || ch == ':' || ch == '*' || ch == '#' || ch == '&' ||
79764ab3302SCarolineConcatto                 ch == '|' || ch == '<' || ch == '>')) ||
79864ab3302SCarolineConcatto         (ch == '=' && nch == '>')) {
79964ab3302SCarolineConcatto       // token comprises two characters
80064ab3302SCarolineConcatto       EmitCharAndAdvance(tokens, nch);
80164ab3302SCarolineConcatto     } else if (ch == '/') {
802189c0833Speter klausler       slashInCurrentStatement_ = true;
8037c84f6a4SPeter Klausler     } else if (ch == ';' && InFixedFormSource()) {
8047c84f6a4SPeter Klausler       SkipSpaces();
8057c84f6a4SPeter Klausler       if (IsDecimalDigit(*at_)) {
8061c91d9bdSPeter Klausler         if (features_.ShouldWarn(
8071c91d9bdSPeter Klausler                 common::LanguageFeature::MiscSourceExtensions)) {
8080f973ac7SPeter Klausler           Say(common::LanguageFeature::MiscSourceExtensions,
8090f973ac7SPeter Klausler               GetProvenanceRange(at_, at_ + 1),
8107c84f6a4SPeter Klausler               "Label should be in the label field"_port_en_US);
8117c84f6a4SPeter Klausler         }
81264ab3302SCarolineConcatto       }
81364ab3302SCarolineConcatto     }
8141c91d9bdSPeter Klausler   }
81564ab3302SCarolineConcatto   tokens.CloseToken();
81664ab3302SCarolineConcatto   return true;
81764ab3302SCarolineConcatto }
81864ab3302SCarolineConcatto 
81964ab3302SCarolineConcatto bool Prescanner::ExponentAndKind(TokenSequence &tokens) {
82064ab3302SCarolineConcatto   char ed{ToLowerCaseLetter(*at_)};
82164ab3302SCarolineConcatto   if (ed != 'e' && ed != 'd') {
82264ab3302SCarolineConcatto     return false;
82364ab3302SCarolineConcatto   }
824bb23ac65SPeter Klausler   // Do some look-ahead to ensure that this 'e'/'d' is an exponent,
825bb23ac65SPeter Klausler   // not the start of an identifier that could be a macro.
826bb23ac65SPeter Klausler   const char *p{at_};
827bb23ac65SPeter Klausler   if (int n{IsSpace(++p)}) {
828bb23ac65SPeter Klausler     p += n;
829bb23ac65SPeter Klausler   }
830bb23ac65SPeter Klausler   if (*p == '+' || *p == '-') {
831bb23ac65SPeter Klausler     if (int n{IsSpace(++p)}) {
832bb23ac65SPeter Klausler       p += n;
833bb23ac65SPeter Klausler     }
834bb23ac65SPeter Klausler   }
835bb23ac65SPeter Klausler   if (IsDecimalDigit(*p)) { // it's an exponent
83664ab3302SCarolineConcatto     EmitCharAndAdvance(tokens, ed);
83764ab3302SCarolineConcatto     if (*at_ == '+' || *at_ == '-') {
83864ab3302SCarolineConcatto       EmitCharAndAdvance(tokens, *at_);
83964ab3302SCarolineConcatto     }
84064ab3302SCarolineConcatto     while (IsDecimalDigit(*at_)) {
84164ab3302SCarolineConcatto       EmitCharAndAdvance(tokens, *at_);
84264ab3302SCarolineConcatto     }
84364ab3302SCarolineConcatto     if (*at_ == '_') {
84464ab3302SCarolineConcatto       while (IsLegalInIdentifier(EmitCharAndAdvance(tokens, *at_))) {
84564ab3302SCarolineConcatto       }
84664ab3302SCarolineConcatto     }
84764ab3302SCarolineConcatto     return true;
848bb23ac65SPeter Klausler   } else {
849bb23ac65SPeter Klausler     return false;
850bb23ac65SPeter Klausler   }
85164ab3302SCarolineConcatto }
85264ab3302SCarolineConcatto 
85364ab3302SCarolineConcatto void Prescanner::QuotedCharacterLiteral(
85464ab3302SCarolineConcatto     TokenSequence &tokens, const char *start) {
85564ab3302SCarolineConcatto   char quote{*at_};
85664ab3302SCarolineConcatto   const char *end{at_ + 1};
85764ab3302SCarolineConcatto   inCharLiteral_ = true;
858ea3a3b25SPeter Klausler   continuationInCharLiteral_ = true;
85964ab3302SCarolineConcatto   const auto emit{[&](char ch) { EmitChar(tokens, ch); }};
86064ab3302SCarolineConcatto   const auto insert{[&](char ch) { EmitInsertedChar(tokens, ch); }};
86164ab3302SCarolineConcatto   bool isEscaped{false};
86264ab3302SCarolineConcatto   bool escapesEnabled{features_.IsEnabled(LanguageFeature::BackslashEscapes)};
86364ab3302SCarolineConcatto   while (true) {
86464ab3302SCarolineConcatto     if (*at_ == '\\') {
86564ab3302SCarolineConcatto       if (escapesEnabled) {
86664ab3302SCarolineConcatto         isEscaped = !isEscaped;
86764ab3302SCarolineConcatto       } else {
86864ab3302SCarolineConcatto         // The parser always processes escape sequences, so don't confuse it
86964ab3302SCarolineConcatto         // when escapes are disabled.
87064ab3302SCarolineConcatto         insert('\\');
87164ab3302SCarolineConcatto       }
87264ab3302SCarolineConcatto     } else {
87364ab3302SCarolineConcatto       isEscaped = false;
87464ab3302SCarolineConcatto     }
87564ab3302SCarolineConcatto     EmitQuotedChar(static_cast<unsigned char>(*at_), emit, insert, false,
87664ab3302SCarolineConcatto         Encoding::LATIN_1);
87764ab3302SCarolineConcatto     while (PadOutCharacterLiteral(tokens)) {
87864ab3302SCarolineConcatto     }
87964ab3302SCarolineConcatto     if (*at_ == '\n') {
88064ab3302SCarolineConcatto       if (!inPreprocessorDirective_) {
88164ab3302SCarolineConcatto         Say(GetProvenanceRange(start, end),
88264ab3302SCarolineConcatto             "Incomplete character literal"_err_en_US);
88364ab3302SCarolineConcatto       }
88464ab3302SCarolineConcatto       break;
88564ab3302SCarolineConcatto     }
886cddbcd15SPeter Klausler     // Here's a weird edge case.  When there's a two or more following
887cddbcd15SPeter Klausler     // continuation lines at this point, and the entire significant part of
888cddbcd15SPeter Klausler     // the next continuation line is the name of a keyword macro, replace
889cddbcd15SPeter Klausler     // it in the character literal with its definition.  Example:
890cddbcd15SPeter Klausler     //   #define FOO foo
891cddbcd15SPeter Klausler     //   subroutine subr() bind(c, name="my_&
892cddbcd15SPeter Klausler     //     &FOO&
893cddbcd15SPeter Klausler     //     &_bar") ...
894cddbcd15SPeter Klausler     // produces a binding name of "my_foo_bar".
895cddbcd15SPeter Klausler     while (at_[1] == '&' && nextLine_ < limit_ && !InFixedFormSource()) {
896cddbcd15SPeter Klausler       const char *idStart{nextLine_};
897cddbcd15SPeter Klausler       if (const char *amper{SkipWhiteSpace(nextLine_)}; *amper == '&') {
898cddbcd15SPeter Klausler         idStart = amper + 1;
899cddbcd15SPeter Klausler       }
900cddbcd15SPeter Klausler       if (IsLegalIdentifierStart(*idStart)) {
901cddbcd15SPeter Klausler         std::size_t idLen{1};
902cddbcd15SPeter Klausler         for (; IsLegalInIdentifier(idStart[idLen]); ++idLen) {
903cddbcd15SPeter Klausler         }
904cddbcd15SPeter Klausler         if (idStart[idLen] == '&') {
905cddbcd15SPeter Klausler           CharBlock id{idStart, idLen};
906cddbcd15SPeter Klausler           if (preprocessor_.IsNameDefined(id)) {
907cddbcd15SPeter Klausler             TokenSequence ppTokens;
908cddbcd15SPeter Klausler             ppTokens.Put(id, GetProvenance(idStart));
909cddbcd15SPeter Klausler             if (auto replaced{
910cddbcd15SPeter Klausler                     preprocessor_.MacroReplacement(ppTokens, *this)}) {
911cddbcd15SPeter Klausler               tokens.Put(*replaced);
912cddbcd15SPeter Klausler               at_ = &idStart[idLen - 1];
913cddbcd15SPeter Klausler               NextLine();
914cddbcd15SPeter Klausler               continue; // try again on the next line
915cddbcd15SPeter Klausler             }
916cddbcd15SPeter Klausler           }
917cddbcd15SPeter Klausler         }
918cddbcd15SPeter Klausler       }
919cddbcd15SPeter Klausler       break;
920cddbcd15SPeter Klausler     }
92164ab3302SCarolineConcatto     end = at_ + 1;
92264ab3302SCarolineConcatto     NextChar();
92364ab3302SCarolineConcatto     if (*at_ == quote && !isEscaped) {
92464ab3302SCarolineConcatto       // A doubled unescaped quote mark becomes a single instance of that
92564ab3302SCarolineConcatto       // quote character in the literal (later).  There can be spaces between
92664ab3302SCarolineConcatto       // the quotes in fixed form source.
92764ab3302SCarolineConcatto       EmitChar(tokens, quote);
92864ab3302SCarolineConcatto       inCharLiteral_ = false; // for cases like print *, '...'!comment
92964ab3302SCarolineConcatto       NextChar();
93064ab3302SCarolineConcatto       if (InFixedFormSource()) {
93164ab3302SCarolineConcatto         SkipSpaces();
93264ab3302SCarolineConcatto       }
93364ab3302SCarolineConcatto       if (*at_ != quote) {
93464ab3302SCarolineConcatto         break;
93564ab3302SCarolineConcatto       }
93664ab3302SCarolineConcatto       inCharLiteral_ = true;
93728eec1bdSPeter Klausler     }
93864ab3302SCarolineConcatto   }
939ea3a3b25SPeter Klausler   continuationInCharLiteral_ = false;
94064ab3302SCarolineConcatto   inCharLiteral_ = false;
94164ab3302SCarolineConcatto }
94264ab3302SCarolineConcatto 
94364ab3302SCarolineConcatto void Prescanner::Hollerith(
94464ab3302SCarolineConcatto     TokenSequence &tokens, int count, const char *start) {
94564ab3302SCarolineConcatto   inCharLiteral_ = true;
94664ab3302SCarolineConcatto   CHECK(*at_ == 'h' || *at_ == 'H');
94764ab3302SCarolineConcatto   EmitChar(tokens, 'H');
94864ab3302SCarolineConcatto   while (count-- > 0) {
94964ab3302SCarolineConcatto     if (PadOutCharacterLiteral(tokens)) {
95064ab3302SCarolineConcatto     } else if (*at_ == '\n') {
951505f6da1SPeter Klausler       if (features_.ShouldWarn(common::UsageWarning::Scanning)) {
9520f973ac7SPeter Klausler         Say(common::UsageWarning::Scanning, GetProvenanceRange(start, at_),
953a53967cdSPeter Klausler             "Possible truncated Hollerith literal"_warn_en_US);
954505f6da1SPeter Klausler       }
95564ab3302SCarolineConcatto       break;
95664ab3302SCarolineConcatto     } else {
95764ab3302SCarolineConcatto       NextChar();
95864ab3302SCarolineConcatto       // Each multi-byte character encoding counts as a single character.
95964ab3302SCarolineConcatto       // No escape sequences are recognized.
96064ab3302SCarolineConcatto       // Hollerith is always emitted to the cooked character
96164ab3302SCarolineConcatto       // stream in UTF-8.
96264ab3302SCarolineConcatto       DecodedCharacter decoded{DecodeCharacter(
96364ab3302SCarolineConcatto           encoding_, at_, static_cast<std::size_t>(limit_ - at_), false)};
96464ab3302SCarolineConcatto       if (decoded.bytes > 0) {
96564ab3302SCarolineConcatto         EncodedCharacter utf8{
96664ab3302SCarolineConcatto             EncodeCharacter<Encoding::UTF_8>(decoded.codepoint)};
96764ab3302SCarolineConcatto         for (int j{0}; j < utf8.bytes; ++j) {
96864ab3302SCarolineConcatto           EmitChar(tokens, utf8.buffer[j]);
96964ab3302SCarolineConcatto         }
97064ab3302SCarolineConcatto         at_ += decoded.bytes - 1;
97164ab3302SCarolineConcatto       } else {
97264ab3302SCarolineConcatto         Say(GetProvenanceRange(start, at_),
97364ab3302SCarolineConcatto             "Bad character in Hollerith literal"_err_en_US);
97464ab3302SCarolineConcatto         break;
97564ab3302SCarolineConcatto       }
97664ab3302SCarolineConcatto     }
97764ab3302SCarolineConcatto   }
97864ab3302SCarolineConcatto   if (*at_ != '\n') {
97964ab3302SCarolineConcatto     NextChar();
98064ab3302SCarolineConcatto   }
98164ab3302SCarolineConcatto   inCharLiteral_ = false;
98264ab3302SCarolineConcatto }
98364ab3302SCarolineConcatto 
98464ab3302SCarolineConcatto // In fixed form, source card images must be processed as if they were at
98564ab3302SCarolineConcatto // least 72 columns wide, at least in character literal contexts.
98664ab3302SCarolineConcatto bool Prescanner::PadOutCharacterLiteral(TokenSequence &tokens) {
98764ab3302SCarolineConcatto   while (inFixedForm_ && !tabInCurrentLine_ && at_[1] == '\n') {
98864ab3302SCarolineConcatto     if (column_ < fixedFormColumnLimit_) {
98964ab3302SCarolineConcatto       tokens.PutNextTokenChar(' ', spaceProvenance_);
99064ab3302SCarolineConcatto       ++column_;
99164ab3302SCarolineConcatto       return true;
99264ab3302SCarolineConcatto     }
99364ab3302SCarolineConcatto     if (!FixedFormContinuation(false /*no need to insert space*/) ||
99464ab3302SCarolineConcatto         tabInCurrentLine_) {
99564ab3302SCarolineConcatto       return false;
99664ab3302SCarolineConcatto     }
99764ab3302SCarolineConcatto     CHECK(column_ == 7);
99864ab3302SCarolineConcatto     --at_; // point to column 6 of continuation line
99964ab3302SCarolineConcatto     column_ = 6;
100064ab3302SCarolineConcatto   }
100164ab3302SCarolineConcatto   return false;
100264ab3302SCarolineConcatto }
100364ab3302SCarolineConcatto 
10042849e119SKelvin Li static bool IsAtProcess(const char *p) {
10052849e119SKelvin Li   static const char pAtProc[]{"process"};
10062849e119SKelvin Li   for (std::size_t i{0}; i < sizeof pAtProc - 1; ++i) {
10072849e119SKelvin Li     if (ToLowerCaseLetter(*++p) != pAtProc[i])
10082849e119SKelvin Li       return false;
10092849e119SKelvin Li   }
10102849e119SKelvin Li   return true;
10112849e119SKelvin Li }
10122849e119SKelvin Li 
101364ab3302SCarolineConcatto bool Prescanner::IsFixedFormCommentLine(const char *start) const {
101464ab3302SCarolineConcatto   const char *p{start};
10152849e119SKelvin Li 
10162849e119SKelvin Li   // The @process directive must start in column 1.
10172849e119SKelvin Li   if (*p == '@' && IsAtProcess(p)) {
10182849e119SKelvin Li     return true;
10192849e119SKelvin Li   }
10202849e119SKelvin Li 
102164ab3302SCarolineConcatto   if (IsFixedFormCommentChar(*p) || *p == '%' || // VAX %list, %eject, &c.
102264ab3302SCarolineConcatto       ((*p == 'D' || *p == 'd') &&
102364ab3302SCarolineConcatto           !features_.IsEnabled(LanguageFeature::OldDebugLines))) {
102464ab3302SCarolineConcatto     return true;
102564ab3302SCarolineConcatto   }
102664ab3302SCarolineConcatto   bool anyTabs{false};
102764ab3302SCarolineConcatto   while (true) {
1028143f3fc4SPeter Klausler     if (int n{IsSpace(p)}) {
1029143f3fc4SPeter Klausler       p += n;
103064ab3302SCarolineConcatto     } else if (*p == '\t') {
103164ab3302SCarolineConcatto       anyTabs = true;
103264ab3302SCarolineConcatto       ++p;
103364ab3302SCarolineConcatto     } else if (*p == '0' && !anyTabs && p == start + 5) {
103464ab3302SCarolineConcatto       ++p; // 0 in column 6 must treated as a space
103564ab3302SCarolineConcatto     } else {
103664ab3302SCarolineConcatto       break;
103764ab3302SCarolineConcatto     }
103864ab3302SCarolineConcatto   }
103964ab3302SCarolineConcatto   if (!anyTabs && p >= start + fixedFormColumnLimit_) {
104064ab3302SCarolineConcatto     return true;
104164ab3302SCarolineConcatto   }
104264ab3302SCarolineConcatto   if (*p == '!' && !inCharLiteral_ && (anyTabs || p != start + 5)) {
104364ab3302SCarolineConcatto     return true;
104464ab3302SCarolineConcatto   }
104564ab3302SCarolineConcatto   return *p == '\n';
104664ab3302SCarolineConcatto }
104764ab3302SCarolineConcatto 
104864ab3302SCarolineConcatto const char *Prescanner::IsFreeFormComment(const char *p) const {
104964ab3302SCarolineConcatto   p = SkipWhiteSpaceAndCComments(p);
105064ab3302SCarolineConcatto   if (*p == '!' || *p == '\n') {
105164ab3302SCarolineConcatto     return p;
10522849e119SKelvin Li   } else if (*p == '@') {
10532849e119SKelvin Li     return IsAtProcess(p) ? p : nullptr;
105464ab3302SCarolineConcatto   } else {
105564ab3302SCarolineConcatto     return nullptr;
105664ab3302SCarolineConcatto   }
105764ab3302SCarolineConcatto }
105864ab3302SCarolineConcatto 
105964ab3302SCarolineConcatto std::optional<std::size_t> Prescanner::IsIncludeLine(const char *start) const {
10604dfed691SPeter Klausler   if (!expandIncludeLines_) {
10614dfed691SPeter Klausler     return std::nullopt;
10624dfed691SPeter Klausler   }
106364ab3302SCarolineConcatto   const char *p{SkipWhiteSpace(start)};
1064d78701e5SPeter Klausler   if (*p == '0' && inFixedForm_ && p == start + 5) {
1065d78701e5SPeter Klausler     // Accept "     0INCLUDE" in fixed form.
1066d78701e5SPeter Klausler     p = SkipWhiteSpace(p + 1);
1067d78701e5SPeter Klausler   }
1068d78701e5SPeter Klausler   for (const char *q{"include"}; *q; ++q) {
1069d78701e5SPeter Klausler     if (ToLowerCaseLetter(*p) != *q) {
107064ab3302SCarolineConcatto       return std::nullopt;
107164ab3302SCarolineConcatto     }
1072d78701e5SPeter Klausler     p = SkipWhiteSpace(p + 1);
107364ab3302SCarolineConcatto   }
1074d78701e5SPeter Klausler   if (IsDecimalDigit(*p)) { // accept & ignore a numeric kind prefix
1075d78701e5SPeter Klausler     for (p = SkipWhiteSpace(p + 1); IsDecimalDigit(*p);
1076d78701e5SPeter Klausler          p = SkipWhiteSpace(p + 1)) {
1077d78701e5SPeter Klausler     }
1078d78701e5SPeter Klausler     if (*p != '_') {
1079d78701e5SPeter Klausler       return std::nullopt;
1080d78701e5SPeter Klausler     }
1081d78701e5SPeter Klausler     p = SkipWhiteSpace(p + 1);
1082d78701e5SPeter Klausler   }
108364ab3302SCarolineConcatto   if (*p == '"' || *p == '\'') {
108464ab3302SCarolineConcatto     return {p - start};
108564ab3302SCarolineConcatto   }
108664ab3302SCarolineConcatto   return std::nullopt;
108764ab3302SCarolineConcatto }
108864ab3302SCarolineConcatto 
108964ab3302SCarolineConcatto void Prescanner::FortranInclude(const char *firstQuote) {
109064ab3302SCarolineConcatto   const char *p{firstQuote};
109164ab3302SCarolineConcatto   while (*p != '"' && *p != '\'') {
109264ab3302SCarolineConcatto     ++p;
109364ab3302SCarolineConcatto   }
109464ab3302SCarolineConcatto   char quote{*p};
109564ab3302SCarolineConcatto   std::string path;
109664ab3302SCarolineConcatto   for (++p; *p != '\n'; ++p) {
109764ab3302SCarolineConcatto     if (*p == quote) {
109864ab3302SCarolineConcatto       if (p[1] != quote) {
109964ab3302SCarolineConcatto         break;
110064ab3302SCarolineConcatto       }
110164ab3302SCarolineConcatto       ++p;
110264ab3302SCarolineConcatto     }
110364ab3302SCarolineConcatto     path += *p;
110464ab3302SCarolineConcatto   }
110564ab3302SCarolineConcatto   if (*p != quote) {
110664ab3302SCarolineConcatto     Say(GetProvenanceRange(firstQuote, p),
110764ab3302SCarolineConcatto         "malformed path name string"_err_en_US);
110864ab3302SCarolineConcatto     return;
110964ab3302SCarolineConcatto   }
111064ab3302SCarolineConcatto   p = SkipWhiteSpace(p + 1);
111164ab3302SCarolineConcatto   if (*p != '\n' && *p != '!') {
111264ab3302SCarolineConcatto     const char *garbage{p};
111364ab3302SCarolineConcatto     for (; *p != '\n' && *p != '!'; ++p) {
111464ab3302SCarolineConcatto     }
1115505f6da1SPeter Klausler     if (features_.ShouldWarn(common::UsageWarning::Scanning)) {
11160f973ac7SPeter Klausler       Say(common::UsageWarning::Scanning, GetProvenanceRange(garbage, p),
1117a53967cdSPeter Klausler           "excess characters after path name"_warn_en_US);
111864ab3302SCarolineConcatto     }
1119505f6da1SPeter Klausler   }
11208670e499SCaroline Concatto   std::string buf;
11218670e499SCaroline Concatto   llvm::raw_string_ostream error{buf};
112264ab3302SCarolineConcatto   Provenance provenance{GetProvenance(nextLine_)};
11236110e771Speter klausler   std::optional<std::string> prependPath;
11246110e771Speter klausler   if (const SourceFile * currentFile{allSources_.GetSourceFile(provenance)}) {
11256110e771Speter klausler     prependPath = DirectoryName(currentFile->path());
112664ab3302SCarolineConcatto   }
11276110e771Speter klausler   const SourceFile *included{
11286110e771Speter klausler       allSources_.Open(path, error, std::move(prependPath))};
112964ab3302SCarolineConcatto   if (!included) {
1130d5dd7d23SYoungsuk Kim     Say(provenance, "INCLUDE: %s"_err_en_US, buf);
113164ab3302SCarolineConcatto   } else if (included->bytes() > 0) {
113264ab3302SCarolineConcatto     ProvenanceRange includeLineRange{
113364ab3302SCarolineConcatto         provenance, static_cast<std::size_t>(p - nextLine_)};
113464ab3302SCarolineConcatto     ProvenanceRange fileRange{
113592a54197Speter klausler         allSources_.AddIncludedFile(*included, includeLineRange)};
1136fc1c481cSPeter Klausler     Preprocessor cleanPrepro{allSources_};
1137fc1c481cSPeter Klausler     if (preprocessor_.IsNameDefined("__FILE__"s)) {
1138fc1c481cSPeter Klausler       cleanPrepro.DefineStandardMacros(); // __FILE__, __LINE__, &c.
1139fc1c481cSPeter Klausler     }
1140fc1c481cSPeter Klausler     if (preprocessor_.IsNameDefined("_CUDA"s)) {
1141fc1c481cSPeter Klausler       cleanPrepro.Define("_CUDA"s, "1");
1142fc1c481cSPeter Klausler     }
1143fc1c481cSPeter Klausler     Prescanner{*this, cleanPrepro, /*isNestedInIncludeDirective=*/false}
11440525c201SPeter Klausler         .set_encoding(included->encoding())
11450525c201SPeter Klausler         .Prescan(fileRange);
114664ab3302SCarolineConcatto   }
114764ab3302SCarolineConcatto }
114864ab3302SCarolineConcatto 
114964ab3302SCarolineConcatto const char *Prescanner::IsPreprocessorDirectiveLine(const char *start) const {
115064ab3302SCarolineConcatto   const char *p{start};
1151143f3fc4SPeter Klausler   while (int n{IsSpace(p)}) {
1152143f3fc4SPeter Klausler     p += n;
115364ab3302SCarolineConcatto   }
115464ab3302SCarolineConcatto   if (*p == '#') {
115564ab3302SCarolineConcatto     if (inFixedForm_ && p == start + 5) {
115664ab3302SCarolineConcatto       return nullptr;
115764ab3302SCarolineConcatto     }
115864ab3302SCarolineConcatto   } else {
115964ab3302SCarolineConcatto     p = SkipWhiteSpace(p);
116064ab3302SCarolineConcatto     if (*p != '#') {
116164ab3302SCarolineConcatto       return nullptr;
116264ab3302SCarolineConcatto     }
116364ab3302SCarolineConcatto   }
116464ab3302SCarolineConcatto   return SkipWhiteSpace(p + 1);
116564ab3302SCarolineConcatto }
116664ab3302SCarolineConcatto 
116764ab3302SCarolineConcatto bool Prescanner::IsNextLinePreprocessorDirective() const {
116864ab3302SCarolineConcatto   return IsPreprocessorDirectiveLine(nextLine_) != nullptr;
116964ab3302SCarolineConcatto }
117064ab3302SCarolineConcatto 
117164ab3302SCarolineConcatto bool Prescanner::SkipCommentLine(bool afterAmpersand) {
11725881bf00Speter klausler   if (IsAtEnd()) {
117364ab3302SCarolineConcatto     if (afterAmpersand && prescannerNesting_ > 0) {
117464ab3302SCarolineConcatto       // A continuation marker at the end of the last line in an
117564ab3302SCarolineConcatto       // include file inhibits the newline for that line.
117664ab3302SCarolineConcatto       SkipToEndOfLine();
117764ab3302SCarolineConcatto       omitNewline_ = true;
117864ab3302SCarolineConcatto     }
1179f099f76bSPeter Klausler   } else if (inPreprocessorDirective_) {
1180f099f76bSPeter Klausler   } else {
118164ab3302SCarolineConcatto     auto lineClass{ClassifyLine(nextLine_)};
118264ab3302SCarolineConcatto     if (lineClass.kind == LineClassification::Kind::Comment) {
118364ab3302SCarolineConcatto       NextLine();
118464ab3302SCarolineConcatto       return true;
118564ab3302SCarolineConcatto     } else if (lineClass.kind ==
118664ab3302SCarolineConcatto             LineClassification::Kind::ConditionalCompilationDirective ||
118764ab3302SCarolineConcatto         lineClass.kind == LineClassification::Kind::PreprocessorDirective) {
118864ab3302SCarolineConcatto       // Allow conditional compilation directives (e.g., #ifdef) to affect
118964ab3302SCarolineConcatto       // continuation lines.
119064ab3302SCarolineConcatto       // Allow other preprocessor directives, too, except #include
119164ab3302SCarolineConcatto       // (when it does not follow '&'), #define, and #undef (because
119264ab3302SCarolineConcatto       // they cannot be allowed to affect preceding text on a
119364ab3302SCarolineConcatto       // continued line).
1194f411be0dSpeter klausler       preprocessor_.Directive(TokenizePreprocessorDirective(), *this);
119564ab3302SCarolineConcatto       return true;
1196f099f76bSPeter Klausler     } else if (afterAmpersand &&
1197f099f76bSPeter Klausler         (lineClass.kind == LineClassification::Kind::DefinitionDirective ||
1198f099f76bSPeter Klausler             lineClass.kind == LineClassification::Kind::IncludeDirective ||
1199f099f76bSPeter Klausler             lineClass.kind == LineClassification::Kind::IncludeLine)) {
1200f099f76bSPeter Klausler       SkipToEndOfLine();
1201f099f76bSPeter Klausler       omitNewline_ = true;
1202f099f76bSPeter Klausler       skipLeadingAmpersand_ = true;
120364ab3302SCarolineConcatto     }
120464ab3302SCarolineConcatto   }
1205f099f76bSPeter Klausler   return false;
1206f099f76bSPeter Klausler }
120764ab3302SCarolineConcatto 
120864ab3302SCarolineConcatto const char *Prescanner::FixedFormContinuationLine(bool mightNeedSpace) {
12095881bf00Speter klausler   if (IsAtEnd()) {
121064ab3302SCarolineConcatto     return nullptr;
121164ab3302SCarolineConcatto   }
121264ab3302SCarolineConcatto   tabInCurrentLine_ = false;
121364ab3302SCarolineConcatto   char col1{*nextLine_};
12148b91de5dSSiHuaN   if (IsFixedFormCommentChar(col1)) {
12158b91de5dSSiHuaN     int j{1};
121664ab3302SCarolineConcatto     if (InCompilerDirective()) {
121764ab3302SCarolineConcatto       // Must be a continued compiler directive.
121864ab3302SCarolineConcatto       for (; j < 5; ++j) {
121964ab3302SCarolineConcatto         char ch{directiveSentinel_[j - 1]};
122064ab3302SCarolineConcatto         if (ch == '\0') {
122164ab3302SCarolineConcatto           break;
122264ab3302SCarolineConcatto         }
122364ab3302SCarolineConcatto         if (ch != ToLowerCaseLetter(nextLine_[j])) {
122464ab3302SCarolineConcatto           return nullptr;
122564ab3302SCarolineConcatto         }
122664ab3302SCarolineConcatto       }
12278b91de5dSSiHuaN     } else if (features_.IsEnabled(LanguageFeature::OpenMP)) {
12288b91de5dSSiHuaN       // Fixed Source Form Conditional Compilation Sentinels.
12298b91de5dSSiHuaN       if (nextLine_[1] != '$') {
12308b91de5dSSiHuaN         return nullptr;
12318b91de5dSSiHuaN       }
12328b91de5dSSiHuaN       j++;
12338b91de5dSSiHuaN     } else {
12348b91de5dSSiHuaN       return nullptr;
12358b91de5dSSiHuaN     }
123664ab3302SCarolineConcatto     for (; j < 5; ++j) {
123764ab3302SCarolineConcatto       if (nextLine_[j] != ' ') {
123864ab3302SCarolineConcatto         return nullptr;
123964ab3302SCarolineConcatto       }
124064ab3302SCarolineConcatto     }
1241143f3fc4SPeter Klausler     const char *col6{nextLine_ + 5};
1242143f3fc4SPeter Klausler     if (*col6 != '\n' && *col6 != '0' && !IsSpaceOrTab(col6)) {
1243143f3fc4SPeter Klausler       if (mightNeedSpace && !IsSpace(nextLine_ + 6)) {
124464ab3302SCarolineConcatto         insertASpace_ = true;
124564ab3302SCarolineConcatto       }
124664ab3302SCarolineConcatto       return nextLine_ + 6;
124764ab3302SCarolineConcatto     }
124864ab3302SCarolineConcatto     return nullptr;
124964ab3302SCarolineConcatto   } else {
125064ab3302SCarolineConcatto     // Normal case: not in a compiler directive.
125164ab3302SCarolineConcatto     if (col1 == '&' &&
125264ab3302SCarolineConcatto         features_.IsEnabled(
125364ab3302SCarolineConcatto             LanguageFeature::FixedFormContinuationWithColumn1Ampersand)) {
125464ab3302SCarolineConcatto       // Extension: '&' as continuation marker
125564ab3302SCarolineConcatto       if (features_.ShouldWarn(
125664ab3302SCarolineConcatto               LanguageFeature::FixedFormContinuationWithColumn1Ampersand)) {
12570f973ac7SPeter Klausler         Say(LanguageFeature::FixedFormContinuationWithColumn1Ampersand,
12580f973ac7SPeter Klausler             GetProvenance(nextLine_), "nonstandard usage"_port_en_US);
125964ab3302SCarolineConcatto       }
126064ab3302SCarolineConcatto       return nextLine_ + 1;
126164ab3302SCarolineConcatto     }
126264ab3302SCarolineConcatto     if (col1 == '\t' && nextLine_[1] >= '1' && nextLine_[1] <= '9') {
126364ab3302SCarolineConcatto       tabInCurrentLine_ = true;
126464ab3302SCarolineConcatto       return nextLine_ + 2; // VAX extension
126564ab3302SCarolineConcatto     }
1266fa44ec72SPeter Klausler     if ((col1 == ' ' ||
1267fa44ec72SPeter Klausler             ((col1 == 'D' || col1 == 'd') &&
1268fa44ec72SPeter Klausler                 features_.IsEnabled(LanguageFeature::OldDebugLines))) &&
1269fa44ec72SPeter Klausler         nextLine_[1] == ' ' && nextLine_[2] == ' ' && nextLine_[3] == ' ' &&
1270fa44ec72SPeter Klausler         nextLine_[4] == ' ') {
1271143f3fc4SPeter Klausler       const char *col6{nextLine_ + 5};
1272143f3fc4SPeter Klausler       if (*col6 != '\n' && *col6 != '0' && !IsSpaceOrTab(col6)) {
1273143f3fc4SPeter Klausler         if ((*col6 == 'i' || *col6 == 'I') && IsIncludeLine(nextLine_)) {
1274d78701e5SPeter Klausler           // It's An INCLUDE line, not a continuation
1275d78701e5SPeter Klausler         } else {
127664ab3302SCarolineConcatto           return nextLine_ + 6;
127764ab3302SCarolineConcatto         }
127864ab3302SCarolineConcatto       }
1279d78701e5SPeter Klausler     }
1280320389e8Speter klausler     if (IsImplicitContinuation()) {
128164ab3302SCarolineConcatto       return nextLine_;
128264ab3302SCarolineConcatto     }
128364ab3302SCarolineConcatto   }
128464ab3302SCarolineConcatto   return nullptr; // not a continuation line
128564ab3302SCarolineConcatto }
128664ab3302SCarolineConcatto 
128764ab3302SCarolineConcatto const char *Prescanner::FreeFormContinuationLine(bool ampersand) {
128864ab3302SCarolineConcatto   const char *p{nextLine_};
128964ab3302SCarolineConcatto   if (p >= limit_) {
129064ab3302SCarolineConcatto     return nullptr;
129164ab3302SCarolineConcatto   }
129264ab3302SCarolineConcatto   p = SkipWhiteSpace(p);
129319c93483Skd0608   if (*p == '!') {
129419c93483Skd0608     ++p;
129564ab3302SCarolineConcatto     if (InCompilerDirective()) {
129664ab3302SCarolineConcatto       for (const char *s{directiveSentinel_}; *s != '\0'; ++p, ++s) {
129764ab3302SCarolineConcatto         if (*s != ToLowerCaseLetter(*p)) {
129864ab3302SCarolineConcatto           return nullptr;
129964ab3302SCarolineConcatto         }
130064ab3302SCarolineConcatto       }
130119c93483Skd0608     } else if (features_.IsEnabled(LanguageFeature::OpenMP) && *p == '$') {
130219c93483Skd0608       ++p;
130319c93483Skd0608     } else {
130419c93483Skd0608       return nullptr;
130519c93483Skd0608     }
130664ab3302SCarolineConcatto     p = SkipWhiteSpace(p);
130764ab3302SCarolineConcatto     if (*p == '&') {
130864ab3302SCarolineConcatto       if (!ampersand) {
130964ab3302SCarolineConcatto         insertASpace_ = true;
131064ab3302SCarolineConcatto       }
131164ab3302SCarolineConcatto       return p + 1;
131264ab3302SCarolineConcatto     } else if (ampersand) {
131364ab3302SCarolineConcatto       return p;
131464ab3302SCarolineConcatto     } else {
131564ab3302SCarolineConcatto       return nullptr;
131664ab3302SCarolineConcatto     }
131764ab3302SCarolineConcatto   } else {
131864ab3302SCarolineConcatto     if (*p == '&') {
131964ab3302SCarolineConcatto       return p + 1;
132064ab3302SCarolineConcatto     } else if (*p == '!' || *p == '\n' || *p == '#') {
132164ab3302SCarolineConcatto       return nullptr;
1322320389e8Speter klausler     } else if (ampersand || IsImplicitContinuation()) {
1323ea3a3b25SPeter Klausler       if (continuationInCharLiteral_) {
1324ea3a3b25SPeter Klausler         // 'a'&            -> 'a''b' == "a'b"
1325ea3a3b25SPeter Klausler         //   'b'
1326ea3a3b25SPeter Klausler         if (features_.ShouldWarn(
1327ea3a3b25SPeter Klausler                 common::LanguageFeature::MiscSourceExtensions)) {
13280f973ac7SPeter Klausler           Say(common::LanguageFeature::MiscSourceExtensions,
13290f973ac7SPeter Klausler               GetProvenanceRange(p, p + 1),
1330ea3a3b25SPeter Klausler               "Character literal continuation line should have been preceded by '&'"_port_en_US);
1331ea3a3b25SPeter Klausler         }
1332ea3a3b25SPeter Klausler       } else if (p > nextLine_) {
133364ab3302SCarolineConcatto         --p;
133464ab3302SCarolineConcatto       } else {
133564ab3302SCarolineConcatto         insertASpace_ = true;
133664ab3302SCarolineConcatto       }
133764ab3302SCarolineConcatto       return p;
133864ab3302SCarolineConcatto     } else {
133964ab3302SCarolineConcatto       return nullptr;
134064ab3302SCarolineConcatto     }
134164ab3302SCarolineConcatto   }
134264ab3302SCarolineConcatto }
134364ab3302SCarolineConcatto 
134464ab3302SCarolineConcatto bool Prescanner::FixedFormContinuation(bool mightNeedSpace) {
134564ab3302SCarolineConcatto   // N.B. We accept '&' as a continuation indicator in fixed form, too,
134664ab3302SCarolineConcatto   // but not in a character literal.
134764ab3302SCarolineConcatto   if (*at_ == '&' && inCharLiteral_) {
134864ab3302SCarolineConcatto     return false;
134964ab3302SCarolineConcatto   }
135064ab3302SCarolineConcatto   do {
135164ab3302SCarolineConcatto     if (const char *cont{FixedFormContinuationLine(mightNeedSpace)}) {
135264ab3302SCarolineConcatto       BeginSourceLine(cont);
135364ab3302SCarolineConcatto       column_ = 7;
135464ab3302SCarolineConcatto       NextLine();
135564ab3302SCarolineConcatto       return true;
135664ab3302SCarolineConcatto     }
135764ab3302SCarolineConcatto   } while (SkipCommentLine(false /* not after ampersand */));
135864ab3302SCarolineConcatto   return false;
135964ab3302SCarolineConcatto }
136064ab3302SCarolineConcatto 
136164ab3302SCarolineConcatto bool Prescanner::FreeFormContinuation() {
136264ab3302SCarolineConcatto   const char *p{at_};
136364ab3302SCarolineConcatto   bool ampersand{*p == '&'};
136464ab3302SCarolineConcatto   if (ampersand) {
136564ab3302SCarolineConcatto     p = SkipWhiteSpace(p + 1);
136664ab3302SCarolineConcatto   }
136764ab3302SCarolineConcatto   if (*p != '\n') {
136864ab3302SCarolineConcatto     if (inCharLiteral_) {
136964ab3302SCarolineConcatto       return false;
1370f706411fSPeter Klausler     } else if (*p == '!') { // & ! comment - ok
1371f706411fSPeter Klausler     } else if (ampersand && isPossibleMacroCall_ && (*p == ',' || *p == ')')) {
1372f706411fSPeter Klausler       return false; // allow & at end of a macro argument
1373f706411fSPeter Klausler     } else if (features_.ShouldWarn(LanguageFeature::CruftAfterAmpersand)) {
13740f973ac7SPeter Klausler       Say(LanguageFeature::CruftAfterAmpersand, GetProvenance(p),
13750f973ac7SPeter Klausler           "missing ! before comment after &"_warn_en_US);
137664ab3302SCarolineConcatto     }
137764ab3302SCarolineConcatto   }
137864ab3302SCarolineConcatto   do {
137964ab3302SCarolineConcatto     if (const char *cont{FreeFormContinuationLine(ampersand)}) {
138064ab3302SCarolineConcatto       BeginSourceLine(cont);
138164ab3302SCarolineConcatto       NextLine();
138264ab3302SCarolineConcatto       return true;
138364ab3302SCarolineConcatto     }
138464ab3302SCarolineConcatto   } while (SkipCommentLine(ampersand));
138564ab3302SCarolineConcatto   return false;
138664ab3302SCarolineConcatto }
138764ab3302SCarolineConcatto 
1388320389e8Speter klausler // Implicit line continuation allows a preprocessor macro call with
1389320389e8Speter klausler // arguments to span multiple lines.
1390320389e8Speter klausler bool Prescanner::IsImplicitContinuation() const {
13916fac3f7bSPeter Klausler   return !inPreprocessorDirective_ && !inCharLiteral_ && isPossibleMacroCall_ &&
139250e1ad6eSRoger Ferrer Ibanez       parenthesisNesting_ > 0 && !IsAtEnd() &&
1393320389e8Speter klausler       ClassifyLine(nextLine_).kind == LineClassification::Kind::Source;
1394320389e8Speter klausler }
1395320389e8Speter klausler 
139664ab3302SCarolineConcatto bool Prescanner::Continuation(bool mightNeedFixedFormSpace) {
1397e286ecfeSPeter Klausler   if (disableSourceContinuation_) {
1398e286ecfeSPeter Klausler     return false;
1399e286ecfeSPeter Klausler   } else if (*at_ == '\n' || *at_ == '&') {
140064ab3302SCarolineConcatto     if (inFixedForm_) {
140164ab3302SCarolineConcatto       return FixedFormContinuation(mightNeedFixedFormSpace);
140264ab3302SCarolineConcatto     } else {
140364ab3302SCarolineConcatto       return FreeFormContinuation();
140464ab3302SCarolineConcatto     }
14058b512e52SPeter Klausler   } else if (*at_ == '\\' && at_ + 2 == nextLine_ &&
14068b512e52SPeter Klausler       backslashFreeFormContinuation_ && !inFixedForm_ && nextLine_ < limit_) {
14078b512e52SPeter Klausler     // cpp-like handling of \ at end of a free form source line
14088b512e52SPeter Klausler     BeginSourceLine(nextLine_);
14098b512e52SPeter Klausler     NextLine();
14108b512e52SPeter Klausler     return true;
1411e286ecfeSPeter Klausler   } else {
14128b512e52SPeter Klausler     return false;
141364ab3302SCarolineConcatto   }
1414e286ecfeSPeter Klausler }
141564ab3302SCarolineConcatto 
141664ab3302SCarolineConcatto std::optional<Prescanner::LineClassification>
141764ab3302SCarolineConcatto Prescanner::IsFixedFormCompilerDirectiveLine(const char *start) const {
141864ab3302SCarolineConcatto   const char *p{start};
141964ab3302SCarolineConcatto   char col1{*p++};
142064ab3302SCarolineConcatto   if (!IsFixedFormCommentChar(col1)) {
142164ab3302SCarolineConcatto     return std::nullopt;
142264ab3302SCarolineConcatto   }
142364ab3302SCarolineConcatto   char sentinel[5], *sp{sentinel};
142464ab3302SCarolineConcatto   int column{2};
142564ab3302SCarolineConcatto   for (; column < 6; ++column, ++p) {
1426143f3fc4SPeter Klausler     if (*p == '\n' || IsSpaceOrTab(p)) {
142764ab3302SCarolineConcatto       break;
142864ab3302SCarolineConcatto     }
142964ab3302SCarolineConcatto     if (sp == sentinel + 1 && sentinel[0] == '$' && IsDecimalDigit(*p)) {
143064ab3302SCarolineConcatto       // OpenMP conditional compilation line: leave the label alone
143164ab3302SCarolineConcatto       break;
143264ab3302SCarolineConcatto     }
143364ab3302SCarolineConcatto     *sp++ = ToLowerCaseLetter(*p);
143464ab3302SCarolineConcatto   }
143564ab3302SCarolineConcatto   if (column == 6) {
1436143f3fc4SPeter Klausler     if (*p == '0') {
143764ab3302SCarolineConcatto       ++p;
1438143f3fc4SPeter Klausler     } else if (int n{IsSpaceOrTab(p)}) {
1439143f3fc4SPeter Klausler       p += n;
144064ab3302SCarolineConcatto     } else {
144164ab3302SCarolineConcatto       // This is a Continuation line, not an initial directive line.
144264ab3302SCarolineConcatto       return std::nullopt;
144364ab3302SCarolineConcatto     }
144464ab3302SCarolineConcatto   }
144564ab3302SCarolineConcatto   if (sp == sentinel) {
144664ab3302SCarolineConcatto     return std::nullopt;
144764ab3302SCarolineConcatto   }
144864ab3302SCarolineConcatto   *sp = '\0';
1449cbc5d42fSPeter Klausler   if (const char *ss{IsCompilerDirectiveSentinel(
1450cbc5d42fSPeter Klausler           sentinel, static_cast<std::size_t>(sp - sentinel))}) {
145164ab3302SCarolineConcatto     std::size_t payloadOffset = p - start;
145264ab3302SCarolineConcatto     return {LineClassification{
145364ab3302SCarolineConcatto         LineClassification::Kind::CompilerDirective, payloadOffset, ss}};
145464ab3302SCarolineConcatto   }
145564ab3302SCarolineConcatto   return std::nullopt;
145664ab3302SCarolineConcatto }
145764ab3302SCarolineConcatto 
145864ab3302SCarolineConcatto std::optional<Prescanner::LineClassification>
145964ab3302SCarolineConcatto Prescanner::IsFreeFormCompilerDirectiveLine(const char *start) const {
1460259ce119SPeter Klausler   if (const char *p{SkipWhiteSpace(start)}; p && *p++ == '!') {
1461259ce119SPeter Klausler     if (auto maybePair{IsCompilerDirectiveSentinel(p)}) {
1462259ce119SPeter Klausler       auto offset{static_cast<std::size_t>(maybePair->second - start)};
1463259ce119SPeter Klausler       return {LineClassification{LineClassification::Kind::CompilerDirective,
1464259ce119SPeter Klausler           offset, maybePair->first}};
146564ab3302SCarolineConcatto     }
146664ab3302SCarolineConcatto   }
146764ab3302SCarolineConcatto   return std::nullopt;
146864ab3302SCarolineConcatto }
146964ab3302SCarolineConcatto 
147064ab3302SCarolineConcatto Prescanner &Prescanner::AddCompilerDirectiveSentinel(const std::string &dir) {
147164ab3302SCarolineConcatto   std::uint64_t packed{0};
147264ab3302SCarolineConcatto   for (char ch : dir) {
147364ab3302SCarolineConcatto     packed = (packed << 8) | (ToLowerCaseLetter(ch) & 0xff);
147464ab3302SCarolineConcatto   }
147564ab3302SCarolineConcatto   compilerDirectiveBloomFilter_.set(packed % prime1);
147664ab3302SCarolineConcatto   compilerDirectiveBloomFilter_.set(packed % prime2);
147764ab3302SCarolineConcatto   compilerDirectiveSentinels_.insert(dir);
147864ab3302SCarolineConcatto   return *this;
147964ab3302SCarolineConcatto }
148064ab3302SCarolineConcatto 
148164ab3302SCarolineConcatto const char *Prescanner::IsCompilerDirectiveSentinel(
1482cbc5d42fSPeter Klausler     const char *sentinel, std::size_t len) const {
148364ab3302SCarolineConcatto   std::uint64_t packed{0};
1484cbc5d42fSPeter Klausler   for (std::size_t j{0}; j < len; ++j) {
1485cbc5d42fSPeter Klausler     packed = (packed << 8) | (sentinel[j] & 0xff);
148664ab3302SCarolineConcatto   }
1487cbc5d42fSPeter Klausler   if (len == 0 || !compilerDirectiveBloomFilter_.test(packed % prime1) ||
148864ab3302SCarolineConcatto       !compilerDirectiveBloomFilter_.test(packed % prime2)) {
148964ab3302SCarolineConcatto     return nullptr;
149064ab3302SCarolineConcatto   }
1491cbc5d42fSPeter Klausler   const auto iter{compilerDirectiveSentinels_.find(std::string(sentinel, len))};
149264ab3302SCarolineConcatto   return iter == compilerDirectiveSentinels_.end() ? nullptr : iter->c_str();
149364ab3302SCarolineConcatto }
149464ab3302SCarolineConcatto 
1495297230acSPeter Klausler const char *Prescanner::IsCompilerDirectiveSentinel(CharBlock token) const {
1496297230acSPeter Klausler   const char *p{token.begin()};
1497297230acSPeter Klausler   const char *end{p + token.size()};
1498297230acSPeter Klausler   while (p < end && (*p == ' ' || *p == '\n')) {
1499297230acSPeter Klausler     ++p;
1500297230acSPeter Klausler   }
1501297230acSPeter Klausler   if (p < end && *p == '!') {
1502297230acSPeter Klausler     ++p;
1503297230acSPeter Klausler   }
1504297230acSPeter Klausler   while (end > p && (end[-1] == ' ' || end[-1] == '\t')) {
1505297230acSPeter Klausler     --end;
1506297230acSPeter Klausler   }
1507297230acSPeter Klausler   return end > p && IsCompilerDirectiveSentinel(p, end - p) ? p : nullptr;
1508297230acSPeter Klausler }
1509297230acSPeter Klausler 
1510259ce119SPeter Klausler std::optional<std::pair<const char *, const char *>>
1511259ce119SPeter Klausler Prescanner::IsCompilerDirectiveSentinel(const char *p) const {
1512259ce119SPeter Klausler   char sentinel[8];
1513259ce119SPeter Klausler   for (std::size_t j{0}; j + 1 < sizeof sentinel && *p != '\n'; ++p, ++j) {
1514143f3fc4SPeter Klausler     if (int n{*p == '&' ? 1 : IsSpaceOrTab(p)}) {
1515259ce119SPeter Klausler       if (j > 0) {
1516259ce119SPeter Klausler         sentinel[j] = '\0';
1517143f3fc4SPeter Klausler         p = SkipWhiteSpace(p + n);
1518259ce119SPeter Klausler         if (*p != '!') {
1519259ce119SPeter Klausler           if (const char *sp{IsCompilerDirectiveSentinel(sentinel, j)}) {
1520259ce119SPeter Klausler             return std::make_pair(sp, p);
1521259ce119SPeter Klausler           }
1522259ce119SPeter Klausler         }
1523259ce119SPeter Klausler       }
1524259ce119SPeter Klausler       break;
1525259ce119SPeter Klausler     } else {
1526259ce119SPeter Klausler       sentinel[j] = ToLowerCaseLetter(*p);
1527259ce119SPeter Klausler     }
1528259ce119SPeter Klausler   }
1529259ce119SPeter Klausler   return std::nullopt;
1530259ce119SPeter Klausler }
1531259ce119SPeter Klausler 
1532089adc33Speter klausler constexpr bool IsDirective(const char *match, const char *dir) {
1533089adc33Speter klausler   for (; *match; ++match) {
1534089adc33Speter klausler     if (*match != ToLowerCaseLetter(*dir++)) {
1535089adc33Speter klausler       return false;
1536089adc33Speter klausler     }
1537089adc33Speter klausler   }
1538089adc33Speter klausler   return true;
1539089adc33Speter klausler }
1540089adc33Speter klausler 
154164ab3302SCarolineConcatto Prescanner::LineClassification Prescanner::ClassifyLine(
154264ab3302SCarolineConcatto     const char *start) const {
154364ab3302SCarolineConcatto   if (inFixedForm_) {
154464ab3302SCarolineConcatto     if (std::optional<LineClassification> lc{
154564ab3302SCarolineConcatto             IsFixedFormCompilerDirectiveLine(start)}) {
154664ab3302SCarolineConcatto       return std::move(*lc);
154764ab3302SCarolineConcatto     }
154864ab3302SCarolineConcatto     if (IsFixedFormCommentLine(start)) {
154964ab3302SCarolineConcatto       return {LineClassification::Kind::Comment};
155064ab3302SCarolineConcatto     }
155164ab3302SCarolineConcatto   } else {
155264ab3302SCarolineConcatto     if (std::optional<LineClassification> lc{
155364ab3302SCarolineConcatto             IsFreeFormCompilerDirectiveLine(start)}) {
155464ab3302SCarolineConcatto       return std::move(*lc);
155564ab3302SCarolineConcatto     }
155664ab3302SCarolineConcatto     if (const char *bang{IsFreeFormComment(start)}) {
155764ab3302SCarolineConcatto       return {LineClassification::Kind::Comment,
155864ab3302SCarolineConcatto           static_cast<std::size_t>(bang - start)};
155964ab3302SCarolineConcatto     }
156064ab3302SCarolineConcatto   }
156164ab3302SCarolineConcatto   if (std::optional<std::size_t> quoteOffset{IsIncludeLine(start)}) {
156264ab3302SCarolineConcatto     return {LineClassification::Kind::IncludeLine, *quoteOffset};
156364ab3302SCarolineConcatto   }
156464ab3302SCarolineConcatto   if (const char *dir{IsPreprocessorDirectiveLine(start)}) {
1565089adc33Speter klausler     if (IsDirective("if", dir) || IsDirective("elif", dir) ||
1566089adc33Speter klausler         IsDirective("else", dir) || IsDirective("endif", dir)) {
156764ab3302SCarolineConcatto       return {LineClassification::Kind::ConditionalCompilationDirective};
1568089adc33Speter klausler     } else if (IsDirective("include", dir)) {
156964ab3302SCarolineConcatto       return {LineClassification::Kind::IncludeDirective};
1570089adc33Speter klausler     } else if (IsDirective("define", dir) || IsDirective("undef", dir)) {
157164ab3302SCarolineConcatto       return {LineClassification::Kind::DefinitionDirective};
157264ab3302SCarolineConcatto     } else {
157364ab3302SCarolineConcatto       return {LineClassification::Kind::PreprocessorDirective};
157464ab3302SCarolineConcatto     }
157564ab3302SCarolineConcatto   }
157664ab3302SCarolineConcatto   return {LineClassification::Kind::Source};
157764ab3302SCarolineConcatto }
157864ab3302SCarolineConcatto 
1579e286ecfeSPeter Klausler Prescanner::LineClassification Prescanner::ClassifyLine(
1580e286ecfeSPeter Klausler     TokenSequence &tokens, Provenance newlineProvenance) const {
1581e286ecfeSPeter Klausler   // Append a newline temporarily.
1582e286ecfeSPeter Klausler   tokens.PutNextTokenChar('\n', newlineProvenance);
1583e286ecfeSPeter Klausler   tokens.CloseToken();
1584e286ecfeSPeter Klausler   const char *ppd{tokens.ToCharBlock().begin()};
1585e286ecfeSPeter Klausler   LineClassification classification{ClassifyLine(ppd)};
1586e286ecfeSPeter Klausler   tokens.pop_back(); // remove the newline
1587e286ecfeSPeter Klausler   return classification;
1588e286ecfeSPeter Klausler }
1589e286ecfeSPeter Klausler 
159064ab3302SCarolineConcatto void Prescanner::SourceFormChange(std::string &&dir) {
159164ab3302SCarolineConcatto   if (dir == "!dir$ free") {
159264ab3302SCarolineConcatto     inFixedForm_ = false;
159364ab3302SCarolineConcatto   } else if (dir == "!dir$ fixed") {
159464ab3302SCarolineConcatto     inFixedForm_ = true;
159564ab3302SCarolineConcatto   }
159664ab3302SCarolineConcatto }
1597f706411fSPeter Klausler 
1598f706411fSPeter Klausler // Acquire and append compiler directive continuation lines to
1599f706411fSPeter Klausler // the tokens that constitute a compiler directive, even when those
1600f706411fSPeter Klausler // directive continuation lines are the result of macro expansion.
1601f706411fSPeter Klausler // (Not used when neither the original compiler directive line nor
1602f706411fSPeter Klausler // the directive continuation line result from preprocessing; regular
1603f706411fSPeter Klausler // line continuation during tokenization handles that normal case.)
1604f706411fSPeter Klausler bool Prescanner::CompilerDirectiveContinuation(
1605f706411fSPeter Klausler     TokenSequence &tokens, const char *origSentinel) {
1606f706411fSPeter Klausler   if (inFixedForm_ || tokens.empty() ||
1607f706411fSPeter Klausler       tokens.TokenAt(tokens.SizeInTokens() - 1) != "&") {
1608f706411fSPeter Klausler     return false;
1609f706411fSPeter Klausler   }
1610f706411fSPeter Klausler   LineClassification followingLine{ClassifyLine(nextLine_)};
1611f706411fSPeter Klausler   if (followingLine.kind == LineClassification::Kind::Comment) {
1612f706411fSPeter Klausler     nextLine_ += followingLine.payloadOffset; // advance to '!' or newline
1613f706411fSPeter Klausler     NextLine();
1614f706411fSPeter Klausler     return true;
1615f706411fSPeter Klausler   }
1616f706411fSPeter Klausler   CHECK(origSentinel != nullptr);
1617e286ecfeSPeter Klausler   directiveSentinel_ = origSentinel; // so InCompilerDirective() is true
1618f706411fSPeter Klausler   const char *nextContinuation{
1619f706411fSPeter Klausler       followingLine.kind == LineClassification::Kind::CompilerDirective
1620f706411fSPeter Klausler           ? FreeFormContinuationLine(true)
1621f706411fSPeter Klausler           : nullptr};
1622f706411fSPeter Klausler   if (!nextContinuation &&
1623f706411fSPeter Klausler       followingLine.kind != LineClassification::Kind::Source) {
1624f706411fSPeter Klausler     return false;
1625f706411fSPeter Klausler   }
1626f706411fSPeter Klausler   auto origNextLine{nextLine_};
1627f706411fSPeter Klausler   BeginSourceLine(nextLine_);
1628f706411fSPeter Klausler   NextLine();
1629f706411fSPeter Klausler   if (nextContinuation) {
1630f706411fSPeter Klausler     // What follows is !DIR$ & xxx; skip over the & so that it
1631f706411fSPeter Klausler     // doesn't cause a spurious continuation.
1632f706411fSPeter Klausler     at_ = nextContinuation;
1633f706411fSPeter Klausler   } else {
1634f706411fSPeter Klausler     // What follows looks like a source line before macro expansion,
1635f706411fSPeter Klausler     // but might become a directive continuation afterwards.
1636f706411fSPeter Klausler     SkipSpaces();
1637f706411fSPeter Klausler   }
1638e286ecfeSPeter Klausler   TokenSequence followingTokens;
1639f706411fSPeter Klausler   while (NextToken(followingTokens)) {
1640f706411fSPeter Klausler   }
1641f706411fSPeter Klausler   if (auto followingPrepro{
1642f706411fSPeter Klausler           preprocessor_.MacroReplacement(followingTokens, *this)}) {
1643f706411fSPeter Klausler     followingTokens = std::move(*followingPrepro);
1644f706411fSPeter Klausler   }
1645f706411fSPeter Klausler   followingTokens.RemoveRedundantBlanks();
1646f706411fSPeter Klausler   std::size_t startAt{0};
1647e286ecfeSPeter Klausler   std::size_t following{followingTokens.SizeInTokens()};
1648f706411fSPeter Klausler   bool ok{false};
1649f706411fSPeter Klausler   if (nextContinuation) {
1650f706411fSPeter Klausler     ok = true;
1651f706411fSPeter Klausler   } else {
1652e286ecfeSPeter Klausler     startAt = 2;
1653e286ecfeSPeter Klausler     if (startAt < following && followingTokens.TokenAt(0) == "!") {
1654f706411fSPeter Klausler       CharBlock sentinel{followingTokens.TokenAt(1)};
1655f706411fSPeter Klausler       if (!sentinel.empty() &&
1656f706411fSPeter Klausler           std::memcmp(sentinel.begin(), origSentinel, sentinel.size()) == 0) {
1657f706411fSPeter Klausler         ok = true;
1658e286ecfeSPeter Klausler         while (
1659e286ecfeSPeter Klausler             startAt < following && followingTokens.TokenAt(startAt).IsBlank()) {
1660e286ecfeSPeter Klausler           ++startAt;
1661e286ecfeSPeter Klausler         }
1662e286ecfeSPeter Klausler         if (startAt < following && followingTokens.TokenAt(startAt) == "&") {
1663e286ecfeSPeter Klausler           ++startAt;
1664e286ecfeSPeter Klausler         }
1665f706411fSPeter Klausler       }
1666f706411fSPeter Klausler     }
1667f706411fSPeter Klausler   }
1668f706411fSPeter Klausler   if (ok) {
1669f706411fSPeter Klausler     tokens.pop_back(); // delete original '&'
1670e286ecfeSPeter Klausler     tokens.Put(followingTokens, startAt, following - startAt);
1671e286ecfeSPeter Klausler     tokens.RemoveRedundantBlanks();
1672f706411fSPeter Klausler   } else {
1673f706411fSPeter Klausler     nextLine_ = origNextLine;
1674f706411fSPeter Klausler   }
1675f706411fSPeter Klausler   return ok;
1676f706411fSPeter Klausler }
1677f706411fSPeter Klausler 
1678f706411fSPeter Klausler // Similar, but for source line continuation after macro replacement.
1679f706411fSPeter Klausler bool Prescanner::SourceLineContinuation(TokenSequence &tokens) {
1680f706411fSPeter Klausler   if (!inFixedForm_ && !tokens.empty() &&
1681f706411fSPeter Klausler       tokens.TokenAt(tokens.SizeInTokens() - 1) == "&") {
1682f706411fSPeter Klausler     LineClassification followingLine{ClassifyLine(nextLine_)};
1683f706411fSPeter Klausler     if (followingLine.kind == LineClassification::Kind::Comment) {
1684f706411fSPeter Klausler       nextLine_ += followingLine.payloadOffset; // advance to '!' or newline
1685f706411fSPeter Klausler       NextLine();
1686f706411fSPeter Klausler       return true;
1687f706411fSPeter Klausler     } else if (const char *nextContinuation{FreeFormContinuationLine(true)}) {
1688f706411fSPeter Klausler       BeginSourceLine(nextLine_);
1689f706411fSPeter Klausler       NextLine();
1690f706411fSPeter Klausler       TokenSequence followingTokens;
1691f706411fSPeter Klausler       at_ = nextContinuation;
1692f706411fSPeter Klausler       while (NextToken(followingTokens)) {
1693f706411fSPeter Klausler       }
1694f706411fSPeter Klausler       if (auto followingPrepro{
1695f706411fSPeter Klausler               preprocessor_.MacroReplacement(followingTokens, *this)}) {
1696f706411fSPeter Klausler         followingTokens = std::move(*followingPrepro);
1697f706411fSPeter Klausler       }
1698f706411fSPeter Klausler       followingTokens.RemoveRedundantBlanks();
1699f706411fSPeter Klausler       tokens.pop_back(); // delete original '&'
1700f706411fSPeter Klausler       tokens.Put(followingTokens);
1701f706411fSPeter Klausler       return true;
1702f706411fSPeter Klausler     }
1703f706411fSPeter Klausler   }
1704f706411fSPeter Klausler   return false;
1705f706411fSPeter Klausler }
17061f879005STim Keith } // namespace Fortran::parser
1707