xref: /llvm-project/flang/lib/Parser/prescan.cpp (revision d5dd7d230ecaf8242f4429a5e3653e16bf55bcd6)
1 //===-- lib/Parser/prescan.cpp --------------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "prescan.h"
10 #include "flang/Common/idioms.h"
11 #include "flang/Parser/characters.h"
12 #include "flang/Parser/message.h"
13 #include "flang/Parser/preprocessor.h"
14 #include "flang/Parser/source.h"
15 #include "flang/Parser/token-sequence.h"
16 #include "llvm/Support/raw_ostream.h"
17 #include <cstddef>
18 #include <cstring>
19 #include <utility>
20 #include <vector>
21 
22 namespace Fortran::parser {
23 
// Lets the code below spell feature names as LanguageFeature::X
// (e.g. LanguageFeature::OldDebugLines).
using common::LanguageFeature;

// Largest permitted value of prescannerNesting_.  Prescan() emits an error
// and stops when a nested prescanner exceeds it, which most likely indicates
// a circular chain of INCLUDE/#include files.
static constexpr int maxPrescannerNesting{100};
27 
28 Prescanner::Prescanner(Messages &messages, CookedSource &cooked,
29     Preprocessor &preprocessor, common::LanguageFeatureControl lfc)
30     : messages_{messages}, cooked_{cooked}, preprocessor_{preprocessor},
31       allSources_{preprocessor_.allSources()}, features_{lfc},
32       backslashFreeFormContinuation_{preprocessor.AnyDefinitions()},
33       encoding_{allSources_.encoding()} {}
34 
35 Prescanner::Prescanner(const Prescanner &that, Preprocessor &prepro,
36     bool isNestedInIncludeDirective)
37     : messages_{that.messages_}, cooked_{that.cooked_}, preprocessor_{prepro},
38       allSources_{that.allSources_}, features_{that.features_},
39       isNestedInIncludeDirective_{isNestedInIncludeDirective},
40       backslashFreeFormContinuation_{that.backslashFreeFormContinuation_},
41       inFixedForm_{that.inFixedForm_},
42       fixedFormColumnLimit_{that.fixedFormColumnLimit_},
43       encoding_{that.encoding_},
44       prescannerNesting_{that.prescannerNesting_ + 1},
45       skipLeadingAmpersand_{that.skipLeadingAmpersand_},
46       compilerDirectiveBloomFilter_{that.compilerDirectiveBloomFilter_},
47       compilerDirectiveSentinels_{that.compilerDirectiveSentinels_} {}
48 
// Tests whether p points at a blank and returns the number of bytes that the
// blank occupies (i.e. how far to advance to skip it):
//   1 for an ASCII space or a LATIN-1 NBSP (0xA0),
//   2 for a UTF-8 encoded NBSP (0xC2 0xA0),
//   0 when p does not point at a blank.
// The second byte is examined only when the first byte is 0xC2.
static inline int IsSpace(const char *p) {
  switch (p[0]) {
  case ' ':
  case '\xa0': // LATIN-1 NBSP non-breaking space
    return 1;
  case '\xc2': // possible lead byte of a UTF-8 NBSP
    return p[1] == '\xa0' ? 2 : 0;
  default:
    return 0;
  }
}
61 
62 static inline int IsSpaceOrTab(const char *p) {
63   return *p == '\t' ? 1 : IsSpace(p);
64 }
65 
// True when ch is one of the characters '!', '*', 'C', or 'c', which this
// file treats as fixed-form comment markers.
static inline constexpr bool IsFixedFormCommentChar(char ch) {
  switch (ch) {
  case '!':
  case '*':
  case 'C':
  case 'c':
    return true;
  default:
    return false;
  }
}
69 
70 static void NormalizeCompilerDirectiveCommentMarker(TokenSequence &dir) {
71   char *p{dir.GetMutableCharData()};
72   char *limit{p + dir.SizeInChars()};
73   for (; p < limit; ++p) {
74     if (*p != ' ') {
75       CHECK(IsFixedFormCommentChar(*p));
76       *p = '!';
77       return;
78     }
79   }
80   DIE("compiler directive all blank");
81 }
82 
83 void Prescanner::Prescan(ProvenanceRange range) {
84   startProvenance_ = range.start();
85   start_ = allSources_.GetSource(range);
86   CHECK(start_);
87   limit_ = start_ + range.size();
88   nextLine_ = start_;
89   const bool beganInFixedForm{inFixedForm_};
90   if (prescannerNesting_ > maxPrescannerNesting) {
91     Say(GetProvenance(start_),
92         "too many nested INCLUDE/#include files, possibly circular"_err_en_US);
93     return;
94   }
95   while (!IsAtEnd()) {
96     Statement();
97   }
98   if (inFixedForm_ != beganInFixedForm) {
99     std::string dir{"!dir$ "};
100     if (beganInFixedForm) {
101       dir += "fixed";
102     } else {
103       dir += "free";
104     }
105     dir += '\n';
106     TokenSequence tokens{dir, allSources_.AddCompilerInsertion(dir).start()};
107     tokens.Emit(cooked_);
108   }
109 }
110 
// Prescans one statement that begins at nextLine_.
// The initial line is classified first.  Comment lines, INCLUDE lines, and
// preprocessor directives are dealt with immediately and return early.
// Compiler directive lines and ordinary source lines are tokenized with
// NextToken(), submitted to the preprocessor for macro replacement, and
// (if any replacement took place) classified again before being handled
// according to their new kind; in most cases the result is passed to
// CheckAndEmitLine().  directiveSentinel_ is reset to nullptr at the normal
// exit of the function.
void Prescanner::Statement() {
  TokenSequence tokens;
  const char *statementStart{nextLine_};
  LineClassification line{ClassifyLine(statementStart)};
  switch (line.kind) {
  case LineClassification::Kind::Comment:
    nextLine_ += line.payloadOffset; // advance to '!' or newline
    NextLine();
    return;
  case LineClassification::Kind::IncludeLine:
    // payloadOffset locates the operand of the INCLUDE line.
    FortranInclude(nextLine_ + line.payloadOffset);
    NextLine();
    return;
  case LineClassification::Kind::ConditionalCompilationDirective:
  case LineClassification::Kind::IncludeDirective:
    preprocessor_.Directive(TokenizePreprocessorDirective(), *this);
    afterPreprocessingDirective_ = true;
    // In free form, arrange for a leading '&' on the next source line to be
    // skipped (see the Kind::Source case below).
    skipLeadingAmpersand_ |= !inFixedForm_;
    return;
  case LineClassification::Kind::PreprocessorDirective:
    preprocessor_.Directive(TokenizePreprocessorDirective(), *this);
    afterPreprocessingDirective_ = true;
    // Don't set skipLeadingAmpersand_
    return;
  case LineClassification::Kind::DefinitionDirective:
    preprocessor_.Directive(TokenizePreprocessorDirective(), *this);
    // Don't set afterPreprocessingDirective_ or skipLeadingAmpersand_
    return;
  case LineClassification::Kind::CompilerDirective: {
    directiveSentinel_ = line.sentinel;
    CHECK(InCompilerDirective());
    BeginStatementAndAdvance();
    if (inFixedForm_) {
      CHECK(IsFixedFormCommentChar(*at_));
    } else {
      // Free form: skip leading blanks/tabs to reach the '!'.
      while (int n{IsSpaceOrTab(at_)}) {
        at_ += n, ++column_;
      }
      CHECK(*at_ == '!');
    }
    // When set, the number of characters (comment marker plus sentinel) to
    // skip on a conditional compilation line.
    std::optional<int> condOffset;
    if (directiveSentinel_[0] == '$' && directiveSentinel_[1] == '\0') {
      // OpenMP conditional compilation line.
      condOffset = 2;
    } else if (directiveSentinel_[0] == '@' && directiveSentinel_[1] == 'c' &&
        directiveSentinel_[2] == 'u' && directiveSentinel_[3] == 'f' &&
        directiveSentinel_[4] == '\0') {
      // CUDA conditional compilation line.
      condOffset = 5;
    }
    if (condOffset) {
      at_ += *condOffset, column_ += *condOffset;
      if (auto payload{IsIncludeLine(at_)}) {
        // A conditional compilation line holding an INCLUDE line.
        // NOTE(review): this early return leaves directiveSentinel_ set,
        // unlike the normal exit at the end of the function -- confirm that
        // this is intended.
        FortranInclude(at_ + *payload);
        return;
      } else if (inFixedForm_) {
        LabelField(tokens);
      } else {
        SkipSpaces();
      }
    } else {
      // Compiler directive.  Emit normalized sentinel, squash following spaces.
      EmitChar(tokens, '!');
      ++at_, ++column_;
      for (const char *sp{directiveSentinel_}; *sp != '\0';
           ++sp, ++at_, ++column_) {
        EmitChar(tokens, *sp);
      }
      if (IsSpaceOrTab(at_)) {
        EmitChar(tokens, ' ');
        while (int n{IsSpaceOrTab(at_)}) {
          at_ += n, ++column_;
        }
      }
      tokens.CloseToken();
    }
    break;
  }
  case LineClassification::Kind::Source:
    BeginStatementAndAdvance();
    if (inFixedForm_) {
      // With the OldDebugLines feature enabled, step over a leading 'D'/'d'.
      if (features_.IsEnabled(LanguageFeature::OldDebugLines) &&
          (*at_ == 'D' || *at_ == 'd')) {
        NextChar();
      }
      LabelField(tokens);
    } else {
      if (skipLeadingAmpersand_) {
        // One-shot: step over optional white space and a single leading '&'.
        skipLeadingAmpersand_ = false;
        const char *p{SkipWhiteSpace(at_)};
        if (p < limit_ && *p == '&') {
          column_ += ++p - at_;
          at_ = p;
        }
      } else {
        SkipSpaces();
      }
      // Check for a leading identifier that might be a keyword macro
      // that will expand to anything indicating a non-source line, like
      // a comment marker or directive sentinel.  If so, disable line
      // continuation, so that NextToken() won't consume anything from
      // following lines.
      if (IsLegalIdentifierStart(*at_)) {
        // TODO: Only bother with these cases when any keyword macro has
        // been defined with replacement text that could begin a comment
        // or directive sentinel.
        const char *p{at_};
        while (IsLegalInIdentifier(*++p)) {
        }
        CharBlock id{at_, static_cast<std::size_t>(p - at_)};
        if (preprocessor_.IsNameDefined(id) &&
            !preprocessor_.IsFunctionLikeDefinition(id)) {
          // Expand just the leading identifier and classify the result.
          TokenSequence toks;
          toks.Put(id, GetProvenance(at_));
          if (auto replaced{preprocessor_.MacroReplacement(toks, *this)}) {
            auto newLineClass{ClassifyLine(*replaced, GetCurrentProvenance())};
            if (newLineClass.kind ==
                LineClassification::Kind::CompilerDirective) {
              directiveSentinel_ = newLineClass.sentinel;
              disableSourceContinuation_ = false;
            } else {
              disableSourceContinuation_ =
                  newLineClass.kind != LineClassification::Kind::Source;
            }
          }
        }
      }
    }
    break;
  }

  // Tokenize the remainder of the statement.
  while (NextToken(tokens)) {
  }
  if (continuationLines_ > 255) {
    if (features_.ShouldWarn(common::LanguageFeature::MiscSourceExtensions)) {
      Say(GetProvenance(statementStart),
          "%d continuation lines is more than the Fortran standard allows"_port_en_US,
          continuationLines_);
    }
  }

  // Provenance to associate with the newline that ends the emitted line.
  Provenance newlineProvenance{GetCurrentProvenance()};
  if (std::optional<TokenSequence> preprocessed{
          preprocessor_.MacroReplacement(tokens, *this)}) {
    // Reprocess the preprocessed line.
    LineClassification ppl{ClassifyLine(*preprocessed, newlineProvenance)};
    switch (ppl.kind) {
    case LineClassification::Kind::Comment:
      // The replaced text is a comment; nothing is emitted.
      break;
    case LineClassification::Kind::IncludeLine:
      FortranInclude(preprocessed->TokenAt(0).begin() + ppl.payloadOffset);
      break;
    case LineClassification::Kind::ConditionalCompilationDirective:
    case LineClassification::Kind::IncludeDirective:
    case LineClassification::Kind::DefinitionDirective:
    case LineClassification::Kind::PreprocessorDirective:
      // The replaced text is not executed as a directive; it is emitted as
      // an ordinary line, optionally with a warning.
      if (features_.ShouldWarn(common::UsageWarning::Preprocessing)) {
        Say(preprocessed->GetProvenanceRange(),
            "Preprocessed line resembles a preprocessor directive"_warn_en_US);
      }
      CheckAndEmitLine(preprocessed->ToLowerCase(), newlineProvenance);
      break;
    case LineClassification::Kind::CompilerDirective:
      if (preprocessed->HasRedundantBlanks()) {
        preprocessed->RemoveRedundantBlanks();
      }
      while (CompilerDirectiveContinuation(*preprocessed, ppl.sentinel)) {
        newlineProvenance = GetCurrentProvenance();
      }
      NormalizeCompilerDirectiveCommentMarker(*preprocessed);
      // NOTE(review): ToLowerCase() is applied here and again in the
      // CheckAndEmitLine() argument below.
      preprocessed->ToLowerCase();
      SourceFormChange(preprocessed->ToString());
      CheckAndEmitLine(preprocessed->ToLowerCase().ClipComment(
                           *this, true /* skip first ! */),
          newlineProvenance);
      break;
    case LineClassification::Kind::Source:
      if (inFixedForm_) {
        if (preprocessed->HasBlanks(/*after column*/ 6)) {
          preprocessed->RemoveBlanks(/*after column*/ 6);
        }
      } else {
        while (SourceLineContinuation(*preprocessed)) {
          newlineProvenance = GetCurrentProvenance();
        }
        if (preprocessed->HasRedundantBlanks()) {
          preprocessed->RemoveRedundantBlanks();
        }
      }
      CheckAndEmitLine(
          preprocessed->ToLowerCase().ClipComment(*this), newlineProvenance);
      break;
    }
  } else { // no macro replacement
    if (line.kind == LineClassification::Kind::CompilerDirective) {
      while (CompilerDirectiveContinuation(tokens, line.sentinel)) {
        newlineProvenance = GetCurrentProvenance();
      }
      tokens.ToLowerCase();
      SourceFormChange(tokens.ToString());
    } else { // Kind::Source
      tokens.ToLowerCase();
      if (inFixedForm_) {
        EnforceStupidEndStatementRules(tokens);
      }
    }
    CheckAndEmitLine(tokens, newlineProvenance);
  }
  directiveSentinel_ = nullptr;
}
321 
322 void Prescanner::CheckAndEmitLine(
323     TokenSequence &tokens, Provenance newlineProvenance) {
324   tokens.CheckBadFortranCharacters(
325       messages_, *this, disableSourceContinuation_);
326   // Parenthesis nesting check does not apply while any #include is
327   // active, nor on the lines before and after a top-level #include,
328   // nor before or after conditional source.
329   // Applications play shenanigans with line continuation before and
330   // after #include'd subprogram argument lists and conditional source.
331   if (!isNestedInIncludeDirective_ && !omitNewline_ &&
332       !afterPreprocessingDirective_ && tokens.BadlyNestedParentheses() &&
333       !preprocessor_.InConditional()) {
334     if (nextLine_ < limit_ && IsPreprocessorDirectiveLine(nextLine_)) {
335       // don't complain
336     } else {
337       tokens.CheckBadParentheses(messages_);
338     }
339   }
340   tokens.Emit(cooked_);
341   if (omitNewline_) {
342     omitNewline_ = false;
343   } else {
344     cooked_.Put('\n', newlineProvenance);
345     afterPreprocessingDirective_ = false;
346   }
347 }
348 
349 TokenSequence Prescanner::TokenizePreprocessorDirective() {
350   CHECK(!IsAtEnd() && !inPreprocessorDirective_);
351   inPreprocessorDirective_ = true;
352   BeginStatementAndAdvance();
353   TokenSequence tokens;
354   while (NextToken(tokens)) {
355   }
356   inPreprocessorDirective_ = false;
357   return tokens;
358 }
359 
360 void Prescanner::NextLine() {
361   void *vstart{static_cast<void *>(const_cast<char *>(nextLine_))};
362   void *v{std::memchr(vstart, '\n', limit_ - nextLine_)};
363   if (!v) {
364     nextLine_ = limit_;
365   } else {
366     const char *nl{const_cast<const char *>(static_cast<char *>(v))};
367     nextLine_ = nl + 1;
368   }
369 }
370 
// Scans the label field (columns 1 through 6) of a fixed-form line and emits
// its significant characters into `token`.
//  - A tab ends the field at once and sets the column to 7.
//  - Blanks are skipped, as is a '0' in column 6.
//  - The first column that holds a non-digit, or any emitted character in
//    column 6, is remembered as the "bad column".
// If there is a bad column and the text collected so far is not the name of
// a defined macro, the collected text is discarded and either a possible
// fixed-form continuation is recorded on the cooked source or a warning is
// issued; when the bad column is before column 6, scanning is rewound to
// the start of the line and the function returns early.
// An empty label field is represented by a single space.  On normal exit the
// token is closed and the scan position is advanced to the next significant
// character.
void Prescanner::LabelField(TokenSequence &token) {
  int outCol{1}; // 1 + number of characters emitted for the label field
  const char *start{at_};
  std::optional<int> badColumn;
  for (; *at_ != '\n' && column_ <= 6; ++at_) {
    if (*at_ == '\t') {
      ++at_;
      column_ = 7;
      break;
    }
    if (int n{IsSpace(at_)}; n == 0 &&
        !(*at_ == '0' && column_ == 6)) { // '0' in column 6 becomes space
      EmitChar(token, *at_);
      ++outCol;
      if (!badColumn && (column_ == 6 || !IsDecimalDigit(*at_))) {
        badColumn = column_;
      }
    }
    ++column_;
  }
  if (badColumn && !preprocessor_.IsNameDefined(token.CurrentOpenToken())) {
    if ((prescannerNesting_ > 0 && *badColumn == 6 &&
            cooked_.BufferedBytes() == firstCookedCharacterOffset_) ||
        afterPreprocessingDirective_) {
      // This is the first source line in #include'd text or conditional
      // code under #if, or the first source line after such.
      // If it turns out that the preprocessed text begins with a
      // fixed form continuation line, the newline at the end
      // of the latest source line beforehand will be deleted in
      // CookedSource::Marshal().
      cooked_.MarkPossibleFixedFormContinuation();
    } else if (features_.ShouldWarn(common::UsageWarning::Scanning)) {
      Say(GetProvenance(start + *badColumn - 1),
          *badColumn == 6
              ? "Statement should not begin with a continuation line"_warn_en_US
              : "Character in fixed-form label field must be a digit"_warn_en_US);
    }
    // Discard whatever was collected for the label field.
    token.clear();
    if (*badColumn < 6) {
      // Rewind so that the line is scanned again from its first column.
      at_ = start;
      column_ = 1;
      return;
    }
    outCol = 1; // treat the label field as empty below
  }
  if (outCol == 1) { // empty label field
    // Emit a space so that, if the line is rescanned after preprocessing,
    // a leading 'C' or 'D' won't be left-justified and then accidentally
    // misinterpreted as a comment card.
    EmitChar(token, ' ');
    ++outCol;
  }
  token.CloseToken();
  SkipToNextSignificantCharacter();
  // A digit right after the label field draws a portability warning.
  if (IsDecimalDigit(*at_)) {
    if (features_.ShouldWarn(common::LanguageFeature::MiscSourceExtensions)) {
      Say(GetCurrentProvenance(),
          "Label digit is not in fixed-form label field"_port_en_US);
    }
  }
}
432 
433 // 6.3.3.5: A program unit END statement, or any other statement whose
434 // initial line resembles an END statement, shall not be continued in
435 // fixed form source.
436 void Prescanner::EnforceStupidEndStatementRules(const TokenSequence &tokens) {
437   CharBlock cBlock{tokens.ToCharBlock()};
438   const char *str{cBlock.begin()};
439   std::size_t n{cBlock.size()};
440   if (n < 3) {
441     return;
442   }
443   std::size_t j{0};
444   for (; j < n && (str[j] == ' ' || (str[j] >= '0' && str[j] <= '9')); ++j) {
445   }
446   if (j + 3 > n || std::memcmp(str + j, "end", 3) != 0) {
447     return;
448   }
449   // It starts with END, possibly after a label.
450   auto start{allSources_.GetSourcePosition(tokens.GetCharProvenance(j))};
451   auto end{allSources_.GetSourcePosition(tokens.GetCharProvenance(n - 1))};
452   if (!start || !end) {
453     return;
454   }
455   if (&*start->sourceFile == &*end->sourceFile && start->line == end->line) {
456     return; // no continuation
457   }
458   j += 3;
459   static const char *const prefixes[]{"program", "subroutine", "function",
460       "blockdata", "module", "submodule", nullptr};
461   bool isPrefix{j == n || !IsLegalInIdentifier(str[j])}; // prefix is END
462   std::size_t endOfPrefix{j - 1};
463   for (const char *const *p{prefixes}; *p; ++p) {
464     std::size_t pLen{std::strlen(*p)};
465     if (j + pLen <= n && std::memcmp(str + j, *p, pLen) == 0) {
466       isPrefix = true; // END thing as prefix
467       j += pLen;
468       endOfPrefix = j - 1;
469       for (; j < n && IsLegalInIdentifier(str[j]); ++j) {
470       }
471       break;
472     }
473   }
474   if (isPrefix) {
475     auto range{tokens.GetTokenProvenanceRange(1)};
476     if (j == n) { // END or END thing [name]
477       Say(range,
478           "Program unit END statement may not be continued in fixed form source"_err_en_US);
479     } else {
480       auto endOfPrefixPos{
481           allSources_.GetSourcePosition(tokens.GetCharProvenance(endOfPrefix))};
482       auto next{allSources_.GetSourcePosition(tokens.GetCharProvenance(j))};
483       if (endOfPrefixPos && next &&
484           &*endOfPrefixPos->sourceFile == &*start->sourceFile &&
485           endOfPrefixPos->line == start->line &&
486           (&*next->sourceFile != &*start->sourceFile ||
487               next->line != start->line)) {
488         Say(range,
489             "Initial line of continued statement must not appear to be a program unit END in fixed form source"_err_en_US);
490       }
491     }
492   }
493 }
494 
495 void Prescanner::SkipToEndOfLine() {
496   while (*at_ != '\n') {
497     ++at_, ++column_;
498   }
499 }
500 
501 bool Prescanner::MustSkipToEndOfLine() const {
502   if (inFixedForm_ && column_ > fixedFormColumnLimit_ && !tabInCurrentLine_) {
503     return true; // skip over ignored columns in right margin (73:80)
504   } else if (*at_ == '!' && !inCharLiteral_) {
505     return !IsCompilerDirectiveSentinel(at_);
506   } else {
507     return false;
508   }
509 }
510 
511 void Prescanner::NextChar() {
512   CHECK(*at_ != '\n');
513   int n{IsSpace(at_)};
514   at_ += n ? n : 1;
515   ++column_;
516   while (at_[0] == '\xef' && at_[1] == '\xbb' && at_[2] == '\xbf') {
517     // UTF-8 byte order mark - treat this file as UTF-8
518     at_ += 3;
519     encoding_ = Encoding::UTF_8;
520   }
521   SkipToNextSignificantCharacter();
522 }
523 
524 // Skip everything that should be ignored until the next significant
525 // character is reached; handles C-style comments in preprocessing
526 // directives, Fortran ! comments, stuff after the right margin in
527 // fixed form, and all forms of line continuation.
528 bool Prescanner::SkipToNextSignificantCharacter() {
529   auto anyContinuationLine{false};
530   if (inPreprocessorDirective_) {
531     SkipCComments();
532   } else {
533     bool mightNeedSpace{false};
534     if (MustSkipToEndOfLine()) {
535       SkipToEndOfLine();
536     } else {
537       mightNeedSpace = *at_ == '\n';
538     }
539     for (; Continuation(mightNeedSpace); mightNeedSpace = false) {
540       anyContinuationLine = true;
541       ++continuationLines_;
542       if (MustSkipToEndOfLine()) {
543         SkipToEndOfLine();
544       }
545     }
546     if (*at_ == '\t') {
547       tabInCurrentLine_ = true;
548     }
549   }
550   return anyContinuationLine;
551 }
552 
553 void Prescanner::SkipCComments() {
554   while (true) {
555     if (IsCComment(at_)) {
556       if (const char *after{SkipCComment(at_)}) {
557         column_ += after - at_;
558         // May have skipped over one or more newlines; relocate the start of
559         // the next line.
560         nextLine_ = at_ = after;
561         NextLine();
562       } else {
563         // Don't emit any messages about unclosed C-style comments, because
564         // the sequence /* can appear legally in a FORMAT statement.  There's
565         // no ambiguity, since the sequence */ cannot appear legally.
566         break;
567       }
568     } else if (inPreprocessorDirective_ && at_[0] == '\\' && at_ + 2 < limit_ &&
569         at_[1] == '\n' && !IsAtEnd()) {
570       BeginSourceLineAndAdvance();
571     } else {
572       break;
573     }
574   }
575 }
576 
577 void Prescanner::SkipSpaces() {
578   while (IsSpaceOrTab(at_)) {
579     NextChar();
580   }
581   insertASpace_ = false;
582 }
583 
584 const char *Prescanner::SkipWhiteSpace(const char *p) {
585   while (int n{IsSpaceOrTab(p)}) {
586     p += n;
587   }
588   return p;
589 }
590 
591 const char *Prescanner::SkipWhiteSpaceAndCComments(const char *p) const {
592   while (true) {
593     if (int n{IsSpaceOrTab(p)}) {
594       p += n;
595     } else if (IsCComment(p)) {
596       if (const char *after{SkipCComment(p)}) {
597         p = after;
598       } else {
599         break;
600       }
601     } else {
602       break;
603     }
604   }
605   return p;
606 }
607 
608 const char *Prescanner::SkipCComment(const char *p) const {
609   char star{' '}, slash{' '};
610   p += 2;
611   while (star != '*' || slash != '/') {
612     if (p >= limit_) {
613       return nullptr; // signifies an unterminated comment
614     }
615     star = slash;
616     slash = *p++;
617   }
618   return p;
619 }
620 
// Scans one preprocessing token starting at at_ and appends it to `tokens`.
// Returns false when the end of the line (a newline character) has been
// reached, and true after a token has been closed.  In free form a run of
// white space may itself be returned as a single-space token.
// The kinds of token recognized, in order of testing, are: quoted character
// literals; tokens that begin with a decimal digit (numeric literals,
// Hollerith, kind-prefixed character literals); tokens that begin with a
// period; identifiers; '*' and '**'; and all other characters, some of which
// combine into two-character operators.
// The function also maintains preventHollerith_, parenthesisNesting_,
// slashInCurrentStatement_, and isPossibleMacroCall_ as it goes.
bool Prescanner::NextToken(TokenSequence &tokens) {
  CHECK(at_ >= start_ && at_ < limit_);
  if (InFixedFormSource()) {
    SkipSpaces();
  } else {
    if (*at_ == '/' && IsCComment(at_)) {
      // Recognize and skip over classic C style /*comments*/ when
      // outside a character literal.
      if (features_.ShouldWarn(LanguageFeature::ClassicCComments)) {
        Say(GetCurrentProvenance(),
            "nonstandard usage: C-style comment"_port_en_US);
      }
      SkipCComments();
    }
    if (IsSpaceOrTab(at_)) {
      // Compress free-form white space into a single space character.
      const auto theSpace{at_};
      // The character before the white space; a blank at the very start.
      char previous{at_ <= start_ ? ' ' : at_[-1]};
      NextChar();
      SkipSpaces();
      if (*at_ == '\n' && !omitNewline_) {
        // Discard white space at the end of a line.
      } else if (!inPreprocessorDirective_ &&
          (previous == '(' || *at_ == '(' || *at_ == ')')) {
        // Discard white space before/after '(' and before ')', unless in a
        // preprocessor directive.  This helps yield space-free contiguous
        // names for generic interfaces like OPERATOR( + ) and
        // READ ( UNFORMATTED ), without misinterpreting #define f (notAnArg).
        // This has the effect of silently ignoring the illegal spaces in
        // the array constructor ( /1,2/ ) but that seems benign; it's
        // hard to avoid that while still removing spaces from OPERATOR( / )
        // and OPERATOR( // ).
      } else {
        // Preserve the squashed white space as a single space character.
        tokens.PutNextTokenChar(' ', GetProvenance(theSpace));
        tokens.CloseToken();
        return true;
      }
    }
  }
  // A pending inserted space becomes the first character of the next token.
  if (insertASpace_) {
    tokens.PutNextTokenChar(' ', spaceProvenance_);
    insertASpace_ = false;
  }
  if (*at_ == '\n') {
    return false;
  }
  const char *start{at_};
  if (*at_ == '\'' || *at_ == '"') {
    QuotedCharacterLiteral(tokens, start);
    preventHollerith_ = false;
  } else if (IsDecimalDigit(*at_)) {
    // n accumulates the value of the digit string (it stops growing once it
    // reaches maxHollerith); digits counts the digits consumed.
    int n{0}, digits{0};
    static constexpr int maxHollerith{256 /*lines*/ * (132 - 6 /*columns*/)};
    do {
      if (n < maxHollerith) {
        n = 10 * n + DecimalDigitValue(*at_);
      }
      EmitCharAndAdvance(tokens, *at_);
      ++digits;
      if (InFixedFormSource()) {
        SkipSpaces();
      }
    } while (IsDecimalDigit(*at_));
    if ((*at_ == 'h' || *at_ == 'H') && n > 0 && n < maxHollerith &&
        !preventHollerith_) {
      Hollerith(tokens, n, start);
    } else if (*at_ == '.') {
      // Fractional part, then an optional exponent and/or kind suffix.
      while (IsDecimalDigit(EmitCharAndAdvance(tokens, *at_))) {
      }
      ExponentAndKind(tokens);
    } else if (ExponentAndKind(tokens)) {
    } else if (digits == 1 && n == 0 && (*at_ == 'x' || *at_ == 'X') &&
        inPreprocessorDirective_) {
      // "0x..." hexadecimal constant, only within a preprocessor directive.
      do {
        EmitCharAndAdvance(tokens, *at_);
      } while (IsHexadecimalDigit(*at_));
    } else if (at_[0] == '_' && (at_[1] == '\'' || at_[1] == '"')) { // 4_"..."
      EmitCharAndAdvance(tokens, *at_);
      QuotedCharacterLiteral(tokens, start);
    } else if (IsLetter(*at_) && !preventHollerith_ &&
        parenthesisNesting_ > 0) {
      // Handles FORMAT(3I9HHOLLERITH) by skipping over the first I so that
      // we don't misrecognize I9HOLLERITH as an identifier in the next case.
      EmitCharAndAdvance(tokens, *at_);
    }
    preventHollerith_ = false;
  } else if (*at_ == '.') {
    char nch{EmitCharAndAdvance(tokens, '.')};
    if (!inPreprocessorDirective_ && IsDecimalDigit(nch)) {
      // A real literal that begins with a period, e.g. ".5".
      while (IsDecimalDigit(EmitCharAndAdvance(tokens, *at_))) {
      }
      ExponentAndKind(tokens);
    } else if (nch == '.' && EmitCharAndAdvance(tokens, '.') == '.') {
      EmitCharAndAdvance(tokens, '.'); // variadic macro definition ellipsis
    }
    preventHollerith_ = false;
  } else if (IsLegalInIdentifier(*at_)) {
    // parts counts the pieces of the identifier, which is split wherever a
    // line continuation is crossed and an identifier-start character
    // follows.  afterLast points just past the last character emitted.
    int parts{1};
    const char *afterLast{nullptr};
    do {
      EmitChar(tokens, *at_);
      ++at_, ++column_;
      afterLast = at_;
      if (SkipToNextSignificantCharacter() && IsLegalIdentifierStart(*at_)) {
        tokens.CloseToken();
        ++parts;
      }
    } while (IsLegalInIdentifier(*at_));
    if (parts >= 3) {
      // Subtlety: When an identifier is split across three or more continuation
      // lines (or two continuation lines, immediately preceded or followed
      // by '&' free form continuation line markers, its parts are kept as
      // distinct pp-tokens so that macro replacement operates on them
      // independently.  This trick accommodates the historic practice of
      // using line continuation for token pasting after replacement.
    } else if (parts == 2) {
      if (afterLast && afterLast < limit_) {
        afterLast = SkipWhiteSpace(afterLast);
      }
      if ((start > start_ && start[-1] == '&') ||
          (afterLast && afterLast < limit_ &&
              (*afterLast == '&' || *afterLast == '\n'))) {
        // call &                call foo&        call foo&
        //   &MACRO&      OR       &MACRO&   OR     &MACRO
        //   &foo(...)             &(...)
      } else {
        // Otherwise the two parts are rejoined into a single token.
        tokens.ReopenLastToken();
      }
    }
    if (InFixedFormSource()) {
      SkipSpaces();
    }
    if ((*at_ == '\'' || *at_ == '"') &&
        tokens.CharAt(tokens.SizeInChars() - 1) == '_') { // kind_"..."
      QuotedCharacterLiteral(tokens, start);
      preventHollerith_ = false;
    } else {
      preventHollerith_ = true; // DO 10 H = ...
    }
  } else if (*at_ == '*') {
    if (EmitCharAndAdvance(tokens, '*') == '*') {
      EmitCharAndAdvance(tokens, '*');
    } else {
      // Subtle ambiguity:
      //  CHARACTER*2H     declares H because *2 is a kind specifier
      //  DATAC/N*2H  /    is repeated Hollerith
      preventHollerith_ = !slashInCurrentStatement_;
    }
  } else {
    char ch{*at_};
    if (ch == '(') {
      // On an outermost '(', note whether the preceding token names a
      // function-like macro.
      if (parenthesisNesting_++ == 0) {
        isPossibleMacroCall_ = tokens.SizeInTokens() > 0 &&
            preprocessor_.IsFunctionLikeDefinition(
                tokens.TokenAt(tokens.SizeInTokens() - 1));
      }
    } else if (ch == ')' && parenthesisNesting_ > 0) {
      --parenthesisNesting_;
    }
    char nch{EmitCharAndAdvance(tokens, ch)};
    preventHollerith_ = false;
    if ((nch == '=' &&
            (ch == '<' || ch == '>' || ch == '/' || ch == '=' || ch == '!')) ||
        (ch == nch &&
            (ch == '/' || ch == ':' || ch == '*' || ch == '#' || ch == '&' ||
                ch == '|' || ch == '<' || ch == '>')) ||
        (ch == '=' && nch == '>')) {
      // token comprises two characters
      EmitCharAndAdvance(tokens, nch);
    } else if (ch == '/') {
      slashInCurrentStatement_ = true;
    } else if (ch == ';' && InFixedFormSource()) {
      // A digit after ';' in fixed form draws a portability warning.
      SkipSpaces();
      if (IsDecimalDigit(*at_)) {
        if (features_.ShouldWarn(
                common::LanguageFeature::MiscSourceExtensions)) {
          Say(GetProvenanceRange(at_, at_ + 1),
              "Label should be in the label field"_port_en_US);
        }
      }
    }
  }
  tokens.CloseToken();
  return true;
}
807 
808 bool Prescanner::ExponentAndKind(TokenSequence &tokens) {
809   char ed{ToLowerCaseLetter(*at_)};
810   if (ed != 'e' && ed != 'd') {
811     return false;
812   }
813   EmitCharAndAdvance(tokens, ed);
814   if (*at_ == '+' || *at_ == '-') {
815     EmitCharAndAdvance(tokens, *at_);
816   }
817   while (IsDecimalDigit(*at_)) {
818     EmitCharAndAdvance(tokens, *at_);
819   }
820   if (*at_ == '_') {
821     while (IsLegalInIdentifier(EmitCharAndAdvance(tokens, *at_))) {
822     }
823   }
824   return true;
825 }
826 
// Scans a quoted character literal whose opening quote mark is at at_ and
// appends its characters to "tokens".
//   tokens - receives the characters of the literal, quotes included
//   start  - used only as the beginning of the provenance range of the
//            "Incomplete character literal" error
// inCharLiteral_ and continuationInCharLiteral_ are set while the literal
// is being scanned and are both cleared before returning.
void Prescanner::QuotedCharacterLiteral(
    TokenSequence &tokens, const char *start) {
  char quote{*at_}; // the delimiter that must also close the literal
  const char *end{at_ + 1}; // one past the last character consumed so far
  inCharLiteral_ = true;
  continuationInCharLiteral_ = true;
  const auto emit{[&](char ch) { EmitChar(tokens, ch); }};
  const auto insert{[&](char ch) { EmitInsertedChar(tokens, ch); }};
  // True when the current character is a backslash that is not itself
  // escaped (only tracked when backslash escapes are enabled).
  bool isEscaped{false};
  bool escapesEnabled{features_.IsEnabled(LanguageFeature::BackslashEscapes)};
  while (true) {
    if (*at_ == '\\') {
      if (escapesEnabled) {
        isEscaped = !isEscaped;
      } else {
        // The parser always processes escape sequences, so don't confuse it
        // when escapes are disabled.
        insert('\\');
      }
    } else {
      isEscaped = false;
    }
    EmitQuotedChar(static_cast<unsigned char>(*at_), emit, insert, false,
        Encoding::LATIN_1);
    // Emit any fixed form padding blanks that are needed at this point.
    while (PadOutCharacterLiteral(tokens)) {
    }
    if (*at_ == '\n') {
      // The line ended before the closing quote was found.
      if (!inPreprocessorDirective_) {
        Say(GetProvenanceRange(start, end),
            "Incomplete character literal"_err_en_US);
      }
      break;
    }
    // Here's a weird edge case.  When there are two or more following
    // continuation lines at this point, and the entire significant part of
    // the next continuation line is the name of a keyword macro, replace
    // it in the character literal with its definition.  Example:
    //   #define FOO foo
    //   subroutine subr() bind(c, name="my_&
    //     &FOO&
    //     &_bar") ...
    // produces a binding name of "my_foo_bar".
    while (at_[1] == '&' && nextLine_ < limit_ && !InFixedFormSource()) {
      const char *idStart{nextLine_};
      // Skip an optional leading '&' on the continuation line.
      if (const char *amper{SkipWhiteSpace(nextLine_)}; *amper == '&') {
        idStart = amper + 1;
      }
      if (IsLegalIdentifierStart(*idStart)) {
        std::size_t idLen{1};
        for (; IsLegalInIdentifier(idStart[idLen]); ++idLen) {
        }
        // The identifier must be followed immediately by another '&'.
        if (idStart[idLen] == '&') {
          CharBlock id{idStart, idLen};
          if (preprocessor_.IsNameDefined(id)) {
            TokenSequence ppTokens;
            ppTokens.Put(id, GetProvenance(idStart));
            if (auto replaced{
                    preprocessor_.MacroReplacement(ppTokens, *this)}) {
              tokens.Put(*replaced);
              // Leave at_ on the last character of the macro name so that
              // at_[1] is the trailing '&' when the loop condition is
              // evaluated again.
              at_ = &idStart[idLen - 1];
              NextLine();
              continue; // try again on the next line
            }
          }
        }
      }
      break;
    }
    end = at_ + 1;
    NextChar();
    if (*at_ == quote && !isEscaped) {
      // A doubled unescaped quote mark becomes a single instance of that
      // quote character in the literal (later).  There can be spaces between
      // the quotes in fixed form source.
      EmitChar(tokens, quote);
      inCharLiteral_ = false; // for cases like print *, '...'!comment
      NextChar();
      if (InFixedFormSource()) {
        SkipSpaces();
      }
      if (*at_ != quote) {
        break; // that was the closing quote
      }
      // A second quote follows: the literal continues.
      inCharLiteral_ = true;
    }
  }
  continuationInCharLiteral_ = false;
  inCharLiteral_ = false;
}
916 
917 void Prescanner::Hollerith(
918     TokenSequence &tokens, int count, const char *start) {
919   inCharLiteral_ = true;
920   CHECK(*at_ == 'h' || *at_ == 'H');
921   EmitChar(tokens, 'H');
922   while (count-- > 0) {
923     if (PadOutCharacterLiteral(tokens)) {
924     } else if (*at_ == '\n') {
925       if (features_.ShouldWarn(common::UsageWarning::Scanning)) {
926         Say(GetProvenanceRange(start, at_),
927             "Possible truncated Hollerith literal"_warn_en_US);
928       }
929       break;
930     } else {
931       NextChar();
932       // Each multi-byte character encoding counts as a single character.
933       // No escape sequences are recognized.
934       // Hollerith is always emitted to the cooked character
935       // stream in UTF-8.
936       DecodedCharacter decoded{DecodeCharacter(
937           encoding_, at_, static_cast<std::size_t>(limit_ - at_), false)};
938       if (decoded.bytes > 0) {
939         EncodedCharacter utf8{
940             EncodeCharacter<Encoding::UTF_8>(decoded.codepoint)};
941         for (int j{0}; j < utf8.bytes; ++j) {
942           EmitChar(tokens, utf8.buffer[j]);
943         }
944         at_ += decoded.bytes - 1;
945       } else {
946         Say(GetProvenanceRange(start, at_),
947             "Bad character in Hollerith literal"_err_en_US);
948         break;
949       }
950     }
951   }
952   if (*at_ != '\n') {
953     NextChar();
954   }
955   inCharLiteral_ = false;
956 }
957 
958 // In fixed form, source card images must be processed as if they were at
959 // least 72 columns wide, at least in character literal contexts.
960 bool Prescanner::PadOutCharacterLiteral(TokenSequence &tokens) {
961   while (inFixedForm_ && !tabInCurrentLine_ && at_[1] == '\n') {
962     if (column_ < fixedFormColumnLimit_) {
963       tokens.PutNextTokenChar(' ', spaceProvenance_);
964       ++column_;
965       return true;
966     }
967     if (!FixedFormContinuation(false /*no need to insert space*/) ||
968         tabInCurrentLine_) {
969       return false;
970     }
971     CHECK(column_ == 7);
972     --at_; // point to column 6 of continuation line
973     column_ = 6;
974   }
975   return false;
976 }
977 
978 static bool IsAtProcess(const char *p) {
979   static const char pAtProc[]{"process"};
980   for (std::size_t i{0}; i < sizeof pAtProc - 1; ++i) {
981     if (ToLowerCaseLetter(*++p) != pAtProc[i])
982       return false;
983   }
984   return true;
985 }
986 
987 bool Prescanner::IsFixedFormCommentLine(const char *start) const {
988   const char *p{start};
989 
990   // The @process directive must start in column 1.
991   if (*p == '@' && IsAtProcess(p)) {
992     return true;
993   }
994 
995   if (IsFixedFormCommentChar(*p) || *p == '%' || // VAX %list, %eject, &c.
996       ((*p == 'D' || *p == 'd') &&
997           !features_.IsEnabled(LanguageFeature::OldDebugLines))) {
998     return true;
999   }
1000   bool anyTabs{false};
1001   while (true) {
1002     if (int n{IsSpace(p)}) {
1003       p += n;
1004     } else if (*p == '\t') {
1005       anyTabs = true;
1006       ++p;
1007     } else if (*p == '0' && !anyTabs && p == start + 5) {
1008       ++p; // 0 in column 6 must treated as a space
1009     } else {
1010       break;
1011     }
1012   }
1013   if (!anyTabs && p >= start + fixedFormColumnLimit_) {
1014     return true;
1015   }
1016   if (*p == '!' && !inCharLiteral_ && (anyTabs || p != start + 5)) {
1017     return true;
1018   }
1019   return *p == '\n';
1020 }
1021 
1022 const char *Prescanner::IsFreeFormComment(const char *p) const {
1023   p = SkipWhiteSpaceAndCComments(p);
1024   if (*p == '!' || *p == '\n') {
1025     return p;
1026   } else if (*p == '@') {
1027     return IsAtProcess(p) ? p : nullptr;
1028   } else {
1029     return nullptr;
1030   }
1031 }
1032 
1033 std::optional<std::size_t> Prescanner::IsIncludeLine(const char *start) const {
1034   const char *p{SkipWhiteSpace(start)};
1035   if (*p == '0' && inFixedForm_ && p == start + 5) {
1036     // Accept "     0INCLUDE" in fixed form.
1037     p = SkipWhiteSpace(p + 1);
1038   }
1039   for (const char *q{"include"}; *q; ++q) {
1040     if (ToLowerCaseLetter(*p) != *q) {
1041       return std::nullopt;
1042     }
1043     p = SkipWhiteSpace(p + 1);
1044   }
1045   if (IsDecimalDigit(*p)) { // accept & ignore a numeric kind prefix
1046     for (p = SkipWhiteSpace(p + 1); IsDecimalDigit(*p);
1047          p = SkipWhiteSpace(p + 1)) {
1048     }
1049     if (*p != '_') {
1050       return std::nullopt;
1051     }
1052     p = SkipWhiteSpace(p + 1);
1053   }
1054   if (*p == '"' || *p == '\'') {
1055     return {p - start};
1056   }
1057   return std::nullopt;
1058 }
1059 
// Processes a Fortran INCLUDE line.  "firstQuote" points at (or before) the
// opening quote mark of the path.  The quoted path is extracted, the file
// is opened, and its contents are prescanned by a nested Prescanner.
// Diagnostics are emitted for a malformed path, for excess text after the
// path, and for a file that cannot be opened.
void Prescanner::FortranInclude(const char *firstQuote) {
  const char *p{firstQuote};
  // Advance to the opening quote mark.
  while (*p != '"' && *p != '\'') {
    ++p;
  }
  char quote{*p};
  std::string path;
  // Collect the path up to the closing quote; a doubled quote mark
  // contributes a single quote character to the path.
  for (++p; *p != '\n'; ++p) {
    if (*p == quote) {
      if (p[1] != quote) {
        break;
      }
      ++p;
    }
    path += *p;
  }
  if (*p != quote) {
    // The line ended before a closing quote was found.
    Say(GetProvenanceRange(firstQuote, p),
        "malformed path name string"_err_en_US);
    return;
  }
  p = SkipWhiteSpace(p + 1);
  if (*p != '\n' && *p != '!') {
    // Something other than a '!' comment follows the path.
    const char *garbage{p};
    for (; *p != '\n' && *p != '!'; ++p) {
    }
    if (features_.ShouldWarn(common::UsageWarning::Scanning)) {
      Say(GetProvenanceRange(garbage, p),
          "excess characters after path name"_warn_en_US);
    }
  }
  std::string buf;
  llvm::raw_string_ostream error{buf};
  Provenance provenance{GetProvenance(nextLine_)};
  // When the current source file is known, its directory is passed to
  // Open() as the path to prepend.
  std::optional<std::string> prependPath;
  if (const SourceFile * currentFile{allSources_.GetSourceFile(provenance)}) {
    prependPath = DirectoryName(currentFile->path());
  }
  const SourceFile *included{
      allSources_.Open(path, error, std::move(prependPath))};
  if (!included) {
    Say(provenance, "INCLUDE: %s"_err_en_US, buf);
  } else if (included->bytes() > 0) {
    ProvenanceRange includeLineRange{
        provenance, static_cast<std::size_t>(p - nextLine_)};
    ProvenanceRange fileRange{
        allSources_.AddIncludedFile(*included, includeLineRange)};
    // The included file is prescanned with a new Preprocessor instance
    // rather than with preprocessor_; only the standard macros and _CUDA
    // are carried over, and only when they are defined in preprocessor_.
    Preprocessor cleanPrepro{allSources_};
    if (preprocessor_.IsNameDefined("__FILE__"s)) {
      cleanPrepro.DefineStandardMacros(); // __FILE__, __LINE__, &c.
    }
    if (preprocessor_.IsNameDefined("_CUDA"s)) {
      cleanPrepro.Define("_CUDA"s, "1");
    }
    Prescanner{*this, cleanPrepro, /*isNestedInIncludeDirective=*/false}
        .set_encoding(included->encoding())
        .Prescan(fileRange);
  }
}
1119 
1120 const char *Prescanner::IsPreprocessorDirectiveLine(const char *start) const {
1121   const char *p{start};
1122   while (int n{IsSpace(p)}) {
1123     p += n;
1124   }
1125   if (*p == '#') {
1126     if (inFixedForm_ && p == start + 5) {
1127       return nullptr;
1128     }
1129   } else {
1130     p = SkipWhiteSpace(p);
1131     if (*p != '#') {
1132       return nullptr;
1133     }
1134   }
1135   return SkipWhiteSpace(p + 1);
1136 }
1137 
1138 bool Prescanner::IsNextLinePreprocessorDirective() const {
1139   return IsPreprocessorDirectiveLine(nextLine_) != nullptr;
1140 }
1141 
1142 bool Prescanner::SkipCommentLine(bool afterAmpersand) {
1143   if (IsAtEnd()) {
1144     if (afterAmpersand && prescannerNesting_ > 0) {
1145       // A continuation marker at the end of the last line in an
1146       // include file inhibits the newline for that line.
1147       SkipToEndOfLine();
1148       omitNewline_ = true;
1149     }
1150   } else if (inPreprocessorDirective_) {
1151   } else {
1152     auto lineClass{ClassifyLine(nextLine_)};
1153     if (lineClass.kind == LineClassification::Kind::Comment) {
1154       NextLine();
1155       return true;
1156     } else if (lineClass.kind ==
1157             LineClassification::Kind::ConditionalCompilationDirective ||
1158         lineClass.kind == LineClassification::Kind::PreprocessorDirective) {
1159       // Allow conditional compilation directives (e.g., #ifdef) to affect
1160       // continuation lines.
1161       // Allow other preprocessor directives, too, except #include
1162       // (when it does not follow '&'), #define, and #undef (because
1163       // they cannot be allowed to affect preceding text on a
1164       // continued line).
1165       preprocessor_.Directive(TokenizePreprocessorDirective(), *this);
1166       return true;
1167     } else if (afterAmpersand &&
1168         (lineClass.kind == LineClassification::Kind::DefinitionDirective ||
1169             lineClass.kind == LineClassification::Kind::IncludeDirective ||
1170             lineClass.kind == LineClassification::Kind::IncludeLine)) {
1171       SkipToEndOfLine();
1172       omitNewline_ = true;
1173       skipLeadingAmpersand_ = true;
1174     }
1175   }
1176   return false;
1177 }
1178 
// Determines whether the line at nextLine_ is a fixed form continuation
// line.  When it is, returns the position in that line at which scanning
// should resume; otherwise returns nullptr.
//   mightNeedSpace - when true, insertASpace_ is set for a continued
//                    comment-style line whose column 7 is not a space
// Side effects: clears tabInCurrentLine_ (and sets it again for the
// tab-digit continuation form); may set insertASpace_.
const char *Prescanner::FixedFormContinuationLine(bool mightNeedSpace) {
  if (IsAtEnd()) {
    return nullptr;
  }
  tabInCurrentLine_ = false;
  char col1{*nextLine_};
  if (IsFixedFormCommentChar(col1)) {
    // Column 1 holds a comment character, so this can be a continuation
    // only of a compiler directive or of an OpenMP conditional line.
    int j{1}; // index into nextLine_ of the next column to check
    if (InCompilerDirective()) {
      // Must be a continued compiler directive.
      // Columns 2.. must repeat the sentinel of the current directive.
      for (; j < 5; ++j) {
        char ch{directiveSentinel_[j - 1]};
        if (ch == '\0') {
          break;
        }
        if (ch != ToLowerCaseLetter(nextLine_[j])) {
          return nullptr;
        }
      }
    } else if (features_.IsEnabled(LanguageFeature::OpenMP)) {
      // Fixed Source Form Conditional Compilation Sentinels.
      if (nextLine_[1] != '$') {
        return nullptr;
      }
      j++;
    } else {
      return nullptr;
    }
    // The remaining columns before column 6 must be blank.
    for (; j < 5; ++j) {
      if (nextLine_[j] != ' ') {
        return nullptr;
      }
    }
    // Column 6 must hold something other than a newline, '0', space, or tab.
    const char *col6{nextLine_ + 5};
    if (*col6 != '\n' && *col6 != '0' && !IsSpaceOrTab(col6)) {
      if (mightNeedSpace && !IsSpace(nextLine_ + 6)) {
        insertASpace_ = true;
      }
      return nextLine_ + 6;
    }
    return nullptr;
  } else {
    // Normal case: column 1 is not a comment character.
    if (col1 == '&' &&
        features_.IsEnabled(
            LanguageFeature::FixedFormContinuationWithColumn1Ampersand)) {
      // Extension: '&' as continuation marker
      if (features_.ShouldWarn(
              LanguageFeature::FixedFormContinuationWithColumn1Ampersand)) {
        Say(GetProvenance(nextLine_), "nonstandard usage"_port_en_US);
      }
      return nextLine_ + 1;
    }
    // A tab followed by a nonzero digit also marks a continuation line.
    if (col1 == '\t' && nextLine_[1] >= '1' && nextLine_[1] <= '9') {
      tabInCurrentLine_ = true;
      return nextLine_ + 2; // VAX extension
    }
    // Standard form: columns 1-5 blank (column 1 may be 'D'/'d' when old
    // debug lines are enabled) and a continuation marker in column 6.
    if ((col1 == ' ' ||
            ((col1 == 'D' || col1 == 'd') &&
                features_.IsEnabled(LanguageFeature::OldDebugLines))) &&
        nextLine_[1] == ' ' && nextLine_[2] == ' ' && nextLine_[3] == ' ' &&
        nextLine_[4] == ' ') {
      const char *col6{nextLine_ + 5};
      if (*col6 != '\n' && *col6 != '0' && !IsSpaceOrTab(col6)) {
        if ((*col6 == 'i' || *col6 == 'I') && IsIncludeLine(nextLine_)) {
          // It's an INCLUDE line, not a continuation
        } else {
          return nextLine_ + 6;
        }
      }
    }
    // Otherwise the whole line may continue a macro call implicitly.
    if (IsImplicitContinuation()) {
      return nextLine_;
    }
  }
  return nullptr; // not a continuation line
}
1256 
// Determines whether the line at nextLine_ continues the current free form
// line.  When it does, returns the position in that line at which scanning
// should resume; otherwise returns nullptr.
//   ampersand - whether the line being continued ended with '&'
// May set insertASpace_ and may emit a portability warning.
const char *Prescanner::FreeFormContinuationLine(bool ampersand) {
  const char *p{nextLine_};
  if (p >= limit_) {
    return nullptr; // no more lines
  }
  p = SkipWhiteSpace(p);
  if (InCompilerDirective()) {
    // A directive continuation line must begin with '!' followed by the
    // same sentinel (compared case-insensitively).
    if (*p++ != '!') {
      return nullptr;
    }
    for (const char *s{directiveSentinel_}; *s != '\0'; ++p, ++s) {
      if (*s != ToLowerCaseLetter(*p)) {
        return nullptr;
      }
    }
    p = SkipWhiteSpace(p);
    if (*p == '&') {
      // Resume just after the leading '&'.
      if (!ampersand) {
        insertASpace_ = true;
      }
      return p + 1;
    } else if (ampersand) {
      return p;
    } else {
      return nullptr;
    }
  } else {
    if (*p == '&') {
      // Resume just after the leading '&'.
      return p + 1;
    } else if (*p == '!' || *p == '\n' || *p == '#') {
      // Lines that begin with '!', '#', or nothing at all are not
      // continuation lines here.
      return nullptr;
    } else if (ampersand || IsImplicitContinuation()) {
      if (continuationInCharLiteral_) {
        // 'a'&            -> 'a''b' == "a'b"
        //   'b'
        if (features_.ShouldWarn(
                common::LanguageFeature::MiscSourceExtensions)) {
          Say(GetProvenanceRange(p, p + 1),
              "Character literal continuation line should have been preceded by '&'"_port_en_US);
        }
      } else if (p > nextLine_) {
        // Back up onto the last of the leading white space characters.
        --p;
      } else {
        insertASpace_ = true;
      }
      return p;
    } else {
      return nullptr;
    }
  }
}
1308 
1309 bool Prescanner::FixedFormContinuation(bool mightNeedSpace) {
1310   // N.B. We accept '&' as a continuation indicator in fixed form, too,
1311   // but not in a character literal.
1312   if (*at_ == '&' && inCharLiteral_) {
1313     return false;
1314   }
1315   do {
1316     if (const char *cont{FixedFormContinuationLine(mightNeedSpace)}) {
1317       BeginSourceLine(cont);
1318       column_ = 7;
1319       NextLine();
1320       return true;
1321     }
1322   } while (SkipCommentLine(false /* not after ampersand */));
1323   return false;
1324 }
1325 
1326 bool Prescanner::FreeFormContinuation() {
1327   const char *p{at_};
1328   bool ampersand{*p == '&'};
1329   if (ampersand) {
1330     p = SkipWhiteSpace(p + 1);
1331   }
1332   if (*p != '\n') {
1333     if (inCharLiteral_) {
1334       return false;
1335     } else if (*p == '!') { // & ! comment - ok
1336     } else if (ampersand && isPossibleMacroCall_ && (*p == ',' || *p == ')')) {
1337       return false; // allow & at end of a macro argument
1338     } else if (features_.ShouldWarn(LanguageFeature::CruftAfterAmpersand)) {
1339       Say(GetProvenance(p), "missing ! before comment after &"_warn_en_US);
1340     }
1341   }
1342   do {
1343     if (const char *cont{FreeFormContinuationLine(ampersand)}) {
1344       BeginSourceLine(cont);
1345       NextLine();
1346       return true;
1347     }
1348   } while (SkipCommentLine(ampersand));
1349   return false;
1350 }
1351 
1352 // Implicit line continuation allows a preprocessor macro call with
1353 // arguments to span multiple lines.
1354 bool Prescanner::IsImplicitContinuation() const {
1355   return !inPreprocessorDirective_ && !inCharLiteral_ && isPossibleMacroCall_ &&
1356       parenthesisNesting_ > 0 && !IsAtEnd() &&
1357       ClassifyLine(nextLine_).kind == LineClassification::Kind::Source;
1358 }
1359 
1360 bool Prescanner::Continuation(bool mightNeedFixedFormSpace) {
1361   if (disableSourceContinuation_) {
1362     return false;
1363   } else if (*at_ == '\n' || *at_ == '&') {
1364     if (inFixedForm_) {
1365       return FixedFormContinuation(mightNeedFixedFormSpace);
1366     } else {
1367       return FreeFormContinuation();
1368     }
1369   } else if (*at_ == '\\' && at_ + 2 == nextLine_ &&
1370       backslashFreeFormContinuation_ && !inFixedForm_ && nextLine_ < limit_) {
1371     // cpp-like handling of \ at end of a free form source line
1372     BeginSourceLine(nextLine_);
1373     NextLine();
1374     return true;
1375   } else {
1376     return false;
1377   }
1378 }
1379 
// Determines whether the fixed form line at "start" is the initial line of
// a compiler directive.  On success, yields a CompilerDirective
// classification holding the offset of the directive's payload from
// "start" and the matching registered sentinel; otherwise yields
// std::nullopt.
std::optional<Prescanner::LineClassification>
Prescanner::IsFixedFormCompilerDirectiveLine(const char *start) const {
  const char *p{start};
  char col1{*p++};
  if (!IsFixedFormCommentChar(col1)) {
    return std::nullopt;
  }
  // Collect up to four lower-cased sentinel characters from columns 2-5.
  char sentinel[5], *sp{sentinel};
  int column{2};
  for (; column < 6; ++column, ++p) {
    if (*p == '\n' || IsSpaceOrTab(p)) {
      break;
    }
    if (sp == sentinel + 1 && sentinel[0] == '$' && IsDecimalDigit(*p)) {
      // OpenMP conditional compilation line: leave the label alone
      break;
    }
    *sp++ = ToLowerCaseLetter(*p);
  }
  if (column == 6) {
    // Columns 2-5 were all consumed, so p is now at column 6, which must
    // hold '0', a space, or a tab on an initial directive line.
    if (*p == '0') {
      ++p;
    } else if (int n{IsSpaceOrTab(p)}) {
      p += n;
    } else {
      // This is a Continuation line, not an initial directive line.
      return std::nullopt;
    }
  }
  if (sp == sentinel) {
    return std::nullopt; // no sentinel characters at all
  }
  *sp = '\0';
  if (const char *ss{IsCompilerDirectiveSentinel(
          sentinel, static_cast<std::size_t>(sp - sentinel))}) {
    std::size_t payloadOffset = p - start;
    return {LineClassification{
        LineClassification::Kind::CompilerDirective, payloadOffset, ss}};
  }
  return std::nullopt;
}
1421 
1422 std::optional<Prescanner::LineClassification>
1423 Prescanner::IsFreeFormCompilerDirectiveLine(const char *start) const {
1424   if (const char *p{SkipWhiteSpace(start)}; p && *p++ == '!') {
1425     if (auto maybePair{IsCompilerDirectiveSentinel(p)}) {
1426       auto offset{static_cast<std::size_t>(maybePair->second - start)};
1427       return {LineClassification{LineClassification::Kind::CompilerDirective,
1428           offset, maybePair->first}};
1429     }
1430   }
1431   return std::nullopt;
1432 }
1433 
1434 Prescanner &Prescanner::AddCompilerDirectiveSentinel(const std::string &dir) {
1435   std::uint64_t packed{0};
1436   for (char ch : dir) {
1437     packed = (packed << 8) | (ToLowerCaseLetter(ch) & 0xff);
1438   }
1439   compilerDirectiveBloomFilter_.set(packed % prime1);
1440   compilerDirectiveBloomFilter_.set(packed % prime2);
1441   compilerDirectiveSentinels_.insert(dir);
1442   return *this;
1443 }
1444 
1445 const char *Prescanner::IsCompilerDirectiveSentinel(
1446     const char *sentinel, std::size_t len) const {
1447   std::uint64_t packed{0};
1448   for (std::size_t j{0}; j < len; ++j) {
1449     packed = (packed << 8) | (sentinel[j] & 0xff);
1450   }
1451   if (len == 0 || !compilerDirectiveBloomFilter_.test(packed % prime1) ||
1452       !compilerDirectiveBloomFilter_.test(packed % prime2)) {
1453     return nullptr;
1454   }
1455   const auto iter{compilerDirectiveSentinels_.find(std::string(sentinel, len))};
1456   return iter == compilerDirectiveSentinels_.end() ? nullptr : iter->c_str();
1457 }
1458 
1459 const char *Prescanner::IsCompilerDirectiveSentinel(CharBlock token) const {
1460   const char *p{token.begin()};
1461   const char *end{p + token.size()};
1462   while (p < end && (*p == ' ' || *p == '\n')) {
1463     ++p;
1464   }
1465   if (p < end && *p == '!') {
1466     ++p;
1467   }
1468   while (end > p && (end[-1] == ' ' || end[-1] == '\t')) {
1469     --end;
1470   }
1471   return end > p && IsCompilerDirectiveSentinel(p, end - p) ? p : nullptr;
1472 }
1473 
1474 std::optional<std::pair<const char *, const char *>>
1475 Prescanner::IsCompilerDirectiveSentinel(const char *p) const {
1476   char sentinel[8];
1477   for (std::size_t j{0}; j + 1 < sizeof sentinel && *p != '\n'; ++p, ++j) {
1478     if (int n{*p == '&' ? 1 : IsSpaceOrTab(p)}) {
1479       if (j > 0) {
1480         sentinel[j] = '\0';
1481         p = SkipWhiteSpace(p + n);
1482         if (*p != '!') {
1483           if (const char *sp{IsCompilerDirectiveSentinel(sentinel, j)}) {
1484             return std::make_pair(sp, p);
1485           }
1486         }
1487       }
1488       break;
1489     } else {
1490       sentinel[j] = ToLowerCaseLetter(*p);
1491     }
1492   }
1493   return std::nullopt;
1494 }
1495 
1496 constexpr bool IsDirective(const char *match, const char *dir) {
1497   for (; *match; ++match) {
1498     if (*match != ToLowerCaseLetter(*dir++)) {
1499       return false;
1500     }
1501   }
1502   return true;
1503 }
1504 
1505 Prescanner::LineClassification Prescanner::ClassifyLine(
1506     const char *start) const {
1507   if (inFixedForm_) {
1508     if (std::optional<LineClassification> lc{
1509             IsFixedFormCompilerDirectiveLine(start)}) {
1510       return std::move(*lc);
1511     }
1512     if (IsFixedFormCommentLine(start)) {
1513       return {LineClassification::Kind::Comment};
1514     }
1515   } else {
1516     if (std::optional<LineClassification> lc{
1517             IsFreeFormCompilerDirectiveLine(start)}) {
1518       return std::move(*lc);
1519     }
1520     if (const char *bang{IsFreeFormComment(start)}) {
1521       return {LineClassification::Kind::Comment,
1522           static_cast<std::size_t>(bang - start)};
1523     }
1524   }
1525   if (std::optional<std::size_t> quoteOffset{IsIncludeLine(start)}) {
1526     return {LineClassification::Kind::IncludeLine, *quoteOffset};
1527   }
1528   if (const char *dir{IsPreprocessorDirectiveLine(start)}) {
1529     if (IsDirective("if", dir) || IsDirective("elif", dir) ||
1530         IsDirective("else", dir) || IsDirective("endif", dir)) {
1531       return {LineClassification::Kind::ConditionalCompilationDirective};
1532     } else if (IsDirective("include", dir)) {
1533       return {LineClassification::Kind::IncludeDirective};
1534     } else if (IsDirective("define", dir) || IsDirective("undef", dir)) {
1535       return {LineClassification::Kind::DefinitionDirective};
1536     } else {
1537       return {LineClassification::Kind::PreprocessorDirective};
1538     }
1539   }
1540   return {LineClassification::Kind::Source};
1541 }
1542 
1543 Prescanner::LineClassification Prescanner::ClassifyLine(
1544     TokenSequence &tokens, Provenance newlineProvenance) const {
1545   // Append a newline temporarily.
1546   tokens.PutNextTokenChar('\n', newlineProvenance);
1547   tokens.CloseToken();
1548   const char *ppd{tokens.ToCharBlock().begin()};
1549   LineClassification classification{ClassifyLine(ppd)};
1550   tokens.pop_back(); // remove the newline
1551   return classification;
1552 }
1553 
1554 void Prescanner::SourceFormChange(std::string &&dir) {
1555   if (dir == "!dir$ free") {
1556     inFixedForm_ = false;
1557   } else if (dir == "!dir$ fixed") {
1558     inFixedForm_ = true;
1559   }
1560 }
1561 
1562 // Acquire and append compiler directive continuation lines to
1563 // the tokens that constitute a compiler directive, even when those
1564 // directive continuation lines are the result of macro expansion.
1565 // (Not used when neither the original compiler directive line nor
1566 // the directive continuation line result from preprocessing; regular
1567 // line continuation during tokenization handles that normal case.)
1568 bool Prescanner::CompilerDirectiveContinuation(
1569     TokenSequence &tokens, const char *origSentinel) {
1570   if (inFixedForm_ || tokens.empty() ||
1571       tokens.TokenAt(tokens.SizeInTokens() - 1) != "&") {
1572     return false;
1573   }
1574   LineClassification followingLine{ClassifyLine(nextLine_)};
1575   if (followingLine.kind == LineClassification::Kind::Comment) {
1576     nextLine_ += followingLine.payloadOffset; // advance to '!' or newline
1577     NextLine();
1578     return true;
1579   }
1580   CHECK(origSentinel != nullptr);
1581   directiveSentinel_ = origSentinel; // so InCompilerDirective() is true
1582   const char *nextContinuation{
1583       followingLine.kind == LineClassification::Kind::CompilerDirective
1584           ? FreeFormContinuationLine(true)
1585           : nullptr};
1586   if (!nextContinuation &&
1587       followingLine.kind != LineClassification::Kind::Source) {
1588     return false;
1589   }
1590   auto origNextLine{nextLine_};
1591   BeginSourceLine(nextLine_);
1592   NextLine();
1593   if (nextContinuation) {
1594     // What follows is !DIR$ & xxx; skip over the & so that it
1595     // doesn't cause a spurious continuation.
1596     at_ = nextContinuation;
1597   } else {
1598     // What follows looks like a source line before macro expansion,
1599     // but might become a directive continuation afterwards.
1600     SkipSpaces();
1601   }
1602   TokenSequence followingTokens;
1603   while (NextToken(followingTokens)) {
1604   }
1605   if (auto followingPrepro{
1606           preprocessor_.MacroReplacement(followingTokens, *this)}) {
1607     followingTokens = std::move(*followingPrepro);
1608   }
1609   followingTokens.RemoveRedundantBlanks();
1610   std::size_t startAt{0};
1611   std::size_t following{followingTokens.SizeInTokens()};
1612   bool ok{false};
1613   if (nextContinuation) {
1614     ok = true;
1615   } else {
1616     startAt = 2;
1617     if (startAt < following && followingTokens.TokenAt(0) == "!") {
1618       CharBlock sentinel{followingTokens.TokenAt(1)};
1619       if (!sentinel.empty() &&
1620           std::memcmp(sentinel.begin(), origSentinel, sentinel.size()) == 0) {
1621         ok = true;
1622         while (
1623             startAt < following && followingTokens.TokenAt(startAt).IsBlank()) {
1624           ++startAt;
1625         }
1626         if (startAt < following && followingTokens.TokenAt(startAt) == "&") {
1627           ++startAt;
1628         }
1629       }
1630     }
1631   }
1632   if (ok) {
1633     tokens.pop_back(); // delete original '&'
1634     tokens.Put(followingTokens, startAt, following - startAt);
1635     tokens.RemoveRedundantBlanks();
1636   } else {
1637     nextLine_ = origNextLine;
1638   }
1639   return ok;
1640 }
1641 
1642 // Similar, but for source line continuation after macro replacement.
1643 bool Prescanner::SourceLineContinuation(TokenSequence &tokens) {
1644   if (!inFixedForm_ && !tokens.empty() &&
1645       tokens.TokenAt(tokens.SizeInTokens() - 1) == "&") {
1646     LineClassification followingLine{ClassifyLine(nextLine_)};
1647     if (followingLine.kind == LineClassification::Kind::Comment) {
1648       nextLine_ += followingLine.payloadOffset; // advance to '!' or newline
1649       NextLine();
1650       return true;
1651     } else if (const char *nextContinuation{FreeFormContinuationLine(true)}) {
1652       BeginSourceLine(nextLine_);
1653       NextLine();
1654       TokenSequence followingTokens;
1655       at_ = nextContinuation;
1656       while (NextToken(followingTokens)) {
1657       }
1658       if (auto followingPrepro{
1659               preprocessor_.MacroReplacement(followingTokens, *this)}) {
1660         followingTokens = std::move(*followingPrepro);
1661       }
1662       followingTokens.RemoveRedundantBlanks();
1663       tokens.pop_back(); // delete original '&'
1664       tokens.Put(followingTokens);
1665       return true;
1666     }
1667   }
1668   return false;
1669 }
1670 } // namespace Fortran::parser
1671