xref: /llvm-project/flang/lib/Parser/prescan.h (revision 9fb2db1e1f42ae10a9d8c1d9410b5f4e719fdac0)
1 //===-- lib/Parser/prescan.h ------------------------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #ifndef FORTRAN_PARSER_PRESCAN_H_
10 #define FORTRAN_PARSER_PRESCAN_H_
11 
12 // Defines a fast Fortran source prescanning phase that implements some
13 // character-level features of the language that can be inefficient to
14 // support directly in a backtracking parser.  This phase handles Fortran
15 // line continuation, comment removal, card image margins, padding out
16 // fixed form character literals on truncated card images, file
17 // inclusion, and driving the Fortran source preprocessor.
18 
19 #include "flang/Common/Fortran-features.h"
20 #include "flang/Parser/characters.h"
21 #include "flang/Parser/message.h"
22 #include "flang/Parser/provenance.h"
23 #include "flang/Parser/token-sequence.h"
24 #include <bitset>
25 #include <optional>
26 #include <string>
27 #include <unordered_set>
28 
29 namespace Fortran::parser {
30 
31 class Messages;
32 class Preprocessor;
33 
// Prescanner holds the state and entry points of the prescanning phase
// described at the top of this header.  Only declarations and small inline
// helpers appear here; the remaining member functions are defined out of line.
34 class Prescanner {
35 public:
     // Primary constructor; note that the LanguageFeatureControl argument is
     // taken by value.
36   Prescanner(Messages &, CookedSource &, Preprocessor &,
37       common::LanguageFeatureControl);
     // Builds a prescanner from an existing one plus a Preprocessor.  The flag
     // presumably records that the new prescanner works on the content of an
     // include directive -- confirm against the out-of-line definition.
38   Prescanner(
39       const Prescanner &, Preprocessor &, bool isNestedInIncludeDirective);
     // Plain copy and move construction are disabled.
40   Prescanner(const Prescanner &) = delete;
41   Prescanner(Prescanner &&) = delete;
42 
     // Accessors for the collaborating objects, which are held by reference.
43   const AllSources &allSources() const { return allSources_; }
44   AllSources &allSources() { return allSources_; }
45   const Messages &messages() const { return messages_; }
46   Messages &messages() { return messages_; }
47   const Preprocessor &preprocessor() const { return preprocessor_; }
48   Preprocessor &preprocessor() { return preprocessor_; }
     // features_ is a by-value member, so this exposes this prescanner's own
     // copy of the feature control.  There is no const overload.
49   common::LanguageFeatureControl &features() { return features_; }
50 
     // Chainable setters: each one stores its argument in the corresponding
     // data member and returns *this.
51   Prescanner &set_preprocessingOnly(bool yes) {
52     preprocessingOnly_ = yes;
53     return *this;
54   }
55   Prescanner &set_expandIncludeLines(bool yes) {
56     expandIncludeLines_ = yes;
57     return *this;
58   }
59   Prescanner &set_fixedForm(bool yes) {
60     inFixedForm_ = yes;
61     return *this;
62   }
63   Prescanner &set_encoding(Encoding code) {
64     encoding_ = code;
65     return *this;
66   }
67   Prescanner &set_fixedFormColumnLimit(int limit) {
68     fixedFormColumnLimit_ = limit;
69     return *this;
70   }
71 
     // Registers a compiler directive sentinel string; returns a Prescanner
     // reference (presumably *this, for chaining -- defined out of line).
72   Prescanner &AddCompilerDirectiveSentinel(const std::string &);
73 
     // Prescanning entry points; all three are defined out of line.
74   void Prescan(ProvenanceRange);
75   void Statement();
76   void NextLine();
77 
78   // Callbacks for use by Preprocessor.
     // IsAtEnd() is true once nextLine_ has reached limit_, the first address
     // after the end of the current source.
79   bool IsAtEnd() const { return nextLine_ >= limit_; }
80   bool IsNextLinePreprocessorDirective() const;
81   TokenSequence TokenizePreprocessorDirective();
     // Provenance of the next character to be processed (at_).
82   Provenance GetCurrentProvenance() const { return GetProvenance(at_); }
83 
     // Three overloads that test for a compiler directive sentinel; the
     // pointer results are presumably null when there is no match -- confirm
     // against the out-of-line definitions.
84   const char *IsCompilerDirectiveSentinel(const char *, std::size_t) const;
85   const char *IsCompilerDirectiveSentinel(CharBlock) const;
86   // 'first' is the sentinel, 'second' is beginning of payload
87   std::optional<std::pair<const char *, const char *>>
88   IsCompilerDirectiveSentinel(const char *p) const;
89 
     // Perfectly forwards all arguments to messages_.Say() and returns the
     // Message reference that it yields.
90   template <typename... A> Message &Say(A &&...a) {
91     return messages_.Say(std::forward<A>(a)...);
92   }
93 
94 private:
     // Result of classifying one source line: its kind, the byte offset of
     // its content, and (for compiler directives) its sentinel.
95   struct LineClassification {
96     enum class Kind {
97       Comment,
98       ConditionalCompilationDirective,
99       IncludeDirective, // #include
100     DefinitionDirective, // #define & #undef
101     PreprocessorDirective,
102     IncludeLine, // Fortran INCLUDE
103     CompilerDirective,
104     Source
105   };
        // Not 'explicit', so a bare Kind converts implicitly to a
        // LineClassification with a zero offset and no sentinel.
106     LineClassification(Kind k, std::size_t po = 0, const char *s = nullptr)
107         : kind{k}, payloadOffset{po}, sentinel{s} {}
        // Only the move operations are declared; by the language rules this
        // suppresses the implicit copy constructor and copy assignment.
108     LineClassification(LineClassification &&) = default;
109     LineClassification &operator=(LineClassification &&) = default;
110     Kind kind;
111     std::size_t payloadOffset; // byte offset of content
112     const char *sentinel; // if it's a compiler directive
113   };
114 
      // Starts processing a source line at 'at': resets the column to 1 and
      // forgets any tab seen on the previous line.
115   void BeginSourceLine(const char *at) {
116     at_ = at;
117     column_ = 1;
118     tabInCurrentLine_ = false;
119   }
120 
      // Starts processing at nextLine_, then calls NextLine().  The order
      // matters: at_ must capture nextLine_ before NextLine() runs.
121   void BeginSourceLineAndAdvance() {
122     BeginSourceLine(nextLine_);
123     NextLine();
124   }
125 
      // As BeginSourceLineAndAdvance(), and additionally resets the state
      // that is tracked per statement rather than per line.
126   void BeginStatementAndAdvance() {
127     BeginSourceLineAndAdvance();
128     slashInCurrentStatement_ = false;
129     preventHollerith_ = false;
130     parenthesisNesting_ = 0;
131     continuationLines_ = 0;
132     isPossibleMacroCall_ = false;
133     disableSourceContinuation_ = false;
134   }
135 
      // Maps a pointer into the current source content to its provenance:
      // startProvenance_ plus the byte offset from start_.
136   Provenance GetProvenance(const char *sourceChar) const {
137     return startProvenance_ + (sourceChar - start_);
138   }
139 
      // Provenance range that begins at 'first' and covers the
      // (afterLast - first) bytes of the half-open span [first, afterLast).
140   ProvenanceRange GetProvenanceRange(
141       const char *first, const char *afterLast) const {
142     std::size_t bytes = afterLast - first;
143     return {startProvenance_ + (first - start_), bytes};
144   }
145 
      // Appends 'ch' to 'tokens', tagged with the provenance of the current
      // position (at_).
146   void EmitChar(TokenSequence &tokens, char ch) {
147     tokens.PutNextTokenChar(ch, GetCurrentProvenance());
148   }
149 
      // Appends 'ch' to 'tokens', tagged with a compiler-insertion provenance
      // obtained from allSources_ rather than a source position.
150   void EmitInsertedChar(TokenSequence &tokens, char ch) {
151     Provenance provenance{allSources_.CompilerInsertionProvenance(ch)};
152     tokens.PutNextTokenChar(ch, provenance);
153   }
154 
      // Emits 'ch', calls NextChar(), and returns the character that at_
      // points to afterwards.
155   char EmitCharAndAdvance(TokenSequence &tokens, char ch) {
156     EmitChar(tokens, ch);
157     NextChar();
158     return *at_;
159   }
160 
      // A compiler directive is in progress while directiveSentinel_ is set.
161   bool InCompilerDirective() const { return directiveSentinel_ != nullptr; }
      // Fixed form applies only outside of preprocessor directives and
      // compiler directives.
162   bool InFixedFormSource() const {
163     return inFixedForm_ && !inPreprocessorDirective_ && !InCompilerDirective();
164   }
165 
      // True when 'p' points at "/*" and either a preprocessor directive is
      // being processed, or we are outside a character literal and the
      // ClassicCComments feature is enabled.  p[1] is read only after p[0]
      // has been found to be '/'.
166   bool IsCComment(const char *p) const {
167     return p[0] == '/' && p[1] == '*' &&
168         (inPreprocessorDirective_ ||
169             (!inCharLiteral_ &&
170                 features_.IsEnabled(
171                     common::LanguageFeature::ClassicCComments)));
172   }
173 
      // The remaining private member functions are defined out of line.
174   void CheckAndEmitLine(TokenSequence &, Provenance newlineProvenance);
175   void LabelField(TokenSequence &);
176   void EnforceStupidEndStatementRules(const TokenSequence &);
177   void SkipToEndOfLine();
178   bool MustSkipToEndOfLine() const;
179   void NextChar();
180   // True when input flowed to a continuation line
181   bool SkipToNextSignificantCharacter();
182   void SkipCComments();
183   void SkipSpaces();
184   static const char *SkipWhiteSpace(const char *);
185   const char *SkipWhiteSpaceAndCComments(const char *) const;
186   const char *SkipCComment(const char *) const;
187   bool NextToken(TokenSequence &);
188   bool ExponentAndKind(TokenSequence &);
189   void QuotedCharacterLiteral(TokenSequence &, const char *start);
190   void Hollerith(TokenSequence &, int count, const char *start);
191   bool PadOutCharacterLiteral(TokenSequence &);
192   bool SkipCommentLine(bool afterAmpersand);
193   bool IsFixedFormCommentLine(const char *) const;
194   const char *IsFreeFormComment(const char *) const;
195   std::optional<std::size_t> IsIncludeLine(const char *) const;
196   void FortranInclude(const char *quote);
197   const char *IsPreprocessorDirectiveLine(const char *) const;
198   const char *FixedFormContinuationLine(bool mightNeedSpace);
199   const char *FreeFormContinuationLine(bool ampersand);
200   bool IsImplicitContinuation() const;
201   bool FixedFormContinuation(bool mightNeedSpace);
202   bool FreeFormContinuation();
203   bool Continuation(bool mightNeedFixedFormSpace);
204   std::optional<LineClassification> IsFixedFormCompilerDirectiveLine(
205       const char *) const;
206   std::optional<LineClassification> IsFreeFormCompilerDirectiveLine(
207       const char *) const;
208   LineClassification ClassifyLine(const char *) const;
209   LineClassification ClassifyLine(
210       TokenSequence &, Provenance newlineProvenance) const;
211   void SourceFormChange(std::string &&);
212   bool CompilerDirectiveContinuation(TokenSequence &, const char *sentinel);
213   bool SourceLineContinuation(TokenSequence &);
214 
      // Collaborating objects are held by reference; features_ is this
      // prescanner's own copy.  Data members are initialized in declaration
      // order, so keep this order in step with the constructors.
215   Messages &messages_;
216   CookedSource &cooked_;
217   Preprocessor &preprocessor_;
218   AllSources &allSources_;
219   common::LanguageFeatureControl features_;
      // Configuration and per-statement state; several of these are written
      // by the set_*() members and BeginStatementAndAdvance() above.
220   bool preprocessingOnly_{false};
221   bool expandIncludeLines_{true};
222   bool isNestedInIncludeDirective_{false};
223   bool backslashFreeFormContinuation_{false};
224   bool inFixedForm_{false};
225   int fixedFormColumnLimit_{72};
226   Encoding encoding_{Encoding::UTF_8};
227   int parenthesisNesting_{0};
228   int prescannerNesting_{0};
229   int continuationLines_{0};
230   bool isPossibleMacroCall_{false};
231   bool afterPreprocessingDirective_{false};
232   bool disableSourceContinuation_{false};
233 
      // Provenance that corresponds to start_; see GetProvenance().
234   Provenance startProvenance_;
235   const char *start_{nullptr}; // beginning of current source file content
236   const char *limit_{nullptr}; // first address after end of current source
237   const char *nextLine_{nullptr}; // next line to process; <= limit_
238   const char *directiveSentinel_{nullptr}; // current compiler directive
239 
240   // These data members are state for processing the source line containing
241   // "at_", which extends up to the newline character before "nextLine_".
242   const char *at_{nullptr}; // next character to process; < nextLine_
243   int column_{1}; // card image column position of next character
244   bool tabInCurrentLine_{false};
245   bool slashInCurrentStatement_{false};
246   bool preventHollerith_{false}; // CHARACTER*4HIMOM not Hollerith
247   bool inCharLiteral_{false};
248   bool continuationInCharLiteral_{false};
249   bool inPreprocessorDirective_{false};
250 
251   // In some edge cases of compiler directive continuation lines, it
252   // is necessary to treat the line break as a space character by
253   // setting this flag, which is cleared by EmitChar().
      // NOTE(review): the inline EmitChar() defined in this header does not
      // modify this flag; confirm where it is actually cleared and update the
      // sentence above accordingly.
254   bool insertASpace_{false};
255 
256   // When a free form continuation marker (&) appears at the end of a line
257   // before an INCLUDE or #include, we delete it and omit the newline, so
258   // that the first line of the included file is truly a continuation of
259   // the line before.  Also used when the & appears at the end of the last
260   // line in an include file.
261   bool omitNewline_{false};
262   bool skipLeadingAmpersand_{false};
263 
      // The default member initializers below read cooked_ and allSources_,
      // so these members must remain declared after those references.
264   const std::size_t firstCookedCharacterOffset_{cooked_.BufferedBytes()};
265 
266   const Provenance spaceProvenance_{
267       allSources_.CompilerInsertionProvenance(' ')};
268   const Provenance backslashProvenance_{
269       allSources_.CompilerInsertionProvenance('\\')};
270 
271   // To avoid probing the set of active compiler directive sentinel strings
272   // on every comment line, they're checked first with a cheap Bloom filter.
      // prime2 is the number of bits in the filter; prime1 is not referenced
      // anywhere else in this header.
273   static const int prime1{1019}, prime2{1021};
274   std::bitset<prime2> compilerDirectiveBloomFilter_; // 128 bytes
275   std::unordered_set<std::string> compilerDirectiveSentinels_;
276 };
277 } // namespace Fortran::parser
278 #endif // FORTRAN_PARSER_PRESCAN_H_
279