1 //===-- lib/Parser/prescan.h ------------------------------------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #ifndef FORTRAN_PARSER_PRESCAN_H_ 10 #define FORTRAN_PARSER_PRESCAN_H_ 11 12 // Defines a fast Fortran source prescanning phase that implements some 13 // character-level features of the language that can be inefficient to 14 // support directly in a backtracking parser. This phase handles Fortran 15 // line continuation, comment removal, card image margins, padding out 16 // fixed form character literals on truncated card images, file 17 // inclusion, and driving the Fortran source preprocessor. 18 19 #include "flang/Common/Fortran-features.h" 20 #include "flang/Parser/characters.h" 21 #include "flang/Parser/message.h" 22 #include "flang/Parser/provenance.h" 23 #include "flang/Parser/token-sequence.h" 24 #include <bitset> 25 #include <optional> 26 #include <string> 27 #include <unordered_set> 28 29 namespace Fortran::parser { 30 31 class Messages; 32 class Preprocessor; 33 34 class Prescanner { 35 public: 36 Prescanner(Messages &, CookedSource &, Preprocessor &, 37 common::LanguageFeatureControl); 38 Prescanner( 39 const Prescanner &, Preprocessor &, bool isNestedInIncludeDirective); 40 Prescanner(const Prescanner &) = delete; 41 Prescanner(Prescanner &&) = delete; 42 43 const AllSources &allSources() const { return allSources_; } 44 AllSources &allSources() { return allSources_; } 45 const Messages &messages() const { return messages_; } 46 Messages &messages() { return messages_; } 47 const Preprocessor &preprocessor() const { return preprocessor_; } 48 Preprocessor &preprocessor() { return preprocessor_; } 49 common::LanguageFeatureControl &features() { return features_; } 50 51 Prescanner &set_preprocessingOnly(bool yes) { 52 preprocessingOnly_ = yes; 53 return *this; 54 } 55 Prescanner &set_expandIncludeLines(bool yes) { 56 expandIncludeLines_ = yes; 57 return *this; 58 } 59 Prescanner &set_fixedForm(bool yes) { 60 inFixedForm_ = yes; 61 return *this; 62 } 63 Prescanner &set_encoding(Encoding code) { 64 encoding_ = code; 65 return *this; 66 } 67 Prescanner &set_fixedFormColumnLimit(int limit) { 68 fixedFormColumnLimit_ = limit; 69 return *this; 70 } 71 72 Prescanner &AddCompilerDirectiveSentinel(const std::string &); 73 74 void Prescan(ProvenanceRange); 75 void Statement(); 76 void NextLine(); 77 78 // Callbacks for use by Preprocessor. 79 bool IsAtEnd() const { return nextLine_ >= limit_; } 80 bool IsNextLinePreprocessorDirective() const; 81 TokenSequence TokenizePreprocessorDirective(); 82 Provenance GetCurrentProvenance() const { return GetProvenance(at_); } 83 84 const char *IsCompilerDirectiveSentinel(const char *, std::size_t) const; 85 const char *IsCompilerDirectiveSentinel(CharBlock) const; 86 // 'first' is the sentinel, 'second' is beginning of payload 87 std::optional<std::pair<const char *, const char *>> 88 IsCompilerDirectiveSentinel(const char *p) const; 89 90 template <typename... A> Message &Say(A &&...a) { 91 return messages_.Say(std::forward<A>(a)...); 92 } 93 94 private: 95 struct LineClassification { 96 enum class Kind { 97 Comment, 98 ConditionalCompilationDirective, 99 IncludeDirective, // #include 100 DefinitionDirective, // #define & #undef 101 PreprocessorDirective, 102 IncludeLine, // Fortran INCLUDE 103 CompilerDirective, 104 Source 105 }; 106 LineClassification(Kind k, std::size_t po = 0, const char *s = nullptr) 107 : kind{k}, payloadOffset{po}, sentinel{s} {} 108 LineClassification(LineClassification &&) = default; 109 LineClassification &operator=(LineClassification &&) = default; 110 Kind kind; 111 std::size_t payloadOffset; // byte offset of content 112 const char *sentinel; // if it's a compiler directive 113 }; 114 115 void BeginSourceLine(const char *at) { 116 at_ = at; 117 column_ = 1; 118 tabInCurrentLine_ = false; 119 } 120 121 void BeginSourceLineAndAdvance() { 122 BeginSourceLine(nextLine_); 123 NextLine(); 124 } 125 126 void BeginStatementAndAdvance() { 127 BeginSourceLineAndAdvance(); 128 slashInCurrentStatement_ = false; 129 preventHollerith_ = false; 130 parenthesisNesting_ = 0; 131 continuationLines_ = 0; 132 isPossibleMacroCall_ = false; 133 disableSourceContinuation_ = false; 134 } 135 136 Provenance GetProvenance(const char *sourceChar) const { 137 return startProvenance_ + (sourceChar - start_); 138 } 139 140 ProvenanceRange GetProvenanceRange( 141 const char *first, const char *afterLast) const { 142 std::size_t bytes = afterLast - first; 143 return {startProvenance_ + (first - start_), bytes}; 144 } 145 146 void EmitChar(TokenSequence &tokens, char ch) { 147 tokens.PutNextTokenChar(ch, GetCurrentProvenance()); 148 } 149 150 void EmitInsertedChar(TokenSequence &tokens, char ch) { 151 Provenance provenance{allSources_.CompilerInsertionProvenance(ch)}; 152 tokens.PutNextTokenChar(ch, provenance); 153 } 154 155 char EmitCharAndAdvance(TokenSequence &tokens, char ch) { 156 EmitChar(tokens, ch); 157 NextChar(); 158 return *at_; 159 } 160 161 bool InCompilerDirective() const { return directiveSentinel_ != nullptr; } 162 bool InFixedFormSource() const { 163 return inFixedForm_ && !inPreprocessorDirective_ && !InCompilerDirective(); 164 } 165 166 bool IsCComment(const char *p) const { 167 return p[0] == '/' && p[1] == '*' && 168 (inPreprocessorDirective_ || 169 (!inCharLiteral_ && 170 features_.IsEnabled( 171 common::LanguageFeature::ClassicCComments))); 172 } 173 174 void CheckAndEmitLine(TokenSequence &, Provenance newlineProvenance); 175 void LabelField(TokenSequence &); 176 void EnforceStupidEndStatementRules(const TokenSequence &); 177 void SkipToEndOfLine(); 178 bool MustSkipToEndOfLine() const; 179 void NextChar(); 180 // True when input flowed to a continuation line 181 bool SkipToNextSignificantCharacter(); 182 void SkipCComments(); 183 void SkipSpaces(); 184 static const char *SkipWhiteSpace(const char *); 185 const char *SkipWhiteSpaceAndCComments(const char *) const; 186 const char *SkipCComment(const char *) const; 187 bool NextToken(TokenSequence &); 188 bool ExponentAndKind(TokenSequence &); 189 void QuotedCharacterLiteral(TokenSequence &, const char *start); 190 void Hollerith(TokenSequence &, int count, const char *start); 191 bool PadOutCharacterLiteral(TokenSequence &); 192 bool SkipCommentLine(bool afterAmpersand); 193 bool IsFixedFormCommentLine(const char *) const; 194 const char *IsFreeFormComment(const char *) const; 195 std::optional<std::size_t> IsIncludeLine(const char *) const; 196 void FortranInclude(const char *quote); 197 const char *IsPreprocessorDirectiveLine(const char *) const; 198 const char *FixedFormContinuationLine(bool mightNeedSpace); 199 const char *FreeFormContinuationLine(bool ampersand); 200 bool IsImplicitContinuation() const; 201 bool FixedFormContinuation(bool mightNeedSpace); 202 bool FreeFormContinuation(); 203 bool Continuation(bool mightNeedFixedFormSpace); 204 std::optional<LineClassification> IsFixedFormCompilerDirectiveLine( 205 const char *) const; 206 std::optional<LineClassification> IsFreeFormCompilerDirectiveLine( 207 const char *) const; 208 LineClassification ClassifyLine(const char *) const; 209 LineClassification ClassifyLine( 210 TokenSequence &, Provenance newlineProvenance) const; 211 void SourceFormChange(std::string &&); 212 bool CompilerDirectiveContinuation(TokenSequence &, const char *sentinel); 213 bool SourceLineContinuation(TokenSequence &); 214 215 Messages &messages_; 216 CookedSource &cooked_; 217 Preprocessor &preprocessor_; 218 AllSources &allSources_; 219 common::LanguageFeatureControl features_; 220 bool preprocessingOnly_{false}; 221 bool expandIncludeLines_{true}; 222 bool isNestedInIncludeDirective_{false}; 223 bool backslashFreeFormContinuation_{false}; 224 bool inFixedForm_{false}; 225 int fixedFormColumnLimit_{72}; 226 Encoding encoding_{Encoding::UTF_8}; 227 int parenthesisNesting_{0}; 228 int prescannerNesting_{0}; 229 int continuationLines_{0}; 230 bool isPossibleMacroCall_{false}; 231 bool afterPreprocessingDirective_{false}; 232 bool disableSourceContinuation_{false}; 233 234 Provenance startProvenance_; 235 const char *start_{nullptr}; // beginning of current source file content 236 const char *limit_{nullptr}; // first address after end of current source 237 const char *nextLine_{nullptr}; // next line to process; <= limit_ 238 const char *directiveSentinel_{nullptr}; // current compiler directive 239 240 // These data members are state for processing the source line containing 241 // "at_", which goes to up to the newline character before "nextLine_". 242 const char *at_{nullptr}; // next character to process; < nextLine_ 243 int column_{1}; // card image column position of next character 244 bool tabInCurrentLine_{false}; 245 bool slashInCurrentStatement_{false}; 246 bool preventHollerith_{false}; // CHARACTER*4HIMOM not Hollerith 247 bool inCharLiteral_{false}; 248 bool continuationInCharLiteral_{false}; 249 bool inPreprocessorDirective_{false}; 250 251 // In some edge cases of compiler directive continuation lines, it 252 // is necessary to treat the line break as a space character by 253 // setting this flag, which is cleared by EmitChar(). 254 bool insertASpace_{false}; 255 256 // When a free form continuation marker (&) appears at the end of a line 257 // before a INCLUDE or #include, we delete it and omit the newline, so 258 // that the first line of the included file is truly a continuation of 259 // the line before. Also used when the & appears at the end of the last 260 // line in an include file. 261 bool omitNewline_{false}; 262 bool skipLeadingAmpersand_{false}; 263 264 const std::size_t firstCookedCharacterOffset_{cooked_.BufferedBytes()}; 265 266 const Provenance spaceProvenance_{ 267 allSources_.CompilerInsertionProvenance(' ')}; 268 const Provenance backslashProvenance_{ 269 allSources_.CompilerInsertionProvenance('\\')}; 270 271 // To avoid probing the set of active compiler directive sentinel strings 272 // on every comment line, they're checked first with a cheap Bloom filter. 273 static const int prime1{1019}, prime2{1021}; 274 std::bitset<prime2> compilerDirectiveBloomFilter_; // 128 bytes 275 std::unordered_set<std::string> compilerDirectiveSentinels_; 276 }; 277 } // namespace Fortran::parser 278 #endif // FORTRAN_PARSER_PRESCAN_H_ 279