xref: /llvm-project/flang/include/flang/Parser/provenance.h (revision 0ee0eeb4bb9be6aeef6c84121ca1af463840fb6a)
1 //===-- include/flang/Parser/provenance.h -----------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #ifndef FORTRAN_PARSER_PROVENANCE_H_
10 #define FORTRAN_PARSER_PROVENANCE_H_
11 
12 #include "char-block.h"
13 #include "char-buffer.h"
14 #include "characters.h"
15 #include "source.h"
16 #include "flang/Common/idioms.h"
17 #include "flang/Common/interval.h"
18 #include "llvm/Support/raw_ostream.h"
19 #include <cstddef>
20 #include <list>
21 #include <map>
22 #include <memory>
23 #include <optional>
24 #include <string>
25 #include <utility>
26 #include <variant>
27 #include <vector>
28 
29 namespace Fortran::parser {
30 
31 // Each character in the contiguous source stream built by the
32 // prescanner corresponds to a particular character in a source file,
33 // include file, macro expansion, or compiler-inserted text.
34 // The location of this original character to which a parsable character
35 // corresponds is its provenance.
36 //
37 // Provenances are offsets into an (unmaterialized) marshaling of the
38 // entire contents of all the original source files, include files, macro
39 // expansions, &c. for each visit to each source.  These origins of the
40 // original source characters constitute a forest whose roots are
41 // the original source files named on the compiler's command line.
42 // Given a Provenance, we can find the tree node that contains it in time
43 // O(log(# of origins)), and describe the position precisely by walking
44 // up the tree.  (It would be possible, via a time/space trade-off, to
45 // cap the time by the use of an intermediate table that would be indexed
46 // by the upper bits of an offset, but that does not appear to be
47 // necessary.)
48 
// Forward declarations; both classes are defined further down in this
// header and are referred to by the classes that precede them.
class AllSources;
class AllCookedSources;
51 
52 class Provenance {
53 public:
54   Provenance() {}
55   Provenance(std::size_t offset) : offset_{offset} { CHECK(offset > 0); }
56   Provenance(const Provenance &that) = default;
57   Provenance(Provenance &&that) = default;
58   Provenance &operator=(const Provenance &that) = default;
59   Provenance &operator=(Provenance &&that) = default;
60 
61   std::size_t offset() const { return offset_; }
62 
63   Provenance operator+(ptrdiff_t n) const {
64     CHECK(n > -static_cast<ptrdiff_t>(offset_));
65     return {offset_ + static_cast<std::size_t>(n)};
66   }
67   Provenance operator+(std::size_t n) const { return {offset_ + n}; }
68   std::size_t operator-(Provenance that) const {
69     CHECK(that <= *this);
70     return offset_ - that.offset_;
71   }
72   bool operator<(Provenance that) const { return offset_ < that.offset_; }
73   bool operator<=(Provenance that) const { return !(that < *this); }
74   bool operator==(Provenance that) const { return offset_ == that.offset_; }
75   bool operator!=(Provenance that) const { return !(*this == that); }
76 
77 private:
78   std::size_t offset_{0};
79 };
80 
// A range of provenances, expressed with the common::Interval template
// (see the flang/Common/interval.h include above).
using ProvenanceRange = common::Interval<Provenance>;
82 
83 // Maps contiguous ranges of byte offsets in original source files to
84 // contiguous ranges in the cooked character stream; essentially a
85 // partial inversion of OffsetToProvenanceMappings (below).
86 // Used for implementing the first step of mapping an identifier
87 // selected in a code editor to one of its declarative statements.
88 class ProvenanceRangeToOffsetMappings {
89 public:
90   ProvenanceRangeToOffsetMappings();
91   ~ProvenanceRangeToOffsetMappings();
92   bool empty() const { return map_.empty(); }
93   void Put(ProvenanceRange, std::size_t offset);
94   std::optional<std::size_t> Map(ProvenanceRange) const;
95   llvm::raw_ostream &Dump(llvm::raw_ostream &) const;
96 
97 private:
98   // A comparison function object for use in std::multimap<Compare=>.
99   // Intersecting intervals will effectively compare equal, not being
100   // either < nor >= each other.
101   struct WhollyPrecedes {
102     bool operator()(ProvenanceRange, ProvenanceRange) const;
103   };
104 
105   std::multimap<ProvenanceRange, std::size_t, WhollyPrecedes> map_;
106 };
107 
108 // Maps 0-based local offsets in some contiguous range (e.g., a token
109 // sequence) to their provenances.  Lookup time is on the order of
110 // O(log(#of intervals with contiguous provenances)).  As mentioned
111 // above, this time could be capped via a time/space trade-off.
112 class OffsetToProvenanceMappings {
113 public:
114   OffsetToProvenanceMappings() {}
115   void clear();
116   void swap(OffsetToProvenanceMappings &);
117   void shrink_to_fit();
118   std::size_t SizeInBytes() const;
119   void Put(ProvenanceRange);
120   void Put(const OffsetToProvenanceMappings &);
121   ProvenanceRange Map(std::size_t at) const;
122   void RemoveLastBytes(std::size_t);
123   ProvenanceRangeToOffsetMappings Invert(const AllSources &) const;
124   llvm::raw_ostream &Dump(llvm::raw_ostream &) const;
125 
126 private:
127   struct ContiguousProvenanceMapping {
128     std::size_t start;
129     ProvenanceRange range;
130   };
131 
132   // Elements appear in ascending order of distinct .start values;
133   // their .range values are disjoint and not necessarily adjacent.
134   std::vector<ContiguousProvenanceMapping> provenanceMap_;
135 };
136 
137 // A singleton AllSources instance for the whole compilation
138 // is shared by reference.
139 class AllSources {
140 public:
141   AllSources();
142   ~AllSources();
143 
144   std::size_t size() const { return range_.size(); }
145   const char &operator[](Provenance) const;
146   Encoding encoding() const { return encoding_; }
147   AllSources &set_encoding(Encoding e) {
148     encoding_ = e;
149     return *this;
150   }
151 
152   void ClearSearchPath();
153   void AppendSearchPathDirectory(std::string); // new last directory
154   const SourceFile *OpenPath(std::string path, llvm::raw_ostream &error);
155   const SourceFile *Open(std::string path, llvm::raw_ostream &error,
156       std::optional<std::string> &&prependPath = std::nullopt);
157   const SourceFile *ReadStandardInput(llvm::raw_ostream &error);
158 
159   ProvenanceRange AddIncludedFile(
160       const SourceFile &, ProvenanceRange, bool isModule = false);
161   ProvenanceRange AddMacroCall(
162       ProvenanceRange def, ProvenanceRange use, const std::string &expansion);
163   ProvenanceRange AddCompilerInsertion(std::string);
164 
165   // If provenance is in an expanded macro, return the starting provenance of
166   // the replaced macro. Otherwise, return the input provenance.
167   Provenance GetReplacedProvenance(Provenance) const;
168 
169   bool IsValid(Provenance at) const { return range_.Contains(at); }
170   bool IsValid(ProvenanceRange range) const {
171     return range.size() > 0 && range_.Contains(range);
172   }
173   void setShowColors(bool showColors) { showColors_ = showColors; }
174   bool getShowColors() const { return showColors_; }
175   std::optional<ProvenanceRange> GetInclusionInfo(
176       const std::optional<ProvenanceRange> &) const;
177   void EmitMessage(llvm::raw_ostream &, const std::optional<ProvenanceRange> &,
178       const std::string &message, const std::string &prefix,
179       llvm::raw_ostream::Colors color, bool echoSourceLine = false) const;
180   const SourceFile *GetSourceFile(
181       Provenance, std::size_t *offset = nullptr, bool topLevel = false) const;
182   const char *GetSource(ProvenanceRange) const;
183   std::optional<SourcePosition> GetSourcePosition(Provenance) const;
184   std::optional<ProvenanceRange> GetFirstFileProvenance() const;
185   std::string GetPath(Provenance, bool topLevel = false) const; // __FILE__
186   int GetLineNumber(Provenance) const; // __LINE__
187   Provenance CompilerInsertionProvenance(char ch);
188   ProvenanceRange IntersectionWithSourceFiles(ProvenanceRange) const;
189   llvm::raw_ostream &Dump(llvm::raw_ostream &) const;
190 
191 private:
192   struct Inclusion {
193     const SourceFile &source;
194     bool isModule{false};
195   };
196   struct Macro {
197     ProvenanceRange definition;
198     std::string expansion;
199   };
200   struct CompilerInsertion {
201     std::string text;
202   };
203 
204   struct Origin {
205     Origin(ProvenanceRange, const SourceFile &);
206     Origin(ProvenanceRange, const SourceFile &, ProvenanceRange,
207         bool isModule = false);
208     Origin(ProvenanceRange, ProvenanceRange def, ProvenanceRange use,
209         const std::string &expansion);
210     Origin(ProvenanceRange, const std::string &);
211 
212     const char &operator[](std::size_t) const;
213 
214     std::variant<Inclusion, Macro, CompilerInsertion> u;
215     ProvenanceRange covers, replaces;
216   };
217 
218   const Origin &MapToOrigin(Provenance) const;
219 
220   // Elements are in ascending & contiguous order of .covers.
221   std::vector<Origin> origin_;
222   ProvenanceRange range_;
223   std::map<char, Provenance> compilerInsertionProvenance_;
224   std::vector<std::unique_ptr<SourceFile>> ownedSourceFiles_;
225   std::list<std::string> searchPath_;
226   Encoding encoding_{Encoding::UTF_8};
227   bool showColors_{false};
228 };
229 
230 // Represents the result of preprocessing and prescanning a single source
231 // file (and all its inclusions) or module file.  Parsers operate within
232 // single instances of CookedSource.
233 class CookedSource {
234 public:
235   explicit CookedSource(AllSources &allSources) : allSources_{allSources} {};
236 
237   int number() const { return number_; }
238   void set_number(int n) { number_ = n; }
239 
240   CharBlock AsCharBlock() const { return CharBlock{data_}; }
241   std::optional<ProvenanceRange> GetProvenanceRange(CharBlock) const;
242   std::optional<CharBlock> GetCharBlock(ProvenanceRange) const;
243 
244   // The result of a Put() is the offset that the new data
245   // will have in the eventually marshaled contiguous buffer.
246   std::size_t Put(const char *data, std::size_t bytes) {
247     return buffer_.Put(data, bytes);
248   }
249   std::size_t Put(const std::string &s) { return buffer_.Put(s); }
250   std::size_t Put(char ch) { return buffer_.Put(&ch, 1); }
251   std::size_t Put(char ch, Provenance p) {
252     provenanceMap_.Put(ProvenanceRange{p, 1});
253     return buffer_.Put(&ch, 1);
254   }
255 
256   void PutProvenance(Provenance p) { provenanceMap_.Put(ProvenanceRange{p}); }
257   void PutProvenance(ProvenanceRange pr) { provenanceMap_.Put(pr); }
258   void PutProvenanceMappings(const OffsetToProvenanceMappings &pm) {
259     provenanceMap_.Put(pm);
260   }
261 
262   void MarkPossibleFixedFormContinuation() {
263     possibleFixedFormContinuations_.push_back(BufferedBytes());
264   }
265 
266   std::size_t BufferedBytes() const;
267   void Marshal(AllCookedSources &); // marshals text into one contiguous block
268   void CompileProvenanceRangeToOffsetMappings(AllSources &);
269   llvm::raw_ostream &Dump(llvm::raw_ostream &) const;
270 
271 private:
272   AllSources &allSources_;
273   int number_{0}; // for sorting purposes
274   CharBuffer buffer_; // before Marshal()
275   std::string data_; // all of it, prescanned and preprocessed
276   OffsetToProvenanceMappings provenanceMap_;
277   ProvenanceRangeToOffsetMappings invertedMap_;
278   std::list<std::size_t> possibleFixedFormContinuations_;
279 };
280 
281 class AllCookedSources {
282 public:
283   explicit AllCookedSources(AllSources &);
284   ~AllCookedSources();
285 
286   AllSources &allSources() { return allSources_; }
287   const AllSources &allSources() const { return allSources_; }
288 
289   CookedSource &NewCookedSource();
290 
291   const CookedSource *Find(CharBlock) const;
292   const CookedSource *Find(const char *p) const { return Find(CharBlock{p}); }
293 
294   bool IsValid(ProvenanceRange r) const { return allSources_.IsValid(r); }
295 
296   std::optional<ProvenanceRange> GetProvenanceRange(CharBlock) const;
297   std::optional<CharBlock> GetCharBlockFromLineAndColumns(
298       int line, int startColumn, int endColumn) const;
299   std::optional<std::pair<SourcePosition, SourcePosition>>
300       GetSourcePositionRange(CharBlock) const;
301   std::optional<CharBlock> GetCharBlock(ProvenanceRange) const;
302   void Dump(llvm::raw_ostream &) const;
303 
304   // For sorting symbol names without being dependent on pointer values
305   bool Precedes(CharBlock, CharBlock) const;
306 
307   // Once a CookedSource is complete, add it to index_ and assign its number_
308   void Register(CookedSource &);
309 
310 private:
311   AllSources &allSources_;
312   std::list<CookedSource> cooked_; // owns all CookedSource instances
313   std::map<CharBlock, const CookedSource &, CharBlockPointerComparator> index_;
314 };
315 
316 } // namespace Fortran::parser
317 #endif // FORTRAN_PARSER_PROVENANCE_H_
318