1 //===-- include/flang/Parser/provenance.h -----------------------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #ifndef FORTRAN_PARSER_PROVENANCE_H_ 10 #define FORTRAN_PARSER_PROVENANCE_H_ 11 12 #include "char-block.h" 13 #include "char-buffer.h" 14 #include "characters.h" 15 #include "source.h" 16 #include "flang/Common/idioms.h" 17 #include "flang/Common/interval.h" 18 #include "llvm/Support/raw_ostream.h" 19 #include <cstddef> 20 #include <list> 21 #include <map> 22 #include <memory> 23 #include <optional> 24 #include <string> 25 #include <utility> 26 #include <variant> 27 #include <vector> 28 29 namespace Fortran::parser { 30 31 // Each character in the contiguous source stream built by the 32 // prescanner corresponds to a particular character in a source file, 33 // include file, macro expansion, or compiler-inserted text. 34 // The location of this original character to which a parsable character 35 // corresponds is its provenance. 36 // 37 // Provenances are offsets into an (unmaterialized) marshaling of the 38 // entire contents of all the original source files, include files, macro 39 // expansions, &c. for each visit to each source. These origins of the 40 // original source characters constitute a forest whose roots are 41 // the original source files named on the compiler's command line. 42 // Given a Provenance, we can find the tree node that contains it in time 43 // O(log(# of origins)), and describe the position precisely by walking 44 // up the tree. (It would be possible, via a time/space trade-off, to 45 // cap the time by the use of an intermediate table that would be indexed 46 // by the upper bits of an offset, but that does not appear to be 47 // necessary.) 48 49 class AllSources; 50 class AllCookedSources; 51 52 class Provenance { 53 public: 54 Provenance() {} 55 Provenance(std::size_t offset) : offset_{offset} { CHECK(offset > 0); } 56 Provenance(const Provenance &that) = default; 57 Provenance(Provenance &&that) = default; 58 Provenance &operator=(const Provenance &that) = default; 59 Provenance &operator=(Provenance &&that) = default; 60 61 std::size_t offset() const { return offset_; } 62 63 Provenance operator+(ptrdiff_t n) const { 64 CHECK(n > -static_cast<ptrdiff_t>(offset_)); 65 return {offset_ + static_cast<std::size_t>(n)}; 66 } 67 Provenance operator+(std::size_t n) const { return {offset_ + n}; } 68 std::size_t operator-(Provenance that) const { 69 CHECK(that <= *this); 70 return offset_ - that.offset_; 71 } 72 bool operator<(Provenance that) const { return offset_ < that.offset_; } 73 bool operator<=(Provenance that) const { return !(that < *this); } 74 bool operator==(Provenance that) const { return offset_ == that.offset_; } 75 bool operator!=(Provenance that) const { return !(*this == that); } 76 77 private: 78 std::size_t offset_{0}; 79 }; 80 81 using ProvenanceRange = common::Interval<Provenance>; 82 83 // Maps contiguous ranges of byte offsets in original source files to 84 // contiguous ranges in the cooked character stream; essentially a 85 // partial inversion of OffsetToProvenanceMappings (below). 86 // Used for implementing the first step of mapping an identifier 87 // selected in a code editor to one of its declarative statements. 88 class ProvenanceRangeToOffsetMappings { 89 public: 90 ProvenanceRangeToOffsetMappings(); 91 ~ProvenanceRangeToOffsetMappings(); 92 bool empty() const { return map_.empty(); } 93 void Put(ProvenanceRange, std::size_t offset); 94 std::optional<std::size_t> Map(ProvenanceRange) const; 95 llvm::raw_ostream &Dump(llvm::raw_ostream &) const; 96 97 private: 98 // A comparison function object for use in std::multimap<Compare=>. 99 // Intersecting intervals will effectively compare equal, not being 100 // either < nor >= each other. 101 struct WhollyPrecedes { 102 bool operator()(ProvenanceRange, ProvenanceRange) const; 103 }; 104 105 std::multimap<ProvenanceRange, std::size_t, WhollyPrecedes> map_; 106 }; 107 108 // Maps 0-based local offsets in some contiguous range (e.g., a token 109 // sequence) to their provenances. Lookup time is on the order of 110 // O(log(#of intervals with contiguous provenances)). As mentioned 111 // above, this time could be capped via a time/space trade-off. 112 class OffsetToProvenanceMappings { 113 public: 114 OffsetToProvenanceMappings() {} 115 void clear(); 116 void swap(OffsetToProvenanceMappings &); 117 void shrink_to_fit(); 118 std::size_t SizeInBytes() const; 119 void Put(ProvenanceRange); 120 void Put(const OffsetToProvenanceMappings &); 121 ProvenanceRange Map(std::size_t at) const; 122 void RemoveLastBytes(std::size_t); 123 ProvenanceRangeToOffsetMappings Invert(const AllSources &) const; 124 llvm::raw_ostream &Dump(llvm::raw_ostream &) const; 125 126 private: 127 struct ContiguousProvenanceMapping { 128 std::size_t start; 129 ProvenanceRange range; 130 }; 131 132 // Elements appear in ascending order of distinct .start values; 133 // their .range values are disjoint and not necessarily adjacent. 134 std::vector<ContiguousProvenanceMapping> provenanceMap_; 135 }; 136 137 // A singleton AllSources instance for the whole compilation 138 // is shared by reference. 139 class AllSources { 140 public: 141 AllSources(); 142 ~AllSources(); 143 144 std::size_t size() const { return range_.size(); } 145 const char &operator[](Provenance) const; 146 Encoding encoding() const { return encoding_; } 147 AllSources &set_encoding(Encoding e) { 148 encoding_ = e; 149 return *this; 150 } 151 152 void ClearSearchPath(); 153 void AppendSearchPathDirectory(std::string); // new last directory 154 const SourceFile *OpenPath(std::string path, llvm::raw_ostream &error); 155 const SourceFile *Open(std::string path, llvm::raw_ostream &error, 156 std::optional<std::string> &&prependPath = std::nullopt); 157 const SourceFile *ReadStandardInput(llvm::raw_ostream &error); 158 159 ProvenanceRange AddIncludedFile( 160 const SourceFile &, ProvenanceRange, bool isModule = false); 161 ProvenanceRange AddMacroCall( 162 ProvenanceRange def, ProvenanceRange use, const std::string &expansion); 163 ProvenanceRange AddCompilerInsertion(std::string); 164 165 // If provenance is in an expanded macro, return the starting provenance of 166 // the replaced macro. Otherwise, return the input provenance. 167 Provenance GetReplacedProvenance(Provenance) const; 168 169 bool IsValid(Provenance at) const { return range_.Contains(at); } 170 bool IsValid(ProvenanceRange range) const { 171 return range.size() > 0 && range_.Contains(range); 172 } 173 void setShowColors(bool showColors) { showColors_ = showColors; } 174 bool getShowColors() const { return showColors_; } 175 std::optional<ProvenanceRange> GetInclusionInfo( 176 const std::optional<ProvenanceRange> &) const; 177 void EmitMessage(llvm::raw_ostream &, const std::optional<ProvenanceRange> &, 178 const std::string &message, const std::string &prefix, 179 llvm::raw_ostream::Colors color, bool echoSourceLine = false) const; 180 const SourceFile *GetSourceFile( 181 Provenance, std::size_t *offset = nullptr, bool topLevel = false) const; 182 const char *GetSource(ProvenanceRange) const; 183 std::optional<SourcePosition> GetSourcePosition(Provenance) const; 184 std::optional<ProvenanceRange> GetFirstFileProvenance() const; 185 std::string GetPath(Provenance, bool topLevel = false) const; // __FILE__ 186 int GetLineNumber(Provenance) const; // __LINE__ 187 Provenance CompilerInsertionProvenance(char ch); 188 ProvenanceRange IntersectionWithSourceFiles(ProvenanceRange) const; 189 llvm::raw_ostream &Dump(llvm::raw_ostream &) const; 190 191 private: 192 struct Inclusion { 193 const SourceFile &source; 194 bool isModule{false}; 195 }; 196 struct Macro { 197 ProvenanceRange definition; 198 std::string expansion; 199 }; 200 struct CompilerInsertion { 201 std::string text; 202 }; 203 204 struct Origin { 205 Origin(ProvenanceRange, const SourceFile &); 206 Origin(ProvenanceRange, const SourceFile &, ProvenanceRange, 207 bool isModule = false); 208 Origin(ProvenanceRange, ProvenanceRange def, ProvenanceRange use, 209 const std::string &expansion); 210 Origin(ProvenanceRange, const std::string &); 211 212 const char &operator[](std::size_t) const; 213 214 std::variant<Inclusion, Macro, CompilerInsertion> u; 215 ProvenanceRange covers, replaces; 216 }; 217 218 const Origin &MapToOrigin(Provenance) const; 219 220 // Elements are in ascending & contiguous order of .covers. 221 std::vector<Origin> origin_; 222 ProvenanceRange range_; 223 std::map<char, Provenance> compilerInsertionProvenance_; 224 std::vector<std::unique_ptr<SourceFile>> ownedSourceFiles_; 225 std::list<std::string> searchPath_; 226 Encoding encoding_{Encoding::UTF_8}; 227 bool showColors_{false}; 228 }; 229 230 // Represents the result of preprocessing and prescanning a single source 231 // file (and all its inclusions) or module file. Parsers operate within 232 // single instances of CookedSource. 233 class CookedSource { 234 public: 235 explicit CookedSource(AllSources &allSources) : allSources_{allSources} {}; 236 237 int number() const { return number_; } 238 void set_number(int n) { number_ = n; } 239 240 CharBlock AsCharBlock() const { return CharBlock{data_}; } 241 std::optional<ProvenanceRange> GetProvenanceRange(CharBlock) const; 242 std::optional<CharBlock> GetCharBlock(ProvenanceRange) const; 243 244 // The result of a Put() is the offset that the new data 245 // will have in the eventually marshaled contiguous buffer. 246 std::size_t Put(const char *data, std::size_t bytes) { 247 return buffer_.Put(data, bytes); 248 } 249 std::size_t Put(const std::string &s) { return buffer_.Put(s); } 250 std::size_t Put(char ch) { return buffer_.Put(&ch, 1); } 251 std::size_t Put(char ch, Provenance p) { 252 provenanceMap_.Put(ProvenanceRange{p, 1}); 253 return buffer_.Put(&ch, 1); 254 } 255 256 void PutProvenance(Provenance p) { provenanceMap_.Put(ProvenanceRange{p}); } 257 void PutProvenance(ProvenanceRange pr) { provenanceMap_.Put(pr); } 258 void PutProvenanceMappings(const OffsetToProvenanceMappings &pm) { 259 provenanceMap_.Put(pm); 260 } 261 262 void MarkPossibleFixedFormContinuation() { 263 possibleFixedFormContinuations_.push_back(BufferedBytes()); 264 } 265 266 std::size_t BufferedBytes() const; 267 void Marshal(AllCookedSources &); // marshals text into one contiguous block 268 void CompileProvenanceRangeToOffsetMappings(AllSources &); 269 llvm::raw_ostream &Dump(llvm::raw_ostream &) const; 270 271 private: 272 AllSources &allSources_; 273 int number_{0}; // for sorting purposes 274 CharBuffer buffer_; // before Marshal() 275 std::string data_; // all of it, prescanned and preprocessed 276 OffsetToProvenanceMappings provenanceMap_; 277 ProvenanceRangeToOffsetMappings invertedMap_; 278 std::list<std::size_t> possibleFixedFormContinuations_; 279 }; 280 281 class AllCookedSources { 282 public: 283 explicit AllCookedSources(AllSources &); 284 ~AllCookedSources(); 285 286 AllSources &allSources() { return allSources_; } 287 const AllSources &allSources() const { return allSources_; } 288 289 CookedSource &NewCookedSource(); 290 291 const CookedSource *Find(CharBlock) const; 292 const CookedSource *Find(const char *p) const { return Find(CharBlock{p}); } 293 294 bool IsValid(ProvenanceRange r) const { return allSources_.IsValid(r); } 295 296 std::optional<ProvenanceRange> GetProvenanceRange(CharBlock) const; 297 std::optional<CharBlock> GetCharBlockFromLineAndColumns( 298 int line, int startColumn, int endColumn) const; 299 std::optional<std::pair<SourcePosition, SourcePosition>> 300 GetSourcePositionRange(CharBlock) const; 301 std::optional<CharBlock> GetCharBlock(ProvenanceRange) const; 302 void Dump(llvm::raw_ostream &) const; 303 304 // For sorting symbol names without being dependent on pointer values 305 bool Precedes(CharBlock, CharBlock) const; 306 307 // Once a CookedSource is complete, add it to index_ and assign its number_ 308 void Register(CookedSource &); 309 310 private: 311 AllSources &allSources_; 312 std::list<CookedSource> cooked_; // owns all CookedSource instances 313 std::map<CharBlock, const CookedSource &, CharBlockPointerComparator> index_; 314 }; 315 316 } // namespace Fortran::parser 317 #endif // FORTRAN_PARSER_PROVENANCE_H_ 318