1 //===--- Headers.h - Include headers -----------------------------*- C++-*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #ifndef LLVM_CLANG_TOOLS_EXTRA_CLANGD_HEADERS_H 10 #define LLVM_CLANG_TOOLS_EXTRA_CLANGD_HEADERS_H 11 12 #include "Protocol.h" 13 #include "SourceCode.h" 14 #include "index/Symbol.h" 15 #include "support/Path.h" 16 #include "clang/Basic/FileEntry.h" 17 #include "clang/Basic/TokenKinds.h" 18 #include "clang/Format/Format.h" 19 #include "clang/Frontend/CompilerInstance.h" 20 #include "clang/Lex/HeaderSearch.h" 21 #include "clang/Lex/Preprocessor.h" 22 #include "clang/Tooling/Inclusions/HeaderIncludes.h" 23 #include "clang/Tooling/Inclusions/StandardLibrary.h" 24 #include "llvm/ADT/ArrayRef.h" 25 #include "llvm/ADT/DenseSet.h" 26 #include "llvm/ADT/StringRef.h" 27 #include "llvm/ADT/StringSet.h" 28 #include "llvm/Support/Error.h" 29 #include "llvm/Support/FileSystem/UniqueID.h" 30 #include <optional> 31 #include <string> 32 33 namespace clang { 34 namespace clangd { 35 36 using HeaderFilter = llvm::ArrayRef<std::function<bool(llvm::StringRef)>>; 37 38 /// Returns true if \p Include is literal include like "path" or <path>. 39 bool isLiteralInclude(llvm::StringRef Include); 40 41 /// Represents a header file to be #include'd. 42 struct HeaderFile { 43 std::string File; 44 /// If this is true, `File` is a literal string quoted with <> or "" that 45 /// can be #included directly; otherwise, `File` is an absolute file path. 46 bool Verbatim; 47 48 bool valid() const; 49 }; 50 51 /// A header and directives as stored in a Symbol. 52 struct SymbolInclude { 53 /// The header to include. This is either a URI or a verbatim include which is 54 /// quoted with <> or "". 55 llvm::StringRef Header; 56 /// The include directive(s) that can be used, e.g. #import and/or #include. 57 Symbol::IncludeDirective Directive; 58 }; 59 60 /// Creates a `HeaderFile` from \p Header which can be either a URI or a literal 61 /// include. 62 llvm::Expected<HeaderFile> toHeaderFile(llvm::StringRef Header, 63 llvm::StringRef HintPath); 64 65 // Returns include headers for \p Sym sorted by popularity. If two headers are 66 // equally popular, prefer the shorter one. 67 llvm::SmallVector<SymbolInclude, 1> getRankedIncludes(const Symbol &Sym); 68 69 // An #include directive that we found in the main file. 70 struct Inclusion { 71 tok::PPKeywordKind Directive; // Directive used for inclusion, e.g. import 72 std::string Written; // Inclusion name as written e.g. <vector>. 73 Path Resolved; // Resolved path of included file. Empty if not resolved. 74 unsigned HashOffset = 0; // Byte offset from start of file to #. 75 int HashLine = 0; // Line number containing the directive, 0-indexed. 76 SrcMgr::CharacteristicKind FileKind = SrcMgr::C_User; 77 std::optional<unsigned> HeaderID; 78 }; 79 llvm::raw_ostream &operator<<(llvm::raw_ostream &, const Inclusion &); 80 bool operator==(const Inclusion &LHS, const Inclusion &RHS); 81 82 // Contains information about one file in the build graph and its direct 83 // dependencies. Doesn't own the strings it references (IncludeGraph is 84 // self-contained). 85 struct IncludeGraphNode { 86 enum class SourceFlag : uint8_t { 87 None = 0, 88 // Whether current file is a main file rather than a header. 89 IsTU = 1 << 0, 90 // Whether current file had any uncompilable errors during indexing. 91 HadErrors = 1 << 1, 92 }; 93 94 SourceFlag Flags = SourceFlag::None; 95 llvm::StringRef URI; 96 FileDigest Digest{{0}}; 97 std::vector<llvm::StringRef> DirectIncludes; 98 }; 99 // FileURI and FileInclusions are references to keys of the map containing 100 // them. 101 // Important: The graph generated by those callbacks might contain cycles, self 102 // edges and multi edges. 103 using IncludeGraph = llvm::StringMap<IncludeGraphNode>; 104 105 inline IncludeGraphNode::SourceFlag operator|(IncludeGraphNode::SourceFlag A, 106 IncludeGraphNode::SourceFlag B) { 107 return static_cast<IncludeGraphNode::SourceFlag>(static_cast<uint8_t>(A) | 108 static_cast<uint8_t>(B)); 109 } 110 111 inline bool operator&(IncludeGraphNode::SourceFlag A, 112 IncludeGraphNode::SourceFlag B) { 113 return static_cast<uint8_t>(A) & static_cast<uint8_t>(B); 114 } 115 116 inline IncludeGraphNode::SourceFlag & 117 operator|=(IncludeGraphNode::SourceFlag &A, IncludeGraphNode::SourceFlag B) { 118 return A = A | B; 119 } 120 121 // Information captured about the inclusion graph in a translation unit. 122 // This includes detailed information about the direct #includes, and summary 123 // information about all transitive includes. 124 // 125 // It should be built incrementally with collectIncludeStructureCallback(). 126 // When we build the preamble, we capture and store its include structure along 127 // with the preamble data. When we use the preamble, we can copy its 128 // IncludeStructure and use another collectIncludeStructureCallback() to fill 129 // in any non-preamble inclusions. 130 class IncludeStructure { 131 public: 132 IncludeStructure() { 133 // Reserve HeaderID = 0 for the main file. 134 RealPathNames.emplace_back(); 135 } 136 137 // Inserts a PPCallback and CommentHandler that visits all includes in the 138 // main file and populates the structure. It will also scan for IWYU pragmas 139 // in comments. 140 void collect(const CompilerInstance &CI); 141 142 // HeaderID identifies file in the include graph. It corresponds to a 143 // FileEntry rather than a FileID, but stays stable across preamble & main 144 // file builds. 145 enum class HeaderID : unsigned {}; 146 147 std::optional<HeaderID> getID(const FileEntry *Entry) const; 148 HeaderID getOrCreateID(FileEntryRef Entry); 149 150 StringRef getRealPath(HeaderID ID) const { 151 assert(static_cast<unsigned>(ID) <= RealPathNames.size()); 152 return RealPathNames[static_cast<unsigned>(ID)]; 153 } 154 155 // Return all transitively reachable files. 156 llvm::ArrayRef<std::string> allHeaders() const { return RealPathNames; } 157 158 // Returns includes inside the main file with the given spelling. 159 // Spelling should include brackets or quotes, e.g. <foo>. 160 llvm::SmallVector<const Inclusion *> 161 mainFileIncludesWithSpelling(llvm::StringRef Spelling) const; 162 163 // Return all transitively reachable files, and their minimum include depth. 164 // All transitive includes (absolute paths), with their minimum include depth. 165 // Root --> 0, #included file --> 1, etc. 166 // Root is the ID of the header being visited first. 167 llvm::DenseMap<HeaderID, unsigned> 168 includeDepth(HeaderID Root = MainFileID) const; 169 170 // Maps HeaderID to the ids of the files included from it. 171 llvm::DenseMap<HeaderID, SmallVector<HeaderID>> IncludeChildren; 172 173 llvm::DenseMap<tooling::stdlib::Header, llvm::SmallVector<HeaderID>> 174 StdlibHeaders; 175 176 std::vector<Inclusion> MainFileIncludes; 177 178 // The entries of the header search path. (HeaderSearch::search_dir_range()) 179 // Only includes the plain-directory entries (not header maps or frameworks). 180 // All paths are canonical (FileManager::getCanonicalPath()). 181 std::vector<std::string> SearchPathsCanonical; 182 183 // We reserve HeaderID(0) for the main file and will manually check for that 184 // in getID and getOrCreateID because the UniqueID is not stable when the 185 // content of the main file changes. 186 static const HeaderID MainFileID = HeaderID(0u); 187 188 class RecordHeaders; 189 190 private: 191 // MainFileEntry will be used to check if the queried file is the main file 192 // or not. 193 const FileEntry *MainFileEntry = nullptr; 194 195 std::vector<std::string> RealPathNames; // In HeaderID order. 196 // FileEntry::UniqueID is mapped to the internal representation (HeaderID). 197 // Identifying files in a way that persists from preamble build to subsequent 198 // builds is surprisingly hard. FileID is unavailable in InclusionDirective(), 199 // and RealPathName and UniqueID are not preserved in 200 // the preamble. 201 llvm::DenseMap<llvm::sys::fs::UniqueID, HeaderID> UIDToIndex; 202 203 // Maps written includes to indices in MainFileInclude for easier lookup by 204 // spelling. 205 llvm::StringMap<llvm::SmallVector<unsigned>> MainFileIncludesBySpelling; 206 }; 207 208 // Calculates insertion edit for including a new header in a file. 209 class IncludeInserter { 210 public: 211 // If \p HeaderSearchInfo is nullptr (e.g. when compile command is 212 // infeasible), this will only try to insert verbatim headers, and 213 // include path of non-verbatim header will not be shortened. 214 IncludeInserter(StringRef FileName, StringRef Code, 215 const format::FormatStyle &Style, StringRef BuildDir, 216 HeaderSearch *HeaderSearchInfo, HeaderFilter QuotedHeaders, 217 HeaderFilter AngledHeaders) 218 : FileName(FileName), Code(Code), BuildDir(BuildDir), 219 HeaderSearchInfo(HeaderSearchInfo), 220 Inserter(FileName, Code, Style.IncludeStyle), 221 QuotedHeaders(QuotedHeaders), AngledHeaders(AngledHeaders) {} 222 223 void addExisting(const Inclusion &Inc); 224 225 /// Checks whether to add an #include of the header into \p File. 226 /// An #include will not be added if: 227 /// - Either \p DeclaringHeader or \p InsertedHeader is already (directly) 228 /// in \p Inclusions (including those included via different paths). 229 /// - \p DeclaringHeader or \p InsertedHeader is the same as \p File. 230 /// 231 /// \param DeclaringHeader is path of the original header corresponding to \p 232 /// InsertedHeader e.g. the header that declares a symbol. 233 /// \param InsertedHeader The preferred header to be inserted. This could be 234 /// the same as DeclaringHeader but must be provided. 235 bool shouldInsertInclude(PathRef DeclaringHeader, 236 const HeaderFile &InsertedHeader) const; 237 238 /// Determines the preferred way to #include a file, taking into account the 239 /// search path. Usually this will prefer a shorter representation like 240 /// 'Foo/Bar.h' over a longer one like 'Baz/include/Foo/Bar.h'. 241 /// 242 /// \param InsertedHeader The preferred header to be inserted. 243 /// 244 /// \param IncludingFile is the absolute path of the file that InsertedHeader 245 /// will be inserted. 246 /// 247 /// \return A quoted "path" or <path> to be included, or std::nullopt if it 248 /// couldn't be shortened. 249 std::optional<std::string> 250 calculateIncludePath(const HeaderFile &InsertedHeader, 251 llvm::StringRef IncludingFile) const; 252 253 /// Calculates an edit that inserts \p VerbatimHeader into code. If the header 254 /// is already included, this returns std::nullopt. 255 std::optional<TextEdit> insert(llvm::StringRef VerbatimHeader, 256 tooling::IncludeDirective Directive) const; 257 258 private: 259 StringRef FileName; 260 StringRef Code; 261 StringRef BuildDir; 262 HeaderSearch *HeaderSearchInfo = nullptr; 263 llvm::StringSet<> IncludedHeaders; // Both written and resolved. 264 tooling::HeaderIncludes Inserter; // Computers insertion replacement. 265 HeaderFilter QuotedHeaders; 266 HeaderFilter AngledHeaders; 267 }; 268 269 } // namespace clangd 270 } // namespace clang 271 272 namespace llvm { 273 274 // Support HeaderIDs as DenseMap keys. 275 template <> struct DenseMapInfo<clang::clangd::IncludeStructure::HeaderID> { 276 static inline clang::clangd::IncludeStructure::HeaderID getEmptyKey() { 277 return static_cast<clang::clangd::IncludeStructure::HeaderID>(-1); 278 } 279 280 static inline clang::clangd::IncludeStructure::HeaderID getTombstoneKey() { 281 return static_cast<clang::clangd::IncludeStructure::HeaderID>(-2); 282 } 283 284 static unsigned 285 getHashValue(const clang::clangd::IncludeStructure::HeaderID &Tag) { 286 return hash_value(static_cast<unsigned>(Tag)); 287 } 288 289 static bool isEqual(const clang::clangd::IncludeStructure::HeaderID &LHS, 290 const clang::clangd::IncludeStructure::HeaderID &RHS) { 291 return LHS == RHS; 292 } 293 }; 294 295 } // namespace llvm 296 297 #endif // LLVM_CLANG_TOOLS_EXTRA_CLANGD_HEADERS_H 298