xref: /llvm-project/clang-tools-extra/clangd/Headers.h (revision 1f90797f6a9d91d61e0f66b465b0467e4c66d0e0)
1 //===--- Headers.h - Include headers -----------------------------*- C++-*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #ifndef LLVM_CLANG_TOOLS_EXTRA_CLANGD_HEADERS_H
10 #define LLVM_CLANG_TOOLS_EXTRA_CLANGD_HEADERS_H
11 
12 #include "Protocol.h"
13 #include "SourceCode.h"
14 #include "index/Symbol.h"
15 #include "support/Path.h"
16 #include "clang/Basic/FileEntry.h"
17 #include "clang/Basic/TokenKinds.h"
18 #include "clang/Format/Format.h"
19 #include "clang/Frontend/CompilerInstance.h"
20 #include "clang/Lex/HeaderSearch.h"
21 #include "clang/Lex/Preprocessor.h"
22 #include "clang/Tooling/Inclusions/HeaderIncludes.h"
23 #include "clang/Tooling/Inclusions/StandardLibrary.h"
24 #include "llvm/ADT/ArrayRef.h"
25 #include "llvm/ADT/DenseSet.h"
26 #include "llvm/ADT/StringRef.h"
27 #include "llvm/ADT/StringSet.h"
28 #include "llvm/Support/Error.h"
29 #include "llvm/Support/FileSystem/UniqueID.h"
30 #include <optional>
31 #include <string>
32 
33 namespace clang {
34 namespace clangd {
35 
36 using HeaderFilter = llvm::ArrayRef<std::function<bool(llvm::StringRef)>>;
37 
38 /// Returns true if \p Include is literal include like "path" or <path>.
39 bool isLiteralInclude(llvm::StringRef Include);
40 
/// Represents a header file to be #include'd.
struct HeaderFile {
  /// Either a literal include spelling or an absolute file path; `Verbatim`
  /// says which.
  std::string File;
  /// If this is true, `File` is a literal string quoted with <> or "" that
  /// can be #included directly; otherwise, `File` is an absolute file path.
  /// Defaults to false so that a default-initialized HeaderFile never carries
  /// an indeterminate flag. The struct remains an aggregate, so
  /// `HeaderFile{Path, true}` keeps working.
  bool Verbatim = false;

  /// Validity check; only declared here.
  bool valid() const;
};
50 
51 /// A header and directives as stored in a Symbol.
52 struct SymbolInclude {
53   /// The header to include. This is either a URI or a verbatim include which is
54   /// quoted with <> or "".
55   llvm::StringRef Header;
56   /// The include directive(s) that can be used, e.g. #import and/or #include.
57   Symbol::IncludeDirective Directive;
58 };
59 
60 /// Creates a `HeaderFile` from \p Header which can be either a URI or a literal
61 /// include.
62 llvm::Expected<HeaderFile> toHeaderFile(llvm::StringRef Header,
63                                         llvm::StringRef HintPath);
64 
65 // Returns include headers for \p Sym sorted by popularity. If two headers are
66 // equally popular, prefer the shorter one.
67 llvm::SmallVector<SymbolInclude, 1> getRankedIncludes(const Symbol &Sym);
68 
69 // An #include directive that we found in the main file.
70 struct Inclusion {
71   tok::PPKeywordKind Directive; // Directive used for inclusion, e.g. import
72   std::string Written;          // Inclusion name as written e.g. <vector>.
73   Path Resolved; // Resolved path of included file. Empty if not resolved.
74   unsigned HashOffset = 0; // Byte offset from start of file to #.
75   int HashLine = 0;        // Line number containing the directive, 0-indexed.
76   SrcMgr::CharacteristicKind FileKind = SrcMgr::C_User;
77   std::optional<unsigned> HeaderID;
78 };
79 llvm::raw_ostream &operator<<(llvm::raw_ostream &, const Inclusion &);
80 bool operator==(const Inclusion &LHS, const Inclusion &RHS);
81 
82 // Contains information about one file in the build graph and its direct
83 // dependencies. Doesn't own the strings it references (IncludeGraph is
84 // self-contained).
85 struct IncludeGraphNode {
86   enum class SourceFlag : uint8_t {
87     None = 0,
88     // Whether current file is a main file rather than a header.
89     IsTU = 1 << 0,
90     // Whether current file had any uncompilable errors during indexing.
91     HadErrors = 1 << 1,
92   };
93 
94   SourceFlag Flags = SourceFlag::None;
95   llvm::StringRef URI;
96   FileDigest Digest{{0}};
97   std::vector<llvm::StringRef> DirectIncludes;
98 };
99 // FileURI and FileInclusions are references to keys of the map containing
100 // them.
101 // Important: The graph generated by those callbacks might contain cycles, self
102 // edges and multi edges.
103 using IncludeGraph = llvm::StringMap<IncludeGraphNode>;
104 
105 inline IncludeGraphNode::SourceFlag operator|(IncludeGraphNode::SourceFlag A,
106                                               IncludeGraphNode::SourceFlag B) {
107   return static_cast<IncludeGraphNode::SourceFlag>(static_cast<uint8_t>(A) |
108                                                    static_cast<uint8_t>(B));
109 }
110 
111 inline bool operator&(IncludeGraphNode::SourceFlag A,
112                       IncludeGraphNode::SourceFlag B) {
113   return static_cast<uint8_t>(A) & static_cast<uint8_t>(B);
114 }
115 
116 inline IncludeGraphNode::SourceFlag &
117 operator|=(IncludeGraphNode::SourceFlag &A, IncludeGraphNode::SourceFlag B) {
118   return A = A | B;
119 }
120 
121 // Information captured about the inclusion graph in a translation unit.
122 // This includes detailed information about the direct #includes, and summary
123 // information about all transitive includes.
124 //
125 // It should be built incrementally with collectIncludeStructureCallback().
126 // When we build the preamble, we capture and store its include structure along
127 // with the preamble data. When we use the preamble, we can copy its
128 // IncludeStructure and use another collectIncludeStructureCallback() to fill
129 // in any non-preamble inclusions.
130 class IncludeStructure {
131 public:
132   IncludeStructure() {
133     // Reserve HeaderID = 0 for the main file.
134     RealPathNames.emplace_back();
135   }
136 
137   // Inserts a PPCallback and CommentHandler that visits all includes in the
138   // main file and populates the structure. It will also scan for IWYU pragmas
139   // in comments.
140   void collect(const CompilerInstance &CI);
141 
142   // HeaderID identifies file in the include graph. It corresponds to a
143   // FileEntry rather than a FileID, but stays stable across preamble & main
144   // file builds.
145   enum class HeaderID : unsigned {};
146 
147   std::optional<HeaderID> getID(const FileEntry *Entry) const;
148   HeaderID getOrCreateID(FileEntryRef Entry);
149 
150   StringRef getRealPath(HeaderID ID) const {
151     assert(static_cast<unsigned>(ID) <= RealPathNames.size());
152     return RealPathNames[static_cast<unsigned>(ID)];
153   }
154 
155   // Return all transitively reachable files.
156   llvm::ArrayRef<std::string> allHeaders() const { return RealPathNames; }
157 
158   // Returns includes inside the main file with the given spelling.
159   // Spelling should include brackets or quotes, e.g. <foo>.
160   llvm::SmallVector<const Inclusion *>
161   mainFileIncludesWithSpelling(llvm::StringRef Spelling) const;
162 
163   // Return all transitively reachable files, and their minimum include depth.
164   // All transitive includes (absolute paths), with their minimum include depth.
165   // Root --> 0, #included file --> 1, etc.
166   // Root is the ID of the header being visited first.
167   llvm::DenseMap<HeaderID, unsigned>
168   includeDepth(HeaderID Root = MainFileID) const;
169 
170   // Maps HeaderID to the ids of the files included from it.
171   llvm::DenseMap<HeaderID, SmallVector<HeaderID>> IncludeChildren;
172 
173   llvm::DenseMap<tooling::stdlib::Header, llvm::SmallVector<HeaderID>>
174       StdlibHeaders;
175 
176   std::vector<Inclusion> MainFileIncludes;
177 
178   // The entries of the header search path. (HeaderSearch::search_dir_range())
179   // Only includes the plain-directory entries (not header maps or frameworks).
180   // All paths are canonical (FileManager::getCanonicalPath()).
181   std::vector<std::string> SearchPathsCanonical;
182 
183   // We reserve HeaderID(0) for the main file and will manually check for that
184   // in getID and getOrCreateID because the UniqueID is not stable when the
185   // content of the main file changes.
186   static const HeaderID MainFileID = HeaderID(0u);
187 
188   class RecordHeaders;
189 
190 private:
191   // MainFileEntry will be used to check if the queried file is the main file
192   // or not.
193   const FileEntry *MainFileEntry = nullptr;
194 
195   std::vector<std::string> RealPathNames; // In HeaderID order.
196   // FileEntry::UniqueID is mapped to the internal representation (HeaderID).
197   // Identifying files in a way that persists from preamble build to subsequent
198   // builds is surprisingly hard. FileID is unavailable in InclusionDirective(),
199   // and RealPathName and UniqueID are not preserved in
200   // the preamble.
201   llvm::DenseMap<llvm::sys::fs::UniqueID, HeaderID> UIDToIndex;
202 
203   // Maps written includes to indices in MainFileInclude for easier lookup by
204   // spelling.
205   llvm::StringMap<llvm::SmallVector<unsigned>> MainFileIncludesBySpelling;
206 };
207 
208 // Calculates insertion edit for including a new header in a file.
209 class IncludeInserter {
210 public:
211   // If \p HeaderSearchInfo is nullptr (e.g. when compile command is
212   // infeasible), this will only try to insert verbatim headers, and
213   // include path of non-verbatim header will not be shortened.
214   IncludeInserter(StringRef FileName, StringRef Code,
215                   const format::FormatStyle &Style, StringRef BuildDir,
216                   HeaderSearch *HeaderSearchInfo, HeaderFilter QuotedHeaders,
217                   HeaderFilter AngledHeaders)
218       : FileName(FileName), Code(Code), BuildDir(BuildDir),
219         HeaderSearchInfo(HeaderSearchInfo),
220         Inserter(FileName, Code, Style.IncludeStyle),
221         QuotedHeaders(QuotedHeaders), AngledHeaders(AngledHeaders) {}
222 
223   void addExisting(const Inclusion &Inc);
224 
225   /// Checks whether to add an #include of the header into \p File.
226   /// An #include will not be added if:
227   ///   - Either \p DeclaringHeader or \p InsertedHeader is already (directly)
228   ///   in \p Inclusions (including those included via different paths).
229   ///   - \p DeclaringHeader or \p InsertedHeader is the same as \p File.
230   ///
231   /// \param DeclaringHeader is path of the original header corresponding to \p
232   /// InsertedHeader e.g. the header that declares a symbol.
233   /// \param InsertedHeader The preferred header to be inserted. This could be
234   /// the same as DeclaringHeader but must be provided.
235   bool shouldInsertInclude(PathRef DeclaringHeader,
236                            const HeaderFile &InsertedHeader) const;
237 
238   /// Determines the preferred way to #include a file, taking into account the
239   /// search path. Usually this will prefer a shorter representation like
240   /// 'Foo/Bar.h' over a longer one like 'Baz/include/Foo/Bar.h'.
241   ///
242   /// \param InsertedHeader The preferred header to be inserted.
243   ///
244   /// \param IncludingFile is the absolute path of the file that InsertedHeader
245   /// will be inserted.
246   ///
247   /// \return A quoted "path" or <path> to be included, or std::nullopt if it
248   /// couldn't be shortened.
249   std::optional<std::string>
250   calculateIncludePath(const HeaderFile &InsertedHeader,
251                        llvm::StringRef IncludingFile) const;
252 
253   /// Calculates an edit that inserts \p VerbatimHeader into code. If the header
254   /// is already included, this returns std::nullopt.
255   std::optional<TextEdit> insert(llvm::StringRef VerbatimHeader,
256                                  tooling::IncludeDirective Directive) const;
257 
258 private:
259   StringRef FileName;
260   StringRef Code;
261   StringRef BuildDir;
262   HeaderSearch *HeaderSearchInfo = nullptr;
263   llvm::StringSet<> IncludedHeaders; // Both written and resolved.
264   tooling::HeaderIncludes Inserter;  // Computers insertion replacement.
265   HeaderFilter QuotedHeaders;
266   HeaderFilter AngledHeaders;
267 };
268 
269 } // namespace clangd
270 } // namespace clang
271 
272 namespace llvm {
273 
274 // Support HeaderIDs as DenseMap keys.
275 template <> struct DenseMapInfo<clang::clangd::IncludeStructure::HeaderID> {
276   static inline clang::clangd::IncludeStructure::HeaderID getEmptyKey() {
277     return static_cast<clang::clangd::IncludeStructure::HeaderID>(-1);
278   }
279 
280   static inline clang::clangd::IncludeStructure::HeaderID getTombstoneKey() {
281     return static_cast<clang::clangd::IncludeStructure::HeaderID>(-2);
282   }
283 
284   static unsigned
285   getHashValue(const clang::clangd::IncludeStructure::HeaderID &Tag) {
286     return hash_value(static_cast<unsigned>(Tag));
287   }
288 
289   static bool isEqual(const clang::clangd::IncludeStructure::HeaderID &LHS,
290                       const clang::clangd::IncludeStructure::HeaderID &RHS) {
291     return LHS == RHS;
292   }
293 };
294 
295 } // namespace llvm
296 
297 #endif // LLVM_CLANG_TOOLS_EXTRA_CLANGD_HEADERS_H
298