xref: /llvm-project/clang-tools-extra/clangd/index/SymbolCollector.h (revision 61fe67a4017375fd675f75652e857e837f77fa51)
1 //===--- SymbolCollector.h ---------------------------------------*- C++-*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 #ifndef LLVM_CLANG_TOOLS_EXTRA_CLANGD_INDEX_SYMBOLCOLLECTOR_H
9 #define LLVM_CLANG_TOOLS_EXTRA_CLANGD_INDEX_SYMBOLCOLLECTOR_H
10 
11 #include "CollectMacros.h"
12 #include "clang-include-cleaner/Record.h"
13 #include "clang-include-cleaner/Types.h"
14 #include "index/Ref.h"
15 #include "index/Relation.h"
16 #include "index/Symbol.h"
17 #include "index/SymbolID.h"
18 #include "index/SymbolLocation.h"
19 #include "index/SymbolOrigin.h"
20 #include "clang/AST/ASTContext.h"
21 #include "clang/AST/Decl.h"
22 #include "clang/Basic/LLVM.h"
23 #include "clang/Basic/SourceLocation.h"
24 #include "clang/Basic/SourceManager.h"
25 #include "clang/Index/IndexDataConsumer.h"
26 #include "clang/Index/IndexSymbol.h"
27 #include "clang/Sema/CodeCompleteConsumer.h"
28 #include "llvm/ADT/DenseMap.h"
29 #include "llvm/ADT/DenseSet.h"
30 #include "llvm/ADT/SmallVector.h"
31 #include "llvm/ADT/StringRef.h"
32 #include <functional>
33 #include <memory>
34 #include <optional>
35 #include <string>
36 #include <utility>
37 
38 namespace clang {
39 namespace clangd {
40 
41 /// Collect declarations (symbols) from an AST.
42 /// It collects most declarations except:
43 /// - Implicit declarations
44 /// - Anonymous declarations (anonymous enum/class/struct, etc)
45 /// - Declarations in anonymous namespaces in headers
46 /// - Local declarations (in function bodies, blocks, etc)
47 /// - Template specializations
48 /// - Library-specific private declarations (e.g. private declarations
49 /// generated by the protobuf compiler)
50 ///
51 /// References to main-file symbols are not collected by default
   /// (see Options::CollectMainFileRefs).
52 ///
53 /// See also shouldCollectSymbol(...).
54 ///
55 /// Clients (e.g. clangd) can use SymbolCollector together with
56 /// index::indexTopLevelDecls to retrieve all symbols when the source file is
57 /// changed.
58 class SymbolCollector : public index::IndexDataConsumer {
59 public:
60   struct Options {
61     /// When symbol paths cannot be resolved to absolute paths (e.g. files in
62     /// a VFS that does not have absolute paths), combine the fallback directory
63     /// with symbols' paths to get absolute paths. This must be an absolute
64     /// path.
65     std::string FallbackDir;
       // NOTE(review): not documented here; presumably enables recording of
       // #include paths for collected symbols -- confirm in SymbolCollector.cpp.
66     bool CollectIncludePath = false;
67     /// If set, this is used to map symbol #include path to a potentially
68     /// different #include path specified by IWYU pragmas.
69     const include_cleaner::PragmaIncludes *PragmaIncludes = nullptr;
70     // Populate the Symbol.References field.
71     bool CountReferences = false;
72     /// The symbol ref kinds that will be collected.
73     /// If not set, SymbolCollector will not collect refs.
74     /// Note that references of namespace decls are not collected, as they
75     /// contribute a large part of the index, and they are less useful compared
76     /// with other decls.
77     RefKind RefFilter = RefKind::Unknown;
78     /// If set to true, SymbolCollector will collect all refs (from main file
79     /// and included headers); otherwise, only refs from main file will be
80     /// collected.
81     /// This flag is only meaningful when RefFilter is set.
82     bool RefsInHeaders = false;
83     // Every symbol collected will be stamped with this origin.
84     SymbolOrigin Origin = SymbolOrigin::Unknown;
85     /// Collect macros.
86     /// Note that SymbolCollector must be run with preprocessor in order to
87     /// collect macros. For example, `indexTopLevelDecls` will not index any
88     /// macro even if this is true.
89     bool CollectMacro = false;
90     /// Collect symbols local to main-files, such as static functions, symbols
91     /// inside an anonymous namespace, function-local classes and their member
92     /// functions.
93     bool CollectMainFileSymbols = true;
94     /// Collect references to main-file symbols.
95     bool CollectMainFileRefs = false;
96     /// Collect symbols with reserved names, like __Vector_base.
97     /// This does not currently affect macros (many like _WIN32 are important!)
98     /// This only affects system headers.
99     bool CollectReserved = false;
100     /// If set to true, SymbolCollector will collect doc for all symbols.
101     /// Note that documents of symbols being indexed for completion will always
102     /// be collected regardless of this option.
103     bool StoreAllDocumentation = false;
104     /// If this is set, only collect symbols/references from a file if
105     /// `FileFilter(SM, FID)` is true. If not set, all files are indexed.
106     std::function<bool(const SourceManager &, FileID)> FileFilter = nullptr;
107   };
108 
109   SymbolCollector(Options Opts);
110   ~SymbolCollector();
111 
112   /// Returns true if \p ND should be collected.
113   static bool shouldCollectSymbol(const NamedDecl &ND, const ASTContext &ASTCtx,
114                                   const Options &Opts, bool IsMainFileSymbol);
115 
116   // Given a ref contained in enclosing decl `Enclosing`, return
117   // the decl that should be used as that ref's Ref::Container. This is
118   // usually `Enclosing` itself, but in cases where `Enclosing` is not
119   // indexed, we walk further up because Ref::Container should always be
120   // an indexed symbol.
121   // Note: we don't use DeclContext as the container as in some cases
122   // it's useful to use a Decl which is not a DeclContext. For example,
123   // for a ref occurring in the initializer of a namespace-scope variable,
124   // it's useful to use that variable as the container, as otherwise the
125   // next enclosing DeclContext would be a NamespaceDecl or TranslationUnitDecl,
126   // which are both not indexed and less granular than we'd like for use cases
127   // like call hierarchy.
128   static const Decl *getRefContainer(const Decl *Enclosing,
129                                      const SymbolCollector::Options &Opts);
130 
131   void initialize(ASTContext &Ctx) override;
132 
      // Only the raw pointer is stored; the shared_ptr itself is not retained,
      // so this collector does not keep the preprocessor alive.
133   void setPreprocessor(std::shared_ptr<Preprocessor> PP) override {
134     this->PP = PP.get();
135   }
      // Non-virtual overload for callers that hold the preprocessor by
      // reference; stores its address without taking ownership.
136   void setPreprocessor(Preprocessor &PP) { this->PP = &PP; }
137 
138   bool
139   handleDeclOccurrence(const Decl *D, index::SymbolRoleSet Roles,
140                        ArrayRef<index::SymbolRelation> Relations,
141                        SourceLocation Loc,
142                        index::IndexDataConsumer::ASTNodeInfo ASTNode) override;
143 
144   bool handleMacroOccurrence(const IdentifierInfo *Name, const MacroInfo *MI,
145                              index::SymbolRoleSet Roles,
146                              SourceLocation Loc) override;
147 
148   void handleMacros(const MainFileMacros &MacroRefsToIndex);
149 
      // Build the final slabs from the accumulated builders. Each builder is
      // passed to build() as an rvalue, so its contents should not be relied on
      // after the corresponding take*() call.
150   SymbolSlab takeSymbols() { return std::move(Symbols).build(); }
151   RefSlab takeRefs() { return std::move(Refs).build(); }
152   RelationSlab takeRelations() { return std::move(Relations).build(); }
153 
154   /// Returns true if we are interested in references and declarations from \p
155   /// FID. If this function returns false, bodies of functions inside those
156   /// files will be skipped to decrease indexing time.
157   bool shouldIndexFile(FileID FID);
158 
159   void finish() override;
160 
161 private:
162   const Symbol *addDeclaration(const NamedDecl &, SymbolID,
163                                bool IsMainFileSymbol);
164   void addDefinition(const NamedDecl &, const Symbol &DeclSymbol,
165                      bool SkipDocCheck);
166   void processRelations(const NamedDecl &ND, const SymbolID &ID,
167                         ArrayRef<index::SymbolRelation> Relations);
168 
169   std::optional<SymbolLocation> getTokenLocation(SourceLocation TokLoc);
170 
171   std::optional<std::string> getIncludeHeader(const Symbol &S, FileID);
172 
173   SymbolID getSymbolIDCached(const Decl *D);
174   SymbolID getSymbolIDCached(const llvm::StringRef MacroName,
175                              const MacroInfo *MI, const SourceManager &SM);
176 
177   // All Symbols collected from the AST.
178   SymbolSlab::Builder Symbols;
179   // File IDs used to determine if the code contains Obj-C constructs.
180   // For Obj-C symbols, these File IDs are used to compute the include
181   // headers.
182   llvm::DenseMap<SymbolID, FileID> IncludeFiles;
183   void setIncludeLocation(const Symbol &S, SourceLocation,
184                           const include_cleaner::Symbol &Sym);
185 
186   // Providers for Symbol.IncludeHeaders.
187   // The final spelling is calculated in finish().
188   llvm::DenseMap<SymbolID, llvm::SmallVector<include_cleaner::Header>>
189       SymbolProviders;
190   // Files which contain ObjC symbols.
191   // This is finalized and used in finish().
192   llvm::DenseSet<FileID> FilesWithObjCConstructs;
193 
194   // Indexed macros, to be erased if they turn out to be include guards.
195   llvm::DenseSet<const IdentifierInfo *> IndexedMacros;
196   // All refs collected from the AST. It includes:
197   //   1) symbols declared in the preamble and referenced from the main file
198   //      (which is not a header), or
199   //   2) symbols declared and referenced from the main file (which is a header)
200   RefSlab::Builder Refs;
201   // All relations collected from the AST.
202   RelationSlab::Builder Relations;
      // NOTE(review): no in-class initializer here (unlike PP below); presumably
      // assigned in initialize() -- confirm it is never read before that.
203   ASTContext *ASTCtx;
      // Non-owning; set through setPreprocessor(), null until then.
204   Preprocessor *PP = nullptr;
205   std::shared_ptr<GlobalCodeCompletionAllocator> CompletionAllocator;
206   std::unique_ptr<CodeCompletionTUInfo> CompletionTUInfo;
207   Options Opts;
      // A single reference occurrence, as passed to addRef().
208   struct SymbolRef {
209     SourceLocation Loc;
210     FileID FID;
211     index::SymbolRoleSet Roles;
212     index::SymbolKind Kind;
213     const Decl *Container;
214     bool Spelled;
215   };
216   void addRef(SymbolID ID, const SymbolRef &SR);
217   // Symbols referenced from the current TU, flushed on finish().
218   llvm::DenseSet<SymbolID> ReferencedSymbols;
219   // Maps canonical declaration provided by clang to canonical declaration for
220   // an index symbol, if clangd prefers a different declaration than that
221   // provided by clang. For example, friend declaration might be considered
222   // canonical by clang but should not be considered canonical in the index
223   // unless it's a definition.
224   llvm::DenseMap<const Decl *, const Decl *> CanonicalDecls;
225   // Cache whether to index a file or not.
226   llvm::DenseMap<FileID, bool> FilesToIndexCache;
227   // Encapsulates calculations and caches around header paths, which headers
228   // to insert for which symbol, etc.
229   class HeaderFileURICache;
230   std::unique_ptr<HeaderFileURICache> HeaderFileURIs;
      // Presumably the backing stores for the two getSymbolIDCached() overloads
      // (keyed by decl and by macro info respectively) -- confirm in the .cpp.
231   llvm::DenseMap<const Decl *, SymbolID> DeclToIDCache;
232   llvm::DenseMap<const MacroInfo *, SymbolID> MacroToIDCache;
233 };
234 
235 } // namespace clangd
236 } // namespace clang
237 
238 #endif
239