1 //===--- SymbolCollector.h ---------------------------------------*- C++-*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 #ifndef LLVM_CLANG_TOOLS_EXTRA_CLANGD_INDEX_SYMBOLCOLLECTOR_H 9 #define LLVM_CLANG_TOOLS_EXTRA_CLANGD_INDEX_SYMBOLCOLLECTOR_H 10 11 #include "CollectMacros.h" 12 #include "clang-include-cleaner/Record.h" 13 #include "clang-include-cleaner/Types.h" 14 #include "index/Ref.h" 15 #include "index/Relation.h" 16 #include "index/Symbol.h" 17 #include "index/SymbolID.h" 18 #include "index/SymbolLocation.h" 19 #include "index/SymbolOrigin.h" 20 #include "clang/AST/ASTContext.h" 21 #include "clang/AST/Decl.h" 22 #include "clang/Basic/LLVM.h" 23 #include "clang/Basic/SourceLocation.h" 24 #include "clang/Basic/SourceManager.h" 25 #include "clang/Index/IndexDataConsumer.h" 26 #include "clang/Index/IndexSymbol.h" 27 #include "clang/Sema/CodeCompleteConsumer.h" 28 #include "llvm/ADT/DenseMap.h" 29 #include "llvm/ADT/DenseSet.h" 30 #include "llvm/ADT/SmallVector.h" 31 #include "llvm/ADT/StringRef.h" 32 #include <functional> 33 #include <memory> 34 #include <optional> 35 #include <string> 36 #include <utility> 37 38 namespace clang { 39 namespace clangd { 40 41 /// Collect declarations (symbols) from an AST. 42 /// It collects most declarations except: 43 /// - Implicit declarations 44 /// - Anonymous declarations (anonymous enum/class/struct, etc) 45 /// - Declarations in anonymous namespaces in headers 46 /// - Local declarations (in function bodies, blocks, etc) 47 /// - Template specializations 48 /// - Library-specific private declarations (e.g. private declaration generated 49 /// by protobuf compiler) 50 /// 51 /// References to main-file symbols are not collected. 52 /// 53 /// See also shouldCollectSymbol(...). 54 /// 55 /// Clients (e.g. clangd) can use SymbolCollector together with 56 /// index::indexTopLevelDecls to retrieve all symbols when the source file is 57 /// changed. 58 class SymbolCollector : public index::IndexDataConsumer { 59 public: 60 struct Options { 61 /// When symbol paths cannot be resolved to absolute paths (e.g. files in 62 /// VFS that does not have absolute path), combine the fallback directory 63 /// with symbols' paths to get absolute paths. This must be an absolute 64 /// path. 65 std::string FallbackDir; 66 bool CollectIncludePath = false; 67 /// If set, this is used to map symbol #include path to a potentially 68 /// different #include path specified by IWYU pragmas. 69 const include_cleaner::PragmaIncludes *PragmaIncludes = nullptr; 70 // Populate the Symbol.References field. 71 bool CountReferences = false; 72 /// The symbol ref kinds that will be collected. 73 /// If not set, SymbolCollector will not collect refs. 74 /// Note that references of namespace decls are not collected, as they 75 /// contribute large part of the index, and they are less useful compared 76 /// with other decls. 77 RefKind RefFilter = RefKind::Unknown; 78 /// If set to true, SymbolCollector will collect all refs (from main file 79 /// and included headers); otherwise, only refs from main file will be 80 /// collected. 81 /// This flag is only meaningful when RefFilter is set. 82 bool RefsInHeaders = false; 83 // Every symbol collected will be stamped with this origin. 84 SymbolOrigin Origin = SymbolOrigin::Unknown; 85 /// Collect macros. 86 /// Note that SymbolCollector must be run with preprocessor in order to 87 /// collect macros. For example, `indexTopLevelDecls` will not index any 88 /// macro even if this is true. 89 bool CollectMacro = false; 90 /// Collect symbols local to main-files, such as static functions, symbols 91 /// inside an anonymous namespace, function-local classes and its member 92 /// functions. 93 bool CollectMainFileSymbols = true; 94 /// Collect references to main-file symbols. 95 bool CollectMainFileRefs = false; 96 /// Collect symbols with reserved names, like __Vector_base. 97 /// This does not currently affect macros (many like _WIN32 are important!) 98 /// This only affects system headers. 99 bool CollectReserved = false; 100 /// If set to true, SymbolCollector will collect doc for all symbols. 101 /// Note that documents of symbols being indexed for completion will always 102 /// be collected regardless of this option. 103 bool StoreAllDocumentation = false; 104 /// If this is set, only collect symbols/references from a file if 105 /// `FileFilter(SM, FID)` is true. If not set, all files are indexed. 106 std::function<bool(const SourceManager &, FileID)> FileFilter = nullptr; 107 }; 108 109 SymbolCollector(Options Opts); 110 ~SymbolCollector(); 111 112 /// Returns true is \p ND should be collected. 113 static bool shouldCollectSymbol(const NamedDecl &ND, const ASTContext &ASTCtx, 114 const Options &Opts, bool IsMainFileSymbol); 115 116 // Given a ref contained in enclosing decl `Enclosing`, return 117 // the decl that should be used as that ref's Ref::Container. This is 118 // usually `Enclosing` itself, but in cases where `Enclosing` is not 119 // indexed, we walk further up because Ref::Container should always be 120 // an indexed symbol. 121 // Note: we don't use DeclContext as the container as in some cases 122 // it's useful to use a Decl which is not a DeclContext. For example, 123 // for a ref occurring in the initializer of a namespace-scope variable, 124 // it's useful to use that variable as the container, as otherwise the 125 // next enclosing DeclContext would be a NamespaceDecl or TranslationUnitDecl, 126 // which are both not indexed and less granular than we'd like for use cases 127 // like call hierarchy. 128 static const Decl *getRefContainer(const Decl *Enclosing, 129 const SymbolCollector::Options &Opts); 130 131 void initialize(ASTContext &Ctx) override; 132 133 void setPreprocessor(std::shared_ptr<Preprocessor> PP) override { 134 this->PP = PP.get(); 135 } 136 void setPreprocessor(Preprocessor &PP) { this->PP = &PP; } 137 138 bool 139 handleDeclOccurrence(const Decl *D, index::SymbolRoleSet Roles, 140 ArrayRef<index::SymbolRelation> Relations, 141 SourceLocation Loc, 142 index::IndexDataConsumer::ASTNodeInfo ASTNode) override; 143 144 bool handleMacroOccurrence(const IdentifierInfo *Name, const MacroInfo *MI, 145 index::SymbolRoleSet Roles, 146 SourceLocation Loc) override; 147 148 void handleMacros(const MainFileMacros &MacroRefsToIndex); 149 150 SymbolSlab takeSymbols() { return std::move(Symbols).build(); } 151 RefSlab takeRefs() { return std::move(Refs).build(); } 152 RelationSlab takeRelations() { return std::move(Relations).build(); } 153 154 /// Returns true if we are interested in references and declarations from \p 155 /// FID. If this function return false, bodies of functions inside those files 156 /// will be skipped to decrease indexing time. 157 bool shouldIndexFile(FileID FID); 158 159 void finish() override; 160 161 private: 162 const Symbol *addDeclaration(const NamedDecl &, SymbolID, 163 bool IsMainFileSymbol); 164 void addDefinition(const NamedDecl &, const Symbol &DeclSymbol, 165 bool SkipDocCheck); 166 void processRelations(const NamedDecl &ND, const SymbolID &ID, 167 ArrayRef<index::SymbolRelation> Relations); 168 169 std::optional<SymbolLocation> getTokenLocation(SourceLocation TokLoc); 170 171 std::optional<std::string> getIncludeHeader(const Symbol &S, FileID); 172 173 SymbolID getSymbolIDCached(const Decl *D); 174 SymbolID getSymbolIDCached(const llvm::StringRef MacroName, 175 const MacroInfo *MI, const SourceManager &SM); 176 177 // All Symbols collected from the AST. 178 SymbolSlab::Builder Symbols; 179 // File IDs used to determine if the code contains Obj-C constructs. 180 // For Obj-C symbols, these File IDs are used to compute the include 181 // headers. 182 llvm::DenseMap<SymbolID, FileID> IncludeFiles; 183 void setIncludeLocation(const Symbol &S, SourceLocation, 184 const include_cleaner::Symbol &Sym); 185 186 // Providers for Symbol.IncludeHeaders. 187 // The final spelling is calculated in finish(). 188 llvm::DenseMap<SymbolID, llvm::SmallVector<include_cleaner::Header>> 189 SymbolProviders; 190 // Files which contain ObjC symbols. 191 // This is finalized and used in finish(). 192 llvm::DenseSet<FileID> FilesWithObjCConstructs; 193 194 // Indexed macros, to be erased if they turned out to be include guards. 195 llvm::DenseSet<const IdentifierInfo *> IndexedMacros; 196 // All refs collected from the AST. It includes: 197 // 1) symbols declared in the preamble and referenced from the main file ( 198 // which is not a header), or 199 // 2) symbols declared and referenced from the main file (which is a header) 200 RefSlab::Builder Refs; 201 // All relations collected from the AST. 202 RelationSlab::Builder Relations; 203 ASTContext *ASTCtx; 204 Preprocessor *PP = nullptr; 205 std::shared_ptr<GlobalCodeCompletionAllocator> CompletionAllocator; 206 std::unique_ptr<CodeCompletionTUInfo> CompletionTUInfo; 207 Options Opts; 208 struct SymbolRef { 209 SourceLocation Loc; 210 FileID FID; 211 index::SymbolRoleSet Roles; 212 index::SymbolKind Kind; 213 const Decl *Container; 214 bool Spelled; 215 }; 216 void addRef(SymbolID ID, const SymbolRef &SR); 217 // Symbols referenced from the current TU, flushed on finish(). 218 llvm::DenseSet<SymbolID> ReferencedSymbols; 219 // Maps canonical declaration provided by clang to canonical declaration for 220 // an index symbol, if clangd prefers a different declaration than that 221 // provided by clang. For example, friend declaration might be considered 222 // canonical by clang but should not be considered canonical in the index 223 // unless it's a definition. 224 llvm::DenseMap<const Decl *, const Decl *> CanonicalDecls; 225 // Cache whether to index a file or not. 226 llvm::DenseMap<FileID, bool> FilesToIndexCache; 227 // Encapsulates calculations and caches around header paths, which headers 228 // to insert for which symbol, etc. 229 class HeaderFileURICache; 230 std::unique_ptr<HeaderFileURICache> HeaderFileURIs; 231 llvm::DenseMap<const Decl *, SymbolID> DeclToIDCache; 232 llvm::DenseMap<const MacroInfo *, SymbolID> MacroToIDCache; 233 }; 234 235 } // namespace clangd 236 } // namespace clang 237 238 #endif 239