xref: /llvm-project/clang-tools-extra/clangd/index/SymbolCollector.cpp (revision ec6c3448d31056db5d63d7aed3e9f207edb49321)
1 //===--- SymbolCollector.cpp -------------------------------------*- C++-*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "SymbolCollector.h"
10 #include "AST.h"
11 #include "CodeComplete.h"
12 #include "CodeCompletionStrings.h"
13 #include "ExpectedTypes.h"
14 #include "SourceCode.h"
15 #include "URI.h"
16 #include "clang-include-cleaner/Analysis.h"
17 #include "clang-include-cleaner/IncludeSpeller.h"
18 #include "clang-include-cleaner/Record.h"
19 #include "clang-include-cleaner/Types.h"
20 #include "index/CanonicalIncludes.h"
21 #include "index/Ref.h"
22 #include "index/Relation.h"
23 #include "index/Symbol.h"
24 #include "index/SymbolID.h"
25 #include "index/SymbolLocation.h"
26 #include "clang/AST/Decl.h"
27 #include "clang/AST/DeclBase.h"
28 #include "clang/AST/DeclObjC.h"
29 #include "clang/AST/DeclTemplate.h"
30 #include "clang/AST/DeclarationName.h"
31 #include "clang/AST/Expr.h"
32 #include "clang/Basic/FileEntry.h"
33 #include "clang/Basic/LangOptions.h"
34 #include "clang/Basic/SourceLocation.h"
35 #include "clang/Basic/SourceManager.h"
36 #include "clang/Index/IndexSymbol.h"
37 #include "clang/Lex/Preprocessor.h"
38 #include "clang/Lex/Token.h"
39 #include "clang/Tooling/Inclusions/HeaderAnalysis.h"
40 #include "clang/Tooling/Inclusions/StandardLibrary.h"
41 #include "llvm/ADT/ArrayRef.h"
42 #include "llvm/ADT/DenseMap.h"
43 #include "llvm/ADT/SmallVector.h"
44 #include "llvm/ADT/StringRef.h"
45 #include "llvm/Support/Casting.h"
46 #include "llvm/Support/ErrorHandling.h"
47 #include "llvm/Support/FileSystem.h"
48 #include "llvm/Support/Path.h"
49 #include <cassert>
50 #include <memory>
51 #include <optional>
52 #include <string>
53 #include <utility>
54 
55 namespace clang {
56 namespace clangd {
57 namespace {
58 
59 /// If \p ND is a template specialization, returns the described template.
60 /// Otherwise, returns \p ND.
61 const NamedDecl &getTemplateOrThis(const NamedDecl &ND) {
62   if (auto *T = ND.getDescribedTemplate())
63     return *T;
64   return ND;
65 }
66 
67 // Checks whether the decl is a private symbol in a header generated by
68 // protobuf compiler.
69 // FIXME: make filtering extensible when there are more use cases for symbol
70 // filters.
71 bool isPrivateProtoDecl(const NamedDecl &ND) {
72   const auto &SM = ND.getASTContext().getSourceManager();
73   if (!isProtoFile(nameLocation(ND, SM), SM))
74     return false;
75 
76   // ND without identifier can be operators.
77   if (ND.getIdentifier() == nullptr)
78     return false;
79   auto Name = ND.getIdentifier()->getName();
80   // There are some internal helpers like _internal_set_foo();
81   if (Name.contains("_internal_"))
82     return true;
83 
84   // https://protobuf.dev/reference/cpp/cpp-generated/#nested-types
85   // Nested entities (messages/enums) has two names, one at the top-level scope,
86   // with a mangled name created by prepending all the outer types. These names
87   // are almost never preferred by the developers, so exclude them from index.
88   // e.g.
89   //   message Foo {
90   //    message Bar {}
91   //    enum E { A }
92   //   }
93   //
94   // yields:
95   //   class Foo_Bar {};
96   //   enum Foo_E { Foo_E_A };
97   //   class Foo {
98   //    using Bar = Foo_Bar;
99   //    static constexpr Foo_E A = Foo_E_A;
100   //   };
101 
102   // We get rid of Foo_Bar and Foo_E by discarding any top-level entries with
103   // `_` in the name. This relies on original message/enum not having `_` in the
104   // name. Hence might go wrong in certain cases.
105   if (ND.getDeclContext()->isNamespace()) {
106     // Strip off some known public suffix helpers for enums, rest of the helpers
107     // are generated inside record decls so we don't care.
108     // https://protobuf.dev/reference/cpp/cpp-generated/#enum
109     Name.consume_back("_descriptor");
110     Name.consume_back("_IsValid");
111     Name.consume_back("_Name");
112     Name.consume_back("_Parse");
113     Name.consume_back("_MIN");
114     Name.consume_back("_MAX");
115     Name.consume_back("_ARRAYSIZE");
116     return Name.contains('_');
117   }
118 
119   // EnumConstantDecls need some special attention, despite being nested in a
120   // TagDecl, they might still have mangled names. We filter those by checking
121   // if it has parent's name as a prefix.
122   // This might go wrong if a nested entity has a name that starts with parent's
123   // name, e.g: enum Foo { Foo_X }.
124   if (llvm::isa<EnumConstantDecl>(&ND)) {
125     auto *DC = llvm::cast<EnumDecl>(ND.getDeclContext());
126     if (!DC || !DC->getIdentifier())
127       return false;
128     auto CtxName = DC->getIdentifier()->getName();
129     return !CtxName.empty() && Name.consume_front(CtxName) &&
130            Name.consume_front("_");
131   }
132 
133   // Now we're only left with fields/methods without an `_internal_` in the
134   // name, they're intended for public use.
135   return false;
136 }
137 
138 // We only collect #include paths for symbols that are suitable for global code
139 // completion, except for namespaces since #include path for a namespace is hard
140 // to define.
141 Symbol::IncludeDirective shouldCollectIncludePath(index::SymbolKind Kind) {
142   using SK = index::SymbolKind;
143   switch (Kind) {
144   case SK::Macro:
145   case SK::Enum:
146   case SK::Struct:
147   case SK::Class:
148   case SK::Union:
149   case SK::TypeAlias:
150   case SK::Using:
151   case SK::Function:
152   case SK::Variable:
153   case SK::EnumConstant:
154   case SK::Concept:
155     return Symbol::Include | Symbol::Import;
156   case SK::Protocol:
157     return Symbol::Import;
158   default:
159     return Symbol::Invalid;
160   }
161 }
162 
163 // Return the symbol range of the token at \p TokLoc.
164 std::pair<SymbolLocation::Position, SymbolLocation::Position>
165 getTokenRange(SourceLocation TokLoc, const SourceManager &SM,
166               const LangOptions &LangOpts) {
167   auto CreatePosition = [&SM](SourceLocation Loc) {
168     auto LSPLoc = sourceLocToPosition(SM, Loc);
169     SymbolLocation::Position Pos;
170     Pos.setLine(LSPLoc.line);
171     Pos.setColumn(LSPLoc.character);
172     return Pos;
173   };
174 
175   auto TokenLength = clang::Lexer::MeasureTokenLength(TokLoc, SM, LangOpts);
176   return {CreatePosition(TokLoc),
177           CreatePosition(TokLoc.getLocWithOffset(TokenLength))};
178 }
179 
180 // Checks whether \p ND is a good candidate to be the *canonical* declaration of
181 // its symbol (e.g. a go-to-declaration target). This overrides the default of
182 // using Clang's canonical declaration, which is the first in the TU.
183 //
184 // Example: preferring a class declaration over its forward declaration.
185 bool isPreferredDeclaration(const NamedDecl &ND, index::SymbolRoleSet Roles) {
186   const auto &SM = ND.getASTContext().getSourceManager();
187   if (isa<TagDecl>(ND))
188     return (Roles & static_cast<unsigned>(index::SymbolRole::Definition)) &&
189            !isInsideMainFile(ND.getLocation(), SM);
190   if (const auto *ID = dyn_cast<ObjCInterfaceDecl>(&ND))
191     return ID->isThisDeclarationADefinition();
192   if (const auto *PD = dyn_cast<ObjCProtocolDecl>(&ND))
193     return PD->isThisDeclarationADefinition();
194   return false;
195 }
196 
197 RefKind toRefKind(index::SymbolRoleSet Roles, bool Spelled = false) {
198   RefKind Result = RefKind::Unknown;
199   if (Roles & static_cast<unsigned>(index::SymbolRole::Declaration))
200     Result |= RefKind::Declaration;
201   if (Roles & static_cast<unsigned>(index::SymbolRole::Definition))
202     Result |= RefKind::Definition;
203   if (Roles & static_cast<unsigned>(index::SymbolRole::Reference))
204     Result |= RefKind::Reference;
205   if (Spelled)
206     Result |= RefKind::Spelled;
207   return Result;
208 }
209 
210 std::optional<RelationKind> indexableRelation(const index::SymbolRelation &R) {
211   if (R.Roles & static_cast<unsigned>(index::SymbolRole::RelationBaseOf))
212     return RelationKind::BaseOf;
213   if (R.Roles & static_cast<unsigned>(index::SymbolRole::RelationOverrideOf))
214     return RelationKind::OverriddenBy;
215   return std::nullopt;
216 }
217 
218 // Check if there is an exact spelling of \p ND at \p Loc.
219 bool isSpelled(SourceLocation Loc, const NamedDecl &ND) {
220   auto Name = ND.getDeclName();
221   const auto NameKind = Name.getNameKind();
222   if (NameKind != DeclarationName::Identifier &&
223       NameKind != DeclarationName::CXXConstructorName &&
224       NameKind != DeclarationName::ObjCZeroArgSelector &&
225       NameKind != DeclarationName::ObjCOneArgSelector &&
226       NameKind != DeclarationName::ObjCMultiArgSelector)
227     return false;
228   const auto &AST = ND.getASTContext();
229   const auto &SM = AST.getSourceManager();
230   const auto &LO = AST.getLangOpts();
231   clang::Token Tok;
232   if (clang::Lexer::getRawToken(Loc, Tok, SM, LO))
233     return false;
234   auto TokSpelling = clang::Lexer::getSpelling(Tok, SM, LO);
235   if (const auto *MD = dyn_cast<ObjCMethodDecl>(&ND))
236     return TokSpelling == MD->getSelector().getNameForSlot(0);
237   return TokSpelling == Name.getAsString();
238 }
239 } // namespace
240 
241 // Encapsulates decisions about how to record header paths in the index,
242 // including filename normalization, URI conversion etc.
243 // Expensive checks are cached internally.
244 class SymbolCollector::HeaderFileURICache {
245   struct FrameworkUmbrellaSpelling {
246     // Spelling for the public umbrella header, e.g. <Foundation/Foundation.h>
247     std::optional<std::string> PublicHeader;
248     // Spelling for the private umbrella header, e.g.
249     // <Foundation/Foundation_Private.h>
250     std::optional<std::string> PrivateHeader;
251   };
252   // Weird double-indirect access to PP, which might not be ready yet when
253   // HeaderFiles is created but will be by the time it's used.
254   // (IndexDataConsumer::setPreprocessor can happen before or after initialize)
255   Preprocessor *&PP;
256   const SourceManager &SM;
257   const include_cleaner::PragmaIncludes *PI;
258   llvm::StringRef FallbackDir;
259   llvm::DenseMap<const FileEntry *, const std::string *> CacheFEToURI;
260   llvm::StringMap<std::string> CachePathToURI;
261   llvm::DenseMap<FileID, llvm::StringRef> CacheFIDToInclude;
262   llvm::StringMap<std::string> CachePathToFrameworkSpelling;
263   llvm::StringMap<FrameworkUmbrellaSpelling>
264       CacheFrameworkToUmbrellaHeaderSpelling;
265 
266 public:
267   HeaderFileURICache(Preprocessor *&PP, const SourceManager &SM,
268                      const SymbolCollector::Options &Opts)
269       : PP(PP), SM(SM), PI(Opts.PragmaIncludes), FallbackDir(Opts.FallbackDir) {
270   }
271 
272   // Returns a canonical URI for the file \p FE.
273   // We attempt to make the path absolute first.
274   const std::string &toURI(const FileEntryRef FE) {
275     auto R = CacheFEToURI.try_emplace(FE);
276     if (R.second) {
277       auto CanonPath = getCanonicalPath(FE, SM.getFileManager());
278       R.first->second = &toURIInternal(CanonPath ? *CanonPath : FE.getName());
279     }
280     return *R.first->second;
281   }
282 
283   // Returns a canonical URI for \p Path.
284   // If the file is in the FileManager, use that to canonicalize the path.
285   // We attempt to make the path absolute in any case.
286   const std::string &toURI(llvm::StringRef Path) {
287     if (auto File = SM.getFileManager().getFileRef(Path))
288       return toURI(*File);
289     return toURIInternal(Path);
290   }
291 
292   // Gets a canonical include (URI of the header or <header> or "header") for
293   // header of \p FID (which should usually be the *expansion* file).
294   // This does not account for any per-symbol overrides!
295   // Returns "" if includes should not be inserted for this file.
296   llvm::StringRef getIncludeHeader(FileID FID) {
297     auto R = CacheFIDToInclude.try_emplace(FID);
298     if (R.second)
299       R.first->second = getIncludeHeaderUncached(FID);
300     return R.first->second;
301   }
302 
303   // If a file is mapped by canonical headers, use that mapping, regardless
304   // of whether it's an otherwise-good header (header guards etc).
305   llvm::StringRef mapCanonical(llvm::StringRef HeaderPath) {
306     if (!PP)
307       return "";
308     // Populate the system header mapping as late as possible to
309     // ensure the preprocessor has been set already.
310     CanonicalIncludes SysHeaderMapping;
311     SysHeaderMapping.addSystemHeadersMapping(PP->getLangOpts());
312     auto Canonical = SysHeaderMapping.mapHeader(HeaderPath);
313     if (Canonical.empty())
314       return "";
315     // If we had a mapping, always use it.
316     assert(Canonical.starts_with("<") || Canonical.starts_with("\""));
317     return Canonical;
318   }
319 
320 private:
321   // This takes care of making paths absolute and path->URI caching, but no
322   // FileManager-based canonicalization.
323   const std::string &toURIInternal(llvm::StringRef Path) {
324     auto R = CachePathToURI.try_emplace(Path);
325     if (R.second) {
326       llvm::SmallString<256> AbsPath = Path;
327       if (!llvm::sys::path::is_absolute(AbsPath) && !FallbackDir.empty())
328         llvm::sys::fs::make_absolute(FallbackDir, AbsPath);
329       assert(llvm::sys::path::is_absolute(AbsPath) &&
330              "If the VFS can't make paths absolute, a FallbackDir must be "
331              "provided");
332       llvm::sys::path::remove_dots(AbsPath, /*remove_dot_dot=*/true);
333       R.first->second = URI::create(AbsPath).toString();
334     }
335     return R.first->second;
336   }
337 
338   struct FrameworkHeaderPath {
339     // Path to the frameworks directory containing the .framework directory.
340     llvm::StringRef FrameworkParentDir;
341     // Name of the framework.
342     llvm::StringRef FrameworkName;
343     // Subpath relative to the Headers or PrivateHeaders dir, e.g. NSObject.h
344     // Note: This is NOT relative to the `HeadersParentDir`.
345     llvm::StringRef HeaderSubpath;
346     // Whether this header is under the PrivateHeaders dir
347     bool IsPrivateHeader;
348   };
349 
350   std::optional<FrameworkHeaderPath>
351   splitFrameworkHeaderPath(llvm::StringRef Path) {
352     using namespace llvm::sys;
353     path::reverse_iterator I = path::rbegin(Path);
354     path::reverse_iterator Prev = I;
355     path::reverse_iterator E = path::rend(Path);
356     FrameworkHeaderPath HeaderPath;
357     while (I != E) {
358       if (*I == "Headers" || *I == "PrivateHeaders") {
359         HeaderPath.HeaderSubpath = Path.substr(Prev - E);
360         HeaderPath.IsPrivateHeader = *I == "PrivateHeaders";
361         if (++I == E)
362           break;
363         HeaderPath.FrameworkName = *I;
364         if (!HeaderPath.FrameworkName.consume_back(".framework"))
365           break;
366         HeaderPath.FrameworkParentDir = Path.substr(0, I - E);
367         return HeaderPath;
368       }
369       Prev = I;
370       ++I;
371     }
372     // Unexpected, must not be a framework header.
373     return std::nullopt;
374   }
375 
376   // Frameworks typically have an umbrella header of the same name, e.g.
377   // <Foundation/Foundation.h> instead of <Foundation/NSObject.h> or
378   // <Foundation/Foundation_Private.h> instead of
379   // <Foundation/NSObject_Private.h> which should be used instead of directly
380   // importing the header.
381   std::optional<std::string>
382   getFrameworkUmbrellaSpelling(const HeaderSearch &HS,
383                                FrameworkHeaderPath &HeaderPath) {
384     StringRef Framework = HeaderPath.FrameworkName;
385     auto Res = CacheFrameworkToUmbrellaHeaderSpelling.try_emplace(Framework);
386     auto *CachedSpelling = &Res.first->second;
387     if (!Res.second) {
388       return HeaderPath.IsPrivateHeader ? CachedSpelling->PrivateHeader
389                                         : CachedSpelling->PublicHeader;
390     }
391     SmallString<256> UmbrellaPath(HeaderPath.FrameworkParentDir);
392     llvm::sys::path::append(UmbrellaPath, Framework + ".framework", "Headers",
393                             Framework + ".h");
394 
395     llvm::vfs::Status Status;
396     auto StatErr = HS.getFileMgr().getNoncachedStatValue(UmbrellaPath, Status);
397     if (!StatErr)
398       CachedSpelling->PublicHeader = llvm::formatv("<{0}/{0}.h>", Framework);
399 
400     UmbrellaPath = HeaderPath.FrameworkParentDir;
401     llvm::sys::path::append(UmbrellaPath, Framework + ".framework",
402                             "PrivateHeaders", Framework + "_Private.h");
403 
404     StatErr = HS.getFileMgr().getNoncachedStatValue(UmbrellaPath, Status);
405     if (!StatErr)
406       CachedSpelling->PrivateHeader =
407           llvm::formatv("<{0}/{0}_Private.h>", Framework);
408 
409     return HeaderPath.IsPrivateHeader ? CachedSpelling->PrivateHeader
410                                       : CachedSpelling->PublicHeader;
411   }
412 
413   // Compute the framework include spelling for `FE` which is in a framework
414   // named `Framework`, e.g. `NSObject.h` in framework `Foundation` would
415   // give <Foundation/Foundation.h> if the umbrella header exists, otherwise
416   // <Foundation/NSObject.h>.
417   std::optional<llvm::StringRef>
418   getFrameworkHeaderIncludeSpelling(FileEntryRef FE, HeaderSearch &HS) {
419     auto Res = CachePathToFrameworkSpelling.try_emplace(FE.getName());
420     auto *CachedHeaderSpelling = &Res.first->second;
421     if (!Res.second)
422       return llvm::StringRef(*CachedHeaderSpelling);
423 
424     auto HeaderPath = splitFrameworkHeaderPath(FE.getName());
425     if (!HeaderPath) {
426       // Unexpected: must not be a proper framework header, don't cache the
427       // failure.
428       CachePathToFrameworkSpelling.erase(Res.first);
429       return std::nullopt;
430     }
431     if (auto UmbrellaSpelling =
432             getFrameworkUmbrellaSpelling(HS, *HeaderPath)) {
433       *CachedHeaderSpelling = *UmbrellaSpelling;
434       return llvm::StringRef(*CachedHeaderSpelling);
435     }
436 
437     *CachedHeaderSpelling =
438         llvm::formatv("<{0}/{1}>", HeaderPath->FrameworkName,
439                       HeaderPath->HeaderSubpath)
440             .str();
441     return llvm::StringRef(*CachedHeaderSpelling);
442   }
443 
444   llvm::StringRef getIncludeHeaderUncached(FileID FID) {
445     const auto FE = SM.getFileEntryRefForID(FID);
446     if (!FE || FE->getName().empty())
447       return "";
448 
449     if (auto Verbatim = PI->getPublic(*FE); !Verbatim.empty())
450       return Verbatim;
451 
452     llvm::StringRef Filename = FE->getName();
453     if (auto Canonical = mapCanonical(Filename); !Canonical.empty())
454       return Canonical;
455 
456     // Framework headers are spelled as <FrameworkName/Foo.h>, not
457     // "path/FrameworkName.framework/Headers/Foo.h".
458     auto &HS = PP->getHeaderSearchInfo();
459     if (auto Spelling = getFrameworkHeaderIncludeSpelling(*FE, HS))
460       return *Spelling;
461 
462     if (!tooling::isSelfContainedHeader(*FE, PP->getSourceManager(),
463                                         PP->getHeaderSearchInfo())) {
464       // A .inc or .def file is often included into a real header to define
465       // symbols (e.g. LLVM tablegen files).
466       if (Filename.ends_with(".inc") || Filename.ends_with(".def"))
467         // Don't use cache reentrantly due to iterator invalidation.
468         return getIncludeHeaderUncached(SM.getFileID(SM.getIncludeLoc(FID)));
469       // Conservatively refuse to insert #includes to files without guards.
470       return "";
471     }
472     // Standard case: just insert the file itself.
473     return toURI(*FE);
474   }
475 };
476 
477 // Return the symbol location of the token at \p TokLoc.
478 std::optional<SymbolLocation>
479 SymbolCollector::getTokenLocation(SourceLocation TokLoc) {
480   const auto &SM = ASTCtx->getSourceManager();
481   const auto FE = SM.getFileEntryRefForID(SM.getFileID(TokLoc));
482   if (!FE)
483     return std::nullopt;
484 
485   SymbolLocation Result;
486   Result.FileURI = HeaderFileURIs->toURI(*FE).c_str();
487   auto Range = getTokenRange(TokLoc, SM, ASTCtx->getLangOpts());
488   Result.Start = Range.first;
489   Result.End = Range.second;
490 
491   return Result;
492 }
493 
494 SymbolCollector::SymbolCollector(Options Opts) : Opts(std::move(Opts)) {}
495 SymbolCollector::~SymbolCollector() = default;
496 
497 void SymbolCollector::initialize(ASTContext &Ctx) {
498   ASTCtx = &Ctx;
499   HeaderFileURIs = std::make_unique<HeaderFileURICache>(
500       this->PP, ASTCtx->getSourceManager(), Opts);
501   CompletionAllocator = std::make_shared<GlobalCodeCompletionAllocator>();
502   CompletionTUInfo =
503       std::make_unique<CodeCompletionTUInfo>(CompletionAllocator);
504 }
505 
506 bool SymbolCollector::shouldCollectSymbol(const NamedDecl &ND,
507                                           const ASTContext &ASTCtx,
508                                           const Options &Opts,
509                                           bool IsMainFileOnly) {
510   // Skip anonymous declarations, e.g (anonymous enum/class/struct).
511   if (ND.getDeclName().isEmpty())
512     return false;
513 
514   // Skip main-file symbols if we are not collecting them.
515   if (IsMainFileOnly && !Opts.CollectMainFileSymbols)
516     return false;
517 
518   // Skip symbols in anonymous namespaces in header files.
519   if (!IsMainFileOnly && ND.isInAnonymousNamespace())
520     return false;
521 
522   // For function local symbols, index only classes and its member functions.
523   if (index::isFunctionLocalSymbol(&ND))
524     return isa<RecordDecl>(ND) ||
525            (ND.isCXXInstanceMember() && ND.isFunctionOrFunctionTemplate());
526 
527   // We want most things but not "local" symbols such as symbols inside
528   // FunctionDecl, BlockDecl, ObjCMethodDecl and OMPDeclareReductionDecl.
529   // FIXME: Need a matcher for ExportDecl in order to include symbols declared
530   // within an export.
531   const auto *DeclCtx = ND.getDeclContext();
532   switch (DeclCtx->getDeclKind()) {
533   case Decl::TranslationUnit:
534   case Decl::Namespace:
535   case Decl::LinkageSpec:
536   case Decl::Enum:
537   case Decl::ObjCProtocol:
538   case Decl::ObjCInterface:
539   case Decl::ObjCCategory:
540   case Decl::ObjCCategoryImpl:
541   case Decl::ObjCImplementation:
542     break;
543   default:
544     // Record has a few derivations (e.g. CXXRecord, Class specialization), it's
545     // easier to cast.
546     if (!isa<RecordDecl>(DeclCtx))
547       return false;
548   }
549 
550   // Avoid indexing internal symbols in protobuf generated headers.
551   if (isPrivateProtoDecl(ND))
552     return false;
553 
554   // System headers that end with `intrin.h` likely contain useful symbols.
555   if (!Opts.CollectReserved &&
556       (hasReservedName(ND) || hasReservedScope(*ND.getDeclContext())) &&
557       ASTCtx.getSourceManager().isInSystemHeader(ND.getLocation()) &&
558       !ASTCtx.getSourceManager()
559            .getFilename(ND.getLocation())
560            .ends_with("intrin.h"))
561     return false;
562 
563   return true;
564 }
565 
566 const Decl *
567 SymbolCollector::getRefContainer(const Decl *Enclosing,
568                                  const SymbolCollector::Options &Opts) {
569   while (Enclosing) {
570     const auto *ND = dyn_cast<NamedDecl>(Enclosing);
571     if (ND && shouldCollectSymbol(*ND, ND->getASTContext(), Opts, true)) {
572       break;
573     }
574     Enclosing = dyn_cast_or_null<Decl>(Enclosing->getDeclContext());
575   }
576   return Enclosing;
577 }
578 
579 // Always return true to continue indexing.
580 bool SymbolCollector::handleDeclOccurrence(
581     const Decl *D, index::SymbolRoleSet Roles,
582     llvm::ArrayRef<index::SymbolRelation> Relations, SourceLocation Loc,
583     index::IndexDataConsumer::ASTNodeInfo ASTNode) {
584   assert(ASTCtx && PP && HeaderFileURIs);
585   assert(CompletionAllocator && CompletionTUInfo);
586   assert(ASTNode.OrigD);
587   // Indexing API puts canonical decl into D, which might not have a valid
588   // source location for implicit/built-in decls. Fallback to original decl in
589   // such cases.
590   if (D->getLocation().isInvalid())
591     D = ASTNode.OrigD;
592   // If OrigD is an declaration associated with a friend declaration and it's
593   // not a definition, skip it. Note that OrigD is the occurrence that the
594   // collector is currently visiting.
595   if ((ASTNode.OrigD->getFriendObjectKind() !=
596        Decl::FriendObjectKind::FOK_None) &&
597       !(Roles & static_cast<unsigned>(index::SymbolRole::Definition)))
598     return true;
599   // A declaration created for a friend declaration should not be used as the
600   // canonical declaration in the index. Use OrigD instead, unless we've already
601   // picked a replacement for D
602   if (D->getFriendObjectKind() != Decl::FriendObjectKind::FOK_None)
603     D = CanonicalDecls.try_emplace(D, ASTNode.OrigD).first->second;
604   // Flag to mark that D should be considered canonical meaning its declaration
605   // will override any previous declaration for the Symbol.
606   bool DeclIsCanonical = false;
607   // Avoid treating ObjCImplementationDecl as a canonical declaration if it has
608   // a corresponding non-implicit and non-forward declared ObjcInterfaceDecl.
609   if (const auto *IID = dyn_cast<ObjCImplementationDecl>(D)) {
610     DeclIsCanonical = true;
611     if (const auto *CID = IID->getClassInterface())
612       if (const auto *DD = CID->getDefinition())
613         if (!DD->isImplicitInterfaceDecl())
614           D = DD;
615   }
616   // Avoid treating ObjCCategoryImplDecl as a canonical declaration in favor of
617   // its ObjCCategoryDecl if it has one.
618   if (const auto *CID = dyn_cast<ObjCCategoryImplDecl>(D)) {
619     DeclIsCanonical = true;
620     if (const auto *CD = CID->getCategoryDecl())
621       D = CD;
622   }
623   const NamedDecl *ND = dyn_cast<NamedDecl>(D);
624   if (!ND)
625     return true;
626 
627   auto ID = getSymbolIDCached(ND);
628   if (!ID)
629     return true;
630 
631   // Mark D as referenced if this is a reference coming from the main file.
632   // D may not be an interesting symbol, but it's cheaper to check at the end.
633   auto &SM = ASTCtx->getSourceManager();
634   if (Opts.CountReferences &&
635       (Roles & static_cast<unsigned>(index::SymbolRole::Reference)) &&
636       SM.getFileID(SM.getSpellingLoc(Loc)) == SM.getMainFileID())
637     ReferencedSymbols.insert(ID);
638 
639   // ND is the canonical (i.e. first) declaration. If it's in the main file
640   // (which is not a header), then no public declaration was visible, so assume
641   // it's main-file only.
642   bool IsMainFileOnly =
643       SM.isWrittenInMainFile(SM.getExpansionLoc(ND->getBeginLoc())) &&
644       !isHeaderFile(SM.getFileEntryRefForID(SM.getMainFileID())->getName(),
645                     ASTCtx->getLangOpts());
646   // In C, printf is a redecl of an implicit builtin! So check OrigD instead.
647   if (ASTNode.OrigD->isImplicit() ||
648       !shouldCollectSymbol(*ND, *ASTCtx, Opts, IsMainFileOnly))
649     return true;
650 
651   // Note: we need to process relations for all decl occurrences, including
652   // refs, because the indexing code only populates relations for specific
653   // occurrences. For example, RelationBaseOf is only populated for the
654   // occurrence inside the base-specifier.
655   processRelations(*ND, ID, Relations);
656 
657   bool CollectRef = static_cast<bool>(Opts.RefFilter & toRefKind(Roles));
658   // Unlike other fields, e.g. Symbols (which use spelling locations), we use
659   // file locations for references (as it aligns the behavior of clangd's
660   // AST-based xref).
661   // FIXME: we should try to use the file locations for other fields.
662   if (CollectRef &&
663       (!IsMainFileOnly || Opts.CollectMainFileRefs ||
664        ND->isExternallyVisible()) &&
665       !isa<NamespaceDecl>(ND)) {
666     auto FileLoc = SM.getFileLoc(Loc);
667     auto FID = SM.getFileID(FileLoc);
668     if (Opts.RefsInHeaders || FID == SM.getMainFileID()) {
669       addRef(ID, SymbolRef{FileLoc, FID, Roles, index::getSymbolInfo(ND).Kind,
670                            getRefContainer(ASTNode.Parent, Opts),
671                            isSpelled(FileLoc, *ND)});
672     }
673   }
674   // Don't continue indexing if this is a mere reference.
675   if (!(Roles & (static_cast<unsigned>(index::SymbolRole::Declaration) |
676                  static_cast<unsigned>(index::SymbolRole::Definition))))
677     return true;
678 
679   // FIXME: ObjCPropertyDecl are not properly indexed here:
680   // - ObjCPropertyDecl may have an OrigD of ObjCPropertyImplDecl, which is
681   // not a NamedDecl.
682   auto *OriginalDecl = dyn_cast<NamedDecl>(ASTNode.OrigD);
683   if (!OriginalDecl)
684     return true;
685 
686   const Symbol *BasicSymbol = Symbols.find(ID);
687   bool SkipDocCheckInDef = false;
688   if (isPreferredDeclaration(*OriginalDecl, Roles)) {
689     // If OriginalDecl is preferred, replace/create the existing canonical
690     // declaration (e.g. a class forward declaration). There should be at most
691     // one duplicate as we expect to see only one preferred declaration per
692     // TU, because in practice they are definitions.
693     BasicSymbol = addDeclaration(*OriginalDecl, std::move(ID), IsMainFileOnly);
694     SkipDocCheckInDef = true;
695   } else if (!BasicSymbol || DeclIsCanonical) {
696     BasicSymbol = addDeclaration(*ND, std::move(ID), IsMainFileOnly);
697     SkipDocCheckInDef = true;
698   }
699 
700   if (Roles & static_cast<unsigned>(index::SymbolRole::Definition))
701     addDefinition(*OriginalDecl, *BasicSymbol, SkipDocCheckInDef);
702 
703   return true;
704 }
705 
706 void SymbolCollector::handleMacros(const MainFileMacros &MacroRefsToIndex) {
707   assert(HeaderFileURIs && PP);
708   const auto &SM = PP->getSourceManager();
709   const auto MainFileEntryRef = SM.getFileEntryRefForID(SM.getMainFileID());
710   assert(MainFileEntryRef);
711 
712   const std::string &MainFileURI = HeaderFileURIs->toURI(*MainFileEntryRef);
713   // Add macro references.
714   for (const auto &IDToRefs : MacroRefsToIndex.MacroRefs) {
715     for (const auto &MacroRef : IDToRefs.second) {
716       const auto &Range = MacroRef.toRange(SM);
717       bool IsDefinition = MacroRef.IsDefinition;
718       Ref R;
719       R.Location.Start.setLine(Range.start.line);
720       R.Location.Start.setColumn(Range.start.character);
721       R.Location.End.setLine(Range.end.line);
722       R.Location.End.setColumn(Range.end.character);
723       R.Location.FileURI = MainFileURI.c_str();
724       R.Kind = IsDefinition ? RefKind::Definition : RefKind::Reference;
725       Refs.insert(IDToRefs.first, R);
726       if (IsDefinition) {
727         Symbol S;
728         S.ID = IDToRefs.first;
729         auto StartLoc = cantFail(sourceLocationInMainFile(SM, Range.start));
730         auto EndLoc = cantFail(sourceLocationInMainFile(SM, Range.end));
731         S.Name = toSourceCode(SM, SourceRange(StartLoc, EndLoc));
732         S.SymInfo.Kind = index::SymbolKind::Macro;
733         S.SymInfo.SubKind = index::SymbolSubKind::None;
734         S.SymInfo.Properties = index::SymbolPropertySet();
735         S.SymInfo.Lang = index::SymbolLanguage::C;
736         S.Origin = Opts.Origin;
737         S.CanonicalDeclaration = R.Location;
738         // Make the macro visible for code completion if main file is an
739         // include-able header.
740         if (!HeaderFileURIs->getIncludeHeader(SM.getMainFileID()).empty()) {
741           S.Flags |= Symbol::IndexedForCodeCompletion;
742           S.Flags |= Symbol::VisibleOutsideFile;
743         }
744         Symbols.insert(S);
745       }
746     }
747   }
748 }
749 
750 bool SymbolCollector::handleMacroOccurrence(const IdentifierInfo *Name,
751                                             const MacroInfo *MI,
752                                             index::SymbolRoleSet Roles,
753                                             SourceLocation Loc) {
754   assert(PP);
755   // Builtin macros don't have useful locations and aren't needed in completion.
756   if (MI->isBuiltinMacro())
757     return true;
758 
759   const auto &SM = PP->getSourceManager();
760   auto DefLoc = MI->getDefinitionLoc();
761   // Also avoid storing macros that aren't defined in any file, i.e. predefined
762   // macros like __DBL_MIN__ and those defined on the command line.
763   if (SM.isWrittenInBuiltinFile(DefLoc) ||
764       SM.isWrittenInCommandLineFile(DefLoc) ||
765       Name->getName() == "__GCC_HAVE_DWARF2_CFI_ASM")
766     return true;
767 
768   auto ID = getSymbolIDCached(Name->getName(), MI, SM);
769   if (!ID)
770     return true;
771 
772   auto SpellingLoc = SM.getSpellingLoc(Loc);
773   bool IsMainFileOnly =
774       SM.isInMainFile(SM.getExpansionLoc(DefLoc)) &&
775       !isHeaderFile(SM.getFileEntryRefForID(SM.getMainFileID())->getName(),
776                     ASTCtx->getLangOpts());
777   // Do not store references to main-file macros.
778   if ((static_cast<unsigned>(Opts.RefFilter) & Roles) && !IsMainFileOnly &&
779       (Opts.RefsInHeaders || SM.getFileID(SpellingLoc) == SM.getMainFileID())) {
780     // FIXME: Populate container information for macro references.
781     // FIXME: All MacroRefs are marked as Spelled now, but this should be
782     // checked.
783     addRef(ID,
784            SymbolRef{Loc, SM.getFileID(Loc), Roles, index::SymbolKind::Macro,
785                      /*Container=*/nullptr,
786                      /*Spelled=*/true});
787   }
788 
789   // Collect symbols.
790   if (!Opts.CollectMacro)
791     return true;
792 
793   // Skip main-file macros if we are not collecting them.
794   if (IsMainFileOnly && !Opts.CollectMainFileSymbols)
795     return false;
796 
797   // Mark the macro as referenced if this is a reference coming from the main
798   // file. The macro may not be an interesting symbol, but it's cheaper to check
799   // at the end.
800   if (Opts.CountReferences &&
801       (Roles & static_cast<unsigned>(index::SymbolRole::Reference)) &&
802       SM.getFileID(SpellingLoc) == SM.getMainFileID())
803     ReferencedSymbols.insert(ID);
804 
805   // Don't continue indexing if this is a mere reference.
806   // FIXME: remove macro with ID if it is undefined.
807   if (!(Roles & static_cast<unsigned>(index::SymbolRole::Declaration) ||
808         Roles & static_cast<unsigned>(index::SymbolRole::Definition)))
809     return true;
810 
811   // Only collect one instance in case there are multiple.
812   if (Symbols.find(ID) != nullptr)
813     return true;
814 
815   Symbol S;
816   S.ID = std::move(ID);
817   S.Name = Name->getName();
818   if (!IsMainFileOnly) {
819     S.Flags |= Symbol::IndexedForCodeCompletion;
820     S.Flags |= Symbol::VisibleOutsideFile;
821   }
822   S.SymInfo = index::getSymbolInfoForMacro(*MI);
823   S.Origin = Opts.Origin;
824   // FIXME: use the result to filter out symbols.
825   shouldIndexFile(SM.getFileID(Loc));
826   if (auto DeclLoc = getTokenLocation(DefLoc))
827     S.CanonicalDeclaration = *DeclLoc;
828 
829   CodeCompletionResult SymbolCompletion(Name);
830   const auto *CCS = SymbolCompletion.CreateCodeCompletionStringForMacro(
831       *PP, *CompletionAllocator, *CompletionTUInfo);
832   std::string Signature;
833   std::string SnippetSuffix;
834   getSignature(*CCS, &Signature, &SnippetSuffix, SymbolCompletion.Kind,
835                SymbolCompletion.CursorKind);
836   S.Signature = Signature;
837   S.CompletionSnippetSuffix = SnippetSuffix;
838 
839   IndexedMacros.insert(Name);
840 
841   setIncludeLocation(S, DefLoc, include_cleaner::Macro{Name, DefLoc});
842   Symbols.insert(S);
843   return true;
844 }
845 
846 void SymbolCollector::processRelations(
847     const NamedDecl &ND, const SymbolID &ID,
848     ArrayRef<index::SymbolRelation> Relations) {
849   for (const auto &R : Relations) {
850     auto RKind = indexableRelation(R);
851     if (!RKind)
852       continue;
853     const Decl *Object = R.RelatedSymbol;
854 
855     auto ObjectID = getSymbolIDCached(Object);
856     if (!ObjectID)
857       continue;
858 
859     // Record the relation.
860     // TODO: There may be cases where the object decl is not indexed for some
861     // reason. Those cases should probably be removed in due course, but for
862     // now there are two possible ways to handle it:
863     //   (A) Avoid storing the relation in such cases.
864     //   (B) Store it anyways. Clients will likely lookup() the SymbolID
865     //       in the index and find nothing, but that's a situation they
866     //       probably need to handle for other reasons anyways.
867     // We currently do (B) because it's simpler.
868     if (*RKind == RelationKind::BaseOf)
869       this->Relations.insert({ID, *RKind, ObjectID});
870     else if (*RKind == RelationKind::OverriddenBy)
871       this->Relations.insert({ObjectID, *RKind, ID});
872   }
873 }
874 
875 void SymbolCollector::setIncludeLocation(const Symbol &S, SourceLocation DefLoc,
876                                          const include_cleaner::Symbol &Sym) {
877   const auto &SM = PP->getSourceManager();
878   if (!Opts.CollectIncludePath ||
879       shouldCollectIncludePath(S.SymInfo.Kind) == Symbol::Invalid)
880     return;
881 
882   // Use the expansion location to get the #include header since this is
883   // where the symbol is exposed.
884   if (FileID FID = SM.getDecomposedExpansionLoc(DefLoc).first; FID.isValid())
885     IncludeFiles[S.ID] = FID;
886 
887   // We update providers for a symbol with each occurence, as SymbolCollector
888   // might run while parsing, rather than at the end of a translation unit.
889   // Hence we see more and more redecls over time.
890   SymbolProviders[S.ID] =
891       include_cleaner::headersForSymbol(Sym, *PP, Opts.PragmaIncludes);
892 }
893 
894 llvm::StringRef getStdHeader(const Symbol *S, const LangOptions &LangOpts) {
895   tooling::stdlib::Lang Lang = tooling::stdlib::Lang::CXX;
896   if (LangOpts.C11)
897     Lang = tooling::stdlib::Lang::C;
898   else if(!LangOpts.CPlusPlus)
899     return "";
900 
901   if (S->Scope == "std::" && S->Name == "move") {
902     if (!S->Signature.contains(','))
903       return "<utility>";
904     return "<algorithm>";
905   }
906 
907   if (auto StdSym = tooling::stdlib::Symbol::named(S->Scope, S->Name, Lang))
908     if (auto Header = StdSym->header())
909       return Header->name();
910   return "";
911 }
912 
913 void SymbolCollector::finish() {
914   // At the end of the TU, add 1 to the refcount of all referenced symbols.
915   for (const auto &ID : ReferencedSymbols) {
916     if (const auto *S = Symbols.find(ID)) {
917       // SymbolSlab::Builder returns const symbols because strings are interned
918       // and modifying returned symbols without inserting again wouldn't go
919       // well. const_cast is safe here as we're modifying a data owned by the
920       // Symbol. This reduces time spent in SymbolCollector by ~1%.
921       ++const_cast<Symbol *>(S)->References;
922     }
923   }
924   if (Opts.CollectMacro) {
925     assert(PP);
926     // First, drop header guards. We can't identify these until EOF.
927     for (const IdentifierInfo *II : IndexedMacros) {
928       if (const auto *MI = PP->getMacroDefinition(II).getMacroInfo())
929         if (auto ID =
930                 getSymbolIDCached(II->getName(), MI, PP->getSourceManager()))
931           if (MI->isUsedForHeaderGuard())
932             Symbols.erase(ID);
933     }
934   }
935   llvm::DenseMap<FileID, bool> FileToContainsImportsOrObjC;
936   llvm::DenseMap<include_cleaner::Header, std::string> HeaderSpelling;
937   // Fill in IncludeHeaders.
938   // We delay this until end of TU so header guards are all resolved.
939   for (const auto &[SID, Providers] : SymbolProviders) {
940     const Symbol *S = Symbols.find(SID);
941     if (!S)
942       continue;
943 
944     FileID FID = IncludeFiles.lookup(SID);
945     // Determine if the FID is #include'd or #import'ed.
946     Symbol::IncludeDirective Directives = Symbol::Invalid;
947     auto CollectDirectives = shouldCollectIncludePath(S->SymInfo.Kind);
948     if ((CollectDirectives & Symbol::Include) != 0)
949       Directives |= Symbol::Include;
950     // Only allow #import for symbols from ObjC-like files.
951     if ((CollectDirectives & Symbol::Import) != 0 && FID.isValid()) {
952       auto [It, Inserted] = FileToContainsImportsOrObjC.try_emplace(FID);
953       if (Inserted)
954         It->second = FilesWithObjCConstructs.contains(FID) ||
955                      tooling::codeContainsImports(
956                          ASTCtx->getSourceManager().getBufferData(FID));
957       if (It->second)
958         Directives |= Symbol::Import;
959     }
960 
961     if (Directives == Symbol::Invalid)
962       continue;
963 
964     // Use the include location-based logic for Objective-C symbols.
965     if (Directives & Symbol::Import) {
966       llvm::StringRef IncludeHeader = getStdHeader(S, ASTCtx->getLangOpts());
967       if (IncludeHeader.empty())
968         IncludeHeader = HeaderFileURIs->getIncludeHeader(FID);
969 
970       if (!IncludeHeader.empty()) {
971         auto NewSym = *S;
972         NewSym.IncludeHeaders.push_back({IncludeHeader, 1, Directives});
973         Symbols.insert(NewSym);
974       }
975       // FIXME: use providers from include-cleaner library once it's polished
976       // for Objective-C.
977       continue;
978     }
979 
980     // For #include's, use the providers computed by the include-cleaner
981     // library.
982     assert(Directives == Symbol::Include);
983     // Ignore providers that are not self-contained, this is especially
984     // important for symbols defined in the main-file. We want to prefer the
985     // header, if possible.
986     // TODO: Limit this to specifically ignore main file, when we're indexing a
987     // non-header file?
988     auto SelfContainedProvider =
989         [this](llvm::ArrayRef<include_cleaner::Header> Providers)
990         -> std::optional<include_cleaner::Header> {
991       for (const auto &H : Providers) {
992         if (H.kind() != include_cleaner::Header::Physical)
993           return H;
994         if (tooling::isSelfContainedHeader(H.physical(), PP->getSourceManager(),
995                                            PP->getHeaderSearchInfo()))
996           return H;
997       }
998       return std::nullopt;
999     };
1000     const auto OptionalProvider = SelfContainedProvider(Providers);
1001     if (!OptionalProvider)
1002       continue;
1003     const auto &H = *OptionalProvider;
1004     const auto [SpellingIt, Inserted] = HeaderSpelling.try_emplace(H);
1005     if (Inserted) {
1006       auto &SM = ASTCtx->getSourceManager();
1007       if (H.kind() == include_cleaner::Header::Kind::Physical) {
1008         // FIXME: Get rid of this once include-cleaner has support for system
1009         // headers.
1010         if (auto Canonical =
1011                 HeaderFileURIs->mapCanonical(H.physical().getName());
1012             !Canonical.empty())
1013           SpellingIt->second = Canonical;
1014         // For physical files, prefer URIs as spellings might change
1015         // depending on the translation unit.
1016         else if (tooling::isSelfContainedHeader(H.physical(), SM,
1017                                                 PP->getHeaderSearchInfo()))
1018           SpellingIt->second =
1019               HeaderFileURIs->toURI(H.physical());
1020       } else {
1021         SpellingIt->second = include_cleaner::spellHeader(
1022             {H, PP->getHeaderSearchInfo(),
1023              SM.getFileEntryForID(SM.getMainFileID())});
1024       }
1025     }
1026 
1027     if (!SpellingIt->second.empty()) {
1028       auto NewSym = *S;
1029       NewSym.IncludeHeaders.push_back({SpellingIt->second, 1, Directives});
1030       Symbols.insert(NewSym);
1031     }
1032   }
1033 
1034   ReferencedSymbols.clear();
1035   IncludeFiles.clear();
1036   SymbolProviders.clear();
1037   FilesWithObjCConstructs.clear();
1038 }
1039 
1040 const Symbol *SymbolCollector::addDeclaration(const NamedDecl &ND, SymbolID ID,
1041                                               bool IsMainFileOnly) {
1042   auto &Ctx = ND.getASTContext();
1043   auto &SM = Ctx.getSourceManager();
1044 
1045   Symbol S;
1046   S.ID = std::move(ID);
1047   std::string QName = printQualifiedName(ND);
1048   // FIXME: this returns foo:bar: for objective-C methods, we prefer only foo:
1049   // for consistency with CodeCompletionString and a clean name/signature split.
1050   std::tie(S.Scope, S.Name) = splitQualifiedName(QName);
1051   std::string TemplateSpecializationArgs = printTemplateSpecializationArgs(ND);
1052   S.TemplateSpecializationArgs = TemplateSpecializationArgs;
1053 
1054   // We collect main-file symbols, but do not use them for code completion.
1055   if (!IsMainFileOnly && isIndexedForCodeCompletion(ND, Ctx))
1056     S.Flags |= Symbol::IndexedForCodeCompletion;
1057   if (isImplementationDetail(&ND))
1058     S.Flags |= Symbol::ImplementationDetail;
1059   if (!IsMainFileOnly)
1060     S.Flags |= Symbol::VisibleOutsideFile;
1061   S.SymInfo = index::getSymbolInfo(&ND);
1062   auto Loc = nameLocation(ND, SM);
1063   assert(Loc.isValid() && "Invalid source location for NamedDecl");
1064   // FIXME: use the result to filter out symbols.
1065   auto FID = SM.getFileID(Loc);
1066   shouldIndexFile(FID);
1067   if (auto DeclLoc = getTokenLocation(Loc))
1068     S.CanonicalDeclaration = *DeclLoc;
1069 
1070   S.Origin = Opts.Origin;
1071   if (ND.getAvailability() == AR_Deprecated)
1072     S.Flags |= Symbol::Deprecated;
1073 
1074   // Add completion info.
1075   // FIXME: we may want to choose a different redecl, or combine from several.
1076   assert(ASTCtx && PP && "ASTContext and Preprocessor must be set.");
1077   // We use the primary template, as clang does during code completion.
1078   CodeCompletionResult SymbolCompletion(&getTemplateOrThis(ND), 0);
1079   const auto *CCS = SymbolCompletion.CreateCodeCompletionString(
1080       *ASTCtx, *PP, CodeCompletionContext::CCC_Symbol, *CompletionAllocator,
1081       *CompletionTUInfo,
1082       /*IncludeBriefComments*/ false);
1083   std::string DocComment;
1084   std::string Documentation;
1085   bool AlreadyHasDoc = S.Flags & Symbol::HasDocComment;
1086   if (!AlreadyHasDoc) {
1087     DocComment = getDocComment(Ctx, SymbolCompletion,
1088                                /*CommentsFromHeaders=*/true);
1089     Documentation = formatDocumentation(*CCS, DocComment);
1090   }
1091   const auto UpdateDoc = [&] {
1092     if (!AlreadyHasDoc) {
1093       if (!DocComment.empty())
1094         S.Flags |= Symbol::HasDocComment;
1095       S.Documentation = Documentation;
1096     }
1097   };
1098   if (!(S.Flags & Symbol::IndexedForCodeCompletion)) {
1099     if (Opts.StoreAllDocumentation)
1100       UpdateDoc();
1101     Symbols.insert(S);
1102     return Symbols.find(S.ID);
1103   }
1104   UpdateDoc();
1105   std::string Signature;
1106   std::string SnippetSuffix;
1107   getSignature(*CCS, &Signature, &SnippetSuffix, SymbolCompletion.Kind,
1108                SymbolCompletion.CursorKind);
1109   S.Signature = Signature;
1110   S.CompletionSnippetSuffix = SnippetSuffix;
1111   std::string ReturnType = getReturnType(*CCS);
1112   S.ReturnType = ReturnType;
1113 
1114   std::optional<OpaqueType> TypeStorage;
1115   if (S.Flags & Symbol::IndexedForCodeCompletion) {
1116     TypeStorage = OpaqueType::fromCompletionResult(*ASTCtx, SymbolCompletion);
1117     if (TypeStorage)
1118       S.Type = TypeStorage->raw();
1119   }
1120 
1121   Symbols.insert(S);
1122   setIncludeLocation(S, ND.getLocation(), include_cleaner::Symbol{ND});
1123   if (S.SymInfo.Lang == index::SymbolLanguage::ObjC)
1124     FilesWithObjCConstructs.insert(FID);
1125   return Symbols.find(S.ID);
1126 }
1127 
1128 void SymbolCollector::addDefinition(const NamedDecl &ND, const Symbol &DeclSym,
1129                                     bool SkipDocCheck) {
1130   if (DeclSym.Definition)
1131     return;
1132   const auto &SM = ND.getASTContext().getSourceManager();
1133   auto Loc = nameLocation(ND, SM);
1134   shouldIndexFile(SM.getFileID(Loc));
1135   auto DefLoc = getTokenLocation(Loc);
1136   // If we saw some forward declaration, we end up copying the symbol.
1137   // This is not ideal, but avoids duplicating the "is this a definition" check
1138   // in clang::index. We should only see one definition.
1139   if (!DefLoc)
1140     return;
1141   Symbol S = DeclSym;
1142   // FIXME: use the result to filter out symbols.
1143   S.Definition = *DefLoc;
1144 
1145   std::string DocComment;
1146   std::string Documentation;
1147   if (!SkipDocCheck && !(S.Flags & Symbol::HasDocComment) &&
1148       (llvm::isa<FunctionDecl>(ND) || llvm::isa<CXXMethodDecl>(ND))) {
1149     CodeCompletionResult SymbolCompletion(&getTemplateOrThis(ND), 0);
1150     const auto *CCS = SymbolCompletion.CreateCodeCompletionString(
1151         *ASTCtx, *PP, CodeCompletionContext::CCC_Symbol, *CompletionAllocator,
1152         *CompletionTUInfo,
1153         /*IncludeBriefComments*/ false);
1154     DocComment = getDocComment(ND.getASTContext(), SymbolCompletion,
1155                                /*CommentsFromHeaders=*/true);
1156     if (!S.Documentation.empty())
1157       Documentation = S.Documentation.str() + '\n' + DocComment;
1158     else
1159       Documentation = formatDocumentation(*CCS, DocComment);
1160     if (!DocComment.empty())
1161       S.Flags |= Symbol::HasDocComment;
1162     S.Documentation = Documentation;
1163   }
1164 
1165   Symbols.insert(S);
1166 }
1167 
1168 bool SymbolCollector::shouldIndexFile(FileID FID) {
1169   if (!Opts.FileFilter)
1170     return true;
1171   auto I = FilesToIndexCache.try_emplace(FID);
1172   if (I.second)
1173     I.first->second = Opts.FileFilter(ASTCtx->getSourceManager(), FID);
1174   return I.first->second;
1175 }
1176 
1177 static bool refIsCall(index::SymbolKind Kind) {
1178   using SK = index::SymbolKind;
1179   return Kind == SK::Function || Kind == SK::InstanceMethod ||
1180          Kind == SK::ClassMethod || Kind == SK::StaticMethod ||
1181          Kind == SK::Constructor || Kind == SK::Destructor ||
1182          Kind == SK::ConversionFunction;
1183 }
1184 
1185 void SymbolCollector::addRef(SymbolID ID, const SymbolRef &SR) {
1186   const auto &SM = ASTCtx->getSourceManager();
1187   // FIXME: use the result to filter out references.
1188   shouldIndexFile(SR.FID);
1189   if (const auto FE = SM.getFileEntryRefForID(SR.FID)) {
1190     auto Range = getTokenRange(SR.Loc, SM, ASTCtx->getLangOpts());
1191     Ref R;
1192     R.Location.Start = Range.first;
1193     R.Location.End = Range.second;
1194     R.Location.FileURI = HeaderFileURIs->toURI(*FE).c_str();
1195     R.Kind = toRefKind(SR.Roles, SR.Spelled);
1196     if (refIsCall(SR.Kind)) {
1197       R.Kind |= RefKind::Call;
1198     }
1199     R.Container = getSymbolIDCached(SR.Container);
1200     Refs.insert(ID, R);
1201   }
1202 }
1203 
1204 SymbolID SymbolCollector::getSymbolIDCached(const Decl *D) {
1205   auto It = DeclToIDCache.try_emplace(D, SymbolID{});
1206   if (It.second)
1207     It.first->second = getSymbolID(D);
1208   return It.first->second;
1209 }
1210 
1211 SymbolID SymbolCollector::getSymbolIDCached(const llvm::StringRef MacroName,
1212                                             const MacroInfo *MI,
1213                                             const SourceManager &SM) {
1214   auto It = MacroToIDCache.try_emplace(MI, SymbolID{});
1215   if (It.second)
1216     It.first->second = getSymbolID(MacroName, MI, SM);
1217   return It.first->second;
1218 }
1219 } // namespace clangd
1220 } // namespace clang
1221