xref: /llvm-project/clang-tools-extra/clangd/index/FileIndex.cpp (revision 61fe67a4017375fd675f75652e857e837f77fa51)
1 //===--- FileIndex.cpp - Indexes for files. ------------------------ C++-*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "FileIndex.h"
10 #include "CollectMacros.h"
11 #include "ParsedAST.h"
12 #include "clang-include-cleaner/Record.h"
13 #include "index/Index.h"
14 #include "index/MemIndex.h"
15 #include "index/Merge.h"
16 #include "index/Ref.h"
17 #include "index/Relation.h"
18 #include "index/Serialization.h"
19 #include "index/Symbol.h"
20 #include "index/SymbolCollector.h"
21 #include "index/SymbolID.h"
22 #include "index/SymbolOrigin.h"
23 #include "index/dex/Dex.h"
24 #include "support/Logger.h"
25 #include "support/MemoryTree.h"
26 #include "support/Path.h"
27 #include "clang/AST/ASTContext.h"
28 #include "clang/Index/IndexingAction.h"
29 #include "clang/Index/IndexingOptions.h"
30 #include "clang/Lex/Preprocessor.h"
31 #include "llvm/ADT/DenseMap.h"
32 #include "llvm/ADT/STLExtras.h"
33 #include "llvm/ADT/StringMap.h"
34 #include "llvm/ADT/StringRef.h"
35 #include <algorithm>
36 #include <memory>
37 #include <optional>
38 #include <tuple>
39 #include <utility>
40 #include <vector>
41 
42 namespace clang {
43 namespace clangd {
44 namespace {
45 
46 SlabTuple indexSymbols(ASTContext &AST, Preprocessor &PP,
47                        llvm::ArrayRef<Decl *> DeclsToIndex,
48                        const MainFileMacros *MacroRefsToIndex,
49                        const include_cleaner::PragmaIncludes &PI,
50                        bool IsIndexMainAST, llvm::StringRef Version,
51                        bool CollectMainFileRefs) {
52   SymbolCollector::Options CollectorOpts;
53   CollectorOpts.CollectIncludePath = true;
54   CollectorOpts.PragmaIncludes = &PI;
55   CollectorOpts.CountReferences = false;
56   CollectorOpts.Origin =
57       IsIndexMainAST ? SymbolOrigin::Open : SymbolOrigin::Preamble;
58   CollectorOpts.CollectMainFileRefs = CollectMainFileRefs;
59   // We want stdlib implementation details in the index only if we've opened the
60   // file in question. This does means xrefs won't work, though.
61   CollectorOpts.CollectReserved = IsIndexMainAST;
62 
63   index::IndexingOptions IndexOpts;
64   // We only need declarations, because we don't count references.
65   IndexOpts.SystemSymbolFilter =
66       index::IndexingOptions::SystemSymbolFilterKind::DeclarationsOnly;
67   // We index function-local classes and its member functions only.
68   IndexOpts.IndexFunctionLocals = true;
69   if (IsIndexMainAST) {
70     // We only collect refs when indexing main AST.
71     CollectorOpts.RefFilter = RefKind::All;
72     // Comments for main file can always be obtained from sema, do not store
73     // them in the index.
74     CollectorOpts.StoreAllDocumentation = false;
75   } else {
76     IndexOpts.IndexMacrosInPreprocessor = true;
77     CollectorOpts.CollectMacro = true;
78     CollectorOpts.StoreAllDocumentation = true;
79   }
80 
81   SymbolCollector Collector(std::move(CollectorOpts));
82   Collector.setPreprocessor(PP);
83   index::indexTopLevelDecls(AST, PP, DeclsToIndex, Collector, IndexOpts);
84   if (MacroRefsToIndex)
85     Collector.handleMacros(*MacroRefsToIndex);
86 
87   const auto &SM = AST.getSourceManager();
88   const auto MainFileEntry = SM.getFileEntryRefForID(SM.getMainFileID());
89   std::string FileName =
90       std::string(MainFileEntry ? MainFileEntry->getName() : "");
91 
92   auto Syms = Collector.takeSymbols();
93   auto Refs = Collector.takeRefs();
94   auto Relations = Collector.takeRelations();
95 
96   vlog("indexed {0} AST for {1} version {2}:\n"
97        "  symbol slab: {3} symbols, {4} bytes\n"
98        "  ref slab: {5} symbols, {6} refs, {7} bytes\n"
99        "  relations slab: {8} relations, {9} bytes",
100        IsIndexMainAST ? "file" : "preamble", FileName, Version, Syms.size(),
101        Syms.bytes(), Refs.size(), Refs.numRefs(), Refs.bytes(),
102        Relations.size(), Relations.bytes());
103   return std::make_tuple(std::move(Syms), std::move(Refs),
104                          std::move(Relations));
105 }
106 
107 // We keep only the node "U" and its edges. Any node other than "U" will be
108 // empty in the resultant graph.
109 IncludeGraph getSubGraph(llvm::StringRef URI, const IncludeGraph &FullGraph) {
110   IncludeGraph IG;
111 
112   auto Entry = IG.try_emplace(URI).first;
113   auto &Node = Entry->getValue();
114   Node = FullGraph.lookup(Entry->getKey());
115   Node.URI = Entry->getKey();
116 
117   // URIs inside nodes must point into the keys of the same IncludeGraph.
118   for (auto &Include : Node.DirectIncludes) {
119     auto I = IG.try_emplace(Include).first;
120     I->getValue().URI = I->getKey();
121     Include = I->getKey();
122   }
123   return IG;
124 }
125 } // namespace
126 
127 FileShardedIndex::FileShardedIndex(IndexFileIn Input)
128     : Index(std::move(Input)) {
129   // Used to build RelationSlabs.
130   llvm::DenseMap<SymbolID, FileShard *> SymbolIDToFile;
131 
132   // Attribute each Symbol to both their declaration and definition locations.
133   if (Index.Symbols) {
134     for (const auto &S : *Index.Symbols) {
135       auto It = Shards.try_emplace(S.CanonicalDeclaration.FileURI);
136       It.first->getValue().Symbols.insert(&S);
137       SymbolIDToFile[S.ID] = &It.first->getValue();
138       // Only bother if definition file is different than declaration file.
139       if (S.Definition &&
140           S.Definition.FileURI != S.CanonicalDeclaration.FileURI) {
141         auto It = Shards.try_emplace(S.Definition.FileURI);
142         It.first->getValue().Symbols.insert(&S);
143       }
144     }
145   }
146   // Attribute references into each file they occured in.
147   if (Index.Refs) {
148     for (const auto &SymRefs : *Index.Refs) {
149       for (const auto &R : SymRefs.second) {
150         const auto It = Shards.try_emplace(R.Location.FileURI);
151         It.first->getValue().Refs.insert(&R);
152         RefToSymID[&R] = SymRefs.first;
153       }
154     }
155   }
156   // The Subject and/or Object shards might be part of multiple TUs. In
157   // such cases there will be a race and the last TU to write the shard
158   // will win and all the other relations will be lost. To avoid this,
159   // we store relations in both shards. A race might still happen if the
160   // same translation unit produces different relations under different
161   // configurations, but that's something clangd doesn't handle in general.
162   if (Index.Relations) {
163     for (const auto &R : *Index.Relations) {
164       // FIXME: RelationSlab shouldn't contain dangling relations.
165       FileShard *SubjectFile = SymbolIDToFile.lookup(R.Subject);
166       FileShard *ObjectFile = SymbolIDToFile.lookup(R.Object);
167       if (SubjectFile)
168         SubjectFile->Relations.insert(&R);
169       if (ObjectFile && ObjectFile != SubjectFile)
170         ObjectFile->Relations.insert(&R);
171     }
172   }
173   // Store only the direct includes of a file in a shard.
174   if (Index.Sources) {
175     const auto &FullGraph = *Index.Sources;
176     for (const auto &It : FullGraph) {
177       auto ShardIt = Shards.try_emplace(It.first());
178       ShardIt.first->getValue().IG = getSubGraph(It.first(), FullGraph);
179     }
180   }
181 }
182 std::vector<llvm::StringRef> FileShardedIndex::getAllSources() const {
183   // It should be enough to construct a vector with {Shards.keys().begin(),
184   // Shards.keys().end()} but MSVC fails to compile that.
185   std::vector<PathRef> Result;
186   Result.reserve(Shards.size());
187   for (auto Key : Shards.keys())
188     Result.push_back(Key);
189   return Result;
190 }
191 
192 std::optional<IndexFileIn>
193 FileShardedIndex::getShard(llvm::StringRef Uri) const {
194   auto It = Shards.find(Uri);
195   if (It == Shards.end())
196     return std::nullopt;
197 
198   IndexFileIn IF;
199   IF.Sources = It->getValue().IG;
200   IF.Cmd = Index.Cmd;
201 
202   SymbolSlab::Builder SymB;
203   for (const auto *S : It->getValue().Symbols)
204     SymB.insert(*S);
205   IF.Symbols = std::move(SymB).build();
206 
207   RefSlab::Builder RefB;
208   for (const auto *Ref : It->getValue().Refs) {
209     auto SID = RefToSymID.lookup(Ref);
210     RefB.insert(SID, *Ref);
211   }
212   IF.Refs = std::move(RefB).build();
213 
214   RelationSlab::Builder RelB;
215   for (const auto *Rel : It->getValue().Relations) {
216     RelB.insert(*Rel);
217   }
218   IF.Relations = std::move(RelB).build();
219   // Explicit move here is needed by some compilers.
220   return std::move(IF);
221 }
222 
223 SlabTuple indexMainDecls(ParsedAST &AST) {
224   return indexSymbols(
225       AST.getASTContext(), AST.getPreprocessor(), AST.getLocalTopLevelDecls(),
226       &AST.getMacros(), AST.getPragmaIncludes(),
227       /*IsIndexMainAST=*/true, AST.version(), /*CollectMainFileRefs=*/true);
228 }
229 
230 SlabTuple indexHeaderSymbols(llvm::StringRef Version, ASTContext &AST,
231                              Preprocessor &PP,
232                              const include_cleaner::PragmaIncludes &PI) {
233   std::vector<Decl *> DeclsToIndex(
234       AST.getTranslationUnitDecl()->decls().begin(),
235       AST.getTranslationUnitDecl()->decls().end());
236   return indexSymbols(AST, PP, DeclsToIndex,
237                       /*MainFileMacros=*/nullptr, PI,
238                       /*IsIndexMainAST=*/false, Version,
239                       /*CollectMainFileRefs=*/false);
240 }
241 
242 FileSymbols::FileSymbols(IndexContents IdxContents, bool SupportContainedRefs)
243     : IdxContents(IdxContents), SupportContainedRefs(SupportContainedRefs) {}
244 
245 void FileSymbols::update(llvm::StringRef Key,
246                          std::unique_ptr<SymbolSlab> Symbols,
247                          std::unique_ptr<RefSlab> Refs,
248                          std::unique_ptr<RelationSlab> Relations,
249                          bool CountReferences) {
250   std::lock_guard<std::mutex> Lock(Mutex);
251   ++Version;
252   if (!Symbols)
253     SymbolsSnapshot.erase(Key);
254   else
255     SymbolsSnapshot[Key] = std::move(Symbols);
256   if (!Refs) {
257     RefsSnapshot.erase(Key);
258   } else {
259     RefSlabAndCountReferences Item;
260     Item.CountReferences = CountReferences;
261     Item.Slab = std::move(Refs);
262     RefsSnapshot[Key] = std::move(Item);
263   }
264   if (!Relations)
265     RelationsSnapshot.erase(Key);
266   else
267     RelationsSnapshot[Key] = std::move(Relations);
268 }
269 
270 std::unique_ptr<SymbolIndex>
271 FileSymbols::buildIndex(IndexType Type, DuplicateHandling DuplicateHandle,
272                         size_t *Version) {
273   std::vector<std::shared_ptr<SymbolSlab>> SymbolSlabs;
274   std::vector<std::shared_ptr<RefSlab>> RefSlabs;
275   std::vector<std::shared_ptr<RelationSlab>> RelationSlabs;
276   llvm::StringSet<> Files;
277   std::vector<RefSlab *> MainFileRefs;
278   {
279     std::lock_guard<std::mutex> Lock(Mutex);
280     for (const auto &FileAndSymbols : SymbolsSnapshot) {
281       SymbolSlabs.push_back(FileAndSymbols.second);
282       Files.insert(FileAndSymbols.first());
283     }
284     for (const auto &FileAndRefs : RefsSnapshot) {
285       RefSlabs.push_back(FileAndRefs.second.Slab);
286       Files.insert(FileAndRefs.first());
287       if (FileAndRefs.second.CountReferences)
288         MainFileRefs.push_back(RefSlabs.back().get());
289     }
290     for (const auto &FileAndRelations : RelationsSnapshot) {
291       Files.insert(FileAndRelations.first());
292       RelationSlabs.push_back(FileAndRelations.second);
293     }
294 
295     if (Version)
296       *Version = this->Version;
297   }
298   std::vector<const Symbol *> AllSymbols;
299   std::vector<Symbol> SymsStorage;
300   switch (DuplicateHandle) {
301   case DuplicateHandling::Merge: {
302     llvm::DenseMap<SymbolID, Symbol> Merged;
303     for (const auto &Slab : SymbolSlabs) {
304       for (const auto &Sym : *Slab) {
305         assert(Sym.References == 0 &&
306                "Symbol with non-zero references sent to FileSymbols");
307         auto I = Merged.try_emplace(Sym.ID, Sym);
308         if (!I.second)
309           I.first->second = mergeSymbol(I.first->second, Sym);
310       }
311     }
312     for (const RefSlab *Refs : MainFileRefs)
313       for (const auto &Sym : *Refs) {
314         auto It = Merged.find(Sym.first);
315         // This might happen while background-index is still running.
316         if (It == Merged.end())
317           continue;
318         It->getSecond().References += Sym.second.size();
319       }
320     SymsStorage.reserve(Merged.size());
321     for (auto &Sym : Merged) {
322       SymsStorage.push_back(std::move(Sym.second));
323       AllSymbols.push_back(&SymsStorage.back());
324     }
325     break;
326   }
327   case DuplicateHandling::PickOne: {
328     llvm::DenseSet<SymbolID> AddedSymbols;
329     for (const auto &Slab : SymbolSlabs)
330       for (const auto &Sym : *Slab) {
331         assert(Sym.References == 0 &&
332                "Symbol with non-zero references sent to FileSymbols");
333         if (AddedSymbols.insert(Sym.ID).second)
334           AllSymbols.push_back(&Sym);
335       }
336     break;
337   }
338   }
339 
340   std::vector<Ref> RefsStorage; // Contiguous ranges for each SymbolID.
341   llvm::DenseMap<SymbolID, llvm::ArrayRef<Ref>> AllRefs;
342   {
343     llvm::DenseMap<SymbolID, llvm::SmallVector<Ref, 4>> MergedRefs;
344     size_t Count = 0;
345     for (const auto &RefSlab : RefSlabs)
346       for (const auto &Sym : *RefSlab) {
347         MergedRefs[Sym.first].append(Sym.second.begin(), Sym.second.end());
348         Count += Sym.second.size();
349       }
350     RefsStorage.reserve(Count);
351     AllRefs.reserve(MergedRefs.size());
352     for (auto &Sym : MergedRefs) {
353       auto &SymRefs = Sym.second;
354       // Sorting isn't required, but yields more stable results over rebuilds.
355       llvm::sort(SymRefs);
356       llvm::copy(SymRefs, back_inserter(RefsStorage));
357       AllRefs.try_emplace(
358           Sym.first,
359           llvm::ArrayRef<Ref>(&RefsStorage[RefsStorage.size() - SymRefs.size()],
360                               SymRefs.size()));
361     }
362   }
363 
364   std::vector<Relation> AllRelations;
365   for (const auto &RelationSlab : RelationSlabs) {
366     for (const auto &R : *RelationSlab)
367       AllRelations.push_back(R);
368   }
369   // Sort relations and remove duplicates that could arise due to
370   // relations being stored in both the shards containing their
371   // subject and object.
372   llvm::sort(AllRelations);
373   AllRelations.erase(std::unique(AllRelations.begin(), AllRelations.end()),
374                      AllRelations.end());
375 
376   size_t StorageSize =
377       RefsStorage.size() * sizeof(Ref) + SymsStorage.size() * sizeof(Symbol);
378   for (const auto &Slab : SymbolSlabs)
379     StorageSize += Slab->bytes();
380   for (const auto &RefSlab : RefSlabs)
381     StorageSize += RefSlab->bytes();
382 
383   // Index must keep the slabs and contiguous ranges alive.
384   switch (Type) {
385   case IndexType::Light:
386     return std::make_unique<MemIndex>(
387         llvm::make_pointee_range(AllSymbols), std::move(AllRefs),
388         std::move(AllRelations), std::move(Files), IdxContents,
389         std::make_tuple(std::move(SymbolSlabs), std::move(RefSlabs),
390                         std::move(RefsStorage), std::move(SymsStorage)),
391         StorageSize);
392   case IndexType::Heavy:
393     return std::make_unique<dex::Dex>(
394         llvm::make_pointee_range(AllSymbols), std::move(AllRefs),
395         std::move(AllRelations), std::move(Files), IdxContents,
396         std::make_tuple(std::move(SymbolSlabs), std::move(RefSlabs),
397                         std::move(RefsStorage), std::move(SymsStorage)),
398         StorageSize, SupportContainedRefs);
399   }
400   llvm_unreachable("Unknown clangd::IndexType");
401 }
402 
403 void FileSymbols::profile(MemoryTree &MT) const {
404   std::lock_guard<std::mutex> Lock(Mutex);
405   for (const auto &SymSlab : SymbolsSnapshot) {
406     MT.detail(SymSlab.first())
407         .child("symbols")
408         .addUsage(SymSlab.second->bytes());
409   }
410   for (const auto &RefSlab : RefsSnapshot) {
411     MT.detail(RefSlab.first())
412         .child("references")
413         .addUsage(RefSlab.second.Slab->bytes());
414   }
415   for (const auto &RelSlab : RelationsSnapshot) {
416     MT.detail(RelSlab.first())
417         .child("relations")
418         .addUsage(RelSlab.second->bytes());
419   }
420 }
421 
422 FileIndex::FileIndex(bool SupportContainedRefs)
423     : MergedIndex(&MainFileIndex, &PreambleIndex),
424       PreambleSymbols(IndexContents::Symbols | IndexContents::Relations,
425                       SupportContainedRefs),
426       PreambleIndex(std::make_unique<MemIndex>()),
427       MainFileSymbols(IndexContents::All, SupportContainedRefs),
428       MainFileIndex(std::make_unique<MemIndex>()) {}
429 
430 void FileIndex::updatePreamble(IndexFileIn IF) {
431   FileShardedIndex ShardedIndex(std::move(IF));
432   for (auto Uri : ShardedIndex.getAllSources()) {
433     auto IF = ShardedIndex.getShard(Uri);
434     // We are using the key received from ShardedIndex, so it should always
435     // exist.
436     assert(IF);
437     PreambleSymbols.update(
438         Uri, std::make_unique<SymbolSlab>(std::move(*IF->Symbols)),
439         std::make_unique<RefSlab>(),
440         std::make_unique<RelationSlab>(std::move(*IF->Relations)),
441         /*CountReferences=*/false);
442   }
443   size_t IndexVersion = 0;
444   auto NewIndex = PreambleSymbols.buildIndex(
445       IndexType::Heavy, DuplicateHandling::PickOne, &IndexVersion);
446   {
447     std::lock_guard<std::mutex> Lock(UpdateIndexMu);
448     if (IndexVersion <= PreambleIndexVersion) {
449       // We lost the race, some other thread built a later version.
450       return;
451     }
452     PreambleIndexVersion = IndexVersion;
453     PreambleIndex.reset(std::move(NewIndex));
454     vlog(
455         "Build dynamic index for header symbols with estimated memory usage of "
456         "{0} bytes",
457         PreambleIndex.estimateMemoryUsage());
458   }
459 }
460 
461 void FileIndex::updatePreamble(PathRef Path, llvm::StringRef Version,
462                                ASTContext &AST, Preprocessor &PP,
463                                const include_cleaner::PragmaIncludes &PI) {
464   IndexFileIn IF;
465   std::tie(IF.Symbols, std::ignore, IF.Relations) =
466       indexHeaderSymbols(Version, AST, PP, PI);
467   updatePreamble(std::move(IF));
468 }
469 
470 void FileIndex::updateMain(PathRef Path, ParsedAST &AST) {
471   auto Contents = indexMainDecls(AST);
472   MainFileSymbols.update(
473       URI::create(Path).toString(),
474       std::make_unique<SymbolSlab>(std::move(std::get<0>(Contents))),
475       std::make_unique<RefSlab>(std::move(std::get<1>(Contents))),
476       std::make_unique<RelationSlab>(std::move(std::get<2>(Contents))),
477       /*CountReferences=*/true);
478   size_t IndexVersion = 0;
479   auto NewIndex = MainFileSymbols.buildIndex(
480       IndexType::Light, DuplicateHandling::Merge, &IndexVersion);
481   {
482     std::lock_guard<std::mutex> Lock(UpdateIndexMu);
483     if (IndexVersion <= MainIndexVersion) {
484       // We lost the race, some other thread built a later version.
485       return;
486     }
487     MainIndexVersion = IndexVersion;
488     MainFileIndex.reset(std::move(NewIndex));
489     vlog(
490         "Build dynamic index for main-file symbols with estimated memory usage "
491         "of {0} bytes",
492         MainFileIndex.estimateMemoryUsage());
493   }
494 }
495 
496 void FileIndex::profile(MemoryTree &MT) const {
497   PreambleSymbols.profile(MT.child("preamble").child("slabs"));
498   MT.child("preamble")
499       .child("index")
500       .addUsage(PreambleIndex.estimateMemoryUsage());
501   MainFileSymbols.profile(MT.child("main_file").child("slabs"));
502   MT.child("main_file")
503       .child("index")
504       .addUsage(MainFileIndex.estimateMemoryUsage());
505 }
506 } // namespace clangd
507 } // namespace clang
508