xref: /llvm-project/clang-tools-extra/clangd/index/Merge.cpp (revision d77cab823fc03f6933c3375baaddaae1477bb1d2)
1 //===--- Merge.cpp -----------------------------------------------*- C++-*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "Merge.h"
10 #include "index/Symbol.h"
11 #include "index/SymbolLocation.h"
12 #include "index/SymbolOrigin.h"
13 #include "support/Trace.h"
14 #include "llvm/ADT/StringRef.h"
15 #include <algorithm>
16 #include <iterator>
17 
18 namespace clang {
19 namespace clangd {
20 
21 namespace {
22 
23 // Returns true if file defining/declaring \p S is covered by \p Index.
24 bool isIndexAuthoritative(const SymbolIndex::IndexedFiles &Index,
25                           const Symbol &S) {
26   // We expect the definition to see the canonical declaration, so it seems to
27   // be enough to check only the definition if it exists.
28   const char *OwningFile =
29       S.Definition ? S.Definition.FileURI : S.CanonicalDeclaration.FileURI;
30   return (Index(OwningFile) & IndexContents::Symbols) != IndexContents::None;
31 }
32 } // namespace
33 
34 bool MergedIndex::fuzzyFind(
35     const FuzzyFindRequest &Req,
36     llvm::function_ref<void(const Symbol &)> Callback) const {
37   // We can't step through both sources in parallel. So:
38   //  1) query all dynamic symbols, slurping results into a slab
39   //  2) query the static symbols, for each one:
40   //    a) if it's not in the dynamic slab, yield it directly
41   //    b) if it's in the dynamic slab, merge it and yield the result
42   //  3) now yield all the dynamic symbols we haven't processed.
43   trace::Span Tracer("MergedIndex fuzzyFind");
44   bool More = false; // We'll be incomplete if either source was.
45   SymbolSlab::Builder DynB;
46   unsigned DynamicCount = 0;
47   unsigned StaticCount = 0;
48   unsigned MergedCount = 0;
49   // Number of results ignored due to staleness.
50   unsigned StaticDropped = 0;
51   More |= Dynamic->fuzzyFind(Req, [&](const Symbol &S) {
52     ++DynamicCount;
53     DynB.insert(S);
54   });
55   SymbolSlab Dyn = std::move(DynB).build();
56 
57   llvm::DenseSet<SymbolID> ReportedDynSymbols;
58   {
59     auto DynamicContainsFile = Dynamic->indexedFiles();
60     More |= Static->fuzzyFind(Req, [&](const Symbol &S) {
61       ++StaticCount;
62       auto DynS = Dyn.find(S.ID);
63       // If symbol also exist in the dynamic index, just merge and report.
64       if (DynS != Dyn.end()) {
65         ++MergedCount;
66         ReportedDynSymbols.insert(S.ID);
67         return Callback(mergeSymbol(*DynS, S));
68       }
69 
70       // Otherwise, if the dynamic index owns the symbol's file, it means static
71       // index is stale just drop the symbol.
72       if (isIndexAuthoritative(DynamicContainsFile, S)) {
73         ++StaticDropped;
74         return;
75       }
76 
77       // If not just report the symbol from static index as is.
78       return Callback(S);
79     });
80   }
81   SPAN_ATTACH(Tracer, "dynamic", DynamicCount);
82   SPAN_ATTACH(Tracer, "static", StaticCount);
83   SPAN_ATTACH(Tracer, "static_dropped", StaticDropped);
84   SPAN_ATTACH(Tracer, "merged", MergedCount);
85   for (const Symbol &S : Dyn)
86     if (!ReportedDynSymbols.count(S.ID))
87       Callback(S);
88   return More;
89 }
90 
91 void MergedIndex::lookup(
92     const LookupRequest &Req,
93     llvm::function_ref<void(const Symbol &)> Callback) const {
94   trace::Span Tracer("MergedIndex lookup");
95   SymbolSlab::Builder B;
96 
97   Dynamic->lookup(Req, [&](const Symbol &S) { B.insert(S); });
98 
99   auto RemainingIDs = Req.IDs;
100   {
101     auto DynamicContainsFile = Dynamic->indexedFiles();
102     Static->lookup(Req, [&](const Symbol &S) {
103       // If we've seen the symbol before, just merge.
104       if (const Symbol *Sym = B.find(S.ID)) {
105         RemainingIDs.erase(S.ID);
106         return Callback(mergeSymbol(*Sym, S));
107       }
108 
109       // If symbol is missing in dynamic index, and dynamic index owns the
110       // symbol's file. Static index is stale, just drop the symbol.
111       if (isIndexAuthoritative(DynamicContainsFile, S))
112         return;
113 
114       // Dynamic index doesn't know about this file, just use the symbol from
115       // static index.
116       RemainingIDs.erase(S.ID);
117       Callback(S);
118     });
119   }
120   for (const auto &ID : RemainingIDs)
121     if (const Symbol *Sym = B.find(ID))
122       Callback(*Sym);
123 }
124 
125 bool MergedIndex::refs(const RefsRequest &Req,
126                        llvm::function_ref<void(const Ref &)> Callback) const {
127   trace::Span Tracer("MergedIndex refs");
128   bool More = false;
129   uint32_t Remaining = Req.Limit.value_or(std::numeric_limits<uint32_t>::max());
130   // We don't want duplicated refs from the static/dynamic indexes,
131   // and we can't reliably deduplicate them because offsets may differ slightly.
132   // We consider the dynamic index authoritative and report all its refs,
133   // and only report static index refs from other files.
134   More |= Dynamic->refs(Req, [&](const Ref &O) {
135     Callback(O);
136     assert(Remaining != 0);
137     --Remaining;
138   });
139   if (Remaining == 0 && More)
140     return More;
141   auto DynamicContainsFile = Dynamic->indexedFiles();
142   // We return less than Req.Limit if static index returns more refs for dirty
143   // files.
144   bool StaticHadMore = Static->refs(Req, [&](const Ref &O) {
145     if ((DynamicContainsFile(O.Location.FileURI) & IndexContents::References) !=
146         IndexContents::None)
147       return; // ignore refs that have been seen from dynamic index.
148     if (Remaining == 0) {
149       More = true;
150       return;
151     }
152     --Remaining;
153     Callback(O);
154   });
155   return More || StaticHadMore;
156 }
157 
158 llvm::unique_function<IndexContents(llvm::StringRef) const>
159 MergedIndex::indexedFiles() const {
160   return [DynamicContainsFile{Dynamic->indexedFiles()},
161           StaticContainsFile{Static->indexedFiles()}](llvm::StringRef FileURI) {
162     return DynamicContainsFile(FileURI) | StaticContainsFile(FileURI);
163   };
164 }
165 
166 void MergedIndex::relations(
167     const RelationsRequest &Req,
168     llvm::function_ref<void(const SymbolID &, const Symbol &)> Callback) const {
169   uint32_t Remaining = Req.Limit.value_or(std::numeric_limits<uint32_t>::max());
170   // Return results from both indexes but avoid duplicates.
171   // We might return stale relations from the static index;
172   // we don't currently have a good way of identifying them.
173   llvm::DenseSet<std::pair<SymbolID, SymbolID>> SeenRelations;
174   Dynamic->relations(Req, [&](const SymbolID &Subject, const Symbol &Object) {
175     Callback(Subject, Object);
176     SeenRelations.insert(std::make_pair(Subject, Object.ID));
177     --Remaining;
178   });
179   if (Remaining == 0)
180     return;
181   Static->relations(Req, [&](const SymbolID &Subject, const Symbol &Object) {
182     if (Remaining > 0 &&
183         !SeenRelations.count(std::make_pair(Subject, Object.ID))) {
184       --Remaining;
185       Callback(Subject, Object);
186     }
187   });
188 }
189 
190 // Returns true if \p L is (strictly) preferred to \p R (e.g. by file paths). If
191 // neither is preferred, this returns false.
192 static bool prefer(const SymbolLocation &L, const SymbolLocation &R) {
193   if (!L)
194     return false;
195   if (!R)
196     return true;
197   auto HasCodeGenSuffix = [](const SymbolLocation &Loc) {
198     constexpr static const char *CodegenSuffixes[] = {".proto"};
199     return llvm::any_of(CodegenSuffixes, [&](llvm::StringRef Suffix) {
200       return llvm::StringRef(Loc.FileURI).ends_with(Suffix);
201     });
202   };
203   return HasCodeGenSuffix(L) && !HasCodeGenSuffix(R);
204 }
205 
206 Symbol mergeSymbol(const Symbol &L, const Symbol &R) {
207   assert(L.ID == R.ID);
208   // We prefer information from TUs that saw the definition.
209   // Classes: this is the def itself. Functions: hopefully the header decl.
210   // If both did (or both didn't), continue to prefer L over R.
211   bool PreferR = R.Definition && !L.Definition;
212   // Merge include headers only if both have definitions or both have no
213   // definition; otherwise, only accumulate references of common includes.
214   assert(L.Definition.FileURI && R.Definition.FileURI);
215   bool MergeIncludes =
216       bool(*L.Definition.FileURI) == bool(*R.Definition.FileURI);
217   Symbol S = PreferR ? R : L;        // The target symbol we're merging into.
218   const Symbol &O = PreferR ? L : R; // The "other" less-preferred symbol.
219 
220   // Only use locations in \p O if it's (strictly) preferred.
221   if (prefer(O.CanonicalDeclaration, S.CanonicalDeclaration))
222     S.CanonicalDeclaration = O.CanonicalDeclaration;
223   if (prefer(O.Definition, S.Definition))
224     S.Definition = O.Definition;
225   S.References += O.References;
226   if (S.Signature == "")
227     S.Signature = O.Signature;
228   if (S.CompletionSnippetSuffix == "")
229     S.CompletionSnippetSuffix = O.CompletionSnippetSuffix;
230   if (S.Documentation == "") {
231     // Don't accept documentation from bare forward class declarations, if there
232     // is a definition and it didn't provide one. S is often an undocumented
233     // class, and O is a non-canonical forward decl preceded by an irrelevant
234     // comment.
235     bool IsClass = S.SymInfo.Kind == index::SymbolKind::Class ||
236                    S.SymInfo.Kind == index::SymbolKind::Struct ||
237                    S.SymInfo.Kind == index::SymbolKind::Union;
238     if (!IsClass || !S.Definition)
239       S.Documentation = O.Documentation;
240   }
241   if (S.ReturnType == "")
242     S.ReturnType = O.ReturnType;
243   if (S.Type == "")
244     S.Type = O.Type;
245   for (const auto &OI : O.IncludeHeaders) {
246     bool Found = false;
247     for (auto &SI : S.IncludeHeaders) {
248       if (SI.IncludeHeader == OI.IncludeHeader) {
249         Found = true;
250         SI.References += OI.References;
251         SI.SupportedDirectives |= OI.SupportedDirectives;
252         break;
253       }
254     }
255     if (!Found && MergeIncludes)
256       S.IncludeHeaders.emplace_back(OI.IncludeHeader, OI.References,
257                                     OI.supportedDirectives());
258   }
259 
260   S.Origin |= O.Origin | SymbolOrigin::Merge;
261   S.Flags |= O.Flags;
262   return S;
263 }
264 
265 } // namespace clangd
266 } // namespace clang
267