xref: /llvm-project/clang-tools-extra/clangd/index/Merge.cpp (revision 61fe67a4017375fd675f75652e857e837f77fa51)
1 //===--- Merge.cpp -----------------------------------------------*- C++-*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "Merge.h"
10 #include "index/Symbol.h"
11 #include "index/SymbolLocation.h"
12 #include "index/SymbolOrigin.h"
13 #include "support/Trace.h"
14 #include "llvm/ADT/StringRef.h"
15 #include <algorithm>
16 #include <iterator>
17 
18 namespace clang {
19 namespace clangd {
20 
21 namespace {
22 
23 // Returns true if file defining/declaring \p S is covered by \p Index.
24 bool isIndexAuthoritative(const SymbolIndex::IndexedFiles &Index,
25                           const Symbol &S) {
26   // We expect the definition to see the canonical declaration, so it seems to
27   // be enough to check only the definition if it exists.
28   const char *OwningFile =
29       S.Definition ? S.Definition.FileURI : S.CanonicalDeclaration.FileURI;
30   return (Index(OwningFile) & IndexContents::Symbols) != IndexContents::None;
31 }
32 } // namespace
33 
34 bool MergedIndex::fuzzyFind(
35     const FuzzyFindRequest &Req,
36     llvm::function_ref<void(const Symbol &)> Callback) const {
37   // We can't step through both sources in parallel. So:
38   //  1) query all dynamic symbols, slurping results into a slab
39   //  2) query the static symbols, for each one:
40   //    a) if it's not in the dynamic slab, yield it directly
41   //    b) if it's in the dynamic slab, merge it and yield the result
42   //  3) now yield all the dynamic symbols we haven't processed.
43   trace::Span Tracer("MergedIndex fuzzyFind");
44   bool More = false; // We'll be incomplete if either source was.
45   SymbolSlab::Builder DynB;
46   unsigned DynamicCount = 0;
47   unsigned StaticCount = 0;
48   unsigned MergedCount = 0;
49   // Number of results ignored due to staleness.
50   unsigned StaticDropped = 0;
51   More |= Dynamic->fuzzyFind(Req, [&](const Symbol &S) {
52     ++DynamicCount;
53     DynB.insert(S);
54   });
55   SymbolSlab Dyn = std::move(DynB).build();
56 
57   llvm::DenseSet<SymbolID> ReportedDynSymbols;
58   {
59     auto DynamicContainsFile = Dynamic->indexedFiles();
60     More |= Static->fuzzyFind(Req, [&](const Symbol &S) {
61       ++StaticCount;
62       auto DynS = Dyn.find(S.ID);
63       // If symbol also exist in the dynamic index, just merge and report.
64       if (DynS != Dyn.end()) {
65         ++MergedCount;
66         ReportedDynSymbols.insert(S.ID);
67         return Callback(mergeSymbol(*DynS, S));
68       }
69 
70       // Otherwise, if the dynamic index owns the symbol's file, it means static
71       // index is stale just drop the symbol.
72       if (isIndexAuthoritative(DynamicContainsFile, S)) {
73         ++StaticDropped;
74         return;
75       }
76 
77       // If not just report the symbol from static index as is.
78       return Callback(S);
79     });
80   }
81   SPAN_ATTACH(Tracer, "dynamic", DynamicCount);
82   SPAN_ATTACH(Tracer, "static", StaticCount);
83   SPAN_ATTACH(Tracer, "static_dropped", StaticDropped);
84   SPAN_ATTACH(Tracer, "merged", MergedCount);
85   for (const Symbol &S : Dyn)
86     if (!ReportedDynSymbols.count(S.ID))
87       Callback(S);
88   return More;
89 }
90 
91 void MergedIndex::lookup(
92     const LookupRequest &Req,
93     llvm::function_ref<void(const Symbol &)> Callback) const {
94   trace::Span Tracer("MergedIndex lookup");
95   SymbolSlab::Builder B;
96 
97   Dynamic->lookup(Req, [&](const Symbol &S) { B.insert(S); });
98 
99   auto RemainingIDs = Req.IDs;
100   {
101     auto DynamicContainsFile = Dynamic->indexedFiles();
102     Static->lookup(Req, [&](const Symbol &S) {
103       // If we've seen the symbol before, just merge.
104       if (const Symbol *Sym = B.find(S.ID)) {
105         RemainingIDs.erase(S.ID);
106         return Callback(mergeSymbol(*Sym, S));
107       }
108 
109       // If symbol is missing in dynamic index, and dynamic index owns the
110       // symbol's file. Static index is stale, just drop the symbol.
111       if (isIndexAuthoritative(DynamicContainsFile, S))
112         return;
113 
114       // Dynamic index doesn't know about this file, just use the symbol from
115       // static index.
116       RemainingIDs.erase(S.ID);
117       Callback(S);
118     });
119   }
120   for (const auto &ID : RemainingIDs)
121     if (const Symbol *Sym = B.find(ID))
122       Callback(*Sym);
123 }
124 
125 bool MergedIndex::refs(const RefsRequest &Req,
126                        llvm::function_ref<void(const Ref &)> Callback) const {
127   trace::Span Tracer("MergedIndex refs");
128   bool More = false;
129   uint32_t Remaining = Req.Limit.value_or(std::numeric_limits<uint32_t>::max());
130   // We don't want duplicated refs from the static/dynamic indexes,
131   // and we can't reliably deduplicate them because offsets may differ slightly.
132   // We consider the dynamic index authoritative and report all its refs,
133   // and only report static index refs from other files.
134   More |= Dynamic->refs(Req, [&](const Ref &O) {
135     Callback(O);
136     assert(Remaining != 0);
137     --Remaining;
138   });
139   if (Remaining == 0 && More)
140     return More;
141   auto DynamicContainsFile = Dynamic->indexedFiles();
142   // We return less than Req.Limit if static index returns more refs for dirty
143   // files.
144   bool StaticHadMore = Static->refs(Req, [&](const Ref &O) {
145     if ((DynamicContainsFile(O.Location.FileURI) & IndexContents::References) !=
146         IndexContents::None)
147       return; // ignore refs that have been seen from dynamic index.
148     if (Remaining == 0) {
149       More = true;
150       return;
151     }
152     --Remaining;
153     Callback(O);
154   });
155   return More || StaticHadMore;
156 }
157 
158 bool MergedIndex::containedRefs(
159     const ContainedRefsRequest &Req,
160     llvm::function_ref<void(const ContainedRefsResult &)> Callback) const {
161   trace::Span Tracer("MergedIndex refersTo");
162   bool More = false;
163   uint32_t Remaining = Req.Limit.value_or(std::numeric_limits<uint32_t>::max());
164   // We don't want duplicated refs from the static/dynamic indexes,
165   // and we can't reliably deduplicate them because offsets may differ slightly.
166   // We consider the dynamic index authoritative and report all its refs,
167   // and only report static index refs from other files.
168   More |= Dynamic->containedRefs(Req, [&](const auto &O) {
169     Callback(O);
170     assert(Remaining != 0);
171     --Remaining;
172   });
173   if (Remaining == 0 && More)
174     return More;
175   auto DynamicContainsFile = Dynamic->indexedFiles();
176   // We return less than Req.Limit if static index returns more refs for dirty
177   // files.
178   bool StaticHadMore = Static->containedRefs(Req, [&](const auto &O) {
179     if ((DynamicContainsFile(O.Location.FileURI) & IndexContents::References) !=
180         IndexContents::None)
181       return; // ignore refs that have been seen from dynamic index.
182     if (Remaining == 0) {
183       More = true;
184       return;
185     }
186     --Remaining;
187     Callback(O);
188   });
189   return More || StaticHadMore;
190 }
191 
192 llvm::unique_function<IndexContents(llvm::StringRef) const>
193 MergedIndex::indexedFiles() const {
194   return [DynamicContainsFile{Dynamic->indexedFiles()},
195           StaticContainsFile{Static->indexedFiles()}](llvm::StringRef FileURI) {
196     return DynamicContainsFile(FileURI) | StaticContainsFile(FileURI);
197   };
198 }
199 
200 void MergedIndex::relations(
201     const RelationsRequest &Req,
202     llvm::function_ref<void(const SymbolID &, const Symbol &)> Callback) const {
203   uint32_t Remaining = Req.Limit.value_or(std::numeric_limits<uint32_t>::max());
204   // Return results from both indexes but avoid duplicates.
205   // We might return stale relations from the static index;
206   // we don't currently have a good way of identifying them.
207   llvm::DenseSet<std::pair<SymbolID, SymbolID>> SeenRelations;
208   Dynamic->relations(Req, [&](const SymbolID &Subject, const Symbol &Object) {
209     Callback(Subject, Object);
210     SeenRelations.insert(std::make_pair(Subject, Object.ID));
211     --Remaining;
212   });
213   if (Remaining == 0)
214     return;
215   Static->relations(Req, [&](const SymbolID &Subject, const Symbol &Object) {
216     if (Remaining > 0 &&
217         !SeenRelations.count(std::make_pair(Subject, Object.ID))) {
218       --Remaining;
219       Callback(Subject, Object);
220     }
221   });
222 }
223 
224 // Returns true if \p L is (strictly) preferred to \p R (e.g. by file paths). If
225 // neither is preferred, this returns false.
226 static bool prefer(const SymbolLocation &L, const SymbolLocation &R) {
227   if (!L)
228     return false;
229   if (!R)
230     return true;
231   auto HasCodeGenSuffix = [](const SymbolLocation &Loc) {
232     constexpr static const char *CodegenSuffixes[] = {".proto"};
233     return llvm::any_of(CodegenSuffixes, [&](llvm::StringRef Suffix) {
234       return llvm::StringRef(Loc.FileURI).ends_with(Suffix);
235     });
236   };
237   return HasCodeGenSuffix(L) && !HasCodeGenSuffix(R);
238 }
239 
240 Symbol mergeSymbol(const Symbol &L, const Symbol &R) {
241   assert(L.ID == R.ID);
242   // We prefer information from TUs that saw the definition.
243   // Classes: this is the def itself. Functions: hopefully the header decl.
244   // If both did (or both didn't), continue to prefer L over R.
245   bool PreferR = R.Definition && !L.Definition;
246   // Merge include headers only if both have definitions or both have no
247   // definition; otherwise, only accumulate references of common includes.
248   assert(L.Definition.FileURI && R.Definition.FileURI);
249   bool MergeIncludes =
250       bool(*L.Definition.FileURI) == bool(*R.Definition.FileURI);
251   Symbol S = PreferR ? R : L;        // The target symbol we're merging into.
252   const Symbol &O = PreferR ? L : R; // The "other" less-preferred symbol.
253 
254   // Only use locations in \p O if it's (strictly) preferred.
255   if (prefer(O.CanonicalDeclaration, S.CanonicalDeclaration))
256     S.CanonicalDeclaration = O.CanonicalDeclaration;
257   if (prefer(O.Definition, S.Definition))
258     S.Definition = O.Definition;
259   S.References += O.References;
260   if (S.Signature == "")
261     S.Signature = O.Signature;
262   if (S.CompletionSnippetSuffix == "")
263     S.CompletionSnippetSuffix = O.CompletionSnippetSuffix;
264   if (S.Documentation == "") {
265     // Don't accept documentation from bare forward class declarations, if there
266     // is a definition and it didn't provide one. S is often an undocumented
267     // class, and O is a non-canonical forward decl preceded by an irrelevant
268     // comment.
269     bool IsClass = S.SymInfo.Kind == index::SymbolKind::Class ||
270                    S.SymInfo.Kind == index::SymbolKind::Struct ||
271                    S.SymInfo.Kind == index::SymbolKind::Union;
272     if (!IsClass || !S.Definition)
273       S.Documentation = O.Documentation;
274   }
275   if (S.ReturnType == "")
276     S.ReturnType = O.ReturnType;
277   if (S.Type == "")
278     S.Type = O.Type;
279   for (const auto &OI : O.IncludeHeaders) {
280     bool Found = false;
281     for (auto &SI : S.IncludeHeaders) {
282       if (SI.IncludeHeader == OI.IncludeHeader) {
283         Found = true;
284         SI.References += OI.References;
285         SI.SupportedDirectives |= OI.SupportedDirectives;
286         break;
287       }
288     }
289     if (!Found && MergeIncludes)
290       S.IncludeHeaders.emplace_back(OI.IncludeHeader, OI.References,
291                                     OI.supportedDirectives());
292   }
293 
294   S.Origin |= O.Origin | SymbolOrigin::Merge;
295   S.Flags |= O.Flags;
296   return S;
297 }
298 
299 } // namespace clangd
300 } // namespace clang
301