xref: /llvm-project/clang-tools-extra/clang-include-fixer/SymbolIndexManager.cpp (revision 732bccb8c1b4ea724919db6ff02b1188e20850e7)
1 //===-- SymbolIndexManager.cpp - Managing multiple SymbolIndices-*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "SymbolIndexManager.h"
10 
11 #include <cmath>
12 
13 #include "find-all-symbols/SymbolInfo.h"
14 #include "llvm/ADT/STLExtras.h"
15 #include "llvm/ADT/SmallVector.h"
16 #include "llvm/ADT/StringMap.h"
17 #include "llvm/Support/Debug.h"
18 #include "llvm/Support/Path.h"
19 
20 #define DEBUG_TYPE "clang-include-fixer"
21 
22 namespace clang {
23 namespace include_fixer {
24 
25 using find_all_symbols::SymbolInfo;
26 using find_all_symbols::SymbolAndSignals;
27 
28 // Calculate a score based on whether we think the given header is closely
29 // related to the given source file.
similarityScore(llvm::StringRef FileName,llvm::StringRef Header)30 static double similarityScore(llvm::StringRef FileName,
31                               llvm::StringRef Header) {
32   // Compute the maximum number of common path segments between Header and
33   // a suffix of FileName.
34   // We do not do a full longest common substring computation, as Header
35   // specifies the path we would directly #include, so we assume it is rooted
36   // relatively to a subproject of the repository.
37   int MaxSegments = 1;
38   for (auto FileI = llvm::sys::path::begin(FileName),
39             FileE = llvm::sys::path::end(FileName);
40        FileI != FileE; ++FileI) {
41     int Segments = 0;
42     for (auto HeaderI = llvm::sys::path::begin(Header),
43               HeaderE = llvm::sys::path::end(Header), I = FileI;
44          HeaderI != HeaderE && *I == *HeaderI && I != FileE; ++I, ++HeaderI) {
45       ++Segments;
46     }
47     MaxSegments = std::max(Segments, MaxSegments);
48   }
49   return MaxSegments;
50 }
51 
rank(std::vector<SymbolAndSignals> & Symbols,llvm::StringRef FileName)52 static void rank(std::vector<SymbolAndSignals> &Symbols,
53                  llvm::StringRef FileName) {
54   llvm::StringMap<double> Score;
55   for (const auto &Symbol : Symbols) {
56     // Calculate a score from the similarity of the header the symbol is in
57     // with the current file and the popularity of the symbol.
58     double NewScore = similarityScore(FileName, Symbol.Symbol.getFilePath()) *
59                       (1.0 + std::log2(1 + Symbol.Signals.Seen));
60     double &S = Score[Symbol.Symbol.getFilePath()];
61     S = std::max(S, NewScore);
62   }
63   // Sort by the gathered scores. Use file name as a tie breaker so we can
64   // deduplicate.
65   llvm::sort(Symbols,
66              [&](const SymbolAndSignals &A, const SymbolAndSignals &B) {
67                auto AS = Score[A.Symbol.getFilePath()];
68                auto BS = Score[B.Symbol.getFilePath()];
69                if (AS != BS)
70                  return AS > BS;
71                return A.Symbol.getFilePath() < B.Symbol.getFilePath();
72              });
73 }
74 
75 std::vector<find_all_symbols::SymbolInfo>
search(llvm::StringRef Identifier,bool IsNestedSearch,llvm::StringRef FileName) const76 SymbolIndexManager::search(llvm::StringRef Identifier,
77                            bool IsNestedSearch,
78                            llvm::StringRef FileName) const {
79   // The identifier may be fully qualified, so split it and get all the context
80   // names.
81   llvm::SmallVector<llvm::StringRef, 8> Names;
82   Identifier.split(Names, "::");
83 
84   bool IsFullyQualified = false;
85   if (Identifier.starts_with("::")) {
86     Names.erase(Names.begin()); // Drop first (empty) element.
87     IsFullyQualified = true;
88   }
89 
90   // As long as we don't find a result keep stripping name parts from the end.
91   // This is to support nested classes which aren't recorded in the database.
92   // Eventually we will either hit a class (namespaces aren't in the database
93   // either) and can report that result.
94   bool TookPrefix = false;
95   std::vector<SymbolAndSignals> MatchedSymbols;
96   do {
97     std::vector<SymbolAndSignals> Symbols;
98     for (const auto &DB : SymbolIndices) {
99       auto Res = DB.get()->search(Names.back());
100       Symbols.insert(Symbols.end(), Res.begin(), Res.end());
101     }
102 
103     LLVM_DEBUG(llvm::dbgs() << "Searching " << Names.back() << "... got "
104                             << Symbols.size() << " results...\n");
105 
106     for (auto &SymAndSig : Symbols) {
107       const SymbolInfo &Symbol = SymAndSig.Symbol;
108       // Match the identifier name without qualifier.
109       bool IsMatched = true;
110       auto SymbolContext = Symbol.getContexts().begin();
111       auto IdentiferContext = Names.rbegin() + 1; // Skip identifier name.
112       // Match the remaining context names.
113       while (IdentiferContext != Names.rend() &&
114              SymbolContext != Symbol.getContexts().end()) {
115         if (SymbolContext->second == *IdentiferContext) {
116           ++IdentiferContext;
117           ++SymbolContext;
118         } else if (SymbolContext->first ==
119                    find_all_symbols::SymbolInfo::ContextType::EnumDecl) {
120           // Skip non-scoped enum context.
121           ++SymbolContext;
122         } else {
123           IsMatched = false;
124           break;
125         }
126       }
127 
128       // If the name was qualified we only want to add results if we evaluated
129       // all contexts.
130       if (IsFullyQualified)
131         IsMatched &= (SymbolContext == Symbol.getContexts().end());
132 
133       // FIXME: Support full match. At this point, we only find symbols in
134       // database which end with the same contexts with the identifier.
135       if (IsMatched && IdentiferContext == Names.rend()) {
136         // If we're in a situation where we took a prefix but the thing we
137         // found couldn't possibly have a nested member ignore it.
138         if (TookPrefix &&
139             (Symbol.getSymbolKind() == SymbolInfo::SymbolKind::Function ||
140              Symbol.getSymbolKind() == SymbolInfo::SymbolKind::Variable ||
141              Symbol.getSymbolKind() ==
142                  SymbolInfo::SymbolKind::EnumConstantDecl ||
143              Symbol.getSymbolKind() == SymbolInfo::SymbolKind::Macro))
144           continue;
145 
146         MatchedSymbols.push_back(std::move(SymAndSig));
147       }
148     }
149     Names.pop_back();
150     TookPrefix = true;
151   } while (MatchedSymbols.empty() && !Names.empty() && IsNestedSearch);
152 
153   rank(MatchedSymbols, FileName);
154   // Strip signals, they are no longer needed.
155   std::vector<SymbolInfo> Res;
156   Res.reserve(MatchedSymbols.size());
157   for (auto &SymAndSig : MatchedSymbols)
158     Res.push_back(std::move(SymAndSig.Symbol));
159   return Res;
160 }
161 
162 } // namespace include_fixer
163 } // namespace clang
164