xref: /llvm-project/clang-tools-extra/clang-include-fixer/SymbolIndexManager.cpp (revision 732bccb8c1b4ea724919db6ff02b1188e20850e7)
143356f56SNico Weber //===-- SymbolIndexManager.cpp - Managing multiple SymbolIndices-*- C++ -*-===//
243356f56SNico Weber //
343356f56SNico Weber // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
443356f56SNico Weber // See https://llvm.org/LICENSE.txt for license information.
543356f56SNico Weber // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
643356f56SNico Weber //
743356f56SNico Weber //===----------------------------------------------------------------------===//
843356f56SNico Weber 
943356f56SNico Weber #include "SymbolIndexManager.h"
10d4a6513aSNico Weber 
11d4a6513aSNico Weber #include <cmath>
12d4a6513aSNico Weber 
1343356f56SNico Weber #include "find-all-symbols/SymbolInfo.h"
14cff19273SDanila Kutenin #include "llvm/ADT/STLExtras.h"
1543356f56SNico Weber #include "llvm/ADT/SmallVector.h"
16cff19273SDanila Kutenin #include "llvm/ADT/StringMap.h"
1743356f56SNico Weber #include "llvm/Support/Debug.h"
1843356f56SNico Weber #include "llvm/Support/Path.h"
1943356f56SNico Weber 
2043356f56SNico Weber #define DEBUG_TYPE "clang-include-fixer"
2143356f56SNico Weber 
2243356f56SNico Weber namespace clang {
2343356f56SNico Weber namespace include_fixer {
2443356f56SNico Weber 
2543356f56SNico Weber using find_all_symbols::SymbolInfo;
2643356f56SNico Weber using find_all_symbols::SymbolAndSignals;
2743356f56SNico Weber 
2843356f56SNico Weber // Calculate a score based on whether we think the given header is closely
2943356f56SNico Weber // related to the given source file.
similarityScore(llvm::StringRef FileName,llvm::StringRef Header)3043356f56SNico Weber static double similarityScore(llvm::StringRef FileName,
3143356f56SNico Weber                               llvm::StringRef Header) {
32dd5571d5SKazuaki Ishizaki   // Compute the maximum number of common path segments between Header and
3343356f56SNico Weber   // a suffix of FileName.
3443356f56SNico Weber   // We do not do a full longest common substring computation, as Header
3543356f56SNico Weber   // specifies the path we would directly #include, so we assume it is rooted
3643356f56SNico Weber   // relatively to a subproject of the repository.
3743356f56SNico Weber   int MaxSegments = 1;
3843356f56SNico Weber   for (auto FileI = llvm::sys::path::begin(FileName),
3943356f56SNico Weber             FileE = llvm::sys::path::end(FileName);
4043356f56SNico Weber        FileI != FileE; ++FileI) {
4143356f56SNico Weber     int Segments = 0;
4243356f56SNico Weber     for (auto HeaderI = llvm::sys::path::begin(Header),
4343356f56SNico Weber               HeaderE = llvm::sys::path::end(Header), I = FileI;
4443356f56SNico Weber          HeaderI != HeaderE && *I == *HeaderI && I != FileE; ++I, ++HeaderI) {
4543356f56SNico Weber       ++Segments;
4643356f56SNico Weber     }
4743356f56SNico Weber     MaxSegments = std::max(Segments, MaxSegments);
4843356f56SNico Weber   }
4943356f56SNico Weber   return MaxSegments;
5043356f56SNico Weber }
5143356f56SNico Weber 
rank(std::vector<SymbolAndSignals> & Symbols,llvm::StringRef FileName)5243356f56SNico Weber static void rank(std::vector<SymbolAndSignals> &Symbols,
5343356f56SNico Weber                  llvm::StringRef FileName) {
54cff19273SDanila Kutenin   llvm::StringMap<double> Score;
5543356f56SNico Weber   for (const auto &Symbol : Symbols) {
5643356f56SNico Weber     // Calculate a score from the similarity of the header the symbol is in
5743356f56SNico Weber     // with the current file and the popularity of the symbol.
5843356f56SNico Weber     double NewScore = similarityScore(FileName, Symbol.Symbol.getFilePath()) *
5943356f56SNico Weber                       (1.0 + std::log2(1 + Symbol.Signals.Seen));
6043356f56SNico Weber     double &S = Score[Symbol.Symbol.getFilePath()];
6143356f56SNico Weber     S = std::max(S, NewScore);
6243356f56SNico Weber   }
6343356f56SNico Weber   // Sort by the gathered scores. Use file name as a tie breaker so we can
6443356f56SNico Weber   // deduplicate.
65cd9a5cfdSDmitri Gribenko   llvm::sort(Symbols,
6643356f56SNico Weber              [&](const SymbolAndSignals &A, const SymbolAndSignals &B) {
6743356f56SNico Weber                auto AS = Score[A.Symbol.getFilePath()];
6843356f56SNico Weber                auto BS = Score[B.Symbol.getFilePath()];
6943356f56SNico Weber                if (AS != BS)
7043356f56SNico Weber                  return AS > BS;
7143356f56SNico Weber                return A.Symbol.getFilePath() < B.Symbol.getFilePath();
7243356f56SNico Weber              });
7343356f56SNico Weber }
7443356f56SNico Weber 
7543356f56SNico Weber std::vector<find_all_symbols::SymbolInfo>
search(llvm::StringRef Identifier,bool IsNestedSearch,llvm::StringRef FileName) const7643356f56SNico Weber SymbolIndexManager::search(llvm::StringRef Identifier,
7743356f56SNico Weber                            bool IsNestedSearch,
7843356f56SNico Weber                            llvm::StringRef FileName) const {
7943356f56SNico Weber   // The identifier may be fully qualified, so split it and get all the context
8043356f56SNico Weber   // names.
8143356f56SNico Weber   llvm::SmallVector<llvm::StringRef, 8> Names;
8243356f56SNico Weber   Identifier.split(Names, "::");
8343356f56SNico Weber 
8443356f56SNico Weber   bool IsFullyQualified = false;
85*732bccb8SKazu Hirata   if (Identifier.starts_with("::")) {
8643356f56SNico Weber     Names.erase(Names.begin()); // Drop first (empty) element.
8743356f56SNico Weber     IsFullyQualified = true;
8843356f56SNico Weber   }
8943356f56SNico Weber 
9043356f56SNico Weber   // As long as we don't find a result keep stripping name parts from the end.
9143356f56SNico Weber   // This is to support nested classes which aren't recorded in the database.
9243356f56SNico Weber   // Eventually we will either hit a class (namespaces aren't in the database
9343356f56SNico Weber   // either) and can report that result.
9443356f56SNico Weber   bool TookPrefix = false;
9543356f56SNico Weber   std::vector<SymbolAndSignals> MatchedSymbols;
9643356f56SNico Weber   do {
9743356f56SNico Weber     std::vector<SymbolAndSignals> Symbols;
9843356f56SNico Weber     for (const auto &DB : SymbolIndices) {
9943356f56SNico Weber       auto Res = DB.get()->search(Names.back());
10043356f56SNico Weber       Symbols.insert(Symbols.end(), Res.begin(), Res.end());
10143356f56SNico Weber     }
10243356f56SNico Weber 
10343356f56SNico Weber     LLVM_DEBUG(llvm::dbgs() << "Searching " << Names.back() << "... got "
10443356f56SNico Weber                             << Symbols.size() << " results...\n");
10543356f56SNico Weber 
10643356f56SNico Weber     for (auto &SymAndSig : Symbols) {
10743356f56SNico Weber       const SymbolInfo &Symbol = SymAndSig.Symbol;
10843356f56SNico Weber       // Match the identifier name without qualifier.
10943356f56SNico Weber       bool IsMatched = true;
11043356f56SNico Weber       auto SymbolContext = Symbol.getContexts().begin();
11143356f56SNico Weber       auto IdentiferContext = Names.rbegin() + 1; // Skip identifier name.
11243356f56SNico Weber       // Match the remaining context names.
11343356f56SNico Weber       while (IdentiferContext != Names.rend() &&
11443356f56SNico Weber              SymbolContext != Symbol.getContexts().end()) {
11543356f56SNico Weber         if (SymbolContext->second == *IdentiferContext) {
11643356f56SNico Weber           ++IdentiferContext;
11743356f56SNico Weber           ++SymbolContext;
11843356f56SNico Weber         } else if (SymbolContext->first ==
11943356f56SNico Weber                    find_all_symbols::SymbolInfo::ContextType::EnumDecl) {
12043356f56SNico Weber           // Skip non-scoped enum context.
12143356f56SNico Weber           ++SymbolContext;
12243356f56SNico Weber         } else {
12343356f56SNico Weber           IsMatched = false;
12443356f56SNico Weber           break;
12543356f56SNico Weber         }
12643356f56SNico Weber       }
12743356f56SNico Weber 
12843356f56SNico Weber       // If the name was qualified we only want to add results if we evaluated
12943356f56SNico Weber       // all contexts.
13043356f56SNico Weber       if (IsFullyQualified)
13143356f56SNico Weber         IsMatched &= (SymbolContext == Symbol.getContexts().end());
13243356f56SNico Weber 
13343356f56SNico Weber       // FIXME: Support full match. At this point, we only find symbols in
13443356f56SNico Weber       // database which end with the same contexts with the identifier.
13543356f56SNico Weber       if (IsMatched && IdentiferContext == Names.rend()) {
13643356f56SNico Weber         // If we're in a situation where we took a prefix but the thing we
13743356f56SNico Weber         // found couldn't possibly have a nested member ignore it.
13843356f56SNico Weber         if (TookPrefix &&
13943356f56SNico Weber             (Symbol.getSymbolKind() == SymbolInfo::SymbolKind::Function ||
14043356f56SNico Weber              Symbol.getSymbolKind() == SymbolInfo::SymbolKind::Variable ||
14143356f56SNico Weber              Symbol.getSymbolKind() ==
14243356f56SNico Weber                  SymbolInfo::SymbolKind::EnumConstantDecl ||
14343356f56SNico Weber              Symbol.getSymbolKind() == SymbolInfo::SymbolKind::Macro))
14443356f56SNico Weber           continue;
14543356f56SNico Weber 
14643356f56SNico Weber         MatchedSymbols.push_back(std::move(SymAndSig));
14743356f56SNico Weber       }
14843356f56SNico Weber     }
14943356f56SNico Weber     Names.pop_back();
15043356f56SNico Weber     TookPrefix = true;
15143356f56SNico Weber   } while (MatchedSymbols.empty() && !Names.empty() && IsNestedSearch);
15243356f56SNico Weber 
15343356f56SNico Weber   rank(MatchedSymbols, FileName);
15443356f56SNico Weber   // Strip signals, they are no longer needed.
15543356f56SNico Weber   std::vector<SymbolInfo> Res;
15608274d7dSSam McCall   Res.reserve(MatchedSymbols.size());
15743356f56SNico Weber   for (auto &SymAndSig : MatchedSymbols)
15843356f56SNico Weber     Res.push_back(std::move(SymAndSig.Symbol));
15943356f56SNico Weber   return Res;
16043356f56SNico Weber }
16143356f56SNico Weber 
16243356f56SNico Weber } // namespace include_fixer
16343356f56SNico Weber } // namespace clang
164