143356f56SNico Weber //===-- SymbolIndexManager.cpp - Managing multiple SymbolIndices-*- C++ -*-===//
243356f56SNico Weber //
343356f56SNico Weber // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
443356f56SNico Weber // See https://llvm.org/LICENSE.txt for license information.
543356f56SNico Weber // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
643356f56SNico Weber //
743356f56SNico Weber //===----------------------------------------------------------------------===//
843356f56SNico Weber
943356f56SNico Weber #include "SymbolIndexManager.h"
10d4a6513aSNico Weber
11d4a6513aSNico Weber #include <cmath>
12d4a6513aSNico Weber
1343356f56SNico Weber #include "find-all-symbols/SymbolInfo.h"
14cff19273SDanila Kutenin #include "llvm/ADT/STLExtras.h"
1543356f56SNico Weber #include "llvm/ADT/SmallVector.h"
16cff19273SDanila Kutenin #include "llvm/ADT/StringMap.h"
1743356f56SNico Weber #include "llvm/Support/Debug.h"
1843356f56SNico Weber #include "llvm/Support/Path.h"
1943356f56SNico Weber
2043356f56SNico Weber #define DEBUG_TYPE "clang-include-fixer"
2143356f56SNico Weber
2243356f56SNico Weber namespace clang {
2343356f56SNico Weber namespace include_fixer {
2443356f56SNico Weber
2543356f56SNico Weber using find_all_symbols::SymbolInfo;
2643356f56SNico Weber using find_all_symbols::SymbolAndSignals;
2743356f56SNico Weber
2843356f56SNico Weber // Calculate a score based on whether we think the given header is closely
2943356f56SNico Weber // related to the given source file.
similarityScore(llvm::StringRef FileName,llvm::StringRef Header)3043356f56SNico Weber static double similarityScore(llvm::StringRef FileName,
3143356f56SNico Weber llvm::StringRef Header) {
32dd5571d5SKazuaki Ishizaki // Compute the maximum number of common path segments between Header and
3343356f56SNico Weber // a suffix of FileName.
3443356f56SNico Weber // We do not do a full longest common substring computation, as Header
3543356f56SNico Weber // specifies the path we would directly #include, so we assume it is rooted
3643356f56SNico Weber // relatively to a subproject of the repository.
3743356f56SNico Weber int MaxSegments = 1;
3843356f56SNico Weber for (auto FileI = llvm::sys::path::begin(FileName),
3943356f56SNico Weber FileE = llvm::sys::path::end(FileName);
4043356f56SNico Weber FileI != FileE; ++FileI) {
4143356f56SNico Weber int Segments = 0;
4243356f56SNico Weber for (auto HeaderI = llvm::sys::path::begin(Header),
4343356f56SNico Weber HeaderE = llvm::sys::path::end(Header), I = FileI;
4443356f56SNico Weber HeaderI != HeaderE && *I == *HeaderI && I != FileE; ++I, ++HeaderI) {
4543356f56SNico Weber ++Segments;
4643356f56SNico Weber }
4743356f56SNico Weber MaxSegments = std::max(Segments, MaxSegments);
4843356f56SNico Weber }
4943356f56SNico Weber return MaxSegments;
5043356f56SNico Weber }
5143356f56SNico Weber
rank(std::vector<SymbolAndSignals> & Symbols,llvm::StringRef FileName)5243356f56SNico Weber static void rank(std::vector<SymbolAndSignals> &Symbols,
5343356f56SNico Weber llvm::StringRef FileName) {
54cff19273SDanila Kutenin llvm::StringMap<double> Score;
5543356f56SNico Weber for (const auto &Symbol : Symbols) {
5643356f56SNico Weber // Calculate a score from the similarity of the header the symbol is in
5743356f56SNico Weber // with the current file and the popularity of the symbol.
5843356f56SNico Weber double NewScore = similarityScore(FileName, Symbol.Symbol.getFilePath()) *
5943356f56SNico Weber (1.0 + std::log2(1 + Symbol.Signals.Seen));
6043356f56SNico Weber double &S = Score[Symbol.Symbol.getFilePath()];
6143356f56SNico Weber S = std::max(S, NewScore);
6243356f56SNico Weber }
6343356f56SNico Weber // Sort by the gathered scores. Use file name as a tie breaker so we can
6443356f56SNico Weber // deduplicate.
65cd9a5cfdSDmitri Gribenko llvm::sort(Symbols,
6643356f56SNico Weber [&](const SymbolAndSignals &A, const SymbolAndSignals &B) {
6743356f56SNico Weber auto AS = Score[A.Symbol.getFilePath()];
6843356f56SNico Weber auto BS = Score[B.Symbol.getFilePath()];
6943356f56SNico Weber if (AS != BS)
7043356f56SNico Weber return AS > BS;
7143356f56SNico Weber return A.Symbol.getFilePath() < B.Symbol.getFilePath();
7243356f56SNico Weber });
7343356f56SNico Weber }
7443356f56SNico Weber
7543356f56SNico Weber std::vector<find_all_symbols::SymbolInfo>
search(llvm::StringRef Identifier,bool IsNestedSearch,llvm::StringRef FileName) const7643356f56SNico Weber SymbolIndexManager::search(llvm::StringRef Identifier,
7743356f56SNico Weber bool IsNestedSearch,
7843356f56SNico Weber llvm::StringRef FileName) const {
7943356f56SNico Weber // The identifier may be fully qualified, so split it and get all the context
8043356f56SNico Weber // names.
8143356f56SNico Weber llvm::SmallVector<llvm::StringRef, 8> Names;
8243356f56SNico Weber Identifier.split(Names, "::");
8343356f56SNico Weber
8443356f56SNico Weber bool IsFullyQualified = false;
85*732bccb8SKazu Hirata if (Identifier.starts_with("::")) {
8643356f56SNico Weber Names.erase(Names.begin()); // Drop first (empty) element.
8743356f56SNico Weber IsFullyQualified = true;
8843356f56SNico Weber }
8943356f56SNico Weber
9043356f56SNico Weber // As long as we don't find a result keep stripping name parts from the end.
9143356f56SNico Weber // This is to support nested classes which aren't recorded in the database.
9243356f56SNico Weber // Eventually we will either hit a class (namespaces aren't in the database
9343356f56SNico Weber // either) and can report that result.
9443356f56SNico Weber bool TookPrefix = false;
9543356f56SNico Weber std::vector<SymbolAndSignals> MatchedSymbols;
9643356f56SNico Weber do {
9743356f56SNico Weber std::vector<SymbolAndSignals> Symbols;
9843356f56SNico Weber for (const auto &DB : SymbolIndices) {
9943356f56SNico Weber auto Res = DB.get()->search(Names.back());
10043356f56SNico Weber Symbols.insert(Symbols.end(), Res.begin(), Res.end());
10143356f56SNico Weber }
10243356f56SNico Weber
10343356f56SNico Weber LLVM_DEBUG(llvm::dbgs() << "Searching " << Names.back() << "... got "
10443356f56SNico Weber << Symbols.size() << " results...\n");
10543356f56SNico Weber
10643356f56SNico Weber for (auto &SymAndSig : Symbols) {
10743356f56SNico Weber const SymbolInfo &Symbol = SymAndSig.Symbol;
10843356f56SNico Weber // Match the identifier name without qualifier.
10943356f56SNico Weber bool IsMatched = true;
11043356f56SNico Weber auto SymbolContext = Symbol.getContexts().begin();
11143356f56SNico Weber auto IdentiferContext = Names.rbegin() + 1; // Skip identifier name.
11243356f56SNico Weber // Match the remaining context names.
11343356f56SNico Weber while (IdentiferContext != Names.rend() &&
11443356f56SNico Weber SymbolContext != Symbol.getContexts().end()) {
11543356f56SNico Weber if (SymbolContext->second == *IdentiferContext) {
11643356f56SNico Weber ++IdentiferContext;
11743356f56SNico Weber ++SymbolContext;
11843356f56SNico Weber } else if (SymbolContext->first ==
11943356f56SNico Weber find_all_symbols::SymbolInfo::ContextType::EnumDecl) {
12043356f56SNico Weber // Skip non-scoped enum context.
12143356f56SNico Weber ++SymbolContext;
12243356f56SNico Weber } else {
12343356f56SNico Weber IsMatched = false;
12443356f56SNico Weber break;
12543356f56SNico Weber }
12643356f56SNico Weber }
12743356f56SNico Weber
12843356f56SNico Weber // If the name was qualified we only want to add results if we evaluated
12943356f56SNico Weber // all contexts.
13043356f56SNico Weber if (IsFullyQualified)
13143356f56SNico Weber IsMatched &= (SymbolContext == Symbol.getContexts().end());
13243356f56SNico Weber
13343356f56SNico Weber // FIXME: Support full match. At this point, we only find symbols in
13443356f56SNico Weber // database which end with the same contexts with the identifier.
13543356f56SNico Weber if (IsMatched && IdentiferContext == Names.rend()) {
13643356f56SNico Weber // If we're in a situation where we took a prefix but the thing we
13743356f56SNico Weber // found couldn't possibly have a nested member ignore it.
13843356f56SNico Weber if (TookPrefix &&
13943356f56SNico Weber (Symbol.getSymbolKind() == SymbolInfo::SymbolKind::Function ||
14043356f56SNico Weber Symbol.getSymbolKind() == SymbolInfo::SymbolKind::Variable ||
14143356f56SNico Weber Symbol.getSymbolKind() ==
14243356f56SNico Weber SymbolInfo::SymbolKind::EnumConstantDecl ||
14343356f56SNico Weber Symbol.getSymbolKind() == SymbolInfo::SymbolKind::Macro))
14443356f56SNico Weber continue;
14543356f56SNico Weber
14643356f56SNico Weber MatchedSymbols.push_back(std::move(SymAndSig));
14743356f56SNico Weber }
14843356f56SNico Weber }
14943356f56SNico Weber Names.pop_back();
15043356f56SNico Weber TookPrefix = true;
15143356f56SNico Weber } while (MatchedSymbols.empty() && !Names.empty() && IsNestedSearch);
15243356f56SNico Weber
15343356f56SNico Weber rank(MatchedSymbols, FileName);
15443356f56SNico Weber // Strip signals, they are no longer needed.
15543356f56SNico Weber std::vector<SymbolInfo> Res;
15608274d7dSSam McCall Res.reserve(MatchedSymbols.size());
15743356f56SNico Weber for (auto &SymAndSig : MatchedSymbols)
15843356f56SNico Weber Res.push_back(std::move(SymAndSig.Symbol));
15943356f56SNico Weber return Res;
16043356f56SNico Weber }
16143356f56SNico Weber
16243356f56SNico Weber } // namespace include_fixer
16343356f56SNico Weber } // namespace clang
164