1ea9773acSSam McCall //===- InterpolatingCompilationDatabase.cpp ---------------------*- C++ -*-===//
2ea9773acSSam McCall //
32946cd70SChandler Carruth // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
42946cd70SChandler Carruth // See https://llvm.org/LICENSE.txt for license information.
52946cd70SChandler Carruth // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6ea9773acSSam McCall //
7ea9773acSSam McCall //===----------------------------------------------------------------------===//
8ea9773acSSam McCall //
9ea9773acSSam McCall // InterpolatingCompilationDatabase wraps another CompilationDatabase and
10ea9773acSSam McCall // attempts to heuristically determine appropriate compile commands for files
11ea9773acSSam McCall // that are not included, such as headers or newly created files.
12ea9773acSSam McCall //
13ea9773acSSam McCall // Motivating cases include:
14ea9773acSSam McCall // Header files that live next to their implementation files. These typically
15ea9773acSSam McCall // share a base filename. (libclang/CXString.h, libclang/CXString.cpp).
16ea9773acSSam McCall // Some projects separate headers from includes. Filenames still typically
17ea9773acSSam McCall // match, maybe other path segments too. (include/llvm/IR/Use.h, lib/IR/Use.cc).
18ea9773acSSam McCall // Matches are sometimes only approximate (Sema.h, SemaDecl.cpp). This goes
19ea9773acSSam McCall // for directories too (Support/Unix/Process.inc, lib/Support/Process.cpp).
20ea9773acSSam McCall // Even if we can't find a "right" compile command, even a random one from
21ea9773acSSam McCall // the project will tend to get important flags like -I and -x right.
22ea9773acSSam McCall //
23ea9773acSSam McCall // We "borrow" the compile command for the closest available file:
24ea9773acSSam McCall // - points are awarded if the filename matches (ignoring extension)
25ea9773acSSam McCall // - points are awarded if the directory structure matches
26ea9773acSSam McCall // - ties are broken by length of path prefix match
27ea9773acSSam McCall //
28ea9773acSSam McCall // The compile command is adjusted, replacing the filename and removing output
29ea9773acSSam McCall // file arguments. The -x and -std flags may be affected too.
30ea9773acSSam McCall //
31ea9773acSSam McCall // Source language is a tricky issue: is it OK to use a .c file's command
32ea9773acSSam McCall // for building a .cc file? What language is a .h file in?
33ea9773acSSam McCall // - We only consider compile commands for c-family languages as candidates.
34ea9773acSSam McCall // - For files whose language is implied by the filename (e.g. .m, .hpp)
35ea9773acSSam McCall // we prefer candidates from the same language.
36ea9773acSSam McCall // If we must cross languages, we drop any -x and -std flags.
37ea9773acSSam McCall // - For .h files, candidates from any c-family language are acceptable.
38ea9773acSSam McCall // We use the candidate's language, inserting e.g. -x c++-header.
39ea9773acSSam McCall //
40ea9773acSSam McCall // This class is only useful when wrapping databases that can enumerate all
41ea9773acSSam McCall // their compile commands. If getAllFilenames() is empty, no inference occurs.
42ea9773acSSam McCall //
43ea9773acSSam McCall //===----------------------------------------------------------------------===//
44ea9773acSSam McCall
4509d890d7SRainer Orth #include "clang/Basic/LangStandard.h"
46ce90b60bSKadir Cetinkaya #include "clang/Driver/Driver.h"
47ea9773acSSam McCall #include "clang/Driver/Options.h"
48ea9773acSSam McCall #include "clang/Driver/Types.h"
49ea9773acSSam McCall #include "clang/Tooling/CompilationDatabase.h"
50ce90b60bSKadir Cetinkaya #include "llvm/ADT/ArrayRef.h"
51ea9773acSSam McCall #include "llvm/ADT/DenseMap.h"
52ea9773acSSam McCall #include "llvm/ADT/StringExtras.h"
53ea9773acSSam McCall #include "llvm/Option/ArgList.h"
54ea9773acSSam McCall #include "llvm/Option/OptTable.h"
55ea9773acSSam McCall #include "llvm/Support/Debug.h"
56ea9773acSSam McCall #include "llvm/Support/Path.h"
57ea9773acSSam McCall #include "llvm/Support/StringSaver.h"
58ea9773acSSam McCall #include "llvm/Support/raw_ostream.h"
59ea9773acSSam McCall #include <memory>
60a1580d7bSKazu Hirata #include <optional>
61ea9773acSSam McCall
62ea9773acSSam McCall namespace clang {
63ea9773acSSam McCall namespace tooling {
64ea9773acSSam McCall namespace {
65ea9773acSSam McCall using namespace llvm;
66ea9773acSSam McCall namespace types = clang::driver::types;
67ea9773acSSam McCall namespace path = llvm::sys::path;
68ea9773acSSam McCall
69ea9773acSSam McCall // The length of the prefix these two strings have in common.
matchingPrefix(StringRef L,StringRef R)70ea9773acSSam McCall size_t matchingPrefix(StringRef L, StringRef R) {
71ea9773acSSam McCall size_t Limit = std::min(L.size(), R.size());
72ea9773acSSam McCall for (size_t I = 0; I < Limit; ++I)
73ea9773acSSam McCall if (L[I] != R[I])
74ea9773acSSam McCall return I;
75ea9773acSSam McCall return Limit;
76ea9773acSSam McCall }
77ea9773acSSam McCall
78ea9773acSSam McCall // A comparator for searching SubstringWithIndexes with std::equal_range etc.
79ea9773acSSam McCall // Optionaly prefix semantics: compares equal if the key is a prefix.
80ea9773acSSam McCall template <bool Prefix> struct Less {
operator ()clang::tooling::__anonba67d0cc0111::Less81ea9773acSSam McCall bool operator()(StringRef Key, std::pair<StringRef, size_t> Value) const {
82ea9773acSSam McCall StringRef V = Prefix ? Value.first.substr(0, Key.size()) : Value.first;
83ea9773acSSam McCall return Key < V;
84ea9773acSSam McCall }
operator ()clang::tooling::__anonba67d0cc0111::Less85ea9773acSSam McCall bool operator()(std::pair<StringRef, size_t> Value, StringRef Key) const {
86ea9773acSSam McCall StringRef V = Prefix ? Value.first.substr(0, Key.size()) : Value.first;
87ea9773acSSam McCall return V < Key;
88ea9773acSSam McCall }
89ea9773acSSam McCall };
90ea9773acSSam McCall
91ea9773acSSam McCall // Infer type from filename. If we might have gotten it wrong, set *Certain.
92ea9773acSSam McCall // *.h will be inferred as a C header, but not certain.
guessType(StringRef Filename,bool * Certain=nullptr)93ea9773acSSam McCall types::ID guessType(StringRef Filename, bool *Certain = nullptr) {
94ea9773acSSam McCall // path::extension is ".cpp", lookupTypeForExtension wants "cpp".
95ea9773acSSam McCall auto Lang =
96ea9773acSSam McCall types::lookupTypeForExtension(path::extension(Filename).substr(1));
97ea9773acSSam McCall if (Certain)
98ea9773acSSam McCall *Certain = Lang != types::TY_CHeader && Lang != types::TY_INVALID;
99ea9773acSSam McCall return Lang;
100ea9773acSSam McCall }
101ea9773acSSam McCall
102ea9773acSSam McCall // Return Lang as one of the canonical supported types.
103ea9773acSSam McCall // e.g. c-header --> c; fortran --> TY_INVALID
foldType(types::ID Lang)104ea9773acSSam McCall static types::ID foldType(types::ID Lang) {
105ea9773acSSam McCall switch (Lang) {
106ea9773acSSam McCall case types::TY_C:
107ea9773acSSam McCall case types::TY_CHeader:
108ea9773acSSam McCall return types::TY_C;
109ea9773acSSam McCall case types::TY_ObjC:
110ea9773acSSam McCall case types::TY_ObjCHeader:
111ea9773acSSam McCall return types::TY_ObjC;
112ea9773acSSam McCall case types::TY_CXX:
113ea9773acSSam McCall case types::TY_CXXHeader:
114ea9773acSSam McCall return types::TY_CXX;
115ea9773acSSam McCall case types::TY_ObjCXX:
116ea9773acSSam McCall case types::TY_ObjCXXHeader:
117ea9773acSSam McCall return types::TY_ObjCXX;
1186ed88afdSADRA case types::TY_CUDA:
1196ed88afdSADRA case types::TY_CUDA_DEVICE:
1202a1418f2SChristopher Tetreault return types::TY_CUDA;
121ea9773acSSam McCall default:
122ea9773acSSam McCall return types::TY_INVALID;
123ea9773acSSam McCall }
124ea9773acSSam McCall }
125ea9773acSSam McCall
126ea9773acSSam McCall // A CompileCommand that can be applied to another file.
127ea9773acSSam McCall struct TransferableCommand {
128ea9773acSSam McCall // Flags that should not apply to all files are stripped from CommandLine.
129ea9773acSSam McCall CompileCommand Cmd;
1305167e2d1SIlya Biryukov // Language detected from -x or the filename. Never TY_INVALID.
1316ad0788cSKazu Hirata std::optional<types::ID> Type;
132ea9773acSSam McCall // Standard specified by -std.
133ea9773acSSam McCall LangStandard::Kind Std = LangStandard::lang_unspecified;
134e05bf606SHamza Sood // Whether the command line is for the cl-compatible driver.
135e05bf606SHamza Sood bool ClangCLMode;
136ea9773acSSam McCall
TransferableCommandclang::tooling::__anonba67d0cc0111::TransferableCommand137ea9773acSSam McCall TransferableCommand(CompileCommand C)
138ce90b60bSKadir Cetinkaya : Cmd(std::move(C)), Type(guessType(Cmd.Filename)) {
139e05bf606SHamza Sood std::vector<std::string> OldArgs = std::move(Cmd.CommandLine);
140e05bf606SHamza Sood Cmd.CommandLine.clear();
141e05bf606SHamza Sood
142e05bf606SHamza Sood // Wrap the old arguments in an InputArgList.
143e05bf606SHamza Sood llvm::opt::InputArgList ArgList;
144e05bf606SHamza Sood {
145e05bf606SHamza Sood SmallVector<const char *, 16> TmpArgv;
146e05bf606SHamza Sood for (const std::string &S : OldArgs)
147e05bf606SHamza Sood TmpArgv.push_back(S.c_str());
148ce90b60bSKadir Cetinkaya ClangCLMode = !TmpArgv.empty() &&
149ce90b60bSKadir Cetinkaya driver::IsClangCL(driver::getDriverMode(
150a3c248dbSserge-sans-paille TmpArgv.front(), llvm::ArrayRef(TmpArgv).slice(1)));
151e05bf606SHamza Sood ArgList = {TmpArgv.begin(), TmpArgv.end()};
152e05bf606SHamza Sood }
153e05bf606SHamza Sood
154ea9773acSSam McCall // Parse the old args in order to strip out and record unwanted flags.
155e05bf606SHamza Sood // We parse each argument individually so that we can retain the exact
156e05bf606SHamza Sood // spelling of each argument; re-rendering is lossy for aliased flags.
157e05bf606SHamza Sood // E.g. in CL mode, /W4 maps to -Wall.
15843392759SIlya Biryukov auto &OptTable = clang::driver::getDriverOptTable();
15960ca31a7SSerge Guelton if (!OldArgs.empty())
160e05bf606SHamza Sood Cmd.CommandLine.emplace_back(OldArgs.front());
161e05bf606SHamza Sood for (unsigned Pos = 1; Pos < OldArgs.size();) {
162e05bf606SHamza Sood using namespace driver::options;
163e05bf606SHamza Sood
164e05bf606SHamza Sood const unsigned OldPos = Pos;
16543392759SIlya Biryukov std::unique_ptr<llvm::opt::Arg> Arg(OptTable.ParseOneArg(
166e05bf606SHamza Sood ArgList, Pos,
167*9478f661SJustin Bogner llvm::opt::Visibility(ClangCLMode ? CLOption : ClangOption)));
168e05bf606SHamza Sood
169e05bf606SHamza Sood if (!Arg)
170e05bf606SHamza Sood continue;
171e05bf606SHamza Sood
172e05bf606SHamza Sood const llvm::opt::Option &Opt = Arg->getOption();
173e05bf606SHamza Sood
174ea9773acSSam McCall // Strip input and output files.
175e05bf606SHamza Sood if (Opt.matches(OPT_INPUT) || Opt.matches(OPT_o) ||
176e05bf606SHamza Sood (ClangCLMode && (Opt.matches(OPT__SLASH_Fa) ||
177e05bf606SHamza Sood Opt.matches(OPT__SLASH_Fe) ||
178e05bf606SHamza Sood Opt.matches(OPT__SLASH_Fi) ||
179e05bf606SHamza Sood Opt.matches(OPT__SLASH_Fo))))
180ea9773acSSam McCall continue;
181e05bf606SHamza Sood
182e030ce3eSJanusz Nykiel // ...including when the inputs are passed after --.
183e030ce3eSJanusz Nykiel if (Opt.matches(OPT__DASH_DASH))
184e030ce3eSJanusz Nykiel break;
185e030ce3eSJanusz Nykiel
186ea9773acSSam McCall // Strip -x, but record the overridden language.
187e05bf606SHamza Sood if (const auto GivenType = tryParseTypeArg(*Arg)) {
188e05bf606SHamza Sood Type = *GivenType;
189ea9773acSSam McCall continue;
190ea9773acSSam McCall }
191e05bf606SHamza Sood
192e05bf606SHamza Sood // Strip -std, but record the value.
193e05bf606SHamza Sood if (const auto GivenStd = tryParseStdArg(*Arg)) {
194e05bf606SHamza Sood if (*GivenStd != LangStandard::lang_unspecified)
195e05bf606SHamza Sood Std = *GivenStd;
196ea9773acSSam McCall continue;
197ea9773acSSam McCall }
198e05bf606SHamza Sood
199e05bf606SHamza Sood Cmd.CommandLine.insert(Cmd.CommandLine.end(),
200fd1dc75bSHamza Sood OldArgs.data() + OldPos, OldArgs.data() + Pos);
201ea9773acSSam McCall }
202ea9773acSSam McCall
203c2377eaeSKadir Cetinkaya // Make use of -std iff -x was missing.
204c2377eaeSKadir Cetinkaya if (Type == types::TY_INVALID && Std != LangStandard::lang_unspecified)
205ea9773acSSam McCall Type = toType(LangStandard::getLangStandardForKind(Std).getLanguage());
2065167e2d1SIlya Biryukov Type = foldType(*Type);
2075167e2d1SIlya Biryukov // The contract is to store None instead of TY_INVALID.
2085167e2d1SIlya Biryukov if (Type == types::TY_INVALID)
2095891420eSKazu Hirata Type = std::nullopt;
210ea9773acSSam McCall }
211ea9773acSSam McCall
212ea9773acSSam McCall // Produce a CompileCommand for \p filename, based on this one.
213588db1ccSSam McCall // (This consumes the TransferableCommand just to avoid copying Cmd).
transferToclang::tooling::__anonba67d0cc0111::TransferableCommand214588db1ccSSam McCall CompileCommand transferTo(StringRef Filename) && {
215588db1ccSSam McCall CompileCommand Result = std::move(Cmd);
216588db1ccSSam McCall Result.Heuristic = "inferred from " + Result.Filename;
217adcd0268SBenjamin Kramer Result.Filename = std::string(Filename);
218ea9773acSSam McCall bool TypeCertain;
219ea9773acSSam McCall auto TargetType = guessType(Filename, &TypeCertain);
220ea9773acSSam McCall // If the filename doesn't determine the language (.h), transfer with -x.
22187ad30beSSam McCall if ((!TargetType || !TypeCertain) && Type) {
22287ad30beSSam McCall // Use *Type, or its header variant if the file is a header.
22387ad30beSSam McCall // Treat no/invalid extension as header (e.g. C++ standard library).
22487ad30beSSam McCall TargetType =
22587ad30beSSam McCall (!TargetType || types::onlyPrecompileType(TargetType)) // header?
2265167e2d1SIlya Biryukov ? types::lookupHeaderTypeForSourceType(*Type)
2275167e2d1SIlya Biryukov : *Type;
228e05bf606SHamza Sood if (ClangCLMode) {
229e05bf606SHamza Sood const StringRef Flag = toCLFlag(TargetType);
230e05bf606SHamza Sood if (!Flag.empty())
231adcd0268SBenjamin Kramer Result.CommandLine.push_back(std::string(Flag));
232e05bf606SHamza Sood } else {
233ea9773acSSam McCall Result.CommandLine.push_back("-x");
234ea9773acSSam McCall Result.CommandLine.push_back(types::getTypeName(TargetType));
235ea9773acSSam McCall }
236e05bf606SHamza Sood }
237ea9773acSSam McCall // --std flag may only be transferred if the language is the same.
238ea9773acSSam McCall // We may consider "translating" these, e.g. c++11 -> c11.
239ea9773acSSam McCall if (Std != LangStandard::lang_unspecified && foldType(TargetType) == Type) {
240e05bf606SHamza Sood Result.CommandLine.emplace_back((
241e05bf606SHamza Sood llvm::Twine(ClangCLMode ? "/std:" : "-std=") +
242e05bf606SHamza Sood LangStandard::getLangStandardForKind(Std).getName()).str());
243ea9773acSSam McCall }
244e030ce3eSJanusz Nykiel Result.CommandLine.push_back("--");
245adcd0268SBenjamin Kramer Result.CommandLine.push_back(std::string(Filename));
246ea9773acSSam McCall return Result;
247ea9773acSSam McCall }
248ea9773acSSam McCall
249ea9773acSSam McCall private:
250ea9773acSSam McCall // Map the language from the --std flag to that of the -x flag.
toTypeclang::tooling::__anonba67d0cc0111::TransferableCommand25109d890d7SRainer Orth static types::ID toType(Language Lang) {
252ea9773acSSam McCall switch (Lang) {
25309d890d7SRainer Orth case Language::C:
254ea9773acSSam McCall return types::TY_C;
25509d890d7SRainer Orth case Language::CXX:
256ea9773acSSam McCall return types::TY_CXX;
25709d890d7SRainer Orth case Language::ObjC:
258ea9773acSSam McCall return types::TY_ObjC;
25909d890d7SRainer Orth case Language::ObjCXX:
260ea9773acSSam McCall return types::TY_ObjCXX;
261ea9773acSSam McCall default:
262ea9773acSSam McCall return types::TY_INVALID;
263ea9773acSSam McCall }
264ea9773acSSam McCall }
265e05bf606SHamza Sood
266e05bf606SHamza Sood // Convert a file type to the matching CL-style type flag.
toCLFlagclang::tooling::__anonba67d0cc0111::TransferableCommand267e05bf606SHamza Sood static StringRef toCLFlag(types::ID Type) {
268e05bf606SHamza Sood switch (Type) {
269e05bf606SHamza Sood case types::TY_C:
270e05bf606SHamza Sood case types::TY_CHeader:
271e05bf606SHamza Sood return "/TC";
272e05bf606SHamza Sood case types::TY_CXX:
273e05bf606SHamza Sood case types::TY_CXXHeader:
274e05bf606SHamza Sood return "/TP";
275e05bf606SHamza Sood default:
276e05bf606SHamza Sood return StringRef();
277e05bf606SHamza Sood }
278e05bf606SHamza Sood }
279e05bf606SHamza Sood
280e05bf606SHamza Sood // Try to interpret the argument as a type specifier, e.g. '-x'.
tryParseTypeArgclang::tooling::__anonba67d0cc0111::TransferableCommand2816ad0788cSKazu Hirata std::optional<types::ID> tryParseTypeArg(const llvm::opt::Arg &Arg) {
282e05bf606SHamza Sood const llvm::opt::Option &Opt = Arg.getOption();
283e05bf606SHamza Sood using namespace driver::options;
284e05bf606SHamza Sood if (ClangCLMode) {
285e05bf606SHamza Sood if (Opt.matches(OPT__SLASH_TC) || Opt.matches(OPT__SLASH_Tc))
286e05bf606SHamza Sood return types::TY_C;
287e05bf606SHamza Sood if (Opt.matches(OPT__SLASH_TP) || Opt.matches(OPT__SLASH_Tp))
288e05bf606SHamza Sood return types::TY_CXX;
289e05bf606SHamza Sood } else {
290e05bf606SHamza Sood if (Opt.matches(driver::options::OPT_x))
291e05bf606SHamza Sood return types::lookupTypeForTypeSpecifier(Arg.getValue());
292e05bf606SHamza Sood }
2935891420eSKazu Hirata return std::nullopt;
294e05bf606SHamza Sood }
295e05bf606SHamza Sood
296e05bf606SHamza Sood // Try to interpret the argument as '-std='.
tryParseStdArgclang::tooling::__anonba67d0cc0111::TransferableCommand2976ad0788cSKazu Hirata std::optional<LangStandard::Kind> tryParseStdArg(const llvm::opt::Arg &Arg) {
298e05bf606SHamza Sood using namespace driver::options;
29909d890d7SRainer Orth if (Arg.getOption().matches(ClangCLMode ? OPT__SLASH_std : OPT_std_EQ))
30009d890d7SRainer Orth return LangStandard::getLangKind(Arg.getValue());
3015891420eSKazu Hirata return std::nullopt;
302e05bf606SHamza Sood }
303ea9773acSSam McCall };
304ea9773acSSam McCall
3055167e2d1SIlya Biryukov // Given a filename, FileIndex picks the best matching file from the underlying
3065167e2d1SIlya Biryukov // DB. This is the proxy file whose CompileCommand will be reused. The
3075167e2d1SIlya Biryukov // heuristics incorporate file name, extension, and directory structure.
3085167e2d1SIlya Biryukov // Strategy:
309ea9773acSSam McCall // - Build indexes of each of the substrings we want to look up by.
310ea9773acSSam McCall // These indexes are just sorted lists of the substrings.
311ea9773acSSam McCall // - Each criterion corresponds to a range lookup into the index, so we only
312ea9773acSSam McCall // need O(log N) string comparisons to determine scores.
3135167e2d1SIlya Biryukov //
3145167e2d1SIlya Biryukov // Apart from path proximity signals, also takes file extensions into account
3155167e2d1SIlya Biryukov // when scoring the candidates.
3165167e2d1SIlya Biryukov class FileIndex {
317ea9773acSSam McCall public:
FileIndex(std::vector<std::string> Files)3185167e2d1SIlya Biryukov FileIndex(std::vector<std::string> Files)
3195167e2d1SIlya Biryukov : OriginalPaths(std::move(Files)), Strings(Arena) {
320ea9773acSSam McCall // Sort commands by filename for determinism (index is a tiebreaker later).
32155fab260SFangrui Song llvm::sort(OriginalPaths);
3225167e2d1SIlya Biryukov Paths.reserve(OriginalPaths.size());
3235167e2d1SIlya Biryukov Types.reserve(OriginalPaths.size());
3245167e2d1SIlya Biryukov Stems.reserve(OriginalPaths.size());
3255167e2d1SIlya Biryukov for (size_t I = 0; I < OriginalPaths.size(); ++I) {
3265167e2d1SIlya Biryukov StringRef Path = Strings.save(StringRef(OriginalPaths[I]).lower());
3275167e2d1SIlya Biryukov
3285167e2d1SIlya Biryukov Paths.emplace_back(Path, I);
32987468e85SIshaan Gandhi Types.push_back(foldType(guessType(OriginalPaths[I])));
330ea9773acSSam McCall Stems.emplace_back(sys::path::stem(Path), I);
331ea9773acSSam McCall auto Dir = ++sys::path::rbegin(Path), DirEnd = sys::path::rend(Path);
332ea9773acSSam McCall for (int J = 0; J < DirectorySegmentsIndexed && Dir != DirEnd; ++J, ++Dir)
333ea9773acSSam McCall if (Dir->size() > ShortDirectorySegment) // not trivial ones
334ea9773acSSam McCall Components.emplace_back(*Dir, I);
335ea9773acSSam McCall }
33655fab260SFangrui Song llvm::sort(Paths);
33755fab260SFangrui Song llvm::sort(Stems);
33855fab260SFangrui Song llvm::sort(Components);
339ea9773acSSam McCall }
340ea9773acSSam McCall
empty() const3415167e2d1SIlya Biryukov bool empty() const { return Paths.empty(); }
342ea9773acSSam McCall
3435167e2d1SIlya Biryukov // Returns the path for the file that best fits OriginalFilename.
3445167e2d1SIlya Biryukov // Candidates with extensions matching PreferLanguage will be chosen over
3455167e2d1SIlya Biryukov // others (unless it's TY_INVALID, or all candidates are bad).
chooseProxy(StringRef OriginalFilename,types::ID PreferLanguage) const3465167e2d1SIlya Biryukov StringRef chooseProxy(StringRef OriginalFilename,
347ea9773acSSam McCall types::ID PreferLanguage) const {
348ea9773acSSam McCall assert(!empty() && "need at least one candidate!");
349ea9773acSSam McCall std::string Filename = OriginalFilename.lower();
350ea9773acSSam McCall auto Candidates = scoreCandidates(Filename);
351ea9773acSSam McCall std::pair<size_t, int> Best =
352ea9773acSSam McCall pickWinner(Candidates, Filename, PreferLanguage);
353ea9773acSSam McCall
3545167e2d1SIlya Biryukov DEBUG_WITH_TYPE(
3555167e2d1SIlya Biryukov "interpolate",
3565167e2d1SIlya Biryukov llvm::dbgs() << "interpolate: chose " << OriginalPaths[Best.first]
3575167e2d1SIlya Biryukov << " as proxy for " << OriginalFilename << " preferring "
358ea9773acSSam McCall << (PreferLanguage == types::TY_INVALID
359ea9773acSSam McCall ? "none"
360ea9773acSSam McCall : types::getTypeName(PreferLanguage))
361ea9773acSSam McCall << " score=" << Best.second << "\n");
3625167e2d1SIlya Biryukov return OriginalPaths[Best.first];
363ea9773acSSam McCall }
364ea9773acSSam McCall
365ea9773acSSam McCall private:
366ea9773acSSam McCall using SubstringAndIndex = std::pair<StringRef, size_t>;
367ea9773acSSam McCall // Directory matching parameters: we look at the last two segments of the
368ea9773acSSam McCall // parent directory (usually the semantically significant ones in practice).
369ea9773acSSam McCall // We search only the last four of each candidate (for efficiency).
370ea9773acSSam McCall constexpr static int DirectorySegmentsIndexed = 4;
371ea9773acSSam McCall constexpr static int DirectorySegmentsQueried = 2;
372ea9773acSSam McCall constexpr static int ShortDirectorySegment = 1; // Only look at longer names.
373ea9773acSSam McCall
374ea9773acSSam McCall // Award points to candidate entries that should be considered for the file.
375ea9773acSSam McCall // Returned keys are indexes into paths, and the values are (nonzero) scores.
scoreCandidates(StringRef Filename) const376ea9773acSSam McCall DenseMap<size_t, int> scoreCandidates(StringRef Filename) const {
377ea9773acSSam McCall // Decompose Filename into the parts we care about.
378ea9773acSSam McCall // /some/path/complicated/project/Interesting.h
379ea9773acSSam McCall // [-prefix--][---dir---] [-dir-] [--stem---]
380ea9773acSSam McCall StringRef Stem = sys::path::stem(Filename);
381ea9773acSSam McCall llvm::SmallVector<StringRef, DirectorySegmentsQueried> Dirs;
382ea9773acSSam McCall llvm::StringRef Prefix;
383ea9773acSSam McCall auto Dir = ++sys::path::rbegin(Filename),
384ea9773acSSam McCall DirEnd = sys::path::rend(Filename);
385ea9773acSSam McCall for (int I = 0; I < DirectorySegmentsQueried && Dir != DirEnd; ++I, ++Dir) {
386ea9773acSSam McCall if (Dir->size() > ShortDirectorySegment)
387ea9773acSSam McCall Dirs.push_back(*Dir);
388ea9773acSSam McCall Prefix = Filename.substr(0, Dir - DirEnd);
389ea9773acSSam McCall }
390ea9773acSSam McCall
391ea9773acSSam McCall // Now award points based on lookups into our various indexes.
392ea9773acSSam McCall DenseMap<size_t, int> Candidates; // Index -> score.
393ea9773acSSam McCall auto Award = [&](int Points, ArrayRef<SubstringAndIndex> Range) {
394ea9773acSSam McCall for (const auto &Entry : Range)
395ea9773acSSam McCall Candidates[Entry.second] += Points;
396ea9773acSSam McCall };
397ea9773acSSam McCall // Award one point if the file's basename is a prefix of the candidate,
398ea9773acSSam McCall // and another if it's an exact match (so exact matches get two points).
399ea9773acSSam McCall Award(1, indexLookup</*Prefix=*/true>(Stem, Stems));
400ea9773acSSam McCall Award(1, indexLookup</*Prefix=*/false>(Stem, Stems));
401ea9773acSSam McCall // For each of the last few directories in the Filename, award a point
402ea9773acSSam McCall // if it's present in the candidate.
403ea9773acSSam McCall for (StringRef Dir : Dirs)
404ea9773acSSam McCall Award(1, indexLookup</*Prefix=*/false>(Dir, Components));
405ea9773acSSam McCall // Award one more point if the whole rest of the path matches.
406ea9773acSSam McCall if (sys::path::root_directory(Prefix) != Prefix)
407ea9773acSSam McCall Award(1, indexLookup</*Prefix=*/true>(Prefix, Paths));
408ea9773acSSam McCall return Candidates;
409ea9773acSSam McCall }
410ea9773acSSam McCall
411ea9773acSSam McCall // Pick a single winner from the set of scored candidates.
412ea9773acSSam McCall // Returns (index, score).
pickWinner(const DenseMap<size_t,int> & Candidates,StringRef Filename,types::ID PreferredLanguage) const413ea9773acSSam McCall std::pair<size_t, int> pickWinner(const DenseMap<size_t, int> &Candidates,
414ea9773acSSam McCall StringRef Filename,
415ea9773acSSam McCall types::ID PreferredLanguage) const {
416ea9773acSSam McCall struct ScoredCandidate {
417ea9773acSSam McCall size_t Index;
418ea9773acSSam McCall bool Preferred;
419ea9773acSSam McCall int Points;
420ea9773acSSam McCall size_t PrefixLength;
421ea9773acSSam McCall };
422ea9773acSSam McCall // Choose the best candidate by (preferred, points, prefix length, alpha).
423ea9773acSSam McCall ScoredCandidate Best = {size_t(-1), false, 0, 0};
424ea9773acSSam McCall for (const auto &Candidate : Candidates) {
425ea9773acSSam McCall ScoredCandidate S;
426ea9773acSSam McCall S.Index = Candidate.first;
427ea9773acSSam McCall S.Preferred = PreferredLanguage == types::TY_INVALID ||
4285167e2d1SIlya Biryukov PreferredLanguage == Types[S.Index];
429ea9773acSSam McCall S.Points = Candidate.second;
430ea9773acSSam McCall if (!S.Preferred && Best.Preferred)
431ea9773acSSam McCall continue;
432ea9773acSSam McCall if (S.Preferred == Best.Preferred) {
433ea9773acSSam McCall if (S.Points < Best.Points)
434ea9773acSSam McCall continue;
435ea9773acSSam McCall if (S.Points == Best.Points) {
436ea9773acSSam McCall S.PrefixLength = matchingPrefix(Filename, Paths[S.Index].first);
437ea9773acSSam McCall if (S.PrefixLength < Best.PrefixLength)
438ea9773acSSam McCall continue;
439ea9773acSSam McCall // hidden heuristics should at least be deterministic!
440ea9773acSSam McCall if (S.PrefixLength == Best.PrefixLength)
441ea9773acSSam McCall if (S.Index > Best.Index)
442ea9773acSSam McCall continue;
443ea9773acSSam McCall }
444ea9773acSSam McCall }
445ea9773acSSam McCall // PrefixLength was only set above if actually needed for a tiebreak.
446ea9773acSSam McCall // But it definitely needs to be set to break ties in the future.
447ea9773acSSam McCall S.PrefixLength = matchingPrefix(Filename, Paths[S.Index].first);
448ea9773acSSam McCall Best = S;
449ea9773acSSam McCall }
450ea9773acSSam McCall // Edge case: no candidate got any points.
451ea9773acSSam McCall // We ignore PreferredLanguage at this point (not ideal).
452ea9773acSSam McCall if (Best.Index == size_t(-1))
453ea9773acSSam McCall return {longestMatch(Filename, Paths).second, 0};
454ea9773acSSam McCall return {Best.Index, Best.Points};
455ea9773acSSam McCall }
456ea9773acSSam McCall
457ea9773acSSam McCall // Returns the range within a sorted index that compares equal to Key.
458ea9773acSSam McCall // If Prefix is true, it's instead the range starting with Key.
459ea9773acSSam McCall template <bool Prefix>
460ea9773acSSam McCall ArrayRef<SubstringAndIndex>
indexLookup(StringRef Key,ArrayRef<SubstringAndIndex> Idx) const4615167e2d1SIlya Biryukov indexLookup(StringRef Key, ArrayRef<SubstringAndIndex> Idx) const {
462ea9773acSSam McCall // Use pointers as iteratiors to ease conversion of result to ArrayRef.
46344f2f4ecSSam McCall auto Range = std::equal_range(Idx.data(), Idx.data() + Idx.size(), Key,
46444f2f4ecSSam McCall Less<Prefix>());
465ea9773acSSam McCall return {Range.first, Range.second};
466ea9773acSSam McCall }
467ea9773acSSam McCall
468ea9773acSSam McCall // Performs a point lookup into a nonempty index, returning a longest match.
longestMatch(StringRef Key,ArrayRef<SubstringAndIndex> Idx) const4695167e2d1SIlya Biryukov SubstringAndIndex longestMatch(StringRef Key,
4705167e2d1SIlya Biryukov ArrayRef<SubstringAndIndex> Idx) const {
471ea9773acSSam McCall assert(!Idx.empty());
472ea9773acSSam McCall // Longest substring match will be adjacent to a direct lookup.
4737264a474SFangrui Song auto It = llvm::lower_bound(Idx, SubstringAndIndex{Key, 0});
474ea9773acSSam McCall if (It == Idx.begin())
475ea9773acSSam McCall return *It;
476ea9773acSSam McCall if (It == Idx.end())
477ea9773acSSam McCall return *--It;
478ea9773acSSam McCall // Have to choose between It and It-1
479ea9773acSSam McCall size_t Prefix = matchingPrefix(Key, It->first);
480ea9773acSSam McCall size_t PrevPrefix = matchingPrefix(Key, (It - 1)->first);
481ea9773acSSam McCall return Prefix > PrevPrefix ? *It : *--It;
482ea9773acSSam McCall }
483ea9773acSSam McCall
4845167e2d1SIlya Biryukov // Original paths, everything else is in lowercase.
4855167e2d1SIlya Biryukov std::vector<std::string> OriginalPaths;
486ea9773acSSam McCall BumpPtrAllocator Arena;
487ea9773acSSam McCall StringSaver Strings;
488ea9773acSSam McCall // Indexes of candidates by certain substrings.
489ea9773acSSam McCall // String is lowercase and sorted, index points into OriginalPaths.
490ea9773acSSam McCall std::vector<SubstringAndIndex> Paths; // Full path.
4915167e2d1SIlya Biryukov // Lang types obtained by guessing on the corresponding path. I-th element is
4925167e2d1SIlya Biryukov // a type for the I-th path.
4935167e2d1SIlya Biryukov std::vector<types::ID> Types;
494ea9773acSSam McCall std::vector<SubstringAndIndex> Stems; // Basename, without extension.
495ea9773acSSam McCall std::vector<SubstringAndIndex> Components; // Last path components.
496ea9773acSSam McCall };
497ea9773acSSam McCall
498ea9773acSSam McCall // The actual CompilationDatabase wrapper delegates to its inner database.
4995167e2d1SIlya Biryukov // If no match, looks up a proxy file in FileIndex and transfers its
5005167e2d1SIlya Biryukov // command to the requested file.
501ea9773acSSam McCall class InterpolatingCompilationDatabase : public CompilationDatabase {
502ea9773acSSam McCall public:
InterpolatingCompilationDatabase(std::unique_ptr<CompilationDatabase> Inner)503ea9773acSSam McCall InterpolatingCompilationDatabase(std::unique_ptr<CompilationDatabase> Inner)
5045167e2d1SIlya Biryukov : Inner(std::move(Inner)), Index(this->Inner->getAllFiles()) {}
505ea9773acSSam McCall
506ea9773acSSam McCall std::vector<CompileCommand>
getCompileCommands(StringRef Filename) const507ea9773acSSam McCall getCompileCommands(StringRef Filename) const override {
508ea9773acSSam McCall auto Known = Inner->getCompileCommands(Filename);
509ea9773acSSam McCall if (Index.empty() || !Known.empty())
510ea9773acSSam McCall return Known;
511ea9773acSSam McCall bool TypeCertain;
512ea9773acSSam McCall auto Lang = guessType(Filename, &TypeCertain);
513ea9773acSSam McCall if (!TypeCertain)
514ea9773acSSam McCall Lang = types::TY_INVALID;
5155167e2d1SIlya Biryukov auto ProxyCommands =
5165167e2d1SIlya Biryukov Inner->getCompileCommands(Index.chooseProxy(Filename, foldType(Lang)));
5175167e2d1SIlya Biryukov if (ProxyCommands.empty())
5185167e2d1SIlya Biryukov return {};
519588db1ccSSam McCall return {transferCompileCommand(std::move(ProxyCommands.front()), Filename)};
520ea9773acSSam McCall }
521ea9773acSSam McCall
getAllFiles() const522ea9773acSSam McCall std::vector<std::string> getAllFiles() const override {
523ea9773acSSam McCall return Inner->getAllFiles();
524ea9773acSSam McCall }
525ea9773acSSam McCall
getAllCompileCommands() const526ea9773acSSam McCall std::vector<CompileCommand> getAllCompileCommands() const override {
527ea9773acSSam McCall return Inner->getAllCompileCommands();
528ea9773acSSam McCall }
529ea9773acSSam McCall
530ea9773acSSam McCall private:
531ea9773acSSam McCall std::unique_ptr<CompilationDatabase> Inner;
5325167e2d1SIlya Biryukov FileIndex Index;
533ea9773acSSam McCall };
534ea9773acSSam McCall
535ea9773acSSam McCall } // namespace
536ea9773acSSam McCall
537ea9773acSSam McCall std::unique_ptr<CompilationDatabase>
inferMissingCompileCommands(std::unique_ptr<CompilationDatabase> Inner)538ea9773acSSam McCall inferMissingCompileCommands(std::unique_ptr<CompilationDatabase> Inner) {
5392b3d49b6SJonas Devlieghere return std::make_unique<InterpolatingCompilationDatabase>(std::move(Inner));
540ea9773acSSam McCall }
541ea9773acSSam McCall
transferCompileCommand(CompileCommand Cmd,StringRef Filename)542588db1ccSSam McCall tooling::CompileCommand transferCompileCommand(CompileCommand Cmd,
543588db1ccSSam McCall StringRef Filename) {
544588db1ccSSam McCall return TransferableCommand(std::move(Cmd)).transferTo(Filename);
545588db1ccSSam McCall }
546588db1ccSSam McCall
547ea9773acSSam McCall } // namespace tooling
548ea9773acSSam McCall } // namespace clang
549