1e5dd7070Spatrick //===- InterpolatingCompilationDatabase.cpp ---------------------*- C++ -*-===//
2e5dd7070Spatrick //
3e5dd7070Spatrick // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4e5dd7070Spatrick // See https://llvm.org/LICENSE.txt for license information.
5e5dd7070Spatrick // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6e5dd7070Spatrick //
7e5dd7070Spatrick //===----------------------------------------------------------------------===//
8e5dd7070Spatrick //
9e5dd7070Spatrick // InterpolatingCompilationDatabase wraps another CompilationDatabase and
10e5dd7070Spatrick // attempts to heuristically determine appropriate compile commands for files
11e5dd7070Spatrick // that are not included, such as headers or newly created files.
12e5dd7070Spatrick //
13e5dd7070Spatrick // Motivating cases include:
14e5dd7070Spatrick // Header files that live next to their implementation files. These typically
15e5dd7070Spatrick // share a base filename. (libclang/CXString.h, libclang/CXString.cpp).
16e5dd7070Spatrick // Some projects separate headers from includes. Filenames still typically
17e5dd7070Spatrick // match, maybe other path segments too. (include/llvm/IR/Use.h, lib/IR/Use.cc).
18e5dd7070Spatrick // Matches are sometimes only approximate (Sema.h, SemaDecl.cpp). This goes
19e5dd7070Spatrick // for directories too (Support/Unix/Process.inc, lib/Support/Process.cpp).
20e5dd7070Spatrick // Even if we can't find a "right" compile command, even a random one from
21e5dd7070Spatrick // the project will tend to get important flags like -I and -x right.
22e5dd7070Spatrick //
23e5dd7070Spatrick // We "borrow" the compile command for the closest available file:
24e5dd7070Spatrick // - points are awarded if the filename matches (ignoring extension)
25e5dd7070Spatrick // - points are awarded if the directory structure matches
26e5dd7070Spatrick // - ties are broken by length of path prefix match
27e5dd7070Spatrick //
28e5dd7070Spatrick // The compile command is adjusted, replacing the filename and removing output
29e5dd7070Spatrick // file arguments. The -x and -std flags may be affected too.
30e5dd7070Spatrick //
31e5dd7070Spatrick // Source language is a tricky issue: is it OK to use a .c file's command
32e5dd7070Spatrick // for building a .cc file? What language is a .h file in?
33e5dd7070Spatrick // - We only consider compile commands for c-family languages as candidates.
34e5dd7070Spatrick // - For files whose language is implied by the filename (e.g. .m, .hpp)
35e5dd7070Spatrick // we prefer candidates from the same language.
36e5dd7070Spatrick // If we must cross languages, we drop any -x and -std flags.
37e5dd7070Spatrick // - For .h files, candidates from any c-family language are acceptable.
38e5dd7070Spatrick // We use the candidate's language, inserting e.g. -x c++-header.
39e5dd7070Spatrick //
40e5dd7070Spatrick // This class is only useful when wrapping databases that can enumerate all
41e5dd7070Spatrick // their compile commands. If getAllFilenames() is empty, no inference occurs.
42e5dd7070Spatrick //
43e5dd7070Spatrick //===----------------------------------------------------------------------===//
44e5dd7070Spatrick
45e5dd7070Spatrick #include "clang/Basic/LangStandard.h"
46a9ac8606Spatrick #include "clang/Driver/Driver.h"
47e5dd7070Spatrick #include "clang/Driver/Options.h"
48e5dd7070Spatrick #include "clang/Driver/Types.h"
49e5dd7070Spatrick #include "clang/Tooling/CompilationDatabase.h"
50a9ac8606Spatrick #include "llvm/ADT/ArrayRef.h"
51e5dd7070Spatrick #include "llvm/ADT/DenseMap.h"
52e5dd7070Spatrick #include "llvm/ADT/StringExtras.h"
53e5dd7070Spatrick #include "llvm/Option/ArgList.h"
54e5dd7070Spatrick #include "llvm/Option/OptTable.h"
55e5dd7070Spatrick #include "llvm/Support/Debug.h"
56e5dd7070Spatrick #include "llvm/Support/Path.h"
57e5dd7070Spatrick #include "llvm/Support/StringSaver.h"
58e5dd7070Spatrick #include "llvm/Support/raw_ostream.h"
59e5dd7070Spatrick #include <memory>
60*12c85518Srobert #include <optional>
61e5dd7070Spatrick
62e5dd7070Spatrick namespace clang {
63e5dd7070Spatrick namespace tooling {
64e5dd7070Spatrick namespace {
65e5dd7070Spatrick using namespace llvm;
66e5dd7070Spatrick namespace types = clang::driver::types;
67e5dd7070Spatrick namespace path = llvm::sys::path;
68e5dd7070Spatrick
69e5dd7070Spatrick // The length of the prefix these two strings have in common.
matchingPrefix(StringRef L,StringRef R)70e5dd7070Spatrick size_t matchingPrefix(StringRef L, StringRef R) {
71e5dd7070Spatrick size_t Limit = std::min(L.size(), R.size());
72e5dd7070Spatrick for (size_t I = 0; I < Limit; ++I)
73e5dd7070Spatrick if (L[I] != R[I])
74e5dd7070Spatrick return I;
75e5dd7070Spatrick return Limit;
76e5dd7070Spatrick }
77e5dd7070Spatrick
78e5dd7070Spatrick // A comparator for searching SubstringWithIndexes with std::equal_range etc.
79e5dd7070Spatrick // Optionaly prefix semantics: compares equal if the key is a prefix.
80e5dd7070Spatrick template <bool Prefix> struct Less {
operator ()clang::tooling::__anone90b9a300111::Less81e5dd7070Spatrick bool operator()(StringRef Key, std::pair<StringRef, size_t> Value) const {
82e5dd7070Spatrick StringRef V = Prefix ? Value.first.substr(0, Key.size()) : Value.first;
83e5dd7070Spatrick return Key < V;
84e5dd7070Spatrick }
operator ()clang::tooling::__anone90b9a300111::Less85e5dd7070Spatrick bool operator()(std::pair<StringRef, size_t> Value, StringRef Key) const {
86e5dd7070Spatrick StringRef V = Prefix ? Value.first.substr(0, Key.size()) : Value.first;
87e5dd7070Spatrick return V < Key;
88e5dd7070Spatrick }
89e5dd7070Spatrick };
90e5dd7070Spatrick
91e5dd7070Spatrick // Infer type from filename. If we might have gotten it wrong, set *Certain.
92e5dd7070Spatrick // *.h will be inferred as a C header, but not certain.
guessType(StringRef Filename,bool * Certain=nullptr)93e5dd7070Spatrick types::ID guessType(StringRef Filename, bool *Certain = nullptr) {
94e5dd7070Spatrick // path::extension is ".cpp", lookupTypeForExtension wants "cpp".
95e5dd7070Spatrick auto Lang =
96e5dd7070Spatrick types::lookupTypeForExtension(path::extension(Filename).substr(1));
97e5dd7070Spatrick if (Certain)
98e5dd7070Spatrick *Certain = Lang != types::TY_CHeader && Lang != types::TY_INVALID;
99e5dd7070Spatrick return Lang;
100e5dd7070Spatrick }
101e5dd7070Spatrick
102e5dd7070Spatrick // Return Lang as one of the canonical supported types.
103e5dd7070Spatrick // e.g. c-header --> c; fortran --> TY_INVALID
foldType(types::ID Lang)104e5dd7070Spatrick static types::ID foldType(types::ID Lang) {
105e5dd7070Spatrick switch (Lang) {
106e5dd7070Spatrick case types::TY_C:
107e5dd7070Spatrick case types::TY_CHeader:
108e5dd7070Spatrick return types::TY_C;
109e5dd7070Spatrick case types::TY_ObjC:
110e5dd7070Spatrick case types::TY_ObjCHeader:
111e5dd7070Spatrick return types::TY_ObjC;
112e5dd7070Spatrick case types::TY_CXX:
113e5dd7070Spatrick case types::TY_CXXHeader:
114e5dd7070Spatrick return types::TY_CXX;
115e5dd7070Spatrick case types::TY_ObjCXX:
116e5dd7070Spatrick case types::TY_ObjCXXHeader:
117e5dd7070Spatrick return types::TY_ObjCXX;
118ec727ea7Spatrick case types::TY_CUDA:
119ec727ea7Spatrick case types::TY_CUDA_DEVICE:
120ec727ea7Spatrick return types::TY_CUDA;
121e5dd7070Spatrick default:
122e5dd7070Spatrick return types::TY_INVALID;
123e5dd7070Spatrick }
124e5dd7070Spatrick }
125e5dd7070Spatrick
126e5dd7070Spatrick // A CompileCommand that can be applied to another file.
127e5dd7070Spatrick struct TransferableCommand {
128e5dd7070Spatrick // Flags that should not apply to all files are stripped from CommandLine.
129e5dd7070Spatrick CompileCommand Cmd;
130e5dd7070Spatrick // Language detected from -x or the filename. Never TY_INVALID.
131*12c85518Srobert std::optional<types::ID> Type;
132e5dd7070Spatrick // Standard specified by -std.
133e5dd7070Spatrick LangStandard::Kind Std = LangStandard::lang_unspecified;
134e5dd7070Spatrick // Whether the command line is for the cl-compatible driver.
135e5dd7070Spatrick bool ClangCLMode;
136e5dd7070Spatrick
TransferableCommandclang::tooling::__anone90b9a300111::TransferableCommand137e5dd7070Spatrick TransferableCommand(CompileCommand C)
138a9ac8606Spatrick : Cmd(std::move(C)), Type(guessType(Cmd.Filename)) {
139e5dd7070Spatrick std::vector<std::string> OldArgs = std::move(Cmd.CommandLine);
140e5dd7070Spatrick Cmd.CommandLine.clear();
141e5dd7070Spatrick
142e5dd7070Spatrick // Wrap the old arguments in an InputArgList.
143e5dd7070Spatrick llvm::opt::InputArgList ArgList;
144e5dd7070Spatrick {
145e5dd7070Spatrick SmallVector<const char *, 16> TmpArgv;
146e5dd7070Spatrick for (const std::string &S : OldArgs)
147e5dd7070Spatrick TmpArgv.push_back(S.c_str());
148a9ac8606Spatrick ClangCLMode = !TmpArgv.empty() &&
149a9ac8606Spatrick driver::IsClangCL(driver::getDriverMode(
150*12c85518Srobert TmpArgv.front(), llvm::ArrayRef(TmpArgv).slice(1)));
151e5dd7070Spatrick ArgList = {TmpArgv.begin(), TmpArgv.end()};
152e5dd7070Spatrick }
153e5dd7070Spatrick
154e5dd7070Spatrick // Parse the old args in order to strip out and record unwanted flags.
155e5dd7070Spatrick // We parse each argument individually so that we can retain the exact
156e5dd7070Spatrick // spelling of each argument; re-rendering is lossy for aliased flags.
157e5dd7070Spatrick // E.g. in CL mode, /W4 maps to -Wall.
158e5dd7070Spatrick auto &OptTable = clang::driver::getDriverOptTable();
159e5dd7070Spatrick if (!OldArgs.empty())
160e5dd7070Spatrick Cmd.CommandLine.emplace_back(OldArgs.front());
161e5dd7070Spatrick for (unsigned Pos = 1; Pos < OldArgs.size();) {
162e5dd7070Spatrick using namespace driver::options;
163e5dd7070Spatrick
164e5dd7070Spatrick const unsigned OldPos = Pos;
165e5dd7070Spatrick std::unique_ptr<llvm::opt::Arg> Arg(OptTable.ParseOneArg(
166e5dd7070Spatrick ArgList, Pos,
167*12c85518Srobert /* Include */ ClangCLMode ? CoreOption | CLOption | CLDXCOption : 0,
168*12c85518Srobert /* Exclude */ ClangCLMode ? 0 : CLOption | CLDXCOption));
169e5dd7070Spatrick
170e5dd7070Spatrick if (!Arg)
171e5dd7070Spatrick continue;
172e5dd7070Spatrick
173e5dd7070Spatrick const llvm::opt::Option &Opt = Arg->getOption();
174e5dd7070Spatrick
175e5dd7070Spatrick // Strip input and output files.
176e5dd7070Spatrick if (Opt.matches(OPT_INPUT) || Opt.matches(OPT_o) ||
177e5dd7070Spatrick (ClangCLMode && (Opt.matches(OPT__SLASH_Fa) ||
178e5dd7070Spatrick Opt.matches(OPT__SLASH_Fe) ||
179e5dd7070Spatrick Opt.matches(OPT__SLASH_Fi) ||
180e5dd7070Spatrick Opt.matches(OPT__SLASH_Fo))))
181e5dd7070Spatrick continue;
182e5dd7070Spatrick
183a9ac8606Spatrick // ...including when the inputs are passed after --.
184a9ac8606Spatrick if (Opt.matches(OPT__DASH_DASH))
185a9ac8606Spatrick break;
186a9ac8606Spatrick
187e5dd7070Spatrick // Strip -x, but record the overridden language.
188e5dd7070Spatrick if (const auto GivenType = tryParseTypeArg(*Arg)) {
189e5dd7070Spatrick Type = *GivenType;
190e5dd7070Spatrick continue;
191e5dd7070Spatrick }
192e5dd7070Spatrick
193e5dd7070Spatrick // Strip -std, but record the value.
194e5dd7070Spatrick if (const auto GivenStd = tryParseStdArg(*Arg)) {
195e5dd7070Spatrick if (*GivenStd != LangStandard::lang_unspecified)
196e5dd7070Spatrick Std = *GivenStd;
197e5dd7070Spatrick continue;
198e5dd7070Spatrick }
199e5dd7070Spatrick
200e5dd7070Spatrick Cmd.CommandLine.insert(Cmd.CommandLine.end(),
201e5dd7070Spatrick OldArgs.data() + OldPos, OldArgs.data() + Pos);
202e5dd7070Spatrick }
203e5dd7070Spatrick
204e5dd7070Spatrick // Make use of -std iff -x was missing.
205e5dd7070Spatrick if (Type == types::TY_INVALID && Std != LangStandard::lang_unspecified)
206e5dd7070Spatrick Type = toType(LangStandard::getLangStandardForKind(Std).getLanguage());
207e5dd7070Spatrick Type = foldType(*Type);
208e5dd7070Spatrick // The contract is to store None instead of TY_INVALID.
209e5dd7070Spatrick if (Type == types::TY_INVALID)
210*12c85518Srobert Type = std::nullopt;
211e5dd7070Spatrick }
212e5dd7070Spatrick
213e5dd7070Spatrick // Produce a CompileCommand for \p filename, based on this one.
214a9ac8606Spatrick // (This consumes the TransferableCommand just to avoid copying Cmd).
transferToclang::tooling::__anone90b9a300111::TransferableCommand215a9ac8606Spatrick CompileCommand transferTo(StringRef Filename) && {
216a9ac8606Spatrick CompileCommand Result = std::move(Cmd);
217a9ac8606Spatrick Result.Heuristic = "inferred from " + Result.Filename;
218ec727ea7Spatrick Result.Filename = std::string(Filename);
219e5dd7070Spatrick bool TypeCertain;
220e5dd7070Spatrick auto TargetType = guessType(Filename, &TypeCertain);
221e5dd7070Spatrick // If the filename doesn't determine the language (.h), transfer with -x.
222e5dd7070Spatrick if ((!TargetType || !TypeCertain) && Type) {
223e5dd7070Spatrick // Use *Type, or its header variant if the file is a header.
224e5dd7070Spatrick // Treat no/invalid extension as header (e.g. C++ standard library).
225e5dd7070Spatrick TargetType =
226e5dd7070Spatrick (!TargetType || types::onlyPrecompileType(TargetType)) // header?
227e5dd7070Spatrick ? types::lookupHeaderTypeForSourceType(*Type)
228e5dd7070Spatrick : *Type;
229e5dd7070Spatrick if (ClangCLMode) {
230e5dd7070Spatrick const StringRef Flag = toCLFlag(TargetType);
231e5dd7070Spatrick if (!Flag.empty())
232ec727ea7Spatrick Result.CommandLine.push_back(std::string(Flag));
233e5dd7070Spatrick } else {
234e5dd7070Spatrick Result.CommandLine.push_back("-x");
235e5dd7070Spatrick Result.CommandLine.push_back(types::getTypeName(TargetType));
236e5dd7070Spatrick }
237e5dd7070Spatrick }
238e5dd7070Spatrick // --std flag may only be transferred if the language is the same.
239e5dd7070Spatrick // We may consider "translating" these, e.g. c++11 -> c11.
240e5dd7070Spatrick if (Std != LangStandard::lang_unspecified && foldType(TargetType) == Type) {
241e5dd7070Spatrick Result.CommandLine.emplace_back((
242e5dd7070Spatrick llvm::Twine(ClangCLMode ? "/std:" : "-std=") +
243e5dd7070Spatrick LangStandard::getLangStandardForKind(Std).getName()).str());
244e5dd7070Spatrick }
245a9ac8606Spatrick Result.CommandLine.push_back("--");
246ec727ea7Spatrick Result.CommandLine.push_back(std::string(Filename));
247e5dd7070Spatrick return Result;
248e5dd7070Spatrick }
249e5dd7070Spatrick
250e5dd7070Spatrick private:
251e5dd7070Spatrick // Map the language from the --std flag to that of the -x flag.
toTypeclang::tooling::__anone90b9a300111::TransferableCommand252e5dd7070Spatrick static types::ID toType(Language Lang) {
253e5dd7070Spatrick switch (Lang) {
254e5dd7070Spatrick case Language::C:
255e5dd7070Spatrick return types::TY_C;
256e5dd7070Spatrick case Language::CXX:
257e5dd7070Spatrick return types::TY_CXX;
258e5dd7070Spatrick case Language::ObjC:
259e5dd7070Spatrick return types::TY_ObjC;
260e5dd7070Spatrick case Language::ObjCXX:
261e5dd7070Spatrick return types::TY_ObjCXX;
262e5dd7070Spatrick default:
263e5dd7070Spatrick return types::TY_INVALID;
264e5dd7070Spatrick }
265e5dd7070Spatrick }
266e5dd7070Spatrick
267e5dd7070Spatrick // Convert a file type to the matching CL-style type flag.
toCLFlagclang::tooling::__anone90b9a300111::TransferableCommand268e5dd7070Spatrick static StringRef toCLFlag(types::ID Type) {
269e5dd7070Spatrick switch (Type) {
270e5dd7070Spatrick case types::TY_C:
271e5dd7070Spatrick case types::TY_CHeader:
272e5dd7070Spatrick return "/TC";
273e5dd7070Spatrick case types::TY_CXX:
274e5dd7070Spatrick case types::TY_CXXHeader:
275e5dd7070Spatrick return "/TP";
276e5dd7070Spatrick default:
277e5dd7070Spatrick return StringRef();
278e5dd7070Spatrick }
279e5dd7070Spatrick }
280e5dd7070Spatrick
281e5dd7070Spatrick // Try to interpret the argument as a type specifier, e.g. '-x'.
tryParseTypeArgclang::tooling::__anone90b9a300111::TransferableCommand282*12c85518Srobert std::optional<types::ID> tryParseTypeArg(const llvm::opt::Arg &Arg) {
283e5dd7070Spatrick const llvm::opt::Option &Opt = Arg.getOption();
284e5dd7070Spatrick using namespace driver::options;
285e5dd7070Spatrick if (ClangCLMode) {
286e5dd7070Spatrick if (Opt.matches(OPT__SLASH_TC) || Opt.matches(OPT__SLASH_Tc))
287e5dd7070Spatrick return types::TY_C;
288e5dd7070Spatrick if (Opt.matches(OPT__SLASH_TP) || Opt.matches(OPT__SLASH_Tp))
289e5dd7070Spatrick return types::TY_CXX;
290e5dd7070Spatrick } else {
291e5dd7070Spatrick if (Opt.matches(driver::options::OPT_x))
292e5dd7070Spatrick return types::lookupTypeForTypeSpecifier(Arg.getValue());
293e5dd7070Spatrick }
294*12c85518Srobert return std::nullopt;
295e5dd7070Spatrick }
296e5dd7070Spatrick
297e5dd7070Spatrick // Try to interpret the argument as '-std='.
tryParseStdArgclang::tooling::__anone90b9a300111::TransferableCommand298*12c85518Srobert std::optional<LangStandard::Kind> tryParseStdArg(const llvm::opt::Arg &Arg) {
299e5dd7070Spatrick using namespace driver::options;
300e5dd7070Spatrick if (Arg.getOption().matches(ClangCLMode ? OPT__SLASH_std : OPT_std_EQ))
301e5dd7070Spatrick return LangStandard::getLangKind(Arg.getValue());
302*12c85518Srobert return std::nullopt;
303e5dd7070Spatrick }
304e5dd7070Spatrick };
305e5dd7070Spatrick
306e5dd7070Spatrick // Given a filename, FileIndex picks the best matching file from the underlying
307e5dd7070Spatrick // DB. This is the proxy file whose CompileCommand will be reused. The
308e5dd7070Spatrick // heuristics incorporate file name, extension, and directory structure.
309e5dd7070Spatrick // Strategy:
310e5dd7070Spatrick // - Build indexes of each of the substrings we want to look up by.
311e5dd7070Spatrick // These indexes are just sorted lists of the substrings.
312e5dd7070Spatrick // - Each criterion corresponds to a range lookup into the index, so we only
313e5dd7070Spatrick // need O(log N) string comparisons to determine scores.
314e5dd7070Spatrick //
315e5dd7070Spatrick // Apart from path proximity signals, also takes file extensions into account
316e5dd7070Spatrick // when scoring the candidates.
317e5dd7070Spatrick class FileIndex {
318e5dd7070Spatrick public:
FileIndex(std::vector<std::string> Files)319e5dd7070Spatrick FileIndex(std::vector<std::string> Files)
320e5dd7070Spatrick : OriginalPaths(std::move(Files)), Strings(Arena) {
321e5dd7070Spatrick // Sort commands by filename for determinism (index is a tiebreaker later).
322e5dd7070Spatrick llvm::sort(OriginalPaths);
323e5dd7070Spatrick Paths.reserve(OriginalPaths.size());
324e5dd7070Spatrick Types.reserve(OriginalPaths.size());
325e5dd7070Spatrick Stems.reserve(OriginalPaths.size());
326e5dd7070Spatrick for (size_t I = 0; I < OriginalPaths.size(); ++I) {
327e5dd7070Spatrick StringRef Path = Strings.save(StringRef(OriginalPaths[I]).lower());
328e5dd7070Spatrick
329e5dd7070Spatrick Paths.emplace_back(Path, I);
330*12c85518Srobert Types.push_back(foldType(guessType(OriginalPaths[I])));
331e5dd7070Spatrick Stems.emplace_back(sys::path::stem(Path), I);
332e5dd7070Spatrick auto Dir = ++sys::path::rbegin(Path), DirEnd = sys::path::rend(Path);
333e5dd7070Spatrick for (int J = 0; J < DirectorySegmentsIndexed && Dir != DirEnd; ++J, ++Dir)
334e5dd7070Spatrick if (Dir->size() > ShortDirectorySegment) // not trivial ones
335e5dd7070Spatrick Components.emplace_back(*Dir, I);
336e5dd7070Spatrick }
337e5dd7070Spatrick llvm::sort(Paths);
338e5dd7070Spatrick llvm::sort(Stems);
339e5dd7070Spatrick llvm::sort(Components);
340e5dd7070Spatrick }
341e5dd7070Spatrick
empty() const342e5dd7070Spatrick bool empty() const { return Paths.empty(); }
343e5dd7070Spatrick
344e5dd7070Spatrick // Returns the path for the file that best fits OriginalFilename.
345e5dd7070Spatrick // Candidates with extensions matching PreferLanguage will be chosen over
346e5dd7070Spatrick // others (unless it's TY_INVALID, or all candidates are bad).
chooseProxy(StringRef OriginalFilename,types::ID PreferLanguage) const347e5dd7070Spatrick StringRef chooseProxy(StringRef OriginalFilename,
348e5dd7070Spatrick types::ID PreferLanguage) const {
349e5dd7070Spatrick assert(!empty() && "need at least one candidate!");
350e5dd7070Spatrick std::string Filename = OriginalFilename.lower();
351e5dd7070Spatrick auto Candidates = scoreCandidates(Filename);
352e5dd7070Spatrick std::pair<size_t, int> Best =
353e5dd7070Spatrick pickWinner(Candidates, Filename, PreferLanguage);
354e5dd7070Spatrick
355e5dd7070Spatrick DEBUG_WITH_TYPE(
356e5dd7070Spatrick "interpolate",
357e5dd7070Spatrick llvm::dbgs() << "interpolate: chose " << OriginalPaths[Best.first]
358e5dd7070Spatrick << " as proxy for " << OriginalFilename << " preferring "
359e5dd7070Spatrick << (PreferLanguage == types::TY_INVALID
360e5dd7070Spatrick ? "none"
361e5dd7070Spatrick : types::getTypeName(PreferLanguage))
362e5dd7070Spatrick << " score=" << Best.second << "\n");
363e5dd7070Spatrick return OriginalPaths[Best.first];
364e5dd7070Spatrick }
365e5dd7070Spatrick
366e5dd7070Spatrick private:
367e5dd7070Spatrick using SubstringAndIndex = std::pair<StringRef, size_t>;
368e5dd7070Spatrick // Directory matching parameters: we look at the last two segments of the
369e5dd7070Spatrick // parent directory (usually the semantically significant ones in practice).
370e5dd7070Spatrick // We search only the last four of each candidate (for efficiency).
371e5dd7070Spatrick constexpr static int DirectorySegmentsIndexed = 4;
372e5dd7070Spatrick constexpr static int DirectorySegmentsQueried = 2;
373e5dd7070Spatrick constexpr static int ShortDirectorySegment = 1; // Only look at longer names.
374e5dd7070Spatrick
375e5dd7070Spatrick // Award points to candidate entries that should be considered for the file.
376e5dd7070Spatrick // Returned keys are indexes into paths, and the values are (nonzero) scores.
scoreCandidates(StringRef Filename) const377e5dd7070Spatrick DenseMap<size_t, int> scoreCandidates(StringRef Filename) const {
378e5dd7070Spatrick // Decompose Filename into the parts we care about.
379e5dd7070Spatrick // /some/path/complicated/project/Interesting.h
380e5dd7070Spatrick // [-prefix--][---dir---] [-dir-] [--stem---]
381e5dd7070Spatrick StringRef Stem = sys::path::stem(Filename);
382e5dd7070Spatrick llvm::SmallVector<StringRef, DirectorySegmentsQueried> Dirs;
383e5dd7070Spatrick llvm::StringRef Prefix;
384e5dd7070Spatrick auto Dir = ++sys::path::rbegin(Filename),
385e5dd7070Spatrick DirEnd = sys::path::rend(Filename);
386e5dd7070Spatrick for (int I = 0; I < DirectorySegmentsQueried && Dir != DirEnd; ++I, ++Dir) {
387e5dd7070Spatrick if (Dir->size() > ShortDirectorySegment)
388e5dd7070Spatrick Dirs.push_back(*Dir);
389e5dd7070Spatrick Prefix = Filename.substr(0, Dir - DirEnd);
390e5dd7070Spatrick }
391e5dd7070Spatrick
392e5dd7070Spatrick // Now award points based on lookups into our various indexes.
393e5dd7070Spatrick DenseMap<size_t, int> Candidates; // Index -> score.
394e5dd7070Spatrick auto Award = [&](int Points, ArrayRef<SubstringAndIndex> Range) {
395e5dd7070Spatrick for (const auto &Entry : Range)
396e5dd7070Spatrick Candidates[Entry.second] += Points;
397e5dd7070Spatrick };
398e5dd7070Spatrick // Award one point if the file's basename is a prefix of the candidate,
399e5dd7070Spatrick // and another if it's an exact match (so exact matches get two points).
400e5dd7070Spatrick Award(1, indexLookup</*Prefix=*/true>(Stem, Stems));
401e5dd7070Spatrick Award(1, indexLookup</*Prefix=*/false>(Stem, Stems));
402e5dd7070Spatrick // For each of the last few directories in the Filename, award a point
403e5dd7070Spatrick // if it's present in the candidate.
404e5dd7070Spatrick for (StringRef Dir : Dirs)
405e5dd7070Spatrick Award(1, indexLookup</*Prefix=*/false>(Dir, Components));
406e5dd7070Spatrick // Award one more point if the whole rest of the path matches.
407e5dd7070Spatrick if (sys::path::root_directory(Prefix) != Prefix)
408e5dd7070Spatrick Award(1, indexLookup</*Prefix=*/true>(Prefix, Paths));
409e5dd7070Spatrick return Candidates;
410e5dd7070Spatrick }
411e5dd7070Spatrick
412e5dd7070Spatrick // Pick a single winner from the set of scored candidates.
413e5dd7070Spatrick // Returns (index, score).
pickWinner(const DenseMap<size_t,int> & Candidates,StringRef Filename,types::ID PreferredLanguage) const414e5dd7070Spatrick std::pair<size_t, int> pickWinner(const DenseMap<size_t, int> &Candidates,
415e5dd7070Spatrick StringRef Filename,
416e5dd7070Spatrick types::ID PreferredLanguage) const {
417e5dd7070Spatrick struct ScoredCandidate {
418e5dd7070Spatrick size_t Index;
419e5dd7070Spatrick bool Preferred;
420e5dd7070Spatrick int Points;
421e5dd7070Spatrick size_t PrefixLength;
422e5dd7070Spatrick };
423e5dd7070Spatrick // Choose the best candidate by (preferred, points, prefix length, alpha).
424e5dd7070Spatrick ScoredCandidate Best = {size_t(-1), false, 0, 0};
425e5dd7070Spatrick for (const auto &Candidate : Candidates) {
426e5dd7070Spatrick ScoredCandidate S;
427e5dd7070Spatrick S.Index = Candidate.first;
428e5dd7070Spatrick S.Preferred = PreferredLanguage == types::TY_INVALID ||
429e5dd7070Spatrick PreferredLanguage == Types[S.Index];
430e5dd7070Spatrick S.Points = Candidate.second;
431e5dd7070Spatrick if (!S.Preferred && Best.Preferred)
432e5dd7070Spatrick continue;
433e5dd7070Spatrick if (S.Preferred == Best.Preferred) {
434e5dd7070Spatrick if (S.Points < Best.Points)
435e5dd7070Spatrick continue;
436e5dd7070Spatrick if (S.Points == Best.Points) {
437e5dd7070Spatrick S.PrefixLength = matchingPrefix(Filename, Paths[S.Index].first);
438e5dd7070Spatrick if (S.PrefixLength < Best.PrefixLength)
439e5dd7070Spatrick continue;
440e5dd7070Spatrick // hidden heuristics should at least be deterministic!
441e5dd7070Spatrick if (S.PrefixLength == Best.PrefixLength)
442e5dd7070Spatrick if (S.Index > Best.Index)
443e5dd7070Spatrick continue;
444e5dd7070Spatrick }
445e5dd7070Spatrick }
446e5dd7070Spatrick // PrefixLength was only set above if actually needed for a tiebreak.
447e5dd7070Spatrick // But it definitely needs to be set to break ties in the future.
448e5dd7070Spatrick S.PrefixLength = matchingPrefix(Filename, Paths[S.Index].first);
449e5dd7070Spatrick Best = S;
450e5dd7070Spatrick }
451e5dd7070Spatrick // Edge case: no candidate got any points.
452e5dd7070Spatrick // We ignore PreferredLanguage at this point (not ideal).
453e5dd7070Spatrick if (Best.Index == size_t(-1))
454e5dd7070Spatrick return {longestMatch(Filename, Paths).second, 0};
455e5dd7070Spatrick return {Best.Index, Best.Points};
456e5dd7070Spatrick }
457e5dd7070Spatrick
458e5dd7070Spatrick // Returns the range within a sorted index that compares equal to Key.
459e5dd7070Spatrick // If Prefix is true, it's instead the range starting with Key.
460e5dd7070Spatrick template <bool Prefix>
461e5dd7070Spatrick ArrayRef<SubstringAndIndex>
indexLookup(StringRef Key,ArrayRef<SubstringAndIndex> Idx) const462e5dd7070Spatrick indexLookup(StringRef Key, ArrayRef<SubstringAndIndex> Idx) const {
463e5dd7070Spatrick // Use pointers as iteratiors to ease conversion of result to ArrayRef.
464e5dd7070Spatrick auto Range = std::equal_range(Idx.data(), Idx.data() + Idx.size(), Key,
465e5dd7070Spatrick Less<Prefix>());
466e5dd7070Spatrick return {Range.first, Range.second};
467e5dd7070Spatrick }
468e5dd7070Spatrick
469e5dd7070Spatrick // Performs a point lookup into a nonempty index, returning a longest match.
longestMatch(StringRef Key,ArrayRef<SubstringAndIndex> Idx) const470e5dd7070Spatrick SubstringAndIndex longestMatch(StringRef Key,
471e5dd7070Spatrick ArrayRef<SubstringAndIndex> Idx) const {
472e5dd7070Spatrick assert(!Idx.empty());
473e5dd7070Spatrick // Longest substring match will be adjacent to a direct lookup.
474e5dd7070Spatrick auto It = llvm::lower_bound(Idx, SubstringAndIndex{Key, 0});
475e5dd7070Spatrick if (It == Idx.begin())
476e5dd7070Spatrick return *It;
477e5dd7070Spatrick if (It == Idx.end())
478e5dd7070Spatrick return *--It;
479e5dd7070Spatrick // Have to choose between It and It-1
480e5dd7070Spatrick size_t Prefix = matchingPrefix(Key, It->first);
481e5dd7070Spatrick size_t PrevPrefix = matchingPrefix(Key, (It - 1)->first);
482e5dd7070Spatrick return Prefix > PrevPrefix ? *It : *--It;
483e5dd7070Spatrick }
484e5dd7070Spatrick
485e5dd7070Spatrick // Original paths, everything else is in lowercase.
486e5dd7070Spatrick std::vector<std::string> OriginalPaths;
487e5dd7070Spatrick BumpPtrAllocator Arena;
488e5dd7070Spatrick StringSaver Strings;
489e5dd7070Spatrick // Indexes of candidates by certain substrings.
490e5dd7070Spatrick // String is lowercase and sorted, index points into OriginalPaths.
491e5dd7070Spatrick std::vector<SubstringAndIndex> Paths; // Full path.
492e5dd7070Spatrick // Lang types obtained by guessing on the corresponding path. I-th element is
493e5dd7070Spatrick // a type for the I-th path.
494e5dd7070Spatrick std::vector<types::ID> Types;
495e5dd7070Spatrick std::vector<SubstringAndIndex> Stems; // Basename, without extension.
496e5dd7070Spatrick std::vector<SubstringAndIndex> Components; // Last path components.
497e5dd7070Spatrick };
498e5dd7070Spatrick
499e5dd7070Spatrick // The actual CompilationDatabase wrapper delegates to its inner database.
500e5dd7070Spatrick // If no match, looks up a proxy file in FileIndex and transfers its
501e5dd7070Spatrick // command to the requested file.
502e5dd7070Spatrick class InterpolatingCompilationDatabase : public CompilationDatabase {
503e5dd7070Spatrick public:
InterpolatingCompilationDatabase(std::unique_ptr<CompilationDatabase> Inner)504e5dd7070Spatrick InterpolatingCompilationDatabase(std::unique_ptr<CompilationDatabase> Inner)
505e5dd7070Spatrick : Inner(std::move(Inner)), Index(this->Inner->getAllFiles()) {}
506e5dd7070Spatrick
507e5dd7070Spatrick std::vector<CompileCommand>
getCompileCommands(StringRef Filename) const508e5dd7070Spatrick getCompileCommands(StringRef Filename) const override {
509e5dd7070Spatrick auto Known = Inner->getCompileCommands(Filename);
510e5dd7070Spatrick if (Index.empty() || !Known.empty())
511e5dd7070Spatrick return Known;
512e5dd7070Spatrick bool TypeCertain;
513e5dd7070Spatrick auto Lang = guessType(Filename, &TypeCertain);
514e5dd7070Spatrick if (!TypeCertain)
515e5dd7070Spatrick Lang = types::TY_INVALID;
516e5dd7070Spatrick auto ProxyCommands =
517e5dd7070Spatrick Inner->getCompileCommands(Index.chooseProxy(Filename, foldType(Lang)));
518e5dd7070Spatrick if (ProxyCommands.empty())
519e5dd7070Spatrick return {};
520a9ac8606Spatrick return {transferCompileCommand(std::move(ProxyCommands.front()), Filename)};
521e5dd7070Spatrick }
522e5dd7070Spatrick
getAllFiles() const523e5dd7070Spatrick std::vector<std::string> getAllFiles() const override {
524e5dd7070Spatrick return Inner->getAllFiles();
525e5dd7070Spatrick }
526e5dd7070Spatrick
getAllCompileCommands() const527e5dd7070Spatrick std::vector<CompileCommand> getAllCompileCommands() const override {
528e5dd7070Spatrick return Inner->getAllCompileCommands();
529e5dd7070Spatrick }
530e5dd7070Spatrick
531e5dd7070Spatrick private:
532e5dd7070Spatrick std::unique_ptr<CompilationDatabase> Inner;
533e5dd7070Spatrick FileIndex Index;
534e5dd7070Spatrick };
535e5dd7070Spatrick
536e5dd7070Spatrick } // namespace
537e5dd7070Spatrick
538e5dd7070Spatrick std::unique_ptr<CompilationDatabase>
inferMissingCompileCommands(std::unique_ptr<CompilationDatabase> Inner)539e5dd7070Spatrick inferMissingCompileCommands(std::unique_ptr<CompilationDatabase> Inner) {
540e5dd7070Spatrick return std::make_unique<InterpolatingCompilationDatabase>(std::move(Inner));
541e5dd7070Spatrick }
542e5dd7070Spatrick
transferCompileCommand(CompileCommand Cmd,StringRef Filename)543a9ac8606Spatrick tooling::CompileCommand transferCompileCommand(CompileCommand Cmd,
544a9ac8606Spatrick StringRef Filename) {
545a9ac8606Spatrick return TransferableCommand(std::move(Cmd)).transferTo(Filename);
546a9ac8606Spatrick }
547a9ac8606Spatrick
548e5dd7070Spatrick } // namespace tooling
549e5dd7070Spatrick } // namespace clang
550