xref: /openbsd-src/gnu/llvm/clang/lib/Tooling/InterpolatingCompilationDatabase.cpp (revision 12c855180aad702bbcca06e0398d774beeafb155)
1e5dd7070Spatrick //===- InterpolatingCompilationDatabase.cpp ---------------------*- C++ -*-===//
2e5dd7070Spatrick //
3e5dd7070Spatrick // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4e5dd7070Spatrick // See https://llvm.org/LICENSE.txt for license information.
5e5dd7070Spatrick // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6e5dd7070Spatrick //
7e5dd7070Spatrick //===----------------------------------------------------------------------===//
8e5dd7070Spatrick //
9e5dd7070Spatrick // InterpolatingCompilationDatabase wraps another CompilationDatabase and
10e5dd7070Spatrick // attempts to heuristically determine appropriate compile commands for files
11e5dd7070Spatrick // that are not included, such as headers or newly created files.
12e5dd7070Spatrick //
13e5dd7070Spatrick // Motivating cases include:
14e5dd7070Spatrick //   Header files that live next to their implementation files. These typically
15e5dd7070Spatrick // share a base filename. (libclang/CXString.h, libclang/CXString.cpp).
16e5dd7070Spatrick //   Some projects separate headers from includes. Filenames still typically
17e5dd7070Spatrick // match, maybe other path segments too. (include/llvm/IR/Use.h, lib/IR/Use.cc).
18e5dd7070Spatrick //   Matches are sometimes only approximate (Sema.h, SemaDecl.cpp). This goes
19e5dd7070Spatrick // for directories too (Support/Unix/Process.inc, lib/Support/Process.cpp).
20e5dd7070Spatrick //   Even if we can't find a "right" compile command, even a random one from
21e5dd7070Spatrick // the project will tend to get important flags like -I and -x right.
22e5dd7070Spatrick //
23e5dd7070Spatrick // We "borrow" the compile command for the closest available file:
24e5dd7070Spatrick //   - points are awarded if the filename matches (ignoring extension)
25e5dd7070Spatrick //   - points are awarded if the directory structure matches
26e5dd7070Spatrick //   - ties are broken by length of path prefix match
27e5dd7070Spatrick //
28e5dd7070Spatrick // The compile command is adjusted, replacing the filename and removing output
29e5dd7070Spatrick // file arguments. The -x and -std flags may be affected too.
30e5dd7070Spatrick //
31e5dd7070Spatrick // Source language is a tricky issue: is it OK to use a .c file's command
32e5dd7070Spatrick // for building a .cc file? What language is a .h file in?
33e5dd7070Spatrick //   - We only consider compile commands for c-family languages as candidates.
34e5dd7070Spatrick //   - For files whose language is implied by the filename (e.g. .m, .hpp)
35e5dd7070Spatrick //     we prefer candidates from the same language.
36e5dd7070Spatrick //     If we must cross languages, we drop any -x and -std flags.
37e5dd7070Spatrick //   - For .h files, candidates from any c-family language are acceptable.
38e5dd7070Spatrick //     We use the candidate's language, inserting  e.g. -x c++-header.
39e5dd7070Spatrick //
40e5dd7070Spatrick // This class is only useful when wrapping databases that can enumerate all
41e5dd7070Spatrick // their compile commands. If getAllFilenames() is empty, no inference occurs.
42e5dd7070Spatrick //
43e5dd7070Spatrick //===----------------------------------------------------------------------===//
44e5dd7070Spatrick 
45e5dd7070Spatrick #include "clang/Basic/LangStandard.h"
46a9ac8606Spatrick #include "clang/Driver/Driver.h"
47e5dd7070Spatrick #include "clang/Driver/Options.h"
48e5dd7070Spatrick #include "clang/Driver/Types.h"
49e5dd7070Spatrick #include "clang/Tooling/CompilationDatabase.h"
50a9ac8606Spatrick #include "llvm/ADT/ArrayRef.h"
51e5dd7070Spatrick #include "llvm/ADT/DenseMap.h"
52e5dd7070Spatrick #include "llvm/ADT/StringExtras.h"
53e5dd7070Spatrick #include "llvm/Option/ArgList.h"
54e5dd7070Spatrick #include "llvm/Option/OptTable.h"
55e5dd7070Spatrick #include "llvm/Support/Debug.h"
56e5dd7070Spatrick #include "llvm/Support/Path.h"
57e5dd7070Spatrick #include "llvm/Support/StringSaver.h"
58e5dd7070Spatrick #include "llvm/Support/raw_ostream.h"
59e5dd7070Spatrick #include <memory>
60*12c85518Srobert #include <optional>
61e5dd7070Spatrick 
62e5dd7070Spatrick namespace clang {
63e5dd7070Spatrick namespace tooling {
64e5dd7070Spatrick namespace {
65e5dd7070Spatrick using namespace llvm;
66e5dd7070Spatrick namespace types = clang::driver::types;
67e5dd7070Spatrick namespace path = llvm::sys::path;
68e5dd7070Spatrick 
69e5dd7070Spatrick // The length of the prefix these two strings have in common.
matchingPrefix(StringRef L,StringRef R)70e5dd7070Spatrick size_t matchingPrefix(StringRef L, StringRef R) {
71e5dd7070Spatrick   size_t Limit = std::min(L.size(), R.size());
72e5dd7070Spatrick   for (size_t I = 0; I < Limit; ++I)
73e5dd7070Spatrick     if (L[I] != R[I])
74e5dd7070Spatrick       return I;
75e5dd7070Spatrick   return Limit;
76e5dd7070Spatrick }
77e5dd7070Spatrick 
78e5dd7070Spatrick // A comparator for searching SubstringWithIndexes with std::equal_range etc.
79e5dd7070Spatrick // Optionaly prefix semantics: compares equal if the key is a prefix.
80e5dd7070Spatrick template <bool Prefix> struct Less {
operator ()clang::tooling::__anone90b9a300111::Less81e5dd7070Spatrick   bool operator()(StringRef Key, std::pair<StringRef, size_t> Value) const {
82e5dd7070Spatrick     StringRef V = Prefix ? Value.first.substr(0, Key.size()) : Value.first;
83e5dd7070Spatrick     return Key < V;
84e5dd7070Spatrick   }
operator ()clang::tooling::__anone90b9a300111::Less85e5dd7070Spatrick   bool operator()(std::pair<StringRef, size_t> Value, StringRef Key) const {
86e5dd7070Spatrick     StringRef V = Prefix ? Value.first.substr(0, Key.size()) : Value.first;
87e5dd7070Spatrick     return V < Key;
88e5dd7070Spatrick   }
89e5dd7070Spatrick };
90e5dd7070Spatrick 
91e5dd7070Spatrick // Infer type from filename. If we might have gotten it wrong, set *Certain.
92e5dd7070Spatrick // *.h will be inferred as a C header, but not certain.
guessType(StringRef Filename,bool * Certain=nullptr)93e5dd7070Spatrick types::ID guessType(StringRef Filename, bool *Certain = nullptr) {
94e5dd7070Spatrick   // path::extension is ".cpp", lookupTypeForExtension wants "cpp".
95e5dd7070Spatrick   auto Lang =
96e5dd7070Spatrick       types::lookupTypeForExtension(path::extension(Filename).substr(1));
97e5dd7070Spatrick   if (Certain)
98e5dd7070Spatrick     *Certain = Lang != types::TY_CHeader && Lang != types::TY_INVALID;
99e5dd7070Spatrick   return Lang;
100e5dd7070Spatrick }
101e5dd7070Spatrick 
102e5dd7070Spatrick // Return Lang as one of the canonical supported types.
103e5dd7070Spatrick // e.g. c-header --> c; fortran --> TY_INVALID
foldType(types::ID Lang)104e5dd7070Spatrick static types::ID foldType(types::ID Lang) {
105e5dd7070Spatrick   switch (Lang) {
106e5dd7070Spatrick   case types::TY_C:
107e5dd7070Spatrick   case types::TY_CHeader:
108e5dd7070Spatrick     return types::TY_C;
109e5dd7070Spatrick   case types::TY_ObjC:
110e5dd7070Spatrick   case types::TY_ObjCHeader:
111e5dd7070Spatrick     return types::TY_ObjC;
112e5dd7070Spatrick   case types::TY_CXX:
113e5dd7070Spatrick   case types::TY_CXXHeader:
114e5dd7070Spatrick     return types::TY_CXX;
115e5dd7070Spatrick   case types::TY_ObjCXX:
116e5dd7070Spatrick   case types::TY_ObjCXXHeader:
117e5dd7070Spatrick     return types::TY_ObjCXX;
118ec727ea7Spatrick   case types::TY_CUDA:
119ec727ea7Spatrick   case types::TY_CUDA_DEVICE:
120ec727ea7Spatrick     return types::TY_CUDA;
121e5dd7070Spatrick   default:
122e5dd7070Spatrick     return types::TY_INVALID;
123e5dd7070Spatrick   }
124e5dd7070Spatrick }
125e5dd7070Spatrick 
126e5dd7070Spatrick // A CompileCommand that can be applied to another file.
127e5dd7070Spatrick struct TransferableCommand {
128e5dd7070Spatrick   // Flags that should not apply to all files are stripped from CommandLine.
129e5dd7070Spatrick   CompileCommand Cmd;
130e5dd7070Spatrick   // Language detected from -x or the filename. Never TY_INVALID.
131*12c85518Srobert   std::optional<types::ID> Type;
132e5dd7070Spatrick   // Standard specified by -std.
133e5dd7070Spatrick   LangStandard::Kind Std = LangStandard::lang_unspecified;
134e5dd7070Spatrick   // Whether the command line is for the cl-compatible driver.
135e5dd7070Spatrick   bool ClangCLMode;
136e5dd7070Spatrick 
TransferableCommandclang::tooling::__anone90b9a300111::TransferableCommand137e5dd7070Spatrick   TransferableCommand(CompileCommand C)
138a9ac8606Spatrick       : Cmd(std::move(C)), Type(guessType(Cmd.Filename)) {
139e5dd7070Spatrick     std::vector<std::string> OldArgs = std::move(Cmd.CommandLine);
140e5dd7070Spatrick     Cmd.CommandLine.clear();
141e5dd7070Spatrick 
142e5dd7070Spatrick     // Wrap the old arguments in an InputArgList.
143e5dd7070Spatrick     llvm::opt::InputArgList ArgList;
144e5dd7070Spatrick     {
145e5dd7070Spatrick       SmallVector<const char *, 16> TmpArgv;
146e5dd7070Spatrick       for (const std::string &S : OldArgs)
147e5dd7070Spatrick         TmpArgv.push_back(S.c_str());
148a9ac8606Spatrick       ClangCLMode = !TmpArgv.empty() &&
149a9ac8606Spatrick                     driver::IsClangCL(driver::getDriverMode(
150*12c85518Srobert                         TmpArgv.front(), llvm::ArrayRef(TmpArgv).slice(1)));
151e5dd7070Spatrick       ArgList = {TmpArgv.begin(), TmpArgv.end()};
152e5dd7070Spatrick     }
153e5dd7070Spatrick 
154e5dd7070Spatrick     // Parse the old args in order to strip out and record unwanted flags.
155e5dd7070Spatrick     // We parse each argument individually so that we can retain the exact
156e5dd7070Spatrick     // spelling of each argument; re-rendering is lossy for aliased flags.
157e5dd7070Spatrick     // E.g. in CL mode, /W4 maps to -Wall.
158e5dd7070Spatrick     auto &OptTable = clang::driver::getDriverOptTable();
159e5dd7070Spatrick     if (!OldArgs.empty())
160e5dd7070Spatrick       Cmd.CommandLine.emplace_back(OldArgs.front());
161e5dd7070Spatrick     for (unsigned Pos = 1; Pos < OldArgs.size();) {
162e5dd7070Spatrick       using namespace driver::options;
163e5dd7070Spatrick 
164e5dd7070Spatrick       const unsigned OldPos = Pos;
165e5dd7070Spatrick       std::unique_ptr<llvm::opt::Arg> Arg(OptTable.ParseOneArg(
166e5dd7070Spatrick           ArgList, Pos,
167*12c85518Srobert           /* Include */ ClangCLMode ? CoreOption | CLOption | CLDXCOption : 0,
168*12c85518Srobert           /* Exclude */ ClangCLMode ? 0 : CLOption | CLDXCOption));
169e5dd7070Spatrick 
170e5dd7070Spatrick       if (!Arg)
171e5dd7070Spatrick         continue;
172e5dd7070Spatrick 
173e5dd7070Spatrick       const llvm::opt::Option &Opt = Arg->getOption();
174e5dd7070Spatrick 
175e5dd7070Spatrick       // Strip input and output files.
176e5dd7070Spatrick       if (Opt.matches(OPT_INPUT) || Opt.matches(OPT_o) ||
177e5dd7070Spatrick           (ClangCLMode && (Opt.matches(OPT__SLASH_Fa) ||
178e5dd7070Spatrick                            Opt.matches(OPT__SLASH_Fe) ||
179e5dd7070Spatrick                            Opt.matches(OPT__SLASH_Fi) ||
180e5dd7070Spatrick                            Opt.matches(OPT__SLASH_Fo))))
181e5dd7070Spatrick         continue;
182e5dd7070Spatrick 
183a9ac8606Spatrick       // ...including when the inputs are passed after --.
184a9ac8606Spatrick       if (Opt.matches(OPT__DASH_DASH))
185a9ac8606Spatrick         break;
186a9ac8606Spatrick 
187e5dd7070Spatrick       // Strip -x, but record the overridden language.
188e5dd7070Spatrick       if (const auto GivenType = tryParseTypeArg(*Arg)) {
189e5dd7070Spatrick         Type = *GivenType;
190e5dd7070Spatrick         continue;
191e5dd7070Spatrick       }
192e5dd7070Spatrick 
193e5dd7070Spatrick       // Strip -std, but record the value.
194e5dd7070Spatrick       if (const auto GivenStd = tryParseStdArg(*Arg)) {
195e5dd7070Spatrick         if (*GivenStd != LangStandard::lang_unspecified)
196e5dd7070Spatrick           Std = *GivenStd;
197e5dd7070Spatrick         continue;
198e5dd7070Spatrick       }
199e5dd7070Spatrick 
200e5dd7070Spatrick       Cmd.CommandLine.insert(Cmd.CommandLine.end(),
201e5dd7070Spatrick                              OldArgs.data() + OldPos, OldArgs.data() + Pos);
202e5dd7070Spatrick     }
203e5dd7070Spatrick 
204e5dd7070Spatrick     // Make use of -std iff -x was missing.
205e5dd7070Spatrick     if (Type == types::TY_INVALID && Std != LangStandard::lang_unspecified)
206e5dd7070Spatrick       Type = toType(LangStandard::getLangStandardForKind(Std).getLanguage());
207e5dd7070Spatrick     Type = foldType(*Type);
208e5dd7070Spatrick     // The contract is to store None instead of TY_INVALID.
209e5dd7070Spatrick     if (Type == types::TY_INVALID)
210*12c85518Srobert       Type = std::nullopt;
211e5dd7070Spatrick   }
212e5dd7070Spatrick 
213e5dd7070Spatrick   // Produce a CompileCommand for \p filename, based on this one.
214a9ac8606Spatrick   // (This consumes the TransferableCommand just to avoid copying Cmd).
transferToclang::tooling::__anone90b9a300111::TransferableCommand215a9ac8606Spatrick   CompileCommand transferTo(StringRef Filename) && {
216a9ac8606Spatrick     CompileCommand Result = std::move(Cmd);
217a9ac8606Spatrick     Result.Heuristic = "inferred from " + Result.Filename;
218ec727ea7Spatrick     Result.Filename = std::string(Filename);
219e5dd7070Spatrick     bool TypeCertain;
220e5dd7070Spatrick     auto TargetType = guessType(Filename, &TypeCertain);
221e5dd7070Spatrick     // If the filename doesn't determine the language (.h), transfer with -x.
222e5dd7070Spatrick     if ((!TargetType || !TypeCertain) && Type) {
223e5dd7070Spatrick       // Use *Type, or its header variant if the file is a header.
224e5dd7070Spatrick       // Treat no/invalid extension as header (e.g. C++ standard library).
225e5dd7070Spatrick       TargetType =
226e5dd7070Spatrick           (!TargetType || types::onlyPrecompileType(TargetType)) // header?
227e5dd7070Spatrick               ? types::lookupHeaderTypeForSourceType(*Type)
228e5dd7070Spatrick               : *Type;
229e5dd7070Spatrick       if (ClangCLMode) {
230e5dd7070Spatrick         const StringRef Flag = toCLFlag(TargetType);
231e5dd7070Spatrick         if (!Flag.empty())
232ec727ea7Spatrick           Result.CommandLine.push_back(std::string(Flag));
233e5dd7070Spatrick       } else {
234e5dd7070Spatrick         Result.CommandLine.push_back("-x");
235e5dd7070Spatrick         Result.CommandLine.push_back(types::getTypeName(TargetType));
236e5dd7070Spatrick       }
237e5dd7070Spatrick     }
238e5dd7070Spatrick     // --std flag may only be transferred if the language is the same.
239e5dd7070Spatrick     // We may consider "translating" these, e.g. c++11 -> c11.
240e5dd7070Spatrick     if (Std != LangStandard::lang_unspecified && foldType(TargetType) == Type) {
241e5dd7070Spatrick       Result.CommandLine.emplace_back((
242e5dd7070Spatrick           llvm::Twine(ClangCLMode ? "/std:" : "-std=") +
243e5dd7070Spatrick           LangStandard::getLangStandardForKind(Std).getName()).str());
244e5dd7070Spatrick     }
245a9ac8606Spatrick     Result.CommandLine.push_back("--");
246ec727ea7Spatrick     Result.CommandLine.push_back(std::string(Filename));
247e5dd7070Spatrick     return Result;
248e5dd7070Spatrick   }
249e5dd7070Spatrick 
250e5dd7070Spatrick private:
251e5dd7070Spatrick   // Map the language from the --std flag to that of the -x flag.
toTypeclang::tooling::__anone90b9a300111::TransferableCommand252e5dd7070Spatrick   static types::ID toType(Language Lang) {
253e5dd7070Spatrick     switch (Lang) {
254e5dd7070Spatrick     case Language::C:
255e5dd7070Spatrick       return types::TY_C;
256e5dd7070Spatrick     case Language::CXX:
257e5dd7070Spatrick       return types::TY_CXX;
258e5dd7070Spatrick     case Language::ObjC:
259e5dd7070Spatrick       return types::TY_ObjC;
260e5dd7070Spatrick     case Language::ObjCXX:
261e5dd7070Spatrick       return types::TY_ObjCXX;
262e5dd7070Spatrick     default:
263e5dd7070Spatrick       return types::TY_INVALID;
264e5dd7070Spatrick     }
265e5dd7070Spatrick   }
266e5dd7070Spatrick 
267e5dd7070Spatrick   // Convert a file type to the matching CL-style type flag.
toCLFlagclang::tooling::__anone90b9a300111::TransferableCommand268e5dd7070Spatrick   static StringRef toCLFlag(types::ID Type) {
269e5dd7070Spatrick     switch (Type) {
270e5dd7070Spatrick     case types::TY_C:
271e5dd7070Spatrick     case types::TY_CHeader:
272e5dd7070Spatrick       return "/TC";
273e5dd7070Spatrick     case types::TY_CXX:
274e5dd7070Spatrick     case types::TY_CXXHeader:
275e5dd7070Spatrick       return "/TP";
276e5dd7070Spatrick     default:
277e5dd7070Spatrick       return StringRef();
278e5dd7070Spatrick     }
279e5dd7070Spatrick   }
280e5dd7070Spatrick 
281e5dd7070Spatrick   // Try to interpret the argument as a type specifier, e.g. '-x'.
tryParseTypeArgclang::tooling::__anone90b9a300111::TransferableCommand282*12c85518Srobert   std::optional<types::ID> tryParseTypeArg(const llvm::opt::Arg &Arg) {
283e5dd7070Spatrick     const llvm::opt::Option &Opt = Arg.getOption();
284e5dd7070Spatrick     using namespace driver::options;
285e5dd7070Spatrick     if (ClangCLMode) {
286e5dd7070Spatrick       if (Opt.matches(OPT__SLASH_TC) || Opt.matches(OPT__SLASH_Tc))
287e5dd7070Spatrick         return types::TY_C;
288e5dd7070Spatrick       if (Opt.matches(OPT__SLASH_TP) || Opt.matches(OPT__SLASH_Tp))
289e5dd7070Spatrick         return types::TY_CXX;
290e5dd7070Spatrick     } else {
291e5dd7070Spatrick       if (Opt.matches(driver::options::OPT_x))
292e5dd7070Spatrick         return types::lookupTypeForTypeSpecifier(Arg.getValue());
293e5dd7070Spatrick     }
294*12c85518Srobert     return std::nullopt;
295e5dd7070Spatrick   }
296e5dd7070Spatrick 
297e5dd7070Spatrick   // Try to interpret the argument as '-std='.
tryParseStdArgclang::tooling::__anone90b9a300111::TransferableCommand298*12c85518Srobert   std::optional<LangStandard::Kind> tryParseStdArg(const llvm::opt::Arg &Arg) {
299e5dd7070Spatrick     using namespace driver::options;
300e5dd7070Spatrick     if (Arg.getOption().matches(ClangCLMode ? OPT__SLASH_std : OPT_std_EQ))
301e5dd7070Spatrick       return LangStandard::getLangKind(Arg.getValue());
302*12c85518Srobert     return std::nullopt;
303e5dd7070Spatrick   }
304e5dd7070Spatrick };
305e5dd7070Spatrick 
306e5dd7070Spatrick // Given a filename, FileIndex picks the best matching file from the underlying
307e5dd7070Spatrick // DB. This is the proxy file whose CompileCommand will be reused. The
308e5dd7070Spatrick // heuristics incorporate file name, extension, and directory structure.
309e5dd7070Spatrick // Strategy:
310e5dd7070Spatrick // - Build indexes of each of the substrings we want to look up by.
311e5dd7070Spatrick //   These indexes are just sorted lists of the substrings.
312e5dd7070Spatrick // - Each criterion corresponds to a range lookup into the index, so we only
313e5dd7070Spatrick //   need O(log N) string comparisons to determine scores.
314e5dd7070Spatrick //
315e5dd7070Spatrick // Apart from path proximity signals, also takes file extensions into account
316e5dd7070Spatrick // when scoring the candidates.
317e5dd7070Spatrick class FileIndex {
318e5dd7070Spatrick public:
FileIndex(std::vector<std::string> Files)319e5dd7070Spatrick   FileIndex(std::vector<std::string> Files)
320e5dd7070Spatrick       : OriginalPaths(std::move(Files)), Strings(Arena) {
321e5dd7070Spatrick     // Sort commands by filename for determinism (index is a tiebreaker later).
322e5dd7070Spatrick     llvm::sort(OriginalPaths);
323e5dd7070Spatrick     Paths.reserve(OriginalPaths.size());
324e5dd7070Spatrick     Types.reserve(OriginalPaths.size());
325e5dd7070Spatrick     Stems.reserve(OriginalPaths.size());
326e5dd7070Spatrick     for (size_t I = 0; I < OriginalPaths.size(); ++I) {
327e5dd7070Spatrick       StringRef Path = Strings.save(StringRef(OriginalPaths[I]).lower());
328e5dd7070Spatrick 
329e5dd7070Spatrick       Paths.emplace_back(Path, I);
330*12c85518Srobert       Types.push_back(foldType(guessType(OriginalPaths[I])));
331e5dd7070Spatrick       Stems.emplace_back(sys::path::stem(Path), I);
332e5dd7070Spatrick       auto Dir = ++sys::path::rbegin(Path), DirEnd = sys::path::rend(Path);
333e5dd7070Spatrick       for (int J = 0; J < DirectorySegmentsIndexed && Dir != DirEnd; ++J, ++Dir)
334e5dd7070Spatrick         if (Dir->size() > ShortDirectorySegment) // not trivial ones
335e5dd7070Spatrick           Components.emplace_back(*Dir, I);
336e5dd7070Spatrick     }
337e5dd7070Spatrick     llvm::sort(Paths);
338e5dd7070Spatrick     llvm::sort(Stems);
339e5dd7070Spatrick     llvm::sort(Components);
340e5dd7070Spatrick   }
341e5dd7070Spatrick 
empty() const342e5dd7070Spatrick   bool empty() const { return Paths.empty(); }
343e5dd7070Spatrick 
344e5dd7070Spatrick   // Returns the path for the file that best fits OriginalFilename.
345e5dd7070Spatrick   // Candidates with extensions matching PreferLanguage will be chosen over
346e5dd7070Spatrick   // others (unless it's TY_INVALID, or all candidates are bad).
chooseProxy(StringRef OriginalFilename,types::ID PreferLanguage) const347e5dd7070Spatrick   StringRef chooseProxy(StringRef OriginalFilename,
348e5dd7070Spatrick                         types::ID PreferLanguage) const {
349e5dd7070Spatrick     assert(!empty() && "need at least one candidate!");
350e5dd7070Spatrick     std::string Filename = OriginalFilename.lower();
351e5dd7070Spatrick     auto Candidates = scoreCandidates(Filename);
352e5dd7070Spatrick     std::pair<size_t, int> Best =
353e5dd7070Spatrick         pickWinner(Candidates, Filename, PreferLanguage);
354e5dd7070Spatrick 
355e5dd7070Spatrick     DEBUG_WITH_TYPE(
356e5dd7070Spatrick         "interpolate",
357e5dd7070Spatrick         llvm::dbgs() << "interpolate: chose " << OriginalPaths[Best.first]
358e5dd7070Spatrick                      << " as proxy for " << OriginalFilename << " preferring "
359e5dd7070Spatrick                      << (PreferLanguage == types::TY_INVALID
360e5dd7070Spatrick                              ? "none"
361e5dd7070Spatrick                              : types::getTypeName(PreferLanguage))
362e5dd7070Spatrick                      << " score=" << Best.second << "\n");
363e5dd7070Spatrick     return OriginalPaths[Best.first];
364e5dd7070Spatrick   }
365e5dd7070Spatrick 
366e5dd7070Spatrick private:
367e5dd7070Spatrick   using SubstringAndIndex = std::pair<StringRef, size_t>;
368e5dd7070Spatrick   // Directory matching parameters: we look at the last two segments of the
369e5dd7070Spatrick   // parent directory (usually the semantically significant ones in practice).
370e5dd7070Spatrick   // We search only the last four of each candidate (for efficiency).
371e5dd7070Spatrick   constexpr static int DirectorySegmentsIndexed = 4;
372e5dd7070Spatrick   constexpr static int DirectorySegmentsQueried = 2;
373e5dd7070Spatrick   constexpr static int ShortDirectorySegment = 1; // Only look at longer names.
374e5dd7070Spatrick 
375e5dd7070Spatrick   // Award points to candidate entries that should be considered for the file.
376e5dd7070Spatrick   // Returned keys are indexes into paths, and the values are (nonzero) scores.
scoreCandidates(StringRef Filename) const377e5dd7070Spatrick   DenseMap<size_t, int> scoreCandidates(StringRef Filename) const {
378e5dd7070Spatrick     // Decompose Filename into the parts we care about.
379e5dd7070Spatrick     // /some/path/complicated/project/Interesting.h
380e5dd7070Spatrick     // [-prefix--][---dir---] [-dir-] [--stem---]
381e5dd7070Spatrick     StringRef Stem = sys::path::stem(Filename);
382e5dd7070Spatrick     llvm::SmallVector<StringRef, DirectorySegmentsQueried> Dirs;
383e5dd7070Spatrick     llvm::StringRef Prefix;
384e5dd7070Spatrick     auto Dir = ++sys::path::rbegin(Filename),
385e5dd7070Spatrick          DirEnd = sys::path::rend(Filename);
386e5dd7070Spatrick     for (int I = 0; I < DirectorySegmentsQueried && Dir != DirEnd; ++I, ++Dir) {
387e5dd7070Spatrick       if (Dir->size() > ShortDirectorySegment)
388e5dd7070Spatrick         Dirs.push_back(*Dir);
389e5dd7070Spatrick       Prefix = Filename.substr(0, Dir - DirEnd);
390e5dd7070Spatrick     }
391e5dd7070Spatrick 
392e5dd7070Spatrick     // Now award points based on lookups into our various indexes.
393e5dd7070Spatrick     DenseMap<size_t, int> Candidates; // Index -> score.
394e5dd7070Spatrick     auto Award = [&](int Points, ArrayRef<SubstringAndIndex> Range) {
395e5dd7070Spatrick       for (const auto &Entry : Range)
396e5dd7070Spatrick         Candidates[Entry.second] += Points;
397e5dd7070Spatrick     };
398e5dd7070Spatrick     // Award one point if the file's basename is a prefix of the candidate,
399e5dd7070Spatrick     // and another if it's an exact match (so exact matches get two points).
400e5dd7070Spatrick     Award(1, indexLookup</*Prefix=*/true>(Stem, Stems));
401e5dd7070Spatrick     Award(1, indexLookup</*Prefix=*/false>(Stem, Stems));
402e5dd7070Spatrick     // For each of the last few directories in the Filename, award a point
403e5dd7070Spatrick     // if it's present in the candidate.
404e5dd7070Spatrick     for (StringRef Dir : Dirs)
405e5dd7070Spatrick       Award(1, indexLookup</*Prefix=*/false>(Dir, Components));
406e5dd7070Spatrick     // Award one more point if the whole rest of the path matches.
407e5dd7070Spatrick     if (sys::path::root_directory(Prefix) != Prefix)
408e5dd7070Spatrick       Award(1, indexLookup</*Prefix=*/true>(Prefix, Paths));
409e5dd7070Spatrick     return Candidates;
410e5dd7070Spatrick   }
411e5dd7070Spatrick 
412e5dd7070Spatrick   // Pick a single winner from the set of scored candidates.
413e5dd7070Spatrick   // Returns (index, score).
pickWinner(const DenseMap<size_t,int> & Candidates,StringRef Filename,types::ID PreferredLanguage) const414e5dd7070Spatrick   std::pair<size_t, int> pickWinner(const DenseMap<size_t, int> &Candidates,
415e5dd7070Spatrick                                     StringRef Filename,
416e5dd7070Spatrick                                     types::ID PreferredLanguage) const {
417e5dd7070Spatrick     struct ScoredCandidate {
418e5dd7070Spatrick       size_t Index;
419e5dd7070Spatrick       bool Preferred;
420e5dd7070Spatrick       int Points;
421e5dd7070Spatrick       size_t PrefixLength;
422e5dd7070Spatrick     };
423e5dd7070Spatrick     // Choose the best candidate by (preferred, points, prefix length, alpha).
424e5dd7070Spatrick     ScoredCandidate Best = {size_t(-1), false, 0, 0};
425e5dd7070Spatrick     for (const auto &Candidate : Candidates) {
426e5dd7070Spatrick       ScoredCandidate S;
427e5dd7070Spatrick       S.Index = Candidate.first;
428e5dd7070Spatrick       S.Preferred = PreferredLanguage == types::TY_INVALID ||
429e5dd7070Spatrick                     PreferredLanguage == Types[S.Index];
430e5dd7070Spatrick       S.Points = Candidate.second;
431e5dd7070Spatrick       if (!S.Preferred && Best.Preferred)
432e5dd7070Spatrick         continue;
433e5dd7070Spatrick       if (S.Preferred == Best.Preferred) {
434e5dd7070Spatrick         if (S.Points < Best.Points)
435e5dd7070Spatrick           continue;
436e5dd7070Spatrick         if (S.Points == Best.Points) {
437e5dd7070Spatrick           S.PrefixLength = matchingPrefix(Filename, Paths[S.Index].first);
438e5dd7070Spatrick           if (S.PrefixLength < Best.PrefixLength)
439e5dd7070Spatrick             continue;
440e5dd7070Spatrick           // hidden heuristics should at least be deterministic!
441e5dd7070Spatrick           if (S.PrefixLength == Best.PrefixLength)
442e5dd7070Spatrick             if (S.Index > Best.Index)
443e5dd7070Spatrick               continue;
444e5dd7070Spatrick         }
445e5dd7070Spatrick       }
446e5dd7070Spatrick       // PrefixLength was only set above if actually needed for a tiebreak.
447e5dd7070Spatrick       // But it definitely needs to be set to break ties in the future.
448e5dd7070Spatrick       S.PrefixLength = matchingPrefix(Filename, Paths[S.Index].first);
449e5dd7070Spatrick       Best = S;
450e5dd7070Spatrick     }
451e5dd7070Spatrick     // Edge case: no candidate got any points.
452e5dd7070Spatrick     // We ignore PreferredLanguage at this point (not ideal).
453e5dd7070Spatrick     if (Best.Index == size_t(-1))
454e5dd7070Spatrick       return {longestMatch(Filename, Paths).second, 0};
455e5dd7070Spatrick     return {Best.Index, Best.Points};
456e5dd7070Spatrick   }
457e5dd7070Spatrick 
458e5dd7070Spatrick   // Returns the range within a sorted index that compares equal to Key.
459e5dd7070Spatrick   // If Prefix is true, it's instead the range starting with Key.
460e5dd7070Spatrick   template <bool Prefix>
461e5dd7070Spatrick   ArrayRef<SubstringAndIndex>
indexLookup(StringRef Key,ArrayRef<SubstringAndIndex> Idx) const462e5dd7070Spatrick   indexLookup(StringRef Key, ArrayRef<SubstringAndIndex> Idx) const {
463e5dd7070Spatrick     // Use pointers as iteratiors to ease conversion of result to ArrayRef.
464e5dd7070Spatrick     auto Range = std::equal_range(Idx.data(), Idx.data() + Idx.size(), Key,
465e5dd7070Spatrick                                   Less<Prefix>());
466e5dd7070Spatrick     return {Range.first, Range.second};
467e5dd7070Spatrick   }
468e5dd7070Spatrick 
469e5dd7070Spatrick   // Performs a point lookup into a nonempty index, returning a longest match.
longestMatch(StringRef Key,ArrayRef<SubstringAndIndex> Idx) const470e5dd7070Spatrick   SubstringAndIndex longestMatch(StringRef Key,
471e5dd7070Spatrick                                  ArrayRef<SubstringAndIndex> Idx) const {
472e5dd7070Spatrick     assert(!Idx.empty());
473e5dd7070Spatrick     // Longest substring match will be adjacent to a direct lookup.
474e5dd7070Spatrick     auto It = llvm::lower_bound(Idx, SubstringAndIndex{Key, 0});
475e5dd7070Spatrick     if (It == Idx.begin())
476e5dd7070Spatrick       return *It;
477e5dd7070Spatrick     if (It == Idx.end())
478e5dd7070Spatrick       return *--It;
479e5dd7070Spatrick     // Have to choose between It and It-1
480e5dd7070Spatrick     size_t Prefix = matchingPrefix(Key, It->first);
481e5dd7070Spatrick     size_t PrevPrefix = matchingPrefix(Key, (It - 1)->first);
482e5dd7070Spatrick     return Prefix > PrevPrefix ? *It : *--It;
483e5dd7070Spatrick   }
484e5dd7070Spatrick 
485e5dd7070Spatrick   // Original paths, everything else is in lowercase.
486e5dd7070Spatrick   std::vector<std::string> OriginalPaths;
487e5dd7070Spatrick   BumpPtrAllocator Arena;
488e5dd7070Spatrick   StringSaver Strings;
489e5dd7070Spatrick   // Indexes of candidates by certain substrings.
490e5dd7070Spatrick   // String is lowercase and sorted, index points into OriginalPaths.
491e5dd7070Spatrick   std::vector<SubstringAndIndex> Paths;      // Full path.
492e5dd7070Spatrick   // Lang types obtained by guessing on the corresponding path. I-th element is
493e5dd7070Spatrick   // a type for the I-th path.
494e5dd7070Spatrick   std::vector<types::ID> Types;
495e5dd7070Spatrick   std::vector<SubstringAndIndex> Stems;      // Basename, without extension.
496e5dd7070Spatrick   std::vector<SubstringAndIndex> Components; // Last path components.
497e5dd7070Spatrick };
498e5dd7070Spatrick 
499e5dd7070Spatrick // The actual CompilationDatabase wrapper delegates to its inner database.
500e5dd7070Spatrick // If no match, looks up a proxy file in FileIndex and transfers its
501e5dd7070Spatrick // command to the requested file.
502e5dd7070Spatrick class InterpolatingCompilationDatabase : public CompilationDatabase {
503e5dd7070Spatrick public:
InterpolatingCompilationDatabase(std::unique_ptr<CompilationDatabase> Inner)504e5dd7070Spatrick   InterpolatingCompilationDatabase(std::unique_ptr<CompilationDatabase> Inner)
505e5dd7070Spatrick       : Inner(std::move(Inner)), Index(this->Inner->getAllFiles()) {}
506e5dd7070Spatrick 
507e5dd7070Spatrick   std::vector<CompileCommand>
getCompileCommands(StringRef Filename) const508e5dd7070Spatrick   getCompileCommands(StringRef Filename) const override {
509e5dd7070Spatrick     auto Known = Inner->getCompileCommands(Filename);
510e5dd7070Spatrick     if (Index.empty() || !Known.empty())
511e5dd7070Spatrick       return Known;
512e5dd7070Spatrick     bool TypeCertain;
513e5dd7070Spatrick     auto Lang = guessType(Filename, &TypeCertain);
514e5dd7070Spatrick     if (!TypeCertain)
515e5dd7070Spatrick       Lang = types::TY_INVALID;
516e5dd7070Spatrick     auto ProxyCommands =
517e5dd7070Spatrick         Inner->getCompileCommands(Index.chooseProxy(Filename, foldType(Lang)));
518e5dd7070Spatrick     if (ProxyCommands.empty())
519e5dd7070Spatrick       return {};
520a9ac8606Spatrick     return {transferCompileCommand(std::move(ProxyCommands.front()), Filename)};
521e5dd7070Spatrick   }
522e5dd7070Spatrick 
getAllFiles() const523e5dd7070Spatrick   std::vector<std::string> getAllFiles() const override {
524e5dd7070Spatrick     return Inner->getAllFiles();
525e5dd7070Spatrick   }
526e5dd7070Spatrick 
getAllCompileCommands() const527e5dd7070Spatrick   std::vector<CompileCommand> getAllCompileCommands() const override {
528e5dd7070Spatrick     return Inner->getAllCompileCommands();
529e5dd7070Spatrick   }
530e5dd7070Spatrick 
531e5dd7070Spatrick private:
532e5dd7070Spatrick   std::unique_ptr<CompilationDatabase> Inner;
533e5dd7070Spatrick   FileIndex Index;
534e5dd7070Spatrick };
535e5dd7070Spatrick 
536e5dd7070Spatrick } // namespace
537e5dd7070Spatrick 
538e5dd7070Spatrick std::unique_ptr<CompilationDatabase>
inferMissingCompileCommands(std::unique_ptr<CompilationDatabase> Inner)539e5dd7070Spatrick inferMissingCompileCommands(std::unique_ptr<CompilationDatabase> Inner) {
540e5dd7070Spatrick   return std::make_unique<InterpolatingCompilationDatabase>(std::move(Inner));
541e5dd7070Spatrick }
542e5dd7070Spatrick 
transferCompileCommand(CompileCommand Cmd,StringRef Filename)543a9ac8606Spatrick tooling::CompileCommand transferCompileCommand(CompileCommand Cmd,
544a9ac8606Spatrick                                                StringRef Filename) {
545a9ac8606Spatrick   return TransferableCommand(std::move(Cmd)).transferTo(Filename);
546a9ac8606Spatrick }
547a9ac8606Spatrick 
548e5dd7070Spatrick } // namespace tooling
549e5dd7070Spatrick } // namespace clang
550