xref: /llvm-project/clang/lib/Tooling/InterpolatingCompilationDatabase.cpp (revision 9478f661c26fbc22491218477917df5d8d73c51c)
1ea9773acSSam McCall //===- InterpolatingCompilationDatabase.cpp ---------------------*- C++ -*-===//
2ea9773acSSam McCall //
32946cd70SChandler Carruth // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
42946cd70SChandler Carruth // See https://llvm.org/LICENSE.txt for license information.
52946cd70SChandler Carruth // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6ea9773acSSam McCall //
7ea9773acSSam McCall //===----------------------------------------------------------------------===//
8ea9773acSSam McCall //
9ea9773acSSam McCall // InterpolatingCompilationDatabase wraps another CompilationDatabase and
10ea9773acSSam McCall // attempts to heuristically determine appropriate compile commands for files
11ea9773acSSam McCall // that are not included, such as headers or newly created files.
12ea9773acSSam McCall //
13ea9773acSSam McCall // Motivating cases include:
14ea9773acSSam McCall //   Header files that live next to their implementation files. These typically
15ea9773acSSam McCall // share a base filename. (libclang/CXString.h, libclang/CXString.cpp).
16ea9773acSSam McCall //   Some projects separate headers from includes. Filenames still typically
17ea9773acSSam McCall // match, maybe other path segments too. (include/llvm/IR/Use.h, lib/IR/Use.cc).
18ea9773acSSam McCall //   Matches are sometimes only approximate (Sema.h, SemaDecl.cpp). This goes
19ea9773acSSam McCall // for directories too (Support/Unix/Process.inc, lib/Support/Process.cpp).
20ea9773acSSam McCall //   Even if we can't find a "right" compile command, even a random one from
21ea9773acSSam McCall // the project will tend to get important flags like -I and -x right.
22ea9773acSSam McCall //
23ea9773acSSam McCall // We "borrow" the compile command for the closest available file:
24ea9773acSSam McCall //   - points are awarded if the filename matches (ignoring extension)
25ea9773acSSam McCall //   - points are awarded if the directory structure matches
26ea9773acSSam McCall //   - ties are broken by length of path prefix match
27ea9773acSSam McCall //
28ea9773acSSam McCall // The compile command is adjusted, replacing the filename and removing output
29ea9773acSSam McCall // file arguments. The -x and -std flags may be affected too.
30ea9773acSSam McCall //
31ea9773acSSam McCall // Source language is a tricky issue: is it OK to use a .c file's command
32ea9773acSSam McCall // for building a .cc file? What language is a .h file in?
33ea9773acSSam McCall //   - We only consider compile commands for c-family languages as candidates.
34ea9773acSSam McCall //   - For files whose language is implied by the filename (e.g. .m, .hpp)
35ea9773acSSam McCall //     we prefer candidates from the same language.
36ea9773acSSam McCall //     If we must cross languages, we drop any -x and -std flags.
37ea9773acSSam McCall //   - For .h files, candidates from any c-family language are acceptable.
38ea9773acSSam McCall //     We use the candidate's language, inserting  e.g. -x c++-header.
39ea9773acSSam McCall //
40ea9773acSSam McCall // This class is only useful when wrapping databases that can enumerate all
41ea9773acSSam McCall // their compile commands. If getAllFilenames() is empty, no inference occurs.
42ea9773acSSam McCall //
43ea9773acSSam McCall //===----------------------------------------------------------------------===//
44ea9773acSSam McCall 
4509d890d7SRainer Orth #include "clang/Basic/LangStandard.h"
46ce90b60bSKadir Cetinkaya #include "clang/Driver/Driver.h"
47ea9773acSSam McCall #include "clang/Driver/Options.h"
48ea9773acSSam McCall #include "clang/Driver/Types.h"
49ea9773acSSam McCall #include "clang/Tooling/CompilationDatabase.h"
50ce90b60bSKadir Cetinkaya #include "llvm/ADT/ArrayRef.h"
51ea9773acSSam McCall #include "llvm/ADT/DenseMap.h"
52ea9773acSSam McCall #include "llvm/ADT/StringExtras.h"
53ea9773acSSam McCall #include "llvm/Option/ArgList.h"
54ea9773acSSam McCall #include "llvm/Option/OptTable.h"
55ea9773acSSam McCall #include "llvm/Support/Debug.h"
56ea9773acSSam McCall #include "llvm/Support/Path.h"
57ea9773acSSam McCall #include "llvm/Support/StringSaver.h"
58ea9773acSSam McCall #include "llvm/Support/raw_ostream.h"
59ea9773acSSam McCall #include <memory>
60a1580d7bSKazu Hirata #include <optional>
61ea9773acSSam McCall 
62ea9773acSSam McCall namespace clang {
63ea9773acSSam McCall namespace tooling {
64ea9773acSSam McCall namespace {
65ea9773acSSam McCall using namespace llvm;
66ea9773acSSam McCall namespace types = clang::driver::types;
67ea9773acSSam McCall namespace path = llvm::sys::path;
68ea9773acSSam McCall 
69ea9773acSSam McCall // The length of the prefix these two strings have in common.
matchingPrefix(StringRef L,StringRef R)70ea9773acSSam McCall size_t matchingPrefix(StringRef L, StringRef R) {
71ea9773acSSam McCall   size_t Limit = std::min(L.size(), R.size());
72ea9773acSSam McCall   for (size_t I = 0; I < Limit; ++I)
73ea9773acSSam McCall     if (L[I] != R[I])
74ea9773acSSam McCall       return I;
75ea9773acSSam McCall   return Limit;
76ea9773acSSam McCall }
77ea9773acSSam McCall 
78ea9773acSSam McCall // A comparator for searching SubstringWithIndexes with std::equal_range etc.
79ea9773acSSam McCall // Optionaly prefix semantics: compares equal if the key is a prefix.
80ea9773acSSam McCall template <bool Prefix> struct Less {
operator ()clang::tooling::__anonba67d0cc0111::Less81ea9773acSSam McCall   bool operator()(StringRef Key, std::pair<StringRef, size_t> Value) const {
82ea9773acSSam McCall     StringRef V = Prefix ? Value.first.substr(0, Key.size()) : Value.first;
83ea9773acSSam McCall     return Key < V;
84ea9773acSSam McCall   }
operator ()clang::tooling::__anonba67d0cc0111::Less85ea9773acSSam McCall   bool operator()(std::pair<StringRef, size_t> Value, StringRef Key) const {
86ea9773acSSam McCall     StringRef V = Prefix ? Value.first.substr(0, Key.size()) : Value.first;
87ea9773acSSam McCall     return V < Key;
88ea9773acSSam McCall   }
89ea9773acSSam McCall };
90ea9773acSSam McCall 
91ea9773acSSam McCall // Infer type from filename. If we might have gotten it wrong, set *Certain.
92ea9773acSSam McCall // *.h will be inferred as a C header, but not certain.
guessType(StringRef Filename,bool * Certain=nullptr)93ea9773acSSam McCall types::ID guessType(StringRef Filename, bool *Certain = nullptr) {
94ea9773acSSam McCall   // path::extension is ".cpp", lookupTypeForExtension wants "cpp".
95ea9773acSSam McCall   auto Lang =
96ea9773acSSam McCall       types::lookupTypeForExtension(path::extension(Filename).substr(1));
97ea9773acSSam McCall   if (Certain)
98ea9773acSSam McCall     *Certain = Lang != types::TY_CHeader && Lang != types::TY_INVALID;
99ea9773acSSam McCall   return Lang;
100ea9773acSSam McCall }
101ea9773acSSam McCall 
102ea9773acSSam McCall // Return Lang as one of the canonical supported types.
103ea9773acSSam McCall // e.g. c-header --> c; fortran --> TY_INVALID
foldType(types::ID Lang)104ea9773acSSam McCall static types::ID foldType(types::ID Lang) {
105ea9773acSSam McCall   switch (Lang) {
106ea9773acSSam McCall   case types::TY_C:
107ea9773acSSam McCall   case types::TY_CHeader:
108ea9773acSSam McCall     return types::TY_C;
109ea9773acSSam McCall   case types::TY_ObjC:
110ea9773acSSam McCall   case types::TY_ObjCHeader:
111ea9773acSSam McCall     return types::TY_ObjC;
112ea9773acSSam McCall   case types::TY_CXX:
113ea9773acSSam McCall   case types::TY_CXXHeader:
114ea9773acSSam McCall     return types::TY_CXX;
115ea9773acSSam McCall   case types::TY_ObjCXX:
116ea9773acSSam McCall   case types::TY_ObjCXXHeader:
117ea9773acSSam McCall     return types::TY_ObjCXX;
1186ed88afdSADRA   case types::TY_CUDA:
1196ed88afdSADRA   case types::TY_CUDA_DEVICE:
1202a1418f2SChristopher Tetreault     return types::TY_CUDA;
121ea9773acSSam McCall   default:
122ea9773acSSam McCall     return types::TY_INVALID;
123ea9773acSSam McCall   }
124ea9773acSSam McCall }
125ea9773acSSam McCall 
126ea9773acSSam McCall // A CompileCommand that can be applied to another file.
127ea9773acSSam McCall struct TransferableCommand {
128ea9773acSSam McCall   // Flags that should not apply to all files are stripped from CommandLine.
129ea9773acSSam McCall   CompileCommand Cmd;
1305167e2d1SIlya Biryukov   // Language detected from -x or the filename. Never TY_INVALID.
1316ad0788cSKazu Hirata   std::optional<types::ID> Type;
132ea9773acSSam McCall   // Standard specified by -std.
133ea9773acSSam McCall   LangStandard::Kind Std = LangStandard::lang_unspecified;
134e05bf606SHamza Sood   // Whether the command line is for the cl-compatible driver.
135e05bf606SHamza Sood   bool ClangCLMode;
136ea9773acSSam McCall 
TransferableCommandclang::tooling::__anonba67d0cc0111::TransferableCommand137ea9773acSSam McCall   TransferableCommand(CompileCommand C)
138ce90b60bSKadir Cetinkaya       : Cmd(std::move(C)), Type(guessType(Cmd.Filename)) {
139e05bf606SHamza Sood     std::vector<std::string> OldArgs = std::move(Cmd.CommandLine);
140e05bf606SHamza Sood     Cmd.CommandLine.clear();
141e05bf606SHamza Sood 
142e05bf606SHamza Sood     // Wrap the old arguments in an InputArgList.
143e05bf606SHamza Sood     llvm::opt::InputArgList ArgList;
144e05bf606SHamza Sood     {
145e05bf606SHamza Sood       SmallVector<const char *, 16> TmpArgv;
146e05bf606SHamza Sood       for (const std::string &S : OldArgs)
147e05bf606SHamza Sood         TmpArgv.push_back(S.c_str());
148ce90b60bSKadir Cetinkaya       ClangCLMode = !TmpArgv.empty() &&
149ce90b60bSKadir Cetinkaya                     driver::IsClangCL(driver::getDriverMode(
150a3c248dbSserge-sans-paille                         TmpArgv.front(), llvm::ArrayRef(TmpArgv).slice(1)));
151e05bf606SHamza Sood       ArgList = {TmpArgv.begin(), TmpArgv.end()};
152e05bf606SHamza Sood     }
153e05bf606SHamza Sood 
154ea9773acSSam McCall     // Parse the old args in order to strip out and record unwanted flags.
155e05bf606SHamza Sood     // We parse each argument individually so that we can retain the exact
156e05bf606SHamza Sood     // spelling of each argument; re-rendering is lossy for aliased flags.
157e05bf606SHamza Sood     // E.g. in CL mode, /W4 maps to -Wall.
15843392759SIlya Biryukov     auto &OptTable = clang::driver::getDriverOptTable();
15960ca31a7SSerge Guelton     if (!OldArgs.empty())
160e05bf606SHamza Sood       Cmd.CommandLine.emplace_back(OldArgs.front());
161e05bf606SHamza Sood     for (unsigned Pos = 1; Pos < OldArgs.size();) {
162e05bf606SHamza Sood       using namespace driver::options;
163e05bf606SHamza Sood 
164e05bf606SHamza Sood       const unsigned OldPos = Pos;
16543392759SIlya Biryukov       std::unique_ptr<llvm::opt::Arg> Arg(OptTable.ParseOneArg(
166e05bf606SHamza Sood           ArgList, Pos,
167*9478f661SJustin Bogner           llvm::opt::Visibility(ClangCLMode ? CLOption : ClangOption)));
168e05bf606SHamza Sood 
169e05bf606SHamza Sood       if (!Arg)
170e05bf606SHamza Sood         continue;
171e05bf606SHamza Sood 
172e05bf606SHamza Sood       const llvm::opt::Option &Opt = Arg->getOption();
173e05bf606SHamza Sood 
174ea9773acSSam McCall       // Strip input and output files.
175e05bf606SHamza Sood       if (Opt.matches(OPT_INPUT) || Opt.matches(OPT_o) ||
176e05bf606SHamza Sood           (ClangCLMode && (Opt.matches(OPT__SLASH_Fa) ||
177e05bf606SHamza Sood                            Opt.matches(OPT__SLASH_Fe) ||
178e05bf606SHamza Sood                            Opt.matches(OPT__SLASH_Fi) ||
179e05bf606SHamza Sood                            Opt.matches(OPT__SLASH_Fo))))
180ea9773acSSam McCall         continue;
181e05bf606SHamza Sood 
182e030ce3eSJanusz Nykiel       // ...including when the inputs are passed after --.
183e030ce3eSJanusz Nykiel       if (Opt.matches(OPT__DASH_DASH))
184e030ce3eSJanusz Nykiel         break;
185e030ce3eSJanusz Nykiel 
186ea9773acSSam McCall       // Strip -x, but record the overridden language.
187e05bf606SHamza Sood       if (const auto GivenType = tryParseTypeArg(*Arg)) {
188e05bf606SHamza Sood         Type = *GivenType;
189ea9773acSSam McCall         continue;
190ea9773acSSam McCall       }
191e05bf606SHamza Sood 
192e05bf606SHamza Sood       // Strip -std, but record the value.
193e05bf606SHamza Sood       if (const auto GivenStd = tryParseStdArg(*Arg)) {
194e05bf606SHamza Sood         if (*GivenStd != LangStandard::lang_unspecified)
195e05bf606SHamza Sood           Std = *GivenStd;
196ea9773acSSam McCall         continue;
197ea9773acSSam McCall       }
198e05bf606SHamza Sood 
199e05bf606SHamza Sood       Cmd.CommandLine.insert(Cmd.CommandLine.end(),
200fd1dc75bSHamza Sood                              OldArgs.data() + OldPos, OldArgs.data() + Pos);
201ea9773acSSam McCall     }
202ea9773acSSam McCall 
203c2377eaeSKadir Cetinkaya     // Make use of -std iff -x was missing.
204c2377eaeSKadir Cetinkaya     if (Type == types::TY_INVALID && Std != LangStandard::lang_unspecified)
205ea9773acSSam McCall       Type = toType(LangStandard::getLangStandardForKind(Std).getLanguage());
2065167e2d1SIlya Biryukov     Type = foldType(*Type);
2075167e2d1SIlya Biryukov     // The contract is to store None instead of TY_INVALID.
2085167e2d1SIlya Biryukov     if (Type == types::TY_INVALID)
2095891420eSKazu Hirata       Type = std::nullopt;
210ea9773acSSam McCall   }
211ea9773acSSam McCall 
212ea9773acSSam McCall   // Produce a CompileCommand for \p filename, based on this one.
213588db1ccSSam McCall   // (This consumes the TransferableCommand just to avoid copying Cmd).
transferToclang::tooling::__anonba67d0cc0111::TransferableCommand214588db1ccSSam McCall   CompileCommand transferTo(StringRef Filename) && {
215588db1ccSSam McCall     CompileCommand Result = std::move(Cmd);
216588db1ccSSam McCall     Result.Heuristic = "inferred from " + Result.Filename;
217adcd0268SBenjamin Kramer     Result.Filename = std::string(Filename);
218ea9773acSSam McCall     bool TypeCertain;
219ea9773acSSam McCall     auto TargetType = guessType(Filename, &TypeCertain);
220ea9773acSSam McCall     // If the filename doesn't determine the language (.h), transfer with -x.
22187ad30beSSam McCall     if ((!TargetType || !TypeCertain) && Type) {
22287ad30beSSam McCall       // Use *Type, or its header variant if the file is a header.
22387ad30beSSam McCall       // Treat no/invalid extension as header (e.g. C++ standard library).
22487ad30beSSam McCall       TargetType =
22587ad30beSSam McCall           (!TargetType || types::onlyPrecompileType(TargetType)) // header?
2265167e2d1SIlya Biryukov               ? types::lookupHeaderTypeForSourceType(*Type)
2275167e2d1SIlya Biryukov               : *Type;
228e05bf606SHamza Sood       if (ClangCLMode) {
229e05bf606SHamza Sood         const StringRef Flag = toCLFlag(TargetType);
230e05bf606SHamza Sood         if (!Flag.empty())
231adcd0268SBenjamin Kramer           Result.CommandLine.push_back(std::string(Flag));
232e05bf606SHamza Sood       } else {
233ea9773acSSam McCall         Result.CommandLine.push_back("-x");
234ea9773acSSam McCall         Result.CommandLine.push_back(types::getTypeName(TargetType));
235ea9773acSSam McCall       }
236e05bf606SHamza Sood     }
237ea9773acSSam McCall     // --std flag may only be transferred if the language is the same.
238ea9773acSSam McCall     // We may consider "translating" these, e.g. c++11 -> c11.
239ea9773acSSam McCall     if (Std != LangStandard::lang_unspecified && foldType(TargetType) == Type) {
240e05bf606SHamza Sood       Result.CommandLine.emplace_back((
241e05bf606SHamza Sood           llvm::Twine(ClangCLMode ? "/std:" : "-std=") +
242e05bf606SHamza Sood           LangStandard::getLangStandardForKind(Std).getName()).str());
243ea9773acSSam McCall     }
244e030ce3eSJanusz Nykiel     Result.CommandLine.push_back("--");
245adcd0268SBenjamin Kramer     Result.CommandLine.push_back(std::string(Filename));
246ea9773acSSam McCall     return Result;
247ea9773acSSam McCall   }
248ea9773acSSam McCall 
249ea9773acSSam McCall private:
250ea9773acSSam McCall   // Map the language from the --std flag to that of the -x flag.
toTypeclang::tooling::__anonba67d0cc0111::TransferableCommand25109d890d7SRainer Orth   static types::ID toType(Language Lang) {
252ea9773acSSam McCall     switch (Lang) {
25309d890d7SRainer Orth     case Language::C:
254ea9773acSSam McCall       return types::TY_C;
25509d890d7SRainer Orth     case Language::CXX:
256ea9773acSSam McCall       return types::TY_CXX;
25709d890d7SRainer Orth     case Language::ObjC:
258ea9773acSSam McCall       return types::TY_ObjC;
25909d890d7SRainer Orth     case Language::ObjCXX:
260ea9773acSSam McCall       return types::TY_ObjCXX;
261ea9773acSSam McCall     default:
262ea9773acSSam McCall       return types::TY_INVALID;
263ea9773acSSam McCall     }
264ea9773acSSam McCall   }
265e05bf606SHamza Sood 
266e05bf606SHamza Sood   // Convert a file type to the matching CL-style type flag.
toCLFlagclang::tooling::__anonba67d0cc0111::TransferableCommand267e05bf606SHamza Sood   static StringRef toCLFlag(types::ID Type) {
268e05bf606SHamza Sood     switch (Type) {
269e05bf606SHamza Sood     case types::TY_C:
270e05bf606SHamza Sood     case types::TY_CHeader:
271e05bf606SHamza Sood       return "/TC";
272e05bf606SHamza Sood     case types::TY_CXX:
273e05bf606SHamza Sood     case types::TY_CXXHeader:
274e05bf606SHamza Sood       return "/TP";
275e05bf606SHamza Sood     default:
276e05bf606SHamza Sood       return StringRef();
277e05bf606SHamza Sood     }
278e05bf606SHamza Sood   }
279e05bf606SHamza Sood 
280e05bf606SHamza Sood   // Try to interpret the argument as a type specifier, e.g. '-x'.
tryParseTypeArgclang::tooling::__anonba67d0cc0111::TransferableCommand2816ad0788cSKazu Hirata   std::optional<types::ID> tryParseTypeArg(const llvm::opt::Arg &Arg) {
282e05bf606SHamza Sood     const llvm::opt::Option &Opt = Arg.getOption();
283e05bf606SHamza Sood     using namespace driver::options;
284e05bf606SHamza Sood     if (ClangCLMode) {
285e05bf606SHamza Sood       if (Opt.matches(OPT__SLASH_TC) || Opt.matches(OPT__SLASH_Tc))
286e05bf606SHamza Sood         return types::TY_C;
287e05bf606SHamza Sood       if (Opt.matches(OPT__SLASH_TP) || Opt.matches(OPT__SLASH_Tp))
288e05bf606SHamza Sood         return types::TY_CXX;
289e05bf606SHamza Sood     } else {
290e05bf606SHamza Sood       if (Opt.matches(driver::options::OPT_x))
291e05bf606SHamza Sood         return types::lookupTypeForTypeSpecifier(Arg.getValue());
292e05bf606SHamza Sood     }
2935891420eSKazu Hirata     return std::nullopt;
294e05bf606SHamza Sood   }
295e05bf606SHamza Sood 
296e05bf606SHamza Sood   // Try to interpret the argument as '-std='.
tryParseStdArgclang::tooling::__anonba67d0cc0111::TransferableCommand2976ad0788cSKazu Hirata   std::optional<LangStandard::Kind> tryParseStdArg(const llvm::opt::Arg &Arg) {
298e05bf606SHamza Sood     using namespace driver::options;
29909d890d7SRainer Orth     if (Arg.getOption().matches(ClangCLMode ? OPT__SLASH_std : OPT_std_EQ))
30009d890d7SRainer Orth       return LangStandard::getLangKind(Arg.getValue());
3015891420eSKazu Hirata     return std::nullopt;
302e05bf606SHamza Sood   }
303ea9773acSSam McCall };
304ea9773acSSam McCall 
3055167e2d1SIlya Biryukov // Given a filename, FileIndex picks the best matching file from the underlying
3065167e2d1SIlya Biryukov // DB. This is the proxy file whose CompileCommand will be reused. The
3075167e2d1SIlya Biryukov // heuristics incorporate file name, extension, and directory structure.
3085167e2d1SIlya Biryukov // Strategy:
309ea9773acSSam McCall // - Build indexes of each of the substrings we want to look up by.
310ea9773acSSam McCall //   These indexes are just sorted lists of the substrings.
311ea9773acSSam McCall // - Each criterion corresponds to a range lookup into the index, so we only
312ea9773acSSam McCall //   need O(log N) string comparisons to determine scores.
3135167e2d1SIlya Biryukov //
3145167e2d1SIlya Biryukov // Apart from path proximity signals, also takes file extensions into account
3155167e2d1SIlya Biryukov // when scoring the candidates.
3165167e2d1SIlya Biryukov class FileIndex {
317ea9773acSSam McCall public:
FileIndex(std::vector<std::string> Files)3185167e2d1SIlya Biryukov   FileIndex(std::vector<std::string> Files)
3195167e2d1SIlya Biryukov       : OriginalPaths(std::move(Files)), Strings(Arena) {
320ea9773acSSam McCall     // Sort commands by filename for determinism (index is a tiebreaker later).
32155fab260SFangrui Song     llvm::sort(OriginalPaths);
3225167e2d1SIlya Biryukov     Paths.reserve(OriginalPaths.size());
3235167e2d1SIlya Biryukov     Types.reserve(OriginalPaths.size());
3245167e2d1SIlya Biryukov     Stems.reserve(OriginalPaths.size());
3255167e2d1SIlya Biryukov     for (size_t I = 0; I < OriginalPaths.size(); ++I) {
3265167e2d1SIlya Biryukov       StringRef Path = Strings.save(StringRef(OriginalPaths[I]).lower());
3275167e2d1SIlya Biryukov 
3285167e2d1SIlya Biryukov       Paths.emplace_back(Path, I);
32987468e85SIshaan Gandhi       Types.push_back(foldType(guessType(OriginalPaths[I])));
330ea9773acSSam McCall       Stems.emplace_back(sys::path::stem(Path), I);
331ea9773acSSam McCall       auto Dir = ++sys::path::rbegin(Path), DirEnd = sys::path::rend(Path);
332ea9773acSSam McCall       for (int J = 0; J < DirectorySegmentsIndexed && Dir != DirEnd; ++J, ++Dir)
333ea9773acSSam McCall         if (Dir->size() > ShortDirectorySegment) // not trivial ones
334ea9773acSSam McCall           Components.emplace_back(*Dir, I);
335ea9773acSSam McCall     }
33655fab260SFangrui Song     llvm::sort(Paths);
33755fab260SFangrui Song     llvm::sort(Stems);
33855fab260SFangrui Song     llvm::sort(Components);
339ea9773acSSam McCall   }
340ea9773acSSam McCall 
empty() const3415167e2d1SIlya Biryukov   bool empty() const { return Paths.empty(); }
342ea9773acSSam McCall 
3435167e2d1SIlya Biryukov   // Returns the path for the file that best fits OriginalFilename.
3445167e2d1SIlya Biryukov   // Candidates with extensions matching PreferLanguage will be chosen over
3455167e2d1SIlya Biryukov   // others (unless it's TY_INVALID, or all candidates are bad).
chooseProxy(StringRef OriginalFilename,types::ID PreferLanguage) const3465167e2d1SIlya Biryukov   StringRef chooseProxy(StringRef OriginalFilename,
347ea9773acSSam McCall                         types::ID PreferLanguage) const {
348ea9773acSSam McCall     assert(!empty() && "need at least one candidate!");
349ea9773acSSam McCall     std::string Filename = OriginalFilename.lower();
350ea9773acSSam McCall     auto Candidates = scoreCandidates(Filename);
351ea9773acSSam McCall     std::pair<size_t, int> Best =
352ea9773acSSam McCall         pickWinner(Candidates, Filename, PreferLanguage);
353ea9773acSSam McCall 
3545167e2d1SIlya Biryukov     DEBUG_WITH_TYPE(
3555167e2d1SIlya Biryukov         "interpolate",
3565167e2d1SIlya Biryukov         llvm::dbgs() << "interpolate: chose " << OriginalPaths[Best.first]
3575167e2d1SIlya Biryukov                      << " as proxy for " << OriginalFilename << " preferring "
358ea9773acSSam McCall                      << (PreferLanguage == types::TY_INVALID
359ea9773acSSam McCall                              ? "none"
360ea9773acSSam McCall                              : types::getTypeName(PreferLanguage))
361ea9773acSSam McCall                      << " score=" << Best.second << "\n");
3625167e2d1SIlya Biryukov     return OriginalPaths[Best.first];
363ea9773acSSam McCall   }
364ea9773acSSam McCall 
365ea9773acSSam McCall private:
366ea9773acSSam McCall   using SubstringAndIndex = std::pair<StringRef, size_t>;
367ea9773acSSam McCall   // Directory matching parameters: we look at the last two segments of the
368ea9773acSSam McCall   // parent directory (usually the semantically significant ones in practice).
369ea9773acSSam McCall   // We search only the last four of each candidate (for efficiency).
370ea9773acSSam McCall   constexpr static int DirectorySegmentsIndexed = 4;
371ea9773acSSam McCall   constexpr static int DirectorySegmentsQueried = 2;
372ea9773acSSam McCall   constexpr static int ShortDirectorySegment = 1; // Only look at longer names.
373ea9773acSSam McCall 
374ea9773acSSam McCall   // Award points to candidate entries that should be considered for the file.
375ea9773acSSam McCall   // Returned keys are indexes into paths, and the values are (nonzero) scores.
scoreCandidates(StringRef Filename) const376ea9773acSSam McCall   DenseMap<size_t, int> scoreCandidates(StringRef Filename) const {
377ea9773acSSam McCall     // Decompose Filename into the parts we care about.
378ea9773acSSam McCall     // /some/path/complicated/project/Interesting.h
379ea9773acSSam McCall     // [-prefix--][---dir---] [-dir-] [--stem---]
380ea9773acSSam McCall     StringRef Stem = sys::path::stem(Filename);
381ea9773acSSam McCall     llvm::SmallVector<StringRef, DirectorySegmentsQueried> Dirs;
382ea9773acSSam McCall     llvm::StringRef Prefix;
383ea9773acSSam McCall     auto Dir = ++sys::path::rbegin(Filename),
384ea9773acSSam McCall          DirEnd = sys::path::rend(Filename);
385ea9773acSSam McCall     for (int I = 0; I < DirectorySegmentsQueried && Dir != DirEnd; ++I, ++Dir) {
386ea9773acSSam McCall       if (Dir->size() > ShortDirectorySegment)
387ea9773acSSam McCall         Dirs.push_back(*Dir);
388ea9773acSSam McCall       Prefix = Filename.substr(0, Dir - DirEnd);
389ea9773acSSam McCall     }
390ea9773acSSam McCall 
391ea9773acSSam McCall     // Now award points based on lookups into our various indexes.
392ea9773acSSam McCall     DenseMap<size_t, int> Candidates; // Index -> score.
393ea9773acSSam McCall     auto Award = [&](int Points, ArrayRef<SubstringAndIndex> Range) {
394ea9773acSSam McCall       for (const auto &Entry : Range)
395ea9773acSSam McCall         Candidates[Entry.second] += Points;
396ea9773acSSam McCall     };
397ea9773acSSam McCall     // Award one point if the file's basename is a prefix of the candidate,
398ea9773acSSam McCall     // and another if it's an exact match (so exact matches get two points).
399ea9773acSSam McCall     Award(1, indexLookup</*Prefix=*/true>(Stem, Stems));
400ea9773acSSam McCall     Award(1, indexLookup</*Prefix=*/false>(Stem, Stems));
401ea9773acSSam McCall     // For each of the last few directories in the Filename, award a point
402ea9773acSSam McCall     // if it's present in the candidate.
403ea9773acSSam McCall     for (StringRef Dir : Dirs)
404ea9773acSSam McCall       Award(1, indexLookup</*Prefix=*/false>(Dir, Components));
405ea9773acSSam McCall     // Award one more point if the whole rest of the path matches.
406ea9773acSSam McCall     if (sys::path::root_directory(Prefix) != Prefix)
407ea9773acSSam McCall       Award(1, indexLookup</*Prefix=*/true>(Prefix, Paths));
408ea9773acSSam McCall     return Candidates;
409ea9773acSSam McCall   }
410ea9773acSSam McCall 
411ea9773acSSam McCall   // Pick a single winner from the set of scored candidates.
412ea9773acSSam McCall   // Returns (index, score).
pickWinner(const DenseMap<size_t,int> & Candidates,StringRef Filename,types::ID PreferredLanguage) const413ea9773acSSam McCall   std::pair<size_t, int> pickWinner(const DenseMap<size_t, int> &Candidates,
414ea9773acSSam McCall                                     StringRef Filename,
415ea9773acSSam McCall                                     types::ID PreferredLanguage) const {
416ea9773acSSam McCall     struct ScoredCandidate {
417ea9773acSSam McCall       size_t Index;
418ea9773acSSam McCall       bool Preferred;
419ea9773acSSam McCall       int Points;
420ea9773acSSam McCall       size_t PrefixLength;
421ea9773acSSam McCall     };
422ea9773acSSam McCall     // Choose the best candidate by (preferred, points, prefix length, alpha).
423ea9773acSSam McCall     ScoredCandidate Best = {size_t(-1), false, 0, 0};
424ea9773acSSam McCall     for (const auto &Candidate : Candidates) {
425ea9773acSSam McCall       ScoredCandidate S;
426ea9773acSSam McCall       S.Index = Candidate.first;
427ea9773acSSam McCall       S.Preferred = PreferredLanguage == types::TY_INVALID ||
4285167e2d1SIlya Biryukov                     PreferredLanguage == Types[S.Index];
429ea9773acSSam McCall       S.Points = Candidate.second;
430ea9773acSSam McCall       if (!S.Preferred && Best.Preferred)
431ea9773acSSam McCall         continue;
432ea9773acSSam McCall       if (S.Preferred == Best.Preferred) {
433ea9773acSSam McCall         if (S.Points < Best.Points)
434ea9773acSSam McCall           continue;
435ea9773acSSam McCall         if (S.Points == Best.Points) {
436ea9773acSSam McCall           S.PrefixLength = matchingPrefix(Filename, Paths[S.Index].first);
437ea9773acSSam McCall           if (S.PrefixLength < Best.PrefixLength)
438ea9773acSSam McCall             continue;
439ea9773acSSam McCall           // hidden heuristics should at least be deterministic!
440ea9773acSSam McCall           if (S.PrefixLength == Best.PrefixLength)
441ea9773acSSam McCall             if (S.Index > Best.Index)
442ea9773acSSam McCall               continue;
443ea9773acSSam McCall         }
444ea9773acSSam McCall       }
445ea9773acSSam McCall       // PrefixLength was only set above if actually needed for a tiebreak.
446ea9773acSSam McCall       // But it definitely needs to be set to break ties in the future.
447ea9773acSSam McCall       S.PrefixLength = matchingPrefix(Filename, Paths[S.Index].first);
448ea9773acSSam McCall       Best = S;
449ea9773acSSam McCall     }
450ea9773acSSam McCall     // Edge case: no candidate got any points.
451ea9773acSSam McCall     // We ignore PreferredLanguage at this point (not ideal).
452ea9773acSSam McCall     if (Best.Index == size_t(-1))
453ea9773acSSam McCall       return {longestMatch(Filename, Paths).second, 0};
454ea9773acSSam McCall     return {Best.Index, Best.Points};
455ea9773acSSam McCall   }
456ea9773acSSam McCall 
457ea9773acSSam McCall   // Returns the range within a sorted index that compares equal to Key.
458ea9773acSSam McCall   // If Prefix is true, it's instead the range starting with Key.
459ea9773acSSam McCall   template <bool Prefix>
460ea9773acSSam McCall   ArrayRef<SubstringAndIndex>
indexLookup(StringRef Key,ArrayRef<SubstringAndIndex> Idx) const4615167e2d1SIlya Biryukov   indexLookup(StringRef Key, ArrayRef<SubstringAndIndex> Idx) const {
462ea9773acSSam McCall     // Use pointers as iteratiors to ease conversion of result to ArrayRef.
46344f2f4ecSSam McCall     auto Range = std::equal_range(Idx.data(), Idx.data() + Idx.size(), Key,
46444f2f4ecSSam McCall                                   Less<Prefix>());
465ea9773acSSam McCall     return {Range.first, Range.second};
466ea9773acSSam McCall   }
467ea9773acSSam McCall 
468ea9773acSSam McCall   // Performs a point lookup into a nonempty index, returning a longest match.
longestMatch(StringRef Key,ArrayRef<SubstringAndIndex> Idx) const4695167e2d1SIlya Biryukov   SubstringAndIndex longestMatch(StringRef Key,
4705167e2d1SIlya Biryukov                                  ArrayRef<SubstringAndIndex> Idx) const {
471ea9773acSSam McCall     assert(!Idx.empty());
472ea9773acSSam McCall     // Longest substring match will be adjacent to a direct lookup.
4737264a474SFangrui Song     auto It = llvm::lower_bound(Idx, SubstringAndIndex{Key, 0});
474ea9773acSSam McCall     if (It == Idx.begin())
475ea9773acSSam McCall       return *It;
476ea9773acSSam McCall     if (It == Idx.end())
477ea9773acSSam McCall       return *--It;
478ea9773acSSam McCall     // Have to choose between It and It-1
479ea9773acSSam McCall     size_t Prefix = matchingPrefix(Key, It->first);
480ea9773acSSam McCall     size_t PrevPrefix = matchingPrefix(Key, (It - 1)->first);
481ea9773acSSam McCall     return Prefix > PrevPrefix ? *It : *--It;
482ea9773acSSam McCall   }
483ea9773acSSam McCall 
4845167e2d1SIlya Biryukov   // Original paths, everything else is in lowercase.
4855167e2d1SIlya Biryukov   std::vector<std::string> OriginalPaths;
486ea9773acSSam McCall   BumpPtrAllocator Arena;
487ea9773acSSam McCall   StringSaver Strings;
488ea9773acSSam McCall   // Indexes of candidates by certain substrings.
489ea9773acSSam McCall   // String is lowercase and sorted, index points into OriginalPaths.
490ea9773acSSam McCall   std::vector<SubstringAndIndex> Paths;      // Full path.
4915167e2d1SIlya Biryukov   // Lang types obtained by guessing on the corresponding path. I-th element is
4925167e2d1SIlya Biryukov   // a type for the I-th path.
4935167e2d1SIlya Biryukov   std::vector<types::ID> Types;
494ea9773acSSam McCall   std::vector<SubstringAndIndex> Stems;      // Basename, without extension.
495ea9773acSSam McCall   std::vector<SubstringAndIndex> Components; // Last path components.
496ea9773acSSam McCall };
497ea9773acSSam McCall 
498ea9773acSSam McCall // The actual CompilationDatabase wrapper delegates to its inner database.
4995167e2d1SIlya Biryukov // If no match, looks up a proxy file in FileIndex and transfers its
5005167e2d1SIlya Biryukov // command to the requested file.
501ea9773acSSam McCall class InterpolatingCompilationDatabase : public CompilationDatabase {
502ea9773acSSam McCall public:
InterpolatingCompilationDatabase(std::unique_ptr<CompilationDatabase> Inner)503ea9773acSSam McCall   InterpolatingCompilationDatabase(std::unique_ptr<CompilationDatabase> Inner)
5045167e2d1SIlya Biryukov       : Inner(std::move(Inner)), Index(this->Inner->getAllFiles()) {}
505ea9773acSSam McCall 
506ea9773acSSam McCall   std::vector<CompileCommand>
getCompileCommands(StringRef Filename) const507ea9773acSSam McCall   getCompileCommands(StringRef Filename) const override {
508ea9773acSSam McCall     auto Known = Inner->getCompileCommands(Filename);
509ea9773acSSam McCall     if (Index.empty() || !Known.empty())
510ea9773acSSam McCall       return Known;
511ea9773acSSam McCall     bool TypeCertain;
512ea9773acSSam McCall     auto Lang = guessType(Filename, &TypeCertain);
513ea9773acSSam McCall     if (!TypeCertain)
514ea9773acSSam McCall       Lang = types::TY_INVALID;
5155167e2d1SIlya Biryukov     auto ProxyCommands =
5165167e2d1SIlya Biryukov         Inner->getCompileCommands(Index.chooseProxy(Filename, foldType(Lang)));
5175167e2d1SIlya Biryukov     if (ProxyCommands.empty())
5185167e2d1SIlya Biryukov       return {};
519588db1ccSSam McCall     return {transferCompileCommand(std::move(ProxyCommands.front()), Filename)};
520ea9773acSSam McCall   }
521ea9773acSSam McCall 
getAllFiles() const522ea9773acSSam McCall   std::vector<std::string> getAllFiles() const override {
523ea9773acSSam McCall     return Inner->getAllFiles();
524ea9773acSSam McCall   }
525ea9773acSSam McCall 
getAllCompileCommands() const526ea9773acSSam McCall   std::vector<CompileCommand> getAllCompileCommands() const override {
527ea9773acSSam McCall     return Inner->getAllCompileCommands();
528ea9773acSSam McCall   }
529ea9773acSSam McCall 
530ea9773acSSam McCall private:
531ea9773acSSam McCall   std::unique_ptr<CompilationDatabase> Inner;
5325167e2d1SIlya Biryukov   FileIndex Index;
533ea9773acSSam McCall };
534ea9773acSSam McCall 
535ea9773acSSam McCall } // namespace
536ea9773acSSam McCall 
537ea9773acSSam McCall std::unique_ptr<CompilationDatabase>
inferMissingCompileCommands(std::unique_ptr<CompilationDatabase> Inner)538ea9773acSSam McCall inferMissingCompileCommands(std::unique_ptr<CompilationDatabase> Inner) {
5392b3d49b6SJonas Devlieghere   return std::make_unique<InterpolatingCompilationDatabase>(std::move(Inner));
540ea9773acSSam McCall }
541ea9773acSSam McCall 
transferCompileCommand(CompileCommand Cmd,StringRef Filename)542588db1ccSSam McCall tooling::CompileCommand transferCompileCommand(CompileCommand Cmd,
543588db1ccSSam McCall                                                StringRef Filename) {
544588db1ccSSam McCall   return TransferableCommand(std::move(Cmd)).transferTo(Filename);
545588db1ccSSam McCall }
546588db1ccSSam McCall 
547ea9773acSSam McCall } // namespace tooling
548ea9773acSSam McCall } // namespace clang
549