xref: /llvm-project/clang-tools-extra/clangd/SystemIncludeExtractor.cpp (revision f5838cc17ffb1a0015a0d2687a72bf39b2847f6d)
1428ac8f3SNathan Ridge //===--- SystemIncludeExtractor.cpp ------------------------------*- C++-*-===//
2428ac8f3SNathan Ridge //
3428ac8f3SNathan Ridge // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4428ac8f3SNathan Ridge // See https://llvm.org/LICENSE.txt for license information.
5428ac8f3SNathan Ridge // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6428ac8f3SNathan Ridge //
7428ac8f3SNathan Ridge //===----------------------------------------------------------------------===//
// Some compiler drivers have an implicit search mechanism for system headers.
// This compilation database implementation tries to extract that information by
// executing the driver in verbose mode. gcc-compatible drivers print something
// like:
// ....
// ....
// #include <...> search starts here:
//  /usr/lib/gcc/x86_64-linux-gnu/7/include
//  /usr/local/include
//  /usr/lib/gcc/x86_64-linux-gnu/7/include-fixed
//  /usr/include/x86_64-linux-gnu
//  /usr/include
// End of search list.
// ....
// ....
// This component parses that output and adds each path to the command line
// args provided by Base, after prepending them with -isystem. Therefore the
// current implementation would not work with a driver that is not
// gcc-compatible.
//
// The first argument of the command line received from the underlying
// compilation database is used as the compiler driver path. Because this
// executes an arbitrary binary, the mechanism is not used by default and only
// executes binaries in the paths that are explicitly included by the user.
31428ac8f3SNathan Ridge 
32428ac8f3SNathan Ridge #include "CompileCommands.h"
33428ac8f3SNathan Ridge #include "GlobalCompilationDatabase.h"
34428ac8f3SNathan Ridge #include "support/Logger.h"
35f099f2feSKadir Cetinkaya #include "support/Threading.h"
36428ac8f3SNathan Ridge #include "support/Trace.h"
37428ac8f3SNathan Ridge #include "clang/Basic/Diagnostic.h"
38f099f2feSKadir Cetinkaya #include "clang/Basic/DiagnosticIDs.h"
39f099f2feSKadir Cetinkaya #include "clang/Basic/DiagnosticOptions.h"
40428ac8f3SNathan Ridge #include "clang/Basic/TargetInfo.h"
41428ac8f3SNathan Ridge #include "clang/Basic/TargetOptions.h"
42428ac8f3SNathan Ridge #include "clang/Driver/Types.h"
43428ac8f3SNathan Ridge #include "clang/Tooling/CompilationDatabase.h"
44f099f2feSKadir Cetinkaya #include "llvm/ADT/ArrayRef.h"
45f099f2feSKadir Cetinkaya #include "llvm/ADT/DenseMap.h"
46f099f2feSKadir Cetinkaya #include "llvm/ADT/Hashing.h"
47f099f2feSKadir Cetinkaya #include "llvm/ADT/IntrusiveRefCntPtr.h"
48f099f2feSKadir Cetinkaya #include "llvm/ADT/STLExtras.h"
49428ac8f3SNathan Ridge #include "llvm/ADT/ScopeExit.h"
50428ac8f3SNathan Ridge #include "llvm/ADT/SmallString.h"
51f099f2feSKadir Cetinkaya #include "llvm/ADT/SmallVector.h"
52428ac8f3SNathan Ridge #include "llvm/ADT/StringExtras.h"
53428ac8f3SNathan Ridge #include "llvm/ADT/StringRef.h"
54f099f2feSKadir Cetinkaya #include "llvm/Support/ErrorHandling.h"
55428ac8f3SNathan Ridge #include "llvm/Support/FileSystem.h"
56428ac8f3SNathan Ridge #include "llvm/Support/MemoryBuffer.h"
57428ac8f3SNathan Ridge #include "llvm/Support/Path.h"
58428ac8f3SNathan Ridge #include "llvm/Support/Program.h"
59428ac8f3SNathan Ridge #include "llvm/Support/Regex.h"
60428ac8f3SNathan Ridge #include "llvm/Support/ScopedPrinter.h"
61f099f2feSKadir Cetinkaya #include "llvm/Support/raw_ostream.h"
62f099f2feSKadir Cetinkaya #include <cassert>
63f099f2feSKadir Cetinkaya #include <cstddef>
64428ac8f3SNathan Ridge #include <iterator>
65f099f2feSKadir Cetinkaya #include <memory>
6671f55735SKazu Hirata #include <optional>
67428ac8f3SNathan Ridge #include <string>
68f099f2feSKadir Cetinkaya #include <tuple>
69f099f2feSKadir Cetinkaya #include <utility>
70428ac8f3SNathan Ridge #include <vector>
71428ac8f3SNathan Ridge 
72f099f2feSKadir Cetinkaya namespace clang::clangd {
73428ac8f3SNathan Ridge namespace {
74428ac8f3SNathan Ridge 
/// What a driver query yields: the include directories the driver listed and
/// the target it reported.
struct DriverInfo {
  /// Directories printed between the driver's include-search-list markers.
  std::vector<std::string> SystemIncludes;
  /// Target string reported by the driver; stays empty if none was accepted.
  std::string Target;
};
79428ac8f3SNathan Ridge 
80f099f2feSKadir Cetinkaya struct DriverArgs {
81f099f2feSKadir Cetinkaya   // Name of the driver program to execute or absolute path to it.
82f099f2feSKadir Cetinkaya   std::string Driver;
83f099f2feSKadir Cetinkaya   // Whether certain includes should be part of query.
84f099f2feSKadir Cetinkaya   bool StandardIncludes = true;
85f099f2feSKadir Cetinkaya   bool StandardCXXIncludes = true;
86f099f2feSKadir Cetinkaya   // Language to use while querying.
87f099f2feSKadir Cetinkaya   std::string Lang;
88f099f2feSKadir Cetinkaya   std::string Sysroot;
89f099f2feSKadir Cetinkaya   std::string ISysroot;
904af340a6SMatthew Mirvish   std::string Target;
913935a29aSChris Carlon   std::string Stdlib;
92de750085SChris Carlon   llvm::SmallVector<std::string> Specs;
93f099f2feSKadir Cetinkaya 
94f099f2feSKadir Cetinkaya   bool operator==(const DriverArgs &RHS) const {
950478ef2dSSam McCall     return std::tie(Driver, StandardIncludes, StandardCXXIncludes, Lang,
96de750085SChris Carlon                     Sysroot, ISysroot, Target, Stdlib, Specs) ==
97f099f2feSKadir Cetinkaya            std::tie(RHS.Driver, RHS.StandardIncludes, RHS.StandardCXXIncludes,
98de750085SChris Carlon                     RHS.Lang, RHS.Sysroot, RHS.ISysroot, RHS.Target, RHS.Stdlib,
99de750085SChris Carlon                     RHS.Specs);
100f099f2feSKadir Cetinkaya   }
101f099f2feSKadir Cetinkaya 
102f099f2feSKadir Cetinkaya   DriverArgs(const tooling::CompileCommand &Cmd, llvm::StringRef File) {
103f099f2feSKadir Cetinkaya     llvm::SmallString<128> Driver(Cmd.CommandLine.front());
104f099f2feSKadir Cetinkaya     // Driver is a not a single executable name but instead a path (either
105f099f2feSKadir Cetinkaya     // relative or absolute).
106f099f2feSKadir Cetinkaya     if (llvm::any_of(Driver,
107f099f2feSKadir Cetinkaya                      [](char C) { return llvm::sys::path::is_separator(C); })) {
108f099f2feSKadir Cetinkaya       llvm::sys::fs::make_absolute(Cmd.Directory, Driver);
109f099f2feSKadir Cetinkaya     }
110f099f2feSKadir Cetinkaya     this->Driver = Driver.str().str();
111f099f2feSKadir Cetinkaya     for (size_t I = 0, E = Cmd.CommandLine.size(); I < E; ++I) {
112f099f2feSKadir Cetinkaya       llvm::StringRef Arg = Cmd.CommandLine[I];
113f099f2feSKadir Cetinkaya 
114f099f2feSKadir Cetinkaya       // Look for Language related flags.
115f099f2feSKadir Cetinkaya       if (Arg.consume_front("-x")) {
116f099f2feSKadir Cetinkaya         if (Arg.empty() && I + 1 < E)
117f099f2feSKadir Cetinkaya           Lang = Cmd.CommandLine[I + 1];
118f099f2feSKadir Cetinkaya         else
119f099f2feSKadir Cetinkaya           Lang = Arg.str();
120f099f2feSKadir Cetinkaya       }
121f099f2feSKadir Cetinkaya       // Look for standard/builtin includes.
122f099f2feSKadir Cetinkaya       else if (Arg == "-nostdinc" || Arg == "--no-standard-includes")
123f099f2feSKadir Cetinkaya         StandardIncludes = false;
124f099f2feSKadir Cetinkaya       else if (Arg == "-nostdinc++")
125f099f2feSKadir Cetinkaya         StandardCXXIncludes = false;
126f099f2feSKadir Cetinkaya       // Figure out sysroot
127f099f2feSKadir Cetinkaya       else if (Arg.consume_front("--sysroot")) {
128f099f2feSKadir Cetinkaya         if (Arg.consume_front("="))
129f099f2feSKadir Cetinkaya           Sysroot = Arg.str();
130f099f2feSKadir Cetinkaya         else if (Arg.empty() && I + 1 < E)
131f099f2feSKadir Cetinkaya           Sysroot = Cmd.CommandLine[I + 1];
132f099f2feSKadir Cetinkaya       } else if (Arg.consume_front("-isysroot")) {
133f099f2feSKadir Cetinkaya         if (Arg.empty() && I + 1 < E)
134f099f2feSKadir Cetinkaya           ISysroot = Cmd.CommandLine[I + 1];
135f099f2feSKadir Cetinkaya         else
136f099f2feSKadir Cetinkaya           ISysroot = Arg.str();
1374af340a6SMatthew Mirvish       } else if (Arg.consume_front("--target=")) {
1384af340a6SMatthew Mirvish         Target = Arg.str();
1394af340a6SMatthew Mirvish       } else if (Arg.consume_front("-target")) {
1404af340a6SMatthew Mirvish         if (Arg.empty() && I + 1 < E)
1414af340a6SMatthew Mirvish           Target = Cmd.CommandLine[I + 1];
1423935a29aSChris Carlon       } else if (Arg.consume_front("--stdlib")) {
1433935a29aSChris Carlon         if (Arg.consume_front("="))
1443935a29aSChris Carlon           Stdlib = Arg.str();
1453935a29aSChris Carlon         else if (Arg.empty() && I + 1 < E)
1463935a29aSChris Carlon           Stdlib = Cmd.CommandLine[I + 1];
1473935a29aSChris Carlon       } else if (Arg.consume_front("-stdlib=")) {
1483935a29aSChris Carlon         Stdlib = Arg.str();
149*d5953e3eSKazu Hirata       } else if (Arg.starts_with("-specs=")) {
150de750085SChris Carlon         // clang requires a single token like `-specs=file` or `--specs=file`,
151de750085SChris Carlon         // but gcc will accept two tokens like `--specs file`. Since the
152de750085SChris Carlon         // compilation database is presumably correct, we just forward the flags
153de750085SChris Carlon         // as-is.
154de750085SChris Carlon         Specs.push_back(Arg.str());
155*d5953e3eSKazu Hirata       } else if (Arg.starts_with("--specs=")) {
156de750085SChris Carlon         Specs.push_back(Arg.str());
157de750085SChris Carlon       } else if (Arg == "--specs" && I + 1 < E) {
158de750085SChris Carlon         Specs.push_back(Arg.str());
159de750085SChris Carlon         Specs.push_back(Cmd.CommandLine[I + 1]);
160f099f2feSKadir Cetinkaya       }
161f099f2feSKadir Cetinkaya     }
162f099f2feSKadir Cetinkaya 
163bd74186fSNathan Ridge     // Downgrade objective-c++-header (used in clangd's fallback flags for .h
164bd74186fSNathan Ridge     // files) to c++-header, as some drivers may fail to run the extraction
165bd74186fSNathan Ridge     // command if it contains `-xobjective-c++-header` and objective-c++ support
166bd74186fSNathan Ridge     // is not installed.
167bd74186fSNathan Ridge     // In practice, we don't see different include paths for the two on
168bd74186fSNathan Ridge     // clang+mac, which is the most common objectve-c compiler.
169bd74186fSNathan Ridge     if (Lang == "objective-c++-header") {
170bd74186fSNathan Ridge       Lang = "c++-header";
171bd74186fSNathan Ridge     }
172bd74186fSNathan Ridge 
173f099f2feSKadir Cetinkaya     // If language is not explicit in the flags, infer from the file.
174f099f2feSKadir Cetinkaya     // This is important as we want to cache each language separately.
175f099f2feSKadir Cetinkaya     if (Lang.empty()) {
176f099f2feSKadir Cetinkaya       llvm::StringRef Ext = llvm::sys::path::extension(File).trim('.');
177f099f2feSKadir Cetinkaya       auto Type = driver::types::lookupTypeForExtension(Ext);
178f099f2feSKadir Cetinkaya       if (Type == driver::types::TY_INVALID) {
179f099f2feSKadir Cetinkaya         elog("System include extraction: invalid file type for {0}", Ext);
180f099f2feSKadir Cetinkaya       } else {
181f099f2feSKadir Cetinkaya         Lang = driver::types::getTypeName(Type);
182f099f2feSKadir Cetinkaya       }
183f099f2feSKadir Cetinkaya     }
184f099f2feSKadir Cetinkaya   }
185f099f2feSKadir Cetinkaya   llvm::SmallVector<llvm::StringRef> render() const {
186f099f2feSKadir Cetinkaya     // FIXME: Don't treat lang specially?
187f099f2feSKadir Cetinkaya     assert(!Lang.empty());
188f099f2feSKadir Cetinkaya     llvm::SmallVector<llvm::StringRef> Args = {"-x", Lang};
189f099f2feSKadir Cetinkaya     if (!StandardIncludes)
190f099f2feSKadir Cetinkaya       Args.push_back("-nostdinc");
191f099f2feSKadir Cetinkaya     if (!StandardCXXIncludes)
192f099f2feSKadir Cetinkaya       Args.push_back("-nostdinc++");
193f099f2feSKadir Cetinkaya     if (!Sysroot.empty())
194f099f2feSKadir Cetinkaya       Args.append({"--sysroot", Sysroot});
195f099f2feSKadir Cetinkaya     if (!ISysroot.empty())
196f099f2feSKadir Cetinkaya       Args.append({"-isysroot", ISysroot});
1974af340a6SMatthew Mirvish     if (!Target.empty())
1984af340a6SMatthew Mirvish       Args.append({"-target", Target});
1993935a29aSChris Carlon     if (!Stdlib.empty())
2003935a29aSChris Carlon       Args.append({"--stdlib", Stdlib});
201de750085SChris Carlon 
202de750085SChris Carlon     for (llvm::StringRef Spec : Specs) {
203de750085SChris Carlon       Args.push_back(Spec);
204de750085SChris Carlon     }
205de750085SChris Carlon 
206f099f2feSKadir Cetinkaya     return Args;
207f099f2feSKadir Cetinkaya   }
208f099f2feSKadir Cetinkaya 
209f099f2feSKadir Cetinkaya   static DriverArgs getEmpty() { return {}; }
210f099f2feSKadir Cetinkaya 
211f099f2feSKadir Cetinkaya private:
212f099f2feSKadir Cetinkaya   DriverArgs() = default;
213f099f2feSKadir Cetinkaya };
214f099f2feSKadir Cetinkaya } // namespace
215f099f2feSKadir Cetinkaya } // namespace clang::clangd
216f099f2feSKadir Cetinkaya namespace llvm {
217f099f2feSKadir Cetinkaya using DriverArgs = clang::clangd::DriverArgs;
218f099f2feSKadir Cetinkaya template <> struct DenseMapInfo<DriverArgs> {
219f099f2feSKadir Cetinkaya   static DriverArgs getEmptyKey() {
220f099f2feSKadir Cetinkaya     auto Driver = DriverArgs::getEmpty();
221f099f2feSKadir Cetinkaya     Driver.Driver = "EMPTY_KEY";
222f099f2feSKadir Cetinkaya     return Driver;
223f099f2feSKadir Cetinkaya   }
224f099f2feSKadir Cetinkaya   static DriverArgs getTombstoneKey() {
225f099f2feSKadir Cetinkaya     auto Driver = DriverArgs::getEmpty();
226f099f2feSKadir Cetinkaya     Driver.Driver = "TOMBSTONE_KEY";
227f099f2feSKadir Cetinkaya     return Driver;
228f099f2feSKadir Cetinkaya   }
229f099f2feSKadir Cetinkaya   static unsigned getHashValue(const DriverArgs &Val) {
230de750085SChris Carlon     unsigned FixedFieldsHash = llvm::hash_value(std::tuple{
231f099f2feSKadir Cetinkaya         Val.Driver,
232f099f2feSKadir Cetinkaya         Val.StandardIncludes,
233f099f2feSKadir Cetinkaya         Val.StandardCXXIncludes,
234f099f2feSKadir Cetinkaya         Val.Lang,
235f099f2feSKadir Cetinkaya         Val.Sysroot,
236f099f2feSKadir Cetinkaya         Val.ISysroot,
2373935a29aSChris Carlon         Val.Target,
2383935a29aSChris Carlon         Val.Stdlib,
239f099f2feSKadir Cetinkaya     });
240de750085SChris Carlon 
241de750085SChris Carlon     unsigned SpecsHash =
242de750085SChris Carlon         llvm::hash_combine_range(Val.Specs.begin(), Val.Specs.end());
243de750085SChris Carlon 
244de750085SChris Carlon     return llvm::hash_combine(FixedFieldsHash, SpecsHash);
245f099f2feSKadir Cetinkaya   }
246f099f2feSKadir Cetinkaya   static bool isEqual(const DriverArgs &LHS, const DriverArgs &RHS) {
247f099f2feSKadir Cetinkaya     return LHS == RHS;
248f099f2feSKadir Cetinkaya   }
249f099f2feSKadir Cetinkaya };
250f099f2feSKadir Cetinkaya } // namespace llvm
251f099f2feSKadir Cetinkaya namespace clang::clangd {
252f099f2feSKadir Cetinkaya namespace {
253428ac8f3SNathan Ridge bool isValidTarget(llvm::StringRef Triple) {
254428ac8f3SNathan Ridge   std::shared_ptr<TargetOptions> TargetOpts(new TargetOptions);
255428ac8f3SNathan Ridge   TargetOpts->Triple = Triple.str();
256428ac8f3SNathan Ridge   DiagnosticsEngine Diags(new DiagnosticIDs, new DiagnosticOptions,
257428ac8f3SNathan Ridge                           new IgnoringDiagConsumer);
258f099f2feSKadir Cetinkaya   llvm::IntrusiveRefCntPtr<TargetInfo> Target =
259428ac8f3SNathan Ridge       TargetInfo::CreateTargetInfo(Diags, TargetOpts);
260428ac8f3SNathan Ridge   return bool(Target);
261428ac8f3SNathan Ridge }
262428ac8f3SNathan Ridge 
263f71ffd3bSKazu Hirata std::optional<DriverInfo> parseDriverOutput(llvm::StringRef Output) {
264428ac8f3SNathan Ridge   DriverInfo Info;
265428ac8f3SNathan Ridge   const char SIS[] = "#include <...> search starts here:";
266428ac8f3SNathan Ridge   const char SIE[] = "End of search list.";
267428ac8f3SNathan Ridge   const char TS[] = "Target: ";
268428ac8f3SNathan Ridge   llvm::SmallVector<llvm::StringRef> Lines;
269428ac8f3SNathan Ridge   Output.split(Lines, '\n', /*MaxSplit=*/-1, /*KeepEmpty=*/false);
270428ac8f3SNathan Ridge 
271428ac8f3SNathan Ridge   enum {
272428ac8f3SNathan Ridge     Initial,            // Initial state: searching for target or includes list.
273428ac8f3SNathan Ridge     IncludesExtracting, // Includes extracting.
274428ac8f3SNathan Ridge     Done                // Includes and target extraction done.
275428ac8f3SNathan Ridge   } State = Initial;
276428ac8f3SNathan Ridge   bool SeenIncludes = false;
277428ac8f3SNathan Ridge   bool SeenTarget = false;
278428ac8f3SNathan Ridge   for (auto *It = Lines.begin(); State != Done && It != Lines.end(); ++It) {
279428ac8f3SNathan Ridge     auto Line = *It;
280428ac8f3SNathan Ridge     switch (State) {
281428ac8f3SNathan Ridge     case Initial:
282428ac8f3SNathan Ridge       if (!SeenIncludes && Line.trim() == SIS) {
283428ac8f3SNathan Ridge         SeenIncludes = true;
284428ac8f3SNathan Ridge         State = IncludesExtracting;
285*d5953e3eSKazu Hirata       } else if (!SeenTarget && Line.trim().starts_with(TS)) {
286428ac8f3SNathan Ridge         SeenTarget = true;
287428ac8f3SNathan Ridge         llvm::StringRef TargetLine = Line.trim();
288428ac8f3SNathan Ridge         TargetLine.consume_front(TS);
289428ac8f3SNathan Ridge         // Only detect targets that clang understands
290428ac8f3SNathan Ridge         if (!isValidTarget(TargetLine)) {
291428ac8f3SNathan Ridge           elog("System include extraction: invalid target \"{0}\", ignoring",
292428ac8f3SNathan Ridge                TargetLine);
293428ac8f3SNathan Ridge         } else {
294428ac8f3SNathan Ridge           Info.Target = TargetLine.str();
295428ac8f3SNathan Ridge           vlog("System include extraction: target extracted: \"{0}\"",
296428ac8f3SNathan Ridge                TargetLine);
297428ac8f3SNathan Ridge         }
298428ac8f3SNathan Ridge       }
299428ac8f3SNathan Ridge       break;
300428ac8f3SNathan Ridge     case IncludesExtracting:
301428ac8f3SNathan Ridge       if (Line.trim() == SIE) {
302428ac8f3SNathan Ridge         State = SeenTarget ? Done : Initial;
303428ac8f3SNathan Ridge       } else {
304428ac8f3SNathan Ridge         Info.SystemIncludes.push_back(Line.trim().str());
305428ac8f3SNathan Ridge         vlog("System include extraction: adding {0}", Line);
306428ac8f3SNathan Ridge       }
307428ac8f3SNathan Ridge       break;
308428ac8f3SNathan Ridge     default:
309428ac8f3SNathan Ridge       llvm_unreachable("Impossible state of the driver output parser");
310428ac8f3SNathan Ridge       break;
311428ac8f3SNathan Ridge     }
312428ac8f3SNathan Ridge   }
313428ac8f3SNathan Ridge   if (!SeenIncludes) {
314428ac8f3SNathan Ridge     elog("System include extraction: start marker not found: {0}", Output);
315059a23c0SKazu Hirata     return std::nullopt;
316428ac8f3SNathan Ridge   }
317428ac8f3SNathan Ridge   if (State == IncludesExtracting) {
318428ac8f3SNathan Ridge     elog("System include extraction: end marker missing: {0}", Output);
319059a23c0SKazu Hirata     return std::nullopt;
320428ac8f3SNathan Ridge   }
321428ac8f3SNathan Ridge   return std::move(Info);
322428ac8f3SNathan Ridge }
323428ac8f3SNathan Ridge 
3240478ef2dSSam McCall std::optional<std::string> run(llvm::ArrayRef<llvm::StringRef> Argv,
3250478ef2dSSam McCall                                bool OutputIsStderr) {
3260478ef2dSSam McCall   llvm::SmallString<128> OutputPath;
3270478ef2dSSam McCall   if (auto EC = llvm::sys::fs::createTemporaryFile("system-includes", "clangd",
3280478ef2dSSam McCall                                                    OutputPath)) {
3290478ef2dSSam McCall     elog("System include extraction: failed to create temporary file with "
3300478ef2dSSam McCall          "error {0}",
3310478ef2dSSam McCall          EC.message());
3320478ef2dSSam McCall     return std::nullopt;
3330478ef2dSSam McCall   }
3340478ef2dSSam McCall   auto CleanUp = llvm::make_scope_exit(
3350478ef2dSSam McCall       [&OutputPath]() { llvm::sys::fs::remove(OutputPath); });
3360478ef2dSSam McCall 
3370478ef2dSSam McCall   std::optional<llvm::StringRef> Redirects[] = {{""}, {""}, {""}};
3380478ef2dSSam McCall   Redirects[OutputIsStderr ? 2 : 1] = OutputPath.str();
3390478ef2dSSam McCall 
3400478ef2dSSam McCall   std::string ErrMsg;
3410478ef2dSSam McCall   if (int RC =
3420478ef2dSSam McCall           llvm::sys::ExecuteAndWait(Argv.front(), Argv, /*Env=*/std::nullopt,
3430478ef2dSSam McCall                                     Redirects, /*SecondsToWait=*/0,
3440478ef2dSSam McCall                                     /*MemoryLimit=*/0, &ErrMsg)) {
3450478ef2dSSam McCall     elog("System include extraction: driver execution failed with return code: "
3460478ef2dSSam McCall          "{0} - '{1}'. Args: [{2}]",
3470478ef2dSSam McCall          llvm::to_string(RC), ErrMsg, printArgv(Argv));
3480478ef2dSSam McCall     return std::nullopt;
3490478ef2dSSam McCall   }
3500478ef2dSSam McCall 
3510478ef2dSSam McCall   auto BufOrError = llvm::MemoryBuffer::getFile(OutputPath);
3520478ef2dSSam McCall   if (!BufOrError) {
3530478ef2dSSam McCall     elog("System include extraction: failed to read {0} with error {1}",
3540478ef2dSSam McCall          OutputPath, BufOrError.getError().message());
3550478ef2dSSam McCall     return std::nullopt;
3560478ef2dSSam McCall   }
3570478ef2dSSam McCall   return BufOrError.get().get()->getBuffer().str();
3580478ef2dSSam McCall }
3590478ef2dSSam McCall 
360f71ffd3bSKazu Hirata std::optional<DriverInfo>
361f099f2feSKadir Cetinkaya extractSystemIncludesAndTarget(const DriverArgs &InputArgs,
362428ac8f3SNathan Ridge                                const llvm::Regex &QueryDriverRegex) {
363428ac8f3SNathan Ridge   trace::Span Tracer("Extract system includes and target");
364428ac8f3SNathan Ridge 
365f099f2feSKadir Cetinkaya   std::string Driver = InputArgs.Driver;
366428ac8f3SNathan Ridge   if (!llvm::sys::path::is_absolute(Driver)) {
367428ac8f3SNathan Ridge     auto DriverProgram = llvm::sys::findProgramByName(Driver);
368428ac8f3SNathan Ridge     if (DriverProgram) {
369428ac8f3SNathan Ridge       vlog("System include extraction: driver {0} expanded to {1}", Driver,
370428ac8f3SNathan Ridge            *DriverProgram);
371428ac8f3SNathan Ridge       Driver = *DriverProgram;
372428ac8f3SNathan Ridge     } else {
373428ac8f3SNathan Ridge       elog("System include extraction: driver {0} not found in PATH", Driver);
374059a23c0SKazu Hirata       return std::nullopt;
375428ac8f3SNathan Ridge     }
376428ac8f3SNathan Ridge   }
377428ac8f3SNathan Ridge 
378428ac8f3SNathan Ridge   SPAN_ATTACH(Tracer, "driver", Driver);
379f099f2feSKadir Cetinkaya   SPAN_ATTACH(Tracer, "lang", InputArgs.Lang);
380428ac8f3SNathan Ridge 
38101d3045dSSam McCall   // If driver was "../foo" then having to allowlist "/path/a/../foo" rather
38201d3045dSSam McCall   // than "/path/foo" is absurd.
38301d3045dSSam McCall   // Allow either to match the allowlist, then proceed with "/path/a/../foo".
38401d3045dSSam McCall   // This was our historical behavior, and it *could* resolve to something else.
38501d3045dSSam McCall   llvm::SmallString<256> NoDots(Driver);
38601d3045dSSam McCall   llvm::sys::path::remove_dots(NoDots, /*remove_dot_dot=*/true);
38701d3045dSSam McCall   if (!QueryDriverRegex.match(Driver) && !QueryDriverRegex.match(NoDots)) {
388428ac8f3SNathan Ridge     vlog("System include extraction: not allowed driver {0}", Driver);
389059a23c0SKazu Hirata     return std::nullopt;
390428ac8f3SNathan Ridge   }
391428ac8f3SNathan Ridge 
392f099f2feSKadir Cetinkaya   llvm::SmallVector<llvm::StringRef> Args = {Driver, "-E", "-v"};
393f099f2feSKadir Cetinkaya   Args.append(InputArgs.render());
394f099f2feSKadir Cetinkaya   // Input needs to go after Lang flags.
395f099f2feSKadir Cetinkaya   Args.push_back("-");
3960478ef2dSSam McCall   auto Output = run(Args, /*OutputIsStderr=*/true);
3970478ef2dSSam McCall   if (!Output)
398059a23c0SKazu Hirata     return std::nullopt;
399428ac8f3SNathan Ridge 
4000478ef2dSSam McCall   std::optional<DriverInfo> Info = parseDriverOutput(*Output);
401428ac8f3SNathan Ridge   if (!Info)
402059a23c0SKazu Hirata     return std::nullopt;
4030478ef2dSSam McCall 
4040478ef2dSSam McCall   // The built-in headers are tightly coupled to parser builtins.
4050478ef2dSSam McCall   // (These are clang's "resource dir", GCC's GCC_INCLUDE_DIR.)
4060478ef2dSSam McCall   // We should keep using clangd's versions, so exclude the queried builtins.
4070478ef2dSSam McCall   // They're not specially marked in the -v output, but we can get the path
4080478ef2dSSam McCall   // with `$DRIVER -print-file-name=include`.
4090478ef2dSSam McCall   if (auto BuiltinHeaders =
4100478ef2dSSam McCall           run({Driver, "-print-file-name=include"}, /*OutputIsStderr=*/false)) {
4110478ef2dSSam McCall     auto Path = llvm::StringRef(*BuiltinHeaders).trim();
4120478ef2dSSam McCall     if (!Path.empty() && llvm::sys::path::is_absolute(Path)) {
4130478ef2dSSam McCall       auto Size = Info->SystemIncludes.size();
414f9306f6dSKazu Hirata       llvm::erase(Info->SystemIncludes, Path);
4150478ef2dSSam McCall       vlog("System includes extractor: builtin headers {0} {1}", Path,
4160478ef2dSSam McCall            (Info->SystemIncludes.size() != Size)
4170478ef2dSSam McCall                ? "excluded"
4180478ef2dSSam McCall                : "not found in driver's response");
4190478ef2dSSam McCall     }
4200478ef2dSSam McCall   }
4210478ef2dSSam McCall 
422428ac8f3SNathan Ridge   log("System includes extractor: successfully executed {0}\n\tgot includes: "
423428ac8f3SNathan Ridge       "\"{1}\"\n\tgot target: \"{2}\"",
424428ac8f3SNathan Ridge       Driver, llvm::join(Info->SystemIncludes, ", "), Info->Target);
425428ac8f3SNathan Ridge   return Info;
426428ac8f3SNathan Ridge }
427428ac8f3SNathan Ridge 
428428ac8f3SNathan Ridge tooling::CompileCommand &
429428ac8f3SNathan Ridge addSystemIncludes(tooling::CompileCommand &Cmd,
430428ac8f3SNathan Ridge                   llvm::ArrayRef<std::string> SystemIncludes) {
431428ac8f3SNathan Ridge   std::vector<std::string> ToAppend;
432428ac8f3SNathan Ridge   for (llvm::StringRef Include : SystemIncludes) {
433428ac8f3SNathan Ridge     // FIXME(kadircet): This doesn't work when we have "--driver-mode=cl"
434428ac8f3SNathan Ridge     ToAppend.push_back("-isystem");
435428ac8f3SNathan Ridge     ToAppend.push_back(Include.str());
436428ac8f3SNathan Ridge   }
437428ac8f3SNathan Ridge   if (!ToAppend.empty()) {
438428ac8f3SNathan Ridge     // Just append when `--` isn't present.
439428ac8f3SNathan Ridge     auto InsertAt = llvm::find(Cmd.CommandLine, "--");
440428ac8f3SNathan Ridge     Cmd.CommandLine.insert(InsertAt, std::make_move_iterator(ToAppend.begin()),
441428ac8f3SNathan Ridge                            std::make_move_iterator(ToAppend.end()));
442428ac8f3SNathan Ridge   }
443428ac8f3SNathan Ridge   return Cmd;
444428ac8f3SNathan Ridge }
445428ac8f3SNathan Ridge 
446428ac8f3SNathan Ridge tooling::CompileCommand &setTarget(tooling::CompileCommand &Cmd,
447428ac8f3SNathan Ridge                                    const std::string &Target) {
448428ac8f3SNathan Ridge   if (!Target.empty()) {
449428ac8f3SNathan Ridge     // We do not want to override existing target with extracted one.
450428ac8f3SNathan Ridge     for (llvm::StringRef Arg : Cmd.CommandLine) {
451*d5953e3eSKazu Hirata       if (Arg == "-target" || Arg.starts_with("--target="))
452428ac8f3SNathan Ridge         return Cmd;
453428ac8f3SNathan Ridge     }
454428ac8f3SNathan Ridge     // Just append when `--` isn't present.
455428ac8f3SNathan Ridge     auto InsertAt = llvm::find(Cmd.CommandLine, "--");
456428ac8f3SNathan Ridge     Cmd.CommandLine.insert(InsertAt, "--target=" + Target);
457428ac8f3SNathan Ridge   }
458428ac8f3SNathan Ridge   return Cmd;
459428ac8f3SNathan Ridge }
460428ac8f3SNathan Ridge 
461428ac8f3SNathan Ridge /// Converts a glob containing only ** or * into a regex.
462428ac8f3SNathan Ridge std::string convertGlobToRegex(llvm::StringRef Glob) {
463428ac8f3SNathan Ridge   std::string RegText;
464428ac8f3SNathan Ridge   llvm::raw_string_ostream RegStream(RegText);
465428ac8f3SNathan Ridge   RegStream << '^';
466428ac8f3SNathan Ridge   for (size_t I = 0, E = Glob.size(); I < E; ++I) {
467428ac8f3SNathan Ridge     if (Glob[I] == '*') {
468428ac8f3SNathan Ridge       if (I + 1 < E && Glob[I + 1] == '*') {
469428ac8f3SNathan Ridge         // Double star, accept any sequence.
470428ac8f3SNathan Ridge         RegStream << ".*";
471428ac8f3SNathan Ridge         // Also skip the second star.
472428ac8f3SNathan Ridge         ++I;
473428ac8f3SNathan Ridge       } else {
474428ac8f3SNathan Ridge         // Single star, accept any sequence without a slash.
475428ac8f3SNathan Ridge         RegStream << "[^/]*";
476428ac8f3SNathan Ridge       }
477428ac8f3SNathan Ridge     } else if (llvm::sys::path::is_separator(Glob[I]) &&
478428ac8f3SNathan Ridge                llvm::sys::path::is_separator('/') &&
479428ac8f3SNathan Ridge                llvm::sys::path::is_separator('\\')) {
480428ac8f3SNathan Ridge       RegStream << R"([/\\])"; // Accept either slash on windows.
481428ac8f3SNathan Ridge     } else {
482428ac8f3SNathan Ridge       RegStream << llvm::Regex::escape(Glob.substr(I, 1));
483428ac8f3SNathan Ridge     }
484428ac8f3SNathan Ridge   }
485428ac8f3SNathan Ridge   RegStream << '$';
486428ac8f3SNathan Ridge   return RegText;
487428ac8f3SNathan Ridge }
488428ac8f3SNathan Ridge 
489428ac8f3SNathan Ridge /// Converts a glob containing only ** or * into a regex.
490428ac8f3SNathan Ridge llvm::Regex convertGlobsToRegex(llvm::ArrayRef<std::string> Globs) {
491428ac8f3SNathan Ridge   assert(!Globs.empty() && "Globs cannot be empty!");
492428ac8f3SNathan Ridge   std::vector<std::string> RegTexts;
493428ac8f3SNathan Ridge   RegTexts.reserve(Globs.size());
494428ac8f3SNathan Ridge   for (llvm::StringRef Glob : Globs)
495428ac8f3SNathan Ridge     RegTexts.push_back(convertGlobToRegex(Glob));
496428ac8f3SNathan Ridge 
497428ac8f3SNathan Ridge   // Tempting to pass IgnoreCase, but we don't know the FS sensitivity.
498428ac8f3SNathan Ridge   llvm::Regex Reg(llvm::join(RegTexts, "|"));
499428ac8f3SNathan Ridge   assert(Reg.isValid(RegTexts.front()) &&
500428ac8f3SNathan Ridge          "Created an invalid regex from globs");
501428ac8f3SNathan Ridge   return Reg;
502428ac8f3SNathan Ridge }
503428ac8f3SNathan Ridge 
504428ac8f3SNathan Ridge /// Extracts system includes from a trusted driver by parsing the output of
505428ac8f3SNathan Ridge /// include search path and appends them to the commands coming from underlying
506428ac8f3SNathan Ridge /// compilation database.
507428ac8f3SNathan Ridge class SystemIncludeExtractor {
508428ac8f3SNathan Ridge public:
509428ac8f3SNathan Ridge   SystemIncludeExtractor(llvm::ArrayRef<std::string> QueryDriverGlobs)
510428ac8f3SNathan Ridge       : QueryDriverRegex(convertGlobsToRegex(QueryDriverGlobs)) {}
511428ac8f3SNathan Ridge 
512428ac8f3SNathan Ridge   void operator()(tooling::CompileCommand &Cmd, llvm::StringRef File) const {
513428ac8f3SNathan Ridge     if (Cmd.CommandLine.empty())
514428ac8f3SNathan Ridge       return;
515428ac8f3SNathan Ridge 
516f099f2feSKadir Cetinkaya     DriverArgs Args(Cmd, File);
517f099f2feSKadir Cetinkaya     if (Args.Lang.empty())
518428ac8f3SNathan Ridge       return;
519f099f2feSKadir Cetinkaya     if (auto Info = QueriedDrivers.get(Args, [&] {
520f099f2feSKadir Cetinkaya           return extractSystemIncludesAndTarget(Args, QueryDriverRegex);
521428ac8f3SNathan Ridge         })) {
522428ac8f3SNathan Ridge       setTarget(addSystemIncludes(Cmd, Info->SystemIncludes), Info->Target);
523428ac8f3SNathan Ridge     }
524428ac8f3SNathan Ridge   }
525428ac8f3SNathan Ridge 
526428ac8f3SNathan Ridge private:
527428ac8f3SNathan Ridge   // Caches includes extracted from a driver. Key is driver:lang.
528f099f2feSKadir Cetinkaya   Memoize<llvm::DenseMap<DriverArgs, std::optional<DriverInfo>>> QueriedDrivers;
529428ac8f3SNathan Ridge   llvm::Regex QueryDriverRegex;
530428ac8f3SNathan Ridge };
531428ac8f3SNathan Ridge } // namespace
532428ac8f3SNathan Ridge 
533428ac8f3SNathan Ridge SystemIncludeExtractorFn
534428ac8f3SNathan Ridge getSystemIncludeExtractor(llvm::ArrayRef<std::string> QueryDriverGlobs) {
535428ac8f3SNathan Ridge   if (QueryDriverGlobs.empty())
536428ac8f3SNathan Ridge     return nullptr;
537428ac8f3SNathan Ridge   return SystemIncludeExtractor(QueryDriverGlobs);
538428ac8f3SNathan Ridge }
539428ac8f3SNathan Ridge 
540f099f2feSKadir Cetinkaya } // namespace clang::clangd
541