1428ac8f3SNathan Ridge //===--- SystemIncludeExtractor.cpp ------------------------------*- C++-*-===// 2428ac8f3SNathan Ridge // 3428ac8f3SNathan Ridge // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4428ac8f3SNathan Ridge // See https://llvm.org/LICENSE.txt for license information. 5428ac8f3SNathan Ridge // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6428ac8f3SNathan Ridge // 7428ac8f3SNathan Ridge //===----------------------------------------------------------------------===// 8428ac8f3SNathan Ridge // Some compiler drivers have implicit search mechanism for system headers. 9428ac8f3SNathan Ridge // This compilation database implementation tries to extract that information by 10428ac8f3SNathan Ridge // executing the driver in verbose mode. gcc-compatible drivers print something 11428ac8f3SNathan Ridge // like: 12428ac8f3SNathan Ridge // .... 13428ac8f3SNathan Ridge // .... 14428ac8f3SNathan Ridge // #include <...> search starts here: 15428ac8f3SNathan Ridge // /usr/lib/gcc/x86_64-linux-gnu/7/include 16428ac8f3SNathan Ridge // /usr/local/include 17428ac8f3SNathan Ridge // /usr/lib/gcc/x86_64-linux-gnu/7/include-fixed 18428ac8f3SNathan Ridge // /usr/include/x86_64-linux-gnu 19428ac8f3SNathan Ridge // /usr/include 20428ac8f3SNathan Ridge // End of search list. 21428ac8f3SNathan Ridge // .... 22428ac8f3SNathan Ridge // .... 23428ac8f3SNathan Ridge // This component parses that output and adds each path to command line args 24428ac8f3SNathan Ridge // provided by Base, after prepending them with -isystem. Therefore current 25428ac8f3SNathan Ridge // implementation would not work with a driver that is not gcc-compatible. 26428ac8f3SNathan Ridge // 27428ac8f3SNathan Ridge // First argument of the command line received from underlying compilation 28428ac8f3SNathan Ridge // database is used as compiler driver path. Due to this arbitrary binary 29428ac8f3SNathan Ridge // execution, this mechanism is not used by default and only executes binaries 30428ac8f3SNathan Ridge // in the paths that are explicitly included by the user. 31428ac8f3SNathan Ridge 32428ac8f3SNathan Ridge #include "CompileCommands.h" 33428ac8f3SNathan Ridge #include "GlobalCompilationDatabase.h" 34428ac8f3SNathan Ridge #include "support/Logger.h" 35f099f2feSKadir Cetinkaya #include "support/Threading.h" 36428ac8f3SNathan Ridge #include "support/Trace.h" 37428ac8f3SNathan Ridge #include "clang/Basic/Diagnostic.h" 38f099f2feSKadir Cetinkaya #include "clang/Basic/DiagnosticIDs.h" 39f099f2feSKadir Cetinkaya #include "clang/Basic/DiagnosticOptions.h" 40428ac8f3SNathan Ridge #include "clang/Basic/TargetInfo.h" 41428ac8f3SNathan Ridge #include "clang/Basic/TargetOptions.h" 42428ac8f3SNathan Ridge #include "clang/Driver/Types.h" 43428ac8f3SNathan Ridge #include "clang/Tooling/CompilationDatabase.h" 44f099f2feSKadir Cetinkaya #include "llvm/ADT/ArrayRef.h" 45f099f2feSKadir Cetinkaya #include "llvm/ADT/DenseMap.h" 46f099f2feSKadir Cetinkaya #include "llvm/ADT/Hashing.h" 47f099f2feSKadir Cetinkaya #include "llvm/ADT/IntrusiveRefCntPtr.h" 48f099f2feSKadir Cetinkaya #include "llvm/ADT/STLExtras.h" 49428ac8f3SNathan Ridge #include "llvm/ADT/ScopeExit.h" 50428ac8f3SNathan Ridge #include "llvm/ADT/SmallString.h" 51f099f2feSKadir Cetinkaya #include "llvm/ADT/SmallVector.h" 52428ac8f3SNathan Ridge #include "llvm/ADT/StringExtras.h" 53428ac8f3SNathan Ridge #include "llvm/ADT/StringRef.h" 54f099f2feSKadir Cetinkaya #include "llvm/Support/ErrorHandling.h" 55428ac8f3SNathan Ridge #include "llvm/Support/FileSystem.h" 56428ac8f3SNathan Ridge #include "llvm/Support/MemoryBuffer.h" 57428ac8f3SNathan Ridge #include "llvm/Support/Path.h" 58428ac8f3SNathan Ridge #include "llvm/Support/Program.h" 59428ac8f3SNathan Ridge #include "llvm/Support/Regex.h" 60428ac8f3SNathan Ridge #include "llvm/Support/ScopedPrinter.h" 61f099f2feSKadir Cetinkaya #include "llvm/Support/raw_ostream.h" 62f099f2feSKadir Cetinkaya #include <cassert> 63f099f2feSKadir Cetinkaya #include <cstddef> 64428ac8f3SNathan Ridge #include <iterator> 65f099f2feSKadir Cetinkaya #include <memory> 6671f55735SKazu Hirata #include <optional> 67428ac8f3SNathan Ridge #include <string> 68f099f2feSKadir Cetinkaya #include <tuple> 69f099f2feSKadir Cetinkaya #include <utility> 70428ac8f3SNathan Ridge #include <vector> 71428ac8f3SNathan Ridge 72f099f2feSKadir Cetinkaya namespace clang::clangd { 73428ac8f3SNathan Ridge namespace { 74428ac8f3SNathan Ridge 75428ac8f3SNathan Ridge struct DriverInfo { 76428ac8f3SNathan Ridge std::vector<std::string> SystemIncludes; 77428ac8f3SNathan Ridge std::string Target; 78428ac8f3SNathan Ridge }; 79428ac8f3SNathan Ridge 80f099f2feSKadir Cetinkaya struct DriverArgs { 81f099f2feSKadir Cetinkaya // Name of the driver program to execute or absolute path to it. 82f099f2feSKadir Cetinkaya std::string Driver; 83f099f2feSKadir Cetinkaya // Whether certain includes should be part of query. 84f099f2feSKadir Cetinkaya bool StandardIncludes = true; 85f099f2feSKadir Cetinkaya bool StandardCXXIncludes = true; 86f099f2feSKadir Cetinkaya // Language to use while querying. 87f099f2feSKadir Cetinkaya std::string Lang; 88f099f2feSKadir Cetinkaya std::string Sysroot; 89f099f2feSKadir Cetinkaya std::string ISysroot; 904af340a6SMatthew Mirvish std::string Target; 913935a29aSChris Carlon std::string Stdlib; 92de750085SChris Carlon llvm::SmallVector<std::string> Specs; 93f099f2feSKadir Cetinkaya 94f099f2feSKadir Cetinkaya bool operator==(const DriverArgs &RHS) const { 950478ef2dSSam McCall return std::tie(Driver, StandardIncludes, StandardCXXIncludes, Lang, 96de750085SChris Carlon Sysroot, ISysroot, Target, Stdlib, Specs) == 97f099f2feSKadir Cetinkaya std::tie(RHS.Driver, RHS.StandardIncludes, RHS.StandardCXXIncludes, 98de750085SChris Carlon RHS.Lang, RHS.Sysroot, RHS.ISysroot, RHS.Target, RHS.Stdlib, 99de750085SChris Carlon RHS.Specs); 100f099f2feSKadir Cetinkaya } 101f099f2feSKadir Cetinkaya 102f099f2feSKadir Cetinkaya DriverArgs(const tooling::CompileCommand &Cmd, llvm::StringRef File) { 103f099f2feSKadir Cetinkaya llvm::SmallString<128> Driver(Cmd.CommandLine.front()); 104f099f2feSKadir Cetinkaya // Driver is a not a single executable name but instead a path (either 105f099f2feSKadir Cetinkaya // relative or absolute). 106f099f2feSKadir Cetinkaya if (llvm::any_of(Driver, 107f099f2feSKadir Cetinkaya [](char C) { return llvm::sys::path::is_separator(C); })) { 108f099f2feSKadir Cetinkaya llvm::sys::fs::make_absolute(Cmd.Directory, Driver); 109f099f2feSKadir Cetinkaya } 110f099f2feSKadir Cetinkaya this->Driver = Driver.str().str(); 111f099f2feSKadir Cetinkaya for (size_t I = 0, E = Cmd.CommandLine.size(); I < E; ++I) { 112f099f2feSKadir Cetinkaya llvm::StringRef Arg = Cmd.CommandLine[I]; 113f099f2feSKadir Cetinkaya 114f099f2feSKadir Cetinkaya // Look for Language related flags. 115f099f2feSKadir Cetinkaya if (Arg.consume_front("-x")) { 116f099f2feSKadir Cetinkaya if (Arg.empty() && I + 1 < E) 117f099f2feSKadir Cetinkaya Lang = Cmd.CommandLine[I + 1]; 118f099f2feSKadir Cetinkaya else 119f099f2feSKadir Cetinkaya Lang = Arg.str(); 120f099f2feSKadir Cetinkaya } 121f099f2feSKadir Cetinkaya // Look for standard/builtin includes. 122f099f2feSKadir Cetinkaya else if (Arg == "-nostdinc" || Arg == "--no-standard-includes") 123f099f2feSKadir Cetinkaya StandardIncludes = false; 124f099f2feSKadir Cetinkaya else if (Arg == "-nostdinc++") 125f099f2feSKadir Cetinkaya StandardCXXIncludes = false; 126f099f2feSKadir Cetinkaya // Figure out sysroot 127f099f2feSKadir Cetinkaya else if (Arg.consume_front("--sysroot")) { 128f099f2feSKadir Cetinkaya if (Arg.consume_front("=")) 129f099f2feSKadir Cetinkaya Sysroot = Arg.str(); 130f099f2feSKadir Cetinkaya else if (Arg.empty() && I + 1 < E) 131f099f2feSKadir Cetinkaya Sysroot = Cmd.CommandLine[I + 1]; 132f099f2feSKadir Cetinkaya } else if (Arg.consume_front("-isysroot")) { 133f099f2feSKadir Cetinkaya if (Arg.empty() && I + 1 < E) 134f099f2feSKadir Cetinkaya ISysroot = Cmd.CommandLine[I + 1]; 135f099f2feSKadir Cetinkaya else 136f099f2feSKadir Cetinkaya ISysroot = Arg.str(); 1374af340a6SMatthew Mirvish } else if (Arg.consume_front("--target=")) { 1384af340a6SMatthew Mirvish Target = Arg.str(); 1394af340a6SMatthew Mirvish } else if (Arg.consume_front("-target")) { 1404af340a6SMatthew Mirvish if (Arg.empty() && I + 1 < E) 1414af340a6SMatthew Mirvish Target = Cmd.CommandLine[I + 1]; 1423935a29aSChris Carlon } else if (Arg.consume_front("--stdlib")) { 1433935a29aSChris Carlon if (Arg.consume_front("=")) 1443935a29aSChris Carlon Stdlib = Arg.str(); 1453935a29aSChris Carlon else if (Arg.empty() && I + 1 < E) 1463935a29aSChris Carlon Stdlib = Cmd.CommandLine[I + 1]; 1473935a29aSChris Carlon } else if (Arg.consume_front("-stdlib=")) { 1483935a29aSChris Carlon Stdlib = Arg.str(); 149*d5953e3eSKazu Hirata } else if (Arg.starts_with("-specs=")) { 150de750085SChris Carlon // clang requires a single token like `-specs=file` or `--specs=file`, 151de750085SChris Carlon // but gcc will accept two tokens like `--specs file`. Since the 152de750085SChris Carlon // compilation database is presumably correct, we just forward the flags 153de750085SChris Carlon // as-is. 154de750085SChris Carlon Specs.push_back(Arg.str()); 155*d5953e3eSKazu Hirata } else if (Arg.starts_with("--specs=")) { 156de750085SChris Carlon Specs.push_back(Arg.str()); 157de750085SChris Carlon } else if (Arg == "--specs" && I + 1 < E) { 158de750085SChris Carlon Specs.push_back(Arg.str()); 159de750085SChris Carlon Specs.push_back(Cmd.CommandLine[I + 1]); 160f099f2feSKadir Cetinkaya } 161f099f2feSKadir Cetinkaya } 162f099f2feSKadir Cetinkaya 163bd74186fSNathan Ridge // Downgrade objective-c++-header (used in clangd's fallback flags for .h 164bd74186fSNathan Ridge // files) to c++-header, as some drivers may fail to run the extraction 165bd74186fSNathan Ridge // command if it contains `-xobjective-c++-header` and objective-c++ support 166bd74186fSNathan Ridge // is not installed. 167bd74186fSNathan Ridge // In practice, we don't see different include paths for the two on 168bd74186fSNathan Ridge // clang+mac, which is the most common objectve-c compiler. 169bd74186fSNathan Ridge if (Lang == "objective-c++-header") { 170bd74186fSNathan Ridge Lang = "c++-header"; 171bd74186fSNathan Ridge } 172bd74186fSNathan Ridge 173f099f2feSKadir Cetinkaya // If language is not explicit in the flags, infer from the file. 174f099f2feSKadir Cetinkaya // This is important as we want to cache each language separately. 175f099f2feSKadir Cetinkaya if (Lang.empty()) { 176f099f2feSKadir Cetinkaya llvm::StringRef Ext = llvm::sys::path::extension(File).trim('.'); 177f099f2feSKadir Cetinkaya auto Type = driver::types::lookupTypeForExtension(Ext); 178f099f2feSKadir Cetinkaya if (Type == driver::types::TY_INVALID) { 179f099f2feSKadir Cetinkaya elog("System include extraction: invalid file type for {0}", Ext); 180f099f2feSKadir Cetinkaya } else { 181f099f2feSKadir Cetinkaya Lang = driver::types::getTypeName(Type); 182f099f2feSKadir Cetinkaya } 183f099f2feSKadir Cetinkaya } 184f099f2feSKadir Cetinkaya } 185f099f2feSKadir Cetinkaya llvm::SmallVector<llvm::StringRef> render() const { 186f099f2feSKadir Cetinkaya // FIXME: Don't treat lang specially? 187f099f2feSKadir Cetinkaya assert(!Lang.empty()); 188f099f2feSKadir Cetinkaya llvm::SmallVector<llvm::StringRef> Args = {"-x", Lang}; 189f099f2feSKadir Cetinkaya if (!StandardIncludes) 190f099f2feSKadir Cetinkaya Args.push_back("-nostdinc"); 191f099f2feSKadir Cetinkaya if (!StandardCXXIncludes) 192f099f2feSKadir Cetinkaya Args.push_back("-nostdinc++"); 193f099f2feSKadir Cetinkaya if (!Sysroot.empty()) 194f099f2feSKadir Cetinkaya Args.append({"--sysroot", Sysroot}); 195f099f2feSKadir Cetinkaya if (!ISysroot.empty()) 196f099f2feSKadir Cetinkaya Args.append({"-isysroot", ISysroot}); 1974af340a6SMatthew Mirvish if (!Target.empty()) 1984af340a6SMatthew Mirvish Args.append({"-target", Target}); 1993935a29aSChris Carlon if (!Stdlib.empty()) 2003935a29aSChris Carlon Args.append({"--stdlib", Stdlib}); 201de750085SChris Carlon 202de750085SChris Carlon for (llvm::StringRef Spec : Specs) { 203de750085SChris Carlon Args.push_back(Spec); 204de750085SChris Carlon } 205de750085SChris Carlon 206f099f2feSKadir Cetinkaya return Args; 207f099f2feSKadir Cetinkaya } 208f099f2feSKadir Cetinkaya 209f099f2feSKadir Cetinkaya static DriverArgs getEmpty() { return {}; } 210f099f2feSKadir Cetinkaya 211f099f2feSKadir Cetinkaya private: 212f099f2feSKadir Cetinkaya DriverArgs() = default; 213f099f2feSKadir Cetinkaya }; 214f099f2feSKadir Cetinkaya } // namespace 215f099f2feSKadir Cetinkaya } // namespace clang::clangd 216f099f2feSKadir Cetinkaya namespace llvm { 217f099f2feSKadir Cetinkaya using DriverArgs = clang::clangd::DriverArgs; 218f099f2feSKadir Cetinkaya template <> struct DenseMapInfo<DriverArgs> { 219f099f2feSKadir Cetinkaya static DriverArgs getEmptyKey() { 220f099f2feSKadir Cetinkaya auto Driver = DriverArgs::getEmpty(); 221f099f2feSKadir Cetinkaya Driver.Driver = "EMPTY_KEY"; 222f099f2feSKadir Cetinkaya return Driver; 223f099f2feSKadir Cetinkaya } 224f099f2feSKadir Cetinkaya static DriverArgs getTombstoneKey() { 225f099f2feSKadir Cetinkaya auto Driver = DriverArgs::getEmpty(); 226f099f2feSKadir Cetinkaya Driver.Driver = "TOMBSTONE_KEY"; 227f099f2feSKadir Cetinkaya return Driver; 228f099f2feSKadir Cetinkaya } 229f099f2feSKadir Cetinkaya static unsigned getHashValue(const DriverArgs &Val) { 230de750085SChris Carlon unsigned FixedFieldsHash = llvm::hash_value(std::tuple{ 231f099f2feSKadir Cetinkaya Val.Driver, 232f099f2feSKadir Cetinkaya Val.StandardIncludes, 233f099f2feSKadir Cetinkaya Val.StandardCXXIncludes, 234f099f2feSKadir Cetinkaya Val.Lang, 235f099f2feSKadir Cetinkaya Val.Sysroot, 236f099f2feSKadir Cetinkaya Val.ISysroot, 2373935a29aSChris Carlon Val.Target, 2383935a29aSChris Carlon Val.Stdlib, 239f099f2feSKadir Cetinkaya }); 240de750085SChris Carlon 241de750085SChris Carlon unsigned SpecsHash = 242de750085SChris Carlon llvm::hash_combine_range(Val.Specs.begin(), Val.Specs.end()); 243de750085SChris Carlon 244de750085SChris Carlon return llvm::hash_combine(FixedFieldsHash, SpecsHash); 245f099f2feSKadir Cetinkaya } 246f099f2feSKadir Cetinkaya static bool isEqual(const DriverArgs &LHS, const DriverArgs &RHS) { 247f099f2feSKadir Cetinkaya return LHS == RHS; 248f099f2feSKadir Cetinkaya } 249f099f2feSKadir Cetinkaya }; 250f099f2feSKadir Cetinkaya } // namespace llvm 251f099f2feSKadir Cetinkaya namespace clang::clangd { 252f099f2feSKadir Cetinkaya namespace { 253428ac8f3SNathan Ridge bool isValidTarget(llvm::StringRef Triple) { 254428ac8f3SNathan Ridge std::shared_ptr<TargetOptions> TargetOpts(new TargetOptions); 255428ac8f3SNathan Ridge TargetOpts->Triple = Triple.str(); 256428ac8f3SNathan Ridge DiagnosticsEngine Diags(new DiagnosticIDs, new DiagnosticOptions, 257428ac8f3SNathan Ridge new IgnoringDiagConsumer); 258f099f2feSKadir Cetinkaya llvm::IntrusiveRefCntPtr<TargetInfo> Target = 259428ac8f3SNathan Ridge TargetInfo::CreateTargetInfo(Diags, TargetOpts); 260428ac8f3SNathan Ridge return bool(Target); 261428ac8f3SNathan Ridge } 262428ac8f3SNathan Ridge 263f71ffd3bSKazu Hirata std::optional<DriverInfo> parseDriverOutput(llvm::StringRef Output) { 264428ac8f3SNathan Ridge DriverInfo Info; 265428ac8f3SNathan Ridge const char SIS[] = "#include <...> search starts here:"; 266428ac8f3SNathan Ridge const char SIE[] = "End of search list."; 267428ac8f3SNathan Ridge const char TS[] = "Target: "; 268428ac8f3SNathan Ridge llvm::SmallVector<llvm::StringRef> Lines; 269428ac8f3SNathan Ridge Output.split(Lines, '\n', /*MaxSplit=*/-1, /*KeepEmpty=*/false); 270428ac8f3SNathan Ridge 271428ac8f3SNathan Ridge enum { 272428ac8f3SNathan Ridge Initial, // Initial state: searching for target or includes list. 273428ac8f3SNathan Ridge IncludesExtracting, // Includes extracting. 274428ac8f3SNathan Ridge Done // Includes and target extraction done. 275428ac8f3SNathan Ridge } State = Initial; 276428ac8f3SNathan Ridge bool SeenIncludes = false; 277428ac8f3SNathan Ridge bool SeenTarget = false; 278428ac8f3SNathan Ridge for (auto *It = Lines.begin(); State != Done && It != Lines.end(); ++It) { 279428ac8f3SNathan Ridge auto Line = *It; 280428ac8f3SNathan Ridge switch (State) { 281428ac8f3SNathan Ridge case Initial: 282428ac8f3SNathan Ridge if (!SeenIncludes && Line.trim() == SIS) { 283428ac8f3SNathan Ridge SeenIncludes = true; 284428ac8f3SNathan Ridge State = IncludesExtracting; 285*d5953e3eSKazu Hirata } else if (!SeenTarget && Line.trim().starts_with(TS)) { 286428ac8f3SNathan Ridge SeenTarget = true; 287428ac8f3SNathan Ridge llvm::StringRef TargetLine = Line.trim(); 288428ac8f3SNathan Ridge TargetLine.consume_front(TS); 289428ac8f3SNathan Ridge // Only detect targets that clang understands 290428ac8f3SNathan Ridge if (!isValidTarget(TargetLine)) { 291428ac8f3SNathan Ridge elog("System include extraction: invalid target \"{0}\", ignoring", 292428ac8f3SNathan Ridge TargetLine); 293428ac8f3SNathan Ridge } else { 294428ac8f3SNathan Ridge Info.Target = TargetLine.str(); 295428ac8f3SNathan Ridge vlog("System include extraction: target extracted: \"{0}\"", 296428ac8f3SNathan Ridge TargetLine); 297428ac8f3SNathan Ridge } 298428ac8f3SNathan Ridge } 299428ac8f3SNathan Ridge break; 300428ac8f3SNathan Ridge case IncludesExtracting: 301428ac8f3SNathan Ridge if (Line.trim() == SIE) { 302428ac8f3SNathan Ridge State = SeenTarget ? Done : Initial; 303428ac8f3SNathan Ridge } else { 304428ac8f3SNathan Ridge Info.SystemIncludes.push_back(Line.trim().str()); 305428ac8f3SNathan Ridge vlog("System include extraction: adding {0}", Line); 306428ac8f3SNathan Ridge } 307428ac8f3SNathan Ridge break; 308428ac8f3SNathan Ridge default: 309428ac8f3SNathan Ridge llvm_unreachable("Impossible state of the driver output parser"); 310428ac8f3SNathan Ridge break; 311428ac8f3SNathan Ridge } 312428ac8f3SNathan Ridge } 313428ac8f3SNathan Ridge if (!SeenIncludes) { 314428ac8f3SNathan Ridge elog("System include extraction: start marker not found: {0}", Output); 315059a23c0SKazu Hirata return std::nullopt; 316428ac8f3SNathan Ridge } 317428ac8f3SNathan Ridge if (State == IncludesExtracting) { 318428ac8f3SNathan Ridge elog("System include extraction: end marker missing: {0}", Output); 319059a23c0SKazu Hirata return std::nullopt; 320428ac8f3SNathan Ridge } 321428ac8f3SNathan Ridge return std::move(Info); 322428ac8f3SNathan Ridge } 323428ac8f3SNathan Ridge 3240478ef2dSSam McCall std::optional<std::string> run(llvm::ArrayRef<llvm::StringRef> Argv, 3250478ef2dSSam McCall bool OutputIsStderr) { 3260478ef2dSSam McCall llvm::SmallString<128> OutputPath; 3270478ef2dSSam McCall if (auto EC = llvm::sys::fs::createTemporaryFile("system-includes", "clangd", 3280478ef2dSSam McCall OutputPath)) { 3290478ef2dSSam McCall elog("System include extraction: failed to create temporary file with " 3300478ef2dSSam McCall "error {0}", 3310478ef2dSSam McCall EC.message()); 3320478ef2dSSam McCall return std::nullopt; 3330478ef2dSSam McCall } 3340478ef2dSSam McCall auto CleanUp = llvm::make_scope_exit( 3350478ef2dSSam McCall [&OutputPath]() { llvm::sys::fs::remove(OutputPath); }); 3360478ef2dSSam McCall 3370478ef2dSSam McCall std::optional<llvm::StringRef> Redirects[] = {{""}, {""}, {""}}; 3380478ef2dSSam McCall Redirects[OutputIsStderr ? 2 : 1] = OutputPath.str(); 3390478ef2dSSam McCall 3400478ef2dSSam McCall std::string ErrMsg; 3410478ef2dSSam McCall if (int RC = 3420478ef2dSSam McCall llvm::sys::ExecuteAndWait(Argv.front(), Argv, /*Env=*/std::nullopt, 3430478ef2dSSam McCall Redirects, /*SecondsToWait=*/0, 3440478ef2dSSam McCall /*MemoryLimit=*/0, &ErrMsg)) { 3450478ef2dSSam McCall elog("System include extraction: driver execution failed with return code: " 3460478ef2dSSam McCall "{0} - '{1}'. Args: [{2}]", 3470478ef2dSSam McCall llvm::to_string(RC), ErrMsg, printArgv(Argv)); 3480478ef2dSSam McCall return std::nullopt; 3490478ef2dSSam McCall } 3500478ef2dSSam McCall 3510478ef2dSSam McCall auto BufOrError = llvm::MemoryBuffer::getFile(OutputPath); 3520478ef2dSSam McCall if (!BufOrError) { 3530478ef2dSSam McCall elog("System include extraction: failed to read {0} with error {1}", 3540478ef2dSSam McCall OutputPath, BufOrError.getError().message()); 3550478ef2dSSam McCall return std::nullopt; 3560478ef2dSSam McCall } 3570478ef2dSSam McCall return BufOrError.get().get()->getBuffer().str(); 3580478ef2dSSam McCall } 3590478ef2dSSam McCall 360f71ffd3bSKazu Hirata std::optional<DriverInfo> 361f099f2feSKadir Cetinkaya extractSystemIncludesAndTarget(const DriverArgs &InputArgs, 362428ac8f3SNathan Ridge const llvm::Regex &QueryDriverRegex) { 363428ac8f3SNathan Ridge trace::Span Tracer("Extract system includes and target"); 364428ac8f3SNathan Ridge 365f099f2feSKadir Cetinkaya std::string Driver = InputArgs.Driver; 366428ac8f3SNathan Ridge if (!llvm::sys::path::is_absolute(Driver)) { 367428ac8f3SNathan Ridge auto DriverProgram = llvm::sys::findProgramByName(Driver); 368428ac8f3SNathan Ridge if (DriverProgram) { 369428ac8f3SNathan Ridge vlog("System include extraction: driver {0} expanded to {1}", Driver, 370428ac8f3SNathan Ridge *DriverProgram); 371428ac8f3SNathan Ridge Driver = *DriverProgram; 372428ac8f3SNathan Ridge } else { 373428ac8f3SNathan Ridge elog("System include extraction: driver {0} not found in PATH", Driver); 374059a23c0SKazu Hirata return std::nullopt; 375428ac8f3SNathan Ridge } 376428ac8f3SNathan Ridge } 377428ac8f3SNathan Ridge 378428ac8f3SNathan Ridge SPAN_ATTACH(Tracer, "driver", Driver); 379f099f2feSKadir Cetinkaya SPAN_ATTACH(Tracer, "lang", InputArgs.Lang); 380428ac8f3SNathan Ridge 38101d3045dSSam McCall // If driver was "../foo" then having to allowlist "/path/a/../foo" rather 38201d3045dSSam McCall // than "/path/foo" is absurd. 38301d3045dSSam McCall // Allow either to match the allowlist, then proceed with "/path/a/../foo". 38401d3045dSSam McCall // This was our historical behavior, and it *could* resolve to something else. 38501d3045dSSam McCall llvm::SmallString<256> NoDots(Driver); 38601d3045dSSam McCall llvm::sys::path::remove_dots(NoDots, /*remove_dot_dot=*/true); 38701d3045dSSam McCall if (!QueryDriverRegex.match(Driver) && !QueryDriverRegex.match(NoDots)) { 388428ac8f3SNathan Ridge vlog("System include extraction: not allowed driver {0}", Driver); 389059a23c0SKazu Hirata return std::nullopt; 390428ac8f3SNathan Ridge } 391428ac8f3SNathan Ridge 392f099f2feSKadir Cetinkaya llvm::SmallVector<llvm::StringRef> Args = {Driver, "-E", "-v"}; 393f099f2feSKadir Cetinkaya Args.append(InputArgs.render()); 394f099f2feSKadir Cetinkaya // Input needs to go after Lang flags. 395f099f2feSKadir Cetinkaya Args.push_back("-"); 3960478ef2dSSam McCall auto Output = run(Args, /*OutputIsStderr=*/true); 3970478ef2dSSam McCall if (!Output) 398059a23c0SKazu Hirata return std::nullopt; 399428ac8f3SNathan Ridge 4000478ef2dSSam McCall std::optional<DriverInfo> Info = parseDriverOutput(*Output); 401428ac8f3SNathan Ridge if (!Info) 402059a23c0SKazu Hirata return std::nullopt; 4030478ef2dSSam McCall 4040478ef2dSSam McCall // The built-in headers are tightly coupled to parser builtins. 4050478ef2dSSam McCall // (These are clang's "resource dir", GCC's GCC_INCLUDE_DIR.) 4060478ef2dSSam McCall // We should keep using clangd's versions, so exclude the queried builtins. 4070478ef2dSSam McCall // They're not specially marked in the -v output, but we can get the path 4080478ef2dSSam McCall // with `$DRIVER -print-file-name=include`. 4090478ef2dSSam McCall if (auto BuiltinHeaders = 4100478ef2dSSam McCall run({Driver, "-print-file-name=include"}, /*OutputIsStderr=*/false)) { 4110478ef2dSSam McCall auto Path = llvm::StringRef(*BuiltinHeaders).trim(); 4120478ef2dSSam McCall if (!Path.empty() && llvm::sys::path::is_absolute(Path)) { 4130478ef2dSSam McCall auto Size = Info->SystemIncludes.size(); 414f9306f6dSKazu Hirata llvm::erase(Info->SystemIncludes, Path); 4150478ef2dSSam McCall vlog("System includes extractor: builtin headers {0} {1}", Path, 4160478ef2dSSam McCall (Info->SystemIncludes.size() != Size) 4170478ef2dSSam McCall ? "excluded" 4180478ef2dSSam McCall : "not found in driver's response"); 4190478ef2dSSam McCall } 4200478ef2dSSam McCall } 4210478ef2dSSam McCall 422428ac8f3SNathan Ridge log("System includes extractor: successfully executed {0}\n\tgot includes: " 423428ac8f3SNathan Ridge "\"{1}\"\n\tgot target: \"{2}\"", 424428ac8f3SNathan Ridge Driver, llvm::join(Info->SystemIncludes, ", "), Info->Target); 425428ac8f3SNathan Ridge return Info; 426428ac8f3SNathan Ridge } 427428ac8f3SNathan Ridge 428428ac8f3SNathan Ridge tooling::CompileCommand & 429428ac8f3SNathan Ridge addSystemIncludes(tooling::CompileCommand &Cmd, 430428ac8f3SNathan Ridge llvm::ArrayRef<std::string> SystemIncludes) { 431428ac8f3SNathan Ridge std::vector<std::string> ToAppend; 432428ac8f3SNathan Ridge for (llvm::StringRef Include : SystemIncludes) { 433428ac8f3SNathan Ridge // FIXME(kadircet): This doesn't work when we have "--driver-mode=cl" 434428ac8f3SNathan Ridge ToAppend.push_back("-isystem"); 435428ac8f3SNathan Ridge ToAppend.push_back(Include.str()); 436428ac8f3SNathan Ridge } 437428ac8f3SNathan Ridge if (!ToAppend.empty()) { 438428ac8f3SNathan Ridge // Just append when `--` isn't present. 439428ac8f3SNathan Ridge auto InsertAt = llvm::find(Cmd.CommandLine, "--"); 440428ac8f3SNathan Ridge Cmd.CommandLine.insert(InsertAt, std::make_move_iterator(ToAppend.begin()), 441428ac8f3SNathan Ridge std::make_move_iterator(ToAppend.end())); 442428ac8f3SNathan Ridge } 443428ac8f3SNathan Ridge return Cmd; 444428ac8f3SNathan Ridge } 445428ac8f3SNathan Ridge 446428ac8f3SNathan Ridge tooling::CompileCommand &setTarget(tooling::CompileCommand &Cmd, 447428ac8f3SNathan Ridge const std::string &Target) { 448428ac8f3SNathan Ridge if (!Target.empty()) { 449428ac8f3SNathan Ridge // We do not want to override existing target with extracted one. 450428ac8f3SNathan Ridge for (llvm::StringRef Arg : Cmd.CommandLine) { 451*d5953e3eSKazu Hirata if (Arg == "-target" || Arg.starts_with("--target=")) 452428ac8f3SNathan Ridge return Cmd; 453428ac8f3SNathan Ridge } 454428ac8f3SNathan Ridge // Just append when `--` isn't present. 455428ac8f3SNathan Ridge auto InsertAt = llvm::find(Cmd.CommandLine, "--"); 456428ac8f3SNathan Ridge Cmd.CommandLine.insert(InsertAt, "--target=" + Target); 457428ac8f3SNathan Ridge } 458428ac8f3SNathan Ridge return Cmd; 459428ac8f3SNathan Ridge } 460428ac8f3SNathan Ridge 461428ac8f3SNathan Ridge /// Converts a glob containing only ** or * into a regex. 462428ac8f3SNathan Ridge std::string convertGlobToRegex(llvm::StringRef Glob) { 463428ac8f3SNathan Ridge std::string RegText; 464428ac8f3SNathan Ridge llvm::raw_string_ostream RegStream(RegText); 465428ac8f3SNathan Ridge RegStream << '^'; 466428ac8f3SNathan Ridge for (size_t I = 0, E = Glob.size(); I < E; ++I) { 467428ac8f3SNathan Ridge if (Glob[I] == '*') { 468428ac8f3SNathan Ridge if (I + 1 < E && Glob[I + 1] == '*') { 469428ac8f3SNathan Ridge // Double star, accept any sequence. 470428ac8f3SNathan Ridge RegStream << ".*"; 471428ac8f3SNathan Ridge // Also skip the second star. 472428ac8f3SNathan Ridge ++I; 473428ac8f3SNathan Ridge } else { 474428ac8f3SNathan Ridge // Single star, accept any sequence without a slash. 475428ac8f3SNathan Ridge RegStream << "[^/]*"; 476428ac8f3SNathan Ridge } 477428ac8f3SNathan Ridge } else if (llvm::sys::path::is_separator(Glob[I]) && 478428ac8f3SNathan Ridge llvm::sys::path::is_separator('/') && 479428ac8f3SNathan Ridge llvm::sys::path::is_separator('\\')) { 480428ac8f3SNathan Ridge RegStream << R"([/\\])"; // Accept either slash on windows. 481428ac8f3SNathan Ridge } else { 482428ac8f3SNathan Ridge RegStream << llvm::Regex::escape(Glob.substr(I, 1)); 483428ac8f3SNathan Ridge } 484428ac8f3SNathan Ridge } 485428ac8f3SNathan Ridge RegStream << '$'; 486428ac8f3SNathan Ridge return RegText; 487428ac8f3SNathan Ridge } 488428ac8f3SNathan Ridge 489428ac8f3SNathan Ridge /// Converts a glob containing only ** or * into a regex. 490428ac8f3SNathan Ridge llvm::Regex convertGlobsToRegex(llvm::ArrayRef<std::string> Globs) { 491428ac8f3SNathan Ridge assert(!Globs.empty() && "Globs cannot be empty!"); 492428ac8f3SNathan Ridge std::vector<std::string> RegTexts; 493428ac8f3SNathan Ridge RegTexts.reserve(Globs.size()); 494428ac8f3SNathan Ridge for (llvm::StringRef Glob : Globs) 495428ac8f3SNathan Ridge RegTexts.push_back(convertGlobToRegex(Glob)); 496428ac8f3SNathan Ridge 497428ac8f3SNathan Ridge // Tempting to pass IgnoreCase, but we don't know the FS sensitivity. 498428ac8f3SNathan Ridge llvm::Regex Reg(llvm::join(RegTexts, "|")); 499428ac8f3SNathan Ridge assert(Reg.isValid(RegTexts.front()) && 500428ac8f3SNathan Ridge "Created an invalid regex from globs"); 501428ac8f3SNathan Ridge return Reg; 502428ac8f3SNathan Ridge } 503428ac8f3SNathan Ridge 504428ac8f3SNathan Ridge /// Extracts system includes from a trusted driver by parsing the output of 505428ac8f3SNathan Ridge /// include search path and appends them to the commands coming from underlying 506428ac8f3SNathan Ridge /// compilation database. 507428ac8f3SNathan Ridge class SystemIncludeExtractor { 508428ac8f3SNathan Ridge public: 509428ac8f3SNathan Ridge SystemIncludeExtractor(llvm::ArrayRef<std::string> QueryDriverGlobs) 510428ac8f3SNathan Ridge : QueryDriverRegex(convertGlobsToRegex(QueryDriverGlobs)) {} 511428ac8f3SNathan Ridge 512428ac8f3SNathan Ridge void operator()(tooling::CompileCommand &Cmd, llvm::StringRef File) const { 513428ac8f3SNathan Ridge if (Cmd.CommandLine.empty()) 514428ac8f3SNathan Ridge return; 515428ac8f3SNathan Ridge 516f099f2feSKadir Cetinkaya DriverArgs Args(Cmd, File); 517f099f2feSKadir Cetinkaya if (Args.Lang.empty()) 518428ac8f3SNathan Ridge return; 519f099f2feSKadir Cetinkaya if (auto Info = QueriedDrivers.get(Args, [&] { 520f099f2feSKadir Cetinkaya return extractSystemIncludesAndTarget(Args, QueryDriverRegex); 521428ac8f3SNathan Ridge })) { 522428ac8f3SNathan Ridge setTarget(addSystemIncludes(Cmd, Info->SystemIncludes), Info->Target); 523428ac8f3SNathan Ridge } 524428ac8f3SNathan Ridge } 525428ac8f3SNathan Ridge 526428ac8f3SNathan Ridge private: 527428ac8f3SNathan Ridge // Caches includes extracted from a driver. Key is driver:lang. 528f099f2feSKadir Cetinkaya Memoize<llvm::DenseMap<DriverArgs, std::optional<DriverInfo>>> QueriedDrivers; 529428ac8f3SNathan Ridge llvm::Regex QueryDriverRegex; 530428ac8f3SNathan Ridge }; 531428ac8f3SNathan Ridge } // namespace 532428ac8f3SNathan Ridge 533428ac8f3SNathan Ridge SystemIncludeExtractorFn 534428ac8f3SNathan Ridge getSystemIncludeExtractor(llvm::ArrayRef<std::string> QueryDriverGlobs) { 535428ac8f3SNathan Ridge if (QueryDriverGlobs.empty()) 536428ac8f3SNathan Ridge return nullptr; 537428ac8f3SNathan Ridge return SystemIncludeExtractor(QueryDriverGlobs); 538428ac8f3SNathan Ridge } 539428ac8f3SNathan Ridge 540f099f2feSKadir Cetinkaya } // namespace clang::clangd 541