1 //===-- StdLib.cpp ----------------------------------------------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 #include "StdLib.h" 9 #include <fstream> 10 #include <memory> 11 #include <optional> 12 #include <string> 13 #include <vector> 14 15 #include "Compiler.h" 16 #include "Config.h" 17 #include "SymbolCollector.h" 18 #include "index/IndexAction.h" 19 #include "support/Logger.h" 20 #include "support/ThreadsafeFS.h" 21 #include "support/Trace.h" 22 #include "clang/Basic/LangOptions.h" 23 #include "clang/Frontend/CompilerInvocation.h" 24 #include "clang/Lex/PreprocessorOptions.h" 25 #include "clang/Tooling/Inclusions/StandardLibrary.h" 26 #include "llvm/ADT/IntrusiveRefCntPtr.h" 27 #include "llvm/ADT/StringRef.h" 28 #include "llvm/Support/MemoryBuffer.h" 29 #include "llvm/Support/Path.h" 30 31 namespace clang { 32 namespace clangd { 33 namespace { 34 35 enum Lang { C, CXX }; 36 37 Lang langFromOpts(const LangOptions &LO) { return LO.CPlusPlus ? CXX : C; } 38 llvm::StringLiteral mandatoryHeader(Lang L) { 39 switch (L) { 40 case C: 41 return "stdio.h"; 42 case CXX: 43 return "vector"; 44 } 45 llvm_unreachable("unhandled Lang"); 46 } 47 48 LangStandard::Kind standardFromOpts(const LangOptions &LO) { 49 if (LO.CPlusPlus) { 50 if (LO.CPlusPlus23) 51 return LangStandard::lang_cxx23; 52 if (LO.CPlusPlus20) 53 return LangStandard::lang_cxx20; 54 if (LO.CPlusPlus17) 55 return LangStandard::lang_cxx17; 56 if (LO.CPlusPlus14) 57 return LangStandard::lang_cxx14; 58 if (LO.CPlusPlus11) 59 return LangStandard::lang_cxx11; 60 return LangStandard::lang_cxx98; 61 } 62 if (LO.C23) 63 return LangStandard::lang_c23; 64 // C17 has no new features, so treat {C11,C17} as C17. 65 if (LO.C11) 66 return LangStandard::lang_c17; 67 return LangStandard::lang_c99; 68 } 69 70 std::string buildUmbrella(llvm::StringLiteral Mandatory, 71 llvm::ArrayRef<tooling::stdlib::Header> Headers) { 72 std::string Result; 73 llvm::raw_string_ostream OS(Result); 74 75 // We __has_include guard all our #includes to avoid errors when using older 76 // stdlib version that don't have headers for the newest language standards. 77 // But make sure we get *some* error if things are totally broken. 78 OS << llvm::formatv( 79 "#if !__has_include(<{0}>)\n" 80 "#error Mandatory header <{0}> not found in standard library!\n" 81 "#endif\n", 82 Mandatory); 83 84 for (auto Header : Headers) { 85 OS << llvm::formatv("#if __has_include({0})\n" 86 "#include {0}\n" 87 "#endif\n", 88 Header); 89 } 90 return Result; 91 } 92 93 } // namespace 94 95 llvm::StringRef getStdlibUmbrellaHeader(const LangOptions &LO) { 96 // The umbrella header is the same for all versions of each language. 97 // Headers that are unsupported in old lang versions are usually guarded by 98 // #if. Some headers may be not present in old stdlib versions, the umbrella 99 // header guards with __has_include for this purpose. 100 Lang L = langFromOpts(LO); 101 switch (L) { 102 case CXX: 103 static std::string *UmbrellaCXX = new std::string(buildUmbrella( 104 mandatoryHeader(L), 105 tooling::stdlib::Header::all(tooling::stdlib::Lang::CXX))); 106 return *UmbrellaCXX; 107 case C: 108 static std::string *UmbrellaC = new std::string( 109 buildUmbrella(mandatoryHeader(L), 110 tooling::stdlib::Header::all(tooling::stdlib::Lang::C))); 111 return *UmbrellaC; 112 } 113 llvm_unreachable("invalid Lang in langFromOpts"); 114 } 115 116 namespace { 117 118 // Including the standard library leaks unwanted transitively included symbols. 119 // 120 // We want to drop these, they're a bit tricky to identify: 121 // - we don't want to limit to symbols on our list, as our list has only 122 // top-level symbols (and there may be legitimate stdlib extensions). 123 // - we can't limit to only symbols defined in known stdlib headers, as stdlib 124 // internal structure is murky 125 // - we can't strictly require symbols to come from a particular path, e.g. 126 // libstdc++ is mostly under /usr/include/c++/10/... 127 // but std::ctype_base is under /usr/include/<platform>/c++/10/... 128 // We require the symbol to come from a header that is *either* from 129 // the standard library path (as identified by the location of <vector>), or 130 // another header that defines a symbol from our stdlib list. 131 SymbolSlab filter(SymbolSlab Slab, const StdLibLocation &Loc) { 132 SymbolSlab::Builder Result; 133 134 static auto &StandardHeaders = *[] { 135 auto *Set = new llvm::DenseSet<llvm::StringRef>(); 136 for (auto Header : tooling::stdlib::Header::all(tooling::stdlib::Lang::CXX)) 137 Set->insert(Header.name()); 138 for (auto Header : tooling::stdlib::Header::all(tooling::stdlib::Lang::C)) 139 Set->insert(Header.name()); 140 return Set; 141 }(); 142 143 // Form prefixes like file:///usr/include/c++/10/ 144 // These can be trivially prefix-compared with URIs in the indexed symbols. 145 llvm::SmallVector<std::string> StdLibURIPrefixes; 146 for (const auto &Path : Loc.Paths) { 147 StdLibURIPrefixes.push_back(URI::create(Path).toString()); 148 if (StdLibURIPrefixes.back().back() != '/') 149 StdLibURIPrefixes.back().push_back('/'); 150 } 151 // For each header URI, is it *either* prefixed by StdLibURIPrefixes *or* 152 // owner of a symbol whose insertable header is in StandardHeaders? 153 // Pointer key because strings in a SymbolSlab are interned. 154 llvm::DenseMap<const char *, bool> GoodHeader; 155 for (const Symbol &S : Slab) { 156 if (!S.IncludeHeaders.empty() && 157 StandardHeaders.contains(S.IncludeHeaders.front().IncludeHeader)) { 158 GoodHeader[S.CanonicalDeclaration.FileURI] = true; 159 GoodHeader[S.Definition.FileURI] = true; 160 continue; 161 } 162 for (const char *URI : 163 {S.CanonicalDeclaration.FileURI, S.Definition.FileURI}) { 164 auto R = GoodHeader.try_emplace(URI, false); 165 if (R.second) { 166 R.first->second = llvm::any_of( 167 StdLibURIPrefixes, 168 [&, URIStr(llvm::StringRef(URI))](const std::string &Prefix) { 169 return URIStr.starts_with(Prefix); 170 }); 171 } 172 } 173 } 174 #ifndef NDEBUG 175 for (const auto &Good : GoodHeader) 176 if (Good.second && *Good.first) 177 dlog("Stdlib header: {0}", Good.first); 178 #endif 179 // Empty URIs aren't considered good. (Definition can be blank). 180 auto IsGoodHeader = [&](const char *C) { return *C && GoodHeader.lookup(C); }; 181 182 for (const Symbol &S : Slab) { 183 if (!(IsGoodHeader(S.CanonicalDeclaration.FileURI) || 184 IsGoodHeader(S.Definition.FileURI))) { 185 dlog("Ignoring wrong-header symbol {0}{1} in {2}", S.Scope, S.Name, 186 S.CanonicalDeclaration.FileURI); 187 continue; 188 } 189 Result.insert(S); 190 } 191 192 return std::move(Result).build(); 193 } 194 195 } // namespace 196 197 SymbolSlab indexStandardLibrary(llvm::StringRef HeaderSources, 198 std::unique_ptr<CompilerInvocation> CI, 199 const StdLibLocation &Loc, 200 const ThreadsafeFS &TFS) { 201 if (CI->getFrontendOpts().Inputs.size() != 1 || 202 !CI->getPreprocessorOpts().ImplicitPCHInclude.empty()) { 203 elog("Indexing standard library failed: bad CompilerInvocation"); 204 assert(false && "indexing stdlib with a dubious CompilerInvocation!"); 205 return SymbolSlab(); 206 } 207 const FrontendInputFile &Input = CI->getFrontendOpts().Inputs.front(); 208 trace::Span Tracer("StandardLibraryIndex"); 209 LangStandard::Kind LangStd = standardFromOpts(CI->getLangOpts()); 210 log("Indexing {0} standard library in the context of {1}", 211 LangStandard::getLangStandardForKind(LangStd).getName(), Input.getFile()); 212 213 SymbolSlab Symbols; 214 IgnoreDiagnostics IgnoreDiags; 215 // CompilerInvocation is taken from elsewhere, and may map a dirty buffer. 216 CI->getPreprocessorOpts().clearRemappedFiles(); 217 auto Clang = prepareCompilerInstance( 218 std::move(CI), /*Preamble=*/nullptr, 219 llvm::MemoryBuffer::getMemBuffer(HeaderSources, Input.getFile()), 220 TFS.view(/*CWD=*/std::nullopt), IgnoreDiags); 221 if (!Clang) { 222 elog("Standard Library Index: Couldn't build compiler instance"); 223 return Symbols; 224 } 225 226 SymbolCollector::Options IndexOpts; 227 IndexOpts.Origin = SymbolOrigin::StdLib; 228 IndexOpts.CollectMainFileSymbols = false; 229 IndexOpts.CollectMainFileRefs = false; 230 IndexOpts.CollectMacro = true; 231 IndexOpts.StoreAllDocumentation = true; 232 // Sadly we can't use IndexOpts.FileFilter to restrict indexing scope. 233 // Files from outside the StdLibLocation may define true std symbols anyway. 234 // We end up "blessing" such headers, and can only do that by indexing 235 // everything first. 236 237 // Refs, relations, include graph in the stdlib mostly aren't useful. 238 auto Action = createStaticIndexingAction( 239 IndexOpts, [&](SymbolSlab S) { Symbols = std::move(S); }, nullptr, 240 nullptr, nullptr); 241 242 if (!Action->BeginSourceFile(*Clang, Input)) { 243 elog("Standard Library Index: BeginSourceFile() failed"); 244 return Symbols; 245 } 246 247 if (llvm::Error Err = Action->Execute()) { 248 elog("Standard Library Index: Execute failed: {0}", std::move(Err)); 249 return Symbols; 250 } 251 252 Action->EndSourceFile(); 253 254 unsigned SymbolsBeforeFilter = Symbols.size(); 255 Symbols = filter(std::move(Symbols), Loc); 256 bool Errors = Clang->hasDiagnostics() && 257 Clang->getDiagnostics().hasUncompilableErrorOccurred(); 258 log("Indexed {0} standard library{3}: {1} symbols, {2} filtered", 259 LangStandard::getLangStandardForKind(LangStd).getName(), Symbols.size(), 260 SymbolsBeforeFilter - Symbols.size(), 261 Errors ? " (incomplete due to errors)" : ""); 262 SPAN_ATTACH(Tracer, "symbols", int(Symbols.size())); 263 return Symbols; 264 } 265 266 SymbolSlab indexStandardLibrary(std::unique_ptr<CompilerInvocation> Invocation, 267 const StdLibLocation &Loc, 268 const ThreadsafeFS &TFS) { 269 llvm::StringRef Header = getStdlibUmbrellaHeader(Invocation->getLangOpts()); 270 return indexStandardLibrary(Header, std::move(Invocation), Loc, TFS); 271 } 272 273 bool StdLibSet::isBest(const LangOptions &LO) const { 274 return standardFromOpts(LO) >= 275 Best[langFromOpts(LO)].load(std::memory_order_acquire); 276 } 277 278 std::optional<StdLibLocation> StdLibSet::add(const LangOptions &LO, 279 const HeaderSearch &HS) { 280 Lang L = langFromOpts(LO); 281 int OldVersion = Best[L].load(std::memory_order_acquire); 282 int NewVersion = standardFromOpts(LO); 283 dlog("Index stdlib? {0}", 284 LangStandard::getLangStandardForKind(standardFromOpts(LO)).getName()); 285 286 if (!Config::current().Index.StandardLibrary) { 287 dlog("No: disabled in config"); 288 return std::nullopt; 289 } 290 291 if (NewVersion <= OldVersion) { 292 dlog("No: have {0}, {1}>={2}", 293 LangStandard::getLangStandardForKind( 294 static_cast<LangStandard::Kind>(NewVersion)) 295 .getName(), 296 OldVersion, NewVersion); 297 return std::nullopt; 298 } 299 300 // We'd like to index a standard library here if there is one. 301 // Check for the existence of <vector> on the search path. 302 // We could cache this, but we only get here repeatedly when there's no 303 // stdlib, and even then only once per preamble build. 304 llvm::StringLiteral ProbeHeader = mandatoryHeader(L); 305 llvm::SmallString<256> Path; // Scratch space. 306 llvm::SmallVector<std::string> SearchPaths; 307 auto RecordHeaderPath = [&](llvm::StringRef HeaderPath) { 308 llvm::StringRef DirPath = llvm::sys::path::parent_path(HeaderPath); 309 if (!HS.getFileMgr().getVirtualFileSystem().getRealPath(DirPath, Path)) 310 SearchPaths.emplace_back(Path); 311 }; 312 for (const auto &DL : 313 llvm::make_range(HS.search_dir_begin(), HS.search_dir_end())) { 314 switch (DL.getLookupType()) { 315 case DirectoryLookup::LT_NormalDir: { 316 Path = DL.getDirRef()->getName(); 317 llvm::sys::path::append(Path, ProbeHeader); 318 llvm::vfs::Status Stat; 319 if (!HS.getFileMgr().getNoncachedStatValue(Path, Stat) && 320 Stat.isRegularFile()) 321 RecordHeaderPath(Path); 322 break; 323 } 324 case DirectoryLookup::LT_Framework: 325 // stdlib can't be a framework (framework includes must have a slash) 326 continue; 327 case DirectoryLookup::LT_HeaderMap: 328 llvm::StringRef Target = 329 DL.getHeaderMap()->lookupFilename(ProbeHeader, Path); 330 if (!Target.empty()) 331 RecordHeaderPath(Target); 332 break; 333 } 334 } 335 if (SearchPaths.empty()) 336 return std::nullopt; 337 338 dlog("Found standard library in {0}", llvm::join(SearchPaths, ", ")); 339 340 while (!Best[L].compare_exchange_weak(OldVersion, NewVersion, 341 std::memory_order_acq_rel)) 342 if (OldVersion >= NewVersion) { 343 dlog("No: lost the race"); 344 return std::nullopt; // Another thread won the race while we were 345 // checking. 346 } 347 348 dlog("Yes, index stdlib!"); 349 return StdLibLocation{std::move(SearchPaths)}; 350 } 351 352 } // namespace clangd 353 } // namespace clang 354