xref: /llvm-project/clang-tools-extra/clangd/index/StdLib.cpp (revision f5838cc17ffb1a0015a0d2687a72bf39b2847f6d)
1 //===-- StdLib.cpp ----------------------------------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 #include "StdLib.h"
9 #include <fstream>
10 #include <memory>
11 #include <optional>
12 #include <string>
13 #include <vector>
14 
15 #include "Compiler.h"
16 #include "Config.h"
17 #include "SymbolCollector.h"
18 #include "index/IndexAction.h"
19 #include "support/Logger.h"
20 #include "support/ThreadsafeFS.h"
21 #include "support/Trace.h"
22 #include "clang/Basic/LangOptions.h"
23 #include "clang/Frontend/CompilerInvocation.h"
24 #include "clang/Lex/PreprocessorOptions.h"
25 #include "clang/Tooling/Inclusions/StandardLibrary.h"
26 #include "llvm/ADT/IntrusiveRefCntPtr.h"
27 #include "llvm/ADT/StringRef.h"
28 #include "llvm/Support/MemoryBuffer.h"
29 #include "llvm/Support/Path.h"
30 
31 namespace clang {
32 namespace clangd {
33 namespace {
34 
// Language family whose standard library we index.
// Deliberately an unscoped enum: the enumerators are used directly as array
// indices (e.g. into StdLibSet::Best), so keep them small and consecutive.
enum Lang {
  C = 0,
  CXX = 1,
};
36 
37 Lang langFromOpts(const LangOptions &LO) { return LO.CPlusPlus ? CXX : C; }
38 llvm::StringLiteral mandatoryHeader(Lang L) {
39   switch (L) {
40   case C:
41     return "stdio.h";
42   case CXX:
43     return "vector";
44   }
45   llvm_unreachable("unhandled Lang");
46 }
47 
48 LangStandard::Kind standardFromOpts(const LangOptions &LO) {
49   if (LO.CPlusPlus) {
50     if (LO.CPlusPlus23)
51       return LangStandard::lang_cxx23;
52     if (LO.CPlusPlus20)
53       return LangStandard::lang_cxx20;
54     if (LO.CPlusPlus17)
55       return LangStandard::lang_cxx17;
56     if (LO.CPlusPlus14)
57       return LangStandard::lang_cxx14;
58     if (LO.CPlusPlus11)
59       return LangStandard::lang_cxx11;
60     return LangStandard::lang_cxx98;
61   }
62   if (LO.C23)
63     return LangStandard::lang_c23;
64   // C17 has no new features, so treat {C11,C17} as C17.
65   if (LO.C11)
66     return LangStandard::lang_c17;
67   return LangStandard::lang_c99;
68 }
69 
70 std::string buildUmbrella(llvm::StringLiteral Mandatory,
71                           llvm::ArrayRef<tooling::stdlib::Header> Headers) {
72   std::string Result;
73   llvm::raw_string_ostream OS(Result);
74 
75   // We __has_include guard all our #includes to avoid errors when using older
76   // stdlib version that don't have headers for the newest language standards.
77   // But make sure we get *some* error if things are totally broken.
78   OS << llvm::formatv(
79       "#if !__has_include(<{0}>)\n"
80       "#error Mandatory header <{0}> not found in standard library!\n"
81       "#endif\n",
82       Mandatory);
83 
84   for (auto Header : Headers) {
85     OS << llvm::formatv("#if __has_include({0})\n"
86                         "#include {0}\n"
87                         "#endif\n",
88                         Header);
89   }
90   return Result;
91 }
92 
93 } // namespace
94 
95 llvm::StringRef getStdlibUmbrellaHeader(const LangOptions &LO) {
96   // The umbrella header is the same for all versions of each language.
97   // Headers that are unsupported in old lang versions are usually guarded by
98   // #if. Some headers may be not present in old stdlib versions, the umbrella
99   // header guards with __has_include for this purpose.
100   Lang L = langFromOpts(LO);
101   switch (L) {
102   case CXX:
103     static std::string *UmbrellaCXX = new std::string(buildUmbrella(
104         mandatoryHeader(L),
105         tooling::stdlib::Header::all(tooling::stdlib::Lang::CXX)));
106     return *UmbrellaCXX;
107   case C:
108     static std::string *UmbrellaC = new std::string(
109         buildUmbrella(mandatoryHeader(L),
110                       tooling::stdlib::Header::all(tooling::stdlib::Lang::C)));
111     return *UmbrellaC;
112   }
113   llvm_unreachable("invalid Lang in langFromOpts");
114 }
115 
116 namespace {
117 
118 // Including the standard library leaks unwanted transitively included symbols.
119 //
120 // We want to drop these, they're a bit tricky to identify:
121 //  - we don't want to limit to symbols on our list, as our list has only
122 //    top-level symbols (and there may be legitimate stdlib extensions).
123 //  - we can't limit to only symbols defined in known stdlib headers, as stdlib
124 //    internal structure is murky
125 //  - we can't strictly require symbols to come from a particular path, e.g.
126 //      libstdc++ is mostly under /usr/include/c++/10/...
127 //      but std::ctype_base is under /usr/include/<platform>/c++/10/...
128 // We require the symbol to come from a header that is *either* from
129 // the standard library path (as identified by the location of <vector>), or
130 // another header that defines a symbol from our stdlib list.
131 SymbolSlab filter(SymbolSlab Slab, const StdLibLocation &Loc) {
132   SymbolSlab::Builder Result;
133 
134   static auto &StandardHeaders = *[] {
135     auto *Set = new llvm::DenseSet<llvm::StringRef>();
136     for (auto Header : tooling::stdlib::Header::all(tooling::stdlib::Lang::CXX))
137       Set->insert(Header.name());
138     for (auto Header : tooling::stdlib::Header::all(tooling::stdlib::Lang::C))
139       Set->insert(Header.name());
140     return Set;
141   }();
142 
143   // Form prefixes like file:///usr/include/c++/10/
144   // These can be trivially prefix-compared with URIs in the indexed symbols.
145   llvm::SmallVector<std::string> StdLibURIPrefixes;
146   for (const auto &Path : Loc.Paths) {
147     StdLibURIPrefixes.push_back(URI::create(Path).toString());
148     if (StdLibURIPrefixes.back().back() != '/')
149       StdLibURIPrefixes.back().push_back('/');
150   }
151   // For each header URI, is it *either* prefixed by StdLibURIPrefixes *or*
152   // owner of a symbol whose insertable header is in StandardHeaders?
153   // Pointer key because strings in a SymbolSlab are interned.
154   llvm::DenseMap<const char *, bool> GoodHeader;
155   for (const Symbol &S : Slab) {
156     if (!S.IncludeHeaders.empty() &&
157         StandardHeaders.contains(S.IncludeHeaders.front().IncludeHeader)) {
158       GoodHeader[S.CanonicalDeclaration.FileURI] = true;
159       GoodHeader[S.Definition.FileURI] = true;
160       continue;
161     }
162     for (const char *URI :
163          {S.CanonicalDeclaration.FileURI, S.Definition.FileURI}) {
164       auto R = GoodHeader.try_emplace(URI, false);
165       if (R.second) {
166         R.first->second = llvm::any_of(
167             StdLibURIPrefixes,
168             [&, URIStr(llvm::StringRef(URI))](const std::string &Prefix) {
169               return URIStr.starts_with(Prefix);
170             });
171       }
172     }
173   }
174 #ifndef NDEBUG
175   for (const auto &Good : GoodHeader)
176     if (Good.second && *Good.first)
177       dlog("Stdlib header: {0}", Good.first);
178 #endif
179   // Empty URIs aren't considered good. (Definition can be blank).
180   auto IsGoodHeader = [&](const char *C) { return *C && GoodHeader.lookup(C); };
181 
182   for (const Symbol &S : Slab) {
183     if (!(IsGoodHeader(S.CanonicalDeclaration.FileURI) ||
184           IsGoodHeader(S.Definition.FileURI))) {
185       dlog("Ignoring wrong-header symbol {0}{1} in {2}", S.Scope, S.Name,
186            S.CanonicalDeclaration.FileURI);
187       continue;
188     }
189     Result.insert(S);
190   }
191 
192   return std::move(Result).build();
193 }
194 
195 } // namespace
196 
197 SymbolSlab indexStandardLibrary(llvm::StringRef HeaderSources,
198                                 std::unique_ptr<CompilerInvocation> CI,
199                                 const StdLibLocation &Loc,
200                                 const ThreadsafeFS &TFS) {
201   if (CI->getFrontendOpts().Inputs.size() != 1 ||
202       !CI->getPreprocessorOpts().ImplicitPCHInclude.empty()) {
203     elog("Indexing standard library failed: bad CompilerInvocation");
204     assert(false && "indexing stdlib with a dubious CompilerInvocation!");
205     return SymbolSlab();
206   }
207   const FrontendInputFile &Input = CI->getFrontendOpts().Inputs.front();
208   trace::Span Tracer("StandardLibraryIndex");
209   LangStandard::Kind LangStd = standardFromOpts(CI->getLangOpts());
210   log("Indexing {0} standard library in the context of {1}",
211       LangStandard::getLangStandardForKind(LangStd).getName(), Input.getFile());
212 
213   SymbolSlab Symbols;
214   IgnoreDiagnostics IgnoreDiags;
215   // CompilerInvocation is taken from elsewhere, and may map a dirty buffer.
216   CI->getPreprocessorOpts().clearRemappedFiles();
217   auto Clang = prepareCompilerInstance(
218       std::move(CI), /*Preamble=*/nullptr,
219       llvm::MemoryBuffer::getMemBuffer(HeaderSources, Input.getFile()),
220       TFS.view(/*CWD=*/std::nullopt), IgnoreDiags);
221   if (!Clang) {
222     elog("Standard Library Index: Couldn't build compiler instance");
223     return Symbols;
224   }
225 
226   SymbolCollector::Options IndexOpts;
227   IndexOpts.Origin = SymbolOrigin::StdLib;
228   IndexOpts.CollectMainFileSymbols = false;
229   IndexOpts.CollectMainFileRefs = false;
230   IndexOpts.CollectMacro = true;
231   IndexOpts.StoreAllDocumentation = true;
232   // Sadly we can't use IndexOpts.FileFilter to restrict indexing scope.
233   // Files from outside the StdLibLocation may define true std symbols anyway.
234   // We end up "blessing" such headers, and can only do that by indexing
235   // everything first.
236 
237   // Refs, relations, include graph in the stdlib mostly aren't useful.
238   auto Action = createStaticIndexingAction(
239       IndexOpts, [&](SymbolSlab S) { Symbols = std::move(S); }, nullptr,
240       nullptr, nullptr);
241 
242   if (!Action->BeginSourceFile(*Clang, Input)) {
243     elog("Standard Library Index: BeginSourceFile() failed");
244     return Symbols;
245   }
246 
247   if (llvm::Error Err = Action->Execute()) {
248     elog("Standard Library Index: Execute failed: {0}", std::move(Err));
249     return Symbols;
250   }
251 
252   Action->EndSourceFile();
253 
254   unsigned SymbolsBeforeFilter = Symbols.size();
255   Symbols = filter(std::move(Symbols), Loc);
256   bool Errors = Clang->hasDiagnostics() &&
257                 Clang->getDiagnostics().hasUncompilableErrorOccurred();
258   log("Indexed {0} standard library{3}: {1} symbols, {2} filtered",
259       LangStandard::getLangStandardForKind(LangStd).getName(), Symbols.size(),
260       SymbolsBeforeFilter - Symbols.size(),
261       Errors ? " (incomplete due to errors)" : "");
262   SPAN_ATTACH(Tracer, "symbols", int(Symbols.size()));
263   return Symbols;
264 }
265 
266 SymbolSlab indexStandardLibrary(std::unique_ptr<CompilerInvocation> Invocation,
267                                 const StdLibLocation &Loc,
268                                 const ThreadsafeFS &TFS) {
269   llvm::StringRef Header = getStdlibUmbrellaHeader(Invocation->getLangOpts());
270   return indexStandardLibrary(Header, std::move(Invocation), Loc, TFS);
271 }
272 
273 bool StdLibSet::isBest(const LangOptions &LO) const {
274   return standardFromOpts(LO) >=
275          Best[langFromOpts(LO)].load(std::memory_order_acquire);
276 }
277 
278 std::optional<StdLibLocation> StdLibSet::add(const LangOptions &LO,
279                                              const HeaderSearch &HS) {
280   Lang L = langFromOpts(LO);
281   int OldVersion = Best[L].load(std::memory_order_acquire);
282   int NewVersion = standardFromOpts(LO);
283   dlog("Index stdlib? {0}",
284        LangStandard::getLangStandardForKind(standardFromOpts(LO)).getName());
285 
286   if (!Config::current().Index.StandardLibrary) {
287     dlog("No: disabled in config");
288     return std::nullopt;
289   }
290 
291   if (NewVersion <= OldVersion) {
292     dlog("No: have {0}, {1}>={2}",
293          LangStandard::getLangStandardForKind(
294              static_cast<LangStandard::Kind>(NewVersion))
295              .getName(),
296          OldVersion, NewVersion);
297     return std::nullopt;
298   }
299 
300   // We'd like to index a standard library here if there is one.
301   // Check for the existence of <vector> on the search path.
302   // We could cache this, but we only get here repeatedly when there's no
303   // stdlib, and even then only once per preamble build.
304   llvm::StringLiteral ProbeHeader = mandatoryHeader(L);
305   llvm::SmallString<256> Path; // Scratch space.
306   llvm::SmallVector<std::string> SearchPaths;
307   auto RecordHeaderPath = [&](llvm::StringRef HeaderPath) {
308     llvm::StringRef DirPath = llvm::sys::path::parent_path(HeaderPath);
309     if (!HS.getFileMgr().getVirtualFileSystem().getRealPath(DirPath, Path))
310       SearchPaths.emplace_back(Path);
311   };
312   for (const auto &DL :
313        llvm::make_range(HS.search_dir_begin(), HS.search_dir_end())) {
314     switch (DL.getLookupType()) {
315     case DirectoryLookup::LT_NormalDir: {
316       Path = DL.getDirRef()->getName();
317       llvm::sys::path::append(Path, ProbeHeader);
318       llvm::vfs::Status Stat;
319       if (!HS.getFileMgr().getNoncachedStatValue(Path, Stat) &&
320           Stat.isRegularFile())
321         RecordHeaderPath(Path);
322       break;
323     }
324     case DirectoryLookup::LT_Framework:
325       // stdlib can't be a framework (framework includes must have a slash)
326       continue;
327     case DirectoryLookup::LT_HeaderMap:
328       llvm::StringRef Target =
329           DL.getHeaderMap()->lookupFilename(ProbeHeader, Path);
330       if (!Target.empty())
331         RecordHeaderPath(Target);
332       break;
333     }
334   }
335   if (SearchPaths.empty())
336     return std::nullopt;
337 
338   dlog("Found standard library in {0}", llvm::join(SearchPaths, ", "));
339 
340   while (!Best[L].compare_exchange_weak(OldVersion, NewVersion,
341                                         std::memory_order_acq_rel))
342     if (OldVersion >= NewVersion) {
343       dlog("No: lost the race");
344       return std::nullopt; // Another thread won the race while we were
345                            // checking.
346     }
347 
348   dlog("Yes, index stdlib!");
349   return StdLibLocation{std::move(SearchPaths)};
350 }
351 
352 } // namespace clangd
353 } // namespace clang
354