1 //===--- Preamble.h - Reusing expensive parts of the AST ---------*- C++-*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // The vast majority of code in a typical translation unit is in the headers 10 // included at the top of the file. 11 // 12 // The preamble optimization says that we can parse this code once, and reuse 13 // the result multiple times. The preamble is invalidated by changes to the 14 // code in the preamble region, to the compile command, or to files on disk. 15 // 16 // This is the most important optimization in clangd: it allows operations like 17 // code-completion to have sub-second latency. It is supported by the 18 // PrecompiledPreamble functionality in clang, which wraps the techniques used 19 // by PCH files, modules etc into a convenient interface. 20 // 21 //===----------------------------------------------------------------------===// 22 #ifndef LLVM_CLANG_TOOLS_EXTRA_CLANGD_PREAMBLE_H 23 #define LLVM_CLANG_TOOLS_EXTRA_CLANGD_PREAMBLE_H 24 25 #include "CollectMacros.h" 26 #include "Compiler.h" 27 #include "Diagnostics.h" 28 #include "FS.h" 29 #include "Headers.h" 30 #include "ModulesBuilder.h" 31 32 #include "clang-include-cleaner/Record.h" 33 #include "support/Path.h" 34 #include "clang/Basic/SourceManager.h" 35 #include "clang/Basic/TargetOptions.h" 36 #include "clang/Frontend/CompilerInvocation.h" 37 #include "clang/Frontend/PrecompiledPreamble.h" 38 #include "clang/Lex/Lexer.h" 39 #include "clang/Tooling/CompilationDatabase.h" 40 #include "llvm/ADT/ArrayRef.h" 41 #include "llvm/ADT/StringRef.h" 42 43 #include <cstddef> 44 #include <functional> 45 #include <memory> 46 #include <string> 47 #include <utility> 48 #include <vector> 49 50 namespace clang { 51 namespace clangd { 52 53 /// The captured AST context. 54 /// Keeps necessary structs for an ASTContext and Preprocessor alive. 55 /// This enables consuming them after context that produced the AST is gone. 56 /// (e.g. indexing a preamble ast on a separate thread). ASTContext stored 57 /// inside is still not thread-safe. 58 59 struct CapturedASTCtx { 60 public: 61 CapturedASTCtx(CompilerInstance &Clang) 62 : Invocation(Clang.getInvocationPtr()), 63 Diagnostics(Clang.getDiagnosticsPtr()), Target(Clang.getTargetPtr()), 64 AuxTarget(Clang.getAuxTarget()), FileMgr(Clang.getFileManagerPtr()), 65 SourceMgr(Clang.getSourceManagerPtr()), PP(Clang.getPreprocessorPtr()), 66 Context(Clang.getASTContextPtr()) {} 67 68 CapturedASTCtx(const CapturedASTCtx &) = delete; 69 CapturedASTCtx &operator=(const CapturedASTCtx &) = delete; 70 CapturedASTCtx(CapturedASTCtx &&) = default; 71 CapturedASTCtx &operator=(CapturedASTCtx &&) = default; 72 73 ASTContext &getASTContext() { return *Context; } 74 Preprocessor &getPreprocessor() { return *PP; } 75 CompilerInvocation &getCompilerInvocation() { return *Invocation; } 76 FileManager &getFileManager() { return *FileMgr; } 77 void setStatCache(std::shared_ptr<PreambleFileStatusCache> StatCache) { 78 this->StatCache = StatCache; 79 } 80 81 private: 82 std::shared_ptr<CompilerInvocation> Invocation; 83 IntrusiveRefCntPtr<DiagnosticsEngine> Diagnostics; 84 IntrusiveRefCntPtr<TargetInfo> Target; 85 IntrusiveRefCntPtr<TargetInfo> AuxTarget; 86 IntrusiveRefCntPtr<FileManager> FileMgr; 87 IntrusiveRefCntPtr<SourceManager> SourceMgr; 88 std::shared_ptr<Preprocessor> PP; 89 IntrusiveRefCntPtr<ASTContext> Context; 90 std::shared_ptr<PreambleFileStatusCache> StatCache; 91 }; 92 93 /// The parsed preamble and associated data. 94 /// 95 /// As we must avoid re-parsing the preamble, any information that can only 96 /// be obtained during parsing must be eagerly captured and stored here. 97 struct PreambleData { 98 PreambleData(PrecompiledPreamble Preamble) : Preamble(std::move(Preamble)) {} 99 100 // Version of the ParseInputs this preamble was built from. 101 std::string Version; 102 tooling::CompileCommand CompileCommand; 103 // Target options used when building the preamble. Changes in target can cause 104 // crashes when deserializing preamble, this enables consumers to use the 105 // same target (without reparsing CompileCommand). 106 std::shared_ptr<TargetOptions> TargetOpts = nullptr; 107 PrecompiledPreamble Preamble; 108 std::vector<Diag> Diags; 109 // Processes like code completions and go-to-definitions will need #include 110 // information, and their compile action skips preamble range. 111 IncludeStructure Includes; 112 // Captures #include-mapping information in #included headers. 113 std::shared_ptr<const include_cleaner::PragmaIncludes> Pragmas; 114 // Information about required module files for this preamble. 115 std::unique_ptr<PrerequisiteModules> RequiredModules; 116 // Macros defined in the preamble section of the main file. 117 // Users care about headers vs main-file, not preamble vs non-preamble. 118 // These should be treated as main-file entities e.g. for code completion. 119 MainFileMacros Macros; 120 // Pragma marks defined in the preamble section of the main file. 121 std::vector<PragmaMark> Marks; 122 // Cache of FS operations performed when building the preamble. 123 // When reusing a preamble, this cache can be consumed to save IO. 124 std::shared_ptr<PreambleFileStatusCache> StatCache; 125 // Whether there was a (possibly-incomplete) include-guard on the main file. 126 // We need to propagate this information "by hand" to subsequent parses. 127 bool MainIsIncludeGuarded = false; 128 }; 129 130 using PreambleParsedCallback = 131 std::function<void(CapturedASTCtx ASTCtx, 132 std::shared_ptr<const include_cleaner::PragmaIncludes>)>; 133 134 /// Timings and statistics from the premble build. Unlike PreambleData, these 135 /// do not need to be stored for later, but can be useful for logging, metrics, 136 /// etc. 137 struct PreambleBuildStats { 138 /// Total wall time it took to build preamble, in seconds. 139 double TotalBuildTime; 140 /// Time spent in filesystem operations during the build, in seconds. 141 double FileSystemTime; 142 143 /// Estimate of the memory used while building the preamble. 144 /// This memory has been released when buildPreamble returns. 145 /// For example, this includes the size of the in-memory AST (ASTContext). 146 size_t BuildSize; 147 /// The serialized size of the preamble. 148 /// This storage is needed while the preamble is used (but may be on disk). 149 size_t SerializedSize; 150 }; 151 152 /// Build a preamble for the new inputs unless an old one can be reused. 153 /// If \p PreambleCallback is set, it will be run on top of the AST while 154 /// building the preamble. 155 /// If Stats is not non-null, build statistics will be exported there. 156 std::shared_ptr<const PreambleData> 157 buildPreamble(PathRef FileName, CompilerInvocation CI, 158 const ParseInputs &Inputs, bool StoreInMemory, 159 PreambleParsedCallback PreambleCallback, 160 PreambleBuildStats *Stats = nullptr); 161 162 /// Returns true if \p Preamble is reusable for \p Inputs. Note that it will 163 /// return true when some missing headers are now available. 164 /// FIXME: Should return more information about the delta between \p Preamble 165 /// and \p Inputs, e.g. new headers. 166 bool isPreambleCompatible(const PreambleData &Preamble, 167 const ParseInputs &Inputs, PathRef FileName, 168 const CompilerInvocation &CI); 169 170 /// Stores information required to parse a TU using a (possibly stale) Baseline 171 /// preamble. Later on this information can be injected into the main file by 172 /// updating compiler invocation with \c apply. This injected section 173 /// approximately reflects additions to the preamble in Modified contents, e.g. 174 /// new include directives. 175 class PreamblePatch { 176 public: 177 enum class PatchType { MacroDirectives, All }; 178 /// \p Preamble is used verbatim. 179 static PreamblePatch unmodified(const PreambleData &Preamble); 180 /// Builds a patch that contains new PP directives introduced to the preamble 181 /// section of \p Modified compared to \p Baseline. 182 /// FIXME: This only handles include directives, we should at least handle 183 /// define/undef. 184 static PreamblePatch createFullPatch(llvm::StringRef FileName, 185 const ParseInputs &Modified, 186 const PreambleData &Baseline); 187 static PreamblePatch createMacroPatch(llvm::StringRef FileName, 188 const ParseInputs &Modified, 189 const PreambleData &Baseline); 190 /// Returns the FileEntry for the preamble patch of MainFilePath in SM, if 191 /// any. 192 static OptionalFileEntryRef getPatchEntry(llvm::StringRef MainFilePath, 193 const SourceManager &SM); 194 195 /// Adjusts CI (which compiles the modified inputs) to be used with the 196 /// baseline preamble. This is done by inserting an artificial include to the 197 /// \p CI that contains new directives calculated in create. 198 void apply(CompilerInvocation &CI) const; 199 200 /// Returns #include directives from the \c Modified preamble that were 201 /// resolved using the \c Baseline preamble. This covers the new locations of 202 /// inclusions that were moved around, but not inclusions of new files. Those 203 /// will be recorded when parsing the main file: the includes in the injected 204 /// section will be resolved back to their spelled positions in the main file 205 /// using the presumed-location mechanism. 206 std::vector<Inclusion> preambleIncludes() const; 207 208 /// Returns preamble bounds for the Modified. 209 PreambleBounds modifiedBounds() const { return ModifiedBounds; } 210 211 /// Returns textual patch contents. 212 llvm::StringRef text() const { return PatchContents; } 213 214 /// Returns diag locations for Modified contents. 215 llvm::ArrayRef<Diag> patchedDiags() const { return PatchedDiags; } 216 217 static constexpr llvm::StringLiteral HeaderName = "__preamble_patch__.h"; 218 219 llvm::ArrayRef<PragmaMark> marks() const; 220 const MainFileMacros &mainFileMacros() const; 221 222 private: 223 static PreamblePatch create(llvm::StringRef FileName, 224 const ParseInputs &Modified, 225 const PreambleData &Baseline, 226 PatchType PatchType); 227 228 PreamblePatch() = default; 229 std::string PatchContents; 230 std::string PatchFileName; 231 // Includes that are present in both Baseline and Modified. Used for 232 // patching includes of baseline preamble. 233 std::vector<Inclusion> PreambleIncludes; 234 // Diags that were attached to a line preserved in Modified contents. 235 std::vector<Diag> PatchedDiags; 236 PreambleBounds ModifiedBounds = {0, false}; 237 const PreambleData *Baseline = nullptr; 238 std::vector<PragmaMark> PatchedMarks; 239 MainFileMacros PatchedMacros; 240 }; 241 242 } // namespace clangd 243 } // namespace clang 244 245 #endif // LLVM_CLANG_TOOLS_EXTRA_CLANGD_PREAMBLE_H 246