xref: /llvm-project/clang-tools-extra/clangd/Preamble.h (revision fe6c24000f2d7316899d4ec4c12273892326ed47)
1 //===--- Preamble.h - Reusing expensive parts of the AST ---------*- C++-*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // The vast majority of code in a typical translation unit is in the headers
10 // included at the top of the file.
11 //
12 // The preamble optimization says that we can parse this code once, and reuse
13 // the result multiple times. The preamble is invalidated by changes to the
14 // code in the preamble region, to the compile command, or to files on disk.
15 //
16 // This is the most important optimization in clangd: it allows operations like
17 // code-completion to have sub-second latency. It is supported by the
18 // PrecompiledPreamble functionality in clang, which wraps the techniques used
19 // by PCH files, modules etc into a convenient interface.
20 //
21 //===----------------------------------------------------------------------===//
22 #ifndef LLVM_CLANG_TOOLS_EXTRA_CLANGD_PREAMBLE_H
23 #define LLVM_CLANG_TOOLS_EXTRA_CLANGD_PREAMBLE_H
24 
25 #include "CollectMacros.h"
26 #include "Compiler.h"
27 #include "Diagnostics.h"
28 #include "FS.h"
29 #include "Headers.h"
30 #include "ModulesBuilder.h"
31 
32 #include "clang-include-cleaner/Record.h"
33 #include "support/Path.h"
34 #include "clang/Basic/SourceManager.h"
35 #include "clang/Basic/TargetOptions.h"
36 #include "clang/Frontend/CompilerInvocation.h"
37 #include "clang/Frontend/PrecompiledPreamble.h"
38 #include "clang/Lex/Lexer.h"
39 #include "clang/Tooling/CompilationDatabase.h"
40 #include "llvm/ADT/ArrayRef.h"
41 #include "llvm/ADT/StringRef.h"
42 
43 #include <cstddef>
44 #include <functional>
45 #include <memory>
46 #include <string>
47 #include <utility>
48 #include <vector>
49 
50 namespace clang {
51 namespace clangd {
52 
53 /// The captured AST context.
54 /// Keeps necessary structs for an ASTContext and Preprocessor alive.
55 /// This enables consuming them after context that produced the AST is gone.
56 /// (e.g. indexing a preamble ast on a separate thread). ASTContext stored
57 /// inside is still not thread-safe.
58 
59 struct CapturedASTCtx {
60 public:
61   CapturedASTCtx(CompilerInstance &Clang)
62       : Invocation(Clang.getInvocationPtr()),
63         Diagnostics(Clang.getDiagnosticsPtr()), Target(Clang.getTargetPtr()),
64         AuxTarget(Clang.getAuxTarget()), FileMgr(Clang.getFileManagerPtr()),
65         SourceMgr(Clang.getSourceManagerPtr()), PP(Clang.getPreprocessorPtr()),
66         Context(Clang.getASTContextPtr()) {}
67 
68   CapturedASTCtx(const CapturedASTCtx &) = delete;
69   CapturedASTCtx &operator=(const CapturedASTCtx &) = delete;
70   CapturedASTCtx(CapturedASTCtx &&) = default;
71   CapturedASTCtx &operator=(CapturedASTCtx &&) = default;
72 
73   ASTContext &getASTContext() { return *Context; }
74   Preprocessor &getPreprocessor() { return *PP; }
75   CompilerInvocation &getCompilerInvocation() { return *Invocation; }
76   FileManager &getFileManager() { return *FileMgr; }
77   void setStatCache(std::shared_ptr<PreambleFileStatusCache> StatCache) {
78     this->StatCache = StatCache;
79   }
80 
81 private:
82   std::shared_ptr<CompilerInvocation> Invocation;
83   IntrusiveRefCntPtr<DiagnosticsEngine> Diagnostics;
84   IntrusiveRefCntPtr<TargetInfo> Target;
85   IntrusiveRefCntPtr<TargetInfo> AuxTarget;
86   IntrusiveRefCntPtr<FileManager> FileMgr;
87   IntrusiveRefCntPtr<SourceManager> SourceMgr;
88   std::shared_ptr<Preprocessor> PP;
89   IntrusiveRefCntPtr<ASTContext> Context;
90   std::shared_ptr<PreambleFileStatusCache> StatCache;
91 };
92 
93 /// The parsed preamble and associated data.
94 ///
95 /// As we must avoid re-parsing the preamble, any information that can only
96 /// be obtained during parsing must be eagerly captured and stored here.
97 struct PreambleData {
98   PreambleData(PrecompiledPreamble Preamble) : Preamble(std::move(Preamble)) {}
99 
100   // Version of the ParseInputs this preamble was built from.
101   std::string Version;
102   tooling::CompileCommand CompileCommand;
103   // Target options used when building the preamble. Changes in target can cause
104   // crashes when deserializing preamble, this enables consumers to use the
105   // same target (without reparsing CompileCommand).
106   std::shared_ptr<TargetOptions> TargetOpts = nullptr;
107   PrecompiledPreamble Preamble;
108   std::vector<Diag> Diags;
109   // Processes like code completions and go-to-definitions will need #include
110   // information, and their compile action skips preamble range.
111   IncludeStructure Includes;
112   // Captures #include-mapping information in #included headers.
113   std::shared_ptr<const include_cleaner::PragmaIncludes> Pragmas;
114   // Information about required module files for this preamble.
115   std::unique_ptr<PrerequisiteModules> RequiredModules;
116   // Macros defined in the preamble section of the main file.
117   // Users care about headers vs main-file, not preamble vs non-preamble.
118   // These should be treated as main-file entities e.g. for code completion.
119   MainFileMacros Macros;
120   // Pragma marks defined in the preamble section of the main file.
121   std::vector<PragmaMark> Marks;
122   // Cache of FS operations performed when building the preamble.
123   // When reusing a preamble, this cache can be consumed to save IO.
124   std::shared_ptr<PreambleFileStatusCache> StatCache;
125   // Whether there was a (possibly-incomplete) include-guard on the main file.
126   // We need to propagate this information "by hand" to subsequent parses.
127   bool MainIsIncludeGuarded = false;
128 };
129 
130 using PreambleParsedCallback =
131     std::function<void(CapturedASTCtx ASTCtx,
132                        std::shared_ptr<const include_cleaner::PragmaIncludes>)>;
133 
/// Timings and statistics from the preamble build. Unlike PreambleData, these
/// do not need to be stored for later, but can be useful for logging, metrics,
/// etc.
///
/// Every field is zero-initialized, so a default-constructed object can be
/// read safely even if no build ever filled it in.
struct PreambleBuildStats {
  /// Total wall time it took to build preamble, in seconds.
  double TotalBuildTime = 0;
  /// Time spent in filesystem operations during the build, in seconds.
  double FileSystemTime = 0;

  /// Estimate of the memory used while building the preamble.
  /// This memory has been released when buildPreamble returns.
  /// For example, this includes the size of the in-memory AST (ASTContext).
  size_t BuildSize = 0;
  /// The serialized size of the preamble.
  /// This storage is needed while the preamble is used (but may be on disk).
  size_t SerializedSize = 0;
};
151 
152 /// Build a preamble for the new inputs unless an old one can be reused.
153 /// If \p PreambleCallback is set, it will be run on top of the AST while
154 /// building the preamble.
155 /// If Stats is not non-null, build statistics will be exported there.
156 std::shared_ptr<const PreambleData>
157 buildPreamble(PathRef FileName, CompilerInvocation CI,
158               const ParseInputs &Inputs, bool StoreInMemory,
159               PreambleParsedCallback PreambleCallback,
160               PreambleBuildStats *Stats = nullptr);
161 
162 /// Returns true if \p Preamble is reusable for \p Inputs. Note that it will
163 /// return true when some missing headers are now available.
164 /// FIXME: Should return more information about the delta between \p Preamble
165 /// and \p Inputs, e.g. new headers.
166 bool isPreambleCompatible(const PreambleData &Preamble,
167                           const ParseInputs &Inputs, PathRef FileName,
168                           const CompilerInvocation &CI);
169 
170 /// Stores information required to parse a TU using a (possibly stale) Baseline
171 /// preamble. Later on this information can be injected into the main file by
172 /// updating compiler invocation with \c apply. This injected section
173 /// approximately reflects additions to the preamble in Modified contents, e.g.
174 /// new include directives.
175 class PreamblePatch {
176 public:
177   enum class PatchType { MacroDirectives, All };
178   /// \p Preamble is used verbatim.
179   static PreamblePatch unmodified(const PreambleData &Preamble);
180   /// Builds a patch that contains new PP directives introduced to the preamble
181   /// section of \p Modified compared to \p Baseline.
182   /// FIXME: This only handles include directives, we should at least handle
183   /// define/undef.
184   static PreamblePatch createFullPatch(llvm::StringRef FileName,
185                                        const ParseInputs &Modified,
186                                        const PreambleData &Baseline);
187   static PreamblePatch createMacroPatch(llvm::StringRef FileName,
188                                         const ParseInputs &Modified,
189                                         const PreambleData &Baseline);
190   /// Returns the FileEntry for the preamble patch of MainFilePath in SM, if
191   /// any.
192   static OptionalFileEntryRef getPatchEntry(llvm::StringRef MainFilePath,
193                                             const SourceManager &SM);
194 
195   /// Adjusts CI (which compiles the modified inputs) to be used with the
196   /// baseline preamble. This is done by inserting an artificial include to the
197   /// \p CI that contains new directives calculated in create.
198   void apply(CompilerInvocation &CI) const;
199 
200   /// Returns #include directives from the \c Modified preamble that were
201   /// resolved using the \c Baseline preamble. This covers the new locations of
202   /// inclusions that were moved around, but not inclusions of new files. Those
203   /// will be recorded when parsing the main file: the includes in the injected
204   /// section will be resolved back to their spelled positions in the main file
205   /// using the presumed-location mechanism.
206   std::vector<Inclusion> preambleIncludes() const;
207 
208   /// Returns preamble bounds for the Modified.
209   PreambleBounds modifiedBounds() const { return ModifiedBounds; }
210 
211   /// Returns textual patch contents.
212   llvm::StringRef text() const { return PatchContents; }
213 
214   /// Returns diag locations for Modified contents.
215   llvm::ArrayRef<Diag> patchedDiags() const { return PatchedDiags; }
216 
217   static constexpr llvm::StringLiteral HeaderName = "__preamble_patch__.h";
218 
219   llvm::ArrayRef<PragmaMark> marks() const;
220   const MainFileMacros &mainFileMacros() const;
221 
222 private:
223   static PreamblePatch create(llvm::StringRef FileName,
224                               const ParseInputs &Modified,
225                               const PreambleData &Baseline,
226                               PatchType PatchType);
227 
228   PreamblePatch() = default;
229   std::string PatchContents;
230   std::string PatchFileName;
231   // Includes that are present in both Baseline and Modified. Used for
232   // patching includes of baseline preamble.
233   std::vector<Inclusion> PreambleIncludes;
234   // Diags that were attached to a line preserved in Modified contents.
235   std::vector<Diag> PatchedDiags;
236   PreambleBounds ModifiedBounds = {0, false};
237   const PreambleData *Baseline = nullptr;
238   std::vector<PragmaMark> PatchedMarks;
239   MainFileMacros PatchedMacros;
240 };
241 
242 } // namespace clangd
243 } // namespace clang
244 
245 #endif // LLVM_CLANG_TOOLS_EXTRA_CLANGD_PREAMBLE_H
246