xref: /llvm-project/clang-tools-extra/clangd/Preamble.cpp (revision 0865ecc5150b9a55ba1f9e30b6d463a66ac362a6)
1 //===--- Preamble.cpp - Reusing expensive parts of the AST ----------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "Preamble.h"
10 #include "CollectMacros.h"
11 #include "Compiler.h"
12 #include "Config.h"
13 #include "Diagnostics.h"
14 #include "FS.h"
15 #include "FeatureModule.h"
16 #include "Headers.h"
17 #include "Protocol.h"
18 #include "SourceCode.h"
19 #include "clang-include-cleaner/Record.h"
20 #include "support/Logger.h"
21 #include "support/Path.h"
22 #include "support/ThreadsafeFS.h"
23 #include "support/Trace.h"
24 #include "clang/AST/DeclTemplate.h"
25 #include "clang/AST/Type.h"
26 #include "clang/Basic/Diagnostic.h"
27 #include "clang/Basic/DiagnosticLex.h"
28 #include "clang/Basic/DiagnosticOptions.h"
29 #include "clang/Basic/LangOptions.h"
30 #include "clang/Basic/SourceLocation.h"
31 #include "clang/Basic/SourceManager.h"
32 #include "clang/Basic/TargetInfo.h"
33 #include "clang/Basic/TokenKinds.h"
34 #include "clang/Frontend/CompilerInstance.h"
35 #include "clang/Frontend/CompilerInvocation.h"
36 #include "clang/Frontend/FrontendActions.h"
37 #include "clang/Frontend/PrecompiledPreamble.h"
38 #include "clang/Lex/HeaderSearch.h"
39 #include "clang/Lex/Lexer.h"
40 #include "clang/Lex/PPCallbacks.h"
41 #include "clang/Lex/Preprocessor.h"
42 #include "clang/Lex/PreprocessorOptions.h"
43 #include "clang/Serialization/ASTReader.h"
44 #include "clang/Tooling/CompilationDatabase.h"
45 #include "llvm/ADT/ArrayRef.h"
46 #include "llvm/ADT/DenseMap.h"
47 #include "llvm/ADT/IntrusiveRefCntPtr.h"
48 #include "llvm/ADT/STLExtras.h"
49 #include "llvm/ADT/SmallString.h"
50 #include "llvm/ADT/SmallVector.h"
51 #include "llvm/ADT/StringExtras.h"
52 #include "llvm/ADT/StringMap.h"
53 #include "llvm/ADT/StringRef.h"
54 #include "llvm/Support/Casting.h"
55 #include "llvm/Support/Error.h"
56 #include "llvm/Support/ErrorHandling.h"
57 #include "llvm/Support/ErrorOr.h"
58 #include "llvm/Support/FormatVariadic.h"
59 #include "llvm/Support/MemoryBuffer.h"
60 #include "llvm/Support/Path.h"
61 #include "llvm/Support/VirtualFileSystem.h"
62 #include "llvm/Support/raw_ostream.h"
63 #include <cassert>
64 #include <chrono>
65 #include <cstddef>
66 #include <cstdint>
67 #include <cstdlib>
68 #include <functional>
69 #include <memory>
70 #include <optional>
71 #include <string>
72 #include <system_error>
73 #include <tuple>
74 #include <utility>
75 #include <vector>
76 
77 namespace clang {
78 namespace clangd {
79 namespace {
80 
81 bool compileCommandsAreEqual(const tooling::CompileCommand &LHS,
82                              const tooling::CompileCommand &RHS) {
83   // We don't check for Output, it should not matter to clangd.
84   return LHS.Directory == RHS.Directory && LHS.Filename == RHS.Filename &&
85          llvm::ArrayRef(LHS.CommandLine).equals(RHS.CommandLine);
86 }
87 
88 class CppFilePreambleCallbacks : public PreambleCallbacks {
89 public:
90   CppFilePreambleCallbacks(
91       PathRef File, PreambleBuildStats *Stats, bool ParseForwardingFunctions,
92       std::function<void(CompilerInstance &)> BeforeExecuteCallback)
93       : File(File), Stats(Stats),
94         ParseForwardingFunctions(ParseForwardingFunctions),
95         BeforeExecuteCallback(std::move(BeforeExecuteCallback)) {}
96 
97   IncludeStructure takeIncludes() { return std::move(Includes); }
98 
99   MainFileMacros takeMacros() { return std::move(Macros); }
100 
101   std::vector<PragmaMark> takeMarks() { return std::move(Marks); }
102 
103   include_cleaner::PragmaIncludes takePragmaIncludes() {
104     return std::move(Pragmas);
105   }
106 
107   std::optional<CapturedASTCtx> takeLife() { return std::move(CapturedCtx); }
108 
109   bool isMainFileIncludeGuarded() const { return IsMainFileIncludeGuarded; }
110 
111   void AfterExecute(CompilerInstance &CI) override {
112     // As part of the Preamble compilation, ASTConsumer
113     // PrecompilePreambleConsumer/PCHGenerator is setup. This would be called
114     // when Preamble consists of modules. Therefore while capturing AST context,
115     // we have to reset ast consumer and ASTMutationListener.
116     if (CI.getASTReader()) {
117       CI.getASTReader()->setDeserializationListener(nullptr);
118       // This just sets consumer to null when DeserializationListener is null.
119       CI.getASTReader()->StartTranslationUnit(nullptr);
120     }
121     CI.getASTContext().setASTMutationListener(nullptr);
122     CapturedCtx.emplace(CI);
123 
124     const SourceManager &SM = CI.getSourceManager();
125     OptionalFileEntryRef MainFE = SM.getFileEntryRefForID(SM.getMainFileID());
126     IsMainFileIncludeGuarded =
127         CI.getPreprocessor().getHeaderSearchInfo().isFileMultipleIncludeGuarded(
128             *MainFE);
129 
130     if (Stats) {
131       const ASTContext &AST = CI.getASTContext();
132       Stats->BuildSize = AST.getASTAllocatedMemory();
133       Stats->BuildSize += AST.getSideTableAllocatedMemory();
134       Stats->BuildSize += AST.Idents.getAllocator().getTotalMemory();
135       Stats->BuildSize += AST.Selectors.getTotalMemory();
136 
137       Stats->BuildSize += AST.getSourceManager().getContentCacheSize();
138       Stats->BuildSize += AST.getSourceManager().getDataStructureSizes();
139       Stats->BuildSize +=
140           AST.getSourceManager().getMemoryBufferSizes().malloc_bytes;
141 
142       const Preprocessor &PP = CI.getPreprocessor();
143       Stats->BuildSize += PP.getTotalMemory();
144       if (PreprocessingRecord *PRec = PP.getPreprocessingRecord())
145         Stats->BuildSize += PRec->getTotalMemory();
146       Stats->BuildSize += PP.getHeaderSearchInfo().getTotalMemory();
147     }
148   }
149 
150   void BeforeExecute(CompilerInstance &CI) override {
151     LangOpts = &CI.getLangOpts();
152     SourceMgr = &CI.getSourceManager();
153     PP = &CI.getPreprocessor();
154     Includes.collect(CI);
155     Pragmas.record(CI);
156     if (BeforeExecuteCallback)
157       BeforeExecuteCallback(CI);
158   }
159 
160   std::unique_ptr<PPCallbacks> createPPCallbacks() override {
161     assert(SourceMgr && LangOpts && PP &&
162            "SourceMgr, LangOpts and PP must be set at this point");
163 
164     return std::make_unique<PPChainedCallbacks>(
165         std::make_unique<CollectMainFileMacros>(*PP, Macros),
166         collectPragmaMarksCallback(*SourceMgr, Marks));
167   }
168 
169   static bool isLikelyForwardingFunction(FunctionTemplateDecl *FT) {
170     const auto *FD = FT->getTemplatedDecl();
171     const auto NumParams = FD->getNumParams();
172     // Check whether its last parameter is a parameter pack...
173     if (NumParams > 0) {
174       const auto *LastParam = FD->getParamDecl(NumParams - 1);
175       if (const auto *PET = dyn_cast<PackExpansionType>(LastParam->getType())) {
176         // ... of the type T&&... or T...
177         const auto BaseType = PET->getPattern().getNonReferenceType();
178         if (const auto *TTPT =
179                 dyn_cast<TemplateTypeParmType>(BaseType.getTypePtr())) {
180           // ... whose template parameter comes from the function directly
181           if (FT->getTemplateParameters()->getDepth() == TTPT->getDepth()) {
182             return true;
183           }
184         }
185       }
186     }
187     return false;
188   }
189 
190   bool shouldSkipFunctionBody(Decl *D) override {
191     // Usually we don't need to look inside the bodies of header functions
192     // to understand the program. However when forwarding function like
193     // emplace() forward their arguments to some other function, the
194     // interesting overload resolution happens inside the forwarding
195     // function's body. To provide more meaningful diagnostics,
196     // code completion, and parameter hints we should parse (and later
197     // instantiate) the bodies.
198     if (auto *FT = llvm::dyn_cast<clang::FunctionTemplateDecl>(D)) {
199       if (ParseForwardingFunctions) {
200         // Don't skip parsing the body if it looks like a forwarding function
201         if (isLikelyForwardingFunction(FT))
202           return false;
203       } else {
204         // By default, only take care of make_unique
205         // std::make_unique is trivial, and we diagnose bad constructor calls.
206         if (const auto *II = FT->getDeclName().getAsIdentifierInfo()) {
207           if (II->isStr("make_unique") && FT->isInStdNamespace())
208             return false;
209         }
210       }
211     }
212     return true;
213   }
214 
215 private:
216   PathRef File;
217   IncludeStructure Includes;
218   include_cleaner::PragmaIncludes Pragmas;
219   MainFileMacros Macros;
220   std::vector<PragmaMark> Marks;
221   bool IsMainFileIncludeGuarded = false;
222   const clang::LangOptions *LangOpts = nullptr;
223   const SourceManager *SourceMgr = nullptr;
224   const Preprocessor *PP = nullptr;
225   PreambleBuildStats *Stats;
226   bool ParseForwardingFunctions;
227   std::function<void(CompilerInstance &)> BeforeExecuteCallback;
228   std::optional<CapturedASTCtx> CapturedCtx;
229 };
230 
231 // Represents directives other than includes, where basic textual information is
232 // enough.
233 struct TextualPPDirective {
234   unsigned DirectiveLine;
235   // Full text that's representing the directive, including the `#`.
236   std::string Text;
237   unsigned Offset;
238   tok::PPKeywordKind Directive = tok::PPKeywordKind::pp_not_keyword;
239   // Name of the macro being defined in the case of a #define directive.
240   std::string MacroName;
241 
242   bool operator==(const TextualPPDirective &RHS) const {
243     return std::tie(DirectiveLine, Offset, Text) ==
244            std::tie(RHS.DirectiveLine, RHS.Offset, RHS.Text);
245   }
246 };
247 
248 // Formats a PP directive consisting of Prefix (e.g. "#define ") and Body ("X
249 // 10"). The formatting is copied so that the tokens in Body have PresumedLocs
250 // with correct columns and lines.
251 std::string spellDirective(llvm::StringRef Prefix,
252                            CharSourceRange DirectiveRange,
253                            const LangOptions &LangOpts, const SourceManager &SM,
254                            unsigned &DirectiveLine, unsigned &Offset) {
255   std::string SpelledDirective;
256   llvm::raw_string_ostream OS(SpelledDirective);
257   OS << Prefix;
258 
259   // Make sure DirectiveRange is a char range and doesn't contain macro ids.
260   DirectiveRange = SM.getExpansionRange(DirectiveRange);
261   if (DirectiveRange.isTokenRange()) {
262     DirectiveRange.setEnd(
263         Lexer::getLocForEndOfToken(DirectiveRange.getEnd(), 0, SM, LangOpts));
264   }
265 
266   auto DecompLoc = SM.getDecomposedLoc(DirectiveRange.getBegin());
267   DirectiveLine = SM.getLineNumber(DecompLoc.first, DecompLoc.second);
268   Offset = DecompLoc.second;
269   auto TargetColumn = SM.getColumnNumber(DecompLoc.first, DecompLoc.second) - 1;
270 
271   // Pad with spaces before DirectiveRange to make sure it will be on right
272   // column when patched.
273   if (Prefix.size() <= TargetColumn) {
274     // There is enough space for Prefix and space before directive, use it.
275     // We try to squeeze the Prefix into the same line whenever we can, as
276     // putting onto a separate line won't work at the beginning of the file.
277     OS << std::string(TargetColumn - Prefix.size(), ' ');
278   } else {
279     // Prefix was longer than the space we had. We produce e.g.:
280     // #line N-1
281     // #define \
282     //    X 10
283     OS << "\\\n" << std::string(TargetColumn, ' ');
284     // Decrement because we put an additional line break before
285     // DirectiveRange.begin().
286     --DirectiveLine;
287   }
288   OS << toSourceCode(SM, DirectiveRange.getAsRange());
289   return OS.str();
290 }
291 
292 // Collects #define directives inside the main file.
293 struct DirectiveCollector : public PPCallbacks {
294   DirectiveCollector(const Preprocessor &PP,
295                      std::vector<TextualPPDirective> &TextualDirectives)
296       : LangOpts(PP.getLangOpts()), SM(PP.getSourceManager()),
297         TextualDirectives(TextualDirectives) {}
298 
299   void FileChanged(SourceLocation Loc, FileChangeReason Reason,
300                    SrcMgr::CharacteristicKind FileType,
301                    FileID PrevFID) override {
302     InMainFile = SM.isWrittenInMainFile(Loc);
303   }
304 
305   void MacroDefined(const Token &MacroNameTok,
306                     const MacroDirective *MD) override {
307     if (!InMainFile)
308       return;
309     TextualDirectives.emplace_back();
310     TextualPPDirective &TD = TextualDirectives.back();
311     TD.Directive = tok::pp_define;
312     TD.MacroName = MacroNameTok.getIdentifierInfo()->getName().str();
313 
314     const auto *MI = MD->getMacroInfo();
315     TD.Text =
316         spellDirective("#define ",
317                        CharSourceRange::getTokenRange(
318                            MI->getDefinitionLoc(), MI->getDefinitionEndLoc()),
319                        LangOpts, SM, TD.DirectiveLine, TD.Offset);
320   }
321 
322 private:
323   bool InMainFile = true;
324   const LangOptions &LangOpts;
325   const SourceManager &SM;
326   std::vector<TextualPPDirective> &TextualDirectives;
327 };
328 
329 struct ScannedPreamble {
330   std::vector<Inclusion> Includes;
331   std::vector<TextualPPDirective> TextualDirectives;
332   // Literal lines of the preamble contents.
333   std::vector<llvm::StringRef> Lines;
334   PreambleBounds Bounds = {0, false};
335   std::vector<PragmaMark> Marks;
336   MainFileMacros Macros;
337 };
338 
339 /// Scans the preprocessor directives in the preamble section of the file by
340 /// running preprocessor over \p Contents. Returned includes do not contain
341 /// resolved paths. \p Cmd is used to build the compiler invocation, which might
342 /// stat/read files.
343 llvm::Expected<ScannedPreamble>
344 scanPreamble(llvm::StringRef Contents, const tooling::CompileCommand &Cmd) {
345   class EmptyFS : public ThreadsafeFS {
346   private:
347     llvm::IntrusiveRefCntPtr<llvm::vfs::FileSystem> viewImpl() const override {
348       return new llvm::vfs::InMemoryFileSystem;
349     }
350   };
351   EmptyFS FS;
352   // Build and run Preprocessor over the preamble.
353   ParseInputs PI;
354   // Memory buffers below expect null-terminated && non-null strings. So make
355   // sure to always use PI.Contents!
356   PI.Contents = Contents.str();
357   PI.TFS = &FS;
358   PI.CompileCommand = Cmd;
359   IgnoringDiagConsumer IgnoreDiags;
360   auto CI = buildCompilerInvocation(PI, IgnoreDiags);
361   if (!CI)
362     return error("failed to create compiler invocation");
363   CI->getDiagnosticOpts().IgnoreWarnings = true;
364   auto ContentsBuffer = llvm::MemoryBuffer::getMemBuffer(PI.Contents);
365   // This means we're scanning (though not preprocessing) the preamble section
366   // twice. However, it's important to precisely follow the preamble bounds used
367   // elsewhere.
368   auto Bounds = ComputePreambleBounds(CI->getLangOpts(), *ContentsBuffer, 0);
369   auto PreambleContents = llvm::MemoryBuffer::getMemBufferCopy(
370       llvm::StringRef(PI.Contents).take_front(Bounds.Size));
371   auto Clang = prepareCompilerInstance(
372       std::move(CI), nullptr, std::move(PreambleContents),
373       // Provide an empty FS to prevent preprocessor from performing IO. This
374       // also implies missing resolved paths for includes.
375       FS.view(std::nullopt), IgnoreDiags);
376   if (Clang->getFrontendOpts().Inputs.empty())
377     return error("compiler instance had no inputs");
378   // We are only interested in main file includes.
379   Clang->getPreprocessorOpts().SingleFileParseMode = true;
380   Clang->getPreprocessorOpts().UsePredefines = false;
381   PreprocessOnlyAction Action;
382   if (!Action.BeginSourceFile(*Clang, Clang->getFrontendOpts().Inputs[0]))
383     return error("failed BeginSourceFile");
384   Preprocessor &PP = Clang->getPreprocessor();
385   const auto &SM = PP.getSourceManager();
386   IncludeStructure Includes;
387   Includes.collect(*Clang);
388   ScannedPreamble SP;
389   SP.Bounds = Bounds;
390   PP.addPPCallbacks(
391       std::make_unique<DirectiveCollector>(PP, SP.TextualDirectives));
392   PP.addPPCallbacks(collectPragmaMarksCallback(SM, SP.Marks));
393   PP.addPPCallbacks(std::make_unique<CollectMainFileMacros>(PP, SP.Macros));
394   if (llvm::Error Err = Action.Execute())
395     return std::move(Err);
396   Action.EndSourceFile();
397   SP.Includes = std::move(Includes.MainFileIncludes);
398   llvm::append_range(SP.Lines, llvm::split(Contents, "\n"));
399   return SP;
400 }
401 
402 const char *spellingForIncDirective(tok::PPKeywordKind IncludeDirective) {
403   switch (IncludeDirective) {
404   case tok::pp_include:
405     return "include";
406   case tok::pp_import:
407     return "import";
408   case tok::pp_include_next:
409     return "include_next";
410   default:
411     break;
412   }
413   llvm_unreachable("not an include directive");
414 }
415 
// Accumulating wall time timer. Similar to llvm::Timer, but much cheaper,
// as it only tracks wall time.
// The timer is generic; it could be moved to support/ once there is a use
// case outside of FS time tracking.
class WallTimer {
  using Clock = std::chrono::steady_clock;

public:
  // [Re-]Start the timer.
  void startTimer() { StartTime = Clock::now(); }
  // Stop the timer and add the time since the last start to the total.
  void stopTimer() {
    const Clock::time_point Now = Clock::now();
    TotalTime += Now - StartTime;
  }
  // Return total time, in seconds.
  double getTime() {
    const std::chrono::duration<double> Seconds(TotalTime);
    return Seconds.count();
  }

private:
  Clock::duration TotalTime = Clock::duration::zero();
  Clock::time_point StartTime;
};
436 
437 class WallTimerRegion {
438 public:
439   WallTimerRegion(WallTimer &T) : T(T) { T.startTimer(); }
440   ~WallTimerRegion() { T.stopTimer(); }
441 
442 private:
443   WallTimer &T;
444 };
445 
446 // Used by TimerFS, tracks time spent in status() and getBuffer() calls while
447 // proxying to underlying File implementation.
448 class TimerFile : public llvm::vfs::File {
449 public:
450   TimerFile(WallTimer &Timer, std::unique_ptr<File> InnerFile)
451       : Timer(Timer), InnerFile(std::move(InnerFile)) {}
452 
453   llvm::ErrorOr<llvm::vfs::Status> status() override {
454     WallTimerRegion T(Timer);
455     return InnerFile->status();
456   }
457   llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>>
458   getBuffer(const Twine &Name, int64_t FileSize, bool RequiresNullTerminator,
459             bool IsVolatile) override {
460     WallTimerRegion T(Timer);
461     return InnerFile->getBuffer(Name, FileSize, RequiresNullTerminator,
462                                 IsVolatile);
463   }
464   std::error_code close() override {
465     WallTimerRegion T(Timer);
466     return InnerFile->close();
467   }
468 
469 private:
470   WallTimer &Timer;
471   std::unique_ptr<llvm::vfs::File> InnerFile;
472 };
473 
474 // A wrapper for FileSystems that tracks the amount of time spent in status()
475 // and openFileForRead() calls.
476 class TimerFS : public llvm::vfs::ProxyFileSystem {
477 public:
478   TimerFS(llvm::IntrusiveRefCntPtr<llvm::vfs::FileSystem> FS)
479       : ProxyFileSystem(std::move(FS)) {}
480 
481   llvm::ErrorOr<std::unique_ptr<llvm::vfs::File>>
482   openFileForRead(const llvm::Twine &Path) override {
483     WallTimerRegion T(Timer);
484     auto FileOr = getUnderlyingFS().openFileForRead(Path);
485     if (!FileOr)
486       return FileOr;
487     return std::make_unique<TimerFile>(Timer, std::move(FileOr.get()));
488   }
489 
490   llvm::ErrorOr<llvm::vfs::Status> status(const llvm::Twine &Path) override {
491     WallTimerRegion T(Timer);
492     return getUnderlyingFS().status(Path);
493   }
494 
495   double getTime() { return Timer.getTime(); }
496 
497 private:
498   WallTimer Timer;
499 };
500 
501 // Helpers for patching diagnostics between two versions of file contents.
502 class DiagPatcher {
503   llvm::ArrayRef<llvm::StringRef> OldLines;
504   llvm::ArrayRef<llvm::StringRef> CurrentLines;
505   llvm::StringMap<llvm::SmallVector<int>> CurrentContentsToLine;
506 
507   // Translates a range from old lines to current lines.
508   // Finds the consecutive set of lines that corresponds to the same contents in
509   // old and current, and applies the same translation to the range.
510   // Returns true if translation succeeded.
511   bool translateRange(Range &R) {
512     int OldStart = R.start.line;
513     int OldEnd = R.end.line;
514     assert(OldStart <= OldEnd);
515 
516     size_t RangeLen = OldEnd - OldStart + 1;
517     auto RangeContents = OldLines.slice(OldStart).take_front(RangeLen);
518     // Make sure the whole range is covered in old contents.
519     if (RangeContents.size() < RangeLen)
520       return false;
521 
522     std::optional<int> Closest;
523     for (int AlternateLine : CurrentContentsToLine.lookup(RangeContents[0])) {
524       // Check if AlternateLine matches all lines in the range.
525       if (RangeContents !=
526           CurrentLines.slice(AlternateLine).take_front(RangeLen))
527         continue;
528       int Delta = AlternateLine - OldStart;
529       if (!Closest.has_value() || abs(Delta) < abs(*Closest))
530         Closest = Delta;
531     }
532     // Couldn't find any viable matches in the current contents.
533     if (!Closest.has_value())
534       return false;
535     R.start.line += *Closest;
536     R.end.line += *Closest;
537     return true;
538   }
539 
540   // Translates a Note by patching its range when inside main file. Returns true
541   // on success.
542   bool translateNote(Note &N) {
543     if (!N.InsideMainFile)
544       return true;
545     if (translateRange(N.Range))
546       return true;
547     return false;
548   }
549 
550   // Tries to translate all the edit ranges inside the fix. Returns true on
551   // success. On failure fixes might be in an invalid state.
552   bool translateFix(Fix &F) {
553     return llvm::all_of(
554         F.Edits, [this](TextEdit &E) { return translateRange(E.range); });
555   }
556 
557 public:
558   DiagPatcher(llvm::ArrayRef<llvm::StringRef> OldLines,
559               llvm::ArrayRef<llvm::StringRef> CurrentLines) {
560     this->OldLines = OldLines;
561     this->CurrentLines = CurrentLines;
562     for (int Line = 0, E = CurrentLines.size(); Line != E; ++Line) {
563       llvm::StringRef Contents = CurrentLines[Line];
564       CurrentContentsToLine[Contents].push_back(Line);
565     }
566   }
567   // Translate diagnostic by moving its main range to new location (if inside
568   // the main file). Preserve all the notes and fixes that can be translated to
569   // new contents.
570   // Drops the whole diagnostic if main range can't be patched.
571   std::optional<Diag> translateDiag(const Diag &D) {
572     Range NewRange = D.Range;
573     // Patch range if it's inside main file.
574     if (D.InsideMainFile && !translateRange(NewRange)) {
575       // Drop the diagnostic if we couldn't patch the range.
576       return std::nullopt;
577     }
578 
579     Diag NewD = D;
580     NewD.Range = NewRange;
581     // Translate ranges inside notes and fixes too, dropping the ones that are
582     // no longer relevant.
583     llvm::erase_if(NewD.Notes, [this](Note &N) { return !translateNote(N); });
584     llvm::erase_if(NewD.Fixes, [this](Fix &F) { return !translateFix(F); });
585     return NewD;
586   }
587 };
588 } // namespace
589 
590 std::shared_ptr<const PreambleData>
591 buildPreamble(PathRef FileName, CompilerInvocation CI,
592               const ParseInputs &Inputs, bool StoreInMemory,
593               PreambleParsedCallback PreambleCallback,
594               PreambleBuildStats *Stats) {
595   // Note that we don't need to copy the input contents, preamble can live
596   // without those.
597   auto ContentsBuffer =
598       llvm::MemoryBuffer::getMemBuffer(Inputs.Contents, FileName);
599   auto Bounds = ComputePreambleBounds(CI.getLangOpts(), *ContentsBuffer, 0);
600 
601   trace::Span Tracer("BuildPreamble");
602   SPAN_ATTACH(Tracer, "File", FileName);
603   std::vector<std::unique_ptr<FeatureModule::ASTListener>> ASTListeners;
604   if (Inputs.FeatureModules) {
605     for (auto &M : *Inputs.FeatureModules) {
606       if (auto Listener = M.astListeners())
607         ASTListeners.emplace_back(std::move(Listener));
608     }
609   }
610   StoreDiags PreambleDiagnostics;
611   PreambleDiagnostics.setDiagCallback(
612       [&ASTListeners](const clang::Diagnostic &D, clangd::Diag &Diag) {
613         for (const auto &L : ASTListeners)
614           L->sawDiagnostic(D, Diag);
615       });
616   auto VFS = Inputs.TFS->view(Inputs.CompileCommand.Directory);
617   llvm::IntrusiveRefCntPtr<DiagnosticsEngine> PreambleDiagsEngine =
618       CompilerInstance::createDiagnostics(*VFS, &CI.getDiagnosticOpts(),
619                                           &PreambleDiagnostics,
620                                           /*ShouldOwnClient=*/false);
621   const Config &Cfg = Config::current();
622   PreambleDiagnostics.setLevelAdjuster([&](DiagnosticsEngine::Level DiagLevel,
623                                            const clang::Diagnostic &Info) {
624     if (Cfg.Diagnostics.SuppressAll ||
625         isDiagnosticSuppressed(Info, Cfg.Diagnostics.Suppress,
626                                CI.getLangOpts()))
627       return DiagnosticsEngine::Ignored;
628     switch (Info.getID()) {
629     case diag::warn_no_newline_eof:
630     case diag::warn_cxx98_compat_no_newline_eof:
631     case diag::ext_no_newline_eof:
632       // If the preamble doesn't span the whole file, drop the no newline at
633       // eof warnings.
634       return Bounds.Size != ContentsBuffer->getBufferSize()
635                  ? DiagnosticsEngine::Level::Ignored
636                  : DiagLevel;
637     }
638     return DiagLevel;
639   });
640 
641   // Skip function bodies when building the preamble to speed up building
642   // the preamble and make it smaller.
643   assert(!CI.getFrontendOpts().SkipFunctionBodies);
644   CI.getFrontendOpts().SkipFunctionBodies = true;
645   // We don't want to write comment locations into PCH. They are racy and slow
646   // to read back. We rely on dynamic index for the comments instead.
647   CI.getPreprocessorOpts().WriteCommentListToPCH = false;
648 
649   CppFilePreambleCallbacks CapturedInfo(
650       FileName, Stats, Inputs.Opts.PreambleParseForwardingFunctions,
651       [&ASTListeners](CompilerInstance &CI) {
652         for (const auto &L : ASTListeners)
653           L->beforeExecute(CI);
654       });
655   llvm::SmallString<32> AbsFileName(FileName);
656   VFS->makeAbsolute(AbsFileName);
657   auto StatCache = std::make_shared<PreambleFileStatusCache>(AbsFileName);
658   auto StatCacheFS = StatCache->getProducingFS(VFS);
659   llvm::IntrusiveRefCntPtr<TimerFS> TimedFS(new TimerFS(StatCacheFS));
660 
661   WallTimer PreambleTimer;
662   PreambleTimer.startTimer();
663   auto BuiltPreamble = PrecompiledPreamble::Build(
664       CI, ContentsBuffer.get(), Bounds, *PreambleDiagsEngine,
665       Stats ? TimedFS : StatCacheFS, std::make_shared<PCHContainerOperations>(),
666       StoreInMemory, /*StoragePath=*/"", CapturedInfo);
667 
668   PreambleTimer.stopTimer();
669 
670   // We have to setup DiagnosticConsumer that will be alife
671   // while preamble callback is executed
672   PreambleDiagsEngine->setClient(new IgnoringDiagConsumer, true);
673   // Reset references to ref-counted-ptrs before executing the callbacks, to
674   // prevent resetting them concurrently.
675   PreambleDiagsEngine.reset();
676   CI.DiagnosticOpts.reset();
677 
678   // When building the AST for the main file, we do want the function
679   // bodies.
680   CI.getFrontendOpts().SkipFunctionBodies = false;
681 
682   if (Stats != nullptr) {
683     Stats->TotalBuildTime = PreambleTimer.getTime();
684     Stats->FileSystemTime = TimedFS->getTime();
685     Stats->SerializedSize = BuiltPreamble ? BuiltPreamble->getSize() : 0;
686   }
687 
688   if (BuiltPreamble) {
689     log("Built preamble of size {0} for file {1} version {2} in {3} seconds",
690         BuiltPreamble->getSize(), FileName, Inputs.Version,
691         PreambleTimer.getTime());
692     std::vector<Diag> Diags = PreambleDiagnostics.take();
693     auto Result = std::make_shared<PreambleData>(std::move(*BuiltPreamble));
694     Result->Version = Inputs.Version;
695     Result->CompileCommand = Inputs.CompileCommand;
696     Result->Diags = std::move(Diags);
697     Result->Includes = CapturedInfo.takeIncludes();
698     Result->Pragmas = std::make_shared<const include_cleaner::PragmaIncludes>(
699         CapturedInfo.takePragmaIncludes());
700 
701     if (Inputs.ModulesManager) {
702       WallTimer PrerequisiteModuleTimer;
703       PrerequisiteModuleTimer.startTimer();
704       Result->RequiredModules =
705           Inputs.ModulesManager->buildPrerequisiteModulesFor(FileName,
706                                                              *Inputs.TFS);
707       PrerequisiteModuleTimer.stopTimer();
708 
709       log("Built prerequisite modules for file {0} in {1} seconds", FileName,
710           PrerequisiteModuleTimer.getTime());
711     }
712 
713     Result->Macros = CapturedInfo.takeMacros();
714     Result->Marks = CapturedInfo.takeMarks();
715     Result->StatCache = StatCache;
716     Result->MainIsIncludeGuarded = CapturedInfo.isMainFileIncludeGuarded();
717     Result->TargetOpts = CI.TargetOpts;
718     if (PreambleCallback) {
719       trace::Span Tracer("Running PreambleCallback");
720       auto Ctx = CapturedInfo.takeLife();
721       // Stat cache is thread safe only when there are no producers. Hence
722       // change the VFS underneath to a consuming fs.
723       Ctx->getFileManager().setVirtualFileSystem(
724           Result->StatCache->getConsumingFS(VFS));
725       // While extending the life of FileMgr and VFS, StatCache should also be
726       // extended.
727       Ctx->setStatCache(Result->StatCache);
728 
729       PreambleCallback(std::move(*Ctx), Result->Pragmas);
730     }
731     return Result;
732   }
733 
734   elog("Could not build a preamble for file {0} version {1}: {2}", FileName,
735        Inputs.Version, BuiltPreamble.getError().message());
736   for (const Diag &D : PreambleDiagnostics.take()) {
737     if (D.Severity < DiagnosticsEngine::Error)
738       continue;
739     // Not an ideal way to show errors, but better than nothing!
740     elog("  error: {0}", D.Message);
741   }
742   return nullptr;
743 }
744 
745 bool isPreambleCompatible(const PreambleData &Preamble,
746                           const ParseInputs &Inputs, PathRef FileName,
747                           const CompilerInvocation &CI) {
748   auto ContentsBuffer =
749       llvm::MemoryBuffer::getMemBuffer(Inputs.Contents, FileName);
750   auto Bounds = ComputePreambleBounds(CI.getLangOpts(), *ContentsBuffer, 0);
751   auto VFS = Inputs.TFS->view(Inputs.CompileCommand.Directory);
752   return compileCommandsAreEqual(Inputs.CompileCommand,
753                                  Preamble.CompileCommand) &&
754          Preamble.Preamble.CanReuse(CI, *ContentsBuffer, Bounds, *VFS) &&
755          (!Preamble.RequiredModules ||
756           Preamble.RequiredModules->canReuse(CI, VFS));
757 }
758 
759 void escapeBackslashAndQuotes(llvm::StringRef Text, llvm::raw_ostream &OS) {
760   for (char C : Text) {
761     switch (C) {
762     case '\\':
763     case '"':
764       OS << '\\';
765       break;
766     default:
767       break;
768     }
769     OS << C;
770   }
771 }
772 
773 // Translate diagnostics from baseline into modified for the lines that have the
774 // same spelling.
775 static std::vector<Diag> patchDiags(llvm::ArrayRef<Diag> BaselineDiags,
776                                     const ScannedPreamble &BaselineScan,
777                                     const ScannedPreamble &ModifiedScan) {
778   std::vector<Diag> PatchedDiags;
779   if (BaselineDiags.empty())
780     return PatchedDiags;
781   DiagPatcher Patcher(BaselineScan.Lines, ModifiedScan.Lines);
782   for (auto &D : BaselineDiags) {
783     if (auto NewD = Patcher.translateDiag(D))
784       PatchedDiags.emplace_back(std::move(*NewD));
785   }
786   return PatchedDiags;
787 }
788 
789 static std::string getPatchName(llvm::StringRef FileName) {
790   // This shouldn't coincide with any real file name.
791   llvm::SmallString<128> PatchName;
792   llvm::sys::path::append(PatchName, llvm::sys::path::parent_path(FileName),
793                           PreamblePatch::HeaderName);
794   return PatchName.str().str();
795 }
796 
797 PreamblePatch PreamblePatch::create(llvm::StringRef FileName,
798                                     const ParseInputs &Modified,
799                                     const PreambleData &Baseline,
800                                     PatchType PatchType) {
801   trace::Span Tracer("CreatePreamblePatch");
802   SPAN_ATTACH(Tracer, "File", FileName);
803   assert(llvm::sys::path::is_absolute(FileName) && "relative FileName!");
804   // First scan preprocessor directives in Baseline and Modified. These will be
805   // used to figure out newly added directives in Modified. Scanning can fail,
806   // the code just bails out and creates an empty patch in such cases, as:
807   // - If scanning for Baseline fails, no knowledge of existing includes hence
808   //   patch will contain all the includes in Modified. Leading to rebuild of
809   //   whole preamble, which is terribly slow.
810   // - If scanning for Modified fails, cannot figure out newly added ones so
811   //   there's nothing to do but generate an empty patch.
812   auto BaselineScan =
813       scanPreamble(Baseline.Preamble.getContents(), Modified.CompileCommand);
814   if (!BaselineScan) {
815     elog("Failed to scan baseline of {0}: {1}", FileName,
816          BaselineScan.takeError());
817     return PreamblePatch::unmodified(Baseline);
818   }
819   auto ModifiedScan = scanPreamble(Modified.Contents, Modified.CompileCommand);
820   if (!ModifiedScan) {
821     elog("Failed to scan modified contents of {0}: {1}", FileName,
822          ModifiedScan.takeError());
823     return PreamblePatch::unmodified(Baseline);
824   }
825 
826   bool IncludesChanged = BaselineScan->Includes != ModifiedScan->Includes;
827   bool DirectivesChanged =
828       BaselineScan->TextualDirectives != ModifiedScan->TextualDirectives;
829   if ((PatchType == PatchType::MacroDirectives || !IncludesChanged) &&
830       !DirectivesChanged)
831     return PreamblePatch::unmodified(Baseline);
832 
833   PreamblePatch PP;
834   PP.Baseline = &Baseline;
835   PP.PatchFileName = getPatchName(FileName);
836   PP.ModifiedBounds = ModifiedScan->Bounds;
837 
838   llvm::raw_string_ostream Patch(PP.PatchContents);
839   // Set default filename for subsequent #line directives
840   Patch << "#line 0 \"";
841   // FileName part of a line directive is subject to backslash escaping, which
842   // might lead to problems on windows especially.
843   escapeBackslashAndQuotes(FileName, Patch);
844   Patch << "\"\n";
845 
846   if (IncludesChanged && PatchType == PatchType::All) {
847     // We are only interested in newly added includes, record the ones in
848     // Baseline for exclusion.
849     llvm::DenseMap<std::pair<tok::PPKeywordKind, llvm::StringRef>,
850                    const Inclusion *>
851         ExistingIncludes;
852     for (const auto &Inc : Baseline.Includes.MainFileIncludes)
853       ExistingIncludes[{Inc.Directive, Inc.Written}] = &Inc;
854     // There might be includes coming from disabled regions, record these for
855     // exclusion too. note that we don't have resolved paths for those.
856     for (const auto &Inc : BaselineScan->Includes)
857       ExistingIncludes.try_emplace({Inc.Directive, Inc.Written});
858     // Calculate extra includes that needs to be inserted.
859     for (auto &Inc : ModifiedScan->Includes) {
860       auto It = ExistingIncludes.find({Inc.Directive, Inc.Written});
861       // Include already present in the baseline preamble. Set resolved path and
862       // put into preamble includes.
863       if (It != ExistingIncludes.end()) {
864         if (It->second) {
865           // If this header is included in an active region of the baseline
866           // preamble, preserve it.
867           auto &PatchedInc = PP.PreambleIncludes.emplace_back();
868           // Copy everything from existing include, apart from the location,
869           // when it's coming from baseline preamble.
870           PatchedInc = *It->second;
871           PatchedInc.HashLine = Inc.HashLine;
872           PatchedInc.HashOffset = Inc.HashOffset;
873         }
874         continue;
875       }
876       // Include is new in the modified preamble. Inject it into the patch and
877       // use #line to set the presumed location to where it is spelled.
878       auto LineCol = offsetToClangLineColumn(Modified.Contents, Inc.HashOffset);
879       Patch << llvm::formatv("#line {0}\n", LineCol.first);
880       Patch << llvm::formatv(
881           "#{0} {1}\n", spellingForIncDirective(Inc.Directive), Inc.Written);
882     }
883   } else {
884     // Make sure we have the full set of includes available even when we're not
885     // patching. As these are used by features we provide afterwards like hover,
886     // go-to-def or include-cleaner when preamble is stale.
887     PP.PreambleIncludes = Baseline.Includes.MainFileIncludes;
888   }
889 
890   if (DirectivesChanged) {
891     // We need to patch all the directives, since they are order dependent. e.g:
892     // #define BAR(X) NEW(X) // Newly introduced in Modified
893     // #define BAR(X) OLD(X) // Exists in the Baseline
894     //
895     // If we've patched only the first directive, the macro definition would've
896     // been wrong for the rest of the file, since patch is applied after the
897     // baseline preamble.
898     //
899     // Note that we deliberately ignore conditional directives and undefs to
900     // reduce complexity. The former might cause problems because scanning is
901     // imprecise and might pick directives from disabled regions.
902     for (const auto &TD : ModifiedScan->TextualDirectives) {
903       // Introduce an #undef directive before #defines to suppress any
904       // re-definition warnings.
905       if (TD.Directive == tok::pp_define)
906         Patch << "#undef " << TD.MacroName << '\n';
907       Patch << "#line " << TD.DirectiveLine << '\n';
908       Patch << TD.Text << '\n';
909     }
910   }
911 
912   PP.PatchedDiags = patchDiags(Baseline.Diags, *BaselineScan, *ModifiedScan);
913   PP.PatchedMarks = std::move(ModifiedScan->Marks);
914   PP.PatchedMacros = std::move(ModifiedScan->Macros);
915   dlog("Created preamble patch: {0}", Patch.str());
916   return PP;
917 }
918 
919 PreamblePatch PreamblePatch::createFullPatch(llvm::StringRef FileName,
920                                              const ParseInputs &Modified,
921                                              const PreambleData &Baseline) {
922   return create(FileName, Modified, Baseline, PatchType::All);
923 }
924 
925 PreamblePatch PreamblePatch::createMacroPatch(llvm::StringRef FileName,
926                                               const ParseInputs &Modified,
927                                               const PreambleData &Baseline) {
928   return create(FileName, Modified, Baseline, PatchType::MacroDirectives);
929 }
930 
931 void PreamblePatch::apply(CompilerInvocation &CI) const {
932   // Make sure the compilation uses same target opts as the preamble. Clang has
933   // no guarantees around using arbitrary options when reusing PCHs, and
934   // different target opts can result in crashes, see
935   // ParsedASTTest.PreambleWithDifferentTarget.
936   // Make sure this is a deep copy, as the same Baseline might be used
937   // concurrently.
938   *CI.TargetOpts = *Baseline->TargetOpts;
939 
940   // No need to map an empty file.
941   if (PatchContents.empty())
942     return;
943   auto &PPOpts = CI.getPreprocessorOpts();
944   auto PatchBuffer =
945       // we copy here to ensure contents are still valid if CI outlives the
946       // PreamblePatch.
947       llvm::MemoryBuffer::getMemBufferCopy(PatchContents, PatchFileName);
948   // CI will take care of the lifetime of the buffer.
949   PPOpts.addRemappedFile(PatchFileName, PatchBuffer.release());
950   // The patch will be parsed after loading the preamble ast and before parsing
951   // the main file.
952   PPOpts.Includes.push_back(PatchFileName);
953 }
954 
955 std::vector<Inclusion> PreamblePatch::preambleIncludes() const {
956   return PreambleIncludes;
957 }
958 
959 PreamblePatch PreamblePatch::unmodified(const PreambleData &Preamble) {
960   PreamblePatch PP;
961   PP.Baseline = &Preamble;
962   PP.PreambleIncludes = Preamble.Includes.MainFileIncludes;
963   PP.ModifiedBounds = Preamble.Preamble.getBounds();
964   PP.PatchedDiags = Preamble.Diags;
965   return PP;
966 }
967 
968 llvm::ArrayRef<PragmaMark> PreamblePatch::marks() const {
969   if (PatchContents.empty())
970     return Baseline->Marks;
971   return PatchedMarks;
972 }
973 
974 const MainFileMacros &PreamblePatch::mainFileMacros() const {
975   if (PatchContents.empty())
976     return Baseline->Macros;
977   return PatchedMacros;
978 }
979 
980 OptionalFileEntryRef PreamblePatch::getPatchEntry(llvm::StringRef MainFilePath,
981                                                   const SourceManager &SM) {
982   auto PatchFilePath = getPatchName(MainFilePath);
983   return SM.getFileManager().getOptionalFileRef(PatchFilePath);
984 }
985 } // namespace clangd
986 } // namespace clang
987