xref: /llvm-project/clang-tools-extra/include-cleaner/lib/Record.cpp (revision 5574a5894fdb7f9a46a4fbe6c8970fd39890dc9b)
1 //===--- Record.cpp - Record compiler events ------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "clang-include-cleaner/Record.h"
10 #include "clang-include-cleaner/Types.h"
11 #include "clang/AST/ASTConsumer.h"
12 #include "clang/AST/ASTContext.h"
13 #include "clang/AST/DeclGroup.h"
14 #include "clang/Basic/FileEntry.h"
15 #include "clang/Basic/FileManager.h"
16 #include "clang/Basic/LLVM.h"
17 #include "clang/Basic/SourceLocation.h"
18 #include "clang/Basic/SourceManager.h"
19 #include "clang/Basic/Specifiers.h"
20 #include "clang/Frontend/CompilerInstance.h"
21 #include "clang/Lex/DirectoryLookup.h"
22 #include "clang/Lex/MacroInfo.h"
23 #include "clang/Lex/PPCallbacks.h"
24 #include "clang/Lex/Preprocessor.h"
25 #include "clang/Tooling/Inclusions/HeaderAnalysis.h"
26 #include "clang/Tooling/Inclusions/StandardLibrary.h"
27 #include "llvm/ADT/ArrayRef.h"
28 #include "llvm/ADT/DenseMap.h"
29 #include "llvm/ADT/STLExtras.h"
30 #include "llvm/ADT/SmallSet.h"
31 #include "llvm/ADT/SmallVector.h"
32 #include "llvm/ADT/StringRef.h"
33 #include "llvm/ADT/iterator_range.h"
34 #include "llvm/Support/Allocator.h"
35 #include "llvm/Support/Error.h"
36 #include "llvm/Support/FileSystem/UniqueID.h"
37 #include "llvm/Support/Path.h"
38 #include "llvm/Support/StringSaver.h"
39 #include <algorithm>
40 #include <assert.h>
41 #include <memory>
42 #include <optional>
43 #include <set>
44 #include <utility>
45 #include <vector>
46 
47 namespace clang::include_cleaner {
48 namespace {
49 
50 class PPRecorder : public PPCallbacks {
51 public:
PPRecorder(RecordedPP & Recorded,const Preprocessor & PP)52   PPRecorder(RecordedPP &Recorded, const Preprocessor &PP)
53       : Recorded(Recorded), PP(PP), SM(PP.getSourceManager()) {
54     for (const auto &Dir : PP.getHeaderSearchInfo().search_dir_range())
55       if (Dir.getLookupType() == DirectoryLookup::LT_NormalDir)
56         Recorded.Includes.addSearchDirectory(Dir.getDirRef()->getName());
57   }
58 
FileChanged(SourceLocation Loc,FileChangeReason Reason,SrcMgr::CharacteristicKind FileType,FileID PrevFID)59   void FileChanged(SourceLocation Loc, FileChangeReason Reason,
60                    SrcMgr::CharacteristicKind FileType,
61                    FileID PrevFID) override {
62     Active = SM.isWrittenInMainFile(Loc);
63   }
64 
InclusionDirective(SourceLocation Hash,const Token & IncludeTok,StringRef SpelledFilename,bool IsAngled,CharSourceRange FilenameRange,OptionalFileEntryRef File,StringRef SearchPath,StringRef RelativePath,const Module * SuggestedModule,bool ModuleImported,SrcMgr::CharacteristicKind)65   void InclusionDirective(SourceLocation Hash, const Token &IncludeTok,
66                           StringRef SpelledFilename, bool IsAngled,
67                           CharSourceRange FilenameRange,
68                           OptionalFileEntryRef File, StringRef SearchPath,
69                           StringRef RelativePath, const Module *SuggestedModule,
70                           bool ModuleImported,
71                           SrcMgr::CharacteristicKind) override {
72     if (!Active)
73       return;
74 
75     Include I;
76     I.HashLocation = Hash;
77     I.Resolved = File;
78     I.Line = SM.getSpellingLineNumber(Hash);
79     I.Spelled = SpelledFilename;
80     I.Angled = IsAngled;
81     Recorded.Includes.add(I);
82   }
83 
MacroExpands(const Token & MacroName,const MacroDefinition & MD,SourceRange Range,const MacroArgs * Args)84   void MacroExpands(const Token &MacroName, const MacroDefinition &MD,
85                     SourceRange Range, const MacroArgs *Args) override {
86     if (!Active)
87       return;
88     recordMacroRef(MacroName, *MD.getMacroInfo());
89   }
90 
MacroDefined(const Token & MacroName,const MacroDirective * MD)91   void MacroDefined(const Token &MacroName, const MacroDirective *MD) override {
92     if (!Active)
93       return;
94 
95     const auto *MI = MD->getMacroInfo();
96     // The tokens of a macro definition could refer to a macro.
97     // Formally this reference isn't resolved until this macro is expanded,
98     // but we want to treat it as a reference anyway.
99     for (const auto &Tok : MI->tokens()) {
100       auto *II = Tok.getIdentifierInfo();
101       // Could this token be a reference to a macro? (Not param to this macro).
102       if (!II || !II->hadMacroDefinition() ||
103           llvm::is_contained(MI->params(), II))
104         continue;
105       if (const MacroInfo *MI = PP.getMacroInfo(II))
106         recordMacroRef(Tok, *MI);
107     }
108   }
109 
MacroUndefined(const Token & MacroName,const MacroDefinition & MD,const MacroDirective *)110   void MacroUndefined(const Token &MacroName, const MacroDefinition &MD,
111                       const MacroDirective *) override {
112     if (!Active)
113       return;
114     if (const auto *MI = MD.getMacroInfo())
115       recordMacroRef(MacroName, *MI);
116   }
117 
Ifdef(SourceLocation Loc,const Token & MacroNameTok,const MacroDefinition & MD)118   void Ifdef(SourceLocation Loc, const Token &MacroNameTok,
119              const MacroDefinition &MD) override {
120     if (!Active)
121       return;
122     if (const auto *MI = MD.getMacroInfo())
123       recordMacroRef(MacroNameTok, *MI, RefType::Ambiguous);
124   }
125 
Ifndef(SourceLocation Loc,const Token & MacroNameTok,const MacroDefinition & MD)126   void Ifndef(SourceLocation Loc, const Token &MacroNameTok,
127               const MacroDefinition &MD) override {
128     if (!Active)
129       return;
130     if (const auto *MI = MD.getMacroInfo())
131       recordMacroRef(MacroNameTok, *MI, RefType::Ambiguous);
132   }
133 
134   using PPCallbacks::Elifdef;
135   using PPCallbacks::Elifndef;
Elifdef(SourceLocation Loc,const Token & MacroNameTok,const MacroDefinition & MD)136   void Elifdef(SourceLocation Loc, const Token &MacroNameTok,
137                const MacroDefinition &MD) override {
138     if (!Active)
139       return;
140     if (const auto *MI = MD.getMacroInfo())
141       recordMacroRef(MacroNameTok, *MI, RefType::Ambiguous);
142   }
Elifndef(SourceLocation Loc,const Token & MacroNameTok,const MacroDefinition & MD)143   void Elifndef(SourceLocation Loc, const Token &MacroNameTok,
144                 const MacroDefinition &MD) override {
145     if (!Active)
146       return;
147     if (const auto *MI = MD.getMacroInfo())
148       recordMacroRef(MacroNameTok, *MI, RefType::Ambiguous);
149   }
150 
Defined(const Token & MacroNameTok,const MacroDefinition & MD,SourceRange Range)151   void Defined(const Token &MacroNameTok, const MacroDefinition &MD,
152                SourceRange Range) override {
153     if (!Active)
154       return;
155     if (const auto *MI = MD.getMacroInfo())
156       recordMacroRef(MacroNameTok, *MI, RefType::Ambiguous);
157   }
158 
159 private:
recordMacroRef(const Token & Tok,const MacroInfo & MI,RefType RT=RefType::Explicit)160   void recordMacroRef(const Token &Tok, const MacroInfo &MI,
161                       RefType RT = RefType::Explicit) {
162     if (MI.isBuiltinMacro())
163       return; // __FILE__ is not a reference.
164     Recorded.MacroReferences.push_back(
165         SymbolReference{Macro{Tok.getIdentifierInfo(), MI.getDefinitionLoc()},
166                         Tok.getLocation(), RT});
167   }
168 
169   bool Active = false;
170   RecordedPP &Recorded;
171   const Preprocessor &PP;
172   const SourceManager &SM;
173 };
174 
175 } // namespace
176 
177 class PragmaIncludes::RecordPragma : public PPCallbacks, public CommentHandler {
178 public:
RecordPragma(const CompilerInstance & CI,PragmaIncludes * Out)179   RecordPragma(const CompilerInstance &CI, PragmaIncludes *Out)
180       : RecordPragma(CI.getPreprocessor(), Out) {}
RecordPragma(const Preprocessor & P,PragmaIncludes * Out)181   RecordPragma(const Preprocessor &P, PragmaIncludes *Out)
182       : SM(P.getSourceManager()), HeaderInfo(P.getHeaderSearchInfo()), Out(Out),
183         Arena(std::make_shared<llvm::BumpPtrAllocator>()),
184         UniqueStrings(*Arena),
185         MainFileStem(llvm::sys::path::stem(
186             SM.getNonBuiltinFilenameForID(SM.getMainFileID()).value_or(""))) {}
187 
FileChanged(SourceLocation Loc,FileChangeReason Reason,SrcMgr::CharacteristicKind FileType,FileID PrevFID)188   void FileChanged(SourceLocation Loc, FileChangeReason Reason,
189                    SrcMgr::CharacteristicKind FileType,
190                    FileID PrevFID) override {
191     InMainFile = SM.isWrittenInMainFile(Loc);
192 
193     if (Reason == PPCallbacks::ExitFile) {
194       // At file exit time HeaderSearchInfo is valid and can be used to
195       // determine whether the file was a self-contained header or not.
196       if (OptionalFileEntryRef FE = SM.getFileEntryRefForID(PrevFID)) {
197         if (tooling::isSelfContainedHeader(*FE, SM, HeaderInfo))
198           Out->NonSelfContainedFiles.erase(FE->getUniqueID());
199         else
200           Out->NonSelfContainedFiles.insert(FE->getUniqueID());
201       }
202     }
203   }
204 
EndOfMainFile()205   void EndOfMainFile() override {
206     for (auto &It : Out->IWYUExportBy) {
207       llvm::sort(It.getSecond());
208       It.getSecond().erase(
209           std::unique(It.getSecond().begin(), It.getSecond().end()),
210           It.getSecond().end());
211     }
212     Out->Arena.emplace_back(std::move(Arena));
213   }
214 
InclusionDirective(SourceLocation HashLoc,const Token & IncludeTok,llvm::StringRef FileName,bool IsAngled,CharSourceRange,OptionalFileEntryRef File,llvm::StringRef,llvm::StringRef,const clang::Module *,bool,SrcMgr::CharacteristicKind FileKind)215   void InclusionDirective(SourceLocation HashLoc, const Token &IncludeTok,
216                           llvm::StringRef FileName, bool IsAngled,
217                           CharSourceRange /*FilenameRange*/,
218                           OptionalFileEntryRef File,
219                           llvm::StringRef /*SearchPath*/,
220                           llvm::StringRef /*RelativePath*/,
221                           const clang::Module * /*SuggestedModule*/,
222                           bool /*ModuleImported*/,
223                           SrcMgr::CharacteristicKind FileKind) override {
224     FileID HashFID = SM.getFileID(HashLoc);
225     int HashLine = SM.getLineNumber(HashFID, SM.getFileOffset(HashLoc));
226     std::optional<Header> IncludedHeader;
227     if (IsAngled)
228       if (auto StandardHeader =
229               tooling::stdlib::Header::named("<" + FileName.str() + ">")) {
230         IncludedHeader = *StandardHeader;
231       }
232     if (!IncludedHeader && File)
233       IncludedHeader = *File;
234     checkForExport(HashFID, HashLine, IncludedHeader, File);
235     checkForKeep(HashLine, File);
236     checkForDeducedAssociated(IncludedHeader);
237   }
238 
checkForExport(FileID IncludingFile,int HashLine,std::optional<Header> IncludedHeader,OptionalFileEntryRef IncludedFile)239   void checkForExport(FileID IncludingFile, int HashLine,
240                       std::optional<Header> IncludedHeader,
241                       OptionalFileEntryRef IncludedFile) {
242     if (ExportStack.empty())
243       return;
244     auto &Top = ExportStack.back();
245     if (Top.SeenAtFile != IncludingFile)
246       return;
247     // Make sure current include is covered by the export pragma.
248     if ((Top.Block && HashLine > Top.SeenAtLine) ||
249         Top.SeenAtLine == HashLine) {
250       if (IncludedFile)
251         Out->IWYUExportBy[IncludedFile->getUniqueID()].push_back(Top.Path);
252       if (IncludedHeader && IncludedHeader->kind() == Header::Standard)
253         Out->StdIWYUExportBy[IncludedHeader->standard()].push_back(Top.Path);
254       // main-file #include with export pragma should never be removed.
255       if (Top.SeenAtFile == SM.getMainFileID() && IncludedFile)
256         Out->ShouldKeep.insert(IncludedFile->getUniqueID());
257     }
258     if (!Top.Block) // Pop immediately for single-line export pragma.
259       ExportStack.pop_back();
260   }
261 
checkForKeep(int HashLine,OptionalFileEntryRef IncludedFile)262   void checkForKeep(int HashLine, OptionalFileEntryRef IncludedFile) {
263     if (!InMainFile || KeepStack.empty())
264       return;
265     KeepPragma &Top = KeepStack.back();
266     // Check if the current include is covered by a keep pragma.
267     if (IncludedFile && ((Top.Block && HashLine > Top.SeenAtLine) ||
268                          Top.SeenAtLine == HashLine)) {
269       Out->ShouldKeep.insert(IncludedFile->getUniqueID());
270     }
271 
272     if (!Top.Block)
273       KeepStack.pop_back(); // Pop immediately for single-line keep pragma.
274   }
275 
276   // Consider marking H as the "associated header" of the main file.
277   //
278   // Our heuristic:
279   // - it must be the first #include in the main file
280   // - it must have the same name stem as the main file (foo.h and foo.cpp)
281   // (IWYU pragma: associated is also supported, just not by this function).
282   //
283   // We consider the associated header as if it had a keep pragma.
284   // (Unlike IWYU, we don't treat #includes inside the associated header as if
285   // they were written in the main file.)
checkForDeducedAssociated(std::optional<Header> H)286   void checkForDeducedAssociated(std::optional<Header> H) {
287     namespace path = llvm::sys::path;
288     if (!InMainFile || SeenAssociatedCandidate)
289       return;
290     SeenAssociatedCandidate = true; // Only the first #include is our candidate.
291     if (!H || H->kind() != Header::Physical)
292       return;
293     if (path::stem(H->physical().getName(), path::Style::posix) == MainFileStem)
294       Out->ShouldKeep.insert(H->physical().getUniqueID());
295   }
296 
HandleComment(Preprocessor & PP,SourceRange Range)297   bool HandleComment(Preprocessor &PP, SourceRange Range) override {
298     auto &SM = PP.getSourceManager();
299     auto Pragma =
300         tooling::parseIWYUPragma(SM.getCharacterData(Range.getBegin()));
301     if (!Pragma)
302       return false;
303 
304     auto [CommentFID, CommentOffset] = SM.getDecomposedLoc(Range.getBegin());
305     int CommentLine = SM.getLineNumber(CommentFID, CommentOffset);
306 
307     if (InMainFile) {
308       if (Pragma->starts_with("keep") ||
309           // Limited support for associated headers: never consider unused.
310           Pragma->starts_with("associated")) {
311         KeepStack.push_back({CommentLine, false});
312       } else if (Pragma->starts_with("begin_keep")) {
313         KeepStack.push_back({CommentLine, true});
314       } else if (Pragma->starts_with("end_keep") && !KeepStack.empty()) {
315         assert(KeepStack.back().Block);
316         KeepStack.pop_back();
317       }
318     }
319 
320     auto FE = SM.getFileEntryRefForID(CommentFID);
321     if (!FE) {
322       // This can only happen when the buffer was registered virtually into
323       // SourceManager and FileManager has no idea about it. In such a scenario,
324       // that file cannot be discovered by HeaderSearch, therefore no "explicit"
325       // includes for that file.
326       return false;
327     }
328     auto CommentUID = FE->getUniqueID();
329     if (Pragma->consume_front("private")) {
330       StringRef PublicHeader;
331       if (Pragma->consume_front(", include ")) {
332         // We always insert using the spelling from the pragma.
333         PublicHeader =
334             save(Pragma->starts_with("<") || Pragma->starts_with("\"")
335                      ? (*Pragma)
336                      : ("\"" + *Pragma + "\"").str());
337       }
338       Out->IWYUPublic.insert({CommentUID, PublicHeader});
339       return false;
340     }
341     if (Pragma->consume_front("always_keep")) {
342       Out->ShouldKeep.insert(CommentUID);
343       return false;
344     }
345     auto Filename = FE->getName();
346     // Record export pragma.
347     if (Pragma->starts_with("export")) {
348       ExportStack.push_back({CommentLine, CommentFID, save(Filename), false});
349     } else if (Pragma->starts_with("begin_exports")) {
350       ExportStack.push_back({CommentLine, CommentFID, save(Filename), true});
351     } else if (Pragma->starts_with("end_exports")) {
352       // FIXME: be robust on unmatching cases. We should only pop the stack if
353       // the begin_exports and end_exports is in the same file.
354       if (!ExportStack.empty()) {
355         assert(ExportStack.back().Block);
356         ExportStack.pop_back();
357       }
358     }
359     return false;
360   }
361 
362 private:
save(llvm::StringRef S)363   StringRef save(llvm::StringRef S) { return UniqueStrings.save(S); }
364 
365   bool InMainFile = false;
366   const SourceManager &SM;
367   const HeaderSearch &HeaderInfo;
368   PragmaIncludes *Out;
369   std::shared_ptr<llvm::BumpPtrAllocator> Arena;
370   /// Intern table for strings. Contents are on the arena.
371   llvm::StringSaver UniqueStrings;
372   // Used when deducing associated header.
373   llvm::StringRef MainFileStem;
374   bool SeenAssociatedCandidate = false;
375 
376   struct ExportPragma {
377     // The line number where we saw the begin_exports or export pragma.
378     int SeenAtLine = 0; // 1-based line number.
379     // The file where we saw the pragma.
380     FileID SeenAtFile;
381     // Name (per FileEntry::getName()) of the file SeenAtFile.
382     StringRef Path;
383     // true if it is a block begin/end_exports pragma; false if it is a
384     // single-line export pragma.
385     bool Block = false;
386   };
387   // A stack for tracking all open begin_exports or single-line export.
388   std::vector<ExportPragma> ExportStack;
389 
390   struct KeepPragma {
391     // The line number where we saw the begin_keep or keep pragma.
392     int SeenAtLine = 0; // 1-based line number.
393     // true if it is a block begin/end_keep pragma; false if it is a
394     // single-line keep pragma.
395     bool Block = false;
396   };
397   // A stack for tracking all open begin_keep pragmas or single-line keeps.
398   std::vector<KeepPragma> KeepStack;
399 };
400 
record(const CompilerInstance & CI)401 void PragmaIncludes::record(const CompilerInstance &CI) {
402   auto Record = std::make_unique<RecordPragma>(CI, this);
403   CI.getPreprocessor().addCommentHandler(Record.get());
404   CI.getPreprocessor().addPPCallbacks(std::move(Record));
405 }
406 
record(Preprocessor & P)407 void PragmaIncludes::record(Preprocessor &P) {
408   auto Record = std::make_unique<RecordPragma>(P, this);
409   P.addCommentHandler(Record.get());
410   P.addPPCallbacks(std::move(Record));
411 }
412 
getPublic(const FileEntry * F) const413 llvm::StringRef PragmaIncludes::getPublic(const FileEntry *F) const {
414   auto It = IWYUPublic.find(F->getUniqueID());
415   if (It == IWYUPublic.end())
416     return "";
417   return It->getSecond();
418 }
419 
420 static llvm::SmallVector<FileEntryRef>
toFileEntries(llvm::ArrayRef<StringRef> FileNames,FileManager & FM)421 toFileEntries(llvm::ArrayRef<StringRef> FileNames, FileManager &FM) {
422   llvm::SmallVector<FileEntryRef> Results;
423 
424   for (auto FName : FileNames) {
425     // FIMXE: log the failing cases?
426     if (auto FE = FM.getOptionalFileRef(FName))
427       Results.push_back(*FE);
428   }
429   return Results;
430 }
431 llvm::SmallVector<FileEntryRef>
getExporters(const FileEntry * File,FileManager & FM) const432 PragmaIncludes::getExporters(const FileEntry *File, FileManager &FM) const {
433   auto It = IWYUExportBy.find(File->getUniqueID());
434   if (It == IWYUExportBy.end())
435     return {};
436 
437   return toFileEntries(It->getSecond(), FM);
438 }
439 llvm::SmallVector<FileEntryRef>
getExporters(tooling::stdlib::Header StdHeader,FileManager & FM) const440 PragmaIncludes::getExporters(tooling::stdlib::Header StdHeader,
441                              FileManager &FM) const {
442   auto It = StdIWYUExportBy.find(StdHeader);
443   if (It == StdIWYUExportBy.end())
444     return {};
445   return toFileEntries(It->getSecond(), FM);
446 }
447 
isSelfContained(const FileEntry * FE) const448 bool PragmaIncludes::isSelfContained(const FileEntry *FE) const {
449   return !NonSelfContainedFiles.contains(FE->getUniqueID());
450 }
451 
isPrivate(const FileEntry * FE) const452 bool PragmaIncludes::isPrivate(const FileEntry *FE) const {
453   return IWYUPublic.contains(FE->getUniqueID());
454 }
455 
shouldKeep(const FileEntry * FE) const456 bool PragmaIncludes::shouldKeep(const FileEntry *FE) const {
457   return ShouldKeep.contains(FE->getUniqueID()) ||
458          NonSelfContainedFiles.contains(FE->getUniqueID());
459 }
460 
461 namespace {
isImplicitTemplateSpecialization(const Decl * D)462 template <typename T> bool isImplicitTemplateSpecialization(const Decl *D) {
463   if (const auto *TD = dyn_cast<T>(D))
464     return TD->getTemplateSpecializationKind() == TSK_ImplicitInstantiation;
465   return false;
466 }
467 } // namespace
468 
record()469 std::unique_ptr<ASTConsumer> RecordedAST::record() {
470   class Recorder : public ASTConsumer {
471     RecordedAST *Out;
472 
473   public:
474     Recorder(RecordedAST *Out) : Out(Out) {}
475     void Initialize(ASTContext &Ctx) override { Out->Ctx = &Ctx; }
476     bool HandleTopLevelDecl(DeclGroupRef DG) override {
477       const auto &SM = Out->Ctx->getSourceManager();
478       for (Decl *D : DG) {
479         if (!SM.isWrittenInMainFile(SM.getExpansionLoc(D->getLocation())))
480           continue;
481         if (isImplicitTemplateSpecialization<FunctionDecl>(D) ||
482             isImplicitTemplateSpecialization<CXXRecordDecl>(D) ||
483             isImplicitTemplateSpecialization<VarDecl>(D))
484           continue;
485         // FIXME: Filter out certain Obj-C as well.
486         Out->Roots.push_back(D);
487       }
488       return ASTConsumer::HandleTopLevelDecl(DG);
489     }
490   };
491 
492   return std::make_unique<Recorder>(this);
493 }
494 
record(const Preprocessor & PP)495 std::unique_ptr<PPCallbacks> RecordedPP::record(const Preprocessor &PP) {
496   return std::make_unique<PPRecorder>(*this, PP);
497 }
498 
499 } // namespace clang::include_cleaner
500