xref: /llvm-project/clang-tools-extra/include-cleaner/tool/IncludeCleaner.cpp (revision ec6c3448d31056db5d63d7aed3e9f207edb49321)
1 //===--- IncludeCleaner.cpp - standalone tool for include analysis --------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "AnalysisInternal.h"
10 #include "clang-include-cleaner/Analysis.h"
11 #include "clang-include-cleaner/Record.h"
12 #include "clang/Frontend/CompilerInstance.h"
13 #include "clang/Frontend/FrontendAction.h"
14 #include "clang/Lex/Preprocessor.h"
15 #include "clang/Tooling/CommonOptionsParser.h"
16 #include "clang/Tooling/Tooling.h"
17 #include "llvm/ADT/STLFunctionalExtras.h"
18 #include "llvm/ADT/SmallVector.h"
19 #include "llvm/ADT/StringMap.h"
20 #include "llvm/ADT/StringRef.h"
21 #include "llvm/Support/CommandLine.h"
22 #include "llvm/Support/FormatVariadic.h"
23 #include "llvm/Support/Regex.h"
24 #include "llvm/Support/Signals.h"
25 #include "llvm/Support/raw_ostream.h"
26 #include <functional>
27 #include <memory>
28 #include <string>
29 #include <utility>
30 #include <vector>
31 
32 namespace clang {
33 namespace include_cleaner {
34 namespace {
35 namespace cl = llvm::cl;
36 
37 llvm::StringRef Overview = llvm::StringLiteral(R"(
38 clang-include-cleaner analyzes the #include directives in source code.
39 
40 It suggests removing headers that the code is not using.
41 It suggests inserting headers that the code relies on, but does not include.
42 These changes make the file more self-contained and (at scale) make the codebase
43 easier to reason about and modify.
44 
45 The tool operates on *working* source code. This means it can suggest including
46 headers that are only indirectly included, but cannot suggest those that are
47 missing entirely. (clang-include-fixer can do this).
48 )")
49                                .trim();
50 
51 cl::OptionCategory IncludeCleaner("clang-include-cleaner");
52 
53 cl::opt<std::string> HTMLReportPath{
54     "html",
55     cl::desc("Specify an output filename for an HTML report. "
56              "This describes both recommendations and reasons for changes."),
57     cl::cat(IncludeCleaner),
58 };
59 
60 cl::opt<std::string> OnlyHeaders{
61     "only-headers",
62     cl::desc("A comma-separated list of regexes to match against suffix of a "
63              "header. Only headers that match will be analyzed."),
64     cl::init(""),
65     cl::cat(IncludeCleaner),
66 };
67 
68 cl::opt<std::string> IgnoreHeaders{
69     "ignore-headers",
70     cl::desc("A comma-separated list of regexes to match against suffix of a "
71              "header, and disable analysis if matched."),
72     cl::init(""),
73     cl::cat(IncludeCleaner),
74 };
75 
76 enum class PrintStyle { Changes, Final };
77 cl::opt<PrintStyle> Print{
78     "print",
79     cl::values(
80         clEnumValN(PrintStyle::Changes, "changes", "Print symbolic changes"),
81         clEnumValN(PrintStyle::Final, "", "Print final code")),
82     cl::ValueOptional,
83     cl::init(PrintStyle::Final),
84     cl::desc("Print the list of headers to insert and remove"),
85     cl::cat(IncludeCleaner),
86 };
87 
88 cl::opt<bool> Edit{
89     "edit",
90     cl::desc("Apply edits to analyzed source files"),
91     cl::cat(IncludeCleaner),
92 };
93 
94 cl::opt<bool> Insert{
95     "insert",
96     cl::desc("Allow header insertions"),
97     cl::init(true),
98     cl::cat(IncludeCleaner),
99 };
100 cl::opt<bool> Remove{
101     "remove",
102     cl::desc("Allow header removals"),
103     cl::init(true),
104     cl::cat(IncludeCleaner),
105 };
106 
// Number of tool-level failures (e.g. report or edit could not be written).
// A non-zero count makes main() return a failing exit status.
// Initialised directly: ATOMIC_VAR_INIT is deprecated since C++20 and is not
// needed to initialise a std::atomic.
std::atomic<unsigned> Errors{0};
108 
109 format::FormatStyle getStyle(llvm::StringRef Filename) {
110   auto S = format::getStyle(format::DefaultFormatStyle, Filename,
111                             format::DefaultFallbackStyle);
112   if (!S || !S->isCpp()) {
113     consumeError(S.takeError());
114     return format::getLLVMStyle();
115   }
116   return std::move(*S);
117 }
118 
119 class Action : public clang::ASTFrontendAction {
120 public:
121   Action(llvm::function_ref<bool(llvm::StringRef)> HeaderFilter,
122          llvm::StringMap<std::string> &EditedFiles)
123       : HeaderFilter(HeaderFilter), EditedFiles(EditedFiles) {}
124 
125 private:
126   RecordedAST AST;
127   RecordedPP PP;
128   PragmaIncludes PI;
129   llvm::function_ref<bool(llvm::StringRef)> HeaderFilter;
130   llvm::StringMap<std::string> &EditedFiles;
131 
132   bool BeginInvocation(CompilerInstance &CI) override {
133     // We only perform include-cleaner analysis. So we disable diagnostics that
134     // won't affect our analysis to make the tool more robust against
135     // in-development code.
136     CI.getLangOpts().ModulesDeclUse = false;
137     CI.getLangOpts().ModulesStrictDeclUse = false;
138     return true;
139   }
140 
141   void ExecuteAction() override {
142     const auto &CI = getCompilerInstance();
143 
144     // Disable all warnings when running include-cleaner, as we are only
145     // interested in include-cleaner related findings. This makes the tool both
146     // more resilient around in-development code, and possibly faster as we
147     // skip some extra analysis.
148     auto &Diags = CI.getDiagnostics();
149     Diags.setEnableAllWarnings(false);
150     Diags.setSeverityForAll(clang::diag::Flavor::WarningOrError,
151                             clang::diag::Severity::Ignored);
152     auto &P = CI.getPreprocessor();
153     P.addPPCallbacks(PP.record(P));
154     PI.record(getCompilerInstance());
155     ASTFrontendAction::ExecuteAction();
156   }
157 
158   std::unique_ptr<ASTConsumer> CreateASTConsumer(CompilerInstance &CI,
159                                                  StringRef File) override {
160     return AST.record();
161   }
162 
163   void EndSourceFile() override {
164     const auto &SM = getCompilerInstance().getSourceManager();
165     if (SM.getDiagnostics().hasUncompilableErrorOccurred()) {
166       llvm::errs()
167           << "Skipping file " << getCurrentFile()
168           << " due to compiler errors. clang-include-cleaner expects to "
169              "work on compilable source code.\n";
170       return;
171     }
172 
173     if (!HTMLReportPath.empty())
174       writeHTML();
175 
176     // Source File's path of compiler invocation, converted to absolute path.
177     llvm::SmallString<256> AbsPath(
178         SM.getFileEntryRefForID(SM.getMainFileID())->getName());
179     assert(!AbsPath.empty() && "Main file path not known?");
180     SM.getFileManager().makeAbsolutePath(AbsPath);
181     llvm::StringRef Code = SM.getBufferData(SM.getMainFileID());
182 
183     auto Results =
184         analyze(AST.Roots, PP.MacroReferences, PP.Includes, &PI,
185                 getCompilerInstance().getPreprocessor(), HeaderFilter);
186     if (!Insert)
187       Results.Missing.clear();
188     if (!Remove)
189       Results.Unused.clear();
190     std::string Final = fixIncludes(Results, AbsPath, Code, getStyle(AbsPath));
191 
192     if (Print.getNumOccurrences()) {
193       switch (Print) {
194       case PrintStyle::Changes:
195         for (const Include *I : Results.Unused)
196           llvm::outs() << "- " << I->quote() << " @Line:" << I->Line << "\n";
197         for (const auto &[I, _] : Results.Missing)
198           llvm::outs() << "+ " << I << "\n";
199         break;
200       case PrintStyle::Final:
201         llvm::outs() << Final;
202         break;
203       }
204     }
205 
206     if (!Results.Missing.empty() || !Results.Unused.empty())
207       EditedFiles.try_emplace(AbsPath, Final);
208   }
209 
210   void writeHTML() {
211     std::error_code EC;
212     llvm::raw_fd_ostream OS(HTMLReportPath, EC);
213     if (EC) {
214       llvm::errs() << "Unable to write HTML report to " << HTMLReportPath
215                    << ": " << EC.message() << "\n";
216       ++Errors;
217       return;
218     }
219     writeHTMLReport(AST.Ctx->getSourceManager().getMainFileID(), PP.Includes,
220                     AST.Roots, PP.MacroReferences, *AST.Ctx,
221                     getCompilerInstance().getPreprocessor(), &PI, OS);
222   }
223 };
224 class ActionFactory : public tooling::FrontendActionFactory {
225 public:
226   ActionFactory(llvm::function_ref<bool(llvm::StringRef)> HeaderFilter)
227       : HeaderFilter(HeaderFilter) {}
228 
229   std::unique_ptr<clang::FrontendAction> create() override {
230     return std::make_unique<Action>(HeaderFilter, EditedFiles);
231   }
232 
233   const llvm::StringMap<std::string> &editedFiles() const {
234     return EditedFiles;
235   }
236 
237 private:
238   llvm::function_ref<bool(llvm::StringRef)> HeaderFilter;
239   // Map from file name to final code with the include edits applied.
240   llvm::StringMap<std::string> EditedFiles;
241 };
242 
243 // Compiles a regex list into a function that return true if any match a header.
244 // Prints and returns nullptr if any regexes are invalid.
245 std::function<bool(llvm::StringRef)> matchesAny(llvm::StringRef RegexFlag) {
246   auto FilterRegs = std::make_shared<std::vector<llvm::Regex>>();
247   llvm::SmallVector<llvm::StringRef> Headers;
248   RegexFlag.split(Headers, ',', -1, /*KeepEmpty=*/false);
249   for (auto HeaderPattern : Headers) {
250     std::string AnchoredPattern = "(" + HeaderPattern.str() + ")$";
251     llvm::Regex CompiledRegex(AnchoredPattern);
252     std::string RegexError;
253     if (!CompiledRegex.isValid(RegexError)) {
254       llvm::errs() << llvm::formatv("Invalid regular expression '{0}': {1}\n",
255                                     HeaderPattern, RegexError);
256       return nullptr;
257     }
258     FilterRegs->push_back(std::move(CompiledRegex));
259   }
260   return [FilterRegs](llvm::StringRef Path) {
261     for (const auto &F : *FilterRegs) {
262       if (F.match(Path))
263         return true;
264     }
265     return false;
266   };
267 }
268 
269 std::function<bool(llvm::StringRef)> headerFilter() {
270   auto OnlyMatches = matchesAny(OnlyHeaders);
271   auto IgnoreMatches = matchesAny(IgnoreHeaders);
272   if (!OnlyMatches || !IgnoreMatches)
273     return nullptr;
274 
275   return [OnlyMatches, IgnoreMatches](llvm::StringRef Header) {
276     if (!OnlyHeaders.empty() && !OnlyMatches(Header))
277       return true;
278     if (!IgnoreHeaders.empty() && IgnoreMatches(Header))
279       return true;
280     return false;
281   };
282 }
283 
284 // Maps absolute path of each files of each compilation commands to the
285 // absolute path of the input file.
286 llvm::Expected<std::map<std::string, std::string>>
287 mapInputsToAbsPaths(clang::tooling::CompilationDatabase &CDB,
288                     llvm::IntrusiveRefCntPtr<llvm::vfs::FileSystem> VFS,
289                     const std::vector<std::string> &Inputs) {
290   std::map<std::string, std::string> CDBToAbsPaths;
291   // Factory.editedFiles()` will contain the final code, along with the
292   // path given in the compilation database. That path can be
293   // absolute or relative, and if it is relative, it is relative to the
294   // "Directory" field in the compilation database. We need to make it
295   // absolute to write the final code to the correct path.
296   for (auto &Source : Inputs) {
297     llvm::SmallString<256> AbsPath(Source);
298     if (auto Err = VFS->makeAbsolute(AbsPath)) {
299       llvm::errs() << "Failed to get absolute path for " << Source << " : "
300                    << Err.message() << '\n';
301       return llvm::errorCodeToError(Err);
302     }
303     std::vector<clang::tooling::CompileCommand> Cmds =
304         CDB.getCompileCommands(AbsPath);
305     if (Cmds.empty()) {
306       // It should be found in the compilation database, even user didn't
307       // specify the compilation database, the `FixedCompilationDatabase` will
308       // create an entry from the arguments. So it is an error if we can't
309       // find the compile commands.
310       std::string ErrorMsg =
311           llvm::formatv("No compile commands found for {0}", AbsPath).str();
312       llvm::errs() << ErrorMsg << '\n';
313       return llvm::make_error<llvm::StringError>(
314           ErrorMsg, llvm::inconvertibleErrorCode());
315     }
316     for (const auto &Cmd : Cmds) {
317       llvm::SmallString<256> CDBPath(Cmd.Filename);
318       std::string Directory(Cmd.Directory);
319       llvm::sys::fs::make_absolute(Cmd.Directory, CDBPath);
320       CDBToAbsPaths[std::string(CDBPath)] = std::string(AbsPath);
321     }
322   }
323   return CDBToAbsPaths;
324 }
325 
326 } // namespace
327 } // namespace include_cleaner
328 } // namespace clang
329 
330 int main(int argc, const char **argv) {
331   using namespace clang::include_cleaner;
332 
333   llvm::sys::PrintStackTraceOnErrorSignal(argv[0]);
334   auto OptionsParser =
335       clang::tooling::CommonOptionsParser::create(argc, argv, IncludeCleaner);
336   if (!OptionsParser) {
337     llvm::errs() << toString(OptionsParser.takeError());
338     return 1;
339   }
340 
341   if (OptionsParser->getSourcePathList().size() != 1) {
342     std::vector<cl::Option *> IncompatibleFlags = {&HTMLReportPath, &Print};
343     for (const auto *Flag : IncompatibleFlags) {
344       if (Flag->getNumOccurrences()) {
345         llvm::errs() << "-" << Flag->ArgStr << " requires a single input file";
346         return 1;
347       }
348     }
349   }
350 
351   auto VFS = llvm::vfs::getRealFileSystem();
352   auto &CDB = OptionsParser->getCompilations();
353   // CDBToAbsPaths is a map from the path in the compilation database to the
354   // writable absolute path of the file.
355   auto CDBToAbsPaths =
356       mapInputsToAbsPaths(CDB, VFS, OptionsParser->getSourcePathList());
357   if (!CDBToAbsPaths)
358     return 1;
359 
360   clang::tooling::ClangTool Tool(CDB, OptionsParser->getSourcePathList());
361 
362   auto HeaderFilter = headerFilter();
363   if (!HeaderFilter)
364     return 1; // error already reported.
365   ActionFactory Factory(HeaderFilter);
366   auto ErrorCode = Tool.run(&Factory);
367   if (Edit) {
368     for (const auto &NameAndContent : Factory.editedFiles()) {
369       llvm::StringRef FileName = NameAndContent.first();
370       if (auto It = CDBToAbsPaths->find(FileName.str());
371           It != CDBToAbsPaths->end())
372         FileName = It->second;
373 
374       const std::string &FinalCode = NameAndContent.second;
375       if (auto Err = llvm::writeToOutput(
376               FileName, [&](llvm::raw_ostream &OS) -> llvm::Error {
377                 OS << FinalCode;
378                 return llvm::Error::success();
379               })) {
380         llvm::errs() << "Failed to apply edits to " << FileName << ": "
381                      << toString(std::move(Err)) << "\n";
382         ++Errors;
383       }
384     }
385   }
386   return ErrorCode || Errors != 0;
387 }
388