1 //===--- IncludeCleaner.cpp - standalone tool for include analysis --------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #include "AnalysisInternal.h" 10 #include "clang-include-cleaner/Analysis.h" 11 #include "clang-include-cleaner/Record.h" 12 #include "clang/Frontend/CompilerInstance.h" 13 #include "clang/Frontend/FrontendAction.h" 14 #include "clang/Lex/Preprocessor.h" 15 #include "clang/Tooling/CommonOptionsParser.h" 16 #include "clang/Tooling/Tooling.h" 17 #include "llvm/ADT/STLFunctionalExtras.h" 18 #include "llvm/ADT/SmallVector.h" 19 #include "llvm/ADT/StringMap.h" 20 #include "llvm/ADT/StringRef.h" 21 #include "llvm/Support/CommandLine.h" 22 #include "llvm/Support/FormatVariadic.h" 23 #include "llvm/Support/Regex.h" 24 #include "llvm/Support/Signals.h" 25 #include "llvm/Support/raw_ostream.h" 26 #include <functional> 27 #include <memory> 28 #include <string> 29 #include <utility> 30 #include <vector> 31 32 namespace clang { 33 namespace include_cleaner { 34 namespace { 35 namespace cl = llvm::cl; 36 37 llvm::StringRef Overview = llvm::StringLiteral(R"( 38 clang-include-cleaner analyzes the #include directives in source code. 39 40 It suggests removing headers that the code is not using. 41 It suggests inserting headers that the code relies on, but does not include. 42 These changes make the file more self-contained and (at scale) make the codebase 43 easier to reason about and modify. 44 45 The tool operates on *working* source code. This means it can suggest including 46 headers that are only indirectly included, but cannot suggest those that are 47 missing entirely. (clang-include-fixer can do this). 48 )") 49 .trim(); 50 51 cl::OptionCategory IncludeCleaner("clang-include-cleaner"); 52 53 cl::opt<std::string> HTMLReportPath{ 54 "html", 55 cl::desc("Specify an output filename for an HTML report. " 56 "This describes both recommendations and reasons for changes."), 57 cl::cat(IncludeCleaner), 58 }; 59 60 cl::opt<std::string> OnlyHeaders{ 61 "only-headers", 62 cl::desc("A comma-separated list of regexes to match against suffix of a " 63 "header. Only headers that match will be analyzed."), 64 cl::init(""), 65 cl::cat(IncludeCleaner), 66 }; 67 68 cl::opt<std::string> IgnoreHeaders{ 69 "ignore-headers", 70 cl::desc("A comma-separated list of regexes to match against suffix of a " 71 "header, and disable analysis if matched."), 72 cl::init(""), 73 cl::cat(IncludeCleaner), 74 }; 75 76 enum class PrintStyle { Changes, Final }; 77 cl::opt<PrintStyle> Print{ 78 "print", 79 cl::values( 80 clEnumValN(PrintStyle::Changes, "changes", "Print symbolic changes"), 81 clEnumValN(PrintStyle::Final, "", "Print final code")), 82 cl::ValueOptional, 83 cl::init(PrintStyle::Final), 84 cl::desc("Print the list of headers to insert and remove"), 85 cl::cat(IncludeCleaner), 86 }; 87 88 cl::opt<bool> Edit{ 89 "edit", 90 cl::desc("Apply edits to analyzed source files"), 91 cl::cat(IncludeCleaner), 92 }; 93 94 cl::opt<bool> Insert{ 95 "insert", 96 cl::desc("Allow header insertions"), 97 cl::init(true), 98 cl::cat(IncludeCleaner), 99 }; 100 cl::opt<bool> Remove{ 101 "remove", 102 cl::desc("Allow header removals"), 103 cl::init(true), 104 cl::cat(IncludeCleaner), 105 }; 106 107 std::atomic<unsigned> Errors = ATOMIC_VAR_INIT(0); 108 109 format::FormatStyle getStyle(llvm::StringRef Filename) { 110 auto S = format::getStyle(format::DefaultFormatStyle, Filename, 111 format::DefaultFallbackStyle); 112 if (!S || !S->isCpp()) { 113 consumeError(S.takeError()); 114 return format::getLLVMStyle(); 115 } 116 return std::move(*S); 117 } 118 119 class Action : public clang::ASTFrontendAction { 120 public: 121 Action(llvm::function_ref<bool(llvm::StringRef)> HeaderFilter, 122 llvm::StringMap<std::string> &EditedFiles) 123 : HeaderFilter(HeaderFilter), EditedFiles(EditedFiles) {} 124 125 private: 126 RecordedAST AST; 127 RecordedPP PP; 128 PragmaIncludes PI; 129 llvm::function_ref<bool(llvm::StringRef)> HeaderFilter; 130 llvm::StringMap<std::string> &EditedFiles; 131 132 bool BeginInvocation(CompilerInstance &CI) override { 133 // We only perform include-cleaner analysis. So we disable diagnostics that 134 // won't affect our analysis to make the tool more robust against 135 // in-development code. 136 CI.getLangOpts().ModulesDeclUse = false; 137 CI.getLangOpts().ModulesStrictDeclUse = false; 138 return true; 139 } 140 141 void ExecuteAction() override { 142 const auto &CI = getCompilerInstance(); 143 144 // Disable all warnings when running include-cleaner, as we are only 145 // interested in include-cleaner related findings. This makes the tool both 146 // more resilient around in-development code, and possibly faster as we 147 // skip some extra analysis. 148 auto &Diags = CI.getDiagnostics(); 149 Diags.setEnableAllWarnings(false); 150 Diags.setSeverityForAll(clang::diag::Flavor::WarningOrError, 151 clang::diag::Severity::Ignored); 152 auto &P = CI.getPreprocessor(); 153 P.addPPCallbacks(PP.record(P)); 154 PI.record(getCompilerInstance()); 155 ASTFrontendAction::ExecuteAction(); 156 } 157 158 std::unique_ptr<ASTConsumer> CreateASTConsumer(CompilerInstance &CI, 159 StringRef File) override { 160 return AST.record(); 161 } 162 163 void EndSourceFile() override { 164 const auto &SM = getCompilerInstance().getSourceManager(); 165 if (SM.getDiagnostics().hasUncompilableErrorOccurred()) { 166 llvm::errs() 167 << "Skipping file " << getCurrentFile() 168 << " due to compiler errors. clang-include-cleaner expects to " 169 "work on compilable source code.\n"; 170 return; 171 } 172 173 if (!HTMLReportPath.empty()) 174 writeHTML(); 175 176 // Source File's path of compiler invocation, converted to absolute path. 177 llvm::SmallString<256> AbsPath( 178 SM.getFileEntryRefForID(SM.getMainFileID())->getName()); 179 assert(!AbsPath.empty() && "Main file path not known?"); 180 SM.getFileManager().makeAbsolutePath(AbsPath); 181 llvm::StringRef Code = SM.getBufferData(SM.getMainFileID()); 182 183 auto Results = 184 analyze(AST.Roots, PP.MacroReferences, PP.Includes, &PI, 185 getCompilerInstance().getPreprocessor(), HeaderFilter); 186 if (!Insert) 187 Results.Missing.clear(); 188 if (!Remove) 189 Results.Unused.clear(); 190 std::string Final = fixIncludes(Results, AbsPath, Code, getStyle(AbsPath)); 191 192 if (Print.getNumOccurrences()) { 193 switch (Print) { 194 case PrintStyle::Changes: 195 for (const Include *I : Results.Unused) 196 llvm::outs() << "- " << I->quote() << " @Line:" << I->Line << "\n"; 197 for (const auto &[I, _] : Results.Missing) 198 llvm::outs() << "+ " << I << "\n"; 199 break; 200 case PrintStyle::Final: 201 llvm::outs() << Final; 202 break; 203 } 204 } 205 206 if (!Results.Missing.empty() || !Results.Unused.empty()) 207 EditedFiles.try_emplace(AbsPath, Final); 208 } 209 210 void writeHTML() { 211 std::error_code EC; 212 llvm::raw_fd_ostream OS(HTMLReportPath, EC); 213 if (EC) { 214 llvm::errs() << "Unable to write HTML report to " << HTMLReportPath 215 << ": " << EC.message() << "\n"; 216 ++Errors; 217 return; 218 } 219 writeHTMLReport(AST.Ctx->getSourceManager().getMainFileID(), PP.Includes, 220 AST.Roots, PP.MacroReferences, *AST.Ctx, 221 getCompilerInstance().getPreprocessor(), &PI, OS); 222 } 223 }; 224 class ActionFactory : public tooling::FrontendActionFactory { 225 public: 226 ActionFactory(llvm::function_ref<bool(llvm::StringRef)> HeaderFilter) 227 : HeaderFilter(HeaderFilter) {} 228 229 std::unique_ptr<clang::FrontendAction> create() override { 230 return std::make_unique<Action>(HeaderFilter, EditedFiles); 231 } 232 233 const llvm::StringMap<std::string> &editedFiles() const { 234 return EditedFiles; 235 } 236 237 private: 238 llvm::function_ref<bool(llvm::StringRef)> HeaderFilter; 239 // Map from file name to final code with the include edits applied. 240 llvm::StringMap<std::string> EditedFiles; 241 }; 242 243 // Compiles a regex list into a function that return true if any match a header. 244 // Prints and returns nullptr if any regexes are invalid. 245 std::function<bool(llvm::StringRef)> matchesAny(llvm::StringRef RegexFlag) { 246 auto FilterRegs = std::make_shared<std::vector<llvm::Regex>>(); 247 llvm::SmallVector<llvm::StringRef> Headers; 248 RegexFlag.split(Headers, ',', -1, /*KeepEmpty=*/false); 249 for (auto HeaderPattern : Headers) { 250 std::string AnchoredPattern = "(" + HeaderPattern.str() + ")$"; 251 llvm::Regex CompiledRegex(AnchoredPattern); 252 std::string RegexError; 253 if (!CompiledRegex.isValid(RegexError)) { 254 llvm::errs() << llvm::formatv("Invalid regular expression '{0}': {1}\n", 255 HeaderPattern, RegexError); 256 return nullptr; 257 } 258 FilterRegs->push_back(std::move(CompiledRegex)); 259 } 260 return [FilterRegs](llvm::StringRef Path) { 261 for (const auto &F : *FilterRegs) { 262 if (F.match(Path)) 263 return true; 264 } 265 return false; 266 }; 267 } 268 269 std::function<bool(llvm::StringRef)> headerFilter() { 270 auto OnlyMatches = matchesAny(OnlyHeaders); 271 auto IgnoreMatches = matchesAny(IgnoreHeaders); 272 if (!OnlyMatches || !IgnoreMatches) 273 return nullptr; 274 275 return [OnlyMatches, IgnoreMatches](llvm::StringRef Header) { 276 if (!OnlyHeaders.empty() && !OnlyMatches(Header)) 277 return true; 278 if (!IgnoreHeaders.empty() && IgnoreMatches(Header)) 279 return true; 280 return false; 281 }; 282 } 283 284 // Maps absolute path of each files of each compilation commands to the 285 // absolute path of the input file. 286 llvm::Expected<std::map<std::string, std::string>> 287 mapInputsToAbsPaths(clang::tooling::CompilationDatabase &CDB, 288 llvm::IntrusiveRefCntPtr<llvm::vfs::FileSystem> VFS, 289 const std::vector<std::string> &Inputs) { 290 std::map<std::string, std::string> CDBToAbsPaths; 291 // Factory.editedFiles()` will contain the final code, along with the 292 // path given in the compilation database. That path can be 293 // absolute or relative, and if it is relative, it is relative to the 294 // "Directory" field in the compilation database. We need to make it 295 // absolute to write the final code to the correct path. 296 for (auto &Source : Inputs) { 297 llvm::SmallString<256> AbsPath(Source); 298 if (auto Err = VFS->makeAbsolute(AbsPath)) { 299 llvm::errs() << "Failed to get absolute path for " << Source << " : " 300 << Err.message() << '\n'; 301 return llvm::errorCodeToError(Err); 302 } 303 std::vector<clang::tooling::CompileCommand> Cmds = 304 CDB.getCompileCommands(AbsPath); 305 if (Cmds.empty()) { 306 // It should be found in the compilation database, even user didn't 307 // specify the compilation database, the `FixedCompilationDatabase` will 308 // create an entry from the arguments. So it is an error if we can't 309 // find the compile commands. 310 std::string ErrorMsg = 311 llvm::formatv("No compile commands found for {0}", AbsPath).str(); 312 llvm::errs() << ErrorMsg << '\n'; 313 return llvm::make_error<llvm::StringError>( 314 ErrorMsg, llvm::inconvertibleErrorCode()); 315 } 316 for (const auto &Cmd : Cmds) { 317 llvm::SmallString<256> CDBPath(Cmd.Filename); 318 std::string Directory(Cmd.Directory); 319 llvm::sys::fs::make_absolute(Cmd.Directory, CDBPath); 320 CDBToAbsPaths[std::string(CDBPath)] = std::string(AbsPath); 321 } 322 } 323 return CDBToAbsPaths; 324 } 325 326 } // namespace 327 } // namespace include_cleaner 328 } // namespace clang 329 330 int main(int argc, const char **argv) { 331 using namespace clang::include_cleaner; 332 333 llvm::sys::PrintStackTraceOnErrorSignal(argv[0]); 334 auto OptionsParser = 335 clang::tooling::CommonOptionsParser::create(argc, argv, IncludeCleaner); 336 if (!OptionsParser) { 337 llvm::errs() << toString(OptionsParser.takeError()); 338 return 1; 339 } 340 341 if (OptionsParser->getSourcePathList().size() != 1) { 342 std::vector<cl::Option *> IncompatibleFlags = {&HTMLReportPath, &Print}; 343 for (const auto *Flag : IncompatibleFlags) { 344 if (Flag->getNumOccurrences()) { 345 llvm::errs() << "-" << Flag->ArgStr << " requires a single input file"; 346 return 1; 347 } 348 } 349 } 350 351 auto VFS = llvm::vfs::getRealFileSystem(); 352 auto &CDB = OptionsParser->getCompilations(); 353 // CDBToAbsPaths is a map from the path in the compilation database to the 354 // writable absolute path of the file. 355 auto CDBToAbsPaths = 356 mapInputsToAbsPaths(CDB, VFS, OptionsParser->getSourcePathList()); 357 if (!CDBToAbsPaths) 358 return 1; 359 360 clang::tooling::ClangTool Tool(CDB, OptionsParser->getSourcePathList()); 361 362 auto HeaderFilter = headerFilter(); 363 if (!HeaderFilter) 364 return 1; // error already reported. 365 ActionFactory Factory(HeaderFilter); 366 auto ErrorCode = Tool.run(&Factory); 367 if (Edit) { 368 for (const auto &NameAndContent : Factory.editedFiles()) { 369 llvm::StringRef FileName = NameAndContent.first(); 370 if (auto It = CDBToAbsPaths->find(FileName.str()); 371 It != CDBToAbsPaths->end()) 372 FileName = It->second; 373 374 const std::string &FinalCode = NameAndContent.second; 375 if (auto Err = llvm::writeToOutput( 376 FileName, [&](llvm::raw_ostream &OS) -> llvm::Error { 377 OS << FinalCode; 378 return llvm::Error::success(); 379 })) { 380 llvm::errs() << "Failed to apply edits to " << FileName << ": " 381 << toString(std::move(Err)) << "\n"; 382 ++Errors; 383 } 384 } 385 } 386 return ErrorCode || Errors != 0; 387 } 388