1 //===--- IncludeCleaner.cpp - Unused/Missing Headers Analysis ---*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #include "IncludeCleaner.h" 10 #include "Diagnostics.h" 11 #include "Headers.h" 12 #include "ParsedAST.h" 13 #include "Preamble.h" 14 #include "Protocol.h" 15 #include "SourceCode.h" 16 #include "clang-include-cleaner/Analysis.h" 17 #include "clang-include-cleaner/IncludeSpeller.h" 18 #include "clang-include-cleaner/Record.h" 19 #include "clang-include-cleaner/Types.h" 20 #include "support/Logger.h" 21 #include "support/Path.h" 22 #include "support/Trace.h" 23 #include "clang/AST/ASTContext.h" 24 #include "clang/Basic/Diagnostic.h" 25 #include "clang/Basic/LLVM.h" 26 #include "clang/Basic/SourceLocation.h" 27 #include "clang/Basic/SourceManager.h" 28 #include "clang/Format/Format.h" 29 #include "clang/Lex/DirectoryLookup.h" 30 #include "clang/Lex/HeaderSearch.h" 31 #include "clang/Lex/Preprocessor.h" 32 #include "clang/Tooling/Core/Replacement.h" 33 #include "clang/Tooling/Inclusions/HeaderIncludes.h" 34 #include "clang/Tooling/Inclusions/StandardLibrary.h" 35 #include "clang/Tooling/Syntax/Tokens.h" 36 #include "llvm/ADT/ArrayRef.h" 37 #include "llvm/ADT/DenseSet.h" 38 #include "llvm/ADT/GenericUniformityImpl.h" 39 #include "llvm/ADT/STLExtras.h" 40 #include "llvm/ADT/SmallString.h" 41 #include "llvm/ADT/SmallVector.h" 42 #include "llvm/ADT/StringRef.h" 43 #include "llvm/Support/Error.h" 44 #include "llvm/Support/ErrorHandling.h" 45 #include "llvm/Support/FormatVariadic.h" 46 #include "llvm/Support/Path.h" 47 #include "llvm/Support/Regex.h" 48 #include <cassert> 49 #include <iterator> 50 #include <map> 51 #include <optional> 52 #include <string> 53 #include <utility> 54 #include <vector> 55 56 namespace clang::clangd { 57 namespace { 58 59 bool isIgnored(llvm::StringRef HeaderPath, HeaderFilter IgnoreHeaders) { 60 // Convert the path to Unix slashes and try to match against the filter. 61 llvm::SmallString<64> NormalizedPath(HeaderPath); 62 llvm::sys::path::native(NormalizedPath, llvm::sys::path::Style::posix); 63 for (auto &Filter : IgnoreHeaders) { 64 if (Filter(NormalizedPath)) 65 return true; 66 } 67 return false; 68 } 69 70 bool mayConsiderUnused(const Inclusion &Inc, ParsedAST &AST, 71 const include_cleaner::PragmaIncludes *PI, 72 bool AnalyzeAngledIncludes) { 73 assert(Inc.HeaderID); 74 auto HID = static_cast<IncludeStructure::HeaderID>(*Inc.HeaderID); 75 auto FE = AST.getSourceManager().getFileManager().getFileRef( 76 AST.getIncludeStructure().getRealPath(HID)); 77 assert(FE); 78 if (FE->getDir() == AST.getPreprocessor() 79 .getHeaderSearchInfo() 80 .getModuleMap() 81 .getBuiltinDir()) 82 return false; 83 if (PI && PI->shouldKeep(*FE)) 84 return false; 85 // FIXME(kirillbobyrev): We currently do not support the umbrella headers. 86 // System headers are likely to be standard library headers. 87 // Until we have good support for umbrella headers, don't warn about them 88 // (unless analysis is explicitly enabled). 89 if (Inc.Written.front() == '<') { 90 if (tooling::stdlib::Header::named(Inc.Written)) 91 return true; 92 if (!AnalyzeAngledIncludes) 93 return false; 94 } 95 if (PI) { 96 // Check if main file is the public interface for a private header. If so we 97 // shouldn't diagnose it as unused. 98 if (auto PHeader = PI->getPublic(*FE); !PHeader.empty()) { 99 PHeader = PHeader.trim("<>\""); 100 // Since most private -> public mappings happen in a verbatim way, we 101 // check textually here. This might go wrong in presence of symlinks or 102 // header mappings. But that's not different than rest of the places. 103 if (AST.tuPath().ends_with(PHeader)) 104 return false; 105 } 106 } 107 // Headers without include guards have side effects and are not 108 // self-contained, skip them. 109 if (!AST.getPreprocessor().getHeaderSearchInfo().isFileMultipleIncludeGuarded( 110 *FE)) { 111 dlog("{0} doesn't have header guard and will not be considered unused", 112 FE->getName()); 113 return false; 114 } 115 return true; 116 } 117 118 std::vector<Diag> generateMissingIncludeDiagnostics( 119 ParsedAST &AST, llvm::ArrayRef<MissingIncludeDiagInfo> MissingIncludes, 120 llvm::StringRef Code, HeaderFilter IgnoreHeaders, const ThreadsafeFS &TFS) { 121 std::vector<Diag> Result; 122 const SourceManager &SM = AST.getSourceManager(); 123 const FileEntry *MainFile = SM.getFileEntryForID(SM.getMainFileID()); 124 125 auto FileStyle = getFormatStyleForFile(AST.tuPath(), Code, TFS, false); 126 127 tooling::HeaderIncludes HeaderIncludes(AST.tuPath(), Code, 128 FileStyle.IncludeStyle); 129 for (const auto &SymbolWithMissingInclude : MissingIncludes) { 130 llvm::StringRef ResolvedPath = 131 SymbolWithMissingInclude.Providers.front().resolvedPath(); 132 if (isIgnored(ResolvedPath, IgnoreHeaders)) { 133 dlog("IncludeCleaner: not diagnosing missing include {0}, filtered by " 134 "config", 135 ResolvedPath); 136 continue; 137 } 138 139 std::string Spelling = include_cleaner::spellHeader( 140 {SymbolWithMissingInclude.Providers.front(), 141 AST.getPreprocessor().getHeaderSearchInfo(), MainFile}); 142 143 llvm::StringRef HeaderRef{Spelling}; 144 bool Angled = HeaderRef.starts_with("<"); 145 // We might suggest insertion of an existing include in edge cases, e.g., 146 // include is present in a PP-disabled region, or spelling of the header 147 // turns out to be the same as one of the unresolved includes in the 148 // main file. 149 std::optional<tooling::Replacement> Replacement = HeaderIncludes.insert( 150 HeaderRef.trim("\"<>"), Angled, tooling::IncludeDirective::Include); 151 if (!Replacement.has_value()) 152 continue; 153 154 Diag &D = Result.emplace_back(); 155 D.Message = 156 llvm::formatv("No header providing \"{0}\" is directly included", 157 SymbolWithMissingInclude.Symbol.name()); 158 D.Name = "missing-includes"; 159 D.Source = Diag::DiagSource::Clangd; 160 D.File = AST.tuPath(); 161 D.InsideMainFile = true; 162 // We avoid the "warning" severity here in favor of LSP's "information". 163 // 164 // Users treat most warnings on code being edited as high-priority. 165 // They don't think of include cleanups the same way: they want to edit 166 // lines with existing violations without fixing them. 167 // Diagnostics at the same level tend to be visually indistinguishable, 168 // and a few missing includes can cause many diagnostics. 169 // Marking these as "information" leaves them visible, but less intrusive. 170 // 171 // (These concerns don't apply to unused #include warnings: these are fewer, 172 // they appear on infrequently-edited lines with few other warnings, and 173 // the 'Unneccesary' tag often result in a different rendering) 174 // 175 // Usually clang's "note" severity usually has special semantics, being 176 // translated into LSP RelatedInformation of a parent diagnostic. 177 // But not here: these aren't processed by clangd's DiagnosticConsumer. 178 D.Severity = DiagnosticsEngine::Note; 179 D.Range = clangd::Range{ 180 offsetToPosition(Code, 181 SymbolWithMissingInclude.SymRefRange.beginOffset()), 182 offsetToPosition(Code, 183 SymbolWithMissingInclude.SymRefRange.endOffset())}; 184 auto &F = D.Fixes.emplace_back(); 185 F.Message = "#include " + Spelling; 186 TextEdit Edit = replacementToEdit(Code, *Replacement); 187 F.Edits.emplace_back(std::move(Edit)); 188 } 189 return Result; 190 } 191 192 std::vector<Diag> generateUnusedIncludeDiagnostics( 193 PathRef FileName, llvm::ArrayRef<const Inclusion *> UnusedIncludes, 194 llvm::StringRef Code, HeaderFilter IgnoreHeaders) { 195 std::vector<Diag> Result; 196 for (const auto *Inc : UnusedIncludes) { 197 if (isIgnored(Inc->Resolved, IgnoreHeaders)) 198 continue; 199 Diag &D = Result.emplace_back(); 200 D.Message = 201 llvm::formatv("included header {0} is not used directly", 202 llvm::sys::path::filename( 203 Inc->Written.substr(1, Inc->Written.size() - 2), 204 llvm::sys::path::Style::posix)); 205 D.Name = "unused-includes"; 206 D.Source = Diag::DiagSource::Clangd; 207 D.File = FileName; 208 D.InsideMainFile = true; 209 D.Severity = DiagnosticsEngine::Warning; 210 D.Tags.push_back(Unnecessary); 211 D.Range = rangeTillEOL(Code, Inc->HashOffset); 212 // FIXME(kirillbobyrev): Removing inclusion might break the code if the 213 // used headers are only reachable transitively through this one. Suggest 214 // including them directly instead. 215 // FIXME(kirillbobyrev): Add fix suggestion for adding IWYU pragmas 216 // (keep/export) remove the warning once we support IWYU pragmas. 217 auto &F = D.Fixes.emplace_back(); 218 F.Message = "remove #include directive"; 219 F.Edits.emplace_back(); 220 F.Edits.back().range.start.line = Inc->HashLine; 221 F.Edits.back().range.end.line = Inc->HashLine + 1; 222 } 223 return Result; 224 } 225 226 std::optional<Fix> 227 removeAllUnusedIncludes(llvm::ArrayRef<Diag> UnusedIncludes) { 228 if (UnusedIncludes.empty()) 229 return std::nullopt; 230 231 Fix RemoveAll; 232 RemoveAll.Message = "remove all unused includes"; 233 for (const auto &Diag : UnusedIncludes) { 234 assert(Diag.Fixes.size() == 1 && "Expected exactly one fix."); 235 RemoveAll.Edits.insert(RemoveAll.Edits.end(), 236 Diag.Fixes.front().Edits.begin(), 237 Diag.Fixes.front().Edits.end()); 238 } 239 return RemoveAll; 240 } 241 242 std::optional<Fix> 243 addAllMissingIncludes(llvm::ArrayRef<Diag> MissingIncludeDiags) { 244 if (MissingIncludeDiags.empty()) 245 return std::nullopt; 246 247 Fix AddAllMissing; 248 AddAllMissing.Message = "add all missing includes"; 249 // A map to deduplicate the edits with the same new text. 250 // newText (#include "my_missing_header.h") -> TextEdit. 251 std::map<std::string, TextEdit> Edits; 252 for (const auto &Diag : MissingIncludeDiags) { 253 assert(Diag.Fixes.size() == 1 && "Expected exactly one fix."); 254 for (const auto &Edit : Diag.Fixes.front().Edits) { 255 Edits.try_emplace(Edit.newText, Edit); 256 } 257 } 258 for (auto &It : Edits) 259 AddAllMissing.Edits.push_back(std::move(It.second)); 260 return AddAllMissing; 261 } 262 Fix fixAll(const Fix &RemoveAllUnused, const Fix &AddAllMissing) { 263 Fix FixAll; 264 FixAll.Message = "fix all includes"; 265 266 for (const auto &F : RemoveAllUnused.Edits) 267 FixAll.Edits.push_back(F); 268 for (const auto &F : AddAllMissing.Edits) 269 FixAll.Edits.push_back(F); 270 return FixAll; 271 } 272 273 std::vector<const Inclusion *> 274 getUnused(ParsedAST &AST, 275 const llvm::DenseSet<IncludeStructure::HeaderID> &ReferencedFiles, 276 bool AnalyzeAngledIncludes) { 277 trace::Span Tracer("IncludeCleaner::getUnused"); 278 std::vector<const Inclusion *> Unused; 279 for (const Inclusion &MFI : AST.getIncludeStructure().MainFileIncludes) { 280 if (!MFI.HeaderID) 281 continue; 282 auto IncludeID = static_cast<IncludeStructure::HeaderID>(*MFI.HeaderID); 283 if (ReferencedFiles.contains(IncludeID)) 284 continue; 285 if (!mayConsiderUnused(MFI, AST, &AST.getPragmaIncludes(), 286 AnalyzeAngledIncludes)) { 287 dlog("{0} was not used, but is not eligible to be diagnosed as unused", 288 MFI.Written); 289 continue; 290 } 291 Unused.push_back(&MFI); 292 } 293 return Unused; 294 } 295 296 } // namespace 297 298 std::vector<include_cleaner::SymbolReference> 299 collectMacroReferences(ParsedAST &AST) { 300 const auto &SM = AST.getSourceManager(); 301 auto &PP = AST.getPreprocessor(); 302 std::vector<include_cleaner::SymbolReference> Macros; 303 for (const auto &[_, Refs] : AST.getMacros().MacroRefs) { 304 for (const auto &Ref : Refs) { 305 auto Loc = SM.getComposedLoc(SM.getMainFileID(), Ref.StartOffset); 306 const auto *Tok = AST.getTokens().spelledTokenContaining(Loc); 307 if (!Tok) 308 continue; 309 auto Macro = locateMacroAt(*Tok, PP); 310 if (!Macro) 311 continue; 312 auto DefLoc = Macro->NameLoc; 313 if (!DefLoc.isValid()) 314 continue; 315 Macros.push_back( 316 {include_cleaner::Macro{/*Name=*/PP.getIdentifierInfo(Tok->text(SM)), 317 DefLoc}, 318 Tok->location(), 319 Ref.InConditionalDirective ? include_cleaner::RefType::Ambiguous 320 : include_cleaner::RefType::Explicit}); 321 } 322 } 323 324 return Macros; 325 } 326 327 include_cleaner::Includes convertIncludes(const ParsedAST &AST) { 328 auto &SM = AST.getSourceManager(); 329 330 include_cleaner::Includes ConvertedIncludes; 331 // We satisfy Includes's contract that search dirs and included files have 332 // matching path styles: both ultimately use FileManager::getCanonicalName(). 333 for (const auto &Dir : AST.getIncludeStructure().SearchPathsCanonical) 334 ConvertedIncludes.addSearchDirectory(Dir); 335 336 for (const Inclusion &Inc : AST.getIncludeStructure().MainFileIncludes) { 337 include_cleaner::Include TransformedInc; 338 llvm::StringRef WrittenRef = llvm::StringRef(Inc.Written); 339 TransformedInc.Spelled = WrittenRef.trim("\"<>"); 340 TransformedInc.HashLocation = 341 SM.getComposedLoc(SM.getMainFileID(), Inc.HashOffset); 342 TransformedInc.Line = Inc.HashLine + 1; 343 TransformedInc.Angled = WrittenRef.starts_with("<"); 344 // Inc.Resolved is canonicalized with clangd::getCanonicalPath(), 345 // which is based on FileManager::getCanonicalName(ParentDir). 346 auto FE = SM.getFileManager().getFileRef(Inc.Resolved); 347 if (!FE) { 348 elog("IncludeCleaner: Failed to get an entry for resolved path {0}: {1}", 349 Inc.Resolved, FE.takeError()); 350 continue; 351 } 352 TransformedInc.Resolved = *FE; 353 ConvertedIncludes.add(std::move(TransformedInc)); 354 } 355 return ConvertedIncludes; 356 } 357 358 IncludeCleanerFindings 359 computeIncludeCleanerFindings(ParsedAST &AST, bool AnalyzeAngledIncludes) { 360 // Interaction is only polished for C/CPP. 361 if (AST.getLangOpts().ObjC) 362 return {}; 363 const auto &SM = AST.getSourceManager(); 364 include_cleaner::Includes ConvertedIncludes = convertIncludes(AST); 365 const FileEntry *MainFile = SM.getFileEntryForID(SM.getMainFileID()); 366 auto PreamblePatch = PreamblePatch::getPatchEntry(AST.tuPath(), SM); 367 368 std::vector<include_cleaner::SymbolReference> Macros = 369 collectMacroReferences(AST); 370 std::vector<MissingIncludeDiagInfo> MissingIncludes; 371 llvm::DenseSet<IncludeStructure::HeaderID> Used; 372 trace::Span Tracer("include_cleaner::walkUsed"); 373 OptionalDirectoryEntryRef ResourceDir = AST.getPreprocessor() 374 .getHeaderSearchInfo() 375 .getModuleMap() 376 .getBuiltinDir(); 377 include_cleaner::walkUsed( 378 AST.getLocalTopLevelDecls(), /*MacroRefs=*/Macros, 379 &AST.getPragmaIncludes(), AST.getPreprocessor(), 380 [&](const include_cleaner::SymbolReference &Ref, 381 llvm::ArrayRef<include_cleaner::Header> Providers) { 382 bool Satisfied = false; 383 for (const auto &H : Providers) { 384 if (H.kind() == include_cleaner::Header::Physical && 385 (H.physical() == MainFile || H.physical() == PreamblePatch || 386 H.physical().getDir() == ResourceDir)) { 387 Satisfied = true; 388 continue; 389 } 390 for (auto *Inc : ConvertedIncludes.match(H)) { 391 Satisfied = true; 392 auto HeaderID = 393 AST.getIncludeStructure().getID(&Inc->Resolved->getFileEntry()); 394 assert(HeaderID.has_value() && 395 "ConvertedIncludes only contains resolved includes."); 396 Used.insert(*HeaderID); 397 } 398 } 399 400 if (Satisfied || Providers.empty() || 401 Ref.RT != include_cleaner::RefType::Explicit) 402 return; 403 404 // Check if we have any headers with the same spelling, in edge cases 405 // like `#include_next "foo.h"`, the user can't ever include the 406 // physical foo.h, but can have a spelling that refers to it. 407 // We postpone this check because spelling a header for every usage is 408 // expensive. 409 std::string Spelling = include_cleaner::spellHeader( 410 {Providers.front(), AST.getPreprocessor().getHeaderSearchInfo(), 411 MainFile}); 412 for (auto *Inc : 413 ConvertedIncludes.match(include_cleaner::Header{Spelling})) { 414 Satisfied = true; 415 auto HeaderID = 416 AST.getIncludeStructure().getID(&Inc->Resolved->getFileEntry()); 417 assert(HeaderID.has_value() && 418 "ConvertedIncludes only contains resolved includes."); 419 Used.insert(*HeaderID); 420 } 421 if (Satisfied) 422 return; 423 424 // We actually always want to map usages to their spellings, but 425 // spelling locations can point into preamble section. Using these 426 // offsets could lead into crashes in presence of stale preambles. Hence 427 // we use "getFileLoc" instead to make sure it always points into main 428 // file. 429 // FIXME: Use presumed locations to map such usages back to patched 430 // locations safely. 431 auto Loc = SM.getFileLoc(Ref.RefLocation); 432 // File locations can be outside of the main file if macro is expanded 433 // through an #include. 434 while (SM.getFileID(Loc) != SM.getMainFileID()) 435 Loc = SM.getIncludeLoc(SM.getFileID(Loc)); 436 auto TouchingTokens = 437 syntax::spelledTokensTouching(Loc, AST.getTokens()); 438 assert(!TouchingTokens.empty()); 439 // Loc points to the start offset of the ref token, here we use the last 440 // element of the TouchingTokens, e.g. avoid getting the "::" for 441 // "ns::^abc". 442 MissingIncludeDiagInfo DiagInfo{ 443 Ref.Target, TouchingTokens.back().range(SM), Providers}; 444 MissingIncludes.push_back(std::move(DiagInfo)); 445 }); 446 // Put possibly equal diagnostics together for deduplication. 447 // The duplicates might be from macro arguments that get expanded multiple 448 // times. 449 llvm::stable_sort(MissingIncludes, [](const MissingIncludeDiagInfo &LHS, 450 const MissingIncludeDiagInfo &RHS) { 451 // First sort by reference location. 452 if (LHS.SymRefRange != RHS.SymRefRange) { 453 // We can get away just by comparing the offsets as all the ranges are in 454 // main file. 455 return LHS.SymRefRange.beginOffset() < RHS.SymRefRange.beginOffset(); 456 } 457 // For the same location, break ties using the symbol. Note that this won't 458 // be stable across runs. 459 using MapInfo = llvm::DenseMapInfo<include_cleaner::Symbol>; 460 return MapInfo::getHashValue(LHS.Symbol) < 461 MapInfo::getHashValue(RHS.Symbol); 462 }); 463 MissingIncludes.erase(llvm::unique(MissingIncludes), MissingIncludes.end()); 464 std::vector<const Inclusion *> UnusedIncludes = 465 getUnused(AST, Used, AnalyzeAngledIncludes); 466 return {std::move(UnusedIncludes), std::move(MissingIncludes)}; 467 } 468 469 bool isPreferredProvider(const Inclusion &Inc, 470 const include_cleaner::Includes &Includes, 471 llvm::ArrayRef<include_cleaner::Header> Providers) { 472 for (const auto &H : Providers) { 473 auto Matches = Includes.match(H); 474 for (const include_cleaner::Include *Match : Matches) 475 if (Match->Line == unsigned(Inc.HashLine + 1)) 476 return true; // this header is (equal) best 477 if (!Matches.empty()) 478 return false; // another header is better 479 } 480 return false; // no header provides the symbol 481 } 482 483 std::vector<Diag> 484 issueIncludeCleanerDiagnostics(ParsedAST &AST, llvm::StringRef Code, 485 const IncludeCleanerFindings &Findings, 486 const ThreadsafeFS &TFS, 487 HeaderFilter IgnoreHeaders) { 488 trace::Span Tracer("IncludeCleaner::issueIncludeCleanerDiagnostics"); 489 std::vector<Diag> UnusedIncludes = generateUnusedIncludeDiagnostics( 490 AST.tuPath(), Findings.UnusedIncludes, Code, IgnoreHeaders); 491 std::optional<Fix> RemoveAllUnused = removeAllUnusedIncludes(UnusedIncludes); 492 493 std::vector<Diag> MissingIncludeDiags = generateMissingIncludeDiagnostics( 494 AST, Findings.MissingIncludes, Code, IgnoreHeaders, TFS); 495 std::optional<Fix> AddAllMissing = addAllMissingIncludes(MissingIncludeDiags); 496 497 std::optional<Fix> FixAll; 498 if (RemoveAllUnused && AddAllMissing) 499 FixAll = fixAll(*RemoveAllUnused, *AddAllMissing); 500 501 auto AddBatchFix = [](const std::optional<Fix> &F, clang::clangd::Diag *Out) { 502 if (!F) 503 return; 504 Out->Fixes.push_back(*F); 505 }; 506 for (auto &Diag : MissingIncludeDiags) { 507 AddBatchFix(MissingIncludeDiags.size() > 1 ? AddAllMissing : std::nullopt, 508 &Diag); 509 AddBatchFix(FixAll, &Diag); 510 } 511 for (auto &Diag : UnusedIncludes) { 512 AddBatchFix(UnusedIncludes.size() > 1 ? RemoveAllUnused : std::nullopt, 513 &Diag); 514 AddBatchFix(FixAll, &Diag); 515 } 516 517 auto Result = std::move(MissingIncludeDiags); 518 llvm::move(UnusedIncludes, std::back_inserter(Result)); 519 return Result; 520 } 521 522 } // namespace clang::clangd 523