xref: /llvm-project/clang-tools-extra/clangd/IncludeCleaner.cpp (revision 2f5dc596b5719e5ed7f6978dafbce994f425a033)
1 //===--- IncludeCleaner.cpp - Unused/Missing Headers Analysis ---*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "IncludeCleaner.h"
10 #include "Diagnostics.h"
11 #include "Headers.h"
12 #include "ParsedAST.h"
13 #include "Preamble.h"
14 #include "Protocol.h"
15 #include "SourceCode.h"
16 #include "clang-include-cleaner/Analysis.h"
17 #include "clang-include-cleaner/IncludeSpeller.h"
18 #include "clang-include-cleaner/Record.h"
19 #include "clang-include-cleaner/Types.h"
20 #include "support/Logger.h"
21 #include "support/Path.h"
22 #include "support/Trace.h"
23 #include "clang/AST/ASTContext.h"
24 #include "clang/Basic/Diagnostic.h"
25 #include "clang/Basic/LLVM.h"
26 #include "clang/Basic/SourceLocation.h"
27 #include "clang/Basic/SourceManager.h"
28 #include "clang/Format/Format.h"
29 #include "clang/Lex/DirectoryLookup.h"
30 #include "clang/Lex/HeaderSearch.h"
31 #include "clang/Lex/Preprocessor.h"
32 #include "clang/Tooling/Core/Replacement.h"
33 #include "clang/Tooling/Inclusions/HeaderIncludes.h"
34 #include "clang/Tooling/Inclusions/StandardLibrary.h"
35 #include "clang/Tooling/Syntax/Tokens.h"
36 #include "llvm/ADT/ArrayRef.h"
37 #include "llvm/ADT/DenseSet.h"
38 #include "llvm/ADT/GenericUniformityImpl.h"
39 #include "llvm/ADT/STLExtras.h"
40 #include "llvm/ADT/SmallString.h"
41 #include "llvm/ADT/SmallVector.h"
42 #include "llvm/ADT/StringRef.h"
43 #include "llvm/Support/Error.h"
44 #include "llvm/Support/ErrorHandling.h"
45 #include "llvm/Support/FormatVariadic.h"
46 #include "llvm/Support/Path.h"
47 #include "llvm/Support/Regex.h"
48 #include <cassert>
49 #include <iterator>
50 #include <map>
51 #include <optional>
52 #include <string>
53 #include <utility>
54 #include <vector>
55 
56 namespace clang::clangd {
57 namespace {
58 
59 bool isIgnored(llvm::StringRef HeaderPath, HeaderFilter IgnoreHeaders) {
60   // Convert the path to Unix slashes and try to match against the filter.
61   llvm::SmallString<64> NormalizedPath(HeaderPath);
62   llvm::sys::path::native(NormalizedPath, llvm::sys::path::Style::posix);
63   for (auto &Filter : IgnoreHeaders) {
64     if (Filter(NormalizedPath))
65       return true;
66   }
67   return false;
68 }
69 
70 bool mayConsiderUnused(const Inclusion &Inc, ParsedAST &AST,
71                        const include_cleaner::PragmaIncludes *PI,
72                        bool AnalyzeAngledIncludes) {
73   assert(Inc.HeaderID);
74   auto HID = static_cast<IncludeStructure::HeaderID>(*Inc.HeaderID);
75   auto FE = AST.getSourceManager().getFileManager().getFileRef(
76       AST.getIncludeStructure().getRealPath(HID));
77   assert(FE);
78   if (FE->getDir() == AST.getPreprocessor()
79                           .getHeaderSearchInfo()
80                           .getModuleMap()
81                           .getBuiltinDir())
82     return false;
83   if (PI && PI->shouldKeep(*FE))
84     return false;
85   // FIXME(kirillbobyrev): We currently do not support the umbrella headers.
86   // System headers are likely to be standard library headers.
87   // Until we have good support for umbrella headers, don't warn about them
88   // (unless analysis is explicitly enabled).
89   if (Inc.Written.front() == '<') {
90     if (tooling::stdlib::Header::named(Inc.Written))
91       return true;
92     if (!AnalyzeAngledIncludes)
93       return false;
94   }
95   if (PI) {
96     // Check if main file is the public interface for a private header. If so we
97     // shouldn't diagnose it as unused.
98     if (auto PHeader = PI->getPublic(*FE); !PHeader.empty()) {
99       PHeader = PHeader.trim("<>\"");
100       // Since most private -> public mappings happen in a verbatim way, we
101       // check textually here. This might go wrong in presence of symlinks or
102       // header mappings. But that's not different than rest of the places.
103       if (AST.tuPath().ends_with(PHeader))
104         return false;
105     }
106   }
107   // Headers without include guards have side effects and are not
108   // self-contained, skip them.
109   if (!AST.getPreprocessor().getHeaderSearchInfo().isFileMultipleIncludeGuarded(
110           *FE)) {
111     dlog("{0} doesn't have header guard and will not be considered unused",
112          FE->getName());
113     return false;
114   }
115   return true;
116 }
117 
118 std::vector<Diag> generateMissingIncludeDiagnostics(
119     ParsedAST &AST, llvm::ArrayRef<MissingIncludeDiagInfo> MissingIncludes,
120     llvm::StringRef Code, HeaderFilter IgnoreHeaders, const ThreadsafeFS &TFS) {
121   std::vector<Diag> Result;
122   const SourceManager &SM = AST.getSourceManager();
123   const FileEntry *MainFile = SM.getFileEntryForID(SM.getMainFileID());
124 
125   auto FileStyle = getFormatStyleForFile(AST.tuPath(), Code, TFS, false);
126 
127   tooling::HeaderIncludes HeaderIncludes(AST.tuPath(), Code,
128                                          FileStyle.IncludeStyle);
129   for (const auto &SymbolWithMissingInclude : MissingIncludes) {
130     llvm::StringRef ResolvedPath =
131         SymbolWithMissingInclude.Providers.front().resolvedPath();
132     if (isIgnored(ResolvedPath, IgnoreHeaders)) {
133       dlog("IncludeCleaner: not diagnosing missing include {0}, filtered by "
134            "config",
135            ResolvedPath);
136       continue;
137     }
138 
139     std::string Spelling = include_cleaner::spellHeader(
140         {SymbolWithMissingInclude.Providers.front(),
141          AST.getPreprocessor().getHeaderSearchInfo(), MainFile});
142 
143     llvm::StringRef HeaderRef{Spelling};
144     bool Angled = HeaderRef.starts_with("<");
145     // We might suggest insertion of an existing include in edge cases, e.g.,
146     // include is present in a PP-disabled region, or spelling of the header
147     // turns out to be the same as one of the unresolved includes in the
148     // main file.
149     std::optional<tooling::Replacement> Replacement = HeaderIncludes.insert(
150         HeaderRef.trim("\"<>"), Angled, tooling::IncludeDirective::Include);
151     if (!Replacement.has_value())
152       continue;
153 
154     Diag &D = Result.emplace_back();
155     D.Message =
156         llvm::formatv("No header providing \"{0}\" is directly included",
157                       SymbolWithMissingInclude.Symbol.name());
158     D.Name = "missing-includes";
159     D.Source = Diag::DiagSource::Clangd;
160     D.File = AST.tuPath();
161     D.InsideMainFile = true;
162     // We avoid the "warning" severity here in favor of LSP's "information".
163     //
164     // Users treat most warnings on code being edited as high-priority.
165     // They don't think of include cleanups the same way: they want to edit
166     // lines with existing violations without fixing them.
167     // Diagnostics at the same level tend to be visually indistinguishable,
168     // and a few missing includes can cause many diagnostics.
169     // Marking these as "information" leaves them visible, but less intrusive.
170     //
171     // (These concerns don't apply to unused #include warnings: these are fewer,
172     // they appear on infrequently-edited lines with few other warnings, and
173     // the 'Unneccesary' tag often result in a different rendering)
174     //
175     // Usually clang's "note" severity usually has special semantics, being
176     // translated into LSP RelatedInformation of a parent diagnostic.
177     // But not here: these aren't processed by clangd's DiagnosticConsumer.
178     D.Severity = DiagnosticsEngine::Note;
179     D.Range = clangd::Range{
180         offsetToPosition(Code,
181                          SymbolWithMissingInclude.SymRefRange.beginOffset()),
182         offsetToPosition(Code,
183                          SymbolWithMissingInclude.SymRefRange.endOffset())};
184     auto &F = D.Fixes.emplace_back();
185     F.Message = "#include " + Spelling;
186     TextEdit Edit = replacementToEdit(Code, *Replacement);
187     F.Edits.emplace_back(std::move(Edit));
188   }
189   return Result;
190 }
191 
192 std::vector<Diag> generateUnusedIncludeDiagnostics(
193     PathRef FileName, llvm::ArrayRef<const Inclusion *> UnusedIncludes,
194     llvm::StringRef Code, HeaderFilter IgnoreHeaders) {
195   std::vector<Diag> Result;
196   for (const auto *Inc : UnusedIncludes) {
197     if (isIgnored(Inc->Resolved, IgnoreHeaders))
198       continue;
199     Diag &D = Result.emplace_back();
200     D.Message =
201         llvm::formatv("included header {0} is not used directly",
202                       llvm::sys::path::filename(
203                           Inc->Written.substr(1, Inc->Written.size() - 2),
204                           llvm::sys::path::Style::posix));
205     D.Name = "unused-includes";
206     D.Source = Diag::DiagSource::Clangd;
207     D.File = FileName;
208     D.InsideMainFile = true;
209     D.Severity = DiagnosticsEngine::Warning;
210     D.Tags.push_back(Unnecessary);
211     D.Range = rangeTillEOL(Code, Inc->HashOffset);
212     // FIXME(kirillbobyrev): Removing inclusion might break the code if the
213     // used headers are only reachable transitively through this one. Suggest
214     // including them directly instead.
215     // FIXME(kirillbobyrev): Add fix suggestion for adding IWYU pragmas
216     // (keep/export) remove the warning once we support IWYU pragmas.
217     auto &F = D.Fixes.emplace_back();
218     F.Message = "remove #include directive";
219     F.Edits.emplace_back();
220     F.Edits.back().range.start.line = Inc->HashLine;
221     F.Edits.back().range.end.line = Inc->HashLine + 1;
222   }
223   return Result;
224 }
225 
226 std::optional<Fix>
227 removeAllUnusedIncludes(llvm::ArrayRef<Diag> UnusedIncludes) {
228   if (UnusedIncludes.empty())
229     return std::nullopt;
230 
231   Fix RemoveAll;
232   RemoveAll.Message = "remove all unused includes";
233   for (const auto &Diag : UnusedIncludes) {
234     assert(Diag.Fixes.size() == 1 && "Expected exactly one fix.");
235     RemoveAll.Edits.insert(RemoveAll.Edits.end(),
236                            Diag.Fixes.front().Edits.begin(),
237                            Diag.Fixes.front().Edits.end());
238   }
239   return RemoveAll;
240 }
241 
242 std::optional<Fix>
243 addAllMissingIncludes(llvm::ArrayRef<Diag> MissingIncludeDiags) {
244   if (MissingIncludeDiags.empty())
245     return std::nullopt;
246 
247   Fix AddAllMissing;
248   AddAllMissing.Message = "add all missing includes";
249   // A map to deduplicate the edits with the same new text.
250   // newText (#include "my_missing_header.h") -> TextEdit.
251   std::map<std::string, TextEdit> Edits;
252   for (const auto &Diag : MissingIncludeDiags) {
253     assert(Diag.Fixes.size() == 1 && "Expected exactly one fix.");
254     for (const auto &Edit : Diag.Fixes.front().Edits) {
255       Edits.try_emplace(Edit.newText, Edit);
256     }
257   }
258   for (auto &It : Edits)
259     AddAllMissing.Edits.push_back(std::move(It.second));
260   return AddAllMissing;
261 }
262 Fix fixAll(const Fix &RemoveAllUnused, const Fix &AddAllMissing) {
263   Fix FixAll;
264   FixAll.Message = "fix all includes";
265 
266   for (const auto &F : RemoveAllUnused.Edits)
267     FixAll.Edits.push_back(F);
268   for (const auto &F : AddAllMissing.Edits)
269     FixAll.Edits.push_back(F);
270   return FixAll;
271 }
272 
273 std::vector<const Inclusion *>
274 getUnused(ParsedAST &AST,
275           const llvm::DenseSet<IncludeStructure::HeaderID> &ReferencedFiles,
276           bool AnalyzeAngledIncludes) {
277   trace::Span Tracer("IncludeCleaner::getUnused");
278   std::vector<const Inclusion *> Unused;
279   for (const Inclusion &MFI : AST.getIncludeStructure().MainFileIncludes) {
280     if (!MFI.HeaderID)
281       continue;
282     auto IncludeID = static_cast<IncludeStructure::HeaderID>(*MFI.HeaderID);
283     if (ReferencedFiles.contains(IncludeID))
284       continue;
285     if (!mayConsiderUnused(MFI, AST, &AST.getPragmaIncludes(),
286                            AnalyzeAngledIncludes)) {
287       dlog("{0} was not used, but is not eligible to be diagnosed as unused",
288            MFI.Written);
289       continue;
290     }
291     Unused.push_back(&MFI);
292   }
293   return Unused;
294 }
295 
296 } // namespace
297 
298 std::vector<include_cleaner::SymbolReference>
299 collectMacroReferences(ParsedAST &AST) {
300   const auto &SM = AST.getSourceManager();
301   auto &PP = AST.getPreprocessor();
302   std::vector<include_cleaner::SymbolReference> Macros;
303   for (const auto &[_, Refs] : AST.getMacros().MacroRefs) {
304     for (const auto &Ref : Refs) {
305       auto Loc = SM.getComposedLoc(SM.getMainFileID(), Ref.StartOffset);
306       const auto *Tok = AST.getTokens().spelledTokenContaining(Loc);
307       if (!Tok)
308         continue;
309       auto Macro = locateMacroAt(*Tok, PP);
310       if (!Macro)
311         continue;
312       auto DefLoc = Macro->NameLoc;
313       if (!DefLoc.isValid())
314         continue;
315       Macros.push_back(
316           {include_cleaner::Macro{/*Name=*/PP.getIdentifierInfo(Tok->text(SM)),
317                                   DefLoc},
318            Tok->location(),
319            Ref.InConditionalDirective ? include_cleaner::RefType::Ambiguous
320                                       : include_cleaner::RefType::Explicit});
321     }
322   }
323 
324   return Macros;
325 }
326 
327 include_cleaner::Includes convertIncludes(const ParsedAST &AST) {
328   auto &SM = AST.getSourceManager();
329 
330   include_cleaner::Includes ConvertedIncludes;
331   // We satisfy Includes's contract that search dirs and included files have
332   // matching path styles: both ultimately use FileManager::getCanonicalName().
333   for (const auto &Dir : AST.getIncludeStructure().SearchPathsCanonical)
334     ConvertedIncludes.addSearchDirectory(Dir);
335 
336   for (const Inclusion &Inc : AST.getIncludeStructure().MainFileIncludes) {
337     include_cleaner::Include TransformedInc;
338     llvm::StringRef WrittenRef = llvm::StringRef(Inc.Written);
339     TransformedInc.Spelled = WrittenRef.trim("\"<>");
340     TransformedInc.HashLocation =
341         SM.getComposedLoc(SM.getMainFileID(), Inc.HashOffset);
342     TransformedInc.Line = Inc.HashLine + 1;
343     TransformedInc.Angled = WrittenRef.starts_with("<");
344     // Inc.Resolved is canonicalized with clangd::getCanonicalPath(),
345     // which is based on FileManager::getCanonicalName(ParentDir).
346     auto FE = SM.getFileManager().getFileRef(Inc.Resolved);
347     if (!FE) {
348       elog("IncludeCleaner: Failed to get an entry for resolved path {0}: {1}",
349            Inc.Resolved, FE.takeError());
350       continue;
351     }
352     TransformedInc.Resolved = *FE;
353     ConvertedIncludes.add(std::move(TransformedInc));
354   }
355   return ConvertedIncludes;
356 }
357 
358 IncludeCleanerFindings
359 computeIncludeCleanerFindings(ParsedAST &AST, bool AnalyzeAngledIncludes) {
360   // Interaction is only polished for C/CPP.
361   if (AST.getLangOpts().ObjC)
362     return {};
363   const auto &SM = AST.getSourceManager();
364   include_cleaner::Includes ConvertedIncludes = convertIncludes(AST);
365   const FileEntry *MainFile = SM.getFileEntryForID(SM.getMainFileID());
366   auto PreamblePatch = PreamblePatch::getPatchEntry(AST.tuPath(), SM);
367 
368   std::vector<include_cleaner::SymbolReference> Macros =
369       collectMacroReferences(AST);
370   std::vector<MissingIncludeDiagInfo> MissingIncludes;
371   llvm::DenseSet<IncludeStructure::HeaderID> Used;
372   trace::Span Tracer("include_cleaner::walkUsed");
373   OptionalDirectoryEntryRef ResourceDir = AST.getPreprocessor()
374                                               .getHeaderSearchInfo()
375                                               .getModuleMap()
376                                               .getBuiltinDir();
377   include_cleaner::walkUsed(
378       AST.getLocalTopLevelDecls(), /*MacroRefs=*/Macros,
379       &AST.getPragmaIncludes(), AST.getPreprocessor(),
380       [&](const include_cleaner::SymbolReference &Ref,
381           llvm::ArrayRef<include_cleaner::Header> Providers) {
382         bool Satisfied = false;
383         for (const auto &H : Providers) {
384           if (H.kind() == include_cleaner::Header::Physical &&
385               (H.physical() == MainFile || H.physical() == PreamblePatch ||
386                H.physical().getDir() == ResourceDir)) {
387             Satisfied = true;
388             continue;
389           }
390           for (auto *Inc : ConvertedIncludes.match(H)) {
391             Satisfied = true;
392             auto HeaderID =
393                 AST.getIncludeStructure().getID(&Inc->Resolved->getFileEntry());
394             assert(HeaderID.has_value() &&
395                    "ConvertedIncludes only contains resolved includes.");
396             Used.insert(*HeaderID);
397           }
398         }
399 
400         if (Satisfied || Providers.empty() ||
401             Ref.RT != include_cleaner::RefType::Explicit)
402           return;
403 
404         // Check if we have any headers with the same spelling, in edge cases
405         // like `#include_next "foo.h"`, the user can't ever include the
406         // physical foo.h, but can have a spelling that refers to it.
407         // We postpone this check because spelling a header for every usage is
408         // expensive.
409         std::string Spelling = include_cleaner::spellHeader(
410             {Providers.front(), AST.getPreprocessor().getHeaderSearchInfo(),
411              MainFile});
412         for (auto *Inc :
413              ConvertedIncludes.match(include_cleaner::Header{Spelling})) {
414           Satisfied = true;
415           auto HeaderID =
416               AST.getIncludeStructure().getID(&Inc->Resolved->getFileEntry());
417           assert(HeaderID.has_value() &&
418                  "ConvertedIncludes only contains resolved includes.");
419           Used.insert(*HeaderID);
420         }
421         if (Satisfied)
422           return;
423 
424         // We actually always want to map usages to their spellings, but
425         // spelling locations can point into preamble section. Using these
426         // offsets could lead into crashes in presence of stale preambles. Hence
427         // we use "getFileLoc" instead to make sure it always points into main
428         // file.
429         // FIXME: Use presumed locations to map such usages back to patched
430         // locations safely.
431         auto Loc = SM.getFileLoc(Ref.RefLocation);
432         // File locations can be outside of the main file if macro is expanded
433         // through an #include.
434         while (SM.getFileID(Loc) != SM.getMainFileID())
435           Loc = SM.getIncludeLoc(SM.getFileID(Loc));
436         auto TouchingTokens =
437             syntax::spelledTokensTouching(Loc, AST.getTokens());
438         assert(!TouchingTokens.empty());
439         // Loc points to the start offset of the ref token, here we use the last
440         // element of the TouchingTokens, e.g. avoid getting the "::" for
441         // "ns::^abc".
442         MissingIncludeDiagInfo DiagInfo{
443             Ref.Target, TouchingTokens.back().range(SM), Providers};
444         MissingIncludes.push_back(std::move(DiagInfo));
445       });
446   // Put possibly equal diagnostics together for deduplication.
447   // The duplicates might be from macro arguments that get expanded multiple
448   // times.
449   llvm::stable_sort(MissingIncludes, [](const MissingIncludeDiagInfo &LHS,
450                                         const MissingIncludeDiagInfo &RHS) {
451     // First sort by reference location.
452     if (LHS.SymRefRange != RHS.SymRefRange) {
453       // We can get away just by comparing the offsets as all the ranges are in
454       // main file.
455       return LHS.SymRefRange.beginOffset() < RHS.SymRefRange.beginOffset();
456     }
457     // For the same location, break ties using the symbol. Note that this won't
458     // be stable across runs.
459     using MapInfo = llvm::DenseMapInfo<include_cleaner::Symbol>;
460     return MapInfo::getHashValue(LHS.Symbol) <
461            MapInfo::getHashValue(RHS.Symbol);
462   });
463   MissingIncludes.erase(llvm::unique(MissingIncludes), MissingIncludes.end());
464   std::vector<const Inclusion *> UnusedIncludes =
465       getUnused(AST, Used, AnalyzeAngledIncludes);
466   return {std::move(UnusedIncludes), std::move(MissingIncludes)};
467 }
468 
469 bool isPreferredProvider(const Inclusion &Inc,
470                          const include_cleaner::Includes &Includes,
471                          llvm::ArrayRef<include_cleaner::Header> Providers) {
472   for (const auto &H : Providers) {
473     auto Matches = Includes.match(H);
474     for (const include_cleaner::Include *Match : Matches)
475       if (Match->Line == unsigned(Inc.HashLine + 1))
476         return true; // this header is (equal) best
477     if (!Matches.empty())
478       return false; // another header is better
479   }
480   return false; // no header provides the symbol
481 }
482 
483 std::vector<Diag>
484 issueIncludeCleanerDiagnostics(ParsedAST &AST, llvm::StringRef Code,
485                                const IncludeCleanerFindings &Findings,
486                                const ThreadsafeFS &TFS,
487                                HeaderFilter IgnoreHeaders) {
488   trace::Span Tracer("IncludeCleaner::issueIncludeCleanerDiagnostics");
489   std::vector<Diag> UnusedIncludes = generateUnusedIncludeDiagnostics(
490       AST.tuPath(), Findings.UnusedIncludes, Code, IgnoreHeaders);
491   std::optional<Fix> RemoveAllUnused = removeAllUnusedIncludes(UnusedIncludes);
492 
493   std::vector<Diag> MissingIncludeDiags = generateMissingIncludeDiagnostics(
494       AST, Findings.MissingIncludes, Code, IgnoreHeaders, TFS);
495   std::optional<Fix> AddAllMissing = addAllMissingIncludes(MissingIncludeDiags);
496 
497   std::optional<Fix> FixAll;
498   if (RemoveAllUnused && AddAllMissing)
499     FixAll = fixAll(*RemoveAllUnused, *AddAllMissing);
500 
501   auto AddBatchFix = [](const std::optional<Fix> &F, clang::clangd::Diag *Out) {
502     if (!F)
503       return;
504     Out->Fixes.push_back(*F);
505   };
506   for (auto &Diag : MissingIncludeDiags) {
507     AddBatchFix(MissingIncludeDiags.size() > 1 ? AddAllMissing : std::nullopt,
508                 &Diag);
509     AddBatchFix(FixAll, &Diag);
510   }
511   for (auto &Diag : UnusedIncludes) {
512     AddBatchFix(UnusedIncludes.size() > 1 ? RemoveAllUnused : std::nullopt,
513                 &Diag);
514     AddBatchFix(FixAll, &Diag);
515   }
516 
517   auto Result = std::move(MissingIncludeDiags);
518   llvm::move(UnusedIncludes, std::back_inserter(Result));
519   return Result;
520 }
521 
522 } // namespace clang::clangd
523