xref: /llvm-project/clang-tools-extra/include-cleaner/lib/FindHeaders.cpp (revision ec6c3448d31056db5d63d7aed3e9f207edb49321)
1 //===--- FindHeaders.cpp --------------------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "AnalysisInternal.h"
10 #include "TypesInternal.h"
11 #include "clang-include-cleaner/Record.h"
12 #include "clang-include-cleaner/Types.h"
13 #include "clang/AST/ASTContext.h"
14 #include "clang/AST/Decl.h"
15 #include "clang/AST/DeclBase.h"
16 #include "clang/AST/DeclCXX.h"
17 #include "clang/Basic/Builtins.h"
18 #include "clang/Basic/FileEntry.h"
19 #include "clang/Basic/SourceLocation.h"
20 #include "clang/Basic/SourceManager.h"
21 #include "clang/Lex/Preprocessor.h"
22 #include "clang/Tooling/Inclusions/StandardLibrary.h"
23 #include "llvm/ADT/ArrayRef.h"
24 #include "llvm/ADT/STLExtras.h"
25 #include "llvm/ADT/SmallVector.h"
26 #include "llvm/ADT/StringRef.h"
27 #include "llvm/Support/Casting.h"
28 #include "llvm/Support/ErrorHandling.h"
29 #include <optional>
30 #include <queue>
31 #include <set>
32 #include <utility>
33 
34 namespace clang::include_cleaner {
35 namespace {
36 llvm::SmallVector<Hinted<Header>>
37 applyHints(llvm::SmallVector<Hinted<Header>> Headers, Hints H) {
38   for (auto &Header : Headers)
39     Header.Hint |= H;
40   return Headers;
41 }
42 
43 llvm::SmallVector<Header> ranked(llvm::SmallVector<Hinted<Header>> Headers) {
44   llvm::stable_sort(llvm::reverse(Headers),
45                     [](const Hinted<Header> &LHS, const Hinted<Header> &RHS) {
46                       return LHS < RHS;
47                     });
48   return llvm::SmallVector<Header>(Headers.begin(), Headers.end());
49 }
50 
51 // Return the basename from a verbatim header spelling, leaves only the file
52 // name.
53 llvm::StringRef basename(llvm::StringRef Header) {
54   Header = Header.trim("<>\"");
55   Header = llvm::sys::path::filename(Header);
56   // Drop everything after first `.` (dot).
57   // foo.h -> foo
58   // foo.cu.h -> foo
59   Header = Header.substr(0, Header.find('.'));
60   return Header;
61 }
62 
63 // Check if spelling of \p H matches \p DeclName.
64 bool nameMatch(llvm::StringRef DeclName, Header H) {
65   switch (H.kind()) {
66   case Header::Physical:
67     return basename(H.physical().getName()).equals_insensitive(DeclName);
68   case Header::Standard:
69     return basename(H.standard().name()).equals_insensitive(DeclName);
70   case Header::Verbatim:
71     return basename(H.verbatim()).equals_insensitive(DeclName);
72   }
73   llvm_unreachable("unhandled Header kind!");
74 }
75 
76 llvm::StringRef symbolName(const Symbol &S) {
77   switch (S.kind()) {
78   case Symbol::Declaration:
79     // Unnamed decls like operators and anonymous structs won't get any name
80     // match.
81     if (const auto *ND = llvm::dyn_cast<NamedDecl>(&S.declaration()))
82       if (auto *II = ND->getIdentifier())
83         return II->getName();
84     return "";
85   case Symbol::Macro:
86     return S.macro().Name->getName();
87   }
88   llvm_unreachable("unhandled Symbol kind!");
89 }
90 
91 Hints isPublicHeader(const FileEntry *FE, const PragmaIncludes &PI) {
92   if (PI.isPrivate(FE) || !PI.isSelfContained(FE))
93     return Hints::None;
94   return Hints::PublicHeader;
95 }
96 
97 llvm::SmallVector<Hinted<Header>>
98 hintedHeadersForStdHeaders(llvm::ArrayRef<tooling::stdlib::Header> Headers,
99                            const SourceManager &SM, const PragmaIncludes *PI) {
100   llvm::SmallVector<Hinted<Header>> Results;
101   for (const auto &H : Headers) {
102     Results.emplace_back(H, Hints::PublicHeader | Hints::OriginHeader);
103     if (!PI)
104       continue;
105     for (FileEntryRef Export : PI->getExporters(H, SM.getFileManager()))
106       Results.emplace_back(Header(Export), isPublicHeader(Export, *PI));
107   }
108   // StandardLibrary returns headers in preference order, so only mark the
109   // first.
110   if (!Results.empty())
111     Results.front().Hint |= Hints::PreferredHeader;
112   return Results;
113 }
114 
115 // Symbol to header mapping for std::move and std::remove, based on number of
116 // parameters.
117 std::optional<tooling::stdlib::Header>
118 headerForAmbiguousStdSymbol(const NamedDecl *ND) {
119   if (!ND->isInStdNamespace())
120     return {};
121   if (auto* USD = llvm::dyn_cast<UsingShadowDecl>(ND))
122     ND = USD->getTargetDecl();
123   const auto *FD = ND->getAsFunction();
124   if (!FD)
125     return std::nullopt;
126   llvm::StringRef FName = symbolName(*ND);
127   if (FName == "move") {
128     if (FD->getNumParams() == 1)
129       // move(T&& t)
130       return tooling::stdlib::Header::named("<utility>");
131     if (FD->getNumParams() == 3 || FD->getNumParams() == 4)
132       // move(InputIt first, InputIt last, OutputIt dest);
133       // move(ExecutionPolicy&& policy, ForwardIt1 first,
134       // ForwardIt1 last, ForwardIt2 d_first);
135       return tooling::stdlib::Header::named("<algorithm>");
136   } else if (FName == "remove") {
137     if (FD->getNumParams() == 1)
138       // remove(const char*);
139       return tooling::stdlib::Header::named("<cstdio>");
140     if (FD->getNumParams() == 3)
141       // remove(ForwardIt first, ForwardIt last, const T& value);
142       return tooling::stdlib::Header::named("<algorithm>");
143   }
144   return std::nullopt;
145 }
146 
147 // Special-case symbols without proper locations, like the ambiguous standard
148 // library symbols (e.g. std::move) or builtin declarations.
149 std::optional<llvm::SmallVector<Hinted<Header>>>
150 headersForSpecialSymbol(const Symbol &S, const SourceManager &SM,
151                         const PragmaIncludes *PI) {
152   // Our special casing logic only deals with decls, so bail out early for
153   // macros.
154   if (S.kind() != Symbol::Declaration)
155     return std::nullopt;
156   const auto *ND = llvm::cast<NamedDecl>(&S.declaration());
157   // We map based on names, so again bail out early if there are no names.
158   if (!ND)
159     return std::nullopt;
160   auto *II = ND->getIdentifier();
161   if (!II)
162     return std::nullopt;
163 
164   // Check first for symbols that are part of our stdlib mapping. As we have
165   // header names for those.
166   if (auto Header = headerForAmbiguousStdSymbol(ND)) {
167     return applyHints(hintedHeadersForStdHeaders({*Header}, SM, PI),
168                       Hints::CompleteSymbol);
169   }
170 
171   // Now check for builtin symbols, we shouldn't suggest any headers for ones
172   // without any headers.
173   if (auto ID = II->getBuiltinID()) {
174     const char *BuiltinHeader =
175         ND->getASTContext().BuiltinInfo.getHeaderName(ID);
176     if (!BuiltinHeader)
177       return llvm::SmallVector<Hinted<Header>>{};
178     // FIXME: Use the header mapping for builtins with a known header.
179   }
180   return std::nullopt;
181 }
182 
183 } // namespace
184 
185 llvm::SmallVector<Hinted<Header>> findHeaders(const SymbolLocation &Loc,
186                                               const SourceManager &SM,
187                                               const PragmaIncludes *PI) {
188   llvm::SmallVector<Hinted<Header>> Results;
189   switch (Loc.kind()) {
190   case SymbolLocation::Physical: {
191     FileID FID = SM.getFileID(SM.getExpansionLoc(Loc.physical()));
192     OptionalFileEntryRef FE = SM.getFileEntryRefForID(FID);
193     if (!FE)
194       return {};
195     if (!PI)
196       return {{*FE, Hints::PublicHeader | Hints::OriginHeader}};
197     bool IsOrigin = true;
198     std::queue<FileEntryRef> Exporters;
199     while (FE) {
200       Results.emplace_back(*FE,
201                            isPublicHeader(*FE, *PI) |
202                                (IsOrigin ? Hints::OriginHeader : Hints::None));
203       for (FileEntryRef Export : PI->getExporters(*FE, SM.getFileManager()))
204         Exporters.push(Export);
205 
206       if (auto Verbatim = PI->getPublic(*FE); !Verbatim.empty()) {
207         Results.emplace_back(Verbatim,
208                              Hints::PublicHeader | Hints::PreferredHeader);
209         break;
210       }
211       if (PI->isSelfContained(*FE) || FID == SM.getMainFileID())
212         break;
213 
214       // Walkup the include stack for non self-contained headers.
215       FID = SM.getDecomposedIncludedLoc(FID).first;
216       FE = SM.getFileEntryRefForID(FID);
217       IsOrigin = false;
218     }
219     // Now traverse provider trees rooted at exporters.
220     // Note that we only traverse export edges, and ignore private -> public
221     // mappings, as those pragmas apply to exporter, and not the main provider
222     // being exported in this header.
223     std::set<const FileEntry *> SeenExports;
224     while (!Exporters.empty()) {
225       FileEntryRef Export = Exporters.front();
226       Exporters.pop();
227       if (!SeenExports.insert(Export).second) // In case of cyclic exports
228         continue;
229       Results.emplace_back(Export, isPublicHeader(Export, *PI));
230       for (FileEntryRef Export : PI->getExporters(Export, SM.getFileManager()))
231         Exporters.push(Export);
232     }
233     return Results;
234   }
235   case SymbolLocation::Standard: {
236     return hintedHeadersForStdHeaders(Loc.standard().headers(), SM, PI);
237   }
238   }
239   llvm_unreachable("unhandled SymbolLocation kind!");
240 }
241 
242 llvm::SmallVector<Header> headersForSymbol(const Symbol &S,
243                                            const Preprocessor &PP,
244                                            const PragmaIncludes *PI) {
245   const auto &SM = PP.getSourceManager();
246   // Get headers for all the locations providing Symbol. Same header can be
247   // reached through different traversals, deduplicate those into a single
248   // Header by merging their hints.
249   llvm::SmallVector<Hinted<Header>> Headers;
250   if (auto SpecialHeaders = headersForSpecialSymbol(S, SM, PI)) {
251     Headers = std::move(*SpecialHeaders);
252   } else {
253     for (auto &Loc : locateSymbol(S, PP.getLangOpts()))
254       Headers.append(applyHints(findHeaders(Loc, SM, PI), Loc.Hint));
255   }
256   // If two Headers probably refer to the same file (e.g. Verbatim(foo.h) and
257   // Physical(/path/to/foo.h), we won't deduplicate them or merge their hints
258   llvm::stable_sort(
259       Headers, [](const Hinted<Header> &LHS, const Hinted<Header> &RHS) {
260         return static_cast<Header>(LHS) < static_cast<Header>(RHS);
261       });
262   auto *Write = Headers.begin();
263   for (auto *Read = Headers.begin(); Read != Headers.end(); ++Write) {
264     *Write = *Read++;
265     while (Read != Headers.end() &&
266            static_cast<Header>(*Write) == static_cast<Header>(*Read)) {
267       Write->Hint |= Read->Hint;
268       ++Read;
269     }
270   }
271   Headers.erase(Write, Headers.end());
272 
273   // Add name match hints to deduplicated providers.
274   llvm::StringRef SymbolName = symbolName(S);
275   for (auto &H : Headers) {
276     // Don't apply name match hints to standard headers as the standard headers
277     // are already ranked in the stdlib mapping.
278     if (H.kind() == Header::Standard)
279       continue;
280     // Don't apply name match hints to exporting headers. As they usually have
281     // names similar to the original header, e.g. foo_wrapper/foo.h vs
282     // foo/foo.h, but shouldn't be preferred (unless marked as the public
283     // interface).
284     if ((H.Hint & Hints::OriginHeader) == Hints::None)
285       continue;
286     if (nameMatch(SymbolName, H))
287       H.Hint |= Hints::PreferredHeader;
288   }
289 
290   // FIXME: Introduce a MainFile header kind or signal and boost it.
291   return ranked(std::move(Headers));
292 }
293 } // namespace clang::include_cleaner
294