xref: /llvm-project/clang-tools-extra/include-cleaner/lib/FindHeaders.cpp (revision ec6c3448d31056db5d63d7aed3e9f207edb49321)
179431692SSam McCall //===--- FindHeaders.cpp --------------------------------------------------===//
279431692SSam McCall //
379431692SSam McCall // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
479431692SSam McCall // See https://llvm.org/LICENSE.txt for license information.
579431692SSam McCall // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
679431692SSam McCall //
779431692SSam McCall //===----------------------------------------------------------------------===//
879431692SSam McCall 
979431692SSam McCall #include "AnalysisInternal.h"
10749c6a70SKadir Cetinkaya #include "TypesInternal.h"
1179431692SSam McCall #include "clang-include-cleaner/Record.h"
12749c6a70SKadir Cetinkaya #include "clang-include-cleaner/Types.h"
133402b77dSKadir Cetinkaya #include "clang/AST/ASTContext.h"
14749c6a70SKadir Cetinkaya #include "clang/AST/Decl.h"
15749c6a70SKadir Cetinkaya #include "clang/AST/DeclBase.h"
167b3db807SHaojian Wu #include "clang/AST/DeclCXX.h"
173402b77dSKadir Cetinkaya #include "clang/Basic/Builtins.h"
18749c6a70SKadir Cetinkaya #include "clang/Basic/FileEntry.h"
19749c6a70SKadir Cetinkaya #include "clang/Basic/SourceLocation.h"
2079431692SSam McCall #include "clang/Basic/SourceManager.h"
21*ec6c3448Skadir çetinkaya #include "clang/Lex/Preprocessor.h"
22749c6a70SKadir Cetinkaya #include "clang/Tooling/Inclusions/StandardLibrary.h"
233402b77dSKadir Cetinkaya #include "llvm/ADT/ArrayRef.h"
24749c6a70SKadir Cetinkaya #include "llvm/ADT/STLExtras.h"
25749c6a70SKadir Cetinkaya #include "llvm/ADT/SmallVector.h"
26749c6a70SKadir Cetinkaya #include "llvm/ADT/StringRef.h"
27749c6a70SKadir Cetinkaya #include "llvm/Support/Casting.h"
28749c6a70SKadir Cetinkaya #include "llvm/Support/ErrorHandling.h"
293402b77dSKadir Cetinkaya #include <optional>
30c3ad4b76SViktoriia Bakalova #include <queue>
31c3ad4b76SViktoriia Bakalova #include <set>
32749c6a70SKadir Cetinkaya #include <utility>
3379431692SSam McCall 
3479431692SSam McCall namespace clang::include_cleaner {
35749c6a70SKadir Cetinkaya namespace {
36749c6a70SKadir Cetinkaya llvm::SmallVector<Hinted<Header>>
37749c6a70SKadir Cetinkaya applyHints(llvm::SmallVector<Hinted<Header>> Headers, Hints H) {
38749c6a70SKadir Cetinkaya   for (auto &Header : Headers)
39749c6a70SKadir Cetinkaya     Header.Hint |= H;
40749c6a70SKadir Cetinkaya   return Headers;
41749c6a70SKadir Cetinkaya }
4279431692SSam McCall 
43749c6a70SKadir Cetinkaya llvm::SmallVector<Header> ranked(llvm::SmallVector<Hinted<Header>> Headers) {
44749c6a70SKadir Cetinkaya   llvm::stable_sort(llvm::reverse(Headers),
45749c6a70SKadir Cetinkaya                     [](const Hinted<Header> &LHS, const Hinted<Header> &RHS) {
46749c6a70SKadir Cetinkaya                       return LHS < RHS;
47749c6a70SKadir Cetinkaya                     });
48749c6a70SKadir Cetinkaya   return llvm::SmallVector<Header>(Headers.begin(), Headers.end());
49749c6a70SKadir Cetinkaya }
50749c6a70SKadir Cetinkaya 
51749c6a70SKadir Cetinkaya // Return the basename from a verbatim header spelling, leaves only the file
52749c6a70SKadir Cetinkaya // name.
53749c6a70SKadir Cetinkaya llvm::StringRef basename(llvm::StringRef Header) {
54749c6a70SKadir Cetinkaya   Header = Header.trim("<>\"");
55eea232daSJan Svoboda   Header = llvm::sys::path::filename(Header);
56749c6a70SKadir Cetinkaya   // Drop everything after first `.` (dot).
57749c6a70SKadir Cetinkaya   // foo.h -> foo
58749c6a70SKadir Cetinkaya   // foo.cu.h -> foo
59749c6a70SKadir Cetinkaya   Header = Header.substr(0, Header.find('.'));
60749c6a70SKadir Cetinkaya   return Header;
61749c6a70SKadir Cetinkaya }
62749c6a70SKadir Cetinkaya 
63749c6a70SKadir Cetinkaya // Check if spelling of \p H matches \p DeclName.
64749c6a70SKadir Cetinkaya bool nameMatch(llvm::StringRef DeclName, Header H) {
65749c6a70SKadir Cetinkaya   switch (H.kind()) {
66749c6a70SKadir Cetinkaya   case Header::Physical:
6798e6deb6SJan Svoboda     return basename(H.physical().getName()).equals_insensitive(DeclName);
68749c6a70SKadir Cetinkaya   case Header::Standard:
69749c6a70SKadir Cetinkaya     return basename(H.standard().name()).equals_insensitive(DeclName);
70749c6a70SKadir Cetinkaya   case Header::Verbatim:
71749c6a70SKadir Cetinkaya     return basename(H.verbatim()).equals_insensitive(DeclName);
72749c6a70SKadir Cetinkaya   }
73749c6a70SKadir Cetinkaya   llvm_unreachable("unhandled Header kind!");
74749c6a70SKadir Cetinkaya }
75749c6a70SKadir Cetinkaya 
76749c6a70SKadir Cetinkaya llvm::StringRef symbolName(const Symbol &S) {
77749c6a70SKadir Cetinkaya   switch (S.kind()) {
78749c6a70SKadir Cetinkaya   case Symbol::Declaration:
79749c6a70SKadir Cetinkaya     // Unnamed decls like operators and anonymous structs won't get any name
80749c6a70SKadir Cetinkaya     // match.
81749c6a70SKadir Cetinkaya     if (const auto *ND = llvm::dyn_cast<NamedDecl>(&S.declaration()))
82749c6a70SKadir Cetinkaya       if (auto *II = ND->getIdentifier())
83749c6a70SKadir Cetinkaya         return II->getName();
84749c6a70SKadir Cetinkaya     return "";
85749c6a70SKadir Cetinkaya   case Symbol::Macro:
86749c6a70SKadir Cetinkaya     return S.macro().Name->getName();
87749c6a70SKadir Cetinkaya   }
88749c6a70SKadir Cetinkaya   llvm_unreachable("unhandled Symbol kind!");
89749c6a70SKadir Cetinkaya }
90749c6a70SKadir Cetinkaya 
91504aa8aeSHaojian Wu Hints isPublicHeader(const FileEntry *FE, const PragmaIncludes &PI) {
92504aa8aeSHaojian Wu   if (PI.isPrivate(FE) || !PI.isSelfContained(FE))
93504aa8aeSHaojian Wu     return Hints::None;
94504aa8aeSHaojian Wu   return Hints::PublicHeader;
95504aa8aeSHaojian Wu }
96504aa8aeSHaojian Wu 
97504aa8aeSHaojian Wu llvm::SmallVector<Hinted<Header>>
98504aa8aeSHaojian Wu hintedHeadersForStdHeaders(llvm::ArrayRef<tooling::stdlib::Header> Headers,
99504aa8aeSHaojian Wu                            const SourceManager &SM, const PragmaIncludes *PI) {
100504aa8aeSHaojian Wu   llvm::SmallVector<Hinted<Header>> Results;
101504aa8aeSHaojian Wu   for (const auto &H : Headers) {
1025933d265SKadir Cetinkaya     Results.emplace_back(H, Hints::PublicHeader | Hints::OriginHeader);
103504aa8aeSHaojian Wu     if (!PI)
104504aa8aeSHaojian Wu       continue;
10598e6deb6SJan Svoboda     for (FileEntryRef Export : PI->getExporters(H, SM.getFileManager()))
106504aa8aeSHaojian Wu       Results.emplace_back(Header(Export), isPublicHeader(Export, *PI));
107504aa8aeSHaojian Wu   }
108504aa8aeSHaojian Wu   // StandardLibrary returns headers in preference order, so only mark the
109504aa8aeSHaojian Wu   // first.
110504aa8aeSHaojian Wu   if (!Results.empty())
111504aa8aeSHaojian Wu     Results.front().Hint |= Hints::PreferredHeader;
112504aa8aeSHaojian Wu   return Results;
113504aa8aeSHaojian Wu }
114504aa8aeSHaojian Wu 
1153402b77dSKadir Cetinkaya // Symbol to header mapping for std::move and std::remove, based on number of
1163402b77dSKadir Cetinkaya // parameters.
1173402b77dSKadir Cetinkaya std::optional<tooling::stdlib::Header>
1183402b77dSKadir Cetinkaya headerForAmbiguousStdSymbol(const NamedDecl *ND) {
1193402b77dSKadir Cetinkaya   if (!ND->isInStdNamespace())
120504aa8aeSHaojian Wu     return {};
1217b3db807SHaojian Wu   if (auto* USD = llvm::dyn_cast<UsingShadowDecl>(ND))
1227b3db807SHaojian Wu     ND = USD->getTargetDecl();
1233402b77dSKadir Cetinkaya   const auto *FD = ND->getAsFunction();
124504aa8aeSHaojian Wu   if (!FD)
1253402b77dSKadir Cetinkaya     return std::nullopt;
1263402b77dSKadir Cetinkaya   llvm::StringRef FName = symbolName(*ND);
127504aa8aeSHaojian Wu   if (FName == "move") {
128504aa8aeSHaojian Wu     if (FD->getNumParams() == 1)
129504aa8aeSHaojian Wu       // move(T&& t)
1303402b77dSKadir Cetinkaya       return tooling::stdlib::Header::named("<utility>");
131d71adebbSViktoriia Bakalova     if (FD->getNumParams() == 3 || FD->getNumParams() == 4)
132504aa8aeSHaojian Wu       // move(InputIt first, InputIt last, OutputIt dest);
133d71adebbSViktoriia Bakalova       // move(ExecutionPolicy&& policy, ForwardIt1 first,
134d71adebbSViktoriia Bakalova       // ForwardIt1 last, ForwardIt2 d_first);
1353402b77dSKadir Cetinkaya       return tooling::stdlib::Header::named("<algorithm>");
136504aa8aeSHaojian Wu   } else if (FName == "remove") {
137504aa8aeSHaojian Wu     if (FD->getNumParams() == 1)
138504aa8aeSHaojian Wu       // remove(const char*);
1393402b77dSKadir Cetinkaya       return tooling::stdlib::Header::named("<cstdio>");
140504aa8aeSHaojian Wu     if (FD->getNumParams() == 3)
141504aa8aeSHaojian Wu       // remove(ForwardIt first, ForwardIt last, const T& value);
1423402b77dSKadir Cetinkaya       return tooling::stdlib::Header::named("<algorithm>");
143504aa8aeSHaojian Wu   }
1443402b77dSKadir Cetinkaya   return std::nullopt;
1453402b77dSKadir Cetinkaya }
1463402b77dSKadir Cetinkaya 
1473402b77dSKadir Cetinkaya // Special-case symbols without proper locations, like the ambiguous standard
1483402b77dSKadir Cetinkaya // library symbols (e.g. std::move) or builtin declarations.
1493402b77dSKadir Cetinkaya std::optional<llvm::SmallVector<Hinted<Header>>>
1503402b77dSKadir Cetinkaya headersForSpecialSymbol(const Symbol &S, const SourceManager &SM,
1513402b77dSKadir Cetinkaya                         const PragmaIncludes *PI) {
1523402b77dSKadir Cetinkaya   // Our special casing logic only deals with decls, so bail out early for
1533402b77dSKadir Cetinkaya   // macros.
1543402b77dSKadir Cetinkaya   if (S.kind() != Symbol::Declaration)
1553402b77dSKadir Cetinkaya     return std::nullopt;
1563402b77dSKadir Cetinkaya   const auto *ND = llvm::cast<NamedDecl>(&S.declaration());
1573402b77dSKadir Cetinkaya   // We map based on names, so again bail out early if there are no names.
1583402b77dSKadir Cetinkaya   if (!ND)
1593402b77dSKadir Cetinkaya     return std::nullopt;
1603402b77dSKadir Cetinkaya   auto *II = ND->getIdentifier();
1613402b77dSKadir Cetinkaya   if (!II)
1623402b77dSKadir Cetinkaya     return std::nullopt;
1633402b77dSKadir Cetinkaya 
1643402b77dSKadir Cetinkaya   // Check first for symbols that are part of our stdlib mapping. As we have
1653402b77dSKadir Cetinkaya   // header names for those.
1663402b77dSKadir Cetinkaya   if (auto Header = headerForAmbiguousStdSymbol(ND)) {
1673402b77dSKadir Cetinkaya     return applyHints(hintedHeadersForStdHeaders({*Header}, SM, PI),
168504aa8aeSHaojian Wu                       Hints::CompleteSymbol);
169504aa8aeSHaojian Wu   }
170504aa8aeSHaojian Wu 
1713402b77dSKadir Cetinkaya   // Now check for builtin symbols, we shouldn't suggest any headers for ones
1723402b77dSKadir Cetinkaya   // without any headers.
1733402b77dSKadir Cetinkaya   if (auto ID = II->getBuiltinID()) {
1743402b77dSKadir Cetinkaya     const char *BuiltinHeader =
1753402b77dSKadir Cetinkaya         ND->getASTContext().BuiltinInfo.getHeaderName(ID);
1763402b77dSKadir Cetinkaya     if (!BuiltinHeader)
1773402b77dSKadir Cetinkaya       return llvm::SmallVector<Hinted<Header>>{};
1783402b77dSKadir Cetinkaya     // FIXME: Use the header mapping for builtins with a known header.
1793402b77dSKadir Cetinkaya   }
1803402b77dSKadir Cetinkaya   return std::nullopt;
1813402b77dSKadir Cetinkaya }
1823402b77dSKadir Cetinkaya 
183749c6a70SKadir Cetinkaya } // namespace
184749c6a70SKadir Cetinkaya 
185749c6a70SKadir Cetinkaya llvm::SmallVector<Hinted<Header>> findHeaders(const SymbolLocation &Loc,
18679431692SSam McCall                                               const SourceManager &SM,
18726757732SSam McCall                                               const PragmaIncludes *PI) {
188749c6a70SKadir Cetinkaya   llvm::SmallVector<Hinted<Header>> Results;
18979431692SSam McCall   switch (Loc.kind()) {
19079431692SSam McCall   case SymbolLocation::Physical: {
19181c37391SViktoriia Bakalova     FileID FID = SM.getFileID(SM.getExpansionLoc(Loc.physical()));
19298e6deb6SJan Svoboda     OptionalFileEntryRef FE = SM.getFileEntryRefForID(FID);
193749c6a70SKadir Cetinkaya     if (!FE)
194749c6a70SKadir Cetinkaya       return {};
195749c6a70SKadir Cetinkaya     if (!PI)
19698e6deb6SJan Svoboda       return {{*FE, Hints::PublicHeader | Hints::OriginHeader}};
1975933d265SKadir Cetinkaya     bool IsOrigin = true;
19898e6deb6SJan Svoboda     std::queue<FileEntryRef> Exporters;
1990cf88851SHaojian Wu     while (FE) {
20098e6deb6SJan Svoboda       Results.emplace_back(*FE,
20198e6deb6SJan Svoboda                            isPublicHeader(*FE, *PI) |
2025933d265SKadir Cetinkaya                                (IsOrigin ? Hints::OriginHeader : Hints::None));
20398e6deb6SJan Svoboda       for (FileEntryRef Export : PI->getExporters(*FE, SM.getFileManager()))
204c3ad4b76SViktoriia Bakalova         Exporters.push(Export);
20526757732SSam McCall 
20698e6deb6SJan Svoboda       if (auto Verbatim = PI->getPublic(*FE); !Verbatim.empty()) {
207749c6a70SKadir Cetinkaya         Results.emplace_back(Verbatim,
208749c6a70SKadir Cetinkaya                              Hints::PublicHeader | Hints::PreferredHeader);
2090cf88851SHaojian Wu         break;
2100cf88851SHaojian Wu       }
21198e6deb6SJan Svoboda       if (PI->isSelfContained(*FE) || FID == SM.getMainFileID())
2120cf88851SHaojian Wu         break;
2130cf88851SHaojian Wu 
2140cf88851SHaojian Wu       // Walkup the include stack for non self-contained headers.
2150cf88851SHaojian Wu       FID = SM.getDecomposedIncludedLoc(FID).first;
21698e6deb6SJan Svoboda       FE = SM.getFileEntryRefForID(FID);
2175933d265SKadir Cetinkaya       IsOrigin = false;
2180cf88851SHaojian Wu     }
219c3ad4b76SViktoriia Bakalova     // Now traverse provider trees rooted at exporters.
220c3ad4b76SViktoriia Bakalova     // Note that we only traverse export edges, and ignore private -> public
221c3ad4b76SViktoriia Bakalova     // mappings, as those pragmas apply to exporter, and not the main provider
222c3ad4b76SViktoriia Bakalova     // being exported in this header.
223c3ad4b76SViktoriia Bakalova     std::set<const FileEntry *> SeenExports;
224c3ad4b76SViktoriia Bakalova     while (!Exporters.empty()) {
22598e6deb6SJan Svoboda       FileEntryRef Export = Exporters.front();
226c3ad4b76SViktoriia Bakalova       Exporters.pop();
227c3ad4b76SViktoriia Bakalova       if (!SeenExports.insert(Export).second) // In case of cyclic exports
228c3ad4b76SViktoriia Bakalova         continue;
229c3ad4b76SViktoriia Bakalova       Results.emplace_back(Export, isPublicHeader(Export, *PI));
23098e6deb6SJan Svoboda       for (FileEntryRef Export : PI->getExporters(Export, SM.getFileManager()))
231c3ad4b76SViktoriia Bakalova         Exporters.push(Export);
232c3ad4b76SViktoriia Bakalova     }
23379431692SSam McCall     return Results;
23479431692SSam McCall   }
23579431692SSam McCall   case SymbolLocation::Standard: {
236504aa8aeSHaojian Wu     return hintedHeadersForStdHeaders(Loc.standard().headers(), SM, PI);
23779431692SSam McCall   }
23879431692SSam McCall   }
23979431692SSam McCall   llvm_unreachable("unhandled SymbolLocation kind!");
24079431692SSam McCall }
24179431692SSam McCall 
242749c6a70SKadir Cetinkaya llvm::SmallVector<Header> headersForSymbol(const Symbol &S,
243*ec6c3448Skadir çetinkaya                                            const Preprocessor &PP,
244749c6a70SKadir Cetinkaya                                            const PragmaIncludes *PI) {
245*ec6c3448Skadir çetinkaya   const auto &SM = PP.getSourceManager();
246749c6a70SKadir Cetinkaya   // Get headers for all the locations providing Symbol. Same header can be
247749c6a70SKadir Cetinkaya   // reached through different traversals, deduplicate those into a single
248749c6a70SKadir Cetinkaya   // Header by merging their hints.
2493402b77dSKadir Cetinkaya   llvm::SmallVector<Hinted<Header>> Headers;
2503402b77dSKadir Cetinkaya   if (auto SpecialHeaders = headersForSpecialSymbol(S, SM, PI)) {
2513402b77dSKadir Cetinkaya     Headers = std::move(*SpecialHeaders);
2523402b77dSKadir Cetinkaya   } else {
253*ec6c3448Skadir çetinkaya     for (auto &Loc : locateSymbol(S, PP.getLangOpts()))
254749c6a70SKadir Cetinkaya       Headers.append(applyHints(findHeaders(Loc, SM, PI), Loc.Hint));
2553402b77dSKadir Cetinkaya   }
256749c6a70SKadir Cetinkaya   // If two Headers probably refer to the same file (e.g. Verbatim(foo.h) and
257749c6a70SKadir Cetinkaya   // Physical(/path/to/foo.h), we won't deduplicate them or merge their hints
258749c6a70SKadir Cetinkaya   llvm::stable_sort(
259749c6a70SKadir Cetinkaya       Headers, [](const Hinted<Header> &LHS, const Hinted<Header> &RHS) {
260749c6a70SKadir Cetinkaya         return static_cast<Header>(LHS) < static_cast<Header>(RHS);
261749c6a70SKadir Cetinkaya       });
262749c6a70SKadir Cetinkaya   auto *Write = Headers.begin();
263749c6a70SKadir Cetinkaya   for (auto *Read = Headers.begin(); Read != Headers.end(); ++Write) {
264749c6a70SKadir Cetinkaya     *Write = *Read++;
265749c6a70SKadir Cetinkaya     while (Read != Headers.end() &&
266749c6a70SKadir Cetinkaya            static_cast<Header>(*Write) == static_cast<Header>(*Read)) {
267749c6a70SKadir Cetinkaya       Write->Hint |= Read->Hint;
268749c6a70SKadir Cetinkaya       ++Read;
269749c6a70SKadir Cetinkaya     }
270749c6a70SKadir Cetinkaya   }
271749c6a70SKadir Cetinkaya   Headers.erase(Write, Headers.end());
272749c6a70SKadir Cetinkaya 
273749c6a70SKadir Cetinkaya   // Add name match hints to deduplicated providers.
274749c6a70SKadir Cetinkaya   llvm::StringRef SymbolName = symbolName(S);
275749c6a70SKadir Cetinkaya   for (auto &H : Headers) {
27648967c6eSHaojian Wu     // Don't apply name match hints to standard headers as the standard headers
27748967c6eSHaojian Wu     // are already ranked in the stdlib mapping.
27848967c6eSHaojian Wu     if (H.kind() == Header::Standard)
27948967c6eSHaojian Wu       continue;
280599adf30Skadir çetinkaya     // Don't apply name match hints to exporting headers. As they usually have
281599adf30Skadir çetinkaya     // names similar to the original header, e.g. foo_wrapper/foo.h vs
282599adf30Skadir çetinkaya     // foo/foo.h, but shouldn't be preferred (unless marked as the public
283599adf30Skadir çetinkaya     // interface).
284599adf30Skadir çetinkaya     if ((H.Hint & Hints::OriginHeader) == Hints::None)
285599adf30Skadir çetinkaya       continue;
286749c6a70SKadir Cetinkaya     if (nameMatch(SymbolName, H))
287749c6a70SKadir Cetinkaya       H.Hint |= Hints::PreferredHeader;
288749c6a70SKadir Cetinkaya   }
289749c6a70SKadir Cetinkaya 
290749c6a70SKadir Cetinkaya   // FIXME: Introduce a MainFile header kind or signal and boost it.
291749c6a70SKadir Cetinkaya   return ranked(std::move(Headers));
292749c6a70SKadir Cetinkaya }
29379431692SSam McCall } // namespace clang::include_cleaner
294