xref: /llvm-project/clang-tools-extra/clang-tidy/misc/NoRecursionCheck.cpp (revision 7d2ea6c422d3f5712b7253407005e1a465a76946)
//===--- NoRecursionCheck.cpp - clang-tidy --------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
849bffa5fSRoman Lebedev 
949bffa5fSRoman Lebedev #include "NoRecursionCheck.h"
1049bffa5fSRoman Lebedev #include "clang/AST/ASTContext.h"
1149bffa5fSRoman Lebedev #include "clang/ASTMatchers/ASTMatchFinder.h"
1249bffa5fSRoman Lebedev #include "clang/Analysis/CallGraph.h"
1349bffa5fSRoman Lebedev #include "llvm/ADT/DenseMapInfo.h"
1449bffa5fSRoman Lebedev #include "llvm/ADT/SCCIterator.h"
1549bffa5fSRoman Lebedev 
1649bffa5fSRoman Lebedev using namespace clang::ast_matchers;
1749bffa5fSRoman Lebedev 
18*7d2ea6c4SCarlos Galvez namespace clang::tidy::misc {
1949bffa5fSRoman Lebedev 
2049bffa5fSRoman Lebedev namespace {
2149bffa5fSRoman Lebedev 
2249bffa5fSRoman Lebedev /// Much like SmallSet, with two differences:
2349bffa5fSRoman Lebedev /// 1. It can *only* be constructed from an ArrayRef<>. If the element count
2449bffa5fSRoman Lebedev ///    is small, there is no copy and said storage *must* outlive us.
2549bffa5fSRoman Lebedev /// 2. it is immutable, the way it was constructed it will stay.
2649bffa5fSRoman Lebedev template <typename T, unsigned SmallSize> class ImmutableSmallSet {
2749bffa5fSRoman Lebedev   ArrayRef<T> Vector;
2849bffa5fSRoman Lebedev   llvm::DenseSet<T> Set;
2949bffa5fSRoman Lebedev 
3049bffa5fSRoman Lebedev   static_assert(SmallSize <= 32, "N should be small");
3149bffa5fSRoman Lebedev 
isSmall() const3249bffa5fSRoman Lebedev   bool isSmall() const { return Set.empty(); }
3349bffa5fSRoman Lebedev 
3449bffa5fSRoman Lebedev public:
3549bffa5fSRoman Lebedev   using size_type = size_t;
3649bffa5fSRoman Lebedev 
3749bffa5fSRoman Lebedev   ImmutableSmallSet() = delete;
3849bffa5fSRoman Lebedev   ImmutableSmallSet(const ImmutableSmallSet &) = delete;
3949bffa5fSRoman Lebedev   ImmutableSmallSet(ImmutableSmallSet &&) = delete;
4049bffa5fSRoman Lebedev   T &operator=(const ImmutableSmallSet &) = delete;
4149bffa5fSRoman Lebedev   T &operator=(ImmutableSmallSet &&) = delete;
4249bffa5fSRoman Lebedev 
4349bffa5fSRoman Lebedev   // WARNING: Storage *must* outlive us if we decide that the size is small.
ImmutableSmallSet(ArrayRef<T> Storage)4449bffa5fSRoman Lebedev   ImmutableSmallSet(ArrayRef<T> Storage) {
4549bffa5fSRoman Lebedev     // Is size small-enough to just keep using the existing storage?
4649bffa5fSRoman Lebedev     if (Storage.size() <= SmallSize) {
4749bffa5fSRoman Lebedev       Vector = Storage;
4849bffa5fSRoman Lebedev       return;
4949bffa5fSRoman Lebedev     }
5049bffa5fSRoman Lebedev 
5149bffa5fSRoman Lebedev     // We've decided that it isn't performant to keep using vector.
5249bffa5fSRoman Lebedev     // Let's migrate the data into Set.
5349bffa5fSRoman Lebedev     Set.reserve(Storage.size());
5449bffa5fSRoman Lebedev     Set.insert(Storage.begin(), Storage.end());
5549bffa5fSRoman Lebedev   }
5649bffa5fSRoman Lebedev 
5749bffa5fSRoman Lebedev   /// count - Return 1 if the element is in the set, 0 otherwise.
count(const T & V) const5849bffa5fSRoman Lebedev   size_type count(const T &V) const {
5949bffa5fSRoman Lebedev     if (isSmall()) {
6049bffa5fSRoman Lebedev       // Since the collection is small, just do a linear search.
617542e721SKazu Hirata       return llvm::is_contained(Vector, V) ? 1 : 0;
6249bffa5fSRoman Lebedev     }
6349bffa5fSRoman Lebedev 
6449bffa5fSRoman Lebedev     return Set.count(V);
6549bffa5fSRoman Lebedev   }
6649bffa5fSRoman Lebedev };
6749bffa5fSRoman Lebedev 
6849bffa5fSRoman Lebedev /// Much like SmallSetVector, but with one difference:
6949bffa5fSRoman Lebedev /// when the size is \p SmallSize or less, when checking whether an element is
7049bffa5fSRoman Lebedev /// already in the set or not, we perform linear search over the vector,
7149bffa5fSRoman Lebedev /// but if the size is larger than \p SmallSize, we look in set.
7249bffa5fSRoman Lebedev /// FIXME: upstream this into SetVector/SmallSetVector itself.
7349bffa5fSRoman Lebedev template <typename T, unsigned SmallSize> class SmartSmallSetVector {
7449bffa5fSRoman Lebedev public:
7549bffa5fSRoman Lebedev   using size_type = size_t;
7649bffa5fSRoman Lebedev 
7749bffa5fSRoman Lebedev private:
7849bffa5fSRoman Lebedev   SmallVector<T, SmallSize> Vector;
7949bffa5fSRoman Lebedev   llvm::DenseSet<T> Set;
8049bffa5fSRoman Lebedev 
8149bffa5fSRoman Lebedev   static_assert(SmallSize <= 32, "N should be small");
8249bffa5fSRoman Lebedev 
8349bffa5fSRoman Lebedev   // Are we still using Vector for uniqness tracking?
isSmall() const8449bffa5fSRoman Lebedev   bool isSmall() const { return Set.empty(); }
8549bffa5fSRoman Lebedev 
8649bffa5fSRoman Lebedev   // Will one more entry cause Vector to switch away from small-size storage?
entiretyOfVectorSmallSizeIsOccupied() const8749bffa5fSRoman Lebedev   bool entiretyOfVectorSmallSizeIsOccupied() const {
8849bffa5fSRoman Lebedev     assert(isSmall() && Vector.size() <= SmallSize &&
8949bffa5fSRoman Lebedev            "Shouldn't ask if we have already [should have] migrated into Set.");
9049bffa5fSRoman Lebedev     return Vector.size() == SmallSize;
9149bffa5fSRoman Lebedev   }
9249bffa5fSRoman Lebedev 
populateSet()9349bffa5fSRoman Lebedev   void populateSet() {
9449bffa5fSRoman Lebedev     assert(Set.empty() && "Should not have already utilized the Set.");
9549bffa5fSRoman Lebedev     // Magical growth factor prediction - to how many elements do we expect to
9649bffa5fSRoman Lebedev     // sanely grow after switching away from small-size storage?
9749bffa5fSRoman Lebedev     const size_t NewMaxElts = 4 * Vector.size();
9849bffa5fSRoman Lebedev     Vector.reserve(NewMaxElts);
9949bffa5fSRoman Lebedev     Set.reserve(NewMaxElts);
10049bffa5fSRoman Lebedev     Set.insert(Vector.begin(), Vector.end());
10149bffa5fSRoman Lebedev   }
10249bffa5fSRoman Lebedev 
10349bffa5fSRoman Lebedev   /// count - Return 1 if the element is in the set, 0 otherwise.
count(const T & V) const10449bffa5fSRoman Lebedev   size_type count(const T &V) const {
10549bffa5fSRoman Lebedev     if (isSmall()) {
10649bffa5fSRoman Lebedev       // Since the collection is small, just do a linear search.
1077542e721SKazu Hirata       return llvm::is_contained(Vector, V) ? 1 : 0;
10849bffa5fSRoman Lebedev     }
10949bffa5fSRoman Lebedev     // Look-up in the Set.
11049bffa5fSRoman Lebedev     return Set.count(V);
11149bffa5fSRoman Lebedev   }
11249bffa5fSRoman Lebedev 
setInsert(const T & V)11349bffa5fSRoman Lebedev   bool setInsert(const T &V) {
11449bffa5fSRoman Lebedev     if (count(V) != 0)
11549bffa5fSRoman Lebedev       return false; // Already exists.
11649bffa5fSRoman Lebedev     // Does not exist, Can/need to record it.
11749bffa5fSRoman Lebedev     if (isSmall()) { // Are we still using Vector for uniqness tracking?
11849bffa5fSRoman Lebedev       // Will one more entry fit within small-sized Vector?
11949bffa5fSRoman Lebedev       if (!entiretyOfVectorSmallSizeIsOccupied())
12049bffa5fSRoman Lebedev         return true; // We'll insert into vector right afterwards anyway.
12149bffa5fSRoman Lebedev       // Time to switch to Set.
12249bffa5fSRoman Lebedev       populateSet();
12349bffa5fSRoman Lebedev     }
12449bffa5fSRoman Lebedev     // Set time!
12549bffa5fSRoman Lebedev     // Note that this must be after `populateSet()` might have been called.
12649bffa5fSRoman Lebedev     bool SetInsertionSucceeded = Set.insert(V).second;
12749bffa5fSRoman Lebedev     (void)SetInsertionSucceeded;
12849bffa5fSRoman Lebedev     assert(SetInsertionSucceeded && "We did check that no such value existed");
12949bffa5fSRoman Lebedev     return true;
13049bffa5fSRoman Lebedev   }
13149bffa5fSRoman Lebedev 
13249bffa5fSRoman Lebedev public:
13349bffa5fSRoman Lebedev   /// Insert a new element into the SmartSmallSetVector.
13449bffa5fSRoman Lebedev   /// \returns true if the element was inserted into the SmartSmallSetVector.
insert(const T & X)13549bffa5fSRoman Lebedev   bool insert(const T &X) {
136ab2d3ce4SAlexander Kornienko     bool Result = setInsert(X);
137ab2d3ce4SAlexander Kornienko     if (Result)
13849bffa5fSRoman Lebedev       Vector.push_back(X);
139ab2d3ce4SAlexander Kornienko     return Result;
14049bffa5fSRoman Lebedev   }
14149bffa5fSRoman Lebedev 
14249bffa5fSRoman Lebedev   /// Clear the SmartSmallSetVector and return the underlying vector.
takeVector()14349bffa5fSRoman Lebedev   decltype(Vector) takeVector() {
14449bffa5fSRoman Lebedev     Set.clear();
14549bffa5fSRoman Lebedev     return std::move(Vector);
14649bffa5fSRoman Lebedev   }
14749bffa5fSRoman Lebedev };
14849bffa5fSRoman Lebedev 
14949bffa5fSRoman Lebedev constexpr unsigned SmallCallStackSize = 16;
15049bffa5fSRoman Lebedev constexpr unsigned SmallSCCSize = 32;
15149bffa5fSRoman Lebedev 
15249bffa5fSRoman Lebedev using CallStackTy =
15349bffa5fSRoman Lebedev     llvm::SmallVector<CallGraphNode::CallRecord, SmallCallStackSize>;
15449bffa5fSRoman Lebedev 
15549bffa5fSRoman Lebedev // In given SCC, find *some* call stack that will be cyclic.
15649bffa5fSRoman Lebedev // This will only find *one* such stack, it might not be the smallest one,
15749bffa5fSRoman Lebedev // and there may be other loops.
pathfindSomeCycle(ArrayRef<CallGraphNode * > SCC)158ab2d3ce4SAlexander Kornienko CallStackTy pathfindSomeCycle(ArrayRef<CallGraphNode *> SCC) {
15949bffa5fSRoman Lebedev   // We'll need to be able to performantly look up whether some CallGraphNode
16049bffa5fSRoman Lebedev   // is in SCC or not, so cache all the SCC elements in a set.
16149bffa5fSRoman Lebedev   const ImmutableSmallSet<CallGraphNode *, SmallSCCSize> SCCElts(SCC);
16249bffa5fSRoman Lebedev 
16349bffa5fSRoman Lebedev   // Is node N part if the current SCC?
16449bffa5fSRoman Lebedev   auto NodeIsPartOfSCC = [&SCCElts](CallGraphNode *N) {
16549bffa5fSRoman Lebedev     return SCCElts.count(N) != 0;
16649bffa5fSRoman Lebedev   };
16749bffa5fSRoman Lebedev 
16849bffa5fSRoman Lebedev   // Track the call stack that will cause a cycle.
16949bffa5fSRoman Lebedev   SmartSmallSetVector<CallGraphNode::CallRecord, SmallCallStackSize>
17049bffa5fSRoman Lebedev       CallStackSet;
17149bffa5fSRoman Lebedev 
172ade0662cSSalman Javed   // Arbitrarily take the first element of SCC as entry point.
17349bffa5fSRoman Lebedev   CallGraphNode::CallRecord EntryNode(SCC.front(), /*CallExpr=*/nullptr);
17449bffa5fSRoman Lebedev   // Continue recursing into subsequent callees that are part of this SCC,
17549bffa5fSRoman Lebedev   // and are thus known to be part of the call graph loop, until loop forms.
17649bffa5fSRoman Lebedev   CallGraphNode::CallRecord *Node = &EntryNode;
17749bffa5fSRoman Lebedev   while (true) {
17849bffa5fSRoman Lebedev     // Did we see this node before?
17949bffa5fSRoman Lebedev     if (!CallStackSet.insert(*Node))
18049bffa5fSRoman Lebedev       break; // Cycle completed! Note that didn't insert the node into stack!
18149bffa5fSRoman Lebedev     // Else, perform depth-first traversal: out of all callees, pick first one
18249bffa5fSRoman Lebedev     // that is part of this SCC. This is not guaranteed to yield shortest cycle.
18349bffa5fSRoman Lebedev     Node = llvm::find_if(Node->Callee->callees(), NodeIsPartOfSCC);
18449bffa5fSRoman Lebedev   }
18549bffa5fSRoman Lebedev 
18649bffa5fSRoman Lebedev   // Note that we failed to insert the last node, that completes the cycle.
18749bffa5fSRoman Lebedev   // But we really want to have it. So insert it manually into stack only.
18849bffa5fSRoman Lebedev   CallStackTy CallStack = CallStackSet.takeVector();
18949bffa5fSRoman Lebedev   CallStack.emplace_back(*Node);
19049bffa5fSRoman Lebedev 
19149bffa5fSRoman Lebedev   return CallStack;
19249bffa5fSRoman Lebedev }
19349bffa5fSRoman Lebedev 
19449bffa5fSRoman Lebedev } // namespace
19549bffa5fSRoman Lebedev 
registerMatchers(MatchFinder * Finder)19649bffa5fSRoman Lebedev void NoRecursionCheck::registerMatchers(MatchFinder *Finder) {
19749bffa5fSRoman Lebedev   Finder->addMatcher(translationUnitDecl().bind("TUDecl"), this);
19849bffa5fSRoman Lebedev }
19949bffa5fSRoman Lebedev 
handleSCC(ArrayRef<CallGraphNode * > SCC)20049bffa5fSRoman Lebedev void NoRecursionCheck::handleSCC(ArrayRef<CallGraphNode *> SCC) {
20149bffa5fSRoman Lebedev   assert(!SCC.empty() && "Empty SCC does not make sense.");
20249bffa5fSRoman Lebedev 
203dd5571d5SKazuaki Ishizaki   // First of all, call out every strongly connected function.
20449bffa5fSRoman Lebedev   for (CallGraphNode *N : SCC) {
205c8f9e526SRoman Lebedev     FunctionDecl *D = N->getDefinition();
206c8f9e526SRoman Lebedev     diag(D->getLocation(), "function %0 is within a recursive call chain") << D;
20749bffa5fSRoman Lebedev   }
20849bffa5fSRoman Lebedev 
20949bffa5fSRoman Lebedev   // Now, SCC only tells us about strongly connected function declarations in
21049bffa5fSRoman Lebedev   // the call graph. It doesn't *really* tell us about the cycles they form.
21149bffa5fSRoman Lebedev   // And there may be more than one cycle in SCC.
21249bffa5fSRoman Lebedev   // So let's form a call stack that eventually exposes *some* cycle.
213ab2d3ce4SAlexander Kornienko   const CallStackTy EventuallyCyclicCallStack = pathfindSomeCycle(SCC);
21449bffa5fSRoman Lebedev   assert(!EventuallyCyclicCallStack.empty() && "We should've found the cycle");
21549bffa5fSRoman Lebedev 
21649bffa5fSRoman Lebedev   // While last node of the call stack does cause a loop, due to the way we
217dd5571d5SKazuaki Ishizaki   // pathfind the cycle, the loop does not necessarily begin at the first node
21849bffa5fSRoman Lebedev   // of the call stack, so drop front nodes of the call stack until it does.
21949bffa5fSRoman Lebedev   const auto CyclicCallStack =
22049bffa5fSRoman Lebedev       ArrayRef<CallGraphNode::CallRecord>(EventuallyCyclicCallStack)
22149bffa5fSRoman Lebedev           .drop_until([LastNode = EventuallyCyclicCallStack.back()](
22249bffa5fSRoman Lebedev                           CallGraphNode::CallRecord FrontNode) {
22349bffa5fSRoman Lebedev             return FrontNode == LastNode;
22449bffa5fSRoman Lebedev           });
22549bffa5fSRoman Lebedev   assert(CyclicCallStack.size() >= 2 && "Cycle requires at least 2 frames");
22649bffa5fSRoman Lebedev 
22749bffa5fSRoman Lebedev   // Which function we decided to be the entry point that lead to the recursion?
228c8f9e526SRoman Lebedev   FunctionDecl *CycleEntryFn = CyclicCallStack.front().Callee->getDefinition();
22949bffa5fSRoman Lebedev   // And now, for ease of understanding, let's print the call sequence that
23049bffa5fSRoman Lebedev   // forms the cycle in question.
23149bffa5fSRoman Lebedev   diag(CycleEntryFn->getLocation(),
23249bffa5fSRoman Lebedev        "example recursive call chain, starting from function %0",
23349bffa5fSRoman Lebedev        DiagnosticIDs::Note)
234c8f9e526SRoman Lebedev       << CycleEntryFn;
23549bffa5fSRoman Lebedev   for (int CurFrame = 1, NumFrames = CyclicCallStack.size();
23649bffa5fSRoman Lebedev        CurFrame != NumFrames; ++CurFrame) {
23749bffa5fSRoman Lebedev     CallGraphNode::CallRecord PrevNode = CyclicCallStack[CurFrame - 1];
23849bffa5fSRoman Lebedev     CallGraphNode::CallRecord CurrNode = CyclicCallStack[CurFrame];
23949bffa5fSRoman Lebedev 
24049bffa5fSRoman Lebedev     Decl *PrevDecl = PrevNode.Callee->getDecl();
24149bffa5fSRoman Lebedev     Decl *CurrDecl = CurrNode.Callee->getDecl();
24249bffa5fSRoman Lebedev 
24349bffa5fSRoman Lebedev     diag(CurrNode.CallExpr->getBeginLoc(),
24449bffa5fSRoman Lebedev          "Frame #%0: function %1 calls function %2 here:", DiagnosticIDs::Note)
24549bffa5fSRoman Lebedev         << CurFrame << cast<NamedDecl>(PrevDecl) << cast<NamedDecl>(CurrDecl);
24649bffa5fSRoman Lebedev   }
24749bffa5fSRoman Lebedev 
24849bffa5fSRoman Lebedev   diag(CyclicCallStack.back().CallExpr->getBeginLoc(),
24949bffa5fSRoman Lebedev        "... which was the starting point of the recursive call chain; there "
25049bffa5fSRoman Lebedev        "may be other cycles",
25149bffa5fSRoman Lebedev        DiagnosticIDs::Note);
25249bffa5fSRoman Lebedev }
25349bffa5fSRoman Lebedev 
check(const MatchFinder::MatchResult & Result)25449bffa5fSRoman Lebedev void NoRecursionCheck::check(const MatchFinder::MatchResult &Result) {
25549bffa5fSRoman Lebedev   // Build call graph for the entire translation unit.
25649bffa5fSRoman Lebedev   const auto *TU = Result.Nodes.getNodeAs<TranslationUnitDecl>("TUDecl");
25749bffa5fSRoman Lebedev   CallGraph CG;
25849bffa5fSRoman Lebedev   CG.addToCallGraph(const_cast<TranslationUnitDecl *>(TU));
25949bffa5fSRoman Lebedev 
26049bffa5fSRoman Lebedev   // Look for cycles in call graph,
261dd5571d5SKazuaki Ishizaki   // by looking for Strongly Connected Components (SCC's)
26249bffa5fSRoman Lebedev   for (llvm::scc_iterator<CallGraph *> SCCI = llvm::scc_begin(&CG),
26349bffa5fSRoman Lebedev                                        SCCE = llvm::scc_end(&CG);
26449bffa5fSRoman Lebedev        SCCI != SCCE; ++SCCI) {
26521390eabSStefanos Baziotis     if (!SCCI.hasCycle()) // We only care about cycles, not standalone nodes.
26649bffa5fSRoman Lebedev       continue;
26749bffa5fSRoman Lebedev     handleSCC(*SCCI);
26849bffa5fSRoman Lebedev   }
26949bffa5fSRoman Lebedev }
27049bffa5fSRoman Lebedev 
271*7d2ea6c4SCarlos Galvez } // namespace clang::tidy::misc
272