xref: /llvm-project/clang-tools-extra/clang-tidy/utils/ExprSequence.cpp (revision 915372a8db7a8d7a1af19cc9ec6ccb5a0d592d1f)
1 //===---------- ExprSequence.cpp - clang-tidy -----------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "ExprSequence.h"
10 #include "clang/AST/ParentMapContext.h"
11 #include "llvm/ADT/SmallVector.h"
12 #include <optional>
13 
14 namespace clang::tidy::utils {
15 
16 // Returns the Stmt nodes that are parents of 'S', skipping any potential
17 // intermediate non-Stmt nodes.
18 //
19 // In almost all cases, this function returns a single parent or no parents at
20 // all.
21 //
22 // The case that a Stmt has multiple parents is rare but does actually occur in
23 // the parts of the AST that we're interested in. Specifically, InitListExpr
24 // nodes cause ASTContext::getParent() to return multiple parents for certain
25 // nodes in their subtree because RecursiveASTVisitor visits both the syntactic
26 // and semantic forms of InitListExpr, and the parent-child relationships are
27 // different between the two forms.
getParentStmts(const Stmt * S,ASTContext * Context)28 static SmallVector<const Stmt *, 1> getParentStmts(const Stmt *S,
29                                                    ASTContext *Context) {
30   SmallVector<const Stmt *, 1> Result;
31 
32   TraversalKindScope RAII(*Context, TK_AsIs);
33   DynTypedNodeList Parents = Context->getParents(*S);
34 
35   SmallVector<DynTypedNode, 1> NodesToProcess(Parents.begin(), Parents.end());
36 
37   while (!NodesToProcess.empty()) {
38     DynTypedNode Node = NodesToProcess.back();
39     NodesToProcess.pop_back();
40 
41     if (const auto *S = Node.get<Stmt>()) {
42       Result.push_back(S);
43     } else {
44       Parents = Context->getParents(Node);
45       NodesToProcess.append(Parents.begin(), Parents.end());
46     }
47   }
48 
49   return Result;
50 }
51 
52 namespace {
53 
isDescendantOrEqual(const Stmt * Descendant,const Stmt * Ancestor,ASTContext * Context)54 bool isDescendantOrEqual(const Stmt *Descendant, const Stmt *Ancestor,
55                          ASTContext *Context) {
56   if (Descendant == Ancestor)
57     return true;
58   return llvm::any_of(getParentStmts(Descendant, Context),
59                       [Ancestor, Context](const Stmt *Parent) {
60                         return isDescendantOrEqual(Parent, Ancestor, Context);
61                       });
62 }
63 
isDescendantOfArgs(const Stmt * Descendant,const CallExpr * Call,ASTContext * Context)64 bool isDescendantOfArgs(const Stmt *Descendant, const CallExpr *Call,
65                         ASTContext *Context) {
66   return llvm::any_of(Call->arguments(),
67                       [Descendant, Context](const Expr *Arg) {
68                         return isDescendantOrEqual(Descendant, Arg, Context);
69                       });
70 }
71 
72 llvm::SmallVector<const InitListExpr *>
getAllInitListForms(const InitListExpr * InitList)73 getAllInitListForms(const InitListExpr *InitList) {
74   llvm::SmallVector<const InitListExpr *> result = {InitList};
75   if (const InitListExpr *AltForm = InitList->getSyntacticForm())
76     result.push_back(AltForm);
77   if (const InitListExpr *AltForm = InitList->getSemanticForm())
78     result.push_back(AltForm);
79   return result;
80 }
81 
82 } // namespace
83 
ExprSequence(const CFG * TheCFG,const Stmt * Root,ASTContext * TheContext)84 ExprSequence::ExprSequence(const CFG *TheCFG, const Stmt *Root,
85                            ASTContext *TheContext)
86     : Context(TheContext), Root(Root) {
87   for (const auto &SyntheticStmt : TheCFG->synthetic_stmts()) {
88     SyntheticStmtSourceMap[SyntheticStmt.first] = SyntheticStmt.second;
89   }
90 }
91 
inSequence(const Stmt * Before,const Stmt * After) const92 bool ExprSequence::inSequence(const Stmt *Before, const Stmt *After) const {
93   Before = resolveSyntheticStmt(Before);
94   After = resolveSyntheticStmt(After);
95 
96   // If 'After' is in the subtree of the siblings that follow 'Before' in the
97   // chain of successors, we know that 'After' is sequenced after 'Before'.
98   for (const Stmt *Successor = getSequenceSuccessor(Before); Successor;
99        Successor = getSequenceSuccessor(Successor)) {
100     if (isDescendantOrEqual(After, Successor, Context))
101       return true;
102   }
103 
104   SmallVector<const Stmt *, 1> BeforeParents = getParentStmts(Before, Context);
105 
106   // Since C++17, the callee of a call expression is guaranteed to be sequenced
107   // before all of the arguments.
108   // We handle this as a special case rather than using the general
109   // `getSequenceSuccessor` logic above because the callee expression doesn't
110   // have an unambiguous successor; the order in which arguments are evaluated
111   // is indeterminate.
112   for (const Stmt *Parent : BeforeParents) {
113     // Special case: If the callee is a `MemberExpr` with a `DeclRefExpr` as its
114     // base, we consider it to be sequenced _after_ the arguments. This is
115     // because the variable referenced in the base will only actually be
116     // accessed when the call happens, i.e. once all of the arguments have been
117     // evaluated. This has no basis in the C++ standard, but it reflects actual
118     // behavior that is relevant to a use-after-move scenario:
119     //
120     // ```
121     // a.bar(consumeA(std::move(a));
122     // ```
123     //
124     // In this example, we end up accessing `a` after it has been moved from,
125     // even though nominally the callee `a.bar` is evaluated before the argument
126     // `consumeA(std::move(a))`. Note that this is not specific to C++17, so
127     // we implement this logic unconditionally.
128     if (const auto *Call = dyn_cast<CXXMemberCallExpr>(Parent)) {
129       if (is_contained(Call->arguments(), Before) &&
130           isa<DeclRefExpr>(
131               Call->getImplicitObjectArgument()->IgnoreParenImpCasts()) &&
132           isDescendantOrEqual(After, Call->getImplicitObjectArgument(),
133                               Context))
134         return true;
135 
136       // We need this additional early exit so that we don't fall through to the
137       // more general logic below.
138       if (const auto *Member = dyn_cast<MemberExpr>(Before);
139           Member && Call->getCallee() == Member &&
140           isa<DeclRefExpr>(Member->getBase()->IgnoreParenImpCasts()) &&
141           isDescendantOfArgs(After, Call, Context))
142         return false;
143     }
144 
145     if (!Context->getLangOpts().CPlusPlus17)
146       continue;
147 
148     if (const auto *Call = dyn_cast<CallExpr>(Parent);
149         Call && Call->getCallee() == Before &&
150         isDescendantOfArgs(After, Call, Context))
151       return true;
152   }
153 
154   // If 'After' is a parent of 'Before' or is sequenced after one of these
155   // parents, we know that it is sequenced after 'Before'.
156   for (const Stmt *Parent : BeforeParents) {
157     if (Parent == After || inSequence(Parent, After))
158       return true;
159   }
160 
161   return false;
162 }
163 
potentiallyAfter(const Stmt * After,const Stmt * Before) const164 bool ExprSequence::potentiallyAfter(const Stmt *After,
165                                     const Stmt *Before) const {
166   return !inSequence(After, Before);
167 }
168 
getSequenceSuccessor(const Stmt * S) const169 const Stmt *ExprSequence::getSequenceSuccessor(const Stmt *S) const {
170   for (const Stmt *Parent : getParentStmts(S, Context)) {
171     // If a statement has multiple parents, make sure we're using the parent
172     // that lies within the sub-tree under Root.
173     if (!isDescendantOrEqual(Parent, Root, Context))
174       continue;
175 
176     if (const auto *BO = dyn_cast<BinaryOperator>(Parent)) {
177       // Comma operator: Right-hand side is sequenced after the left-hand side.
178       if (BO->getLHS() == S && BO->getOpcode() == BO_Comma)
179         return BO->getRHS();
180     } else if (const auto *InitList = dyn_cast<InitListExpr>(Parent)) {
181       // Initializer list: Each initializer clause is sequenced after the
182       // clauses that precede it.
183       for (const InitListExpr *Form : getAllInitListForms(InitList)) {
184         for (unsigned I = 1; I < Form->getNumInits(); ++I) {
185           if (Form->getInit(I - 1) == S) {
186             return Form->getInit(I);
187           }
188         }
189       }
190     } else if (const auto *ConstructExpr = dyn_cast<CXXConstructExpr>(Parent)) {
191       // Constructor arguments are sequenced if the constructor call is written
192       // as list-initialization.
193       if (ConstructExpr->isListInitialization()) {
194         for (unsigned I = 1; I < ConstructExpr->getNumArgs(); ++I) {
195           if (ConstructExpr->getArg(I - 1) == S) {
196             return ConstructExpr->getArg(I);
197           }
198         }
199       }
200     } else if (const auto *Compound = dyn_cast<CompoundStmt>(Parent)) {
201       // Compound statement: Each sub-statement is sequenced after the
202       // statements that precede it.
203       const Stmt *Previous = nullptr;
204       for (const auto *Child : Compound->body()) {
205         if (Previous == S)
206           return Child;
207         Previous = Child;
208       }
209     } else if (const auto *TheDeclStmt = dyn_cast<DeclStmt>(Parent)) {
210       // Declaration: Every initializer expression is sequenced after the
211       // initializer expressions that precede it.
212       const Expr *PreviousInit = nullptr;
213       for (const Decl *TheDecl : TheDeclStmt->decls()) {
214         if (const auto *TheVarDecl = dyn_cast<VarDecl>(TheDecl)) {
215           if (const Expr *Init = TheVarDecl->getInit()) {
216             if (PreviousInit == S)
217               return Init;
218             PreviousInit = Init;
219           }
220         }
221       }
222     } else if (const auto *ForRange = dyn_cast<CXXForRangeStmt>(Parent)) {
223       // Range-based for: Loop variable declaration is sequenced before the
224       // body. (We need this rule because these get placed in the same
225       // CFGBlock.)
226       if (S == ForRange->getLoopVarStmt())
227         return ForRange->getBody();
228     } else if (const auto *TheIfStmt = dyn_cast<IfStmt>(Parent)) {
229       // If statement:
230       // - Sequence init statement before variable declaration, if present;
231       //   before condition evaluation, otherwise.
232       // - Sequence variable declaration (along with the expression used to
233       //   initialize it) before the evaluation of the condition.
234       if (S == TheIfStmt->getInit()) {
235         if (TheIfStmt->getConditionVariableDeclStmt() != nullptr)
236           return TheIfStmt->getConditionVariableDeclStmt();
237         return TheIfStmt->getCond();
238       }
239       if (S == TheIfStmt->getConditionVariableDeclStmt())
240         return TheIfStmt->getCond();
241     } else if (const auto *TheSwitchStmt = dyn_cast<SwitchStmt>(Parent)) {
242       // Ditto for switch statements.
243       if (S == TheSwitchStmt->getInit()) {
244         if (TheSwitchStmt->getConditionVariableDeclStmt() != nullptr)
245           return TheSwitchStmt->getConditionVariableDeclStmt();
246         return TheSwitchStmt->getCond();
247       }
248       if (S == TheSwitchStmt->getConditionVariableDeclStmt())
249         return TheSwitchStmt->getCond();
250     } else if (const auto *TheWhileStmt = dyn_cast<WhileStmt>(Parent)) {
251       // While statement: Sequence variable declaration (along with the
252       // expression used to initialize it) before the evaluation of the
253       // condition.
254       if (S == TheWhileStmt->getConditionVariableDeclStmt())
255         return TheWhileStmt->getCond();
256     }
257   }
258 
259   return nullptr;
260 }
261 
// Returns the statement that 'S' maps to in SyntheticStmtSourceMap, or 'S'
// itself when it has no entry there.
const Stmt *ExprSequence::resolveSyntheticStmt(const Stmt *S) const {
  // One find() instead of count() followed by lookup() avoids hashing the key
  // twice.
  const auto It = SyntheticStmtSourceMap.find(S);
  if (It != SyntheticStmtSourceMap.end())
    return It->second;
  return S;
}
267 
StmtToBlockMap(const CFG * TheCFG,ASTContext * TheContext)268 StmtToBlockMap::StmtToBlockMap(const CFG *TheCFG, ASTContext *TheContext)
269     : Context(TheContext) {
270   for (const auto *B : *TheCFG) {
271     for (const auto &Elem : *B) {
272       if (std::optional<CFGStmt> S = Elem.getAs<CFGStmt>())
273         Map[S->getStmt()] = B;
274     }
275   }
276 }
277 
blockContainingStmt(const Stmt * S) const278 const CFGBlock *StmtToBlockMap::blockContainingStmt(const Stmt *S) const {
279   while (!Map.count(S)) {
280     SmallVector<const Stmt *, 1> Parents = getParentStmts(S, Context);
281     if (Parents.empty())
282       return nullptr;
283     S = Parents[0];
284   }
285 
286   return Map.lookup(S);
287 }
288 
289 } // namespace clang::tidy::utils
290