xref: /llvm-project/clang-tools-extra/clang-tidy/bugprone/BranchCloneCheck.cpp (revision 8ac2b77a11c9db9879557ce1c26e38628e1ef45f)
1 //===--- BranchCloneCheck.cpp - clang-tidy --------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "BranchCloneCheck.h"
10 #include "../utils/ASTUtils.h"
11 #include "clang/AST/ASTContext.h"
12 #include "clang/AST/RecursiveASTVisitor.h"
13 #include "clang/ASTMatchers/ASTMatchFinder.h"
14 #include "clang/Analysis/CloneDetection.h"
15 #include "clang/Lex/Lexer.h"
16 #include "llvm/Support/Casting.h"
17 
18 using namespace clang;
19 using namespace clang::ast_matchers;
20 
21 namespace {
22 /// A branch in a switch may consist of several statements; while a branch in
23 /// an if/else if/else chain is one statement (which may be a CompoundStmt).
24 using SwitchBranch = llvm::SmallVector<const Stmt *, 2>;
25 } // anonymous namespace
26 
27 /// Determines if the bodies of two branches in a switch statements are Type I
28 /// clones of each other. This function only examines the body of the branch
29 /// and ignores the `case X:` or `default:` at the start of the branch.
30 static bool areSwitchBranchesIdentical(const SwitchBranch &LHS,
31                                        const SwitchBranch &RHS,
32                                        const ASTContext &Context) {
33   if (LHS.size() != RHS.size())
34     return false;
35 
36   for (size_t I = 0, Size = LHS.size(); I < Size; I++) {
37     // NOTE: We strip goto labels and annotations in addition to stripping
38     // the `case X:` or `default:` labels, but it is very unlikely that this
39     // would cause false positives in real-world code.
40     if (!tidy::utils::areStatementsIdentical(LHS[I]->stripLabelLikeStatements(),
41                                              RHS[I]->stripLabelLikeStatements(),
42                                              Context)) {
43       return false;
44     }
45   }
46 
47   return true;
48 }
49 
50 static bool isFallthroughSwitchBranch(const SwitchBranch &Branch) {
51   struct SwitchCaseVisitor : RecursiveASTVisitor<SwitchCaseVisitor> {
52     using RecursiveASTVisitor<SwitchCaseVisitor>::DataRecursionQueue;
53 
54     bool TraverseLambdaExpr(LambdaExpr *, DataRecursionQueue * = nullptr) {
55       return true; // Ignore lambdas
56     }
57 
58     bool TraverseDecl(Decl *) {
59       return true; // No need to check declarations
60     }
61 
62     bool TraverseSwitchStmt(SwitchStmt *, DataRecursionQueue * = nullptr) {
63       return true; // Ignore sub-switches
64     }
65 
66     bool TraverseSwitchCase(SwitchCase *, DataRecursionQueue * = nullptr) {
67       return true; // Ignore cases
68     }
69 
70     bool TraverseDefaultStmt(DefaultStmt *, DataRecursionQueue * = nullptr) {
71       return true; // Ignore defaults
72     }
73 
74     bool TraverseAttributedStmt(AttributedStmt *S) {
75       if (!S)
76         return true;
77 
78       for (const Attr *A : S->getAttrs()) {
79         if (isa<FallThroughAttr>(A))
80           return false;
81       }
82 
83       return true;
84     }
85   } Visitor;
86 
87   for (const Stmt *Elem : Branch) {
88     if (!Visitor.TraverseStmt(const_cast<Stmt *>(Elem)))
89       return true;
90   }
91   return false;
92 }
93 
94 namespace clang::tidy::bugprone {
95 
96 void BranchCloneCheck::registerMatchers(MatchFinder *Finder) {
97   Finder->addMatcher(
98       ifStmt(unless(allOf(isConstexpr(), isInTemplateInstantiation())),
99              stmt().bind("if"),
100              hasParent(stmt(unless(ifStmt(hasElse(equalsBoundNode("if")))))),
101              hasElse(stmt().bind("else"))),
102       this);
103   Finder->addMatcher(switchStmt().bind("switch"), this);
104   Finder->addMatcher(conditionalOperator().bind("condOp"), this);
105   Finder->addMatcher(
106       ifStmt((hasThen(hasDescendant(ifStmt())))).bind("ifWithDescendantIf"),
107       this);
108 }
109 
110 /// Determines whether two statement trees are identical regarding
111 /// operators and symbols.
112 ///
113 /// Exceptions: expressions containing macros or functions with possible side
114 /// effects are never considered identical.
115 /// Limitations: (t + u) and (u + t) are not considered identical.
116 /// t*(u + t) and t*u + t*t are not considered identical.
117 ///
118 static bool isIdenticalStmt(const ASTContext &Ctx, const Stmt *Stmt1,
119                             const Stmt *Stmt2, bool IgnoreSideEffects) {
120 
121   if (!Stmt1 || !Stmt2)
122     return !Stmt1 && !Stmt2;
123 
124   // If Stmt1 & Stmt2 are of different class then they are not
125   // identical statements.
126   if (Stmt1->getStmtClass() != Stmt2->getStmtClass())
127     return false;
128 
129   const auto *Expr1 = dyn_cast<Expr>(Stmt1);
130   const auto *Expr2 = dyn_cast<Expr>(Stmt2);
131 
132   if (Expr1 && Expr2) {
133     // If Stmt1 has side effects then don't warn even if expressions
134     // are identical.
135     if (!IgnoreSideEffects && Expr1->HasSideEffects(Ctx) &&
136         Expr2->HasSideEffects(Ctx))
137       return false;
138     // If either expression comes from a macro then don't warn even if
139     // the expressions are identical.
140     if ((Expr1->getExprLoc().isMacroID()) || (Expr2->getExprLoc().isMacroID()))
141       return false;
142 
143     // If all children of two expressions are identical, return true.
144     Expr::const_child_iterator I1 = Expr1->child_begin();
145     Expr::const_child_iterator I2 = Expr2->child_begin();
146     while (I1 != Expr1->child_end() && I2 != Expr2->child_end()) {
147       if (!isIdenticalStmt(Ctx, *I1, *I2, IgnoreSideEffects))
148         return false;
149       ++I1;
150       ++I2;
151     }
152     // If there are different number of children in the statements, return
153     // false.
154     if (I1 != Expr1->child_end())
155       return false;
156     if (I2 != Expr2->child_end())
157       return false;
158   }
159 
160   switch (Stmt1->getStmtClass()) {
161   default:
162     return false;
163   case Stmt::CallExprClass:
164   case Stmt::ArraySubscriptExprClass:
165   case Stmt::ArraySectionExprClass:
166   case Stmt::OMPArrayShapingExprClass:
167   case Stmt::OMPIteratorExprClass:
168   case Stmt::ImplicitCastExprClass:
169   case Stmt::ParenExprClass:
170   case Stmt::BreakStmtClass:
171   case Stmt::ContinueStmtClass:
172   case Stmt::NullStmtClass:
173     return true;
174   case Stmt::CStyleCastExprClass: {
175     const auto *CastExpr1 = cast<CStyleCastExpr>(Stmt1);
176     const auto *CastExpr2 = cast<CStyleCastExpr>(Stmt2);
177 
178     return CastExpr1->getTypeAsWritten() == CastExpr2->getTypeAsWritten();
179   }
180   case Stmt::ReturnStmtClass: {
181     const auto *ReturnStmt1 = cast<ReturnStmt>(Stmt1);
182     const auto *ReturnStmt2 = cast<ReturnStmt>(Stmt2);
183 
184     return isIdenticalStmt(Ctx, ReturnStmt1->getRetValue(),
185                            ReturnStmt2->getRetValue(), IgnoreSideEffects);
186   }
187   case Stmt::ForStmtClass: {
188     const auto *ForStmt1 = cast<ForStmt>(Stmt1);
189     const auto *ForStmt2 = cast<ForStmt>(Stmt2);
190 
191     if (!isIdenticalStmt(Ctx, ForStmt1->getInit(), ForStmt2->getInit(),
192                          IgnoreSideEffects))
193       return false;
194     if (!isIdenticalStmt(Ctx, ForStmt1->getCond(), ForStmt2->getCond(),
195                          IgnoreSideEffects))
196       return false;
197     if (!isIdenticalStmt(Ctx, ForStmt1->getInc(), ForStmt2->getInc(),
198                          IgnoreSideEffects))
199       return false;
200     if (!isIdenticalStmt(Ctx, ForStmt1->getBody(), ForStmt2->getBody(),
201                          IgnoreSideEffects))
202       return false;
203     return true;
204   }
205   case Stmt::DoStmtClass: {
206     const auto *DStmt1 = cast<DoStmt>(Stmt1);
207     const auto *DStmt2 = cast<DoStmt>(Stmt2);
208 
209     if (!isIdenticalStmt(Ctx, DStmt1->getCond(), DStmt2->getCond(),
210                          IgnoreSideEffects))
211       return false;
212     if (!isIdenticalStmt(Ctx, DStmt1->getBody(), DStmt2->getBody(),
213                          IgnoreSideEffects))
214       return false;
215     return true;
216   }
217   case Stmt::WhileStmtClass: {
218     const auto *WStmt1 = cast<WhileStmt>(Stmt1);
219     const auto *WStmt2 = cast<WhileStmt>(Stmt2);
220 
221     if (!isIdenticalStmt(Ctx, WStmt1->getCond(), WStmt2->getCond(),
222                          IgnoreSideEffects))
223       return false;
224     if (!isIdenticalStmt(Ctx, WStmt1->getBody(), WStmt2->getBody(),
225                          IgnoreSideEffects))
226       return false;
227     return true;
228   }
229   case Stmt::IfStmtClass: {
230     const auto *IStmt1 = cast<IfStmt>(Stmt1);
231     const auto *IStmt2 = cast<IfStmt>(Stmt2);
232 
233     if (!isIdenticalStmt(Ctx, IStmt1->getCond(), IStmt2->getCond(),
234                          IgnoreSideEffects))
235       return false;
236     if (!isIdenticalStmt(Ctx, IStmt1->getThen(), IStmt2->getThen(),
237                          IgnoreSideEffects))
238       return false;
239     if (!isIdenticalStmt(Ctx, IStmt1->getElse(), IStmt2->getElse(),
240                          IgnoreSideEffects))
241       return false;
242     return true;
243   }
244   case Stmt::CompoundStmtClass: {
245     const auto *CompStmt1 = cast<CompoundStmt>(Stmt1);
246     const auto *CompStmt2 = cast<CompoundStmt>(Stmt2);
247 
248     if (CompStmt1->size() != CompStmt2->size())
249       return false;
250 
251     if (!llvm::all_of(llvm::zip(CompStmt1->body(), CompStmt2->body()),
252                       [&Ctx, IgnoreSideEffects](
253                           std::tuple<const Stmt *, const Stmt *> stmtPair) {
254                         const Stmt *stmt0 = std::get<0>(stmtPair);
255                         const Stmt *stmt1 = std::get<1>(stmtPair);
256                         return isIdenticalStmt(Ctx, stmt0, stmt1,
257                                                IgnoreSideEffects);
258                       })) {
259       return false;
260     }
261 
262     return true;
263   }
264   case Stmt::CompoundAssignOperatorClass:
265   case Stmt::BinaryOperatorClass: {
266     const auto *BinOp1 = cast<BinaryOperator>(Stmt1);
267     const auto *BinOp2 = cast<BinaryOperator>(Stmt2);
268     return BinOp1->getOpcode() == BinOp2->getOpcode();
269   }
270   case Stmt::CharacterLiteralClass: {
271     const auto *CharLit1 = cast<CharacterLiteral>(Stmt1);
272     const auto *CharLit2 = cast<CharacterLiteral>(Stmt2);
273     return CharLit1->getValue() == CharLit2->getValue();
274   }
275   case Stmt::DeclRefExprClass: {
276     const auto *DeclRef1 = cast<DeclRefExpr>(Stmt1);
277     const auto *DeclRef2 = cast<DeclRefExpr>(Stmt2);
278     return DeclRef1->getDecl() == DeclRef2->getDecl();
279   }
280   case Stmt::IntegerLiteralClass: {
281     const auto *IntLit1 = cast<IntegerLiteral>(Stmt1);
282     const auto *IntLit2 = cast<IntegerLiteral>(Stmt2);
283 
284     llvm::APInt I1 = IntLit1->getValue();
285     llvm::APInt I2 = IntLit2->getValue();
286     if (I1.getBitWidth() != I2.getBitWidth())
287       return false;
288     return I1 == I2;
289   }
290   case Stmt::FloatingLiteralClass: {
291     const auto *FloatLit1 = cast<FloatingLiteral>(Stmt1);
292     const auto *FloatLit2 = cast<FloatingLiteral>(Stmt2);
293     return FloatLit1->getValue().bitwiseIsEqual(FloatLit2->getValue());
294   }
295   case Stmt::StringLiteralClass: {
296     const auto *StringLit1 = cast<StringLiteral>(Stmt1);
297     const auto *StringLit2 = cast<StringLiteral>(Stmt2);
298     return StringLit1->getBytes() == StringLit2->getBytes();
299   }
300   case Stmt::MemberExprClass: {
301     const auto *MemberStmt1 = cast<MemberExpr>(Stmt1);
302     const auto *MemberStmt2 = cast<MemberExpr>(Stmt2);
303     return MemberStmt1->getMemberDecl() == MemberStmt2->getMemberDecl();
304   }
305   case Stmt::UnaryOperatorClass: {
306     const auto *UnaryOp1 = cast<UnaryOperator>(Stmt1);
307     const auto *UnaryOp2 = cast<UnaryOperator>(Stmt2);
308     return UnaryOp1->getOpcode() == UnaryOp2->getOpcode();
309   }
310   }
311 }
312 
313 void BranchCloneCheck::check(const MatchFinder::MatchResult &Result) {
314   const ASTContext &Context = *Result.Context;
315 
316   if (const auto *IS = Result.Nodes.getNodeAs<IfStmt>("if")) {
317     const Stmt *Then = IS->getThen();
318     assert(Then && "An IfStmt must have a `then` branch!");
319 
320     const Stmt *Else = Result.Nodes.getNodeAs<Stmt>("else");
321     assert(Else && "We only look for `if` statements with an `else` branch!");
322 
323     if (!isa<IfStmt>(Else)) {
324       // Just a simple if with no `else if` branch.
325       if (utils::areStatementsIdentical(Then->IgnoreContainers(),
326                                         Else->IgnoreContainers(), Context)) {
327         diag(IS->getBeginLoc(), "if with identical then and else branches");
328         diag(IS->getElseLoc(), "else branch starts here", DiagnosticIDs::Note);
329       }
330       return;
331     }
332 
333     // This is the complicated case when we start an if/else if/else chain.
334     // To find all the duplicates, we collect all the branches into a vector.
335     llvm::SmallVector<const Stmt *, 4> Branches;
336     const IfStmt *Cur = IS;
337     while (true) {
338       // Store the `then` branch.
339       Branches.push_back(Cur->getThen());
340 
341       Else = Cur->getElse();
342       // The chain ends if there is no `else` branch.
343       if (!Else)
344         break;
345 
346       // Check if there is another `else if`...
347       Cur = dyn_cast<IfStmt>(Else);
348       if (!Cur) {
349         // ...this is just a plain `else` branch at the end of the chain.
350         Branches.push_back(Else);
351         break;
352       }
353     }
354 
355     size_t N = Branches.size();
356     llvm::BitVector KnownAsClone(N);
357 
358     for (size_t I = 0; I + 1 < N; I++) {
359       // We have already seen Branches[i] as a clone of an earlier branch.
360       if (KnownAsClone[I])
361         continue;
362 
363       int NumCopies = 1;
364 
365       for (size_t J = I + 1; J < N; J++) {
366         if (KnownAsClone[J] || !utils::areStatementsIdentical(
367                                    Branches[I]->IgnoreContainers(),
368                                    Branches[J]->IgnoreContainers(), Context))
369           continue;
370 
371         NumCopies++;
372         KnownAsClone[J] = true;
373 
374         if (NumCopies == 2) {
375           // We report the first occurrence only when we find the second one.
376           diag(Branches[I]->getBeginLoc(),
377                "repeated branch body in conditional chain");
378           SourceLocation End =
379               Lexer::getLocForEndOfToken(Branches[I]->getEndLoc(), 0,
380                                          *Result.SourceManager, getLangOpts());
381           if (End.isValid()) {
382             diag(End, "end of the original", DiagnosticIDs::Note);
383           }
384         }
385 
386         diag(Branches[J]->getBeginLoc(), "clone %0 starts here",
387              DiagnosticIDs::Note)
388             << (NumCopies - 1);
389       }
390     }
391     return;
392   }
393 
394   if (const auto *CO = Result.Nodes.getNodeAs<ConditionalOperator>("condOp")) {
395     // We do not try to detect chains of ?: operators.
396     if (utils::areStatementsIdentical(CO->getTrueExpr(), CO->getFalseExpr(),
397                                       Context))
398       diag(CO->getQuestionLoc(),
399            "conditional operator with identical true and false expressions");
400 
401     return;
402   }
403 
404   if (const auto *SS = Result.Nodes.getNodeAs<SwitchStmt>("switch")) {
405     const auto *Body = dyn_cast_or_null<CompoundStmt>(SS->getBody());
406 
407     // Code like
408     //   switch (x) case 0: case 1: foobar();
409     // is legal and calls foobar() if and only if x is either 0 or 1;
410     // but we do not try to distinguish branches in such code.
411     if (!Body)
412       return;
413 
414     // We will first collect the branches of the switch statements. For the
415     // sake of simplicity we say that branches are delimited by the SwitchCase
416     // (`case:` or `default:`) children of Body; that is, we ignore `case:` or
417     // `default:` labels embedded inside other statements and we do not follow
418     // the effects of `break` and other manipulation of the control-flow.
419     llvm::SmallVector<SwitchBranch, 4> Branches;
420     for (const Stmt *S : Body->body()) {
421       // If this is a `case` or `default`, we start a new, empty branch.
422       if (isa<SwitchCase>(S))
423         Branches.emplace_back();
424 
425       // There may be code before the first branch (which can be dead code
426       // and can be code reached either through goto or through case labels
427       // that are embedded inside e.g. inner compound statements); we do not
428       // store those statements in branches.
429       if (!Branches.empty())
430         Branches.back().push_back(S);
431     }
432 
433     auto *End = Branches.end();
434     auto *BeginCurrent = Branches.begin();
435     while (BeginCurrent < End) {
436       if (isFallthroughSwitchBranch(*BeginCurrent)) {
437         ++BeginCurrent;
438         continue;
439       }
440 
441       auto *EndCurrent = BeginCurrent + 1;
442       while (EndCurrent < End &&
443              areSwitchBranchesIdentical(*BeginCurrent, *EndCurrent, Context)) {
444         ++EndCurrent;
445       }
446       // At this point the iterator range {BeginCurrent, EndCurrent} contains a
447       // complete family of consecutive identical branches.
448 
449       if (EndCurrent == (BeginCurrent + 1)) {
450         // No consecutive identical branches that start on BeginCurrent
451         BeginCurrent = EndCurrent;
452         continue;
453       }
454 
455       diag(BeginCurrent->front()->getBeginLoc(),
456            "switch has %0 consecutive identical branches")
457           << static_cast<int>(std::distance(BeginCurrent, EndCurrent));
458 
459       SourceLocation EndLoc = (EndCurrent - 1)->back()->getEndLoc();
460       // If the case statement is generated from a macro, it's SourceLocation
461       // may be invalid, resulting in an assertion failure down the line.
462       // While not optimal, try the begin location in this case, it's still
463       // better then nothing.
464       if (EndLoc.isInvalid())
465         EndLoc = (EndCurrent - 1)->back()->getBeginLoc();
466       if (EndLoc.isMacroID())
467         EndLoc = Context.getSourceManager().getExpansionLoc(EndLoc);
468       EndLoc = Lexer::getLocForEndOfToken(EndLoc, 0, *Result.SourceManager,
469                                           getLangOpts());
470       if (EndLoc.isValid()) {
471         diag(EndLoc, "last of these clones ends here", DiagnosticIDs::Note);
472       }
473       BeginCurrent = EndCurrent;
474     }
475     return;
476   }
477 
478   if (const auto *IS = Result.Nodes.getNodeAs<IfStmt>("ifWithDescendantIf")) {
479     const Stmt *Then = IS->getThen();
480     auto CS = dyn_cast<CompoundStmt>(Then);
481     if (CS && (!CS->body_empty())) {
482       const auto *InnerIf = dyn_cast<IfStmt>(*CS->body_begin());
483       if (InnerIf && isIdenticalStmt(Context, IS->getCond(), InnerIf->getCond(),
484                                      /*IgnoreSideEffects=*/false)) {
485         diag(IS->getBeginLoc(), "if with identical inner if statement");
486         diag(InnerIf->getBeginLoc(), "inner if starts here",
487              DiagnosticIDs::Note);
488       }
489     }
490     return;
491   }
492 
493   llvm_unreachable("No if statement and no switch statement.");
494 }
495 
496 } // namespace clang::tidy::bugprone
497