1 //===--- BranchCloneCheck.cpp - clang-tidy --------------------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #include "BranchCloneCheck.h" 10 #include "../utils/ASTUtils.h" 11 #include "clang/AST/ASTContext.h" 12 #include "clang/AST/RecursiveASTVisitor.h" 13 #include "clang/ASTMatchers/ASTMatchFinder.h" 14 #include "clang/Analysis/CloneDetection.h" 15 #include "clang/Lex/Lexer.h" 16 #include "llvm/Support/Casting.h" 17 18 using namespace clang; 19 using namespace clang::ast_matchers; 20 21 namespace { 22 /// A branch in a switch may consist of several statements; while a branch in 23 /// an if/else if/else chain is one statement (which may be a CompoundStmt). 24 using SwitchBranch = llvm::SmallVector<const Stmt *, 2>; 25 } // anonymous namespace 26 27 /// Determines if the bodies of two branches in a switch statements are Type I 28 /// clones of each other. This function only examines the body of the branch 29 /// and ignores the `case X:` or `default:` at the start of the branch. 30 static bool areSwitchBranchesIdentical(const SwitchBranch &LHS, 31 const SwitchBranch &RHS, 32 const ASTContext &Context) { 33 if (LHS.size() != RHS.size()) 34 return false; 35 36 for (size_t I = 0, Size = LHS.size(); I < Size; I++) { 37 // NOTE: We strip goto labels and annotations in addition to stripping 38 // the `case X:` or `default:` labels, but it is very unlikely that this 39 // would cause false positives in real-world code. 40 if (!tidy::utils::areStatementsIdentical(LHS[I]->stripLabelLikeStatements(), 41 RHS[I]->stripLabelLikeStatements(), 42 Context)) { 43 return false; 44 } 45 } 46 47 return true; 48 } 49 50 static bool isFallthroughSwitchBranch(const SwitchBranch &Branch) { 51 struct SwitchCaseVisitor : RecursiveASTVisitor<SwitchCaseVisitor> { 52 using RecursiveASTVisitor<SwitchCaseVisitor>::DataRecursionQueue; 53 54 bool TraverseLambdaExpr(LambdaExpr *, DataRecursionQueue * = nullptr) { 55 return true; // Ignore lambdas 56 } 57 58 bool TraverseDecl(Decl *) { 59 return true; // No need to check declarations 60 } 61 62 bool TraverseSwitchStmt(SwitchStmt *, DataRecursionQueue * = nullptr) { 63 return true; // Ignore sub-switches 64 } 65 66 bool TraverseSwitchCase(SwitchCase *, DataRecursionQueue * = nullptr) { 67 return true; // Ignore cases 68 } 69 70 bool TraverseDefaultStmt(DefaultStmt *, DataRecursionQueue * = nullptr) { 71 return true; // Ignore defaults 72 } 73 74 bool TraverseAttributedStmt(AttributedStmt *S) { 75 if (!S) 76 return true; 77 78 for (const Attr *A : S->getAttrs()) { 79 if (isa<FallThroughAttr>(A)) 80 return false; 81 } 82 83 return true; 84 } 85 } Visitor; 86 87 for (const Stmt *Elem : Branch) { 88 if (!Visitor.TraverseStmt(const_cast<Stmt *>(Elem))) 89 return true; 90 } 91 return false; 92 } 93 94 namespace clang::tidy::bugprone { 95 96 void BranchCloneCheck::registerMatchers(MatchFinder *Finder) { 97 Finder->addMatcher( 98 ifStmt(unless(allOf(isConstexpr(), isInTemplateInstantiation())), 99 stmt().bind("if"), 100 hasParent(stmt(unless(ifStmt(hasElse(equalsBoundNode("if")))))), 101 hasElse(stmt().bind("else"))), 102 this); 103 Finder->addMatcher(switchStmt().bind("switch"), this); 104 Finder->addMatcher(conditionalOperator().bind("condOp"), this); 105 Finder->addMatcher( 106 ifStmt((hasThen(hasDescendant(ifStmt())))).bind("ifWithDescendantIf"), 107 this); 108 } 109 110 /// Determines whether two statement trees are identical regarding 111 /// operators and symbols. 112 /// 113 /// Exceptions: expressions containing macros or functions with possible side 114 /// effects are never considered identical. 115 /// Limitations: (t + u) and (u + t) are not considered identical. 116 /// t*(u + t) and t*u + t*t are not considered identical. 117 /// 118 static bool isIdenticalStmt(const ASTContext &Ctx, const Stmt *Stmt1, 119 const Stmt *Stmt2, bool IgnoreSideEffects) { 120 121 if (!Stmt1 || !Stmt2) 122 return !Stmt1 && !Stmt2; 123 124 // If Stmt1 & Stmt2 are of different class then they are not 125 // identical statements. 126 if (Stmt1->getStmtClass() != Stmt2->getStmtClass()) 127 return false; 128 129 const auto *Expr1 = dyn_cast<Expr>(Stmt1); 130 const auto *Expr2 = dyn_cast<Expr>(Stmt2); 131 132 if (Expr1 && Expr2) { 133 // If Stmt1 has side effects then don't warn even if expressions 134 // are identical. 135 if (!IgnoreSideEffects && Expr1->HasSideEffects(Ctx) && 136 Expr2->HasSideEffects(Ctx)) 137 return false; 138 // If either expression comes from a macro then don't warn even if 139 // the expressions are identical. 140 if ((Expr1->getExprLoc().isMacroID()) || (Expr2->getExprLoc().isMacroID())) 141 return false; 142 143 // If all children of two expressions are identical, return true. 144 Expr::const_child_iterator I1 = Expr1->child_begin(); 145 Expr::const_child_iterator I2 = Expr2->child_begin(); 146 while (I1 != Expr1->child_end() && I2 != Expr2->child_end()) { 147 if (!isIdenticalStmt(Ctx, *I1, *I2, IgnoreSideEffects)) 148 return false; 149 ++I1; 150 ++I2; 151 } 152 // If there are different number of children in the statements, return 153 // false. 154 if (I1 != Expr1->child_end()) 155 return false; 156 if (I2 != Expr2->child_end()) 157 return false; 158 } 159 160 switch (Stmt1->getStmtClass()) { 161 default: 162 return false; 163 case Stmt::CallExprClass: 164 case Stmt::ArraySubscriptExprClass: 165 case Stmt::ArraySectionExprClass: 166 case Stmt::OMPArrayShapingExprClass: 167 case Stmt::OMPIteratorExprClass: 168 case Stmt::ImplicitCastExprClass: 169 case Stmt::ParenExprClass: 170 case Stmt::BreakStmtClass: 171 case Stmt::ContinueStmtClass: 172 case Stmt::NullStmtClass: 173 return true; 174 case Stmt::CStyleCastExprClass: { 175 const auto *CastExpr1 = cast<CStyleCastExpr>(Stmt1); 176 const auto *CastExpr2 = cast<CStyleCastExpr>(Stmt2); 177 178 return CastExpr1->getTypeAsWritten() == CastExpr2->getTypeAsWritten(); 179 } 180 case Stmt::ReturnStmtClass: { 181 const auto *ReturnStmt1 = cast<ReturnStmt>(Stmt1); 182 const auto *ReturnStmt2 = cast<ReturnStmt>(Stmt2); 183 184 return isIdenticalStmt(Ctx, ReturnStmt1->getRetValue(), 185 ReturnStmt2->getRetValue(), IgnoreSideEffects); 186 } 187 case Stmt::ForStmtClass: { 188 const auto *ForStmt1 = cast<ForStmt>(Stmt1); 189 const auto *ForStmt2 = cast<ForStmt>(Stmt2); 190 191 if (!isIdenticalStmt(Ctx, ForStmt1->getInit(), ForStmt2->getInit(), 192 IgnoreSideEffects)) 193 return false; 194 if (!isIdenticalStmt(Ctx, ForStmt1->getCond(), ForStmt2->getCond(), 195 IgnoreSideEffects)) 196 return false; 197 if (!isIdenticalStmt(Ctx, ForStmt1->getInc(), ForStmt2->getInc(), 198 IgnoreSideEffects)) 199 return false; 200 if (!isIdenticalStmt(Ctx, ForStmt1->getBody(), ForStmt2->getBody(), 201 IgnoreSideEffects)) 202 return false; 203 return true; 204 } 205 case Stmt::DoStmtClass: { 206 const auto *DStmt1 = cast<DoStmt>(Stmt1); 207 const auto *DStmt2 = cast<DoStmt>(Stmt2); 208 209 if (!isIdenticalStmt(Ctx, DStmt1->getCond(), DStmt2->getCond(), 210 IgnoreSideEffects)) 211 return false; 212 if (!isIdenticalStmt(Ctx, DStmt1->getBody(), DStmt2->getBody(), 213 IgnoreSideEffects)) 214 return false; 215 return true; 216 } 217 case Stmt::WhileStmtClass: { 218 const auto *WStmt1 = cast<WhileStmt>(Stmt1); 219 const auto *WStmt2 = cast<WhileStmt>(Stmt2); 220 221 if (!isIdenticalStmt(Ctx, WStmt1->getCond(), WStmt2->getCond(), 222 IgnoreSideEffects)) 223 return false; 224 if (!isIdenticalStmt(Ctx, WStmt1->getBody(), WStmt2->getBody(), 225 IgnoreSideEffects)) 226 return false; 227 return true; 228 } 229 case Stmt::IfStmtClass: { 230 const auto *IStmt1 = cast<IfStmt>(Stmt1); 231 const auto *IStmt2 = cast<IfStmt>(Stmt2); 232 233 if (!isIdenticalStmt(Ctx, IStmt1->getCond(), IStmt2->getCond(), 234 IgnoreSideEffects)) 235 return false; 236 if (!isIdenticalStmt(Ctx, IStmt1->getThen(), IStmt2->getThen(), 237 IgnoreSideEffects)) 238 return false; 239 if (!isIdenticalStmt(Ctx, IStmt1->getElse(), IStmt2->getElse(), 240 IgnoreSideEffects)) 241 return false; 242 return true; 243 } 244 case Stmt::CompoundStmtClass: { 245 const auto *CompStmt1 = cast<CompoundStmt>(Stmt1); 246 const auto *CompStmt2 = cast<CompoundStmt>(Stmt2); 247 248 if (CompStmt1->size() != CompStmt2->size()) 249 return false; 250 251 if (!llvm::all_of(llvm::zip(CompStmt1->body(), CompStmt2->body()), 252 [&Ctx, IgnoreSideEffects]( 253 std::tuple<const Stmt *, const Stmt *> stmtPair) { 254 const Stmt *stmt0 = std::get<0>(stmtPair); 255 const Stmt *stmt1 = std::get<1>(stmtPair); 256 return isIdenticalStmt(Ctx, stmt0, stmt1, 257 IgnoreSideEffects); 258 })) { 259 return false; 260 } 261 262 return true; 263 } 264 case Stmt::CompoundAssignOperatorClass: 265 case Stmt::BinaryOperatorClass: { 266 const auto *BinOp1 = cast<BinaryOperator>(Stmt1); 267 const auto *BinOp2 = cast<BinaryOperator>(Stmt2); 268 return BinOp1->getOpcode() == BinOp2->getOpcode(); 269 } 270 case Stmt::CharacterLiteralClass: { 271 const auto *CharLit1 = cast<CharacterLiteral>(Stmt1); 272 const auto *CharLit2 = cast<CharacterLiteral>(Stmt2); 273 return CharLit1->getValue() == CharLit2->getValue(); 274 } 275 case Stmt::DeclRefExprClass: { 276 const auto *DeclRef1 = cast<DeclRefExpr>(Stmt1); 277 const auto *DeclRef2 = cast<DeclRefExpr>(Stmt2); 278 return DeclRef1->getDecl() == DeclRef2->getDecl(); 279 } 280 case Stmt::IntegerLiteralClass: { 281 const auto *IntLit1 = cast<IntegerLiteral>(Stmt1); 282 const auto *IntLit2 = cast<IntegerLiteral>(Stmt2); 283 284 llvm::APInt I1 = IntLit1->getValue(); 285 llvm::APInt I2 = IntLit2->getValue(); 286 if (I1.getBitWidth() != I2.getBitWidth()) 287 return false; 288 return I1 == I2; 289 } 290 case Stmt::FloatingLiteralClass: { 291 const auto *FloatLit1 = cast<FloatingLiteral>(Stmt1); 292 const auto *FloatLit2 = cast<FloatingLiteral>(Stmt2); 293 return FloatLit1->getValue().bitwiseIsEqual(FloatLit2->getValue()); 294 } 295 case Stmt::StringLiteralClass: { 296 const auto *StringLit1 = cast<StringLiteral>(Stmt1); 297 const auto *StringLit2 = cast<StringLiteral>(Stmt2); 298 return StringLit1->getBytes() == StringLit2->getBytes(); 299 } 300 case Stmt::MemberExprClass: { 301 const auto *MemberStmt1 = cast<MemberExpr>(Stmt1); 302 const auto *MemberStmt2 = cast<MemberExpr>(Stmt2); 303 return MemberStmt1->getMemberDecl() == MemberStmt2->getMemberDecl(); 304 } 305 case Stmt::UnaryOperatorClass: { 306 const auto *UnaryOp1 = cast<UnaryOperator>(Stmt1); 307 const auto *UnaryOp2 = cast<UnaryOperator>(Stmt2); 308 return UnaryOp1->getOpcode() == UnaryOp2->getOpcode(); 309 } 310 } 311 } 312 313 void BranchCloneCheck::check(const MatchFinder::MatchResult &Result) { 314 const ASTContext &Context = *Result.Context; 315 316 if (const auto *IS = Result.Nodes.getNodeAs<IfStmt>("if")) { 317 const Stmt *Then = IS->getThen(); 318 assert(Then && "An IfStmt must have a `then` branch!"); 319 320 const Stmt *Else = Result.Nodes.getNodeAs<Stmt>("else"); 321 assert(Else && "We only look for `if` statements with an `else` branch!"); 322 323 if (!isa<IfStmt>(Else)) { 324 // Just a simple if with no `else if` branch. 325 if (utils::areStatementsIdentical(Then->IgnoreContainers(), 326 Else->IgnoreContainers(), Context)) { 327 diag(IS->getBeginLoc(), "if with identical then and else branches"); 328 diag(IS->getElseLoc(), "else branch starts here", DiagnosticIDs::Note); 329 } 330 return; 331 } 332 333 // This is the complicated case when we start an if/else if/else chain. 334 // To find all the duplicates, we collect all the branches into a vector. 335 llvm::SmallVector<const Stmt *, 4> Branches; 336 const IfStmt *Cur = IS; 337 while (true) { 338 // Store the `then` branch. 339 Branches.push_back(Cur->getThen()); 340 341 Else = Cur->getElse(); 342 // The chain ends if there is no `else` branch. 343 if (!Else) 344 break; 345 346 // Check if there is another `else if`... 347 Cur = dyn_cast<IfStmt>(Else); 348 if (!Cur) { 349 // ...this is just a plain `else` branch at the end of the chain. 350 Branches.push_back(Else); 351 break; 352 } 353 } 354 355 size_t N = Branches.size(); 356 llvm::BitVector KnownAsClone(N); 357 358 for (size_t I = 0; I + 1 < N; I++) { 359 // We have already seen Branches[i] as a clone of an earlier branch. 360 if (KnownAsClone[I]) 361 continue; 362 363 int NumCopies = 1; 364 365 for (size_t J = I + 1; J < N; J++) { 366 if (KnownAsClone[J] || !utils::areStatementsIdentical( 367 Branches[I]->IgnoreContainers(), 368 Branches[J]->IgnoreContainers(), Context)) 369 continue; 370 371 NumCopies++; 372 KnownAsClone[J] = true; 373 374 if (NumCopies == 2) { 375 // We report the first occurrence only when we find the second one. 376 diag(Branches[I]->getBeginLoc(), 377 "repeated branch body in conditional chain"); 378 SourceLocation End = 379 Lexer::getLocForEndOfToken(Branches[I]->getEndLoc(), 0, 380 *Result.SourceManager, getLangOpts()); 381 if (End.isValid()) { 382 diag(End, "end of the original", DiagnosticIDs::Note); 383 } 384 } 385 386 diag(Branches[J]->getBeginLoc(), "clone %0 starts here", 387 DiagnosticIDs::Note) 388 << (NumCopies - 1); 389 } 390 } 391 return; 392 } 393 394 if (const auto *CO = Result.Nodes.getNodeAs<ConditionalOperator>("condOp")) { 395 // We do not try to detect chains of ?: operators. 396 if (utils::areStatementsIdentical(CO->getTrueExpr(), CO->getFalseExpr(), 397 Context)) 398 diag(CO->getQuestionLoc(), 399 "conditional operator with identical true and false expressions"); 400 401 return; 402 } 403 404 if (const auto *SS = Result.Nodes.getNodeAs<SwitchStmt>("switch")) { 405 const auto *Body = dyn_cast_or_null<CompoundStmt>(SS->getBody()); 406 407 // Code like 408 // switch (x) case 0: case 1: foobar(); 409 // is legal and calls foobar() if and only if x is either 0 or 1; 410 // but we do not try to distinguish branches in such code. 411 if (!Body) 412 return; 413 414 // We will first collect the branches of the switch statements. For the 415 // sake of simplicity we say that branches are delimited by the SwitchCase 416 // (`case:` or `default:`) children of Body; that is, we ignore `case:` or 417 // `default:` labels embedded inside other statements and we do not follow 418 // the effects of `break` and other manipulation of the control-flow. 419 llvm::SmallVector<SwitchBranch, 4> Branches; 420 for (const Stmt *S : Body->body()) { 421 // If this is a `case` or `default`, we start a new, empty branch. 422 if (isa<SwitchCase>(S)) 423 Branches.emplace_back(); 424 425 // There may be code before the first branch (which can be dead code 426 // and can be code reached either through goto or through case labels 427 // that are embedded inside e.g. inner compound statements); we do not 428 // store those statements in branches. 429 if (!Branches.empty()) 430 Branches.back().push_back(S); 431 } 432 433 auto *End = Branches.end(); 434 auto *BeginCurrent = Branches.begin(); 435 while (BeginCurrent < End) { 436 if (isFallthroughSwitchBranch(*BeginCurrent)) { 437 ++BeginCurrent; 438 continue; 439 } 440 441 auto *EndCurrent = BeginCurrent + 1; 442 while (EndCurrent < End && 443 areSwitchBranchesIdentical(*BeginCurrent, *EndCurrent, Context)) { 444 ++EndCurrent; 445 } 446 // At this point the iterator range {BeginCurrent, EndCurrent} contains a 447 // complete family of consecutive identical branches. 448 449 if (EndCurrent == (BeginCurrent + 1)) { 450 // No consecutive identical branches that start on BeginCurrent 451 BeginCurrent = EndCurrent; 452 continue; 453 } 454 455 diag(BeginCurrent->front()->getBeginLoc(), 456 "switch has %0 consecutive identical branches") 457 << static_cast<int>(std::distance(BeginCurrent, EndCurrent)); 458 459 SourceLocation EndLoc = (EndCurrent - 1)->back()->getEndLoc(); 460 // If the case statement is generated from a macro, it's SourceLocation 461 // may be invalid, resulting in an assertion failure down the line. 462 // While not optimal, try the begin location in this case, it's still 463 // better then nothing. 464 if (EndLoc.isInvalid()) 465 EndLoc = (EndCurrent - 1)->back()->getBeginLoc(); 466 if (EndLoc.isMacroID()) 467 EndLoc = Context.getSourceManager().getExpansionLoc(EndLoc); 468 EndLoc = Lexer::getLocForEndOfToken(EndLoc, 0, *Result.SourceManager, 469 getLangOpts()); 470 if (EndLoc.isValid()) { 471 diag(EndLoc, "last of these clones ends here", DiagnosticIDs::Note); 472 } 473 BeginCurrent = EndCurrent; 474 } 475 return; 476 } 477 478 if (const auto *IS = Result.Nodes.getNodeAs<IfStmt>("ifWithDescendantIf")) { 479 const Stmt *Then = IS->getThen(); 480 auto CS = dyn_cast<CompoundStmt>(Then); 481 if (CS && (!CS->body_empty())) { 482 const auto *InnerIf = dyn_cast<IfStmt>(*CS->body_begin()); 483 if (InnerIf && isIdenticalStmt(Context, IS->getCond(), InnerIf->getCond(), 484 /*IgnoreSideEffects=*/false)) { 485 diag(IS->getBeginLoc(), "if with identical inner if statement"); 486 diag(InnerIf->getBeginLoc(), "inner if starts here", 487 DiagnosticIDs::Note); 488 } 489 } 490 return; 491 } 492 493 llvm_unreachable("No if statement and no switch statement."); 494 } 495 496 } // namespace clang::tidy::bugprone 497