1 //===--- InefficientVectorOperationCheck.cpp - clang-tidy------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #include "InefficientVectorOperationCheck.h" 10 #include "../utils/DeclRefExprUtils.h" 11 #include "../utils/OptionsUtils.h" 12 #include "clang/AST/ASTContext.h" 13 #include "clang/ASTMatchers/ASTMatchFinder.h" 14 #include "clang/Lex/Lexer.h" 15 16 using namespace clang::ast_matchers; 17 18 namespace clang::tidy::performance { 19 20 namespace { 21 22 // Matcher names. Given the code: 23 // 24 // \code 25 // void f() { 26 // vector<T> v; 27 // for (int i = 0; i < 10 + 1; ++i) { 28 // v.push_back(i); 29 // } 30 // 31 // SomeProto p; 32 // for (int i = 0; i < 10 + 1; ++i) { 33 // p.add_xxx(i); 34 // } 35 // } 36 // \endcode 37 // 38 // The matcher names are bound to following parts of the AST: 39 // - LoopCounterName: The entire for loop (as ForStmt). 40 // - LoopParentName: The body of function f (as CompoundStmt). 41 // - VectorVarDeclName: 'v' (as VarDecl). 42 // - VectorVarDeclStmatName: The entire 'std::vector<T> v;' statement (as 43 // DeclStmt). 44 // - PushBackOrEmplaceBackCallName: 'v.push_back(i)' (as cxxMemberCallExpr). 45 // - LoopInitVarName: 'i' (as VarDecl). 46 // - LoopEndExpr: '10+1' (as Expr). 47 // If EnableProto, the proto related names are bound to the following parts: 48 // - ProtoVarDeclName: 'p' (as VarDecl). 49 // - ProtoVarDeclStmtName: The entire 'SomeProto p;' statement (as DeclStmt). 50 // - ProtoAddFieldCallName: 'p.add_xxx(i)' (as cxxMemberCallExpr). 51 static const char LoopCounterName[] = "for_loop_counter"; 52 static const char LoopParentName[] = "loop_parent"; 53 static const char VectorVarDeclName[] = "vector_var_decl"; 54 static const char VectorVarDeclStmtName[] = "vector_var_decl_stmt"; 55 static const char PushBackOrEmplaceBackCallName[] = "append_call"; 56 static const char ProtoVarDeclName[] = "proto_var_decl"; 57 static const char ProtoVarDeclStmtName[] = "proto_var_decl_stmt"; 58 static const char ProtoAddFieldCallName[] = "proto_add_field"; 59 static const char LoopInitVarName[] = "loop_init_var"; 60 static const char LoopEndExprName[] = "loop_end_expr"; 61 static const char RangeLoopName[] = "for_range_loop"; 62 63 ast_matchers::internal::Matcher<Expr> supportedContainerTypesMatcher() { 64 return hasType(cxxRecordDecl(hasAnyName( 65 "::std::vector", "::std::set", "::std::unordered_set", "::std::map", 66 "::std::unordered_map", "::std::array", "::std::deque"))); 67 } 68 69 AST_MATCHER(Expr, hasSideEffects) { 70 return Node.HasSideEffects(Finder->getASTContext()); 71 } 72 73 } // namespace 74 75 InefficientVectorOperationCheck::InefficientVectorOperationCheck( 76 StringRef Name, ClangTidyContext *Context) 77 : ClangTidyCheck(Name, Context), 78 VectorLikeClasses(utils::options::parseStringList( 79 Options.get("VectorLikeClasses", "::std::vector"))), 80 EnableProto(Options.get("EnableProto", false)) {} 81 82 void InefficientVectorOperationCheck::storeOptions( 83 ClangTidyOptions::OptionMap &Opts) { 84 Options.store(Opts, "VectorLikeClasses", 85 utils::options::serializeStringList(VectorLikeClasses)); 86 Options.store(Opts, "EnableProto", EnableProto); 87 } 88 89 void InefficientVectorOperationCheck::addMatcher( 90 const DeclarationMatcher &TargetRecordDecl, StringRef VarDeclName, 91 StringRef VarDeclStmtName, const DeclarationMatcher &AppendMethodDecl, 92 StringRef AppendCallName, MatchFinder *Finder) { 93 const auto DefaultConstructorCall = cxxConstructExpr( 94 hasType(TargetRecordDecl), 95 hasDeclaration(cxxConstructorDecl(isDefaultConstructor()))); 96 const auto TargetVarDecl = 97 varDecl(hasInitializer(DefaultConstructorCall)).bind(VarDeclName); 98 const auto TargetVarDefStmt = 99 declStmt(hasSingleDecl(equalsBoundNode(std::string(VarDeclName)))) 100 .bind(VarDeclStmtName); 101 102 const auto AppendCallExpr = 103 cxxMemberCallExpr( 104 callee(AppendMethodDecl), on(hasType(TargetRecordDecl)), 105 onImplicitObjectArgument(declRefExpr(to(TargetVarDecl)))) 106 .bind(AppendCallName); 107 const auto AppendCall = expr(ignoringImplicit(AppendCallExpr)); 108 const auto LoopVarInit = declStmt(hasSingleDecl( 109 varDecl(hasInitializer(ignoringParenImpCasts(integerLiteral(equals(0))))) 110 .bind(LoopInitVarName))); 111 const auto RefersToLoopVar = ignoringParenImpCasts( 112 declRefExpr(to(varDecl(equalsBoundNode(LoopInitVarName))))); 113 114 // Matchers for the loop whose body has only 1 push_back/emplace_back calling 115 // statement. 116 const auto HasInterestingLoopBody = hasBody( 117 anyOf(compoundStmt(statementCountIs(1), has(AppendCall)), AppendCall)); 118 const auto InInterestingCompoundStmt = 119 hasParent(compoundStmt(has(TargetVarDefStmt)).bind(LoopParentName)); 120 121 // Match counter-based for loops: 122 // for (int i = 0; i < n; ++i) { 123 // v.push_back(...); 124 // // Or: proto.add_xxx(...); 125 // } 126 // 127 // FIXME: Support more types of counter-based loops like decrement loops. 128 Finder->addMatcher( 129 forStmt( 130 hasLoopInit(LoopVarInit), 131 hasCondition(binaryOperator( 132 hasOperatorName("<"), hasLHS(RefersToLoopVar), 133 hasRHS(expr(unless(hasDescendant(expr(RefersToLoopVar)))) 134 .bind(LoopEndExprName)))), 135 hasIncrement(unaryOperator(hasOperatorName("++"), 136 hasUnaryOperand(RefersToLoopVar))), 137 HasInterestingLoopBody, InInterestingCompoundStmt) 138 .bind(LoopCounterName), 139 this); 140 141 // Match for-range loops: 142 // for (const auto& E : data) { 143 // v.push_back(...); 144 // // Or: proto.add_xxx(...); 145 // } 146 // 147 // FIXME: Support more complex range-expressions. 148 Finder->addMatcher( 149 cxxForRangeStmt( 150 hasRangeInit( 151 anyOf(declRefExpr(supportedContainerTypesMatcher()), 152 memberExpr(hasObjectExpression(unless(hasSideEffects())), 153 supportedContainerTypesMatcher()))), 154 HasInterestingLoopBody, InInterestingCompoundStmt) 155 .bind(RangeLoopName), 156 this); 157 } 158 159 void InefficientVectorOperationCheck::registerMatchers(MatchFinder *Finder) { 160 const auto VectorDecl = cxxRecordDecl(hasAnyName(VectorLikeClasses)); 161 const auto AppendMethodDecl = 162 cxxMethodDecl(hasAnyName("push_back", "emplace_back")); 163 addMatcher(VectorDecl, VectorVarDeclName, VectorVarDeclStmtName, 164 AppendMethodDecl, PushBackOrEmplaceBackCallName, Finder); 165 166 if (EnableProto) { 167 const auto ProtoDecl = 168 cxxRecordDecl(isDerivedFrom("::proto2::MessageLite")); 169 170 // A method's name starts with "add_" might not mean it's an add field 171 // call; it could be the getter for a proto field of which the name starts 172 // with "add_". So we exclude const methods. 173 const auto AddFieldMethodDecl = 174 cxxMethodDecl(matchesName("::add_"), unless(isConst())); 175 addMatcher(ProtoDecl, ProtoVarDeclName, ProtoVarDeclStmtName, 176 AddFieldMethodDecl, ProtoAddFieldCallName, Finder); 177 } 178 } 179 180 void InefficientVectorOperationCheck::check( 181 const MatchFinder::MatchResult &Result) { 182 auto* Context = Result.Context; 183 if (Context->getDiagnostics().hasUncompilableErrorOccurred()) 184 return; 185 186 const SourceManager &SM = *Result.SourceManager; 187 const auto *VectorVarDecl = 188 Result.Nodes.getNodeAs<VarDecl>(VectorVarDeclName); 189 const auto *ForLoop = Result.Nodes.getNodeAs<ForStmt>(LoopCounterName); 190 const auto *RangeLoop = 191 Result.Nodes.getNodeAs<CXXForRangeStmt>(RangeLoopName); 192 const auto *VectorAppendCall = 193 Result.Nodes.getNodeAs<CXXMemberCallExpr>(PushBackOrEmplaceBackCallName); 194 const auto *ProtoVarDecl = Result.Nodes.getNodeAs<VarDecl>(ProtoVarDeclName); 195 const auto *ProtoAddFieldCall = 196 Result.Nodes.getNodeAs<CXXMemberCallExpr>(ProtoAddFieldCallName); 197 const auto *LoopEndExpr = Result.Nodes.getNodeAs<Expr>(LoopEndExprName); 198 const auto *LoopParent = Result.Nodes.getNodeAs<CompoundStmt>(LoopParentName); 199 200 const CXXMemberCallExpr *AppendCall = 201 VectorAppendCall ? VectorAppendCall : ProtoAddFieldCall; 202 assert(AppendCall && "no append call expression"); 203 204 const Stmt *LoopStmt = ForLoop; 205 if (!LoopStmt) 206 LoopStmt = RangeLoop; 207 208 const auto *TargetVarDecl = VectorVarDecl; 209 if (!TargetVarDecl) 210 TargetVarDecl = ProtoVarDecl; 211 212 llvm::SmallPtrSet<const DeclRefExpr *, 16> AllVarRefs = 213 utils::decl_ref_expr::allDeclRefExprs(*TargetVarDecl, *LoopParent, 214 *Context); 215 for (const auto *Ref : AllVarRefs) { 216 // Skip cases where there are usages (defined as DeclRefExpr that refers 217 // to "v") of vector variable / proto variable `v` before the for loop. We 218 // consider these usages are operations causing memory preallocation (e.g. 219 // "v.resize(n)", "v.reserve(n)"). 220 // 221 // FIXME: make it more intelligent to identify the pre-allocating 222 // operations before the for loop. 223 if (SM.isBeforeInTranslationUnit(Ref->getLocation(), 224 LoopStmt->getBeginLoc())) { 225 return; 226 } 227 } 228 229 std::string PartialReserveStmt; 230 if (VectorAppendCall != nullptr) { 231 PartialReserveStmt = ".reserve"; 232 } else { 233 llvm::StringRef FieldName = ProtoAddFieldCall->getMethodDecl()->getName(); 234 FieldName.consume_front("add_"); 235 std::string MutableFieldName = ("mutable_" + FieldName).str(); 236 PartialReserveStmt = "." + MutableFieldName + 237 "()->Reserve"; // e.g., ".mutable_xxx()->Reserve" 238 } 239 240 llvm::StringRef VarName = Lexer::getSourceText( 241 CharSourceRange::getTokenRange( 242 AppendCall->getImplicitObjectArgument()->getSourceRange()), 243 SM, Context->getLangOpts()); 244 245 std::string ReserveSize; 246 // Handle for-range loop cases. 247 if (RangeLoop) { 248 // Get the range-expression in a for-range statement represented as 249 // `for (range-declarator: range-expression)`. 250 StringRef RangeInitExpName = 251 Lexer::getSourceText(CharSourceRange::getTokenRange( 252 RangeLoop->getRangeInit()->getSourceRange()), 253 SM, Context->getLangOpts()); 254 ReserveSize = (RangeInitExpName + ".size()").str(); 255 } else if (ForLoop) { 256 // Handle counter-based loop cases. 257 StringRef LoopEndSource = Lexer::getSourceText( 258 CharSourceRange::getTokenRange(LoopEndExpr->getSourceRange()), SM, 259 Context->getLangOpts()); 260 ReserveSize = std::string(LoopEndSource); 261 } 262 263 auto Diag = diag(AppendCall->getBeginLoc(), 264 "%0 is called inside a loop; consider pre-allocating the " 265 "container capacity before the loop") 266 << AppendCall->getMethodDecl()->getDeclName(); 267 if (!ReserveSize.empty()) { 268 std::string ReserveStmt = 269 (VarName + PartialReserveStmt + "(" + ReserveSize + ");\n").str(); 270 Diag << FixItHint::CreateInsertion(LoopStmt->getBeginLoc(), ReserveStmt); 271 } 272 } 273 274 } // namespace clang::tidy::performance 275