xref: /llvm-project/clang-tools-extra/clang-tidy/performance/InefficientVectorOperationCheck.cpp (revision aaadaee7b228d7010ff7076f5002ebb96b5e03dc)
1 //===--- InefficientVectorOperationCheck.cpp - clang-tidy------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "InefficientVectorOperationCheck.h"
10 #include "../utils/DeclRefExprUtils.h"
11 #include "../utils/OptionsUtils.h"
12 #include "clang/AST/ASTContext.h"
13 #include "clang/ASTMatchers/ASTMatchFinder.h"
14 #include "clang/Lex/Lexer.h"
15 
16 using namespace clang::ast_matchers;
17 
18 namespace clang::tidy::performance {
19 
20 namespace {
21 
22 // Matcher names. Given the code:
23 //
24 // \code
25 // void f() {
26 //   vector<T> v;
27 //   for (int i = 0; i < 10 + 1; ++i) {
28 //     v.push_back(i);
29 //   }
30 //
31 //   SomeProto p;
32 //   for (int i = 0; i < 10 + 1; ++i) {
33 //     p.add_xxx(i);
34 //   }
35 // }
36 // \endcode
37 //
38 // The matcher names are bound to following parts of the AST:
39 //   - LoopCounterName: The entire for loop (as ForStmt).
40 //   - LoopParentName: The body of function f (as CompoundStmt).
41 //   - VectorVarDeclName: 'v' (as VarDecl).
42 //   - VectorVarDeclStmatName: The entire 'std::vector<T> v;' statement (as
43 //     DeclStmt).
44 //   - PushBackOrEmplaceBackCallName: 'v.push_back(i)' (as cxxMemberCallExpr).
45 //   - LoopInitVarName: 'i' (as VarDecl).
46 //   - LoopEndExpr: '10+1' (as Expr).
47 // If EnableProto, the proto related names are bound to the following parts:
48 //   - ProtoVarDeclName: 'p' (as VarDecl).
49 //   - ProtoVarDeclStmtName: The entire 'SomeProto p;' statement (as DeclStmt).
50 //   - ProtoAddFieldCallName: 'p.add_xxx(i)' (as cxxMemberCallExpr).
51 static const char LoopCounterName[] = "for_loop_counter";
52 static const char LoopParentName[] = "loop_parent";
53 static const char VectorVarDeclName[] = "vector_var_decl";
54 static const char VectorVarDeclStmtName[] = "vector_var_decl_stmt";
55 static const char PushBackOrEmplaceBackCallName[] = "append_call";
56 static const char ProtoVarDeclName[] = "proto_var_decl";
57 static const char ProtoVarDeclStmtName[] = "proto_var_decl_stmt";
58 static const char ProtoAddFieldCallName[] = "proto_add_field";
59 static const char LoopInitVarName[] = "loop_init_var";
60 static const char LoopEndExprName[] = "loop_end_expr";
61 static const char RangeLoopName[] = "for_range_loop";
62 
63 ast_matchers::internal::Matcher<Expr> supportedContainerTypesMatcher() {
64   return hasType(cxxRecordDecl(hasAnyName(
65       "::std::vector", "::std::set", "::std::unordered_set", "::std::map",
66       "::std::unordered_map", "::std::array", "::std::deque")));
67 }
68 
69 AST_MATCHER(Expr, hasSideEffects) {
70   return Node.HasSideEffects(Finder->getASTContext());
71 }
72 
73 } // namespace
74 
75 InefficientVectorOperationCheck::InefficientVectorOperationCheck(
76     StringRef Name, ClangTidyContext *Context)
77     : ClangTidyCheck(Name, Context),
78       VectorLikeClasses(utils::options::parseStringList(
79           Options.get("VectorLikeClasses", "::std::vector"))),
80       EnableProto(Options.get("EnableProto", false)) {}
81 
82 void InefficientVectorOperationCheck::storeOptions(
83     ClangTidyOptions::OptionMap &Opts) {
84   Options.store(Opts, "VectorLikeClasses",
85                 utils::options::serializeStringList(VectorLikeClasses));
86   Options.store(Opts, "EnableProto", EnableProto);
87 }
88 
89 void InefficientVectorOperationCheck::addMatcher(
90     const DeclarationMatcher &TargetRecordDecl, StringRef VarDeclName,
91     StringRef VarDeclStmtName, const DeclarationMatcher &AppendMethodDecl,
92     StringRef AppendCallName, MatchFinder *Finder) {
93   const auto DefaultConstructorCall = cxxConstructExpr(
94       hasType(TargetRecordDecl),
95       hasDeclaration(cxxConstructorDecl(isDefaultConstructor())));
96   const auto TargetVarDecl =
97       varDecl(hasInitializer(DefaultConstructorCall)).bind(VarDeclName);
98   const auto TargetVarDefStmt =
99       declStmt(hasSingleDecl(equalsBoundNode(std::string(VarDeclName))))
100           .bind(VarDeclStmtName);
101 
102   const auto AppendCallExpr =
103       cxxMemberCallExpr(
104           callee(AppendMethodDecl), on(hasType(TargetRecordDecl)),
105           onImplicitObjectArgument(declRefExpr(to(TargetVarDecl))))
106           .bind(AppendCallName);
107   const auto AppendCall = expr(ignoringImplicit(AppendCallExpr));
108   const auto LoopVarInit = declStmt(hasSingleDecl(
109       varDecl(hasInitializer(ignoringParenImpCasts(integerLiteral(equals(0)))))
110           .bind(LoopInitVarName)));
111   const auto RefersToLoopVar = ignoringParenImpCasts(
112       declRefExpr(to(varDecl(equalsBoundNode(LoopInitVarName)))));
113 
114   // Matchers for the loop whose body has only 1 push_back/emplace_back calling
115   // statement.
116   const auto HasInterestingLoopBody = hasBody(
117       anyOf(compoundStmt(statementCountIs(1), has(AppendCall)), AppendCall));
118   const auto InInterestingCompoundStmt =
119       hasParent(compoundStmt(has(TargetVarDefStmt)).bind(LoopParentName));
120 
121   // Match counter-based for loops:
122   //  for (int i = 0; i < n; ++i) {
123   //    v.push_back(...);
124   //    // Or: proto.add_xxx(...);
125   //  }
126   //
127   // FIXME: Support more types of counter-based loops like decrement loops.
128   Finder->addMatcher(
129       forStmt(
130           hasLoopInit(LoopVarInit),
131           hasCondition(binaryOperator(
132               hasOperatorName("<"), hasLHS(RefersToLoopVar),
133               hasRHS(expr(unless(hasDescendant(expr(RefersToLoopVar))))
134                          .bind(LoopEndExprName)))),
135           hasIncrement(unaryOperator(hasOperatorName("++"),
136                                      hasUnaryOperand(RefersToLoopVar))),
137           HasInterestingLoopBody, InInterestingCompoundStmt)
138           .bind(LoopCounterName),
139       this);
140 
141   // Match for-range loops:
142   //   for (const auto& E : data) {
143   //     v.push_back(...);
144   //     // Or: proto.add_xxx(...);
145   //   }
146   //
147   // FIXME: Support more complex range-expressions.
148   Finder->addMatcher(
149       cxxForRangeStmt(
150           hasRangeInit(
151               anyOf(declRefExpr(supportedContainerTypesMatcher()),
152                     memberExpr(hasObjectExpression(unless(hasSideEffects())),
153                                supportedContainerTypesMatcher()))),
154           HasInterestingLoopBody, InInterestingCompoundStmt)
155           .bind(RangeLoopName),
156       this);
157 }
158 
159 void InefficientVectorOperationCheck::registerMatchers(MatchFinder *Finder) {
160   const auto VectorDecl = cxxRecordDecl(hasAnyName(VectorLikeClasses));
161   const auto AppendMethodDecl =
162       cxxMethodDecl(hasAnyName("push_back", "emplace_back"));
163   addMatcher(VectorDecl, VectorVarDeclName, VectorVarDeclStmtName,
164              AppendMethodDecl, PushBackOrEmplaceBackCallName, Finder);
165 
166   if (EnableProto) {
167     const auto ProtoDecl =
168         cxxRecordDecl(isDerivedFrom("::proto2::MessageLite"));
169 
170     // A method's name starts with "add_" might not mean it's an add field
171     // call; it could be the getter for a proto field of which the name starts
172     // with "add_". So we exclude const methods.
173     const auto AddFieldMethodDecl =
174         cxxMethodDecl(matchesName("::add_"), unless(isConst()));
175     addMatcher(ProtoDecl, ProtoVarDeclName, ProtoVarDeclStmtName,
176                AddFieldMethodDecl, ProtoAddFieldCallName, Finder);
177   }
178 }
179 
180 void InefficientVectorOperationCheck::check(
181     const MatchFinder::MatchResult &Result) {
182   auto* Context = Result.Context;
183   if (Context->getDiagnostics().hasUncompilableErrorOccurred())
184     return;
185 
186   const SourceManager &SM = *Result.SourceManager;
187   const auto *VectorVarDecl =
188       Result.Nodes.getNodeAs<VarDecl>(VectorVarDeclName);
189   const auto *ForLoop = Result.Nodes.getNodeAs<ForStmt>(LoopCounterName);
190   const auto *RangeLoop =
191       Result.Nodes.getNodeAs<CXXForRangeStmt>(RangeLoopName);
192   const auto *VectorAppendCall =
193       Result.Nodes.getNodeAs<CXXMemberCallExpr>(PushBackOrEmplaceBackCallName);
194   const auto *ProtoVarDecl = Result.Nodes.getNodeAs<VarDecl>(ProtoVarDeclName);
195   const auto *ProtoAddFieldCall =
196       Result.Nodes.getNodeAs<CXXMemberCallExpr>(ProtoAddFieldCallName);
197   const auto *LoopEndExpr = Result.Nodes.getNodeAs<Expr>(LoopEndExprName);
198   const auto *LoopParent = Result.Nodes.getNodeAs<CompoundStmt>(LoopParentName);
199 
200   const CXXMemberCallExpr *AppendCall =
201       VectorAppendCall ? VectorAppendCall : ProtoAddFieldCall;
202   assert(AppendCall && "no append call expression");
203 
204   const Stmt *LoopStmt = ForLoop;
205   if (!LoopStmt)
206     LoopStmt = RangeLoop;
207 
208   const auto *TargetVarDecl = VectorVarDecl;
209   if (!TargetVarDecl)
210     TargetVarDecl = ProtoVarDecl;
211 
212   llvm::SmallPtrSet<const DeclRefExpr *, 16> AllVarRefs =
213       utils::decl_ref_expr::allDeclRefExprs(*TargetVarDecl, *LoopParent,
214                                             *Context);
215   for (const auto *Ref : AllVarRefs) {
216     // Skip cases where there are usages (defined as DeclRefExpr that refers
217     // to "v") of vector variable / proto variable `v` before the for loop. We
218     // consider these usages are operations causing memory preallocation (e.g.
219     // "v.resize(n)", "v.reserve(n)").
220     //
221     // FIXME: make it more intelligent to identify the pre-allocating
222     // operations before the for loop.
223     if (SM.isBeforeInTranslationUnit(Ref->getLocation(),
224                                      LoopStmt->getBeginLoc())) {
225       return;
226     }
227   }
228 
229   std::string PartialReserveStmt;
230   if (VectorAppendCall != nullptr) {
231     PartialReserveStmt = ".reserve";
232   } else {
233     llvm::StringRef FieldName = ProtoAddFieldCall->getMethodDecl()->getName();
234     FieldName.consume_front("add_");
235     std::string MutableFieldName = ("mutable_" + FieldName).str();
236     PartialReserveStmt = "." + MutableFieldName +
237                          "()->Reserve"; // e.g., ".mutable_xxx()->Reserve"
238   }
239 
240   llvm::StringRef VarName = Lexer::getSourceText(
241       CharSourceRange::getTokenRange(
242           AppendCall->getImplicitObjectArgument()->getSourceRange()),
243       SM, Context->getLangOpts());
244 
245   std::string ReserveSize;
246   // Handle for-range loop cases.
247   if (RangeLoop) {
248     // Get the range-expression in a for-range statement represented as
249     // `for (range-declarator: range-expression)`.
250     StringRef RangeInitExpName =
251         Lexer::getSourceText(CharSourceRange::getTokenRange(
252                                  RangeLoop->getRangeInit()->getSourceRange()),
253                              SM, Context->getLangOpts());
254     ReserveSize = (RangeInitExpName + ".size()").str();
255   } else if (ForLoop) {
256     // Handle counter-based loop cases.
257     StringRef LoopEndSource = Lexer::getSourceText(
258         CharSourceRange::getTokenRange(LoopEndExpr->getSourceRange()), SM,
259         Context->getLangOpts());
260     ReserveSize = std::string(LoopEndSource);
261   }
262 
263   auto Diag = diag(AppendCall->getBeginLoc(),
264                    "%0 is called inside a loop; consider pre-allocating the "
265                    "container capacity before the loop")
266               << AppendCall->getMethodDecl()->getDeclName();
267   if (!ReserveSize.empty()) {
268     std::string ReserveStmt =
269         (VarName + PartialReserveStmt + "(" + ReserveSize + ");\n").str();
270     Diag << FixItHint::CreateInsertion(LoopStmt->getBeginLoc(), ReserveStmt);
271   }
272 }
273 
274 } // namespace clang::tidy::performance
275